author    Dimitry Andric <dim@FreeBSD.org>    2015-05-27 18:44:32 +0000
committer Dimitry Andric <dim@FreeBSD.org>    2015-05-27 18:44:32 +0000
commit    5a5ac124e1efaf208671f01c46edb15f29ed2a0b (patch)
tree      a6140557876943cdd800ee997c9317283394b22c /test/CodeGen
parent    f03b5bed27d0d2eafd68562ce14f8b5e3f1f0801 (diff)
Vendor import of llvm trunk r238337 (tag: vendor/llvm/llvm-trunk-r238337)
Notes:
    svn path=/vendor/llvm/dist/; revision=283625
    svn path=/vendor/llvm/llvm-trunk-r238337/; revision=283626; tag=vendor/llvm/llvm-trunk-r238337
Diffstat (limited to 'test/CodeGen')
-rw-r--r--test/CodeGen/AArch64/128bit_load_store.ll8
-rw-r--r--test/CodeGen/AArch64/PBQP-chain.ll90
-rw-r--r--test/CodeGen/AArch64/PBQP-coalesce-benefit.ll6
-rw-r--r--test/CodeGen/AArch64/PBQP-csr.ll54
-rw-r--r--test/CodeGen/AArch64/Redundantstore.ll6
-rw-r--r--test/CodeGen/AArch64/a57-csel.ll2
-rw-r--r--test/CodeGen/AArch64/aarch64-2014-08-11-MachineCombinerCrash.ll105
-rw-r--r--test/CodeGen/AArch64/aarch64-2014-12-02-combine-soften.ll2
-rw-r--r--test/CodeGen/AArch64/aarch64-a57-fp-load-balancing.ll168
-rw-r--r--test/CodeGen/AArch64/aarch64-address-type-promotion-assertion.ll12
-rw-r--r--test/CodeGen/AArch64/aarch64-address-type-promotion.ll10
-rw-r--r--test/CodeGen/AArch64/aarch64-be-bv.ll64
-rw-r--r--test/CodeGen/AArch64/aarch64-dynamic-stack-layout.ll491
-rw-r--r--test/CodeGen/AArch64/aarch64-fix-cortex-a53-835769.ll58
-rw-r--r--test/CodeGen/AArch64/aarch64-gep-opt.ll46
-rw-r--r--test/CodeGen/AArch64/aarch64-smull.ll96
-rw-r--r--test/CodeGen/AArch64/addsub-shifted.ll34
-rw-r--r--test/CodeGen/AArch64/addsub.ll46
-rw-r--r--test/CodeGen/AArch64/addsub_ext.ll16
-rw-r--r--test/CodeGen/AArch64/alloca.ll2
-rw-r--r--test/CodeGen/AArch64/analyzecmp.ll8
-rw-r--r--test/CodeGen/AArch64/and-mask-removal.ll8
-rw-r--r--test/CodeGen/AArch64/argument-blocks.ll2
-rw-r--r--test/CodeGen/AArch64/arm64-2011-03-17-AsmPrinterCrash.ll40
-rw-r--r--test/CodeGen/AArch64/arm64-2011-03-21-Unaligned-Frame-Index.ll2
-rw-r--r--test/CodeGen/AArch64/arm64-2011-04-21-CPSRBug.ll2
-rw-r--r--test/CodeGen/AArch64/arm64-2011-10-18-LdStOptBug.ll2
-rw-r--r--test/CodeGen/AArch64/arm64-2012-01-11-ComparisonDAGCrash.ll2
-rw-r--r--test/CodeGen/AArch64/arm64-2012-05-22-LdStOptBug.ll18
-rw-r--r--test/CodeGen/AArch64/arm64-2012-06-06-FPToUI.ll24
-rw-r--r--test/CodeGen/AArch64/arm64-2012-07-11-InstrEmitterBug.ll2
-rw-r--r--test/CodeGen/AArch64/arm64-AdvSIMD-Scalar.ll4
-rw-r--r--test/CodeGen/AArch64/arm64-aapcs.ll13
-rw-r--r--test/CodeGen/AArch64/arm64-abi-varargs.ll44
-rw-r--r--test/CodeGen/AArch64/arm64-abi.ll12
-rw-r--r--test/CodeGen/AArch64/arm64-abi_align.ll68
-rw-r--r--test/CodeGen/AArch64/arm64-addr-mode-folding.ll64
-rw-r--r--test/CodeGen/AArch64/arm64-addr-type-promotion.ll26
-rw-r--r--test/CodeGen/AArch64/arm64-addrmode.ll48
-rw-r--r--test/CodeGen/AArch64/arm64-alloc-no-stack-realign.ll4
-rw-r--r--test/CodeGen/AArch64/arm64-alloca-frame-pointer-offset.ll10
-rw-r--r--test/CodeGen/AArch64/arm64-andCmpBrToTBZ.ll4
-rw-r--r--test/CodeGen/AArch64/arm64-anyregcc-crash.ll2
-rw-r--r--test/CodeGen/AArch64/arm64-anyregcc.ll16
-rw-r--r--test/CodeGen/AArch64/arm64-atomic-128.ll4
-rw-r--r--test/CodeGen/AArch64/arm64-atomic.ll194
-rw-r--r--test/CodeGen/AArch64/arm64-basic-pic.ll8
-rw-r--r--test/CodeGen/AArch64/arm64-bcc.ll8
-rw-r--r--test/CodeGen/AArch64/arm64-big-endian-bitconverts.ll168
-rw-r--r--test/CodeGen/AArch64/arm64-big-endian-varargs.ll18
-rw-r--r--test/CodeGen/AArch64/arm64-big-endian-vector-caller.ll340
-rw-r--r--test/CodeGen/AArch64/arm64-big-stack.ll2
-rw-r--r--test/CodeGen/AArch64/arm64-bitfield-extract.ll52
-rw-r--r--test/CodeGen/AArch64/arm64-blockaddress.ll2
-rw-r--r--test/CodeGen/AArch64/arm64-call-tailcalls.ll4
-rw-r--r--test/CodeGen/AArch64/arm64-cast-opt.ll10
-rw-r--r--test/CodeGen/AArch64/arm64-ccmp-heuristics.ll108
-rw-r--r--test/CodeGen/AArch64/arm64-ccmp.ll6
-rw-r--r--test/CodeGen/AArch64/arm64-code-model-large-abs.ll8
-rw-r--r--test/CodeGen/AArch64/arm64-codegen-prepare-extload.ll638
-rw-r--r--test/CodeGen/AArch64/arm64-collect-loh-garbage-crash.ll8
-rw-r--r--test/CodeGen/AArch64/arm64-collect-loh-str.ll4
-rw-r--r--test/CodeGen/AArch64/arm64-collect-loh.ll10
-rw-r--r--test/CodeGen/AArch64/arm64-complex-copy-noneon.ll12
-rw-r--r--test/CodeGen/AArch64/arm64-const-addr.ll12
-rw-r--r--test/CodeGen/AArch64/arm64-convert-v4f64.ll63
-rw-r--r--test/CodeGen/AArch64/arm64-cse.ll10
-rw-r--r--test/CodeGen/AArch64/arm64-dagcombiner-dead-indexed-load.ll4
-rw-r--r--test/CodeGen/AArch64/arm64-dagcombiner-load-slicing.ll42
-rw-r--r--test/CodeGen/AArch64/arm64-dup.ll53
-rw-r--r--test/CodeGen/AArch64/arm64-early-ifcvt.ll10
-rw-r--r--test/CodeGen/AArch64/arm64-elf-globals.ll18
-rw-r--r--test/CodeGen/AArch64/arm64-ext.ll46
-rw-r--r--test/CodeGen/AArch64/arm64-extend.ll4
-rw-r--r--test/CodeGen/AArch64/arm64-extern-weak.ll2
-rw-r--r--test/CodeGen/AArch64/arm64-extload-knownzero.ll2
-rw-r--r--test/CodeGen/AArch64/arm64-fast-isel-addr-offset.ll12
-rw-r--r--test/CodeGen/AArch64/arm64-fast-isel-alloca.ll4
-rw-r--r--test/CodeGen/AArch64/arm64-fast-isel-br.ll20
-rw-r--r--test/CodeGen/AArch64/arm64-fast-isel-call.ll10
-rw-r--r--test/CodeGen/AArch64/arm64-fast-isel-conversion.ll28
-rw-r--r--test/CodeGen/AArch64/arm64-fast-isel-fcmp.ll2
-rw-r--r--test/CodeGen/AArch64/arm64-fast-isel-gv.ll6
-rw-r--r--test/CodeGen/AArch64/arm64-fast-isel-icmp.ll2
-rw-r--r--test/CodeGen/AArch64/arm64-fast-isel-indirectbr.ll10
-rw-r--r--test/CodeGen/AArch64/arm64-fast-isel-intrinsic.ll20
-rw-r--r--test/CodeGen/AArch64/arm64-fast-isel-materialize.ll2
-rw-r--r--test/CodeGen/AArch64/arm64-fast-isel-rem.ll4
-rw-r--r--test/CodeGen/AArch64/arm64-fast-isel-ret.ll12
-rw-r--r--test/CodeGen/AArch64/arm64-fast-isel-store.ll2
-rw-r--r--test/CodeGen/AArch64/arm64-fast-isel.ll62
-rw-r--r--test/CodeGen/AArch64/arm64-fastisel-gep-promote-before-add.ll6
-rw-r--r--test/CodeGen/AArch64/arm64-fcopysign.ll2
-rw-r--r--test/CodeGen/AArch64/arm64-fixed-point-scalar-cvt-dagcombine.ll2
-rw-r--r--test/CodeGen/AArch64/arm64-fmax.ll34
-rw-r--r--test/CodeGen/AArch64/arm64-fmuladd.ll42
-rw-r--r--test/CodeGen/AArch64/arm64-fold-address.ll36
-rw-r--r--test/CodeGen/AArch64/arm64-fold-lsl.ll18
-rw-r--r--test/CodeGen/AArch64/arm64-fp128-folding.ll2
-rw-r--r--test/CodeGen/AArch64/arm64-fp128.ll48
-rw-r--r--test/CodeGen/AArch64/arm64-hello.ll2
-rw-r--r--test/CodeGen/AArch64/arm64-i16-subreg-extract.ll2
-rw-r--r--test/CodeGen/AArch64/arm64-indexed-memory.ll126
-rw-r--r--test/CodeGen/AArch64/arm64-indexed-vector-ldst-2.ll6
-rw-r--r--test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll1389
-rw-r--r--test/CodeGen/AArch64/arm64-inline-asm.ll14
-rw-r--r--test/CodeGen/AArch64/arm64-join-reserved.ll2
-rw-r--r--test/CodeGen/AArch64/arm64-large-frame.ll6
-rw-r--r--test/CodeGen/AArch64/arm64-ld1.ll360
-rw-r--r--test/CodeGen/AArch64/arm64-ldp-aa.ll60
-rw-r--r--test/CodeGen/AArch64/arm64-ldp.ll263
-rw-r--r--test/CodeGen/AArch64/arm64-ldur.ll28
-rw-r--r--test/CodeGen/AArch64/arm64-memcpy-inline.ll14
-rw-r--r--test/CodeGen/AArch64/arm64-memset-inline.ll2
-rw-r--r--test/CodeGen/AArch64/arm64-misaligned-memcpy-inline.ll14
-rw-r--r--test/CodeGen/AArch64/arm64-misched-basic-A53.ll30
-rw-r--r--test/CodeGen/AArch64/arm64-misched-basic-A57.ll32
-rw-r--r--test/CodeGen/AArch64/arm64-neon-copy.ll4
-rw-r--r--test/CodeGen/AArch64/arm64-neon-select_cc.ll30
-rw-r--r--test/CodeGen/AArch64/arm64-neon-simd-ldst-one.ll52
-rw-r--r--test/CodeGen/AArch64/arm64-neon-v1i1-setcc.ll8
-rw-r--r--test/CodeGen/AArch64/arm64-neon-v8.1a.ll456
-rw-r--r--test/CodeGen/AArch64/arm64-patchpoint-scratch-regs.ll4
-rw-r--r--test/CodeGen/AArch64/arm64-patchpoint-webkit_jscc.ll8
-rw-r--r--test/CodeGen/AArch64/arm64-patchpoint.ll24
-rw-r--r--test/CodeGen/AArch64/arm64-pic-local-symbol.ll4
-rw-r--r--test/CodeGen/AArch64/arm64-platform-reg.ll2
-rw-r--r--test/CodeGen/AArch64/arm64-popcnt.ll20
-rw-r--r--test/CodeGen/AArch64/arm64-prefetch.ll50
-rw-r--r--test/CodeGen/AArch64/arm64-promote-const.ll113
-rw-r--r--test/CodeGen/AArch64/arm64-redzone.ll6
-rw-r--r--test/CodeGen/AArch64/arm64-register-offset-addressing.ll28
-rw-r--r--test/CodeGen/AArch64/arm64-regress-interphase-shift.ll2
-rw-r--r--test/CodeGen/AArch64/arm64-return-vector.ll2
-rw-r--r--test/CodeGen/AArch64/arm64-rev.ll38
-rw-r--r--test/CodeGen/AArch64/arm64-scaled_iv.ll12
-rw-r--r--test/CodeGen/AArch64/arm64-scvt.ll176
-rw-r--r--test/CodeGen/AArch64/arm64-shrink-wrapping.ll502
-rw-r--r--test/CodeGen/AArch64/arm64-sitofp-combine-chains.ll2
-rw-r--r--test/CodeGen/AArch64/arm64-smaxv.ll72
-rw-r--r--test/CodeGen/AArch64/arm64-sminv.ll72
-rw-r--r--test/CodeGen/AArch64/arm64-spill-lr.ll46
-rw-r--r--test/CodeGen/AArch64/arm64-spill.ll2
-rw-r--r--test/CodeGen/AArch64/arm64-st1.ll89
-rw-r--r--test/CodeGen/AArch64/arm64-stack-no-frame.ll4
-rw-r--r--test/CodeGen/AArch64/arm64-stackmap-nops.ll2
-rw-r--r--test/CodeGen/AArch64/arm64-stackmap.ll24
-rw-r--r--test/CodeGen/AArch64/arm64-stp-aa.ll111
-rw-r--r--test/CodeGen/AArch64/arm64-stp.ll24
-rw-r--r--test/CodeGen/AArch64/arm64-strict-align.ll4
-rw-r--r--test/CodeGen/AArch64/arm64-stur.ll12
-rw-r--r--test/CodeGen/AArch64/arm64-this-return.ll10
-rw-r--r--test/CodeGen/AArch64/arm64-tls-darwin.ll2
-rw-r--r--test/CodeGen/AArch64/arm64-tls-dynamic-together.ll2
-rw-r--r--test/CodeGen/AArch64/arm64-tls-dynamics.ll8
-rw-r--r--test/CodeGen/AArch64/arm64-tls-execs.ll4
-rw-r--r--test/CodeGen/AArch64/arm64-triv-disjoint-mem-access.ll8
-rw-r--r--test/CodeGen/AArch64/arm64-trn.ll40
-rw-r--r--test/CodeGen/AArch64/arm64-trunc-store.ll12
-rw-r--r--test/CodeGen/AArch64/arm64-umaxv.ll74
-rw-r--r--test/CodeGen/AArch64/arm64-uminv.ll73
-rw-r--r--test/CodeGen/AArch64/arm64-unaligned_ldst.ll6
-rw-r--r--test/CodeGen/AArch64/arm64-uzp.ll32
-rw-r--r--test/CodeGen/AArch64/arm64-vabs.ll288
-rw-r--r--test/CodeGen/AArch64/arm64-vadd.ll248
-rw-r--r--test/CodeGen/AArch64/arm64-vaddv.ll164
-rw-r--r--test/CodeGen/AArch64/arm64-vbitwise.ll18
-rw-r--r--test/CodeGen/AArch64/arm64-vcmp.ll52
-rw-r--r--test/CodeGen/AArch64/arm64-vcnt.ll12
-rw-r--r--test/CodeGen/AArch64/arm64-vcvt.ll2
-rw-r--r--test/CodeGen/AArch64/arm64-vector-imm.ll8
-rw-r--r--test/CodeGen/AArch64/arm64-vector-ldst.ll240
-rw-r--r--test/CodeGen/AArch64/arm64-vext.ll176
-rw-r--r--test/CodeGen/AArch64/arm64-vhadd.ll96
-rw-r--r--test/CodeGen/AArch64/arm64-vhsub.ll48
-rw-r--r--test/CodeGen/AArch64/arm64-virtual_base.ll4
-rw-r--r--test/CodeGen/AArch64/arm64-vmax.ll264
-rw-r--r--test/CodeGen/AArch64/arm64-vmul.ll446
-rw-r--r--test/CodeGen/AArch64/arm64-volatile.ll16
-rw-r--r--test/CodeGen/AArch64/arm64-vqadd.ll112
-rw-r--r--test/CodeGen/AArch64/arm64-vqsub.ll56
-rw-r--r--test/CodeGen/AArch64/arm64-vshift.ll608
-rw-r--r--test/CodeGen/AArch64/arm64-vshr.ll12
-rw-r--r--test/CodeGen/AArch64/arm64-vshuffle.ll93
-rw-r--r--test/CodeGen/AArch64/arm64-vsqrt.ll56
-rw-r--r--test/CodeGen/AArch64/arm64-vsra.ll56
-rw-r--r--test/CodeGen/AArch64/arm64-vsub.ll120
-rw-r--r--test/CodeGen/AArch64/arm64-weak-reference.ll2
-rw-r--r--test/CodeGen/AArch64/arm64-xaluo.ll2
-rw-r--r--test/CodeGen/AArch64/arm64-zextload-unscaled.ll12
-rw-r--r--test/CodeGen/AArch64/arm64-zip.ll32
-rw-r--r--test/CodeGen/AArch64/assertion-rc-mismatch.ll4
-rw-r--r--test/CodeGen/AArch64/atomic-ops-not-barriers.ll2
-rw-r--r--test/CodeGen/AArch64/atomic-ops.ll14
-rw-r--r--test/CodeGen/AArch64/basic-pic.ll8
-rw-r--r--test/CodeGen/AArch64/bitcast.ll27
-rw-r--r--test/CodeGen/AArch64/bitfield-insert-0.ll4
-rw-r--r--test/CodeGen/AArch64/bitfield-insert.ll36
-rw-r--r--test/CodeGen/AArch64/bitfield.ll8
-rw-r--r--test/CodeGen/AArch64/blockaddress.ll2
-rw-r--r--test/CodeGen/AArch64/bool-loads.ll8
-rw-r--r--test/CodeGen/AArch64/br-to-eh-lpad.ll8
-rw-r--r--test/CodeGen/AArch64/breg.ll2
-rw-r--r--test/CodeGen/AArch64/callee-save.ll64
-rw-r--r--test/CodeGen/AArch64/cmpwithshort.ll12
-rw-r--r--test/CodeGen/AArch64/code-model-large-abs.ll8
-rw-r--r--test/CodeGen/AArch64/combine-comparisons-by-cse.ll89
-rw-r--r--test/CodeGen/AArch64/compare-branch.ll8
-rw-r--r--test/CodeGen/AArch64/complex-copy-noneon.ll12
-rw-r--r--test/CodeGen/AArch64/complex-int-to-fp.ll2
-rw-r--r--test/CodeGen/AArch64/concat_vector-scalar-combine.ll125
-rw-r--r--test/CodeGen/AArch64/concat_vector-truncate-combine.ll43
-rw-r--r--test/CodeGen/AArch64/concat_vector-truncated-scalar-combine.ll18
-rw-r--r--test/CodeGen/AArch64/cpus.ll1
-rw-r--r--test/CodeGen/AArch64/dag-combine-invaraints.ll10
-rw-r--r--test/CodeGen/AArch64/dp-3source.ll4
-rw-r--r--test/CodeGen/AArch64/dp1.ll28
-rw-r--r--test/CodeGen/AArch64/dp2.ll54
-rw-r--r--test/CodeGen/AArch64/eliminate-trunc.ll8
-rw-r--r--test/CodeGen/AArch64/extern-weak.ll2
-rw-r--r--test/CodeGen/AArch64/f16-convert.ll67
-rw-r--r--test/CodeGen/AArch64/f16-instructions.ll765
-rw-r--r--test/CodeGen/AArch64/fast-isel-address-extends.ll39
-rw-r--r--test/CodeGen/AArch64/fast-isel-addressing-modes.ll92
-rw-r--r--test/CodeGen/AArch64/fast-isel-branch-cond-split.ll2
-rw-r--r--test/CodeGen/AArch64/fast-isel-branch_weights.ll2
-rw-r--r--test/CodeGen/AArch64/fast-isel-call-return.ll2
-rw-r--r--test/CodeGen/AArch64/fast-isel-cbz.ll2
-rw-r--r--test/CodeGen/AArch64/fast-isel-cmp-branch.ll2
-rw-r--r--test/CodeGen/AArch64/fast-isel-folding.ll2
-rw-r--r--test/CodeGen/AArch64/fast-isel-gep.ll14
-rw-r--r--test/CodeGen/AArch64/fast-isel-int-ext.ll86
-rw-r--r--test/CodeGen/AArch64/fast-isel-int-ext2.ll62
-rw-r--r--test/CodeGen/AArch64/fast-isel-int-ext3.ll20
-rw-r--r--test/CodeGen/AArch64/fast-isel-int-ext4.ll2
-rw-r--r--test/CodeGen/AArch64/fast-isel-int-ext5.ll19
-rw-r--r--test/CodeGen/AArch64/fast-isel-logic-op.ll2
-rw-r--r--test/CodeGen/AArch64/fast-isel-memcpy.ll2
-rw-r--r--test/CodeGen/AArch64/fast-isel-mul.ll2
-rw-r--r--test/CodeGen/AArch64/fast-isel-runtime-libcall.ll4
-rw-r--r--test/CodeGen/AArch64/fast-isel-sdiv.ll2
-rw-r--r--test/CodeGen/AArch64/fast-isel-select.ll2
-rw-r--r--test/CodeGen/AArch64/fast-isel-shift.ll2
-rw-r--r--test/CodeGen/AArch64/fast-isel-sqrt.ll2
-rw-r--r--test/CodeGen/AArch64/fast-isel-tbz.ll2
-rw-r--r--test/CodeGen/AArch64/fast-isel-trunc.ll2
-rw-r--r--test/CodeGen/AArch64/fast-isel-vector-arithmetic.ll2
-rw-r--r--test/CodeGen/AArch64/fast-isel-vret.ll2
-rw-r--r--test/CodeGen/AArch64/floatdp_1source.ll10
-rw-r--r--test/CodeGen/AArch64/floatdp_2source.ll4
-rw-r--r--test/CodeGen/AArch64/fold-constants.ll21
-rw-r--r--test/CodeGen/AArch64/fp128-folding.ll2
-rw-r--r--test/CodeGen/AArch64/fp16-instructions.ll109
-rw-r--r--test/CodeGen/AArch64/fp16-v16-instructions.ll105
-rw-r--r--test/CodeGen/AArch64/fp16-v4-instructions.ll103
-rw-r--r--test/CodeGen/AArch64/fp16-v8-instructions.ll118
-rw-r--r--test/CodeGen/AArch64/fp16-vector-load-store.ll12
-rw-r--r--test/CodeGen/AArch64/fp16-vector-nvcast.ll89
-rw-r--r--test/CodeGen/AArch64/fpimm.ll6
-rw-r--r--test/CodeGen/AArch64/frameaddr.ll2
-rw-r--r--test/CodeGen/AArch64/free-zext.ll4
-rw-r--r--test/CodeGen/AArch64/func-argpassing.ll32
-rw-r--r--test/CodeGen/AArch64/func-calls.ll12
-rw-r--r--test/CodeGen/AArch64/funcptr_cast.ll2
-rw-r--r--test/CodeGen/AArch64/function-subtarget-features.ll21
-rw-r--r--test/CodeGen/AArch64/ghc-cc.ll6
-rw-r--r--test/CodeGen/AArch64/global-alignment.ll10
-rw-r--r--test/CodeGen/AArch64/global-merge-1.ll12
-rw-r--r--test/CodeGen/AArch64/global-merge-2.ll6
-rw-r--r--test/CodeGen/AArch64/global-merge-3.ll10
-rw-r--r--test/CodeGen/AArch64/global-merge-4.ll54
-rw-r--r--test/CodeGen/AArch64/global-merge-group-by-use.ll94
-rw-r--r--test/CodeGen/AArch64/global-merge-ignore-single-use.ll63
-rw-r--r--test/CodeGen/AArch64/half.ll8
-rw-r--r--test/CodeGen/AArch64/i1-contents.ll4
-rw-r--r--test/CodeGen/AArch64/i128-align.ll4
-rw-r--r--test/CodeGen/AArch64/inline-asm-globaladdress.ll20
-rw-r--r--test/CodeGen/AArch64/intrinsics-memory-barrier.ll6
-rw-r--r--test/CodeGen/AArch64/large_shift.ll21
-rw-r--r--test/CodeGen/AArch64/ldst-opt.ll270
-rw-r--r--test/CodeGen/AArch64/ldst-regoffset.ll106
-rw-r--r--test/CodeGen/AArch64/ldst-unscaledimm.ll92
-rw-r--r--test/CodeGen/AArch64/ldst-unsignedimm.ll80
-rw-r--r--test/CodeGen/AArch64/lit.local.cfg2
-rw-r--r--test/CodeGen/AArch64/literal_pools_float.ll4
-rw-r--r--test/CodeGen/AArch64/local_vars.ll2
-rw-r--r--test/CodeGen/AArch64/logical_shifted_reg.ll12
-rw-r--r--test/CodeGen/AArch64/machine-copy-prop.ll2
-rw-r--r--test/CodeGen/AArch64/machine-sink-kill-flags.ll29
-rw-r--r--test/CodeGen/AArch64/machine_cse.ll10
-rw-r--r--test/CodeGen/AArch64/merge-store.ll20
-rw-r--r--test/CodeGen/AArch64/minmax.ll96
-rw-r--r--test/CodeGen/AArch64/mul_pow2.ll3
-rw-r--r--test/CodeGen/AArch64/neon-fpround_f128.ll4
-rw-r--r--test/CodeGen/AArch64/neon-scalar-copy.ll126
-rw-r--r--test/CodeGen/AArch64/neon-truncStore-extLoad.ll6
-rw-r--r--test/CodeGen/AArch64/nzcv-save.ll4
-rw-r--r--test/CodeGen/AArch64/or-combine.ll44
-rw-r--r--test/CodeGen/AArch64/paired-load.ll8
-rw-r--r--test/CodeGen/AArch64/pic-eh-stubs.ll2
-rw-r--r--test/CodeGen/AArch64/print-mrs-system-register.ll11
-rw-r--r--test/CodeGen/AArch64/ragreedy-csr.ll106
-rw-r--r--test/CodeGen/AArch64/regress-tail-livereg.ll2
-rw-r--r--test/CodeGen/AArch64/regress-tblgen-chains.ll4
-rw-r--r--test/CodeGen/AArch64/regress-w29-reserved-with-fp.ll18
-rw-r--r--test/CodeGen/AArch64/remat-float0.ll18
-rw-r--r--test/CodeGen/AArch64/remat.ll1
-rw-r--r--test/CodeGen/AArch64/rm_redundant_cmp.ll64
-rw-r--r--test/CodeGen/AArch64/sibling-call.ll4
-rw-r--r--test/CodeGen/AArch64/stack-guard-remat-bitcast.ll2
-rw-r--r--test/CodeGen/AArch64/stack_guard_remat.ll2
-rw-r--r--test/CodeGen/AArch64/stackmap-liveness.ll47
-rw-r--r--test/CodeGen/AArch64/tail-call.ll38
-rw-r--r--test/CodeGen/AArch64/tailcall-explicit-sret.ll8
-rw-r--r--test/CodeGen/AArch64/tailcall-mem-intrinsics.ll31
-rw-r--r--test/CodeGen/AArch64/tailcall_misched_graph.ll42
-rw-r--r--test/CodeGen/AArch64/tbz-tbnz.ll2
-rw-r--r--test/CodeGen/AArch64/tst-br.ll4
-rw-r--r--test/CodeGen/AArch64/vcvt-oversize.ll16
-rw-r--r--test/CodeGen/AArch64/zero-reg.ll2
-rw-r--r--test/CodeGen/ARM/2006-11-10-CycleInDAG.ll6
-rw-r--r--test/CodeGen/ARM/2007-01-19-InfiniteLoop.ll34
-rw-r--r--test/CodeGen/ARM/2007-03-07-CombinerCrash.ll6
-rw-r--r--test/CodeGen/ARM/2007-03-13-InstrSched.ll24
-rw-r--r--test/CodeGen/ARM/2007-03-21-JoinIntervalsCrash.ll10
-rw-r--r--test/CodeGen/ARM/2007-03-27-RegScavengerAssert.ll6
-rw-r--r--test/CodeGen/ARM/2007-04-02-RegScavengerAssert.ll6
-rw-r--r--test/CodeGen/ARM/2007-04-03-PEIBug.ll2
-rw-r--r--test/CodeGen/ARM/2007-04-03-UndefinedSymbol.ll48
-rw-r--r--test/CodeGen/ARM/2007-04-30-CombinerCrash.ll10
-rw-r--r--test/CodeGen/ARM/2007-05-03-BadPostIndexedLd.ll12
-rw-r--r--test/CodeGen/ARM/2007-05-07-tailmerge-1.ll24
-rw-r--r--test/CodeGen/ARM/2007-05-09-tailmerge-2.ll26
-rw-r--r--test/CodeGen/ARM/2007-05-22-tailmerge-3.ll26
-rw-r--r--test/CodeGen/ARM/2007-05-23-BadPreIndexedStore.ll6
-rw-r--r--test/CodeGen/ARM/2007-08-15-ReuseBug.ll18
-rw-r--r--test/CodeGen/ARM/2008-02-04-LocalRegAllocBug.ll2
-rw-r--r--test/CodeGen/ARM/2008-02-29-RegAllocLocal.ll2
-rw-r--r--test/CodeGen/ARM/2008-03-05-SxtInRegBug.ll2
-rw-r--r--test/CodeGen/ARM/2008-03-07-RegScavengerAssert.ll4
-rw-r--r--test/CodeGen/ARM/2008-04-04-ScavengerAssert.ll2
-rw-r--r--test/CodeGen/ARM/2008-04-10-ScavengerAssert.ll36
-rw-r--r--test/CodeGen/ARM/2008-05-19-LiveIntervalsBug.ll6
-rw-r--r--test/CodeGen/ARM/2008-05-19-ScavengerAssert.ll2
-rw-r--r--test/CodeGen/ARM/2008-07-24-CodeGenPrepCrash.ll2
-rw-r--r--test/CodeGen/ARM/2008-08-07-AsmPrintBug.ll2
-rw-r--r--test/CodeGen/ARM/2009-02-16-SpillerBug.ll4
-rw-r--r--test/CodeGen/ARM/2009-02-22-SoftenFloatVaArg.ll2
-rw-r--r--test/CodeGen/ARM/2009-02-27-SpillerBug.ll4
-rw-r--r--test/CodeGen/ARM/2009-03-07-SpillerBug.ll2
-rw-r--r--test/CodeGen/ARM/2009-03-09-AddrModeBug.ll2
-rw-r--r--test/CodeGen/ARM/2009-04-06-AsmModifier.ll6
-rw-r--r--test/CodeGen/ARM/2009-04-08-AggregateAddr.ll10
-rw-r--r--test/CodeGen/ARM/2009-04-08-FREM.ll2
-rw-r--r--test/CodeGen/ARM/2009-04-08-FloatUndef.ll2
-rw-r--r--test/CodeGen/ARM/2009-04-09-RegScavengerAsm.ll2
-rw-r--r--test/CodeGen/ARM/2009-05-07-RegAllocLocal.ll2
-rw-r--r--test/CodeGen/ARM/2009-05-11-CodePlacementCrash.ll4
-rw-r--r--test/CodeGen/ARM/2009-06-02-ISelCrash.ll2
-rw-r--r--test/CodeGen/ARM/2009-06-04-MissingLiveIn.ll2
-rw-r--r--test/CodeGen/ARM/2009-06-22-CoalescerBug.ll6
-rw-r--r--test/CodeGen/ARM/2009-06-30-RegScavengerAssert.ll18
-rw-r--r--test/CodeGen/ARM/2009-06-30-RegScavengerAssert2.ll14
-rw-r--r--test/CodeGen/ARM/2009-06-30-RegScavengerAssert3.ll14
-rw-r--r--test/CodeGen/ARM/2009-06-30-RegScavengerAssert4.ll18
-rw-r--r--test/CodeGen/ARM/2009-06-30-RegScavengerAssert5.ll2
-rw-r--r--test/CodeGen/ARM/2009-07-01-CommuteBug.ll14
-rw-r--r--test/CodeGen/ARM/2009-07-18-RewriterBug.ll868
-rw-r--r--test/CodeGen/ARM/2009-07-22-ScavengerAssert.ll2
-rw-r--r--test/CodeGen/ARM/2009-07-22-SchedulerAssert.ll4
-rw-r--r--test/CodeGen/ARM/2009-07-29-VFP3Registers.ll2
-rw-r--r--test/CodeGen/ARM/2009-08-02-RegScavengerAssert-Neon.ll8
-rw-r--r--test/CodeGen/ARM/2009-08-15-RegScavenger-EarlyClobber.ll4
-rw-r--r--test/CodeGen/ARM/2009-08-21-PostRAKill.ll12
-rw-r--r--test/CodeGen/ARM/2009-08-21-PostRAKill2.ll2
-rw-r--r--test/CodeGen/ARM/2009-08-21-PostRAKill3.ll6
-rw-r--r--test/CodeGen/ARM/2009-08-31-LSDA-Name.ll26
-rw-r--r--test/CodeGen/ARM/2009-08-31-TwoRegShuffle.ll2
-rw-r--r--test/CodeGen/ARM/2009-09-09-fpcmp-ole.ll4
-rw-r--r--test/CodeGen/ARM/2009-09-13-InvalidSubreg.ll8
-rw-r--r--test/CodeGen/ARM/2009-09-13-InvalidSuperReg.ll14
-rw-r--r--test/CodeGen/ARM/2009-09-23-LiveVariablesBug.ll2
-rw-r--r--test/CodeGen/ARM/2009-09-24-spill-align.ll2
-rw-r--r--test/CodeGen/ARM/2009-09-28-LdStOptiBug.ll4
-rw-r--r--test/CodeGen/ARM/2009-10-02-NEONSubregsBug.ll4
-rw-r--r--test/CodeGen/ARM/2009-10-16-Scope.ll20
-rw-r--r--test/CodeGen/ARM/2009-10-27-double-align.ll2
-rw-r--r--test/CodeGen/ARM/2009-11-01-NeonMoves.ll2
-rw-r--r--test/CodeGen/ARM/2009-11-02-NegativeLane.ll2
-rw-r--r--test/CodeGen/ARM/2009-11-07-SubRegAsmPrinting.ll8
-rw-r--r--test/CodeGen/ARM/2009-11-13-ScavengerAssert.ll6
-rw-r--r--test/CodeGen/ARM/2009-11-13-ScavengerAssert2.ll46
-rw-r--r--test/CodeGen/ARM/2009-11-13-VRRewriterCrash.ll34
-rw-r--r--test/CodeGen/ARM/2009-12-02-vtrn-undef.ll8
-rw-r--r--test/CodeGen/ARM/2010-03-04-eabi-fp-spill.ll24
-rw-r--r--test/CodeGen/ARM/2010-03-04-stm-undef-addr.ll14
-rw-r--r--test/CodeGen/ARM/2010-04-15-ScavengerDebugValue.ll30
-rw-r--r--test/CodeGen/ARM/2010-05-17-FastAllocCrash.ll8
-rw-r--r--test/CodeGen/ARM/2010-05-18-LocalAllocCrash.ll8
-rw-r--r--test/CodeGen/ARM/2010-05-18-PostIndexBug.ll4
-rw-r--r--test/CodeGen/ARM/2010-05-19-Shuffles.ll2
-rw-r--r--test/CodeGen/ARM/2010-05-21-BuildVector.ll18
-rw-r--r--test/CodeGen/ARM/2010-06-11-vmovdrr-bitcast.ll2
-rw-r--r--test/CodeGen/ARM/2010-06-21-LdStMultipleBug.ll36
-rwxr-xr-xtest/CodeGen/ARM/2010-06-21-nondarwin-tc.ll40
-rw-r--r--test/CodeGen/ARM/2010-06-25-Thumb2ITInvalidIterator.ll62
-rw-r--r--test/CodeGen/ARM/2010-06-29-SubregImpDefs.ll4
-rw-r--r--test/CodeGen/ARM/2010-07-26-GlobalMerge.ll28
-rw-r--r--test/CodeGen/ARM/2010-08-04-EHCrash.ll12
-rw-r--r--test/CodeGen/ARM/2010-08-04-StackVariable.ll122
-rw-r--r--test/CodeGen/ARM/2010-11-15-SpillEarlyClobber.ll30
-rw-r--r--test/CodeGen/ARM/2010-12-08-tpsoft.ll2
-rw-r--r--test/CodeGen/ARM/2010-12-15-elf-lcomm.ll13
-rw-r--r--test/CodeGen/ARM/2010-12-17-LocalStackSlotCrash.ll2
-rw-r--r--test/CodeGen/ARM/2011-01-19-MergedGlobalDbg.ll114
-rw-r--r--test/CodeGen/ARM/2011-02-04-AntidepMultidef.ll2
-rw-r--r--test/CodeGen/ARM/2011-02-07-AntidepClobber.ll12
-rw-r--r--test/CodeGen/ARM/2011-03-10-DAGCombineCrash.ll6
-rw-r--r--test/CodeGen/ARM/2011-03-15-LdStMultipleBug.ll12
-rw-r--r--test/CodeGen/ARM/2011-04-07-schediv.ll6
-rw-r--r--test/CodeGen/ARM/2011-04-11-MachineLICMBug.ll4
-rw-r--r--test/CodeGen/ARM/2011-04-12-FastRegAlloc.ll2
-rw-r--r--test/CodeGen/ARM/2011-04-15-AndVFlagPeepholeBug.ll2
-rw-r--r--test/CodeGen/ARM/2011-04-15-RegisterCmpPeephole.ll4
-rw-r--r--test/CodeGen/ARM/2011-04-26-SchedTweak.ll10
-rw-r--r--test/CodeGen/ARM/2011-06-29-MergeGlobalsAlign.ll2
-rw-r--r--test/CodeGen/ARM/2011-08-02-MergedGlobalDbg.ll108
-rw-r--r--test/CodeGen/ARM/2011-08-29-SchedCycle.ll2
-rw-r--r--test/CodeGen/ARM/2011-08-29-ldr_pre_imm.ll6
-rw-r--r--test/CodeGen/ARM/2011-09-09-OddVectorDivision.ll8
-rw-r--r--test/CodeGen/ARM/2011-09-28-CMovCombineBug.ll2
-rw-r--r--test/CodeGen/ARM/2011-10-26-ExpandUnalignedLoadCrash.ll10
-rw-r--r--test/CodeGen/ARM/2011-11-07-PromoteVectorLoadStore.ll8
-rw-r--r--test/CodeGen/ARM/2011-11-09-BitcastVectorDouble.ll2
-rw-r--r--test/CodeGen/ARM/2011-11-09-IllegalVectorFPIntConvert.ll8
-rw-r--r--test/CodeGen/ARM/2011-11-14-EarlyClobber.ll24
-rw-r--r--test/CodeGen/ARM/2011-11-28-DAGCombineBug.ll16
-rw-r--r--test/CodeGen/ARM/2011-11-29-128bitArithmetics.ll22
-rw-r--r--test/CodeGen/ARM/2011-11-30-MergeAlignment.ll4
-rw-r--r--test/CodeGen/ARM/2011-12-14-machine-sink.ll31
-rw-r--r--test/CodeGen/ARM/2011-12-19-sjlj-clobber.ll4
-rw-r--r--test/CodeGen/ARM/2012-01-23-PostRA-LICM.ll14
-rw-r--r--test/CodeGen/ARM/2012-01-24-RegSequenceLiveRange.ll2
-rw-r--r--test/CodeGen/ARM/2012-01-26-CopyPropKills.ll8
-rw-r--r--test/CodeGen/ARM/2012-02-01-CoalescerBug.ll2
-rw-r--r--test/CodeGen/ARM/2012-03-13-DAGCombineBug.ll2
-rw-r--r--test/CodeGen/ARM/2012-06-12-SchedMemLatency.ll4
-rw-r--r--test/CodeGen/ARM/2012-08-04-DtripleSpillReload.ll2
-rw-r--r--test/CodeGen/ARM/2012-08-08-legalize-unaligned.ll2
-rw-r--r--test/CodeGen/ARM/2012-08-09-neon-extload.ll12
-rw-r--r--test/CodeGen/ARM/2012-08-23-legalize-vmull.ll30
-rw-r--r--test/CodeGen/ARM/2012-08-27-CopyPhysRegCrash.ll6
-rw-r--r--test/CodeGen/ARM/2012-10-04-AAPCS-byval-align8.ll14
-rw-r--r--test/CodeGen/ARM/2012-10-04-FixedFrame-vs-byval.ll6
-rw-r--r--test/CodeGen/ARM/2012-10-18-PR14099-ByvalFrameAddress.ll3
-rw-r--r--test/CodeGen/ARM/2013-01-21-PR14992.ll12
-rw-r--r--test/CodeGen/ARM/2013-04-05-Small-ByVal-Structs-PR15293.ll16
-rw-r--r--test/CodeGen/ARM/2013-04-16-AAPCS-C5-vs-VFP.ll2
-rw-r--r--test/CodeGen/ARM/2013-04-18-load-overlap-PR14824.ll20
-rw-r--r--test/CodeGen/ARM/2013-04-21-AAPCS-VA-C.1.cp.ll4
-rw-r--r--test/CodeGen/ARM/2013-05-02-AAPCS-ByVal-Structs-C4-C5-VFP.ll11
-rw-r--r--test/CodeGen/ARM/2013-05-02-AAPCS-ByVal-Structs-C4-C5-VFP2.ll2
-rw-r--r--test/CodeGen/ARM/2013-05-07-ByteLoadSameAddress.ll38
-rw-r--r--test/CodeGen/ARM/2013-05-13-AAPCS-byval-padding.ll8
-rw-r--r--test/CodeGen/ARM/2013-05-31-char-shift-crash.ll2
-rw-r--r--test/CodeGen/ARM/2013-07-29-vector-or-combine.ll2
-rw-r--r--test/CodeGen/ARM/2013-10-11-select-stalls.ll2
-rw-r--r--test/CodeGen/ARM/2014-01-09-pseudo_expand_implicit_reg.ll8
-rw-r--r--test/CodeGen/ARM/2014-02-21-byval-reg-split-alignment.ll5
-rw-r--r--test/CodeGen/ARM/2014-07-18-earlyclobber-str-post.ll6
-rw-r--r--test/CodeGen/ARM/2014-08-04-muls-it.ll4
-rw-r--r--test/CodeGen/ARM/2015-01-21-thumbv4t-ldstr-opt.ll91
-rw-r--r--test/CodeGen/ARM/MergeConsecutiveStores.ll48
-rw-r--r--test/CodeGen/ARM/Windows/chkstk-movw-movt-isel.ll6
-rw-r--r--test/CodeGen/ARM/Windows/dllimport.ll4
-rw-r--r--test/CodeGen/ARM/Windows/frame-register.ll6
-rw-r--r--test/CodeGen/ARM/Windows/long-calls.ll2
-rw-r--r--test/CodeGen/ARM/Windows/movw-movt-relocations.ll4
-rw-r--r--test/CodeGen/ARM/Windows/pic.ll2
-rw-r--r--test/CodeGen/ARM/Windows/read-only-data.ll2
-rw-r--r--test/CodeGen/ARM/Windows/stack-probe-non-default.ll27
-rw-r--r--test/CodeGen/ARM/Windows/vla.ll4
-rw-r--r--test/CodeGen/ARM/a15-partial-update.ll8
-rw-r--r--test/CodeGen/ARM/addrmode.ll4
-rw-r--r--test/CodeGen/ARM/aggregate-padding.ll101
-rw-r--r--test/CodeGen/ARM/aliases.ll8
-rw-r--r--test/CodeGen/ARM/alloc-no-stack-realign.ll25
-rw-r--r--test/CodeGen/ARM/arguments.ll2
-rw-r--r--test/CodeGen/ARM/arm-and-tst-peephole.ll58
-rw-r--r--test/CodeGen/ARM/arm-asm.ll2
-rw-r--r--test/CodeGen/ARM/arm-modifier.ll8
-rw-r--r--test/CodeGen/ARM/arm-negative-stride.ll4
-rw-r--r--test/CodeGen/ARM/arm-ttype-target2.ll2
-rw-r--r--test/CodeGen/ARM/atomic-64bit.ll2
-rw-r--r--test/CodeGen/ARM/atomic-load-store.ll8
-rw-r--r--test/CodeGen/ARM/atomic-op.ll8
-rw-r--r--test/CodeGen/ARM/atomic-ops-v8.ll18
-rw-r--r--test/CodeGen/ARM/available_externally.ll2
-rw-r--r--test/CodeGen/ARM/avoid-cpsr-rmw.ll44
-rw-r--r--test/CodeGen/ARM/bfi.ll2
-rw-r--r--test/CodeGen/ARM/bfx.ll12
-rw-r--r--test/CodeGen/ARM/big-endian-neon-bitconv.ll96
-rw-r--r--test/CodeGen/ARM/big-endian-neon-extend.ll85
-rw-r--r--test/CodeGen/ARM/big-endian-neon-trunc-store.ll4
-rw-r--r--test/CodeGen/ARM/big-endian-ret-f64.ll2
-rw-r--r--test/CodeGen/ARM/big-endian-vector-caller.ll168
-rw-r--r--test/CodeGen/ARM/bswap16.ll4
-rw-r--r--test/CodeGen/ARM/build-attributes.ll297
-rw-r--r--test/CodeGen/ARM/bx_fold.ll4
-rw-r--r--test/CodeGen/ARM/byval-align.ll76
-rw-r--r--test/CodeGen/ARM/cache-intrinsic.ll8
-rw-r--r--test/CodeGen/ARM/call-tc.ll6
-rw-r--r--test/CodeGen/ARM/call.ll8
-rw-r--r--test/CodeGen/ARM/call_nolink.ll40
-rw-r--r--test/CodeGen/ARM/coalesce-dbgvalue.ll58
-rw-r--r--test/CodeGen/ARM/coalesce-subregs.ll43
-rw-r--r--test/CodeGen/ARM/code-placement.ll8
-rw-r--r--test/CodeGen/ARM/combine-movc-sub.ll63
-rw-r--r--test/CodeGen/ARM/commute-movcc.ll4
-rw-r--r--test/CodeGen/ARM/compare-call.ll8
-rw-r--r--test/CodeGen/ARM/copy-paired-reg.ll2
-rw-r--r--test/CodeGen/ARM/crash-O0.ll4
-rw-r--r--test/CodeGen/ARM/crash-greedy-v6.ll14
-rw-r--r--test/CodeGen/ARM/crash.ll4
-rw-r--r--test/CodeGen/ARM/cse-call.ll4
-rw-r--r--test/CodeGen/ARM/cse-ldrlit.ll4
-rw-r--r--test/CodeGen/ARM/cse-libcalls.ll2
-rw-r--r--test/CodeGen/ARM/dagcombine-anyexttozeroext.ll6
-rw-r--r--test/CodeGen/ARM/darwin-section-order.ll21
-rw-r--r--test/CodeGen/ARM/debug-frame-large-stack.ll2
-rw-r--r--test/CodeGen/ARM/debug-frame-vararg.ll126
-rw-r--r--test/CodeGen/ARM/debug-frame.ll98
-rw-r--r--test/CodeGen/ARM/debug-info-arg.ll72
-rw-r--r--test/CodeGen/ARM/debug-info-blocks.ll352
-rw-r--r--test/CodeGen/ARM/debug-info-branch-folding.ll104
-rw-r--r--test/CodeGen/ARM/debug-info-d16-reg.ll125
-rw-r--r--test/CodeGen/ARM/debug-info-no-frame.ll36
-rw-r--r--test/CodeGen/ARM/debug-info-qreg.ll108
-rw-r--r--test/CodeGen/ARM/debug-info-s16-reg.ll139
-rw-r--r--test/CodeGen/ARM/debug-info-sreg2.ll38
-rw-r--r--test/CodeGen/ARM/debug-segmented-stacks.ll56
-rw-r--r--test/CodeGen/ARM/disable-fp-elim.ll25
-rw-r--r--test/CodeGen/ARM/div.ll24
-rw-r--r--test/CodeGen/ARM/divmod.ll10
-rw-r--r--test/CodeGen/ARM/dwarf-eh.ll14
-rw-r--r--test/CodeGen/ARM/dyn-stackalloc.ll24
-rw-r--r--test/CodeGen/ARM/ehabi.ll16
-rw-r--r--test/CodeGen/ARM/emit-big-cst.ll2
-rw-r--r--test/CodeGen/ARM/extload-knownzero.ll2
-rw-r--r--test/CodeGen/ARM/extloadi1.ll2
-rw-r--r--test/CodeGen/ARM/fast-isel-GEP-coalesce.ll22
-rw-r--r--test/CodeGen/ARM/fast-isel-align.ll34
-rw-r--r--test/CodeGen/ARM/fast-isel-binary.ll6
-rw-r--r--test/CodeGen/ARM/fast-isel-br-const.ll6
-rw-r--r--test/CodeGen/ARM/fast-isel-br-phi.ll2
-rw-r--r--test/CodeGen/ARM/fast-isel-call-multi-reg-return.ll6
-rw-r--r--test/CodeGen/ARM/fast-isel-call.ll20
-rw-r--r--test/CodeGen/ARM/fast-isel-cmp-imm.ll6
-rw-r--r--test/CodeGen/ARM/fast-isel-conversion.ll6
-rw-r--r--test/CodeGen/ARM/fast-isel-crash.ll4
-rw-r--r--test/CodeGen/ARM/fast-isel-deadcode.ll2
-rw-r--r--test/CodeGen/ARM/fast-isel-ext.ll14
-rw-r--r--test/CodeGen/ARM/fast-isel-fold.ll18
-rw-r--r--test/CodeGen/ARM/fast-isel-frameaddr.ll8
-rw-r--r--test/CodeGen/ARM/fast-isel-icmp.ll6
-rw-r--r--test/CodeGen/ARM/fast-isel-indirectbr.ll6
-rw-r--r--test/CodeGen/ARM/fast-isel-intrinsic.ll26
-rw-r--r--test/CodeGen/ARM/fast-isel-ldr-str-arm.ll26
-rw-r--r--test/CodeGen/ARM/fast-isel-ldr-str-thumb-neg-index.ll56
-rw-r--r--test/CodeGen/ARM/fast-isel-ldrh-strh-arm.ll56
-rw-r--r--test/CodeGen/ARM/fast-isel-load-store-verify.ll10
-rw-r--r--test/CodeGen/ARM/fast-isel-mvn.ll10
-rw-r--r--test/CodeGen/ARM/fast-isel-pic.ll14
-rw-r--r--test/CodeGen/ARM/fast-isel-pred.ll24
-rw-r--r--test/CodeGen/ARM/fast-isel-redefinition.ll6
-rw-r--r--test/CodeGen/ARM/fast-isel-remat-same-constant.ll29
-rw-r--r--test/CodeGen/ARM/fast-isel-ret.ll6
-rw-r--r--test/CodeGen/ARM/fast-isel-select.ll32
-rw-r--r--test/CodeGen/ARM/fast-isel-shift-materialize.ll25
-rw-r--r--test/CodeGen/ARM/fast-isel-shifter.ll4
-rw-r--r--test/CodeGen/ARM/fast-isel-static.ll18
-rw-r--r--test/CodeGen/ARM/fast-isel-update-valuemap-for-extract.ll24
-rw-r--r--test/CodeGen/ARM/fast-isel-vaddd.ll33
-rw-r--r--test/CodeGen/ARM/fast-isel-vararg.ll20
-rw-r--r--test/CodeGen/ARM/fast-isel.ll18
-rw-r--r--test/CodeGen/ARM/fastisel-gep-promote-before-add.ll6
-rw-r--r--test/CodeGen/ARM/fcopysign.ll2
-rw-r--r--test/CodeGen/ARM/flag-crash.ll12
-rw-r--r--test/CodeGen/ARM/fnegs.ll4
-rw-r--r--test/CodeGen/ARM/fold-stack-adjust.ll22
-rw-r--r--test/CodeGen/ARM/fp.ll2
-rw-r--r--test/CodeGen/ARM/fp16-promote.ll903
-rw-r--r--test/CodeGen/ARM/fp16.ll4
-rw-r--r--test/CodeGen/ARM/fpcmp-opt.ll8
-rw-r--r--test/CodeGen/ARM/fpmem.ll10
-rw-r--r--test/CodeGen/ARM/fptoint.ll4
-rw-r--r--test/CodeGen/ARM/frame-register.ll12
-rw-r--r--test/CodeGen/ARM/fusedMAC.ll2
-rw-r--r--test/CodeGen/ARM/ghc-tcreturn-lowered.ll4
-rw-r--r--test/CodeGen/ARM/global-merge-1.ll45
-rw-r--r--test/CodeGen/ARM/global-merge-addrspace.ll4
-rw-r--r--test/CodeGen/ARM/global-merge.ll8
-rw-r--r--test/CodeGen/ARM/globals.ll2
-rw-r--r--test/CodeGen/ARM/gv-stubs-crash.ll2
-rw-r--r--test/CodeGen/ARM/half.ll14
-rw-r--r--test/CodeGen/ARM/hello.ll2
-rw-r--r--test/CodeGen/ARM/hidden-vis-2.ll2
-rw-r--r--test/CodeGen/ARM/hidden-vis-3.ll4
-rw-r--r--test/CodeGen/ARM/ifconv-kills.ll6
-rw-r--r--test/CodeGen/ARM/ifconv-regmask.ll2
-rw-r--r--test/CodeGen/ARM/ifcvt-branch-weight-bug.ll2
-rw-r--r--test/CodeGen/ARM/ifcvt-branch-weight.ll8
-rw-r--r--test/CodeGen/ARM/ifcvt-dead-def.ll55
-rw-r--r--test/CodeGen/ARM/ifcvt-iter-indbr.ll56
-rw-r--r--test/CodeGen/ARM/ifcvt-regmask-noreturn.ll45
-rw-r--r--test/CodeGen/ARM/ifcvt11.ll12
-rw-r--r--test/CodeGen/ARM/ifcvt3.ll4
-rw-r--r--test/CodeGen/ARM/ifcvt5.ll2
-rw-r--r--test/CodeGen/ARM/ifcvt6.ll2
-rw-r--r--test/CodeGen/ARM/ifcvt7.ll6
-rw-r--r--test/CodeGen/ARM/illegal-vector-bitcast.ll4
-rw-r--r--test/CodeGen/ARM/indirect-reg-input.ll2
-rw-r--r--test/CodeGen/ARM/indirectbr-2.ll10
-rw-r--r--test/CodeGen/ARM/indirectbr.ll6
-rw-r--r--test/CodeGen/ARM/inline-diagnostics.ll4
-rw-r--r--test/CodeGen/ARM/inlineasm-64bit.ll2
-rw-r--r--test/CodeGen/ARM/interrupt-attr.ll12
-rw-r--r--test/CodeGen/ARM/intrinsics-crypto.ll10
-rw-r--r--test/CodeGen/ARM/intrinsics-memory-barrier.ll6
-rw-r--r--test/CodeGen/ARM/invoke-donothing-assert.ll2
-rw-r--r--test/CodeGen/ARM/isel-v8i32-crash.ll2
-rw-r--r--test/CodeGen/ARM/krait-cpu-div-attribute.ll36
-rw-r--r--test/CodeGen/ARM/large-stack.ll2
-rw-r--r--test/CodeGen/ARM/ldm.ll16
-rw-r--r--test/CodeGen/ARM/ldr.ll18
-rw-r--r--test/CodeGen/ARM/ldr_ext.ll10
-rw-r--r--test/CodeGen/ARM/ldr_frame.ll16
-rw-r--r--test/CodeGen/ARM/ldr_post.ll4
-rw-r--r--test/CodeGen/ARM/ldr_pre.ll6
-rw-r--r--test/CodeGen/ARM/ldrd-memoper.ll4
-rw-r--r--test/CodeGen/ARM/ldrd.ll20
-rw-r--r--test/CodeGen/ARM/ldst-f32-2-i32.ll6
-rw-r--r--test/CodeGen/ARM/ldstrex-m.ll6
-rw-r--r--test/CodeGen/ARM/ldstrex.ll12
-rw-r--r--test/CodeGen/ARM/load-global.ll2
-rw-r--r--test/CodeGen/ARM/load.ll8
-rw-r--r--test/CodeGen/ARM/load_i1_select.ll2
-rw-r--r--test/CodeGen/ARM/long.ll2
-rw-r--r--test/CodeGen/ARM/longMAC.ll41
-rw-r--r--test/CodeGen/ARM/lsr-code-insertion.ll24
-rw-r--r--test/CodeGen/ARM/lsr-icmp-imm.ll4
-rw-r--r--test/CodeGen/ARM/lsr-scale-addr-mode.ll2
-rw-r--r--test/CodeGen/ARM/lsr-unfolded-offset.ll16
-rw-r--r--test/CodeGen/ARM/machine-cse-cmp.ll8
-rw-r--r--test/CodeGen/ARM/machine-licm.ll6
-rw-r--r--test/CodeGen/ARM/memcpy-inline.ll41
-rw-r--r--test/CodeGen/ARM/memfunc.ll386
-rw-r--r--test/CodeGen/ARM/memset-inline.ll4
-rw-r--r--test/CodeGen/ARM/minsize-litpools.ll4
-rw-r--r--test/CodeGen/ARM/misched-copy-arm.ll8
-rw-r--r--test/CodeGen/ARM/movcc-double.ll50
-rw-r--r--test/CodeGen/ARM/mult-alt-generic-arm.ll46
-rw-r--r--test/CodeGen/ARM/negative-offset.ll8
-rw-r--r--test/CodeGen/ARM/neon-spfp.ll2
-rw-r--r--test/CodeGen/ARM/neon-v8.1a.ll166
-rw-r--r--test/CodeGen/ARM/neon_cmp.ll4
-rw-r--r--test/CodeGen/ARM/neon_div.ll16
-rw-r--r--test/CodeGen/ARM/neon_fpconv.ll4
-rw-r--r--test/CodeGen/ARM/neon_ld1.ll8
-rw-r--r--test/CodeGen/ARM/neon_ld2.ll12
-rw-r--r--test/CodeGen/ARM/neon_spill.ll2
-rw-r--r--test/CodeGen/ARM/no-fpu.ll2
-rw-r--r--test/CodeGen/ARM/no-tail-call.ll44
-rw-r--r--test/CodeGen/ARM/none-macho.ll11
-rw-r--r--test/CodeGen/ARM/noopt-dmb-v7.ll15
-rw-r--r--test/CodeGen/ARM/nop_concat_vectors.ll2
-rw-r--r--test/CodeGen/ARM/null-streamer.ll2
-rw-r--r--test/CodeGen/ARM/odr_comdat.ll16
-rw-r--r--test/CodeGen/ARM/optimize-dmbs-v7.ll2
-rw-r--r--test/CodeGen/ARM/optselect-regclass.ll4
-rw-r--r--test/CodeGen/ARM/phi.ll6
-rw-r--r--test/CodeGen/ARM/popcnt.ll36
-rw-r--r--test/CodeGen/ARM/pr13249.ll8
-rw-r--r--test/CodeGen/ARM/pr18364-movw.ll4
-rw-r--r--test/CodeGen/ARM/pr3502.ll2
-rw-r--r--test/CodeGen/ARM/prefetch.ll6
-rw-r--r--test/CodeGen/ARM/print-memb-operand.ll12
-rw-r--r--test/CodeGen/ARM/private.ll2
-rw-r--r--test/CodeGen/ARM/reg_sequence.ll26
-rw-r--r--test/CodeGen/ARM/regpair_hint_phys.ll22
-rw-r--r--test/CodeGen/ARM/saxpy10-a9.ll88
-rw-r--r--test/CodeGen/ARM/sched-it-debug-nodes.ll88
-rw-r--r--test/CodeGen/ARM/section-name.ll2
-rw-r--r--test/CodeGen/ARM/segmented-stacks.ll2
-rw-r--r--test/CodeGen/ARM/select_xform.ll2
-rw-r--r--test/CodeGen/ARM/shifter_operand.ll10
-rw-r--r--test/CodeGen/ARM/sjlj-prepare-critical-edge.ll128
-rw-r--r--test/CodeGen/ARM/smul.ll2
-rw-r--r--test/CodeGen/ARM/space-directive.ll2
-rw-r--r--test/CodeGen/ARM/spill-q.ll2
-rw-r--r--test/CodeGen/ARM/ssp-data-layout.ll222
-rw-r--r--test/CodeGen/ARM/stack-alignment.ll120
-rw-r--r--test/CodeGen/ARM/stack-protector-bmovpcb_call.ll6
-rw-r--r--test/CodeGen/ARM/stack_guard_remat.ll2
-rw-r--r--test/CodeGen/ARM/stm.ll4
-rw-r--r--test/CodeGen/ARM/str_post.ll4
-rw-r--r--test/CodeGen/ARM/str_pre-2.ll4
-rw-r--r--test/CodeGen/ARM/str_pre.ll8
-rw-r--r--test/CodeGen/ARM/struct-byval-frame-index.ll58
-rw-r--r--test/CodeGen/ARM/struct_byval.ll21
-rw-r--r--test/CodeGen/ARM/sub-cmp-peephole.ll4
-rw-r--r--test/CodeGen/ARM/swift-atomics.ll4
-rw-r--r--test/CodeGen/ARM/swift-vldm.ll16
-rw-r--r--test/CodeGen/ARM/t2abs-killflags.ll23
-rw-r--r--test/CodeGen/ARM/tail-dup-kill-flags.ll54
-rw-r--r--test/CodeGen/ARM/tail-dup.ll16
-rw-r--r--test/CodeGen/ARM/test-sharedidx.ll40
-rw-r--r--test/CodeGen/ARM/this-return.ll10
-rw-r--r--test/CodeGen/ARM/thumb-alignment.ll54
-rw-r--r--test/CodeGen/ARM/thumb-big-stack.ll6844
-rw-r--r--test/CodeGen/ARM/thumb1-varalloc.ll36
-rw-r--r--test/CodeGen/ARM/thumb1_return_sequence.ll68
-rw-r--r--test/CodeGen/ARM/thumb2-size-reduction-internal-flags.ll173
-rw-r--r--test/CodeGen/ARM/thumb_indirect_calls.ll2
-rw-r--r--test/CodeGen/ARM/tls1.ll2
-rw-r--r--test/CodeGen/ARM/tls2.ll2
-rw-r--r--test/CodeGen/ARM/tls3.ll2
-rw-r--r--test/CodeGen/ARM/trunc_ldr.ll8
-rw-r--r--test/CodeGen/ARM/truncstore-dag-combine.ll4
-rw-r--r--test/CodeGen/ARM/twoaddrinstr.ll2
-rw-r--r--test/CodeGen/ARM/uint64tof64.ll4
-rw-r--r--test/CodeGen/ARM/umulo-32.ll2
-rw-r--r--test/CodeGen/ARM/unaligned_load_store.ll8
-rw-r--r--test/CodeGen/ARM/unaligned_load_store_vector.ll162
-rw-r--r--test/CodeGen/ARM/undef-sext.ll4
-rw-r--r--test/CodeGen/ARM/vaba.ll108
-rw-r--r--test/CodeGen/ARM/vabd.ll80
-rw-r--r--test/CodeGen/ARM/vabs.ll28
-rw-r--r--test/CodeGen/ARM/vadd.ll100
-rw-r--r--test/CodeGen/ARM/varargs-spill-stack-align-nacl.ll4
-rw-r--r--test/CodeGen/ARM/vargs.ll4
-rw-r--r--test/CodeGen/ARM/vargs_align.ll12
-rw-r--r--test/CodeGen/ARM/vbits.ll208
-rw-r--r--test/CodeGen/ARM/vbsl-constant.ll48
-rw-r--r--test/CodeGen/ARM/vbsl.ll48
-rw-r--r--test/CodeGen/ARM/vceq.ll34
-rw-r--r--test/CodeGen/ARM/vcge.ll68
-rw-r--r--test/CodeGen/ARM/vcgt.ll72
-rw-r--r--test/CodeGen/ARM/vcnt.ll28
-rw-r--r--test/CodeGen/ARM/vcombine.ll24
-rw-r--r--test/CodeGen/ARM/vcvt-cost.ll20
-rw-r--r--test/CodeGen/ARM/vcvt-v8.ll32
-rw-r--r--test/CodeGen/ARM/vcvt.ll40
-rw-r--r--test/CodeGen/ARM/vcvt_combine.ll12
-rw-r--r--test/CodeGen/ARM/vdiv_combine.ll12
-rw-r--r--test/CodeGen/ARM/vdup.ll22
-rw-r--r--test/CodeGen/ARM/vector-DAGCombine.ll40
-rw-r--r--test/CodeGen/ARM/vector-extend-narrow.ll10
-rw-r--r--test/CodeGen/ARM/vector-load.ll253
-rw-r--r--test/CodeGen/ARM/vector-promotion.ll76
-rw-r--r--test/CodeGen/ARM/vector-spilling.ll18
-rw-r--r--test/CodeGen/ARM/vector-store.ll258
-rw-r--r--test/CodeGen/ARM/vext.ll50
-rw-r--r--test/CodeGen/ARM/vfcmp.ll44
-rw-r--r--test/CodeGen/ARM/vfp-libcalls.ll4
-rw-r--r--test/CodeGen/ARM/vfp-regs-dwarf.ll12
-rw-r--r--test/CodeGen/ARM/vfp.ll46
-rw-r--r--test/CodeGen/ARM/vget_lane.ll44
-rw-r--r--test/CodeGen/ARM/vhadd.ll96
-rw-r--r--test/CodeGen/ARM/vhsub.ll48
-rw-r--r--test/CodeGen/ARM/vicmp.ll40
-rw-r--r--test/CodeGen/ARM/vld1.ll12
-rw-r--r--test/CodeGen/ARM/vld2.ll8
-rw-r--r--test/CodeGen/ARM/vld3.ll10
-rw-r--r--test/CodeGen/ARM/vld4.ll10
-rw-r--r--test/CodeGen/ARM/vlddup.ll24
-rw-r--r--test/CodeGen/ARM/vldlane.ll96
-rw-r--r--test/CodeGen/ARM/vldm-liveness.ll16
-rw-r--r--test/CodeGen/ARM/vldm-sched-a9.ll20
-rw-r--r--test/CodeGen/ARM/vminmax.ll112
-rw-r--r--test/CodeGen/ARM/vminmaxnm.ll398
-rw-r--r--test/CodeGen/ARM/vmla.ll84
-rw-r--r--test/CodeGen/ARM/vmls.ll84
-rw-r--r--test/CodeGen/ARM/vmov.ll44
-rw-r--r--test/CodeGen/ARM/vmul.ll106
-rw-r--r--test/CodeGen/ARM/vneg.ll28
-rw-r--r--test/CodeGen/ARM/vpadal.ll48
-rw-r--r--test/CodeGen/ARM/vpadd.ll44
-rw-r--r--test/CodeGen/ARM/vpminmax.ll56
-rw-r--r--test/CodeGen/ARM/vqadd.ll64
-rw-r--r--test/CodeGen/ARM/vqdmul.ll64
-rw-r--r--test/CodeGen/ARM/vqshl.ll176
-rw-r--r--test/CodeGen/ARM/vqshrn.ll36
-rw-r--r--test/CodeGen/ARM/vqsub.ll64
-rw-r--r--test/CodeGen/ARM/vrec.ll32
-rw-r--r--test/CodeGen/ARM/vrev.ll40
-rw-r--r--test/CodeGen/ARM/vselect_imax.ll24
-rw-r--r--test/CodeGen/ARM/vshift.ll144
-rw-r--r--test/CodeGen/ARM/vshiftins.ll64
-rw-r--r--test/CodeGen/ARM/vshl.ll208
-rw-r--r--test/CodeGen/ARM/vshll.ll24
-rw-r--r--test/CodeGen/ARM/vshrn.ll18
-rw-r--r--test/CodeGen/ARM/vsra.ll128
-rw-r--r--test/CodeGen/ARM/vst1.ll34
-rw-r--r--test/CodeGen/ARM/vst2.ll38
-rw-r--r--test/CodeGen/ARM/vst3.ll36
-rw-r--r--test/CodeGen/ARM/vst4.ll36
-rw-r--r--test/CodeGen/ARM/vstlane.ll88
-rw-r--r--test/CodeGen/ARM/vsub.ll100
-rw-r--r--test/CodeGen/ARM/vtbl.ll40
-rw-r--r--test/CodeGen/ARM/vtrn.ll40
-rw-r--r--test/CodeGen/ARM/vuzp.ll32
-rw-r--r--test/CodeGen/ARM/vzip.ll32
-rw-r--r--test/CodeGen/ARM/warn-stack.ll4
-rw-r--r--test/CodeGen/ARM/weak2.ll2
-rw-r--r--test/CodeGen/ARM/wrong-t2stmia-size-opt.ll4
-rw-r--r--test/CodeGen/ARM/zextload_demandedbits.ll6
-rw-r--r--test/CodeGen/BPF/alu8.ll46
-rw-r--r--test/CodeGen/BPF/atomics.ll20
-rw-r--r--test/CodeGen/BPF/basictest.ll28
-rw-r--r--test/CodeGen/BPF/byval.ll27
-rw-r--r--test/CodeGen/BPF/cc_args.ll96
-rw-r--r--test/CodeGen/BPF/cc_ret.ll48
-rw-r--r--test/CodeGen/BPF/cmp.ll119
-rw-r--r--test/CodeGen/BPF/ex1.ll46
-rw-r--r--test/CodeGen/BPF/intrinsics.ll88
-rw-r--r--test/CodeGen/BPF/lit.local.cfg2
-rw-r--r--test/CodeGen/BPF/load.ll43
-rw-r--r--test/CodeGen/BPF/loops.ll111
-rw-r--r--test/CodeGen/BPF/many_args1.ll12
-rw-r--r--test/CodeGen/BPF/many_args2.ll15
-rw-r--r--test/CodeGen/BPF/sanity.ll117
-rw-r--r--test/CodeGen/BPF/setcc.ll99
-rw-r--r--test/CodeGen/BPF/shifts.ll101
-rw-r--r--test/CodeGen/BPF/sockex2.ll326
-rw-r--r--test/CodeGen/BPF/struct_ret1.ll17
-rw-r--r--test/CodeGen/BPF/struct_ret2.ll12
-rw-r--r--test/CodeGen/BPF/vararg1.ll9
-rw-r--r--test/CodeGen/CPP/2009-05-01-Long-Double.ll4
-rw-r--r--test/CodeGen/CPP/2009-05-04-CondBr.ll10
-rw-r--r--test/CodeGen/Generic/2003-05-28-ManyArgs.ll190
-rw-r--r--test/CodeGen/Generic/2003-05-30-BadFoldGEP.ll10
-rw-r--r--test/CodeGen/Generic/2003-07-06-BadIntCmp.ll4
-rw-r--r--test/CodeGen/Generic/2003-07-07-BadLongConst.ll2
-rw-r--r--test/CodeGen/Generic/2003-07-08-BadCastToBool.ll2
-rw-r--r--test/CodeGen/Generic/2003-07-29-BadConstSbyte.ll8
-rw-r--r--test/CodeGen/Generic/2004-05-09-LiveVarPartialRegister.ll4
-rw-r--r--test/CodeGen/Generic/2005-04-09-GlobalInPHI.ll2
-rw-r--r--test/CodeGen/Generic/2005-12-01-Crash.ll2
-rw-r--r--test/CodeGen/Generic/2006-02-12-InsertLibcall.ll2
-rw-r--r--test/CodeGen/Generic/2006-03-01-dagcombineinfloop.ll30
-rw-r--r--test/CodeGen/Generic/2006-04-26-SetCCAnd.ll4
-rw-r--r--test/CodeGen/Generic/2006-05-06-GEP-Cast-Sink-Crash.ll4
-rw-r--r--test/CodeGen/Generic/2006-06-13-ComputeMaskedBitsCrash.ll8
-rw-r--r--test/CodeGen/Generic/2006-06-28-SimplifySetCCCrash.ll8
-rw-r--r--test/CodeGen/Generic/2006-09-02-LocalAllocCrash.ll24
-rw-r--r--test/CodeGen/Generic/2006-11-20-DAGCombineCrash.ll4
-rw-r--r--test/CodeGen/Generic/2007-01-15-LoadSelectCycle.ll4
-rw-r--r--test/CodeGen/Generic/2007-05-03-EHTypeInfo.ll4
-rw-r--r--test/CodeGen/Generic/2008-01-25-dag-combine-mul.ll12
-rw-r--r--test/CodeGen/Generic/2008-01-30-LoadCrash.ll6
-rw-r--r--test/CodeGen/Generic/2008-02-04-Ctlz.ll2
-rw-r--r--test/CodeGen/Generic/2008-02-20-MatchingMem.ll3
-rw-r--r--test/CodeGen/Generic/2008-02-25-NegateZero.ll6
-rw-r--r--test/CodeGen/Generic/2008-08-07-PtrToInt-SmallerInt.ll2
-rw-r--r--test/CodeGen/Generic/2009-03-29-SoftFloatVectorExtract.ll10
-rw-r--r--test/CodeGen/Generic/2009-04-28-i128-cmp-crash.ll4
-rw-r--r--test/CodeGen/Generic/2011-07-07-ScheduleDAGCrash.ll4
-rw-r--r--test/CodeGen/Generic/2012-06-08-APIntCrash.ll2
-rw-r--r--test/CodeGen/Generic/2014-02-05-OpaqueConstants.ll4
-rw-r--r--test/CodeGen/Generic/APIntLoadStore.ll512
-rw-r--r--test/CodeGen/Generic/ConstantExprLowering.ll2
-rw-r--r--test/CodeGen/Generic/MachineBranchProb.ll43
-rw-r--r--test/CodeGen/Generic/PBQP.ll34
-rw-r--r--test/CodeGen/Generic/add-with-overflow-128.ll4
-rw-r--r--test/CodeGen/Generic/add-with-overflow-24.ll8
-rw-r--r--test/CodeGen/Generic/add-with-overflow.ll8
-rw-r--r--test/CodeGen/Generic/annotate.ll2
-rw-r--r--test/CodeGen/Generic/badFoldGEP.ll6
-rw-r--r--test/CodeGen/Generic/badarg6.ll2
-rw-r--r--test/CodeGen/Generic/builtin-expect.ll46
-rw-r--r--test/CodeGen/Generic/cast-fp.ll28
-rw-r--r--test/CodeGen/Generic/constindices.ll30
-rw-r--r--test/CodeGen/Generic/crash.ll18
-rw-r--r--test/CodeGen/Generic/dag-combine-crash.ll2
-rw-r--r--test/CodeGen/Generic/dbg_value.ll4
-rw-r--r--test/CodeGen/Generic/empty-load-store.ll4
-rw-r--r--test/CodeGen/Generic/empty-phi.ll2
-rw-r--r--test/CodeGen/Generic/fastcall.ll2
-rw-r--r--test/CodeGen/Generic/fp-to-int-invalid.ll4
-rw-r--r--test/CodeGen/Generic/fwdtwice.ll2
-rw-r--r--test/CodeGen/Generic/global-ret0.ll2
-rw-r--r--test/CodeGen/Generic/hello.ll4
-rw-r--r--test/CodeGen/Generic/icmp-illegal.ll50
-rw-r--r--test/CodeGen/Generic/inline-asm-mem-clobber.ll6
-rw-r--r--test/CodeGen/Generic/invalid-memcpy.ll2
-rw-r--r--test/CodeGen/Generic/negintconst.ll6
-rw-r--r--test/CodeGen/Generic/overloaded-intrinsic-name.ll57
-rw-r--r--test/CodeGen/Generic/pr2625.ll4
-rw-r--r--test/CodeGen/Generic/print-add.ll8
-rw-r--r--test/CodeGen/Generic/print-arith-fp.ll56
-rw-r--r--test/CodeGen/Generic/print-arith-int.ll76
-rw-r--r--test/CodeGen/Generic/print-int.ll4
-rw-r--r--test/CodeGen/Generic/print-mul-exp.ll48
-rw-r--r--test/CodeGen/Generic/print-mul.ll16
-rw-r--r--test/CodeGen/Generic/print-shift.ll16
-rw-r--r--test/CodeGen/Generic/ptr-annotate.ll2
-rw-r--r--test/CodeGen/Generic/select.ll8
-rw-r--r--test/CodeGen/Generic/stop-after.ll2
-rw-r--r--test/CodeGen/Generic/undef-phi.ll8
-rw-r--r--test/CodeGen/Generic/v-split.ll4
-rw-r--r--test/CodeGen/Generic/vector-casts.ll14
-rw-r--r--test/CodeGen/Generic/vector-identity-shuffle.ll2
-rw-r--r--test/CodeGen/Generic/vector.ll45
-rw-r--r--test/CodeGen/Hexagon/BranchPredict.ll4
-rw-r--r--test/CodeGen/Hexagon/absaddr-store.ll6
-rw-r--r--test/CodeGen/Hexagon/absimm.ll2
-rw-r--r--test/CodeGen/Hexagon/adde.ll4
-rw-r--r--test/CodeGen/Hexagon/alu64.ll599
-rw-r--r--test/CodeGen/Hexagon/always-ext.ll9
-rw-r--r--test/CodeGen/Hexagon/block-addr.ll19
-rw-r--r--test/CodeGen/Hexagon/brev_ld.ll140
-rw-r--r--test/CodeGen/Hexagon/brev_st.ll112
-rw-r--r--test/CodeGen/Hexagon/calling-conv-2.ll13
-rw-r--r--test/CodeGen/Hexagon/calling-conv.ll73
-rw-r--r--test/CodeGen/Hexagon/cext-check.ll18
-rw-r--r--test/CodeGen/Hexagon/cext-valid-packet2.ll24
-rw-r--r--test/CodeGen/Hexagon/circ_ld.ll135
-rw-r--r--test/CodeGen/Hexagon/circ_ldd_bug.ll255
-rw-r--r--test/CodeGen/Hexagon/circ_ldw.ll18
-rw-r--r--test/CodeGen/Hexagon/circ_st.ll108
-rw-r--r--test/CodeGen/Hexagon/clr_set_toggle.ll160
-rw-r--r--test/CodeGen/Hexagon/cmp-not.ll50
-rw-r--r--test/CodeGen/Hexagon/cmp-to-predreg.ll4
-rw-r--r--test/CodeGen/Hexagon/cmp_pred.ll1
-rw-r--r--test/CodeGen/Hexagon/cmp_pred2.ll8
-rw-r--r--test/CodeGen/Hexagon/cmp_pred_reg.ll1
-rw-r--r--test/CodeGen/Hexagon/cmpb_pred.ll7
-rw-r--r--test/CodeGen/Hexagon/combine.ll4
-rw-r--r--test/CodeGen/Hexagon/combine_ir.ll14
-rw-r--r--test/CodeGen/Hexagon/convertdptoint.ll8
-rw-r--r--test/CodeGen/Hexagon/convertdptoll.ll8
-rw-r--r--test/CodeGen/Hexagon/convertsptoint.ll8
-rw-r--r--test/CodeGen/Hexagon/convertsptoll.ll8
-rw-r--r--test/CodeGen/Hexagon/ctlz-cttz-ctpop.ll8
-rw-r--r--test/CodeGen/Hexagon/dadd.ll6
-rw-r--r--test/CodeGen/Hexagon/dmul.ll6
-rw-r--r--test/CodeGen/Hexagon/double.ll10
-rw-r--r--test/CodeGen/Hexagon/doubleconvert-ieee-rnd-near.ll8
-rw-r--r--test/CodeGen/Hexagon/dsub.ll6
-rw-r--r--test/CodeGen/Hexagon/dualstore.ll17
-rw-r--r--test/CodeGen/Hexagon/expand-condsets-basic.ll11
-rw-r--r--test/CodeGen/Hexagon/expand-condsets-rm-segment.ll131
-rw-r--r--test/CodeGen/Hexagon/expand-condsets-undef.ll28
-rw-r--r--test/CodeGen/Hexagon/extload-combine.ll12
-rw-r--r--test/CodeGen/Hexagon/fadd.ll4
-rw-r--r--test/CodeGen/Hexagon/fcmp.ll6
-rw-r--r--test/CodeGen/Hexagon/float.ll10
-rw-r--r--test/CodeGen/Hexagon/floatconvert-ieee-rnd-near.ll10
-rw-r--r--test/CodeGen/Hexagon/fmul.ll4
-rw-r--r--test/CodeGen/Hexagon/frame.ll10
-rw-r--r--test/CodeGen/Hexagon/fsub.ll4
-rw-r--r--test/CodeGen/Hexagon/fusedandshift.ll2
-rw-r--r--test/CodeGen/Hexagon/gp-plus-offset-load.ll6
-rw-r--r--test/CodeGen/Hexagon/gp-plus-offset-store.ll4
-rw-r--r--test/CodeGen/Hexagon/gp-rel.ll6
-rw-r--r--test/CodeGen/Hexagon/hwloop-cleanup.ll13
-rw-r--r--test/CodeGen/Hexagon/hwloop-const.ll4
-rw-r--r--test/CodeGen/Hexagon/hwloop-crit-edge.ll58
-rw-r--r--test/CodeGen/Hexagon/hwloop-dbg.ll56
-rw-r--r--test/CodeGen/Hexagon/hwloop-le.ll60
-rw-r--r--test/CodeGen/Hexagon/hwloop-loop1.ll68
-rw-r--r--test/CodeGen/Hexagon/hwloop-lt.ll209
-rw-r--r--test/CodeGen/Hexagon/hwloop-lt1.ll8
-rw-r--r--test/CodeGen/Hexagon/hwloop-missed.ll49
-rw-r--r--test/CodeGen/Hexagon/hwloop-ne.ll60
-rw-r--r--test/CodeGen/Hexagon/hwloop-ph-deadcode.ll23
-rw-r--r--test/CodeGen/Hexagon/hwloop-pos-ivbump1.ll45
-rw-r--r--test/CodeGen/Hexagon/hwloop-preheader.ll40
-rw-r--r--test/CodeGen/Hexagon/hwloop-range.ll36
-rw-r--r--test/CodeGen/Hexagon/hwloop-recursion.ll64
-rw-r--r--test/CodeGen/Hexagon/hwloop-wrap.ll22
-rw-r--r--test/CodeGen/Hexagon/hwloop-wrap2.ll67
-rw-r--r--test/CodeGen/Hexagon/hwloop1.ll161
-rw-r--r--test/CodeGen/Hexagon/hwloop2.ll37
-rw-r--r--test/CodeGen/Hexagon/hwloop3.ll27
-rw-r--r--test/CodeGen/Hexagon/hwloop4.ll76
-rw-r--r--test/CodeGen/Hexagon/hwloop5.ll93
-rw-r--r--test/CodeGen/Hexagon/i16_VarArg.ll18
-rw-r--r--test/CodeGen/Hexagon/i1_VarArg.ll28
-rw-r--r--test/CodeGen/Hexagon/i8_VarArg.ll18
-rw-r--r--test/CodeGen/Hexagon/idxload-with-zero-offset.ll64
-rw-r--r--test/CodeGen/Hexagon/intrinsics/alu32_alu.ll202
-rw-r--r--test/CodeGen/Hexagon/intrinsics/alu32_perm.ll104
-rw-r--r--test/CodeGen/Hexagon/intrinsics/cr.ll132
-rw-r--r--test/CodeGen/Hexagon/intrinsics/xtype_alu.ll1020
-rw-r--r--test/CodeGen/Hexagon/intrinsics/xtype_bit.ll329
-rw-r--r--test/CodeGen/Hexagon/intrinsics/xtype_complex.ll349
-rw-r--r--test/CodeGen/Hexagon/intrinsics/xtype_fp.ll388
-rw-r--r--test/CodeGen/Hexagon/intrinsics/xtype_mpy.ll1525
-rw-r--r--test/CodeGen/Hexagon/intrinsics/xtype_perm.ll252
-rw-r--r--test/CodeGen/Hexagon/intrinsics/xtype_pred.ll351
-rw-r--r--test/CodeGen/Hexagon/intrinsics/xtype_shift.ll723
-rw-r--r--test/CodeGen/Hexagon/macint.ll4
-rw-r--r--test/CodeGen/Hexagon/mem-fi-add.ll29
-rw-r--r--test/CodeGen/Hexagon/memops.ll420
-rw-r--r--test/CodeGen/Hexagon/memops1.ll16
-rw-r--r--test/CodeGen/Hexagon/memops2.ll8
-rw-r--r--test/CodeGen/Hexagon/memops3.ll8
-rw-r--r--test/CodeGen/Hexagon/misaligned-access.ll4
-rw-r--r--test/CodeGen/Hexagon/mpy.ll6
-rw-r--r--test/CodeGen/Hexagon/newvaluejump.ll4
-rw-r--r--test/CodeGen/Hexagon/newvaluejump2.ll17
-rw-r--r--test/CodeGen/Hexagon/newvaluestore.ll6
-rw-r--r--test/CodeGen/Hexagon/opt-fabs.ll2
-rw-r--r--test/CodeGen/Hexagon/opt-fneg.ll2
-rw-r--r--test/CodeGen/Hexagon/postinc-load.ll8
-rw-r--r--test/CodeGen/Hexagon/postinc-store.ll8
-rw-r--r--test/CodeGen/Hexagon/pred-absolute-store.ll5
-rw-r--r--test/CodeGen/Hexagon/pred-gp.ll4
-rw-r--r--test/CodeGen/Hexagon/pred-instrs.ll2
-rw-r--r--test/CodeGen/Hexagon/remove-endloop.ll56
-rw-r--r--test/CodeGen/Hexagon/remove_lsr.ll26
-rw-r--r--test/CodeGen/Hexagon/shrink-frame-basic.ll36
-rw-r--r--test/CodeGen/Hexagon/stack-align1.ll21
-rw-r--r--test/CodeGen/Hexagon/stack-align2.ll27
-rw-r--r--test/CodeGen/Hexagon/stack-alloca1.ll18
-rw-r--r--test/CodeGen/Hexagon/stack-alloca2.ll23
-rw-r--r--test/CodeGen/Hexagon/static.ll6
-rw-r--r--test/CodeGen/Hexagon/struct_args.ll2
-rw-r--r--test/CodeGen/Hexagon/struct_args_large.ll3
-rw-r--r--test/CodeGen/Hexagon/sube.ll4
-rw-r--r--test/CodeGen/Hexagon/tail-call-mem-intrinsics.ll31
-rw-r--r--test/CodeGen/Hexagon/tfr-to-combine.ll4
-rw-r--r--test/CodeGen/Hexagon/union-1.ll8
-rw-r--r--test/CodeGen/Hexagon/vaddh.ll4
-rw-r--r--test/CodeGen/Hexagon/validate-offset.ll14
-rw-r--r--test/CodeGen/Hexagon/vect/vect-anyextend.ll15
-rw-r--r--test/CodeGen/Hexagon/vect/vect-apint-truncate.ll27
-rw-r--r--test/CodeGen/Hexagon/vect/vect-bad-bitcast.ll61
-rw-r--r--test/CodeGen/Hexagon/vect/vect-bitcast-1.ll68
-rw-r--r--test/CodeGen/Hexagon/vect/vect-bitcast.ll56
-rw-r--r--test/CodeGen/Hexagon/vect/vect-cst-v4i32.ll29
-rw-r--r--test/CodeGen/Hexagon/vect/vect-cst-v4i8.ll30
-rw-r--r--test/CodeGen/Hexagon/vect/vect-cst.ll29
-rw-r--r--test/CodeGen/Hexagon/vect/vect-extract.ll96
-rw-r--r--test/CodeGen/Hexagon/vect/vect-fma.ll26
-rw-r--r--test/CodeGen/Hexagon/vect/vect-illegal-type.ll50
-rw-r--r--test/CodeGen/Hexagon/vect/vect-insert-extract-elt.ll71
-rw-r--r--test/CodeGen/Hexagon/vect/vect-load-1.ll26
-rw-r--r--test/CodeGen/Hexagon/vect/vect-load.ll76
-rw-r--r--test/CodeGen/Hexagon/vect/vect-loadv4i16.ll73
-rw-r--r--test/CodeGen/Hexagon/vect/vect-mul-v2i16.ll9
-rw-r--r--test/CodeGen/Hexagon/vect/vect-mul-v2i32.ll9
-rw-r--r--test/CodeGen/Hexagon/vect/vect-mul-v4i16.ll10
-rw-r--r--test/CodeGen/Hexagon/vect/vect-mul-v4i8.ll9
-rw-r--r--test/CodeGen/Hexagon/vect/vect-mul-v8i8.ll9
-rw-r--r--test/CodeGen/Hexagon/vect/vect-no-tfrs-1.ll8
-rw-r--r--test/CodeGen/Hexagon/vect/vect-no-tfrs.ll8
-rw-r--r--test/CodeGen/Hexagon/vect/vect-packhl.ll10
-rw-r--r--test/CodeGen/Hexagon/vect/vect-shift-imm.ll41
-rw-r--r--test/CodeGen/Hexagon/vect/vect-shuffle.ll47
-rw-r--r--test/CodeGen/Hexagon/vect/vect-splat.ll16
-rw-r--r--test/CodeGen/Hexagon/vect/vect-store-v2i16.ll51
-rw-r--r--test/CodeGen/Hexagon/vect/vect-truncate.ll42
-rw-r--r--test/CodeGen/Hexagon/vect/vect-vaddb-1.ll8
-rw-r--r--test/CodeGen/Hexagon/vect/vect-vaddb.ll8
-rw-r--r--test/CodeGen/Hexagon/vect/vect-vaddh-1.ll8
-rw-r--r--test/CodeGen/Hexagon/vect/vect-vaddh.ll8
-rw-r--r--test/CodeGen/Hexagon/vect/vect-vaddw.ll8
-rw-r--r--test/CodeGen/Hexagon/vect/vect-vaslw.ll33
-rw-r--r--test/CodeGen/Hexagon/vect/vect-vshifts.ll279
-rw-r--r--test/CodeGen/Hexagon/vect/vect-vsplatb.ll29
-rw-r--r--test/CodeGen/Hexagon/vect/vect-vsplath.ll29
-rw-r--r--test/CodeGen/Hexagon/vect/vect-vsubb-1.ll8
-rw-r--r--test/CodeGen/Hexagon/vect/vect-vsubb.ll8
-rw-r--r--test/CodeGen/Hexagon/vect/vect-vsubh-1.ll8
-rw-r--r--test/CodeGen/Hexagon/vect/vect-vsubh.ll8
-rw-r--r--test/CodeGen/Hexagon/vect/vect-vsubw.ll8
-rw-r--r--test/CodeGen/Hexagon/vect/vect-xor.ll38
-rw-r--r--test/CodeGen/Hexagon/vect/vect-zeroextend.ll23
-rw-r--r--test/CodeGen/Hexagon/zextloadi1.ll4
-rw-r--r--test/CodeGen/Inputs/DbgValueOtherTargets.ll26
-rw-r--r--test/CodeGen/MSP430/2009-05-10-CyclicDAG.ll2
-rw-r--r--test/CodeGen/MSP430/2009-05-17-Rot.ll6
-rw-r--r--test/CodeGen/MSP430/2009-05-17-Shift.ll4
-rw-r--r--test/CodeGen/MSP430/2009-08-25-DynamicStackAlloc.ll4
-rw-r--r--test/CodeGen/MSP430/2009-09-18-AbsoluteAddr.ll8
-rw-r--r--test/CodeGen/MSP430/2009-10-10-OrImpDef.ll2
-rw-r--r--test/CodeGen/MSP430/2009-11-08-InvalidResNo.ll6
-rw-r--r--test/CodeGen/MSP430/2009-12-22-InlineAsm.ll4
-rw-r--r--test/CodeGen/MSP430/2010-05-01-CombinerAnd.ll2
-rw-r--r--test/CodeGen/MSP430/AddrMode-bis-rx.ll22
-rw-r--r--test/CodeGen/MSP430/AddrMode-bis-xr.ll24
-rw-r--r--test/CodeGen/MSP430/AddrMode-mov-rx.ll22
-rw-r--r--test/CodeGen/MSP430/AddrMode-mov-xr.ll10
-rw-r--r--test/CodeGen/MSP430/Inst16mi.ll8
-rw-r--r--test/CodeGen/MSP430/Inst16mm.ll22
-rw-r--r--test/CodeGen/MSP430/Inst16mr.ll10
-rw-r--r--test/CodeGen/MSP430/Inst16rm.ll10
-rw-r--r--test/CodeGen/MSP430/Inst8mi.ll8
-rw-r--r--test/CodeGen/MSP430/Inst8mm.ll18
-rw-r--r--test/CodeGen/MSP430/Inst8mr.ll10
-rw-r--r--test/CodeGen/MSP430/Inst8rm.ll10
-rw-r--r--test/CodeGen/MSP430/bit.ll24
-rw-r--r--test/CodeGen/MSP430/byval.ll4
-rw-r--r--test/CodeGen/MSP430/indirectbr.ll6
-rw-r--r--test/CodeGen/MSP430/indirectbr2.ll4
-rw-r--r--test/CodeGen/MSP430/inline-asm.ll4
-rw-r--r--test/CodeGen/MSP430/jumptable.ll4
-rw-r--r--test/CodeGen/MSP430/memset.ll2
-rw-r--r--test/CodeGen/MSP430/misched-msp430.ll2
-rw-r--r--test/CodeGen/MSP430/mult-alt-generic-msp430.ll46
-rw-r--r--test/CodeGen/MSP430/postinc.ll20
-rw-r--r--test/CodeGen/Mips/2008-07-03-SRet.ll6
-rw-r--r--test/CodeGen/Mips/2008-07-15-InternalConstant.ll4
-rw-r--r--test/CodeGen/Mips/2008-07-15-SmallSection.ll6
-rw-r--r--test/CodeGen/Mips/2008-08-01-AsmInline.ll12
-rw-r--r--test/CodeGen/Mips/2008-08-03-ReturnDouble.ll4
-rw-r--r--test/CodeGen/Mips/2008-10-13-LegalizerBug.ll4
-rw-r--r--test/CodeGen/Mips/2008-11-10-xint_to_fp.ll24
-rw-r--r--test/CodeGen/Mips/2009-11-16-CstPoolLoad.ll8
-rw-r--r--test/CodeGen/Mips/2010-07-20-Switch.ll2
-rw-r--r--test/CodeGen/Mips/2012-12-12-ExpandMemcpy.ll2
-rw-r--r--test/CodeGen/Mips/Fast-ISel/br1.ll6
-rw-r--r--test/CodeGen/Mips/Fast-ISel/callabi.ll847
-rw-r--r--test/CodeGen/Mips/Fast-ISel/constexpr-address.ll18
-rw-r--r--test/CodeGen/Mips/Fast-ISel/fastalloca.ll32
-rw-r--r--test/CodeGen/Mips/Fast-ISel/fpcmpa.ll52
-rw-r--r--test/CodeGen/Mips/Fast-ISel/fpext.ll6
-rw-r--r--test/CodeGen/Mips/Fast-ISel/fpintconv.ll8
-rw-r--r--test/CodeGen/Mips/Fast-ISel/fptrunc.ll6
-rw-r--r--test/CodeGen/Mips/Fast-ISel/icmpa.ll44
-rw-r--r--test/CodeGen/Mips/Fast-ISel/loadstore2.ll14
-rw-r--r--test/CodeGen/Mips/Fast-ISel/loadstoreconv.ll38
-rw-r--r--test/CodeGen/Mips/Fast-ISel/loadstrconst.ll6
-rw-r--r--test/CodeGen/Mips/Fast-ISel/logopm.ll606
-rw-r--r--test/CodeGen/Mips/Fast-ISel/nullvoid.ll4
-rw-r--r--test/CodeGen/Mips/Fast-ISel/overflt.ll64
-rw-r--r--test/CodeGen/Mips/Fast-ISel/retabi.ll108
-rw-r--r--test/CodeGen/Mips/Fast-ISel/shftopm.ll122
-rw-r--r--test/CodeGen/Mips/Fast-ISel/shift.ll2
-rw-r--r--test/CodeGen/Mips/Fast-ISel/simplestore.ll4
-rw-r--r--test/CodeGen/Mips/Fast-ISel/simplestorefp1.ll8
-rw-r--r--test/CodeGen/Mips/Fast-ISel/simplestorei.ll4
-rw-r--r--test/CodeGen/Mips/abiflags32.ll2
-rw-r--r--test/CodeGen/Mips/addi.ll8
-rw-r--r--test/CodeGen/Mips/addressing-mode.ll8
-rw-r--r--test/CodeGen/Mips/align16.ll8
-rw-r--r--test/CodeGen/Mips/alloca.ll34
-rw-r--r--test/CodeGen/Mips/alloca16.ll58
-rw-r--r--test/CodeGen/Mips/analyzebranch.ll2
-rw-r--r--test/CodeGen/Mips/and1.ll6
-rw-r--r--test/CodeGen/Mips/atomic.ll6
-rw-r--r--test/CodeGen/Mips/atomicops.ll12
-rw-r--r--test/CodeGen/Mips/beqzc.ll4
-rw-r--r--test/CodeGen/Mips/beqzc1.ll4
-rw-r--r--test/CodeGen/Mips/biggot.ll2
-rw-r--r--test/CodeGen/Mips/blockaddr.ll8
-rw-r--r--test/CodeGen/Mips/brconeq.ll4
-rw-r--r--test/CodeGen/Mips/brconeqk.ll2
-rw-r--r--test/CodeGen/Mips/brconeqz.ll2
-rw-r--r--test/CodeGen/Mips/brconge.ll6
-rw-r--r--test/CodeGen/Mips/brcongt.ll4
-rw-r--r--test/CodeGen/Mips/brconle.ll6
-rw-r--r--test/CodeGen/Mips/brconlt.ll4
-rw-r--r--test/CodeGen/Mips/brconne.ll4
-rw-r--r--test/CodeGen/Mips/brconnek.ll2
-rw-r--r--test/CodeGen/Mips/brconnez.ll2
-rw-r--r--test/CodeGen/Mips/brdelayslot.ll14
-rw-r--r--test/CodeGen/Mips/brind.ll14
-rw-r--r--test/CodeGen/Mips/brsize3.ll4
-rw-r--r--test/CodeGen/Mips/brsize3a.ll2
-rw-r--r--test/CodeGen/Mips/cache-intrinsic.ll8
-rw-r--r--test/CodeGen/Mips/cconv/arguments-float.ll62
-rw-r--r--test/CodeGen/Mips/cconv/arguments-fp128.ll18
-rw-r--r--test/CodeGen/Mips/cconv/arguments-hard-float-varargs.ll20
-rw-r--r--test/CodeGen/Mips/cconv/arguments-hard-float.ll56
-rw-r--r--test/CodeGen/Mips/cconv/arguments-hard-fp128.ll18
-rw-r--r--test/CodeGen/Mips/cconv/arguments-small-structures-bigger-than-32bits.ll8
-rw-r--r--test/CodeGen/Mips/cconv/arguments-struct.ll14
-rw-r--r--test/CodeGen/Mips/cconv/arguments-varargs-small-structs-byte.ll76
-rw-r--r--test/CodeGen/Mips/cconv/arguments-varargs-small-structs-combinations.ll40
-rw-r--r--test/CodeGen/Mips/cconv/arguments-varargs-small-structs-multiple-args.ll56
-rw-r--r--test/CodeGen/Mips/cconv/arguments-varargs.ll48
-rw-r--r--test/CodeGen/Mips/cconv/arguments.ll50
-rw-r--r--test/CodeGen/Mips/cconv/callee-saved-float.ll24
-rw-r--r--test/CodeGen/Mips/cconv/callee-saved.ll24
-rw-r--r--test/CodeGen/Mips/cconv/memory-layout.ll12
-rw-r--r--test/CodeGen/Mips/cconv/reserved-space.ll12
-rw-r--r--test/CodeGen/Mips/cconv/return-float.ll20
-rw-r--r--test/CodeGen/Mips/cconv/return-hard-float.ll18
-rw-r--r--test/CodeGen/Mips/cconv/return-hard-fp128.ll10
-rw-r--r--test/CodeGen/Mips/cconv/return-hard-struct-f128.ll10
-rw-r--r--test/CodeGen/Mips/cconv/return-struct.ll22
-rw-r--r--test/CodeGen/Mips/cconv/return.ll18
-rw-r--r--test/CodeGen/Mips/cconv/stack-alignment.ll12
-rw-r--r--test/CodeGen/Mips/cfi_offset.ll6
-rw-r--r--test/CodeGen/Mips/check-adde-redundant-moves.ll6
-rw-r--r--test/CodeGen/Mips/ci2.ll4
-rwxr-xr-xtest/CodeGen/Mips/cmov.ll8
-rw-r--r--test/CodeGen/Mips/cmplarge.ll6
-rw-r--r--test/CodeGen/Mips/const1.ll2
-rw-r--r--test/CodeGen/Mips/const4a.ll4
-rw-r--r--test/CodeGen/Mips/const6.ll4
-rw-r--r--test/CodeGen/Mips/const6a.ll4
-rw-r--r--test/CodeGen/Mips/ctlz.ll2
-rw-r--r--test/CodeGen/Mips/dagcombine_crash.ll25
-rw-r--r--test/CodeGen/Mips/delay-slot-fill-forward.ll183
-rw-r--r--test/CodeGen/Mips/delay-slot-kill.ll14
-rw-r--r--test/CodeGen/Mips/disable-tail-merge.ll6
-rw-r--r--test/CodeGen/Mips/div.ll4
-rw-r--r--test/CodeGen/Mips/div_rem.ll4
-rw-r--r--test/CodeGen/Mips/divrem.ll4
-rw-r--r--test/CodeGen/Mips/divu.ll4
-rw-r--r--test/CodeGen/Mips/divu_remu.ll4
-rw-r--r--test/CodeGen/Mips/dsp-patterns.ll12
-rw-r--r--test/CodeGen/Mips/dsp-vec-load-store.ll2
-rw-r--r--test/CodeGen/Mips/eh-return32.ll2
-rw-r--r--test/CodeGen/Mips/eh-return64.ll2
-rw-r--r--test/CodeGen/Mips/eh.ll3
-rw-r--r--test/CodeGen/Mips/ehframe-indirect.ll45
-rw-r--r--test/CodeGen/Mips/emergency-spill-slot-near-fp.ll32
-rw-r--r--test/CodeGen/Mips/emit-big-cst.ll2
-rw-r--r--test/CodeGen/Mips/ex2.ll4
-rw-r--r--test/CodeGen/Mips/extins.ll2
-rw-r--r--test/CodeGen/Mips/f16abs.ll4
-rw-r--r--test/CodeGen/Mips/fastcc.ll164
-rw-r--r--test/CodeGen/Mips/fcopysign-f32-f64.ll6
-rw-r--r--test/CodeGen/Mips/fcopysign.ll6
-rw-r--r--test/CodeGen/Mips/fixdfsf.ll2
-rw-r--r--test/CodeGen/Mips/fmadd1.ll12
-rw-r--r--test/CodeGen/Mips/fp-indexed-ls.ll44
-rw-r--r--test/CodeGen/Mips/fp-spill-reload.ll30
-rw-r--r--test/CodeGen/Mips/fp16-promote.ll98
-rw-r--r--test/CodeGen/Mips/fp16instrinsmc.ll60
-rw-r--r--test/CodeGen/Mips/fp16static.ll4
-rw-r--r--test/CodeGen/Mips/fpbr.ll24
-rw-r--r--test/CodeGen/Mips/fpneeded.ll6
-rw-r--r--test/CodeGen/Mips/fpnotneeded.ll2
-rw-r--r--test/CodeGen/Mips/fpxx.ll8
-rw-r--r--test/CodeGen/Mips/global-address.ll12
-rw-r--r--test/CodeGen/Mips/gpreg-lazy-binding.ll2
-rw-r--r--test/CodeGen/Mips/gprestore.ll8
-rw-r--r--test/CodeGen/Mips/helloworld.ll2
-rw-r--r--test/CodeGen/Mips/hf16_1.ll80
-rw-r--r--test/CodeGen/Mips/hf16call32.ll642
-rw-r--r--test/CodeGen/Mips/hf16call32_body.ll54
-rw-r--r--test/CodeGen/Mips/hf1_body.ll2
-rw-r--r--test/CodeGen/Mips/hfptrcall.ll56
-rw-r--r--test/CodeGen/Mips/i32k.ll4
-rw-r--r--test/CodeGen/Mips/inlineasm-assembler-directives.ll2
-rw-r--r--test/CodeGen/Mips/inlineasm-cnstrnt-reg64.ll2
-rw-r--r--test/CodeGen/Mips/inlineasm-operand-code.ll6
-rw-r--r--test/CodeGen/Mips/inlineasm64.ll6
-rw-r--r--test/CodeGen/Mips/inlineasm_constraint.ll9
-rw-r--r--test/CodeGen/Mips/inlineasm_constraint_R.ll60
-rw-r--r--test/CodeGen/Mips/inlineasm_constraint_ZC.ll167
-rw-r--r--test/CodeGen/Mips/inlineasm_constraint_m.ll61
-rw-r--r--test/CodeGen/Mips/inlineasmmemop.ll15
-rw-r--r--test/CodeGen/Mips/insn-zero-size-bb.ll27
-rw-r--r--test/CodeGen/Mips/internalfunc.ll10
-rw-r--r--test/CodeGen/Mips/jtstat.ll2
-rw-r--r--test/CodeGen/Mips/l3mc.ll52
-rw-r--r--test/CodeGen/Mips/largeimm1.ll2
-rw-r--r--test/CodeGen/Mips/largeimmprinting.ll8
-rw-r--r--test/CodeGen/Mips/lb1.ll6
-rw-r--r--test/CodeGen/Mips/lbu1.ll6
-rw-r--r--test/CodeGen/Mips/lcb2.ll20
-rw-r--r--test/CodeGen/Mips/lcb3c.ll6
-rw-r--r--test/CodeGen/Mips/lcb4a.ll6
-rw-r--r--test/CodeGen/Mips/lcb5.ll34
-rw-r--r--test/CodeGen/Mips/lh1.ll6
-rw-r--r--test/CodeGen/Mips/lhu1.ll6
-rw-r--r--test/CodeGen/Mips/llcarry.ll10
-rw-r--r--test/CodeGen/Mips/llvm-ir/add.ll8
-rw-r--r--test/CodeGen/Mips/llvm-ir/and.ll13
-rw-r--r--test/CodeGen/Mips/llvm-ir/ashr.ll93
-rw-r--r--test/CodeGen/Mips/llvm-ir/call.ll4
-rw-r--r--test/CodeGen/Mips/llvm-ir/indirectbr.ll4
-rw-r--r--test/CodeGen/Mips/llvm-ir/lshr.ll93
-rw-r--r--test/CodeGen/Mips/llvm-ir/mul.ll58
-rw-r--r--test/CodeGen/Mips/llvm-ir/or.ll14
-rw-r--r--test/CodeGen/Mips/llvm-ir/ret.ll4
-rw-r--r--test/CodeGen/Mips/llvm-ir/sdiv.ll28
-rw-r--r--test/CodeGen/Mips/llvm-ir/select.ll24
-rw-r--r--test/CodeGen/Mips/llvm-ir/shl.ll77
-rw-r--r--test/CodeGen/Mips/llvm-ir/srem.ll30
-rw-r--r--test/CodeGen/Mips/llvm-ir/sub.ll8
-rw-r--r--test/CodeGen/Mips/llvm-ir/udiv.ll8
-rw-r--r--test/CodeGen/Mips/llvm-ir/urem.ll38
-rw-r--r--test/CodeGen/Mips/llvm-ir/xor.ll13
-rw-r--r--test/CodeGen/Mips/load-store-left-right.ll42
-rw-r--r--test/CodeGen/Mips/longbranch.ll9
-rw-r--r--test/CodeGen/Mips/machineverifier.ll2
-rw-r--r--test/CodeGen/Mips/mbrsize4a.ll6
-rw-r--r--test/CodeGen/Mips/memcpy.ll6
-rw-r--r--test/CodeGen/Mips/micromips-addiu.ll18
-rw-r--r--test/CodeGen/Mips/micromips-addu16.ll18
-rw-r--r--test/CodeGen/Mips/micromips-and16.ll18
-rw-r--r--test/CodeGen/Mips/micromips-andi.ll12
-rw-r--r--test/CodeGen/Mips/micromips-compact-branches.ll2
-rw-r--r--test/CodeGen/Mips/micromips-compact-jump.ll11
-rw-r--r--test/CodeGen/Mips/micromips-delay-slot-jr.ll16
-rw-r--r--test/CodeGen/Mips/micromips-delay-slot.ll2
-rw-r--r--test/CodeGen/Mips/micromips-gp-rc.ll18
-rw-r--r--test/CodeGen/Mips/micromips-jal.ll10
-rw-r--r--test/CodeGen/Mips/micromips-load-effective-address.ll8
-rw-r--r--test/CodeGen/Mips/micromips-not16.ll26
-rw-r--r--test/CodeGen/Mips/micromips-or16.ll18
-rw-r--r--test/CodeGen/Mips/micromips-rdhwr-directives.ll2
-rw-r--r--test/CodeGen/Mips/micromips-shift.ll8
-rw-r--r--test/CodeGen/Mips/micromips-subu16.ll18
-rw-r--r--test/CodeGen/Mips/micromips-sw-lw-16.ll27
-rw-r--r--test/CodeGen/Mips/micromips-xor16.ll18
-rw-r--r--test/CodeGen/Mips/mips16_32_8.ll16
-rw-r--r--test/CodeGen/Mips/mips16_fpret.ll24
-rw-r--r--test/CodeGen/Mips/mips16ex.ll22
-rw-r--r--test/CodeGen/Mips/mips16fpe.ll112
-rw-r--r--test/CodeGen/Mips/mips64-f128-call.ll4
-rw-r--r--test/CodeGen/Mips/mips64-f128.ll80
-rw-r--r--test/CodeGen/Mips/mips64-libcall.ll2
-rw-r--r--test/CodeGen/Mips/mips64-sret.ll2
-rw-r--r--test/CodeGen/Mips/mips64directive.ll6
-rw-r--r--test/CodeGen/Mips/mips64ext.ll4
-rw-r--r--test/CodeGen/Mips/mips64extins.ll2
-rw-r--r--test/CodeGen/Mips/mips64fpimm0.ll4
-rw-r--r--test/CodeGen/Mips/mips64fpldst.ll16
-rw-r--r--test/CodeGen/Mips/mips64instrs.ll8
-rw-r--r--test/CodeGen/Mips/mips64intldst.ll30
-rw-r--r--test/CodeGen/Mips/mips64shift.ll6
-rw-r--r--test/CodeGen/Mips/mips64signextendsesf.ll40
-rw-r--r--test/CodeGen/Mips/mips64sinttofpsf.ll4
-rw-r--r--test/CodeGen/Mips/mipslopat.ll4
-rw-r--r--test/CodeGen/Mips/misha.ll12
-rw-r--r--test/CodeGen/Mips/mno-ldc1-sdc1.ll8
-rw-r--r--test/CodeGen/Mips/msa/2r.ll24
-rw-r--r--test/CodeGen/Mips/msa/2r_vector_scalar.ll8
-rw-r--r--test/CodeGen/Mips/msa/2rf.ll32
-rw-r--r--test/CodeGen/Mips/msa/2rf_exup.ll8
-rw-r--r--test/CodeGen/Mips/msa/2rf_float_int.ll8
-rw-r--r--test/CodeGen/Mips/msa/2rf_fq.ll8
-rw-r--r--test/CodeGen/Mips/msa/2rf_int_float.ll20
-rw-r--r--test/CodeGen/Mips/msa/2rf_tq.ll8
-rw-r--r--test/CodeGen/Mips/msa/3r-a.ll192
-rw-r--r--test/CodeGen/Mips/msa/3r-b.ll96
-rw-r--r--test/CodeGen/Mips/msa/3r-c.ll80
-rw-r--r--test/CodeGen/Mips/msa/3r-d.ll88
-rw-r--r--test/CodeGen/Mips/msa/3r-i.ll64
-rw-r--r--test/CodeGen/Mips/msa/3r-m.ll160
-rw-r--r--test/CodeGen/Mips/msa/3r-p.ll32
-rw-r--r--test/CodeGen/Mips/msa/3r-s.ll248
-rw-r--r--test/CodeGen/Mips/msa/3r-v.ll24
-rw-r--r--test/CodeGen/Mips/msa/3r_4r.ll48
-rw-r--r--test/CodeGen/Mips/msa/3r_4r_widen.ll72
-rw-r--r--test/CodeGen/Mips/msa/3r_splat.ll8
-rw-r--r--test/CodeGen/Mips/msa/3rf.ll96
-rw-r--r--test/CodeGen/Mips/msa/3rf_4rf.ll24
-rw-r--r--test/CodeGen/Mips/msa/3rf_4rf_q.ll48
-rw-r--r--test/CodeGen/Mips/msa/3rf_exdo.ll8
-rw-r--r--test/CodeGen/Mips/msa/3rf_float_int.ll8
-rw-r--r--test/CodeGen/Mips/msa/3rf_int_float.ll176
-rw-r--r--test/CodeGen/Mips/msa/3rf_q.ll16
-rw-r--r--test/CodeGen/Mips/msa/arithmetic.ll176
-rw-r--r--test/CodeGen/Mips/msa/arithmetic_float.ll88
-rw-r--r--test/CodeGen/Mips/msa/basic_operations.ll825
-rw-r--r--test/CodeGen/Mips/msa/basic_operations_float.ll305
-rw-r--r--test/CodeGen/Mips/msa/bit.ll56
-rw-r--r--test/CodeGen/Mips/msa/bitcast.ll98
-rw-r--r--test/CodeGen/Mips/msa/bitwise.ll310
-rw-r--r--test/CodeGen/Mips/msa/compare.ll408
-rw-r--r--test/CodeGen/Mips/msa/compare_float.ll156
-rw-r--r--test/CodeGen/Mips/msa/elm_copy.ll16
-rw-r--r--test/CodeGen/Mips/msa/elm_insv.ll32
-rw-r--r--test/CodeGen/Mips/msa/elm_move.ll2
-rw-r--r--test/CodeGen/Mips/msa/elm_shift_slide.ll24
-rw-r--r--test/CodeGen/Mips/msa/frameindex.ll58
-rw-r--r--test/CodeGen/Mips/msa/i10.ll8
-rw-r--r--test/CodeGen/Mips/msa/i5-a.ll8
-rw-r--r--test/CodeGen/Mips/msa/i5-b.ll56
-rw-r--r--test/CodeGen/Mips/msa/i5-c.ll40
-rw-r--r--test/CodeGen/Mips/msa/i5-m.ll32
-rw-r--r--test/CodeGen/Mips/msa/i5-s.ll8
-rw-r--r--test/CodeGen/Mips/msa/i5_ld_st.ll8
-rw-r--r--test/CodeGen/Mips/msa/i8.ll26
-rw-r--r--test/CodeGen/Mips/msa/inline-asm.ll4
-rw-r--r--test/CodeGen/Mips/msa/llvm-stress-s1704963983.ll22
-rw-r--r--test/CodeGen/Mips/msa/llvm-stress-s1935737938.ll22
-rw-r--r--test/CodeGen/Mips/msa/llvm-stress-s2704903805.ll22
-rw-r--r--test/CodeGen/Mips/msa/llvm-stress-s3861334421.ll22
-rw-r--r--test/CodeGen/Mips/msa/llvm-stress-s3926023935.ll22
-rw-r--r--test/CodeGen/Mips/msa/llvm-stress-s3997499501.ll22
-rw-r--r--test/CodeGen/Mips/msa/llvm-stress-s525530439.ll22
-rw-r--r--test/CodeGen/Mips/msa/llvm-stress-s997348632.ll22
-rw-r--r--test/CodeGen/Mips/msa/shuffle.ll1080
-rw-r--r--test/CodeGen/Mips/msa/spill.ll536
-rw-r--r--test/CodeGen/Mips/msa/vec.ll184
-rw-r--r--test/CodeGen/Mips/msa/vecs10.ll4
-rw-r--r--test/CodeGen/Mips/mul.ll4
-rw-r--r--test/CodeGen/Mips/mulll.ll4
-rw-r--r--test/CodeGen/Mips/mulull.ll4
-rw-r--r--test/CodeGen/Mips/nacl-align.ll4
-rw-r--r--test/CodeGen/Mips/nacl-branch-delay.ll2
-rw-r--r--test/CodeGen/Mips/nacl-reserved-regs.ll32
-rw-r--r--test/CodeGen/Mips/named-register-n32.ll2
-rw-r--r--test/CodeGen/Mips/neg1.ll4
-rw-r--r--test/CodeGen/Mips/no-odd-spreg-msa.ll8
-rw-r--r--test/CodeGen/Mips/nomips16.ll4
-rw-r--r--test/CodeGen/Mips/not1.ll4
-rw-r--r--test/CodeGen/Mips/o32_cc_byval.ll46
-rw-r--r--test/CodeGen/Mips/o32_cc_vararg.ll20
-rw-r--r--test/CodeGen/Mips/octeon.ll138
-rw-r--r--test/CodeGen/Mips/optimize-pic-o0.ll6
-rw-r--r--test/CodeGen/Mips/or1.ll6
-rw-r--r--test/CodeGen/Mips/prevent-hoisting.ll32
-rw-r--r--test/CodeGen/Mips/private.ll2
-rw-r--r--test/CodeGen/Mips/ra-allocatable.ll242
-rw-r--r--test/CodeGen/Mips/rdhwr-directives.ll2
-rw-r--r--test/CodeGen/Mips/rem.ll4
-rw-r--r--test/CodeGen/Mips/remat-immed-load.ll4
-rw-r--r--test/CodeGen/Mips/remu.ll4
-rw-r--r--test/CodeGen/Mips/return-vector.ll12
-rw-r--r--test/CodeGen/Mips/s2rem.ll4
-rw-r--r--test/CodeGen/Mips/sb1.ll8
-rw-r--r--test/CodeGen/Mips/sel1c.ll6
-rw-r--r--test/CodeGen/Mips/sel2c.ll6
-rw-r--r--test/CodeGen/Mips/selTBteqzCmpi.ll6
-rw-r--r--test/CodeGen/Mips/selTBtnezCmpi.ll6
-rw-r--r--test/CodeGen/Mips/selTBtnezSlti.ll6
-rw-r--r--test/CodeGen/Mips/select.ll12
-rw-r--r--test/CodeGen/Mips/seleq.ll32
-rw-r--r--test/CodeGen/Mips/seleqk.ll24
-rw-r--r--test/CodeGen/Mips/selgek.ll24
-rw-r--r--test/CodeGen/Mips/selgt.ll34
-rw-r--r--test/CodeGen/Mips/selle.ll32
-rw-r--r--test/CodeGen/Mips/selltk.ll24
-rw-r--r--test/CodeGen/Mips/selne.ll32
-rw-r--r--test/CodeGen/Mips/selnek.ll40
-rw-r--r--test/CodeGen/Mips/selpat.ll136
-rw-r--r--test/CodeGen/Mips/seteq.ll4
-rw-r--r--test/CodeGen/Mips/seteqz.ll4
-rw-r--r--test/CodeGen/Mips/setge.ll6
-rw-r--r--test/CodeGen/Mips/setgek.ll2
-rw-r--r--test/CodeGen/Mips/setle.ll6
-rw-r--r--test/CodeGen/Mips/setlt.ll4
-rw-r--r--test/CodeGen/Mips/setltk.ll2
-rw-r--r--test/CodeGen/Mips/setne.ll4
-rw-r--r--test/CodeGen/Mips/setuge.ll6
-rw-r--r--test/CodeGen/Mips/setugt.ll4
-rw-r--r--test/CodeGen/Mips/setule.ll6
-rw-r--r--test/CodeGen/Mips/setult.ll4
-rw-r--r--test/CodeGen/Mips/setultk.ll2
-rw-r--r--test/CodeGen/Mips/sh1.ll8
-rw-r--r--test/CodeGen/Mips/simplebr.ll4
-rw-r--r--test/CodeGen/Mips/sitofp-selectcc-opt.ll2
-rw-r--r--test/CodeGen/Mips/sll1.ll6
-rw-r--r--test/CodeGen/Mips/sll2.ll8
-rw-r--r--test/CodeGen/Mips/small-section-reserve-gp.ll2
-rw-r--r--test/CodeGen/Mips/spill-copy-acreg.ll6
-rw-r--r--test/CodeGen/Mips/sr1.ll8
-rw-r--r--test/CodeGen/Mips/sra1.ll4
-rw-r--r--test/CodeGen/Mips/sra2.ll6
-rw-r--r--test/CodeGen/Mips/srl1.ll6
-rw-r--r--test/CodeGen/Mips/srl2.ll8
-rw-r--r--test/CodeGen/Mips/stackcoloring.ll8
-rw-r--r--test/CodeGen/Mips/start-asm-file.ll16
-rw-r--r--test/CodeGen/Mips/stchar.ll40
-rw-r--r--test/CodeGen/Mips/stldst.ll20
-rw-r--r--test/CodeGen/Mips/sub1.ll4
-rw-r--r--test/CodeGen/Mips/sub2.ll6
-rw-r--r--test/CodeGen/Mips/swzero.ll2
-rw-r--r--test/CodeGen/Mips/tail16.ll2
-rw-r--r--test/CodeGen/Mips/tailcall.ll24
-rw-r--r--test/CodeGen/Mips/tls.ll6
-rw-r--r--test/CodeGen/Mips/tls16.ll2
-rw-r--r--test/CodeGen/Mips/tls16_2.ll2
-rw-r--r--test/CodeGen/Mips/uitofp.ll2
-rw-r--r--test/CodeGen/Mips/ul1.ll2
-rw-r--r--test/CodeGen/Mips/unalignedload.ll2
-rw-r--r--test/CodeGen/Mips/vector-load-store.ll4
-rw-r--r--test/CodeGen/Mips/vector-setcc.ll4
-rw-r--r--test/CodeGen/Mips/xor1.ll6
-rw-r--r--test/CodeGen/Mips/zeroreg.ll8
-rw-r--r--test/CodeGen/NVPTX/access-non-generic.ll20
-rw-r--r--test/CodeGen/NVPTX/addrspacecast-gvar.ll4
-rw-r--r--test/CodeGen/NVPTX/addrspacecast.ll16
-rw-r--r--test/CodeGen/NVPTX/bug21465.ll4
-rw-r--r--test/CodeGen/NVPTX/bug22246.ll14
-rw-r--r--test/CodeGen/NVPTX/bug22322.ll62
-rw-r--r--test/CodeGen/NVPTX/call-with-alloca-buffer.ll22
-rw-r--r--test/CodeGen/NVPTX/fp16.ll8
-rw-r--r--test/CodeGen/NVPTX/function-align.ll7
-rw-r--r--test/CodeGen/NVPTX/generic-to-nvvm.ll4
-rw-r--r--test/CodeGen/NVPTX/half.ll14
-rw-r--r--test/CodeGen/NVPTX/i1-global.ll2
-rw-r--r--test/CodeGen/NVPTX/i8-param.ll2
-rw-r--r--test/CodeGen/NVPTX/ld-addrspace.ll36
-rw-r--r--test/CodeGen/NVPTX/ld-generic.ll12
-rw-r--r--test/CodeGen/NVPTX/ldu-reg-plus-offset.ll4
-rw-r--r--test/CodeGen/NVPTX/load-sext-i1.ll4
-rw-r--r--test/CodeGen/NVPTX/machine-sink.ll4
-rw-r--r--test/CodeGen/NVPTX/misaligned-vector-ldst.ll8
-rw-r--r--test/CodeGen/NVPTX/noduplicate-syncthreads.ll28
-rw-r--r--test/CodeGen/NVPTX/nounroll.ll37
-rw-r--r--test/CodeGen/NVPTX/nvvm-reflect.ll35
-rw-r--r--test/CodeGen/NVPTX/pr13291-i1-store.ll2
-rw-r--r--test/CodeGen/NVPTX/pr16278.ll2
-rw-r--r--test/CodeGen/NVPTX/pr17529.ll6
-rw-r--r--test/CodeGen/NVPTX/ptx-version-30.ll6
-rw-r--r--test/CodeGen/NVPTX/ptx-version-31.ll6
-rw-r--r--test/CodeGen/NVPTX/refl1.ll2
-rw-r--r--test/CodeGen/NVPTX/sched1.ll16
-rw-r--r--test/CodeGen/NVPTX/sched2.ll16
-rw-r--r--test/CodeGen/NVPTX/shift-parts.ll8
-rw-r--r--test/CodeGen/NVPTX/simple-call.ll2
-rw-r--r--test/CodeGen/NVPTX/sm-version-30.ll1
-rw-r--r--test/CodeGen/NVPTX/sm-version-32.ll7
-rw-r--r--test/CodeGen/NVPTX/sm-version-35.ll1
-rw-r--r--test/CodeGen/NVPTX/sm-version-37.ll7
-rw-r--r--test/CodeGen/NVPTX/sm-version-50.ll7
-rw-r--r--test/CodeGen/NVPTX/sm-version-52.ll7
-rw-r--r--test/CodeGen/NVPTX/sm-version-53.ll7
-rw-r--r--test/CodeGen/NVPTX/symbol-naming.ll2
-rw-r--r--test/CodeGen/NVPTX/vector-compare.ll4
-rw-r--r--test/CodeGen/NVPTX/vector-loads.ll12
-rw-r--r--test/CodeGen/NVPTX/vector-select.ll6
-rw-r--r--test/CodeGen/NVPTX/weak-global.ll2
-rw-r--r--test/CodeGen/PowerPC/2005-11-30-vastart-crash.ll2
-rw-r--r--test/CodeGen/PowerPC/2006-01-20-ShiftPartsCrash.ll4
-rw-r--r--test/CodeGen/PowerPC/2006-04-05-splat-ish.ll2
-rw-r--r--test/CodeGen/PowerPC/2006-05-12-rlwimi-crash.ll28
-rw-r--r--test/CodeGen/PowerPC/2006-07-07-ComputeMaskedBits.ll12
-rw-r--r--test/CodeGen/PowerPC/2006-07-19-stwbrx-crash.ll2
-rw-r--r--test/CodeGen/PowerPC/2006-08-15-SelectionCrash.ll2
-rw-r--r--test/CodeGen/PowerPC/2006-10-13-Miscompile.ll2
-rw-r--r--test/CodeGen/PowerPC/2006-10-17-brcc-miscompile.ll2
-rw-r--r--test/CodeGen/PowerPC/2006-12-07-LargeAlloca.ll2
-rw-r--r--test/CodeGen/PowerPC/2006-12-07-SelectCrash.ll2
-rw-r--r--test/CodeGen/PowerPC/2007-01-15-AsmDialect.ll12
-rw-r--r--test/CodeGen/PowerPC/2007-01-31-InlineAsmAddrMode.ll4
-rw-r--r--test/CodeGen/PowerPC/2007-02-23-lr-saved-twice.ll2
-rw-r--r--test/CodeGen/PowerPC/2007-03-24-cntlzd.ll2
-rw-r--r--test/CodeGen/PowerPC/2007-03-30-SpillerCrash.ll1594
-rw-r--r--test/CodeGen/PowerPC/2007-04-30-InlineAsmEarlyClobber.ll2
-rw-r--r--test/CodeGen/PowerPC/2007-05-03-InlineAsm-S-Constraint.ll2
-rw-r--r--test/CodeGen/PowerPC/2007-05-14-InlineAsmSelectCrash.ll2
-rw-r--r--test/CodeGen/PowerPC/2007-05-22-tailmerge-3.ll26
-rw-r--r--test/CodeGen/PowerPC/2007-06-28-BCCISelBug.ll2
-rw-r--r--test/CodeGen/PowerPC/2007-08-04-CoalescerAssert.ll2
-rw-r--r--test/CodeGen/PowerPC/2007-09-07-LoadStoreIdxForms.ll4
-rw-r--r--test/CodeGen/PowerPC/2007-09-08-unaligned.ll28
-rw-r--r--test/CodeGen/PowerPC/2007-10-18-PtrArithmetic.ll4
-rw-r--r--test/CodeGen/PowerPC/2007-10-21-LocalRegAllocAssert.ll12
-rw-r--r--test/CodeGen/PowerPC/2007-10-21-LocalRegAllocAssert2.ll14
-rw-r--r--test/CodeGen/PowerPC/2007-11-16-landingpad-split.ll4
-rw-r--r--test/CodeGen/PowerPC/2007-11-19-VectorSplitting.ll2
-rw-r--r--test/CodeGen/PowerPC/2008-02-05-LiveIntervalsAssert.ll2
-rw-r--r--test/CodeGen/PowerPC/2008-02-09-LocalRegAllocAssert.ll2
-rw-r--r--test/CodeGen/PowerPC/2008-03-05-RegScavengerAssert.ll2
-rw-r--r--test/CodeGen/PowerPC/2008-03-17-RegScavengerCrash.ll4
-rw-r--r--test/CodeGen/PowerPC/2008-03-24-AddressRegImm.ll4
-rw-r--r--test/CodeGen/PowerPC/2008-03-26-CoalescerBug.ll2
-rw-r--r--test/CodeGen/PowerPC/2008-04-23-CoalescerCrash.ll14
-rw-r--r--test/CodeGen/PowerPC/2008-06-21-F128LoadStore.ll2
-rw-r--r--test/CodeGen/PowerPC/2008-06-23-LiveVariablesCrash.ll2
-rw-r--r--test/CodeGen/PowerPC/2008-07-15-Bswap.ll98
-rw-r--r--test/CodeGen/PowerPC/2008-07-15-SignExtendInreg.ll2
-rw-r--r--test/CodeGen/PowerPC/2008-07-24-PPC64-CCBug.ll2
-rw-r--r--test/CodeGen/PowerPC/2008-09-12-CoalescerBug.ll110
-rw-r--r--test/CodeGen/PowerPC/2008-10-28-UnprocessedNode.ll2
-rw-r--r--test/CodeGen/PowerPC/2008-10-31-PPCF128Libcalls.ll10
-rw-r--r--test/CodeGen/PowerPC/2009-01-16-DeclareISelBug.ll2
-rw-r--r--test/CodeGen/PowerPC/2009-03-17-LSRBug.ll6
-rw-r--r--test/CodeGen/PowerPC/2009-08-17-inline-asm-addr-mode-breakage.ll4
-rw-r--r--test/CodeGen/PowerPC/2009-11-15-ProcImpDefsBug.ll2
-rw-r--r--test/CodeGen/PowerPC/2010-02-12-saveCR.ll8
-rw-r--r--test/CodeGen/PowerPC/2010-03-09-indirect-call.ll4
-rw-r--r--test/CodeGen/PowerPC/2010-12-18-PPCStackRefs.ll4
-rw-r--r--test/CodeGen/PowerPC/2011-12-05-NoSpillDupCR.ll70
-rw-r--r--test/CodeGen/PowerPC/2011-12-06-SpillAndRestoreCR.ll72
-rw-r--r--test/CodeGen/PowerPC/2011-12-08-DemandedBitsMiscompile.ll2
-rw-r--r--test/CodeGen/PowerPC/2012-09-16-TOC-entry-check.ll8
-rw-r--r--test/CodeGen/PowerPC/2013-05-15-preinc-fold.ll8
-rw-r--r--test/CodeGen/PowerPC/2013-07-01-PHIElimBug.ll6
-rw-r--r--test/CodeGen/PowerPC/Atomics-64.ll160
-rw-r--r--test/CodeGen/PowerPC/MergeConsecutiveStores.ll68
-rw-r--r--test/CodeGen/PowerPC/a2-fp-basic.ll28
-rw-r--r--test/CodeGen/PowerPC/add-fi.ll4
-rw-r--r--test/CodeGen/PowerPC/addi-licm.ll19
-rw-r--r--test/CodeGen/PowerPC/addi-reassoc.ll8
-rw-r--r--test/CodeGen/PowerPC/alias.ll12
-rw-r--r--test/CodeGen/PowerPC/and-branch.ll2
-rw-r--r--test/CodeGen/PowerPC/and-elim.ll2
-rw-r--r--test/CodeGen/PowerPC/anon_aggr.ll42
-rw-r--r--test/CodeGen/PowerPC/asm-constraints.ll4
-rw-r--r--test/CodeGen/PowerPC/atomic-2.ll60
-rw-r--r--test/CodeGen/PowerPC/atomics-fences.ll11
-rw-r--r--test/CodeGen/PowerPC/atomics-indexed.ll37
-rw-r--r--test/CodeGen/PowerPC/atomics.ll40
-rw-r--r--test/CodeGen/PowerPC/bdzlr.ll6
-rw-r--r--test/CodeGen/PowerPC/bperm.ll32
-rw-r--r--test/CodeGen/PowerPC/branch-opt.ll10
-rw-r--r--test/CodeGen/PowerPC/bswap-load-store.ll18
-rw-r--r--test/CodeGen/PowerPC/buildvec_canonicalize.ll4
-rw-r--r--test/CodeGen/PowerPC/byval-aliased.ll4
-rw-r--r--test/CodeGen/PowerPC/cmpb-ppc32.ll2
-rw-r--r--test/CodeGen/PowerPC/cmpb.ll6
-rw-r--r--test/CodeGen/PowerPC/code-align.ll28
-rw-r--r--test/CodeGen/PowerPC/compare-simm.ll6
-rw-r--r--test/CodeGen/PowerPC/complex-return.ll36
-rw-r--r--test/CodeGen/PowerPC/cr-spills.ll168
-rw-r--r--test/CodeGen/PowerPC/cr1eq-no-extra-moves.ll4
-rw-r--r--test/CodeGen/PowerPC/cr1eq.ll4
-rw-r--r--test/CodeGen/PowerPC/cr_spilling.ll2
-rw-r--r--test/CodeGen/PowerPC/crbit-asm.ll6
-rw-r--r--test/CodeGen/PowerPC/crbits.ll4
-rw-r--r--test/CodeGen/PowerPC/crsave.ll4
-rw-r--r--test/CodeGen/PowerPC/crypto_bifs.ll275
-rw-r--r--test/CodeGen/PowerPC/ctrloop-cpsgn.ll4
-rw-r--r--test/CodeGen/PowerPC/ctrloop-fp64.ll10
-rw-r--r--test/CodeGen/PowerPC/ctrloop-i64.ll16
-rw-r--r--test/CodeGen/PowerPC/ctrloop-le.ll60
-rw-r--r--test/CodeGen/PowerPC/ctrloop-lt.ll60
-rw-r--r--test/CodeGen/PowerPC/ctrloop-ne.ll60
-rw-r--r--test/CodeGen/PowerPC/ctrloop-s000.ll98
-rw-r--r--test/CodeGen/PowerPC/ctrloop-sh.ll12
-rw-r--r--test/CodeGen/PowerPC/ctrloop-sums.ll14
-rw-r--r--test/CodeGen/PowerPC/ctrloops.ll6
-rw-r--r--test/CodeGen/PowerPC/cttz-ctlz-spec.ll41
-rw-r--r--test/CodeGen/PowerPC/cttz.ll2
-rw-r--r--test/CodeGen/PowerPC/dbg.ll36
-rw-r--r--test/CodeGen/PowerPC/dcbt-sched.ll4
-rw-r--r--test/CodeGen/PowerPC/delete-node.ll6
-rw-r--r--test/CodeGen/PowerPC/div-e-32.ll31
-rw-r--r--test/CodeGen/PowerPC/div-e-all.ll54
-rw-r--r--test/CodeGen/PowerPC/dyn-alloca-aligned.ll10
-rw-r--r--test/CodeGen/PowerPC/early-ret.ll35
-rw-r--r--test/CodeGen/PowerPC/ec-input.ll155
-rw-r--r--test/CodeGen/PowerPC/empty-functions.ll10
-rw-r--r--test/CodeGen/PowerPC/emptystruct.ll2
-rw-r--r--test/CodeGen/PowerPC/eqv-andc-orc-nor.ll8
-rw-r--r--test/CodeGen/PowerPC/extra-toc-reg-deps.ll430
-rw-r--r--test/CodeGen/PowerPC/f32-to-i64.ll23
-rw-r--r--test/CodeGen/PowerPC/fast-isel-GEP-coalesce.ll18
-rw-r--r--test/CodeGen/PowerPC/fast-isel-binary.ll2
-rw-r--r--test/CodeGen/PowerPC/fast-isel-br-const.ll2
-rw-r--r--test/CodeGen/PowerPC/fast-isel-call.ll6
-rw-r--r--test/CodeGen/PowerPC/fast-isel-cmp-imm.ll6
-rw-r--r--test/CodeGen/PowerPC/fast-isel-const.ll2
-rw-r--r--test/CodeGen/PowerPC/fast-isel-conversion-p5.ll2
-rw-r--r--test/CodeGen/PowerPC/fast-isel-conversion.ll16
-rw-r--r--test/CodeGen/PowerPC/fast-isel-crash.ll2
-rw-r--r--test/CodeGen/PowerPC/fast-isel-ext.ll6
-rw-r--r--test/CodeGen/PowerPC/fast-isel-fold.ll28
-rw-r--r--test/CodeGen/PowerPC/fast-isel-icmp-split.ll72
-rw-r--r--test/CodeGen/PowerPC/fast-isel-indirectbr.ll2
-rw-r--r--test/CodeGen/PowerPC/fast-isel-load-store-vsx.ll28
-rw-r--r--test/CodeGen/PowerPC/fast-isel-load-store.ll26
-rw-r--r--test/CodeGen/PowerPC/fast-isel-redefinition.ll6
-rw-r--r--test/CodeGen/PowerPC/fast-isel-ret.ll4
-rw-r--r--test/CodeGen/PowerPC/fast-isel-shifter.ll2
-rw-r--r--test/CodeGen/PowerPC/fastisel-gep-promote-before-add.ll6
-rw-r--r--test/CodeGen/PowerPC/floatPSA.ll30
-rw-r--r--test/CodeGen/PowerPC/flt-preinc.ll40
-rw-r--r--test/CodeGen/PowerPC/fma-assoc.ll196
-rw-r--r--test/CodeGen/PowerPC/fma-ext.ll22
-rw-r--r--test/CodeGen/PowerPC/fp-int-conversions-direct-moves.ll426
-rw-r--r--test/CodeGen/PowerPC/fp-to-int-ext.ll8
-rw-r--r--test/CodeGen/PowerPC/frounds.ll4
-rw-r--r--test/CodeGen/PowerPC/glob-comp-aa-crash.ll16
-rw-r--r--test/CodeGen/PowerPC/hello.ll2
-rw-r--r--test/CodeGen/PowerPC/hidden-vis-2.ll4
-rw-r--r--test/CodeGen/PowerPC/hidden-vis.ll2
-rw-r--r--test/CodeGen/PowerPC/htm.ll125
-rw-r--r--test/CodeGen/PowerPC/i64_fp_round.ll2
-rw-r--r--test/CodeGen/PowerPC/ia-mem-r0.ll44
-rw-r--r--test/CodeGen/PowerPC/indexed-load.ll4
-rw-r--r--test/CodeGen/PowerPC/indirectbr.ll6
-rw-r--r--test/CodeGen/PowerPC/inlineasm-i64-reg.ll20
-rw-r--r--test/CodeGen/PowerPC/isel-rc-nox0.ll4
-rw-r--r--test/CodeGen/PowerPC/lbz-from-ld-shift.ll2
-rw-r--r--test/CodeGen/PowerPC/lbzux.ll8
-rw-r--r--test/CodeGen/PowerPC/ld-st-upd.ll4
-rw-r--r--test/CodeGen/PowerPC/ldtoc-inv.ll39
-rw-r--r--test/CodeGen/PowerPC/lha.ll2
-rw-r--r--test/CodeGen/PowerPC/load-constant-addr.ll2
-rw-r--r--test/CodeGen/PowerPC/load-shift-combine.ll16
-rw-r--r--test/CodeGen/PowerPC/long-compare.ll2
-rw-r--r--test/CodeGen/PowerPC/loop-data-prefetch-inner.ll66
-rw-r--r--test/CodeGen/PowerPC/loop-data-prefetch.ll29
-rw-r--r--test/CodeGen/PowerPC/loop-prep-all.ll48
-rw-r--r--test/CodeGen/PowerPC/lsa.ll12
-rw-r--r--test/CodeGen/PowerPC/lsr-postinc-pos.ll8
-rw-r--r--test/CodeGen/PowerPC/mask64.ll4
-rw-r--r--test/CodeGen/PowerPC/mature-mc-support.ll4
-rw-r--r--test/CodeGen/PowerPC/mcm-1.ll2
-rw-r--r--test/CodeGen/PowerPC/mcm-10.ll2
-rw-r--r--test/CodeGen/PowerPC/mcm-11.ll2
-rw-r--r--test/CodeGen/PowerPC/mcm-2.ll2
-rw-r--r--test/CodeGen/PowerPC/mcm-3.ll2
-rw-r--r--test/CodeGen/PowerPC/mcm-5.ll16
-rw-r--r--test/CodeGen/PowerPC/mcm-6.ll2
-rw-r--r--test/CodeGen/PowerPC/mcm-7.ll2
-rw-r--r--test/CodeGen/PowerPC/mcm-8.ll4
-rw-r--r--test/CodeGen/PowerPC/mcm-9.ll2
-rw-r--r--test/CodeGen/PowerPC/mcm-default.ll2
-rw-r--r--test/CodeGen/PowerPC/mcm-obj-2.ll6
-rw-r--r--test/CodeGen/PowerPC/mcm-obj.ll119
-rw-r--r--test/CodeGen/PowerPC/mem-rr-addr-mode.ll10
-rw-r--r--test/CodeGen/PowerPC/mem_update.ll28
-rw-r--r--test/CodeGen/PowerPC/memcpy-vec.ll110
-rw-r--r--test/CodeGen/PowerPC/memset-nc-le.ll24
-rw-r--r--test/CodeGen/PowerPC/memset-nc.ll48
-rw-r--r--test/CodeGen/PowerPC/misched-inorder-latency.ll6
-rw-r--r--test/CodeGen/PowerPC/misched.ll2
-rw-r--r--test/CodeGen/PowerPC/mult-alt-generic-powerpc.ll46
-rw-r--r--test/CodeGen/PowerPC/mult-alt-generic-powerpc64.ll46
-rw-r--r--test/CodeGen/PowerPC/named-reg-alloc-r2-64.ll9
-rw-r--r--test/CodeGen/PowerPC/named-reg-alloc-r2.ll6
-rw-r--r--test/CodeGen/PowerPC/no-extra-fp-conv-ldst.ll4
-rw-r--r--test/CodeGen/PowerPC/no-pref-jumps.ll36
-rw-r--r--test/CodeGen/PowerPC/novrsave.ll2
-rw-r--r--test/CodeGen/PowerPC/optnone-crbits-i1-ret.ll37
-rw-r--r--test/CodeGen/PowerPC/or-addressing-mode.ll4
-rw-r--r--test/CodeGen/PowerPC/p8-isel-sched.ll33
-rw-r--r--test/CodeGen/PowerPC/pip-inner.ll52
-rw-r--r--test/CodeGen/PowerPC/post-ra-ec.ll6
-rw-r--r--test/CodeGen/PowerPC/ppc-crbits-onoff.ll43
-rw-r--r--test/CodeGen/PowerPC/ppc-empty-fs.ll32
-rw-r--r--test/CodeGen/PowerPC/ppc-prologue.ll4
-rw-r--r--test/CodeGen/PowerPC/ppc32-cyclecounter.ll4
-rw-r--r--test/CodeGen/PowerPC/ppc32-i1-vaarg.ll2
-rw-r--r--test/CodeGen/PowerPC/ppc32-lshrti3.ll2
-rw-r--r--test/CodeGen/PowerPC/ppc32-pic-large.ll4
-rw-r--r--test/CodeGen/PowerPC/ppc32-pic.ll4
-rw-r--r--test/CodeGen/PowerPC/ppc440-fp-basic.ll28
-rw-r--r--test/CodeGen/PowerPC/ppc64-abi-extend.ll8
-rw-r--r--test/CodeGen/PowerPC/ppc64-align-long-double.ll4
-rw-r--r--test/CodeGen/PowerPC/ppc64-anyregcc-crash.ll2
-rw-r--r--test/CodeGen/PowerPC/ppc64-anyregcc.ll61
-rw-r--r--test/CodeGen/PowerPC/ppc64-byval-align.ll12
-rw-r--r--test/CodeGen/PowerPC/ppc64-calls.ll2
-rw-r--r--test/CodeGen/PowerPC/ppc64-elf-abi.ll8
-rw-r--r--test/CodeGen/PowerPC/ppc64-fastcc-fast-isel.ll56
-rw-r--r--test/CodeGen/PowerPC/ppc64-fastcc.ll540
-rw-r--r--test/CodeGen/PowerPC/ppc64-func-desc-hoist.ll47
-rw-r--r--test/CodeGen/PowerPC/ppc64-gep-opt.ll40
-rw-r--r--test/CodeGen/PowerPC/ppc64-i128-abi.ll274
-rw-r--r--test/CodeGen/PowerPC/ppc64-icbt-pwr7.ll19
-rw-r--r--test/CodeGen/PowerPC/ppc64-icbt-pwr8.ll16
-rw-r--r--test/CodeGen/PowerPC/ppc64-linux-func-size.ll6
-rw-r--r--test/CodeGen/PowerPC/ppc64-nonfunc-calls.ll2
-rw-r--r--test/CodeGen/PowerPC/ppc64-patchpoint.ll56
-rw-r--r--test/CodeGen/PowerPC/ppc64-r2-alloc.ll81
-rw-r--r--test/CodeGen/PowerPC/ppc64-smallarg.ll4
-rw-r--r--test/CodeGen/PowerPC/ppc64-stackmap-nops.ll2
-rw-r--r--test/CodeGen/PowerPC/ppc64-stackmap.ll80
-rw-r--r--test/CodeGen/PowerPC/ppc64-toc.ll11
-rw-r--r--test/CodeGen/PowerPC/ppc64-zext.ll2
-rw-r--r--test/CodeGen/PowerPC/ppc64le-aggregates.ll57
-rw-r--r--test/CodeGen/PowerPC/ppc64le-calls.ll4
-rw-r--r--test/CodeGen/PowerPC/ppc64le-localentry.ll10
-rw-r--r--test/CodeGen/PowerPC/ppc64le-smallarg.ll10
-rw-r--r--test/CodeGen/PowerPC/ppcf128-1.ll32
-rw-r--r--test/CodeGen/PowerPC/ppcf128-3.ll8
-rw-r--r--test/CodeGen/PowerPC/ppcf128-endian.ll6
-rw-r--r--test/CodeGen/PowerPC/pr13891.ll2
-rw-r--r--test/CodeGen/PowerPC/pr15031.ll28
-rw-r--r--test/CodeGen/PowerPC/pr15630.ll2
-rw-r--r--test/CodeGen/PowerPC/pr16556-2.ll12
-rw-r--r--test/CodeGen/PowerPC/pr17168.ll822
-rw-r--r--test/CodeGen/PowerPC/pr17354.ll4
-rw-r--r--test/CodeGen/PowerPC/pr18663.ll18
-rw-r--r--test/CodeGen/PowerPC/pr20442.ll16
-rw-r--r--test/CodeGen/PowerPC/pr22711.ll78
-rw-r--r--test/CodeGen/PowerPC/preinc-ld-sel-crash.ll63
-rw-r--r--test/CodeGen/PowerPC/preincprep-invoke.ll50
-rw-r--r--test/CodeGen/PowerPC/private.ll2
-rw-r--r--test/CodeGen/PowerPC/pwr7-gt-nop.ll6
-rw-r--r--test/CodeGen/PowerPC/qpx-bv-sint.ll33
-rw-r--r--test/CodeGen/PowerPC/qpx-bv.ll37
-rw-r--r--test/CodeGen/PowerPC/qpx-func-clobber.ll22
-rw-r--r--test/CodeGen/PowerPC/qpx-load.ll26
-rw-r--r--test/CodeGen/PowerPC/qpx-recipest.ll194
-rw-r--r--test/CodeGen/PowerPC/qpx-rounding-ops.ll109
-rw-r--r--test/CodeGen/PowerPC/qpx-s-load.ll26
-rw-r--r--test/CodeGen/PowerPC/qpx-s-sel.ll144
-rw-r--r--test/CodeGen/PowerPC/qpx-s-store.ll25
-rw-r--r--test/CodeGen/PowerPC/qpx-sel.ll152
-rw-r--r--test/CodeGen/PowerPC/qpx-split-vsetcc.ll40
-rw-r--r--test/CodeGen/PowerPC/qpx-store.ll25
-rw-r--r--test/CodeGen/PowerPC/qpx-unalperm.ll64
-rw-r--r--test/CodeGen/PowerPC/quadint-return.ll2
-rw-r--r--test/CodeGen/PowerPC/reg-coalesce-simple.ll4
-rw-r--r--test/CodeGen/PowerPC/reloc-align.ll2
-rw-r--r--test/CodeGen/PowerPC/remat-imm.ll2
-rw-r--r--test/CodeGen/PowerPC/resolvefi-basereg.ll350
-rw-r--r--test/CodeGen/PowerPC/resolvefi-disp.ll16
-rw-r--r--test/CodeGen/PowerPC/retaddr2.ll3
-rw-r--r--test/CodeGen/PowerPC/return-val-i128.ll10
-rw-r--r--test/CodeGen/PowerPC/rlwimi-and.ll6
-rw-r--r--test/CodeGen/PowerPC/rlwimi-commute.ll8
-rw-r--r--test/CodeGen/PowerPC/rlwimi-dyn-and.ll8
-rw-r--r--test/CodeGen/PowerPC/rm-zext.ll4
-rw-r--r--test/CodeGen/PowerPC/rotl-2.ll5
-rw-r--r--test/CodeGen/PowerPC/rotl-64.ll4
-rw-r--r--test/CodeGen/PowerPC/rotl.ll6
-rw-r--r--test/CodeGen/PowerPC/rs-undef-use.ll6
-rw-r--r--test/CodeGen/PowerPC/s000-alias-misched.ll30
-rw-r--r--test/CodeGen/PowerPC/sdag-ppcf128.ll2
-rw-r--r--test/CodeGen/PowerPC/seteq-0.ll2
-rw-r--r--test/CodeGen/PowerPC/sjlj.ll8
-rw-r--r--test/CodeGen/PowerPC/small-arguments.ll6
-rw-r--r--test/CodeGen/PowerPC/split-index-tc.ll8
-rw-r--r--test/CodeGen/PowerPC/stack-protector.ll4
-rw-r--r--test/CodeGen/PowerPC/stack-realign.ll56
-rw-r--r--test/CodeGen/PowerPC/std-unal-fi.ll14
-rw-r--r--test/CodeGen/PowerPC/stdux-constuse.ll10
-rw-r--r--test/CodeGen/PowerPC/stfiwx.ll4
-rw-r--r--test/CodeGen/PowerPC/store-load-fwd.ll2
-rw-r--r--test/CodeGen/PowerPC/store-update.ll28
-rw-r--r--test/CodeGen/PowerPC/structsinmem.ll60
-rw-r--r--test/CodeGen/PowerPC/structsinregs.ll60
-rw-r--r--test/CodeGen/PowerPC/stwu-gta.ll4
-rw-r--r--test/CodeGen/PowerPC/stwu8.ll2
-rw-r--r--test/CodeGen/PowerPC/stwux.ll2
-rw-r--r--test/CodeGen/PowerPC/subreg-postra-2.ll8
-rw-r--r--test/CodeGen/PowerPC/subreg-postra.ll12
-rw-r--r--test/CodeGen/PowerPC/subsumes-pred-regs.ll2
-rw-r--r--test/CodeGen/PowerPC/swaps-le-1.ll147
-rw-r--r--test/CodeGen/PowerPC/swaps-le-2.ll91
-rw-r--r--test/CodeGen/PowerPC/tls-cse.ll52
-rw-r--r--test/CodeGen/PowerPC/tls-pic.ll20
-rw-r--r--test/CodeGen/PowerPC/tls-store2.ll11
-rw-r--r--test/CodeGen/PowerPC/tls.ll2
-rw-r--r--test/CodeGen/PowerPC/toc-load-sched-bug.ll152
-rw-r--r--test/CodeGen/PowerPC/trampoline.ll96
-rw-r--r--test/CodeGen/PowerPC/unal-altivec-wint.ll8
-rw-r--r--test/CodeGen/PowerPC/unal-altivec.ll12
-rw-r--r--test/CodeGen/PowerPC/unal-altivec2.ll98
-rw-r--r--test/CodeGen/PowerPC/unaligned.ll12
-rw-r--r--test/CodeGen/PowerPC/unwind-dw2-g.ll16
-rw-r--r--test/CodeGen/PowerPC/vaddsplat.ll24
-rw-r--r--test/CodeGen/PowerPC/varargs-struct-float.ll8
-rw-r--r--test/CodeGen/PowerPC/vcmp-fold.ll8
-rw-r--r--test/CodeGen/PowerPC/vec-abi-align.ll48
-rw-r--r--test/CodeGen/PowerPC/vec_add_sub_doubleword.ll62
-rw-r--r--test/CodeGen/PowerPC/vec_add_sub_quadword.ll130
-rw-r--r--test/CodeGen/PowerPC/vec_auto_constant.ll4
-rw-r--r--test/CodeGen/PowerPC/vec_br_cmp.ll4
-rw-r--r--test/CodeGen/PowerPC/vec_buildvector_loadstore.ll2
-rw-r--r--test/CodeGen/PowerPC/vec_clz.ll40
-rw-r--r--test/CodeGen/PowerPC/vec_cmpd.ll258
-rw-r--r--test/CodeGen/PowerPC/vec_constants.ll12
-rw-r--r--test/CodeGen/PowerPC/vec_conv.ll8
-rw-r--r--test/CodeGen/PowerPC/vec_fneg.ll2
-rw-r--r--test/CodeGen/PowerPC/vec_minmax.ll34
-rw-r--r--test/CodeGen/PowerPC/vec_misaligned.ll18
-rw-r--r--test/CodeGen/PowerPC/vec_mul.ll22
-rw-r--r--test/CodeGen/PowerPC/vec_mul_even_odd.ll42
-rw-r--r--test/CodeGen/PowerPC/vec_perf_shuffle.ll20
-rw-r--r--test/CodeGen/PowerPC/vec_popcnt.ll72
-rw-r--r--test/CodeGen/PowerPC/vec_rotate_shift.ll36
-rw-r--r--test/CodeGen/PowerPC/vec_shuffle.ll56
-rw-r--r--test/CodeGen/PowerPC/vec_shuffle_le.ll54
-rw-r--r--test/CodeGen/PowerPC/vec_shuffle_p8vector.ll54
-rw-r--r--test/CodeGen/PowerPC/vec_shuffle_p8vector_le.ll43
-rw-r--r--test/CodeGen/PowerPC/vec_splat.ll10
-rw-r--r--test/CodeGen/PowerPC/vec_splat_constant.ll4
-rw-r--r--test/CodeGen/PowerPC/vec_veqv_vnand_vorc.ll29
-rw-r--r--test/CodeGen/PowerPC/vec_zero.ll2
-rw-r--r--test/CodeGen/PowerPC/vector-identity-shuffle.ll2
-rw-r--r--test/CodeGen/PowerPC/vector.ll46
-rw-r--r--test/CodeGen/PowerPC/vperm-lowering.ll57
-rw-r--r--test/CodeGen/PowerPC/vsx-div.ll4
-rw-r--r--test/CodeGen/PowerPC/vsx-elementary-arith.ll120
-rw-r--r--test/CodeGen/PowerPC/vsx-fma-m.ll40
-rw-r--r--test/CodeGen/PowerPC/vsx-infl-copy1.ll133
-rw-r--r--test/CodeGen/PowerPC/vsx-infl-copy2.ll114
-rw-r--r--test/CodeGen/PowerPC/vsx-ldst-builtin-le.ll97
-rw-r--r--test/CodeGen/PowerPC/vsx-ldst.ll13
-rw-r--r--test/CodeGen/PowerPC/vsx-minmax.ll22
-rw-r--r--test/CodeGen/PowerPC/vsx-p8.ll4
-rw-r--r--test/CodeGen/PowerPC/vsx-recip-est.ll62
-rw-r--r--test/CodeGen/PowerPC/vsx-spill-norwstore.ll63
-rw-r--r--test/CodeGen/PowerPC/vsx.ll28
-rw-r--r--test/CodeGen/PowerPC/vsx_insert_extract_le.ll28
-rw-r--r--test/CodeGen/PowerPC/vsx_scalar_ld_st.ll139
-rw-r--r--test/CodeGen/PowerPC/vsx_shuffle_le.ll132
-rw-r--r--test/CodeGen/PowerPC/weak_def_can_be_hidden.ll4
-rw-r--r--test/CodeGen/PowerPC/xxleqv_xxlnand_xxlorc.ll52
-rw-r--r--test/CodeGen/PowerPC/zero-not-run.ll2
-rw-r--r--test/CodeGen/PowerPC/zext-free.ll10
-rw-r--r--test/CodeGen/R600/128bit-kernel-args.ll29
-rw-r--r--test/CodeGen/R600/32-bit-local-address-space.ll30
-rw-r--r--test/CodeGen/R600/64bit-kernel-args.ll11
-rw-r--r--test/CodeGen/R600/add-debug.ll2
-rw-r--r--test/CodeGen/R600/add.ll48
-rw-r--r--test/CodeGen/R600/add_i64.ll20
-rw-r--r--test/CodeGen/R600/address-space.ll11
-rw-r--r--test/CodeGen/R600/and.ll181
-rw-r--r--test/CodeGen/R600/array-ptr-calc-i32.ll16
-rw-r--r--test/CodeGen/R600/array-ptr-calc-i64.ll8
-rw-r--r--test/CodeGen/R600/atomic_cmp_swap_local.ll93
-rw-r--r--test/CodeGen/R600/atomic_load_add.ll4
-rw-r--r--test/CodeGen/R600/atomic_load_sub.ll4
-rw-r--r--test/CodeGen/R600/bfe_uint.ll2
-rw-r--r--test/CodeGen/R600/big_alu.ll40
-rw-r--r--test/CodeGen/R600/bitcast.ll16
-rw-r--r--test/CodeGen/R600/bswap.ll14
-rw-r--r--test/CodeGen/R600/call.ll12
-rw-r--r--test/CodeGen/R600/call_fs.ll4
-rw-r--r--test/CodeGen/R600/coalescer_remat.ll57
-rw-r--r--test/CodeGen/R600/codegen-prepare-addrmode-sext.ll2
-rw-r--r--test/CodeGen/R600/combine_vloads.ll4
-rw-r--r--test/CodeGen/R600/commute-compares.ll697
-rw-r--r--test/CodeGen/R600/commute_modifiers.ll66
-rw-r--r--test/CodeGen/R600/concat_vectors.ll2
-rw-r--r--test/CodeGen/R600/copy-illegal-type.ll18
-rw-r--r--test/CodeGen/R600/copy-to-reg.ll6
-rw-r--r--test/CodeGen/R600/ctlz_zero_undef.ll6
-rw-r--r--test/CodeGen/R600/ctpop.ll211
-rw-r--r--test/CodeGen/R600/ctpop64.ll86
-rw-r--r--test/CodeGen/R600/cttz-ctlz.ll225
-rw-r--r--test/CodeGen/R600/cttz_zero_undef.ll6
-rw-r--r--test/CodeGen/R600/cvt_f32_ubyte.ll30
-rw-r--r--test/CodeGen/R600/cvt_flr_i32_f32.ll86
-rw-r--r--test/CodeGen/R600/cvt_rpi_i32_f32.ll83
-rw-r--r--test/CodeGen/R600/dagcombiner-bug-illegal-vec4-int-to-fp.ll8
-rw-r--r--test/CodeGen/R600/debug.ll10
-rw-r--r--test/CodeGen/R600/disconnected-predset-break-bug.ll2
-rw-r--r--test/CodeGen/R600/dot4-folding.ll4
-rw-r--r--test/CodeGen/R600/ds-negative-offset-addressing-mode-loop.ll24
-rw-r--r--test/CodeGen/R600/ds_read2.ll274
-rw-r--r--test/CodeGen/R600/ds_read2_offset_order.ll28
-rw-r--r--test/CodeGen/R600/ds_read2st64.ll138
-rw-r--r--test/CodeGen/R600/ds_write2.ll238
-rw-r--r--test/CodeGen/R600/ds_write2st64.ll62
-rw-r--r--test/CodeGen/R600/elf.ll7
-rw-r--r--test/CodeGen/R600/empty-function.ll4
-rw-r--r--test/CodeGen/R600/endcf-loop-header.ll2
-rw-r--r--test/CodeGen/R600/extload-private.ll8
-rw-r--r--test/CodeGen/R600/extload.ll30
-rw-r--r--test/CodeGen/R600/extract_vector_elt_i16.ll4
-rw-r--r--test/CodeGen/R600/fabs.f64.ll4
-rw-r--r--test/CodeGen/R600/fabs.ll35
-rw-r--r--test/CodeGen/R600/fadd.ll6
-rw-r--r--test/CodeGen/R600/fadd64.ll4
-rw-r--r--test/CodeGen/R600/fceil64.ll13
-rw-r--r--test/CodeGen/R600/fcmp-cnd.ll2
-rw-r--r--test/CodeGen/R600/fcmp-cnde-int-args.ll2
-rw-r--r--test/CodeGen/R600/fcmp.ll8
-rw-r--r--test/CodeGen/R600/fcmp64.ll24
-rw-r--r--test/CodeGen/R600/fconst64.ll2
-rw-r--r--test/CodeGen/R600/fcopysign.f32.ll21
-rw-r--r--test/CodeGen/R600/fcopysign.f64.ll23
-rw-r--r--test/CodeGen/R600/fdiv.f64.ll24
-rw-r--r--test/CodeGen/R600/fdiv.ll6
-rw-r--r--test/CodeGen/R600/fetch-limits.r600.ll18
-rw-r--r--test/CodeGen/R600/fetch-limits.r700+.ll34
-rw-r--r--test/CodeGen/R600/ffloor.f64.ll127
-rw-r--r--test/CodeGen/R600/ffloor.ll131
-rw-r--r--test/CodeGen/R600/flat-address-space.ll20
-rw-r--r--test/CodeGen/R600/floor.ll7
-rw-r--r--test/CodeGen/R600/fma-combine.ll368
-rw-r--r--test/CodeGen/R600/fma.f64.ll18
-rw-r--r--test/CodeGen/R600/fma.ll38
-rw-r--r--test/CodeGen/R600/fmax3.f64.ll10
-rw-r--r--test/CodeGen/R600/fmax3.ll18
-rw-r--r--test/CodeGen/R600/fmax_legacy.f64.ll34
-rw-r--r--test/CodeGen/R600/fmax_legacy.ll40
-rw-r--r--test/CodeGen/R600/fmaxnum.ll91
-rw-r--r--test/CodeGen/R600/fmin3.ll18
-rw-r--r--test/CodeGen/R600/fmin_legacy.f64.ll34
-rw-r--r--test/CodeGen/R600/fmin_legacy.ll40
-rw-r--r--test/CodeGen/R600/fminnum.ll91
-rw-r--r--test/CodeGen/R600/fmul.ll6
-rw-r--r--test/CodeGen/R600/fmul64.ll12
-rw-r--r--test/CodeGen/R600/fmuladd.ll92
-rw-r--r--test/CodeGen/R600/fneg-fabs.f64.ll10
-rw-r--r--test/CodeGen/R600/fneg-fabs.ll2
-rw-r--r--test/CodeGen/R600/fneg.f64.ll27
-rw-r--r--test/CodeGen/R600/fneg.ll24
-rw-r--r--test/CodeGen/R600/fp-classify.ll4
-rw-r--r--test/CodeGen/R600/fp16_to_fp.ll4
-rw-r--r--test/CodeGen/R600/fp32_to_fp16.ll2
-rw-r--r--test/CodeGen/R600/fp_to_sint.f64.ll4
-rw-r--r--test/CodeGen/R600/fp_to_sint.ll2
-rw-r--r--test/CodeGen/R600/fp_to_uint.f64.ll4
-rw-r--r--test/CodeGen/R600/fp_to_uint.ll2
-rw-r--r--test/CodeGen/R600/fpext.ll45
-rw-r--r--test/CodeGen/R600/fptrunc.ll45
-rw-r--r--test/CodeGen/R600/frem.ll105
-rw-r--r--test/CodeGen/R600/fsqrt.ll4
-rw-r--r--test/CodeGen/R600/fsub.ll12
-rw-r--r--test/CodeGen/R600/fsub64.ll104
-rw-r--r--test/CodeGen/R600/ftrunc.f64.ll11
-rw-r--r--test/CodeGen/R600/gep-address-space.ll9
-rw-r--r--test/CodeGen/R600/global-directive.ll6
-rw-r--r--test/CodeGen/R600/global-extload-i1.ll68
-rw-r--r--test/CodeGen/R600/global-extload-i16.ll68
-rw-r--r--test/CodeGen/R600/global-extload-i32.ll32
-rw-r--r--test/CodeGen/R600/global-extload-i8.ll68
-rw-r--r--test/CodeGen/R600/global-zero-initializer.ll4
-rw-r--r--test/CodeGen/R600/global_atomics.ll160
-rw-r--r--test/CodeGen/R600/gv-const-addrspace-fail.ll16
-rw-r--r--test/CodeGen/R600/gv-const-addrspace.ll28
-rw-r--r--test/CodeGen/R600/half.ll12
-rw-r--r--test/CodeGen/R600/hsa.ll4
-rw-r--r--test/CodeGen/R600/i1-copy-phi.ll2
-rw-r--r--test/CodeGen/R600/i8-to-double-to-float.ll2
-rw-r--r--test/CodeGen/R600/icmp-select-sete-reverse-args.ll6
-rw-r--r--test/CodeGen/R600/imm.ll361
-rw-r--r--test/CodeGen/R600/indirect-private-64.ll24
-rw-r--r--test/CodeGen/R600/inline-asm.ll4
-rw-r--r--test/CodeGen/R600/insert_vector_elt.ll6
-rw-r--r--test/CodeGen/R600/jump-address.ll6
-rw-r--r--test/CodeGen/R600/kcache-fold.ll48
-rw-r--r--test/CodeGen/R600/kernel-args.ll256
-rw-r--r--test/CodeGen/R600/large-alloca.ll6
-rw-r--r--test/CodeGen/R600/large-constant-initializer.ll2
-rw-r--r--test/CodeGen/R600/lds-initializer.ll4
-rw-r--r--test/CodeGen/R600/lds-oqap-crash.ll2
-rw-r--r--test/CodeGen/R600/lds-output-queue.ll18
-rw-r--r--test/CodeGen/R600/lds-size.ll2
-rw-r--r--test/CodeGen/R600/lds-zero-initializer.ll4
-rw-r--r--test/CodeGen/R600/legalizedag-bug-expand-setcc.ll2
-rw-r--r--test/CodeGen/R600/llvm.AMDGPU.abs.ll4
-rw-r--r--test/CodeGen/R600/llvm.AMDGPU.barrier.global.ll6
-rw-r--r--test/CodeGen/R600/llvm.AMDGPU.barrier.local.ll6
-rw-r--r--test/CodeGen/R600/llvm.AMDGPU.bfe.i32.ll37
-rw-r--r--test/CodeGen/R600/llvm.AMDGPU.bfe.u32.ll96
-rw-r--r--test/CodeGen/R600/llvm.AMDGPU.bfm.ll27
-rw-r--r--test/CodeGen/R600/llvm.AMDGPU.brev.ll2
-rw-r--r--test/CodeGen/R600/llvm.AMDGPU.class.ll76
-rw-r--r--test/CodeGen/R600/llvm.AMDGPU.cube.ll8
-rw-r--r--test/CodeGen/R600/llvm.AMDGPU.cvt_f32_ubyte.ll8
-rw-r--r--test/CodeGen/R600/llvm.AMDGPU.div_fixup.ll22
-rw-r--r--test/CodeGen/R600/llvm.AMDGPU.div_fmas.ll111
-rw-r--r--test/CodeGen/R600/llvm.AMDGPU.div_scale.ll88
-rw-r--r--test/CodeGen/R600/llvm.AMDGPU.flbit.i32.ll2
-rw-r--r--test/CodeGen/R600/llvm.AMDGPU.fract.f64.ll60
-rw-r--r--test/CodeGen/R600/llvm.AMDGPU.fract.ll8
-rw-r--r--test/CodeGen/R600/llvm.AMDGPU.imax.ll2
-rw-r--r--test/CodeGen/R600/llvm.AMDGPU.imin.ll2
-rw-r--r--test/CodeGen/R600/llvm.AMDGPU.kill.ll2
-rw-r--r--test/CodeGen/R600/llvm.AMDGPU.rsq.clamped.f64.ll12
-rw-r--r--test/CodeGen/R600/llvm.AMDGPU.rsq.clamped.ll9
-rw-r--r--test/CodeGen/R600/llvm.AMDGPU.tex.ll2
-rw-r--r--test/CodeGen/R600/llvm.AMDGPU.trig_preop.ll6
-rw-r--r--test/CodeGen/R600/llvm.AMDGPU.umad24.ll10
-rw-r--r--test/CodeGen/R600/llvm.AMDGPU.umax.ll4
-rw-r--r--test/CodeGen/R600/llvm.AMDGPU.umin.ll4
-rw-r--r--test/CodeGen/R600/llvm.SI.fs.interp.ll45
-rw-r--r--test/CodeGen/R600/llvm.SI.imageload.ll20
-rw-r--r--test/CodeGen/R600/llvm.SI.load.dword.ll52
-rw-r--r--test/CodeGen/R600/llvm.SI.sendmsg.ll2
-rw-r--r--test/CodeGen/R600/llvm.SI.tid.ll8
-rw-r--r--test/CodeGen/R600/llvm.amdgpu.dp4.ll4
-rw-r--r--test/CodeGen/R600/llvm.floor.ll54
-rw-r--r--test/CodeGen/R600/llvm.memcpy.ll34
-rw-r--r--test/CodeGen/R600/llvm.rint.f64.ll1
-rw-r--r--test/CodeGen/R600/llvm.round.f64.ll74
-rw-r--r--test/CodeGen/R600/llvm.round.ll78
-rw-r--r--test/CodeGen/R600/llvm.sqrt.ll54
-rw-r--r--test/CodeGen/R600/llvm.trunc.ll13
-rw-r--r--test/CodeGen/R600/load-i1.ll14
-rw-r--r--test/CodeGen/R600/load-input-fold.ll38
-rw-r--r--test/CodeGen/R600/load.ll211
-rw-r--r--test/CodeGen/R600/load.vec.ll4
-rw-r--r--test/CodeGen/R600/load64.ll6
-rw-r--r--test/CodeGen/R600/local-64.ll83
-rw-r--r--test/CodeGen/R600/local-atomics.ll319
-rw-r--r--test/CodeGen/R600/local-atomics64.ll279
-rw-r--r--test/CodeGen/R600/local-memory-two-objects.ll25
-rw-r--r--test/CodeGen/R600/local-memory.ll11
-rw-r--r--test/CodeGen/R600/loop-address.ll2
-rw-r--r--test/CodeGen/R600/loop-idiom.ll8
-rw-r--r--test/CodeGen/R600/m0-spill.ll4
-rw-r--r--test/CodeGen/R600/mad-combine.ll567
-rw-r--r--test/CodeGen/R600/mad-sub.ll118
-rw-r--r--test/CodeGen/R600/madak.ll193
-rw-r--r--test/CodeGen/R600/madmk.ll205
-rw-r--r--test/CodeGen/R600/max.ll58
-rw-r--r--test/CodeGen/R600/max3.ll28
-rw-r--r--test/CodeGen/R600/merge-stores.ll536
-rw-r--r--test/CodeGen/R600/min.ll70
-rw-r--r--test/CodeGen/R600/min3.ll76
-rw-r--r--test/CodeGen/R600/misaligned-load.ll18
-rw-r--r--test/CodeGen/R600/missing-store.ll6
-rw-r--r--test/CodeGen/R600/mubuf.ll72
-rw-r--r--test/CodeGen/R600/mul.ll34
-rw-r--r--test/CodeGen/R600/no-initializer-constant-addrspace.ll4
-rw-r--r--test/CodeGen/R600/no-shrink-extloads.ll50
-rw-r--r--test/CodeGen/R600/operand-folding.ll4
-rw-r--r--test/CodeGen/R600/operand-spacing.ll13
-rw-r--r--test/CodeGen/R600/or.ll88
-rw-r--r--test/CodeGen/R600/parallelandifcollapse.ll16
-rw-r--r--test/CodeGen/R600/parallelorifcollapse.ll16
-rw-r--r--test/CodeGen/R600/private-memory-atomics.ll12
-rw-r--r--test/CodeGen/R600/private-memory-broken.ll6
-rw-r--r--test/CodeGen/R600/private-memory.ll154
-rw-r--r--test/CodeGen/R600/pv-packing.ll4
-rw-r--r--test/CodeGen/R600/pv.ll68
-rw-r--r--test/CodeGen/R600/r600-export-fix.ll50
-rw-r--r--test/CodeGen/R600/r600cfg.ll2
-rw-r--r--test/CodeGen/R600/register-count-comments.ll10
-rw-r--r--test/CodeGen/R600/reorder-stores.ll16
-rw-r--r--test/CodeGen/R600/rotl.i64.ll4
-rw-r--r--test/CodeGen/R600/rotr.i64.ll8
-rw-r--r--test/CodeGen/R600/rsq.ll18
-rw-r--r--test/CodeGen/R600/s_movk_i32.ll26
-rw-r--r--test/CodeGen/R600/saddo.ll8
-rw-r--r--test/CodeGen/R600/salu-to-valu.ll30
-rw-r--r--test/CodeGen/R600/scalar_to_vector.ll6
-rw-r--r--test/CodeGen/R600/schedule-fs-loop-nested.ll8
-rw-r--r--test/CodeGen/R600/schedule-fs-loop.ll8
-rw-r--r--test/CodeGen/R600/schedule-global-loads.ll12
-rw-r--r--test/CodeGen/R600/schedule-if-2.ll8
-rw-r--r--test/CodeGen/R600/schedule-if.ll6
-rw-r--r--test/CodeGen/R600/schedule-kernel-arg-loads.ll7
-rw-r--r--test/CodeGen/R600/schedule-vs-if-nested-loop-failure.ll32
-rw-r--r--test/CodeGen/R600/schedule-vs-if-nested-loop.ll32
-rw-r--r--test/CodeGen/R600/scratch-buffer.ll31
-rw-r--r--test/CodeGen/R600/sdiv.ll26
-rw-r--r--test/CodeGen/R600/sdivrem24.ll72
-rw-r--r--test/CodeGen/R600/sdivrem64.ll225
-rw-r--r--test/CodeGen/R600/select64.ll21
-rw-r--r--test/CodeGen/R600/selectcc-cnd.ll2
-rw-r--r--test/CodeGen/R600/selectcc-cnde-int.ll2
-rw-r--r--test/CodeGen/R600/selectcc-icmp-select-float.ll2
-rw-r--r--test/CodeGen/R600/selectcc-opt.ll4
-rw-r--r--test/CodeGen/R600/setcc-opt.ll161
-rw-r--r--test/CodeGen/R600/setcc.ll26
-rw-r--r--test/CodeGen/R600/sext-in-reg.ll89
-rw-r--r--test/CodeGen/R600/sgpr-control-flow.ll14
-rw-r--r--test/CodeGen/R600/sgpr-copy-duplicate-operand.ll2
-rw-r--r--test/CodeGen/R600/sgpr-copy.ll46
-rw-r--r--test/CodeGen/R600/shl.ll30
-rw-r--r--test/CodeGen/R600/shl_add_constant.ll12
-rw-r--r--test/CodeGen/R600/shl_add_ptr.ll60
-rw-r--r--test/CodeGen/R600/si-annotate-cf.ll4
-rw-r--r--test/CodeGen/R600/si-lod-bias.ll12
-rw-r--r--test/CodeGen/R600/si-sgpr-spill.ll184
-rw-r--r--test/CodeGen/R600/si-spill-cf.ll501
-rw-r--r--test/CodeGen/R600/si-triv-disjoint-mem-access.ll104
-rw-r--r--test/CodeGen/R600/si-vector-hang.ll74
-rw-r--r--test/CodeGen/R600/sign_extend.ll7
-rw-r--r--test/CodeGen/R600/simplify-demanded-bits-build-pair.ll12
-rw-r--r--test/CodeGen/R600/sint_to_fp.f64.ll7
-rw-r--r--test/CodeGen/R600/sint_to_fp.ll2
-rw-r--r--test/CodeGen/R600/smrd.ll86
-rw-r--r--test/CodeGen/R600/split-scalar-i64-add.ll4
-rw-r--r--test/CodeGen/R600/sra.ll30
-rw-r--r--test/CodeGen/R600/srem.ll89
-rw-r--r--test/CodeGen/R600/srl.ll55
-rw-r--r--test/CodeGen/R600/ssubo.ll8
-rw-r--r--test/CodeGen/R600/store-barrier.ll28
-rw-r--r--test/CodeGen/R600/store-v3i64.ll4
-rw-r--r--test/CodeGen/R600/store-vector-ptrs.ll2
-rw-r--r--test/CodeGen/R600/store.ll99
-rw-r--r--test/CodeGen/R600/store.r600.ll4
-rw-r--r--test/CodeGen/R600/sub.ll62
-rw-r--r--test/CodeGen/R600/subreg-coalescer-crash.ll69
-rw-r--r--test/CodeGen/R600/swizzle-export.ll42
-rw-r--r--test/CodeGen/R600/trunc-cmp-constant.ll52
-rw-r--r--test/CodeGen/R600/trunc.ll18
-rw-r--r--test/CodeGen/R600/tti-unroll-prefs.ll2
-rw-r--r--test/CodeGen/R600/uaddo.ll25
-rw-r--r--test/CodeGen/R600/udiv.ll18
-rw-r--r--test/CodeGen/R600/udivrem.ll24
-rw-r--r--test/CodeGen/R600/udivrem24.ll72
-rw-r--r--test/CodeGen/R600/udivrem64.ll148
-rw-r--r--test/CodeGen/R600/uint_to_fp.f64.ll7
-rw-r--r--test/CodeGen/R600/uint_to_fp.ll4
-rw-r--r--test/CodeGen/R600/unaligned-load-store.ll36
-rw-r--r--test/CodeGen/R600/unhandled-loop-condition-assertion.ll60
-rw-r--r--test/CodeGen/R600/unroll.ll6
-rw-r--r--test/CodeGen/R600/urem.ll51
-rw-r--r--test/CodeGen/R600/use-sgpr-multiple-times.ll73
-rw-r--r--test/CodeGen/R600/usubo.ll29
-rw-r--r--test/CodeGen/R600/v_cndmask.ll4
-rw-r--r--test/CodeGen/R600/valu-i1.ll47
-rw-r--r--test/CodeGen/R600/vector-alloca.ll40
-rw-r--r--test/CodeGen/R600/vertex-fetch-encoding.ll4
-rw-r--r--test/CodeGen/R600/vop-shrink.ll2
-rw-r--r--test/CodeGen/R600/vselect.ll16
-rw-r--r--test/CodeGen/R600/vtx-fetch-branch.ll2
-rw-r--r--test/CodeGen/R600/vtx-schedule.ll4
-rw-r--r--test/CodeGen/R600/wait.ll15
-rw-r--r--test/CodeGen/R600/work-item-intrinsics.ll72
-rw-r--r--test/CodeGen/R600/wrong-transalu-pos-fix.ll2
-rw-r--r--test/CodeGen/R600/xor.ll40
-rw-r--r--test/CodeGen/R600/zero_extend.ll2
-rw-r--r--test/CodeGen/SPARC/2008-10-10-InlineAsmMemoryOperand.ll4
-rw-r--r--test/CodeGen/SPARC/2008-10-10-InlineAsmRegOperand.ll2
-rw-r--r--test/CodeGen/SPARC/2009-08-28-PIC.ll4
-rwxr-xr-xtest/CodeGen/SPARC/2011-01-11-CC.ll6
-rw-r--r--test/CodeGen/SPARC/2011-01-11-Call.ll6
-rw-r--r--test/CodeGen/SPARC/2011-01-19-DelaySlot.ll2
-rw-r--r--test/CodeGen/SPARC/2011-01-22-SRet.ll18
-rw-r--r--test/CodeGen/SPARC/64abi.ll20
-rw-r--r--test/CodeGen/SPARC/64bit.ll30
-rw-r--r--test/CodeGen/SPARC/atomics.ll8
-rw-r--r--test/CodeGen/SPARC/basictest.ll50
-rw-r--r--test/CodeGen/SPARC/empty-functions.ll2
-rw-r--r--test/CodeGen/SPARC/exception.ll4
-rw-r--r--test/CodeGen/SPARC/float.ll4
-rw-r--r--test/CodeGen/SPARC/fp128.ll58
-rw-r--r--test/CodeGen/SPARC/globals.ll2
-rw-r--r--test/CodeGen/SPARC/inlineasm.ll2
-rw-r--r--test/CodeGen/SPARC/leafproc.ll8
-rw-r--r--test/CodeGen/SPARC/mult-alt-generic-sparc.ll48
-rw-r--r--test/CodeGen/SPARC/obj-relocs.ll2
-rw-r--r--test/CodeGen/SPARC/private.ll2
-rw-r--r--test/CodeGen/SPARC/setjmp.ll14
-rw-r--r--test/CodeGen/SPARC/spillsize.ll6
-rw-r--r--test/CodeGen/SPARC/tls.ll6
-rw-r--r--test/CodeGen/SPARC/varargs.ll6
-rw-r--r--test/CodeGen/SystemZ/Large/branch-range-01.py10
-rw-r--r--test/CodeGen/SystemZ/Large/branch-range-02.py4
-rw-r--r--test/CodeGen/SystemZ/Large/branch-range-03.py10
-rw-r--r--test/CodeGen/SystemZ/Large/branch-range-04.py10
-rw-r--r--test/CodeGen/SystemZ/Large/branch-range-05.py6
-rw-r--r--test/CodeGen/SystemZ/Large/branch-range-06.py6
-rw-r--r--test/CodeGen/SystemZ/Large/branch-range-07.py6
-rw-r--r--test/CodeGen/SystemZ/Large/branch-range-08.py6
-rw-r--r--test/CodeGen/SystemZ/Large/branch-range-09.py10
-rw-r--r--test/CodeGen/SystemZ/Large/branch-range-10.py10
-rw-r--r--test/CodeGen/SystemZ/Large/branch-range-11.py10
-rw-r--r--test/CodeGen/SystemZ/Large/branch-range-12.py10
-rw-r--r--test/CodeGen/SystemZ/Large/lit.local.cfg4
-rw-r--r--test/CodeGen/SystemZ/Large/spill-01.py4
-rw-r--r--test/CodeGen/SystemZ/Large/spill-02.py6
-rw-r--r--test/CodeGen/SystemZ/addr-01.ll16
-rw-r--r--test/CodeGen/SystemZ/addr-02.ll16
-rw-r--r--test/CodeGen/SystemZ/addr-03.ll10
-rw-r--r--test/CodeGen/SystemZ/alias-01.ll2
-rw-r--r--test/CodeGen/SystemZ/alloca-01.ll8
-rw-r--r--test/CodeGen/SystemZ/alloca-02.ll8
-rw-r--r--test/CodeGen/SystemZ/and-01.ll74
-rw-r--r--test/CodeGen/SystemZ/and-03.ll62
-rw-r--r--test/CodeGen/SystemZ/and-05.ll40
-rw-r--r--test/CodeGen/SystemZ/and-06.ll16
-rw-r--r--test/CodeGen/SystemZ/and-08.ll138
-rw-r--r--test/CodeGen/SystemZ/asm-18.ll76
-rw-r--r--test/CodeGen/SystemZ/atomic-load-01.ll2
-rw-r--r--test/CodeGen/SystemZ/atomic-load-02.ll2
-rw-r--r--test/CodeGen/SystemZ/atomic-load-03.ll2
-rw-r--r--test/CodeGen/SystemZ/atomic-load-04.ll2
-rw-r--r--test/CodeGen/SystemZ/atomicrmw-add-05.ll8
-rw-r--r--test/CodeGen/SystemZ/atomicrmw-add-06.ll8
-rw-r--r--test/CodeGen/SystemZ/atomicrmw-and-05.ll8
-rw-r--r--test/CodeGen/SystemZ/atomicrmw-and-06.ll8
-rw-r--r--test/CodeGen/SystemZ/atomicrmw-minmax-03.ll14
-rw-r--r--test/CodeGen/SystemZ/atomicrmw-minmax-04.ll8
-rw-r--r--test/CodeGen/SystemZ/atomicrmw-or-05.ll8
-rw-r--r--test/CodeGen/SystemZ/atomicrmw-or-06.ll8
-rw-r--r--test/CodeGen/SystemZ/atomicrmw-sub-05.ll8
-rw-r--r--test/CodeGen/SystemZ/atomicrmw-sub-06.ll8
-rw-r--r--test/CodeGen/SystemZ/atomicrmw-xchg-03.ll14
-rw-r--r--test/CodeGen/SystemZ/atomicrmw-xchg-04.ll8
-rw-r--r--test/CodeGen/SystemZ/atomicrmw-xor-05.ll8
-rw-r--r--test/CodeGen/SystemZ/atomicrmw-xor-06.ll8
-rw-r--r--test/CodeGen/SystemZ/branch-02.ll12
-rw-r--r--test/CodeGen/SystemZ/branch-03.ll8
-rw-r--r--test/CodeGen/SystemZ/branch-04.ll28
-rw-r--r--test/CodeGen/SystemZ/branch-06.ll18
-rw-r--r--test/CodeGen/SystemZ/branch-08.ll2
-rw-r--r--test/CodeGen/SystemZ/bswap-02.ll58
-rw-r--r--test/CodeGen/SystemZ/bswap-03.ll58
-rw-r--r--test/CodeGen/SystemZ/bswap-04.ll10
-rw-r--r--test/CodeGen/SystemZ/bswap-05.ll10
-rw-r--r--test/CodeGen/SystemZ/cmpxchg-03.ll14
-rw-r--r--test/CodeGen/SystemZ/cmpxchg-04.ll10
-rw-r--r--test/CodeGen/SystemZ/cond-load-01.ll26
-rw-r--r--test/CodeGen/SystemZ/cond-load-02.ll26
-rw-r--r--test/CodeGen/SystemZ/cond-store-01.ll56
-rw-r--r--test/CodeGen/SystemZ/cond-store-02.ll56
-rw-r--r--test/CodeGen/SystemZ/cond-store-03.ll48
-rw-r--r--test/CodeGen/SystemZ/cond-store-04.ll32
-rw-r--r--test/CodeGen/SystemZ/cond-store-05.ll36
-rw-r--r--test/CodeGen/SystemZ/cond-store-06.ll36
-rw-r--r--test/CodeGen/SystemZ/cond-store-07.ll30
-rw-r--r--test/CodeGen/SystemZ/cond-store-08.ll22
-rw-r--r--test/CodeGen/SystemZ/ctpop-01.ll96
-rw-r--r--test/CodeGen/SystemZ/fp-abs-01.ll7
-rw-r--r--test/CodeGen/SystemZ/fp-abs-02.ll7
-rw-r--r--test/CodeGen/SystemZ/fp-add-01.ll62
-rw-r--r--test/CodeGen/SystemZ/fp-add-02.ll69
-rw-r--r--test/CodeGen/SystemZ/fp-add-03.ll2
-rw-r--r--test/CodeGen/SystemZ/fp-cmp-01.ll86
-rw-r--r--test/CodeGen/SystemZ/fp-cmp-02.ll115
-rw-r--r--test/CodeGen/SystemZ/fp-cmp-03.ll4
-rw-r--r--test/CodeGen/SystemZ/fp-cmp-04.ll6
-rw-r--r--test/CodeGen/SystemZ/fp-conv-01.ll22
-rw-r--r--test/CodeGen/SystemZ/fp-conv-02.ll60
-rw-r--r--test/CodeGen/SystemZ/fp-conv-03.ll54
-rw-r--r--test/CodeGen/SystemZ/fp-conv-04.ll54
-rw-r--r--test/CodeGen/SystemZ/fp-conv-09.ll2
-rw-r--r--test/CodeGen/SystemZ/fp-conv-10.ll2
-rw-r--r--test/CodeGen/SystemZ/fp-conv-11.ll2
-rw-r--r--test/CodeGen/SystemZ/fp-conv-12.ll2
-rw-r--r--test/CodeGen/SystemZ/fp-conv-14.ll4
-rw-r--r--test/CodeGen/SystemZ/fp-copysign-01.ll12
-rw-r--r--test/CodeGen/SystemZ/fp-div-01.ll62
-rw-r--r--test/CodeGen/SystemZ/fp-div-02.ll68
-rw-r--r--test/CodeGen/SystemZ/fp-div-03.ll2
-rw-r--r--test/CodeGen/SystemZ/fp-move-01.ll8
-rw-r--r--test/CodeGen/SystemZ/fp-move-02.ll48
-rw-r--r--test/CodeGen/SystemZ/fp-move-03.ll34
-rw-r--r--test/CodeGen/SystemZ/fp-move-04.ll37
-rw-r--r--test/CodeGen/SystemZ/fp-move-05.ll22
-rw-r--r--test/CodeGen/SystemZ/fp-move-06.ll14
-rw-r--r--test/CodeGen/SystemZ/fp-move-07.ll17
-rw-r--r--test/CodeGen/SystemZ/fp-move-09.ll8
-rw-r--r--test/CodeGen/SystemZ/fp-move-10.ll61
-rw-r--r--test/CodeGen/SystemZ/fp-move-11.ll110
-rw-r--r--test/CodeGen/SystemZ/fp-mul-01.ll62
-rw-r--r--test/CodeGen/SystemZ/fp-mul-02.ll64
-rw-r--r--test/CodeGen/SystemZ/fp-mul-03.ll68
-rw-r--r--test/CodeGen/SystemZ/fp-mul-04.ll64
-rw-r--r--test/CodeGen/SystemZ/fp-mul-05.ll2
-rw-r--r--test/CodeGen/SystemZ/fp-mul-06.ll26
-rw-r--r--test/CodeGen/SystemZ/fp-mul-07.ll36
-rw-r--r--test/CodeGen/SystemZ/fp-mul-08.ll26
-rw-r--r--test/CodeGen/SystemZ/fp-mul-09.ll36
-rw-r--r--test/CodeGen/SystemZ/fp-neg-01.ll7
-rw-r--r--test/CodeGen/SystemZ/fp-round-01.ll2
-rw-r--r--test/CodeGen/SystemZ/fp-round-02.ll20
-rw-r--r--test/CodeGen/SystemZ/fp-sqrt-01.ll54
-rw-r--r--test/CodeGen/SystemZ/fp-sqrt-02.ll60
-rw-r--r--test/CodeGen/SystemZ/fp-sqrt-03.ll2
-rw-r--r--test/CodeGen/SystemZ/fp-sub-01.ll62
-rw-r--r--test/CodeGen/SystemZ/fp-sub-02.ll68
-rw-r--r--test/CodeGen/SystemZ/fp-sub-03.ll2
-rw-r--r--test/CodeGen/SystemZ/frame-01.ll12
-rw-r--r--test/CodeGen/SystemZ/frame-02.ll96
-rw-r--r--test/CodeGen/SystemZ/frame-03.ll98
-rw-r--r--test/CodeGen/SystemZ/frame-04.ll48
-rw-r--r--test/CodeGen/SystemZ/frame-05.ll84
-rw-r--r--test/CodeGen/SystemZ/frame-06.ll84
-rw-r--r--test/CodeGen/SystemZ/frame-07.ll72
-rw-r--r--test/CodeGen/SystemZ/frame-08.ll104
-rw-r--r--test/CodeGen/SystemZ/frame-09.ll32
-rw-r--r--test/CodeGen/SystemZ/frame-13.ll82
-rw-r--r--test/CodeGen/SystemZ/frame-14.ll82
-rw-r--r--test/CodeGen/SystemZ/frame-15.ll146
-rw-r--r--test/CodeGen/SystemZ/frame-16.ll82
-rw-r--r--test/CodeGen/SystemZ/frame-17.ll88
-rw-r--r--test/CodeGen/SystemZ/frame-18.ll60
-rw-r--r--test/CodeGen/SystemZ/frame-19.ll314
-rw-r--r--test/CodeGen/SystemZ/frame-20.ll445
-rw-r--r--test/CodeGen/SystemZ/htm-intrinsics.ll352
-rw-r--r--test/CodeGen/SystemZ/insert-01.ll56
-rw-r--r--test/CodeGen/SystemZ/insert-02.ll56
-rw-r--r--test/CodeGen/SystemZ/insert-06.ll6
-rw-r--r--test/CodeGen/SystemZ/int-add-01.ll34
-rw-r--r--test/CodeGen/SystemZ/int-add-02.ll74
-rw-r--r--test/CodeGen/SystemZ/int-add-03.ll64
-rw-r--r--test/CodeGen/SystemZ/int-add-04.ll64
-rw-r--r--test/CodeGen/SystemZ/int-add-05.ll62
-rw-r--r--test/CodeGen/SystemZ/int-add-08.ll46
-rw-r--r--test/CodeGen/SystemZ/int-add-09.ll8
-rw-r--r--test/CodeGen/SystemZ/int-add-10.ll44
-rw-r--r--test/CodeGen/SystemZ/int-add-11.ll92
-rw-r--r--test/CodeGen/SystemZ/int-add-12.ll92
-rw-r--r--test/CodeGen/SystemZ/int-cmp-01.ll36
-rw-r--r--test/CodeGen/SystemZ/int-cmp-02.ll36
-rw-r--r--test/CodeGen/SystemZ/int-cmp-03.ll36
-rw-r--r--test/CodeGen/SystemZ/int-cmp-04.ll26
-rw-r--r--test/CodeGen/SystemZ/int-cmp-05.ll70
-rw-r--r--test/CodeGen/SystemZ/int-cmp-06.ll70
-rw-r--r--test/CodeGen/SystemZ/int-cmp-07.ll26
-rw-r--r--test/CodeGen/SystemZ/int-cmp-08.ll26
-rw-r--r--test/CodeGen/SystemZ/int-cmp-12.ll15
-rw-r--r--test/CodeGen/SystemZ/int-cmp-15.ll52
-rw-r--r--test/CodeGen/SystemZ/int-cmp-16.ll20
-rw-r--r--test/CodeGen/SystemZ/int-cmp-17.ll20
-rw-r--r--test/CodeGen/SystemZ/int-cmp-18.ll20
-rw-r--r--test/CodeGen/SystemZ/int-cmp-19.ll20
-rw-r--r--test/CodeGen/SystemZ/int-cmp-20.ll32
-rw-r--r--test/CodeGen/SystemZ/int-cmp-21.ll32
-rw-r--r--test/CodeGen/SystemZ/int-cmp-22.ll24
-rw-r--r--test/CodeGen/SystemZ/int-cmp-23.ll18
-rw-r--r--test/CodeGen/SystemZ/int-cmp-24.ll8
-rw-r--r--test/CodeGen/SystemZ/int-cmp-25.ll8
-rw-r--r--test/CodeGen/SystemZ/int-cmp-26.ll20
-rw-r--r--test/CodeGen/SystemZ/int-cmp-27.ll20
-rw-r--r--test/CodeGen/SystemZ/int-cmp-28.ll20
-rw-r--r--test/CodeGen/SystemZ/int-cmp-29.ll20
-rw-r--r--test/CodeGen/SystemZ/int-cmp-30.ll32
-rw-r--r--test/CodeGen/SystemZ/int-cmp-31.ll32
-rw-r--r--test/CodeGen/SystemZ/int-cmp-32.ll42
-rw-r--r--test/CodeGen/SystemZ/int-cmp-33.ll26
-rw-r--r--test/CodeGen/SystemZ/int-cmp-34.ll42
-rw-r--r--test/CodeGen/SystemZ/int-cmp-35.ll26
-rw-r--r--test/CodeGen/SystemZ/int-cmp-36.ll12
-rw-r--r--test/CodeGen/SystemZ/int-cmp-37.ll12
-rw-r--r--test/CodeGen/SystemZ/int-cmp-38.ll14
-rw-r--r--test/CodeGen/SystemZ/int-cmp-39.ll12
-rw-r--r--test/CodeGen/SystemZ/int-cmp-40.ll12
-rw-r--r--test/CodeGen/SystemZ/int-cmp-41.ll12
-rw-r--r--test/CodeGen/SystemZ/int-cmp-42.ll12
-rw-r--r--test/CodeGen/SystemZ/int-cmp-43.ll12
-rw-r--r--test/CodeGen/SystemZ/int-cmp-44.ll20
-rw-r--r--test/CodeGen/SystemZ/int-cmp-45.ll12
-rw-r--r--test/CodeGen/SystemZ/int-cmp-47.ll3
-rw-r--r--test/CodeGen/SystemZ/int-cmp-48.ll46
-rw-r--r--test/CodeGen/SystemZ/int-cmp-50.ll30
-rw-r--r--test/CodeGen/SystemZ/int-const-03.ll14
-rw-r--r--test/CodeGen/SystemZ/int-const-04.ll6
-rw-r--r--test/CodeGen/SystemZ/int-const-05.ll6
-rw-r--r--test/CodeGen/SystemZ/int-const-06.ll6
-rw-r--r--test/CodeGen/SystemZ/int-conv-01.ll56
-rw-r--r--test/CodeGen/SystemZ/int-conv-02.ll56
-rw-r--r--test/CodeGen/SystemZ/int-conv-03.ll56
-rw-r--r--test/CodeGen/SystemZ/int-conv-04.ll56
-rw-r--r--test/CodeGen/SystemZ/int-conv-05.ll66
-rw-r--r--test/CodeGen/SystemZ/int-conv-06.ll56
-rw-r--r--test/CodeGen/SystemZ/int-conv-07.ll56
-rw-r--r--test/CodeGen/SystemZ/int-conv-08.ll56
-rw-r--r--test/CodeGen/SystemZ/int-conv-09.ll24
-rw-r--r--test/CodeGen/SystemZ/int-conv-10.ll24
-rw-r--r--test/CodeGen/SystemZ/int-conv-11.ll128
-rw-r--r--test/CodeGen/SystemZ/int-div-01.ll72
-rw-r--r--test/CodeGen/SystemZ/int-div-02.ll68
-rw-r--r--test/CodeGen/SystemZ/int-div-03.ll30
-rw-r--r--test/CodeGen/SystemZ/int-div-04.ll72
-rw-r--r--test/CodeGen/SystemZ/int-div-05.ll72
-rw-r--r--test/CodeGen/SystemZ/int-move-02.ll34
-rw-r--r--test/CodeGen/SystemZ/int-move-03.ll24
-rw-r--r--test/CodeGen/SystemZ/int-move-04.ll14
-rw-r--r--test/CodeGen/SystemZ/int-move-05.ll14
-rw-r--r--test/CodeGen/SystemZ/int-move-06.ll14
-rw-r--r--test/CodeGen/SystemZ/int-move-07.ll10
-rw-r--r--test/CodeGen/SystemZ/int-move-08.ll24
-rw-r--r--test/CodeGen/SystemZ/int-move-09.ll20
-rw-r--r--test/CodeGen/SystemZ/int-mul-01.ll34
-rw-r--r--test/CodeGen/SystemZ/int-mul-02.ll74
-rw-r--r--test/CodeGen/SystemZ/int-mul-03.ll64
-rw-r--r--test/CodeGen/SystemZ/int-mul-04.ll62
-rw-r--r--test/CodeGen/SystemZ/int-mul-08.ll62
-rw-r--r--test/CodeGen/SystemZ/int-sub-01.ll74
-rw-r--r--test/CodeGen/SystemZ/int-sub-02.ll64
-rw-r--r--test/CodeGen/SystemZ/int-sub-03.ll64
-rw-r--r--test/CodeGen/SystemZ/int-sub-04.ll62
-rw-r--r--test/CodeGen/SystemZ/int-sub-05.ll58
-rw-r--r--test/CodeGen/SystemZ/int-sub-06.ll44
-rw-r--r--test/CodeGen/SystemZ/int-sub-07.ll34
-rw-r--r--test/CodeGen/SystemZ/loop-01.ll8
-rw-r--r--test/CodeGen/SystemZ/memchr-02.ll4
-rw-r--r--test/CodeGen/SystemZ/memcpy-01.ll16
-rw-r--r--test/CodeGen/SystemZ/memcpy-02.ll104
-rw-r--r--test/CodeGen/SystemZ/or-01.ll74
-rw-r--r--test/CodeGen/SystemZ/or-03.ll62
-rw-r--r--test/CodeGen/SystemZ/or-05.ll40
-rw-r--r--test/CodeGen/SystemZ/or-06.ll16
-rw-r--r--test/CodeGen/SystemZ/or-08.ll24
-rw-r--r--test/CodeGen/SystemZ/prefetch-01.ll8
-rw-r--r--test/CodeGen/SystemZ/risbg-03.ll30
-rw-r--r--test/CodeGen/SystemZ/serialize-01.ll2
-rw-r--r--test/CodeGen/SystemZ/shift-01.ll2
-rw-r--r--test/CodeGen/SystemZ/shift-02.ll2
-rw-r--r--test/CodeGen/SystemZ/shift-03.ll2
-rw-r--r--test/CodeGen/SystemZ/shift-04.ll2
-rw-r--r--test/CodeGen/SystemZ/shift-05.ll2
-rw-r--r--test/CodeGen/SystemZ/shift-06.ll2
-rw-r--r--test/CodeGen/SystemZ/shift-07.ll2
-rw-r--r--test/CodeGen/SystemZ/shift-08.ll2
-rw-r--r--test/CodeGen/SystemZ/spill-01.ll338
-rw-r--r--test/CodeGen/SystemZ/strcpy-01.ll2
-rw-r--r--test/CodeGen/SystemZ/tail-call-mem-intrinsics.ll31
-rw-r--r--test/CodeGen/SystemZ/tls-01.ll6
-rw-r--r--test/CodeGen/SystemZ/tls-02.ll18
-rw-r--r--test/CodeGen/SystemZ/tls-03.ll23
-rw-r--r--test/CodeGen/SystemZ/tls-04.ll28
-rw-r--r--test/CodeGen/SystemZ/tls-05.ll15
-rw-r--r--test/CodeGen/SystemZ/tls-06.ll17
-rw-r--r--test/CodeGen/SystemZ/tls-07.ll16
-rw-r--r--test/CodeGen/SystemZ/unaligned-01.ll16
-rw-r--r--test/CodeGen/SystemZ/vec-abi-align.ll49
-rw-r--r--test/CodeGen/SystemZ/vec-abs-01.ll146
-rw-r--r--test/CodeGen/SystemZ/vec-abs-02.ll142
-rw-r--r--test/CodeGen/SystemZ/vec-abs-03.ll138
-rw-r--r--test/CodeGen/SystemZ/vec-abs-04.ll138
-rw-r--r--test/CodeGen/SystemZ/vec-abs-05.ll46
-rw-r--r--test/CodeGen/SystemZ/vec-add-01.ll60
-rw-r--r--test/CodeGen/SystemZ/vec-and-01.ll39
-rw-r--r--test/CodeGen/SystemZ/vec-and-02.ll91
-rw-r--r--test/CodeGen/SystemZ/vec-and-03.ll113
-rw-r--r--test/CodeGen/SystemZ/vec-args-01.ll48
-rw-r--r--test/CodeGen/SystemZ/vec-args-02.ll31
-rw-r--r--test/CodeGen/SystemZ/vec-args-03.ll30
-rw-r--r--test/CodeGen/SystemZ/vec-args-04.ll50
-rw-r--r--test/CodeGen/SystemZ/vec-args-05.ll32
-rw-r--r--test/CodeGen/SystemZ/vec-args-error-01.ll9
-rw-r--r--test/CodeGen/SystemZ/vec-args-error-02.ll9
-rw-r--r--test/CodeGen/SystemZ/vec-args-error-03.ll12
-rw-r--r--test/CodeGen/SystemZ/vec-args-error-04.ll12
-rw-r--r--test/CodeGen/SystemZ/vec-args-error-05.ll9
-rw-r--r--test/CodeGen/SystemZ/vec-args-error-06.ll9
-rw-r--r--test/CodeGen/SystemZ/vec-args-error-07.ll12
-rw-r--r--test/CodeGen/SystemZ/vec-args-error-08.ll12
-rw-r--r--test/CodeGen/SystemZ/vec-cmp-01.ll228
-rw-r--r--test/CodeGen/SystemZ/vec-cmp-02.ll228
-rw-r--r--test/CodeGen/SystemZ/vec-cmp-03.ll228
-rw-r--r--test/CodeGen/SystemZ/vec-cmp-04.ll228
-rw-r--r--test/CodeGen/SystemZ/vec-cmp-05.ll472
-rw-r--r--test/CodeGen/SystemZ/vec-cmp-06.ll349
-rw-r--r--test/CodeGen/SystemZ/vec-combine-01.ll155
-rw-r--r--test/CodeGen/SystemZ/vec-combine-02.ll433
-rw-r--r--test/CodeGen/SystemZ/vec-const-01.ll103
-rw-r--r--test/CodeGen/SystemZ/vec-const-02.ll79
-rw-r--r--test/CodeGen/SystemZ/vec-const-03.ll59
-rw-r--r--test/CodeGen/SystemZ/vec-const-04.ll43
-rw-r--r--test/CodeGen/SystemZ/vec-const-05.ll63
-rw-r--r--test/CodeGen/SystemZ/vec-const-06.ll43
-rw-r--r--test/CodeGen/SystemZ/vec-const-07.ll229
-rw-r--r--test/CodeGen/SystemZ/vec-const-08.ll189
-rw-r--r--test/CodeGen/SystemZ/vec-const-09.ll169
-rw-r--r--test/CodeGen/SystemZ/vec-const-10.ll169
-rw-r--r--test/CodeGen/SystemZ/vec-const-11.ll189
-rw-r--r--test/CodeGen/SystemZ/vec-const-12.ll169
-rw-r--r--test/CodeGen/SystemZ/vec-const-13.ll193
-rw-r--r--test/CodeGen/SystemZ/vec-const-14.ll113
-rw-r--r--test/CodeGen/SystemZ/vec-const-15.ll85
-rw-r--r--test/CodeGen/SystemZ/vec-const-16.ll85
-rw-r--r--test/CodeGen/SystemZ/vec-const-17.ll95
-rw-r--r--test/CodeGen/SystemZ/vec-const-18.ll85
-rw-r--r--test/CodeGen/SystemZ/vec-conv-01.ll95
-rw-r--r--test/CodeGen/SystemZ/vec-conv-02.ll33
-rw-r--r--test/CodeGen/SystemZ/vec-ctlz-01.ll81
-rw-r--r--test/CodeGen/SystemZ/vec-ctpop-01.ll53
-rw-r--r--test/CodeGen/SystemZ/vec-cttz-01.ll81
-rw-r--r--test/CodeGen/SystemZ/vec-div-01.ll83
-rw-r--r--test/CodeGen/SystemZ/vec-extract-01.ll13
-rw-r--r--test/CodeGen/SystemZ/vec-extract-02.ll15
-rw-r--r--test/CodeGen/SystemZ/vec-intrinsics.ll3335
-rw-r--r--test/CodeGen/SystemZ/vec-log-01.ll15
-rw-r--r--test/CodeGen/SystemZ/vec-max-01.ll83
-rw-r--r--test/CodeGen/SystemZ/vec-max-02.ll83
-rw-r--r--test/CodeGen/SystemZ/vec-max-03.ll83
-rw-r--r--test/CodeGen/SystemZ/vec-max-04.ll83
-rw-r--r--test/CodeGen/SystemZ/vec-min-01.ll83
-rw-r--r--test/CodeGen/SystemZ/vec-min-02.ll83
-rw-r--r--test/CodeGen/SystemZ/vec-min-03.ll83
-rw-r--r--test/CodeGen/SystemZ/vec-min-04.ll83
-rw-r--r--test/CodeGen/SystemZ/vec-move-01.ll107
-rw-r--r--test/CodeGen/SystemZ/vec-move-02.ll174
-rw-r--r--test/CodeGen/SystemZ/vec-move-03.ll174
-rw-r--r--test/CodeGen/SystemZ/vec-move-04.ll179
-rw-r--r--test/CodeGen/SystemZ/vec-move-05.ll249
-rw-r--r--test/CodeGen/SystemZ/vec-move-06.ll13
-rw-r--r--test/CodeGen/SystemZ/vec-move-07.ll57
-rw-r--r--test/CodeGen/SystemZ/vec-move-08.ll444
-rw-r--r--test/CodeGen/SystemZ/vec-move-09.ll291
-rw-r--r--test/CodeGen/SystemZ/vec-move-10.ll499
-rw-r--r--test/CodeGen/SystemZ/vec-move-11.ll111
-rw-r--r--test/CodeGen/SystemZ/vec-move-12.ll123
-rw-r--r--test/CodeGen/SystemZ/vec-move-13.ll69
-rw-r--r--test/CodeGen/SystemZ/vec-move-14.ll96
-rw-r--r--test/CodeGen/SystemZ/vec-move-15.ll105
-rw-r--r--test/CodeGen/SystemZ/vec-move-16.ll105
-rw-r--r--test/CodeGen/SystemZ/vec-move-17.ll104
-rw-r--r--test/CodeGen/SystemZ/vec-mul-01.ll60
-rw-r--r--test/CodeGen/SystemZ/vec-mul-02.ll63
-rw-r--r--test/CodeGen/SystemZ/vec-neg-01.ll58
-rw-r--r--test/CodeGen/SystemZ/vec-or-01.ll39
-rw-r--r--test/CodeGen/SystemZ/vec-or-02.ll107
-rw-r--r--test/CodeGen/SystemZ/vec-perm-01.ll175
-rw-r--r--test/CodeGen/SystemZ/vec-perm-02.ll200
-rw-r--r--test/CodeGen/SystemZ/vec-perm-03.ll251
-rw-r--r--test/CodeGen/SystemZ/vec-perm-04.ll200
-rw-r--r--test/CodeGen/SystemZ/vec-perm-05.ll200
-rw-r--r--test/CodeGen/SystemZ/vec-perm-06.ll160
-rw-r--r--test/CodeGen/SystemZ/vec-perm-07.ll145
-rw-r--r--test/CodeGen/SystemZ/vec-perm-08.ll170
-rw-r--r--test/CodeGen/SystemZ/vec-perm-09.ll38
-rw-r--r--test/CodeGen/SystemZ/vec-perm-10.ll36
-rw-r--r--test/CodeGen/SystemZ/vec-perm-11.ll35
-rw-r--r--test/CodeGen/SystemZ/vec-round-01.ll118
-rw-r--r--test/CodeGen/SystemZ/vec-shift-01.ll39
-rw-r--r--test/CodeGen/SystemZ/vec-shift-02.ll39
-rw-r--r--test/CodeGen/SystemZ/vec-shift-03.ll39
-rw-r--r--test/CodeGen/SystemZ/vec-shift-04.ll134
-rw-r--r--test/CodeGen/SystemZ/vec-shift-05.ll134
-rw-r--r--test/CodeGen/SystemZ/vec-shift-06.ll134
-rw-r--r--test/CodeGen/SystemZ/vec-shift-07.ll182
-rw-r--r--test/CodeGen/SystemZ/vec-sqrt-01.ll23
-rw-r--r--test/CodeGen/SystemZ/vec-sub-01.ll148
-rw-r--r--test/CodeGen/SystemZ/vec-xor-01.ll39
-rw-r--r--test/CodeGen/SystemZ/xor-01.ll74
-rw-r--r--test/CodeGen/SystemZ/xor-03.ll62
-rw-r--r--test/CodeGen/SystemZ/xor-05.ll40
-rw-r--r--test/CodeGen/SystemZ/xor-06.ll16
-rw-r--r--test/CodeGen/SystemZ/xor-08.ll24
-rw-r--r--test/CodeGen/Thumb/2007-01-31-RegInfoAssert.ll4
-rw-r--r--test/CodeGen/Thumb/2007-05-05-InvalidPushPop.ll14
-rw-r--r--test/CodeGen/Thumb/2009-07-20-TwoAddrBug.ll2
-rw-r--r--test/CodeGen/Thumb/2009-08-12-ConstIslandAssert.ll592
-rw-r--r--test/CodeGen/Thumb/2009-08-12-RegInfoAssert.ll14
-rw-r--r--test/CodeGen/Thumb/2009-08-20-ISelBug.ll12
-rw-r--r--test/CodeGen/Thumb/2009-12-17-pre-regalloc-taildup.ll24
-rw-r--r--test/CodeGen/Thumb/2010-07-15-debugOrdering.ll178
-rw-r--r--test/CodeGen/Thumb/2011-05-11-DAGLegalizer.ll28
-rw-r--r--test/CodeGen/Thumb/2011-06-16-NoGPRs.ll2
-rw-r--r--test/CodeGen/Thumb/2011-EpilogueBug.ll2
-rw-r--r--test/CodeGen/Thumb/2014-06-10-thumb1-ldst-opt-bug.ll6
-rw-r--r--test/CodeGen/Thumb/PR17309.ll2
-rw-r--r--test/CodeGen/Thumb/asmprinter-bug.ll68
-rw-r--r--test/CodeGen/Thumb/cortex-m0-unaligned-access.ll2
-rw-r--r--test/CodeGen/Thumb/dyn-stackalloc.ll10
-rw-r--r--test/CodeGen/Thumb/large-stack.ll2
-rw-r--r--test/CodeGen/Thumb/ldm-merge-call.ll6
-rw-r--r--test/CodeGen/Thumb/ldm-merge-struct.ll4
-rw-r--r--test/CodeGen/Thumb/ldm-stm-base-materialization.ll8
-rw-r--r--test/CodeGen/Thumb/ldr_ext.ll10
-rw-r--r--test/CodeGen/Thumb/ldr_frame.ll16
-rw-r--r--test/CodeGen/Thumb/long.ll2
-rw-r--r--test/CodeGen/Thumb/segmented-stacks.ll2
-rw-r--r--test/CodeGen/Thumb/stack-access.ll127
-rw-r--r--test/CodeGen/Thumb/stack_guard_remat.ll2
-rw-r--r--test/CodeGen/Thumb/stm-merge.ll11
-rw-r--r--test/CodeGen/Thumb/thumb-ldm.ll16
-rw-r--r--test/CodeGen/Thumb/vargs.ll24
-rw-r--r--test/CodeGen/Thumb2/2009-07-17-CrossRegClassCopy.ll6
-rw-r--r--test/CodeGen/Thumb2/2009-07-21-ISelBug.ll18
-rw-r--r--test/CodeGen/Thumb2/2009-07-30-PEICrash.ll82
-rw-r--r--test/CodeGen/Thumb2/2009-08-01-WrongLDRBOpc.ll14
-rw-r--r--test/CodeGen/Thumb2/2009-08-02-CoalescerBug.ll10
-rw-r--r--test/CodeGen/Thumb2/2009-08-04-CoalescerAssert.ll2
-rw-r--r--test/CodeGen/Thumb2/2009-08-04-CoalescerBug.ll28
-rw-r--r--test/CodeGen/Thumb2/2009-08-04-ScavengerAssert.ll156
-rw-r--r--test/CodeGen/Thumb2/2009-08-04-SubregLoweringBug3.ll2
-rw-r--r--test/CodeGen/Thumb2/2009-08-06-SpDecBug.ll4
-rw-r--r--test/CodeGen/Thumb2/2009-08-07-NeonFPBug.ll8
-rw-r--r--test/CodeGen/Thumb2/2009-08-10-ISelBug.ll8
-rw-r--r--test/CodeGen/Thumb2/2009-08-21-PostRAKill4.ll8
-rw-r--r--test/CodeGen/Thumb2/2009-09-01-PostRAProlog.ll26
-rw-r--r--test/CodeGen/Thumb2/2009-09-28-ITBlockBug.ll24
-rw-r--r--test/CodeGen/Thumb2/2009-11-11-ScavengerAssert.ll18
-rw-r--r--test/CodeGen/Thumb2/2009-12-01-LoopIVUsers.ll46
-rw-r--r--test/CodeGen/Thumb2/2010-01-06-TailDuplicateLabels.ll22
-rw-r--r--test/CodeGen/Thumb2/2010-01-19-RemovePredicates.ll2
-rw-r--r--test/CodeGen/Thumb2/2010-03-08-addi12-ccout.ll18
-rw-r--r--test/CodeGen/Thumb2/2010-03-15-AsmCCClobber.ll26
-rw-r--r--test/CodeGen/Thumb2/2010-06-14-NEONCoalescer.ll4
-rw-r--r--test/CodeGen/Thumb2/2010-06-19-ITBlockCrash.ll2
-rw-r--r--test/CodeGen/Thumb2/2010-06-21-TailMergeBug.ll6
-rw-r--r--test/CodeGen/Thumb2/2010-08-10-VarSizedAllocaBug.ll8
-rw-r--r--test/CodeGen/Thumb2/2011-06-07-TwoAddrEarlyClobber.ll14
-rw-r--r--test/CodeGen/Thumb2/2011-12-16-T2SizeReduceAssert.ll10
-rw-r--r--test/CodeGen/Thumb2/2012-01-13-CBNZBug.ll24
-rw-r--r--test/CodeGen/Thumb2/2013-02-19-tail-call-register-hint.ll12
-rw-r--r--test/CodeGen/Thumb2/2013-03-02-vduplane-nonconstant-source-index.ll6
-rw-r--r--test/CodeGen/Thumb2/aligned-constants.ll4
-rw-r--r--test/CodeGen/Thumb2/aligned-spill.ll2
-rw-r--r--test/CodeGen/Thumb2/bfi.ll2
-rw-r--r--test/CodeGen/Thumb2/cbnz.ll54
-rw-r--r--test/CodeGen/Thumb2/constant-islands-new-island-padding.ll8
-rw-r--r--test/CodeGen/Thumb2/constant-islands.ll602
-rw-r--r--test/CodeGen/Thumb2/crash.ll24
-rw-r--r--test/CodeGen/Thumb2/cross-rc-coalescing-2.ll14
-rw-r--r--test/CodeGen/Thumb2/div.ll4
-rw-r--r--test/CodeGen/Thumb2/float-ops.ll6
-rw-r--r--test/CodeGen/Thumb2/frameless2.ll4
-rw-r--r--test/CodeGen/Thumb2/ifcvt-compare.ll47
-rw-r--r--test/CodeGen/Thumb2/ifcvt-neon.ll10
-rw-r--r--test/CodeGen/Thumb2/inflate-regs.ll4
-rw-r--r--test/CodeGen/Thumb2/large-call.ll6
-rw-r--r--test/CodeGen/Thumb2/large-stack.ll4
-rw-r--r--test/CodeGen/Thumb2/lsr-deficiency.ll8
-rw-r--r--test/CodeGen/Thumb2/machine-licm.ll10
-rw-r--r--test/CodeGen/Thumb2/pic-load.ll4
-rw-r--r--test/CodeGen/Thumb2/stack_guard_remat.ll2
-rw-r--r--test/CodeGen/Thumb2/tail-call-r9.ll2
-rw-r--r--test/CodeGen/Thumb2/thumb2-call-tc.ll2
-rw-r--r--test/CodeGen/Thumb2/thumb2-call.ll2
-rw-r--r--test/CodeGen/Thumb2/thumb2-cbnz.ll2
-rw-r--r--test/CodeGen/Thumb2/thumb2-ifcvt1-tc.ll2
-rw-r--r--test/CodeGen/Thumb2/thumb2-ifcvt1.ll2
-rw-r--r--test/CodeGen/Thumb2/thumb2-ifcvt2.ll8
-rw-r--r--test/CodeGen/Thumb2/thumb2-ifcvt3.ll2
-rw-r--r--test/CodeGen/Thumb2/thumb2-ldm.ll16
-rw-r--r--test/CodeGen/Thumb2/thumb2-ldr.ll18
-rw-r--r--test/CodeGen/Thumb2/thumb2-ldr_ext.ll8
-rw-r--r--test/CodeGen/Thumb2/thumb2-ldr_post.ll2
-rw-r--r--test/CodeGen/Thumb2/thumb2-ldr_pre.ll10
-rw-r--r--test/CodeGen/Thumb2/thumb2-ldrb.ll16
-rw-r--r--test/CodeGen/Thumb2/thumb2-ldrd.ll4
-rw-r--r--test/CodeGen/Thumb2/thumb2-ldrh.ll18
-rw-r--r--test/CodeGen/Thumb2/thumb2-smul.ll2
-rw-r--r--test/CodeGen/Thumb2/thumb2-spill-q.ll2
-rw-r--r--test/CodeGen/Thumb2/thumb2-str.ll6
-rw-r--r--test/CodeGen/Thumb2/thumb2-str_post.ll4
-rw-r--r--test/CodeGen/Thumb2/thumb2-str_pre.ll8
-rw-r--r--test/CodeGen/Thumb2/thumb2-strb.ll6
-rw-r--r--test/CodeGen/Thumb2/thumb2-strh.ll6
-rw-r--r--test/CodeGen/Thumb2/thumb2-tbb.ll26
-rw-r--r--test/CodeGen/Thumb2/thumb2-tbh.ll4
-rw-r--r--test/CodeGen/Thumb2/tls1.ll2
-rw-r--r--test/CodeGen/Thumb2/tls2.ll2
-rw-r--r--test/CodeGen/Thumb2/tpsoft.ll14
-rw-r--r--test/CodeGen/Thumb2/v8_IT_2.ll6
-rw-r--r--test/CodeGen/Thumb2/v8_IT_3.ll14
-rw-r--r--test/CodeGen/Thumb2/v8_IT_5.ll3
-rw-r--r--test/CodeGen/WinEH/cppeh-alloca-sink.ll180
-rw-r--r--test/CodeGen/WinEH/cppeh-catch-all.ll97
-rw-r--r--test/CodeGen/WinEH/cppeh-catch-and-throw.ll143
-rw-r--r--test/CodeGen/WinEH/cppeh-catch-scalar.ll126
-rw-r--r--test/CodeGen/WinEH/cppeh-catch-unwind.ll240
-rw-r--r--test/CodeGen/WinEH/cppeh-cleanup-invoke.ll91
-rw-r--r--test/CodeGen/WinEH/cppeh-demote-liveout.ll72
-rw-r--r--test/CodeGen/WinEH/cppeh-frame-vars.ll272
-rw-r--r--test/CodeGen/WinEH/cppeh-inalloca.ll194
-rw-r--r--test/CodeGen/WinEH/cppeh-min-unwind.ll99
-rw-r--r--test/CodeGen/WinEH/cppeh-mixed-catch-and-cleanup.ll106
-rw-r--r--test/CodeGen/WinEH/cppeh-multi-catch.ll226
-rw-r--r--test/CodeGen/WinEH/cppeh-nested-1.ll194
-rw-r--r--test/CodeGen/WinEH/cppeh-nested-2.ll324
-rw-r--r--test/CodeGen/WinEH/cppeh-nested-3.ll260
-rw-r--r--test/CodeGen/WinEH/cppeh-nested-rethrow.ll212
-rw-r--r--test/CodeGen/WinEH/cppeh-nonalloca-frame-values.ll278
-rw-r--r--test/CodeGen/WinEH/cppeh-prepared-catch-all.ll47
-rw-r--r--test/CodeGen/WinEH/cppeh-prepared-catch-reordered.ll164
-rw-r--r--test/CodeGen/WinEH/cppeh-prepared-catch.ll210
-rw-r--r--test/CodeGen/WinEH/cppeh-prepared-cleanups.ll243
-rw-r--r--test/CodeGen/WinEH/cppeh-shared-empty-catch.ll110
-rw-r--r--test/CodeGen/WinEH/cppeh-similar-catch-blocks.ll394
-rw-r--r--test/CodeGen/WinEH/cppeh-state-calc-1.ll289
-rw-r--r--test/CodeGen/WinEH/lit.local.cfg12
-rw-r--r--test/CodeGen/WinEH/seh-catch-all.ll59
-rw-r--r--test/CodeGen/WinEH/seh-inlined-finally.ll83
-rw-r--r--test/CodeGen/WinEH/seh-outlined-finally.ll155
-rw-r--r--test/CodeGen/WinEH/seh-prepared-basic.ll83
-rw-r--r--test/CodeGen/WinEH/seh-resume-phi.ll66
-rw-r--r--test/CodeGen/WinEH/seh-simple.ll201
-rw-r--r--test/CodeGen/X86/2005-01-17-CycleInDAG.ll4
-rw-r--r--test/CodeGen/X86/2005-02-14-IllegalAssembler.ll2
-rw-r--r--test/CodeGen/X86/2006-01-19-ISelFoldingBug.ll2
-rw-r--r--test/CodeGen/X86/2006-04-27-ISelFoldingBug.ll8
-rw-r--r--test/CodeGen/X86/2006-05-01-SchedCausingSpills.ll8
-rw-r--r--test/CodeGen/X86/2006-05-02-InstrSched1.ll12
-rw-r--r--test/CodeGen/X86/2006-05-02-InstrSched2.ll4
-rw-r--r--test/CodeGen/X86/2006-05-08-CoalesceSubRegClass.ll4
-rw-r--r--test/CodeGen/X86/2006-05-08-InstrSched.ll10
-rw-r--r--test/CodeGen/X86/2006-05-11-InstrSched.ll18
-rw-r--r--test/CodeGen/X86/2006-05-22-FPSetEQ.ll9
-rw-r--r--test/CodeGen/X86/2006-05-25-CycleInDAG.ll2
-rw-r--r--test/CodeGen/X86/2006-07-20-InlineAsm.ll4
-rw-r--r--test/CodeGen/X86/2006-08-07-CycleInDAG.ll4
-rw-r--r--test/CodeGen/X86/2006-08-16-CycleInDAG.ll4
-rw-r--r--test/CodeGen/X86/2006-09-01-CycleInDAG.ll20
-rw-r--r--test/CodeGen/X86/2006-10-07-ScalarSSEMiscompile.ll15
-rw-r--r--test/CodeGen/X86/2006-10-09-CycleInDAG.ll4
-rw-r--r--test/CodeGen/X86/2006-10-10-FindModifiedNodeSlotBug.ll8
-rw-r--r--test/CodeGen/X86/2006-10-12-CycleInDAG.ll6
-rw-r--r--test/CodeGen/X86/2006-10-13-CycleInDAG.ll6
-rw-r--r--test/CodeGen/X86/2006-10-19-SwitchUnnecessaryBranching.ll4
-rw-r--r--test/CodeGen/X86/2006-11-12-CSRetCC.ll52
-rw-r--r--test/CodeGen/X86/2006-11-17-IllegalMove.ll6
-rw-r--r--test/CodeGen/X86/2006-12-16-InlineAsmCrash.ll4
-rw-r--r--test/CodeGen/X86/2006-12-19-IntelSyntax.ll26
-rw-r--r--test/CodeGen/X86/2007-01-08-X86-64-Pointer.ll2
-rw-r--r--test/CodeGen/X86/2007-01-13-StackPtrIndex.ll112
-rw-r--r--test/CodeGen/X86/2007-02-04-OrAddrMode.ll4
-rw-r--r--test/CodeGen/X86/2007-02-16-BranchFold.ll16
-rw-r--r--test/CodeGen/X86/2007-02-19-LiveIntervalAssert.ll6
-rw-r--r--test/CodeGen/X86/2007-03-01-SpillerCrash.ll2
-rw-r--r--test/CodeGen/X86/2007-03-15-GEP-Idx-Sink.ll56
-rw-r--r--test/CodeGen/X86/2007-03-16-InlineAsm.ll8
-rw-r--r--test/CodeGen/X86/2007-03-26-CoalescerBug.ll2
-rw-r--r--test/CodeGen/X86/2007-04-08-InlineAsmCrash.ll2
-rw-r--r--test/CodeGen/X86/2007-04-17-LiveIntervalAssert.ll4
-rw-r--r--test/CodeGen/X86/2007-04-25-MMX-PADDQ.ll64
-rw-r--r--test/CodeGen/X86/2007-05-05-VecCastExpand.ll6
-rw-r--r--test/CodeGen/X86/2007-05-14-LiveIntervalAssert.ll2
-rw-r--r--test/CodeGen/X86/2007-06-04-X86-64-CtorAsmBugs.ll2
-rw-r--r--test/CodeGen/X86/2007-06-15-IntToMMX.ll19
-rw-r--r--test/CodeGen/X86/2007-06-29-VecFPConstantCSEBug.ll4
-rw-r--r--test/CodeGen/X86/2007-07-10-StackerAssert.ll6
-rw-r--r--test/CodeGen/X86/2007-07-18-Vector-Extract.ll8
-rw-r--r--test/CodeGen/X86/2007-08-09-IllegalX86-64Asm.ll118
-rw-r--r--test/CodeGen/X86/2007-08-13-AppendingLinkage.ll2
-rw-r--r--test/CodeGen/X86/2007-09-05-InvalidAsm.ll22
-rw-r--r--test/CodeGen/X86/2007-10-04-AvoidEFLAGSCopy.ll2
-rw-r--r--test/CodeGen/X86/2007-10-12-CoalesceExtSubReg.ll8
-rw-r--r--test/CodeGen/X86/2007-10-12-SpillerUnfold1.ll16
-rw-r--r--test/CodeGen/X86/2007-10-12-SpillerUnfold2.ll12
-rw-r--r--test/CodeGen/X86/2007-10-14-CoalescerCrash.ll4
-rw-r--r--test/CodeGen/X86/2007-10-15-CoalescerCrash.ll2
-rw-r--r--test/CodeGen/X86/2007-10-19-SpillerUnfold.ll8
-rw-r--r--test/CodeGen/X86/2007-10-29-ExtendSetCC.ll2
-rw-r--r--test/CodeGen/X86/2007-10-30-LSRCrash.ll2
-rw-r--r--test/CodeGen/X86/2007-10-31-extractelement-i64.ll24
-rw-r--r--test/CodeGen/X86/2007-11-04-LiveIntervalCrash.ll4
-rw-r--r--test/CodeGen/X86/2007-11-04-rip-immediate-constant.ll2
-rw-r--r--test/CodeGen/X86/2007-11-06-InstrSched.ll8
-rw-r--r--test/CodeGen/X86/2007-11-07-MulBy4.ll4
-rw-r--r--test/CodeGen/X86/2007-12-16-BURRSchedCrash.ll6
-rw-r--r--test/CodeGen/X86/2007-12-18-LoadCSEBug.ll6
-rw-r--r--test/CodeGen/X86/2008-01-08-SchedulerCrash.ll4
-rw-r--r--test/CodeGen/X86/2008-01-16-FPStackifierAssert.ll10
-rw-r--r--test/CodeGen/X86/2008-01-16-InvalidDAGCombineXform.ll36
-rw-r--r--test/CodeGen/X86/2008-02-05-ISelCrash.ll2
-rw-r--r--test/CodeGen/X86/2008-02-06-LoadFoldingBug.ll12
-rw-r--r--test/CodeGen/X86/2008-02-18-TailMergingBug.ll40
-rw-r--r--test/CodeGen/X86/2008-02-20-InlineAsmClobber.ll4
-rw-r--r--test/CodeGen/X86/2008-02-22-LocalRegAllocBug.ll44
-rw-r--r--test/CodeGen/X86/2008-02-25-InlineAsmBug.ll14
-rw-r--r--test/CodeGen/X86/2008-02-25-X86-64-CoalescerBug.ll10
-rw-r--r--test/CodeGen/X86/2008-02-27-DeadSlotElimBug.ll16
-rw-r--r--test/CodeGen/X86/2008-03-07-APIntBug.ll68
-rw-r--r--test/CodeGen/X86/2008-03-10-RegAllocInfLoop.ll2
-rw-r--r--test/CodeGen/X86/2008-03-12-ThreadLocalAlias.ll12
-rw-r--r--test/CodeGen/X86/2008-03-14-SpillerCrash.ll8
-rw-r--r--test/CodeGen/X86/2008-03-23-DarwinAsmComments.ll22
-rw-r--r--test/CodeGen/X86/2008-03-31-SpillerFoldingBug.ll14
-rw-r--r--test/CodeGen/X86/2008-04-09-BranchFolding.ll4
-rw-r--r--test/CodeGen/X86/2008-04-15-LiveVariableBug.ll6
-rw-r--r--test/CodeGen/X86/2008-04-16-CoalescerBug.ll2
-rw-r--r--test/CodeGen/X86/2008-04-16-ReMatBug.ll2
-rw-r--r--test/CodeGen/X86/2008-04-17-CoalescerBug.ll12
-rw-r--r--test/CodeGen/X86/2008-04-24-pblendw-fold-crash.ll2
-rw-r--r--test/CodeGen/X86/2008-04-28-CoalescerBug.ll2
-rw-r--r--test/CodeGen/X86/2008-05-09-PHIElimBug.ll2
-rw-r--r--test/CodeGen/X86/2008-05-09-ShuffleLoweringBug.ll2
-rw-r--r--test/CodeGen/X86/2008-05-12-tailmerge-5.ll52
-rw-r--r--test/CodeGen/X86/2008-05-21-CoalescerBug.ll6
-rw-r--r--test/CodeGen/X86/2008-05-22-FoldUnalignedLoad.ll2
-rw-r--r--test/CodeGen/X86/2008-05-28-LocalRegAllocBug.ll2
-rw-r--r--test/CodeGen/X86/2008-06-13-NotVolatileLoadStore.ll4
-rw-r--r--test/CodeGen/X86/2008-06-13-VolatileLoadStore.ll4
-rw-r--r--test/CodeGen/X86/2008-06-16-SubregsBug.ll2
-rw-r--r--test/CodeGen/X86/2008-07-07-DanglingDeadInsts.ll10
-rw-r--r--test/CodeGen/X86/2008-07-16-CoalescerCrash.ll2
-rw-r--r--test/CodeGen/X86/2008-07-19-movups-spills.ll128
-rw-r--r--test/CodeGen/X86/2008-07-22-CombinerCrash.ll2
-rw-r--r--test/CodeGen/X86/2008-08-06-CmpStride.ll2
-rw-r--r--test/CodeGen/X86/2008-08-06-RewriterBug.ll10
-rw-r--r--test/CodeGen/X86/2008-08-31-EH_RETURN64.ll4
-rw-r--r--test/CodeGen/X86/2008-09-09-LinearScanBug.ll4
-rw-r--r--test/CodeGen/X86/2008-09-11-CoalescerBug.ll10
-rw-r--r--test/CodeGen/X86/2008-09-11-CoalescerBug2.ll6
-rw-r--r--test/CodeGen/X86/2008-09-17-inline-asm-1.ll2
-rw-r--r--test/CodeGen/X86/2008-09-18-inline-asm-2.ll12
-rw-r--r--test/CodeGen/X86/2008-09-19-RegAllocBug.ll2
-rw-r--r--test/CodeGen/X86/2008-09-29-ReMatBug.ll14
-rw-r--r--test/CodeGen/X86/2008-09-29-VolatileBug.ll2
-rw-r--r--test/CodeGen/X86/2008-10-06-MMXISelBug.ll12
-rw-r--r--test/CodeGen/X86/2008-10-06-x87ld-nan-2.ll2
-rw-r--r--test/CodeGen/X86/2008-10-07-SSEISelBug.ll6
-rw-r--r--test/CodeGen/X86/2008-10-11-CallCrash.ll8
-rw-r--r--test/CodeGen/X86/2008-10-13-CoalescerBug.ll4
-rw-r--r--test/CodeGen/X86/2008-10-16-VecUnaryOp.ll2
-rw-r--r--test/CodeGen/X86/2008-10-27-CoalescerBug.ll2
-rw-r--r--test/CodeGen/X86/2008-11-06-testb.ll6
-rw-r--r--test/CodeGen/X86/2008-11-29-ULT-Sign.ll2
-rw-r--r--test/CodeGen/X86/2008-12-01-SpillerAssert.ll2
-rw-r--r--test/CodeGen/X86/2008-12-01-loop-iv-used-outside-loop.ll6
-rw-r--r--test/CodeGen/X86/2008-12-02-IllegalResultType.ll2
-rw-r--r--test/CodeGen/X86/2008-12-02-dagcombine-1.ll2
-rw-r--r--test/CodeGen/X86/2008-12-02-dagcombine-2.ll2
-rw-r--r--test/CodeGen/X86/2008-12-19-EarlyClobberBug.ll2
-rw-r--r--test/CodeGen/X86/2008-12-23-crazy-address.ll10
-rw-r--r--test/CodeGen/X86/2009-01-16-SchedulerBug.ll4
-rw-r--r--test/CodeGen/X86/2009-01-18-ConstantExprCrash.ll2
-rw-r--r--test/CodeGen/X86/2009-01-25-NoSSE.ll4
-rw-r--r--test/CodeGen/X86/2009-01-31-BigShift2.ll2
-rw-r--r--test/CodeGen/X86/2009-02-01-LargeMask.ll2
-rw-r--r--test/CodeGen/X86/2009-02-03-AnalyzedTwice.ll2
-rw-r--r--test/CodeGen/X86/2009-02-04-sext-i64-gep.ll2
-rw-r--r--test/CodeGen/X86/2009-02-09-ivs-different-sizes.ll2
-rw-r--r--test/CodeGen/X86/2009-02-11-codegenprepare-reuse.ll10
-rw-r--r--test/CodeGen/X86/2009-02-12-DebugInfoVLA.ll78
-rw-r--r--test/CodeGen/X86/2009-02-26-MachineLICMBug.ll16
-rw-r--r--test/CodeGen/X86/2009-03-03-BTHang.ll10
-rw-r--r--test/CodeGen/X86/2009-03-05-burr-list-crash.ll4
-rw-r--r--test/CodeGen/X86/2009-03-09-APIntCrash.ll2
-rw-r--r--test/CodeGen/X86/2009-03-10-CoalescerBug.ll2
-rw-r--r--test/CodeGen/X86/2009-03-23-LinearScanBug.ll8
-rw-r--r--test/CodeGen/X86/2009-03-23-MultiUseSched.ll48
-rw-r--r--test/CodeGen/X86/2009-03-25-TestBug.ll6
-rw-r--r--test/CodeGen/X86/2009-04-12-picrel.ll2
-rw-r--r--test/CodeGen/X86/2009-04-13-2AddrAssert.ll2
-rw-r--r--test/CodeGen/X86/2009-04-14-IllegalRegs.ll22
-rw-r--r--test/CodeGen/X86/2009-04-16-SpillerUnfold.ll26
-rw-r--r--test/CodeGen/X86/2009-04-24.ll2
-rw-r--r--test/CodeGen/X86/2009-04-25-CoalescerBug.ll2
-rw-r--r--test/CodeGen/X86/2009-04-27-CoalescerAssert.ll310
-rw-r--r--test/CodeGen/X86/2009-04-29-IndirectDestOperands.ll24
-rw-r--r--test/CodeGen/X86/2009-04-29-LinearScanBug.ll32
-rw-r--r--test/CodeGen/X86/2009-04-29-RegAllocAssert.ll12
-rw-r--r--test/CodeGen/X86/2009-04-scale.ll10
-rw-r--r--test/CodeGen/X86/2009-05-08-InlineAsmIOffset.ll2
-rw-r--r--test/CodeGen/X86/2009-05-11-tailmerge-crash.ll2
-rw-r--r--test/CodeGen/X86/2009-05-19-SingleElementExtractElement.ll2
-rw-r--r--test/CodeGen/X86/2009-05-28-DAGCombineCrash.ll2
-rw-r--r--test/CodeGen/X86/2009-05-30-ISelBug.ll8
-rw-r--r--test/CodeGen/X86/2009-06-02-RewriterBug.ll52
-rw-r--r--test/CodeGen/X86/2009-06-03-Win64DisableRedZone.ll3
-rw-r--r--test/CodeGen/X86/2009-06-04-VirtualLiveIn.ll4
-rw-r--r--test/CodeGen/X86/2009-06-05-ScalarToVectorByteMMX.ll9
-rw-r--r--test/CodeGen/X86/2009-06-05-VZextByteShort.ll8
-rw-r--r--test/CodeGen/X86/2009-06-07-ExpandMMXBitcast.ll10
-rw-r--r--test/CodeGen/X86/2009-06-18-movlp-shuffle-register.ll2
-rw-r--r--test/CodeGen/X86/2009-07-15-CoalescerBug.ll2
-rw-r--r--test/CodeGen/X86/2009-07-20-CoalescerBug.ll10
-rw-r--r--test/CodeGen/X86/2009-07-20-DAGCombineBug.ll2
-rw-r--r--test/CodeGen/X86/2009-08-02-mmx-scalar-to-vector.ll12
-rw-r--r--test/CodeGen/X86/2009-08-06-branchfolder-crash.ll8
-rw-r--r--test/CodeGen/X86/2009-08-06-inlineasm.ll12
-rw-r--r--test/CodeGen/X86/2009-08-14-Win64MemoryIndirectArg.ll14
-rw-r--r--test/CodeGen/X86/2009-08-19-LoadNarrowingMiscompile.ll2
-rw-r--r--test/CodeGen/X86/2009-08-23-SubRegReuseUndo.ll8
-rw-r--r--test/CodeGen/X86/2009-09-10-LoadFoldingBug.ll4
-rw-r--r--test/CodeGen/X86/2009-09-10-SpillComments.ll54
-rw-r--r--test/CodeGen/X86/2009-09-16-CoalescerBug.ll2
-rw-r--r--test/CodeGen/X86/2009-09-21-NoSpillLoopCount.ll8
-rw-r--r--test/CodeGen/X86/2009-09-22-CoalescerBug.ll2
-rw-r--r--test/CodeGen/X86/2009-10-16-Scope.ll20
-rw-r--r--test/CodeGen/X86/2009-10-19-EmergencySpill.ll20
-rw-r--r--test/CodeGen/X86/2009-10-19-atomic-cmp-eflags.ll13
-rw-r--r--test/CodeGen/X86/2009-10-25-RewriterBug.ll20
-rw-r--r--test/CodeGen/X86/2009-11-16-MachineLICM.ll16
-rw-r--r--test/CodeGen/X86/2009-11-16-UnfoldMemOpBug.ll4
-rw-r--r--test/CodeGen/X86/2009-11-17-UpdateTerminator.ll4
-rw-r--r--test/CodeGen/X86/2009-11-25-ImpDefBug.ll2
-rw-r--r--test/CodeGen/X86/2009-12-01-EarlyClobberBug.ll8
-rw-r--r--test/CodeGen/X86/2009-12-11-TLSNoRedZone.ll14
-rw-r--r--test/CodeGen/X86/20090313-signext.ll2
-rw-r--r--test/CodeGen/X86/2010-01-08-Atomic64Bug.ll3
-rw-r--r--test/CodeGen/X86/2010-01-13-OptExtBug.ll22
-rw-r--r--test/CodeGen/X86/2010-01-15-SelectionDAGCycle.ll6
-rw-r--r--test/CodeGen/X86/2010-01-18-DbgValue.ll48
-rw-r--r--test/CodeGen/X86/2010-01-19-OptExtBug.ll2
-rw-r--r--test/CodeGen/X86/2010-02-01-DbgValueCrash.ll28
-rw-r--r--test/CodeGen/X86/2010-02-04-SchedulerBug.ll12
-rw-r--r--test/CodeGen/X86/2010-02-11-NonTemporal.ll4
-rw-r--r--test/CodeGen/X86/2010-02-12-CoalescerBug-Impdef.ll2
-rw-r--r--test/CodeGen/X86/2010-02-19-TailCallRetAddrBug.ll40
-rw-r--r--test/CodeGen/X86/2010-02-23-RematImplicitSubreg.ll8
-rw-r--r--test/CodeGen/X86/2010-03-05-ConstantFoldCFG.ll2
-rw-r--r--test/CodeGen/X86/2010-03-17-ISelBug.ll16
-rw-r--r--test/CodeGen/X86/2010-04-06-SSEDomainFixCrash.ll2
-rw-r--r--test/CodeGen/X86/2010-04-08-CoalescerBug.ll6
-rw-r--r--test/CodeGen/X86/2010-04-13-AnalyzeBranchCrash.ll2
-rw-r--r--test/CodeGen/X86/2010-04-23-mmx-movdq2q.ll100
-rw-r--r--test/CodeGen/X86/2010-04-30-LocalAlloc-LandingPad.ll20
-rw-r--r--test/CodeGen/X86/2010-05-05-LocalAllocEarlyClobber.ll4
-rw-r--r--test/CodeGen/X86/2010-05-07-ldconvert.ll4
-rw-r--r--test/CodeGen/X86/2010-05-10-DAGCombinerBug.ll2
-rw-r--r--test/CodeGen/X86/2010-05-16-nosseconversion.ll2
-rw-r--r--test/CodeGen/X86/2010-05-25-DotDebugLoc.ll139
-rw-r--r--test/CodeGen/X86/2010-05-26-DotDebugLoc.ll74
-rw-r--r--test/CodeGen/X86/2010-05-26-FP_TO_INT-crash.ll2
-rw-r--r--test/CodeGen/X86/2010-05-28-Crash.ll42
-rw-r--r--test/CodeGen/X86/2010-06-01-DeadArg-DbgInfo.ll66
-rw-r--r--test/CodeGen/X86/2010-06-14-fast-isel-fs-load.ll2
-rw-r--r--test/CodeGen/X86/2010-06-15-FastAllocEarlyCLobber.ll8
-rw-r--r--test/CodeGen/X86/2010-06-24-g-constraint-crash.ll2
-rw-r--r--test/CodeGen/X86/2010-06-25-CoalescerSubRegDefDead.ll8
-rw-r--r--test/CodeGen/X86/2010-06-25-asm-RA-crash.ll4
-rw-r--r--test/CodeGen/X86/2010-06-28-matched-g-constraint.ll2
-rw-r--r--test/CodeGen/X86/2010-07-02-UnfoldBug.ll2
-rw-r--r--test/CodeGen/X86/2010-07-06-DbgCrash.ll30
-rw-r--r--test/CodeGen/X86/2010-07-11-FPStackLoneUse.ll2
-rw-r--r--test/CodeGen/X86/2010-07-15-Crash.ll4
-rw-r--r--test/CodeGen/X86/2010-08-04-MaskedSignedCompare.ll6
-rw-r--r--test/CodeGen/X86/2010-08-04-StackVariable.ll124
-rw-r--r--test/CodeGen/X86/2010-09-01-RemoveCopyByCommutingDef.ll2
-rw-r--r--test/CodeGen/X86/2010-09-16-EmptyFilename.ll32
-rw-r--r--test/CodeGen/X86/2010-09-16-asmcrash.ll2
-rw-r--r--test/CodeGen/X86/2010-09-17-SideEffectsInChain.ll8
-rw-r--r--test/CodeGen/X86/2010-09-30-CMOV-JumpTable-PHI.ll4
-rw-r--r--test/CodeGen/X86/2010-10-08-cmpxchg8b.ll3
-rw-r--r--test/CodeGen/X86/2010-11-02-DbgParameter.ll34
-rw-r--r--test/CodeGen/X86/2010-11-09-MOVLPS.ll24
-rw-r--r--test/CodeGen/X86/2010-11-18-SelectOfExtload.ll4
-rw-r--r--test/CodeGen/X86/2011-01-24-DbgValue-Before-Use.ll72
-rw-r--r--test/CodeGen/X86/2011-02-12-shuffle.ll2
-rw-r--r--test/CodeGen/X86/2011-02-21-VirtRegRewriter-KillSubReg.ll4
-rw-r--r--test/CodeGen/X86/2011-02-23-UnfoldBug.ll2
-rw-r--r--test/CodeGen/X86/2011-03-02-DAGCombiner.ll20
-rw-r--r--test/CodeGen/X86/2011-03-09-Physreg-Coalescing.ll4
-rw-r--r--test/CodeGen/X86/2011-04-13-SchedCmpJmp.ll8
-rw-r--r--test/CodeGen/X86/2011-05-09-loaduse.ll2
-rw-r--r--test/CodeGen/X86/2011-05-26-UnreachableBlockElim.ll4
-rw-r--r--test/CodeGen/X86/2011-05-27-CrossClassCoalescing.ll6
-rw-r--r--test/CodeGen/X86/2011-06-01-fildll.ll2
-rw-r--r--test/CodeGen/X86/2011-06-03-x87chain.ll8
-rw-r--r--test/CodeGen/X86/2011-06-12-FastAllocSpill.ll10
-rw-r--r--test/CodeGen/X86/2011-06-19-QuicksortCoalescerBug.ll2
-rw-r--r--test/CodeGen/X86/2011-07-13-BadFrameIndexDisplacement.ll4
-rw-r--r--test/CodeGen/X86/2011-09-14-valcoalesce.ll4
-rw-r--r--test/CodeGen/X86/2011-09-21-setcc-bug.ll16
-rw-r--r--test/CodeGen/X86/2011-10-11-srl.ll2
-rw-r--r--test/CodeGen/X86/2011-10-12-MachineCSE.ll92
-rw-r--r--test/CodeGen/X86/2011-10-18-FastISel-VectorParams.ll10
-rw-r--r--test/CodeGen/X86/2011-10-19-LegelizeLoad.ll6
-rw-r--r--test/CodeGen/X86/2011-10-19-widen_vselect.ll6
-rw-r--r--test/CodeGen/X86/2011-10-27-tstore.ll2
-rw-r--r--test/CodeGen/X86/2011-11-22-AVX2-Domains.ll18
-rw-r--r--test/CodeGen/X86/2011-11-30-or.ll14
-rw-r--r--test/CodeGen/X86/2011-12-06-AVXVectorExtractCombine.ll3
-rw-r--r--test/CodeGen/X86/2011-12-08-AVXISelBugs.ll8
-rw-r--r--test/CodeGen/X86/2011-12-26-extractelement-duplicate-load.ll2
-rw-r--r--test/CodeGen/X86/2012-01-10-UndefExceptionEdge.ll10
-rw-r--r--test/CodeGen/X86/2012-01-11-split-cv.ll2
-rw-r--r--test/CodeGen/X86/2012-01-12-extract-sv.ll21
-rw-r--r--test/CodeGen/X86/2012-01-16-mfence-nosse-flags.ll5
-rw-r--r--test/CodeGen/X86/2012-02-12-dagco.ll4
-rw-r--r--test/CodeGen/X86/2012-02-29-CoalescerBug.ll4
-rw-r--r--test/CodeGen/X86/2012-03-26-PostRALICMBug.ll16
-rw-r--r--test/CodeGen/X86/2012-04-26-sdglue.ll10
-rw-r--r--test/CodeGen/X86/2012-07-10-extload64.ll8
-rw-r--r--test/CodeGen/X86/2012-07-15-broadcastfold.ll3
-rw-r--r--test/CodeGen/X86/2012-08-17-legalizer-crash.ll4
-rw-r--r--test/CodeGen/X86/2012-09-28-CGPBug.ll12
-rw-r--r--test/CodeGen/X86/2012-1-10-buildvector.ll26
-rw-r--r--test/CodeGen/X86/2012-10-02-DAGCycle.ll14
-rw-r--r--test/CodeGen/X86/2012-10-03-DAGCycle.ll12
-rw-r--r--test/CodeGen/X86/2012-10-18-crash-dagco.ll20
-rw-r--r--test/CodeGen/X86/2012-11-28-merge-store-alias.ll26
-rw-r--r--test/CodeGen/X86/2012-11-30-handlemove-dbg.ll28
-rw-r--r--test/CodeGen/X86/2012-11-30-misched-dbg.ll64
-rw-r--r--test/CodeGen/X86/2012-11-30-regpres-dbg.ll22
-rw-r--r--test/CodeGen/X86/2012-12-06-python27-miscompile.ll6
-rw-r--r--test/CodeGen/X86/2012-12-1-merge-multiple.ll16
-rw-r--r--test/CodeGen/X86/2012-12-19-NoImplicitFloat.ll6
-rw-r--r--test/CodeGen/X86/2013-02-12-ShuffleToZext.ll14
-rw-r--r--test/CodeGen/X86/2013-03-13-VEX-DestReg.ll4
-rw-r--r--test/CodeGen/X86/2013-10-14-FastISel-incorrect-vreg.ll6
-rw-r--r--test/CodeGen/X86/2014-08-29-CompactUnwind.ll6
-rw-r--r--test/CodeGen/X86/Atomics-64.ll200
-rw-r--r--test/CodeGen/X86/GC/alloc_loop.ll4
-rw-r--r--test/CodeGen/X86/GC/argpromotion.ll2
-rw-r--r--test/CodeGen/X86/GC/badreadproto.ll2
-rw-r--r--test/CodeGen/X86/GC/badwriteproto.ll6
-rw-r--r--test/CodeGen/X86/GC/dynamic-frame-size.ll28
-rw-r--r--test/CodeGen/X86/GC/inline.ll4
-rw-r--r--test/CodeGen/X86/GC/inline2.ll4
-rw-r--r--test/CodeGen/X86/MachineBranchProb.ll6
-rw-r--r--test/CodeGen/X86/MachineSink-DbgValue.ll48
-rw-r--r--test/CodeGen/X86/MachineSink-eflags.ll28
-rw-r--r--test/CodeGen/X86/MergeConsecutiveStores.ll284
-rw-r--r--test/CodeGen/X86/StackColoring-dbg.ll16
-rw-r--r--test/CodeGen/X86/StackColoring.ll4
-rw-r--r--test/CodeGen/X86/SwitchLowering.ll6
-rw-r--r--test/CodeGen/X86/SwizzleShuff.ll35
-rw-r--r--test/CodeGen/X86/abi-isel.ll436
-rw-r--r--test/CodeGen/X86/add-of-carry.ll27
-rw-r--r--test/CodeGen/X86/add32ri8.ll10
-rw-r--r--test/CodeGen/X86/addr-mode-matcher.ll10
-rw-r--r--test/CodeGen/X86/address-type-promotion-constantexpr.ll2
-rw-r--r--test/CodeGen/X86/aliases.ll10
-rw-r--r--test/CodeGen/X86/aligned-variadic.ll8
-rw-r--r--test/CodeGen/X86/alignment-2.ll2
-rw-r--r--test/CodeGen/X86/and-load-fold.ll2
-rw-r--r--test/CodeGen/X86/and-or-fold.ll2
-rw-r--r--test/CodeGen/X86/and-su.ll2
-rw-r--r--test/CodeGen/X86/andimm8.ll12
-rw-r--r--test/CodeGen/X86/anyregcc-crash.ll2
-rw-r--r--test/CodeGen/X86/anyregcc.ll16
-rw-r--r--test/CodeGen/X86/asm-global-imm.ll2
-rw-r--r--test/CodeGen/X86/atom-call-reg-indirect-foldedreload32.ll28
-rw-r--r--test/CodeGen/X86/atom-call-reg-indirect-foldedreload64.ll42
-rw-r--r--test/CodeGen/X86/atom-call-reg-indirect.ll8
-rw-r--r--test/CodeGen/X86/atom-cmpb.ll8
-rw-r--r--test/CodeGen/X86/atom-fixup-lea1.ll4
-rw-r--r--test/CodeGen/X86/atom-fixup-lea2.ll24
-rw-r--r--test/CodeGen/X86/atom-fixup-lea3.ll10
-rw-r--r--test/CodeGen/X86/atom-fixup-lea4.ll4
-rw-r--r--test/CodeGen/X86/atom-lea-addw-bug.ll6
-rw-r--r--test/CodeGen/X86/atom-lea-sp.ll2
-rw-r--r--test/CodeGen/X86/atom-sched.ll8
-rw-r--r--test/CodeGen/X86/atomic-dagsched.ll30
-rw-r--r--test/CodeGen/X86/atomic-load-store-wide.ll8
-rw-r--r--test/CodeGen/X86/atomic-load-store.ll2
-rw-r--r--test/CodeGen/X86/atomic-minmax-i6432.ll12
-rw-r--r--test/CodeGen/X86/atomic-or.ll10
-rw-r--r--test/CodeGen/X86/atomic-pointer.ll2
-rw-r--r--test/CodeGen/X86/atomic128.ll4
-rw-r--r--test/CodeGen/X86/atomic32.ll275
-rw-r--r--test/CodeGen/X86/atomic64.ll2
-rw-r--r--test/CodeGen/X86/atomic6432.ll13
-rw-r--r--test/CodeGen/X86/atomic_mi.ll60
-rw-r--r--test/CodeGen/X86/atomic_op.ll2
-rw-r--r--test/CodeGen/X86/avoid-loop-align-2.ll6
-rw-r--r--test/CodeGen/X86/avoid-loop-align.ll8
-rw-r--r--test/CodeGen/X86/avoid_complex_am.ll12
-rw-r--r--test/CodeGen/X86/avx-arith.ll6
-rw-r--r--test/CodeGen/X86/avx-basic.ll15
-rw-r--r--test/CodeGen/X86/avx-bitcast.ll11
-rw-r--r--test/CodeGen/X86/avx-cast.ll75
-rw-r--r--test/CodeGen/X86/avx-cvt-2.ll52
-rw-r--r--test/CodeGen/X86/avx-cvt.ll109
-rw-r--r--test/CodeGen/X86/avx-insertelt.ll83
-rw-r--r--test/CodeGen/X86/avx-intel-ocl.ll12
-rw-r--r--test/CodeGen/X86/avx-intrinsics-x86-upgrade.ll125
-rw-r--r--test/CodeGen/X86/avx-intrinsics-x86.ll1975
-rw-r--r--test/CodeGen/X86/avx-load-store.ll22
-rw-r--r--test/CodeGen/X86/avx-logic.ll114
-rw-r--r--test/CodeGen/X86/avx-shift.ll221
-rw-r--r--test/CodeGen/X86/avx-splat.ll10
-rwxr-xr-xtest/CodeGen/X86/avx-trunc.ll6
-rw-r--r--test/CodeGen/X86/avx-unpack.ll16
-rw-r--r--test/CodeGen/X86/avx-varargs-x86_64.ll4
-rw-r--r--test/CodeGen/X86/avx-vbroadcast.ll30
-rw-r--r--test/CodeGen/X86/avx-vextractf128.ll27
-rw-r--r--test/CodeGen/X86/avx-vinsertf128.ll50
-rw-r--r--test/CodeGen/X86/avx-vperm2x128.ll99
-rw-r--r--test/CodeGen/X86/avx-vzeroupper.ll6
-rw-r--r--test/CodeGen/X86/avx.ll20
-rw-r--r--test/CodeGen/X86/avx1-logical-load-folding.ll8
-rw-r--r--test/CodeGen/X86/avx1-stack-reload-folding.ll83
-rw-r--r--test/CodeGen/X86/avx2-arith.ll43
-rwxr-xr-xtest/CodeGen/X86/avx2-conversions.ll12
-rw-r--r--test/CodeGen/X86/avx2-gather.ll27
-rw-r--r--test/CodeGen/X86/avx2-intrinsics-x86-upgrade.ll54
-rw-r--r--test/CodeGen/X86/avx2-intrinsics-x86.ll57
-rw-r--r--test/CodeGen/X86/avx2-pmovx-256-old-shuffle.ll29
-rw-r--r--test/CodeGen/X86/avx2-pmovxrm-intrinsics.ll24
-rw-r--r--test/CodeGen/X86/avx2-shift.ll20
-rw-r--r--test/CodeGen/X86/avx2-vbroadcast.ll52
-rw-r--r--test/CodeGen/X86/avx2-vector-shifts.ll91
-rw-r--r--test/CodeGen/X86/avx512-arith.ll28
-rw-r--r--test/CodeGen/X86/avx512-bugfix-23634.ll35
-rw-r--r--test/CodeGen/X86/avx512-build-vector.ll2
-rw-r--r--test/CodeGen/X86/avx512-calling-conv.ll154
-rw-r--r--test/CodeGen/X86/avx512-cvt.ll29
-rw-r--r--test/CodeGen/X86/avx512-fma-intrinsics.ll326
-rw-r--r--test/CodeGen/X86/avx512-gather-scatter-intrin.ll8
-rwxr-xr-xtest/CodeGen/X86/avx512-i1test.ll6
-rw-r--r--test/CodeGen/X86/avx512-insert-extract.ll52
-rw-r--r--test/CodeGen/X86/avx512-intel-ocl.ll105
-rw-r--r--test/CodeGen/X86/avx512-intrinsics.ll1479
-rw-r--r--test/CodeGen/X86/avx512-logic.ll4
-rwxr-xr-xtest/CodeGen/X86/avx512-mask-bugfix.ll57
-rw-r--r--test/CodeGen/X86/avx512-mask-op.ll363
-rw-r--r--test/CodeGen/X86/avx512-mov.ll60
-rw-r--r--test/CodeGen/X86/avx512-round.ll106
-rw-r--r--test/CodeGen/X86/avx512-scalar.ll92
-rw-r--r--test/CodeGen/X86/avx512-select.ll21
-rw-r--r--test/CodeGen/X86/avx512-shift.ll37
-rw-r--r--test/CodeGen/X86/avx512-trunc-ext.ll921
-rw-r--r--test/CodeGen/X86/avx512-vbroadcast.ll40
-rw-r--r--test/CodeGen/X86/avx512-vec-cmp.ll593
-rw-r--r--test/CodeGen/X86/avx512bw-arith.ll8
-rw-r--r--test/CodeGen/X86/avx512bw-intrinsics.ll437
-rw-r--r--test/CodeGen/X86/avx512bw-mask-op.ll4
-rw-r--r--test/CodeGen/X86/avx512bw-mov.ll12
-rw-r--r--test/CodeGen/X86/avx512bw-vec-cmp.ll12
-rw-r--r--test/CodeGen/X86/avx512bwvl-arith.ll16
-rw-r--r--test/CodeGen/X86/avx512bwvl-intrinsics.ll1700
-rw-r--r--test/CodeGen/X86/avx512bwvl-mov.ll24
-rw-r--r--test/CodeGen/X86/avx512bwvl-vec-cmp.ll24
-rw-r--r--test/CodeGen/X86/avx512dq-mask-op.ll2
-rw-r--r--test/CodeGen/X86/avx512dqvl-intrinsics.ll1155
-rw-r--r--test/CodeGen/X86/avx512er-intrinsics.ll30
-rw-r--r--test/CodeGen/X86/avx512vl-arith.ll40
-rw-r--r--test/CodeGen/X86/avx512vl-intrinsics.ll1695
-rw-r--r--test/CodeGen/X86/avx512vl-mov.ll96
-rw-r--r--test/CodeGen/X86/avx512vl-vec-cmp.ll40
-rw-r--r--test/CodeGen/X86/barrier.ll3
-rw-r--r--test/CodeGen/X86/bitcast-mmx.ll77
-rw-r--r--test/CodeGen/X86/block-placement.ll236
-rw-r--r--test/CodeGen/X86/bmi.ll54
-rw-r--r--test/CodeGen/X86/bool-zext.ll4
-rw-r--r--test/CodeGen/X86/branchfolding-landingpads.ll45
-rw-r--r--test/CodeGen/X86/brcond.ll4
-rw-r--r--test/CodeGen/X86/break-anti-dependencies.ll4
-rw-r--r--test/CodeGen/X86/break-false-dep.ll34
-rw-r--r--test/CodeGen/X86/bswap-vector.ll366
-rw-r--r--test/CodeGen/X86/bswap.ll6
-rw-r--r--test/CodeGen/X86/byval-align.ll16
-rw-r--r--test/CodeGen/X86/byval.ll4
-rw-r--r--test/CodeGen/X86/byval2.ll6
-rw-r--r--test/CodeGen/X86/byval3.ll12
-rw-r--r--test/CodeGen/X86/byval4.ll12
-rw-r--r--test/CodeGen/X86/byval5.ll12
-rw-r--r--test/CodeGen/X86/byval6.ll4
-rw-r--r--test/CodeGen/X86/byval7.ll2
-rw-r--r--test/CodeGen/X86/cache-intrinsic.ll8
-rw-r--r--test/CodeGen/X86/call-push.ll4
-rw-r--r--test/CodeGen/X86/cas.ll24
-rw-r--r--test/CodeGen/X86/catch.ll2
-rw-r--r--test/CodeGen/X86/cfi_enforcing.ll34
-rw-r--r--test/CodeGen/X86/cfi_invoke.ll35
-rw-r--r--test/CodeGen/X86/cfi_non_default_function.ll27
-rw-r--r--test/CodeGen/X86/cfi_simple_indirect_call.ll43
-rw-r--r--test/CodeGen/X86/cfstring.ll4
-rw-r--r--test/CodeGen/X86/chain_order.ll14
-rw-r--r--test/CodeGen/X86/change-compare-stride-1.ll38
-rw-r--r--test/CodeGen/X86/clobber-fi0.ll6
-rw-r--r--test/CodeGen/X86/cmov-double.ll52
-rw-r--r--test/CodeGen/X86/cmov-into-branch.ll8
-rw-r--r--test/CodeGen/X86/cmov.ll18
-rw-r--r--test/CodeGen/X86/cmovcmov.ll226
-rw-r--r--test/CodeGen/X86/cmp-fast-isel.ll45
-rw-r--r--test/CodeGen/X86/cmp.ll12
-rw-r--r--test/CodeGen/X86/cmpxchg-clobber-flags.ll2
-rw-r--r--test/CodeGen/X86/cmpxchg-i1.ll2
-rw-r--r--test/CodeGen/X86/cmpxchg-i128-i1.ll2
-rw-r--r--test/CodeGen/X86/cmpxchg16b.ll3
-rw-r--r--test/CodeGen/X86/coalesce-esp.ll6
-rw-r--r--test/CodeGen/X86/coalesce-implicitdef.ll12
-rw-r--r--test/CodeGen/X86/coalescer-commute1.ll6
-rw-r--r--test/CodeGen/X86/coalescer-commute4.ll8
-rw-r--r--test/CodeGen/X86/coalescer-cross.ll12
-rw-r--r--test/CodeGen/X86/coalescer-dce2.ll16
-rw-r--r--test/CodeGen/X86/coalescer-identity.ll6
-rw-r--r--test/CodeGen/X86/coalescer-remat.ll2
-rw-r--r--test/CodeGen/X86/coalescer-subreg.ll29
-rw-r--r--test/CodeGen/X86/code_placement.ll80
-rw-r--r--test/CodeGen/X86/code_placement_outline_optional_branches.ll77
-rw-r--r--test/CodeGen/X86/codegen-prepare-addrmode-sext.ll162
-rw-r--r--test/CodeGen/X86/codegen-prepare-cast.ll8
-rw-r--r--test/CodeGen/X86/codegen-prepare-crash.ll2
-rw-r--r--test/CodeGen/X86/codegen-prepare-extload.ll81
-rw-r--r--test/CodeGen/X86/codegen-prepare.ll14
-rw-r--r--test/CodeGen/X86/codemodel.ll12
-rw-r--r--test/CodeGen/X86/coff-comdat.ll2
-rw-r--r--test/CodeGen/X86/combine-and.ll148
-rw-r--r--test/CodeGen/X86/combine-or.ll97
-rw-r--r--test/CodeGen/X86/combiner-aa-0.ll8
-rw-r--r--test/CodeGen/X86/combiner-aa-1.ll6
-rw-r--r--test/CodeGen/X86/commute-blend-avx2.ll20
-rw-r--r--test/CodeGen/X86/commute-blend-sse41.ll10
-rw-r--r--test/CodeGen/X86/commute-clmul.ll60
-rw-r--r--test/CodeGen/X86/commute-fcmp.ll340
-rw-r--r--test/CodeGen/X86/commute-intrinsic.ll2
-rw-r--r--test/CodeGen/X86/commute-xop.ll184
-rw-r--r--test/CodeGen/X86/compact-unwind.ll18
-rw-r--r--test/CodeGen/X86/complex-asm.ll8
-rw-r--r--test/CodeGen/X86/complex-fca.ll26
-rw-r--r--test/CodeGen/X86/computeKnownBits_urem.ll2
-rw-r--r--test/CodeGen/X86/const-base-addr.ll12
-rw-r--r--test/CodeGen/X86/constant-combines.ll6
-rw-r--r--test/CodeGen/X86/constant-hoisting-optnone.ll21
-rw-r--r--test/CodeGen/X86/constant-hoisting-shift-immediate.ll4
-rw-r--r--test/CodeGen/X86/constructor.ll8
-rw-r--r--test/CodeGen/X86/convert-2-addr-3-addr-inc64.ll4
-rw-r--r--test/CodeGen/X86/copy-propagation.ll44
-rw-r--r--test/CodeGen/X86/crash-O0.ll4
-rw-r--r--test/CodeGen/X86/crash-nosse.ll2
-rw-r--r--test/CodeGen/X86/crash.ll58
-rw-r--r--test/CodeGen/X86/critical-anti-dep-breaker.ll4
-rw-r--r--test/CodeGen/X86/critical-edge-split-2.ll2
-rw-r--r--test/CodeGen/X86/cse-add-with-overflow.ll4
-rw-r--r--test/CodeGen/X86/cttz-ctlz.ll422
-rw-r--r--test/CodeGen/X86/cvt16.ll8
-rw-r--r--test/CodeGen/X86/dag-optnone.ll2
-rw-r--r--test/CodeGen/X86/dagcombine-and-setcc.ll2
-rw-r--r--test/CodeGen/X86/dagcombine-buildvector.ll2
-rw-r--r--test/CodeGen/X86/dagcombine-cse.ll8
-rw-r--r--test/CodeGen/X86/darwin-quote.ll2
-rw-r--r--test/CodeGen/X86/darwin-stub.ll2
-rw-r--r--test/CodeGen/X86/dbg-changes-codegen-branch-folding.ll200
-rw-r--r--test/CodeGen/X86/dbg-changes-codegen.ll20
-rw-r--r--test/CodeGen/X86/dbg-combine.ll113
-rw-r--r--test/CodeGen/X86/discontiguous-loops.ll6
-rw-r--r--test/CodeGen/X86/div8.ll6
-rw-r--r--test/CodeGen/X86/dllexport-x86_64.ll5
-rw-r--r--test/CodeGen/X86/dllexport.ll5
-rw-r--r--test/CodeGen/X86/dllimport-x86_64.ll10
-rw-r--r--test/CodeGen/X86/dllimport.ll10
-rw-r--r--test/CodeGen/X86/dollar-name.ll4
-rw-r--r--test/CodeGen/X86/dont-trunc-store-double-to-float.ll2
-rw-r--r--test/CodeGen/X86/dwarf-comp-dir.ll10
-rw-r--r--test/CodeGen/X86/dwarf-eh-prepare.ll158
-rw-r--r--test/CodeGen/X86/dynamic-alloca-lifetime.ll4
-rw-r--r--test/CodeGen/X86/dynamic-allocas-VLAs.ll22
-rw-r--r--test/CodeGen/X86/early-ifcvt.ll8
-rw-r--r--test/CodeGen/X86/eh-label.ll24
-rw-r--r--test/CodeGen/X86/emit-big-cst.ll2
-rw-r--r--test/CodeGen/X86/empty-functions.ll2
-rw-r--r--test/CodeGen/X86/exception-label.ll23
-rw-r--r--test/CodeGen/X86/exedeps-movq.ll68
-rw-r--r--test/CodeGen/X86/expand-opaque-const.ll6
-rw-r--r--test/CodeGen/X86/extend.ll4
-rw-r--r--test/CodeGen/X86/extern_weak.ll2
-rw-r--r--test/CodeGen/X86/extract-extract.ll6
-rw-r--r--test/CodeGen/X86/extract-store.ll30
-rw-r--r--test/CodeGen/X86/extractelement-index.ll51
-rw-r--r--test/CodeGen/X86/extractelement-legalization-store-ordering.ll57
-rw-r--r--test/CodeGen/X86/extractelement-load.ll8
-rw-r--r--test/CodeGen/X86/extractps.ll4
-rw-r--r--test/CodeGen/X86/f16c-intrinsics.ll71
-rw-r--r--test/CodeGen/X86/fast-isel-args-fail.ll2
-rw-r--r--test/CodeGen/X86/fast-isel-args-fail2.ll2
-rw-r--r--test/CodeGen/X86/fast-isel-args.ll2
-rw-r--r--test/CodeGen/X86/fast-isel-avoid-unnecessary-pic-base.ll6
-rw-r--r--test/CodeGen/X86/fast-isel-branch_weights.ll2
-rw-r--r--test/CodeGen/X86/fast-isel-call-bool.ll2
-rw-r--r--test/CodeGen/X86/fast-isel-call.ll2
-rw-r--r--test/CodeGen/X86/fast-isel-cmp-branch2.ll2
-rw-r--r--test/CodeGen/X86/fast-isel-cmp-branch3.ll2
-rw-r--r--test/CodeGen/X86/fast-isel-cmp.ll2
-rw-r--r--test/CodeGen/X86/fast-isel-constrain-store-indexreg.ll25
-rw-r--r--test/CodeGen/X86/fast-isel-divrem-x86-64.ll2
-rw-r--r--test/CodeGen/X86/fast-isel-divrem.ll4
-rw-r--r--test/CodeGen/X86/fast-isel-double-half-convertion.ll23
-rw-r--r--test/CodeGen/X86/fast-isel-extract.ll2
-rw-r--r--test/CodeGen/X86/fast-isel-float-half-convertion.ll28
-rw-r--r--test/CodeGen/X86/fast-isel-fneg.ll2
-rw-r--r--test/CodeGen/X86/fast-isel-fold-mem.ll4
-rw-r--r--test/CodeGen/X86/fast-isel-fptrunc-fpext.ll65
-rw-r--r--test/CodeGen/X86/fast-isel-gep.ll34
-rw-r--r--test/CodeGen/X86/fast-isel-gv.ll6
-rw-r--r--test/CodeGen/X86/fast-isel-i1.ll13
-rw-r--r--test/CodeGen/X86/fast-isel-int-float-conversion.ll45
-rw-r--r--test/CodeGen/X86/fast-isel-mem.ll6
-rw-r--r--test/CodeGen/X86/fast-isel-movsbl-indexreg.ll20
-rw-r--r--test/CodeGen/X86/fast-isel-ret-ext.ll4
-rw-r--r--test/CodeGen/X86/fast-isel-select-cmov.ll2
-rw-r--r--test/CodeGen/X86/fast-isel-select-cmov2.ll26
-rw-r--r--test/CodeGen/X86/fast-isel-select-pseudo-cmov.ll4
-rw-r--r--test/CodeGen/X86/fast-isel-select-sse.ll100
-rw-r--r--test/CodeGen/X86/fast-isel-sext.ll9
-rw-r--r--test/CodeGen/X86/fast-isel-sse12-fptoint.ll4
-rw-r--r--test/CodeGen/X86/fast-isel-store.ll4
-rw-r--r--test/CodeGen/X86/fast-isel-tailcall.ll2
-rw-r--r--test/CodeGen/X86/fast-isel-tls.ll4
-rw-r--r--test/CodeGen/X86/fast-isel-trunc-kill-subreg.ll40
-rw-r--r--test/CodeGen/X86/fast-isel-vecload.ll185
-rw-r--r--test/CodeGen/X86/fast-isel-x32.ll4
-rw-r--r--test/CodeGen/X86/fast-isel-x86-64.ll16
-rw-r--r--test/CodeGen/X86/fast-isel-x86.ll4
-rw-r--r--test/CodeGen/X86/fast-isel.ll24
-rw-r--r--test/CodeGen/X86/fastcall-correct-mangling.ll2
-rw-r--r--test/CodeGen/X86/fastcc-byval.ll4
-rw-r--r--test/CodeGen/X86/fastcc-sret.ll10
-rw-r--r--test/CodeGen/X86/fastcc.ll8
-rw-r--r--test/CodeGen/X86/fastisel-gep-promote-before-add.ll14
-rw-r--r--test/CodeGen/X86/fastmath-float-half-conversion.ll52
-rw-r--r--test/CodeGen/X86/fcmove.ll15
-rw-r--r--test/CodeGen/X86/fdiv-combine.ll31
-rw-r--r--test/CodeGen/X86/float-conv-elim.ll32
-rw-r--r--test/CodeGen/X86/floor-soft-float.ll4
-rw-r--r--test/CodeGen/X86/fltused.ll2
-rw-r--r--test/CodeGen/X86/fltused_function_pointer.ll2
-rw-r--r--test/CodeGen/X86/fma-do-not-commute.ll4
-rwxr-xr-xtest/CodeGen/X86/fma3-intrinsics.ll32
-rw-r--r--test/CodeGen/X86/fma4-intrinsics-x86_64-folded-load.ll24
-rw-r--r--test/CodeGen/X86/fma_patterns.ll4
-rw-r--r--test/CodeGen/X86/fmul-combines.ll34
-rw-r--r--test/CodeGen/X86/fmul-zero.ll2
-rw-r--r--test/CodeGen/X86/fnabs.ll2
-rw-r--r--test/CodeGen/X86/fold-add.ll6
-rw-r--r--test/CodeGen/X86/fold-and-shift.ll20
-rw-r--r--test/CodeGen/X86/fold-call-2.ll2
-rw-r--r--test/CodeGen/X86/fold-call-3.ll18
-rw-r--r--test/CodeGen/X86/fold-call-oper.ll8
-rw-r--r--test/CodeGen/X86/fold-call.ll4
-rw-r--r--test/CodeGen/X86/fold-load-unops.ll79
-rw-r--r--test/CodeGen/X86/fold-load-vec.ll26
-rw-r--r--test/CodeGen/X86/fold-load.ll8
-rw-r--r--test/CodeGen/X86/fold-mul-lohi.ll8
-rw-r--r--test/CodeGen/X86/fold-pcmpeqd-2.ll4
-rw-r--r--test/CodeGen/X86/fold-sext-trunc.ll4
-rw-r--r--test/CodeGen/X86/fold-tied-op.ll20
-rw-r--r--test/CodeGen/X86/fold-vector-bv-crash.ll17
-rw-r--r--test/CodeGen/X86/fold-vector-shuffle-crash.ll386
-rw-r--r--test/CodeGen/X86/fold-vector-trunc-sitofp.ll13
-rw-r--r--test/CodeGen/X86/fold-vex.ll2
-rw-r--r--test/CodeGen/X86/fold-zext-trunc.ll4
-rw-r--r--test/CodeGen/X86/force-align-stack-alloca.ll2
-rw-r--r--test/CodeGen/X86/fp-double-rounding.ll31
-rw-r--r--test/CodeGen/X86/fp-fast.ll98
-rw-r--r--test/CodeGen/X86/fp-load-trunc.ll8
-rw-r--r--test/CodeGen/X86/fp-stack-O0-crash.ll8
-rw-r--r--test/CodeGen/X86/fp-stack-O0.ll2
-rw-r--r--test/CodeGen/X86/fp-stack-compare-cmov.ll2
-rw-r--r--test/CodeGen/X86/fp-stack-compare.ll2
-rw-r--r--test/CodeGen/X86/fp-stack-ret-store.ll4
-rw-r--r--test/CodeGen/X86/fp-stack-ret.ll2
-rw-r--r--test/CodeGen/X86/fp-stack.ll6
-rw-r--r--test/CodeGen/X86/fp-trunc.ll8
-rw-r--r--test/CodeGen/X86/fp2sint.ll4
-rw-r--r--test/CodeGen/X86/fp_load_cast_fold.ll6
-rw-r--r--test/CodeGen/X86/fp_load_fold.ll12
-rw-r--r--test/CodeGen/X86/fpstack-debuginstr-kill.ll42
-rw-r--r--test/CodeGen/X86/frameaddr.ll23
-rw-r--r--test/CodeGen/X86/frameallocate.ll39
-rw-r--r--test/CodeGen/X86/frameescape.ll128
-rw-r--r--test/CodeGen/X86/full-lsr.ll20
-rw-r--r--test/CodeGen/X86/function-subtarget-features-2.ll26
-rw-r--r--test/CodeGen/X86/function-subtarget-features.ll81
-rw-r--r--test/CodeGen/X86/ga-offset.ll2
-rw-r--r--test/CodeGen/X86/gather-addresses.ll24
-rw-r--r--test/CodeGen/X86/gcc_except_table.ll3
-rw-r--r--test/CodeGen/X86/gcc_except_table_functions.ll1
-rw-r--r--test/CodeGen/X86/getelementptr.ll20
-rw-r--r--test/CodeGen/X86/ghc-cc.ll8
-rw-r--r--test/CodeGen/X86/ghc-cc64.ll32
-rw-r--r--test/CodeGen/X86/global-sections-comdat.ll46
-rw-r--r--test/CodeGen/X86/global-sections.ll102
-rw-r--r--test/CodeGen/X86/gs-fold.ll6
-rw-r--r--test/CodeGen/X86/h-register-addressing-32.ll28
-rw-r--r--test/CodeGen/X86/h-register-addressing-64.ll28
-rw-r--r--test/CodeGen/X86/h-registers-2.ll2
-rw-r--r--test/CodeGen/X86/h-registers-3.ll2
-rw-r--r--test/CodeGen/X86/haddsub-2.ll4
-rw-r--r--test/CodeGen/X86/haddsub-undef.ll6
-rw-r--r--test/CodeGen/X86/haddsub.ll15
-rw-r--r--test/CodeGen/X86/half.ll216
-rw-r--r--test/CodeGen/X86/hidden-vis-2.ll2
-rw-r--r--test/CodeGen/X86/hidden-vis-3.ll4
-rw-r--r--test/CodeGen/X86/hidden-vis-4.ll2
-rw-r--r--test/CodeGen/X86/hidden-vis-pic.ll4
-rw-r--r--test/CodeGen/X86/hipe-cc.ll12
-rw-r--r--test/CodeGen/X86/hipe-cc64.ll14
-rw-r--r--test/CodeGen/X86/hoist-common.ll2
-rw-r--r--test/CodeGen/X86/hoist-invariant-load.ll6
-rw-r--r--test/CodeGen/X86/huge-stack-offset.ll59
-rw-r--r--test/CodeGen/X86/i128-mul.ll6
-rw-r--r--test/CodeGen/X86/i128-ret.ll2
-rw-r--r--test/CodeGen/X86/i1narrowfail.ll10
-rw-r--r--test/CodeGen/X86/i256-add.ll8
-rw-r--r--test/CodeGen/X86/i2k.ll4
-rw-r--r--test/CodeGen/X86/i486-fence-loop.ll7
-rw-r--r--test/CodeGen/X86/i64-mem-copy.ll83
-rw-r--r--test/CodeGen/X86/illegal-vector-args-return.ll2
-rw-r--r--test/CodeGen/X86/imul.ll110
-rw-r--r--test/CodeGen/X86/imul64-lea.ll25
-rw-r--r--test/CodeGen/X86/inalloca-ctor.ll6
-rw-r--r--test/CodeGen/X86/inalloca-invoke.ll13
-rw-r--r--test/CodeGen/X86/inalloca-stdcall.ll9
-rw-r--r--test/CodeGen/X86/inalloca.ll27
-rw-r--r--test/CodeGen/X86/init-priority.ll51
-rw-r--r--test/CodeGen/X86/inline-asm-duplicated-constraint.ll12
-rw-r--r--test/CodeGen/X86/inline-asm-fpstack.ll8
-rw-r--r--test/CodeGen/X86/inline-asm-out-regs.ll4
-rw-r--r--test/CodeGen/X86/inline-asm-ptr-cast.ll6
-rw-r--r--test/CodeGen/X86/inline-asm-stack-realign.ll2
-rw-r--r--test/CodeGen/X86/inline-asm-stack-realign2.ll2
-rw-r--r--test/CodeGen/X86/inline-asm-stack-realign3.ll2
-rw-r--r--test/CodeGen/X86/inline-asm-tied.ll6
-rw-r--r--test/CodeGen/X86/ins_split_regalloc.ll2
-rw-r--r--test/CodeGen/X86/ins_subreg_coalesce-1.ll2
-rw-r--r--test/CodeGen/X86/ins_subreg_coalesce-3.ll24
-rw-r--r--test/CodeGen/X86/insert-positions.ll2
-rw-r--r--test/CodeGen/X86/insertps-O0-bug.ll52
-rw-r--r--test/CodeGen/X86/invalid-shift-immediate.ll4
-rw-r--r--test/CodeGen/X86/isel-optnone.ll20
-rw-r--r--test/CodeGen/X86/isel-sink.ll4
-rw-r--r--test/CodeGen/X86/isel-sink2.ll8
-rw-r--r--test/CodeGen/X86/isel-sink3.ll8
-rw-r--r--test/CodeGen/X86/isint.ll6
-rw-r--r--test/CodeGen/X86/jump_sign.ll28
-rw-r--r--test/CodeGen/X86/jump_table_alias.ll32
-rw-r--r--test/CodeGen/X86/jump_table_align.ll29
-rw-r--r--test/CodeGen/X86/jump_table_bitcast.ll43
-rw-r--r--test/CodeGen/X86/jump_tables.ll255
-rw-r--r--test/CodeGen/X86/large-code-model-isel.ll2
-rw-r--r--test/CodeGen/X86/large-constants.ll16
-rw-r--r--test/CodeGen/X86/large-gep-chain.ll50554
-rw-r--r--test/CodeGen/X86/large-gep-scale.ll2
-rw-r--r--test/CodeGen/X86/ldzero.ll12
-rw-r--r--test/CodeGen/X86/lea-5.ll8
-rw-r--r--test/CodeGen/X86/lea-recursion.ll30
-rw-r--r--test/CodeGen/X86/leaf-fp-elim.ll2
-rw-r--r--test/CodeGen/X86/legalize-shift-64.ll2
-rw-r--r--test/CodeGen/X86/legalize-sub-zero-2.ll2
-rw-r--r--test/CodeGen/X86/licm-nested.ll14
-rw-r--r--test/CodeGen/X86/licm-regpressure.ll39
-rw-r--r--test/CodeGen/X86/licm-symbol.ll4
-rw-r--r--test/CodeGen/X86/liveness-local-regalloc.ll4
-rw-r--r--test/CodeGen/X86/llc-override-mcpu-mattr.ll21
-rw-r--r--test/CodeGen/X86/load-slice.ll26
-rw-r--r--test/CodeGen/X86/logical-load-fold.ll53
-rw-r--r--test/CodeGen/X86/longlong-deadload.ll2
-rw-r--r--test/CodeGen/X86/loop-hoist.ll2
-rw-r--r--test/CodeGen/X86/loop-strength-reduce-2.ll4
-rw-r--r--test/CodeGen/X86/loop-strength-reduce-3.ll4
-rw-r--r--test/CodeGen/X86/loop-strength-reduce.ll4
-rw-r--r--test/CodeGen/X86/loop-strength-reduce2.ll2
-rw-r--r--test/CodeGen/X86/loop-strength-reduce4.ll32
-rw-r--r--test/CodeGen/X86/loop-strength-reduce7.ll6
-rw-r--r--test/CodeGen/X86/loop-strength-reduce8.ll16
-rw-r--r--test/CodeGen/X86/lower-vec-shift-2.ll130
-rw-r--r--test/CodeGen/X86/lsr-delayed-fold.ll6
-rw-r--r--test/CodeGen/X86/lsr-i386.ll4
-rw-r--r--test/CodeGen/X86/lsr-interesting-step.ll2
-rw-r--r--test/CodeGen/X86/lsr-loop-exit-cond.ll84
-rw-r--r--test/CodeGen/X86/lsr-normalization.ll26
-rw-r--r--test/CodeGen/X86/lsr-quadratic-expand.ll2
-rw-r--r--test/CodeGen/X86/lsr-redundant-addressing.ll22
-rw-r--r--test/CodeGen/X86/lsr-reuse-trunc.ll10
-rw-r--r--test/CodeGen/X86/lsr-reuse.ll224
-rw-r--r--test/CodeGen/X86/lsr-static-addr.ll4
-rw-r--r--test/CodeGen/X86/lsr-wrap.ll2
-rw-r--r--test/CodeGen/X86/lzcnt-tzcnt.ll36
-rw-r--r--test/CodeGen/X86/machine-cse.ll10
-rw-r--r--test/CodeGen/X86/masked-iv-safe.ll96
-rw-r--r--test/CodeGen/X86/masked-iv-unsafe.ll156
-rw-r--r--test/CodeGen/X86/masked_gather_scatter.ll142
-rw-r--r--test/CodeGen/X86/masked_memop.ll54
-rw-r--r--test/CodeGen/X86/mcinst-lowering.ll2
-rw-r--r--test/CodeGen/X86/mem-intrin-base-reg.ll18
-rw-r--r--test/CodeGen/X86/mem-promote-integers.ll70
-rw-r--r--test/CodeGen/X86/memcmp.ll12
-rw-r--r--test/CodeGen/X86/memcpy-2.ll4
-rw-r--r--test/CodeGen/X86/memcpy.ll4
-rw-r--r--test/CodeGen/X86/memset-3.ll2
-rw-r--r--test/CodeGen/X86/memset.ll2
-rw-r--r--test/CodeGen/X86/merge-consecutive-stores-i1.ll15
-rw-r--r--test/CodeGen/X86/merge_store.ll8
-rw-r--r--test/CodeGen/X86/mingw-alloca.ll2
-rw-r--r--test/CodeGen/X86/misaligned-memset.ll4
-rw-r--r--test/CodeGen/X86/misched-aa-colored.ll8
-rw-r--r--test/CodeGen/X86/misched-aa-mmos.ll8
-rw-r--r--test/CodeGen/X86/misched-balance.ll172
-rw-r--r--test/CodeGen/X86/misched-code-difference-with-debug.ll56
-rw-r--r--test/CodeGen/X86/misched-crash.ll6
-rw-r--r--test/CodeGen/X86/misched-fusion.ll22
-rw-r--r--test/CodeGen/X86/misched-matmul.ll158
-rw-r--r--test/CodeGen/X86/misched-matrix.ll112
-rw-r--r--test/CodeGen/X86/misched-new.ll6
-rw-r--r--test/CodeGen/X86/mmx-arg-passing-x86-64.ll54
-rw-r--r--test/CodeGen/X86/mmx-arg-passing.ll45
-rw-r--r--test/CodeGen/X86/mmx-arg-passing2.ll28
-rw-r--r--test/CodeGen/X86/mmx-arith.ll543
-rw-r--r--test/CodeGen/X86/mmx-bitcast-to-i64.ll31
-rw-r--r--test/CodeGen/X86/mmx-bitcast.ll108
-rw-r--r--test/CodeGen/X86/mmx-copy-gprs.ll2
-rw-r--r--test/CodeGen/X86/mmx-emms.ll11
-rw-r--r--test/CodeGen/X86/mmx-fold-load.ll282
-rw-r--r--test/CodeGen/X86/mmx-insert-element.ll9
-rw-r--r--test/CodeGen/X86/mmx-intrinsics.ll (renamed from test/CodeGen/X86/mmx-builtins.ll)9
-rw-r--r--test/CodeGen/X86/mmx-pinsrw.ll17
-rw-r--r--test/CodeGen/X86/mmx-punpckhdq.ll31
-rw-r--r--test/CodeGen/X86/mmx-s2v.ll15
-rw-r--r--test/CodeGen/X86/mmx-shift.ll39
-rw-r--r--test/CodeGen/X86/mmx-shuffle.ll31
-rw-r--r--test/CodeGen/X86/movbe.ll6
-rw-r--r--test/CodeGen/X86/movfs.ll4
-rw-r--r--test/CodeGen/X86/movgs.ll16
-rw-r--r--test/CodeGen/X86/movmsk.ll16
-rw-r--r--test/CodeGen/X86/movtopush.ll268
-rw-r--r--test/CodeGen/X86/ms-inline-asm.ll12
-rw-r--r--test/CodeGen/X86/mul128_sext_loop.ll4
-rw-r--r--test/CodeGen/X86/muloti.ll18
-rw-r--r--test/CodeGen/X86/mult-alt-generic-i686.ll46
-rw-r--r--test/CodeGen/X86/mult-alt-generic-x86_64.ll46
-rw-r--r--test/CodeGen/X86/mult-alt-x86.ll48
-rw-r--r--test/CodeGen/X86/multiple-loop-post-inc.ll78
-rw-r--r--test/CodeGen/X86/mulx32.ll2
-rw-r--r--test/CodeGen/X86/mulx64.ll2
-rw-r--r--test/CodeGen/X86/musttail-fastcall.ll12
-rw-r--r--test/CodeGen/X86/musttail-indirect.ll44
-rw-r--r--test/CodeGen/X86/musttail-thiscall.ll8
-rw-r--r--test/CodeGen/X86/musttail-varargs.ll22
-rw-r--r--test/CodeGen/X86/nancvt.ll106
-rw-r--r--test/CodeGen/X86/narrow-shl-cst.ll23
-rw-r--r--test/CodeGen/X86/narrow-shl-load.ll6
-rw-r--r--test/CodeGen/X86/narrow_op-1.ll8
-rw-r--r--test/CodeGen/X86/negate-add-zero.ll74
-rw-r--r--test/CodeGen/X86/negative-subscript.ll2
-rw-r--r--test/CodeGen/X86/no-cmov.ll2
-rw-r--r--test/CodeGen/X86/non-unique-sections.ll15
-rw-r--r--test/CodeGen/X86/nontemporal-2.ll286
-rw-r--r--test/CodeGen/X86/norex-subreg.ll6
-rw-r--r--test/CodeGen/X86/nosse-error1.ll8
-rw-r--r--test/CodeGen/X86/nosse-error2.ll8
-rw-r--r--test/CodeGen/X86/nosse-varargs.ll25
-rw-r--r--test/CodeGen/X86/null-streamer.ll15
-rw-r--r--test/CodeGen/X86/object-size.ll20
-rw-r--r--test/CodeGen/X86/odr_comdat.ll16
-rw-r--r--test/CodeGen/X86/opaque-constant-asm.ll2
-rw-r--r--test/CodeGen/X86/opt-ext-uses.ll2
-rw-r--r--test/CodeGen/X86/optimize-max-0.ll60
-rw-r--r--test/CodeGen/X86/optimize-max-1.ll8
-rw-r--r--test/CodeGen/X86/optimize-max-2.ll4
-rw-r--r--test/CodeGen/X86/optimize-max-3.ll4
-rw-r--r--test/CodeGen/X86/or-address.ll16
-rw-r--r--test/CodeGen/X86/or-branch.ll4
-rw-r--r--test/CodeGen/X86/packed_struct.ll10
-rw-r--r--test/CodeGen/X86/palignr-2.ll28
-rw-r--r--test/CodeGen/X86/palignr.ll4
-rw-r--r--test/CodeGen/X86/patchpoint-invoke.ll4
-rw-r--r--test/CodeGen/X86/patchpoint-webkit_jscc.ll10
-rw-r--r--test/CodeGen/X86/patchpoint.ll42
-rw-r--r--test/CodeGen/X86/peep-test-0.ll4
-rw-r--r--test/CodeGen/X86/peep-test-1.ll4
-rw-r--r--test/CodeGen/X86/peephole-fold-movsd.ll8
-rw-r--r--test/CodeGen/X86/peephole-multiple-folds.ll4
-rw-r--r--test/CodeGen/X86/phi-bit-propagation.ll6
-rw-r--r--test/CodeGen/X86/phielim-split.ll43
-rw-r--r--test/CodeGen/X86/phys-reg-local-regalloc.ll8
-rw-r--r--test/CodeGen/X86/phys_subreg_coalesce-2.ll1
-rw-r--r--test/CodeGen/X86/phys_subreg_coalesce-3.ll6
-rw-r--r--test/CodeGen/X86/pic.ll40
-rw-r--r--test/CodeGen/X86/pic_jumptable.ll8
-rw-r--r--test/CodeGen/X86/pmovext.ll2
-rw-r--r--test/CodeGen/X86/pmovsx-inreg.ll24
-rw-r--r--test/CodeGen/X86/pmul.ll220
-rw-r--r--test/CodeGen/X86/pmulld.ll2
-rw-r--r--test/CodeGen/X86/pointer-vector.ll27
-rw-r--r--test/CodeGen/X86/postra-licm.ll26
-rw-r--r--test/CodeGen/X86/pr10475.ll2
-rw-r--r--test/CodeGen/X86/pr10525.ll2
-rw-r--r--test/CodeGen/X86/pr11334.ll2
-rw-r--r--test/CodeGen/X86/pr12360.ll4
-rw-r--r--test/CodeGen/X86/pr12889.ll2
-rw-r--r--test/CodeGen/X86/pr13209.ll30
-rw-r--r--test/CodeGen/X86/pr13458.ll2
-rw-r--r--test/CodeGen/X86/pr13859.ll2
-rw-r--r--test/CodeGen/X86/pr13899.ll38
-rw-r--r--test/CodeGen/X86/pr14161.ll9
-rw-r--r--test/CodeGen/X86/pr14333.ll4
-rw-r--r--test/CodeGen/X86/pr14562.ll2
-rw-r--r--test/CodeGen/X86/pr1489.ll2
-rw-r--r--test/CodeGen/X86/pr1505b.ll8
-rw-r--r--test/CodeGen/X86/pr15267.ll22
-rw-r--r--test/CodeGen/X86/pr15309.ll6
-rw-r--r--test/CodeGen/X86/pr18023.ll22
-rw-r--r--test/CodeGen/X86/pr18162.ll10
-rw-r--r--test/CodeGen/X86/pr18846.ll32
-rw-r--r--test/CodeGen/X86/pr20020.ll22
-rw-r--r--test/CodeGen/X86/pr21099.ll3
-rw-r--r--test/CodeGen/X86/pr2177.ll6
-rw-r--r--test/CodeGen/X86/pr21792.ll41
-rw-r--r--test/CodeGen/X86/pr2182.ll8
-rw-r--r--test/CodeGen/X86/pr22774.ll11
-rw-r--r--test/CodeGen/X86/pr23103.ll21
-rw-r--r--test/CodeGen/X86/pr23246.ll19
-rw-r--r--test/CodeGen/X86/pr2326.ll10
-rw-r--r--test/CodeGen/X86/pr23273.ll17
-rw-r--r--test/CodeGen/X86/pr2656.ll10
-rw-r--r--test/CodeGen/X86/pr2849.ll14
-rw-r--r--test/CodeGen/X86/pr2924.ll8
-rw-r--r--test/CodeGen/X86/pr2982.ll8
-rw-r--r--test/CodeGen/X86/pr3154.ll26
-rw-r--r--test/CodeGen/X86/pr3216.ll2
-rw-r--r--test/CodeGen/X86/pr3241.ll2
-rw-r--r--test/CodeGen/X86/pr3244.ll6
-rw-r--r--test/CodeGen/X86/pr3250.ll2
-rw-r--r--test/CodeGen/X86/pr3317.ll16
-rw-r--r--test/CodeGen/X86/pr3366.ll2
-rw-r--r--test/CodeGen/X86/pr3457.ll4
-rw-r--r--test/CodeGen/X86/pr3522.ll2
-rw-r--r--test/CodeGen/X86/pr5145.ll12
-rw-r--r--test/CodeGen/X86/pr9127.ll2
-rw-r--r--test/CodeGen/X86/pre-ra-sched.ll28
-rw-r--r--test/CodeGen/X86/private-2.ll4
-rw-r--r--test/CodeGen/X86/private.ll2
-rw-r--r--test/CodeGen/X86/promote-assert-zext.ll2
-rw-r--r--test/CodeGen/X86/promote-trunc.ll4
-rw-r--r--test/CodeGen/X86/promote.ll4
-rw-r--r--test/CodeGen/X86/pshufb-mask-comments.ll4
-rw-r--r--test/CodeGen/X86/psubus.ll340
-rw-r--r--test/CodeGen/X86/ptrtoint-constexpr.ll2
-rw-r--r--test/CodeGen/X86/ragreedy-bug.ll106
-rw-r--r--test/CodeGen/X86/ragreedy-hoist-spill.ll26
-rw-r--r--test/CodeGen/X86/ragreedy-last-chance-recoloring.ll52
-rw-r--r--test/CodeGen/X86/rd-mod-wr-eflags.ll54
-rw-r--r--test/CodeGen/X86/rdrand.ll2
-rw-r--r--test/CodeGen/X86/recip-fastmath.ll48
-rw-r--r--test/CodeGen/X86/regalloc-reconcile-broken-hints.ll32
-rw-r--r--test/CodeGen/X86/regpressure.ll114
-rw-r--r--test/CodeGen/X86/remat-constant.ll2
-rw-r--r--test/CodeGen/X86/remat-fold-load.ll34
-rw-r--r--test/CodeGen/X86/remat-invalid-liveness.ll12
-rw-r--r--test/CodeGen/X86/remat-scalar-zero.ll98
-rw-r--r--test/CodeGen/X86/reverse_branches.ll20
-rw-r--r--test/CodeGen/X86/rip-rel-address.ll2
-rw-r--r--test/CodeGen/X86/rip-rel-lea.ll2
-rw-r--r--test/CodeGen/X86/rot32.ll4
-rw-r--r--test/CodeGen/X86/rot64.ll4
-rw-r--r--test/CodeGen/X86/rotate4.ll8
-rw-r--r--test/CodeGen/X86/sandybridge-loads.ll10
-rw-r--r--test/CodeGen/X86/scalar-extract.ll2
-rw-r--r--test/CodeGen/X86/scalar_sse_minmax.ll61
-rw-r--r--test/CodeGen/X86/scalar_widen_div.ll30
-rw-r--r--test/CodeGen/X86/scalarize-bitcast.ll4
-rw-r--r--test/CodeGen/X86/scev-interchange.ll2
-rw-r--r--test/CodeGen/X86/segmented-stacks.ll22
-rw-r--r--test/CodeGen/X86/seh-catch-all.ll44
-rw-r--r--test/CodeGen/X86/seh-except-finally.ll167
-rw-r--r--test/CodeGen/X86/seh-filter.ll21
-rw-r--r--test/CodeGen/X86/seh-finally.ll51
-rw-r--r--test/CodeGen/X86/seh-safe-div.ll180
-rw-r--r--test/CodeGen/X86/select-with-and-or.ll2
-rw-r--r--test/CodeGen/X86/select.ll12
-rw-r--r--test/CodeGen/X86/selectiondag-crash.ll15
-rw-r--r--test/CodeGen/X86/selectiondag-cse.ll8
-rw-r--r--test/CodeGen/X86/setcc-narrowing.ll2
-rw-r--r--test/CodeGen/X86/setcc.ll2
-rw-r--r--test/CodeGen/X86/sext-load.ll4
-rw-r--r--test/CodeGen/X86/sha.ll14
-rw-r--r--test/CodeGen/X86/shift-and.ll4
-rwxr-xr-xtest/CodeGen/X86/shift-avx2-crash.ll38
-rw-r--r--test/CodeGen/X86/shift-bmi2.ll16
-rw-r--r--test/CodeGen/X86/shift-coalesce.ll2
-rw-r--r--test/CodeGen/X86/shift-codegen.ll4
-rw-r--r--test/CodeGen/X86/shift-combine.ll4
-rw-r--r--test/CodeGen/X86/shift-folding.ll12
-rw-r--r--test/CodeGen/X86/shift-i256.ll18
-rw-r--r--test/CodeGen/X86/shift-one.ll2
-rw-r--r--test/CodeGen/X86/shift-pair.ll2
-rw-r--r--test/CodeGen/X86/shift-parts.ll2
-rw-r--r--test/CodeGen/X86/shl-i64.ll8
-rw-r--r--test/CodeGen/X86/shl_undef.ll4
-rw-r--r--test/CodeGen/X86/shrink-compare.ll4
-rw-r--r--test/CodeGen/X86/shuffle-combine-crash.ll2
-rw-r--r--test/CodeGen/X86/sibcall-4.ll4
-rw-r--r--test/CodeGen/X86/sibcall-5.ll2
-rw-r--r--test/CodeGen/X86/sibcall-win64.ll42
-rw-r--r--test/CodeGen/X86/sibcall.ll99
-rw-r--r--test/CodeGen/X86/simple-zext.ll2
-rw-r--r--test/CodeGen/X86/sincos-opt.ll5
-rw-r--r--test/CodeGen/X86/sink-cheap-instructions.ll62
-rw-r--r--test/CodeGen/X86/sink-hoist.ll20
-rw-r--r--test/CodeGen/X86/sink-out-of-loop.ll2
-rw-r--r--test/CodeGen/X86/sjlj.ll4
-rw-r--r--test/CodeGen/X86/slow-incdec.ll8
-rw-r--r--test/CodeGen/X86/smul-with-overflow.ll8
-rw-r--r--test/CodeGen/X86/soft-fp.ll10
-rw-r--r--test/CodeGen/X86/splat-const.ll40
-rw-r--r--test/CodeGen/X86/splat-for-size.ll14
-rw-r--r--test/CodeGen/X86/split-eh-lpad-edges.ll2
-rw-r--r--test/CodeGen/X86/split-vector-bitcast.ll2
-rw-r--r--test/CodeGen/X86/sqrt-fastmath.ll197
-rw-r--r--test/CodeGen/X86/sqrt.ll4
-rw-r--r--test/CodeGen/X86/sret-implicit.ll34
-rw-r--r--test/CodeGen/X86/sse-align-0.ll4
-rw-r--r--test/CodeGen/X86/sse-align-1.ll4
-rw-r--r--test/CodeGen/X86/sse-align-10.ll2
-rw-r--r--test/CodeGen/X86/sse-align-12.ll8
-rw-r--r--test/CodeGen/X86/sse-align-2.ll4
-rw-r--r--test/CodeGen/X86/sse-align-5.ll2
-rw-r--r--test/CodeGen/X86/sse-align-6.ll2
-rw-r--r--test/CodeGen/X86/sse-align-9.ll4
-rw-r--r--test/CodeGen/X86/sse-domains.ll4
-rw-r--r--test/CodeGen/X86/sse-fcopysign.ll132
-rw-r--r--test/CodeGen/X86/sse-intel-ocl.ll4
-rw-r--r--test/CodeGen/X86/sse-intrinsics-x86.ll1
-rw-r--r--test/CodeGen/X86/sse-load-ret.ll2
-rw-r--r--test/CodeGen/X86/sse-minmax.ll24
-rw-r--r--test/CodeGen/X86/sse-scalar-fp-arith-unary.ll73
-rw-r--r--test/CodeGen/X86/sse-scalar-fp-arith.ll217
-rw-r--r--test/CodeGen/X86/sse-unaligned-mem-feature.ll4
-rw-r--r--test/CodeGen/X86/sse-varargs.ll2
-rw-r--r--test/CodeGen/X86/sse2-intrinsics-x86-upgrade.ll31
-rw-r--r--test/CodeGen/X86/sse2-intrinsics-x86.ll39
-rw-r--r--test/CodeGen/X86/sse2.ll56
-rw-r--r--test/CodeGen/X86/sse3-avx-addsub-2.ll15
-rw-r--r--test/CodeGen/X86/sse3-avx-addsub.ll8
-rw-r--r--test/CodeGen/X86/sse3.ll69
-rw-r--r--test/CodeGen/X86/sse41-intrinsics-x86.ll1
-rw-r--r--test/CodeGen/X86/sse41-pmovxrm-intrinsics.ll24
-rw-r--r--test/CodeGen/X86/sse41.ll310
-rw-r--r--test/CodeGen/X86/sse42-intrinsics-x86.ll12
-rw-r--r--test/CodeGen/X86/sse4a.ll1
-rw-r--r--test/CodeGen/X86/sse_partial_update.ll29
-rw-r--r--test/CodeGen/X86/ssp-data-layout.ll156
-rw-r--r--test/CodeGen/X86/stack-align.ll28
-rw-r--r--test/CodeGen/X86/stack-folding-3dnow.ll217
-rw-r--r--test/CodeGen/X86/stack-folding-fp-avx1.ll1827
-rw-r--r--test/CodeGen/X86/stack-folding-fp-sse42.ll1097
-rw-r--r--test/CodeGen/X86/stack-folding-int-avx1.ll1156
-rw-r--r--test/CodeGen/X86/stack-folding-int-avx2.ll1211
-rw-r--r--test/CodeGen/X86/stack-folding-int-sse42.ll1174
-rw-r--r--test/CodeGen/X86/stack-folding-mmx.ll566
-rw-r--r--test/CodeGen/X86/stack-folding-xop.ll718
-rw-r--r--test/CodeGen/X86/stack-protector-dbginfo.ll107
-rw-r--r--test/CodeGen/X86/stack-protector-vreg-to-vreg-copy.ll4
-rw-r--r--test/CodeGen/X86/stack-protector-weight.ll6
-rw-r--r--test/CodeGen/X86/stack-protector.ll506
-rw-r--r--test/CodeGen/X86/stack-update-frame-opcode.ll2
-rw-r--r--test/CodeGen/X86/stack_guard_remat.ll2
-rw-r--r--test/CodeGen/X86/stackmap-fast-isel.ll16
-rw-r--r--test/CodeGen/X86/stackmap-large-constants.ll4
-rw-r--r--test/CodeGen/X86/stackmap-liveness.ll10
-rw-r--r--test/CodeGen/X86/stackmap-nops.ll64
-rw-r--r--test/CodeGen/X86/stackmap-shadow-optimization.ll2
-rw-r--r--test/CodeGen/X86/stackmap.ll44
-rw-r--r--test/CodeGen/X86/statepoint-allocas.ll130
-rw-r--r--test/CodeGen/X86/statepoint-call-lowering.ll79
-rw-r--r--test/CodeGen/X86/statepoint-forward.ll27
-rw-r--r--test/CodeGen/X86/statepoint-gctransition-call-lowering.ll133
-rw-r--r--test/CodeGen/X86/statepoint-invoke.ll198
-rw-r--r--test/CodeGen/X86/statepoint-stack-usage.ll50
-rw-r--r--test/CodeGen/X86/statepoint-stackmap-format.ll211
-rw-r--r--test/CodeGen/X86/stdarg.ll2
-rw-r--r--test/CodeGen/X86/store-narrow.ll22
-rw-r--r--test/CodeGen/X86/store_op_load_fold.ll6
-rw-r--r--test/CodeGen/X86/store_op_load_fold2.ll6
-rw-r--r--test/CodeGen/X86/stores-merging.ll6
-rw-r--r--test/CodeGen/X86/stride-nine-with-base-reg.ll10
-rw-r--r--test/CodeGen/X86/stride-reuse.ll8
-rw-r--r--test/CodeGen/X86/sub-with-overflow.ll8
-rw-r--r--test/CodeGen/X86/subreg-to-reg-0.ll2
-rw-r--r--test/CodeGen/X86/subreg-to-reg-2.ll8
-rw-r--r--test/CodeGen/X86/subreg-to-reg-4.ll16
-rw-r--r--test/CodeGen/X86/subreg-to-reg-6.ll2
-rw-r--r--test/CodeGen/X86/sunkaddr-ext.ll4
-rw-r--r--test/CodeGen/X86/switch-bt.ll60
-rw-r--r--test/CodeGen/X86/switch-crit-edge-constant.ll28
-rw-r--r--test/CodeGen/X86/switch-or.ll2
-rw-r--r--test/CodeGen/X86/switch-zextload.ll2
-rw-r--r--test/CodeGen/X86/switch.ll536
-rw-r--r--test/CodeGen/X86/tail-call-win64.ll36
-rw-r--r--test/CodeGen/X86/tail-dup-addr.ll2
-rw-r--r--test/CodeGen/X86/tail-opts.ll44
-rw-r--r--test/CodeGen/X86/tailcall-64.ll16
-rw-r--r--test/CodeGen/X86/tailcall-fastisel.ll4
-rw-r--r--test/CodeGen/X86/tailcall-mem-intrinsics.ll31
-rw-r--r--test/CodeGen/X86/tailcall-returndup-void.ll8
-rw-r--r--test/CodeGen/X86/tailcall-ri64.ll6
-rw-r--r--test/CodeGen/X86/tailcallbyval.ll4
-rw-r--r--test/CodeGen/X86/tailcallbyval64.ll4
-rw-r--r--test/CodeGen/X86/tailcallstack64.ll4
-rw-r--r--test/CodeGen/X86/tbm-intrinsics-x86_64.ll4
-rw-r--r--test/CodeGen/X86/tbm_patterns.ll4
-rw-r--r--test/CodeGen/X86/test-shrink-bug.ll2
-rw-r--r--test/CodeGen/X86/testl-commute.ll12
-rw-r--r--test/CodeGen/X86/this-return-64.ll12
-rw-r--r--test/CodeGen/X86/tls-addr-non-leaf-function.ll2
-rw-r--r--test/CodeGen/X86/tls-local-dynamic.ll4
-rw-r--r--test/CodeGen/X86/tls-pic.ll8
-rw-r--r--test/CodeGen/X86/tls-pie.ll4
-rw-r--r--test/CodeGen/X86/tls.ll47
-rw-r--r--test/CodeGen/X86/tlv-1.ll6
-rw-r--r--test/CodeGen/X86/trap.ll20
-rw-r--r--test/CodeGen/X86/trunc-ext-ld-st.ll14
-rw-r--r--test/CodeGen/X86/trunc-to-bool.ll2
-rw-r--r--test/CodeGen/X86/twoaddr-coalesce-3.ll84
-rw-r--r--test/CodeGen/X86/twoaddr-coalesce.ll2
-rw-r--r--test/CodeGen/X86/twoaddr-pass-sink.ll14
-rw-r--r--test/CodeGen/X86/uint64-to-float.ll4
-rw-r--r--test/CodeGen/X86/uint_to_fp-2.ll2
-rw-r--r--test/CodeGen/X86/umul-with-carry.ll4
-rw-r--r--test/CodeGen/X86/unaligned-32-byte-memops.ll123
-rw-r--r--test/CodeGen/X86/unaligned-load.ll4
-rw-r--r--test/CodeGen/X86/unaligned-spill-folding.ll4
-rw-r--r--test/CodeGen/X86/unknown-location.ll24
-rw-r--r--test/CodeGen/X86/unwindraise.ll70
-rw-r--r--test/CodeGen/X86/use-add-flags.ll2
-rw-r--r--test/CodeGen/X86/utf16-cfstrings.ll4
-rw-r--r--test/CodeGen/X86/v2f32.ll6
-rw-r--r--test/CodeGen/X86/v4i32load-crash.ll8
-rw-r--r--test/CodeGen/X86/v8i1-masks.ll10
-rw-r--r--test/CodeGen/X86/vaargs.ll10
-rw-r--r--test/CodeGen/X86/vararg-callee-cleanup.ll12
-rw-r--r--test/CodeGen/X86/vararg_tailcall.ll40
-rw-r--r--test/CodeGen/X86/variadic-node-pic.ll2
-rw-r--r--test/CodeGen/X86/vec-loadsingles-alignment.ll16
-rw-r--r--test/CodeGen/X86/vec-trunc-store.ll4
-rw-r--r--test/CodeGen/X86/vec_align.ll16
-rw-r--r--test/CodeGen/X86/vec_anyext.ll24
-rw-r--r--test/CodeGen/X86/vec_cast2.ll111
-rw-r--r--test/CodeGen/X86/vec_clear.ll13
-rw-r--r--test/CodeGen/X86/vec_compare.ll52
-rw-r--r--test/CodeGen/X86/vec_extract-mmx.ll71
-rw-r--r--test/CodeGen/X86/vec_extract-sse4.ll8
-rw-r--r--test/CodeGen/X86/vec_extract.ll6
-rw-r--r--test/CodeGen/X86/vec_fabs.ll2
-rw-r--r--test/CodeGen/X86/vec_floor.ll46
-rw-r--r--test/CodeGen/X86/vec_fneg.ll2
-rw-r--r--test/CodeGen/X86/vec_fp_to_int.ll955
-rw-r--r--test/CodeGen/X86/vec_fpext.ll18
-rw-r--r--test/CodeGen/X86/vec_i64.ll4
-rw-r--r--test/CodeGen/X86/vec_ins_extract.ll16
-rw-r--r--test/CodeGen/X86/vec_insert-3.ll10
-rw-r--r--test/CodeGen/X86/vec_insert-5.ll46
-rw-r--r--test/CodeGen/X86/vec_insert-mmx.ll58
-rw-r--r--test/CodeGen/X86/vec_int_to_fp.ll714
-rw-r--r--test/CodeGen/X86/vec_loadsingles.ll92
-rw-r--r--test/CodeGen/X86/vec_logical.ll2
-rw-r--r--test/CodeGen/X86/vec_partial.ll32
-rw-r--r--test/CodeGen/X86/vec_reassociate.ll119
-rw-r--r--test/CodeGen/X86/vec_set-7.ll2
-rw-r--r--test/CodeGen/X86/vec_set-F.ll2
-rw-r--r--test/CodeGen/X86/vec_setcc-2.ll12
-rw-r--r--test/CodeGen/X86/vec_shift5.ll2
-rw-r--r--test/CodeGen/X86/vec_shift6.ll6
-rw-r--r--test/CodeGen/X86/vec_shift7.ll12
-rw-r--r--test/CodeGen/X86/vec_split.ll6
-rw-r--r--test/CodeGen/X86/vec_ss_load_fold.ll4
-rw-r--r--test/CodeGen/X86/vec_trunc_sext.ll2
-rw-r--r--test/CodeGen/X86/vec_zero.ll4
-rw-r--r--test/CodeGen/X86/vec_zero_cse.ll19
-rw-r--r--test/CodeGen/X86/vector-blend.ll258
-rw-r--r--test/CodeGen/X86/vector-ctpop.ll6
-rw-r--r--test/CodeGen/X86/vector-gep.ll20
-rw-r--r--test/CodeGen/X86/vector-idiv.ll714
-rw-r--r--test/CodeGen/X86/vector-intrinsics.ll8
-rw-r--r--test/CodeGen/X86/vector-sext.ll617
-rw-r--r--test/CodeGen/X86/vector-shuffle-128-v16.ll622
-rw-r--r--test/CodeGen/X86/vector-shuffle-128-v2.ll210
-rw-r--r--test/CodeGen/X86/vector-shuffle-128-v4.ll791
-rw-r--r--test/CodeGen/X86/vector-shuffle-128-v8.ll660
-rw-r--r--test/CodeGen/X86/vector-shuffle-256-v16.ll2091
-rw-r--r--test/CodeGen/X86/vector-shuffle-256-v32.ll622
-rw-r--r--test/CodeGen/X86/vector-shuffle-256-v4.ll166
-rw-r--r--test/CodeGen/X86/vector-shuffle-256-v8.ll368
-rw-r--r--test/CodeGen/X86/vector-shuffle-512-v16.ll4
-rw-r--r--test/CodeGen/X86/vector-shuffle-512-v8.ll281
-rw-r--r--test/CodeGen/X86/vector-shuffle-combining.ll1068
-rw-r--r--test/CodeGen/X86/vector-shuffle-mmx.ll105
-rw-r--r--test/CodeGen/X86/vector-shuffle-sse1.ll36
-rw-r--r--test/CodeGen/X86/vector-trunc.ll248
-rw-r--r--test/CodeGen/X86/vector-variable-idx2.ll8
-rw-r--r--test/CodeGen/X86/vector-zext.ll441
-rw-r--r--test/CodeGen/X86/vector-zmov.ll4
-rw-r--r--test/CodeGen/X86/vector.ll42
-rw-r--r--test/CodeGen/X86/viabs.ll10
-rw-r--r--test/CodeGen/X86/visibility2.ll2
-rw-r--r--test/CodeGen/X86/volatile.ll6
-rw-r--r--test/CodeGen/X86/vortex-bug.ll2
-rw-r--r--test/CodeGen/X86/vselect-2.ll44
-rw-r--r--test/CodeGen/X86/vselect-avx.ll54
-rw-r--r--test/CodeGen/X86/vselect-minmax.ll1536
-rw-r--r--test/CodeGen/X86/vselect.ll49
-rw-r--r--test/CodeGen/X86/vshift-4.ll9
-rw-r--r--test/CodeGen/X86/vshift-5.ll4
-rw-r--r--test/CodeGen/X86/vshift-6.ll4
-rw-r--r--test/CodeGen/X86/warn-stack.ll4
-rw-r--r--test/CodeGen/X86/weak_def_can_be_hidden.ll4
-rw-r--r--test/CodeGen/X86/widen_arith-1.ll20
-rw-r--r--test/CodeGen/X86/widen_arith-2.ll32
-rw-r--r--test/CodeGen/X86/widen_arith-3.ll20
-rw-r--r--test/CodeGen/X86/widen_arith-4.ll20
-rw-r--r--test/CodeGen/X86/widen_arith-5.ll20
-rw-r--r--test/CodeGen/X86/widen_arith-6.ll22
-rw-r--r--test/CodeGen/X86/widen_cast-1.ll24
-rw-r--r--test/CodeGen/X86/widen_cast-2.ll18
-rw-r--r--test/CodeGen/X86/widen_cast-4.ll34
-rw-r--r--test/CodeGen/X86/widen_cast-5.ll2
-rw-r--r--test/CodeGen/X86/widen_conversions.ll4
-rw-r--r--test/CodeGen/X86/widen_load-0.ll6
-rw-r--r--test/CodeGen/X86/widen_load-1.ll6
-rw-r--r--test/CodeGen/X86/widen_load-2.ll64
-rw-r--r--test/CodeGen/X86/widen_shuffle-1.ll6
-rw-r--r--test/CodeGen/X86/win32-eh.ll91
-rw-r--r--test/CodeGen/X86/win32_sret.ll36
-rw-r--r--test/CodeGen/X86/win64_alloca_dynalloca.ll20
-rw-r--r--test/CodeGen/X86/win64_eh.ll57
-rw-r--r--test/CodeGen/X86/win64_frame.ll122
-rw-r--r--test/CodeGen/X86/win64_nonvol.ll28
-rw-r--r--test/CodeGen/X86/win_cst_pool.ll2
-rw-r--r--test/CodeGen/X86/win_eh_prepare.ll82
-rw-r--r--test/CodeGen/X86/x32-function_pointer-1.ll4
-rw-r--r--test/CodeGen/X86/x32-lea-1.ll10
-rw-r--r--test/CodeGen/X86/x86-32-vector-calling-conv.ll44
-rw-r--r--test/CodeGen/X86/x86-64-and-mask.ll2
-rw-r--r--test/CodeGen/X86/x86-64-asm.ll2
-rw-r--r--test/CodeGen/X86/x86-64-baseptr.ll26
-rw-r--r--test/CodeGen/X86/x86-64-disp.ll2
-rw-r--r--test/CodeGen/X86/x86-64-gv-offset.ll4
-rw-r--r--test/CodeGen/X86/x86-64-jumps.ll8
-rw-r--r--test/CodeGen/X86/x86-64-mem.ll8
-rw-r--r--test/CodeGen/X86/x86-64-pic-4.ll2
-rw-r--r--test/CodeGen/X86/x86-64-pic-5.ll2
-rw-r--r--test/CodeGen/X86/x86-64-pic-6.ll2
-rw-r--r--test/CodeGen/X86/x86-64-psub.ll2
-rw-r--r--test/CodeGen/X86/x86-64-ptr-arg-simple.ll2
-rw-r--r--test/CodeGen/X86/x86-64-sret-return.ll58
-rw-r--r--test/CodeGen/X86/x86-64-static-relo-movl.ll4
-rw-r--r--test/CodeGen/X86/x86-64-tls-1.ll2
-rw-r--r--test/CodeGen/X86/x86-64-varargs.ll2
-rw-r--r--test/CodeGen/X86/x86-fold-pshufb.ll17
-rw-r--r--test/CodeGen/X86/x86-framelowering-trap.ll15
-rw-r--r--test/CodeGen/X86/x86-mixed-alignment-dagcombine.ll8
-rw-r--r--test/CodeGen/X86/x86-setcc-int-to-fp-combine.ll15
-rw-r--r--test/CodeGen/X86/x86-shifts.ll16
-rw-r--r--test/CodeGen/X86/x86-shrink-wrapping.ll600
-rw-r--r--test/CodeGen/X86/x86-upgrade-avx2-vbroadcast.ll18
-rw-r--r--test/CodeGen/X86/xaluo.ll2
-rw-r--r--test/CodeGen/X86/xmulo.ll6
-rw-r--r--test/CodeGen/X86/xop-intrinsics-x86_64.ll170
-rw-r--r--test/CodeGen/X86/xor-icmp.ll6
-rw-r--r--test/CodeGen/X86/xor.ll18
-rw-r--r--test/CodeGen/X86/zext-extract_subreg.ll2
-rw-r--r--test/CodeGen/X86/zext-sext.ll21
-rw-r--r--test/CodeGen/X86/zlib-longest-match.ll159
-rw-r--r--test/CodeGen/XCore/2009-01-08-Crash.ll4
-rw-r--r--test/CodeGen/XCore/2010-02-25-LSR-Crash.ll4
-rw-r--r--test/CodeGen/XCore/2011-01-31-DAGCombineBug.ll2
-rw-r--r--test/CodeGen/XCore/atomic.ll18
-rw-r--r--test/CodeGen/XCore/codemodel.ll22
-rw-r--r--test/CodeGen/XCore/dwarf_debug.ll23
-rw-r--r--test/CodeGen/XCore/epilogue_prologue.ll8
-rw-r--r--test/CodeGen/XCore/exception.ll14
-rw-r--r--test/CodeGen/XCore/indirectbr.ll6
-rw-r--r--test/CodeGen/XCore/llvm-intrinsics.ll8
-rw-r--r--test/CodeGen/XCore/load.ll18
-rw-r--r--test/CodeGen/XCore/offset_folding.ll8
-rw-r--r--test/CodeGen/XCore/private.ll2
-rw-r--r--test/CodeGen/XCore/scavenging.ll40
-rw-r--r--test/CodeGen/XCore/store.ll8
-rw-r--r--test/CodeGen/XCore/threads.ll16
-rw-r--r--test/CodeGen/XCore/trampoline.ll10
-rw-r--r--test/CodeGen/XCore/unaligned_load.ll6
-rw-r--r--test/CodeGen/XCore/unaligned_store_combine.ll2
-rw-r--r--test/CodeGen/XCore/zextfree.ll2
4000 files changed, 170307 insertions, 73763 deletions
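Most of the churn in these tests is the mechanical update to the explicit-type forms of the load and getelementptr instructions introduced earlier on trunk, plus the migration of debug-info metadata from the old string-encoded nodes (e.g. !MDLocation and "0x..." descriptors) to the specialized !DI* nodes and the bump of "Debug Info Version" from 2 to 3, as the hunks below show repeatedly. A minimal before/after sketch of the instruction change follows; the %p, %v, %q names and the i32 type are illustrative only, not taken from any specific test in this patch:

  ; old syntax: the pointee type is implied by the pointer operand
  %v = load i32* %p, align 4
  %q = getelementptr inbounds i32* %p, i64 1

  ; new syntax: the loaded/indexed type is spelled out as the first operand
  %v = load i32, i32* %p, align 4
  %q = getelementptr inbounds i32, i32* %p, i64 1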
diff --git a/test/CodeGen/AArch64/128bit_load_store.ll b/test/CodeGen/AArch64/128bit_load_store.ll
index a6f077698e40..94fd386e0eaf 100644
--- a/test/CodeGen/AArch64/128bit_load_store.ll
+++ b/test/CodeGen/AArch64/128bit_load_store.ll
@@ -12,7 +12,7 @@ define fp128 @test_load_f128(fp128* readonly %ptr) #2 {
; CHECK-LABEL: test_load_f128
; CHECK: ldr {{q[0-9]+}}, [{{x[0-9]+}}]
entry:
- %0 = load fp128* %ptr, align 16
+ %0 = load fp128, fp128* %ptr, align 16
ret fp128 %0
}
@@ -33,7 +33,7 @@ define i128 @test_vldrq_p128(i128* readonly %ptr) #2 {
entry:
%0 = bitcast i128* %ptr to fp128*
- %1 = load fp128* %0, align 16
+ %1 = load fp128, fp128* %0, align 16
%2 = bitcast fp128 %1 to i128
ret i128 %2
}
@@ -44,8 +44,8 @@ define void @test_ld_st_p128(i128* nocapture %ptr) #0 {
; CHECK-NEXT: str {{q[0-9]+}}, [{{x[0-9]+}}, #16]
entry:
%0 = bitcast i128* %ptr to fp128*
- %1 = load fp128* %0, align 16
- %add.ptr = getelementptr inbounds i128* %ptr, i64 1
+ %1 = load fp128, fp128* %0, align 16
+ %add.ptr = getelementptr inbounds i128, i128* %ptr, i64 1
%2 = bitcast i128* %add.ptr to fp128*
store fp128 %1, fp128* %2, align 16
ret void
diff --git a/test/CodeGen/AArch64/PBQP-chain.ll b/test/CodeGen/AArch64/PBQP-chain.ll
index c4ba026ea428..3e5fa741c243 100644
--- a/test/CodeGen/AArch64/PBQP-chain.ll
+++ b/test/CodeGen/AArch64/PBQP-chain.ll
@@ -22,79 +22,79 @@ target triple = "aarch64"
; CHECK-ODD: fmadd {{d[0-9]*[13579]}}, {{d[0-9]*}}, {{d[0-9]*}}, {{d[0-9]*[13579]}}
define void @fir(double* nocapture %rx, double* nocapture %ry, double* nocapture %c, double* nocapture %x, double* nocapture %y) {
entry:
- %0 = load double* %c, align 8
- %1 = load double* %x, align 8
+ %0 = load double, double* %c, align 8
+ %1 = load double, double* %x, align 8
%mul = fmul fast double %1, %0
- %2 = load double* %y, align 8
+ %2 = load double, double* %y, align 8
%mul7 = fmul fast double %2, %0
- %arrayidx.1 = getelementptr inbounds double* %c, i64 1
- %3 = load double* %arrayidx.1, align 8
- %arrayidx2.1 = getelementptr inbounds double* %x, i64 1
- %4 = load double* %arrayidx2.1, align 8
+ %arrayidx.1 = getelementptr inbounds double, double* %c, i64 1
+ %3 = load double, double* %arrayidx.1, align 8
+ %arrayidx2.1 = getelementptr inbounds double, double* %x, i64 1
+ %4 = load double, double* %arrayidx2.1, align 8
%mul.1 = fmul fast double %4, %3
%add.1 = fadd fast double %mul.1, %mul
- %arrayidx6.1 = getelementptr inbounds double* %y, i64 1
- %5 = load double* %arrayidx6.1, align 8
+ %arrayidx6.1 = getelementptr inbounds double, double* %y, i64 1
+ %5 = load double, double* %arrayidx6.1, align 8
%mul7.1 = fmul fast double %5, %3
%add8.1 = fadd fast double %mul7.1, %mul7
- %arrayidx.2 = getelementptr inbounds double* %c, i64 2
- %6 = load double* %arrayidx.2, align 8
- %arrayidx2.2 = getelementptr inbounds double* %x, i64 2
- %7 = load double* %arrayidx2.2, align 8
+ %arrayidx.2 = getelementptr inbounds double, double* %c, i64 2
+ %6 = load double, double* %arrayidx.2, align 8
+ %arrayidx2.2 = getelementptr inbounds double, double* %x, i64 2
+ %7 = load double, double* %arrayidx2.2, align 8
%mul.2 = fmul fast double %7, %6
%add.2 = fadd fast double %mul.2, %add.1
- %arrayidx6.2 = getelementptr inbounds double* %y, i64 2
- %8 = load double* %arrayidx6.2, align 8
+ %arrayidx6.2 = getelementptr inbounds double, double* %y, i64 2
+ %8 = load double, double* %arrayidx6.2, align 8
%mul7.2 = fmul fast double %8, %6
%add8.2 = fadd fast double %mul7.2, %add8.1
- %arrayidx.3 = getelementptr inbounds double* %c, i64 3
- %9 = load double* %arrayidx.3, align 8
- %arrayidx2.3 = getelementptr inbounds double* %x, i64 3
- %10 = load double* %arrayidx2.3, align 8
+ %arrayidx.3 = getelementptr inbounds double, double* %c, i64 3
+ %9 = load double, double* %arrayidx.3, align 8
+ %arrayidx2.3 = getelementptr inbounds double, double* %x, i64 3
+ %10 = load double, double* %arrayidx2.3, align 8
%mul.3 = fmul fast double %10, %9
%add.3 = fadd fast double %mul.3, %add.2
- %arrayidx6.3 = getelementptr inbounds double* %y, i64 3
- %11 = load double* %arrayidx6.3, align 8
+ %arrayidx6.3 = getelementptr inbounds double, double* %y, i64 3
+ %11 = load double, double* %arrayidx6.3, align 8
%mul7.3 = fmul fast double %11, %9
%add8.3 = fadd fast double %mul7.3, %add8.2
- %arrayidx.4 = getelementptr inbounds double* %c, i64 4
- %12 = load double* %arrayidx.4, align 8
- %arrayidx2.4 = getelementptr inbounds double* %x, i64 4
- %13 = load double* %arrayidx2.4, align 8
+ %arrayidx.4 = getelementptr inbounds double, double* %c, i64 4
+ %12 = load double, double* %arrayidx.4, align 8
+ %arrayidx2.4 = getelementptr inbounds double, double* %x, i64 4
+ %13 = load double, double* %arrayidx2.4, align 8
%mul.4 = fmul fast double %13, %12
%add.4 = fadd fast double %mul.4, %add.3
- %arrayidx6.4 = getelementptr inbounds double* %y, i64 4
- %14 = load double* %arrayidx6.4, align 8
+ %arrayidx6.4 = getelementptr inbounds double, double* %y, i64 4
+ %14 = load double, double* %arrayidx6.4, align 8
%mul7.4 = fmul fast double %14, %12
%add8.4 = fadd fast double %mul7.4, %add8.3
- %arrayidx.5 = getelementptr inbounds double* %c, i64 5
- %15 = load double* %arrayidx.5, align 8
- %arrayidx2.5 = getelementptr inbounds double* %x, i64 5
- %16 = load double* %arrayidx2.5, align 8
+ %arrayidx.5 = getelementptr inbounds double, double* %c, i64 5
+ %15 = load double, double* %arrayidx.5, align 8
+ %arrayidx2.5 = getelementptr inbounds double, double* %x, i64 5
+ %16 = load double, double* %arrayidx2.5, align 8
%mul.5 = fmul fast double %16, %15
%add.5 = fadd fast double %mul.5, %add.4
- %arrayidx6.5 = getelementptr inbounds double* %y, i64 5
- %17 = load double* %arrayidx6.5, align 8
+ %arrayidx6.5 = getelementptr inbounds double, double* %y, i64 5
+ %17 = load double, double* %arrayidx6.5, align 8
%mul7.5 = fmul fast double %17, %15
%add8.5 = fadd fast double %mul7.5, %add8.4
- %arrayidx.6 = getelementptr inbounds double* %c, i64 6
- %18 = load double* %arrayidx.6, align 8
- %arrayidx2.6 = getelementptr inbounds double* %x, i64 6
- %19 = load double* %arrayidx2.6, align 8
+ %arrayidx.6 = getelementptr inbounds double, double* %c, i64 6
+ %18 = load double, double* %arrayidx.6, align 8
+ %arrayidx2.6 = getelementptr inbounds double, double* %x, i64 6
+ %19 = load double, double* %arrayidx2.6, align 8
%mul.6 = fmul fast double %19, %18
%add.6 = fadd fast double %mul.6, %add.5
- %arrayidx6.6 = getelementptr inbounds double* %y, i64 6
- %20 = load double* %arrayidx6.6, align 8
+ %arrayidx6.6 = getelementptr inbounds double, double* %y, i64 6
+ %20 = load double, double* %arrayidx6.6, align 8
%mul7.6 = fmul fast double %20, %18
%add8.6 = fadd fast double %mul7.6, %add8.5
- %arrayidx.7 = getelementptr inbounds double* %c, i64 7
- %21 = load double* %arrayidx.7, align 8
- %arrayidx2.7 = getelementptr inbounds double* %x, i64 7
- %22 = load double* %arrayidx2.7, align 8
+ %arrayidx.7 = getelementptr inbounds double, double* %c, i64 7
+ %21 = load double, double* %arrayidx.7, align 8
+ %arrayidx2.7 = getelementptr inbounds double, double* %x, i64 7
+ %22 = load double, double* %arrayidx2.7, align 8
%mul.7 = fmul fast double %22, %21
%add.7 = fadd fast double %mul.7, %add.6
- %arrayidx6.7 = getelementptr inbounds double* %y, i64 7
- %23 = load double* %arrayidx6.7, align 8
+ %arrayidx6.7 = getelementptr inbounds double, double* %y, i64 7
+ %23 = load double, double* %arrayidx6.7, align 8
%mul7.7 = fmul fast double %23, %21
%add8.7 = fadd fast double %mul7.7, %add8.6
store double %add.7, double* %rx, align 8
diff --git a/test/CodeGen/AArch64/PBQP-coalesce-benefit.ll b/test/CodeGen/AArch64/PBQP-coalesce-benefit.ll
index 45ac5e65c002..bd50b2d84b74 100644
--- a/test/CodeGen/AArch64/PBQP-coalesce-benefit.ll
+++ b/test/CodeGen/AArch64/PBQP-coalesce-benefit.ll
@@ -3,11 +3,11 @@
; CHECK-LABEL: test:
define i32 @test(i32 %acc, i32* nocapture readonly %c) {
entry:
- %0 = load i32* %c, align 4
+ %0 = load i32, i32* %c, align 4
; CHECK-NOT: mov w{{[0-9]*}}, w0
%add = add nsw i32 %0, %acc
- %arrayidx1 = getelementptr inbounds i32* %c, i64 1
- %1 = load i32* %arrayidx1, align 4
+ %arrayidx1 = getelementptr inbounds i32, i32* %c, i64 1
+ %1 = load i32, i32* %arrayidx1, align 4
%add2 = add nsw i32 %add, %1
ret i32 %add2
}
diff --git a/test/CodeGen/AArch64/PBQP-csr.ll b/test/CodeGen/AArch64/PBQP-csr.ll
index 64335ae353a1..16d7f8cb7a5a 100644
--- a/test/CodeGen/AArch64/PBQP-csr.ll
+++ b/test/CodeGen/AArch64/PBQP-csr.ll
@@ -11,27 +11,27 @@
define void @test_csr(%pl* nocapture readnone %this, %rs* nocapture %r) align 2 {
;CHECK-NOT: stp {{d[0-9]+}}, {{d[0-9]+}}
entry:
- %x.i = getelementptr inbounds %rs* %r, i64 0, i32 7, i32 0
- %y.i = getelementptr inbounds %rs* %r, i64 0, i32 7, i32 1
- %z.i = getelementptr inbounds %rs* %r, i64 0, i32 7, i32 2
- %x.i61 = getelementptr inbounds %rs* %r, i64 0, i32 8, i32 0
- %y.i62 = getelementptr inbounds %rs* %r, i64 0, i32 8, i32 1
- %z.i63 = getelementptr inbounds %rs* %r, i64 0, i32 8, i32 2
- %x.i58 = getelementptr inbounds %rs* %r, i64 0, i32 9, i32 0
- %y.i59 = getelementptr inbounds %rs* %r, i64 0, i32 9, i32 1
- %z.i60 = getelementptr inbounds %rs* %r, i64 0, i32 9, i32 2
- %na = getelementptr inbounds %rs* %r, i64 0, i32 0
+ %x.i = getelementptr inbounds %rs, %rs* %r, i64 0, i32 7, i32 0
+ %y.i = getelementptr inbounds %rs, %rs* %r, i64 0, i32 7, i32 1
+ %z.i = getelementptr inbounds %rs, %rs* %r, i64 0, i32 7, i32 2
+ %x.i61 = getelementptr inbounds %rs, %rs* %r, i64 0, i32 8, i32 0
+ %y.i62 = getelementptr inbounds %rs, %rs* %r, i64 0, i32 8, i32 1
+ %z.i63 = getelementptr inbounds %rs, %rs* %r, i64 0, i32 8, i32 2
+ %x.i58 = getelementptr inbounds %rs, %rs* %r, i64 0, i32 9, i32 0
+ %y.i59 = getelementptr inbounds %rs, %rs* %r, i64 0, i32 9, i32 1
+ %z.i60 = getelementptr inbounds %rs, %rs* %r, i64 0, i32 9, i32 2
+ %na = getelementptr inbounds %rs, %rs* %r, i64 0, i32 0
%0 = bitcast double* %x.i to i8*
call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 72, i32 8, i1 false)
- %1 = load i32* %na, align 4
+ %1 = load i32, i32* %na, align 4
%cmp70 = icmp sgt i32 %1, 0
br i1 %cmp70, label %for.body.lr.ph, label %for.end
for.body.lr.ph: ; preds = %entry
- %fn = getelementptr inbounds %rs* %r, i64 0, i32 4
- %2 = load %v** %fn, align 8
- %fs = getelementptr inbounds %rs* %r, i64 0, i32 5
- %3 = load %v** %fs, align 8
+ %fn = getelementptr inbounds %rs, %rs* %r, i64 0, i32 4
+ %2 = load %v*, %v** %fn, align 8
+ %fs = getelementptr inbounds %rs, %rs* %r, i64 0, i32 5
+ %3 = load %v*, %v** %fs, align 8
%4 = sext i32 %1 to i64
br label %for.body
@@ -42,31 +42,31 @@ for.body: ; preds = %for.body.lr.ph, %fo
%7 = phi <2 x double> [ zeroinitializer, %for.body.lr.ph ], [ %22, %for.body ]
%8 = phi <2 x double> [ zeroinitializer, %for.body.lr.ph ], [ %26, %for.body ]
%9 = phi <2 x double> [ zeroinitializer, %for.body.lr.ph ], [ %28, %for.body ]
- %x.i54 = getelementptr inbounds %v* %2, i64 %indvars.iv, i32 0
- %x1.i = getelementptr inbounds %v* %3, i64 %indvars.iv, i32 0
- %y.i56 = getelementptr inbounds %v* %2, i64 %indvars.iv, i32 1
+ %x.i54 = getelementptr inbounds %v, %v* %2, i64 %indvars.iv, i32 0
+ %x1.i = getelementptr inbounds %v, %v* %3, i64 %indvars.iv, i32 0
+ %y.i56 = getelementptr inbounds %v, %v* %2, i64 %indvars.iv, i32 1
%10 = bitcast double* %x.i54 to <2 x double>*
- %11 = load <2 x double>* %10, align 8
- %y2.i = getelementptr inbounds %v* %3, i64 %indvars.iv, i32 1
+ %11 = load <2 x double>, <2 x double>* %10, align 8
+ %y2.i = getelementptr inbounds %v, %v* %3, i64 %indvars.iv, i32 1
%12 = bitcast double* %x1.i to <2 x double>*
- %13 = load <2 x double>* %12, align 8
+ %13 = load <2 x double>, <2 x double>* %12, align 8
%14 = fadd fast <2 x double> %13, %11
- %z.i57 = getelementptr inbounds %v* %2, i64 %indvars.iv, i32 2
- %15 = load double* %z.i57, align 8
- %z4.i = getelementptr inbounds %v* %3, i64 %indvars.iv, i32 2
- %16 = load double* %z4.i, align 8
+ %z.i57 = getelementptr inbounds %v, %v* %2, i64 %indvars.iv, i32 2
+ %15 = load double, double* %z.i57, align 8
+ %z4.i = getelementptr inbounds %v, %v* %3, i64 %indvars.iv, i32 2
+ %16 = load double, double* %z4.i, align 8
%add5.i = fadd fast double %16, %15
%17 = fadd fast <2 x double> %6, %11
%18 = bitcast double* %x.i to <2 x double>*
store <2 x double> %17, <2 x double>* %18, align 8
- %19 = load double* %x1.i, align 8
+ %19 = load double, double* %x1.i, align 8
%20 = insertelement <2 x double> undef, double %15, i32 0
%21 = insertelement <2 x double> %20, double %19, i32 1
%22 = fadd fast <2 x double> %7, %21
%23 = bitcast double* %z.i to <2 x double>*
store <2 x double> %22, <2 x double>* %23, align 8
%24 = bitcast double* %y2.i to <2 x double>*
- %25 = load <2 x double>* %24, align 8
+ %25 = load <2 x double>, <2 x double>* %24, align 8
%26 = fadd fast <2 x double> %8, %25
%27 = bitcast double* %y.i62 to <2 x double>*
store <2 x double> %26, <2 x double>* %27, align 8
diff --git a/test/CodeGen/AArch64/Redundantstore.ll b/test/CodeGen/AArch64/Redundantstore.ll
index 72f7f4679e6d..b2072682cd91 100644
--- a/test/CodeGen/AArch64/Redundantstore.ll
+++ b/test/CodeGen/AArch64/Redundantstore.ll
@@ -8,16 +8,16 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
; CHECK-NOT: stur
define i8* @test(i32 %size) {
entry:
- %0 = load i8** @end_of_array, align 8
+ %0 = load i8*, i8** @end_of_array, align 8
%conv = sext i32 %size to i64
%and = and i64 %conv, -8
%conv2 = trunc i64 %and to i32
%add.ptr.sum = add nsw i64 %and, -4
- %add.ptr3 = getelementptr inbounds i8* %0, i64 %add.ptr.sum
+ %add.ptr3 = getelementptr inbounds i8, i8* %0, i64 %add.ptr.sum
%size4 = bitcast i8* %add.ptr3 to i32*
store i32 %conv2, i32* %size4, align 4
%add.ptr.sum9 = add nsw i64 %and, -4
- %add.ptr5 = getelementptr inbounds i8* %0, i64 %add.ptr.sum9
+ %add.ptr5 = getelementptr inbounds i8, i8* %0, i64 %add.ptr.sum9
%size6 = bitcast i8* %add.ptr5 to i32*
store i32 %conv2, i32* %size6, align 4
ret i8* %0
diff --git a/test/CodeGen/AArch64/a57-csel.ll b/test/CodeGen/AArch64/a57-csel.ll
index 9d16d1a0f104..f5496f777765 100644
--- a/test/CodeGen/AArch64/a57-csel.ll
+++ b/test/CodeGen/AArch64/a57-csel.ll
@@ -3,7 +3,7 @@
; Check that the select is expanded into a branch sequence.
define i64 @f(i64 %a, i64 %b, i64* %c, i64 %d, i64 %e) {
; CHECK: cbz
- %x0 = load i64* %c
+ %x0 = load i64, i64* %c
%x1 = icmp eq i64 %x0, 0
%x2 = select i1 %x1, i64 %a, i64 %b
%x3 = add i64 %x2, %d
diff --git a/test/CodeGen/AArch64/aarch64-2014-08-11-MachineCombinerCrash.ll b/test/CodeGen/AArch64/aarch64-2014-08-11-MachineCombinerCrash.ll
index 73ee522cbf55..b075573cc674 100644
--- a/test/CodeGen/AArch64/aarch64-2014-08-11-MachineCombinerCrash.ll
+++ b/test/CodeGen/AArch64/aarch64-2014-08-11-MachineCombinerCrash.ll
@@ -8,15 +8,15 @@ entry:
br label %for.body, !dbg !39
for.body: ; preds = %for.body, %entry
- %arrayidx5 = getelementptr inbounds i32* null, i64 1, !dbg !43
- %0 = load i32* null, align 4, !dbg !45, !tbaa !46
+ %arrayidx5 = getelementptr inbounds i32, i32* null, i64 1, !dbg !43
+ %0 = load i32, i32* null, align 4, !dbg !45, !tbaa !46
%s1 = sub nsw i32 0, %0, !dbg !50
%n1 = sext i32 %s1 to i64, !dbg !50
- %arrayidx21 = getelementptr inbounds i32* null, i64 3, !dbg !51
+ %arrayidx21 = getelementptr inbounds i32, i32* null, i64 3, !dbg !51
%add53 = add nsw i64 %n1, 0, !dbg !52
%add55 = add nsw i64 %n1, 0, !dbg !53
%mul63 = mul nsw i64 %add53, -20995, !dbg !54
- tail call void @llvm.dbg.value(metadata i64 %mul63, i64 0, metadata !30, metadata !{!"0x102"}), !dbg !55
+ tail call void @llvm.dbg.value(metadata i64 %mul63, i64 0, metadata !30, metadata !DIExpression()), !dbg !55
%mul65 = mul nsw i64 %add55, -3196, !dbg !56
%add67 = add nsw i64 0, %mul65, !dbg !57
%add80 = add i64 0, 1024, !dbg !58
@@ -44,63 +44,60 @@ attributes #1 = { nounwind readnone }
!llvm.module.flags = !{!36, !37}
!llvm.ident = !{!38}
-!0 = !{!"0x11\0012\00clang version 3.6.0 \001\00\000\00\001", !1, !2, !2, !3, !2, !2} ; [ DW_TAG_compile_unit ] [] [] []
-!1 = !{!"test.c", !""}
+!0 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.6.0 ", isOptimized: true, emissionKind: 1, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!1 = !DIFile(filename: "test.c", directory: "")
!2 = !{}
!3 = !{!4}
-!4 = !{!"0x2e\00\00\00\00140\000\001\000\006\00256\001\00141", !1, !5, !6, null, void ()* @test, null, null, !12} ; [ DW_TAG_subprogram ] [] [] [def] [scope 141] []
-!5 = !{!"0x29", !1} ; [ DW_TAG_file_type ] [] []
-!6 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !7, null, null, null} ; [ DW_TAG_subroutine_type ] [] [] [from ]
+!4 = !DISubprogram(name: "", line: 140, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 141, file: !1, scope: !1, type: !6, function: void ()* @test, variables: !12)
+!6 = !DISubroutineType(types: !7)
!7 = !{null, !8}
-!8 = !{!"0xf\00\000\0064\0064\000\000", null, null, !9} ; [ DW_TAG_pointer_type ] [] [] []
-!9 = !{!"0x16\00\0030\000\000\000\000", !10, null, !11} ; [ DW_TAG_typedef ] [] [] [] [from int]
-!10 = !{!"", !""}
-!11 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [] [int] []
+!8 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, baseType: !9)
+!9 = !DIDerivedType(tag: DW_TAG_typedef, line: 30, file: !1, baseType: !11)
+!11 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
!12 = !{!13, !14, !18, !19, !20, !21, !22, !23, !24, !25, !26, !27, !28, !29, !30, !31, !32, !33, !34, !35}
-!13 = !{!"0x101\00\0016777356\000", !4, !5, !8} ; [ DW_TAG_arg_variable ] [] [data] []
-!14 = !{!"0x100\00\00142\000", !4, !5, !15} ; [ DW_TAG_auto_variable ] [] [] []
-!15 = !{!"0x16\00\00183\000\000\000\000", !16, null, !17} ; [ DW_TAG_typedef ] [] [INT32] [] [from long int]
-!16 = !{!"", !""}
-!17 = !{!"0x24\00\000\0064\0064\000\000\005", null, null} ; [ DW_TAG_base_type ] [] [long int] []
-!18 = !{!"0x100\00\00142\000", !4, !5, !15} ; [ DW_TAG_auto_variable ] [] [] []
-!19 = !{!"0x100\00\00142\000", !4, !5, !15} ; [ DW_TAG_auto_variable ] [] [] []
-!20 = !{!"0x100\00\00142\000", !4, !5, !15} ; [ DW_TAG_auto_variable ] [] [] []
-!21 = !{!"0x100\00\00142\000", !4, !5, !15} ; [ DW_TAG_auto_variable ] [] [] []
-!22 = !{!"0x100\00\00142\000", !4, !5, !15} ; [ DW_TAG_auto_variable ] [] [] []
-!23 = !{!"0x100\00\00142\000", !4, !5, !15} ; [ DW_TAG_auto_variable ] [] [] []
-!24 = !{!"0x100\00\00142\000", !4, !5, !15} ; [ DW_TAG_auto_variable ] [ ] [] []
-!25 = !{!"0x100\00\00143\000", !4, !5, !15} ; [ DW_TAG_auto_variable ] [ ] [] []
-!26 = !{!"0x100\00\00143\000", !4, !5, !15} ; [ DW_TAG_auto_variable ] [ ] [] []
-!27 = !{!"0x100\00\00143\000", !4, !5, !15} ; [ DW_TAG_auto_variable ] [ ] [] []
-!28 = !{!"0x100\00\00143\000", !4, !5, !15} ; [ DW_TAG_auto_variable ] [ ] [] []
-!29 = !{!"0x100\00\00144\000", !4, !5, !15} ; [ DW_TAG_auto_variable ] [ ] [] []
-!30 = !{!"0x100\00\00144\000", !4, !5, !15} ; [ DW_TAG_auto_variable ] [ ] [] []
-!31 = !{!"0x100\00\00144\000", !4, !5, !15} ; [ DW_TAG_auto_variable ] [ ] [] []
-!32 = !{!"0x100\00\00144\000", !4, !5, !15} ; [ DW_TAG_auto_variable ] [ ] [] []
-!33 = !{!"0x100\00\00144\000", !4, !5, !15} ; [ DW_TAG_auto_variable ] [ ] [] []
-!34 = !{!"0x100\00\00145\000", !4, !5, !8} ; [ DW_TAG_auto_variable ] [ ] [] []
-!35 = !{!"0x100\00\00146\000", !4, !5, !11} ; [ DW_TAG_auto_variable ] [ ] [] []
+!13 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "", line: 140, arg: 1, scope: !4, file: !1, type: !8)
+!14 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "", line: 142, scope: !4, file: !1, type: !15)
+!15 = !DIDerivedType(tag: DW_TAG_typedef, line: 183, file: !1, baseType: !17)
+!17 = !DIBasicType(tag: DW_TAG_base_type, size: 64, align: 64, encoding: DW_ATE_signed)
+!18 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "", line: 142, scope: !4, file: !1, type: !15)
+!19 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "", line: 142, scope: !4, file: !1, type: !15)
+!20 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "", line: 142, scope: !4, file: !1, type: !15)
+!21 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "", line: 142, scope: !4, file: !1, type: !15)
+!22 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "", line: 142, scope: !4, file: !1, type: !15)
+!23 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "", line: 142, scope: !4, file: !1, type: !15)
+!24 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "", line: 142, scope: !4, file: !1, type: !15)
+!25 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "", line: 143, scope: !4, file: !1, type: !15)
+!26 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "", line: 143, scope: !4, file: !1, type: !15)
+!27 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "", line: 143, scope: !4, file: !1, type: !15)
+!28 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "", line: 143, scope: !4, file: !1, type: !15)
+!29 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "", line: 144, scope: !4, file: !1, type: !15)
+!30 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "", line: 144, scope: !4, file: !1, type: !15)
+!31 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "", line: 144, scope: !4, file: !1, type: !15)
+!32 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "", line: 144, scope: !4, file: !1, type: !15)
+!33 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "", line: 144, scope: !4, file: !1, type: !15)
+!34 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "", line: 145, scope: !4, file: !1, type: !8)
+!35 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "", line: 146, scope: !4, file: !1, type: !11)
!36 = !{i32 2, !"Dwarf Version", i32 4}
-!37 = !{i32 2, !"Debug Info Version", i32 2}
+!37 = !{i32 2, !"Debug Info Version", i32 3}
!38 = !{!"clang version 3.6.0 "}
-!39 = !MDLocation(line: 154, column: 8, scope: !40)
-!40 = !{!"0xb\00154\008\002", !1, !41} ; [ DW_TAG_lexical_block ] [ ] []
-!41 = !{!"0xb\00154\008\001", !1, !42} ; [ DW_TAG_lexical_block ] [ ] []
-!42 = !{!"0xb\00154\003\000", !1, !4} ; [ DW_TAG_lexical_block ] [ ] []
-!43 = !MDLocation(line: 157, column: 5, scope: !44)
-!44 = !{!"0xb\00154\0042\000", !1, !42} ; [ DW_TAG_lexical_block ] [ ] []
-!45 = !MDLocation(line: 159, column: 5, scope: !44)
+!39 = !DILocation(line: 154, column: 8, scope: !40)
+!40 = distinct !DILexicalBlock(line: 154, column: 8, file: !1, scope: !41)
+!41 = distinct !DILexicalBlock(line: 154, column: 8, file: !1, scope: !42)
+!42 = distinct !DILexicalBlock(line: 154, column: 3, file: !1, scope: !4)
+!43 = !DILocation(line: 157, column: 5, scope: !44)
+!44 = distinct !DILexicalBlock(line: 154, column: 42, file: !1, scope: !42)
+!45 = !DILocation(line: 159, column: 5, scope: !44)
!46 = !{!47, !47, i64 0}
!47 = !{!"int", !48, i64 0}
!48 = !{!"omnipotent char", !49, i64 0}
!49 = !{!"Simple C/C++ TBAA"}
-!50 = !MDLocation(line: 160, column: 5, scope: !44)
-!51 = !MDLocation(line: 161, column: 5, scope: !44)
-!52 = !MDLocation(line: 188, column: 5, scope: !44)
-!53 = !MDLocation(line: 190, column: 5, scope: !44)
-!54 = !MDLocation(line: 198, column: 5, scope: !44)
-!55 = !MDLocation(line: 144, column: 13, scope: !4)
-!56 = !MDLocation(line: 200, column: 5, scope: !44)
-!57 = !MDLocation(line: 203, column: 5, scope: !44)
-!58 = !MDLocation(line: 207, column: 5, scope: !44)
-!59 = !MDLocation(line: 208, column: 5, scope: !44)
+!50 = !DILocation(line: 160, column: 5, scope: !44)
+!51 = !DILocation(line: 161, column: 5, scope: !44)
+!52 = !DILocation(line: 188, column: 5, scope: !44)
+!53 = !DILocation(line: 190, column: 5, scope: !44)
+!54 = !DILocation(line: 198, column: 5, scope: !44)
+!55 = !DILocation(line: 144, column: 13, scope: !4)
+!56 = !DILocation(line: 200, column: 5, scope: !44)
+!57 = !DILocation(line: 203, column: 5, scope: !44)
+!58 = !DILocation(line: 207, column: 5, scope: !44)
+!59 = !DILocation(line: 208, column: 5, scope: !44)
diff --git a/test/CodeGen/AArch64/aarch64-2014-12-02-combine-soften.ll b/test/CodeGen/AArch64/aarch64-2014-12-02-combine-soften.ll
index 455325166505..b2ee517f8868 100644
--- a/test/CodeGen/AArch64/aarch64-2014-12-02-combine-soften.ll
+++ b/test/CodeGen/AArch64/aarch64-2014-12-02-combine-soften.ll
@@ -9,7 +9,7 @@ define void @foo() {
entry:
;CHECK-LABEL: foo:
;CHECK: __floatsisf
- %0 = load i32* @x, align 4
+ %0 = load i32, i32* @x, align 4
%conv = sitofp i32 %0 to float
store float %conv, float* bitcast (i32* @t to float*), align 4
ret void
diff --git a/test/CodeGen/AArch64/aarch64-a57-fp-load-balancing.ll b/test/CodeGen/AArch64/aarch64-a57-fp-load-balancing.ll
index 7108bc0e91a9..b0e9d4aa7703 100644
--- a/test/CodeGen/AArch64/aarch64-a57-fp-load-balancing.ll
+++ b/test/CodeGen/AArch64/aarch64-a57-fp-load-balancing.ll
@@ -29,15 +29,15 @@ target triple = "aarch64"
define void @f1(double* nocapture readonly %p, double* nocapture %q) #0 {
entry:
- %0 = load double* %p, align 8
- %arrayidx1 = getelementptr inbounds double* %p, i64 1
- %1 = load double* %arrayidx1, align 8
- %arrayidx2 = getelementptr inbounds double* %p, i64 2
- %2 = load double* %arrayidx2, align 8
- %arrayidx3 = getelementptr inbounds double* %p, i64 3
- %3 = load double* %arrayidx3, align 8
- %arrayidx4 = getelementptr inbounds double* %p, i64 4
- %4 = load double* %arrayidx4, align 8
+ %0 = load double, double* %p, align 8
+ %arrayidx1 = getelementptr inbounds double, double* %p, i64 1
+ %1 = load double, double* %arrayidx1, align 8
+ %arrayidx2 = getelementptr inbounds double, double* %p, i64 2
+ %2 = load double, double* %arrayidx2, align 8
+ %arrayidx3 = getelementptr inbounds double, double* %p, i64 3
+ %3 = load double, double* %arrayidx3, align 8
+ %arrayidx4 = getelementptr inbounds double, double* %p, i64 4
+ %4 = load double, double* %arrayidx4, align 8
%mul = fmul fast double %0, %1
%add = fadd fast double %mul, %4
%mul5 = fmul fast double %1, %2
@@ -47,18 +47,18 @@ entry:
%mul8 = fmul fast double %2, %3
%add9 = fadd fast double %mul8, %sub
store double %add9, double* %q, align 8
- %arrayidx11 = getelementptr inbounds double* %p, i64 5
- %5 = load double* %arrayidx11, align 8
- %arrayidx12 = getelementptr inbounds double* %p, i64 6
- %6 = load double* %arrayidx12, align 8
- %arrayidx13 = getelementptr inbounds double* %p, i64 7
- %7 = load double* %arrayidx13, align 8
+ %arrayidx11 = getelementptr inbounds double, double* %p, i64 5
+ %5 = load double, double* %arrayidx11, align 8
+ %arrayidx12 = getelementptr inbounds double, double* %p, i64 6
+ %6 = load double, double* %arrayidx12, align 8
+ %arrayidx13 = getelementptr inbounds double, double* %p, i64 7
+ %7 = load double, double* %arrayidx13, align 8
%mul15 = fmul fast double %6, %7
%mul16 = fmul fast double %0, %5
%add17 = fadd fast double %mul16, %mul15
%mul18 = fmul fast double %5, %6
%add19 = fadd fast double %mul18, %add17
- %arrayidx20 = getelementptr inbounds double* %q, i64 1
+ %arrayidx20 = getelementptr inbounds double, double* %q, i64 1
store double %add19, double* %arrayidx20, align 8
ret void
}
@@ -81,21 +81,21 @@ entry:
define void @f2(double* nocapture readonly %p, double* nocapture %q) #0 {
entry:
- %0 = load double* %p, align 8
- %arrayidx1 = getelementptr inbounds double* %p, i64 1
- %1 = load double* %arrayidx1, align 8
- %arrayidx2 = getelementptr inbounds double* %p, i64 2
- %2 = load double* %arrayidx2, align 8
- %arrayidx3 = getelementptr inbounds double* %p, i64 3
- %3 = load double* %arrayidx3, align 8
- %arrayidx4 = getelementptr inbounds double* %p, i64 4
- %4 = load double* %arrayidx4, align 8
- %arrayidx5 = getelementptr inbounds double* %p, i64 5
- %5 = load double* %arrayidx5, align 8
- %arrayidx6 = getelementptr inbounds double* %p, i64 6
- %6 = load double* %arrayidx6, align 8
- %arrayidx7 = getelementptr inbounds double* %p, i64 7
- %7 = load double* %arrayidx7, align 8
+ %0 = load double, double* %p, align 8
+ %arrayidx1 = getelementptr inbounds double, double* %p, i64 1
+ %1 = load double, double* %arrayidx1, align 8
+ %arrayidx2 = getelementptr inbounds double, double* %p, i64 2
+ %2 = load double, double* %arrayidx2, align 8
+ %arrayidx3 = getelementptr inbounds double, double* %p, i64 3
+ %3 = load double, double* %arrayidx3, align 8
+ %arrayidx4 = getelementptr inbounds double, double* %p, i64 4
+ %4 = load double, double* %arrayidx4, align 8
+ %arrayidx5 = getelementptr inbounds double, double* %p, i64 5
+ %5 = load double, double* %arrayidx5, align 8
+ %arrayidx6 = getelementptr inbounds double, double* %p, i64 6
+ %6 = load double, double* %arrayidx6, align 8
+ %arrayidx7 = getelementptr inbounds double, double* %p, i64 7
+ %7 = load double, double* %arrayidx7, align 8
%mul = fmul fast double %0, %1
%add = fadd fast double %mul, %7
%mul8 = fmul fast double %5, %6
@@ -110,7 +110,7 @@ entry:
%mul16 = fmul fast double %2, %3
%add17 = fadd fast double %mul16, %sub
store double %add17, double* %q, align 8
- %arrayidx19 = getelementptr inbounds double* %q, i64 1
+ %arrayidx19 = getelementptr inbounds double, double* %q, i64 1
store double %add15, double* %arrayidx19, align 8
ret void
}
@@ -127,15 +127,15 @@ entry:
define void @f3(double* nocapture readonly %p, double* nocapture %q) #0 {
entry:
- %0 = load double* %p, align 8
- %arrayidx1 = getelementptr inbounds double* %p, i64 1
- %1 = load double* %arrayidx1, align 8
- %arrayidx2 = getelementptr inbounds double* %p, i64 2
- %2 = load double* %arrayidx2, align 8
- %arrayidx3 = getelementptr inbounds double* %p, i64 3
- %3 = load double* %arrayidx3, align 8
- %arrayidx4 = getelementptr inbounds double* %p, i64 4
- %4 = load double* %arrayidx4, align 8
+ %0 = load double, double* %p, align 8
+ %arrayidx1 = getelementptr inbounds double, double* %p, i64 1
+ %1 = load double, double* %arrayidx1, align 8
+ %arrayidx2 = getelementptr inbounds double, double* %p, i64 2
+ %2 = load double, double* %arrayidx2, align 8
+ %arrayidx3 = getelementptr inbounds double, double* %p, i64 3
+ %3 = load double, double* %arrayidx3, align 8
+ %arrayidx4 = getelementptr inbounds double, double* %p, i64 4
+ %4 = load double, double* %arrayidx4, align 8
%mul = fmul fast double %0, %1
%add = fadd fast double %mul, %4
%mul5 = fmul fast double %1, %2
@@ -176,21 +176,21 @@ declare void @g(...) #1
define void @f4(float* nocapture readonly %p, float* nocapture %q) #0 {
entry:
- %0 = load float* %p, align 4
- %arrayidx1 = getelementptr inbounds float* %p, i64 1
- %1 = load float* %arrayidx1, align 4
- %arrayidx2 = getelementptr inbounds float* %p, i64 2
- %2 = load float* %arrayidx2, align 4
- %arrayidx3 = getelementptr inbounds float* %p, i64 3
- %3 = load float* %arrayidx3, align 4
- %arrayidx4 = getelementptr inbounds float* %p, i64 4
- %4 = load float* %arrayidx4, align 4
- %arrayidx5 = getelementptr inbounds float* %p, i64 5
- %5 = load float* %arrayidx5, align 4
- %arrayidx6 = getelementptr inbounds float* %p, i64 6
- %6 = load float* %arrayidx6, align 4
- %arrayidx7 = getelementptr inbounds float* %p, i64 7
- %7 = load float* %arrayidx7, align 4
+ %0 = load float, float* %p, align 4
+ %arrayidx1 = getelementptr inbounds float, float* %p, i64 1
+ %1 = load float, float* %arrayidx1, align 4
+ %arrayidx2 = getelementptr inbounds float, float* %p, i64 2
+ %2 = load float, float* %arrayidx2, align 4
+ %arrayidx3 = getelementptr inbounds float, float* %p, i64 3
+ %3 = load float, float* %arrayidx3, align 4
+ %arrayidx4 = getelementptr inbounds float, float* %p, i64 4
+ %4 = load float, float* %arrayidx4, align 4
+ %arrayidx5 = getelementptr inbounds float, float* %p, i64 5
+ %5 = load float, float* %arrayidx5, align 4
+ %arrayidx6 = getelementptr inbounds float, float* %p, i64 6
+ %6 = load float, float* %arrayidx6, align 4
+ %arrayidx7 = getelementptr inbounds float, float* %p, i64 7
+ %7 = load float, float* %arrayidx7, align 4
%mul = fmul fast float %0, %1
%add = fadd fast float %mul, %7
%mul8 = fmul fast float %5, %6
@@ -205,7 +205,7 @@ entry:
%mul16 = fmul fast float %2, %3
%add17 = fadd fast float %mul16, %sub
store float %add17, float* %q, align 4
- %arrayidx19 = getelementptr inbounds float* %q, i64 1
+ %arrayidx19 = getelementptr inbounds float, float* %q, i64 1
store float %add15, float* %arrayidx19, align 4
ret void
}
@@ -222,15 +222,15 @@ entry:
define void @f5(float* nocapture readonly %p, float* nocapture %q) #0 {
entry:
- %0 = load float* %p, align 4
- %arrayidx1 = getelementptr inbounds float* %p, i64 1
- %1 = load float* %arrayidx1, align 4
- %arrayidx2 = getelementptr inbounds float* %p, i64 2
- %2 = load float* %arrayidx2, align 4
- %arrayidx3 = getelementptr inbounds float* %p, i64 3
- %3 = load float* %arrayidx3, align 4
- %arrayidx4 = getelementptr inbounds float* %p, i64 4
- %4 = load float* %arrayidx4, align 4
+ %0 = load float, float* %p, align 4
+ %arrayidx1 = getelementptr inbounds float, float* %p, i64 1
+ %1 = load float, float* %arrayidx1, align 4
+ %arrayidx2 = getelementptr inbounds float, float* %p, i64 2
+ %2 = load float, float* %arrayidx2, align 4
+ %arrayidx3 = getelementptr inbounds float, float* %p, i64 3
+ %3 = load float, float* %arrayidx3, align 4
+ %arrayidx4 = getelementptr inbounds float, float* %p, i64 4
+ %4 = load float, float* %arrayidx4, align 4
%mul = fmul fast float %0, %1
%add = fadd fast float %mul, %4
%mul5 = fmul fast float %1, %2
@@ -264,15 +264,15 @@ if.end: ; preds = %if.then, %entry
define void @f6(double* nocapture readonly %p, double* nocapture %q) #0 {
entry:
- %0 = load double* %p, align 8
- %arrayidx1 = getelementptr inbounds double* %p, i64 1
- %1 = load double* %arrayidx1, align 8
- %arrayidx2 = getelementptr inbounds double* %p, i64 2
- %2 = load double* %arrayidx2, align 8
- %arrayidx3 = getelementptr inbounds double* %p, i64 3
- %3 = load double* %arrayidx3, align 8
- %arrayidx4 = getelementptr inbounds double* %p, i64 4
- %4 = load double* %arrayidx4, align 8
+ %0 = load double, double* %p, align 8
+ %arrayidx1 = getelementptr inbounds double, double* %p, i64 1
+ %1 = load double, double* %arrayidx1, align 8
+ %arrayidx2 = getelementptr inbounds double, double* %p, i64 2
+ %2 = load double, double* %arrayidx2, align 8
+ %arrayidx3 = getelementptr inbounds double, double* %p, i64 3
+ %3 = load double, double* %arrayidx3, align 8
+ %arrayidx4 = getelementptr inbounds double, double* %p, i64 4
+ %4 = load double, double* %arrayidx4, align 8
%mul = fmul fast double %0, %1
%add = fadd fast double %mul, %4
%mul5 = fmul fast double %1, %2
@@ -299,15 +299,15 @@ declare double @hh(double) #1
define void @f7(double* nocapture readonly %p, double* nocapture %q) #0 {
entry:
- %0 = load double* %p, align 8
- %arrayidx1 = getelementptr inbounds double* %p, i64 1
- %1 = load double* %arrayidx1, align 8
- %arrayidx2 = getelementptr inbounds double* %p, i64 2
- %2 = load double* %arrayidx2, align 8
- %arrayidx3 = getelementptr inbounds double* %p, i64 3
- %3 = load double* %arrayidx3, align 8
- %arrayidx4 = getelementptr inbounds double* %p, i64 4
- %4 = load double* %arrayidx4, align 8
+ %0 = load double, double* %p, align 8
+ %arrayidx1 = getelementptr inbounds double, double* %p, i64 1
+ %1 = load double, double* %arrayidx1, align 8
+ %arrayidx2 = getelementptr inbounds double, double* %p, i64 2
+ %2 = load double, double* %arrayidx2, align 8
+ %arrayidx3 = getelementptr inbounds double, double* %p, i64 3
+ %3 = load double, double* %arrayidx3, align 8
+ %arrayidx4 = getelementptr inbounds double, double* %p, i64 4
+ %4 = load double, double* %arrayidx4, align 8
%mul = fmul fast double %0, %1
%add = fadd fast double %mul, %4
%mul5 = fmul fast double %1, %2
diff --git a/test/CodeGen/AArch64/aarch64-address-type-promotion-assertion.ll b/test/CodeGen/AArch64/aarch64-address-type-promotion-assertion.ll
index 2df9c375bdce..0c6be21f8907 100644
--- a/test/CodeGen/AArch64/aarch64-address-type-promotion-assertion.ll
+++ b/test/CodeGen/AArch64/aarch64-address-type-promotion-assertion.ll
@@ -11,14 +11,14 @@ invoke.cont145:
br i1 %or.cond, label %if.then274, label %invoke.cont145
if.then274:
- %0 = load i32* null, align 4
+ %0 = load i32, i32* null, align 4
br i1 undef, label %invoke.cont291, label %if.else313
invoke.cont291:
%idxprom.i.i.i605 = sext i32 %0 to i64
- %arrayidx.i.i.i607 = getelementptr inbounds double* undef, i64 %idxprom.i.i.i605
+ %arrayidx.i.i.i607 = getelementptr inbounds double, double* undef, i64 %idxprom.i.i.i605
%idxprom.i.i.i596 = sext i32 %0 to i64
- %arrayidx.i.i.i598 = getelementptr inbounds double* undef, i64 %idxprom.i.i.i596
+ %arrayidx.i.i.i598 = getelementptr inbounds double, double* undef, i64 %idxprom.i.i.i596
br label %if.end356
if.else313:
@@ -30,7 +30,7 @@ invoke.cont317:
invoke.cont326:
%idxprom.i.i.i587 = sext i32 %0 to i64
- %arrayidx.i.i.i589 = getelementptr inbounds double* undef, i64 %idxprom.i.i.i587
+ %arrayidx.i.i.i589 = getelementptr inbounds double, double* undef, i64 %idxprom.i.i.i587
%sub329 = fsub fast double undef, undef
br label %invoke.cont334
@@ -40,12 +40,12 @@ invoke.cont334:
invoke.cont342:
%idxprom.i.i.i578 = sext i32 %0 to i64
- %arrayidx.i.i.i580 = getelementptr inbounds double* undef, i64 %idxprom.i.i.i578
+ %arrayidx.i.i.i580 = getelementptr inbounds double, double* undef, i64 %idxprom.i.i.i578
br label %if.end356
invoke.cont353:
%idxprom.i.i.i572 = sext i32 %0 to i64
- %arrayidx.i.i.i574 = getelementptr inbounds double* undef, i64 %idxprom.i.i.i572
+ %arrayidx.i.i.i574 = getelementptr inbounds double, double* undef, i64 %idxprom.i.i.i572
br label %if.end356
if.end356:
diff --git a/test/CodeGen/AArch64/aarch64-address-type-promotion.ll b/test/CodeGen/AArch64/aarch64-address-type-promotion.ll
index ee90d199b458..07e0ba654d21 100644
--- a/test/CodeGen/AArch64/aarch64-address-type-promotion.ll
+++ b/test/CodeGen/AArch64/aarch64-address-type-promotion.ll
@@ -14,15 +14,15 @@ entry:
; CHECK-NEXT: ret
%add = add nsw i32 %i, 1
%idxprom = sext i32 %add to i64
- %arrayidx = getelementptr inbounds i32* %a, i64 %idxprom
- %0 = load i32* %arrayidx, align 4
+ %arrayidx = getelementptr inbounds i32, i32* %a, i64 %idxprom
+ %0 = load i32, i32* %arrayidx, align 4
%add1 = add nsw i32 %i, 2
%idxprom2 = sext i32 %add1 to i64
- %arrayidx3 = getelementptr inbounds i32* %a, i64 %idxprom2
- %1 = load i32* %arrayidx3, align 4
+ %arrayidx3 = getelementptr inbounds i32, i32* %a, i64 %idxprom2
+ %1 = load i32, i32* %arrayidx3, align 4
%add4 = add nsw i32 %1, %0
%idxprom5 = sext i32 %i to i64
- %arrayidx6 = getelementptr inbounds i32* %a, i64 %idxprom5
+ %arrayidx6 = getelementptr inbounds i32, i32* %a, i64 %idxprom5
store i32 %add4, i32* %arrayidx6, align 4
ret void
}
diff --git a/test/CodeGen/AArch64/aarch64-be-bv.ll b/test/CodeGen/AArch64/aarch64-be-bv.ll
index 01642a4f3bf8..fb41156c09df 100644
--- a/test/CodeGen/AArch64/aarch64-be-bv.ll
+++ b/test/CodeGen/AArch64/aarch64-be-bv.ll
@@ -8,7 +8,7 @@ define i16 @movi_modimm_t1() nounwind {
; CHECK-NEXT: movi v[[REG2:[0-9]+]].4s, #0x1
; CHECK-NEXT: add v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h
; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0]
- %in = load <8 x i16>* @vec_v8i16
+ %in = load <8 x i16>, <8 x i16>* @vec_v8i16
%rv = add <8 x i16> %in, <i16 1, i16 0, i16 1, i16 0, i16 1, i16 0, i16 1, i16 0>
%el = extractelement <8 x i16> %rv, i32 0
ret i16 %el
@@ -20,7 +20,7 @@ define i16 @movi_modimm_t2() nounwind {
; CHECK-NEXT: movi v[[REG2:[0-9]+]].4s, #0x1, lsl #8
; CHECK-NEXT: add v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h
; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0]
- %in = load <8 x i16>* @vec_v8i16
+ %in = load <8 x i16>, <8 x i16>* @vec_v8i16
%rv = add <8 x i16> %in, <i16 256, i16 0, i16 256, i16 0, i16 256, i16 0, i16 256, i16 0>
%el = extractelement <8 x i16> %rv, i32 0
ret i16 %el
@@ -32,7 +32,7 @@ define i16 @movi_modimm_t3() nounwind {
; CHECK-NEXT: movi v[[REG2:[0-9]+]].4s, #0x1, lsl #16
; CHECK-NEXT: add v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h
; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0]
- %in = load <8 x i16>* @vec_v8i16
+ %in = load <8 x i16>, <8 x i16>* @vec_v8i16
%rv = add <8 x i16> %in, <i16 0, i16 1, i16 0, i16 1, i16 0, i16 1, i16 0, i16 1>
%el = extractelement <8 x i16> %rv, i32 0
ret i16 %el
@@ -44,7 +44,7 @@ define i16 @movi_modimm_t4() nounwind {
; CHECK-NEXT: movi v[[REG2:[0-9]+]].4s, #0x1, lsl #24
; CHECK-NEXT: add v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h
; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0]
- %in = load <8 x i16>* @vec_v8i16
+ %in = load <8 x i16>, <8 x i16>* @vec_v8i16
%rv = add <8 x i16> %in, <i16 0, i16 256, i16 0, i16 256, i16 0, i16 256, i16 0, i16 256>
%el = extractelement <8 x i16> %rv, i32 0
ret i16 %el
@@ -56,7 +56,7 @@ define i16 @movi_modimm_t5() nounwind {
; CHECK-NEXT: movi v[[REG2:[0-9]+]].8h, #0x1
; CHECK-NEXT: add v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h
; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0]
- %in = load <8 x i16>* @vec_v8i16
+ %in = load <8 x i16>, <8 x i16>* @vec_v8i16
%rv = add <8 x i16> %in, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
%el = extractelement <8 x i16> %rv, i32 0
ret i16 %el
@@ -68,7 +68,7 @@ define i16 @movi_modimm_t6() nounwind {
; CHECK-NEXT: movi v[[REG2:[0-9]+]].8h, #0x1, lsl #8
; CHECK-NEXT: add v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h
; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0]
- %in = load <8 x i16>* @vec_v8i16
+ %in = load <8 x i16>, <8 x i16>* @vec_v8i16
%rv = add <8 x i16> %in, <i16 256, i16 256, i16 256, i16 256, i16 256, i16 256, i16 256, i16 256>
%el = extractelement <8 x i16> %rv, i32 0
ret i16 %el
@@ -80,7 +80,7 @@ define i16 @movi_modimm_t7() nounwind {
; CHECK-NEXT: movi v[[REG2:[0-9]+]].4s, #0x1, msl #8
; CHECK-NEXT: add v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h
; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0]
- %in = load <8 x i16>* @vec_v8i16
+ %in = load <8 x i16>, <8 x i16>* @vec_v8i16
%rv = add <8 x i16> %in, <i16 511, i16 0, i16 511, i16 0, i16 511, i16 0, i16 511, i16 0>
%el = extractelement <8 x i16> %rv, i32 0
ret i16 %el
@@ -92,7 +92,7 @@ define i16 @movi_modimm_t8() nounwind {
; CHECK-NEXT: movi v[[REG2:[0-9]+]].4s, #0x1, msl #16
; CHECK-NEXT: add v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h
; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0]
- %in = load <8 x i16>* @vec_v8i16
+ %in = load <8 x i16>, <8 x i16>* @vec_v8i16
%rv = add <8 x i16> %in, <i16 65535, i16 1, i16 65535, i16 1, i16 65535, i16 1, i16 65535, i16 1>
%el = extractelement <8 x i16> %rv, i32 0
ret i16 %el
@@ -104,7 +104,7 @@ define i16 @movi_modimm_t9() nounwind {
; CHECK-NEXT: movi v[[REG2:[0-9]+]].16b, #0x1
; CHECK-NEXT: add v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h
; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0]
- %in = load <8 x i16>* @vec_v8i16
+ %in = load <8 x i16>, <8 x i16>* @vec_v8i16
%rv = add <8 x i16> %in, <i16 257, i16 257, i16 257, i16 257, i16 257, i16 257, i16 257, i16 257>
%el = extractelement <8 x i16> %rv, i32 0
ret i16 %el
@@ -116,7 +116,7 @@ define i16 @movi_modimm_t10() nounwind {
; CHECK-NEXT: movi v[[REG2:[0-9]+]].2d, #0x00ffff0000ffff
; CHECK-NEXT: add v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h
; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0]
- %in = load <8 x i16>* @vec_v8i16
+ %in = load <8 x i16>, <8 x i16>* @vec_v8i16
%rv = add <8 x i16> %in, <i16 -1, i16 0, i16 -1, i16 0, i16 -1, i16 0, i16 -1, i16 0>
%el = extractelement <8 x i16> %rv, i32 0
ret i16 %el
@@ -128,7 +128,7 @@ define i16 @fmov_modimm_t11() nounwind {
; CHECK-NEXT: fmov v[[REG2:[0-9]+]].4s, #3.00000000
; CHECK-NEXT: add v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h
; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0]
- %in = load <8 x i16>* @vec_v8i16
+ %in = load <8 x i16>, <8 x i16>* @vec_v8i16
%rv = add <8 x i16> %in, <i16 0, i16 16448, i16 0, i16 16448, i16 0, i16 16448, i16 0, i16 16448>
%el = extractelement <8 x i16> %rv, i32 0
ret i16 %el
@@ -140,7 +140,7 @@ define i16 @fmov_modimm_t12() nounwind {
; CHECK-NEXT: fmov v[[REG2:[0-9]+]].2d, #0.17968750
; CHECK-NEXT: add v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h
; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0]
- %in = load <8 x i16>* @vec_v8i16
+ %in = load <8 x i16>, <8 x i16>* @vec_v8i16
%rv = add <8 x i16> %in, <i16 0, i16 0, i16 0, i16 16327, i16 0, i16 0, i16 0, i16 16327>
%el = extractelement <8 x i16> %rv, i32 0
ret i16 %el
@@ -152,7 +152,7 @@ define i16 @mvni_modimm_t1() nounwind {
; CHECK-NEXT: mvni v[[REG2:[0-9]+]].4s, #0x1
; CHECK-NEXT: add v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h
; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0]
- %in = load <8 x i16>* @vec_v8i16
+ %in = load <8 x i16>, <8 x i16>* @vec_v8i16
%rv = add <8 x i16> %in, <i16 65534, i16 65535, i16 65534, i16 65535, i16 65534, i16 65535, i16 65534, i16 65535>
%el = extractelement <8 x i16> %rv, i32 0
ret i16 %el
@@ -164,7 +164,7 @@ define i16 @mvni_modimm_t2() nounwind {
; CHECK-NEXT: mvni v[[REG2:[0-9]+]].4s, #0x1, lsl #8
; CHECK-NEXT: add v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h
; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0]
- %in = load <8 x i16>* @vec_v8i16
+ %in = load <8 x i16>, <8 x i16>* @vec_v8i16
%rv = add <8 x i16> %in, <i16 65279, i16 65535, i16 65279, i16 65535, i16 65279, i16 65535, i16 65279, i16 65535>
%el = extractelement <8 x i16> %rv, i32 0
ret i16 %el
@@ -176,7 +176,7 @@ define i16 @mvni_modimm_t3() nounwind {
; CHECK-NEXT: mvni v[[REG2:[0-9]+]].4s, #0x1, lsl #16
; CHECK-NEXT: add v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h
; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0]
- %in = load <8 x i16>* @vec_v8i16
+ %in = load <8 x i16>, <8 x i16>* @vec_v8i16
%rv = add <8 x i16> %in, <i16 65535, i16 65534, i16 65535, i16 65534, i16 65535, i16 65534, i16 65535, i16 65534>
%el = extractelement <8 x i16> %rv, i32 0
ret i16 %el
@@ -188,7 +188,7 @@ define i16 @mvni_modimm_t4() nounwind {
; CHECK-NEXT: mvni v[[REG2:[0-9]+]].4s, #0x1, lsl #24
; CHECK-NEXT: add v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h
; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0]
- %in = load <8 x i16>* @vec_v8i16
+ %in = load <8 x i16>, <8 x i16>* @vec_v8i16
%rv = add <8 x i16> %in, <i16 65535, i16 65279, i16 65535, i16 65279, i16 65535, i16 65279, i16 65535, i16 65279>
%el = extractelement <8 x i16> %rv, i32 0
ret i16 %el
@@ -200,7 +200,7 @@ define i16 @mvni_modimm_t5() nounwind {
; CHECK-NEXT: mvni v[[REG2:[0-9]+]].8h, #0x1
; CHECK-NEXT: add v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h
; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0]
- %in = load <8 x i16>* @vec_v8i16
+ %in = load <8 x i16>, <8 x i16>* @vec_v8i16
%rv = add <8 x i16> %in, <i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534>
%el = extractelement <8 x i16> %rv, i32 0
ret i16 %el
@@ -212,7 +212,7 @@ define i16 @mvni_modimm_t6() nounwind {
; CHECK-NEXT: mvni v[[REG2:[0-9]+]].8h, #0x1, lsl #8
; CHECK-NEXT: add v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h
; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0]
- %in = load <8 x i16>* @vec_v8i16
+ %in = load <8 x i16>, <8 x i16>* @vec_v8i16
%rv = add <8 x i16> %in, <i16 65279, i16 65279, i16 65279, i16 65279, i16 65279, i16 65279, i16 65279, i16 65279>
%el = extractelement <8 x i16> %rv, i32 0
ret i16 %el
@@ -224,7 +224,7 @@ define i16 @mvni_modimm_t7() nounwind {
; CHECK-NEXT: mvni v[[REG2:[0-9]+]].4s, #0x1, msl #8
; CHECK-NEXT: add v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h
; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0]
- %in = load <8 x i16>* @vec_v8i16
+ %in = load <8 x i16>, <8 x i16>* @vec_v8i16
%rv = add <8 x i16> %in, <i16 65024, i16 65535, i16 65024, i16 65535, i16 65024, i16 65535, i16 65024, i16 65535>
%el = extractelement <8 x i16> %rv, i32 0
ret i16 %el
@@ -236,7 +236,7 @@ define i16 @mvni_modimm_t8() nounwind {
; CHECK-NEXT: mvni v[[REG2:[0-9]+]].4s, #0x1, msl #16
; CHECK-NEXT: add v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h
; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0]
- %in = load <8 x i16>* @vec_v8i16
+ %in = load <8 x i16>, <8 x i16>* @vec_v8i16
%rv = add <8 x i16> %in, <i16 0, i16 65534, i16 0, i16 65534, i16 0, i16 65534, i16 0, i16 65534>
%el = extractelement <8 x i16> %rv, i32 0
ret i16 %el
@@ -247,7 +247,7 @@ define i16 @bic_modimm_t1() nounwind {
; CHECK: ld1 { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}]
; CHECK-NEXT: bic v[[REG2:[0-9]+]].4s, #0x1
; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0]
- %in = load <8 x i16>* @vec_v8i16
+ %in = load <8 x i16>, <8 x i16>* @vec_v8i16
%rv = and <8 x i16> %in, <i16 65534, i16 65535, i16 65534, i16 65535, i16 65534, i16 65535, i16 65534, i16 65535>
%el = extractelement <8 x i16> %rv, i32 0
ret i16 %el
@@ -258,7 +258,7 @@ define i16 @bic_modimm_t2() nounwind {
; CHECK: ld1 { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}]
; CHECK-NEXT: bic v[[REG2:[0-9]+]].4s, #0x1, lsl #8
; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0]
- %in = load <8 x i16>* @vec_v8i16
+ %in = load <8 x i16>, <8 x i16>* @vec_v8i16
%rv = and <8 x i16> %in, <i16 65279, i16 65535, i16 65279, i16 65535, i16 65279, i16 65535, i16 65279, i16 65535>
%el = extractelement <8 x i16> %rv, i32 0
ret i16 %el
@@ -269,7 +269,7 @@ define i16 @bic_modimm_t3() nounwind {
; CHECK: ld1 { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}]
; CHECK-NEXT: bic v[[REG2:[0-9]+]].4s, #0x1, lsl #16
; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0]
- %in = load <8 x i16>* @vec_v8i16
+ %in = load <8 x i16>, <8 x i16>* @vec_v8i16
%rv = and <8 x i16> %in, <i16 65535, i16 65534, i16 65535, i16 65534, i16 65535, i16 65534, i16 65535, i16 65534>
%el = extractelement <8 x i16> %rv, i32 0
ret i16 %el
@@ -280,7 +280,7 @@ define i16 @bic_modimm_t4() nounwind {
; CHECK: ld1 { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}]
; CHECK-NEXT: bic v[[REG2:[0-9]+]].4s, #0x1, lsl #24
; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0]
- %in = load <8 x i16>* @vec_v8i16
+ %in = load <8 x i16>, <8 x i16>* @vec_v8i16
%rv = and <8 x i16> %in, <i16 65535, i16 65279, i16 65535, i16 65279, i16 65535, i16 65279, i16 65535, i16 65279>
%el = extractelement <8 x i16> %rv, i32 0
ret i16 %el
@@ -291,7 +291,7 @@ define i16 @bic_modimm_t5() nounwind {
; CHECK: ld1 { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}]
; CHECK-NEXT: bic v[[REG2:[0-9]+]].8h, #0x1
; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0]
- %in = load <8 x i16>* @vec_v8i16
+ %in = load <8 x i16>, <8 x i16>* @vec_v8i16
%rv = and <8 x i16> %in, <i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534>
%el = extractelement <8 x i16> %rv, i32 0
ret i16 %el
@@ -302,7 +302,7 @@ define i16 @bic_modimm_t6() nounwind {
; CHECK: ld1 { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}]
; CHECK-NEXT: bic v[[REG2:[0-9]+]].8h, #0x1, lsl #8
; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0]
- %in = load <8 x i16>* @vec_v8i16
+ %in = load <8 x i16>, <8 x i16>* @vec_v8i16
%rv = and <8 x i16> %in, <i16 65279, i16 65279, i16 65279, i16 65279, i16 65279, i16 65279, i16 65279, i16 65279>
%el = extractelement <8 x i16> %rv, i32 0
ret i16 %el
@@ -313,7 +313,7 @@ define i16 @orr_modimm_t1() nounwind {
; CHECK: ld1 { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}]
; CHECK-NEXT: orr v[[REG2:[0-9]+]].4s, #0x1
; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0]
- %in = load <8 x i16>* @vec_v8i16
+ %in = load <8 x i16>, <8 x i16>* @vec_v8i16
%rv = or <8 x i16> %in, <i16 1, i16 0, i16 1, i16 0, i16 1, i16 0, i16 1, i16 0>
%el = extractelement <8 x i16> %rv, i32 0
ret i16 %el
@@ -324,7 +324,7 @@ define i16 @orr_modimm_t2() nounwind {
; CHECK: ld1 { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}]
; CHECK-NEXT: orr v[[REG2:[0-9]+]].4s, #0x1, lsl #8
; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0]
- %in = load <8 x i16>* @vec_v8i16
+ %in = load <8 x i16>, <8 x i16>* @vec_v8i16
%rv = or <8 x i16> %in, <i16 256, i16 0, i16 256, i16 0, i16 256, i16 0, i16 256, i16 0>
%el = extractelement <8 x i16> %rv, i32 0
ret i16 %el
@@ -335,7 +335,7 @@ define i16 @orr_modimm_t3() nounwind {
; CHECK: ld1 { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}]
; CHECK-NEXT: orr v[[REG2:[0-9]+]].4s, #0x1, lsl #16
; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0]
- %in = load <8 x i16>* @vec_v8i16
+ %in = load <8 x i16>, <8 x i16>* @vec_v8i16
%rv = or <8 x i16> %in, <i16 0, i16 1, i16 0, i16 1, i16 0, i16 1, i16 0, i16 1>
%el = extractelement <8 x i16> %rv, i32 0
ret i16 %el
@@ -346,7 +346,7 @@ define i16 @orr_modimm_t4() nounwind {
; CHECK: ld1 { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}]
; CHECK-NEXT: orr v[[REG2:[0-9]+]].4s, #0x1, lsl #24
; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0]
- %in = load <8 x i16>* @vec_v8i16
+ %in = load <8 x i16>, <8 x i16>* @vec_v8i16
%rv = or <8 x i16> %in, <i16 0, i16 256, i16 0, i16 256, i16 0, i16 256, i16 0, i16 256>
%el = extractelement <8 x i16> %rv, i32 0
ret i16 %el
@@ -357,7 +357,7 @@ define i16 @orr_modimm_t5() nounwind {
; CHECK: ld1 { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}]
; CHECK-NEXT: orr v[[REG2:[0-9]+]].8h, #0x1
; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0]
- %in = load <8 x i16>* @vec_v8i16
+ %in = load <8 x i16>, <8 x i16>* @vec_v8i16
%rv = or <8 x i16> %in, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
%el = extractelement <8 x i16> %rv, i32 0
ret i16 %el
@@ -368,7 +368,7 @@ define i16 @orr_modimm_t6() nounwind {
; CHECK: ld1 { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}]
; CHECK-NEXT: orr v[[REG2:[0-9]+]].8h, #0x1, lsl #8
; CHECK-NEXT: umov w{{[0-9]+}}, v[[REG1]].h[0]
- %in = load <8 x i16>* @vec_v8i16
+ %in = load <8 x i16>, <8 x i16>* @vec_v8i16
%rv = or <8 x i16> %in, <i16 256, i16 256, i16 256, i16 256, i16 256, i16 256, i16 256, i16 256>
%el = extractelement <8 x i16> %rv, i32 0
ret i16 %el
diff --git a/test/CodeGen/AArch64/aarch64-dynamic-stack-layout.ll b/test/CodeGen/AArch64/aarch64-dynamic-stack-layout.ll
new file mode 100644
index 000000000000..a31c66bad4be
--- /dev/null
+++ b/test/CodeGen/AArch64/aarch64-dynamic-stack-layout.ll
@@ -0,0 +1,491 @@
+; RUN: llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s
+
+; This test aims to check basic correctness of frame layout and
+; frame access code. There are 8 functions in this test file;
+; each function implements one element in the cartesian product
+; of:
+; . a function having a VLA / no VLA
+; . a function with dynamic stack realignment / no dynamic stack realignment
+; . a function needing a frame pointer / no frame pointer
+; since the presence/absence of these influences the frame
+; layout and which pointer (bp, sp or fp) is used to access the
+; various parts of the frame.
+;
+; Furthermore, in every test function:
+; . there is always one integer and one floating-point argument, to be
+; able to check that those are accessed correctly.
+; . there is always one local variable, to check that it is accessed
+; correctly.
+;
+; The LLVM-IR below was produced by clang on the following C++ code:
+;extern "C" int g();
+;extern "C" int novla_nodynamicrealign_call(int i1, int i2, int i3, int i4, int i5, int i6, int i7, int i8, int i9, int i10,
+; double d1, double d2, double d3, double d4, double d5, double d6, double d7, double d8, double d9, double d10)
+;{
+; // use an argument passed on the stack.
+; volatile int l1;
+; return i10 + (int)d10 + l1 + g();
+;}
+;extern "C" int novla_nodynamicrealign_nocall(int i1, int i2, int i3, int i4, int i5, int i6, int i7, int i8, int i9, int i10,
+; double d1, double d2, double d3, double d4, double d5, double d6, double d7, double d8, double d9, double d10)
+;{
+; // use an argument passed on the stack.
+; volatile int l1;
+; return i10 + (int)d10 + l1;
+;}
+;extern "C" int novla_dynamicrealign_call(int i1, int i2, int i3, int i4, int i5, int i6, int i7, int i8, int i9, int i10,
+; double d1, double d2, double d3, double d4, double d5, double d6, double d7, double d8, double d9, double d10)
+;{
+; // use an argument passed on the stack.
+; alignas(128) volatile int l1;
+; return i10 + (int)d10 + l1 + g();
+;}
+;extern "C" int novla_dynamicrealign_nocall(int i1, int i2, int i3, int i4, int i5, int i6, int i7, int i8, int i9, int i10,
+; double d1, double d2, double d3, double d4, double d5, double d6, double d7, double d8, double d9, double d10)
+;{
+; // use an argument passed on the stack.
+; alignas(128) volatile int l1;
+; return i10 + (int)d10 + l1;
+;}
+;
+;extern "C" int vla_nodynamicrealign_call(int i1, int i2, int i3, int i4, int i5, int i6, int i7, int i8, int i9, int i10,
+; double d1, double d2, double d3, double d4, double d5, double d6, double d7, double d8, double d9, double d10)
+;{
+; // use an argument passed on the stack.
+; volatile int l1;
+; volatile int vla[i1];
+; return i10 + (int)d10 + l1 + g() + vla[0];
+;}
+;extern "C" int vla_nodynamicrealign_nocall(int i1, int i2, int i3, int i4, int i5, int i6, int i7, int i8, int i9, int i10,
+; double d1, double d2, double d3, double d4, double d5, double d6, double d7, double d8, double d9, double d10)
+;{
+; // use an argument passed on the stack.
+; volatile int l1;
+; volatile int vla[i1];
+; return i10 + (int)d10 + l1 + vla[0];
+;}
+;extern "C" int vla_dynamicrealign_call(int i1, int i2, int i3, int i4, int i5, int i6, int i7, int i8, int i9, int i10,
+; double d1, double d2, double d3, double d4, double d5, double d6, double d7, double d8, double d9, double d10)
+;{
+; // use an argument passed on the stack.
+; alignas(128) volatile int l1;
+; volatile int vla[i1];
+; return i10 + (int)d10 + l1 + g() + vla[0];
+;}
+;extern "C" int vla_dynamicrealign_nocall(int i1, int i2, int i3, int i4, int i5, int i6, int i7, int i8, int i9, int i10,
+; double d1, double d2, double d3, double d4, double d5, double d6, double d7, double d8, double d9, double d10)
+;{
+; // use an argument passed on the stack.
+; alignas(128) volatile int l1;
+; volatile int vla[i1];
+; return i10 + (int)d10 + l1 + vla[0];
+;}
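+;
+; The file also defines one extra function, vla_dynamicrealign_nocall_large_align,
+; which is not part of the clang output above. A hypothetical C++ source for it
+; (an assumption, shown only for illustration, not produced by the clang run above)
+; would be:
+;extern "C" int vla_dynamicrealign_nocall_large_align(int i1, int i2, int i3, int i4, int i5, int i6, int i7, int i8, int i9, int i10,
+; double d1, double d2, double d3, double d4, double d5, double d6, double d7, double d8, double d9, double d10)
+;{
+; // use an argument passed on the stack.
+; alignas(32768) volatile int l1;
+; volatile int vla[i1];
+; return i10 + (int)d10 + l1 + vla[0];
+;}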
+
+
+
+define i32 @novla_nodynamicrealign_call(i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, i32 %i10, double %d1, double %d2, double %d3, double %d4, double %d5, double %d6, double %d7, double %d8, double %d9, double %d10) #0 {
+entry:
+ %l1 = alloca i32, align 4
+ %conv = fptosi double %d10 to i32
+ %add = add nsw i32 %conv, %i10
+ %l1.0.l1.0. = load volatile i32, i32* %l1, align 4
+ %add1 = add nsw i32 %add, %l1.0.l1.0.
+ %call = tail call i32 @g()
+ %add2 = add nsw i32 %add1, %call
+ ret i32 %add2
+}
+; CHECK-LABEL: novla_nodynamicrealign_call
+; CHECK: .cfi_startproc
+; Check that used callee-saved registers are saved
+; CHECK: stp x20, x19, [sp, #-32]!
+; Check that the frame pointer is created:
+; CHECK: stp x29, x30, [sp, #16]
+; CHECK: add x29, sp, #16
+; Check correctness of cfi pseudo-instructions
+; CHECK: .cfi_def_cfa w29, 16
+; CHECK: .cfi_offset w30, -8
+; CHECK: .cfi_offset w29, -16
+; CHECK: .cfi_offset w19, -24
+; CHECK: .cfi_offset w20, -32
+; Check correct access to arguments passed on the stack, through frame pointer
+; CHECK: ldr d[[DARG:[0-9]+]], [x29, #40]
+; CHECK: ldr w[[IARG:[0-9]+]], [x29, #24]
+; Check correct access to local variable on the stack, through stack pointer
+; CHECK: ldr w[[ILOC:[0-9]+]], [sp, #12]
+; Check epilogue:
+; CHECK: ldp x29, x30, [sp, #16]
+; CHECK: ldp x20, x19, [sp], #32
+; CHECK: ret
+; CHECK: .cfi_endproc
+
+
+declare i32 @g() #0
+
+; Function Attrs: nounwind
+define i32 @novla_nodynamicrealign_nocall(i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, i32 %i10, double %d1, double %d2, double %d3, double %d4, double %d5, double %d6, double %d7, double %d8, double %d9, double %d10) #1 {
+entry:
+ %l1 = alloca i32, align 4
+ %conv = fptosi double %d10 to i32
+ %add = add nsw i32 %conv, %i10
+ %l1.0.l1.0. = load volatile i32, i32* %l1, align 4
+ %add1 = add nsw i32 %add, %l1.0.l1.0.
+ ret i32 %add1
+}
+; CHECK-LABEL: novla_nodynamicrealign_nocall
+; Check that space is reserved for one local variable on the stack.
+; CHECK: sub sp, sp, #16 // =16
+; Check correct access to arguments passed on the stack, through stack pointer
+; CHECK: ldr d[[DARG:[0-9]+]], [sp, #40]
+; CHECK: ldr w[[IARG:[0-9]+]], [sp, #24]
+; Check correct access to local variable on the stack, through stack pointer
+; CHECK: ldr w[[ILOC:[0-9]+]], [sp, #12]
+; Check epilogue:
+; CHECK: add sp, sp, #16 // =16
+; CHECK: ret
+
+
+define i32 @novla_dynamicrealign_call(i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, i32 %i10, double %d1, double %d2, double %d3, double %d4, double %d5, double %d6, double %d7, double %d8, double %d9, double %d10) #0 {
+entry:
+ %l1 = alloca i32, align 128
+ %conv = fptosi double %d10 to i32
+ %add = add nsw i32 %conv, %i10
+ %l1.0.l1.0. = load volatile i32, i32* %l1, align 128
+ %add1 = add nsw i32 %add, %l1.0.l1.0.
+ %call = tail call i32 @g()
+ %add2 = add nsw i32 %add1, %call
+ ret i32 %add2
+}
+
+; CHECK-LABEL: novla_dynamicrealign_call
+; CHECK: .cfi_startproc
+; Check that used callee-saved registers are saved
+; CHECK: stp x20, x19, [sp, #-32]!
+; Check that the frame pointer is created:
+; CHECK: stp x29, x30, [sp, #16]
+; CHECK: add x29, sp, #16
+; Check the dynamic realignment of the stack pointer to a 128-byte boundary
+; CHECK: sub x9, sp, #96
+; CHECK: and sp, x9, #0xffffffffffffff80
+; Check correctness of cfi pseudo-instructions
+; CHECK: .cfi_def_cfa w29, 16
+; CHECK: .cfi_offset w30, -8
+; CHECK: .cfi_offset w29, -16
+; CHECK: .cfi_offset w19, -24
+; CHECK: .cfi_offset w20, -32
+; Check correct access to arguments passed on the stack, through frame pointer
+; CHECK: ldr d[[DARG:[0-9]+]], [x29, #40]
+; CHECK: ldr w[[IARG:[0-9]+]], [x29, #24]
+; Check correct access to local variable on the stack, through re-aligned stack pointer
+; CHECK: ldr w[[ILOC:[0-9]+]], [sp]
+; Check epilogue:
+; Check that the stack pointer gets restored from the frame pointer.
+; CHECK: sub sp, x29, #16 // =16
+; CHECK: ldp x29, x30, [sp, #16]
+; CHECK: ldp x20, x19, [sp], #32
+; CHECK: ret
+; CHECK: .cfi_endproc
+
+
+; Function Attrs: nounwind
+define i32 @novla_dynamicrealign_nocall(i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, i32 %i10, double %d1, double %d2, double %d3, double %d4, double %d5, double %d6, double %d7, double %d8, double %d9, double %d10) #1 {
+entry:
+ %l1 = alloca i32, align 128
+ %conv = fptosi double %d10 to i32
+ %add = add nsw i32 %conv, %i10
+ %l1.0.l1.0. = load volatile i32, i32* %l1, align 128
+ %add1 = add nsw i32 %add, %l1.0.l1.0.
+ ret i32 %add1
+}
+
+; CHECK-LABEL: novla_dynamicrealign_nocall
+; Check that the frame pointer is created:
+; CHECK: stp x29, x30, [sp, #-16]!
+; CHECK: mov x29, sp
+; Check the dynamic realignment of the stack pointer to a 128-byte boundary
+; CHECK: sub x9, sp, #112
+; CHECK: and sp, x9, #0xffffffffffffff80
+; Check correct access to arguments passed on the stack, through frame pointer
+; CHECK: ldr d[[DARG:[0-9]+]], [x29, #40]
+; CHECK: ldr w[[IARG:[0-9]+]], [x29, #24]
+; Check correct access to local variable on the stack, through re-aligned stack pointer
+; CHECK: ldr w[[ILOC:[0-9]+]], [sp]
+; Check epilogue:
+; Check that the stack pointer gets restored from the frame pointer.
+; CHECK: mov sp, x29
+; CHECK: ldp x29, x30, [sp], #16
+; CHECK: ret
+
+
+define i32 @vla_nodynamicrealign_call(i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, i32 %i10, double %d1, double %d2, double %d3, double %d4, double %d5, double %d6, double %d7, double %d8, double %d9, double %d10) #0 {
+entry:
+ %l1 = alloca i32, align 4
+ %0 = zext i32 %i1 to i64
+ %vla = alloca i32, i64 %0, align 4
+ %conv = fptosi double %d10 to i32
+ %add = add nsw i32 %conv, %i10
+ %l1.0.l1.0. = load volatile i32, i32* %l1, align 4
+ %add1 = add nsw i32 %add, %l1.0.l1.0.
+ %call = tail call i32 @g()
+ %add2 = add nsw i32 %add1, %call
+ %1 = load volatile i32, i32* %vla, align 4, !tbaa !1
+ %add3 = add nsw i32 %add2, %1
+ ret i32 %add3
+}
+
+; CHECK-LABEL: vla_nodynamicrealign_call
+; CHECK: .cfi_startproc
+; Check that used callee-saved registers are saved
+; CHECK: stp x20, x19, [sp, #-32]!
+; Check that the frame pointer is created:
+; CHECK: stp x29, x30, [sp, #16]
+; CHECK: add x29, sp, #16
+; Check that space is reserved on the stack for the local variable,
+; rounded up to a multiple of 16 to keep the stack pointer 16-byte aligned.
+; CHECK: sub sp, sp, #16
+; Check correctness of cfi pseudo-instructions
+; CHECK: .cfi_def_cfa w29, 16
+; CHECK: .cfi_offset w30, -8
+; CHECK: .cfi_offset w29, -16
+; CHECK: .cfi_offset w19, -24
+; CHECK: .cfi_offset w20, -32
+; Check correct access to arguments passed on the stack, through frame pointer
+; CHECK: ldr w[[IARG:[0-9]+]], [x29, #24]
+; CHECK: ldr d[[DARG:[0-9]+]], [x29, #40]
+; Check correct reservation of 16-byte aligned VLA (size in w0) on stack
+; CHECK: ubfx x9, x0, #0, #32
+; CHECK: lsl x9, x9, #2
+; CHECK: add x9, x9, #15
+; CHECK: and x9, x9, #0xfffffffffffffff0
+; CHECK: mov x10, sp
+; CHECK: sub x[[VLASPTMP:[0-9]+]], x10, x9
+; CHECK: mov sp, x[[VLASPTMP]]
+; Check correct access to local variable, through frame pointer
+; CHECK: ldur w[[ILOC:[0-9]+]], [x29, #-20]
+; Check correct access to the VLA variable through the base pointer
+; CHECK: ldr w[[VLA:[0-9]+]], [x[[VLASPTMP]]]
+; Check epilogue:
+; Check that the stack pointer gets restored from the frame pointer.
+; CHECK: sub sp, x29, #16 // =16
+; CHECK: ldp x29, x30, [sp, #16]
+; CHECK: ldp x20, x19, [sp], #32
+; CHECK: ret
+; CHECK: .cfi_endproc
+
+
+; Function Attrs: nounwind
+define i32 @vla_nodynamicrealign_nocall(i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, i32 %i10, double %d1, double %d2, double %d3, double %d4, double %d5, double %d6, double %d7, double %d8, double %d9, double %d10) #1 {
+entry:
+ %l1 = alloca i32, align 4
+ %0 = zext i32 %i1 to i64
+ %vla = alloca i32, i64 %0, align 4
+ %conv = fptosi double %d10 to i32
+ %add = add nsw i32 %conv, %i10
+ %l1.0.l1.0. = load volatile i32, i32* %l1, align 4
+ %add1 = add nsw i32 %add, %l1.0.l1.0.
+ %1 = load volatile i32, i32* %vla, align 4, !tbaa !1
+ %add2 = add nsw i32 %add1, %1
+ ret i32 %add2
+}
+
+; CHECK-LABEL: vla_nodynamicrealign_nocall
+; Check that the frame pointer is created:
+; CHECK: stp x29, x30, [sp, #-16]!
+; CHECK: mov x29, sp
+; Check that space is reserved on the stack for the local variable,
+; rounded up to a multiple of 16 to keep the stack pointer 16-byte aligned.
+; CHECK: sub sp, sp, #16
+; Check correctness of cfi pseudo-instructions
+; Check correct access to arguments passed on the stack, through frame pointer
+; CHECK: ldr w[[IARG:[0-9]+]], [x29, #24]
+; CHECK: ldr d[[DARG:[0-9]+]], [x29, #40]
+; Check correct reservation of 16-byte aligned VLA (size in w0) on stack
+; CHECK: ubfx x9, x0, #0, #32
+; CHECK: lsl x9, x9, #2
+; CHECK: add x9, x9, #15
+; CHECK: and x9, x9, #0xfffffffffffffff0
+; CHECK: mov x10, sp
+; CHECK: sub x[[VLASPTMP:[0-9]+]], x10, x9
+; CHECK: mov sp, x[[VLASPTMP]]
+; Check correct access to local variable, through frame pointer
+; CHECK: ldur w[[ILOC:[0-9]+]], [x29, #-4]
+; Check correct access to the VLA variable through the base pointer
+; CHECK: ldr w[[VLA:[0-9]+]], [x[[VLASPTMP]]]
+; Check epilogue:
+; Check that the stack pointer gets restored from the frame pointer.
+; CHECK: mov sp, x29
+; CHECK: ldp x29, x30, [sp], #16
+; CHECK: ret
+
+
+define i32 @vla_dynamicrealign_call(i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, i32 %i10, double %d1, double %d2, double %d3, double %d4, double %d5, double %d6, double %d7, double %d8, double %d9, double %d10) #0 {
+entry:
+ %l1 = alloca i32, align 128
+ %0 = zext i32 %i1 to i64
+ %vla = alloca i32, i64 %0, align 4
+ %conv = fptosi double %d10 to i32
+ %add = add nsw i32 %conv, %i10
+ %l1.0.l1.0. = load volatile i32, i32* %l1, align 128
+ %add1 = add nsw i32 %add, %l1.0.l1.0.
+ %call = tail call i32 @g()
+ %add2 = add nsw i32 %add1, %call
+ %1 = load volatile i32, i32* %vla, align 4, !tbaa !1
+ %add3 = add nsw i32 %add2, %1
+ ret i32 %add3
+}
+
+; CHECK-LABEL: vla_dynamicrealign_call
+; CHECK: .cfi_startproc
+; Check that used callee-saved registers are saved
+; CHECK: stp x22, x21, [sp, #-48]!
+; CHECK: stp x20, x19, [sp, #16]
+; Check that the frame pointer is created:
+; CHECK: stp x29, x30, [sp, #32]
+; CHECK: add x29, sp, #32
+; Check that the stack pointer gets re-aligned to 128
+; bytes & the base pointer (x19) gets initialized to
+; this 128-byte aligned area for local variables &
+; spill slots
+; CHECK: sub x9, sp, #80 // =80
+; CHECK: and sp, x9, #0xffffffffffffff80
+; CHECK: mov x19, sp
+; Check correctness of cfi pseudo-instructions
+; CHECK: .cfi_def_cfa w29, 16
+; CHECK: .cfi_offset w30, -8
+; CHECK: .cfi_offset w29, -16
+; CHECK: .cfi_offset w19, -24
+; CHECK: .cfi_offset w20, -32
+; CHECK: .cfi_offset w21, -40
+; CHECK: .cfi_offset w22, -48
+; Check correct access to arguments passed on the stack, through frame pointer
+; CHECK: ldr w[[IARG:[0-9]+]], [x29, #24]
+; CHECK: ldr d[[DARG:[0-9]+]], [x29, #40]
+; Check correct reservation of 16-byte aligned VLA (size in w0) on stack
+; and set-up of base pointer (x19).
+; CHECK: ubfx x9, x0, #0, #32
+; CHECK: lsl x9, x9, #2
+; CHECK: add x9, x9, #15
+; CHECK: and x9, x9, #0xfffffffffffffff0
+; CHECK: mov x10, sp
+; CHECK: sub x[[VLASPTMP:[0-9]+]], x10, x9
+; CHECK: mov sp, x[[VLASPTMP]]
+; Check correct access to local variable, through base pointer
+; CHECK: ldr w[[ILOC:[0-9]+]], [x19]
+; CHECK: ldr w[[VLA:[0-9]+]], [x[[VLASPTMP]]]
+; Check epilogue:
+; Check that the stack pointer gets restored from the frame pointer.
+; CHECK: sub sp, x29, #32
+; CHECK: ldp x29, x30, [sp, #32]
+; CHECK: ldp x20, x19, [sp, #16]
+; CHECK: ldp x22, x21, [sp], #48
+; CHECK: ret
+; CHECK: .cfi_endproc
+
+
+; Function Attrs: nounwind
+define i32 @vla_dynamicrealign_nocall(i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, i32 %i10, double %d1, double %d2, double %d3, double %d4, double %d5, double %d6, double %d7, double %d8, double %d9, double %d10) #1 {
+entry:
+ %l1 = alloca i32, align 128
+ %0 = zext i32 %i1 to i64
+ %vla = alloca i32, i64 %0, align 4
+ %conv = fptosi double %d10 to i32
+ %add = add nsw i32 %conv, %i10
+ %l1.0.l1.0. = load volatile i32, i32* %l1, align 128
+ %add1 = add nsw i32 %add, %l1.0.l1.0.
+ %1 = load volatile i32, i32* %vla, align 4, !tbaa !1
+ %add2 = add nsw i32 %add1, %1
+ ret i32 %add2
+}
+
+; CHECK-LABEL: vla_dynamicrealign_nocall
+; Check that used callee-saved registers are saved
+; CHECK: stp x20, x19, [sp, #-32]!
+; Check that the frame pointer is created:
+; CHECK: stp x29, x30, [sp, #16]
+; CHECK: add x29, sp, #16
+; Check that the stack pointer gets re-aligned to 128
+; bytes & the base pointer (x19) gets initialized to
+; this 128-byte aligned area for local variables &
+; spill slots
+; CHECK: sub x9, sp, #96
+; CHECK: and sp, x9, #0xffffffffffffff80
+; CHECK: mov x19, sp
+; Check correct access to arguments passed on the stack, through frame pointer
+; CHECK: ldr w[[IARG:[0-9]+]], [x29, #24]
+; CHECK: ldr d[[DARG:[0-9]+]], [x29, #40]
+; Check correct reservation of 16-byte aligned VLA (size in w0) on stack
+; and set-up of base pointer (x19).
+; CHECK: ubfx x9, x0, #0, #32
+; CHECK: lsl x9, x9, #2
+; CHECK: add x9, x9, #15
+; CHECK: and x9, x9, #0xfffffffffffffff0
+; CHECK: mov x10, sp
+; CHECK: sub x[[VLASPTMP:[0-9]+]], x10, x9
+; CHECK: mov sp, x[[VLASPTMP]]
+; Check correct access to local variable, through base pointer
+; CHECK: ldr w[[ILOC:[0-9]+]], [x19]
+; CHECK: ldr w[[VLA:[0-9]+]], [x[[VLASPTMP]]]
+; Check epilogue:
+; Check that the stack pointer gets restored from the frame pointer.
+; CHECK: sub sp, x29, #16
+; CHECK: ldp x29, x30, [sp, #16]
+; CHECK: ldp x20, x19, [sp], #32
+; CHECK: ret
+
+
+; Function Attrs: nounwind
+define i32 @vla_dynamicrealign_nocall_large_align(i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, i32 %i10, double %d1, double %d2, double %d3, double %d4, double %d5, double %d6, double %d7, double %d8, double %d9, double %d10) #1 {
+entry:
+ %l1 = alloca i32, align 32768
+ %0 = zext i32 %i1 to i64
+ %vla = alloca i32, i64 %0, align 4
+ %conv = fptosi double %d10 to i32
+ %add = add nsw i32 %conv, %i10
+ %l1.0.l1.0. = load volatile i32, i32* %l1, align 32768
+ %add1 = add nsw i32 %add, %l1.0.l1.0.
+ %1 = load volatile i32, i32* %vla, align 4, !tbaa !1
+ %add2 = add nsw i32 %add1, %1
+ ret i32 %add2
+}
+
+; CHECK-LABEL: vla_dynamicrealign_nocall_large_align
+; Check that used callee-saved registers are saved
+; CHECK: stp x20, x19, [sp, #-32]!
+; Check that the frame pointer is created:
+; CHECK: stp x29, x30, [sp, #16]
+; CHECK: add x29, sp, #16
+; Check that the stack pointer gets re-aligned to 32768
+; bytes & the base pointer (x19) gets initialized to
+; this 32768-byte aligned area for local variables &
+; spill slots
+; CHECK: sub x9, sp, #7, lsl #12
+; CHECK: and sp, x9, #0xffffffffffff8000
+; CHECK: mov x19, sp
+; Check correct access to arguments passed on the stack, through frame pointer
+; CHECK: ldr w[[IARG:[0-9]+]], [x29, #24]
+; CHECK: ldr d[[DARG:[0-9]+]], [x29, #40]
+; Check correct reservation of 16-byte aligned VLA (size in w0) on stack
+; and set-up of base pointer (x19).
+; CHECK: ubfx x9, x0, #0, #32
+; CHECK: lsl x9, x9, #2
+; CHECK: add x9, x9, #15
+; CHECK: and x9, x9, #0xfffffffffffffff0
+; CHECK: mov x10, sp
+; CHECK: sub x[[VLASPTMP:[0-9]+]], x10, x9
+; CHECK: mov sp, x[[VLASPTMP]]
+; Check correct access to local variable, through base pointer
+; CHECK: ldr w[[ILOC:[0-9]+]], [x19]
+; CHECK: ldr w[[VLA:[0-9]+]], [x[[VLASPTMP]]]
+; Check epilogue:
+; Check that the stack pointer gets restored from the frame pointer.
+; CHECK: sub sp, x29, #16
+; CHECK: ldp x29, x30, [sp, #16]
+; CHECK: ldp x20, x19, [sp], #32
+; CHECK: ret
+
+attributes #0 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!1 = !{!2, !2, i64 0}
+!2 = !{!"int", !3, i64 0}
+!3 = !{!"omnipotent char", !4, i64 0}
+!4 = !{!"Simple C/C++ TBAA"}
diff --git a/test/CodeGen/AArch64/aarch64-fix-cortex-a53-835769.ll b/test/CodeGen/AArch64/aarch64-fix-cortex-a53-835769.ll
index 64d91eea9739..2170e4b902d4 100644
--- a/test/CodeGen/AArch64/aarch64-fix-cortex-a53-835769.ll
+++ b/test/CodeGen/AArch64/aarch64-fix-cortex-a53-835769.ll
@@ -22,7 +22,7 @@ target triple = "aarch64--linux-gnu"
define i64 @f_load_madd_64(i64 %a, i64 %b, i64* nocapture readonly %c) #0 {
entry:
- %0 = load i64* %c, align 8
+ %0 = load i64, i64* %c, align 8
%mul = mul nsw i64 %0, %b
%add = add nsw i64 %mul, %a
ret i64 %add
@@ -41,7 +41,7 @@ entry:
define i32 @f_load_madd_32(i32 %a, i32 %b, i32* nocapture readonly %c) #0 {
entry:
- %0 = load i32* %c, align 4
+ %0 = load i32, i32* %c, align 4
%mul = mul nsw i32 %0, %b
%add = add nsw i32 %mul, %a
ret i32 %add
@@ -56,7 +56,7 @@ entry:
define i64 @f_load_msub_64(i64 %a, i64 %b, i64* nocapture readonly %c) #0 {
entry:
- %0 = load i64* %c, align 8
+ %0 = load i64, i64* %c, align 8
%mul = mul nsw i64 %0, %b
%sub = sub nsw i64 %a, %mul
ret i64 %sub
@@ -72,7 +72,7 @@ entry:
define i32 @f_load_msub_32(i32 %a, i32 %b, i32* nocapture readonly %c) #0 {
entry:
- %0 = load i32* %c, align 4
+ %0 = load i32, i32* %c, align 4
%mul = mul nsw i32 %0, %b
%sub = sub nsw i32 %a, %mul
ret i32 %sub
@@ -87,7 +87,7 @@ entry:
define i64 @f_load_mul_64(i64 %a, i64 %b, i64* nocapture readonly %c) #0 {
entry:
- %0 = load i64* %c, align 8
+ %0 = load i64, i64* %c, align 8
%mul = mul nsw i64 %0, %b
ret i64 %mul
}
@@ -101,7 +101,7 @@ entry:
define i32 @f_load_mul_32(i32 %a, i32 %b, i32* nocapture readonly %c) #0 {
entry:
- %0 = load i32* %c, align 4
+ %0 = load i32, i32* %c, align 4
%mul = mul nsw i32 %0, %b
ret i32 %mul
}
@@ -115,7 +115,7 @@ entry:
define i64 @f_load_mneg_64(i64 %a, i64 %b, i64* nocapture readonly %c) #0 {
entry:
- %0 = load i64* %c, align 8
+ %0 = load i64, i64* %c, align 8
%mul = sub i64 0, %b
%sub = mul i64 %0, %mul
ret i64 %sub
@@ -133,7 +133,7 @@ entry:
define i32 @f_load_mneg_32(i32 %a, i32 %b, i32* nocapture readonly %c) #0 {
entry:
- %0 = load i32* %c, align 4
+ %0 = load i32, i32* %c, align 4
%mul = sub i32 0, %b
%sub = mul i32 %0, %mul
ret i32 %sub
@@ -154,7 +154,7 @@ entry:
%conv1 = sext i32 %c to i64
%mul = mul nsw i64 %conv1, %conv
%add = add nsw i64 %mul, %a
- %0 = load i32* %d, align 4
+ %0 = load i32, i32* %d, align 4
%conv2 = sext i32 %0 to i64
%add3 = add nsw i64 %add, %conv2
ret i64 %add3
@@ -174,7 +174,7 @@ entry:
%conv1 = sext i32 %c to i64
%mul = mul nsw i64 %conv1, %conv
%sub = sub i64 %a, %mul
- %0 = load i32* %d, align 4
+ %0 = load i32, i32* %d, align 4
%conv2 = sext i32 %0 to i64
%add = add nsw i64 %sub, %conv2
ret i64 %add
@@ -193,7 +193,7 @@ entry:
%conv = sext i32 %b to i64
%conv1 = sext i32 %c to i64
%mul = mul nsw i64 %conv1, %conv
- %0 = load i32* %d, align 4
+ %0 = load i32, i32* %d, align 4
%conv2 = sext i32 %0 to i64
%div = sdiv i64 %mul, %conv2
ret i64 %div
@@ -212,7 +212,7 @@ entry:
%conv1 = sext i32 %c to i64
%mul = sub nsw i64 0, %conv
%sub = mul i64 %conv1, %mul
- %0 = load i32* %d, align 4
+ %0 = load i32, i32* %d, align 4
%conv2 = sext i32 %0 to i64
%div = sdiv i64 %sub, %conv2
ret i64 %div
@@ -229,7 +229,7 @@ entry:
%conv1 = zext i32 %c to i64
%mul = mul i64 %conv1, %conv
%add = add i64 %mul, %a
- %0 = load i32* %d, align 4
+ %0 = load i32, i32* %d, align 4
%conv2 = zext i32 %0 to i64
%add3 = add i64 %add, %conv2
ret i64 %add3
@@ -249,7 +249,7 @@ entry:
%conv1 = zext i32 %c to i64
%mul = mul i64 %conv1, %conv
%sub = sub i64 %a, %mul
- %0 = load i32* %d, align 4
+ %0 = load i32, i32* %d, align 4
%conv2 = zext i32 %0 to i64
%add = add i64 %sub, %conv2
ret i64 %add
@@ -268,7 +268,7 @@ entry:
%conv = zext i32 %b to i64
%conv1 = zext i32 %c to i64
%mul = mul i64 %conv1, %conv
- %0 = load i32* %d, align 4
+ %0 = load i32, i32* %d, align 4
%conv2 = zext i32 %0 to i64
%div = udiv i64 %mul, %conv2
ret i64 %div
@@ -287,7 +287,7 @@ entry:
%conv1 = zext i32 %c to i64
%mul = sub nsw i64 0, %conv
%sub = mul i64 %conv1, %mul
- %0 = load i32* %d, align 4
+ %0 = load i32, i32* %d, align 4
%conv2 = zext i32 %0 to i64
%div = udiv i64 %sub, %conv2
ret i64 %div
@@ -300,7 +300,7 @@ entry:
define i64 @f_store_madd_64(i64 %a, i64 %b, i64* nocapture readonly %cp, i64* nocapture %e) #1 {
entry:
- %0 = load i64* %cp, align 8
+ %0 = load i64, i64* %cp, align 8
store i64 %a, i64* %e, align 8
%mul = mul nsw i64 %0, %b
%add = add nsw i64 %mul, %a
@@ -317,7 +317,7 @@ entry:
define i32 @f_store_madd_32(i32 %a, i32 %b, i32* nocapture readonly %cp, i32* nocapture %e) #1 {
entry:
- %0 = load i32* %cp, align 4
+ %0 = load i32, i32* %cp, align 4
store i32 %a, i32* %e, align 4
%mul = mul nsw i32 %0, %b
%add = add nsw i32 %mul, %a
@@ -333,7 +333,7 @@ entry:
define i64 @f_store_msub_64(i64 %a, i64 %b, i64* nocapture readonly %cp, i64* nocapture %e) #1 {
entry:
- %0 = load i64* %cp, align 8
+ %0 = load i64, i64* %cp, align 8
store i64 %a, i64* %e, align 8
%mul = mul nsw i64 %0, %b
%sub = sub nsw i64 %a, %mul
@@ -350,7 +350,7 @@ entry:
define i32 @f_store_msub_32(i32 %a, i32 %b, i32* nocapture readonly %cp, i32* nocapture %e) #1 {
entry:
- %0 = load i32* %cp, align 4
+ %0 = load i32, i32* %cp, align 4
store i32 %a, i32* %e, align 4
%mul = mul nsw i32 %0, %b
%sub = sub nsw i32 %a, %mul
@@ -366,7 +366,7 @@ entry:
define i64 @f_store_mul_64(i64 %a, i64 %b, i64* nocapture readonly %cp, i64* nocapture %e) #1 {
entry:
- %0 = load i64* %cp, align 8
+ %0 = load i64, i64* %cp, align 8
store i64 %a, i64* %e, align 8
%mul = mul nsw i64 %0, %b
ret i64 %mul
@@ -381,7 +381,7 @@ entry:
define i32 @f_store_mul_32(i32 %a, i32 %b, i32* nocapture readonly %cp, i32* nocapture %e) #1 {
entry:
- %0 = load i32* %cp, align 4
+ %0 = load i32, i32* %cp, align 4
store i32 %a, i32* %e, align 4
%mul = mul nsw i32 %0, %b
ret i32 %mul
@@ -396,7 +396,7 @@ entry:
define i64 @f_prefetch_madd_64(i64 %a, i64 %b, i64* nocapture readonly %cp, i64* nocapture %e) #1 {
entry:
- %0 = load i64* %cp, align 8
+ %0 = load i64, i64* %cp, align 8
%1 = bitcast i64* %e to i8*
tail call void @llvm.prefetch(i8* %1, i32 0, i32 0, i32 1)
%mul = mul nsw i64 %0, %b
@@ -415,7 +415,7 @@ declare void @llvm.prefetch(i8* nocapture, i32, i32, i32) #2
define i32 @f_prefetch_madd_32(i32 %a, i32 %b, i32* nocapture readonly %cp, i32* nocapture %e) #1 {
entry:
- %0 = load i32* %cp, align 4
+ %0 = load i32, i32* %cp, align 4
%1 = bitcast i32* %e to i8*
tail call void @llvm.prefetch(i8* %1, i32 1, i32 0, i32 1)
%mul = mul nsw i32 %0, %b
@@ -431,7 +431,7 @@ entry:
define i64 @f_prefetch_msub_64(i64 %a, i64 %b, i64* nocapture readonly %cp, i64* nocapture %e) #1 {
entry:
- %0 = load i64* %cp, align 8
+ %0 = load i64, i64* %cp, align 8
%1 = bitcast i64* %e to i8*
tail call void @llvm.prefetch(i8* %1, i32 0, i32 1, i32 1)
%mul = mul nsw i64 %0, %b
@@ -448,7 +448,7 @@ entry:
define i32 @f_prefetch_msub_32(i32 %a, i32 %b, i32* nocapture readonly %cp, i32* nocapture %e) #1 {
entry:
- %0 = load i32* %cp, align 4
+ %0 = load i32, i32* %cp, align 4
%1 = bitcast i32* %e to i8*
tail call void @llvm.prefetch(i8* %1, i32 1, i32 1, i32 1)
%mul = mul nsw i32 %0, %b
@@ -464,7 +464,7 @@ entry:
define i64 @f_prefetch_mul_64(i64 %a, i64 %b, i64* nocapture readonly %cp, i64* nocapture %e) #1 {
entry:
- %0 = load i64* %cp, align 8
+ %0 = load i64, i64* %cp, align 8
%1 = bitcast i64* %e to i8*
tail call void @llvm.prefetch(i8* %1, i32 0, i32 3, i32 1)
%mul = mul nsw i64 %0, %b
@@ -479,7 +479,7 @@ entry:
define i32 @f_prefetch_mul_32(i32 %a, i32 %b, i32* nocapture readonly %cp, i32* nocapture %e) #1 {
entry:
- %0 = load i32* %cp, align 4
+ %0 = load i32, i32* %cp, align 4
%1 = bitcast i32* %e to i8*
tail call void @llvm.prefetch(i8* %1, i32 1, i32 3, i32 1)
%mul = mul nsw i32 %0, %b
@@ -494,7 +494,7 @@ entry:
define i64 @fall_through(i64 %a, i64 %b, i64* nocapture readonly %c) #0 {
entry:
- %0 = load i64* %c, align 8
+ %0 = load i64, i64* %c, align 8
br label %block1
block1:
diff --git a/test/CodeGen/AArch64/aarch64-gep-opt.ll b/test/CodeGen/AArch64/aarch64-gep-opt.ll
index 811eed9d1fbe..93e2ff14ac71 100644
--- a/test/CodeGen/AArch64/aarch64-gep-opt.ll
+++ b/test/CodeGen/AArch64/aarch64-gep-opt.ll
@@ -1,6 +1,6 @@
-; RUN: llc -O3 -verify-machineinstrs %s -o - | FileCheck %s
-; RUN: llc -O3 -print-after=codegenprepare -mcpu=cyclone < %s >%t 2>&1 && FileCheck --check-prefix=CHECK-NoAA <%t %s
-; RUN: llc -O3 -print-after=codegenprepare -mcpu=cortex-a53 < %s >%t 2>&1 && FileCheck --check-prefix=CHECK-UseAA <%t %s
+; RUN: llc -O3 -aarch64-gep-opt=true -verify-machineinstrs %s -o - | FileCheck %s
+; RUN: llc -O3 -aarch64-gep-opt=true -print-after=codegenprepare -mcpu=cyclone < %s >%t 2>&1 && FileCheck --check-prefix=CHECK-NoAA <%t %s
+; RUN: llc -O3 -aarch64-gep-opt=true -print-after=codegenprepare -mcpu=cortex-a53 < %s >%t 2>&1 && FileCheck --check-prefix=CHECK-UseAA <%t %s
target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
target triple = "aarch64-linux-gnueabi"
@@ -14,14 +14,14 @@ target triple = "aarch64-linux-gnueabi"
; Check that when two complex GEPs are used in two basic blocks, LLVM can
; elimilate the common subexpression for the second use.
define void @test_GEP_CSE([240 x %struct]* %string, i32* %adj, i32 %lib, i64 %idxprom) {
- %liberties = getelementptr [240 x %struct]* %string, i64 1, i64 %idxprom, i32 3
- %1 = load i32* %liberties, align 4
+ %liberties = getelementptr [240 x %struct], [240 x %struct]* %string, i64 1, i64 %idxprom, i32 3
+ %1 = load i32, i32* %liberties, align 4
%cmp = icmp eq i32 %1, %lib
br i1 %cmp, label %if.then, label %if.end
if.then: ; preds = %entry
- %origin = getelementptr [240 x %struct]* %string, i64 1, i64 %idxprom, i32 2
- %2 = load i32* %origin, align 4
+ %origin = getelementptr [240 x %struct], [240 x %struct]* %string, i64 1, i64 %idxprom, i32 2
+ %2 = load i32, i32* %origin, align 4
store i32 %2, i32* %adj, align 4
br label %if.end
@@ -50,11 +50,11 @@ if.end: ; preds = %if.then, %entry
; CHECK-UseAA-LABEL: @test_GEP_CSE(
; CHECK-UseAA: [[PTR0:%[a-zA-Z0-9]+]] = bitcast [240 x %struct]* %string to i8*
; CHECK-UseAA: [[IDX:%[a-zA-Z0-9]+]] = mul i64 %idxprom, 96
-; CHECK-UseAA: [[PTR1:%[a-zA-Z0-9]+]] = getelementptr i8* [[PTR0]], i64 [[IDX]]
-; CHECK-UseAA: getelementptr i8* [[PTR1]], i64 23052
+; CHECK-UseAA: [[PTR1:%[a-zA-Z0-9]+]] = getelementptr i8, i8* [[PTR0]], i64 [[IDX]]
+; CHECK-UseAA: getelementptr i8, i8* [[PTR1]], i64 23052
; CHECK-UseAA: bitcast
; CHECK-UseAA: if.then:
-; CHECK-UseAA: getelementptr i8* [[PTR1]], i64 23048
+; CHECK-UseAA: getelementptr i8, i8* [[PTR1]], i64 23048
; CHECK-UseAA: bitcast
%class.my = type { i32, [128 x i32], i32, [256 x %struct.pt]}
@@ -65,10 +65,10 @@ if.end: ; preds = %if.then, %entry
; calculation and code gen can generate a better addressing mode for the second
; use.
define void @test_GEP_across_BB(%class.my* %this, i64 %idx) {
- %1 = getelementptr %class.my* %this, i64 0, i32 3, i64 %idx, i32 1
- %2 = load i32* %1, align 4
- %3 = getelementptr %class.my* %this, i64 0, i32 3, i64 %idx, i32 2
- %4 = load i32* %3, align 4
+ %1 = getelementptr %class.my, %class.my* %this, i64 0, i32 3, i64 %idx, i32 1
+ %2 = load i32, i32* %1, align 4
+ %3 = getelementptr %class.my, %class.my* %this, i64 0, i32 3, i64 %idx, i32 2
+ %4 = load i32, i32* %3, align 4
%5 = icmp eq i32 %2, %4
br i1 %5, label %if.true, label %exit
@@ -99,12 +99,12 @@ exit:
; CHECK-UseAA-LABEL: test_GEP_across_BB(
; CHECK-UseAA: [[PTR0:%[a-zA-Z0-9]+]] = getelementptr
-; CHECK-UseAA: getelementptr i8* [[PTR0]], i64 528
-; CHECK-UseAA: getelementptr i8* [[PTR0]], i64 532
+; CHECK-UseAA: getelementptr i8, i8* [[PTR0]], i64 528
+; CHECK-UseAA: getelementptr i8, i8* [[PTR0]], i64 532
; CHECK-UseAA: if.true:
-; CHECK-UseAA: {{%sunk[a-zA-Z0-9]+}} = getelementptr i8* [[PTR0]], i64 532
+; CHECK-UseAA: {{%sunk[a-zA-Z0-9]+}} = getelementptr i8, i8* [[PTR0]], i64 532
; CHECK-UseAA: exit:
-; CHECK-UseAA: {{%sunk[a-zA-Z0-9]+}} = getelementptr i8* [[PTR0]], i64 528
+; CHECK-UseAA: {{%sunk[a-zA-Z0-9]+}} = getelementptr i8, i8* [[PTR0]], i64 528
%struct.S = type { float, double }
@struct_array = global [1024 x %struct.S] zeroinitializer, align 16
@@ -118,7 +118,7 @@ define double* @test-struct_1(i32 %i) {
entry:
%add = add nsw i32 %i, 5
%idxprom = sext i32 %add to i64
- %p = getelementptr [1024 x %struct.S]* @struct_array, i64 0, i64 %idxprom, i32 1
+ %p = getelementptr [1024 x %struct.S], [1024 x %struct.S]* @struct_array, i64 0, i64 %idxprom, i32 1
ret double* %p
}
; CHECK-NoAA-LABEL: @test-struct_1(
@@ -126,7 +126,7 @@ entry:
; CHECK-NoAA: add i64 %{{[a-zA-Z0-9]+}}, 88
; CHECK-UseAA-LABEL: @test-struct_1(
-; CHECK-UseAA: getelementptr i8* %{{[a-zA-Z0-9]+}}, i64 88
+; CHECK-UseAA: getelementptr i8, i8* %{{[a-zA-Z0-9]+}}, i64 88
%struct3 = type { i64, i32 }
%struct2 = type { %struct3, i32 }
@@ -140,7 +140,7 @@ entry:
define %struct2* @test-struct_2(%struct0* %ptr, i64 %idx) {
entry:
%arrayidx = add nsw i64 %idx, -2
- %ptr2 = getelementptr %struct0* %ptr, i64 0, i32 3, i64 %arrayidx, i32 1
+ %ptr2 = getelementptr %struct0, %struct0* %ptr, i64 0, i32 3, i64 %arrayidx, i32 1
ret %struct2* %ptr2
}
; CHECK-NoAA-LABEL: @test-struct_2(
@@ -148,14 +148,14 @@ entry:
; CHECK-NoAA: add i64 %{{[a-zA-Z0-9]+}}, -40
; CHECK-UseAA-LABEL: @test-struct_2(
-; CHECK-UseAA: getelementptr i8* %{{[a-zA-Z0-9]+}}, i64 -40
+; CHECK-UseAA: getelementptr i8, i8* %{{[a-zA-Z0-9]+}}, i64 -40
; Test that when an index is the sum of two constants, the SeparateConstOffsetFromGEP
; pass does not generate an incorrect result.
define void @test_const_add([3 x i32]* %in) {
%inc = add nsw i32 2, 1
%idxprom = sext i32 %inc to i64
- %arrayidx = getelementptr [3 x i32]* %in, i64 %idxprom, i64 2
+ %arrayidx = getelementptr [3 x i32], [3 x i32]* %in, i64 %idxprom, i64 2
store i32 0, i32* %arrayidx, align 4
ret void
}
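
The hunks in this file, and most of the test updates that follow, apply the explicit-pointee-type syntax for load and getelementptr: the loaded or indexed type is now written as a separate first operand instead of being implied by the pointer operand. A minimal sketch of the pattern, mirroring @test_const_add above (the value names here are illustrative only):

  ; older syntax: type implied by the pointer operand
  ;   %p = getelementptr inbounds [3 x i32]* %in, i64 0, i64 2
  ;   %v = load i32* %p, align 4
  ; updated syntax: pointee type stated explicitly before the pointer
    %p = getelementptr inbounds [3 x i32], [3 x i32]* %in, i64 0, i64 2
    %v = load i32, i32* %p, align 4

The CHECK-UseAA lines above show the other half of this test's intent: -aarch64-gep-opt lowers the struct GEPs to an i8 base plus constant byte offsets (528 and 532), which, per the comment in the test, lets codegen form a better addressing mode for the second use of the same base.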
diff --git a/test/CodeGen/AArch64/aarch64-smull.ll b/test/CodeGen/AArch64/aarch64-smull.ll
index 92582d7d25e9..ec0e2de92d0d 100644
--- a/test/CodeGen/AArch64/aarch64-smull.ll
+++ b/test/CodeGen/AArch64/aarch64-smull.ll
@@ -3,8 +3,8 @@
define <8 x i16> @smull_v8i8_v8i16(<8 x i8>* %A, <8 x i8>* %B) nounwind {
; CHECK-LABEL: smull_v8i8_v8i16:
; CHECK: smull {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
+ %tmp2 = load <8 x i8>, <8 x i8>* %B
%tmp3 = sext <8 x i8> %tmp1 to <8 x i16>
%tmp4 = sext <8 x i8> %tmp2 to <8 x i16>
%tmp5 = mul <8 x i16> %tmp3, %tmp4
@@ -14,8 +14,8 @@ define <8 x i16> @smull_v8i8_v8i16(<8 x i8>* %A, <8 x i8>* %B) nounwind {
define <4 x i32> @smull_v4i16_v4i32(<4 x i16>* %A, <4 x i16>* %B) nounwind {
; CHECK-LABEL: smull_v4i16_v4i32:
; CHECK: smull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
%tmp3 = sext <4 x i16> %tmp1 to <4 x i32>
%tmp4 = sext <4 x i16> %tmp2 to <4 x i32>
%tmp5 = mul <4 x i32> %tmp3, %tmp4
@@ -25,8 +25,8 @@ define <4 x i32> @smull_v4i16_v4i32(<4 x i16>* %A, <4 x i16>* %B) nounwind {
define <2 x i64> @smull_v2i32_v2i64(<2 x i32>* %A, <2 x i32>* %B) nounwind {
; CHECK-LABEL: smull_v2i32_v2i64:
; CHECK: smull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
%tmp3 = sext <2 x i32> %tmp1 to <2 x i64>
%tmp4 = sext <2 x i32> %tmp2 to <2 x i64>
%tmp5 = mul <2 x i64> %tmp3, %tmp4
@@ -36,8 +36,8 @@ define <2 x i64> @smull_v2i32_v2i64(<2 x i32>* %A, <2 x i32>* %B) nounwind {
define <8 x i16> @umull_v8i8_v8i16(<8 x i8>* %A, <8 x i8>* %B) nounwind {
; CHECK-LABEL: umull_v8i8_v8i16:
; CHECK: umull {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
+ %tmp2 = load <8 x i8>, <8 x i8>* %B
%tmp3 = zext <8 x i8> %tmp1 to <8 x i16>
%tmp4 = zext <8 x i8> %tmp2 to <8 x i16>
%tmp5 = mul <8 x i16> %tmp3, %tmp4
@@ -47,8 +47,8 @@ define <8 x i16> @umull_v8i8_v8i16(<8 x i8>* %A, <8 x i8>* %B) nounwind {
define <4 x i32> @umull_v4i16_v4i32(<4 x i16>* %A, <4 x i16>* %B) nounwind {
; CHECK-LABEL: umull_v4i16_v4i32:
; CHECK: umull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
%tmp3 = zext <4 x i16> %tmp1 to <4 x i32>
%tmp4 = zext <4 x i16> %tmp2 to <4 x i32>
%tmp5 = mul <4 x i32> %tmp3, %tmp4
@@ -58,8 +58,8 @@ define <4 x i32> @umull_v4i16_v4i32(<4 x i16>* %A, <4 x i16>* %B) nounwind {
define <2 x i64> @umull_v2i32_v2i64(<2 x i32>* %A, <2 x i32>* %B) nounwind {
; CHECK-LABEL: umull_v2i32_v2i64:
; CHECK: umull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
%tmp3 = zext <2 x i32> %tmp1 to <2 x i64>
%tmp4 = zext <2 x i32> %tmp2 to <2 x i64>
%tmp5 = mul <2 x i64> %tmp3, %tmp4
@@ -69,9 +69,9 @@ define <2 x i64> @umull_v2i32_v2i64(<2 x i32>* %A, <2 x i32>* %B) nounwind {
define <8 x i16> @smlal_v8i8_v8i16(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
; CHECK-LABEL: smlal_v8i8_v8i16:
; CHECK: smlal {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i8>* %B
- %tmp3 = load <8 x i8>* %C
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
+ %tmp2 = load <8 x i8>, <8 x i8>* %B
+ %tmp3 = load <8 x i8>, <8 x i8>* %C
%tmp4 = sext <8 x i8> %tmp2 to <8 x i16>
%tmp5 = sext <8 x i8> %tmp3 to <8 x i16>
%tmp6 = mul <8 x i16> %tmp4, %tmp5
@@ -82,9 +82,9 @@ define <8 x i16> @smlal_v8i8_v8i16(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) no
define <4 x i32> @smlal_v4i16_v4i32(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
; CHECK-LABEL: smlal_v4i16_v4i32:
; CHECK: smlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i16>* %B
- %tmp3 = load <4 x i16>* %C
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
+ %tmp3 = load <4 x i16>, <4 x i16>* %C
%tmp4 = sext <4 x i16> %tmp2 to <4 x i32>
%tmp5 = sext <4 x i16> %tmp3 to <4 x i32>
%tmp6 = mul <4 x i32> %tmp4, %tmp5
@@ -95,9 +95,9 @@ define <4 x i32> @smlal_v4i16_v4i32(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C)
define <2 x i64> @smlal_v2i32_v2i64(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
; CHECK-LABEL: smlal_v2i32_v2i64:
; CHECK: smlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
- %tmp1 = load <2 x i64>* %A
- %tmp2 = load <2 x i32>* %B
- %tmp3 = load <2 x i32>* %C
+ %tmp1 = load <2 x i64>, <2 x i64>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
+ %tmp3 = load <2 x i32>, <2 x i32>* %C
%tmp4 = sext <2 x i32> %tmp2 to <2 x i64>
%tmp5 = sext <2 x i32> %tmp3 to <2 x i64>
%tmp6 = mul <2 x i64> %tmp4, %tmp5
@@ -108,9 +108,9 @@ define <2 x i64> @smlal_v2i32_v2i64(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C)
define <8 x i16> @umlal_v8i8_v8i16(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
; CHECK-LABEL: umlal_v8i8_v8i16:
; CHECK: umlal {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i8>* %B
- %tmp3 = load <8 x i8>* %C
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
+ %tmp2 = load <8 x i8>, <8 x i8>* %B
+ %tmp3 = load <8 x i8>, <8 x i8>* %C
%tmp4 = zext <8 x i8> %tmp2 to <8 x i16>
%tmp5 = zext <8 x i8> %tmp3 to <8 x i16>
%tmp6 = mul <8 x i16> %tmp4, %tmp5
@@ -121,9 +121,9 @@ define <8 x i16> @umlal_v8i8_v8i16(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) no
define <4 x i32> @umlal_v4i16_v4i32(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
; CHECK-LABEL: umlal_v4i16_v4i32:
; CHECK: umlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i16>* %B
- %tmp3 = load <4 x i16>* %C
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
+ %tmp3 = load <4 x i16>, <4 x i16>* %C
%tmp4 = zext <4 x i16> %tmp2 to <4 x i32>
%tmp5 = zext <4 x i16> %tmp3 to <4 x i32>
%tmp6 = mul <4 x i32> %tmp4, %tmp5
@@ -134,9 +134,9 @@ define <4 x i32> @umlal_v4i16_v4i32(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C)
define <2 x i64> @umlal_v2i32_v2i64(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
; CHECK-LABEL: umlal_v2i32_v2i64:
; CHECK: umlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
- %tmp1 = load <2 x i64>* %A
- %tmp2 = load <2 x i32>* %B
- %tmp3 = load <2 x i32>* %C
+ %tmp1 = load <2 x i64>, <2 x i64>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
+ %tmp3 = load <2 x i32>, <2 x i32>* %C
%tmp4 = zext <2 x i32> %tmp2 to <2 x i64>
%tmp5 = zext <2 x i32> %tmp3 to <2 x i64>
%tmp6 = mul <2 x i64> %tmp4, %tmp5
@@ -147,9 +147,9 @@ define <2 x i64> @umlal_v2i32_v2i64(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C)
define <8 x i16> @smlsl_v8i8_v8i16(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
; CHECK-LABEL: smlsl_v8i8_v8i16:
; CHECK: smlsl {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i8>* %B
- %tmp3 = load <8 x i8>* %C
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
+ %tmp2 = load <8 x i8>, <8 x i8>* %B
+ %tmp3 = load <8 x i8>, <8 x i8>* %C
%tmp4 = sext <8 x i8> %tmp2 to <8 x i16>
%tmp5 = sext <8 x i8> %tmp3 to <8 x i16>
%tmp6 = mul <8 x i16> %tmp4, %tmp5
@@ -160,9 +160,9 @@ define <8 x i16> @smlsl_v8i8_v8i16(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) no
define <4 x i32> @smlsl_v4i16_v4i32(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
; CHECK-LABEL: smlsl_v4i16_v4i32:
; CHECK: smlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i16>* %B
- %tmp3 = load <4 x i16>* %C
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
+ %tmp3 = load <4 x i16>, <4 x i16>* %C
%tmp4 = sext <4 x i16> %tmp2 to <4 x i32>
%tmp5 = sext <4 x i16> %tmp3 to <4 x i32>
%tmp6 = mul <4 x i32> %tmp4, %tmp5
@@ -173,9 +173,9 @@ define <4 x i32> @smlsl_v4i16_v4i32(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C)
define <2 x i64> @smlsl_v2i32_v2i64(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
; CHECK-LABEL: smlsl_v2i32_v2i64:
; CHECK: smlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
- %tmp1 = load <2 x i64>* %A
- %tmp2 = load <2 x i32>* %B
- %tmp3 = load <2 x i32>* %C
+ %tmp1 = load <2 x i64>, <2 x i64>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
+ %tmp3 = load <2 x i32>, <2 x i32>* %C
%tmp4 = sext <2 x i32> %tmp2 to <2 x i64>
%tmp5 = sext <2 x i32> %tmp3 to <2 x i64>
%tmp6 = mul <2 x i64> %tmp4, %tmp5
@@ -186,9 +186,9 @@ define <2 x i64> @smlsl_v2i32_v2i64(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C)
define <8 x i16> @umlsl_v8i8_v8i16(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
; CHECK-LABEL: umlsl_v8i8_v8i16:
; CHECK: umlsl {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i8>* %B
- %tmp3 = load <8 x i8>* %C
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
+ %tmp2 = load <8 x i8>, <8 x i8>* %B
+ %tmp3 = load <8 x i8>, <8 x i8>* %C
%tmp4 = zext <8 x i8> %tmp2 to <8 x i16>
%tmp5 = zext <8 x i8> %tmp3 to <8 x i16>
%tmp6 = mul <8 x i16> %tmp4, %tmp5
@@ -199,9 +199,9 @@ define <8 x i16> @umlsl_v8i8_v8i16(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) no
define <4 x i32> @umlsl_v4i16_v4i32(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
; CHECK-LABEL: umlsl_v4i16_v4i32:
; CHECK: umlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i16>* %B
- %tmp3 = load <4 x i16>* %C
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
+ %tmp3 = load <4 x i16>, <4 x i16>* %C
%tmp4 = zext <4 x i16> %tmp2 to <4 x i32>
%tmp5 = zext <4 x i16> %tmp3 to <4 x i32>
%tmp6 = mul <4 x i32> %tmp4, %tmp5
@@ -212,9 +212,9 @@ define <4 x i32> @umlsl_v4i16_v4i32(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C)
define <2 x i64> @umlsl_v2i32_v2i64(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
; CHECK-LABEL: umlsl_v2i32_v2i64:
; CHECK: umlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
- %tmp1 = load <2 x i64>* %A
- %tmp2 = load <2 x i32>* %B
- %tmp3 = load <2 x i32>* %C
+ %tmp1 = load <2 x i64>, <2 x i64>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
+ %tmp3 = load <2 x i32>, <2 x i32>* %C
%tmp4 = zext <2 x i32> %tmp2 to <2 x i64>
%tmp5 = zext <2 x i32> %tmp3 to <2 x i64>
%tmp6 = mul <2 x i64> %tmp4, %tmp5
diff --git a/test/CodeGen/AArch64/addsub-shifted.ll b/test/CodeGen/AArch64/addsub-shifted.ll
index 0a93edd8290a..7c7d65459938 100644
--- a/test/CodeGen/AArch64/addsub-shifted.ll
+++ b/test/CodeGen/AArch64/addsub-shifted.ll
@@ -6,63 +6,63 @@
define void @test_lsl_arith(i32 %lhs32, i32 %rhs32, i64 %lhs64, i64 %rhs64) {
; CHECK-LABEL: test_lsl_arith:
- %rhs1 = load volatile i32* @var32
+ %rhs1 = load volatile i32, i32* @var32
%shift1 = shl i32 %rhs1, 18
%val1 = add i32 %lhs32, %shift1
store volatile i32 %val1, i32* @var32
; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, lsl #18
- %rhs2 = load volatile i32* @var32
+ %rhs2 = load volatile i32, i32* @var32
%shift2 = shl i32 %rhs2, 31
%val2 = add i32 %shift2, %lhs32
store volatile i32 %val2, i32* @var32
; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, lsl #31
- %rhs3 = load volatile i32* @var32
+ %rhs3 = load volatile i32, i32* @var32
%shift3 = shl i32 %rhs3, 5
%val3 = sub i32 %lhs32, %shift3
store volatile i32 %val3, i32* @var32
; CHECK: sub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, lsl #5
; Subtraction is not commutative!
- %rhs4 = load volatile i32* @var32
+ %rhs4 = load volatile i32, i32* @var32
%shift4 = shl i32 %rhs4, 19
%val4 = sub i32 %shift4, %lhs32
store volatile i32 %val4, i32* @var32
; CHECK-NOT: sub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, lsl #19
- %lhs4a = load volatile i32* @var32
+ %lhs4a = load volatile i32, i32* @var32
%shift4a = shl i32 %lhs4a, 15
%val4a = sub i32 0, %shift4a
store volatile i32 %val4a, i32* @var32
; CHECK: neg {{w[0-9]+}}, {{w[0-9]+}}, lsl #15
- %rhs5 = load volatile i64* @var64
+ %rhs5 = load volatile i64, i64* @var64
%shift5 = shl i64 %rhs5, 18
%val5 = add i64 %lhs64, %shift5
store volatile i64 %val5, i64* @var64
; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, lsl #18
- %rhs6 = load volatile i64* @var64
+ %rhs6 = load volatile i64, i64* @var64
%shift6 = shl i64 %rhs6, 31
%val6 = add i64 %shift6, %lhs64
store volatile i64 %val6, i64* @var64
; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, lsl #31
- %rhs7 = load volatile i64* @var64
+ %rhs7 = load volatile i64, i64* @var64
%shift7 = shl i64 %rhs7, 5
%val7 = sub i64 %lhs64, %shift7
store volatile i64 %val7, i64* @var64
; CHECK: sub {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, lsl #5
; Subtraction is not commutative!
- %rhs8 = load volatile i64* @var64
+ %rhs8 = load volatile i64, i64* @var64
%shift8 = shl i64 %rhs8, 19
%val8 = sub i64 %shift8, %lhs64
store volatile i64 %val8, i64* @var64
; CHECK-NOT: sub {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, lsl #19
- %lhs8a = load volatile i64* @var64
+ %lhs8a = load volatile i64, i64* @var64
%shift8a = shl i64 %lhs8a, 60
%val8a = sub i64 0, %shift8a
store volatile i64 %val8a, i64* @var64
@@ -190,7 +190,7 @@ define void @test_asr_arith(i32 %lhs32, i32 %rhs32, i64 %lhs64, i64 %rhs64) {
; CHECK: ret
}
-define i32 @test_cmp(i32 %lhs32, i32 %rhs32, i64 %lhs64, i64 %rhs64) {
+define void @test_cmp(i32 %lhs32, i32 %rhs32, i64 %lhs64, i64 %rhs64, i32 %v) {
; CHECK-LABEL: test_cmp:
%shift1 = shl i32 %rhs32, 13
@@ -199,40 +199,46 @@ define i32 @test_cmp(i32 %lhs32, i32 %rhs32, i64 %lhs64, i64 %rhs64) {
; CHECK: cmp {{w[0-9]+}}, {{w[0-9]+}}, lsl #13
t2:
+ store volatile i32 %v, i32* @var32
%shift2 = lshr i32 %rhs32, 20
%tst2 = icmp ne i32 %lhs32, %shift2
br i1 %tst2, label %t3, label %end
; CHECK: cmp {{w[0-9]+}}, {{w[0-9]+}}, lsr #20
t3:
+ store volatile i32 %v, i32* @var32
%shift3 = ashr i32 %rhs32, 9
%tst3 = icmp ne i32 %lhs32, %shift3
br i1 %tst3, label %t4, label %end
; CHECK: cmp {{w[0-9]+}}, {{w[0-9]+}}, asr #9
t4:
+ store volatile i32 %v, i32* @var32
%shift4 = shl i64 %rhs64, 43
%tst4 = icmp uge i64 %lhs64, %shift4
br i1 %tst4, label %t5, label %end
; CHECK: cmp {{x[0-9]+}}, {{x[0-9]+}}, lsl #43
t5:
+ store volatile i32 %v, i32* @var32
%shift5 = lshr i64 %rhs64, 20
%tst5 = icmp ne i64 %lhs64, %shift5
br i1 %tst5, label %t6, label %end
; CHECK: cmp {{x[0-9]+}}, {{x[0-9]+}}, lsr #20
t6:
+ store volatile i32 %v, i32* @var32
%shift6 = ashr i64 %rhs64, 59
%tst6 = icmp ne i64 %lhs64, %shift6
br i1 %tst6, label %t7, label %end
; CHECK: cmp {{x[0-9]+}}, {{x[0-9]+}}, asr #59
t7:
- ret i32 1
-end:
+ store volatile i32 %v, i32* @var32
+ br label %end
- ret i32 0
+end:
+ ret void
; CHECK: ret
}
diff --git a/test/CodeGen/AArch64/addsub.ll b/test/CodeGen/AArch64/addsub.ll
index b85fdbb14ce2..d6350a6db0ee 100644
--- a/test/CodeGen/AArch64/addsub.ll
+++ b/test/CodeGen/AArch64/addsub.ll
@@ -12,29 +12,57 @@ define void @add_small() {
; CHECK-LABEL: add_small:
; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, #4095
- %val32 = load i32* @var_i32
+ %val32 = load i32, i32* @var_i32
%newval32 = add i32 %val32, 4095
store i32 %newval32, i32* @var_i32
; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, #52
- %val64 = load i64* @var_i64
+ %val64 = load i64, i64* @var_i64
%newval64 = add i64 %val64, 52
store i64 %newval64, i64* @var_i64
ret void
}
+; Make sure we grab the imm variant when the register operand
+; can be implicitly zero-extended.
+; We used to generate something horrible like this:
+; wA = ldrb
+; xB = ldimm 12
+; xC = add xB, wA, uxtb
+; whereas this can be achieved with:
+; wA = ldrb
+; xC = add xA, #12 ; <- xA implicitly zero-extends wA.
+define void @add_small_imm(i8* %p, i64* %q, i32 %b, i32* %addr) {
+; CHECK-LABEL: add_small_imm:
+entry:
+
+; CHECK: ldrb w[[LOAD32:[0-9]+]], [x0]
+ %t = load i8, i8* %p
+ %promoted = zext i8 %t to i64
+ %zextt = zext i8 %t to i32
+ %add = add nuw i32 %zextt, %b
+
+; CHECK: add [[ADD2:x[0-9]+]], x[[LOAD32]], #12
+ %add2 = add nuw i64 %promoted, 12
+ store i32 %add, i32* %addr
+
+; CHECK: str [[ADD2]], [x1]
+ store i64 %add2, i64* %q
+ ret void
+}
+
; Add 12-bit immediates, shifted left by 12 bits
define void @add_med() {
; CHECK-LABEL: add_med:
; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{#3567, lsl #12|#14610432}}
- %val32 = load i32* @var_i32
+ %val32 = load i32, i32* @var_i32
%newval32 = add i32 %val32, 14610432 ; =0xdef000
store i32 %newval32, i32* @var_i32
; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{#4095, lsl #12|#16773120}}
- %val64 = load i64* @var_i64
+ %val64 = load i64, i64* @var_i64
%newval64 = add i64 %val64, 16773120 ; =0xfff000
store i64 %newval64, i64* @var_i64
@@ -46,12 +74,12 @@ define void @sub_small() {
; CHECK-LABEL: sub_small:
; CHECK: sub {{w[0-9]+}}, {{w[0-9]+}}, #4095
- %val32 = load i32* @var_i32
+ %val32 = load i32, i32* @var_i32
%newval32 = sub i32 %val32, 4095
store i32 %newval32, i32* @var_i32
; CHECK: sub {{x[0-9]+}}, {{x[0-9]+}}, #52
- %val64 = load i64* @var_i64
+ %val64 = load i64, i64* @var_i64
%newval64 = sub i64 %val64, 52
store i64 %newval64, i64* @var_i64
@@ -63,12 +91,12 @@ define void @sub_med() {
; CHECK-LABEL: sub_med:
; CHECK: sub {{w[0-9]+}}, {{w[0-9]+}}, {{#3567, lsl #12|#14610432}}
- %val32 = load i32* @var_i32
+ %val32 = load i32, i32* @var_i32
%newval32 = sub i32 %val32, 14610432 ; =0xdef000
store i32 %newval32, i32* @var_i32
; CHECK: sub {{x[0-9]+}}, {{x[0-9]+}}, {{#4095, lsl #12|#16773120}}
- %val64 = load i64* @var_i64
+ %val64 = load i64, i64* @var_i64
%newval64 = sub i64 %val64, 16773120 ; =0xfff000
store i64 %newval64, i64* @var_i64
@@ -77,7 +105,7 @@ define void @sub_med() {
define void @testing() {
; CHECK-LABEL: testing:
- %val = load i32* @var_i32
+ %val = load i32, i32* @var_i32
; CHECK: cmp {{w[0-9]+}}, #4095
; CHECK: b.ne [[RET:.?LBB[0-9]+_[0-9]+]]
diff --git a/test/CodeGen/AArch64/addsub_ext.ll b/test/CodeGen/AArch64/addsub_ext.ll
index ceea8a08ecee..f0c7572ebf13 100644
--- a/test/CodeGen/AArch64/addsub_ext.ll
+++ b/test/CodeGen/AArch64/addsub_ext.ll
@@ -7,9 +7,9 @@
define void @addsub_i8rhs() minsize {
; CHECK-LABEL: addsub_i8rhs:
- %val8_tmp = load i8* @var8
- %lhs32 = load i32* @var32
- %lhs64 = load i64* @var64
+ %val8_tmp = load i8, i8* @var8
+ %lhs32 = load i32, i32* @var32
+ %lhs64 = load i64, i64* @var64
; Need this to prevent extension upon load and give a vanilla i8 operand.
%val8 = add i8 %val8_tmp, 123
@@ -82,9 +82,9 @@ end:
define void @addsub_i16rhs() minsize {
; CHECK-LABEL: addsub_i16rhs:
- %val16_tmp = load i16* @var16
- %lhs32 = load i32* @var32
- %lhs64 = load i64* @var64
+ %val16_tmp = load i16, i16* @var16
+ %lhs32 = load i32, i32* @var32
+ %lhs64 = load i64, i64* @var64
; Need this to prevent extension upon load and give a vanilla i16 operand.
%val16 = add i16 %val16_tmp, 123
@@ -160,8 +160,8 @@ end:
; in the face of "add/sub (shifted register)" so I don't intend to.
define void @addsub_i32rhs() minsize {
; CHECK-LABEL: addsub_i32rhs:
- %val32_tmp = load i32* @var32
- %lhs64 = load i64* @var64
+ %val32_tmp = load i32, i32* @var32
+ %lhs64 = load i64, i64* @var64
%val32 = add i32 %val32_tmp, 123
diff --git a/test/CodeGen/AArch64/alloca.ll b/test/CodeGen/AArch64/alloca.ll
index f93efbc42e65..5b2278ce8a35 100644
--- a/test/CodeGen/AArch64/alloca.ll
+++ b/test/CodeGen/AArch64/alloca.ll
@@ -51,7 +51,7 @@ define i64 @test_alloca_with_local(i64 %n) {
call void @use_addr_loc(i8* %buf, i64* %loc)
; CHECK: bl use_addr
- %val = load i64* %loc
+ %val = load i64, i64* %loc
; CHECK: ldur x0, [x29, #-[[LOC_FROM_FP]]]
diff --git a/test/CodeGen/AArch64/analyzecmp.ll b/test/CodeGen/AArch64/analyzecmp.ll
index 8962505cbd15..0b3bcd887b5b 100644
--- a/test/CodeGen/AArch64/analyzecmp.ll
+++ b/test/CodeGen/AArch64/analyzecmp.ll
@@ -1,9 +1,9 @@
; RUN: llc -O3 -mcpu=cortex-a57 < %s | FileCheck %s
-; CHECK-LABLE: @test
-; CHECK: tst [[CMP:x[0-9]+]], #0x8000000000000000
-; CHECK: csel [[R0:x[0-9]+]], [[S0:x[0-9]+]], [[S1:x[0-9]+]], eq
-; CHECK: csel [[R1:x[0-9]+]], [[S2:x[0-9]+]], [[S3:x[0-9]+]], eq
+; CHECK-LABEL: @test
+; CHECK: and
+; CHECK: csel
+; CHECK: csel
target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
target triple = "arm64--linux-gnueabi"
diff --git a/test/CodeGen/AArch64/and-mask-removal.ll b/test/CodeGen/AArch64/and-mask-removal.ll
index f803b85f733b..8291516d81ea 100644
--- a/test/CodeGen/AArch64/and-mask-removal.ll
+++ b/test/CodeGen/AArch64/and-mask-removal.ll
@@ -8,15 +8,15 @@
define void @new_position(i32 %pos) {
entry:
%idxprom = sext i32 %pos to i64
- %arrayidx = getelementptr inbounds [400 x i8]* @board, i64 0, i64 %idxprom
- %tmp = load i8* %arrayidx, align 1
+ %arrayidx = getelementptr inbounds [400 x i8], [400 x i8]* @board, i64 0, i64 %idxprom
+ %tmp = load i8, i8* %arrayidx, align 1
%.off = add i8 %tmp, -1
%switch = icmp ult i8 %.off, 2
br i1 %switch, label %if.then, label %if.end
if.then: ; preds = %entry
- %tmp1 = load i32* @next_string, align 4
- %arrayidx8 = getelementptr inbounds [400 x i32]* @string_number, i64 0, i64 %idxprom
+ %tmp1 = load i32, i32* @next_string, align 4
+ %arrayidx8 = getelementptr inbounds [400 x i32], [400 x i32]* @string_number, i64 0, i64 %idxprom
store i32 %tmp1, i32* %arrayidx8, align 4
br label %if.end
diff --git a/test/CodeGen/AArch64/argument-blocks.ll b/test/CodeGen/AArch64/argument-blocks.ll
index f1dcfa67d0eb..3169abc2dcb3 100644
--- a/test/CodeGen/AArch64/argument-blocks.ll
+++ b/test/CodeGen/AArch64/argument-blocks.ll
@@ -64,7 +64,7 @@ define void @test_varargs_stackalign() {
; CHECK-LABEL: test_varargs_stackalign:
; CHECK-DARWINPCS: stp {{w[0-9]+}}, {{w[0-9]+}}, [sp, #16]
- call void(...)* @callee([3 x float] undef, [2 x float] [float 1.0, float 2.0])
+ call void(...) @callee([3 x float] undef, [2 x float] [float 1.0, float 2.0])
ret void
}
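
The change to argument-blocks.ll above is the third mechanical rewrite running through this import: a direct call no longer spells the callee's type as a pointer to function. Ordinary calls keep just the return type, and variadic callees get the full function type without the trailing '*'. A sketch of the pattern, reusing the @callee declaration from the test above:

  ; older form: explicit pointer-to-function type on the call
  ;   call void(...)* @callee([3 x float] undef, [2 x float] [float 1.0, float 2.0])
  ; updated form: the function type itself, since @callee is variadic
    call void(...) @callee([3 x float] undef, [2 x float] [float 1.0, float 2.0])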
diff --git a/test/CodeGen/AArch64/arm64-2011-03-17-AsmPrinterCrash.ll b/test/CodeGen/AArch64/arm64-2011-03-17-AsmPrinterCrash.ll
index 8b88c0b40887..173a440326ac 100644
--- a/test/CodeGen/AArch64/arm64-2011-03-17-AsmPrinterCrash.ll
+++ b/test/CodeGen/AArch64/arm64-2011-03-17-AsmPrinterCrash.ll
@@ -11,7 +11,7 @@ if.then24: ; preds = %entry
unreachable
if.else295: ; preds = %entry
- call void @llvm.dbg.declare(metadata i32* %do_tab_convert, metadata !16, metadata !{!"0x102"}), !dbg !18
+ call void @llvm.dbg.declare(metadata i32* %do_tab_convert, metadata !16, metadata !DIExpression()), !dbg !18
store i32 0, i32* %do_tab_convert, align 4, !dbg !19
unreachable
}
@@ -21,25 +21,25 @@ declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
!llvm.dbg.gv = !{!0}
!llvm.dbg.sp = !{!1, !7, !10, !11, !12}
-!0 = !{!"0x34\00vsplive\00vsplive\00\00617\001\001", !1, !2, !6, null, null} ; [ DW_TAG_variable ]
-!1 = !{!"0x2e\00drt_vsprintf\00drt_vsprintf\00\00616\000\001\000\006\00256\000\000", !20, !2, !4, null, null, null, null, null} ; [ DW_TAG_subprogram ]
-!2 = !{!"0x29", !20} ; [ DW_TAG_file_type ]
-!3 = !{!"0x11\0012\00clang version 3.0 (http://llvm.org/git/clang.git git:/git/puzzlebox/clang.git/ c4d1aea01c4444eb81bdbf391f1be309127c3cf1)\001\00\000\00\000", !20, !21, !21, null, null, null} ; [ DW_TAG_compile_unit ]
-!4 = !{!"0x15\00\000\000\000\000\000\000", !20, !2, null, !5, i32 0} ; [ DW_TAG_subroutine_type ]
+!0 = !DIGlobalVariable(name: "vsplive", line: 617, isLocal: true, isDefinition: true, scope: !1, file: !2, type: !6)
+!1 = !DISubprogram(name: "drt_vsprintf", line: 616, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, file: !20, scope: !2, type: !4)
+!2 = !DIFile(filename: "print.i", directory: "/Volumes/Ebi/echeng/radars/r9146594")
+!3 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.0 (http://llvm.org/git/clang.git git:/git/puzzlebox/clang.git/ c4d1aea01c4444eb81bdbf391f1be309127c3cf1)", isOptimized: true, emissionKind: 0, file: !20, enums: !21, retainedTypes: !21)
+!4 = !DISubroutineType(types: !5)
!5 = !{!6}
-!6 = !{!"0x24\00int\000\0032\0032\000\000\005", null, !3} ; [ DW_TAG_base_type ]
-!7 = !{!"0x2e\00putc_mem\00putc_mem\00\0030\001\001\000\006\00256\000\000", !20, !2, !8, null, null, null, null, null} ; [ DW_TAG_subprogram ]
-!8 = !{!"0x15\00\000\000\000\000\000\000", !20, !2, null, !9, i32 0} ; [ DW_TAG_subroutine_type ]
+!6 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!7 = !DISubprogram(name: "putc_mem", line: 30, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, file: !20, scope: !2, type: !8)
+!8 = !DISubroutineType(types: !9)
!9 = !{null}
-!10 = !{!"0x2e\00print_double\00print_double\00\00203\001\001\000\006\00256\000\000", !20, !2, !4, null, null, null, null, null} ; [ DW_TAG_subprogram ]
-!11 = !{!"0x2e\00print_number\00print_number\00\0075\001\001\000\006\00256\000\000", !20, !2, !4, i32 0, null, null, null, null} ; [ DW_TAG_subprogram ]
-!12 = !{!"0x2e\00get_flags\00get_flags\00\00508\001\001\000\006\00256\000\000", !20, !2, !8, null, null, null, null, null} ; [ DW_TAG_subprogram ]
-!13 = !MDLocation(line: 653, column: 5, scope: !14)
-!14 = !{!"0xb\00652\0035\002", !20, !15} ; [ DW_TAG_lexical_block ]
-!15 = !{!"0xb\00616\001\000", !20, !1} ; [ DW_TAG_lexical_block ]
-!16 = !{!"0x100\00do_tab_convert\00853\000", !17, !2, !6} ; [ DW_TAG_auto_variable ]
-!17 = !{!"0xb\00850\0012\0033", !20, !14} ; [ DW_TAG_lexical_block ]
-!18 = !MDLocation(line: 853, column: 11, scope: !17)
-!19 = !MDLocation(line: 853, column: 29, scope: !17)
-!20 = !{!"print.i", !"/Volumes/Ebi/echeng/radars/r9146594"}
+!10 = !DISubprogram(name: "print_double", line: 203, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, file: !20, scope: !2, type: !4)
+!11 = !DISubprogram(name: "print_number", line: 75, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, file: !20, scope: !2, type: !4)
+!12 = !DISubprogram(name: "get_flags", line: 508, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, file: !20, scope: !2, type: !8)
+!13 = !DILocation(line: 653, column: 5, scope: !14)
+!14 = distinct !DILexicalBlock(line: 652, column: 35, file: !20, scope: !15)
+!15 = distinct !DILexicalBlock(line: 616, column: 1, file: !20, scope: !1)
+!16 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "do_tab_convert", line: 853, scope: !17, file: !2, type: !6)
+!17 = distinct !DILexicalBlock(line: 850, column: 12, file: !20, scope: !14)
+!18 = !DILocation(line: 853, column: 11, scope: !17)
+!19 = !DILocation(line: 853, column: 29, scope: !17)
+!20 = !DIFile(filename: "print.i", directory: "/Volumes/Ebi/echeng/radars/r9146594")
!21 = !{i32 0}
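
The metadata rewrite above replaces the old hex-encoded debug-info nodes and !MDLocation with the specialized, field-named DI nodes. The shape of the mapping, taken directly from the nodes in this test:

  ; older form: tag encoded in a string, positional operands
  ;   !13 = !MDLocation(line: 653, column: 5, scope: !14)
  ;   !16 = !{!"0x100\00do_tab_convert\00853\000", !17, !2, !6} ; DW_TAG_auto_variable
  ; updated form: named fields on a typed node
    !13 = !DILocation(line: 653, column: 5, scope: !14)
    !16 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "do_tab_convert", line: 853, scope: !17, file: !2, type: !6)

The llvm.dbg.declare call is updated in the same way, taking !DIExpression() in place of the raw !{!"0x102"} expression node.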
diff --git a/test/CodeGen/AArch64/arm64-2011-03-21-Unaligned-Frame-Index.ll b/test/CodeGen/AArch64/arm64-2011-03-21-Unaligned-Frame-Index.ll
index 6f0ec34fc1dd..491433ce71f7 100644
--- a/test/CodeGen/AArch64/arm64-2011-03-21-Unaligned-Frame-Index.ll
+++ b/test/CodeGen/AArch64/arm64-2011-03-21-Unaligned-Frame-Index.ll
@@ -5,7 +5,7 @@ define void @foo(i64 %val) {
; instruction that can handle that.
; CHECK: stur x0, [sp, #20]
%a = alloca [49 x i32], align 4
- %p32 = getelementptr inbounds [49 x i32]* %a, i64 0, i64 2
+ %p32 = getelementptr inbounds [49 x i32], [49 x i32]* %a, i64 0, i64 2
%p = bitcast i32* %p32 to i64*
store i64 %val, i64* %p, align 8
ret void
diff --git a/test/CodeGen/AArch64/arm64-2011-04-21-CPSRBug.ll b/test/CodeGen/AArch64/arm64-2011-04-21-CPSRBug.ll
index 88232fcc0b4d..e2c39e0b6232 100644
--- a/test/CodeGen/AArch64/arm64-2011-04-21-CPSRBug.ll
+++ b/test/CodeGen/AArch64/arm64-2011-04-21-CPSRBug.ll
@@ -8,7 +8,7 @@ entry:
%cmp = icmp eq i32* null, undef
%frombool = zext i1 %cmp to i8
store i8 %frombool, i8* undef, align 1
- %tmp4 = load i8* undef, align 1
+ %tmp4 = load i8, i8* undef, align 1
%tobool = trunc i8 %tmp4 to i1
br i1 %tobool, label %land.lhs.true, label %if.end
diff --git a/test/CodeGen/AArch64/arm64-2011-10-18-LdStOptBug.ll b/test/CodeGen/AArch64/arm64-2011-10-18-LdStOptBug.ll
index a83f1646ef62..b69cd2421166 100644
--- a/test/CodeGen/AArch64/arm64-2011-10-18-LdStOptBug.ll
+++ b/test/CodeGen/AArch64/arm64-2011-10-18-LdStOptBug.ll
@@ -19,7 +19,7 @@ for.body:
%0 = shl nsw i64 %indvars.iv, 12
%add = add nsw i64 %0, 34628173824
%1 = inttoptr i64 %add to i32*
- %2 = load volatile i32* %1, align 4096
+ %2 = load volatile i32, i32* %1, align 4096
store volatile i32 %2, i32* @test_data, align 4
%indvars.iv.next = add i64 %indvars.iv, 1
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
diff --git a/test/CodeGen/AArch64/arm64-2012-01-11-ComparisonDAGCrash.ll b/test/CodeGen/AArch64/arm64-2012-01-11-ComparisonDAGCrash.ll
index d47dbb28164c..8d0b1b6f84cc 100644
--- a/test/CodeGen/AArch64/arm64-2012-01-11-ComparisonDAGCrash.ll
+++ b/test/CodeGen/AArch64/arm64-2012-01-11-ComparisonDAGCrash.ll
@@ -13,7 +13,7 @@ lor.lhs.false:
br i1 undef, label %return, label %if.end
if.end:
- %tmp.i = load i64* undef, align 8
+ %tmp.i = load i64, i64* undef, align 8
%and.i.i.i = and i64 %tmp.i, -16
br i1 %IsArrow, label %if.else_crit_edge, label %if.end32
diff --git a/test/CodeGen/AArch64/arm64-2012-05-22-LdStOptBug.ll b/test/CodeGen/AArch64/arm64-2012-05-22-LdStOptBug.ll
index b5b1b70975de..ef8d6f3b4ef9 100644
--- a/test/CodeGen/AArch64/arm64-2012-05-22-LdStOptBug.ll
+++ b/test/CodeGen/AArch64/arm64-2012-05-22-LdStOptBug.ll
@@ -15,23 +15,23 @@ define hidden %struct.CGRect @t(%0* nocapture %self, i8* nocapture %_cmd) nounwi
entry:
; CHECK-LABEL: t:
; CHECK: ldp d{{[0-9]+}}, d{{[0-9]+}}
- %ivar = load i64* @"OBJC_IVAR_$_UIScreen._bounds", align 8, !invariant.load !4
+ %ivar = load i64, i64* @"OBJC_IVAR_$_UIScreen._bounds", align 8, !invariant.load !4
%0 = bitcast %0* %self to i8*
- %add.ptr = getelementptr inbounds i8* %0, i64 %ivar
+ %add.ptr = getelementptr inbounds i8, i8* %0, i64 %ivar
%add.ptr10.0 = bitcast i8* %add.ptr to double*
- %tmp11 = load double* %add.ptr10.0, align 8
+ %tmp11 = load double, double* %add.ptr10.0, align 8
%add.ptr.sum = add i64 %ivar, 8
- %add.ptr10.1 = getelementptr inbounds i8* %0, i64 %add.ptr.sum
+ %add.ptr10.1 = getelementptr inbounds i8, i8* %0, i64 %add.ptr.sum
%1 = bitcast i8* %add.ptr10.1 to double*
- %tmp12 = load double* %1, align 8
+ %tmp12 = load double, double* %1, align 8
%add.ptr.sum17 = add i64 %ivar, 16
- %add.ptr4.1 = getelementptr inbounds i8* %0, i64 %add.ptr.sum17
+ %add.ptr4.1 = getelementptr inbounds i8, i8* %0, i64 %add.ptr.sum17
%add.ptr4.1.0 = bitcast i8* %add.ptr4.1 to double*
- %tmp = load double* %add.ptr4.1.0, align 8
+ %tmp = load double, double* %add.ptr4.1.0, align 8
%add.ptr4.1.sum = add i64 %ivar, 24
- %add.ptr4.1.1 = getelementptr inbounds i8* %0, i64 %add.ptr4.1.sum
+ %add.ptr4.1.1 = getelementptr inbounds i8, i8* %0, i64 %add.ptr4.1.sum
%2 = bitcast i8* %add.ptr4.1.1 to double*
- %tmp5 = load double* %2, align 8
+ %tmp5 = load double, double* %2, align 8
%insert14 = insertvalue %struct.CGPoint undef, double %tmp11, 0
%insert16 = insertvalue %struct.CGPoint %insert14, double %tmp12, 1
%insert = insertvalue %struct.CGRect undef, %struct.CGPoint %insert16, 0
diff --git a/test/CodeGen/AArch64/arm64-2012-06-06-FPToUI.ll b/test/CodeGen/AArch64/arm64-2012-06-06-FPToUI.ll
index 4db1f59a2c6c..b760261f7881 100644
--- a/test/CodeGen/AArch64/arm64-2012-06-06-FPToUI.ll
+++ b/test/CodeGen/AArch64/arm64-2012-06-06-FPToUI.ll
@@ -13,14 +13,14 @@ define void @testDouble(double %d) ssp {
entry:
%d.addr = alloca double, align 8
store double %d, double* %d.addr, align 8
- %0 = load double* %d.addr, align 8
- %1 = load double* %d.addr, align 8
+ %0 = load double, double* %d.addr, align 8
+ %1 = load double, double* %d.addr, align 8
%conv = fptoui double %1 to i64
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([9 x i8]* @.str, i32 0, i32 0), double %0, i64 %conv)
- %2 = load double* %d.addr, align 8
- %3 = load double* %d.addr, align 8
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str, i32 0, i32 0), double %0, i64 %conv)
+ %2 = load double, double* %d.addr, align 8
+ %3 = load double, double* %d.addr, align 8
%conv1 = fptoui double %3 to i32
- %call2 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([8 x i8]* @.str1, i32 0, i32 0), double %2, i32 %conv1)
+ %call2 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([8 x i8], [8 x i8]* @.str1, i32 0, i32 0), double %2, i32 %conv1)
ret void
}
@@ -33,16 +33,16 @@ define void @testFloat(float %f) ssp {
entry:
%f.addr = alloca float, align 4
store float %f, float* %f.addr, align 4
- %0 = load float* %f.addr, align 4
+ %0 = load float, float* %f.addr, align 4
%conv = fpext float %0 to double
- %1 = load float* %f.addr, align 4
+ %1 = load float, float* %f.addr, align 4
%conv1 = fptoui float %1 to i64
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([8 x i8]* @.str2, i32 0, i32 0), double %conv, i64 %conv1)
- %2 = load float* %f.addr, align 4
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([8 x i8], [8 x i8]* @.str2, i32 0, i32 0), double %conv, i64 %conv1)
+ %2 = load float, float* %f.addr, align 4
%conv2 = fpext float %2 to double
- %3 = load float* %f.addr, align 4
+ %3 = load float, float* %f.addr, align 4
%conv3 = fptoui float %3 to i32
- %call4 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([7 x i8]* @.str3, i32 0, i32 0), double %conv2, i32 %conv3)
+ %call4 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([7 x i8], [7 x i8]* @.str3, i32 0, i32 0), double %conv2, i32 %conv3)
ret void
}
diff --git a/test/CodeGen/AArch64/arm64-2012-07-11-InstrEmitterBug.ll b/test/CodeGen/AArch64/arm64-2012-07-11-InstrEmitterBug.ll
index 55ecfb5d2bd6..997431bda560 100644
--- a/test/CodeGen/AArch64/arm64-2012-07-11-InstrEmitterBug.ll
+++ b/test/CodeGen/AArch64/arm64-2012-07-11-InstrEmitterBug.ll
@@ -44,7 +44,7 @@ cond.true43: ; preds = %for.body14
unreachable
cond.false45: ; preds = %for.body14
- %add.ptr = getelementptr inbounds i8* %path, i64 %conv30
+ %add.ptr = getelementptr inbounds i8, i8* %path, i64 %conv30
unreachable
if.end56: ; preds = %for.cond10, %entry
diff --git a/test/CodeGen/AArch64/arm64-AdvSIMD-Scalar.ll b/test/CodeGen/AArch64/arm64-AdvSIMD-Scalar.ll
index 6266d1cc9b33..8784abdadfab 100644
--- a/test/CodeGen/AArch64/arm64-AdvSIMD-Scalar.ll
+++ b/test/CodeGen/AArch64/arm64-AdvSIMD-Scalar.ll
@@ -7,13 +7,13 @@ define <2 x i64> @bar(<2 x i64> %a, <2 x i64> %b) nounwind readnone {
; CHECK-LABEL: bar:
; CHECK: add.2d v[[REG:[0-9]+]], v0, v1
; CHECK: add d[[REG3:[0-9]+]], d[[REG]], d1
+; CHECK: sub d[[REG2:[0-9]+]], d[[REG]], d1
; Without advanced copy optimization, we end up with cross-register-bank
; copies that cannot be coalesced.
; CHECK-NOOPT: fmov [[COPY_REG3:x[0-9]+]], d[[REG3]]
; With advanced copy optimization, we end up with just one copy
; to insert the computed high part into the V register.
; CHECK-OPT-NOT: fmov
-; CHECK: sub d[[REG2:[0-9]+]], d[[REG]], d1
; CHECK: fmov [[COPY_REG2:x[0-9]+]], d[[REG2]]
; CHECK-NOOPT: fmov d0, [[COPY_REG3]]
; CHECK-OPT-NOT: fmov
@@ -23,9 +23,9 @@ define <2 x i64> @bar(<2 x i64> %a, <2 x i64> %b) nounwind readnone {
; GENERIC-LABEL: bar:
; GENERIC: add v[[REG:[0-9]+]].2d, v0.2d, v1.2d
; GENERIC: add d[[REG3:[0-9]+]], d[[REG]], d1
+; GENERIC: sub d[[REG2:[0-9]+]], d[[REG]], d1
; GENERIC-NOOPT: fmov [[COPY_REG3:x[0-9]+]], d[[REG3]]
; GENERIC-OPT-NOT: fmov
-; GENERIC: sub d[[REG2:[0-9]+]], d[[REG]], d1
; GENERIC: fmov [[COPY_REG2:x[0-9]+]], d[[REG2]]
; GENERIC-NOOPT: fmov d0, [[COPY_REG3]]
; GENERIC-OPT-NOT: fmov
diff --git a/test/CodeGen/AArch64/arm64-aapcs.ll b/test/CodeGen/AArch64/arm64-aapcs.ll
index 41c3ad5766c3..d0880cd4f3eb 100644
--- a/test/CodeGen/AArch64/arm64-aapcs.ll
+++ b/test/CodeGen/AArch64/arm64-aapcs.ll
@@ -2,6 +2,7 @@
@var = global i32 0, align 4
+; CHECK-LABEL: @test_i128_align
define i128 @test_i128_align(i32, i128 %arg, i32 %after) {
store i32 %after, i32* @var, align 4
; CHECK: str w4, [{{x[0-9]+}}, :lo12:var]
@@ -11,6 +12,16 @@ define i128 @test_i128_align(i32, i128 %arg, i32 %after) {
; CHECK: mov x1, x3
}
+; CHECK-LABEL: @test_i64x2_align
+define [2 x i64] @test_i64x2_align(i32, [2 x i64] %arg, i32 %after) {
+ store i32 %after, i32* @var, align 4
+; CHECK: str w3, [{{x[0-9]+}}, :lo12:var]
+
+ ret [2 x i64] %arg
+; CHECK: mov x0, x1
+; CHECK: mov x1, x2
+}
+
@var64 = global i64 0, align 8
; Check stack slots are 64-bit at all times.
@@ -78,7 +89,7 @@ declare void @variadic(i32 %a, ...)
; Under AAPCS variadic functions have the same calling convention as
; others. The extra arguments should go in registers rather than on the stack.
define void @test_variadic() {
- call void(i32, ...)* @variadic(i32 0, i64 1, double 2.0)
+ call void(i32, ...) @variadic(i32 0, i64 1, double 2.0)
; CHECK: fmov d0, #2.0
; CHECK: orr w1, wzr, #0x1
; CHECK: bl variadic
diff --git a/test/CodeGen/AArch64/arm64-abi-varargs.ll b/test/CodeGen/AArch64/arm64-abi-varargs.ll
index 92db392cd041..03414b56144c 100644
--- a/test/CodeGen/AArch64/arm64-abi-varargs.ll
+++ b/test/CodeGen/AArch64/arm64-abi-varargs.ll
@@ -82,19 +82,19 @@ define i32 @main() nounwind ssp {
store i32 10, i32* %a10, align 4
store i32 11, i32* %a11, align 4
store i32 12, i32* %a12, align 4
- %1 = load i32* %a1, align 4
- %2 = load i32* %a2, align 4
- %3 = load i32* %a3, align 4
- %4 = load i32* %a4, align 4
- %5 = load i32* %a5, align 4
- %6 = load i32* %a6, align 4
- %7 = load i32* %a7, align 4
- %8 = load i32* %a8, align 4
- %9 = load i32* %a9, align 4
- %10 = load i32* %a10, align 4
- %11 = load i32* %a11, align 4
- %12 = load i32* %a12, align 4
- call void (i32, i32, i32, i32, i32, i32, i32, i32, i32, ...)* @fn9(i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7, i32 %8, i32 %9, i32 %10, i32 %11, i32 %12)
+ %1 = load i32, i32* %a1, align 4
+ %2 = load i32, i32* %a2, align 4
+ %3 = load i32, i32* %a3, align 4
+ %4 = load i32, i32* %a4, align 4
+ %5 = load i32, i32* %a5, align 4
+ %6 = load i32, i32* %a6, align 4
+ %7 = load i32, i32* %a7, align 4
+ %8 = load i32, i32* %a8, align 4
+ %9 = load i32, i32* %a9, align 4
+ %10 = load i32, i32* %a10, align 4
+ %11 = load i32, i32* %a11, align 4
+ %12 = load i32, i32* %a12, align 4
+ call void (i32, i32, i32, i32, i32, i32, i32, i32, i32, ...) @fn9(i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7, i32 %8, i32 %9, i32 %10, i32 %11, i32 %12)
ret i32 0
}
@@ -131,9 +131,9 @@ entry:
%y.addr = alloca <4 x i32>, align 16
store i32 %x, i32* %x.addr, align 4
store <4 x i32> %y, <4 x i32>* %y.addr, align 16
- %0 = load i32* %x.addr, align 4
- %1 = load <4 x i32>* %y.addr, align 16
- call void (i8*, ...)* @foo(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i32 %0, <4 x i32> %1)
+ %0 = load i32, i32* %x.addr, align 4
+ %1 = load <4 x i32>, <4 x i32>* %y.addr, align 16
+ call void (i8*, ...) @foo(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i32 %0, <4 x i32> %1)
ret void
}
@@ -158,12 +158,12 @@ entry:
call void @llvm.va_start(i8* %args1)
%0 = va_arg i8** %args, i32
store i32 %0, i32* %vc, align 4
- %ap.cur = load i8** %args
- %1 = getelementptr i8* %ap.cur, i32 15
+ %ap.cur = load i8*, i8** %args
+ %1 = getelementptr i8, i8* %ap.cur, i32 15
%2 = ptrtoint i8* %1 to i64
%3 = and i64 %2, -16
%ap.align = inttoptr i64 %3 to i8*
- %ap.next = getelementptr i8* %ap.align, i32 16
+ %ap.next = getelementptr i8, i8* %ap.align, i32 16
store i8* %ap.next, i8** %args
%4 = bitcast i8* %ap.align to %struct.s41*
%5 = bitcast %struct.s41* %vs to i8*
@@ -183,9 +183,9 @@ entry:
store i32 %x, i32* %x.addr, align 4
%0 = bitcast %struct.s41* %s41 to i128*
store i128 %s41.coerce, i128* %0, align 1
- %1 = load i32* %x.addr, align 4
+ %1 = load i32, i32* %x.addr, align 4
%2 = bitcast %struct.s41* %s41 to i128*
- %3 = load i128* %2, align 1
- call void (i8*, ...)* @foo2(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i32 %1, i128 %3)
+ %3 = load i128, i128* %2, align 1
+ call void (i8*, ...) @foo2(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i32 %1, i128 %3)
ret void
}
diff --git a/test/CodeGen/AArch64/arm64-abi.ll b/test/CodeGen/AArch64/arm64-abi.ll
index 8a6b64d6ff35..36a682242aaa 100644
--- a/test/CodeGen/AArch64/arm64-abi.ll
+++ b/test/CodeGen/AArch64/arm64-abi.ll
@@ -79,7 +79,7 @@ entry:
; FAST: sub sp, sp
; FAST: mov x[[ADDR:[0-9]+]], sp
; FAST: str [[REG_1:q[0-9]+]], [x[[ADDR]], #16]
- %0 = load <4 x i32>* %in, align 16
+ %0 = load <4 x i32>, <4 x i32>* %in, align 16
%call = tail call double @args_vec_4i(double 3.000000e+00, <4 x i32> %0, <4 x i32> %0, <4 x i32> %0, <4 x i32> %0, <4 x i32> %0, <4 x i32> %0, <4 x i32> %0, double 3.000000e+00, <4 x i32> %0, i8 signext 3)
ret double %call
}
@@ -133,7 +133,7 @@ entry:
; FAST: sub sp, sp, #32
; FAST: mov x[[ADDR:[0-9]+]], sp
; FAST: str [[REG_1:d[0-9]+]], [x[[ADDR]], #8]
- %0 = load <2 x i32>* %in, align 8
+ %0 = load <2 x i32>, <2 x i32>* %in, align 8
%call = tail call double @args_vec_2i(double 3.000000e+00, <2 x i32> %0,
<2 x i32> %0, <2 x i32> %0, <2 x i32> %0, <2 x i32> %0, <2 x i32> %0,
<2 x i32> %0, float 3.000000e+00, <2 x i32> %0, i8 signext 3)
@@ -148,7 +148,7 @@ entry:
; CHECK: str [[REG_1:d[0-9]+]], [sp, #8]
; CHECK: str [[REG_2:w[0-9]+]], [sp]
; CHECK: orr w0, wzr, #0x3
- %0 = load double* %in, align 8
+ %0 = load double, double* %in, align 8
%call = tail call double @args_f64(double 3.000000e+00, double %0, double %0,
double %0, double %0, double %0, double %0, double %0,
float 3.000000e+00, double %0, i8 signext 3)
@@ -163,7 +163,7 @@ entry:
; CHECK: strb [[REG_3:w[0-9]+]], [sp, #16]
; CHECK: str [[REG_1:x[0-9]+]], [sp, #8]
; CHECK: str [[REG_2:w[0-9]+]], [sp]
- %0 = load i64* %in, align 8
+ %0 = load i64, i64* %in, align 8
%call = tail call i64 @args_i64(i64 3, i64 %0, i64 %0, i64 %0, i64 %0, i64 %0,
i64 %0, i64 %0, i32 3, i64 %0, i8 signext 3)
ret i64 %call
@@ -177,7 +177,7 @@ entry:
; CHECK: strb [[REG_2:w[0-9]+]], [sp, #8]
; CHECK: str [[REG_1:s[0-9]+]], [sp, #4]
; CHECK: strh [[REG_3:w[0-9]+]], [sp]
- %0 = load float* %in, align 4
+ %0 = load float, float* %in, align 4
%call = tail call i32 @args_f32(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6,
i32 7, i32 8, float 1.0, float 2.0, float 3.0, float 4.0, float 5.0,
float 6.0, float 7.0, float 8.0, i16 signext 3, float %0,
@@ -194,7 +194,7 @@ entry:
; CHECK: strb [[REG_2:w[0-9]+]], [sp, #8]
; CHECK: str [[REG_1:w[0-9]+]], [sp, #4]
; CHECK: strh [[REG_3:w[0-9]+]], [sp]
- %0 = load i32* %in, align 4
+ %0 = load i32, i32* %in, align 4
%call = tail call i32 @args_i32(i32 3, i32 %0, i32 %0, i32 %0, i32 %0, i32 %0,
i32 %0, i32 %0, i16 signext 3, i32 %0, i8 signext 4)
ret i32 %call
diff --git a/test/CodeGen/AArch64/arm64-abi_align.ll b/test/CodeGen/AArch64/arm64-abi_align.ll
index e03d7fadaf44..1c1b58b8b140 100644
--- a/test/CodeGen/AArch64/arm64-abi_align.ll
+++ b/test/CodeGen/AArch64/arm64-abi_align.ll
@@ -59,8 +59,8 @@ entry:
; CHECK-LABEL: caller38
; CHECK: ldr x1,
; CHECK: ldr x2,
- %0 = load i64* bitcast (%struct.s38* @g38 to i64*), align 4
- %1 = load i64* bitcast (%struct.s38* @g38_2 to i64*), align 4
+ %0 = load i64, i64* bitcast (%struct.s38* @g38 to i64*), align 4
+ %1 = load i64, i64* bitcast (%struct.s38* @g38_2 to i64*), align 4
%call = tail call i32 @f38(i32 3, i64 %0, i64 %1) #5
ret i32 %call
}
@@ -76,8 +76,8 @@ entry:
; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #8]
; CHECK: movz w[[C:[0-9]+]], #0x9
; CHECK: str w[[C]], [sp]
- %0 = load i64* bitcast (%struct.s38* @g38 to i64*), align 4
- %1 = load i64* bitcast (%struct.s38* @g38_2 to i64*), align 4
+ %0 = load i64, i64* bitcast (%struct.s38* @g38 to i64*), align 4
+ %1 = load i64, i64* bitcast (%struct.s38* @g38_2 to i64*), align 4
%call = tail call i32 @f38_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6,
i32 7, i32 8, i32 9, i64 %0, i64 %1) #5
ret i32 %call
@@ -112,8 +112,8 @@ entry:
; CHECK-LABEL: caller39
; CHECK: ldp x1, x2,
; CHECK: ldp x3, x4,
- %0 = load i128* bitcast (%struct.s39* @g39 to i128*), align 16
- %1 = load i128* bitcast (%struct.s39* @g39_2 to i128*), align 16
+ %0 = load i128, i128* bitcast (%struct.s39* @g39 to i128*), align 16
+ %1 = load i128, i128* bitcast (%struct.s39* @g39_2 to i128*), align 16
%call = tail call i32 @f39(i32 3, i128 %0, i128 %1) #5
ret i32 %call
}
@@ -130,8 +130,8 @@ entry:
; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #16]
; CHECK: movz w[[C:[0-9]+]], #0x9
; CHECK: str w[[C]], [sp]
- %0 = load i128* bitcast (%struct.s39* @g39 to i128*), align 16
- %1 = load i128* bitcast (%struct.s39* @g39_2 to i128*), align 16
+ %0 = load i128, i128* bitcast (%struct.s39* @g39 to i128*), align 16
+ %1 = load i128, i128* bitcast (%struct.s39* @g39_2 to i128*), align 16
%call = tail call i32 @f39_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6,
i32 7, i32 8, i32 9, i128 %0, i128 %1) #5
ret i32 %call
@@ -168,8 +168,8 @@ entry:
; CHECK-LABEL: caller40
; CHECK: ldp x1, x2,
; CHECK: ldp x3, x4,
- %0 = load [2 x i64]* bitcast (%struct.s40* @g40 to [2 x i64]*), align 4
- %1 = load [2 x i64]* bitcast (%struct.s40* @g40_2 to [2 x i64]*), align 4
+ %0 = load [2 x i64], [2 x i64]* bitcast (%struct.s40* @g40 to [2 x i64]*), align 4
+ %1 = load [2 x i64], [2 x i64]* bitcast (%struct.s40* @g40_2 to [2 x i64]*), align 4
%call = tail call i32 @f40(i32 3, [2 x i64] %0, [2 x i64] %1) #5
ret i32 %call
}
@@ -186,8 +186,8 @@ entry:
; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #8]
; CHECK: movz w[[C:[0-9]+]], #0x9
; CHECK: str w[[C]], [sp]
- %0 = load [2 x i64]* bitcast (%struct.s40* @g40 to [2 x i64]*), align 4
- %1 = load [2 x i64]* bitcast (%struct.s40* @g40_2 to [2 x i64]*), align 4
+ %0 = load [2 x i64], [2 x i64]* bitcast (%struct.s40* @g40 to [2 x i64]*), align 4
+ %1 = load [2 x i64], [2 x i64]* bitcast (%struct.s40* @g40_2 to [2 x i64]*), align 4
%call = tail call i32 @f40_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6,
i32 7, i32 8, i32 9, [2 x i64] %0, [2 x i64] %1) #5
ret i32 %call
@@ -222,8 +222,8 @@ entry:
; CHECK-LABEL: caller41
; CHECK: ldp x1, x2,
; CHECK: ldp x3, x4,
- %0 = load i128* bitcast (%struct.s41* @g41 to i128*), align 16
- %1 = load i128* bitcast (%struct.s41* @g41_2 to i128*), align 16
+ %0 = load i128, i128* bitcast (%struct.s41* @g41 to i128*), align 16
+ %1 = load i128, i128* bitcast (%struct.s41* @g41_2 to i128*), align 16
%call = tail call i32 @f41(i32 3, i128 %0, i128 %1) #5
ret i32 %call
}
@@ -240,8 +240,8 @@ entry:
; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #16]
; CHECK: movz w[[C:[0-9]+]], #0x9
; CHECK: str w[[C]], [sp]
- %0 = load i128* bitcast (%struct.s41* @g41 to i128*), align 16
- %1 = load i128* bitcast (%struct.s41* @g41_2 to i128*), align 16
+ %0 = load i128, i128* bitcast (%struct.s41* @g41 to i128*), align 16
+ %1 = load i128, i128* bitcast (%struct.s41* @g41_2 to i128*), align 16
%call = tail call i32 @f41_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6,
i32 7, i32 8, i32 9, i128 %0, i128 %1) #5
ret i32 %call
@@ -260,15 +260,15 @@ entry:
; FAST: ldr w[[B:[0-9]+]], [x2]
; FAST: add w[[C:[0-9]+]], w[[A]], w0
; FAST: add {{w[0-9]+}}, w[[C]], w[[B]]
- %i1 = getelementptr inbounds %struct.s42* %s1, i64 0, i32 0
- %0 = load i32* %i1, align 4, !tbaa !0
- %i2 = getelementptr inbounds %struct.s42* %s2, i64 0, i32 0
- %1 = load i32* %i2, align 4, !tbaa !0
- %s = getelementptr inbounds %struct.s42* %s1, i64 0, i32 1
- %2 = load i16* %s, align 2, !tbaa !3
+ %i1 = getelementptr inbounds %struct.s42, %struct.s42* %s1, i64 0, i32 0
+ %0 = load i32, i32* %i1, align 4, !tbaa !0
+ %i2 = getelementptr inbounds %struct.s42, %struct.s42* %s2, i64 0, i32 0
+ %1 = load i32, i32* %i2, align 4, !tbaa !0
+ %s = getelementptr inbounds %struct.s42, %struct.s42* %s1, i64 0, i32 1
+ %2 = load i16, i16* %s, align 2, !tbaa !3
%conv = sext i16 %2 to i32
- %s5 = getelementptr inbounds %struct.s42* %s2, i64 0, i32 1
- %3 = load i16* %s5, align 2, !tbaa !3
+ %s5 = getelementptr inbounds %struct.s42, %struct.s42* %s2, i64 0, i32 1
+ %3 = load i16, i16* %s5, align 2, !tbaa !3
%conv6 = sext i16 %3 to i32
%add = add i32 %0, %i
%add3 = add i32 %add, %1
@@ -369,15 +369,15 @@ entry:
; FAST: ldr w[[B:[0-9]+]], [x2]
; FAST: add w[[C:[0-9]+]], w[[A]], w0
; FAST: add {{w[0-9]+}}, w[[C]], w[[B]]
- %i1 = getelementptr inbounds %struct.s43* %s1, i64 0, i32 0
- %0 = load i32* %i1, align 4, !tbaa !0
- %i2 = getelementptr inbounds %struct.s43* %s2, i64 0, i32 0
- %1 = load i32* %i2, align 4, !tbaa !0
- %s = getelementptr inbounds %struct.s43* %s1, i64 0, i32 1
- %2 = load i16* %s, align 2, !tbaa !3
+ %i1 = getelementptr inbounds %struct.s43, %struct.s43* %s1, i64 0, i32 0
+ %0 = load i32, i32* %i1, align 4, !tbaa !0
+ %i2 = getelementptr inbounds %struct.s43, %struct.s43* %s2, i64 0, i32 0
+ %1 = load i32, i32* %i2, align 4, !tbaa !0
+ %s = getelementptr inbounds %struct.s43, %struct.s43* %s1, i64 0, i32 1
+ %2 = load i16, i16* %s, align 2, !tbaa !3
%conv = sext i16 %2 to i32
- %s5 = getelementptr inbounds %struct.s43* %s2, i64 0, i32 1
- %3 = load i16* %s5, align 2, !tbaa !3
+ %s5 = getelementptr inbounds %struct.s43, %struct.s43* %s2, i64 0, i32 1
+ %3 = load i16, i16* %s5, align 2, !tbaa !3
%conv6 = sext i16 %3 to i32
%add = add i32 %0, %i
%add3 = add i32 %add, %1
@@ -493,7 +493,7 @@ entry:
; Load/Store opt is disabled with -O0, so the i128 is split.
; FAST: str {{x[0-9]+}}, [x[[ADDR]], #8]
; FAST: str {{x[0-9]+}}, [x[[ADDR]]]
- %0 = load i128* bitcast (%struct.s41* @g41 to i128*), align 16
+ %0 = load i128, i128* bitcast (%struct.s41* @g41 to i128*), align 16
%call = tail call i32 @callee_i128_split(i32 1, i32 2, i32 3, i32 4, i32 5,
i32 6, i32 7, i128 %0, i32 8) #5
ret i32 %call
@@ -514,7 +514,7 @@ entry:
; FAST: mov x[[R0:[0-9]+]], sp
; FAST: orr w[[R1:[0-9]+]], wzr, #0x8
; FAST: str w[[R1]], {{\[}}x[[R0]]{{\]}}
- %0 = load i64* bitcast (%struct.s41* @g41 to i64*), align 16
+ %0 = load i64, i64* bitcast (%struct.s41* @g41 to i64*), align 16
%call = tail call i32 @callee_i64(i32 1, i32 2, i32 3, i32 4, i32 5,
i32 6, i32 7, i64 %0, i32 8) #5
ret i32 %call
diff --git a/test/CodeGen/AArch64/arm64-addr-mode-folding.ll b/test/CodeGen/AArch64/arm64-addr-mode-folding.ll
index 74bb3981ba3e..3197f5bd27ec 100644
--- a/test/CodeGen/AArch64/arm64-addr-mode-folding.ll
+++ b/test/CodeGen/AArch64/arm64-addr-mode-folding.ll
@@ -11,12 +11,12 @@ define i32 @fct(i32 %i1, i32 %i2) {
; _CHECK-NOT_: , sxtw]
entry:
%idxprom = sext i32 %i1 to i64
- %0 = load i8** @block, align 8
- %arrayidx = getelementptr inbounds i8* %0, i64 %idxprom
- %1 = load i8* %arrayidx, align 1
+ %0 = load i8*, i8** @block, align 8
+ %arrayidx = getelementptr inbounds i8, i8* %0, i64 %idxprom
+ %1 = load i8, i8* %arrayidx, align 1
%idxprom1 = sext i32 %i2 to i64
- %arrayidx2 = getelementptr inbounds i8* %0, i64 %idxprom1
- %2 = load i8* %arrayidx2, align 1
+ %arrayidx2 = getelementptr inbounds i8, i8* %0, i64 %idxprom1
+ %2 = load i8, i8* %arrayidx2, align 1
%cmp = icmp eq i8 %1, %2
br i1 %cmp, label %if.end, label %if.then
@@ -29,11 +29,11 @@ if.end: ; preds = %entry
%inc = add nsw i32 %i1, 1
%inc9 = add nsw i32 %i2, 1
%idxprom10 = sext i32 %inc to i64
- %arrayidx11 = getelementptr inbounds i8* %0, i64 %idxprom10
- %3 = load i8* %arrayidx11, align 1
+ %arrayidx11 = getelementptr inbounds i8, i8* %0, i64 %idxprom10
+ %3 = load i8, i8* %arrayidx11, align 1
%idxprom12 = sext i32 %inc9 to i64
- %arrayidx13 = getelementptr inbounds i8* %0, i64 %idxprom12
- %4 = load i8* %arrayidx13, align 1
+ %arrayidx13 = getelementptr inbounds i8, i8* %0, i64 %idxprom12
+ %4 = load i8, i8* %arrayidx13, align 1
%cmp16 = icmp eq i8 %3, %4
br i1 %cmp16, label %if.end23, label %if.then18
@@ -46,11 +46,11 @@ if.end23: ; preds = %if.end
%inc24 = add nsw i32 %i1, 2
%inc25 = add nsw i32 %i2, 2
%idxprom26 = sext i32 %inc24 to i64
- %arrayidx27 = getelementptr inbounds i8* %0, i64 %idxprom26
- %5 = load i8* %arrayidx27, align 1
+ %arrayidx27 = getelementptr inbounds i8, i8* %0, i64 %idxprom26
+ %5 = load i8, i8* %arrayidx27, align 1
%idxprom28 = sext i32 %inc25 to i64
- %arrayidx29 = getelementptr inbounds i8* %0, i64 %idxprom28
- %6 = load i8* %arrayidx29, align 1
+ %arrayidx29 = getelementptr inbounds i8, i8* %0, i64 %idxprom28
+ %6 = load i8, i8* %arrayidx29, align 1
%cmp32 = icmp eq i8 %5, %6
br i1 %cmp32, label %return, label %if.then34
@@ -71,12 +71,12 @@ define i32 @fct1(i32 %i1, i32 %i2) optsize {
; CHECK: , sxtw]
entry:
%idxprom = sext i32 %i1 to i64
- %0 = load i8** @block, align 8
- %arrayidx = getelementptr inbounds i8* %0, i64 %idxprom
- %1 = load i8* %arrayidx, align 1
+ %0 = load i8*, i8** @block, align 8
+ %arrayidx = getelementptr inbounds i8, i8* %0, i64 %idxprom
+ %1 = load i8, i8* %arrayidx, align 1
%idxprom1 = sext i32 %i2 to i64
- %arrayidx2 = getelementptr inbounds i8* %0, i64 %idxprom1
- %2 = load i8* %arrayidx2, align 1
+ %arrayidx2 = getelementptr inbounds i8, i8* %0, i64 %idxprom1
+ %2 = load i8, i8* %arrayidx2, align 1
%cmp = icmp eq i8 %1, %2
br i1 %cmp, label %if.end, label %if.then
@@ -89,11 +89,11 @@ if.end: ; preds = %entry
%inc = add nsw i32 %i1, 1
%inc9 = add nsw i32 %i2, 1
%idxprom10 = sext i32 %inc to i64
- %arrayidx11 = getelementptr inbounds i8* %0, i64 %idxprom10
- %3 = load i8* %arrayidx11, align 1
+ %arrayidx11 = getelementptr inbounds i8, i8* %0, i64 %idxprom10
+ %3 = load i8, i8* %arrayidx11, align 1
%idxprom12 = sext i32 %inc9 to i64
- %arrayidx13 = getelementptr inbounds i8* %0, i64 %idxprom12
- %4 = load i8* %arrayidx13, align 1
+ %arrayidx13 = getelementptr inbounds i8, i8* %0, i64 %idxprom12
+ %4 = load i8, i8* %arrayidx13, align 1
%cmp16 = icmp eq i8 %3, %4
br i1 %cmp16, label %if.end23, label %if.then18
@@ -106,11 +106,11 @@ if.end23: ; preds = %if.end
%inc24 = add nsw i32 %i1, 2
%inc25 = add nsw i32 %i2, 2
%idxprom26 = sext i32 %inc24 to i64
- %arrayidx27 = getelementptr inbounds i8* %0, i64 %idxprom26
- %5 = load i8* %arrayidx27, align 1
+ %arrayidx27 = getelementptr inbounds i8, i8* %0, i64 %idxprom26
+ %5 = load i8, i8* %arrayidx27, align 1
%idxprom28 = sext i32 %inc25 to i64
- %arrayidx29 = getelementptr inbounds i8* %0, i64 %idxprom28
- %6 = load i8* %arrayidx29, align 1
+ %arrayidx29 = getelementptr inbounds i8, i8* %0, i64 %idxprom28
+ %6 = load i8, i8* %arrayidx29, align 1
%cmp32 = icmp eq i8 %5, %6
br i1 %cmp32, label %return, label %if.then34
@@ -135,9 +135,9 @@ entry:
if.then: ; preds = %entry
%idxprom = zext i8 %c to i64
- %arrayidx = getelementptr inbounds i32* %array, i64 %idxprom
- %0 = load volatile i32* %arrayidx, align 4
- %1 = load volatile i32* %arrayidx, align 4
+ %arrayidx = getelementptr inbounds i32, i32* %array, i64 %idxprom
+ %0 = load volatile i32, i32* %arrayidx, align 4
+ %1 = load volatile i32, i32* %arrayidx, align 4
%add3 = add nsw i32 %1, %0
br label %if.end
@@ -159,9 +159,9 @@ entry:
if.then: ; preds = %entry
%idxprom = zext i8 %c to i64
- %arrayidx = getelementptr inbounds i32* %array, i64 %idxprom
- %0 = load volatile i32* %arrayidx, align 4
- %1 = load volatile i32* %arrayidx, align 4
+ %arrayidx = getelementptr inbounds i32, i32* %array, i64 %idxprom
+ %0 = load volatile i32, i32* %arrayidx, align 4
+ %1 = load volatile i32, i32* %arrayidx, align 4
%add3 = add nsw i32 %1, %0
br label %if.end
diff --git a/test/CodeGen/AArch64/arm64-addr-type-promotion.ll b/test/CodeGen/AArch64/arm64-addr-type-promotion.ll
index 1a3ca8bd5b8c..4703d25a6016 100644
--- a/test/CodeGen/AArch64/arm64-addr-type-promotion.ll
+++ b/test/CodeGen/AArch64/arm64-addr-type-promotion.ll
@@ -28,12 +28,12 @@ define zeroext i8 @fullGtU(i32 %i1, i32 %i2) {
; CHECK-NEXT: cmp [[LOADEDVAL3]], [[LOADEDVAL4]]
entry:
%idxprom = sext i32 %i1 to i64
- %tmp = load i8** @block, align 8
- %arrayidx = getelementptr inbounds i8* %tmp, i64 %idxprom
- %tmp1 = load i8* %arrayidx, align 1
+ %tmp = load i8*, i8** @block, align 8
+ %arrayidx = getelementptr inbounds i8, i8* %tmp, i64 %idxprom
+ %tmp1 = load i8, i8* %arrayidx, align 1
%idxprom1 = sext i32 %i2 to i64
- %arrayidx2 = getelementptr inbounds i8* %tmp, i64 %idxprom1
- %tmp2 = load i8* %arrayidx2, align 1
+ %arrayidx2 = getelementptr inbounds i8, i8* %tmp, i64 %idxprom1
+ %tmp2 = load i8, i8* %arrayidx2, align 1
%cmp = icmp eq i8 %tmp1, %tmp2
br i1 %cmp, label %if.end, label %if.then
@@ -46,11 +46,11 @@ if.end: ; preds = %entry
%inc = add nsw i32 %i1, 1
%inc10 = add nsw i32 %i2, 1
%idxprom11 = sext i32 %inc to i64
- %arrayidx12 = getelementptr inbounds i8* %tmp, i64 %idxprom11
- %tmp3 = load i8* %arrayidx12, align 1
+ %arrayidx12 = getelementptr inbounds i8, i8* %tmp, i64 %idxprom11
+ %tmp3 = load i8, i8* %arrayidx12, align 1
%idxprom13 = sext i32 %inc10 to i64
- %arrayidx14 = getelementptr inbounds i8* %tmp, i64 %idxprom13
- %tmp4 = load i8* %arrayidx14, align 1
+ %arrayidx14 = getelementptr inbounds i8, i8* %tmp, i64 %idxprom13
+ %tmp4 = load i8, i8* %arrayidx14, align 1
%cmp17 = icmp eq i8 %tmp3, %tmp4
br i1 %cmp17, label %if.end25, label %if.then19
@@ -63,11 +63,11 @@ if.end25: ; preds = %if.end
%inc26 = add nsw i32 %i1, 2
%inc27 = add nsw i32 %i2, 2
%idxprom28 = sext i32 %inc26 to i64
- %arrayidx29 = getelementptr inbounds i8* %tmp, i64 %idxprom28
- %tmp5 = load i8* %arrayidx29, align 1
+ %arrayidx29 = getelementptr inbounds i8, i8* %tmp, i64 %idxprom28
+ %tmp5 = load i8, i8* %arrayidx29, align 1
%idxprom30 = sext i32 %inc27 to i64
- %arrayidx31 = getelementptr inbounds i8* %tmp, i64 %idxprom30
- %tmp6 = load i8* %arrayidx31, align 1
+ %arrayidx31 = getelementptr inbounds i8, i8* %tmp, i64 %idxprom30
+ %tmp6 = load i8, i8* %arrayidx31, align 1
%cmp34 = icmp eq i8 %tmp5, %tmp6
br i1 %cmp34, label %return, label %if.then36
diff --git a/test/CodeGen/AArch64/arm64-addrmode.ll b/test/CodeGen/AArch64/arm64-addrmode.ll
index 5433a8c312fe..c22d0312b24d 100644
--- a/test/CodeGen/AArch64/arm64-addrmode.ll
+++ b/test/CodeGen/AArch64/arm64-addrmode.ll
@@ -8,8 +8,8 @@
; CHECK: ldr xzr, [x{{[0-9]+}}, #8]
; CHECK: ret
define void @t1() {
- %incdec.ptr = getelementptr inbounds i64* @object, i64 1
- %tmp = load volatile i64* %incdec.ptr, align 8
+ %incdec.ptr = getelementptr inbounds i64, i64* @object, i64 1
+ %tmp = load volatile i64, i64* %incdec.ptr, align 8
ret void
}
@@ -20,8 +20,8 @@ define void @t1() {
; CHECK: [[ADDREG]]]
; CHECK: ret
define void @t2() {
- %incdec.ptr = getelementptr inbounds i64* @object, i64 -33
- %tmp = load volatile i64* %incdec.ptr, align 8
+ %incdec.ptr = getelementptr inbounds i64, i64* @object, i64 -33
+ %tmp = load volatile i64, i64* %incdec.ptr, align 8
ret void
}
@@ -30,8 +30,8 @@ define void @t2() {
; CHECK: ldr xzr, [x{{[0-9]+}}, #32760]
; CHECK: ret
define void @t3() {
- %incdec.ptr = getelementptr inbounds i64* @object, i64 4095
- %tmp = load volatile i64* %incdec.ptr, align 8
+ %incdec.ptr = getelementptr inbounds i64, i64* @object, i64 4095
+ %tmp = load volatile i64, i64* %incdec.ptr, align 8
ret void
}
@@ -41,8 +41,8 @@ define void @t3() {
; CHECK: ldr xzr, [x{{[0-9]+}}, x[[NUM]]]
; CHECK: ret
define void @t4() {
- %incdec.ptr = getelementptr inbounds i64* @object, i64 4096
- %tmp = load volatile i64* %incdec.ptr, align 8
+ %incdec.ptr = getelementptr inbounds i64, i64* @object, i64 4096
+ %tmp = load volatile i64, i64* %incdec.ptr, align 8
ret void
}
@@ -51,8 +51,8 @@ define void @t4() {
; CHECK: ldr xzr, [x{{[0-9]+}}, x{{[0-9]+}}, lsl #3]
; CHECK: ret
define void @t5(i64 %a) {
- %incdec.ptr = getelementptr inbounds i64* @object, i64 %a
- %tmp = load volatile i64* %incdec.ptr, align 8
+ %incdec.ptr = getelementptr inbounds i64, i64* @object, i64 %a
+ %tmp = load volatile i64, i64* %incdec.ptr, align 8
ret void
}
@@ -63,9 +63,9 @@ define void @t5(i64 %a) {
; CHECK: ldr xzr, [x{{[0-9]+}}, x[[NUM]]]
; CHECK: ret
define void @t6(i64 %a) {
- %tmp1 = getelementptr inbounds i64* @object, i64 %a
- %incdec.ptr = getelementptr inbounds i64* %tmp1, i64 4096
- %tmp = load volatile i64* %incdec.ptr, align 8
+ %tmp1 = getelementptr inbounds i64, i64* @object, i64 %a
+ %incdec.ptr = getelementptr inbounds i64, i64* %tmp1, i64 4096
+ %tmp = load volatile i64, i64* %incdec.ptr, align 8
ret void
}
@@ -76,7 +76,7 @@ define void @t7(i64 %a) {
; CHECK-NEXT: ldr xzr, [x0, x[[NUM]]]
%1 = add i64 %a, 65535 ;0xffff
%2 = inttoptr i64 %1 to i64*
- %3 = load volatile i64* %2, align 8
+ %3 = load volatile i64, i64* %2, align 8
ret void
}
@@ -86,7 +86,7 @@ define void @t8(i64 %a) {
; CHECK-NEXT: ldr xzr, [x0, [[REG]]]
%1 = sub i64 %a, 4662 ;-4662 is 0xffffffffffffedca
%2 = inttoptr i64 %1 to i64*
- %3 = load volatile i64* %2, align 8
+ %3 = load volatile i64, i64* %2, align 8
ret void
}
@@ -96,7 +96,7 @@ define void @t9(i64 %a) {
; CHECK-NEXT: ldr xzr, [x0, [[REG]]]
%1 = add i64 -305463297, %a ;-305463297 is 0xffffffffedcaffff
%2 = inttoptr i64 %1 to i64*
- %3 = load volatile i64* %2, align 8
+ %3 = load volatile i64, i64* %2, align 8
ret void
}
@@ -106,7 +106,7 @@ define void @t10(i64 %a) {
; CHECK-NEXT: ldr xzr, [x0, [[REG]]]
%1 = add i64 %a, 81909218222800896 ;0x123000000000000
%2 = inttoptr i64 %1 to i64*
- %3 = load volatile i64* %2, align 8
+ %3 = load volatile i64, i64* %2, align 8
ret void
}
@@ -117,7 +117,7 @@ define void @t11(i64 %a) {
; CHECK-NEXT: ldr xzr, [x0, x[[NUM]]]
%1 = add i64 %a, 19088743 ;0x1234567
%2 = inttoptr i64 %1 to i64*
- %3 = load volatile i64* %2, align 8
+ %3 = load volatile i64, i64* %2, align 8
ret void
}
@@ -128,7 +128,7 @@ define void @t12(i64 %a) {
; CHECK-NEXT: ldr xzr, {{\[}}[[REG]]]
%1 = add i64 %a, 4095 ;0xfff
%2 = inttoptr i64 %1 to i64*
- %3 = load volatile i64* %2, align 8
+ %3 = load volatile i64, i64* %2, align 8
ret void
}
@@ -138,7 +138,7 @@ define void @t13(i64 %a) {
; CHECK-NEXT: ldr xzr, {{\[}}[[REG]]]
%1 = add i64 %a, -4095 ;-0xfff
%2 = inttoptr i64 %1 to i64*
- %3 = load volatile i64* %2, align 8
+ %3 = load volatile i64, i64* %2, align 8
ret void
}
@@ -148,7 +148,7 @@ define void @t14(i64 %a) {
; CHECK-NEXT: ldr xzr, {{\[}}[[REG]]]
%1 = add i64 %a, 1191936 ;0x123000
%2 = inttoptr i64 %1 to i64*
- %3 = load volatile i64* %2, align 8
+ %3 = load volatile i64, i64* %2, align 8
ret void
}
@@ -158,7 +158,7 @@ define void @t15(i64 %a) {
; CHECK-NEXT: ldr xzr, {{\[}}[[REG]]]
%1 = add i64 %a, -1191936 ;0xFFFFFFFFFFEDD000
%2 = inttoptr i64 %1 to i64*
- %3 = load volatile i64* %2, align 8
+ %3 = load volatile i64, i64* %2, align 8
ret void
}
@@ -167,7 +167,7 @@ define void @t16(i64 %a) {
; CHECK: ldr xzr, [x0, #28672]
%1 = add i64 %a, 28672 ;0x7000
%2 = inttoptr i64 %1 to i64*
- %3 = load volatile i64* %2, align 8
+ %3 = load volatile i64, i64* %2, align 8
ret void
}
@@ -176,6 +176,6 @@ define void @t17(i64 %a) {
; CHECK: ldur xzr, [x0, #-256]
%1 = add i64 %a, -256 ;-0x100
%2 = inttoptr i64 %1 to i64*
- %3 = load volatile i64* %2, align 8
+ %3 = load volatile i64, i64* %2, align 8
ret void
}
diff --git a/test/CodeGen/AArch64/arm64-alloc-no-stack-realign.ll b/test/CodeGen/AArch64/arm64-alloc-no-stack-realign.ll
index f396bc991708..bf2d2cfa6066 100644
--- a/test/CodeGen/AArch64/arm64-alloc-no-stack-realign.ll
+++ b/test/CodeGen/AArch64/arm64-alloc-no-stack-realign.ll
@@ -13,9 +13,9 @@ entry:
; CHECK: stp [[Q1:q[0-9]+]], [[Q2:q[0-9]+]], {{\[}}[[BASE:x[0-9]+]], #32]
; CHECK: stp [[Q1:q[0-9]+]], [[Q2:q[0-9]+]], {{\[}}[[BASE]]]
%retval = alloca <16 x float>, align 16
- %0 = load <16 x float>* @T3_retval, align 16
+ %0 = load <16 x float>, <16 x float>* @T3_retval, align 16
store <16 x float> %0, <16 x float>* %retval
- %1 = load <16 x float>* %retval
+ %1 = load <16 x float>, <16 x float>* %retval
store <16 x float> %1, <16 x float>* %agg.result, align 16
ret void
}
diff --git a/test/CodeGen/AArch64/arm64-alloca-frame-pointer-offset.ll b/test/CodeGen/AArch64/arm64-alloca-frame-pointer-offset.ll
index 3750f31b3734..eb0cd3547bda 100644
--- a/test/CodeGen/AArch64/arm64-alloca-frame-pointer-offset.ll
+++ b/test/CodeGen/AArch64/arm64-alloca-frame-pointer-offset.ll
@@ -13,17 +13,17 @@ define i32 @foo(i32 %a) nounwind {
%arr2 = alloca [32 x i32], align 4
%j = alloca i32, align 4
store i32 %a, i32* %a.addr, align 4
- %tmp = load i32* %a.addr, align 4
+ %tmp = load i32, i32* %a.addr, align 4
%tmp1 = zext i32 %tmp to i64
%v = mul i64 4, %tmp1
%vla = alloca i8, i64 %v, align 4
%tmp2 = bitcast i8* %vla to i32*
- %tmp3 = load i32* %a.addr, align 4
+ %tmp3 = load i32, i32* %a.addr, align 4
store i32 %tmp3, i32* %i, align 4
- %tmp4 = load i32* %a.addr, align 4
+ %tmp4 = load i32, i32* %a.addr, align 4
store i32 %tmp4, i32* %j, align 4
- %tmp5 = load i32* %j, align 4
+ %tmp5 = load i32, i32* %j, align 4
store i32 %tmp5, i32* %retval
- %x = load i32* %retval
+ %x = load i32, i32* %retval
ret i32 %x
}
diff --git a/test/CodeGen/AArch64/arm64-andCmpBrToTBZ.ll b/test/CodeGen/AArch64/arm64-andCmpBrToTBZ.ll
index 419497722f4c..71e64807f524 100644
--- a/test/CodeGen/AArch64/arm64-andCmpBrToTBZ.ll
+++ b/test/CodeGen/AArch64/arm64-andCmpBrToTBZ.ll
@@ -29,7 +29,7 @@ _ZNK7WebCore4Node10hasTagNameERKNS_13QualifiedNameE.exit: ; preds = %lor.rhs.i.i
br i1 %cmp.i.i.i.i, label %if.then3, label %if.end5
if.then3: ; preds = %_ZNK7WebCore4Node10hasTagNameERKNS_13QualifiedNameE.exit, %land.rhs.i
- %tmp11 = load i8* %str14, align 8
+ %tmp11 = load i8, i8* %str14, align 8
%tmp12 = and i8 %tmp11, 2
%tmp13 = icmp ne i8 %tmp12, 0
br label %return
@@ -55,7 +55,7 @@ if.then7: ; preds = %_ZNK7WebCore4Node10
br i1 %isTextField, label %if.then9, label %if.end12
if.then9: ; preds = %if.then7
- %tmp23 = load i8* %str5, align 8
+ %tmp23 = load i8, i8* %str5, align 8
%tmp24 = and i8 %tmp23, 2
%tmp25 = icmp ne i8 %tmp24, 0
br label %return
diff --git a/test/CodeGen/AArch64/arm64-anyregcc-crash.ll b/test/CodeGen/AArch64/arm64-anyregcc-crash.ll
index 241cf974c05b..56c62d5fe7db 100644
--- a/test/CodeGen/AArch64/arm64-anyregcc-crash.ll
+++ b/test/CodeGen/AArch64/arm64-anyregcc-crash.ll
@@ -8,7 +8,7 @@ define i64 @anyreglimit(i64 %v1, i64 %v2, i64 %v3, i64 %v4, i64 %v5, i64 %v6, i6
i64 %v17, i64 %v18, i64 %v19, i64 %v20, i64 %v21, i64 %v22, i64 %v23, i64 %v24,
i64 %v25, i64 %v26, i64 %v27, i64 %v28, i64 %v29, i64 %v30, i64 %v31, i64 %v32) {
entry:
- %result = tail call anyregcc i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 12, i32 15, i8* inttoptr (i64 0 to i8*), i32 32,
+ %result = tail call anyregcc i64 (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.i64(i64 12, i32 15, i8* inttoptr (i64 0 to i8*), i32 32,
i64 %v1, i64 %v2, i64 %v3, i64 %v4, i64 %v5, i64 %v6, i64 %v7, i64 %v8,
i64 %v9, i64 %v10, i64 %v11, i64 %v12, i64 %v13, i64 %v14, i64 %v15, i64 %v16,
i64 %v17, i64 %v18, i64 %v19, i64 %v20, i64 %v21, i64 %v22, i64 %v23, i64 %v24,
diff --git a/test/CodeGen/AArch64/arm64-anyregcc.ll b/test/CodeGen/AArch64/arm64-anyregcc.ll
index e26875d52f99..2a2f45196046 100644
--- a/test/CodeGen/AArch64/arm64-anyregcc.ll
+++ b/test/CodeGen/AArch64/arm64-anyregcc.ll
@@ -55,7 +55,7 @@
; CHECK-NEXT: .long 3
define i64 @test() nounwind ssp uwtable {
entry:
- call anyregcc void (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i64 0, i32 16, i8* null, i32 2, i32 1, i32 2, i64 3)
+ call anyregcc void (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.void(i64 0, i32 16, i8* null, i32 2, i32 1, i32 2, i64 3)
ret i64 0
}
@@ -77,7 +77,7 @@ entry:
define i64 @property_access1(i8* %obj) nounwind ssp uwtable {
entry:
%f = inttoptr i64 281474417671919 to i8*
- %ret = call anyregcc i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 1, i32 20, i8* %f, i32 1, i8* %obj)
+ %ret = call anyregcc i64 (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.i64(i64 1, i32 20, i8* %f, i32 1, i8* %obj)
ret i64 %ret
}
@@ -100,7 +100,7 @@ define i64 @property_access2() nounwind ssp uwtable {
entry:
%obj = alloca i64, align 8
%f = inttoptr i64 281474417671919 to i8*
- %ret = call anyregcc i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 2, i32 20, i8* %f, i32 1, i64* %obj)
+ %ret = call anyregcc i64 (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.i64(i64 2, i32 20, i8* %f, i32 1, i64* %obj)
ret i64 %ret
}
@@ -123,7 +123,7 @@ define i64 @property_access3() nounwind ssp uwtable {
entry:
%obj = alloca i64, align 8
%f = inttoptr i64 281474417671919 to i8*
- %ret = call anyregcc i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 3, i32 20, i8* %f, i32 0, i64* %obj)
+ %ret = call anyregcc i64 (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.i64(i64 3, i32 20, i8* %f, i32 0, i64* %obj)
ret i64 %ret
}
@@ -205,7 +205,7 @@ entry:
define i64 @anyreg_test1(i8* %a1, i8* %a2, i8* %a3, i8* %a4, i8* %a5, i8* %a6, i8* %a7, i8* %a8, i8* %a9, i8* %a10, i8* %a11, i8* %a12, i8* %a13) nounwind ssp uwtable {
entry:
%f = inttoptr i64 281474417671919 to i8*
- %ret = call anyregcc i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 4, i32 20, i8* %f, i32 13, i8* %a1, i8* %a2, i8* %a3, i8* %a4, i8* %a5, i8* %a6, i8* %a7, i8* %a8, i8* %a9, i8* %a10, i8* %a11, i8* %a12, i8* %a13)
+ %ret = call anyregcc i64 (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.i64(i64 4, i32 20, i8* %f, i32 13, i8* %a1, i8* %a2, i8* %a3, i8* %a4, i8* %a5, i8* %a6, i8* %a7, i8* %a8, i8* %a9, i8* %a10, i8* %a11, i8* %a12, i8* %a13)
ret i64 %ret
}
@@ -287,7 +287,7 @@ entry:
define i64 @anyreg_test2(i8* %a1, i8* %a2, i8* %a3, i8* %a4, i8* %a5, i8* %a6, i8* %a7, i8* %a8, i8* %a9, i8* %a10, i8* %a11, i8* %a12, i8* %a13) nounwind ssp uwtable {
entry:
%f = inttoptr i64 281474417671919 to i8*
- %ret = call anyregcc i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 5, i32 20, i8* %f, i32 8, i8* %a1, i8* %a2, i8* %a3, i8* %a4, i8* %a5, i8* %a6, i8* %a7, i8* %a8, i8* %a9, i8* %a10, i8* %a11, i8* %a12, i8* %a13)
+ %ret = call anyregcc i64 (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.i64(i64 5, i32 20, i8* %f, i32 8, i8* %a1, i8* %a2, i8* %a3, i8* %a4, i8* %a5, i8* %a6, i8* %a7, i8* %a8, i8* %a9, i8* %a10, i8* %a11, i8* %a12, i8* %a13)
ret i64 %ret
}
@@ -315,7 +315,7 @@ entry:
; CHECK-NEXT: .long 0
define i64 @patchpoint_spilldef(i64 %p1, i64 %p2, i64 %p3, i64 %p4) {
entry:
- %result = tail call anyregcc i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 12, i32 16, i8* inttoptr (i64 0 to i8*), i32 2, i64 %p1, i64 %p2)
+ %result = tail call anyregcc i64 (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.i64(i64 12, i32 16, i8* inttoptr (i64 0 to i8*), i32 2, i64 %p1, i64 %p2)
tail call void asm sideeffect "nop", "~{x0},~{x1},~{x2},~{x3},~{x4},~{x5},~{x6},~{x7},~{x8},~{x9},~{x10},~{x11},~{x12},~{x13},~{x14},~{x15},~{x16},~{x17},~{x18},~{x19},~{x20},~{x21},~{x22},~{x23},~{x24},~{x25},~{x26},~{x27},~{x28},~{x29},~{x30},~{x31}"() nounwind
ret i64 %result
}
@@ -355,7 +355,7 @@ entry:
define i64 @patchpoint_spillargs(i64 %p1, i64 %p2, i64 %p3, i64 %p4) {
entry:
tail call void asm sideeffect "nop", "~{x0},~{x1},~{x2},~{x3},~{x4},~{x5},~{x6},~{x7},~{x8},~{x9},~{x10},~{x11},~{x12},~{x13},~{x14},~{x15},~{x16},~{x17},~{x18},~{x19},~{x20},~{x21},~{x22},~{x23},~{x24},~{x25},~{x26},~{x27},~{x28},~{x29},~{x30},~{x31}"() nounwind
- %result = tail call anyregcc i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 13, i32 16, i8* inttoptr (i64 0 to i8*), i32 2, i64 %p1, i64 %p2, i64 %p3, i64 %p4)
+ %result = tail call anyregcc i64 (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.i64(i64 13, i32 16, i8* inttoptr (i64 0 to i8*), i32 2, i64 %p1, i64 %p2, i64 %p3, i64 %p4)
ret i64 %result
}
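
The anyregcc and patchpoint hunks above show the companion change to call syntax: the callee of a call to a varargs intrinsic is now written with the function type itself rather than a pointer-to-function type. A minimal sketch, with placeholder operands rather than the tests' real stack map IDs:

  ; old syntax
  %r = call anyregcc i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 0, i32 16, i8* null, i32 0)
  ; new syntax
  %r = call anyregcc i64 (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.i64(i64 0, i32 16, i8* null, i32 0)

As with load and getelementptr, the called function and its arguments are untouched; only the explicit type annotation is rewritten.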
diff --git a/test/CodeGen/AArch64/arm64-atomic-128.ll b/test/CodeGen/AArch64/arm64-atomic-128.ll
index 642d72aac47e..a76cf74a6d0c 100644
--- a/test/CodeGen/AArch64/arm64-atomic-128.ll
+++ b/test/CodeGen/AArch64/arm64-atomic-128.ll
@@ -169,7 +169,7 @@ define i128 @atomic_load_seq_cst(i128* %p) {
; CHECK-NOT: dmb
; CHECK-LABEL: ldaxp
; CHECK-NOT: dmb
- %r = load atomic i128* %p seq_cst, align 16
+ %r = load atomic i128, i128* %p seq_cst, align 16
ret i128 %r
}
@@ -178,7 +178,7 @@ define i128 @atomic_load_relaxed(i128* %p) {
; CHECK-NOT: dmb
; CHECK: ldxp [[LO:x[0-9]+]], [[HI:x[0-9]+]], [x0]
; CHECK-NOT: dmb
- %r = load atomic i128* %p monotonic, align 16
+ %r = load atomic i128, i128* %p monotonic, align 16
ret i128 %r
}
diff --git a/test/CodeGen/AArch64/arm64-atomic.ll b/test/CodeGen/AArch64/arm64-atomic.ll
index b56f91ddd111..9136fb6271b5 100644
--- a/test/CodeGen/AArch64/arm64-atomic.ll
+++ b/test/CodeGen/AArch64/arm64-atomic.ll
@@ -1,37 +1,49 @@
-; RUN: llc < %s -march=arm64 -verify-machineinstrs -mcpu=cyclone | FileCheck %s
+; RUN: llc < %s -march=arm64 -asm-verbose=false -verify-machineinstrs -mcpu=cyclone | FileCheck %s
-define i32 @val_compare_and_swap(i32* %p) {
+define i32 @val_compare_and_swap(i32* %p, i32 %cmp, i32 %new) #0 {
; CHECK-LABEL: val_compare_and_swap:
-; CHECK: orr [[NEWVAL_REG:w[0-9]+]], wzr, #0x4
-; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]:
-; CHECK: ldaxr [[RESULT:w[0-9]+]], [x0]
-; CHECK: cmp [[RESULT]], #7
-; CHECK: b.ne [[LABEL2:.?LBB[0-9]+_[0-9]+]]
-; CHECK: stxr [[SCRATCH_REG:w[0-9]+]], [[NEWVAL_REG]], [x0]
-; CHECK: cbnz [[SCRATCH_REG]], [[LABEL]]
-; CHECK: [[LABEL2]]:
- %pair = cmpxchg i32* %p, i32 7, i32 4 acquire acquire
+; CHECK-NEXT: [[LABEL:.?LBB[0-9]+_[0-9]+]]:
+; CHECK-NEXT: ldaxr [[RESULT:w[0-9]+]], [x0]
+; CHECK-NEXT: cmp [[RESULT]], w1
+; CHECK-NEXT: b.ne [[LABEL2:.?LBB[0-9]+_[0-9]+]]
+; CHECK-NEXT: stxr [[SCRATCH_REG:w[0-9]+]], w2, [x0]
+; CHECK-NEXT: cbnz [[SCRATCH_REG]], [[LABEL]]
+; CHECK-NEXT: [[LABEL2]]:
+ %pair = cmpxchg i32* %p, i32 %cmp, i32 %new acquire acquire
%val = extractvalue { i32, i1 } %pair, 0
ret i32 %val
}
-define i64 @val_compare_and_swap_64(i64* %p) {
+define i32 @val_compare_and_swap_rel(i32* %p, i32 %cmp, i32 %new) #0 {
+; CHECK-LABEL: val_compare_and_swap_rel:
+; CHECK-NEXT: [[LABEL:.?LBB[0-9]+_[0-9]+]]:
+; CHECK-NEXT: ldaxr [[RESULT:w[0-9]+]], [x0]
+; CHECK-NEXT: cmp [[RESULT]], w1
+; CHECK-NEXT: b.ne [[LABEL2:.?LBB[0-9]+_[0-9]+]]
+; CHECK-NEXT: stlxr [[SCRATCH_REG:w[0-9]+]], w2, [x0]
+; CHECK-NEXT: cbnz [[SCRATCH_REG]], [[LABEL]]
+; CHECK-NEXT: [[LABEL2]]:
+ %pair = cmpxchg i32* %p, i32 %cmp, i32 %new acq_rel monotonic
+ %val = extractvalue { i32, i1 } %pair, 0
+ ret i32 %val
+}
+
+define i64 @val_compare_and_swap_64(i64* %p, i64 %cmp, i64 %new) #0 {
; CHECK-LABEL: val_compare_and_swap_64:
-; CHECK: orr w[[NEWVAL_REG:[0-9]+]], wzr, #0x4
-; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]:
-; CHECK: ldxr [[RESULT:x[0-9]+]], [x0]
-; CHECK: cmp [[RESULT]], #7
-; CHECK: b.ne [[LABEL2:.?LBB[0-9]+_[0-9]+]]
-; CHECK-NOT: stxr x[[NEWVAL_REG]], x[[NEWVAL_REG]]
-; CHECK: stxr [[SCRATCH_REG:w[0-9]+]], x[[NEWVAL_REG]], [x0]
-; CHECK: cbnz [[SCRATCH_REG]], [[LABEL]]
-; CHECK: [[LABEL2]]:
- %pair = cmpxchg i64* %p, i64 7, i64 4 monotonic monotonic
+; CHECK-NEXT: mov x[[ADDR:[0-9]+]], x0
+; CHECK-NEXT: [[LABEL:.?LBB[0-9]+_[0-9]+]]:
+; CHECK-NEXT: ldxr [[RESULT:x[0-9]+]], [x[[ADDR]]]
+; CHECK-NEXT: cmp [[RESULT]], x1
+; CHECK-NEXT: b.ne [[LABEL2:.?LBB[0-9]+_[0-9]+]]
+; CHECK-NEXT: stxr [[SCRATCH_REG:w[0-9]+]], x2, [x[[ADDR]]]
+; CHECK-NEXT: cbnz [[SCRATCH_REG]], [[LABEL]]
+; CHECK-NEXT: [[LABEL2]]:
+ %pair = cmpxchg i64* %p, i64 %cmp, i64 %new monotonic monotonic
%val = extractvalue { i64, i1 } %pair, 0
ret i64 %val
}
-define i32 @fetch_and_nand(i32* %p) {
+define i32 @fetch_and_nand(i32* %p) #0 {
; CHECK-LABEL: fetch_and_nand:
; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]:
; CHECK: ldxr w[[DEST_REG:[0-9]+]], [x0]
@@ -45,7 +57,7 @@ define i32 @fetch_and_nand(i32* %p) {
ret i32 %val
}
-define i64 @fetch_and_nand_64(i64* %p) {
+define i64 @fetch_and_nand_64(i64* %p) #0 {
; CHECK-LABEL: fetch_and_nand_64:
; CHECK: mov x[[ADDR:[0-9]+]], x0
; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]:
@@ -59,7 +71,7 @@ define i64 @fetch_and_nand_64(i64* %p) {
ret i64 %val
}
-define i32 @fetch_and_or(i32* %p) {
+define i32 @fetch_and_or(i32* %p) #0 {
; CHECK-LABEL: fetch_and_or:
; CHECK: movz [[OLDVAL_REG:w[0-9]+]], #0x5
; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]:
@@ -73,7 +85,7 @@ define i32 @fetch_and_or(i32* %p) {
ret i32 %val
}
-define i64 @fetch_and_or_64(i64* %p) {
+define i64 @fetch_and_or_64(i64* %p) #0 {
; CHECK: fetch_and_or_64:
; CHECK: mov x[[ADDR:[0-9]+]], x0
; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]:
@@ -85,52 +97,52 @@ define i64 @fetch_and_or_64(i64* %p) {
ret i64 %val
}
-define void @acquire_fence() {
+define void @acquire_fence() #0 {
fence acquire
ret void
; CHECK-LABEL: acquire_fence:
; CHECK: dmb ishld
}
-define void @release_fence() {
+define void @release_fence() #0 {
fence release
ret void
; CHECK-LABEL: release_fence:
; CHECK: dmb ish{{$}}
}
-define void @seq_cst_fence() {
+define void @seq_cst_fence() #0 {
fence seq_cst
ret void
; CHECK-LABEL: seq_cst_fence:
; CHECK: dmb ish{{$}}
}
-define i32 @atomic_load(i32* %p) {
- %r = load atomic i32* %p seq_cst, align 4
+define i32 @atomic_load(i32* %p) #0 {
+ %r = load atomic i32, i32* %p seq_cst, align 4
ret i32 %r
; CHECK-LABEL: atomic_load:
; CHECK: ldar
}
-define i8 @atomic_load_relaxed_8(i8* %p, i32 %off32) {
+define i8 @atomic_load_relaxed_8(i8* %p, i32 %off32) #0 {
; CHECK-LABEL: atomic_load_relaxed_8:
- %ptr_unsigned = getelementptr i8* %p, i32 4095
- %val_unsigned = load atomic i8* %ptr_unsigned monotonic, align 1
+ %ptr_unsigned = getelementptr i8, i8* %p, i32 4095
+ %val_unsigned = load atomic i8, i8* %ptr_unsigned monotonic, align 1
; CHECK: ldrb {{w[0-9]+}}, [x0, #4095]
- %ptr_regoff = getelementptr i8* %p, i32 %off32
- %val_regoff = load atomic i8* %ptr_regoff unordered, align 1
+ %ptr_regoff = getelementptr i8, i8* %p, i32 %off32
+ %val_regoff = load atomic i8, i8* %ptr_regoff unordered, align 1
%tot1 = add i8 %val_unsigned, %val_regoff
; CHECK: ldrb {{w[0-9]+}}, [x0, w1, sxtw]
- %ptr_unscaled = getelementptr i8* %p, i32 -256
- %val_unscaled = load atomic i8* %ptr_unscaled monotonic, align 1
+ %ptr_unscaled = getelementptr i8, i8* %p, i32 -256
+ %val_unscaled = load atomic i8, i8* %ptr_unscaled monotonic, align 1
%tot2 = add i8 %tot1, %val_unscaled
; CHECK: ldurb {{w[0-9]+}}, [x0, #-256]
- %ptr_random = getelementptr i8* %p, i32 1191936 ; 0x123000 (i.e. ADD imm)
- %val_random = load atomic i8* %ptr_random unordered, align 1
+ %ptr_random = getelementptr i8, i8* %p, i32 1191936 ; 0x123000 (i.e. ADD imm)
+ %val_random = load atomic i8, i8* %ptr_random unordered, align 1
%tot3 = add i8 %tot2, %val_random
; CHECK: add x[[ADDR:[0-9]+]], x0, #291, lsl #12
; CHECK: ldrb {{w[0-9]+}}, [x[[ADDR]]]
@@ -138,24 +150,24 @@ define i8 @atomic_load_relaxed_8(i8* %p, i32 %off32) {
ret i8 %tot3
}
-define i16 @atomic_load_relaxed_16(i16* %p, i32 %off32) {
+define i16 @atomic_load_relaxed_16(i16* %p, i32 %off32) #0 {
; CHECK-LABEL: atomic_load_relaxed_16:
- %ptr_unsigned = getelementptr i16* %p, i32 4095
- %val_unsigned = load atomic i16* %ptr_unsigned monotonic, align 2
+ %ptr_unsigned = getelementptr i16, i16* %p, i32 4095
+ %val_unsigned = load atomic i16, i16* %ptr_unsigned monotonic, align 2
; CHECK: ldrh {{w[0-9]+}}, [x0, #8190]
- %ptr_regoff = getelementptr i16* %p, i32 %off32
- %val_regoff = load atomic i16* %ptr_regoff unordered, align 2
+ %ptr_regoff = getelementptr i16, i16* %p, i32 %off32
+ %val_regoff = load atomic i16, i16* %ptr_regoff unordered, align 2
%tot1 = add i16 %val_unsigned, %val_regoff
; CHECK: ldrh {{w[0-9]+}}, [x0, w1, sxtw #1]
- %ptr_unscaled = getelementptr i16* %p, i32 -128
- %val_unscaled = load atomic i16* %ptr_unscaled monotonic, align 2
+ %ptr_unscaled = getelementptr i16, i16* %p, i32 -128
+ %val_unscaled = load atomic i16, i16* %ptr_unscaled monotonic, align 2
%tot2 = add i16 %tot1, %val_unscaled
; CHECK: ldurh {{w[0-9]+}}, [x0, #-256]
- %ptr_random = getelementptr i16* %p, i32 595968 ; 0x123000/2 (i.e. ADD imm)
- %val_random = load atomic i16* %ptr_random unordered, align 2
+ %ptr_random = getelementptr i16, i16* %p, i32 595968 ; 0x123000/2 (i.e. ADD imm)
+ %val_random = load atomic i16, i16* %ptr_random unordered, align 2
%tot3 = add i16 %tot2, %val_random
; CHECK: add x[[ADDR:[0-9]+]], x0, #291, lsl #12
; CHECK: ldrh {{w[0-9]+}}, [x[[ADDR]]]
@@ -163,24 +175,24 @@ define i16 @atomic_load_relaxed_16(i16* %p, i32 %off32) {
ret i16 %tot3
}
-define i32 @atomic_load_relaxed_32(i32* %p, i32 %off32) {
+define i32 @atomic_load_relaxed_32(i32* %p, i32 %off32) #0 {
; CHECK-LABEL: atomic_load_relaxed_32:
- %ptr_unsigned = getelementptr i32* %p, i32 4095
- %val_unsigned = load atomic i32* %ptr_unsigned monotonic, align 4
+ %ptr_unsigned = getelementptr i32, i32* %p, i32 4095
+ %val_unsigned = load atomic i32, i32* %ptr_unsigned monotonic, align 4
; CHECK: ldr {{w[0-9]+}}, [x0, #16380]
- %ptr_regoff = getelementptr i32* %p, i32 %off32
- %val_regoff = load atomic i32* %ptr_regoff unordered, align 4
+ %ptr_regoff = getelementptr i32, i32* %p, i32 %off32
+ %val_regoff = load atomic i32, i32* %ptr_regoff unordered, align 4
%tot1 = add i32 %val_unsigned, %val_regoff
; CHECK: ldr {{w[0-9]+}}, [x0, w1, sxtw #2]
- %ptr_unscaled = getelementptr i32* %p, i32 -64
- %val_unscaled = load atomic i32* %ptr_unscaled monotonic, align 4
+ %ptr_unscaled = getelementptr i32, i32* %p, i32 -64
+ %val_unscaled = load atomic i32, i32* %ptr_unscaled monotonic, align 4
%tot2 = add i32 %tot1, %val_unscaled
; CHECK: ldur {{w[0-9]+}}, [x0, #-256]
- %ptr_random = getelementptr i32* %p, i32 297984 ; 0x123000/4 (i.e. ADD imm)
- %val_random = load atomic i32* %ptr_random unordered, align 4
+ %ptr_random = getelementptr i32, i32* %p, i32 297984 ; 0x123000/4 (i.e. ADD imm)
+ %val_random = load atomic i32, i32* %ptr_random unordered, align 4
%tot3 = add i32 %tot2, %val_random
; CHECK: add x[[ADDR:[0-9]+]], x0, #291, lsl #12
; CHECK: ldr {{w[0-9]+}}, [x[[ADDR]]]
@@ -188,24 +200,24 @@ define i32 @atomic_load_relaxed_32(i32* %p, i32 %off32) {
ret i32 %tot3
}
-define i64 @atomic_load_relaxed_64(i64* %p, i32 %off32) {
+define i64 @atomic_load_relaxed_64(i64* %p, i32 %off32) #0 {
; CHECK-LABEL: atomic_load_relaxed_64:
- %ptr_unsigned = getelementptr i64* %p, i32 4095
- %val_unsigned = load atomic i64* %ptr_unsigned monotonic, align 8
+ %ptr_unsigned = getelementptr i64, i64* %p, i32 4095
+ %val_unsigned = load atomic i64, i64* %ptr_unsigned monotonic, align 8
; CHECK: ldr {{x[0-9]+}}, [x0, #32760]
- %ptr_regoff = getelementptr i64* %p, i32 %off32
- %val_regoff = load atomic i64* %ptr_regoff unordered, align 8
+ %ptr_regoff = getelementptr i64, i64* %p, i32 %off32
+ %val_regoff = load atomic i64, i64* %ptr_regoff unordered, align 8
%tot1 = add i64 %val_unsigned, %val_regoff
; CHECK: ldr {{x[0-9]+}}, [x0, w1, sxtw #3]
- %ptr_unscaled = getelementptr i64* %p, i32 -32
- %val_unscaled = load atomic i64* %ptr_unscaled monotonic, align 8
+ %ptr_unscaled = getelementptr i64, i64* %p, i32 -32
+ %val_unscaled = load atomic i64, i64* %ptr_unscaled monotonic, align 8
%tot2 = add i64 %tot1, %val_unscaled
; CHECK: ldur {{x[0-9]+}}, [x0, #-256]
- %ptr_random = getelementptr i64* %p, i32 148992 ; 0x123000/8 (i.e. ADD imm)
- %val_random = load atomic i64* %ptr_random unordered, align 8
+ %ptr_random = getelementptr i64, i64* %p, i32 148992 ; 0x123000/8 (i.e. ADD imm)
+ %val_random = load atomic i64, i64* %ptr_random unordered, align 8
%tot3 = add i64 %tot2, %val_random
; CHECK: add x[[ADDR:[0-9]+]], x0, #291, lsl #12
; CHECK: ldr {{x[0-9]+}}, [x[[ADDR]]]
@@ -214,28 +226,28 @@ define i64 @atomic_load_relaxed_64(i64* %p, i32 %off32) {
}
-define void @atomc_store(i32* %p) {
+define void @atomc_store(i32* %p) #0 {
store atomic i32 4, i32* %p seq_cst, align 4
ret void
; CHECK-LABEL: atomc_store:
; CHECK: stlr
}
-define void @atomic_store_relaxed_8(i8* %p, i32 %off32, i8 %val) {
+define void @atomic_store_relaxed_8(i8* %p, i32 %off32, i8 %val) #0 {
; CHECK-LABEL: atomic_store_relaxed_8:
- %ptr_unsigned = getelementptr i8* %p, i32 4095
+ %ptr_unsigned = getelementptr i8, i8* %p, i32 4095
store atomic i8 %val, i8* %ptr_unsigned monotonic, align 1
; CHECK: strb {{w[0-9]+}}, [x0, #4095]
- %ptr_regoff = getelementptr i8* %p, i32 %off32
+ %ptr_regoff = getelementptr i8, i8* %p, i32 %off32
store atomic i8 %val, i8* %ptr_regoff unordered, align 1
; CHECK: strb {{w[0-9]+}}, [x0, w1, sxtw]
- %ptr_unscaled = getelementptr i8* %p, i32 -256
+ %ptr_unscaled = getelementptr i8, i8* %p, i32 -256
store atomic i8 %val, i8* %ptr_unscaled monotonic, align 1
; CHECK: sturb {{w[0-9]+}}, [x0, #-256]
- %ptr_random = getelementptr i8* %p, i32 1191936 ; 0x123000 (i.e. ADD imm)
+ %ptr_random = getelementptr i8, i8* %p, i32 1191936 ; 0x123000 (i.e. ADD imm)
store atomic i8 %val, i8* %ptr_random unordered, align 1
; CHECK: add x[[ADDR:[0-9]+]], x0, #291, lsl #12
; CHECK: strb {{w[0-9]+}}, [x[[ADDR]]]
@@ -243,21 +255,21 @@ define void @atomic_store_relaxed_8(i8* %p, i32 %off32, i8 %val) {
ret void
}
-define void @atomic_store_relaxed_16(i16* %p, i32 %off32, i16 %val) {
+define void @atomic_store_relaxed_16(i16* %p, i32 %off32, i16 %val) #0 {
; CHECK-LABEL: atomic_store_relaxed_16:
- %ptr_unsigned = getelementptr i16* %p, i32 4095
+ %ptr_unsigned = getelementptr i16, i16* %p, i32 4095
store atomic i16 %val, i16* %ptr_unsigned monotonic, align 2
; CHECK: strh {{w[0-9]+}}, [x0, #8190]
- %ptr_regoff = getelementptr i16* %p, i32 %off32
+ %ptr_regoff = getelementptr i16, i16* %p, i32 %off32
store atomic i16 %val, i16* %ptr_regoff unordered, align 2
; CHECK: strh {{w[0-9]+}}, [x0, w1, sxtw #1]
- %ptr_unscaled = getelementptr i16* %p, i32 -128
+ %ptr_unscaled = getelementptr i16, i16* %p, i32 -128
store atomic i16 %val, i16* %ptr_unscaled monotonic, align 2
; CHECK: sturh {{w[0-9]+}}, [x0, #-256]
- %ptr_random = getelementptr i16* %p, i32 595968 ; 0x123000/2 (i.e. ADD imm)
+ %ptr_random = getelementptr i16, i16* %p, i32 595968 ; 0x123000/2 (i.e. ADD imm)
store atomic i16 %val, i16* %ptr_random unordered, align 2
; CHECK: add x[[ADDR:[0-9]+]], x0, #291, lsl #12
; CHECK: strh {{w[0-9]+}}, [x[[ADDR]]]
@@ -265,21 +277,21 @@ define void @atomic_store_relaxed_16(i16* %p, i32 %off32, i16 %val) {
ret void
}
-define void @atomic_store_relaxed_32(i32* %p, i32 %off32, i32 %val) {
+define void @atomic_store_relaxed_32(i32* %p, i32 %off32, i32 %val) #0 {
; CHECK-LABEL: atomic_store_relaxed_32:
- %ptr_unsigned = getelementptr i32* %p, i32 4095
+ %ptr_unsigned = getelementptr i32, i32* %p, i32 4095
store atomic i32 %val, i32* %ptr_unsigned monotonic, align 4
; CHECK: str {{w[0-9]+}}, [x0, #16380]
- %ptr_regoff = getelementptr i32* %p, i32 %off32
+ %ptr_regoff = getelementptr i32, i32* %p, i32 %off32
store atomic i32 %val, i32* %ptr_regoff unordered, align 4
; CHECK: str {{w[0-9]+}}, [x0, w1, sxtw #2]
- %ptr_unscaled = getelementptr i32* %p, i32 -64
+ %ptr_unscaled = getelementptr i32, i32* %p, i32 -64
store atomic i32 %val, i32* %ptr_unscaled monotonic, align 4
; CHECK: stur {{w[0-9]+}}, [x0, #-256]
- %ptr_random = getelementptr i32* %p, i32 297984 ; 0x123000/4 (i.e. ADD imm)
+ %ptr_random = getelementptr i32, i32* %p, i32 297984 ; 0x123000/4 (i.e. ADD imm)
store atomic i32 %val, i32* %ptr_random unordered, align 4
; CHECK: add x[[ADDR:[0-9]+]], x0, #291, lsl #12
; CHECK: str {{w[0-9]+}}, [x[[ADDR]]]
@@ -287,21 +299,21 @@ define void @atomic_store_relaxed_32(i32* %p, i32 %off32, i32 %val) {
ret void
}
-define void @atomic_store_relaxed_64(i64* %p, i32 %off32, i64 %val) {
+define void @atomic_store_relaxed_64(i64* %p, i32 %off32, i64 %val) #0 {
; CHECK-LABEL: atomic_store_relaxed_64:
- %ptr_unsigned = getelementptr i64* %p, i32 4095
+ %ptr_unsigned = getelementptr i64, i64* %p, i32 4095
store atomic i64 %val, i64* %ptr_unsigned monotonic, align 8
; CHECK: str {{x[0-9]+}}, [x0, #32760]
- %ptr_regoff = getelementptr i64* %p, i32 %off32
+ %ptr_regoff = getelementptr i64, i64* %p, i32 %off32
store atomic i64 %val, i64* %ptr_regoff unordered, align 8
; CHECK: str {{x[0-9]+}}, [x0, w1, sxtw #3]
- %ptr_unscaled = getelementptr i64* %p, i32 -32
+ %ptr_unscaled = getelementptr i64, i64* %p, i32 -32
store atomic i64 %val, i64* %ptr_unscaled monotonic, align 8
; CHECK: stur {{x[0-9]+}}, [x0, #-256]
- %ptr_random = getelementptr i64* %p, i32 148992 ; 0x123000/8 (i.e. ADD imm)
+ %ptr_random = getelementptr i64, i64* %p, i32 148992 ; 0x123000/8 (i.e. ADD imm)
store atomic i64 %val, i64* %ptr_random unordered, align 8
; CHECK: add x[[ADDR:[0-9]+]], x0, #291, lsl #12
; CHECK: str {{x[0-9]+}}, [x[[ADDR]]]
@@ -319,13 +331,13 @@ define void @atomic_store_relaxed_64(i64* %p, i32 %off32, i64 %val) {
define i32 @next_id() nounwind optsize ssp align 2 {
entry:
- %0 = atomicrmw add i32* getelementptr inbounds (%"class.X::Atomic"* @counter, i64 0, i32 0, i32 0), i32 1 seq_cst
+ %0 = atomicrmw add i32* getelementptr inbounds (%"class.X::Atomic", %"class.X::Atomic"* @counter, i64 0, i32 0, i32 0), i32 1 seq_cst
%add.i = add i32 %0, 1
%tobool = icmp eq i32 %add.i, 0
br i1 %tobool, label %if.else, label %return
if.else: ; preds = %entry
- %1 = atomicrmw add i32* getelementptr inbounds (%"class.X::Atomic"* @counter, i64 0, i32 0, i32 0), i32 1 seq_cst
+ %1 = atomicrmw add i32* getelementptr inbounds (%"class.X::Atomic", %"class.X::Atomic"* @counter, i64 0, i32 0, i32 0), i32 1 seq_cst
%add.i2 = add i32 %1, 1
br label %return
@@ -333,3 +345,5 @@ return: ; preds = %if.else, %entry
%retval.0 = phi i32 [ %add.i2, %if.else ], [ %add.i, %entry ]
ret i32 %retval.0
}
+
+attributes #0 = { nounwind }
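
arm64-atomic.ll is rewritten rather than only re-syntaxed: the compare-and-swap tests take %cmp and %new as arguments instead of hard-coded constants, an acq_rel/monotonic variant is added, the RUN line passes -asm-verbose=false so the checks can use CHECK-NEXT to pin the exact exclusive-load/store sequence, and every function gains nounwind through attributes #0. A compact sketch of the two cmpxchg forms being exercised, with illustrative value names:

  %pair.acq = cmpxchg i32* %p, i32 %cmp, i32 %new acquire acquire     ; checked as an ldaxr/stxr loop
  %pair.rel = cmpxchg i32* %p, i32 %cmp, i32 %new acq_rel monotonic   ; checked as an ldaxr/stlxr loop

Each returns a { i32, i1 } pair; the tests extract element 0 as the loaded value.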
diff --git a/test/CodeGen/AArch64/arm64-basic-pic.ll b/test/CodeGen/AArch64/arm64-basic-pic.ll
index 9fdb1e91385b..e11274e45ff1 100644
--- a/test/CodeGen/AArch64/arm64-basic-pic.ll
+++ b/test/CodeGen/AArch64/arm64-basic-pic.ll
@@ -5,7 +5,7 @@
define i32 @get_globalvar() {
; CHECK-LABEL: get_globalvar:
- %val = load i32* @var
+ %val = load i32, i32* @var
; CHECK: adrp x[[GOTHI:[0-9]+]], :got:var
; CHECK: ldr x[[GOTLOC:[0-9]+]], [x[[GOTHI]], :got_lo12:var]
; CHECK: ldr w0, [x[[GOTLOC]]]
@@ -16,7 +16,7 @@ define i32 @get_globalvar() {
define i32* @get_globalvaraddr() {
; CHECK-LABEL: get_globalvaraddr:
- %val = load i32* @var
+ %val = load i32, i32* @var
; CHECK: adrp x[[GOTHI:[0-9]+]], :got:var
; CHECK: ldr x0, [x[[GOTHI]], :got_lo12:var]
@@ -28,7 +28,7 @@ define i32* @get_globalvaraddr() {
define i32 @get_hiddenvar() {
; CHECK-LABEL: get_hiddenvar:
- %val = load i32* @hiddenvar
+ %val = load i32, i32* @hiddenvar
; CHECK: adrp x[[HI:[0-9]+]], hiddenvar
; CHECK: ldr w0, [x[[HI]], :lo12:hiddenvar]
@@ -38,7 +38,7 @@ define i32 @get_hiddenvar() {
define i32* @get_hiddenvaraddr() {
; CHECK-LABEL: get_hiddenvaraddr:
- %val = load i32* @hiddenvar
+ %val = load i32, i32* @hiddenvar
; CHECK: adrp [[HI:x[0-9]+]], hiddenvar
; CHECK: add x0, [[HI]], :lo12:hiddenvar
diff --git a/test/CodeGen/AArch64/arm64-bcc.ll b/test/CodeGen/AArch64/arm64-bcc.ll
index 138ae9036092..66d2f52ab969 100644
--- a/test/CodeGen/AArch64/arm64-bcc.ll
+++ b/test/CodeGen/AArch64/arm64-bcc.ll
@@ -27,10 +27,10 @@ entry:
define { i64, i1 } @foo(i64* , %Sstruct* , i1, i64) {
entry:
%.sroa.0 = alloca i72, align 16
- %.count.value = getelementptr inbounds %Sstruct* %1, i64 0, i32 0, i32 0
- %4 = load i64* %.count.value, align 8
- %.repeatedValue.value = getelementptr inbounds %Sstruct* %1, i64 0, i32 1, i32 0
- %5 = load i32* %.repeatedValue.value, align 8
+ %.count.value = getelementptr inbounds %Sstruct, %Sstruct* %1, i64 0, i32 0, i32 0
+ %4 = load i64, i64* %.count.value, align 8
+ %.repeatedValue.value = getelementptr inbounds %Sstruct, %Sstruct* %1, i64 0, i32 1, i32 0
+ %5 = load i32, i32* %.repeatedValue.value, align 8
%6 = icmp eq i64 %4, 0
br label %7
diff --git a/test/CodeGen/AArch64/arm64-big-endian-bitconverts.ll b/test/CodeGen/AArch64/arm64-big-endian-bitconverts.ll
index d2985f4dd66a..876a69193b47 100644
--- a/test/CodeGen/AArch64/arm64-big-endian-bitconverts.ll
+++ b/test/CodeGen/AArch64/arm64-big-endian-bitconverts.ll
@@ -5,7 +5,7 @@
define void @test_i64_f64(double* %p, i64* %q) {
; CHECK: ldr
; CHECK: str
- %1 = load double* %p
+ %1 = load double, double* %p
%2 = fadd double %1, %1
%3 = bitcast double %2 to i64
%4 = add i64 %3, %3
@@ -17,7 +17,7 @@ define void @test_i64_f64(double* %p, i64* %q) {
define void @test_i64_v1i64(<1 x i64>* %p, i64* %q) {
; CHECK: ldr
; CHECK: str
- %1 = load <1 x i64>* %p
+ %1 = load <1 x i64>, <1 x i64>* %p
%2 = add <1 x i64> %1, %1
%3 = bitcast <1 x i64> %2 to i64
%4 = add i64 %3, %3
@@ -30,7 +30,7 @@ define void @test_i64_v2f32(<2 x float>* %p, i64* %q) {
; CHECK: ld1 { v{{[0-9]+}}.2s }
; CHECK: rev64 v{{[0-9]+}}.2s
; CHECK: str
- %1 = load <2 x float>* %p
+ %1 = load <2 x float>, <2 x float>* %p
%2 = fadd <2 x float> %1, %1
%3 = bitcast <2 x float> %2 to i64
%4 = add i64 %3, %3
@@ -43,7 +43,7 @@ define void @test_i64_v2i32(<2 x i32>* %p, i64* %q) {
; CHECK: ld1 { v{{[0-9]+}}.2s }
; CHECK: rev64 v{{[0-9]+}}.2s
; CHECK: str
- %1 = load <2 x i32>* %p
+ %1 = load <2 x i32>, <2 x i32>* %p
%2 = add <2 x i32> %1, %1
%3 = bitcast <2 x i32> %2 to i64
%4 = add i64 %3, %3
@@ -56,7 +56,7 @@ define void @test_i64_v4i16(<4 x i16>* %p, i64* %q) {
; CHECK: ld1 { v{{[0-9]+}}.4h }
; CHECK: rev64 v{{[0-9]+}}.4h
; CHECK: str
- %1 = load <4 x i16>* %p
+ %1 = load <4 x i16>, <4 x i16>* %p
%2 = add <4 x i16> %1, %1
%3 = bitcast <4 x i16> %2 to i64
%4 = add i64 %3, %3
@@ -69,7 +69,7 @@ define void @test_i64_v8i8(<8 x i8>* %p, i64* %q) {
; CHECK: ld1 { v{{[0-9]+}}.8b }
; CHECK: rev64 v{{[0-9]+}}.8b
; CHECK: str
- %1 = load <8 x i8>* %p
+ %1 = load <8 x i8>, <8 x i8>* %p
%2 = add <8 x i8> %1, %1
%3 = bitcast <8 x i8> %2 to i64
%4 = add i64 %3, %3
@@ -81,7 +81,7 @@ define void @test_i64_v8i8(<8 x i8>* %p, i64* %q) {
define void @test_f64_i64(i64* %p, double* %q) {
; CHECK: ldr
; CHECK: str
- %1 = load i64* %p
+ %1 = load i64, i64* %p
%2 = add i64 %1, %1
%3 = bitcast i64 %2 to double
%4 = fadd double %3, %3
@@ -93,7 +93,7 @@ define void @test_f64_i64(i64* %p, double* %q) {
define void @test_f64_v1i64(<1 x i64>* %p, double* %q) {
; CHECK: ldr
; CHECK: str
- %1 = load <1 x i64>* %p
+ %1 = load <1 x i64>, <1 x i64>* %p
%2 = add <1 x i64> %1, %1
%3 = bitcast <1 x i64> %2 to double
%4 = fadd double %3, %3
@@ -106,7 +106,7 @@ define void @test_f64_v2f32(<2 x float>* %p, double* %q) {
; CHECK: ld1 { v{{[0-9]+}}.2s }
; CHECK: rev64 v{{[0-9]+}}.2s
; CHECK: str
- %1 = load <2 x float>* %p
+ %1 = load <2 x float>, <2 x float>* %p
%2 = fadd <2 x float> %1, %1
%3 = bitcast <2 x float> %2 to double
%4 = fadd double %3, %3
@@ -119,7 +119,7 @@ define void @test_f64_v2i32(<2 x i32>* %p, double* %q) {
; CHECK: ld1 { v{{[0-9]+}}.2s }
; CHECK: rev64 v{{[0-9]+}}.2s
; CHECK: str
- %1 = load <2 x i32>* %p
+ %1 = load <2 x i32>, <2 x i32>* %p
%2 = add <2 x i32> %1, %1
%3 = bitcast <2 x i32> %2 to double
%4 = fadd double %3, %3
@@ -132,7 +132,7 @@ define void @test_f64_v4i16(<4 x i16>* %p, double* %q) {
; CHECK: ld1 { v{{[0-9]+}}.4h }
; CHECK: rev64 v{{[0-9]+}}.4h
; CHECK: str
- %1 = load <4 x i16>* %p
+ %1 = load <4 x i16>, <4 x i16>* %p
%2 = add <4 x i16> %1, %1
%3 = bitcast <4 x i16> %2 to double
%4 = fadd double %3, %3
@@ -145,7 +145,7 @@ define void @test_f64_v8i8(<8 x i8>* %p, double* %q) {
; CHECK: ld1 { v{{[0-9]+}}.8b }
; CHECK: rev64 v{{[0-9]+}}.8b
; CHECK: str
- %1 = load <8 x i8>* %p
+ %1 = load <8 x i8>, <8 x i8>* %p
%2 = add <8 x i8> %1, %1
%3 = bitcast <8 x i8> %2 to double
%4 = fadd double %3, %3
@@ -157,7 +157,7 @@ define void @test_f64_v8i8(<8 x i8>* %p, double* %q) {
define void @test_v1i64_i64(i64* %p, <1 x i64>* %q) {
; CHECK: ldr
; CHECK: str
- %1 = load i64* %p
+ %1 = load i64, i64* %p
%2 = add i64 %1, %1
%3 = bitcast i64 %2 to <1 x i64>
%4 = add <1 x i64> %3, %3
@@ -169,7 +169,7 @@ define void @test_v1i64_i64(i64* %p, <1 x i64>* %q) {
define void @test_v1i64_f64(double* %p, <1 x i64>* %q) {
; CHECK: ldr
; CHECK: str
- %1 = load double* %p
+ %1 = load double, double* %p
%2 = fadd double %1, %1
%3 = bitcast double %2 to <1 x i64>
%4 = add <1 x i64> %3, %3
@@ -182,7 +182,7 @@ define void @test_v1i64_v2f32(<2 x float>* %p, <1 x i64>* %q) {
; CHECK: ld1 { v{{[0-9]+}}.2s }
; CHECK: rev64 v{{[0-9]+}}.2s
; CHECK: str
- %1 = load <2 x float>* %p
+ %1 = load <2 x float>, <2 x float>* %p
%2 = fadd <2 x float> %1, %1
%3 = bitcast <2 x float> %2 to <1 x i64>
%4 = add <1 x i64> %3, %3
@@ -195,7 +195,7 @@ define void @test_v1i64_v2i32(<2 x i32>* %p, <1 x i64>* %q) {
; CHECK: ld1 { v{{[0-9]+}}.2s }
; CHECK: rev64 v{{[0-9]+}}.2s
; CHECK: str
- %1 = load <2 x i32>* %p
+ %1 = load <2 x i32>, <2 x i32>* %p
%2 = add <2 x i32> %1, %1
%3 = bitcast <2 x i32> %2 to <1 x i64>
%4 = add <1 x i64> %3, %3
@@ -208,7 +208,7 @@ define void @test_v1i64_v4i16(<4 x i16>* %p, <1 x i64>* %q) {
; CHECK: ld1 { v{{[0-9]+}}.4h }
; CHECK: rev64 v{{[0-9]+}}.4h
; CHECK: str
- %1 = load <4 x i16>* %p
+ %1 = load <4 x i16>, <4 x i16>* %p
%2 = add <4 x i16> %1, %1
%3 = bitcast <4 x i16> %2 to <1 x i64>
%4 = add <1 x i64> %3, %3
@@ -221,7 +221,7 @@ define void @test_v1i64_v8i8(<8 x i8>* %p, <1 x i64>* %q) {
; CHECK: ld1 { v{{[0-9]+}}.8b }
; CHECK: rev64 v{{[0-9]+}}.8b
; CHECK: str
- %1 = load <8 x i8>* %p
+ %1 = load <8 x i8>, <8 x i8>* %p
%2 = add <8 x i8> %1, %1
%3 = bitcast <8 x i8> %2 to <1 x i64>
%4 = add <1 x i64> %3, %3
@@ -234,7 +234,7 @@ define void @test_v2f32_i64(i64* %p, <2 x float>* %q) {
; CHECK: ldr
; CHECK: rev64 v{{[0-9]+}}.2s
; CHECK: st1 { v{{[0-9]+}}.2s }
- %1 = load i64* %p
+ %1 = load i64, i64* %p
%2 = add i64 %1, %1
%3 = bitcast i64 %2 to <2 x float>
%4 = fadd <2 x float> %3, %3
@@ -247,7 +247,7 @@ define void @test_v2f32_f64(double* %p, <2 x float>* %q) {
; CHECK: ldr
; CHECK: rev64 v{{[0-9]+}}.2s
; CHECK: st1 { v{{[0-9]+}}.2s }
- %1 = load double* %p
+ %1 = load double, double* %p
%2 = fadd double %1, %1
%3 = bitcast double %2 to <2 x float>
%4 = fadd <2 x float> %3, %3
@@ -260,7 +260,7 @@ define void @test_v2f32_v1i64(<1 x i64>* %p, <2 x float>* %q) {
; CHECK: ldr
; CHECK: rev64 v{{[0-9]+}}.2s
; CHECK: st1 { v{{[0-9]+}}.2s }
- %1 = load <1 x i64>* %p
+ %1 = load <1 x i64>, <1 x i64>* %p
%2 = add <1 x i64> %1, %1
%3 = bitcast <1 x i64> %2 to <2 x float>
%4 = fadd <2 x float> %3, %3
@@ -272,7 +272,7 @@ define void @test_v2f32_v1i64(<1 x i64>* %p, <2 x float>* %q) {
define void @test_v2f32_v2i32(<2 x i32>* %p, <2 x float>* %q) {
; CHECK: ld1 { v{{[0-9]+}}.2s }
; CHECK: st1 { v{{[0-9]+}}.2s }
- %1 = load <2 x i32>* %p
+ %1 = load <2 x i32>, <2 x i32>* %p
%2 = add <2 x i32> %1, %1
%3 = bitcast <2 x i32> %2 to <2 x float>
%4 = fadd <2 x float> %3, %3
@@ -285,7 +285,7 @@ define void @test_v2f32_v4i16(<4 x i16>* %p, <2 x float>* %q) {
; CHECK: ld1 { v{{[0-9]+}}.4h }
; CHECK: rev32 v{{[0-9]+}}.4h
; CHECK: st1 { v{{[0-9]+}}.2s }
- %1 = load <4 x i16>* %p
+ %1 = load <4 x i16>, <4 x i16>* %p
%2 = add <4 x i16> %1, %1
%3 = bitcast <4 x i16> %2 to <2 x float>
%4 = fadd <2 x float> %3, %3
@@ -298,7 +298,7 @@ define void @test_v2f32_v8i8(<8 x i8>* %p, <2 x float>* %q) {
; CHECK: ld1 { v{{[0-9]+}}.8b }
; CHECK: rev32 v{{[0-9]+}}.8b
; CHECK: st1 { v{{[0-9]+}}.2s }
- %1 = load <8 x i8>* %p
+ %1 = load <8 x i8>, <8 x i8>* %p
%2 = add <8 x i8> %1, %1
%3 = bitcast <8 x i8> %2 to <2 x float>
%4 = fadd <2 x float> %3, %3
@@ -311,7 +311,7 @@ define void @test_v2i32_i64(i64* %p, <2 x i32>* %q) {
; CHECK: ldr
; CHECK: rev64 v{{[0-9]+}}.2s
; CHECK: st1 { v{{[0-9]+}}.2s }
- %1 = load i64* %p
+ %1 = load i64, i64* %p
%2 = add i64 %1, %1
%3 = bitcast i64 %2 to <2 x i32>
%4 = add <2 x i32> %3, %3
@@ -324,7 +324,7 @@ define void @test_v2i32_f64(double* %p, <2 x i32>* %q) {
; CHECK: ldr
; CHECK: rev64 v{{[0-9]+}}.2s
; CHECK: st1 { v{{[0-9]+}}.2s }
- %1 = load double* %p
+ %1 = load double, double* %p
%2 = fadd double %1, %1
%3 = bitcast double %2 to <2 x i32>
%4 = add <2 x i32> %3, %3
@@ -337,7 +337,7 @@ define void @test_v2i32_v1i64(<1 x i64>* %p, <2 x i32>* %q) {
; CHECK: ldr
; CHECK: rev64 v{{[0-9]+}}.2s
; CHECK: st1 { v{{[0-9]+}}.2s }
- %1 = load <1 x i64>* %p
+ %1 = load <1 x i64>, <1 x i64>* %p
%2 = add <1 x i64> %1, %1
%3 = bitcast <1 x i64> %2 to <2 x i32>
%4 = add <2 x i32> %3, %3
@@ -349,7 +349,7 @@ define void @test_v2i32_v1i64(<1 x i64>* %p, <2 x i32>* %q) {
define void @test_v2i32_v2f32(<2 x float>* %p, <2 x i32>* %q) {
; CHECK: ld1 { v{{[0-9]+}}.2s }
; CHECK: st1 { v{{[0-9]+}}.2s }
- %1 = load <2 x float>* %p
+ %1 = load <2 x float>, <2 x float>* %p
%2 = fadd <2 x float> %1, %1
%3 = bitcast <2 x float> %2 to <2 x i32>
%4 = add <2 x i32> %3, %3
@@ -362,7 +362,7 @@ define void @test_v2i32_v4i16(<4 x i16>* %p, <2 x i32>* %q) {
; CHECK: ld1 { v{{[0-9]+}}.4h }
; CHECK: rev32 v{{[0-9]+}}.4h
; CHECK: st1 { v{{[0-9]+}}.2s }
- %1 = load <4 x i16>* %p
+ %1 = load <4 x i16>, <4 x i16>* %p
%2 = add <4 x i16> %1, %1
%3 = bitcast <4 x i16> %2 to <2 x i32>
%4 = add <2 x i32> %3, %3
@@ -375,7 +375,7 @@ define void @test_v2i32_v8i8(<8 x i8>* %p, <2 x i32>* %q) {
; CHECK: ld1 { v{{[0-9]+}}.8b }
; CHECK: rev32 v{{[0-9]+}}.8b
; CHECK: st1 { v{{[0-9]+}}.2s }
- %1 = load <8 x i8>* %p
+ %1 = load <8 x i8>, <8 x i8>* %p
%2 = add <8 x i8> %1, %1
%3 = bitcast <8 x i8> %2 to <2 x i32>
%4 = add <2 x i32> %3, %3
@@ -388,7 +388,7 @@ define void @test_v4i16_i64(i64* %p, <4 x i16>* %q) {
; CHECK: ldr
; CHECK: rev64 v{{[0-9]+}}.4h
; CHECK: st1 { v{{[0-9]+}}.4h }
- %1 = load i64* %p
+ %1 = load i64, i64* %p
%2 = add i64 %1, %1
%3 = bitcast i64 %2 to <4 x i16>
%4 = add <4 x i16> %3, %3
@@ -401,7 +401,7 @@ define void @test_v4i16_f64(double* %p, <4 x i16>* %q) {
; CHECK: ldr
; CHECK: rev64 v{{[0-9]+}}.4h
; CHECK: st1 { v{{[0-9]+}}.4h }
- %1 = load double* %p
+ %1 = load double, double* %p
%2 = fadd double %1, %1
%3 = bitcast double %2 to <4 x i16>
%4 = add <4 x i16> %3, %3
@@ -414,7 +414,7 @@ define void @test_v4i16_v1i64(<1 x i64>* %p, <4 x i16>* %q) {
; CHECK: ldr
; CHECK: rev64 v{{[0-9]+}}.4h
; CHECK: st1 { v{{[0-9]+}}.4h }
- %1 = load <1 x i64>* %p
+ %1 = load <1 x i64>, <1 x i64>* %p
%2 = add <1 x i64> %1, %1
%3 = bitcast <1 x i64> %2 to <4 x i16>
%4 = add <4 x i16> %3, %3
@@ -427,7 +427,7 @@ define void @test_v4i16_v2f32(<2 x float>* %p, <4 x i16>* %q) {
; CHECK: ld1 { v{{[0-9]+}}.2s }
; CHECK: rev32 v{{[0-9]+}}.4h
; CHECK: st1 { v{{[0-9]+}}.4h }
- %1 = load <2 x float>* %p
+ %1 = load <2 x float>, <2 x float>* %p
%2 = fadd <2 x float> %1, %1
%3 = bitcast <2 x float> %2 to <4 x i16>
%4 = add <4 x i16> %3, %3
@@ -440,7 +440,7 @@ define void @test_v4i16_v2i32(<2 x i32>* %p, <4 x i16>* %q) {
; CHECK: ld1 { v{{[0-9]+}}.2s }
; CHECK: rev32 v{{[0-9]+}}.4h
; CHECK: st1 { v{{[0-9]+}}.4h }
- %1 = load <2 x i32>* %p
+ %1 = load <2 x i32>, <2 x i32>* %p
%2 = add <2 x i32> %1, %1
%3 = bitcast <2 x i32> %2 to <4 x i16>
%4 = add <4 x i16> %3, %3
@@ -453,7 +453,7 @@ define void @test_v4i16_v8i8(<8 x i8>* %p, <4 x i16>* %q) {
; CHECK: ld1 { v{{[0-9]+}}.8b }
; CHECK: rev16 v{{[0-9]+}}.8b
; CHECK: st1 { v{{[0-9]+}}.4h }
- %1 = load <8 x i8>* %p
+ %1 = load <8 x i8>, <8 x i8>* %p
%2 = add <8 x i8> %1, %1
%3 = bitcast <8 x i8> %2 to <4 x i16>
%4 = add <4 x i16> %3, %3
@@ -466,7 +466,7 @@ define void @test_v8i8_i64(i64* %p, <8 x i8>* %q) {
; CHECK: ldr
; CHECK: rev64 v{{[0-9]+}}.8b
; CHECK: st1 { v{{[0-9]+}}.8b }
- %1 = load i64* %p
+ %1 = load i64, i64* %p
%2 = add i64 %1, %1
%3 = bitcast i64 %2 to <8 x i8>
%4 = add <8 x i8> %3, %3
@@ -479,7 +479,7 @@ define void @test_v8i8_f64(double* %p, <8 x i8>* %q) {
; CHECK: ldr
; CHECK: rev64 v{{[0-9]+}}.8b
; CHECK: st1 { v{{[0-9]+}}.8b }
- %1 = load double* %p
+ %1 = load double, double* %p
%2 = fadd double %1, %1
%3 = bitcast double %2 to <8 x i8>
%4 = add <8 x i8> %3, %3
@@ -492,7 +492,7 @@ define void @test_v8i8_v1i64(<1 x i64>* %p, <8 x i8>* %q) {
; CHECK: ldr
; CHECK: rev64 v{{[0-9]+}}.8b
; CHECK: st1 { v{{[0-9]+}}.8b }
- %1 = load <1 x i64>* %p
+ %1 = load <1 x i64>, <1 x i64>* %p
%2 = add <1 x i64> %1, %1
%3 = bitcast <1 x i64> %2 to <8 x i8>
%4 = add <8 x i8> %3, %3
@@ -505,7 +505,7 @@ define void @test_v8i8_v2f32(<2 x float>* %p, <8 x i8>* %q) {
; CHECK: ld1 { v{{[0-9]+}}.2s }
; CHECK: rev32 v{{[0-9]+}}.8b
; CHECK: st1 { v{{[0-9]+}}.8b }
- %1 = load <2 x float>* %p
+ %1 = load <2 x float>, <2 x float>* %p
%2 = fadd <2 x float> %1, %1
%3 = bitcast <2 x float> %2 to <8 x i8>
%4 = add <8 x i8> %3, %3
@@ -518,7 +518,7 @@ define void @test_v8i8_v2i32(<2 x i32>* %p, <8 x i8>* %q) {
; CHECK: ld1 { v{{[0-9]+}}.2s }
; CHECK: rev32 v{{[0-9]+}}.8b
; CHECK: st1 { v{{[0-9]+}}.8b }
- %1 = load <2 x i32>* %p
+ %1 = load <2 x i32>, <2 x i32>* %p
%2 = add <2 x i32> %1, %1
%3 = bitcast <2 x i32> %2 to <8 x i8>
%4 = add <8 x i8> %3, %3
@@ -531,7 +531,7 @@ define void @test_v8i8_v4i16(<4 x i16>* %p, <8 x i8>* %q) {
; CHECK: ld1 { v{{[0-9]+}}.4h }
; CHECK: rev16 v{{[0-9]+}}.8b
; CHECK: st1 { v{{[0-9]+}}.8b }
- %1 = load <4 x i16>* %p
+ %1 = load <4 x i16>, <4 x i16>* %p
%2 = add <4 x i16> %1, %1
%3 = bitcast <4 x i16> %2 to <8 x i8>
%4 = add <8 x i8> %3, %3
@@ -544,7 +544,7 @@ define void @test_f128_v2f64(<2 x double>* %p, fp128* %q) {
; CHECK: ld1 { v{{[0-9]+}}.2d }
; CHECK: ext
; CHECK: str
- %1 = load <2 x double>* %p
+ %1 = load <2 x double>, <2 x double>* %p
%2 = fadd <2 x double> %1, %1
%3 = bitcast <2 x double> %2 to fp128
%4 = fadd fp128 %3, %3
@@ -557,7 +557,7 @@ define void @test_f128_v2i64(<2 x i64>* %p, fp128* %q) {
; CHECK: ld1 { v{{[0-9]+}}.2d }
; CHECK: ext
; CHECK: str
- %1 = load <2 x i64>* %p
+ %1 = load <2 x i64>, <2 x i64>* %p
%2 = add <2 x i64> %1, %1
%3 = bitcast <2 x i64> %2 to fp128
%4 = fadd fp128 %3, %3
@@ -572,7 +572,7 @@ define void @test_f128_v4f32(<4 x float>* %p, fp128* %q) {
; CHECK: rev64 v{{[0-9]+}}.4s
; CHECK: ext
; CHECK: str q
- %1 = load <4 x float>* %p
+ %1 = load <4 x float>, <4 x float>* %p
%2 = fadd <4 x float> %1, %1
%3 = bitcast <4 x float> %2 to fp128
%4 = fadd fp128 %3, %3
@@ -586,7 +586,7 @@ define void @test_f128_v4i32(<4 x i32>* %p, fp128* %q) {
; CHECK: rev64 v{{[0-9]+}}.4s
; CHECK: ext
; CHECK: str
- %1 = load <4 x i32>* %p
+ %1 = load <4 x i32>, <4 x i32>* %p
%2 = add <4 x i32> %1, %1
%3 = bitcast <4 x i32> %2 to fp128
%4 = fadd fp128 %3, %3
@@ -600,7 +600,7 @@ define void @test_f128_v8i16(<8 x i16>* %p, fp128* %q) {
; CHECK: rev64 v{{[0-9]+}}.8h
; CHECK: ext
; CHECK: str
- %1 = load <8 x i16>* %p
+ %1 = load <8 x i16>, <8 x i16>* %p
%2 = add <8 x i16> %1, %1
%3 = bitcast <8 x i16> %2 to fp128
%4 = fadd fp128 %3, %3
@@ -613,7 +613,7 @@ define void @test_f128_v16i8(<16 x i8>* %p, fp128* %q) {
; CHECK: ld1 { v{{[0-9]+}}.16b }
; CHECK: ext
; CHECK: str q
- %1 = load <16 x i8>* %p
+ %1 = load <16 x i8>, <16 x i8>* %p
%2 = add <16 x i8> %1, %1
%3 = bitcast <16 x i8> %2 to fp128
%4 = fadd fp128 %3, %3
@@ -626,7 +626,7 @@ define void @test_v2f64_f128(fp128* %p, <2 x double>* %q) {
; CHECK: ldr
; CHECK: ext
; CHECK: st1 { v{{[0-9]+}}.2d }
- %1 = load fp128* %p
+ %1 = load fp128, fp128* %p
%2 = fadd fp128 %1, %1
%3 = bitcast fp128 %2 to <2 x double>
%4 = fadd <2 x double> %3, %3
@@ -638,7 +638,7 @@ define void @test_v2f64_f128(fp128* %p, <2 x double>* %q) {
define void @test_v2f64_v2i64(<2 x i64>* %p, <2 x double>* %q) {
; CHECK: ld1 { v{{[0-9]+}}.2d }
; CHECK: st1 { v{{[0-9]+}}.2d }
- %1 = load <2 x i64>* %p
+ %1 = load <2 x i64>, <2 x i64>* %p
%2 = add <2 x i64> %1, %1
%3 = bitcast <2 x i64> %2 to <2 x double>
%4 = fadd <2 x double> %3, %3
@@ -652,7 +652,7 @@ define void @test_v2f64_v4f32(<4 x float>* %p, <2 x double>* %q) {
; CHECK: rev64 v{{[0-9]+}}.4s
; CHECK: rev64 v{{[0-9]+}}.4s
; CHECK: st1 { v{{[0-9]+}}.2d }
- %1 = load <4 x float>* %p
+ %1 = load <4 x float>, <4 x float>* %p
%2 = fadd <4 x float> %1, %1
%3 = bitcast <4 x float> %2 to <2 x double>
%4 = fadd <2 x double> %3, %3
@@ -665,7 +665,7 @@ define void @test_v2f64_v4i32(<4 x i32>* %p, <2 x double>* %q) {
; CHECK: ld1 { v{{[0-9]+}}.4s }
; CHECK: rev64 v{{[0-9]+}}.4s
; CHECK: st1 { v{{[0-9]+}}.2d }
- %1 = load <4 x i32>* %p
+ %1 = load <4 x i32>, <4 x i32>* %p
%2 = add <4 x i32> %1, %1
%3 = bitcast <4 x i32> %2 to <2 x double>
%4 = fadd <2 x double> %3, %3
@@ -678,7 +678,7 @@ define void @test_v2f64_v8i16(<8 x i16>* %p, <2 x double>* %q) {
; CHECK: ld1 { v{{[0-9]+}}.8h }
; CHECK: rev64 v{{[0-9]+}}.8h
; CHECK: st1 { v{{[0-9]+}}.2d }
- %1 = load <8 x i16>* %p
+ %1 = load <8 x i16>, <8 x i16>* %p
%2 = add <8 x i16> %1, %1
%3 = bitcast <8 x i16> %2 to <2 x double>
%4 = fadd <2 x double> %3, %3
@@ -691,7 +691,7 @@ define void @test_v2f64_v16i8(<16 x i8>* %p, <2 x double>* %q) {
; CHECK: ld1 { v{{[0-9]+}}.16b }
; CHECK: rev64 v{{[0-9]+}}.16b
; CHECK: st1 { v{{[0-9]+}}.2d }
- %1 = load <16 x i8>* %p
+ %1 = load <16 x i8>, <16 x i8>* %p
%2 = add <16 x i8> %1, %1
%3 = bitcast <16 x i8> %2 to <2 x double>
%4 = fadd <2 x double> %3, %3
@@ -704,7 +704,7 @@ define void @test_v2i64_f128(fp128* %p, <2 x i64>* %q) {
; CHECK: ldr
; CHECK: ext
; CHECK: st1 { v{{[0-9]+}}.2d }
- %1 = load fp128* %p
+ %1 = load fp128, fp128* %p
%2 = fadd fp128 %1, %1
%3 = bitcast fp128 %2 to <2 x i64>
%4 = add <2 x i64> %3, %3
@@ -716,7 +716,7 @@ define void @test_v2i64_f128(fp128* %p, <2 x i64>* %q) {
define void @test_v2i64_v2f64(<2 x double>* %p, <2 x i64>* %q) {
; CHECK: ld1 { v{{[0-9]+}}.2d }
; CHECK: st1 { v{{[0-9]+}}.2d }
- %1 = load <2 x double>* %p
+ %1 = load <2 x double>, <2 x double>* %p
%2 = fadd <2 x double> %1, %1
%3 = bitcast <2 x double> %2 to <2 x i64>
%4 = add <2 x i64> %3, %3
@@ -730,7 +730,7 @@ define void @test_v2i64_v4f32(<4 x float>* %p, <2 x i64>* %q) {
; CHECK: rev64 v{{[0-9]+}}.4s
; CHECK: rev64 v{{[0-9]+}}.4s
; CHECK: st1 { v{{[0-9]+}}.2d }
- %1 = load <4 x float>* %p
+ %1 = load <4 x float>, <4 x float>* %p
%2 = fadd <4 x float> %1, %1
%3 = bitcast <4 x float> %2 to <2 x i64>
%4 = add <2 x i64> %3, %3
@@ -743,7 +743,7 @@ define void @test_v2i64_v4i32(<4 x i32>* %p, <2 x i64>* %q) {
; CHECK: ld1 { v{{[0-9]+}}.4s }
; CHECK: rev64 v{{[0-9]+}}.4s
; CHECK: st1 { v{{[0-9]+}}.2d }
- %1 = load <4 x i32>* %p
+ %1 = load <4 x i32>, <4 x i32>* %p
%2 = add <4 x i32> %1, %1
%3 = bitcast <4 x i32> %2 to <2 x i64>
%4 = add <2 x i64> %3, %3
@@ -756,7 +756,7 @@ define void @test_v2i64_v8i16(<8 x i16>* %p, <2 x i64>* %q) {
; CHECK: ld1 { v{{[0-9]+}}.8h }
; CHECK: rev64 v{{[0-9]+}}.8h
; CHECK: st1 { v{{[0-9]+}}.2d }
- %1 = load <8 x i16>* %p
+ %1 = load <8 x i16>, <8 x i16>* %p
%2 = add <8 x i16> %1, %1
%3 = bitcast <8 x i16> %2 to <2 x i64>
%4 = add <2 x i64> %3, %3
@@ -769,7 +769,7 @@ define void @test_v2i64_v16i8(<16 x i8>* %p, <2 x i64>* %q) {
; CHECK: ld1 { v{{[0-9]+}}.16b }
; CHECK: rev64 v{{[0-9]+}}.16b
; CHECK: st1 { v{{[0-9]+}}.2d }
- %1 = load <16 x i8>* %p
+ %1 = load <16 x i8>, <16 x i8>* %p
%2 = add <16 x i8> %1, %1
%3 = bitcast <16 x i8> %2 to <2 x i64>
%4 = add <2 x i64> %3, %3
@@ -784,7 +784,7 @@ define void @test_v4f32_f128(fp128* %p, <4 x float>* %q) {
; CHECK: ext
; CHECK: rev64 v{{[0-9]+}}.4s
; CHECK: st1 { v{{[0-9]+}}.2d }
- %1 = load fp128* %p
+ %1 = load fp128, fp128* %p
%2 = fadd fp128 %1, %1
%3 = bitcast fp128 %2 to <4 x float>
%4 = fadd <4 x float> %3, %3
@@ -798,7 +798,7 @@ define void @test_v4f32_v2f64(<2 x double>* %p, <4 x float>* %q) {
; CHECK: rev64 v{{[0-9]+}}.4s
; CHECK: rev64 v{{[0-9]+}}.4s
; CHECK: st1 { v{{[0-9]+}}.2d }
- %1 = load <2 x double>* %p
+ %1 = load <2 x double>, <2 x double>* %p
%2 = fadd <2 x double> %1, %1
%3 = bitcast <2 x double> %2 to <4 x float>
%4 = fadd <4 x float> %3, %3
@@ -812,7 +812,7 @@ define void @test_v4f32_v2i64(<2 x i64>* %p, <4 x float>* %q) {
; CHECK: rev64 v{{[0-9]+}}.4s
; CHECK: rev64 v{{[0-9]+}}.4s
; CHECK: st1 { v{{[0-9]+}}.2d }
- %1 = load <2 x i64>* %p
+ %1 = load <2 x i64>, <2 x i64>* %p
%2 = add <2 x i64> %1, %1
%3 = bitcast <2 x i64> %2 to <4 x float>
%4 = fadd <4 x float> %3, %3
@@ -825,7 +825,7 @@ define void @test_v4f32_v4i32(<4 x i32>* %p, <4 x float>* %q) {
; CHECK: ld1 { v{{[0-9]+}}.4s }
; CHECK: rev64 v{{[0-9]+}}.4s
; CHECK: st1 { v{{[0-9]+}}.2d }
- %1 = load <4 x i32>* %p
+ %1 = load <4 x i32>, <4 x i32>* %p
%2 = add <4 x i32> %1, %1
%3 = bitcast <4 x i32> %2 to <4 x float>
%4 = fadd <4 x float> %3, %3
@@ -839,7 +839,7 @@ define void @test_v4f32_v8i16(<8 x i16>* %p, <4 x float>* %q) {
; CHECK: rev32 v{{[0-9]+}}.8h
; CHECK: rev64 v{{[0-9]+}}.4s
; CHECK: st1 { v{{[0-9]+}}.2d }
- %1 = load <8 x i16>* %p
+ %1 = load <8 x i16>, <8 x i16>* %p
%2 = add <8 x i16> %1, %1
%3 = bitcast <8 x i16> %2 to <4 x float>
%4 = fadd <4 x float> %3, %3
@@ -853,7 +853,7 @@ define void @test_v4f32_v16i8(<16 x i8>* %p, <4 x float>* %q) {
; CHECK: rev32 v{{[0-9]+}}.16b
; CHECK: rev64 v{{[0-9]+}}.4s
; CHECK: st1 { v{{[0-9]+}}.2d }
- %1 = load <16 x i8>* %p
+ %1 = load <16 x i8>, <16 x i8>* %p
%2 = add <16 x i8> %1, %1
%3 = bitcast <16 x i8> %2 to <4 x float>
%4 = fadd <4 x float> %3, %3
@@ -867,7 +867,7 @@ define void @test_v4i32_f128(fp128* %p, <4 x i32>* %q) {
; CHECK: rev64 v{{[0-9]+}}.4s
; CHECK: ext
; CHECK: st1 { v{{[0-9]+}}.4s }
- %1 = load fp128* %p
+ %1 = load fp128, fp128* %p
%2 = fadd fp128 %1, %1
%3 = bitcast fp128 %2 to <4 x i32>
%4 = add <4 x i32> %3, %3
@@ -880,7 +880,7 @@ define void @test_v4i32_v2f64(<2 x double>* %p, <4 x i32>* %q) {
; CHECK: ld1 { v{{[0-9]+}}.2d }
; CHECK: rev64 v{{[0-9]+}}.4s
; CHECK: st1 { v{{[0-9]+}}.4s }
- %1 = load <2 x double>* %p
+ %1 = load <2 x double>, <2 x double>* %p
%2 = fadd <2 x double> %1, %1
%3 = bitcast <2 x double> %2 to <4 x i32>
%4 = add <4 x i32> %3, %3
@@ -893,7 +893,7 @@ define void @test_v4i32_v2i64(<2 x i64>* %p, <4 x i32>* %q) {
; CHECK: ld1 { v{{[0-9]+}}.2d }
; CHECK: rev64 v{{[0-9]+}}.4s
; CHECK: st1 { v{{[0-9]+}}.4s }
- %1 = load <2 x i64>* %p
+ %1 = load <2 x i64>, <2 x i64>* %p
%2 = add <2 x i64> %1, %1
%3 = bitcast <2 x i64> %2 to <4 x i32>
%4 = add <4 x i32> %3, %3
@@ -906,7 +906,7 @@ define void @test_v4i32_v4f32(<4 x float>* %p, <4 x i32>* %q) {
; CHECK: ld1 { v{{[0-9]+}}.2d }
; CHECK: rev64 v{{[0-9]+}}.4s
; CHECK: st1 { v{{[0-9]+}}.4s }
- %1 = load <4 x float>* %p
+ %1 = load <4 x float>, <4 x float>* %p
%2 = fadd <4 x float> %1, %1
%3 = bitcast <4 x float> %2 to <4 x i32>
%4 = add <4 x i32> %3, %3
@@ -919,7 +919,7 @@ define void @test_v4i32_v8i16(<8 x i16>* %p, <4 x i32>* %q) {
; CHECK: ld1 { v{{[0-9]+}}.8h }
; CHECK: rev32 v{{[0-9]+}}.8h
; CHECK: st1 { v{{[0-9]+}}.4s }
- %1 = load <8 x i16>* %p
+ %1 = load <8 x i16>, <8 x i16>* %p
%2 = add <8 x i16> %1, %1
%3 = bitcast <8 x i16> %2 to <4 x i32>
%4 = add <4 x i32> %3, %3
@@ -932,7 +932,7 @@ define void @test_v4i32_v16i8(<16 x i8>* %p, <4 x i32>* %q) {
; CHECK: ld1 { v{{[0-9]+}}.16b }
; CHECK: rev32 v{{[0-9]+}}.16b
; CHECK: st1 { v{{[0-9]+}}.4s }
- %1 = load <16 x i8>* %p
+ %1 = load <16 x i8>, <16 x i8>* %p
%2 = add <16 x i8> %1, %1
%3 = bitcast <16 x i8> %2 to <4 x i32>
%4 = add <4 x i32> %3, %3
@@ -946,7 +946,7 @@ define void @test_v8i16_f128(fp128* %p, <8 x i16>* %q) {
; CHECK: rev64 v{{[0-9]+}}.8h
; CHECK: ext
; CHECK: st1 { v{{[0-9]+}}.8h }
- %1 = load fp128* %p
+ %1 = load fp128, fp128* %p
%2 = fadd fp128 %1, %1
%3 = bitcast fp128 %2 to <8 x i16>
%4 = add <8 x i16> %3, %3
@@ -959,7 +959,7 @@ define void @test_v8i16_v2f64(<2 x double>* %p, <8 x i16>* %q) {
; CHECK: ld1 { v{{[0-9]+}}.2d }
; CHECK: rev64 v{{[0-9]+}}.8h
; CHECK: st1 { v{{[0-9]+}}.8h }
- %1 = load <2 x double>* %p
+ %1 = load <2 x double>, <2 x double>* %p
%2 = fadd <2 x double> %1, %1
%3 = bitcast <2 x double> %2 to <8 x i16>
%4 = add <8 x i16> %3, %3
@@ -972,7 +972,7 @@ define void @test_v8i16_v2i64(<2 x i64>* %p, <8 x i16>* %q) {
; CHECK: ld1 { v{{[0-9]+}}.2d }
; CHECK: rev64 v{{[0-9]+}}.8h
; CHECK: st1 { v{{[0-9]+}}.8h }
- %1 = load <2 x i64>* %p
+ %1 = load <2 x i64>, <2 x i64>* %p
%2 = add <2 x i64> %1, %1
%3 = bitcast <2 x i64> %2 to <8 x i16>
%4 = add <8 x i16> %3, %3
@@ -986,7 +986,7 @@ define void @test_v8i16_v4f32(<4 x float>* %p, <8 x i16>* %q) {
; CHECK: rev64 v{{[0-9]+}}.4s
; CHECK: rev32 v{{[0-9]+}}.8h
; CHECK: st1 { v{{[0-9]+}}.8h }
- %1 = load <4 x float>* %p
+ %1 = load <4 x float>, <4 x float>* %p
%2 = fadd <4 x float> %1, %1
%3 = bitcast <4 x float> %2 to <8 x i16>
%4 = add <8 x i16> %3, %3
@@ -999,7 +999,7 @@ define void @test_v8i16_v4i32(<4 x i32>* %p, <8 x i16>* %q) {
; CHECK: ld1 { v{{[0-9]+}}.4s }
; CHECK: rev32 v{{[0-9]+}}.8h
; CHECK: st1 { v{{[0-9]+}}.8h }
- %1 = load <4 x i32>* %p
+ %1 = load <4 x i32>, <4 x i32>* %p
%2 = add <4 x i32> %1, %1
%3 = bitcast <4 x i32> %2 to <8 x i16>
%4 = add <8 x i16> %3, %3
@@ -1012,7 +1012,7 @@ define void @test_v8i16_v16i8(<16 x i8>* %p, <8 x i16>* %q) {
; CHECK: ld1 { v{{[0-9]+}}.16b }
; CHECK: rev16 v{{[0-9]+}}.16b
; CHECK: st1 { v{{[0-9]+}}.8h }
- %1 = load <16 x i8>* %p
+ %1 = load <16 x i8>, <16 x i8>* %p
%2 = add <16 x i8> %1, %1
%3 = bitcast <16 x i8> %2 to <8 x i16>
%4 = add <8 x i16> %3, %3
@@ -1026,7 +1026,7 @@ define void @test_v16i8_f128(fp128* %p, <16 x i8>* %q) {
; CHECK: rev64 v{{[0-9]+}}.16b
; CHECK: ext
; CHECK: st1 { v{{[0-9]+}}.16b }
- %1 = load fp128* %p
+ %1 = load fp128, fp128* %p
%2 = fadd fp128 %1, %1
%3 = bitcast fp128 %2 to <16 x i8>
%4 = add <16 x i8> %3, %3
@@ -1039,7 +1039,7 @@ define void @test_v16i8_v2f64(<2 x double>* %p, <16 x i8>* %q) {
; CHECK: ld1 { v{{[0-9]+}}.2d }
; CHECK: rev64 v{{[0-9]+}}.16b
; CHECK: st1 { v{{[0-9]+}}.16b }
- %1 = load <2 x double>* %p
+ %1 = load <2 x double>, <2 x double>* %p
%2 = fadd <2 x double> %1, %1
%3 = bitcast <2 x double> %2 to <16 x i8>
%4 = add <16 x i8> %3, %3
@@ -1052,7 +1052,7 @@ define void @test_v16i8_v2i64(<2 x i64>* %p, <16 x i8>* %q) {
; CHECK: ld1 { v{{[0-9]+}}.2d }
; CHECK: rev64 v{{[0-9]+}}.16b
; CHECK: st1 { v{{[0-9]+}}.16b }
- %1 = load <2 x i64>* %p
+ %1 = load <2 x i64>, <2 x i64>* %p
%2 = add <2 x i64> %1, %1
%3 = bitcast <2 x i64> %2 to <16 x i8>
%4 = add <16 x i8> %3, %3
@@ -1066,7 +1066,7 @@ define void @test_v16i8_v4f32(<4 x float>* %p, <16 x i8>* %q) {
; CHECK: rev64 v{{[0-9]+}}.4s
; CHECK: rev32 v{{[0-9]+}}.16b
; CHECK: st1 { v{{[0-9]+}}.16b }
- %1 = load <4 x float>* %p
+ %1 = load <4 x float>, <4 x float>* %p
%2 = fadd <4 x float> %1, %1
%3 = bitcast <4 x float> %2 to <16 x i8>
%4 = add <16 x i8> %3, %3
@@ -1079,7 +1079,7 @@ define void @test_v16i8_v4i32(<4 x i32>* %p, <16 x i8>* %q) {
; CHECK: ld1 { v{{[0-9]+}}.4s }
; CHECK: rev32 v{{[0-9]+}}.16b
; CHECK: st1 { v{{[0-9]+}}.16b }
- %1 = load <4 x i32>* %p
+ %1 = load <4 x i32>, <4 x i32>* %p
%2 = add <4 x i32> %1, %1
%3 = bitcast <4 x i32> %2 to <16 x i8>
%4 = add <16 x i8> %3, %3
@@ -1092,7 +1092,7 @@ define void @test_v16i8_v8i16(<8 x i16>* %p, <16 x i8>* %q) {
; CHECK: ld1 { v{{[0-9]+}}.8h }
; CHECK: rev16 v{{[0-9]+}}.16b
; CHECK: st1 { v{{[0-9]+}}.16b }
- %1 = load <8 x i16>* %p
+ %1 = load <8 x i16>, <8 x i16>* %p
%2 = add <8 x i16> %1, %1
%3 = bitcast <8 x i16> %2 to <16 x i8>
%4 = add <16 x i8> %3, %3
diff --git a/test/CodeGen/AArch64/arm64-big-endian-varargs.ll b/test/CodeGen/AArch64/arm64-big-endian-varargs.ll
index db1f48c6fd5e..e5e16848a4b0 100644
--- a/test/CodeGen/AArch64/arm64-big-endian-varargs.ll
+++ b/test/CodeGen/AArch64/arm64-big-endian-varargs.ll
@@ -21,8 +21,8 @@ entry:
%vl = alloca %struct.__va_list, align 8
%vl1 = bitcast %struct.__va_list* %vl to i8*
call void @llvm.va_start(i8* %vl1)
- %vr_offs_p = getelementptr inbounds %struct.__va_list* %vl, i64 0, i32 4
- %vr_offs = load i32* %vr_offs_p, align 4
+ %vr_offs_p = getelementptr inbounds %struct.__va_list, %struct.__va_list* %vl, i64 0, i32 4
+ %vr_offs = load i32, i32* %vr_offs_p, align 4
%0 = icmp sgt i32 %vr_offs, -1
br i1 %0, label %vaarg.on_stack, label %vaarg.maybe_reg
@@ -33,26 +33,26 @@ vaarg.maybe_reg: ; preds = %entry
br i1 %inreg, label %vaarg.in_reg, label %vaarg.on_stack
vaarg.in_reg: ; preds = %vaarg.maybe_reg
- %reg_top_p = getelementptr inbounds %struct.__va_list* %vl, i64 0, i32 2
- %reg_top = load i8** %reg_top_p, align 8
+ %reg_top_p = getelementptr inbounds %struct.__va_list, %struct.__va_list* %vl, i64 0, i32 2
+ %reg_top = load i8*, i8** %reg_top_p, align 8
%1 = sext i32 %vr_offs to i64
- %2 = getelementptr i8* %reg_top, i64 %1
+ %2 = getelementptr i8, i8* %reg_top, i64 %1
%3 = ptrtoint i8* %2 to i64
%align_be = add i64 %3, 8
%4 = inttoptr i64 %align_be to i8*
br label %vaarg.end
vaarg.on_stack: ; preds = %vaarg.maybe_reg, %entry
- %stack_p = getelementptr inbounds %struct.__va_list* %vl, i64 0, i32 0
- %stack = load i8** %stack_p, align 8
- %new_stack = getelementptr i8* %stack, i64 8
+ %stack_p = getelementptr inbounds %struct.__va_list, %struct.__va_list* %vl, i64 0, i32 0
+ %stack = load i8*, i8** %stack_p, align 8
+ %new_stack = getelementptr i8, i8* %stack, i64 8
store i8* %new_stack, i8** %stack_p, align 8
br label %vaarg.end
vaarg.end: ; preds = %vaarg.on_stack, %vaarg.in_reg
%.sink = phi i8* [ %4, %vaarg.in_reg ], [ %stack, %vaarg.on_stack ]
%5 = bitcast i8* %.sink to double*
- %6 = load double* %5, align 8
+ %6 = load double, double* %5, align 8
call void @llvm.va_end(i8* %vl1)
ret double %6
}
diff --git a/test/CodeGen/AArch64/arm64-big-endian-vector-caller.ll b/test/CodeGen/AArch64/arm64-big-endian-vector-caller.ll
index d72d0a5db41e..d08976788e91 100644
--- a/test/CodeGen/AArch64/arm64-big-endian-vector-caller.ll
+++ b/test/CodeGen/AArch64/arm64-big-endian-vector-caller.ll
@@ -1,13 +1,19 @@
; RUN: llc -mtriple aarch64_be < %s -aarch64-load-store-opt=false -o - | FileCheck %s
; RUN: llc -mtriple aarch64_be < %s -aarch64-load-store-opt=false -fast-isel=true -O0 -o - | FileCheck %s
+; Note: we split the functions below into multiple basic blocks to isolate the
+; call instruction we want to test from fast-isel's failure to select the
+; instructions that follow it.
+
; CHECK-LABEL: test_i64_f64:
declare i64 @test_i64_f64_helper(double %p)
define void @test_i64_f64(double* %p, i64* %q) {
; CHECK-NOT: rev
- %1 = load double* %p
+ %1 = load double, double* %p
%2 = fadd double %1, %1
%3 = call i64 @test_i64_f64_helper(double %2)
+ br label %return_bb
+return_bb:
%4 = add i64 %3, %3
store i64 %4, i64* %q
ret void
@@ -17,9 +23,11 @@ define void @test_i64_f64(double* %p, i64* %q) {
declare i64 @test_i64_v1i64_helper(<1 x i64> %p)
define void @test_i64_v1i64(<1 x i64>* %p, i64* %q) {
; CHECK-NOT: rev
- %1 = load <1 x i64>* %p
+ %1 = load <1 x i64>, <1 x i64>* %p
%2 = add <1 x i64> %1, %1
%3 = call i64 @test_i64_v1i64_helper(<1 x i64> %2)
+ br label %return_bb
+return_bb:
%4 = add i64 %3, %3
store i64 %4, i64* %q
ret void
@@ -29,9 +37,11 @@ define void @test_i64_v1i64(<1 x i64>* %p, i64* %q) {
declare i64 @test_i64_v2f32_helper(<2 x float> %p)
define void @test_i64_v2f32(<2 x float>* %p, i64* %q) {
; CHECK: rev64 v{{[0-9]+}}.2s
- %1 = load <2 x float>* %p
+ %1 = load <2 x float>, <2 x float>* %p
%2 = fadd <2 x float> %1, %1
%3 = call i64 @test_i64_v2f32_helper(<2 x float> %2)
+ br label %return_bb
+return_bb:
%4 = add i64 %3, %3
store i64 %4, i64* %q
ret void
@@ -41,9 +51,11 @@ define void @test_i64_v2f32(<2 x float>* %p, i64* %q) {
declare i64 @test_i64_v2i32_helper(<2 x i32> %p)
define void @test_i64_v2i32(<2 x i32>* %p, i64* %q) {
; CHECK: rev64 v{{[0-9]+}}.2s
- %1 = load <2 x i32>* %p
+ %1 = load <2 x i32>, <2 x i32>* %p
%2 = add <2 x i32> %1, %1
%3 = call i64 @test_i64_v2i32_helper(<2 x i32> %2)
+ br label %return_bb
+return_bb:
%4 = add i64 %3, %3
store i64 %4, i64* %q
ret void
@@ -53,9 +65,11 @@ define void @test_i64_v2i32(<2 x i32>* %p, i64* %q) {
declare i64 @test_i64_v4i16_helper(<4 x i16> %p)
define void @test_i64_v4i16(<4 x i16>* %p, i64* %q) {
; CHECK: rev64 v{{[0-9]+}}.4h
- %1 = load <4 x i16>* %p
+ %1 = load <4 x i16>, <4 x i16>* %p
%2 = add <4 x i16> %1, %1
%3 = call i64 @test_i64_v4i16_helper(<4 x i16> %2)
+ br label %return_bb
+return_bb:
%4 = add i64 %3, %3
store i64 %4, i64* %q
ret void
@@ -65,9 +79,11 @@ define void @test_i64_v4i16(<4 x i16>* %p, i64* %q) {
declare i64 @test_i64_v8i8_helper(<8 x i8> %p)
define void @test_i64_v8i8(<8 x i8>* %p, i64* %q) {
; CHECK: rev64 v{{[0-9]+}}.8b
- %1 = load <8 x i8>* %p
+ %1 = load <8 x i8>, <8 x i8>* %p
%2 = add <8 x i8> %1, %1
%3 = call i64 @test_i64_v8i8_helper(<8 x i8> %2)
+ br label %return_bb
+return_bb:
%4 = add i64 %3, %3
store i64 %4, i64* %q
ret void
@@ -77,9 +93,11 @@ define void @test_i64_v8i8(<8 x i8>* %p, i64* %q) {
declare double @test_f64_i64_helper(i64 %p)
define void @test_f64_i64(i64* %p, double* %q) {
; CHECK-NOT: rev
- %1 = load i64* %p
+ %1 = load i64, i64* %p
%2 = add i64 %1, %1
%3 = call double @test_f64_i64_helper(i64 %2)
+ br label %return_bb
+return_bb:
%4 = fadd double %3, %3
store double %4, double* %q
ret void
@@ -89,9 +107,11 @@ define void @test_f64_i64(i64* %p, double* %q) {
declare double @test_f64_v1i64_helper(<1 x i64> %p)
define void @test_f64_v1i64(<1 x i64>* %p, double* %q) {
; CHECK-NOT: rev
- %1 = load <1 x i64>* %p
+ %1 = load <1 x i64>, <1 x i64>* %p
%2 = add <1 x i64> %1, %1
%3 = call double @test_f64_v1i64_helper(<1 x i64> %2)
+ br label %return_bb
+return_bb:
%4 = fadd double %3, %3
store double %4, double* %q
ret void
@@ -101,9 +121,11 @@ define void @test_f64_v1i64(<1 x i64>* %p, double* %q) {
declare double @test_f64_v2f32_helper(<2 x float> %p)
define void @test_f64_v2f32(<2 x float>* %p, double* %q) {
; CHECK: rev64 v{{[0-9]+}}.2s
- %1 = load <2 x float>* %p
+ %1 = load <2 x float>, <2 x float>* %p
%2 = fadd <2 x float> %1, %1
%3 = call double @test_f64_v2f32_helper(<2 x float> %2)
+ br label %return_bb
+return_bb:
%4 = fadd double %3, %3
store double %4, double* %q
ret void
@@ -113,9 +135,11 @@ define void @test_f64_v2f32(<2 x float>* %p, double* %q) {
declare double @test_f64_v2i32_helper(<2 x i32> %p)
define void @test_f64_v2i32(<2 x i32>* %p, double* %q) {
; CHECK: rev64 v{{[0-9]+}}.2s
- %1 = load <2 x i32>* %p
+ %1 = load <2 x i32>, <2 x i32>* %p
%2 = add <2 x i32> %1, %1
%3 = call double @test_f64_v2i32_helper(<2 x i32> %2)
+ br label %return_bb
+return_bb:
%4 = fadd double %3, %3
store double %4, double* %q
ret void
@@ -125,9 +149,11 @@ define void @test_f64_v2i32(<2 x i32>* %p, double* %q) {
declare double @test_f64_v4i16_helper(<4 x i16> %p)
define void @test_f64_v4i16(<4 x i16>* %p, double* %q) {
; CHECK: rev64 v{{[0-9]+}}.4h
- %1 = load <4 x i16>* %p
+ %1 = load <4 x i16>, <4 x i16>* %p
%2 = add <4 x i16> %1, %1
%3 = call double @test_f64_v4i16_helper(<4 x i16> %2)
+ br label %return_bb
+return_bb:
%4 = fadd double %3, %3
store double %4, double* %q
ret void
@@ -137,9 +163,11 @@ define void @test_f64_v4i16(<4 x i16>* %p, double* %q) {
declare double @test_f64_v8i8_helper(<8 x i8> %p)
define void @test_f64_v8i8(<8 x i8>* %p, double* %q) {
; CHECK: rev64 v{{[0-9]+}}.8b
- %1 = load <8 x i8>* %p
+ %1 = load <8 x i8>, <8 x i8>* %p
%2 = add <8 x i8> %1, %1
%3 = call double @test_f64_v8i8_helper(<8 x i8> %2)
+ br label %return_bb
+return_bb:
%4 = fadd double %3, %3
store double %4, double* %q
ret void
@@ -149,9 +177,11 @@ define void @test_f64_v8i8(<8 x i8>* %p, double* %q) {
declare <1 x i64> @test_v1i64_i64_helper(i64 %p)
define void @test_v1i64_i64(i64* %p, <1 x i64>* %q) {
; CHECK-NOT: rev
- %1 = load i64* %p
+ %1 = load i64, i64* %p
%2 = add i64 %1, %1
%3 = call <1 x i64> @test_v1i64_i64_helper(i64 %2)
+ br label %return_bb
+return_bb:
%4 = add <1 x i64> %3, %3
store <1 x i64> %4, <1 x i64>* %q
ret void
@@ -161,9 +191,11 @@ define void @test_v1i64_i64(i64* %p, <1 x i64>* %q) {
declare <1 x i64> @test_v1i64_f64_helper(double %p)
define void @test_v1i64_f64(double* %p, <1 x i64>* %q) {
; CHECK-NOT: rev
- %1 = load double* %p
+ %1 = load double, double* %p
%2 = fadd double %1, %1
%3 = call <1 x i64> @test_v1i64_f64_helper(double %2)
+ br label %return_bb
+return_bb:
%4 = add <1 x i64> %3, %3
store <1 x i64> %4, <1 x i64>* %q
ret void
@@ -173,9 +205,11 @@ define void @test_v1i64_f64(double* %p, <1 x i64>* %q) {
declare <1 x i64> @test_v1i64_v2f32_helper(<2 x float> %p)
define void @test_v1i64_v2f32(<2 x float>* %p, <1 x i64>* %q) {
; CHECK: rev64 v{{[0-9]+}}.2s
- %1 = load <2 x float>* %p
+ %1 = load <2 x float>, <2 x float>* %p
%2 = fadd <2 x float> %1, %1
%3 = call <1 x i64> @test_v1i64_v2f32_helper(<2 x float> %2)
+ br label %return_bb
+return_bb:
%4 = add <1 x i64> %3, %3
store <1 x i64> %4, <1 x i64>* %q
ret void
@@ -185,9 +219,11 @@ define void @test_v1i64_v2f32(<2 x float>* %p, <1 x i64>* %q) {
declare <1 x i64> @test_v1i64_v2i32_helper(<2 x i32> %p)
define void @test_v1i64_v2i32(<2 x i32>* %p, <1 x i64>* %q) {
; CHECK: rev64 v{{[0-9]+}}.2s
- %1 = load <2 x i32>* %p
+ %1 = load <2 x i32>, <2 x i32>* %p
%2 = add <2 x i32> %1, %1
%3 = call <1 x i64> @test_v1i64_v2i32_helper(<2 x i32> %2)
+ br label %return_bb
+return_bb:
%4 = add <1 x i64> %3, %3
store <1 x i64> %4, <1 x i64>* %q
ret void
@@ -197,9 +233,11 @@ define void @test_v1i64_v2i32(<2 x i32>* %p, <1 x i64>* %q) {
declare <1 x i64> @test_v1i64_v4i16_helper(<4 x i16> %p)
define void @test_v1i64_v4i16(<4 x i16>* %p, <1 x i64>* %q) {
; CHECK: rev64 v{{[0-9]+}}.4h
- %1 = load <4 x i16>* %p
+ %1 = load <4 x i16>, <4 x i16>* %p
%2 = add <4 x i16> %1, %1
%3 = call <1 x i64> @test_v1i64_v4i16_helper(<4 x i16> %2)
+ br label %return_bb
+return_bb:
%4 = add <1 x i64> %3, %3
store <1 x i64> %4, <1 x i64>* %q
ret void
@@ -209,9 +247,11 @@ define void @test_v1i64_v4i16(<4 x i16>* %p, <1 x i64>* %q) {
declare <1 x i64> @test_v1i64_v8i8_helper(<8 x i8> %p)
define void @test_v1i64_v8i8(<8 x i8>* %p, <1 x i64>* %q) {
; CHECK: rev64 v{{[0-9]+}}.8b
- %1 = load <8 x i8>* %p
+ %1 = load <8 x i8>, <8 x i8>* %p
%2 = add <8 x i8> %1, %1
%3 = call <1 x i64> @test_v1i64_v8i8_helper(<8 x i8> %2)
+ br label %return_bb
+return_bb:
%4 = add <1 x i64> %3, %3
store <1 x i64> %4, <1 x i64>* %q
ret void
@@ -221,9 +261,11 @@ define void @test_v1i64_v8i8(<8 x i8>* %p, <1 x i64>* %q) {
declare <2 x float> @test_v2f32_i64_helper(i64 %p)
define void @test_v2f32_i64(i64* %p, <2 x float>* %q) {
; CHECK: rev64 v{{[0-9]+}}.2s
- %1 = load i64* %p
+ %1 = load i64, i64* %p
%2 = add i64 %1, %1
%3 = call <2 x float> @test_v2f32_i64_helper(i64 %2)
+ br label %return_bb
+return_bb:
%4 = fadd <2 x float> %3, %3
store <2 x float> %4, <2 x float>* %q
ret void
@@ -233,9 +275,11 @@ define void @test_v2f32_i64(i64* %p, <2 x float>* %q) {
declare <2 x float> @test_v2f32_f64_helper(double %p)
define void @test_v2f32_f64(double* %p, <2 x float>* %q) {
; CHECK: rev64 v{{[0-9]+}}.2s
- %1 = load double* %p
+ %1 = load double, double* %p
%2 = fadd double %1, %1
%3 = call <2 x float> @test_v2f32_f64_helper(double %2)
+ br label %return_bb
+return_bb:
%4 = fadd <2 x float> %3, %3
store <2 x float> %4, <2 x float>* %q
ret void
@@ -245,9 +289,11 @@ define void @test_v2f32_f64(double* %p, <2 x float>* %q) {
declare <2 x float> @test_v2f32_v1i64_helper(<1 x i64> %p)
define void @test_v2f32_v1i64(<1 x i64>* %p, <2 x float>* %q) {
; CHECK: rev64 v{{[0-9]+}}.2s
- %1 = load <1 x i64>* %p
+ %1 = load <1 x i64>, <1 x i64>* %p
%2 = add <1 x i64> %1, %1
%3 = call <2 x float> @test_v2f32_v1i64_helper(<1 x i64> %2)
+ br label %return_bb
+return_bb:
%4 = fadd <2 x float> %3, %3
store <2 x float> %4, <2 x float>* %q
ret void
@@ -258,9 +304,11 @@ declare <2 x float> @test_v2f32_v2i32_helper(<2 x i32> %p)
define void @test_v2f32_v2i32(<2 x i32>* %p, <2 x float>* %q) {
; CHECK: rev64 v{{[0-9]+}}.2s
; CHECK: rev64 v{{[0-9]+}}.2s
- %1 = load <2 x i32>* %p
+ %1 = load <2 x i32>, <2 x i32>* %p
%2 = add <2 x i32> %1, %1
%3 = call <2 x float> @test_v2f32_v2i32_helper(<2 x i32> %2)
+ br label %return_bb
+return_bb:
%4 = fadd <2 x float> %3, %3
store <2 x float> %4, <2 x float>* %q
ret void
@@ -271,9 +319,11 @@ declare <2 x float> @test_v2f32_v4i16_helper(<4 x i16> %p)
define void @test_v2f32_v4i16(<4 x i16>* %p, <2 x float>* %q) {
; CHECK: rev64 v{{[0-9]+}}.4h
; CHECK: rev64 v{{[0-9]+}}.2s
- %1 = load <4 x i16>* %p
+ %1 = load <4 x i16>, <4 x i16>* %p
%2 = add <4 x i16> %1, %1
%3 = call <2 x float> @test_v2f32_v4i16_helper(<4 x i16> %2)
+ br label %return_bb
+return_bb:
%4 = fadd <2 x float> %3, %3
store <2 x float> %4, <2 x float>* %q
ret void
@@ -284,9 +334,11 @@ declare <2 x float> @test_v2f32_v8i8_helper(<8 x i8> %p)
define void @test_v2f32_v8i8(<8 x i8>* %p, <2 x float>* %q) {
; CHECK: rev64 v{{[0-9]+}}.8b
; CHECK: rev64 v{{[0-9]+}}.2s
- %1 = load <8 x i8>* %p
+ %1 = load <8 x i8>, <8 x i8>* %p
%2 = add <8 x i8> %1, %1
%3 = call <2 x float> @test_v2f32_v8i8_helper(<8 x i8> %2)
+ br label %return_bb
+return_bb:
%4 = fadd <2 x float> %3, %3
store <2 x float> %4, <2 x float>* %q
ret void
@@ -296,9 +348,11 @@ define void @test_v2f32_v8i8(<8 x i8>* %p, <2 x float>* %q) {
declare <2 x i32> @test_v2i32_i64_helper(i64 %p)
define void @test_v2i32_i64(i64* %p, <2 x i32>* %q) {
; CHECK: rev64 v{{[0-9]+}}.2s
- %1 = load i64* %p
+ %1 = load i64, i64* %p
%2 = add i64 %1, %1
%3 = call <2 x i32> @test_v2i32_i64_helper(i64 %2)
+ br label %return_bb
+return_bb:
%4 = add <2 x i32> %3, %3
store <2 x i32> %4, <2 x i32>* %q
ret void
@@ -308,9 +362,11 @@ define void @test_v2i32_i64(i64* %p, <2 x i32>* %q) {
declare <2 x i32> @test_v2i32_f64_helper(double %p)
define void @test_v2i32_f64(double* %p, <2 x i32>* %q) {
; CHECK: rev64 v{{[0-9]+}}.2s
- %1 = load double* %p
+ %1 = load double, double* %p
%2 = fadd double %1, %1
%3 = call <2 x i32> @test_v2i32_f64_helper(double %2)
+ br label %return_bb
+return_bb:
%4 = add <2 x i32> %3, %3
store <2 x i32> %4, <2 x i32>* %q
ret void
@@ -320,9 +376,11 @@ define void @test_v2i32_f64(double* %p, <2 x i32>* %q) {
declare <2 x i32> @test_v2i32_v1i64_helper(<1 x i64> %p)
define void @test_v2i32_v1i64(<1 x i64>* %p, <2 x i32>* %q) {
; CHECK: rev64 v{{[0-9]+}}.2s
- %1 = load <1 x i64>* %p
+ %1 = load <1 x i64>, <1 x i64>* %p
%2 = add <1 x i64> %1, %1
%3 = call <2 x i32> @test_v2i32_v1i64_helper(<1 x i64> %2)
+ br label %return_bb
+return_bb:
%4 = add <2 x i32> %3, %3
store <2 x i32> %4, <2 x i32>* %q
ret void
@@ -333,9 +391,11 @@ declare <2 x i32> @test_v2i32_v2f32_helper(<2 x float> %p)
define void @test_v2i32_v2f32(<2 x float>* %p, <2 x i32>* %q) {
; CHECK: rev64 v{{[0-9]+}}.2s
; CHECK: rev64 v{{[0-9]+}}.2s
- %1 = load <2 x float>* %p
+ %1 = load <2 x float>, <2 x float>* %p
%2 = fadd <2 x float> %1, %1
%3 = call <2 x i32> @test_v2i32_v2f32_helper(<2 x float> %2)
+ br label %return_bb
+return_bb:
%4 = add <2 x i32> %3, %3
store <2 x i32> %4, <2 x i32>* %q
ret void
@@ -346,9 +406,11 @@ declare <2 x i32> @test_v2i32_v4i16_helper(<4 x i16> %p)
define void @test_v2i32_v4i16(<4 x i16>* %p, <2 x i32>* %q) {
; CHECK: rev64 v{{[0-9]+}}.4h
; CHECK: rev64 v{{[0-9]+}}.2s
- %1 = load <4 x i16>* %p
+ %1 = load <4 x i16>, <4 x i16>* %p
%2 = add <4 x i16> %1, %1
%3 = call <2 x i32> @test_v2i32_v4i16_helper(<4 x i16> %2)
+ br label %return_bb
+return_bb:
%4 = add <2 x i32> %3, %3
store <2 x i32> %4, <2 x i32>* %q
ret void
@@ -359,9 +421,11 @@ declare <2 x i32> @test_v2i32_v8i8_helper(<8 x i8> %p)
define void @test_v2i32_v8i8(<8 x i8>* %p, <2 x i32>* %q) {
; CHECK: rev64 v{{[0-9]+}}.8b
; CHECK: rev64 v{{[0-9]+}}.2s
- %1 = load <8 x i8>* %p
+ %1 = load <8 x i8>, <8 x i8>* %p
%2 = add <8 x i8> %1, %1
%3 = call <2 x i32> @test_v2i32_v8i8_helper(<8 x i8> %2)
+ br label %return_bb
+return_bb:
%4 = add <2 x i32> %3, %3
store <2 x i32> %4, <2 x i32>* %q
ret void
@@ -371,9 +435,11 @@ define void @test_v2i32_v8i8(<8 x i8>* %p, <2 x i32>* %q) {
declare <4 x i16> @test_v4i16_i64_helper(i64 %p)
define void @test_v4i16_i64(i64* %p, <4 x i16>* %q) {
; CHECK: rev64 v{{[0-9]+}}.4h
- %1 = load i64* %p
+ %1 = load i64, i64* %p
%2 = add i64 %1, %1
%3 = call <4 x i16> @test_v4i16_i64_helper(i64 %2)
+ br label %return_bb
+return_bb:
%4 = add <4 x i16> %3, %3
store <4 x i16> %4, <4 x i16>* %q
ret void
@@ -383,9 +449,11 @@ define void @test_v4i16_i64(i64* %p, <4 x i16>* %q) {
declare <4 x i16> @test_v4i16_f64_helper(double %p)
define void @test_v4i16_f64(double* %p, <4 x i16>* %q) {
; CHECK: rev64 v{{[0-9]+}}.4h
- %1 = load double* %p
+ %1 = load double, double* %p
%2 = fadd double %1, %1
%3 = call <4 x i16> @test_v4i16_f64_helper(double %2)
+ br label %return_bb
+return_bb:
%4 = add <4 x i16> %3, %3
store <4 x i16> %4, <4 x i16>* %q
ret void
@@ -395,9 +463,11 @@ define void @test_v4i16_f64(double* %p, <4 x i16>* %q) {
declare <4 x i16> @test_v4i16_v1i64_helper(<1 x i64> %p)
define void @test_v4i16_v1i64(<1 x i64>* %p, <4 x i16>* %q) {
; CHECK: rev64 v{{[0-9]+}}.4h
- %1 = load <1 x i64>* %p
+ %1 = load <1 x i64>, <1 x i64>* %p
%2 = add <1 x i64> %1, %1
%3 = call <4 x i16> @test_v4i16_v1i64_helper(<1 x i64> %2)
+ br label %return_bb
+return_bb:
%4 = add <4 x i16> %3, %3
store <4 x i16> %4, <4 x i16>* %q
ret void
@@ -408,9 +478,11 @@ declare <4 x i16> @test_v4i16_v2f32_helper(<2 x float> %p)
define void @test_v4i16_v2f32(<2 x float>* %p, <4 x i16>* %q) {
; CHECK: rev64 v{{[0-9]+}}.2s
; CHECK: rev64 v{{[0-9]+}}.4h
- %1 = load <2 x float>* %p
+ %1 = load <2 x float>, <2 x float>* %p
%2 = fadd <2 x float> %1, %1
%3 = call <4 x i16> @test_v4i16_v2f32_helper(<2 x float> %2)
+ br label %return_bb
+return_bb:
%4 = add <4 x i16> %3, %3
store <4 x i16> %4, <4 x i16>* %q
ret void
@@ -421,9 +493,11 @@ declare <4 x i16> @test_v4i16_v2i32_helper(<2 x i32> %p)
define void @test_v4i16_v2i32(<2 x i32>* %p, <4 x i16>* %q) {
; CHECK: rev64 v{{[0-9]+}}.2s
; CHECK: rev64 v{{[0-9]+}}.4h
- %1 = load <2 x i32>* %p
+ %1 = load <2 x i32>, <2 x i32>* %p
%2 = add <2 x i32> %1, %1
%3 = call <4 x i16> @test_v4i16_v2i32_helper(<2 x i32> %2)
+ br label %return_bb
+return_bb:
%4 = add <4 x i16> %3, %3
store <4 x i16> %4, <4 x i16>* %q
ret void
@@ -434,9 +508,11 @@ declare <4 x i16> @test_v4i16_v8i8_helper(<8 x i8> %p)
define void @test_v4i16_v8i8(<8 x i8>* %p, <4 x i16>* %q) {
; CHECK: rev64 v{{[0-9]+}}.8b
; CHECK: rev64 v{{[0-9]+}}.4h
- %1 = load <8 x i8>* %p
+ %1 = load <8 x i8>, <8 x i8>* %p
%2 = add <8 x i8> %1, %1
%3 = call <4 x i16> @test_v4i16_v8i8_helper(<8 x i8> %2)
+ br label %return_bb
+return_bb:
%4 = add <4 x i16> %3, %3
store <4 x i16> %4, <4 x i16>* %q
ret void
@@ -446,9 +522,11 @@ define void @test_v4i16_v8i8(<8 x i8>* %p, <4 x i16>* %q) {
declare <8 x i8> @test_v8i8_i64_helper(i64 %p)
define void @test_v8i8_i64(i64* %p, <8 x i8>* %q) {
; CHECK: rev64 v{{[0-9]+}}.8b
- %1 = load i64* %p
+ %1 = load i64, i64* %p
%2 = add i64 %1, %1
%3 = call <8 x i8> @test_v8i8_i64_helper(i64 %2)
+ br label %return_bb
+return_bb:
%4 = add <8 x i8> %3, %3
store <8 x i8> %4, <8 x i8>* %q
ret void
@@ -458,9 +536,11 @@ define void @test_v8i8_i64(i64* %p, <8 x i8>* %q) {
declare <8 x i8> @test_v8i8_f64_helper(double %p)
define void @test_v8i8_f64(double* %p, <8 x i8>* %q) {
; CHECK: rev64 v{{[0-9]+}}.8b
- %1 = load double* %p
+ %1 = load double, double* %p
%2 = fadd double %1, %1
%3 = call <8 x i8> @test_v8i8_f64_helper(double %2)
+ br label %return_bb
+return_bb:
%4 = add <8 x i8> %3, %3
store <8 x i8> %4, <8 x i8>* %q
ret void
@@ -470,9 +550,11 @@ define void @test_v8i8_f64(double* %p, <8 x i8>* %q) {
declare <8 x i8> @test_v8i8_v1i64_helper(<1 x i64> %p)
define void @test_v8i8_v1i64(<1 x i64>* %p, <8 x i8>* %q) {
; CHECK: rev64 v{{[0-9]+}}.8b
- %1 = load <1 x i64>* %p
+ %1 = load <1 x i64>, <1 x i64>* %p
%2 = add <1 x i64> %1, %1
%3 = call <8 x i8> @test_v8i8_v1i64_helper(<1 x i64> %2)
+ br label %return_bb
+return_bb:
%4 = add <8 x i8> %3, %3
store <8 x i8> %4, <8 x i8>* %q
ret void
@@ -483,9 +565,11 @@ declare <8 x i8> @test_v8i8_v2f32_helper(<2 x float> %p)
define void @test_v8i8_v2f32(<2 x float>* %p, <8 x i8>* %q) {
; CHECK: rev64 v{{[0-9]+}}.2s
; CHECK: rev64 v{{[0-9]+}}.8b
- %1 = load <2 x float>* %p
+ %1 = load <2 x float>, <2 x float>* %p
%2 = fadd <2 x float> %1, %1
%3 = call <8 x i8> @test_v8i8_v2f32_helper(<2 x float> %2)
+ br label %return_bb
+return_bb:
%4 = add <8 x i8> %3, %3
store <8 x i8> %4, <8 x i8>* %q
ret void
@@ -496,9 +580,11 @@ declare <8 x i8> @test_v8i8_v2i32_helper(<2 x i32> %p)
define void @test_v8i8_v2i32(<2 x i32>* %p, <8 x i8>* %q) {
; CHECK: rev64 v{{[0-9]+}}.2s
; CHECK: rev64 v{{[0-9]+}}.8b
- %1 = load <2 x i32>* %p
+ %1 = load <2 x i32>, <2 x i32>* %p
%2 = add <2 x i32> %1, %1
%3 = call <8 x i8> @test_v8i8_v2i32_helper(<2 x i32> %2)
+ br label %return_bb
+return_bb:
%4 = add <8 x i8> %3, %3
store <8 x i8> %4, <8 x i8>* %q
ret void
@@ -509,9 +595,11 @@ declare <8 x i8> @test_v8i8_v4i16_helper(<4 x i16> %p)
define void @test_v8i8_v4i16(<4 x i16>* %p, <8 x i8>* %q) {
; CHECK: rev64 v{{[0-9]+}}.4h
; CHECK: rev64 v{{[0-9]+}}.8b
- %1 = load <4 x i16>* %p
+ %1 = load <4 x i16>, <4 x i16>* %p
%2 = add <4 x i16> %1, %1
%3 = call <8 x i8> @test_v8i8_v4i16_helper(<4 x i16> %2)
+ br label %return_bb
+return_bb:
%4 = add <8 x i8> %3, %3
store <8 x i8> %4, <8 x i8>* %q
ret void
@@ -521,9 +609,11 @@ define void @test_v8i8_v4i16(<4 x i16>* %p, <8 x i8>* %q) {
declare fp128 @test_f128_v2f64_helper(<2 x double> %p)
define void @test_f128_v2f64(<2 x double>* %p, fp128* %q) {
; CHECK: ext
- %1 = load <2 x double>* %p
+ %1 = load <2 x double>, <2 x double>* %p
%2 = fadd <2 x double> %1, %1
%3 = call fp128 @test_f128_v2f64_helper(<2 x double> %2)
+ br label %return_bb
+return_bb:
%4 = fadd fp128 %3, %3
store fp128 %4, fp128* %q
ret void
@@ -533,9 +623,11 @@ define void @test_f128_v2f64(<2 x double>* %p, fp128* %q) {
declare fp128 @test_f128_v2i64_helper(<2 x i64> %p)
define void @test_f128_v2i64(<2 x i64>* %p, fp128* %q) {
; CHECK: ext
- %1 = load <2 x i64>* %p
+ %1 = load <2 x i64>, <2 x i64>* %p
%2 = add <2 x i64> %1, %1
%3 = call fp128 @test_f128_v2i64_helper(<2 x i64> %2)
+ br label %return_bb
+return_bb:
%4 = fadd fp128 %3, %3
store fp128 %4, fp128* %q
ret void
@@ -546,9 +638,11 @@ declare fp128 @test_f128_v4f32_helper(<4 x float> %p)
define void @test_f128_v4f32(<4 x float>* %p, fp128* %q) {
; CHECK: rev64 v{{[0-9]+}}.4s
; CHECK: ext
- %1 = load <4 x float>* %p
+ %1 = load <4 x float>, <4 x float>* %p
%2 = fadd <4 x float> %1, %1
%3 = call fp128 @test_f128_v4f32_helper(<4 x float> %2)
+ br label %return_bb
+return_bb:
%4 = fadd fp128 %3, %3
store fp128 %4, fp128* %q
ret void
@@ -559,9 +653,11 @@ declare fp128 @test_f128_v4i32_helper(<4 x i32> %p)
define void @test_f128_v4i32(<4 x i32>* %p, fp128* %q) {
; CHECK: rev64 v{{[0-9]+}}.4s
; CHECK: ext
- %1 = load <4 x i32>* %p
+ %1 = load <4 x i32>, <4 x i32>* %p
%2 = add <4 x i32> %1, %1
%3 = call fp128 @test_f128_v4i32_helper(<4 x i32> %2)
+ br label %return_bb
+return_bb:
%4 = fadd fp128 %3, %3
store fp128 %4, fp128* %q
ret void
@@ -572,9 +668,11 @@ declare fp128 @test_f128_v8i16_helper(<8 x i16> %p)
define void @test_f128_v8i16(<8 x i16>* %p, fp128* %q) {
; CHECK: rev64 v{{[0-9]+}}.8h
; CHECK: ext
- %1 = load <8 x i16>* %p
+ %1 = load <8 x i16>, <8 x i16>* %p
%2 = add <8 x i16> %1, %1
%3 = call fp128 @test_f128_v8i16_helper(<8 x i16> %2)
+ br label %return_bb
+return_bb:
%4 = fadd fp128 %3, %3
store fp128 %4, fp128* %q
ret void
@@ -585,9 +683,11 @@ declare fp128 @test_f128_v16i8_helper(<16 x i8> %p)
define void @test_f128_v16i8(<16 x i8>* %p, fp128* %q) {
; CHECK: rev64 v{{[0-9]+}}.16b
; CHECK: ext
- %1 = load <16 x i8>* %p
+ %1 = load <16 x i8>, <16 x i8>* %p
%2 = add <16 x i8> %1, %1
%3 = call fp128 @test_f128_v16i8_helper(<16 x i8> %2)
+ br label %return_bb
+return_bb:
%4 = fadd fp128 %3, %3
store fp128 %4, fp128* %q
ret void
@@ -597,9 +697,11 @@ define void @test_f128_v16i8(<16 x i8>* %p, fp128* %q) {
declare <2 x double> @test_v2f64_f128_helper(fp128 %p)
define void @test_v2f64_f128(fp128* %p, <2 x double>* %q) {
; CHECK: ext
- %1 = load fp128* %p
+ %1 = load fp128, fp128* %p
%2 = fadd fp128 %1, %1
%3 = call <2 x double> @test_v2f64_f128_helper(fp128 %2)
+ br label %return_bb
+return_bb:
%4 = fadd <2 x double> %3, %3
store <2 x double> %4, <2 x double>* %q
ret void
@@ -610,9 +712,11 @@ declare <2 x double> @test_v2f64_v2i64_helper(<2 x i64> %p)
define void @test_v2f64_v2i64(<2 x i64>* %p, <2 x double>* %q) {
; CHECK: ext
; CHECK: ext
- %1 = load <2 x i64>* %p
+ %1 = load <2 x i64>, <2 x i64>* %p
%2 = add <2 x i64> %1, %1
%3 = call <2 x double> @test_v2f64_v2i64_helper(<2 x i64> %2)
+ br label %return_bb
+return_bb:
%4 = fadd <2 x double> %3, %3
store <2 x double> %4, <2 x double>* %q
ret void
@@ -624,9 +728,11 @@ define void @test_v2f64_v4f32(<4 x float>* %p, <2 x double>* %q) {
; CHECK: rev64 v{{[0-9]+}}.4s
; CHECK: ext
; CHECK: ext
- %1 = load <4 x float>* %p
+ %1 = load <4 x float>, <4 x float>* %p
%2 = fadd <4 x float> %1, %1
%3 = call <2 x double> @test_v2f64_v4f32_helper(<4 x float> %2)
+ br label %return_bb
+return_bb:
%4 = fadd <2 x double> %3, %3
store <2 x double> %4, <2 x double>* %q
ret void
@@ -638,9 +744,11 @@ define void @test_v2f64_v4i32(<4 x i32>* %p, <2 x double>* %q) {
; CHECK: rev64 v{{[0-9]+}}.4s
; CHECK: ext
; CHECK: ext
- %1 = load <4 x i32>* %p
+ %1 = load <4 x i32>, <4 x i32>* %p
%2 = add <4 x i32> %1, %1
%3 = call <2 x double> @test_v2f64_v4i32_helper(<4 x i32> %2)
+ br label %return_bb
+return_bb:
%4 = fadd <2 x double> %3, %3
store <2 x double> %4, <2 x double>* %q
ret void
@@ -652,9 +760,11 @@ define void @test_v2f64_v8i16(<8 x i16>* %p, <2 x double>* %q) {
; CHECK: rev64 v{{[0-9]+}}.8h
; CHECK: ext
; CHECK: ext
- %1 = load <8 x i16>* %p
+ %1 = load <8 x i16>, <8 x i16>* %p
%2 = add <8 x i16> %1, %1
%3 = call <2 x double> @test_v2f64_v8i16_helper(<8 x i16> %2)
+ br label %return_bb
+return_bb:
%4 = fadd <2 x double> %3, %3
store <2 x double> %4, <2 x double>* %q
ret void
@@ -666,9 +776,11 @@ define void @test_v2f64_v16i8(<16 x i8>* %p, <2 x double>* %q) {
; CHECK: rev64 v{{[0-9]+}}.16b
; CHECK: ext
; CHECK: ext
- %1 = load <16 x i8>* %p
+ %1 = load <16 x i8>, <16 x i8>* %p
%2 = add <16 x i8> %1, %1
%3 = call <2 x double> @test_v2f64_v16i8_helper(<16 x i8> %2)
+ br label %return_bb
+return_bb:
%4 = fadd <2 x double> %3, %3
store <2 x double> %4, <2 x double>* %q
ret void
@@ -678,9 +790,11 @@ define void @test_v2f64_v16i8(<16 x i8>* %p, <2 x double>* %q) {
declare <2 x i64> @test_v2i64_f128_helper(fp128 %p)
define void @test_v2i64_f128(fp128* %p, <2 x i64>* %q) {
; CHECK: ext
- %1 = load fp128* %p
+ %1 = load fp128, fp128* %p
%2 = fadd fp128 %1, %1
%3 = call <2 x i64> @test_v2i64_f128_helper(fp128 %2)
+ br label %return_bb
+return_bb:
%4 = add <2 x i64> %3, %3
store <2 x i64> %4, <2 x i64>* %q
ret void
@@ -691,9 +805,11 @@ declare <2 x i64> @test_v2i64_v2f64_helper(<2 x double> %p)
define void @test_v2i64_v2f64(<2 x double>* %p, <2 x i64>* %q) {
; CHECK: ext
; CHECK: ext
- %1 = load <2 x double>* %p
+ %1 = load <2 x double>, <2 x double>* %p
%2 = fadd <2 x double> %1, %1
%3 = call <2 x i64> @test_v2i64_v2f64_helper(<2 x double> %2)
+ br label %return_bb
+return_bb:
%4 = add <2 x i64> %3, %3
store <2 x i64> %4, <2 x i64>* %q
ret void
@@ -705,9 +821,11 @@ define void @test_v2i64_v4f32(<4 x float>* %p, <2 x i64>* %q) {
; CHECK: rev64 v{{[0-9]+}}.4s
; CHECK: ext
; CHECK: ext
- %1 = load <4 x float>* %p
+ %1 = load <4 x float>, <4 x float>* %p
%2 = fadd <4 x float> %1, %1
%3 = call <2 x i64> @test_v2i64_v4f32_helper(<4 x float> %2)
+ br label %return_bb
+return_bb:
%4 = add <2 x i64> %3, %3
store <2 x i64> %4, <2 x i64>* %q
ret void
@@ -719,9 +837,11 @@ define void @test_v2i64_v4i32(<4 x i32>* %p, <2 x i64>* %q) {
; CHECK: rev64 v{{[0-9]+}}.4s
; CHECK: ext
; CHECK: ext
- %1 = load <4 x i32>* %p
+ %1 = load <4 x i32>, <4 x i32>* %p
%2 = add <4 x i32> %1, %1
%3 = call <2 x i64> @test_v2i64_v4i32_helper(<4 x i32> %2)
+ br label %return_bb
+return_bb:
%4 = add <2 x i64> %3, %3
store <2 x i64> %4, <2 x i64>* %q
ret void
@@ -733,9 +853,11 @@ define void @test_v2i64_v8i16(<8 x i16>* %p, <2 x i64>* %q) {
; CHECK: rev64 v{{[0-9]+}}.8h
; CHECK: ext
; CHECK: ext
- %1 = load <8 x i16>* %p
+ %1 = load <8 x i16>, <8 x i16>* %p
%2 = add <8 x i16> %1, %1
%3 = call <2 x i64> @test_v2i64_v8i16_helper(<8 x i16> %2)
+ br label %return_bb
+return_bb:
%4 = add <2 x i64> %3, %3
store <2 x i64> %4, <2 x i64>* %q
ret void
@@ -747,9 +869,11 @@ define void @test_v2i64_v16i8(<16 x i8>* %p, <2 x i64>* %q) {
; CHECK: rev64 v{{[0-9]+}}.16b
; CHECK: ext
; CHECK: ext
- %1 = load <16 x i8>* %p
+ %1 = load <16 x i8>, <16 x i8>* %p
%2 = add <16 x i8> %1, %1
%3 = call <2 x i64> @test_v2i64_v16i8_helper(<16 x i8> %2)
+ br label %return_bb
+return_bb:
%4 = add <2 x i64> %3, %3
store <2 x i64> %4, <2 x i64>* %q
ret void
@@ -760,9 +884,11 @@ declare <4 x float> @test_v4f32_f128_helper(fp128 %p)
define void @test_v4f32_f128(fp128* %p, <4 x float>* %q) {
; CHECK: rev64 v{{[0-9]+}}.4s
; CHECK: ext
- %1 = load fp128* %p
+ %1 = load fp128, fp128* %p
%2 = fadd fp128 %1, %1
%3 = call <4 x float> @test_v4f32_f128_helper(fp128 %2)
+ br label %return_bb
+return_bb:
%4 = fadd <4 x float> %3, %3
store <4 x float> %4, <4 x float>* %q
ret void
@@ -774,9 +900,11 @@ define void @test_v4f32_v2f64(<2 x double>* %p, <4 x float>* %q) {
; CHECK: ext
; CHECK: rev64 v{{[0-9]+}}.4s
; CHECK: ext
- %1 = load <2 x double>* %p
+ %1 = load <2 x double>, <2 x double>* %p
%2 = fadd <2 x double> %1, %1
%3 = call <4 x float> @test_v4f32_v2f64_helper(<2 x double> %2)
+ br label %return_bb
+return_bb:
%4 = fadd <4 x float> %3, %3
store <4 x float> %4, <4 x float>* %q
ret void
@@ -788,9 +916,11 @@ define void @test_v4f32_v2i64(<2 x i64>* %p, <4 x float>* %q) {
; CHECK: ext
; CHECK: rev64 v{{[0-9]+}}.4s
; CHECK: ext
- %1 = load <2 x i64>* %p
+ %1 = load <2 x i64>, <2 x i64>* %p
%2 = add <2 x i64> %1, %1
%3 = call <4 x float> @test_v4f32_v2i64_helper(<2 x i64> %2)
+ br label %return_bb
+return_bb:
%4 = fadd <4 x float> %3, %3
store <4 x float> %4, <4 x float>* %q
ret void
@@ -803,9 +933,11 @@ define void @test_v4f32_v4i32(<4 x i32>* %p, <4 x float>* %q) {
; CHECK: ext
; CHECK: rev64 v{{[0-9]+}}.4s
; CHECK: ext
- %1 = load <4 x i32>* %p
+ %1 = load <4 x i32>, <4 x i32>* %p
%2 = add <4 x i32> %1, %1
%3 = call <4 x float> @test_v4f32_v4i32_helper(<4 x i32> %2)
+ br label %return_bb
+return_bb:
%4 = fadd <4 x float> %3, %3
store <4 x float> %4, <4 x float>* %q
ret void
@@ -818,9 +950,11 @@ define void @test_v4f32_v8i16(<8 x i16>* %p, <4 x float>* %q) {
; CHECK: ext
; CHECK: rev64 v{{[0-9]+}}.4s
; CHECK: ext
- %1 = load <8 x i16>* %p
+ %1 = load <8 x i16>, <8 x i16>* %p
%2 = add <8 x i16> %1, %1
%3 = call <4 x float> @test_v4f32_v8i16_helper(<8 x i16> %2)
+ br label %return_bb
+return_bb:
%4 = fadd <4 x float> %3, %3
store <4 x float> %4, <4 x float>* %q
ret void
@@ -833,9 +967,11 @@ define void @test_v4f32_v16i8(<16 x i8>* %p, <4 x float>* %q) {
; CHECK: ext
; CHECK: rev64 v{{[0-9]+}}.4s
; CHECK: ext
- %1 = load <16 x i8>* %p
+ %1 = load <16 x i8>, <16 x i8>* %p
%2 = add <16 x i8> %1, %1
%3 = call <4 x float> @test_v4f32_v16i8_helper(<16 x i8> %2)
+ br label %return_bb
+return_bb:
%4 = fadd <4 x float> %3, %3
store <4 x float> %4, <4 x float>* %q
ret void
@@ -846,9 +982,11 @@ declare <4 x i32> @test_v4i32_f128_helper(fp128 %p)
define void @test_v4i32_f128(fp128* %p, <4 x i32>* %q) {
; CHECK: rev64 v{{[0-9]+}}.4s
; CHECK: ext
- %1 = load fp128* %p
+ %1 = load fp128, fp128* %p
%2 = fadd fp128 %1, %1
%3 = call <4 x i32> @test_v4i32_f128_helper(fp128 %2)
+ br label %return_bb
+return_bb:
%4 = add <4 x i32> %3, %3
store <4 x i32> %4, <4 x i32>* %q
ret void
@@ -860,9 +998,11 @@ define void @test_v4i32_v2f64(<2 x double>* %p, <4 x i32>* %q) {
; CHECK: ext
; CHECK: rev64 v{{[0-9]+}}.4s
; CHECK: ext
- %1 = load <2 x double>* %p
+ %1 = load <2 x double>, <2 x double>* %p
%2 = fadd <2 x double> %1, %1
%3 = call <4 x i32> @test_v4i32_v2f64_helper(<2 x double> %2)
+ br label %return_bb
+return_bb:
%4 = add <4 x i32> %3, %3
store <4 x i32> %4, <4 x i32>* %q
ret void
@@ -874,9 +1014,11 @@ define void @test_v4i32_v2i64(<2 x i64>* %p, <4 x i32>* %q) {
; CHECK: ext
; CHECK: rev64 v{{[0-9]+}}.4s
; CHECK: ext
- %1 = load <2 x i64>* %p
+ %1 = load <2 x i64>, <2 x i64>* %p
%2 = add <2 x i64> %1, %1
%3 = call <4 x i32> @test_v4i32_v2i64_helper(<2 x i64> %2)
+ br label %return_bb
+return_bb:
%4 = add <4 x i32> %3, %3
store <4 x i32> %4, <4 x i32>* %q
ret void
@@ -889,9 +1031,11 @@ define void @test_v4i32_v4f32(<4 x float>* %p, <4 x i32>* %q) {
; CHECK: ext
; CHECK: rev64 v{{[0-9]+}}.4s
; CHECK: ext
- %1 = load <4 x float>* %p
+ %1 = load <4 x float>, <4 x float>* %p
%2 = fadd <4 x float> %1, %1
%3 = call <4 x i32> @test_v4i32_v4f32_helper(<4 x float> %2)
+ br label %return_bb
+return_bb:
%4 = add <4 x i32> %3, %3
store <4 x i32> %4, <4 x i32>* %q
ret void
@@ -904,9 +1048,11 @@ define void @test_v4i32_v8i16(<8 x i16>* %p, <4 x i32>* %q) {
; CHECK: ext
; CHECK: rev64 v{{[0-9]+}}.4s
; CHECK: ext
- %1 = load <8 x i16>* %p
+ %1 = load <8 x i16>, <8 x i16>* %p
%2 = add <8 x i16> %1, %1
%3 = call <4 x i32> @test_v4i32_v8i16_helper(<8 x i16> %2)
+ br label %return_bb
+return_bb:
%4 = add <4 x i32> %3, %3
store <4 x i32> %4, <4 x i32>* %q
ret void
@@ -919,9 +1065,11 @@ define void @test_v4i32_v16i8(<16 x i8>* %p, <4 x i32>* %q) {
; CHECK: ext
; CHECK: rev64 v{{[0-9]+}}.4s
; CHECK: ext
- %1 = load <16 x i8>* %p
+ %1 = load <16 x i8>, <16 x i8>* %p
%2 = add <16 x i8> %1, %1
%3 = call <4 x i32> @test_v4i32_v16i8_helper(<16 x i8> %2)
+ br label %return_bb
+return_bb:
%4 = add <4 x i32> %3, %3
store <4 x i32> %4, <4 x i32>* %q
ret void
@@ -932,9 +1080,11 @@ declare <8 x i16> @test_v8i16_f128_helper(fp128 %p)
define void @test_v8i16_f128(fp128* %p, <8 x i16>* %q) {
; CHECK: rev64 v{{[0-9]+}}.8h
; CHECK: ext
- %1 = load fp128* %p
+ %1 = load fp128, fp128* %p
%2 = fadd fp128 %1, %1
%3 = call <8 x i16> @test_v8i16_f128_helper(fp128 %2)
+ br label %return_bb
+return_bb:
%4 = add <8 x i16> %3, %3
store <8 x i16> %4, <8 x i16>* %q
ret void
@@ -946,9 +1096,11 @@ define void @test_v8i16_v2f64(<2 x double>* %p, <8 x i16>* %q) {
; CHECK: ext
; CHECK: rev64 v{{[0-9]+}}.8h
; CHECK: ext
- %1 = load <2 x double>* %p
+ %1 = load <2 x double>, <2 x double>* %p
%2 = fadd <2 x double> %1, %1
%3 = call <8 x i16> @test_v8i16_v2f64_helper(<2 x double> %2)
+ br label %return_bb
+return_bb:
%4 = add <8 x i16> %3, %3
store <8 x i16> %4, <8 x i16>* %q
ret void
@@ -960,9 +1112,11 @@ define void @test_v8i16_v2i64(<2 x i64>* %p, <8 x i16>* %q) {
; CHECK: ext
; CHECK: rev64 v{{[0-9]+}}.8h
; CHECK: ext
- %1 = load <2 x i64>* %p
+ %1 = load <2 x i64>, <2 x i64>* %p
%2 = add <2 x i64> %1, %1
%3 = call <8 x i16> @test_v8i16_v2i64_helper(<2 x i64> %2)
+ br label %return_bb
+return_bb:
%4 = add <8 x i16> %3, %3
store <8 x i16> %4, <8 x i16>* %q
ret void
@@ -975,9 +1129,11 @@ define void @test_v8i16_v4f32(<4 x float>* %p, <8 x i16>* %q) {
; CHECK: ext
; CHECK: rev64 v{{[0-9]+}}.8h
; CHECK: ext
- %1 = load <4 x float>* %p
+ %1 = load <4 x float>, <4 x float>* %p
%2 = fadd <4 x float> %1, %1
%3 = call <8 x i16> @test_v8i16_v4f32_helper(<4 x float> %2)
+ br label %return_bb
+return_bb:
%4 = add <8 x i16> %3, %3
store <8 x i16> %4, <8 x i16>* %q
ret void
@@ -990,9 +1146,11 @@ define void @test_v8i16_v4i32(<4 x i32>* %p, <8 x i16>* %q) {
; CHECK: ext
; CHECK: rev64 v{{[0-9]+}}.8h
; CHECK: ext
- %1 = load <4 x i32>* %p
+ %1 = load <4 x i32>, <4 x i32>* %p
%2 = add <4 x i32> %1, %1
%3 = call <8 x i16> @test_v8i16_v4i32_helper(<4 x i32> %2)
+ br label %return_bb
+return_bb:
%4 = add <8 x i16> %3, %3
store <8 x i16> %4, <8 x i16>* %q
ret void
@@ -1005,9 +1163,11 @@ define void @test_v8i16_v16i8(<16 x i8>* %p, <8 x i16>* %q) {
; CHECK: ext
; CHECK: rev64 v{{[0-9]+}}.8h
; CHECK: ext
- %1 = load <16 x i8>* %p
+ %1 = load <16 x i8>, <16 x i8>* %p
%2 = add <16 x i8> %1, %1
%3 = call <8 x i16> @test_v8i16_v16i8_helper(<16 x i8> %2)
+ br label %return_bb
+return_bb:
%4 = add <8 x i16> %3, %3
store <8 x i16> %4, <8 x i16>* %q
ret void
@@ -1018,9 +1178,11 @@ declare <16 x i8> @test_v16i8_f128_helper(fp128 %p)
define void @test_v16i8_f128(fp128* %p, <16 x i8>* %q) {
; CHECK: rev64 v{{[0-9]+}}.16b
; CHECK: ext
- %1 = load fp128* %p
+ %1 = load fp128, fp128* %p
%2 = fadd fp128 %1, %1
%3 = call <16 x i8> @test_v16i8_f128_helper(fp128 %2)
+ br label %return_bb
+return_bb:
%4 = add <16 x i8> %3, %3
store <16 x i8> %4, <16 x i8>* %q
ret void
@@ -1032,9 +1194,11 @@ define void @test_v16i8_v2f64(<2 x double>* %p, <16 x i8>* %q) {
; CHECK: ext
; CHECK: rev64 v{{[0-9]+}}.16b
; CHECK: ext
- %1 = load <2 x double>* %p
+ %1 = load <2 x double>, <2 x double>* %p
%2 = fadd <2 x double> %1, %1
%3 = call <16 x i8> @test_v16i8_v2f64_helper(<2 x double> %2)
+ br label %return_bb
+return_bb:
%4 = add <16 x i8> %3, %3
store <16 x i8> %4, <16 x i8>* %q
ret void
@@ -1046,9 +1210,11 @@ define void @test_v16i8_v2i64(<2 x i64>* %p, <16 x i8>* %q) {
; CHECK: ext
; CHECK: rev64 v{{[0-9]+}}.16b
; CHECK: ext
- %1 = load <2 x i64>* %p
+ %1 = load <2 x i64>, <2 x i64>* %p
%2 = add <2 x i64> %1, %1
%3 = call <16 x i8> @test_v16i8_v2i64_helper(<2 x i64> %2)
+ br label %return_bb
+return_bb:
%4 = add <16 x i8> %3, %3
store <16 x i8> %4, <16 x i8>* %q
ret void
@@ -1061,9 +1227,11 @@ define void @test_v16i8_v4f32(<4 x float>* %p, <16 x i8>* %q) {
; CHECK: ext
; CHECK: rev64 v{{[0-9]+}}.16b
; CHECK: ext
- %1 = load <4 x float>* %p
+ %1 = load <4 x float>, <4 x float>* %p
%2 = fadd <4 x float> %1, %1
%3 = call <16 x i8> @test_v16i8_v4f32_helper(<4 x float> %2)
+ br label %return_bb
+return_bb:
%4 = add <16 x i8> %3, %3
store <16 x i8> %4, <16 x i8>* %q
ret void
@@ -1076,9 +1244,11 @@ define void @test_v16i8_v4i32(<4 x i32>* %p, <16 x i8>* %q) {
; CHECK: ext
; CHECK: rev64 v{{[0-9]+}}.16b
; CHECK: ext
- %1 = load <4 x i32>* %p
+ %1 = load <4 x i32>, <4 x i32>* %p
%2 = add <4 x i32> %1, %1
%3 = call <16 x i8> @test_v16i8_v4i32_helper(<4 x i32> %2)
+ br label %return_bb
+return_bb:
%4 = add <16 x i8> %3, %3
store <16 x i8> %4, <16 x i8>* %q
ret void
@@ -1091,9 +1261,11 @@ define void @test_v16i8_v8i16(<8 x i16>* %p, <16 x i8>* %q) {
; CHECK: ext
; CHECK: rev64 v{{[0-9]+}}.16b
; CHECK: ext
- %1 = load <8 x i16>* %p
+ %1 = load <8 x i16>, <8 x i16>* %p
%2 = add <8 x i16> %1, %1
%3 = call <16 x i8> @test_v16i8_v8i16_helper(<8 x i16> %2)
+ br label %return_bb
+return_bb:
%4 = add <16 x i8> %3, %3
store <16 x i8> %4, <16 x i8>* %q
ret void
diff --git a/test/CodeGen/AArch64/arm64-big-stack.ll b/test/CodeGen/AArch64/arm64-big-stack.ll
index 3f91bb3c2482..c9acbc5f054c 100644
--- a/test/CodeGen/AArch64/arm64-big-stack.ll
+++ b/test/CodeGen/AArch64/arm64-big-stack.ll
@@ -13,7 +13,7 @@ target triple = "arm64-apple-macosx10"
define void @foo() nounwind ssp {
entry:
%buffer = alloca [33554432 x i8], align 1
- %arraydecay = getelementptr inbounds [33554432 x i8]* %buffer, i64 0, i64 0
+ %arraydecay = getelementptr inbounds [33554432 x i8], [33554432 x i8]* %buffer, i64 0, i64 0
call void @doit(i8* %arraydecay) nounwind
ret void
}
diff --git a/test/CodeGen/AArch64/arm64-bitfield-extract.ll b/test/CodeGen/AArch64/arm64-bitfield-extract.ll
index 112efddd4fad..5dca92941211 100644
--- a/test/CodeGen/AArch64/arm64-bitfield-extract.ll
+++ b/test/CodeGen/AArch64/arm64-bitfield-extract.ll
@@ -12,8 +12,8 @@ define void @foo(%struct.X* nocapture %x, %struct.Y* nocapture %y) nounwind opts
; CHECK: ret
%tmp = bitcast %struct.X* %x to i32*
- %tmp1 = load i32* %tmp, align 4
- %b = getelementptr inbounds %struct.Y* %y, i64 0, i32 1
+ %tmp1 = load i32, i32* %tmp, align 4
+ %b = getelementptr inbounds %struct.Y, %struct.Y* %y, i64 0, i32 1
%bf.clear = lshr i32 %tmp1, 3
%bf.clear.lobit = and i32 %bf.clear, 1
%frombool = trunc i32 %bf.clear.lobit to i8
@@ -46,8 +46,8 @@ define void @fct1(%struct.Z* nocapture %x, %struct.A* nocapture %y) nounwind opt
; CHECK: ret
%tmp = bitcast %struct.Z* %x to i64*
- %tmp1 = load i64* %tmp, align 4
- %b = getelementptr inbounds %struct.A* %y, i64 0, i32 0
+ %tmp1 = load i64, i64* %tmp, align 4
+ %b = getelementptr inbounds %struct.A, %struct.A* %y, i64 0, i32 0
%bf.clear = lshr i64 %tmp1, 3
%bf.clear.lobit = and i64 %bf.clear, 1
store i64 %bf.clear.lobit, i64* %b, align 8
@@ -77,7 +77,7 @@ entry:
; CHECK-NEXT: bfxil [[REG1]], x1, #16, #24
; CHECK-NEXT: str [[REG1]],
; CHECK-NEXT: ret
- %0 = load i64* %y, align 8
+ %0 = load i64, i64* %y, align 8
%and = and i64 %0, -16777216
%shr = lshr i64 %x, 16
%and1 = and i64 %shr, 16777215
@@ -93,7 +93,7 @@ entry:
; CHECK-NEXT: bfxil [[REG1]], w1, #16, #3
; CHECK-NEXT: str [[REG1]],
; CHECK-NEXT: ret
- %0 = load i32* %y, align 8
+ %0 = load i32, i32* %y, align 8
%and = and i32 %0, -8
%shr = lshr i32 %x, 16
%and1 = and i32 %shr, 7
@@ -112,7 +112,7 @@ entry:
; CHECK-NEXT: lsr [[REG2:w[0-9]+]], [[REG1]], #2
; CHECK-NEXT: str [[REG2]],
; CHECK-NEXT: ret
- %0 = load i32* %y, align 8
+ %0 = load i32, i32* %y, align 8
%and = and i32 %0, -8
%shr = lshr i32 %x, 16
%and1 = and i32 %shr, 7
@@ -133,7 +133,7 @@ entry:
; CHECK-NEXT: lsl [[REG2:w[0-9]+]], [[REG1]], #2
; CHECK-NEXT: str [[REG2]],
; CHECK-NEXT: ret
- %0 = load i32* %y, align 8
+ %0 = load i32, i32* %y, align 8
%and = and i32 %0, -8
%shr = lshr i32 %x, 16
%and1 = and i32 %shr, 7
@@ -155,7 +155,7 @@ entry:
; CHECK-NEXT: lsr [[REG2:x[0-9]+]], [[REG1]], #2
; CHECK-NEXT: str [[REG2]],
; CHECK-NEXT: ret
- %0 = load i64* %y, align 8
+ %0 = load i64, i64* %y, align 8
%and = and i64 %0, -8
%shr = lshr i64 %x, 16
%and1 = and i64 %shr, 7
@@ -177,7 +177,7 @@ entry:
; CHECK-NEXT: lsl [[REG2:x[0-9]+]], [[REG1]], #2
; CHECK-NEXT: str [[REG2]],
; CHECK-NEXT: ret
- %0 = load i64* %y, align 8
+ %0 = load i64, i64* %y, align 8
%and = and i64 %0, -8
%shr = lshr i64 %x, 16
%and1 = and i64 %shr, 7
@@ -198,7 +198,7 @@ entry:
; CHECK-NEXT: lsl [[REG2:w[0-9]+]], [[REG1]], #2
; CHECK-NEXT: str [[REG2]],
; CHECK-NEXT: ret
- %0 = load i32* %y, align 8
+ %0 = load i32, i32* %y, align 8
%and = and i32 %0, -8
%and1 = and i32 %x, 7
%or = or i32 %and, %and1
@@ -218,7 +218,7 @@ entry:
; CHECK-NEXT: lsl [[REG2:x[0-9]+]], [[REG1]], #2
; CHECK-NEXT: str [[REG2]],
; CHECK-NEXT: ret
- %0 = load i64* %y, align 8
+ %0 = load i64, i64* %y, align 8
%and = and i64 %0, -8
%and1 = and i64 %x, 7
%or = or i64 %and, %and1
@@ -247,7 +247,7 @@ entry:
; CHECK-NEXT: ubfx [[REG2:w[0-9]+]], [[REG1]], #2, #28
; CHECK-NEXT: str [[REG2]],
; CHECK-NEXT: ret
- %0 = load i32* %y, align 8
+ %0 = load i32, i32* %y, align 8
%and = and i32 %0, -8
%shr = lshr i32 %x, 16
%and1 = and i32 %shr, 7
@@ -270,7 +270,7 @@ entry:
; CHECK-NEXT: ubfx [[REG2:x[0-9]+]], [[REG1]], #2, #60
; CHECK-NEXT: str [[REG2]],
; CHECK-NEXT: ret
- %0 = load i64* %y, align 8
+ %0 = load i64, i64* %y, align 8
%and = and i64 %0, -8
%shr = lshr i64 %x, 16
%and1 = and i64 %shr, 7
@@ -296,7 +296,7 @@ entry:
; CHECK-NEXT: lsl [[REG3:w[0-9]+]], [[REG2]], #2
; CHECK-NEXT: str [[REG3]],
; CHECK-NEXT: ret
- %0 = load i32* %y, align 8
+ %0 = load i32, i32* %y, align 8
%and = and i32 %0, -256
%shr = lshr i32 %x, 16
%and1 = and i32 %shr, 255
@@ -326,7 +326,7 @@ entry:
; CHECK-NEXT: lsl [[REG3:x[0-9]+]], [[REG2]], #2
; CHECK-NEXT: str [[REG3]],
; CHECK-NEXT: ret
- %0 = load i64* %y, align 8
+ %0 = load i64, i64* %y, align 8
%and = and i64 %0, -256
%shr = lshr i64 %x, 16
%and1 = and i64 %shr, 255
@@ -357,7 +357,7 @@ entry:
; CHECK-NEXT: ubfx [[REG3:w[0-9]+]], [[REG2]], #2, #28
; CHECK-NEXT: str [[REG3]],
; CHECK-NEXT: ret
- %0 = load i32* %y, align 8
+ %0 = load i32, i32* %y, align 8
%and = and i32 %0, 1737056
%shr = lshr i32 %x, 16
%and1 = and i32 %shr, 7
@@ -386,7 +386,7 @@ entry:
; CHECK-NEXT: ubfx [[REG3:x[0-9]+]], [[REG2]], #2, #60
; CHECK-NEXT: str [[REG3]],
; CHECK-NEXT: ret
- %0 = load i64* %y, align 8
+ %0 = load i64, i64* %y, align 8
%and = and i64 %0, 1737056
%shr = lshr i64 %x, 16
%and1 = and i64 %shr, 7
@@ -421,8 +421,8 @@ entry:
br i1 %tobool, label %if.end, label %if.then
if.then: ; preds = %entry
- %arrayidx3 = getelementptr inbounds [65536 x i8]* @first_ones, i64 0, i64 %x.sroa.5.0.extract.shift
- %0 = load i8* %arrayidx3, align 1
+ %arrayidx3 = getelementptr inbounds [65536 x i8], [65536 x i8]* @first_ones, i64 0, i64 %x.sroa.5.0.extract.shift
+ %0 = load i8, i8* %arrayidx3, align 1
%conv = zext i8 %0 to i32
br label %return
@@ -443,8 +443,8 @@ if.then7: ; preds = %if.end
; CHECK-NOT: and
; CHECK-NOT: ubfm
%idxprom10 = and i64 %x.sroa.3.0.extract.shift, 65535
- %arrayidx11 = getelementptr inbounds [65536 x i8]* @first_ones, i64 0, i64 %idxprom10
- %1 = load i8* %arrayidx11, align 1
+ %arrayidx11 = getelementptr inbounds [65536 x i8], [65536 x i8]* @first_ones, i64 0, i64 %idxprom10
+ %1 = load i8, i8* %arrayidx11, align 1
%conv12 = zext i8 %1 to i32
%add = add nsw i32 %conv12, 16
br label %return
@@ -466,8 +466,8 @@ if.then17: ; preds = %if.end13
; CHECK-NOT: and
; CHECK-NOT: ubfm
%idxprom20 = and i64 %x.sroa.1.0.extract.shift, 65535
- %arrayidx21 = getelementptr inbounds [65536 x i8]* @first_ones, i64 0, i64 %idxprom20
- %2 = load i8* %arrayidx21, align 1
+ %arrayidx21 = getelementptr inbounds [65536 x i8], [65536 x i8]* @first_ones, i64 0, i64 %idxprom20
+ %2 = load i8, i8* %arrayidx21, align 1
%conv22 = zext i8 %2 to i32
%add23 = add nsw i32 %conv22, 32
br label %return
@@ -509,8 +509,8 @@ define i64 @fct21(i64 %x) {
entry:
%shr = lshr i64 %x, 4
%and = and i64 %shr, 15
- %arrayidx = getelementptr inbounds [8 x [64 x i64]]* @arr, i64 0, i64 0, i64 %and
- %0 = load i64* %arrayidx, align 8
+ %arrayidx = getelementptr inbounds [8 x [64 x i64]], [8 x [64 x i64]]* @arr, i64 0, i64 0, i64 %and
+ %0 = load i64, i64* %arrayidx, align 8
ret i64 %0
}
diff --git a/test/CodeGen/AArch64/arm64-blockaddress.ll b/test/CodeGen/AArch64/arm64-blockaddress.ll
index ac4f19e65dff..5df840216352 100644
--- a/test/CodeGen/AArch64/arm64-blockaddress.ll
+++ b/test/CodeGen/AArch64/arm64-blockaddress.ll
@@ -25,6 +25,6 @@ entry:
br label %mylabel
mylabel:
- %tmp = load volatile i64* %recover, align 8
+ %tmp = load volatile i64, i64* %recover, align 8
ret i64 %tmp
}
diff --git a/test/CodeGen/AArch64/arm64-call-tailcalls.ll b/test/CodeGen/AArch64/arm64-call-tailcalls.ll
index 487c1d9bec3b..6621db25da5b 100644
--- a/test/CodeGen/AArch64/arm64-call-tailcalls.ll
+++ b/test/CodeGen/AArch64/arm64-call-tailcalls.ll
@@ -9,7 +9,7 @@ define void @t2() {
; CHECK: ldr x[[ADDR:[0-9]+]], [x[[GOTADDR]], _t@GOTPAGEOFF]
; CHECK: ldr x[[DEST:[0-9]+]], [x[[ADDR]]]
; CHECK: br x[[DEST]]
- %tmp = load i32 ()** @t
+ %tmp = load i32 ()*, i32 ()** @t
%tmp.upgrd.2 = tail call i32 %tmp()
ret void
}
@@ -53,9 +53,9 @@ bb: ; preds = %entry
define i32 @t8(i32 %x) nounwind ssp {
; CHECK-LABEL: t8:
+; CHECK: b _c
; CHECK: b _a
; CHECK: b _b
-; CHECK: b _c
%and = and i32 %x, 1
%tobool = icmp eq i32 %and, 0
br i1 %tobool, label %if.end, label %if.then
diff --git a/test/CodeGen/AArch64/arm64-cast-opt.ll b/test/CodeGen/AArch64/arm64-cast-opt.ll
index 65a871d43685..463add5688e3 100644
--- a/test/CodeGen/AArch64/arm64-cast-opt.ll
+++ b/test/CodeGen/AArch64/arm64-cast-opt.ll
@@ -11,12 +11,12 @@ define zeroext i8 @foo(i32 %i1, i32 %i2) {
; CHECK-NOT: and
entry:
%idxprom = sext i32 %i1 to i64
- %0 = load i8** @block, align 8
- %arrayidx = getelementptr inbounds i8* %0, i64 %idxprom
- %1 = load i8* %arrayidx, align 1
+ %0 = load i8*, i8** @block, align 8
+ %arrayidx = getelementptr inbounds i8, i8* %0, i64 %idxprom
+ %1 = load i8, i8* %arrayidx, align 1
%idxprom1 = sext i32 %i2 to i64
- %arrayidx2 = getelementptr inbounds i8* %0, i64 %idxprom1
- %2 = load i8* %arrayidx2, align 1
+ %arrayidx2 = getelementptr inbounds i8, i8* %0, i64 %idxprom1
+ %2 = load i8, i8* %arrayidx2, align 1
%cmp = icmp eq i8 %1, %2
br i1 %cmp, label %return, label %if.then
diff --git a/test/CodeGen/AArch64/arm64-ccmp-heuristics.ll b/test/CodeGen/AArch64/arm64-ccmp-heuristics.ll
index b032d9c89ef6..4e47ab6c03f3 100644
--- a/test/CodeGen/AArch64/arm64-ccmp-heuristics.ll
+++ b/test/CodeGen/AArch64/arm64-ccmp-heuristics.ll
@@ -21,7 +21,7 @@ target triple = "arm64-apple-ios7.0.0"
; CHECK-NEXT b.cc
define i32 @Maze1() nounwind ssp {
entry:
- %0 = load i64* @channelColumns, align 8, !tbaa !0
+ %0 = load i64, i64* @channelColumns, align 8, !tbaa !0
%cmp90 = icmp eq i64 %0, 0
br i1 %cmp90, label %for.end, label %for.body
@@ -29,51 +29,51 @@ for.body: ; preds = %for.inc, %entry
%1 = phi i64 [ %0, %entry ], [ %37, %for.inc ]
%i.092 = phi i64 [ 1, %entry ], [ %inc53, %for.inc ]
%numLeft.091 = phi i32 [ 0, %entry ], [ %numLeft.1, %for.inc ]
- %2 = load i8** @mazeRoute, align 8, !tbaa !3
- %arrayidx = getelementptr inbounds i8* %2, i64 %i.092
- %3 = load i8* %arrayidx, align 1, !tbaa !1
+ %2 = load i8*, i8** @mazeRoute, align 8, !tbaa !3
+ %arrayidx = getelementptr inbounds i8, i8* %2, i64 %i.092
+ %3 = load i8, i8* %arrayidx, align 1, !tbaa !1
%tobool = icmp eq i8 %3, 0
br i1 %tobool, label %for.inc, label %if.then
if.then: ; preds = %for.body
- %4 = load i64** @TOP, align 8, !tbaa !3
- %arrayidx1 = getelementptr inbounds i64* %4, i64 %i.092
- %5 = load i64* %arrayidx1, align 8, !tbaa !0
- %6 = load i64** @netsAssign, align 8, !tbaa !3
- %arrayidx2 = getelementptr inbounds i64* %6, i64 %5
- %7 = load i64* %arrayidx2, align 8, !tbaa !0
- %8 = load i64** @BOT, align 8, !tbaa !3
- %arrayidx3 = getelementptr inbounds i64* %8, i64 %i.092
- %9 = load i64* %arrayidx3, align 8, !tbaa !0
- %arrayidx4 = getelementptr inbounds i64* %6, i64 %9
- %10 = load i64* %arrayidx4, align 8, !tbaa !0
+ %4 = load i64*, i64** @TOP, align 8, !tbaa !3
+ %arrayidx1 = getelementptr inbounds i64, i64* %4, i64 %i.092
+ %5 = load i64, i64* %arrayidx1, align 8, !tbaa !0
+ %6 = load i64*, i64** @netsAssign, align 8, !tbaa !3
+ %arrayidx2 = getelementptr inbounds i64, i64* %6, i64 %5
+ %7 = load i64, i64* %arrayidx2, align 8, !tbaa !0
+ %8 = load i64*, i64** @BOT, align 8, !tbaa !3
+ %arrayidx3 = getelementptr inbounds i64, i64* %8, i64 %i.092
+ %9 = load i64, i64* %arrayidx3, align 8, !tbaa !0
+ %arrayidx4 = getelementptr inbounds i64, i64* %6, i64 %9
+ %10 = load i64, i64* %arrayidx4, align 8, !tbaa !0
%cmp5 = icmp ugt i64 %i.092, 1
%cmp6 = icmp ugt i64 %10, 1
%or.cond = and i1 %cmp5, %cmp6
br i1 %or.cond, label %land.lhs.true7, label %if.else
land.lhs.true7: ; preds = %if.then
- %11 = load i64* @channelTracks, align 8, !tbaa !0
+ %11 = load i64, i64* @channelTracks, align 8, !tbaa !0
%add = add i64 %11, 1
%call = tail call fastcc i32 @Maze1Mech(i64 %i.092, i64 %add, i64 %10, i64 0, i64 %7, i32 -1, i32 -1)
%tobool8 = icmp eq i32 %call, 0
br i1 %tobool8, label %land.lhs.true7.if.else_crit_edge, label %if.then9
land.lhs.true7.if.else_crit_edge: ; preds = %land.lhs.true7
- %.pre = load i64* @channelColumns, align 8, !tbaa !0
+ %.pre = load i64, i64* @channelColumns, align 8, !tbaa !0
br label %if.else
if.then9: ; preds = %land.lhs.true7
- %12 = load i8** @mazeRoute, align 8, !tbaa !3
- %arrayidx10 = getelementptr inbounds i8* %12, i64 %i.092
+ %12 = load i8*, i8** @mazeRoute, align 8, !tbaa !3
+ %arrayidx10 = getelementptr inbounds i8, i8* %12, i64 %i.092
store i8 0, i8* %arrayidx10, align 1, !tbaa !1
- %13 = load i64** @TOP, align 8, !tbaa !3
- %arrayidx11 = getelementptr inbounds i64* %13, i64 %i.092
- %14 = load i64* %arrayidx11, align 8, !tbaa !0
+ %13 = load i64*, i64** @TOP, align 8, !tbaa !3
+ %arrayidx11 = getelementptr inbounds i64, i64* %13, i64 %i.092
+ %14 = load i64, i64* %arrayidx11, align 8, !tbaa !0
tail call fastcc void @CleanNet(i64 %14)
- %15 = load i64** @BOT, align 8, !tbaa !3
- %arrayidx12 = getelementptr inbounds i64* %15, i64 %i.092
- %16 = load i64* %arrayidx12, align 8, !tbaa !0
+ %15 = load i64*, i64** @BOT, align 8, !tbaa !3
+ %arrayidx12 = getelementptr inbounds i64, i64* %15, i64 %i.092
+ %16 = load i64, i64* %arrayidx12, align 8, !tbaa !0
tail call fastcc void @CleanNet(i64 %16)
br label %for.inc
@@ -84,23 +84,23 @@ if.else: ; preds = %land.lhs.true7.if.e
br i1 %or.cond89, label %land.lhs.true16, label %if.else24
land.lhs.true16: ; preds = %if.else
- %18 = load i64* @channelTracks, align 8, !tbaa !0
+ %18 = load i64, i64* @channelTracks, align 8, !tbaa !0
%add17 = add i64 %18, 1
%call18 = tail call fastcc i32 @Maze1Mech(i64 %i.092, i64 %add17, i64 %10, i64 0, i64 %7, i32 1, i32 -1)
%tobool19 = icmp eq i32 %call18, 0
br i1 %tobool19, label %if.else24, label %if.then20
if.then20: ; preds = %land.lhs.true16
- %19 = load i8** @mazeRoute, align 8, !tbaa !3
- %arrayidx21 = getelementptr inbounds i8* %19, i64 %i.092
+ %19 = load i8*, i8** @mazeRoute, align 8, !tbaa !3
+ %arrayidx21 = getelementptr inbounds i8, i8* %19, i64 %i.092
store i8 0, i8* %arrayidx21, align 1, !tbaa !1
- %20 = load i64** @TOP, align 8, !tbaa !3
- %arrayidx22 = getelementptr inbounds i64* %20, i64 %i.092
- %21 = load i64* %arrayidx22, align 8, !tbaa !0
+ %20 = load i64*, i64** @TOP, align 8, !tbaa !3
+ %arrayidx22 = getelementptr inbounds i64, i64* %20, i64 %i.092
+ %21 = load i64, i64* %arrayidx22, align 8, !tbaa !0
tail call fastcc void @CleanNet(i64 %21)
- %22 = load i64** @BOT, align 8, !tbaa !3
- %arrayidx23 = getelementptr inbounds i64* %22, i64 %i.092
- %23 = load i64* %arrayidx23, align 8, !tbaa !0
+ %22 = load i64*, i64** @BOT, align 8, !tbaa !3
+ %arrayidx23 = getelementptr inbounds i64, i64* %22, i64 %i.092
+ %23 = load i64, i64* %arrayidx23, align 8, !tbaa !0
tail call fastcc void @CleanNet(i64 %23)
br label %for.inc
@@ -108,7 +108,7 @@ if.else24: ; preds = %land.lhs.true16, %i
br i1 %cmp5, label %land.lhs.true26, label %if.else36
land.lhs.true26: ; preds = %if.else24
- %24 = load i64* @channelTracks, align 8, !tbaa !0
+ %24 = load i64, i64* @channelTracks, align 8, !tbaa !0
%cmp27 = icmp ult i64 %7, %24
br i1 %cmp27, label %land.lhs.true28, label %if.else36
@@ -119,26 +119,26 @@ land.lhs.true28: ; preds = %land.lhs.true26
br i1 %tobool31, label %if.else36, label %if.then32
if.then32: ; preds = %land.lhs.true28
- %25 = load i8** @mazeRoute, align 8, !tbaa !3
- %arrayidx33 = getelementptr inbounds i8* %25, i64 %i.092
+ %25 = load i8*, i8** @mazeRoute, align 8, !tbaa !3
+ %arrayidx33 = getelementptr inbounds i8, i8* %25, i64 %i.092
store i8 0, i8* %arrayidx33, align 1, !tbaa !1
- %26 = load i64** @TOP, align 8, !tbaa !3
- %arrayidx34 = getelementptr inbounds i64* %26, i64 %i.092
- %27 = load i64* %arrayidx34, align 8, !tbaa !0
+ %26 = load i64*, i64** @TOP, align 8, !tbaa !3
+ %arrayidx34 = getelementptr inbounds i64, i64* %26, i64 %i.092
+ %27 = load i64, i64* %arrayidx34, align 8, !tbaa !0
tail call fastcc void @CleanNet(i64 %27)
- %28 = load i64** @BOT, align 8, !tbaa !3
- %arrayidx35 = getelementptr inbounds i64* %28, i64 %i.092
- %29 = load i64* %arrayidx35, align 8, !tbaa !0
+ %28 = load i64*, i64** @BOT, align 8, !tbaa !3
+ %arrayidx35 = getelementptr inbounds i64, i64* %28, i64 %i.092
+ %29 = load i64, i64* %arrayidx35, align 8, !tbaa !0
tail call fastcc void @CleanNet(i64 %29)
br label %for.inc
if.else36: ; preds = %land.lhs.true28, %land.lhs.true26, %if.else24
- %30 = load i64* @channelColumns, align 8, !tbaa !0
+ %30 = load i64, i64* @channelColumns, align 8, !tbaa !0
%cmp37 = icmp ult i64 %i.092, %30
br i1 %cmp37, label %land.lhs.true38, label %if.else48
land.lhs.true38: ; preds = %if.else36
- %31 = load i64* @channelTracks, align 8, !tbaa !0
+ %31 = load i64, i64* @channelTracks, align 8, !tbaa !0
%cmp39 = icmp ult i64 %7, %31
br i1 %cmp39, label %land.lhs.true40, label %if.else48
@@ -149,16 +149,16 @@ land.lhs.true40: ; preds = %land.lhs.true38
br i1 %tobool43, label %if.else48, label %if.then44
if.then44: ; preds = %land.lhs.true40
- %32 = load i8** @mazeRoute, align 8, !tbaa !3
- %arrayidx45 = getelementptr inbounds i8* %32, i64 %i.092
+ %32 = load i8*, i8** @mazeRoute, align 8, !tbaa !3
+ %arrayidx45 = getelementptr inbounds i8, i8* %32, i64 %i.092
store i8 0, i8* %arrayidx45, align 1, !tbaa !1
- %33 = load i64** @TOP, align 8, !tbaa !3
- %arrayidx46 = getelementptr inbounds i64* %33, i64 %i.092
- %34 = load i64* %arrayidx46, align 8, !tbaa !0
+ %33 = load i64*, i64** @TOP, align 8, !tbaa !3
+ %arrayidx46 = getelementptr inbounds i64, i64* %33, i64 %i.092
+ %34 = load i64, i64* %arrayidx46, align 8, !tbaa !0
tail call fastcc void @CleanNet(i64 %34)
- %35 = load i64** @BOT, align 8, !tbaa !3
- %arrayidx47 = getelementptr inbounds i64* %35, i64 %i.092
- %36 = load i64* %arrayidx47, align 8, !tbaa !0
+ %35 = load i64*, i64** @BOT, align 8, !tbaa !3
+ %arrayidx47 = getelementptr inbounds i64, i64* %35, i64 %i.092
+ %36 = load i64, i64* %arrayidx47, align 8, !tbaa !0
tail call fastcc void @CleanNet(i64 %36)
br label %for.inc
@@ -169,7 +169,7 @@ if.else48: ; preds = %land.lhs.true40, %l
for.inc: ; preds = %if.else48, %if.then44, %if.then32, %if.then20, %if.then9, %for.body
%numLeft.1 = phi i32 [ %numLeft.091, %if.then9 ], [ %numLeft.091, %if.then20 ], [ %numLeft.091, %if.then32 ], [ %numLeft.091, %if.then44 ], [ %inc, %if.else48 ], [ %numLeft.091, %for.body ]
%inc53 = add i64 %i.092, 1
- %37 = load i64* @channelColumns, align 8, !tbaa !0
+ %37 = load i64, i64* @channelColumns, align 8, !tbaa !0
%cmp = icmp ugt i64 %inc53, %37
br i1 %cmp, label %for.end, label %for.body
diff --git a/test/CodeGen/AArch64/arm64-ccmp.ll b/test/CodeGen/AArch64/arm64-ccmp.ll
index 63965f9538b5..ff18f7364337 100644
--- a/test/CodeGen/AArch64/arm64-ccmp.ll
+++ b/test/CodeGen/AArch64/arm64-ccmp.ll
@@ -281,9 +281,9 @@ if.end85:
sw.bb.i.i:
%ref.tr.i.i = phi %str1* [ %0, %sw.bb.i.i ], [ undef, %entry ]
- %operands.i.i = getelementptr inbounds %str1* %ref.tr.i.i, i64 0, i32 0, i32 2
+ %operands.i.i = getelementptr inbounds %str1, %str1* %ref.tr.i.i, i64 0, i32 0, i32 2
%arrayidx.i.i = bitcast i32* %operands.i.i to %str1**
- %0 = load %str1** %arrayidx.i.i, align 8
- %code1.i.i.phi.trans.insert = getelementptr inbounds %str1* %0, i64 0, i32 0, i32 0, i64 16
+ %0 = load %str1*, %str1** %arrayidx.i.i, align 8
+ %code1.i.i.phi.trans.insert = getelementptr inbounds %str1, %str1* %0, i64 0, i32 0, i32 0, i64 16
br label %sw.bb.i.i
}
diff --git a/test/CodeGen/AArch64/arm64-code-model-large-abs.ll b/test/CodeGen/AArch64/arm64-code-model-large-abs.ll
index 264da2da25bc..9f50fea370e4 100644
--- a/test/CodeGen/AArch64/arm64-code-model-large-abs.ll
+++ b/test/CodeGen/AArch64/arm64-code-model-large-abs.ll
@@ -18,7 +18,7 @@ define i8* @global_addr() {
define i8 @global_i8() {
; CHECK-LABEL: global_i8:
- %val = load i8* @var8
+ %val = load i8, i8* @var8
ret i8 %val
; CHECK: movz x[[ADDR_REG:[0-9]+]], #:abs_g3:var8
; CHECK: movk x[[ADDR_REG]], #:abs_g2_nc:var8
@@ -29,7 +29,7 @@ define i8 @global_i8() {
define i16 @global_i16() {
; CHECK-LABEL: global_i16:
- %val = load i16* @var16
+ %val = load i16, i16* @var16
ret i16 %val
; CHECK: movz x[[ADDR_REG:[0-9]+]], #:abs_g3:var16
; CHECK: movk x[[ADDR_REG]], #:abs_g2_nc:var16
@@ -40,7 +40,7 @@ define i16 @global_i16() {
define i32 @global_i32() {
; CHECK-LABEL: global_i32:
- %val = load i32* @var32
+ %val = load i32, i32* @var32
ret i32 %val
; CHECK: movz x[[ADDR_REG:[0-9]+]], #:abs_g3:var32
; CHECK: movk x[[ADDR_REG]], #:abs_g2_nc:var32
@@ -51,7 +51,7 @@ define i32 @global_i32() {
define i64 @global_i64() {
; CHECK-LABEL: global_i64:
- %val = load i64* @var64
+ %val = load i64, i64* @var64
ret i64 %val
; CHECK: movz x[[ADDR_REG:[0-9]+]], #:abs_g3:var64
; CHECK: movk x[[ADDR_REG]], #:abs_g2_nc:var64
diff --git a/test/CodeGen/AArch64/arm64-codegen-prepare-extload.ll b/test/CodeGen/AArch64/arm64-codegen-prepare-extload.ll
new file mode 100644
index 000000000000..f0b8299a66e3
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-codegen-prepare-extload.ll
@@ -0,0 +1,638 @@
+; RUN: opt -codegenprepare < %s -mtriple=aarch64-apple-ios -S | FileCheck %s --check-prefix=OPTALL --check-prefix=OPT --check-prefix=NONSTRESS
+; RUN: opt -codegenprepare < %s -mtriple=aarch64-apple-ios -S -stress-cgp-ext-ld-promotion | FileCheck %s --check-prefix=OPTALL --check-prefix=OPT --check-prefix=STRESS
+; RUN: opt -codegenprepare < %s -mtriple=aarch64-apple-ios -S -disable-cgp-ext-ld-promotion | FileCheck %s --check-prefix=OPTALL --check-prefix=DISABLE
+
+; CodeGenPrepare should move the zext into the block with the load
+; so that SelectionDAG can select it with the load.
+;
+; OPTALL-LABEL: @foo
+; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
+; OPTALL-NEXT: [[ZEXT:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
+; OPTALL: store i32 [[ZEXT]], i32* %q
+; OPTALL: ret
+define void @foo(i8* %p, i32* %q) {
+entry:
+ %t = load i8, i8* %p
+ %a = icmp slt i8 %t, 20
+ br i1 %a, label %true, label %false
+true:
+ %s = zext i8 %t to i32
+ store i32 %s, i32* %q
+ ret void
+false:
+ ret void
+}
+
+; Check that we manage to form a zextload when an operation with only one
+; argument to explicitly extend is in the way.
+; OPTALL-LABEL: @promoteOneArg
+; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
+; OPT-NEXT: [[ZEXT:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
+; OPT-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nuw i32 [[ZEXT]], 2
+; Make sure the operation is not promoted when the promotion pass is disabled.
+; DISABLE: [[ADD:%[a-zA-Z_0-9-]+]] = add nuw i8 [[LD]], 2
+; DISABLE: [[RES:%[a-zA-Z_0-9-]+]] = zext i8 [[ADD]] to i32
+; OPTALL: store i32 [[RES]], i32* %q
+; OPTALL: ret
+define void @promoteOneArg(i8* %p, i32* %q) {
+entry:
+ %t = load i8, i8* %p
+ %add = add nuw i8 %t, 2
+ %a = icmp slt i8 %t, 20
+ br i1 %a, label %true, label %false
+true:
+ %s = zext i8 %add to i32
+ store i32 %s, i32* %q
+ ret void
+false:
+ ret void
+}
+
+; Check that we manage to form a sextload when an operation with only one
+; argument to explicitly extend is in the way.
+; Version with sext.
+; OPTALL-LABEL: @promoteOneArgSExt
+; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
+; OPT-NEXT: [[SEXT:%[a-zA-Z_0-9-]+]] = sext i8 [[LD]] to i32
+; OPT-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nsw i32 [[SEXT]], 2
+; DISABLE: [[ADD:%[a-zA-Z_0-9-]+]] = add nsw i8 [[LD]], 2
+; DISABLE: [[RES:%[a-zA-Z_0-9-]+]] = sext i8 [[ADD]] to i32
+; OPTALL: store i32 [[RES]], i32* %q
+; OPTALL: ret
+define void @promoteOneArgSExt(i8* %p, i32* %q) {
+entry:
+ %t = load i8, i8* %p
+ %add = add nsw i8 %t, 2
+ %a = icmp slt i8 %t, 20
+ br i1 %a, label %true, label %false
+true:
+ %s = sext i8 %add to i32
+ store i32 %s, i32* %q
+ ret void
+false:
+ ret void
+}
+
+; Check that we manage to form a zextload when an operation with two
+; arguments to explicitly extend is in the way.
+; Extending %add will create two extensions:
+; 1. One for %b.
+; 2. One for %t.
+; #1 will not be removed as we do not know anything about %b.
+; #2 may not be merged with the load because %t is used in a comparison.
+; Since two extensions may be emitted in the end instead of one before the
+; transformation, the regular heuristic does not apply the optimization.
+;
+; OPTALL-LABEL: @promoteTwoArgZext
+; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
+;
+; STRESS-NEXT: [[ZEXTLD:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
+; STRESS-NEXT: [[ZEXTB:%[a-zA-Z_0-9-]+]] = zext i8 %b to i32
+; STRESS-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nuw i32 [[ZEXTLD]], [[ZEXTB]]
+;
+; NONSTRESS: [[ADD:%[a-zA-Z_0-9-]+]] = add nuw i8 [[LD]], %b
+; NONSTRESS: [[RES:%[a-zA-Z_0-9-]+]] = zext i8 [[ADD]] to i32
+;
+; DISABLE: [[ADD:%[a-zA-Z_0-9-]+]] = add nuw i8 [[LD]], %b
+; DISABLE: [[RES:%[a-zA-Z_0-9-]+]] = zext i8 [[ADD]] to i32
+;
+; OPTALL: store i32 [[RES]], i32* %q
+; OPTALL: ret
+define void @promoteTwoArgZext(i8* %p, i32* %q, i8 %b) {
+entry:
+ %t = load i8, i8* %p
+ %add = add nuw i8 %t, %b
+ %a = icmp slt i8 %t, 20
+ br i1 %a, label %true, label %false
+true:
+ %s = zext i8 %add to i32
+ store i32 %s, i32* %q
+ ret void
+false:
+ ret void
+}
+
+; Check that we manage to form a sextload when an operation with two
+; arguments to explicitly extend is in the way.
+; Version with sext.
+; OPTALL-LABEL: @promoteTwoArgSExt
+; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
+;
+; STRESS-NEXT: [[SEXTLD:%[a-zA-Z_0-9-]+]] = sext i8 [[LD]] to i32
+; STRESS-NEXT: [[SEXTB:%[a-zA-Z_0-9-]+]] = sext i8 %b to i32
+; STRESS-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nsw i32 [[SEXTLD]], [[SEXTB]]
+;
+; NONSTRESS: [[ADD:%[a-zA-Z_0-9-]+]] = add nsw i8 [[LD]], %b
+; NONSTRESS: [[RES:%[a-zA-Z_0-9-]+]] = sext i8 [[ADD]] to i32
+;
+; DISABLE: [[ADD:%[a-zA-Z_0-9-]+]] = add nsw i8 [[LD]], %b
+; DISABLE: [[RES:%[a-zA-Z_0-9-]+]] = sext i8 [[ADD]] to i32
+; OPTALL: store i32 [[RES]], i32* %q
+; OPTALL: ret
+define void @promoteTwoArgSExt(i8* %p, i32* %q, i8 %b) {
+entry:
+ %t = load i8, i8* %p
+ %add = add nsw i8 %t, %b
+ %a = icmp slt i8 %t, 20
+ br i1 %a, label %true, label %false
+true:
+ %s = sext i8 %add to i32
+ store i32 %s, i32* %q
+ ret void
+false:
+ ret void
+}
+
+; Check that we do not form a zextload if we need to introduce more than
+; one additional extension.
+; OPTALL-LABEL: @promoteThreeArgZext
+; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
+;
+; STRESS-NEXT: [[ZEXTLD:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
+; STRESS-NEXT: [[ZEXTB:%[a-zA-Z_0-9-]+]] = zext i8 %b to i32
+; STRESS-NEXT: [[TMP:%[a-zA-Z_0-9-]+]] = add nuw i32 [[ZEXTLD]], [[ZEXTB]]
+; STRESS-NEXT: [[ZEXTC:%[a-zA-Z_0-9-]+]] = zext i8 %c to i32
+; STRESS-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nuw i32 [[TMP]], [[ZEXTC]]
+;
+; NONSTRESS-NEXT: [[TMP:%[a-zA-Z_0-9-]+]] = add nuw i8 [[LD]], %b
+; NONSTRESS-NEXT: [[ADD:%[a-zA-Z_0-9-]+]] = add nuw i8 [[TMP]], %c
+; NONSTRESS: [[RES:%[a-zA-Z_0-9-]+]] = zext i8 [[ADD]] to i32
+;
+; DISABLE: add nuw i8
+; DISABLE: [[ADD:%[a-zA-Z_0-9-]+]] = add nuw i8
+; DISABLE: [[RES:%[a-zA-Z_0-9-]+]] = zext i8 [[ADD]] to i32
+;
+; OPTALL: store i32 [[RES]], i32* %q
+; OPTALL: ret
+define void @promoteThreeArgZext(i8* %p, i32* %q, i8 %b, i8 %c) {
+entry:
+ %t = load i8, i8* %p
+ %tmp = add nuw i8 %t, %b
+ %add = add nuw i8 %tmp, %c
+ %a = icmp slt i8 %t, 20
+ br i1 %a, label %true, label %false
+true:
+ %s = zext i8 %add to i32
+ store i32 %s, i32* %q
+ ret void
+false:
+ ret void
+}
+
+; Check that we manage to form a zextload after promoting and merging
+; two extensions.
+; OPTALL-LABEL: @promoteMergeExtArgZExt
+; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
+;
+; STRESS-NEXT: [[ZEXTLD:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
+; STRESS-NEXT: [[ZEXTB:%[a-zA-Z_0-9-]+]] = zext i16 %b to i32
+; STRESS-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nuw i32 [[ZEXTLD]], [[ZEXTB]]
+;
+; NONSTRESS: [[ZEXTLD:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i16
+; NONSTRESS: [[ADD:%[a-zA-Z_0-9-]+]] = add nuw i16 [[ZEXTLD]], %b
+; NONSTRESS: [[RES:%[a-zA-Z_0-9-]+]] = zext i16 [[ADD]] to i32
+;
+; DISABLE: [[ZEXTLD:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i16
+; DISABLE: [[ADD:%[a-zA-Z_0-9-]+]] = add nuw i16 [[ZEXTLD]], %b
+; DISABLE: [[RES:%[a-zA-Z_0-9-]+]] = zext i16 [[ADD]] to i32
+;
+; OPTALL: store i32 [[RES]], i32* %q
+; OPTALL: ret
+define void @promoteMergeExtArgZExt(i8* %p, i32* %q, i16 %b) {
+entry:
+ %t = load i8, i8* %p
+ %ext = zext i8 %t to i16
+ %add = add nuw i16 %ext, %b
+ %a = icmp slt i8 %t, 20
+ br i1 %a, label %true, label %false
+true:
+ %s = zext i16 %add to i32
+ store i32 %s, i32* %q
+ ret void
+false:
+ ret void
+}
+
+; Check that we manage to form a sextload after promoting and merging
+; two extensions.
+; Version with sext.
+; OPTALL-LABEL: @promoteMergeExtArgSExt
+; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
+;
+; STRESS-NEXT: [[ZEXTLD:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
+; STRESS-NEXT: [[ZEXTB:%[a-zA-Z_0-9-]+]] = sext i16 %b to i32
+; STRESS-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nsw i32 [[ZEXTLD]], [[ZEXTB]]
+;
+; NONSTRESS: [[ZEXTLD:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i16
+; NONSTRESS: [[ADD:%[a-zA-Z_0-9-]+]] = add nsw i16 [[ZEXTLD]], %b
+; NONSTRESS: [[RES:%[a-zA-Z_0-9-]+]] = sext i16 [[ADD]] to i32
+;
+; DISABLE: [[ZEXTLD:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i16
+; DISABLE: [[ADD:%[a-zA-Z_0-9-]+]] = add nsw i16 [[ZEXTLD]], %b
+; DISABLE: [[RES:%[a-zA-Z_0-9-]+]] = sext i16 [[ADD]] to i32
+; OPTALL: store i32 [[RES]], i32* %q
+; OPTALL: ret
+define void @promoteMergeExtArgSExt(i8* %p, i32* %q, i16 %b) {
+entry:
+ %t = load i8, i8* %p
+ %ext = zext i8 %t to i16
+ %add = add nsw i16 %ext, %b
+ %a = icmp slt i8 %t, 20
+ br i1 %a, label %true, label %false
+true:
+ %s = sext i16 %add to i32
+ store i32 %s, i32* %q
+ ret void
+false:
+ ret void
+}
+
+; Check that we manage to catch all the extload opportunities that are exposed
+; by the different iterations of codegen prepare.
+; Moreover, check that we do not promote more than we need to.
+; Here is what is happening in this test (not necessarily in this order):
+; 1. We try to promote the operand of %sextadd.
+; a. This creates one sext of %ld2 and one of %zextld
+; b. The sext of %ld2 can be combined with %ld2, so we remove one sext but
+;    introduce one. This is fine with the current heuristic: neutral.
+; => We have one zext of %zextld left and we created one sext of %ld2.
+; 2. We try to promote the operand of %sextaddza.
+; a. This creates one sext of %zexta and one of %zextld
+; b. The sext of %zexta does not lead to any load; it stays here, even if it
+;    could have been combined with the zext of %a.
+; c. The sext of %zextld leads to %ld and can be combined with it. This is
+; done by promoting %zextld. This is fine with the current heuristic:
+; neutral.
+; => We have created a new zext of %ld and we created one sext of %zexta.
+; 3. We try to promote the operand of %sextaddb.
+; a. This creates one sext of %b and one of %zextld
+; b. The sext of %b is a dead end; nothing to be done.
+; c. Same thing as 2.c. happens.
+; => We have created a new zext of %ld and we created one sext of %b.
+; 4. We try to promote the operand of the zext of %zextld introduced in #1.
+; a. Same thing as 2.c. happens.
+; b. %zextld does not have any other uses. It is dead-code eliminated.
+; => We have created a new zext of %ld and we removed a zext of %zextld and
+; a zext of %ld.
+; Currently we do not try to reuse existing extensions, so in the end we have
+; 3 identical zexts of %ld. The extensions will be CSE'ed by SDag.
+;
+; OPTALL-LABEL: @severalPromotions
+; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %addr1
+; OPT-NEXT: [[ZEXTLD1_1:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i64
+; OPT-NEXT: [[ZEXTLD1_2:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i64
+; OPT-NEXT: [[ZEXTLD1_3:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i64
+; OPT-NEXT: [[LD2:%[a-zA-Z_0-9-]+]] = load i32, i32* %addr2
+; OPT-NEXT: [[SEXTLD2:%[a-zA-Z_0-9-]+]] = sext i32 [[LD2]] to i64
+; OPT-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nsw i64 [[SEXTLD2]], [[ZEXTLD1_1]]
+; We do not combine this one: see 2.b.
+; OPT-NEXT: [[ZEXTA:%[a-zA-Z_0-9-]+]] = zext i8 %a to i32
+; OPT-NEXT: [[SEXTZEXTA:%[a-zA-Z_0-9-]+]] = sext i32 [[ZEXTA]] to i64
+; OPT-NEXT: [[RESZA:%[a-zA-Z_0-9-]+]] = add nsw i64 [[SEXTZEXTA]], [[ZEXTLD1_3]]
+; OPT-NEXT: [[SEXTB:%[a-zA-Z_0-9-]+]] = sext i32 %b to i64
+; OPT-NEXT: [[RESB:%[a-zA-Z_0-9-]+]] = add nsw i64 [[SEXTB]], [[ZEXTLD1_2]]
+;
+; DISABLE: [[ADD:%[a-zA-Z_0-9-]+]] = add nsw i32
+; DISABLE: [[RES:%[a-zA-Z_0-9-]+]] = sext i32 [[ADD]] to i64
+; DISABLE: [[ADDZA:%[a-zA-Z_0-9-]+]] = add nsw i32
+; DISABLE: [[RESZA:%[a-zA-Z_0-9-]+]] = sext i32 [[ADDZA]] to i64
+; DISABLE: [[ADDB:%[a-zA-Z_0-9-]+]] = add nsw i32
+; DISABLE: [[RESB:%[a-zA-Z_0-9-]+]] = sext i32 [[ADDB]] to i64
+;
+; OPTALL: call void @dummy(i64 [[RES]], i64 [[RESZA]], i64 [[RESB]])
+; OPTALL: ret
+define void @severalPromotions(i8* %addr1, i32* %addr2, i8 %a, i32 %b) {
+ %ld = load i8, i8* %addr1
+ %zextld = zext i8 %ld to i32
+ %ld2 = load i32, i32* %addr2
+ %add = add nsw i32 %ld2, %zextld
+ %sextadd = sext i32 %add to i64
+ %zexta = zext i8 %a to i32
+ %addza = add nsw i32 %zexta, %zextld
+ %sextaddza = sext i32 %addza to i64
+ %addb = add nsw i32 %b, %zextld
+ %sextaddb = sext i32 %addb to i64
+ call void @dummy(i64 %sextadd, i64 %sextaddza, i64 %sextaddb)
+ ret void
+}
+
+declare void @dummy(i64, i64, i64)
+
+; Make sure we do not try to promote vector types since the type promotion
+; helper does not support them for now.
+; OPTALL-LABEL: @vectorPromotion
+; OPTALL: [[SHL:%[a-zA-Z_0-9-]+]] = shl nuw nsw <2 x i32> zeroinitializer, <i32 8, i32 8>
+; OPTALL: [[ZEXT:%[a-zA-Z_0-9-]+]] = zext <2 x i32> [[SHL]] to <2 x i64>
+; OPTALL: ret
+define void @vectorPromotion() {
+entry:
+ %a = shl nuw nsw <2 x i32> zeroinitializer, <i32 8, i32 8>
+ %b = zext <2 x i32> %a to <2 x i64>
+ ret void
+}
+
+@a = common global i32 0, align 4
+@c = common global [2 x i32] zeroinitializer, align 4
+
+; Make sure we support promotion of operands that produce a Value as opposed
+; to an instruction.
+; This used to cause a crash.
+; OPTALL-LABEL: @promotionOfArgEndsUpInValue
+; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i16, i16* %addr
+;
+; OPT-NEXT: [[SEXT:%[a-zA-Z_0-9-]+]] = sext i16 [[LD]] to i32
+; OPT-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nuw nsw i32 [[SEXT]], zext (i1 icmp ne (i32* getelementptr inbounds ([2 x i32], [2 x i32]* @c, i64 0, i64 1), i32* @a) to i32)
+;
+; DISABLE-NEXT: [[ADD:%[a-zA-Z_0-9-]+]] = add nuw nsw i16 [[LD]], zext (i1 icmp ne (i32* getelementptr inbounds ([2 x i32], [2 x i32]* @c, i64 0, i64 1), i32* @a) to i16)
+; DISABLE-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = sext i16 [[ADD]] to i32
+;
+; OPTALL-NEXT: ret i32 [[RES]]
+define i32 @promotionOfArgEndsUpInValue(i16* %addr) {
+entry:
+ %val = load i16, i16* %addr
+ %add = add nuw nsw i16 %val, zext (i1 icmp ne (i32* getelementptr inbounds ([2 x i32], [2 x i32]* @c, i64 0, i64 1), i32* @a) to i16)
+ %conv3 = sext i16 %add to i32
+ ret i32 %conv3
+}
+
+; Check that we see that one zext can be derived from the other for free.
+; OPTALL-LABEL: @promoteTwoArgZextWithSourceExtendedTwice
+; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
+;
+; OPT-NEXT: [[ZEXT64:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i64
+; OPT-NEXT: [[ZEXT32:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
+; OPT-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = add nuw i32 [[ZEXT32]], %b
+; OPT-NEXT: [[RES64:%[a-zA-Z_0-9-]+]] = add nuw i64 [[ZEXT64]], 12
+; OPT-NEXT: store i32 [[RES32]], i32* %addr
+; OPT-NEXT: store i64 [[RES64]], i64* %q
+;
+; DISABLE-NEXT: [[ZEXT32:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
+; DISABLE-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = add nuw i32 [[ZEXT32]], %b
+; DISABLE-NEXT: [[RES2_32:%[a-zA-Z_0-9-]+]] = add nuw i32 [[ZEXT32]], 12
+; DISABLE-NEXT: store i32 [[RES32]], i32* %addr
+; DISABLE-NEXT: [[ZEXT64:%[a-zA-Z_0-9-]+]] = zext i32 [[RES2_32]] to i64
+; DISABLE-NEXT: store i64 [[ZEXT64]], i64* %q
+;
+; OPTALL-NEXT: ret void
+define void @promoteTwoArgZextWithSourceExtendedTwice(i8* %p, i64* %q, i32 %b, i32* %addr) {
+entry:
+ %t = load i8, i8* %p
+ %zextt = zext i8 %t to i32
+ %add = add nuw i32 %zextt, %b
+ %add2 = add nuw i32 %zextt, 12
+ store i32 %add, i32 *%addr
+ %s = zext i32 %add2 to i64
+ store i64 %s, i64* %q
+ ret void
+}
+
+; Check that we do not increase the cost of the code.
+; The input has one free zext and one free sext. If we promoted all the way
+; through the load, we would end up with a free zext and a non-free sext
+; (of %b).
+; OPTALL-LABEL: @doNotPromoteFreeSExtFromAddrMode
+; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
+;
+; STRESS-NEXT: [[ZEXT64:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i64
+; STRESS-NEXT: [[SEXTB:%[a-zA-Z_0-9-]+]] = sext i32 %b to i64
+; STRESS-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = add nsw i64 [[ZEXT64]], [[SEXTB]]
+; STRESS-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = trunc i64 [[IDX64]] to i32
+;
+; NONSTRESS-NEXT: [[ZEXT32:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
+; NONSTRESS-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = add nsw i32 [[ZEXT32]], %b
+; NONSTRESS-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = sext i32 [[RES32]] to i64
+;
+; DISABLE-NEXT: [[ZEXT32:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
+; DISABLE-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = add nsw i32 [[ZEXT32]], %b
+; DISABLE-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = sext i32 [[RES32]] to i64
+;
+; OPTALL-NEXT: [[GEP:%[a-zA-Z_0-9-]+]] = getelementptr inbounds i32, i32* %addr, i64 [[IDX64]]
+; OPTALL-NEXT: store i32 [[RES32]], i32* [[GEP]]
+; OPTALL-NEXT: ret void
+define void @doNotPromoteFreeSExtFromAddrMode(i8* %p, i32 %b, i32* %addr) {
+entry:
+ %t = load i8, i8* %p
+ %zextt = zext i8 %t to i32
+ %add = add nsw i32 %zextt, %b
+ %idx64 = sext i32 %add to i64
+ %staddr = getelementptr inbounds i32, i32* %addr, i64 %idx64
+ store i32 %add, i32 *%staddr
+ ret void
+}
+
+; Check that we do not increase the cost of the code.
+; The input has one free zext and one free sext. If we promoted all the way
+; through the load, we would end up with a free zext and a non-free sext
+; (of %b).
+; OPTALL-LABEL: @doNotPromoteFreeSExtFromAddrMode64
+; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
+;
+; STRESS-NEXT: [[ZEXT64:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i64
+; STRESS-NEXT: [[SEXTB:%[a-zA-Z_0-9-]+]] = sext i32 %b to i64
+; STRESS-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = add nsw i64 [[ZEXT64]], [[SEXTB]]
+;
+; NONSTRESS-NEXT: [[ZEXT32:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
+; NONSTRESS-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = add nsw i32 [[ZEXT32]], %b
+; NONSTRESS-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = sext i32 [[RES32]] to i64
+;
+; DISABLE-NEXT: [[ZEXT32:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
+; DISABLE-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = add nsw i32 [[ZEXT32]], %b
+; DISABLE-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = sext i32 [[RES32]] to i64
+;
+; OPTALL-NEXT: [[GEP:%[a-zA-Z_0-9-]+]] = getelementptr inbounds i64, i64* %addr, i64 [[IDX64]]
+; OPTALL-NEXT: store i64 %stuff, i64* [[GEP]]
+; OPTALL-NEXT: ret void
+define void @doNotPromoteFreeSExtFromAddrMode64(i8* %p, i32 %b, i64* %addr, i64 %stuff) {
+entry:
+ %t = load i8, i8* %p
+ %zextt = zext i8 %t to i32
+ %add = add nsw i32 %zextt, %b
+ %idx64 = sext i32 %add to i64
+ %staddr = getelementptr inbounds i64, i64* %addr, i64 %idx64
+ store i64 %stuff, i64 *%staddr
+ ret void
+}
+
+; Check that we do not increase the cost of the code.
+; The input has one free zext and one free sext. If we promoted all the way
+; through the load, we would end up with a free zext and a non-free sext
+; (of %b).
+; OPTALL-LABEL: @doNotPromoteFreeSExtFromAddrMode128
+; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
+;
+; STRESS-NEXT: [[ZEXT64:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i64
+; STRESS-NEXT: [[SEXTB:%[a-zA-Z_0-9-]+]] = sext i32 %b to i64
+; STRESS-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = add nsw i64 [[ZEXT64]], [[SEXTB]]
+;
+; NONSTRESS-NEXT: [[ZEXT32:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
+; NONSTRESS-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = add nsw i32 [[ZEXT32]], %b
+; NONSTRESS-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = sext i32 [[RES32]] to i64
+;
+; DISABLE-NEXT: [[ZEXT32:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
+; DISABLE-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = add nsw i32 [[ZEXT32]], %b
+; DISABLE-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = sext i32 [[RES32]] to i64
+;
+; OPTALL-NEXT: [[GEP:%[a-zA-Z_0-9-]+]] = getelementptr inbounds i128, i128* %addr, i64 [[IDX64]]
+; OPTALL-NEXT: store i128 %stuff, i128* [[GEP]]
+; OPTALL-NEXT: ret void
+define void @doNotPromoteFreeSExtFromAddrMode128(i8* %p, i32 %b, i128* %addr, i128 %stuff) {
+entry:
+ %t = load i8, i8* %p
+ %zextt = zext i8 %t to i32
+ %add = add nsw i32 %zextt, %b
+ %idx64 = sext i32 %add to i64
+ %staddr = getelementptr inbounds i128, i128* %addr, i64 %idx64
+ store i128 %stuff, i128 *%staddr
+ ret void
+}
+
+
+; Check that we do not increase the cost of the code.
+; The input has one free zext and one free sext. If we promoted all the way
+; through the load, we would end up with a free zext and a non-free sext
+; (of %b).
+; OPTALL-LABEL: @promoteSExtFromAddrMode256
+; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
+;
+; OPT-NEXT: [[ZEXT64:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i64
+; OPT-NEXT: [[SEXTB:%[a-zA-Z_0-9-]+]] = sext i32 %b to i64
+; OPT-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = add nsw i64 [[ZEXT64]], [[SEXTB]]
+;
+; DISABLE-NEXT: [[ZEXT32:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
+; DISABLE-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = add nsw i32 [[ZEXT32]], %b
+; DISABLE-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = sext i32 [[RES32]] to i64
+;
+; OPTALL-NEXT: [[GEP:%[a-zA-Z_0-9-]+]] = getelementptr inbounds i256, i256* %addr, i64 [[IDX64]]
+; OPTALL-NEXT: store i256 %stuff, i256* [[GEP]]
+; OPTALL-NEXT: ret void
+define void @promoteSExtFromAddrMode256(i8* %p, i32 %b, i256* %addr, i256 %stuff) {
+entry:
+ %t = load i8, i8* %p
+ %zextt = zext i8 %t to i32
+ %add = add nsw i32 %zextt, %b
+ %idx64 = sext i32 %add to i64
+ %staddr = getelementptr inbounds i256, i256* %addr, i64 %idx64
+ store i256 %stuff, i256 *%staddr
+ ret void
+}
+
+; Check that we do not increase the cost of the code.
+; The input has two free zexts.
+; When we promote all the way through the load, we end up with
+; a free zext and a non-free zext (of %b).
+; However, the current target lowering says zext i32 to i64 is free
+; so the promotion happens because the cost did not change and may
+; expose more opportunities.
+; This would need to be fixed at some point.
+; OPTALL-LABEL: @doNotPromoteFreeZExtFromAddrMode
+; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
+;
+; This transformation should really happen only for stress mode.
+; OPT-NEXT: [[ZEXT64:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i64
+; OPT-NEXT: [[ZEXTB:%[a-zA-Z_0-9-]+]] = zext i32 %b to i64
+; OPT-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = add nuw i64 [[ZEXT64]], [[ZEXTB]]
+; OPT-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = trunc i64 [[IDX64]] to i32
+;
+; DISABLE-NEXT: [[ZEXT32:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
+; DISABLE-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = add nuw i32 [[ZEXT32]], %b
+; DISABLE-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = zext i32 [[RES32]] to i64
+;
+; OPTALL-NEXT: [[GEP:%[a-zA-Z_0-9-]+]] = getelementptr inbounds i32, i32* %addr, i64 [[IDX64]]
+; OPTALL-NEXT: store i32 [[RES32]], i32* [[GEP]]
+; OPTALL-NEXT: ret void
+define void @doNotPromoteFreeZExtFromAddrMode(i8* %p, i32 %b, i32* %addr) {
+entry:
+ %t = load i8, i8* %p
+ %zextt = zext i8 %t to i32
+ %add = add nuw i32 %zextt, %b
+ %idx64 = zext i32 %add to i64
+ %staddr = getelementptr inbounds i32, i32* %addr, i64 %idx64
+ store i32 %add, i32 *%staddr
+ ret void
+}
+
+; OPTALL-LABEL: @doNotPromoteFreeSExtFromShift
+; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
+;
+; STRESS-NEXT: [[ZEXT64:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i64
+; STRESS-NEXT: [[SEXTB:%[a-zA-Z_0-9-]+]] = sext i32 %b to i64
+; STRESS-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = add nsw i64 [[ZEXT64]], [[SEXTB]]
+;
+; NONSTRESS-NEXT: [[ZEXT32:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
+; NONSTRESS-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = add nsw i32 [[ZEXT32]], %b
+; NONSTRESS-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = sext i32 [[RES32]] to i64
+;
+; DISABLE-NEXT: [[ZEXT32:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
+; DISABLE-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = add nsw i32 [[ZEXT32]], %b
+; DISABLE-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = sext i32 [[RES32]] to i64
+;
+; OPTALL-NEXT: [[RES64:%[a-zA-Z_0-9-]+]] = shl i64 [[IDX64]], 12
+; OPTALL-NEXT: ret i64 %staddr
+define i64 @doNotPromoteFreeSExtFromShift(i8* %p, i32 %b) {
+entry:
+ %t = load i8, i8* %p
+ %zextt = zext i8 %t to i32
+ %add = add nsw i32 %zextt, %b
+ %idx64 = sext i32 %add to i64
+ %staddr = shl i64 %idx64, 12
+ ret i64 %staddr
+}
+
+; Same comment as doNotPromoteFreeZExtFromAddrMode.
+; OPTALL-LABEL: @doNotPromoteFreeZExtFromShift
+; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
+;
+; This transformation should really happen only for stress mode.
+; OPT-NEXT: [[ZEXT64:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i64
+; OPT-NEXT: [[ZEXTB:%[a-zA-Z_0-9-]+]] = zext i32 %b to i64
+; OPT-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = add nuw i64 [[ZEXT64]], [[ZEXTB]]
+;
+; DISABLE-NEXT: [[ZEXT32:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
+; DISABLE-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = add nuw i32 [[ZEXT32]], %b
+; DISABLE-NEXT: [[IDX64:%[a-zA-Z_0-9-]+]] = zext i32 [[RES32]] to i64
+;
+; OPTALL-NEXT: [[RES64:%[a-zA-Z_0-9-]+]] = shl i64 [[IDX64]], 12
+; OPTALL-NEXT: ret i64 %staddr
+define i64 @doNotPromoteFreeZExtFromShift(i8* %p, i32 %b) {
+entry:
+ %t = load i8, i8* %p
+ %zextt = zext i8 %t to i32
+ %add = add nuw i32 %zextt, %b
+ %idx64 = zext i32 %add to i64
+ %staddr = shl i64 %idx64, 12
+ ret i64 %staddr
+}
+
+; The input has one free zext and one non-free sext.
+; When we promote all the way through to the load, we end up with
+; a free zext, a free sext (%ld1), and a non-free sext (of %cst).
+; However, when we generate the load pair, the free sext (%ld1) becomes
+; non-free. So technically, we trade one non-free sext for two non-free
+; sexts.
+; This would need to be fixed at some point.
+; OPTALL-LABEL: @doNotPromoteBecauseOfPairedLoad
+; OPTALL: [[LD0:%[a-zA-Z_0-9-]+]] = load i32, i32* %p
+; OPTALL: [[GEP:%[a-zA-Z_0-9-]+]] = getelementptr inbounds i32, i32* %p, i64 1
+; OPTALL: [[LD1:%[a-zA-Z_0-9-]+]] = load i32, i32* [[GEP]]
+;
+; This transformation should really happen only for stress mode.
+; OPT-NEXT: [[SEXTLD1:%[a-zA-Z_0-9-]+]] = sext i32 [[LD1]] to i64
+; OPT-NEXT: [[SEXTCST:%[a-zA-Z_0-9-]+]] = sext i32 %cst to i64
+; OPT-NEXT: [[SEXTRES:%[a-zA-Z_0-9-]+]] = add nsw i64 [[SEXTLD1]], [[SEXTCST]]
+;
+; DISABLE-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nsw i32 [[LD1]], %cst
+; DISABLE-NEXT: [[SEXTRES:%[a-zA-Z_0-9-]+]] = sext i32 [[RES]] to i64
+;
+; OPTALL-NEXT: [[ZEXTLD0:%[a-zA-Z_0-9-]+]] = zext i32 [[LD0]] to i64
+; OPTALL-NEXT: [[FINAL:%[a-zA-Z_0-9-]+]] = add i64 [[SEXTRES]], [[ZEXTLD0]]
+; OPTALL-NEXT: ret i64 [[FINAL]]
+define i64 @doNotPromoteBecauseOfPairedLoad(i32* %p, i32 %cst) {
+ %ld0 = load i32, i32* %p
+ %idxLd1 = getelementptr inbounds i32, i32* %p, i64 1
+ %ld1 = load i32, i32* %idxLd1
+ %res = add nsw i32 %ld1, %cst
+ %sextres = sext i32 %res to i64
+ %zextLd0 = zext i32 %ld0 to i64
+ %final = add i64 %sextres, %zextLd0
+ ret i64 %final
+}
diff --git a/test/CodeGen/AArch64/arm64-collect-loh-garbage-crash.ll b/test/CodeGen/AArch64/arm64-collect-loh-garbage-crash.ll
index 81cee38420aa..e34ef39bcfec 100644
--- a/test/CodeGen/AArch64/arm64-collect-loh-garbage-crash.ll
+++ b/test/CodeGen/AArch64/arm64-collect-loh-garbage-crash.ll
@@ -22,13 +22,13 @@ define void @foo() {
entry:
br label %if.then83
if.then83: ; preds = %if.end81
- %tmp = load %"class.H4ISP::H4ISPDevice"** @pH4ISPDevice, align 8
+ %tmp = load %"class.H4ISP::H4ISPDevice"*, %"class.H4ISP::H4ISPDevice"** @pH4ISPDevice, align 8
%call84 = call i32 @_ZN5H4ISP11H4ISPDevice32ISP_SelectBestMIPIFrequencyIndexEjPj(%"class.H4ISP::H4ISPDevice"* %tmp) #19
tail call void asm sideeffect "", "~{x19},~{x20},~{x21},~{x22},~{x23},~{x24},~{x25},~{x26},~{x27}"()
- %tmp2 = load %"class.H4ISP::H4ISPDevice"** @pH4ISPDevice, align 8
+ %tmp2 = load %"class.H4ISP::H4ISPDevice"*, %"class.H4ISP::H4ISPDevice"** @pH4ISPDevice, align 8
tail call void asm sideeffect "", "~{x19},~{x20},~{x21},~{x22},~{x23},~{x24},~{x25},~{x26},~{x28}"()
- %pCameraManager.i268 = getelementptr inbounds %"class.H4ISP::H4ISPDevice"* %tmp2, i64 0, i32 3
- %tmp3 = load %"class.H4ISP::H4ISPCameraManager"** %pCameraManager.i268, align 8
+ %pCameraManager.i268 = getelementptr inbounds %"class.H4ISP::H4ISPDevice", %"class.H4ISP::H4ISPDevice"* %tmp2, i64 0, i32 3
+ %tmp3 = load %"class.H4ISP::H4ISPCameraManager"*, %"class.H4ISP::H4ISPCameraManager"** %pCameraManager.i268, align 8
%tobool.i269 = icmp eq %"class.H4ISP::H4ISPCameraManager"* %tmp3, null
br i1 %tobool.i269, label %if.then83, label %end
end:
diff --git a/test/CodeGen/AArch64/arm64-collect-loh-str.ll b/test/CodeGen/AArch64/arm64-collect-loh-str.ll
index d7bc00e318f7..8889cb4bf52a 100644
--- a/test/CodeGen/AArch64/arm64-collect-loh-str.ll
+++ b/test/CodeGen/AArch64/arm64-collect-loh-str.ll
@@ -15,8 +15,8 @@
; CHECK-NOT: AdrpAddStr
define i32 @pptp_wan_init() {
entry:
- store i32* null, i32** getelementptr inbounds (%struct.anon* @pptp_wan_head, i64 0, i32 0), align 8
- store i32** getelementptr inbounds (%struct.anon* @pptp_wan_head, i64 0, i32 0), i32*** getelementptr inbounds (%struct.anon* @pptp_wan_head, i64 0, i32 1), align 8
+ store i32* null, i32** getelementptr inbounds (%struct.anon, %struct.anon* @pptp_wan_head, i64 0, i32 0), align 8
+ store i32** getelementptr inbounds (%struct.anon, %struct.anon* @pptp_wan_head, i64 0, i32 0), i32*** getelementptr inbounds (%struct.anon, %struct.anon* @pptp_wan_head, i64 0, i32 1), align 8
ret i32 0
}
diff --git a/test/CodeGen/AArch64/arm64-collect-loh.ll b/test/CodeGen/AArch64/arm64-collect-loh.ll
index 6d73daac6209..c0aa63cc4331 100644
--- a/test/CodeGen/AArch64/arm64-collect-loh.ll
+++ b/test/CodeGen/AArch64/arm64-collect-loh.ll
@@ -12,7 +12,7 @@
; Function Attrs: noinline nounwind ssp
define void @foo(i32 %t) {
entry:
- %tmp = load i32* @a, align 4
+ %tmp = load i32, i32* @a, align 4
%add = add nsw i32 %tmp, %t
store i32 %add, i32* @a, align 4
ret void
@@ -32,22 +32,22 @@ entry:
br i1 %cmp, label %if.then, label %if.end4
if.then: ; preds = %entry
- %tmp = load i32* @a, align 4
+ %tmp = load i32, i32* @a, align 4
%add = add nsw i32 %tmp, %t
%cmp1 = icmp sgt i32 %add, 12
br i1 %cmp1, label %if.then2, label %if.end4
if.then2: ; preds = %if.then
tail call void @foo(i32 %add)
- %tmp1 = load i32* @a, align 4
+ %tmp1 = load i32, i32* @a, align 4
br label %if.end4
if.end4: ; preds = %if.then2, %if.then, %entry
%t.addr.0 = phi i32 [ %tmp1, %if.then2 ], [ %t, %if.then ], [ %t, %entry ]
- %tmp2 = load i32* @b, align 4
+ %tmp2 = load i32, i32* @b, align 4
%add5 = add nsw i32 %tmp2, %t.addr.0
tail call void @foo(i32 %add5)
- %tmp3 = load i32* @b, align 4
+ %tmp3 = load i32, i32* @b, align 4
%add6 = add nsw i32 %tmp3, %t.addr.0
ret i32 %add6
}
diff --git a/test/CodeGen/AArch64/arm64-complex-copy-noneon.ll b/test/CodeGen/AArch64/arm64-complex-copy-noneon.ll
index f65b11612828..938bc62808f5 100644
--- a/test/CodeGen/AArch64/arm64-complex-copy-noneon.ll
+++ b/test/CodeGen/AArch64/arm64-complex-copy-noneon.ll
@@ -8,13 +8,13 @@ define void @store_combine() nounwind {
%src = alloca { double, double }, align 8
%dst = alloca { double, double }, align 8
- %src.realp = getelementptr inbounds { double, double }* %src, i32 0, i32 0
- %src.real = load double* %src.realp
- %src.imagp = getelementptr inbounds { double, double }* %src, i32 0, i32 1
- %src.imag = load double* %src.imagp
+ %src.realp = getelementptr inbounds { double, double }, { double, double }* %src, i32 0, i32 0
+ %src.real = load double, double* %src.realp
+ %src.imagp = getelementptr inbounds { double, double }, { double, double }* %src, i32 0, i32 1
+ %src.imag = load double, double* %src.imagp
- %dst.realp = getelementptr inbounds { double, double }* %dst, i32 0, i32 0
- %dst.imagp = getelementptr inbounds { double, double }* %dst, i32 0, i32 1
+ %dst.realp = getelementptr inbounds { double, double }, { double, double }* %dst, i32 0, i32 0
+ %dst.imagp = getelementptr inbounds { double, double }, { double, double }* %dst, i32 0, i32 1
store double %src.real, double* %dst.realp
store double %src.imag, double* %dst.imagp
ret void
diff --git a/test/CodeGen/AArch64/arm64-const-addr.ll b/test/CodeGen/AArch64/arm64-const-addr.ll
index c55a9226cc7a..ffc153344d3a 100644
--- a/test/CodeGen/AArch64/arm64-const-addr.ll
+++ b/test/CodeGen/AArch64/arm64-const-addr.ll
@@ -10,13 +10,13 @@ define i32 @test1() nounwind {
; CHECK-NEXT: ldp w9, w10, [x8, #4]
; CHECK: ldr w8, [x8, #12]
%at = inttoptr i64 68141056 to %T*
- %o1 = getelementptr %T* %at, i32 0, i32 1
- %t1 = load i32* %o1
- %o2 = getelementptr %T* %at, i32 0, i32 2
- %t2 = load i32* %o2
+ %o1 = getelementptr %T, %T* %at, i32 0, i32 1
+ %t1 = load i32, i32* %o1
+ %o2 = getelementptr %T, %T* %at, i32 0, i32 2
+ %t2 = load i32, i32* %o2
%a1 = add i32 %t1, %t2
- %o3 = getelementptr %T* %at, i32 0, i32 3
- %t3 = load i32* %o3
+ %o3 = getelementptr %T, %T* %at, i32 0, i32 3
+ %t3 = load i32, i32* %o3
%a2 = add i32 %a1, %t3
ret i32 %a2
}
diff --git a/test/CodeGen/AArch64/arm64-convert-v4f64.ll b/test/CodeGen/AArch64/arm64-convert-v4f64.ll
index 7123e5e0b235..b8da39910312 100644
--- a/test/CodeGen/AArch64/arm64-convert-v4f64.ll
+++ b/test/CodeGen/AArch64/arm64-convert-v4f64.ll
@@ -3,31 +3,64 @@
define <4 x i16> @fptosi_v4f64_to_v4i16(<4 x double>* %ptr) {
; CHECK: fptosi_v4f64_to_v4i16
-; CHECK-DAG: fcvtzs v[[LHS:[0-9]+]].2d, v1.2d
-; CHECK-DAG: fcvtzs v[[RHS:[0-9]+]].2d, v0.2d
-; CHECK-DAG: xtn v[[LHS_NA:[0-9]+]].2s, v[[LHS]].2d
-; CHECK-DAG: xtn v[[RHS_NA:[0-9]+]].2s, v[[RHS]].2d
-; CHECK: uzp1 v0.4h, v[[RHS_NA]].4h, v[[LHS_NA]].4h
- %tmp1 = load <4 x double>* %ptr
+; CHECK-DAG: fcvtzs v[[LHS:[0-9]+]].2d, v0.2d
+; CHECK-DAG: fcvtzs v[[RHS:[0-9]+]].2d, v1.2d
+; CHECK-DAG: xtn v[[MID:[0-9]+]].2s, v[[LHS]].2d
+; CHECK-DAG: xtn2 v[[MID]].4s, v[[RHS]].2d
+; CHECK: xtn v0.4h, v[[MID]].4s
+ %tmp1 = load <4 x double>, <4 x double>* %ptr
%tmp2 = fptosi <4 x double> %tmp1 to <4 x i16>
ret <4 x i16> %tmp2
}
define <8 x i8> @fptosi_v4f64_to_v4i8(<8 x double>* %ptr) {
; CHECK: fptosi_v4f64_to_v4i8
-; CHECK-DAG: fcvtzs v[[CONV3:[0-9]+]].2d, v3.2d
-; CHECK-DAG: fcvtzs v[[CONV2:[0-9]+]].2d, v2.2d
-; CHECK-DAG: fcvtzs v[[CONV1:[0-9]+]].2d, v1.2d
; CHECK-DAG: fcvtzs v[[CONV0:[0-9]+]].2d, v0.2d
-; CHECK-DAG: xtn v[[NA3:[0-9]+]].2s, v[[CONV3]].2d
+; CHECK-DAG: fcvtzs v[[CONV1:[0-9]+]].2d, v1.2d
+; CHECK-DAG: fcvtzs v[[CONV2:[0-9]+]].2d, v2.2d
+; CHECK-DAG: fcvtzs v[[CONV3:[0-9]+]].2d, v3.2d
; CHECK-DAG: xtn v[[NA2:[0-9]+]].2s, v[[CONV2]].2d
-; CHECK-DAG: xtn v[[NA1:[0-9]+]].2s, v[[CONV1]].2d
+; CHECK-DAG: xtn2 v[[NA2]].4s, v[[CONV3]].2d
; CHECK-DAG: xtn v[[NA0:[0-9]+]].2s, v[[CONV0]].2d
-; CHECK-DAG: uzp1 v[[TMP1:[0-9]+]].4h, v[[CONV2]].4h, v[[CONV3]].4h
-; CHECK-DAG: uzp1 v[[TMP2:[0-9]+]].4h, v[[CONV0]].4h, v[[CONV1]].4h
-; CHECK: uzp1 v0.8b, v[[TMP2]].8b, v[[TMP1]].8b
- %tmp1 = load <8 x double>* %ptr
+; CHECK-DAG: xtn2 v[[NA0]].4s, v[[CONV1]].2d
+; CHECK-DAG: xtn v[[TMP1:[0-9]+]].4h, v[[NA0]].4s
+; CHECK-DAG: xtn2 v[[TMP1]].8h, v[[NA2]].4s
+; CHECK: xtn v0.8b, v[[TMP1]].8h
+ %tmp1 = load <8 x double>, <8 x double>* %ptr
%tmp2 = fptosi <8 x double> %tmp1 to <8 x i8>
ret <8 x i8> %tmp2
}
+define <4 x half> @uitofp_v4i64_to_v4f16(<4 x i64>* %ptr) {
+; CHECK: uitofp_v4i64_to_v4f16
+; CHECK-DAG: ucvtf v[[LHS:[0-9]+]].2d, v0.2d
+; CHECK-DAG: ucvtf v[[RHS:[0-9]+]].2d, v1.2d
+; CHECK-DAG: fcvtn v[[MID:[0-9]+]].2s, v[[LHS]].2d
+; CHECK-DAG: fcvtn2 v[[MID]].4s, v[[RHS]].2d
+; CHECK: fcvtn v0.4h, v[[MID]].4s
+ %tmp1 = load <4 x i64>, <4 x i64>* %ptr
+ %tmp2 = uitofp <4 x i64> %tmp1 to <4 x half>
+ ret <4 x half> %tmp2
+}
+
+define <4 x i16> @trunc_v4i64_to_v4i16(<4 x i64>* %ptr) {
+; CHECK: trunc_v4i64_to_v4i16
+; CHECK: xtn
+; CHECK: xtn2
+; CHECK: xtn
+ %tmp1 = load <4 x i64>, <4 x i64>* %ptr
+ %tmp2 = trunc <4 x i64> %tmp1 to <4 x i16>
+ ret <4 x i16> %tmp2
+}
+
+define <4 x i16> @fptoui_v4f64_to_v4i16(<4 x double>* %ptr) {
+; CHECK: fptoui_v4f64_to_v4i16
+; CHECK-DAG: fcvtzu v[[LHS:[0-9]+]].2d, v0.2d
+; CHECK-DAG: fcvtzu v[[RHS:[0-9]+]].2d, v1.2d
+; CHECK-DAG: xtn v[[MID:[0-9]+]].2s, v[[LHS]].2d
+; CHECK-DAG: xtn2 v[[MID]].4s, v[[RHS]].2d
+; CHECK: xtn v0.4h, v[[MID]].4s
+ %tmp1 = load <4 x double>, <4 x double>* %ptr
+ %tmp2 = fptoui <4 x double> %tmp1 to <4 x i16>
+ ret <4 x i16> %tmp2
+}
diff --git a/test/CodeGen/AArch64/arm64-cse.ll b/test/CodeGen/AArch64/arm64-cse.ll
index b74ece8d288b..8d4bf5dbeb75 100644
--- a/test/CodeGen/AArch64/arm64-cse.ll
+++ b/test/CodeGen/AArch64/arm64-cse.ll
@@ -1,4 +1,4 @@
-; RUN: llc -O3 < %s -aarch64-atomic-cfg-tidy=0 -aarch64-gep-opt=false | FileCheck %s
+; RUN: llc -O3 < %s -aarch64-atomic-cfg-tidy=0 -aarch64-gep-opt=false -verify-machineinstrs | FileCheck %s
target triple = "arm64-apple-ios"
; rdar://12462006
@@ -15,7 +15,7 @@ entry:
; CHECK: sub
; CHECK-NOT: sub
; CHECK: ret
- %0 = load i32* %offset, align 4
+ %0 = load i32, i32* %offset, align 4
%cmp = icmp slt i32 %0, %size
%s = sub nsw i32 %0, %size
br i1 %cmp, label %return, label %if.end
@@ -25,7 +25,7 @@ if.end:
%s2 = sub nsw i32 %s, %size
%s3 = sub nsw i32 %sub, %s2
store i32 %s3, i32* %offset, align 4
- %add.ptr = getelementptr inbounds i8* %base, i32 %sub
+ %add.ptr = getelementptr inbounds i8, i8* %base, i32 %sub
br label %return
return:
@@ -43,14 +43,14 @@ entry:
; CHECK: b.lt
; CHECK-NOT: sub
; CHECK: ret
- %0 = load i32* %offset, align 4
+ %0 = load i32, i32* %offset, align 4
%cmp = icmp slt i32 %0, 1
br i1 %cmp, label %return, label %if.end
if.end:
%sub = sub nsw i32 %0, 1
store i32 %sub, i32* %offset, align 4
- %add.ptr = getelementptr inbounds i8* %base, i32 %sub
+ %add.ptr = getelementptr inbounds i8, i8* %base, i32 %sub
br label %return
return:
diff --git a/test/CodeGen/AArch64/arm64-dagcombiner-dead-indexed-load.ll b/test/CodeGen/AArch64/arm64-dagcombiner-dead-indexed-load.ll
index 2eb6307b201b..37f3504be935 100644
--- a/test/CodeGen/AArch64/arm64-dagcombiner-dead-indexed-load.ll
+++ b/test/CodeGen/AArch64/arm64-dagcombiner-dead-indexed-load.ll
@@ -17,9 +17,9 @@ target triple = "arm64-apple-ios"
; CHECK-NOT: str
define void @test(%"struct.SU"* nocapture %su) {
entry:
- %r1 = getelementptr inbounds %"struct.SU"* %su, i64 1, i32 5
+ %r1 = getelementptr inbounds %"struct.SU", %"struct.SU"* %su, i64 1, i32 5
%r2 = bitcast %"struct.BO"* %r1 to i48*
- %r3 = load i48* %r2, align 8
+ %r3 = load i48, i48* %r2, align 8
%r4 = and i48 %r3, -4294967296
%r5 = or i48 0, %r4
store i48 %r5, i48* %r2, align 8
diff --git a/test/CodeGen/AArch64/arm64-dagcombiner-load-slicing.ll b/test/CodeGen/AArch64/arm64-dagcombiner-load-slicing.ll
index 0679014e59ae..09483ea09bd3 100644
--- a/test/CodeGen/AArch64/arm64-dagcombiner-load-slicing.ll
+++ b/test/CodeGen/AArch64/arm64-dagcombiner-load-slicing.ll
@@ -14,22 +14,22 @@
; CHECK: ret
define void @test(%class.Complex* nocapture %out, i64 %out_start) {
entry:
- %arrayidx = getelementptr inbounds %class.Complex* %out, i64 %out_start
+ %arrayidx = getelementptr inbounds %class.Complex, %class.Complex* %out, i64 %out_start
%0 = bitcast %class.Complex* %arrayidx to i64*
- %1 = load i64* %0, align 4
+ %1 = load i64, i64* %0, align 4
%t0.sroa.0.0.extract.trunc = trunc i64 %1 to i32
%2 = bitcast i32 %t0.sroa.0.0.extract.trunc to float
%t0.sroa.2.0.extract.shift = lshr i64 %1, 32
%t0.sroa.2.0.extract.trunc = trunc i64 %t0.sroa.2.0.extract.shift to i32
%3 = bitcast i32 %t0.sroa.2.0.extract.trunc to float
%add = add i64 %out_start, 8
- %arrayidx2 = getelementptr inbounds %class.Complex* %out, i64 %add
- %i.i = getelementptr inbounds %class.Complex* %arrayidx2, i64 0, i32 0
- %4 = load float* %i.i, align 4
+ %arrayidx2 = getelementptr inbounds %class.Complex, %class.Complex* %out, i64 %add
+ %i.i = getelementptr inbounds %class.Complex, %class.Complex* %arrayidx2, i64 0, i32 0
+ %4 = load float, float* %i.i, align 4
%add.i = fadd float %4, %2
%retval.sroa.0.0.vec.insert.i = insertelement <2 x float> undef, float %add.i, i32 0
- %r.i = getelementptr inbounds %class.Complex* %arrayidx2, i64 0, i32 1
- %5 = load float* %r.i, align 4
+ %r.i = getelementptr inbounds %class.Complex, %class.Complex* %arrayidx2, i64 0, i32 1
+ %5 = load float, float* %r.i, align 4
%add5.i = fadd float %5, %3
%retval.sroa.0.4.vec.insert.i = insertelement <2 x float> %retval.sroa.0.0.vec.insert.i, float %add5.i, i32 1
%ref.tmp.sroa.0.0.cast = bitcast %class.Complex* %arrayidx to <2 x float>*
@@ -46,22 +46,22 @@ entry:
; CHECK: ret
define void @test_int(%class.Complex_int* nocapture %out, i64 %out_start) {
entry:
- %arrayidx = getelementptr inbounds %class.Complex_int* %out, i64 %out_start
+ %arrayidx = getelementptr inbounds %class.Complex_int, %class.Complex_int* %out, i64 %out_start
%0 = bitcast %class.Complex_int* %arrayidx to i64*
- %1 = load i64* %0, align 4
+ %1 = load i64, i64* %0, align 4
%t0.sroa.0.0.extract.trunc = trunc i64 %1 to i32
%2 = bitcast i32 %t0.sroa.0.0.extract.trunc to i32
%t0.sroa.2.0.extract.shift = lshr i64 %1, 32
%t0.sroa.2.0.extract.trunc = trunc i64 %t0.sroa.2.0.extract.shift to i32
%3 = bitcast i32 %t0.sroa.2.0.extract.trunc to i32
%add = add i64 %out_start, 8
- %arrayidx2 = getelementptr inbounds %class.Complex_int* %out, i64 %add
- %i.i = getelementptr inbounds %class.Complex_int* %arrayidx2, i64 0, i32 0
- %4 = load i32* %i.i, align 4
+ %arrayidx2 = getelementptr inbounds %class.Complex_int, %class.Complex_int* %out, i64 %add
+ %i.i = getelementptr inbounds %class.Complex_int, %class.Complex_int* %arrayidx2, i64 0, i32 0
+ %4 = load i32, i32* %i.i, align 4
%add.i = add i32 %4, %2
%retval.sroa.0.0.vec.insert.i = insertelement <2 x i32> undef, i32 %add.i, i32 0
- %r.i = getelementptr inbounds %class.Complex_int* %arrayidx2, i64 0, i32 1
- %5 = load i32* %r.i, align 4
+ %r.i = getelementptr inbounds %class.Complex_int, %class.Complex_int* %arrayidx2, i64 0, i32 1
+ %5 = load i32, i32* %r.i, align 4
%add5.i = add i32 %5, %3
%retval.sroa.0.4.vec.insert.i = insertelement <2 x i32> %retval.sroa.0.0.vec.insert.i, i32 %add5.i, i32 1
%ref.tmp.sroa.0.0.cast = bitcast %class.Complex_int* %arrayidx to <2 x i32>*
@@ -78,22 +78,22 @@ entry:
; CHECK: ret
define void @test_long(%class.Complex_long* nocapture %out, i64 %out_start) {
entry:
- %arrayidx = getelementptr inbounds %class.Complex_long* %out, i64 %out_start
+ %arrayidx = getelementptr inbounds %class.Complex_long, %class.Complex_long* %out, i64 %out_start
%0 = bitcast %class.Complex_long* %arrayidx to i128*
- %1 = load i128* %0, align 4
+ %1 = load i128, i128* %0, align 4
%t0.sroa.0.0.extract.trunc = trunc i128 %1 to i64
%2 = bitcast i64 %t0.sroa.0.0.extract.trunc to i64
%t0.sroa.2.0.extract.shift = lshr i128 %1, 64
%t0.sroa.2.0.extract.trunc = trunc i128 %t0.sroa.2.0.extract.shift to i64
%3 = bitcast i64 %t0.sroa.2.0.extract.trunc to i64
%add = add i64 %out_start, 8
- %arrayidx2 = getelementptr inbounds %class.Complex_long* %out, i64 %add
- %i.i = getelementptr inbounds %class.Complex_long* %arrayidx2, i32 0, i32 0
- %4 = load i64* %i.i, align 4
+ %arrayidx2 = getelementptr inbounds %class.Complex_long, %class.Complex_long* %out, i64 %add
+ %i.i = getelementptr inbounds %class.Complex_long, %class.Complex_long* %arrayidx2, i32 0, i32 0
+ %4 = load i64, i64* %i.i, align 4
%add.i = add i64 %4, %2
%retval.sroa.0.0.vec.insert.i = insertelement <2 x i64> undef, i64 %add.i, i32 0
- %r.i = getelementptr inbounds %class.Complex_long* %arrayidx2, i32 0, i32 1
- %5 = load i64* %r.i, align 4
+ %r.i = getelementptr inbounds %class.Complex_long, %class.Complex_long* %arrayidx2, i32 0, i32 1
+ %5 = load i64, i64* %r.i, align 4
%add5.i = add i64 %5, %3
%retval.sroa.0.4.vec.insert.i = insertelement <2 x i64> %retval.sroa.0.0.vec.insert.i, i64 %add5.i, i32 1
%ref.tmp.sroa.0.0.cast = bitcast %class.Complex_long* %arrayidx to <2 x i64>*
diff --git a/test/CodeGen/AArch64/arm64-dup.ll b/test/CodeGen/AArch64/arm64-dup.ll
index 0c56b46c4176..c6b7de366d23 100644
--- a/test/CodeGen/AArch64/arm64-dup.ll
+++ b/test/CodeGen/AArch64/arm64-dup.ll
@@ -165,7 +165,7 @@ define <4 x float> @v_shuffledupQfloat(float %A) nounwind {
define <8 x i8> @vduplane8(<8 x i8>* %A) nounwind {
;CHECK-LABEL: vduplane8:
;CHECK: dup.8b
- %tmp1 = load <8 x i8>* %A
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
%tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> < i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1 >
ret <8 x i8> %tmp2
}
@@ -173,7 +173,7 @@ define <8 x i8> @vduplane8(<8 x i8>* %A) nounwind {
define <4 x i16> @vduplane16(<4 x i16>* %A) nounwind {
;CHECK-LABEL: vduplane16:
;CHECK: dup.4h
- %tmp1 = load <4 x i16>* %A
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
%tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 >
ret <4 x i16> %tmp2
}
@@ -181,7 +181,7 @@ define <4 x i16> @vduplane16(<4 x i16>* %A) nounwind {
define <2 x i32> @vduplane32(<2 x i32>* %A) nounwind {
;CHECK-LABEL: vduplane32:
;CHECK: dup.2s
- %tmp1 = load <2 x i32>* %A
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
%tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <2 x i32> < i32 1, i32 1 >
ret <2 x i32> %tmp2
}
@@ -189,7 +189,7 @@ define <2 x i32> @vduplane32(<2 x i32>* %A) nounwind {
define <2 x float> @vduplanefloat(<2 x float>* %A) nounwind {
;CHECK-LABEL: vduplanefloat:
;CHECK: dup.2s
- %tmp1 = load <2 x float>* %A
+ %tmp1 = load <2 x float>, <2 x float>* %A
%tmp2 = shufflevector <2 x float> %tmp1, <2 x float> undef, <2 x i32> < i32 1, i32 1 >
ret <2 x float> %tmp2
}
@@ -197,7 +197,7 @@ define <2 x float> @vduplanefloat(<2 x float>* %A) nounwind {
define <16 x i8> @vduplaneQ8(<8 x i8>* %A) nounwind {
;CHECK-LABEL: vduplaneQ8:
;CHECK: dup.16b
- %tmp1 = load <8 x i8>* %A
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
%tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <16 x i32> < i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1 >
ret <16 x i8> %tmp2
}
@@ -205,7 +205,7 @@ define <16 x i8> @vduplaneQ8(<8 x i8>* %A) nounwind {
define <8 x i16> @vduplaneQ16(<4 x i16>* %A) nounwind {
;CHECK-LABEL: vduplaneQ16:
;CHECK: dup.8h
- %tmp1 = load <4 x i16>* %A
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
%tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <8 x i32> < i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1 >
ret <8 x i16> %tmp2
}
@@ -213,7 +213,7 @@ define <8 x i16> @vduplaneQ16(<4 x i16>* %A) nounwind {
define <4 x i32> @vduplaneQ32(<2 x i32>* %A) nounwind {
;CHECK-LABEL: vduplaneQ32:
;CHECK: dup.4s
- %tmp1 = load <2 x i32>* %A
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
%tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 >
ret <4 x i32> %tmp2
}
@@ -221,7 +221,7 @@ define <4 x i32> @vduplaneQ32(<2 x i32>* %A) nounwind {
define <4 x float> @vduplaneQfloat(<2 x float>* %A) nounwind {
;CHECK-LABEL: vduplaneQfloat:
;CHECK: dup.4s
- %tmp1 = load <2 x float>* %A
+ %tmp1 = load <2 x float>, <2 x float>* %A
%tmp2 = shufflevector <2 x float> %tmp1, <2 x float> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 >
ret <4 x float> %tmp2
}
@@ -321,3 +321,40 @@ entry:
%sub = sub <4 x i16> %a, %mul
ret <4 x i16> %sub
}
+
+; Also test the DUP path in the PerfectShuffle generator.
+
+; CHECK-LABEL: test_perfectshuffle_dupext_v4i16:
+; CHECK-NEXT: dup.4h v0, v0[0]
+; CHECK-NEXT: ext.8b v0, v0, v1, #4
+define <4 x i16> @test_perfectshuffle_dupext_v4i16(<4 x i16> %a, <4 x i16> %b) nounwind {
+ %r = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 0, i32 4, i32 5>
+ ret <4 x i16> %r
+}
+
+; CHECK-LABEL: test_perfectshuffle_dupext_v4f16:
+; CHECK-NEXT: dup.4h v0, v0[0]
+; CHECK-NEXT: ext.8b v0, v0, v1, #4
+; CHECK-NEXT: ret
+define <4 x half> @test_perfectshuffle_dupext_v4f16(<4 x half> %a, <4 x half> %b) nounwind {
+ %r = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> <i32 0, i32 0, i32 4, i32 5>
+ ret <4 x half> %r
+}
+
+; CHECK-LABEL: test_perfectshuffle_dupext_v4i32:
+; CHECK-NEXT: dup.4s v0, v0[0]
+; CHECK-NEXT: ext.16b v0, v0, v1, #8
+; CHECK-NEXT: ret
+define <4 x i32> @test_perfectshuffle_dupext_v4i32(<4 x i32> %a, <4 x i32> %b) nounwind {
+ %r = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 0, i32 4, i32 5>
+ ret <4 x i32> %r
+}
+
+; CHECK-LABEL: test_perfectshuffle_dupext_v4f32:
+; CHECK-NEXT: dup.4s v0, v0[0]
+; CHECK-NEXT: ext.16b v0, v0, v1, #8
+; CHECK-NEXT: ret
+define <4 x float> @test_perfectshuffle_dupext_v4f32(<4 x float> %a, <4 x float> %b) nounwind {
+ %r = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 0, i32 4, i32 5>
+ ret <4 x float> %r
+}
diff --git a/test/CodeGen/AArch64/arm64-early-ifcvt.ll b/test/CodeGen/AArch64/arm64-early-ifcvt.ll
index 44150c29aeb0..8164f46664b6 100644
--- a/test/CodeGen/AArch64/arm64-early-ifcvt.ll
+++ b/test/CodeGen/AArch64/arm64-early-ifcvt.ll
@@ -14,8 +14,8 @@ do.body:
%min.0 = phi i32 [ 0, %entry ], [ %min.1, %do.cond ]
%n.addr.0 = phi i32 [ %n, %entry ], [ %dec, %do.cond ]
%p.addr.0 = phi i32* [ %p, %entry ], [ %incdec.ptr, %do.cond ]
- %incdec.ptr = getelementptr inbounds i32* %p.addr.0, i64 1
- %0 = load i32* %p.addr.0, align 4
+ %incdec.ptr = getelementptr inbounds i32, i32* %p.addr.0, i64 1
+ %0 = load i32, i32* %p.addr.0, align 4
%cmp = icmp sgt i32 %0, %max.0
br i1 %cmp, label %do.cond, label %if.else
@@ -400,7 +400,7 @@ entry:
br label %for.body
for.body:
- %x0 = load i32* undef, align 4
+ %x0 = load i32, i32* undef, align 4
br i1 undef, label %if.then.i146, label %is_sbox.exit155
if.then.i146:
@@ -412,8 +412,8 @@ if.then.i146:
is_sbox.exit155: ; preds = %if.then.i146, %for.body
%seg_offset.0.i151 = phi i32 [ %add9.i145, %if.then.i146 ], [ undef, %for.body ]
%idxprom15.i152 = sext i32 %seg_offset.0.i151 to i64
- %arrayidx18.i154 = getelementptr inbounds i32* null, i64 %idxprom15.i152
- %x1 = load i32* %arrayidx18.i154, align 4
+ %arrayidx18.i154 = getelementptr inbounds i32, i32* null, i64 %idxprom15.i152
+ %x1 = load i32, i32* %arrayidx18.i154, align 4
br i1 undef, label %for.body51, label %for.body
for.body51: ; preds = %is_sbox.exit155
diff --git a/test/CodeGen/AArch64/arm64-elf-globals.ll b/test/CodeGen/AArch64/arm64-elf-globals.ll
index 4ed44e7c17af..b1d5524aee87 100644
--- a/test/CodeGen/AArch64/arm64-elf-globals.ll
+++ b/test/CodeGen/AArch64/arm64-elf-globals.ll
@@ -9,7 +9,7 @@
@var64 = external global i64, align 8
define i8 @test_i8(i8 %new) {
- %val = load i8* @var8, align 1
+ %val = load i8, i8* @var8, align 1
store i8 %new, i8* @var8
ret i8 %val
; CHECK-LABEL: test_i8:
@@ -31,7 +31,7 @@ define i8 @test_i8(i8 %new) {
}
define i16 @test_i16(i16 %new) {
- %val = load i16* @var16, align 2
+ %val = load i16, i16* @var16, align 2
store i16 %new, i16* @var16
ret i16 %val
; CHECK-LABEL: test_i16:
@@ -44,7 +44,7 @@ define i16 @test_i16(i16 %new) {
}
define i32 @test_i32(i32 %new) {
- %val = load i32* @var32, align 4
+ %val = load i32, i32* @var32, align 4
store i32 %new, i32* @var32
ret i32 %val
; CHECK-LABEL: test_i32:
@@ -57,7 +57,7 @@ define i32 @test_i32(i32 %new) {
}
define i64 @test_i64(i64 %new) {
- %val = load i64* @var64, align 8
+ %val = load i64, i64* @var64, align 8
store i64 %new, i64* @var64
ret i64 %val
; CHECK-LABEL: test_i64:
@@ -83,8 +83,8 @@ define i64* @test_addr() {
@protectedvar = protected global i32 0, align 4
define i32 @test_vis() {
- %lhs = load i32* @hiddenvar, align 4
- %rhs = load i32* @protectedvar, align 4
+ %lhs = load i32, i32* @hiddenvar, align 4
+ %rhs = load i32, i32* @protectedvar, align 4
%ret = add i32 %lhs, %rhs
ret i32 %ret
; CHECK-PIC: adrp {{x[0-9]+}}, hiddenvar
@@ -96,8 +96,8 @@ define i32 @test_vis() {
@var_default = external global [2 x i32]
define i32 @test_default_align() {
- %addr = getelementptr [2 x i32]* @var_default, i32 0, i32 0
- %val = load i32* %addr
+ %addr = getelementptr [2 x i32], [2 x i32]* @var_default, i32 0, i32 0
+ %val = load i32, i32* %addr
ret i32 %val
; CHECK-LABEL: test_default_align:
; CHECK: adrp x[[HIREG:[0-9]+]], var_default
@@ -106,7 +106,7 @@ define i32 @test_default_align() {
define i64 @test_default_unaligned() {
%addr = bitcast [2 x i32]* @var_default to i64*
- %val = load i64* %addr
+ %val = load i64, i64* %addr
ret i64 %val
; CHECK-LABEL: test_default_unaligned:
; CHECK: adrp [[HIREG:x[0-9]+]], var_default
diff --git a/test/CodeGen/AArch64/arm64-ext.ll b/test/CodeGen/AArch64/arm64-ext.ll
index 67860de51b0f..8315ffcfb078 100644
--- a/test/CodeGen/AArch64/arm64-ext.ll
+++ b/test/CodeGen/AArch64/arm64-ext.ll
@@ -3,8 +3,8 @@
define <8 x i8> @test_vextd(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: test_vextd:
;CHECK: {{ext.8b.*#3}}
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
+ %tmp2 = load <8 x i8>, <8 x i8>* %B
%tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10>
ret <8 x i8> %tmp3
}
@@ -12,8 +12,8 @@ define <8 x i8> @test_vextd(<8 x i8>* %A, <8 x i8>* %B) nounwind {
define <8 x i8> @test_vextRd(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: test_vextRd:
;CHECK: {{ext.8b.*#5}}
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
+ %tmp2 = load <8 x i8>, <8 x i8>* %B
%tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 13, i32 14, i32 15, i32 0, i32 1, i32 2, i32 3, i32 4>
ret <8 x i8> %tmp3
}
@@ -21,8 +21,8 @@ define <8 x i8> @test_vextRd(<8 x i8>* %A, <8 x i8>* %B) nounwind {
define <16 x i8> @test_vextq(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: test_vextq:
;CHECK: {{ext.16b.*3}}
- %tmp1 = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
+ %tmp1 = load <16 x i8>, <16 x i8>* %A
+ %tmp2 = load <16 x i8>, <16 x i8>* %B
%tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18>
ret <16 x i8> %tmp3
}
@@ -30,8 +30,8 @@ define <16 x i8> @test_vextq(<16 x i8>* %A, <16 x i8>* %B) nounwind {
define <16 x i8> @test_vextRq(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: test_vextRq:
;CHECK: {{ext.16b.*7}}
- %tmp1 = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
+ %tmp1 = load <16 x i8>, <16 x i8>* %A
+ %tmp2 = load <16 x i8>, <16 x i8>* %B
%tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6>
ret <16 x i8> %tmp3
}
@@ -39,8 +39,8 @@ define <16 x i8> @test_vextRq(<16 x i8>* %A, <16 x i8>* %B) nounwind {
define <4 x i16> @test_vextd16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: test_vextd16:
;CHECK: {{ext.8b.*#6}}
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
%tmp3 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i32> <i32 3, i32 4, i32 5, i32 6>
ret <4 x i16> %tmp3
}
@@ -48,8 +48,8 @@ define <4 x i16> @test_vextd16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
define <4 x i32> @test_vextq32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: test_vextq32:
;CHECK: {{ext.16b.*12}}
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
+ %tmp2 = load <4 x i32>, <4 x i32>* %B
%tmp3 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> <i32 3, i32 4, i32 5, i32 6>
ret <4 x i32> %tmp3
}
@@ -59,8 +59,8 @@ define <4 x i32> @test_vextq32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
define <8 x i8> @test_vextd_undef(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: test_vextd_undef:
;CHECK: {{ext.8b.*}}
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
+ %tmp2 = load <8 x i8>, <8 x i8>* %B
%tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 3, i32 undef, i32 undef, i32 6, i32 7, i32 8, i32 9, i32 10>
ret <8 x i8> %tmp3
}
@@ -68,8 +68,8 @@ define <8 x i8> @test_vextd_undef(<8 x i8>* %A, <8 x i8>* %B) nounwind {
define <8 x i8> @test_vextd_undef2(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: test_vextd_undef2:
;CHECK: {{ext.8b.*#6}}
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
+ %tmp2 = load <8 x i8>, <8 x i8>* %B
%tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 2, i32 3, i32 4, i32 5>
ret <8 x i8> %tmp3
}
@@ -77,8 +77,8 @@ define <8 x i8> @test_vextd_undef2(<8 x i8>* %A, <8 x i8>* %B) nounwind {
define <16 x i8> @test_vextRq_undef(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: test_vextRq_undef:
;CHECK: {{ext.16b.*#7}}
- %tmp1 = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
+ %tmp1 = load <16 x i8>, <16 x i8>* %A
+ %tmp2 = load <16 x i8>, <16 x i8>* %B
%tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 23, i32 24, i32 25, i32 26, i32 undef, i32 undef, i32 29, i32 30, i32 31, i32 0, i32 1, i32 2, i32 3, i32 4, i32 undef, i32 6>
ret <16 x i8> %tmp3
}
@@ -86,7 +86,7 @@ define <16 x i8> @test_vextRq_undef(<16 x i8>* %A, <16 x i8>* %B) nounwind {
define <8 x i16> @test_vextRq_undef2(<8 x i16>* %A) nounwind {
;CHECK-LABEL: test_vextRq_undef2:
;CHECK: {{ext.16b.*#10}}
- %tmp1 = load <8 x i16>* %A
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
%vext = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 1, i32 2, i32 3, i32 4>
ret <8 x i16> %vext;
}
@@ -101,8 +101,8 @@ define <4 x i16> @test_interleaved(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: test_interleaved:
;CHECK: ext.8b
;CHECK: zip1.4h
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
+ %tmp2 = load <8 x i16>, <8 x i16>* %B
%tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <4 x i32> <i32 3, i32 8, i32 5, i32 9>
ret <4 x i16> %tmp3
}
@@ -111,8 +111,8 @@ define <4 x i16> @test_interleaved(<8 x i16>* %A, <8 x i16>* %B) nounwind {
define <4 x i16> @test_undef(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: test_undef:
;CHECK: zip1.4h
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
+ %tmp2 = load <8 x i16>, <8 x i16>* %B
%tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <4 x i32> <i32 undef, i32 8, i32 5, i32 9>
ret <4 x i16> %tmp3
}
diff --git a/test/CodeGen/AArch64/arm64-extend.ll b/test/CodeGen/AArch64/arm64-extend.ll
index afcaca2c4920..0ef68f8a5301 100644
--- a/test/CodeGen/AArch64/arm64-extend.ll
+++ b/test/CodeGen/AArch64/arm64-extend.ll
@@ -8,8 +8,8 @@ define i64 @foo(i32 %i) {
; CHECK: ldrsw x0, [x[[REG1]], w0, sxtw #2]
; CHECK: ret
%idxprom = sext i32 %i to i64
- %arrayidx = getelementptr inbounds [0 x i32]* @array, i64 0, i64 %idxprom
- %tmp1 = load i32* %arrayidx, align 4
+ %arrayidx = getelementptr inbounds [0 x i32], [0 x i32]* @array, i64 0, i64 %idxprom
+ %tmp1 = load i32, i32* %arrayidx, align 4
%conv = sext i32 %tmp1 to i64
ret i64 %conv
}
diff --git a/test/CodeGen/AArch64/arm64-extern-weak.ll b/test/CodeGen/AArch64/arm64-extern-weak.ll
index 06bd9270ba47..020c07c739d9 100644
--- a/test/CodeGen/AArch64/arm64-extern-weak.ll
+++ b/test/CodeGen/AArch64/arm64-extern-weak.ll
@@ -30,7 +30,7 @@ define i32()* @foo() {
@arr_var = extern_weak global [10 x i32]
define i32* @bar() {
- %addr = getelementptr [10 x i32]* @arr_var, i32 0, i32 5
+ %addr = getelementptr [10 x i32], [10 x i32]* @arr_var, i32 0, i32 5
; CHECK: adrp x[[ARR_VAR_HI:[0-9]+]], :got:arr_var
; CHECK: ldr [[ARR_VAR:x[0-9]+]], [x[[ARR_VAR_HI]], :got_lo12:arr_var]
; CHECK: add x0, [[ARR_VAR]], #20
diff --git a/test/CodeGen/AArch64/arm64-extload-knownzero.ll b/test/CodeGen/AArch64/arm64-extload-knownzero.ll
index 14e5fd310d7b..642af876423a 100644
--- a/test/CodeGen/AArch64/arm64-extload-knownzero.ll
+++ b/test/CodeGen/AArch64/arm64-extload-knownzero.ll
@@ -9,7 +9,7 @@ entry:
bb1:
; CHECK: %bb1
; CHECK: ldrh [[REG:w[0-9]+]]
- %tmp2 = load i16* %ptr, align 2
+ %tmp2 = load i16, i16* %ptr, align 2
br label %bb2
bb2:
; CHECK: %bb2
diff --git a/test/CodeGen/AArch64/arm64-fast-isel-addr-offset.ll b/test/CodeGen/AArch64/arm64-fast-isel-addr-offset.ll
index d81bc7cee114..e4dc948c4603 100644
--- a/test/CodeGen/AArch64/arm64-fast-isel-addr-offset.ll
+++ b/test/CodeGen/AArch64/arm64-fast-isel-addr-offset.ll
@@ -1,4 +1,4 @@
-; RUN: llc -O0 -fast-isel-abort -verify-machineinstrs -mtriple=arm64-apple-darwin < %s | FileCheck %s
+; RUN: llc -O0 -fast-isel-abort=1 -verify-machineinstrs -mtriple=arm64-apple-darwin < %s | FileCheck %s
@sortlist = common global [5001 x i32] zeroinitializer, align 16
@sortlist2 = common global [5001 x i64] zeroinitializer, align 16
@@ -13,7 +13,7 @@ entry:
; CHECK: add x[[REG3:[0-9]+]], x[[REG1]], x[[REG2]]
; CHECK: ldr w0, [x[[REG3]]]
; CHECK: ret
- %0 = load i32* getelementptr inbounds ([5001 x i32]* @sortlist, i32 0, i64 5000), align 4
+ %0 = load i32, i32* getelementptr inbounds ([5001 x i32], [5001 x i32]* @sortlist, i32 0, i64 5000), align 4
ret i32 %0
}
@@ -26,7 +26,7 @@ entry:
; CHECK: add x[[REG3:[0-9]+]], x[[REG1]], x[[REG2]]
; CHECK: ldr x0, [x[[REG3]]]
; CHECK: ret
- %0 = load i64* getelementptr inbounds ([5001 x i64]* @sortlist2, i32 0, i64 5000), align 4
+ %0 = load i64, i64* getelementptr inbounds ([5001 x i64], [5001 x i64]* @sortlist2, i32 0, i64 5000), align 4
ret i64 %0
}
@@ -40,8 +40,8 @@ entry:
; CHECK: movz x[[REG:[0-9]+]], #0xb3a, lsl #32
; CHECK: movk x[[REG]], #0x73ce, lsl #16
; CHECK: movk x[[REG]], #0x2ff2
- %0 = load i8** @pd2, align 8
- %arrayidx = getelementptr inbounds i8* %0, i64 12345678901234
- %1 = load i8* %arrayidx, align 1
+ %0 = load i8*, i8** @pd2, align 8
+ %arrayidx = getelementptr inbounds i8, i8* %0, i64 12345678901234
+ %1 = load i8, i8* %arrayidx, align 1
ret i8 %1
}
diff --git a/test/CodeGen/AArch64/arm64-fast-isel-alloca.ll b/test/CodeGen/AArch64/arm64-fast-isel-alloca.ll
index a8417027ce2d..a506607a0a5d 100644
--- a/test/CodeGen/AArch64/arm64-fast-isel-alloca.ll
+++ b/test/CodeGen/AArch64/arm64-fast-isel-alloca.ll
@@ -1,5 +1,5 @@
; This test should cause the TargetMaterializeAlloca to be invoked
-; RUN: llc -O0 -fast-isel-abort -verify-machineinstrs -mtriple=arm64-apple-darwin < %s | FileCheck %s
+; RUN: llc -O0 -fast-isel-abort=1 -verify-machineinstrs -mtriple=arm64-apple-darwin < %s | FileCheck %s
%struct.S1Ty = type { i64 }
%struct.S2Ty = type { %struct.S1Ty, %struct.S1Ty }
@@ -18,7 +18,7 @@ entry:
; CHECK: mov [[REG:x[0-9]+]], sp
; CHECK-NEXT: add x0, [[REG]], #8
%E = alloca %struct.S2Ty, align 4
- %B = getelementptr inbounds %struct.S2Ty* %E, i32 0, i32 1
+ %B = getelementptr inbounds %struct.S2Ty, %struct.S2Ty* %E, i32 0, i32 1
call void @takeS1(%struct.S1Ty* %B)
ret void
}
diff --git a/test/CodeGen/AArch64/arm64-fast-isel-br.ll b/test/CodeGen/AArch64/arm64-fast-isel-br.ll
index f896d8517382..0ef7b143df80 100644
--- a/test/CodeGen/AArch64/arm64-fast-isel-br.ll
+++ b/test/CodeGen/AArch64/arm64-fast-isel-br.ll
@@ -1,9 +1,9 @@
-; RUN: llc -O0 -fast-isel-abort -mtriple=arm64-apple-darwin -mcpu=cyclone -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -O0 -fast-isel-abort=1 -mtriple=arm64-apple-darwin -mcpu=cyclone -verify-machineinstrs < %s | FileCheck %s
define void @branch1() nounwind uwtable ssp {
%x = alloca i32, align 4
store i32 0, i32* %x, align 4
- %1 = load i32* %x, align 4
+ %1 = load i32, i32* %x, align 4
%2 = icmp ne i32 %1, 0
br i1 %2, label %3, label %4
@@ -23,7 +23,7 @@ define void @branch2() nounwind uwtable ssp {
store i32 1, i32* %y, align 4
store i32 1, i32* %x, align 4
store i32 0, i32* %z, align 4
- %2 = load i32* %x, align 4
+ %2 = load i32, i32* %x, align 4
%3 = icmp ne i32 %2, 0
br i1 %3, label %4, label %5
@@ -32,12 +32,12 @@ define void @branch2() nounwind uwtable ssp {
br label %14
; <label>:5 ; preds = %0
- %6 = load i32* %y, align 4
+ %6 = load i32, i32* %y, align 4
%7 = icmp ne i32 %6, 0
br i1 %7, label %8, label %13
; <label>:8 ; preds = %5
- %9 = load i32* %z, align 4
+ %9 = load i32, i32* %z, align 4
%10 = icmp ne i32 %9, 0
br i1 %10, label %11, label %12
@@ -53,7 +53,7 @@ define void @branch2() nounwind uwtable ssp {
br label %14
; <label>:14 ; preds = %4, %11, %12, %13
- %15 = load i32* %1
+ %15 = load i32, i32* %1
ret void
}
@@ -93,7 +93,7 @@ entry:
store i16 %b, i16* %b.addr, align 2
store i32 %c, i32* %c.addr, align 4
store i64 %d, i64* %d.addr, align 8
- %0 = load i16* %b.addr, align 2
+ %0 = load i16, i16* %b.addr, align 2
; CHECK: and w0, w0, #0x1
; CHECK: cmp w0, #0
; CHECK: b.eq LBB4_2
@@ -105,7 +105,7 @@ if.then: ; preds = %entry
br label %if.end
if.end: ; preds = %if.then, %entry
- %1 = load i32* %c.addr, align 4
+ %1 = load i32, i32* %c.addr, align 4
; CHECK: and w[[REG:[0-9]+]], w{{[0-9]+}}, #0x1
; CHECK: cmp w[[REG]], #0
; CHECK: b.eq LBB4_4
@@ -117,7 +117,7 @@ if.then3: ; preds = %if.end
br label %if.end4
if.end4: ; preds = %if.then3, %if.end
- %2 = load i64* %d.addr, align 8
+ %2 = load i64, i64* %d.addr, align 8
; CHECK: cmp w{{[0-9]+}}, #0
; CHECK: b.eq LBB4_6
%conv5 = trunc i64 %2 to i1
@@ -128,7 +128,7 @@ if.then7: ; preds = %if.end4
br label %if.end8
if.end8: ; preds = %if.then7, %if.end4
- %3 = load i8* %a.addr, align 1
+ %3 = load i8, i8* %a.addr, align 1
ret i8 %3
}
diff --git a/test/CodeGen/AArch64/arm64-fast-isel-call.ll b/test/CodeGen/AArch64/arm64-fast-isel-call.ll
index f1e2c40a33c4..d6957f9191e2 100644
--- a/test/CodeGen/AArch64/arm64-fast-isel-call.ll
+++ b/test/CodeGen/AArch64/arm64-fast-isel-call.ll
@@ -1,6 +1,6 @@
-; RUN: llc -O0 -fast-isel-abort -fast-isel-abort-args -code-model=small -verify-machineinstrs -mtriple=arm64-apple-darwin < %s | FileCheck %s
-; RUN: llc -O0 -fast-isel-abort -fast-isel-abort-args -code-model=large -verify-machineinstrs -mtriple=arm64-apple-darwin < %s | FileCheck %s --check-prefix=LARGE
-; RUN: llc -O0 -fast-isel-abort -fast-isel-abort-args -code-model=small -verify-machineinstrs -mtriple=aarch64_be-linux-gnu < %s | FileCheck %s --check-prefix=CHECK-BE
+; RUN: llc -O0 -fast-isel-abort=2 -code-model=small -verify-machineinstrs -mtriple=arm64-apple-darwin < %s | FileCheck %s
+; RUN: llc -O0 -fast-isel-abort=2 -code-model=large -verify-machineinstrs -mtriple=arm64-apple-darwin < %s | FileCheck %s --check-prefix=LARGE
+; RUN: llc -O0 -fast-isel-abort=2 -code-model=small -verify-machineinstrs -mtriple=aarch64_be-linux-gnu < %s | FileCheck %s --check-prefix=CHECK-BE
define void @call0() nounwind {
entry:
@@ -23,7 +23,7 @@ define i32 @call1(i32 %a) nounwind {
entry:
%a.addr = alloca i32, align 4
store i32 %a, i32* %a.addr, align 4
- %tmp = load i32* %a.addr, align 4
+ %tmp = load i32, i32* %a.addr, align 4
ret i32 %tmp
}
@@ -35,7 +35,7 @@ entry:
; CHECK-NEXT: bl _call1
%a.addr = alloca i32, align 4
store i32 %a, i32* %a.addr, align 4
- %tmp = load i32* %a.addr, align 4
+ %tmp = load i32, i32* %a.addr, align 4
%call = call i32 @call1(i32 %tmp)
ret i32 %call
}
diff --git a/test/CodeGen/AArch64/arm64-fast-isel-conversion.ll b/test/CodeGen/AArch64/arm64-fast-isel-conversion.ll
index e5151847a598..1b6886523311 100644
--- a/test/CodeGen/AArch64/arm64-fast-isel-conversion.ll
+++ b/test/CodeGen/AArch64/arm64-fast-isel-conversion.ll
@@ -1,4 +1,4 @@
-; RUN: llc -O0 -fast-isel-abort -verify-machineinstrs -mtriple=arm64-apple-darwin -mcpu=cyclone < %s | FileCheck %s
+; RUN: llc -O0 -fast-isel-abort=1 -verify-machineinstrs -mtriple=arm64-apple-darwin -mcpu=cyclone < %s | FileCheck %s
;; Test various conversions.
define zeroext i32 @trunc_(i8 zeroext %a, i16 zeroext %b, i32 %c, i64 %d) nounwind ssp {
@@ -27,16 +27,16 @@ entry:
store i16 %b, i16* %b.addr, align 2
store i32 %c, i32* %c.addr, align 4
store i64 %d, i64* %d.addr, align 8
- %tmp = load i64* %d.addr, align 8
+ %tmp = load i64, i64* %d.addr, align 8
%conv = trunc i64 %tmp to i32
store i32 %conv, i32* %c.addr, align 4
- %tmp1 = load i32* %c.addr, align 4
+ %tmp1 = load i32, i32* %c.addr, align 4
%conv2 = trunc i32 %tmp1 to i16
store i16 %conv2, i16* %b.addr, align 2
- %tmp3 = load i16* %b.addr, align 2
+ %tmp3 = load i16, i16* %b.addr, align 2
%conv4 = trunc i16 %tmp3 to i8
store i8 %conv4, i8* %a.addr, align 1
- %tmp5 = load i8* %a.addr, align 1
+ %tmp5 = load i8, i8* %a.addr, align 1
%conv6 = zext i8 %tmp5 to i32
ret i32 %conv6
}
@@ -66,16 +66,16 @@ entry:
store i16 %b, i16* %b.addr, align 2
store i32 %c, i32* %c.addr, align 4
store i64 %d, i64* %d.addr, align 8
- %tmp = load i8* %a.addr, align 1
+ %tmp = load i8, i8* %a.addr, align 1
%conv = zext i8 %tmp to i16
store i16 %conv, i16* %b.addr, align 2
- %tmp1 = load i16* %b.addr, align 2
+ %tmp1 = load i16, i16* %b.addr, align 2
%conv2 = zext i16 %tmp1 to i32
store i32 %conv2, i32* %c.addr, align 4
- %tmp3 = load i32* %c.addr, align 4
+ %tmp3 = load i32, i32* %c.addr, align 4
%conv4 = zext i32 %tmp3 to i64
store i64 %conv4, i64* %d.addr, align 8
- %tmp5 = load i64* %d.addr, align 8
+ %tmp5 = load i64, i64* %d.addr, align 8
ret i64 %tmp5
}
@@ -121,16 +121,16 @@ entry:
store i16 %b, i16* %b.addr, align 2
store i32 %c, i32* %c.addr, align 4
store i64 %d, i64* %d.addr, align 8
- %tmp = load i8* %a.addr, align 1
+ %tmp = load i8, i8* %a.addr, align 1
%conv = sext i8 %tmp to i16
store i16 %conv, i16* %b.addr, align 2
- %tmp1 = load i16* %b.addr, align 2
+ %tmp1 = load i16, i16* %b.addr, align 2
%conv2 = sext i16 %tmp1 to i32
store i32 %conv2, i32* %c.addr, align 4
- %tmp3 = load i32* %c.addr, align 4
+ %tmp3 = load i32, i32* %c.addr, align 4
%conv4 = sext i32 %tmp3 to i64
store i64 %conv4, i64* %d.addr, align 8
- %tmp5 = load i64* %d.addr, align 8
+ %tmp5 = load i64, i64* %d.addr, align 8
ret i64 %tmp5
}
@@ -409,7 +409,7 @@ define void @stack_trunc() nounwind {
; CHECK: add sp, sp, #16
%a = alloca i8, align 1
%b = alloca i64, align 8
- %c = load i64* %b, align 8
+ %c = load i64, i64* %b, align 8
%d = trunc i64 %c to i8
store i8 %d, i8* %a, align 1
ret void
diff --git a/test/CodeGen/AArch64/arm64-fast-isel-fcmp.ll b/test/CodeGen/AArch64/arm64-fast-isel-fcmp.ll
index 111b6bd3d49e..c77949f996c3 100644
--- a/test/CodeGen/AArch64/arm64-fast-isel-fcmp.ll
+++ b/test/CodeGen/AArch64/arm64-fast-isel-fcmp.ll
@@ -1,4 +1,4 @@
-; RUN: llc -O0 -fast-isel-abort -verify-machineinstrs -mtriple=arm64-apple-darwin < %s | FileCheck %s
+; RUN: llc -O0 -fast-isel-abort=1 -verify-machineinstrs -mtriple=arm64-apple-darwin < %s | FileCheck %s
define zeroext i1 @fcmp_float1(float %a) {
; CHECK-LABEL: fcmp_float1
diff --git a/test/CodeGen/AArch64/arm64-fast-isel-gv.ll b/test/CodeGen/AArch64/arm64-fast-isel-gv.ll
index 1a4e8eab2d84..ab29824ccb60 100644
--- a/test/CodeGen/AArch64/arm64-fast-isel-gv.ll
+++ b/test/CodeGen/AArch64/arm64-fast-isel-gv.ll
@@ -1,4 +1,4 @@
-; RUN: llc -O0 -fast-isel-abort -verify-machineinstrs -mtriple=arm64-apple-darwin < %s | FileCheck %s
+; RUN: llc -O0 -fast-isel-abort=1 -verify-machineinstrs -mtriple=arm64-apple-darwin < %s | FileCheck %s
; Test load/store of global value from global offset table.
@seed = common global i64 0, align 8
@@ -26,12 +26,12 @@ entry:
; CHECK: and [[REG8:x[0-9]+]], [[REG7]], #0xffff
; CHECK: str [[REG8]], {{\[}}[[REG1]]{{\]}}
; CHECK: ldr {{x[0-9]+}}, {{\[}}[[REG1]]{{\]}}
- %0 = load i64* @seed, align 8
+ %0 = load i64, i64* @seed, align 8
%mul = mul nsw i64 %0, 1309
%add = add nsw i64 %mul, 13849
%and = and i64 %add, 65535
store i64 %and, i64* @seed, align 8
- %1 = load i64* @seed, align 8
+ %1 = load i64, i64* @seed, align 8
%conv = trunc i64 %1 to i32
ret i32 %conv
}
diff --git a/test/CodeGen/AArch64/arm64-fast-isel-icmp.ll b/test/CodeGen/AArch64/arm64-fast-isel-icmp.ll
index 245c70e8905f..4bc02ebdd3e1 100644
--- a/test/CodeGen/AArch64/arm64-fast-isel-icmp.ll
+++ b/test/CodeGen/AArch64/arm64-fast-isel-icmp.ll
@@ -1,4 +1,4 @@
-; RUN: llc -O0 -fast-isel-abort -verify-machineinstrs -mtriple=arm64-apple-darwin < %s | FileCheck %s
+; RUN: llc -O0 -fast-isel-abort=1 -verify-machineinstrs -mtriple=arm64-apple-darwin < %s | FileCheck %s
define i32 @icmp_eq_imm(i32 %a) nounwind ssp {
entry:
diff --git a/test/CodeGen/AArch64/arm64-fast-isel-indirectbr.ll b/test/CodeGen/AArch64/arm64-fast-isel-indirectbr.ll
index a5f45249678b..cb54e4530a58 100644
--- a/test/CodeGen/AArch64/arm64-fast-isel-indirectbr.ll
+++ b/test/CodeGen/AArch64/arm64-fast-isel-indirectbr.ll
@@ -1,4 +1,4 @@
-; RUN: llc -O0 -fast-isel-abort -verify-machineinstrs -mtriple=arm64-apple-darwin < %s | FileCheck %s
+; RUN: llc -O0 -fast-isel-abort=1 -verify-machineinstrs -mtriple=arm64-apple-darwin < %s | FileCheck %s
@fn.table = internal global [2 x i8*] [i8* blockaddress(@fn, %ZERO), i8* blockaddress(@fn, %ONE)], align 8
@@ -8,10 +8,10 @@ entry:
%retval = alloca i32, align 4
%target.addr = alloca i32, align 4
store i32 %target, i32* %target.addr, align 4
- %0 = load i32* %target.addr, align 4
+ %0 = load i32, i32* %target.addr, align 4
%idxprom = zext i32 %0 to i64
- %arrayidx = getelementptr inbounds [2 x i8*]* @fn.table, i32 0, i64 %idxprom
- %1 = load i8** %arrayidx, align 8
+ %arrayidx = getelementptr inbounds [2 x i8*], [2 x i8*]* @fn.table, i32 0, i64 %idxprom
+ %1 = load i8*, i8** %arrayidx, align 8
br label %indirectgoto
ZERO: ; preds = %indirectgoto
@@ -25,7 +25,7 @@ ONE: ; preds = %indirectgoto
br label %return
return: ; preds = %ONE, %ZERO
- %2 = load i32* %retval
+ %2 = load i32, i32* %retval
ret i32 %2
indirectgoto: ; preds = %entry
diff --git a/test/CodeGen/AArch64/arm64-fast-isel-intrinsic.ll b/test/CodeGen/AArch64/arm64-fast-isel-intrinsic.ll
index 9ac3e4431830..bb2889eaf4be 100644
--- a/test/CodeGen/AArch64/arm64-fast-isel-intrinsic.ll
+++ b/test/CodeGen/AArch64/arm64-fast-isel-intrinsic.ll
@@ -1,4 +1,4 @@
-; RUN: llc -O0 -fast-isel-abort -verify-machineinstrs -relocation-model=dynamic-no-pic -mtriple=arm64-apple-ios < %s | FileCheck %s --check-prefix=ARM64
+; RUN: llc -O0 -fast-isel-abort=1 -verify-machineinstrs -relocation-model=dynamic-no-pic -mtriple=arm64-apple-ios < %s | FileCheck %s --check-prefix=ARM64
@message = global [80 x i8] c"The LLVM Compiler Infrastructure\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00", align 16
@temp = common global [80 x i8] zeroinitializer, align 16
@@ -11,7 +11,7 @@ define void @t1() {
; ARM64: movz x2, #0x50
; ARM64: uxtb w1, w9
; ARM64: bl _memset
- call void @llvm.memset.p0i8.i64(i8* getelementptr inbounds ([80 x i8]* @message, i32 0, i32 0), i8 0, i64 80, i32 16, i1 false)
+ call void @llvm.memset.p0i8.i64(i8* getelementptr inbounds ([80 x i8], [80 x i8]* @message, i32 0, i32 0), i8 0, i64 80, i32 16, i1 false)
ret void
}
@@ -25,7 +25,7 @@ define void @t2() {
; ARM64: add x1, x8, _message@PAGEOFF
; ARM64: movz x2, #0x50
; ARM64: bl _memcpy
- call void @llvm.memcpy.p0i8.p0i8.i64(i8* getelementptr inbounds ([80 x i8]* @temp, i32 0, i32 0), i8* getelementptr inbounds ([80 x i8]* @message, i32 0, i32 0), i64 80, i32 16, i1 false)
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* getelementptr inbounds ([80 x i8], [80 x i8]* @temp, i32 0, i32 0), i8* getelementptr inbounds ([80 x i8], [80 x i8]* @message, i32 0, i32 0), i64 80, i32 16, i1 false)
ret void
}
@@ -39,7 +39,7 @@ define void @t3() {
; ARM64: add x1, x8, _message@PAGEOFF
; ARM64: movz x2, #0x14
; ARM64: bl _memmove
- call void @llvm.memmove.p0i8.p0i8.i64(i8* getelementptr inbounds ([80 x i8]* @temp, i32 0, i32 0), i8* getelementptr inbounds ([80 x i8]* @message, i32 0, i32 0), i64 20, i32 16, i1 false)
+ call void @llvm.memmove.p0i8.p0i8.i64(i8* getelementptr inbounds ([80 x i8], [80 x i8]* @temp, i32 0, i32 0), i8* getelementptr inbounds ([80 x i8], [80 x i8]* @message, i32 0, i32 0), i64 20, i32 16, i1 false)
ret void
}
@@ -58,7 +58,7 @@ define void @t4() {
; ARM64: ldrb w11, [x9, #16]
; ARM64: strb w11, [x8, #16]
; ARM64: ret
- call void @llvm.memcpy.p0i8.p0i8.i64(i8* getelementptr inbounds ([80 x i8]* @temp, i32 0, i32 0), i8* getelementptr inbounds ([80 x i8]* @message, i32 0, i32 0), i64 17, i32 16, i1 false)
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* getelementptr inbounds ([80 x i8], [80 x i8]* @temp, i32 0, i32 0), i8* getelementptr inbounds ([80 x i8], [80 x i8]* @message, i32 0, i32 0), i64 17, i32 16, i1 false)
ret void
}
@@ -75,7 +75,7 @@ define void @t5() {
; ARM64: ldrb w11, [x9, #16]
; ARM64: strb w11, [x8, #16]
; ARM64: ret
- call void @llvm.memcpy.p0i8.p0i8.i64(i8* getelementptr inbounds ([80 x i8]* @temp, i32 0, i32 0), i8* getelementptr inbounds ([80 x i8]* @message, i32 0, i32 0), i64 17, i32 8, i1 false)
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* getelementptr inbounds ([80 x i8], [80 x i8]* @temp, i32 0, i32 0), i8* getelementptr inbounds ([80 x i8], [80 x i8]* @message, i32 0, i32 0), i64 17, i32 8, i1 false)
ret void
}
@@ -92,7 +92,7 @@ define void @t6() {
; ARM64: ldrb w10, [x9, #8]
; ARM64: strb w10, [x8, #8]
; ARM64: ret
- call void @llvm.memcpy.p0i8.p0i8.i64(i8* getelementptr inbounds ([80 x i8]* @temp, i32 0, i32 0), i8* getelementptr inbounds ([80 x i8]* @message, i32 0, i32 0), i64 9, i32 4, i1 false)
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* getelementptr inbounds ([80 x i8], [80 x i8]* @temp, i32 0, i32 0), i8* getelementptr inbounds ([80 x i8], [80 x i8]* @message, i32 0, i32 0), i64 9, i32 4, i1 false)
ret void
}
@@ -111,7 +111,7 @@ define void @t7() {
; ARM64: ldrb w10, [x9, #6]
; ARM64: strb w10, [x8, #6]
; ARM64: ret
- call void @llvm.memcpy.p0i8.p0i8.i64(i8* getelementptr inbounds ([80 x i8]* @temp, i32 0, i32 0), i8* getelementptr inbounds ([80 x i8]* @message, i32 0, i32 0), i64 7, i32 2, i1 false)
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* getelementptr inbounds ([80 x i8], [80 x i8]* @temp, i32 0, i32 0), i8* getelementptr inbounds ([80 x i8], [80 x i8]* @message, i32 0, i32 0), i64 7, i32 2, i1 false)
ret void
}
@@ -130,7 +130,7 @@ define void @t8() {
; ARM64: ldrb w10, [x9, #3]
; ARM64: strb w10, [x8, #3]
; ARM64: ret
- call void @llvm.memcpy.p0i8.p0i8.i64(i8* getelementptr inbounds ([80 x i8]* @temp, i32 0, i32 0), i8* getelementptr inbounds ([80 x i8]* @message, i32 0, i32 0), i64 4, i32 1, i1 false)
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* getelementptr inbounds ([80 x i8], [80 x i8]* @temp, i32 0, i32 0), i8* getelementptr inbounds ([80 x i8], [80 x i8]* @message, i32 0, i32 0), i64 4, i32 1, i1 false)
ret void
}
@@ -142,7 +142,7 @@ define void @test_distant_memcpy(i8* %dst) {
; ARM64: ldrb [[BYTE:w[0-9]+]], [x[[ADDR]]]
; ARM64: strb [[BYTE]], [x0]
%array = alloca i8, i32 8192
- %elem = getelementptr i8* %array, i32 8000
+ %elem = getelementptr i8, i8* %array, i32 8000
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %elem, i64 1, i32 1, i1 false)
ret void
}
diff --git a/test/CodeGen/AArch64/arm64-fast-isel-materialize.ll b/test/CodeGen/AArch64/arm64-fast-isel-materialize.ll
index 1dea5d944be0..b5a08c148930 100644
--- a/test/CodeGen/AArch64/arm64-fast-isel-materialize.ll
+++ b/test/CodeGen/AArch64/arm64-fast-isel-materialize.ll
@@ -1,4 +1,4 @@
-; RUN: llc -O0 -fast-isel-abort -verify-machineinstrs -mtriple=arm64-apple-darwin < %s | FileCheck %s
+; RUN: llc -O0 -fast-isel-abort=1 -verify-machineinstrs -mtriple=arm64-apple-darwin < %s | FileCheck %s
; Materialize using fmov
define float @fmov_float1() {
diff --git a/test/CodeGen/AArch64/arm64-fast-isel-rem.ll b/test/CodeGen/AArch64/arm64-fast-isel-rem.ll
index 26f9afaccee7..05aa96997b57 100644
--- a/test/CodeGen/AArch64/arm64-fast-isel-rem.ll
+++ b/test/CodeGen/AArch64/arm64-fast-isel-rem.ll
@@ -1,5 +1,5 @@
-; RUN: llc -O0 -fast-isel-abort -verify-machineinstrs -mtriple=arm64-apple-darwin < %s | FileCheck %s
-; RUN: llc %s -O0 -fast-isel-abort -mtriple=arm64-apple-darwin -print-machineinstrs=expand-isel-pseudos -o /dev/null 2> %t
+; RUN: llc -O0 -fast-isel-abort=1 -verify-machineinstrs -mtriple=arm64-apple-darwin < %s | FileCheck %s
+; RUN: llc %s -O0 -fast-isel-abort=1 -mtriple=arm64-apple-darwin -print-machineinstrs=expand-isel-pseudos -o /dev/null 2> %t
; RUN: FileCheck %s < %t --check-prefix=CHECK-SSA
; CHECK-SSA-LABEL: Machine code for function t1
diff --git a/test/CodeGen/AArch64/arm64-fast-isel-ret.ll b/test/CodeGen/AArch64/arm64-fast-isel-ret.ll
index f84c75504f64..1f6a60e77cc3 100644
--- a/test/CodeGen/AArch64/arm64-fast-isel-ret.ll
+++ b/test/CodeGen/AArch64/arm64-fast-isel-ret.ll
@@ -1,4 +1,4 @@
-; RUN: llc -O0 -fast-isel-abort -verify-machineinstrs -mtriple=arm64-apple-darwin < %s | FileCheck %s
+; RUN: llc -O0 -fast-isel-abort=1 -verify-machineinstrs -mtriple=arm64-apple-darwin < %s | FileCheck %s
;; Test returns.
define void @t0() nounwind ssp {
@@ -16,7 +16,7 @@ entry:
; CHECK: ret
%a.addr = alloca i32, align 4
store i32 %a, i32* %a.addr, align 4
- %tmp = load i32* %a.addr, align 4
+ %tmp = load i32, i32* %a.addr, align 4
ret i32 %tmp
}
@@ -28,7 +28,7 @@ entry:
; CHECK: ret
%a.addr = alloca i64, align 8
store i64 %a, i64* %a.addr, align 8
- %tmp = load i64* %a.addr, align 8
+ %tmp = load i64, i64* %a.addr, align 8
ret i64 %tmp
}
@@ -38,7 +38,7 @@ entry:
; CHECK: sxth w0, w0
%a.addr = alloca i16, align 1
store i16 %a, i16* %a.addr, align 1
- %0 = load i16* %a.addr, align 1
+ %0 = load i16, i16* %a.addr, align 1
ret i16 %0
}
@@ -48,7 +48,7 @@ entry:
; CHECK: sxtb w0, w0
%a.addr = alloca i8, align 1
store i8 %a, i8* %a.addr, align 1
- %0 = load i8* %a.addr, align 1
+ %0 = load i8, i8* %a.addr, align 1
ret i8 %0
}
@@ -58,6 +58,6 @@ entry:
; CHECK: and w0, w0, #0x1
%a.addr = alloca i1, align 1
store i1 %a, i1* %a.addr, align 1
- %0 = load i1* %a.addr, align 1
+ %0 = load i1, i1* %a.addr, align 1
ret i1 %0
}
diff --git a/test/CodeGen/AArch64/arm64-fast-isel-store.ll b/test/CodeGen/AArch64/arm64-fast-isel-store.ll
index 9494d5553010..47d4cdb3321b 100644
--- a/test/CodeGen/AArch64/arm64-fast-isel-store.ll
+++ b/test/CodeGen/AArch64/arm64-fast-isel-store.ll
@@ -1,5 +1,5 @@
; RUN: llc -mtriple=aarch64-unknown-unknown -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=aarch64-unknown-unknown -fast-isel -fast-isel-abort -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-unknown-unknown -fast-isel -fast-isel-abort=1 -verify-machineinstrs < %s | FileCheck %s
define void @store_i8(i8* %a) {
; CHECK-LABEL: store_i8
diff --git a/test/CodeGen/AArch64/arm64-fast-isel.ll b/test/CodeGen/AArch64/arm64-fast-isel.ll
index 434994607c62..9f83a9c359a2 100644
--- a/test/CodeGen/AArch64/arm64-fast-isel.ll
+++ b/test/CodeGen/AArch64/arm64-fast-isel.ll
@@ -1,4 +1,4 @@
-; RUN: llc -O0 -fast-isel-abort -verify-machineinstrs -mtriple=arm64-apple-darwin < %s | FileCheck %s
+; RUN: llc -O0 -fast-isel-abort=1 -verify-machineinstrs -mtriple=arm64-apple-darwin < %s | FileCheck %s
define void @t0(i32 %a) nounwind {
entry:
@@ -9,7 +9,7 @@ entry:
; CHECK: ret
%a.addr = alloca i32, align 4
store i32 %a, i32* %a.addr
- %tmp = load i32* %a.addr
+ %tmp = load i32, i32* %a.addr
store i32 %tmp, i32* %a.addr
ret void
}
@@ -22,7 +22,7 @@ define void @t1(i64 %a) nounwind {
; CHECK: ret
%a.addr = alloca i64, align 4
store i64 %a, i64* %a.addr
- %tmp = load i64* %a.addr
+ %tmp = load i64, i64* %a.addr
store i64 %tmp, i64* %a.addr
ret void
}
@@ -39,7 +39,7 @@ entry:
; CHECK: ret
%a.addr = alloca i1, align 1
store i1 %a, i1* %a.addr, align 1
- %0 = load i1* %a.addr, align 1
+ %0 = load i1, i1* %a.addr, align 1
ret i1 %0
}
@@ -48,8 +48,8 @@ entry:
; CHECK-LABEL: t2:
; CHECK: ldur w0, [x0, #-4]
; CHECK: ret
- %0 = getelementptr i32 *%ptr, i32 -1
- %1 = load i32* %0, align 4
+ %0 = getelementptr i32, i32 *%ptr, i32 -1
+ %1 = load i32, i32* %0, align 4
ret i32 %1
}
@@ -58,8 +58,8 @@ entry:
; CHECK-LABEL: t3:
; CHECK: ldur w0, [x0, #-256]
; CHECK: ret
- %0 = getelementptr i32 *%ptr, i32 -64
- %1 = load i32* %0, align 4
+ %0 = getelementptr i32, i32 *%ptr, i32 -64
+ %1 = load i32, i32* %0, align 4
ret i32 %1
}
@@ -68,7 +68,7 @@ entry:
; CHECK-LABEL: t4:
; CHECK: stur wzr, [x0, #-4]
; CHECK: ret
- %0 = getelementptr i32 *%ptr, i32 -1
+ %0 = getelementptr i32, i32 *%ptr, i32 -1
store i32 0, i32* %0, align 4
ret void
}
@@ -78,7 +78,7 @@ entry:
; CHECK-LABEL: t5:
; CHECK: stur wzr, [x0, #-256]
; CHECK: ret
- %0 = getelementptr i32 *%ptr, i32 -64
+ %0 = getelementptr i32, i32 *%ptr, i32 -64
store i32 0, i32* %0, align 4
ret void
}
@@ -91,3 +91,45 @@ define void @t6() nounwind {
}
declare void @llvm.trap() nounwind
+
+define void @ands(i32* %addr) {
+; CHECK-LABEL: ands:
+; CHECK: tst [[COND:w[0-9]+]], #0x1
+; CHECK-NEXT: csel [[COND]],
+entry:
+ %cond91 = select i1 undef, i32 1, i32 2
+ store i32 %cond91, i32* %addr, align 4
+ ret void
+}
+
+define i64 @mul_umul(i64 %arg) {
+; CHECK-LABEL: mul_umul:
+; CHECK: mul x{{[0-9]+}}, [[ARG1:x[0-9]+]], [[ARG2:x[0-9]+]]
+; CHECK-NEXT: umulh x{{[0-9]+}}, [[ARG1]], [[ARG2]]
+entry:
+ %sub.ptr.div = sdiv exact i64 %arg, 8
+ %tmp = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 %sub.ptr.div, i64 8)
+ %tmp1 = extractvalue { i64, i1 } %tmp, 0
+ ret i64 %tmp1
+}
+
+declare { i64, i1 } @llvm.umul.with.overflow.i64(i64, i64)
+
+define void @logicalReg() {
+; Make sure we generate a logical reg = reg, reg instruction without any
+; machine verifier errors.
+; CHECK-LABEL: logicalReg:
+; CHECK: orr w{{[0-9]+}}, w{{[0-9]+}}, w{{[0-9]+}}
+; CHECK: ret
+entry:
+ br i1 undef, label %cond.end, label %cond.false
+
+cond.false:
+ %cond = select i1 undef, i1 true, i1 false
+ br label %cond.end
+
+cond.end:
+ %cond13 = phi i1 [ %cond, %cond.false ], [ true, %entry ]
+ ret void
+}
+
diff --git a/test/CodeGen/AArch64/arm64-fastisel-gep-promote-before-add.ll b/test/CodeGen/AArch64/arm64-fastisel-gep-promote-before-add.ll
index af9fe0561737..8268bcff1f80 100644
--- a/test/CodeGen/AArch64/arm64-fastisel-gep-promote-before-add.ll
+++ b/test/CodeGen/AArch64/arm64-fastisel-gep-promote-before-add.ll
@@ -6,13 +6,13 @@ define zeroext i8 @gep_promotion(i8* %ptr) nounwind uwtable ssp {
entry:
%ptr.addr = alloca i8*, align 8
%add = add i8 64, 64 ; 0x40 + 0x40
- %0 = load i8** %ptr.addr, align 8
+ %0 = load i8*, i8** %ptr.addr, align 8
; CHECK-LABEL: _gep_promotion:
; CHECK: ldrb {{[a-z][0-9]+}}, {{\[[a-z][0-9]+\]}}
- %arrayidx = getelementptr inbounds i8* %0, i8 %add
+ %arrayidx = getelementptr inbounds i8, i8* %0, i8 %add
- %1 = load i8* %arrayidx, align 1
+ %1 = load i8, i8* %arrayidx, align 1
ret i8 %1
}
diff --git a/test/CodeGen/AArch64/arm64-fcopysign.ll b/test/CodeGen/AArch64/arm64-fcopysign.ll
index 66241df9444c..feffd41f002a 100644
--- a/test/CodeGen/AArch64/arm64-fcopysign.ll
+++ b/test/CodeGen/AArch64/arm64-fcopysign.ll
@@ -39,7 +39,7 @@ entry:
; CHECK: fcvt s0, d0
; CHECK: movi.4s v[[CONST:[0-9]+]], #0x80, lsl #24
; CHECK: bit.16b v{{[0-9]+}}, v0, v[[CONST]]
- %0 = tail call double (...)* @bar() nounwind
+ %0 = tail call double (...) @bar() nounwind
%1 = fptrunc double %0 to float
%2 = tail call float @copysignf(float 5.000000e-01, float %1) nounwind readnone
%3 = fadd float %1, %2
diff --git a/test/CodeGen/AArch64/arm64-fixed-point-scalar-cvt-dagcombine.ll b/test/CodeGen/AArch64/arm64-fixed-point-scalar-cvt-dagcombine.ll
index e51c38b2b95e..e41e19e50eea 100644
--- a/test/CodeGen/AArch64/arm64-fixed-point-scalar-cvt-dagcombine.ll
+++ b/test/CodeGen/AArch64/arm64-fixed-point-scalar-cvt-dagcombine.ll
@@ -6,7 +6,7 @@
; rdar://11855286
define double @foo0(<2 x i64> %a) nounwind {
; CHECK: scvtf.2d [[REG:v[0-9]+]], v0, #9
-; CHECK-NEXT: ins.d v0[0], [[REG]][1]
+; CHECK-NEXT: mov d0, [[REG]][1]
%vecext = extractelement <2 x i64> %a, i32 1
%fcvt_n = tail call double @llvm.aarch64.neon.vcvtfxs2fp.f64.i64(i64 %vecext, i32 9)
ret double %fcvt_n
diff --git a/test/CodeGen/AArch64/arm64-fmax.ll b/test/CodeGen/AArch64/arm64-fmax.ll
index 94b745437bd3..ea281528b84c 100644
--- a/test/CodeGen/AArch64/arm64-fmax.ll
+++ b/test/CodeGen/AArch64/arm64-fmax.ll
@@ -1,29 +1,49 @@
; RUN: llc -march=arm64 -enable-no-nans-fp-math < %s | FileCheck %s
+; RUN: llc -march=arm64 < %s | FileCheck %s --check-prefix=CHECK-SAFE
-define double @test_direct(float %in) #1 {
+define double @test_direct(float %in) {
; CHECK-LABEL: test_direct:
+; CHECK-SAFE-LABEL: test_direct:
%cmp = fcmp olt float %in, 0.000000e+00
%longer = fpext float %in to double
%val = select i1 %cmp, double 0.000000e+00, double %longer
ret double %val
; CHECK: fmax
+; CHECK-SAFE: fmax
}
-define double @test_cross(float %in) #1 {
+define double @test_cross(float %in) {
; CHECK-LABEL: test_cross:
+; CHECK-SAFE-LABEL: test_cross:
+ %cmp = fcmp ult float %in, 0.000000e+00
+ %longer = fpext float %in to double
+ %val = select i1 %cmp, double %longer, double 0.000000e+00
+ ret double %val
+
+; CHECK: fmin
+; CHECK-SAFE: fmin
+}
+
+; Same as previous, but with ordered comparison;
+; can't be converted in safe-math mode.
+define double @test_cross_fail_nan(float %in) {
+; CHECK-LABEL: test_cross_fail_nan:
+; CHECK-SAFE-LABEL: test_cross_fail_nan:
%cmp = fcmp olt float %in, 0.000000e+00
%longer = fpext float %in to double
%val = select i1 %cmp, double %longer, double 0.000000e+00
ret double %val
; CHECK: fmin
+; CHECK-SAFE: fcsel d0, d1, d0, mi
}
; This isn't a min or a max, but passes the first condition for swapping the
; results. Make sure they're put back before we resort to the normal fcsel.
define float @test_cross_fail(float %lhs, float %rhs) {
; CHECK-LABEL: test_cross_fail:
+; CHECK-SAFE-LABEL: test_cross_fail:
%tst = fcmp une float %lhs, %rhs
%res = select i1 %tst, float %rhs, float %lhs
ret float %res
@@ -31,4 +51,12 @@ define float @test_cross_fail(float %lhs, float %rhs) {
; The register allocator would have to decide to be deliberately obtuse before
; other registers were used.
; CHECK: fcsel s0, s1, s0, ne
-}
\ No newline at end of file
+; CHECK-SAFE: fcsel s0, s1, s0, ne
+}
+
+; Make sure the transformation isn't triggered for integers
+define i64 @test_integer(i64 %in) {
+ %cmp = icmp slt i64 %in, 0
+ %val = select i1 %cmp, i64 0, i64 %in
+ ret i64 %val
+}
diff --git a/test/CodeGen/AArch64/arm64-fmuladd.ll b/test/CodeGen/AArch64/arm64-fmuladd.ll
index 6c5eecabd755..cfc8b5fe65ef 100644
--- a/test/CodeGen/AArch64/arm64-fmuladd.ll
+++ b/test/CodeGen/AArch64/arm64-fmuladd.ll
@@ -4,9 +4,9 @@ define float @test_f32(float* %A, float* %B, float* %C) nounwind {
;CHECK-LABEL: test_f32:
;CHECK: fmadd
;CHECK-NOT: fmadd
- %tmp1 = load float* %A
- %tmp2 = load float* %B
- %tmp3 = load float* %C
+ %tmp1 = load float, float* %A
+ %tmp2 = load float, float* %B
+ %tmp3 = load float, float* %C
%tmp4 = call float @llvm.fmuladd.f32(float %tmp1, float %tmp2, float %tmp3)
ret float %tmp4
}
@@ -15,9 +15,9 @@ define <2 x float> @test_v2f32(<2 x float>* %A, <2 x float>* %B, <2 x float>* %C
;CHECK-LABEL: test_v2f32:
;CHECK: fmla.2s
;CHECK-NOT: fmla.2s
- %tmp1 = load <2 x float>* %A
- %tmp2 = load <2 x float>* %B
- %tmp3 = load <2 x float>* %C
+ %tmp1 = load <2 x float>, <2 x float>* %A
+ %tmp2 = load <2 x float>, <2 x float>* %B
+ %tmp3 = load <2 x float>, <2 x float>* %C
%tmp4 = call <2 x float> @llvm.fmuladd.v2f32(<2 x float> %tmp1, <2 x float> %tmp2, <2 x float> %tmp3)
ret <2 x float> %tmp4
}
@@ -26,9 +26,9 @@ define <4 x float> @test_v4f32(<4 x float>* %A, <4 x float>* %B, <4 x float>* %C
;CHECK-LABEL: test_v4f32:
;CHECK: fmla.4s
;CHECK-NOT: fmla.4s
- %tmp1 = load <4 x float>* %A
- %tmp2 = load <4 x float>* %B
- %tmp3 = load <4 x float>* %C
+ %tmp1 = load <4 x float>, <4 x float>* %A
+ %tmp2 = load <4 x float>, <4 x float>* %B
+ %tmp3 = load <4 x float>, <4 x float>* %C
%tmp4 = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> %tmp1, <4 x float> %tmp2, <4 x float> %tmp3)
ret <4 x float> %tmp4
}
@@ -38,9 +38,9 @@ define <8 x float> @test_v8f32(<8 x float>* %A, <8 x float>* %B, <8 x float>* %C
;CHECK: fmla.4s
;CHECK: fmla.4s
;CHECK-NOT: fmla.4s
- %tmp1 = load <8 x float>* %A
- %tmp2 = load <8 x float>* %B
- %tmp3 = load <8 x float>* %C
+ %tmp1 = load <8 x float>, <8 x float>* %A
+ %tmp2 = load <8 x float>, <8 x float>* %B
+ %tmp3 = load <8 x float>, <8 x float>* %C
%tmp4 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %tmp1, <8 x float> %tmp2, <8 x float> %tmp3)
ret <8 x float> %tmp4
}
@@ -49,9 +49,9 @@ define double @test_f64(double* %A, double* %B, double* %C) nounwind {
;CHECK-LABEL: test_f64:
;CHECK: fmadd
;CHECK-NOT: fmadd
- %tmp1 = load double* %A
- %tmp2 = load double* %B
- %tmp3 = load double* %C
+ %tmp1 = load double, double* %A
+ %tmp2 = load double, double* %B
+ %tmp3 = load double, double* %C
%tmp4 = call double @llvm.fmuladd.f64(double %tmp1, double %tmp2, double %tmp3)
ret double %tmp4
}
@@ -60,9 +60,9 @@ define <2 x double> @test_v2f64(<2 x double>* %A, <2 x double>* %B, <2 x double>
;CHECK-LABEL: test_v2f64:
;CHECK: fmla.2d
;CHECK-NOT: fmla.2d
- %tmp1 = load <2 x double>* %A
- %tmp2 = load <2 x double>* %B
- %tmp3 = load <2 x double>* %C
+ %tmp1 = load <2 x double>, <2 x double>* %A
+ %tmp2 = load <2 x double>, <2 x double>* %B
+ %tmp3 = load <2 x double>, <2 x double>* %C
%tmp4 = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> %tmp1, <2 x double> %tmp2, <2 x double> %tmp3)
ret <2 x double> %tmp4
}
@@ -72,9 +72,9 @@ define <4 x double> @test_v4f64(<4 x double>* %A, <4 x double>* %B, <4 x double>
;CHECK: fmla.2d
;CHECK: fmla.2d
;CHECK-NOT: fmla.2d
- %tmp1 = load <4 x double>* %A
- %tmp2 = load <4 x double>* %B
- %tmp3 = load <4 x double>* %C
+ %tmp1 = load <4 x double>, <4 x double>* %A
+ %tmp2 = load <4 x double>, <4 x double>* %B
+ %tmp3 = load <4 x double>, <4 x double>* %C
%tmp4 = call <4 x double> @llvm.fmuladd.v4f64(<4 x double> %tmp1, <4 x double> %tmp2, <4 x double> %tmp3)
ret <4 x double> %tmp4
}
diff --git a/test/CodeGen/AArch64/arm64-fold-address.ll b/test/CodeGen/AArch64/arm64-fold-address.ll
index 1f0b918ecd9c..6d2ea17d3424 100644
--- a/test/CodeGen/AArch64/arm64-fold-address.ll
+++ b/test/CodeGen/AArch64/arm64-fold-address.ll
@@ -14,23 +14,23 @@ entry:
; CHECK: ldp d0, d1, [x[[REG]]]
; CHECK: ldp d2, d3, [x[[REG]], #16]
; CHECK: ret
- %ivar = load i64* @"OBJC_IVAR_$_UIScreen._bounds", align 8, !invariant.load !4
+ %ivar = load i64, i64* @"OBJC_IVAR_$_UIScreen._bounds", align 8, !invariant.load !4
%0 = bitcast %0* %self to i8*
- %add.ptr = getelementptr inbounds i8* %0, i64 %ivar
+ %add.ptr = getelementptr inbounds i8, i8* %0, i64 %ivar
%add.ptr10.0 = bitcast i8* %add.ptr to double*
- %tmp11 = load double* %add.ptr10.0, align 8
+ %tmp11 = load double, double* %add.ptr10.0, align 8
%add.ptr.sum = add i64 %ivar, 8
- %add.ptr10.1 = getelementptr inbounds i8* %0, i64 %add.ptr.sum
+ %add.ptr10.1 = getelementptr inbounds i8, i8* %0, i64 %add.ptr.sum
%1 = bitcast i8* %add.ptr10.1 to double*
- %tmp12 = load double* %1, align 8
+ %tmp12 = load double, double* %1, align 8
%add.ptr.sum17 = add i64 %ivar, 16
- %add.ptr4.1 = getelementptr inbounds i8* %0, i64 %add.ptr.sum17
+ %add.ptr4.1 = getelementptr inbounds i8, i8* %0, i64 %add.ptr.sum17
%add.ptr4.1.0 = bitcast i8* %add.ptr4.1 to double*
- %tmp = load double* %add.ptr4.1.0, align 8
+ %tmp = load double, double* %add.ptr4.1.0, align 8
%add.ptr4.1.sum = add i64 %ivar, 24
- %add.ptr4.1.1 = getelementptr inbounds i8* %0, i64 %add.ptr4.1.sum
+ %add.ptr4.1.1 = getelementptr inbounds i8, i8* %0, i64 %add.ptr4.1.sum
%2 = bitcast i8* %add.ptr4.1.1 to double*
- %tmp5 = load double* %2, align 8
+ %tmp5 = load double, double* %2, align 8
%insert14 = insertvalue %struct.CGPoint undef, double %tmp11, 0
%insert16 = insertvalue %struct.CGPoint %insert14, double %tmp12, 1
%insert = insertvalue %struct.CGRect undef, %struct.CGPoint %insert16, 0
@@ -46,20 +46,20 @@ entry:
; CHECK: ldr d0, [x0, x{{[0-9]+}}]
; CHECK-NOT: add x0, x0, x1
; CHECK: ret
- %ivar = load i64* @"OBJC_IVAR_$_UIScreen._bounds", align 8, !invariant.load !4
+ %ivar = load i64, i64* @"OBJC_IVAR_$_UIScreen._bounds", align 8, !invariant.load !4
%0 = bitcast %0* %self to i8*
- %add.ptr = getelementptr inbounds i8* %0, i64 %ivar
+ %add.ptr = getelementptr inbounds i8, i8* %0, i64 %ivar
%add.ptr10.0 = bitcast i8* %add.ptr to double*
- %tmp11 = load double* %add.ptr10.0, align 8
- %add.ptr10.1 = getelementptr inbounds i8* %0, i64 %ivar
+ %tmp11 = load double, double* %add.ptr10.0, align 8
+ %add.ptr10.1 = getelementptr inbounds i8, i8* %0, i64 %ivar
%1 = bitcast i8* %add.ptr10.1 to double*
- %tmp12 = load double* %1, align 8
- %add.ptr4.1 = getelementptr inbounds i8* %0, i64 %ivar
+ %tmp12 = load double, double* %1, align 8
+ %add.ptr4.1 = getelementptr inbounds i8, i8* %0, i64 %ivar
%add.ptr4.1.0 = bitcast i8* %add.ptr4.1 to double*
- %tmp = load double* %add.ptr4.1.0, align 8
- %add.ptr4.1.1 = getelementptr inbounds i8* %0, i64 %ivar
+ %tmp = load double, double* %add.ptr4.1.0, align 8
+ %add.ptr4.1.1 = getelementptr inbounds i8, i8* %0, i64 %ivar
%2 = bitcast i8* %add.ptr4.1.1 to double*
- %tmp5 = load double* %2, align 8
+ %tmp5 = load double, double* %2, align 8
%insert14 = insertvalue %struct.CGPoint undef, double %tmp11, 0
%insert16 = insertvalue %struct.CGPoint %insert14, double %tmp12, 1
%insert = insertvalue %struct.CGRect undef, %struct.CGPoint %insert16, 0
diff --git a/test/CodeGen/AArch64/arm64-fold-lsl.ll b/test/CodeGen/AArch64/arm64-fold-lsl.ll
index ec65e467e37d..e1acd6fdea74 100644
--- a/test/CodeGen/AArch64/arm64-fold-lsl.ll
+++ b/test/CodeGen/AArch64/arm64-fold-lsl.ll
@@ -13,8 +13,8 @@ define i16 @load_halfword(%struct.a* %ctx, i32 %xor72) nounwind {
%shr81 = lshr i32 %xor72, 9
%conv82 = zext i32 %shr81 to i64
%idxprom83 = and i64 %conv82, 255
- %arrayidx86 = getelementptr inbounds %struct.a* %ctx, i64 0, i64 %idxprom83
- %result = load i16* %arrayidx86, align 2
+ %arrayidx86 = getelementptr inbounds %struct.a, %struct.a* %ctx, i64 0, i64 %idxprom83
+ %result = load i16, i16* %arrayidx86, align 2
ret i16 %result
}
@@ -25,8 +25,8 @@ define i32 @load_word(%struct.b* %ctx, i32 %xor72) nounwind {
%shr81 = lshr i32 %xor72, 9
%conv82 = zext i32 %shr81 to i64
%idxprom83 = and i64 %conv82, 255
- %arrayidx86 = getelementptr inbounds %struct.b* %ctx, i64 0, i64 %idxprom83
- %result = load i32* %arrayidx86, align 4
+ %arrayidx86 = getelementptr inbounds %struct.b, %struct.b* %ctx, i64 0, i64 %idxprom83
+ %result = load i32, i32* %arrayidx86, align 4
ret i32 %result
}
@@ -37,8 +37,8 @@ define i64 @load_doubleword(%struct.c* %ctx, i32 %xor72) nounwind {
%shr81 = lshr i32 %xor72, 9
%conv82 = zext i32 %shr81 to i64
%idxprom83 = and i64 %conv82, 255
- %arrayidx86 = getelementptr inbounds %struct.c* %ctx, i64 0, i64 %idxprom83
- %result = load i64* %arrayidx86, align 8
+ %arrayidx86 = getelementptr inbounds %struct.c, %struct.c* %ctx, i64 0, i64 %idxprom83
+ %result = load i64, i64* %arrayidx86, align 8
ret i64 %result
}
@@ -49,7 +49,7 @@ define void @store_halfword(%struct.a* %ctx, i32 %xor72, i16 %val) nounwind {
%shr81 = lshr i32 %xor72, 9
%conv82 = zext i32 %shr81 to i64
%idxprom83 = and i64 %conv82, 255
- %arrayidx86 = getelementptr inbounds %struct.a* %ctx, i64 0, i64 %idxprom83
+ %arrayidx86 = getelementptr inbounds %struct.a, %struct.a* %ctx, i64 0, i64 %idxprom83
store i16 %val, i16* %arrayidx86, align 8
ret void
}
@@ -61,7 +61,7 @@ define void @store_word(%struct.b* %ctx, i32 %xor72, i32 %val) nounwind {
%shr81 = lshr i32 %xor72, 9
%conv82 = zext i32 %shr81 to i64
%idxprom83 = and i64 %conv82, 255
- %arrayidx86 = getelementptr inbounds %struct.b* %ctx, i64 0, i64 %idxprom83
+ %arrayidx86 = getelementptr inbounds %struct.b, %struct.b* %ctx, i64 0, i64 %idxprom83
store i32 %val, i32* %arrayidx86, align 8
ret void
}
@@ -73,7 +73,7 @@ define void @store_doubleword(%struct.c* %ctx, i32 %xor72, i64 %val) nounwind {
%shr81 = lshr i32 %xor72, 9
%conv82 = zext i32 %shr81 to i64
%idxprom83 = and i64 %conv82, 255
- %arrayidx86 = getelementptr inbounds %struct.c* %ctx, i64 0, i64 %idxprom83
+ %arrayidx86 = getelementptr inbounds %struct.c, %struct.c* %ctx, i64 0, i64 %idxprom83
store i64 %val, i64* %arrayidx86, align 8
ret void
}
diff --git a/test/CodeGen/AArch64/arm64-fp128-folding.ll b/test/CodeGen/AArch64/arm64-fp128-folding.ll
index 6a7d203f5b17..4024dc984f63 100644
--- a/test/CodeGen/AArch64/arm64-fp128-folding.ll
+++ b/test/CodeGen/AArch64/arm64-fp128-folding.ll
@@ -8,7 +8,7 @@ define fp128 @test_folding() {
; CHECK-LABEL: test_folding:
%l = alloca i32
store i32 42, i32* %l
- %val = load i32* %l
+ %val = load i32, i32* %l
%fpval = sitofp i32 %val to fp128
; If the value is loaded from a constant pool into an fp128, it's been folded
; successfully.
diff --git a/test/CodeGen/AArch64/arm64-fp128.ll b/test/CodeGen/AArch64/arm64-fp128.ll
index b1d50102aa28..aaef39fcf512 100644
--- a/test/CodeGen/AArch64/arm64-fp128.ll
+++ b/test/CodeGen/AArch64/arm64-fp128.ll
@@ -6,8 +6,8 @@
define fp128 @test_add() {
; CHECK-LABEL: test_add:
- %lhs = load fp128* @lhs, align 16
- %rhs = load fp128* @rhs, align 16
+ %lhs = load fp128, fp128* @lhs, align 16
+ %rhs = load fp128, fp128* @rhs, align 16
; CHECK: ldr q0, [{{x[0-9]+}}, :lo12:lhs]
; CHECK: ldr q1, [{{x[0-9]+}}, :lo12:rhs]
@@ -19,8 +19,8 @@ define fp128 @test_add() {
define fp128 @test_sub() {
; CHECK-LABEL: test_sub:
- %lhs = load fp128* @lhs, align 16
- %rhs = load fp128* @rhs, align 16
+ %lhs = load fp128, fp128* @lhs, align 16
+ %rhs = load fp128, fp128* @rhs, align 16
; CHECK: ldr q0, [{{x[0-9]+}}, :lo12:lhs]
; CHECK: ldr q1, [{{x[0-9]+}}, :lo12:rhs]
@@ -32,8 +32,8 @@ define fp128 @test_sub() {
define fp128 @test_mul() {
; CHECK-LABEL: test_mul:
- %lhs = load fp128* @lhs, align 16
- %rhs = load fp128* @rhs, align 16
+ %lhs = load fp128, fp128* @lhs, align 16
+ %rhs = load fp128, fp128* @rhs, align 16
; CHECK: ldr q0, [{{x[0-9]+}}, :lo12:lhs]
; CHECK: ldr q1, [{{x[0-9]+}}, :lo12:rhs]
@@ -45,8 +45,8 @@ define fp128 @test_mul() {
define fp128 @test_div() {
; CHECK-LABEL: test_div:
- %lhs = load fp128* @lhs, align 16
- %rhs = load fp128* @rhs, align 16
+ %lhs = load fp128, fp128* @lhs, align 16
+ %rhs = load fp128, fp128* @rhs, align 16
; CHECK: ldr q0, [{{x[0-9]+}}, :lo12:lhs]
; CHECK: ldr q1, [{{x[0-9]+}}, :lo12:rhs]
@@ -60,7 +60,7 @@ define fp128 @test_div() {
define void @test_fptosi() {
; CHECK-LABEL: test_fptosi:
- %val = load fp128* @lhs, align 16
+ %val = load fp128, fp128* @lhs, align 16
%val32 = fptosi fp128 %val to i32
store i32 %val32, i32* @var32
@@ -75,7 +75,7 @@ define void @test_fptosi() {
define void @test_fptoui() {
; CHECK-LABEL: test_fptoui:
- %val = load fp128* @lhs, align 16
+ %val = load fp128, fp128* @lhs, align 16
%val32 = fptoui fp128 %val to i32
store i32 %val32, i32* @var32
@@ -91,12 +91,12 @@ define void @test_fptoui() {
define void @test_sitofp() {
; CHECK-LABEL: test_sitofp:
- %src32 = load i32* @var32
+ %src32 = load i32, i32* @var32
%val32 = sitofp i32 %src32 to fp128
store volatile fp128 %val32, fp128* @lhs
; CHECK: bl __floatsitf
- %src64 = load i64* @var64
+ %src64 = load i64, i64* @var64
%val64 = sitofp i64 %src64 to fp128
store volatile fp128 %val64, fp128* @lhs
; CHECK: bl __floatditf
@@ -107,12 +107,12 @@ define void @test_sitofp() {
define void @test_uitofp() {
; CHECK-LABEL: test_uitofp:
- %src32 = load i32* @var32
+ %src32 = load i32, i32* @var32
%val32 = uitofp i32 %src32 to fp128
store volatile fp128 %val32, fp128* @lhs
; CHECK: bl __floatunsitf
- %src64 = load i64* @var64
+ %src64 = load i64, i64* @var64
%val64 = uitofp i64 %src64 to fp128
store volatile fp128 %val64, fp128* @lhs
; CHECK: bl __floatunditf
@@ -123,8 +123,8 @@ define void @test_uitofp() {
define i1 @test_setcc1() {
; CHECK-LABEL: test_setcc1:
- %lhs = load fp128* @lhs, align 16
- %rhs = load fp128* @rhs, align 16
+ %lhs = load fp128, fp128* @lhs, align 16
+ %rhs = load fp128, fp128* @rhs, align 16
; CHECK: ldr q0, [{{x[0-9]+}}, :lo12:lhs]
; CHECK: ldr q1, [{{x[0-9]+}}, :lo12:rhs]
@@ -142,8 +142,8 @@ define i1 @test_setcc1() {
define i1 @test_setcc2() {
; CHECK-LABEL: test_setcc2:
- %lhs = load fp128* @lhs, align 16
- %rhs = load fp128* @rhs, align 16
+ %lhs = load fp128, fp128* @lhs, align 16
+ %rhs = load fp128, fp128* @rhs, align 16
; CHECK: ldr q0, [{{x[0-9]+}}, :lo12:lhs]
; CHECK: ldr q1, [{{x[0-9]+}}, :lo12:rhs]
@@ -164,8 +164,8 @@ define i1 @test_setcc2() {
define i32 @test_br_cc() {
; CHECK-LABEL: test_br_cc:
- %lhs = load fp128* @lhs, align 16
- %rhs = load fp128* @rhs, align 16
+ %lhs = load fp128, fp128* @lhs, align 16
+ %rhs = load fp128, fp128* @rhs, align 16
; CHECK: ldr q0, [{{x[0-9]+}}, :lo12:lhs]
; CHECK: ldr q1, [{{x[0-9]+}}, :lo12:rhs]
@@ -218,7 +218,7 @@ define void @test_select(i1 %cond, fp128 %lhs, fp128 %rhs) {
define void @test_round() {
; CHECK-LABEL: test_round:
- %val = load fp128* @lhs, align 16
+ %val = load fp128, fp128* @lhs, align 16
%float = fptrunc fp128 %val to float
store float %float, float* @varfloat, align 4
@@ -236,15 +236,15 @@ define void @test_round() {
define void @test_extend() {
; CHECK-LABEL: test_extend:
- %val = load fp128* @lhs, align 16
+ %val = load fp128, fp128* @lhs, align 16
- %float = load float* @varfloat
+ %float = load float, float* @varfloat
%fromfloat = fpext float %float to fp128
store volatile fp128 %fromfloat, fp128* @lhs, align 16
; CHECK: bl __extendsftf2
; CHECK: str q0, [{{x[0-9]+}}, :lo12:lhs]
- %double = load double* @vardouble
+ %double = load double, double* @vardouble
%fromdouble = fpext double %double to fp128
store volatile fp128 %fromdouble, fp128* @lhs, align 16
; CHECK: bl __extenddftf2
diff --git a/test/CodeGen/AArch64/arm64-hello.ll b/test/CodeGen/AArch64/arm64-hello.ll
index a6346fb467fb..f1c4e9bbaed9 100644
--- a/test/CodeGen/AArch64/arm64-hello.ll
+++ b/test/CodeGen/AArch64/arm64-hello.ll
@@ -31,7 +31,7 @@ define i32 @main() nounwind ssp {
entry:
%retval = alloca i32, align 4
store i32 0, i32* %retval
- %call = call i32 @puts(i8* getelementptr inbounds ([7 x i8]* @.str, i32 0, i32 0))
+ %call = call i32 @puts(i8* getelementptr inbounds ([7 x i8], [7 x i8]* @.str, i32 0, i32 0))
ret i32 %call
}
diff --git a/test/CodeGen/AArch64/arm64-i16-subreg-extract.ll b/test/CodeGen/AArch64/arm64-i16-subreg-extract.ll
index ba759e32aae5..8d74ce7f5182 100644
--- a/test/CodeGen/AArch64/arm64-i16-subreg-extract.ll
+++ b/test/CodeGen/AArch64/arm64-i16-subreg-extract.ll
@@ -3,7 +3,7 @@
define i32 @foo(<4 x i16>* %__a) nounwind {
; CHECK-LABEL: foo:
; CHECK: umov.h w{{[0-9]+}}, v{{[0-9]+}}[0]
- %tmp18 = load <4 x i16>* %__a, align 8
+ %tmp18 = load <4 x i16>, <4 x i16>* %__a, align 8
%vget_lane = extractelement <4 x i16> %tmp18, i32 0
%conv = zext i16 %vget_lane to i32
%mul = mul nsw i32 3, %conv
diff --git a/test/CodeGen/AArch64/arm64-indexed-memory.ll b/test/CodeGen/AArch64/arm64-indexed-memory.ll
index a8620f428da4..b52cddf600ac 100644
--- a/test/CodeGen/AArch64/arm64-indexed-memory.ll
+++ b/test/CodeGen/AArch64/arm64-indexed-memory.ll
@@ -4,8 +4,8 @@ define void @store64(i64** nocapture %out, i64 %index, i64 %spacing) nounwind no
; CHECK-LABEL: store64:
; CHECK: str x{{[0-9+]}}, [x{{[0-9+]}}], #8
; CHECK: ret
- %tmp = load i64** %out, align 8
- %incdec.ptr = getelementptr inbounds i64* %tmp, i64 1
+ %tmp = load i64*, i64** %out, align 8
+ %incdec.ptr = getelementptr inbounds i64, i64* %tmp, i64 1
store i64 %spacing, i64* %tmp, align 4
store i64* %incdec.ptr, i64** %out, align 8
ret void
@@ -15,8 +15,8 @@ define void @store32(i32** nocapture %out, i32 %index, i32 %spacing) nounwind no
; CHECK-LABEL: store32:
; CHECK: str w{{[0-9+]}}, [x{{[0-9+]}}], #4
; CHECK: ret
- %tmp = load i32** %out, align 8
- %incdec.ptr = getelementptr inbounds i32* %tmp, i64 1
+ %tmp = load i32*, i32** %out, align 8
+ %incdec.ptr = getelementptr inbounds i32, i32* %tmp, i64 1
store i32 %spacing, i32* %tmp, align 4
store i32* %incdec.ptr, i32** %out, align 8
ret void
@@ -26,8 +26,8 @@ define void @store16(i16** nocapture %out, i16 %index, i16 %spacing) nounwind no
; CHECK-LABEL: store16:
; CHECK: strh w{{[0-9+]}}, [x{{[0-9+]}}], #2
; CHECK: ret
- %tmp = load i16** %out, align 8
- %incdec.ptr = getelementptr inbounds i16* %tmp, i64 1
+ %tmp = load i16*, i16** %out, align 8
+ %incdec.ptr = getelementptr inbounds i16, i16* %tmp, i64 1
store i16 %spacing, i16* %tmp, align 4
store i16* %incdec.ptr, i16** %out, align 8
ret void
@@ -37,8 +37,8 @@ define void @store8(i8** nocapture %out, i8 %index, i8 %spacing) nounwind noinli
; CHECK-LABEL: store8:
; CHECK: strb w{{[0-9+]}}, [x{{[0-9+]}}], #1
; CHECK: ret
- %tmp = load i8** %out, align 8
- %incdec.ptr = getelementptr inbounds i8* %tmp, i64 1
+ %tmp = load i8*, i8** %out, align 8
+ %incdec.ptr = getelementptr inbounds i8, i8* %tmp, i64 1
store i8 %spacing, i8* %tmp, align 4
store i8* %incdec.ptr, i8** %out, align 8
ret void
@@ -48,8 +48,8 @@ define void @truncst64to32(i32** nocapture %out, i32 %index, i64 %spacing) nounw
; CHECK-LABEL: truncst64to32:
; CHECK: str w{{[0-9+]}}, [x{{[0-9+]}}], #4
; CHECK: ret
- %tmp = load i32** %out, align 8
- %incdec.ptr = getelementptr inbounds i32* %tmp, i64 1
+ %tmp = load i32*, i32** %out, align 8
+ %incdec.ptr = getelementptr inbounds i32, i32* %tmp, i64 1
%trunc = trunc i64 %spacing to i32
store i32 %trunc, i32* %tmp, align 4
store i32* %incdec.ptr, i32** %out, align 8
@@ -60,8 +60,8 @@ define void @truncst64to16(i16** nocapture %out, i16 %index, i64 %spacing) nounw
; CHECK-LABEL: truncst64to16:
; CHECK: strh w{{[0-9+]}}, [x{{[0-9+]}}], #2
; CHECK: ret
- %tmp = load i16** %out, align 8
- %incdec.ptr = getelementptr inbounds i16* %tmp, i64 1
+ %tmp = load i16*, i16** %out, align 8
+ %incdec.ptr = getelementptr inbounds i16, i16* %tmp, i64 1
%trunc = trunc i64 %spacing to i16
store i16 %trunc, i16* %tmp, align 4
store i16* %incdec.ptr, i16** %out, align 8
@@ -72,8 +72,8 @@ define void @truncst64to8(i8** nocapture %out, i8 %index, i64 %spacing) nounwind
; CHECK-LABEL: truncst64to8:
; CHECK: strb w{{[0-9+]}}, [x{{[0-9+]}}], #1
; CHECK: ret
- %tmp = load i8** %out, align 8
- %incdec.ptr = getelementptr inbounds i8* %tmp, i64 1
+ %tmp = load i8*, i8** %out, align 8
+ %incdec.ptr = getelementptr inbounds i8, i8* %tmp, i64 1
%trunc = trunc i64 %spacing to i8
store i8 %trunc, i8* %tmp, align 4
store i8* %incdec.ptr, i8** %out, align 8
@@ -85,8 +85,8 @@ define void @storef32(float** nocapture %out, float %index, float %spacing) noun
; CHECK-LABEL: storef32:
; CHECK: str s{{[0-9+]}}, [x{{[0-9+]}}], #4
; CHECK: ret
- %tmp = load float** %out, align 8
- %incdec.ptr = getelementptr inbounds float* %tmp, i64 1
+ %tmp = load float*, float** %out, align 8
+ %incdec.ptr = getelementptr inbounds float, float* %tmp, i64 1
store float %spacing, float* %tmp, align 4
store float* %incdec.ptr, float** %out, align 8
ret void
@@ -96,8 +96,8 @@ define void @storef64(double** nocapture %out, double %index, double %spacing) n
; CHECK-LABEL: storef64:
; CHECK: str d{{[0-9+]}}, [x{{[0-9+]}}], #8
; CHECK: ret
- %tmp = load double** %out, align 8
- %incdec.ptr = getelementptr inbounds double* %tmp, i64 1
+ %tmp = load double*, double** %out, align 8
+ %incdec.ptr = getelementptr inbounds double, double* %tmp, i64 1
store double %spacing, double* %tmp, align 4
store double* %incdec.ptr, double** %out, align 8
ret void
@@ -108,8 +108,8 @@ define double * @pref64(double** nocapture %out, double %spacing) nounwind noinl
; CHECK: ldr x0, [x0]
; CHECK-NEXT: str d0, [x0, #32]!
; CHECK-NEXT: ret
- %tmp = load double** %out, align 8
- %ptr = getelementptr inbounds double* %tmp, i64 4
+ %tmp = load double*, double** %out, align 8
+ %ptr = getelementptr inbounds double, double* %tmp, i64 4
store double %spacing, double* %ptr, align 4
ret double *%ptr
}
@@ -119,8 +119,8 @@ define float * @pref32(float** nocapture %out, float %spacing) nounwind noinline
; CHECK: ldr x0, [x0]
; CHECK-NEXT: str s0, [x0, #12]!
; CHECK-NEXT: ret
- %tmp = load float** %out, align 8
- %ptr = getelementptr inbounds float* %tmp, i64 3
+ %tmp = load float*, float** %out, align 8
+ %ptr = getelementptr inbounds float, float* %tmp, i64 3
store float %spacing, float* %ptr, align 4
ret float *%ptr
}
@@ -130,8 +130,8 @@ define i64 * @pre64(i64** nocapture %out, i64 %spacing) nounwind noinline ssp {
; CHECK: ldr x0, [x0]
; CHECK-NEXT: str x1, [x0, #16]!
; CHECK-NEXT: ret
- %tmp = load i64** %out, align 8
- %ptr = getelementptr inbounds i64* %tmp, i64 2
+ %tmp = load i64*, i64** %out, align 8
+ %ptr = getelementptr inbounds i64, i64* %tmp, i64 2
store i64 %spacing, i64* %ptr, align 4
ret i64 *%ptr
}
@@ -141,8 +141,8 @@ define i32 * @pre32(i32** nocapture %out, i32 %spacing) nounwind noinline ssp {
; CHECK: ldr x0, [x0]
; CHECK-NEXT: str w1, [x0, #8]!
; CHECK-NEXT: ret
- %tmp = load i32** %out, align 8
- %ptr = getelementptr inbounds i32* %tmp, i64 2
+ %tmp = load i32*, i32** %out, align 8
+ %ptr = getelementptr inbounds i32, i32* %tmp, i64 2
store i32 %spacing, i32* %ptr, align 4
ret i32 *%ptr
}
@@ -152,8 +152,8 @@ define i16 * @pre16(i16** nocapture %out, i16 %spacing) nounwind noinline ssp {
; CHECK: ldr x0, [x0]
; CHECK-NEXT: strh w1, [x0, #4]!
; CHECK-NEXT: ret
- %tmp = load i16** %out, align 8
- %ptr = getelementptr inbounds i16* %tmp, i64 2
+ %tmp = load i16*, i16** %out, align 8
+ %ptr = getelementptr inbounds i16, i16* %tmp, i64 2
store i16 %spacing, i16* %ptr, align 4
ret i16 *%ptr
}
@@ -163,8 +163,8 @@ define i8 * @pre8(i8** nocapture %out, i8 %spacing) nounwind noinline ssp {
; CHECK: ldr x0, [x0]
; CHECK-NEXT: strb w1, [x0, #2]!
; CHECK-NEXT: ret
- %tmp = load i8** %out, align 8
- %ptr = getelementptr inbounds i8* %tmp, i64 2
+ %tmp = load i8*, i8** %out, align 8
+ %ptr = getelementptr inbounds i8, i8* %tmp, i64 2
store i8 %spacing, i8* %ptr, align 4
ret i8 *%ptr
}
@@ -174,8 +174,8 @@ define i32 * @pretrunc64to32(i32** nocapture %out, i64 %spacing) nounwind noinli
; CHECK: ldr x0, [x0]
; CHECK-NEXT: str w1, [x0, #8]!
; CHECK-NEXT: ret
- %tmp = load i32** %out, align 8
- %ptr = getelementptr inbounds i32* %tmp, i64 2
+ %tmp = load i32*, i32** %out, align 8
+ %ptr = getelementptr inbounds i32, i32* %tmp, i64 2
%trunc = trunc i64 %spacing to i32
store i32 %trunc, i32* %ptr, align 4
ret i32 *%ptr
@@ -186,8 +186,8 @@ define i16 * @pretrunc64to16(i16** nocapture %out, i64 %spacing) nounwind noinli
; CHECK: ldr x0, [x0]
; CHECK-NEXT: strh w1, [x0, #4]!
; CHECK-NEXT: ret
- %tmp = load i16** %out, align 8
- %ptr = getelementptr inbounds i16* %tmp, i64 2
+ %tmp = load i16*, i16** %out, align 8
+ %ptr = getelementptr inbounds i16, i16* %tmp, i64 2
%trunc = trunc i64 %spacing to i16
store i16 %trunc, i16* %ptr, align 4
ret i16 *%ptr
@@ -198,8 +198,8 @@ define i8 * @pretrunc64to8(i8** nocapture %out, i64 %spacing) nounwind noinline
; CHECK: ldr x0, [x0]
; CHECK-NEXT: strb w1, [x0, #2]!
; CHECK-NEXT: ret
- %tmp = load i8** %out, align 8
- %ptr = getelementptr inbounds i8* %tmp, i64 2
+ %tmp = load i8*, i8** %out, align 8
+ %ptr = getelementptr inbounds i8, i8* %tmp, i64 2
%trunc = trunc i64 %spacing to i8
store i8 %trunc, i8* %ptr, align 4
ret i8 *%ptr
@@ -213,8 +213,8 @@ define double* @preidxf64(double* %src, double* %out) {
; CHECK: ldr d0, [x0, #8]!
; CHECK: str d0, [x1]
; CHECK: ret
- %ptr = getelementptr inbounds double* %src, i64 1
- %tmp = load double* %ptr, align 4
+ %ptr = getelementptr inbounds double, double* %src, i64 1
+ %tmp = load double, double* %ptr, align 4
store double %tmp, double* %out, align 4
ret double* %ptr
}
@@ -224,8 +224,8 @@ define float* @preidxf32(float* %src, float* %out) {
; CHECK: ldr s0, [x0, #4]!
; CHECK: str s0, [x1]
; CHECK: ret
- %ptr = getelementptr inbounds float* %src, i64 1
- %tmp = load float* %ptr, align 4
+ %ptr = getelementptr inbounds float, float* %src, i64 1
+ %tmp = load float, float* %ptr, align 4
store float %tmp, float* %out, align 4
ret float* %ptr
}
@@ -235,8 +235,8 @@ define i64* @preidx64(i64* %src, i64* %out) {
; CHECK: ldr x[[REG:[0-9]+]], [x0, #8]!
; CHECK: str x[[REG]], [x1]
; CHECK: ret
- %ptr = getelementptr inbounds i64* %src, i64 1
- %tmp = load i64* %ptr, align 4
+ %ptr = getelementptr inbounds i64, i64* %src, i64 1
+ %tmp = load i64, i64* %ptr, align 4
store i64 %tmp, i64* %out, align 4
ret i64* %ptr
}
@@ -245,8 +245,8 @@ define i32* @preidx32(i32* %src, i32* %out) {
; CHECK: ldr w[[REG:[0-9]+]], [x0, #4]!
; CHECK: str w[[REG]], [x1]
; CHECK: ret
- %ptr = getelementptr inbounds i32* %src, i64 1
- %tmp = load i32* %ptr, align 4
+ %ptr = getelementptr inbounds i32, i32* %src, i64 1
+ %tmp = load i32, i32* %ptr, align 4
store i32 %tmp, i32* %out, align 4
ret i32* %ptr
}
@@ -255,8 +255,8 @@ define i16* @preidx16zext32(i16* %src, i32* %out) {
; CHECK: ldrh w[[REG:[0-9]+]], [x0, #2]!
; CHECK: str w[[REG]], [x1]
; CHECK: ret
- %ptr = getelementptr inbounds i16* %src, i64 1
- %tmp = load i16* %ptr, align 4
+ %ptr = getelementptr inbounds i16, i16* %src, i64 1
+ %tmp = load i16, i16* %ptr, align 4
%ext = zext i16 %tmp to i32
store i32 %ext, i32* %out, align 4
ret i16* %ptr
@@ -266,8 +266,8 @@ define i16* @preidx16zext64(i16* %src, i64* %out) {
; CHECK: ldrh w[[REG:[0-9]+]], [x0, #2]!
; CHECK: str x[[REG]], [x1]
; CHECK: ret
- %ptr = getelementptr inbounds i16* %src, i64 1
- %tmp = load i16* %ptr, align 4
+ %ptr = getelementptr inbounds i16, i16* %src, i64 1
+ %tmp = load i16, i16* %ptr, align 4
%ext = zext i16 %tmp to i64
store i64 %ext, i64* %out, align 4
ret i16* %ptr
@@ -277,8 +277,8 @@ define i8* @preidx8zext32(i8* %src, i32* %out) {
; CHECK: ldrb w[[REG:[0-9]+]], [x0, #1]!
; CHECK: str w[[REG]], [x1]
; CHECK: ret
- %ptr = getelementptr inbounds i8* %src, i64 1
- %tmp = load i8* %ptr, align 4
+ %ptr = getelementptr inbounds i8, i8* %src, i64 1
+ %tmp = load i8, i8* %ptr, align 4
%ext = zext i8 %tmp to i32
store i32 %ext, i32* %out, align 4
ret i8* %ptr
@@ -288,8 +288,8 @@ define i8* @preidx8zext64(i8* %src, i64* %out) {
; CHECK: ldrb w[[REG:[0-9]+]], [x0, #1]!
; CHECK: str x[[REG]], [x1]
; CHECK: ret
- %ptr = getelementptr inbounds i8* %src, i64 1
- %tmp = load i8* %ptr, align 4
+ %ptr = getelementptr inbounds i8, i8* %src, i64 1
+ %tmp = load i8, i8* %ptr, align 4
%ext = zext i8 %tmp to i64
store i64 %ext, i64* %out, align 4
ret i8* %ptr
@@ -299,8 +299,8 @@ define i32* @preidx32sext64(i32* %src, i64* %out) {
; CHECK: ldrsw x[[REG:[0-9]+]], [x0, #4]!
; CHECK: str x[[REG]], [x1]
; CHECK: ret
- %ptr = getelementptr inbounds i32* %src, i64 1
- %tmp = load i32* %ptr, align 4
+ %ptr = getelementptr inbounds i32, i32* %src, i64 1
+ %tmp = load i32, i32* %ptr, align 4
%ext = sext i32 %tmp to i64
store i64 %ext, i64* %out, align 8
ret i32* %ptr
@@ -310,8 +310,8 @@ define i16* @preidx16sext32(i16* %src, i32* %out) {
; CHECK: ldrsh w[[REG:[0-9]+]], [x0, #2]!
; CHECK: str w[[REG]], [x1]
; CHECK: ret
- %ptr = getelementptr inbounds i16* %src, i64 1
- %tmp = load i16* %ptr, align 4
+ %ptr = getelementptr inbounds i16, i16* %src, i64 1
+ %tmp = load i16, i16* %ptr, align 4
%ext = sext i16 %tmp to i32
store i32 %ext, i32* %out, align 4
ret i16* %ptr
@@ -321,8 +321,8 @@ define i16* @preidx16sext64(i16* %src, i64* %out) {
; CHECK: ldrsh x[[REG:[0-9]+]], [x0, #2]!
; CHECK: str x[[REG]], [x1]
; CHECK: ret
- %ptr = getelementptr inbounds i16* %src, i64 1
- %tmp = load i16* %ptr, align 4
+ %ptr = getelementptr inbounds i16, i16* %src, i64 1
+ %tmp = load i16, i16* %ptr, align 4
%ext = sext i16 %tmp to i64
store i64 %ext, i64* %out, align 4
ret i16* %ptr
@@ -332,8 +332,8 @@ define i8* @preidx8sext32(i8* %src, i32* %out) {
; CHECK: ldrsb w[[REG:[0-9]+]], [x0, #1]!
; CHECK: str w[[REG]], [x1]
; CHECK: ret
- %ptr = getelementptr inbounds i8* %src, i64 1
- %tmp = load i8* %ptr, align 4
+ %ptr = getelementptr inbounds i8, i8* %src, i64 1
+ %tmp = load i8, i8* %ptr, align 4
%ext = sext i8 %tmp to i32
store i32 %ext, i32* %out, align 4
ret i8* %ptr
@@ -343,8 +343,8 @@ define i8* @preidx8sext64(i8* %src, i64* %out) {
; CHECK: ldrsb x[[REG:[0-9]+]], [x0, #1]!
; CHECK: str x[[REG]], [x1]
; CHECK: ret
- %ptr = getelementptr inbounds i8* %src, i64 1
- %tmp = load i8* %ptr, align 4
+ %ptr = getelementptr inbounds i8, i8* %src, i64 1
+ %tmp = load i8, i8* %ptr, align 4
%ext = sext i8 %tmp to i64
store i64 %ext, i64* %out, align 4
ret i8* %ptr
@@ -358,6 +358,6 @@ define i64* @postidx_clobber(i64* %addr) nounwind noinline ssp {
; ret
%paddr = bitcast i64* %addr to i64**
store i64* %addr, i64** %paddr
- %newaddr = getelementptr i64* %addr, i32 1
+ %newaddr = getelementptr i64, i64* %addr, i32 1
ret i64* %newaddr
}
diff --git a/test/CodeGen/AArch64/arm64-indexed-vector-ldst-2.ll b/test/CodeGen/AArch64/arm64-indexed-vector-ldst-2.ll
index 917911ad2d40..14beb1ae9c36 100644
--- a/test/CodeGen/AArch64/arm64-indexed-vector-ldst-2.ll
+++ b/test/CodeGen/AArch64/arm64-indexed-vector-ldst-2.ll
@@ -9,9 +9,9 @@ target triple = "arm64-apple-ios7.0.0"
; Function Attrs: nounwind ssp
define void @f(double* %P1) #0 {
entry:
- %arrayidx4 = getelementptr inbounds double* %P1, i64 1
- %0 = load double* %arrayidx4, align 8, !tbaa !1
- %1 = load double* %P1, align 8, !tbaa !1
+ %arrayidx4 = getelementptr inbounds double, double* %P1, i64 1
+ %0 = load double, double* %arrayidx4, align 8, !tbaa !1
+ %1 = load double, double* %P1, align 8, !tbaa !1
%2 = insertelement <2 x double> undef, double %0, i32 0
%3 = insertelement <2 x double> %2, double %1, i32 1
%4 = fsub <2 x double> zeroinitializer, %3
diff --git a/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll b/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll
index 9ee4063658b2..ba31513172d5 100644
--- a/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll
+++ b/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll
@@ -5,8 +5,8 @@
define <8 x i8> @test_v8i8_pre_load(<8 x i8>* %addr) {
; CHECK-LABEL: test_v8i8_pre_load:
; CHECK: ldr d0, [x0, #40]!
- %newaddr = getelementptr <8 x i8>* %addr, i32 5
- %val = load <8 x i8>* %newaddr, align 8
+ %newaddr = getelementptr <8 x i8>, <8 x i8>* %addr, i32 5
+ %val = load <8 x i8>, <8 x i8>* %newaddr, align 8
store <8 x i8>* %newaddr, <8 x i8>** bitcast(i8** @ptr to <8 x i8>**)
ret <8 x i8> %val
}
@@ -14,8 +14,8 @@ define <8 x i8> @test_v8i8_pre_load(<8 x i8>* %addr) {
define <8 x i8> @test_v8i8_post_load(<8 x i8>* %addr) {
; CHECK-LABEL: test_v8i8_post_load:
; CHECK: ldr d0, [x0], #40
- %newaddr = getelementptr <8 x i8>* %addr, i32 5
- %val = load <8 x i8>* %addr, align 8
+ %newaddr = getelementptr <8 x i8>, <8 x i8>* %addr, i32 5
+ %val = load <8 x i8>, <8 x i8>* %addr, align 8
store <8 x i8>* %newaddr, <8 x i8>** bitcast(i8** @ptr to <8 x i8>**)
ret <8 x i8> %val
}
@@ -23,7 +23,7 @@ define <8 x i8> @test_v8i8_post_load(<8 x i8>* %addr) {
define void @test_v8i8_pre_store(<8 x i8> %in, <8 x i8>* %addr) {
; CHECK-LABEL: test_v8i8_pre_store:
; CHECK: str d0, [x0, #40]!
- %newaddr = getelementptr <8 x i8>* %addr, i32 5
+ %newaddr = getelementptr <8 x i8>, <8 x i8>* %addr, i32 5
store <8 x i8> %in, <8 x i8>* %newaddr, align 8
store <8 x i8>* %newaddr, <8 x i8>** bitcast(i8** @ptr to <8 x i8>**)
ret void
@@ -32,7 +32,7 @@ define void @test_v8i8_pre_store(<8 x i8> %in, <8 x i8>* %addr) {
define void @test_v8i8_post_store(<8 x i8> %in, <8 x i8>* %addr) {
; CHECK-LABEL: test_v8i8_post_store:
; CHECK: str d0, [x0], #40
- %newaddr = getelementptr <8 x i8>* %addr, i32 5
+ %newaddr = getelementptr <8 x i8>, <8 x i8>* %addr, i32 5
store <8 x i8> %in, <8 x i8>* %addr, align 8
store <8 x i8>* %newaddr, <8 x i8>** bitcast(i8** @ptr to <8 x i8>**)
ret void
@@ -41,8 +41,8 @@ define void @test_v8i8_post_store(<8 x i8> %in, <8 x i8>* %addr) {
define <4 x i16> @test_v4i16_pre_load(<4 x i16>* %addr) {
; CHECK-LABEL: test_v4i16_pre_load:
; CHECK: ldr d0, [x0, #40]!
- %newaddr = getelementptr <4 x i16>* %addr, i32 5
- %val = load <4 x i16>* %newaddr, align 8
+ %newaddr = getelementptr <4 x i16>, <4 x i16>* %addr, i32 5
+ %val = load <4 x i16>, <4 x i16>* %newaddr, align 8
store <4 x i16>* %newaddr, <4 x i16>** bitcast(i8** @ptr to <4 x i16>**)
ret <4 x i16> %val
}
@@ -50,8 +50,8 @@ define <4 x i16> @test_v4i16_pre_load(<4 x i16>* %addr) {
define <4 x i16> @test_v4i16_post_load(<4 x i16>* %addr) {
; CHECK-LABEL: test_v4i16_post_load:
; CHECK: ldr d0, [x0], #40
- %newaddr = getelementptr <4 x i16>* %addr, i32 5
- %val = load <4 x i16>* %addr, align 8
+ %newaddr = getelementptr <4 x i16>, <4 x i16>* %addr, i32 5
+ %val = load <4 x i16>, <4 x i16>* %addr, align 8
store <4 x i16>* %newaddr, <4 x i16>** bitcast(i8** @ptr to <4 x i16>**)
ret <4 x i16> %val
}
@@ -59,7 +59,7 @@ define <4 x i16> @test_v4i16_post_load(<4 x i16>* %addr) {
define void @test_v4i16_pre_store(<4 x i16> %in, <4 x i16>* %addr) {
; CHECK-LABEL: test_v4i16_pre_store:
; CHECK: str d0, [x0, #40]!
- %newaddr = getelementptr <4 x i16>* %addr, i32 5
+ %newaddr = getelementptr <4 x i16>, <4 x i16>* %addr, i32 5
store <4 x i16> %in, <4 x i16>* %newaddr, align 8
store <4 x i16>* %newaddr, <4 x i16>** bitcast(i8** @ptr to <4 x i16>**)
ret void
@@ -68,7 +68,7 @@ define void @test_v4i16_pre_store(<4 x i16> %in, <4 x i16>* %addr) {
define void @test_v4i16_post_store(<4 x i16> %in, <4 x i16>* %addr) {
; CHECK-LABEL: test_v4i16_post_store:
; CHECK: str d0, [x0], #40
- %newaddr = getelementptr <4 x i16>* %addr, i32 5
+ %newaddr = getelementptr <4 x i16>, <4 x i16>* %addr, i32 5
store <4 x i16> %in, <4 x i16>* %addr, align 8
store <4 x i16>* %newaddr, <4 x i16>** bitcast(i8** @ptr to <4 x i16>**)
ret void
@@ -77,8 +77,8 @@ define void @test_v4i16_post_store(<4 x i16> %in, <4 x i16>* %addr) {
define <2 x i32> @test_v2i32_pre_load(<2 x i32>* %addr) {
; CHECK-LABEL: test_v2i32_pre_load:
; CHECK: ldr d0, [x0, #40]!
- %newaddr = getelementptr <2 x i32>* %addr, i32 5
- %val = load <2 x i32>* %newaddr, align 8
+ %newaddr = getelementptr <2 x i32>, <2 x i32>* %addr, i32 5
+ %val = load <2 x i32>, <2 x i32>* %newaddr, align 8
store <2 x i32>* %newaddr, <2 x i32>** bitcast(i8** @ptr to <2 x i32>**)
ret <2 x i32> %val
}
@@ -86,8 +86,8 @@ define <2 x i32> @test_v2i32_pre_load(<2 x i32>* %addr) {
define <2 x i32> @test_v2i32_post_load(<2 x i32>* %addr) {
; CHECK-LABEL: test_v2i32_post_load:
; CHECK: ldr d0, [x0], #40
- %newaddr = getelementptr <2 x i32>* %addr, i32 5
- %val = load <2 x i32>* %addr, align 8
+ %newaddr = getelementptr <2 x i32>, <2 x i32>* %addr, i32 5
+ %val = load <2 x i32>, <2 x i32>* %addr, align 8
store <2 x i32>* %newaddr, <2 x i32>** bitcast(i8** @ptr to <2 x i32>**)
ret <2 x i32> %val
}
@@ -95,7 +95,7 @@ define <2 x i32> @test_v2i32_post_load(<2 x i32>* %addr) {
define void @test_v2i32_pre_store(<2 x i32> %in, <2 x i32>* %addr) {
; CHECK-LABEL: test_v2i32_pre_store:
; CHECK: str d0, [x0, #40]!
- %newaddr = getelementptr <2 x i32>* %addr, i32 5
+ %newaddr = getelementptr <2 x i32>, <2 x i32>* %addr, i32 5
store <2 x i32> %in, <2 x i32>* %newaddr, align 8
store <2 x i32>* %newaddr, <2 x i32>** bitcast(i8** @ptr to <2 x i32>**)
ret void
@@ -104,7 +104,7 @@ define void @test_v2i32_pre_store(<2 x i32> %in, <2 x i32>* %addr) {
define void @test_v2i32_post_store(<2 x i32> %in, <2 x i32>* %addr) {
; CHECK-LABEL: test_v2i32_post_store:
; CHECK: str d0, [x0], #40
- %newaddr = getelementptr <2 x i32>* %addr, i32 5
+ %newaddr = getelementptr <2 x i32>, <2 x i32>* %addr, i32 5
store <2 x i32> %in, <2 x i32>* %addr, align 8
store <2 x i32>* %newaddr, <2 x i32>** bitcast(i8** @ptr to <2 x i32>**)
ret void
@@ -113,8 +113,8 @@ define void @test_v2i32_post_store(<2 x i32> %in, <2 x i32>* %addr) {
define <2 x float> @test_v2f32_pre_load(<2 x float>* %addr) {
; CHECK-LABEL: test_v2f32_pre_load:
; CHECK: ldr d0, [x0, #40]!
- %newaddr = getelementptr <2 x float>* %addr, i32 5
- %val = load <2 x float>* %newaddr, align 8
+ %newaddr = getelementptr <2 x float>, <2 x float>* %addr, i32 5
+ %val = load <2 x float>, <2 x float>* %newaddr, align 8
store <2 x float>* %newaddr, <2 x float>** bitcast(i8** @ptr to <2 x float>**)
ret <2 x float> %val
}
@@ -122,8 +122,8 @@ define <2 x float> @test_v2f32_pre_load(<2 x float>* %addr) {
define <2 x float> @test_v2f32_post_load(<2 x float>* %addr) {
; CHECK-LABEL: test_v2f32_post_load:
; CHECK: ldr d0, [x0], #40
- %newaddr = getelementptr <2 x float>* %addr, i32 5
- %val = load <2 x float>* %addr, align 8
+ %newaddr = getelementptr <2 x float>, <2 x float>* %addr, i32 5
+ %val = load <2 x float>, <2 x float>* %addr, align 8
store <2 x float>* %newaddr, <2 x float>** bitcast(i8** @ptr to <2 x float>**)
ret <2 x float> %val
}
@@ -131,7 +131,7 @@ define <2 x float> @test_v2f32_post_load(<2 x float>* %addr) {
define void @test_v2f32_pre_store(<2 x float> %in, <2 x float>* %addr) {
; CHECK-LABEL: test_v2f32_pre_store:
; CHECK: str d0, [x0, #40]!
- %newaddr = getelementptr <2 x float>* %addr, i32 5
+ %newaddr = getelementptr <2 x float>, <2 x float>* %addr, i32 5
store <2 x float> %in, <2 x float>* %newaddr, align 8
store <2 x float>* %newaddr, <2 x float>** bitcast(i8** @ptr to <2 x float>**)
ret void
@@ -140,7 +140,7 @@ define void @test_v2f32_pre_store(<2 x float> %in, <2 x float>* %addr) {
define void @test_v2f32_post_store(<2 x float> %in, <2 x float>* %addr) {
; CHECK-LABEL: test_v2f32_post_store:
; CHECK: str d0, [x0], #40
- %newaddr = getelementptr <2 x float>* %addr, i32 5
+ %newaddr = getelementptr <2 x float>, <2 x float>* %addr, i32 5
store <2 x float> %in, <2 x float>* %addr, align 8
store <2 x float>* %newaddr, <2 x float>** bitcast(i8** @ptr to <2 x float>**)
ret void
@@ -149,8 +149,8 @@ define void @test_v2f32_post_store(<2 x float> %in, <2 x float>* %addr) {
define <1 x i64> @test_v1i64_pre_load(<1 x i64>* %addr) {
; CHECK-LABEL: test_v1i64_pre_load:
; CHECK: ldr d0, [x0, #40]!
- %newaddr = getelementptr <1 x i64>* %addr, i32 5
- %val = load <1 x i64>* %newaddr, align 8
+ %newaddr = getelementptr <1 x i64>, <1 x i64>* %addr, i32 5
+ %val = load <1 x i64>, <1 x i64>* %newaddr, align 8
store <1 x i64>* %newaddr, <1 x i64>** bitcast(i8** @ptr to <1 x i64>**)
ret <1 x i64> %val
}
@@ -158,8 +158,8 @@ define <1 x i64> @test_v1i64_pre_load(<1 x i64>* %addr) {
define <1 x i64> @test_v1i64_post_load(<1 x i64>* %addr) {
; CHECK-LABEL: test_v1i64_post_load:
; CHECK: ldr d0, [x0], #40
- %newaddr = getelementptr <1 x i64>* %addr, i32 5
- %val = load <1 x i64>* %addr, align 8
+ %newaddr = getelementptr <1 x i64>, <1 x i64>* %addr, i32 5
+ %val = load <1 x i64>, <1 x i64>* %addr, align 8
store <1 x i64>* %newaddr, <1 x i64>** bitcast(i8** @ptr to <1 x i64>**)
ret <1 x i64> %val
}
@@ -167,7 +167,7 @@ define <1 x i64> @test_v1i64_post_load(<1 x i64>* %addr) {
define void @test_v1i64_pre_store(<1 x i64> %in, <1 x i64>* %addr) {
; CHECK-LABEL: test_v1i64_pre_store:
; CHECK: str d0, [x0, #40]!
- %newaddr = getelementptr <1 x i64>* %addr, i32 5
+ %newaddr = getelementptr <1 x i64>, <1 x i64>* %addr, i32 5
store <1 x i64> %in, <1 x i64>* %newaddr, align 8
store <1 x i64>* %newaddr, <1 x i64>** bitcast(i8** @ptr to <1 x i64>**)
ret void
@@ -176,7 +176,7 @@ define void @test_v1i64_pre_store(<1 x i64> %in, <1 x i64>* %addr) {
define void @test_v1i64_post_store(<1 x i64> %in, <1 x i64>* %addr) {
; CHECK-LABEL: test_v1i64_post_store:
; CHECK: str d0, [x0], #40
- %newaddr = getelementptr <1 x i64>* %addr, i32 5
+ %newaddr = getelementptr <1 x i64>, <1 x i64>* %addr, i32 5
store <1 x i64> %in, <1 x i64>* %addr, align 8
store <1 x i64>* %newaddr, <1 x i64>** bitcast(i8** @ptr to <1 x i64>**)
ret void
@@ -185,8 +185,8 @@ define void @test_v1i64_post_store(<1 x i64> %in, <1 x i64>* %addr) {
define <16 x i8> @test_v16i8_pre_load(<16 x i8>* %addr) {
; CHECK-LABEL: test_v16i8_pre_load:
; CHECK: ldr q0, [x0, #80]!
- %newaddr = getelementptr <16 x i8>* %addr, i32 5
- %val = load <16 x i8>* %newaddr, align 8
+ %newaddr = getelementptr <16 x i8>, <16 x i8>* %addr, i32 5
+ %val = load <16 x i8>, <16 x i8>* %newaddr, align 8
store <16 x i8>* %newaddr, <16 x i8>** bitcast(i8** @ptr to <16 x i8>**)
ret <16 x i8> %val
}
@@ -194,8 +194,8 @@ define <16 x i8> @test_v16i8_pre_load(<16 x i8>* %addr) {
define <16 x i8> @test_v16i8_post_load(<16 x i8>* %addr) {
; CHECK-LABEL: test_v16i8_post_load:
; CHECK: ldr q0, [x0], #80
- %newaddr = getelementptr <16 x i8>* %addr, i32 5
- %val = load <16 x i8>* %addr, align 8
+ %newaddr = getelementptr <16 x i8>, <16 x i8>* %addr, i32 5
+ %val = load <16 x i8>, <16 x i8>* %addr, align 8
store <16 x i8>* %newaddr, <16 x i8>** bitcast(i8** @ptr to <16 x i8>**)
ret <16 x i8> %val
}
@@ -203,7 +203,7 @@ define <16 x i8> @test_v16i8_post_load(<16 x i8>* %addr) {
define void @test_v16i8_pre_store(<16 x i8> %in, <16 x i8>* %addr) {
; CHECK-LABEL: test_v16i8_pre_store:
; CHECK: str q0, [x0, #80]!
- %newaddr = getelementptr <16 x i8>* %addr, i32 5
+ %newaddr = getelementptr <16 x i8>, <16 x i8>* %addr, i32 5
store <16 x i8> %in, <16 x i8>* %newaddr, align 8
store <16 x i8>* %newaddr, <16 x i8>** bitcast(i8** @ptr to <16 x i8>**)
ret void
@@ -212,7 +212,7 @@ define void @test_v16i8_pre_store(<16 x i8> %in, <16 x i8>* %addr) {
define void @test_v16i8_post_store(<16 x i8> %in, <16 x i8>* %addr) {
; CHECK-LABEL: test_v16i8_post_store:
; CHECK: str q0, [x0], #80
- %newaddr = getelementptr <16 x i8>* %addr, i32 5
+ %newaddr = getelementptr <16 x i8>, <16 x i8>* %addr, i32 5
store <16 x i8> %in, <16 x i8>* %addr, align 8
store <16 x i8>* %newaddr, <16 x i8>** bitcast(i8** @ptr to <16 x i8>**)
ret void
@@ -221,8 +221,8 @@ define void @test_v16i8_post_store(<16 x i8> %in, <16 x i8>* %addr) {
define <8 x i16> @test_v8i16_pre_load(<8 x i16>* %addr) {
; CHECK-LABEL: test_v8i16_pre_load:
; CHECK: ldr q0, [x0, #80]!
- %newaddr = getelementptr <8 x i16>* %addr, i32 5
- %val = load <8 x i16>* %newaddr, align 8
+ %newaddr = getelementptr <8 x i16>, <8 x i16>* %addr, i32 5
+ %val = load <8 x i16>, <8 x i16>* %newaddr, align 8
store <8 x i16>* %newaddr, <8 x i16>** bitcast(i8** @ptr to <8 x i16>**)
ret <8 x i16> %val
}
@@ -230,8 +230,8 @@ define <8 x i16> @test_v8i16_pre_load(<8 x i16>* %addr) {
define <8 x i16> @test_v8i16_post_load(<8 x i16>* %addr) {
; CHECK-LABEL: test_v8i16_post_load:
; CHECK: ldr q0, [x0], #80
- %newaddr = getelementptr <8 x i16>* %addr, i32 5
- %val = load <8 x i16>* %addr, align 8
+ %newaddr = getelementptr <8 x i16>, <8 x i16>* %addr, i32 5
+ %val = load <8 x i16>, <8 x i16>* %addr, align 8
store <8 x i16>* %newaddr, <8 x i16>** bitcast(i8** @ptr to <8 x i16>**)
ret <8 x i16> %val
}
@@ -239,7 +239,7 @@ define <8 x i16> @test_v8i16_post_load(<8 x i16>* %addr) {
define void @test_v8i16_pre_store(<8 x i16> %in, <8 x i16>* %addr) {
; CHECK-LABEL: test_v8i16_pre_store:
; CHECK: str q0, [x0, #80]!
- %newaddr = getelementptr <8 x i16>* %addr, i32 5
+ %newaddr = getelementptr <8 x i16>, <8 x i16>* %addr, i32 5
store <8 x i16> %in, <8 x i16>* %newaddr, align 8
store <8 x i16>* %newaddr, <8 x i16>** bitcast(i8** @ptr to <8 x i16>**)
ret void
@@ -248,7 +248,7 @@ define void @test_v8i16_pre_store(<8 x i16> %in, <8 x i16>* %addr) {
define void @test_v8i16_post_store(<8 x i16> %in, <8 x i16>* %addr) {
; CHECK-LABEL: test_v8i16_post_store:
; CHECK: str q0, [x0], #80
- %newaddr = getelementptr <8 x i16>* %addr, i32 5
+ %newaddr = getelementptr <8 x i16>, <8 x i16>* %addr, i32 5
store <8 x i16> %in, <8 x i16>* %addr, align 8
store <8 x i16>* %newaddr, <8 x i16>** bitcast(i8** @ptr to <8 x i16>**)
ret void
@@ -257,8 +257,8 @@ define void @test_v8i16_post_store(<8 x i16> %in, <8 x i16>* %addr) {
define <4 x i32> @test_v4i32_pre_load(<4 x i32>* %addr) {
; CHECK-LABEL: test_v4i32_pre_load:
; CHECK: ldr q0, [x0, #80]!
- %newaddr = getelementptr <4 x i32>* %addr, i32 5
- %val = load <4 x i32>* %newaddr, align 8
+ %newaddr = getelementptr <4 x i32>, <4 x i32>* %addr, i32 5
+ %val = load <4 x i32>, <4 x i32>* %newaddr, align 8
store <4 x i32>* %newaddr, <4 x i32>** bitcast(i8** @ptr to <4 x i32>**)
ret <4 x i32> %val
}
@@ -266,8 +266,8 @@ define <4 x i32> @test_v4i32_pre_load(<4 x i32>* %addr) {
define <4 x i32> @test_v4i32_post_load(<4 x i32>* %addr) {
; CHECK-LABEL: test_v4i32_post_load:
; CHECK: ldr q0, [x0], #80
- %newaddr = getelementptr <4 x i32>* %addr, i32 5
- %val = load <4 x i32>* %addr, align 8
+ %newaddr = getelementptr <4 x i32>, <4 x i32>* %addr, i32 5
+ %val = load <4 x i32>, <4 x i32>* %addr, align 8
store <4 x i32>* %newaddr, <4 x i32>** bitcast(i8** @ptr to <4 x i32>**)
ret <4 x i32> %val
}
@@ -275,7 +275,7 @@ define <4 x i32> @test_v4i32_post_load(<4 x i32>* %addr) {
define void @test_v4i32_pre_store(<4 x i32> %in, <4 x i32>* %addr) {
; CHECK-LABEL: test_v4i32_pre_store:
; CHECK: str q0, [x0, #80]!
- %newaddr = getelementptr <4 x i32>* %addr, i32 5
+ %newaddr = getelementptr <4 x i32>, <4 x i32>* %addr, i32 5
store <4 x i32> %in, <4 x i32>* %newaddr, align 8
store <4 x i32>* %newaddr, <4 x i32>** bitcast(i8** @ptr to <4 x i32>**)
ret void
@@ -284,7 +284,7 @@ define void @test_v4i32_pre_store(<4 x i32> %in, <4 x i32>* %addr) {
define void @test_v4i32_post_store(<4 x i32> %in, <4 x i32>* %addr) {
; CHECK-LABEL: test_v4i32_post_store:
; CHECK: str q0, [x0], #80
- %newaddr = getelementptr <4 x i32>* %addr, i32 5
+ %newaddr = getelementptr <4 x i32>, <4 x i32>* %addr, i32 5
store <4 x i32> %in, <4 x i32>* %addr, align 8
store <4 x i32>* %newaddr, <4 x i32>** bitcast(i8** @ptr to <4 x i32>**)
ret void
@@ -294,8 +294,8 @@ define void @test_v4i32_post_store(<4 x i32> %in, <4 x i32>* %addr) {
define <4 x float> @test_v4f32_pre_load(<4 x float>* %addr) {
; CHECK-LABEL: test_v4f32_pre_load:
; CHECK: ldr q0, [x0, #80]!
- %newaddr = getelementptr <4 x float>* %addr, i32 5
- %val = load <4 x float>* %newaddr, align 8
+ %newaddr = getelementptr <4 x float>, <4 x float>* %addr, i32 5
+ %val = load <4 x float>, <4 x float>* %newaddr, align 8
store <4 x float>* %newaddr, <4 x float>** bitcast(i8** @ptr to <4 x float>**)
ret <4 x float> %val
}
@@ -303,8 +303,8 @@ define <4 x float> @test_v4f32_pre_load(<4 x float>* %addr) {
define <4 x float> @test_v4f32_post_load(<4 x float>* %addr) {
; CHECK-LABEL: test_v4f32_post_load:
; CHECK: ldr q0, [x0], #80
- %newaddr = getelementptr <4 x float>* %addr, i32 5
- %val = load <4 x float>* %addr, align 8
+ %newaddr = getelementptr <4 x float>, <4 x float>* %addr, i32 5
+ %val = load <4 x float>, <4 x float>* %addr, align 8
store <4 x float>* %newaddr, <4 x float>** bitcast(i8** @ptr to <4 x float>**)
ret <4 x float> %val
}
@@ -312,7 +312,7 @@ define <4 x float> @test_v4f32_post_load(<4 x float>* %addr) {
define void @test_v4f32_pre_store(<4 x float> %in, <4 x float>* %addr) {
; CHECK-LABEL: test_v4f32_pre_store:
; CHECK: str q0, [x0, #80]!
- %newaddr = getelementptr <4 x float>* %addr, i32 5
+ %newaddr = getelementptr <4 x float>, <4 x float>* %addr, i32 5
store <4 x float> %in, <4 x float>* %newaddr, align 8
store <4 x float>* %newaddr, <4 x float>** bitcast(i8** @ptr to <4 x float>**)
ret void
@@ -321,7 +321,7 @@ define void @test_v4f32_pre_store(<4 x float> %in, <4 x float>* %addr) {
define void @test_v4f32_post_store(<4 x float> %in, <4 x float>* %addr) {
; CHECK-LABEL: test_v4f32_post_store:
; CHECK: str q0, [x0], #80
- %newaddr = getelementptr <4 x float>* %addr, i32 5
+ %newaddr = getelementptr <4 x float>, <4 x float>* %addr, i32 5
store <4 x float> %in, <4 x float>* %addr, align 8
store <4 x float>* %newaddr, <4 x float>** bitcast(i8** @ptr to <4 x float>**)
ret void
@@ -331,8 +331,8 @@ define void @test_v4f32_post_store(<4 x float> %in, <4 x float>* %addr) {
define <2 x i64> @test_v2i64_pre_load(<2 x i64>* %addr) {
; CHECK-LABEL: test_v2i64_pre_load:
; CHECK: ldr q0, [x0, #80]!
- %newaddr = getelementptr <2 x i64>* %addr, i32 5
- %val = load <2 x i64>* %newaddr, align 8
+ %newaddr = getelementptr <2 x i64>, <2 x i64>* %addr, i32 5
+ %val = load <2 x i64>, <2 x i64>* %newaddr, align 8
store <2 x i64>* %newaddr, <2 x i64>** bitcast(i8** @ptr to <2 x i64>**)
ret <2 x i64> %val
}
@@ -340,8 +340,8 @@ define <2 x i64> @test_v2i64_pre_load(<2 x i64>* %addr) {
define <2 x i64> @test_v2i64_post_load(<2 x i64>* %addr) {
; CHECK-LABEL: test_v2i64_post_load:
; CHECK: ldr q0, [x0], #80
- %newaddr = getelementptr <2 x i64>* %addr, i32 5
- %val = load <2 x i64>* %addr, align 8
+ %newaddr = getelementptr <2 x i64>, <2 x i64>* %addr, i32 5
+ %val = load <2 x i64>, <2 x i64>* %addr, align 8
store <2 x i64>* %newaddr, <2 x i64>** bitcast(i8** @ptr to <2 x i64>**)
ret <2 x i64> %val
}
@@ -349,7 +349,7 @@ define <2 x i64> @test_v2i64_post_load(<2 x i64>* %addr) {
define void @test_v2i64_pre_store(<2 x i64> %in, <2 x i64>* %addr) {
; CHECK-LABEL: test_v2i64_pre_store:
; CHECK: str q0, [x0, #80]!
- %newaddr = getelementptr <2 x i64>* %addr, i32 5
+ %newaddr = getelementptr <2 x i64>, <2 x i64>* %addr, i32 5
store <2 x i64> %in, <2 x i64>* %newaddr, align 8
store <2 x i64>* %newaddr, <2 x i64>** bitcast(i8** @ptr to <2 x i64>**)
ret void
@@ -358,7 +358,7 @@ define void @test_v2i64_pre_store(<2 x i64> %in, <2 x i64>* %addr) {
define void @test_v2i64_post_store(<2 x i64> %in, <2 x i64>* %addr) {
; CHECK-LABEL: test_v2i64_post_store:
; CHECK: str q0, [x0], #80
- %newaddr = getelementptr <2 x i64>* %addr, i32 5
+ %newaddr = getelementptr <2 x i64>, <2 x i64>* %addr, i32 5
store <2 x i64> %in, <2 x i64>* %addr, align 8
store <2 x i64>* %newaddr, <2 x i64>** bitcast(i8** @ptr to <2 x i64>**)
ret void
@@ -368,8 +368,8 @@ define void @test_v2i64_post_store(<2 x i64> %in, <2 x i64>* %addr) {
define <2 x double> @test_v2f64_pre_load(<2 x double>* %addr) {
; CHECK-LABEL: test_v2f64_pre_load:
; CHECK: ldr q0, [x0, #80]!
- %newaddr = getelementptr <2 x double>* %addr, i32 5
- %val = load <2 x double>* %newaddr, align 8
+ %newaddr = getelementptr <2 x double>, <2 x double>* %addr, i32 5
+ %val = load <2 x double>, <2 x double>* %newaddr, align 8
store <2 x double>* %newaddr, <2 x double>** bitcast(i8** @ptr to <2 x double>**)
ret <2 x double> %val
}
@@ -377,8 +377,8 @@ define <2 x double> @test_v2f64_pre_load(<2 x double>* %addr) {
define <2 x double> @test_v2f64_post_load(<2 x double>* %addr) {
; CHECK-LABEL: test_v2f64_post_load:
; CHECK: ldr q0, [x0], #80
- %newaddr = getelementptr <2 x double>* %addr, i32 5
- %val = load <2 x double>* %addr, align 8
+ %newaddr = getelementptr <2 x double>, <2 x double>* %addr, i32 5
+ %val = load <2 x double>, <2 x double>* %addr, align 8
store <2 x double>* %newaddr, <2 x double>** bitcast(i8** @ptr to <2 x double>**)
ret <2 x double> %val
}
@@ -386,7 +386,7 @@ define <2 x double> @test_v2f64_post_load(<2 x double>* %addr) {
define void @test_v2f64_pre_store(<2 x double> %in, <2 x double>* %addr) {
; CHECK-LABEL: test_v2f64_pre_store:
; CHECK: str q0, [x0, #80]!
- %newaddr = getelementptr <2 x double>* %addr, i32 5
+ %newaddr = getelementptr <2 x double>, <2 x double>* %addr, i32 5
store <2 x double> %in, <2 x double>* %newaddr, align 8
store <2 x double>* %newaddr, <2 x double>** bitcast(i8** @ptr to <2 x double>**)
ret void
@@ -395,7 +395,7 @@ define void @test_v2f64_pre_store(<2 x double> %in, <2 x double>* %addr) {
define void @test_v2f64_post_store(<2 x double> %in, <2 x double>* %addr) {
; CHECK-LABEL: test_v2f64_post_store:
; CHECK: str q0, [x0], #80
- %newaddr = getelementptr <2 x double>* %addr, i32 5
+ %newaddr = getelementptr <2 x double>, <2 x double>* %addr, i32 5
store <2 x double> %in, <2 x double>* %addr, align 8
store <2 x double>* %newaddr, <2 x double>** bitcast(i8** @ptr to <2 x double>**)
ret void
@@ -407,7 +407,7 @@ define i8* @test_v16i8_post_imm_st1_lane(<16 x i8> %in, i8* %addr) {
%elt = extractelement <16 x i8> %in, i32 3
store i8 %elt, i8* %addr
- %newaddr = getelementptr i8* %addr, i32 1
+ %newaddr = getelementptr i8, i8* %addr, i32 1
ret i8* %newaddr
}
@@ -418,7 +418,7 @@ define i8* @test_v16i8_post_reg_st1_lane(<16 x i8> %in, i8* %addr) {
%elt = extractelement <16 x i8> %in, i32 3
store i8 %elt, i8* %addr
- %newaddr = getelementptr i8* %addr, i32 2
+ %newaddr = getelementptr i8, i8* %addr, i32 2
ret i8* %newaddr
}
@@ -429,7 +429,7 @@ define i16* @test_v8i16_post_imm_st1_lane(<8 x i16> %in, i16* %addr) {
%elt = extractelement <8 x i16> %in, i32 3
store i16 %elt, i16* %addr
- %newaddr = getelementptr i16* %addr, i32 1
+ %newaddr = getelementptr i16, i16* %addr, i32 1
ret i16* %newaddr
}
@@ -440,7 +440,7 @@ define i16* @test_v8i16_post_reg_st1_lane(<8 x i16> %in, i16* %addr) {
%elt = extractelement <8 x i16> %in, i32 3
store i16 %elt, i16* %addr
- %newaddr = getelementptr i16* %addr, i32 2
+ %newaddr = getelementptr i16, i16* %addr, i32 2
ret i16* %newaddr
}
@@ -450,7 +450,7 @@ define i32* @test_v4i32_post_imm_st1_lane(<4 x i32> %in, i32* %addr) {
%elt = extractelement <4 x i32> %in, i32 3
store i32 %elt, i32* %addr
- %newaddr = getelementptr i32* %addr, i32 1
+ %newaddr = getelementptr i32, i32* %addr, i32 1
ret i32* %newaddr
}
@@ -461,7 +461,7 @@ define i32* @test_v4i32_post_reg_st1_lane(<4 x i32> %in, i32* %addr) {
%elt = extractelement <4 x i32> %in, i32 3
store i32 %elt, i32* %addr
- %newaddr = getelementptr i32* %addr, i32 2
+ %newaddr = getelementptr i32, i32* %addr, i32 2
ret i32* %newaddr
}
@@ -471,7 +471,7 @@ define float* @test_v4f32_post_imm_st1_lane(<4 x float> %in, float* %addr) {
%elt = extractelement <4 x float> %in, i32 3
store float %elt, float* %addr
- %newaddr = getelementptr float* %addr, i32 1
+ %newaddr = getelementptr float, float* %addr, i32 1
ret float* %newaddr
}
@@ -482,7 +482,7 @@ define float* @test_v4f32_post_reg_st1_lane(<4 x float> %in, float* %addr) {
%elt = extractelement <4 x float> %in, i32 3
store float %elt, float* %addr
- %newaddr = getelementptr float* %addr, i32 2
+ %newaddr = getelementptr float, float* %addr, i32 2
ret float* %newaddr
}
@@ -492,7 +492,7 @@ define i64* @test_v2i64_post_imm_st1_lane(<2 x i64> %in, i64* %addr) {
%elt = extractelement <2 x i64> %in, i64 1
store i64 %elt, i64* %addr
- %newaddr = getelementptr i64* %addr, i64 1
+ %newaddr = getelementptr i64, i64* %addr, i64 1
ret i64* %newaddr
}
@@ -503,7 +503,7 @@ define i64* @test_v2i64_post_reg_st1_lane(<2 x i64> %in, i64* %addr) {
%elt = extractelement <2 x i64> %in, i64 1
store i64 %elt, i64* %addr
- %newaddr = getelementptr i64* %addr, i64 2
+ %newaddr = getelementptr i64, i64* %addr, i64 2
ret i64* %newaddr
}
@@ -513,7 +513,7 @@ define double* @test_v2f64_post_imm_st1_lane(<2 x double> %in, double* %addr) {
%elt = extractelement <2 x double> %in, i32 1
store double %elt, double* %addr
- %newaddr = getelementptr double* %addr, i32 1
+ %newaddr = getelementptr double, double* %addr, i32 1
ret double* %newaddr
}
@@ -524,7 +524,7 @@ define double* @test_v2f64_post_reg_st1_lane(<2 x double> %in, double* %addr) {
%elt = extractelement <2 x double> %in, i32 1
store double %elt, double* %addr
- %newaddr = getelementptr double* %addr, i32 2
+ %newaddr = getelementptr double, double* %addr, i32 2
ret double* %newaddr
}
@@ -534,7 +534,7 @@ define i8* @test_v8i8_post_imm_st1_lane(<8 x i8> %in, i8* %addr) {
%elt = extractelement <8 x i8> %in, i32 3
store i8 %elt, i8* %addr
- %newaddr = getelementptr i8* %addr, i32 1
+ %newaddr = getelementptr i8, i8* %addr, i32 1
ret i8* %newaddr
}
@@ -545,7 +545,7 @@ define i8* @test_v8i8_post_reg_st1_lane(<8 x i8> %in, i8* %addr) {
%elt = extractelement <8 x i8> %in, i32 3
store i8 %elt, i8* %addr
- %newaddr = getelementptr i8* %addr, i32 2
+ %newaddr = getelementptr i8, i8* %addr, i32 2
ret i8* %newaddr
}
@@ -555,7 +555,7 @@ define i16* @test_v4i16_post_imm_st1_lane(<4 x i16> %in, i16* %addr) {
%elt = extractelement <4 x i16> %in, i32 3
store i16 %elt, i16* %addr
- %newaddr = getelementptr i16* %addr, i32 1
+ %newaddr = getelementptr i16, i16* %addr, i32 1
ret i16* %newaddr
}
@@ -566,7 +566,7 @@ define i16* @test_v4i16_post_reg_st1_lane(<4 x i16> %in, i16* %addr) {
%elt = extractelement <4 x i16> %in, i32 3
store i16 %elt, i16* %addr
- %newaddr = getelementptr i16* %addr, i32 2
+ %newaddr = getelementptr i16, i16* %addr, i32 2
ret i16* %newaddr
}
@@ -576,7 +576,7 @@ define i32* @test_v2i32_post_imm_st1_lane(<2 x i32> %in, i32* %addr) {
%elt = extractelement <2 x i32> %in, i32 1
store i32 %elt, i32* %addr
- %newaddr = getelementptr i32* %addr, i32 1
+ %newaddr = getelementptr i32, i32* %addr, i32 1
ret i32* %newaddr
}
@@ -587,7 +587,7 @@ define i32* @test_v2i32_post_reg_st1_lane(<2 x i32> %in, i32* %addr) {
%elt = extractelement <2 x i32> %in, i32 1
store i32 %elt, i32* %addr
- %newaddr = getelementptr i32* %addr, i32 2
+ %newaddr = getelementptr i32, i32* %addr, i32 2
ret i32* %newaddr
}
@@ -597,7 +597,7 @@ define float* @test_v2f32_post_imm_st1_lane(<2 x float> %in, float* %addr) {
%elt = extractelement <2 x float> %in, i32 1
store float %elt, float* %addr
- %newaddr = getelementptr float* %addr, i32 1
+ %newaddr = getelementptr float, float* %addr, i32 1
ret float* %newaddr
}
@@ -608,7 +608,7 @@ define float* @test_v2f32_post_reg_st1_lane(<2 x float> %in, float* %addr) {
%elt = extractelement <2 x float> %in, i32 1
store float %elt, float* %addr
- %newaddr = getelementptr float* %addr, i32 2
+ %newaddr = getelementptr float, float* %addr, i32 2
ret float* %newaddr
}
@@ -616,7 +616,7 @@ define { <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld2(i8* %A, i8** %ptr) {
;CHECK-LABEL: test_v16i8_post_imm_ld2:
;CHECK: ld2.16b { v0, v1 }, [x0], #32
%ld2 = tail call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2.v16i8.p0i8(i8* %A)
- %tmp = getelementptr i8* %A, i32 32
+ %tmp = getelementptr i8, i8* %A, i32 32
store i8* %tmp, i8** %ptr
ret { <16 x i8>, <16 x i8> } %ld2
}
@@ -625,7 +625,7 @@ define { <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld2(i8* %A, i8** %ptr, i64
;CHECK-LABEL: test_v16i8_post_reg_ld2:
;CHECK: ld2.16b { v0, v1 }, [x0], x{{[0-9]+}}
%ld2 = tail call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2.v16i8.p0i8(i8* %A)
- %tmp = getelementptr i8* %A, i64 %inc
+ %tmp = getelementptr i8, i8* %A, i64 %inc
store i8* %tmp, i8** %ptr
ret { <16 x i8>, <16 x i8> } %ld2
}
@@ -637,7 +637,7 @@ define { <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld2(i8* %A, i8** %ptr) {
;CHECK-LABEL: test_v8i8_post_imm_ld2:
;CHECK: ld2.8b { v0, v1 }, [x0], #16
%ld2 = tail call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2.v8i8.p0i8(i8* %A)
- %tmp = getelementptr i8* %A, i32 16
+ %tmp = getelementptr i8, i8* %A, i32 16
store i8* %tmp, i8** %ptr
ret { <8 x i8>, <8 x i8> } %ld2
}
@@ -646,7 +646,7 @@ define { <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld2(i8* %A, i8** %ptr, i64 %in
;CHECK-LABEL: test_v8i8_post_reg_ld2:
;CHECK: ld2.8b { v0, v1 }, [x0], x{{[0-9]+}}
%ld2 = tail call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2.v8i8.p0i8(i8* %A)
- %tmp = getelementptr i8* %A, i64 %inc
+ %tmp = getelementptr i8, i8* %A, i64 %inc
store i8* %tmp, i8** %ptr
ret { <8 x i8>, <8 x i8> } %ld2
}
@@ -658,7 +658,7 @@ define { <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld2(i16* %A, i16** %ptr) {
;CHECK-LABEL: test_v8i16_post_imm_ld2:
;CHECK: ld2.8h { v0, v1 }, [x0], #32
%ld2 = tail call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2.v8i16.p0i16(i16* %A)
- %tmp = getelementptr i16* %A, i32 16
+ %tmp = getelementptr i16, i16* %A, i32 16
store i16* %tmp, i16** %ptr
ret { <8 x i16>, <8 x i16> } %ld2
}
@@ -667,7 +667,7 @@ define { <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld2(i16* %A, i16** %ptr, i6
;CHECK-LABEL: test_v8i16_post_reg_ld2:
;CHECK: ld2.8h { v0, v1 }, [x0], x{{[0-9]+}}
%ld2 = tail call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2.v8i16.p0i16(i16* %A)
- %tmp = getelementptr i16* %A, i64 %inc
+ %tmp = getelementptr i16, i16* %A, i64 %inc
store i16* %tmp, i16** %ptr
ret { <8 x i16>, <8 x i16> } %ld2
}
@@ -679,7 +679,7 @@ define { <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld2(i16* %A, i16** %ptr) {
;CHECK-LABEL: test_v4i16_post_imm_ld2:
;CHECK: ld2.4h { v0, v1 }, [x0], #16
%ld2 = tail call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2.v4i16.p0i16(i16* %A)
- %tmp = getelementptr i16* %A, i32 8
+ %tmp = getelementptr i16, i16* %A, i32 8
store i16* %tmp, i16** %ptr
ret { <4 x i16>, <4 x i16> } %ld2
}
@@ -688,7 +688,7 @@ define { <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld2(i16* %A, i16** %ptr, i6
;CHECK-LABEL: test_v4i16_post_reg_ld2:
;CHECK: ld2.4h { v0, v1 }, [x0], x{{[0-9]+}}
%ld2 = tail call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2.v4i16.p0i16(i16* %A)
- %tmp = getelementptr i16* %A, i64 %inc
+ %tmp = getelementptr i16, i16* %A, i64 %inc
store i16* %tmp, i16** %ptr
ret { <4 x i16>, <4 x i16> } %ld2
}
@@ -700,7 +700,7 @@ define { <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld2(i32* %A, i32** %ptr) {
;CHECK-LABEL: test_v4i32_post_imm_ld2:
;CHECK: ld2.4s { v0, v1 }, [x0], #32
%ld2 = tail call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2.v4i32.p0i32(i32* %A)
- %tmp = getelementptr i32* %A, i32 8
+ %tmp = getelementptr i32, i32* %A, i32 8
store i32* %tmp, i32** %ptr
ret { <4 x i32>, <4 x i32> } %ld2
}
@@ -709,7 +709,7 @@ define { <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld2(i32* %A, i32** %ptr, i6
;CHECK-LABEL: test_v4i32_post_reg_ld2:
;CHECK: ld2.4s { v0, v1 }, [x0], x{{[0-9]+}}
%ld2 = tail call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2.v4i32.p0i32(i32* %A)
- %tmp = getelementptr i32* %A, i64 %inc
+ %tmp = getelementptr i32, i32* %A, i64 %inc
store i32* %tmp, i32** %ptr
ret { <4 x i32>, <4 x i32> } %ld2
}
@@ -721,7 +721,7 @@ define { <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld2(i32* %A, i32** %ptr) {
;CHECK-LABEL: test_v2i32_post_imm_ld2:
;CHECK: ld2.2s { v0, v1 }, [x0], #16
%ld2 = tail call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2.v2i32.p0i32(i32* %A)
- %tmp = getelementptr i32* %A, i32 4
+ %tmp = getelementptr i32, i32* %A, i32 4
store i32* %tmp, i32** %ptr
ret { <2 x i32>, <2 x i32> } %ld2
}
@@ -730,7 +730,7 @@ define { <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld2(i32* %A, i32** %ptr, i6
;CHECK-LABEL: test_v2i32_post_reg_ld2:
;CHECK: ld2.2s { v0, v1 }, [x0], x{{[0-9]+}}
%ld2 = tail call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2.v2i32.p0i32(i32* %A)
- %tmp = getelementptr i32* %A, i64 %inc
+ %tmp = getelementptr i32, i32* %A, i64 %inc
store i32* %tmp, i32** %ptr
ret { <2 x i32>, <2 x i32> } %ld2
}
@@ -742,7 +742,7 @@ define { <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld2(i64* %A, i64** %ptr) {
;CHECK-LABEL: test_v2i64_post_imm_ld2:
;CHECK: ld2.2d { v0, v1 }, [x0], #32
%ld2 = tail call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2.v2i64.p0i64(i64* %A)
- %tmp = getelementptr i64* %A, i32 4
+ %tmp = getelementptr i64, i64* %A, i32 4
store i64* %tmp, i64** %ptr
ret { <2 x i64>, <2 x i64> } %ld2
}
@@ -751,7 +751,7 @@ define { <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld2(i64* %A, i64** %ptr, i6
;CHECK-LABEL: test_v2i64_post_reg_ld2:
;CHECK: ld2.2d { v0, v1 }, [x0], x{{[0-9]+}}
%ld2 = tail call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2.v2i64.p0i64(i64* %A)
- %tmp = getelementptr i64* %A, i64 %inc
+ %tmp = getelementptr i64, i64* %A, i64 %inc
store i64* %tmp, i64** %ptr
ret { <2 x i64>, <2 x i64> } %ld2
}
@@ -763,7 +763,7 @@ define { <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld2(i64* %A, i64** %ptr) {
;CHECK-LABEL: test_v1i64_post_imm_ld2:
;CHECK: ld1.1d { v0, v1 }, [x0], #16
%ld2 = tail call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2.v1i64.p0i64(i64* %A)
- %tmp = getelementptr i64* %A, i32 2
+ %tmp = getelementptr i64, i64* %A, i32 2
store i64* %tmp, i64** %ptr
ret { <1 x i64>, <1 x i64> } %ld2
}
@@ -772,7 +772,7 @@ define { <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld2(i64* %A, i64** %ptr, i6
;CHECK-LABEL: test_v1i64_post_reg_ld2:
;CHECK: ld1.1d { v0, v1 }, [x0], x{{[0-9]+}}
%ld2 = tail call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2.v1i64.p0i64(i64* %A)
- %tmp = getelementptr i64* %A, i64 %inc
+ %tmp = getelementptr i64, i64* %A, i64 %inc
store i64* %tmp, i64** %ptr
ret { <1 x i64>, <1 x i64> } %ld2
}
@@ -784,7 +784,7 @@ define { <4 x float>, <4 x float> } @test_v4f32_post_imm_ld2(float* %A, float**
;CHECK-LABEL: test_v4f32_post_imm_ld2:
;CHECK: ld2.4s { v0, v1 }, [x0], #32
%ld2 = tail call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2.v4f32.p0f32(float* %A)
- %tmp = getelementptr float* %A, i32 8
+ %tmp = getelementptr float, float* %A, i32 8
store float* %tmp, float** %ptr
ret { <4 x float>, <4 x float> } %ld2
}
@@ -793,7 +793,7 @@ define { <4 x float>, <4 x float> } @test_v4f32_post_reg_ld2(float* %A, float**
;CHECK-LABEL: test_v4f32_post_reg_ld2:
;CHECK: ld2.4s { v0, v1 }, [x0], x{{[0-9]+}}
%ld2 = tail call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2.v4f32.p0f32(float* %A)
- %tmp = getelementptr float* %A, i64 %inc
+ %tmp = getelementptr float, float* %A, i64 %inc
store float* %tmp, float** %ptr
ret { <4 x float>, <4 x float> } %ld2
}
@@ -805,7 +805,7 @@ define { <2 x float>, <2 x float> } @test_v2f32_post_imm_ld2(float* %A, float**
;CHECK-LABEL: test_v2f32_post_imm_ld2:
;CHECK: ld2.2s { v0, v1 }, [x0], #16
%ld2 = tail call { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld2.v2f32.p0f32(float* %A)
- %tmp = getelementptr float* %A, i32 4
+ %tmp = getelementptr float, float* %A, i32 4
store float* %tmp, float** %ptr
ret { <2 x float>, <2 x float> } %ld2
}
@@ -814,7 +814,7 @@ define { <2 x float>, <2 x float> } @test_v2f32_post_reg_ld2(float* %A, float**
;CHECK-LABEL: test_v2f32_post_reg_ld2:
;CHECK: ld2.2s { v0, v1 }, [x0], x{{[0-9]+}}
%ld2 = tail call { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld2.v2f32.p0f32(float* %A)
- %tmp = getelementptr float* %A, i64 %inc
+ %tmp = getelementptr float, float* %A, i64 %inc
store float* %tmp, float** %ptr
ret { <2 x float>, <2 x float> } %ld2
}
@@ -826,7 +826,7 @@ define { <2 x double>, <2 x double> } @test_v2f64_post_imm_ld2(double* %A, doubl
;CHECK-LABEL: test_v2f64_post_imm_ld2:
;CHECK: ld2.2d { v0, v1 }, [x0], #32
%ld2 = tail call { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld2.v2f64.p0f64(double* %A)
- %tmp = getelementptr double* %A, i32 4
+ %tmp = getelementptr double, double* %A, i32 4
store double* %tmp, double** %ptr
ret { <2 x double>, <2 x double> } %ld2
}
@@ -835,7 +835,7 @@ define { <2 x double>, <2 x double> } @test_v2f64_post_reg_ld2(double* %A, doubl
;CHECK-LABEL: test_v2f64_post_reg_ld2:
;CHECK: ld2.2d { v0, v1 }, [x0], x{{[0-9]+}}
%ld2 = tail call { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld2.v2f64.p0f64(double* %A)
- %tmp = getelementptr double* %A, i64 %inc
+ %tmp = getelementptr double, double* %A, i64 %inc
store double* %tmp, double** %ptr
ret { <2 x double>, <2 x double> } %ld2
}
@@ -847,7 +847,7 @@ define { <1 x double>, <1 x double> } @test_v1f64_post_imm_ld2(double* %A, doubl
;CHECK-LABEL: test_v1f64_post_imm_ld2:
;CHECK: ld1.1d { v0, v1 }, [x0], #16
%ld2 = tail call { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld2.v1f64.p0f64(double* %A)
- %tmp = getelementptr double* %A, i32 2
+ %tmp = getelementptr double, double* %A, i32 2
store double* %tmp, double** %ptr
ret { <1 x double>, <1 x double> } %ld2
}
@@ -856,7 +856,7 @@ define { <1 x double>, <1 x double> } @test_v1f64_post_reg_ld2(double* %A, doubl
;CHECK-LABEL: test_v1f64_post_reg_ld2:
;CHECK: ld1.1d { v0, v1 }, [x0], x{{[0-9]+}}
%ld2 = tail call { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld2.v1f64.p0f64(double* %A)
- %tmp = getelementptr double* %A, i64 %inc
+ %tmp = getelementptr double, double* %A, i64 %inc
store double* %tmp, double** %ptr
ret { <1 x double>, <1 x double> } %ld2
}
@@ -868,7 +868,7 @@ define { <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld3(i8* %A, i8**
;CHECK-LABEL: test_v16i8_post_imm_ld3:
;CHECK: ld3.16b { v0, v1, v2 }, [x0], #48
%ld3 = tail call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3.v16i8.p0i8(i8* %A)
- %tmp = getelementptr i8* %A, i32 48
+ %tmp = getelementptr i8, i8* %A, i32 48
store i8* %tmp, i8** %ptr
ret { <16 x i8>, <16 x i8>, <16 x i8> } %ld3
}
@@ -877,7 +877,7 @@ define { <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld3(i8* %A, i8**
;CHECK-LABEL: test_v16i8_post_reg_ld3:
;CHECK: ld3.16b { v0, v1, v2 }, [x0], x{{[0-9]+}}
%ld3 = tail call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3.v16i8.p0i8(i8* %A)
- %tmp = getelementptr i8* %A, i64 %inc
+ %tmp = getelementptr i8, i8* %A, i64 %inc
store i8* %tmp, i8** %ptr
ret { <16 x i8>, <16 x i8>, <16 x i8> } %ld3
}
@@ -889,7 +889,7 @@ define { <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld3(i8* %A, i8** %pt
;CHECK-LABEL: test_v8i8_post_imm_ld3:
;CHECK: ld3.8b { v0, v1, v2 }, [x0], #24
%ld3 = tail call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3.v8i8.p0i8(i8* %A)
- %tmp = getelementptr i8* %A, i32 24
+ %tmp = getelementptr i8, i8* %A, i32 24
store i8* %tmp, i8** %ptr
ret { <8 x i8>, <8 x i8>, <8 x i8> } %ld3
}
@@ -898,7 +898,7 @@ define { <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld3(i8* %A, i8** %pt
;CHECK-LABEL: test_v8i8_post_reg_ld3:
;CHECK: ld3.8b { v0, v1, v2 }, [x0], x{{[0-9]+}}
%ld3 = tail call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3.v8i8.p0i8(i8* %A)
- %tmp = getelementptr i8* %A, i64 %inc
+ %tmp = getelementptr i8, i8* %A, i64 %inc
store i8* %tmp, i8** %ptr
ret { <8 x i8>, <8 x i8>, <8 x i8> } %ld3
}
@@ -910,7 +910,7 @@ define { <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld3(i16* %A, i16
;CHECK-LABEL: test_v8i16_post_imm_ld3:
;CHECK: ld3.8h { v0, v1, v2 }, [x0], #48
%ld3 = tail call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3.v8i16.p0i16(i16* %A)
- %tmp = getelementptr i16* %A, i32 24
+ %tmp = getelementptr i16, i16* %A, i32 24
store i16* %tmp, i16** %ptr
ret { <8 x i16>, <8 x i16>, <8 x i16> } %ld3
}
@@ -919,7 +919,7 @@ define { <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld3(i16* %A, i16
;CHECK-LABEL: test_v8i16_post_reg_ld3:
;CHECK: ld3.8h { v0, v1, v2 }, [x0], x{{[0-9]+}}
%ld3 = tail call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3.v8i16.p0i16(i16* %A)
- %tmp = getelementptr i16* %A, i64 %inc
+ %tmp = getelementptr i16, i16* %A, i64 %inc
store i16* %tmp, i16** %ptr
ret { <8 x i16>, <8 x i16>, <8 x i16> } %ld3
}
@@ -931,7 +931,7 @@ define { <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld3(i16* %A, i16
;CHECK-LABEL: test_v4i16_post_imm_ld3:
;CHECK: ld3.4h { v0, v1, v2 }, [x0], #24
%ld3 = tail call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3.v4i16.p0i16(i16* %A)
- %tmp = getelementptr i16* %A, i32 12
+ %tmp = getelementptr i16, i16* %A, i32 12
store i16* %tmp, i16** %ptr
ret { <4 x i16>, <4 x i16>, <4 x i16> } %ld3
}
@@ -940,7 +940,7 @@ define { <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld3(i16* %A, i16
;CHECK-LABEL: test_v4i16_post_reg_ld3:
;CHECK: ld3.4h { v0, v1, v2 }, [x0], x{{[0-9]+}}
%ld3 = tail call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3.v4i16.p0i16(i16* %A)
- %tmp = getelementptr i16* %A, i64 %inc
+ %tmp = getelementptr i16, i16* %A, i64 %inc
store i16* %tmp, i16** %ptr
ret { <4 x i16>, <4 x i16>, <4 x i16> } %ld3
}
@@ -952,7 +952,7 @@ define { <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld3(i32* %A, i32
;CHECK-LABEL: test_v4i32_post_imm_ld3:
;CHECK: ld3.4s { v0, v1, v2 }, [x0], #48
%ld3 = tail call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3.v4i32.p0i32(i32* %A)
- %tmp = getelementptr i32* %A, i32 12
+ %tmp = getelementptr i32, i32* %A, i32 12
store i32* %tmp, i32** %ptr
ret { <4 x i32>, <4 x i32>, <4 x i32> } %ld3
}
@@ -961,7 +961,7 @@ define { <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld3(i32* %A, i32
;CHECK-LABEL: test_v4i32_post_reg_ld3:
;CHECK: ld3.4s { v0, v1, v2 }, [x0], x{{[0-9]+}}
%ld3 = tail call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3.v4i32.p0i32(i32* %A)
- %tmp = getelementptr i32* %A, i64 %inc
+ %tmp = getelementptr i32, i32* %A, i64 %inc
store i32* %tmp, i32** %ptr
ret { <4 x i32>, <4 x i32>, <4 x i32> } %ld3
}
@@ -973,7 +973,7 @@ define { <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld3(i32* %A, i32
;CHECK-LABEL: test_v2i32_post_imm_ld3:
;CHECK: ld3.2s { v0, v1, v2 }, [x0], #24
%ld3 = tail call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3.v2i32.p0i32(i32* %A)
- %tmp = getelementptr i32* %A, i32 6
+ %tmp = getelementptr i32, i32* %A, i32 6
store i32* %tmp, i32** %ptr
ret { <2 x i32>, <2 x i32>, <2 x i32> } %ld3
}
@@ -982,7 +982,7 @@ define { <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld3(i32* %A, i32
;CHECK-LABEL: test_v2i32_post_reg_ld3:
;CHECK: ld3.2s { v0, v1, v2 }, [x0], x{{[0-9]+}}
%ld3 = tail call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3.v2i32.p0i32(i32* %A)
- %tmp = getelementptr i32* %A, i64 %inc
+ %tmp = getelementptr i32, i32* %A, i64 %inc
store i32* %tmp, i32** %ptr
ret { <2 x i32>, <2 x i32>, <2 x i32> } %ld3
}
@@ -994,7 +994,7 @@ define { <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld3(i64* %A, i64
;CHECK-LABEL: test_v2i64_post_imm_ld3:
;CHECK: ld3.2d { v0, v1, v2 }, [x0], #48
%ld3 = tail call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3.v2i64.p0i64(i64* %A)
- %tmp = getelementptr i64* %A, i32 6
+ %tmp = getelementptr i64, i64* %A, i32 6
store i64* %tmp, i64** %ptr
ret { <2 x i64>, <2 x i64>, <2 x i64> } %ld3
}
@@ -1003,7 +1003,7 @@ define { <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld3(i64* %A, i64
;CHECK-LABEL: test_v2i64_post_reg_ld3:
;CHECK: ld3.2d { v0, v1, v2 }, [x0], x{{[0-9]+}}
%ld3 = tail call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3.v2i64.p0i64(i64* %A)
- %tmp = getelementptr i64* %A, i64 %inc
+ %tmp = getelementptr i64, i64* %A, i64 %inc
store i64* %tmp, i64** %ptr
ret { <2 x i64>, <2 x i64>, <2 x i64> } %ld3
}
@@ -1015,7 +1015,7 @@ define { <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld3(i64* %A, i64
;CHECK-LABEL: test_v1i64_post_imm_ld3:
;CHECK: ld1.1d { v0, v1, v2 }, [x0], #24
%ld3 = tail call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3.v1i64.p0i64(i64* %A)
- %tmp = getelementptr i64* %A, i32 3
+ %tmp = getelementptr i64, i64* %A, i32 3
store i64* %tmp, i64** %ptr
ret { <1 x i64>, <1 x i64>, <1 x i64> } %ld3
}
@@ -1024,7 +1024,7 @@ define { <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld3(i64* %A, i64
;CHECK-LABEL: test_v1i64_post_reg_ld3:
;CHECK: ld1.1d { v0, v1, v2 }, [x0], x{{[0-9]+}}
%ld3 = tail call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3.v1i64.p0i64(i64* %A)
- %tmp = getelementptr i64* %A, i64 %inc
+ %tmp = getelementptr i64, i64* %A, i64 %inc
store i64* %tmp, i64** %ptr
ret { <1 x i64>, <1 x i64>, <1 x i64> } %ld3
}
@@ -1036,7 +1036,7 @@ define { <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_imm_ld3(float*
;CHECK-LABEL: test_v4f32_post_imm_ld3:
;CHECK: ld3.4s { v0, v1, v2 }, [x0], #48
%ld3 = tail call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3.v4f32.p0f32(float* %A)
- %tmp = getelementptr float* %A, i32 12
+ %tmp = getelementptr float, float* %A, i32 12
store float* %tmp, float** %ptr
ret { <4 x float>, <4 x float>, <4 x float> } %ld3
}
@@ -1045,7 +1045,7 @@ define { <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_reg_ld3(float*
;CHECK-LABEL: test_v4f32_post_reg_ld3:
;CHECK: ld3.4s { v0, v1, v2 }, [x0], x{{[0-9]+}}
%ld3 = tail call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3.v4f32.p0f32(float* %A)
- %tmp = getelementptr float* %A, i64 %inc
+ %tmp = getelementptr float, float* %A, i64 %inc
store float* %tmp, float** %ptr
ret { <4 x float>, <4 x float>, <4 x float> } %ld3
}
@@ -1057,7 +1057,7 @@ define { <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_imm_ld3(float*
;CHECK-LABEL: test_v2f32_post_imm_ld3:
;CHECK: ld3.2s { v0, v1, v2 }, [x0], #24
%ld3 = tail call { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld3.v2f32.p0f32(float* %A)
- %tmp = getelementptr float* %A, i32 6
+ %tmp = getelementptr float, float* %A, i32 6
store float* %tmp, float** %ptr
ret { <2 x float>, <2 x float>, <2 x float> } %ld3
}
@@ -1066,7 +1066,7 @@ define { <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_reg_ld3(float*
;CHECK-LABEL: test_v2f32_post_reg_ld3:
;CHECK: ld3.2s { v0, v1, v2 }, [x0], x{{[0-9]+}}
%ld3 = tail call { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld3.v2f32.p0f32(float* %A)
- %tmp = getelementptr float* %A, i64 %inc
+ %tmp = getelementptr float, float* %A, i64 %inc
store float* %tmp, float** %ptr
ret { <2 x float>, <2 x float>, <2 x float> } %ld3
}
@@ -1078,7 +1078,7 @@ define { <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_imm_ld3(dou
;CHECK-LABEL: test_v2f64_post_imm_ld3:
;CHECK: ld3.2d { v0, v1, v2 }, [x0], #48
%ld3 = tail call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld3.v2f64.p0f64(double* %A)
- %tmp = getelementptr double* %A, i32 6
+ %tmp = getelementptr double, double* %A, i32 6
store double* %tmp, double** %ptr
ret { <2 x double>, <2 x double>, <2 x double> } %ld3
}
@@ -1087,7 +1087,7 @@ define { <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_reg_ld3(dou
;CHECK-LABEL: test_v2f64_post_reg_ld3:
;CHECK: ld3.2d { v0, v1, v2 }, [x0], x{{[0-9]+}}
%ld3 = tail call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld3.v2f64.p0f64(double* %A)
- %tmp = getelementptr double* %A, i64 %inc
+ %tmp = getelementptr double, double* %A, i64 %inc
store double* %tmp, double** %ptr
ret { <2 x double>, <2 x double>, <2 x double> } %ld3
}
@@ -1099,7 +1099,7 @@ define { <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_imm_ld3(dou
;CHECK-LABEL: test_v1f64_post_imm_ld3:
;CHECK: ld1.1d { v0, v1, v2 }, [x0], #24
%ld3 = tail call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld3.v1f64.p0f64(double* %A)
- %tmp = getelementptr double* %A, i32 3
+ %tmp = getelementptr double, double* %A, i32 3
store double* %tmp, double** %ptr
ret { <1 x double>, <1 x double>, <1 x double> } %ld3
}
@@ -1108,7 +1108,7 @@ define { <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_reg_ld3(dou
;CHECK-LABEL: test_v1f64_post_reg_ld3:
;CHECK: ld1.1d { v0, v1, v2 }, [x0], x{{[0-9]+}}
%ld3 = tail call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld3.v1f64.p0f64(double* %A)
- %tmp = getelementptr double* %A, i64 %inc
+ %tmp = getelementptr double, double* %A, i64 %inc
store double* %tmp, double** %ptr
ret { <1 x double>, <1 x double>, <1 x double> } %ld3
}
@@ -1120,7 +1120,7 @@ define { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld4(i
;CHECK-LABEL: test_v16i8_post_imm_ld4:
;CHECK: ld4.16b { v0, v1, v2, v3 }, [x0], #64
%ld4 = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4.v16i8.p0i8(i8* %A)
- %tmp = getelementptr i8* %A, i32 64
+ %tmp = getelementptr i8, i8* %A, i32 64
store i8* %tmp, i8** %ptr
ret { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %ld4
}
@@ -1129,7 +1129,7 @@ define { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld4(i
;CHECK-LABEL: test_v16i8_post_reg_ld4:
;CHECK: ld4.16b { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
%ld4 = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4.v16i8.p0i8(i8* %A)
- %tmp = getelementptr i8* %A, i64 %inc
+ %tmp = getelementptr i8, i8* %A, i64 %inc
store i8* %tmp, i8** %ptr
ret { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %ld4
}
@@ -1141,7 +1141,7 @@ define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld4(i8* %A
;CHECK-LABEL: test_v8i8_post_imm_ld4:
;CHECK: ld4.8b { v0, v1, v2, v3 }, [x0], #32
%ld4 = tail call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4.v8i8.p0i8(i8* %A)
- %tmp = getelementptr i8* %A, i32 32
+ %tmp = getelementptr i8, i8* %A, i32 32
store i8* %tmp, i8** %ptr
ret { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %ld4
}
@@ -1150,7 +1150,7 @@ define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld4(i8* %A
;CHECK-LABEL: test_v8i8_post_reg_ld4:
;CHECK: ld4.8b { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
%ld4 = tail call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4.v8i8.p0i8(i8* %A)
- %tmp = getelementptr i8* %A, i64 %inc
+ %tmp = getelementptr i8, i8* %A, i64 %inc
store i8* %tmp, i8** %ptr
ret { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %ld4
}
@@ -1162,7 +1162,7 @@ define { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld4(i
;CHECK-LABEL: test_v8i16_post_imm_ld4:
;CHECK: ld4.8h { v0, v1, v2, v3 }, [x0], #64
%ld4 = tail call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4.v8i16.p0i16(i16* %A)
- %tmp = getelementptr i16* %A, i32 32
+ %tmp = getelementptr i16, i16* %A, i32 32
store i16* %tmp, i16** %ptr
ret { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %ld4
}
@@ -1171,7 +1171,7 @@ define { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld4(i
;CHECK-LABEL: test_v8i16_post_reg_ld4:
;CHECK: ld4.8h { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
%ld4 = tail call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4.v8i16.p0i16(i16* %A)
- %tmp = getelementptr i16* %A, i64 %inc
+ %tmp = getelementptr i16, i16* %A, i64 %inc
store i16* %tmp, i16** %ptr
ret { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %ld4
}
@@ -1183,7 +1183,7 @@ define { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld4(i
;CHECK-LABEL: test_v4i16_post_imm_ld4:
;CHECK: ld4.4h { v0, v1, v2, v3 }, [x0], #32
%ld4 = tail call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4.v4i16.p0i16(i16* %A)
- %tmp = getelementptr i16* %A, i32 16
+ %tmp = getelementptr i16, i16* %A, i32 16
store i16* %tmp, i16** %ptr
ret { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %ld4
}
@@ -1192,7 +1192,7 @@ define { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld4(i
;CHECK-LABEL: test_v4i16_post_reg_ld4:
;CHECK: ld4.4h { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
%ld4 = tail call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4.v4i16.p0i16(i16* %A)
- %tmp = getelementptr i16* %A, i64 %inc
+ %tmp = getelementptr i16, i16* %A, i64 %inc
store i16* %tmp, i16** %ptr
ret { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %ld4
}
@@ -1204,7 +1204,7 @@ define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld4(i
;CHECK-LABEL: test_v4i32_post_imm_ld4:
;CHECK: ld4.4s { v0, v1, v2, v3 }, [x0], #64
%ld4 = tail call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4.v4i32.p0i32(i32* %A)
- %tmp = getelementptr i32* %A, i32 16
+ %tmp = getelementptr i32, i32* %A, i32 16
store i32* %tmp, i32** %ptr
ret { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %ld4
}
@@ -1213,7 +1213,7 @@ define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld4(i
;CHECK-LABEL: test_v4i32_post_reg_ld4:
;CHECK: ld4.4s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
%ld4 = tail call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4.v4i32.p0i32(i32* %A)
- %tmp = getelementptr i32* %A, i64 %inc
+ %tmp = getelementptr i32, i32* %A, i64 %inc
store i32* %tmp, i32** %ptr
ret { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %ld4
}
@@ -1225,7 +1225,7 @@ define { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld4(i
;CHECK-LABEL: test_v2i32_post_imm_ld4:
;CHECK: ld4.2s { v0, v1, v2, v3 }, [x0], #32
%ld4 = tail call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4.v2i32.p0i32(i32* %A)
- %tmp = getelementptr i32* %A, i32 8
+ %tmp = getelementptr i32, i32* %A, i32 8
store i32* %tmp, i32** %ptr
ret { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %ld4
}
@@ -1234,7 +1234,7 @@ define { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld4(i
;CHECK-LABEL: test_v2i32_post_reg_ld4:
;CHECK: ld4.2s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
%ld4 = tail call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4.v2i32.p0i32(i32* %A)
- %tmp = getelementptr i32* %A, i64 %inc
+ %tmp = getelementptr i32, i32* %A, i64 %inc
store i32* %tmp, i32** %ptr
ret { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %ld4
}
@@ -1246,7 +1246,7 @@ define { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld4(i
;CHECK-LABEL: test_v2i64_post_imm_ld4:
;CHECK: ld4.2d { v0, v1, v2, v3 }, [x0], #64
%ld4 = tail call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4.v2i64.p0i64(i64* %A)
- %tmp = getelementptr i64* %A, i32 8
+ %tmp = getelementptr i64, i64* %A, i32 8
store i64* %tmp, i64** %ptr
ret { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %ld4
}
@@ -1255,7 +1255,7 @@ define { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld4(i
;CHECK-LABEL: test_v2i64_post_reg_ld4:
;CHECK: ld4.2d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
%ld4 = tail call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4.v2i64.p0i64(i64* %A)
- %tmp = getelementptr i64* %A, i64 %inc
+ %tmp = getelementptr i64, i64* %A, i64 %inc
store i64* %tmp, i64** %ptr
ret { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %ld4
}
@@ -1267,7 +1267,7 @@ define { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld4(i
;CHECK-LABEL: test_v1i64_post_imm_ld4:
;CHECK: ld1.1d { v0, v1, v2, v3 }, [x0], #32
%ld4 = tail call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4.v1i64.p0i64(i64* %A)
- %tmp = getelementptr i64* %A, i32 4
+ %tmp = getelementptr i64, i64* %A, i32 4
store i64* %tmp, i64** %ptr
ret { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %ld4
}
@@ -1276,7 +1276,7 @@ define { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld4(i
;CHECK-LABEL: test_v1i64_post_reg_ld4:
;CHECK: ld1.1d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
%ld4 = tail call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4.v1i64.p0i64(i64* %A)
- %tmp = getelementptr i64* %A, i64 %inc
+ %tmp = getelementptr i64, i64* %A, i64 %inc
store i64* %tmp, i64** %ptr
ret { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %ld4
}
@@ -1288,7 +1288,7 @@ define { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_i
;CHECK-LABEL: test_v4f32_post_imm_ld4:
;CHECK: ld4.4s { v0, v1, v2, v3 }, [x0], #64
%ld4 = tail call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld4.v4f32.p0f32(float* %A)
- %tmp = getelementptr float* %A, i32 16
+ %tmp = getelementptr float, float* %A, i32 16
store float* %tmp, float** %ptr
ret { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %ld4
}
@@ -1297,7 +1297,7 @@ define { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_r
;CHECK-LABEL: test_v4f32_post_reg_ld4:
;CHECK: ld4.4s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
%ld4 = tail call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld4.v4f32.p0f32(float* %A)
- %tmp = getelementptr float* %A, i64 %inc
+ %tmp = getelementptr float, float* %A, i64 %inc
store float* %tmp, float** %ptr
ret { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %ld4
}
@@ -1309,7 +1309,7 @@ define { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_i
;CHECK-LABEL: test_v2f32_post_imm_ld4:
;CHECK: ld4.2s { v0, v1, v2, v3 }, [x0], #32
%ld4 = tail call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld4.v2f32.p0f32(float* %A)
- %tmp = getelementptr float* %A, i32 8
+ %tmp = getelementptr float, float* %A, i32 8
store float* %tmp, float** %ptr
ret { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %ld4
}
@@ -1318,7 +1318,7 @@ define { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_r
;CHECK-LABEL: test_v2f32_post_reg_ld4:
;CHECK: ld4.2s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
%ld4 = tail call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld4.v2f32.p0f32(float* %A)
- %tmp = getelementptr float* %A, i64 %inc
+ %tmp = getelementptr float, float* %A, i64 %inc
store float* %tmp, float** %ptr
ret { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %ld4
}
@@ -1330,7 +1330,7 @@ define { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @test_v2f64_po
;CHECK-LABEL: test_v2f64_post_imm_ld4:
;CHECK: ld4.2d { v0, v1, v2, v3 }, [x0], #64
%ld4 = tail call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld4.v2f64.p0f64(double* %A)
- %tmp = getelementptr double* %A, i32 8
+ %tmp = getelementptr double, double* %A, i32 8
store double* %tmp, double** %ptr
ret { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %ld4
}
@@ -1339,7 +1339,7 @@ define { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @test_v2f64_po
;CHECK-LABEL: test_v2f64_post_reg_ld4:
;CHECK: ld4.2d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
%ld4 = tail call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld4.v2f64.p0f64(double* %A)
- %tmp = getelementptr double* %A, i64 %inc
+ %tmp = getelementptr double, double* %A, i64 %inc
store double* %tmp, double** %ptr
ret { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %ld4
}
@@ -1351,7 +1351,7 @@ define { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @test_v1f64_po
;CHECK-LABEL: test_v1f64_post_imm_ld4:
;CHECK: ld1.1d { v0, v1, v2, v3 }, [x0], #32
%ld4 = tail call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld4.v1f64.p0f64(double* %A)
- %tmp = getelementptr double* %A, i32 4
+ %tmp = getelementptr double, double* %A, i32 4
store double* %tmp, double** %ptr
ret { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %ld4
}
@@ -1360,7 +1360,7 @@ define { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @test_v1f64_po
;CHECK-LABEL: test_v1f64_post_reg_ld4:
;CHECK: ld1.1d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
%ld4 = tail call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld4.v1f64.p0f64(double* %A)
- %tmp = getelementptr double* %A, i64 %inc
+ %tmp = getelementptr double, double* %A, i64 %inc
store double* %tmp, double** %ptr
ret { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %ld4
}
@@ -1371,7 +1371,7 @@ define { <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld1x2(i8* %A, i8** %ptr) {
;CHECK-LABEL: test_v16i8_post_imm_ld1x2:
;CHECK: ld1.16b { v0, v1 }, [x0], #32
%ld1x2 = tail call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x2.v16i8.p0i8(i8* %A)
- %tmp = getelementptr i8* %A, i32 32
+ %tmp = getelementptr i8, i8* %A, i32 32
store i8* %tmp, i8** %ptr
ret { <16 x i8>, <16 x i8> } %ld1x2
}
@@ -1380,7 +1380,7 @@ define { <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld1x2(i8* %A, i8** %ptr, i6
;CHECK-LABEL: test_v16i8_post_reg_ld1x2:
;CHECK: ld1.16b { v0, v1 }, [x0], x{{[0-9]+}}
%ld1x2 = tail call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x2.v16i8.p0i8(i8* %A)
- %tmp = getelementptr i8* %A, i64 %inc
+ %tmp = getelementptr i8, i8* %A, i64 %inc
store i8* %tmp, i8** %ptr
ret { <16 x i8>, <16 x i8> } %ld1x2
}
@@ -1392,7 +1392,7 @@ define { <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld1x2(i8* %A, i8** %ptr) {
;CHECK-LABEL: test_v8i8_post_imm_ld1x2:
;CHECK: ld1.8b { v0, v1 }, [x0], #16
%ld1x2 = tail call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x2.v8i8.p0i8(i8* %A)
- %tmp = getelementptr i8* %A, i32 16
+ %tmp = getelementptr i8, i8* %A, i32 16
store i8* %tmp, i8** %ptr
ret { <8 x i8>, <8 x i8> } %ld1x2
}
@@ -1401,7 +1401,7 @@ define { <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld1x2(i8* %A, i8** %ptr, i64 %
;CHECK-LABEL: test_v8i8_post_reg_ld1x2:
;CHECK: ld1.8b { v0, v1 }, [x0], x{{[0-9]+}}
%ld1x2 = tail call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x2.v8i8.p0i8(i8* %A)
- %tmp = getelementptr i8* %A, i64 %inc
+ %tmp = getelementptr i8, i8* %A, i64 %inc
store i8* %tmp, i8** %ptr
ret { <8 x i8>, <8 x i8> } %ld1x2
}
@@ -1413,7 +1413,7 @@ define { <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld1x2(i16* %A, i16** %ptr)
;CHECK-LABEL: test_v8i16_post_imm_ld1x2:
;CHECK: ld1.8h { v0, v1 }, [x0], #32
%ld1x2 = tail call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x2.v8i16.p0i16(i16* %A)
- %tmp = getelementptr i16* %A, i32 16
+ %tmp = getelementptr i16, i16* %A, i32 16
store i16* %tmp, i16** %ptr
ret { <8 x i16>, <8 x i16> } %ld1x2
}
@@ -1422,7 +1422,7 @@ define { <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld1x2(i16* %A, i16** %ptr,
;CHECK-LABEL: test_v8i16_post_reg_ld1x2:
;CHECK: ld1.8h { v0, v1 }, [x0], x{{[0-9]+}}
%ld1x2 = tail call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x2.v8i16.p0i16(i16* %A)
- %tmp = getelementptr i16* %A, i64 %inc
+ %tmp = getelementptr i16, i16* %A, i64 %inc
store i16* %tmp, i16** %ptr
ret { <8 x i16>, <8 x i16> } %ld1x2
}
@@ -1434,7 +1434,7 @@ define { <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld1x2(i16* %A, i16** %ptr)
;CHECK-LABEL: test_v4i16_post_imm_ld1x2:
;CHECK: ld1.4h { v0, v1 }, [x0], #16
%ld1x2 = tail call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x2.v4i16.p0i16(i16* %A)
- %tmp = getelementptr i16* %A, i32 8
+ %tmp = getelementptr i16, i16* %A, i32 8
store i16* %tmp, i16** %ptr
ret { <4 x i16>, <4 x i16> } %ld1x2
}
@@ -1443,7 +1443,7 @@ define { <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld1x2(i16* %A, i16** %ptr,
;CHECK-LABEL: test_v4i16_post_reg_ld1x2:
;CHECK: ld1.4h { v0, v1 }, [x0], x{{[0-9]+}}
%ld1x2 = tail call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x2.v4i16.p0i16(i16* %A)
- %tmp = getelementptr i16* %A, i64 %inc
+ %tmp = getelementptr i16, i16* %A, i64 %inc
store i16* %tmp, i16** %ptr
ret { <4 x i16>, <4 x i16> } %ld1x2
}
@@ -1455,7 +1455,7 @@ define { <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld1x2(i32* %A, i32** %ptr)
;CHECK-LABEL: test_v4i32_post_imm_ld1x2:
;CHECK: ld1.4s { v0, v1 }, [x0], #32
%ld1x2 = tail call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x2.v4i32.p0i32(i32* %A)
- %tmp = getelementptr i32* %A, i32 8
+ %tmp = getelementptr i32, i32* %A, i32 8
store i32* %tmp, i32** %ptr
ret { <4 x i32>, <4 x i32> } %ld1x2
}
@@ -1464,7 +1464,7 @@ define { <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld1x2(i32* %A, i32** %ptr,
;CHECK-LABEL: test_v4i32_post_reg_ld1x2:
;CHECK: ld1.4s { v0, v1 }, [x0], x{{[0-9]+}}
%ld1x2 = tail call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x2.v4i32.p0i32(i32* %A)
- %tmp = getelementptr i32* %A, i64 %inc
+ %tmp = getelementptr i32, i32* %A, i64 %inc
store i32* %tmp, i32** %ptr
ret { <4 x i32>, <4 x i32> } %ld1x2
}
@@ -1476,7 +1476,7 @@ define { <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld1x2(i32* %A, i32** %ptr)
;CHECK-LABEL: test_v2i32_post_imm_ld1x2:
;CHECK: ld1.2s { v0, v1 }, [x0], #16
%ld1x2 = tail call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld1x2.v2i32.p0i32(i32* %A)
- %tmp = getelementptr i32* %A, i32 4
+ %tmp = getelementptr i32, i32* %A, i32 4
store i32* %tmp, i32** %ptr
ret { <2 x i32>, <2 x i32> } %ld1x2
}
@@ -1485,7 +1485,7 @@ define { <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld1x2(i32* %A, i32** %ptr,
;CHECK-LABEL: test_v2i32_post_reg_ld1x2:
;CHECK: ld1.2s { v0, v1 }, [x0], x{{[0-9]+}}
%ld1x2 = tail call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld1x2.v2i32.p0i32(i32* %A)
- %tmp = getelementptr i32* %A, i64 %inc
+ %tmp = getelementptr i32, i32* %A, i64 %inc
store i32* %tmp, i32** %ptr
ret { <2 x i32>, <2 x i32> } %ld1x2
}
@@ -1497,7 +1497,7 @@ define { <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld1x2(i64* %A, i64** %ptr)
;CHECK-LABEL: test_v2i64_post_imm_ld1x2:
;CHECK: ld1.2d { v0, v1 }, [x0], #32
%ld1x2 = tail call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x2.v2i64.p0i64(i64* %A)
- %tmp = getelementptr i64* %A, i32 4
+ %tmp = getelementptr i64, i64* %A, i32 4
store i64* %tmp, i64** %ptr
ret { <2 x i64>, <2 x i64> } %ld1x2
}
@@ -1506,7 +1506,7 @@ define { <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld1x2(i64* %A, i64** %ptr,
;CHECK-LABEL: test_v2i64_post_reg_ld1x2:
;CHECK: ld1.2d { v0, v1 }, [x0], x{{[0-9]+}}
%ld1x2 = tail call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x2.v2i64.p0i64(i64* %A)
- %tmp = getelementptr i64* %A, i64 %inc
+ %tmp = getelementptr i64, i64* %A, i64 %inc
store i64* %tmp, i64** %ptr
ret { <2 x i64>, <2 x i64> } %ld1x2
}
@@ -1518,7 +1518,7 @@ define { <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld1x2(i64* %A, i64** %ptr)
;CHECK-LABEL: test_v1i64_post_imm_ld1x2:
;CHECK: ld1.1d { v0, v1 }, [x0], #16
%ld1x2 = tail call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x2.v1i64.p0i64(i64* %A)
- %tmp = getelementptr i64* %A, i32 2
+ %tmp = getelementptr i64, i64* %A, i32 2
store i64* %tmp, i64** %ptr
ret { <1 x i64>, <1 x i64> } %ld1x2
}
@@ -1527,7 +1527,7 @@ define { <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld1x2(i64* %A, i64** %ptr,
;CHECK-LABEL: test_v1i64_post_reg_ld1x2:
;CHECK: ld1.1d { v0, v1 }, [x0], x{{[0-9]+}}
%ld1x2 = tail call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x2.v1i64.p0i64(i64* %A)
- %tmp = getelementptr i64* %A, i64 %inc
+ %tmp = getelementptr i64, i64* %A, i64 %inc
store i64* %tmp, i64** %ptr
ret { <1 x i64>, <1 x i64> } %ld1x2
}
@@ -1539,7 +1539,7 @@ define { <4 x float>, <4 x float> } @test_v4f32_post_imm_ld1x2(float* %A, float*
;CHECK-LABEL: test_v4f32_post_imm_ld1x2:
;CHECK: ld1.4s { v0, v1 }, [x0], #32
%ld1x2 = tail call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld1x2.v4f32.p0f32(float* %A)
- %tmp = getelementptr float* %A, i32 8
+ %tmp = getelementptr float, float* %A, i32 8
store float* %tmp, float** %ptr
ret { <4 x float>, <4 x float> } %ld1x2
}
@@ -1548,7 +1548,7 @@ define { <4 x float>, <4 x float> } @test_v4f32_post_reg_ld1x2(float* %A, float*
;CHECK-LABEL: test_v4f32_post_reg_ld1x2:
;CHECK: ld1.4s { v0, v1 }, [x0], x{{[0-9]+}}
%ld1x2 = tail call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld1x2.v4f32.p0f32(float* %A)
- %tmp = getelementptr float* %A, i64 %inc
+ %tmp = getelementptr float, float* %A, i64 %inc
store float* %tmp, float** %ptr
ret { <4 x float>, <4 x float> } %ld1x2
}
@@ -1560,7 +1560,7 @@ define { <2 x float>, <2 x float> } @test_v2f32_post_imm_ld1x2(float* %A, float*
;CHECK-LABEL: test_v2f32_post_imm_ld1x2:
;CHECK: ld1.2s { v0, v1 }, [x0], #16
%ld1x2 = tail call { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld1x2.v2f32.p0f32(float* %A)
- %tmp = getelementptr float* %A, i32 4
+ %tmp = getelementptr float, float* %A, i32 4
store float* %tmp, float** %ptr
ret { <2 x float>, <2 x float> } %ld1x2
}
@@ -1569,7 +1569,7 @@ define { <2 x float>, <2 x float> } @test_v2f32_post_reg_ld1x2(float* %A, float*
;CHECK-LABEL: test_v2f32_post_reg_ld1x2:
;CHECK: ld1.2s { v0, v1 }, [x0], x{{[0-9]+}}
%ld1x2 = tail call { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld1x2.v2f32.p0f32(float* %A)
- %tmp = getelementptr float* %A, i64 %inc
+ %tmp = getelementptr float, float* %A, i64 %inc
store float* %tmp, float** %ptr
ret { <2 x float>, <2 x float> } %ld1x2
}
@@ -1581,7 +1581,7 @@ define { <2 x double>, <2 x double> } @test_v2f64_post_imm_ld1x2(double* %A, dou
;CHECK-LABEL: test_v2f64_post_imm_ld1x2:
;CHECK: ld1.2d { v0, v1 }, [x0], #32
%ld1x2 = tail call { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x2.v2f64.p0f64(double* %A)
- %tmp = getelementptr double* %A, i32 4
+ %tmp = getelementptr double, double* %A, i32 4
store double* %tmp, double** %ptr
ret { <2 x double>, <2 x double> } %ld1x2
}
@@ -1590,7 +1590,7 @@ define { <2 x double>, <2 x double> } @test_v2f64_post_reg_ld1x2(double* %A, dou
;CHECK-LABEL: test_v2f64_post_reg_ld1x2:
;CHECK: ld1.2d { v0, v1 }, [x0], x{{[0-9]+}}
%ld1x2 = tail call { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x2.v2f64.p0f64(double* %A)
- %tmp = getelementptr double* %A, i64 %inc
+ %tmp = getelementptr double, double* %A, i64 %inc
store double* %tmp, double** %ptr
ret { <2 x double>, <2 x double> } %ld1x2
}
@@ -1602,7 +1602,7 @@ define { <1 x double>, <1 x double> } @test_v1f64_post_imm_ld1x2(double* %A, dou
;CHECK-LABEL: test_v1f64_post_imm_ld1x2:
;CHECK: ld1.1d { v0, v1 }, [x0], #16
%ld1x2 = tail call { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x2.v1f64.p0f64(double* %A)
- %tmp = getelementptr double* %A, i32 2
+ %tmp = getelementptr double, double* %A, i32 2
store double* %tmp, double** %ptr
ret { <1 x double>, <1 x double> } %ld1x2
}
@@ -1611,7 +1611,7 @@ define { <1 x double>, <1 x double> } @test_v1f64_post_reg_ld1x2(double* %A, dou
;CHECK-LABEL: test_v1f64_post_reg_ld1x2:
;CHECK: ld1.1d { v0, v1 }, [x0], x{{[0-9]+}}
%ld1x2 = tail call { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x2.v1f64.p0f64(double* %A)
- %tmp = getelementptr double* %A, i64 %inc
+ %tmp = getelementptr double, double* %A, i64 %inc
store double* %tmp, double** %ptr
ret { <1 x double>, <1 x double> } %ld1x2
}
@@ -1623,7 +1623,7 @@ define { <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld1x3(i8* %A, i8
;CHECK-LABEL: test_v16i8_post_imm_ld1x3:
;CHECK: ld1.16b { v0, v1, v2 }, [x0], #48
%ld1x3 = tail call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x3.v16i8.p0i8(i8* %A)
- %tmp = getelementptr i8* %A, i32 48
+ %tmp = getelementptr i8, i8* %A, i32 48
store i8* %tmp, i8** %ptr
ret { <16 x i8>, <16 x i8>, <16 x i8> } %ld1x3
}
@@ -1632,7 +1632,7 @@ define { <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld1x3(i8* %A, i8
;CHECK-LABEL: test_v16i8_post_reg_ld1x3:
;CHECK: ld1.16b { v0, v1, v2 }, [x0], x{{[0-9]+}}
%ld1x3 = tail call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x3.v16i8.p0i8(i8* %A)
- %tmp = getelementptr i8* %A, i64 %inc
+ %tmp = getelementptr i8, i8* %A, i64 %inc
store i8* %tmp, i8** %ptr
ret { <16 x i8>, <16 x i8>, <16 x i8> } %ld1x3
}
@@ -1644,7 +1644,7 @@ define { <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld1x3(i8* %A, i8** %
;CHECK-LABEL: test_v8i8_post_imm_ld1x3:
;CHECK: ld1.8b { v0, v1, v2 }, [x0], #24
%ld1x3 = tail call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x3.v8i8.p0i8(i8* %A)
- %tmp = getelementptr i8* %A, i32 24
+ %tmp = getelementptr i8, i8* %A, i32 24
store i8* %tmp, i8** %ptr
ret { <8 x i8>, <8 x i8>, <8 x i8> } %ld1x3
}
@@ -1653,7 +1653,7 @@ define { <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld1x3(i8* %A, i8** %
;CHECK-LABEL: test_v8i8_post_reg_ld1x3:
;CHECK: ld1.8b { v0, v1, v2 }, [x0], x{{[0-9]+}}
%ld1x3 = tail call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x3.v8i8.p0i8(i8* %A)
- %tmp = getelementptr i8* %A, i64 %inc
+ %tmp = getelementptr i8, i8* %A, i64 %inc
store i8* %tmp, i8** %ptr
ret { <8 x i8>, <8 x i8>, <8 x i8> } %ld1x3
}
@@ -1665,7 +1665,7 @@ define { <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld1x3(i16* %A, i
;CHECK-LABEL: test_v8i16_post_imm_ld1x3:
;CHECK: ld1.8h { v0, v1, v2 }, [x0], #48
%ld1x3 = tail call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x3.v8i16.p0i16(i16* %A)
- %tmp = getelementptr i16* %A, i32 24
+ %tmp = getelementptr i16, i16* %A, i32 24
store i16* %tmp, i16** %ptr
ret { <8 x i16>, <8 x i16>, <8 x i16> } %ld1x3
}
@@ -1674,7 +1674,7 @@ define { <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld1x3(i16* %A, i
;CHECK-LABEL: test_v8i16_post_reg_ld1x3:
;CHECK: ld1.8h { v0, v1, v2 }, [x0], x{{[0-9]+}}
%ld1x3 = tail call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x3.v8i16.p0i16(i16* %A)
- %tmp = getelementptr i16* %A, i64 %inc
+ %tmp = getelementptr i16, i16* %A, i64 %inc
store i16* %tmp, i16** %ptr
ret { <8 x i16>, <8 x i16>, <8 x i16> } %ld1x3
}
@@ -1686,7 +1686,7 @@ define { <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld1x3(i16* %A, i
;CHECK-LABEL: test_v4i16_post_imm_ld1x3:
;CHECK: ld1.4h { v0, v1, v2 }, [x0], #24
%ld1x3 = tail call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x3.v4i16.p0i16(i16* %A)
- %tmp = getelementptr i16* %A, i32 12
+ %tmp = getelementptr i16, i16* %A, i32 12
store i16* %tmp, i16** %ptr
ret { <4 x i16>, <4 x i16>, <4 x i16> } %ld1x3
}
@@ -1695,7 +1695,7 @@ define { <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld1x3(i16* %A, i
;CHECK-LABEL: test_v4i16_post_reg_ld1x3:
;CHECK: ld1.4h { v0, v1, v2 }, [x0], x{{[0-9]+}}
%ld1x3 = tail call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x3.v4i16.p0i16(i16* %A)
- %tmp = getelementptr i16* %A, i64 %inc
+ %tmp = getelementptr i16, i16* %A, i64 %inc
store i16* %tmp, i16** %ptr
ret { <4 x i16>, <4 x i16>, <4 x i16> } %ld1x3
}
@@ -1707,7 +1707,7 @@ define { <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld1x3(i32* %A, i
;CHECK-LABEL: test_v4i32_post_imm_ld1x3:
;CHECK: ld1.4s { v0, v1, v2 }, [x0], #48
%ld1x3 = tail call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x3.v4i32.p0i32(i32* %A)
- %tmp = getelementptr i32* %A, i32 12
+ %tmp = getelementptr i32, i32* %A, i32 12
store i32* %tmp, i32** %ptr
ret { <4 x i32>, <4 x i32>, <4 x i32> } %ld1x3
}
@@ -1716,7 +1716,7 @@ define { <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld1x3(i32* %A, i
;CHECK-LABEL: test_v4i32_post_reg_ld1x3:
;CHECK: ld1.4s { v0, v1, v2 }, [x0], x{{[0-9]+}}
%ld1x3 = tail call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x3.v4i32.p0i32(i32* %A)
- %tmp = getelementptr i32* %A, i64 %inc
+ %tmp = getelementptr i32, i32* %A, i64 %inc
store i32* %tmp, i32** %ptr
ret { <4 x i32>, <4 x i32>, <4 x i32> } %ld1x3
}
@@ -1728,7 +1728,7 @@ define { <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld1x3(i32* %A, i
;CHECK-LABEL: test_v2i32_post_imm_ld1x3:
;CHECK: ld1.2s { v0, v1, v2 }, [x0], #24
%ld1x3 = tail call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld1x3.v2i32.p0i32(i32* %A)
- %tmp = getelementptr i32* %A, i32 6
+ %tmp = getelementptr i32, i32* %A, i32 6
store i32* %tmp, i32** %ptr
ret { <2 x i32>, <2 x i32>, <2 x i32> } %ld1x3
}
@@ -1737,7 +1737,7 @@ define { <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld1x3(i32* %A, i
;CHECK-LABEL: test_v2i32_post_reg_ld1x3:
;CHECK: ld1.2s { v0, v1, v2 }, [x0], x{{[0-9]+}}
%ld1x3 = tail call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld1x3.v2i32.p0i32(i32* %A)
- %tmp = getelementptr i32* %A, i64 %inc
+ %tmp = getelementptr i32, i32* %A, i64 %inc
store i32* %tmp, i32** %ptr
ret { <2 x i32>, <2 x i32>, <2 x i32> } %ld1x3
}
@@ -1749,7 +1749,7 @@ define { <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld1x3(i64* %A, i
;CHECK-LABEL: test_v2i64_post_imm_ld1x3:
;CHECK: ld1.2d { v0, v1, v2 }, [x0], #48
%ld1x3 = tail call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x3.v2i64.p0i64(i64* %A)
- %tmp = getelementptr i64* %A, i32 6
+ %tmp = getelementptr i64, i64* %A, i32 6
store i64* %tmp, i64** %ptr
ret { <2 x i64>, <2 x i64>, <2 x i64> } %ld1x3
}
@@ -1758,7 +1758,7 @@ define { <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld1x3(i64* %A, i
;CHECK-LABEL: test_v2i64_post_reg_ld1x3:
;CHECK: ld1.2d { v0, v1, v2 }, [x0], x{{[0-9]+}}
%ld1x3 = tail call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x3.v2i64.p0i64(i64* %A)
- %tmp = getelementptr i64* %A, i64 %inc
+ %tmp = getelementptr i64, i64* %A, i64 %inc
store i64* %tmp, i64** %ptr
ret { <2 x i64>, <2 x i64>, <2 x i64> } %ld1x3
}
@@ -1770,7 +1770,7 @@ define { <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld1x3(i64* %A, i
;CHECK-LABEL: test_v1i64_post_imm_ld1x3:
;CHECK: ld1.1d { v0, v1, v2 }, [x0], #24
%ld1x3 = tail call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x3.v1i64.p0i64(i64* %A)
- %tmp = getelementptr i64* %A, i32 3
+ %tmp = getelementptr i64, i64* %A, i32 3
store i64* %tmp, i64** %ptr
ret { <1 x i64>, <1 x i64>, <1 x i64> } %ld1x3
}
@@ -1779,7 +1779,7 @@ define { <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld1x3(i64* %A, i
;CHECK-LABEL: test_v1i64_post_reg_ld1x3:
;CHECK: ld1.1d { v0, v1, v2 }, [x0], x{{[0-9]+}}
%ld1x3 = tail call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x3.v1i64.p0i64(i64* %A)
- %tmp = getelementptr i64* %A, i64 %inc
+ %tmp = getelementptr i64, i64* %A, i64 %inc
store i64* %tmp, i64** %ptr
ret { <1 x i64>, <1 x i64>, <1 x i64> } %ld1x3
}
@@ -1791,7 +1791,7 @@ define { <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_imm_ld1x3(floa
;CHECK-LABEL: test_v4f32_post_imm_ld1x3:
;CHECK: ld1.4s { v0, v1, v2 }, [x0], #48
%ld1x3 = tail call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld1x3.v4f32.p0f32(float* %A)
- %tmp = getelementptr float* %A, i32 12
+ %tmp = getelementptr float, float* %A, i32 12
store float* %tmp, float** %ptr
ret { <4 x float>, <4 x float>, <4 x float> } %ld1x3
}
@@ -1800,7 +1800,7 @@ define { <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_reg_ld1x3(floa
;CHECK-LABEL: test_v4f32_post_reg_ld1x3:
;CHECK: ld1.4s { v0, v1, v2 }, [x0], x{{[0-9]+}}
%ld1x3 = tail call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld1x3.v4f32.p0f32(float* %A)
- %tmp = getelementptr float* %A, i64 %inc
+ %tmp = getelementptr float, float* %A, i64 %inc
store float* %tmp, float** %ptr
ret { <4 x float>, <4 x float>, <4 x float> } %ld1x3
}
@@ -1812,7 +1812,7 @@ define { <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_imm_ld1x3(floa
;CHECK-LABEL: test_v2f32_post_imm_ld1x3:
;CHECK: ld1.2s { v0, v1, v2 }, [x0], #24
%ld1x3 = tail call { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld1x3.v2f32.p0f32(float* %A)
- %tmp = getelementptr float* %A, i32 6
+ %tmp = getelementptr float, float* %A, i32 6
store float* %tmp, float** %ptr
ret { <2 x float>, <2 x float>, <2 x float> } %ld1x3
}
@@ -1821,7 +1821,7 @@ define { <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_reg_ld1x3(floa
;CHECK-LABEL: test_v2f32_post_reg_ld1x3:
;CHECK: ld1.2s { v0, v1, v2 }, [x0], x{{[0-9]+}}
%ld1x3 = tail call { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld1x3.v2f32.p0f32(float* %A)
- %tmp = getelementptr float* %A, i64 %inc
+ %tmp = getelementptr float, float* %A, i64 %inc
store float* %tmp, float** %ptr
ret { <2 x float>, <2 x float>, <2 x float> } %ld1x3
}
@@ -1833,7 +1833,7 @@ define { <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_imm_ld1x3(d
;CHECK-LABEL: test_v2f64_post_imm_ld1x3:
;CHECK: ld1.2d { v0, v1, v2 }, [x0], #48
%ld1x3 = tail call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x3.v2f64.p0f64(double* %A)
- %tmp = getelementptr double* %A, i32 6
+ %tmp = getelementptr double, double* %A, i32 6
store double* %tmp, double** %ptr
ret { <2 x double>, <2 x double>, <2 x double> } %ld1x3
}
@@ -1842,7 +1842,7 @@ define { <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_reg_ld1x3(d
;CHECK-LABEL: test_v2f64_post_reg_ld1x3:
;CHECK: ld1.2d { v0, v1, v2 }, [x0], x{{[0-9]+}}
%ld1x3 = tail call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x3.v2f64.p0f64(double* %A)
- %tmp = getelementptr double* %A, i64 %inc
+ %tmp = getelementptr double, double* %A, i64 %inc
store double* %tmp, double** %ptr
ret { <2 x double>, <2 x double>, <2 x double> } %ld1x3
}
@@ -1854,7 +1854,7 @@ define { <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_imm_ld1x3(d
;CHECK-LABEL: test_v1f64_post_imm_ld1x3:
;CHECK: ld1.1d { v0, v1, v2 }, [x0], #24
%ld1x3 = tail call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x3.v1f64.p0f64(double* %A)
- %tmp = getelementptr double* %A, i32 3
+ %tmp = getelementptr double, double* %A, i32 3
store double* %tmp, double** %ptr
ret { <1 x double>, <1 x double>, <1 x double> } %ld1x3
}
@@ -1863,7 +1863,7 @@ define { <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_reg_ld1x3(d
;CHECK-LABEL: test_v1f64_post_reg_ld1x3:
;CHECK: ld1.1d { v0, v1, v2 }, [x0], x{{[0-9]+}}
%ld1x3 = tail call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x3.v1f64.p0f64(double* %A)
- %tmp = getelementptr double* %A, i64 %inc
+ %tmp = getelementptr double, double* %A, i64 %inc
store double* %tmp, double** %ptr
ret { <1 x double>, <1 x double>, <1 x double> } %ld1x3
}
@@ -1875,7 +1875,7 @@ define { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld1x4
;CHECK-LABEL: test_v16i8_post_imm_ld1x4:
;CHECK: ld1.16b { v0, v1, v2, v3 }, [x0], #64
%ld1x4 = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x4.v16i8.p0i8(i8* %A)
- %tmp = getelementptr i8* %A, i32 64
+ %tmp = getelementptr i8, i8* %A, i32 64
store i8* %tmp, i8** %ptr
ret { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %ld1x4
}
@@ -1884,7 +1884,7 @@ define { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld1x4
;CHECK-LABEL: test_v16i8_post_reg_ld1x4:
;CHECK: ld1.16b { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
%ld1x4 = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x4.v16i8.p0i8(i8* %A)
- %tmp = getelementptr i8* %A, i64 %inc
+ %tmp = getelementptr i8, i8* %A, i64 %inc
store i8* %tmp, i8** %ptr
ret { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %ld1x4
}
@@ -1896,7 +1896,7 @@ define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld1x4(i8*
;CHECK-LABEL: test_v8i8_post_imm_ld1x4:
;CHECK: ld1.8b { v0, v1, v2, v3 }, [x0], #32
%ld1x4 = tail call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x4.v8i8.p0i8(i8* %A)
- %tmp = getelementptr i8* %A, i32 32
+ %tmp = getelementptr i8, i8* %A, i32 32
store i8* %tmp, i8** %ptr
ret { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %ld1x4
}
@@ -1905,7 +1905,7 @@ define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld1x4(i8*
;CHECK-LABEL: test_v8i8_post_reg_ld1x4:
;CHECK: ld1.8b { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
%ld1x4 = tail call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x4.v8i8.p0i8(i8* %A)
- %tmp = getelementptr i8* %A, i64 %inc
+ %tmp = getelementptr i8, i8* %A, i64 %inc
store i8* %tmp, i8** %ptr
ret { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %ld1x4
}
@@ -1917,7 +1917,7 @@ define { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld1x4
;CHECK-LABEL: test_v8i16_post_imm_ld1x4:
;CHECK: ld1.8h { v0, v1, v2, v3 }, [x0], #64
%ld1x4 = tail call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x4.v8i16.p0i16(i16* %A)
- %tmp = getelementptr i16* %A, i32 32
+ %tmp = getelementptr i16, i16* %A, i32 32
store i16* %tmp, i16** %ptr
ret { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %ld1x4
}
@@ -1926,7 +1926,7 @@ define { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld1x4
;CHECK-LABEL: test_v8i16_post_reg_ld1x4:
;CHECK: ld1.8h { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
%ld1x4 = tail call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x4.v8i16.p0i16(i16* %A)
- %tmp = getelementptr i16* %A, i64 %inc
+ %tmp = getelementptr i16, i16* %A, i64 %inc
store i16* %tmp, i16** %ptr
ret { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %ld1x4
}
@@ -1938,7 +1938,7 @@ define { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld1x4
;CHECK-LABEL: test_v4i16_post_imm_ld1x4:
;CHECK: ld1.4h { v0, v1, v2, v3 }, [x0], #32
%ld1x4 = tail call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x4.v4i16.p0i16(i16* %A)
- %tmp = getelementptr i16* %A, i32 16
+ %tmp = getelementptr i16, i16* %A, i32 16
store i16* %tmp, i16** %ptr
ret { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %ld1x4
}
@@ -1947,7 +1947,7 @@ define { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld1x4
;CHECK-LABEL: test_v4i16_post_reg_ld1x4:
;CHECK: ld1.4h { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
%ld1x4 = tail call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x4.v4i16.p0i16(i16* %A)
- %tmp = getelementptr i16* %A, i64 %inc
+ %tmp = getelementptr i16, i16* %A, i64 %inc
store i16* %tmp, i16** %ptr
ret { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %ld1x4
}
@@ -1959,7 +1959,7 @@ define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld1x4
;CHECK-LABEL: test_v4i32_post_imm_ld1x4:
;CHECK: ld1.4s { v0, v1, v2, v3 }, [x0], #64
%ld1x4 = tail call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x4.v4i32.p0i32(i32* %A)
- %tmp = getelementptr i32* %A, i32 16
+ %tmp = getelementptr i32, i32* %A, i32 16
store i32* %tmp, i32** %ptr
ret { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %ld1x4
}
@@ -1968,7 +1968,7 @@ define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld1x4
;CHECK-LABEL: test_v4i32_post_reg_ld1x4:
;CHECK: ld1.4s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
%ld1x4 = tail call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x4.v4i32.p0i32(i32* %A)
- %tmp = getelementptr i32* %A, i64 %inc
+ %tmp = getelementptr i32, i32* %A, i64 %inc
store i32* %tmp, i32** %ptr
ret { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %ld1x4
}
@@ -1980,7 +1980,7 @@ define { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld1x4
;CHECK-LABEL: test_v2i32_post_imm_ld1x4:
;CHECK: ld1.2s { v0, v1, v2, v3 }, [x0], #32
%ld1x4 = tail call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld1x4.v2i32.p0i32(i32* %A)
- %tmp = getelementptr i32* %A, i32 8
+ %tmp = getelementptr i32, i32* %A, i32 8
store i32* %tmp, i32** %ptr
ret { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %ld1x4
}
@@ -1989,7 +1989,7 @@ define { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld1x4
;CHECK-LABEL: test_v2i32_post_reg_ld1x4:
;CHECK: ld1.2s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
%ld1x4 = tail call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld1x4.v2i32.p0i32(i32* %A)
- %tmp = getelementptr i32* %A, i64 %inc
+ %tmp = getelementptr i32, i32* %A, i64 %inc
store i32* %tmp, i32** %ptr
ret { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %ld1x4
}
@@ -2001,7 +2001,7 @@ define { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld1x4
;CHECK-LABEL: test_v2i64_post_imm_ld1x4:
;CHECK: ld1.2d { v0, v1, v2, v3 }, [x0], #64
%ld1x4 = tail call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x4.v2i64.p0i64(i64* %A)
- %tmp = getelementptr i64* %A, i32 8
+ %tmp = getelementptr i64, i64* %A, i32 8
store i64* %tmp, i64** %ptr
ret { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %ld1x4
}
@@ -2010,7 +2010,7 @@ define { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld1x4
;CHECK-LABEL: test_v2i64_post_reg_ld1x4:
;CHECK: ld1.2d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
%ld1x4 = tail call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x4.v2i64.p0i64(i64* %A)
- %tmp = getelementptr i64* %A, i64 %inc
+ %tmp = getelementptr i64, i64* %A, i64 %inc
store i64* %tmp, i64** %ptr
ret { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %ld1x4
}
@@ -2022,7 +2022,7 @@ define { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld1x4
;CHECK-LABEL: test_v1i64_post_imm_ld1x4:
;CHECK: ld1.1d { v0, v1, v2, v3 }, [x0], #32
%ld1x4 = tail call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x4.v1i64.p0i64(i64* %A)
- %tmp = getelementptr i64* %A, i32 4
+ %tmp = getelementptr i64, i64* %A, i32 4
store i64* %tmp, i64** %ptr
ret { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %ld1x4
}
@@ -2031,7 +2031,7 @@ define { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld1x4
;CHECK-LABEL: test_v1i64_post_reg_ld1x4:
;CHECK: ld1.1d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
%ld1x4 = tail call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x4.v1i64.p0i64(i64* %A)
- %tmp = getelementptr i64* %A, i64 %inc
+ %tmp = getelementptr i64, i64* %A, i64 %inc
store i64* %tmp, i64** %ptr
ret { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %ld1x4
}
@@ -2043,7 +2043,7 @@ define { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_i
;CHECK-LABEL: test_v4f32_post_imm_ld1x4:
;CHECK: ld1.4s { v0, v1, v2, v3 }, [x0], #64
%ld1x4 = tail call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld1x4.v4f32.p0f32(float* %A)
- %tmp = getelementptr float* %A, i32 16
+ %tmp = getelementptr float, float* %A, i32 16
store float* %tmp, float** %ptr
ret { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %ld1x4
}
@@ -2052,7 +2052,7 @@ define { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_r
;CHECK-LABEL: test_v4f32_post_reg_ld1x4:
;CHECK: ld1.4s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
%ld1x4 = tail call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld1x4.v4f32.p0f32(float* %A)
- %tmp = getelementptr float* %A, i64 %inc
+ %tmp = getelementptr float, float* %A, i64 %inc
store float* %tmp, float** %ptr
ret { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %ld1x4
}
@@ -2064,7 +2064,7 @@ define { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_i
;CHECK-LABEL: test_v2f32_post_imm_ld1x4:
;CHECK: ld1.2s { v0, v1, v2, v3 }, [x0], #32
%ld1x4 = tail call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld1x4.v2f32.p0f32(float* %A)
- %tmp = getelementptr float* %A, i32 8
+ %tmp = getelementptr float, float* %A, i32 8
store float* %tmp, float** %ptr
ret { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %ld1x4
}
@@ -2073,7 +2073,7 @@ define { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_r
;CHECK-LABEL: test_v2f32_post_reg_ld1x4:
;CHECK: ld1.2s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
%ld1x4 = tail call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld1x4.v2f32.p0f32(float* %A)
- %tmp = getelementptr float* %A, i64 %inc
+ %tmp = getelementptr float, float* %A, i64 %inc
store float* %tmp, float** %ptr
ret { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %ld1x4
}
@@ -2085,7 +2085,7 @@ define { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @test_v2f64_po
;CHECK-LABEL: test_v2f64_post_imm_ld1x4:
;CHECK: ld1.2d { v0, v1, v2, v3 }, [x0], #64
%ld1x4 = tail call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x4.v2f64.p0f64(double* %A)
- %tmp = getelementptr double* %A, i32 8
+ %tmp = getelementptr double, double* %A, i32 8
store double* %tmp, double** %ptr
ret { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %ld1x4
}
@@ -2094,7 +2094,7 @@ define { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @test_v2f64_po
;CHECK-LABEL: test_v2f64_post_reg_ld1x4:
;CHECK: ld1.2d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
%ld1x4 = tail call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x4.v2f64.p0f64(double* %A)
- %tmp = getelementptr double* %A, i64 %inc
+ %tmp = getelementptr double, double* %A, i64 %inc
store double* %tmp, double** %ptr
ret { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %ld1x4
}
@@ -2106,7 +2106,7 @@ define { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @test_v1f64_po
;CHECK-LABEL: test_v1f64_post_imm_ld1x4:
;CHECK: ld1.1d { v0, v1, v2, v3 }, [x0], #32
%ld1x4 = tail call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x4.v1f64.p0f64(double* %A)
- %tmp = getelementptr double* %A, i32 4
+ %tmp = getelementptr double, double* %A, i32 4
store double* %tmp, double** %ptr
ret { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %ld1x4
}
@@ -2115,7 +2115,7 @@ define { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @test_v1f64_po
;CHECK-LABEL: test_v1f64_post_reg_ld1x4:
;CHECK: ld1.1d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
%ld1x4 = tail call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x4.v1f64.p0f64(double* %A)
- %tmp = getelementptr double* %A, i64 %inc
+ %tmp = getelementptr double, double* %A, i64 %inc
store double* %tmp, double** %ptr
ret { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %ld1x4
}
@@ -2127,7 +2127,7 @@ define { <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld2r(i8* %A, i8** %ptr) nou
;CHECK-LABEL: test_v16i8_post_imm_ld2r:
;CHECK: ld2r.16b { v0, v1 }, [x0], #2
%ld2 = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2r.v16i8.p0i8(i8* %A)
- %tmp = getelementptr i8* %A, i32 2
+ %tmp = getelementptr i8, i8* %A, i32 2
store i8* %tmp, i8** %ptr
ret { <16 x i8>, <16 x i8> } %ld2
}
@@ -2136,7 +2136,7 @@ define { <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld2r(i8* %A, i8** %ptr, i64
;CHECK-LABEL: test_v16i8_post_reg_ld2r:
;CHECK: ld2r.16b { v0, v1 }, [x0], x{{[0-9]+}}
%ld2 = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2r.v16i8.p0i8(i8* %A)
- %tmp = getelementptr i8* %A, i64 %inc
+ %tmp = getelementptr i8, i8* %A, i64 %inc
store i8* %tmp, i8** %ptr
ret { <16 x i8>, <16 x i8> } %ld2
}
@@ -2148,7 +2148,7 @@ define { <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld2r(i8* %A, i8** %ptr) nounwi
;CHECK-LABEL: test_v8i8_post_imm_ld2r:
;CHECK: ld2r.8b { v0, v1 }, [x0], #2
%ld2 = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2r.v8i8.p0i8(i8* %A)
- %tmp = getelementptr i8* %A, i32 2
+ %tmp = getelementptr i8, i8* %A, i32 2
store i8* %tmp, i8** %ptr
ret { <8 x i8>, <8 x i8> } %ld2
}
@@ -2157,7 +2157,7 @@ define { <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld2r(i8* %A, i8** %ptr, i64 %i
;CHECK-LABEL: test_v8i8_post_reg_ld2r:
;CHECK: ld2r.8b { v0, v1 }, [x0], x{{[0-9]+}}
%ld2 = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2r.v8i8.p0i8(i8* %A)
- %tmp = getelementptr i8* %A, i64 %inc
+ %tmp = getelementptr i8, i8* %A, i64 %inc
store i8* %tmp, i8** %ptr
ret { <8 x i8>, <8 x i8> } %ld2
}
@@ -2169,7 +2169,7 @@ define { <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld2r(i16* %A, i16** %ptr) n
;CHECK-LABEL: test_v8i16_post_imm_ld2r:
;CHECK: ld2r.8h { v0, v1 }, [x0], #4
%ld2 = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2r.v8i16.p0i16(i16* %A)
- %tmp = getelementptr i16* %A, i32 2
+ %tmp = getelementptr i16, i16* %A, i32 2
store i16* %tmp, i16** %ptr
ret { <8 x i16>, <8 x i16> } %ld2
}
@@ -2178,7 +2178,7 @@ define { <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld2r(i16* %A, i16** %ptr, i
;CHECK-LABEL: test_v8i16_post_reg_ld2r:
;CHECK: ld2r.8h { v0, v1 }, [x0], x{{[0-9]+}}
%ld2 = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2r.v8i16.p0i16(i16* %A)
- %tmp = getelementptr i16* %A, i64 %inc
+ %tmp = getelementptr i16, i16* %A, i64 %inc
store i16* %tmp, i16** %ptr
ret { <8 x i16>, <8 x i16> } %ld2
}
@@ -2190,7 +2190,7 @@ define { <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld2r(i16* %A, i16** %ptr) n
;CHECK-LABEL: test_v4i16_post_imm_ld2r:
;CHECK: ld2r.4h { v0, v1 }, [x0], #4
%ld2 = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2r.v4i16.p0i16(i16* %A)
- %tmp = getelementptr i16* %A, i32 2
+ %tmp = getelementptr i16, i16* %A, i32 2
store i16* %tmp, i16** %ptr
ret { <4 x i16>, <4 x i16> } %ld2
}
@@ -2199,7 +2199,7 @@ define { <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld2r(i16* %A, i16** %ptr, i
;CHECK-LABEL: test_v4i16_post_reg_ld2r:
;CHECK: ld2r.4h { v0, v1 }, [x0], x{{[0-9]+}}
%ld2 = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2r.v4i16.p0i16(i16* %A)
- %tmp = getelementptr i16* %A, i64 %inc
+ %tmp = getelementptr i16, i16* %A, i64 %inc
store i16* %tmp, i16** %ptr
ret { <4 x i16>, <4 x i16> } %ld2
}
@@ -2211,7 +2211,7 @@ define { <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld2r(i32* %A, i32** %ptr) n
;CHECK-LABEL: test_v4i32_post_imm_ld2r:
;CHECK: ld2r.4s { v0, v1 }, [x0], #8
%ld2 = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2r.v4i32.p0i32(i32* %A)
- %tmp = getelementptr i32* %A, i32 2
+ %tmp = getelementptr i32, i32* %A, i32 2
store i32* %tmp, i32** %ptr
ret { <4 x i32>, <4 x i32> } %ld2
}
@@ -2220,7 +2220,7 @@ define { <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld2r(i32* %A, i32** %ptr, i
;CHECK-LABEL: test_v4i32_post_reg_ld2r:
;CHECK: ld2r.4s { v0, v1 }, [x0], x{{[0-9]+}}
%ld2 = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2r.v4i32.p0i32(i32* %A)
- %tmp = getelementptr i32* %A, i64 %inc
+ %tmp = getelementptr i32, i32* %A, i64 %inc
store i32* %tmp, i32** %ptr
ret { <4 x i32>, <4 x i32> } %ld2
}
@@ -2231,7 +2231,7 @@ define { <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld2r(i32* %A, i32** %ptr) n
;CHECK-LABEL: test_v2i32_post_imm_ld2r:
;CHECK: ld2r.2s { v0, v1 }, [x0], #8
%ld2 = call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2r.v2i32.p0i32(i32* %A)
- %tmp = getelementptr i32* %A, i32 2
+ %tmp = getelementptr i32, i32* %A, i32 2
store i32* %tmp, i32** %ptr
ret { <2 x i32>, <2 x i32> } %ld2
}
@@ -2240,7 +2240,7 @@ define { <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld2r(i32* %A, i32** %ptr, i
;CHECK-LABEL: test_v2i32_post_reg_ld2r:
;CHECK: ld2r.2s { v0, v1 }, [x0], x{{[0-9]+}}
%ld2 = call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2r.v2i32.p0i32(i32* %A)
- %tmp = getelementptr i32* %A, i64 %inc
+ %tmp = getelementptr i32, i32* %A, i64 %inc
store i32* %tmp, i32** %ptr
ret { <2 x i32>, <2 x i32> } %ld2
}
@@ -2252,7 +2252,7 @@ define { <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld2r(i64* %A, i64** %ptr) n
;CHECK-LABEL: test_v2i64_post_imm_ld2r:
;CHECK: ld2r.2d { v0, v1 }, [x0], #16
%ld2 = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2r.v2i64.p0i64(i64* %A)
- %tmp = getelementptr i64* %A, i32 2
+ %tmp = getelementptr i64, i64* %A, i32 2
store i64* %tmp, i64** %ptr
ret { <2 x i64>, <2 x i64> } %ld2
}
@@ -2261,7 +2261,7 @@ define { <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld2r(i64* %A, i64** %ptr, i
;CHECK-LABEL: test_v2i64_post_reg_ld2r:
;CHECK: ld2r.2d { v0, v1 }, [x0], x{{[0-9]+}}
%ld2 = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2r.v2i64.p0i64(i64* %A)
- %tmp = getelementptr i64* %A, i64 %inc
+ %tmp = getelementptr i64, i64* %A, i64 %inc
store i64* %tmp, i64** %ptr
ret { <2 x i64>, <2 x i64> } %ld2
}
@@ -2272,7 +2272,7 @@ define { <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld2r(i64* %A, i64** %ptr) n
;CHECK-LABEL: test_v1i64_post_imm_ld2r:
;CHECK: ld2r.1d { v0, v1 }, [x0], #16
%ld2 = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2r.v1i64.p0i64(i64* %A)
- %tmp = getelementptr i64* %A, i32 2
+ %tmp = getelementptr i64, i64* %A, i32 2
store i64* %tmp, i64** %ptr
ret { <1 x i64>, <1 x i64> } %ld2
}
@@ -2281,7 +2281,7 @@ define { <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld2r(i64* %A, i64** %ptr, i
;CHECK-LABEL: test_v1i64_post_reg_ld2r:
;CHECK: ld2r.1d { v0, v1 }, [x0], x{{[0-9]+}}
%ld2 = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2r.v1i64.p0i64(i64* %A)
- %tmp = getelementptr i64* %A, i64 %inc
+ %tmp = getelementptr i64, i64* %A, i64 %inc
store i64* %tmp, i64** %ptr
ret { <1 x i64>, <1 x i64> } %ld2
}
@@ -2293,7 +2293,7 @@ define { <4 x float>, <4 x float> } @test_v4f32_post_imm_ld2r(float* %A, float**
;CHECK-LABEL: test_v4f32_post_imm_ld2r:
;CHECK: ld2r.4s { v0, v1 }, [x0], #8
%ld2 = call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2r.v4f32.p0f32(float* %A)
- %tmp = getelementptr float* %A, i32 2
+ %tmp = getelementptr float, float* %A, i32 2
store float* %tmp, float** %ptr
ret { <4 x float>, <4 x float> } %ld2
}
@@ -2302,7 +2302,7 @@ define { <4 x float>, <4 x float> } @test_v4f32_post_reg_ld2r(float* %A, float**
;CHECK-LABEL: test_v4f32_post_reg_ld2r:
;CHECK: ld2r.4s { v0, v1 }, [x0], x{{[0-9]+}}
%ld2 = call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2r.v4f32.p0f32(float* %A)
- %tmp = getelementptr float* %A, i64 %inc
+ %tmp = getelementptr float, float* %A, i64 %inc
store float* %tmp, float** %ptr
ret { <4 x float>, <4 x float> } %ld2
}
@@ -2313,7 +2313,7 @@ define { <2 x float>, <2 x float> } @test_v2f32_post_imm_ld2r(float* %A, float**
;CHECK-LABEL: test_v2f32_post_imm_ld2r:
;CHECK: ld2r.2s { v0, v1 }, [x0], #8
%ld2 = call { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld2r.v2f32.p0f32(float* %A)
- %tmp = getelementptr float* %A, i32 2
+ %tmp = getelementptr float, float* %A, i32 2
store float* %tmp, float** %ptr
ret { <2 x float>, <2 x float> } %ld2
}
@@ -2322,7 +2322,7 @@ define { <2 x float>, <2 x float> } @test_v2f32_post_reg_ld2r(float* %A, float**
;CHECK-LABEL: test_v2f32_post_reg_ld2r:
;CHECK: ld2r.2s { v0, v1 }, [x0], x{{[0-9]+}}
%ld2 = call { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld2r.v2f32.p0f32(float* %A)
- %tmp = getelementptr float* %A, i64 %inc
+ %tmp = getelementptr float, float* %A, i64 %inc
store float* %tmp, float** %ptr
ret { <2 x float>, <2 x float> } %ld2
}
@@ -2334,7 +2334,7 @@ define { <2 x double>, <2 x double> } @test_v2f64_post_imm_ld2r(double* %A, doub
;CHECK-LABEL: test_v2f64_post_imm_ld2r:
;CHECK: ld2r.2d { v0, v1 }, [x0], #16
%ld2 = call { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld2r.v2f64.p0f64(double* %A)
- %tmp = getelementptr double* %A, i32 2
+ %tmp = getelementptr double, double* %A, i32 2
store double* %tmp, double** %ptr
ret { <2 x double>, <2 x double> } %ld2
}
@@ -2343,7 +2343,7 @@ define { <2 x double>, <2 x double> } @test_v2f64_post_reg_ld2r(double* %A, doub
;CHECK-LABEL: test_v2f64_post_reg_ld2r:
;CHECK: ld2r.2d { v0, v1 }, [x0], x{{[0-9]+}}
%ld2 = call { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld2r.v2f64.p0f64(double* %A)
- %tmp = getelementptr double* %A, i64 %inc
+ %tmp = getelementptr double, double* %A, i64 %inc
store double* %tmp, double** %ptr
ret { <2 x double>, <2 x double> } %ld2
}
@@ -2354,7 +2354,7 @@ define { <1 x double>, <1 x double> } @test_v1f64_post_imm_ld2r(double* %A, doub
;CHECK-LABEL: test_v1f64_post_imm_ld2r:
;CHECK: ld2r.1d { v0, v1 }, [x0], #16
%ld2 = call { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld2r.v1f64.p0f64(double* %A)
- %tmp = getelementptr double* %A, i32 2
+ %tmp = getelementptr double, double* %A, i32 2
store double* %tmp, double** %ptr
ret { <1 x double>, <1 x double> } %ld2
}
@@ -2363,7 +2363,7 @@ define { <1 x double>, <1 x double> } @test_v1f64_post_reg_ld2r(double* %A, doub
;CHECK-LABEL: test_v1f64_post_reg_ld2r:
;CHECK: ld2r.1d { v0, v1 }, [x0], x{{[0-9]+}}
%ld2 = call { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld2r.v1f64.p0f64(double* %A)
- %tmp = getelementptr double* %A, i64 %inc
+ %tmp = getelementptr double, double* %A, i64 %inc
store double* %tmp, double** %ptr
ret { <1 x double>, <1 x double> } %ld2
}
@@ -2375,7 +2375,7 @@ define { <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld3r(i8* %A, i8*
;CHECK-LABEL: test_v16i8_post_imm_ld3r:
;CHECK: ld3r.16b { v0, v1, v2 }, [x0], #3
%ld3 = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3r.v16i8.p0i8(i8* %A)
- %tmp = getelementptr i8* %A, i32 3
+ %tmp = getelementptr i8, i8* %A, i32 3
store i8* %tmp, i8** %ptr
ret { <16 x i8>, <16 x i8>, <16 x i8> } %ld3
}
@@ -2384,7 +2384,7 @@ define { <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld3r(i8* %A, i8*
;CHECK-LABEL: test_v16i8_post_reg_ld3r:
;CHECK: ld3r.16b { v0, v1, v2 }, [x0], x{{[0-9]+}}
%ld3 = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3r.v16i8.p0i8(i8* %A)
- %tmp = getelementptr i8* %A, i64 %inc
+ %tmp = getelementptr i8, i8* %A, i64 %inc
store i8* %tmp, i8** %ptr
ret { <16 x i8>, <16 x i8>, <16 x i8> } %ld3
}
@@ -2396,7 +2396,7 @@ define { <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld3r(i8* %A, i8** %p
;CHECK-LABEL: test_v8i8_post_imm_ld3r:
;CHECK: ld3r.8b { v0, v1, v2 }, [x0], #3
%ld3 = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3r.v8i8.p0i8(i8* %A)
- %tmp = getelementptr i8* %A, i32 3
+ %tmp = getelementptr i8, i8* %A, i32 3
store i8* %tmp, i8** %ptr
ret { <8 x i8>, <8 x i8>, <8 x i8> } %ld3
}
@@ -2405,7 +2405,7 @@ define { <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld3r(i8* %A, i8** %p
;CHECK-LABEL: test_v8i8_post_reg_ld3r:
;CHECK: ld3r.8b { v0, v1, v2 }, [x0], x{{[0-9]+}}
%ld3 = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3r.v8i8.p0i8(i8* %A)
- %tmp = getelementptr i8* %A, i64 %inc
+ %tmp = getelementptr i8, i8* %A, i64 %inc
store i8* %tmp, i8** %ptr
ret { <8 x i8>, <8 x i8>, <8 x i8> } %ld3
}
@@ -2417,7 +2417,7 @@ define { <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld3r(i16* %A, i1
;CHECK-LABEL: test_v8i16_post_imm_ld3r:
;CHECK: ld3r.8h { v0, v1, v2 }, [x0], #6
%ld3 = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3r.v8i16.p0i16(i16* %A)
- %tmp = getelementptr i16* %A, i32 3
+ %tmp = getelementptr i16, i16* %A, i32 3
store i16* %tmp, i16** %ptr
ret { <8 x i16>, <8 x i16>, <8 x i16> } %ld3
}
@@ -2426,7 +2426,7 @@ define { <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld3r(i16* %A, i1
;CHECK-LABEL: test_v8i16_post_reg_ld3r:
;CHECK: ld3r.8h { v0, v1, v2 }, [x0], x{{[0-9]+}}
%ld3 = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3r.v8i16.p0i16(i16* %A)
- %tmp = getelementptr i16* %A, i64 %inc
+ %tmp = getelementptr i16, i16* %A, i64 %inc
store i16* %tmp, i16** %ptr
ret { <8 x i16>, <8 x i16>, <8 x i16> } %ld3
}
@@ -2438,7 +2438,7 @@ define { <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld3r(i16* %A, i1
;CHECK-LABEL: test_v4i16_post_imm_ld3r:
;CHECK: ld3r.4h { v0, v1, v2 }, [x0], #6
%ld3 = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3r.v4i16.p0i16(i16* %A)
- %tmp = getelementptr i16* %A, i32 3
+ %tmp = getelementptr i16, i16* %A, i32 3
store i16* %tmp, i16** %ptr
ret { <4 x i16>, <4 x i16>, <4 x i16> } %ld3
}
@@ -2447,7 +2447,7 @@ define { <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld3r(i16* %A, i1
;CHECK-LABEL: test_v4i16_post_reg_ld3r:
;CHECK: ld3r.4h { v0, v1, v2 }, [x0], x{{[0-9]+}}
%ld3 = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3r.v4i16.p0i16(i16* %A)
- %tmp = getelementptr i16* %A, i64 %inc
+ %tmp = getelementptr i16, i16* %A, i64 %inc
store i16* %tmp, i16** %ptr
ret { <4 x i16>, <4 x i16>, <4 x i16> } %ld3
}
@@ -2459,7 +2459,7 @@ define { <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld3r(i32* %A, i3
;CHECK-LABEL: test_v4i32_post_imm_ld3r:
;CHECK: ld3r.4s { v0, v1, v2 }, [x0], #12
%ld3 = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3r.v4i32.p0i32(i32* %A)
- %tmp = getelementptr i32* %A, i32 3
+ %tmp = getelementptr i32, i32* %A, i32 3
store i32* %tmp, i32** %ptr
ret { <4 x i32>, <4 x i32>, <4 x i32> } %ld3
}
@@ -2468,7 +2468,7 @@ define { <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld3r(i32* %A, i3
;CHECK-LABEL: test_v4i32_post_reg_ld3r:
;CHECK: ld3r.4s { v0, v1, v2 }, [x0], x{{[0-9]+}}
%ld3 = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3r.v4i32.p0i32(i32* %A)
- %tmp = getelementptr i32* %A, i64 %inc
+ %tmp = getelementptr i32, i32* %A, i64 %inc
store i32* %tmp, i32** %ptr
ret { <4 x i32>, <4 x i32>, <4 x i32> } %ld3
}
@@ -2479,7 +2479,7 @@ define { <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld3r(i32* %A, i3
;CHECK-LABEL: test_v2i32_post_imm_ld3r:
;CHECK: ld3r.2s { v0, v1, v2 }, [x0], #12
%ld3 = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3r.v2i32.p0i32(i32* %A)
- %tmp = getelementptr i32* %A, i32 3
+ %tmp = getelementptr i32, i32* %A, i32 3
store i32* %tmp, i32** %ptr
ret { <2 x i32>, <2 x i32>, <2 x i32> } %ld3
}
@@ -2488,7 +2488,7 @@ define { <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld3r(i32* %A, i3
;CHECK-LABEL: test_v2i32_post_reg_ld3r:
;CHECK: ld3r.2s { v0, v1, v2 }, [x0], x{{[0-9]+}}
%ld3 = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3r.v2i32.p0i32(i32* %A)
- %tmp = getelementptr i32* %A, i64 %inc
+ %tmp = getelementptr i32, i32* %A, i64 %inc
store i32* %tmp, i32** %ptr
ret { <2 x i32>, <2 x i32>, <2 x i32> } %ld3
}
@@ -2500,7 +2500,7 @@ define { <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld3r(i64* %A, i6
;CHECK-LABEL: test_v2i64_post_imm_ld3r:
;CHECK: ld3r.2d { v0, v1, v2 }, [x0], #24
%ld3 = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3r.v2i64.p0i64(i64* %A)
- %tmp = getelementptr i64* %A, i32 3
+ %tmp = getelementptr i64, i64* %A, i32 3
store i64* %tmp, i64** %ptr
ret { <2 x i64>, <2 x i64>, <2 x i64> } %ld3
}
@@ -2509,7 +2509,7 @@ define { <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld3r(i64* %A, i6
;CHECK-LABEL: test_v2i64_post_reg_ld3r:
;CHECK: ld3r.2d { v0, v1, v2 }, [x0], x{{[0-9]+}}
%ld3 = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3r.v2i64.p0i64(i64* %A)
- %tmp = getelementptr i64* %A, i64 %inc
+ %tmp = getelementptr i64, i64* %A, i64 %inc
store i64* %tmp, i64** %ptr
ret { <2 x i64>, <2 x i64>, <2 x i64> } %ld3
}
@@ -2520,7 +2520,7 @@ define { <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld3r(i64* %A, i6
;CHECK-LABEL: test_v1i64_post_imm_ld3r:
;CHECK: ld3r.1d { v0, v1, v2 }, [x0], #24
%ld3 = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3r.v1i64.p0i64(i64* %A)
- %tmp = getelementptr i64* %A, i32 3
+ %tmp = getelementptr i64, i64* %A, i32 3
store i64* %tmp, i64** %ptr
ret { <1 x i64>, <1 x i64>, <1 x i64> } %ld3
}
@@ -2529,7 +2529,7 @@ define { <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld3r(i64* %A, i6
;CHECK-LABEL: test_v1i64_post_reg_ld3r:
;CHECK: ld3r.1d { v0, v1, v2 }, [x0], x{{[0-9]+}}
%ld3 = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3r.v1i64.p0i64(i64* %A)
- %tmp = getelementptr i64* %A, i64 %inc
+ %tmp = getelementptr i64, i64* %A, i64 %inc
store i64* %tmp, i64** %ptr
ret { <1 x i64>, <1 x i64>, <1 x i64> } %ld3
}
@@ -2541,7 +2541,7 @@ define { <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_imm_ld3r(float
;CHECK-LABEL: test_v4f32_post_imm_ld3r:
;CHECK: ld3r.4s { v0, v1, v2 }, [x0], #12
%ld3 = call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3r.v4f32.p0f32(float* %A)
- %tmp = getelementptr float* %A, i32 3
+ %tmp = getelementptr float, float* %A, i32 3
store float* %tmp, float** %ptr
ret { <4 x float>, <4 x float>, <4 x float> } %ld3
}
@@ -2550,7 +2550,7 @@ define { <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_reg_ld3r(float
;CHECK-LABEL: test_v4f32_post_reg_ld3r:
;CHECK: ld3r.4s { v0, v1, v2 }, [x0], x{{[0-9]+}}
%ld3 = call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3r.v4f32.p0f32(float* %A)
- %tmp = getelementptr float* %A, i64 %inc
+ %tmp = getelementptr float, float* %A, i64 %inc
store float* %tmp, float** %ptr
ret { <4 x float>, <4 x float>, <4 x float> } %ld3
}
@@ -2561,7 +2561,7 @@ define { <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_imm_ld3r(float
;CHECK-LABEL: test_v2f32_post_imm_ld3r:
;CHECK: ld3r.2s { v0, v1, v2 }, [x0], #12
%ld3 = call { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld3r.v2f32.p0f32(float* %A)
- %tmp = getelementptr float* %A, i32 3
+ %tmp = getelementptr float, float* %A, i32 3
store float* %tmp, float** %ptr
ret { <2 x float>, <2 x float>, <2 x float> } %ld3
}
@@ -2570,7 +2570,7 @@ define { <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_reg_ld3r(float
;CHECK-LABEL: test_v2f32_post_reg_ld3r:
;CHECK: ld3r.2s { v0, v1, v2 }, [x0], x{{[0-9]+}}
%ld3 = call { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld3r.v2f32.p0f32(float* %A)
- %tmp = getelementptr float* %A, i64 %inc
+ %tmp = getelementptr float, float* %A, i64 %inc
store float* %tmp, float** %ptr
ret { <2 x float>, <2 x float>, <2 x float> } %ld3
}
@@ -2582,7 +2582,7 @@ define { <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_imm_ld3r(do
;CHECK-LABEL: test_v2f64_post_imm_ld3r:
;CHECK: ld3r.2d { v0, v1, v2 }, [x0], #24
%ld3 = call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld3r.v2f64.p0f64(double* %A)
- %tmp = getelementptr double* %A, i32 3
+ %tmp = getelementptr double, double* %A, i32 3
store double* %tmp, double** %ptr
ret { <2 x double>, <2 x double>, <2 x double> } %ld3
}
@@ -2591,7 +2591,7 @@ define { <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_reg_ld3r(do
;CHECK-LABEL: test_v2f64_post_reg_ld3r:
;CHECK: ld3r.2d { v0, v1, v2 }, [x0], x{{[0-9]+}}
%ld3 = call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld3r.v2f64.p0f64(double* %A)
- %tmp = getelementptr double* %A, i64 %inc
+ %tmp = getelementptr double, double* %A, i64 %inc
store double* %tmp, double** %ptr
ret { <2 x double>, <2 x double>, <2 x double> } %ld3
}
@@ -2602,7 +2602,7 @@ define { <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_imm_ld3r(do
;CHECK-LABEL: test_v1f64_post_imm_ld3r:
;CHECK: ld3r.1d { v0, v1, v2 }, [x0], #24
%ld3 = call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld3r.v1f64.p0f64(double* %A)
- %tmp = getelementptr double* %A, i32 3
+ %tmp = getelementptr double, double* %A, i32 3
store double* %tmp, double** %ptr
ret { <1 x double>, <1 x double>, <1 x double> } %ld3
}
@@ -2611,7 +2611,7 @@ define { <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_reg_ld3r(do
;CHECK-LABEL: test_v1f64_post_reg_ld3r:
;CHECK: ld3r.1d { v0, v1, v2 }, [x0], x{{[0-9]+}}
%ld3 = call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld3r.v1f64.p0f64(double* %A)
- %tmp = getelementptr double* %A, i64 %inc
+ %tmp = getelementptr double, double* %A, i64 %inc
store double* %tmp, double** %ptr
ret { <1 x double>, <1 x double>, <1 x double> } %ld3
}
@@ -2623,7 +2623,7 @@ define { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld4r(
;CHECK-LABEL: test_v16i8_post_imm_ld4r:
;CHECK: ld4r.16b { v0, v1, v2, v3 }, [x0], #4
%ld4 = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4r.v16i8.p0i8(i8* %A)
- %tmp = getelementptr i8* %A, i32 4
+ %tmp = getelementptr i8, i8* %A, i32 4
store i8* %tmp, i8** %ptr
ret { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %ld4
}
@@ -2632,7 +2632,7 @@ define { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld4r(
;CHECK-LABEL: test_v16i8_post_reg_ld4r:
;CHECK: ld4r.16b { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
%ld4 = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4r.v16i8.p0i8(i8* %A)
- %tmp = getelementptr i8* %A, i64 %inc
+ %tmp = getelementptr i8, i8* %A, i64 %inc
store i8* %tmp, i8** %ptr
ret { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %ld4
}
@@ -2644,7 +2644,7 @@ define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld4r(i8* %
;CHECK-LABEL: test_v8i8_post_imm_ld4r:
;CHECK: ld4r.8b { v0, v1, v2, v3 }, [x0], #4
%ld4 = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4r.v8i8.p0i8(i8* %A)
- %tmp = getelementptr i8* %A, i32 4
+ %tmp = getelementptr i8, i8* %A, i32 4
store i8* %tmp, i8** %ptr
ret { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %ld4
}
@@ -2653,7 +2653,7 @@ define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld4r(i8* %
;CHECK-LABEL: test_v8i8_post_reg_ld4r:
;CHECK: ld4r.8b { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
%ld4 = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4r.v8i8.p0i8(i8* %A)
- %tmp = getelementptr i8* %A, i64 %inc
+ %tmp = getelementptr i8, i8* %A, i64 %inc
store i8* %tmp, i8** %ptr
ret { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %ld4
}
@@ -2665,7 +2665,7 @@ define { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld4r(
;CHECK-LABEL: test_v8i16_post_imm_ld4r:
;CHECK: ld4r.8h { v0, v1, v2, v3 }, [x0], #8
%ld4 = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4r.v8i16.p0i16(i16* %A)
- %tmp = getelementptr i16* %A, i32 4
+ %tmp = getelementptr i16, i16* %A, i32 4
store i16* %tmp, i16** %ptr
ret { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %ld4
}
@@ -2674,7 +2674,7 @@ define { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld4r(
;CHECK-LABEL: test_v8i16_post_reg_ld4r:
;CHECK: ld4r.8h { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
%ld4 = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4r.v8i16.p0i16(i16* %A)
- %tmp = getelementptr i16* %A, i64 %inc
+ %tmp = getelementptr i16, i16* %A, i64 %inc
store i16* %tmp, i16** %ptr
ret { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %ld4
}
@@ -2686,7 +2686,7 @@ define { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld4r(
;CHECK-LABEL: test_v4i16_post_imm_ld4r:
;CHECK: ld4r.4h { v0, v1, v2, v3 }, [x0], #8
%ld4 = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4r.v4i16.p0i16(i16* %A)
- %tmp = getelementptr i16* %A, i32 4
+ %tmp = getelementptr i16, i16* %A, i32 4
store i16* %tmp, i16** %ptr
ret { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %ld4
}
@@ -2695,7 +2695,7 @@ define { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld4r(
;CHECK-LABEL: test_v4i16_post_reg_ld4r:
;CHECK: ld4r.4h { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
%ld4 = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4r.v4i16.p0i16(i16* %A)
- %tmp = getelementptr i16* %A, i64 %inc
+ %tmp = getelementptr i16, i16* %A, i64 %inc
store i16* %tmp, i16** %ptr
ret { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %ld4
}
@@ -2707,7 +2707,7 @@ define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld4r(
;CHECK-LABEL: test_v4i32_post_imm_ld4r:
;CHECK: ld4r.4s { v0, v1, v2, v3 }, [x0], #16
%ld4 = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4r.v4i32.p0i32(i32* %A)
- %tmp = getelementptr i32* %A, i32 4
+ %tmp = getelementptr i32, i32* %A, i32 4
store i32* %tmp, i32** %ptr
ret { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %ld4
}
@@ -2716,7 +2716,7 @@ define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld4r(
;CHECK-LABEL: test_v4i32_post_reg_ld4r:
;CHECK: ld4r.4s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
%ld4 = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4r.v4i32.p0i32(i32* %A)
- %tmp = getelementptr i32* %A, i64 %inc
+ %tmp = getelementptr i32, i32* %A, i64 %inc
store i32* %tmp, i32** %ptr
ret { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %ld4
}
@@ -2727,7 +2727,7 @@ define { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld4r(
;CHECK-LABEL: test_v2i32_post_imm_ld4r:
;CHECK: ld4r.2s { v0, v1, v2, v3 }, [x0], #16
%ld4 = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4r.v2i32.p0i32(i32* %A)
- %tmp = getelementptr i32* %A, i32 4
+ %tmp = getelementptr i32, i32* %A, i32 4
store i32* %tmp, i32** %ptr
ret { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %ld4
}
@@ -2736,7 +2736,7 @@ define { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld4r(
;CHECK-LABEL: test_v2i32_post_reg_ld4r:
;CHECK: ld4r.2s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
%ld4 = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4r.v2i32.p0i32(i32* %A)
- %tmp = getelementptr i32* %A, i64 %inc
+ %tmp = getelementptr i32, i32* %A, i64 %inc
store i32* %tmp, i32** %ptr
ret { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %ld4
}
@@ -2748,7 +2748,7 @@ define { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld4r(
;CHECK-LABEL: test_v2i64_post_imm_ld4r:
;CHECK: ld4r.2d { v0, v1, v2, v3 }, [x0], #32
%ld4 = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4r.v2i64.p0i64(i64* %A)
- %tmp = getelementptr i64* %A, i32 4
+ %tmp = getelementptr i64, i64* %A, i32 4
store i64* %tmp, i64** %ptr
ret { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %ld4
}
@@ -2757,7 +2757,7 @@ define { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld4r(
;CHECK-LABEL: test_v2i64_post_reg_ld4r:
;CHECK: ld4r.2d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
%ld4 = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4r.v2i64.p0i64(i64* %A)
- %tmp = getelementptr i64* %A, i64 %inc
+ %tmp = getelementptr i64, i64* %A, i64 %inc
store i64* %tmp, i64** %ptr
ret { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %ld4
}
@@ -2768,7 +2768,7 @@ define { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld4r(
;CHECK-LABEL: test_v1i64_post_imm_ld4r:
;CHECK: ld4r.1d { v0, v1, v2, v3 }, [x0], #32
%ld4 = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4r.v1i64.p0i64(i64* %A)
- %tmp = getelementptr i64* %A, i32 4
+ %tmp = getelementptr i64, i64* %A, i32 4
store i64* %tmp, i64** %ptr
ret { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %ld4
}
@@ -2777,7 +2777,7 @@ define { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld4r(
;CHECK-LABEL: test_v1i64_post_reg_ld4r:
;CHECK: ld4r.1d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
%ld4 = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4r.v1i64.p0i64(i64* %A)
- %tmp = getelementptr i64* %A, i64 %inc
+ %tmp = getelementptr i64, i64* %A, i64 %inc
store i64* %tmp, i64** %ptr
ret { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %ld4
}
@@ -2789,7 +2789,7 @@ define { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_i
;CHECK-LABEL: test_v4f32_post_imm_ld4r:
;CHECK: ld4r.4s { v0, v1, v2, v3 }, [x0], #16
%ld4 = call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld4r.v4f32.p0f32(float* %A)
- %tmp = getelementptr float* %A, i32 4
+ %tmp = getelementptr float, float* %A, i32 4
store float* %tmp, float** %ptr
ret { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %ld4
}
@@ -2798,7 +2798,7 @@ define { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_r
;CHECK-LABEL: test_v4f32_post_reg_ld4r:
;CHECK: ld4r.4s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
%ld4 = call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld4r.v4f32.p0f32(float* %A)
- %tmp = getelementptr float* %A, i64 %inc
+ %tmp = getelementptr float, float* %A, i64 %inc
store float* %tmp, float** %ptr
ret { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %ld4
}
@@ -2809,7 +2809,7 @@ define { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_i
;CHECK-LABEL: test_v2f32_post_imm_ld4r:
;CHECK: ld4r.2s { v0, v1, v2, v3 }, [x0], #16
%ld4 = call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld4r.v2f32.p0f32(float* %A)
- %tmp = getelementptr float* %A, i32 4
+ %tmp = getelementptr float, float* %A, i32 4
store float* %tmp, float** %ptr
ret { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %ld4
}
@@ -2818,7 +2818,7 @@ define { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_r
;CHECK-LABEL: test_v2f32_post_reg_ld4r:
;CHECK: ld4r.2s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
%ld4 = call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld4r.v2f32.p0f32(float* %A)
- %tmp = getelementptr float* %A, i64 %inc
+ %tmp = getelementptr float, float* %A, i64 %inc
store float* %tmp, float** %ptr
ret { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %ld4
}
@@ -2830,7 +2830,7 @@ define { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @test_v2f64_po
;CHECK-LABEL: test_v2f64_post_imm_ld4r:
;CHECK: ld4r.2d { v0, v1, v2, v3 }, [x0], #32
%ld4 = call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld4r.v2f64.p0f64(double* %A)
- %tmp = getelementptr double* %A, i32 4
+ %tmp = getelementptr double, double* %A, i32 4
store double* %tmp, double** %ptr
ret { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %ld4
}
@@ -2839,7 +2839,7 @@ define { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @test_v2f64_po
;CHECK-LABEL: test_v2f64_post_reg_ld4r:
;CHECK: ld4r.2d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
%ld4 = call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld4r.v2f64.p0f64(double* %A)
- %tmp = getelementptr double* %A, i64 %inc
+ %tmp = getelementptr double, double* %A, i64 %inc
store double* %tmp, double** %ptr
ret { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %ld4
}
@@ -2850,7 +2850,7 @@ define { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @test_v1f64_po
;CHECK-LABEL: test_v1f64_post_imm_ld4r:
;CHECK: ld4r.1d { v0, v1, v2, v3 }, [x0], #32
%ld4 = call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld4r.v1f64.p0f64(double* %A)
- %tmp = getelementptr double* %A, i32 4
+ %tmp = getelementptr double, double* %A, i32 4
store double* %tmp, double** %ptr
ret { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %ld4
}
@@ -2859,7 +2859,7 @@ define { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @test_v1f64_po
;CHECK-LABEL: test_v1f64_post_reg_ld4r:
;CHECK: ld4r.1d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
%ld4 = call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld4r.v1f64.p0f64(double* %A)
- %tmp = getelementptr double* %A, i64 %inc
+ %tmp = getelementptr double, double* %A, i64 %inc
store double* %tmp, double** %ptr
ret { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %ld4
}
@@ -2871,7 +2871,7 @@ define { <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld2lane(i8* %A, i8** %ptr,
;CHECK-LABEL: test_v16i8_post_imm_ld2lane:
;CHECK: ld2.b { v0, v1 }[0], [x0], #2
%ld2 = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2lane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, i64 0, i8* %A)
- %tmp = getelementptr i8* %A, i32 2
+ %tmp = getelementptr i8, i8* %A, i32 2
store i8* %tmp, i8** %ptr
ret { <16 x i8>, <16 x i8> } %ld2
}
@@ -2880,7 +2880,7 @@ define { <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld2lane(i8* %A, i8** %ptr,
;CHECK-LABEL: test_v16i8_post_reg_ld2lane:
;CHECK: ld2.b { v0, v1 }[0], [x0], x{{[0-9]+}}
%ld2 = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2lane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, i64 0, i8* %A)
- %tmp = getelementptr i8* %A, i64 %inc
+ %tmp = getelementptr i8, i8* %A, i64 %inc
store i8* %tmp, i8** %ptr
ret { <16 x i8>, <16 x i8> } %ld2
}
@@ -2892,7 +2892,7 @@ define { <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld2lane(i8* %A, i8** %ptr, <8
;CHECK-LABEL: test_v8i8_post_imm_ld2lane:
;CHECK: ld2.b { v0, v1 }[0], [x0], #2
%ld2 = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2lane.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, i64 0, i8* %A)
- %tmp = getelementptr i8* %A, i32 2
+ %tmp = getelementptr i8, i8* %A, i32 2
store i8* %tmp, i8** %ptr
ret { <8 x i8>, <8 x i8> } %ld2
}
@@ -2901,7 +2901,7 @@ define { <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld2lane(i8* %A, i8** %ptr, i64
;CHECK-LABEL: test_v8i8_post_reg_ld2lane:
;CHECK: ld2.b { v0, v1 }[0], [x0], x{{[0-9]+}}
%ld2 = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2lane.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, i64 0, i8* %A)
- %tmp = getelementptr i8* %A, i64 %inc
+ %tmp = getelementptr i8, i8* %A, i64 %inc
store i8* %tmp, i8** %ptr
ret { <8 x i8>, <8 x i8> } %ld2
}
@@ -2913,7 +2913,7 @@ define { <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld2lane(i16* %A, i16** %ptr
;CHECK-LABEL: test_v8i16_post_imm_ld2lane:
;CHECK: ld2.h { v0, v1 }[0], [x0], #4
%ld2 = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2lane.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, i64 0, i16* %A)
- %tmp = getelementptr i16* %A, i32 2
+ %tmp = getelementptr i16, i16* %A, i32 2
store i16* %tmp, i16** %ptr
ret { <8 x i16>, <8 x i16> } %ld2
}
@@ -2922,7 +2922,7 @@ define { <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld2lane(i16* %A, i16** %ptr
;CHECK-LABEL: test_v8i16_post_reg_ld2lane:
;CHECK: ld2.h { v0, v1 }[0], [x0], x{{[0-9]+}}
%ld2 = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2lane.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, i64 0, i16* %A)
- %tmp = getelementptr i16* %A, i64 %inc
+ %tmp = getelementptr i16, i16* %A, i64 %inc
store i16* %tmp, i16** %ptr
ret { <8 x i16>, <8 x i16> } %ld2
}
@@ -2934,7 +2934,7 @@ define { <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld2lane(i16* %A, i16** %ptr
;CHECK-LABEL: test_v4i16_post_imm_ld2lane:
;CHECK: ld2.h { v0, v1 }[0], [x0], #4
%ld2 = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2lane.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, i64 0, i16* %A)
- %tmp = getelementptr i16* %A, i32 2
+ %tmp = getelementptr i16, i16* %A, i32 2
store i16* %tmp, i16** %ptr
ret { <4 x i16>, <4 x i16> } %ld2
}
@@ -2943,7 +2943,7 @@ define { <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld2lane(i16* %A, i16** %ptr
;CHECK-LABEL: test_v4i16_post_reg_ld2lane:
;CHECK: ld2.h { v0, v1 }[0], [x0], x{{[0-9]+}}
%ld2 = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2lane.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, i64 0, i16* %A)
- %tmp = getelementptr i16* %A, i64 %inc
+ %tmp = getelementptr i16, i16* %A, i64 %inc
store i16* %tmp, i16** %ptr
ret { <4 x i16>, <4 x i16> } %ld2
}
@@ -2955,7 +2955,7 @@ define { <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld2lane(i32* %A, i32** %ptr
;CHECK-LABEL: test_v4i32_post_imm_ld2lane:
;CHECK: ld2.s { v0, v1 }[0], [x0], #8
%ld2 = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2lane.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, i64 0, i32* %A)
- %tmp = getelementptr i32* %A, i32 2
+ %tmp = getelementptr i32, i32* %A, i32 2
store i32* %tmp, i32** %ptr
ret { <4 x i32>, <4 x i32> } %ld2
}
@@ -2964,7 +2964,7 @@ define { <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld2lane(i32* %A, i32** %ptr
;CHECK-LABEL: test_v4i32_post_reg_ld2lane:
;CHECK: ld2.s { v0, v1 }[0], [x0], x{{[0-9]+}}
%ld2 = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2lane.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, i64 0, i32* %A)
- %tmp = getelementptr i32* %A, i64 %inc
+ %tmp = getelementptr i32, i32* %A, i64 %inc
store i32* %tmp, i32** %ptr
ret { <4 x i32>, <4 x i32> } %ld2
}
@@ -2976,7 +2976,7 @@ define { <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld2lane(i32* %A, i32** %ptr
;CHECK-LABEL: test_v2i32_post_imm_ld2lane:
;CHECK: ld2.s { v0, v1 }[0], [x0], #8
%ld2 = call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2lane.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, i64 0, i32* %A)
- %tmp = getelementptr i32* %A, i32 2
+ %tmp = getelementptr i32, i32* %A, i32 2
store i32* %tmp, i32** %ptr
ret { <2 x i32>, <2 x i32> } %ld2
}
@@ -2985,7 +2985,7 @@ define { <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld2lane(i32* %A, i32** %ptr
;CHECK-LABEL: test_v2i32_post_reg_ld2lane:
;CHECK: ld2.s { v0, v1 }[0], [x0], x{{[0-9]+}}
%ld2 = call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2lane.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, i64 0, i32* %A)
- %tmp = getelementptr i32* %A, i64 %inc
+ %tmp = getelementptr i32, i32* %A, i64 %inc
store i32* %tmp, i32** %ptr
ret { <2 x i32>, <2 x i32> } %ld2
}
@@ -2997,7 +2997,7 @@ define { <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld2lane(i64* %A, i64** %ptr
;CHECK-LABEL: test_v2i64_post_imm_ld2lane:
;CHECK: ld2.d { v0, v1 }[0], [x0], #16
%ld2 = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2lane.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, i64 0, i64* %A)
- %tmp = getelementptr i64* %A, i32 2
+ %tmp = getelementptr i64, i64* %A, i32 2
store i64* %tmp, i64** %ptr
ret { <2 x i64>, <2 x i64> } %ld2
}
@@ -3006,7 +3006,7 @@ define { <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld2lane(i64* %A, i64** %ptr
;CHECK-LABEL: test_v2i64_post_reg_ld2lane:
;CHECK: ld2.d { v0, v1 }[0], [x0], x{{[0-9]+}}
%ld2 = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2lane.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, i64 0, i64* %A)
- %tmp = getelementptr i64* %A, i64 %inc
+ %tmp = getelementptr i64, i64* %A, i64 %inc
store i64* %tmp, i64** %ptr
ret { <2 x i64>, <2 x i64> } %ld2
}
@@ -3018,7 +3018,7 @@ define { <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld2lane(i64* %A, i64** %ptr
;CHECK-LABEL: test_v1i64_post_imm_ld2lane:
;CHECK: ld2.d { v0, v1 }[0], [x0], #16
%ld2 = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2lane.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, i64 0, i64* %A)
- %tmp = getelementptr i64* %A, i32 2
+ %tmp = getelementptr i64, i64* %A, i32 2
store i64* %tmp, i64** %ptr
ret { <1 x i64>, <1 x i64> } %ld2
}
@@ -3027,7 +3027,7 @@ define { <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld2lane(i64* %A, i64** %ptr
;CHECK-LABEL: test_v1i64_post_reg_ld2lane:
;CHECK: ld2.d { v0, v1 }[0], [x0], x{{[0-9]+}}
%ld2 = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2lane.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, i64 0, i64* %A)
- %tmp = getelementptr i64* %A, i64 %inc
+ %tmp = getelementptr i64, i64* %A, i64 %inc
store i64* %tmp, i64** %ptr
ret { <1 x i64>, <1 x i64> } %ld2
}
@@ -3039,7 +3039,7 @@ define { <4 x float>, <4 x float> } @test_v4f32_post_imm_ld2lane(float* %A, floa
;CHECK-LABEL: test_v4f32_post_imm_ld2lane:
;CHECK: ld2.s { v0, v1 }[0], [x0], #8
%ld2 = call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2lane.v4f32.p0f32(<4 x float> %B, <4 x float> %C, i64 0, float* %A)
- %tmp = getelementptr float* %A, i32 2
+ %tmp = getelementptr float, float* %A, i32 2
store float* %tmp, float** %ptr
ret { <4 x float>, <4 x float> } %ld2
}
@@ -3048,7 +3048,7 @@ define { <4 x float>, <4 x float> } @test_v4f32_post_reg_ld2lane(float* %A, floa
;CHECK-LABEL: test_v4f32_post_reg_ld2lane:
;CHECK: ld2.s { v0, v1 }[0], [x0], x{{[0-9]+}}
%ld2 = call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2lane.v4f32.p0f32(<4 x float> %B, <4 x float> %C, i64 0, float* %A)
- %tmp = getelementptr float* %A, i64 %inc
+ %tmp = getelementptr float, float* %A, i64 %inc
store float* %tmp, float** %ptr
ret { <4 x float>, <4 x float> } %ld2
}
@@ -3060,7 +3060,7 @@ define { <2 x float>, <2 x float> } @test_v2f32_post_imm_ld2lane(float* %A, floa
;CHECK-LABEL: test_v2f32_post_imm_ld2lane:
;CHECK: ld2.s { v0, v1 }[0], [x0], #8
%ld2 = call { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld2lane.v2f32.p0f32(<2 x float> %B, <2 x float> %C, i64 0, float* %A)
- %tmp = getelementptr float* %A, i32 2
+ %tmp = getelementptr float, float* %A, i32 2
store float* %tmp, float** %ptr
ret { <2 x float>, <2 x float> } %ld2
}
@@ -3069,7 +3069,7 @@ define { <2 x float>, <2 x float> } @test_v2f32_post_reg_ld2lane(float* %A, floa
;CHECK-LABEL: test_v2f32_post_reg_ld2lane:
;CHECK: ld2.s { v0, v1 }[0], [x0], x{{[0-9]+}}
%ld2 = call { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld2lane.v2f32.p0f32(<2 x float> %B, <2 x float> %C, i64 0, float* %A)
- %tmp = getelementptr float* %A, i64 %inc
+ %tmp = getelementptr float, float* %A, i64 %inc
store float* %tmp, float** %ptr
ret { <2 x float>, <2 x float> } %ld2
}
@@ -3081,7 +3081,7 @@ define { <2 x double>, <2 x double> } @test_v2f64_post_imm_ld2lane(double* %A, d
;CHECK-LABEL: test_v2f64_post_imm_ld2lane:
;CHECK: ld2.d { v0, v1 }[0], [x0], #16
%ld2 = call { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld2lane.v2f64.p0f64(<2 x double> %B, <2 x double> %C, i64 0, double* %A)
- %tmp = getelementptr double* %A, i32 2
+ %tmp = getelementptr double, double* %A, i32 2
store double* %tmp, double** %ptr
ret { <2 x double>, <2 x double> } %ld2
}
@@ -3090,7 +3090,7 @@ define { <2 x double>, <2 x double> } @test_v2f64_post_reg_ld2lane(double* %A, d
;CHECK-LABEL: test_v2f64_post_reg_ld2lane:
;CHECK: ld2.d { v0, v1 }[0], [x0], x{{[0-9]+}}
%ld2 = call { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld2lane.v2f64.p0f64(<2 x double> %B, <2 x double> %C, i64 0, double* %A)
- %tmp = getelementptr double* %A, i64 %inc
+ %tmp = getelementptr double, double* %A, i64 %inc
store double* %tmp, double** %ptr
ret { <2 x double>, <2 x double> } %ld2
}
@@ -3102,7 +3102,7 @@ define { <1 x double>, <1 x double> } @test_v1f64_post_imm_ld2lane(double* %A, d
;CHECK-LABEL: test_v1f64_post_imm_ld2lane:
;CHECK: ld2.d { v0, v1 }[0], [x0], #16
%ld2 = call { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld2lane.v1f64.p0f64(<1 x double> %B, <1 x double> %C, i64 0, double* %A)
- %tmp = getelementptr double* %A, i32 2
+ %tmp = getelementptr double, double* %A, i32 2
store double* %tmp, double** %ptr
ret { <1 x double>, <1 x double> } %ld2
}
@@ -3111,7 +3111,7 @@ define { <1 x double>, <1 x double> } @test_v1f64_post_reg_ld2lane(double* %A, d
;CHECK-LABEL: test_v1f64_post_reg_ld2lane:
;CHECK: ld2.d { v0, v1 }[0], [x0], x{{[0-9]+}}
%ld2 = call { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld2lane.v1f64.p0f64(<1 x double> %B, <1 x double> %C, i64 0, double* %A)
- %tmp = getelementptr double* %A, i64 %inc
+ %tmp = getelementptr double, double* %A, i64 %inc
store double* %tmp, double** %ptr
ret { <1 x double>, <1 x double> } %ld2
}
@@ -3123,7 +3123,7 @@ define { <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld3lane(i8* %A,
;CHECK-LABEL: test_v16i8_post_imm_ld3lane:
;CHECK: ld3.b { v0, v1, v2 }[0], [x0], #3
%ld3 = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3lane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i64 0, i8* %A)
- %tmp = getelementptr i8* %A, i32 3
+ %tmp = getelementptr i8, i8* %A, i32 3
store i8* %tmp, i8** %ptr
ret { <16 x i8>, <16 x i8>, <16 x i8> } %ld3
}
@@ -3132,7 +3132,7 @@ define { <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld3lane(i8* %A,
;CHECK-LABEL: test_v16i8_post_reg_ld3lane:
;CHECK: ld3.b { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
%ld3 = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3lane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i64 0, i8* %A)
- %tmp = getelementptr i8* %A, i64 %inc
+ %tmp = getelementptr i8, i8* %A, i64 %inc
store i8* %tmp, i8** %ptr
ret { <16 x i8>, <16 x i8>, <16 x i8> } %ld3
}
@@ -3144,7 +3144,7 @@ define { <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld3lane(i8* %A, i8**
;CHECK-LABEL: test_v8i8_post_imm_ld3lane:
;CHECK: ld3.b { v0, v1, v2 }[0], [x0], #3
%ld3 = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3lane.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i64 0, i8* %A)
- %tmp = getelementptr i8* %A, i32 3
+ %tmp = getelementptr i8, i8* %A, i32 3
store i8* %tmp, i8** %ptr
ret { <8 x i8>, <8 x i8>, <8 x i8> } %ld3
}
@@ -3153,7 +3153,7 @@ define { <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld3lane(i8* %A, i8**
;CHECK-LABEL: test_v8i8_post_reg_ld3lane:
;CHECK: ld3.b { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
%ld3 = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3lane.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i64 0, i8* %A)
- %tmp = getelementptr i8* %A, i64 %inc
+ %tmp = getelementptr i8, i8* %A, i64 %inc
store i8* %tmp, i8** %ptr
ret { <8 x i8>, <8 x i8>, <8 x i8> } %ld3
}
@@ -3165,7 +3165,7 @@ define { <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld3lane(i16* %A,
;CHECK-LABEL: test_v8i16_post_imm_ld3lane:
;CHECK: ld3.h { v0, v1, v2 }[0], [x0], #6
%ld3 = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3lane.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 0, i16* %A)
- %tmp = getelementptr i16* %A, i32 3
+ %tmp = getelementptr i16, i16* %A, i32 3
store i16* %tmp, i16** %ptr
ret { <8 x i16>, <8 x i16>, <8 x i16> } %ld3
}
@@ -3174,7 +3174,7 @@ define { <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld3lane(i16* %A,
;CHECK-LABEL: test_v8i16_post_reg_ld3lane:
;CHECK: ld3.h { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
%ld3 = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3lane.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 0, i16* %A)
- %tmp = getelementptr i16* %A, i64 %inc
+ %tmp = getelementptr i16, i16* %A, i64 %inc
store i16* %tmp, i16** %ptr
ret { <8 x i16>, <8 x i16>, <8 x i16> } %ld3
}
@@ -3186,7 +3186,7 @@ define { <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld3lane(i16* %A,
;CHECK-LABEL: test_v4i16_post_imm_ld3lane:
;CHECK: ld3.h { v0, v1, v2 }[0], [x0], #6
%ld3 = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3lane.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i64 0, i16* %A)
- %tmp = getelementptr i16* %A, i32 3
+ %tmp = getelementptr i16, i16* %A, i32 3
store i16* %tmp, i16** %ptr
ret { <4 x i16>, <4 x i16>, <4 x i16> } %ld3
}
@@ -3195,7 +3195,7 @@ define { <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld3lane(i16* %A,
;CHECK-LABEL: test_v4i16_post_reg_ld3lane:
;CHECK: ld3.h { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
%ld3 = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3lane.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i64 0, i16* %A)
- %tmp = getelementptr i16* %A, i64 %inc
+ %tmp = getelementptr i16, i16* %A, i64 %inc
store i16* %tmp, i16** %ptr
ret { <4 x i16>, <4 x i16>, <4 x i16> } %ld3
}
@@ -3207,7 +3207,7 @@ define { <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld3lane(i32* %A,
;CHECK-LABEL: test_v4i32_post_imm_ld3lane:
;CHECK: ld3.s { v0, v1, v2 }[0], [x0], #12
%ld3 = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3lane.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 0, i32* %A)
- %tmp = getelementptr i32* %A, i32 3
+ %tmp = getelementptr i32, i32* %A, i32 3
store i32* %tmp, i32** %ptr
ret { <4 x i32>, <4 x i32>, <4 x i32> } %ld3
}
@@ -3216,7 +3216,7 @@ define { <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld3lane(i32* %A,
;CHECK-LABEL: test_v4i32_post_reg_ld3lane:
;CHECK: ld3.s { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
%ld3 = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3lane.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 0, i32* %A)
- %tmp = getelementptr i32* %A, i64 %inc
+ %tmp = getelementptr i32, i32* %A, i64 %inc
store i32* %tmp, i32** %ptr
ret { <4 x i32>, <4 x i32>, <4 x i32> } %ld3
}
@@ -3228,7 +3228,7 @@ define { <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld3lane(i32* %A,
;CHECK-LABEL: test_v2i32_post_imm_ld3lane:
;CHECK: ld3.s { v0, v1, v2 }[0], [x0], #12
%ld3 = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3lane.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i64 0, i32* %A)
- %tmp = getelementptr i32* %A, i32 3
+ %tmp = getelementptr i32, i32* %A, i32 3
store i32* %tmp, i32** %ptr
ret { <2 x i32>, <2 x i32>, <2 x i32> } %ld3
}
@@ -3237,7 +3237,7 @@ define { <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld3lane(i32* %A,
;CHECK-LABEL: test_v2i32_post_reg_ld3lane:
;CHECK: ld3.s { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
%ld3 = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3lane.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i64 0, i32* %A)
- %tmp = getelementptr i32* %A, i64 %inc
+ %tmp = getelementptr i32, i32* %A, i64 %inc
store i32* %tmp, i32** %ptr
ret { <2 x i32>, <2 x i32>, <2 x i32> } %ld3
}
@@ -3249,7 +3249,7 @@ define { <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld3lane(i64* %A,
;CHECK-LABEL: test_v2i64_post_imm_ld3lane:
;CHECK: ld3.d { v0, v1, v2 }[0], [x0], #24
%ld3 = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3lane.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 0, i64* %A)
- %tmp = getelementptr i64* %A, i32 3
+ %tmp = getelementptr i64, i64* %A, i32 3
store i64* %tmp, i64** %ptr
ret { <2 x i64>, <2 x i64>, <2 x i64> } %ld3
}
@@ -3258,7 +3258,7 @@ define { <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld3lane(i64* %A,
;CHECK-LABEL: test_v2i64_post_reg_ld3lane:
;CHECK: ld3.d { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
%ld3 = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3lane.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 0, i64* %A)
- %tmp = getelementptr i64* %A, i64 %inc
+ %tmp = getelementptr i64, i64* %A, i64 %inc
store i64* %tmp, i64** %ptr
ret { <2 x i64>, <2 x i64>, <2 x i64> } %ld3
}
@@ -3270,7 +3270,7 @@ define { <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld3lane(i64* %A,
;CHECK-LABEL: test_v1i64_post_imm_ld3lane:
;CHECK: ld3.d { v0, v1, v2 }[0], [x0], #24
%ld3 = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3lane.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64 0, i64* %A)
- %tmp = getelementptr i64* %A, i32 3
+ %tmp = getelementptr i64, i64* %A, i32 3
store i64* %tmp, i64** %ptr
ret { <1 x i64>, <1 x i64>, <1 x i64> } %ld3
}
@@ -3279,7 +3279,7 @@ define { <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld3lane(i64* %A,
;CHECK-LABEL: test_v1i64_post_reg_ld3lane:
;CHECK: ld3.d { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
%ld3 = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3lane.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64 0, i64* %A)
- %tmp = getelementptr i64* %A, i64 %inc
+ %tmp = getelementptr i64, i64* %A, i64 %inc
store i64* %tmp, i64** %ptr
ret { <1 x i64>, <1 x i64>, <1 x i64> } %ld3
}
@@ -3291,7 +3291,7 @@ define { <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_imm_ld3lane(fl
;CHECK-LABEL: test_v4f32_post_imm_ld3lane:
;CHECK: ld3.s { v0, v1, v2 }[0], [x0], #12
%ld3 = call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3lane.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, i64 0, float* %A)
- %tmp = getelementptr float* %A, i32 3
+ %tmp = getelementptr float, float* %A, i32 3
store float* %tmp, float** %ptr
ret { <4 x float>, <4 x float>, <4 x float> } %ld3
}
@@ -3300,7 +3300,7 @@ define { <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_reg_ld3lane(fl
;CHECK-LABEL: test_v4f32_post_reg_ld3lane:
;CHECK: ld3.s { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
%ld3 = call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3lane.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, i64 0, float* %A)
- %tmp = getelementptr float* %A, i64 %inc
+ %tmp = getelementptr float, float* %A, i64 %inc
store float* %tmp, float** %ptr
ret { <4 x float>, <4 x float>, <4 x float> } %ld3
}
@@ -3312,7 +3312,7 @@ define { <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_imm_ld3lane(fl
;CHECK-LABEL: test_v2f32_post_imm_ld3lane:
;CHECK: ld3.s { v0, v1, v2 }[0], [x0], #12
%ld3 = call { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld3lane.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, i64 0, float* %A)
- %tmp = getelementptr float* %A, i32 3
+ %tmp = getelementptr float, float* %A, i32 3
store float* %tmp, float** %ptr
ret { <2 x float>, <2 x float>, <2 x float> } %ld3
}
@@ -3321,7 +3321,7 @@ define { <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_reg_ld3lane(fl
;CHECK-LABEL: test_v2f32_post_reg_ld3lane:
;CHECK: ld3.s { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
%ld3 = call { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld3lane.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, i64 0, float* %A)
- %tmp = getelementptr float* %A, i64 %inc
+ %tmp = getelementptr float, float* %A, i64 %inc
store float* %tmp, float** %ptr
ret { <2 x float>, <2 x float>, <2 x float> } %ld3
}
@@ -3333,7 +3333,7 @@ define { <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_imm_ld3lane
;CHECK-LABEL: test_v2f64_post_imm_ld3lane:
;CHECK: ld3.d { v0, v1, v2 }[0], [x0], #24
%ld3 = call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld3lane.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, i64 0, double* %A)
- %tmp = getelementptr double* %A, i32 3
+ %tmp = getelementptr double, double* %A, i32 3
store double* %tmp, double** %ptr
ret { <2 x double>, <2 x double>, <2 x double> } %ld3
}
@@ -3342,7 +3342,7 @@ define { <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_reg_ld3lane
;CHECK-LABEL: test_v2f64_post_reg_ld3lane:
;CHECK: ld3.d { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
%ld3 = call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld3lane.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, i64 0, double* %A)
- %tmp = getelementptr double* %A, i64 %inc
+ %tmp = getelementptr double, double* %A, i64 %inc
store double* %tmp, double** %ptr
ret { <2 x double>, <2 x double>, <2 x double> } %ld3
}
@@ -3354,7 +3354,7 @@ define { <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_imm_ld3lane
;CHECK-LABEL: test_v1f64_post_imm_ld3lane:
;CHECK: ld3.d { v0, v1, v2 }[0], [x0], #24
%ld3 = call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld3lane.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, i64 0, double* %A)
- %tmp = getelementptr double* %A, i32 3
+ %tmp = getelementptr double, double* %A, i32 3
store double* %tmp, double** %ptr
ret { <1 x double>, <1 x double>, <1 x double> } %ld3
}
@@ -3363,7 +3363,7 @@ define { <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_reg_ld3lane
;CHECK-LABEL: test_v1f64_post_reg_ld3lane:
;CHECK: ld3.d { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
%ld3 = call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld3lane.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, i64 0, double* %A)
- %tmp = getelementptr double* %A, i64 %inc
+ %tmp = getelementptr double, double* %A, i64 %inc
store double* %tmp, double** %ptr
ret { <1 x double>, <1 x double>, <1 x double> } %ld3
}
@@ -3375,7 +3375,7 @@ define { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld4la
;CHECK-LABEL: test_v16i8_post_imm_ld4lane:
;CHECK: ld4.b { v0, v1, v2, v3 }[0], [x0], #4
%ld4 = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4lane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i64 0, i8* %A)
- %tmp = getelementptr i8* %A, i32 4
+ %tmp = getelementptr i8, i8* %A, i32 4
store i8* %tmp, i8** %ptr
ret { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %ld4
}
@@ -3384,7 +3384,7 @@ define { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld4la
;CHECK-LABEL: test_v16i8_post_reg_ld4lane:
;CHECK: ld4.b { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
%ld4 = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4lane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i64 0, i8* %A)
- %tmp = getelementptr i8* %A, i64 %inc
+ %tmp = getelementptr i8, i8* %A, i64 %inc
store i8* %tmp, i8** %ptr
ret { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %ld4
}
@@ -3396,7 +3396,7 @@ define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld4lane(i8
;CHECK-LABEL: test_v8i8_post_imm_ld4lane:
;CHECK: ld4.b { v0, v1, v2, v3 }[0], [x0], #4
%ld4 = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4lane.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i64 0, i8* %A)
- %tmp = getelementptr i8* %A, i32 4
+ %tmp = getelementptr i8, i8* %A, i32 4
store i8* %tmp, i8** %ptr
ret { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %ld4
}
@@ -3405,7 +3405,7 @@ define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld4lane(i8
;CHECK-LABEL: test_v8i8_post_reg_ld4lane:
;CHECK: ld4.b { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
%ld4 = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4lane.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i64 0, i8* %A)
- %tmp = getelementptr i8* %A, i64 %inc
+ %tmp = getelementptr i8, i8* %A, i64 %inc
store i8* %tmp, i8** %ptr
ret { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %ld4
}
@@ -3417,7 +3417,7 @@ define { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld4la
;CHECK-LABEL: test_v8i16_post_imm_ld4lane:
;CHECK: ld4.h { v0, v1, v2, v3 }[0], [x0], #8
%ld4 = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4lane.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i64 0, i16* %A)
- %tmp = getelementptr i16* %A, i32 4
+ %tmp = getelementptr i16, i16* %A, i32 4
store i16* %tmp, i16** %ptr
ret { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %ld4
}
@@ -3426,7 +3426,7 @@ define { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld4la
;CHECK-LABEL: test_v8i16_post_reg_ld4lane:
;CHECK: ld4.h { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
%ld4 = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4lane.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i64 0, i16* %A)
- %tmp = getelementptr i16* %A, i64 %inc
+ %tmp = getelementptr i16, i16* %A, i64 %inc
store i16* %tmp, i16** %ptr
ret { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %ld4
}
@@ -3438,7 +3438,7 @@ define { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld4la
;CHECK-LABEL: test_v4i16_post_imm_ld4lane:
;CHECK: ld4.h { v0, v1, v2, v3 }[0], [x0], #8
%ld4 = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4lane.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i64 0, i16* %A)
- %tmp = getelementptr i16* %A, i32 4
+ %tmp = getelementptr i16, i16* %A, i32 4
store i16* %tmp, i16** %ptr
ret { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %ld4
}
@@ -3447,7 +3447,7 @@ define { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld4la
;CHECK-LABEL: test_v4i16_post_reg_ld4lane:
;CHECK: ld4.h { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
%ld4 = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4lane.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i64 0, i16* %A)
- %tmp = getelementptr i16* %A, i64 %inc
+ %tmp = getelementptr i16, i16* %A, i64 %inc
store i16* %tmp, i16** %ptr
ret { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %ld4
}
@@ -3459,7 +3459,7 @@ define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld4la
;CHECK-LABEL: test_v4i32_post_imm_ld4lane:
;CHECK: ld4.s { v0, v1, v2, v3 }[0], [x0], #16
%ld4 = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4lane.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i64 0, i32* %A)
- %tmp = getelementptr i32* %A, i32 4
+ %tmp = getelementptr i32, i32* %A, i32 4
store i32* %tmp, i32** %ptr
ret { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %ld4
}
@@ -3468,7 +3468,7 @@ define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld4la
;CHECK-LABEL: test_v4i32_post_reg_ld4lane:
;CHECK: ld4.s { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
%ld4 = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4lane.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i64 0, i32* %A)
- %tmp = getelementptr i32* %A, i64 %inc
+ %tmp = getelementptr i32, i32* %A, i64 %inc
store i32* %tmp, i32** %ptr
ret { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %ld4
}
@@ -3480,7 +3480,7 @@ define { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld4la
;CHECK-LABEL: test_v2i32_post_imm_ld4lane:
;CHECK: ld4.s { v0, v1, v2, v3 }[0], [x0], #16
%ld4 = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4lane.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i64 0, i32* %A)
- %tmp = getelementptr i32* %A, i32 4
+ %tmp = getelementptr i32, i32* %A, i32 4
store i32* %tmp, i32** %ptr
ret { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %ld4
}
@@ -3489,7 +3489,7 @@ define { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld4la
;CHECK-LABEL: test_v2i32_post_reg_ld4lane:
;CHECK: ld4.s { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
%ld4 = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4lane.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i64 0, i32* %A)
- %tmp = getelementptr i32* %A, i64 %inc
+ %tmp = getelementptr i32, i32* %A, i64 %inc
store i32* %tmp, i32** %ptr
ret { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %ld4
}
@@ -3501,7 +3501,7 @@ define { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld4la
;CHECK-LABEL: test_v2i64_post_imm_ld4lane:
;CHECK: ld4.d { v0, v1, v2, v3 }[0], [x0], #32
%ld4 = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4lane.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64 0, i64* %A)
- %tmp = getelementptr i64* %A, i32 4
+ %tmp = getelementptr i64, i64* %A, i32 4
store i64* %tmp, i64** %ptr
ret { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %ld4
}
@@ -3510,7 +3510,7 @@ define { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld4la
;CHECK-LABEL: test_v2i64_post_reg_ld4lane:
;CHECK: ld4.d { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
%ld4 = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4lane.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64 0, i64* %A)
- %tmp = getelementptr i64* %A, i64 %inc
+ %tmp = getelementptr i64, i64* %A, i64 %inc
store i64* %tmp, i64** %ptr
ret { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %ld4
}
@@ -3522,7 +3522,7 @@ define { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld4la
;CHECK-LABEL: test_v1i64_post_imm_ld4lane:
;CHECK: ld4.d { v0, v1, v2, v3 }[0], [x0], #32
%ld4 = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4lane.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64 0, i64* %A)
- %tmp = getelementptr i64* %A, i32 4
+ %tmp = getelementptr i64, i64* %A, i32 4
store i64* %tmp, i64** %ptr
ret { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %ld4
}
@@ -3531,7 +3531,7 @@ define { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld4la
;CHECK-LABEL: test_v1i64_post_reg_ld4lane:
;CHECK: ld4.d { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
%ld4 = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4lane.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64 0, i64* %A)
- %tmp = getelementptr i64* %A, i64 %inc
+ %tmp = getelementptr i64, i64* %A, i64 %inc
store i64* %tmp, i64** %ptr
ret { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %ld4
}
@@ -3543,7 +3543,7 @@ define { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_i
;CHECK-LABEL: test_v4f32_post_imm_ld4lane:
;CHECK: ld4.s { v0, v1, v2, v3 }[0], [x0], #16
%ld4 = call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld4lane.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, i64 0, float* %A)
- %tmp = getelementptr float* %A, i32 4
+ %tmp = getelementptr float, float* %A, i32 4
store float* %tmp, float** %ptr
ret { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %ld4
}
@@ -3552,7 +3552,7 @@ define { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_r
;CHECK-LABEL: test_v4f32_post_reg_ld4lane:
;CHECK: ld4.s { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
%ld4 = call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld4lane.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, i64 0, float* %A)
- %tmp = getelementptr float* %A, i64 %inc
+ %tmp = getelementptr float, float* %A, i64 %inc
store float* %tmp, float** %ptr
ret { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %ld4
}
@@ -3564,7 +3564,7 @@ define { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_i
;CHECK-LABEL: test_v2f32_post_imm_ld4lane:
;CHECK: ld4.s { v0, v1, v2, v3 }[0], [x0], #16
%ld4 = call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld4lane.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, i64 0, float* %A)
- %tmp = getelementptr float* %A, i32 4
+ %tmp = getelementptr float, float* %A, i32 4
store float* %tmp, float** %ptr
ret { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %ld4
}
@@ -3573,7 +3573,7 @@ define { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_r
;CHECK-LABEL: test_v2f32_post_reg_ld4lane:
;CHECK: ld4.s { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
%ld4 = call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld4lane.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, i64 0, float* %A)
- %tmp = getelementptr float* %A, i64 %inc
+ %tmp = getelementptr float, float* %A, i64 %inc
store float* %tmp, float** %ptr
ret { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %ld4
}
@@ -3585,7 +3585,7 @@ define { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @test_v2f64_po
;CHECK-LABEL: test_v2f64_post_imm_ld4lane:
;CHECK: ld4.d { v0, v1, v2, v3 }[0], [x0], #32
%ld4 = call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld4lane.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, i64 0, double* %A)
- %tmp = getelementptr double* %A, i32 4
+ %tmp = getelementptr double, double* %A, i32 4
store double* %tmp, double** %ptr
ret { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %ld4
}
@@ -3594,7 +3594,7 @@ define { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @test_v2f64_po
;CHECK-LABEL: test_v2f64_post_reg_ld4lane:
;CHECK: ld4.d { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
%ld4 = call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld4lane.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, i64 0, double* %A)
- %tmp = getelementptr double* %A, i64 %inc
+ %tmp = getelementptr double, double* %A, i64 %inc
store double* %tmp, double** %ptr
ret { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %ld4
}
@@ -3606,7 +3606,7 @@ define { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @test_v1f64_po
;CHECK-LABEL: test_v1f64_post_imm_ld4lane:
;CHECK: ld4.d { v0, v1, v2, v3 }[0], [x0], #32
%ld4 = call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld4lane.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, i64 0, double* %A)
- %tmp = getelementptr double* %A, i32 4
+ %tmp = getelementptr double, double* %A, i32 4
store double* %tmp, double** %ptr
ret { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %ld4
}
@@ -3615,7 +3615,7 @@ define { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @test_v1f64_po
;CHECK-LABEL: test_v1f64_post_reg_ld4lane:
;CHECK: ld4.d { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
%ld4 = call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld4lane.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, i64 0, double* %A)
- %tmp = getelementptr double* %A, i64 %inc
+ %tmp = getelementptr double, double* %A, i64 %inc
store double* %tmp, double** %ptr
ret { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %ld4
}
@@ -3627,7 +3627,7 @@ define i8* @test_v16i8_post_imm_st2(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %
;CHECK-LABEL: test_v16i8_post_imm_st2:
;CHECK: st2.16b { v0, v1 }, [x0], #32
call void @llvm.aarch64.neon.st2.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, i8* %A)
- %tmp = getelementptr i8* %A, i32 32
+ %tmp = getelementptr i8, i8* %A, i32 32
ret i8* %tmp
}
@@ -3635,7 +3635,7 @@ define i8* @test_v16i8_post_reg_st2(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %
;CHECK-LABEL: test_v16i8_post_reg_st2:
;CHECK: st2.16b { v0, v1 }, [x0], x{{[0-9]+}}
call void @llvm.aarch64.neon.st2.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, i8* %A)
- %tmp = getelementptr i8* %A, i64 %inc
+ %tmp = getelementptr i8, i8* %A, i64 %inc
ret i8* %tmp
}
@@ -3646,7 +3646,7 @@ define i8* @test_v8i8_post_imm_st2(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C)
;CHECK-LABEL: test_v8i8_post_imm_st2:
;CHECK: st2.8b { v0, v1 }, [x0], #16
call void @llvm.aarch64.neon.st2.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, i8* %A)
- %tmp = getelementptr i8* %A, i32 16
+ %tmp = getelementptr i8, i8* %A, i32 16
ret i8* %tmp
}
@@ -3654,7 +3654,7 @@ define i8* @test_v8i8_post_reg_st2(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C,
;CHECK-LABEL: test_v8i8_post_reg_st2:
;CHECK: st2.8b { v0, v1 }, [x0], x{{[0-9]+}}
call void @llvm.aarch64.neon.st2.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, i8* %A)
- %tmp = getelementptr i8* %A, i64 %inc
+ %tmp = getelementptr i8, i8* %A, i64 %inc
ret i8* %tmp
}
@@ -3665,7 +3665,7 @@ define i16* @test_v8i16_post_imm_st2(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16
;CHECK-LABEL: test_v8i16_post_imm_st2:
;CHECK: st2.8h { v0, v1 }, [x0], #32
call void @llvm.aarch64.neon.st2.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, i16* %A)
- %tmp = getelementptr i16* %A, i32 16
+ %tmp = getelementptr i16, i16* %A, i32 16
ret i16* %tmp
}
@@ -3673,7 +3673,7 @@ define i16* @test_v8i16_post_reg_st2(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16
;CHECK-LABEL: test_v8i16_post_reg_st2:
;CHECK: st2.8h { v0, v1 }, [x0], x{{[0-9]+}}
call void @llvm.aarch64.neon.st2.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, i16* %A)
- %tmp = getelementptr i16* %A, i64 %inc
+ %tmp = getelementptr i16, i16* %A, i64 %inc
ret i16* %tmp
}
@@ -3684,7 +3684,7 @@ define i16* @test_v4i16_post_imm_st2(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16
;CHECK-LABEL: test_v4i16_post_imm_st2:
;CHECK: st2.4h { v0, v1 }, [x0], #16
call void @llvm.aarch64.neon.st2.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, i16* %A)
- %tmp = getelementptr i16* %A, i32 8
+ %tmp = getelementptr i16, i16* %A, i32 8
ret i16* %tmp
}
@@ -3692,7 +3692,7 @@ define i16* @test_v4i16_post_reg_st2(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16
;CHECK-LABEL: test_v4i16_post_reg_st2:
;CHECK: st2.4h { v0, v1 }, [x0], x{{[0-9]+}}
call void @llvm.aarch64.neon.st2.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, i16* %A)
- %tmp = getelementptr i16* %A, i64 %inc
+ %tmp = getelementptr i16, i16* %A, i64 %inc
ret i16* %tmp
}
@@ -3703,7 +3703,7 @@ define i32* @test_v4i32_post_imm_st2(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32
;CHECK-LABEL: test_v4i32_post_imm_st2:
;CHECK: st2.4s { v0, v1 }, [x0], #32
call void @llvm.aarch64.neon.st2.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, i32* %A)
- %tmp = getelementptr i32* %A, i32 8
+ %tmp = getelementptr i32, i32* %A, i32 8
ret i32* %tmp
}
@@ -3711,7 +3711,7 @@ define i32* @test_v4i32_post_reg_st2(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32
;CHECK-LABEL: test_v4i32_post_reg_st2:
;CHECK: st2.4s { v0, v1 }, [x0], x{{[0-9]+}}
call void @llvm.aarch64.neon.st2.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, i32* %A)
- %tmp = getelementptr i32* %A, i64 %inc
+ %tmp = getelementptr i32, i32* %A, i64 %inc
ret i32* %tmp
}
@@ -3722,7 +3722,7 @@ define i32* @test_v2i32_post_imm_st2(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32
;CHECK-LABEL: test_v2i32_post_imm_st2:
;CHECK: st2.2s { v0, v1 }, [x0], #16
call void @llvm.aarch64.neon.st2.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, i32* %A)
- %tmp = getelementptr i32* %A, i32 4
+ %tmp = getelementptr i32, i32* %A, i32 4
ret i32* %tmp
}
@@ -3730,7 +3730,7 @@ define i32* @test_v2i32_post_reg_st2(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32
;CHECK-LABEL: test_v2i32_post_reg_st2:
;CHECK: st2.2s { v0, v1 }, [x0], x{{[0-9]+}}
call void @llvm.aarch64.neon.st2.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, i32* %A)
- %tmp = getelementptr i32* %A, i64 %inc
+ %tmp = getelementptr i32, i32* %A, i64 %inc
ret i32* %tmp
}
@@ -3741,7 +3741,7 @@ define i64* @test_v2i64_post_imm_st2(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64
;CHECK-LABEL: test_v2i64_post_imm_st2:
;CHECK: st2.2d { v0, v1 }, [x0], #32
call void @llvm.aarch64.neon.st2.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, i64* %A)
- %tmp = getelementptr i64* %A, i64 4
+ %tmp = getelementptr i64, i64* %A, i64 4
ret i64* %tmp
}
@@ -3749,7 +3749,7 @@ define i64* @test_v2i64_post_reg_st2(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64
;CHECK-LABEL: test_v2i64_post_reg_st2:
;CHECK: st2.2d { v0, v1 }, [x0], x{{[0-9]+}}
call void @llvm.aarch64.neon.st2.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, i64* %A)
- %tmp = getelementptr i64* %A, i64 %inc
+ %tmp = getelementptr i64, i64* %A, i64 %inc
ret i64* %tmp
}
@@ -3760,7 +3760,7 @@ define i64* @test_v1i64_post_imm_st2(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64
;CHECK-LABEL: test_v1i64_post_imm_st2:
;CHECK: st1.1d { v0, v1 }, [x0], #16
call void @llvm.aarch64.neon.st2.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, i64* %A)
- %tmp = getelementptr i64* %A, i64 2
+ %tmp = getelementptr i64, i64* %A, i64 2
ret i64* %tmp
}
@@ -3768,7 +3768,7 @@ define i64* @test_v1i64_post_reg_st2(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64
;CHECK-LABEL: test_v1i64_post_reg_st2:
;CHECK: st1.1d { v0, v1 }, [x0], x{{[0-9]+}}
call void @llvm.aarch64.neon.st2.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, i64* %A)
- %tmp = getelementptr i64* %A, i64 %inc
+ %tmp = getelementptr i64, i64* %A, i64 %inc
ret i64* %tmp
}
@@ -3779,7 +3779,7 @@ define float* @test_v4f32_post_imm_st2(float* %A, float** %ptr, <4 x float> %B,
;CHECK-LABEL: test_v4f32_post_imm_st2:
;CHECK: st2.4s { v0, v1 }, [x0], #32
call void @llvm.aarch64.neon.st2.v4f32.p0f32(<4 x float> %B, <4 x float> %C, float* %A)
- %tmp = getelementptr float* %A, i32 8
+ %tmp = getelementptr float, float* %A, i32 8
ret float* %tmp
}
@@ -3787,7 +3787,7 @@ define float* @test_v4f32_post_reg_st2(float* %A, float** %ptr, <4 x float> %B,
;CHECK-LABEL: test_v4f32_post_reg_st2:
;CHECK: st2.4s { v0, v1 }, [x0], x{{[0-9]+}}
call void @llvm.aarch64.neon.st2.v4f32.p0f32(<4 x float> %B, <4 x float> %C, float* %A)
- %tmp = getelementptr float* %A, i64 %inc
+ %tmp = getelementptr float, float* %A, i64 %inc
ret float* %tmp
}
@@ -3798,7 +3798,7 @@ define float* @test_v2f32_post_imm_st2(float* %A, float** %ptr, <2 x float> %B,
;CHECK-LABEL: test_v2f32_post_imm_st2:
;CHECK: st2.2s { v0, v1 }, [x0], #16
call void @llvm.aarch64.neon.st2.v2f32.p0f32(<2 x float> %B, <2 x float> %C, float* %A)
- %tmp = getelementptr float* %A, i32 4
+ %tmp = getelementptr float, float* %A, i32 4
ret float* %tmp
}
@@ -3806,7 +3806,7 @@ define float* @test_v2f32_post_reg_st2(float* %A, float** %ptr, <2 x float> %B,
;CHECK-LABEL: test_v2f32_post_reg_st2:
;CHECK: st2.2s { v0, v1 }, [x0], x{{[0-9]+}}
call void @llvm.aarch64.neon.st2.v2f32.p0f32(<2 x float> %B, <2 x float> %C, float* %A)
- %tmp = getelementptr float* %A, i64 %inc
+ %tmp = getelementptr float, float* %A, i64 %inc
ret float* %tmp
}
@@ -3817,7 +3817,7 @@ define double* @test_v2f64_post_imm_st2(double* %A, double** %ptr, <2 x double>
;CHECK-LABEL: test_v2f64_post_imm_st2:
;CHECK: st2.2d { v0, v1 }, [x0], #32
call void @llvm.aarch64.neon.st2.v2f64.p0f64(<2 x double> %B, <2 x double> %C, double* %A)
- %tmp = getelementptr double* %A, i64 4
+ %tmp = getelementptr double, double* %A, i64 4
ret double* %tmp
}
@@ -3825,7 +3825,7 @@ define double* @test_v2f64_post_reg_st2(double* %A, double** %ptr, <2 x double>
;CHECK-LABEL: test_v2f64_post_reg_st2:
;CHECK: st2.2d { v0, v1 }, [x0], x{{[0-9]+}}
call void @llvm.aarch64.neon.st2.v2f64.p0f64(<2 x double> %B, <2 x double> %C, double* %A)
- %tmp = getelementptr double* %A, i64 %inc
+ %tmp = getelementptr double, double* %A, i64 %inc
ret double* %tmp
}
@@ -3836,7 +3836,7 @@ define double* @test_v1f64_post_imm_st2(double* %A, double** %ptr, <1 x double>
;CHECK-LABEL: test_v1f64_post_imm_st2:
;CHECK: st1.1d { v0, v1 }, [x0], #16
call void @llvm.aarch64.neon.st2.v1f64.p0f64(<1 x double> %B, <1 x double> %C, double* %A)
- %tmp = getelementptr double* %A, i64 2
+ %tmp = getelementptr double, double* %A, i64 2
ret double* %tmp
}
@@ -3844,7 +3844,7 @@ define double* @test_v1f64_post_reg_st2(double* %A, double** %ptr, <1 x double>
;CHECK-LABEL: test_v1f64_post_reg_st2:
;CHECK: st1.1d { v0, v1 }, [x0], x{{[0-9]+}}
call void @llvm.aarch64.neon.st2.v1f64.p0f64(<1 x double> %B, <1 x double> %C, double* %A)
- %tmp = getelementptr double* %A, i64 %inc
+ %tmp = getelementptr double, double* %A, i64 %inc
ret double* %tmp
}
@@ -3855,7 +3855,7 @@ define i8* @test_v16i8_post_imm_st3(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %
;CHECK-LABEL: test_v16i8_post_imm_st3:
;CHECK: st3.16b { v0, v1, v2 }, [x0], #48
call void @llvm.aarch64.neon.st3.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i8* %A)
- %tmp = getelementptr i8* %A, i32 48
+ %tmp = getelementptr i8, i8* %A, i32 48
ret i8* %tmp
}
@@ -3863,7 +3863,7 @@ define i8* @test_v16i8_post_reg_st3(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %
;CHECK-LABEL: test_v16i8_post_reg_st3:
;CHECK: st3.16b { v0, v1, v2 }, [x0], x{{[0-9]+}}
call void @llvm.aarch64.neon.st3.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i8* %A)
- %tmp = getelementptr i8* %A, i64 %inc
+ %tmp = getelementptr i8, i8* %A, i64 %inc
ret i8* %tmp
}
@@ -3874,7 +3874,7 @@ define i8* @test_v8i8_post_imm_st3(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C,
;CHECK-LABEL: test_v8i8_post_imm_st3:
;CHECK: st3.8b { v0, v1, v2 }, [x0], #24
call void @llvm.aarch64.neon.st3.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i8* %A)
- %tmp = getelementptr i8* %A, i32 24
+ %tmp = getelementptr i8, i8* %A, i32 24
ret i8* %tmp
}
@@ -3882,7 +3882,7 @@ define i8* @test_v8i8_post_reg_st3(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C,
;CHECK-LABEL: test_v8i8_post_reg_st3:
;CHECK: st3.8b { v0, v1, v2 }, [x0], x{{[0-9]+}}
call void @llvm.aarch64.neon.st3.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i8* %A)
- %tmp = getelementptr i8* %A, i64 %inc
+ %tmp = getelementptr i8, i8* %A, i64 %inc
ret i8* %tmp
}
@@ -3893,7 +3893,7 @@ define i16* @test_v8i16_post_imm_st3(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16
;CHECK-LABEL: test_v8i16_post_imm_st3:
;CHECK: st3.8h { v0, v1, v2 }, [x0], #48
call void @llvm.aarch64.neon.st3.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i16* %A)
- %tmp = getelementptr i16* %A, i32 24
+ %tmp = getelementptr i16, i16* %A, i32 24
ret i16* %tmp
}
@@ -3901,7 +3901,7 @@ define i16* @test_v8i16_post_reg_st3(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16
;CHECK-LABEL: test_v8i16_post_reg_st3:
;CHECK: st3.8h { v0, v1, v2 }, [x0], x{{[0-9]+}}
call void @llvm.aarch64.neon.st3.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i16* %A)
- %tmp = getelementptr i16* %A, i64 %inc
+ %tmp = getelementptr i16, i16* %A, i64 %inc
ret i16* %tmp
}
@@ -3912,7 +3912,7 @@ define i16* @test_v4i16_post_imm_st3(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16
;CHECK-LABEL: test_v4i16_post_imm_st3:
;CHECK: st3.4h { v0, v1, v2 }, [x0], #24
call void @llvm.aarch64.neon.st3.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i16* %A)
- %tmp = getelementptr i16* %A, i32 12
+ %tmp = getelementptr i16, i16* %A, i32 12
ret i16* %tmp
}
@@ -3920,7 +3920,7 @@ define i16* @test_v4i16_post_reg_st3(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16
;CHECK-LABEL: test_v4i16_post_reg_st3:
;CHECK: st3.4h { v0, v1, v2 }, [x0], x{{[0-9]+}}
call void @llvm.aarch64.neon.st3.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i16* %A)
- %tmp = getelementptr i16* %A, i64 %inc
+ %tmp = getelementptr i16, i16* %A, i64 %inc
ret i16* %tmp
}
@@ -3931,7 +3931,7 @@ define i32* @test_v4i32_post_imm_st3(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32
;CHECK-LABEL: test_v4i32_post_imm_st3:
;CHECK: st3.4s { v0, v1, v2 }, [x0], #48
call void @llvm.aarch64.neon.st3.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i32* %A)
- %tmp = getelementptr i32* %A, i32 12
+ %tmp = getelementptr i32, i32* %A, i32 12
ret i32* %tmp
}
@@ -3939,7 +3939,7 @@ define i32* @test_v4i32_post_reg_st3(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32
;CHECK-LABEL: test_v4i32_post_reg_st3:
;CHECK: st3.4s { v0, v1, v2 }, [x0], x{{[0-9]+}}
call void @llvm.aarch64.neon.st3.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i32* %A)
- %tmp = getelementptr i32* %A, i64 %inc
+ %tmp = getelementptr i32, i32* %A, i64 %inc
ret i32* %tmp
}
@@ -3950,7 +3950,7 @@ define i32* @test_v2i32_post_imm_st3(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32
;CHECK-LABEL: test_v2i32_post_imm_st3:
;CHECK: st3.2s { v0, v1, v2 }, [x0], #24
call void @llvm.aarch64.neon.st3.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i32* %A)
- %tmp = getelementptr i32* %A, i32 6
+ %tmp = getelementptr i32, i32* %A, i32 6
ret i32* %tmp
}
@@ -3958,7 +3958,7 @@ define i32* @test_v2i32_post_reg_st3(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32
;CHECK-LABEL: test_v2i32_post_reg_st3:
;CHECK: st3.2s { v0, v1, v2 }, [x0], x{{[0-9]+}}
call void @llvm.aarch64.neon.st3.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i32* %A)
- %tmp = getelementptr i32* %A, i64 %inc
+ %tmp = getelementptr i32, i32* %A, i64 %inc
ret i32* %tmp
}
@@ -3969,7 +3969,7 @@ define i64* @test_v2i64_post_imm_st3(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64
;CHECK-LABEL: test_v2i64_post_imm_st3:
;CHECK: st3.2d { v0, v1, v2 }, [x0], #48
call void @llvm.aarch64.neon.st3.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64* %A)
- %tmp = getelementptr i64* %A, i64 6
+ %tmp = getelementptr i64, i64* %A, i64 6
ret i64* %tmp
}
@@ -3977,7 +3977,7 @@ define i64* @test_v2i64_post_reg_st3(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64
;CHECK-LABEL: test_v2i64_post_reg_st3:
;CHECK: st3.2d { v0, v1, v2 }, [x0], x{{[0-9]+}}
call void @llvm.aarch64.neon.st3.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64* %A)
- %tmp = getelementptr i64* %A, i64 %inc
+ %tmp = getelementptr i64, i64* %A, i64 %inc
ret i64* %tmp
}
@@ -3988,7 +3988,7 @@ define i64* @test_v1i64_post_imm_st3(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64
;CHECK-LABEL: test_v1i64_post_imm_st3:
;CHECK: st1.1d { v0, v1, v2 }, [x0], #24
call void @llvm.aarch64.neon.st3.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64* %A)
- %tmp = getelementptr i64* %A, i64 3
+ %tmp = getelementptr i64, i64* %A, i64 3
ret i64* %tmp
}
@@ -3996,7 +3996,7 @@ define i64* @test_v1i64_post_reg_st3(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64
;CHECK-LABEL: test_v1i64_post_reg_st3:
;CHECK: st1.1d { v0, v1, v2 }, [x0], x{{[0-9]+}}
call void @llvm.aarch64.neon.st3.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64* %A)
- %tmp = getelementptr i64* %A, i64 %inc
+ %tmp = getelementptr i64, i64* %A, i64 %inc
ret i64* %tmp
}
@@ -4007,7 +4007,7 @@ define float* @test_v4f32_post_imm_st3(float* %A, float** %ptr, <4 x float> %B,
;CHECK-LABEL: test_v4f32_post_imm_st3:
;CHECK: st3.4s { v0, v1, v2 }, [x0], #48
call void @llvm.aarch64.neon.st3.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, float* %A)
- %tmp = getelementptr float* %A, i32 12
+ %tmp = getelementptr float, float* %A, i32 12
ret float* %tmp
}
@@ -4015,7 +4015,7 @@ define float* @test_v4f32_post_reg_st3(float* %A, float** %ptr, <4 x float> %B,
;CHECK-LABEL: test_v4f32_post_reg_st3:
;CHECK: st3.4s { v0, v1, v2 }, [x0], x{{[0-9]+}}
call void @llvm.aarch64.neon.st3.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, float* %A)
- %tmp = getelementptr float* %A, i64 %inc
+ %tmp = getelementptr float, float* %A, i64 %inc
ret float* %tmp
}
@@ -4026,7 +4026,7 @@ define float* @test_v2f32_post_imm_st3(float* %A, float** %ptr, <2 x float> %B,
;CHECK-LABEL: test_v2f32_post_imm_st3:
;CHECK: st3.2s { v0, v1, v2 }, [x0], #24
call void @llvm.aarch64.neon.st3.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, float* %A)
- %tmp = getelementptr float* %A, i32 6
+ %tmp = getelementptr float, float* %A, i32 6
ret float* %tmp
}
@@ -4034,7 +4034,7 @@ define float* @test_v2f32_post_reg_st3(float* %A, float** %ptr, <2 x float> %B,
;CHECK-LABEL: test_v2f32_post_reg_st3:
;CHECK: st3.2s { v0, v1, v2 }, [x0], x{{[0-9]+}}
call void @llvm.aarch64.neon.st3.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, float* %A)
- %tmp = getelementptr float* %A, i64 %inc
+ %tmp = getelementptr float, float* %A, i64 %inc
ret float* %tmp
}
@@ -4045,7 +4045,7 @@ define double* @test_v2f64_post_imm_st3(double* %A, double** %ptr, <2 x double>
;CHECK-LABEL: test_v2f64_post_imm_st3:
;CHECK: st3.2d { v0, v1, v2 }, [x0], #48
call void @llvm.aarch64.neon.st3.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, double* %A)
- %tmp = getelementptr double* %A, i64 6
+ %tmp = getelementptr double, double* %A, i64 6
ret double* %tmp
}
@@ -4053,7 +4053,7 @@ define double* @test_v2f64_post_reg_st3(double* %A, double** %ptr, <2 x double>
;CHECK-LABEL: test_v2f64_post_reg_st3:
;CHECK: st3.2d { v0, v1, v2 }, [x0], x{{[0-9]+}}
call void @llvm.aarch64.neon.st3.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, double* %A)
- %tmp = getelementptr double* %A, i64 %inc
+ %tmp = getelementptr double, double* %A, i64 %inc
ret double* %tmp
}
@@ -4064,7 +4064,7 @@ define double* @test_v1f64_post_imm_st3(double* %A, double** %ptr, <1 x double>
;CHECK-LABEL: test_v1f64_post_imm_st3:
;CHECK: st1.1d { v0, v1, v2 }, [x0], #24
call void @llvm.aarch64.neon.st3.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, double* %A)
- %tmp = getelementptr double* %A, i64 3
+ %tmp = getelementptr double, double* %A, i64 3
ret double* %tmp
}
@@ -4072,7 +4072,7 @@ define double* @test_v1f64_post_reg_st3(double* %A, double** %ptr, <1 x double>
;CHECK-LABEL: test_v1f64_post_reg_st3:
;CHECK: st1.1d { v0, v1, v2 }, [x0], x{{[0-9]+}}
call void @llvm.aarch64.neon.st3.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, double* %A)
- %tmp = getelementptr double* %A, i64 %inc
+ %tmp = getelementptr double, double* %A, i64 %inc
ret double* %tmp
}
@@ -4083,7 +4083,7 @@ define i8* @test_v16i8_post_imm_st4(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %
;CHECK-LABEL: test_v16i8_post_imm_st4:
;CHECK: st4.16b { v0, v1, v2, v3 }, [x0], #64
call void @llvm.aarch64.neon.st4.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i8* %A)
- %tmp = getelementptr i8* %A, i32 64
+ %tmp = getelementptr i8, i8* %A, i32 64
ret i8* %tmp
}
@@ -4091,7 +4091,7 @@ define i8* @test_v16i8_post_reg_st4(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %
;CHECK-LABEL: test_v16i8_post_reg_st4:
;CHECK: st4.16b { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
call void @llvm.aarch64.neon.st4.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i8* %A)
- %tmp = getelementptr i8* %A, i64 %inc
+ %tmp = getelementptr i8, i8* %A, i64 %inc
ret i8* %tmp
}
@@ -4102,7 +4102,7 @@ define i8* @test_v8i8_post_imm_st4(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C,
;CHECK-LABEL: test_v8i8_post_imm_st4:
;CHECK: st4.8b { v0, v1, v2, v3 }, [x0], #32
call void @llvm.aarch64.neon.st4.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i8* %A)
- %tmp = getelementptr i8* %A, i32 32
+ %tmp = getelementptr i8, i8* %A, i32 32
ret i8* %tmp
}
@@ -4110,7 +4110,7 @@ define i8* @test_v8i8_post_reg_st4(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C,
;CHECK-LABEL: test_v8i8_post_reg_st4:
;CHECK: st4.8b { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
call void @llvm.aarch64.neon.st4.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i8* %A)
- %tmp = getelementptr i8* %A, i64 %inc
+ %tmp = getelementptr i8, i8* %A, i64 %inc
ret i8* %tmp
}
@@ -4121,7 +4121,7 @@ define i16* @test_v8i16_post_imm_st4(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16
;CHECK-LABEL: test_v8i16_post_imm_st4:
;CHECK: st4.8h { v0, v1, v2, v3 }, [x0], #64
call void @llvm.aarch64.neon.st4.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i16* %A)
- %tmp = getelementptr i16* %A, i32 32
+ %tmp = getelementptr i16, i16* %A, i32 32
ret i16* %tmp
}
@@ -4129,7 +4129,7 @@ define i16* @test_v8i16_post_reg_st4(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16
;CHECK-LABEL: test_v8i16_post_reg_st4:
;CHECK: st4.8h { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
call void @llvm.aarch64.neon.st4.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i16* %A)
- %tmp = getelementptr i16* %A, i64 %inc
+ %tmp = getelementptr i16, i16* %A, i64 %inc
ret i16* %tmp
}
@@ -4140,7 +4140,7 @@ define i16* @test_v4i16_post_imm_st4(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16
;CHECK-LABEL: test_v4i16_post_imm_st4:
;CHECK: st4.4h { v0, v1, v2, v3 }, [x0], #32
call void @llvm.aarch64.neon.st4.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i16* %A)
- %tmp = getelementptr i16* %A, i32 16
+ %tmp = getelementptr i16, i16* %A, i32 16
ret i16* %tmp
}
@@ -4148,7 +4148,7 @@ define i16* @test_v4i16_post_reg_st4(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16
;CHECK-LABEL: test_v4i16_post_reg_st4:
;CHECK: st4.4h { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
call void @llvm.aarch64.neon.st4.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i16* %A)
- %tmp = getelementptr i16* %A, i64 %inc
+ %tmp = getelementptr i16, i16* %A, i64 %inc
ret i16* %tmp
}
@@ -4159,7 +4159,7 @@ define i32* @test_v4i32_post_imm_st4(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32
;CHECK-LABEL: test_v4i32_post_imm_st4:
;CHECK: st4.4s { v0, v1, v2, v3 }, [x0], #64
call void @llvm.aarch64.neon.st4.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i32* %A)
- %tmp = getelementptr i32* %A, i32 16
+ %tmp = getelementptr i32, i32* %A, i32 16
ret i32* %tmp
}
@@ -4167,7 +4167,7 @@ define i32* @test_v4i32_post_reg_st4(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32
;CHECK-LABEL: test_v4i32_post_reg_st4:
;CHECK: st4.4s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
call void @llvm.aarch64.neon.st4.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i32* %A)
- %tmp = getelementptr i32* %A, i64 %inc
+ %tmp = getelementptr i32, i32* %A, i64 %inc
ret i32* %tmp
}
@@ -4178,7 +4178,7 @@ define i32* @test_v2i32_post_imm_st4(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32
;CHECK-LABEL: test_v2i32_post_imm_st4:
;CHECK: st4.2s { v0, v1, v2, v3 }, [x0], #32
call void @llvm.aarch64.neon.st4.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i32* %A)
- %tmp = getelementptr i32* %A, i32 8
+ %tmp = getelementptr i32, i32* %A, i32 8
ret i32* %tmp
}
@@ -4186,7 +4186,7 @@ define i32* @test_v2i32_post_reg_st4(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32
;CHECK-LABEL: test_v2i32_post_reg_st4:
;CHECK: st4.2s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
call void @llvm.aarch64.neon.st4.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i32* %A)
- %tmp = getelementptr i32* %A, i64 %inc
+ %tmp = getelementptr i32, i32* %A, i64 %inc
ret i32* %tmp
}
@@ -4197,7 +4197,7 @@ define i64* @test_v2i64_post_imm_st4(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64
;CHECK-LABEL: test_v2i64_post_imm_st4:
;CHECK: st4.2d { v0, v1, v2, v3 }, [x0], #64
call void @llvm.aarch64.neon.st4.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64* %A)
- %tmp = getelementptr i64* %A, i64 8
+ %tmp = getelementptr i64, i64* %A, i64 8
ret i64* %tmp
}
@@ -4205,7 +4205,7 @@ define i64* @test_v2i64_post_reg_st4(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64
;CHECK-LABEL: test_v2i64_post_reg_st4:
;CHECK: st4.2d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
call void @llvm.aarch64.neon.st4.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64* %A)
- %tmp = getelementptr i64* %A, i64 %inc
+ %tmp = getelementptr i64, i64* %A, i64 %inc
ret i64* %tmp
}
@@ -4216,7 +4216,7 @@ define i64* @test_v1i64_post_imm_st4(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64
;CHECK-LABEL: test_v1i64_post_imm_st4:
;CHECK: st1.1d { v0, v1, v2, v3 }, [x0], #32
call void @llvm.aarch64.neon.st4.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64* %A)
- %tmp = getelementptr i64* %A, i64 4
+ %tmp = getelementptr i64, i64* %A, i64 4
ret i64* %tmp
}
@@ -4224,7 +4224,7 @@ define i64* @test_v1i64_post_reg_st4(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64
;CHECK-LABEL: test_v1i64_post_reg_st4:
;CHECK: st1.1d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
call void @llvm.aarch64.neon.st4.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64* %A)
- %tmp = getelementptr i64* %A, i64 %inc
+ %tmp = getelementptr i64, i64* %A, i64 %inc
ret i64* %tmp
}
@@ -4235,7 +4235,7 @@ define float* @test_v4f32_post_imm_st4(float* %A, float** %ptr, <4 x float> %B,
;CHECK-LABEL: test_v4f32_post_imm_st4:
;CHECK: st4.4s { v0, v1, v2, v3 }, [x0], #64
call void @llvm.aarch64.neon.st4.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, float* %A)
- %tmp = getelementptr float* %A, i32 16
+ %tmp = getelementptr float, float* %A, i32 16
ret float* %tmp
}
@@ -4243,7 +4243,7 @@ define float* @test_v4f32_post_reg_st4(float* %A, float** %ptr, <4 x float> %B,
;CHECK-LABEL: test_v4f32_post_reg_st4:
;CHECK: st4.4s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
call void @llvm.aarch64.neon.st4.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, float* %A)
- %tmp = getelementptr float* %A, i64 %inc
+ %tmp = getelementptr float, float* %A, i64 %inc
ret float* %tmp
}
@@ -4254,7 +4254,7 @@ define float* @test_v2f32_post_imm_st4(float* %A, float** %ptr, <2 x float> %B,
;CHECK-LABEL: test_v2f32_post_imm_st4:
;CHECK: st4.2s { v0, v1, v2, v3 }, [x0], #32
call void @llvm.aarch64.neon.st4.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, float* %A)
- %tmp = getelementptr float* %A, i32 8
+ %tmp = getelementptr float, float* %A, i32 8
ret float* %tmp
}
@@ -4262,7 +4262,7 @@ define float* @test_v2f32_post_reg_st4(float* %A, float** %ptr, <2 x float> %B,
;CHECK-LABEL: test_v2f32_post_reg_st4:
;CHECK: st4.2s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
call void @llvm.aarch64.neon.st4.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, float* %A)
- %tmp = getelementptr float* %A, i64 %inc
+ %tmp = getelementptr float, float* %A, i64 %inc
ret float* %tmp
}
@@ -4273,7 +4273,7 @@ define double* @test_v2f64_post_imm_st4(double* %A, double** %ptr, <2 x double>
;CHECK-LABEL: test_v2f64_post_imm_st4:
;CHECK: st4.2d { v0, v1, v2, v3 }, [x0], #64
call void @llvm.aarch64.neon.st4.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, double* %A)
- %tmp = getelementptr double* %A, i64 8
+ %tmp = getelementptr double, double* %A, i64 8
ret double* %tmp
}
@@ -4281,7 +4281,7 @@ define double* @test_v2f64_post_reg_st4(double* %A, double** %ptr, <2 x double>
;CHECK-LABEL: test_v2f64_post_reg_st4:
;CHECK: st4.2d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
call void @llvm.aarch64.neon.st4.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, double* %A)
- %tmp = getelementptr double* %A, i64 %inc
+ %tmp = getelementptr double, double* %A, i64 %inc
ret double* %tmp
}
@@ -4292,7 +4292,7 @@ define double* @test_v1f64_post_imm_st4(double* %A, double** %ptr, <1 x double>
;CHECK-LABEL: test_v1f64_post_imm_st4:
;CHECK: st1.1d { v0, v1, v2, v3 }, [x0], #32
call void @llvm.aarch64.neon.st4.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, double* %A)
- %tmp = getelementptr double* %A, i64 4
+ %tmp = getelementptr double, double* %A, i64 4
ret double* %tmp
}
@@ -4300,7 +4300,7 @@ define double* @test_v1f64_post_reg_st4(double* %A, double** %ptr, <1 x double>
;CHECK-LABEL: test_v1f64_post_reg_st4:
;CHECK: st1.1d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
call void @llvm.aarch64.neon.st4.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, double* %A)
- %tmp = getelementptr double* %A, i64 %inc
+ %tmp = getelementptr double, double* %A, i64 %inc
ret double* %tmp
}
@@ -4311,7 +4311,7 @@ define i8* @test_v16i8_post_imm_st1x2(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8>
;CHECK-LABEL: test_v16i8_post_imm_st1x2:
;CHECK: st1.16b { v0, v1 }, [x0], #32
call void @llvm.aarch64.neon.st1x2.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, i8* %A)
- %tmp = getelementptr i8* %A, i32 32
+ %tmp = getelementptr i8, i8* %A, i32 32
ret i8* %tmp
}
@@ -4319,7 +4319,7 @@ define i8* @test_v16i8_post_reg_st1x2(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8>
;CHECK-LABEL: test_v16i8_post_reg_st1x2:
;CHECK: st1.16b { v0, v1 }, [x0], x{{[0-9]+}}
call void @llvm.aarch64.neon.st1x2.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, i8* %A)
- %tmp = getelementptr i8* %A, i64 %inc
+ %tmp = getelementptr i8, i8* %A, i64 %inc
ret i8* %tmp
}
@@ -4330,7 +4330,7 @@ define i8* @test_v8i8_post_imm_st1x2(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C
;CHECK-LABEL: test_v8i8_post_imm_st1x2:
;CHECK: st1.8b { v0, v1 }, [x0], #16
call void @llvm.aarch64.neon.st1x2.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, i8* %A)
- %tmp = getelementptr i8* %A, i32 16
+ %tmp = getelementptr i8, i8* %A, i32 16
ret i8* %tmp
}
@@ -4338,7 +4338,7 @@ define i8* @test_v8i8_post_reg_st1x2(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C
;CHECK-LABEL: test_v8i8_post_reg_st1x2:
;CHECK: st1.8b { v0, v1 }, [x0], x{{[0-9]+}}
call void @llvm.aarch64.neon.st1x2.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, i8* %A)
- %tmp = getelementptr i8* %A, i64 %inc
+ %tmp = getelementptr i8, i8* %A, i64 %inc
ret i8* %tmp
}
@@ -4349,7 +4349,7 @@ define i16* @test_v8i16_post_imm_st1x2(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i
;CHECK-LABEL: test_v8i16_post_imm_st1x2:
;CHECK: st1.8h { v0, v1 }, [x0], #32
call void @llvm.aarch64.neon.st1x2.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, i16* %A)
- %tmp = getelementptr i16* %A, i32 16
+ %tmp = getelementptr i16, i16* %A, i32 16
ret i16* %tmp
}
@@ -4357,7 +4357,7 @@ define i16* @test_v8i16_post_reg_st1x2(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i
;CHECK-LABEL: test_v8i16_post_reg_st1x2:
;CHECK: st1.8h { v0, v1 }, [x0], x{{[0-9]+}}
call void @llvm.aarch64.neon.st1x2.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, i16* %A)
- %tmp = getelementptr i16* %A, i64 %inc
+ %tmp = getelementptr i16, i16* %A, i64 %inc
ret i16* %tmp
}
@@ -4368,7 +4368,7 @@ define i16* @test_v4i16_post_imm_st1x2(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i
;CHECK-LABEL: test_v4i16_post_imm_st1x2:
;CHECK: st1.4h { v0, v1 }, [x0], #16
call void @llvm.aarch64.neon.st1x2.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, i16* %A)
- %tmp = getelementptr i16* %A, i32 8
+ %tmp = getelementptr i16, i16* %A, i32 8
ret i16* %tmp
}
@@ -4376,7 +4376,7 @@ define i16* @test_v4i16_post_reg_st1x2(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i
;CHECK-LABEL: test_v4i16_post_reg_st1x2:
;CHECK: st1.4h { v0, v1 }, [x0], x{{[0-9]+}}
call void @llvm.aarch64.neon.st1x2.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, i16* %A)
- %tmp = getelementptr i16* %A, i64 %inc
+ %tmp = getelementptr i16, i16* %A, i64 %inc
ret i16* %tmp
}
@@ -4387,7 +4387,7 @@ define i32* @test_v4i32_post_imm_st1x2(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i
;CHECK-LABEL: test_v4i32_post_imm_st1x2:
;CHECK: st1.4s { v0, v1 }, [x0], #32
call void @llvm.aarch64.neon.st1x2.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, i32* %A)
- %tmp = getelementptr i32* %A, i32 8
+ %tmp = getelementptr i32, i32* %A, i32 8
ret i32* %tmp
}
@@ -4395,7 +4395,7 @@ define i32* @test_v4i32_post_reg_st1x2(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i
;CHECK-LABEL: test_v4i32_post_reg_st1x2:
;CHECK: st1.4s { v0, v1 }, [x0], x{{[0-9]+}}
call void @llvm.aarch64.neon.st1x2.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, i32* %A)
- %tmp = getelementptr i32* %A, i64 %inc
+ %tmp = getelementptr i32, i32* %A, i64 %inc
ret i32* %tmp
}
@@ -4406,7 +4406,7 @@ define i32* @test_v2i32_post_imm_st1x2(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i
;CHECK-LABEL: test_v2i32_post_imm_st1x2:
;CHECK: st1.2s { v0, v1 }, [x0], #16
call void @llvm.aarch64.neon.st1x2.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, i32* %A)
- %tmp = getelementptr i32* %A, i32 4
+ %tmp = getelementptr i32, i32* %A, i32 4
ret i32* %tmp
}
@@ -4414,7 +4414,7 @@ define i32* @test_v2i32_post_reg_st1x2(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i
;CHECK-LABEL: test_v2i32_post_reg_st1x2:
;CHECK: st1.2s { v0, v1 }, [x0], x{{[0-9]+}}
call void @llvm.aarch64.neon.st1x2.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, i32* %A)
- %tmp = getelementptr i32* %A, i64 %inc
+ %tmp = getelementptr i32, i32* %A, i64 %inc
ret i32* %tmp
}
@@ -4425,7 +4425,7 @@ define i64* @test_v2i64_post_imm_st1x2(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i
;CHECK-LABEL: test_v2i64_post_imm_st1x2:
;CHECK: st1.2d { v0, v1 }, [x0], #32
call void @llvm.aarch64.neon.st1x2.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, i64* %A)
- %tmp = getelementptr i64* %A, i64 4
+ %tmp = getelementptr i64, i64* %A, i64 4
ret i64* %tmp
}
@@ -4433,7 +4433,7 @@ define i64* @test_v2i64_post_reg_st1x2(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i
;CHECK-LABEL: test_v2i64_post_reg_st1x2:
;CHECK: st1.2d { v0, v1 }, [x0], x{{[0-9]+}}
call void @llvm.aarch64.neon.st1x2.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, i64* %A)
- %tmp = getelementptr i64* %A, i64 %inc
+ %tmp = getelementptr i64, i64* %A, i64 %inc
ret i64* %tmp
}
@@ -4444,7 +4444,7 @@ define i64* @test_v1i64_post_imm_st1x2(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i
;CHECK-LABEL: test_v1i64_post_imm_st1x2:
;CHECK: st1.1d { v0, v1 }, [x0], #16
call void @llvm.aarch64.neon.st1x2.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, i64* %A)
- %tmp = getelementptr i64* %A, i64 2
+ %tmp = getelementptr i64, i64* %A, i64 2
ret i64* %tmp
}
@@ -4452,7 +4452,7 @@ define i64* @test_v1i64_post_reg_st1x2(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i
;CHECK-LABEL: test_v1i64_post_reg_st1x2:
;CHECK: st1.1d { v0, v1 }, [x0], x{{[0-9]+}}
call void @llvm.aarch64.neon.st1x2.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, i64* %A)
- %tmp = getelementptr i64* %A, i64 %inc
+ %tmp = getelementptr i64, i64* %A, i64 %inc
ret i64* %tmp
}
@@ -4463,7 +4463,7 @@ define float* @test_v4f32_post_imm_st1x2(float* %A, float** %ptr, <4 x float> %B
;CHECK-LABEL: test_v4f32_post_imm_st1x2:
;CHECK: st1.4s { v0, v1 }, [x0], #32
call void @llvm.aarch64.neon.st1x2.v4f32.p0f32(<4 x float> %B, <4 x float> %C, float* %A)
- %tmp = getelementptr float* %A, i32 8
+ %tmp = getelementptr float, float* %A, i32 8
ret float* %tmp
}
@@ -4471,7 +4471,7 @@ define float* @test_v4f32_post_reg_st1x2(float* %A, float** %ptr, <4 x float> %B
;CHECK-LABEL: test_v4f32_post_reg_st1x2:
;CHECK: st1.4s { v0, v1 }, [x0], x{{[0-9]+}}
call void @llvm.aarch64.neon.st1x2.v4f32.p0f32(<4 x float> %B, <4 x float> %C, float* %A)
- %tmp = getelementptr float* %A, i64 %inc
+ %tmp = getelementptr float, float* %A, i64 %inc
ret float* %tmp
}
@@ -4482,7 +4482,7 @@ define float* @test_v2f32_post_imm_st1x2(float* %A, float** %ptr, <2 x float> %B
;CHECK-LABEL: test_v2f32_post_imm_st1x2:
;CHECK: st1.2s { v0, v1 }, [x0], #16
call void @llvm.aarch64.neon.st1x2.v2f32.p0f32(<2 x float> %B, <2 x float> %C, float* %A)
- %tmp = getelementptr float* %A, i32 4
+ %tmp = getelementptr float, float* %A, i32 4
ret float* %tmp
}
@@ -4490,7 +4490,7 @@ define float* @test_v2f32_post_reg_st1x2(float* %A, float** %ptr, <2 x float> %B
;CHECK-LABEL: test_v2f32_post_reg_st1x2:
;CHECK: st1.2s { v0, v1 }, [x0], x{{[0-9]+}}
call void @llvm.aarch64.neon.st1x2.v2f32.p0f32(<2 x float> %B, <2 x float> %C, float* %A)
- %tmp = getelementptr float* %A, i64 %inc
+ %tmp = getelementptr float, float* %A, i64 %inc
ret float* %tmp
}
@@ -4501,7 +4501,7 @@ define double* @test_v2f64_post_imm_st1x2(double* %A, double** %ptr, <2 x double
;CHECK-LABEL: test_v2f64_post_imm_st1x2:
;CHECK: st1.2d { v0, v1 }, [x0], #32
call void @llvm.aarch64.neon.st1x2.v2f64.p0f64(<2 x double> %B, <2 x double> %C, double* %A)
- %tmp = getelementptr double* %A, i64 4
+ %tmp = getelementptr double, double* %A, i64 4
ret double* %tmp
}
@@ -4509,7 +4509,7 @@ define double* @test_v2f64_post_reg_st1x2(double* %A, double** %ptr, <2 x double
;CHECK-LABEL: test_v2f64_post_reg_st1x2:
;CHECK: st1.2d { v0, v1 }, [x0], x{{[0-9]+}}
call void @llvm.aarch64.neon.st1x2.v2f64.p0f64(<2 x double> %B, <2 x double> %C, double* %A)
- %tmp = getelementptr double* %A, i64 %inc
+ %tmp = getelementptr double, double* %A, i64 %inc
ret double* %tmp
}
@@ -4520,7 +4520,7 @@ define double* @test_v1f64_post_imm_st1x2(double* %A, double** %ptr, <1 x double
;CHECK-LABEL: test_v1f64_post_imm_st1x2:
;CHECK: st1.1d { v0, v1 }, [x0], #16
call void @llvm.aarch64.neon.st1x2.v1f64.p0f64(<1 x double> %B, <1 x double> %C, double* %A)
- %tmp = getelementptr double* %A, i64 2
+ %tmp = getelementptr double, double* %A, i64 2
ret double* %tmp
}
@@ -4528,7 +4528,7 @@ define double* @test_v1f64_post_reg_st1x2(double* %A, double** %ptr, <1 x double
;CHECK-LABEL: test_v1f64_post_reg_st1x2:
;CHECK: st1.1d { v0, v1 }, [x0], x{{[0-9]+}}
call void @llvm.aarch64.neon.st1x2.v1f64.p0f64(<1 x double> %B, <1 x double> %C, double* %A)
- %tmp = getelementptr double* %A, i64 %inc
+ %tmp = getelementptr double, double* %A, i64 %inc
ret double* %tmp
}
@@ -4539,7 +4539,7 @@ define i8* @test_v16i8_post_imm_st1x3(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8>
;CHECK-LABEL: test_v16i8_post_imm_st1x3:
;CHECK: st1.16b { v0, v1, v2 }, [x0], #48
call void @llvm.aarch64.neon.st1x3.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i8* %A)
- %tmp = getelementptr i8* %A, i32 48
+ %tmp = getelementptr i8, i8* %A, i32 48
ret i8* %tmp
}
@@ -4547,7 +4547,7 @@ define i8* @test_v16i8_post_reg_st1x3(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8>
;CHECK-LABEL: test_v16i8_post_reg_st1x3:
;CHECK: st1.16b { v0, v1, v2 }, [x0], x{{[0-9]+}}
call void @llvm.aarch64.neon.st1x3.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i8* %A)
- %tmp = getelementptr i8* %A, i64 %inc
+ %tmp = getelementptr i8, i8* %A, i64 %inc
ret i8* %tmp
}
@@ -4558,7 +4558,7 @@ define i8* @test_v8i8_post_imm_st1x3(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C
;CHECK-LABEL: test_v8i8_post_imm_st1x3:
;CHECK: st1.8b { v0, v1, v2 }, [x0], #24
call void @llvm.aarch64.neon.st1x3.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i8* %A)
- %tmp = getelementptr i8* %A, i32 24
+ %tmp = getelementptr i8, i8* %A, i32 24
ret i8* %tmp
}
@@ -4566,7 +4566,7 @@ define i8* @test_v8i8_post_reg_st1x3(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C
;CHECK-LABEL: test_v8i8_post_reg_st1x3:
;CHECK: st1.8b { v0, v1, v2 }, [x0], x{{[0-9]+}}
call void @llvm.aarch64.neon.st1x3.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i8* %A)
- %tmp = getelementptr i8* %A, i64 %inc
+ %tmp = getelementptr i8, i8* %A, i64 %inc
ret i8* %tmp
}
@@ -4577,7 +4577,7 @@ define i16* @test_v8i16_post_imm_st1x3(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i
;CHECK-LABEL: test_v8i16_post_imm_st1x3:
;CHECK: st1.8h { v0, v1, v2 }, [x0], #48
call void @llvm.aarch64.neon.st1x3.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i16* %A)
- %tmp = getelementptr i16* %A, i32 24
+ %tmp = getelementptr i16, i16* %A, i32 24
ret i16* %tmp
}
@@ -4585,7 +4585,7 @@ define i16* @test_v8i16_post_reg_st1x3(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i
;CHECK-LABEL: test_v8i16_post_reg_st1x3:
;CHECK: st1.8h { v0, v1, v2 }, [x0], x{{[0-9]+}}
call void @llvm.aarch64.neon.st1x3.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i16* %A)
- %tmp = getelementptr i16* %A, i64 %inc
+ %tmp = getelementptr i16, i16* %A, i64 %inc
ret i16* %tmp
}
@@ -4596,7 +4596,7 @@ define i16* @test_v4i16_post_imm_st1x3(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i
;CHECK-LABEL: test_v4i16_post_imm_st1x3:
;CHECK: st1.4h { v0, v1, v2 }, [x0], #24
call void @llvm.aarch64.neon.st1x3.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i16* %A)
- %tmp = getelementptr i16* %A, i32 12
+ %tmp = getelementptr i16, i16* %A, i32 12
ret i16* %tmp
}
@@ -4604,7 +4604,7 @@ define i16* @test_v4i16_post_reg_st1x3(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i
;CHECK-LABEL: test_v4i16_post_reg_st1x3:
;CHECK: st1.4h { v0, v1, v2 }, [x0], x{{[0-9]+}}
call void @llvm.aarch64.neon.st1x3.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i16* %A)
- %tmp = getelementptr i16* %A, i64 %inc
+ %tmp = getelementptr i16, i16* %A, i64 %inc
ret i16* %tmp
}
@@ -4615,7 +4615,7 @@ define i32* @test_v4i32_post_imm_st1x3(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i
;CHECK-LABEL: test_v4i32_post_imm_st1x3:
;CHECK: st1.4s { v0, v1, v2 }, [x0], #48
call void @llvm.aarch64.neon.st1x3.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i32* %A)
- %tmp = getelementptr i32* %A, i32 12
+ %tmp = getelementptr i32, i32* %A, i32 12
ret i32* %tmp
}
@@ -4623,7 +4623,7 @@ define i32* @test_v4i32_post_reg_st1x3(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i
;CHECK-LABEL: test_v4i32_post_reg_st1x3:
;CHECK: st1.4s { v0, v1, v2 }, [x0], x{{[0-9]+}}
call void @llvm.aarch64.neon.st1x3.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i32* %A)
- %tmp = getelementptr i32* %A, i64 %inc
+ %tmp = getelementptr i32, i32* %A, i64 %inc
ret i32* %tmp
}
@@ -4634,7 +4634,7 @@ define i32* @test_v2i32_post_imm_st1x3(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i
;CHECK-LABEL: test_v2i32_post_imm_st1x3:
;CHECK: st1.2s { v0, v1, v2 }, [x0], #24
call void @llvm.aarch64.neon.st1x3.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i32* %A)
- %tmp = getelementptr i32* %A, i32 6
+ %tmp = getelementptr i32, i32* %A, i32 6
ret i32* %tmp
}
@@ -4642,7 +4642,7 @@ define i32* @test_v2i32_post_reg_st1x3(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i
;CHECK-LABEL: test_v2i32_post_reg_st1x3:
;CHECK: st1.2s { v0, v1, v2 }, [x0], x{{[0-9]+}}
call void @llvm.aarch64.neon.st1x3.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i32* %A)
- %tmp = getelementptr i32* %A, i64 %inc
+ %tmp = getelementptr i32, i32* %A, i64 %inc
ret i32* %tmp
}
@@ -4653,7 +4653,7 @@ define i64* @test_v2i64_post_imm_st1x3(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i
;CHECK-LABEL: test_v2i64_post_imm_st1x3:
;CHECK: st1.2d { v0, v1, v2 }, [x0], #48
call void @llvm.aarch64.neon.st1x3.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64* %A)
- %tmp = getelementptr i64* %A, i64 6
+ %tmp = getelementptr i64, i64* %A, i64 6
ret i64* %tmp
}
@@ -4661,7 +4661,7 @@ define i64* @test_v2i64_post_reg_st1x3(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i
;CHECK-LABEL: test_v2i64_post_reg_st1x3:
;CHECK: st1.2d { v0, v1, v2 }, [x0], x{{[0-9]+}}
call void @llvm.aarch64.neon.st1x3.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64* %A)
- %tmp = getelementptr i64* %A, i64 %inc
+ %tmp = getelementptr i64, i64* %A, i64 %inc
ret i64* %tmp
}
@@ -4672,7 +4672,7 @@ define i64* @test_v1i64_post_imm_st1x3(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i
;CHECK-LABEL: test_v1i64_post_imm_st1x3:
;CHECK: st1.1d { v0, v1, v2 }, [x0], #24
call void @llvm.aarch64.neon.st1x3.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64* %A)
- %tmp = getelementptr i64* %A, i64 3
+ %tmp = getelementptr i64, i64* %A, i64 3
ret i64* %tmp
}
@@ -4680,7 +4680,7 @@ define i64* @test_v1i64_post_reg_st1x3(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i
;CHECK-LABEL: test_v1i64_post_reg_st1x3:
;CHECK: st1.1d { v0, v1, v2 }, [x0], x{{[0-9]+}}
call void @llvm.aarch64.neon.st1x3.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64* %A)
- %tmp = getelementptr i64* %A, i64 %inc
+ %tmp = getelementptr i64, i64* %A, i64 %inc
ret i64* %tmp
}
@@ -4691,7 +4691,7 @@ define float* @test_v4f32_post_imm_st1x3(float* %A, float** %ptr, <4 x float> %B
;CHECK-LABEL: test_v4f32_post_imm_st1x3:
;CHECK: st1.4s { v0, v1, v2 }, [x0], #48
call void @llvm.aarch64.neon.st1x3.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, float* %A)
- %tmp = getelementptr float* %A, i32 12
+ %tmp = getelementptr float, float* %A, i32 12
ret float* %tmp
}
@@ -4699,7 +4699,7 @@ define float* @test_v4f32_post_reg_st1x3(float* %A, float** %ptr, <4 x float> %B
;CHECK-LABEL: test_v4f32_post_reg_st1x3:
;CHECK: st1.4s { v0, v1, v2 }, [x0], x{{[0-9]+}}
call void @llvm.aarch64.neon.st1x3.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, float* %A)
- %tmp = getelementptr float* %A, i64 %inc
+ %tmp = getelementptr float, float* %A, i64 %inc
ret float* %tmp
}
@@ -4710,7 +4710,7 @@ define float* @test_v2f32_post_imm_st1x3(float* %A, float** %ptr, <2 x float> %B
;CHECK-LABEL: test_v2f32_post_imm_st1x3:
;CHECK: st1.2s { v0, v1, v2 }, [x0], #24
call void @llvm.aarch64.neon.st1x3.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, float* %A)
- %tmp = getelementptr float* %A, i32 6
+ %tmp = getelementptr float, float* %A, i32 6
ret float* %tmp
}
@@ -4718,7 +4718,7 @@ define float* @test_v2f32_post_reg_st1x3(float* %A, float** %ptr, <2 x float> %B
;CHECK-LABEL: test_v2f32_post_reg_st1x3:
;CHECK: st1.2s { v0, v1, v2 }, [x0], x{{[0-9]+}}
call void @llvm.aarch64.neon.st1x3.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, float* %A)
- %tmp = getelementptr float* %A, i64 %inc
+ %tmp = getelementptr float, float* %A, i64 %inc
ret float* %tmp
}
@@ -4729,7 +4729,7 @@ define double* @test_v2f64_post_imm_st1x3(double* %A, double** %ptr, <2 x double
;CHECK-LABEL: test_v2f64_post_imm_st1x3:
;CHECK: st1.2d { v0, v1, v2 }, [x0], #48
call void @llvm.aarch64.neon.st1x3.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, double* %A)
- %tmp = getelementptr double* %A, i64 6
+ %tmp = getelementptr double, double* %A, i64 6
ret double* %tmp
}
@@ -4737,7 +4737,7 @@ define double* @test_v2f64_post_reg_st1x3(double* %A, double** %ptr, <2 x double
;CHECK-LABEL: test_v2f64_post_reg_st1x3:
;CHECK: st1.2d { v0, v1, v2 }, [x0], x{{[0-9]+}}
call void @llvm.aarch64.neon.st1x3.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, double* %A)
- %tmp = getelementptr double* %A, i64 %inc
+ %tmp = getelementptr double, double* %A, i64 %inc
ret double* %tmp
}
@@ -4748,7 +4748,7 @@ define double* @test_v1f64_post_imm_st1x3(double* %A, double** %ptr, <1 x double
;CHECK-LABEL: test_v1f64_post_imm_st1x3:
;CHECK: st1.1d { v0, v1, v2 }, [x0], #24
call void @llvm.aarch64.neon.st1x3.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, double* %A)
- %tmp = getelementptr double* %A, i64 3
+ %tmp = getelementptr double, double* %A, i64 3
ret double* %tmp
}
@@ -4756,7 +4756,7 @@ define double* @test_v1f64_post_reg_st1x3(double* %A, double** %ptr, <1 x double
;CHECK-LABEL: test_v1f64_post_reg_st1x3:
;CHECK: st1.1d { v0, v1, v2 }, [x0], x{{[0-9]+}}
call void @llvm.aarch64.neon.st1x3.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, double* %A)
- %tmp = getelementptr double* %A, i64 %inc
+ %tmp = getelementptr double, double* %A, i64 %inc
ret double* %tmp
}
@@ -4767,7 +4767,7 @@ define i8* @test_v16i8_post_imm_st1x4(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8>
;CHECK-LABEL: test_v16i8_post_imm_st1x4:
;CHECK: st1.16b { v0, v1, v2, v3 }, [x0], #64
call void @llvm.aarch64.neon.st1x4.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i8* %A)
- %tmp = getelementptr i8* %A, i32 64
+ %tmp = getelementptr i8, i8* %A, i32 64
ret i8* %tmp
}
@@ -4775,7 +4775,7 @@ define i8* @test_v16i8_post_reg_st1x4(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8>
;CHECK-LABEL: test_v16i8_post_reg_st1x4:
;CHECK: st1.16b { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
call void @llvm.aarch64.neon.st1x4.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i8* %A)
- %tmp = getelementptr i8* %A, i64 %inc
+ %tmp = getelementptr i8, i8* %A, i64 %inc
ret i8* %tmp
}
@@ -4786,7 +4786,7 @@ define i8* @test_v8i8_post_imm_st1x4(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C
;CHECK-LABEL: test_v8i8_post_imm_st1x4:
;CHECK: st1.8b { v0, v1, v2, v3 }, [x0], #32
call void @llvm.aarch64.neon.st1x4.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i8* %A)
- %tmp = getelementptr i8* %A, i32 32
+ %tmp = getelementptr i8, i8* %A, i32 32
ret i8* %tmp
}
@@ -4794,7 +4794,7 @@ define i8* @test_v8i8_post_reg_st1x4(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C
;CHECK-LABEL: test_v8i8_post_reg_st1x4:
;CHECK: st1.8b { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
call void @llvm.aarch64.neon.st1x4.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i8* %A)
- %tmp = getelementptr i8* %A, i64 %inc
+ %tmp = getelementptr i8, i8* %A, i64 %inc
ret i8* %tmp
}
@@ -4805,7 +4805,7 @@ define i16* @test_v8i16_post_imm_st1x4(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i
;CHECK-LABEL: test_v8i16_post_imm_st1x4:
;CHECK: st1.8h { v0, v1, v2, v3 }, [x0], #64
call void @llvm.aarch64.neon.st1x4.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i16* %A)
- %tmp = getelementptr i16* %A, i32 32
+ %tmp = getelementptr i16, i16* %A, i32 32
ret i16* %tmp
}
@@ -4813,7 +4813,7 @@ define i16* @test_v8i16_post_reg_st1x4(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i
;CHECK-LABEL: test_v8i16_post_reg_st1x4:
;CHECK: st1.8h { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
call void @llvm.aarch64.neon.st1x4.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i16* %A)
- %tmp = getelementptr i16* %A, i64 %inc
+ %tmp = getelementptr i16, i16* %A, i64 %inc
ret i16* %tmp
}
@@ -4824,7 +4824,7 @@ define i16* @test_v4i16_post_imm_st1x4(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i
;CHECK-LABEL: test_v4i16_post_imm_st1x4:
;CHECK: st1.4h { v0, v1, v2, v3 }, [x0], #32
call void @llvm.aarch64.neon.st1x4.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i16* %A)
- %tmp = getelementptr i16* %A, i32 16
+ %tmp = getelementptr i16, i16* %A, i32 16
ret i16* %tmp
}
@@ -4832,7 +4832,7 @@ define i16* @test_v4i16_post_reg_st1x4(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i
;CHECK-LABEL: test_v4i16_post_reg_st1x4:
;CHECK: st1.4h { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
call void @llvm.aarch64.neon.st1x4.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i16* %A)
- %tmp = getelementptr i16* %A, i64 %inc
+ %tmp = getelementptr i16, i16* %A, i64 %inc
ret i16* %tmp
}
@@ -4843,7 +4843,7 @@ define i32* @test_v4i32_post_imm_st1x4(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i
;CHECK-LABEL: test_v4i32_post_imm_st1x4:
;CHECK: st1.4s { v0, v1, v2, v3 }, [x0], #64
call void @llvm.aarch64.neon.st1x4.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i32* %A)
- %tmp = getelementptr i32* %A, i32 16
+ %tmp = getelementptr i32, i32* %A, i32 16
ret i32* %tmp
}
@@ -4851,7 +4851,7 @@ define i32* @test_v4i32_post_reg_st1x4(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i
;CHECK-LABEL: test_v4i32_post_reg_st1x4:
;CHECK: st1.4s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
call void @llvm.aarch64.neon.st1x4.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i32* %A)
- %tmp = getelementptr i32* %A, i64 %inc
+ %tmp = getelementptr i32, i32* %A, i64 %inc
ret i32* %tmp
}
@@ -4862,7 +4862,7 @@ define i32* @test_v2i32_post_imm_st1x4(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i
;CHECK-LABEL: test_v2i32_post_imm_st1x4:
;CHECK: st1.2s { v0, v1, v2, v3 }, [x0], #32
call void @llvm.aarch64.neon.st1x4.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i32* %A)
- %tmp = getelementptr i32* %A, i32 8
+ %tmp = getelementptr i32, i32* %A, i32 8
ret i32* %tmp
}
@@ -4870,7 +4870,7 @@ define i32* @test_v2i32_post_reg_st1x4(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i
;CHECK-LABEL: test_v2i32_post_reg_st1x4:
;CHECK: st1.2s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
call void @llvm.aarch64.neon.st1x4.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i32* %A)
- %tmp = getelementptr i32* %A, i64 %inc
+ %tmp = getelementptr i32, i32* %A, i64 %inc
ret i32* %tmp
}
@@ -4881,7 +4881,7 @@ define i64* @test_v2i64_post_imm_st1x4(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i
;CHECK-LABEL: test_v2i64_post_imm_st1x4:
;CHECK: st1.2d { v0, v1, v2, v3 }, [x0], #64
call void @llvm.aarch64.neon.st1x4.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64* %A)
- %tmp = getelementptr i64* %A, i64 8
+ %tmp = getelementptr i64, i64* %A, i64 8
ret i64* %tmp
}
@@ -4889,7 +4889,7 @@ define i64* @test_v2i64_post_reg_st1x4(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i
;CHECK-LABEL: test_v2i64_post_reg_st1x4:
;CHECK: st1.2d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
call void @llvm.aarch64.neon.st1x4.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64* %A)
- %tmp = getelementptr i64* %A, i64 %inc
+ %tmp = getelementptr i64, i64* %A, i64 %inc
ret i64* %tmp
}
@@ -4900,7 +4900,7 @@ define i64* @test_v1i64_post_imm_st1x4(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i
;CHECK-LABEL: test_v1i64_post_imm_st1x4:
;CHECK: st1.1d { v0, v1, v2, v3 }, [x0], #32
call void @llvm.aarch64.neon.st1x4.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64* %A)
- %tmp = getelementptr i64* %A, i64 4
+ %tmp = getelementptr i64, i64* %A, i64 4
ret i64* %tmp
}
@@ -4908,7 +4908,7 @@ define i64* @test_v1i64_post_reg_st1x4(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i
;CHECK-LABEL: test_v1i64_post_reg_st1x4:
;CHECK: st1.1d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
call void @llvm.aarch64.neon.st1x4.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64* %A)
- %tmp = getelementptr i64* %A, i64 %inc
+ %tmp = getelementptr i64, i64* %A, i64 %inc
ret i64* %tmp
}
@@ -4919,7 +4919,7 @@ define float* @test_v4f32_post_imm_st1x4(float* %A, float** %ptr, <4 x float> %B
;CHECK-LABEL: test_v4f32_post_imm_st1x4:
;CHECK: st1.4s { v0, v1, v2, v3 }, [x0], #64
call void @llvm.aarch64.neon.st1x4.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, float* %A)
- %tmp = getelementptr float* %A, i32 16
+ %tmp = getelementptr float, float* %A, i32 16
ret float* %tmp
}
@@ -4927,7 +4927,7 @@ define float* @test_v4f32_post_reg_st1x4(float* %A, float** %ptr, <4 x float> %B
;CHECK-LABEL: test_v4f32_post_reg_st1x4:
;CHECK: st1.4s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
call void @llvm.aarch64.neon.st1x4.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, float* %A)
- %tmp = getelementptr float* %A, i64 %inc
+ %tmp = getelementptr float, float* %A, i64 %inc
ret float* %tmp
}
@@ -4938,7 +4938,7 @@ define float* @test_v2f32_post_imm_st1x4(float* %A, float** %ptr, <2 x float> %B
;CHECK-LABEL: test_v2f32_post_imm_st1x4:
;CHECK: st1.2s { v0, v1, v2, v3 }, [x0], #32
call void @llvm.aarch64.neon.st1x4.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, float* %A)
- %tmp = getelementptr float* %A, i32 8
+ %tmp = getelementptr float, float* %A, i32 8
ret float* %tmp
}
@@ -4946,7 +4946,7 @@ define float* @test_v2f32_post_reg_st1x4(float* %A, float** %ptr, <2 x float> %B
;CHECK-LABEL: test_v2f32_post_reg_st1x4:
;CHECK: st1.2s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
call void @llvm.aarch64.neon.st1x4.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, float* %A)
- %tmp = getelementptr float* %A, i64 %inc
+ %tmp = getelementptr float, float* %A, i64 %inc
ret float* %tmp
}
@@ -4957,7 +4957,7 @@ define double* @test_v2f64_post_imm_st1x4(double* %A, double** %ptr, <2 x double
;CHECK-LABEL: test_v2f64_post_imm_st1x4:
;CHECK: st1.2d { v0, v1, v2, v3 }, [x0], #64
call void @llvm.aarch64.neon.st1x4.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, double* %A)
- %tmp = getelementptr double* %A, i64 8
+ %tmp = getelementptr double, double* %A, i64 8
ret double* %tmp
}
@@ -4965,7 +4965,7 @@ define double* @test_v2f64_post_reg_st1x4(double* %A, double** %ptr, <2 x double
;CHECK-LABEL: test_v2f64_post_reg_st1x4:
;CHECK: st1.2d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
call void @llvm.aarch64.neon.st1x4.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, double* %A)
- %tmp = getelementptr double* %A, i64 %inc
+ %tmp = getelementptr double, double* %A, i64 %inc
ret double* %tmp
}
@@ -4976,7 +4976,7 @@ define double* @test_v1f64_post_imm_st1x4(double* %A, double** %ptr, <1 x double
;CHECK-LABEL: test_v1f64_post_imm_st1x4:
;CHECK: st1.1d { v0, v1, v2, v3 }, [x0], #32
call void @llvm.aarch64.neon.st1x4.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, double* %A)
- %tmp = getelementptr double* %A, i64 4
+ %tmp = getelementptr double, double* %A, i64 4
ret double* %tmp
}
@@ -4984,7 +4984,7 @@ define double* @test_v1f64_post_reg_st1x4(double* %A, double** %ptr, <1 x double
;CHECK-LABEL: test_v1f64_post_reg_st1x4:
;CHECK: st1.1d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
call void @llvm.aarch64.neon.st1x4.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, double* %A)
- %tmp = getelementptr double* %A, i64 %inc
+ %tmp = getelementptr double, double* %A, i64 %inc
ret double* %tmp
}
@@ -4993,13 +4993,13 @@ declare void @llvm.aarch64.neon.st1x4.v1f64.p0f64(<1 x double>, <1 x double>, <1
define i8* @test_v16i8_post_imm_st2lanelane(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C) {
call void @llvm.aarch64.neon.st2lanelane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, i64 0, i64 1, i8* %A)
- %tmp = getelementptr i8* %A, i32 2
+ %tmp = getelementptr i8, i8* %A, i32 2
ret i8* %tmp
}
define i8* @test_v16i8_post_reg_st2lanelane(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, i64 %inc) {
call void @llvm.aarch64.neon.st2lanelane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, i64 0, i64 1, i8* %A)
- %tmp = getelementptr i8* %A, i64 %inc
+ %tmp = getelementptr i8, i8* %A, i64 %inc
ret i8* %tmp
}
@@ -5010,7 +5010,7 @@ define i8* @test_v16i8_post_imm_st2lane(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i
;CHECK-LABEL: test_v16i8_post_imm_st2lane:
;CHECK: st2.b { v0, v1 }[0], [x0], #2
call void @llvm.aarch64.neon.st2lane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, i64 0, i8* %A)
- %tmp = getelementptr i8* %A, i32 2
+ %tmp = getelementptr i8, i8* %A, i32 2
ret i8* %tmp
}
@@ -5018,7 +5018,7 @@ define i8* @test_v16i8_post_reg_st2lane(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i
;CHECK-LABEL: test_v16i8_post_reg_st2lane:
;CHECK: st2.b { v0, v1 }[0], [x0], x{{[0-9]+}}
call void @llvm.aarch64.neon.st2lane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, i64 0, i8* %A)
- %tmp = getelementptr i8* %A, i64 %inc
+ %tmp = getelementptr i8, i8* %A, i64 %inc
ret i8* %tmp
}
@@ -5029,7 +5029,7 @@ define i8* @test_v8i8_post_imm_st2lane(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8>
;CHECK-LABEL: test_v8i8_post_imm_st2lane:
;CHECK: st2.b { v0, v1 }[0], [x0], #2
call void @llvm.aarch64.neon.st2lane.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, i64 0, i8* %A)
- %tmp = getelementptr i8* %A, i32 2
+ %tmp = getelementptr i8, i8* %A, i32 2
ret i8* %tmp
}
@@ -5037,7 +5037,7 @@ define i8* @test_v8i8_post_reg_st2lane(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8>
;CHECK-LABEL: test_v8i8_post_reg_st2lane:
;CHECK: st2.b { v0, v1 }[0], [x0], x{{[0-9]+}}
call void @llvm.aarch64.neon.st2lane.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, i64 0, i8* %A)
- %tmp = getelementptr i8* %A, i64 %inc
+ %tmp = getelementptr i8, i8* %A, i64 %inc
ret i8* %tmp
}
@@ -5048,7 +5048,7 @@ define i16* @test_v8i16_post_imm_st2lane(i16* %A, i16** %ptr, <8 x i16> %B, <8 x
;CHECK-LABEL: test_v8i16_post_imm_st2lane:
;CHECK: st2.h { v0, v1 }[0], [x0], #4
call void @llvm.aarch64.neon.st2lane.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, i64 0, i16* %A)
- %tmp = getelementptr i16* %A, i32 2
+ %tmp = getelementptr i16, i16* %A, i32 2
ret i16* %tmp
}
@@ -5056,7 +5056,7 @@ define i16* @test_v8i16_post_reg_st2lane(i16* %A, i16** %ptr, <8 x i16> %B, <8 x
;CHECK-LABEL: test_v8i16_post_reg_st2lane:
;CHECK: st2.h { v0, v1 }[0], [x0], x{{[0-9]+}}
call void @llvm.aarch64.neon.st2lane.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, i64 0, i16* %A)
- %tmp = getelementptr i16* %A, i64 %inc
+ %tmp = getelementptr i16, i16* %A, i64 %inc
ret i16* %tmp
}
@@ -5067,7 +5067,7 @@ define i16* @test_v4i16_post_imm_st2lane(i16* %A, i16** %ptr, <4 x i16> %B, <4 x
;CHECK-LABEL: test_v4i16_post_imm_st2lane:
;CHECK: st2.h { v0, v1 }[0], [x0], #4
call void @llvm.aarch64.neon.st2lane.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, i64 0, i16* %A)
- %tmp = getelementptr i16* %A, i32 2
+ %tmp = getelementptr i16, i16* %A, i32 2
ret i16* %tmp
}
@@ -5075,7 +5075,7 @@ define i16* @test_v4i16_post_reg_st2lane(i16* %A, i16** %ptr, <4 x i16> %B, <4 x
;CHECK-LABEL: test_v4i16_post_reg_st2lane:
;CHECK: st2.h { v0, v1 }[0], [x0], x{{[0-9]+}}
call void @llvm.aarch64.neon.st2lane.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, i64 0, i16* %A)
- %tmp = getelementptr i16* %A, i64 %inc
+ %tmp = getelementptr i16, i16* %A, i64 %inc
ret i16* %tmp
}
@@ -5086,7 +5086,7 @@ define i32* @test_v4i32_post_imm_st2lane(i32* %A, i32** %ptr, <4 x i32> %B, <4 x
;CHECK-LABEL: test_v4i32_post_imm_st2lane:
;CHECK: st2.s { v0, v1 }[0], [x0], #8
call void @llvm.aarch64.neon.st2lane.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, i64 0, i32* %A)
- %tmp = getelementptr i32* %A, i32 2
+ %tmp = getelementptr i32, i32* %A, i32 2
ret i32* %tmp
}
@@ -5094,7 +5094,7 @@ define i32* @test_v4i32_post_reg_st2lane(i32* %A, i32** %ptr, <4 x i32> %B, <4 x
;CHECK-LABEL: test_v4i32_post_reg_st2lane:
;CHECK: st2.s { v0, v1 }[0], [x0], x{{[0-9]+}}
call void @llvm.aarch64.neon.st2lane.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, i64 0, i32* %A)
- %tmp = getelementptr i32* %A, i64 %inc
+ %tmp = getelementptr i32, i32* %A, i64 %inc
ret i32* %tmp
}
@@ -5105,7 +5105,7 @@ define i32* @test_v2i32_post_imm_st2lane(i32* %A, i32** %ptr, <2 x i32> %B, <2 x
;CHECK-LABEL: test_v2i32_post_imm_st2lane:
;CHECK: st2.s { v0, v1 }[0], [x0], #8
call void @llvm.aarch64.neon.st2lane.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, i64 0, i32* %A)
- %tmp = getelementptr i32* %A, i32 2
+ %tmp = getelementptr i32, i32* %A, i32 2
ret i32* %tmp
}
@@ -5113,7 +5113,7 @@ define i32* @test_v2i32_post_reg_st2lane(i32* %A, i32** %ptr, <2 x i32> %B, <2 x
;CHECK-LABEL: test_v2i32_post_reg_st2lane:
;CHECK: st2.s { v0, v1 }[0], [x0], x{{[0-9]+}}
call void @llvm.aarch64.neon.st2lane.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, i64 0, i32* %A)
- %tmp = getelementptr i32* %A, i64 %inc
+ %tmp = getelementptr i32, i32* %A, i64 %inc
ret i32* %tmp
}
@@ -5124,7 +5124,7 @@ define i64* @test_v2i64_post_imm_st2lane(i64* %A, i64** %ptr, <2 x i64> %B, <2 x
;CHECK-LABEL: test_v2i64_post_imm_st2lane:
;CHECK: st2.d { v0, v1 }[0], [x0], #16
call void @llvm.aarch64.neon.st2lane.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, i64 0, i64* %A)
- %tmp = getelementptr i64* %A, i64 2
+ %tmp = getelementptr i64, i64* %A, i64 2
ret i64* %tmp
}
@@ -5132,7 +5132,7 @@ define i64* @test_v2i64_post_reg_st2lane(i64* %A, i64** %ptr, <2 x i64> %B, <2 x
;CHECK-LABEL: test_v2i64_post_reg_st2lane:
;CHECK: st2.d { v0, v1 }[0], [x0], x{{[0-9]+}}
call void @llvm.aarch64.neon.st2lane.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, i64 0, i64* %A)
- %tmp = getelementptr i64* %A, i64 %inc
+ %tmp = getelementptr i64, i64* %A, i64 %inc
ret i64* %tmp
}
@@ -5143,7 +5143,7 @@ define i64* @test_v1i64_post_imm_st2lane(i64* %A, i64** %ptr, <1 x i64> %B, <1 x
;CHECK-LABEL: test_v1i64_post_imm_st2lane:
;CHECK: st2.d { v0, v1 }[0], [x0], #16
call void @llvm.aarch64.neon.st2lane.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, i64 0, i64* %A)
- %tmp = getelementptr i64* %A, i64 2
+ %tmp = getelementptr i64, i64* %A, i64 2
ret i64* %tmp
}
@@ -5151,7 +5151,7 @@ define i64* @test_v1i64_post_reg_st2lane(i64* %A, i64** %ptr, <1 x i64> %B, <1 x
;CHECK-LABEL: test_v1i64_post_reg_st2lane:
;CHECK: st2.d { v0, v1 }[0], [x0], x{{[0-9]+}}
call void @llvm.aarch64.neon.st2lane.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, i64 0, i64* %A)
- %tmp = getelementptr i64* %A, i64 %inc
+ %tmp = getelementptr i64, i64* %A, i64 %inc
ret i64* %tmp
}
@@ -5162,7 +5162,7 @@ define float* @test_v4f32_post_imm_st2lane(float* %A, float** %ptr, <4 x float>
;CHECK-LABEL: test_v4f32_post_imm_st2lane:
;CHECK: st2.s { v0, v1 }[0], [x0], #8
call void @llvm.aarch64.neon.st2lane.v4f32.p0f32(<4 x float> %B, <4 x float> %C, i64 0, float* %A)
- %tmp = getelementptr float* %A, i32 2
+ %tmp = getelementptr float, float* %A, i32 2
ret float* %tmp
}
@@ -5170,7 +5170,7 @@ define float* @test_v4f32_post_reg_st2lane(float* %A, float** %ptr, <4 x float>
;CHECK-LABEL: test_v4f32_post_reg_st2lane:
;CHECK: st2.s { v0, v1 }[0], [x0], x{{[0-9]+}}
call void @llvm.aarch64.neon.st2lane.v4f32.p0f32(<4 x float> %B, <4 x float> %C, i64 0, float* %A)
- %tmp = getelementptr float* %A, i64 %inc
+ %tmp = getelementptr float, float* %A, i64 %inc
ret float* %tmp
}
@@ -5181,7 +5181,7 @@ define float* @test_v2f32_post_imm_st2lane(float* %A, float** %ptr, <2 x float>
;CHECK-LABEL: test_v2f32_post_imm_st2lane:
;CHECK: st2.s { v0, v1 }[0], [x0], #8
call void @llvm.aarch64.neon.st2lane.v2f32.p0f32(<2 x float> %B, <2 x float> %C, i64 0, float* %A)
- %tmp = getelementptr float* %A, i32 2
+ %tmp = getelementptr float, float* %A, i32 2
ret float* %tmp
}
@@ -5189,7 +5189,7 @@ define float* @test_v2f32_post_reg_st2lane(float* %A, float** %ptr, <2 x float>
;CHECK-LABEL: test_v2f32_post_reg_st2lane:
;CHECK: st2.s { v0, v1 }[0], [x0], x{{[0-9]+}}
call void @llvm.aarch64.neon.st2lane.v2f32.p0f32(<2 x float> %B, <2 x float> %C, i64 0, float* %A)
- %tmp = getelementptr float* %A, i64 %inc
+ %tmp = getelementptr float, float* %A, i64 %inc
ret float* %tmp
}
@@ -5200,7 +5200,7 @@ define double* @test_v2f64_post_imm_st2lane(double* %A, double** %ptr, <2 x doub
;CHECK-LABEL: test_v2f64_post_imm_st2lane:
;CHECK: st2.d { v0, v1 }[0], [x0], #16
call void @llvm.aarch64.neon.st2lane.v2f64.p0f64(<2 x double> %B, <2 x double> %C, i64 0, double* %A)
- %tmp = getelementptr double* %A, i64 2
+ %tmp = getelementptr double, double* %A, i64 2
ret double* %tmp
}
@@ -5208,7 +5208,7 @@ define double* @test_v2f64_post_reg_st2lane(double* %A, double** %ptr, <2 x doub
;CHECK-LABEL: test_v2f64_post_reg_st2lane:
;CHECK: st2.d { v0, v1 }[0], [x0], x{{[0-9]+}}
call void @llvm.aarch64.neon.st2lane.v2f64.p0f64(<2 x double> %B, <2 x double> %C, i64 0, double* %A)
- %tmp = getelementptr double* %A, i64 %inc
+ %tmp = getelementptr double, double* %A, i64 %inc
ret double* %tmp
}
@@ -5219,7 +5219,7 @@ define double* @test_v1f64_post_imm_st2lane(double* %A, double** %ptr, <1 x doub
;CHECK-LABEL: test_v1f64_post_imm_st2lane:
;CHECK: st2.d { v0, v1 }[0], [x0], #16
call void @llvm.aarch64.neon.st2lane.v1f64.p0f64(<1 x double> %B, <1 x double> %C, i64 0, double* %A)
- %tmp = getelementptr double* %A, i64 2
+ %tmp = getelementptr double, double* %A, i64 2
ret double* %tmp
}
@@ -5227,7 +5227,7 @@ define double* @test_v1f64_post_reg_st2lane(double* %A, double** %ptr, <1 x doub
;CHECK-LABEL: test_v1f64_post_reg_st2lane:
;CHECK: st2.d { v0, v1 }[0], [x0], x{{[0-9]+}}
call void @llvm.aarch64.neon.st2lane.v1f64.p0f64(<1 x double> %B, <1 x double> %C, i64 0, double* %A)
- %tmp = getelementptr double* %A, i64 %inc
+ %tmp = getelementptr double, double* %A, i64 %inc
ret double* %tmp
}
@@ -5238,7 +5238,7 @@ define i8* @test_v16i8_post_imm_st3lane(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i
;CHECK-LABEL: test_v16i8_post_imm_st3lane:
;CHECK: st3.b { v0, v1, v2 }[0], [x0], #3
call void @llvm.aarch64.neon.st3lane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i64 0, i8* %A)
- %tmp = getelementptr i8* %A, i32 3
+ %tmp = getelementptr i8, i8* %A, i32 3
ret i8* %tmp
}
@@ -5246,7 +5246,7 @@ define i8* @test_v16i8_post_reg_st3lane(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i
;CHECK-LABEL: test_v16i8_post_reg_st3lane:
;CHECK: st3.b { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
call void @llvm.aarch64.neon.st3lane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i64 0, i8* %A)
- %tmp = getelementptr i8* %A, i64 %inc
+ %tmp = getelementptr i8, i8* %A, i64 %inc
ret i8* %tmp
}
@@ -5257,7 +5257,7 @@ define i8* @test_v8i8_post_imm_st3lane(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8>
;CHECK-LABEL: test_v8i8_post_imm_st3lane:
;CHECK: st3.b { v0, v1, v2 }[0], [x0], #3
call void @llvm.aarch64.neon.st3lane.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i64 0, i8* %A)
- %tmp = getelementptr i8* %A, i32 3
+ %tmp = getelementptr i8, i8* %A, i32 3
ret i8* %tmp
}
@@ -5265,7 +5265,7 @@ define i8* @test_v8i8_post_reg_st3lane(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8>
;CHECK-LABEL: test_v8i8_post_reg_st3lane:
;CHECK: st3.b { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
call void @llvm.aarch64.neon.st3lane.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i64 0, i8* %A)
- %tmp = getelementptr i8* %A, i64 %inc
+ %tmp = getelementptr i8, i8* %A, i64 %inc
ret i8* %tmp
}
@@ -5276,7 +5276,7 @@ define i16* @test_v8i16_post_imm_st3lane(i16* %A, i16** %ptr, <8 x i16> %B, <8 x
;CHECK-LABEL: test_v8i16_post_imm_st3lane:
;CHECK: st3.h { v0, v1, v2 }[0], [x0], #6
call void @llvm.aarch64.neon.st3lane.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 0, i16* %A)
- %tmp = getelementptr i16* %A, i32 3
+ %tmp = getelementptr i16, i16* %A, i32 3
ret i16* %tmp
}
@@ -5284,7 +5284,7 @@ define i16* @test_v8i16_post_reg_st3lane(i16* %A, i16** %ptr, <8 x i16> %B, <8 x
;CHECK-LABEL: test_v8i16_post_reg_st3lane:
;CHECK: st3.h { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
call void @llvm.aarch64.neon.st3lane.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 0, i16* %A)
- %tmp = getelementptr i16* %A, i64 %inc
+ %tmp = getelementptr i16, i16* %A, i64 %inc
ret i16* %tmp
}
@@ -5295,7 +5295,7 @@ define i16* @test_v4i16_post_imm_st3lane(i16* %A, i16** %ptr, <4 x i16> %B, <4 x
;CHECK-LABEL: test_v4i16_post_imm_st3lane:
;CHECK: st3.h { v0, v1, v2 }[0], [x0], #6
call void @llvm.aarch64.neon.st3lane.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i64 0, i16* %A)
- %tmp = getelementptr i16* %A, i32 3
+ %tmp = getelementptr i16, i16* %A, i32 3
ret i16* %tmp
}
@@ -5303,7 +5303,7 @@ define i16* @test_v4i16_post_reg_st3lane(i16* %A, i16** %ptr, <4 x i16> %B, <4 x
;CHECK-LABEL: test_v4i16_post_reg_st3lane:
;CHECK: st3.h { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
call void @llvm.aarch64.neon.st3lane.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i64 0, i16* %A)
- %tmp = getelementptr i16* %A, i64 %inc
+ %tmp = getelementptr i16, i16* %A, i64 %inc
ret i16* %tmp
}
@@ -5314,7 +5314,7 @@ define i32* @test_v4i32_post_imm_st3lane(i32* %A, i32** %ptr, <4 x i32> %B, <4 x
;CHECK-LABEL: test_v4i32_post_imm_st3lane:
;CHECK: st3.s { v0, v1, v2 }[0], [x0], #12
call void @llvm.aarch64.neon.st3lane.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 0, i32* %A)
- %tmp = getelementptr i32* %A, i32 3
+ %tmp = getelementptr i32, i32* %A, i32 3
ret i32* %tmp
}
@@ -5322,7 +5322,7 @@ define i32* @test_v4i32_post_reg_st3lane(i32* %A, i32** %ptr, <4 x i32> %B, <4 x
;CHECK-LABEL: test_v4i32_post_reg_st3lane:
;CHECK: st3.s { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
call void @llvm.aarch64.neon.st3lane.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 0, i32* %A)
- %tmp = getelementptr i32* %A, i64 %inc
+ %tmp = getelementptr i32, i32* %A, i64 %inc
ret i32* %tmp
}
@@ -5333,7 +5333,7 @@ define i32* @test_v2i32_post_imm_st3lane(i32* %A, i32** %ptr, <2 x i32> %B, <2 x
;CHECK-LABEL: test_v2i32_post_imm_st3lane:
;CHECK: st3.s { v0, v1, v2 }[0], [x0], #12
call void @llvm.aarch64.neon.st3lane.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i64 0, i32* %A)
- %tmp = getelementptr i32* %A, i32 3
+ %tmp = getelementptr i32, i32* %A, i32 3
ret i32* %tmp
}
@@ -5341,7 +5341,7 @@ define i32* @test_v2i32_post_reg_st3lane(i32* %A, i32** %ptr, <2 x i32> %B, <2 x
;CHECK-LABEL: test_v2i32_post_reg_st3lane:
;CHECK: st3.s { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
call void @llvm.aarch64.neon.st3lane.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i64 0, i32* %A)
- %tmp = getelementptr i32* %A, i64 %inc
+ %tmp = getelementptr i32, i32* %A, i64 %inc
ret i32* %tmp
}
@@ -5352,7 +5352,7 @@ define i64* @test_v2i64_post_imm_st3lane(i64* %A, i64** %ptr, <2 x i64> %B, <2 x
;CHECK-LABEL: test_v2i64_post_imm_st3lane:
;CHECK: st3.d { v0, v1, v2 }[0], [x0], #24
call void @llvm.aarch64.neon.st3lane.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 0, i64* %A)
- %tmp = getelementptr i64* %A, i64 3
+ %tmp = getelementptr i64, i64* %A, i64 3
ret i64* %tmp
}
@@ -5360,7 +5360,7 @@ define i64* @test_v2i64_post_reg_st3lane(i64* %A, i64** %ptr, <2 x i64> %B, <2 x
;CHECK-LABEL: test_v2i64_post_reg_st3lane:
;CHECK: st3.d { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
call void @llvm.aarch64.neon.st3lane.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 0, i64* %A)
- %tmp = getelementptr i64* %A, i64 %inc
+ %tmp = getelementptr i64, i64* %A, i64 %inc
ret i64* %tmp
}
@@ -5371,7 +5371,7 @@ define i64* @test_v1i64_post_imm_st3lane(i64* %A, i64** %ptr, <1 x i64> %B, <1 x
;CHECK-LABEL: test_v1i64_post_imm_st3lane:
;CHECK: st3.d { v0, v1, v2 }[0], [x0], #24
call void @llvm.aarch64.neon.st3lane.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64 0, i64* %A)
- %tmp = getelementptr i64* %A, i64 3
+ %tmp = getelementptr i64, i64* %A, i64 3
ret i64* %tmp
}
@@ -5379,7 +5379,7 @@ define i64* @test_v1i64_post_reg_st3lane(i64* %A, i64** %ptr, <1 x i64> %B, <1 x
;CHECK-LABEL: test_v1i64_post_reg_st3lane:
;CHECK: st3.d { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
call void @llvm.aarch64.neon.st3lane.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64 0, i64* %A)
- %tmp = getelementptr i64* %A, i64 %inc
+ %tmp = getelementptr i64, i64* %A, i64 %inc
ret i64* %tmp
}
@@ -5390,7 +5390,7 @@ define float* @test_v4f32_post_imm_st3lane(float* %A, float** %ptr, <4 x float>
;CHECK-LABEL: test_v4f32_post_imm_st3lane:
;CHECK: st3.s { v0, v1, v2 }[0], [x0], #12
call void @llvm.aarch64.neon.st3lane.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, i64 0, float* %A)
- %tmp = getelementptr float* %A, i32 3
+ %tmp = getelementptr float, float* %A, i32 3
ret float* %tmp
}
@@ -5398,7 +5398,7 @@ define float* @test_v4f32_post_reg_st3lane(float* %A, float** %ptr, <4 x float>
;CHECK-LABEL: test_v4f32_post_reg_st3lane:
;CHECK: st3.s { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
call void @llvm.aarch64.neon.st3lane.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, i64 0, float* %A)
- %tmp = getelementptr float* %A, i64 %inc
+ %tmp = getelementptr float, float* %A, i64 %inc
ret float* %tmp
}
@@ -5409,7 +5409,7 @@ define float* @test_v2f32_post_imm_st3lane(float* %A, float** %ptr, <2 x float>
;CHECK-LABEL: test_v2f32_post_imm_st3lane:
;CHECK: st3.s { v0, v1, v2 }[0], [x0], #12
call void @llvm.aarch64.neon.st3lane.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, i64 0, float* %A)
- %tmp = getelementptr float* %A, i32 3
+ %tmp = getelementptr float, float* %A, i32 3
ret float* %tmp
}
@@ -5417,7 +5417,7 @@ define float* @test_v2f32_post_reg_st3lane(float* %A, float** %ptr, <2 x float>
;CHECK-LABEL: test_v2f32_post_reg_st3lane:
;CHECK: st3.s { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
call void @llvm.aarch64.neon.st3lane.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, i64 0, float* %A)
- %tmp = getelementptr float* %A, i64 %inc
+ %tmp = getelementptr float, float* %A, i64 %inc
ret float* %tmp
}
@@ -5428,7 +5428,7 @@ define double* @test_v2f64_post_imm_st3lane(double* %A, double** %ptr, <2 x doub
;CHECK-LABEL: test_v2f64_post_imm_st3lane:
;CHECK: st3.d { v0, v1, v2 }[0], [x0], #24
call void @llvm.aarch64.neon.st3lane.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, i64 0, double* %A)
- %tmp = getelementptr double* %A, i64 3
+ %tmp = getelementptr double, double* %A, i64 3
ret double* %tmp
}
@@ -5436,7 +5436,7 @@ define double* @test_v2f64_post_reg_st3lane(double* %A, double** %ptr, <2 x doub
;CHECK-LABEL: test_v2f64_post_reg_st3lane:
;CHECK: st3.d { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
call void @llvm.aarch64.neon.st3lane.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, i64 0, double* %A)
- %tmp = getelementptr double* %A, i64 %inc
+ %tmp = getelementptr double, double* %A, i64 %inc
ret double* %tmp
}
@@ -5447,7 +5447,7 @@ define double* @test_v1f64_post_imm_st3lane(double* %A, double** %ptr, <1 x doub
;CHECK-LABEL: test_v1f64_post_imm_st3lane:
;CHECK: st3.d { v0, v1, v2 }[0], [x0], #24
call void @llvm.aarch64.neon.st3lane.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, i64 0, double* %A)
- %tmp = getelementptr double* %A, i64 3
+ %tmp = getelementptr double, double* %A, i64 3
ret double* %tmp
}
@@ -5455,7 +5455,7 @@ define double* @test_v1f64_post_reg_st3lane(double* %A, double** %ptr, <1 x doub
;CHECK-LABEL: test_v1f64_post_reg_st3lane:
;CHECK: st3.d { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
call void @llvm.aarch64.neon.st3lane.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, i64 0, double* %A)
- %tmp = getelementptr double* %A, i64 %inc
+ %tmp = getelementptr double, double* %A, i64 %inc
ret double* %tmp
}
@@ -5466,7 +5466,7 @@ define i8* @test_v16i8_post_imm_st4lane(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i
;CHECK-LABEL: test_v16i8_post_imm_st4lane:
;CHECK: st4.b { v0, v1, v2, v3 }[0], [x0], #4
call void @llvm.aarch64.neon.st4lane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i64 0, i8* %A)
- %tmp = getelementptr i8* %A, i32 4
+ %tmp = getelementptr i8, i8* %A, i32 4
ret i8* %tmp
}
@@ -5474,7 +5474,7 @@ define i8* @test_v16i8_post_reg_st4lane(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i
;CHECK-LABEL: test_v16i8_post_reg_st4lane:
;CHECK: st4.b { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
call void @llvm.aarch64.neon.st4lane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i64 0, i8* %A)
- %tmp = getelementptr i8* %A, i64 %inc
+ %tmp = getelementptr i8, i8* %A, i64 %inc
ret i8* %tmp
}
@@ -5485,7 +5485,7 @@ define i8* @test_v8i8_post_imm_st4lane(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8>
;CHECK-LABEL: test_v8i8_post_imm_st4lane:
;CHECK: st4.b { v0, v1, v2, v3 }[0], [x0], #4
call void @llvm.aarch64.neon.st4lane.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i64 0, i8* %A)
- %tmp = getelementptr i8* %A, i32 4
+ %tmp = getelementptr i8, i8* %A, i32 4
ret i8* %tmp
}
@@ -5493,7 +5493,7 @@ define i8* @test_v8i8_post_reg_st4lane(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8>
;CHECK-LABEL: test_v8i8_post_reg_st4lane:
;CHECK: st4.b { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
call void @llvm.aarch64.neon.st4lane.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i64 0, i8* %A)
- %tmp = getelementptr i8* %A, i64 %inc
+ %tmp = getelementptr i8, i8* %A, i64 %inc
ret i8* %tmp
}
@@ -5504,7 +5504,7 @@ define i16* @test_v8i16_post_imm_st4lane(i16* %A, i16** %ptr, <8 x i16> %B, <8 x
;CHECK-LABEL: test_v8i16_post_imm_st4lane:
;CHECK: st4.h { v0, v1, v2, v3 }[0], [x0], #8
call void @llvm.aarch64.neon.st4lane.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i64 0, i16* %A)
- %tmp = getelementptr i16* %A, i32 4
+ %tmp = getelementptr i16, i16* %A, i32 4
ret i16* %tmp
}
@@ -5512,7 +5512,7 @@ define i16* @test_v8i16_post_reg_st4lane(i16* %A, i16** %ptr, <8 x i16> %B, <8 x
;CHECK-LABEL: test_v8i16_post_reg_st4lane:
;CHECK: st4.h { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
call void @llvm.aarch64.neon.st4lane.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i64 0, i16* %A)
- %tmp = getelementptr i16* %A, i64 %inc
+ %tmp = getelementptr i16, i16* %A, i64 %inc
ret i16* %tmp
}
@@ -5523,7 +5523,7 @@ define i16* @test_v4i16_post_imm_st4lane(i16* %A, i16** %ptr, <4 x i16> %B, <4 x
;CHECK-LABEL: test_v4i16_post_imm_st4lane:
;CHECK: st4.h { v0, v1, v2, v3 }[0], [x0], #8
call void @llvm.aarch64.neon.st4lane.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i64 0, i16* %A)
- %tmp = getelementptr i16* %A, i32 4
+ %tmp = getelementptr i16, i16* %A, i32 4
ret i16* %tmp
}
@@ -5531,7 +5531,7 @@ define i16* @test_v4i16_post_reg_st4lane(i16* %A, i16** %ptr, <4 x i16> %B, <4 x
;CHECK-LABEL: test_v4i16_post_reg_st4lane:
;CHECK: st4.h { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
call void @llvm.aarch64.neon.st4lane.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i64 0, i16* %A)
- %tmp = getelementptr i16* %A, i64 %inc
+ %tmp = getelementptr i16, i16* %A, i64 %inc
ret i16* %tmp
}
@@ -5542,7 +5542,7 @@ define i32* @test_v4i32_post_imm_st4lane(i32* %A, i32** %ptr, <4 x i32> %B, <4 x
;CHECK-LABEL: test_v4i32_post_imm_st4lane:
;CHECK: st4.s { v0, v1, v2, v3 }[0], [x0], #16
call void @llvm.aarch64.neon.st4lane.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i64 0, i32* %A)
- %tmp = getelementptr i32* %A, i32 4
+ %tmp = getelementptr i32, i32* %A, i32 4
ret i32* %tmp
}
@@ -5550,7 +5550,7 @@ define i32* @test_v4i32_post_reg_st4lane(i32* %A, i32** %ptr, <4 x i32> %B, <4 x
;CHECK-LABEL: test_v4i32_post_reg_st4lane:
;CHECK: st4.s { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
call void @llvm.aarch64.neon.st4lane.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i64 0, i32* %A)
- %tmp = getelementptr i32* %A, i64 %inc
+ %tmp = getelementptr i32, i32* %A, i64 %inc
ret i32* %tmp
}
@@ -5561,7 +5561,7 @@ define i32* @test_v2i32_post_imm_st4lane(i32* %A, i32** %ptr, <2 x i32> %B, <2 x
;CHECK-LABEL: test_v2i32_post_imm_st4lane:
;CHECK: st4.s { v0, v1, v2, v3 }[0], [x0], #16
call void @llvm.aarch64.neon.st4lane.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i64 0, i32* %A)
- %tmp = getelementptr i32* %A, i32 4
+ %tmp = getelementptr i32, i32* %A, i32 4
ret i32* %tmp
}
@@ -5569,7 +5569,7 @@ define i32* @test_v2i32_post_reg_st4lane(i32* %A, i32** %ptr, <2 x i32> %B, <2 x
;CHECK-LABEL: test_v2i32_post_reg_st4lane:
;CHECK: st4.s { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
call void @llvm.aarch64.neon.st4lane.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i64 0, i32* %A)
- %tmp = getelementptr i32* %A, i64 %inc
+ %tmp = getelementptr i32, i32* %A, i64 %inc
ret i32* %tmp
}
@@ -5580,7 +5580,7 @@ define i64* @test_v2i64_post_imm_st4lane(i64* %A, i64** %ptr, <2 x i64> %B, <2 x
;CHECK-LABEL: test_v2i64_post_imm_st4lane:
;CHECK: st4.d { v0, v1, v2, v3 }[0], [x0], #32
call void @llvm.aarch64.neon.st4lane.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64 0, i64* %A)
- %tmp = getelementptr i64* %A, i64 4
+ %tmp = getelementptr i64, i64* %A, i64 4
ret i64* %tmp
}
@@ -5588,7 +5588,7 @@ define i64* @test_v2i64_post_reg_st4lane(i64* %A, i64** %ptr, <2 x i64> %B, <2 x
;CHECK-LABEL: test_v2i64_post_reg_st4lane:
;CHECK: st4.d { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
call void @llvm.aarch64.neon.st4lane.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64 0, i64* %A)
- %tmp = getelementptr i64* %A, i64 %inc
+ %tmp = getelementptr i64, i64* %A, i64 %inc
ret i64* %tmp
}
@@ -5599,7 +5599,7 @@ define i64* @test_v1i64_post_imm_st4lane(i64* %A, i64** %ptr, <1 x i64> %B, <1 x
;CHECK-LABEL: test_v1i64_post_imm_st4lane:
;CHECK: st4.d { v0, v1, v2, v3 }[0], [x0], #32
call void @llvm.aarch64.neon.st4lane.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64 0, i64* %A)
- %tmp = getelementptr i64* %A, i64 4
+ %tmp = getelementptr i64, i64* %A, i64 4
ret i64* %tmp
}
@@ -5607,7 +5607,7 @@ define i64* @test_v1i64_post_reg_st4lane(i64* %A, i64** %ptr, <1 x i64> %B, <1 x
;CHECK-LABEL: test_v1i64_post_reg_st4lane:
;CHECK: st4.d { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
call void @llvm.aarch64.neon.st4lane.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64 0, i64* %A)
- %tmp = getelementptr i64* %A, i64 %inc
+ %tmp = getelementptr i64, i64* %A, i64 %inc
ret i64* %tmp
}
@@ -5618,7 +5618,7 @@ define float* @test_v4f32_post_imm_st4lane(float* %A, float** %ptr, <4 x float>
;CHECK-LABEL: test_v4f32_post_imm_st4lane:
;CHECK: st4.s { v0, v1, v2, v3 }[0], [x0], #16
call void @llvm.aarch64.neon.st4lane.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, i64 0, float* %A)
- %tmp = getelementptr float* %A, i32 4
+ %tmp = getelementptr float, float* %A, i32 4
ret float* %tmp
}
@@ -5626,7 +5626,7 @@ define float* @test_v4f32_post_reg_st4lane(float* %A, float** %ptr, <4 x float>
;CHECK-LABEL: test_v4f32_post_reg_st4lane:
;CHECK: st4.s { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
call void @llvm.aarch64.neon.st4lane.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, i64 0, float* %A)
- %tmp = getelementptr float* %A, i64 %inc
+ %tmp = getelementptr float, float* %A, i64 %inc
ret float* %tmp
}
@@ -5637,7 +5637,7 @@ define float* @test_v2f32_post_imm_st4lane(float* %A, float** %ptr, <2 x float>
;CHECK-LABEL: test_v2f32_post_imm_st4lane:
;CHECK: st4.s { v0, v1, v2, v3 }[0], [x0], #16
call void @llvm.aarch64.neon.st4lane.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, i64 0, float* %A)
- %tmp = getelementptr float* %A, i32 4
+ %tmp = getelementptr float, float* %A, i32 4
ret float* %tmp
}
@@ -5645,7 +5645,7 @@ define float* @test_v2f32_post_reg_st4lane(float* %A, float** %ptr, <2 x float>
;CHECK-LABEL: test_v2f32_post_reg_st4lane:
;CHECK: st4.s { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
call void @llvm.aarch64.neon.st4lane.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, i64 0, float* %A)
- %tmp = getelementptr float* %A, i64 %inc
+ %tmp = getelementptr float, float* %A, i64 %inc
ret float* %tmp
}
@@ -5656,7 +5656,7 @@ define double* @test_v2f64_post_imm_st4lane(double* %A, double** %ptr, <2 x doub
;CHECK-LABEL: test_v2f64_post_imm_st4lane:
;CHECK: st4.d { v0, v1, v2, v3 }[0], [x0], #32
call void @llvm.aarch64.neon.st4lane.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, i64 0, double* %A)
- %tmp = getelementptr double* %A, i64 4
+ %tmp = getelementptr double, double* %A, i64 4
ret double* %tmp
}
@@ -5664,7 +5664,7 @@ define double* @test_v2f64_post_reg_st4lane(double* %A, double** %ptr, <2 x doub
;CHECK-LABEL: test_v2f64_post_reg_st4lane:
;CHECK: st4.d { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
call void @llvm.aarch64.neon.st4lane.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, i64 0, double* %A)
- %tmp = getelementptr double* %A, i64 %inc
+ %tmp = getelementptr double, double* %A, i64 %inc
ret double* %tmp
}
@@ -5675,7 +5675,7 @@ define double* @test_v1f64_post_imm_st4lane(double* %A, double** %ptr, <1 x doub
;CHECK-LABEL: test_v1f64_post_imm_st4lane:
;CHECK: st4.d { v0, v1, v2, v3 }[0], [x0], #32
call void @llvm.aarch64.neon.st4lane.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, i64 0, double* %A)
- %tmp = getelementptr double* %A, i64 4
+ %tmp = getelementptr double, double* %A, i64 4
ret double* %tmp
}
@@ -5683,7 +5683,7 @@ define double* @test_v1f64_post_reg_st4lane(double* %A, double** %ptr, <1 x doub
;CHECK-LABEL: test_v1f64_post_reg_st4lane:
;CHECK: st4.d { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
call void @llvm.aarch64.neon.st4lane.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, i64 0, double* %A)
- %tmp = getelementptr double* %A, i64 %inc
+ %tmp = getelementptr double, double* %A, i64 %inc
ret double* %tmp
}
@@ -5692,7 +5692,7 @@ declare void @llvm.aarch64.neon.st4lane.v1f64.p0f64(<1 x double>, <1 x double>,
define <16 x i8> @test_v16i8_post_imm_ld1r(i8* %bar, i8** %ptr) {
; CHECK-LABEL: test_v16i8_post_imm_ld1r:
; CHECK: ld1r.16b { v0 }, [x0], #1
- %tmp1 = load i8* %bar
+ %tmp1 = load i8, i8* %bar
%tmp2 = insertelement <16 x i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, i8 %tmp1, i32 0
%tmp3 = insertelement <16 x i8> %tmp2, i8 %tmp1, i32 1
%tmp4 = insertelement <16 x i8> %tmp3, i8 %tmp1, i32 2
@@ -5709,7 +5709,7 @@ define <16 x i8> @test_v16i8_post_imm_ld1r(i8* %bar, i8** %ptr) {
%tmp15 = insertelement <16 x i8> %tmp14, i8 %tmp1, i32 13
%tmp16 = insertelement <16 x i8> %tmp15, i8 %tmp1, i32 14
%tmp17 = insertelement <16 x i8> %tmp16, i8 %tmp1, i32 15
- %tmp18 = getelementptr i8* %bar, i64 1
+ %tmp18 = getelementptr i8, i8* %bar, i64 1
store i8* %tmp18, i8** %ptr
ret <16 x i8> %tmp17
}
@@ -5717,7 +5717,7 @@ define <16 x i8> @test_v16i8_post_imm_ld1r(i8* %bar, i8** %ptr) {
define <16 x i8> @test_v16i8_post_reg_ld1r(i8* %bar, i8** %ptr, i64 %inc) {
; CHECK-LABEL: test_v16i8_post_reg_ld1r:
; CHECK: ld1r.16b { v0 }, [x0], x{{[0-9]+}}
- %tmp1 = load i8* %bar
+ %tmp1 = load i8, i8* %bar
%tmp2 = insertelement <16 x i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, i8 %tmp1, i32 0
%tmp3 = insertelement <16 x i8> %tmp2, i8 %tmp1, i32 1
%tmp4 = insertelement <16 x i8> %tmp3, i8 %tmp1, i32 2
@@ -5734,7 +5734,7 @@ define <16 x i8> @test_v16i8_post_reg_ld1r(i8* %bar, i8** %ptr, i64 %inc) {
%tmp15 = insertelement <16 x i8> %tmp14, i8 %tmp1, i32 13
%tmp16 = insertelement <16 x i8> %tmp15, i8 %tmp1, i32 14
%tmp17 = insertelement <16 x i8> %tmp16, i8 %tmp1, i32 15
- %tmp18 = getelementptr i8* %bar, i64 %inc
+ %tmp18 = getelementptr i8, i8* %bar, i64 %inc
store i8* %tmp18, i8** %ptr
ret <16 x i8> %tmp17
}
@@ -5742,7 +5742,7 @@ define <16 x i8> @test_v16i8_post_reg_ld1r(i8* %bar, i8** %ptr, i64 %inc) {
define <8 x i8> @test_v8i8_post_imm_ld1r(i8* %bar, i8** %ptr) {
; CHECK-LABEL: test_v8i8_post_imm_ld1r:
; CHECK: ld1r.8b { v0 }, [x0], #1
- %tmp1 = load i8* %bar
+ %tmp1 = load i8, i8* %bar
%tmp2 = insertelement <8 x i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, i8 %tmp1, i32 0
%tmp3 = insertelement <8 x i8> %tmp2, i8 %tmp1, i32 1
%tmp4 = insertelement <8 x i8> %tmp3, i8 %tmp1, i32 2
@@ -5751,7 +5751,7 @@ define <8 x i8> @test_v8i8_post_imm_ld1r(i8* %bar, i8** %ptr) {
%tmp7 = insertelement <8 x i8> %tmp6, i8 %tmp1, i32 5
%tmp8 = insertelement <8 x i8> %tmp7, i8 %tmp1, i32 6
%tmp9 = insertelement <8 x i8> %tmp8, i8 %tmp1, i32 7
- %tmp10 = getelementptr i8* %bar, i64 1
+ %tmp10 = getelementptr i8, i8* %bar, i64 1
store i8* %tmp10, i8** %ptr
ret <8 x i8> %tmp9
}
@@ -5759,7 +5759,7 @@ define <8 x i8> @test_v8i8_post_imm_ld1r(i8* %bar, i8** %ptr) {
define <8 x i8> @test_v8i8_post_reg_ld1r(i8* %bar, i8** %ptr, i64 %inc) {
; CHECK-LABEL: test_v8i8_post_reg_ld1r:
; CHECK: ld1r.8b { v0 }, [x0], x{{[0-9]+}}
- %tmp1 = load i8* %bar
+ %tmp1 = load i8, i8* %bar
%tmp2 = insertelement <8 x i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, i8 %tmp1, i32 0
%tmp3 = insertelement <8 x i8> %tmp2, i8 %tmp1, i32 1
%tmp4 = insertelement <8 x i8> %tmp3, i8 %tmp1, i32 2
@@ -5768,7 +5768,7 @@ define <8 x i8> @test_v8i8_post_reg_ld1r(i8* %bar, i8** %ptr, i64 %inc) {
%tmp7 = insertelement <8 x i8> %tmp6, i8 %tmp1, i32 5
%tmp8 = insertelement <8 x i8> %tmp7, i8 %tmp1, i32 6
%tmp9 = insertelement <8 x i8> %tmp8, i8 %tmp1, i32 7
- %tmp10 = getelementptr i8* %bar, i64 %inc
+ %tmp10 = getelementptr i8, i8* %bar, i64 %inc
store i8* %tmp10, i8** %ptr
ret <8 x i8> %tmp9
}
@@ -5776,7 +5776,7 @@ define <8 x i8> @test_v8i8_post_reg_ld1r(i8* %bar, i8** %ptr, i64 %inc) {
define <8 x i16> @test_v8i16_post_imm_ld1r(i16* %bar, i16** %ptr) {
; CHECK-LABEL: test_v8i16_post_imm_ld1r:
; CHECK: ld1r.8h { v0 }, [x0], #2
- %tmp1 = load i16* %bar
+ %tmp1 = load i16, i16* %bar
%tmp2 = insertelement <8 x i16> <i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef>, i16 %tmp1, i32 0
%tmp3 = insertelement <8 x i16> %tmp2, i16 %tmp1, i32 1
%tmp4 = insertelement <8 x i16> %tmp3, i16 %tmp1, i32 2
@@ -5785,7 +5785,7 @@ define <8 x i16> @test_v8i16_post_imm_ld1r(i16* %bar, i16** %ptr) {
%tmp7 = insertelement <8 x i16> %tmp6, i16 %tmp1, i32 5
%tmp8 = insertelement <8 x i16> %tmp7, i16 %tmp1, i32 6
%tmp9 = insertelement <8 x i16> %tmp8, i16 %tmp1, i32 7
- %tmp10 = getelementptr i16* %bar, i64 1
+ %tmp10 = getelementptr i16, i16* %bar, i64 1
store i16* %tmp10, i16** %ptr
ret <8 x i16> %tmp9
}
@@ -5793,7 +5793,7 @@ define <8 x i16> @test_v8i16_post_imm_ld1r(i16* %bar, i16** %ptr) {
define <8 x i16> @test_v8i16_post_reg_ld1r(i16* %bar, i16** %ptr, i64 %inc) {
; CHECK-LABEL: test_v8i16_post_reg_ld1r:
; CHECK: ld1r.8h { v0 }, [x0], x{{[0-9]+}}
- %tmp1 = load i16* %bar
+ %tmp1 = load i16, i16* %bar
%tmp2 = insertelement <8 x i16> <i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef>, i16 %tmp1, i32 0
%tmp3 = insertelement <8 x i16> %tmp2, i16 %tmp1, i32 1
%tmp4 = insertelement <8 x i16> %tmp3, i16 %tmp1, i32 2
@@ -5802,7 +5802,7 @@ define <8 x i16> @test_v8i16_post_reg_ld1r(i16* %bar, i16** %ptr, i64 %inc) {
%tmp7 = insertelement <8 x i16> %tmp6, i16 %tmp1, i32 5
%tmp8 = insertelement <8 x i16> %tmp7, i16 %tmp1, i32 6
%tmp9 = insertelement <8 x i16> %tmp8, i16 %tmp1, i32 7
- %tmp10 = getelementptr i16* %bar, i64 %inc
+ %tmp10 = getelementptr i16, i16* %bar, i64 %inc
store i16* %tmp10, i16** %ptr
ret <8 x i16> %tmp9
}
@@ -5810,12 +5810,12 @@ define <8 x i16> @test_v8i16_post_reg_ld1r(i16* %bar, i16** %ptr, i64 %inc) {
define <4 x i16> @test_v4i16_post_imm_ld1r(i16* %bar, i16** %ptr) {
; CHECK-LABEL: test_v4i16_post_imm_ld1r:
; CHECK: ld1r.4h { v0 }, [x0], #2
- %tmp1 = load i16* %bar
+ %tmp1 = load i16, i16* %bar
%tmp2 = insertelement <4 x i16> <i16 undef, i16 undef, i16 undef, i16 undef>, i16 %tmp1, i32 0
%tmp3 = insertelement <4 x i16> %tmp2, i16 %tmp1, i32 1
%tmp4 = insertelement <4 x i16> %tmp3, i16 %tmp1, i32 2
%tmp5 = insertelement <4 x i16> %tmp4, i16 %tmp1, i32 3
- %tmp6 = getelementptr i16* %bar, i64 1
+ %tmp6 = getelementptr i16, i16* %bar, i64 1
store i16* %tmp6, i16** %ptr
ret <4 x i16> %tmp5
}
@@ -5823,12 +5823,12 @@ define <4 x i16> @test_v4i16_post_imm_ld1r(i16* %bar, i16** %ptr) {
define <4 x i16> @test_v4i16_post_reg_ld1r(i16* %bar, i16** %ptr, i64 %inc) {
; CHECK-LABEL: test_v4i16_post_reg_ld1r:
; CHECK: ld1r.4h { v0 }, [x0], x{{[0-9]+}}
- %tmp1 = load i16* %bar
+ %tmp1 = load i16, i16* %bar
%tmp2 = insertelement <4 x i16> <i16 undef, i16 undef, i16 undef, i16 undef>, i16 %tmp1, i32 0
%tmp3 = insertelement <4 x i16> %tmp2, i16 %tmp1, i32 1
%tmp4 = insertelement <4 x i16> %tmp3, i16 %tmp1, i32 2
%tmp5 = insertelement <4 x i16> %tmp4, i16 %tmp1, i32 3
- %tmp6 = getelementptr i16* %bar, i64 %inc
+ %tmp6 = getelementptr i16, i16* %bar, i64 %inc
store i16* %tmp6, i16** %ptr
ret <4 x i16> %tmp5
}
@@ -5836,12 +5836,12 @@ define <4 x i16> @test_v4i16_post_reg_ld1r(i16* %bar, i16** %ptr, i64 %inc) {
define <4 x i32> @test_v4i32_post_imm_ld1r(i32* %bar, i32** %ptr) {
; CHECK-LABEL: test_v4i32_post_imm_ld1r:
; CHECK: ld1r.4s { v0 }, [x0], #4
- %tmp1 = load i32* %bar
+ %tmp1 = load i32, i32* %bar
%tmp2 = insertelement <4 x i32> <i32 undef, i32 undef, i32 undef, i32 undef>, i32 %tmp1, i32 0
%tmp3 = insertelement <4 x i32> %tmp2, i32 %tmp1, i32 1
%tmp4 = insertelement <4 x i32> %tmp3, i32 %tmp1, i32 2
%tmp5 = insertelement <4 x i32> %tmp4, i32 %tmp1, i32 3
- %tmp6 = getelementptr i32* %bar, i64 1
+ %tmp6 = getelementptr i32, i32* %bar, i64 1
store i32* %tmp6, i32** %ptr
ret <4 x i32> %tmp5
}
@@ -5849,12 +5849,12 @@ define <4 x i32> @test_v4i32_post_imm_ld1r(i32* %bar, i32** %ptr) {
define <4 x i32> @test_v4i32_post_reg_ld1r(i32* %bar, i32** %ptr, i64 %inc) {
; CHECK-LABEL: test_v4i32_post_reg_ld1r:
; CHECK: ld1r.4s { v0 }, [x0], x{{[0-9]+}}
- %tmp1 = load i32* %bar
+ %tmp1 = load i32, i32* %bar
%tmp2 = insertelement <4 x i32> <i32 undef, i32 undef, i32 undef, i32 undef>, i32 %tmp1, i32 0
%tmp3 = insertelement <4 x i32> %tmp2, i32 %tmp1, i32 1
%tmp4 = insertelement <4 x i32> %tmp3, i32 %tmp1, i32 2
%tmp5 = insertelement <4 x i32> %tmp4, i32 %tmp1, i32 3
- %tmp6 = getelementptr i32* %bar, i64 %inc
+ %tmp6 = getelementptr i32, i32* %bar, i64 %inc
store i32* %tmp6, i32** %ptr
ret <4 x i32> %tmp5
}
@@ -5862,10 +5862,10 @@ define <4 x i32> @test_v4i32_post_reg_ld1r(i32* %bar, i32** %ptr, i64 %inc) {
define <2 x i32> @test_v2i32_post_imm_ld1r(i32* %bar, i32** %ptr) {
; CHECK-LABEL: test_v2i32_post_imm_ld1r:
; CHECK: ld1r.2s { v0 }, [x0], #4
- %tmp1 = load i32* %bar
+ %tmp1 = load i32, i32* %bar
%tmp2 = insertelement <2 x i32> <i32 undef, i32 undef>, i32 %tmp1, i32 0
%tmp3 = insertelement <2 x i32> %tmp2, i32 %tmp1, i32 1
- %tmp4 = getelementptr i32* %bar, i64 1
+ %tmp4 = getelementptr i32, i32* %bar, i64 1
store i32* %tmp4, i32** %ptr
ret <2 x i32> %tmp3
}
@@ -5873,10 +5873,10 @@ define <2 x i32> @test_v2i32_post_imm_ld1r(i32* %bar, i32** %ptr) {
define <2 x i32> @test_v2i32_post_reg_ld1r(i32* %bar, i32** %ptr, i64 %inc) {
; CHECK-LABEL: test_v2i32_post_reg_ld1r:
; CHECK: ld1r.2s { v0 }, [x0], x{{[0-9]+}}
- %tmp1 = load i32* %bar
+ %tmp1 = load i32, i32* %bar
%tmp2 = insertelement <2 x i32> <i32 undef, i32 undef>, i32 %tmp1, i32 0
%tmp3 = insertelement <2 x i32> %tmp2, i32 %tmp1, i32 1
- %tmp4 = getelementptr i32* %bar, i64 %inc
+ %tmp4 = getelementptr i32, i32* %bar, i64 %inc
store i32* %tmp4, i32** %ptr
ret <2 x i32> %tmp3
}
@@ -5884,10 +5884,10 @@ define <2 x i32> @test_v2i32_post_reg_ld1r(i32* %bar, i32** %ptr, i64 %inc) {
define <2 x i64> @test_v2i64_post_imm_ld1r(i64* %bar, i64** %ptr) {
; CHECK-LABEL: test_v2i64_post_imm_ld1r:
; CHECK: ld1r.2d { v0 }, [x0], #8
- %tmp1 = load i64* %bar
+ %tmp1 = load i64, i64* %bar
%tmp2 = insertelement <2 x i64> <i64 undef, i64 undef>, i64 %tmp1, i32 0
%tmp3 = insertelement <2 x i64> %tmp2, i64 %tmp1, i32 1
- %tmp4 = getelementptr i64* %bar, i64 1
+ %tmp4 = getelementptr i64, i64* %bar, i64 1
store i64* %tmp4, i64** %ptr
ret <2 x i64> %tmp3
}
@@ -5895,10 +5895,10 @@ define <2 x i64> @test_v2i64_post_imm_ld1r(i64* %bar, i64** %ptr) {
define <2 x i64> @test_v2i64_post_reg_ld1r(i64* %bar, i64** %ptr, i64 %inc) {
; CHECK-LABEL: test_v2i64_post_reg_ld1r:
; CHECK: ld1r.2d { v0 }, [x0], x{{[0-9]+}}
- %tmp1 = load i64* %bar
+ %tmp1 = load i64, i64* %bar
%tmp2 = insertelement <2 x i64> <i64 undef, i64 undef>, i64 %tmp1, i32 0
%tmp3 = insertelement <2 x i64> %tmp2, i64 %tmp1, i32 1
- %tmp4 = getelementptr i64* %bar, i64 %inc
+ %tmp4 = getelementptr i64, i64* %bar, i64 %inc
store i64* %tmp4, i64** %ptr
ret <2 x i64> %tmp3
}
@@ -5906,12 +5906,12 @@ define <2 x i64> @test_v2i64_post_reg_ld1r(i64* %bar, i64** %ptr, i64 %inc) {
define <4 x float> @test_v4f32_post_imm_ld1r(float* %bar, float** %ptr) {
; CHECK-LABEL: test_v4f32_post_imm_ld1r:
; CHECK: ld1r.4s { v0 }, [x0], #4
- %tmp1 = load float* %bar
+ %tmp1 = load float, float* %bar
%tmp2 = insertelement <4 x float> <float undef, float undef, float undef, float undef>, float %tmp1, i32 0
%tmp3 = insertelement <4 x float> %tmp2, float %tmp1, i32 1
%tmp4 = insertelement <4 x float> %tmp3, float %tmp1, i32 2
%tmp5 = insertelement <4 x float> %tmp4, float %tmp1, i32 3
- %tmp6 = getelementptr float* %bar, i64 1
+ %tmp6 = getelementptr float, float* %bar, i64 1
store float* %tmp6, float** %ptr
ret <4 x float> %tmp5
}
@@ -5919,12 +5919,12 @@ define <4 x float> @test_v4f32_post_imm_ld1r(float* %bar, float** %ptr) {
define <4 x float> @test_v4f32_post_reg_ld1r(float* %bar, float** %ptr, i64 %inc) {
; CHECK-LABEL: test_v4f32_post_reg_ld1r:
; CHECK: ld1r.4s { v0 }, [x0], x{{[0-9]+}}
- %tmp1 = load float* %bar
+ %tmp1 = load float, float* %bar
%tmp2 = insertelement <4 x float> <float undef, float undef, float undef, float undef>, float %tmp1, i32 0
%tmp3 = insertelement <4 x float> %tmp2, float %tmp1, i32 1
%tmp4 = insertelement <4 x float> %tmp3, float %tmp1, i32 2
%tmp5 = insertelement <4 x float> %tmp4, float %tmp1, i32 3
- %tmp6 = getelementptr float* %bar, i64 %inc
+ %tmp6 = getelementptr float, float* %bar, i64 %inc
store float* %tmp6, float** %ptr
ret <4 x float> %tmp5
}
@@ -5932,10 +5932,10 @@ define <4 x float> @test_v4f32_post_reg_ld1r(float* %bar, float** %ptr, i64 %inc
define <2 x float> @test_v2f32_post_imm_ld1r(float* %bar, float** %ptr) {
; CHECK-LABEL: test_v2f32_post_imm_ld1r:
; CHECK: ld1r.2s { v0 }, [x0], #4
- %tmp1 = load float* %bar
+ %tmp1 = load float, float* %bar
%tmp2 = insertelement <2 x float> <float undef, float undef>, float %tmp1, i32 0
%tmp3 = insertelement <2 x float> %tmp2, float %tmp1, i32 1
- %tmp4 = getelementptr float* %bar, i64 1
+ %tmp4 = getelementptr float, float* %bar, i64 1
store float* %tmp4, float** %ptr
ret <2 x float> %tmp3
}
@@ -5943,10 +5943,10 @@ define <2 x float> @test_v2f32_post_imm_ld1r(float* %bar, float** %ptr) {
define <2 x float> @test_v2f32_post_reg_ld1r(float* %bar, float** %ptr, i64 %inc) {
; CHECK-LABEL: test_v2f32_post_reg_ld1r:
; CHECK: ld1r.2s { v0 }, [x0], x{{[0-9]+}}
- %tmp1 = load float* %bar
+ %tmp1 = load float, float* %bar
%tmp2 = insertelement <2 x float> <float undef, float undef>, float %tmp1, i32 0
%tmp3 = insertelement <2 x float> %tmp2, float %tmp1, i32 1
- %tmp4 = getelementptr float* %bar, i64 %inc
+ %tmp4 = getelementptr float, float* %bar, i64 %inc
store float* %tmp4, float** %ptr
ret <2 x float> %tmp3
}
@@ -5954,10 +5954,10 @@ define <2 x float> @test_v2f32_post_reg_ld1r(float* %bar, float** %ptr, i64 %inc
define <2 x double> @test_v2f64_post_imm_ld1r(double* %bar, double** %ptr) {
; CHECK-LABEL: test_v2f64_post_imm_ld1r:
; CHECK: ld1r.2d { v0 }, [x0], #8
- %tmp1 = load double* %bar
+ %tmp1 = load double, double* %bar
%tmp2 = insertelement <2 x double> <double undef, double undef>, double %tmp1, i32 0
%tmp3 = insertelement <2 x double> %tmp2, double %tmp1, i32 1
- %tmp4 = getelementptr double* %bar, i64 1
+ %tmp4 = getelementptr double, double* %bar, i64 1
store double* %tmp4, double** %ptr
ret <2 x double> %tmp3
}
@@ -5965,10 +5965,10 @@ define <2 x double> @test_v2f64_post_imm_ld1r(double* %bar, double** %ptr) {
define <2 x double> @test_v2f64_post_reg_ld1r(double* %bar, double** %ptr, i64 %inc) {
; CHECK-LABEL: test_v2f64_post_reg_ld1r:
; CHECK: ld1r.2d { v0 }, [x0], x{{[0-9]+}}
- %tmp1 = load double* %bar
+ %tmp1 = load double, double* %bar
%tmp2 = insertelement <2 x double> <double undef, double undef>, double %tmp1, i32 0
%tmp3 = insertelement <2 x double> %tmp2, double %tmp1, i32 1
- %tmp4 = getelementptr double* %bar, i64 %inc
+ %tmp4 = getelementptr double, double* %bar, i64 %inc
store double* %tmp4, double** %ptr
ret <2 x double> %tmp3
}
@@ -5976,9 +5976,9 @@ define <2 x double> @test_v2f64_post_reg_ld1r(double* %bar, double** %ptr, i64 %
define <16 x i8> @test_v16i8_post_imm_ld1lane(i8* %bar, i8** %ptr, <16 x i8> %A) {
; CHECK-LABEL: test_v16i8_post_imm_ld1lane:
; CHECK: ld1.b { v0 }[1], [x0], #1
- %tmp1 = load i8* %bar
+ %tmp1 = load i8, i8* %bar
%tmp2 = insertelement <16 x i8> %A, i8 %tmp1, i32 1
- %tmp3 = getelementptr i8* %bar, i64 1
+ %tmp3 = getelementptr i8, i8* %bar, i64 1
store i8* %tmp3, i8** %ptr
ret <16 x i8> %tmp2
}
@@ -5986,9 +5986,9 @@ define <16 x i8> @test_v16i8_post_imm_ld1lane(i8* %bar, i8** %ptr, <16 x i8> %A)
define <16 x i8> @test_v16i8_post_reg_ld1lane(i8* %bar, i8** %ptr, i64 %inc, <16 x i8> %A) {
; CHECK-LABEL: test_v16i8_post_reg_ld1lane:
; CHECK: ld1.b { v0 }[1], [x0], x{{[0-9]+}}
- %tmp1 = load i8* %bar
+ %tmp1 = load i8, i8* %bar
%tmp2 = insertelement <16 x i8> %A, i8 %tmp1, i32 1
- %tmp3 = getelementptr i8* %bar, i64 %inc
+ %tmp3 = getelementptr i8, i8* %bar, i64 %inc
store i8* %tmp3, i8** %ptr
ret <16 x i8> %tmp2
}
@@ -5996,9 +5996,9 @@ define <16 x i8> @test_v16i8_post_reg_ld1lane(i8* %bar, i8** %ptr, i64 %inc, <16
define <8 x i8> @test_v8i8_post_imm_ld1lane(i8* %bar, i8** %ptr, <8 x i8> %A) {
; CHECK-LABEL: test_v8i8_post_imm_ld1lane:
; CHECK: ld1.b { v0 }[1], [x0], #1
- %tmp1 = load i8* %bar
+ %tmp1 = load i8, i8* %bar
%tmp2 = insertelement <8 x i8> %A, i8 %tmp1, i32 1
- %tmp3 = getelementptr i8* %bar, i64 1
+ %tmp3 = getelementptr i8, i8* %bar, i64 1
store i8* %tmp3, i8** %ptr
ret <8 x i8> %tmp2
}
@@ -6006,9 +6006,9 @@ define <8 x i8> @test_v8i8_post_imm_ld1lane(i8* %bar, i8** %ptr, <8 x i8> %A) {
define <8 x i8> @test_v8i8_post_reg_ld1lane(i8* %bar, i8** %ptr, i64 %inc, <8 x i8> %A) {
; CHECK-LABEL: test_v8i8_post_reg_ld1lane:
; CHECK: ld1.b { v0 }[1], [x0], x{{[0-9]+}}
- %tmp1 = load i8* %bar
+ %tmp1 = load i8, i8* %bar
%tmp2 = insertelement <8 x i8> %A, i8 %tmp1, i32 1
- %tmp3 = getelementptr i8* %bar, i64 %inc
+ %tmp3 = getelementptr i8, i8* %bar, i64 %inc
store i8* %tmp3, i8** %ptr
ret <8 x i8> %tmp2
}
@@ -6016,9 +6016,9 @@ define <8 x i8> @test_v8i8_post_reg_ld1lane(i8* %bar, i8** %ptr, i64 %inc, <8 x
define <8 x i16> @test_v8i16_post_imm_ld1lane(i16* %bar, i16** %ptr, <8 x i16> %A) {
; CHECK-LABEL: test_v8i16_post_imm_ld1lane:
; CHECK: ld1.h { v0 }[1], [x0], #2
- %tmp1 = load i16* %bar
+ %tmp1 = load i16, i16* %bar
%tmp2 = insertelement <8 x i16> %A, i16 %tmp1, i32 1
- %tmp3 = getelementptr i16* %bar, i64 1
+ %tmp3 = getelementptr i16, i16* %bar, i64 1
store i16* %tmp3, i16** %ptr
ret <8 x i16> %tmp2
}
@@ -6026,9 +6026,9 @@ define <8 x i16> @test_v8i16_post_imm_ld1lane(i16* %bar, i16** %ptr, <8 x i16> %
define <8 x i16> @test_v8i16_post_reg_ld1lane(i16* %bar, i16** %ptr, i64 %inc, <8 x i16> %A) {
; CHECK-LABEL: test_v8i16_post_reg_ld1lane:
; CHECK: ld1.h { v0 }[1], [x0], x{{[0-9]+}}
- %tmp1 = load i16* %bar
+ %tmp1 = load i16, i16* %bar
%tmp2 = insertelement <8 x i16> %A, i16 %tmp1, i32 1
- %tmp3 = getelementptr i16* %bar, i64 %inc
+ %tmp3 = getelementptr i16, i16* %bar, i64 %inc
store i16* %tmp3, i16** %ptr
ret <8 x i16> %tmp2
}
@@ -6036,9 +6036,9 @@ define <8 x i16> @test_v8i16_post_reg_ld1lane(i16* %bar, i16** %ptr, i64 %inc, <
define <4 x i16> @test_v4i16_post_imm_ld1lane(i16* %bar, i16** %ptr, <4 x i16> %A) {
; CHECK-LABEL: test_v4i16_post_imm_ld1lane:
; CHECK: ld1.h { v0 }[1], [x0], #2
- %tmp1 = load i16* %bar
+ %tmp1 = load i16, i16* %bar
%tmp2 = insertelement <4 x i16> %A, i16 %tmp1, i32 1
- %tmp3 = getelementptr i16* %bar, i64 1
+ %tmp3 = getelementptr i16, i16* %bar, i64 1
store i16* %tmp3, i16** %ptr
ret <4 x i16> %tmp2
}
@@ -6046,9 +6046,9 @@ define <4 x i16> @test_v4i16_post_imm_ld1lane(i16* %bar, i16** %ptr, <4 x i16> %
define <4 x i16> @test_v4i16_post_reg_ld1lane(i16* %bar, i16** %ptr, i64 %inc, <4 x i16> %A) {
; CHECK-LABEL: test_v4i16_post_reg_ld1lane:
; CHECK: ld1.h { v0 }[1], [x0], x{{[0-9]+}}
- %tmp1 = load i16* %bar
+ %tmp1 = load i16, i16* %bar
%tmp2 = insertelement <4 x i16> %A, i16 %tmp1, i32 1
- %tmp3 = getelementptr i16* %bar, i64 %inc
+ %tmp3 = getelementptr i16, i16* %bar, i64 %inc
store i16* %tmp3, i16** %ptr
ret <4 x i16> %tmp2
}
@@ -6056,9 +6056,9 @@ define <4 x i16> @test_v4i16_post_reg_ld1lane(i16* %bar, i16** %ptr, i64 %inc, <
define <4 x i32> @test_v4i32_post_imm_ld1lane(i32* %bar, i32** %ptr, <4 x i32> %A) {
; CHECK-LABEL: test_v4i32_post_imm_ld1lane:
; CHECK: ld1.s { v0 }[1], [x0], #4
- %tmp1 = load i32* %bar
+ %tmp1 = load i32, i32* %bar
%tmp2 = insertelement <4 x i32> %A, i32 %tmp1, i32 1
- %tmp3 = getelementptr i32* %bar, i64 1
+ %tmp3 = getelementptr i32, i32* %bar, i64 1
store i32* %tmp3, i32** %ptr
ret <4 x i32> %tmp2
}
@@ -6066,9 +6066,9 @@ define <4 x i32> @test_v4i32_post_imm_ld1lane(i32* %bar, i32** %ptr, <4 x i32> %
define <4 x i32> @test_v4i32_post_reg_ld1lane(i32* %bar, i32** %ptr, i64 %inc, <4 x i32> %A) {
; CHECK-LABEL: test_v4i32_post_reg_ld1lane:
; CHECK: ld1.s { v0 }[1], [x0], x{{[0-9]+}}
- %tmp1 = load i32* %bar
+ %tmp1 = load i32, i32* %bar
%tmp2 = insertelement <4 x i32> %A, i32 %tmp1, i32 1
- %tmp3 = getelementptr i32* %bar, i64 %inc
+ %tmp3 = getelementptr i32, i32* %bar, i64 %inc
store i32* %tmp3, i32** %ptr
ret <4 x i32> %tmp2
}
@@ -6076,9 +6076,9 @@ define <4 x i32> @test_v4i32_post_reg_ld1lane(i32* %bar, i32** %ptr, i64 %inc, <
define <2 x i32> @test_v2i32_post_imm_ld1lane(i32* %bar, i32** %ptr, <2 x i32> %A) {
; CHECK-LABEL: test_v2i32_post_imm_ld1lane:
; CHECK: ld1.s { v0 }[1], [x0], #4
- %tmp1 = load i32* %bar
+ %tmp1 = load i32, i32* %bar
%tmp2 = insertelement <2 x i32> %A, i32 %tmp1, i32 1
- %tmp3 = getelementptr i32* %bar, i64 1
+ %tmp3 = getelementptr i32, i32* %bar, i64 1
store i32* %tmp3, i32** %ptr
ret <2 x i32> %tmp2
}
@@ -6086,9 +6086,9 @@ define <2 x i32> @test_v2i32_post_imm_ld1lane(i32* %bar, i32** %ptr, <2 x i32> %
define <2 x i32> @test_v2i32_post_reg_ld1lane(i32* %bar, i32** %ptr, i64 %inc, <2 x i32> %A) {
; CHECK-LABEL: test_v2i32_post_reg_ld1lane:
; CHECK: ld1.s { v0 }[1], [x0], x{{[0-9]+}}
- %tmp1 = load i32* %bar
+ %tmp1 = load i32, i32* %bar
%tmp2 = insertelement <2 x i32> %A, i32 %tmp1, i32 1
- %tmp3 = getelementptr i32* %bar, i64 %inc
+ %tmp3 = getelementptr i32, i32* %bar, i64 %inc
store i32* %tmp3, i32** %ptr
ret <2 x i32> %tmp2
}
@@ -6096,9 +6096,9 @@ define <2 x i32> @test_v2i32_post_reg_ld1lane(i32* %bar, i32** %ptr, i64 %inc, <
define <2 x i64> @test_v2i64_post_imm_ld1lane(i64* %bar, i64** %ptr, <2 x i64> %A) {
; CHECK-LABEL: test_v2i64_post_imm_ld1lane:
; CHECK: ld1.d { v0 }[1], [x0], #8
- %tmp1 = load i64* %bar
+ %tmp1 = load i64, i64* %bar
%tmp2 = insertelement <2 x i64> %A, i64 %tmp1, i32 1
- %tmp3 = getelementptr i64* %bar, i64 1
+ %tmp3 = getelementptr i64, i64* %bar, i64 1
store i64* %tmp3, i64** %ptr
ret <2 x i64> %tmp2
}
@@ -6106,9 +6106,9 @@ define <2 x i64> @test_v2i64_post_imm_ld1lane(i64* %bar, i64** %ptr, <2 x i64> %
define <2 x i64> @test_v2i64_post_reg_ld1lane(i64* %bar, i64** %ptr, i64 %inc, <2 x i64> %A) {
; CHECK-LABEL: test_v2i64_post_reg_ld1lane:
; CHECK: ld1.d { v0 }[1], [x0], x{{[0-9]+}}
- %tmp1 = load i64* %bar
+ %tmp1 = load i64, i64* %bar
%tmp2 = insertelement <2 x i64> %A, i64 %tmp1, i32 1
- %tmp3 = getelementptr i64* %bar, i64 %inc
+ %tmp3 = getelementptr i64, i64* %bar, i64 %inc
store i64* %tmp3, i64** %ptr
ret <2 x i64> %tmp2
}
@@ -6116,9 +6116,9 @@ define <2 x i64> @test_v2i64_post_reg_ld1lane(i64* %bar, i64** %ptr, i64 %inc, <
define <4 x float> @test_v4f32_post_imm_ld1lane(float* %bar, float** %ptr, <4 x float> %A) {
; CHECK-LABEL: test_v4f32_post_imm_ld1lane:
; CHECK: ld1.s { v0 }[1], [x0], #4
- %tmp1 = load float* %bar
+ %tmp1 = load float, float* %bar
%tmp2 = insertelement <4 x float> %A, float %tmp1, i32 1
- %tmp3 = getelementptr float* %bar, i64 1
+ %tmp3 = getelementptr float, float* %bar, i64 1
store float* %tmp3, float** %ptr
ret <4 x float> %tmp2
}
@@ -6126,9 +6126,9 @@ define <4 x float> @test_v4f32_post_imm_ld1lane(float* %bar, float** %ptr, <4 x
define <4 x float> @test_v4f32_post_reg_ld1lane(float* %bar, float** %ptr, i64 %inc, <4 x float> %A) {
; CHECK-LABEL: test_v4f32_post_reg_ld1lane:
; CHECK: ld1.s { v0 }[1], [x0], x{{[0-9]+}}
- %tmp1 = load float* %bar
+ %tmp1 = load float, float* %bar
%tmp2 = insertelement <4 x float> %A, float %tmp1, i32 1
- %tmp3 = getelementptr float* %bar, i64 %inc
+ %tmp3 = getelementptr float, float* %bar, i64 %inc
store float* %tmp3, float** %ptr
ret <4 x float> %tmp2
}
@@ -6136,9 +6136,9 @@ define <4 x float> @test_v4f32_post_reg_ld1lane(float* %bar, float** %ptr, i64 %
define <2 x float> @test_v2f32_post_imm_ld1lane(float* %bar, float** %ptr, <2 x float> %A) {
; CHECK-LABEL: test_v2f32_post_imm_ld1lane:
; CHECK: ld1.s { v0 }[1], [x0], #4
- %tmp1 = load float* %bar
+ %tmp1 = load float, float* %bar
%tmp2 = insertelement <2 x float> %A, float %tmp1, i32 1
- %tmp3 = getelementptr float* %bar, i64 1
+ %tmp3 = getelementptr float, float* %bar, i64 1
store float* %tmp3, float** %ptr
ret <2 x float> %tmp2
}
@@ -6146,9 +6146,9 @@ define <2 x float> @test_v2f32_post_imm_ld1lane(float* %bar, float** %ptr, <2 x
define <2 x float> @test_v2f32_post_reg_ld1lane(float* %bar, float** %ptr, i64 %inc, <2 x float> %A) {
; CHECK-LABEL: test_v2f32_post_reg_ld1lane:
; CHECK: ld1.s { v0 }[1], [x0], x{{[0-9]+}}
- %tmp1 = load float* %bar
+ %tmp1 = load float, float* %bar
%tmp2 = insertelement <2 x float> %A, float %tmp1, i32 1
- %tmp3 = getelementptr float* %bar, i64 %inc
+ %tmp3 = getelementptr float, float* %bar, i64 %inc
store float* %tmp3, float** %ptr
ret <2 x float> %tmp2
}
@@ -6156,9 +6156,9 @@ define <2 x float> @test_v2f32_post_reg_ld1lane(float* %bar, float** %ptr, i64 %
define <2 x double> @test_v2f64_post_imm_ld1lane(double* %bar, double** %ptr, <2 x double> %A) {
; CHECK-LABEL: test_v2f64_post_imm_ld1lane:
; CHECK: ld1.d { v0 }[1], [x0], #8
- %tmp1 = load double* %bar
+ %tmp1 = load double, double* %bar
%tmp2 = insertelement <2 x double> %A, double %tmp1, i32 1
- %tmp3 = getelementptr double* %bar, i64 1
+ %tmp3 = getelementptr double, double* %bar, i64 1
store double* %tmp3, double** %ptr
ret <2 x double> %tmp2
}
@@ -6166,9 +6166,52 @@ define <2 x double> @test_v2f64_post_imm_ld1lane(double* %bar, double** %ptr, <2
define <2 x double> @test_v2f64_post_reg_ld1lane(double* %bar, double** %ptr, i64 %inc, <2 x double> %A) {
; CHECK-LABEL: test_v2f64_post_reg_ld1lane:
; CHECK: ld1.d { v0 }[1], [x0], x{{[0-9]+}}
- %tmp1 = load double* %bar
+ %tmp1 = load double, double* %bar
%tmp2 = insertelement <2 x double> %A, double %tmp1, i32 1
- %tmp3 = getelementptr double* %bar, i64 %inc
+ %tmp3 = getelementptr double, double* %bar, i64 %inc
store double* %tmp3, double** %ptr
ret <2 x double> %tmp2
-}
\ No newline at end of file
+}
+
+; Check for dependencies between the vector and the scalar load.
+define <4 x float> @test_v4f32_post_reg_ld1lane_dep_vec_on_load(float* %bar, float** %ptr, i64 %inc, <4 x float>* %dep_ptr_1, <4 x float>* %dep_ptr_2) {
+; CHECK-LABEL: test_v4f32_post_reg_ld1lane_dep_vec_on_load:
+; CHECK: BB#0:
+; CHECK-NEXT: ldr s[[LD:[0-9]+]], [x0]
+; CHECK-NEXT: movi.2d v0, #0000000000000000
+; CHECK-NEXT: str q0, [x3]
+; CHECK-NEXT: ldr q0, [x4]
+; CHECK-NEXT: ins.s v0[1], v[[LD]][0]
+; CHECK-NEXT: add [[POST:x[0-9]]], x0, x2, lsl #2
+; CHECK-NEXT: str [[POST]], [x1]
+; CHECK-NEXT: ret
+ %tmp1 = load float, float* %bar
+ store <4 x float> zeroinitializer, <4 x float>* %dep_ptr_1, align 16
+ %A = load <4 x float>, <4 x float>* %dep_ptr_2, align 16
+ %tmp2 = insertelement <4 x float> %A, float %tmp1, i32 1
+ %tmp3 = getelementptr float, float* %bar, i64 %inc
+ store float* %tmp3, float** %ptr
+ ret <4 x float> %tmp2
+}
+
+; Make sure that we test the narrow V64 code path.
+; The tests above don't, because there, 64-bit insert_vector_elt nodes will be
+; widened to 128-bit before the LD1LANEpost combine has the chance to run,
+; making it avoid narrow vector types.
+; One way to trick that combine into running early is to force the vector ops
+; legalizer to run. We achieve that using the ctpop.
+; PR23265
+define <4 x i16> @test_v4i16_post_reg_ld1lane_forced_narrow(i16* %bar, i16** %ptr, i64 %inc, <4 x i16> %A, <2 x i32>* %d) {
+; CHECK-LABEL: test_v4i16_post_reg_ld1lane_forced_narrow:
+; CHECK: ld1.h { v0 }[1], [x0], x{{[0-9]+}}
+ %tmp1 = load i16, i16* %bar
+ %tmp2 = insertelement <4 x i16> %A, i16 %tmp1, i32 1
+ %tmp3 = getelementptr i16, i16* %bar, i64 %inc
+ store i16* %tmp3, i16** %ptr
+ %dl = load <2 x i32>, <2 x i32>* %d
+ %dr = call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> %dl)
+ store <2 x i32> %dr, <2 x i32>* %d
+ ret <4 x i16> %tmp2
+}
+
+declare <2 x i32> @llvm.ctpop.v2i32(<2 x i32>)
diff --git a/test/CodeGen/AArch64/arm64-inline-asm.ll b/test/CodeGen/AArch64/arm64-inline-asm.ll
index 9c8bcaadc17c..802d95826ce4 100644
--- a/test/CodeGen/AArch64/arm64-inline-asm.ll
+++ b/test/CodeGen/AArch64/arm64-inline-asm.ll
@@ -125,7 +125,7 @@ define void @t9() nounwind {
entry:
; CHECK-LABEL: t9:
%data = alloca <2 x double>, align 16
- %0 = load <2 x double>* %data, align 16
+ %0 = load <2 x double>, <2 x double>* %data, align 16
call void asm sideeffect "mov.2d v4, $0\0A", "w,~{v4}"(<2 x double> %0) nounwind
; CHECK: mov.2d v4, {{v[0-9]+}}
ret void
@@ -136,8 +136,8 @@ entry:
; CHECK-LABEL: t10:
%data = alloca <2 x float>, align 8
%a = alloca [2 x float], align 4
- %arraydecay = getelementptr inbounds [2 x float]* %a, i32 0, i32 0
- %0 = load <2 x float>* %data, align 8
+ %arraydecay = getelementptr inbounds [2 x float], [2 x float]* %a, i32 0, i32 0
+ %0 = load <2 x float>, <2 x float>* %data, align 8
call void asm sideeffect "ldr ${1:q}, [$0]\0A", "r,w"(float* %arraydecay, <2 x float> %0) nounwind
; CHECK: ldr {{q[0-9]+}}, [{{x[0-9]+}}]
call void asm sideeffect "ldr ${1:d}, [$0]\0A", "r,w"(float* %arraydecay, <2 x float> %0) nounwind
@@ -155,10 +155,10 @@ define void @t11() nounwind {
entry:
; CHECK-LABEL: t11:
%a = alloca i32, align 4
- %0 = load i32* %a, align 4
+ %0 = load i32, i32* %a, align 4
call void asm sideeffect "mov ${1:x}, ${0:x}\0A", "r,i"(i32 %0, i32 0) nounwind
; CHECK: mov xzr, {{x[0-9]+}}
- %1 = load i32* %a, align 4
+ %1 = load i32, i32* %a, align 4
call void asm sideeffect "mov ${1:w}, ${0:w}\0A", "r,i"(i32 %1, i32 0) nounwind
; CHECK: mov wzr, {{w[0-9]+}}
ret void
@@ -168,9 +168,9 @@ define void @t12() nounwind {
entry:
; CHECK-LABEL: t12:
%data = alloca <4 x float>, align 16
- %0 = load <4 x float>* %data, align 16
+ %0 = load <4 x float>, <4 x float>* %data, align 16
call void asm sideeffect "mov.2d v4, $0\0A", "x,~{v4}"(<4 x float> %0) nounwind
- ; CHECK mov.2d v4, {{v([0-9])|(1[0-5])}}
+ ; CHECK: mov.2d v4, {{v([0-9])|(1[0-5])}}
ret void
}
diff --git a/test/CodeGen/AArch64/arm64-join-reserved.ll b/test/CodeGen/AArch64/arm64-join-reserved.ll
index e99168b5eba3..dee034483541 100644
--- a/test/CodeGen/AArch64/arm64-join-reserved.ll
+++ b/test/CodeGen/AArch64/arm64-join-reserved.ll
@@ -10,7 +10,7 @@ target triple = "arm64-apple-macosx10"
; CHECK: ret
define void @g() nounwind ssp {
entry:
- tail call void (i32, ...)* @f(i32 0, i32 0) nounwind
+ tail call void (i32, ...) @f(i32 0, i32 0) nounwind
ret void
}
diff --git a/test/CodeGen/AArch64/arm64-large-frame.ll b/test/CodeGen/AArch64/arm64-large-frame.ll
index 5a53da693882..c4cce36bcb74 100644
--- a/test/CodeGen/AArch64/arm64-large-frame.ll
+++ b/test/CodeGen/AArch64/arm64-large-frame.ll
@@ -23,7 +23,7 @@ define void @test_bigframe() {
; CHECK: add {{x[0-9]+}}, [[TMP1]], #3344
store volatile i8* %var1, i8** @addr
- %var1plus2 = getelementptr i8* %var1, i32 2
+ %var1plus2 = getelementptr i8, i8* %var1, i32 2
store volatile i8* %var1plus2, i8** @addr
; CHECK: add [[TMP:x[0-9]+]], sp, #4095, lsl #12
@@ -31,12 +31,12 @@ define void @test_bigframe() {
; CHECK: add {{x[0-9]+}}, [[TMP1]], #3328
store volatile i8* %var2, i8** @addr
- %var2plus2 = getelementptr i8* %var2, i32 2
+ %var2plus2 = getelementptr i8, i8* %var2, i32 2
store volatile i8* %var2plus2, i8** @addr
store volatile i8* %var3, i8** @addr
- %var3plus2 = getelementptr i8* %var3, i32 2
+ %var3plus2 = getelementptr i8, i8* %var3, i32 2
store volatile i8* %var3plus2, i8** @addr
; CHECK: add sp, sp, #4095, lsl #12
diff --git a/test/CodeGen/AArch64/arm64-ld1.ll b/test/CodeGen/AArch64/arm64-ld1.ll
index 72d808ccc347..a83a2703addc 100644
--- a/test/CodeGen/AArch64/arm64-ld1.ll
+++ b/test/CodeGen/AArch64/arm64-ld1.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple -verify-machineinstrs | FileCheck %s
+; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple -verify-machineinstrs -asm-verbose=false | FileCheck %s
%struct.__neon_int8x8x2_t = type { <8 x i8>, <8 x i8> }
%struct.__neon_int8x8x3_t = type { <8 x i8>, <8 x i8>, <8 x i8> }
@@ -8,8 +8,8 @@ define %struct.__neon_int8x8x2_t @ld2_8b(i8* %A) nounwind {
; CHECK-LABEL: ld2_8b
; Make sure we are loading into the results defined by the ABI (i.e., v0, v1)
; and from the argument of the function also defined by ABI (i.e., x0)
-; CHECK ld2.8b { v0, v1 }, [x0]
-; CHECK-NEXT ret
+; CHECK: ld2.8b { v0, v1 }, [x0]
+; CHECK-NEXT: ret
%tmp2 = call %struct.__neon_int8x8x2_t @llvm.aarch64.neon.ld2.v8i8.p0i8(i8* %A)
ret %struct.__neon_int8x8x2_t %tmp2
}
@@ -17,8 +17,8 @@ define %struct.__neon_int8x8x2_t @ld2_8b(i8* %A) nounwind {
define %struct.__neon_int8x8x3_t @ld3_8b(i8* %A) nounwind {
; CHECK-LABEL: ld3_8b
; Make sure we are using the operands defined by the ABI
-; CHECK ld3.8b { v0, v1, v2 }, [x0]
-; CHECK-NEXT ret
+; CHECK: ld3.8b { v0, v1, v2 }, [x0]
+; CHECK-NEXT: ret
%tmp2 = call %struct.__neon_int8x8x3_t @llvm.aarch64.neon.ld3.v8i8.p0i8(i8* %A)
ret %struct.__neon_int8x8x3_t %tmp2
}
@@ -26,8 +26,8 @@ define %struct.__neon_int8x8x3_t @ld3_8b(i8* %A) nounwind {
define %struct.__neon_int8x8x4_t @ld4_8b(i8* %A) nounwind {
; CHECK-LABEL: ld4_8b
; Make sure we are using the operands defined by the ABI
-; CHECK ld4.8b { v0, v1, v2, v3 }, [x0]
-; CHECK-NEXT ret
+; CHECK: ld4.8b { v0, v1, v2, v3 }, [x0]
+; CHECK-NEXT: ret
%tmp2 = call %struct.__neon_int8x8x4_t @llvm.aarch64.neon.ld4.v8i8.p0i8(i8* %A)
ret %struct.__neon_int8x8x4_t %tmp2
}
@@ -43,8 +43,8 @@ declare %struct.__neon_int8x8x4_t @llvm.aarch64.neon.ld4.v8i8.p0i8(i8*) nounwind
define %struct.__neon_int8x16x2_t @ld2_16b(i8* %A) nounwind {
; CHECK-LABEL: ld2_16b
; Make sure we are using the operands defined by the ABI
-; CHECK ld2.16b { v0, v1 }, [x0]
-; CHECK-NEXT ret
+; CHECK: ld2.16b { v0, v1 }, [x0]
+; CHECK-NEXT: ret
%tmp2 = call %struct.__neon_int8x16x2_t @llvm.aarch64.neon.ld2.v16i8.p0i8(i8* %A)
ret %struct.__neon_int8x16x2_t %tmp2
}
@@ -52,8 +52,8 @@ define %struct.__neon_int8x16x2_t @ld2_16b(i8* %A) nounwind {
define %struct.__neon_int8x16x3_t @ld3_16b(i8* %A) nounwind {
; CHECK-LABEL: ld3_16b
; Make sure we are using the operands defined by the ABI
-; CHECK ld3.16b { v0, v1, v2 }, [x0]
-; CHECK-NEXT ret
+; CHECK: ld3.16b { v0, v1, v2 }, [x0]
+; CHECK-NEXT: ret
%tmp2 = call %struct.__neon_int8x16x3_t @llvm.aarch64.neon.ld3.v16i8.p0i8(i8* %A)
ret %struct.__neon_int8x16x3_t %tmp2
}
@@ -61,8 +61,8 @@ define %struct.__neon_int8x16x3_t @ld3_16b(i8* %A) nounwind {
define %struct.__neon_int8x16x4_t @ld4_16b(i8* %A) nounwind {
; CHECK-LABEL: ld4_16b
; Make sure we are using the operands defined by the ABI
-; CHECK ld4.16b { v0, v1, v2, v3 }, [x0]
-; CHECK-NEXT ret
+; CHECK: ld4.16b { v0, v1, v2, v3 }, [x0]
+; CHECK-NEXT: ret
%tmp2 = call %struct.__neon_int8x16x4_t @llvm.aarch64.neon.ld4.v16i8.p0i8(i8* %A)
ret %struct.__neon_int8x16x4_t %tmp2
}
@@ -78,8 +78,8 @@ declare %struct.__neon_int8x16x4_t @llvm.aarch64.neon.ld4.v16i8.p0i8(i8*) nounwi
define %struct.__neon_int16x4x2_t @ld2_4h(i16* %A) nounwind {
; CHECK-LABEL: ld2_4h
; Make sure we are using the operands defined by the ABI
-; CHECK ld2.4h { v0, v1 }, [x0]
-; CHECK-NEXT ret
+; CHECK: ld2.4h { v0, v1 }, [x0]
+; CHECK-NEXT: ret
%tmp2 = call %struct.__neon_int16x4x2_t @llvm.aarch64.neon.ld2.v4i16.p0i16(i16* %A)
ret %struct.__neon_int16x4x2_t %tmp2
}
@@ -87,8 +87,8 @@ define %struct.__neon_int16x4x2_t @ld2_4h(i16* %A) nounwind {
define %struct.__neon_int16x4x3_t @ld3_4h(i16* %A) nounwind {
; CHECK-LABEL: ld3_4h
; Make sure we are using the operands defined by the ABI
-; CHECK ld3.4h { v0, v1, v2 }, [x0]
-; CHECK-NEXT ret
+; CHECK: ld3.4h { v0, v1, v2 }, [x0]
+; CHECK-NEXT: ret
%tmp2 = call %struct.__neon_int16x4x3_t @llvm.aarch64.neon.ld3.v4i16.p0i16(i16* %A)
ret %struct.__neon_int16x4x3_t %tmp2
}
@@ -96,8 +96,8 @@ define %struct.__neon_int16x4x3_t @ld3_4h(i16* %A) nounwind {
define %struct.__neon_int16x4x4_t @ld4_4h(i16* %A) nounwind {
; CHECK-LABEL: ld4_4h
; Make sure we are using the operands defined by the ABI
-; CHECK ld4.4h { v0, v1, v2, v3 }, [x0]
-; CHECK-NEXT ret
+; CHECK: ld4.4h { v0, v1, v2, v3 }, [x0]
+; CHECK-NEXT: ret
%tmp2 = call %struct.__neon_int16x4x4_t @llvm.aarch64.neon.ld4.v4i16.p0i16(i16* %A)
ret %struct.__neon_int16x4x4_t %tmp2
}
@@ -113,8 +113,8 @@ declare %struct.__neon_int16x4x4_t @llvm.aarch64.neon.ld4.v4i16.p0i16(i16*) noun
define %struct.__neon_int16x8x2_t @ld2_8h(i16* %A) nounwind {
; CHECK-LABEL: ld2_8h
; Make sure we are using the operands defined by the ABI
-; CHECK ld2.8h { v0, v1 }, [x0]
-; CHECK-NEXT ret
+; CHECK: ld2.8h { v0, v1 }, [x0]
+; CHECK-NEXT: ret
%tmp2 = call %struct.__neon_int16x8x2_t @llvm.aarch64.neon.ld2.v8i16.p0i16(i16* %A)
ret %struct.__neon_int16x8x2_t %tmp2
}
@@ -122,8 +122,8 @@ define %struct.__neon_int16x8x2_t @ld2_8h(i16* %A) nounwind {
define %struct.__neon_int16x8x3_t @ld3_8h(i16* %A) nounwind {
; CHECK-LABEL: ld3_8h
; Make sure we are using the operands defined by the ABI
-; CHECK ld3.8h { v0, v1, v2 }, [x0]
-; CHECK-NEXT ret
+; CHECK: ld3.8h { v0, v1, v2 }, [x0]
+; CHECK-NEXT: ret
%tmp2 = call %struct.__neon_int16x8x3_t @llvm.aarch64.neon.ld3.v8i16.p0i16(i16* %A)
ret %struct.__neon_int16x8x3_t %tmp2
}
@@ -131,8 +131,8 @@ define %struct.__neon_int16x8x3_t @ld3_8h(i16* %A) nounwind {
define %struct.__neon_int16x8x4_t @ld4_8h(i16* %A) nounwind {
; CHECK-LABEL: ld4_8h
; Make sure we are using the operands defined by the ABI
-; CHECK ld4.8h { v0, v1, v2, v3 }, [x0]
-; CHECK-NEXT ret
+; CHECK: ld4.8h { v0, v1, v2, v3 }, [x0]
+; CHECK-NEXT: ret
%tmp2 = call %struct.__neon_int16x8x4_t @llvm.aarch64.neon.ld4.v8i16.p0i16(i16* %A)
ret %struct.__neon_int16x8x4_t %tmp2
}
@@ -148,8 +148,8 @@ declare %struct.__neon_int16x8x4_t @llvm.aarch64.neon.ld4.v8i16.p0i16(i16*) noun
define %struct.__neon_int32x2x2_t @ld2_2s(i32* %A) nounwind {
; CHECK-LABEL: ld2_2s
; Make sure we are using the operands defined by the ABI
-; CHECK ld2.2s { v0, v1 }, [x0]
-; CHECK-NEXT ret
+; CHECK: ld2.2s { v0, v1 }, [x0]
+; CHECK-NEXT: ret
%tmp2 = call %struct.__neon_int32x2x2_t @llvm.aarch64.neon.ld2.v2i32.p0i32(i32* %A)
ret %struct.__neon_int32x2x2_t %tmp2
}
@@ -157,8 +157,8 @@ define %struct.__neon_int32x2x2_t @ld2_2s(i32* %A) nounwind {
define %struct.__neon_int32x2x3_t @ld3_2s(i32* %A) nounwind {
; CHECK-LABEL: ld3_2s
; Make sure we are using the operands defined by the ABI
-; CHECK ld3.2s { v0, v1, v2 }, [x0]
-; CHECK-NEXT ret
+; CHECK: ld3.2s { v0, v1, v2 }, [x0]
+; CHECK-NEXT: ret
%tmp2 = call %struct.__neon_int32x2x3_t @llvm.aarch64.neon.ld3.v2i32.p0i32(i32* %A)
ret %struct.__neon_int32x2x3_t %tmp2
}
@@ -166,8 +166,8 @@ define %struct.__neon_int32x2x3_t @ld3_2s(i32* %A) nounwind {
define %struct.__neon_int32x2x4_t @ld4_2s(i32* %A) nounwind {
; CHECK-LABEL: ld4_2s
; Make sure we are using the operands defined by the ABI
-; CHECK ld4.2s { v0, v1, v2, v3 }, [x0]
-; CHECK-NEXT ret
+; CHECK: ld4.2s { v0, v1, v2, v3 }, [x0]
+; CHECK-NEXT: ret
%tmp2 = call %struct.__neon_int32x2x4_t @llvm.aarch64.neon.ld4.v2i32.p0i32(i32* %A)
ret %struct.__neon_int32x2x4_t %tmp2
}
@@ -183,8 +183,8 @@ declare %struct.__neon_int32x2x4_t @llvm.aarch64.neon.ld4.v2i32.p0i32(i32*) noun
define %struct.__neon_int32x4x2_t @ld2_4s(i32* %A) nounwind {
; CHECK-LABEL: ld2_4s
; Make sure we are using the operands defined by the ABI
-; CHECK ld2.4s { v0, v1 }, [x0]
-; CHECK-NEXT ret
+; CHECK: ld2.4s { v0, v1 }, [x0]
+; CHECK-NEXT: ret
%tmp2 = call %struct.__neon_int32x4x2_t @llvm.aarch64.neon.ld2.v4i32.p0i32(i32* %A)
ret %struct.__neon_int32x4x2_t %tmp2
}
@@ -192,8 +192,8 @@ define %struct.__neon_int32x4x2_t @ld2_4s(i32* %A) nounwind {
define %struct.__neon_int32x4x3_t @ld3_4s(i32* %A) nounwind {
; CHECK-LABEL: ld3_4s
; Make sure we are using the operands defined by the ABI
-; CHECK ld3.4s { v0, v1, v2 }, [x0]
-; CHECK-NEXT ret
+; CHECK: ld3.4s { v0, v1, v2 }, [x0]
+; CHECK-NEXT: ret
%tmp2 = call %struct.__neon_int32x4x3_t @llvm.aarch64.neon.ld3.v4i32.p0i32(i32* %A)
ret %struct.__neon_int32x4x3_t %tmp2
}
@@ -201,8 +201,8 @@ define %struct.__neon_int32x4x3_t @ld3_4s(i32* %A) nounwind {
define %struct.__neon_int32x4x4_t @ld4_4s(i32* %A) nounwind {
; CHECK-LABEL: ld4_4s
; Make sure we are using the operands defined by the ABI
-; CHECK ld4.4s { v0, v1, v2, v3 }, [x0]
-; CHECK-NEXT ret
+; CHECK: ld4.4s { v0, v1, v2, v3 }, [x0]
+; CHECK-NEXT: ret
%tmp2 = call %struct.__neon_int32x4x4_t @llvm.aarch64.neon.ld4.v4i32.p0i32(i32* %A)
ret %struct.__neon_int32x4x4_t %tmp2
}
@@ -218,8 +218,8 @@ declare %struct.__neon_int32x4x4_t @llvm.aarch64.neon.ld4.v4i32.p0i32(i32*) noun
define %struct.__neon_int64x2x2_t @ld2_2d(i64* %A) nounwind {
; CHECK-LABEL: ld2_2d
; Make sure we are using the operands defined by the ABI
-; CHECK ld2.2d { v0, v1 }, [x0]
-; CHECK-NEXT ret
+; CHECK: ld2.2d { v0, v1 }, [x0]
+; CHECK-NEXT: ret
%tmp2 = call %struct.__neon_int64x2x2_t @llvm.aarch64.neon.ld2.v2i64.p0i64(i64* %A)
ret %struct.__neon_int64x2x2_t %tmp2
}
@@ -227,8 +227,8 @@ define %struct.__neon_int64x2x2_t @ld2_2d(i64* %A) nounwind {
define %struct.__neon_int64x2x3_t @ld3_2d(i64* %A) nounwind {
; CHECK-LABEL: ld3_2d
; Make sure we are using the operands defined by the ABI
-; CHECK ld3.2d { v0, v1, v2 }, [x0]
-; CHECK-NEXT ret
+; CHECK: ld3.2d { v0, v1, v2 }, [x0]
+; CHECK-NEXT: ret
%tmp2 = call %struct.__neon_int64x2x3_t @llvm.aarch64.neon.ld3.v2i64.p0i64(i64* %A)
ret %struct.__neon_int64x2x3_t %tmp2
}
@@ -236,8 +236,8 @@ define %struct.__neon_int64x2x3_t @ld3_2d(i64* %A) nounwind {
define %struct.__neon_int64x2x4_t @ld4_2d(i64* %A) nounwind {
; CHECK-LABEL: ld4_2d
; Make sure we are using the operands defined by the ABI
-; CHECK ld4.2d { v0, v1, v2, v3 }, [x0]
-; CHECK-NEXT ret
+; CHECK: ld4.2d { v0, v1, v2, v3 }, [x0]
+; CHECK-NEXT: ret
%tmp2 = call %struct.__neon_int64x2x4_t @llvm.aarch64.neon.ld4.v2i64.p0i64(i64* %A)
ret %struct.__neon_int64x2x4_t %tmp2
}
@@ -254,8 +254,8 @@ declare %struct.__neon_int64x2x4_t @llvm.aarch64.neon.ld4.v2i64.p0i64(i64*) noun
define %struct.__neon_int64x1x2_t @ld2_1di64(i64* %A) nounwind {
; CHECK-LABEL: ld2_1di64
; Make sure we are using the operands defined by the ABI
-; CHECK ld1.1d { v0, v1 }, [x0]
-; CHECK-NEXT ret
+; CHECK: ld1.1d { v0, v1 }, [x0]
+; CHECK-NEXT: ret
%tmp2 = call %struct.__neon_int64x1x2_t @llvm.aarch64.neon.ld2.v1i64.p0i64(i64* %A)
ret %struct.__neon_int64x1x2_t %tmp2
}
@@ -263,8 +263,8 @@ define %struct.__neon_int64x1x2_t @ld2_1di64(i64* %A) nounwind {
define %struct.__neon_int64x1x3_t @ld3_1di64(i64* %A) nounwind {
; CHECK-LABEL: ld3_1di64
; Make sure we are using the operands defined by the ABI
-; CHECK ld1.1d { v0, v1, v2 }, [x0]
-; CHECK-NEXT ret
+; CHECK: ld1.1d { v0, v1, v2 }, [x0]
+; CHECK-NEXT: ret
%tmp2 = call %struct.__neon_int64x1x3_t @llvm.aarch64.neon.ld3.v1i64.p0i64(i64* %A)
ret %struct.__neon_int64x1x3_t %tmp2
}
@@ -272,8 +272,8 @@ define %struct.__neon_int64x1x3_t @ld3_1di64(i64* %A) nounwind {
define %struct.__neon_int64x1x4_t @ld4_1di64(i64* %A) nounwind {
; CHECK-LABEL: ld4_1di64
; Make sure we are using the operands defined by the ABI
-; CHECK ld1.1d { v0, v1, v2, v3 }, [x0]
-; CHECK-NEXT ret
+; CHECK: ld1.1d { v0, v1, v2, v3 }, [x0]
+; CHECK-NEXT: ret
%tmp2 = call %struct.__neon_int64x1x4_t @llvm.aarch64.neon.ld4.v1i64.p0i64(i64* %A)
ret %struct.__neon_int64x1x4_t %tmp2
}
@@ -291,8 +291,8 @@ declare %struct.__neon_int64x1x4_t @llvm.aarch64.neon.ld4.v1i64.p0i64(i64*) noun
define %struct.__neon_float64x1x2_t @ld2_1df64(double* %A) nounwind {
; CHECK-LABEL: ld2_1df64
; Make sure we are using the operands defined by the ABI
-; CHECK ld1.1d { v0, v1 }, [x0]
-; CHECK-NEXT ret
+; CHECK: ld1.1d { v0, v1 }, [x0]
+; CHECK-NEXT: ret
%tmp2 = call %struct.__neon_float64x1x2_t @llvm.aarch64.neon.ld2.v1f64.p0f64(double* %A)
ret %struct.__neon_float64x1x2_t %tmp2
}
@@ -300,8 +300,8 @@ define %struct.__neon_float64x1x2_t @ld2_1df64(double* %A) nounwind {
define %struct.__neon_float64x1x3_t @ld3_1df64(double* %A) nounwind {
; CHECK-LABEL: ld3_1df64
; Make sure we are using the operands defined by the ABI
-; CHECK ld1.1d { v0, v1, v2 }, [x0]
-; CHECK-NEXT ret
+; CHECK: ld1.1d { v0, v1, v2 }, [x0]
+; CHECK-NEXT: ret
%tmp2 = call %struct.__neon_float64x1x3_t @llvm.aarch64.neon.ld3.v1f64.p0f64(double* %A)
ret %struct.__neon_float64x1x3_t %tmp2
}
@@ -309,8 +309,8 @@ define %struct.__neon_float64x1x3_t @ld3_1df64(double* %A) nounwind {
define %struct.__neon_float64x1x4_t @ld4_1df64(double* %A) nounwind {
; CHECK-LABEL: ld4_1df64
; Make sure we are using the operands defined by the ABI
-; CHECK ld1.1d { v0, v1, v2, v3 }, [x0]
-; CHECK-NEXT ret
+; CHECK: ld1.1d { v0, v1, v2, v3 }, [x0]
+; CHECK-NEXT: ret
%tmp2 = call %struct.__neon_float64x1x4_t @llvm.aarch64.neon.ld4.v1f64.p0f64(double* %A)
ret %struct.__neon_float64x1x4_t %tmp2
}
@@ -323,8 +323,8 @@ declare %struct.__neon_float64x1x4_t @llvm.aarch64.neon.ld4.v1f64.p0f64(double*)
define %struct.__neon_int8x16x2_t @ld2lane_16b(<16 x i8> %L1, <16 x i8> %L2, i8* %A) nounwind {
; Make sure we are using the operands defined by the ABI
; CHECK: ld2lane_16b
-; CHECK ld2.b { v0, v1 }[1], [x0]
-; CHECK-NEXT ret
+; CHECK: ld2.b { v0, v1 }[1], [x0]
+; CHECK-NEXT: ret
%tmp2 = call %struct.__neon_int8x16x2_t @llvm.aarch64.neon.ld2lane.v16i8.p0i8(<16 x i8> %L1, <16 x i8> %L2, i64 1, i8* %A)
ret %struct.__neon_int8x16x2_t %tmp2
}
@@ -332,8 +332,8 @@ define %struct.__neon_int8x16x2_t @ld2lane_16b(<16 x i8> %L1, <16 x i8> %L2, i8*
define %struct.__neon_int8x16x3_t @ld3lane_16b(<16 x i8> %L1, <16 x i8> %L2, <16 x i8> %L3, i8* %A) nounwind {
; Make sure we are using the operands defined by the ABI
; CHECK: ld3lane_16b
-; CHECK ld3.b { v0, v1, v2 }[1], [x0]
-; CHECK-NEXT ret
+; CHECK: ld3.b { v0, v1, v2 }[1], [x0]
+; CHECK-NEXT: ret
%tmp2 = call %struct.__neon_int8x16x3_t @llvm.aarch64.neon.ld3lane.v16i8.p0i8(<16 x i8> %L1, <16 x i8> %L2, <16 x i8> %L3, i64 1, i8* %A)
ret %struct.__neon_int8x16x3_t %tmp2
}
@@ -341,8 +341,8 @@ define %struct.__neon_int8x16x3_t @ld3lane_16b(<16 x i8> %L1, <16 x i8> %L2, <16
define %struct.__neon_int8x16x4_t @ld4lane_16b(<16 x i8> %L1, <16 x i8> %L2, <16 x i8> %L3, <16 x i8> %L4, i8* %A) nounwind {
; Make sure we are using the operands defined by the ABI
; CHECK: ld4lane_16b
-; CHECK ld4.b { v0, v1, v2, v3 }[1], [x0]
-; CHECK-NEXT ret
+; CHECK: ld4.b { v0, v1, v2, v3 }[1], [x0]
+; CHECK-NEXT: ret
%tmp2 = call %struct.__neon_int8x16x4_t @llvm.aarch64.neon.ld4lane.v16i8.p0i8(<16 x i8> %L1, <16 x i8> %L2, <16 x i8> %L3, <16 x i8> %L4, i64 1, i8* %A)
ret %struct.__neon_int8x16x4_t %tmp2
}
@@ -354,8 +354,8 @@ declare %struct.__neon_int8x16x4_t @llvm.aarch64.neon.ld4lane.v16i8.p0i8(<16 x i
define %struct.__neon_int16x8x2_t @ld2lane_8h(<8 x i16> %L1, <8 x i16> %L2, i16* %A) nounwind {
; Make sure we are using the operands defined by the ABI
; CHECK: ld2lane_8h
-; CHECK ld2.h { v0, v1 }[1], [x0]
-; CHECK-NEXT ret
+; CHECK: ld2.h { v0, v1 }[1], [x0]
+; CHECK-NEXT: ret
%tmp2 = call %struct.__neon_int16x8x2_t @llvm.aarch64.neon.ld2lane.v8i16.p0i16(<8 x i16> %L1, <8 x i16> %L2, i64 1, i16* %A)
ret %struct.__neon_int16x8x2_t %tmp2
}
@@ -363,8 +363,8 @@ define %struct.__neon_int16x8x2_t @ld2lane_8h(<8 x i16> %L1, <8 x i16> %L2, i16*
define %struct.__neon_int16x8x3_t @ld3lane_8h(<8 x i16> %L1, <8 x i16> %L2, <8 x i16> %L3, i16* %A) nounwind {
; Make sure we are using the operands defined by the ABI
; CHECK: ld3lane_8h
-; CHECK ld3.h { v0, v1, v3 }[1], [x0]
-; CHECK-NEXT ret
+; CHECK: ld3.h { v0, v1, v2 }[1], [x0]
+; CHECK-NEXT: ret
%tmp2 = call %struct.__neon_int16x8x3_t @llvm.aarch64.neon.ld3lane.v8i16.p0i16(<8 x i16> %L1, <8 x i16> %L2, <8 x i16> %L3, i64 1, i16* %A)
ret %struct.__neon_int16x8x3_t %tmp2
}
@@ -372,8 +372,8 @@ define %struct.__neon_int16x8x3_t @ld3lane_8h(<8 x i16> %L1, <8 x i16> %L2, <8 x
define %struct.__neon_int16x8x4_t @ld4lane_8h(<8 x i16> %L1, <8 x i16> %L2, <8 x i16> %L3, <8 x i16> %L4, i16* %A) nounwind {
; Make sure we are using the operands defined by the ABI
; CHECK: ld4lane_8h
-; CHECK ld4.h { v0, v1, v2, v3 }[1], [x0]
-; CHECK-NEXT ret
+; CHECK: ld4.h { v0, v1, v2, v3 }[1], [x0]
+; CHECK-NEXT: ret
%tmp2 = call %struct.__neon_int16x8x4_t @llvm.aarch64.neon.ld4lane.v8i16.p0i16(<8 x i16> %L1, <8 x i16> %L2, <8 x i16> %L3, <8 x i16> %L4, i64 1, i16* %A)
ret %struct.__neon_int16x8x4_t %tmp2
}
@@ -385,8 +385,8 @@ declare %struct.__neon_int16x8x4_t @llvm.aarch64.neon.ld4lane.v8i16.p0i16(<8 x i
define %struct.__neon_int32x4x2_t @ld2lane_4s(<4 x i32> %L1, <4 x i32> %L2, i32* %A) nounwind {
; Make sure we are using the operands defined by the ABI
; CHECK: ld2lane_4s
-; CHECK ld2.s { v0, v1 }[1], [x0]
-; CHECK-NEXT ret
+; CHECK: ld2.s { v0, v1 }[1], [x0]
+; CHECK-NEXT: ret
%tmp2 = call %struct.__neon_int32x4x2_t @llvm.aarch64.neon.ld2lane.v4i32.p0i32(<4 x i32> %L1, <4 x i32> %L2, i64 1, i32* %A)
ret %struct.__neon_int32x4x2_t %tmp2
}
@@ -394,8 +394,8 @@ define %struct.__neon_int32x4x2_t @ld2lane_4s(<4 x i32> %L1, <4 x i32> %L2, i32*
define %struct.__neon_int32x4x3_t @ld3lane_4s(<4 x i32> %L1, <4 x i32> %L2, <4 x i32> %L3, i32* %A) nounwind {
; Make sure we are using the operands defined by the ABI
; CHECK: ld3lane_4s
-; CHECK ld3.s { v0, v1, v2 }[1], [x0]
-; CHECK-NEXT ret
+; CHECK: ld3.s { v0, v1, v2 }[1], [x0]
+; CHECK-NEXT: ret
%tmp2 = call %struct.__neon_int32x4x3_t @llvm.aarch64.neon.ld3lane.v4i32.p0i32(<4 x i32> %L1, <4 x i32> %L2, <4 x i32> %L3, i64 1, i32* %A)
ret %struct.__neon_int32x4x3_t %tmp2
}
@@ -403,8 +403,8 @@ define %struct.__neon_int32x4x3_t @ld3lane_4s(<4 x i32> %L1, <4 x i32> %L2, <4 x
define %struct.__neon_int32x4x4_t @ld4lane_4s(<4 x i32> %L1, <4 x i32> %L2, <4 x i32> %L3, <4 x i32> %L4, i32* %A) nounwind {
; Make sure we are using the operands defined by the ABI
; CHECK: ld4lane_4s
-; CHECK ld4.s { v0, v1, v2, v3 }[1], [x0]
-; CHECK-NEXT ret
+; CHECK: ld4.s { v0, v1, v2, v3 }[1], [x0]
+; CHECK-NEXT: ret
%tmp2 = call %struct.__neon_int32x4x4_t @llvm.aarch64.neon.ld4lane.v4i32.p0i32(<4 x i32> %L1, <4 x i32> %L2, <4 x i32> %L3, <4 x i32> %L4, i64 1, i32* %A)
ret %struct.__neon_int32x4x4_t %tmp2
}
@@ -416,8 +416,8 @@ declare %struct.__neon_int32x4x4_t @llvm.aarch64.neon.ld4lane.v4i32.p0i32(<4 x i
define %struct.__neon_int64x2x2_t @ld2lane_2d(<2 x i64> %L1, <2 x i64> %L2, i64* %A) nounwind {
; Make sure we are using the operands defined by the ABI
; CHECK: ld2lane_2d
-; CHECK ld2.d { v0, v1 }[1], [x0]
-; CHECK-NEXT ret
+; CHECK: ld2.d { v0, v1 }[1], [x0]
+; CHECK-NEXT: ret
%tmp2 = call %struct.__neon_int64x2x2_t @llvm.aarch64.neon.ld2lane.v2i64.p0i64(<2 x i64> %L1, <2 x i64> %L2, i64 1, i64* %A)
ret %struct.__neon_int64x2x2_t %tmp2
}
@@ -425,8 +425,8 @@ define %struct.__neon_int64x2x2_t @ld2lane_2d(<2 x i64> %L1, <2 x i64> %L2, i64*
define %struct.__neon_int64x2x3_t @ld3lane_2d(<2 x i64> %L1, <2 x i64> %L2, <2 x i64> %L3, i64* %A) nounwind {
; Make sure we are using the operands defined by the ABI
; CHECK: ld3lane_2d
-; CHECK ld3.d { v0, v1, v3 }[1], [x0]
-; CHECK-NEXT ret
+; CHECK: ld3.d { v0, v1, v2 }[1], [x0]
+; CHECK-NEXT: ret
%tmp2 = call %struct.__neon_int64x2x3_t @llvm.aarch64.neon.ld3lane.v2i64.p0i64(<2 x i64> %L1, <2 x i64> %L2, <2 x i64> %L3, i64 1, i64* %A)
ret %struct.__neon_int64x2x3_t %tmp2
}
@@ -434,8 +434,8 @@ define %struct.__neon_int64x2x3_t @ld3lane_2d(<2 x i64> %L1, <2 x i64> %L2, <2 x
define %struct.__neon_int64x2x4_t @ld4lane_2d(<2 x i64> %L1, <2 x i64> %L2, <2 x i64> %L3, <2 x i64> %L4, i64* %A) nounwind {
; Make sure we are using the operands defined by the ABI
; CHECK: ld4lane_2d
-; CHECK ld4.d { v0, v1, v2, v3 }[1], [x0]
-; CHECK-NEXT ret
+; CHECK: ld4.d { v0, v1, v2, v3 }[1], [x0]
+; CHECK-NEXT: ret
%tmp2 = call %struct.__neon_int64x2x4_t @llvm.aarch64.neon.ld4lane.v2i64.p0i64(<2 x i64> %L1, <2 x i64> %L2, <2 x i64> %L3, <2 x i64> %L4, i64 1, i64* %A)
ret %struct.__neon_int64x2x4_t %tmp2
}
@@ -448,8 +448,8 @@ define <8 x i8> @ld1r_8b(i8* %bar) {
; CHECK: ld1r_8b
; Make sure we are using the operands defined by the ABI
; CHECK: ld1r.8b { v0 }, [x0]
-; CHECK-NEXT ret
- %tmp1 = load i8* %bar
+; CHECK-NEXT: ret
+ %tmp1 = load i8, i8* %bar
%tmp2 = insertelement <8 x i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, i8 %tmp1, i32 0
%tmp3 = insertelement <8 x i8> %tmp2, i8 %tmp1, i32 1
%tmp4 = insertelement <8 x i8> %tmp3, i8 %tmp1, i32 2
@@ -465,8 +465,8 @@ define <16 x i8> @ld1r_16b(i8* %bar) {
; CHECK: ld1r_16b
; Make sure we are using the operands defined by the ABI
; CHECK: ld1r.16b { v0 }, [x0]
-; CHECK-NEXT ret
- %tmp1 = load i8* %bar
+; CHECK-NEXT: ret
+ %tmp1 = load i8, i8* %bar
%tmp2 = insertelement <16 x i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, i8 %tmp1, i32 0
%tmp3 = insertelement <16 x i8> %tmp2, i8 %tmp1, i32 1
%tmp4 = insertelement <16 x i8> %tmp3, i8 %tmp1, i32 2
@@ -490,8 +490,8 @@ define <4 x i16> @ld1r_4h(i16* %bar) {
; CHECK: ld1r_4h
; Make sure we are using the operands defined by the ABI
; CHECK: ld1r.4h { v0 }, [x0]
-; CHECK-NEXT ret
- %tmp1 = load i16* %bar
+; CHECK-NEXT: ret
+ %tmp1 = load i16, i16* %bar
%tmp2 = insertelement <4 x i16> <i16 undef, i16 undef, i16 undef, i16 undef>, i16 %tmp1, i32 0
%tmp3 = insertelement <4 x i16> %tmp2, i16 %tmp1, i32 1
%tmp4 = insertelement <4 x i16> %tmp3, i16 %tmp1, i32 2
@@ -503,8 +503,8 @@ define <8 x i16> @ld1r_8h(i16* %bar) {
; CHECK: ld1r_8h
; Make sure we are using the operands defined by the ABI
; CHECK: ld1r.8h { v0 }, [x0]
-; CHECK-NEXT ret
- %tmp1 = load i16* %bar
+; CHECK-NEXT: ret
+ %tmp1 = load i16, i16* %bar
%tmp2 = insertelement <8 x i16> <i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef>, i16 %tmp1, i32 0
%tmp3 = insertelement <8 x i16> %tmp2, i16 %tmp1, i32 1
%tmp4 = insertelement <8 x i16> %tmp3, i16 %tmp1, i32 2
@@ -520,8 +520,8 @@ define <2 x i32> @ld1r_2s(i32* %bar) {
; CHECK: ld1r_2s
; Make sure we are using the operands defined by the ABI
; CHECK: ld1r.2s { v0 }, [x0]
-; CHECK-NEXT ret
- %tmp1 = load i32* %bar
+; CHECK-NEXT: ret
+ %tmp1 = load i32, i32* %bar
%tmp2 = insertelement <2 x i32> <i32 undef, i32 undef>, i32 %tmp1, i32 0
%tmp3 = insertelement <2 x i32> %tmp2, i32 %tmp1, i32 1
ret <2 x i32> %tmp3
@@ -531,8 +531,8 @@ define <4 x i32> @ld1r_4s(i32* %bar) {
; CHECK: ld1r_4s
; Make sure we are using the operands defined by the ABI
; CHECK: ld1r.4s { v0 }, [x0]
-; CHECK-NEXT ret
- %tmp1 = load i32* %bar
+; CHECK-NEXT: ret
+ %tmp1 = load i32, i32* %bar
%tmp2 = insertelement <4 x i32> <i32 undef, i32 undef, i32 undef, i32 undef>, i32 %tmp1, i32 0
%tmp3 = insertelement <4 x i32> %tmp2, i32 %tmp1, i32 1
%tmp4 = insertelement <4 x i32> %tmp3, i32 %tmp1, i32 2
@@ -544,8 +544,8 @@ define <2 x i64> @ld1r_2d(i64* %bar) {
; CHECK: ld1r_2d
; Make sure we are using the operands defined by the ABI
; CHECK: ld1r.2d { v0 }, [x0]
-; CHECK-NEXT ret
- %tmp1 = load i64* %bar
+; CHECK-NEXT: ret
+ %tmp1 = load i64, i64* %bar
%tmp2 = insertelement <2 x i64> <i64 undef, i64 undef>, i64 %tmp1, i32 0
%tmp3 = insertelement <2 x i64> %tmp2, i64 %tmp1, i32 1
ret <2 x i64> %tmp3
@@ -554,8 +554,8 @@ define <2 x i64> @ld1r_2d(i64* %bar) {
define %struct.__neon_int8x8x2_t @ld2r_8b(i8* %A) nounwind {
; CHECK: ld2r_8b
; Make sure we are using the operands defined by the ABI
-; CHECK ld2r.8b { v0, v1 }, [x0]
-; CHECK-NEXT ret
+; CHECK: ld2r.8b { v0, v1 }, [x0]
+; CHECK-NEXT: ret
%tmp2 = call %struct.__neon_int8x8x2_t @llvm.aarch64.neon.ld2r.v8i8.p0i8(i8* %A)
ret %struct.__neon_int8x8x2_t %tmp2
}
@@ -563,8 +563,8 @@ define %struct.__neon_int8x8x2_t @ld2r_8b(i8* %A) nounwind {
define %struct.__neon_int8x8x3_t @ld3r_8b(i8* %A) nounwind {
; CHECK: ld3r_8b
; Make sure we are using the operands defined by the ABI
-; CHECK ld3r.8b { v0, v1, v2 }, [x0]
-; CHECK-NEXT ret
+; CHECK: ld3r.8b { v0, v1, v2 }, [x0]
+; CHECK-NEXT: ret
%tmp2 = call %struct.__neon_int8x8x3_t @llvm.aarch64.neon.ld3r.v8i8.p0i8(i8* %A)
ret %struct.__neon_int8x8x3_t %tmp2
}
@@ -572,8 +572,8 @@ define %struct.__neon_int8x8x3_t @ld3r_8b(i8* %A) nounwind {
define %struct.__neon_int8x8x4_t @ld4r_8b(i8* %A) nounwind {
; CHECK: ld4r_8b
; Make sure we are using the operands defined by the ABI
-; CHECK ld4r.8b { v0, v1, v2, v3 }, [x0]
-; CHECK-NEXT ret
+; CHECK: ld4r.8b { v0, v1, v2, v3 }, [x0]
+; CHECK-NEXT: ret
%tmp2 = call %struct.__neon_int8x8x4_t @llvm.aarch64.neon.ld4r.v8i8.p0i8(i8* %A)
ret %struct.__neon_int8x8x4_t %tmp2
}
@@ -585,8 +585,8 @@ declare %struct.__neon_int8x8x4_t @llvm.aarch64.neon.ld4r.v8i8.p0i8(i8*) nounwin
define %struct.__neon_int8x16x2_t @ld2r_16b(i8* %A) nounwind {
; CHECK: ld2r_16b
; Make sure we are using the operands defined by the ABI
-; CHECK ld2r.16b { v0, v1 }, [x0]
-; CHECK-NEXT ret
+; CHECK: ld2r.16b { v0, v1 }, [x0]
+; CHECK-NEXT: ret
%tmp2 = call %struct.__neon_int8x16x2_t @llvm.aarch64.neon.ld2r.v16i8.p0i8(i8* %A)
ret %struct.__neon_int8x16x2_t %tmp2
}
@@ -594,8 +594,8 @@ define %struct.__neon_int8x16x2_t @ld2r_16b(i8* %A) nounwind {
define %struct.__neon_int8x16x3_t @ld3r_16b(i8* %A) nounwind {
; CHECK: ld3r_16b
; Make sure we are using the operands defined by the ABI
-; CHECK ld3r.16b { v0, v1, v2 }, [x0]
-; CHECK-NEXT ret
+; CHECK: ld3r.16b { v0, v1, v2 }, [x0]
+; CHECK-NEXT: ret
%tmp2 = call %struct.__neon_int8x16x3_t @llvm.aarch64.neon.ld3r.v16i8.p0i8(i8* %A)
ret %struct.__neon_int8x16x3_t %tmp2
}
@@ -603,8 +603,8 @@ define %struct.__neon_int8x16x3_t @ld3r_16b(i8* %A) nounwind {
define %struct.__neon_int8x16x4_t @ld4r_16b(i8* %A) nounwind {
; CHECK: ld4r_16b
; Make sure we are using the operands defined by the ABI
-; CHECK ld4r.16b { v0, v1, v2, v3 }, [x0]
-; CHECK-NEXT ret
+; CHECK: ld4r.16b { v0, v1, v2, v3 }, [x0]
+; CHECK-NEXT: ret
%tmp2 = call %struct.__neon_int8x16x4_t @llvm.aarch64.neon.ld4r.v16i8.p0i8(i8* %A)
ret %struct.__neon_int8x16x4_t %tmp2
}
@@ -616,8 +616,8 @@ declare %struct.__neon_int8x16x4_t @llvm.aarch64.neon.ld4r.v16i8.p0i8(i8*) nounw
define %struct.__neon_int16x4x2_t @ld2r_4h(i16* %A) nounwind {
; CHECK: ld2r_4h
; Make sure we are using the operands defined by the ABI
-; CHECK ld2r.4h { v0, v1 }, [x0]
-; CHECK-NEXT ret
+; CHECK: ld2r.4h { v0, v1 }, [x0]
+; CHECK-NEXT: ret
%tmp2 = call %struct.__neon_int16x4x2_t @llvm.aarch64.neon.ld2r.v4i16.p0i16(i16* %A)
ret %struct.__neon_int16x4x2_t %tmp2
}
@@ -625,8 +625,8 @@ define %struct.__neon_int16x4x2_t @ld2r_4h(i16* %A) nounwind {
define %struct.__neon_int16x4x3_t @ld3r_4h(i16* %A) nounwind {
; CHECK: ld3r_4h
; Make sure we are using the operands defined by the ABI
-; CHECK ld3r.4h { v0, v1, v2 }, [x0]
-; CHECK-NEXT ret
+; CHECK: ld3r.4h { v0, v1, v2 }, [x0]
+; CHECK-NEXT: ret
%tmp2 = call %struct.__neon_int16x4x3_t @llvm.aarch64.neon.ld3r.v4i16.p0i16(i16* %A)
ret %struct.__neon_int16x4x3_t %tmp2
}
@@ -634,8 +634,8 @@ define %struct.__neon_int16x4x3_t @ld3r_4h(i16* %A) nounwind {
define %struct.__neon_int16x4x4_t @ld4r_4h(i16* %A) nounwind {
; CHECK: ld4r_4h
; Make sure we are using the operands defined by the ABI
-; CHECK ld4r.4h { v0, v1, v2, v3 }, [x0]
-; CHECK-NEXT ret
+; CHECK: ld4r.4h { v0, v1, v2, v3 }, [x0]
+; CHECK-NEXT: ret
%tmp2 = call %struct.__neon_int16x4x4_t @llvm.aarch64.neon.ld4r.v4i16.p0i16(i16* %A)
ret %struct.__neon_int16x4x4_t %tmp2
}
@@ -647,8 +647,8 @@ declare %struct.__neon_int16x4x4_t @llvm.aarch64.neon.ld4r.v4i16.p0i16(i16*) nou
define %struct.__neon_int16x8x2_t @ld2r_8h(i16* %A) nounwind {
; CHECK: ld2r_8h
; Make sure we are using the operands defined by the ABI
-; CHECK ld2r.8h { v0, v1 }, [x0]
-; CHECK-NEXT ret
+; CHECK: ld2r.8h { v0, v1 }, [x0]
+; CHECK-NEXT: ret
%tmp2 = call %struct.__neon_int16x8x2_t @llvm.aarch64.neon.ld2r.v8i16.p0i16(i16* %A)
ret %struct.__neon_int16x8x2_t %tmp2
}
@@ -656,8 +656,8 @@ define %struct.__neon_int16x8x2_t @ld2r_8h(i16* %A) nounwind {
define %struct.__neon_int16x8x3_t @ld3r_8h(i16* %A) nounwind {
; CHECK: ld3r_8h
; Make sure we are using the operands defined by the ABI
-; CHECK ld3r.8h { v0, v1, v2 }, [x0]
-; CHECK-NEXT ret
+; CHECK: ld3r.8h { v0, v1, v2 }, [x0]
+; CHECK-NEXT: ret
%tmp2 = call %struct.__neon_int16x8x3_t @llvm.aarch64.neon.ld3r.v8i16.p0i16(i16* %A)
ret %struct.__neon_int16x8x3_t %tmp2
}
@@ -665,8 +665,8 @@ define %struct.__neon_int16x8x3_t @ld3r_8h(i16* %A) nounwind {
define %struct.__neon_int16x8x4_t @ld4r_8h(i16* %A) nounwind {
; CHECK: ld4r_8h
; Make sure we are using the operands defined by the ABI
-; CHECK ld4r.8h { v0, v1, v2, v3 }, [x0]
-; CHECK-NEXT ret
+; CHECK: ld4r.8h { v0, v1, v2, v3 }, [x0]
+; CHECK-NEXT: ret
%tmp2 = call %struct.__neon_int16x8x4_t @llvm.aarch64.neon.ld4r.v8i16.p0i16(i16* %A)
ret %struct.__neon_int16x8x4_t %tmp2
}
@@ -678,8 +678,8 @@ declare %struct.__neon_int16x8x4_t @llvm.aarch64.neon.ld4r.v8i16.p0i16(i16*) nou
define %struct.__neon_int32x2x2_t @ld2r_2s(i32* %A) nounwind {
; CHECK: ld2r_2s
; Make sure we are using the operands defined by the ABI
-; CHECK ld2r.2s { v0, v1 }, [x0]
-; CHECK-NEXT ret
+; CHECK: ld2r.2s { v0, v1 }, [x0]
+; CHECK-NEXT: ret
%tmp2 = call %struct.__neon_int32x2x2_t @llvm.aarch64.neon.ld2r.v2i32.p0i32(i32* %A)
ret %struct.__neon_int32x2x2_t %tmp2
}
@@ -687,8 +687,8 @@ define %struct.__neon_int32x2x2_t @ld2r_2s(i32* %A) nounwind {
define %struct.__neon_int32x2x3_t @ld3r_2s(i32* %A) nounwind {
; CHECK: ld3r_2s
; Make sure we are using the operands defined by the ABI
-; CHECK ld3r.2s { v0, v1, v2 }, [x0]
-; CHECK-NEXT ret
+; CHECK: ld3r.2s { v0, v1, v2 }, [x0]
+; CHECK-NEXT: ret
%tmp2 = call %struct.__neon_int32x2x3_t @llvm.aarch64.neon.ld3r.v2i32.p0i32(i32* %A)
ret %struct.__neon_int32x2x3_t %tmp2
}
@@ -696,8 +696,8 @@ define %struct.__neon_int32x2x3_t @ld3r_2s(i32* %A) nounwind {
define %struct.__neon_int32x2x4_t @ld4r_2s(i32* %A) nounwind {
; CHECK: ld4r_2s
; Make sure we are using the operands defined by the ABI
-; CHECK ld4r.2s { v0, v1, v2, v3 }, [x0]
-; CHECK-NEXT ret
+; CHECK: ld4r.2s { v0, v1, v2, v3 }, [x0]
+; CHECK-NEXT: ret
%tmp2 = call %struct.__neon_int32x2x4_t @llvm.aarch64.neon.ld4r.v2i32.p0i32(i32* %A)
ret %struct.__neon_int32x2x4_t %tmp2
}
@@ -709,8 +709,8 @@ declare %struct.__neon_int32x2x4_t @llvm.aarch64.neon.ld4r.v2i32.p0i32(i32*) nou
define %struct.__neon_int32x4x2_t @ld2r_4s(i32* %A) nounwind {
; CHECK: ld2r_4s
; Make sure we are using the operands defined by the ABI
-; CHECK ld2r.4s { v0, v1 }, [x0]
-; CHECK-NEXT ret
+; CHECK: ld2r.4s { v0, v1 }, [x0]
+; CHECK-NEXT: ret
%tmp2 = call %struct.__neon_int32x4x2_t @llvm.aarch64.neon.ld2r.v4i32.p0i32(i32* %A)
ret %struct.__neon_int32x4x2_t %tmp2
}
@@ -718,8 +718,8 @@ define %struct.__neon_int32x4x2_t @ld2r_4s(i32* %A) nounwind {
define %struct.__neon_int32x4x3_t @ld3r_4s(i32* %A) nounwind {
; CHECK: ld3r_4s
; Make sure we are using the operands defined by the ABI
-; CHECK ld3r.4s { v0, v1, v2 }, [x0]
-; CHECK-NEXT ret
+; CHECK: ld3r.4s { v0, v1, v2 }, [x0]
+; CHECK-NEXT: ret
%tmp2 = call %struct.__neon_int32x4x3_t @llvm.aarch64.neon.ld3r.v4i32.p0i32(i32* %A)
ret %struct.__neon_int32x4x3_t %tmp2
}
@@ -727,8 +727,8 @@ define %struct.__neon_int32x4x3_t @ld3r_4s(i32* %A) nounwind {
define %struct.__neon_int32x4x4_t @ld4r_4s(i32* %A) nounwind {
; CHECK: ld4r_4s
; Make sure we are using the operands defined by the ABI
-; CHECK ld4r.4s { v0, v1, v2, v3 }, [x0]
-; CHECK-NEXT ret
+; CHECK: ld4r.4s { v0, v1, v2, v3 }, [x0]
+; CHECK-NEXT: ret
%tmp2 = call %struct.__neon_int32x4x4_t @llvm.aarch64.neon.ld4r.v4i32.p0i32(i32* %A)
ret %struct.__neon_int32x4x4_t %tmp2
}
@@ -740,8 +740,8 @@ declare %struct.__neon_int32x4x4_t @llvm.aarch64.neon.ld4r.v4i32.p0i32(i32*) nou
define %struct.__neon_int64x1x2_t @ld2r_1d(i64* %A) nounwind {
; CHECK: ld2r_1d
; Make sure we are using the operands defined by the ABI
-; CHECK ld2r.1d { v0, v1 }, [x0]
-; CHECK-NEXT ret
+; CHECK: ld2r.1d { v0, v1 }, [x0]
+; CHECK-NEXT: ret
%tmp2 = call %struct.__neon_int64x1x2_t @llvm.aarch64.neon.ld2r.v1i64.p0i64(i64* %A)
ret %struct.__neon_int64x1x2_t %tmp2
}
@@ -749,8 +749,8 @@ define %struct.__neon_int64x1x2_t @ld2r_1d(i64* %A) nounwind {
define %struct.__neon_int64x1x3_t @ld3r_1d(i64* %A) nounwind {
; CHECK: ld3r_1d
; Make sure we are using the operands defined by the ABI
-; CHECK ld3r.1d { v0, v1, v2 }, [x0]
-; CHECK-NEXT ret
+; CHECK: ld3r.1d { v0, v1, v2 }, [x0]
+; CHECK-NEXT: ret
%tmp2 = call %struct.__neon_int64x1x3_t @llvm.aarch64.neon.ld3r.v1i64.p0i64(i64* %A)
ret %struct.__neon_int64x1x3_t %tmp2
}
@@ -758,8 +758,8 @@ define %struct.__neon_int64x1x3_t @ld3r_1d(i64* %A) nounwind {
define %struct.__neon_int64x1x4_t @ld4r_1d(i64* %A) nounwind {
; CHECK: ld4r_1d
; Make sure we are using the operands defined by the ABI
-; CHECK ld4r.1d { v0, v1, v2, v3 }, [x0]
-; CHECK-NEXT ret
+; CHECK: ld4r.1d { v0, v1, v2, v3 }, [x0]
+; CHECK-NEXT: ret
%tmp2 = call %struct.__neon_int64x1x4_t @llvm.aarch64.neon.ld4r.v1i64.p0i64(i64* %A)
ret %struct.__neon_int64x1x4_t %tmp2
}
@@ -771,8 +771,8 @@ declare %struct.__neon_int64x1x4_t @llvm.aarch64.neon.ld4r.v1i64.p0i64(i64*) nou
define %struct.__neon_int64x2x2_t @ld2r_2d(i64* %A) nounwind {
; CHECK: ld2r_2d
; Make sure we are using the operands defined by the ABI
-; CHECK ld2r.2d { v0, v1 }, [x0]
-; CHECK-NEXT ret
+; CHECK: ld2r.2d { v0, v1 }, [x0]
+; CHECK-NEXT: ret
%tmp2 = call %struct.__neon_int64x2x2_t @llvm.aarch64.neon.ld2r.v2i64.p0i64(i64* %A)
ret %struct.__neon_int64x2x2_t %tmp2
}
@@ -780,8 +780,8 @@ define %struct.__neon_int64x2x2_t @ld2r_2d(i64* %A) nounwind {
define %struct.__neon_int64x2x3_t @ld3r_2d(i64* %A) nounwind {
; CHECK: ld3r_2d
; Make sure we are using the operands defined by the ABI
-; CHECK ld3r.2d { v0, v1, v2 }, [x0]
-; CHECK-NEXT ret
+; CHECK: ld3r.2d { v0, v1, v2 }, [x0]
+; CHECK-NEXT: ret
%tmp2 = call %struct.__neon_int64x2x3_t @llvm.aarch64.neon.ld3r.v2i64.p0i64(i64* %A)
ret %struct.__neon_int64x2x3_t %tmp2
}
@@ -789,8 +789,8 @@ define %struct.__neon_int64x2x3_t @ld3r_2d(i64* %A) nounwind {
define %struct.__neon_int64x2x4_t @ld4r_2d(i64* %A) nounwind {
; CHECK: ld4r_2d
; Make sure we are using the operands defined by the ABI
-; CHECK ld4r.2d { v0, v1, v2, v3 }, [x0]
-; CHECK-NEXT ret
+; CHECK: ld4r.2d { v0, v1, v2, v3 }, [x0]
+; CHECK-NEXT: ret
%tmp2 = call %struct.__neon_int64x2x4_t @llvm.aarch64.neon.ld4r.v2i64.p0i64(i64* %A)
ret %struct.__neon_int64x2x4_t %tmp2
}
@@ -803,8 +803,8 @@ define <16 x i8> @ld1_16b(<16 x i8> %V, i8* %bar) {
; CHECK-LABEL: ld1_16b
; Make sure we are using the operands defined by the ABI
; CHECK: ld1.b { v0 }[0], [x0]
-; CHECK-NEXT ret
- %tmp1 = load i8* %bar
+; CHECK-NEXT: ret
+ %tmp1 = load i8, i8* %bar
%tmp2 = insertelement <16 x i8> %V, i8 %tmp1, i32 0
ret <16 x i8> %tmp2
}
@@ -813,8 +813,8 @@ define <8 x i16> @ld1_8h(<8 x i16> %V, i16* %bar) {
; CHECK-LABEL: ld1_8h
; Make sure we are using the operands defined by the ABI
; CHECK: ld1.h { v0 }[0], [x0]
-; CHECK-NEXT ret
- %tmp1 = load i16* %bar
+; CHECK-NEXT: ret
+ %tmp1 = load i16, i16* %bar
%tmp2 = insertelement <8 x i16> %V, i16 %tmp1, i32 0
ret <8 x i16> %tmp2
}
@@ -823,8 +823,8 @@ define <4 x i32> @ld1_4s(<4 x i32> %V, i32* %bar) {
; CHECK-LABEL: ld1_4s
; Make sure we are using the operands defined by the ABI
; CHECK: ld1.s { v0 }[0], [x0]
-; CHECK-NEXT ret
- %tmp1 = load i32* %bar
+; CHECK-NEXT: ret
+ %tmp1 = load i32, i32* %bar
%tmp2 = insertelement <4 x i32> %V, i32 %tmp1, i32 0
ret <4 x i32> %tmp2
}
@@ -833,8 +833,8 @@ define <4 x float> @ld1_4s_float(<4 x float> %V, float* %bar) {
; CHECK-LABEL: ld1_4s_float:
; Make sure we are using the operands defined by the ABI
; CHECK: ld1.s { v0 }[0], [x0]
-; CHECK-NEXT ret
- %tmp1 = load float* %bar
+; CHECK-NEXT: ret
+ %tmp1 = load float, float* %bar
%tmp2 = insertelement <4 x float> %V, float %tmp1, i32 0
ret <4 x float> %tmp2
}
@@ -843,8 +843,8 @@ define <2 x i64> @ld1_2d(<2 x i64> %V, i64* %bar) {
; CHECK-LABEL: ld1_2d
; Make sure we are using the operands defined by the ABI
; CHECK: ld1.d { v0 }[0], [x0]
-; CHECK-NEXT ret
- %tmp1 = load i64* %bar
+; CHECK-NEXT: ret
+ %tmp1 = load i64, i64* %bar
%tmp2 = insertelement <2 x i64> %V, i64 %tmp1, i32 0
ret <2 x i64> %tmp2
}
@@ -853,8 +853,8 @@ define <2 x double> @ld1_2d_double(<2 x double> %V, double* %bar) {
; CHECK-LABEL: ld1_2d_double:
; Make sure we are using the operands defined by the ABI
; CHECK: ld1.d { v0 }[0], [x0]
-; CHECK-NEXT ret
- %tmp1 = load double* %bar
+; CHECK-NEXT: ret
+ %tmp1 = load double, double* %bar
%tmp2 = insertelement <2 x double> %V, double %tmp1, i32 0
ret <2 x double> %tmp2
}
@@ -864,7 +864,7 @@ define <1 x i64> @ld1_1d(<1 x i64>* %p) {
; Make sure we are using the operands defined by the ABI
; CHECK: ldr [[REG:d[0-9]+]], [x0]
; CHECK-NEXT: ret
- %tmp = load <1 x i64>* %p, align 8
+ %tmp = load <1 x i64>, <1 x i64>* %p, align 8
ret <1 x i64> %tmp
}
@@ -872,8 +872,8 @@ define <8 x i8> @ld1_8b(<8 x i8> %V, i8* %bar) {
; CHECK-LABEL: ld1_8b
; Make sure we are using the operands defined by the ABI
; CHECK: ld1.b { v0 }[0], [x0]
-; CHECK-NEXT ret
- %tmp1 = load i8* %bar
+; CHECK-NEXT: ret
+ %tmp1 = load i8, i8* %bar
%tmp2 = insertelement <8 x i8> %V, i8 %tmp1, i32 0
ret <8 x i8> %tmp2
}
@@ -882,8 +882,8 @@ define <4 x i16> @ld1_4h(<4 x i16> %V, i16* %bar) {
; CHECK-LABEL: ld1_4h
; Make sure we are using the operands defined by the ABI
; CHECK: ld1.h { v0 }[0], [x0]
-; CHECK-NEXT ret
- %tmp1 = load i16* %bar
+; CHECK-NEXT: ret
+ %tmp1 = load i16, i16* %bar
%tmp2 = insertelement <4 x i16> %V, i16 %tmp1, i32 0
ret <4 x i16> %tmp2
}
@@ -892,8 +892,8 @@ define <2 x i32> @ld1_2s(<2 x i32> %V, i32* %bar) {
; CHECK-LABEL: ld1_2s:
; Make sure we are using the operands defined by the ABI
; CHECK: ld1.s { v0 }[0], [x0]
-; CHECK-NEXT ret
- %tmp1 = load i32* %bar
+; CHECK-NEXT: ret
+ %tmp1 = load i32, i32* %bar
%tmp2 = insertelement <2 x i32> %V, i32 %tmp1, i32 0
ret <2 x i32> %tmp2
}
@@ -902,8 +902,8 @@ define <2 x float> @ld1_2s_float(<2 x float> %V, float* %bar) {
; CHECK-LABEL: ld1_2s_float:
; Make sure we are using the operands defined by the ABI
; CHECK: ld1.s { v0 }[0], [x0]
-; CHECK-NEXT ret
- %tmp1 = load float* %bar
+; CHECK-NEXT: ret
+ %tmp1 = load float, float* %bar
%tmp2 = insertelement <2 x float> %V, float %tmp1, i32 0
ret <2 x float> %tmp2
}
@@ -919,12 +919,12 @@ entry:
; CHECK-NEXT: str d[[RESREGNUM]], [x2]
; CHECK-NEXT: ret
%tmp = bitcast i8* %a to i32*
- %tmp1 = load i32* %tmp, align 4
+ %tmp1 = load i32, i32* %tmp, align 4
%tmp2 = insertelement <2 x i32> undef, i32 %tmp1, i32 0
%lane = shufflevector <2 x i32> %tmp2, <2 x i32> undef, <2 x i32> zeroinitializer
%tmp3 = bitcast <2 x i32> %lane to <8 x i8>
%tmp4 = bitcast i8* %b to i32*
- %tmp5 = load i32* %tmp4, align 4
+ %tmp5 = load i32, i32* %tmp4, align 4
%tmp6 = insertelement <2 x i32> undef, i32 %tmp5, i32 0
%lane1 = shufflevector <2 x i32> %tmp6, <2 x i32> undef, <2 x i32> zeroinitializer
%tmp7 = bitcast <2 x i32> %lane1 to <8 x i8>
@@ -945,8 +945,8 @@ entry:
; CHECK-LABEL: ld1r_4s_float
; Make sure we are using the operands defined by the ABI
; CHECK: ld1r.4s { v0 }, [x0]
-; CHECK-NEXT ret
- %tmp = load float* %x, align 4
+; CHECK-NEXT: ret
+ %tmp = load float, float* %x, align 4
%tmp1 = insertelement <4 x float> undef, float %tmp, i32 0
%tmp2 = insertelement <4 x float> %tmp1, float %tmp, i32 1
%tmp3 = insertelement <4 x float> %tmp2, float %tmp, i32 2
@@ -959,8 +959,8 @@ entry:
; CHECK-LABEL: ld1r_2s_float
; Make sure we are using the operands defined by the ABI
; CHECK: ld1r.2s { v0 }, [x0]
-; CHECK-NEXT ret
- %tmp = load float* %x, align 4
+; CHECK-NEXT: ret
+ %tmp = load float, float* %x, align 4
%tmp1 = insertelement <2 x float> undef, float %tmp, i32 0
%tmp2 = insertelement <2 x float> %tmp1, float %tmp, i32 1
ret <2 x float> %tmp2
@@ -971,8 +971,8 @@ entry:
; CHECK-LABEL: ld1r_2d_double
; Make sure we are using the operands defined by the ABI
; CHECK: ld1r.2d { v0 }, [x0]
-; CHECK-NEXT ret
- %tmp = load double* %x, align 4
+; CHECK-NEXT: ret
+ %tmp = load double, double* %x, align 4
%tmp1 = insertelement <2 x double> undef, double %tmp, i32 0
%tmp2 = insertelement <2 x double> %tmp1, double %tmp, i32 1
ret <2 x double> %tmp2
@@ -983,8 +983,8 @@ entry:
; CHECK-LABEL: ld1r_1d_double
; Make sure we are using the operands defined by the ABI
; CHECK: ldr d0, [x0]
-; CHECK-NEXT ret
- %tmp = load double* %x, align 4
+; CHECK-NEXT: ret
+ %tmp = load double, double* %x, align 4
%tmp1 = insertelement <1 x double> undef, double %tmp, i32 0
ret <1 x double> %tmp1
}
@@ -994,8 +994,8 @@ entry:
; CHECK-LABEL: ld1r_4s_float_shuff
; Make sure we are using the operands defined by the ABI
; CHECK: ld1r.4s { v0 }, [x0]
-; CHECK-NEXT ret
- %tmp = load float* %x, align 4
+; CHECK-NEXT: ret
+ %tmp = load float, float* %x, align 4
%tmp1 = insertelement <4 x float> undef, float %tmp, i32 0
%lane = shufflevector <4 x float> %tmp1, <4 x float> undef, <4 x i32> zeroinitializer
ret <4 x float> %lane
@@ -1006,8 +1006,8 @@ entry:
; CHECK-LABEL: ld1r_2s_float_shuff
; Make sure we are using the operands defined by the ABI
; CHECK: ld1r.2s { v0 }, [x0]
-; CHECK-NEXT ret
- %tmp = load float* %x, align 4
+; CHECK-NEXT: ret
+ %tmp = load float, float* %x, align 4
%tmp1 = insertelement <2 x float> undef, float %tmp, i32 0
%lane = shufflevector <2 x float> %tmp1, <2 x float> undef, <2 x i32> zeroinitializer
ret <2 x float> %lane
@@ -1018,8 +1018,8 @@ entry:
; CHECK-LABEL: ld1r_2d_double_shuff
; Make sure we are using the operands defined by the ABI
; CHECK: ld1r.2d { v0 }, [x0]
-; CHECK-NEXT ret
- %tmp = load double* %x, align 4
+; CHECK-NEXT: ret
+ %tmp = load double, double* %x, align 4
%tmp1 = insertelement <2 x double> undef, double %tmp, i32 0
%lane = shufflevector <2 x double> %tmp1, <2 x double> undef, <2 x i32> zeroinitializer
ret <2 x double> %lane
@@ -1030,8 +1030,8 @@ entry:
; CHECK-LABEL: ld1r_1d_double_shuff
; Make sure we are using the operands defined by the ABI
; CHECK: ldr d0, [x0]
-; CHECK-NEXT ret
- %tmp = load double* %x, align 4
+; CHECK-NEXT: ret
+ %tmp = load double, double* %x, align 4
%tmp1 = insertelement <1 x double> undef, double %tmp, i32 0
%lane = shufflevector <1 x double> %tmp1, <1 x double> undef, <1 x i32> zeroinitializer
ret <1 x double> %lane
diff --git a/test/CodeGen/AArch64/arm64-ldp-aa.ll b/test/CodeGen/AArch64/arm64-ldp-aa.ll
new file mode 100644
index 000000000000..ad5c01cfe34e
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-ldp-aa.ll
@@ -0,0 +1,60 @@
+; RUN: llc < %s -march=arm64 -enable-misched=false -verify-machineinstrs | FileCheck %s
+
+; The next set of tests makes sure we can combine the second instruction into
+; the first.
+
+; CHECK-LABEL: ldp_int_aa
+; CHECK: ldp w8, w9, [x1]
+; CHECK: str w0, [x1, #8]
+; CHECK: ret
+define i32 @ldp_int_aa(i32 %a, i32* %p) nounwind {
+ %tmp = load i32, i32* %p, align 4
+ %str.ptr = getelementptr inbounds i32, i32* %p, i64 2
+ store i32 %a, i32* %str.ptr, align 4
+ %add.ptr = getelementptr inbounds i32, i32* %p, i64 1
+ %tmp1 = load i32, i32* %add.ptr, align 4
+ %add = add nsw i32 %tmp1, %tmp
+ ret i32 %add
+}
+
+; CHECK-LABEL: ldp_long_aa
+; CHECK: ldp x8, x9, [x1]
+; CHECK: str x0, [x1, #16]
+; CHECK: ret
+define i64 @ldp_long_aa(i64 %a, i64* %p) nounwind {
+ %tmp = load i64, i64* %p, align 8
+ %str.ptr = getelementptr inbounds i64, i64* %p, i64 2
+ store i64 %a, i64* %str.ptr, align 4
+ %add.ptr = getelementptr inbounds i64, i64* %p, i64 1
+ %tmp1 = load i64, i64* %add.ptr, align 8
+ %add = add nsw i64 %tmp1, %tmp
+ ret i64 %add
+}
+
+; CHECK-LABEL: ldp_float_aa
+; CHECK: str s0, [x0, #8]
+; CHECK: ldp s1, s0, [x0]
+; CHECK: ret
+define float @ldp_float_aa(float %a, float* %p) nounwind {
+ %tmp = load float, float* %p, align 4
+ %str.ptr = getelementptr inbounds float, float* %p, i64 2
+ store float %a, float* %str.ptr, align 4
+ %add.ptr = getelementptr inbounds float, float* %p, i64 1
+ %tmp1 = load float, float* %add.ptr, align 4
+ %add = fadd float %tmp, %tmp1
+ ret float %add
+}
+
+; CHECK-LABEL: ldp_double_aa
+; CHECK: str d0, [x0, #16]
+; CHECK: ldp d1, d0, [x0]
+; CHECK: ret
+define double @ldp_double_aa(double %a, double* %p) nounwind {
+ %tmp = load double, double* %p, align 8
+ %str.ptr = getelementptr inbounds double, double* %p, i64 2
+ store double %a, double* %str.ptr, align 4
+ %add.ptr = getelementptr inbounds double, double* %p, i64 1
+ %tmp1 = load double, double* %add.ptr, align 8
+ %add = fadd double %tmp, %tmp1
+ ret double %add
+}
diff --git a/test/CodeGen/AArch64/arm64-ldp.ll b/test/CodeGen/AArch64/arm64-ldp.ll
index 5a986261b31b..a192eab112fa 100644
--- a/test/CodeGen/AArch64/arm64-ldp.ll
+++ b/test/CodeGen/AArch64/arm64-ldp.ll
@@ -5,19 +5,58 @@
; CHECK: ldp_int
; CHECK: ldp
define i32 @ldp_int(i32* %p) nounwind {
- %tmp = load i32* %p, align 4
- %add.ptr = getelementptr inbounds i32* %p, i64 1
- %tmp1 = load i32* %add.ptr, align 4
+ %tmp = load i32, i32* %p, align 4
+ %add.ptr = getelementptr inbounds i32, i32* %p, i64 1
+ %tmp1 = load i32, i32* %add.ptr, align 4
%add = add nsw i32 %tmp1, %tmp
ret i32 %add
}
+; CHECK: ldp_sext_int
+; CHECK: ldpsw
+define i64 @ldp_sext_int(i32* %p) nounwind {
+ %tmp = load i32, i32* %p, align 4
+ %add.ptr = getelementptr inbounds i32, i32* %p, i64 1
+ %tmp1 = load i32, i32* %add.ptr, align 4
+ %sexttmp = sext i32 %tmp to i64
+ %sexttmp1 = sext i32 %tmp1 to i64
+ %add = add nsw i64 %sexttmp1, %sexttmp
+ ret i64 %add
+}
+
+; CHECK-LABEL: ldp_half_sext_res0_int:
+; CHECK: ldp w[[DST1:[0-9]+]], w[[DST2:[0-9]+]], [x0]
+; CHECK: sxtw x[[DST1]], w[[DST1]]
+define i64 @ldp_half_sext_res0_int(i32* %p) nounwind {
+ %tmp = load i32, i32* %p, align 4
+ %add.ptr = getelementptr inbounds i32, i32* %p, i64 1
+ %tmp1 = load i32, i32* %add.ptr, align 4
+ %sexttmp = sext i32 %tmp to i64
+ %sexttmp1 = zext i32 %tmp1 to i64
+ %add = add nsw i64 %sexttmp1, %sexttmp
+ ret i64 %add
+}
+
+; CHECK-LABEL: ldp_half_sext_res1_int:
+; CHECK: ldp w[[DST1:[0-9]+]], w[[DST2:[0-9]+]], [x0]
+; CHECK: sxtw x[[DST2]], w[[DST2]]
+define i64 @ldp_half_sext_res1_int(i32* %p) nounwind {
+ %tmp = load i32, i32* %p, align 4
+ %add.ptr = getelementptr inbounds i32, i32* %p, i64 1
+ %tmp1 = load i32, i32* %add.ptr, align 4
+ %sexttmp = zext i32 %tmp to i64
+ %sexttmp1 = sext i32 %tmp1 to i64
+ %add = add nsw i64 %sexttmp1, %sexttmp
+ ret i64 %add
+}
+
+
; CHECK: ldp_long
; CHECK: ldp
define i64 @ldp_long(i64* %p) nounwind {
- %tmp = load i64* %p, align 8
- %add.ptr = getelementptr inbounds i64* %p, i64 1
- %tmp1 = load i64* %add.ptr, align 8
+ %tmp = load i64, i64* %p, align 8
+ %add.ptr = getelementptr inbounds i64, i64* %p, i64 1
+ %tmp1 = load i64, i64* %add.ptr, align 8
%add = add nsw i64 %tmp1, %tmp
ret i64 %add
}
@@ -25,9 +64,9 @@ define i64 @ldp_long(i64* %p) nounwind {
; CHECK: ldp_float
; CHECK: ldp
define float @ldp_float(float* %p) nounwind {
- %tmp = load float* %p, align 4
- %add.ptr = getelementptr inbounds float* %p, i64 1
- %tmp1 = load float* %add.ptr, align 4
+ %tmp = load float, float* %p, align 4
+ %add.ptr = getelementptr inbounds float, float* %p, i64 1
+ %tmp1 = load float, float* %add.ptr, align 4
%add = fadd float %tmp, %tmp1
ret float %add
}
@@ -35,9 +74,9 @@ define float @ldp_float(float* %p) nounwind {
; CHECK: ldp_double
; CHECK: ldp
define double @ldp_double(double* %p) nounwind {
- %tmp = load double* %p, align 8
- %add.ptr = getelementptr inbounds double* %p, i64 1
- %tmp1 = load double* %add.ptr, align 8
+ %tmp = load double, double* %p, align 8
+ %add.ptr = getelementptr inbounds double, double* %p, i64 1
+ %tmp1 = load double, double* %add.ptr, align 8
%add = fadd double %tmp, %tmp1
ret double %add
}
@@ -48,23 +87,71 @@ define i32 @ldur_int(i32* %a) nounwind {
; LDUR_CHK: ldp [[DST1:w[0-9]+]], [[DST2:w[0-9]+]], [x0, #-8]
; LDUR_CHK-NEXT: add w{{[0-9]+}}, [[DST2]], [[DST1]]
; LDUR_CHK-NEXT: ret
- %p1 = getelementptr inbounds i32* %a, i32 -1
- %tmp1 = load i32* %p1, align 2
- %p2 = getelementptr inbounds i32* %a, i32 -2
- %tmp2 = load i32* %p2, align 2
+ %p1 = getelementptr inbounds i32, i32* %a, i32 -1
+ %tmp1 = load i32, i32* %p1, align 2
+ %p2 = getelementptr inbounds i32, i32* %a, i32 -2
+ %tmp2 = load i32, i32* %p2, align 2
%tmp3 = add i32 %tmp1, %tmp2
ret i32 %tmp3
}
+define i64 @ldur_sext_int(i32* %a) nounwind {
+; LDUR_CHK: ldur_sext_int
+; LDUR_CHK: ldpsw [[DST1:x[0-9]+]], [[DST2:x[0-9]+]], [x0, #-8]
+; LDUR_CHK-NEXT: add x{{[0-9]+}}, [[DST2]], [[DST1]]
+; LDUR_CHK-NEXT: ret
+ %p1 = getelementptr inbounds i32, i32* %a, i32 -1
+ %tmp1 = load i32, i32* %p1, align 2
+ %p2 = getelementptr inbounds i32, i32* %a, i32 -2
+ %tmp2 = load i32, i32* %p2, align 2
+ %sexttmp1 = sext i32 %tmp1 to i64
+ %sexttmp2 = sext i32 %tmp2 to i64
+ %tmp3 = add i64 %sexttmp1, %sexttmp2
+ ret i64 %tmp3
+}
+
+define i64 @ldur_half_sext_int_res0(i32* %a) nounwind {
+; LDUR_CHK: ldur_half_sext_int_res0
+; LDUR_CHK: ldp w[[DST1:[0-9]+]], w[[DST2:[0-9]+]], [x0, #-8]
+; LDUR_CHK: sxtw x[[DST1]], w[[DST1]]
+; LDUR_CHK-NEXT: add x{{[0-9]+}}, x[[DST2]], x[[DST1]]
+; LDUR_CHK-NEXT: ret
+ %p1 = getelementptr inbounds i32, i32* %a, i32 -1
+ %tmp1 = load i32, i32* %p1, align 2
+ %p2 = getelementptr inbounds i32, i32* %a, i32 -2
+ %tmp2 = load i32, i32* %p2, align 2
+ %sexttmp1 = zext i32 %tmp1 to i64
+ %sexttmp2 = sext i32 %tmp2 to i64
+ %tmp3 = add i64 %sexttmp1, %sexttmp2
+ ret i64 %tmp3
+}
+
+define i64 @ldur_half_sext_int_res1(i32* %a) nounwind {
+; LDUR_CHK: ldur_half_sext_int_res1
+; LDUR_CHK: ldp w[[DST1:[0-9]+]], w[[DST2:[0-9]+]], [x0, #-8]
+; LDUR_CHK: sxtw x[[DST2]], w[[DST2]]
+; LDUR_CHK-NEXT: add x{{[0-9]+}}, x[[DST2]], x[[DST1]]
+; LDUR_CHK-NEXT: ret
+ %p1 = getelementptr inbounds i32, i32* %a, i32 -1
+ %tmp1 = load i32, i32* %p1, align 2
+ %p2 = getelementptr inbounds i32, i32* %a, i32 -2
+ %tmp2 = load i32, i32* %p2, align 2
+ %sexttmp1 = sext i32 %tmp1 to i64
+ %sexttmp2 = zext i32 %tmp2 to i64
+ %tmp3 = add i64 %sexttmp1, %sexttmp2
+ ret i64 %tmp3
+}
+
+
define i64 @ldur_long(i64* %a) nounwind ssp {
; LDUR_CHK: ldur_long
; LDUR_CHK: ldp [[DST1:x[0-9]+]], [[DST2:x[0-9]+]], [x0, #-16]
; LDUR_CHK-NEXT: add x{{[0-9]+}}, [[DST2]], [[DST1]]
; LDUR_CHK-NEXT: ret
- %p1 = getelementptr inbounds i64* %a, i64 -1
- %tmp1 = load i64* %p1, align 2
- %p2 = getelementptr inbounds i64* %a, i64 -2
- %tmp2 = load i64* %p2, align 2
+ %p1 = getelementptr inbounds i64, i64* %a, i64 -1
+ %tmp1 = load i64, i64* %p1, align 2
+ %p2 = getelementptr inbounds i64, i64* %a, i64 -2
+ %tmp2 = load i64, i64* %p2, align 2
%tmp3 = add i64 %tmp1, %tmp2
ret i64 %tmp3
}
@@ -74,10 +161,10 @@ define float @ldur_float(float* %a) {
; LDUR_CHK: ldp [[DST1:s[0-9]+]], [[DST2:s[0-9]+]], [x0, #-8]
; LDUR_CHK-NEXT: add s{{[0-9]+}}, [[DST2]], [[DST1]]
; LDUR_CHK-NEXT: ret
- %p1 = getelementptr inbounds float* %a, i64 -1
- %tmp1 = load float* %p1, align 2
- %p2 = getelementptr inbounds float* %a, i64 -2
- %tmp2 = load float* %p2, align 2
+ %p1 = getelementptr inbounds float, float* %a, i64 -1
+ %tmp1 = load float, float* %p1, align 2
+ %p2 = getelementptr inbounds float, float* %a, i64 -2
+ %tmp2 = load float, float* %p2, align 2
%tmp3 = fadd float %tmp1, %tmp2
ret float %tmp3
}
@@ -87,10 +174,10 @@ define double @ldur_double(double* %a) {
; LDUR_CHK: ldp [[DST1:d[0-9]+]], [[DST2:d[0-9]+]], [x0, #-16]
; LDUR_CHK-NEXT: add d{{[0-9]+}}, [[DST2]], [[DST1]]
; LDUR_CHK-NEXT: ret
- %p1 = getelementptr inbounds double* %a, i64 -1
- %tmp1 = load double* %p1, align 2
- %p2 = getelementptr inbounds double* %a, i64 -2
- %tmp2 = load double* %p2, align 2
+ %p1 = getelementptr inbounds double, double* %a, i64 -1
+ %tmp1 = load double, double* %p1, align 2
+ %p2 = getelementptr inbounds double, double* %a, i64 -2
+ %tmp2 = load double, double* %p2, align 2
%tmp3 = fadd double %tmp1, %tmp2
ret double %tmp3
}
@@ -102,14 +189,64 @@ define i64 @pairUpBarelyIn(i64* %a) nounwind ssp {
; LDUR_CHK: ldp [[DST1:x[0-9]+]], [[DST2:x[0-9]+]], [x0, #-256]
; LDUR_CHK-NEXT: add x{{[0-9]+}}, [[DST2]], [[DST1]]
; LDUR_CHK-NEXT: ret
- %p1 = getelementptr inbounds i64* %a, i64 -31
- %tmp1 = load i64* %p1, align 2
- %p2 = getelementptr inbounds i64* %a, i64 -32
- %tmp2 = load i64* %p2, align 2
+ %p1 = getelementptr inbounds i64, i64* %a, i64 -31
+ %tmp1 = load i64, i64* %p1, align 2
+ %p2 = getelementptr inbounds i64, i64* %a, i64 -32
+ %tmp2 = load i64, i64* %p2, align 2
%tmp3 = add i64 %tmp1, %tmp2
ret i64 %tmp3
}
+define i64 @pairUpBarelyInSext(i32* %a) nounwind ssp {
+; LDUR_CHK: pairUpBarelyInSext
+; LDUR_CHK-NOT: ldur
+; LDUR_CHK: ldpsw [[DST1:x[0-9]+]], [[DST2:x[0-9]+]], [x0, #-256]
+; LDUR_CHK-NEXT: add x{{[0-9]+}}, [[DST2]], [[DST1]]
+; LDUR_CHK-NEXT: ret
+ %p1 = getelementptr inbounds i32, i32* %a, i64 -63
+ %tmp1 = load i32, i32* %p1, align 2
+ %p2 = getelementptr inbounds i32, i32* %a, i64 -64
+ %tmp2 = load i32, i32* %p2, align 2
+ %sexttmp1 = sext i32 %tmp1 to i64
+ %sexttmp2 = sext i32 %tmp2 to i64
+ %tmp3 = add i64 %sexttmp1, %sexttmp2
+ ret i64 %tmp3
+}
+
+define i64 @pairUpBarelyInHalfSextRes0(i32* %a) nounwind ssp {
+; LDUR_CHK: pairUpBarelyInHalfSextRes0
+; LDUR_CHK-NOT: ldur
+; LDUR_CHK: ldp w[[DST1:[0-9]+]], w[[DST2:[0-9]+]], [x0, #-256]
+; LDUR_CHK: sxtw x[[DST1]], w[[DST1]]
+; LDUR_CHK-NEXT: add x{{[0-9]+}}, x[[DST2]], x[[DST1]]
+; LDUR_CHK-NEXT: ret
+ %p1 = getelementptr inbounds i32, i32* %a, i64 -63
+ %tmp1 = load i32, i32* %p1, align 2
+ %p2 = getelementptr inbounds i32, i32* %a, i64 -64
+ %tmp2 = load i32, i32* %p2, align 2
+ %sexttmp1 = zext i32 %tmp1 to i64
+ %sexttmp2 = sext i32 %tmp2 to i64
+ %tmp3 = add i64 %sexttmp1, %sexttmp2
+ ret i64 %tmp3
+}
+
+define i64 @pairUpBarelyInHalfSextRes1(i32* %a) nounwind ssp {
+; LDUR_CHK: pairUpBarelyInHalfSextRes1
+; LDUR_CHK-NOT: ldur
+; LDUR_CHK: ldp w[[DST1:[0-9]+]], w[[DST2:[0-9]+]], [x0, #-256]
+; LDUR_CHK: sxtw x[[DST2]], w[[DST2]]
+; LDUR_CHK-NEXT: add x{{[0-9]+}}, x[[DST2]], x[[DST1]]
+; LDUR_CHK-NEXT: ret
+ %p1 = getelementptr inbounds i32, i32* %a, i64 -63
+ %tmp1 = load i32, i32* %p1, align 2
+ %p2 = getelementptr inbounds i32, i32* %a, i64 -64
+ %tmp2 = load i32, i32* %p2, align 2
+ %sexttmp1 = sext i32 %tmp1 to i64
+ %sexttmp2 = zext i32 %tmp2 to i64
+ %tmp3 = add i64 %sexttmp1, %sexttmp2
+ ret i64 %tmp3
+}
+
define i64 @pairUpBarelyOut(i64* %a) nounwind ssp {
; LDUR_CHK: pairUpBarelyOut
; LDUR_CHK-NOT: ldp
@@ -117,14 +254,31 @@ define i64 @pairUpBarelyOut(i64* %a) nounwind ssp {
; are used---just check that there isn't an ldp before the add
; LDUR_CHK: add
; LDUR_CHK-NEXT: ret
- %p1 = getelementptr inbounds i64* %a, i64 -32
- %tmp1 = load i64* %p1, align 2
- %p2 = getelementptr inbounds i64* %a, i64 -33
- %tmp2 = load i64* %p2, align 2
+ %p1 = getelementptr inbounds i64, i64* %a, i64 -32
+ %tmp1 = load i64, i64* %p1, align 2
+ %p2 = getelementptr inbounds i64, i64* %a, i64 -33
+ %tmp2 = load i64, i64* %p2, align 2
%tmp3 = add i64 %tmp1, %tmp2
ret i64 %tmp3
}
+define i64 @pairUpBarelyOutSext(i32* %a) nounwind ssp {
+; LDUR_CHK: pairUpBarelyOutSext
+; LDUR_CHK-NOT: ldp
+; Don't be fragile about which loads or manipulations of the base register
+; are used---just check that there isn't an ldp before the add
+; LDUR_CHK: add
+; LDUR_CHK-NEXT: ret
+ %p1 = getelementptr inbounds i32, i32* %a, i64 -64
+ %tmp1 = load i32, i32* %p1, align 2
+ %p2 = getelementptr inbounds i32, i32* %a, i64 -65
+ %tmp2 = load i32, i32* %p2, align 2
+ %sexttmp1 = sext i32 %tmp1 to i64
+ %sexttmp2 = sext i32 %tmp2 to i64
+ %tmp3 = add i64 %sexttmp1, %sexttmp2
+ ret i64 %tmp3
+}
+
define i64 @pairUpNotAligned(i64* %a) nounwind ssp {
; LDUR_CHK: pairUpNotAligned
; LDUR_CHK-NOT: ldp
@@ -132,18 +286,43 @@ define i64 @pairUpNotAligned(i64* %a) nounwind ssp {
; LDUR_CHK-NEXT: ldur
; LDUR_CHK-NEXT: add
; LDUR_CHK-NEXT: ret
- %p1 = getelementptr inbounds i64* %a, i64 -18
+ %p1 = getelementptr inbounds i64, i64* %a, i64 -18
%bp1 = bitcast i64* %p1 to i8*
- %bp1p1 = getelementptr inbounds i8* %bp1, i64 1
+ %bp1p1 = getelementptr inbounds i8, i8* %bp1, i64 1
%dp1 = bitcast i8* %bp1p1 to i64*
- %tmp1 = load i64* %dp1, align 1
+ %tmp1 = load i64, i64* %dp1, align 1
- %p2 = getelementptr inbounds i64* %a, i64 -17
+ %p2 = getelementptr inbounds i64, i64* %a, i64 -17
%bp2 = bitcast i64* %p2 to i8*
- %bp2p1 = getelementptr inbounds i8* %bp2, i64 1
+ %bp2p1 = getelementptr inbounds i8, i8* %bp2, i64 1
%dp2 = bitcast i8* %bp2p1 to i64*
- %tmp2 = load i64* %dp2, align 1
+ %tmp2 = load i64, i64* %dp2, align 1
%tmp3 = add i64 %tmp1, %tmp2
ret i64 %tmp3
}
+
+define i64 @pairUpNotAlignedSext(i32* %a) nounwind ssp {
+; LDUR_CHK: pairUpNotAlignedSext
+; LDUR_CHK-NOT: ldp
+; LDUR_CHK: ldursw
+; LDUR_CHK-NEXT: ldursw
+; LDUR_CHK-NEXT: add
+; LDUR_CHK-NEXT: ret
+ %p1 = getelementptr inbounds i32, i32* %a, i64 -18
+ %bp1 = bitcast i32* %p1 to i8*
+ %bp1p1 = getelementptr inbounds i8, i8* %bp1, i64 1
+ %dp1 = bitcast i8* %bp1p1 to i32*
+ %tmp1 = load i32, i32* %dp1, align 1
+
+ %p2 = getelementptr inbounds i32, i32* %a, i64 -17
+ %bp2 = bitcast i32* %p2 to i8*
+ %bp2p1 = getelementptr inbounds i8, i8* %bp2, i64 1
+ %dp2 = bitcast i8* %bp2p1 to i32*
+ %tmp2 = load i32, i32* %dp2, align 1
+
+ %sexttmp1 = sext i32 %tmp1 to i64
+ %sexttmp2 = sext i32 %tmp2 to i64
+ %tmp3 = add i64 %sexttmp1, %sexttmp2
+ ret i64 %tmp3
+}
diff --git a/test/CodeGen/AArch64/arm64-ldur.ll b/test/CodeGen/AArch64/arm64-ldur.ll
index 2848c06f9bb0..c4bf397d5d03 100644
--- a/test/CodeGen/AArch64/arm64-ldur.ll
+++ b/test/CodeGen/AArch64/arm64-ldur.ll
@@ -4,32 +4,32 @@ define i64 @_f0(i64* %p) {
; CHECK: f0:
; CHECK: ldur x0, [x0, #-8]
; CHECK-NEXT: ret
- %tmp = getelementptr inbounds i64* %p, i64 -1
- %ret = load i64* %tmp, align 2
+ %tmp = getelementptr inbounds i64, i64* %p, i64 -1
+ %ret = load i64, i64* %tmp, align 2
ret i64 %ret
}
define i32 @_f1(i32* %p) {
; CHECK: f1:
; CHECK: ldur w0, [x0, #-4]
; CHECK-NEXT: ret
- %tmp = getelementptr inbounds i32* %p, i64 -1
- %ret = load i32* %tmp, align 2
+ %tmp = getelementptr inbounds i32, i32* %p, i64 -1
+ %ret = load i32, i32* %tmp, align 2
ret i32 %ret
}
define i16 @_f2(i16* %p) {
; CHECK: f2:
; CHECK: ldurh w0, [x0, #-2]
; CHECK-NEXT: ret
- %tmp = getelementptr inbounds i16* %p, i64 -1
- %ret = load i16* %tmp, align 2
+ %tmp = getelementptr inbounds i16, i16* %p, i64 -1
+ %ret = load i16, i16* %tmp, align 2
ret i16 %ret
}
define i8 @_f3(i8* %p) {
; CHECK: f3:
; CHECK: ldurb w0, [x0, #-1]
; CHECK-NEXT: ret
- %tmp = getelementptr inbounds i8* %p, i64 -1
- %ret = load i8* %tmp, align 2
+ %tmp = getelementptr inbounds i8, i8* %p, i64 -1
+ %ret = load i8, i8* %tmp, align 2
ret i8 %ret
}
@@ -37,9 +37,9 @@ define i64 @zext32(i8* %a) nounwind ssp {
; CHECK-LABEL: zext32:
; CHECK: ldur w0, [x0, #-12]
; CHECK-NEXT: ret
- %p = getelementptr inbounds i8* %a, i64 -12
+ %p = getelementptr inbounds i8, i8* %a, i64 -12
%tmp1 = bitcast i8* %p to i32*
- %tmp2 = load i32* %tmp1, align 4
+ %tmp2 = load i32, i32* %tmp1, align 4
%ret = zext i32 %tmp2 to i64
ret i64 %ret
@@ -48,9 +48,9 @@ define i64 @zext16(i8* %a) nounwind ssp {
; CHECK-LABEL: zext16:
; CHECK: ldurh w0, [x0, #-12]
; CHECK-NEXT: ret
- %p = getelementptr inbounds i8* %a, i64 -12
+ %p = getelementptr inbounds i8, i8* %a, i64 -12
%tmp1 = bitcast i8* %p to i16*
- %tmp2 = load i16* %tmp1, align 2
+ %tmp2 = load i16, i16* %tmp1, align 2
%ret = zext i16 %tmp2 to i64
ret i64 %ret
@@ -59,8 +59,8 @@ define i64 @zext8(i8* %a) nounwind ssp {
; CHECK-LABEL: zext8:
; CHECK: ldurb w0, [x0, #-12]
; CHECK-NEXT: ret
- %p = getelementptr inbounds i8* %a, i64 -12
- %tmp2 = load i8* %p, align 1
+ %p = getelementptr inbounds i8, i8* %a, i64 -12
+ %tmp2 = load i8, i8* %p, align 1
%ret = zext i8 %tmp2 to i64
ret i64 %ret
diff --git a/test/CodeGen/AArch64/arm64-memcpy-inline.ll b/test/CodeGen/AArch64/arm64-memcpy-inline.ll
index f921a592451d..2f91ce252336 100644
--- a/test/CodeGen/AArch64/arm64-memcpy-inline.ll
+++ b/test/CodeGen/AArch64/arm64-memcpy-inline.ll
@@ -22,7 +22,7 @@ entry:
; CHECK: strh [[REG1]], [x[[BASEREG2]], #8]
; CHECK: ldr [[REG2:x[0-9]+]],
; CHECK: str [[REG2]],
- call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds (%struct.x* @dst, i32 0, i32 0), i8* getelementptr inbounds (%struct.x* @src, i32 0, i32 0), i32 11, i32 8, i1 false)
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds (%struct.x, %struct.x* @dst, i32 0, i32 0), i8* getelementptr inbounds (%struct.x, %struct.x* @src, i32 0, i32 0), i32 11, i32 8, i1 false)
ret i32 0
}
@@ -33,7 +33,7 @@ entry:
; CHECK: stur [[DEST]], [x0, #15]
; CHECK: ldr [[DEST:q[0-9]+]], [x[[BASEREG]]]
; CHECK: str [[DEST]], [x0]
- tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([31 x i8]* @.str1, i64 0, i64 0), i64 31, i32 1, i1 false)
+ tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([31 x i8], [31 x i8]* @.str1, i64 0, i64 0), i64 31, i32 1, i1 false)
ret void
}
@@ -45,7 +45,7 @@ entry:
; CHECK: str [[REG3]], [x0, #32]
; CHECK: ldp [[DEST1:q[0-9]+]], [[DEST2:q[0-9]+]], [x{{[0-9]+}}]
; CHECK: stp [[DEST1]], [[DEST2]], [x0]
- tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([36 x i8]* @.str2, i64 0, i64 0), i64 36, i32 1, i1 false)
+ tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([36 x i8], [36 x i8]* @.str2, i64 0, i64 0), i64 36, i32 1, i1 false)
ret void
}
@@ -56,7 +56,7 @@ entry:
; CHECK: str [[REG4]], [x0, #16]
; CHECK: ldr [[DEST:q[0-9]+]], [x[[BASEREG]]]
; CHECK: str [[DEST]], [x0]
- tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([24 x i8]* @.str3, i64 0, i64 0), i64 24, i32 1, i1 false)
+ tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([24 x i8], [24 x i8]* @.str3, i64 0, i64 0), i64 24, i32 1, i1 false)
ret void
}
@@ -67,7 +67,7 @@ entry:
; CHECK: strh [[REG5]], [x0, #16]
; CHECK: ldr [[REG6:q[0-9]+]], [x{{[0-9]+}}]
; CHECK: str [[REG6]], [x0]
- tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([18 x i8]* @.str4, i64 0, i64 0), i64 18, i32 1, i1 false)
+ tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([18 x i8], [18 x i8]* @.str4, i64 0, i64 0), i64 18, i32 1, i1 false)
ret void
}
@@ -80,7 +80,7 @@ entry:
; CHECK: movz [[REG8:w[0-9]+]],
; CHECK: movk [[REG8]],
; CHECK: str [[REG8]], [x0]
- tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([7 x i8]* @.str5, i64 0, i64 0), i64 7, i32 1, i1 false)
+ tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([7 x i8], [7 x i8]* @.str5, i64 0, i64 0), i64 7, i32 1, i1 false)
ret void
}
@@ -91,7 +91,7 @@ entry:
; CHECK: stur [[REG9]], [x{{[0-9]+}}, #6]
; CHECK: ldr
; CHECK: str
- call void @llvm.memcpy.p0i8.p0i8.i64(i8* getelementptr inbounds ([512 x i8]* @spool.splbuf, i64 0, i64 0), i8* getelementptr inbounds ([14 x i8]* @.str6, i64 0, i64 0), i64 14, i32 1, i1 false)
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* getelementptr inbounds ([512 x i8], [512 x i8]* @spool.splbuf, i64 0, i64 0), i8* getelementptr inbounds ([14 x i8], [14 x i8]* @.str6, i64 0, i64 0), i64 14, i32 1, i1 false)
ret void
}
diff --git a/test/CodeGen/AArch64/arm64-memset-inline.ll b/test/CodeGen/AArch64/arm64-memset-inline.ll
index 2e237f4a882d..56959ade0439 100644
--- a/test/CodeGen/AArch64/arm64-memset-inline.ll
+++ b/test/CodeGen/AArch64/arm64-memset-inline.ll
@@ -16,7 +16,7 @@ entry:
; CHECK: stp xzr, xzr, [sp, #16]
; CHECK: str xzr, [sp, #8]
%buf = alloca [26 x i8], align 1
- %0 = getelementptr inbounds [26 x i8]* %buf, i32 0, i32 0
+ %0 = getelementptr inbounds [26 x i8], [26 x i8]* %buf, i32 0, i32 0
call void @llvm.memset.p0i8.i32(i8* %0, i8 0, i32 26, i32 1, i1 false)
call void @something(i8* %0) nounwind
ret void
diff --git a/test/CodeGen/AArch64/arm64-misaligned-memcpy-inline.ll b/test/CodeGen/AArch64/arm64-misaligned-memcpy-inline.ll
new file mode 100644
index 000000000000..5bc4d71501ba
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-misaligned-memcpy-inline.ll
@@ -0,0 +1,14 @@
+; RUN: llc -mtriple=arm64-apple-ios -aarch64-strict-align < %s | FileCheck %s
+
+; Small (16 bytes here) unaligned memcpys should stay memcpy calls if
+; strict-alignment is turned on.
+define void @t0(i8* %out, i8* %in) {
+; CHECK-LABEL: t0:
+; CHECK: orr w2, wzr, #0x10
+; CHECK-NEXT: bl _memcpy
+entry:
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* %out, i8* %in, i64 16, i32 1, i1 false)
+ ret void
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1)
diff --git a/test/CodeGen/AArch64/arm64-misched-basic-A53.ll b/test/CodeGen/AArch64/arm64-misched-basic-A53.ll
index bc7ed7fbdf83..6db21043f670 100644
--- a/test/CodeGen/AArch64/arm64-misched-basic-A53.ll
+++ b/test/CodeGen/AArch64/arm64-misched-basic-A53.ll
@@ -34,44 +34,44 @@ entry:
br label %for.cond
for.cond: ; preds = %for.inc, %entry
- %2 = load i32* %i, align 4
+ %2 = load i32, i32* %i, align 4
%cmp = icmp slt i32 %2, 8
br i1 %cmp, label %for.body, label %for.end
for.body: ; preds = %for.cond
- %3 = load i32* %i, align 4
+ %3 = load i32, i32* %i, align 4
%idxprom = sext i32 %3 to i64
- %arrayidx = getelementptr inbounds [8 x i32]* %x, i32 0, i64 %idxprom
- %4 = load i32* %arrayidx, align 4
+ %arrayidx = getelementptr inbounds [8 x i32], [8 x i32]* %x, i32 0, i64 %idxprom
+ %4 = load i32, i32* %arrayidx, align 4
%add = add nsw i32 %4, 1
store i32 %add, i32* %xx, align 4
- %5 = load i32* %xx, align 4
+ %5 = load i32, i32* %xx, align 4
%add1 = add nsw i32 %5, 12
store i32 %add1, i32* %xx, align 4
- %6 = load i32* %xx, align 4
+ %6 = load i32, i32* %xx, align 4
%add2 = add nsw i32 %6, 23
store i32 %add2, i32* %xx, align 4
- %7 = load i32* %xx, align 4
+ %7 = load i32, i32* %xx, align 4
%add3 = add nsw i32 %7, 34
store i32 %add3, i32* %xx, align 4
- %8 = load i32* %i, align 4
+ %8 = load i32, i32* %i, align 4
%idxprom4 = sext i32 %8 to i64
- %arrayidx5 = getelementptr inbounds [8 x i32]* %y, i32 0, i64 %idxprom4
- %9 = load i32* %arrayidx5, align 4
- %10 = load i32* %yy, align 4
+ %arrayidx5 = getelementptr inbounds [8 x i32], [8 x i32]* %y, i32 0, i64 %idxprom4
+ %9 = load i32, i32* %arrayidx5, align 4
+ %10 = load i32, i32* %yy, align 4
%mul = mul nsw i32 %10, %9
store i32 %mul, i32* %yy, align 4
br label %for.inc
for.inc: ; preds = %for.body
- %11 = load i32* %i, align 4
+ %11 = load i32, i32* %i, align 4
%inc = add nsw i32 %11, 1
store i32 %inc, i32* %i, align 4
br label %for.cond
for.end: ; preds = %for.cond
- %12 = load i32* %xx, align 4
- %13 = load i32* %yy, align 4
+ %12 = load i32, i32* %xx, align 4
+ %13 = load i32, i32* %yy, align 4
%add6 = add nsw i32 %12, %13
ret i32 %add6
}
@@ -116,7 +116,7 @@ attributes #1 = { nounwind }
; Nothing explicit to check other than llc not crashing.
define { <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld2(i8* %A, i8** %ptr) {
%ld2 = tail call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2.v16i8.p0i8(i8* %A)
- %tmp = getelementptr i8* %A, i32 32
+ %tmp = getelementptr i8, i8* %A, i32 32
store i8* %tmp, i8** %ptr
ret { <16 x i8>, <16 x i8> } %ld2
}
diff --git a/test/CodeGen/AArch64/arm64-misched-basic-A57.ll b/test/CodeGen/AArch64/arm64-misched-basic-A57.ll
index 238474a12c65..fac5f8ad2e9f 100644
--- a/test/CodeGen/AArch64/arm64-misched-basic-A57.ll
+++ b/test/CodeGen/AArch64/arm64-misched-basic-A57.ll
@@ -9,8 +9,8 @@
; RUN: llc < %s -mtriple=arm64-linux-gnu -mcpu=cortex-a57 -enable-misched -verify-misched -debug-only=misched -o - 2>&1 > /dev/null | FileCheck %s
; CHECK: ********** MI Scheduling **********
; CHECK: main:BB#2
-; CHECK LDR
-; CHECK Latency : 4
+; CHECK: LDR
+; CHECK: Latency : 4
; CHECK: *** Final schedule for BB#2 ***
; CHECK: LDR
; CHECK: LDR
@@ -41,31 +41,31 @@ entry:
br label %for.cond
for.cond: ; preds = %for.inc, %entry
- %2 = load i32* %i, align 4
+ %2 = load i32, i32* %i, align 4
%cmp = icmp slt i32 %2, 8
br i1 %cmp, label %for.body, label %for.end
for.body: ; preds = %for.cond
- %3 = load i32* %yy, align 4
- %4 = load i32* %i, align 4
+ %3 = load i32, i32* %yy, align 4
+ %4 = load i32, i32* %i, align 4
%idxprom = sext i32 %4 to i64
- %arrayidx = getelementptr inbounds [8 x i32]* %x, i32 0, i64 %idxprom
- %5 = load i32* %arrayidx, align 4
+ %arrayidx = getelementptr inbounds [8 x i32], [8 x i32]* %x, i32 0, i64 %idxprom
+ %5 = load i32, i32* %arrayidx, align 4
%add = add nsw i32 %5, 1
store i32 %add, i32* %xx, align 4
- %6 = load i32* %xx, align 4
+ %6 = load i32, i32* %xx, align 4
%add1 = add nsw i32 %6, 12
store i32 %add1, i32* %xx, align 4
- %7 = load i32* %xx, align 4
+ %7 = load i32, i32* %xx, align 4
%add2 = add nsw i32 %7, 23
store i32 %add2, i32* %xx, align 4
- %8 = load i32* %xx, align 4
+ %8 = load i32, i32* %xx, align 4
%add3 = add nsw i32 %8, 34
store i32 %add3, i32* %xx, align 4
- %9 = load i32* %i, align 4
+ %9 = load i32, i32* %i, align 4
%idxprom4 = sext i32 %9 to i64
- %arrayidx5 = getelementptr inbounds [8 x i32]* %y, i32 0, i64 %idxprom4
- %10 = load i32* %arrayidx5, align 4
+ %arrayidx5 = getelementptr inbounds [8 x i32], [8 x i32]* %y, i32 0, i64 %idxprom4
+ %10 = load i32, i32* %arrayidx5, align 4
%add4 = add nsw i32 %9, %add
%add5 = add nsw i32 %10, %add1
@@ -92,14 +92,14 @@ for.body: ; preds = %for.cond
br label %for.inc
for.inc: ; preds = %for.body
- %11 = load i32* %i, align 4
+ %11 = load i32, i32* %i, align 4
%inc = add nsw i32 %11, 1
store i32 %inc, i32* %i, align 4
br label %for.cond
for.end: ; preds = %for.cond
- %12 = load i32* %xx, align 4
- %13 = load i32* %yy, align 4
+ %12 = load i32, i32* %xx, align 4
+ %13 = load i32, i32* %yy, align 4
%add67 = add nsw i32 %12, %13
ret i32 %add67
}
diff --git a/test/CodeGen/AArch64/arm64-neon-copy.ll b/test/CodeGen/AArch64/arm64-neon-copy.ll
index 1cfba826d510..b74a40626cee 100644
--- a/test/CodeGen/AArch64/arm64-neon-copy.ll
+++ b/test/CodeGen/AArch64/arm64-neon-copy.ll
@@ -188,7 +188,7 @@ define <2 x float> @ins4f2(<4 x float> %tmp1, <2 x float> %tmp2) {
define <1 x double> @ins2f1(<2 x double> %tmp1, <1 x double> %tmp2) {
; CHECK-LABEL: ins2f1:
-; CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1]
+; CHECK: mov {{d[0-9]+}}, {{v[0-9]+}}.d[1]
%tmp3 = extractelement <2 x double> %tmp1, i32 1
%tmp4 = insertelement <1 x double> %tmp2, double %tmp3, i32 0
ret <1 x double> %tmp4
@@ -1086,7 +1086,7 @@ define <2 x i32> @test_concat_diff_v1i32_v1i32(i32 %a, i32 %b) {
; CHECK-LABEL: test_concat_diff_v1i32_v1i32:
; CHECK: sqabs s{{[0-9]+}}, s{{[0-9]+}}
; CHECK: sqabs s{{[0-9]+}}, s{{[0-9]+}}
-; CHECK-NEXT: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+; CHECK: ins {{v[0-9]+}}.s[1], w{{[0-9]+}}
entry:
%c = tail call i32 @llvm.aarch64.neon.sqabs.i32(i32 %a)
%d = insertelement <2 x i32> undef, i32 %c, i32 0
diff --git a/test/CodeGen/AArch64/arm64-neon-select_cc.ll b/test/CodeGen/AArch64/arm64-neon-select_cc.ll
index d334c0846ace..fe765f4ef984 100644
--- a/test/CodeGen/AArch64/arm64-neon-select_cc.ll
+++ b/test/CodeGen/AArch64/arm64-neon-select_cc.ll
@@ -1,4 +1,5 @@
-; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s
+; RUN: llc -mtriple=arm64-none-linux-gnu -mattr=+neon -fp-contract=fast \
+; RUN: < %s -verify-machineinstrs -asm-verbose=false | FileCheck %s
define <8x i8> @test_select_cc_v8i8_i8(i8 %a, i8 %b, <8x i8> %c, <8x i8> %d ) {
; CHECK-LABEL: test_select_cc_v8i8_i8:
@@ -219,3 +220,30 @@ define <2 x i32> @test_select_cc_v2i32_icmpi1(i1 %cc, <2 x i32> %a, <2 x i32> %b
%e = select i1 %cmp, <2 x i32> %a, <2 x i32> %b
ret <2 x i32> %e
}
+
+; Also make sure we support irregular/non-power-of-2 types such as v3f32.
+define <3 x float> @test_select_cc_v3f32_fcmp_f32(<3 x float> %a, <3 x float> %b, float %c1, float %c2) #0 {
+; CHECK-LABEL: test_select_cc_v3f32_fcmp_f32:
+; CHECK-NEXT: fcmeq [[MASK:v[0-9]+]].4s, v2.4s, v3.4s
+; CHECK-NEXT: dup [[DUPMASK:v[0-9]+]].4s, [[MASK]].s[0]
+; CHECK-NEXT: bsl [[DUPMASK:v[0-9]+]].16b, v0.16b, v1.16b
+; CHECK-NEXT: mov v0.16b, [[DUPMASK]].16b
+; CHECK-NEXT: ret
+ %cc = fcmp oeq float %c1, %c2
+ %r = select i1 %cc, <3 x float> %a, <3 x float> %b
+ ret <3 x float> %r
+}
+
+define <3 x float> @test_select_cc_v3f32_fcmp_f64(<3 x float> %a, <3 x float> %b, double %c1, double %c2) #0 {
+; CHECK-LABEL: test_select_cc_v3f32_fcmp_f64:
+; CHECK-NEXT: fcmeq [[MASK:v[0-9]+]].2d, v2.2d, v3.2d
+; CHECK-NEXT: dup [[DUPMASK:v[0-9]+]].2d, [[MASK]].d[0]
+; CHECK-NEXT: bsl [[DUPMASK:v[0-9]+]].16b, v0.16b, v1.16b
+; CHECK-NEXT: mov v0.16b, [[DUPMASK]].16b
+; CHECK-NEXT: ret
+ %cc = fcmp oeq double %c1, %c2
+ %r = select i1 %cc, <3 x float> %a, <3 x float> %b
+ ret <3 x float> %r
+}
+
+attributes #0 = { nounwind }
diff --git a/test/CodeGen/AArch64/arm64-neon-simd-ldst-one.ll b/test/CodeGen/AArch64/arm64-neon-simd-ldst-one.ll
index cca6bfef7307..b63200efb6b5 100644
--- a/test/CodeGen/AArch64/arm64-neon-simd-ldst-one.ll
+++ b/test/CodeGen/AArch64/arm64-neon-simd-ldst-one.ll
@@ -126,7 +126,7 @@ define <16 x i8> @test_vld1q_dup_s8(i8* %a) {
; CHECK-LABEL: test_vld1q_dup_s8:
; CHECK: ld1r {{{ ?v[0-9]+.16b ?}}}, [x0]
entry:
- %0 = load i8* %a, align 1
+ %0 = load i8, i8* %a, align 1
%1 = insertelement <16 x i8> undef, i8 %0, i32 0
%lane = shufflevector <16 x i8> %1, <16 x i8> undef, <16 x i32> zeroinitializer
ret <16 x i8> %lane
@@ -136,7 +136,7 @@ define <8 x i16> @test_vld1q_dup_s16(i16* %a) {
; CHECK-LABEL: test_vld1q_dup_s16:
; CHECK: ld1r {{{ ?v[0-9]+.8h ?}}}, [x0]
entry:
- %0 = load i16* %a, align 2
+ %0 = load i16, i16* %a, align 2
%1 = insertelement <8 x i16> undef, i16 %0, i32 0
%lane = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> zeroinitializer
ret <8 x i16> %lane
@@ -146,7 +146,7 @@ define <4 x i32> @test_vld1q_dup_s32(i32* %a) {
; CHECK-LABEL: test_vld1q_dup_s32:
; CHECK: ld1r {{{ ?v[0-9]+.4s ?}}}, [x0]
entry:
- %0 = load i32* %a, align 4
+ %0 = load i32, i32* %a, align 4
%1 = insertelement <4 x i32> undef, i32 %0, i32 0
%lane = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> zeroinitializer
ret <4 x i32> %lane
@@ -156,7 +156,7 @@ define <2 x i64> @test_vld1q_dup_s64(i64* %a) {
; CHECK-LABEL: test_vld1q_dup_s64:
; CHECK: ld1r {{{ ?v[0-9]+.2d ?}}}, [x0]
entry:
- %0 = load i64* %a, align 8
+ %0 = load i64, i64* %a, align 8
%1 = insertelement <2 x i64> undef, i64 %0, i32 0
%lane = shufflevector <2 x i64> %1, <2 x i64> undef, <2 x i32> zeroinitializer
ret <2 x i64> %lane
@@ -166,7 +166,7 @@ define <4 x float> @test_vld1q_dup_f32(float* %a) {
; CHECK-LABEL: test_vld1q_dup_f32:
; CHECK: ld1r {{{ ?v[0-9]+.4s ?}}}, [x0]
entry:
- %0 = load float* %a, align 4
+ %0 = load float, float* %a, align 4
%1 = insertelement <4 x float> undef, float %0, i32 0
%lane = shufflevector <4 x float> %1, <4 x float> undef, <4 x i32> zeroinitializer
ret <4 x float> %lane
@@ -176,7 +176,7 @@ define <2 x double> @test_vld1q_dup_f64(double* %a) {
; CHECK-LABEL: test_vld1q_dup_f64:
; CHECK: ld1r {{{ ?v[0-9]+.2d ?}}}, [x0]
entry:
- %0 = load double* %a, align 8
+ %0 = load double, double* %a, align 8
%1 = insertelement <2 x double> undef, double %0, i32 0
%lane = shufflevector <2 x double> %1, <2 x double> undef, <2 x i32> zeroinitializer
ret <2 x double> %lane
@@ -186,7 +186,7 @@ define <8 x i8> @test_vld1_dup_s8(i8* %a) {
; CHECK-LABEL: test_vld1_dup_s8:
; CHECK: ld1r {{{ ?v[0-9]+.8b ?}}}, [x0]
entry:
- %0 = load i8* %a, align 1
+ %0 = load i8, i8* %a, align 1
%1 = insertelement <8 x i8> undef, i8 %0, i32 0
%lane = shufflevector <8 x i8> %1, <8 x i8> undef, <8 x i32> zeroinitializer
ret <8 x i8> %lane
@@ -196,7 +196,7 @@ define <4 x i16> @test_vld1_dup_s16(i16* %a) {
; CHECK-LABEL: test_vld1_dup_s16:
; CHECK: ld1r {{{ ?v[0-9]+.4h ?}}}, [x0]
entry:
- %0 = load i16* %a, align 2
+ %0 = load i16, i16* %a, align 2
%1 = insertelement <4 x i16> undef, i16 %0, i32 0
%lane = shufflevector <4 x i16> %1, <4 x i16> undef, <4 x i32> zeroinitializer
ret <4 x i16> %lane
@@ -206,7 +206,7 @@ define <2 x i32> @test_vld1_dup_s32(i32* %a) {
; CHECK-LABEL: test_vld1_dup_s32:
; CHECK: ld1r {{{ ?v[0-9]+.2s ?}}}, [x0]
entry:
- %0 = load i32* %a, align 4
+ %0 = load i32, i32* %a, align 4
%1 = insertelement <2 x i32> undef, i32 %0, i32 0
%lane = shufflevector <2 x i32> %1, <2 x i32> undef, <2 x i32> zeroinitializer
ret <2 x i32> %lane
@@ -216,7 +216,7 @@ define <1 x i64> @test_vld1_dup_s64(i64* %a) {
; CHECK-LABEL: test_vld1_dup_s64:
; CHECK: ldr {{d[0-9]+}}, [x0]
entry:
- %0 = load i64* %a, align 8
+ %0 = load i64, i64* %a, align 8
%1 = insertelement <1 x i64> undef, i64 %0, i32 0
ret <1 x i64> %1
}
@@ -225,7 +225,7 @@ define <2 x float> @test_vld1_dup_f32(float* %a) {
; CHECK-LABEL: test_vld1_dup_f32:
; CHECK: ld1r {{{ ?v[0-9]+.2s ?}}}, [x0]
entry:
- %0 = load float* %a, align 4
+ %0 = load float, float* %a, align 4
%1 = insertelement <2 x float> undef, float %0, i32 0
%lane = shufflevector <2 x float> %1, <2 x float> undef, <2 x i32> zeroinitializer
ret <2 x float> %lane
@@ -235,7 +235,7 @@ define <1 x double> @test_vld1_dup_f64(double* %a) {
; CHECK-LABEL: test_vld1_dup_f64:
; CHECK: ldr {{d[0-9]+}}, [x0]
entry:
- %0 = load double* %a, align 8
+ %0 = load double, double* %a, align 8
%1 = insertelement <1 x double> undef, double %0, i32 0
ret <1 x double> %1
}
@@ -247,7 +247,7 @@ define <1 x i64> @testDUP.v1i64(i64* %a, i64* %b) #0 {
; CHECK: ldr {{x[0-9]+}}, [{{x[0-9]+}}]
; CHECK-DAG: fmov {{d[0-9]+}}, {{x[0-9]+}}
; CHECK-DAG: str {{x[0-9]+}}, [{{x[0-9]+}}]
- %1 = load i64* %a, align 8
+ %1 = load i64, i64* %a, align 8
store i64 %1, i64* %b, align 8
%vecinit.i = insertelement <1 x i64> undef, i64 %1, i32 0
ret <1 x i64> %vecinit.i
@@ -259,7 +259,7 @@ define <1 x double> @testDUP.v1f64(double* %a, double* %b) #0 {
; CHECK-LABEL: testDUP.v1f64:
; CHECK: ldr {{d[0-9]+}}, [{{x[0-9]+}}]
; CHECK: str {{d[0-9]+}}, [{{x[0-9]+}}]
- %1 = load double* %a, align 8
+ %1 = load double, double* %a, align 8
store double %1, double* %b, align 8
%vecinit.i = insertelement <1 x double> undef, double %1, i32 0
ret <1 x double> %vecinit.i
@@ -269,7 +269,7 @@ define <16 x i8> @test_vld1q_lane_s8(i8* %a, <16 x i8> %b) {
; CHECK-LABEL: test_vld1q_lane_s8:
; CHECK: ld1 { {{v[0-9]+}}.b }[{{[0-9]+}}], [x0]
entry:
- %0 = load i8* %a, align 1
+ %0 = load i8, i8* %a, align 1
%vld1_lane = insertelement <16 x i8> %b, i8 %0, i32 15
ret <16 x i8> %vld1_lane
}
@@ -278,7 +278,7 @@ define <8 x i16> @test_vld1q_lane_s16(i16* %a, <8 x i16> %b) {
; CHECK-LABEL: test_vld1q_lane_s16:
; CHECK: ld1 { {{v[0-9]+}}.h }[{{[0-9]+}}], [x0]
entry:
- %0 = load i16* %a, align 2
+ %0 = load i16, i16* %a, align 2
%vld1_lane = insertelement <8 x i16> %b, i16 %0, i32 7
ret <8 x i16> %vld1_lane
}
@@ -287,7 +287,7 @@ define <4 x i32> @test_vld1q_lane_s32(i32* %a, <4 x i32> %b) {
; CHECK-LABEL: test_vld1q_lane_s32:
; CHECK: ld1 { {{v[0-9]+}}.s }[{{[0-9]+}}], [x0]
entry:
- %0 = load i32* %a, align 4
+ %0 = load i32, i32* %a, align 4
%vld1_lane = insertelement <4 x i32> %b, i32 %0, i32 3
ret <4 x i32> %vld1_lane
}
@@ -296,7 +296,7 @@ define <2 x i64> @test_vld1q_lane_s64(i64* %a, <2 x i64> %b) {
; CHECK-LABEL: test_vld1q_lane_s64:
; CHECK: ld1 { {{v[0-9]+}}.d }[{{[0-9]+}}], [x0]
entry:
- %0 = load i64* %a, align 8
+ %0 = load i64, i64* %a, align 8
%vld1_lane = insertelement <2 x i64> %b, i64 %0, i32 1
ret <2 x i64> %vld1_lane
}
@@ -305,7 +305,7 @@ define <4 x float> @test_vld1q_lane_f32(float* %a, <4 x float> %b) {
; CHECK-LABEL: test_vld1q_lane_f32:
; CHECK: ld1 { {{v[0-9]+}}.s }[{{[0-9]+}}], [x0]
entry:
- %0 = load float* %a, align 4
+ %0 = load float, float* %a, align 4
%vld1_lane = insertelement <4 x float> %b, float %0, i32 3
ret <4 x float> %vld1_lane
}
@@ -314,7 +314,7 @@ define <2 x double> @test_vld1q_lane_f64(double* %a, <2 x double> %b) {
; CHECK-LABEL: test_vld1q_lane_f64:
; CHECK: ld1 { {{v[0-9]+}}.d }[{{[0-9]+}}], [x0]
entry:
- %0 = load double* %a, align 8
+ %0 = load double, double* %a, align 8
%vld1_lane = insertelement <2 x double> %b, double %0, i32 1
ret <2 x double> %vld1_lane
}
@@ -323,7 +323,7 @@ define <8 x i8> @test_vld1_lane_s8(i8* %a, <8 x i8> %b) {
; CHECK-LABEL: test_vld1_lane_s8:
; CHECK: ld1 { {{v[0-9]+}}.b }[{{[0-9]+}}], [x0]
entry:
- %0 = load i8* %a, align 1
+ %0 = load i8, i8* %a, align 1
%vld1_lane = insertelement <8 x i8> %b, i8 %0, i32 7
ret <8 x i8> %vld1_lane
}
@@ -332,7 +332,7 @@ define <4 x i16> @test_vld1_lane_s16(i16* %a, <4 x i16> %b) {
; CHECK-LABEL: test_vld1_lane_s16:
; CHECK: ld1 { {{v[0-9]+}}.h }[{{[0-9]+}}], [x0]
entry:
- %0 = load i16* %a, align 2
+ %0 = load i16, i16* %a, align 2
%vld1_lane = insertelement <4 x i16> %b, i16 %0, i32 3
ret <4 x i16> %vld1_lane
}
@@ -341,7 +341,7 @@ define <2 x i32> @test_vld1_lane_s32(i32* %a, <2 x i32> %b) {
; CHECK-LABEL: test_vld1_lane_s32:
; CHECK: ld1 { {{v[0-9]+}}.s }[{{[0-9]+}}], [x0]
entry:
- %0 = load i32* %a, align 4
+ %0 = load i32, i32* %a, align 4
%vld1_lane = insertelement <2 x i32> %b, i32 %0, i32 1
ret <2 x i32> %vld1_lane
}
@@ -350,7 +350,7 @@ define <1 x i64> @test_vld1_lane_s64(i64* %a, <1 x i64> %b) {
; CHECK-LABEL: test_vld1_lane_s64:
; CHECK: ldr {{d[0-9]+}}, [x0]
entry:
- %0 = load i64* %a, align 8
+ %0 = load i64, i64* %a, align 8
%vld1_lane = insertelement <1 x i64> undef, i64 %0, i32 0
ret <1 x i64> %vld1_lane
}
@@ -359,7 +359,7 @@ define <2 x float> @test_vld1_lane_f32(float* %a, <2 x float> %b) {
; CHECK-LABEL: test_vld1_lane_f32:
; CHECK: ld1 { {{v[0-9]+}}.s }[{{[0-9]+}}], [x0]
entry:
- %0 = load float* %a, align 4
+ %0 = load float, float* %a, align 4
%vld1_lane = insertelement <2 x float> %b, float %0, i32 1
ret <2 x float> %vld1_lane
}
@@ -368,7 +368,7 @@ define <1 x double> @test_vld1_lane_f64(double* %a, <1 x double> %b) {
; CHECK-LABEL: test_vld1_lane_f64:
; CHECK: ldr {{d[0-9]+}}, [x0]
entry:
- %0 = load double* %a, align 8
+ %0 = load double, double* %a, align 8
%vld1_lane = insertelement <1 x double> undef, double %0, i32 0
ret <1 x double> %vld1_lane
}
diff --git a/test/CodeGen/AArch64/arm64-neon-v1i1-setcc.ll b/test/CodeGen/AArch64/arm64-neon-v1i1-setcc.ll
index 74e3af8206f5..c739e9dcd906 100644
--- a/test/CodeGen/AArch64/arm64-neon-v1i1-setcc.ll
+++ b/test/CodeGen/AArch64/arm64-neon-v1i1-setcc.ll
@@ -50,9 +50,15 @@ define <1 x double> @test_select_v1i1_2(<1 x i64> %v1, <1 x i64> %v2, <1 x doubl
ret <1 x double> %res
}
+; For v1i64, it's not clear whether the vector or the scalar compare is better.
+; Let's stick to the vector form, like for all other vector selects fed by a
+; scalar setcc. If anything, it exposes more ILP.
define <1 x i64> @test_select_v1i1_3(i64 %lhs, i64 %rhs, <1 x i64> %v3) {
; CHECK-LABEL: test_select_v1i1_3:
-; CHECK: cmp {{x[0-9]+}}, {{x[0-9]+}}
+; CHECK: fmov d{{[0-9]+}}, x{{[0-9]+}}
+; CHECK: fmov d{{[0-9]+}}, x{{[0-9]+}}
+; CHECK: cmeq d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+; CHECK: bic v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b
%tst = icmp eq i64 %lhs, %rhs
%evil = insertelement <1 x i1> undef, i1 %tst, i32 0
%res = select <1 x i1> %evil, <1 x i64> zeroinitializer, <1 x i64> %v3
diff --git a/test/CodeGen/AArch64/arm64-neon-v8.1a.ll b/test/CodeGen/AArch64/arm64-neon-v8.1a.ll
new file mode 100644
index 000000000000..51ed8a13cd2e
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-neon-v8.1a.ll
@@ -0,0 +1,456 @@
+; RUN: llc < %s -verify-machineinstrs -march=arm64 -aarch64-neon-syntax=generic | FileCheck %s --check-prefix=CHECK-V8a
+; RUN: llc < %s -verify-machineinstrs -march=arm64 -mattr=+v8.1a -aarch64-neon-syntax=generic | FileCheck %s --check-prefix=CHECK-V81a
+; RUN: llc < %s -verify-machineinstrs -march=arm64 -mattr=+v8.1a -aarch64-neon-syntax=apple | FileCheck %s --check-prefix=CHECK-V81a-apple
+
+declare <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16>, <4 x i16>)
+declare <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16>, <8 x i16>)
+declare <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32>, <2 x i32>)
+declare <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32>, <4 x i32>)
+declare i32 @llvm.aarch64.neon.sqrdmulh.i32(i32, i32)
+declare i16 @llvm.aarch64.neon.sqrdmulh.i16(i16, i16)
+
+declare <4 x i16> @llvm.aarch64.neon.sqadd.v4i16(<4 x i16>, <4 x i16>)
+declare <8 x i16> @llvm.aarch64.neon.sqadd.v8i16(<8 x i16>, <8 x i16>)
+declare <2 x i32> @llvm.aarch64.neon.sqadd.v2i32(<2 x i32>, <2 x i32>)
+declare <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32>, <4 x i32>)
+declare i32 @llvm.aarch64.neon.sqadd.i32(i32, i32)
+declare i16 @llvm.aarch64.neon.sqadd.i16(i16, i16)
+
+declare <4 x i16> @llvm.aarch64.neon.sqsub.v4i16(<4 x i16>, <4 x i16>)
+declare <8 x i16> @llvm.aarch64.neon.sqsub.v8i16(<8 x i16>, <8 x i16>)
+declare <2 x i32> @llvm.aarch64.neon.sqsub.v2i32(<2 x i32>, <2 x i32>)
+declare <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32>, <4 x i32>)
+declare i32 @llvm.aarch64.neon.sqsub.i32(i32, i32)
+declare i16 @llvm.aarch64.neon.sqsub.i16(i16, i16)
+
+;-----------------------------------------------------------------------------
+; RDMA Vector
+; test for SIMDThreeSameVectorSQRDMLxHTiedHS
+
+define <4 x i16> @test_sqrdmlah_v4i16(<4 x i16> %acc, <4 x i16> %mhs, <4 x i16> %rhs) {
+; CHECK-LABEL: test_sqrdmlah_v4i16:
+ %prod = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> %mhs, <4 x i16> %rhs)
+ %retval = call <4 x i16> @llvm.aarch64.neon.sqadd.v4i16(<4 x i16> %acc, <4 x i16> %prod)
+; CHECK-V8a: sqrdmulh v1.4h, v1.4h, v2.4h
+; CHECK-V81a: sqrdmlah v0.4h, v1.4h, v2.4h
+; CHECK-V81a-apple: sqrdmlah.4h v0, v1, v2
+ ret <4 x i16> %retval
+}
+
+define <8 x i16> @test_sqrdmlah_v8i16(<8 x i16> %acc, <8 x i16> %mhs, <8 x i16> %rhs) {
+; CHECK-LABEL: test_sqrdmlah_v8i16:
+ %prod = call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> %mhs, <8 x i16> %rhs)
+ %retval = call <8 x i16> @llvm.aarch64.neon.sqadd.v8i16(<8 x i16> %acc, <8 x i16> %prod)
+; CHECK-V8a: sqrdmulh v1.8h, v1.8h, v2.8h
+; CHECK-V81a: sqrdmlah v0.8h, v1.8h, v2.8h
+; CHECK-V81a-apple: sqrdmlah.8h v0, v1, v2
+ ret <8 x i16> %retval
+}
+
+define <2 x i32> @test_sqrdmlah_v2i32(<2 x i32> %acc, <2 x i32> %mhs, <2 x i32> %rhs) {
+; CHECK-LABEL: test_sqrdmlah_v2i32:
+ %prod = call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> %mhs, <2 x i32> %rhs)
+ %retval = call <2 x i32> @llvm.aarch64.neon.sqadd.v2i32(<2 x i32> %acc, <2 x i32> %prod)
+; CHECK-V8a: sqrdmulh v1.2s, v1.2s, v2.2s
+; CHECK-V81a: sqrdmlah v0.2s, v1.2s, v2.2s
+; CHECK-V81a-apple: sqrdmlah.2s v0, v1, v2
+ ret <2 x i32> %retval
+}
+
+define <4 x i32> @test_sqrdmlah_v4i32(<4 x i32> %acc, <4 x i32> %mhs, <4 x i32> %rhs) {
+; CHECK-LABEL: test_sqrdmlah_v4i32:
+ %prod = call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> %mhs, <4 x i32> %rhs)
+ %retval = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %acc, <4 x i32> %prod)
+; CHECK-V8a: sqrdmulh v1.4s, v1.4s, v2.4s
+; CHECK-V81a: sqrdmlah v0.4s, v1.4s, v2.4s
+; CHECK-V81a-apple: sqrdmlah.4s v0, v1, v2
+ ret <4 x i32> %retval
+}
+
+define <4 x i16> @test_sqrdmlsh_v4i16(<4 x i16> %acc, <4 x i16> %mhs, <4 x i16> %rhs) {
+; CHECK-LABEL: test_sqrdmlsh_v4i16:
+ %prod = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> %mhs, <4 x i16> %rhs)
+ %retval = call <4 x i16> @llvm.aarch64.neon.sqsub.v4i16(<4 x i16> %acc, <4 x i16> %prod)
+; CHECK-V8a: sqrdmulh v1.4h, v1.4h, v2.4h
+; CHECK-V81a: sqrdmlsh v0.4h, v1.4h, v2.4h
+; CHECK-V81a-apple: sqrdmlsh.4h v0, v1, v2
+ ret <4 x i16> %retval
+}
+
+define <8 x i16> @test_sqrdmlsh_v8i16(<8 x i16> %acc, <8 x i16> %mhs, <8 x i16> %rhs) {
+; CHECK-LABEL: test_sqrdmlsh_v8i16:
+ %prod = call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> %mhs, <8 x i16> %rhs)
+ %retval = call <8 x i16> @llvm.aarch64.neon.sqsub.v8i16(<8 x i16> %acc, <8 x i16> %prod)
+; CHECK-V8a: sqrdmulh v1.8h, v1.8h, v2.8h
+; CHECK-V81a: sqrdmlsh v0.8h, v1.8h, v2.8h
+; CHECK-V81a-apple: sqrdmlsh.8h v0, v1, v2
+ ret <8 x i16> %retval
+}
+
+define <2 x i32> @test_sqrdmlsh_v2i32(<2 x i32> %acc, <2 x i32> %mhs, <2 x i32> %rhs) {
+; CHECK-LABEL: test_sqrdmlsh_v2i32:
+ %prod = call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> %mhs, <2 x i32> %rhs)
+ %retval = call <2 x i32> @llvm.aarch64.neon.sqsub.v2i32(<2 x i32> %acc, <2 x i32> %prod)
+; CHECK-V8a: sqrdmulh v1.2s, v1.2s, v2.2s
+; CHECK-V81a: sqrdmlsh v0.2s, v1.2s, v2.2s
+; CHECK-V81a-apple: sqrdmlsh.2s v0, v1, v2
+ ret <2 x i32> %retval
+}
+
+define <4 x i32> @test_sqrdmlsh_v4i32(<4 x i32> %acc, <4 x i32> %mhs, <4 x i32> %rhs) {
+; CHECK-LABEL: test_sqrdmlsh_v4i32:
+ %prod = call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> %mhs, <4 x i32> %rhs)
+ %retval = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %acc, <4 x i32> %prod)
+; CHECK-V8a: sqrdmulh v1.4s, v1.4s, v2.4s
+; CHECK-V81a: sqrdmlsh v0.4s, v1.4s, v2.4s
+; CHECK-V81a-apple: sqrdmlsh.4s v0, v1, v2
+ ret <4 x i32> %retval
+}
+
+;-----------------------------------------------------------------------------
+; RDMA Vector, by element
+; tests for vXiYY_indexed in SIMDIndexedSQRDMLxHSDTied
+
+define <4 x i16> @test_sqrdmlah_lane_s16(<4 x i16> %acc, <4 x i16> %x, <4 x i16> %v) {
+; CHECK-LABEL: test_sqrdmlah_lane_s16:
+entry:
+ %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+ %prod = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> %x, <4 x i16> %shuffle)
+ %retval = call <4 x i16> @llvm.aarch64.neon.sqadd.v4i16(<4 x i16> %acc, <4 x i16> %prod)
+; CHECK-V8a: sqrdmulh v1.4h, v1.4h, v2.h[3]
+; CHECK-V81a: sqrdmlah v0.4h, v1.4h, v2.h[3]
+; CHECK-V81a-apple: sqrdmlah.4h v0, v1, v2[3]
+ ret <4 x i16> %retval
+}
+
+define <8 x i16> @test_sqrdmlahq_lane_s16(<8 x i16> %acc, <8 x i16> %x, <8 x i16> %v) {
+; CHECK-LABEL: test_sqrdmlahq_lane_s16:
+entry:
+ %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
+ %prod = call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> %x, <8 x i16> %shuffle)
+ %retval = call <8 x i16> @llvm.aarch64.neon.sqadd.v8i16(<8 x i16> %acc, <8 x i16> %prod)
+; CHECK-V8a: sqrdmulh v1.8h, v1.8h, v2.h[2]
+; CHECK-V81a: sqrdmlah v0.8h, v1.8h, v2.h[2]
+; CHECK-V81a-apple: sqrdmlah.8h v0, v1, v2[2]
+ ret <8 x i16> %retval
+}
+
+define <2 x i32> @test_sqrdmlah_lane_s32(<2 x i32> %acc, <2 x i32> %x, <2 x i32> %v) {
+; CHECK-LABEL: test_sqrdmlah_lane_s32:
+entry:
+ %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
+ %prod = call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> %x, <2 x i32> %shuffle)
+ %retval = call <2 x i32> @llvm.aarch64.neon.sqadd.v2i32(<2 x i32> %acc, <2 x i32> %prod)
+; CHECK-V8a: sqrdmulh v1.2s, v1.2s, v2.s[1]
+; CHECK-V81a: sqrdmlah v0.2s, v1.2s, v2.s[1]
+; CHECK-V81a-apple: sqrdmlah.2s v0, v1, v2[1]
+ ret <2 x i32> %retval
+}
+
+define <4 x i32> @test_sqrdmlahq_lane_s32(<4 x i32> %acc,<4 x i32> %x, <4 x i32> %v) {
+; CHECK-LABEL: test_sqrdmlahq_lane_s32:
+entry:
+ %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> zeroinitializer
+ %prod = call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> %x, <4 x i32> %shuffle)
+ %retval = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %acc, <4 x i32> %prod)
+; CHECK-V8a: sqrdmulh v1.4s, v1.4s, v2.s[0]
+; CHECK-V81a: sqrdmlah v0.4s, v1.4s, v2.s[0]
+; CHECK-V81a-apple: sqrdmlah.4s v0, v1, v2[0]
+ ret <4 x i32> %retval
+}
+
+define <4 x i16> @test_sqrdmlsh_lane_s16(<4 x i16> %acc, <4 x i16> %x, <4 x i16> %v) {
+; CHECK-LABEL: test_sqrdmlsh_lane_s16:
+entry:
+ %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+ %prod = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> %x, <4 x i16> %shuffle)
+ %retval = call <4 x i16> @llvm.aarch64.neon.sqsub.v4i16(<4 x i16> %acc, <4 x i16> %prod)
+; CHECK-V8a: sqrdmulh v1.4h, v1.4h, v2.h[3]
+; CHECK-V81a: sqrdmlsh v0.4h, v1.4h, v2.h[3]
+; CHECK-V81a-apple: sqrdmlsh.4h v0, v1, v2[3]
+ ret <4 x i16> %retval
+}
+
+define <8 x i16> @test_sqrdmlshq_lane_s16(<8 x i16> %acc, <8 x i16> %x, <8 x i16> %v) {
+; CHECK-LABEL: test_sqrdmlshq_lane_s16:
+entry:
+ %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
+ %prod = call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> %x, <8 x i16> %shuffle)
+ %retval = call <8 x i16> @llvm.aarch64.neon.sqsub.v8i16(<8 x i16> %acc, <8 x i16> %prod)
+; CHECK-V8a: sqrdmulh v1.8h, v1.8h, v2.h[2]
+; CHECK-V81a: sqrdmlsh v0.8h, v1.8h, v2.h[2]
+; CHECK-V81a-apple: sqrdmlsh.8h v0, v1, v2[2]
+ ret <8 x i16> %retval
+}
+
+define <2 x i32> @test_sqrdmlsh_lane_s32(<2 x i32> %acc, <2 x i32> %x, <2 x i32> %v) {
+; CHECK-LABEL: test_sqrdmlsh_lane_s32:
+entry:
+ %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
+ %prod = call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> %x, <2 x i32> %shuffle)
+ %retval = call <2 x i32> @llvm.aarch64.neon.sqsub.v2i32(<2 x i32> %acc, <2 x i32> %prod)
+; CHECK-V8a: sqrdmulh v1.2s, v1.2s, v2.s[1]
+; CHECK-V81a: sqrdmlsh v0.2s, v1.2s, v2.s[1]
+; CHECK-V81a-apple: sqrdmlsh.2s v0, v1, v2[1]
+ ret <2 x i32> %retval
+}
+
+define <4 x i32> @test_sqrdmlshq_lane_s32(<4 x i32> %acc,<4 x i32> %x, <4 x i32> %v) {
+; CHECK-LABEL: test_sqrdmlshq_lane_s32:
+entry:
+ %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> zeroinitializer
+ %prod = call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> %x, <4 x i32> %shuffle)
+ %retval = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %acc, <4 x i32> %prod)
+; CHECK-V8a: sqrdmulh v1.4s, v1.4s, v2.s[0]
+; CHECK-V81a: sqrdmlsh v0.4s, v1.4s, v2.s[0]
+; CHECK-V81a-apple: sqrdmlsh.4s v0, v1, v2[0]
+ ret <4 x i32> %retval
+}
+
+;-----------------------------------------------------------------------------
+; RDMA Vector, by element, extracted
+; i16 tests are for vXi16_indexed in SIMDIndexedSQRDMLxHSDTied, with IR in ACLE style
+; i32 tests are for "def : Pat" in SIMDIndexedSQRDMLxHSDTied
+
+define i16 @test_sqrdmlah_extracted_lane_s16(i16 %acc,<4 x i16> %x, <4 x i16> %v) {
+; CHECK-LABEL: test_sqrdmlah_extracted_lane_s16:
+entry:
+ %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 1,i32 1,i32 1,i32 1>
+ %prod = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> %x, <4 x i16> %shuffle)
+ %acc_vec = insertelement <4 x i16> undef, i16 %acc, i64 0
+ %retval_vec = call <4 x i16> @llvm.aarch64.neon.sqadd.v4i16(<4 x i16> %acc_vec, <4 x i16> %prod)
+ %retval = extractelement <4 x i16> %retval_vec, i64 0
+; CHECK-V8a: sqrdmulh {{v[0-9]+}}.4h, v0.4h, v1.h[1]
+; CHECK-V81a: sqrdmlah {{v[2-9]+}}.4h, v0.4h, v1.h[1]
+; CHECK-V81a-apple: sqrdmlah.4h {{v[2-9]+}}, v0, v1[1]
+ ret i16 %retval
+}
+
+define i16 @test_sqrdmlahq_extracted_lane_s16(i16 %acc,<8 x i16> %x, <8 x i16> %v) {
+; CHECK-LABEL: test_sqrdmlahq_extracted_lane_s16:
+entry:
+ %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> <i32 1,i32 1,i32 1,i32 1, i32 1,i32 1,i32 1,i32 1>
+ %prod = call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> %x, <8 x i16> %shuffle)
+ %acc_vec = insertelement <8 x i16> undef, i16 %acc, i64 0
+ %retval_vec = call <8 x i16> @llvm.aarch64.neon.sqadd.v8i16(<8 x i16> %acc_vec, <8 x i16> %prod)
+ %retval = extractelement <8 x i16> %retval_vec, i64 0
+; CHECK-V8a: sqrdmulh {{v[0-9]+}}.8h, v0.8h, v1.h[1]
+; CHECK-V81a: sqrdmlah {{v[2-9]+}}.8h, v0.8h, v1.h[1]
+; CHECK-V81a-apple: sqrdmlah.8h {{v[2-9]+}}, v0, v1[1]
+ ret i16 %retval
+}
+
+define i32 @test_sqrdmlah_extracted_lane_s32(i32 %acc,<2 x i32> %x, <2 x i32> %v) {
+; CHECK-LABEL: test_sqrdmlah_extracted_lane_s32:
+entry:
+ %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
+ %prod = call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> %x, <2 x i32> %shuffle)
+ %extract = extractelement <2 x i32> %prod, i64 0
+ %retval = call i32 @llvm.aarch64.neon.sqadd.i32(i32 %acc, i32 %extract)
+; CHECK-V8a: sqrdmulh v0.2s, v0.2s, v1.s[0]
+; CHECK-V81a: sqrdmlah v2.2s, v0.2s, v1.s[0]
+; CHECK-V81a-apple: sqrdmlah.2s v2, v0, v1[0]
+ ret i32 %retval
+}
+
+define i32 @test_sqrdmlahq_extracted_lane_s32(i32 %acc,<4 x i32> %x, <4 x i32> %v) {
+; CHECK-LABEL: test_sqrdmlahq_extracted_lane_s32:
+entry:
+ %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> zeroinitializer
+ %prod = call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> %x, <4 x i32> %shuffle)
+ %extract = extractelement <4 x i32> %prod, i64 0
+ %retval = call i32 @llvm.aarch64.neon.sqadd.i32(i32 %acc, i32 %extract)
+; CHECK-V8a: sqrdmulh v0.4s, v0.4s, v1.s[0]
+; CHECK-V81a: sqrdmlah v2.4s, v0.4s, v1.s[0]
+; CHECK-V81a-apple: sqrdmlah.4s v2, v0, v1[0]
+ ret i32 %retval
+}
+
+define i16 @test_sqrdmlsh_extracted_lane_s16(i16 %acc,<4 x i16> %x, <4 x i16> %v) {
+; CHECK-LABEL: test_sqrdmlsh_extracted_lane_s16:
+entry:
+ %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 1,i32 1,i32 1,i32 1>
+ %prod = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> %x, <4 x i16> %shuffle)
+ %acc_vec = insertelement <4 x i16> undef, i16 %acc, i64 0
+ %retval_vec = call <4 x i16> @llvm.aarch64.neon.sqsub.v4i16(<4 x i16> %acc_vec, <4 x i16> %prod)
+ %retval = extractelement <4 x i16> %retval_vec, i64 0
+; CHECK-V8a: sqrdmulh {{v[0-9]+}}.4h, v0.4h, v1.h[1]
+; CHECK-V81a: sqrdmlsh {{v[2-9]+}}.4h, v0.4h, v1.h[1]
+; CHECK-V81a-apple: sqrdmlsh.4h {{v[2-9]+}}, v0, v1[1]
+ ret i16 %retval
+}
+
+define i16 @test_sqrdmlshq_extracted_lane_s16(i16 %acc,<8 x i16> %x, <8 x i16> %v) {
+; CHECK-LABEL: test_sqrdmlshq_extracted_lane_s16:
+entry:
+ %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> <i32 1,i32 1,i32 1,i32 1, i32 1,i32 1,i32 1,i32 1>
+ %prod = call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> %x, <8 x i16> %shuffle)
+ %acc_vec = insertelement <8 x i16> undef, i16 %acc, i64 0
+ %retval_vec = call <8 x i16> @llvm.aarch64.neon.sqsub.v8i16(<8 x i16> %acc_vec, <8 x i16> %prod)
+ %retval = extractelement <8 x i16> %retval_vec, i64 0
+; CHECK-V8a: sqrdmulh {{v[0-9]+}}.8h, v0.8h, v1.h[1]
+; CHECK-V81a: sqrdmlsh {{v[2-9]+}}.8h, v0.8h, v1.h[1]
+; CHECK-V81a-apple: sqrdmlsh.8h {{v[2-9]+}}, v0, v1[1]
+ ret i16 %retval
+}
+
+define i32 @test_sqrdmlsh_extracted_lane_s32(i32 %acc,<2 x i32> %x, <2 x i32> %v) {
+; CHECK-LABEL: test_sqrdmlsh_extracted_lane_s32:
+entry:
+ %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer
+ %prod = call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> %x, <2 x i32> %shuffle)
+ %extract = extractelement <2 x i32> %prod, i64 0
+ %retval = call i32 @llvm.aarch64.neon.sqsub.i32(i32 %acc, i32 %extract)
+; CHECK-V8a: sqrdmulh v0.2s, v0.2s, v1.s[0]
+; CHECK-V81a: sqrdmlsh v2.2s, v0.2s, v1.s[0]
+; CHECK-V81a-apple: sqrdmlsh.2s v2, v0, v1[0]
+ ret i32 %retval
+}
+
+define i32 @test_sqrdmlshq_extracted_lane_s32(i32 %acc,<4 x i32> %x, <4 x i32> %v) {
+; CHECK-LABEL: test_sqrdmlshq_extracted_lane_s32:
+entry:
+ %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> zeroinitializer
+ %prod = call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> %x, <4 x i32> %shuffle)
+ %extract = extractelement <4 x i32> %prod, i64 0
+ %retval = call i32 @llvm.aarch64.neon.sqsub.i32(i32 %acc, i32 %extract)
+; CHECK-V8a: sqrdmulh v0.4s, v0.4s, v1.s[0]
+; CHECK-V81a: sqrdmlsh v2.4s, v0.4s, v1.s[0]
+; CHECK-V81a-apple: sqrdmlsh.4s v2, v0, v1[0]
+ ret i32 %retval
+}
+
+;-----------------------------------------------------------------------------
+; RDMA Scalar
+; test for "def : Pat" near SIMDThreeScalarHSTied in AArch64InstInfo.td
+
+define i16 @test_sqrdmlah_v1i16(i16 %acc, i16 %x, i16 %y) {
+; CHECK-LABEL: test_sqrdmlah_v1i16:
+ %x_vec = insertelement <4 x i16> undef, i16 %x, i64 0
+ %y_vec = insertelement <4 x i16> undef, i16 %y, i64 0
+ %prod_vec = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> %x_vec, <4 x i16> %y_vec)
+ %acc_vec = insertelement <4 x i16> undef, i16 %acc, i64 0
+ %retval_vec = call <4 x i16> @llvm.aarch64.neon.sqadd.v4i16(<4 x i16> %acc_vec, <4 x i16> %prod_vec)
+ %retval = extractelement <4 x i16> %retval_vec, i64 0
+; CHECK-V8a: sqrdmulh {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+; CHECK-V81a: sqrdmlah {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+; CHECK-V81a-apple: sqrdmlah.4h {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
+ ret i16 %retval
+}
+
+define i32 @test_sqrdmlah_v1i32(i32 %acc, i32 %x, i32 %y) {
+; CHECK-LABEL: test_sqrdmlah_v1i32:
+ %x_vec = insertelement <4 x i32> undef, i32 %x, i64 0
+ %y_vec = insertelement <4 x i32> undef, i32 %y, i64 0
+ %prod_vec = call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> %x_vec, <4 x i32> %y_vec)
+ %acc_vec = insertelement <4 x i32> undef, i32 %acc, i64 0
+ %retval_vec = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %acc_vec, <4 x i32> %prod_vec)
+ %retval = extractelement <4 x i32> %retval_vec, i64 0
+; CHECK-V8a: sqrdmulh {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+; CHECK-V81a: sqrdmlah {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+; CHECK-V81a-apple: sqrdmlah.4s {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
+ ret i32 %retval
+}
+
+
+define i16 @test_sqrdmlsh_v1i16(i16 %acc, i16 %x, i16 %y) {
+; CHECK-LABEL: test_sqrdmlsh_v1i16:
+ %x_vec = insertelement <4 x i16> undef, i16 %x, i64 0
+ %y_vec = insertelement <4 x i16> undef, i16 %y, i64 0
+ %prod_vec = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> %x_vec, <4 x i16> %y_vec)
+ %acc_vec = insertelement <4 x i16> undef, i16 %acc, i64 0
+ %retval_vec = call <4 x i16> @llvm.aarch64.neon.sqsub.v4i16(<4 x i16> %acc_vec, <4 x i16> %prod_vec)
+ %retval = extractelement <4 x i16> %retval_vec, i64 0
+; CHECK-V8a: sqrdmulh {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+; CHECK-V81a: sqrdmlsh {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+; CHECK-V81a-apple: sqrdmlsh.4h {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
+ ret i16 %retval
+}
+
+define i32 @test_sqrdmlsh_v1i32(i32 %acc, i32 %x, i32 %y) {
+; CHECK-LABEL: test_sqrdmlsh_v1i32:
+ %x_vec = insertelement <4 x i32> undef, i32 %x, i64 0
+ %y_vec = insertelement <4 x i32> undef, i32 %y, i64 0
+ %prod_vec = call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> %x_vec, <4 x i32> %y_vec)
+ %acc_vec = insertelement <4 x i32> undef, i32 %acc, i64 0
+ %retval_vec = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %acc_vec, <4 x i32> %prod_vec)
+ %retval = extractelement <4 x i32> %retval_vec, i64 0
+; CHECK-V8a: sqrdmulh {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+; CHECK-V81a: sqrdmlsh {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+; CHECK-V81a-apple: sqrdmlsh.4s {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
+ ret i32 %retval
+}
+define i32 @test_sqrdmlah_i32(i32 %acc, i32 %mhs, i32 %rhs) {
+; CHECK-LABEL: test_sqrdmlah_i32:
+ %prod = call i32 @llvm.aarch64.neon.sqrdmulh.i32(i32 %mhs, i32 %rhs)
+ %retval = call i32 @llvm.aarch64.neon.sqadd.i32(i32 %acc, i32 %prod)
+; CHECK-V8a: sqrdmulh {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+; CHECK-V81a: sqrdmlah {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+; CHECK-V81a-apple: sqrdmlah {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+ ret i32 %retval
+}
+
+define i32 @test_sqrdmlsh_i32(i32 %acc, i32 %mhs, i32 %rhs) {
+; CHECK-LABEL: test_sqrdmlsh_i32:
+ %prod = call i32 @llvm.aarch64.neon.sqrdmulh.i32(i32 %mhs, i32 %rhs)
+ %retval = call i32 @llvm.aarch64.neon.sqsub.i32(i32 %acc, i32 %prod)
+; CHECK-V8a: sqrdmulh {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+; CHECK-V81a: sqrdmlsh {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+; CHECK-V81a-apple: sqrdmlsh {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+ ret i32 %retval
+}
+
+;-----------------------------------------------------------------------------
+; RDMA Scalar, by element
+; i16 tests are covered by the tests in the section above, with IR in ACLE style
+; i32 tests are for i32_indexed in SIMDIndexedSQRDMLxHSDTied
+
+define i16 @test_sqrdmlah_extract_i16(i16 %acc, i16 %x, <4 x i16> %y_vec) {
+; CHECK-LABEL: test_sqrdmlah_extract_i16:
+ %shuffle = shufflevector <4 x i16> %y_vec, <4 x i16> undef, <4 x i32> <i32 1,i32 1,i32 1,i32 1>
+ %x_vec = insertelement <4 x i16> undef, i16 %x, i64 0
+ %prod = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> %x_vec, <4 x i16> %shuffle)
+ %acc_vec = insertelement <4 x i16> undef, i16 %acc, i64 0
+ %retval_vec = call <4 x i16> @llvm.aarch64.neon.sqadd.v4i16(<4 x i16> %acc_vec, <4 x i16> %prod)
+ %retval = extractelement <4 x i16> %retval_vec, i32 0
+; CHECK-V8a: sqrdmulh {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, v0.h[1]
+; CHECK-V81a: sqrdmlah {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, v0.h[1]
+; CHECK-V81a-apple: sqrdmlah.4h {{v[0-9]+}}, {{v[0-9]+}}, v0[1]
+ ret i16 %retval
+}
+
+define i32 @test_sqrdmlah_extract_i32(i32 %acc, i32 %mhs, <4 x i32> %rhs) {
+; CHECK-LABEL: test_sqrdmlah_extract_i32:
+ %extract = extractelement <4 x i32> %rhs, i32 3
+ %prod = call i32 @llvm.aarch64.neon.sqrdmulh.i32(i32 %mhs, i32 %extract)
+ %retval = call i32 @llvm.aarch64.neon.sqadd.i32(i32 %acc, i32 %prod)
+; CHECK-V8a: sqrdmulh {{s[0-9]+}}, {{s[0-9]+}}, v0.s[3]
+; CHECK-V81a: sqrdmlah {{s[0-9]+}}, {{s[0-9]+}}, v0.s[3]
+; CHECK-V81a-apple: sqrdmlah.s {{s[0-9]+}}, {{s[0-9]+}}, v0[3]
+ ret i32 %retval
+}
+
+define i16 @test_sqrdmlshq_extract_i16(i16 %acc, i16 %x, <8 x i16> %y_vec) {
+; CHECK-LABEL: test_sqrdmlshq_extract_i16:
+ %shuffle = shufflevector <8 x i16> %y_vec, <8 x i16> undef, <8 x i32> <i32 1,i32 1,i32 1,i32 1,i32 1,i32 1,i32 1,i32 1>
+ %x_vec = insertelement <8 x i16> undef, i16 %x, i64 0
+ %prod = call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> %x_vec, <8 x i16> %shuffle)
+ %acc_vec = insertelement <8 x i16> undef, i16 %acc, i64 0
+ %retval_vec = call <8 x i16> @llvm.aarch64.neon.sqsub.v8i16(<8 x i16> %acc_vec, <8 x i16> %prod)
+ %retval = extractelement <8 x i16> %retval_vec, i32 0
+; CHECK-V8a: sqrdmulh {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, v0.h[1]
+; CHECK-V81a: sqrdmlsh {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, v0.h[1]
+; CHECK-V81a-apple: sqrdmlsh.8h {{v[0-9]+}}, {{v[0-9]+}}, v0[1]
+ ret i16 %retval
+}
+
+define i32 @test_sqrdmlsh_extract_i32(i32 %acc, i32 %mhs, <4 x i32> %rhs) {
+; CHECK-LABEL: test_sqrdmlsh_extract_i32:
+ %extract = extractelement <4 x i32> %rhs, i32 3
+ %prod = call i32 @llvm.aarch64.neon.sqrdmulh.i32(i32 %mhs, i32 %extract)
+ %retval = call i32 @llvm.aarch64.neon.sqsub.i32(i32 %acc, i32 %prod)
+; CHECK-V8a: sqrdmulh {{s[0-9]+}}, {{s[0-9]+}}, v0.s[3]
+; CHECK-V81a: sqrdmlsh {{s[0-9]+}}, {{s[0-9]+}}, v0.s[3]
+; CHECK-V81a-apple: sqrdmlsh.s {{s[0-9]+}}, {{s[0-9]+}}, v0[3]
+ ret i32 %retval
+}
diff --git a/test/CodeGen/AArch64/arm64-patchpoint-scratch-regs.ll b/test/CodeGen/AArch64/arm64-patchpoint-scratch-regs.ll
index d39722b9c8a5..2651f119412b 100644
--- a/test/CodeGen/AArch64/arm64-patchpoint-scratch-regs.ll
+++ b/test/CodeGen/AArch64/arm64-patchpoint-scratch-regs.ll
@@ -7,9 +7,9 @@
; CHECK-NEXT: Ltmp
; CHECK-NEXT: nop
define void @clobberScratch(i32* %p) {
- %v = load i32* %p
+ %v = load i32, i32* %p
tail call void asm sideeffect "nop", "~{x0},~{x1},~{x2},~{x3},~{x4},~{x5},~{x6},~{x7},~{x8},~{x9},~{x10},~{x11},~{x12},~{x13},~{x14},~{x15},~{x18},~{x19},~{x20},~{x21},~{x22},~{x23},~{x24},~{x25},~{x26},~{x27},~{x28},~{x29},~{x30},~{x31}"() nounwind
- tail call void (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i64 5, i32 20, i8* null, i32 0, i32* %p, i32 %v)
+ tail call void (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.void(i64 5, i32 20, i8* null, i32 0, i32* %p, i32 %v)
store i32 %v, i32* %p
ret void
}
diff --git a/test/CodeGen/AArch64/arm64-patchpoint-webkit_jscc.ll b/test/CodeGen/AArch64/arm64-patchpoint-webkit_jscc.ll
index 8f79f80ba33d..b8236c5b2479 100644
--- a/test/CodeGen/AArch64/arm64-patchpoint-webkit_jscc.ll
+++ b/test/CodeGen/AArch64/arm64-patchpoint-webkit_jscc.ll
@@ -23,9 +23,9 @@ entry:
; FAST-NEXT: movk x16, #0xbeef
; FAST-NEXT: blr x16
%resolveCall2 = inttoptr i64 281474417671919 to i8*
- %result = tail call webkit_jscc i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 5, i32 20, i8* %resolveCall2, i32 2, i64 %p4, i64 %p2)
+ %result = tail call webkit_jscc i64 (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.i64(i64 5, i32 20, i8* %resolveCall2, i32 2, i64 %p4, i64 %p2)
%resolveCall3 = inttoptr i64 244837814038255 to i8*
- tail call webkit_jscc void (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i64 6, i32 20, i8* %resolveCall3, i32 2, i64 %p4, i64 %result)
+ tail call webkit_jscc void (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.void(i64 6, i32 20, i8* %resolveCall3, i32 2, i64 %p4, i64 %result)
ret void
}
@@ -59,7 +59,7 @@ entry:
; FAST-NEXT: movk x16, #0xbeef
; FAST-NEXT: blr x16
%call = inttoptr i64 281474417671919 to i8*
- %result = call webkit_jscc i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 7, i32 20, i8* %call, i32 6, i64 %callee, i64 2, i64 undef, i32 4, i32 undef, i64 6)
+ %result = call webkit_jscc i64 (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.i64(i64 7, i32 20, i8* %call, i32 6, i64 %callee, i64 2, i64 undef, i32 4, i32 undef, i64 6)
ret i64 %result
}
@@ -101,7 +101,7 @@ entry:
; FAST-NEXT: movk x16, #0xbeef
; FAST-NEXT: blr x16
%call = inttoptr i64 281474417671919 to i8*
- %result = call webkit_jscc i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 7, i32 20, i8* %call, i32 10, i64 %callee, i64 2, i64 undef, i32 4, i32 undef, i64 6, i32 undef, i32 8, i32 undef, i64 10)
+ %result = call webkit_jscc i64 (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.i64(i64 7, i32 20, i8* %call, i32 10, i64 %callee, i64 2, i64 undef, i32 4, i32 undef, i64 6, i32 undef, i32 8, i32 undef, i64 10)
ret i64 %result
}
diff --git a/test/CodeGen/AArch64/arm64-patchpoint.ll b/test/CodeGen/AArch64/arm64-patchpoint.ll
index 278cba5d9f49..d9ec7e50ff80 100644
--- a/test/CodeGen/AArch64/arm64-patchpoint.ll
+++ b/test/CodeGen/AArch64/arm64-patchpoint.ll
@@ -1,5 +1,5 @@
; RUN: llc -mtriple=arm64-apple-darwin -enable-misched=0 -mcpu=cyclone < %s | FileCheck %s
-; RUN: llc -mtriple=arm64-apple-darwin -enable-misched=0 -mcpu=cyclone -fast-isel -fast-isel-abort < %s | FileCheck %s
+; RUN: llc -mtriple=arm64-apple-darwin -enable-misched=0 -mcpu=cyclone -fast-isel -fast-isel-abort=1 < %s | FileCheck %s
; Trivial patchpoint codegen
;
@@ -16,9 +16,9 @@ entry:
; CHECK-NEXT: blr x16
; CHECK: ret
%resolveCall2 = inttoptr i64 244837814094590 to i8*
- %result = tail call i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 2, i32 20, i8* %resolveCall2, i32 4, i64 %p1, i64 %p2, i64 %p3, i64 %p4)
+ %result = tail call i64 (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.i64(i64 2, i32 20, i8* %resolveCall2, i32 4, i64 %p1, i64 %p2, i64 %p3, i64 %p4)
%resolveCall3 = inttoptr i64 244837814094591 to i8*
- tail call void (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i64 3, i32 20, i8* %resolveCall3, i32 2, i64 %p1, i64 %result)
+ tail call void (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.void(i64 3, i32 20, i8* %resolveCall3, i32 2, i64 %p1, i64 %result)
ret i64 %result
}
@@ -38,7 +38,7 @@ entry:
store i64 11, i64* %metadata
store i64 12, i64* %metadata
store i64 13, i64* %metadata
- call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 4, i32 0, i64* %metadata)
+ call void (i64, i32, ...) @llvm.experimental.stackmap(i64 4, i32 0, i64* %metadata)
ret void
}
@@ -50,15 +50,15 @@ define i64 @testLowerConstant(i64 %arg, i64 %tmp2, i64 %tmp10, i64* %tmp33, i64
entry:
%tmp80 = add i64 %tmp79, -16
%tmp81 = inttoptr i64 %tmp80 to i64*
- %tmp82 = load i64* %tmp81, align 8
- tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 14, i32 8, i64 %arg, i64 %tmp2, i64 %tmp10, i64 %tmp82)
- tail call void (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i64 15, i32 32, i8* null, i32 3, i64 %arg, i64 %tmp10, i64 %tmp82)
- %tmp83 = load i64* %tmp33, align 8
+ %tmp82 = load i64, i64* %tmp81, align 8
+ tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 14, i32 8, i64 %arg, i64 %tmp2, i64 %tmp10, i64 %tmp82)
+ tail call void (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.void(i64 15, i32 32, i8* null, i32 3, i64 %arg, i64 %tmp10, i64 %tmp82)
+ %tmp83 = load i64, i64* %tmp33, align 8
%tmp84 = add i64 %tmp83, -24
%tmp85 = inttoptr i64 %tmp84 to i64*
- %tmp86 = load i64* %tmp85, align 8
- tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 17, i32 8, i64 %arg, i64 %tmp10, i64 %tmp86)
- tail call void (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i64 18, i32 32, i8* null, i32 3, i64 %arg, i64 %tmp10, i64 %tmp86)
+ %tmp86 = load i64, i64* %tmp85, align 8
+ tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 17, i32 8, i64 %arg, i64 %tmp10, i64 %tmp86)
+ tail call void (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.void(i64 18, i32 32, i8* null, i32 3, i64 %arg, i64 %tmp10, i64 %tmp86)
ret i64 10
}
@@ -74,7 +74,7 @@ entry:
; CHECK-NEXT: nop
; CHECK-NEXT: ldp
; CHECK-NEXT: ret
- %result = tail call i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 5, i32 20, i8* null, i32 2, i64 %p1, i64 %p2)
+ %result = tail call i64 (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.i64(i64 5, i32 20, i8* null, i32 2, i64 %p1, i64 %p2)
ret void
}
diff --git a/test/CodeGen/AArch64/arm64-pic-local-symbol.ll b/test/CodeGen/AArch64/arm64-pic-local-symbol.ll
index 627e741fc32d..dae243e8da2c 100644
--- a/test/CodeGen/AArch64/arm64-pic-local-symbol.ll
+++ b/test/CodeGen/AArch64/arm64-pic-local-symbol.ll
@@ -7,7 +7,7 @@ define i32 @get() {
; CHECK: get:
; CHECK: adrp x{{[0-9]+}}, a
; CHECK-NEXT: ldr w{{[0-9]+}}, [x{{[0-9]}}, :lo12:a]
- %res = load i32* @a, align 4
+ %res = load i32, i32* @a, align 4
ret i32 %res
}
@@ -15,7 +15,7 @@ define void @foo() nounwind {
; CHECK: foo:
; CHECK: adrp x{{[0-9]}}, .L.str
; CHECK-NEXT: add x{{[0-9]}}, x{{[0-9]}}, :lo12:.L.str
- tail call void @bar(i8* getelementptr inbounds ([6 x i8]* @.str, i64 0, i64 0))
+ tail call void @bar(i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str, i64 0, i64 0))
ret void
}
diff --git a/test/CodeGen/AArch64/arm64-platform-reg.ll b/test/CodeGen/AArch64/arm64-platform-reg.ll
index b0d3ee0ff8a3..60672aa38486 100644
--- a/test/CodeGen/AArch64/arm64-platform-reg.ll
+++ b/test/CodeGen/AArch64/arm64-platform-reg.ll
@@ -11,7 +11,7 @@
@var = global [30 x i64] zeroinitializer
define void @keep_live() {
- %val = load volatile [30 x i64]* @var
+ %val = load volatile [30 x i64], [30 x i64]* @var
store volatile [30 x i64] %val, [30 x i64]* @var
; CHECK: ldr x18
diff --git a/test/CodeGen/AArch64/arm64-popcnt.ll b/test/CodeGen/AArch64/arm64-popcnt.ll
index 117ab3a5e05a..b0b529a13f41 100644
--- a/test/CodeGen/AArch64/arm64-popcnt.ll
+++ b/test/CodeGen/AArch64/arm64-popcnt.ll
@@ -4,7 +4,8 @@
define i32 @cnt32_advsimd(i32 %x) nounwind readnone {
%cnt = tail call i32 @llvm.ctpop.i32(i32 %x)
ret i32 %cnt
-; CHECK: fmov s0, w0
+; CHECK: ubfx x{{[0-9]+}}
+; CHECK: fmov d0, x{{[0-9]+}}
; CHECK: cnt.8b v0, v0
; CHECK: uaddlv.8b h0, v0
; CHECK: fmov w0, s0
@@ -15,7 +16,24 @@ define i32 @cnt32_advsimd(i32 %x) nounwind readnone {
; CHECK-NONEON: and w{{[0-9]+}}, w{{[0-9]+}}, #0x33333333
; CHECK-NONEON: and w{{[0-9]+}}, w{{[0-9]+}}, #0xf0f0f0f
; CHECK-NONEON: mul
+}
+define i32 @cnt32_advsimd_2(<2 x i32> %x) {
+ %1 = extractelement <2 x i32> %x, i64 0
+ %2 = tail call i32 @llvm.ctpop.i32(i32 %1)
+ ret i32 %2
+; CHECK: fmov w0, s0
+; CHECK: fmov d0, x0
+; CHECK: cnt.8b v0, v0
+; CHECK: uaddlv.8b h0, v0
+; CHECK: fmov w0, s0
+; CHECK: ret
+; CHECK-NONEON-LABEL: cnt32_advsimd_2
+; CHECK-NONEON-NOT: 8b
+; CHECK-NONEON: and w{{[0-9]+}}, w{{[0-9]+}}, #0x55555555
+; CHECK-NONEON: and w{{[0-9]+}}, w{{[0-9]+}}, #0x33333333
+; CHECK-NONEON: and w{{[0-9]+}}, w{{[0-9]+}}, #0xf0f0f0f
+; CHECK-NONEON: mul
}
define i64 @cnt64_advsimd(i64 %x) nounwind readnone {
diff --git a/test/CodeGen/AArch64/arm64-prefetch.ll b/test/CodeGen/AArch64/arm64-prefetch.ll
index aac3515c4278..bdeacb231fdd 100644
--- a/test/CodeGen/AArch64/arm64-prefetch.ll
+++ b/test/CodeGen/AArch64/arm64-prefetch.ll
@@ -35,79 +35,79 @@ entry:
; CHECK: prfum pstl1keep
call void @llvm.prefetch(i8* %tmp, i32 1, i32 3, i32 1)
- %tmp1 = load i32* %j.addr, align 4, !tbaa !0
+ %tmp1 = load i32, i32* %j.addr, align 4, !tbaa !0
%add = add nsw i32 %tmp1, %i
%idxprom = sext i32 %add to i64
- %tmp2 = load i32** @a, align 8, !tbaa !3
- %arrayidx = getelementptr inbounds i32* %tmp2, i64 %idxprom
+ %tmp2 = load i32*, i32** @a, align 8, !tbaa !3
+ %arrayidx = getelementptr inbounds i32, i32* %tmp2, i64 %idxprom
%tmp3 = bitcast i32* %arrayidx to i8*
; CHECK: prfm pldl1strm
call void @llvm.prefetch(i8* %tmp3, i32 0, i32 0, i32 1)
- %tmp4 = load i32** @a, align 8, !tbaa !3
- %arrayidx3 = getelementptr inbounds i32* %tmp4, i64 %idxprom
+ %tmp4 = load i32*, i32** @a, align 8, !tbaa !3
+ %arrayidx3 = getelementptr inbounds i32, i32* %tmp4, i64 %idxprom
%tmp5 = bitcast i32* %arrayidx3 to i8*
; CHECK: prfm pldl3keep
call void @llvm.prefetch(i8* %tmp5, i32 0, i32 1, i32 1)
- %tmp6 = load i32** @a, align 8, !tbaa !3
- %arrayidx6 = getelementptr inbounds i32* %tmp6, i64 %idxprom
+ %tmp6 = load i32*, i32** @a, align 8, !tbaa !3
+ %arrayidx6 = getelementptr inbounds i32, i32* %tmp6, i64 %idxprom
%tmp7 = bitcast i32* %arrayidx6 to i8*
; CHECK: prfm pldl2keep
call void @llvm.prefetch(i8* %tmp7, i32 0, i32 2, i32 1)
- %tmp8 = load i32** @a, align 8, !tbaa !3
- %arrayidx9 = getelementptr inbounds i32* %tmp8, i64 %idxprom
+ %tmp8 = load i32*, i32** @a, align 8, !tbaa !3
+ %arrayidx9 = getelementptr inbounds i32, i32* %tmp8, i64 %idxprom
%tmp9 = bitcast i32* %arrayidx9 to i8*
; CHECK: prfm pldl1keep
call void @llvm.prefetch(i8* %tmp9, i32 0, i32 3, i32 1)
- %tmp10 = load i32** @a, align 8, !tbaa !3
- %arrayidx12 = getelementptr inbounds i32* %tmp10, i64 %idxprom
+ %tmp10 = load i32*, i32** @a, align 8, !tbaa !3
+ %arrayidx12 = getelementptr inbounds i32, i32* %tmp10, i64 %idxprom
%tmp11 = bitcast i32* %arrayidx12 to i8*
; CHECK: prfm plil1strm
call void @llvm.prefetch(i8* %tmp11, i32 0, i32 0, i32 0)
- %tmp12 = load i32** @a, align 8, !tbaa !3
- %arrayidx15 = getelementptr inbounds i32* %tmp12, i64 %idxprom
+ %tmp12 = load i32*, i32** @a, align 8, !tbaa !3
+ %arrayidx15 = getelementptr inbounds i32, i32* %tmp12, i64 %idxprom
%tmp13 = bitcast i32* %arrayidx3 to i8*
; CHECK: prfm plil3keep
call void @llvm.prefetch(i8* %tmp13, i32 0, i32 1, i32 0)
- %tmp14 = load i32** @a, align 8, !tbaa !3
- %arrayidx18 = getelementptr inbounds i32* %tmp14, i64 %idxprom
+ %tmp14 = load i32*, i32** @a, align 8, !tbaa !3
+ %arrayidx18 = getelementptr inbounds i32, i32* %tmp14, i64 %idxprom
%tmp15 = bitcast i32* %arrayidx6 to i8*
; CHECK: prfm plil2keep
call void @llvm.prefetch(i8* %tmp15, i32 0, i32 2, i32 0)
- %tmp16 = load i32** @a, align 8, !tbaa !3
- %arrayidx21 = getelementptr inbounds i32* %tmp16, i64 %idxprom
+ %tmp16 = load i32*, i32** @a, align 8, !tbaa !3
+ %arrayidx21 = getelementptr inbounds i32, i32* %tmp16, i64 %idxprom
%tmp17 = bitcast i32* %arrayidx9 to i8*
; CHECK: prfm plil1keep
call void @llvm.prefetch(i8* %tmp17, i32 0, i32 3, i32 0)
- %tmp18 = load i32** @a, align 8, !tbaa !3
- %arrayidx24 = getelementptr inbounds i32* %tmp18, i64 %idxprom
+ %tmp18 = load i32*, i32** @a, align 8, !tbaa !3
+ %arrayidx24 = getelementptr inbounds i32, i32* %tmp18, i64 %idxprom
%tmp19 = bitcast i32* %arrayidx12 to i8*
; CHECK: prfm pstl1strm
call void @llvm.prefetch(i8* %tmp19, i32 1, i32 0, i32 1)
- %tmp20 = load i32** @a, align 8, !tbaa !3
- %arrayidx27 = getelementptr inbounds i32* %tmp20, i64 %idxprom
+ %tmp20 = load i32*, i32** @a, align 8, !tbaa !3
+ %arrayidx27 = getelementptr inbounds i32, i32* %tmp20, i64 %idxprom
%tmp21 = bitcast i32* %arrayidx15 to i8*
; CHECK: prfm pstl3keep
call void @llvm.prefetch(i8* %tmp21, i32 1, i32 1, i32 1)
- %tmp22 = load i32** @a, align 8, !tbaa !3
- %arrayidx30 = getelementptr inbounds i32* %tmp22, i64 %idxprom
+ %tmp22 = load i32*, i32** @a, align 8, !tbaa !3
+ %arrayidx30 = getelementptr inbounds i32, i32* %tmp22, i64 %idxprom
%tmp23 = bitcast i32* %arrayidx18 to i8*
; CHECK: prfm pstl2keep
call void @llvm.prefetch(i8* %tmp23, i32 1, i32 2, i32 1)
- %tmp24 = load i32** @a, align 8, !tbaa !3
- %arrayidx33 = getelementptr inbounds i32* %tmp24, i64 %idxprom
+ %tmp24 = load i32*, i32** @a, align 8, !tbaa !3
+ %arrayidx33 = getelementptr inbounds i32, i32* %tmp24, i64 %idxprom
%tmp25 = bitcast i32* %arrayidx21 to i8*
; CHECK: prfm pstl1keep
diff --git a/test/CodeGen/AArch64/arm64-promote-const.ll b/test/CodeGen/AArch64/arm64-promote-const.ll
index 94fd8e33b892..0be2f5c08c00 100644
--- a/test/CodeGen/AArch64/arm64-promote-const.ll
+++ b/test/CodeGen/AArch64/arm64-promote-const.ll
@@ -40,7 +40,7 @@ define <16 x i8> @test2(<16 x i8> %arg) {
entry:
; PROMOTED-LABEL: test2:
; In stress mode, constant vector are promoted
-; PROMOTED: adrp [[PAGEADDR:x[0-9]+]], [[CSTV1:__PromotedConst[0-9]+]]@PAGE
+; PROMOTED: adrp [[PAGEADDR:x[0-9]+]], [[CSTV1:__PromotedConst.[0-9]+]]@PAGE
; PROMOTED: ldr q[[REGNUM:[0-9]+]], {{\[}}[[PAGEADDR]], [[CSTV1]]@PAGEOFF]
; Destination register is defined by ABI
; PROMOTED-NEXT: add.16b v0, v0, v[[REGNUM]]
@@ -63,49 +63,23 @@ entry:
ret <16 x i8> %add.i9
}
-; Two different uses of the sane constant in two different basic blocks,
+; Two different uses of the same constant in two different basic blocks,
; one dominates the other
define <16 x i8> @test3(<16 x i8> %arg, i32 %path) {
; PROMOTED-LABEL: test3:
; In stress mode, constant vector are promoted
; Since, the constant is the same as the previous function,
; the same address must be used
-; PROMOTED: adrp [[PAGEADDR:x[0-9]+]], [[CSTV1]]@PAGE
-; PROMOTED-NEXT: ldr q[[REGNUM:[0-9]+]], {{\[}}[[PAGEADDR]], [[CSTV1]]@PAGEOFF]
-; Destination register is defined by ABI
-; PROMOTED-NEXT: add.16b v0, v0, v[[REGNUM]]
-; PROMOTED-NEXT: cbnz w0, [[LABEL:LBB.*]]
-; Next BB
-; PROMOTED: adrp [[PAGEADDR:x[0-9]+]], [[CSTV2:__PromotedConst[0-9]+]]@PAGE
-; PROMOTED-NEXT: ldr q[[REGNUM]], {{\[}}[[PAGEADDR]], [[CSTV2]]@PAGEOFF]
-; Next BB
-; PROMOTED-NEXT: [[LABEL]]:
-; PROMOTED-NEXT: mul.16b [[DESTV:v[0-9]+]], v0, v[[REGNUM]]
-; PROMOTED-NEXT: add.16b v0, v0, [[DESTV]]
-; PROMOTED-NEXT: ret
+; PROMOTED: ldr
+; PROMOTED: ldr
+; PROMOTED-NOT: ldr
+; PROMOTED: ret
; REGULAR-LABEL: test3:
-; Regular mode does not elimitate common sub expression by its own.
-; In other words, the same loads appears several times.
-; REGULAR: adrp [[PAGEADDR:x[0-9]+]], [[CSTLABEL1:lCP.*]]@PAGE
-; REGULAR-NEXT: ldr q[[REGNUM:[0-9]+]], {{\[}}[[PAGEADDR]], [[CSTLABEL1]]@PAGEOFF]
-; Destination register is defined by ABI
-; REGULAR-NEXT: add.16b v0, v0, v[[REGNUM]]
-; REGULAR-NEXT: cbz w0, [[LABELelse:LBB.*]]
-; Next BB
-; Redundant load
-; REGULAR: adrp [[PAGEADDR:x[0-9]+]], [[CSTLABEL1]]@PAGE
-; REGULAR-NEXT: ldr q[[REGNUM]], {{\[}}[[PAGEADDR]], [[CSTLABEL1]]@PAGEOFF]
-; REGULAR-NEXT: b [[LABELend:LBB.*]]
-; Next BB
-; REGULAR-NEXT: [[LABELelse]]
-; REGULAR-NEXT: adrp [[PAGEADDR:x[0-9]+]], [[CSTLABEL2:lCP.*]]@PAGE
-; REGULAR-NEXT: ldr q[[REGNUM]], {{\[}}[[PAGEADDR]], [[CSTLABEL2]]@PAGEOFF]
-; Next BB
-; REGULAR-NEXT: [[LABELend]]:
-; REGULAR-NEXT: mul.16b [[DESTV:v[0-9]+]], v0, v[[REGNUM]]
-; REGULAR-NEXT: add.16b v0, v0, [[DESTV]]
-; REGULAR-NEXT: ret
+; REGULAR: ldr
+; REGULAR: ldr
+; REGULAR-NOT: ldr
+; REGULAR: ret
entry:
%add.i = add <16 x i8> %arg, <i8 -40, i8 -93, i8 -118, i8 -99, i8 -75, i8 -105, i8 74, i8 -110, i8 62, i8 -115, i8 -119, i8 -120, i8 34, i8 -124, i8 0, i8 -128>
%tobool = icmp eq i32 %path, 0
@@ -132,33 +106,14 @@ define <16 x i8> @test4(<16 x i8> %arg, i32 %path) {
; In stress mode, constant vector are promoted
; Since, the constant is the same as the previous function,
; the same address must be used
-; PROMOTED: adrp [[PAGEADDR:x[0-9]+]], [[CSTV1]]@PAGE
-; PROMOTED-NEXT: ldr q[[REGNUM:[0-9]+]], {{\[}}[[PAGEADDR]], [[CSTV1]]@PAGEOFF]
-; Destination register is defined by ABI
-; PROMOTED-NEXT: add.16b v0, v0, v[[REGNUM]]
-; PROMOTED-NEXT: cbz w0, [[LABEL:LBB.*]]
-; Next BB
-; PROMOTED: mul.16b v0, v0, v[[REGNUM]]
-; Next BB
-; PROMOTED-NEXT: [[LABEL]]:
-; PROMOTED-NEXT: ret
-
+; PROMOTED: ldr
+; PROMOTED-NOT: ldr
+; PROMOTED: ret
; REGULAR-LABEL: test4:
-; REGULAR: adrp [[PAGEADDR:x[0-9]+]], [[CSTLABEL3:lCP.*]]@PAGE
-; REGULAR-NEXT: ldr q[[REGNUM:[0-9]+]], {{\[}}[[PAGEADDR]], [[CSTLABEL3]]@PAGEOFF]
-; Destination register is defined by ABI
-; REGULAR-NEXT: add.16b v0, v0, v[[REGNUM]]
-; REGULAR-NEXT: cbz w0, [[LABEL:LBB.*]]
-; Next BB
-; Redundant expression
-; REGULAR: adrp [[PAGEADDR:x[0-9]+]], [[CSTLABEL3]]@PAGE
-; REGULAR-NEXT: ldr q[[REGNUM:[0-9]+]], {{\[}}[[PAGEADDR]], [[CSTLABEL3]]@PAGEOFF]
-; Destination register is defined by ABI
-; REGULAR-NEXT: mul.16b v0, v0, v[[REGNUM]]
-; Next BB
-; REGULAR-NEXT: [[LABEL]]:
-; REGULAR-NEXT: ret
+; REGULAR: ldr
+; REGULAR-NOT: ldr
+; REGULAR: ret
entry:
%add.i = add <16 x i8> %arg, <i8 -40, i8 -93, i8 -118, i8 -99, i8 -75, i8 -105, i8 74, i8 -110, i8 62, i8 -115, i8 -119, i8 -120, i8 34, i8 -124, i8 0, i8 -128>
%tobool = icmp eq i32 %path, 0
@@ -180,39 +135,13 @@ define <16 x i8> @test5(<16 x i8> %arg, i32 %path) {
; In stress mode, constant vector are promoted
; Since, the constant is the same as the previous function,
; the same address must be used
-; PROMOTED: adrp [[PAGEADDR:x[0-9]+]], [[CSTV1]]@PAGE
-; PROMOTED-NEXT: ldr q[[REGNUM:[0-9]+]], {{\[}}[[PAGEADDR]], [[CSTV1]]@PAGEOFF]
-; PROMOTED-NEXT: cbz w0, [[LABEL:LBB.*]]
-; Next BB
-; PROMOTED: add.16b [[DESTV:v[0-9]+]], v0, v[[REGNUM]]
-; PROMOTED-NEXT: mul.16b v[[REGNUM]], [[DESTV]], v[[REGNUM]]
-; Next BB
-; PROMOTED-NEXT: [[LABEL]]:
-; PROMOTED-NEXT: mul.16b [[TMP1:v[0-9]+]], v[[REGNUM]], v[[REGNUM]]
-; PROMOTED-NEXT: mul.16b [[TMP2:v[0-9]+]], [[TMP1]], [[TMP1]]
-; PROMOTED-NEXT: mul.16b [[TMP3:v[0-9]+]], [[TMP2]], [[TMP2]]
-; PROMOTED-NEXT: mul.16b v0, [[TMP3]], [[TMP3]]
-; PROMOTED-NEXT: ret
+; PROMOTED: ldr
+; PROMOTED-NOT: ldr
+; PROMOTED: ret
; REGULAR-LABEL: test5:
-; REGULAR: cbz w0, [[LABELelse:LBB.*]]
-; Next BB
-; REGULAR: adrp [[PAGEADDR:x[0-9]+]], [[CSTLABEL:lCP.*]]@PAGE
-; REGULAR-NEXT: ldr q[[REGNUM:[0-9]+]], {{\[}}[[PAGEADDR]], [[CSTLABEL]]@PAGEOFF]
-; REGULAR-NEXT: add.16b [[DESTV:v[0-9]+]], v0, v[[REGNUM]]
-; REGULAR-NEXT: mul.16b v[[DESTREGNUM:[0-9]+]], [[DESTV]], v[[REGNUM]]
-; REGULAR-NEXT: b [[LABELend:LBB.*]]
-; Next BB
-; REGULAR-NEXT: [[LABELelse]]
-; REGULAR-NEXT: adrp [[PAGEADDR:x[0-9]+]], [[CSTLABEL:lCP.*]]@PAGE
-; REGULAR-NEXT: ldr q[[DESTREGNUM]], {{\[}}[[PAGEADDR]], [[CSTLABEL]]@PAGEOFF]
-; Next BB
-; REGULAR-NEXT: [[LABELend]]:
-; REGULAR-NEXT: mul.16b [[TMP1:v[0-9]+]], v[[DESTREGNUM]], v[[DESTREGNUM]]
-; REGULAR-NEXT: mul.16b [[TMP2:v[0-9]+]], [[TMP1]], [[TMP1]]
-; REGULAR-NEXT: mul.16b [[TMP3:v[0-9]+]], [[TMP2]], [[TMP2]]
-; REGULAR-NEXT: mul.16b v0, [[TMP3]], [[TMP3]]
-; REGULAR-NEXT: ret
+; REGULAR: ldr
+; REGULAR: ret
entry:
%tobool = icmp eq i32 %path, 0
br i1 %tobool, label %if.end, label %if.then
diff --git a/test/CodeGen/AArch64/arm64-redzone.ll b/test/CodeGen/AArch64/arm64-redzone.ll
index 9b0c384c4d9e..837249cb26c6 100644
--- a/test/CodeGen/AArch64/arm64-redzone.ll
+++ b/test/CodeGen/AArch64/arm64-redzone.ll
@@ -9,10 +9,10 @@ define i32 @foo(i32 %a, i32 %b) nounwind ssp {
%x = alloca i32, align 4
store i32 %a, i32* %a.addr, align 4
store i32 %b, i32* %b.addr, align 4
- %tmp = load i32* %a.addr, align 4
- %tmp1 = load i32* %b.addr, align 4
+ %tmp = load i32, i32* %a.addr, align 4
+ %tmp1 = load i32, i32* %b.addr, align 4
%add = add nsw i32 %tmp, %tmp1
store i32 %add, i32* %x, align 4
- %tmp2 = load i32* %x, align 4
+ %tmp2 = load i32, i32* %x, align 4
ret i32 %tmp2
}
diff --git a/test/CodeGen/AArch64/arm64-register-offset-addressing.ll b/test/CodeGen/AArch64/arm64-register-offset-addressing.ll
index 045712bea6ac..7078ffca5218 100644
--- a/test/CodeGen/AArch64/arm64-register-offset-addressing.ll
+++ b/test/CodeGen/AArch64/arm64-register-offset-addressing.ll
@@ -5,8 +5,8 @@ define i8 @test_64bit_add(i16* %a, i64 %b) {
; CHECK: lsl [[REG:x[0-9]+]], x1, #1
; CHECK: ldrb w0, [x0, [[REG]]]
; CHECK: ret
- %tmp1 = getelementptr inbounds i16* %a, i64 %b
- %tmp2 = load i16* %tmp1
+ %tmp1 = getelementptr inbounds i16, i16* %a, i64 %b
+ %tmp2 = load i16, i16* %tmp1
%tmp3 = trunc i16 %tmp2 to i8
ret i8 %tmp3
}
@@ -18,8 +18,8 @@ define void @ldst_8bit(i8* %base, i64 %offset) minsize {
%off32.sext.tmp = shl i64 %offset, 32
%off32.sext = ashr i64 %off32.sext.tmp, 32
- %addr8_sxtw = getelementptr i8* %base, i64 %off32.sext
- %val8_sxtw = load volatile i8* %addr8_sxtw
+ %addr8_sxtw = getelementptr i8, i8* %base, i64 %off32.sext
+ %val8_sxtw = load volatile i8, i8* %addr8_sxtw
%val32_signed = sext i8 %val8_sxtw to i32
store volatile i32 %val32_signed, i32* @var_32bit
; CHECK: ldrsb {{w[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, sxtw]
@@ -28,7 +28,7 @@ define void @ldst_8bit(i8* %base, i64 %offset) minsize {
%offset_uxtw = and i64 %offset, 4294967295
%addrint1_uxtw = add i64 %addrint_uxtw, %offset_uxtw
%addr_uxtw = inttoptr i64 %addrint1_uxtw to i8*
- %val8_uxtw = load volatile i8* %addr_uxtw
+ %val8_uxtw = load volatile i8, i8* %addr_uxtw
%newval8 = add i8 %val8_uxtw, 1
store volatile i8 %newval8, i8* @var_8bit
; CHECK: ldrb {{w[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, uxtw]
@@ -44,7 +44,7 @@ define void @ldst_16bit(i16* %base, i64 %offset) minsize {
%offset_uxtw = and i64 %offset, 4294967295
%addrint1_uxtw = add i64 %addrint_uxtw, %offset_uxtw
%addr_uxtw = inttoptr i64 %addrint1_uxtw to i16*
- %val8_uxtw = load volatile i16* %addr_uxtw
+ %val8_uxtw = load volatile i16, i16* %addr_uxtw
%newval8 = add i16 %val8_uxtw, 1
store volatile i16 %newval8, i16* @var_16bit
; CHECK: ldrh {{w[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, uxtw]
@@ -54,7 +54,7 @@ define void @ldst_16bit(i16* %base, i64 %offset) minsize {
%offset_sxtw = ashr i64 %offset_sxtw.tmp, 32
%addrint_sxtw = add i64 %base_sxtw, %offset_sxtw
%addr_sxtw = inttoptr i64 %addrint_sxtw to i16*
- %val16_sxtw = load volatile i16* %addr_sxtw
+ %val16_sxtw = load volatile i16, i16* %addr_sxtw
%val64_signed = sext i16 %val16_sxtw to i64
store volatile i64 %val64_signed, i64* @var_64bit
; CHECK: ldrsh {{x[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, sxtw]
@@ -65,7 +65,7 @@ define void @ldst_16bit(i16* %base, i64 %offset) minsize {
%offset2_uxtwN = shl i64 %offset_uxtwN, 1
%addrint_uxtwN = add i64 %base_uxtwN, %offset2_uxtwN
%addr_uxtwN = inttoptr i64 %addrint_uxtwN to i16*
- %val32 = load volatile i32* @var_32bit
+ %val32 = load volatile i32, i32* @var_32bit
%val16_trunc32 = trunc i32 %val32 to i16
store volatile i16 %val16_trunc32, i16* %addr_uxtwN
; CHECK: strh {{w[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, uxtw #1]
@@ -79,7 +79,7 @@ define void @ldst_32bit(i32* %base, i64 %offset) minsize {
%offset_uxtw = and i64 %offset, 4294967295
%addrint1_uxtw = add i64 %addrint_uxtw, %offset_uxtw
%addr_uxtw = inttoptr i64 %addrint1_uxtw to i32*
- %val32_uxtw = load volatile i32* %addr_uxtw
+ %val32_uxtw = load volatile i32, i32* %addr_uxtw
%newval32 = add i32 %val32_uxtw, 1
store volatile i32 %newval32, i32* @var_32bit
; CHECK: ldr {{w[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, uxtw]
@@ -89,7 +89,7 @@ define void @ldst_32bit(i32* %base, i64 %offset) minsize {
%offset_sxtw = ashr i64 %offset_sxtw.tmp, 32
%addrint_sxtw = add i64 %base_sxtw, %offset_sxtw
%addr_sxtw = inttoptr i64 %addrint_sxtw to i32*
- %val32_sxtw = load volatile i32* %addr_sxtw
+ %val32_sxtw = load volatile i32, i32* %addr_sxtw
%val64_signed = sext i32 %val32_sxtw to i64
store volatile i64 %val64_signed, i64* @var_64bit
; CHECK: ldrsw {{x[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, sxtw]
@@ -100,7 +100,7 @@ define void @ldst_32bit(i32* %base, i64 %offset) minsize {
%offset2_uxtwN = shl i64 %offset_uxtwN, 2
%addrint_uxtwN = add i64 %base_uxtwN, %offset2_uxtwN
%addr_uxtwN = inttoptr i64 %addrint_uxtwN to i32*
- %val32 = load volatile i32* @var_32bit
+ %val32 = load volatile i32, i32* @var_32bit
store volatile i32 %val32, i32* %addr_uxtwN
; CHECK: str {{w[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, uxtw #2]
ret void
@@ -113,7 +113,7 @@ define void @ldst_64bit(i64* %base, i64 %offset) minsize {
%offset_uxtw = and i64 %offset, 4294967295
%addrint1_uxtw = add i64 %addrint_uxtw, %offset_uxtw
%addr_uxtw = inttoptr i64 %addrint1_uxtw to i64*
- %val64_uxtw = load volatile i64* %addr_uxtw
+ %val64_uxtw = load volatile i64, i64* %addr_uxtw
%newval8 = add i64 %val64_uxtw, 1
store volatile i64 %newval8, i64* @var_64bit
; CHECK: ldr {{x[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, uxtw]
@@ -123,7 +123,7 @@ define void @ldst_64bit(i64* %base, i64 %offset) minsize {
%offset_sxtw = ashr i64 %offset_sxtw.tmp, 32
%addrint_sxtw = add i64 %base_sxtw, %offset_sxtw
%addr_sxtw = inttoptr i64 %addrint_sxtw to i64*
- %val64_sxtw = load volatile i64* %addr_sxtw
+ %val64_sxtw = load volatile i64, i64* %addr_sxtw
store volatile i64 %val64_sxtw, i64* @var_64bit
; CHECK: ldr {{x[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, sxtw]
@@ -133,7 +133,7 @@ define void @ldst_64bit(i64* %base, i64 %offset) minsize {
%offset2_uxtwN = shl i64 %offset_uxtwN, 3
%addrint_uxtwN = add i64 %base_uxtwN, %offset2_uxtwN
%addr_uxtwN = inttoptr i64 %addrint_uxtwN to i64*
- %val64 = load volatile i64* @var_64bit
+ %val64 = load volatile i64, i64* @var_64bit
store volatile i64 %val64, i64* %addr_uxtwN
; CHECK: str {{x[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, uxtw #3]
ret void
diff --git a/test/CodeGen/AArch64/arm64-regress-interphase-shift.ll b/test/CodeGen/AArch64/arm64-regress-interphase-shift.ll
index fec89334801e..d376aaf56817 100644
--- a/test/CodeGen/AArch64/arm64-regress-interphase-shift.ll
+++ b/test/CodeGen/AArch64/arm64-regress-interphase-shift.ll
@@ -11,7 +11,7 @@
define void @foo(i64* nocapture %d) {
; CHECK-LABEL: foo:
; CHECK: rorv
- %tmp = load i64* undef, align 8
+ %tmp = load i64, i64* undef, align 8
%sub397 = sub i64 0, %tmp
%and398 = and i64 %sub397, 4294967295
%shr404 = lshr i64 %and398, 0
diff --git a/test/CodeGen/AArch64/arm64-return-vector.ll b/test/CodeGen/AArch64/arm64-return-vector.ll
index 9457d8bc6d07..3262c91c04df 100644
--- a/test/CodeGen/AArch64/arm64-return-vector.ll
+++ b/test/CodeGen/AArch64/arm64-return-vector.ll
@@ -6,6 +6,6 @@ define <2 x double> @test(<2 x double>* %p) nounwind {
; CHECK: test
; CHECK: ldr q0, [x0]
; CHECK: ret
- %tmp1 = load <2 x double>* %p, align 16
+ %tmp1 = load <2 x double>, <2 x double>* %p, align 16
ret <2 x double> %tmp1
}
diff --git a/test/CodeGen/AArch64/arm64-rev.ll b/test/CodeGen/AArch64/arm64-rev.ll
index 30d9f4f3e670..74356d76d3c8 100644
--- a/test/CodeGen/AArch64/arm64-rev.ll
+++ b/test/CodeGen/AArch64/arm64-rev.ll
@@ -64,7 +64,7 @@ entry:
define <8 x i8> @test_vrev64D8(<8 x i8>* %A) nounwind {
;CHECK-LABEL: test_vrev64D8:
;CHECK: rev64.8b
- %tmp1 = load <8 x i8>* %A
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
%tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
ret <8 x i8> %tmp2
}
@@ -72,7 +72,7 @@ define <8 x i8> @test_vrev64D8(<8 x i8>* %A) nounwind {
define <4 x i16> @test_vrev64D16(<4 x i16>* %A) nounwind {
;CHECK-LABEL: test_vrev64D16:
;CHECK: rev64.4h
- %tmp1 = load <4 x i16>* %A
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
%tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
ret <4 x i16> %tmp2
}
@@ -80,7 +80,7 @@ define <4 x i16> @test_vrev64D16(<4 x i16>* %A) nounwind {
define <2 x i32> @test_vrev64D32(<2 x i32>* %A) nounwind {
;CHECK-LABEL: test_vrev64D32:
;CHECK: rev64.2s
- %tmp1 = load <2 x i32>* %A
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
%tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <2 x i32> <i32 1, i32 0>
ret <2 x i32> %tmp2
}
@@ -88,7 +88,7 @@ define <2 x i32> @test_vrev64D32(<2 x i32>* %A) nounwind {
define <2 x float> @test_vrev64Df(<2 x float>* %A) nounwind {
;CHECK-LABEL: test_vrev64Df:
;CHECK: rev64.2s
- %tmp1 = load <2 x float>* %A
+ %tmp1 = load <2 x float>, <2 x float>* %A
%tmp2 = shufflevector <2 x float> %tmp1, <2 x float> undef, <2 x i32> <i32 1, i32 0>
ret <2 x float> %tmp2
}
@@ -96,7 +96,7 @@ define <2 x float> @test_vrev64Df(<2 x float>* %A) nounwind {
define <16 x i8> @test_vrev64Q8(<16 x i8>* %A) nounwind {
;CHECK-LABEL: test_vrev64Q8:
;CHECK: rev64.16b
- %tmp1 = load <16 x i8>* %A
+ %tmp1 = load <16 x i8>, <16 x i8>* %A
%tmp2 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <16 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>
ret <16 x i8> %tmp2
}
@@ -104,7 +104,7 @@ define <16 x i8> @test_vrev64Q8(<16 x i8>* %A) nounwind {
define <8 x i16> @test_vrev64Q16(<8 x i16>* %A) nounwind {
;CHECK-LABEL: test_vrev64Q16:
;CHECK: rev64.8h
- %tmp1 = load <8 x i16>* %A
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
%tmp2 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
ret <8 x i16> %tmp2
}
@@ -112,7 +112,7 @@ define <8 x i16> @test_vrev64Q16(<8 x i16>* %A) nounwind {
define <4 x i32> @test_vrev64Q32(<4 x i32>* %A) nounwind {
;CHECK-LABEL: test_vrev64Q32:
;CHECK: rev64.4s
- %tmp1 = load <4 x i32>* %A
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
%tmp2 = shufflevector <4 x i32> %tmp1, <4 x i32> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
ret <4 x i32> %tmp2
}
@@ -120,7 +120,7 @@ define <4 x i32> @test_vrev64Q32(<4 x i32>* %A) nounwind {
define <4 x float> @test_vrev64Qf(<4 x float>* %A) nounwind {
;CHECK-LABEL: test_vrev64Qf:
;CHECK: rev64.4s
- %tmp1 = load <4 x float>* %A
+ %tmp1 = load <4 x float>, <4 x float>* %A
%tmp2 = shufflevector <4 x float> %tmp1, <4 x float> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
ret <4 x float> %tmp2
}
@@ -128,7 +128,7 @@ define <4 x float> @test_vrev64Qf(<4 x float>* %A) nounwind {
define <8 x i8> @test_vrev32D8(<8 x i8>* %A) nounwind {
;CHECK-LABEL: test_vrev32D8:
;CHECK: rev32.8b
- %tmp1 = load <8 x i8>* %A
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
%tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
ret <8 x i8> %tmp2
}
@@ -136,7 +136,7 @@ define <8 x i8> @test_vrev32D8(<8 x i8>* %A) nounwind {
define <4 x i16> @test_vrev32D16(<4 x i16>* %A) nounwind {
;CHECK-LABEL: test_vrev32D16:
;CHECK: rev32.4h
- %tmp1 = load <4 x i16>* %A
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
%tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
ret <4 x i16> %tmp2
}
@@ -144,7 +144,7 @@ define <4 x i16> @test_vrev32D16(<4 x i16>* %A) nounwind {
define <16 x i8> @test_vrev32Q8(<16 x i8>* %A) nounwind {
;CHECK-LABEL: test_vrev32Q8:
;CHECK: rev32.16b
- %tmp1 = load <16 x i8>* %A
+ %tmp1 = load <16 x i8>, <16 x i8>* %A
%tmp2 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
ret <16 x i8> %tmp2
}
@@ -152,7 +152,7 @@ define <16 x i8> @test_vrev32Q8(<16 x i8>* %A) nounwind {
define <8 x i16> @test_vrev32Q16(<8 x i16>* %A) nounwind {
;CHECK-LABEL: test_vrev32Q16:
;CHECK: rev32.8h
- %tmp1 = load <8 x i16>* %A
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
%tmp2 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
ret <8 x i16> %tmp2
}
@@ -160,7 +160,7 @@ define <8 x i16> @test_vrev32Q16(<8 x i16>* %A) nounwind {
define <8 x i8> @test_vrev16D8(<8 x i8>* %A) nounwind {
;CHECK-LABEL: test_vrev16D8:
;CHECK: rev16.8b
- %tmp1 = load <8 x i8>* %A
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
%tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
ret <8 x i8> %tmp2
}
@@ -168,7 +168,7 @@ define <8 x i8> @test_vrev16D8(<8 x i8>* %A) nounwind {
define <16 x i8> @test_vrev16Q8(<16 x i8>* %A) nounwind {
;CHECK-LABEL: test_vrev16Q8:
;CHECK: rev16.16b
- %tmp1 = load <16 x i8>* %A
+ %tmp1 = load <16 x i8>, <16 x i8>* %A
%tmp2 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
ret <16 x i8> %tmp2
}
@@ -178,7 +178,7 @@ define <16 x i8> @test_vrev16Q8(<16 x i8>* %A) nounwind {
define <8 x i8> @test_vrev64D8_undef(<8 x i8>* %A) nounwind {
;CHECK-LABEL: test_vrev64D8_undef:
;CHECK: rev64.8b
- %tmp1 = load <8 x i8>* %A
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
%tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> <i32 7, i32 undef, i32 undef, i32 4, i32 3, i32 2, i32 1, i32 0>
ret <8 x i8> %tmp2
}
@@ -186,7 +186,7 @@ define <8 x i8> @test_vrev64D8_undef(<8 x i8>* %A) nounwind {
define <8 x i16> @test_vrev32Q16_undef(<8 x i16>* %A) nounwind {
;CHECK-LABEL: test_vrev32Q16_undef:
;CHECK: rev32.8h
- %tmp1 = load <8 x i16>* %A
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
%tmp2 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <8 x i32> <i32 undef, i32 0, i32 undef, i32 2, i32 5, i32 4, i32 7, i32 undef>
ret <8 x i16> %tmp2
}
@@ -199,7 +199,7 @@ define void @test_vrev64(<4 x i16>* nocapture %source, <2 x i16>* nocapture %dst
; CHECK: st1.h
entry:
%0 = bitcast <4 x i16>* %source to <8 x i16>*
- %tmp2 = load <8 x i16>* %0, align 4
+ %tmp2 = load <8 x i16>, <8 x i16>* %0, align 4
%tmp3 = extractelement <8 x i16> %tmp2, i32 6
%tmp5 = insertelement <2 x i16> undef, i16 %tmp3, i32 0
%tmp9 = extractelement <8 x i16> %tmp2, i32 5
@@ -215,9 +215,9 @@ define void @float_vrev64(float* nocapture %source, <4 x float>* nocapture %dest
; CHECK: rev64.4s
entry:
%0 = bitcast float* %source to <4 x float>*
- %tmp2 = load <4 x float>* %0, align 4
+ %tmp2 = load <4 x float>, <4 x float>* %0, align 4
%tmp5 = shufflevector <4 x float> <float 0.000000e+00, float undef, float undef, float undef>, <4 x float> %tmp2, <4 x i32> <i32 0, i32 7, i32 0, i32 0>
- %arrayidx8 = getelementptr inbounds <4 x float>* %dest, i32 11
+ %arrayidx8 = getelementptr inbounds <4 x float>, <4 x float>* %dest, i32 11
store <4 x float> %tmp5, <4 x float>* %arrayidx8, align 4
ret void
}
diff --git a/test/CodeGen/AArch64/arm64-scaled_iv.ll b/test/CodeGen/AArch64/arm64-scaled_iv.ll
index 63428df9610c..24f04f44c3ea 100644
--- a/test/CodeGen/AArch64/arm64-scaled_iv.ll
+++ b/test/CodeGen/AArch64/arm64-scaled_iv.ll
@@ -17,15 +17,15 @@ for.body: ; preds = %for.body, %entry
; CHECK-NOT: phi
%indvars.iv = phi i64 [ 1, %entry ], [ %indvars.iv.next, %for.body ]
%tmp = add nsw i64 %indvars.iv, -1
- %arrayidx = getelementptr inbounds double* %b, i64 %tmp
- %tmp1 = load double* %arrayidx, align 8
+ %arrayidx = getelementptr inbounds double, double* %b, i64 %tmp
+ %tmp1 = load double, double* %arrayidx, align 8
; The induction variable should carry the scaling factor: 1 * 8 = 8.
-; CHECK: [[IVNEXT]] = add nuw i64 [[IV]], 8
+; CHECK: [[IVNEXT]] = add nuw nsw i64 [[IV]], 8
%indvars.iv.next = add i64 %indvars.iv, 1
- %arrayidx2 = getelementptr inbounds double* %c, i64 %indvars.iv.next
- %tmp2 = load double* %arrayidx2, align 8
+ %arrayidx2 = getelementptr inbounds double, double* %c, i64 %indvars.iv.next
+ %tmp2 = load double, double* %arrayidx2, align 8
%mul = fmul double %tmp1, %tmp2
- %arrayidx4 = getelementptr inbounds double* %a, i64 %indvars.iv
+ %arrayidx4 = getelementptr inbounds double, double* %a, i64 %indvars.iv
store double %mul, double* %arrayidx4, align 8
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
; Comparison should be 19 * 8 = 152.
diff --git a/test/CodeGen/AArch64/arm64-scvt.ll b/test/CodeGen/AArch64/arm64-scvt.ll
index 8baaf22238d3..fc64d7bfda68 100644
--- a/test/CodeGen/AArch64/arm64-scvt.ll
+++ b/test/CodeGen/AArch64/arm64-scvt.ll
@@ -7,7 +7,7 @@ entry:
; CHECK-LABEL: t1:
; CHECK: ldr s0, [x0]
; CHECK: scvtf s0, s0
- %tmp1 = load i32* %src, align 4
+ %tmp1 = load i32, i32* %src, align 4
%tmp2 = sitofp i32 %tmp1 to float
ret float %tmp2
}
@@ -17,7 +17,7 @@ entry:
; CHECK-LABEL: t2:
; CHECK: ldr s0, [x0]
; CHECK: ucvtf s0, s0
- %tmp1 = load i32* %src, align 4
+ %tmp1 = load i32, i32* %src, align 4
%tmp2 = uitofp i32 %tmp1 to float
ret float %tmp2
}
@@ -27,7 +27,7 @@ entry:
; CHECK-LABEL: t3:
; CHECK: ldr d0, [x0]
; CHECK: scvtf d0, d0
- %tmp1 = load i64* %src, align 4
+ %tmp1 = load i64, i64* %src, align 4
%tmp2 = sitofp i64 %tmp1 to double
ret double %tmp2
}
@@ -37,7 +37,7 @@ entry:
; CHECK-LABEL: t4:
; CHECK: ldr d0, [x0]
; CHECK: ucvtf d0, d0
- %tmp1 = load i64* %src, align 4
+ %tmp1 = load i64, i64* %src, align 4
%tmp2 = uitofp i64 %tmp1 to double
ret double %tmp2
}
@@ -48,7 +48,7 @@ entry:
; CHECK-LABEL: t5:
; CHECK: ldr [[REG:w[0-9]+]], [x0]
; CHECK: scvtf d0, [[REG]]
- %tmp1 = load i32* %src, align 4
+ %tmp1 = load i32, i32* %src, align 4
%tmp2 = sitofp i32 %tmp1 to double
ret double %tmp2
}
@@ -75,8 +75,8 @@ define float @fct1(i8* nocapture %sp0) {
; CHECK-NEXT: ucvtf [[REG:s[0-9]+]], s[[REGNUM]]
; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
entry:
- %addr = getelementptr i8* %sp0, i64 1
- %pix_sp0.0.copyload = load i8* %addr, align 1
+ %addr = getelementptr i8, i8* %sp0, i64 1
+ %pix_sp0.0.copyload = load i8, i8* %addr, align 1
%val = uitofp i8 %pix_sp0.0.copyload to float
%vmull.i = fmul float %val, %val
ret float %vmull.i
@@ -88,8 +88,8 @@ define float @fct2(i16* nocapture %sp0) {
; CHECK-NEXT: ucvtf [[REG:s[0-9]+]], s[[REGNUM]]
; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
entry:
- %addr = getelementptr i16* %sp0, i64 1
- %pix_sp0.0.copyload = load i16* %addr, align 1
+ %addr = getelementptr i16, i16* %sp0, i64 1
+ %pix_sp0.0.copyload = load i16, i16* %addr, align 1
%val = uitofp i16 %pix_sp0.0.copyload to float
%vmull.i = fmul float %val, %val
ret float %vmull.i
@@ -101,8 +101,8 @@ define float @fct3(i32* nocapture %sp0) {
; CHECK-NEXT: ucvtf [[REG:s[0-9]+]], s[[REGNUM]]
; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
entry:
- %addr = getelementptr i32* %sp0, i64 1
- %pix_sp0.0.copyload = load i32* %addr, align 1
+ %addr = getelementptr i32, i32* %sp0, i64 1
+ %pix_sp0.0.copyload = load i32, i32* %addr, align 1
%val = uitofp i32 %pix_sp0.0.copyload to float
%vmull.i = fmul float %val, %val
ret float %vmull.i
@@ -115,8 +115,8 @@ define float @fct4(i64* nocapture %sp0) {
; CHECK-NEXT: ucvtf [[REG:s[0-9]+]], x[[REGNUM]]
; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
entry:
- %addr = getelementptr i64* %sp0, i64 1
- %pix_sp0.0.copyload = load i64* %addr, align 1
+ %addr = getelementptr i64, i64* %sp0, i64 1
+ %pix_sp0.0.copyload = load i64, i64* %addr, align 1
%val = uitofp i64 %pix_sp0.0.copyload to float
%vmull.i = fmul float %val, %val
ret float %vmull.i
@@ -129,8 +129,8 @@ define float @fct5(i8* nocapture %sp0, i64 %offset) {
; CHECK-NEXT: ucvtf [[REG:s[0-9]+]], s[[REGNUM]]
; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
entry:
- %addr = getelementptr i8* %sp0, i64 %offset
- %pix_sp0.0.copyload = load i8* %addr, align 1
+ %addr = getelementptr i8, i8* %sp0, i64 %offset
+ %pix_sp0.0.copyload = load i8, i8* %addr, align 1
%val = uitofp i8 %pix_sp0.0.copyload to float
%vmull.i = fmul float %val, %val
ret float %vmull.i
@@ -142,8 +142,8 @@ define float @fct6(i16* nocapture %sp0, i64 %offset) {
; CHECK-NEXT: ucvtf [[REG:s[0-9]+]], s[[REGNUM]]
; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
entry:
- %addr = getelementptr i16* %sp0, i64 %offset
- %pix_sp0.0.copyload = load i16* %addr, align 1
+ %addr = getelementptr i16, i16* %sp0, i64 %offset
+ %pix_sp0.0.copyload = load i16, i16* %addr, align 1
%val = uitofp i16 %pix_sp0.0.copyload to float
%vmull.i = fmul float %val, %val
ret float %vmull.i
@@ -155,8 +155,8 @@ define float @fct7(i32* nocapture %sp0, i64 %offset) {
; CHECK-NEXT: ucvtf [[REG:s[0-9]+]], s[[REGNUM]]
; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
entry:
- %addr = getelementptr i32* %sp0, i64 %offset
- %pix_sp0.0.copyload = load i32* %addr, align 1
+ %addr = getelementptr i32, i32* %sp0, i64 %offset
+ %pix_sp0.0.copyload = load i32, i32* %addr, align 1
%val = uitofp i32 %pix_sp0.0.copyload to float
%vmull.i = fmul float %val, %val
ret float %vmull.i
@@ -169,8 +169,8 @@ define float @fct8(i64* nocapture %sp0, i64 %offset) {
; CHECK-NEXT: ucvtf [[REG:s[0-9]+]], x[[REGNUM]]
; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
entry:
- %addr = getelementptr i64* %sp0, i64 %offset
- %pix_sp0.0.copyload = load i64* %addr, align 1
+ %addr = getelementptr i64, i64* %sp0, i64 %offset
+ %pix_sp0.0.copyload = load i64, i64* %addr, align 1
%val = uitofp i64 %pix_sp0.0.copyload to float
%vmull.i = fmul float %val, %val
ret float %vmull.i
@@ -184,8 +184,8 @@ define double @fct9(i8* nocapture %sp0) {
; CHECK-NEXT: ucvtf [[REG:d[0-9]+]], d[[REGNUM]]
; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
entry:
- %addr = getelementptr i8* %sp0, i64 1
- %pix_sp0.0.copyload = load i8* %addr, align 1
+ %addr = getelementptr i8, i8* %sp0, i64 1
+ %pix_sp0.0.copyload = load i8, i8* %addr, align 1
%val = uitofp i8 %pix_sp0.0.copyload to double
%vmull.i = fmul double %val, %val
ret double %vmull.i
@@ -197,8 +197,8 @@ define double @fct10(i16* nocapture %sp0) {
; CHECK-NEXT: ucvtf [[REG:d[0-9]+]], d[[REGNUM]]
; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
entry:
- %addr = getelementptr i16* %sp0, i64 1
- %pix_sp0.0.copyload = load i16* %addr, align 1
+ %addr = getelementptr i16, i16* %sp0, i64 1
+ %pix_sp0.0.copyload = load i16, i16* %addr, align 1
%val = uitofp i16 %pix_sp0.0.copyload to double
%vmull.i = fmul double %val, %val
ret double %vmull.i
@@ -210,8 +210,8 @@ define double @fct11(i32* nocapture %sp0) {
; CHECK-NEXT: ucvtf [[REG:d[0-9]+]], d[[REGNUM]]
; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
entry:
- %addr = getelementptr i32* %sp0, i64 1
- %pix_sp0.0.copyload = load i32* %addr, align 1
+ %addr = getelementptr i32, i32* %sp0, i64 1
+ %pix_sp0.0.copyload = load i32, i32* %addr, align 1
%val = uitofp i32 %pix_sp0.0.copyload to double
%vmull.i = fmul double %val, %val
ret double %vmull.i
@@ -223,8 +223,8 @@ define double @fct12(i64* nocapture %sp0) {
; CHECK-NEXT: ucvtf [[REG:d[0-9]+]], d[[REGNUM]]
; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
entry:
- %addr = getelementptr i64* %sp0, i64 1
- %pix_sp0.0.copyload = load i64* %addr, align 1
+ %addr = getelementptr i64, i64* %sp0, i64 1
+ %pix_sp0.0.copyload = load i64, i64* %addr, align 1
%val = uitofp i64 %pix_sp0.0.copyload to double
%vmull.i = fmul double %val, %val
ret double %vmull.i
@@ -237,8 +237,8 @@ define double @fct13(i8* nocapture %sp0, i64 %offset) {
; CHECK-NEXT: ucvtf [[REG:d[0-9]+]], d[[REGNUM]]
; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
entry:
- %addr = getelementptr i8* %sp0, i64 %offset
- %pix_sp0.0.copyload = load i8* %addr, align 1
+ %addr = getelementptr i8, i8* %sp0, i64 %offset
+ %pix_sp0.0.copyload = load i8, i8* %addr, align 1
%val = uitofp i8 %pix_sp0.0.copyload to double
%vmull.i = fmul double %val, %val
ret double %vmull.i
@@ -250,8 +250,8 @@ define double @fct14(i16* nocapture %sp0, i64 %offset) {
; CHECK-NEXT: ucvtf [[REG:d[0-9]+]], d[[REGNUM]]
; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
entry:
- %addr = getelementptr i16* %sp0, i64 %offset
- %pix_sp0.0.copyload = load i16* %addr, align 1
+ %addr = getelementptr i16, i16* %sp0, i64 %offset
+ %pix_sp0.0.copyload = load i16, i16* %addr, align 1
%val = uitofp i16 %pix_sp0.0.copyload to double
%vmull.i = fmul double %val, %val
ret double %vmull.i
@@ -263,8 +263,8 @@ define double @fct15(i32* nocapture %sp0, i64 %offset) {
; CHECK-NEXT: ucvtf [[REG:d[0-9]+]], d[[REGNUM]]
; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
entry:
- %addr = getelementptr i32* %sp0, i64 %offset
- %pix_sp0.0.copyload = load i32* %addr, align 1
+ %addr = getelementptr i32, i32* %sp0, i64 %offset
+ %pix_sp0.0.copyload = load i32, i32* %addr, align 1
%val = uitofp i32 %pix_sp0.0.copyload to double
%vmull.i = fmul double %val, %val
ret double %vmull.i
@@ -276,8 +276,8 @@ define double @fct16(i64* nocapture %sp0, i64 %offset) {
; CHECK-NEXT: ucvtf [[REG:d[0-9]+]], d[[REGNUM]]
; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
entry:
- %addr = getelementptr i64* %sp0, i64 %offset
- %pix_sp0.0.copyload = load i64* %addr, align 1
+ %addr = getelementptr i64, i64* %sp0, i64 %offset
+ %pix_sp0.0.copyload = load i64, i64* %addr, align 1
%val = uitofp i64 %pix_sp0.0.copyload to double
%vmull.i = fmul double %val, %val
ret double %vmull.i
@@ -293,7 +293,7 @@ entry:
%bitcast = ptrtoint i8* %sp0 to i64
%add = add i64 %bitcast, -1
%addr = inttoptr i64 %add to i8*
- %pix_sp0.0.copyload = load i8* %addr, align 1
+ %pix_sp0.0.copyload = load i8, i8* %addr, align 1
%val = uitofp i8 %pix_sp0.0.copyload to float
%vmull.i = fmul float %val, %val
ret float %vmull.i
@@ -307,7 +307,7 @@ define float @fct18(i16* nocapture %sp0) {
%bitcast = ptrtoint i16* %sp0 to i64
%add = add i64 %bitcast, 1
%addr = inttoptr i64 %add to i16*
- %pix_sp0.0.copyload = load i16* %addr, align 1
+ %pix_sp0.0.copyload = load i16, i16* %addr, align 1
%val = uitofp i16 %pix_sp0.0.copyload to float
%vmull.i = fmul float %val, %val
ret float %vmull.i
@@ -321,7 +321,7 @@ define float @fct19(i32* nocapture %sp0) {
%bitcast = ptrtoint i32* %sp0 to i64
%add = add i64 %bitcast, 1
%addr = inttoptr i64 %add to i32*
- %pix_sp0.0.copyload = load i32* %addr, align 1
+ %pix_sp0.0.copyload = load i32, i32* %addr, align 1
%val = uitofp i32 %pix_sp0.0.copyload to float
%vmull.i = fmul float %val, %val
ret float %vmull.i
@@ -336,7 +336,7 @@ define float @fct20(i64* nocapture %sp0) {
%bitcast = ptrtoint i64* %sp0 to i64
%add = add i64 %bitcast, 1
%addr = inttoptr i64 %add to i64*
- %pix_sp0.0.copyload = load i64* %addr, align 1
+ %pix_sp0.0.copyload = load i64, i64* %addr, align 1
%val = uitofp i64 %pix_sp0.0.copyload to float
%vmull.i = fmul float %val, %val
ret float %vmull.i
@@ -353,7 +353,7 @@ entry:
%bitcast = ptrtoint i8* %sp0 to i64
%add = add i64 %bitcast, -1
%addr = inttoptr i64 %add to i8*
- %pix_sp0.0.copyload = load i8* %addr, align 1
+ %pix_sp0.0.copyload = load i8, i8* %addr, align 1
%val = uitofp i8 %pix_sp0.0.copyload to double
%vmull.i = fmul double %val, %val
ret double %vmull.i
@@ -367,7 +367,7 @@ define double @fct22(i16* nocapture %sp0) {
%bitcast = ptrtoint i16* %sp0 to i64
%add = add i64 %bitcast, 1
%addr = inttoptr i64 %add to i16*
- %pix_sp0.0.copyload = load i16* %addr, align 1
+ %pix_sp0.0.copyload = load i16, i16* %addr, align 1
%val = uitofp i16 %pix_sp0.0.copyload to double
%vmull.i = fmul double %val, %val
ret double %vmull.i
@@ -381,7 +381,7 @@ define double @fct23(i32* nocapture %sp0) {
%bitcast = ptrtoint i32* %sp0 to i64
%add = add i64 %bitcast, 1
%addr = inttoptr i64 %add to i32*
- %pix_sp0.0.copyload = load i32* %addr, align 1
+ %pix_sp0.0.copyload = load i32, i32* %addr, align 1
%val = uitofp i32 %pix_sp0.0.copyload to double
%vmull.i = fmul double %val, %val
ret double %vmull.i
@@ -395,7 +395,7 @@ define double @fct24(i64* nocapture %sp0) {
%bitcast = ptrtoint i64* %sp0 to i64
%add = add i64 %bitcast, 1
%addr = inttoptr i64 %add to i64*
- %pix_sp0.0.copyload = load i64* %addr, align 1
+ %pix_sp0.0.copyload = load i64, i64* %addr, align 1
%val = uitofp i64 %pix_sp0.0.copyload to double
%vmull.i = fmul double %val, %val
ret double %vmull.i
@@ -415,8 +415,8 @@ define float @sfct1(i8* nocapture %sp0) {
; CHECK-A57-NEXT: scvtf [[REG:s[0-9]+]], w[[REGNUM]]
; CHECK-A57-NEXT: fmul s0, [[REG]], [[REG]]
entry:
- %addr = getelementptr i8* %sp0, i64 1
- %pix_sp0.0.copyload = load i8* %addr, align 1
+ %addr = getelementptr i8, i8* %sp0, i64 1
+ %pix_sp0.0.copyload = load i8, i8* %addr, align 1
%val = sitofp i8 %pix_sp0.0.copyload to float
%vmull.i = fmul float %val, %val
ret float %vmull.i
@@ -429,8 +429,8 @@ define float @sfct2(i16* nocapture %sp0) {
; CHECK: scvtf [[REG:s[0-9]+]], s[[SEXTREG]]
; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
entry:
- %addr = getelementptr i16* %sp0, i64 1
- %pix_sp0.0.copyload = load i16* %addr, align 1
+ %addr = getelementptr i16, i16* %sp0, i64 1
+ %pix_sp0.0.copyload = load i16, i16* %addr, align 1
%val = sitofp i16 %pix_sp0.0.copyload to float
%vmull.i = fmul float %val, %val
ret float %vmull.i
@@ -442,8 +442,8 @@ define float @sfct3(i32* nocapture %sp0) {
; CHECK-NEXT: scvtf [[REG:s[0-9]+]], s[[SEXTREG]]
; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
entry:
- %addr = getelementptr i32* %sp0, i64 1
- %pix_sp0.0.copyload = load i32* %addr, align 1
+ %addr = getelementptr i32, i32* %sp0, i64 1
+ %pix_sp0.0.copyload = load i32, i32* %addr, align 1
%val = sitofp i32 %pix_sp0.0.copyload to float
%vmull.i = fmul float %val, %val
ret float %vmull.i
@@ -456,8 +456,8 @@ define float @sfct4(i64* nocapture %sp0) {
; CHECK-NEXT: scvtf [[REG:s[0-9]+]], x[[REGNUM]]
; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
entry:
- %addr = getelementptr i64* %sp0, i64 1
- %pix_sp0.0.copyload = load i64* %addr, align 1
+ %addr = getelementptr i64, i64* %sp0, i64 1
+ %pix_sp0.0.copyload = load i64, i64* %addr, align 1
%val = sitofp i64 %pix_sp0.0.copyload to float
%vmull.i = fmul float %val, %val
ret float %vmull.i
@@ -476,8 +476,8 @@ define float @sfct5(i8* nocapture %sp0, i64 %offset) {
; CHECK-A57-NEXT: scvtf [[REG:s[0-9]+]], w[[REGNUM]]
; CHECK-A57-NEXT: fmul s0, [[REG]], [[REG]]
entry:
- %addr = getelementptr i8* %sp0, i64 %offset
- %pix_sp0.0.copyload = load i8* %addr, align 1
+ %addr = getelementptr i8, i8* %sp0, i64 %offset
+ %pix_sp0.0.copyload = load i8, i8* %addr, align 1
%val = sitofp i8 %pix_sp0.0.copyload to float
%vmull.i = fmul float %val, %val
ret float %vmull.i
@@ -490,8 +490,8 @@ define float @sfct6(i16* nocapture %sp0, i64 %offset) {
; CHECK: scvtf [[REG:s[0-9]+]], s[[SEXTREG]]
; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
entry:
- %addr = getelementptr i16* %sp0, i64 %offset
- %pix_sp0.0.copyload = load i16* %addr, align 1
+ %addr = getelementptr i16, i16* %sp0, i64 %offset
+ %pix_sp0.0.copyload = load i16, i16* %addr, align 1
%val = sitofp i16 %pix_sp0.0.copyload to float
%vmull.i = fmul float %val, %val
ret float %vmull.i
@@ -503,8 +503,8 @@ define float @sfct7(i32* nocapture %sp0, i64 %offset) {
; CHECK-NEXT: scvtf [[REG:s[0-9]+]], s[[SEXTREG]]
; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
entry:
- %addr = getelementptr i32* %sp0, i64 %offset
- %pix_sp0.0.copyload = load i32* %addr, align 1
+ %addr = getelementptr i32, i32* %sp0, i64 %offset
+ %pix_sp0.0.copyload = load i32, i32* %addr, align 1
%val = sitofp i32 %pix_sp0.0.copyload to float
%vmull.i = fmul float %val, %val
ret float %vmull.i
@@ -517,8 +517,8 @@ define float @sfct8(i64* nocapture %sp0, i64 %offset) {
; CHECK-NEXT: scvtf [[REG:s[0-9]+]], x[[REGNUM]]
; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
entry:
- %addr = getelementptr i64* %sp0, i64 %offset
- %pix_sp0.0.copyload = load i64* %addr, align 1
+ %addr = getelementptr i64, i64* %sp0, i64 %offset
+ %pix_sp0.0.copyload = load i64, i64* %addr, align 1
%val = sitofp i64 %pix_sp0.0.copyload to float
%vmull.i = fmul float %val, %val
ret float %vmull.i
@@ -531,8 +531,8 @@ define double @sfct9(i8* nocapture %sp0) {
; CHECK-NEXT: scvtf [[REG:d[0-9]+]], w[[REGNUM]]
; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
entry:
- %addr = getelementptr i8* %sp0, i64 1
- %pix_sp0.0.copyload = load i8* %addr, align 1
+ %addr = getelementptr i8, i8* %sp0, i64 1
+ %pix_sp0.0.copyload = load i8, i8* %addr, align 1
%val = sitofp i8 %pix_sp0.0.copyload to double
%vmull.i = fmul double %val, %val
ret double %vmull.i
@@ -550,8 +550,8 @@ define double @sfct10(i16* nocapture %sp0) {
; CHECK-A57-NEXT: scvtf [[REG:d[0-9]+]], w[[REGNUM]]
; CHECK-A57-NEXT: fmul d0, [[REG]], [[REG]]
entry:
- %addr = getelementptr i16* %sp0, i64 1
- %pix_sp0.0.copyload = load i16* %addr, align 1
+ %addr = getelementptr i16, i16* %sp0, i64 1
+ %pix_sp0.0.copyload = load i16, i16* %addr, align 1
%val = sitofp i16 %pix_sp0.0.copyload to double
%vmull.i = fmul double %val, %val
ret double %vmull.i
@@ -564,8 +564,8 @@ define double @sfct11(i32* nocapture %sp0) {
; CHECK: scvtf [[REG:d[0-9]+]], d[[SEXTREG]]
; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
entry:
- %addr = getelementptr i32* %sp0, i64 1
- %pix_sp0.0.copyload = load i32* %addr, align 1
+ %addr = getelementptr i32, i32* %sp0, i64 1
+ %pix_sp0.0.copyload = load i32, i32* %addr, align 1
%val = sitofp i32 %pix_sp0.0.copyload to double
%vmull.i = fmul double %val, %val
ret double %vmull.i
@@ -577,8 +577,8 @@ define double @sfct12(i64* nocapture %sp0) {
; CHECK-NEXT: scvtf [[REG:d[0-9]+]], d[[SEXTREG]]
; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
entry:
- %addr = getelementptr i64* %sp0, i64 1
- %pix_sp0.0.copyload = load i64* %addr, align 1
+ %addr = getelementptr i64, i64* %sp0, i64 1
+ %pix_sp0.0.copyload = load i64, i64* %addr, align 1
%val = sitofp i64 %pix_sp0.0.copyload to double
%vmull.i = fmul double %val, %val
ret double %vmull.i
@@ -591,8 +591,8 @@ define double @sfct13(i8* nocapture %sp0, i64 %offset) {
; CHECK-NEXT: scvtf [[REG:d[0-9]+]], w[[REGNUM]]
; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
entry:
- %addr = getelementptr i8* %sp0, i64 %offset
- %pix_sp0.0.copyload = load i8* %addr, align 1
+ %addr = getelementptr i8, i8* %sp0, i64 %offset
+ %pix_sp0.0.copyload = load i8, i8* %addr, align 1
%val = sitofp i8 %pix_sp0.0.copyload to double
%vmull.i = fmul double %val, %val
ret double %vmull.i
@@ -610,8 +610,8 @@ define double @sfct14(i16* nocapture %sp0, i64 %offset) {
; CHECK-A57-NEXT: scvtf [[REG:d[0-9]+]], w[[REGNUM]]
; CHECK-A57-NEXT: fmul d0, [[REG]], [[REG]]
entry:
- %addr = getelementptr i16* %sp0, i64 %offset
- %pix_sp0.0.copyload = load i16* %addr, align 1
+ %addr = getelementptr i16, i16* %sp0, i64 %offset
+ %pix_sp0.0.copyload = load i16, i16* %addr, align 1
%val = sitofp i16 %pix_sp0.0.copyload to double
%vmull.i = fmul double %val, %val
ret double %vmull.i
@@ -624,8 +624,8 @@ define double @sfct15(i32* nocapture %sp0, i64 %offset) {
; CHECK: scvtf [[REG:d[0-9]+]], d[[SEXTREG]]
; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
entry:
- %addr = getelementptr i32* %sp0, i64 %offset
- %pix_sp0.0.copyload = load i32* %addr, align 1
+ %addr = getelementptr i32, i32* %sp0, i64 %offset
+ %pix_sp0.0.copyload = load i32, i32* %addr, align 1
%val = sitofp i32 %pix_sp0.0.copyload to double
%vmull.i = fmul double %val, %val
ret double %vmull.i
@@ -637,8 +637,8 @@ define double @sfct16(i64* nocapture %sp0, i64 %offset) {
; CHECK-NEXT: scvtf [[REG:d[0-9]+]], d[[SEXTREG]]
; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
entry:
- %addr = getelementptr i64* %sp0, i64 %offset
- %pix_sp0.0.copyload = load i64* %addr, align 1
+ %addr = getelementptr i64, i64* %sp0, i64 %offset
+ %pix_sp0.0.copyload = load i64, i64* %addr, align 1
%val = sitofp i64 %pix_sp0.0.copyload to double
%vmull.i = fmul double %val, %val
ret double %vmull.i
@@ -660,7 +660,7 @@ entry:
%bitcast = ptrtoint i8* %sp0 to i64
%add = add i64 %bitcast, -1
%addr = inttoptr i64 %add to i8*
- %pix_sp0.0.copyload = load i8* %addr, align 1
+ %pix_sp0.0.copyload = load i8, i8* %addr, align 1
%val = sitofp i8 %pix_sp0.0.copyload to float
%vmull.i = fmul float %val, %val
ret float %vmull.i
@@ -675,7 +675,7 @@ define float @sfct18(i16* nocapture %sp0) {
%bitcast = ptrtoint i16* %sp0 to i64
%add = add i64 %bitcast, 1
%addr = inttoptr i64 %add to i16*
- %pix_sp0.0.copyload = load i16* %addr, align 1
+ %pix_sp0.0.copyload = load i16, i16* %addr, align 1
%val = sitofp i16 %pix_sp0.0.copyload to float
%vmull.i = fmul float %val, %val
ret float %vmull.i
@@ -689,7 +689,7 @@ define float @sfct19(i32* nocapture %sp0) {
%bitcast = ptrtoint i32* %sp0 to i64
%add = add i64 %bitcast, 1
%addr = inttoptr i64 %add to i32*
- %pix_sp0.0.copyload = load i32* %addr, align 1
+ %pix_sp0.0.copyload = load i32, i32* %addr, align 1
%val = sitofp i32 %pix_sp0.0.copyload to float
%vmull.i = fmul float %val, %val
ret float %vmull.i
@@ -704,7 +704,7 @@ define float @sfct20(i64* nocapture %sp0) {
%bitcast = ptrtoint i64* %sp0 to i64
%add = add i64 %bitcast, 1
%addr = inttoptr i64 %add to i64*
- %pix_sp0.0.copyload = load i64* %addr, align 1
+ %pix_sp0.0.copyload = load i64, i64* %addr, align 1
%val = sitofp i64 %pix_sp0.0.copyload to float
%vmull.i = fmul float %val, %val
ret float %vmull.i
@@ -721,7 +721,7 @@ entry:
%bitcast = ptrtoint i8* %sp0 to i64
%add = add i64 %bitcast, -1
%addr = inttoptr i64 %add to i8*
- %pix_sp0.0.copyload = load i8* %addr, align 1
+ %pix_sp0.0.copyload = load i8, i8* %addr, align 1
%val = sitofp i8 %pix_sp0.0.copyload to double
%vmull.i = fmul double %val, %val
ret double %vmull.i
@@ -741,7 +741,7 @@ define double @sfct22(i16* nocapture %sp0) {
%bitcast = ptrtoint i16* %sp0 to i64
%add = add i64 %bitcast, 1
%addr = inttoptr i64 %add to i16*
- %pix_sp0.0.copyload = load i16* %addr, align 1
+ %pix_sp0.0.copyload = load i16, i16* %addr, align 1
%val = sitofp i16 %pix_sp0.0.copyload to double
%vmull.i = fmul double %val, %val
ret double %vmull.i
@@ -756,7 +756,7 @@ define double @sfct23(i32* nocapture %sp0) {
%bitcast = ptrtoint i32* %sp0 to i64
%add = add i64 %bitcast, 1
%addr = inttoptr i64 %add to i32*
- %pix_sp0.0.copyload = load i32* %addr, align 1
+ %pix_sp0.0.copyload = load i32, i32* %addr, align 1
%val = sitofp i32 %pix_sp0.0.copyload to double
%vmull.i = fmul double %val, %val
ret double %vmull.i
@@ -770,7 +770,7 @@ define double @sfct24(i64* nocapture %sp0) {
%bitcast = ptrtoint i64* %sp0 to i64
%add = add i64 %bitcast, 1
%addr = inttoptr i64 %add to i64*
- %pix_sp0.0.copyload = load i64* %addr, align 1
+ %pix_sp0.0.copyload = load i64, i64* %addr, align 1
%val = sitofp i64 %pix_sp0.0.copyload to double
%vmull.i = fmul double %val, %val
ret double %vmull.i
@@ -787,7 +787,7 @@ entry:
%bitcast = ptrtoint i8* %sp0 to i64
%add = add i64 %bitcast, -1
%addr = inttoptr i64 %add to i8*
- %pix_sp0.0.copyload = load i8* %addr, align 1
+ %pix_sp0.0.copyload = load i8, i8* %addr, align 1
%val = sitofp i8 %pix_sp0.0.copyload to float
%vmull.i = fmul float %val, %val
ret float %vmull.i
@@ -799,8 +799,8 @@ define double @codesize_sfct11(i32* nocapture %sp0) minsize {
; CHECK-NEXT: scvtf [[REG:d[0-9]+]], w[[REGNUM]]
; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
entry:
- %addr = getelementptr i32* %sp0, i64 1
- %pix_sp0.0.copyload = load i32* %addr, align 1
+ %addr = getelementptr i32, i32* %sp0, i64 1
+ %pix_sp0.0.copyload = load i32, i32* %addr, align 1
%val = sitofp i32 %pix_sp0.0.copyload to double
%vmull.i = fmul double %val, %val
ret double %vmull.i
diff --git a/test/CodeGen/AArch64/arm64-shrink-wrapping.ll b/test/CodeGen/AArch64/arm64-shrink-wrapping.ll
new file mode 100644
index 000000000000..c1777513fa04
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-shrink-wrapping.ll
@@ -0,0 +1,502 @@
+; RUN: llc %s -o - -enable-shrink-wrap=true | FileCheck %s --check-prefix=CHECK --check-prefix=ENABLE
+; RUN: llc %s -o - -enable-shrink-wrap=false | FileCheck %s --check-prefix=CHECK --check-prefix=DISABLE
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+target triple = "arm64-apple-ios"
+
+
+; Initial motivating example: Simple diamond with a call just on one side.
+; CHECK-LABEL: foo:
+;
+; Compare the arguments and jump to exit.
+; No prologue needed.
+; ENABLE: cmp w0, w1
+; ENABLE-NEXT: b.ge [[EXIT_LABEL:LBB[0-9_]+]]
+;
+; Prologue code.
+; CHECK: stp [[SAVE_SP:x[0-9]+]], [[CSR:x[0-9]+]], [sp, #-16]!
+; CHECK-NEXT: mov [[SAVE_SP]], sp
+; CHECK-NEXT: sub sp, sp, #16
+;
+; Compare the arguments and jump to exit.
+; After the prologue is set.
+; DISABLE: cmp w0, w1
+; DISABLE-NEXT: b.ge [[EXIT_LABEL:LBB[0-9_]+]]
+;
+; Store %a in the alloca.
+; CHECK: stur w0, {{\[}}[[SAVE_SP]], #-4]
+; Set the alloca address in the second argument.
+; CHECK-NEXT: sub x1, [[SAVE_SP]], #4
+; Set the first argument to zero.
+; CHECK-NEXT: mov w0, wzr
+; CHECK-NEXT: bl _doSomething
+;
+; Without shrink-wrapping, epilogue is in the exit block.
+; DISABLE: [[EXIT_LABEL]]:
+; Epilogue code.
+; CHECK-NEXT: mov sp, [[SAVE_SP]]
+; CHECK-NEXT: ldp [[SAVE_SP]], [[CSR]], [sp], #16
+;
+; With shrink-wrapping, exit block is a simple return.
+; ENABLE: [[EXIT_LABEL]]:
+; CHECK-NEXT: ret
+define i32 @foo(i32 %a, i32 %b) {
+ %tmp = alloca i32, align 4
+ %tmp2 = icmp slt i32 %a, %b
+ br i1 %tmp2, label %true, label %false
+
+true:
+ store i32 %a, i32* %tmp, align 4
+ %tmp4 = call i32 @doSomething(i32 0, i32* %tmp)
+ br label %false
+
+false:
+ %tmp.0 = phi i32 [ %tmp4, %true ], [ %a, %0 ]
+ ret i32 %tmp.0
+}
+
+; Function Attrs: optsize
+declare i32 @doSomething(i32, i32*)
+
+
+; Check that we do not perform the restore inside the loop whereas the save
+; is outside.
+; CHECK-LABEL: freqSaveAndRestoreOutsideLoop:
+;
+; Shrink-wrapping allows us to skip the prologue in the else case.
+; ENABLE: cbz w0, [[ELSE_LABEL:LBB[0-9_]+]]
+;
+; Prologue code.
+; CHECK: stp [[CSR1:x[0-9]+]], [[CSR2:x[0-9]+]], [sp, #-32]!
+; CHECK-NEXT: stp [[CSR3:x[0-9]+]], [[CSR4:x[0-9]+]], [sp, #16]
+; CHECK-NEXT: add [[NEW_SP:x[0-9]+]], sp, #16
+;
+; DISABLE: cbz w0, [[ELSE_LABEL:LBB[0-9_]+]]
+;
+; CHECK: mov [[SUM:w[0-9]+]], wzr
+; CHECK-NEXT: movz [[IV:w[0-9]+]], #0xa
+;
+; Next BB.
+; CHECK: [[LOOP:LBB[0-9_]+]]: ; %for.body
+; CHECK: bl _something
+; CHECK-NEXT: add [[SUM]], w0, [[SUM]]
+; CHECK-NEXT: sub [[IV]], [[IV]], #1
+; CHECK-NEXT: cbnz [[IV]], [[LOOP]]
+;
+; Next BB.
+; Copy SUM, shifted left by 3, into the returned register.
+; CHECK: lsl w0, [[SUM]], #3
+;
+; Jump to epilogue.
+; DISABLE: b [[EPILOG_BB:LBB[0-9_]+]]
+;
+; DISABLE: [[ELSE_LABEL]]: ; %if.else
+; Shift second argument by one and store into returned register.
+; DISABLE: lsl w0, w1, #1
+; DISABLE: [[EPILOG_BB]]: ; %if.end
+;
+; Epilogue code.
+; CHECK: ldp [[CSR3]], [[CSR4]], [sp, #16]
+; CHECK-NEXT: ldp [[CSR1]], [[CSR2]], [sp], #32
+; CHECK-NEXT: ret
+;
+; ENABLE: [[ELSE_LABEL]]: ; %if.else
+; Shift second argument by one and store into returned register.
+; ENABLE: lsl w0, w1, #1
+; ENABLE: ret
+define i32 @freqSaveAndRestoreOutsideLoop(i32 %cond, i32 %N) {
+entry:
+ %tobool = icmp eq i32 %cond, 0
+ br i1 %tobool, label %if.else, label %for.body
+
+for.body: ; preds = %entry, %for.body
+ %i.05 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
+ %sum.04 = phi i32 [ %add, %for.body ], [ 0, %entry ]
+ %call = tail call i32 bitcast (i32 (...)* @something to i32 ()*)()
+ %add = add nsw i32 %call, %sum.04
+ %inc = add nuw nsw i32 %i.05, 1
+ %exitcond = icmp eq i32 %inc, 10
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body
+ %shl = shl i32 %add, 3
+ br label %if.end
+
+if.else: ; preds = %entry
+ %mul = shl nsw i32 %N, 1
+ br label %if.end
+
+if.end: ; preds = %if.else, %for.end
+ %sum.1 = phi i32 [ %shl, %for.end ], [ %mul, %if.else ]
+ ret i32 %sum.1
+}
+
+declare i32 @something(...)
+
+; Check that we do not perform the shrink-wrapping inside the loop even
+; though that would be legal. The cost model must prevent that.
+; CHECK-LABEL: freqSaveAndRestoreOutsideLoop2:
+; Prologue code.
+; CHECK: stp [[CSR1:x[0-9]+]], [[CSR2:x[0-9]+]], [sp, #-32]!
+; CHECK-NEXT: stp [[CSR3:x[0-9]+]], [[CSR4:x[0-9]+]], [sp, #16]
+; CHECK-NEXT: add [[NEW_SP:x[0-9]+]], sp, #16
+; CHECK: mov [[SUM:w[0-9]+]], wzr
+; CHECK-NEXT: movz [[IV:w[0-9]+]], #0xa
+; Next BB.
+; CHECK: [[LOOP_LABEL:LBB[0-9_]+]]: ; %for.body
+; CHECK: bl _something
+; CHECK-NEXT: add [[SUM]], w0, [[SUM]]
+; CHECK-NEXT: sub [[IV]], [[IV]], #1
+; CHECK-NEXT: cbnz [[IV]], [[LOOP_LABEL]]
+; Next BB.
+; CHECK: ; %for.end
+; CHECK: mov w0, [[SUM]]
+; CHECK-NEXT: ldp [[CSR3]], [[CSR4]], [sp, #16]
+; CHECK-NEXT: ldp [[CSR1]], [[CSR2]], [sp], #32
+; CHECK-NEXT: ret
+define i32 @freqSaveAndRestoreOutsideLoop2(i32 %cond) {
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %i.04 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+ %sum.03 = phi i32 [ 0, %entry ], [ %add, %for.body ]
+ %call = tail call i32 bitcast (i32 (...)* @something to i32 ()*)()
+ %add = add nsw i32 %call, %sum.03
+ %inc = add nuw nsw i32 %i.04, 1
+ %exitcond = icmp eq i32 %inc, 10
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body
+ ret i32 %add
+}
+
+; Check, with a more complex case, that we do not place the save within the loop
+; and the restore outside.
+; CHECK-LABEL: loopInfoSaveOutsideLoop:
+;
+; ENABLE: cbz w0, [[ELSE_LABEL:LBB[0-9_]+]]
+;
+; Prologue code.
+; CHECK: stp [[CSR1:x[0-9]+]], [[CSR2:x[0-9]+]], [sp, #-32]!
+; CHECK-NEXT: stp [[CSR3:x[0-9]+]], [[CSR4:x[0-9]+]], [sp, #16]
+; CHECK-NEXT: add [[NEW_SP:x[0-9]+]], sp, #16
+;
+; DISABLE: cbz w0, [[ELSE_LABEL:LBB[0-9_]+]]
+;
+; CHECK: mov [[SUM:w[0-9]+]], wzr
+; CHECK-NEXT: movz [[IV:w[0-9]+]], #0xa
+;
+; CHECK: [[LOOP_LABEL:LBB[0-9_]+]]: ; %for.body
+; CHECK: bl _something
+; CHECK-NEXT: add [[SUM]], w0, [[SUM]]
+; CHECK-NEXT: sub [[IV]], [[IV]], #1
+; CHECK-NEXT: cbnz [[IV]], [[LOOP_LABEL]]
+; Next BB.
+; CHECK: bl _somethingElse
+; CHECK-NEXT: lsl w0, [[SUM]], #3
+;
+; Jump to epilogue.
+; DISABLE: b [[EPILOG_BB:LBB[0-9_]+]]
+;
+; DISABLE: [[ELSE_LABEL]]: ; %if.else
+; Shift second argument by one and store into returned register.
+; DISABLE: lsl w0, w1, #1
+; DISABLE: [[EPILOG_BB]]: ; %if.end
+; Epilogue code.
+; CHECK-NEXT: ldp [[CSR3]], [[CSR4]], [sp, #16]
+; CHECK-NEXT: ldp [[CSR1]], [[CSR2]], [sp], #32
+; CHECK-NEXT: ret
+;
+; ENABLE: [[ELSE_LABEL]]: ; %if.else
+; Shift second argument by one and store into returned register.
+; ENABLE: lsl w0, w1, #1
+; ENABLE: ret
+define i32 @loopInfoSaveOutsideLoop(i32 %cond, i32 %N) {
+entry:
+ %tobool = icmp eq i32 %cond, 0
+ br i1 %tobool, label %if.else, label %for.body
+
+for.body: ; preds = %entry, %for.body
+ %i.05 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
+ %sum.04 = phi i32 [ %add, %for.body ], [ 0, %entry ]
+ %call = tail call i32 bitcast (i32 (...)* @something to i32 ()*)()
+ %add = add nsw i32 %call, %sum.04
+ %inc = add nuw nsw i32 %i.05, 1
+ %exitcond = icmp eq i32 %inc, 10
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body
+ tail call void bitcast (void (...)* @somethingElse to void ()*)()
+ %shl = shl i32 %add, 3
+ br label %if.end
+
+if.else: ; preds = %entry
+ %mul = shl nsw i32 %N, 1
+ br label %if.end
+
+if.end: ; preds = %if.else, %for.end
+ %sum.1 = phi i32 [ %shl, %for.end ], [ %mul, %if.else ]
+ ret i32 %sum.1
+}
+
+declare void @somethingElse(...)
+
+; Check, with a more complex case, that we do not place the restore within the loop
+; and the save outside.
+; CHECK-LABEL: loopInfoRestoreOutsideLoop:
+;
+; ENABLE: cbz w0, [[ELSE_LABEL:LBB[0-9_]+]]
+;
+; CHECK: stp [[CSR1:x[0-9]+]], [[CSR2:x[0-9]+]], [sp, #-32]!
+; CHECK-NEXT: stp [[CSR3:x[0-9]+]], [[CSR4:x[0-9]+]], [sp, #16]
+; CHECK-NEXT: add [[NEW_SP:x[0-9]+]], sp, #16
+;
+; DISABLE: cbz w0, [[ELSE_LABEL:LBB[0-9_]+]]
+;
+; CHECK: bl _somethingElse
+; CHECK-NEXT: mov [[SUM:w[0-9]+]], wzr
+; CHECK-NEXT: movz [[IV:w[0-9]+]], #0xa
+;
+; CHECK: [[LOOP_LABEL:LBB[0-9_]+]]: ; %for.body
+; CHECK: bl _something
+; CHECK-NEXT: add [[SUM]], w0, [[SUM]]
+; CHECK-NEXT: sub [[IV]], [[IV]], #1
+; CHECK-NEXT: cbnz [[IV]], [[LOOP_LABEL]]
+; Next BB.
+; CHECK: lsl w0, [[SUM]], #3
+;
+; Jump to epilogue.
+; DISABLE: b [[EPILOG_BB:LBB[0-9_]+]]
+;
+; DISABLE: [[ELSE_LABEL]]: ; %if.else
+; Shift second argument by one and store into returned register.
+; DISABLE: lsl w0, w1, #1
+; DISABLE: [[EPILOG_BB]]: ; %if.end
+; Epilogue code.
+; CHECK: ldp [[CSR3]], [[CSR4]], [sp, #16]
+; CHECK-NEXT: ldp [[CSR1]], [[CSR2]], [sp], #32
+; CHECK-NEXT: ret
+;
+; ENABLE: [[ELSE_LABEL]]: ; %if.else
+; Shift second argument by one and store into returned register.
+; ENABLE: lsl w0, w1, #1
+; ENABLE: ret
+define i32 @loopInfoRestoreOutsideLoop(i32 %cond, i32 %N) #0 {
+entry:
+ %tobool = icmp eq i32 %cond, 0
+ br i1 %tobool, label %if.else, label %if.then
+
+if.then: ; preds = %entry
+ tail call void bitcast (void (...)* @somethingElse to void ()*)()
+ br label %for.body
+
+for.body: ; preds = %for.body, %if.then
+ %i.05 = phi i32 [ 0, %if.then ], [ %inc, %for.body ]
+ %sum.04 = phi i32 [ 0, %if.then ], [ %add, %for.body ]
+ %call = tail call i32 bitcast (i32 (...)* @something to i32 ()*)()
+ %add = add nsw i32 %call, %sum.04
+ %inc = add nuw nsw i32 %i.05, 1
+ %exitcond = icmp eq i32 %inc, 10
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body
+ %shl = shl i32 %add, 3
+ br label %if.end
+
+if.else: ; preds = %entry
+ %mul = shl nsw i32 %N, 1
+ br label %if.end
+
+if.end: ; preds = %if.else, %for.end
+ %sum.1 = phi i32 [ %shl, %for.end ], [ %mul, %if.else ]
+ ret i32 %sum.1
+}
+
+; Check that we handle functions with no frame information correctly.
+; CHECK-LABEL: emptyFrame:
+; CHECK: ; %entry
+; CHECK-NEXT: mov w0, wzr
+; CHECK-NEXT: ret
+define i32 @emptyFrame() {
+entry:
+ ret i32 0
+}
+
+; Check that we handle variadic functions correctly.
+; CHECK-LABEL: variadicFunc:
+;
+; ENABLE: cbz w0, [[ELSE_LABEL:LBB[0-9_]+]]
+;
+; Prologue code.
+; CHECK: sub sp, sp, #16
+; DISABLE: cbz w0, [[ELSE_LABEL:LBB[0-9_]+]]
+;
+; Sum is merged with the returned register.
+; CHECK: mov [[SUM:w0]], wzr
+; CHECK-NEXT: add [[VA_BASE:x[0-9]+]], sp, #16
+; CHECK-NEXT: str [[VA_BASE]], [sp, #8]
+; CHECK-NEXT: cmp w1, #1
+; CHECK-NEXT: b.lt [[IFEND_LABEL:LBB[0-9_]+]]
+;
+; CHECK: [[LOOP_LABEL:LBB[0-9_]+]]: ; %for.body
+; CHECK: ldr [[VA_ADDR:x[0-9]+]], [sp, #8]
+; CHECK-NEXT: add [[NEXT_VA_ADDR:x[0-9]+]], [[VA_ADDR]], #8
+; CHECK-NEXT: str [[NEXT_VA_ADDR]], [sp, #8]
+; CHECK-NEXT: ldr [[VA_VAL:w[0-9]+]], {{\[}}[[VA_ADDR]]]
+; CHECK-NEXT: add [[SUM]], [[SUM]], [[VA_VAL]]
+; CHECK-NEXT: sub w1, w1, #1
+; CHECK-NEXT: cbnz w1, [[LOOP_LABEL]]
+;
+; DISABLE-NEXT: b [[IFEND_LABEL]]
+; DISABLE: [[ELSE_LABEL]]: ; %if.else
+; DISABLE: lsl w0, w1, #1
+;
+; CHECK: [[IFEND_LABEL]]:
+; Epilogue code.
+; CHECK: add sp, sp, #16
+; CHECK-NEXT: ret
+;
+; ENABLE: [[ELSE_LABEL]]: ; %if.else
+; ENABLE: lsl w0, w1, #1
+; ENABLE-NEXT: ret
+define i32 @variadicFunc(i32 %cond, i32 %count, ...) #0 {
+entry:
+ %ap = alloca i8*, align 8
+ %tobool = icmp eq i32 %cond, 0
+ br i1 %tobool, label %if.else, label %if.then
+
+if.then: ; preds = %entry
+ %ap1 = bitcast i8** %ap to i8*
+ call void @llvm.va_start(i8* %ap1)
+ %cmp6 = icmp sgt i32 %count, 0
+ br i1 %cmp6, label %for.body, label %for.end
+
+for.body: ; preds = %if.then, %for.body
+ %i.08 = phi i32 [ %inc, %for.body ], [ 0, %if.then ]
+ %sum.07 = phi i32 [ %add, %for.body ], [ 0, %if.then ]
+ %0 = va_arg i8** %ap, i32
+ %add = add nsw i32 %sum.07, %0
+ %inc = add nuw nsw i32 %i.08, 1
+ %exitcond = icmp eq i32 %inc, %count
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %if.then
+ %sum.0.lcssa = phi i32 [ 0, %if.then ], [ %add, %for.body ]
+ call void @llvm.va_end(i8* %ap1)
+ br label %if.end
+
+if.else: ; preds = %entry
+ %mul = shl nsw i32 %count, 1
+ br label %if.end
+
+if.end: ; preds = %if.else, %for.end
+ %sum.1 = phi i32 [ %sum.0.lcssa, %for.end ], [ %mul, %if.else ]
+ ret i32 %sum.1
+}
+
+declare void @llvm.va_start(i8*)
+
+declare void @llvm.va_end(i8*)
+
+; Check that we handle inline asm correctly.
+; CHECK-LABEL: inlineAsm:
+;
+; ENABLE: cbz w0, [[ELSE_LABEL:LBB[0-9_]+]]
+;
+; Prologue code.
+; Make sure we save the CSR used in the inline asm: x19.
+; CHECK: stp [[CSR1:x[0-9]+]], [[CSR2:x19]], [sp, #-16]!
+;
+; DISABLE: cbz w0, [[ELSE_LABEL:LBB[0-9_]+]]
+;
+; CHECK: movz [[IV:w[0-9]+]], #0xa
+;
+; CHECK: [[LOOP_LABEL:LBB[0-9_]+]]: ; %for.body
+; Inline asm statement.
+; CHECK: add x19, x19, #1
+; CHECK: sub [[IV]], [[IV]], #1
+; CHECK-NEXT: cbnz [[IV]], [[LOOP_LABEL]]
+; Next BB.
+; CHECK: mov w0, wzr
+; Epilogue code.
+; CHECK-NEXT: ldp [[CSR1]], [[CSR2]], [sp], #16
+; CHECK-NEXT: ret
+; Next BB.
+; CHECK: [[ELSE_LABEL]]: ; %if.else
+; CHECK-NEXT: lsl w0, w1, #1
+; Epilogue code.
+; DISABLE-NEXT: ldp [[CSR1]], [[CSR2]], [sp], #16
+; CHECK-NEXT: ret
+define i32 @inlineAsm(i32 %cond, i32 %N) {
+entry:
+ %tobool = icmp eq i32 %cond, 0
+ br i1 %tobool, label %if.else, label %for.body
+
+for.body: ; preds = %entry, %for.body
+ %i.03 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
+ tail call void asm sideeffect "add x19, x19, #1", "~{x19}"()
+ %inc = add nuw nsw i32 %i.03, 1
+ %exitcond = icmp eq i32 %inc, 10
+ br i1 %exitcond, label %if.end, label %for.body
+
+if.else: ; preds = %entry
+ %mul = shl nsw i32 %N, 1
+ br label %if.end
+
+if.end: ; preds = %for.body, %if.else
+ %sum.0 = phi i32 [ %mul, %if.else ], [ 0, %for.body ]
+ ret i32 %sum.0
+}
+
+; Check that we handle calls to variadic functions correctly.
+; CHECK-LABEL: callVariadicFunc:
+;
+; ENABLE: cbz w0, [[ELSE_LABEL:LBB[0-9_]+]]
+;
+; Prologue code.
+; CHECK: stp [[CSR1:x[0-9]+]], [[CSR2:x[0-9]+]], [sp, #-16]!
+; CHECK-NEXT: mov [[NEW_SP:x[0-9]+]], sp
+; CHECK-NEXT: sub sp, sp, #48
+;
+; DISABLE: cbz w0, [[ELSE_LABEL:LBB[0-9_]+]]
+; Setup of the varargs.
+; CHECK: stp x1, x1, [sp, #32]
+; CHECK-NEXT: stp x1, x1, [sp, #16]
+; CHECK-NEXT: stp x1, x1, [sp]
+; CHECK-NEXT: mov w0, w1
+; CHECK-NEXT: bl _someVariadicFunc
+; CHECK-NEXT: lsl w0, w0, #3
+;
+; DISABLE: b [[IFEND_LABEL:LBB[0-9_]+]]
+; DISABLE: [[ELSE_LABEL]]: ; %if.else
+; DISABLE-NEXT: lsl w0, w1, #1
+; DISABLE: [[IFEND_LABEL]]: ; %if.end
+;
+; Epilogue code.
+; CHECK: mov sp, [[NEW_SP]]
+; CHECK-NEXT: ldp [[CSR1]], [[CSR2]], [sp], #16
+; CHECK-NEXT: ret
+;
+; ENABLE: [[ELSE_LABEL]]: ; %if.else
+; ENABLE-NEXT: lsl w0, w1, #1
+; ENABLE-NEXT: ret
+define i32 @callVariadicFunc(i32 %cond, i32 %N) {
+entry:
+ %tobool = icmp eq i32 %cond, 0
+ br i1 %tobool, label %if.else, label %if.then
+
+if.then: ; preds = %entry
+ %call = tail call i32 (i32, ...) @someVariadicFunc(i32 %N, i32 %N, i32 %N, i32 %N, i32 %N, i32 %N, i32 %N)
+ %shl = shl i32 %call, 3
+ br label %if.end
+
+if.else: ; preds = %entry
+ %mul = shl nsw i32 %N, 1
+ br label %if.end
+
+if.end: ; preds = %if.else, %if.then
+ %sum.0 = phi i32 [ %shl, %if.then ], [ %mul, %if.else ]
+ ret i32 %sum.0
+}
+
+declare i32 @someVariadicFunc(i32, ...)
diff --git a/test/CodeGen/AArch64/arm64-sitofp-combine-chains.ll b/test/CodeGen/AArch64/arm64-sitofp-combine-chains.ll
index 10b433b97757..21131657820f 100644
--- a/test/CodeGen/AArch64/arm64-sitofp-combine-chains.ll
+++ b/test/CodeGen/AArch64/arm64-sitofp-combine-chains.ll
@@ -14,7 +14,7 @@ define float @foo() {
; CHECK: ldr [[SREG:s[0-9]+]], [x[[VARBASE]],
; CHECK: str wzr, [x[[VARBASE]],
- %val = load i32* @var, align 4
+ %val = load i32, i32* @var, align 4
store i32 0, i32* @var, align 4
%fltval = sitofp i32 %val to float
diff --git a/test/CodeGen/AArch64/arm64-smaxv.ll b/test/CodeGen/AArch64/arm64-smaxv.ll
index 183e667643cc..8cc4502f6caa 100644
--- a/test/CodeGen/AArch64/arm64-smaxv.ll
+++ b/test/CodeGen/AArch64/arm64-smaxv.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=arm64 -aarch64-neon-syntax=apple < %s | FileCheck %s
+; RUN: llc -march=arm64 -aarch64-neon-syntax=apple -asm-verbose=false < %s | FileCheck %s
define signext i8 @test_vmaxv_s8(<8 x i8> %a1) {
; CHECK: test_vmaxv_s8
@@ -65,6 +65,76 @@ entry:
ret i32 %vmaxv.i
}
+define <8 x i8> @test_vmaxv_s8_used_by_laneop(<8 x i8> %a1, <8 x i8> %a2) {
+; CHECK-LABEL: test_vmaxv_s8_used_by_laneop:
+; CHECK: smaxv.8b b[[REGNUM:[0-9]+]], v1
+; CHECK-NEXT: ins.b v0[3], v[[REGNUM]][0]
+; CHECK-NEXT: ret
+entry:
+ %0 = tail call i32 @llvm.aarch64.neon.smaxv.i32.v8i8(<8 x i8> %a2)
+ %1 = trunc i32 %0 to i8
+ %2 = insertelement <8 x i8> %a1, i8 %1, i32 3
+ ret <8 x i8> %2
+}
+
+define <4 x i16> @test_vmaxv_s16_used_by_laneop(<4 x i16> %a1, <4 x i16> %a2) {
+; CHECK-LABEL: test_vmaxv_s16_used_by_laneop:
+; CHECK: smaxv.4h h[[REGNUM:[0-9]+]], v1
+; CHECK-NEXT: ins.h v0[3], v[[REGNUM]][0]
+; CHECK-NEXT: ret
+entry:
+ %0 = tail call i32 @llvm.aarch64.neon.smaxv.i32.v4i16(<4 x i16> %a2)
+ %1 = trunc i32 %0 to i16
+ %2 = insertelement <4 x i16> %a1, i16 %1, i32 3
+ ret <4 x i16> %2
+}
+
+define <2 x i32> @test_vmaxv_s32_used_by_laneop(<2 x i32> %a1, <2 x i32> %a2) {
+; CHECK-LABEL: test_vmaxv_s32_used_by_laneop:
+; CHECK: smaxp.2s v[[REGNUM:[0-9]+]], v1, v1
+; CHECK-NEXT: ins.s v0[1], v[[REGNUM]][0]
+; CHECK-NEXT: ret
+entry:
+ %0 = tail call i32 @llvm.aarch64.neon.smaxv.i32.v2i32(<2 x i32> %a2)
+ %1 = insertelement <2 x i32> %a1, i32 %0, i32 1
+ ret <2 x i32> %1
+}
+
+define <16 x i8> @test_vmaxvq_s8_used_by_laneop(<16 x i8> %a1, <16 x i8> %a2) {
+; CHECK-LABEL: test_vmaxvq_s8_used_by_laneop:
+; CHECK: smaxv.16b b[[REGNUM:[0-9]+]], v1
+; CHECK-NEXT: ins.b v0[3], v[[REGNUM]][0]
+; CHECK-NEXT: ret
+entry:
+ %0 = tail call i32 @llvm.aarch64.neon.smaxv.i32.v16i8(<16 x i8> %a2)
+ %1 = trunc i32 %0 to i8
+ %2 = insertelement <16 x i8> %a1, i8 %1, i32 3
+ ret <16 x i8> %2
+}
+
+define <8 x i16> @test_vmaxvq_s16_used_by_laneop(<8 x i16> %a1, <8 x i16> %a2) {
+; CHECK-LABEL: test_vmaxvq_s16_used_by_laneop:
+; CHECK: smaxv.8h h[[REGNUM:[0-9]+]], v1
+; CHECK-NEXT: ins.h v0[3], v[[REGNUM]][0]
+; CHECK-NEXT: ret
+entry:
+ %0 = tail call i32 @llvm.aarch64.neon.smaxv.i32.v8i16(<8 x i16> %a2)
+ %1 = trunc i32 %0 to i16
+ %2 = insertelement <8 x i16> %a1, i16 %1, i32 3
+ ret <8 x i16> %2
+}
+
+define <4 x i32> @test_vmaxvq_s32_used_by_laneop(<4 x i32> %a1, <4 x i32> %a2) {
+; CHECK-LABEL: test_vmaxvq_s32_used_by_laneop:
+; CHECK: smaxv.4s s[[REGNUM:[0-9]+]], v1
+; CHECK-NEXT: ins.s v0[3], v[[REGNUM]][0]
+; CHECK-NEXT: ret
+entry:
+ %0 = tail call i32 @llvm.aarch64.neon.smaxv.i32.v4i32(<4 x i32> %a2)
+ %1 = insertelement <4 x i32> %a1, i32 %0, i32 3
+ ret <4 x i32> %1
+}
+
declare i32 @llvm.aarch64.neon.smaxv.i32.v4i32(<4 x i32>)
declare i32 @llvm.aarch64.neon.smaxv.i32.v8i16(<8 x i16>)
declare i32 @llvm.aarch64.neon.smaxv.i32.v16i8(<16 x i8>)
diff --git a/test/CodeGen/AArch64/arm64-sminv.ll b/test/CodeGen/AArch64/arm64-sminv.ll
index 195c4e59dc41..c1650b5fb294 100644
--- a/test/CodeGen/AArch64/arm64-sminv.ll
+++ b/test/CodeGen/AArch64/arm64-sminv.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=arm64 -aarch64-neon-syntax=apple < %s | FileCheck %s
+; RUN: llc -march=arm64 -aarch64-neon-syntax=apple -asm-verbose=false < %s | FileCheck %s
define signext i8 @test_vminv_s8(<8 x i8> %a1) {
; CHECK: test_vminv_s8
@@ -65,6 +65,76 @@ entry:
ret i32 %vminv.i
}
+define <8 x i8> @test_vminv_s8_used_by_laneop(<8 x i8> %a1, <8 x i8> %a2) {
+; CHECK-LABEL: test_vminv_s8_used_by_laneop:
+; CHECK: sminv.8b b[[REGNUM:[0-9]+]], v1
+; CHECK-NEXT: ins.b v0[3], v[[REGNUM]][0]
+; CHECK-NEXT: ret
+entry:
+ %0 = tail call i32 @llvm.aarch64.neon.sminv.i32.v8i8(<8 x i8> %a2)
+ %1 = trunc i32 %0 to i8
+ %2 = insertelement <8 x i8> %a1, i8 %1, i32 3
+ ret <8 x i8> %2
+}
+
+define <4 x i16> @test_vminv_s16_used_by_laneop(<4 x i16> %a1, <4 x i16> %a2) {
+; CHECK-LABEL: test_vminv_s16_used_by_laneop:
+; CHECK: sminv.4h h[[REGNUM:[0-9]+]], v1
+; CHECK-NEXT: ins.h v0[3], v[[REGNUM]][0]
+; CHECK-NEXT: ret
+entry:
+ %0 = tail call i32 @llvm.aarch64.neon.sminv.i32.v4i16(<4 x i16> %a2)
+ %1 = trunc i32 %0 to i16
+ %2 = insertelement <4 x i16> %a1, i16 %1, i32 3
+ ret <4 x i16> %2
+}
+
+define <2 x i32> @test_vminv_s32_used_by_laneop(<2 x i32> %a1, <2 x i32> %a2) {
+; CHECK-LABEL: test_vminv_s32_used_by_laneop:
+; CHECK: sminp.2s v[[REGNUM:[0-9]+]], v1, v1
+; CHECK-NEXT: ins.s v0[1], v[[REGNUM]][0]
+; CHECK-NEXT: ret
+entry:
+ %0 = tail call i32 @llvm.aarch64.neon.sminv.i32.v2i32(<2 x i32> %a2)
+ %1 = insertelement <2 x i32> %a1, i32 %0, i32 1
+ ret <2 x i32> %1
+}
+
+define <16 x i8> @test_vminvq_s8_used_by_laneop(<16 x i8> %a1, <16 x i8> %a2) {
+; CHECK-LABEL: test_vminvq_s8_used_by_laneop:
+; CHECK: sminv.16b b[[REGNUM:[0-9]+]], v1
+; CHECK-NEXT: ins.b v0[3], v[[REGNUM]][0]
+; CHECK-NEXT: ret
+entry:
+ %0 = tail call i32 @llvm.aarch64.neon.sminv.i32.v16i8(<16 x i8> %a2)
+ %1 = trunc i32 %0 to i8
+ %2 = insertelement <16 x i8> %a1, i8 %1, i32 3
+ ret <16 x i8> %2
+}
+
+define <8 x i16> @test_vminvq_s16_used_by_laneop(<8 x i16> %a1, <8 x i16> %a2) {
+; CHECK-LABEL: test_vminvq_s16_used_by_laneop:
+; CHECK: sminv.8h h[[REGNUM:[0-9]+]], v1
+; CHECK-NEXT: ins.h v0[3], v[[REGNUM]][0]
+; CHECK-NEXT: ret
+entry:
+ %0 = tail call i32 @llvm.aarch64.neon.sminv.i32.v8i16(<8 x i16> %a2)
+ %1 = trunc i32 %0 to i16
+ %2 = insertelement <8 x i16> %a1, i16 %1, i32 3
+ ret <8 x i16> %2
+}
+
+define <4 x i32> @test_vminvq_s32_used_by_laneop(<4 x i32> %a1, <4 x i32> %a2) {
+; CHECK-LABEL: test_vminvq_s32_used_by_laneop:
+; CHECK: sminv.4s s[[REGNUM:[0-9]+]], v1
+; CHECK-NEXT: ins.s v0[3], v[[REGNUM]][0]
+; CHECK-NEXT: ret
+entry:
+ %0 = tail call i32 @llvm.aarch64.neon.sminv.i32.v4i32(<4 x i32> %a2)
+ %1 = insertelement <4 x i32> %a1, i32 %0, i32 3
+ ret <4 x i32> %1
+}
+
declare i32 @llvm.aarch64.neon.sminv.i32.v4i32(<4 x i32>)
declare i32 @llvm.aarch64.neon.sminv.i32.v8i16(<8 x i16>)
declare i32 @llvm.aarch64.neon.sminv.i32.v16i8(<16 x i8>)
diff --git a/test/CodeGen/AArch64/arm64-spill-lr.ll b/test/CodeGen/AArch64/arm64-spill-lr.ll
index fb6588e6ae46..88109088a2ff 100644
--- a/test/CodeGen/AArch64/arm64-spill-lr.ll
+++ b/test/CodeGen/AArch64/arm64-spill-lr.ll
@@ -11,31 +11,31 @@ entry:
%stack = alloca [128 x i32], align 4
%0 = bitcast [128 x i32]* %stack to i8*
%idxprom = sext i32 %a to i64
- %arrayidx = getelementptr inbounds [128 x i32]* %stack, i64 0, i64 %idxprom
+ %arrayidx = getelementptr inbounds [128 x i32], [128 x i32]* %stack, i64 0, i64 %idxprom
store i32 %b, i32* %arrayidx, align 4
- %1 = load volatile i32* @bar, align 4
- %2 = load volatile i32* @bar, align 4
- %3 = load volatile i32* @bar, align 4
- %4 = load volatile i32* @bar, align 4
- %5 = load volatile i32* @bar, align 4
- %6 = load volatile i32* @bar, align 4
- %7 = load volatile i32* @bar, align 4
- %8 = load volatile i32* @bar, align 4
- %9 = load volatile i32* @bar, align 4
- %10 = load volatile i32* @bar, align 4
- %11 = load volatile i32* @bar, align 4
- %12 = load volatile i32* @bar, align 4
- %13 = load volatile i32* @bar, align 4
- %14 = load volatile i32* @bar, align 4
- %15 = load volatile i32* @bar, align 4
- %16 = load volatile i32* @bar, align 4
- %17 = load volatile i32* @bar, align 4
- %18 = load volatile i32* @bar, align 4
- %19 = load volatile i32* @bar, align 4
- %20 = load volatile i32* @bar, align 4
+ %1 = load volatile i32, i32* @bar, align 4
+ %2 = load volatile i32, i32* @bar, align 4
+ %3 = load volatile i32, i32* @bar, align 4
+ %4 = load volatile i32, i32* @bar, align 4
+ %5 = load volatile i32, i32* @bar, align 4
+ %6 = load volatile i32, i32* @bar, align 4
+ %7 = load volatile i32, i32* @bar, align 4
+ %8 = load volatile i32, i32* @bar, align 4
+ %9 = load volatile i32, i32* @bar, align 4
+ %10 = load volatile i32, i32* @bar, align 4
+ %11 = load volatile i32, i32* @bar, align 4
+ %12 = load volatile i32, i32* @bar, align 4
+ %13 = load volatile i32, i32* @bar, align 4
+ %14 = load volatile i32, i32* @bar, align 4
+ %15 = load volatile i32, i32* @bar, align 4
+ %16 = load volatile i32, i32* @bar, align 4
+ %17 = load volatile i32, i32* @bar, align 4
+ %18 = load volatile i32, i32* @bar, align 4
+ %19 = load volatile i32, i32* @bar, align 4
+ %20 = load volatile i32, i32* @bar, align 4
%idxprom1 = sext i32 %c to i64
- %arrayidx2 = getelementptr inbounds [128 x i32]* %stack, i64 0, i64 %idxprom1
- %21 = load i32* %arrayidx2, align 4
+ %arrayidx2 = getelementptr inbounds [128 x i32], [128 x i32]* %stack, i64 0, i64 %idxprom1
+ %21 = load i32, i32* %arrayidx2, align 4
%factor = mul i32 %h, -2
%factor67 = mul i32 %g, -2
%factor68 = mul i32 %f, -2
diff --git a/test/CodeGen/AArch64/arm64-spill.ll b/test/CodeGen/AArch64/arm64-spill.ll
index 47cdc2bd95e4..9c0cf38f9a2e 100644
--- a/test/CodeGen/AArch64/arm64-spill.ll
+++ b/test/CodeGen/AArch64/arm64-spill.ll
@@ -8,7 +8,7 @@
; CHECK: st1.2d
define void @fpr128(<4 x float>* %p) nounwind ssp {
entry:
- %x = load <4 x float>* %p, align 16
+ %x = load <4 x float>, <4 x float>* %p, align 16
call void asm sideeffect "; inlineasm", "~{q0},~{q1},~{q2},~{q3},~{q4},~{q5},~{q6},~{q7},~{q8},~{q9},~{q10},~{q11},~{q12},~{q13},~{q14},~{q15},~{q16},~{q17},~{q18},~{q19},~{q20},~{q21},~{q22},~{q23},~{q24},~{q25},~{q26},~{q27},~{q28},~{q29},~{q30},~{q31},~{x0},~{x1},~{x2},~{x3},~{x4},~{x5},~{x6},~{x7},~{x8},~{x9},~{x10},~{x11},~{x12},~{x13},~{x14},~{x15},~{x16},~{x17},~{x18},~{x19},~{x20},~{x21},~{x22},~{x23},~{x24},~{x25},~{x26},~{x27},~{x28},~{fp},~{lr},~{sp},~{memory}"() nounwind
store <4 x float> %x, <4 x float>* %p, align 16
ret void
diff --git a/test/CodeGen/AArch64/arm64-st1.ll b/test/CodeGen/AArch64/arm64-st1.ll
index 76d52f44b480..0387a91ea0e8 100644
--- a/test/CodeGen/AArch64/arm64-st1.ll
+++ b/test/CodeGen/AArch64/arm64-st1.ll
@@ -12,7 +12,7 @@ define void @st1lane_ro_16b(<16 x i8> %A, i8* %D, i64 %offset) {
; CHECK-LABEL: st1lane_ro_16b
; CHECK: add x[[XREG:[0-9]+]], x0, x1
; CHECK: st1.b { v0 }[1], [x[[XREG]]]
- %ptr = getelementptr i8* %D, i64 %offset
+ %ptr = getelementptr i8, i8* %D, i64 %offset
%tmp = extractelement <16 x i8> %A, i32 1
store i8 %tmp, i8* %ptr
ret void
@@ -22,7 +22,7 @@ define void @st1lane0_ro_16b(<16 x i8> %A, i8* %D, i64 %offset) {
; CHECK-LABEL: st1lane0_ro_16b
; CHECK: add x[[XREG:[0-9]+]], x0, x1
; CHECK: st1.b { v0 }[0], [x[[XREG]]]
- %ptr = getelementptr i8* %D, i64 %offset
+ %ptr = getelementptr i8, i8* %D, i64 %offset
%tmp = extractelement <16 x i8> %A, i32 0
store i8 %tmp, i8* %ptr
ret void
@@ -40,7 +40,7 @@ define void @st1lane_ro_8h(<8 x i16> %A, i16* %D, i64 %offset) {
; CHECK-LABEL: st1lane_ro_8h
; CHECK: add x[[XREG:[0-9]+]], x0, x1
; CHECK: st1.h { v0 }[1], [x[[XREG]]]
- %ptr = getelementptr i16* %D, i64 %offset
+ %ptr = getelementptr i16, i16* %D, i64 %offset
%tmp = extractelement <8 x i16> %A, i32 1
store i16 %tmp, i16* %ptr
ret void
@@ -49,7 +49,7 @@ define void @st1lane_ro_8h(<8 x i16> %A, i16* %D, i64 %offset) {
define void @st1lane0_ro_8h(<8 x i16> %A, i16* %D, i64 %offset) {
; CHECK-LABEL: st1lane0_ro_8h
; CHECK: str h0, [x0, x1, lsl #1]
- %ptr = getelementptr i16* %D, i64 %offset
+ %ptr = getelementptr i16, i16* %D, i64 %offset
%tmp = extractelement <8 x i16> %A, i32 0
store i16 %tmp, i16* %ptr
ret void
@@ -67,7 +67,7 @@ define void @st1lane_ro_4s(<4 x i32> %A, i32* %D, i64 %offset) {
; CHECK-LABEL: st1lane_ro_4s
; CHECK: add x[[XREG:[0-9]+]], x0, x1
; CHECK: st1.s { v0 }[1], [x[[XREG]]]
- %ptr = getelementptr i32* %D, i64 %offset
+ %ptr = getelementptr i32, i32* %D, i64 %offset
%tmp = extractelement <4 x i32> %A, i32 1
store i32 %tmp, i32* %ptr
ret void
@@ -76,7 +76,7 @@ define void @st1lane_ro_4s(<4 x i32> %A, i32* %D, i64 %offset) {
define void @st1lane0_ro_4s(<4 x i32> %A, i32* %D, i64 %offset) {
; CHECK-LABEL: st1lane0_ro_4s
; CHECK: str s0, [x0, x1, lsl #2]
- %ptr = getelementptr i32* %D, i64 %offset
+ %ptr = getelementptr i32, i32* %D, i64 %offset
%tmp = extractelement <4 x i32> %A, i32 0
store i32 %tmp, i32* %ptr
ret void
@@ -94,7 +94,7 @@ define void @st1lane_ro_4s_float(<4 x float> %A, float* %D, i64 %offset) {
; CHECK-LABEL: st1lane_ro_4s_float
; CHECK: add x[[XREG:[0-9]+]], x0, x1
; CHECK: st1.s { v0 }[1], [x[[XREG]]]
- %ptr = getelementptr float* %D, i64 %offset
+ %ptr = getelementptr float, float* %D, i64 %offset
%tmp = extractelement <4 x float> %A, i32 1
store float %tmp, float* %ptr
ret void
@@ -103,7 +103,7 @@ define void @st1lane_ro_4s_float(<4 x float> %A, float* %D, i64 %offset) {
define void @st1lane0_ro_4s_float(<4 x float> %A, float* %D, i64 %offset) {
; CHECK-LABEL: st1lane0_ro_4s_float
; CHECK: str s0, [x0, x1, lsl #2]
- %ptr = getelementptr float* %D, i64 %offset
+ %ptr = getelementptr float, float* %D, i64 %offset
%tmp = extractelement <4 x float> %A, i32 0
store float %tmp, float* %ptr
ret void
@@ -121,7 +121,7 @@ define void @st1lane_ro_2d(<2 x i64> %A, i64* %D, i64 %offset) {
; CHECK-LABEL: st1lane_ro_2d
; CHECK: add x[[XREG:[0-9]+]], x0, x1
; CHECK: st1.d { v0 }[1], [x[[XREG]]]
- %ptr = getelementptr i64* %D, i64 %offset
+ %ptr = getelementptr i64, i64* %D, i64 %offset
%tmp = extractelement <2 x i64> %A, i32 1
store i64 %tmp, i64* %ptr
ret void
@@ -130,7 +130,7 @@ define void @st1lane_ro_2d(<2 x i64> %A, i64* %D, i64 %offset) {
define void @st1lane0_ro_2d(<2 x i64> %A, i64* %D, i64 %offset) {
; CHECK-LABEL: st1lane0_ro_2d
; CHECK: str d0, [x0, x1, lsl #3]
- %ptr = getelementptr i64* %D, i64 %offset
+ %ptr = getelementptr i64, i64* %D, i64 %offset
%tmp = extractelement <2 x i64> %A, i32 0
store i64 %tmp, i64* %ptr
ret void
@@ -148,7 +148,7 @@ define void @st1lane_ro_2d_double(<2 x double> %A, double* %D, i64 %offset) {
; CHECK-LABEL: st1lane_ro_2d_double
; CHECK: add x[[XREG:[0-9]+]], x0, x1
; CHECK: st1.d { v0 }[1], [x[[XREG]]]
- %ptr = getelementptr double* %D, i64 %offset
+ %ptr = getelementptr double, double* %D, i64 %offset
%tmp = extractelement <2 x double> %A, i32 1
store double %tmp, double* %ptr
ret void
@@ -157,7 +157,7 @@ define void @st1lane_ro_2d_double(<2 x double> %A, double* %D, i64 %offset) {
define void @st1lane0_ro_2d_double(<2 x double> %A, double* %D, i64 %offset) {
; CHECK-LABEL: st1lane0_ro_2d_double
; CHECK: str d0, [x0, x1, lsl #3]
- %ptr = getelementptr double* %D, i64 %offset
+ %ptr = getelementptr double, double* %D, i64 %offset
%tmp = extractelement <2 x double> %A, i32 0
store double %tmp, double* %ptr
ret void
@@ -175,7 +175,7 @@ define void @st1lane_ro_8b(<8 x i8> %A, i8* %D, i64 %offset) {
; CHECK-LABEL: st1lane_ro_8b
; CHECK: add x[[XREG:[0-9]+]], x0, x1
; CHECK: st1.b { v0 }[1], [x[[XREG]]]
- %ptr = getelementptr i8* %D, i64 %offset
+ %ptr = getelementptr i8, i8* %D, i64 %offset
%tmp = extractelement <8 x i8> %A, i32 1
store i8 %tmp, i8* %ptr
ret void
@@ -185,7 +185,7 @@ define void @st1lane0_ro_8b(<8 x i8> %A, i8* %D, i64 %offset) {
; CHECK-LABEL: st1lane0_ro_8b
; CHECK: add x[[XREG:[0-9]+]], x0, x1
; CHECK: st1.b { v0 }[0], [x[[XREG]]]
- %ptr = getelementptr i8* %D, i64 %offset
+ %ptr = getelementptr i8, i8* %D, i64 %offset
%tmp = extractelement <8 x i8> %A, i32 0
store i8 %tmp, i8* %ptr
ret void
@@ -203,7 +203,7 @@ define void @st1lane_ro_4h(<4 x i16> %A, i16* %D, i64 %offset) {
; CHECK-LABEL: st1lane_ro_4h
; CHECK: add x[[XREG:[0-9]+]], x0, x1
; CHECK: st1.h { v0 }[1], [x[[XREG]]]
- %ptr = getelementptr i16* %D, i64 %offset
+ %ptr = getelementptr i16, i16* %D, i64 %offset
%tmp = extractelement <4 x i16> %A, i32 1
store i16 %tmp, i16* %ptr
ret void
@@ -212,7 +212,7 @@ define void @st1lane_ro_4h(<4 x i16> %A, i16* %D, i64 %offset) {
define void @st1lane0_ro_4h(<4 x i16> %A, i16* %D, i64 %offset) {
; CHECK-LABEL: st1lane0_ro_4h
; CHECK: str h0, [x0, x1, lsl #1]
- %ptr = getelementptr i16* %D, i64 %offset
+ %ptr = getelementptr i16, i16* %D, i64 %offset
%tmp = extractelement <4 x i16> %A, i32 0
store i16 %tmp, i16* %ptr
ret void
@@ -230,7 +230,7 @@ define void @st1lane_ro_2s(<2 x i32> %A, i32* %D, i64 %offset) {
; CHECK-LABEL: st1lane_ro_2s
; CHECK: add x[[XREG:[0-9]+]], x0, x1
; CHECK: st1.s { v0 }[1], [x[[XREG]]]
- %ptr = getelementptr i32* %D, i64 %offset
+ %ptr = getelementptr i32, i32* %D, i64 %offset
%tmp = extractelement <2 x i32> %A, i32 1
store i32 %tmp, i32* %ptr
ret void
@@ -239,7 +239,7 @@ define void @st1lane_ro_2s(<2 x i32> %A, i32* %D, i64 %offset) {
define void @st1lane0_ro_2s(<2 x i32> %A, i32* %D, i64 %offset) {
; CHECK-LABEL: st1lane0_ro_2s
; CHECK: str s0, [x0, x1, lsl #2]
- %ptr = getelementptr i32* %D, i64 %offset
+ %ptr = getelementptr i32, i32* %D, i64 %offset
%tmp = extractelement <2 x i32> %A, i32 0
store i32 %tmp, i32* %ptr
ret void
@@ -257,7 +257,7 @@ define void @st1lane_ro_2s_float(<2 x float> %A, float* %D, i64 %offset) {
; CHECK-LABEL: st1lane_ro_2s_float
; CHECK: add x[[XREG:[0-9]+]], x0, x1
; CHECK: st1.s { v0 }[1], [x[[XREG]]]
- %ptr = getelementptr float* %D, i64 %offset
+ %ptr = getelementptr float, float* %D, i64 %offset
%tmp = extractelement <2 x float> %A, i32 1
store float %tmp, float* %ptr
ret void
@@ -266,7 +266,7 @@ define void @st1lane_ro_2s_float(<2 x float> %A, float* %D, i64 %offset) {
define void @st1lane0_ro_2s_float(<2 x float> %A, float* %D, i64 %offset) {
; CHECK-LABEL: st1lane0_ro_2s_float
; CHECK: str s0, [x0, x1, lsl #2]
- %ptr = getelementptr float* %D, i64 %offset
+ %ptr = getelementptr float, float* %D, i64 %offset
%tmp = extractelement <2 x float> %A, i32 0
store float %tmp, float* %ptr
ret void
@@ -374,21 +374,21 @@ declare void @llvm.aarch64.neon.st4lane.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i
define void @st2_8b(<8 x i8> %A, <8 x i8> %B, i8* %P) nounwind {
; CHECK-LABEL: st2_8b
-; CHECK st2.8b
+; CHECK: st2.8b
call void @llvm.aarch64.neon.st2.v8i8.p0i8(<8 x i8> %A, <8 x i8> %B, i8* %P)
ret void
}
define void @st3_8b(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, i8* %P) nounwind {
; CHECK-LABEL: st3_8b
-; CHECK st3.8b
+; CHECK: st3.8b
call void @llvm.aarch64.neon.st3.v8i8.p0i8(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, i8* %P)
ret void
}
define void @st4_8b(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i8* %P) nounwind {
; CHECK-LABEL: st4_8b
-; CHECK st4.8b
+; CHECK: st4.8b
call void @llvm.aarch64.neon.st4.v8i8.p0i8(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i8* %P)
ret void
}
@@ -399,21 +399,21 @@ declare void @llvm.aarch64.neon.st4.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, <8 x
define void @st2_16b(<16 x i8> %A, <16 x i8> %B, i8* %P) nounwind {
; CHECK-LABEL: st2_16b
-; CHECK st2.16b
+; CHECK: st2.16b
call void @llvm.aarch64.neon.st2.v16i8.p0i8(<16 x i8> %A, <16 x i8> %B, i8* %P)
ret void
}
define void @st3_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, i8* %P) nounwind {
; CHECK-LABEL: st3_16b
-; CHECK st3.16b
+; CHECK: st3.16b
call void @llvm.aarch64.neon.st3.v16i8.p0i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, i8* %P)
ret void
}
define void @st4_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i8* %P) nounwind {
; CHECK-LABEL: st4_16b
-; CHECK st4.16b
+; CHECK: st4.16b
call void @llvm.aarch64.neon.st4.v16i8.p0i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i8* %P)
ret void
}
@@ -424,21 +424,21 @@ declare void @llvm.aarch64.neon.st4.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>,
define void @st2_4h(<4 x i16> %A, <4 x i16> %B, i16* %P) nounwind {
; CHECK-LABEL: st2_4h
-; CHECK st2.4h
+; CHECK: st2.4h
call void @llvm.aarch64.neon.st2.v4i16.p0i16(<4 x i16> %A, <4 x i16> %B, i16* %P)
ret void
}
define void @st3_4h(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, i16* %P) nounwind {
; CHECK-LABEL: st3_4h
-; CHECK st3.4h
+; CHECK: st3.4h
call void @llvm.aarch64.neon.st3.v4i16.p0i16(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, i16* %P)
ret void
}
define void @st4_4h(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i16* %P) nounwind {
; CHECK-LABEL: st4_4h
-; CHECK st4.4h
+; CHECK: st4.4h
call void @llvm.aarch64.neon.st4.v4i16.p0i16(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i16* %P)
ret void
}
@@ -449,21 +449,21 @@ declare void @llvm.aarch64.neon.st4.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i16>,
define void @st2_8h(<8 x i16> %A, <8 x i16> %B, i16* %P) nounwind {
; CHECK-LABEL: st2_8h
-; CHECK st2.8h
+; CHECK: st2.8h
call void @llvm.aarch64.neon.st2.v8i16.p0i16(<8 x i16> %A, <8 x i16> %B, i16* %P)
ret void
}
define void @st3_8h(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, i16* %P) nounwind {
; CHECK-LABEL: st3_8h
-; CHECK st3.8h
+; CHECK: st3.8h
call void @llvm.aarch64.neon.st3.v8i16.p0i16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, i16* %P)
ret void
}
define void @st4_8h(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i16* %P) nounwind {
; CHECK-LABEL: st4_8h
-; CHECK st4.8h
+; CHECK: st4.8h
call void @llvm.aarch64.neon.st4.v8i16.p0i16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i16* %P)
ret void
}
@@ -474,21 +474,21 @@ declare void @llvm.aarch64.neon.st4.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>,
define void @st2_2s(<2 x i32> %A, <2 x i32> %B, i32* %P) nounwind {
; CHECK-LABEL: st2_2s
-; CHECK st2.2s
+; CHECK: st2.2s
call void @llvm.aarch64.neon.st2.v2i32.p0i32(<2 x i32> %A, <2 x i32> %B, i32* %P)
ret void
}
define void @st3_2s(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C, i32* %P) nounwind {
; CHECK-LABEL: st3_2s
-; CHECK st3.2s
+; CHECK: st3.2s
call void @llvm.aarch64.neon.st3.v2i32.p0i32(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C, i32* %P)
ret void
}
define void @st4_2s(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i32* %P) nounwind {
; CHECK-LABEL: st4_2s
-; CHECK st4.2s
+; CHECK: st4.2s
call void @llvm.aarch64.neon.st4.v2i32.p0i32(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i32* %P)
ret void
}
@@ -499,21 +499,21 @@ declare void @llvm.aarch64.neon.st4.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i32>,
define void @st2_4s(<4 x i32> %A, <4 x i32> %B, i32* %P) nounwind {
; CHECK-LABEL: st2_4s
-; CHECK st2.4s
+; CHECK: st2.4s
call void @llvm.aarch64.neon.st2.v4i32.p0i32(<4 x i32> %A, <4 x i32> %B, i32* %P)
ret void
}
define void @st3_4s(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, i32* %P) nounwind {
; CHECK-LABEL: st3_4s
-; CHECK st3.4s
+; CHECK: st3.4s
call void @llvm.aarch64.neon.st3.v4i32.p0i32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, i32* %P)
ret void
}
define void @st4_4s(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i32* %P) nounwind {
; CHECK-LABEL: st4_4s
-; CHECK st4.4s
+; CHECK: st4.4s
call void @llvm.aarch64.neon.st4.v4i32.p0i32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i32* %P)
ret void
}
@@ -522,23 +522,24 @@ declare void @llvm.aarch64.neon.st2.v4i32.p0i32(<4 x i32>, <4 x i32>, i32*) noun
declare void @llvm.aarch64.neon.st3.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, i32*) nounwind readonly
declare void @llvm.aarch64.neon.st4.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32*) nounwind readonly
+; If there's only one element, st2/3/4 don't make much sense; stick to st1.
define void @st2_1d(<1 x i64> %A, <1 x i64> %B, i64* %P) nounwind {
; CHECK-LABEL: st2_1d
-; CHECK st1.2d
+; CHECK: st1.1d
call void @llvm.aarch64.neon.st2.v1i64.p0i64(<1 x i64> %A, <1 x i64> %B, i64* %P)
ret void
}
define void @st3_1d(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, i64* %P) nounwind {
; CHECK-LABEL: st3_1d
-; CHECK st1.3d
+; CHECK: st1.1d
call void @llvm.aarch64.neon.st3.v1i64.p0i64(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, i64* %P)
ret void
}
define void @st4_1d(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64* %P) nounwind {
; CHECK-LABEL: st4_1d
-; CHECK st1.4d
+; CHECK: st1.1d
call void @llvm.aarch64.neon.st4.v1i64.p0i64(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64* %P)
ret void
}
@@ -549,21 +550,21 @@ declare void @llvm.aarch64.neon.st4.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>,
define void @st2_2d(<2 x i64> %A, <2 x i64> %B, i64* %P) nounwind {
; CHECK-LABEL: st2_2d
-; CHECK st2.2d
+; CHECK: st2.2d
call void @llvm.aarch64.neon.st2.v2i64.p0i64(<2 x i64> %A, <2 x i64> %B, i64* %P)
ret void
}
define void @st3_2d(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, i64* %P) nounwind {
; CHECK-LABEL: st3_2d
-; CHECK st2.3d
+; CHECK: st3.2d
call void @llvm.aarch64.neon.st3.v2i64.p0i64(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, i64* %P)
ret void
}
define void @st4_2d(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64* %P) nounwind {
; CHECK-LABEL: st4_2d
-; CHECK st2.4d
+; CHECK: st4.2d
call void @llvm.aarch64.neon.st4.v2i64.p0i64(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64* %P)
ret void
}
diff --git a/test/CodeGen/AArch64/arm64-stack-no-frame.ll b/test/CodeGen/AArch64/arm64-stack-no-frame.ll
index b5970c00ff94..22a67070a129 100644
--- a/test/CodeGen/AArch64/arm64-stack-no-frame.ll
+++ b/test/CodeGen/AArch64/arm64-stack-no-frame.ll
@@ -9,10 +9,10 @@ define void @test_stack_no_frame() {
; CHECK: test_stack_no_frame
; CHECK: sub sp, sp, #[[STACKSIZE:[0-9]+]]
%local = alloca [20 x i64]
- %val = load volatile [20 x i64]* @global, align 8
+ %val = load volatile [20 x i64], [20 x i64]* @global, align 8
store volatile [20 x i64] %val, [20 x i64]* %local, align 8
- %val2 = load volatile [20 x i64]* %local, align 8
+ %val2 = load volatile [20 x i64], [20 x i64]* %local, align 8
store volatile [20 x i64] %val2, [20 x i64]* @global, align 8
; CHECK: add sp, sp, #[[STACKSIZE]]
diff --git a/test/CodeGen/AArch64/arm64-stackmap-nops.ll b/test/CodeGen/AArch64/arm64-stackmap-nops.ll
index 5915b64edf0a..2647ac442969 100644
--- a/test/CodeGen/AArch64/arm64-stackmap-nops.ll
+++ b/test/CodeGen/AArch64/arm64-stackmap-nops.ll
@@ -8,7 +8,7 @@ entry:
; CHECK: nop
; CHECK-NEXT: nop
; CHECK-NOT: nop
- tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 0, i32 16)
+ tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 0, i32 16)
ret void
}
diff --git a/test/CodeGen/AArch64/arm64-stackmap.ll b/test/CodeGen/AArch64/arm64-stackmap.ll
index 144c2fd2ee38..1a4df7a6f2d6 100644
--- a/test/CodeGen/AArch64/arm64-stackmap.ll
+++ b/test/CodeGen/AArch64/arm64-stackmap.ll
@@ -1,5 +1,5 @@
; RUN: llc -mtriple=arm64-apple-darwin < %s | FileCheck %s
-; RUN: llc -mtriple=arm64-apple-darwin -fast-isel -fast-isel-abort < %s | FileCheck %s
+; RUN: llc -mtriple=arm64-apple-darwin -fast-isel -fast-isel-abort=1 < %s | FileCheck %s
;
; Note: Print verbose stackmaps using -debug-only=stackmaps.
@@ -78,7 +78,7 @@ target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
define void @constantargs() {
entry:
%0 = inttoptr i64 244837814094590 to i8*
- tail call void (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i64 1, i32 20, i8* %0, i32 0, i64 65535, i64 65536, i64 4294967295, i64 4294967296)
+ tail call void (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.void(i64 1, i32 20, i8* %0, i32 0, i64 65535, i64 65536, i64 4294967295, i64 4294967296)
ret void
}
@@ -100,7 +100,7 @@ entry:
; Runtime void->void call.
call void inttoptr (i64 244837814094590 to void ()*)()
; Followed by inline OSR patchpoint with 12-byte shadow and 2 live vars.
- call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 3, i32 12, i64 %a, i64 %b)
+ call void (i64, i32, ...) @llvm.experimental.stackmap(i64 3, i32 12, i64 %a, i64 %b)
ret void
}
@@ -126,7 +126,7 @@ entry:
cold:
; OSR patchpoint with 12-byte nop-slide and 2 live vars.
%thunk = inttoptr i64 244837814094590 to i8*
- call void (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i64 4, i32 20, i8* %thunk, i32 0, i64 %a, i64 %b)
+ call void (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.void(i64 4, i32 20, i8* %thunk, i32 0, i64 %a, i64 %b)
unreachable
ret:
ret void
@@ -142,7 +142,7 @@ ret:
define i64 @propertyRead(i64* %obj) {
entry:
%resolveRead = inttoptr i64 244837814094590 to i8*
- %result = call i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 5, i32 20, i8* %resolveRead, i32 1, i64* %obj)
+ %result = call i64 (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.i64(i64 5, i32 20, i8* %resolveRead, i32 1, i64* %obj)
%add = add i64 %result, 3
ret i64 %add
}
@@ -162,7 +162,7 @@ entry:
define void @propertyWrite(i64 %dummy1, i64* %obj, i64 %dummy2, i64 %a) {
entry:
%resolveWrite = inttoptr i64 244837814094590 to i8*
- call anyregcc void (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i64 6, i32 20, i8* %resolveWrite, i32 2, i64* %obj, i64 %a)
+ call anyregcc void (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.void(i64 6, i32 20, i8* %resolveWrite, i32 2, i64* %obj, i64 %a)
ret void
}
@@ -184,7 +184,7 @@ entry:
define void @jsVoidCall(i64 %dummy1, i64* %obj, i64 %arg, i64 %l1, i64 %l2) {
entry:
%resolveCall = inttoptr i64 244837814094590 to i8*
- call void (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i64 7, i32 20, i8* %resolveCall, i32 2, i64* %obj, i64 %arg, i64 %l1, i64 %l2)
+ call void (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.void(i64 7, i32 20, i8* %resolveCall, i32 2, i64* %obj, i64 %arg, i64 %l1, i64 %l2)
ret void
}
@@ -206,7 +206,7 @@ entry:
define i64 @jsIntCall(i64 %dummy1, i64* %obj, i64 %arg, i64 %l1, i64 %l2) {
entry:
%resolveCall = inttoptr i64 244837814094590 to i8*
- %result = call i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 8, i32 20, i8* %resolveCall, i32 2, i64* %obj, i64 %arg, i64 %l1, i64 %l2)
+ %result = call i64 (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.i64(i64 8, i32 20, i8* %resolveCall, i32 2, i64* %obj, i64 %arg, i64 %l1, i64 %l2)
%add = add i64 %result, 3
ret i64 %add
}
@@ -226,7 +226,7 @@ entry:
; CHECK-NEXT: .short 29
define void @spilledValue(i64 %arg0, i64 %arg1, i64 %arg2, i64 %arg3, i64 %arg4, i64 %l0, i64 %l1, i64 %l2, i64 %l3, i64 %l4, i64 %l5, i64 %l6, i64 %l7, i64 %l8, i64 %l9, i64 %l10, i64 %l11, i64 %l12, i64 %l13, i64 %l14, i64 %l15, i64 %l16, i64 %l17, i64 %l18, i64 %l19, i64 %l20, i64 %l21, i64 %l22, i64 %l23, i64 %l24, i64 %l25, i64 %l26, i64 %l27) {
entry:
- call void (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i64 11, i32 20, i8* null, i32 5, i64 %arg0, i64 %arg1, i64 %arg2, i64 %arg3, i64 %arg4, i64 %l0, i64 %l1, i64 %l2, i64 %l3, i64 %l4, i64 %l5, i64 %l6, i64 %l7, i64 %l8, i64 %l9, i64 %l10, i64 %l11, i64 %l12, i64 %l13, i64 %l14, i64 %l15, i64 %l16, i64 %l17, i64 %l18, i64 %l19, i64 %l20, i64 %l21, i64 %l22, i64 %l23, i64 %l24, i64 %l25, i64 %l26, i64 %l27)
+ call void (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.void(i64 11, i32 20, i8* null, i32 5, i64 %arg0, i64 %arg1, i64 %arg2, i64 %arg3, i64 %arg4, i64 %l0, i64 %l1, i64 %l2, i64 %l3, i64 %l4, i64 %l5, i64 %l6, i64 %l7, i64 %l8, i64 %l9, i64 %l10, i64 %l11, i64 %l12, i64 %l13, i64 %l14, i64 %l15, i64 %l16, i64 %l17, i64 %l18, i64 %l19, i64 %l20, i64 %l21, i64 %l22, i64 %l23, i64 %l24, i64 %l25, i64 %l26, i64 %l27)
ret void
}
@@ -245,7 +245,7 @@ entry:
; CHECK-NEXT: .short 29
define webkit_jscc void @spilledStackMapValue(i64 %l0, i64 %l1, i64 %l2, i64 %l3, i64 %l4, i64 %l5, i64 %l6, i64 %l7, i64 %l8, i64 %l9, i64 %l10, i64 %l11, i64 %l12, i64 %l13, i64 %l14, i64 %l15, i64 %l16, i64 %l17, i64 %l18, i64 %l19, i64 %l20, i64 %l21, i64 %l22, i64 %l23, i64 %l24, i64 %l25, i64 %l26, i64 %l27, i64 %l28, i64 %l29) {
entry:
- call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 12, i32 16, i64 %l0, i64 %l1, i64 %l2, i64 %l3, i64 %l4, i64 %l5, i64 %l6, i64 %l7, i64 %l8, i64 %l9, i64 %l10, i64 %l11, i64 %l12, i64 %l13, i64 %l14, i64 %l15, i64 %l16, i64 %l17, i64 %l18, i64 %l19, i64 %l20, i64 %l21, i64 %l22, i64 %l23, i64 %l24, i64 %l25, i64 %l26, i64 %l27, i64 %l28, i64 %l29)
+ call void (i64, i32, ...) @llvm.experimental.stackmap(i64 12, i32 16, i64 %l0, i64 %l1, i64 %l2, i64 %l3, i64 %l4, i64 %l5, i64 %l6, i64 %l7, i64 %l8, i64 %l9, i64 %l10, i64 %l11, i64 %l12, i64 %l13, i64 %l14, i64 %l15, i64 %l16, i64 %l17, i64 %l18, i64 %l19, i64 %l20, i64 %l21, i64 %l22, i64 %l23, i64 %l24, i64 %l25, i64 %l26, i64 %l27, i64 %l28, i64 %l29)
ret void
}
@@ -263,7 +263,7 @@ entry:
; CHECK-NEXT: .long 33
define void @liveConstant() {
- tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 15, i32 8, i32 33)
+ tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 15, i32 8, i32 33)
ret void
}
@@ -280,7 +280,7 @@ define void @liveConstant() {
; CHECK-NEXT: .long -{{[0-9]+}}
define void @clobberLR(i32 %a) {
tail call void asm sideeffect "nop", "~{x0},~{x1},~{x2},~{x3},~{x4},~{x5},~{x6},~{x7},~{x8},~{x9},~{x10},~{x11},~{x12},~{x13},~{x14},~{x15},~{x16},~{x17},~{x18},~{x19},~{x20},~{x21},~{x22},~{x23},~{x24},~{x25},~{x26},~{x27},~{x28},~{x29},~{x31}"() nounwind
- tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 16, i32 8, i32 %a)
+ tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 16, i32 8, i32 %a)
ret void
}
diff --git a/test/CodeGen/AArch64/arm64-stp-aa.ll b/test/CodeGen/AArch64/arm64-stp-aa.ll
new file mode 100644
index 000000000000..82d343d976b5
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-stp-aa.ll
@@ -0,0 +1,111 @@
+; RUN: llc < %s -march=arm64 -enable-misched=false -aarch64-stp-suppress=false -verify-machineinstrs | FileCheck %s
+
+; The next set of tests makes sure we can combine the second instruction into
+; the first.
+
+; CHECK-LABEL: stp_int_aa
+; CHECK: stp w0, w1, [x2]
+; CHECK: ldr w0, [x2, #8]
+; CHECK: ret
+define i32 @stp_int_aa(i32 %a, i32 %b, i32* nocapture %p) nounwind {
+ store i32 %a, i32* %p, align 4
+ %ld.ptr = getelementptr inbounds i32, i32* %p, i64 2
+ %tmp = load i32, i32* %ld.ptr, align 4
+ %add.ptr = getelementptr inbounds i32, i32* %p, i64 1
+ store i32 %b, i32* %add.ptr, align 4
+ ret i32 %tmp
+}
+
+; CHECK-LABEL: stp_long_aa
+; CHECK: stp x0, x1, [x2]
+; CHECK: ldr x0, [x2, #16]
+; CHECK: ret
+define i64 @stp_long_aa(i64 %a, i64 %b, i64* nocapture %p) nounwind {
+ store i64 %a, i64* %p, align 8
+ %ld.ptr = getelementptr inbounds i64, i64* %p, i64 2
+ %tmp = load i64, i64* %ld.ptr, align 4
+ %add.ptr = getelementptr inbounds i64, i64* %p, i64 1
+ store i64 %b, i64* %add.ptr, align 8
+ ret i64 %tmp
+}
+
+; CHECK-LABEL: stp_float_aa
+; CHECK: stp s0, s1, [x0]
+; CHECK: ldr s0, [x0, #8]
+; CHECK: ret
+define float @stp_float_aa(float %a, float %b, float* nocapture %p) nounwind {
+ store float %a, float* %p, align 4
+ %ld.ptr = getelementptr inbounds float, float* %p, i64 2
+ %tmp = load float, float* %ld.ptr, align 4
+ %add.ptr = getelementptr inbounds float, float* %p, i64 1
+ store float %b, float* %add.ptr, align 4
+ ret float %tmp
+}
+
+; CHECK-LABEL: stp_double_aa
+; CHECK: stp d0, d1, [x0]
+; CHECK: ldr d0, [x0, #16]
+; CHECK: ret
+define double @stp_double_aa(double %a, double %b, double* nocapture %p) nounwind {
+ store double %a, double* %p, align 8
+ %ld.ptr = getelementptr inbounds double, double* %p, i64 2
+ %tmp = load double, double* %ld.ptr, align 4
+ %add.ptr = getelementptr inbounds double, double* %p, i64 1
+ store double %b, double* %add.ptr, align 8
+ ret double %tmp
+}
+
+; The next set of tests makes sure we can combine the first instruction into
+; the second.
+
+; CHECK-LABEL: stp_int_aa_after
+; CHECK: ldr w0, [x3, #4]
+; CHECK: stp w1, w2, [x3]
+; CHECK: ret
+define i32 @stp_int_aa_after(i32 %w0, i32 %a, i32 %b, i32* nocapture %p) nounwind {
+ store i32 %a, i32* %p, align 4
+ %ld.ptr = getelementptr inbounds i32, i32* %p, i64 1
+ %tmp = load i32, i32* %ld.ptr, align 4
+ %add.ptr = getelementptr inbounds i32, i32* %p, i64 1
+ store i32 %b, i32* %add.ptr, align 4
+ ret i32 %tmp
+}
+
+; CHECK-LABEL: stp_long_aa_after
+; CHECK: ldr x0, [x3, #8]
+; CHECK: stp x1, x2, [x3]
+; CHECK: ret
+define i64 @stp_long_aa_after(i64 %x0, i64 %a, i64 %b, i64* nocapture %p) nounwind {
+ store i64 %a, i64* %p, align 8
+ %ld.ptr = getelementptr inbounds i64, i64* %p, i64 1
+ %tmp = load i64, i64* %ld.ptr, align 4
+ %add.ptr = getelementptr inbounds i64, i64* %p, i64 1
+ store i64 %b, i64* %add.ptr, align 8
+ ret i64 %tmp
+}
+
+; CHECK-LABEL: stp_float_aa_after
+; CHECK: ldr s0, [x0, #4]
+; CHECK: stp s1, s2, [x0]
+; CHECK: ret
+define float @stp_float_aa_after(float %s0, float %a, float %b, float* nocapture %p) nounwind {
+ store float %a, float* %p, align 4
+ %ld.ptr = getelementptr inbounds float, float* %p, i64 1
+ %tmp = load float, float* %ld.ptr, align 4
+ %add.ptr = getelementptr inbounds float, float* %p, i64 1
+ store float %b, float* %add.ptr, align 4
+ ret float %tmp
+}
+
+; CHECK-LABEL: stp_double_aa_after
+; CHECK: ldr d0, [x0, #8]
+; CHECK: stp d1, d2, [x0]
+; CHECK: ret
+define double @stp_double_aa_after(double %d0, double %a, double %b, double* nocapture %p) nounwind {
+ store double %a, double* %p, align 8
+ %ld.ptr = getelementptr inbounds double, double* %p, i64 1
+ %tmp = load double, double* %ld.ptr, align 4
+ %add.ptr = getelementptr inbounds double, double* %p, i64 1
+ store double %b, double* %add.ptr, align 8
+ ret double %tmp
+}
diff --git a/test/CodeGen/AArch64/arm64-stp.ll b/test/CodeGen/AArch64/arm64-stp.ll
index 40bdf22c995c..4d76396471ad 100644
--- a/test/CodeGen/AArch64/arm64-stp.ll
+++ b/test/CodeGen/AArch64/arm64-stp.ll
@@ -6,7 +6,7 @@
; CHECK: stp w0, w1, [x2]
define void @stp_int(i32 %a, i32 %b, i32* nocapture %p) nounwind {
store i32 %a, i32* %p, align 4
- %add.ptr = getelementptr inbounds i32* %p, i64 1
+ %add.ptr = getelementptr inbounds i32, i32* %p, i64 1
store i32 %b, i32* %add.ptr, align 4
ret void
}
@@ -15,7 +15,7 @@ define void @stp_int(i32 %a, i32 %b, i32* nocapture %p) nounwind {
; CHECK: stp x0, x1, [x2]
define void @stp_long(i64 %a, i64 %b, i64* nocapture %p) nounwind {
store i64 %a, i64* %p, align 8
- %add.ptr = getelementptr inbounds i64* %p, i64 1
+ %add.ptr = getelementptr inbounds i64, i64* %p, i64 1
store i64 %b, i64* %add.ptr, align 8
ret void
}
@@ -24,7 +24,7 @@ define void @stp_long(i64 %a, i64 %b, i64* nocapture %p) nounwind {
; CHECK: stp s0, s1, [x0]
define void @stp_float(float %a, float %b, float* nocapture %p) nounwind {
store float %a, float* %p, align 4
- %add.ptr = getelementptr inbounds float* %p, i64 1
+ %add.ptr = getelementptr inbounds float, float* %p, i64 1
store float %b, float* %add.ptr, align 4
ret void
}
@@ -33,7 +33,7 @@ define void @stp_float(float %a, float %b, float* nocapture %p) nounwind {
; CHECK: stp d0, d1, [x0]
define void @stp_double(double %a, double %b, double* nocapture %p) nounwind {
store double %a, double* %p, align 8
- %add.ptr = getelementptr inbounds double* %p, i64 1
+ %add.ptr = getelementptr inbounds double, double* %p, i64 1
store double %b, double* %add.ptr, align 8
ret void
}
@@ -43,9 +43,9 @@ define void @stur_int(i32 %a, i32 %b, i32* nocapture %p) nounwind {
; STUR_CHK: stur_int
; STUR_CHK: stp w{{[0-9]+}}, {{w[0-9]+}}, [x{{[0-9]+}}, #-8]
; STUR_CHK-NEXT: ret
- %p1 = getelementptr inbounds i32* %p, i32 -1
+ %p1 = getelementptr inbounds i32, i32* %p, i32 -1
store i32 %a, i32* %p1, align 2
- %p2 = getelementptr inbounds i32* %p, i32 -2
+ %p2 = getelementptr inbounds i32, i32* %p, i32 -2
store i32 %b, i32* %p2, align 2
ret void
}
@@ -54,9 +54,9 @@ define void @stur_long(i64 %a, i64 %b, i64* nocapture %p) nounwind {
; STUR_CHK: stur_long
; STUR_CHK: stp x{{[0-9]+}}, {{x[0-9]+}}, [x{{[0-9]+}}, #-16]
; STUR_CHK-NEXT: ret
- %p1 = getelementptr inbounds i64* %p, i32 -1
+ %p1 = getelementptr inbounds i64, i64* %p, i32 -1
store i64 %a, i64* %p1, align 2
- %p2 = getelementptr inbounds i64* %p, i32 -2
+ %p2 = getelementptr inbounds i64, i64* %p, i32 -2
store i64 %b, i64* %p2, align 2
ret void
}
@@ -65,9 +65,9 @@ define void @stur_float(float %a, float %b, float* nocapture %p) nounwind {
; STUR_CHK: stur_float
; STUR_CHK: stp s{{[0-9]+}}, {{s[0-9]+}}, [x{{[0-9]+}}, #-8]
; STUR_CHK-NEXT: ret
- %p1 = getelementptr inbounds float* %p, i32 -1
+ %p1 = getelementptr inbounds float, float* %p, i32 -1
store float %a, float* %p1, align 2
- %p2 = getelementptr inbounds float* %p, i32 -2
+ %p2 = getelementptr inbounds float, float* %p, i32 -2
store float %b, float* %p2, align 2
ret void
}
@@ -76,9 +76,9 @@ define void @stur_double(double %a, double %b, double* nocapture %p) nounwind {
; STUR_CHK: stur_double
; STUR_CHK: stp d{{[0-9]+}}, {{d[0-9]+}}, [x{{[0-9]+}}, #-16]
; STUR_CHK-NEXT: ret
- %p1 = getelementptr inbounds double* %p, i32 -1
+ %p1 = getelementptr inbounds double, double* %p, i32 -1
store double %a, double* %p1, align 2
- %p2 = getelementptr inbounds double* %p, i32 -2
+ %p2 = getelementptr inbounds double, double* %p, i32 -2
store double %b, double* %p2, align 2
ret void
}
diff --git a/test/CodeGen/AArch64/arm64-strict-align.ll b/test/CodeGen/AArch64/arm64-strict-align.ll
index 5d137043a691..b707527f3c0c 100644
--- a/test/CodeGen/AArch64/arm64-strict-align.ll
+++ b/test/CodeGen/AArch64/arm64-strict-align.ll
@@ -10,7 +10,7 @@ define i32 @f0(i32* nocapture %p) nounwind {
; CHECK: ldr w0, [x0]
; CHECK: ret
- %tmp = load i32* %p, align 2
+ %tmp = load i32, i32* %p, align 2
ret i32 %tmp
}
@@ -21,6 +21,6 @@ define i64 @f1(i64* nocapture %p) nounwind {
; CHECK: ldr x0, [x0]
; CHECK: ret
- %tmp = load i64* %p, align 4
+ %tmp = load i64, i64* %p, align 4
ret i64 %tmp
}
diff --git a/test/CodeGen/AArch64/arm64-stur.ll b/test/CodeGen/AArch64/arm64-stur.ll
index a2e684dc9528..5f4cb9f3d95a 100644
--- a/test/CodeGen/AArch64/arm64-stur.ll
+++ b/test/CodeGen/AArch64/arm64-stur.ll
@@ -6,7 +6,7 @@ define void @foo1(i32* %p, i64 %val) nounwind {
; CHECK: stur w1, [x0, #-4]
; CHECK-NEXT: ret
%tmp1 = trunc i64 %val to i32
- %ptr = getelementptr inbounds i32* %p, i64 -1
+ %ptr = getelementptr inbounds i32, i32* %p, i64 -1
store i32 %tmp1, i32* %ptr, align 4
ret void
}
@@ -15,7 +15,7 @@ define void @foo2(i16* %p, i64 %val) nounwind {
; CHECK: sturh w1, [x0, #-2]
; CHECK-NEXT: ret
%tmp1 = trunc i64 %val to i16
- %ptr = getelementptr inbounds i16* %p, i64 -1
+ %ptr = getelementptr inbounds i16, i16* %p, i64 -1
store i16 %tmp1, i16* %ptr, align 2
ret void
}
@@ -24,7 +24,7 @@ define void @foo3(i8* %p, i64 %val) nounwind {
; CHECK: sturb w1, [x0, #-1]
; CHECK-NEXT: ret
%tmp1 = trunc i64 %val to i8
- %ptr = getelementptr inbounds i8* %p, i64 -1
+ %ptr = getelementptr inbounds i8, i8* %p, i64 -1
store i8 %tmp1, i8* %ptr, align 1
ret void
}
@@ -33,7 +33,7 @@ define void @foo4(i16* %p, i32 %val) nounwind {
; CHECK: sturh w1, [x0, #-2]
; CHECK-NEXT: ret
%tmp1 = trunc i32 %val to i16
- %ptr = getelementptr inbounds i16* %p, i32 -1
+ %ptr = getelementptr inbounds i16, i16* %p, i32 -1
store i16 %tmp1, i16* %ptr, align 2
ret void
}
@@ -42,7 +42,7 @@ define void @foo5(i8* %p, i32 %val) nounwind {
; CHECK: sturb w1, [x0, #-1]
; CHECK-NEXT: ret
%tmp1 = trunc i32 %val to i8
- %ptr = getelementptr inbounds i8* %p, i32 -1
+ %ptr = getelementptr inbounds i8, i8* %p, i32 -1
store i8 %tmp1, i8* %ptr, align 1
ret void
}
@@ -53,7 +53,7 @@ define void @foo(%struct.X* nocapture %p) nounwind optsize ssp {
; CHECK: stur xzr, [x0, #12]
; CHECK-NEXT: stur xzr, [x0, #4]
; CHECK-NEXT: ret
- %B = getelementptr inbounds %struct.X* %p, i64 0, i32 1
+ %B = getelementptr inbounds %struct.X, %struct.X* %p, i64 0, i32 1
%val = bitcast i64* %B to i8*
call void @llvm.memset.p0i8.i64(i8* %val, i8 0, i64 16, i32 1, i1 false)
ret void
diff --git a/test/CodeGen/AArch64/arm64-this-return.ll b/test/CodeGen/AArch64/arm64-this-return.ll
index 30f5b9b064a3..3be1a69237d7 100644
--- a/test/CodeGen/AArch64/arm64-this-return.ll
+++ b/test/CodeGen/AArch64/arm64-this-return.ll
@@ -23,7 +23,7 @@ entry:
; CHECK: b {{_?B_ctor_base}}
%0 = bitcast %struct.C* %this to %struct.A*
%call = tail call %struct.A* @A_ctor_base(%struct.A* %0)
- %1 = getelementptr inbounds %struct.C* %this, i32 0, i32 0
+ %1 = getelementptr inbounds %struct.C, %struct.C* %this, i32 0, i32 0
%call2 = tail call %struct.B* @B_ctor_base(%struct.B* %1, i32 %x)
ret %struct.C* %this
}
@@ -37,7 +37,7 @@ entry:
; CHECK-NOT: b {{_?B_ctor_base_nothisret}}
%0 = bitcast %struct.C* %this to %struct.A*
%call = tail call %struct.A* @A_ctor_base_nothisret(%struct.A* %0)
- %1 = getelementptr inbounds %struct.C* %this, i32 0, i32 0
+ %1 = getelementptr inbounds %struct.C, %struct.C* %this, i32 0, i32 0
%call2 = tail call %struct.B* @B_ctor_base_nothisret(%struct.B* %1, i32 %x)
ret %struct.C* %this
}
@@ -65,7 +65,7 @@ entry:
; CHECK: bl {{_?B_ctor_complete}}
; CHECK-NOT: mov x0, {{x[0-9]+}}
; CHECK: b {{_?B_ctor_complete}}
- %b = getelementptr inbounds %struct.D* %this, i32 0, i32 0
+ %b = getelementptr inbounds %struct.D, %struct.D* %this, i32 0, i32 0
%call = tail call %struct.B* @B_ctor_complete(%struct.B* %b, i32 %x)
%call2 = tail call %struct.B* @B_ctor_complete(%struct.B* %b, i32 %x)
ret %struct.D* %this
@@ -75,9 +75,9 @@ define %struct.E* @E_ctor_base(%struct.E* %this, i32 %x) {
entry:
; CHECK-LABEL: E_ctor_base:
; CHECK-NOT: b {{_?B_ctor_complete}}
- %b = getelementptr inbounds %struct.E* %this, i32 0, i32 0
+ %b = getelementptr inbounds %struct.E, %struct.E* %this, i32 0, i32 0
%call = tail call %struct.B* @B_ctor_complete(%struct.B* %b, i32 %x)
- %b2 = getelementptr inbounds %struct.E* %this, i32 0, i32 1
+ %b2 = getelementptr inbounds %struct.E, %struct.E* %this, i32 0, i32 1
%call2 = tail call %struct.B* @B_ctor_complete(%struct.B* %b2, i32 %x)
ret %struct.E* %this
}
diff --git a/test/CodeGen/AArch64/arm64-tls-darwin.ll b/test/CodeGen/AArch64/arm64-tls-darwin.ll
index 5e8ec33ba417..fa4e833d45e1 100644
--- a/test/CodeGen/AArch64/arm64-tls-darwin.ll
+++ b/test/CodeGen/AArch64/arm64-tls-darwin.ll
@@ -13,6 +13,6 @@ define i8 @get_var() {
; CHECK: blr [[TLV_GET_ADDR]]
; CHECK: ldrb w0, [x0]

- %val = load i8* @var, align 1
+ %val = load i8, i8* @var, align 1
ret i8 %val
}
diff --git a/test/CodeGen/AArch64/arm64-tls-dynamic-together.ll b/test/CodeGen/AArch64/arm64-tls-dynamic-together.ll
index 3daae625c84a..f94f88a1183f 100644
--- a/test/CodeGen/AArch64/arm64-tls-dynamic-together.ll
+++ b/test/CodeGen/AArch64/arm64-tls-dynamic-together.ll
@@ -10,7 +10,7 @@
define i32 @test_generaldynamic() {
; CHECK-LABEL: test_generaldynamic:
- %val = load i32* @general_dynamic_var
+ %val = load i32, i32* @general_dynamic_var
ret i32 %val
; CHECK: .tlsdesccall general_dynamic_var
diff --git a/test/CodeGen/AArch64/arm64-tls-dynamics.ll b/test/CodeGen/AArch64/arm64-tls-dynamics.ll
index a89c2c5e6fd5..88700a153437 100644
--- a/test/CodeGen/AArch64/arm64-tls-dynamics.ll
+++ b/test/CodeGen/AArch64/arm64-tls-dynamics.ll
@@ -8,7 +8,7 @@
define i32 @test_generaldynamic() {
; CHECK-LABEL: test_generaldynamic:
- %val = load i32* @general_dynamic_var
+ %val = load i32, i32* @general_dynamic_var
ret i32 %val
; CHECK: adrp x[[TLSDESC_HI:[0-9]+]], :tlsdesc:general_dynamic_var
@@ -72,7 +72,7 @@ define i32* @test_generaldynamic_addr() {
define i32 @test_localdynamic() {
; CHECK-LABEL: test_localdynamic:
- %val = load i32* @local_dynamic_var
+ %val = load i32, i32* @local_dynamic_var
ret i32 %val
; CHECK: adrp x[[TLSDESC_HI:[0-9]+]], :tlsdesc:_TLS_MODULE_BASE_
@@ -151,8 +151,8 @@ define i32* @test_localdynamic_addr() {
define i32 @test_localdynamic_deduplicate() {
; CHECK-LABEL: test_localdynamic_deduplicate:
- %val = load i32* @local_dynamic_var
- %val2 = load i32* @local_dynamic_var2
+ %val = load i32, i32* @local_dynamic_var
+ %val2 = load i32, i32* @local_dynamic_var2
%sum = add i32 %val, %val2
ret i32 %sum
diff --git a/test/CodeGen/AArch64/arm64-tls-execs.ll b/test/CodeGen/AArch64/arm64-tls-execs.ll
index e6d3d680f417..deced6988815 100644
--- a/test/CodeGen/AArch64/arm64-tls-execs.ll
+++ b/test/CodeGen/AArch64/arm64-tls-execs.ll
@@ -5,7 +5,7 @@
define i32 @test_initial_exec() {
; CHECK-LABEL: test_initial_exec:
- %val = load i32* @initial_exec_var
+ %val = load i32, i32* @initial_exec_var
; CHECK: adrp x[[GOTADDR:[0-9]+]], :gottprel:initial_exec_var
; CHECK: ldr x[[TP_OFFSET:[0-9]+]], [x[[GOTADDR]], :gottprel_lo12:initial_exec_var]
@@ -36,7 +36,7 @@ define i32* @test_initial_exec_addr() {
define i32 @test_local_exec() {
; CHECK-LABEL: test_local_exec:
- %val = load i32* @local_exec_var
+ %val = load i32, i32* @local_exec_var
; CHECK: mrs x[[R1:[0-9]+]], TPIDR_EL0
; CHECK: add x[[R2:[0-9]+]], x[[R1]], :tprel_hi12:local_exec_var
diff --git a/test/CodeGen/AArch64/arm64-triv-disjoint-mem-access.ll b/test/CodeGen/AArch64/arm64-triv-disjoint-mem-access.ll
index 923742d0370e..1b1681dc49f3 100644
--- a/test/CodeGen/AArch64/arm64-triv-disjoint-mem-access.ll
+++ b/test/CodeGen/AArch64/arm64-triv-disjoint-mem-access.ll
@@ -8,11 +8,11 @@ define i32 @func(i32 %i, i32 %j, i32 %k) #0 {
entry:
; CHECK: ldr {{w[0-9]+}}, [x[[REG:[0-9]+]], #4]
; CHECK: str {{w[0-9]+}}, [x[[REG]], #8]
- %0 = load i32** @a, align 8, !tbaa !1
- %arrayidx = getelementptr inbounds i32* %0, i64 2
+ %0 = load i32*, i32** @a, align 8, !tbaa !1
+ %arrayidx = getelementptr inbounds i32, i32* %0, i64 2
store i32 %i, i32* %arrayidx, align 4, !tbaa !5
- %arrayidx1 = getelementptr inbounds i32* %0, i64 1
- %1 = load i32* %arrayidx1, align 4, !tbaa !5
+ %arrayidx1 = getelementptr inbounds i32, i32* %0, i64 1
+ %1 = load i32, i32* %arrayidx1, align 4, !tbaa !5
%add = add nsw i32 %k, %i
store i32 %add, i32* @m, align 4, !tbaa !5
ret i32 %1
diff --git a/test/CodeGen/AArch64/arm64-trn.ll b/test/CodeGen/AArch64/arm64-trn.ll
index 2db7a14e7549..92ccf05a3c94 100644
--- a/test/CodeGen/AArch64/arm64-trn.ll
+++ b/test/CodeGen/AArch64/arm64-trn.ll
@@ -5,8 +5,8 @@ define <8 x i8> @vtrni8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK: trn1.8b
;CHECK: trn2.8b
;CHECK-NEXT: add.8b
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
+ %tmp2 = load <8 x i8>, <8 x i8>* %B
%tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
%tmp4 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
%tmp5 = add <8 x i8> %tmp3, %tmp4
@@ -18,8 +18,8 @@ define <4 x i16> @vtrni16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK: trn1.4h
;CHECK: trn2.4h
;CHECK-NEXT: add.4h
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
%tmp3 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
%tmp4 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
%tmp5 = add <4 x i16> %tmp3, %tmp4
@@ -32,8 +32,8 @@ define <2 x i32> @vtrni32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK: zip1.2s
;CHECK: zip2.2s
;CHECK-NEXT: add.2s
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
%tmp3 = shufflevector <2 x i32> %tmp1, <2 x i32> %tmp2, <2 x i32> <i32 0, i32 2>
%tmp4 = shufflevector <2 x i32> %tmp1, <2 x i32> %tmp2, <2 x i32> <i32 1, i32 3>
%tmp5 = add <2 x i32> %tmp3, %tmp4
@@ -45,8 +45,8 @@ define <2 x float> @vtrnf(<2 x float>* %A, <2 x float>* %B) nounwind {
;CHECK: zip1.2s
;CHECK: zip2.2s
;CHECK-NEXT: fadd.2s
- %tmp1 = load <2 x float>* %A
- %tmp2 = load <2 x float>* %B
+ %tmp1 = load <2 x float>, <2 x float>* %A
+ %tmp2 = load <2 x float>, <2 x float>* %B
%tmp3 = shufflevector <2 x float> %tmp1, <2 x float> %tmp2, <2 x i32> <i32 0, i32 2>
%tmp4 = shufflevector <2 x float> %tmp1, <2 x float> %tmp2, <2 x i32> <i32 1, i32 3>
%tmp5 = fadd <2 x float> %tmp3, %tmp4
@@ -58,8 +58,8 @@ define <16 x i8> @vtrnQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK: trn1.16b
;CHECK: trn2.16b
;CHECK-NEXT: add.16b
- %tmp1 = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
+ %tmp1 = load <16 x i8>, <16 x i8>* %A
+ %tmp2 = load <16 x i8>, <16 x i8>* %B
%tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
%tmp4 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
%tmp5 = add <16 x i8> %tmp3, %tmp4
@@ -71,8 +71,8 @@ define <8 x i16> @vtrnQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK: trn1.8h
;CHECK: trn2.8h
;CHECK-NEXT: add.8h
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
+ %tmp2 = load <8 x i16>, <8 x i16>* %B
%tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
%tmp4 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
%tmp5 = add <8 x i16> %tmp3, %tmp4
@@ -84,8 +84,8 @@ define <4 x i32> @vtrnQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK: trn1.4s
;CHECK: trn2.4s
;CHECK-NEXT: add.4s
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
+ %tmp2 = load <4 x i32>, <4 x i32>* %B
%tmp3 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
%tmp4 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
%tmp5 = add <4 x i32> %tmp3, %tmp4
@@ -97,8 +97,8 @@ define <4 x float> @vtrnQf(<4 x float>* %A, <4 x float>* %B) nounwind {
;CHECK: trn1.4s
;CHECK: trn2.4s
;CHECK-NEXT: fadd.4s
- %tmp1 = load <4 x float>* %A
- %tmp2 = load <4 x float>* %B
+ %tmp1 = load <4 x float>, <4 x float>* %A
+ %tmp2 = load <4 x float>, <4 x float>* %B
%tmp3 = shufflevector <4 x float> %tmp1, <4 x float> %tmp2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
%tmp4 = shufflevector <4 x float> %tmp1, <4 x float> %tmp2, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
%tmp5 = fadd <4 x float> %tmp3, %tmp4
@@ -112,8 +112,8 @@ define <8 x i8> @vtrni8_undef(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK: trn1.8b
;CHECK: trn2.8b
;CHECK-NEXT: add.8b
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
+ %tmp2 = load <8 x i8>, <8 x i8>* %B
%tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 0, i32 undef, i32 2, i32 10, i32 undef, i32 12, i32 6, i32 14>
%tmp4 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 undef, i32 undef, i32 15>
%tmp5 = add <8 x i8> %tmp3, %tmp4
@@ -125,8 +125,8 @@ define <8 x i16> @vtrnQi16_undef(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK: trn1.8h
;CHECK: trn2.8h
;CHECK-NEXT: add.8h
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
+ %tmp2 = load <8 x i16>, <8 x i16>* %B
%tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 0, i32 8, i32 undef, i32 undef, i32 4, i32 12, i32 6, i32 14>
%tmp4 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 1, i32 undef, i32 3, i32 11, i32 5, i32 13, i32 undef, i32 undef>
%tmp5 = add <8 x i16> %tmp3, %tmp4
diff --git a/test/CodeGen/AArch64/arm64-trunc-store.ll b/test/CodeGen/AArch64/arm64-trunc-store.ll
index cf15247e1524..7cde629b33ae 100644
--- a/test/CodeGen/AArch64/arm64-trunc-store.ll
+++ b/test/CodeGen/AArch64/arm64-trunc-store.ll
@@ -25,10 +25,10 @@ define void @fct32(i32 %arg, i64 %var) {
; CHECK-NEXT: str w1, {{\[}}[[GLOBALADDR]], w[[OFFSETREGNUM]], sxtw #2]
; CHECK-NEXT: ret
bb:
- %.pre37 = load i32** @zptr32, align 8
+ %.pre37 = load i32*, i32** @zptr32, align 8
%dec = add nsw i32 %arg, -1
%idxprom8 = sext i32 %dec to i64
- %arrayidx9 = getelementptr inbounds i32* %.pre37, i64 %idxprom8
+ %arrayidx9 = getelementptr inbounds i32, i32* %.pre37, i64 %idxprom8
%tmp = trunc i64 %var to i32
store i32 %tmp, i32* %arrayidx9, align 4
ret void
@@ -45,10 +45,10 @@ define void @fct16(i32 %arg, i64 %var) {
; CHECK-NEXT: strh w1, {{\[}}[[GLOBALADDR]], w[[OFFSETREGNUM]], sxtw #1]
; CHECK-NEXT: ret
bb:
- %.pre37 = load i16** @zptr16, align 8
+ %.pre37 = load i16*, i16** @zptr16, align 8
%dec = add nsw i32 %arg, -1
%idxprom8 = sext i32 %dec to i64
- %arrayidx9 = getelementptr inbounds i16* %.pre37, i64 %idxprom8
+ %arrayidx9 = getelementptr inbounds i16, i16* %.pre37, i64 %idxprom8
%tmp = trunc i64 %var to i16
store i16 %tmp, i16* %arrayidx9, align 4
ret void
@@ -65,10 +65,10 @@ define void @fct8(i32 %arg, i64 %var) {
; CHECK-NEXT: sturb w1, {{\[}}[[ADDR]], #-1]
; CHECK-NEXT: ret
bb:
- %.pre37 = load i8** @zptr8, align 8
+ %.pre37 = load i8*, i8** @zptr8, align 8
%dec = add nsw i32 %arg, -1
%idxprom8 = sext i32 %dec to i64
- %arrayidx9 = getelementptr inbounds i8* %.pre37, i64 %idxprom8
+ %arrayidx9 = getelementptr inbounds i8, i8* %.pre37, i64 %idxprom8
%tmp = trunc i64 %var to i8
store i8 %tmp, i8* %arrayidx9, align 4
ret void
diff --git a/test/CodeGen/AArch64/arm64-umaxv.ll b/test/CodeGen/AArch64/arm64-umaxv.ll
index d523f317d087..a77f228cb156 100644
--- a/test/CodeGen/AArch64/arm64-umaxv.ll
+++ b/test/CodeGen/AArch64/arm64-umaxv.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s
+; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple -asm-verbose=false | FileCheck %s
define i32 @vmax_u8x8(<8 x i8> %a) nounwind ssp {
; CHECK-LABEL: vmax_u8x8:
@@ -86,7 +86,79 @@ return:
ret i32 %retval.0
}

+define <8 x i8> @test_vmaxv_u8_used_by_laneop(<8 x i8> %a1, <8 x i8> %a2) {
+; CHECK-LABEL: test_vmaxv_u8_used_by_laneop:
+; CHECK: umaxv.8b b[[REGNUM:[0-9]+]], v1
+; CHECK-NEXT: ins.b v0[3], v[[REGNUM]][0]
+; CHECK-NEXT: ret
+entry:
+ %0 = tail call i32 @llvm.aarch64.neon.umaxv.i32.v8i8(<8 x i8> %a2)
+ %1 = trunc i32 %0 to i8
+ %2 = insertelement <8 x i8> %a1, i8 %1, i32 3
+ ret <8 x i8> %2
+}
+
+define <4 x i16> @test_vmaxv_u16_used_by_laneop(<4 x i16> %a1, <4 x i16> %a2) {
+; CHECK-LABEL: test_vmaxv_u16_used_by_laneop:
+; CHECK: umaxv.4h h[[REGNUM:[0-9]+]], v1
+; CHECK-NEXT: ins.h v0[3], v[[REGNUM]][0]
+; CHECK-NEXT: ret
+entry:
+ %0 = tail call i32 @llvm.aarch64.neon.umaxv.i32.v4i16(<4 x i16> %a2)
+ %1 = trunc i32 %0 to i16
+ %2 = insertelement <4 x i16> %a1, i16 %1, i32 3
+ ret <4 x i16> %2
+}
+
+define <2 x i32> @test_vmaxv_u32_used_by_laneop(<2 x i32> %a1, <2 x i32> %a2) {
+; CHECK-LABEL: test_vmaxv_u32_used_by_laneop:
+; CHECK: umaxp.2s v[[REGNUM:[0-9]+]], v1, v1
+; CHECK-NEXT: ins.s v0[1], v[[REGNUM]][0]
+; CHECK-NEXT: ret
+entry:
+ %0 = tail call i32 @llvm.aarch64.neon.umaxv.i32.v2i32(<2 x i32> %a2)
+ %1 = insertelement <2 x i32> %a1, i32 %0, i32 1
+ ret <2 x i32> %1
+}
+
+define <16 x i8> @test_vmaxvq_u8_used_by_laneop(<16 x i8> %a1, <16 x i8> %a2) {
+; CHECK-LABEL: test_vmaxvq_u8_used_by_laneop:
+; CHECK: umaxv.16b b[[REGNUM:[0-9]+]], v1
+; CHECK-NEXT: ins.b v0[3], v[[REGNUM]][0]
+; CHECK-NEXT: ret
+entry:
+ %0 = tail call i32 @llvm.aarch64.neon.umaxv.i32.v16i8(<16 x i8> %a2)
+ %1 = trunc i32 %0 to i8
+ %2 = insertelement <16 x i8> %a1, i8 %1, i32 3
+ ret <16 x i8> %2
+}
+
+define <8 x i16> @test_vmaxvq_u16_used_by_laneop(<8 x i16> %a1, <8 x i16> %a2) {
+; CHECK-LABEL: test_vmaxvq_u16_used_by_laneop:
+; CHECK: umaxv.8h h[[REGNUM:[0-9]+]], v1
+; CHECK-NEXT: ins.h v0[3], v[[REGNUM]][0]
+; CHECK-NEXT: ret
+entry:
+ %0 = tail call i32 @llvm.aarch64.neon.umaxv.i32.v8i16(<8 x i16> %a2)
+ %1 = trunc i32 %0 to i16
+ %2 = insertelement <8 x i16> %a1, i16 %1, i32 3
+ ret <8 x i16> %2
+}
+
+define <4 x i32> @test_vmaxvq_u32_used_by_laneop(<4 x i32> %a1, <4 x i32> %a2) {
+; CHECK-LABEL: test_vmaxvq_u32_used_by_laneop:
+; CHECK: umaxv.4s s[[REGNUM:[0-9]+]], v1
+; CHECK-NEXT: ins.s v0[3], v[[REGNUM]][0]
+; CHECK-NEXT: ret
+entry:
+ %0 = tail call i32 @llvm.aarch64.neon.umaxv.i32.v4i32(<4 x i32> %a2)
+ %1 = insertelement <4 x i32> %a1, i32 %0, i32 3
+ ret <4 x i32> %1
+}
+
declare i32 @llvm.aarch64.neon.umaxv.i32.v16i8(<16 x i8>) nounwind readnone
declare i32 @llvm.aarch64.neon.umaxv.i32.v8i16(<8 x i16>) nounwind readnone
declare i32 @llvm.aarch64.neon.umaxv.i32.v4i16(<4 x i16>) nounwind readnone
declare i32 @llvm.aarch64.neon.umaxv.i32.v8i8(<8 x i8>) nounwind readnone
+declare i32 @llvm.aarch64.neon.umaxv.i32.v2i32(<2 x i32>) nounwind readnone
+declare i32 @llvm.aarch64.neon.umaxv.i32.v4i32(<4 x i32>) nounwind readnone
diff --git a/test/CodeGen/AArch64/arm64-uminv.ll b/test/CodeGen/AArch64/arm64-uminv.ll
index 3bade4b28b8f..2181db46ea96 100644
--- a/test/CodeGen/AArch64/arm64-uminv.ll
+++ b/test/CodeGen/AArch64/arm64-uminv.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s
+; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple -asm-verbose=false | FileCheck %s
define i32 @vmin_u8x8(<8 x i8> %a) nounwind ssp {
; CHECK-LABEL: vmin_u8x8:
@@ -86,7 +86,78 @@ return:
ret i32 %retval.0
}

+define <8 x i8> @test_vminv_u8_used_by_laneop(<8 x i8> %a1, <8 x i8> %a2) {
+; CHECK-LABEL: test_vminv_u8_used_by_laneop:
+; CHECK: uminv.8b b[[REGNUM:[0-9]+]], v1
+; CHECK-NEXT: ins.b v0[3], v[[REGNUM]][0]
+; CHECK-NEXT: ret
+entry:
+ %0 = tail call i32 @llvm.aarch64.neon.uminv.i32.v8i8(<8 x i8> %a2)
+ %1 = trunc i32 %0 to i8
+ %2 = insertelement <8 x i8> %a1, i8 %1, i32 3
+ ret <8 x i8> %2
+}
+
+define <4 x i16> @test_vminv_u16_used_by_laneop(<4 x i16> %a1, <4 x i16> %a2) {
+; CHECK-LABEL: test_vminv_u16_used_by_laneop:
+; CHECK: uminv.4h h[[REGNUM:[0-9]+]], v1
+; CHECK-NEXT: ins.h v0[3], v[[REGNUM]][0]
+; CHECK-NEXT: ret
+entry:
+ %0 = tail call i32 @llvm.aarch64.neon.uminv.i32.v4i16(<4 x i16> %a2)
+ %1 = trunc i32 %0 to i16
+ %2 = insertelement <4 x i16> %a1, i16 %1, i32 3
+ ret <4 x i16> %2
+}
+
+define <2 x i32> @test_vminv_u32_used_by_laneop(<2 x i32> %a1, <2 x i32> %a2) {
+; CHECK-LABEL: test_vminv_u32_used_by_laneop:
+; CHECK: uminp.2s v[[REGNUM:[0-9]+]], v1, v1
+; CHECK-NEXT: ins.s v0[1], v[[REGNUM]][0]
+; CHECK-NEXT: ret
+entry:
+ %0 = tail call i32 @llvm.aarch64.neon.uminv.i32.v2i32(<2 x i32> %a2)
+ %1 = insertelement <2 x i32> %a1, i32 %0, i32 1
+ ret <2 x i32> %1
+}
+
+define <16 x i8> @test_vminvq_u8_used_by_laneop(<16 x i8> %a1, <16 x i8> %a2) {
+; CHECK-LABEL: test_vminvq_u8_used_by_laneop:
+; CHECK: uminv.16b b[[REGNUM:[0-9]+]], v1
+; CHECK-NEXT: ins.b v0[3], v[[REGNUM]][0]
+; CHECK-NEXT: ret
+entry:
+ %0 = tail call i32 @llvm.aarch64.neon.uminv.i32.v16i8(<16 x i8> %a2)
+ %1 = trunc i32 %0 to i8
+ %2 = insertelement <16 x i8> %a1, i8 %1, i32 3
+ ret <16 x i8> %2
+}
+
+define <8 x i16> @test_vminvq_u16_used_by_laneop(<8 x i16> %a1, <8 x i16> %a2) {
+; CHECK-LABEL: test_vminvq_u16_used_by_laneop:
+; CHECK: uminv.8h h[[REGNUM:[0-9]+]], v1
+; CHECK-NEXT: ins.h v0[3], v[[REGNUM]][0]
+; CHECK-NEXT: ret
+entry:
+ %0 = tail call i32 @llvm.aarch64.neon.uminv.i32.v8i16(<8 x i16> %a2)
+ %1 = trunc i32 %0 to i16
+ %2 = insertelement <8 x i16> %a1, i16 %1, i32 3
+ ret <8 x i16> %2
+}
+
+define <4 x i32> @test_vminvq_u32_used_by_laneop(<4 x i32> %a1, <4 x i32> %a2) {
+; CHECK-LABEL: test_vminvq_u32_used_by_laneop:
+; CHECK: uminv.4s s[[REGNUM:[0-9]+]], v1
+; CHECK-NEXT: ins.s v0[3], v[[REGNUM]][0]
+; CHECK-NEXT: ret
+entry:
+ %0 = tail call i32 @llvm.aarch64.neon.uminv.i32.v4i32(<4 x i32> %a2)
+ %1 = insertelement <4 x i32> %a1, i32 %0, i32 3
+ ret <4 x i32> %1
+}
declare i32 @llvm.aarch64.neon.uminv.i32.v16i8(<16 x i8>) nounwind readnone
declare i32 @llvm.aarch64.neon.uminv.i32.v8i16(<8 x i16>) nounwind readnone
declare i32 @llvm.aarch64.neon.uminv.i32.v4i16(<4 x i16>) nounwind readnone
declare i32 @llvm.aarch64.neon.uminv.i32.v8i8(<8 x i8>) nounwind readnone
+declare i32 @llvm.aarch64.neon.uminv.i32.v2i32(<2 x i32>) nounwind readnone
+declare i32 @llvm.aarch64.neon.uminv.i32.v4i32(<4 x i32>) nounwind readnone
diff --git a/test/CodeGen/AArch64/arm64-unaligned_ldst.ll b/test/CodeGen/AArch64/arm64-unaligned_ldst.ll
index 20b80c09f72f..dab8b0f5b6d1 100644
--- a/test/CodeGen/AArch64/arm64-unaligned_ldst.ll
+++ b/test/CodeGen/AArch64/arm64-unaligned_ldst.ll
@@ -9,7 +9,7 @@ entry:
; CHECK: str [[X0]], [x0]
%tmp1 = bitcast i8* %b to i64*
%tmp2 = bitcast i8* %a to i64*
- %tmp3 = load i64* %tmp1, align 1
+ %tmp3 = load i64, i64* %tmp1, align 1
store i64 %tmp3, i64* %tmp2, align 1
ret void
}
@@ -22,7 +22,7 @@ entry:
; CHECK: str [[W0]], [x0]
%tmp1 = bitcast i8* %b to i32*
%tmp2 = bitcast i8* %a to i32*
- %tmp3 = load i32* %tmp1, align 1
+ %tmp3 = load i32, i32* %tmp1, align 1
store i32 %tmp3, i32* %tmp2, align 1
ret void
}
@@ -35,7 +35,7 @@ entry:
; CHECK: strh [[W0]], [x0]
%tmp1 = bitcast i8* %b to i16*
%tmp2 = bitcast i8* %a to i16*
- %tmp3 = load i16* %tmp1, align 1
+ %tmp3 = load i16, i16* %tmp1, align 1
store i16 %tmp3, i16* %tmp2, align 1
ret void
}
diff --git a/test/CodeGen/AArch64/arm64-uzp.ll b/test/CodeGen/AArch64/arm64-uzp.ll
index cdd8d31c9981..517ebae6dabd 100644
--- a/test/CodeGen/AArch64/arm64-uzp.ll
+++ b/test/CodeGen/AArch64/arm64-uzp.ll
@@ -5,8 +5,8 @@ define <8 x i8> @vuzpi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK: uzp1.8b
;CHECK: uzp2.8b
;CHECK-NEXT: add.8b
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
+ %tmp2 = load <8 x i8>, <8 x i8>* %B
%tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
%tmp4 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
%tmp5 = add <8 x i8> %tmp3, %tmp4
@@ -18,8 +18,8 @@ define <4 x i16> @vuzpi16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK: uzp1.4h
;CHECK: uzp2.4h
;CHECK-NEXT: add.4h
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
%tmp3 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
%tmp4 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
%tmp5 = add <4 x i16> %tmp3, %tmp4
@@ -31,8 +31,8 @@ define <16 x i8> @vuzpQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK: uzp1.16b
;CHECK: uzp2.16b
;CHECK-NEXT: add.16b
- %tmp1 = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
+ %tmp1 = load <16 x i8>, <16 x i8>* %A
+ %tmp2 = load <16 x i8>, <16 x i8>* %B
%tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
%tmp4 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
%tmp5 = add <16 x i8> %tmp3, %tmp4
@@ -44,8 +44,8 @@ define <8 x i16> @vuzpQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK: uzp1.8h
;CHECK: uzp2.8h
;CHECK-NEXT: add.8h
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
+ %tmp2 = load <8 x i16>, <8 x i16>* %B
%tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
%tmp4 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
%tmp5 = add <8 x i16> %tmp3, %tmp4
@@ -57,8 +57,8 @@ define <4 x i32> @vuzpQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK: uzp1.4s
;CHECK: uzp2.4s
;CHECK-NEXT: add.4s
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
+ %tmp2 = load <4 x i32>, <4 x i32>* %B
%tmp3 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
%tmp4 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
%tmp5 = add <4 x i32> %tmp3, %tmp4
@@ -70,8 +70,8 @@ define <4 x float> @vuzpQf(<4 x float>* %A, <4 x float>* %B) nounwind {
;CHECK: uzp1.4s
;CHECK: uzp2.4s
;CHECK-NEXT: fadd.4s
- %tmp1 = load <4 x float>* %A
- %tmp2 = load <4 x float>* %B
+ %tmp1 = load <4 x float>, <4 x float>* %A
+ %tmp2 = load <4 x float>, <4 x float>* %B
%tmp3 = shufflevector <4 x float> %tmp1, <4 x float> %tmp2, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
%tmp4 = shufflevector <4 x float> %tmp1, <4 x float> %tmp2, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
%tmp5 = fadd <4 x float> %tmp3, %tmp4
@@ -85,8 +85,8 @@ define <8 x i8> @vuzpi8_undef(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK: uzp1.8b
;CHECK: uzp2.8b
;CHECK-NEXT: add.8b
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
+ %tmp2 = load <8 x i8>, <8 x i8>* %B
%tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 0, i32 2, i32 undef, i32 undef, i32 8, i32 10, i32 12, i32 14>
%tmp4 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 undef, i32 undef, i32 13, i32 15>
%tmp5 = add <8 x i8> %tmp3, %tmp4
@@ -98,8 +98,8 @@ define <8 x i16> @vuzpQi16_undef(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK: uzp1.8h
;CHECK: uzp2.8h
;CHECK-NEXT: add.8h
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
+ %tmp2 = load <8 x i16>, <8 x i16>* %B
%tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 0, i32 undef, i32 4, i32 undef, i32 8, i32 10, i32 12, i32 14>
%tmp4 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 1, i32 3, i32 5, i32 undef, i32 undef, i32 11, i32 13, i32 15>
%tmp5 = add <8 x i16> %tmp3, %tmp4
diff --git a/test/CodeGen/AArch64/arm64-vabs.ll b/test/CodeGen/AArch64/arm64-vabs.ll
index fae2b90e5ba1..a52c4ebf13e7 100644
--- a/test/CodeGen/AArch64/arm64-vabs.ll
+++ b/test/CodeGen/AArch64/arm64-vabs.ll
@@ -4,8 +4,8 @@
define <8 x i16> @sabdl8h(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: sabdl8h:
;CHECK: sabdl.8h
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
+ %tmp2 = load <8 x i8>, <8 x i8>* %B
%tmp3 = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
%tmp4 = zext <8 x i8> %tmp3 to <8 x i16>
ret <8 x i16> %tmp4
@@ -14,8 +14,8 @@ define <8 x i16> @sabdl8h(<8 x i8>* %A, <8 x i8>* %B) nounwind {
define <4 x i32> @sabdl4s(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: sabdl4s:
;CHECK: sabdl.4s
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
%tmp3 = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
%tmp4 = zext <4 x i16> %tmp3 to <4 x i32>
ret <4 x i32> %tmp4
@@ -24,8 +24,8 @@ define <4 x i32> @sabdl4s(<4 x i16>* %A, <4 x i16>* %B) nounwind {
define <2 x i64> @sabdl2d(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: sabdl2d:
;CHECK: sabdl.2d
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
%tmp3 = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
%tmp4 = zext <2 x i32> %tmp3 to <2 x i64>
ret <2 x i64> %tmp4
@@ -34,8 +34,8 @@ define <2 x i64> @sabdl2d(<2 x i32>* %A, <2 x i32>* %B) nounwind {
define <8 x i16> @sabdl2_8h(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: sabdl2_8h:
;CHECK: sabdl2.8h
- %load1 = load <16 x i8>* %A
- %load2 = load <16 x i8>* %B
+ %load1 = load <16 x i8>, <16 x i8>* %A
+ %load2 = load <16 x i8>, <16 x i8>* %B
%tmp1 = shufflevector <16 x i8> %load1, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%tmp2 = shufflevector <16 x i8> %load2, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%tmp3 = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
@@ -46,8 +46,8 @@ define <8 x i16> @sabdl2_8h(<16 x i8>* %A, <16 x i8>* %B) nounwind {
define <4 x i32> @sabdl2_4s(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: sabdl2_4s:
;CHECK: sabdl2.4s
- %load1 = load <8 x i16>* %A
- %load2 = load <8 x i16>* %B
+ %load1 = load <8 x i16>, <8 x i16>* %A
+ %load2 = load <8 x i16>, <8 x i16>* %B
%tmp1 = shufflevector <8 x i16> %load1, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
%tmp2 = shufflevector <8 x i16> %load2, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
%tmp3 = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
@@ -58,8 +58,8 @@ define <4 x i32> @sabdl2_4s(<8 x i16>* %A, <8 x i16>* %B) nounwind {
define <2 x i64> @sabdl2_2d(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: sabdl2_2d:
;CHECK: sabdl2.2d
- %load1 = load <4 x i32>* %A
- %load2 = load <4 x i32>* %B
+ %load1 = load <4 x i32>, <4 x i32>* %A
+ %load2 = load <4 x i32>, <4 x i32>* %B
%tmp1 = shufflevector <4 x i32> %load1, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
%tmp2 = shufflevector <4 x i32> %load2, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
%tmp3 = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
@@ -70,8 +70,8 @@ define <2 x i64> @sabdl2_2d(<4 x i32>* %A, <4 x i32>* %B) nounwind {
define <8 x i16> @uabdl8h(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: uabdl8h:
;CHECK: uabdl.8h
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
+ %tmp2 = load <8 x i8>, <8 x i8>* %B
%tmp3 = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
%tmp4 = zext <8 x i8> %tmp3 to <8 x i16>
ret <8 x i16> %tmp4
@@ -80,8 +80,8 @@ define <8 x i16> @uabdl8h(<8 x i8>* %A, <8 x i8>* %B) nounwind {
define <4 x i32> @uabdl4s(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: uabdl4s:
;CHECK: uabdl.4s
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
%tmp3 = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
%tmp4 = zext <4 x i16> %tmp3 to <4 x i32>
ret <4 x i32> %tmp4
@@ -90,8 +90,8 @@ define <4 x i32> @uabdl4s(<4 x i16>* %A, <4 x i16>* %B) nounwind {
define <2 x i64> @uabdl2d(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: uabdl2d:
;CHECK: uabdl.2d
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
%tmp3 = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
%tmp4 = zext <2 x i32> %tmp3 to <2 x i64>
ret <2 x i64> %tmp4
@@ -100,8 +100,8 @@ define <2 x i64> @uabdl2d(<2 x i32>* %A, <2 x i32>* %B) nounwind {
define <8 x i16> @uabdl2_8h(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: uabdl2_8h:
;CHECK: uabdl2.8h
- %load1 = load <16 x i8>* %A
- %load2 = load <16 x i8>* %B
+ %load1 = load <16 x i8>, <16 x i8>* %A
+ %load2 = load <16 x i8>, <16 x i8>* %B
%tmp1 = shufflevector <16 x i8> %load1, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%tmp2 = shufflevector <16 x i8> %load2, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
@@ -113,8 +113,8 @@ define <8 x i16> @uabdl2_8h(<16 x i8>* %A, <16 x i8>* %B) nounwind {
define <4 x i32> @uabdl2_4s(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: uabdl2_4s:
;CHECK: uabdl2.4s
- %load1 = load <8 x i16>* %A
- %load2 = load <8 x i16>* %B
+ %load1 = load <8 x i16>, <8 x i16>* %A
+ %load2 = load <8 x i16>, <8 x i16>* %B
%tmp1 = shufflevector <8 x i16> %load1, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
%tmp2 = shufflevector <8 x i16> %load2, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
%tmp3 = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
@@ -125,8 +125,8 @@ define <4 x i32> @uabdl2_4s(<8 x i16>* %A, <8 x i16>* %B) nounwind {
define <2 x i64> @uabdl2_2d(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: uabdl2_2d:
;CHECK: uabdl2.2d
- %load1 = load <4 x i32>* %A
- %load2 = load <4 x i32>* %B
+ %load1 = load <4 x i32>, <4 x i32>* %A
+ %load2 = load <4 x i32>, <4 x i32>* %B
%tmp1 = shufflevector <4 x i32> %load1, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
%tmp2 = shufflevector <4 x i32> %load2, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
%tmp3 = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
@@ -137,8 +137,8 @@ define <2 x i64> @uabdl2_2d(<4 x i32>* %A, <4 x i32>* %B) nounwind {
define <2 x float> @fabd_2s(<2 x float>* %A, <2 x float>* %B) nounwind {
;CHECK-LABEL: fabd_2s:
;CHECK: fabd.2s
- %tmp1 = load <2 x float>* %A
- %tmp2 = load <2 x float>* %B
+ %tmp1 = load <2 x float>, <2 x float>* %A
+ %tmp2 = load <2 x float>, <2 x float>* %B
%tmp3 = call <2 x float> @llvm.aarch64.neon.fabd.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
ret <2 x float> %tmp3
}
@@ -146,8 +146,8 @@ define <2 x float> @fabd_2s(<2 x float>* %A, <2 x float>* %B) nounwind {
define <4 x float> @fabd_4s(<4 x float>* %A, <4 x float>* %B) nounwind {
;CHECK-LABEL: fabd_4s:
;CHECK: fabd.4s
- %tmp1 = load <4 x float>* %A
- %tmp2 = load <4 x float>* %B
+ %tmp1 = load <4 x float>, <4 x float>* %A
+ %tmp2 = load <4 x float>, <4 x float>* %B
%tmp3 = call <4 x float> @llvm.aarch64.neon.fabd.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
ret <4 x float> %tmp3
}
@@ -155,8 +155,8 @@ define <4 x float> @fabd_4s(<4 x float>* %A, <4 x float>* %B) nounwind {
define <2 x double> @fabd_2d(<2 x double>* %A, <2 x double>* %B) nounwind {
;CHECK-LABEL: fabd_2d:
;CHECK: fabd.2d
- %tmp1 = load <2 x double>* %A
- %tmp2 = load <2 x double>* %B
+ %tmp1 = load <2 x double>, <2 x double>* %A
+ %tmp2 = load <2 x double>, <2 x double>* %B
%tmp3 = call <2 x double> @llvm.aarch64.neon.fabd.v2f64(<2 x double> %tmp1, <2 x double> %tmp2)
ret <2 x double> %tmp3
}
@@ -168,8 +168,8 @@ declare <2 x double> @llvm.aarch64.neon.fabd.v2f64(<2 x double>, <2 x double>) n
define <8 x i8> @sabd_8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: sabd_8b:
;CHECK: sabd.8b
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
+ %tmp2 = load <8 x i8>, <8 x i8>* %B
%tmp3 = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
ret <8 x i8> %tmp3
}
@@ -177,8 +177,8 @@ define <8 x i8> @sabd_8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
define <16 x i8> @sabd_16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: sabd_16b:
;CHECK: sabd.16b
- %tmp1 = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
+ %tmp1 = load <16 x i8>, <16 x i8>* %A
+ %tmp2 = load <16 x i8>, <16 x i8>* %B
%tmp3 = call <16 x i8> @llvm.aarch64.neon.sabd.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
ret <16 x i8> %tmp3
}
@@ -186,8 +186,8 @@ define <16 x i8> @sabd_16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
define <4 x i16> @sabd_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: sabd_4h:
;CHECK: sabd.4h
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
%tmp3 = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
ret <4 x i16> %tmp3
}
@@ -195,8 +195,8 @@ define <4 x i16> @sabd_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
define <8 x i16> @sabd_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: sabd_8h:
;CHECK: sabd.8h
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
+ %tmp2 = load <8 x i16>, <8 x i16>* %B
%tmp3 = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
ret <8 x i16> %tmp3
}
@@ -204,8 +204,8 @@ define <8 x i16> @sabd_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
define <2 x i32> @sabd_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: sabd_2s:
;CHECK: sabd.2s
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
%tmp3 = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
ret <2 x i32> %tmp3
}
@@ -213,8 +213,8 @@ define <2 x i32> @sabd_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
define <4 x i32> @sabd_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: sabd_4s:
;CHECK: sabd.4s
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
+ %tmp2 = load <4 x i32>, <4 x i32>* %B
%tmp3 = call <4 x i32> @llvm.aarch64.neon.sabd.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
ret <4 x i32> %tmp3
}
@@ -229,8 +229,8 @@ declare <4 x i32> @llvm.aarch64.neon.sabd.v4i32(<4 x i32>, <4 x i32>) nounwind r
define <8 x i8> @uabd_8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: uabd_8b:
;CHECK: uabd.8b
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
+ %tmp2 = load <8 x i8>, <8 x i8>* %B
%tmp3 = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
ret <8 x i8> %tmp3
}
@@ -238,8 +238,8 @@ define <8 x i8> @uabd_8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
define <16 x i8> @uabd_16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: uabd_16b:
;CHECK: uabd.16b
- %tmp1 = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
+ %tmp1 = load <16 x i8>, <16 x i8>* %A
+ %tmp2 = load <16 x i8>, <16 x i8>* %B
%tmp3 = call <16 x i8> @llvm.aarch64.neon.uabd.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
ret <16 x i8> %tmp3
}
@@ -247,8 +247,8 @@ define <16 x i8> @uabd_16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
define <4 x i16> @uabd_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: uabd_4h:
;CHECK: uabd.4h
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
%tmp3 = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
ret <4 x i16> %tmp3
}
@@ -256,8 +256,8 @@ define <4 x i16> @uabd_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
define <8 x i16> @uabd_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: uabd_8h:
;CHECK: uabd.8h
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
+ %tmp2 = load <8 x i16>, <8 x i16>* %B
%tmp3 = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
ret <8 x i16> %tmp3
}
@@ -265,8 +265,8 @@ define <8 x i16> @uabd_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
define <2 x i32> @uabd_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: uabd_2s:
;CHECK: uabd.2s
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
%tmp3 = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
ret <2 x i32> %tmp3
}
@@ -274,8 +274,8 @@ define <2 x i32> @uabd_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
define <4 x i32> @uabd_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: uabd_4s:
;CHECK: uabd.4s
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
+ %tmp2 = load <4 x i32>, <4 x i32>* %B
%tmp3 = call <4 x i32> @llvm.aarch64.neon.uabd.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
ret <4 x i32> %tmp3
}
@@ -290,7 +290,7 @@ declare <4 x i32> @llvm.aarch64.neon.uabd.v4i32(<4 x i32>, <4 x i32>) nounwind r
define <8 x i8> @sqabs_8b(<8 x i8>* %A) nounwind {
;CHECK-LABEL: sqabs_8b:
;CHECK: sqabs.8b
- %tmp1 = load <8 x i8>* %A
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
%tmp3 = call <8 x i8> @llvm.aarch64.neon.sqabs.v8i8(<8 x i8> %tmp1)
ret <8 x i8> %tmp3
}
@@ -298,7 +298,7 @@ define <8 x i8> @sqabs_8b(<8 x i8>* %A) nounwind {
define <16 x i8> @sqabs_16b(<16 x i8>* %A) nounwind {
;CHECK-LABEL: sqabs_16b:
;CHECK: sqabs.16b
- %tmp1 = load <16 x i8>* %A
+ %tmp1 = load <16 x i8>, <16 x i8>* %A
%tmp3 = call <16 x i8> @llvm.aarch64.neon.sqabs.v16i8(<16 x i8> %tmp1)
ret <16 x i8> %tmp3
}
@@ -306,7 +306,7 @@ define <16 x i8> @sqabs_16b(<16 x i8>* %A) nounwind {
define <4 x i16> @sqabs_4h(<4 x i16>* %A) nounwind {
;CHECK-LABEL: sqabs_4h:
;CHECK: sqabs.4h
- %tmp1 = load <4 x i16>* %A
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
%tmp3 = call <4 x i16> @llvm.aarch64.neon.sqabs.v4i16(<4 x i16> %tmp1)
ret <4 x i16> %tmp3
}
@@ -314,7 +314,7 @@ define <4 x i16> @sqabs_4h(<4 x i16>* %A) nounwind {
define <8 x i16> @sqabs_8h(<8 x i16>* %A) nounwind {
;CHECK-LABEL: sqabs_8h:
;CHECK: sqabs.8h
- %tmp1 = load <8 x i16>* %A
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
%tmp3 = call <8 x i16> @llvm.aarch64.neon.sqabs.v8i16(<8 x i16> %tmp1)
ret <8 x i16> %tmp3
}
@@ -322,7 +322,7 @@ define <8 x i16> @sqabs_8h(<8 x i16>* %A) nounwind {
define <2 x i32> @sqabs_2s(<2 x i32>* %A) nounwind {
;CHECK-LABEL: sqabs_2s:
;CHECK: sqabs.2s
- %tmp1 = load <2 x i32>* %A
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
%tmp3 = call <2 x i32> @llvm.aarch64.neon.sqabs.v2i32(<2 x i32> %tmp1)
ret <2 x i32> %tmp3
}
@@ -330,7 +330,7 @@ define <2 x i32> @sqabs_2s(<2 x i32>* %A) nounwind {
define <4 x i32> @sqabs_4s(<4 x i32>* %A) nounwind {
;CHECK-LABEL: sqabs_4s:
;CHECK: sqabs.4s
- %tmp1 = load <4 x i32>* %A
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
%tmp3 = call <4 x i32> @llvm.aarch64.neon.sqabs.v4i32(<4 x i32> %tmp1)
ret <4 x i32> %tmp3
}
@@ -345,7 +345,7 @@ declare <4 x i32> @llvm.aarch64.neon.sqabs.v4i32(<4 x i32>) nounwind readnone
define <8 x i8> @sqneg_8b(<8 x i8>* %A) nounwind {
;CHECK-LABEL: sqneg_8b:
;CHECK: sqneg.8b
- %tmp1 = load <8 x i8>* %A
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
%tmp3 = call <8 x i8> @llvm.aarch64.neon.sqneg.v8i8(<8 x i8> %tmp1)
ret <8 x i8> %tmp3
}
@@ -353,7 +353,7 @@ define <8 x i8> @sqneg_8b(<8 x i8>* %A) nounwind {
define <16 x i8> @sqneg_16b(<16 x i8>* %A) nounwind {
;CHECK-LABEL: sqneg_16b:
;CHECK: sqneg.16b
- %tmp1 = load <16 x i8>* %A
+ %tmp1 = load <16 x i8>, <16 x i8>* %A
%tmp3 = call <16 x i8> @llvm.aarch64.neon.sqneg.v16i8(<16 x i8> %tmp1)
ret <16 x i8> %tmp3
}
@@ -361,7 +361,7 @@ define <16 x i8> @sqneg_16b(<16 x i8>* %A) nounwind {
define <4 x i16> @sqneg_4h(<4 x i16>* %A) nounwind {
;CHECK-LABEL: sqneg_4h:
;CHECK: sqneg.4h
- %tmp1 = load <4 x i16>* %A
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
%tmp3 = call <4 x i16> @llvm.aarch64.neon.sqneg.v4i16(<4 x i16> %tmp1)
ret <4 x i16> %tmp3
}
@@ -369,7 +369,7 @@ define <4 x i16> @sqneg_4h(<4 x i16>* %A) nounwind {
define <8 x i16> @sqneg_8h(<8 x i16>* %A) nounwind {
;CHECK-LABEL: sqneg_8h:
;CHECK: sqneg.8h
- %tmp1 = load <8 x i16>* %A
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
%tmp3 = call <8 x i16> @llvm.aarch64.neon.sqneg.v8i16(<8 x i16> %tmp1)
ret <8 x i16> %tmp3
}
@@ -377,7 +377,7 @@ define <8 x i16> @sqneg_8h(<8 x i16>* %A) nounwind {
define <2 x i32> @sqneg_2s(<2 x i32>* %A) nounwind {
;CHECK-LABEL: sqneg_2s:
;CHECK: sqneg.2s
- %tmp1 = load <2 x i32>* %A
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
%tmp3 = call <2 x i32> @llvm.aarch64.neon.sqneg.v2i32(<2 x i32> %tmp1)
ret <2 x i32> %tmp3
}
@@ -385,7 +385,7 @@ define <2 x i32> @sqneg_2s(<2 x i32>* %A) nounwind {
define <4 x i32> @sqneg_4s(<4 x i32>* %A) nounwind {
;CHECK-LABEL: sqneg_4s:
;CHECK: sqneg.4s
- %tmp1 = load <4 x i32>* %A
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
%tmp3 = call <4 x i32> @llvm.aarch64.neon.sqneg.v4i32(<4 x i32> %tmp1)
ret <4 x i32> %tmp3
}
@@ -400,7 +400,7 @@ declare <4 x i32> @llvm.aarch64.neon.sqneg.v4i32(<4 x i32>) nounwind readnone
define <8 x i8> @abs_8b(<8 x i8>* %A) nounwind {
;CHECK-LABEL: abs_8b:
;CHECK: abs.8b
- %tmp1 = load <8 x i8>* %A
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
%tmp3 = call <8 x i8> @llvm.aarch64.neon.abs.v8i8(<8 x i8> %tmp1)
ret <8 x i8> %tmp3
}
@@ -408,7 +408,7 @@ define <8 x i8> @abs_8b(<8 x i8>* %A) nounwind {
define <16 x i8> @abs_16b(<16 x i8>* %A) nounwind {
;CHECK-LABEL: abs_16b:
;CHECK: abs.16b
- %tmp1 = load <16 x i8>* %A
+ %tmp1 = load <16 x i8>, <16 x i8>* %A
%tmp3 = call <16 x i8> @llvm.aarch64.neon.abs.v16i8(<16 x i8> %tmp1)
ret <16 x i8> %tmp3
}
@@ -416,7 +416,7 @@ define <16 x i8> @abs_16b(<16 x i8>* %A) nounwind {
define <4 x i16> @abs_4h(<4 x i16>* %A) nounwind {
;CHECK-LABEL: abs_4h:
;CHECK: abs.4h
- %tmp1 = load <4 x i16>* %A
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
%tmp3 = call <4 x i16> @llvm.aarch64.neon.abs.v4i16(<4 x i16> %tmp1)
ret <4 x i16> %tmp3
}
@@ -424,7 +424,7 @@ define <4 x i16> @abs_4h(<4 x i16>* %A) nounwind {
define <8 x i16> @abs_8h(<8 x i16>* %A) nounwind {
;CHECK-LABEL: abs_8h:
;CHECK: abs.8h
- %tmp1 = load <8 x i16>* %A
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
%tmp3 = call <8 x i16> @llvm.aarch64.neon.abs.v8i16(<8 x i16> %tmp1)
ret <8 x i16> %tmp3
}
@@ -432,7 +432,7 @@ define <8 x i16> @abs_8h(<8 x i16>* %A) nounwind {
define <2 x i32> @abs_2s(<2 x i32>* %A) nounwind {
;CHECK-LABEL: abs_2s:
;CHECK: abs.2s
- %tmp1 = load <2 x i32>* %A
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
%tmp3 = call <2 x i32> @llvm.aarch64.neon.abs.v2i32(<2 x i32> %tmp1)
ret <2 x i32> %tmp3
}
@@ -440,7 +440,7 @@ define <2 x i32> @abs_2s(<2 x i32>* %A) nounwind {
define <4 x i32> @abs_4s(<4 x i32>* %A) nounwind {
;CHECK-LABEL: abs_4s:
;CHECK: abs.4s
- %tmp1 = load <4 x i32>* %A
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
%tmp3 = call <4 x i32> @llvm.aarch64.neon.abs.v4i32(<4 x i32> %tmp1)
ret <4 x i32> %tmp3
}
@@ -471,9 +471,9 @@ declare i64 @llvm.aarch64.neon.abs.i64(i64) nounwind readnone
define <8 x i16> @sabal8h(<8 x i8>* %A, <8 x i8>* %B, <8 x i16>* %C) nounwind {
;CHECK-LABEL: sabal8h:
;CHECK: sabal.8h
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
- %tmp3 = load <8 x i16>* %C
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
+ %tmp2 = load <8 x i8>, <8 x i8>* %B
+ %tmp3 = load <8 x i16>, <8 x i16>* %C
%tmp4 = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
%tmp4.1 = zext <8 x i8> %tmp4 to <8 x i16>
%tmp5 = add <8 x i16> %tmp3, %tmp4.1
@@ -483,9 +483,9 @@ define <8 x i16> @sabal8h(<8 x i8>* %A, <8 x i8>* %B, <8 x i16>* %C) nounwind {
define <4 x i32> @sabal4s(<4 x i16>* %A, <4 x i16>* %B, <4 x i32>* %C) nounwind {
;CHECK-LABEL: sabal4s:
;CHECK: sabal.4s
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
- %tmp3 = load <4 x i32>* %C
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
+ %tmp3 = load <4 x i32>, <4 x i32>* %C
%tmp4 = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
%tmp4.1 = zext <4 x i16> %tmp4 to <4 x i32>
%tmp5 = add <4 x i32> %tmp3, %tmp4.1
@@ -495,9 +495,9 @@ define <4 x i32> @sabal4s(<4 x i16>* %A, <4 x i16>* %B, <4 x i32>* %C) nounwind
define <2 x i64> @sabal2d(<2 x i32>* %A, <2 x i32>* %B, <2 x i64>* %C) nounwind {
;CHECK-LABEL: sabal2d:
;CHECK: sabal.2d
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
- %tmp3 = load <2 x i64>* %C
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
+ %tmp3 = load <2 x i64>, <2 x i64>* %C
%tmp4 = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
%tmp4.1 = zext <2 x i32> %tmp4 to <2 x i64>
%tmp4.1.1 = zext <2 x i32> %tmp4 to <2 x i64>
@@ -508,9 +508,9 @@ define <2 x i64> @sabal2d(<2 x i32>* %A, <2 x i32>* %B, <2 x i64>* %C) nounwind
define <8 x i16> @sabal2_8h(<16 x i8>* %A, <16 x i8>* %B, <8 x i16>* %C) nounwind {
;CHECK-LABEL: sabal2_8h:
;CHECK: sabal2.8h
- %load1 = load <16 x i8>* %A
- %load2 = load <16 x i8>* %B
- %tmp3 = load <8 x i16>* %C
+ %load1 = load <16 x i8>, <16 x i8>* %A
+ %load2 = load <16 x i8>, <16 x i8>* %B
+ %tmp3 = load <8 x i16>, <8 x i16>* %C
%tmp1 = shufflevector <16 x i8> %load1, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%tmp2 = shufflevector <16 x i8> %load2, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%tmp4 = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
@@ -522,9 +522,9 @@ define <8 x i16> @sabal2_8h(<16 x i8>* %A, <16 x i8>* %B, <8 x i16>* %C) nounwin
define <4 x i32> @sabal2_4s(<8 x i16>* %A, <8 x i16>* %B, <4 x i32>* %C) nounwind {
;CHECK-LABEL: sabal2_4s:
;CHECK: sabal2.4s
- %load1 = load <8 x i16>* %A
- %load2 = load <8 x i16>* %B
- %tmp3 = load <4 x i32>* %C
+ %load1 = load <8 x i16>, <8 x i16>* %A
+ %load2 = load <8 x i16>, <8 x i16>* %B
+ %tmp3 = load <4 x i32>, <4 x i32>* %C
%tmp1 = shufflevector <8 x i16> %load1, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
%tmp2 = shufflevector <8 x i16> %load2, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
%tmp4 = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
@@ -536,9 +536,9 @@ define <4 x i32> @sabal2_4s(<8 x i16>* %A, <8 x i16>* %B, <4 x i32>* %C) nounwin
define <2 x i64> @sabal2_2d(<4 x i32>* %A, <4 x i32>* %B, <2 x i64>* %C) nounwind {
;CHECK-LABEL: sabal2_2d:
;CHECK: sabal2.2d
- %load1 = load <4 x i32>* %A
- %load2 = load <4 x i32>* %B
- %tmp3 = load <2 x i64>* %C
+ %load1 = load <4 x i32>, <4 x i32>* %A
+ %load2 = load <4 x i32>, <4 x i32>* %B
+ %tmp3 = load <2 x i64>, <2 x i64>* %C
%tmp1 = shufflevector <4 x i32> %load1, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
%tmp2 = shufflevector <4 x i32> %load2, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
%tmp4 = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
@@ -550,9 +550,9 @@ define <2 x i64> @sabal2_2d(<4 x i32>* %A, <4 x i32>* %B, <2 x i64>* %C) nounwin
define <8 x i16> @uabal8h(<8 x i8>* %A, <8 x i8>* %B, <8 x i16>* %C) nounwind {
;CHECK-LABEL: uabal8h:
;CHECK: uabal.8h
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
- %tmp3 = load <8 x i16>* %C
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
+ %tmp2 = load <8 x i8>, <8 x i8>* %B
+ %tmp3 = load <8 x i16>, <8 x i16>* %C
%tmp4 = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
%tmp4.1 = zext <8 x i8> %tmp4 to <8 x i16>
%tmp5 = add <8 x i16> %tmp3, %tmp4.1
@@ -562,9 +562,9 @@ define <8 x i16> @uabal8h(<8 x i8>* %A, <8 x i8>* %B, <8 x i16>* %C) nounwind {
define <4 x i32> @uabal4s(<4 x i16>* %A, <4 x i16>* %B, <4 x i32>* %C) nounwind {
;CHECK-LABEL: uabal4s:
;CHECK: uabal.4s
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
- %tmp3 = load <4 x i32>* %C
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
+ %tmp3 = load <4 x i32>, <4 x i32>* %C
%tmp4 = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
%tmp4.1 = zext <4 x i16> %tmp4 to <4 x i32>
%tmp5 = add <4 x i32> %tmp3, %tmp4.1
@@ -574,9 +574,9 @@ define <4 x i32> @uabal4s(<4 x i16>* %A, <4 x i16>* %B, <4 x i32>* %C) nounwind
define <2 x i64> @uabal2d(<2 x i32>* %A, <2 x i32>* %B, <2 x i64>* %C) nounwind {
;CHECK-LABEL: uabal2d:
;CHECK: uabal.2d
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
- %tmp3 = load <2 x i64>* %C
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
+ %tmp3 = load <2 x i64>, <2 x i64>* %C
%tmp4 = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
%tmp4.1 = zext <2 x i32> %tmp4 to <2 x i64>
%tmp5 = add <2 x i64> %tmp3, %tmp4.1
@@ -586,9 +586,9 @@ define <2 x i64> @uabal2d(<2 x i32>* %A, <2 x i32>* %B, <2 x i64>* %C) nounwind
define <8 x i16> @uabal2_8h(<16 x i8>* %A, <16 x i8>* %B, <8 x i16>* %C) nounwind {
;CHECK-LABEL: uabal2_8h:
;CHECK: uabal2.8h
- %load1 = load <16 x i8>* %A
- %load2 = load <16 x i8>* %B
- %tmp3 = load <8 x i16>* %C
+ %load1 = load <16 x i8>, <16 x i8>* %A
+ %load2 = load <16 x i8>, <16 x i8>* %B
+ %tmp3 = load <8 x i16>, <8 x i16>* %C
%tmp1 = shufflevector <16 x i8> %load1, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%tmp2 = shufflevector <16 x i8> %load2, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%tmp4 = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
@@ -600,9 +600,9 @@ define <8 x i16> @uabal2_8h(<16 x i8>* %A, <16 x i8>* %B, <8 x i16>* %C) nounwin
define <4 x i32> @uabal2_4s(<8 x i16>* %A, <8 x i16>* %B, <4 x i32>* %C) nounwind {
;CHECK-LABEL: uabal2_4s:
;CHECK: uabal2.4s
- %load1 = load <8 x i16>* %A
- %load2 = load <8 x i16>* %B
- %tmp3 = load <4 x i32>* %C
+ %load1 = load <8 x i16>, <8 x i16>* %A
+ %load2 = load <8 x i16>, <8 x i16>* %B
+ %tmp3 = load <4 x i32>, <4 x i32>* %C
%tmp1 = shufflevector <8 x i16> %load1, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
%tmp2 = shufflevector <8 x i16> %load2, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
%tmp4 = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
@@ -614,9 +614,9 @@ define <4 x i32> @uabal2_4s(<8 x i16>* %A, <8 x i16>* %B, <4 x i32>* %C) nounwin
define <2 x i64> @uabal2_2d(<4 x i32>* %A, <4 x i32>* %B, <2 x i64>* %C) nounwind {
;CHECK-LABEL: uabal2_2d:
;CHECK: uabal2.2d
- %load1 = load <4 x i32>* %A
- %load2 = load <4 x i32>* %B
- %tmp3 = load <2 x i64>* %C
+ %load1 = load <4 x i32>, <4 x i32>* %A
+ %load2 = load <4 x i32>, <4 x i32>* %B
+ %tmp3 = load <2 x i64>, <2 x i64>* %C
%tmp1 = shufflevector <4 x i32> %load1, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
%tmp2 = shufflevector <4 x i32> %load2, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
%tmp4 = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
@@ -628,10 +628,10 @@ define <2 x i64> @uabal2_2d(<4 x i32>* %A, <4 x i32>* %B, <2 x i64>* %C) nounwin
define <8 x i8> @saba_8b(<8 x i8>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
;CHECK-LABEL: saba_8b:
;CHECK: saba.8b
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
+ %tmp2 = load <8 x i8>, <8 x i8>* %B
%tmp3 = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
- %tmp4 = load <8 x i8>* %C
+ %tmp4 = load <8 x i8>, <8 x i8>* %C
%tmp5 = add <8 x i8> %tmp3, %tmp4
ret <8 x i8> %tmp5
}
@@ -639,10 +639,10 @@ define <8 x i8> @saba_8b(<8 x i8>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
define <16 x i8> @saba_16b(<16 x i8>* %A, <16 x i8>* %B, <16 x i8>* %C) nounwind {
;CHECK-LABEL: saba_16b:
;CHECK: saba.16b
- %tmp1 = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
+ %tmp1 = load <16 x i8>, <16 x i8>* %A
+ %tmp2 = load <16 x i8>, <16 x i8>* %B
%tmp3 = call <16 x i8> @llvm.aarch64.neon.sabd.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
- %tmp4 = load <16 x i8>* %C
+ %tmp4 = load <16 x i8>, <16 x i8>* %C
%tmp5 = add <16 x i8> %tmp3, %tmp4
ret <16 x i8> %tmp5
}
@@ -650,10 +650,10 @@ define <16 x i8> @saba_16b(<16 x i8>* %A, <16 x i8>* %B, <16 x i8>* %C) nounwind
define <4 x i16> @saba_4h(<4 x i16>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
;CHECK-LABEL: saba_4h:
;CHECK: saba.4h
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
%tmp3 = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
- %tmp4 = load <4 x i16>* %C
+ %tmp4 = load <4 x i16>, <4 x i16>* %C
%tmp5 = add <4 x i16> %tmp3, %tmp4
ret <4 x i16> %tmp5
}
@@ -661,10 +661,10 @@ define <4 x i16> @saba_4h(<4 x i16>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind
define <8 x i16> @saba_8h(<8 x i16>* %A, <8 x i16>* %B, <8 x i16>* %C) nounwind {
;CHECK-LABEL: saba_8h:
;CHECK: saba.8h
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
+ %tmp2 = load <8 x i16>, <8 x i16>* %B
%tmp3 = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
- %tmp4 = load <8 x i16>* %C
+ %tmp4 = load <8 x i16>, <8 x i16>* %C
%tmp5 = add <8 x i16> %tmp3, %tmp4
ret <8 x i16> %tmp5
}
@@ -672,10 +672,10 @@ define <8 x i16> @saba_8h(<8 x i16>* %A, <8 x i16>* %B, <8 x i16>* %C) nounwind
define <2 x i32> @saba_2s(<2 x i32>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
;CHECK-LABEL: saba_2s:
;CHECK: saba.2s
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
%tmp3 = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
- %tmp4 = load <2 x i32>* %C
+ %tmp4 = load <2 x i32>, <2 x i32>* %C
%tmp5 = add <2 x i32> %tmp3, %tmp4
ret <2 x i32> %tmp5
}
@@ -683,10 +683,10 @@ define <2 x i32> @saba_2s(<2 x i32>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind
define <4 x i32> @saba_4s(<4 x i32>* %A, <4 x i32>* %B, <4 x i32>* %C) nounwind {
;CHECK-LABEL: saba_4s:
;CHECK: saba.4s
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
+ %tmp2 = load <4 x i32>, <4 x i32>* %B
%tmp3 = call <4 x i32> @llvm.aarch64.neon.sabd.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
- %tmp4 = load <4 x i32>* %C
+ %tmp4 = load <4 x i32>, <4 x i32>* %C
%tmp5 = add <4 x i32> %tmp3, %tmp4
ret <4 x i32> %tmp5
}
@@ -694,10 +694,10 @@ define <4 x i32> @saba_4s(<4 x i32>* %A, <4 x i32>* %B, <4 x i32>* %C) nounwind
define <8 x i8> @uaba_8b(<8 x i8>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
;CHECK-LABEL: uaba_8b:
;CHECK: uaba.8b
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
+ %tmp2 = load <8 x i8>, <8 x i8>* %B
%tmp3 = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
- %tmp4 = load <8 x i8>* %C
+ %tmp4 = load <8 x i8>, <8 x i8>* %C
%tmp5 = add <8 x i8> %tmp3, %tmp4
ret <8 x i8> %tmp5
}
@@ -705,10 +705,10 @@ define <8 x i8> @uaba_8b(<8 x i8>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
define <16 x i8> @uaba_16b(<16 x i8>* %A, <16 x i8>* %B, <16 x i8>* %C) nounwind {
;CHECK-LABEL: uaba_16b:
;CHECK: uaba.16b
- %tmp1 = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
+ %tmp1 = load <16 x i8>, <16 x i8>* %A
+ %tmp2 = load <16 x i8>, <16 x i8>* %B
%tmp3 = call <16 x i8> @llvm.aarch64.neon.uabd.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
- %tmp4 = load <16 x i8>* %C
+ %tmp4 = load <16 x i8>, <16 x i8>* %C
%tmp5 = add <16 x i8> %tmp3, %tmp4
ret <16 x i8> %tmp5
}
@@ -716,10 +716,10 @@ define <16 x i8> @uaba_16b(<16 x i8>* %A, <16 x i8>* %B, <16 x i8>* %C) nounwind
define <4 x i16> @uaba_4h(<4 x i16>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
;CHECK-LABEL: uaba_4h:
;CHECK: uaba.4h
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
%tmp3 = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
- %tmp4 = load <4 x i16>* %C
+ %tmp4 = load <4 x i16>, <4 x i16>* %C
%tmp5 = add <4 x i16> %tmp3, %tmp4
ret <4 x i16> %tmp5
}
@@ -727,10 +727,10 @@ define <4 x i16> @uaba_4h(<4 x i16>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind
define <8 x i16> @uaba_8h(<8 x i16>* %A, <8 x i16>* %B, <8 x i16>* %C) nounwind {
;CHECK-LABEL: uaba_8h:
;CHECK: uaba.8h
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
+ %tmp2 = load <8 x i16>, <8 x i16>* %B
%tmp3 = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
- %tmp4 = load <8 x i16>* %C
+ %tmp4 = load <8 x i16>, <8 x i16>* %C
%tmp5 = add <8 x i16> %tmp3, %tmp4
ret <8 x i16> %tmp5
}
@@ -738,10 +738,10 @@ define <8 x i16> @uaba_8h(<8 x i16>* %A, <8 x i16>* %B, <8 x i16>* %C) nounwind
define <2 x i32> @uaba_2s(<2 x i32>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
;CHECK-LABEL: uaba_2s:
;CHECK: uaba.2s
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
%tmp3 = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
- %tmp4 = load <2 x i32>* %C
+ %tmp4 = load <2 x i32>, <2 x i32>* %C
%tmp5 = add <2 x i32> %tmp3, %tmp4
ret <2 x i32> %tmp5
}
@@ -749,10 +749,10 @@ define <2 x i32> @uaba_2s(<2 x i32>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind
define <4 x i32> @uaba_4s(<4 x i32>* %A, <4 x i32>* %B, <4 x i32>* %C) nounwind {
;CHECK-LABEL: uaba_4s:
;CHECK: uaba.4s
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
+ %tmp2 = load <4 x i32>, <4 x i32>* %B
%tmp3 = call <4 x i32> @llvm.aarch64.neon.uabd.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
- %tmp4 = load <4 x i32>* %C
+ %tmp4 = load <4 x i32>, <4 x i32>* %C
%tmp5 = add <4 x i32> %tmp3, %tmp4
ret <4 x i32> %tmp5
}
diff --git a/test/CodeGen/AArch64/arm64-vadd.ll b/test/CodeGen/AArch64/arm64-vadd.ll
index 9ed8aa6d7c5d..e3d8dd256956 100644
--- a/test/CodeGen/AArch64/arm64-vadd.ll
+++ b/test/CodeGen/AArch64/arm64-vadd.ll
@@ -3,8 +3,8 @@
define <8 x i8> @addhn8b(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: addhn8b:
;CHECK: addhn.8b
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
+ %tmp2 = load <8 x i16>, <8 x i16>* %B
%tmp3 = call <8 x i8> @llvm.aarch64.neon.addhn.v8i8(<8 x i16> %tmp1, <8 x i16> %tmp2)
ret <8 x i8> %tmp3
}
@@ -12,8 +12,8 @@ define <8 x i8> @addhn8b(<8 x i16>* %A, <8 x i16>* %B) nounwind {
define <4 x i16> @addhn4h(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: addhn4h:
;CHECK: addhn.4h
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
+ %tmp2 = load <4 x i32>, <4 x i32>* %B
%tmp3 = call <4 x i16> @llvm.aarch64.neon.addhn.v4i16(<4 x i32> %tmp1, <4 x i32> %tmp2)
ret <4 x i16> %tmp3
}
@@ -21,8 +21,8 @@ define <4 x i16> @addhn4h(<4 x i32>* %A, <4 x i32>* %B) nounwind {
define <2 x i32> @addhn2s(<2 x i64>* %A, <2 x i64>* %B) nounwind {
;CHECK-LABEL: addhn2s:
;CHECK: addhn.2s
- %tmp1 = load <2 x i64>* %A
- %tmp2 = load <2 x i64>* %B
+ %tmp1 = load <2 x i64>, <2 x i64>* %A
+ %tmp2 = load <2 x i64>, <2 x i64>* %B
%tmp3 = call <2 x i32> @llvm.aarch64.neon.addhn.v2i32(<2 x i64> %tmp1, <2 x i64> %tmp2)
ret <2 x i32> %tmp3
}
@@ -65,8 +65,8 @@ declare <8 x i8> @llvm.aarch64.neon.addhn.v8i8(<8 x i16>, <8 x i16>) nounwind re
define <8 x i8> @raddhn8b(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: raddhn8b:
;CHECK: raddhn.8b
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
+ %tmp2 = load <8 x i16>, <8 x i16>* %B
%tmp3 = call <8 x i8> @llvm.aarch64.neon.raddhn.v8i8(<8 x i16> %tmp1, <8 x i16> %tmp2)
ret <8 x i8> %tmp3
}
@@ -74,8 +74,8 @@ define <8 x i8> @raddhn8b(<8 x i16>* %A, <8 x i16>* %B) nounwind {
define <4 x i16> @raddhn4h(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: raddhn4h:
;CHECK: raddhn.4h
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
+ %tmp2 = load <4 x i32>, <4 x i32>* %B
%tmp3 = call <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32> %tmp1, <4 x i32> %tmp2)
ret <4 x i16> %tmp3
}
@@ -83,8 +83,8 @@ define <4 x i16> @raddhn4h(<4 x i32>* %A, <4 x i32>* %B) nounwind {
define <2 x i32> @raddhn2s(<2 x i64>* %A, <2 x i64>* %B) nounwind {
;CHECK-LABEL: raddhn2s:
;CHECK: raddhn.2s
- %tmp1 = load <2 x i64>* %A
- %tmp2 = load <2 x i64>* %B
+ %tmp1 = load <2 x i64>, <2 x i64>* %A
+ %tmp2 = load <2 x i64>, <2 x i64>* %B
%tmp3 = call <2 x i32> @llvm.aarch64.neon.raddhn.v2i32(<2 x i64> %tmp1, <2 x i64> %tmp2)
ret <2 x i32> %tmp3
}
@@ -126,8 +126,8 @@ declare <8 x i8> @llvm.aarch64.neon.raddhn.v8i8(<8 x i16>, <8 x i16>) nounwind r
define <8 x i16> @saddl8h(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: saddl8h:
;CHECK: saddl.8h
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
+ %tmp2 = load <8 x i8>, <8 x i8>* %B
%tmp3 = sext <8 x i8> %tmp1 to <8 x i16>
%tmp4 = sext <8 x i8> %tmp2 to <8 x i16>
%tmp5 = add <8 x i16> %tmp3, %tmp4
@@ -137,8 +137,8 @@ define <8 x i16> @saddl8h(<8 x i8>* %A, <8 x i8>* %B) nounwind {
define <4 x i32> @saddl4s(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: saddl4s:
;CHECK: saddl.4s
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
%tmp3 = sext <4 x i16> %tmp1 to <4 x i32>
%tmp4 = sext <4 x i16> %tmp2 to <4 x i32>
%tmp5 = add <4 x i32> %tmp3, %tmp4
@@ -148,8 +148,8 @@ define <4 x i32> @saddl4s(<4 x i16>* %A, <4 x i16>* %B) nounwind {
define <2 x i64> @saddl2d(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: saddl2d:
;CHECK: saddl.2d
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
%tmp3 = sext <2 x i32> %tmp1 to <2 x i64>
%tmp4 = sext <2 x i32> %tmp2 to <2 x i64>
%tmp5 = add <2 x i64> %tmp3, %tmp4
@@ -207,8 +207,8 @@ define <2 x i64> @saddl2_2d(<4 x i32> %a, <4 x i32> %b) nounwind {
define <8 x i16> @uaddl8h(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: uaddl8h:
;CHECK: uaddl.8h
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
+ %tmp2 = load <8 x i8>, <8 x i8>* %B
%tmp3 = zext <8 x i8> %tmp1 to <8 x i16>
%tmp4 = zext <8 x i8> %tmp2 to <8 x i16>
%tmp5 = add <8 x i16> %tmp3, %tmp4
@@ -218,8 +218,8 @@ define <8 x i16> @uaddl8h(<8 x i8>* %A, <8 x i8>* %B) nounwind {
define <4 x i32> @uaddl4s(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: uaddl4s:
;CHECK: uaddl.4s
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
%tmp3 = zext <4 x i16> %tmp1 to <4 x i32>
%tmp4 = zext <4 x i16> %tmp2 to <4 x i32>
%tmp5 = add <4 x i32> %tmp3, %tmp4
@@ -229,8 +229,8 @@ define <4 x i32> @uaddl4s(<4 x i16>* %A, <4 x i16>* %B) nounwind {
define <2 x i64> @uaddl2d(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: uaddl2d:
;CHECK: uaddl.2d
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
%tmp3 = zext <2 x i32> %tmp1 to <2 x i64>
%tmp4 = zext <2 x i32> %tmp2 to <2 x i64>
%tmp5 = add <2 x i64> %tmp3, %tmp4
@@ -289,8 +289,8 @@ define <2 x i64> @uaddl2_2d(<4 x i32> %a, <4 x i32> %b) nounwind {
define <8 x i16> @uaddw8h(<8 x i16>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: uaddw8h:
;CHECK: uaddw.8h
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i8>* %B
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
+ %tmp2 = load <8 x i8>, <8 x i8>* %B
%tmp3 = zext <8 x i8> %tmp2 to <8 x i16>
%tmp4 = add <8 x i16> %tmp1, %tmp3
ret <8 x i16> %tmp4
@@ -299,8 +299,8 @@ define <8 x i16> @uaddw8h(<8 x i16>* %A, <8 x i8>* %B) nounwind {
define <4 x i32> @uaddw4s(<4 x i32>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: uaddw4s:
;CHECK: uaddw.4s
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i16>* %B
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
%tmp3 = zext <4 x i16> %tmp2 to <4 x i32>
%tmp4 = add <4 x i32> %tmp1, %tmp3
ret <4 x i32> %tmp4
@@ -309,8 +309,8 @@ define <4 x i32> @uaddw4s(<4 x i32>* %A, <4 x i16>* %B) nounwind {
define <2 x i64> @uaddw2d(<2 x i64>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: uaddw2d:
;CHECK: uaddw.2d
- %tmp1 = load <2 x i64>* %A
- %tmp2 = load <2 x i32>* %B
+ %tmp1 = load <2 x i64>, <2 x i64>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
%tmp3 = zext <2 x i32> %tmp2 to <2 x i64>
%tmp4 = add <2 x i64> %tmp1, %tmp3
ret <2 x i64> %tmp4
@@ -319,9 +319,9 @@ define <2 x i64> @uaddw2d(<2 x i64>* %A, <2 x i32>* %B) nounwind {
define <8 x i16> @uaddw2_8h(<8 x i16>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: uaddw2_8h:
;CHECK: uaddw2.8h
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <16 x i8>* %B
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
+ %tmp2 = load <16 x i8>, <16 x i8>* %B
%high2 = shufflevector <16 x i8> %tmp2, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%ext2 = zext <8 x i8> %high2 to <8 x i16>
@@ -332,9 +332,9 @@ define <8 x i16> @uaddw2_8h(<8 x i16>* %A, <16 x i8>* %B) nounwind {
define <4 x i32> @uaddw2_4s(<4 x i32>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: uaddw2_4s:
;CHECK: uaddw2.4s
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <8 x i16>* %B
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
+ %tmp2 = load <8 x i16>, <8 x i16>* %B
%high2 = shufflevector <8 x i16> %tmp2, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
%ext2 = zext <4 x i16> %high2 to <4 x i32>
@@ -345,9 +345,9 @@ define <4 x i32> @uaddw2_4s(<4 x i32>* %A, <8 x i16>* %B) nounwind {
define <2 x i64> @uaddw2_2d(<2 x i64>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: uaddw2_2d:
;CHECK: uaddw2.2d
- %tmp1 = load <2 x i64>* %A
- %tmp2 = load <4 x i32>* %B
+ %tmp1 = load <2 x i64>, <2 x i64>* %A
+ %tmp2 = load <4 x i32>, <4 x i32>* %B
%high2 = shufflevector <4 x i32> %tmp2, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
%ext2 = zext <2 x i32> %high2 to <2 x i64>
@@ -358,8 +358,8 @@ define <2 x i64> @uaddw2_2d(<2 x i64>* %A, <4 x i32>* %B) nounwind {
define <8 x i16> @saddw8h(<8 x i16>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: saddw8h:
;CHECK: saddw.8h
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i8>* %B
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
+ %tmp2 = load <8 x i8>, <8 x i8>* %B
%tmp3 = sext <8 x i8> %tmp2 to <8 x i16>
%tmp4 = add <8 x i16> %tmp1, %tmp3
ret <8 x i16> %tmp4
@@ -368,8 +368,8 @@ define <8 x i16> @saddw8h(<8 x i16>* %A, <8 x i8>* %B) nounwind {
define <4 x i32> @saddw4s(<4 x i32>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: saddw4s:
;CHECK: saddw.4s
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i16>* %B
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
%tmp3 = sext <4 x i16> %tmp2 to <4 x i32>
%tmp4 = add <4 x i32> %tmp1, %tmp3
ret <4 x i32> %tmp4
@@ -378,8 +378,8 @@ define <4 x i32> @saddw4s(<4 x i32>* %A, <4 x i16>* %B) nounwind {
define <2 x i64> @saddw2d(<2 x i64>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: saddw2d:
;CHECK: saddw.2d
- %tmp1 = load <2 x i64>* %A
- %tmp2 = load <2 x i32>* %B
+ %tmp1 = load <2 x i64>, <2 x i64>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
%tmp3 = sext <2 x i32> %tmp2 to <2 x i64>
%tmp4 = add <2 x i64> %tmp1, %tmp3
ret <2 x i64> %tmp4
@@ -388,9 +388,9 @@ define <2 x i64> @saddw2d(<2 x i64>* %A, <2 x i32>* %B) nounwind {
define <8 x i16> @saddw2_8h(<8 x i16>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: saddw2_8h:
;CHECK: saddw2.8h
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <16 x i8>* %B
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
+ %tmp2 = load <16 x i8>, <16 x i8>* %B
%high2 = shufflevector <16 x i8> %tmp2, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%ext2 = sext <8 x i8> %high2 to <8 x i16>
@@ -401,9 +401,9 @@ define <8 x i16> @saddw2_8h(<8 x i16>* %A, <16 x i8>* %B) nounwind {
define <4 x i32> @saddw2_4s(<4 x i32>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: saddw2_4s:
;CHECK: saddw2.4s
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <8 x i16>* %B
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
+ %tmp2 = load <8 x i16>, <8 x i16>* %B
%high2 = shufflevector <8 x i16> %tmp2, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
%ext2 = sext <4 x i16> %high2 to <4 x i32>
@@ -414,9 +414,9 @@ define <4 x i32> @saddw2_4s(<4 x i32>* %A, <8 x i16>* %B) nounwind {
define <2 x i64> @saddw2_2d(<2 x i64>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: saddw2_2d:
;CHECK: saddw2.2d
- %tmp1 = load <2 x i64>* %A
- %tmp2 = load <4 x i32>* %B
+ %tmp1 = load <2 x i64>, <2 x i64>* %A
+ %tmp2 = load <4 x i32>, <4 x i32>* %B
%high2 = shufflevector <4 x i32> %tmp2, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
%ext2 = sext <2 x i32> %high2 to <2 x i64>
@@ -427,7 +427,7 @@ define <2 x i64> @saddw2_2d(<2 x i64>* %A, <4 x i32>* %B) nounwind {
define <4 x i16> @saddlp4h(<8 x i8>* %A) nounwind {
;CHECK-LABEL: saddlp4h:
;CHECK: saddlp.4h
- %tmp1 = load <8 x i8>* %A
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
%tmp3 = call <4 x i16> @llvm.aarch64.neon.saddlp.v4i16.v8i8(<8 x i8> %tmp1)
ret <4 x i16> %tmp3
}
@@ -435,7 +435,7 @@ define <4 x i16> @saddlp4h(<8 x i8>* %A) nounwind {
define <2 x i32> @saddlp2s(<4 x i16>* %A) nounwind {
;CHECK-LABEL: saddlp2s:
;CHECK: saddlp.2s
- %tmp1 = load <4 x i16>* %A
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
%tmp3 = call <2 x i32> @llvm.aarch64.neon.saddlp.v2i32.v4i16(<4 x i16> %tmp1)
ret <2 x i32> %tmp3
}
@@ -443,7 +443,7 @@ define <2 x i32> @saddlp2s(<4 x i16>* %A) nounwind {
define <1 x i64> @saddlp1d(<2 x i32>* %A) nounwind {
;CHECK-LABEL: saddlp1d:
;CHECK: saddlp.1d
- %tmp1 = load <2 x i32>* %A
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
%tmp3 = call <1 x i64> @llvm.aarch64.neon.saddlp.v1i64.v2i32(<2 x i32> %tmp1)
ret <1 x i64> %tmp3
}
@@ -451,7 +451,7 @@ define <1 x i64> @saddlp1d(<2 x i32>* %A) nounwind {
define <8 x i16> @saddlp8h(<16 x i8>* %A) nounwind {
;CHECK-LABEL: saddlp8h:
;CHECK: saddlp.8h
- %tmp1 = load <16 x i8>* %A
+ %tmp1 = load <16 x i8>, <16 x i8>* %A
%tmp3 = call <8 x i16> @llvm.aarch64.neon.saddlp.v8i16.v16i8(<16 x i8> %tmp1)
ret <8 x i16> %tmp3
}
@@ -459,7 +459,7 @@ define <8 x i16> @saddlp8h(<16 x i8>* %A) nounwind {
define <4 x i32> @saddlp4s(<8 x i16>* %A) nounwind {
;CHECK-LABEL: saddlp4s:
;CHECK: saddlp.4s
- %tmp1 = load <8 x i16>* %A
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
%tmp3 = call <4 x i32> @llvm.aarch64.neon.saddlp.v4i32.v8i16(<8 x i16> %tmp1)
ret <4 x i32> %tmp3
}
@@ -467,7 +467,7 @@ define <4 x i32> @saddlp4s(<8 x i16>* %A) nounwind {
define <2 x i64> @saddlp2d(<4 x i32>* %A) nounwind {
;CHECK-LABEL: saddlp2d:
;CHECK: saddlp.2d
- %tmp1 = load <4 x i32>* %A
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
%tmp3 = call <2 x i64> @llvm.aarch64.neon.saddlp.v2i64.v4i32(<4 x i32> %tmp1)
ret <2 x i64> %tmp3
}
@@ -483,7 +483,7 @@ declare <2 x i64> @llvm.aarch64.neon.saddlp.v2i64.v4i32(<4 x i32>) nounwind read
define <4 x i16> @uaddlp4h(<8 x i8>* %A) nounwind {
;CHECK-LABEL: uaddlp4h:
;CHECK: uaddlp.4h
- %tmp1 = load <8 x i8>* %A
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
%tmp3 = call <4 x i16> @llvm.aarch64.neon.uaddlp.v4i16.v8i8(<8 x i8> %tmp1)
ret <4 x i16> %tmp3
}
@@ -491,7 +491,7 @@ define <4 x i16> @uaddlp4h(<8 x i8>* %A) nounwind {
define <2 x i32> @uaddlp2s(<4 x i16>* %A) nounwind {
;CHECK-LABEL: uaddlp2s:
;CHECK: uaddlp.2s
- %tmp1 = load <4 x i16>* %A
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
%tmp3 = call <2 x i32> @llvm.aarch64.neon.uaddlp.v2i32.v4i16(<4 x i16> %tmp1)
ret <2 x i32> %tmp3
}
@@ -499,7 +499,7 @@ define <2 x i32> @uaddlp2s(<4 x i16>* %A) nounwind {
define <1 x i64> @uaddlp1d(<2 x i32>* %A) nounwind {
;CHECK-LABEL: uaddlp1d:
;CHECK: uaddlp.1d
- %tmp1 = load <2 x i32>* %A
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
%tmp3 = call <1 x i64> @llvm.aarch64.neon.uaddlp.v1i64.v2i32(<2 x i32> %tmp1)
ret <1 x i64> %tmp3
}
@@ -507,7 +507,7 @@ define <1 x i64> @uaddlp1d(<2 x i32>* %A) nounwind {
define <8 x i16> @uaddlp8h(<16 x i8>* %A) nounwind {
;CHECK-LABEL: uaddlp8h:
;CHECK: uaddlp.8h
- %tmp1 = load <16 x i8>* %A
+ %tmp1 = load <16 x i8>, <16 x i8>* %A
%tmp3 = call <8 x i16> @llvm.aarch64.neon.uaddlp.v8i16.v16i8(<16 x i8> %tmp1)
ret <8 x i16> %tmp3
}
@@ -515,7 +515,7 @@ define <8 x i16> @uaddlp8h(<16 x i8>* %A) nounwind {
define <4 x i32> @uaddlp4s(<8 x i16>* %A) nounwind {
;CHECK-LABEL: uaddlp4s:
;CHECK: uaddlp.4s
- %tmp1 = load <8 x i16>* %A
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
%tmp3 = call <4 x i32> @llvm.aarch64.neon.uaddlp.v4i32.v8i16(<8 x i16> %tmp1)
ret <4 x i32> %tmp3
}
@@ -523,7 +523,7 @@ define <4 x i32> @uaddlp4s(<8 x i16>* %A) nounwind {
define <2 x i64> @uaddlp2d(<4 x i32>* %A) nounwind {
;CHECK-LABEL: uaddlp2d:
;CHECK: uaddlp.2d
- %tmp1 = load <4 x i32>* %A
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
%tmp3 = call <2 x i64> @llvm.aarch64.neon.uaddlp.v2i64.v4i32(<4 x i32> %tmp1)
ret <2 x i64> %tmp3
}
@@ -539,9 +539,9 @@ declare <2 x i64> @llvm.aarch64.neon.uaddlp.v2i64.v4i32(<4 x i32>) nounwind read
define <4 x i16> @sadalp4h(<8 x i8>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: sadalp4h:
;CHECK: sadalp.4h
- %tmp1 = load <8 x i8>* %A
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
%tmp3 = call <4 x i16> @llvm.aarch64.neon.saddlp.v4i16.v8i8(<8 x i8> %tmp1)
- %tmp4 = load <4 x i16>* %B
+ %tmp4 = load <4 x i16>, <4 x i16>* %B
%tmp5 = add <4 x i16> %tmp3, %tmp4
ret <4 x i16> %tmp5
}
@@ -549,9 +549,9 @@ define <4 x i16> @sadalp4h(<8 x i8>* %A, <4 x i16>* %B) nounwind {
define <2 x i32> @sadalp2s(<4 x i16>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: sadalp2s:
;CHECK: sadalp.2s
- %tmp1 = load <4 x i16>* %A
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
%tmp3 = call <2 x i32> @llvm.aarch64.neon.saddlp.v2i32.v4i16(<4 x i16> %tmp1)
- %tmp4 = load <2 x i32>* %B
+ %tmp4 = load <2 x i32>, <2 x i32>* %B
%tmp5 = add <2 x i32> %tmp3, %tmp4
ret <2 x i32> %tmp5
}
@@ -559,9 +559,9 @@ define <2 x i32> @sadalp2s(<4 x i16>* %A, <2 x i32>* %B) nounwind {
define <8 x i16> @sadalp8h(<16 x i8>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: sadalp8h:
;CHECK: sadalp.8h
- %tmp1 = load <16 x i8>* %A
+ %tmp1 = load <16 x i8>, <16 x i8>* %A
%tmp3 = call <8 x i16> @llvm.aarch64.neon.saddlp.v8i16.v16i8(<16 x i8> %tmp1)
- %tmp4 = load <8 x i16>* %B
+ %tmp4 = load <8 x i16>, <8 x i16>* %B
%tmp5 = add <8 x i16> %tmp3, %tmp4
ret <8 x i16> %tmp5
}
@@ -569,9 +569,9 @@ define <8 x i16> @sadalp8h(<16 x i8>* %A, <8 x i16>* %B) nounwind {
define <4 x i32> @sadalp4s(<8 x i16>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: sadalp4s:
;CHECK: sadalp.4s
- %tmp1 = load <8 x i16>* %A
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
%tmp3 = call <4 x i32> @llvm.aarch64.neon.saddlp.v4i32.v8i16(<8 x i16> %tmp1)
- %tmp4 = load <4 x i32>* %B
+ %tmp4 = load <4 x i32>, <4 x i32>* %B
%tmp5 = add <4 x i32> %tmp3, %tmp4
ret <4 x i32> %tmp5
}
@@ -579,9 +579,9 @@ define <4 x i32> @sadalp4s(<8 x i16>* %A, <4 x i32>* %B) nounwind {
define <2 x i64> @sadalp2d(<4 x i32>* %A, <2 x i64>* %B) nounwind {
;CHECK-LABEL: sadalp2d:
;CHECK: sadalp.2d
- %tmp1 = load <4 x i32>* %A
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
%tmp3 = call <2 x i64> @llvm.aarch64.neon.saddlp.v2i64.v4i32(<4 x i32> %tmp1)
- %tmp4 = load <2 x i64>* %B
+ %tmp4 = load <2 x i64>, <2 x i64>* %B
%tmp5 = add <2 x i64> %tmp3, %tmp4
ret <2 x i64> %tmp5
}
@@ -589,9 +589,9 @@ define <2 x i64> @sadalp2d(<4 x i32>* %A, <2 x i64>* %B) nounwind {
define <4 x i16> @uadalp4h(<8 x i8>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: uadalp4h:
;CHECK: uadalp.4h
- %tmp1 = load <8 x i8>* %A
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
%tmp3 = call <4 x i16> @llvm.aarch64.neon.uaddlp.v4i16.v8i8(<8 x i8> %tmp1)
- %tmp4 = load <4 x i16>* %B
+ %tmp4 = load <4 x i16>, <4 x i16>* %B
%tmp5 = add <4 x i16> %tmp3, %tmp4
ret <4 x i16> %tmp5
}
@@ -599,9 +599,9 @@ define <4 x i16> @uadalp4h(<8 x i8>* %A, <4 x i16>* %B) nounwind {
define <2 x i32> @uadalp2s(<4 x i16>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: uadalp2s:
;CHECK: uadalp.2s
- %tmp1 = load <4 x i16>* %A
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
%tmp3 = call <2 x i32> @llvm.aarch64.neon.uaddlp.v2i32.v4i16(<4 x i16> %tmp1)
- %tmp4 = load <2 x i32>* %B
+ %tmp4 = load <2 x i32>, <2 x i32>* %B
%tmp5 = add <2 x i32> %tmp3, %tmp4
ret <2 x i32> %tmp5
}
@@ -609,9 +609,9 @@ define <2 x i32> @uadalp2s(<4 x i16>* %A, <2 x i32>* %B) nounwind {
define <8 x i16> @uadalp8h(<16 x i8>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: uadalp8h:
;CHECK: uadalp.8h
- %tmp1 = load <16 x i8>* %A
+ %tmp1 = load <16 x i8>, <16 x i8>* %A
%tmp3 = call <8 x i16> @llvm.aarch64.neon.uaddlp.v8i16.v16i8(<16 x i8> %tmp1)
- %tmp4 = load <8 x i16>* %B
+ %tmp4 = load <8 x i16>, <8 x i16>* %B
%tmp5 = add <8 x i16> %tmp3, %tmp4
ret <8 x i16> %tmp5
}
@@ -619,9 +619,9 @@ define <8 x i16> @uadalp8h(<16 x i8>* %A, <8 x i16>* %B) nounwind {
define <4 x i32> @uadalp4s(<8 x i16>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: uadalp4s:
;CHECK: uadalp.4s
- %tmp1 = load <8 x i16>* %A
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
%tmp3 = call <4 x i32> @llvm.aarch64.neon.uaddlp.v4i32.v8i16(<8 x i16> %tmp1)
- %tmp4 = load <4 x i32>* %B
+ %tmp4 = load <4 x i32>, <4 x i32>* %B
%tmp5 = add <4 x i32> %tmp3, %tmp4
ret <4 x i32> %tmp5
}
@@ -629,9 +629,9 @@ define <4 x i32> @uadalp4s(<8 x i16>* %A, <4 x i32>* %B) nounwind {
define <2 x i64> @uadalp2d(<4 x i32>* %A, <2 x i64>* %B) nounwind {
;CHECK-LABEL: uadalp2d:
;CHECK: uadalp.2d
- %tmp1 = load <4 x i32>* %A
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
%tmp3 = call <2 x i64> @llvm.aarch64.neon.uaddlp.v2i64.v4i32(<4 x i32> %tmp1)
- %tmp4 = load <2 x i64>* %B
+ %tmp4 = load <2 x i64>, <2 x i64>* %B
%tmp5 = add <2 x i64> %tmp3, %tmp4
ret <2 x i64> %tmp5
}
@@ -639,8 +639,8 @@ define <2 x i64> @uadalp2d(<4 x i32>* %A, <2 x i64>* %B) nounwind {
define <8 x i8> @addp_8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: addp_8b:
;CHECK: addp.8b
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
+ %tmp2 = load <8 x i8>, <8 x i8>* %B
%tmp3 = call <8 x i8> @llvm.aarch64.neon.addp.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
ret <8 x i8> %tmp3
}
@@ -648,8 +648,8 @@ define <8 x i8> @addp_8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
define <16 x i8> @addp_16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: addp_16b:
;CHECK: addp.16b
- %tmp1 = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
+ %tmp1 = load <16 x i8>, <16 x i8>* %A
+ %tmp2 = load <16 x i8>, <16 x i8>* %B
%tmp3 = call <16 x i8> @llvm.aarch64.neon.addp.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
ret <16 x i8> %tmp3
}
@@ -657,8 +657,8 @@ define <16 x i8> @addp_16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
define <4 x i16> @addp_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: addp_4h:
;CHECK: addp.4h
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
%tmp3 = call <4 x i16> @llvm.aarch64.neon.addp.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
ret <4 x i16> %tmp3
}
@@ -666,8 +666,8 @@ define <4 x i16> @addp_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
define <8 x i16> @addp_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: addp_8h:
;CHECK: addp.8h
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
+ %tmp2 = load <8 x i16>, <8 x i16>* %B
%tmp3 = call <8 x i16> @llvm.aarch64.neon.addp.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
ret <8 x i16> %tmp3
}
@@ -675,8 +675,8 @@ define <8 x i16> @addp_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
define <2 x i32> @addp_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: addp_2s:
;CHECK: addp.2s
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
%tmp3 = call <2 x i32> @llvm.aarch64.neon.addp.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
ret <2 x i32> %tmp3
}
@@ -684,8 +684,8 @@ define <2 x i32> @addp_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
define <4 x i32> @addp_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: addp_4s:
;CHECK: addp.4s
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
+ %tmp2 = load <4 x i32>, <4 x i32>* %B
%tmp3 = call <4 x i32> @llvm.aarch64.neon.addp.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
ret <4 x i32> %tmp3
}
@@ -693,8 +693,8 @@ define <4 x i32> @addp_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
define <2 x i64> @addp_2d(<2 x i64>* %A, <2 x i64>* %B) nounwind {
;CHECK-LABEL: addp_2d:
;CHECK: addp.2d
- %tmp1 = load <2 x i64>* %A
- %tmp2 = load <2 x i64>* %B
+ %tmp1 = load <2 x i64>, <2 x i64>* %A
+ %tmp2 = load <2 x i64>, <2 x i64>* %B
%tmp3 = call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
ret <2 x i64> %tmp3
}
@@ -710,8 +710,8 @@ declare <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64>, <2 x i64>) nounwind r
define <2 x float> @faddp_2s(<2 x float>* %A, <2 x float>* %B) nounwind {
;CHECK-LABEL: faddp_2s:
;CHECK: faddp.2s
- %tmp1 = load <2 x float>* %A
- %tmp2 = load <2 x float>* %B
+ %tmp1 = load <2 x float>, <2 x float>* %A
+ %tmp2 = load <2 x float>, <2 x float>* %B
%tmp3 = call <2 x float> @llvm.aarch64.neon.addp.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
ret <2 x float> %tmp3
}
@@ -719,8 +719,8 @@ define <2 x float> @faddp_2s(<2 x float>* %A, <2 x float>* %B) nounwind {
define <4 x float> @faddp_4s(<4 x float>* %A, <4 x float>* %B) nounwind {
;CHECK-LABEL: faddp_4s:
;CHECK: faddp.4s
- %tmp1 = load <4 x float>* %A
- %tmp2 = load <4 x float>* %B
+ %tmp1 = load <4 x float>, <4 x float>* %A
+ %tmp2 = load <4 x float>, <4 x float>* %B
%tmp3 = call <4 x float> @llvm.aarch64.neon.addp.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
ret <4 x float> %tmp3
}
@@ -728,8 +728,8 @@ define <4 x float> @faddp_4s(<4 x float>* %A, <4 x float>* %B) nounwind {
define <2 x double> @faddp_2d(<2 x double>* %A, <2 x double>* %B) nounwind {
;CHECK-LABEL: faddp_2d:
;CHECK: faddp.2d
- %tmp1 = load <2 x double>* %A
- %tmp2 = load <2 x double>* %B
+ %tmp1 = load <2 x double>, <2 x double>* %A
+ %tmp2 = load <2 x double>, <2 x double>* %B
%tmp3 = call <2 x double> @llvm.aarch64.neon.addp.v2f64(<2 x double> %tmp1, <2 x double> %tmp2)
ret <2 x double> %tmp3
}
@@ -805,8 +805,8 @@ define <2 x i64> @ssubl2_duplhs(i32 %lhs, <4 x i32> %rhs) {
define <8 x i8> @addhn8b_natural(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: addhn8b_natural:
;CHECK: addhn.8b
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
+ %tmp2 = load <8 x i16>, <8 x i16>* %B
%sum = add <8 x i16> %tmp1, %tmp2
%high_bits = lshr <8 x i16> %sum, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
%narrowed = trunc <8 x i16> %high_bits to <8 x i8>
@@ -816,8 +816,8 @@ define <8 x i8> @addhn8b_natural(<8 x i16>* %A, <8 x i16>* %B) nounwind {
define <4 x i16> @addhn4h_natural(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: addhn4h_natural:
;CHECK: addhn.4h
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
+ %tmp2 = load <4 x i32>, <4 x i32>* %B
%sum = add <4 x i32> %tmp1, %tmp2
%high_bits = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16>
%narrowed = trunc <4 x i32> %high_bits to <4 x i16>
@@ -827,8 +827,8 @@ define <4 x i16> @addhn4h_natural(<4 x i32>* %A, <4 x i32>* %B) nounwind {
define <2 x i32> @addhn2s_natural(<2 x i64>* %A, <2 x i64>* %B) nounwind {
;CHECK-LABEL: addhn2s_natural:
;CHECK: addhn.2s
- %tmp1 = load <2 x i64>* %A
- %tmp2 = load <2 x i64>* %B
+ %tmp1 = load <2 x i64>, <2 x i64>* %A
+ %tmp2 = load <2 x i64>, <2 x i64>* %B
%sum = add <2 x i64> %tmp1, %tmp2
%high_bits = lshr <2 x i64> %sum, <i64 32, i64 32>
%narrowed = trunc <2 x i64> %high_bits to <2 x i32>
@@ -838,8 +838,8 @@ define <2 x i32> @addhn2s_natural(<2 x i64>* %A, <2 x i64>* %B) nounwind {
define <16 x i8> @addhn2_16b_natural(<8 x i8> %low, <8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: addhn2_16b_natural:
;CHECK: addhn2.16b
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
+ %tmp2 = load <8 x i16>, <8 x i16>* %B
%sum = add <8 x i16> %tmp1, %tmp2
%high_bits = lshr <8 x i16> %sum, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
%narrowed = trunc <8 x i16> %high_bits to <8 x i8>
@@ -850,8 +850,8 @@ define <16 x i8> @addhn2_16b_natural(<8 x i8> %low, <8 x i16>* %A, <8 x i16>* %B
define <8 x i16> @addhn2_8h_natural(<4 x i16> %low, <4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: addhn2_8h_natural:
;CHECK: addhn2.8h
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
+ %tmp2 = load <4 x i32>, <4 x i32>* %B
%sum = add <4 x i32> %tmp1, %tmp2
%high_bits = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16>
%narrowed = trunc <4 x i32> %high_bits to <4 x i16>
@@ -862,8 +862,8 @@ define <8 x i16> @addhn2_8h_natural(<4 x i16> %low, <4 x i32>* %A, <4 x i32>* %B
define <4 x i32> @addhn2_4s_natural(<2 x i32> %low, <2 x i64>* %A, <2 x i64>* %B) nounwind {
;CHECK-LABEL: addhn2_4s_natural:
;CHECK: addhn2.4s
- %tmp1 = load <2 x i64>* %A
- %tmp2 = load <2 x i64>* %B
+ %tmp1 = load <2 x i64>, <2 x i64>* %A
+ %tmp2 = load <2 x i64>, <2 x i64>* %B
%sum = add <2 x i64> %tmp1, %tmp2
%high_bits = lshr <2 x i64> %sum, <i64 32, i64 32>
%narrowed = trunc <2 x i64> %high_bits to <2 x i32>
@@ -874,8 +874,8 @@ define <4 x i32> @addhn2_4s_natural(<2 x i32> %low, <2 x i64>* %A, <2 x i64>* %B
define <8 x i8> @subhn8b_natural(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: subhn8b_natural:
;CHECK: subhn.8b
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
+ %tmp2 = load <8 x i16>, <8 x i16>* %B
%diff = sub <8 x i16> %tmp1, %tmp2
%high_bits = lshr <8 x i16> %diff, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
%narrowed = trunc <8 x i16> %high_bits to <8 x i8>
@@ -885,8 +885,8 @@ define <8 x i8> @subhn8b_natural(<8 x i16>* %A, <8 x i16>* %B) nounwind {
define <4 x i16> @subhn4h_natural(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: subhn4h_natural:
;CHECK: subhn.4h
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
+ %tmp2 = load <4 x i32>, <4 x i32>* %B
%diff = sub <4 x i32> %tmp1, %tmp2
%high_bits = lshr <4 x i32> %diff, <i32 16, i32 16, i32 16, i32 16>
%narrowed = trunc <4 x i32> %high_bits to <4 x i16>
@@ -896,8 +896,8 @@ define <4 x i16> @subhn4h_natural(<4 x i32>* %A, <4 x i32>* %B) nounwind {
define <2 x i32> @subhn2s_natural(<2 x i64>* %A, <2 x i64>* %B) nounwind {
;CHECK-LABEL: subhn2s_natural:
;CHECK: subhn.2s
- %tmp1 = load <2 x i64>* %A
- %tmp2 = load <2 x i64>* %B
+ %tmp1 = load <2 x i64>, <2 x i64>* %A
+ %tmp2 = load <2 x i64>, <2 x i64>* %B
%diff = sub <2 x i64> %tmp1, %tmp2
%high_bits = lshr <2 x i64> %diff, <i64 32, i64 32>
%narrowed = trunc <2 x i64> %high_bits to <2 x i32>
@@ -907,8 +907,8 @@ define <2 x i32> @subhn2s_natural(<2 x i64>* %A, <2 x i64>* %B) nounwind {
define <16 x i8> @subhn2_16b_natural(<8 x i8> %low, <8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: subhn2_16b_natural:
;CHECK: subhn2.16b
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
+ %tmp2 = load <8 x i16>, <8 x i16>* %B
%diff = sub <8 x i16> %tmp1, %tmp2
%high_bits = lshr <8 x i16> %diff, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
%narrowed = trunc <8 x i16> %high_bits to <8 x i8>
@@ -919,8 +919,8 @@ define <16 x i8> @subhn2_16b_natural(<8 x i8> %low, <8 x i16>* %A, <8 x i16>* %B
define <8 x i16> @subhn2_8h_natural(<4 x i16> %low, <4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: subhn2_8h_natural:
;CHECK: subhn2.8h
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
+ %tmp2 = load <4 x i32>, <4 x i32>* %B
%diff = sub <4 x i32> %tmp1, %tmp2
%high_bits = lshr <4 x i32> %diff, <i32 16, i32 16, i32 16, i32 16>
%narrowed = trunc <4 x i32> %high_bits to <4 x i16>
@@ -931,8 +931,8 @@ define <8 x i16> @subhn2_8h_natural(<4 x i16> %low, <4 x i32>* %A, <4 x i32>* %B
define <4 x i32> @subhn2_4s_natural(<2 x i32> %low, <2 x i64>* %A, <2 x i64>* %B) nounwind {
;CHECK-LABEL: subhn2_4s_natural:
;CHECK: subhn2.4s
- %tmp1 = load <2 x i64>* %A
- %tmp2 = load <2 x i64>* %B
+ %tmp1 = load <2 x i64>, <2 x i64>* %A
+ %tmp2 = load <2 x i64>, <2 x i64>* %B
%diff = sub <2 x i64> %tmp1, %tmp2
%high_bits = lshr <2 x i64> %diff, <i64 32, i64 32>
%narrowed = trunc <2 x i64> %high_bits to <2 x i32>
diff --git a/test/CodeGen/AArch64/arm64-vaddv.ll b/test/CodeGen/AArch64/arm64-vaddv.ll
index 2d92ce6ea570..589319bb3227 100644
--- a/test/CodeGen/AArch64/arm64-vaddv.ll
+++ b/test/CodeGen/AArch64/arm64-vaddv.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=arm64 -aarch64-neon-syntax=apple < %s -mcpu=cyclone | FileCheck %s
+; RUN: llc -march=arm64 -aarch64-neon-syntax=apple < %s -asm-verbose=false -mcpu=cyclone | FileCheck %s
define signext i8 @test_vaddv_s8(<8 x i8> %a1) {
; CHECK-LABEL: test_vaddv_s8:
@@ -11,6 +11,18 @@ entry:
ret i8 %0
}
+define <8 x i8> @test_vaddv_s8_used_by_laneop(<8 x i8> %a1, <8 x i8> %a2) {
+; CHECK-LABEL: test_vaddv_s8_used_by_laneop:
+; CHECK: addv.8b b[[REGNUM:[0-9]+]], v1
+; CHECK-NEXT: ins.b v0[3], v[[REGNUM]][0]
+; CHECK-NEXT: ret
+entry:
+ %0 = tail call i32 @llvm.aarch64.neon.saddv.i32.v8i8(<8 x i8> %a2)
+ %1 = trunc i32 %0 to i8
+ %2 = insertelement <8 x i8> %a1, i8 %1, i32 3
+ ret <8 x i8> %2
+}
+
define signext i16 @test_vaddv_s16(<4 x i16> %a1) {
; CHECK-LABEL: test_vaddv_s16:
; CHECK: addv.4h h[[REGNUM:[0-9]+]], v0
@@ -22,6 +34,18 @@ entry:
ret i16 %0
}
+define <4 x i16> @test_vaddv_s16_used_by_laneop(<4 x i16> %a1, <4 x i16> %a2) {
+; CHECK-LABEL: test_vaddv_s16_used_by_laneop:
+; CHECK: addv.4h h[[REGNUM:[0-9]+]], v1
+; CHECK-NEXT: ins.h v0[3], v[[REGNUM]][0]
+; CHECK-NEXT: ret
+entry:
+ %0 = tail call i32 @llvm.aarch64.neon.saddv.i32.v4i16(<4 x i16> %a2)
+ %1 = trunc i32 %0 to i16
+ %2 = insertelement <4 x i16> %a1, i16 %1, i32 3
+ ret <4 x i16> %2
+}
+
define i32 @test_vaddv_s32(<2 x i32> %a1) {
; CHECK-LABEL: test_vaddv_s32:
; 2 x i32 is not supported by the ISA, thus, this is a special case
@@ -33,6 +57,17 @@ entry:
ret i32 %vaddv.i
}
+define <2 x i32> @test_vaddv_s32_used_by_laneop(<2 x i32> %a1, <2 x i32> %a2) {
+; CHECK-LABEL: test_vaddv_s32_used_by_laneop:
+; CHECK: addp.2s v[[REGNUM:[0-9]+]], v1, v1
+; CHECK-NEXT: ins.s v0[1], v[[REGNUM]][0]
+; CHECK-NEXT: ret
+entry:
+ %0 = tail call i32 @llvm.aarch64.neon.saddv.i32.v2i32(<2 x i32> %a2)
+ %1 = insertelement <2 x i32> %a1, i32 %0, i32 1
+ ret <2 x i32> %1
+}
+
define i64 @test_vaddv_s64(<2 x i64> %a1) {
; CHECK-LABEL: test_vaddv_s64:
; CHECK: addp.2d [[REGNUM:d[0-9]+]], v0
@@ -43,6 +78,17 @@ entry:
ret i64 %vaddv.i
}
+define <2 x i64> @test_vaddv_s64_used_by_laneop(<2 x i64> %a1, <2 x i64> %a2) {
+; CHECK-LABEL: test_vaddv_s64_used_by_laneop:
+; CHECK: addp.2d d[[REGNUM:[0-9]+]], v1
+; CHECK-NEXT: ins.d v0[1], v[[REGNUM]][0]
+; CHECK-NEXT: ret
+entry:
+ %0 = tail call i64 @llvm.aarch64.neon.saddv.i64.v2i64(<2 x i64> %a2)
+ %1 = insertelement <2 x i64> %a1, i64 %0, i64 1
+ ret <2 x i64> %1
+}
+
define zeroext i8 @test_vaddv_u8(<8 x i8> %a1) {
; CHECK-LABEL: test_vaddv_u8:
; CHECK: addv.8b b[[REGNUM:[0-9]+]], v0
@@ -54,6 +100,18 @@ entry:
ret i8 %0
}
+define <8 x i8> @test_vaddv_u8_used_by_laneop(<8 x i8> %a1, <8 x i8> %a2) {
+; CHECK-LABEL: test_vaddv_u8_used_by_laneop:
+; CHECK: addv.8b b[[REGNUM:[0-9]+]], v1
+; CHECK-NEXT: ins.b v0[3], v[[REGNUM]][0]
+; CHECK-NEXT: ret
+entry:
+ %0 = tail call i32 @llvm.aarch64.neon.uaddv.i32.v8i8(<8 x i8> %a2)
+ %1 = trunc i32 %0 to i8
+ %2 = insertelement <8 x i8> %a1, i8 %1, i32 3
+ ret <8 x i8> %2
+}
+
define i32 @test_vaddv_u8_masked(<8 x i8> %a1) {
; CHECK-LABEL: test_vaddv_u8_masked:
; CHECK: addv.8b b[[REGNUM:[0-9]+]], v0
@@ -76,6 +134,18 @@ entry:
ret i16 %0
}
+define <4 x i16> @test_vaddv_u16_used_by_laneop(<4 x i16> %a1, <4 x i16> %a2) {
+; CHECK-LABEL: test_vaddv_u16_used_by_laneop:
+; CHECK: addv.4h h[[REGNUM:[0-9]+]], v1
+; CHECK-NEXT: ins.h v0[3], v[[REGNUM]][0]
+; CHECK-NEXT: ret
+entry:
+ %0 = tail call i32 @llvm.aarch64.neon.uaddv.i32.v4i16(<4 x i16> %a2)
+ %1 = trunc i32 %0 to i16
+ %2 = insertelement <4 x i16> %a1, i16 %1, i32 3
+ ret <4 x i16> %2
+}
+
define i32 @test_vaddv_u16_masked(<4 x i16> %a1) {
; CHECK-LABEL: test_vaddv_u16_masked:
; CHECK: addv.4h h[[REGNUM:[0-9]+]], v0
@@ -98,6 +168,17 @@ entry:
ret i32 %vaddv.i
}
+define <2 x i32> @test_vaddv_u32_used_by_laneop(<2 x i32> %a1, <2 x i32> %a2) {
+; CHECK-LABEL: test_vaddv_u32_used_by_laneop:
+; CHECK: addp.2s v[[REGNUM:[0-9]+]], v1, v1
+; CHECK-NEXT: ins.s v0[1], v[[REGNUM]][0]
+; CHECK-NEXT: ret
+entry:
+ %0 = tail call i32 @llvm.aarch64.neon.uaddv.i32.v2i32(<2 x i32> %a2)
+ %1 = insertelement <2 x i32> %a1, i32 %0, i32 1
+ ret <2 x i32> %1
+}
+
define float @test_vaddv_f32(<2 x float> %a1) {
; CHECK-LABEL: test_vaddv_f32:
; CHECK: faddp.2s s0, v0
@@ -136,6 +217,17 @@ entry:
ret i64 %vaddv.i
}
+define <2 x i64> @test_vaddv_u64_used_by_laneop(<2 x i64> %a1, <2 x i64> %a2) {
+; CHECK-LABEL: test_vaddv_u64_used_by_laneop:
+; CHECK: addp.2d d[[REGNUM:[0-9]+]], v1
+; CHECK-NEXT: ins.d v0[1], v[[REGNUM]][0]
+; CHECK-NEXT: ret
+entry:
+ %0 = tail call i64 @llvm.aarch64.neon.uaddv.i64.v2i64(<2 x i64> %a2)
+ %1 = insertelement <2 x i64> %a1, i64 %0, i64 1
+ ret <2 x i64> %1
+}
+
define <1 x i64> @test_vaddv_u64_to_vec(<2 x i64> %a1) {
; CHECK-LABEL: test_vaddv_u64_to_vec:
; CHECK: addp.2d d0, v0
@@ -159,6 +251,18 @@ entry:
ret i8 %0
}
+define <16 x i8> @test_vaddvq_s8_used_by_laneop(<16 x i8> %a1, <16 x i8> %a2) {
+; CHECK-LABEL: test_vaddvq_s8_used_by_laneop:
+; CHECK: addv.16b b[[REGNUM:[0-9]+]], v1
+; CHECK-NEXT: ins.b v0[3], v[[REGNUM]][0]
+; CHECK-NEXT: ret
+entry:
+ %0 = tail call i32 @llvm.aarch64.neon.saddv.i32.v16i8(<16 x i8> %a2)
+ %1 = trunc i32 %0 to i8
+ %2 = insertelement <16 x i8> %a1, i8 %1, i32 3
+ ret <16 x i8> %2
+}
+
define signext i16 @test_vaddvq_s16(<8 x i16> %a1) {
; CHECK-LABEL: test_vaddvq_s16:
; CHECK: addv.8h h[[REGNUM:[0-9]+]], v0
@@ -170,6 +274,18 @@ entry:
ret i16 %0
}
+define <8 x i16> @test_vaddvq_s16_used_by_laneop(<8 x i16> %a1, <8 x i16> %a2) {
+; CHECK-LABEL: test_vaddvq_s16_used_by_laneop:
+; CHECK: addv.8h h[[REGNUM:[0-9]+]], v1
+; CHECK-NEXT: ins.h v0[3], v[[REGNUM]][0]
+; CHECK-NEXT: ret
+entry:
+ %0 = tail call i32 @llvm.aarch64.neon.saddv.i32.v8i16(<8 x i16> %a2)
+ %1 = trunc i32 %0 to i16
+ %2 = insertelement <8 x i16> %a1, i16 %1, i32 3
+ ret <8 x i16> %2
+}
+
define i32 @test_vaddvq_s32(<4 x i32> %a1) {
; CHECK-LABEL: test_vaddvq_s32:
; CHECK: addv.4s [[REGNUM:s[0-9]+]], v0
@@ -180,6 +296,17 @@ entry:
ret i32 %vaddv.i
}
+define <4 x i32> @test_vaddvq_s32_used_by_laneop(<4 x i32> %a1, <4 x i32> %a2) {
+; CHECK-LABEL: test_vaddvq_s32_used_by_laneop:
+; CHECK: addv.4s s[[REGNUM:[0-9]+]], v1
+; CHECK-NEXT: ins.s v0[3], v[[REGNUM]][0]
+; CHECK-NEXT: ret
+entry:
+ %0 = tail call i32 @llvm.aarch64.neon.saddv.i32.v4i32(<4 x i32> %a2)
+ %1 = insertelement <4 x i32> %a1, i32 %0, i32 3
+ ret <4 x i32> %1
+}
+
define zeroext i8 @test_vaddvq_u8(<16 x i8> %a1) {
; CHECK-LABEL: test_vaddvq_u8:
; CHECK: addv.16b b[[REGNUM:[0-9]+]], v0
@@ -191,6 +318,18 @@ entry:
ret i8 %0
}
+define <16 x i8> @test_vaddvq_u8_used_by_laneop(<16 x i8> %a1, <16 x i8> %a2) {
+; CHECK-LABEL: test_vaddvq_u8_used_by_laneop:
+; CHECK: addv.16b b[[REGNUM:[0-9]+]], v1
+; CHECK-NEXT: ins.b v0[3], v[[REGNUM]][0]
+; CHECK-NEXT: ret
+entry:
+ %0 = tail call i32 @llvm.aarch64.neon.uaddv.i32.v16i8(<16 x i8> %a2)
+ %1 = trunc i32 %0 to i8
+ %2 = insertelement <16 x i8> %a1, i8 %1, i32 3
+ ret <16 x i8> %2
+}
+
define zeroext i16 @test_vaddvq_u16(<8 x i16> %a1) {
; CHECK-LABEL: test_vaddvq_u16:
; CHECK: addv.8h h[[REGNUM:[0-9]+]], v0
@@ -202,6 +341,18 @@ entry:
ret i16 %0
}
+define <8 x i16> @test_vaddvq_u16_used_by_laneop(<8 x i16> %a1, <8 x i16> %a2) {
+; CHECK-LABEL: test_vaddvq_u16_used_by_laneop:
+; CHECK: addv.8h h[[REGNUM:[0-9]+]], v1
+; CHECK-NEXT: ins.h v0[3], v[[REGNUM]][0]
+; CHECK-NEXT: ret
+entry:
+ %0 = tail call i32 @llvm.aarch64.neon.uaddv.i32.v8i16(<8 x i16> %a2)
+ %1 = trunc i32 %0 to i16
+ %2 = insertelement <8 x i16> %a1, i16 %1, i32 3
+ ret <8 x i16> %2
+}
+
define i32 @test_vaddvq_u32(<4 x i32> %a1) {
; CHECK-LABEL: test_vaddvq_u32:
; CHECK: addv.4s [[REGNUM:s[0-9]+]], v0
@@ -212,6 +363,17 @@ entry:
ret i32 %vaddv.i
}
+define <4 x i32> @test_vaddvq_u32_used_by_laneop(<4 x i32> %a1, <4 x i32> %a2) {
+; CHECK-LABEL: test_vaddvq_u32_used_by_laneop:
+; CHECK: addv.4s s[[REGNUM:[0-9]+]], v1
+; CHECK-NEXT: ins.s v0[3], v[[REGNUM]][0]
+; CHECK-NEXT: ret
+entry:
+ %0 = tail call i32 @llvm.aarch64.neon.uaddv.i32.v4i32(<4 x i32> %a2)
+ %1 = insertelement <4 x i32> %a1, i32 %0, i32 3
+ ret <4 x i32> %1
+}
+
declare i32 @llvm.aarch64.neon.uaddv.i32.v4i32(<4 x i32>)
declare i32 @llvm.aarch64.neon.uaddv.i32.v8i16(<8 x i16>)
diff --git a/test/CodeGen/AArch64/arm64-vbitwise.ll b/test/CodeGen/AArch64/arm64-vbitwise.ll
index 93de95e52e53..9cfcaafe9491 100644
--- a/test/CodeGen/AArch64/arm64-vbitwise.ll
+++ b/test/CodeGen/AArch64/arm64-vbitwise.ll
@@ -3,7 +3,7 @@
define <8 x i8> @rbit_8b(<8 x i8>* %A) nounwind {
;CHECK-LABEL: rbit_8b:
;CHECK: rbit.8b
- %tmp1 = load <8 x i8>* %A
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
%tmp3 = call <8 x i8> @llvm.aarch64.neon.rbit.v8i8(<8 x i8> %tmp1)
ret <8 x i8> %tmp3
}
@@ -11,7 +11,7 @@ define <8 x i8> @rbit_8b(<8 x i8>* %A) nounwind {
define <16 x i8> @rbit_16b(<16 x i8>* %A) nounwind {
;CHECK-LABEL: rbit_16b:
;CHECK: rbit.16b
- %tmp1 = load <16 x i8>* %A
+ %tmp1 = load <16 x i8>, <16 x i8>* %A
%tmp3 = call <16 x i8> @llvm.aarch64.neon.rbit.v16i8(<16 x i8> %tmp1)
ret <16 x i8> %tmp3
}
@@ -22,7 +22,7 @@ declare <16 x i8> @llvm.aarch64.neon.rbit.v16i8(<16 x i8>) nounwind readnone
define <8 x i16> @sxtl8h(<8 x i8>* %A) nounwind {
;CHECK-LABEL: sxtl8h:
;CHECK: sshll.8h
- %tmp1 = load <8 x i8>* %A
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
%tmp2 = sext <8 x i8> %tmp1 to <8 x i16>
ret <8 x i16> %tmp2
}
@@ -30,7 +30,7 @@ define <8 x i16> @sxtl8h(<8 x i8>* %A) nounwind {
define <8 x i16> @uxtl8h(<8 x i8>* %A) nounwind {
;CHECK-LABEL: uxtl8h:
;CHECK: ushll.8h
- %tmp1 = load <8 x i8>* %A
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
%tmp2 = zext <8 x i8> %tmp1 to <8 x i16>
ret <8 x i16> %tmp2
}
@@ -38,7 +38,7 @@ define <8 x i16> @uxtl8h(<8 x i8>* %A) nounwind {
define <4 x i32> @sxtl4s(<4 x i16>* %A) nounwind {
;CHECK-LABEL: sxtl4s:
;CHECK: sshll.4s
- %tmp1 = load <4 x i16>* %A
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
%tmp2 = sext <4 x i16> %tmp1 to <4 x i32>
ret <4 x i32> %tmp2
}
@@ -46,7 +46,7 @@ define <4 x i32> @sxtl4s(<4 x i16>* %A) nounwind {
define <4 x i32> @uxtl4s(<4 x i16>* %A) nounwind {
;CHECK-LABEL: uxtl4s:
;CHECK: ushll.4s
- %tmp1 = load <4 x i16>* %A
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
%tmp2 = zext <4 x i16> %tmp1 to <4 x i32>
ret <4 x i32> %tmp2
}
@@ -54,7 +54,7 @@ define <4 x i32> @uxtl4s(<4 x i16>* %A) nounwind {
define <2 x i64> @sxtl2d(<2 x i32>* %A) nounwind {
;CHECK-LABEL: sxtl2d:
;CHECK: sshll.2d
- %tmp1 = load <2 x i32>* %A
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
%tmp2 = sext <2 x i32> %tmp1 to <2 x i64>
ret <2 x i64> %tmp2
}
@@ -62,7 +62,7 @@ define <2 x i64> @sxtl2d(<2 x i32>* %A) nounwind {
define <2 x i64> @uxtl2d(<2 x i32>* %A) nounwind {
;CHECK-LABEL: uxtl2d:
;CHECK: ushll.2d
- %tmp1 = load <2 x i32>* %A
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
%tmp2 = zext <2 x i32> %tmp1 to <2 x i64>
ret <2 x i64> %tmp2
}
@@ -76,7 +76,7 @@ entry:
; CHECK: movi.2d [[REG1:v[0-9]+]], #0x0000ff000000ff
; CHECK: and.16b v{{[0-9]+}}, v{{[0-9]+}}, [[REG1]]
%0 = bitcast i8* %src to <16 x i8>*
- %1 = load <16 x i8>* %0, align 16
+ %1 = load <16 x i8>, <16 x i8>* %0, align 16
%and.i = and <16 x i8> %1, <i8 -1, i8 0, i8 0, i8 0, i8 -1, i8 0, i8 0, i8 0, i8 -1, i8 0, i8 0, i8 0, i8 -1, i8 0, i8 0, i8 0>
%2 = bitcast <16 x i8> %and.i to <8 x i16>
%vshl_n = shl <8 x i16> %2, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
diff --git a/test/CodeGen/AArch64/arm64-vcmp.ll b/test/CodeGen/AArch64/arm64-vcmp.ll
index 982ab09ee69e..1b33eb58e86f 100644
--- a/test/CodeGen/AArch64/arm64-vcmp.ll
+++ b/test/CodeGen/AArch64/arm64-vcmp.ll
@@ -16,8 +16,8 @@ define void @fcmltz_4s(<4 x float> %a, <4 x i16>* %p) nounwind {
define <2 x i32> @facge_2s(<2 x float>* %A, <2 x float>* %B) nounwind {
;CHECK-LABEL: facge_2s:
;CHECK: facge.2s
- %tmp1 = load <2 x float>* %A
- %tmp2 = load <2 x float>* %B
+ %tmp1 = load <2 x float>, <2 x float>* %A
+ %tmp2 = load <2 x float>, <2 x float>* %B
%tmp3 = call <2 x i32> @llvm.aarch64.neon.facge.v2i32.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
ret <2 x i32> %tmp3
}
@@ -25,8 +25,8 @@ define <2 x i32> @facge_2s(<2 x float>* %A, <2 x float>* %B) nounwind {
define <4 x i32> @facge_4s(<4 x float>* %A, <4 x float>* %B) nounwind {
;CHECK-LABEL: facge_4s:
;CHECK: facge.4s
- %tmp1 = load <4 x float>* %A
- %tmp2 = load <4 x float>* %B
+ %tmp1 = load <4 x float>, <4 x float>* %A
+ %tmp2 = load <4 x float>, <4 x float>* %B
%tmp3 = call <4 x i32> @llvm.aarch64.neon.facge.v4i32.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
ret <4 x i32> %tmp3
}
@@ -34,8 +34,8 @@ define <4 x i32> @facge_4s(<4 x float>* %A, <4 x float>* %B) nounwind {
define <2 x i64> @facge_2d(<2 x double>* %A, <2 x double>* %B) nounwind {
;CHECK-LABEL: facge_2d:
;CHECK: facge.2d
- %tmp1 = load <2 x double>* %A
- %tmp2 = load <2 x double>* %B
+ %tmp1 = load <2 x double>, <2 x double>* %A
+ %tmp2 = load <2 x double>, <2 x double>* %B
%tmp3 = call <2 x i64> @llvm.aarch64.neon.facge.v2i64.v2f64(<2 x double> %tmp1, <2 x double> %tmp2)
ret <2 x i64> %tmp3
}
@@ -47,8 +47,8 @@ declare <2 x i64> @llvm.aarch64.neon.facge.v2i64.v2f64(<2 x double>, <2 x double
define <2 x i32> @facgt_2s(<2 x float>* %A, <2 x float>* %B) nounwind {
;CHECK-LABEL: facgt_2s:
;CHECK: facgt.2s
- %tmp1 = load <2 x float>* %A
- %tmp2 = load <2 x float>* %B
+ %tmp1 = load <2 x float>, <2 x float>* %A
+ %tmp2 = load <2 x float>, <2 x float>* %B
%tmp3 = call <2 x i32> @llvm.aarch64.neon.facgt.v2i32.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
ret <2 x i32> %tmp3
}
@@ -56,8 +56,8 @@ define <2 x i32> @facgt_2s(<2 x float>* %A, <2 x float>* %B) nounwind {
define <4 x i32> @facgt_4s(<4 x float>* %A, <4 x float>* %B) nounwind {
;CHECK-LABEL: facgt_4s:
;CHECK: facgt.4s
- %tmp1 = load <4 x float>* %A
- %tmp2 = load <4 x float>* %B
+ %tmp1 = load <4 x float>, <4 x float>* %A
+ %tmp2 = load <4 x float>, <4 x float>* %B
%tmp3 = call <4 x i32> @llvm.aarch64.neon.facgt.v4i32.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
ret <4 x i32> %tmp3
}
@@ -65,8 +65,8 @@ define <4 x i32> @facgt_4s(<4 x float>* %A, <4 x float>* %B) nounwind {
define <2 x i64> @facgt_2d(<2 x double>* %A, <2 x double>* %B) nounwind {
;CHECK-LABEL: facgt_2d:
;CHECK: facgt.2d
- %tmp1 = load <2 x double>* %A
- %tmp2 = load <2 x double>* %B
+ %tmp1 = load <2 x double>, <2 x double>* %A
+ %tmp2 = load <2 x double>, <2 x double>* %B
%tmp3 = call <2 x i64> @llvm.aarch64.neon.facgt.v2i64.v2f64(<2 x double> %tmp1, <2 x double> %tmp2)
ret <2 x i64> %tmp3
}
@@ -112,8 +112,8 @@ declare i32 @llvm.aarch64.neon.facgt.i32.f32(float, float)
define <8 x i8> @cmtst_8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: cmtst_8b:
;CHECK: cmtst.8b
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
+ %tmp2 = load <8 x i8>, <8 x i8>* %B
%commonbits = and <8 x i8> %tmp1, %tmp2
%mask = icmp ne <8 x i8> %commonbits, zeroinitializer
%res = sext <8 x i1> %mask to <8 x i8>
@@ -123,8 +123,8 @@ define <8 x i8> @cmtst_8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
define <16 x i8> @cmtst_16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: cmtst_16b:
;CHECK: cmtst.16b
- %tmp1 = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
+ %tmp1 = load <16 x i8>, <16 x i8>* %A
+ %tmp2 = load <16 x i8>, <16 x i8>* %B
%commonbits = and <16 x i8> %tmp1, %tmp2
%mask = icmp ne <16 x i8> %commonbits, zeroinitializer
%res = sext <16 x i1> %mask to <16 x i8>
@@ -134,8 +134,8 @@ define <16 x i8> @cmtst_16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
define <4 x i16> @cmtst_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: cmtst_4h:
;CHECK: cmtst.4h
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
%commonbits = and <4 x i16> %tmp1, %tmp2
%mask = icmp ne <4 x i16> %commonbits, zeroinitializer
%res = sext <4 x i1> %mask to <4 x i16>
@@ -145,8 +145,8 @@ define <4 x i16> @cmtst_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
define <8 x i16> @cmtst_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: cmtst_8h:
;CHECK: cmtst.8h
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
+ %tmp2 = load <8 x i16>, <8 x i16>* %B
%commonbits = and <8 x i16> %tmp1, %tmp2
%mask = icmp ne <8 x i16> %commonbits, zeroinitializer
%res = sext <8 x i1> %mask to <8 x i16>
@@ -156,8 +156,8 @@ define <8 x i16> @cmtst_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
define <2 x i32> @cmtst_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: cmtst_2s:
;CHECK: cmtst.2s
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
%commonbits = and <2 x i32> %tmp1, %tmp2
%mask = icmp ne <2 x i32> %commonbits, zeroinitializer
%res = sext <2 x i1> %mask to <2 x i32>
@@ -167,8 +167,8 @@ define <2 x i32> @cmtst_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
define <4 x i32> @cmtst_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: cmtst_4s:
;CHECK: cmtst.4s
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
+ %tmp2 = load <4 x i32>, <4 x i32>* %B
%commonbits = and <4 x i32> %tmp1, %tmp2
%mask = icmp ne <4 x i32> %commonbits, zeroinitializer
%res = sext <4 x i1> %mask to <4 x i32>
@@ -178,8 +178,8 @@ define <4 x i32> @cmtst_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
define <2 x i64> @cmtst_2d(<2 x i64>* %A, <2 x i64>* %B) nounwind {
;CHECK-LABEL: cmtst_2d:
;CHECK: cmtst.2d
- %tmp1 = load <2 x i64>* %A
- %tmp2 = load <2 x i64>* %B
+ %tmp1 = load <2 x i64>, <2 x i64>* %A
+ %tmp2 = load <2 x i64>, <2 x i64>* %B
%commonbits = and <2 x i64> %tmp1, %tmp2
%mask = icmp ne <2 x i64> %commonbits, zeroinitializer
%res = sext <2 x i1> %mask to <2 x i64>
diff --git a/test/CodeGen/AArch64/arm64-vcnt.ll b/test/CodeGen/AArch64/arm64-vcnt.ll
index 903501ec16a9..5cff10cb8d16 100644
--- a/test/CodeGen/AArch64/arm64-vcnt.ll
+++ b/test/CodeGen/AArch64/arm64-vcnt.ll
@@ -3,7 +3,7 @@
define <8 x i8> @cls_8b(<8 x i8>* %A) nounwind {
;CHECK-LABEL: cls_8b:
;CHECK: cls.8b
- %tmp1 = load <8 x i8>* %A
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
%tmp3 = call <8 x i8> @llvm.aarch64.neon.cls.v8i8(<8 x i8> %tmp1)
ret <8 x i8> %tmp3
}
@@ -11,7 +11,7 @@ define <8 x i8> @cls_8b(<8 x i8>* %A) nounwind {
define <16 x i8> @cls_16b(<16 x i8>* %A) nounwind {
;CHECK-LABEL: cls_16b:
;CHECK: cls.16b
- %tmp1 = load <16 x i8>* %A
+ %tmp1 = load <16 x i8>, <16 x i8>* %A
%tmp3 = call <16 x i8> @llvm.aarch64.neon.cls.v16i8(<16 x i8> %tmp1)
ret <16 x i8> %tmp3
}
@@ -19,7 +19,7 @@ define <16 x i8> @cls_16b(<16 x i8>* %A) nounwind {
define <4 x i16> @cls_4h(<4 x i16>* %A) nounwind {
;CHECK-LABEL: cls_4h:
;CHECK: cls.4h
- %tmp1 = load <4 x i16>* %A
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
%tmp3 = call <4 x i16> @llvm.aarch64.neon.cls.v4i16(<4 x i16> %tmp1)
ret <4 x i16> %tmp3
}
@@ -27,7 +27,7 @@ define <4 x i16> @cls_4h(<4 x i16>* %A) nounwind {
define <8 x i16> @cls_8h(<8 x i16>* %A) nounwind {
;CHECK-LABEL: cls_8h:
;CHECK: cls.8h
- %tmp1 = load <8 x i16>* %A
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
%tmp3 = call <8 x i16> @llvm.aarch64.neon.cls.v8i16(<8 x i16> %tmp1)
ret <8 x i16> %tmp3
}
@@ -35,7 +35,7 @@ define <8 x i16> @cls_8h(<8 x i16>* %A) nounwind {
define <2 x i32> @cls_2s(<2 x i32>* %A) nounwind {
;CHECK-LABEL: cls_2s:
;CHECK: cls.2s
- %tmp1 = load <2 x i32>* %A
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
%tmp3 = call <2 x i32> @llvm.aarch64.neon.cls.v2i32(<2 x i32> %tmp1)
ret <2 x i32> %tmp3
}
@@ -43,7 +43,7 @@ define <2 x i32> @cls_2s(<2 x i32>* %A) nounwind {
define <4 x i32> @cls_4s(<4 x i32>* %A) nounwind {
;CHECK-LABEL: cls_4s:
;CHECK: cls.4s
- %tmp1 = load <4 x i32>* %A
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
%tmp3 = call <4 x i32> @llvm.aarch64.neon.cls.v4i32(<4 x i32> %tmp1)
ret <4 x i32> %tmp3
}
diff --git a/test/CodeGen/AArch64/arm64-vcvt.ll b/test/CodeGen/AArch64/arm64-vcvt.ll
index 6570f0e3e7eb..13d2d288b2c4 100644
--- a/test/CodeGen/AArch64/arm64-vcvt.ll
+++ b/test/CodeGen/AArch64/arm64-vcvt.ll
@@ -675,7 +675,7 @@ define void @autogen_SD28458(<8 x double> %val.f64, <8 x float>* %addr.f32) {
;CHECK: fcvt
;CHECK: ret
define void @autogen_SD19225(<8 x double>* %addr.f64, <8 x float>* %addr.f32) {
- %A = load <8 x float>* %addr.f32
+ %A = load <8 x float>, <8 x float>* %addr.f32
%Tr53 = fpext <8 x float> %A to <8 x double>
store <8 x double> %Tr53, <8 x double>* %addr.f64
ret void
diff --git a/test/CodeGen/AArch64/arm64-vector-imm.ll b/test/CodeGen/AArch64/arm64-vector-imm.ll
index 9fb088b9a497..d3de88d2049d 100644
--- a/test/CodeGen/AArch64/arm64-vector-imm.ll
+++ b/test/CodeGen/AArch64/arm64-vector-imm.ll
@@ -5,7 +5,7 @@ define <8 x i8> @v_orrimm(<8 x i8>* %A) nounwind {
; CHECK-NOT: mov
; CHECK-NOT: mvn
; CHECK: orr
- %tmp1 = load <8 x i8>* %A
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
%tmp3 = or <8 x i8> %tmp1, <i8 0, i8 0, i8 0, i8 1, i8 0, i8 0, i8 0, i8 1>
ret <8 x i8> %tmp3
}
@@ -15,7 +15,7 @@ define <16 x i8> @v_orrimmQ(<16 x i8>* %A) nounwind {
; CHECK-NOT: mov
; CHECK-NOT: mvn
; CHECK: orr
- %tmp1 = load <16 x i8>* %A
+ %tmp1 = load <16 x i8>, <16 x i8>* %A
%tmp3 = or <16 x i8> %tmp1, <i8 0, i8 0, i8 0, i8 1, i8 0, i8 0, i8 0, i8 1, i8 0, i8 0, i8 0, i8 1, i8 0, i8 0, i8 0, i8 1>
ret <16 x i8> %tmp3
}
@@ -25,7 +25,7 @@ define <8 x i8> @v_bicimm(<8 x i8>* %A) nounwind {
; CHECK-NOT: mov
; CHECK-NOT: mvn
; CHECK: bic
- %tmp1 = load <8 x i8>* %A
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
%tmp3 = and <8 x i8> %tmp1, < i8 -1, i8 -1, i8 -1, i8 0, i8 -1, i8 -1, i8 -1, i8 0 >
ret <8 x i8> %tmp3
}
@@ -35,7 +35,7 @@ define <16 x i8> @v_bicimmQ(<16 x i8>* %A) nounwind {
; CHECK-NOT: mov
; CHECK-NOT: mvn
; CHECK: bic
- %tmp1 = load <16 x i8>* %A
+ %tmp1 = load <16 x i8>, <16 x i8>* %A
%tmp3 = and <16 x i8> %tmp1, < i8 -1, i8 -1, i8 -1, i8 0, i8 -1, i8 -1, i8 -1, i8 0, i8 -1, i8 -1, i8 -1, i8 0, i8 -1, i8 -1, i8 -1, i8 0 >
ret <16 x i8> %tmp3
}
diff --git a/test/CodeGen/AArch64/arm64-vector-ldst.ll b/test/CodeGen/AArch64/arm64-vector-ldst.ll
index c00191577d17..26b9d62c8f6a 100644
--- a/test/CodeGen/AArch64/arm64-vector-ldst.ll
+++ b/test/CodeGen/AArch64/arm64-vector-ldst.ll
@@ -12,8 +12,8 @@ entry:
; CHECK-LABEL: t1:
; CHECK: ldr x[[REG:[0-9]+]], [x0]
; CHECK: str q0, [x[[REG]]]
- %tmp1 = load %type1** %argtable, align 8
- %tmp2 = getelementptr inbounds %type1* %tmp1, i64 0, i32 0
+ %tmp1 = load %type1*, %type1** %argtable, align 8
+ %tmp2 = getelementptr inbounds %type1, %type1* %tmp1, i64 0, i32 0
store <16 x i8> zeroinitializer, <16 x i8>* %tmp2, align 16
ret void
}
@@ -23,8 +23,8 @@ entry:
; CHECK-LABEL: t2:
; CHECK: ldr x[[REG:[0-9]+]], [x0]
; CHECK: str d0, [x[[REG]]]
- %tmp1 = load %type2** %argtable, align 8
- %tmp2 = getelementptr inbounds %type2* %tmp1, i64 0, i32 0
+ %tmp1 = load %type2*, %type2** %argtable, align 8
+ %tmp2 = getelementptr inbounds %type2, %type2* %tmp1, i64 0, i32 0
store <8 x i8> zeroinitializer, <8 x i8>* %tmp2, align 8
ret void
}
@@ -51,10 +51,10 @@ entry:
; CHECK: ldr [[DEST:q[0-9]+]], [x0, [[SHIFTEDOFFSET]]
; CHECK: ldr [[BASE:x[0-9]+]],
; CHECK: str [[DEST]], {{\[}}[[BASE]], [[SHIFTEDOFFSET]]]
- %arrayidx = getelementptr inbounds <2 x i64>* %array, i64 %offset
- %tmp = load <2 x i64>* %arrayidx, align 16
- %tmp1 = load <2 x i64>** @globalArray64x2, align 8
- %arrayidx1 = getelementptr inbounds <2 x i64>* %tmp1, i64 %offset
+ %arrayidx = getelementptr inbounds <2 x i64>, <2 x i64>* %array, i64 %offset
+ %tmp = load <2 x i64>, <2 x i64>* %arrayidx, align 16
+ %tmp1 = load <2 x i64>*, <2 x i64>** @globalArray64x2, align 8
+ %arrayidx1 = getelementptr inbounds <2 x i64>, <2 x i64>* %tmp1, i64 %offset
store <2 x i64> %tmp, <2 x i64>* %arrayidx1, align 16
ret void
}
@@ -65,10 +65,10 @@ entry:
; CHECK: ldr [[DEST:q[0-9]+]], [x0, #48]
; CHECK: ldr [[BASE:x[0-9]+]],
; CHECK: str [[DEST]], {{\[}}[[BASE]], #80]
- %arrayidx = getelementptr inbounds <2 x i64>* %array, i64 3
- %tmp = load <2 x i64>* %arrayidx, align 16
- %tmp1 = load <2 x i64>** @globalArray64x2, align 8
- %arrayidx1 = getelementptr inbounds <2 x i64>* %tmp1, i64 5
+ %arrayidx = getelementptr inbounds <2 x i64>, <2 x i64>* %array, i64 3
+ %tmp = load <2 x i64>, <2 x i64>* %arrayidx, align 16
+ %tmp1 = load <2 x i64>*, <2 x i64>** @globalArray64x2, align 8
+ %arrayidx1 = getelementptr inbounds <2 x i64>, <2 x i64>* %tmp1, i64 5
store <2 x i64> %tmp, <2 x i64>* %arrayidx1, align 16
ret void
}
@@ -80,10 +80,10 @@ entry:
; CHECK: ldr [[DEST:q[0-9]+]], [x0, [[SHIFTEDOFFSET]]]
; CHECK: ldr [[BASE:x[0-9]+]],
; CHECK: str [[DEST]], {{\[}}[[BASE]], [[SHIFTEDOFFSET]]]
- %arrayidx = getelementptr inbounds <4 x i32>* %array, i64 %offset
- %tmp = load <4 x i32>* %arrayidx, align 16
- %tmp1 = load <4 x i32>** @globalArray32x4, align 8
- %arrayidx1 = getelementptr inbounds <4 x i32>* %tmp1, i64 %offset
+ %arrayidx = getelementptr inbounds <4 x i32>, <4 x i32>* %array, i64 %offset
+ %tmp = load <4 x i32>, <4 x i32>* %arrayidx, align 16
+ %tmp1 = load <4 x i32>*, <4 x i32>** @globalArray32x4, align 8
+ %arrayidx1 = getelementptr inbounds <4 x i32>, <4 x i32>* %tmp1, i64 %offset
store <4 x i32> %tmp, <4 x i32>* %arrayidx1, align 16
ret void
}
@@ -94,10 +94,10 @@ entry:
; CHECK: ldr [[DEST:q[0-9]+]], [x0, #48]
; CHECK: ldr [[BASE:x[0-9]+]],
; CHECK: str [[DEST]], {{\[}}[[BASE]], #80]
- %arrayidx = getelementptr inbounds <4 x i32>* %array, i64 3
- %tmp = load <4 x i32>* %arrayidx, align 16
- %tmp1 = load <4 x i32>** @globalArray32x4, align 8
- %arrayidx1 = getelementptr inbounds <4 x i32>* %tmp1, i64 5
+ %arrayidx = getelementptr inbounds <4 x i32>, <4 x i32>* %array, i64 3
+ %tmp = load <4 x i32>, <4 x i32>* %arrayidx, align 16
+ %tmp1 = load <4 x i32>*, <4 x i32>** @globalArray32x4, align 8
+ %arrayidx1 = getelementptr inbounds <4 x i32>, <4 x i32>* %tmp1, i64 5
store <4 x i32> %tmp, <4 x i32>* %arrayidx1, align 16
ret void
}
@@ -109,10 +109,10 @@ entry:
; CHECK: ldr [[DEST:q[0-9]+]], [x0, [[SHIFTEDOFFSET]]]
; CHECK: ldr [[BASE:x[0-9]+]],
; CHECK: str [[DEST]], {{\[}}[[BASE]], [[SHIFTEDOFFSET]]]
- %arrayidx = getelementptr inbounds <8 x i16>* %array, i64 %offset
- %tmp = load <8 x i16>* %arrayidx, align 16
- %tmp1 = load <8 x i16>** @globalArray16x8, align 8
- %arrayidx1 = getelementptr inbounds <8 x i16>* %tmp1, i64 %offset
+ %arrayidx = getelementptr inbounds <8 x i16>, <8 x i16>* %array, i64 %offset
+ %tmp = load <8 x i16>, <8 x i16>* %arrayidx, align 16
+ %tmp1 = load <8 x i16>*, <8 x i16>** @globalArray16x8, align 8
+ %arrayidx1 = getelementptr inbounds <8 x i16>, <8 x i16>* %tmp1, i64 %offset
store <8 x i16> %tmp, <8 x i16>* %arrayidx1, align 16
ret void
}
@@ -123,10 +123,10 @@ entry:
; CHECK: ldr [[DEST:q[0-9]+]], [x0, #48]
; CHECK: ldr [[BASE:x[0-9]+]],
; CHECK: str [[DEST]], {{\[}}[[BASE]], #80]
- %arrayidx = getelementptr inbounds <8 x i16>* %array, i64 3
- %tmp = load <8 x i16>* %arrayidx, align 16
- %tmp1 = load <8 x i16>** @globalArray16x8, align 8
- %arrayidx1 = getelementptr inbounds <8 x i16>* %tmp1, i64 5
+ %arrayidx = getelementptr inbounds <8 x i16>, <8 x i16>* %array, i64 3
+ %tmp = load <8 x i16>, <8 x i16>* %arrayidx, align 16
+ %tmp1 = load <8 x i16>*, <8 x i16>** @globalArray16x8, align 8
+ %arrayidx1 = getelementptr inbounds <8 x i16>, <8 x i16>* %tmp1, i64 5
store <8 x i16> %tmp, <8 x i16>* %arrayidx1, align 16
ret void
}
@@ -138,10 +138,10 @@ entry:
; CHECK: ldr [[DEST:q[0-9]+]], [x0, [[SHIFTEDOFFSET]]]
; CHECK: ldr [[BASE:x[0-9]+]],
; CHECK: str [[DEST]], {{\[}}[[BASE]], [[SHIFTEDOFFSET]]]
- %arrayidx = getelementptr inbounds <16 x i8>* %array, i64 %offset
- %tmp = load <16 x i8>* %arrayidx, align 16
- %tmp1 = load <16 x i8>** @globalArray8x16, align 8
- %arrayidx1 = getelementptr inbounds <16 x i8>* %tmp1, i64 %offset
+ %arrayidx = getelementptr inbounds <16 x i8>, <16 x i8>* %array, i64 %offset
+ %tmp = load <16 x i8>, <16 x i8>* %arrayidx, align 16
+ %tmp1 = load <16 x i8>*, <16 x i8>** @globalArray8x16, align 8
+ %arrayidx1 = getelementptr inbounds <16 x i8>, <16 x i8>* %tmp1, i64 %offset
store <16 x i8> %tmp, <16 x i8>* %arrayidx1, align 16
ret void
}
@@ -152,10 +152,10 @@ entry:
; CHECK: ldr [[DEST:q[0-9]+]], [x0, #48]
; CHECK: ldr [[BASE:x[0-9]+]],
; CHECK: str [[DEST]], {{\[}}[[BASE]], #80]
- %arrayidx = getelementptr inbounds <16 x i8>* %array, i64 3
- %tmp = load <16 x i8>* %arrayidx, align 16
- %tmp1 = load <16 x i8>** @globalArray8x16, align 8
- %arrayidx1 = getelementptr inbounds <16 x i8>* %tmp1, i64 5
+ %arrayidx = getelementptr inbounds <16 x i8>, <16 x i8>* %array, i64 3
+ %tmp = load <16 x i8>, <16 x i8>* %arrayidx, align 16
+ %tmp1 = load <16 x i8>*, <16 x i8>** @globalArray8x16, align 8
+ %arrayidx1 = getelementptr inbounds <16 x i8>, <16 x i8>* %tmp1, i64 5
store <16 x i8> %tmp, <16 x i8>* %arrayidx1, align 16
ret void
}
@@ -167,10 +167,10 @@ entry:
; CHECK: ldr [[DEST:d[0-9]+]], [x0, [[SHIFTEDOFFSET]]]
; CHECK: ldr [[BASE:x[0-9]+]],
; CHECK: str [[DEST]], {{\[}}[[BASE]], [[SHIFTEDOFFSET]]]
- %arrayidx = getelementptr inbounds <1 x i64>* %array, i64 %offset
- %tmp = load <1 x i64>* %arrayidx, align 8
- %tmp1 = load <1 x i64>** @globalArray64x1, align 8
- %arrayidx1 = getelementptr inbounds <1 x i64>* %tmp1, i64 %offset
+ %arrayidx = getelementptr inbounds <1 x i64>, <1 x i64>* %array, i64 %offset
+ %tmp = load <1 x i64>, <1 x i64>* %arrayidx, align 8
+ %tmp1 = load <1 x i64>*, <1 x i64>** @globalArray64x1, align 8
+ %arrayidx1 = getelementptr inbounds <1 x i64>, <1 x i64>* %tmp1, i64 %offset
store <1 x i64> %tmp, <1 x i64>* %arrayidx1, align 8
ret void
}
@@ -181,10 +181,10 @@ entry:
; CHECK: ldr [[DEST:d[0-9]+]], [x0, #24]
; CHECK: ldr [[BASE:x[0-9]+]],
; CHECK: str [[DEST]], {{\[}}[[BASE]], #40]
- %arrayidx = getelementptr inbounds <1 x i64>* %array, i64 3
- %tmp = load <1 x i64>* %arrayidx, align 8
- %tmp1 = load <1 x i64>** @globalArray64x1, align 8
- %arrayidx1 = getelementptr inbounds <1 x i64>* %tmp1, i64 5
+ %arrayidx = getelementptr inbounds <1 x i64>, <1 x i64>* %array, i64 3
+ %tmp = load <1 x i64>, <1 x i64>* %arrayidx, align 8
+ %tmp1 = load <1 x i64>*, <1 x i64>** @globalArray64x1, align 8
+ %arrayidx1 = getelementptr inbounds <1 x i64>, <1 x i64>* %tmp1, i64 5
store <1 x i64> %tmp, <1 x i64>* %arrayidx1, align 8
ret void
}
@@ -196,10 +196,10 @@ entry:
; CHECK: ldr [[DEST:d[0-9]+]], [x0, [[SHIFTEDOFFSET]]]
; CHECK: ldr [[BASE:x[0-9]+]],
; CHECK: str [[DEST]], {{\[}}[[BASE]], [[SHIFTEDOFFSET]]]
- %arrayidx = getelementptr inbounds <2 x i32>* %array, i64 %offset
- %tmp = load <2 x i32>* %arrayidx, align 8
- %tmp1 = load <2 x i32>** @globalArray32x2, align 8
- %arrayidx1 = getelementptr inbounds <2 x i32>* %tmp1, i64 %offset
+ %arrayidx = getelementptr inbounds <2 x i32>, <2 x i32>* %array, i64 %offset
+ %tmp = load <2 x i32>, <2 x i32>* %arrayidx, align 8
+ %tmp1 = load <2 x i32>*, <2 x i32>** @globalArray32x2, align 8
+ %arrayidx1 = getelementptr inbounds <2 x i32>, <2 x i32>* %tmp1, i64 %offset
store <2 x i32> %tmp, <2 x i32>* %arrayidx1, align 8
ret void
}
@@ -210,10 +210,10 @@ entry:
; CHECK: ldr [[DEST:d[0-9]+]], [x0, #24]
; CHECK: ldr [[BASE:x[0-9]+]],
; CHECK: str [[DEST]], {{\[}}[[BASE]], #40]
- %arrayidx = getelementptr inbounds <2 x i32>* %array, i64 3
- %tmp = load <2 x i32>* %arrayidx, align 8
- %tmp1 = load <2 x i32>** @globalArray32x2, align 8
- %arrayidx1 = getelementptr inbounds <2 x i32>* %tmp1, i64 5
+ %arrayidx = getelementptr inbounds <2 x i32>, <2 x i32>* %array, i64 3
+ %tmp = load <2 x i32>, <2 x i32>* %arrayidx, align 8
+ %tmp1 = load <2 x i32>*, <2 x i32>** @globalArray32x2, align 8
+ %arrayidx1 = getelementptr inbounds <2 x i32>, <2 x i32>* %tmp1, i64 5
store <2 x i32> %tmp, <2 x i32>* %arrayidx1, align 8
ret void
}
@@ -225,10 +225,10 @@ entry:
; CHECK: ldr [[DEST:d[0-9]+]], [x0, [[SHIFTEDOFFSET]]]
; CHECK: ldr [[BASE:x[0-9]+]],
; CHECK: str [[DEST]], {{\[}}[[BASE]], [[SHIFTEDOFFSET]]]
- %arrayidx = getelementptr inbounds <4 x i16>* %array, i64 %offset
- %tmp = load <4 x i16>* %arrayidx, align 8
- %tmp1 = load <4 x i16>** @globalArray16x4, align 8
- %arrayidx1 = getelementptr inbounds <4 x i16>* %tmp1, i64 %offset
+ %arrayidx = getelementptr inbounds <4 x i16>, <4 x i16>* %array, i64 %offset
+ %tmp = load <4 x i16>, <4 x i16>* %arrayidx, align 8
+ %tmp1 = load <4 x i16>*, <4 x i16>** @globalArray16x4, align 8
+ %arrayidx1 = getelementptr inbounds <4 x i16>, <4 x i16>* %tmp1, i64 %offset
store <4 x i16> %tmp, <4 x i16>* %arrayidx1, align 8
ret void
}
@@ -239,10 +239,10 @@ entry:
; CHECK: ldr [[DEST:d[0-9]+]], [x0, #24]
; CHECK: ldr [[BASE:x[0-9]+]],
; CHECK: str [[DEST]], {{\[}}[[BASE]], #40]
- %arrayidx = getelementptr inbounds <4 x i16>* %array, i64 3
- %tmp = load <4 x i16>* %arrayidx, align 8
- %tmp1 = load <4 x i16>** @globalArray16x4, align 8
- %arrayidx1 = getelementptr inbounds <4 x i16>* %tmp1, i64 5
+ %arrayidx = getelementptr inbounds <4 x i16>, <4 x i16>* %array, i64 3
+ %tmp = load <4 x i16>, <4 x i16>* %arrayidx, align 8
+ %tmp1 = load <4 x i16>*, <4 x i16>** @globalArray16x4, align 8
+ %arrayidx1 = getelementptr inbounds <4 x i16>, <4 x i16>* %tmp1, i64 5
store <4 x i16> %tmp, <4 x i16>* %arrayidx1, align 8
ret void
}
@@ -254,10 +254,10 @@ entry:
; CHECK: ldr [[DEST:d[0-9]+]], [x0, [[SHIFTEDOFFSET]]]
; CHECK: ldr [[BASE:x[0-9]+]],
; CHECK: str [[DEST]], {{\[}}[[BASE]], [[SHIFTEDOFFSET]]]
- %arrayidx = getelementptr inbounds <8 x i8>* %array, i64 %offset
- %tmp = load <8 x i8>* %arrayidx, align 8
- %tmp1 = load <8 x i8>** @globalArray8x8, align 8
- %arrayidx1 = getelementptr inbounds <8 x i8>* %tmp1, i64 %offset
+ %arrayidx = getelementptr inbounds <8 x i8>, <8 x i8>* %array, i64 %offset
+ %tmp = load <8 x i8>, <8 x i8>* %arrayidx, align 8
+ %tmp1 = load <8 x i8>*, <8 x i8>** @globalArray8x8, align 8
+ %arrayidx1 = getelementptr inbounds <8 x i8>, <8 x i8>* %tmp1, i64 %offset
store <8 x i8> %tmp, <8 x i8>* %arrayidx1, align 8
ret void
}
@@ -270,7 +270,7 @@ define <1 x i64> @fct0() nounwind readonly ssp {
entry:
; CHECK-LABEL: fct0:
; CHECK: ldur {{d[0-9]+}}, [{{x[0-9]+}}, #3]
- %0 = load <1 x i64>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 3) to <1 x i64>*), align 8
+ %0 = load <1 x i64>, <1 x i64>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <1 x i64>*), align 8
ret <1 x i64> %0
}
@@ -278,7 +278,7 @@ define <2 x i32> @fct1() nounwind readonly ssp {
entry:
; CHECK-LABEL: fct1:
; CHECK: ldur {{d[0-9]+}}, [{{x[0-9]+}}, #3]
- %0 = load <2 x i32>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 3) to <2 x i32>*), align 8
+ %0 = load <2 x i32>, <2 x i32>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <2 x i32>*), align 8
ret <2 x i32> %0
}
@@ -286,7 +286,7 @@ define <4 x i16> @fct2() nounwind readonly ssp {
entry:
; CHECK-LABEL: fct2:
; CHECK: ldur {{d[0-9]+}}, [{{x[0-9]+}}, #3]
- %0 = load <4 x i16>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 3) to <4 x i16>*), align 8
+ %0 = load <4 x i16>, <4 x i16>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <4 x i16>*), align 8
ret <4 x i16> %0
}
@@ -294,7 +294,7 @@ define <8 x i8> @fct3() nounwind readonly ssp {
entry:
; CHECK-LABEL: fct3:
; CHECK: ldur {{d[0-9]+}}, [{{x[0-9]+}}, #3]
- %0 = load <8 x i8>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 3) to <8 x i8>*), align 8
+ %0 = load <8 x i8>, <8 x i8>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <8 x i8>*), align 8
ret <8 x i8> %0
}
@@ -302,7 +302,7 @@ define <2 x i64> @fct4() nounwind readonly ssp {
entry:
; CHECK-LABEL: fct4:
; CHECK: ldur {{q[0-9]+}}, [{{x[0-9]+}}, #3]
- %0 = load <2 x i64>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 3) to <2 x i64>*), align 16
+ %0 = load <2 x i64>, <2 x i64>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <2 x i64>*), align 16
ret <2 x i64> %0
}
@@ -310,7 +310,7 @@ define <4 x i32> @fct5() nounwind readonly ssp {
entry:
; CHECK-LABEL: fct5:
; CHECK: ldur {{q[0-9]+}}, [{{x[0-9]+}}, #3]
- %0 = load <4 x i32>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 3) to <4 x i32>*), align 16
+ %0 = load <4 x i32>, <4 x i32>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <4 x i32>*), align 16
ret <4 x i32> %0
}
@@ -318,7 +318,7 @@ define <8 x i16> @fct6() nounwind readonly ssp {
entry:
; CHECK-LABEL: fct6:
; CHECK: ldur {{q[0-9]+}}, [{{x[0-9]+}}, #3]
- %0 = load <8 x i16>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 3) to <8 x i16>*), align 16
+ %0 = load <8 x i16>, <8 x i16>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <8 x i16>*), align 16
ret <8 x i16> %0
}
@@ -326,7 +326,7 @@ define <16 x i8> @fct7() nounwind readonly ssp {
entry:
; CHECK-LABEL: fct7:
; CHECK: ldur {{q[0-9]+}}, [{{x[0-9]+}}, #3]
- %0 = load <16 x i8>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 3) to <16 x i8>*), align 16
+ %0 = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <16 x i8>*), align 16
ret <16 x i8> %0
}
@@ -335,8 +335,8 @@ entry:
; CHECK-LABEL: fct8:
; CHECK: ldur [[DESTREG:d[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
- %0 = load <1 x i64>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 3) to <1 x i64>*), align 8
- store <1 x i64> %0, <1 x i64>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 4) to <1 x i64>*), align 8
+ %0 = load <1 x i64>, <1 x i64>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <1 x i64>*), align 8
+ store <1 x i64> %0, <1 x i64>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 4) to <1 x i64>*), align 8
ret void
}
@@ -345,8 +345,8 @@ entry:
; CHECK-LABEL: fct9:
; CHECK: ldur [[DESTREG:d[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
- %0 = load <2 x i32>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 3) to <2 x i32>*), align 8
- store <2 x i32> %0, <2 x i32>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 4) to <2 x i32>*), align 8
+ %0 = load <2 x i32>, <2 x i32>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <2 x i32>*), align 8
+ store <2 x i32> %0, <2 x i32>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 4) to <2 x i32>*), align 8
ret void
}
@@ -355,8 +355,8 @@ entry:
; CHECK-LABEL: fct10:
; CHECK: ldur [[DESTREG:d[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
- %0 = load <4 x i16>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 3) to <4 x i16>*), align 8
- store <4 x i16> %0, <4 x i16>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 4) to <4 x i16>*), align 8
+ %0 = load <4 x i16>, <4 x i16>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <4 x i16>*), align 8
+ store <4 x i16> %0, <4 x i16>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 4) to <4 x i16>*), align 8
ret void
}
@@ -365,8 +365,8 @@ entry:
; CHECK-LABEL: fct11:
; CHECK: ldur [[DESTREG:d[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
- %0 = load <8 x i8>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 3) to <8 x i8>*), align 8
- store <8 x i8> %0, <8 x i8>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 4) to <8 x i8>*), align 8
+ %0 = load <8 x i8>, <8 x i8>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <8 x i8>*), align 8
+ store <8 x i8> %0, <8 x i8>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 4) to <8 x i8>*), align 8
ret void
}
@@ -375,8 +375,8 @@ entry:
; CHECK-LABEL: fct12:
; CHECK: ldur [[DESTREG:q[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
- %0 = load <2 x i64>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 3) to <2 x i64>*), align 16
- store <2 x i64> %0, <2 x i64>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 4) to <2 x i64>*), align 16
+ %0 = load <2 x i64>, <2 x i64>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <2 x i64>*), align 16
+ store <2 x i64> %0, <2 x i64>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 4) to <2 x i64>*), align 16
ret void
}
@@ -385,8 +385,8 @@ entry:
; CHECK-LABEL: fct13:
; CHECK: ldur [[DESTREG:q[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
- %0 = load <4 x i32>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 3) to <4 x i32>*), align 16
- store <4 x i32> %0, <4 x i32>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 4) to <4 x i32>*), align 16
+ %0 = load <4 x i32>, <4 x i32>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <4 x i32>*), align 16
+ store <4 x i32> %0, <4 x i32>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 4) to <4 x i32>*), align 16
ret void
}
@@ -395,8 +395,8 @@ entry:
; CHECK-LABEL: fct14:
; CHECK: ldur [[DESTREG:q[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
- %0 = load <8 x i16>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 3) to <8 x i16>*), align 16
- store <8 x i16> %0, <8 x i16>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 4) to <8 x i16>*), align 16
+ %0 = load <8 x i16>, <8 x i16>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <8 x i16>*), align 16
+ store <8 x i16> %0, <8 x i16>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 4) to <8 x i16>*), align 16
ret void
}
@@ -405,8 +405,8 @@ entry:
; CHECK-LABEL: fct15:
; CHECK: ldur [[DESTREG:q[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
- %0 = load <16 x i8>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 3) to <16 x i8>*), align 16
- store <16 x i8> %0, <16 x i8>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 4) to <16 x i8>*), align 16
+ %0 = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <16 x i8>*), align 16
+ store <16 x i8> %0, <16 x i8>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 4) to <16 x i8>*), align 16
ret void
}
@@ -419,8 +419,8 @@ define <8 x i8> @fct16(i8* nocapture %sp0) {
; CHECK: ldr b[[REGNUM:[0-9]+]], [x0, #1]
; CHECK-NEXT: mul.8b v0, v[[REGNUM]], v[[REGNUM]]
entry:
- %addr = getelementptr i8* %sp0, i64 1
- %pix_sp0.0.copyload = load i8* %addr, align 1
+ %addr = getelementptr i8, i8* %sp0, i64 1
+ %pix_sp0.0.copyload = load i8, i8* %addr, align 1
%vec = insertelement <8 x i8> undef, i8 %pix_sp0.0.copyload, i32 0
%vmull.i = mul <8 x i8> %vec, %vec
ret <8 x i8> %vmull.i
@@ -431,8 +431,8 @@ define <16 x i8> @fct17(i8* nocapture %sp0) {
; CHECK: ldr b[[REGNUM:[0-9]+]], [x0, #1]
; CHECK-NEXT: mul.16b v0, v[[REGNUM]], v[[REGNUM]]
entry:
- %addr = getelementptr i8* %sp0, i64 1
- %pix_sp0.0.copyload = load i8* %addr, align 1
+ %addr = getelementptr i8, i8* %sp0, i64 1
+ %pix_sp0.0.copyload = load i8, i8* %addr, align 1
%vec = insertelement <16 x i8> undef, i8 %pix_sp0.0.copyload, i32 0
%vmull.i = mul <16 x i8> %vec, %vec
ret <16 x i8> %vmull.i
@@ -443,8 +443,8 @@ define <4 x i16> @fct18(i16* nocapture %sp0) {
; CHECK: ldr h[[REGNUM:[0-9]+]], [x0, #2]
; CHECK-NEXT: mul.4h v0, v[[REGNUM]], v[[REGNUM]]
entry:
- %addr = getelementptr i16* %sp0, i64 1
- %pix_sp0.0.copyload = load i16* %addr, align 1
+ %addr = getelementptr i16, i16* %sp0, i64 1
+ %pix_sp0.0.copyload = load i16, i16* %addr, align 1
%vec = insertelement <4 x i16> undef, i16 %pix_sp0.0.copyload, i32 0
%vmull.i = mul <4 x i16> %vec, %vec
ret <4 x i16> %vmull.i
@@ -455,8 +455,8 @@ define <8 x i16> @fct19(i16* nocapture %sp0) {
; CHECK: ldr h[[REGNUM:[0-9]+]], [x0, #2]
; CHECK-NEXT: mul.8h v0, v[[REGNUM]], v[[REGNUM]]
entry:
- %addr = getelementptr i16* %sp0, i64 1
- %pix_sp0.0.copyload = load i16* %addr, align 1
+ %addr = getelementptr i16, i16* %sp0, i64 1
+ %pix_sp0.0.copyload = load i16, i16* %addr, align 1
%vec = insertelement <8 x i16> undef, i16 %pix_sp0.0.copyload, i32 0
%vmull.i = mul <8 x i16> %vec, %vec
ret <8 x i16> %vmull.i
@@ -467,8 +467,8 @@ define <2 x i32> @fct20(i32* nocapture %sp0) {
; CHECK: ldr s[[REGNUM:[0-9]+]], [x0, #4]
; CHECK-NEXT: mul.2s v0, v[[REGNUM]], v[[REGNUM]]
entry:
- %addr = getelementptr i32* %sp0, i64 1
- %pix_sp0.0.copyload = load i32* %addr, align 1
+ %addr = getelementptr i32, i32* %sp0, i64 1
+ %pix_sp0.0.copyload = load i32, i32* %addr, align 1
%vec = insertelement <2 x i32> undef, i32 %pix_sp0.0.copyload, i32 0
%vmull.i = mul <2 x i32> %vec, %vec
ret <2 x i32> %vmull.i
@@ -479,8 +479,8 @@ define <4 x i32> @fct21(i32* nocapture %sp0) {
; CHECK: ldr s[[REGNUM:[0-9]+]], [x0, #4]
; CHECK-NEXT: mul.4s v0, v[[REGNUM]], v[[REGNUM]]
entry:
- %addr = getelementptr i32* %sp0, i64 1
- %pix_sp0.0.copyload = load i32* %addr, align 1
+ %addr = getelementptr i32, i32* %sp0, i64 1
+ %pix_sp0.0.copyload = load i32, i32* %addr, align 1
%vec = insertelement <4 x i32> undef, i32 %pix_sp0.0.copyload, i32 0
%vmull.i = mul <4 x i32> %vec, %vec
ret <4 x i32> %vmull.i
@@ -490,8 +490,8 @@ define <1 x i64> @fct22(i64* nocapture %sp0) {
; CHECK-LABEL: fct22:
; CHECK: ldr d0, [x0, #8]
entry:
- %addr = getelementptr i64* %sp0, i64 1
- %pix_sp0.0.copyload = load i64* %addr, align 1
+ %addr = getelementptr i64, i64* %sp0, i64 1
+ %pix_sp0.0.copyload = load i64, i64* %addr, align 1
%vec = insertelement <1 x i64> undef, i64 %pix_sp0.0.copyload, i32 0
ret <1 x i64> %vec
}
@@ -500,8 +500,8 @@ define <2 x i64> @fct23(i64* nocapture %sp0) {
; CHECK-LABEL: fct23:
; CHECK: ldr d[[REGNUM:[0-9]+]], [x0, #8]
entry:
- %addr = getelementptr i64* %sp0, i64 1
- %pix_sp0.0.copyload = load i64* %addr, align 1
+ %addr = getelementptr i64, i64* %sp0, i64 1
+ %pix_sp0.0.copyload = load i64, i64* %addr, align 1
%vec = insertelement <2 x i64> undef, i64 %pix_sp0.0.copyload, i32 0
ret <2 x i64> %vec
}
@@ -513,8 +513,8 @@ define <8 x i8> @fct24(i8* nocapture %sp0, i64 %offset) {
; CHECK: ldr b[[REGNUM:[0-9]+]], [x0, x1]
; CHECK-NEXT: mul.8b v0, v[[REGNUM]], v[[REGNUM]]
entry:
- %addr = getelementptr i8* %sp0, i64 %offset
- %pix_sp0.0.copyload = load i8* %addr, align 1
+ %addr = getelementptr i8, i8* %sp0, i64 %offset
+ %pix_sp0.0.copyload = load i8, i8* %addr, align 1
%vec = insertelement <8 x i8> undef, i8 %pix_sp0.0.copyload, i32 0
%vmull.i = mul <8 x i8> %vec, %vec
ret <8 x i8> %vmull.i
@@ -525,8 +525,8 @@ define <16 x i8> @fct25(i8* nocapture %sp0, i64 %offset) {
; CHECK: ldr b[[REGNUM:[0-9]+]], [x0, x1]
; CHECK-NEXT: mul.16b v0, v[[REGNUM]], v[[REGNUM]]
entry:
- %addr = getelementptr i8* %sp0, i64 %offset
- %pix_sp0.0.copyload = load i8* %addr, align 1
+ %addr = getelementptr i8, i8* %sp0, i64 %offset
+ %pix_sp0.0.copyload = load i8, i8* %addr, align 1
%vec = insertelement <16 x i8> undef, i8 %pix_sp0.0.copyload, i32 0
%vmull.i = mul <16 x i8> %vec, %vec
ret <16 x i8> %vmull.i
@@ -537,8 +537,8 @@ define <4 x i16> @fct26(i16* nocapture %sp0, i64 %offset) {
; CHECK: ldr h[[REGNUM:[0-9]+]], [x0, x1, lsl #1]
; CHECK-NEXT: mul.4h v0, v[[REGNUM]], v[[REGNUM]]
entry:
- %addr = getelementptr i16* %sp0, i64 %offset
- %pix_sp0.0.copyload = load i16* %addr, align 1
+ %addr = getelementptr i16, i16* %sp0, i64 %offset
+ %pix_sp0.0.copyload = load i16, i16* %addr, align 1
%vec = insertelement <4 x i16> undef, i16 %pix_sp0.0.copyload, i32 0
%vmull.i = mul <4 x i16> %vec, %vec
ret <4 x i16> %vmull.i
@@ -549,8 +549,8 @@ define <8 x i16> @fct27(i16* nocapture %sp0, i64 %offset) {
; CHECK: ldr h[[REGNUM:[0-9]+]], [x0, x1, lsl #1]
; CHECK-NEXT: mul.8h v0, v[[REGNUM]], v[[REGNUM]]
entry:
- %addr = getelementptr i16* %sp0, i64 %offset
- %pix_sp0.0.copyload = load i16* %addr, align 1
+ %addr = getelementptr i16, i16* %sp0, i64 %offset
+ %pix_sp0.0.copyload = load i16, i16* %addr, align 1
%vec = insertelement <8 x i16> undef, i16 %pix_sp0.0.copyload, i32 0
%vmull.i = mul <8 x i16> %vec, %vec
ret <8 x i16> %vmull.i
@@ -561,8 +561,8 @@ define <2 x i32> @fct28(i32* nocapture %sp0, i64 %offset) {
; CHECK: ldr s[[REGNUM:[0-9]+]], [x0, x1, lsl #2]
; CHECK-NEXT: mul.2s v0, v[[REGNUM]], v[[REGNUM]]
entry:
- %addr = getelementptr i32* %sp0, i64 %offset
- %pix_sp0.0.copyload = load i32* %addr, align 1
+ %addr = getelementptr i32, i32* %sp0, i64 %offset
+ %pix_sp0.0.copyload = load i32, i32* %addr, align 1
%vec = insertelement <2 x i32> undef, i32 %pix_sp0.0.copyload, i32 0
%vmull.i = mul <2 x i32> %vec, %vec
ret <2 x i32> %vmull.i
@@ -573,8 +573,8 @@ define <4 x i32> @fct29(i32* nocapture %sp0, i64 %offset) {
; CHECK: ldr s[[REGNUM:[0-9]+]], [x0, x1, lsl #2]
; CHECK-NEXT: mul.4s v0, v[[REGNUM]], v[[REGNUM]]
entry:
- %addr = getelementptr i32* %sp0, i64 %offset
- %pix_sp0.0.copyload = load i32* %addr, align 1
+ %addr = getelementptr i32, i32* %sp0, i64 %offset
+ %pix_sp0.0.copyload = load i32, i32* %addr, align 1
%vec = insertelement <4 x i32> undef, i32 %pix_sp0.0.copyload, i32 0
%vmull.i = mul <4 x i32> %vec, %vec
ret <4 x i32> %vmull.i
@@ -584,8 +584,8 @@ define <1 x i64> @fct30(i64* nocapture %sp0, i64 %offset) {
; CHECK-LABEL: fct30:
; CHECK: ldr d0, [x0, x1, lsl #3]
entry:
- %addr = getelementptr i64* %sp0, i64 %offset
- %pix_sp0.0.copyload = load i64* %addr, align 1
+ %addr = getelementptr i64, i64* %sp0, i64 %offset
+ %pix_sp0.0.copyload = load i64, i64* %addr, align 1
%vec = insertelement <1 x i64> undef, i64 %pix_sp0.0.copyload, i32 0
ret <1 x i64> %vec
}
@@ -594,8 +594,8 @@ define <2 x i64> @fct31(i64* nocapture %sp0, i64 %offset) {
; CHECK-LABEL: fct31:
; CHECK: ldr d0, [x0, x1, lsl #3]
entry:
- %addr = getelementptr i64* %sp0, i64 %offset
- %pix_sp0.0.copyload = load i64* %addr, align 1
+ %addr = getelementptr i64, i64* %sp0, i64 %offset
+ %pix_sp0.0.copyload = load i64, i64* %addr, align 1
%vec = insertelement <2 x i64> undef, i64 %pix_sp0.0.copyload, i32 0
ret <2 x i64> %vec
}
diff --git a/test/CodeGen/AArch64/arm64-vext.ll b/test/CodeGen/AArch64/arm64-vext.ll
index 2240dfd5a1ae..fa57eeb246cc 100644
--- a/test/CodeGen/AArch64/arm64-vext.ll
+++ b/test/CodeGen/AArch64/arm64-vext.ll
@@ -6,12 +6,12 @@ define void @test_vext_s8() nounwind ssp {
%xS8x8 = alloca <8 x i8>, align 8
%__a = alloca <8 x i8>, align 8
%__b = alloca <8 x i8>, align 8
- %tmp = load <8 x i8>* %xS8x8, align 8
+ %tmp = load <8 x i8>, <8 x i8>* %xS8x8, align 8
store <8 x i8> %tmp, <8 x i8>* %__a, align 8
- %tmp1 = load <8 x i8>* %xS8x8, align 8
+ %tmp1 = load <8 x i8>, <8 x i8>* %xS8x8, align 8
store <8 x i8> %tmp1, <8 x i8>* %__b, align 8
- %tmp2 = load <8 x i8>* %__a, align 8
- %tmp3 = load <8 x i8>* %__b, align 8
+ %tmp2 = load <8 x i8>, <8 x i8>* %__a, align 8
+ %tmp3 = load <8 x i8>, <8 x i8>* %__b, align 8
%vext = shufflevector <8 x i8> %tmp2, <8 x i8> %tmp3, <8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>
store <8 x i8> %vext, <8 x i8>* %xS8x8, align 8
ret void
@@ -23,12 +23,12 @@ define void @test_vext_u8() nounwind ssp {
%xU8x8 = alloca <8 x i8>, align 8
%__a = alloca <8 x i8>, align 8
%__b = alloca <8 x i8>, align 8
- %tmp = load <8 x i8>* %xU8x8, align 8
+ %tmp = load <8 x i8>, <8 x i8>* %xU8x8, align 8
store <8 x i8> %tmp, <8 x i8>* %__a, align 8
- %tmp1 = load <8 x i8>* %xU8x8, align 8
+ %tmp1 = load <8 x i8>, <8 x i8>* %xU8x8, align 8
store <8 x i8> %tmp1, <8 x i8>* %__b, align 8
- %tmp2 = load <8 x i8>* %__a, align 8
- %tmp3 = load <8 x i8>* %__b, align 8
+ %tmp2 = load <8 x i8>, <8 x i8>* %__a, align 8
+ %tmp3 = load <8 x i8>, <8 x i8>* %__b, align 8
%vext = shufflevector <8 x i8> %tmp2, <8 x i8> %tmp3, <8 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9>
store <8 x i8> %vext, <8 x i8>* %xU8x8, align 8
ret void
@@ -40,12 +40,12 @@ define void @test_vext_p8() nounwind ssp {
%xP8x8 = alloca <8 x i8>, align 8
%__a = alloca <8 x i8>, align 8
%__b = alloca <8 x i8>, align 8
- %tmp = load <8 x i8>* %xP8x8, align 8
+ %tmp = load <8 x i8>, <8 x i8>* %xP8x8, align 8
store <8 x i8> %tmp, <8 x i8>* %__a, align 8
- %tmp1 = load <8 x i8>* %xP8x8, align 8
+ %tmp1 = load <8 x i8>, <8 x i8>* %xP8x8, align 8
store <8 x i8> %tmp1, <8 x i8>* %__b, align 8
- %tmp2 = load <8 x i8>* %__a, align 8
- %tmp3 = load <8 x i8>* %__b, align 8
+ %tmp2 = load <8 x i8>, <8 x i8>* %__a, align 8
+ %tmp3 = load <8 x i8>, <8 x i8>* %__b, align 8
%vext = shufflevector <8 x i8> %tmp2, <8 x i8> %tmp3, <8 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10>
store <8 x i8> %vext, <8 x i8>* %xP8x8, align 8
ret void
@@ -57,13 +57,13 @@ define void @test_vext_s16() nounwind ssp {
%xS16x4 = alloca <4 x i16>, align 8
%__a = alloca <4 x i16>, align 8
%__b = alloca <4 x i16>, align 8
- %tmp = load <4 x i16>* %xS16x4, align 8
+ %tmp = load <4 x i16>, <4 x i16>* %xS16x4, align 8
store <4 x i16> %tmp, <4 x i16>* %__a, align 8
- %tmp1 = load <4 x i16>* %xS16x4, align 8
+ %tmp1 = load <4 x i16>, <4 x i16>* %xS16x4, align 8
store <4 x i16> %tmp1, <4 x i16>* %__b, align 8
- %tmp2 = load <4 x i16>* %__a, align 8
+ %tmp2 = load <4 x i16>, <4 x i16>* %__a, align 8
%tmp3 = bitcast <4 x i16> %tmp2 to <8 x i8>
- %tmp4 = load <4 x i16>* %__b, align 8
+ %tmp4 = load <4 x i16>, <4 x i16>* %__b, align 8
%tmp5 = bitcast <4 x i16> %tmp4 to <8 x i8>
%tmp6 = bitcast <8 x i8> %tmp3 to <4 x i16>
%tmp7 = bitcast <8 x i8> %tmp5 to <4 x i16>
@@ -78,13 +78,13 @@ define void @test_vext_u16() nounwind ssp {
%xU16x4 = alloca <4 x i16>, align 8
%__a = alloca <4 x i16>, align 8
%__b = alloca <4 x i16>, align 8
- %tmp = load <4 x i16>* %xU16x4, align 8
+ %tmp = load <4 x i16>, <4 x i16>* %xU16x4, align 8
store <4 x i16> %tmp, <4 x i16>* %__a, align 8
- %tmp1 = load <4 x i16>* %xU16x4, align 8
+ %tmp1 = load <4 x i16>, <4 x i16>* %xU16x4, align 8
store <4 x i16> %tmp1, <4 x i16>* %__b, align 8
- %tmp2 = load <4 x i16>* %__a, align 8
+ %tmp2 = load <4 x i16>, <4 x i16>* %__a, align 8
%tmp3 = bitcast <4 x i16> %tmp2 to <8 x i8>
- %tmp4 = load <4 x i16>* %__b, align 8
+ %tmp4 = load <4 x i16>, <4 x i16>* %__b, align 8
%tmp5 = bitcast <4 x i16> %tmp4 to <8 x i8>
%tmp6 = bitcast <8 x i8> %tmp3 to <4 x i16>
%tmp7 = bitcast <8 x i8> %tmp5 to <4 x i16>
@@ -99,13 +99,13 @@ define void @test_vext_p16() nounwind ssp {
%xP16x4 = alloca <4 x i16>, align 8
%__a = alloca <4 x i16>, align 8
%__b = alloca <4 x i16>, align 8
- %tmp = load <4 x i16>* %xP16x4, align 8
+ %tmp = load <4 x i16>, <4 x i16>* %xP16x4, align 8
store <4 x i16> %tmp, <4 x i16>* %__a, align 8
- %tmp1 = load <4 x i16>* %xP16x4, align 8
+ %tmp1 = load <4 x i16>, <4 x i16>* %xP16x4, align 8
store <4 x i16> %tmp1, <4 x i16>* %__b, align 8
- %tmp2 = load <4 x i16>* %__a, align 8
+ %tmp2 = load <4 x i16>, <4 x i16>* %__a, align 8
%tmp3 = bitcast <4 x i16> %tmp2 to <8 x i8>
- %tmp4 = load <4 x i16>* %__b, align 8
+ %tmp4 = load <4 x i16>, <4 x i16>* %__b, align 8
%tmp5 = bitcast <4 x i16> %tmp4 to <8 x i8>
%tmp6 = bitcast <8 x i8> %tmp3 to <4 x i16>
%tmp7 = bitcast <8 x i8> %tmp5 to <4 x i16>
@@ -120,13 +120,13 @@ define void @test_vext_s32() nounwind ssp {
%xS32x2 = alloca <2 x i32>, align 8
%__a = alloca <2 x i32>, align 8
%__b = alloca <2 x i32>, align 8
- %tmp = load <2 x i32>* %xS32x2, align 8
+ %tmp = load <2 x i32>, <2 x i32>* %xS32x2, align 8
store <2 x i32> %tmp, <2 x i32>* %__a, align 8
- %tmp1 = load <2 x i32>* %xS32x2, align 8
+ %tmp1 = load <2 x i32>, <2 x i32>* %xS32x2, align 8
store <2 x i32> %tmp1, <2 x i32>* %__b, align 8
- %tmp2 = load <2 x i32>* %__a, align 8
+ %tmp2 = load <2 x i32>, <2 x i32>* %__a, align 8
%tmp3 = bitcast <2 x i32> %tmp2 to <8 x i8>
- %tmp4 = load <2 x i32>* %__b, align 8
+ %tmp4 = load <2 x i32>, <2 x i32>* %__b, align 8
%tmp5 = bitcast <2 x i32> %tmp4 to <8 x i8>
%tmp6 = bitcast <8 x i8> %tmp3 to <2 x i32>
%tmp7 = bitcast <8 x i8> %tmp5 to <2 x i32>
@@ -141,13 +141,13 @@ define void @test_vext_u32() nounwind ssp {
%xU32x2 = alloca <2 x i32>, align 8
%__a = alloca <2 x i32>, align 8
%__b = alloca <2 x i32>, align 8
- %tmp = load <2 x i32>* %xU32x2, align 8
+ %tmp = load <2 x i32>, <2 x i32>* %xU32x2, align 8
store <2 x i32> %tmp, <2 x i32>* %__a, align 8
- %tmp1 = load <2 x i32>* %xU32x2, align 8
+ %tmp1 = load <2 x i32>, <2 x i32>* %xU32x2, align 8
store <2 x i32> %tmp1, <2 x i32>* %__b, align 8
- %tmp2 = load <2 x i32>* %__a, align 8
+ %tmp2 = load <2 x i32>, <2 x i32>* %__a, align 8
%tmp3 = bitcast <2 x i32> %tmp2 to <8 x i8>
- %tmp4 = load <2 x i32>* %__b, align 8
+ %tmp4 = load <2 x i32>, <2 x i32>* %__b, align 8
%tmp5 = bitcast <2 x i32> %tmp4 to <8 x i8>
%tmp6 = bitcast <8 x i8> %tmp3 to <2 x i32>
%tmp7 = bitcast <8 x i8> %tmp5 to <2 x i32>
@@ -162,13 +162,13 @@ define void @test_vext_f32() nounwind ssp {
%xF32x2 = alloca <2 x float>, align 8
%__a = alloca <2 x float>, align 8
%__b = alloca <2 x float>, align 8
- %tmp = load <2 x float>* %xF32x2, align 8
+ %tmp = load <2 x float>, <2 x float>* %xF32x2, align 8
store <2 x float> %tmp, <2 x float>* %__a, align 8
- %tmp1 = load <2 x float>* %xF32x2, align 8
+ %tmp1 = load <2 x float>, <2 x float>* %xF32x2, align 8
store <2 x float> %tmp1, <2 x float>* %__b, align 8
- %tmp2 = load <2 x float>* %__a, align 8
+ %tmp2 = load <2 x float>, <2 x float>* %__a, align 8
%tmp3 = bitcast <2 x float> %tmp2 to <8 x i8>
- %tmp4 = load <2 x float>* %__b, align 8
+ %tmp4 = load <2 x float>, <2 x float>* %__b, align 8
%tmp5 = bitcast <2 x float> %tmp4 to <8 x i8>
%tmp6 = bitcast <8 x i8> %tmp3 to <2 x float>
%tmp7 = bitcast <8 x i8> %tmp5 to <2 x float>
@@ -184,13 +184,13 @@ define void @test_vext_s64() nounwind ssp {
%xS64x1 = alloca <1 x i64>, align 8
%__a = alloca <1 x i64>, align 8
%__b = alloca <1 x i64>, align 8
- %tmp = load <1 x i64>* %xS64x1, align 8
+ %tmp = load <1 x i64>, <1 x i64>* %xS64x1, align 8
store <1 x i64> %tmp, <1 x i64>* %__a, align 8
- %tmp1 = load <1 x i64>* %xS64x1, align 8
+ %tmp1 = load <1 x i64>, <1 x i64>* %xS64x1, align 8
store <1 x i64> %tmp1, <1 x i64>* %__b, align 8
- %tmp2 = load <1 x i64>* %__a, align 8
+ %tmp2 = load <1 x i64>, <1 x i64>* %__a, align 8
%tmp3 = bitcast <1 x i64> %tmp2 to <8 x i8>
- %tmp4 = load <1 x i64>* %__b, align 8
+ %tmp4 = load <1 x i64>, <1 x i64>* %__b, align 8
%tmp5 = bitcast <1 x i64> %tmp4 to <8 x i8>
%tmp6 = bitcast <8 x i8> %tmp3 to <1 x i64>
%tmp7 = bitcast <8 x i8> %tmp5 to <1 x i64>
@@ -206,13 +206,13 @@ define void @test_vext_u64() nounwind ssp {
%xU64x1 = alloca <1 x i64>, align 8
%__a = alloca <1 x i64>, align 8
%__b = alloca <1 x i64>, align 8
- %tmp = load <1 x i64>* %xU64x1, align 8
+ %tmp = load <1 x i64>, <1 x i64>* %xU64x1, align 8
store <1 x i64> %tmp, <1 x i64>* %__a, align 8
- %tmp1 = load <1 x i64>* %xU64x1, align 8
+ %tmp1 = load <1 x i64>, <1 x i64>* %xU64x1, align 8
store <1 x i64> %tmp1, <1 x i64>* %__b, align 8
- %tmp2 = load <1 x i64>* %__a, align 8
+ %tmp2 = load <1 x i64>, <1 x i64>* %__a, align 8
%tmp3 = bitcast <1 x i64> %tmp2 to <8 x i8>
- %tmp4 = load <1 x i64>* %__b, align 8
+ %tmp4 = load <1 x i64>, <1 x i64>* %__b, align 8
%tmp5 = bitcast <1 x i64> %tmp4 to <8 x i8>
%tmp6 = bitcast <8 x i8> %tmp3 to <1 x i64>
%tmp7 = bitcast <8 x i8> %tmp5 to <1 x i64>
@@ -227,12 +227,12 @@ define void @test_vextq_s8() nounwind ssp {
%xS8x16 = alloca <16 x i8>, align 16
%__a = alloca <16 x i8>, align 16
%__b = alloca <16 x i8>, align 16
- %tmp = load <16 x i8>* %xS8x16, align 16
+ %tmp = load <16 x i8>, <16 x i8>* %xS8x16, align 16
store <16 x i8> %tmp, <16 x i8>* %__a, align 16
- %tmp1 = load <16 x i8>* %xS8x16, align 16
+ %tmp1 = load <16 x i8>, <16 x i8>* %xS8x16, align 16
store <16 x i8> %tmp1, <16 x i8>* %__b, align 16
- %tmp2 = load <16 x i8>* %__a, align 16
- %tmp3 = load <16 x i8>* %__b, align 16
+ %tmp2 = load <16 x i8>, <16 x i8>* %__a, align 16
+ %tmp3 = load <16 x i8>, <16 x i8>* %__b, align 16
%vext = shufflevector <16 x i8> %tmp2, <16 x i8> %tmp3, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19>
store <16 x i8> %vext, <16 x i8>* %xS8x16, align 16
ret void
@@ -244,12 +244,12 @@ define void @test_vextq_u8() nounwind ssp {
%xU8x16 = alloca <16 x i8>, align 16
%__a = alloca <16 x i8>, align 16
%__b = alloca <16 x i8>, align 16
- %tmp = load <16 x i8>* %xU8x16, align 16
+ %tmp = load <16 x i8>, <16 x i8>* %xU8x16, align 16
store <16 x i8> %tmp, <16 x i8>* %__a, align 16
- %tmp1 = load <16 x i8>* %xU8x16, align 16
+ %tmp1 = load <16 x i8>, <16 x i8>* %xU8x16, align 16
store <16 x i8> %tmp1, <16 x i8>* %__b, align 16
- %tmp2 = load <16 x i8>* %__a, align 16
- %tmp3 = load <16 x i8>* %__b, align 16
+ %tmp2 = load <16 x i8>, <16 x i8>* %__a, align 16
+ %tmp3 = load <16 x i8>, <16 x i8>* %__b, align 16
%vext = shufflevector <16 x i8> %tmp2, <16 x i8> %tmp3, <16 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20>
store <16 x i8> %vext, <16 x i8>* %xU8x16, align 16
ret void
@@ -261,12 +261,12 @@ define void @test_vextq_p8() nounwind ssp {
%xP8x16 = alloca <16 x i8>, align 16
%__a = alloca <16 x i8>, align 16
%__b = alloca <16 x i8>, align 16
- %tmp = load <16 x i8>* %xP8x16, align 16
+ %tmp = load <16 x i8>, <16 x i8>* %xP8x16, align 16
store <16 x i8> %tmp, <16 x i8>* %__a, align 16
- %tmp1 = load <16 x i8>* %xP8x16, align 16
+ %tmp1 = load <16 x i8>, <16 x i8>* %xP8x16, align 16
store <16 x i8> %tmp1, <16 x i8>* %__b, align 16
- %tmp2 = load <16 x i8>* %__a, align 16
- %tmp3 = load <16 x i8>* %__b, align 16
+ %tmp2 = load <16 x i8>, <16 x i8>* %__a, align 16
+ %tmp3 = load <16 x i8>, <16 x i8>* %__b, align 16
%vext = shufflevector <16 x i8> %tmp2, <16 x i8> %tmp3, <16 x i32> <i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21>
store <16 x i8> %vext, <16 x i8>* %xP8x16, align 16
ret void
@@ -278,13 +278,13 @@ define void @test_vextq_s16() nounwind ssp {
%xS16x8 = alloca <8 x i16>, align 16
%__a = alloca <8 x i16>, align 16
%__b = alloca <8 x i16>, align 16
- %tmp = load <8 x i16>* %xS16x8, align 16
+ %tmp = load <8 x i16>, <8 x i16>* %xS16x8, align 16
store <8 x i16> %tmp, <8 x i16>* %__a, align 16
- %tmp1 = load <8 x i16>* %xS16x8, align 16
+ %tmp1 = load <8 x i16>, <8 x i16>* %xS16x8, align 16
store <8 x i16> %tmp1, <8 x i16>* %__b, align 16
- %tmp2 = load <8 x i16>* %__a, align 16
+ %tmp2 = load <8 x i16>, <8 x i16>* %__a, align 16
%tmp3 = bitcast <8 x i16> %tmp2 to <16 x i8>
- %tmp4 = load <8 x i16>* %__b, align 16
+ %tmp4 = load <8 x i16>, <8 x i16>* %__b, align 16
%tmp5 = bitcast <8 x i16> %tmp4 to <16 x i8>
%tmp6 = bitcast <16 x i8> %tmp3 to <8 x i16>
%tmp7 = bitcast <16 x i8> %tmp5 to <8 x i16>
@@ -299,13 +299,13 @@ define void @test_vextq_u16() nounwind ssp {
%xU16x8 = alloca <8 x i16>, align 16
%__a = alloca <8 x i16>, align 16
%__b = alloca <8 x i16>, align 16
- %tmp = load <8 x i16>* %xU16x8, align 16
+ %tmp = load <8 x i16>, <8 x i16>* %xU16x8, align 16
store <8 x i16> %tmp, <8 x i16>* %__a, align 16
- %tmp1 = load <8 x i16>* %xU16x8, align 16
+ %tmp1 = load <8 x i16>, <8 x i16>* %xU16x8, align 16
store <8 x i16> %tmp1, <8 x i16>* %__b, align 16
- %tmp2 = load <8 x i16>* %__a, align 16
+ %tmp2 = load <8 x i16>, <8 x i16>* %__a, align 16
%tmp3 = bitcast <8 x i16> %tmp2 to <16 x i8>
- %tmp4 = load <8 x i16>* %__b, align 16
+ %tmp4 = load <8 x i16>, <8 x i16>* %__b, align 16
%tmp5 = bitcast <8 x i16> %tmp4 to <16 x i8>
%tmp6 = bitcast <16 x i8> %tmp3 to <8 x i16>
%tmp7 = bitcast <16 x i8> %tmp5 to <8 x i16>
@@ -320,13 +320,13 @@ define void @test_vextq_p16() nounwind ssp {
%xP16x8 = alloca <8 x i16>, align 16
%__a = alloca <8 x i16>, align 16
%__b = alloca <8 x i16>, align 16
- %tmp = load <8 x i16>* %xP16x8, align 16
+ %tmp = load <8 x i16>, <8 x i16>* %xP16x8, align 16
store <8 x i16> %tmp, <8 x i16>* %__a, align 16
- %tmp1 = load <8 x i16>* %xP16x8, align 16
+ %tmp1 = load <8 x i16>, <8 x i16>* %xP16x8, align 16
store <8 x i16> %tmp1, <8 x i16>* %__b, align 16
- %tmp2 = load <8 x i16>* %__a, align 16
+ %tmp2 = load <8 x i16>, <8 x i16>* %__a, align 16
%tmp3 = bitcast <8 x i16> %tmp2 to <16 x i8>
- %tmp4 = load <8 x i16>* %__b, align 16
+ %tmp4 = load <8 x i16>, <8 x i16>* %__b, align 16
%tmp5 = bitcast <8 x i16> %tmp4 to <16 x i8>
%tmp6 = bitcast <16 x i8> %tmp3 to <8 x i16>
%tmp7 = bitcast <16 x i8> %tmp5 to <8 x i16>
@@ -341,13 +341,13 @@ define void @test_vextq_s32() nounwind ssp {
%xS32x4 = alloca <4 x i32>, align 16
%__a = alloca <4 x i32>, align 16
%__b = alloca <4 x i32>, align 16
- %tmp = load <4 x i32>* %xS32x4, align 16
+ %tmp = load <4 x i32>, <4 x i32>* %xS32x4, align 16
store <4 x i32> %tmp, <4 x i32>* %__a, align 16
- %tmp1 = load <4 x i32>* %xS32x4, align 16
+ %tmp1 = load <4 x i32>, <4 x i32>* %xS32x4, align 16
store <4 x i32> %tmp1, <4 x i32>* %__b, align 16
- %tmp2 = load <4 x i32>* %__a, align 16
+ %tmp2 = load <4 x i32>, <4 x i32>* %__a, align 16
%tmp3 = bitcast <4 x i32> %tmp2 to <16 x i8>
- %tmp4 = load <4 x i32>* %__b, align 16
+ %tmp4 = load <4 x i32>, <4 x i32>* %__b, align 16
%tmp5 = bitcast <4 x i32> %tmp4 to <16 x i8>
%tmp6 = bitcast <16 x i8> %tmp3 to <4 x i32>
%tmp7 = bitcast <16 x i8> %tmp5 to <4 x i32>
@@ -362,13 +362,13 @@ define void @test_vextq_u32() nounwind ssp {
%xU32x4 = alloca <4 x i32>, align 16
%__a = alloca <4 x i32>, align 16
%__b = alloca <4 x i32>, align 16
- %tmp = load <4 x i32>* %xU32x4, align 16
+ %tmp = load <4 x i32>, <4 x i32>* %xU32x4, align 16
store <4 x i32> %tmp, <4 x i32>* %__a, align 16
- %tmp1 = load <4 x i32>* %xU32x4, align 16
+ %tmp1 = load <4 x i32>, <4 x i32>* %xU32x4, align 16
store <4 x i32> %tmp1, <4 x i32>* %__b, align 16
- %tmp2 = load <4 x i32>* %__a, align 16
+ %tmp2 = load <4 x i32>, <4 x i32>* %__a, align 16
%tmp3 = bitcast <4 x i32> %tmp2 to <16 x i8>
- %tmp4 = load <4 x i32>* %__b, align 16
+ %tmp4 = load <4 x i32>, <4 x i32>* %__b, align 16
%tmp5 = bitcast <4 x i32> %tmp4 to <16 x i8>
%tmp6 = bitcast <16 x i8> %tmp3 to <4 x i32>
%tmp7 = bitcast <16 x i8> %tmp5 to <4 x i32>
@@ -383,13 +383,13 @@ define void @test_vextq_f32() nounwind ssp {
%xF32x4 = alloca <4 x float>, align 16
%__a = alloca <4 x float>, align 16
%__b = alloca <4 x float>, align 16
- %tmp = load <4 x float>* %xF32x4, align 16
+ %tmp = load <4 x float>, <4 x float>* %xF32x4, align 16
store <4 x float> %tmp, <4 x float>* %__a, align 16
- %tmp1 = load <4 x float>* %xF32x4, align 16
+ %tmp1 = load <4 x float>, <4 x float>* %xF32x4, align 16
store <4 x float> %tmp1, <4 x float>* %__b, align 16
- %tmp2 = load <4 x float>* %__a, align 16
+ %tmp2 = load <4 x float>, <4 x float>* %__a, align 16
%tmp3 = bitcast <4 x float> %tmp2 to <16 x i8>
- %tmp4 = load <4 x float>* %__b, align 16
+ %tmp4 = load <4 x float>, <4 x float>* %__b, align 16
%tmp5 = bitcast <4 x float> %tmp4 to <16 x i8>
%tmp6 = bitcast <16 x i8> %tmp3 to <4 x float>
%tmp7 = bitcast <16 x i8> %tmp5 to <4 x float>
@@ -404,13 +404,13 @@ define void @test_vextq_s64() nounwind ssp {
%xS64x2 = alloca <2 x i64>, align 16
%__a = alloca <2 x i64>, align 16
%__b = alloca <2 x i64>, align 16
- %tmp = load <2 x i64>* %xS64x2, align 16
+ %tmp = load <2 x i64>, <2 x i64>* %xS64x2, align 16
store <2 x i64> %tmp, <2 x i64>* %__a, align 16
- %tmp1 = load <2 x i64>* %xS64x2, align 16
+ %tmp1 = load <2 x i64>, <2 x i64>* %xS64x2, align 16
store <2 x i64> %tmp1, <2 x i64>* %__b, align 16
- %tmp2 = load <2 x i64>* %__a, align 16
+ %tmp2 = load <2 x i64>, <2 x i64>* %__a, align 16
%tmp3 = bitcast <2 x i64> %tmp2 to <16 x i8>
- %tmp4 = load <2 x i64>* %__b, align 16
+ %tmp4 = load <2 x i64>, <2 x i64>* %__b, align 16
%tmp5 = bitcast <2 x i64> %tmp4 to <16 x i8>
%tmp6 = bitcast <16 x i8> %tmp3 to <2 x i64>
%tmp7 = bitcast <16 x i8> %tmp5 to <2 x i64>
@@ -425,13 +425,13 @@ define void @test_vextq_u64() nounwind ssp {
%xU64x2 = alloca <2 x i64>, align 16
%__a = alloca <2 x i64>, align 16
%__b = alloca <2 x i64>, align 16
- %tmp = load <2 x i64>* %xU64x2, align 16
+ %tmp = load <2 x i64>, <2 x i64>* %xU64x2, align 16
store <2 x i64> %tmp, <2 x i64>* %__a, align 16
- %tmp1 = load <2 x i64>* %xU64x2, align 16
+ %tmp1 = load <2 x i64>, <2 x i64>* %xU64x2, align 16
store <2 x i64> %tmp1, <2 x i64>* %__b, align 16
- %tmp2 = load <2 x i64>* %__a, align 16
+ %tmp2 = load <2 x i64>, <2 x i64>* %__a, align 16
%tmp3 = bitcast <2 x i64> %tmp2 to <16 x i8>
- %tmp4 = load <2 x i64>* %__b, align 16
+ %tmp4 = load <2 x i64>, <2 x i64>* %__b, align 16
%tmp5 = bitcast <2 x i64> %tmp4 to <16 x i8>
%tmp6 = bitcast <16 x i8> %tmp3 to <2 x i64>
%tmp7 = bitcast <16 x i8> %tmp5 to <2 x i64>
diff --git a/test/CodeGen/AArch64/arm64-vhadd.ll b/test/CodeGen/AArch64/arm64-vhadd.ll
index 6178bf9809dd..2e82b2a72541 100644
--- a/test/CodeGen/AArch64/arm64-vhadd.ll
+++ b/test/CodeGen/AArch64/arm64-vhadd.ll
@@ -3,8 +3,8 @@
define <8 x i8> @shadd8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: shadd8b:
;CHECK: shadd.8b
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
+ %tmp2 = load <8 x i8>, <8 x i8>* %B
%tmp3 = call <8 x i8> @llvm.aarch64.neon.shadd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
ret <8 x i8> %tmp3
}
@@ -12,8 +12,8 @@ define <8 x i8> @shadd8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
define <16 x i8> @shadd16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: shadd16b:
;CHECK: shadd.16b
- %tmp1 = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
+ %tmp1 = load <16 x i8>, <16 x i8>* %A
+ %tmp2 = load <16 x i8>, <16 x i8>* %B
%tmp3 = call <16 x i8> @llvm.aarch64.neon.shadd.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
ret <16 x i8> %tmp3
}
@@ -21,8 +21,8 @@ define <16 x i8> @shadd16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
define <4 x i16> @shadd4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: shadd4h:
;CHECK: shadd.4h
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
%tmp3 = call <4 x i16> @llvm.aarch64.neon.shadd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
ret <4 x i16> %tmp3
}
@@ -30,8 +30,8 @@ define <4 x i16> @shadd4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
define <8 x i16> @shadd8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: shadd8h:
;CHECK: shadd.8h
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
+ %tmp2 = load <8 x i16>, <8 x i16>* %B
%tmp3 = call <8 x i16> @llvm.aarch64.neon.shadd.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
ret <8 x i16> %tmp3
}
@@ -39,8 +39,8 @@ define <8 x i16> @shadd8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
define <2 x i32> @shadd2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: shadd2s:
;CHECK: shadd.2s
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
%tmp3 = call <2 x i32> @llvm.aarch64.neon.shadd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
ret <2 x i32> %tmp3
}
@@ -48,8 +48,8 @@ define <2 x i32> @shadd2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
define <4 x i32> @shadd4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: shadd4s:
;CHECK: shadd.4s
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
+ %tmp2 = load <4 x i32>, <4 x i32>* %B
%tmp3 = call <4 x i32> @llvm.aarch64.neon.shadd.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
ret <4 x i32> %tmp3
}
@@ -57,8 +57,8 @@ define <4 x i32> @shadd4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
define <8 x i8> @uhadd8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: uhadd8b:
;CHECK: uhadd.8b
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
+ %tmp2 = load <8 x i8>, <8 x i8>* %B
%tmp3 = call <8 x i8> @llvm.aarch64.neon.uhadd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
ret <8 x i8> %tmp3
}
@@ -66,8 +66,8 @@ define <8 x i8> @uhadd8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
define <16 x i8> @uhadd16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: uhadd16b:
;CHECK: uhadd.16b
- %tmp1 = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
+ %tmp1 = load <16 x i8>, <16 x i8>* %A
+ %tmp2 = load <16 x i8>, <16 x i8>* %B
%tmp3 = call <16 x i8> @llvm.aarch64.neon.uhadd.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
ret <16 x i8> %tmp3
}
@@ -75,8 +75,8 @@ define <16 x i8> @uhadd16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
define <4 x i16> @uhadd4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: uhadd4h:
;CHECK: uhadd.4h
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
%tmp3 = call <4 x i16> @llvm.aarch64.neon.uhadd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
ret <4 x i16> %tmp3
}
@@ -84,8 +84,8 @@ define <4 x i16> @uhadd4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
define <8 x i16> @uhadd8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: uhadd8h:
;CHECK: uhadd.8h
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
+ %tmp2 = load <8 x i16>, <8 x i16>* %B
%tmp3 = call <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
ret <8 x i16> %tmp3
}
@@ -93,8 +93,8 @@ define <8 x i16> @uhadd8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
define <2 x i32> @uhadd2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: uhadd2s:
;CHECK: uhadd.2s
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
%tmp3 = call <2 x i32> @llvm.aarch64.neon.uhadd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
ret <2 x i32> %tmp3
}
@@ -102,8 +102,8 @@ define <2 x i32> @uhadd2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
define <4 x i32> @uhadd4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: uhadd4s:
;CHECK: uhadd.4s
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
+ %tmp2 = load <4 x i32>, <4 x i32>* %B
%tmp3 = call <4 x i32> @llvm.aarch64.neon.uhadd.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
ret <4 x i32> %tmp3
}
@@ -127,8 +127,8 @@ declare <4 x i32> @llvm.aarch64.neon.uhadd.v4i32(<4 x i32>, <4 x i32>) nounwind
define <8 x i8> @srhadd8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: srhadd8b:
;CHECK: srhadd.8b
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
+ %tmp2 = load <8 x i8>, <8 x i8>* %B
%tmp3 = call <8 x i8> @llvm.aarch64.neon.srhadd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
ret <8 x i8> %tmp3
}
@@ -136,8 +136,8 @@ define <8 x i8> @srhadd8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
define <16 x i8> @srhadd16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: srhadd16b:
;CHECK: srhadd.16b
- %tmp1 = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
+ %tmp1 = load <16 x i8>, <16 x i8>* %A
+ %tmp2 = load <16 x i8>, <16 x i8>* %B
%tmp3 = call <16 x i8> @llvm.aarch64.neon.srhadd.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
ret <16 x i8> %tmp3
}
@@ -145,8 +145,8 @@ define <16 x i8> @srhadd16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
define <4 x i16> @srhadd4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: srhadd4h:
;CHECK: srhadd.4h
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
%tmp3 = call <4 x i16> @llvm.aarch64.neon.srhadd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
ret <4 x i16> %tmp3
}
@@ -154,8 +154,8 @@ define <4 x i16> @srhadd4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
define <8 x i16> @srhadd8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: srhadd8h:
;CHECK: srhadd.8h
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
+ %tmp2 = load <8 x i16>, <8 x i16>* %B
%tmp3 = call <8 x i16> @llvm.aarch64.neon.srhadd.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
ret <8 x i16> %tmp3
}
@@ -163,8 +163,8 @@ define <8 x i16> @srhadd8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
define <2 x i32> @srhadd2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: srhadd2s:
;CHECK: srhadd.2s
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
%tmp3 = call <2 x i32> @llvm.aarch64.neon.srhadd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
ret <2 x i32> %tmp3
}
@@ -172,8 +172,8 @@ define <2 x i32> @srhadd2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
define <4 x i32> @srhadd4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: srhadd4s:
;CHECK: srhadd.4s
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
+ %tmp2 = load <4 x i32>, <4 x i32>* %B
%tmp3 = call <4 x i32> @llvm.aarch64.neon.srhadd.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
ret <4 x i32> %tmp3
}
@@ -181,8 +181,8 @@ define <4 x i32> @srhadd4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
define <8 x i8> @urhadd8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: urhadd8b:
;CHECK: urhadd.8b
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
+ %tmp2 = load <8 x i8>, <8 x i8>* %B
%tmp3 = call <8 x i8> @llvm.aarch64.neon.urhadd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
ret <8 x i8> %tmp3
}
@@ -190,8 +190,8 @@ define <8 x i8> @urhadd8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
define <16 x i8> @urhadd16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: urhadd16b:
;CHECK: urhadd.16b
- %tmp1 = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
+ %tmp1 = load <16 x i8>, <16 x i8>* %A
+ %tmp2 = load <16 x i8>, <16 x i8>* %B
%tmp3 = call <16 x i8> @llvm.aarch64.neon.urhadd.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
ret <16 x i8> %tmp3
}
@@ -199,8 +199,8 @@ define <16 x i8> @urhadd16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
define <4 x i16> @urhadd4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: urhadd4h:
;CHECK: urhadd.4h
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
%tmp3 = call <4 x i16> @llvm.aarch64.neon.urhadd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
ret <4 x i16> %tmp3
}
@@ -208,8 +208,8 @@ define <4 x i16> @urhadd4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
define <8 x i16> @urhadd8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: urhadd8h:
;CHECK: urhadd.8h
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
+ %tmp2 = load <8 x i16>, <8 x i16>* %B
%tmp3 = call <8 x i16> @llvm.aarch64.neon.urhadd.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
ret <8 x i16> %tmp3
}
@@ -217,8 +217,8 @@ define <8 x i16> @urhadd8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
define <2 x i32> @urhadd2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: urhadd2s:
;CHECK: urhadd.2s
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
%tmp3 = call <2 x i32> @llvm.aarch64.neon.urhadd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
ret <2 x i32> %tmp3
}
@@ -226,8 +226,8 @@ define <2 x i32> @urhadd2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
define <4 x i32> @urhadd4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: urhadd4s:
;CHECK: urhadd.4s
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
+ %tmp2 = load <4 x i32>, <4 x i32>* %B
%tmp3 = call <4 x i32> @llvm.aarch64.neon.urhadd.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
ret <4 x i32> %tmp3
}
diff --git a/test/CodeGen/AArch64/arm64-vhsub.ll b/test/CodeGen/AArch64/arm64-vhsub.ll
index 13bfda3899e5..e50fd3d35896 100644
--- a/test/CodeGen/AArch64/arm64-vhsub.ll
+++ b/test/CodeGen/AArch64/arm64-vhsub.ll
@@ -3,8 +3,8 @@
define <8 x i8> @shsub8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: shsub8b:
;CHECK: shsub.8b
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
+ %tmp2 = load <8 x i8>, <8 x i8>* %B
%tmp3 = call <8 x i8> @llvm.aarch64.neon.shsub.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
ret <8 x i8> %tmp3
}
@@ -12,8 +12,8 @@ define <8 x i8> @shsub8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
define <16 x i8> @shsub16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: shsub16b:
;CHECK: shsub.16b
- %tmp1 = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
+ %tmp1 = load <16 x i8>, <16 x i8>* %A
+ %tmp2 = load <16 x i8>, <16 x i8>* %B
%tmp3 = call <16 x i8> @llvm.aarch64.neon.shsub.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
ret <16 x i8> %tmp3
}
@@ -21,8 +21,8 @@ define <16 x i8> @shsub16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
define <4 x i16> @shsub4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: shsub4h:
;CHECK: shsub.4h
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
%tmp3 = call <4 x i16> @llvm.aarch64.neon.shsub.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
ret <4 x i16> %tmp3
}
@@ -30,8 +30,8 @@ define <4 x i16> @shsub4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
define <8 x i16> @shsub8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: shsub8h:
;CHECK: shsub.8h
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
+ %tmp2 = load <8 x i16>, <8 x i16>* %B
%tmp3 = call <8 x i16> @llvm.aarch64.neon.shsub.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
ret <8 x i16> %tmp3
}
@@ -39,8 +39,8 @@ define <8 x i16> @shsub8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
define <2 x i32> @shsub2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: shsub2s:
;CHECK: shsub.2s
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
%tmp3 = call <2 x i32> @llvm.aarch64.neon.shsub.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
ret <2 x i32> %tmp3
}
@@ -48,8 +48,8 @@ define <2 x i32> @shsub2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
define <4 x i32> @shsub4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: shsub4s:
;CHECK: shsub.4s
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
+ %tmp2 = load <4 x i32>, <4 x i32>* %B
%tmp3 = call <4 x i32> @llvm.aarch64.neon.shsub.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
ret <4 x i32> %tmp3
}
@@ -57,8 +57,8 @@ define <4 x i32> @shsub4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
define <8 x i8> @uhsub8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: uhsub8b:
;CHECK: uhsub.8b
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
+ %tmp2 = load <8 x i8>, <8 x i8>* %B
%tmp3 = call <8 x i8> @llvm.aarch64.neon.uhsub.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
ret <8 x i8> %tmp3
}
@@ -66,8 +66,8 @@ define <8 x i8> @uhsub8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
define <16 x i8> @uhsub16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: uhsub16b:
;CHECK: uhsub.16b
- %tmp1 = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
+ %tmp1 = load <16 x i8>, <16 x i8>* %A
+ %tmp2 = load <16 x i8>, <16 x i8>* %B
%tmp3 = call <16 x i8> @llvm.aarch64.neon.uhsub.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
ret <16 x i8> %tmp3
}
@@ -75,8 +75,8 @@ define <16 x i8> @uhsub16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
define <4 x i16> @uhsub4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: uhsub4h:
;CHECK: uhsub.4h
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
%tmp3 = call <4 x i16> @llvm.aarch64.neon.uhsub.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
ret <4 x i16> %tmp3
}
@@ -84,8 +84,8 @@ define <4 x i16> @uhsub4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
define <8 x i16> @uhsub8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: uhsub8h:
;CHECK: uhsub.8h
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
+ %tmp2 = load <8 x i16>, <8 x i16>* %B
%tmp3 = call <8 x i16> @llvm.aarch64.neon.uhsub.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
ret <8 x i16> %tmp3
}
@@ -93,8 +93,8 @@ define <8 x i16> @uhsub8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
define <2 x i32> @uhsub2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: uhsub2s:
;CHECK: uhsub.2s
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
%tmp3 = call <2 x i32> @llvm.aarch64.neon.uhsub.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
ret <2 x i32> %tmp3
}
@@ -102,8 +102,8 @@ define <2 x i32> @uhsub2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
define <4 x i32> @uhsub4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: uhsub4s:
;CHECK: uhsub.4s
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
+ %tmp2 = load <4 x i32>, <4 x i32>* %B
%tmp3 = call <4 x i32> @llvm.aarch64.neon.uhsub.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
ret <4 x i32> %tmp3
}
diff --git a/test/CodeGen/AArch64/arm64-virtual_base.ll b/test/CodeGen/AArch64/arm64-virtual_base.ll
index cb9595453348..703d81a8d4fe 100644
--- a/test/CodeGen/AArch64/arm64-virtual_base.ll
+++ b/test/CodeGen/AArch64/arm64-virtual_base.ll
@@ -39,9 +39,9 @@ define void @Precompute_Patch_Values(%struct.Bicubic_Patch_Struct* %Shape) {
; CHECK-NEXT: stur [[VAL2]], {{\[}}sp, #216]
entry:
%Control_Points = alloca [16 x [3 x double]], align 8
- %arraydecay5.3.1 = getelementptr inbounds [16 x [3 x double]]* %Control_Points, i64 0, i64 9, i64 0
+ %arraydecay5.3.1 = getelementptr inbounds [16 x [3 x double]], [16 x [3 x double]]* %Control_Points, i64 0, i64 9, i64 0
%tmp14 = bitcast double* %arraydecay5.3.1 to i8*
- %arraydecay11.3.1 = getelementptr inbounds %struct.Bicubic_Patch_Struct* %Shape, i64 0, i32 12, i64 1, i64 3, i64 0
+ %arraydecay11.3.1 = getelementptr inbounds %struct.Bicubic_Patch_Struct, %struct.Bicubic_Patch_Struct* %Shape, i64 0, i32 12, i64 1, i64 3, i64 0
%tmp15 = bitcast double* %arraydecay11.3.1 to i8*
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp14, i8* %tmp15, i64 24, i32 1, i1 false)
ret void
diff --git a/test/CodeGen/AArch64/arm64-vmax.ll b/test/CodeGen/AArch64/arm64-vmax.ll
index 3f2c134dec6e..7e363231b360 100644
--- a/test/CodeGen/AArch64/arm64-vmax.ll
+++ b/test/CodeGen/AArch64/arm64-vmax.ll
@@ -3,8 +3,8 @@
define <8 x i8> @smax_8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: smax_8b:
;CHECK: smax.8b
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
+ %tmp2 = load <8 x i8>, <8 x i8>* %B
%tmp3 = call <8 x i8> @llvm.aarch64.neon.smax.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
ret <8 x i8> %tmp3
}
@@ -12,8 +12,8 @@ define <8 x i8> @smax_8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
define <16 x i8> @smax_16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: smax_16b:
;CHECK: smax.16b
- %tmp1 = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
+ %tmp1 = load <16 x i8>, <16 x i8>* %A
+ %tmp2 = load <16 x i8>, <16 x i8>* %B
%tmp3 = call <16 x i8> @llvm.aarch64.neon.smax.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
ret <16 x i8> %tmp3
}
@@ -21,8 +21,8 @@ define <16 x i8> @smax_16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
define <4 x i16> @smax_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: smax_4h:
;CHECK: smax.4h
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
%tmp3 = call <4 x i16> @llvm.aarch64.neon.smax.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
ret <4 x i16> %tmp3
}
@@ -30,8 +30,8 @@ define <4 x i16> @smax_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
define <8 x i16> @smax_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: smax_8h:
;CHECK: smax.8h
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
+ %tmp2 = load <8 x i16>, <8 x i16>* %B
%tmp3 = call <8 x i16> @llvm.aarch64.neon.smax.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
ret <8 x i16> %tmp3
}
@@ -39,8 +39,8 @@ define <8 x i16> @smax_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
define <2 x i32> @smax_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: smax_2s:
;CHECK: smax.2s
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
%tmp3 = call <2 x i32> @llvm.aarch64.neon.smax.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
ret <2 x i32> %tmp3
}
@@ -48,8 +48,8 @@ define <2 x i32> @smax_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
define <4 x i32> @smax_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: smax_4s:
;CHECK: smax.4s
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
+ %tmp2 = load <4 x i32>, <4 x i32>* %B
%tmp3 = call <4 x i32> @llvm.aarch64.neon.smax.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
ret <4 x i32> %tmp3
}
@@ -64,8 +64,8 @@ declare <4 x i32> @llvm.aarch64.neon.smax.v4i32(<4 x i32>, <4 x i32>) nounwind r
define <8 x i8> @umax_8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: umax_8b:
;CHECK: umax.8b
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
+ %tmp2 = load <8 x i8>, <8 x i8>* %B
%tmp3 = call <8 x i8> @llvm.aarch64.neon.umax.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
ret <8 x i8> %tmp3
}
@@ -73,8 +73,8 @@ define <8 x i8> @umax_8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
define <16 x i8> @umax_16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: umax_16b:
;CHECK: umax.16b
- %tmp1 = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
+ %tmp1 = load <16 x i8>, <16 x i8>* %A
+ %tmp2 = load <16 x i8>, <16 x i8>* %B
%tmp3 = call <16 x i8> @llvm.aarch64.neon.umax.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
ret <16 x i8> %tmp3
}
@@ -82,8 +82,8 @@ define <16 x i8> @umax_16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
define <4 x i16> @umax_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: umax_4h:
;CHECK: umax.4h
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
%tmp3 = call <4 x i16> @llvm.aarch64.neon.umax.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
ret <4 x i16> %tmp3
}
@@ -91,8 +91,8 @@ define <4 x i16> @umax_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
define <8 x i16> @umax_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: umax_8h:
;CHECK: umax.8h
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
+ %tmp2 = load <8 x i16>, <8 x i16>* %B
%tmp3 = call <8 x i16> @llvm.aarch64.neon.umax.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
ret <8 x i16> %tmp3
}
@@ -100,8 +100,8 @@ define <8 x i16> @umax_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
define <2 x i32> @umax_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: umax_2s:
;CHECK: umax.2s
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
%tmp3 = call <2 x i32> @llvm.aarch64.neon.umax.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
ret <2 x i32> %tmp3
}
@@ -109,8 +109,8 @@ define <2 x i32> @umax_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
define <4 x i32> @umax_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: umax_4s:
;CHECK: umax.4s
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
+ %tmp2 = load <4 x i32>, <4 x i32>* %B
%tmp3 = call <4 x i32> @llvm.aarch64.neon.umax.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
ret <4 x i32> %tmp3
}
@@ -125,8 +125,8 @@ declare <4 x i32> @llvm.aarch64.neon.umax.v4i32(<4 x i32>, <4 x i32>) nounwind r
define <8 x i8> @smin_8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: smin_8b:
;CHECK: smin.8b
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
+ %tmp2 = load <8 x i8>, <8 x i8>* %B
%tmp3 = call <8 x i8> @llvm.aarch64.neon.smin.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
ret <8 x i8> %tmp3
}
@@ -134,8 +134,8 @@ define <8 x i8> @smin_8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
define <16 x i8> @smin_16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: smin_16b:
;CHECK: smin.16b
- %tmp1 = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
+ %tmp1 = load <16 x i8>, <16 x i8>* %A
+ %tmp2 = load <16 x i8>, <16 x i8>* %B
%tmp3 = call <16 x i8> @llvm.aarch64.neon.smin.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
ret <16 x i8> %tmp3
}
@@ -143,8 +143,8 @@ define <16 x i8> @smin_16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
define <4 x i16> @smin_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: smin_4h:
;CHECK: smin.4h
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
%tmp3 = call <4 x i16> @llvm.aarch64.neon.smin.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
ret <4 x i16> %tmp3
}
@@ -152,8 +152,8 @@ define <4 x i16> @smin_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
define <8 x i16> @smin_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: smin_8h:
;CHECK: smin.8h
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
+ %tmp2 = load <8 x i16>, <8 x i16>* %B
%tmp3 = call <8 x i16> @llvm.aarch64.neon.smin.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
ret <8 x i16> %tmp3
}
@@ -161,8 +161,8 @@ define <8 x i16> @smin_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
define <2 x i32> @smin_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: smin_2s:
;CHECK: smin.2s
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
%tmp3 = call <2 x i32> @llvm.aarch64.neon.smin.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
ret <2 x i32> %tmp3
}
@@ -170,8 +170,8 @@ define <2 x i32> @smin_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
define <4 x i32> @smin_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: smin_4s:
;CHECK: smin.4s
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
+ %tmp2 = load <4 x i32>, <4 x i32>* %B
%tmp3 = call <4 x i32> @llvm.aarch64.neon.smin.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
ret <4 x i32> %tmp3
}
@@ -186,8 +186,8 @@ declare <4 x i32> @llvm.aarch64.neon.smin.v4i32(<4 x i32>, <4 x i32>) nounwind r
define <8 x i8> @umin_8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: umin_8b:
;CHECK: umin.8b
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
+ %tmp2 = load <8 x i8>, <8 x i8>* %B
%tmp3 = call <8 x i8> @llvm.aarch64.neon.umin.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
ret <8 x i8> %tmp3
}
@@ -195,8 +195,8 @@ define <8 x i8> @umin_8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
define <16 x i8> @umin_16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: umin_16b:
;CHECK: umin.16b
- %tmp1 = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
+ %tmp1 = load <16 x i8>, <16 x i8>* %A
+ %tmp2 = load <16 x i8>, <16 x i8>* %B
%tmp3 = call <16 x i8> @llvm.aarch64.neon.umin.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
ret <16 x i8> %tmp3
}
@@ -204,8 +204,8 @@ define <16 x i8> @umin_16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
define <4 x i16> @umin_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: umin_4h:
;CHECK: umin.4h
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
%tmp3 = call <4 x i16> @llvm.aarch64.neon.umin.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
ret <4 x i16> %tmp3
}
@@ -213,8 +213,8 @@ define <4 x i16> @umin_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
define <8 x i16> @umin_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: umin_8h:
;CHECK: umin.8h
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
+ %tmp2 = load <8 x i16>, <8 x i16>* %B
%tmp3 = call <8 x i16> @llvm.aarch64.neon.umin.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
ret <8 x i16> %tmp3
}
@@ -222,8 +222,8 @@ define <8 x i16> @umin_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
define <2 x i32> @umin_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: umin_2s:
;CHECK: umin.2s
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
%tmp3 = call <2 x i32> @llvm.aarch64.neon.umin.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
ret <2 x i32> %tmp3
}
@@ -231,8 +231,8 @@ define <2 x i32> @umin_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
define <4 x i32> @umin_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: umin_4s:
;CHECK: umin.4s
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
+ %tmp2 = load <4 x i32>, <4 x i32>* %B
%tmp3 = call <4 x i32> @llvm.aarch64.neon.umin.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
ret <4 x i32> %tmp3
}
@@ -249,8 +249,8 @@ declare <4 x i32> @llvm.aarch64.neon.umin.v4i32(<4 x i32>, <4 x i32>) nounwind r
define <8 x i8> @smaxp_8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: smaxp_8b:
;CHECK: smaxp.8b
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
+ %tmp2 = load <8 x i8>, <8 x i8>* %B
%tmp3 = call <8 x i8> @llvm.aarch64.neon.smaxp.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
ret <8 x i8> %tmp3
}
@@ -258,8 +258,8 @@ define <8 x i8> @smaxp_8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
define <16 x i8> @smaxp_16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: smaxp_16b:
;CHECK: smaxp.16b
- %tmp1 = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
+ %tmp1 = load <16 x i8>, <16 x i8>* %A
+ %tmp2 = load <16 x i8>, <16 x i8>* %B
%tmp3 = call <16 x i8> @llvm.aarch64.neon.smaxp.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
ret <16 x i8> %tmp3
}
@@ -267,8 +267,8 @@ define <16 x i8> @smaxp_16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
define <4 x i16> @smaxp_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: smaxp_4h:
;CHECK: smaxp.4h
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
%tmp3 = call <4 x i16> @llvm.aarch64.neon.smaxp.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
ret <4 x i16> %tmp3
}
@@ -276,8 +276,8 @@ define <4 x i16> @smaxp_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
define <8 x i16> @smaxp_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: smaxp_8h:
;CHECK: smaxp.8h
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
+ %tmp2 = load <8 x i16>, <8 x i16>* %B
%tmp3 = call <8 x i16> @llvm.aarch64.neon.smaxp.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
ret <8 x i16> %tmp3
}
@@ -285,8 +285,8 @@ define <8 x i16> @smaxp_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
define <2 x i32> @smaxp_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: smaxp_2s:
;CHECK: smaxp.2s
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
%tmp3 = call <2 x i32> @llvm.aarch64.neon.smaxp.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
ret <2 x i32> %tmp3
}
@@ -294,8 +294,8 @@ define <2 x i32> @smaxp_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
define <4 x i32> @smaxp_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: smaxp_4s:
;CHECK: smaxp.4s
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
+ %tmp2 = load <4 x i32>, <4 x i32>* %B
%tmp3 = call <4 x i32> @llvm.aarch64.neon.smaxp.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
ret <4 x i32> %tmp3
}
@@ -310,8 +310,8 @@ declare <4 x i32> @llvm.aarch64.neon.smaxp.v4i32(<4 x i32>, <4 x i32>) nounwind
define <8 x i8> @umaxp_8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: umaxp_8b:
;CHECK: umaxp.8b
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
+ %tmp2 = load <8 x i8>, <8 x i8>* %B
%tmp3 = call <8 x i8> @llvm.aarch64.neon.umaxp.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
ret <8 x i8> %tmp3
}
@@ -319,8 +319,8 @@ define <8 x i8> @umaxp_8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
define <16 x i8> @umaxp_16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: umaxp_16b:
;CHECK: umaxp.16b
- %tmp1 = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
+ %tmp1 = load <16 x i8>, <16 x i8>* %A
+ %tmp2 = load <16 x i8>, <16 x i8>* %B
%tmp3 = call <16 x i8> @llvm.aarch64.neon.umaxp.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
ret <16 x i8> %tmp3
}
@@ -328,8 +328,8 @@ define <16 x i8> @umaxp_16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
define <4 x i16> @umaxp_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: umaxp_4h:
;CHECK: umaxp.4h
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
%tmp3 = call <4 x i16> @llvm.aarch64.neon.umaxp.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
ret <4 x i16> %tmp3
}
@@ -337,8 +337,8 @@ define <4 x i16> @umaxp_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
define <8 x i16> @umaxp_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: umaxp_8h:
;CHECK: umaxp.8h
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
+ %tmp2 = load <8 x i16>, <8 x i16>* %B
%tmp3 = call <8 x i16> @llvm.aarch64.neon.umaxp.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
ret <8 x i16> %tmp3
}
@@ -346,8 +346,8 @@ define <8 x i16> @umaxp_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
define <2 x i32> @umaxp_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: umaxp_2s:
;CHECK: umaxp.2s
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
%tmp3 = call <2 x i32> @llvm.aarch64.neon.umaxp.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
ret <2 x i32> %tmp3
}
@@ -355,8 +355,8 @@ define <2 x i32> @umaxp_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
define <4 x i32> @umaxp_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: umaxp_4s:
;CHECK: umaxp.4s
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
+ %tmp2 = load <4 x i32>, <4 x i32>* %B
%tmp3 = call <4 x i32> @llvm.aarch64.neon.umaxp.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
ret <4 x i32> %tmp3
}
@@ -373,8 +373,8 @@ declare <4 x i32> @llvm.aarch64.neon.umaxp.v4i32(<4 x i32>, <4 x i32>) nounwind
define <8 x i8> @sminp_8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: sminp_8b:
;CHECK: sminp.8b
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
+ %tmp2 = load <8 x i8>, <8 x i8>* %B
%tmp3 = call <8 x i8> @llvm.aarch64.neon.sminp.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
ret <8 x i8> %tmp3
}
@@ -382,8 +382,8 @@ define <8 x i8> @sminp_8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
define <16 x i8> @sminp_16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: sminp_16b:
;CHECK: sminp.16b
- %tmp1 = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
+ %tmp1 = load <16 x i8>, <16 x i8>* %A
+ %tmp2 = load <16 x i8>, <16 x i8>* %B
%tmp3 = call <16 x i8> @llvm.aarch64.neon.sminp.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
ret <16 x i8> %tmp3
}
@@ -391,8 +391,8 @@ define <16 x i8> @sminp_16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
define <4 x i16> @sminp_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: sminp_4h:
;CHECK: sminp.4h
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
%tmp3 = call <4 x i16> @llvm.aarch64.neon.sminp.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
ret <4 x i16> %tmp3
}
@@ -400,8 +400,8 @@ define <4 x i16> @sminp_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
define <8 x i16> @sminp_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: sminp_8h:
;CHECK: sminp.8h
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
+ %tmp2 = load <8 x i16>, <8 x i16>* %B
%tmp3 = call <8 x i16> @llvm.aarch64.neon.sminp.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
ret <8 x i16> %tmp3
}
@@ -409,8 +409,8 @@ define <8 x i16> @sminp_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
define <2 x i32> @sminp_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: sminp_2s:
;CHECK: sminp.2s
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
%tmp3 = call <2 x i32> @llvm.aarch64.neon.sminp.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
ret <2 x i32> %tmp3
}
@@ -418,8 +418,8 @@ define <2 x i32> @sminp_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
define <4 x i32> @sminp_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: sminp_4s:
;CHECK: sminp.4s
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
+ %tmp2 = load <4 x i32>, <4 x i32>* %B
%tmp3 = call <4 x i32> @llvm.aarch64.neon.sminp.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
ret <4 x i32> %tmp3
}
@@ -434,8 +434,8 @@ declare <4 x i32> @llvm.aarch64.neon.sminp.v4i32(<4 x i32>, <4 x i32>) nounwind
define <8 x i8> @uminp_8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: uminp_8b:
;CHECK: uminp.8b
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
+ %tmp2 = load <8 x i8>, <8 x i8>* %B
%tmp3 = call <8 x i8> @llvm.aarch64.neon.uminp.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
ret <8 x i8> %tmp3
}
@@ -443,8 +443,8 @@ define <8 x i8> @uminp_8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
define <16 x i8> @uminp_16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: uminp_16b:
;CHECK: uminp.16b
- %tmp1 = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
+ %tmp1 = load <16 x i8>, <16 x i8>* %A
+ %tmp2 = load <16 x i8>, <16 x i8>* %B
%tmp3 = call <16 x i8> @llvm.aarch64.neon.uminp.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
ret <16 x i8> %tmp3
}
@@ -452,8 +452,8 @@ define <16 x i8> @uminp_16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
define <4 x i16> @uminp_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: uminp_4h:
;CHECK: uminp.4h
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
%tmp3 = call <4 x i16> @llvm.aarch64.neon.uminp.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
ret <4 x i16> %tmp3
}
@@ -461,8 +461,8 @@ define <4 x i16> @uminp_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
define <8 x i16> @uminp_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: uminp_8h:
;CHECK: uminp.8h
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
+ %tmp2 = load <8 x i16>, <8 x i16>* %B
%tmp3 = call <8 x i16> @llvm.aarch64.neon.uminp.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
ret <8 x i16> %tmp3
}
@@ -470,8 +470,8 @@ define <8 x i16> @uminp_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
define <2 x i32> @uminp_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: uminp_2s:
;CHECK: uminp.2s
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
%tmp3 = call <2 x i32> @llvm.aarch64.neon.uminp.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
ret <2 x i32> %tmp3
}
@@ -479,8 +479,8 @@ define <2 x i32> @uminp_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
define <4 x i32> @uminp_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: uminp_4s:
;CHECK: uminp.4s
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
+ %tmp2 = load <4 x i32>, <4 x i32>* %B
%tmp3 = call <4 x i32> @llvm.aarch64.neon.uminp.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
ret <4 x i32> %tmp3
}
@@ -495,8 +495,8 @@ declare <4 x i32> @llvm.aarch64.neon.uminp.v4i32(<4 x i32>, <4 x i32>) nounwind
define <2 x float> @fmax_2s(<2 x float>* %A, <2 x float>* %B) nounwind {
;CHECK-LABEL: fmax_2s:
;CHECK: fmax.2s
- %tmp1 = load <2 x float>* %A
- %tmp2 = load <2 x float>* %B
+ %tmp1 = load <2 x float>, <2 x float>* %A
+ %tmp2 = load <2 x float>, <2 x float>* %B
%tmp3 = call <2 x float> @llvm.aarch64.neon.fmax.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
ret <2 x float> %tmp3
}
@@ -504,8 +504,8 @@ define <2 x float> @fmax_2s(<2 x float>* %A, <2 x float>* %B) nounwind {
define <4 x float> @fmax_4s(<4 x float>* %A, <4 x float>* %B) nounwind {
;CHECK-LABEL: fmax_4s:
;CHECK: fmax.4s
- %tmp1 = load <4 x float>* %A
- %tmp2 = load <4 x float>* %B
+ %tmp1 = load <4 x float>, <4 x float>* %A
+ %tmp2 = load <4 x float>, <4 x float>* %B
%tmp3 = call <4 x float> @llvm.aarch64.neon.fmax.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
ret <4 x float> %tmp3
}
@@ -513,8 +513,8 @@ define <4 x float> @fmax_4s(<4 x float>* %A, <4 x float>* %B) nounwind {
define <2 x double> @fmax_2d(<2 x double>* %A, <2 x double>* %B) nounwind {
;CHECK-LABEL: fmax_2d:
;CHECK: fmax.2d
- %tmp1 = load <2 x double>* %A
- %tmp2 = load <2 x double>* %B
+ %tmp1 = load <2 x double>, <2 x double>* %A
+ %tmp2 = load <2 x double>, <2 x double>* %B
%tmp3 = call <2 x double> @llvm.aarch64.neon.fmax.v2f64(<2 x double> %tmp1, <2 x double> %tmp2)
ret <2 x double> %tmp3
}
@@ -526,8 +526,8 @@ declare <2 x double> @llvm.aarch64.neon.fmax.v2f64(<2 x double>, <2 x double>) n
define <2 x float> @fmaxp_2s(<2 x float>* %A, <2 x float>* %B) nounwind {
;CHECK-LABEL: fmaxp_2s:
;CHECK: fmaxp.2s
- %tmp1 = load <2 x float>* %A
- %tmp2 = load <2 x float>* %B
+ %tmp1 = load <2 x float>, <2 x float>* %A
+ %tmp2 = load <2 x float>, <2 x float>* %B
%tmp3 = call <2 x float> @llvm.aarch64.neon.fmaxp.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
ret <2 x float> %tmp3
}
@@ -535,8 +535,8 @@ define <2 x float> @fmaxp_2s(<2 x float>* %A, <2 x float>* %B) nounwind {
define <4 x float> @fmaxp_4s(<4 x float>* %A, <4 x float>* %B) nounwind {
;CHECK-LABEL: fmaxp_4s:
;CHECK: fmaxp.4s
- %tmp1 = load <4 x float>* %A
- %tmp2 = load <4 x float>* %B
+ %tmp1 = load <4 x float>, <4 x float>* %A
+ %tmp2 = load <4 x float>, <4 x float>* %B
%tmp3 = call <4 x float> @llvm.aarch64.neon.fmaxp.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
ret <4 x float> %tmp3
}
@@ -544,8 +544,8 @@ define <4 x float> @fmaxp_4s(<4 x float>* %A, <4 x float>* %B) nounwind {
define <2 x double> @fmaxp_2d(<2 x double>* %A, <2 x double>* %B) nounwind {
;CHECK-LABEL: fmaxp_2d:
;CHECK: fmaxp.2d
- %tmp1 = load <2 x double>* %A
- %tmp2 = load <2 x double>* %B
+ %tmp1 = load <2 x double>, <2 x double>* %A
+ %tmp2 = load <2 x double>, <2 x double>* %B
%tmp3 = call <2 x double> @llvm.aarch64.neon.fmaxp.v2f64(<2 x double> %tmp1, <2 x double> %tmp2)
ret <2 x double> %tmp3
}
@@ -557,8 +557,8 @@ declare <2 x double> @llvm.aarch64.neon.fmaxp.v2f64(<2 x double>, <2 x double>)
define <2 x float> @fmin_2s(<2 x float>* %A, <2 x float>* %B) nounwind {
;CHECK-LABEL: fmin_2s:
;CHECK: fmin.2s
- %tmp1 = load <2 x float>* %A
- %tmp2 = load <2 x float>* %B
+ %tmp1 = load <2 x float>, <2 x float>* %A
+ %tmp2 = load <2 x float>, <2 x float>* %B
%tmp3 = call <2 x float> @llvm.aarch64.neon.fmin.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
ret <2 x float> %tmp3
}
@@ -566,8 +566,8 @@ define <2 x float> @fmin_2s(<2 x float>* %A, <2 x float>* %B) nounwind {
define <4 x float> @fmin_4s(<4 x float>* %A, <4 x float>* %B) nounwind {
;CHECK-LABEL: fmin_4s:
;CHECK: fmin.4s
- %tmp1 = load <4 x float>* %A
- %tmp2 = load <4 x float>* %B
+ %tmp1 = load <4 x float>, <4 x float>* %A
+ %tmp2 = load <4 x float>, <4 x float>* %B
%tmp3 = call <4 x float> @llvm.aarch64.neon.fmin.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
ret <4 x float> %tmp3
}
@@ -575,8 +575,8 @@ define <4 x float> @fmin_4s(<4 x float>* %A, <4 x float>* %B) nounwind {
define <2 x double> @fmin_2d(<2 x double>* %A, <2 x double>* %B) nounwind {
;CHECK-LABEL: fmin_2d:
;CHECK: fmin.2d
- %tmp1 = load <2 x double>* %A
- %tmp2 = load <2 x double>* %B
+ %tmp1 = load <2 x double>, <2 x double>* %A
+ %tmp2 = load <2 x double>, <2 x double>* %B
%tmp3 = call <2 x double> @llvm.aarch64.neon.fmin.v2f64(<2 x double> %tmp1, <2 x double> %tmp2)
ret <2 x double> %tmp3
}
@@ -588,8 +588,8 @@ declare <2 x double> @llvm.aarch64.neon.fmin.v2f64(<2 x double>, <2 x double>) n
define <2 x float> @fminp_2s(<2 x float>* %A, <2 x float>* %B) nounwind {
;CHECK-LABEL: fminp_2s:
;CHECK: fminp.2s
- %tmp1 = load <2 x float>* %A
- %tmp2 = load <2 x float>* %B
+ %tmp1 = load <2 x float>, <2 x float>* %A
+ %tmp2 = load <2 x float>, <2 x float>* %B
%tmp3 = call <2 x float> @llvm.aarch64.neon.fminp.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
ret <2 x float> %tmp3
}
@@ -597,8 +597,8 @@ define <2 x float> @fminp_2s(<2 x float>* %A, <2 x float>* %B) nounwind {
define <4 x float> @fminp_4s(<4 x float>* %A, <4 x float>* %B) nounwind {
;CHECK-LABEL: fminp_4s:
;CHECK: fminp.4s
- %tmp1 = load <4 x float>* %A
- %tmp2 = load <4 x float>* %B
+ %tmp1 = load <4 x float>, <4 x float>* %A
+ %tmp2 = load <4 x float>, <4 x float>* %B
%tmp3 = call <4 x float> @llvm.aarch64.neon.fminp.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
ret <4 x float> %tmp3
}
@@ -606,8 +606,8 @@ define <4 x float> @fminp_4s(<4 x float>* %A, <4 x float>* %B) nounwind {
define <2 x double> @fminp_2d(<2 x double>* %A, <2 x double>* %B) nounwind {
;CHECK-LABEL: fminp_2d:
;CHECK: fminp.2d
- %tmp1 = load <2 x double>* %A
- %tmp2 = load <2 x double>* %B
+ %tmp1 = load <2 x double>, <2 x double>* %A
+ %tmp2 = load <2 x double>, <2 x double>* %B
%tmp3 = call <2 x double> @llvm.aarch64.neon.fminp.v2f64(<2 x double> %tmp1, <2 x double> %tmp2)
ret <2 x double> %tmp3
}
@@ -619,8 +619,8 @@ declare <2 x double> @llvm.aarch64.neon.fminp.v2f64(<2 x double>, <2 x double>)
define <2 x float> @fminnmp_2s(<2 x float>* %A, <2 x float>* %B) nounwind {
;CHECK-LABEL: fminnmp_2s:
;CHECK: fminnmp.2s
- %tmp1 = load <2 x float>* %A
- %tmp2 = load <2 x float>* %B
+ %tmp1 = load <2 x float>, <2 x float>* %A
+ %tmp2 = load <2 x float>, <2 x float>* %B
%tmp3 = call <2 x float> @llvm.aarch64.neon.fminnmp.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
ret <2 x float> %tmp3
}
@@ -628,8 +628,8 @@ define <2 x float> @fminnmp_2s(<2 x float>* %A, <2 x float>* %B) nounwind {
define <4 x float> @fminnmp_4s(<4 x float>* %A, <4 x float>* %B) nounwind {
;CHECK-LABEL: fminnmp_4s:
;CHECK: fminnmp.4s
- %tmp1 = load <4 x float>* %A
- %tmp2 = load <4 x float>* %B
+ %tmp1 = load <4 x float>, <4 x float>* %A
+ %tmp2 = load <4 x float>, <4 x float>* %B
%tmp3 = call <4 x float> @llvm.aarch64.neon.fminnmp.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
ret <4 x float> %tmp3
}
@@ -637,8 +637,8 @@ define <4 x float> @fminnmp_4s(<4 x float>* %A, <4 x float>* %B) nounwind {
define <2 x double> @fminnmp_2d(<2 x double>* %A, <2 x double>* %B) nounwind {
;CHECK-LABEL: fminnmp_2d:
;CHECK: fminnmp.2d
- %tmp1 = load <2 x double>* %A
- %tmp2 = load <2 x double>* %B
+ %tmp1 = load <2 x double>, <2 x double>* %A
+ %tmp2 = load <2 x double>, <2 x double>* %B
%tmp3 = call <2 x double> @llvm.aarch64.neon.fminnmp.v2f64(<2 x double> %tmp1, <2 x double> %tmp2)
ret <2 x double> %tmp3
}
@@ -650,8 +650,8 @@ declare <2 x double> @llvm.aarch64.neon.fminnmp.v2f64(<2 x double>, <2 x double>
define <2 x float> @fmaxnmp_2s(<2 x float>* %A, <2 x float>* %B) nounwind {
;CHECK-LABEL: fmaxnmp_2s:
;CHECK: fmaxnmp.2s
- %tmp1 = load <2 x float>* %A
- %tmp2 = load <2 x float>* %B
+ %tmp1 = load <2 x float>, <2 x float>* %A
+ %tmp2 = load <2 x float>, <2 x float>* %B
%tmp3 = call <2 x float> @llvm.aarch64.neon.fmaxnmp.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
ret <2 x float> %tmp3
}
@@ -659,8 +659,8 @@ define <2 x float> @fmaxnmp_2s(<2 x float>* %A, <2 x float>* %B) nounwind {
define <4 x float> @fmaxnmp_4s(<4 x float>* %A, <4 x float>* %B) nounwind {
;CHECK-LABEL: fmaxnmp_4s:
;CHECK: fmaxnmp.4s
- %tmp1 = load <4 x float>* %A
- %tmp2 = load <4 x float>* %B
+ %tmp1 = load <4 x float>, <4 x float>* %A
+ %tmp2 = load <4 x float>, <4 x float>* %B
%tmp3 = call <4 x float> @llvm.aarch64.neon.fmaxnmp.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
ret <4 x float> %tmp3
}
@@ -668,8 +668,8 @@ define <4 x float> @fmaxnmp_4s(<4 x float>* %A, <4 x float>* %B) nounwind {
define <2 x double> @fmaxnmp_2d(<2 x double>* %A, <2 x double>* %B) nounwind {
;CHECK-LABEL: fmaxnmp_2d:
;CHECK: fmaxnmp.2d
- %tmp1 = load <2 x double>* %A
- %tmp2 = load <2 x double>* %B
+ %tmp1 = load <2 x double>, <2 x double>* %A
+ %tmp2 = load <2 x double>, <2 x double>* %B
%tmp3 = call <2 x double> @llvm.aarch64.neon.fmaxnmp.v2f64(<2 x double> %tmp1, <2 x double> %tmp2)
ret <2 x double> %tmp3
}
diff --git a/test/CodeGen/AArch64/arm64-vmul.ll b/test/CodeGen/AArch64/arm64-vmul.ll
index 6fa60fe346af..3df847ec3748 100644
--- a/test/CodeGen/AArch64/arm64-vmul.ll
+++ b/test/CodeGen/AArch64/arm64-vmul.ll
@@ -4,8 +4,8 @@
define <8 x i16> @smull8h(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: smull8h:
;CHECK: smull.8h
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
+ %tmp2 = load <8 x i8>, <8 x i8>* %B
%tmp3 = call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2)
ret <8 x i16> %tmp3
}
@@ -13,8 +13,8 @@ define <8 x i16> @smull8h(<8 x i8>* %A, <8 x i8>* %B) nounwind {
define <4 x i32> @smull4s(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: smull4s:
;CHECK: smull.4s
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
%tmp3 = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
ret <4 x i32> %tmp3
}
@@ -22,8 +22,8 @@ define <4 x i32> @smull4s(<4 x i16>* %A, <4 x i16>* %B) nounwind {
define <2 x i64> @smull2d(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: smull2d:
;CHECK: smull.2d
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
%tmp3 = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
ret <2 x i64> %tmp3
}
@@ -35,8 +35,8 @@ declare <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32>, <2 x i32>) nounwind
define <8 x i16> @umull8h(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: umull8h:
;CHECK: umull.8h
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
+ %tmp2 = load <8 x i8>, <8 x i8>* %B
%tmp3 = call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2)
ret <8 x i16> %tmp3
}
@@ -44,8 +44,8 @@ define <8 x i16> @umull8h(<8 x i8>* %A, <8 x i8>* %B) nounwind {
define <4 x i32> @umull4s(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: umull4s:
;CHECK: umull.4s
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
%tmp3 = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
ret <4 x i32> %tmp3
}
@@ -53,8 +53,8 @@ define <4 x i32> @umull4s(<4 x i16>* %A, <4 x i16>* %B) nounwind {
define <2 x i64> @umull2d(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: umull2d:
;CHECK: umull.2d
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
%tmp3 = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
ret <2 x i64> %tmp3
}
@@ -66,8 +66,8 @@ declare <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32>, <2 x i32>) nounwind
define <4 x i32> @sqdmull4s(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: sqdmull4s:
;CHECK: sqdmull.4s
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
%tmp3 = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
ret <4 x i32> %tmp3
}
@@ -75,8 +75,8 @@ define <4 x i32> @sqdmull4s(<4 x i16>* %A, <4 x i16>* %B) nounwind {
define <2 x i64> @sqdmull2d(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: sqdmull2d:
;CHECK: sqdmull.2d
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
%tmp3 = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
ret <2 x i64> %tmp3
}
@@ -84,8 +84,8 @@ define <2 x i64> @sqdmull2d(<2 x i32>* %A, <2 x i32>* %B) nounwind {
define <4 x i32> @sqdmull2_4s(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: sqdmull2_4s:
;CHECK: sqdmull2.4s
- %load1 = load <8 x i16>* %A
- %load2 = load <8 x i16>* %B
+ %load1 = load <8 x i16>, <8 x i16>* %A
+ %load2 = load <8 x i16>, <8 x i16>* %B
%tmp1 = shufflevector <8 x i16> %load1, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
%tmp2 = shufflevector <8 x i16> %load2, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
%tmp3 = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
@@ -95,8 +95,8 @@ define <4 x i32> @sqdmull2_4s(<8 x i16>* %A, <8 x i16>* %B) nounwind {
define <2 x i64> @sqdmull2_2d(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: sqdmull2_2d:
;CHECK: sqdmull2.2d
- %load1 = load <4 x i32>* %A
- %load2 = load <4 x i32>* %B
+ %load1 = load <4 x i32>, <4 x i32>* %A
+ %load2 = load <4 x i32>, <4 x i32>* %B
%tmp1 = shufflevector <4 x i32> %load1, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
%tmp2 = shufflevector <4 x i32> %load2, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
%tmp3 = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
@@ -110,8 +110,8 @@ declare <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32>, <2 x i32>) nounwin
define <8 x i16> @pmull8h(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: pmull8h:
;CHECK: pmull.8h
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
+ %tmp2 = load <8 x i8>, <8 x i8>* %B
%tmp3 = call <8 x i16> @llvm.aarch64.neon.pmull.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2)
ret <8 x i16> %tmp3
}
@@ -121,8 +121,8 @@ declare <8 x i16> @llvm.aarch64.neon.pmull.v8i16(<8 x i8>, <8 x i8>) nounwind re
define <4 x i16> @sqdmulh_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: sqdmulh_4h:
;CHECK: sqdmulh.4h
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
%tmp3 = call <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
ret <4 x i16> %tmp3
}
@@ -130,8 +130,8 @@ define <4 x i16> @sqdmulh_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
define <8 x i16> @sqdmulh_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: sqdmulh_8h:
;CHECK: sqdmulh.8h
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
+ %tmp2 = load <8 x i16>, <8 x i16>* %B
%tmp3 = call <8 x i16> @llvm.aarch64.neon.sqdmulh.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
ret <8 x i16> %tmp3
}
@@ -139,8 +139,8 @@ define <8 x i16> @sqdmulh_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
define <2 x i32> @sqdmulh_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: sqdmulh_2s:
;CHECK: sqdmulh.2s
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
%tmp3 = call <2 x i32> @llvm.aarch64.neon.sqdmulh.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
ret <2 x i32> %tmp3
}
@@ -148,8 +148,8 @@ define <2 x i32> @sqdmulh_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
define <4 x i32> @sqdmulh_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: sqdmulh_4s:
;CHECK: sqdmulh.4s
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
+ %tmp2 = load <4 x i32>, <4 x i32>* %B
%tmp3 = call <4 x i32> @llvm.aarch64.neon.sqdmulh.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
ret <4 x i32> %tmp3
}
@@ -157,8 +157,8 @@ define <4 x i32> @sqdmulh_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
define i32 @sqdmulh_1s(i32* %A, i32* %B) nounwind {
;CHECK-LABEL: sqdmulh_1s:
;CHECK: sqdmulh s0, {{s[0-9]+}}, {{s[0-9]+}}
- %tmp1 = load i32* %A
- %tmp2 = load i32* %B
+ %tmp1 = load i32, i32* %A
+ %tmp2 = load i32, i32* %B
%tmp3 = call i32 @llvm.aarch64.neon.sqdmulh.i32(i32 %tmp1, i32 %tmp2)
ret i32 %tmp3
}
@@ -172,8 +172,8 @@ declare i32 @llvm.aarch64.neon.sqdmulh.i32(i32, i32) nounwind readnone
define <4 x i16> @sqrdmulh_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: sqrdmulh_4h:
;CHECK: sqrdmulh.4h
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
%tmp3 = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
ret <4 x i16> %tmp3
}
@@ -181,8 +181,8 @@ define <4 x i16> @sqrdmulh_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
define <8 x i16> @sqrdmulh_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: sqrdmulh_8h:
;CHECK: sqrdmulh.8h
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
+ %tmp2 = load <8 x i16>, <8 x i16>* %B
%tmp3 = call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
ret <8 x i16> %tmp3
}
@@ -190,8 +190,8 @@ define <8 x i16> @sqrdmulh_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
define <2 x i32> @sqrdmulh_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: sqrdmulh_2s:
;CHECK: sqrdmulh.2s
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
%tmp3 = call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
ret <2 x i32> %tmp3
}
@@ -199,8 +199,8 @@ define <2 x i32> @sqrdmulh_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
define <4 x i32> @sqrdmulh_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: sqrdmulh_4s:
;CHECK: sqrdmulh.4s
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
+ %tmp2 = load <4 x i32>, <4 x i32>* %B
%tmp3 = call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
ret <4 x i32> %tmp3
}
@@ -208,8 +208,8 @@ define <4 x i32> @sqrdmulh_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
define i32 @sqrdmulh_1s(i32* %A, i32* %B) nounwind {
;CHECK-LABEL: sqrdmulh_1s:
;CHECK: sqrdmulh s0, {{s[0-9]+}}, {{s[0-9]+}}
- %tmp1 = load i32* %A
- %tmp2 = load i32* %B
+ %tmp1 = load i32, i32* %A
+ %tmp2 = load i32, i32* %B
%tmp3 = call i32 @llvm.aarch64.neon.sqrdmulh.i32(i32 %tmp1, i32 %tmp2)
ret i32 %tmp3
}
@@ -223,8 +223,8 @@ declare i32 @llvm.aarch64.neon.sqrdmulh.i32(i32, i32) nounwind readnone
define <2 x float> @fmulx_2s(<2 x float>* %A, <2 x float>* %B) nounwind {
;CHECK-LABEL: fmulx_2s:
;CHECK: fmulx.2s
- %tmp1 = load <2 x float>* %A
- %tmp2 = load <2 x float>* %B
+ %tmp1 = load <2 x float>, <2 x float>* %A
+ %tmp2 = load <2 x float>, <2 x float>* %B
%tmp3 = call <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
ret <2 x float> %tmp3
}
@@ -232,8 +232,8 @@ define <2 x float> @fmulx_2s(<2 x float>* %A, <2 x float>* %B) nounwind {
define <4 x float> @fmulx_4s(<4 x float>* %A, <4 x float>* %B) nounwind {
;CHECK-LABEL: fmulx_4s:
;CHECK: fmulx.4s
- %tmp1 = load <4 x float>* %A
- %tmp2 = load <4 x float>* %B
+ %tmp1 = load <4 x float>, <4 x float>* %A
+ %tmp2 = load <4 x float>, <4 x float>* %B
%tmp3 = call <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
ret <4 x float> %tmp3
}
@@ -241,8 +241,8 @@ define <4 x float> @fmulx_4s(<4 x float>* %A, <4 x float>* %B) nounwind {
define <2 x double> @fmulx_2d(<2 x double>* %A, <2 x double>* %B) nounwind {
;CHECK-LABEL: fmulx_2d:
;CHECK: fmulx.2d
- %tmp1 = load <2 x double>* %A
- %tmp2 = load <2 x double>* %B
+ %tmp1 = load <2 x double>, <2 x double>* %A
+ %tmp2 = load <2 x double>, <2 x double>* %B
%tmp3 = call <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double> %tmp1, <2 x double> %tmp2)
ret <2 x double> %tmp3
}
@@ -254,9 +254,9 @@ declare <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double>, <2 x double>)
define <4 x i32> @smlal4s(<4 x i16>* %A, <4 x i16>* %B, <4 x i32>* %C) nounwind {
;CHECK-LABEL: smlal4s:
;CHECK: smlal.4s
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
- %tmp3 = load <4 x i32>* %C
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
+ %tmp3 = load <4 x i32>, <4 x i32>* %C
%tmp4 = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
%tmp5 = add <4 x i32> %tmp3, %tmp4
ret <4 x i32> %tmp5
@@ -265,9 +265,9 @@ define <4 x i32> @smlal4s(<4 x i16>* %A, <4 x i16>* %B, <4 x i32>* %C) nounwind
define <2 x i64> @smlal2d(<2 x i32>* %A, <2 x i32>* %B, <2 x i64>* %C) nounwind {
;CHECK-LABEL: smlal2d:
;CHECK: smlal.2d
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
- %tmp3 = load <2 x i64>* %C
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
+ %tmp3 = load <2 x i64>, <2 x i64>* %C
%tmp4 = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
%tmp5 = add <2 x i64> %tmp3, %tmp4
ret <2 x i64> %tmp5
@@ -276,9 +276,9 @@ define <2 x i64> @smlal2d(<2 x i32>* %A, <2 x i32>* %B, <2 x i64>* %C) nounwind
define <4 x i32> @smlsl4s(<4 x i16>* %A, <4 x i16>* %B, <4 x i32>* %C) nounwind {
;CHECK-LABEL: smlsl4s:
;CHECK: smlsl.4s
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
- %tmp3 = load <4 x i32>* %C
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
+ %tmp3 = load <4 x i32>, <4 x i32>* %C
%tmp4 = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
%tmp5 = sub <4 x i32> %tmp3, %tmp4
ret <4 x i32> %tmp5
@@ -287,9 +287,9 @@ define <4 x i32> @smlsl4s(<4 x i16>* %A, <4 x i16>* %B, <4 x i32>* %C) nounwind
define <2 x i64> @smlsl2d(<2 x i32>* %A, <2 x i32>* %B, <2 x i64>* %C) nounwind {
;CHECK-LABEL: smlsl2d:
;CHECK: smlsl.2d
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
- %tmp3 = load <2 x i64>* %C
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
+ %tmp3 = load <2 x i64>, <2 x i64>* %C
%tmp4 = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
%tmp5 = sub <2 x i64> %tmp3, %tmp4
ret <2 x i64> %tmp5
@@ -303,9 +303,9 @@ declare <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64>, <2 x i64>)
define <4 x i32> @sqdmlal4s(<4 x i16>* %A, <4 x i16>* %B, <4 x i32>* %C) nounwind {
;CHECK-LABEL: sqdmlal4s:
;CHECK: sqdmlal.4s
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
- %tmp3 = load <4 x i32>* %C
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
+ %tmp3 = load <4 x i32>, <4 x i32>* %C
%tmp4 = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
%tmp5 = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %tmp3, <4 x i32> %tmp4)
ret <4 x i32> %tmp5
@@ -314,9 +314,9 @@ define <4 x i32> @sqdmlal4s(<4 x i16>* %A, <4 x i16>* %B, <4 x i32>* %C) nounwin
define <2 x i64> @sqdmlal2d(<2 x i32>* %A, <2 x i32>* %B, <2 x i64>* %C) nounwind {
;CHECK-LABEL: sqdmlal2d:
;CHECK: sqdmlal.2d
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
- %tmp3 = load <2 x i64>* %C
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
+ %tmp3 = load <2 x i64>, <2 x i64>* %C
%tmp4 = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
%tmp5 = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %tmp3, <2 x i64> %tmp4)
ret <2 x i64> %tmp5
@@ -325,9 +325,9 @@ define <2 x i64> @sqdmlal2d(<2 x i32>* %A, <2 x i32>* %B, <2 x i64>* %C) nounwin
define <4 x i32> @sqdmlal2_4s(<8 x i16>* %A, <8 x i16>* %B, <4 x i32>* %C) nounwind {
;CHECK-LABEL: sqdmlal2_4s:
;CHECK: sqdmlal2.4s
- %load1 = load <8 x i16>* %A
- %load2 = load <8 x i16>* %B
- %tmp3 = load <4 x i32>* %C
+ %load1 = load <8 x i16>, <8 x i16>* %A
+ %load2 = load <8 x i16>, <8 x i16>* %B
+ %tmp3 = load <4 x i32>, <4 x i32>* %C
%tmp1 = shufflevector <8 x i16> %load1, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
%tmp2 = shufflevector <8 x i16> %load2, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
%tmp4 = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
@@ -338,9 +338,9 @@ define <4 x i32> @sqdmlal2_4s(<8 x i16>* %A, <8 x i16>* %B, <4 x i32>* %C) nounw
define <2 x i64> @sqdmlal2_2d(<4 x i32>* %A, <4 x i32>* %B, <2 x i64>* %C) nounwind {
;CHECK-LABEL: sqdmlal2_2d:
;CHECK: sqdmlal2.2d
- %load1 = load <4 x i32>* %A
- %load2 = load <4 x i32>* %B
- %tmp3 = load <2 x i64>* %C
+ %load1 = load <4 x i32>, <4 x i32>* %A
+ %load2 = load <4 x i32>, <4 x i32>* %B
+ %tmp3 = load <2 x i64>, <2 x i64>* %C
%tmp1 = shufflevector <4 x i32> %load1, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
%tmp2 = shufflevector <4 x i32> %load2, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
%tmp4 = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
@@ -351,9 +351,9 @@ define <2 x i64> @sqdmlal2_2d(<4 x i32>* %A, <4 x i32>* %B, <2 x i64>* %C) nounw
define <4 x i32> @sqdmlsl4s(<4 x i16>* %A, <4 x i16>* %B, <4 x i32>* %C) nounwind {
;CHECK-LABEL: sqdmlsl4s:
;CHECK: sqdmlsl.4s
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
- %tmp3 = load <4 x i32>* %C
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
+ %tmp3 = load <4 x i32>, <4 x i32>* %C
%tmp4 = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
%tmp5 = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %tmp3, <4 x i32> %tmp4)
ret <4 x i32> %tmp5
@@ -362,9 +362,9 @@ define <4 x i32> @sqdmlsl4s(<4 x i16>* %A, <4 x i16>* %B, <4 x i32>* %C) nounwin
define <2 x i64> @sqdmlsl2d(<2 x i32>* %A, <2 x i32>* %B, <2 x i64>* %C) nounwind {
;CHECK-LABEL: sqdmlsl2d:
;CHECK: sqdmlsl.2d
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
- %tmp3 = load <2 x i64>* %C
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
+ %tmp3 = load <2 x i64>, <2 x i64>* %C
%tmp4 = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
%tmp5 = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %tmp3, <2 x i64> %tmp4)
ret <2 x i64> %tmp5
@@ -373,9 +373,9 @@ define <2 x i64> @sqdmlsl2d(<2 x i32>* %A, <2 x i32>* %B, <2 x i64>* %C) nounwin
define <4 x i32> @sqdmlsl2_4s(<8 x i16>* %A, <8 x i16>* %B, <4 x i32>* %C) nounwind {
;CHECK-LABEL: sqdmlsl2_4s:
;CHECK: sqdmlsl2.4s
- %load1 = load <8 x i16>* %A
- %load2 = load <8 x i16>* %B
- %tmp3 = load <4 x i32>* %C
+ %load1 = load <8 x i16>, <8 x i16>* %A
+ %load2 = load <8 x i16>, <8 x i16>* %B
+ %tmp3 = load <4 x i32>, <4 x i32>* %C
%tmp1 = shufflevector <8 x i16> %load1, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
%tmp2 = shufflevector <8 x i16> %load2, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
%tmp4 = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
@@ -386,9 +386,9 @@ define <4 x i32> @sqdmlsl2_4s(<8 x i16>* %A, <8 x i16>* %B, <4 x i32>* %C) nounw
define <2 x i64> @sqdmlsl2_2d(<4 x i32>* %A, <4 x i32>* %B, <2 x i64>* %C) nounwind {
;CHECK-LABEL: sqdmlsl2_2d:
;CHECK: sqdmlsl2.2d
- %load1 = load <4 x i32>* %A
- %load2 = load <4 x i32>* %B
- %tmp3 = load <2 x i64>* %C
+ %load1 = load <4 x i32>, <4 x i32>* %A
+ %load2 = load <4 x i32>, <4 x i32>* %B
+ %tmp3 = load <2 x i64>, <2 x i64>* %C
%tmp1 = shufflevector <4 x i32> %load1, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
%tmp2 = shufflevector <4 x i32> %load2, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
%tmp4 = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
@@ -399,9 +399,9 @@ define <2 x i64> @sqdmlsl2_2d(<4 x i32>* %A, <4 x i32>* %B, <2 x i64>* %C) nounw
define <4 x i32> @umlal4s(<4 x i16>* %A, <4 x i16>* %B, <4 x i32>* %C) nounwind {
;CHECK-LABEL: umlal4s:
;CHECK: umlal.4s
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
- %tmp3 = load <4 x i32>* %C
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
+ %tmp3 = load <4 x i32>, <4 x i32>* %C
%tmp4 = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
%tmp5 = add <4 x i32> %tmp3, %tmp4
ret <4 x i32> %tmp5
@@ -410,9 +410,9 @@ define <4 x i32> @umlal4s(<4 x i16>* %A, <4 x i16>* %B, <4 x i32>* %C) nounwind
define <2 x i64> @umlal2d(<2 x i32>* %A, <2 x i32>* %B, <2 x i64>* %C) nounwind {
;CHECK-LABEL: umlal2d:
;CHECK: umlal.2d
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
- %tmp3 = load <2 x i64>* %C
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
+ %tmp3 = load <2 x i64>, <2 x i64>* %C
%tmp4 = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
%tmp5 = add <2 x i64> %tmp3, %tmp4
ret <2 x i64> %tmp5
@@ -421,9 +421,9 @@ define <2 x i64> @umlal2d(<2 x i32>* %A, <2 x i32>* %B, <2 x i64>* %C) nounwind
define <4 x i32> @umlsl4s(<4 x i16>* %A, <4 x i16>* %B, <4 x i32>* %C) nounwind {
;CHECK-LABEL: umlsl4s:
;CHECK: umlsl.4s
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
- %tmp3 = load <4 x i32>* %C
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
+ %tmp3 = load <4 x i32>, <4 x i32>* %C
%tmp4 = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
%tmp5 = sub <4 x i32> %tmp3, %tmp4
ret <4 x i32> %tmp5
@@ -432,9 +432,9 @@ define <4 x i32> @umlsl4s(<4 x i16>* %A, <4 x i16>* %B, <4 x i32>* %C) nounwind
define <2 x i64> @umlsl2d(<2 x i32>* %A, <2 x i32>* %B, <2 x i64>* %C) nounwind {
;CHECK-LABEL: umlsl2d:
;CHECK: umlsl.2d
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
- %tmp3 = load <2 x i64>* %C
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
+ %tmp3 = load <2 x i64>, <2 x i64>* %C
%tmp4 = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
%tmp5 = sub <2 x i64> %tmp3, %tmp4
ret <2 x i64> %tmp5
@@ -443,9 +443,9 @@ define <2 x i64> @umlsl2d(<2 x i32>* %A, <2 x i32>* %B, <2 x i64>* %C) nounwind
define <2 x float> @fmla_2s(<2 x float>* %A, <2 x float>* %B, <2 x float>* %C) nounwind {
;CHECK-LABEL: fmla_2s:
;CHECK: fmla.2s
- %tmp1 = load <2 x float>* %A
- %tmp2 = load <2 x float>* %B
- %tmp3 = load <2 x float>* %C
+ %tmp1 = load <2 x float>, <2 x float>* %A
+ %tmp2 = load <2 x float>, <2 x float>* %B
+ %tmp3 = load <2 x float>, <2 x float>* %C
%tmp4 = call <2 x float> @llvm.fma.v2f32(<2 x float> %tmp1, <2 x float> %tmp2, <2 x float> %tmp3)
ret <2 x float> %tmp4
}
@@ -453,9 +453,9 @@ define <2 x float> @fmla_2s(<2 x float>* %A, <2 x float>* %B, <2 x float>* %C) n
define <4 x float> @fmla_4s(<4 x float>* %A, <4 x float>* %B, <4 x float>* %C) nounwind {
;CHECK-LABEL: fmla_4s:
;CHECK: fmla.4s
- %tmp1 = load <4 x float>* %A
- %tmp2 = load <4 x float>* %B
- %tmp3 = load <4 x float>* %C
+ %tmp1 = load <4 x float>, <4 x float>* %A
+ %tmp2 = load <4 x float>, <4 x float>* %B
+ %tmp3 = load <4 x float>, <4 x float>* %C
%tmp4 = call <4 x float> @llvm.fma.v4f32(<4 x float> %tmp1, <4 x float> %tmp2, <4 x float> %tmp3)
ret <4 x float> %tmp4
}
@@ -463,9 +463,9 @@ define <4 x float> @fmla_4s(<4 x float>* %A, <4 x float>* %B, <4 x float>* %C) n
define <2 x double> @fmla_2d(<2 x double>* %A, <2 x double>* %B, <2 x double>* %C) nounwind {
;CHECK-LABEL: fmla_2d:
;CHECK: fmla.2d
- %tmp1 = load <2 x double>* %A
- %tmp2 = load <2 x double>* %B
- %tmp3 = load <2 x double>* %C
+ %tmp1 = load <2 x double>, <2 x double>* %A
+ %tmp2 = load <2 x double>, <2 x double>* %B
+ %tmp3 = load <2 x double>, <2 x double>* %C
%tmp4 = call <2 x double> @llvm.fma.v2f64(<2 x double> %tmp1, <2 x double> %tmp2, <2 x double> %tmp3)
ret <2 x double> %tmp4
}
@@ -477,9 +477,9 @@ declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>) n
define <2 x float> @fmls_2s(<2 x float>* %A, <2 x float>* %B, <2 x float>* %C) nounwind {
;CHECK-LABEL: fmls_2s:
;CHECK: fmls.2s
- %tmp1 = load <2 x float>* %A
- %tmp2 = load <2 x float>* %B
- %tmp3 = load <2 x float>* %C
+ %tmp1 = load <2 x float>, <2 x float>* %A
+ %tmp2 = load <2 x float>, <2 x float>* %B
+ %tmp3 = load <2 x float>, <2 x float>* %C
%tmp4 = fsub <2 x float> <float -0.0, float -0.0>, %tmp2
%tmp5 = call <2 x float> @llvm.fma.v2f32(<2 x float> %tmp1, <2 x float> %tmp4, <2 x float> %tmp3)
ret <2 x float> %tmp5
@@ -488,9 +488,9 @@ define <2 x float> @fmls_2s(<2 x float>* %A, <2 x float>* %B, <2 x float>* %C) n
define <4 x float> @fmls_4s(<4 x float>* %A, <4 x float>* %B, <4 x float>* %C) nounwind {
;CHECK-LABEL: fmls_4s:
;CHECK: fmls.4s
- %tmp1 = load <4 x float>* %A
- %tmp2 = load <4 x float>* %B
- %tmp3 = load <4 x float>* %C
+ %tmp1 = load <4 x float>, <4 x float>* %A
+ %tmp2 = load <4 x float>, <4 x float>* %B
+ %tmp3 = load <4 x float>, <4 x float>* %C
%tmp4 = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %tmp2
%tmp5 = call <4 x float> @llvm.fma.v4f32(<4 x float> %tmp1, <4 x float> %tmp4, <4 x float> %tmp3)
ret <4 x float> %tmp5
@@ -499,9 +499,9 @@ define <4 x float> @fmls_4s(<4 x float>* %A, <4 x float>* %B, <4 x float>* %C) n
define <2 x double> @fmls_2d(<2 x double>* %A, <2 x double>* %B, <2 x double>* %C) nounwind {
;CHECK-LABEL: fmls_2d:
;CHECK: fmls.2d
- %tmp1 = load <2 x double>* %A
- %tmp2 = load <2 x double>* %B
- %tmp3 = load <2 x double>* %C
+ %tmp1 = load <2 x double>, <2 x double>* %A
+ %tmp2 = load <2 x double>, <2 x double>* %B
+ %tmp3 = load <2 x double>, <2 x double>* %C
%tmp4 = fsub <2 x double> <double -0.0, double -0.0>, %tmp2
%tmp5 = call <2 x double> @llvm.fma.v2f64(<2 x double> %tmp1, <2 x double> %tmp4, <2 x double> %tmp3)
ret <2 x double> %tmp5
@@ -510,9 +510,9 @@ define <2 x double> @fmls_2d(<2 x double>* %A, <2 x double>* %B, <2 x double>* %
define <2 x float> @fmls_commuted_neg_2s(<2 x float>* %A, <2 x float>* %B, <2 x float>* %C) nounwind {
;CHECK-LABEL: fmls_commuted_neg_2s:
;CHECK: fmls.2s
- %tmp1 = load <2 x float>* %A
- %tmp2 = load <2 x float>* %B
- %tmp3 = load <2 x float>* %C
+ %tmp1 = load <2 x float>, <2 x float>* %A
+ %tmp2 = load <2 x float>, <2 x float>* %B
+ %tmp3 = load <2 x float>, <2 x float>* %C
%tmp4 = fsub <2 x float> <float -0.0, float -0.0>, %tmp2
%tmp5 = call <2 x float> @llvm.fma.v2f32(<2 x float> %tmp4, <2 x float> %tmp1, <2 x float> %tmp3)
ret <2 x float> %tmp5
@@ -521,9 +521,9 @@ define <2 x float> @fmls_commuted_neg_2s(<2 x float>* %A, <2 x float>* %B, <2 x
define <4 x float> @fmls_commuted_neg_4s(<4 x float>* %A, <4 x float>* %B, <4 x float>* %C) nounwind {
;CHECK-LABEL: fmls_commuted_neg_4s:
;CHECK: fmls.4s
- %tmp1 = load <4 x float>* %A
- %tmp2 = load <4 x float>* %B
- %tmp3 = load <4 x float>* %C
+ %tmp1 = load <4 x float>, <4 x float>* %A
+ %tmp2 = load <4 x float>, <4 x float>* %B
+ %tmp3 = load <4 x float>, <4 x float>* %C
%tmp4 = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %tmp2
%tmp5 = call <4 x float> @llvm.fma.v4f32(<4 x float> %tmp4, <4 x float> %tmp1, <4 x float> %tmp3)
ret <4 x float> %tmp5
@@ -532,9 +532,9 @@ define <4 x float> @fmls_commuted_neg_4s(<4 x float>* %A, <4 x float>* %B, <4 x
define <2 x double> @fmls_commuted_neg_2d(<2 x double>* %A, <2 x double>* %B, <2 x double>* %C) nounwind {
;CHECK-LABEL: fmls_commuted_neg_2d:
;CHECK: fmls.2d
- %tmp1 = load <2 x double>* %A
- %tmp2 = load <2 x double>* %B
- %tmp3 = load <2 x double>* %C
+ %tmp1 = load <2 x double>, <2 x double>* %A
+ %tmp2 = load <2 x double>, <2 x double>* %B
+ %tmp3 = load <2 x double>, <2 x double>* %C
%tmp4 = fsub <2 x double> <double -0.0, double -0.0>, %tmp2
%tmp5 = call <2 x double> @llvm.fma.v2f64(<2 x double> %tmp4, <2 x double> %tmp1, <2 x double> %tmp3)
ret <2 x double> %tmp5
@@ -609,8 +609,8 @@ define <4 x i16> @mul_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: mul_4h:
;CHECK-NOT: dup
;CHECK: mul.4h
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
%tmp3 = shufflevector <4 x i16> %tmp2, <4 x i16> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
%tmp4 = mul <4 x i16> %tmp1, %tmp3
ret <4 x i16> %tmp4
@@ -620,8 +620,8 @@ define <8 x i16> @mul_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: mul_8h:
;CHECK-NOT: dup
;CHECK: mul.8h
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
+ %tmp2 = load <8 x i16>, <8 x i16>* %B
%tmp3 = shufflevector <8 x i16> %tmp2, <8 x i16> %tmp2, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%tmp4 = mul <8 x i16> %tmp1, %tmp3
ret <8 x i16> %tmp4
@@ -631,8 +631,8 @@ define <2 x i32> @mul_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: mul_2s:
;CHECK-NOT: dup
;CHECK: mul.2s
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
%tmp3 = shufflevector <2 x i32> %tmp2, <2 x i32> %tmp2, <2 x i32> <i32 1, i32 1>
%tmp4 = mul <2 x i32> %tmp1, %tmp3
ret <2 x i32> %tmp4
@@ -642,8 +642,8 @@ define <4 x i32> @mul_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: mul_4s:
;CHECK-NOT: dup
;CHECK: mul.4s
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
+ %tmp2 = load <4 x i32>, <4 x i32>* %B
%tmp3 = shufflevector <4 x i32> %tmp2, <4 x i32> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
%tmp4 = mul <4 x i32> %tmp1, %tmp3
ret <4 x i32> %tmp4
@@ -661,8 +661,8 @@ define <2 x float> @fmul_lane_2s(<2 x float>* %A, <2 x float>* %B) nounwind {
;CHECK-LABEL: fmul_lane_2s:
;CHECK-NOT: dup
;CHECK: fmul.2s
- %tmp1 = load <2 x float>* %A
- %tmp2 = load <2 x float>* %B
+ %tmp1 = load <2 x float>, <2 x float>* %A
+ %tmp2 = load <2 x float>, <2 x float>* %B
%tmp3 = shufflevector <2 x float> %tmp2, <2 x float> %tmp2, <2 x i32> <i32 1, i32 1>
%tmp4 = fmul <2 x float> %tmp1, %tmp3
ret <2 x float> %tmp4
@@ -672,8 +672,8 @@ define <4 x float> @fmul_lane_4s(<4 x float>* %A, <4 x float>* %B) nounwind {
;CHECK-LABEL: fmul_lane_4s:
;CHECK-NOT: dup
;CHECK: fmul.4s
- %tmp1 = load <4 x float>* %A
- %tmp2 = load <4 x float>* %B
+ %tmp1 = load <4 x float>, <4 x float>* %A
+ %tmp2 = load <4 x float>, <4 x float>* %B
%tmp3 = shufflevector <4 x float> %tmp2, <4 x float> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
%tmp4 = fmul <4 x float> %tmp1, %tmp3
ret <4 x float> %tmp4
@@ -683,8 +683,8 @@ define <2 x double> @fmul_lane_2d(<2 x double>* %A, <2 x double>* %B) nounwind {
;CHECK-LABEL: fmul_lane_2d:
;CHECK-NOT: dup
;CHECK: fmul.2d
- %tmp1 = load <2 x double>* %A
- %tmp2 = load <2 x double>* %B
+ %tmp1 = load <2 x double>, <2 x double>* %A
+ %tmp2 = load <2 x double>, <2 x double>* %B
%tmp3 = shufflevector <2 x double> %tmp2, <2 x double> %tmp2, <2 x i32> <i32 1, i32 1>
%tmp4 = fmul <2 x double> %tmp1, %tmp3
ret <2 x double> %tmp4
@@ -714,8 +714,8 @@ define <2 x float> @fmulx_lane_2s(<2 x float>* %A, <2 x float>* %B) nounwind {
;CHECK-LABEL: fmulx_lane_2s:
;CHECK-NOT: dup
;CHECK: fmulx.2s
- %tmp1 = load <2 x float>* %A
- %tmp2 = load <2 x float>* %B
+ %tmp1 = load <2 x float>, <2 x float>* %A
+ %tmp2 = load <2 x float>, <2 x float>* %B
%tmp3 = shufflevector <2 x float> %tmp2, <2 x float> %tmp2, <2 x i32> <i32 1, i32 1>
%tmp4 = call <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> %tmp1, <2 x float> %tmp3)
ret <2 x float> %tmp4
@@ -725,8 +725,8 @@ define <4 x float> @fmulx_lane_4s(<4 x float>* %A, <4 x float>* %B) nounwind {
;CHECK-LABEL: fmulx_lane_4s:
;CHECK-NOT: dup
;CHECK: fmulx.4s
- %tmp1 = load <4 x float>* %A
- %tmp2 = load <4 x float>* %B
+ %tmp1 = load <4 x float>, <4 x float>* %A
+ %tmp2 = load <4 x float>, <4 x float>* %B
%tmp3 = shufflevector <4 x float> %tmp2, <4 x float> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
%tmp4 = call <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float> %tmp1, <4 x float> %tmp3)
ret <4 x float> %tmp4
@@ -736,8 +736,8 @@ define <2 x double> @fmulx_lane_2d(<2 x double>* %A, <2 x double>* %B) nounwind
;CHECK-LABEL: fmulx_lane_2d:
;CHECK-NOT: dup
;CHECK: fmulx.2d
- %tmp1 = load <2 x double>* %A
- %tmp2 = load <2 x double>* %B
+ %tmp1 = load <2 x double>, <2 x double>* %A
+ %tmp2 = load <2 x double>, <2 x double>* %B
%tmp3 = shufflevector <2 x double> %tmp2, <2 x double> %tmp2, <2 x i32> <i32 1, i32 1>
%tmp4 = call <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double> %tmp1, <2 x double> %tmp3)
ret <2 x double> %tmp4
@@ -747,8 +747,8 @@ define <4 x i16> @sqdmulh_lane_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: sqdmulh_lane_4h:
;CHECK-NOT: dup
;CHECK: sqdmulh.4h
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
%tmp3 = shufflevector <4 x i16> %tmp2, <4 x i16> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
%tmp4 = call <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp3)
ret <4 x i16> %tmp4
@@ -758,8 +758,8 @@ define <8 x i16> @sqdmulh_lane_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: sqdmulh_lane_8h:
;CHECK-NOT: dup
;CHECK: sqdmulh.8h
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
+ %tmp2 = load <8 x i16>, <8 x i16>* %B
%tmp3 = shufflevector <8 x i16> %tmp2, <8 x i16> %tmp2, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%tmp4 = call <8 x i16> @llvm.aarch64.neon.sqdmulh.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp3)
ret <8 x i16> %tmp4
@@ -769,8 +769,8 @@ define <2 x i32> @sqdmulh_lane_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: sqdmulh_lane_2s:
;CHECK-NOT: dup
;CHECK: sqdmulh.2s
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
%tmp3 = shufflevector <2 x i32> %tmp2, <2 x i32> %tmp2, <2 x i32> <i32 1, i32 1>
%tmp4 = call <2 x i32> @llvm.aarch64.neon.sqdmulh.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp3)
ret <2 x i32> %tmp4
@@ -780,8 +780,8 @@ define <4 x i32> @sqdmulh_lane_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: sqdmulh_lane_4s:
;CHECK-NOT: dup
;CHECK: sqdmulh.4s
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
+ %tmp2 = load <4 x i32>, <4 x i32>* %B
%tmp3 = shufflevector <4 x i32> %tmp2, <4 x i32> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
%tmp4 = call <4 x i32> @llvm.aarch64.neon.sqdmulh.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp3)
ret <4 x i32> %tmp4
@@ -800,8 +800,8 @@ define <4 x i16> @sqrdmulh_lane_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: sqrdmulh_lane_4h:
;CHECK-NOT: dup
;CHECK: sqrdmulh.4h
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
%tmp3 = shufflevector <4 x i16> %tmp2, <4 x i16> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
%tmp4 = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp3)
ret <4 x i16> %tmp4
@@ -811,8 +811,8 @@ define <8 x i16> @sqrdmulh_lane_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: sqrdmulh_lane_8h:
;CHECK-NOT: dup
;CHECK: sqrdmulh.8h
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
+ %tmp2 = load <8 x i16>, <8 x i16>* %B
%tmp3 = shufflevector <8 x i16> %tmp2, <8 x i16> %tmp2, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%tmp4 = call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp3)
ret <8 x i16> %tmp4
@@ -822,8 +822,8 @@ define <2 x i32> @sqrdmulh_lane_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: sqrdmulh_lane_2s:
;CHECK-NOT: dup
;CHECK: sqrdmulh.2s
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
%tmp3 = shufflevector <2 x i32> %tmp2, <2 x i32> %tmp2, <2 x i32> <i32 1, i32 1>
%tmp4 = call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp3)
ret <2 x i32> %tmp4
@@ -833,8 +833,8 @@ define <4 x i32> @sqrdmulh_lane_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: sqrdmulh_lane_4s:
;CHECK-NOT: dup
;CHECK: sqrdmulh.4s
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
+ %tmp2 = load <4 x i32>, <4 x i32>* %B
%tmp3 = shufflevector <4 x i32> %tmp2, <4 x i32> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
%tmp4 = call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp3)
ret <4 x i32> %tmp4
@@ -853,8 +853,8 @@ define <4 x i32> @sqdmull_lane_4s(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: sqdmull_lane_4s:
;CHECK-NOT: dup
;CHECK: sqdmull.4s
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
%tmp3 = shufflevector <4 x i16> %tmp2, <4 x i16> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
%tmp4 = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp3)
ret <4 x i32> %tmp4
@@ -864,8 +864,8 @@ define <2 x i64> @sqdmull_lane_2d(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: sqdmull_lane_2d:
;CHECK-NOT: dup
;CHECK: sqdmull.2d
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
%tmp3 = shufflevector <2 x i32> %tmp2, <2 x i32> %tmp2, <2 x i32> <i32 1, i32 1>
%tmp4 = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp3)
ret <2 x i64> %tmp4
@@ -875,8 +875,8 @@ define <4 x i32> @sqdmull2_lane_4s(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: sqdmull2_lane_4s:
;CHECK-NOT: dup
;CHECK: sqdmull2.4s
- %load1 = load <8 x i16>* %A
- %load2 = load <8 x i16>* %B
+ %load1 = load <8 x i16>, <8 x i16>* %A
+ %load2 = load <8 x i16>, <8 x i16>* %B
%tmp1 = shufflevector <8 x i16> %load1, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
%tmp2 = shufflevector <8 x i16> %load2, <8 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
%tmp4 = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
@@ -887,8 +887,8 @@ define <2 x i64> @sqdmull2_lane_2d(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: sqdmull2_lane_2d:
;CHECK-NOT: dup
;CHECK: sqdmull2.2d
- %load1 = load <4 x i32>* %A
- %load2 = load <4 x i32>* %B
+ %load1 = load <4 x i32>, <4 x i32>* %A
+ %load2 = load <4 x i32>, <4 x i32>* %B
%tmp1 = shufflevector <4 x i32> %load1, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
%tmp2 = shufflevector <4 x i32> %load2, <4 x i32> undef, <2 x i32> <i32 1, i32 1>
%tmp4 = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
@@ -899,8 +899,8 @@ define <4 x i32> @umull_lane_4s(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: umull_lane_4s:
;CHECK-NOT: dup
;CHECK: umull.4s
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
%tmp3 = shufflevector <4 x i16> %tmp2, <4 x i16> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
%tmp4 = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp3)
ret <4 x i32> %tmp4
@@ -910,8 +910,8 @@ define <2 x i64> @umull_lane_2d(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: umull_lane_2d:
;CHECK-NOT: dup
;CHECK: umull.2d
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
%tmp3 = shufflevector <2 x i32> %tmp2, <2 x i32> %tmp2, <2 x i32> <i32 1, i32 1>
%tmp4 = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp3)
ret <2 x i64> %tmp4
@@ -921,8 +921,8 @@ define <4 x i32> @smull_lane_4s(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: smull_lane_4s:
;CHECK-NOT: dup
;CHECK: smull.4s
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
%tmp3 = shufflevector <4 x i16> %tmp2, <4 x i16> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
%tmp4 = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp3)
ret <4 x i32> %tmp4
@@ -932,8 +932,8 @@ define <2 x i64> @smull_lane_2d(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: smull_lane_2d:
;CHECK-NOT: dup
;CHECK: smull.2d
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
%tmp3 = shufflevector <2 x i32> %tmp2, <2 x i32> %tmp2, <2 x i32> <i32 1, i32 1>
%tmp4 = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp3)
ret <2 x i64> %tmp4
@@ -943,9 +943,9 @@ define <4 x i32> @smlal_lane_4s(<4 x i16>* %A, <4 x i16>* %B, <4 x i32>* %C) nou
;CHECK-LABEL: smlal_lane_4s:
;CHECK-NOT: dup
;CHECK: smlal.4s
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
- %tmp3 = load <4 x i32>* %C
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
+ %tmp3 = load <4 x i32>, <4 x i32>* %C
%tmp4 = shufflevector <4 x i16> %tmp2, <4 x i16> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
%tmp5 = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp4)
%tmp6 = add <4 x i32> %tmp3, %tmp5
@@ -956,9 +956,9 @@ define <2 x i64> @smlal_lane_2d(<2 x i32>* %A, <2 x i32>* %B, <2 x i64>* %C) nou
;CHECK-LABEL: smlal_lane_2d:
;CHECK-NOT: dup
;CHECK: smlal.2d
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
- %tmp3 = load <2 x i64>* %C
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
+ %tmp3 = load <2 x i64>, <2 x i64>* %C
%tmp4 = shufflevector <2 x i32> %tmp2, <2 x i32> %tmp2, <2 x i32> <i32 1, i32 1>
%tmp5 = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp4)
%tmp6 = add <2 x i64> %tmp3, %tmp5
@@ -969,9 +969,9 @@ define <4 x i32> @sqdmlal_lane_4s(<4 x i16>* %A, <4 x i16>* %B, <4 x i32>* %C) n
;CHECK-LABEL: sqdmlal_lane_4s:
;CHECK-NOT: dup
;CHECK: sqdmlal.4s
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
- %tmp3 = load <4 x i32>* %C
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
+ %tmp3 = load <4 x i32>, <4 x i32>* %C
%tmp4 = shufflevector <4 x i16> %tmp2, <4 x i16> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
%tmp5 = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp4)
%tmp6 = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %tmp3, <4 x i32> %tmp5)
@@ -982,9 +982,9 @@ define <2 x i64> @sqdmlal_lane_2d(<2 x i32>* %A, <2 x i32>* %B, <2 x i64>* %C) n
;CHECK-LABEL: sqdmlal_lane_2d:
;CHECK-NOT: dup
;CHECK: sqdmlal.2d
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
- %tmp3 = load <2 x i64>* %C
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
+ %tmp3 = load <2 x i64>, <2 x i64>* %C
%tmp4 = shufflevector <2 x i32> %tmp2, <2 x i32> %tmp2, <2 x i32> <i32 1, i32 1>
%tmp5 = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp4)
%tmp6 = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %tmp3, <2 x i64> %tmp5)
@@ -995,9 +995,9 @@ define <4 x i32> @sqdmlal2_lane_4s(<8 x i16>* %A, <8 x i16>* %B, <4 x i32>* %C)
;CHECK-LABEL: sqdmlal2_lane_4s:
;CHECK-NOT: dup
;CHECK: sqdmlal2.4s
- %load1 = load <8 x i16>* %A
- %load2 = load <8 x i16>* %B
- %tmp3 = load <4 x i32>* %C
+ %load1 = load <8 x i16>, <8 x i16>* %A
+ %load2 = load <8 x i16>, <8 x i16>* %B
+ %tmp3 = load <4 x i32>, <4 x i32>* %C
%tmp1 = shufflevector <8 x i16> %load1, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
%tmp2 = shufflevector <8 x i16> %load2, <8 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
%tmp5 = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
@@ -1009,9 +1009,9 @@ define <2 x i64> @sqdmlal2_lane_2d(<4 x i32>* %A, <4 x i32>* %B, <2 x i64>* %C)
;CHECK-LABEL: sqdmlal2_lane_2d:
;CHECK-NOT: dup
;CHECK: sqdmlal2.2d
- %load1 = load <4 x i32>* %A
- %load2 = load <4 x i32>* %B
- %tmp3 = load <2 x i64>* %C
+ %load1 = load <4 x i32>, <4 x i32>* %A
+ %load2 = load <4 x i32>, <4 x i32>* %B
+ %tmp3 = load <2 x i64>, <2 x i64>* %C
%tmp1 = shufflevector <4 x i32> %load1, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
%tmp2 = shufflevector <4 x i32> %load2, <4 x i32> undef, <2 x i32> <i32 1, i32 1>
%tmp5 = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
@@ -1069,9 +1069,9 @@ define <4 x i32> @umlal_lane_4s(<4 x i16>* %A, <4 x i16>* %B, <4 x i32>* %C) nou
;CHECK-LABEL: umlal_lane_4s:
;CHECK-NOT: dup
;CHECK: umlal.4s
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
- %tmp3 = load <4 x i32>* %C
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
+ %tmp3 = load <4 x i32>, <4 x i32>* %C
%tmp4 = shufflevector <4 x i16> %tmp2, <4 x i16> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
%tmp5 = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp4)
%tmp6 = add <4 x i32> %tmp3, %tmp5
@@ -1082,9 +1082,9 @@ define <2 x i64> @umlal_lane_2d(<2 x i32>* %A, <2 x i32>* %B, <2 x i64>* %C) nou
;CHECK-LABEL: umlal_lane_2d:
;CHECK-NOT: dup
;CHECK: umlal.2d
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
- %tmp3 = load <2 x i64>* %C
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
+ %tmp3 = load <2 x i64>, <2 x i64>* %C
%tmp4 = shufflevector <2 x i32> %tmp2, <2 x i32> %tmp2, <2 x i32> <i32 1, i32 1>
%tmp5 = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp4)
%tmp6 = add <2 x i64> %tmp3, %tmp5
@@ -1096,9 +1096,9 @@ define <4 x i32> @smlsl_lane_4s(<4 x i16>* %A, <4 x i16>* %B, <4 x i32>* %C) nou
;CHECK-LABEL: smlsl_lane_4s:
;CHECK-NOT: dup
;CHECK: smlsl.4s
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
- %tmp3 = load <4 x i32>* %C
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
+ %tmp3 = load <4 x i32>, <4 x i32>* %C
%tmp4 = shufflevector <4 x i16> %tmp2, <4 x i16> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
%tmp5 = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp4)
%tmp6 = sub <4 x i32> %tmp3, %tmp5
@@ -1109,9 +1109,9 @@ define <2 x i64> @smlsl_lane_2d(<2 x i32>* %A, <2 x i32>* %B, <2 x i64>* %C) nou
;CHECK-LABEL: smlsl_lane_2d:
;CHECK-NOT: dup
;CHECK: smlsl.2d
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
- %tmp3 = load <2 x i64>* %C
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
+ %tmp3 = load <2 x i64>, <2 x i64>* %C
%tmp4 = shufflevector <2 x i32> %tmp2, <2 x i32> %tmp2, <2 x i32> <i32 1, i32 1>
%tmp5 = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp4)
%tmp6 = sub <2 x i64> %tmp3, %tmp5
@@ -1122,9 +1122,9 @@ define <4 x i32> @sqdmlsl_lane_4s(<4 x i16>* %A, <4 x i16>* %B, <4 x i32>* %C) n
;CHECK-LABEL: sqdmlsl_lane_4s:
;CHECK-NOT: dup
;CHECK: sqdmlsl.4s
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
- %tmp3 = load <4 x i32>* %C
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
+ %tmp3 = load <4 x i32>, <4 x i32>* %C
%tmp4 = shufflevector <4 x i16> %tmp2, <4 x i16> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
%tmp5 = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp4)
%tmp6 = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %tmp3, <4 x i32> %tmp5)
@@ -1135,9 +1135,9 @@ define <2 x i64> @sqdmlsl_lane_2d(<2 x i32>* %A, <2 x i32>* %B, <2 x i64>* %C) n
;CHECK-LABEL: sqdmlsl_lane_2d:
;CHECK-NOT: dup
;CHECK: sqdmlsl.2d
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
- %tmp3 = load <2 x i64>* %C
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
+ %tmp3 = load <2 x i64>, <2 x i64>* %C
%tmp4 = shufflevector <2 x i32> %tmp2, <2 x i32> %tmp2, <2 x i32> <i32 1, i32 1>
%tmp5 = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp4)
%tmp6 = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %tmp3, <2 x i64> %tmp5)
@@ -1148,9 +1148,9 @@ define <4 x i32> @sqdmlsl2_lane_4s(<8 x i16>* %A, <8 x i16>* %B, <4 x i32>* %C)
;CHECK-LABEL: sqdmlsl2_lane_4s:
;CHECK-NOT: dup
;CHECK: sqdmlsl2.4s
- %load1 = load <8 x i16>* %A
- %load2 = load <8 x i16>* %B
- %tmp3 = load <4 x i32>* %C
+ %load1 = load <8 x i16>, <8 x i16>* %A
+ %load2 = load <8 x i16>, <8 x i16>* %B
+ %tmp3 = load <4 x i32>, <4 x i32>* %C
%tmp1 = shufflevector <8 x i16> %load1, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
%tmp2 = shufflevector <8 x i16> %load2, <8 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
%tmp5 = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
@@ -1162,9 +1162,9 @@ define <2 x i64> @sqdmlsl2_lane_2d(<4 x i32>* %A, <4 x i32>* %B, <2 x i64>* %C)
;CHECK-LABEL: sqdmlsl2_lane_2d:
;CHECK-NOT: dup
;CHECK: sqdmlsl2.2d
- %load1 = load <4 x i32>* %A
- %load2 = load <4 x i32>* %B
- %tmp3 = load <2 x i64>* %C
+ %load1 = load <4 x i32>, <4 x i32>* %A
+ %load2 = load <4 x i32>, <4 x i32>* %B
+ %tmp3 = load <2 x i64>, <2 x i64>* %C
%tmp1 = shufflevector <4 x i32> %load1, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
%tmp2 = shufflevector <4 x i32> %load2, <4 x i32> undef, <2 x i32> <i32 1, i32 1>
%tmp5 = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
@@ -1176,9 +1176,9 @@ define <4 x i32> @umlsl_lane_4s(<4 x i16>* %A, <4 x i16>* %B, <4 x i32>* %C) nou
;CHECK-LABEL: umlsl_lane_4s:
;CHECK-NOT: dup
;CHECK: umlsl.4s
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
- %tmp3 = load <4 x i32>* %C
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
+ %tmp3 = load <4 x i32>, <4 x i32>* %C
%tmp4 = shufflevector <4 x i16> %tmp2, <4 x i16> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
%tmp5 = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp4)
%tmp6 = sub <4 x i32> %tmp3, %tmp5
@@ -1189,9 +1189,9 @@ define <2 x i64> @umlsl_lane_2d(<2 x i32>* %A, <2 x i32>* %B, <2 x i64>* %C) nou
;CHECK-LABEL: umlsl_lane_2d:
;CHECK-NOT: dup
;CHECK: umlsl.2d
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
- %tmp3 = load <2 x i64>* %C
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
+ %tmp3 = load <2 x i64>, <2 x i64>* %C
%tmp4 = shufflevector <2 x i32> %tmp2, <2 x i32> %tmp2, <2 x i32> <i32 1, i32 1>
%tmp5 = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp4)
%tmp6 = sub <2 x i64> %tmp3, %tmp5
diff --git a/test/CodeGen/AArch64/arm64-volatile.ll b/test/CodeGen/AArch64/arm64-volatile.ll
index e00ac5acb5fb..28facb6da7c6 100644
--- a/test/CodeGen/AArch64/arm64-volatile.ll
+++ b/test/CodeGen/AArch64/arm64-volatile.ll
@@ -4,10 +4,10 @@ define i64 @normal_load(i64* nocapture %bar) nounwind readonly {
; CHECK: ldp
; CHECK-NEXT: add
; CHECK-NEXT: ret
- %add.ptr = getelementptr inbounds i64* %bar, i64 1
- %tmp = load i64* %add.ptr, align 8
- %add.ptr1 = getelementptr inbounds i64* %bar, i64 2
- %tmp1 = load i64* %add.ptr1, align 8
+ %add.ptr = getelementptr inbounds i64, i64* %bar, i64 1
+ %tmp = load i64, i64* %add.ptr, align 8
+ %add.ptr1 = getelementptr inbounds i64, i64* %bar, i64 2
+ %tmp1 = load i64, i64* %add.ptr1, align 8
%add = add nsw i64 %tmp1, %tmp
ret i64 %add
}
@@ -18,10 +18,10 @@ define i64 @volatile_load(i64* nocapture %bar) nounwind {
; CHECK-NEXT: ldr
; CHECK-NEXT: add
; CHECK-NEXT: ret
- %add.ptr = getelementptr inbounds i64* %bar, i64 1
- %tmp = load volatile i64* %add.ptr, align 8
- %add.ptr1 = getelementptr inbounds i64* %bar, i64 2
- %tmp1 = load volatile i64* %add.ptr1, align 8
+ %add.ptr = getelementptr inbounds i64, i64* %bar, i64 1
+ %tmp = load volatile i64, i64* %add.ptr, align 8
+ %add.ptr1 = getelementptr inbounds i64, i64* %bar, i64 2
+ %tmp1 = load volatile i64, i64* %add.ptr1, align 8
%add = add nsw i64 %tmp1, %tmp
ret i64 %add
}
diff --git a/test/CodeGen/AArch64/arm64-vqadd.ll b/test/CodeGen/AArch64/arm64-vqadd.ll
index 20f7e2c7a893..9932899c6424 100644
--- a/test/CodeGen/AArch64/arm64-vqadd.ll
+++ b/test/CodeGen/AArch64/arm64-vqadd.ll
@@ -3,8 +3,8 @@
define <8 x i8> @sqadd8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: sqadd8b:
;CHECK: sqadd.8b
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
+ %tmp2 = load <8 x i8>, <8 x i8>* %B
%tmp3 = call <8 x i8> @llvm.aarch64.neon.sqadd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
ret <8 x i8> %tmp3
}
@@ -12,8 +12,8 @@ define <8 x i8> @sqadd8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
define <4 x i16> @sqadd4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: sqadd4h:
;CHECK: sqadd.4h
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
%tmp3 = call <4 x i16> @llvm.aarch64.neon.sqadd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
ret <4 x i16> %tmp3
}
@@ -21,8 +21,8 @@ define <4 x i16> @sqadd4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
define <2 x i32> @sqadd2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: sqadd2s:
;CHECK: sqadd.2s
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
%tmp3 = call <2 x i32> @llvm.aarch64.neon.sqadd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
ret <2 x i32> %tmp3
}
@@ -30,8 +30,8 @@ define <2 x i32> @sqadd2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
define <8 x i8> @uqadd8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: uqadd8b:
;CHECK: uqadd.8b
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
+ %tmp2 = load <8 x i8>, <8 x i8>* %B
%tmp3 = call <8 x i8> @llvm.aarch64.neon.uqadd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
ret <8 x i8> %tmp3
}
@@ -39,8 +39,8 @@ define <8 x i8> @uqadd8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
define <4 x i16> @uqadd4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: uqadd4h:
;CHECK: uqadd.4h
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
%tmp3 = call <4 x i16> @llvm.aarch64.neon.uqadd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
ret <4 x i16> %tmp3
}
@@ -48,8 +48,8 @@ define <4 x i16> @uqadd4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
define <2 x i32> @uqadd2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: uqadd2s:
;CHECK: uqadd.2s
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
%tmp3 = call <2 x i32> @llvm.aarch64.neon.uqadd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
ret <2 x i32> %tmp3
}
@@ -57,8 +57,8 @@ define <2 x i32> @uqadd2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
define <16 x i8> @sqadd16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: sqadd16b:
;CHECK: sqadd.16b
- %tmp1 = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
+ %tmp1 = load <16 x i8>, <16 x i8>* %A
+ %tmp2 = load <16 x i8>, <16 x i8>* %B
%tmp3 = call <16 x i8> @llvm.aarch64.neon.sqadd.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
ret <16 x i8> %tmp3
}
@@ -66,8 +66,8 @@ define <16 x i8> @sqadd16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
define <8 x i16> @sqadd8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: sqadd8h:
;CHECK: sqadd.8h
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
+ %tmp2 = load <8 x i16>, <8 x i16>* %B
%tmp3 = call <8 x i16> @llvm.aarch64.neon.sqadd.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
ret <8 x i16> %tmp3
}
@@ -75,8 +75,8 @@ define <8 x i16> @sqadd8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
define <4 x i32> @sqadd4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: sqadd4s:
;CHECK: sqadd.4s
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
+ %tmp2 = load <4 x i32>, <4 x i32>* %B
%tmp3 = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
ret <4 x i32> %tmp3
}
@@ -84,8 +84,8 @@ define <4 x i32> @sqadd4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
define <2 x i64> @sqadd2d(<2 x i64>* %A, <2 x i64>* %B) nounwind {
;CHECK-LABEL: sqadd2d:
;CHECK: sqadd.2d
- %tmp1 = load <2 x i64>* %A
- %tmp2 = load <2 x i64>* %B
+ %tmp1 = load <2 x i64>, <2 x i64>* %A
+ %tmp2 = load <2 x i64>, <2 x i64>* %B
%tmp3 = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
ret <2 x i64> %tmp3
}
@@ -93,8 +93,8 @@ define <2 x i64> @sqadd2d(<2 x i64>* %A, <2 x i64>* %B) nounwind {
define <16 x i8> @uqadd16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: uqadd16b:
;CHECK: uqadd.16b
- %tmp1 = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
+ %tmp1 = load <16 x i8>, <16 x i8>* %A
+ %tmp2 = load <16 x i8>, <16 x i8>* %B
%tmp3 = call <16 x i8> @llvm.aarch64.neon.uqadd.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
ret <16 x i8> %tmp3
}
@@ -102,8 +102,8 @@ define <16 x i8> @uqadd16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
define <8 x i16> @uqadd8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: uqadd8h:
;CHECK: uqadd.8h
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
+ %tmp2 = load <8 x i16>, <8 x i16>* %B
%tmp3 = call <8 x i16> @llvm.aarch64.neon.uqadd.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
ret <8 x i16> %tmp3
}
@@ -111,8 +111,8 @@ define <8 x i16> @uqadd8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
define <4 x i32> @uqadd4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: uqadd4s:
;CHECK: uqadd.4s
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
+ %tmp2 = load <4 x i32>, <4 x i32>* %B
%tmp3 = call <4 x i32> @llvm.aarch64.neon.uqadd.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
ret <4 x i32> %tmp3
}
@@ -120,8 +120,8 @@ define <4 x i32> @uqadd4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
define <2 x i64> @uqadd2d(<2 x i64>* %A, <2 x i64>* %B) nounwind {
;CHECK-LABEL: uqadd2d:
;CHECK: uqadd.2d
- %tmp1 = load <2 x i64>* %A
- %tmp2 = load <2 x i64>* %B
+ %tmp1 = load <2 x i64>, <2 x i64>* %A
+ %tmp2 = load <2 x i64>, <2 x i64>* %B
%tmp3 = call <2 x i64> @llvm.aarch64.neon.uqadd.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
ret <2 x i64> %tmp3
}
@@ -149,8 +149,8 @@ declare <2 x i64> @llvm.aarch64.neon.uqadd.v2i64(<2 x i64>, <2 x i64>) nounwind
define <8 x i8> @usqadd8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: usqadd8b:
;CHECK: usqadd.8b
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
+ %tmp2 = load <8 x i8>, <8 x i8>* %B
%tmp3 = call <8 x i8> @llvm.aarch64.neon.usqadd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
ret <8 x i8> %tmp3
}
@@ -158,8 +158,8 @@ define <8 x i8> @usqadd8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
define <4 x i16> @usqadd4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: usqadd4h:
;CHECK: usqadd.4h
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
%tmp3 = call <4 x i16> @llvm.aarch64.neon.usqadd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
ret <4 x i16> %tmp3
}
@@ -167,8 +167,8 @@ define <4 x i16> @usqadd4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
define <2 x i32> @usqadd2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: usqadd2s:
;CHECK: usqadd.2s
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
%tmp3 = call <2 x i32> @llvm.aarch64.neon.usqadd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
ret <2 x i32> %tmp3
}
@@ -176,8 +176,8 @@ define <2 x i32> @usqadd2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
define <16 x i8> @usqadd16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: usqadd16b:
;CHECK: usqadd.16b
- %tmp1 = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
+ %tmp1 = load <16 x i8>, <16 x i8>* %A
+ %tmp2 = load <16 x i8>, <16 x i8>* %B
%tmp3 = call <16 x i8> @llvm.aarch64.neon.usqadd.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
ret <16 x i8> %tmp3
}
@@ -185,8 +185,8 @@ define <16 x i8> @usqadd16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
define <8 x i16> @usqadd8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: usqadd8h:
;CHECK: usqadd.8h
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
+ %tmp2 = load <8 x i16>, <8 x i16>* %B
%tmp3 = call <8 x i16> @llvm.aarch64.neon.usqadd.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
ret <8 x i16> %tmp3
}
@@ -194,8 +194,8 @@ define <8 x i16> @usqadd8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
define <4 x i32> @usqadd4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: usqadd4s:
;CHECK: usqadd.4s
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
+ %tmp2 = load <4 x i32>, <4 x i32>* %B
%tmp3 = call <4 x i32> @llvm.aarch64.neon.usqadd.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
ret <4 x i32> %tmp3
}
@@ -203,8 +203,8 @@ define <4 x i32> @usqadd4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
define <2 x i64> @usqadd2d(<2 x i64>* %A, <2 x i64>* %B) nounwind {
;CHECK-LABEL: usqadd2d:
;CHECK: usqadd.2d
- %tmp1 = load <2 x i64>* %A
- %tmp2 = load <2 x i64>* %B
+ %tmp1 = load <2 x i64>, <2 x i64>* %A
+ %tmp2 = load <2 x i64>, <2 x i64>* %B
%tmp3 = call <2 x i64> @llvm.aarch64.neon.usqadd.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
ret <2 x i64> %tmp3
}
@@ -238,8 +238,8 @@ declare <2 x i64> @llvm.aarch64.neon.usqadd.v2i64(<2 x i64>, <2 x i64>) nounwind
define <8 x i8> @suqadd8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: suqadd8b:
;CHECK: suqadd.8b
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
+ %tmp2 = load <8 x i8>, <8 x i8>* %B
%tmp3 = call <8 x i8> @llvm.aarch64.neon.suqadd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
ret <8 x i8> %tmp3
}
@@ -247,8 +247,8 @@ define <8 x i8> @suqadd8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
define <4 x i16> @suqadd4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: suqadd4h:
;CHECK: suqadd.4h
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
%tmp3 = call <4 x i16> @llvm.aarch64.neon.suqadd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
ret <4 x i16> %tmp3
}
@@ -256,8 +256,8 @@ define <4 x i16> @suqadd4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
define <2 x i32> @suqadd2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: suqadd2s:
;CHECK: suqadd.2s
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
%tmp3 = call <2 x i32> @llvm.aarch64.neon.suqadd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
ret <2 x i32> %tmp3
}
@@ -265,8 +265,8 @@ define <2 x i32> @suqadd2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
define <16 x i8> @suqadd16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: suqadd16b:
;CHECK: suqadd.16b
- %tmp1 = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
+ %tmp1 = load <16 x i8>, <16 x i8>* %A
+ %tmp2 = load <16 x i8>, <16 x i8>* %B
%tmp3 = call <16 x i8> @llvm.aarch64.neon.suqadd.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
ret <16 x i8> %tmp3
}
@@ -274,8 +274,8 @@ define <16 x i8> @suqadd16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
define <8 x i16> @suqadd8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: suqadd8h:
;CHECK: suqadd.8h
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
+ %tmp2 = load <8 x i16>, <8 x i16>* %B
%tmp3 = call <8 x i16> @llvm.aarch64.neon.suqadd.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
ret <8 x i16> %tmp3
}
@@ -283,8 +283,8 @@ define <8 x i16> @suqadd8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
define <4 x i32> @suqadd4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: suqadd4s:
;CHECK: suqadd.4s
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
+ %tmp2 = load <4 x i32>, <4 x i32>* %B
%tmp3 = call <4 x i32> @llvm.aarch64.neon.suqadd.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
ret <4 x i32> %tmp3
}
@@ -292,8 +292,8 @@ define <4 x i32> @suqadd4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
define <2 x i64> @suqadd2d(<2 x i64>* %A, <2 x i64>* %B) nounwind {
;CHECK-LABEL: suqadd2d:
;CHECK: suqadd.2d
- %tmp1 = load <2 x i64>* %A
- %tmp2 = load <2 x i64>* %B
+ %tmp1 = load <2 x i64>, <2 x i64>* %A
+ %tmp2 = load <2 x i64>, <2 x i64>* %B
%tmp3 = call <2 x i64> @llvm.aarch64.neon.suqadd.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
ret <2 x i64> %tmp3
}
diff --git a/test/CodeGen/AArch64/arm64-vqsub.ll b/test/CodeGen/AArch64/arm64-vqsub.ll
index dde3ac3478e4..4fc588d689f9 100644
--- a/test/CodeGen/AArch64/arm64-vqsub.ll
+++ b/test/CodeGen/AArch64/arm64-vqsub.ll
@@ -3,8 +3,8 @@
define <8 x i8> @sqsub8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: sqsub8b:
;CHECK: sqsub.8b
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
+ %tmp2 = load <8 x i8>, <8 x i8>* %B
%tmp3 = call <8 x i8> @llvm.aarch64.neon.sqsub.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
ret <8 x i8> %tmp3
}
@@ -12,8 +12,8 @@ define <8 x i8> @sqsub8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
define <4 x i16> @sqsub4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: sqsub4h:
;CHECK: sqsub.4h
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
%tmp3 = call <4 x i16> @llvm.aarch64.neon.sqsub.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
ret <4 x i16> %tmp3
}
@@ -21,8 +21,8 @@ define <4 x i16> @sqsub4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
define <2 x i32> @sqsub2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: sqsub2s:
;CHECK: sqsub.2s
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
%tmp3 = call <2 x i32> @llvm.aarch64.neon.sqsub.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
ret <2 x i32> %tmp3
}
@@ -30,8 +30,8 @@ define <2 x i32> @sqsub2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
define <8 x i8> @uqsub8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: uqsub8b:
;CHECK: uqsub.8b
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
+ %tmp2 = load <8 x i8>, <8 x i8>* %B
%tmp3 = call <8 x i8> @llvm.aarch64.neon.uqsub.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
ret <8 x i8> %tmp3
}
@@ -39,8 +39,8 @@ define <8 x i8> @uqsub8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
define <4 x i16> @uqsub4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: uqsub4h:
;CHECK: uqsub.4h
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
%tmp3 = call <4 x i16> @llvm.aarch64.neon.uqsub.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
ret <4 x i16> %tmp3
}
@@ -48,8 +48,8 @@ define <4 x i16> @uqsub4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
define <2 x i32> @uqsub2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: uqsub2s:
;CHECK: uqsub.2s
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
%tmp3 = call <2 x i32> @llvm.aarch64.neon.uqsub.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
ret <2 x i32> %tmp3
}
@@ -57,8 +57,8 @@ define <2 x i32> @uqsub2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
define <16 x i8> @sqsub16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: sqsub16b:
;CHECK: sqsub.16b
- %tmp1 = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
+ %tmp1 = load <16 x i8>, <16 x i8>* %A
+ %tmp2 = load <16 x i8>, <16 x i8>* %B
%tmp3 = call <16 x i8> @llvm.aarch64.neon.sqsub.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
ret <16 x i8> %tmp3
}
@@ -66,8 +66,8 @@ define <16 x i8> @sqsub16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
define <8 x i16> @sqsub8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: sqsub8h:
;CHECK: sqsub.8h
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
+ %tmp2 = load <8 x i16>, <8 x i16>* %B
%tmp3 = call <8 x i16> @llvm.aarch64.neon.sqsub.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
ret <8 x i16> %tmp3
}
@@ -75,8 +75,8 @@ define <8 x i16> @sqsub8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
define <4 x i32> @sqsub4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: sqsub4s:
;CHECK: sqsub.4s
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
+ %tmp2 = load <4 x i32>, <4 x i32>* %B
%tmp3 = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
ret <4 x i32> %tmp3
}
@@ -84,8 +84,8 @@ define <4 x i32> @sqsub4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
define <2 x i64> @sqsub2d(<2 x i64>* %A, <2 x i64>* %B) nounwind {
;CHECK-LABEL: sqsub2d:
;CHECK: sqsub.2d
- %tmp1 = load <2 x i64>* %A
- %tmp2 = load <2 x i64>* %B
+ %tmp1 = load <2 x i64>, <2 x i64>* %A
+ %tmp2 = load <2 x i64>, <2 x i64>* %B
%tmp3 = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
ret <2 x i64> %tmp3
}
@@ -93,8 +93,8 @@ define <2 x i64> @sqsub2d(<2 x i64>* %A, <2 x i64>* %B) nounwind {
define <16 x i8> @uqsub16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: uqsub16b:
;CHECK: uqsub.16b
- %tmp1 = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
+ %tmp1 = load <16 x i8>, <16 x i8>* %A
+ %tmp2 = load <16 x i8>, <16 x i8>* %B
%tmp3 = call <16 x i8> @llvm.aarch64.neon.uqsub.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
ret <16 x i8> %tmp3
}
@@ -102,8 +102,8 @@ define <16 x i8> @uqsub16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
define <8 x i16> @uqsub8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: uqsub8h:
;CHECK: uqsub.8h
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
+ %tmp2 = load <8 x i16>, <8 x i16>* %B
%tmp3 = call <8 x i16> @llvm.aarch64.neon.uqsub.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
ret <8 x i16> %tmp3
}
@@ -111,8 +111,8 @@ define <8 x i16> @uqsub8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
define <4 x i32> @uqsub4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: uqsub4s:
;CHECK: uqsub.4s
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
+ %tmp2 = load <4 x i32>, <4 x i32>* %B
%tmp3 = call <4 x i32> @llvm.aarch64.neon.uqsub.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
ret <4 x i32> %tmp3
}
@@ -120,8 +120,8 @@ define <4 x i32> @uqsub4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
define <2 x i64> @uqsub2d(<2 x i64>* %A, <2 x i64>* %B) nounwind {
;CHECK-LABEL: uqsub2d:
;CHECK: uqsub.2d
- %tmp1 = load <2 x i64>* %A
- %tmp2 = load <2 x i64>* %B
+ %tmp1 = load <2 x i64>, <2 x i64>* %A
+ %tmp2 = load <2 x i64>, <2 x i64>* %B
%tmp3 = call <2 x i64> @llvm.aarch64.neon.uqsub.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
ret <2 x i64> %tmp3
}
diff --git a/test/CodeGen/AArch64/arm64-vshift.ll b/test/CodeGen/AArch64/arm64-vshift.ll
index 65bd50cbe9d0..d5a12483db40 100644
--- a/test/CodeGen/AArch64/arm64-vshift.ll
+++ b/test/CodeGen/AArch64/arm64-vshift.ll
@@ -3,8 +3,8 @@
define <8 x i8> @sqshl8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: sqshl8b:
;CHECK: sqshl.8b
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
+ %tmp2 = load <8 x i8>, <8 x i8>* %B
%tmp3 = call <8 x i8> @llvm.aarch64.neon.sqshl.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
ret <8 x i8> %tmp3
}
@@ -12,8 +12,8 @@ define <8 x i8> @sqshl8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
define <4 x i16> @sqshl4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: sqshl4h:
;CHECK: sqshl.4h
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
%tmp3 = call <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
ret <4 x i16> %tmp3
}
@@ -21,8 +21,8 @@ define <4 x i16> @sqshl4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
define <2 x i32> @sqshl2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: sqshl2s:
;CHECK: sqshl.2s
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
%tmp3 = call <2 x i32> @llvm.aarch64.neon.sqshl.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
ret <2 x i32> %tmp3
}
@@ -30,8 +30,8 @@ define <2 x i32> @sqshl2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
define <8 x i8> @uqshl8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: uqshl8b:
;CHECK: uqshl.8b
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
+ %tmp2 = load <8 x i8>, <8 x i8>* %B
%tmp3 = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
ret <8 x i8> %tmp3
}
@@ -39,8 +39,8 @@ define <8 x i8> @uqshl8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
define <4 x i16> @uqshl4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: uqshl4h:
;CHECK: uqshl.4h
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
%tmp3 = call <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
ret <4 x i16> %tmp3
}
@@ -48,8 +48,8 @@ define <4 x i16> @uqshl4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
define <2 x i32> @uqshl2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: uqshl2s:
;CHECK: uqshl.2s
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
%tmp3 = call <2 x i32> @llvm.aarch64.neon.uqshl.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
ret <2 x i32> %tmp3
}
@@ -57,8 +57,8 @@ define <2 x i32> @uqshl2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
define <16 x i8> @sqshl16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: sqshl16b:
;CHECK: sqshl.16b
- %tmp1 = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
+ %tmp1 = load <16 x i8>, <16 x i8>* %A
+ %tmp2 = load <16 x i8>, <16 x i8>* %B
%tmp3 = call <16 x i8> @llvm.aarch64.neon.sqshl.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
ret <16 x i8> %tmp3
}
@@ -66,8 +66,8 @@ define <16 x i8> @sqshl16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
define <8 x i16> @sqshl8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: sqshl8h:
;CHECK: sqshl.8h
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
+ %tmp2 = load <8 x i16>, <8 x i16>* %B
%tmp3 = call <8 x i16> @llvm.aarch64.neon.sqshl.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
ret <8 x i16> %tmp3
}
@@ -75,8 +75,8 @@ define <8 x i16> @sqshl8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
define <4 x i32> @sqshl4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: sqshl4s:
;CHECK: sqshl.4s
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
+ %tmp2 = load <4 x i32>, <4 x i32>* %B
%tmp3 = call <4 x i32> @llvm.aarch64.neon.sqshl.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
ret <4 x i32> %tmp3
}
@@ -84,8 +84,8 @@ define <4 x i32> @sqshl4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
define <2 x i64> @sqshl2d(<2 x i64>* %A, <2 x i64>* %B) nounwind {
;CHECK-LABEL: sqshl2d:
;CHECK: sqshl.2d
- %tmp1 = load <2 x i64>* %A
- %tmp2 = load <2 x i64>* %B
+ %tmp1 = load <2 x i64>, <2 x i64>* %A
+ %tmp2 = load <2 x i64>, <2 x i64>* %B
%tmp3 = call <2 x i64> @llvm.aarch64.neon.sqshl.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
ret <2 x i64> %tmp3
}
@@ -93,8 +93,8 @@ define <2 x i64> @sqshl2d(<2 x i64>* %A, <2 x i64>* %B) nounwind {
define <16 x i8> @uqshl16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: uqshl16b:
;CHECK: uqshl.16b
- %tmp1 = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
+ %tmp1 = load <16 x i8>, <16 x i8>* %A
+ %tmp2 = load <16 x i8>, <16 x i8>* %B
%tmp3 = call <16 x i8> @llvm.aarch64.neon.uqshl.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
ret <16 x i8> %tmp3
}
@@ -102,8 +102,8 @@ define <16 x i8> @uqshl16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
define <8 x i16> @uqshl8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: uqshl8h:
;CHECK: uqshl.8h
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
+ %tmp2 = load <8 x i16>, <8 x i16>* %B
%tmp3 = call <8 x i16> @llvm.aarch64.neon.uqshl.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
ret <8 x i16> %tmp3
}
@@ -111,8 +111,8 @@ define <8 x i16> @uqshl8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
define <4 x i32> @uqshl4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: uqshl4s:
;CHECK: uqshl.4s
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
+ %tmp2 = load <4 x i32>, <4 x i32>* %B
%tmp3 = call <4 x i32> @llvm.aarch64.neon.uqshl.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
ret <4 x i32> %tmp3
}
@@ -120,8 +120,8 @@ define <4 x i32> @uqshl4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
define <2 x i64> @uqshl2d(<2 x i64>* %A, <2 x i64>* %B) nounwind {
;CHECK-LABEL: uqshl2d:
;CHECK: uqshl.2d
- %tmp1 = load <2 x i64>* %A
- %tmp2 = load <2 x i64>* %B
+ %tmp1 = load <2 x i64>, <2 x i64>* %A
+ %tmp2 = load <2 x i64>, <2 x i64>* %B
%tmp3 = call <2 x i64> @llvm.aarch64.neon.uqshl.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
ret <2 x i64> %tmp3
}
@@ -149,8 +149,8 @@ declare <2 x i64> @llvm.aarch64.neon.uqshl.v2i64(<2 x i64>, <2 x i64>) nounwind
define <8 x i8> @srshl8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: srshl8b:
;CHECK: srshl.8b
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
+ %tmp2 = load <8 x i8>, <8 x i8>* %B
%tmp3 = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
ret <8 x i8> %tmp3
}
@@ -158,8 +158,8 @@ define <8 x i8> @srshl8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
define <4 x i16> @srshl4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: srshl4h:
;CHECK: srshl.4h
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
%tmp3 = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
ret <4 x i16> %tmp3
}
@@ -167,8 +167,8 @@ define <4 x i16> @srshl4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
define <2 x i32> @srshl2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: srshl2s:
;CHECK: srshl.2s
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
%tmp3 = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
ret <2 x i32> %tmp3
}
@@ -176,8 +176,8 @@ define <2 x i32> @srshl2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
define <8 x i8> @urshl8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: urshl8b:
;CHECK: urshl.8b
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
+ %tmp2 = load <8 x i8>, <8 x i8>* %B
%tmp3 = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
ret <8 x i8> %tmp3
}
@@ -185,8 +185,8 @@ define <8 x i8> @urshl8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
define <4 x i16> @urshl4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: urshl4h:
;CHECK: urshl.4h
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
%tmp3 = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
ret <4 x i16> %tmp3
}
@@ -194,8 +194,8 @@ define <4 x i16> @urshl4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
define <2 x i32> @urshl2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: urshl2s:
;CHECK: urshl.2s
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
%tmp3 = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
ret <2 x i32> %tmp3
}
@@ -203,8 +203,8 @@ define <2 x i32> @urshl2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
define <16 x i8> @srshl16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: srshl16b:
;CHECK: srshl.16b
- %tmp1 = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
+ %tmp1 = load <16 x i8>, <16 x i8>* %A
+ %tmp2 = load <16 x i8>, <16 x i8>* %B
%tmp3 = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
ret <16 x i8> %tmp3
}
@@ -212,8 +212,8 @@ define <16 x i8> @srshl16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
define <8 x i16> @srshl8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: srshl8h:
;CHECK: srshl.8h
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
+ %tmp2 = load <8 x i16>, <8 x i16>* %B
%tmp3 = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
ret <8 x i16> %tmp3
}
@@ -221,8 +221,8 @@ define <8 x i16> @srshl8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
define <4 x i32> @srshl4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: srshl4s:
;CHECK: srshl.4s
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
+ %tmp2 = load <4 x i32>, <4 x i32>* %B
%tmp3 = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
ret <4 x i32> %tmp3
}
@@ -230,8 +230,8 @@ define <4 x i32> @srshl4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
define <2 x i64> @srshl2d(<2 x i64>* %A, <2 x i64>* %B) nounwind {
;CHECK-LABEL: srshl2d:
;CHECK: srshl.2d
- %tmp1 = load <2 x i64>* %A
- %tmp2 = load <2 x i64>* %B
+ %tmp1 = load <2 x i64>, <2 x i64>* %A
+ %tmp2 = load <2 x i64>, <2 x i64>* %B
%tmp3 = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
ret <2 x i64> %tmp3
}
@@ -239,8 +239,8 @@ define <2 x i64> @srshl2d(<2 x i64>* %A, <2 x i64>* %B) nounwind {
define <16 x i8> @urshl16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: urshl16b:
;CHECK: urshl.16b
- %tmp1 = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
+ %tmp1 = load <16 x i8>, <16 x i8>* %A
+ %tmp2 = load <16 x i8>, <16 x i8>* %B
%tmp3 = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
ret <16 x i8> %tmp3
}
@@ -248,8 +248,8 @@ define <16 x i8> @urshl16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
define <8 x i16> @urshl8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: urshl8h:
;CHECK: urshl.8h
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
+ %tmp2 = load <8 x i16>, <8 x i16>* %B
%tmp3 = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
ret <8 x i16> %tmp3
}
@@ -257,8 +257,8 @@ define <8 x i16> @urshl8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
define <4 x i32> @urshl4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: urshl4s:
;CHECK: urshl.4s
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
+ %tmp2 = load <4 x i32>, <4 x i32>* %B
%tmp3 = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
ret <4 x i32> %tmp3
}
@@ -266,8 +266,8 @@ define <4 x i32> @urshl4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
define <2 x i64> @urshl2d(<2 x i64>* %A, <2 x i64>* %B) nounwind {
;CHECK-LABEL: urshl2d:
;CHECK: urshl.2d
- %tmp1 = load <2 x i64>* %A
- %tmp2 = load <2 x i64>* %B
+ %tmp1 = load <2 x i64>, <2 x i64>* %A
+ %tmp2 = load <2 x i64>, <2 x i64>* %B
%tmp3 = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
ret <2 x i64> %tmp3
}
@@ -295,8 +295,8 @@ declare <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64>, <2 x i64>) nounwind
define <8 x i8> @sqrshl8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: sqrshl8b:
;CHECK: sqrshl.8b
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
+ %tmp2 = load <8 x i8>, <8 x i8>* %B
%tmp3 = call <8 x i8> @llvm.aarch64.neon.sqrshl.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
ret <8 x i8> %tmp3
}
@@ -304,8 +304,8 @@ define <8 x i8> @sqrshl8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
define <4 x i16> @sqrshl4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: sqrshl4h:
;CHECK: sqrshl.4h
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
%tmp3 = call <4 x i16> @llvm.aarch64.neon.sqrshl.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
ret <4 x i16> %tmp3
}
@@ -313,8 +313,8 @@ define <4 x i16> @sqrshl4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
define <2 x i32> @sqrshl2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: sqrshl2s:
;CHECK: sqrshl.2s
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
%tmp3 = call <2 x i32> @llvm.aarch64.neon.sqrshl.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
ret <2 x i32> %tmp3
}
@@ -322,8 +322,8 @@ define <2 x i32> @sqrshl2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
define <8 x i8> @uqrshl8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: uqrshl8b:
;CHECK: uqrshl.8b
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
+ %tmp2 = load <8 x i8>, <8 x i8>* %B
%tmp3 = call <8 x i8> @llvm.aarch64.neon.uqrshl.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
ret <8 x i8> %tmp3
}
@@ -331,8 +331,8 @@ define <8 x i8> @uqrshl8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
define <4 x i16> @uqrshl4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: uqrshl4h:
;CHECK: uqrshl.4h
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
%tmp3 = call <4 x i16> @llvm.aarch64.neon.uqrshl.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
ret <4 x i16> %tmp3
}
@@ -340,8 +340,8 @@ define <4 x i16> @uqrshl4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
define <2 x i32> @uqrshl2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: uqrshl2s:
;CHECK: uqrshl.2s
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
%tmp3 = call <2 x i32> @llvm.aarch64.neon.uqrshl.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
ret <2 x i32> %tmp3
}
@@ -349,8 +349,8 @@ define <2 x i32> @uqrshl2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
define <16 x i8> @sqrshl16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: sqrshl16b:
;CHECK: sqrshl.16b
- %tmp1 = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
+ %tmp1 = load <16 x i8>, <16 x i8>* %A
+ %tmp2 = load <16 x i8>, <16 x i8>* %B
%tmp3 = call <16 x i8> @llvm.aarch64.neon.sqrshl.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
ret <16 x i8> %tmp3
}
@@ -358,8 +358,8 @@ define <16 x i8> @sqrshl16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
define <8 x i16> @sqrshl8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: sqrshl8h:
;CHECK: sqrshl.8h
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
+ %tmp2 = load <8 x i16>, <8 x i16>* %B
%tmp3 = call <8 x i16> @llvm.aarch64.neon.sqrshl.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
ret <8 x i16> %tmp3
}
@@ -367,8 +367,8 @@ define <8 x i16> @sqrshl8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
define <4 x i32> @sqrshl4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: sqrshl4s:
;CHECK: sqrshl.4s
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
+ %tmp2 = load <4 x i32>, <4 x i32>* %B
%tmp3 = call <4 x i32> @llvm.aarch64.neon.sqrshl.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
ret <4 x i32> %tmp3
}
@@ -376,8 +376,8 @@ define <4 x i32> @sqrshl4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
define <2 x i64> @sqrshl2d(<2 x i64>* %A, <2 x i64>* %B) nounwind {
;CHECK-LABEL: sqrshl2d:
;CHECK: sqrshl.2d
- %tmp1 = load <2 x i64>* %A
- %tmp2 = load <2 x i64>* %B
+ %tmp1 = load <2 x i64>, <2 x i64>* %A
+ %tmp2 = load <2 x i64>, <2 x i64>* %B
%tmp3 = call <2 x i64> @llvm.aarch64.neon.sqrshl.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
ret <2 x i64> %tmp3
}
@@ -385,8 +385,8 @@ define <2 x i64> @sqrshl2d(<2 x i64>* %A, <2 x i64>* %B) nounwind {
define <16 x i8> @uqrshl16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: uqrshl16b:
;CHECK: uqrshl.16b
- %tmp1 = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
+ %tmp1 = load <16 x i8>, <16 x i8>* %A
+ %tmp2 = load <16 x i8>, <16 x i8>* %B
%tmp3 = call <16 x i8> @llvm.aarch64.neon.uqrshl.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
ret <16 x i8> %tmp3
}
@@ -394,8 +394,8 @@ define <16 x i8> @uqrshl16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
define <8 x i16> @uqrshl8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: uqrshl8h:
;CHECK: uqrshl.8h
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
+ %tmp2 = load <8 x i16>, <8 x i16>* %B
%tmp3 = call <8 x i16> @llvm.aarch64.neon.uqrshl.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
ret <8 x i16> %tmp3
}
@@ -403,8 +403,8 @@ define <8 x i16> @uqrshl8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
define <4 x i32> @uqrshl4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: uqrshl4s:
;CHECK: uqrshl.4s
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
+ %tmp2 = load <4 x i32>, <4 x i32>* %B
%tmp3 = call <4 x i32> @llvm.aarch64.neon.uqrshl.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
ret <4 x i32> %tmp3
}
@@ -412,8 +412,8 @@ define <4 x i32> @uqrshl4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
define <2 x i64> @uqrshl2d(<2 x i64>* %A, <2 x i64>* %B) nounwind {
;CHECK-LABEL: uqrshl2d:
;CHECK: uqrshl.2d
- %tmp1 = load <2 x i64>* %A
- %tmp2 = load <2 x i64>* %B
+ %tmp1 = load <2 x i64>, <2 x i64>* %A
+ %tmp2 = load <2 x i64>, <2 x i64>* %B
%tmp3 = call <2 x i64> @llvm.aarch64.neon.uqrshl.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
ret <2 x i64> %tmp3
}
@@ -441,7 +441,7 @@ declare <2 x i64> @llvm.aarch64.neon.uqrshl.v2i64(<2 x i64>, <2 x i64>) nounwind
define <8 x i8> @urshr8b(<8 x i8>* %A) nounwind {
;CHECK-LABEL: urshr8b:
;CHECK: urshr.8b
- %tmp1 = load <8 x i8>* %A
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
%tmp3 = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> %tmp1, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
ret <8 x i8> %tmp3
}
@@ -449,7 +449,7 @@ define <8 x i8> @urshr8b(<8 x i8>* %A) nounwind {
define <4 x i16> @urshr4h(<4 x i16>* %A) nounwind {
;CHECK-LABEL: urshr4h:
;CHECK: urshr.4h
- %tmp1 = load <4 x i16>* %A
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
%tmp3 = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> %tmp1, <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>)
ret <4 x i16> %tmp3
}
@@ -457,7 +457,7 @@ define <4 x i16> @urshr4h(<4 x i16>* %A) nounwind {
define <2 x i32> @urshr2s(<2 x i32>* %A) nounwind {
;CHECK-LABEL: urshr2s:
;CHECK: urshr.2s
- %tmp1 = load <2 x i32>* %A
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
%tmp3 = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> %tmp1, <2 x i32> <i32 -1, i32 -1>)
ret <2 x i32> %tmp3
}
@@ -465,7 +465,7 @@ define <2 x i32> @urshr2s(<2 x i32>* %A) nounwind {
define <16 x i8> @urshr16b(<16 x i8>* %A) nounwind {
;CHECK-LABEL: urshr16b:
;CHECK: urshr.16b
- %tmp1 = load <16 x i8>* %A
+ %tmp1 = load <16 x i8>, <16 x i8>* %A
%tmp3 = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> %tmp1, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
ret <16 x i8> %tmp3
}
@@ -473,7 +473,7 @@ define <16 x i8> @urshr16b(<16 x i8>* %A) nounwind {
define <8 x i16> @urshr8h(<8 x i16>* %A) nounwind {
;CHECK-LABEL: urshr8h:
;CHECK: urshr.8h
- %tmp1 = load <8 x i16>* %A
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
%tmp3 = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> %tmp1, <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
ret <8 x i16> %tmp3
}
@@ -481,7 +481,7 @@ define <8 x i16> @urshr8h(<8 x i16>* %A) nounwind {
define <4 x i32> @urshr4s(<4 x i32>* %A) nounwind {
;CHECK-LABEL: urshr4s:
;CHECK: urshr.4s
- %tmp1 = load <4 x i32>* %A
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
%tmp3 = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> %tmp1, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
ret <4 x i32> %tmp3
}
@@ -489,7 +489,7 @@ define <4 x i32> @urshr4s(<4 x i32>* %A) nounwind {
define <2 x i64> @urshr2d(<2 x i64>* %A) nounwind {
;CHECK-LABEL: urshr2d:
;CHECK: urshr.2d
- %tmp1 = load <2 x i64>* %A
+ %tmp1 = load <2 x i64>, <2 x i64>* %A
%tmp3 = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> %tmp1, <2 x i64> <i64 -1, i64 -1>)
ret <2 x i64> %tmp3
}
@@ -497,7 +497,7 @@ define <2 x i64> @urshr2d(<2 x i64>* %A) nounwind {
define <8 x i8> @srshr8b(<8 x i8>* %A) nounwind {
;CHECK-LABEL: srshr8b:
;CHECK: srshr.8b
- %tmp1 = load <8 x i8>* %A
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
%tmp3 = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> %tmp1, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
ret <8 x i8> %tmp3
}
@@ -505,7 +505,7 @@ define <8 x i8> @srshr8b(<8 x i8>* %A) nounwind {
define <4 x i16> @srshr4h(<4 x i16>* %A) nounwind {
;CHECK-LABEL: srshr4h:
;CHECK: srshr.4h
- %tmp1 = load <4 x i16>* %A
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
%tmp3 = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> %tmp1, <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>)
ret <4 x i16> %tmp3
}
@@ -513,7 +513,7 @@ define <4 x i16> @srshr4h(<4 x i16>* %A) nounwind {
define <2 x i32> @srshr2s(<2 x i32>* %A) nounwind {
;CHECK-LABEL: srshr2s:
;CHECK: srshr.2s
- %tmp1 = load <2 x i32>* %A
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
%tmp3 = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> %tmp1, <2 x i32> <i32 -1, i32 -1>)
ret <2 x i32> %tmp3
}
@@ -521,7 +521,7 @@ define <2 x i32> @srshr2s(<2 x i32>* %A) nounwind {
define <16 x i8> @srshr16b(<16 x i8>* %A) nounwind {
;CHECK-LABEL: srshr16b:
;CHECK: srshr.16b
- %tmp1 = load <16 x i8>* %A
+ %tmp1 = load <16 x i8>, <16 x i8>* %A
%tmp3 = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> %tmp1, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
ret <16 x i8> %tmp3
}
@@ -529,7 +529,7 @@ define <16 x i8> @srshr16b(<16 x i8>* %A) nounwind {
define <8 x i16> @srshr8h(<8 x i16>* %A) nounwind {
;CHECK-LABEL: srshr8h:
;CHECK: srshr.8h
- %tmp1 = load <8 x i16>* %A
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
%tmp3 = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> %tmp1, <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
ret <8 x i16> %tmp3
}
@@ -537,7 +537,7 @@ define <8 x i16> @srshr8h(<8 x i16>* %A) nounwind {
define <4 x i32> @srshr4s(<4 x i32>* %A) nounwind {
;CHECK-LABEL: srshr4s:
;CHECK: srshr.4s
- %tmp1 = load <4 x i32>* %A
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
%tmp3 = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> %tmp1, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
ret <4 x i32> %tmp3
}
@@ -545,7 +545,7 @@ define <4 x i32> @srshr4s(<4 x i32>* %A) nounwind {
define <2 x i64> @srshr2d(<2 x i64>* %A) nounwind {
;CHECK-LABEL: srshr2d:
;CHECK: srshr.2d
- %tmp1 = load <2 x i64>* %A
+ %tmp1 = load <2 x i64>, <2 x i64>* %A
%tmp3 = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> %tmp1, <2 x i64> <i64 -1, i64 -1>)
ret <2 x i64> %tmp3
}
@@ -553,7 +553,7 @@ define <2 x i64> @srshr2d(<2 x i64>* %A) nounwind {
define <8 x i8> @sqshlu8b(<8 x i8>* %A) nounwind {
;CHECK-LABEL: sqshlu8b:
;CHECK: sqshlu.8b v0, {{v[0-9]+}}, #1
- %tmp1 = load <8 x i8>* %A
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
%tmp3 = call <8 x i8> @llvm.aarch64.neon.sqshlu.v8i8(<8 x i8> %tmp1, <8 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
ret <8 x i8> %tmp3
}
@@ -561,7 +561,7 @@ define <8 x i8> @sqshlu8b(<8 x i8>* %A) nounwind {
define <4 x i16> @sqshlu4h(<4 x i16>* %A) nounwind {
;CHECK-LABEL: sqshlu4h:
;CHECK: sqshlu.4h v0, {{v[0-9]+}}, #1
- %tmp1 = load <4 x i16>* %A
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
%tmp3 = call <4 x i16> @llvm.aarch64.neon.sqshlu.v4i16(<4 x i16> %tmp1, <4 x i16> <i16 1, i16 1, i16 1, i16 1>)
ret <4 x i16> %tmp3
}
@@ -569,7 +569,7 @@ define <4 x i16> @sqshlu4h(<4 x i16>* %A) nounwind {
define <2 x i32> @sqshlu2s(<2 x i32>* %A) nounwind {
;CHECK-LABEL: sqshlu2s:
;CHECK: sqshlu.2s v0, {{v[0-9]+}}, #1
- %tmp1 = load <2 x i32>* %A
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
%tmp3 = call <2 x i32> @llvm.aarch64.neon.sqshlu.v2i32(<2 x i32> %tmp1, <2 x i32> <i32 1, i32 1>)
ret <2 x i32> %tmp3
}
@@ -577,7 +577,7 @@ define <2 x i32> @sqshlu2s(<2 x i32>* %A) nounwind {
define <16 x i8> @sqshlu16b(<16 x i8>* %A) nounwind {
;CHECK-LABEL: sqshlu16b:
;CHECK: sqshlu.16b v0, {{v[0-9]+}}, #1
- %tmp1 = load <16 x i8>* %A
+ %tmp1 = load <16 x i8>, <16 x i8>* %A
%tmp3 = call <16 x i8> @llvm.aarch64.neon.sqshlu.v16i8(<16 x i8> %tmp1, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
ret <16 x i8> %tmp3
}
@@ -585,7 +585,7 @@ define <16 x i8> @sqshlu16b(<16 x i8>* %A) nounwind {
define <8 x i16> @sqshlu8h(<8 x i16>* %A) nounwind {
;CHECK-LABEL: sqshlu8h:
;CHECK: sqshlu.8h v0, {{v[0-9]+}}, #1
- %tmp1 = load <8 x i16>* %A
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
%tmp3 = call <8 x i16> @llvm.aarch64.neon.sqshlu.v8i16(<8 x i16> %tmp1, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
ret <8 x i16> %tmp3
}
@@ -593,7 +593,7 @@ define <8 x i16> @sqshlu8h(<8 x i16>* %A) nounwind {
define <4 x i32> @sqshlu4s(<4 x i32>* %A) nounwind {
;CHECK-LABEL: sqshlu4s:
;CHECK: sqshlu.4s v0, {{v[0-9]+}}, #1
- %tmp1 = load <4 x i32>* %A
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
%tmp3 = call <4 x i32> @llvm.aarch64.neon.sqshlu.v4i32(<4 x i32> %tmp1, <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
ret <4 x i32> %tmp3
}
@@ -601,7 +601,7 @@ define <4 x i32> @sqshlu4s(<4 x i32>* %A) nounwind {
define <2 x i64> @sqshlu2d(<2 x i64>* %A) nounwind {
;CHECK-LABEL: sqshlu2d:
;CHECK: sqshlu.2d v0, {{v[0-9]+}}, #1
- %tmp1 = load <2 x i64>* %A
+ %tmp1 = load <2 x i64>, <2 x i64>* %A
%tmp3 = call <2 x i64> @llvm.aarch64.neon.sqshlu.v2i64(<2 x i64> %tmp1, <2 x i64> <i64 1, i64 1>)
ret <2 x i64> %tmp3
}
@@ -619,7 +619,7 @@ declare <2 x i64> @llvm.aarch64.neon.sqshlu.v2i64(<2 x i64>, <2 x i64>) nounwind
define <8 x i8> @rshrn8b(<8 x i16>* %A) nounwind {
;CHECK-LABEL: rshrn8b:
;CHECK: rshrn.8b v0, {{v[0-9]+}}, #1
- %tmp1 = load <8 x i16>* %A
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
%tmp3 = call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> %tmp1, i32 1)
ret <8 x i8> %tmp3
}
@@ -627,7 +627,7 @@ define <8 x i8> @rshrn8b(<8 x i16>* %A) nounwind {
define <4 x i16> @rshrn4h(<4 x i32>* %A) nounwind {
;CHECK-LABEL: rshrn4h:
;CHECK: rshrn.4h v0, {{v[0-9]+}}, #1
- %tmp1 = load <4 x i32>* %A
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
%tmp3 = call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> %tmp1, i32 1)
ret <4 x i16> %tmp3
}
@@ -635,7 +635,7 @@ define <4 x i16> @rshrn4h(<4 x i32>* %A) nounwind {
define <2 x i32> @rshrn2s(<2 x i64>* %A) nounwind {
;CHECK-LABEL: rshrn2s:
;CHECK: rshrn.2s v0, {{v[0-9]+}}, #1
- %tmp1 = load <2 x i64>* %A
+ %tmp1 = load <2 x i64>, <2 x i64>* %A
%tmp3 = call <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64> %tmp1, i32 1)
ret <2 x i32> %tmp3
}
@@ -643,8 +643,8 @@ define <2 x i32> @rshrn2s(<2 x i64>* %A) nounwind {
define <16 x i8> @rshrn16b(<8 x i8> *%ret, <8 x i16>* %A) nounwind {
;CHECK-LABEL: rshrn16b:
;CHECK: rshrn2.16b v0, {{v[0-9]+}}, #1
- %out = load <8 x i8>* %ret
- %tmp1 = load <8 x i16>* %A
+ %out = load <8 x i8>, <8 x i8>* %ret
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
%tmp3 = call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> %tmp1, i32 1)
%tmp4 = shufflevector <8 x i8> %out, <8 x i8> %tmp3, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
ret <16 x i8> %tmp4
@@ -653,8 +653,8 @@ define <16 x i8> @rshrn16b(<8 x i8> *%ret, <8 x i16>* %A) nounwind {
define <8 x i16> @rshrn8h(<4 x i16>* %ret, <4 x i32>* %A) nounwind {
;CHECK-LABEL: rshrn8h:
;CHECK: rshrn2.8h v0, {{v[0-9]+}}, #1
- %out = load <4 x i16>* %ret
- %tmp1 = load <4 x i32>* %A
+ %out = load <4 x i16>, <4 x i16>* %ret
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
%tmp3 = call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> %tmp1, i32 1)
%tmp4 = shufflevector <4 x i16> %out, <4 x i16> %tmp3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
ret <8 x i16> %tmp4
@@ -663,8 +663,8 @@ define <8 x i16> @rshrn8h(<4 x i16>* %ret, <4 x i32>* %A) nounwind {
define <4 x i32> @rshrn4s(<2 x i32>* %ret, <2 x i64>* %A) nounwind {
;CHECK-LABEL: rshrn4s:
;CHECK: rshrn2.4s v0, {{v[0-9]+}}, #1
- %out = load <2 x i32>* %ret
- %tmp1 = load <2 x i64>* %A
+ %out = load <2 x i32>, <2 x i32>* %ret
+ %tmp1 = load <2 x i64>, <2 x i64>* %A
%tmp3 = call <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64> %tmp1, i32 1)
%tmp4 = shufflevector <2 x i32> %out, <2 x i32> %tmp3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
ret <4 x i32> %tmp4
@@ -677,7 +677,7 @@ declare <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64>, i32) nounwind readno
define <8 x i8> @shrn8b(<8 x i16>* %A) nounwind {
;CHECK-LABEL: shrn8b:
;CHECK: shrn.8b v0, {{v[0-9]+}}, #1
- %tmp1 = load <8 x i16>* %A
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
%tmp2 = lshr <8 x i16> %tmp1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
%tmp3 = trunc <8 x i16> %tmp2 to <8 x i8>
ret <8 x i8> %tmp3
@@ -686,7 +686,7 @@ define <8 x i8> @shrn8b(<8 x i16>* %A) nounwind {
define <4 x i16> @shrn4h(<4 x i32>* %A) nounwind {
;CHECK-LABEL: shrn4h:
;CHECK: shrn.4h v0, {{v[0-9]+}}, #1
- %tmp1 = load <4 x i32>* %A
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
%tmp2 = lshr <4 x i32> %tmp1, <i32 1, i32 1, i32 1, i32 1>
%tmp3 = trunc <4 x i32> %tmp2 to <4 x i16>
ret <4 x i16> %tmp3
@@ -695,7 +695,7 @@ define <4 x i16> @shrn4h(<4 x i32>* %A) nounwind {
define <2 x i32> @shrn2s(<2 x i64>* %A) nounwind {
;CHECK-LABEL: shrn2s:
;CHECK: shrn.2s v0, {{v[0-9]+}}, #1
- %tmp1 = load <2 x i64>* %A
+ %tmp1 = load <2 x i64>, <2 x i64>* %A
%tmp2 = lshr <2 x i64> %tmp1, <i64 1, i64 1>
%tmp3 = trunc <2 x i64> %tmp2 to <2 x i32>
ret <2 x i32> %tmp3
@@ -704,8 +704,8 @@ define <2 x i32> @shrn2s(<2 x i64>* %A) nounwind {
define <16 x i8> @shrn16b(<8 x i8>* %ret, <8 x i16>* %A) nounwind {
;CHECK-LABEL: shrn16b:
;CHECK: shrn2.16b v0, {{v[0-9]+}}, #1
- %out = load <8 x i8>* %ret
- %tmp1 = load <8 x i16>* %A
+ %out = load <8 x i8>, <8 x i8>* %ret
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
%tmp2 = lshr <8 x i16> %tmp1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
%tmp3 = trunc <8 x i16> %tmp2 to <8 x i8>
%tmp4 = shufflevector <8 x i8> %out, <8 x i8> %tmp3, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
@@ -715,8 +715,8 @@ define <16 x i8> @shrn16b(<8 x i8>* %ret, <8 x i16>* %A) nounwind {
define <8 x i16> @shrn8h(<4 x i16>* %ret, <4 x i32>* %A) nounwind {
;CHECK-LABEL: shrn8h:
;CHECK: shrn2.8h v0, {{v[0-9]+}}, #1
- %out = load <4 x i16>* %ret
- %tmp1 = load <4 x i32>* %A
+ %out = load <4 x i16>, <4 x i16>* %ret
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
%tmp2 = lshr <4 x i32> %tmp1, <i32 1, i32 1, i32 1, i32 1>
%tmp3 = trunc <4 x i32> %tmp2 to <4 x i16>
%tmp4 = shufflevector <4 x i16> %out, <4 x i16> %tmp3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
@@ -726,8 +726,8 @@ define <8 x i16> @shrn8h(<4 x i16>* %ret, <4 x i32>* %A) nounwind {
define <4 x i32> @shrn4s(<2 x i32>* %ret, <2 x i64>* %A) nounwind {
;CHECK-LABEL: shrn4s:
;CHECK: shrn2.4s v0, {{v[0-9]+}}, #1
- %out = load <2 x i32>* %ret
- %tmp1 = load <2 x i64>* %A
+ %out = load <2 x i32>, <2 x i32>* %ret
+ %tmp1 = load <2 x i64>, <2 x i64>* %A
%tmp2 = lshr <2 x i64> %tmp1, <i64 1, i64 1>
%tmp3 = trunc <2 x i64> %tmp2 to <2 x i32>
%tmp4 = shufflevector <2 x i32> %out, <2 x i32> %tmp3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -748,7 +748,7 @@ define i32 @sqshrn1s(i64 %A) nounwind {
define <8 x i8> @sqshrn8b(<8 x i16>* %A) nounwind {
;CHECK-LABEL: sqshrn8b:
;CHECK: sqshrn.8b v0, {{v[0-9]+}}, #1
- %tmp1 = load <8 x i16>* %A
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
%tmp3 = call <8 x i8> @llvm.aarch64.neon.sqshrn.v8i8(<8 x i16> %tmp1, i32 1)
ret <8 x i8> %tmp3
}
@@ -756,7 +756,7 @@ define <8 x i8> @sqshrn8b(<8 x i16>* %A) nounwind {
define <4 x i16> @sqshrn4h(<4 x i32>* %A) nounwind {
;CHECK-LABEL: sqshrn4h:
;CHECK: sqshrn.4h v0, {{v[0-9]+}}, #1
- %tmp1 = load <4 x i32>* %A
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
%tmp3 = call <4 x i16> @llvm.aarch64.neon.sqshrn.v4i16(<4 x i32> %tmp1, i32 1)
ret <4 x i16> %tmp3
}
@@ -764,7 +764,7 @@ define <4 x i16> @sqshrn4h(<4 x i32>* %A) nounwind {
define <2 x i32> @sqshrn2s(<2 x i64>* %A) nounwind {
;CHECK-LABEL: sqshrn2s:
;CHECK: sqshrn.2s v0, {{v[0-9]+}}, #1
- %tmp1 = load <2 x i64>* %A
+ %tmp1 = load <2 x i64>, <2 x i64>* %A
%tmp3 = call <2 x i32> @llvm.aarch64.neon.sqshrn.v2i32(<2 x i64> %tmp1, i32 1)
ret <2 x i32> %tmp3
}
@@ -773,8 +773,8 @@ define <2 x i32> @sqshrn2s(<2 x i64>* %A) nounwind {
define <16 x i8> @sqshrn16b(<8 x i8>* %ret, <8 x i16>* %A) nounwind {
;CHECK-LABEL: sqshrn16b:
;CHECK: sqshrn2.16b v0, {{v[0-9]+}}, #1
- %out = load <8 x i8>* %ret
- %tmp1 = load <8 x i16>* %A
+ %out = load <8 x i8>, <8 x i8>* %ret
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
%tmp3 = call <8 x i8> @llvm.aarch64.neon.sqshrn.v8i8(<8 x i16> %tmp1, i32 1)
%tmp4 = shufflevector <8 x i8> %out, <8 x i8> %tmp3, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
ret <16 x i8> %tmp4
@@ -783,8 +783,8 @@ define <16 x i8> @sqshrn16b(<8 x i8>* %ret, <8 x i16>* %A) nounwind {
define <8 x i16> @sqshrn8h(<4 x i16>* %ret, <4 x i32>* %A) nounwind {
;CHECK-LABEL: sqshrn8h:
;CHECK: sqshrn2.8h v0, {{v[0-9]+}}, #1
- %out = load <4 x i16>* %ret
- %tmp1 = load <4 x i32>* %A
+ %out = load <4 x i16>, <4 x i16>* %ret
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
%tmp3 = call <4 x i16> @llvm.aarch64.neon.sqshrn.v4i16(<4 x i32> %tmp1, i32 1)
%tmp4 = shufflevector <4 x i16> %out, <4 x i16> %tmp3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
ret <8 x i16> %tmp4
@@ -793,8 +793,8 @@ define <8 x i16> @sqshrn8h(<4 x i16>* %ret, <4 x i32>* %A) nounwind {
define <4 x i32> @sqshrn4s(<2 x i32>* %ret, <2 x i64>* %A) nounwind {
;CHECK-LABEL: sqshrn4s:
;CHECK: sqshrn2.4s v0, {{v[0-9]+}}, #1
- %out = load <2 x i32>* %ret
- %tmp1 = load <2 x i64>* %A
+ %out = load <2 x i32>, <2 x i32>* %ret
+ %tmp1 = load <2 x i64>, <2 x i64>* %A
%tmp3 = call <2 x i32> @llvm.aarch64.neon.sqshrn.v2i32(<2 x i64> %tmp1, i32 1)
%tmp4 = shufflevector <2 x i32> %out, <2 x i32> %tmp3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
ret <4 x i32> %tmp4
@@ -815,7 +815,7 @@ define i32 @sqshrun1s(i64 %A) nounwind {
define <8 x i8> @sqshrun8b(<8 x i16>* %A) nounwind {
;CHECK-LABEL: sqshrun8b:
;CHECK: sqshrun.8b v0, {{v[0-9]+}}, #1
- %tmp1 = load <8 x i16>* %A
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
%tmp3 = call <8 x i8> @llvm.aarch64.neon.sqshrun.v8i8(<8 x i16> %tmp1, i32 1)
ret <8 x i8> %tmp3
}
@@ -823,7 +823,7 @@ define <8 x i8> @sqshrun8b(<8 x i16>* %A) nounwind {
define <4 x i16> @sqshrun4h(<4 x i32>* %A) nounwind {
;CHECK-LABEL: sqshrun4h:
;CHECK: sqshrun.4h v0, {{v[0-9]+}}, #1
- %tmp1 = load <4 x i32>* %A
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
%tmp3 = call <4 x i16> @llvm.aarch64.neon.sqshrun.v4i16(<4 x i32> %tmp1, i32 1)
ret <4 x i16> %tmp3
}
@@ -831,7 +831,7 @@ define <4 x i16> @sqshrun4h(<4 x i32>* %A) nounwind {
define <2 x i32> @sqshrun2s(<2 x i64>* %A) nounwind {
;CHECK-LABEL: sqshrun2s:
;CHECK: sqshrun.2s v0, {{v[0-9]+}}, #1
- %tmp1 = load <2 x i64>* %A
+ %tmp1 = load <2 x i64>, <2 x i64>* %A
%tmp3 = call <2 x i32> @llvm.aarch64.neon.sqshrun.v2i32(<2 x i64> %tmp1, i32 1)
ret <2 x i32> %tmp3
}
@@ -839,8 +839,8 @@ define <2 x i32> @sqshrun2s(<2 x i64>* %A) nounwind {
define <16 x i8> @sqshrun16b(<8 x i8>* %ret, <8 x i16>* %A) nounwind {
;CHECK-LABEL: sqshrun16b:
;CHECK: sqshrun2.16b v0, {{v[0-9]+}}, #1
- %out = load <8 x i8>* %ret
- %tmp1 = load <8 x i16>* %A
+ %out = load <8 x i8>, <8 x i8>* %ret
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
%tmp3 = call <8 x i8> @llvm.aarch64.neon.sqshrun.v8i8(<8 x i16> %tmp1, i32 1)
%tmp4 = shufflevector <8 x i8> %out, <8 x i8> %tmp3, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
ret <16 x i8> %tmp4
@@ -849,8 +849,8 @@ define <16 x i8> @sqshrun16b(<8 x i8>* %ret, <8 x i16>* %A) nounwind {
define <8 x i16> @sqshrun8h(<4 x i16>* %ret, <4 x i32>* %A) nounwind {
;CHECK-LABEL: sqshrun8h:
;CHECK: sqshrun2.8h v0, {{v[0-9]+}}, #1
- %out = load <4 x i16>* %ret
- %tmp1 = load <4 x i32>* %A
+ %out = load <4 x i16>, <4 x i16>* %ret
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
%tmp3 = call <4 x i16> @llvm.aarch64.neon.sqshrun.v4i16(<4 x i32> %tmp1, i32 1)
%tmp4 = shufflevector <4 x i16> %out, <4 x i16> %tmp3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
ret <8 x i16> %tmp4
@@ -859,8 +859,8 @@ define <8 x i16> @sqshrun8h(<4 x i16>* %ret, <4 x i32>* %A) nounwind {
define <4 x i32> @sqshrun4s(<2 x i32>* %ret, <2 x i64>* %A) nounwind {
;CHECK-LABEL: sqshrun4s:
;CHECK: sqshrun2.4s v0, {{v[0-9]+}}, #1
- %out = load <2 x i32>* %ret
- %tmp1 = load <2 x i64>* %A
+ %out = load <2 x i32>, <2 x i32>* %ret
+ %tmp1 = load <2 x i64>, <2 x i64>* %A
%tmp3 = call <2 x i32> @llvm.aarch64.neon.sqshrun.v2i32(<2 x i64> %tmp1, i32 1)
%tmp4 = shufflevector <2 x i32> %out, <2 x i32> %tmp3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
ret <4 x i32> %tmp4
@@ -881,7 +881,7 @@ define i32 @sqrshrn1s(i64 %A) nounwind {
define <8 x i8> @sqrshrn8b(<8 x i16>* %A) nounwind {
;CHECK-LABEL: sqrshrn8b:
;CHECK: sqrshrn.8b v0, {{v[0-9]+}}, #1
- %tmp1 = load <8 x i16>* %A
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
%tmp3 = call <8 x i8> @llvm.aarch64.neon.sqrshrn.v8i8(<8 x i16> %tmp1, i32 1)
ret <8 x i8> %tmp3
}
@@ -889,7 +889,7 @@ define <8 x i8> @sqrshrn8b(<8 x i16>* %A) nounwind {
define <4 x i16> @sqrshrn4h(<4 x i32>* %A) nounwind {
;CHECK-LABEL: sqrshrn4h:
;CHECK: sqrshrn.4h v0, {{v[0-9]+}}, #1
- %tmp1 = load <4 x i32>* %A
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
%tmp3 = call <4 x i16> @llvm.aarch64.neon.sqrshrn.v4i16(<4 x i32> %tmp1, i32 1)
ret <4 x i16> %tmp3
}
@@ -897,7 +897,7 @@ define <4 x i16> @sqrshrn4h(<4 x i32>* %A) nounwind {
define <2 x i32> @sqrshrn2s(<2 x i64>* %A) nounwind {
;CHECK-LABEL: sqrshrn2s:
;CHECK: sqrshrn.2s v0, {{v[0-9]+}}, #1
- %tmp1 = load <2 x i64>* %A
+ %tmp1 = load <2 x i64>, <2 x i64>* %A
%tmp3 = call <2 x i32> @llvm.aarch64.neon.sqrshrn.v2i32(<2 x i64> %tmp1, i32 1)
ret <2 x i32> %tmp3
}
@@ -905,8 +905,8 @@ define <2 x i32> @sqrshrn2s(<2 x i64>* %A) nounwind {
define <16 x i8> @sqrshrn16b(<8 x i8>* %ret, <8 x i16>* %A) nounwind {
;CHECK-LABEL: sqrshrn16b:
;CHECK: sqrshrn2.16b v0, {{v[0-9]+}}, #1
- %out = load <8 x i8>* %ret
- %tmp1 = load <8 x i16>* %A
+ %out = load <8 x i8>, <8 x i8>* %ret
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
%tmp3 = call <8 x i8> @llvm.aarch64.neon.sqrshrn.v8i8(<8 x i16> %tmp1, i32 1)
%tmp4 = shufflevector <8 x i8> %out, <8 x i8> %tmp3, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
ret <16 x i8> %tmp4
@@ -915,8 +915,8 @@ define <16 x i8> @sqrshrn16b(<8 x i8>* %ret, <8 x i16>* %A) nounwind {
define <8 x i16> @sqrshrn8h(<4 x i16>* %ret, <4 x i32>* %A) nounwind {
;CHECK-LABEL: sqrshrn8h:
;CHECK: sqrshrn2.8h v0, {{v[0-9]+}}, #1
- %out = load <4 x i16>* %ret
- %tmp1 = load <4 x i32>* %A
+ %out = load <4 x i16>, <4 x i16>* %ret
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
%tmp3 = call <4 x i16> @llvm.aarch64.neon.sqrshrn.v4i16(<4 x i32> %tmp1, i32 1)
%tmp4 = shufflevector <4 x i16> %out, <4 x i16> %tmp3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
ret <8 x i16> %tmp4
@@ -925,8 +925,8 @@ define <8 x i16> @sqrshrn8h(<4 x i16>* %ret, <4 x i32>* %A) nounwind {
define <4 x i32> @sqrshrn4s(<2 x i32>* %ret, <2 x i64>* %A) nounwind {
;CHECK-LABEL: sqrshrn4s:
;CHECK: sqrshrn2.4s v0, {{v[0-9]+}}, #1
- %out = load <2 x i32>* %ret
- %tmp1 = load <2 x i64>* %A
+ %out = load <2 x i32>, <2 x i32>* %ret
+ %tmp1 = load <2 x i64>, <2 x i64>* %A
%tmp3 = call <2 x i32> @llvm.aarch64.neon.sqrshrn.v2i32(<2 x i64> %tmp1, i32 1)
%tmp4 = shufflevector <2 x i32> %out, <2 x i32> %tmp3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
ret <4 x i32> %tmp4
@@ -947,7 +947,7 @@ define i32 @sqrshrun1s(i64 %A) nounwind {
define <8 x i8> @sqrshrun8b(<8 x i16>* %A) nounwind {
;CHECK-LABEL: sqrshrun8b:
;CHECK: sqrshrun.8b v0, {{v[0-9]+}}, #1
- %tmp1 = load <8 x i16>* %A
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
%tmp3 = call <8 x i8> @llvm.aarch64.neon.sqrshrun.v8i8(<8 x i16> %tmp1, i32 1)
ret <8 x i8> %tmp3
}
@@ -955,7 +955,7 @@ define <8 x i8> @sqrshrun8b(<8 x i16>* %A) nounwind {
define <4 x i16> @sqrshrun4h(<4 x i32>* %A) nounwind {
;CHECK-LABEL: sqrshrun4h:
;CHECK: sqrshrun.4h v0, {{v[0-9]+}}, #1
- %tmp1 = load <4 x i32>* %A
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
%tmp3 = call <4 x i16> @llvm.aarch64.neon.sqrshrun.v4i16(<4 x i32> %tmp1, i32 1)
ret <4 x i16> %tmp3
}
@@ -963,7 +963,7 @@ define <4 x i16> @sqrshrun4h(<4 x i32>* %A) nounwind {
define <2 x i32> @sqrshrun2s(<2 x i64>* %A) nounwind {
;CHECK-LABEL: sqrshrun2s:
;CHECK: sqrshrun.2s v0, {{v[0-9]+}}, #1
- %tmp1 = load <2 x i64>* %A
+ %tmp1 = load <2 x i64>, <2 x i64>* %A
%tmp3 = call <2 x i32> @llvm.aarch64.neon.sqrshrun.v2i32(<2 x i64> %tmp1, i32 1)
ret <2 x i32> %tmp3
}
@@ -971,8 +971,8 @@ define <2 x i32> @sqrshrun2s(<2 x i64>* %A) nounwind {
define <16 x i8> @sqrshrun16b(<8 x i8>* %ret, <8 x i16>* %A) nounwind {
;CHECK-LABEL: sqrshrun16b:
;CHECK: sqrshrun2.16b v0, {{v[0-9]+}}, #1
- %out = load <8 x i8>* %ret
- %tmp1 = load <8 x i16>* %A
+ %out = load <8 x i8>, <8 x i8>* %ret
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
%tmp3 = call <8 x i8> @llvm.aarch64.neon.sqrshrun.v8i8(<8 x i16> %tmp1, i32 1)
%tmp4 = shufflevector <8 x i8> %out, <8 x i8> %tmp3, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
ret <16 x i8> %tmp4
@@ -981,8 +981,8 @@ define <16 x i8> @sqrshrun16b(<8 x i8>* %ret, <8 x i16>* %A) nounwind {
define <8 x i16> @sqrshrun8h(<4 x i16>* %ret, <4 x i32>* %A) nounwind {
;CHECK-LABEL: sqrshrun8h:
;CHECK: sqrshrun2.8h v0, {{v[0-9]+}}, #1
- %out = load <4 x i16>* %ret
- %tmp1 = load <4 x i32>* %A
+ %out = load <4 x i16>, <4 x i16>* %ret
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
%tmp3 = call <4 x i16> @llvm.aarch64.neon.sqrshrun.v4i16(<4 x i32> %tmp1, i32 1)
%tmp4 = shufflevector <4 x i16> %out, <4 x i16> %tmp3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
ret <8 x i16> %tmp4
@@ -991,8 +991,8 @@ define <8 x i16> @sqrshrun8h(<4 x i16>* %ret, <4 x i32>* %A) nounwind {
define <4 x i32> @sqrshrun4s(<2 x i32>* %ret, <2 x i64>* %A) nounwind {
;CHECK-LABEL: sqrshrun4s:
;CHECK: sqrshrun2.4s v0, {{v[0-9]+}}, #1
- %out = load <2 x i32>* %ret
- %tmp1 = load <2 x i64>* %A
+ %out = load <2 x i32>, <2 x i32>* %ret
+ %tmp1 = load <2 x i64>, <2 x i64>* %A
%tmp3 = call <2 x i32> @llvm.aarch64.neon.sqrshrun.v2i32(<2 x i64> %tmp1, i32 1)
%tmp4 = shufflevector <2 x i32> %out, <2 x i32> %tmp3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
ret <4 x i32> %tmp4
@@ -1013,7 +1013,7 @@ define i32 @uqrshrn1s(i64 %A) nounwind {
define <8 x i8> @uqrshrn8b(<8 x i16>* %A) nounwind {
;CHECK-LABEL: uqrshrn8b:
;CHECK: uqrshrn.8b v0, {{v[0-9]+}}, #1
- %tmp1 = load <8 x i16>* %A
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
%tmp3 = call <8 x i8> @llvm.aarch64.neon.uqrshrn.v8i8(<8 x i16> %tmp1, i32 1)
ret <8 x i8> %tmp3
}
@@ -1021,7 +1021,7 @@ define <8 x i8> @uqrshrn8b(<8 x i16>* %A) nounwind {
define <4 x i16> @uqrshrn4h(<4 x i32>* %A) nounwind {
;CHECK-LABEL: uqrshrn4h:
;CHECK: uqrshrn.4h v0, {{v[0-9]+}}, #1
- %tmp1 = load <4 x i32>* %A
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
%tmp3 = call <4 x i16> @llvm.aarch64.neon.uqrshrn.v4i16(<4 x i32> %tmp1, i32 1)
ret <4 x i16> %tmp3
}
@@ -1029,7 +1029,7 @@ define <4 x i16> @uqrshrn4h(<4 x i32>* %A) nounwind {
define <2 x i32> @uqrshrn2s(<2 x i64>* %A) nounwind {
;CHECK-LABEL: uqrshrn2s:
;CHECK: uqrshrn.2s v0, {{v[0-9]+}}, #1
- %tmp1 = load <2 x i64>* %A
+ %tmp1 = load <2 x i64>, <2 x i64>* %A
%tmp3 = call <2 x i32> @llvm.aarch64.neon.uqrshrn.v2i32(<2 x i64> %tmp1, i32 1)
ret <2 x i32> %tmp3
}
@@ -1037,8 +1037,8 @@ define <2 x i32> @uqrshrn2s(<2 x i64>* %A) nounwind {
define <16 x i8> @uqrshrn16b(<8 x i8>* %ret, <8 x i16>* %A) nounwind {
;CHECK-LABEL: uqrshrn16b:
;CHECK: uqrshrn2.16b v0, {{v[0-9]+}}, #1
- %out = load <8 x i8>* %ret
- %tmp1 = load <8 x i16>* %A
+ %out = load <8 x i8>, <8 x i8>* %ret
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
%tmp3 = call <8 x i8> @llvm.aarch64.neon.uqrshrn.v8i8(<8 x i16> %tmp1, i32 1)
%tmp4 = shufflevector <8 x i8> %out, <8 x i8> %tmp3, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
ret <16 x i8> %tmp4
@@ -1047,8 +1047,8 @@ define <16 x i8> @uqrshrn16b(<8 x i8>* %ret, <8 x i16>* %A) nounwind {
define <8 x i16> @uqrshrn8h(<4 x i16>* %ret, <4 x i32>* %A) nounwind {
;CHECK-LABEL: uqrshrn8h:
;CHECK: uqrshrn2.8h v0, {{v[0-9]+}}, #1
- %out = load <4 x i16>* %ret
- %tmp1 = load <4 x i32>* %A
+ %out = load <4 x i16>, <4 x i16>* %ret
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
%tmp3 = call <4 x i16> @llvm.aarch64.neon.uqrshrn.v4i16(<4 x i32> %tmp1, i32 1)
%tmp4 = shufflevector <4 x i16> %out, <4 x i16> %tmp3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
ret <8 x i16> %tmp4
@@ -1057,8 +1057,8 @@ define <8 x i16> @uqrshrn8h(<4 x i16>* %ret, <4 x i32>* %A) nounwind {
define <4 x i32> @uqrshrn4s(<2 x i32>* %ret, <2 x i64>* %A) nounwind {
;CHECK-LABEL: uqrshrn4s:
;CHECK: uqrshrn2.4s v0, {{v[0-9]+}}, #1
- %out = load <2 x i32>* %ret
- %tmp1 = load <2 x i64>* %A
+ %out = load <2 x i32>, <2 x i32>* %ret
+ %tmp1 = load <2 x i64>, <2 x i64>* %A
%tmp3 = call <2 x i32> @llvm.aarch64.neon.uqrshrn.v2i32(<2 x i64> %tmp1, i32 1)
%tmp4 = shufflevector <2 x i32> %out, <2 x i32> %tmp3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
ret <4 x i32> %tmp4
@@ -1079,7 +1079,7 @@ define i32 @uqshrn1s(i64 %A) nounwind {
define <8 x i8> @uqshrn8b(<8 x i16>* %A) nounwind {
;CHECK-LABEL: uqshrn8b:
;CHECK: uqshrn.8b v0, {{v[0-9]+}}, #1
- %tmp1 = load <8 x i16>* %A
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
%tmp3 = call <8 x i8> @llvm.aarch64.neon.uqshrn.v8i8(<8 x i16> %tmp1, i32 1)
ret <8 x i8> %tmp3
}
@@ -1087,7 +1087,7 @@ define <8 x i8> @uqshrn8b(<8 x i16>* %A) nounwind {
define <4 x i16> @uqshrn4h(<4 x i32>* %A) nounwind {
;CHECK-LABEL: uqshrn4h:
;CHECK: uqshrn.4h v0, {{v[0-9]+}}, #1
- %tmp1 = load <4 x i32>* %A
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
%tmp3 = call <4 x i16> @llvm.aarch64.neon.uqshrn.v4i16(<4 x i32> %tmp1, i32 1)
ret <4 x i16> %tmp3
}
@@ -1095,7 +1095,7 @@ define <4 x i16> @uqshrn4h(<4 x i32>* %A) nounwind {
define <2 x i32> @uqshrn2s(<2 x i64>* %A) nounwind {
;CHECK-LABEL: uqshrn2s:
;CHECK: uqshrn.2s v0, {{v[0-9]+}}, #1
- %tmp1 = load <2 x i64>* %A
+ %tmp1 = load <2 x i64>, <2 x i64>* %A
%tmp3 = call <2 x i32> @llvm.aarch64.neon.uqshrn.v2i32(<2 x i64> %tmp1, i32 1)
ret <2 x i32> %tmp3
}
@@ -1103,8 +1103,8 @@ define <2 x i32> @uqshrn2s(<2 x i64>* %A) nounwind {
define <16 x i8> @uqshrn16b(<8 x i8>* %ret, <8 x i16>* %A) nounwind {
;CHECK-LABEL: uqshrn16b:
;CHECK: uqshrn2.16b v0, {{v[0-9]+}}, #1
- %out = load <8 x i8>* %ret
- %tmp1 = load <8 x i16>* %A
+ %out = load <8 x i8>, <8 x i8>* %ret
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
%tmp3 = call <8 x i8> @llvm.aarch64.neon.uqshrn.v8i8(<8 x i16> %tmp1, i32 1)
%tmp4 = shufflevector <8 x i8> %out, <8 x i8> %tmp3, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
ret <16 x i8> %tmp4
@@ -1113,8 +1113,8 @@ define <16 x i8> @uqshrn16b(<8 x i8>* %ret, <8 x i16>* %A) nounwind {
define <8 x i16> @uqshrn8h(<4 x i16>* %ret, <4 x i32>* %A) nounwind {
;CHECK-LABEL: uqshrn8h:
;CHECK: uqshrn2.8h v0, {{v[0-9]+}}, #1
- %out = load <4 x i16>* %ret
- %tmp1 = load <4 x i32>* %A
+ %out = load <4 x i16>, <4 x i16>* %ret
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
%tmp3 = call <4 x i16> @llvm.aarch64.neon.uqshrn.v4i16(<4 x i32> %tmp1, i32 1)
%tmp4 = shufflevector <4 x i16> %out, <4 x i16> %tmp3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
ret <8 x i16> %tmp4
@@ -1123,8 +1123,8 @@ define <8 x i16> @uqshrn8h(<4 x i16>* %ret, <4 x i32>* %A) nounwind {
define <4 x i32> @uqshrn4s(<2 x i32>* %ret, <2 x i64>* %A) nounwind {
;CHECK-LABEL: uqshrn4s:
;CHECK: uqshrn2.4s v0, {{v[0-9]+}}, #1
- %out = load <2 x i32>* %ret
- %tmp1 = load <2 x i64>* %A
+ %out = load <2 x i32>, <2 x i32>* %ret
+ %tmp1 = load <2 x i64>, <2 x i64>* %A
%tmp3 = call <2 x i32> @llvm.aarch64.neon.uqshrn.v2i32(<2 x i64> %tmp1, i32 1)
%tmp4 = shufflevector <2 x i32> %out, <2 x i32> %tmp3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
ret <4 x i32> %tmp4
@@ -1138,7 +1138,7 @@ declare <2 x i32> @llvm.aarch64.neon.uqshrn.v2i32(<2 x i64>, i32) nounwind readn
define <8 x i16> @ushll8h(<8 x i8>* %A) nounwind {
;CHECK-LABEL: ushll8h:
;CHECK: ushll.8h v0, {{v[0-9]+}}, #1
- %tmp1 = load <8 x i8>* %A
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
%tmp2 = zext <8 x i8> %tmp1 to <8 x i16>
%tmp3 = shl <8 x i16> %tmp2, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
ret <8 x i16> %tmp3
@@ -1147,7 +1147,7 @@ define <8 x i16> @ushll8h(<8 x i8>* %A) nounwind {
define <4 x i32> @ushll4s(<4 x i16>* %A) nounwind {
;CHECK-LABEL: ushll4s:
;CHECK: ushll.4s v0, {{v[0-9]+}}, #1
- %tmp1 = load <4 x i16>* %A
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
%tmp2 = zext <4 x i16> %tmp1 to <4 x i32>
%tmp3 = shl <4 x i32> %tmp2, <i32 1, i32 1, i32 1, i32 1>
ret <4 x i32> %tmp3
@@ -1156,7 +1156,7 @@ define <4 x i32> @ushll4s(<4 x i16>* %A) nounwind {
define <2 x i64> @ushll2d(<2 x i32>* %A) nounwind {
;CHECK-LABEL: ushll2d:
;CHECK: ushll.2d v0, {{v[0-9]+}}, #1
- %tmp1 = load <2 x i32>* %A
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
%tmp2 = zext <2 x i32> %tmp1 to <2 x i64>
%tmp3 = shl <2 x i64> %tmp2, <i64 1, i64 1>
ret <2 x i64> %tmp3
@@ -1165,7 +1165,7 @@ define <2 x i64> @ushll2d(<2 x i32>* %A) nounwind {
define <8 x i16> @ushll2_8h(<16 x i8>* %A) nounwind {
;CHECK-LABEL: ushll2_8h:
;CHECK: ushll2.8h v0, {{v[0-9]+}}, #1
- %load1 = load <16 x i8>* %A
+ %load1 = load <16 x i8>, <16 x i8>* %A
%tmp1 = shufflevector <16 x i8> %load1, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%tmp2 = zext <8 x i8> %tmp1 to <8 x i16>
%tmp3 = shl <8 x i16> %tmp2, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
@@ -1175,7 +1175,7 @@ define <8 x i16> @ushll2_8h(<16 x i8>* %A) nounwind {
define <4 x i32> @ushll2_4s(<8 x i16>* %A) nounwind {
;CHECK-LABEL: ushll2_4s:
;CHECK: ushll2.4s v0, {{v[0-9]+}}, #1
- %load1 = load <8 x i16>* %A
+ %load1 = load <8 x i16>, <8 x i16>* %A
%tmp1 = shufflevector <8 x i16> %load1, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
%tmp2 = zext <4 x i16> %tmp1 to <4 x i32>
%tmp3 = shl <4 x i32> %tmp2, <i32 1, i32 1, i32 1, i32 1>
@@ -1185,7 +1185,7 @@ define <4 x i32> @ushll2_4s(<8 x i16>* %A) nounwind {
define <2 x i64> @ushll2_2d(<4 x i32>* %A) nounwind {
;CHECK-LABEL: ushll2_2d:
;CHECK: ushll2.2d v0, {{v[0-9]+}}, #1
- %load1 = load <4 x i32>* %A
+ %load1 = load <4 x i32>, <4 x i32>* %A
%tmp1 = shufflevector <4 x i32> %load1, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
%tmp2 = zext <2 x i32> %tmp1 to <2 x i64>
%tmp3 = shl <2 x i64> %tmp2, <i64 1, i64 1>
@@ -1195,7 +1195,7 @@ define <2 x i64> @ushll2_2d(<4 x i32>* %A) nounwind {
define <8 x i16> @sshll8h(<8 x i8>* %A) nounwind {
;CHECK-LABEL: sshll8h:
;CHECK: sshll.8h v0, {{v[0-9]+}}, #1
- %tmp1 = load <8 x i8>* %A
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
%tmp2 = sext <8 x i8> %tmp1 to <8 x i16>
%tmp3 = shl <8 x i16> %tmp2, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
ret <8 x i16> %tmp3
@@ -1204,7 +1204,7 @@ define <8 x i16> @sshll8h(<8 x i8>* %A) nounwind {
define <4 x i32> @sshll4s(<4 x i16>* %A) nounwind {
;CHECK-LABEL: sshll4s:
;CHECK: sshll.4s v0, {{v[0-9]+}}, #1
- %tmp1 = load <4 x i16>* %A
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
%tmp2 = sext <4 x i16> %tmp1 to <4 x i32>
%tmp3 = shl <4 x i32> %tmp2, <i32 1, i32 1, i32 1, i32 1>
ret <4 x i32> %tmp3
@@ -1213,7 +1213,7 @@ define <4 x i32> @sshll4s(<4 x i16>* %A) nounwind {
define <2 x i64> @sshll2d(<2 x i32>* %A) nounwind {
;CHECK-LABEL: sshll2d:
;CHECK: sshll.2d v0, {{v[0-9]+}}, #1
- %tmp1 = load <2 x i32>* %A
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
%tmp2 = sext <2 x i32> %tmp1 to <2 x i64>
%tmp3 = shl <2 x i64> %tmp2, <i64 1, i64 1>
ret <2 x i64> %tmp3
@@ -1222,7 +1222,7 @@ define <2 x i64> @sshll2d(<2 x i32>* %A) nounwind {
define <8 x i16> @sshll2_8h(<16 x i8>* %A) nounwind {
;CHECK-LABEL: sshll2_8h:
;CHECK: sshll2.8h v0, {{v[0-9]+}}, #1
- %load1 = load <16 x i8>* %A
+ %load1 = load <16 x i8>, <16 x i8>* %A
%tmp1 = shufflevector <16 x i8> %load1, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%tmp2 = sext <8 x i8> %tmp1 to <8 x i16>
%tmp3 = shl <8 x i16> %tmp2, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
@@ -1232,7 +1232,7 @@ define <8 x i16> @sshll2_8h(<16 x i8>* %A) nounwind {
define <4 x i32> @sshll2_4s(<8 x i16>* %A) nounwind {
;CHECK-LABEL: sshll2_4s:
;CHECK: sshll2.4s v0, {{v[0-9]+}}, #1
- %load1 = load <8 x i16>* %A
+ %load1 = load <8 x i16>, <8 x i16>* %A
%tmp1 = shufflevector <8 x i16> %load1, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
%tmp2 = sext <4 x i16> %tmp1 to <4 x i32>
%tmp3 = shl <4 x i32> %tmp2, <i32 1, i32 1, i32 1, i32 1>
@@ -1242,7 +1242,7 @@ define <4 x i32> @sshll2_4s(<8 x i16>* %A) nounwind {
define <2 x i64> @sshll2_2d(<4 x i32>* %A) nounwind {
;CHECK-LABEL: sshll2_2d:
;CHECK: sshll2.2d v0, {{v[0-9]+}}, #1
- %load1 = load <4 x i32>* %A
+ %load1 = load <4 x i32>, <4 x i32>* %A
%tmp1 = shufflevector <4 x i32> %load1, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
%tmp2 = sext <2 x i32> %tmp1 to <2 x i64>
%tmp3 = shl <2 x i64> %tmp2, <i64 1, i64 1>
@@ -1252,7 +1252,7 @@ define <2 x i64> @sshll2_2d(<4 x i32>* %A) nounwind {
define <8 x i8> @sqshli8b(<8 x i8>* %A) nounwind {
;CHECK-LABEL: sqshli8b:
;CHECK: sqshl.8b v0, {{v[0-9]+}}, #1
- %tmp1 = load <8 x i8>* %A
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
%tmp3 = call <8 x i8> @llvm.aarch64.neon.sqshl.v8i8(<8 x i8> %tmp1, <8 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
ret <8 x i8> %tmp3
}
@@ -1260,7 +1260,7 @@ define <8 x i8> @sqshli8b(<8 x i8>* %A) nounwind {
define <4 x i16> @sqshli4h(<4 x i16>* %A) nounwind {
;CHECK-LABEL: sqshli4h:
;CHECK: sqshl.4h v0, {{v[0-9]+}}, #1
- %tmp1 = load <4 x i16>* %A
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
%tmp3 = call <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i16> %tmp1, <4 x i16> <i16 1, i16 1, i16 1, i16 1>)
ret <4 x i16> %tmp3
}
@@ -1268,7 +1268,7 @@ define <4 x i16> @sqshli4h(<4 x i16>* %A) nounwind {
define <2 x i32> @sqshli2s(<2 x i32>* %A) nounwind {
;CHECK-LABEL: sqshli2s:
;CHECK: sqshl.2s v0, {{v[0-9]+}}, #1
- %tmp1 = load <2 x i32>* %A
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
%tmp3 = call <2 x i32> @llvm.aarch64.neon.sqshl.v2i32(<2 x i32> %tmp1, <2 x i32> <i32 1, i32 1>)
ret <2 x i32> %tmp3
}
@@ -1276,7 +1276,7 @@ define <2 x i32> @sqshli2s(<2 x i32>* %A) nounwind {
define <16 x i8> @sqshli16b(<16 x i8>* %A) nounwind {
;CHECK-LABEL: sqshli16b:
;CHECK: sqshl.16b v0, {{v[0-9]+}}, #1
- %tmp1 = load <16 x i8>* %A
+ %tmp1 = load <16 x i8>, <16 x i8>* %A
%tmp3 = call <16 x i8> @llvm.aarch64.neon.sqshl.v16i8(<16 x i8> %tmp1, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
ret <16 x i8> %tmp3
}
@@ -1284,7 +1284,7 @@ define <16 x i8> @sqshli16b(<16 x i8>* %A) nounwind {
define <8 x i16> @sqshli8h(<8 x i16>* %A) nounwind {
;CHECK-LABEL: sqshli8h:
;CHECK: sqshl.8h v0, {{v[0-9]+}}, #1
- %tmp1 = load <8 x i16>* %A
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
%tmp3 = call <8 x i16> @llvm.aarch64.neon.sqshl.v8i16(<8 x i16> %tmp1, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
ret <8 x i16> %tmp3
}
@@ -1292,7 +1292,7 @@ define <8 x i16> @sqshli8h(<8 x i16>* %A) nounwind {
define <4 x i32> @sqshli4s(<4 x i32>* %A) nounwind {
;CHECK-LABEL: sqshli4s:
;CHECK: sqshl.4s v0, {{v[0-9]+}}, #1
- %tmp1 = load <4 x i32>* %A
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
%tmp3 = call <4 x i32> @llvm.aarch64.neon.sqshl.v4i32(<4 x i32> %tmp1, <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
ret <4 x i32> %tmp3
}
@@ -1300,7 +1300,7 @@ define <4 x i32> @sqshli4s(<4 x i32>* %A) nounwind {
define <2 x i64> @sqshli2d(<2 x i64>* %A) nounwind {
;CHECK-LABEL: sqshli2d:
;CHECK: sqshl.2d v0, {{v[0-9]+}}, #1
- %tmp1 = load <2 x i64>* %A
+ %tmp1 = load <2 x i64>, <2 x i64>* %A
%tmp3 = call <2 x i64> @llvm.aarch64.neon.sqshl.v2i64(<2 x i64> %tmp1, <2 x i64> <i64 1, i64 1>)
ret <2 x i64> %tmp3
}
@@ -1308,7 +1308,7 @@ define <2 x i64> @sqshli2d(<2 x i64>* %A) nounwind {
define <8 x i8> @uqshli8b(<8 x i8>* %A) nounwind {
;CHECK-LABEL: uqshli8b:
;CHECK: uqshl.8b v0, {{v[0-9]+}}, #1
- %tmp1 = load <8 x i8>* %A
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
%tmp3 = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> %tmp1, <8 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
ret <8 x i8> %tmp3
}
@@ -1317,7 +1317,7 @@ define <8 x i8> @uqshli8b_1(<8 x i8>* %A) nounwind {
;CHECK-LABEL: uqshli8b_1:
;CHECK: movi.8b [[REG:v[0-9]+]], #0x8
;CHECK: uqshl.8b v0, v0, [[REG]]
- %tmp1 = load <8 x i8>* %A
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
%tmp3 = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> %tmp1, <8 x i8> <i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8>)
ret <8 x i8> %tmp3
}
@@ -1325,7 +1325,7 @@ define <8 x i8> @uqshli8b_1(<8 x i8>* %A) nounwind {
define <4 x i16> @uqshli4h(<4 x i16>* %A) nounwind {
;CHECK-LABEL: uqshli4h:
;CHECK: uqshl.4h v0, {{v[0-9]+}}, #1
- %tmp1 = load <4 x i16>* %A
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
%tmp3 = call <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16> %tmp1, <4 x i16> <i16 1, i16 1, i16 1, i16 1>)
ret <4 x i16> %tmp3
}
@@ -1333,7 +1333,7 @@ define <4 x i16> @uqshli4h(<4 x i16>* %A) nounwind {
define <2 x i32> @uqshli2s(<2 x i32>* %A) nounwind {
;CHECK-LABEL: uqshli2s:
;CHECK: uqshl.2s v0, {{v[0-9]+}}, #1
- %tmp1 = load <2 x i32>* %A
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
%tmp3 = call <2 x i32> @llvm.aarch64.neon.uqshl.v2i32(<2 x i32> %tmp1, <2 x i32> <i32 1, i32 1>)
ret <2 x i32> %tmp3
}
@@ -1341,7 +1341,7 @@ define <2 x i32> @uqshli2s(<2 x i32>* %A) nounwind {
define <16 x i8> @uqshli16b(<16 x i8>* %A) nounwind {
;CHECK-LABEL: uqshli16b:
;CHECK: uqshl.16b
- %tmp1 = load <16 x i8>* %A
+ %tmp1 = load <16 x i8>, <16 x i8>* %A
%tmp3 = call <16 x i8> @llvm.aarch64.neon.uqshl.v16i8(<16 x i8> %tmp1, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
ret <16 x i8> %tmp3
}
@@ -1349,7 +1349,7 @@ define <16 x i8> @uqshli16b(<16 x i8>* %A) nounwind {
define <8 x i16> @uqshli8h(<8 x i16>* %A) nounwind {
;CHECK-LABEL: uqshli8h:
;CHECK: uqshl.8h v0, {{v[0-9]+}}, #1
- %tmp1 = load <8 x i16>* %A
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
%tmp3 = call <8 x i16> @llvm.aarch64.neon.uqshl.v8i16(<8 x i16> %tmp1, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
ret <8 x i16> %tmp3
}
@@ -1357,7 +1357,7 @@ define <8 x i16> @uqshli8h(<8 x i16>* %A) nounwind {
define <4 x i32> @uqshli4s(<4 x i32>* %A) nounwind {
;CHECK-LABEL: uqshli4s:
;CHECK: uqshl.4s v0, {{v[0-9]+}}, #1
- %tmp1 = load <4 x i32>* %A
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
%tmp3 = call <4 x i32> @llvm.aarch64.neon.uqshl.v4i32(<4 x i32> %tmp1, <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
ret <4 x i32> %tmp3
}
@@ -1365,7 +1365,7 @@ define <4 x i32> @uqshli4s(<4 x i32>* %A) nounwind {
define <2 x i64> @uqshli2d(<2 x i64>* %A) nounwind {
;CHECK-LABEL: uqshli2d:
;CHECK: uqshl.2d v0, {{v[0-9]+}}, #1
- %tmp1 = load <2 x i64>* %A
+ %tmp1 = load <2 x i64>, <2 x i64>* %A
%tmp3 = call <2 x i64> @llvm.aarch64.neon.uqshl.v2i64(<2 x i64> %tmp1, <2 x i64> <i64 1, i64 1>)
ret <2 x i64> %tmp3
}
@@ -1373,9 +1373,9 @@ define <2 x i64> @uqshli2d(<2 x i64>* %A) nounwind {
define <8 x i8> @ursra8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: ursra8b:
;CHECK: ursra.8b v0, {{v[0-9]+}}, #1
- %tmp1 = load <8 x i8>* %A
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
%tmp3 = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> %tmp1, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
- %tmp4 = load <8 x i8>* %B
+ %tmp4 = load <8 x i8>, <8 x i8>* %B
%tmp5 = add <8 x i8> %tmp3, %tmp4
ret <8 x i8> %tmp5
}
@@ -1383,9 +1383,9 @@ define <8 x i8> @ursra8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
define <4 x i16> @ursra4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: ursra4h:
;CHECK: ursra.4h v0, {{v[0-9]+}}, #1
- %tmp1 = load <4 x i16>* %A
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
%tmp3 = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> %tmp1, <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>)
- %tmp4 = load <4 x i16>* %B
+ %tmp4 = load <4 x i16>, <4 x i16>* %B
%tmp5 = add <4 x i16> %tmp3, %tmp4
ret <4 x i16> %tmp5
}
@@ -1393,9 +1393,9 @@ define <4 x i16> @ursra4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
define <2 x i32> @ursra2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: ursra2s:
;CHECK: ursra.2s v0, {{v[0-9]+}}, #1
- %tmp1 = load <2 x i32>* %A
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
%tmp3 = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> %tmp1, <2 x i32> <i32 -1, i32 -1>)
- %tmp4 = load <2 x i32>* %B
+ %tmp4 = load <2 x i32>, <2 x i32>* %B
%tmp5 = add <2 x i32> %tmp3, %tmp4
ret <2 x i32> %tmp5
}
@@ -1403,9 +1403,9 @@ define <2 x i32> @ursra2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
define <16 x i8> @ursra16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: ursra16b:
;CHECK: ursra.16b v0, {{v[0-9]+}}, #1
- %tmp1 = load <16 x i8>* %A
+ %tmp1 = load <16 x i8>, <16 x i8>* %A
%tmp3 = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> %tmp1, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
- %tmp4 = load <16 x i8>* %B
+ %tmp4 = load <16 x i8>, <16 x i8>* %B
%tmp5 = add <16 x i8> %tmp3, %tmp4
ret <16 x i8> %tmp5
}
@@ -1413,9 +1413,9 @@ define <16 x i8> @ursra16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
define <8 x i16> @ursra8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: ursra8h:
;CHECK: ursra.8h v0, {{v[0-9]+}}, #1
- %tmp1 = load <8 x i16>* %A
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
%tmp3 = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> %tmp1, <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
- %tmp4 = load <8 x i16>* %B
+ %tmp4 = load <8 x i16>, <8 x i16>* %B
%tmp5 = add <8 x i16> %tmp3, %tmp4
ret <8 x i16> %tmp5
}
@@ -1423,9 +1423,9 @@ define <8 x i16> @ursra8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
define <4 x i32> @ursra4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: ursra4s:
;CHECK: ursra.4s v0, {{v[0-9]+}}, #1
- %tmp1 = load <4 x i32>* %A
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
%tmp3 = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> %tmp1, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
- %tmp4 = load <4 x i32>* %B
+ %tmp4 = load <4 x i32>, <4 x i32>* %B
%tmp5 = add <4 x i32> %tmp3, %tmp4
ret <4 x i32> %tmp5
}
@@ -1433,9 +1433,9 @@ define <4 x i32> @ursra4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
define <2 x i64> @ursra2d(<2 x i64>* %A, <2 x i64>* %B) nounwind {
;CHECK-LABEL: ursra2d:
;CHECK: ursra.2d v0, {{v[0-9]+}}, #1
- %tmp1 = load <2 x i64>* %A
+ %tmp1 = load <2 x i64>, <2 x i64>* %A
%tmp3 = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> %tmp1, <2 x i64> <i64 -1, i64 -1>)
- %tmp4 = load <2 x i64>* %B
+ %tmp4 = load <2 x i64>, <2 x i64>* %B
%tmp5 = add <2 x i64> %tmp3, %tmp4
ret <2 x i64> %tmp5
}
@@ -1443,9 +1443,9 @@ define <2 x i64> @ursra2d(<2 x i64>* %A, <2 x i64>* %B) nounwind {
define <8 x i8> @srsra8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: srsra8b:
;CHECK: srsra.8b v0, {{v[0-9]+}}, #1
- %tmp1 = load <8 x i8>* %A
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
%tmp3 = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> %tmp1, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
- %tmp4 = load <8 x i8>* %B
+ %tmp4 = load <8 x i8>, <8 x i8>* %B
%tmp5 = add <8 x i8> %tmp3, %tmp4
ret <8 x i8> %tmp5
}
@@ -1453,9 +1453,9 @@ define <8 x i8> @srsra8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
define <4 x i16> @srsra4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: srsra4h:
;CHECK: srsra.4h v0, {{v[0-9]+}}, #1
- %tmp1 = load <4 x i16>* %A
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
%tmp3 = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> %tmp1, <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>)
- %tmp4 = load <4 x i16>* %B
+ %tmp4 = load <4 x i16>, <4 x i16>* %B
%tmp5 = add <4 x i16> %tmp3, %tmp4
ret <4 x i16> %tmp5
}
@@ -1463,9 +1463,9 @@ define <4 x i16> @srsra4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
define <2 x i32> @srsra2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: srsra2s:
;CHECK: srsra.2s v0, {{v[0-9]+}}, #1
- %tmp1 = load <2 x i32>* %A
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
%tmp3 = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> %tmp1, <2 x i32> <i32 -1, i32 -1>)
- %tmp4 = load <2 x i32>* %B
+ %tmp4 = load <2 x i32>, <2 x i32>* %B
%tmp5 = add <2 x i32> %tmp3, %tmp4
ret <2 x i32> %tmp5
}
@@ -1473,9 +1473,9 @@ define <2 x i32> @srsra2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
define <16 x i8> @srsra16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: srsra16b:
;CHECK: srsra.16b v0, {{v[0-9]+}}, #1
- %tmp1 = load <16 x i8>* %A
+ %tmp1 = load <16 x i8>, <16 x i8>* %A
%tmp3 = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> %tmp1, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
- %tmp4 = load <16 x i8>* %B
+ %tmp4 = load <16 x i8>, <16 x i8>* %B
%tmp5 = add <16 x i8> %tmp3, %tmp4
ret <16 x i8> %tmp5
}
@@ -1483,9 +1483,9 @@ define <16 x i8> @srsra16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
define <8 x i16> @srsra8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: srsra8h:
;CHECK: srsra.8h v0, {{v[0-9]+}}, #1
- %tmp1 = load <8 x i16>* %A
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
%tmp3 = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> %tmp1, <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
- %tmp4 = load <8 x i16>* %B
+ %tmp4 = load <8 x i16>, <8 x i16>* %B
%tmp5 = add <8 x i16> %tmp3, %tmp4
ret <8 x i16> %tmp5
}
@@ -1493,9 +1493,9 @@ define <8 x i16> @srsra8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
define <4 x i32> @srsra4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: srsra4s:
;CHECK: srsra.4s v0, {{v[0-9]+}}, #1
- %tmp1 = load <4 x i32>* %A
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
%tmp3 = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> %tmp1, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
- %tmp4 = load <4 x i32>* %B
+ %tmp4 = load <4 x i32>, <4 x i32>* %B
%tmp5 = add <4 x i32> %tmp3, %tmp4
ret <4 x i32> %tmp5
}
@@ -1503,9 +1503,9 @@ define <4 x i32> @srsra4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
define <2 x i64> @srsra2d(<2 x i64>* %A, <2 x i64>* %B) nounwind {
;CHECK-LABEL: srsra2d:
;CHECK: srsra.2d v0, {{v[0-9]+}}, #1
- %tmp1 = load <2 x i64>* %A
+ %tmp1 = load <2 x i64>, <2 x i64>* %A
%tmp3 = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> %tmp1, <2 x i64> <i64 -1, i64 -1>)
- %tmp4 = load <2 x i64>* %B
+ %tmp4 = load <2 x i64>, <2 x i64>* %B
%tmp5 = add <2 x i64> %tmp3, %tmp4
ret <2 x i64> %tmp5
}
@@ -1513,9 +1513,9 @@ define <2 x i64> @srsra2d(<2 x i64>* %A, <2 x i64>* %B) nounwind {
define <8 x i8> @usra8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: usra8b:
;CHECK: usra.8b v0, {{v[0-9]+}}, #1
- %tmp1 = load <8 x i8>* %A
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
%tmp3 = lshr <8 x i8> %tmp1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
- %tmp4 = load <8 x i8>* %B
+ %tmp4 = load <8 x i8>, <8 x i8>* %B
%tmp5 = add <8 x i8> %tmp3, %tmp4
ret <8 x i8> %tmp5
}
@@ -1523,9 +1523,9 @@ define <8 x i8> @usra8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
define <4 x i16> @usra4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: usra4h:
;CHECK: usra.4h v0, {{v[0-9]+}}, #1
- %tmp1 = load <4 x i16>* %A
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
%tmp3 = lshr <4 x i16> %tmp1, <i16 1, i16 1, i16 1, i16 1>
- %tmp4 = load <4 x i16>* %B
+ %tmp4 = load <4 x i16>, <4 x i16>* %B
%tmp5 = add <4 x i16> %tmp3, %tmp4
ret <4 x i16> %tmp5
}
@@ -1533,9 +1533,9 @@ define <4 x i16> @usra4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
define <2 x i32> @usra2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: usra2s:
;CHECK: usra.2s v0, {{v[0-9]+}}, #1
- %tmp1 = load <2 x i32>* %A
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
%tmp3 = lshr <2 x i32> %tmp1, <i32 1, i32 1>
- %tmp4 = load <2 x i32>* %B
+ %tmp4 = load <2 x i32>, <2 x i32>* %B
%tmp5 = add <2 x i32> %tmp3, %tmp4
ret <2 x i32> %tmp5
}
@@ -1543,9 +1543,9 @@ define <2 x i32> @usra2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
define <16 x i8> @usra16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: usra16b:
;CHECK: usra.16b v0, {{v[0-9]+}}, #1
- %tmp1 = load <16 x i8>* %A
+ %tmp1 = load <16 x i8>, <16 x i8>* %A
%tmp3 = lshr <16 x i8> %tmp1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
- %tmp4 = load <16 x i8>* %B
+ %tmp4 = load <16 x i8>, <16 x i8>* %B
%tmp5 = add <16 x i8> %tmp3, %tmp4
ret <16 x i8> %tmp5
}
@@ -1553,9 +1553,9 @@ define <16 x i8> @usra16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
define <8 x i16> @usra8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: usra8h:
;CHECK: usra.8h v0, {{v[0-9]+}}, #1
- %tmp1 = load <8 x i16>* %A
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
%tmp3 = lshr <8 x i16> %tmp1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
- %tmp4 = load <8 x i16>* %B
+ %tmp4 = load <8 x i16>, <8 x i16>* %B
%tmp5 = add <8 x i16> %tmp3, %tmp4
ret <8 x i16> %tmp5
}
@@ -1563,9 +1563,9 @@ define <8 x i16> @usra8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
define <4 x i32> @usra4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: usra4s:
;CHECK: usra.4s v0, {{v[0-9]+}}, #1
- %tmp1 = load <4 x i32>* %A
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
%tmp3 = lshr <4 x i32> %tmp1, <i32 1, i32 1, i32 1, i32 1>
- %tmp4 = load <4 x i32>* %B
+ %tmp4 = load <4 x i32>, <4 x i32>* %B
%tmp5 = add <4 x i32> %tmp3, %tmp4
ret <4 x i32> %tmp5
}
@@ -1573,9 +1573,9 @@ define <4 x i32> @usra4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
define <2 x i64> @usra2d(<2 x i64>* %A, <2 x i64>* %B) nounwind {
;CHECK-LABEL: usra2d:
;CHECK: usra.2d v0, {{v[0-9]+}}, #1
- %tmp1 = load <2 x i64>* %A
+ %tmp1 = load <2 x i64>, <2 x i64>* %A
%tmp3 = lshr <2 x i64> %tmp1, <i64 1, i64 1>
- %tmp4 = load <2 x i64>* %B
+ %tmp4 = load <2 x i64>, <2 x i64>* %B
%tmp5 = add <2 x i64> %tmp3, %tmp4
ret <2 x i64> %tmp5
}
@@ -1583,9 +1583,9 @@ define <2 x i64> @usra2d(<2 x i64>* %A, <2 x i64>* %B) nounwind {
define <8 x i8> @ssra8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: ssra8b:
;CHECK: ssra.8b v0, {{v[0-9]+}}, #1
- %tmp1 = load <8 x i8>* %A
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
%tmp3 = ashr <8 x i8> %tmp1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
- %tmp4 = load <8 x i8>* %B
+ %tmp4 = load <8 x i8>, <8 x i8>* %B
%tmp5 = add <8 x i8> %tmp3, %tmp4
ret <8 x i8> %tmp5
}
@@ -1593,9 +1593,9 @@ define <8 x i8> @ssra8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
define <4 x i16> @ssra4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: ssra4h:
;CHECK: ssra.4h v0, {{v[0-9]+}}, #1
- %tmp1 = load <4 x i16>* %A
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
%tmp3 = ashr <4 x i16> %tmp1, <i16 1, i16 1, i16 1, i16 1>
- %tmp4 = load <4 x i16>* %B
+ %tmp4 = load <4 x i16>, <4 x i16>* %B
%tmp5 = add <4 x i16> %tmp3, %tmp4
ret <4 x i16> %tmp5
}
@@ -1603,9 +1603,9 @@ define <4 x i16> @ssra4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
define <2 x i32> @ssra2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: ssra2s:
;CHECK: ssra.2s v0, {{v[0-9]+}}, #1
- %tmp1 = load <2 x i32>* %A
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
%tmp3 = ashr <2 x i32> %tmp1, <i32 1, i32 1>
- %tmp4 = load <2 x i32>* %B
+ %tmp4 = load <2 x i32>, <2 x i32>* %B
%tmp5 = add <2 x i32> %tmp3, %tmp4
ret <2 x i32> %tmp5
}
@@ -1613,9 +1613,9 @@ define <2 x i32> @ssra2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
define <16 x i8> @ssra16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: ssra16b:
;CHECK: ssra.16b v0, {{v[0-9]+}}, #1
- %tmp1 = load <16 x i8>* %A
+ %tmp1 = load <16 x i8>, <16 x i8>* %A
%tmp3 = ashr <16 x i8> %tmp1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
- %tmp4 = load <16 x i8>* %B
+ %tmp4 = load <16 x i8>, <16 x i8>* %B
%tmp5 = add <16 x i8> %tmp3, %tmp4
ret <16 x i8> %tmp5
}
@@ -1623,9 +1623,9 @@ define <16 x i8> @ssra16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
define <8 x i16> @ssra8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: ssra8h:
;CHECK: ssra.8h v0, {{v[0-9]+}}, #1
- %tmp1 = load <8 x i16>* %A
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
%tmp3 = ashr <8 x i16> %tmp1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
- %tmp4 = load <8 x i16>* %B
+ %tmp4 = load <8 x i16>, <8 x i16>* %B
%tmp5 = add <8 x i16> %tmp3, %tmp4
ret <8 x i16> %tmp5
}
@@ -1633,9 +1633,9 @@ define <8 x i16> @ssra8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
define <4 x i32> @ssra4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: ssra4s:
;CHECK: ssra.4s v0, {{v[0-9]+}}, #1
- %tmp1 = load <4 x i32>* %A
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
%tmp3 = ashr <4 x i32> %tmp1, <i32 1, i32 1, i32 1, i32 1>
- %tmp4 = load <4 x i32>* %B
+ %tmp4 = load <4 x i32>, <4 x i32>* %B
%tmp5 = add <4 x i32> %tmp3, %tmp4
ret <4 x i32> %tmp5
}
@@ -1643,9 +1643,9 @@ define <4 x i32> @ssra4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
define <2 x i64> @ssra2d(<2 x i64>* %A, <2 x i64>* %B) nounwind {
;CHECK-LABEL: ssra2d:
;CHECK: ssra.2d v0, {{v[0-9]+}}, #1
- %tmp1 = load <2 x i64>* %A
+ %tmp1 = load <2 x i64>, <2 x i64>* %A
%tmp3 = ashr <2 x i64> %tmp1, <i64 1, i64 1>
- %tmp4 = load <2 x i64>* %B
+ %tmp4 = load <2 x i64>, <2 x i64>* %B
%tmp5 = add <2 x i64> %tmp3, %tmp4
ret <2 x i64> %tmp5
}
@@ -1655,8 +1655,8 @@ define <8 x i8> @shr_orr8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK: shr.8b v0, {{v[0-9]+}}, #1
;CHECK-NEXT: orr.8b
;CHECK-NEXT: ret
- %tmp1 = load <8 x i8>* %A
- %tmp4 = load <8 x i8>* %B
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
+ %tmp4 = load <8 x i8>, <8 x i8>* %B
%tmp3 = lshr <8 x i8> %tmp1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
%tmp5 = or <8 x i8> %tmp3, %tmp4
ret <8 x i8> %tmp5
@@ -1667,8 +1667,8 @@ define <4 x i16> @shr_orr4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK: shr.4h v0, {{v[0-9]+}}, #1
;CHECK-NEXT: orr.8b
;CHECK-NEXT: ret
- %tmp1 = load <4 x i16>* %A
- %tmp4 = load <4 x i16>* %B
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
+ %tmp4 = load <4 x i16>, <4 x i16>* %B
%tmp3 = lshr <4 x i16> %tmp1, <i16 1, i16 1, i16 1, i16 1>
%tmp5 = or <4 x i16> %tmp3, %tmp4
ret <4 x i16> %tmp5
@@ -1679,8 +1679,8 @@ define <2 x i32> @shr_orr2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK: shr.2s v0, {{v[0-9]+}}, #1
;CHECK-NEXT: orr.8b
;CHECK-NEXT: ret
- %tmp1 = load <2 x i32>* %A
- %tmp4 = load <2 x i32>* %B
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
+ %tmp4 = load <2 x i32>, <2 x i32>* %B
%tmp3 = lshr <2 x i32> %tmp1, <i32 1, i32 1>
%tmp5 = or <2 x i32> %tmp3, %tmp4
ret <2 x i32> %tmp5
@@ -1691,8 +1691,8 @@ define <16 x i8> @shr_orr16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK: shr.16b v0, {{v[0-9]+}}, #1
;CHECK-NEXT: orr.16b
;CHECK-NEXT: ret
- %tmp1 = load <16 x i8>* %A
- %tmp4 = load <16 x i8>* %B
+ %tmp1 = load <16 x i8>, <16 x i8>* %A
+ %tmp4 = load <16 x i8>, <16 x i8>* %B
%tmp3 = lshr <16 x i8> %tmp1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
%tmp5 = or <16 x i8> %tmp3, %tmp4
ret <16 x i8> %tmp5
@@ -1703,8 +1703,8 @@ define <8 x i16> @shr_orr8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK: shr.8h v0, {{v[0-9]+}}, #1
;CHECK-NEXT: orr.16b
;CHECK-NEXT: ret
- %tmp1 = load <8 x i16>* %A
- %tmp4 = load <8 x i16>* %B
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
+ %tmp4 = load <8 x i16>, <8 x i16>* %B
%tmp3 = lshr <8 x i16> %tmp1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
%tmp5 = or <8 x i16> %tmp3, %tmp4
ret <8 x i16> %tmp5
@@ -1715,8 +1715,8 @@ define <4 x i32> @shr_orr4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK: shr.4s v0, {{v[0-9]+}}, #1
;CHECK-NEXT: orr.16b
;CHECK-NEXT: ret
- %tmp1 = load <4 x i32>* %A
- %tmp4 = load <4 x i32>* %B
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
+ %tmp4 = load <4 x i32>, <4 x i32>* %B
%tmp3 = lshr <4 x i32> %tmp1, <i32 1, i32 1, i32 1, i32 1>
%tmp5 = or <4 x i32> %tmp3, %tmp4
ret <4 x i32> %tmp5
@@ -1727,8 +1727,8 @@ define <2 x i64> @shr_orr2d(<2 x i64>* %A, <2 x i64>* %B) nounwind {
;CHECK: shr.2d v0, {{v[0-9]+}}, #1
;CHECK-NEXT: orr.16b
;CHECK-NEXT: ret
- %tmp1 = load <2 x i64>* %A
- %tmp4 = load <2 x i64>* %B
+ %tmp1 = load <2 x i64>, <2 x i64>* %A
+ %tmp4 = load <2 x i64>, <2 x i64>* %B
%tmp3 = lshr <2 x i64> %tmp1, <i64 1, i64 1>
%tmp5 = or <2 x i64> %tmp3, %tmp4
ret <2 x i64> %tmp5
@@ -1739,8 +1739,8 @@ define <8 x i8> @shl_orr8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK: shl.8b v0, {{v[0-9]+}}, #1
;CHECK-NEXT: orr.8b
;CHECK-NEXT: ret
- %tmp1 = load <8 x i8>* %A
- %tmp4 = load <8 x i8>* %B
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
+ %tmp4 = load <8 x i8>, <8 x i8>* %B
%tmp3 = shl <8 x i8> %tmp1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
%tmp5 = or <8 x i8> %tmp3, %tmp4
ret <8 x i8> %tmp5
@@ -1751,8 +1751,8 @@ define <4 x i16> @shl_orr4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK: shl.4h v0, {{v[0-9]+}}, #1
;CHECK-NEXT: orr.8b
;CHECK-NEXT: ret
- %tmp1 = load <4 x i16>* %A
- %tmp4 = load <4 x i16>* %B
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
+ %tmp4 = load <4 x i16>, <4 x i16>* %B
%tmp3 = shl <4 x i16> %tmp1, <i16 1, i16 1, i16 1, i16 1>
%tmp5 = or <4 x i16> %tmp3, %tmp4
ret <4 x i16> %tmp5
@@ -1763,8 +1763,8 @@ define <2 x i32> @shl_orr2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK: shl.2s v0, {{v[0-9]+}}, #1
;CHECK-NEXT: orr.8b
;CHECK-NEXT: ret
- %tmp1 = load <2 x i32>* %A
- %tmp4 = load <2 x i32>* %B
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
+ %tmp4 = load <2 x i32>, <2 x i32>* %B
%tmp3 = shl <2 x i32> %tmp1, <i32 1, i32 1>
%tmp5 = or <2 x i32> %tmp3, %tmp4
ret <2 x i32> %tmp5
@@ -1775,8 +1775,8 @@ define <16 x i8> @shl_orr16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK: shl.16b v0, {{v[0-9]+}}, #1
;CHECK-NEXT: orr.16b
;CHECK-NEXT: ret
- %tmp1 = load <16 x i8>* %A
- %tmp4 = load <16 x i8>* %B
+ %tmp1 = load <16 x i8>, <16 x i8>* %A
+ %tmp4 = load <16 x i8>, <16 x i8>* %B
%tmp3 = shl <16 x i8> %tmp1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
%tmp5 = or <16 x i8> %tmp3, %tmp4
ret <16 x i8> %tmp5
@@ -1787,8 +1787,8 @@ define <8 x i16> @shl_orr8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK: shl.8h v0, {{v[0-9]+}}, #1
;CHECK-NEXT: orr.16b
;CHECK-NEXT: ret
- %tmp1 = load <8 x i16>* %A
- %tmp4 = load <8 x i16>* %B
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
+ %tmp4 = load <8 x i16>, <8 x i16>* %B
%tmp3 = shl <8 x i16> %tmp1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
%tmp5 = or <8 x i16> %tmp3, %tmp4
ret <8 x i16> %tmp5
@@ -1799,8 +1799,8 @@ define <4 x i32> @shl_orr4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK: shl.4s v0, {{v[0-9]+}}, #1
;CHECK-NEXT: orr.16b
;CHECK-NEXT: ret
- %tmp1 = load <4 x i32>* %A
- %tmp4 = load <4 x i32>* %B
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
+ %tmp4 = load <4 x i32>, <4 x i32>* %B
%tmp3 = shl <4 x i32> %tmp1, <i32 1, i32 1, i32 1, i32 1>
%tmp5 = or <4 x i32> %tmp3, %tmp4
ret <4 x i32> %tmp5
@@ -1811,8 +1811,8 @@ define <2 x i64> @shl_orr2d(<2 x i64>* %A, <2 x i64>* %B) nounwind {
;CHECK: shl.2d v0, {{v[0-9]+}}, #1
;CHECK-NEXT: orr.16b
;CHECK-NEXT: ret
- %tmp1 = load <2 x i64>* %A
- %tmp4 = load <2 x i64>* %B
+ %tmp1 = load <2 x i64>, <2 x i64>* %A
+ %tmp4 = load <2 x i64>, <2 x i64>* %B
%tmp3 = shl <2 x i64> %tmp1, <i64 1, i64 1>
%tmp5 = or <2 x i64> %tmp3, %tmp4
ret <2 x i64> %tmp5
@@ -1838,8 +1838,8 @@ define <4 x i32> @shll_high(<8 x i16> %in) {
define <8 x i8> @sli8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: sli8b:
;CHECK: sli.8b v0, {{v[0-9]+}}, #1
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
+ %tmp2 = load <8 x i8>, <8 x i8>* %B
%tmp3 = call <8 x i8> @llvm.aarch64.neon.vsli.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2, i32 1)
ret <8 x i8> %tmp3
}
@@ -1847,8 +1847,8 @@ define <8 x i8> @sli8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
define <4 x i16> @sli4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: sli4h:
;CHECK: sli.4h v0, {{v[0-9]+}}, #1
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
%tmp3 = call <4 x i16> @llvm.aarch64.neon.vsli.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2, i32 1)
ret <4 x i16> %tmp3
}
@@ -1856,8 +1856,8 @@ define <4 x i16> @sli4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
define <2 x i32> @sli2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: sli2s:
;CHECK: sli.2s v0, {{v[0-9]+}}, #1
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
%tmp3 = call <2 x i32> @llvm.aarch64.neon.vsli.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2, i32 1)
ret <2 x i32> %tmp3
}
@@ -1865,8 +1865,8 @@ define <2 x i32> @sli2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
define <1 x i64> @sli1d(<1 x i64>* %A, <1 x i64>* %B) nounwind {
;CHECK-LABEL: sli1d:
;CHECK: sli d0, {{d[0-9]+}}, #1
- %tmp1 = load <1 x i64>* %A
- %tmp2 = load <1 x i64>* %B
+ %tmp1 = load <1 x i64>, <1 x i64>* %A
+ %tmp2 = load <1 x i64>, <1 x i64>* %B
%tmp3 = call <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2, i32 1)
ret <1 x i64> %tmp3
}
@@ -1874,8 +1874,8 @@ define <1 x i64> @sli1d(<1 x i64>* %A, <1 x i64>* %B) nounwind {
define <16 x i8> @sli16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: sli16b:
;CHECK: sli.16b v0, {{v[0-9]+}}, #1
- %tmp1 = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
+ %tmp1 = load <16 x i8>, <16 x i8>* %A
+ %tmp2 = load <16 x i8>, <16 x i8>* %B
%tmp3 = call <16 x i8> @llvm.aarch64.neon.vsli.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2, i32 1)
ret <16 x i8> %tmp3
}
@@ -1883,8 +1883,8 @@ define <16 x i8> @sli16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
define <8 x i16> @sli8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: sli8h:
;CHECK: sli.8h v0, {{v[0-9]+}}, #1
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
+ %tmp2 = load <8 x i16>, <8 x i16>* %B
%tmp3 = call <8 x i16> @llvm.aarch64.neon.vsli.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2, i32 1)
ret <8 x i16> %tmp3
}
@@ -1892,8 +1892,8 @@ define <8 x i16> @sli8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
define <4 x i32> @sli4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: sli4s:
;CHECK: sli.4s v0, {{v[0-9]+}}, #1
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
+ %tmp2 = load <4 x i32>, <4 x i32>* %B
%tmp3 = call <4 x i32> @llvm.aarch64.neon.vsli.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2, i32 1)
ret <4 x i32> %tmp3
}
@@ -1901,8 +1901,8 @@ define <4 x i32> @sli4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
define <2 x i64> @sli2d(<2 x i64>* %A, <2 x i64>* %B) nounwind {
;CHECK-LABEL: sli2d:
;CHECK: sli.2d v0, {{v[0-9]+}}, #1
- %tmp1 = load <2 x i64>* %A
- %tmp2 = load <2 x i64>* %B
+ %tmp1 = load <2 x i64>, <2 x i64>* %A
+ %tmp2 = load <2 x i64>, <2 x i64>* %B
%tmp3 = call <2 x i64> @llvm.aarch64.neon.vsli.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2, i32 1)
ret <2 x i64> %tmp3
}
diff --git a/test/CodeGen/AArch64/arm64-vshr.ll b/test/CodeGen/AArch64/arm64-vshr.ll
index 21eb579f2522..8d263f22c54e 100644
--- a/test/CodeGen/AArch64/arm64-vshr.ll
+++ b/test/CodeGen/AArch64/arm64-vshr.ll
@@ -10,8 +10,8 @@ entry:
%b.addr = alloca <8 x i16>, align 16
store <8 x i16> %a, <8 x i16>* %a.addr, align 16
store <8 x i16> %b, <8 x i16>* %b.addr, align 16
- %0 = load <8 x i16>* %a.addr, align 16
- %1 = load <8 x i16>* %b.addr, align 16
+ %0 = load <8 x i16>, <8 x i16>* %a.addr, align 16
+ %1 = load <8 x i16>, <8 x i16>* %b.addr, align 16
%shr = ashr <8 x i16> %0, %1
ret <8 x i16> %shr
}
@@ -25,8 +25,8 @@ entry:
%b.addr = alloca <4 x i32>, align 32
store <4 x i32> %a, <4 x i32>* %a.addr, align 32
store <4 x i32> %b, <4 x i32>* %b.addr, align 32
- %0 = load <4 x i32>* %a.addr, align 32
- %1 = load <4 x i32>* %b.addr, align 32
+ %0 = load <4 x i32>, <4 x i32>* %a.addr, align 32
+ %1 = load <4 x i32>, <4 x i32>* %b.addr, align 32
%shr = ashr <4 x i32> %0, %1
ret <4 x i32> %shr
}
@@ -40,8 +40,8 @@ entry:
%b.addr = alloca <8 x i16>, align 16
store <8 x i16> %a, <8 x i16>* %a.addr, align 16
store <8 x i16> %b, <8 x i16>* %b.addr, align 16
- %0 = load <8 x i16>* %a.addr, align 16
- %1 = load <8 x i16>* %b.addr, align 16
+ %0 = load <8 x i16>, <8 x i16>* %a.addr, align 16
+ %1 = load <8 x i16>, <8 x i16>* %b.addr, align 16
%shr = lshr <8 x i16> %0, %1
ret <8 x i16> %shr
}
diff --git a/test/CodeGen/AArch64/arm64-vshuffle.ll b/test/CodeGen/AArch64/arm64-vshuffle.ll
index 62fd96102d01..15ea21b7638d 100644
--- a/test/CodeGen/AArch64/arm64-vshuffle.ll
+++ b/test/CodeGen/AArch64/arm64-vshuffle.ll
@@ -1,22 +1,8 @@
; RUN: llc < %s -mtriple=arm64-apple-ios7.0 -mcpu=cyclone | FileCheck %s
-; The mask:
-; CHECK: lCPI0_0:
-; CHECK: .byte 2 ; 0x2
-; CHECK: .byte 255 ; 0xff
-; CHECK: .byte 6 ; 0x6
-; CHECK: .byte 255 ; 0xff
-; The second vector is legalized to undef and the elements of the first vector
-; are used instead.
-; CHECK: .byte 2 ; 0x2
-; CHECK: .byte 4 ; 0x4
-; CHECK: .byte 6 ; 0x6
-; CHECK: .byte 0 ; 0x0
; CHECK: test1
-; CHECK: ldr d[[REG0:[0-9]+]], [{{.*}}, lCPI0_0
-; CHECK: movi.8h v[[REG1:[0-9]+]], #0x1, lsl #8
-; CHECK: tbl.8b v{{[0-9]+}}, { v[[REG1]] }, v[[REG0]]
+; CHECK: movi d[[REG0:[0-9]+]], #0000000000000000
define <8 x i1> @test1() {
entry:
%Shuff = shufflevector <8 x i1> <i1 0, i1 1, i1 2, i1 3, i1 4, i1 5, i1 6,
@@ -29,19 +15,17 @@ entry:
}
; CHECK: lCPI1_0:
-; CHECK: .byte 2 ; 0x2
-; CHECK: .byte 255 ; 0xff
-; CHECK: .byte 6 ; 0x6
-; CHECK: .byte 255 ; 0xff
-; CHECK: .byte 10 ; 0xa
-; CHECK: .byte 12 ; 0xc
-; CHECK: .byte 14 ; 0xe
+; CHECK: .byte 0 ; 0x0
+; CHECK: .byte 0 ; 0x0
+; CHECK: .byte 0 ; 0x0
+; CHECK: .byte 0 ; 0x0
+; CHECK: .byte 1 ; 0x1
+; CHECK: .byte 0 ; 0x0
+; CHECK: .byte 0 ; 0x0
; CHECK: .byte 0 ; 0x0
; CHECK: test2
-; CHECK: ldr d[[REG0:[0-9]+]], [{{.*}}, lCPI1_0@PAGEOFF]
-; CHECK: adrp x[[REG2:[0-9]+]], lCPI1_1@PAGE
-; CHECK: ldr q[[REG1:[0-9]+]], [x[[REG2]], lCPI1_1@PAGEOFF]
-; CHECK: tbl.8b v{{[0-9]+}}, { v[[REG1]] }, v[[REG0]]
+; CHECK: adrp x[[REG2:[0-9]+]], lCPI1_0@PAGE
+; CHECK: ldr d[[REG1:[0-9]+]], [x[[REG2]], lCPI1_0@PAGEOFF]
define <8 x i1>@test2() {
bb:
%Shuff = shufflevector <8 x i1> zeroinitializer,
@@ -51,28 +35,8 @@ bb:
ret <8 x i1> %Shuff
}
-; CHECK: lCPI2_0:
-; CHECK: .byte 2 ; 0x2
-; CHECK: .byte 255 ; 0xff
-; CHECK: .byte 6 ; 0x6
-; CHECK: .byte 255 ; 0xff
-; CHECK: .byte 10 ; 0xa
-; CHECK: .byte 12 ; 0xc
-; CHECK: .byte 14 ; 0xe
-; CHECK: .byte 0 ; 0x0
-; CHECK: .byte 2 ; 0x2
-; CHECK: .byte 255 ; 0xff
-; CHECK: .byte 6 ; 0x6
-; CHECK: .byte 255 ; 0xff
-; CHECK: .byte 10 ; 0xa
-; CHECK: .byte 12 ; 0xc
-; CHECK: .byte 14 ; 0xe
-; CHECK: .byte 0 ; 0x0
; CHECK: test3
-; CHECK: adrp x[[REG3:[0-9]+]], lCPI2_0@PAGE
-; CHECK: ldr q[[REG0:[0-9]+]], [x[[REG3]], lCPI2_0@PAGEOFF]
-; CHECK: ldr q[[REG1:[0-9]+]], [x[[REG3]], lCPI2_1@PAGEOFF]
-; CHECK: tbl.16b v{{[0-9]+}}, { v[[REG1]] }, v[[REG0]]
+; CHECK: movi.4s v{{[0-9]+}}, #0x1
define <16 x i1> @test3(i1* %ptr, i32 %v) {
bb:
%Shuff = shufflevector <16 x i1> <i1 0, i1 1, i1 1, i1 0, i1 0, i1 1, i1 0, i1 0, i1 0, i1 1, i1 1, i1 0, i1 0, i1 1, i1 0, i1 0>, <16 x i1> undef,
@@ -81,29 +45,26 @@ bb:
i32 14, i32 0>
ret <16 x i1> %Shuff
}
-; CHECK: lCPI3_1:
-; CHECK: .byte 2 ; 0x2
+; CHECK: lCPI3_0:
+; CHECK: .byte 0 ; 0x0
+; CHECK: .byte 0 ; 0x0
+; CHECK: .byte 0 ; 0x0
; CHECK: .byte 1 ; 0x1
-; CHECK: .byte 6 ; 0x6
-; CHECK: .byte 18 ; 0x12
-; CHECK: .byte 10 ; 0xa
-; CHECK: .byte 12 ; 0xc
-; CHECK: .byte 14 ; 0xe
; CHECK: .byte 0 ; 0x0
-; CHECK: .byte 2 ; 0x2
-; CHECK: .byte 31 ; 0x1f
-; CHECK: .byte 6 ; 0x6
-; CHECK: .byte 30 ; 0x1e
-; CHECK: .byte 10 ; 0xa
-; CHECK: .byte 12 ; 0xc
-; CHECK: .byte 14 ; 0xe
+; CHECK: .byte 0 ; 0x0
+; CHECK: .byte 0 ; 0x0
+; CHECK: .byte 0 ; 0x0
+; CHECK: .byte 0 ; 0x0
+; CHECK: .byte 0 ; 0x0
+; CHECK: .byte 0 ; 0x0
+; CHECK: .byte 0 ; 0x0
+; CHECK: .byte 0 ; 0x0
+; CHECK: .byte 0 ; 0x0
+; CHECK: .byte 0 ; 0x0
; CHECK: .byte 0 ; 0x0
; CHECK: _test4:
-; CHECK: ldr q[[REG1:[0-9]+]]
-; CHECK: movi.2d v[[REG0:[0-9]+]], #0000000000000000
-; CHECK: adrp x[[REG3:[0-9]+]], lCPI3_1@PAGE
-; CHECK: ldr q[[REG2:[0-9]+]], [x[[REG3]], lCPI3_1@PAGEOFF]
-; CHECK: tbl.16b v{{[0-9]+}}, { v[[REG0]], v[[REG1]] }, v[[REG2]]
+; CHECK: adrp x[[REG3:[0-9]+]], lCPI3_0@PAGE
+; CHECK: ldr q[[REG2:[0-9]+]], [x[[REG3]], lCPI3_0@PAGEOFF]
define <16 x i1> @test4(i1* %ptr, i32 %v) {
bb:
%Shuff = shufflevector <16 x i1> zeroinitializer,
diff --git a/test/CodeGen/AArch64/arm64-vsqrt.ll b/test/CodeGen/AArch64/arm64-vsqrt.ll
index 02b7c7ec5d80..20aebd9cae36 100644
--- a/test/CodeGen/AArch64/arm64-vsqrt.ll
+++ b/test/CodeGen/AArch64/arm64-vsqrt.ll
@@ -3,8 +3,8 @@
define <2 x float> @frecps_2s(<2 x float>* %A, <2 x float>* %B) nounwind {
;CHECK-LABEL: frecps_2s:
;CHECK: frecps.2s
- %tmp1 = load <2 x float>* %A
- %tmp2 = load <2 x float>* %B
+ %tmp1 = load <2 x float>, <2 x float>* %A
+ %tmp2 = load <2 x float>, <2 x float>* %B
%tmp3 = call <2 x float> @llvm.aarch64.neon.frecps.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
ret <2 x float> %tmp3
}
@@ -12,8 +12,8 @@ define <2 x float> @frecps_2s(<2 x float>* %A, <2 x float>* %B) nounwind {
define <4 x float> @frecps_4s(<4 x float>* %A, <4 x float>* %B) nounwind {
;CHECK-LABEL: frecps_4s:
;CHECK: frecps.4s
- %tmp1 = load <4 x float>* %A
- %tmp2 = load <4 x float>* %B
+ %tmp1 = load <4 x float>, <4 x float>* %A
+ %tmp2 = load <4 x float>, <4 x float>* %B
%tmp3 = call <4 x float> @llvm.aarch64.neon.frecps.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
ret <4 x float> %tmp3
}
@@ -21,8 +21,8 @@ define <4 x float> @frecps_4s(<4 x float>* %A, <4 x float>* %B) nounwind {
define <2 x double> @frecps_2d(<2 x double>* %A, <2 x double>* %B) nounwind {
;CHECK-LABEL: frecps_2d:
;CHECK: frecps.2d
- %tmp1 = load <2 x double>* %A
- %tmp2 = load <2 x double>* %B
+ %tmp1 = load <2 x double>, <2 x double>* %A
+ %tmp2 = load <2 x double>, <2 x double>* %B
%tmp3 = call <2 x double> @llvm.aarch64.neon.frecps.v2f64(<2 x double> %tmp1, <2 x double> %tmp2)
ret <2 x double> %tmp3
}
@@ -35,8 +35,8 @@ declare <2 x double> @llvm.aarch64.neon.frecps.v2f64(<2 x double>, <2 x double>)
define <2 x float> @frsqrts_2s(<2 x float>* %A, <2 x float>* %B) nounwind {
;CHECK-LABEL: frsqrts_2s:
;CHECK: frsqrts.2s
- %tmp1 = load <2 x float>* %A
- %tmp2 = load <2 x float>* %B
+ %tmp1 = load <2 x float>, <2 x float>* %A
+ %tmp2 = load <2 x float>, <2 x float>* %B
%tmp3 = call <2 x float> @llvm.aarch64.neon.frsqrts.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
ret <2 x float> %tmp3
}
@@ -44,8 +44,8 @@ define <2 x float> @frsqrts_2s(<2 x float>* %A, <2 x float>* %B) nounwind {
define <4 x float> @frsqrts_4s(<4 x float>* %A, <4 x float>* %B) nounwind {
;CHECK-LABEL: frsqrts_4s:
;CHECK: frsqrts.4s
- %tmp1 = load <4 x float>* %A
- %tmp2 = load <4 x float>* %B
+ %tmp1 = load <4 x float>, <4 x float>* %A
+ %tmp2 = load <4 x float>, <4 x float>* %B
%tmp3 = call <4 x float> @llvm.aarch64.neon.frsqrts.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
ret <4 x float> %tmp3
}
@@ -53,8 +53,8 @@ define <4 x float> @frsqrts_4s(<4 x float>* %A, <4 x float>* %B) nounwind {
define <2 x double> @frsqrts_2d(<2 x double>* %A, <2 x double>* %B) nounwind {
;CHECK-LABEL: frsqrts_2d:
;CHECK: frsqrts.2d
- %tmp1 = load <2 x double>* %A
- %tmp2 = load <2 x double>* %B
+ %tmp1 = load <2 x double>, <2 x double>* %A
+ %tmp2 = load <2 x double>, <2 x double>* %B
%tmp3 = call <2 x double> @llvm.aarch64.neon.frsqrts.v2f64(<2 x double> %tmp1, <2 x double> %tmp2)
ret <2 x double> %tmp3
}
@@ -66,7 +66,7 @@ declare <2 x double> @llvm.aarch64.neon.frsqrts.v2f64(<2 x double>, <2 x double>
define <2 x float> @frecpe_2s(<2 x float>* %A) nounwind {
;CHECK-LABEL: frecpe_2s:
;CHECK: frecpe.2s
- %tmp1 = load <2 x float>* %A
+ %tmp1 = load <2 x float>, <2 x float>* %A
%tmp3 = call <2 x float> @llvm.aarch64.neon.frecpe.v2f32(<2 x float> %tmp1)
ret <2 x float> %tmp3
}
@@ -74,7 +74,7 @@ define <2 x float> @frecpe_2s(<2 x float>* %A) nounwind {
define <4 x float> @frecpe_4s(<4 x float>* %A) nounwind {
;CHECK-LABEL: frecpe_4s:
;CHECK: frecpe.4s
- %tmp1 = load <4 x float>* %A
+ %tmp1 = load <4 x float>, <4 x float>* %A
%tmp3 = call <4 x float> @llvm.aarch64.neon.frecpe.v4f32(<4 x float> %tmp1)
ret <4 x float> %tmp3
}
@@ -82,7 +82,7 @@ define <4 x float> @frecpe_4s(<4 x float>* %A) nounwind {
define <2 x double> @frecpe_2d(<2 x double>* %A) nounwind {
;CHECK-LABEL: frecpe_2d:
;CHECK: frecpe.2d
- %tmp1 = load <2 x double>* %A
+ %tmp1 = load <2 x double>, <2 x double>* %A
%tmp3 = call <2 x double> @llvm.aarch64.neon.frecpe.v2f64(<2 x double> %tmp1)
ret <2 x double> %tmp3
}
@@ -90,7 +90,7 @@ define <2 x double> @frecpe_2d(<2 x double>* %A) nounwind {
define float @frecpe_s(float* %A) nounwind {
;CHECK-LABEL: frecpe_s:
;CHECK: frecpe s0, {{s[0-9]+}}
- %tmp1 = load float* %A
+ %tmp1 = load float, float* %A
%tmp3 = call float @llvm.aarch64.neon.frecpe.f32(float %tmp1)
ret float %tmp3
}
@@ -98,7 +98,7 @@ define float @frecpe_s(float* %A) nounwind {
define double @frecpe_d(double* %A) nounwind {
;CHECK-LABEL: frecpe_d:
;CHECK: frecpe d0, {{d[0-9]+}}
- %tmp1 = load double* %A
+ %tmp1 = load double, double* %A
%tmp3 = call double @llvm.aarch64.neon.frecpe.f64(double %tmp1)
ret double %tmp3
}
@@ -112,7 +112,7 @@ declare double @llvm.aarch64.neon.frecpe.f64(double) nounwind readnone
define float @frecpx_s(float* %A) nounwind {
;CHECK-LABEL: frecpx_s:
;CHECK: frecpx s0, {{s[0-9]+}}
- %tmp1 = load float* %A
+ %tmp1 = load float, float* %A
%tmp3 = call float @llvm.aarch64.neon.frecpx.f32(float %tmp1)
ret float %tmp3
}
@@ -120,7 +120,7 @@ define float @frecpx_s(float* %A) nounwind {
define double @frecpx_d(double* %A) nounwind {
;CHECK-LABEL: frecpx_d:
;CHECK: frecpx d0, {{d[0-9]+}}
- %tmp1 = load double* %A
+ %tmp1 = load double, double* %A
%tmp3 = call double @llvm.aarch64.neon.frecpx.f64(double %tmp1)
ret double %tmp3
}
@@ -131,7 +131,7 @@ declare double @llvm.aarch64.neon.frecpx.f64(double) nounwind readnone
define <2 x float> @frsqrte_2s(<2 x float>* %A) nounwind {
;CHECK-LABEL: frsqrte_2s:
;CHECK: frsqrte.2s
- %tmp1 = load <2 x float>* %A
+ %tmp1 = load <2 x float>, <2 x float>* %A
%tmp3 = call <2 x float> @llvm.aarch64.neon.frsqrte.v2f32(<2 x float> %tmp1)
ret <2 x float> %tmp3
}
@@ -139,7 +139,7 @@ define <2 x float> @frsqrte_2s(<2 x float>* %A) nounwind {
define <4 x float> @frsqrte_4s(<4 x float>* %A) nounwind {
;CHECK-LABEL: frsqrte_4s:
;CHECK: frsqrte.4s
- %tmp1 = load <4 x float>* %A
+ %tmp1 = load <4 x float>, <4 x float>* %A
%tmp3 = call <4 x float> @llvm.aarch64.neon.frsqrte.v4f32(<4 x float> %tmp1)
ret <4 x float> %tmp3
}
@@ -147,7 +147,7 @@ define <4 x float> @frsqrte_4s(<4 x float>* %A) nounwind {
define <2 x double> @frsqrte_2d(<2 x double>* %A) nounwind {
;CHECK-LABEL: frsqrte_2d:
;CHECK: frsqrte.2d
- %tmp1 = load <2 x double>* %A
+ %tmp1 = load <2 x double>, <2 x double>* %A
%tmp3 = call <2 x double> @llvm.aarch64.neon.frsqrte.v2f64(<2 x double> %tmp1)
ret <2 x double> %tmp3
}
@@ -155,7 +155,7 @@ define <2 x double> @frsqrte_2d(<2 x double>* %A) nounwind {
define float @frsqrte_s(float* %A) nounwind {
;CHECK-LABEL: frsqrte_s:
;CHECK: frsqrte s0, {{s[0-9]+}}
- %tmp1 = load float* %A
+ %tmp1 = load float, float* %A
%tmp3 = call float @llvm.aarch64.neon.frsqrte.f32(float %tmp1)
ret float %tmp3
}
@@ -163,7 +163,7 @@ define float @frsqrte_s(float* %A) nounwind {
define double @frsqrte_d(double* %A) nounwind {
;CHECK-LABEL: frsqrte_d:
;CHECK: frsqrte d0, {{d[0-9]+}}
- %tmp1 = load double* %A
+ %tmp1 = load double, double* %A
%tmp3 = call double @llvm.aarch64.neon.frsqrte.f64(double %tmp1)
ret double %tmp3
}
@@ -177,7 +177,7 @@ declare double @llvm.aarch64.neon.frsqrte.f64(double) nounwind readnone
define <2 x i32> @urecpe_2s(<2 x i32>* %A) nounwind {
;CHECK-LABEL: urecpe_2s:
;CHECK: urecpe.2s
- %tmp1 = load <2 x i32>* %A
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
%tmp3 = call <2 x i32> @llvm.aarch64.neon.urecpe.v2i32(<2 x i32> %tmp1)
ret <2 x i32> %tmp3
}
@@ -185,7 +185,7 @@ define <2 x i32> @urecpe_2s(<2 x i32>* %A) nounwind {
define <4 x i32> @urecpe_4s(<4 x i32>* %A) nounwind {
;CHECK-LABEL: urecpe_4s:
;CHECK: urecpe.4s
- %tmp1 = load <4 x i32>* %A
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
%tmp3 = call <4 x i32> @llvm.aarch64.neon.urecpe.v4i32(<4 x i32> %tmp1)
ret <4 x i32> %tmp3
}
@@ -196,7 +196,7 @@ declare <4 x i32> @llvm.aarch64.neon.urecpe.v4i32(<4 x i32>) nounwind readnone
define <2 x i32> @ursqrte_2s(<2 x i32>* %A) nounwind {
;CHECK-LABEL: ursqrte_2s:
;CHECK: ursqrte.2s
- %tmp1 = load <2 x i32>* %A
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
%tmp3 = call <2 x i32> @llvm.aarch64.neon.ursqrte.v2i32(<2 x i32> %tmp1)
ret <2 x i32> %tmp3
}
@@ -204,7 +204,7 @@ define <2 x i32> @ursqrte_2s(<2 x i32>* %A) nounwind {
define <4 x i32> @ursqrte_4s(<4 x i32>* %A) nounwind {
;CHECK-LABEL: ursqrte_4s:
;CHECK: ursqrte.4s
- %tmp1 = load <4 x i32>* %A
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
%tmp3 = call <4 x i32> @llvm.aarch64.neon.ursqrte.v4i32(<4 x i32> %tmp1)
ret <4 x i32> %tmp3
}
diff --git a/test/CodeGen/AArch64/arm64-vsra.ll b/test/CodeGen/AArch64/arm64-vsra.ll
index 5e9cef3e7e28..d480dfe1f7d8 100644
--- a/test/CodeGen/AArch64/arm64-vsra.ll
+++ b/test/CodeGen/AArch64/arm64-vsra.ll
@@ -3,8 +3,8 @@
define <8 x i8> @vsras8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: vsras8:
;CHECK: ssra.8b
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
+ %tmp2 = load <8 x i8>, <8 x i8>* %B
%tmp3 = ashr <8 x i8> %tmp2, < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >
%tmp4 = add <8 x i8> %tmp1, %tmp3
ret <8 x i8> %tmp4
@@ -13,8 +13,8 @@ define <8 x i8> @vsras8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
define <4 x i16> @vsras16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: vsras16:
;CHECK: ssra.4h
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
%tmp3 = ashr <4 x i16> %tmp2, < i16 15, i16 15, i16 15, i16 15 >
%tmp4 = add <4 x i16> %tmp1, %tmp3
ret <4 x i16> %tmp4
@@ -23,8 +23,8 @@ define <4 x i16> @vsras16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
define <2 x i32> @vsras32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: vsras32:
;CHECK: ssra.2s
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
%tmp3 = ashr <2 x i32> %tmp2, < i32 31, i32 31 >
%tmp4 = add <2 x i32> %tmp1, %tmp3
ret <2 x i32> %tmp4
@@ -33,8 +33,8 @@ define <2 x i32> @vsras32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
define <16 x i8> @vsraQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: vsraQs8:
;CHECK: ssra.16b
- %tmp1 = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
+ %tmp1 = load <16 x i8>, <16 x i8>* %A
+ %tmp2 = load <16 x i8>, <16 x i8>* %B
%tmp3 = ashr <16 x i8> %tmp2, < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >
%tmp4 = add <16 x i8> %tmp1, %tmp3
ret <16 x i8> %tmp4
@@ -43,8 +43,8 @@ define <16 x i8> @vsraQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
define <8 x i16> @vsraQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: vsraQs16:
;CHECK: ssra.8h
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
+ %tmp2 = load <8 x i16>, <8 x i16>* %B
%tmp3 = ashr <8 x i16> %tmp2, < i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15 >
%tmp4 = add <8 x i16> %tmp1, %tmp3
ret <8 x i16> %tmp4
@@ -53,8 +53,8 @@ define <8 x i16> @vsraQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
define <4 x i32> @vsraQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: vsraQs32:
;CHECK: ssra.4s
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
+ %tmp2 = load <4 x i32>, <4 x i32>* %B
%tmp3 = ashr <4 x i32> %tmp2, < i32 31, i32 31, i32 31, i32 31 >
%tmp4 = add <4 x i32> %tmp1, %tmp3
ret <4 x i32> %tmp4
@@ -63,8 +63,8 @@ define <4 x i32> @vsraQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
define <2 x i64> @vsraQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
;CHECK-LABEL: vsraQs64:
;CHECK: ssra.2d
- %tmp1 = load <2 x i64>* %A
- %tmp2 = load <2 x i64>* %B
+ %tmp1 = load <2 x i64>, <2 x i64>* %A
+ %tmp2 = load <2 x i64>, <2 x i64>* %B
%tmp3 = ashr <2 x i64> %tmp2, < i64 63, i64 63 >
%tmp4 = add <2 x i64> %tmp1, %tmp3
ret <2 x i64> %tmp4
@@ -73,8 +73,8 @@ define <2 x i64> @vsraQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
define <8 x i8> @vsrau8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: vsrau8:
;CHECK: usra.8b
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
+ %tmp2 = load <8 x i8>, <8 x i8>* %B
%tmp3 = lshr <8 x i8> %tmp2, < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >
%tmp4 = add <8 x i8> %tmp1, %tmp3
ret <8 x i8> %tmp4
@@ -83,8 +83,8 @@ define <8 x i8> @vsrau8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
define <4 x i16> @vsrau16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: vsrau16:
;CHECK: usra.4h
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
%tmp3 = lshr <4 x i16> %tmp2, < i16 15, i16 15, i16 15, i16 15 >
%tmp4 = add <4 x i16> %tmp1, %tmp3
ret <4 x i16> %tmp4
@@ -93,8 +93,8 @@ define <4 x i16> @vsrau16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
define <2 x i32> @vsrau32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: vsrau32:
;CHECK: usra.2s
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
%tmp3 = lshr <2 x i32> %tmp2, < i32 31, i32 31 >
%tmp4 = add <2 x i32> %tmp1, %tmp3
ret <2 x i32> %tmp4
@@ -104,8 +104,8 @@ define <2 x i32> @vsrau32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
define <16 x i8> @vsraQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: vsraQu8:
;CHECK: usra.16b
- %tmp1 = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
+ %tmp1 = load <16 x i8>, <16 x i8>* %A
+ %tmp2 = load <16 x i8>, <16 x i8>* %B
%tmp3 = lshr <16 x i8> %tmp2, < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >
%tmp4 = add <16 x i8> %tmp1, %tmp3
ret <16 x i8> %tmp4
@@ -114,8 +114,8 @@ define <16 x i8> @vsraQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
define <8 x i16> @vsraQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: vsraQu16:
;CHECK: usra.8h
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
+ %tmp2 = load <8 x i16>, <8 x i16>* %B
%tmp3 = lshr <8 x i16> %tmp2, < i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15 >
%tmp4 = add <8 x i16> %tmp1, %tmp3
ret <8 x i16> %tmp4
@@ -124,8 +124,8 @@ define <8 x i16> @vsraQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
define <4 x i32> @vsraQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: vsraQu32:
;CHECK: usra.4s
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
+ %tmp2 = load <4 x i32>, <4 x i32>* %B
%tmp3 = lshr <4 x i32> %tmp2, < i32 31, i32 31, i32 31, i32 31 >
%tmp4 = add <4 x i32> %tmp1, %tmp3
ret <4 x i32> %tmp4
@@ -134,8 +134,8 @@ define <4 x i32> @vsraQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
define <2 x i64> @vsraQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
;CHECK-LABEL: vsraQu64:
;CHECK: usra.2d
- %tmp1 = load <2 x i64>* %A
- %tmp2 = load <2 x i64>* %B
+ %tmp1 = load <2 x i64>, <2 x i64>* %A
+ %tmp2 = load <2 x i64>, <2 x i64>* %B
%tmp3 = lshr <2 x i64> %tmp2, < i64 63, i64 63 >
%tmp4 = add <2 x i64> %tmp1, %tmp3
ret <2 x i64> %tmp4
diff --git a/test/CodeGen/AArch64/arm64-vsub.ll b/test/CodeGen/AArch64/arm64-vsub.ll
index c2c8755c0669..6b44b56b7bf0 100644
--- a/test/CodeGen/AArch64/arm64-vsub.ll
+++ b/test/CodeGen/AArch64/arm64-vsub.ll
@@ -3,8 +3,8 @@
define <8 x i8> @subhn8b(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: subhn8b:
;CHECK: subhn.8b
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
+ %tmp2 = load <8 x i16>, <8 x i16>* %B
%tmp3 = call <8 x i8> @llvm.aarch64.neon.subhn.v8i8(<8 x i16> %tmp1, <8 x i16> %tmp2)
ret <8 x i8> %tmp3
}
@@ -12,8 +12,8 @@ define <8 x i8> @subhn8b(<8 x i16>* %A, <8 x i16>* %B) nounwind {
define <4 x i16> @subhn4h(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: subhn4h:
;CHECK: subhn.4h
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
+ %tmp2 = load <4 x i32>, <4 x i32>* %B
%tmp3 = call <4 x i16> @llvm.aarch64.neon.subhn.v4i16(<4 x i32> %tmp1, <4 x i32> %tmp2)
ret <4 x i16> %tmp3
}
@@ -21,8 +21,8 @@ define <4 x i16> @subhn4h(<4 x i32>* %A, <4 x i32>* %B) nounwind {
define <2 x i32> @subhn2s(<2 x i64>* %A, <2 x i64>* %B) nounwind {
;CHECK-LABEL: subhn2s:
;CHECK: subhn.2s
- %tmp1 = load <2 x i64>* %A
- %tmp2 = load <2 x i64>* %B
+ %tmp1 = load <2 x i64>, <2 x i64>* %A
+ %tmp2 = load <2 x i64>, <2 x i64>* %B
%tmp3 = call <2 x i32> @llvm.aarch64.neon.subhn.v2i32(<2 x i64> %tmp1, <2 x i64> %tmp2)
ret <2 x i32> %tmp3
}
@@ -64,8 +64,8 @@ declare <8 x i8> @llvm.aarch64.neon.subhn.v8i8(<8 x i16>, <8 x i16>) nounwind re
define <8 x i8> @rsubhn8b(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: rsubhn8b:
;CHECK: rsubhn.8b
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
+ %tmp2 = load <8 x i16>, <8 x i16>* %B
%tmp3 = call <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16> %tmp1, <8 x i16> %tmp2)
ret <8 x i8> %tmp3
}
@@ -73,8 +73,8 @@ define <8 x i8> @rsubhn8b(<8 x i16>* %A, <8 x i16>* %B) nounwind {
define <4 x i16> @rsubhn4h(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: rsubhn4h:
;CHECK: rsubhn.4h
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
+ %tmp2 = load <4 x i32>, <4 x i32>* %B
%tmp3 = call <4 x i16> @llvm.aarch64.neon.rsubhn.v4i16(<4 x i32> %tmp1, <4 x i32> %tmp2)
ret <4 x i16> %tmp3
}
@@ -82,8 +82,8 @@ define <4 x i16> @rsubhn4h(<4 x i32>* %A, <4 x i32>* %B) nounwind {
define <2 x i32> @rsubhn2s(<2 x i64>* %A, <2 x i64>* %B) nounwind {
;CHECK-LABEL: rsubhn2s:
;CHECK: rsubhn.2s
- %tmp1 = load <2 x i64>* %A
- %tmp2 = load <2 x i64>* %B
+ %tmp1 = load <2 x i64>, <2 x i64>* %A
+ %tmp2 = load <2 x i64>, <2 x i64>* %B
%tmp3 = call <2 x i32> @llvm.aarch64.neon.rsubhn.v2i32(<2 x i64> %tmp1, <2 x i64> %tmp2)
ret <2 x i32> %tmp3
}
@@ -125,8 +125,8 @@ declare <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16>, <8 x i16>) nounwind r
define <8 x i16> @ssubl8h(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: ssubl8h:
;CHECK: ssubl.8h
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
+ %tmp2 = load <8 x i8>, <8 x i8>* %B
%tmp3 = sext <8 x i8> %tmp1 to <8 x i16>
%tmp4 = sext <8 x i8> %tmp2 to <8 x i16>
%tmp5 = sub <8 x i16> %tmp3, %tmp4
@@ -136,8 +136,8 @@ define <8 x i16> @ssubl8h(<8 x i8>* %A, <8 x i8>* %B) nounwind {
define <4 x i32> @ssubl4s(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: ssubl4s:
;CHECK: ssubl.4s
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
%tmp3 = sext <4 x i16> %tmp1 to <4 x i32>
%tmp4 = sext <4 x i16> %tmp2 to <4 x i32>
%tmp5 = sub <4 x i32> %tmp3, %tmp4
@@ -147,8 +147,8 @@ define <4 x i32> @ssubl4s(<4 x i16>* %A, <4 x i16>* %B) nounwind {
define <2 x i64> @ssubl2d(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: ssubl2d:
;CHECK: ssubl.2d
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
%tmp3 = sext <2 x i32> %tmp1 to <2 x i64>
%tmp4 = sext <2 x i32> %tmp2 to <2 x i64>
%tmp5 = sub <2 x i64> %tmp3, %tmp4
@@ -158,11 +158,11 @@ define <2 x i64> @ssubl2d(<2 x i32>* %A, <2 x i32>* %B) nounwind {
define <8 x i16> @ssubl2_8h(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: ssubl2_8h:
;CHECK: ssubl2.8h
- %tmp1 = load <16 x i8>* %A
+ %tmp1 = load <16 x i8>, <16 x i8>* %A
%high1 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%ext1 = sext <8 x i8> %high1 to <8 x i16>
- %tmp2 = load <16 x i8>* %B
+ %tmp2 = load <16 x i8>, <16 x i8>* %B
%high2 = shufflevector <16 x i8> %tmp2, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%ext2 = sext <8 x i8> %high2 to <8 x i16>
@@ -173,11 +173,11 @@ define <8 x i16> @ssubl2_8h(<16 x i8>* %A, <16 x i8>* %B) nounwind {
define <4 x i32> @ssubl2_4s(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: ssubl2_4s:
;CHECK: ssubl2.4s
- %tmp1 = load <8 x i16>* %A
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
%high1 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
%ext1 = sext <4 x i16> %high1 to <4 x i32>
- %tmp2 = load <8 x i16>* %B
+ %tmp2 = load <8 x i16>, <8 x i16>* %B
%high2 = shufflevector <8 x i16> %tmp2, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
%ext2 = sext <4 x i16> %high2 to <4 x i32>
@@ -188,11 +188,11 @@ define <4 x i32> @ssubl2_4s(<8 x i16>* %A, <8 x i16>* %B) nounwind {
define <2 x i64> @ssubl2_2d(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: ssubl2_2d:
;CHECK: ssubl2.2d
- %tmp1 = load <4 x i32>* %A
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
%high1 = shufflevector <4 x i32> %tmp1, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
%ext1 = sext <2 x i32> %high1 to <2 x i64>
- %tmp2 = load <4 x i32>* %B
+ %tmp2 = load <4 x i32>, <4 x i32>* %B
%high2 = shufflevector <4 x i32> %tmp2, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
%ext2 = sext <2 x i32> %high2 to <2 x i64>
@@ -203,8 +203,8 @@ define <2 x i64> @ssubl2_2d(<4 x i32>* %A, <4 x i32>* %B) nounwind {
define <8 x i16> @usubl8h(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: usubl8h:
;CHECK: usubl.8h
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
+ %tmp2 = load <8 x i8>, <8 x i8>* %B
%tmp3 = zext <8 x i8> %tmp1 to <8 x i16>
%tmp4 = zext <8 x i8> %tmp2 to <8 x i16>
%tmp5 = sub <8 x i16> %tmp3, %tmp4
@@ -214,8 +214,8 @@ define <8 x i16> @usubl8h(<8 x i8>* %A, <8 x i8>* %B) nounwind {
define <4 x i32> @usubl4s(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: usubl4s:
;CHECK: usubl.4s
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
%tmp3 = zext <4 x i16> %tmp1 to <4 x i32>
%tmp4 = zext <4 x i16> %tmp2 to <4 x i32>
%tmp5 = sub <4 x i32> %tmp3, %tmp4
@@ -225,8 +225,8 @@ define <4 x i32> @usubl4s(<4 x i16>* %A, <4 x i16>* %B) nounwind {
define <2 x i64> @usubl2d(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: usubl2d:
;CHECK: usubl.2d
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
%tmp3 = zext <2 x i32> %tmp1 to <2 x i64>
%tmp4 = zext <2 x i32> %tmp2 to <2 x i64>
%tmp5 = sub <2 x i64> %tmp3, %tmp4
@@ -236,11 +236,11 @@ define <2 x i64> @usubl2d(<2 x i32>* %A, <2 x i32>* %B) nounwind {
define <8 x i16> @usubl2_8h(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: usubl2_8h:
;CHECK: usubl2.8h
- %tmp1 = load <16 x i8>* %A
+ %tmp1 = load <16 x i8>, <16 x i8>* %A
%high1 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%ext1 = zext <8 x i8> %high1 to <8 x i16>
- %tmp2 = load <16 x i8>* %B
+ %tmp2 = load <16 x i8>, <16 x i8>* %B
%high2 = shufflevector <16 x i8> %tmp2, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%ext2 = zext <8 x i8> %high2 to <8 x i16>
@@ -251,11 +251,11 @@ define <8 x i16> @usubl2_8h(<16 x i8>* %A, <16 x i8>* %B) nounwind {
define <4 x i32> @usubl2_4s(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: usubl2_4s:
;CHECK: usubl2.4s
- %tmp1 = load <8 x i16>* %A
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
%high1 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
%ext1 = zext <4 x i16> %high1 to <4 x i32>
- %tmp2 = load <8 x i16>* %B
+ %tmp2 = load <8 x i16>, <8 x i16>* %B
%high2 = shufflevector <8 x i16> %tmp2, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
%ext2 = zext <4 x i16> %high2 to <4 x i32>
@@ -266,11 +266,11 @@ define <4 x i32> @usubl2_4s(<8 x i16>* %A, <8 x i16>* %B) nounwind {
define <2 x i64> @usubl2_2d(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: usubl2_2d:
;CHECK: usubl2.2d
- %tmp1 = load <4 x i32>* %A
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
%high1 = shufflevector <4 x i32> %tmp1, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
%ext1 = zext <2 x i32> %high1 to <2 x i64>
- %tmp2 = load <4 x i32>* %B
+ %tmp2 = load <4 x i32>, <4 x i32>* %B
%high2 = shufflevector <4 x i32> %tmp2, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
%ext2 = zext <2 x i32> %high2 to <2 x i64>
@@ -281,8 +281,8 @@ define <2 x i64> @usubl2_2d(<4 x i32>* %A, <4 x i32>* %B) nounwind {
define <8 x i16> @ssubw8h(<8 x i16>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: ssubw8h:
;CHECK: ssubw.8h
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i8>* %B
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
+ %tmp2 = load <8 x i8>, <8 x i8>* %B
%tmp3 = sext <8 x i8> %tmp2 to <8 x i16>
%tmp4 = sub <8 x i16> %tmp1, %tmp3
ret <8 x i16> %tmp4
@@ -291,8 +291,8 @@ define <8 x i16> @ssubw8h(<8 x i16>* %A, <8 x i8>* %B) nounwind {
define <4 x i32> @ssubw4s(<4 x i32>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: ssubw4s:
;CHECK: ssubw.4s
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i16>* %B
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
%tmp3 = sext <4 x i16> %tmp2 to <4 x i32>
%tmp4 = sub <4 x i32> %tmp1, %tmp3
ret <4 x i32> %tmp4
@@ -301,8 +301,8 @@ define <4 x i32> @ssubw4s(<4 x i32>* %A, <4 x i16>* %B) nounwind {
define <2 x i64> @ssubw2d(<2 x i64>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: ssubw2d:
;CHECK: ssubw.2d
- %tmp1 = load <2 x i64>* %A
- %tmp2 = load <2 x i32>* %B
+ %tmp1 = load <2 x i64>, <2 x i64>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
%tmp3 = sext <2 x i32> %tmp2 to <2 x i64>
%tmp4 = sub <2 x i64> %tmp1, %tmp3
ret <2 x i64> %tmp4
@@ -311,9 +311,9 @@ define <2 x i64> @ssubw2d(<2 x i64>* %A, <2 x i32>* %B) nounwind {
define <8 x i16> @ssubw2_8h(<8 x i16>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: ssubw2_8h:
;CHECK: ssubw2.8h
- %tmp1 = load <8 x i16>* %A
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
- %tmp2 = load <16 x i8>* %B
+ %tmp2 = load <16 x i8>, <16 x i8>* %B
%high2 = shufflevector <16 x i8> %tmp2, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%ext2 = sext <8 x i8> %high2 to <8 x i16>
@@ -324,9 +324,9 @@ define <8 x i16> @ssubw2_8h(<8 x i16>* %A, <16 x i8>* %B) nounwind {
define <4 x i32> @ssubw2_4s(<4 x i32>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: ssubw2_4s:
;CHECK: ssubw2.4s
- %tmp1 = load <4 x i32>* %A
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
- %tmp2 = load <8 x i16>* %B
+ %tmp2 = load <8 x i16>, <8 x i16>* %B
%high2 = shufflevector <8 x i16> %tmp2, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
%ext2 = sext <4 x i16> %high2 to <4 x i32>
@@ -337,9 +337,9 @@ define <4 x i32> @ssubw2_4s(<4 x i32>* %A, <8 x i16>* %B) nounwind {
define <2 x i64> @ssubw2_2d(<2 x i64>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: ssubw2_2d:
;CHECK: ssubw2.2d
- %tmp1 = load <2 x i64>* %A
+ %tmp1 = load <2 x i64>, <2 x i64>* %A
- %tmp2 = load <4 x i32>* %B
+ %tmp2 = load <4 x i32>, <4 x i32>* %B
%high2 = shufflevector <4 x i32> %tmp2, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
%ext2 = sext <2 x i32> %high2 to <2 x i64>
@@ -350,8 +350,8 @@ define <2 x i64> @ssubw2_2d(<2 x i64>* %A, <4 x i32>* %B) nounwind {
define <8 x i16> @usubw8h(<8 x i16>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: usubw8h:
;CHECK: usubw.8h
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i8>* %B
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
+ %tmp2 = load <8 x i8>, <8 x i8>* %B
%tmp3 = zext <8 x i8> %tmp2 to <8 x i16>
%tmp4 = sub <8 x i16> %tmp1, %tmp3
ret <8 x i16> %tmp4
@@ -360,8 +360,8 @@ define <8 x i16> @usubw8h(<8 x i16>* %A, <8 x i8>* %B) nounwind {
define <4 x i32> @usubw4s(<4 x i32>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: usubw4s:
;CHECK: usubw.4s
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i16>* %B
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
%tmp3 = zext <4 x i16> %tmp2 to <4 x i32>
%tmp4 = sub <4 x i32> %tmp1, %tmp3
ret <4 x i32> %tmp4
@@ -370,8 +370,8 @@ define <4 x i32> @usubw4s(<4 x i32>* %A, <4 x i16>* %B) nounwind {
define <2 x i64> @usubw2d(<2 x i64>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: usubw2d:
;CHECK: usubw.2d
- %tmp1 = load <2 x i64>* %A
- %tmp2 = load <2 x i32>* %B
+ %tmp1 = load <2 x i64>, <2 x i64>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
%tmp3 = zext <2 x i32> %tmp2 to <2 x i64>
%tmp4 = sub <2 x i64> %tmp1, %tmp3
ret <2 x i64> %tmp4
@@ -380,9 +380,9 @@ define <2 x i64> @usubw2d(<2 x i64>* %A, <2 x i32>* %B) nounwind {
define <8 x i16> @usubw2_8h(<8 x i16>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: usubw2_8h:
;CHECK: usubw2.8h
- %tmp1 = load <8 x i16>* %A
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
- %tmp2 = load <16 x i8>* %B
+ %tmp2 = load <16 x i8>, <16 x i8>* %B
%high2 = shufflevector <16 x i8> %tmp2, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%ext2 = zext <8 x i8> %high2 to <8 x i16>
@@ -393,9 +393,9 @@ define <8 x i16> @usubw2_8h(<8 x i16>* %A, <16 x i8>* %B) nounwind {
define <4 x i32> @usubw2_4s(<4 x i32>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: usubw2_4s:
;CHECK: usubw2.4s
- %tmp1 = load <4 x i32>* %A
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
- %tmp2 = load <8 x i16>* %B
+ %tmp2 = load <8 x i16>, <8 x i16>* %B
%high2 = shufflevector <8 x i16> %tmp2, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
%ext2 = zext <4 x i16> %high2 to <4 x i32>
@@ -406,9 +406,9 @@ define <4 x i32> @usubw2_4s(<4 x i32>* %A, <8 x i16>* %B) nounwind {
define <2 x i64> @usubw2_2d(<2 x i64>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: usubw2_2d:
;CHECK: usubw2.2d
- %tmp1 = load <2 x i64>* %A
+ %tmp1 = load <2 x i64>, <2 x i64>* %A
- %tmp2 = load <4 x i32>* %B
+ %tmp2 = load <4 x i32>, <4 x i32>* %B
%high2 = shufflevector <4 x i32> %tmp2, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
%ext2 = zext <2 x i32> %high2 to <2 x i64>
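The hunks above are all instances of one mechanical migration carried by this import: from LLVM 3.7 onward, load spells out the loaded type explicitly instead of inferring it from the pointer operand (getelementptr gains an analogous explicit source type in later hunks). A minimal sketch of the two spellings follows; @g is a hypothetical global used only for illustration and is not part of these tests:

  ; pre-3.7 spelling: result type inferred from the pointer operand
  %v = load <8 x i16>* @g
  ; 3.7 spelling: the loaded type is stated first, then the pointer operand
  %v = load <8 x i16>, <8 x i16>* @g

The same one-line rewrite repeats across every test file in the remainder of this diff.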
diff --git a/test/CodeGen/AArch64/arm64-weak-reference.ll b/test/CodeGen/AArch64/arm64-weak-reference.ll
index b2135e0960cb..e8074def4e6e 100644
--- a/test/CodeGen/AArch64/arm64-weak-reference.ll
+++ b/test/CodeGen/AArch64/arm64-weak-reference.ll
@@ -5,6 +5,6 @@
define i32 @fn() nounwind ssp {
; CHECK-LABEL: fn:
; CHECK: .weak_reference
- %val = load i32* @x, align 4
+ %val = load i32, i32* @x, align 4
ret i32 %val
}
diff --git a/test/CodeGen/AArch64/arm64-xaluo.ll b/test/CodeGen/AArch64/arm64-xaluo.ll
index 59ce6848afb9..ce9c0a64b587 100644
--- a/test/CodeGen/AArch64/arm64-xaluo.ll
+++ b/test/CodeGen/AArch64/arm64-xaluo.ll
@@ -1,5 +1,5 @@
; RUN: llc -march=arm64 -aarch64-atomic-cfg-tidy=0 -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -march=arm64 -aarch64-atomic-cfg-tidy=0 -fast-isel -fast-isel-abort -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -march=arm64 -aarch64-atomic-cfg-tidy=0 -fast-isel -fast-isel-abort=1 -verify-machineinstrs < %s | FileCheck %s
;
; Get the actual value of the overflow bit.
diff --git a/test/CodeGen/AArch64/arm64-zextload-unscaled.ll b/test/CodeGen/AArch64/arm64-zextload-unscaled.ll
index c475dbd21eee..321cf10fe45c 100644
--- a/test/CodeGen/AArch64/arm64-zextload-unscaled.ll
+++ b/test/CodeGen/AArch64/arm64-zextload-unscaled.ll
@@ -6,8 +6,8 @@ define void @test_zextloadi1_unscaled(i1* %base) {
; CHECK-LABEL: test_zextloadi1_unscaled:
; CHECK: ldurb {{w[0-9]+}}, [{{x[0-9]+}}, #-7]
- %addr = getelementptr i1* %base, i32 -7
- %val = load i1* %addr, align 1
+ %addr = getelementptr i1, i1* %base, i32 -7
+ %val = load i1, i1* %addr, align 1
%extended = zext i1 %val to i32
store i32 %extended, i32* @var32, align 4
@@ -18,8 +18,8 @@ define void @test_zextloadi8_unscaled(i8* %base) {
; CHECK-LABEL: test_zextloadi8_unscaled:
; CHECK: ldurb {{w[0-9]+}}, [{{x[0-9]+}}, #-7]
- %addr = getelementptr i8* %base, i32 -7
- %val = load i8* %addr, align 1
+ %addr = getelementptr i8, i8* %base, i32 -7
+ %val = load i8, i8* %addr, align 1
%extended = zext i8 %val to i32
store i32 %extended, i32* @var32, align 4
@@ -30,8 +30,8 @@ define void @test_zextloadi16_unscaled(i16* %base) {
; CHECK-LABEL: test_zextloadi16_unscaled:
; CHECK: ldurh {{w[0-9]+}}, [{{x[0-9]+}}, #-14]
- %addr = getelementptr i16* %base, i32 -7
- %val = load i16* %addr, align 2
+ %addr = getelementptr i16, i16* %base, i32 -7
+ %val = load i16, i16* %addr, align 2
%extended = zext i16 %val to i32
store i32 %extended, i32* @var32, align 4
diff --git a/test/CodeGen/AArch64/arm64-zip.ll b/test/CodeGen/AArch64/arm64-zip.ll
index 304b28099432..ddce002c25db 100644
--- a/test/CodeGen/AArch64/arm64-zip.ll
+++ b/test/CodeGen/AArch64/arm64-zip.ll
@@ -5,8 +5,8 @@ define <8 x i8> @vzipi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK: zip1.8b
;CHECK: zip2.8b
;CHECK-NEXT: add.8b
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
+ %tmp2 = load <8 x i8>, <8 x i8>* %B
%tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
%tmp4 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
%tmp5 = add <8 x i8> %tmp3, %tmp4
@@ -18,8 +18,8 @@ define <4 x i16> @vzipi16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK: zip1.4h
;CHECK: zip2.4h
;CHECK-NEXT: add.4h
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
%tmp3 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
%tmp4 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
%tmp5 = add <4 x i16> %tmp3, %tmp4
@@ -31,8 +31,8 @@ define <16 x i8> @vzipQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK: zip1.16b
;CHECK: zip2.16b
;CHECK-NEXT: add.16b
- %tmp1 = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
+ %tmp1 = load <16 x i8>, <16 x i8>* %A
+ %tmp2 = load <16 x i8>, <16 x i8>* %B
%tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
%tmp4 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
%tmp5 = add <16 x i8> %tmp3, %tmp4
@@ -44,8 +44,8 @@ define <8 x i16> @vzipQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK: zip1.8h
;CHECK: zip2.8h
;CHECK-NEXT: add.8h
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
+ %tmp2 = load <8 x i16>, <8 x i16>* %B
%tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
%tmp4 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
%tmp5 = add <8 x i16> %tmp3, %tmp4
@@ -57,8 +57,8 @@ define <4 x i32> @vzipQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK: zip1.4s
;CHECK: zip2.4s
;CHECK-NEXT: add.4s
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
+ %tmp2 = load <4 x i32>, <4 x i32>* %B
%tmp3 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
%tmp4 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
%tmp5 = add <4 x i32> %tmp3, %tmp4
@@ -70,8 +70,8 @@ define <4 x float> @vzipQf(<4 x float>* %A, <4 x float>* %B) nounwind {
;CHECK: zip1.4s
;CHECK: zip2.4s
;CHECK-NEXT: fadd.4s
- %tmp1 = load <4 x float>* %A
- %tmp2 = load <4 x float>* %B
+ %tmp1 = load <4 x float>, <4 x float>* %A
+ %tmp2 = load <4 x float>, <4 x float>* %B
%tmp3 = shufflevector <4 x float> %tmp1, <4 x float> %tmp2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
%tmp4 = shufflevector <4 x float> %tmp1, <4 x float> %tmp2, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
%tmp5 = fadd <4 x float> %tmp3, %tmp4
@@ -85,8 +85,8 @@ define <8 x i8> @vzipi8_undef(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK: zip1.8b
;CHECK: zip2.8b
;CHECK-NEXT: add.8b
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
+ %tmp2 = load <8 x i8>, <8 x i8>* %B
%tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 0, i32 undef, i32 1, i32 9, i32 undef, i32 10, i32 3, i32 11>
%tmp4 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 undef, i32 undef, i32 15>
%tmp5 = add <8 x i8> %tmp3, %tmp4
@@ -98,8 +98,8 @@ define <16 x i8> @vzipQi8_undef(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK: zip1.16b
;CHECK: zip2.16b
;CHECK-NEXT: add.16b
- %tmp1 = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
+ %tmp1 = load <16 x i8>, <16 x i8>* %A
+ %tmp2 = load <16 x i8>, <16 x i8>* %B
%tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 0, i32 16, i32 1, i32 undef, i32 undef, i32 undef, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
%tmp4 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 8, i32 24, i32 9, i32 undef, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 undef, i32 14, i32 30, i32 undef, i32 31>
%tmp5 = add <16 x i8> %tmp3, %tmp4
diff --git a/test/CodeGen/AArch64/assertion-rc-mismatch.ll b/test/CodeGen/AArch64/assertion-rc-mismatch.ll
index bcf206ec9bed..c216c448e11b 100644
--- a/test/CodeGen/AArch64/assertion-rc-mismatch.ll
+++ b/test/CodeGen/AArch64/assertion-rc-mismatch.ll
@@ -11,8 +11,8 @@ if:
br label %end
else:
%tmp3 = call i8* @llvm.returnaddress(i32 0)
- %ptr = getelementptr inbounds i8* %tmp3, i64 -16
- %ld = load i8* %ptr, align 4
+ %ptr = getelementptr inbounds i8, i8* %tmp3, i64 -16
+ %ld = load i8, i8* %ptr, align 4
%tmp2 = inttoptr i8 %ld to i8*
br label %end
end:
diff --git a/test/CodeGen/AArch64/atomic-ops-not-barriers.ll b/test/CodeGen/AArch64/atomic-ops-not-barriers.ll
index da095a0a42c5..44e777161e8e 100644
--- a/test/CodeGen/AArch64/atomic-ops-not-barriers.ll
+++ b/test/CodeGen/AArch64/atomic-ops-not-barriers.ll
@@ -4,7 +4,7 @@ define i32 @foo(i32* %var, i1 %cond) {
; CHECK-LABEL: foo:
br i1 %cond, label %atomic_ver, label %simple_ver
simple_ver:
- %oldval = load i32* %var
+ %oldval = load i32, i32* %var
%newval = add nsw i32 %oldval, -1
store i32 %newval, i32* %var
br label %somewhere
diff --git a/test/CodeGen/AArch64/atomic-ops.ll b/test/CodeGen/AArch64/atomic-ops.ll
index ef209e9c6e5b..cb90caeadc1f 100644
--- a/test/CodeGen/AArch64/atomic-ops.ll
+++ b/test/CodeGen/AArch64/atomic-ops.ll
@@ -972,7 +972,7 @@ define void @test_atomic_cmpxchg_i64(i64 %wanted, i64 %new) nounwind {
define i8 @test_atomic_load_monotonic_i8() nounwind {
; CHECK-LABEL: test_atomic_load_monotonic_i8:
- %val = load atomic i8* @var8 monotonic, align 1
+ %val = load atomic i8, i8* @var8 monotonic, align 1
; CHECK-NOT: dmb
; CHECK: adrp x[[HIADDR:[0-9]+]], var8
; CHECK: ldrb w0, [x[[HIADDR]], {{#?}}:lo12:var8]
@@ -986,7 +986,7 @@ define i8 @test_atomic_load_monotonic_regoff_i8(i64 %base, i64 %off) nounwind {
%addr_int = add i64 %base, %off
%addr = inttoptr i64 %addr_int to i8*
- %val = load atomic i8* %addr monotonic, align 1
+ %val = load atomic i8, i8* %addr monotonic, align 1
; CHECK-NOT: dmb
; CHECK: ldrb w0, [x0, x1]
; CHECK-NOT: dmb
@@ -996,7 +996,7 @@ define i8 @test_atomic_load_monotonic_regoff_i8(i64 %base, i64 %off) nounwind {
define i8 @test_atomic_load_acquire_i8() nounwind {
; CHECK-LABEL: test_atomic_load_acquire_i8:
- %val = load atomic i8* @var8 acquire, align 1
+ %val = load atomic i8, i8* @var8 acquire, align 1
; CHECK-NOT: dmb
; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
; CHECK-NOT: dmb
@@ -1009,7 +1009,7 @@ define i8 @test_atomic_load_acquire_i8() nounwind {
define i8 @test_atomic_load_seq_cst_i8() nounwind {
; CHECK-LABEL: test_atomic_load_seq_cst_i8:
- %val = load atomic i8* @var8 seq_cst, align 1
+ %val = load atomic i8, i8* @var8 seq_cst, align 1
; CHECK-NOT: dmb
; CHECK: adrp [[HIADDR:x[0-9]+]], var8
; CHECK-NOT: dmb
@@ -1022,7 +1022,7 @@ define i8 @test_atomic_load_seq_cst_i8() nounwind {
define i16 @test_atomic_load_monotonic_i16() nounwind {
; CHECK-LABEL: test_atomic_load_monotonic_i16:
- %val = load atomic i16* @var16 monotonic, align 2
+ %val = load atomic i16, i16* @var16 monotonic, align 2
; CHECK-NOT: dmb
; CHECK: adrp x[[HIADDR:[0-9]+]], var16
; CHECK-NOT: dmb
@@ -1037,7 +1037,7 @@ define i32 @test_atomic_load_monotonic_regoff_i32(i64 %base, i64 %off) nounwind
%addr_int = add i64 %base, %off
%addr = inttoptr i64 %addr_int to i32*
- %val = load atomic i32* %addr monotonic, align 4
+ %val = load atomic i32, i32* %addr monotonic, align 4
; CHECK-NOT: dmb
; CHECK: ldr w0, [x0, x1]
; CHECK-NOT: dmb
@@ -1047,7 +1047,7 @@ define i32 @test_atomic_load_monotonic_regoff_i32(i64 %base, i64 %off) nounwind
define i64 @test_atomic_load_seq_cst_i64() nounwind {
; CHECK-LABEL: test_atomic_load_seq_cst_i64:
- %val = load atomic i64* @var64 seq_cst, align 8
+ %val = load atomic i64, i64* @var64 seq_cst, align 8
; CHECK-NOT: dmb
; CHECK: adrp [[HIADDR:x[0-9]+]], var64
; CHECK-NOT: dmb
diff --git a/test/CodeGen/AArch64/basic-pic.ll b/test/CodeGen/AArch64/basic-pic.ll
index 62d41bcead6b..8765a6d1267c 100644
--- a/test/CodeGen/AArch64/basic-pic.ll
+++ b/test/CodeGen/AArch64/basic-pic.ll
@@ -5,7 +5,7 @@
define i32 @get_globalvar() {
; CHECK-LABEL: get_globalvar:
- %val = load i32* @var
+ %val = load i32, i32* @var
; CHECK: adrp x[[GOTHI:[0-9]+]], :got:var
; CHECK: ldr x[[GOTLOC:[0-9]+]], [x[[GOTHI]], {{#?}}:got_lo12:var]
; CHECK: ldr w0, [x[[GOTLOC]]]
@@ -16,7 +16,7 @@ define i32 @get_globalvar() {
define i32* @get_globalvaraddr() {
; CHECK-LABEL: get_globalvaraddr:
- %val = load i32* @var
+ %val = load i32, i32* @var
; CHECK: adrp x[[GOTHI:[0-9]+]], :got:var
; CHECK: ldr x0, [x[[GOTHI]], {{#?}}:got_lo12:var]
@@ -28,7 +28,7 @@ define i32* @get_globalvaraddr() {
define i32 @get_hiddenvar() {
; CHECK-LABEL: get_hiddenvar:
- %val = load i32* @hiddenvar
+ %val = load i32, i32* @hiddenvar
; CHECK: adrp x[[HI:[0-9]+]], hiddenvar
; CHECK: ldr w0, [x[[HI]], {{#?}}:lo12:hiddenvar]
@@ -38,7 +38,7 @@ define i32 @get_hiddenvar() {
define i32* @get_hiddenvaraddr() {
; CHECK-LABEL: get_hiddenvaraddr:
- %val = load i32* @hiddenvar
+ %val = load i32, i32* @hiddenvar
; CHECK: adrp [[HI:x[0-9]+]], hiddenvar
; CHECK: add x0, [[HI]], {{#?}}:lo12:hiddenvar
diff --git a/test/CodeGen/AArch64/bitcast.ll b/test/CodeGen/AArch64/bitcast.ll
new file mode 100644
index 000000000000..e88ea9ec0213
--- /dev/null
+++ b/test/CodeGen/AArch64/bitcast.ll
@@ -0,0 +1,27 @@
+; RUN: llc < %s -mtriple=aarch64--linux-gnu | FileCheck %s
+
+; PR23065: SCALAR_TO_VECTOR implies the top elements 1 to N-1 of the N-element vector are undefined.
+
+define <4 x i16> @foo1(<2 x i32> %a) {
+; CHECK-LABEL: foo1:
+; CHECK: movi d0, #0000000000000000
+; CHECK-NEXT: ret
+
+ %1 = shufflevector <2 x i32> <i32 58712, i32 undef>, <2 x i32> %a, <2 x i32> <i32 0, i32 2>
+; Can't optimize the following bitcast to scalar_to_vector.
+ %2 = bitcast <2 x i32> %1 to <4 x i16>
+ %3 = shufflevector <4 x i16> %2, <4 x i16> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+ ret <4 x i16> %3
+}
+
+define <4 x i16> @foo2(<2 x i32> %a) {
+; CHECK-LABEL: foo2:
+; CHECK: movi d0, #0000000000000000
+; CHECK-NEXT: ret
+
+ %1 = shufflevector <2 x i32> <i32 712, i32 undef>, <2 x i32> %a, <2 x i32> <i32 0, i32 2>
+; Can't optimize the following bitcast to scalar_to_vector.
+ %2 = bitcast <2 x i32> %1 to <4 x i16>
+ %3 = shufflevector <4 x i16> %2, <4 x i16> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+ ret <4 x i16> %3
+}
diff --git a/test/CodeGen/AArch64/bitfield-insert-0.ll b/test/CodeGen/AArch64/bitfield-insert-0.ll
index da0ed8af3126..21f3895154c7 100644
--- a/test/CodeGen/AArch64/bitfield-insert-0.ll
+++ b/test/CodeGen/AArch64/bitfield-insert-0.ll
@@ -6,10 +6,10 @@
define void @test_bfi0(i32* %existing, i32* %new) {
; CHECK: bfxil {{w[0-9]+}}, {{w[0-9]+}}, #0, #18
- %oldval = load volatile i32* %existing
+ %oldval = load volatile i32, i32* %existing
%oldval_keep = and i32 %oldval, 4294705152 ; 0xfffc_0000
- %newval = load volatile i32* %new
+ %newval = load volatile i32, i32* %new
%newval_masked = and i32 %newval, 262143 ; = 0x0003_ffff
%combined = or i32 %newval_masked, %oldval_keep
diff --git a/test/CodeGen/AArch64/bitfield-insert.ll b/test/CodeGen/AArch64/bitfield-insert.ll
index 2369a55aa92d..9b731fa72a47 100644
--- a/test/CodeGen/AArch64/bitfield-insert.ll
+++ b/test/CodeGen/AArch64/bitfield-insert.ll
@@ -28,10 +28,10 @@ define void @test_whole32(i32* %existing, i32* %new) {
; CHECK: bfi {{w[0-9]+}}, {{w[0-9]+}}, #26, #5
- %oldval = load volatile i32* %existing
+ %oldval = load volatile i32, i32* %existing
%oldval_keep = and i32 %oldval, 2214592511 ; =0x83ffffff
- %newval = load volatile i32* %new
+ %newval = load volatile i32, i32* %new
%newval_shifted = shl i32 %newval, 26
%newval_masked = and i32 %newval_shifted, 2080374784 ; = 0x7c000000
@@ -47,10 +47,10 @@ define void @test_whole64(i64* %existing, i64* %new) {
; CHECK-NOT: and
; CHECK: ret
- %oldval = load volatile i64* %existing
+ %oldval = load volatile i64, i64* %existing
%oldval_keep = and i64 %oldval, 18446742974265032703 ; = 0xffffff0003ffffffL
- %newval = load volatile i64* %new
+ %newval = load volatile i64, i64* %new
%newval_shifted = shl i64 %newval, 26
%newval_masked = and i64 %newval_shifted, 1099444518912 ; = 0xfffc000000
@@ -68,10 +68,10 @@ define void @test_whole32_from64(i64* %existing, i64* %new) {
; CHECK: ret
- %oldval = load volatile i64* %existing
+ %oldval = load volatile i64, i64* %existing
%oldval_keep = and i64 %oldval, 4294901760 ; = 0xffff0000
- %newval = load volatile i64* %new
+ %newval = load volatile i64, i64* %new
%newval_masked = and i64 %newval, 65535 ; = 0xffff
%combined = or i64 %oldval_keep, %newval_masked
@@ -86,10 +86,10 @@ define void @test_32bit_masked(i32 *%existing, i32 *%new) {
; CHECK: and
; CHECK: bfi [[INSERT:w[0-9]+]], {{w[0-9]+}}, #3, #4
- %oldval = load volatile i32* %existing
+ %oldval = load volatile i32, i32* %existing
%oldval_keep = and i32 %oldval, 135 ; = 0x87
- %newval = load volatile i32* %new
+ %newval = load volatile i32, i32* %new
%newval_shifted = shl i32 %newval, 3
%newval_masked = and i32 %newval_shifted, 120 ; = 0x78
@@ -104,10 +104,10 @@ define void @test_64bit_masked(i64 *%existing, i64 *%new) {
; CHECK: and
; CHECK: bfi [[INSERT:x[0-9]+]], {{x[0-9]+}}, #40, #8
- %oldval = load volatile i64* %existing
+ %oldval = load volatile i64, i64* %existing
%oldval_keep = and i64 %oldval, 1095216660480 ; = 0xff_0000_0000
- %newval = load volatile i64* %new
+ %newval = load volatile i64, i64* %new
%newval_shifted = shl i64 %newval, 40
%newval_masked = and i64 %newval_shifted, 280375465082880 ; = 0xff00_0000_0000
@@ -124,10 +124,10 @@ define void @test_32bit_complexmask(i32 *%existing, i32 *%new) {
; CHECK: and
; CHECK: bfi {{w[0-9]+}}, {{w[0-9]+}}, #3, #4
- %oldval = load volatile i32* %existing
+ %oldval = load volatile i32, i32* %existing
%oldval_keep = and i32 %oldval, 647 ; = 0x287
- %newval = load volatile i32* %new
+ %newval = load volatile i32, i32* %new
%newval_shifted = shl i32 %newval, 3
%newval_masked = and i32 %newval_shifted, 120 ; = 0x278
@@ -144,10 +144,10 @@ define void @test_32bit_badmask(i32 *%existing, i32 *%new) {
; CHECK-NOT: bfm
; CHECK: ret
- %oldval = load volatile i32* %existing
+ %oldval = load volatile i32, i32* %existing
%oldval_keep = and i32 %oldval, 135 ; = 0x87
- %newval = load volatile i32* %new
+ %newval = load volatile i32, i32* %new
%newval_shifted = shl i32 %newval, 3
%newval_masked = and i32 %newval_shifted, 632 ; = 0x278
@@ -164,10 +164,10 @@ define void @test_64bit_badmask(i64 *%existing, i64 *%new) {
; CHECK-NOT: bfm
; CHECK: ret
- %oldval = load volatile i64* %existing
+ %oldval = load volatile i64, i64* %existing
%oldval_keep = and i64 %oldval, 135 ; = 0x87
- %newval = load volatile i64* %new
+ %newval = load volatile i64, i64* %new
%newval_shifted = shl i64 %newval, 3
%newval_masked = and i64 %newval_shifted, 664 ; = 0x278
@@ -182,10 +182,10 @@ define void @test_64bit_badmask(i64 *%existing, i64 *%new) {
define void @test_32bit_with_shr(i32* %existing, i32* %new) {
; CHECK-LABEL: test_32bit_with_shr:
- %oldval = load volatile i32* %existing
+ %oldval = load volatile i32, i32* %existing
%oldval_keep = and i32 %oldval, 2214592511 ; =0x83ffffff
- %newval = load i32* %new
+ %newval = load i32, i32* %new
%newval_shifted = shl i32 %newval, 12
%newval_masked = and i32 %newval_shifted, 2080374784 ; = 0x7c000000
diff --git a/test/CodeGen/AArch64/bitfield.ll b/test/CodeGen/AArch64/bitfield.ll
index 0e1265372bd8..78399c80b5de 100644
--- a/test/CodeGen/AArch64/bitfield.ll
+++ b/test/CodeGen/AArch64/bitfield.ll
@@ -180,7 +180,7 @@ define i32 @test_ubfx32(i32* %addr) {
; CHECK-LABEL: test_ubfx32:
; CHECK: ubfx {{w[0-9]+}}, {{w[0-9]+}}, #23, #3
- %fields = load i32* %addr
+ %fields = load i32, i32* %addr
%shifted = lshr i32 %fields, 23
%masked = and i32 %shifted, 7
ret i32 %masked
@@ -189,7 +189,7 @@ define i32 @test_ubfx32(i32* %addr) {
define i64 @test_ubfx64(i64* %addr) {
; CHECK-LABEL: test_ubfx64:
; CHECK: ubfx {{x[0-9]+}}, {{x[0-9]+}}, #25, #10
- %fields = load i64* %addr
+ %fields = load i64, i64* %addr
%shifted = lshr i64 %fields, 25
%masked = and i64 %shifted, 1023
ret i64 %masked
@@ -199,7 +199,7 @@ define i32 @test_sbfx32(i32* %addr) {
; CHECK-LABEL: test_sbfx32:
; CHECK: sbfx {{w[0-9]+}}, {{w[0-9]+}}, #6, #3
- %fields = load i32* %addr
+ %fields = load i32, i32* %addr
%shifted = shl i32 %fields, 23
%extended = ashr i32 %shifted, 29
ret i32 %extended
@@ -209,7 +209,7 @@ define i64 @test_sbfx64(i64* %addr) {
; CHECK-LABEL: test_sbfx64:
; CHECK: sbfx {{x[0-9]+}}, {{x[0-9]+}}, #0, #63
- %fields = load i64* %addr
+ %fields = load i64, i64* %addr
%shifted = shl i64 %fields, 1
%extended = ashr i64 %shifted, 1
ret i64 %extended
diff --git a/test/CodeGen/AArch64/blockaddress.ll b/test/CodeGen/AArch64/blockaddress.ll
index 3a5dbdc945ca..e93c69fd3ea3 100644
--- a/test/CodeGen/AArch64/blockaddress.ll
+++ b/test/CodeGen/AArch64/blockaddress.ll
@@ -6,7 +6,7 @@
define void @test_blockaddress() {
; CHECK-LABEL: test_blockaddress:
store volatile i8* blockaddress(@test_blockaddress, %block), i8** @addr
- %val = load volatile i8** @addr
+ %val = load volatile i8*, i8** @addr
indirectbr i8* %val, [label %block]
; CHECK: adrp [[DEST_HI:x[0-9]+]], [[DEST_LBL:.Ltmp[0-9]+]]
; CHECK: add [[DEST:x[0-9]+]], [[DEST_HI]], {{#?}}:lo12:[[DEST_LBL]]
diff --git a/test/CodeGen/AArch64/bool-loads.ll b/test/CodeGen/AArch64/bool-loads.ll
index 881aeaa15dd5..b0ee1b4aec54 100644
--- a/test/CodeGen/AArch64/bool-loads.ll
+++ b/test/CodeGen/AArch64/bool-loads.ll
@@ -5,7 +5,7 @@
define i32 @test_sextloadi32() {
; CHECK-LABEL: test_sextloadi32
- %val = load i1* @var
+ %val = load i1, i1* @var
%ret = sext i1 %val to i32
; CHECK: ldrb {{w[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:var]
; CHECK: {{sbfx x[0-9]+, x[0-9]+, #0, #1|sbfx w[0-9]+, w[0-9]+, #0, #1}}
@@ -17,7 +17,7 @@ define i32 @test_sextloadi32() {
define i64 @test_sextloadi64() {
; CHECK-LABEL: test_sextloadi64
- %val = load i1* @var
+ %val = load i1, i1* @var
%ret = sext i1 %val to i64
; CHECK: ldrb {{w[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:var]
; CHECK: {{sbfx x[0-9]+, x[0-9]+, #0, #1}}
@@ -32,7 +32,7 @@ define i32 @test_zextloadi32() {
; It's not actually necessary that "ret" is next, but as far as LLVM
; is concerned only 0 or 1 should be loadable so no extension is
; necessary.
- %val = load i1* @var
+ %val = load i1, i1* @var
%ret = zext i1 %val to i32
; CHECK: ldrb {{w[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:var]
@@ -46,7 +46,7 @@ define i64 @test_zextloadi64() {
; It's not actually necessary that "ret" is next, but as far as LLVM
; is concerned only 0 or 1 should be loadable so no extension is
; necessary.
- %val = load i1* @var
+ %val = load i1, i1* @var
%ret = zext i1 %val to i64
; CHECK: ldrb {{w[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:var]
diff --git a/test/CodeGen/AArch64/br-to-eh-lpad.ll b/test/CodeGen/AArch64/br-to-eh-lpad.ll
index 20bffd9e4129..f304ba4ca286 100644
--- a/test/CodeGen/AArch64/br-to-eh-lpad.ll
+++ b/test/CodeGen/AArch64/br-to-eh-lpad.ll
@@ -30,12 +30,12 @@ invoke.cont7:
unreachable
if.end50.thread:
- tail call void (i8*, ...)* @printf(i8* getelementptr inbounds ([17 x i8]* @.str1, i64 0, i64 0), i32 125)
- tail call void (i8*, ...)* @printf(i8* getelementptr inbounds ([17 x i8]* @.str1, i64 0, i64 0), i32 128)
+ tail call void (i8*, ...) @printf(i8* getelementptr inbounds ([17 x i8], [17 x i8]* @.str1, i64 0, i64 0), i32 125)
+ tail call void (i8*, ...) @printf(i8* getelementptr inbounds ([17 x i8], [17 x i8]* @.str1, i64 0, i64 0), i32 128)
unreachable
invoke.cont33:
- tail call void (i8*, ...)* @printf(i8* getelementptr inbounds ([17 x i8]* @.str1, i64 0, i64 0), i32 119)
+ tail call void (i8*, ...) @printf(i8* getelementptr inbounds ([17 x i8], [17 x i8]* @.str1, i64 0, i64 0), i32 119)
unreachable
invoke.cont41:
@@ -51,7 +51,7 @@ lpad40:
br label %finally.catchall
finally.catchall:
- tail call void (i8*, ...)* @printf(i8* getelementptr inbounds ([17 x i8]* @.str1, i64 0, i64 0), i32 125)
+ tail call void (i8*, ...) @printf(i8* getelementptr inbounds ([17 x i8], [17 x i8]* @.str1, i64 0, i64 0), i32 125)
unreachable
}
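The br-to-eh-lpad.ll hunk above bundles two related syntax updates from the same import: calls through a varargs prototype now name the bare function type rather than a pointer-to-function type, and constant getelementptr expressions take an explicit source type, matching the load change. A minimal sketch of the old and new spellings, assuming a hypothetical format string @fmt that does not appear in the test:

  ; older spelling: pointer-to-function type on the call, GEP source type inferred
  call void (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @fmt, i64 0, i64 0), i32 1)
  ; newer spelling: bare function type on the call, explicit [4 x i8] source type on the GEP
  call void (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @fmt, i64 0, i64 0), i32 1)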
diff --git a/test/CodeGen/AArch64/breg.ll b/test/CodeGen/AArch64/breg.ll
index 952404495ce5..42061a851db2 100644
--- a/test/CodeGen/AArch64/breg.ll
+++ b/test/CodeGen/AArch64/breg.ll
@@ -4,7 +4,7 @@
define void @foo() {
; CHECK-LABEL: foo:
- %lab = load i8** @stored_label
+ %lab = load i8*, i8** @stored_label
indirectbr i8* %lab, [label %otherlab, label %retlab]
; CHECK: adrp {{x[0-9]+}}, stored_label
; CHECK: ldr {{x[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:stored_label]
diff --git a/test/CodeGen/AArch64/callee-save.ll b/test/CodeGen/AArch64/callee-save.ll
index 046e6ceac077..123403988d44 100644
--- a/test/CodeGen/AArch64/callee-save.ll
+++ b/test/CodeGen/AArch64/callee-save.ll
@@ -12,38 +12,38 @@ define void @foo() {
; Create lots of live variables to exhaust the supply of
; caller-saved registers
- %val1 = load volatile float* @var
- %val2 = load volatile float* @var
- %val3 = load volatile float* @var
- %val4 = load volatile float* @var
- %val5 = load volatile float* @var
- %val6 = load volatile float* @var
- %val7 = load volatile float* @var
- %val8 = load volatile float* @var
- %val9 = load volatile float* @var
- %val10 = load volatile float* @var
- %val11 = load volatile float* @var
- %val12 = load volatile float* @var
- %val13 = load volatile float* @var
- %val14 = load volatile float* @var
- %val15 = load volatile float* @var
- %val16 = load volatile float* @var
- %val17 = load volatile float* @var
- %val18 = load volatile float* @var
- %val19 = load volatile float* @var
- %val20 = load volatile float* @var
- %val21 = load volatile float* @var
- %val22 = load volatile float* @var
- %val23 = load volatile float* @var
- %val24 = load volatile float* @var
- %val25 = load volatile float* @var
- %val26 = load volatile float* @var
- %val27 = load volatile float* @var
- %val28 = load volatile float* @var
- %val29 = load volatile float* @var
- %val30 = load volatile float* @var
- %val31 = load volatile float* @var
- %val32 = load volatile float* @var
+ %val1 = load volatile float, float* @var
+ %val2 = load volatile float, float* @var
+ %val3 = load volatile float, float* @var
+ %val4 = load volatile float, float* @var
+ %val5 = load volatile float, float* @var
+ %val6 = load volatile float, float* @var
+ %val7 = load volatile float, float* @var
+ %val8 = load volatile float, float* @var
+ %val9 = load volatile float, float* @var
+ %val10 = load volatile float, float* @var
+ %val11 = load volatile float, float* @var
+ %val12 = load volatile float, float* @var
+ %val13 = load volatile float, float* @var
+ %val14 = load volatile float, float* @var
+ %val15 = load volatile float, float* @var
+ %val16 = load volatile float, float* @var
+ %val17 = load volatile float, float* @var
+ %val18 = load volatile float, float* @var
+ %val19 = load volatile float, float* @var
+ %val20 = load volatile float, float* @var
+ %val21 = load volatile float, float* @var
+ %val22 = load volatile float, float* @var
+ %val23 = load volatile float, float* @var
+ %val24 = load volatile float, float* @var
+ %val25 = load volatile float, float* @var
+ %val26 = load volatile float, float* @var
+ %val27 = load volatile float, float* @var
+ %val28 = load volatile float, float* @var
+ %val29 = load volatile float, float* @var
+ %val30 = load volatile float, float* @var
+ %val31 = load volatile float, float* @var
+ %val32 = load volatile float, float* @var
store volatile float %val1, float* @var
store volatile float %val2, float* @var
diff --git a/test/CodeGen/AArch64/cmpwithshort.ll b/test/CodeGen/AArch64/cmpwithshort.ll
index 14efdcc9d188..65909974af73 100644
--- a/test/CodeGen/AArch64/cmpwithshort.ll
+++ b/test/CodeGen/AArch64/cmpwithshort.ll
@@ -5,8 +5,8 @@ define i16 @test_1cmp_signed_1(i16* %ptr1) {
; CHECK: ldrsh
; CHECK-NEXT: cmn
entry:
- %addr = getelementptr inbounds i16* %ptr1, i16 0
- %val = load i16* %addr, align 2
+ %addr = getelementptr inbounds i16, i16* %ptr1, i16 0
+ %val = load i16, i16* %addr, align 2
%cmp = icmp eq i16 %val, -1
br i1 %cmp, label %if, label %if.then
if:
@@ -20,8 +20,8 @@ define i16 @test_1cmp_signed_2(i16* %ptr1) {
; CHECK: ldrsh
; CHECK-NEXT: cmn
entry:
- %addr = getelementptr inbounds i16* %ptr1, i16 0
- %val = load i16* %addr, align 2
+ %addr = getelementptr inbounds i16, i16* %ptr1, i16 0
+ %val = load i16, i16* %addr, align 2
%cmp = icmp sge i16 %val, -1
br i1 %cmp, label %if, label %if.then
if:
@@ -35,8 +35,8 @@ define i16 @test_1cmp_unsigned_1(i16* %ptr1) {
; CHECK: ldrsh
; CHECK-NEXT: cmn
entry:
- %addr = getelementptr inbounds i16* %ptr1, i16 0
- %val = load i16* %addr, align 2
+ %addr = getelementptr inbounds i16, i16* %ptr1, i16 0
+ %val = load i16, i16* %addr, align 2
%cmp = icmp uge i16 %val, -1
br i1 %cmp, label %if, label %if.then
if:
diff --git a/test/CodeGen/AArch64/code-model-large-abs.ll b/test/CodeGen/AArch64/code-model-large-abs.ll
index ca92500855b4..1680815d93ea 100644
--- a/test/CodeGen/AArch64/code-model-large-abs.ll
+++ b/test/CodeGen/AArch64/code-model-large-abs.ll
@@ -18,7 +18,7 @@ define i8* @global_addr() {
define i8 @global_i8() {
; CHECK-LABEL: global_i8:
- %val = load i8* @var8
+ %val = load i8, i8* @var8
ret i8 %val
; CHECK: movz x[[ADDR_REG:[0-9]+]], #:abs_g3:var8
; CHECK: movk x[[ADDR_REG]], #:abs_g2_nc:var8
@@ -29,7 +29,7 @@ define i8 @global_i8() {
define i16 @global_i16() {
; CHECK-LABEL: global_i16:
- %val = load i16* @var16
+ %val = load i16, i16* @var16
ret i16 %val
; CHECK: movz x[[ADDR_REG:[0-9]+]], #:abs_g3:var16
; CHECK: movk x[[ADDR_REG]], #:abs_g2_nc:var16
@@ -40,7 +40,7 @@ define i16 @global_i16() {
define i32 @global_i32() {
; CHECK-LABEL: global_i32:
- %val = load i32* @var32
+ %val = load i32, i32* @var32
ret i32 %val
; CHECK: movz x[[ADDR_REG:[0-9]+]], #:abs_g3:var32
; CHECK: movk x[[ADDR_REG]], #:abs_g2_nc:var32
@@ -51,7 +51,7 @@ define i32 @global_i32() {
define i64 @global_i64() {
; CHECK-LABEL: global_i64:
- %val = load i64* @var64
+ %val = load i64, i64* @var64
ret i64 %val
; CHECK: movz x[[ADDR_REG:[0-9]+]], #:abs_g3:var64
; CHECK: movk x[[ADDR_REG]], #:abs_g2_nc:var64
diff --git a/test/CodeGen/AArch64/combine-comparisons-by-cse.ll b/test/CodeGen/AArch64/combine-comparisons-by-cse.ll
index df8dc87176cc..c78fabac6187 100644
--- a/test/CodeGen/AArch64/combine-comparisons-by-cse.ll
+++ b/test/CodeGen/AArch64/combine-comparisons-by-cse.ll
@@ -15,13 +15,13 @@ define i32 @combine_gt_ge_10() #0 {
; CHECK-NOT: cmp
; CHECK: b.lt
entry:
- %0 = load i32* @a, align 4
+ %0 = load i32, i32* @a, align 4
%cmp = icmp sgt i32 %0, 10
br i1 %cmp, label %land.lhs.true, label %lor.lhs.false
land.lhs.true: ; preds = %entry
- %1 = load i32* @b, align 4
- %2 = load i32* @c, align 4
+ %1 = load i32, i32* @b, align 4
+ %2 = load i32, i32* @c, align 4
%cmp1 = icmp eq i32 %1, %2
br i1 %cmp1, label %return, label %land.lhs.true3
@@ -30,8 +30,8 @@ lor.lhs.false: ; preds = %entry
br i1 %cmp2, label %land.lhs.true3, label %if.end
land.lhs.true3: ; preds = %lor.lhs.false, %land.lhs.true
- %3 = load i32* @b, align 4
- %4 = load i32* @d, align 4
+ %3 = load i32, i32* @b, align 4
+ %4 = load i32, i32* @d, align 4
%cmp4 = icmp eq i32 %3, %4
br i1 %cmp4, label %return, label %if.end
@@ -52,13 +52,13 @@ define i32 @combine_gt_lt_5() #0 {
; CHECK-NOT: cmp
; CHECK: b.ge
entry:
- %0 = load i32* @a, align 4
+ %0 = load i32, i32* @a, align 4
%cmp = icmp sgt i32 %0, 5
br i1 %cmp, label %land.lhs.true, label %lor.lhs.false
land.lhs.true: ; preds = %entry
- %1 = load i32* @b, align 4
- %2 = load i32* @c, align 4
+ %1 = load i32, i32* @b, align 4
+ %2 = load i32, i32* @c, align 4
%cmp1 = icmp eq i32 %1, %2
br i1 %cmp1, label %return, label %if.end
@@ -67,8 +67,8 @@ lor.lhs.false: ; preds = %entry
br i1 %cmp2, label %land.lhs.true3, label %if.end
land.lhs.true3: ; preds = %lor.lhs.false
- %3 = load i32* @b, align 4
- %4 = load i32* @d, align 4
+ %3 = load i32, i32* @b, align 4
+ %4 = load i32, i32* @d, align 4
%cmp4 = icmp eq i32 %3, %4
br i1 %cmp4, label %return, label %if.end
@@ -89,13 +89,13 @@ define i32 @combine_lt_ge_5() #0 {
; CHECK-NOT: cmp
; CHECK: b.gt
entry:
- %0 = load i32* @a, align 4
+ %0 = load i32, i32* @a, align 4
%cmp = icmp slt i32 %0, 5
br i1 %cmp, label %land.lhs.true, label %lor.lhs.false
land.lhs.true: ; preds = %entry
- %1 = load i32* @b, align 4
- %2 = load i32* @c, align 4
+ %1 = load i32, i32* @b, align 4
+ %2 = load i32, i32* @c, align 4
%cmp1 = icmp eq i32 %1, %2
br i1 %cmp1, label %return, label %land.lhs.true3
@@ -104,8 +104,8 @@ lor.lhs.false: ; preds = %entry
br i1 %cmp2, label %land.lhs.true3, label %if.end
land.lhs.true3: ; preds = %lor.lhs.false, %land.lhs.true
- %3 = load i32* @b, align 4
- %4 = load i32* @d, align 4
+ %3 = load i32, i32* @b, align 4
+ %4 = load i32, i32* @d, align 4
%cmp4 = icmp eq i32 %3, %4
br i1 %cmp4, label %return, label %if.end
@@ -126,13 +126,13 @@ define i32 @combine_lt_gt_5() #0 {
; CHECK-NOT: cmp
; CHECK: b.le
entry:
- %0 = load i32* @a, align 4
+ %0 = load i32, i32* @a, align 4
%cmp = icmp slt i32 %0, 5
br i1 %cmp, label %land.lhs.true, label %lor.lhs.false
land.lhs.true: ; preds = %entry
- %1 = load i32* @b, align 4
- %2 = load i32* @c, align 4
+ %1 = load i32, i32* @b, align 4
+ %2 = load i32, i32* @c, align 4
%cmp1 = icmp eq i32 %1, %2
br i1 %cmp1, label %return, label %if.end
@@ -141,8 +141,8 @@ lor.lhs.false: ; preds = %entry
br i1 %cmp2, label %land.lhs.true3, label %if.end
land.lhs.true3: ; preds = %lor.lhs.false
- %3 = load i32* @b, align 4
- %4 = load i32* @d, align 4
+ %3 = load i32, i32* @b, align 4
+ %4 = load i32, i32* @d, align 4
%cmp4 = icmp eq i32 %3, %4
br i1 %cmp4, label %return, label %if.end
@@ -163,13 +163,13 @@ define i32 @combine_gt_lt_n5() #0 {
; CHECK-NOT: cmn
; CHECK: b.ge
entry:
- %0 = load i32* @a, align 4
+ %0 = load i32, i32* @a, align 4
%cmp = icmp sgt i32 %0, -5
br i1 %cmp, label %land.lhs.true, label %lor.lhs.false
land.lhs.true: ; preds = %entry
- %1 = load i32* @b, align 4
- %2 = load i32* @c, align 4
+ %1 = load i32, i32* @b, align 4
+ %2 = load i32, i32* @c, align 4
%cmp1 = icmp eq i32 %1, %2
br i1 %cmp1, label %return, label %if.end
@@ -178,8 +178,8 @@ lor.lhs.false: ; preds = %entry
br i1 %cmp2, label %land.lhs.true3, label %if.end
land.lhs.true3: ; preds = %lor.lhs.false
- %3 = load i32* @b, align 4
- %4 = load i32* @d, align 4
+ %3 = load i32, i32* @b, align 4
+ %4 = load i32, i32* @d, align 4
%cmp4 = icmp eq i32 %3, %4
br i1 %cmp4, label %return, label %if.end
@@ -200,13 +200,13 @@ define i32 @combine_lt_gt_n5() #0 {
; CHECK-NOT: cmn
; CHECK: b.le
entry:
- %0 = load i32* @a, align 4
+ %0 = load i32, i32* @a, align 4
%cmp = icmp slt i32 %0, -5
br i1 %cmp, label %land.lhs.true, label %lor.lhs.false
land.lhs.true: ; preds = %entry
- %1 = load i32* @b, align 4
- %2 = load i32* @c, align 4
+ %1 = load i32, i32* @b, align 4
+ %2 = load i32, i32* @c, align 4
%cmp1 = icmp eq i32 %1, %2
br i1 %cmp1, label %return, label %if.end
@@ -215,8 +215,8 @@ lor.lhs.false: ; preds = %entry
br i1 %cmp2, label %land.lhs.true3, label %if.end
land.lhs.true3: ; preds = %lor.lhs.false
- %3 = load i32* @b, align 4
- %4 = load i32* @d, align 4
+ %3 = load i32, i32* @b, align 4
+ %4 = load i32, i32* @d, align 4
%cmp4 = icmp eq i32 %3, %4
br i1 %cmp4, label %return, label %if.end
@@ -237,18 +237,18 @@ declare %struct.Struct* @Update(%struct.Struct*) #1
; no checks for this case, it just should be processed without errors
define void @combine_non_adjacent_cmp_br(%struct.Struct* nocapture readonly %hdCall) #0 {
entry:
- %size = getelementptr inbounds %struct.Struct* %hdCall, i64 0, i32 0
- %0 = load i64* %size, align 8
+ %size = getelementptr inbounds %struct.Struct, %struct.Struct* %hdCall, i64 0, i32 0
+ %0 = load i64, i64* %size, align 8
br label %land.rhs
land.rhs:
%rp.06 = phi i64 [ %0, %entry ], [ %sub, %while.body ]
- %1 = load i64* inttoptr (i64 24 to i64*), align 8
+ %1 = load i64, i64* inttoptr (i64 24 to i64*), align 8
%cmp2 = icmp sgt i64 %1, 0
br i1 %cmp2, label %while.body, label %while.end
while.body:
- %2 = load %struct.Struct** @glob, align 8
+ %2 = load %struct.Struct*, %struct.Struct** @glob, align 8
%call = tail call %struct.Struct* @Update(%struct.Struct* %2) #2
%sub = add nsw i64 %rp.06, -2
%cmp = icmp slt i64 %0, %rp.06
@@ -268,7 +268,7 @@ define i32 @do_nothing_if_resultant_opcodes_would_differ() #0 {
; CHECK: cmp
; CHECK: b.gt
entry:
- %0 = load i32* @a, align 4
+ %0 = load i32, i32* @a, align 4
%cmp4 = icmp slt i32 %0, -1
br i1 %cmp4, label %while.body.preheader, label %while.end
@@ -283,7 +283,7 @@ while.body: ; preds = %while.body, %while.
br i1 %cmp, label %while.body, label %while.cond.while.end_crit_edge
while.cond.while.end_crit_edge: ; preds = %while.body
- %.pre = load i32* @a, align 4
+ %.pre = load i32, i32* @a, align 4
br label %while.end
while.end: ; preds = %while.cond.while.end_crit_edge, %entry
@@ -292,8 +292,8 @@ while.end: ; preds = %while.cond.while.en
br i1 %cmp1, label %land.lhs.true, label %if.end
land.lhs.true: ; preds = %while.end
- %2 = load i32* @b, align 4
- %3 = load i32* @d, align 4
+ %2 = load i32, i32* @b, align 4
+ %3 = load i32, i32* @d, align 4
%cmp2 = icmp eq i32 %2, %3
br i1 %cmp2, label %return, label %if.end
@@ -312,7 +312,7 @@ define i32 @do_nothing_if_compares_can_not_be_adjusted_to_each_other() #0 {
; CHECK: cmn
; CHECK: b.lt
entry:
- %0 = load i32* @a, align 4
+ %0 = load i32, i32* @a, align 4
%cmp4 = icmp slt i32 %0, 1
br i1 %cmp4, label %while.body.preheader, label %while.end
@@ -330,13 +330,13 @@ while.end.loopexit: ; preds = %while.body
br label %while.end
while.end: ; preds = %while.end.loopexit, %entry
- %1 = load i32* @c, align 4
+ %1 = load i32, i32* @c, align 4
%cmp1 = icmp sgt i32 %1, -3
br i1 %cmp1, label %land.lhs.true, label %if.end
land.lhs.true: ; preds = %while.end
- %2 = load i32* @b, align 4
- %3 = load i32* @d, align 4
+ %2 = load i32, i32* @b, align 4
+ %3 = load i32, i32* @d, align 4
%cmp2 = icmp eq i32 %2, %3
br i1 %cmp2, label %return, label %if.end
@@ -366,7 +366,6 @@ define i32 @fcmpri(i32 %argc, i8** nocapture readonly %argv) {
; CHECK-LABEL-DAG: .LBB9_3
; CHECK: cmp w19, #0
; CHECK: fcmp d8, #0.0
-; CHECK: b.gt .LBB9_5
; CHECK-NOT: cmp w19, #1
; CHECK-NOT: b.ge .LBB9_5
@@ -375,8 +374,8 @@ entry:
br i1 %cmp, label %land.lhs.true, label %if.end
land.lhs.true: ; preds = %entry
- %arrayidx = getelementptr inbounds i8** %argv, i64 1
- %0 = load i8** %arrayidx, align 8
+ %arrayidx = getelementptr inbounds i8*, i8** %argv, i64 1
+ %0 = load i8*, i8** %arrayidx, align 8
%cmp1 = icmp eq i8* %0, null
br i1 %cmp1, label %if.end, label %return
diff --git a/test/CodeGen/AArch64/compare-branch.ll b/test/CodeGen/AArch64/compare-branch.ll
index a1a87cf51a1a..4e0f69d195c2 100644
--- a/test/CodeGen/AArch64/compare-branch.ll
+++ b/test/CodeGen/AArch64/compare-branch.ll
@@ -6,25 +6,25 @@
define void @foo() {
; CHECK-LABEL: foo:
- %val1 = load volatile i32* @var32
+ %val1 = load volatile i32, i32* @var32
%tst1 = icmp eq i32 %val1, 0
br i1 %tst1, label %end, label %test2
; CHECK: cbz {{w[0-9]+}}, .LBB
test2:
- %val2 = load volatile i32* @var32
+ %val2 = load volatile i32, i32* @var32
%tst2 = icmp ne i32 %val2, 0
br i1 %tst2, label %end, label %test3
; CHECK: cbnz {{w[0-9]+}}, .LBB
test3:
- %val3 = load volatile i64* @var64
+ %val3 = load volatile i64, i64* @var64
%tst3 = icmp eq i64 %val3, 0
br i1 %tst3, label %end, label %test4
; CHECK: cbz {{x[0-9]+}}, .LBB
test4:
- %val4 = load volatile i64* @var64
+ %val4 = load volatile i64, i64* @var64
%tst4 = icmp ne i64 %val4, 0
br i1 %tst4, label %end, label %test5
; CHECK: cbnz {{x[0-9]+}}, .LBB
diff --git a/test/CodeGen/AArch64/complex-copy-noneon.ll b/test/CodeGen/AArch64/complex-copy-noneon.ll
index 4ae547856ecd..b7c704336023 100644
--- a/test/CodeGen/AArch64/complex-copy-noneon.ll
+++ b/test/CodeGen/AArch64/complex-copy-noneon.ll
@@ -8,13 +8,13 @@ define void @store_combine() nounwind {
%src = alloca { double, double }, align 8
%dst = alloca { double, double }, align 8
- %src.realp = getelementptr inbounds { double, double }* %src, i32 0, i32 0
- %src.real = load double* %src.realp
- %src.imagp = getelementptr inbounds { double, double }* %src, i32 0, i32 1
- %src.imag = load double* %src.imagp
+ %src.realp = getelementptr inbounds { double, double }, { double, double }* %src, i32 0, i32 0
+ %src.real = load double, double* %src.realp
+ %src.imagp = getelementptr inbounds { double, double }, { double, double }* %src, i32 0, i32 1
+ %src.imag = load double, double* %src.imagp
- %dst.realp = getelementptr inbounds { double, double }* %dst, i32 0, i32 0
- %dst.imagp = getelementptr inbounds { double, double }* %dst, i32 0, i32 1
+ %dst.realp = getelementptr inbounds { double, double }, { double, double }* %dst, i32 0, i32 0
+ %dst.imagp = getelementptr inbounds { double, double }, { double, double }* %dst, i32 0, i32 1
store double %src.real, double* %dst.realp
store double %src.imag, double* %dst.imagp
ret void
diff --git a/test/CodeGen/AArch64/complex-int-to-fp.ll b/test/CodeGen/AArch64/complex-int-to-fp.ll
index 5c943f95c355..1102553ab551 100644
--- a/test/CodeGen/AArch64/complex-int-to-fp.ll
+++ b/test/CodeGen/AArch64/complex-int-to-fp.ll
@@ -4,7 +4,7 @@
; CHECK: scvtf
; CHECK: ret
define void @autogen_SD19655(<2 x i64>* %addr, <2 x float>* %addrfloat) {
- %T = load <2 x i64>* %addr
+ %T = load <2 x i64>, <2 x i64>* %addr
%F = sitofp <2 x i64> %T to <2 x float>
store <2 x float> %F, <2 x float>* %addrfloat
ret void
diff --git a/test/CodeGen/AArch64/concat_vector-scalar-combine.ll b/test/CodeGen/AArch64/concat_vector-scalar-combine.ll
new file mode 100644
index 000000000000..1c64af636cb3
--- /dev/null
+++ b/test/CodeGen/AArch64/concat_vector-scalar-combine.ll
@@ -0,0 +1,125 @@
+; RUN: llc < %s -mtriple aarch64-unknown-unknown -aarch64-neon-syntax=apple -asm-verbose=false | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+
+; Test the (concat_vectors (bitcast (scalar)), ..) pattern.
+
+define <8 x i8> @test_concat_scalar_v2i8_to_v8i8_dup(i32 %x) #0 {
+entry:
+; CHECK-LABEL: test_concat_scalar_v2i8_to_v8i8_dup:
+; CHECK-NEXT: dup.4h v0, w0
+; CHECK-NEXT: ret
+ %t = trunc i32 %x to i16
+ %0 = bitcast i16 %t to <2 x i8>
+ %1 = shufflevector <2 x i8> %0, <2 x i8> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
+ ret <8 x i8> %1
+}
+
+define <8 x i8> @test_concat_scalar_v4i8_to_v8i8_dup(i32 %x) #0 {
+entry:
+; CHECK-LABEL: test_concat_scalar_v4i8_to_v8i8_dup:
+; CHECK-NEXT: dup.2s v0, w0
+; CHECK-NEXT: ret
+ %0 = bitcast i32 %x to <4 x i8>
+ %1 = shufflevector <4 x i8> %0, <4 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
+ ret <8 x i8> %1
+}
+
+define <8 x i16> @test_concat_scalar_v2i16_to_v8i16_dup(i32 %x) #0 {
+entry:
+; CHECK-LABEL: test_concat_scalar_v2i16_to_v8i16_dup:
+; CHECK-NEXT: dup.4s v0, w0
+; CHECK-NEXT: ret
+ %0 = bitcast i32 %x to <2 x i16>
+ %1 = shufflevector <2 x i16> %0, <2 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 2, i32 0, i32 1, i32 0, i32 1>
+ ret <8 x i16> %1
+}
+
+define <8 x i8> @test_concat_scalars_2x_v2i8_to_v8i8(i32 %x, i32 %y) #0 {
+entry:
+; CHECK-LABEL: test_concat_scalars_2x_v2i8_to_v8i8:
+; CHECK-NEXT: ins.h v0[0], w0
+; CHECK-NEXT: ins.h v0[1], w1
+; CHECK-NEXT: ins.h v0[3], w1
+; CHECK-NEXT: ret
+ %tx = trunc i32 %x to i16
+ %ty = trunc i32 %y to i16
+ %bx = bitcast i16 %tx to <2 x i8>
+ %by = bitcast i16 %ty to <2 x i8>
+ %r = shufflevector <2 x i8> %bx, <2 x i8> %by, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 2, i32 3>
+ ret <8 x i8> %r
+}
+
+define <8 x i8> @test_concat_scalars_2x_v4i8_to_v8i8_dup(i32 %x, i32 %y) #0 {
+entry:
+; CHECK-LABEL: test_concat_scalars_2x_v4i8_to_v8i8_dup:
+; CHECK-NEXT: fmov s0, w1
+; CHECK-NEXT: ins.s v0[1], w0
+; CHECK-NEXT: ret
+ %bx = bitcast i32 %x to <4 x i8>
+ %by = bitcast i32 %y to <4 x i8>
+ %r = shufflevector <4 x i8> %bx, <4 x i8> %by, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
+ ret <8 x i8> %r
+}
+
+define <8 x i16> @test_concat_scalars_2x_v2i16_to_v8i16_dup(i32 %x, i32 %y) #0 {
+entry:
+; CHECK-LABEL: test_concat_scalars_2x_v2i16_to_v8i16_dup:
+; CHECK-NEXT: fmov s0, w0
+; CHECK-NEXT: ins.s v0[1], w1
+; CHECK-NEXT: ins.s v0[2], w1
+; CHECK-NEXT: ins.s v0[3], w0
+; CHECK-NEXT: ret
+ %bx = bitcast i32 %x to <2 x i16>
+ %by = bitcast i32 %y to <2 x i16>
+ %r = shufflevector <2 x i16> %bx, <2 x i16> %by, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 0, i32 1>
+ ret <8 x i16> %r
+}
+
+; Also make sure we minimize bitcasts.
+
+; This is a pretty artificial testcase: make sure we bitcast to floating-point
+; if any of the scalars is floating-point.
+define <8 x i8> @test_concat_scalars_mixed_2x_v2i8_to_v8i8(float %dummy, i32 %x, half %y) #0 {
+entry:
+; CHECK-LABEL: test_concat_scalars_mixed_2x_v2i8_to_v8i8:
+; CHECK-NEXT: fmov s[[X:[0-9]+]], w0
+; CHECK-NEXT: ins.h v0[0], v[[X]][0]
+; CHECK-NEXT: ins.h v0[1], v1[0]
+; CHECK-NEXT: ins.h v0[2], v[[X]][0]
+; CHECK-NEXT: ins.h v0[3], v1[0]
+; CHECK-NEXT: ret
+ %t = trunc i32 %x to i16
+ %0 = bitcast i16 %t to <2 x i8>
+ %y0 = bitcast half %y to <2 x i8>
+ %1 = shufflevector <2 x i8> %0, <2 x i8> %y0, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
+ ret <8 x i8> %1
+}
+
+define <2 x float> @test_concat_scalars_fp_2x_v2i8_to_v8i8(float %dummy, half %x, half %y) #0 {
+entry:
+; CHECK-LABEL: test_concat_scalars_fp_2x_v2i8_to_v8i8:
+; CHECK-NEXT: ins.h v0[0], v1[0]
+; CHECK-NEXT: ins.h v0[1], v2[0]
+; CHECK-NEXT: ins.h v0[2], v1[0]
+; CHECK-NEXT: ins.h v0[3], v2[0]
+; CHECK-NEXT: ret
+ %0 = bitcast half %x to <2 x i8>
+ %y0 = bitcast half %y to <2 x i8>
+ %1 = shufflevector <2 x i8> %0, <2 x i8> %y0, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
+ %2 = bitcast <8 x i8> %1 to <2 x float>
+ ret <2 x float> %2
+}
+
+define <4 x float> @test_concat_scalar_fp_v2i16_to_v16i8_dup(float %x) #0 {
+entry:
+; CHECK-LABEL: test_concat_scalar_fp_v2i16_to_v16i8_dup:
+; CHECK-NEXT: dup.4s v0, v0[0]
+; CHECK-NEXT: ret
+ %0 = bitcast float %x to <2 x i16>
+ %1 = shufflevector <2 x i16> %0, <2 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 2, i32 0, i32 1, i32 0, i32 1>
+ %2 = bitcast <8 x i16> %1 to <4 x float>
+ ret <4 x float> %2
+}
+
+attributes #0 = { nounwind }
diff --git a/test/CodeGen/AArch64/concat_vector-truncate-combine.ll b/test/CodeGen/AArch64/concat_vector-truncate-combine.ll
new file mode 100644
index 000000000000..ee5278600422
--- /dev/null
+++ b/test/CodeGen/AArch64/concat_vector-truncate-combine.ll
@@ -0,0 +1,43 @@
+; RUN: llc < %s -mtriple arm64-apple-darwin -asm-verbose=false | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+
+; Test the (concat_vectors (trunc), (trunc)) pattern.
+
+define <4 x i16> @test_concat_truncate_v2i64_to_v4i16(<2 x i64> %a, <2 x i64> %b) #0 {
+entry:
+; CHECK-LABEL: test_concat_truncate_v2i64_to_v4i16:
+; CHECK-NEXT: uzp1.4s v0, v0, v1
+; CHECK-NEXT: xtn.4h v0, v0
+; CHECK-NEXT: ret
+ %at = trunc <2 x i64> %a to <2 x i16>
+ %bt = trunc <2 x i64> %b to <2 x i16>
+ %shuffle = shufflevector <2 x i16> %at, <2 x i16> %bt, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ ret <4 x i16> %shuffle
+}
+
+define <8 x i8> @test_concat_truncate_v4i32_to_v8i8(<4 x i32> %a, <4 x i32> %b) #0 {
+entry:
+; CHECK-LABEL: test_concat_truncate_v4i32_to_v8i8:
+; CHECK-NEXT: uzp1.8h v0, v0, v1
+; CHECK-NEXT: xtn.8b v0, v0
+; CHECK-NEXT: ret
+ %at = trunc <4 x i32> %a to <4 x i8>
+ %bt = trunc <4 x i32> %b to <4 x i8>
+ %shuffle = shufflevector <4 x i8> %at, <4 x i8> %bt, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ ret <8 x i8> %shuffle
+}
+
+define <8 x i16> @test_concat_truncate_v4i32_to_v8i16(<4 x i32> %a, <4 x i32> %b) #0 {
+entry:
+; CHECK-LABEL: test_concat_truncate_v4i32_to_v8i16:
+; CHECK-NEXT: xtn.4h v0, v0
+; CHECK-NEXT: xtn2.8h v0, v1
+; CHECK-NEXT: ret
+ %at = trunc <4 x i32> %a to <4 x i16>
+ %bt = trunc <4 x i32> %b to <4 x i16>
+ %shuffle = shufflevector <4 x i16> %at, <4 x i16> %bt, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ ret <8 x i16> %shuffle
+}
+
+attributes #0 = { nounwind }
diff --git a/test/CodeGen/AArch64/concat_vector-truncated-scalar-combine.ll b/test/CodeGen/AArch64/concat_vector-truncated-scalar-combine.ll
new file mode 100644
index 000000000000..eb6c80df855a
--- /dev/null
+++ b/test/CodeGen/AArch64/concat_vector-truncated-scalar-combine.ll
@@ -0,0 +1,18 @@
+; RUN: llc < %s -mtriple aarch64-unknown-unknown -asm-verbose=false | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+
+; Test the (concat_vectors (bitcast (trunc (scalar))), undef..) pattern.
+
+define <8 x i8> @test_concat_from_truncated_scalar(i32 %x) #0 {
+entry:
+; CHECK-LABEL: test_concat_from_truncated_scalar:
+; CHECK-NEXT: fmov s0, w0
+; CHECK-NEXT: ret
+ %t = trunc i32 %x to i16
+ %0 = bitcast i16 %t to <2 x i8>
+ %1 = shufflevector <2 x i8> %0, <2 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
+ ret <8 x i8> %1
+}
+
+attributes #0 = { nounwind }
diff --git a/test/CodeGen/AArch64/cpus.ll b/test/CodeGen/AArch64/cpus.ll
index f0f36bd5cea5..1266842fcc6d 100644
--- a/test/CodeGen/AArch64/cpus.ll
+++ b/test/CodeGen/AArch64/cpus.ll
@@ -4,6 +4,7 @@
; RUN: llc < %s -mtriple=arm64-unknown-unknown -mcpu=generic 2>&1 | FileCheck %s
; RUN: llc < %s -mtriple=arm64-unknown-unknown -mcpu=cortex-a53 2>&1 | FileCheck %s
; RUN: llc < %s -mtriple=arm64-unknown-unknown -mcpu=cortex-a57 2>&1 | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-unknown-unknown -mcpu=cortex-a72 2>&1 | FileCheck %s
; RUN: llc < %s -mtriple=arm64-unknown-unknown -mcpu=invalidcpu 2>&1 | FileCheck %s --check-prefix=INVALID
; CHECK-NOT: {{.*}} is not a recognized processor for this target
diff --git a/test/CodeGen/AArch64/dag-combine-invaraints.ll b/test/CodeGen/AArch64/dag-combine-invaraints.ll
index 115fc64174c8..ac2d057ff3c9 100644
--- a/test/CodeGen/AArch64/dag-combine-invaraints.ll
+++ b/test/CodeGen/AArch64/dag-combine-invaraints.ll
@@ -12,15 +12,15 @@ main_:
store i32 0, i32* %tmp
store i32 15, i32* %i32T, align 4
store i32 5, i32* %i32F, align 4
- %tmp6 = load i32* %tmp, align 4
+ %tmp6 = load i32, i32* %tmp, align 4
%tmp7 = icmp ne i32 %tmp6, 0
%tmp8 = xor i1 %tmp7, true
- %tmp9 = load i32* %i32T, align 4
- %tmp10 = load i32* %i32F, align 4
+ %tmp9 = load i32, i32* %i32T, align 4
+ %tmp10 = load i32, i32* %i32F, align 4
%DHSelect = select i1 %tmp8, i32 %tmp9, i32 %tmp10
store i32 %DHSelect, i32* %i32X, align 4
- %tmp15 = load i32* %i32X, align 4
- %tmp17 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([9 x i8]* @.str2, i32 0, i32 0), i32 %tmp15)
+ %tmp15 = load i32, i32* %i32X, align 4
+ %tmp17 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str2, i32 0, i32 0), i32 %tmp15)
ret i32 0
; CHECK: main:
diff --git a/test/CodeGen/AArch64/dp-3source.ll b/test/CodeGen/AArch64/dp-3source.ll
index bd96ec728f52..3982fea95d6c 100644
--- a/test/CodeGen/AArch64/dp-3source.ll
+++ b/test/CodeGen/AArch64/dp-3source.ll
@@ -168,8 +168,8 @@ define i64 @test_umnegl(i32 %lhs, i32 %rhs) {
define void @test_mneg(){
; CHECK-LABEL: test_mneg:
- %1 = load i32* @a, align 4
- %2 = load i32* @b, align 4
+ %1 = load i32, i32* @a, align 4
+ %2 = load i32, i32* @b, align 4
%3 = sub i32 0, %1
%4 = mul i32 %2, %3
store i32 %4, i32* @c, align 4
diff --git a/test/CodeGen/AArch64/dp1.ll b/test/CodeGen/AArch64/dp1.ll
index 662b41588541..4247afa11b76 100644
--- a/test/CodeGen/AArch64/dp1.ll
+++ b/test/CodeGen/AArch64/dp1.ll
@@ -5,7 +5,7 @@
define void @rev_i32() {
; CHECK-LABEL: rev_i32:
- %val0_tmp = load i32* @var32
+ %val0_tmp = load i32, i32* @var32
%val1_tmp = call i32 @llvm.bswap.i32(i32 %val0_tmp)
; CHECK: rev {{w[0-9]+}}, {{w[0-9]+}}
store volatile i32 %val1_tmp, i32* @var32
@@ -14,7 +14,7 @@ define void @rev_i32() {
define void @rev_i64() {
; CHECK-LABEL: rev_i64:
- %val0_tmp = load i64* @var64
+ %val0_tmp = load i64, i64* @var64
%val1_tmp = call i64 @llvm.bswap.i64(i64 %val0_tmp)
; CHECK: rev {{x[0-9]+}}, {{x[0-9]+}}
store volatile i64 %val1_tmp, i64* @var64
@@ -23,7 +23,7 @@ define void @rev_i64() {
define void @rev32_i64() {
; CHECK-LABEL: rev32_i64:
- %val0_tmp = load i64* @var64
+ %val0_tmp = load i64, i64* @var64
%val1_tmp = shl i64 %val0_tmp, 32
%val5_tmp = sub i64 64, 32
%val2_tmp = lshr i64 %val0_tmp, %val5_tmp
@@ -36,7 +36,7 @@ define void @rev32_i64() {
define void @rev16_i32() {
; CHECK-LABEL: rev16_i32:
- %val0_tmp = load i32* @var32
+ %val0_tmp = load i32, i32* @var32
%val1_tmp = shl i32 %val0_tmp, 16
%val2_tmp = lshr i32 %val0_tmp, 16
%val3_tmp = or i32 %val1_tmp, %val2_tmp
@@ -48,7 +48,7 @@ define void @rev16_i32() {
define void @clz_zerodef_i32() {
; CHECK-LABEL: clz_zerodef_i32:
- %val0_tmp = load i32* @var32
+ %val0_tmp = load i32, i32* @var32
%val4_tmp = call i32 @llvm.ctlz.i32(i32 %val0_tmp, i1 0)
; CHECK: clz {{w[0-9]+}}, {{w[0-9]+}}
store volatile i32 %val4_tmp, i32* @var32
@@ -57,7 +57,7 @@ define void @clz_zerodef_i32() {
define void @clz_zerodef_i64() {
; CHECK-LABEL: clz_zerodef_i64:
- %val0_tmp = load i64* @var64
+ %val0_tmp = load i64, i64* @var64
%val4_tmp = call i64 @llvm.ctlz.i64(i64 %val0_tmp, i1 0)
; CHECK: clz {{x[0-9]+}}, {{x[0-9]+}}
store volatile i64 %val4_tmp, i64* @var64
@@ -66,7 +66,7 @@ define void @clz_zerodef_i64() {
define void @clz_zeroundef_i32() {
; CHECK-LABEL: clz_zeroundef_i32:
- %val0_tmp = load i32* @var32
+ %val0_tmp = load i32, i32* @var32
%val4_tmp = call i32 @llvm.ctlz.i32(i32 %val0_tmp, i1 1)
; CHECK: clz {{w[0-9]+}}, {{w[0-9]+}}
store volatile i32 %val4_tmp, i32* @var32
@@ -75,7 +75,7 @@ define void @clz_zeroundef_i32() {
define void @clz_zeroundef_i64() {
; CHECK-LABEL: clz_zeroundef_i64:
- %val0_tmp = load i64* @var64
+ %val0_tmp = load i64, i64* @var64
%val4_tmp = call i64 @llvm.ctlz.i64(i64 %val0_tmp, i1 1)
; CHECK: clz {{x[0-9]+}}, {{x[0-9]+}}
store volatile i64 %val4_tmp, i64* @var64
@@ -84,7 +84,7 @@ define void @clz_zeroundef_i64() {
define void @cttz_zerodef_i32() {
; CHECK-LABEL: cttz_zerodef_i32:
- %val0_tmp = load i32* @var32
+ %val0_tmp = load i32, i32* @var32
%val4_tmp = call i32 @llvm.cttz.i32(i32 %val0_tmp, i1 0)
; CHECK: rbit [[REVERSED:w[0-9]+]], {{w[0-9]+}}
; CHECK: clz {{w[0-9]+}}, [[REVERSED]]
@@ -94,7 +94,7 @@ define void @cttz_zerodef_i32() {
define void @cttz_zerodef_i64() {
; CHECK-LABEL: cttz_zerodef_i64:
- %val0_tmp = load i64* @var64
+ %val0_tmp = load i64, i64* @var64
%val4_tmp = call i64 @llvm.cttz.i64(i64 %val0_tmp, i1 0)
; CHECK: rbit [[REVERSED:x[0-9]+]], {{x[0-9]+}}
; CHECK: clz {{x[0-9]+}}, [[REVERSED]]
@@ -104,7 +104,7 @@ define void @cttz_zerodef_i64() {
define void @cttz_zeroundef_i32() {
; CHECK-LABEL: cttz_zeroundef_i32:
- %val0_tmp = load i32* @var32
+ %val0_tmp = load i32, i32* @var32
%val4_tmp = call i32 @llvm.cttz.i32(i32 %val0_tmp, i1 1)
; CHECK: rbit [[REVERSED:w[0-9]+]], {{w[0-9]+}}
; CHECK: clz {{w[0-9]+}}, [[REVERSED]]
@@ -114,7 +114,7 @@ define void @cttz_zeroundef_i32() {
define void @cttz_zeroundef_i64() {
; CHECK-LABEL: cttz_zeroundef_i64:
- %val0_tmp = load i64* @var64
+ %val0_tmp = load i64, i64* @var64
%val4_tmp = call i64 @llvm.cttz.i64(i64 %val0_tmp, i1 1)
; CHECK: rbit [[REVERSED:x[0-9]+]], {{x[0-9]+}}
; CHECK: clz {{x[0-9]+}}, [[REVERSED]]
@@ -126,7 +126,7 @@ define void @cttz_zeroundef_i64() {
; ISelLowering.
define void @ctpop_i32() {
; CHECK-LABEL: ctpop_i32:
- %val0_tmp = load i32* @var32
+ %val0_tmp = load i32, i32* @var32
%val4_tmp = call i32 @llvm.ctpop.i32(i32 %val0_tmp)
store volatile i32 %val4_tmp, i32* @var32
ret void
@@ -134,7 +134,7 @@ define void @ctpop_i32() {
define void @ctpop_i64() {
; CHECK-LABEL: ctpop_i64:
- %val0_tmp = load i64* @var64
+ %val0_tmp = load i64, i64* @var64
%val4_tmp = call i64 @llvm.ctpop.i64(i64 %val0_tmp)
store volatile i64 %val4_tmp, i64* @var64
ret void
diff --git a/test/CodeGen/AArch64/dp2.ll b/test/CodeGen/AArch64/dp2.ll
index 71b31696372a..0cd2d7ed84a7 100644
--- a/test/CodeGen/AArch64/dp2.ll
+++ b/test/CodeGen/AArch64/dp2.ll
@@ -7,8 +7,8 @@
define void @rorv_i64() {
; CHECK-LABEL: rorv_i64:
- %val0_tmp = load i64* @var64_0
- %val1_tmp = load i64* @var64_1
+ %val0_tmp = load i64, i64* @var64_0
+ %val1_tmp = load i64, i64* @var64_1
%val2_tmp = sub i64 64, %val1_tmp
%val3_tmp = shl i64 %val0_tmp, %val2_tmp
%val4_tmp = lshr i64 %val0_tmp, %val1_tmp
@@ -20,8 +20,8 @@ define void @rorv_i64() {
define void @asrv_i64() {
; CHECK-LABEL: asrv_i64:
- %val0_tmp = load i64* @var64_0
- %val1_tmp = load i64* @var64_1
+ %val0_tmp = load i64, i64* @var64_0
+ %val1_tmp = load i64, i64* @var64_1
%val4_tmp = ashr i64 %val0_tmp, %val1_tmp
; CHECK: {{asr|asrv}} {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
store volatile i64 %val4_tmp, i64* @var64_1
@@ -30,8 +30,8 @@ define void @asrv_i64() {
define void @lsrv_i64() {
; CHECK-LABEL: lsrv_i64:
- %val0_tmp = load i64* @var64_0
- %val1_tmp = load i64* @var64_1
+ %val0_tmp = load i64, i64* @var64_0
+ %val1_tmp = load i64, i64* @var64_1
%val4_tmp = lshr i64 %val0_tmp, %val1_tmp
; CHECK: {{lsr|lsrv}} {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
store volatile i64 %val4_tmp, i64* @var64_0
@@ -40,8 +40,8 @@ define void @lsrv_i64() {
define void @lslv_i64() {
; CHECK-LABEL: lslv_i64:
- %val0_tmp = load i64* @var64_0
- %val1_tmp = load i64* @var64_1
+ %val0_tmp = load i64, i64* @var64_0
+ %val1_tmp = load i64, i64* @var64_1
%val4_tmp = shl i64 %val0_tmp, %val1_tmp
; CHECK: {{lsl|lslv}} {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
store volatile i64 %val4_tmp, i64* @var64_1
@@ -50,8 +50,8 @@ define void @lslv_i64() {
define void @udiv_i64() {
; CHECK-LABEL: udiv_i64:
- %val0_tmp = load i64* @var64_0
- %val1_tmp = load i64* @var64_1
+ %val0_tmp = load i64, i64* @var64_0
+ %val1_tmp = load i64, i64* @var64_1
%val4_tmp = udiv i64 %val0_tmp, %val1_tmp
; CHECK: udiv {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
store volatile i64 %val4_tmp, i64* @var64_0
@@ -60,8 +60,8 @@ define void @udiv_i64() {
define void @sdiv_i64() {
; CHECK-LABEL: sdiv_i64:
- %val0_tmp = load i64* @var64_0
- %val1_tmp = load i64* @var64_1
+ %val0_tmp = load i64, i64* @var64_0
+ %val1_tmp = load i64, i64* @var64_1
%val4_tmp = sdiv i64 %val0_tmp, %val1_tmp
; CHECK: sdiv {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
store volatile i64 %val4_tmp, i64* @var64_1
@@ -71,8 +71,8 @@ define void @sdiv_i64() {
define void @lsrv_i32() {
; CHECK-LABEL: lsrv_i32:
- %val0_tmp = load i32* @var32_0
- %val1_tmp = load i32* @var32_1
+ %val0_tmp = load i32, i32* @var32_0
+ %val1_tmp = load i32, i32* @var32_1
%val2_tmp = add i32 1, %val1_tmp
%val4_tmp = lshr i32 %val0_tmp, %val2_tmp
; CHECK: {{lsr|lsrv}} {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
@@ -82,8 +82,8 @@ define void @lsrv_i32() {
define void @lslv_i32() {
; CHECK-LABEL: lslv_i32:
- %val0_tmp = load i32* @var32_0
- %val1_tmp = load i32* @var32_1
+ %val0_tmp = load i32, i32* @var32_0
+ %val1_tmp = load i32, i32* @var32_1
%val2_tmp = add i32 1, %val1_tmp
%val4_tmp = shl i32 %val0_tmp, %val2_tmp
; CHECK: {{lsl|lslv}} {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
@@ -93,8 +93,8 @@ define void @lslv_i32() {
define void @rorv_i32() {
; CHECK-LABEL: rorv_i32:
- %val0_tmp = load i32* @var32_0
- %val6_tmp = load i32* @var32_1
+ %val0_tmp = load i32, i32* @var32_0
+ %val6_tmp = load i32, i32* @var32_1
%val1_tmp = add i32 1, %val6_tmp
%val2_tmp = sub i32 32, %val1_tmp
%val3_tmp = shl i32 %val0_tmp, %val2_tmp
@@ -107,8 +107,8 @@ define void @rorv_i32() {
define void @asrv_i32() {
; CHECK-LABEL: asrv_i32:
- %val0_tmp = load i32* @var32_0
- %val1_tmp = load i32* @var32_1
+ %val0_tmp = load i32, i32* @var32_0
+ %val1_tmp = load i32, i32* @var32_1
%val2_tmp = add i32 1, %val1_tmp
%val4_tmp = ashr i32 %val0_tmp, %val2_tmp
; CHECK: {{asr|asrv}} {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
@@ -118,8 +118,8 @@ define void @asrv_i32() {
define void @sdiv_i32() {
; CHECK-LABEL: sdiv_i32:
- %val0_tmp = load i32* @var32_0
- %val1_tmp = load i32* @var32_1
+ %val0_tmp = load i32, i32* @var32_0
+ %val1_tmp = load i32, i32* @var32_1
%val4_tmp = sdiv i32 %val0_tmp, %val1_tmp
; CHECK: sdiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
store volatile i32 %val4_tmp, i32* @var32_1
@@ -128,8 +128,8 @@ define void @sdiv_i32() {
define void @udiv_i32() {
; CHECK-LABEL: udiv_i32:
- %val0_tmp = load i32* @var32_0
- %val1_tmp = load i32* @var32_1
+ %val0_tmp = load i32, i32* @var32_0
+ %val1_tmp = load i32, i32* @var32_1
%val4_tmp = udiv i32 %val0_tmp, %val1_tmp
; CHECK: udiv {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
store volatile i32 %val4_tmp, i32* @var32_0
@@ -141,7 +141,7 @@ define void @udiv_i32() {
define i32 @test_lsl32() {
; CHECK-LABEL: test_lsl32:
- %val = load i32* @var32_0
+ %val = load i32, i32* @var32_0
%ret = shl i32 1, %val
; CHECK: {{lsl|lslv}} {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
@@ -151,7 +151,7 @@ define i32 @test_lsl32() {
define i32 @test_lsr32() {
; CHECK-LABEL: test_lsr32:
- %val = load i32* @var32_0
+ %val = load i32, i32* @var32_0
%ret = lshr i32 1, %val
; CHECK: {{lsr|lsrv}} {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
@@ -161,7 +161,7 @@ define i32 @test_lsr32() {
define i32 @test_asr32(i32 %in) {
; CHECK-LABEL: test_asr32:
- %val = load i32* @var32_0
+ %val = load i32, i32* @var32_0
%ret = ashr i32 %in, %val
; CHECK: {{asr|asrv}} {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
diff --git a/test/CodeGen/AArch64/eliminate-trunc.ll b/test/CodeGen/AArch64/eliminate-trunc.ll
index ea86a084cb42..bc4ac7d71704 100644
--- a/test/CodeGen/AArch64/eliminate-trunc.ll
+++ b/test/CodeGen/AArch64/eliminate-trunc.ll
@@ -14,11 +14,11 @@ entry:
for.body4.us:
%indvars.iv = phi i64 [ 0, %for.body4.lr.ph.us ], [ %indvars.iv.next, %for.body4.us ]
- %arrayidx6.us = getelementptr inbounds [8 x i8]* %a, i64 %indvars.iv26, i64 %indvars.iv
- %0 = load i8* %arrayidx6.us, align 1
+ %arrayidx6.us = getelementptr inbounds [8 x i8], [8 x i8]* %a, i64 %indvars.iv26, i64 %indvars.iv
+ %0 = load i8, i8* %arrayidx6.us, align 1
%idxprom7.us = zext i8 %0 to i64
- %arrayidx8.us = getelementptr inbounds i8* %box, i64 %idxprom7.us
- %1 = load i8* %arrayidx8.us, align 1
+ %arrayidx8.us = getelementptr inbounds i8, i8* %box, i64 %idxprom7.us
+ %1 = load i8, i8* %arrayidx8.us, align 1
store i8 %1, i8* %arrayidx6.us, align 1
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%2 = trunc i64 %indvars.iv.next to i32
diff --git a/test/CodeGen/AArch64/extern-weak.ll b/test/CodeGen/AArch64/extern-weak.ll
index f647c4bcda51..2203c0c4e698 100644
--- a/test/CodeGen/AArch64/extern-weak.ll
+++ b/test/CodeGen/AArch64/extern-weak.ll
@@ -31,7 +31,7 @@ define i32()* @foo() {
@arr_var = extern_weak global [10 x i32]
define i32* @bar() {
- %addr = getelementptr [10 x i32]* @arr_var, i32 0, i32 5
+ %addr = getelementptr [10 x i32], [10 x i32]* @arr_var, i32 0, i32 5
; CHECK: adrp x[[ADDRHI:[0-9]+]], :got:arr_var
diff --git a/test/CodeGen/AArch64/f16-convert.ll b/test/CodeGen/AArch64/f16-convert.ll
index 12412d45aa6e..8caa1f574a7b 100644
--- a/test/CodeGen/AArch64/f16-convert.ll
+++ b/test/CodeGen/AArch64/f16-convert.ll
@@ -6,7 +6,7 @@ define float @load0(i16* nocapture readonly %a) nounwind {
; CHECK-NEXT: fcvt s0, [[HREG]]
; CHECK-NEXT: ret
- %tmp = load i16* %a, align 2
+ %tmp = load i16, i16* %a, align 2
%tmp1 = tail call float @llvm.convert.from.fp16.f32(i16 %tmp)
ret float %tmp1
}
@@ -17,7 +17,7 @@ define double @load1(i16* nocapture readonly %a) nounwind {
; CHECK-NEXT: fcvt d0, [[HREG]]
; CHECK-NEXT: ret
- %tmp = load i16* %a, align 2
+ %tmp = load i16, i16* %a, align 2
%conv = tail call double @llvm.convert.from.fp16.f64(i16 %tmp)
ret double %conv
}
@@ -29,8 +29,8 @@ define float @load2(i16* nocapture readonly %a, i32 %i) nounwind {
; CHECK-NEXT: ret
%idxprom = sext i32 %i to i64
- %arrayidx = getelementptr inbounds i16* %a, i64 %idxprom
- %tmp = load i16* %arrayidx, align 2
+ %arrayidx = getelementptr inbounds i16, i16* %a, i64 %idxprom
+ %tmp = load i16, i16* %arrayidx, align 2
%tmp1 = tail call float @llvm.convert.from.fp16.f32(i16 %tmp)
ret float %tmp1
}
@@ -42,8 +42,8 @@ define double @load3(i16* nocapture readonly %a, i32 %i) nounwind {
; CHECK-NEXT: ret
%idxprom = sext i32 %i to i64
- %arrayidx = getelementptr inbounds i16* %a, i64 %idxprom
- %tmp = load i16* %arrayidx, align 2
+ %arrayidx = getelementptr inbounds i16, i16* %a, i64 %idxprom
+ %tmp = load i16, i16* %arrayidx, align 2
%conv = tail call double @llvm.convert.from.fp16.f64(i16 %tmp)
ret double %conv
}
@@ -54,8 +54,8 @@ define float @load4(i16* nocapture readonly %a, i64 %i) nounwind {
; CHECK-NEXT: fcvt s0, [[HREG]]
; CHECK-NEXT: ret
- %arrayidx = getelementptr inbounds i16* %a, i64 %i
- %tmp = load i16* %arrayidx, align 2
+ %arrayidx = getelementptr inbounds i16, i16* %a, i64 %i
+ %tmp = load i16, i16* %arrayidx, align 2
%tmp1 = tail call float @llvm.convert.from.fp16.f32(i16 %tmp)
ret float %tmp1
}
@@ -66,8 +66,8 @@ define double @load5(i16* nocapture readonly %a, i64 %i) nounwind {
; CHECK-NEXT: fcvt d0, [[HREG]]
; CHECK-NEXT: ret
- %arrayidx = getelementptr inbounds i16* %a, i64 %i
- %tmp = load i16* %arrayidx, align 2
+ %arrayidx = getelementptr inbounds i16, i16* %a, i64 %i
+ %tmp = load i16, i16* %arrayidx, align 2
%conv = tail call double @llvm.convert.from.fp16.f64(i16 %tmp)
ret double %conv
}
@@ -78,8 +78,8 @@ define float @load6(i16* nocapture readonly %a) nounwind {
; CHECK-NEXT: fcvt s0, [[HREG]]
; CHECK-NEXT: ret
- %arrayidx = getelementptr inbounds i16* %a, i64 10
- %tmp = load i16* %arrayidx, align 2
+ %arrayidx = getelementptr inbounds i16, i16* %a, i64 10
+ %tmp = load i16, i16* %arrayidx, align 2
%tmp1 = tail call float @llvm.convert.from.fp16.f32(i16 %tmp)
ret float %tmp1
}
@@ -90,8 +90,8 @@ define double @load7(i16* nocapture readonly %a) nounwind {
; CHECK-NEXT: fcvt d0, [[HREG]]
; CHECK-NEXT: ret
- %arrayidx = getelementptr inbounds i16* %a, i64 10
- %tmp = load i16* %arrayidx, align 2
+ %arrayidx = getelementptr inbounds i16, i16* %a, i64 10
+ %tmp = load i16, i16* %arrayidx, align 2
%conv = tail call double @llvm.convert.from.fp16.f64(i16 %tmp)
ret double %conv
}
@@ -102,8 +102,8 @@ define float @load8(i16* nocapture readonly %a) nounwind {
; CHECK-NEXT: fcvt s0, [[HREG]]
; CHECK-NEXT: ret
- %arrayidx = getelementptr inbounds i16* %a, i64 -10
- %tmp = load i16* %arrayidx, align 2
+ %arrayidx = getelementptr inbounds i16, i16* %a, i64 -10
+ %tmp = load i16, i16* %arrayidx, align 2
%tmp1 = tail call float @llvm.convert.from.fp16.f32(i16 %tmp)
ret float %tmp1
}
@@ -114,8 +114,8 @@ define double @load9(i16* nocapture readonly %a) nounwind {
; CHECK-NEXT: fcvt d0, [[HREG]]
; CHECK-NEXT: ret
- %arrayidx = getelementptr inbounds i16* %a, i64 -10
- %tmp = load i16* %arrayidx, align 2
+ %arrayidx = getelementptr inbounds i16, i16* %a, i64 -10
+ %tmp = load i16, i16* %arrayidx, align 2
%conv = tail call double @llvm.convert.from.fp16.f64(i16 %tmp)
ret double %conv
}
@@ -133,7 +133,8 @@ define void @store0(i16* nocapture %a, float %val) nounwind {
define void @store1(i16* nocapture %a, double %val) nounwind {
; CHECK-LABEL: store1:
-; CHECK-NEXT: fcvt h0, d0
+; CHECK-NEXT: fcvt s0, d0
+; CHECK-NEXT: fcvt h0, s0
; CHECK-NEXT: str h0, [x0]
; CHECK-NEXT: ret
@@ -151,21 +152,22 @@ define void @store2(i16* nocapture %a, i32 %i, float %val) nounwind {
%tmp = tail call i16 @llvm.convert.to.fp16.f32(float %val)
%idxprom = sext i32 %i to i64
- %arrayidx = getelementptr inbounds i16* %a, i64 %idxprom
+ %arrayidx = getelementptr inbounds i16, i16* %a, i64 %idxprom
store i16 %tmp, i16* %arrayidx, align 2
ret void
}
define void @store3(i16* nocapture %a, i32 %i, double %val) nounwind {
; CHECK-LABEL: store3:
-; CHECK-NEXT: fcvt h0, d0
+; CHECK-NEXT: fcvt s0, d0
+; CHECK-NEXT: fcvt h0, s0
; CHECK-NEXT: str h0, [x0, w1, sxtw #1]
; CHECK-NEXT: ret
%conv = fptrunc double %val to float
%tmp = tail call i16 @llvm.convert.to.fp16.f32(float %conv)
%idxprom = sext i32 %i to i64
- %arrayidx = getelementptr inbounds i16* %a, i64 %idxprom
+ %arrayidx = getelementptr inbounds i16, i16* %a, i64 %idxprom
store i16 %tmp, i16* %arrayidx, align 2
ret void
}
@@ -177,20 +179,21 @@ define void @store4(i16* nocapture %a, i64 %i, float %val) nounwind {
; CHECK-NEXT: ret
%tmp = tail call i16 @llvm.convert.to.fp16.f32(float %val)
- %arrayidx = getelementptr inbounds i16* %a, i64 %i
+ %arrayidx = getelementptr inbounds i16, i16* %a, i64 %i
store i16 %tmp, i16* %arrayidx, align 2
ret void
}
define void @store5(i16* nocapture %a, i64 %i, double %val) nounwind {
; CHECK-LABEL: store5:
-; CHECK-NEXT: fcvt h0, d0
+; CHECK-NEXT: fcvt s0, d0
+; CHECK-NEXT: fcvt h0, s0
; CHECK-NEXT: str h0, [x0, x1, lsl #1]
; CHECK-NEXT: ret
%conv = fptrunc double %val to float
%tmp = tail call i16 @llvm.convert.to.fp16.f32(float %conv)
- %arrayidx = getelementptr inbounds i16* %a, i64 %i
+ %arrayidx = getelementptr inbounds i16, i16* %a, i64 %i
store i16 %tmp, i16* %arrayidx, align 2
ret void
}
@@ -202,20 +205,21 @@ define void @store6(i16* nocapture %a, float %val) nounwind {
; CHECK-NEXT: ret
%tmp = tail call i16 @llvm.convert.to.fp16.f32(float %val)
- %arrayidx = getelementptr inbounds i16* %a, i64 10
+ %arrayidx = getelementptr inbounds i16, i16* %a, i64 10
store i16 %tmp, i16* %arrayidx, align 2
ret void
}
define void @store7(i16* nocapture %a, double %val) nounwind {
; CHECK-LABEL: store7:
-; CHECK-NEXT: fcvt h0, d0
+; CHECK-NEXT: fcvt s0, d0
+; CHECK-NEXT: fcvt h0, s0
; CHECK-NEXT: str h0, [x0, #20]
; CHECK-NEXT: ret
%conv = fptrunc double %val to float
%tmp = tail call i16 @llvm.convert.to.fp16.f32(float %conv)
- %arrayidx = getelementptr inbounds i16* %a, i64 10
+ %arrayidx = getelementptr inbounds i16, i16* %a, i64 10
store i16 %tmp, i16* %arrayidx, align 2
ret void
}
@@ -227,20 +231,21 @@ define void @store8(i16* nocapture %a, float %val) nounwind {
; CHECK-NEXT: ret
%tmp = tail call i16 @llvm.convert.to.fp16.f32(float %val)
- %arrayidx = getelementptr inbounds i16* %a, i64 -10
+ %arrayidx = getelementptr inbounds i16, i16* %a, i64 -10
store i16 %tmp, i16* %arrayidx, align 2
ret void
}
define void @store9(i16* nocapture %a, double %val) nounwind {
; CHECK-LABEL: store9:
-; CHECK-NEXT: fcvt h0, d0
+; CHECK-NEXT: fcvt s0, d0
+; CHECK-NEXT: fcvt h0, s0
; CHECK-NEXT: stur h0, [x0, #-20]
; CHECK-NEXT: ret
%conv = fptrunc double %val to float
%tmp = tail call i16 @llvm.convert.to.fp16.f32(float %conv)
- %arrayidx = getelementptr inbounds i16* %a, i64 -10
+ %arrayidx = getelementptr inbounds i16, i16* %a, i64 -10
store i16 %tmp, i16* %arrayidx, align 2
ret void
}
diff --git a/test/CodeGen/AArch64/f16-instructions.ll b/test/CodeGen/AArch64/f16-instructions.ll
new file mode 100644
index 000000000000..be5e2e51385d
--- /dev/null
+++ b/test/CodeGen/AArch64/f16-instructions.ll
@@ -0,0 +1,765 @@
+; RUN: llc < %s -mtriple aarch64-unknown-unknown -aarch64-neon-syntax=apple -asm-verbose=false | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+
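+; Note on the expected lowering (descriptive comment, inferred from the checks
+; below): with no scalar fp16 arithmetic available for this target configuration,
+; each half operation extends its operands with "fcvt sN, hN", performs the
+; operation in single precision (or calls the float libcall, e.g. fmodf), and
+; truncates the result back with "fcvt h0, s0".
+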
+; CHECK-LABEL: test_fadd:
+; CHECK-NEXT: fcvt s1, h1
+; CHECK-NEXT: fcvt s0, h0
+; CHECK-NEXT: fadd s0, s0, s1
+; CHECK-NEXT: fcvt h0, s0
+; CHECK-NEXT: ret
+define half @test_fadd(half %a, half %b) #0 {
+ %r = fadd half %a, %b
+ ret half %r
+}
+
+; CHECK-LABEL: test_fsub:
+; CHECK-NEXT: fcvt s1, h1
+; CHECK-NEXT: fcvt s0, h0
+; CHECK-NEXT: fsub s0, s0, s1
+; CHECK-NEXT: fcvt h0, s0
+; CHECK-NEXT: ret
+define half @test_fsub(half %a, half %b) #0 {
+ %r = fsub half %a, %b
+ ret half %r
+}
+
+; CHECK-LABEL: test_fmul:
+; CHECK-NEXT: fcvt s1, h1
+; CHECK-NEXT: fcvt s0, h0
+; CHECK-NEXT: fmul s0, s0, s1
+; CHECK-NEXT: fcvt h0, s0
+; CHECK-NEXT: ret
+define half @test_fmul(half %a, half %b) #0 {
+ %r = fmul half %a, %b
+ ret half %r
+}
+
+; CHECK-LABEL: test_fdiv:
+; CHECK-NEXT: fcvt s1, h1
+; CHECK-NEXT: fcvt s0, h0
+; CHECK-NEXT: fdiv s0, s0, s1
+; CHECK-NEXT: fcvt h0, s0
+; CHECK-NEXT: ret
+define half @test_fdiv(half %a, half %b) #0 {
+ %r = fdiv half %a, %b
+ ret half %r
+}
+
+; CHECK-LABEL: test_frem:
+; CHECK-NEXT: stp x29, x30, [sp, #-16]!
+; CHECK-NEXT: mov x29, sp
+; CHECK-NEXT: fcvt s0, h0
+; CHECK-NEXT: fcvt s1, h1
+; CHECK-NEXT: bl {{_?}}fmodf
+; CHECK-NEXT: fcvt h0, s0
+; CHECK-NEXT: ldp x29, x30, [sp], #16
+; CHECK-NEXT: ret
+define half @test_frem(half %a, half %b) #0 {
+ %r = frem half %a, %b
+ ret half %r
+}
+
+; CHECK-LABEL: test_store:
+; CHECK-NEXT: str h0, [x0]
+; CHECK-NEXT: ret
+define void @test_store(half %a, half* %b) #0 {
+ store half %a, half* %b
+ ret void
+}
+
+; CHECK-LABEL: test_load:
+; CHECK-NEXT: ldr h0, [x0]
+; CHECK-NEXT: ret
+define half @test_load(half* %a) #0 {
+ %r = load half, half* %a
+ ret half %r
+}
+
+
+declare half @test_callee(half %a, half %b) #0
+
+; CHECK-LABEL: test_call:
+; CHECK-NEXT: stp x29, x30, [sp, #-16]!
+; CHECK-NEXT: mov x29, sp
+; CHECK-NEXT: bl {{_?}}test_callee
+; CHECK-NEXT: ldp x29, x30, [sp], #16
+; CHECK-NEXT: ret
+define half @test_call(half %a, half %b) #0 {
+ %r = call half @test_callee(half %a, half %b)
+ ret half %r
+}
+
+; CHECK-LABEL: test_call_flipped:
+; CHECK-NEXT: stp x29, x30, [sp, #-16]!
+; CHECK-NEXT: mov x29, sp
+; CHECK-NEXT: mov.16b v2, v0
+; CHECK-NEXT: mov.16b v0, v1
+; CHECK-NEXT: mov.16b v1, v2
+; CHECK-NEXT: bl {{_?}}test_callee
+; CHECK-NEXT: ldp x29, x30, [sp], #16
+; CHECK-NEXT: ret
+define half @test_call_flipped(half %a, half %b) #0 {
+ %r = call half @test_callee(half %b, half %a)
+ ret half %r
+}
+
+; CHECK-LABEL: test_tailcall_flipped:
+; CHECK-NEXT: mov.16b v2, v0
+; CHECK-NEXT: mov.16b v0, v1
+; CHECK-NEXT: mov.16b v1, v2
+; CHECK-NEXT: b {{_?}}test_callee
+define half @test_tailcall_flipped(half %a, half %b) #0 {
+ %r = tail call half @test_callee(half %b, half %a)
+ ret half %r
+}
+
+; CHECK-LABEL: test_select:
+; CHECK-NEXT: fcvt s1, h1
+; CHECK-NEXT: fcvt s0, h0
+; CHECK-NEXT: cmp w0, #0
+; CHECK-NEXT: fcsel s0, s0, s1, ne
+; CHECK-NEXT: fcvt h0, s0
+; CHECK-NEXT: ret
+define half @test_select(half %a, half %b, i1 zeroext %c) #0 {
+ %r = select i1 %c, half %a, half %b
+ ret half %r
+}
+
+; CHECK-LABEL: test_select_cc:
+; CHECK-DAG: fcvt s3, h3
+; CHECK-DAG: fcvt s2, h2
+; CHECK-DAG: fcvt s1, h1
+; CHECK-DAG: fcvt s0, h0
+; CHECK-DAG: fcmp s2, s3
+; CHECK-DAG: cset [[CC:w[0-9]+]], ne
+; CHECK-DAG: cmp [[CC]], #0
+; CHECK-NEXT: fcsel s0, s0, s1, ne
+; CHECK-NEXT: fcvt h0, s0
+; CHECK-NEXT: ret
+define half @test_select_cc(half %a, half %b, half %c, half %d) #0 {
+ %cc = fcmp une half %c, %d
+ %r = select i1 %cc, half %a, half %b
+ ret half %r
+}
+
+; CHECK-LABEL: test_fcmp_une:
+; CHECK-NEXT: fcvt s1, h1
+; CHECK-NEXT: fcvt s0, h0
+; CHECK-NEXT: fcmp s0, s1
+; CHECK-NEXT: cset w0, ne
+; CHECK-NEXT: ret
+define i1 @test_fcmp_une(half %a, half %b) #0 {
+ %r = fcmp une half %a, %b
+ ret i1 %r
+}
+
+; CHECK-LABEL: test_fcmp_ueq:
+; CHECK-NEXT: fcvt s1, h1
+; CHECK-NEXT: fcvt s0, h0
+; CHECK-NEXT: fcmp s0, s1
+; CHECK-NEXT: orr [[TRUE:w[0-9]+]], wzr, #0x1
+; CHECK-NEXT: csel [[CC:w[0-9]+]], [[TRUE]], wzr, eq
+; CHECK-NEXT: csel w0, [[TRUE]], [[CC]], vs
+; CHECK-NEXT: ret
+define i1 @test_fcmp_ueq(half %a, half %b) #0 {
+ %r = fcmp ueq half %a, %b
+ ret i1 %r
+}
+
+; CHECK-LABEL: test_fcmp_ugt:
+; CHECK-NEXT: fcvt s1, h1
+; CHECK-NEXT: fcvt s0, h0
+; CHECK-NEXT: fcmp s0, s1
+; CHECK-NEXT: cset w0, hi
+; CHECK-NEXT: ret
+define i1 @test_fcmp_ugt(half %a, half %b) #0 {
+ %r = fcmp ugt half %a, %b
+ ret i1 %r
+}
+
+; CHECK-LABEL: test_fcmp_uge:
+; CHECK-NEXT: fcvt s1, h1
+; CHECK-NEXT: fcvt s0, h0
+; CHECK-NEXT: fcmp s0, s1
+; CHECK-NEXT: cset w0, pl
+; CHECK-NEXT: ret
+define i1 @test_fcmp_uge(half %a, half %b) #0 {
+ %r = fcmp uge half %a, %b
+ ret i1 %r
+}
+
+; CHECK-LABEL: test_fcmp_ult:
+; CHECK-NEXT: fcvt s1, h1
+; CHECK-NEXT: fcvt s0, h0
+; CHECK-NEXT: fcmp s0, s1
+; CHECK-NEXT: cset w0, lt
+; CHECK-NEXT: ret
+define i1 @test_fcmp_ult(half %a, half %b) #0 {
+ %r = fcmp ult half %a, %b
+ ret i1 %r
+}
+
+; CHECK-LABEL: test_fcmp_ule:
+; CHECK-NEXT: fcvt s1, h1
+; CHECK-NEXT: fcvt s0, h0
+; CHECK-NEXT: fcmp s0, s1
+; CHECK-NEXT: cset w0, le
+; CHECK-NEXT: ret
+define i1 @test_fcmp_ule(half %a, half %b) #0 {
+ %r = fcmp ule half %a, %b
+ ret i1 %r
+}
+
+
+; CHECK-LABEL: test_fcmp_uno:
+; CHECK-NEXT: fcvt s1, h1
+; CHECK-NEXT: fcvt s0, h0
+; CHECK-NEXT: fcmp s0, s1
+; CHECK-NEXT: cset w0, vs
+; CHECK-NEXT: ret
+define i1 @test_fcmp_uno(half %a, half %b) #0 {
+ %r = fcmp uno half %a, %b
+ ret i1 %r
+}
+
+; CHECK-LABEL: test_fcmp_one:
+; CHECK-NEXT: fcvt s1, h1
+; CHECK-NEXT: fcvt s0, h0
+; CHECK-NEXT: fcmp s0, s1
+; CHECK-NEXT: orr [[TRUE:w[0-9]+]], wzr, #0x1
+; CHECK-NEXT: csel [[CC:w[0-9]+]], [[TRUE]], wzr, mi
+; CHECK-NEXT: csel w0, [[TRUE]], [[CC]], gt
+; CHECK-NEXT: ret
+define i1 @test_fcmp_one(half %a, half %b) #0 {
+ %r = fcmp one half %a, %b
+ ret i1 %r
+}
+
+; CHECK-LABEL: test_fcmp_oeq:
+; CHECK-NEXT: fcvt s1, h1
+; CHECK-NEXT: fcvt s0, h0
+; CHECK-NEXT: fcmp s0, s1
+; CHECK-NEXT: cset w0, eq
+; CHECK-NEXT: ret
+define i1 @test_fcmp_oeq(half %a, half %b) #0 {
+ %r = fcmp oeq half %a, %b
+ ret i1 %r
+}
+
+; CHECK-LABEL: test_fcmp_ogt:
+; CHECK-NEXT: fcvt s1, h1
+; CHECK-NEXT: fcvt s0, h0
+; CHECK-NEXT: fcmp s0, s1
+; CHECK-NEXT: cset w0, gt
+; CHECK-NEXT: ret
+define i1 @test_fcmp_ogt(half %a, half %b) #0 {
+ %r = fcmp ogt half %a, %b
+ ret i1 %r
+}
+
+; CHECK-LABEL: test_fcmp_oge:
+; CHECK-NEXT: fcvt s1, h1
+; CHECK-NEXT: fcvt s0, h0
+; CHECK-NEXT: fcmp s0, s1
+; CHECK-NEXT: cset w0, ge
+; CHECK-NEXT: ret
+define i1 @test_fcmp_oge(half %a, half %b) #0 {
+ %r = fcmp oge half %a, %b
+ ret i1 %r
+}
+
+; CHECK-LABEL: test_fcmp_olt:
+; CHECK-NEXT: fcvt s1, h1
+; CHECK-NEXT: fcvt s0, h0
+; CHECK-NEXT: fcmp s0, s1
+; CHECK-NEXT: cset w0, mi
+; CHECK-NEXT: ret
+define i1 @test_fcmp_olt(half %a, half %b) #0 {
+ %r = fcmp olt half %a, %b
+ ret i1 %r
+}
+
+; CHECK-LABEL: test_fcmp_ole:
+; CHECK-NEXT: fcvt s1, h1
+; CHECK-NEXT: fcvt s0, h0
+; CHECK-NEXT: fcmp s0, s1
+; CHECK-NEXT: cset w0, ls
+; CHECK-NEXT: ret
+define i1 @test_fcmp_ole(half %a, half %b) #0 {
+ %r = fcmp ole half %a, %b
+ ret i1 %r
+}
+
+; CHECK-LABEL: test_fcmp_ord:
+; CHECK-NEXT: fcvt s1, h1
+; CHECK-NEXT: fcvt s0, h0
+; CHECK-NEXT: fcmp s0, s1
+; CHECK-NEXT: cset w0, vc
+; CHECK-NEXT: ret
+define i1 @test_fcmp_ord(half %a, half %b) #0 {
+ %r = fcmp ord half %a, %b
+ ret i1 %r
+}
+
+; CHECK-LABEL: test_br_cc:
+; CHECK-NEXT: fcvt s1, h1
+; CHECK-NEXT: fcvt s0, h0
+; CHECK-NEXT: fcmp s0, s1
+; CHECK-NEXT: b.mi [[BRCC_ELSE:.?LBB[0-9_]+]]
+; CHECK-NEXT: str wzr, [x0]
+; CHECK-NEXT: ret
+; CHECK-NEXT: [[BRCC_ELSE]]:
+; CHECK-NEXT: str wzr, [x1]
+; CHECK-NEXT: ret
+define void @test_br_cc(half %a, half %b, i32* %p1, i32* %p2) #0 {
+ %c = fcmp uge half %a, %b
+ br i1 %c, label %then, label %else
+then:
+ store i32 0, i32* %p1
+ ret void
+else:
+ store i32 0, i32* %p2
+ ret void
+}
+
+; CHECK-LABEL: test_phi:
+; CHECK: mov x[[PTR:[0-9]+]], x0
+; CHECK: ldr h[[AB:[0-9]+]], [x[[PTR]]]
+; CHECK: [[LOOP:LBB[0-9_]+]]:
+; CHECK: mov.16b v[[R:[0-9]+]], v[[AB]]
+; CHECK: ldr h[[AB]], [x[[PTR]]]
+; CHECK: mov x0, x[[PTR]]
+; CHECK: bl {{_?}}test_dummy
+; CHECK: mov.16b v0, v[[R]]
+; CHECK: ret
+define half @test_phi(half* %p1) #0 {
+entry:
+ %a = load half, half* %p1
+ br label %loop
+loop:
+ %r = phi half [%a, %entry], [%b, %loop]
+ %b = load half, half* %p1
+ %c = call i1 @test_dummy(half* %p1)
+ br i1 %c, label %loop, label %return
+return:
+ ret half %r
+}
+declare i1 @test_dummy(half* %p1) #0
+
+; CHECK-LABEL: test_fptosi_i32:
+; CHECK-NEXT: fcvt s0, h0
+; CHECK-NEXT: fcvtzs w0, s0
+; CHECK-NEXT: ret
+define i32 @test_fptosi_i32(half %a) #0 {
+ %r = fptosi half %a to i32
+ ret i32 %r
+}
+
+; CHECK-LABEL: test_fptosi_i64:
+; CHECK-NEXT: fcvt s0, h0
+; CHECK-NEXT: fcvtzs x0, s0
+; CHECK-NEXT: ret
+define i64 @test_fptosi_i64(half %a) #0 {
+ %r = fptosi half %a to i64
+ ret i64 %r
+}
+
+; CHECK-LABEL: test_fptoui_i32:
+; CHECK-NEXT: fcvt s0, h0
+; CHECK-NEXT: fcvtzu w0, s0
+; CHECK-NEXT: ret
+define i32 @test_fptoui_i32(half %a) #0 {
+ %r = fptoui half %a to i32
+ ret i32 %r
+}
+
+; CHECK-LABEL: test_fptoui_i64:
+; CHECK-NEXT: fcvt s0, h0
+; CHECK-NEXT: fcvtzu x0, s0
+; CHECK-NEXT: ret
+define i64 @test_fptoui_i64(half %a) #0 {
+ %r = fptoui half %a to i64
+ ret i64 %r
+}
+
+; CHECK-LABEL: test_uitofp_i32:
+; CHECK-NEXT: ucvtf s0, w0
+; CHECK-NEXT: fcvt h0, s0
+; CHECK-NEXT: ret
+define half @test_uitofp_i32(i32 %a) #0 {
+ %r = uitofp i32 %a to half
+ ret half %r
+}
+
+; CHECK-LABEL: test_uitofp_i64:
+; CHECK-NEXT: ucvtf s0, x0
+; CHECK-NEXT: fcvt h0, s0
+; CHECK-NEXT: ret
+define half @test_uitofp_i64(i64 %a) #0 {
+ %r = uitofp i64 %a to half
+ ret half %r
+}
+
+; CHECK-LABEL: test_sitofp_i32:
+; CHECK-NEXT: scvtf s0, w0
+; CHECK-NEXT: fcvt h0, s0
+; CHECK-NEXT: ret
+define half @test_sitofp_i32(i32 %a) #0 {
+ %r = sitofp i32 %a to half
+ ret half %r
+}
+
+; CHECK-LABEL: test_sitofp_i64:
+; CHECK-NEXT: scvtf s0, x0
+; CHECK-NEXT: fcvt h0, s0
+; CHECK-NEXT: ret
+define half @test_sitofp_i64(i64 %a) #0 {
+ %r = sitofp i64 %a to half
+ ret half %r
+}
+
+; CHECK-LABEL: test_fptrunc_float:
+; CHECK-NEXT: fcvt h0, s0
+; CHECK-NEXT: ret
+
+define half @test_fptrunc_float(float %a) #0 {
+ %r = fptrunc float %a to half
+ ret half %r
+}
+
+; CHECK-LABEL: test_fptrunc_double:
+; CHECK-NEXT: fcvt h0, d0
+; CHECK-NEXT: ret
+define half @test_fptrunc_double(double %a) #0 {
+ %r = fptrunc double %a to half
+ ret half %r
+}
+
+; CHECK-LABEL: test_fpext_float:
+; CHECK-NEXT: fcvt s0, h0
+; CHECK-NEXT: ret
+define float @test_fpext_float(half %a) #0 {
+ %r = fpext half %a to float
+ ret float %r
+}
+
+; CHECK-LABEL: test_fpext_double:
+; CHECK-NEXT: fcvt d0, h0
+; CHECK-NEXT: ret
+define double @test_fpext_double(half %a) #0 {
+ %r = fpext half %a to double
+ ret double %r
+}
+
+
+; CHECK-LABEL: test_bitcast_halftoi16:
+; CHECK-NEXT: fmov w0, s0
+; CHECK-NEXT: ret
+define i16 @test_bitcast_halftoi16(half %a) #0 {
+ %r = bitcast half %a to i16
+ ret i16 %r
+}
+
+; CHECK-LABEL: test_bitcast_i16tohalf:
+; CHECK-NEXT: fmov s0, w0
+; CHECK-NEXT: ret
+define half @test_bitcast_i16tohalf(i16 %a) #0 {
+ %r = bitcast i16 %a to half
+ ret half %r
+}
+
+
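+; The intrinsics declared below follow the same promote/operate/truncate pattern;
+; per the checks, some of them (powi, sin, cos, pow, exp*, log*, minnum, maxnum)
+; go through the corresponding float libcall, while the rest map to
+; single-precision instructions.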
+declare half @llvm.sqrt.f16(half %a) #0
+declare half @llvm.powi.f16(half %a, i32 %b) #0
+declare half @llvm.sin.f16(half %a) #0
+declare half @llvm.cos.f16(half %a) #0
+declare half @llvm.pow.f16(half %a, half %b) #0
+declare half @llvm.exp.f16(half %a) #0
+declare half @llvm.exp2.f16(half %a) #0
+declare half @llvm.log.f16(half %a) #0
+declare half @llvm.log10.f16(half %a) #0
+declare half @llvm.log2.f16(half %a) #0
+declare half @llvm.fma.f16(half %a, half %b, half %c) #0
+declare half @llvm.fabs.f16(half %a) #0
+declare half @llvm.minnum.f16(half %a, half %b) #0
+declare half @llvm.maxnum.f16(half %a, half %b) #0
+declare half @llvm.copysign.f16(half %a, half %b) #0
+declare half @llvm.floor.f16(half %a) #0
+declare half @llvm.ceil.f16(half %a) #0
+declare half @llvm.trunc.f16(half %a) #0
+declare half @llvm.rint.f16(half %a) #0
+declare half @llvm.nearbyint.f16(half %a) #0
+declare half @llvm.round.f16(half %a) #0
+declare half @llvm.fmuladd.f16(half %a, half %b, half %c) #0
+
+; CHECK-LABEL: test_sqrt:
+; CHECK-NEXT: fcvt s0, h0
+; CHECK-NEXT: fsqrt s0, s0
+; CHECK-NEXT: fcvt h0, s0
+; CHECK-NEXT: ret
+define half @test_sqrt(half %a) #0 {
+ %r = call half @llvm.sqrt.f16(half %a)
+ ret half %r
+}
+
+; CHECK-LABEL: test_powi:
+; CHECK-NEXT: stp x29, x30, [sp, #-16]!
+; CHECK-NEXT: mov x29, sp
+; CHECK-NEXT: fcvt s0, h0
+; CHECK-NEXT: bl {{_?}}__powisf2
+; CHECK-NEXT: fcvt h0, s0
+; CHECK-NEXT: ldp x29, x30, [sp], #16
+; CHECK-NEXT: ret
+define half @test_powi(half %a, i32 %b) #0 {
+ %r = call half @llvm.powi.f16(half %a, i32 %b)
+ ret half %r
+}
+
+; CHECK-LABEL: test_sin:
+; CHECK-NEXT: stp x29, x30, [sp, #-16]!
+; CHECK-NEXT: mov x29, sp
+; CHECK-NEXT: fcvt s0, h0
+; CHECK-NEXT: bl {{_?}}sinf
+; CHECK-NEXT: fcvt h0, s0
+; CHECK-NEXT: ldp x29, x30, [sp], #16
+; CHECK-NEXT: ret
+define half @test_sin(half %a) #0 {
+ %r = call half @llvm.sin.f16(half %a)
+ ret half %r
+}
+
+; CHECK-LABEL: test_cos:
+; CHECK-NEXT: stp x29, x30, [sp, #-16]!
+; CHECK-NEXT: mov x29, sp
+; CHECK-NEXT: fcvt s0, h0
+; CHECK-NEXT: bl {{_?}}cosf
+; CHECK-NEXT: fcvt h0, s0
+; CHECK-NEXT: ldp x29, x30, [sp], #16
+; CHECK-NEXT: ret
+define half @test_cos(half %a) #0 {
+ %r = call half @llvm.cos.f16(half %a)
+ ret half %r
+}
+
+; CHECK-LABEL: test_pow:
+; CHECK-NEXT: stp x29, x30, [sp, #-16]!
+; CHECK-NEXT: mov x29, sp
+; CHECK-NEXT: fcvt s0, h0
+; CHECK-NEXT: fcvt s1, h1
+; CHECK-NEXT: bl {{_?}}powf
+; CHECK-NEXT: fcvt h0, s0
+; CHECK-NEXT: ldp x29, x30, [sp], #16
+; CHECK-NEXT: ret
+define half @test_pow(half %a, half %b) #0 {
+ %r = call half @llvm.pow.f16(half %a, half %b)
+ ret half %r
+}
+
+; CHECK-LABEL: test_exp:
+; CHECK-NEXT: stp x29, x30, [sp, #-16]!
+; CHECK-NEXT: mov x29, sp
+; CHECK-NEXT: fcvt s0, h0
+; CHECK-NEXT: bl {{_?}}expf
+; CHECK-NEXT: fcvt h0, s0
+; CHECK-NEXT: ldp x29, x30, [sp], #16
+; CHECK-NEXT: ret
+define half @test_exp(half %a) #0 {
+ %r = call half @llvm.exp.f16(half %a)
+ ret half %r
+}
+
+; CHECK-LABEL: test_exp2:
+; CHECK-NEXT: stp x29, x30, [sp, #-16]!
+; CHECK-NEXT: mov x29, sp
+; CHECK-NEXT: fcvt s0, h0
+; CHECK-NEXT: bl {{_?}}exp2f
+; CHECK-NEXT: fcvt h0, s0
+; CHECK-NEXT: ldp x29, x30, [sp], #16
+; CHECK-NEXT: ret
+define half @test_exp2(half %a) #0 {
+ %r = call half @llvm.exp2.f16(half %a)
+ ret half %r
+}
+
+; CHECK-LABEL: test_log:
+; CHECK-NEXT: stp x29, x30, [sp, #-16]!
+; CHECK-NEXT: mov x29, sp
+; CHECK-NEXT: fcvt s0, h0
+; CHECK-NEXT: bl {{_?}}logf
+; CHECK-NEXT: fcvt h0, s0
+; CHECK-NEXT: ldp x29, x30, [sp], #16
+; CHECK-NEXT: ret
+define half @test_log(half %a) #0 {
+ %r = call half @llvm.log.f16(half %a)
+ ret half %r
+}
+
+; CHECK-LABEL: test_log10:
+; CHECK-NEXT: stp x29, x30, [sp, #-16]!
+; CHECK-NEXT: mov x29, sp
+; CHECK-NEXT: fcvt s0, h0
+; CHECK-NEXT: bl {{_?}}log10f
+; CHECK-NEXT: fcvt h0, s0
+; CHECK-NEXT: ldp x29, x30, [sp], #16
+; CHECK-NEXT: ret
+define half @test_log10(half %a) #0 {
+ %r = call half @llvm.log10.f16(half %a)
+ ret half %r
+}
+
+; CHECK-LABEL: test_log2:
+; CHECK-NEXT: stp x29, x30, [sp, #-16]!
+; CHECK-NEXT: mov x29, sp
+; CHECK-NEXT: fcvt s0, h0
+; CHECK-NEXT: bl {{_?}}log2f
+; CHECK-NEXT: fcvt h0, s0
+; CHECK-NEXT: ldp x29, x30, [sp], #16
+; CHECK-NEXT: ret
+define half @test_log2(half %a) #0 {
+ %r = call half @llvm.log2.f16(half %a)
+ ret half %r
+}
+
+; CHECK-LABEL: test_fma:
+; CHECK-NEXT: fcvt s2, h2
+; CHECK-NEXT: fcvt s1, h1
+; CHECK-NEXT: fcvt s0, h0
+; CHECK-NEXT: fmadd s0, s0, s1, s2
+; CHECK-NEXT: fcvt h0, s0
+; CHECK-NEXT: ret
+define half @test_fma(half %a, half %b, half %c) #0 {
+ %r = call half @llvm.fma.f16(half %a, half %b, half %c)
+ ret half %r
+}
+
+; CHECK-LABEL: test_fabs:
+; CHECK-NEXT: fcvt s0, h0
+; CHECK-NEXT: fabs s0, s0
+; CHECK-NEXT: fcvt h0, s0
+; CHECK-NEXT: ret
+define half @test_fabs(half %a) #0 {
+ %r = call half @llvm.fabs.f16(half %a)
+ ret half %r
+}
+
+; CHECK-LABEL: test_minnum:
+; CHECK-NEXT: stp x29, x30, [sp, #-16]!
+; CHECK-NEXT: mov x29, sp
+; CHECK-NEXT: fcvt s0, h0
+; CHECK-NEXT: fcvt s1, h1
+; CHECK-NEXT: bl {{_?}}fminf
+; CHECK-NEXT: fcvt h0, s0
+; CHECK-NEXT: ldp x29, x30, [sp], #16
+; CHECK-NEXT: ret
+define half @test_minnum(half %a, half %b) #0 {
+ %r = call half @llvm.minnum.f16(half %a, half %b)
+ ret half %r
+}
+
+; CHECK-LABEL: test_maxnum:
+; CHECK-NEXT: stp x29, x30, [sp, #-16]!
+; CHECK-NEXT: mov x29, sp
+; CHECK-NEXT: fcvt s0, h0
+; CHECK-NEXT: fcvt s1, h1
+; CHECK-NEXT: bl {{_?}}fmaxf
+; CHECK-NEXT: fcvt h0, s0
+; CHECK-NEXT: ldp x29, x30, [sp], #16
+; CHECK-NEXT: ret
+define half @test_maxnum(half %a, half %b) #0 {
+ %r = call half @llvm.maxnum.f16(half %a, half %b)
+ ret half %r
+}
+
+; CHECK-LABEL: test_copysign:
+; CHECK-NEXT: fcvt s1, h1
+; CHECK-NEXT: fcvt s0, h0
+; CHECK-NEXT: movi.4s v2, #0x80, lsl #24
+; CHECK-NEXT: bit.16b v0, v1, v2
+; CHECK-NEXT: fcvt h0, s0
+; CHECK-NEXT: ret
+define half @test_copysign(half %a, half %b) #0 {
+ %r = call half @llvm.copysign.f16(half %a, half %b)
+ ret half %r
+}
+
+; CHECK-LABEL: test_floor:
+; CHECK-NEXT: fcvt s1, h0
+; CHECK-NEXT: frintm s0, s1
+; CHECK-NEXT: fcvt h0, s0
+; CHECK-NEXT: frintx s1, s1
+; CHECK-NEXT: ret
+define half @test_floor(half %a) #0 {
+ %r = call half @llvm.floor.f16(half %a)
+ ret half %r
+}
+
+; CHECK-LABEL: test_ceil:
+; CHECK-NEXT: fcvt s1, h0
+; CHECK-NEXT: frintp s0, s1
+; CHECK-NEXT: fcvt h0, s0
+; CHECK-NEXT: frintx s1, s1
+; CHECK-NEXT: ret
+define half @test_ceil(half %a) #0 {
+ %r = call half @llvm.ceil.f16(half %a)
+ ret half %r
+}
+
+; CHECK-LABEL: test_trunc:
+; CHECK-NEXT: fcvt s1, h0
+; CHECK-NEXT: frintz s0, s1
+; CHECK-NEXT: fcvt h0, s0
+; CHECK-NEXT: frintx s1, s1
+; CHECK-NEXT: ret
+define half @test_trunc(half %a) #0 {
+ %r = call half @llvm.trunc.f16(half %a)
+ ret half %r
+}
+
+; CHECK-LABEL: test_rint:
+; CHECK-NEXT: fcvt s0, h0
+; CHECK-NEXT: frintx s0, s0
+; CHECK-NEXT: fcvt h0, s0
+; CHECK-NEXT: ret
+define half @test_rint(half %a) #0 {
+ %r = call half @llvm.rint.f16(half %a)
+ ret half %r
+}
+
+; CHECK-LABEL: test_nearbyint:
+; CHECK-NEXT: fcvt s0, h0
+; CHECK-NEXT: frinti s0, s0
+; CHECK-NEXT: fcvt h0, s0
+; CHECK-NEXT: ret
+define half @test_nearbyint(half %a) #0 {
+ %r = call half @llvm.nearbyint.f16(half %a)
+ ret half %r
+}
+
+; CHECK-LABEL: test_round:
+; CHECK-NEXT: fcvt s1, h0
+; CHECK-NEXT: frinta s0, s1
+; CHECK-NEXT: fcvt h0, s0
+; CHECK-NEXT: frintx s1, s1
+; CHECK-NEXT: ret
+define half @test_round(half %a) #0 {
+ %r = call half @llvm.round.f16(half %a)
+ ret half %r
+}
+
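+; fmuladd is not contracted into a single fused operation here: the checks expect
+; a separate fmul and fadd, with the intermediate product rounded back to half in
+; between.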
+; CHECK-LABEL: test_fmuladd:
+; CHECK-NEXT: fcvt s1, h1
+; CHECK-NEXT: fcvt s0, h0
+; CHECK-NEXT: fmul s0, s0, s1
+; CHECK-NEXT: fcvt h0, s0
+; CHECK-NEXT: fcvt s0, h0
+; CHECK-NEXT: fcvt s1, h2
+; CHECK-NEXT: fadd s0, s0, s1
+; CHECK-NEXT: fcvt h0, s0
+; CHECK-NEXT: ret
+define half @test_fmuladd(half %a, half %b, half %c) #0 {
+ %r = call half @llvm.fmuladd.f16(half %a, half %b, half %c)
+ ret half %r
+}
+
+attributes #0 = { nounwind }
diff --git a/test/CodeGen/AArch64/fast-isel-address-extends.ll b/test/CodeGen/AArch64/fast-isel-address-extends.ll
new file mode 100644
index 000000000000..6a17ec502a02
--- /dev/null
+++ b/test/CodeGen/AArch64/fast-isel-address-extends.ll
@@ -0,0 +1,39 @@
+; RUN: llc %s -o - -O0 -verify-machineinstrs -fast-isel=true | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+target triple = "arm64-apple-ios8.0.0"
+
+; This test covers a case where the sext %tmp142 was folded into the address arithmetic
+; in %sunkaddr1, which was incorrect because %.mux isn't available in the last bb.
+
+; CHECK: sxtw [[REG:x[0-9]+]]
+; CHECK: strh wzr, {{\[}}[[REG]], {{.*}}, lsl #1]
+
+; Function Attrs: nounwind optsize ssp
+define void @EdgeLoop(i32 %dir, i32 %edge, i32 %width, i16* %tmp89, i32 %tmp136, i16 %tmp144) #0 {
+bb:
+ %tmp2 = icmp eq i32 %dir, 0
+ %.mux = select i1 %tmp2, i32 %width, i32 1
+ %tmp142 = sext i32 %.mux to i64
+ %tmp151 = shl nsw i64 %tmp142, 1
+ %tmp153 = getelementptr inbounds i16, i16* %tmp89, i64 %tmp151
+ %tmp154 = load i16, i16* %tmp153, align 2
+ %tmp155 = zext i16 %tmp154 to i32
+ br i1 %tmp2, label %bb225, label %bb212
+
+bb212: ; preds = %bb
+ store i16 %tmp144, i16* %tmp89, align 2
+ ret void
+
+bb225: ; preds = %bb
+ %tmp248 = trunc i32 %tmp155 to i16
+ store i16 %tmp248, i16* %tmp89, align 2
+ %sunkaddr = ptrtoint i16* %tmp89 to i64
+ %sunkaddr1 = mul i64 %tmp142, 2
+ %sunkaddr2 = add i64 %sunkaddr, %sunkaddr1
+ %sunkaddr3 = inttoptr i64 %sunkaddr2 to i16*
+ store i16 0, i16* %sunkaddr3, align 2
+ ret void
+}
+
+attributes #0 = { nounwind optsize ssp }
diff --git a/test/CodeGen/AArch64/fast-isel-addressing-modes.ll b/test/CodeGen/AArch64/fast-isel-addressing-modes.ll
index d86f00d38622..6ab6a66c355d 100644
--- a/test/CodeGen/AArch64/fast-isel-addressing-modes.ll
+++ b/test/CodeGen/AArch64/fast-isel-addressing-modes.ll
@@ -1,53 +1,53 @@
; RUN: llc -mtriple=aarch64-apple-darwin -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECK --check-prefix=SDAG
-; RUN: llc -mtriple=aarch64-apple-darwin -fast-isel -fast-isel-abort -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECK --check-prefix=FAST
+; RUN: llc -mtriple=aarch64-apple-darwin -fast-isel -fast-isel-abort=1 -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECK --check-prefix=FAST
; Load / Store Base Register only
define zeroext i1 @load_breg_i1(i1* %a) {
; CHECK-LABEL: load_breg_i1
; CHECK: ldrb {{w[0-9]+}}, [x0]
- %1 = load i1* %a
+ %1 = load i1, i1* %a
ret i1 %1
}
define zeroext i8 @load_breg_i8(i8* %a) {
; CHECK-LABEL: load_breg_i8
; CHECK: ldrb {{w[0-9]+}}, [x0]
- %1 = load i8* %a
+ %1 = load i8, i8* %a
ret i8 %1
}
define zeroext i16 @load_breg_i16(i16* %a) {
; CHECK-LABEL: load_breg_i16
; CHECK: ldrh {{w[0-9]+}}, [x0]
- %1 = load i16* %a
+ %1 = load i16, i16* %a
ret i16 %1
}
define i32 @load_breg_i32(i32* %a) {
; CHECK-LABEL: load_breg_i32
; CHECK: ldr {{w[0-9]+}}, [x0]
- %1 = load i32* %a
+ %1 = load i32, i32* %a
ret i32 %1
}
define i64 @load_breg_i64(i64* %a) {
; CHECK-LABEL: load_breg_i64
; CHECK: ldr {{x[0-9]+}}, [x0]
- %1 = load i64* %a
+ %1 = load i64, i64* %a
ret i64 %1
}
define float @load_breg_f32(float* %a) {
; CHECK-LABEL: load_breg_f32
; CHECK: ldr {{s[0-9]+}}, [x0]
- %1 = load float* %a
+ %1 = load float, float* %a
ret float %1
}
define double @load_breg_f64(double* %a) {
; CHECK-LABEL: load_breg_f64
; CHECK: ldr {{d[0-9]+}}, [x0]
- %1 = load double* %a
+ %1 = load double, double* %a
ret double %1
}
@@ -113,7 +113,7 @@ define i32 @load_immoff_1() {
; CHECK: orr {{w|x}}[[REG:[0-9]+]], {{wzr|xzr}}, #0x80
; CHECK: ldr {{w[0-9]+}}, {{\[}}x[[REG]]{{\]}}
%1 = inttoptr i64 128 to i32*
- %2 = load i32* %1
+ %2 = load i32, i32* %1
ret i32 %2
}
@@ -124,7 +124,7 @@ define i32 @load_breg_immoff_1(i64 %a) {
; CHECK: ldur {{w[0-9]+}}, [x0, #-256]
%1 = add i64 %a, -256
%2 = inttoptr i64 %1 to i32*
- %3 = load i32* %2
+ %3 = load i32, i32* %2
ret i32 %3
}
@@ -135,7 +135,7 @@ define i32 @load_breg_immoff_2(i64 %a) {
; CHECK-NEXT: ldr {{w[0-9]+}}, {{\[}}[[REG]]{{\]}}
%1 = add i64 %a, -257
%2 = inttoptr i64 %1 to i32*
- %3 = load i32* %2
+ %3 = load i32, i32* %2
ret i32 %3
}
@@ -145,7 +145,7 @@ define i32 @load_breg_immoff_3(i64 %a) {
; CHECK: ldur {{w[0-9]+}}, [x0, #255]
%1 = add i64 %a, 255
%2 = inttoptr i64 %1 to i32*
- %3 = load i32* %2
+ %3 = load i32, i32* %2
ret i32 %3
}
@@ -156,7 +156,7 @@ define i32 @load_breg_immoff_4(i64 %a) {
; CHECK-NEXT: ldr {{w[0-9]+}}, {{\[}}[[REG]]{{\]}}
%1 = add i64 %a, 257
%2 = inttoptr i64 %1 to i32*
- %3 = load i32* %2
+ %3 = load i32, i32* %2
ret i32 %3
}
@@ -166,7 +166,7 @@ define i32 @load_breg_immoff_5(i64 %a) {
; CHECK: ldr {{w[0-9]+}}, [x0, #16380]
%1 = add i64 %a, 16380
%2 = inttoptr i64 %1 to i32*
- %3 = load i32* %2
+ %3 = load i32, i32* %2
ret i32 %3
}
@@ -180,7 +180,7 @@ define i32 @load_breg_immoff_6(i64 %a) {
; FAST-NEXT: ldr {{w[0-9]+}}, {{\[}}[[REG]]{{\]}}
%1 = add i64 %a, 16384
%2 = inttoptr i64 %1 to i32*
- %3 = load i32* %2
+ %3 = load i32, i32* %2
ret i32 %3
}
@@ -255,7 +255,7 @@ define i64 @load_breg_immoff_7(i64 %a) {
; CHECK: ldr {{x[0-9]+}}, [x0, #48]
%1 = add i64 %a, 48
%2 = inttoptr i64 %1 to i64*
- %3 = load i64* %2
+ %3 = load i64, i64* %2
ret i64 %3
}
@@ -265,7 +265,7 @@ define i64 @load_breg_immoff_8(i64 %a) {
; CHECK: ldr {{x[0-9]+}}, [x0, #48]
%1 = add i64 48, %a
%2 = inttoptr i64 %1 to i64*
- %3 = load i64* %2
+ %3 = load i64, i64* %2
ret i64 %3
}
@@ -275,7 +275,7 @@ define i64 @load_breg_offreg_1(i64 %a, i64 %b) {
; CHECK: ldr {{x[0-9]+}}, [x0, x1]
%1 = add i64 %a, %b
%2 = inttoptr i64 %1 to i64*
- %3 = load i64* %2
+ %3 = load i64, i64* %2
ret i64 %3
}
@@ -285,7 +285,7 @@ define i64 @load_breg_offreg_2(i64 %a, i64 %b) {
; CHECK: ldr {{x[0-9]+}}, [x1, x0]
%1 = add i64 %b, %a
%2 = inttoptr i64 %1 to i64*
- %3 = load i64* %2
+ %3 = load i64, i64* %2
ret i64 %3
}
@@ -297,7 +297,7 @@ define i64 @load_breg_offreg_immoff_1(i64 %a, i64 %b) {
%1 = add i64 %a, %b
%2 = add i64 %1, 48
%3 = inttoptr i64 %2 to i64*
- %4 = load i64* %3
+ %4 = load i64, i64* %3
ret i64 %4
}
@@ -312,7 +312,7 @@ define i64 @load_breg_offreg_immoff_2(i64 %a, i64 %b) {
%1 = add i64 %a, %b
%2 = add i64 %1, 61440
%3 = inttoptr i64 %2 to i64*
- %4 = load i64* %3
+ %4 = load i64, i64* %3
ret i64 %4
}
@@ -323,7 +323,7 @@ define i32 @load_shift_offreg_1(i64 %a) {
; CHECK: ldr {{w[0-9]+}}, {{\[}}[[REG]]{{\]}}
%1 = shl i64 %a, 2
%2 = inttoptr i64 %1 to i32*
- %3 = load i32* %2
+ %3 = load i32, i32* %2
ret i32 %3
}
@@ -333,7 +333,7 @@ define i32 @load_mul_offreg_1(i64 %a) {
; CHECK: ldr {{w[0-9]+}}, {{\[}}[[REG]]{{\]}}
%1 = mul i64 %a, 4
%2 = inttoptr i64 %1 to i32*
- %3 = load i32* %2
+ %3 = load i32, i32* %2
ret i32 %3
}
@@ -344,7 +344,7 @@ define i32 @load_breg_shift_offreg_1(i64 %a, i64 %b) {
%1 = shl i64 %a, 2
%2 = add i64 %1, %b
%3 = inttoptr i64 %2 to i32*
- %4 = load i32* %3
+ %4 = load i32, i32* %3
ret i32 %4
}
@@ -354,7 +354,7 @@ define i32 @load_breg_shift_offreg_2(i64 %a, i64 %b) {
%1 = shl i64 %a, 2
%2 = add i64 %b, %1
%3 = inttoptr i64 %2 to i32*
- %4 = load i32* %3
+ %4 = load i32, i32* %3
ret i32 %4
}
@@ -369,7 +369,7 @@ define i32 @load_breg_shift_offreg_3(i64 %a, i64 %b) {
%2 = shl i64 %b, 2
%3 = add i64 %1, %2
%4 = inttoptr i64 %3 to i32*
- %5 = load i32* %4
+ %5 = load i32, i32* %4
ret i32 %5
}
@@ -384,7 +384,7 @@ define i32 @load_breg_shift_offreg_4(i64 %a, i64 %b) {
%2 = shl i64 %b, 2
%3 = add i64 %2, %1
%4 = inttoptr i64 %3 to i32*
- %5 = load i32* %4
+ %5 = load i32, i32* %4
ret i32 %5
}
@@ -399,7 +399,7 @@ define i32 @load_breg_shift_offreg_5(i64 %a, i64 %b) {
%2 = shl i64 %b, 3
%3 = add i64 %1, %2
%4 = inttoptr i64 %3 to i32*
- %5 = load i32* %4
+ %5 = load i32, i32* %4
ret i32 %5
}
@@ -409,7 +409,7 @@ define i32 @load_breg_mul_offreg_1(i64 %a, i64 %b) {
%1 = mul i64 %a, 4
%2 = add i64 %1, %b
%3 = inttoptr i64 %2 to i32*
- %4 = load i32* %3
+ %4 = load i32, i32* %3
ret i32 %4
}
@@ -419,7 +419,7 @@ define zeroext i8 @load_breg_and_offreg_1(i64 %a, i64 %b) {
%1 = and i64 %a, 4294967295
%2 = add i64 %1, %b
%3 = inttoptr i64 %2 to i8*
- %4 = load i8* %3
+ %4 = load i8, i8* %3
ret i8 %4
}
@@ -430,7 +430,7 @@ define zeroext i16 @load_breg_and_offreg_2(i64 %a, i64 %b) {
%2 = shl i64 %1, 1
%3 = add i64 %2, %b
%4 = inttoptr i64 %3 to i16*
- %5 = load i16* %4
+ %5 = load i16, i16* %4
ret i16 %5
}
@@ -441,7 +441,7 @@ define i32 @load_breg_and_offreg_3(i64 %a, i64 %b) {
%2 = shl i64 %1, 2
%3 = add i64 %2, %b
%4 = inttoptr i64 %3 to i32*
- %5 = load i32* %4
+ %5 = load i32, i32* %4
ret i32 %5
}
@@ -452,7 +452,7 @@ define i64 @load_breg_and_offreg_4(i64 %a, i64 %b) {
%2 = shl i64 %1, 3
%3 = add i64 %2, %b
%4 = inttoptr i64 %3 to i64*
- %5 = load i64* %4
+ %5 = load i64, i64* %4
ret i64 %5
}
@@ -464,7 +464,7 @@ define i64 @load_breg_and_offreg_5(i64 %a, i64 %b, i64 %c) {
%1 = and i64 %a, %c
%2 = add i64 %1, %b
%3 = inttoptr i64 %2 to i64*
- %4 = load i64* %3
+ %4 = load i64, i64* %3
ret i64 %4
}
@@ -476,7 +476,7 @@ define i64 @load_breg_and_offreg_6(i64 %a, i64 %b, i64 %c) {
%2 = shl i64 %1, 3
%3 = add i64 %2, %b
%4 = inttoptr i64 %3 to i64*
- %5 = load i64* %4
+ %5 = load i64, i64* %4
ret i64 %5
}
@@ -488,7 +488,7 @@ define i32 @load_breg_zext_shift_offreg_1(i32 %a, i64 %b) {
%2 = shl i64 %1, 2
%3 = add i64 %2, %b
%4 = inttoptr i64 %3 to i32*
- %5 = load i32* %4
+ %5 = load i32, i32* %4
ret i32 %5
}
@@ -499,7 +499,7 @@ define i32 @load_breg_zext_shift_offreg_2(i32 %a, i64 %b) {
%2 = shl i64 %1, 2
%3 = add i64 %b, %2
%4 = inttoptr i64 %3 to i32*
- %5 = load i32* %4
+ %5 = load i32, i32* %4
ret i32 %5
}
@@ -510,7 +510,7 @@ define i32 @load_breg_zext_mul_offreg_1(i32 %a, i64 %b) {
%2 = mul i64 %1, 4
%3 = add i64 %2, %b
%4 = inttoptr i64 %3 to i32*
- %5 = load i32* %4
+ %5 = load i32, i32* %4
ret i32 %5
}
@@ -521,7 +521,7 @@ define i32 @load_breg_sext_shift_offreg_1(i32 %a, i64 %b) {
%2 = shl i64 %1, 2
%3 = add i64 %2, %b
%4 = inttoptr i64 %3 to i32*
- %5 = load i32* %4
+ %5 = load i32, i32* %4
ret i32 %5
}
@@ -532,7 +532,7 @@ define i32 @load_breg_sext_shift_offreg_2(i32 %a, i64 %b) {
%2 = shl i64 %1, 2
%3 = add i64 %b, %2
%4 = inttoptr i64 %3 to i32*
- %5 = load i32* %4
+ %5 = load i32, i32* %4
ret i32 %5
}
@@ -546,7 +546,7 @@ define i32 @load_breg_sext_shift_offreg_3(i32 %a, i64 %b) {
%3 = shl i64 %2, 2
%4 = add i64 %b, %3
%5 = inttoptr i64 %4 to i32*
- %6 = load i32* %5
+ %6 = load i32, i32* %5
ret i32 %6
}
@@ -558,7 +558,7 @@ define i32 @load_breg_sext_mul_offreg_1(i32 %a, i64 %b) {
%2 = mul i64 %1, 4
%3 = add i64 %2, %b
%4 = inttoptr i64 %3 to i32*
- %5 = load i32* %4
+ %5 = load i32, i32* %4
ret i32 %5
}
@@ -571,7 +571,7 @@ define i64 @load_sext_shift_offreg_imm1(i32 %a) {
%2 = shl i64 %1, 3
%3 = add i64 %2, 8
%4 = inttoptr i64 %3 to i64*
- %5 = load i64* %4
+ %5 = load i64, i64* %4
ret i64 %5
}
@@ -585,7 +585,7 @@ define i64 @load_breg_sext_shift_offreg_imm1(i32 %a, i64 %b) {
%3 = add i64 %b, %2
%4 = add i64 %3, 8
%5 = inttoptr i64 %4 to i64*
- %6 = load i64* %5
+ %6 = load i64, i64* %5
ret i64 %6
}
@@ -594,7 +594,7 @@ define i64 @kill_reg(i64 %a) {
%1 = sub i64 %a, 8
%2 = add i64 %1, 96
%3 = inttoptr i64 %2 to i64*
- %4 = load i64* %3
+ %4 = load i64, i64* %3
%5 = add i64 %2, %4
ret i64 %5
}
@@ -621,7 +621,7 @@ define i32 @load_fi(i64 %i) {
%3 = mul i64 %i, 4
%4 = add i64 %2, %3
%5 = inttoptr i64 %4 to i32*
- %6 = load i32* %5, align 4
+ %6 = load i32, i32* %5, align 4
ret i32 %6
}
diff --git a/test/CodeGen/AArch64/fast-isel-branch-cond-split.ll b/test/CodeGen/AArch64/fast-isel-branch-cond-split.ll
index bc4a210df62b..da6ddbf5101e 100644
--- a/test/CodeGen/AArch64/fast-isel-branch-cond-split.ll
+++ b/test/CodeGen/AArch64/fast-isel-branch-cond-split.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=aarch64-apple-darwin -fast-isel -fast-isel-abort -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-apple-darwin -fast-isel -fast-isel-abort=1 -verify-machineinstrs < %s | FileCheck %s
; CHECK-LABEL: test_or
; CHECK: cbnz w0, {{LBB[0-9]+_2}}
diff --git a/test/CodeGen/AArch64/fast-isel-branch_weights.ll b/test/CodeGen/AArch64/fast-isel-branch_weights.ll
index 70dbdf216c7d..ff57bbb33c48 100644
--- a/test/CodeGen/AArch64/fast-isel-branch_weights.ll
+++ b/test/CodeGen/AArch64/fast-isel-branch_weights.ll
@@ -1,5 +1,5 @@
; RUN: llc -mtriple=arm64-apple-darwin -aarch64-atomic-cfg-tidy=0 -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=arm64-apple-darwin -aarch64-atomic-cfg-tidy=0 -fast-isel -fast-isel-abort -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=arm64-apple-darwin -aarch64-atomic-cfg-tidy=0 -fast-isel -fast-isel-abort=1 -verify-machineinstrs < %s | FileCheck %s
; Test if the BBs are reordered according to their branch weights.
define i64 @branch_weights_test(i64 %a, i64 %b) {
diff --git a/test/CodeGen/AArch64/fast-isel-call-return.ll b/test/CodeGen/AArch64/fast-isel-call-return.ll
index 9b10969417df..a03b12e8d3ea 100644
--- a/test/CodeGen/AArch64/fast-isel-call-return.ll
+++ b/test/CodeGen/AArch64/fast-isel-call-return.ll
@@ -1,4 +1,4 @@
-; RUN: llc -fast-isel -fast-isel-abort -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -fast-isel -fast-isel-abort=1 -verify-machineinstrs < %s | FileCheck %s
target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
target triple = "aarch64-linux-gnu"
diff --git a/test/CodeGen/AArch64/fast-isel-cbz.ll b/test/CodeGen/AArch64/fast-isel-cbz.ll
index 6e31a045d285..a407b269dd82 100644
--- a/test/CodeGen/AArch64/fast-isel-cbz.ll
+++ b/test/CodeGen/AArch64/fast-isel-cbz.ll
@@ -1,4 +1,4 @@
-; RUN: llc -fast-isel -fast-isel-abort -aarch64-atomic-cfg-tidy=0 -verify-machineinstrs -mtriple=aarch64-apple-darwin < %s | FileCheck %s
+; RUN: llc -fast-isel -fast-isel-abort=1 -aarch64-atomic-cfg-tidy=0 -verify-machineinstrs -mtriple=aarch64-apple-darwin < %s | FileCheck %s
define i32 @icmp_eq_i1(i1 %a) {
; CHECK-LABEL: icmp_eq_i1
diff --git a/test/CodeGen/AArch64/fast-isel-cmp-branch.ll b/test/CodeGen/AArch64/fast-isel-cmp-branch.ll
index 3651f194efda..1ac358f37aa8 100644
--- a/test/CodeGen/AArch64/fast-isel-cmp-branch.ll
+++ b/test/CodeGen/AArch64/fast-isel-cmp-branch.ll
@@ -1,5 +1,5 @@
; RUN: llc -aarch64-atomic-cfg-tidy=0 -mtriple=aarch64-apple-darwin < %s | FileCheck %s
-; RUN: llc -fast-isel -fast-isel-abort -aarch64-atomic-cfg-tidy=0 -mtriple=aarch64-apple-darwin < %s | FileCheck %s
+; RUN: llc -fast-isel -fast-isel-abort=1 -aarch64-atomic-cfg-tidy=0 -mtriple=aarch64-apple-darwin < %s | FileCheck %s
define i32 @fcmp_oeq(float %x, float %y) {
; CHECK-LABEL: fcmp_oeq
diff --git a/test/CodeGen/AArch64/fast-isel-folding.ll b/test/CodeGen/AArch64/fast-isel-folding.ll
index 6b524ff2c099..883933b79923 100644
--- a/test/CodeGen/AArch64/fast-isel-folding.ll
+++ b/test/CodeGen/AArch64/fast-isel-folding.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=aarch64-apple-darwin -O0 -fast-isel-abort -verify-machineinstrs < %s
+; RUN: llc -mtriple=aarch64-apple-darwin -O0 -fast-isel-abort=1 -verify-machineinstrs < %s
; Test that we don't fold the shift.
define i64 @fold_shift_test(i64 %a, i1 %c) {
diff --git a/test/CodeGen/AArch64/fast-isel-gep.ll b/test/CodeGen/AArch64/fast-isel-gep.ll
index 4dc0a05894f1..33adcdc3c464 100644
--- a/test/CodeGen/AArch64/fast-isel-gep.ll
+++ b/test/CodeGen/AArch64/fast-isel-gep.ll
@@ -1,11 +1,11 @@
-; RUN: llc -mtriple=aarch64-apple-darwin -fast-isel -fast-isel-abort -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-apple-darwin -fast-isel -fast-isel-abort=1 -verify-machineinstrs < %s | FileCheck %s
%struct.foo = type { i32, i64, float, double }
define double* @test_struct(%struct.foo* %f) {
; CHECK-LABEL: test_struct
; CHECK: add x0, x0, #24
- %1 = getelementptr inbounds %struct.foo* %f, i64 0, i32 3
+ %1 = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 0, i32 3
ret double* %1
}
@@ -13,21 +13,21 @@ define i32* @test_array1(i32* %a, i64 %i) {
; CHECK-LABEL: test_array1
; CHECK: orr [[REG:x[0-9]+]], xzr, #0x4
; CHECK-NEXT: madd x0, x1, [[REG]], x0
- %1 = getelementptr inbounds i32* %a, i64 %i
+ %1 = getelementptr inbounds i32, i32* %a, i64 %i
ret i32* %1
}
define i32* @test_array2(i32* %a) {
; CHECK-LABEL: test_array2
; CHECK: add x0, x0, #16
- %1 = getelementptr inbounds i32* %a, i64 4
+ %1 = getelementptr inbounds i32, i32* %a, i64 4
ret i32* %1
}
define i32* @test_array3(i32* %a) {
; CHECK-LABEL: test_array3
; CHECK: add x0, x0, #1, lsl #12
- %1 = getelementptr inbounds i32* %a, i64 1024
+ %1 = getelementptr inbounds i32, i32* %a, i64 1024
ret i32* %1
}
@@ -35,7 +35,7 @@ define i32* @test_array4(i32* %a) {
; CHECK-LABEL: test_array4
; CHECK: movz [[REG:x[0-9]+]], #0x1008
; CHECK-NEXT: add x0, x0, [[REG]]
- %1 = getelementptr inbounds i32* %a, i64 1026
+ %1 = getelementptr inbounds i32, i32* %a, i64 1026
ret i32* %1
}
@@ -44,6 +44,6 @@ define i32* @test_array5(i32* %a, i32 %i) {
; CHECK: sxtw [[REG1:x[0-9]+]], w1
; CHECK-NEXT: orr [[REG2:x[0-9]+]], xzr, #0x4
; CHECK-NEXT: madd {{x[0-9]+}}, [[REG1]], [[REG2]], x0
- %1 = getelementptr inbounds i32* %a, i32 %i
+ %1 = getelementptr inbounds i32, i32* %a, i32 %i
ret i32* %1
}
diff --git a/test/CodeGen/AArch64/fast-isel-int-ext.ll b/test/CodeGen/AArch64/fast-isel-int-ext.ll
index 866febac2622..4b2cab5fa728 100644
--- a/test/CodeGen/AArch64/fast-isel-int-ext.ll
+++ b/test/CodeGen/AArch64/fast-isel-int-ext.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=aarch64-apple-darwin -fast-isel -fast-isel-abort -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-apple-darwin -fast-isel -fast-isel-abort=1 -verify-machineinstrs < %s | FileCheck %s
;
; Test that we only use the sign/zero extend in the address calculation when
@@ -13,7 +13,7 @@ define i64 @load_addr_shift_zext1(i32 %a, i64 %b) {
%2 = shl i64 %1, 3
%3 = add i64 %b, %2
%4 = inttoptr i64 %3 to i64*
- %5 = load i64* %4
+ %5 = load i64, i64* %4
ret i64 %5
}
@@ -24,7 +24,7 @@ define i64 @load_addr_shift_zext2(i32 zeroext %a, i64 %b) {
%2 = shl i64 %1, 3
%3 = add i64 %b, %2
%4 = inttoptr i64 %3 to i64*
- %5 = load i64* %4
+ %5 = load i64, i64* %4
ret i64 %5
}
@@ -35,7 +35,7 @@ define i64 @load_addr_shift_zext3(i32 signext %a, i64 %b) {
%2 = shl i64 %1, 3
%3 = add i64 %b, %2
%4 = inttoptr i64 %3 to i64*
- %5 = load i64* %4
+ %5 = load i64, i64* %4
ret i64 %5
}
@@ -46,7 +46,7 @@ define i64 @load_addr_shift_sext1(i32 %a, i64 %b) {
%2 = shl i64 %1, 3
%3 = add i64 %b, %2
%4 = inttoptr i64 %3 to i64*
- %5 = load i64* %4
+ %5 = load i64, i64* %4
ret i64 %5
}
@@ -57,7 +57,7 @@ define i64 @load_addr_shift_sext2(i32 zeroext %a, i64 %b) {
%2 = shl i64 %1, 3
%3 = add i64 %b, %2
%4 = inttoptr i64 %3 to i64*
- %5 = load i64* %4
+ %5 = load i64, i64* %4
ret i64 %5
}
@@ -68,7 +68,7 @@ define i64 @load_addr_shift_sext3(i32 signext %a, i64 %b) {
%2 = shl i64 %1, 3
%3 = add i64 %b, %2
%4 = inttoptr i64 %3 to i64*
- %5 = load i64* %4
+ %5 = load i64, i64* %4
ret i64 %5
}
@@ -82,7 +82,7 @@ define i64 @load_addr_mul_zext1(i32 %a, i64 %b) {
%2 = mul i64 %1, 8
%3 = add i64 %b, %2
%4 = inttoptr i64 %3 to i64*
- %5 = load i64* %4
+ %5 = load i64, i64* %4
ret i64 %5
}
@@ -93,7 +93,7 @@ define i64 @load_addr_mul_zext2(i32 zeroext %a, i64 %b) {
%2 = mul i64 %1, 8
%3 = add i64 %b, %2
%4 = inttoptr i64 %3 to i64*
- %5 = load i64* %4
+ %5 = load i64, i64* %4
ret i64 %5
}
@@ -104,7 +104,7 @@ define i64 @load_addr_mul_zext3(i32 signext %a, i64 %b) {
%2 = mul i64 %1, 8
%3 = add i64 %b, %2
%4 = inttoptr i64 %3 to i64*
- %5 = load i64* %4
+ %5 = load i64, i64* %4
ret i64 %5
}
@@ -115,7 +115,7 @@ define i64 @load_addr_mul_sext1(i32 %a, i64 %b) {
%2 = mul i64 %1, 8
%3 = add i64 %b, %2
%4 = inttoptr i64 %3 to i64*
- %5 = load i64* %4
+ %5 = load i64, i64* %4
ret i64 %5
}
@@ -126,7 +126,7 @@ define i64 @load_addr_mul_sext2(i32 zeroext %a, i64 %b) {
%2 = mul i64 %1, 8
%3 = add i64 %b, %2
%4 = inttoptr i64 %3 to i64*
- %5 = load i64* %4
+ %5 = load i64, i64* %4
ret i64 %5
}
@@ -137,7 +137,7 @@ define i64 @load_addr_mul_sext3(i32 signext %a, i64 %b) {
%2 = mul i64 %1, 8
%3 = add i64 %b, %2
%4 = inttoptr i64 %3 to i64*
- %5 = load i64* %4
+ %5 = load i64, i64* %4
ret i64 %5
}
@@ -153,7 +153,7 @@ define i32 @load_unscaled_zext_i8_to_i32(i64 %a) {
; CHECK-NOT: uxtb
%1 = sub i64 %a, 8
%2 = inttoptr i64 %1 to i8*
- %3 = load i8* %2
+ %3 = load i8, i8* %2
%4 = zext i8 %3 to i32
ret i32 %4
}
@@ -164,7 +164,7 @@ define i32 @load_unscaled_zext_i16_to_i32(i64 %a) {
; CHECK-NOT: uxth
%1 = sub i64 %a, 8
%2 = inttoptr i64 %1 to i16*
- %3 = load i16* %2
+ %3 = load i16, i16* %2
%4 = zext i16 %3 to i32
ret i32 %4
}
@@ -175,7 +175,7 @@ define i64 @load_unscaled_zext_i8_to_i64(i64 %a) {
; CHECK-NOT: uxtb
%1 = sub i64 %a, 8
%2 = inttoptr i64 %1 to i8*
- %3 = load i8* %2
+ %3 = load i8, i8* %2
%4 = zext i8 %3 to i64
ret i64 %4
}
@@ -186,7 +186,7 @@ define i64 @load_unscaled_zext_i16_to_i64(i64 %a) {
; CHECK-NOT: uxth
%1 = sub i64 %a, 8
%2 = inttoptr i64 %1 to i16*
- %3 = load i16* %2
+ %3 = load i16, i16* %2
%4 = zext i16 %3 to i64
ret i64 %4
}
@@ -197,7 +197,7 @@ define i64 @load_unscaled_zext_i32_to_i64(i64 %a) {
; CHECK-NOT: uxtw
%1 = sub i64 %a, 8
%2 = inttoptr i64 %1 to i32*
- %3 = load i32* %2
+ %3 = load i32, i32* %2
%4 = zext i32 %3 to i64
ret i64 %4
}
@@ -208,7 +208,7 @@ define i32 @load_unscaled_sext_i8_to_i32(i64 %a) {
; CHECK-NOT: sxtb
%1 = sub i64 %a, 8
%2 = inttoptr i64 %1 to i8*
- %3 = load i8* %2
+ %3 = load i8, i8* %2
%4 = sext i8 %3 to i32
ret i32 %4
}
@@ -219,7 +219,7 @@ define i32 @load_unscaled_sext_i16_to_i32(i64 %a) {
; CHECK-NOT: sxth
%1 = sub i64 %a, 8
%2 = inttoptr i64 %1 to i16*
- %3 = load i16* %2
+ %3 = load i16, i16* %2
%4 = sext i16 %3 to i32
ret i32 %4
}
@@ -230,7 +230,7 @@ define i64 @load_unscaled_sext_i8_to_i64(i64 %a) {
; CHECK-NOT: sxtb
%1 = sub i64 %a, 8
%2 = inttoptr i64 %1 to i8*
- %3 = load i8* %2
+ %3 = load i8, i8* %2
%4 = sext i8 %3 to i64
ret i64 %4
}
@@ -241,7 +241,7 @@ define i64 @load_unscaled_sext_i16_to_i64(i64 %a) {
; CHECK-NOT: sxth
%1 = sub i64 %a, 8
%2 = inttoptr i64 %1 to i16*
- %3 = load i16* %2
+ %3 = load i16, i16* %2
%4 = sext i16 %3 to i64
ret i64 %4
}
@@ -252,7 +252,7 @@ define i64 @load_unscaled_sext_i32_to_i64(i64 %a) {
; CHECK-NOT: sxtw
%1 = sub i64 %a, 8
%2 = inttoptr i64 %1 to i32*
- %3 = load i32* %2
+ %3 = load i32, i32* %2
%4 = sext i32 %3 to i64
ret i64 %4
}
@@ -264,7 +264,7 @@ define i32 @load_register_zext_i8_to_i32(i64 %a, i64 %b) {
; CHECK-NOT: uxtb
%1 = add i64 %a, %b
%2 = inttoptr i64 %1 to i8*
- %3 = load i8* %2
+ %3 = load i8, i8* %2
%4 = zext i8 %3 to i32
ret i32 %4
}
@@ -275,7 +275,7 @@ define i32 @load_register_zext_i16_to_i32(i64 %a, i64 %b) {
; CHECK-NOT: uxth
%1 = add i64 %a, %b
%2 = inttoptr i64 %1 to i16*
- %3 = load i16* %2
+ %3 = load i16, i16* %2
%4 = zext i16 %3 to i32
ret i32 %4
}
@@ -286,7 +286,7 @@ define i64 @load_register_zext_i8_to_i64(i64 %a, i64 %b) {
; CHECK-NOT: uxtb
%1 = add i64 %a, %b
%2 = inttoptr i64 %1 to i8*
- %3 = load i8* %2
+ %3 = load i8, i8* %2
%4 = zext i8 %3 to i64
ret i64 %4
}
@@ -297,7 +297,7 @@ define i64 @load_register_zext_i16_to_i64(i64 %a, i64 %b) {
; CHECK-NOT: uxth
%1 = add i64 %a, %b
%2 = inttoptr i64 %1 to i16*
- %3 = load i16* %2
+ %3 = load i16, i16* %2
%4 = zext i16 %3 to i64
ret i64 %4
}
@@ -308,7 +308,7 @@ define i64 @load_register_zext_i32_to_i64(i64 %a, i64 %b) {
; CHECK-NOT: uxtw
%1 = add i64 %a, %b
%2 = inttoptr i64 %1 to i32*
- %3 = load i32* %2
+ %3 = load i32, i32* %2
%4 = zext i32 %3 to i64
ret i64 %4
}
@@ -319,7 +319,7 @@ define i32 @load_register_sext_i8_to_i32(i64 %a, i64 %b) {
; CHECK-NOT: sxtb
%1 = add i64 %a, %b
%2 = inttoptr i64 %1 to i8*
- %3 = load i8* %2
+ %3 = load i8, i8* %2
%4 = sext i8 %3 to i32
ret i32 %4
}
@@ -330,7 +330,7 @@ define i32 @load_register_sext_i16_to_i32(i64 %a, i64 %b) {
; CHECK-NOT: sxth
%1 = add i64 %a, %b
%2 = inttoptr i64 %1 to i16*
- %3 = load i16* %2
+ %3 = load i16, i16* %2
%4 = sext i16 %3 to i32
ret i32 %4
}
@@ -341,7 +341,7 @@ define i64 @load_register_sext_i8_to_i64(i64 %a, i64 %b) {
; CHECK-NOT: sxtb
%1 = add i64 %a, %b
%2 = inttoptr i64 %1 to i8*
- %3 = load i8* %2
+ %3 = load i8, i8* %2
%4 = sext i8 %3 to i64
ret i64 %4
}
@@ -352,7 +352,7 @@ define i64 @load_register_sext_i16_to_i64(i64 %a, i64 %b) {
; CHECK-NOT: sxth
%1 = add i64 %a, %b
%2 = inttoptr i64 %1 to i16*
- %3 = load i16* %2
+ %3 = load i16, i16* %2
%4 = sext i16 %3 to i64
ret i64 %4
}
@@ -363,7 +363,7 @@ define i64 @load_register_sext_i32_to_i64(i64 %a, i64 %b) {
; CHECK-NOT: sxtw
%1 = add i64 %a, %b
%2 = inttoptr i64 %1 to i32*
- %3 = load i32* %2
+ %3 = load i32, i32* %2
%4 = sext i32 %3 to i64
ret i64 %4
}
@@ -376,7 +376,7 @@ define i32 @load_extend_zext_i8_to_i32(i64 %a, i32 %b) {
%1 = sext i32 %b to i64
%2 = add i64 %a, %1
%3 = inttoptr i64 %2 to i8*
- %4 = load i8* %3
+ %4 = load i8, i8* %3
%5 = zext i8 %4 to i32
ret i32 %5
}
@@ -388,7 +388,7 @@ define i32 @load_extend_zext_i16_to_i32(i64 %a, i32 %b) {
%1 = sext i32 %b to i64
%2 = add i64 %a, %1
%3 = inttoptr i64 %2 to i16*
- %4 = load i16* %3
+ %4 = load i16, i16* %3
%5 = zext i16 %4 to i32
ret i32 %5
}
@@ -400,7 +400,7 @@ define i64 @load_extend_zext_i8_to_i64(i64 %a, i32 %b) {
%1 = sext i32 %b to i64
%2 = add i64 %a, %1
%3 = inttoptr i64 %2 to i8*
- %4 = load i8* %3
+ %4 = load i8, i8* %3
%5 = zext i8 %4 to i64
ret i64 %5
}
@@ -412,7 +412,7 @@ define i64 @load_extend_zext_i16_to_i64(i64 %a, i32 %b) {
%1 = sext i32 %b to i64
%2 = add i64 %a, %1
%3 = inttoptr i64 %2 to i16*
- %4 = load i16* %3
+ %4 = load i16, i16* %3
%5 = zext i16 %4 to i64
ret i64 %5
}
@@ -424,7 +424,7 @@ define i64 @load_extend_zext_i32_to_i64(i64 %a, i32 %b) {
%1 = sext i32 %b to i64
%2 = add i64 %a, %1
%3 = inttoptr i64 %2 to i32*
- %4 = load i32* %3
+ %4 = load i32, i32* %3
%5 = zext i32 %4 to i64
ret i64 %5
}
@@ -436,7 +436,7 @@ define i32 @load_extend_sext_i8_to_i32(i64 %a, i32 %b) {
%1 = sext i32 %b to i64
%2 = add i64 %a, %1
%3 = inttoptr i64 %2 to i8*
- %4 = load i8* %3
+ %4 = load i8, i8* %3
%5 = sext i8 %4 to i32
ret i32 %5
}
@@ -448,7 +448,7 @@ define i32 @load_extend_sext_i16_to_i32(i64 %a, i32 %b) {
%1 = sext i32 %b to i64
%2 = add i64 %a, %1
%3 = inttoptr i64 %2 to i16*
- %4 = load i16* %3
+ %4 = load i16, i16* %3
%5 = sext i16 %4 to i32
ret i32 %5
}
@@ -460,7 +460,7 @@ define i64 @load_extend_sext_i8_to_i64(i64 %a, i32 %b) {
%1 = sext i32 %b to i64
%2 = add i64 %a, %1
%3 = inttoptr i64 %2 to i8*
- %4 = load i8* %3
+ %4 = load i8, i8* %3
%5 = sext i8 %4 to i64
ret i64 %5
}
@@ -472,7 +472,7 @@ define i64 @load_extend_sext_i16_to_i64(i64 %a, i32 %b) {
%1 = sext i32 %b to i64
%2 = add i64 %a, %1
%3 = inttoptr i64 %2 to i16*
- %4 = load i16* %3
+ %4 = load i16, i16* %3
%5 = sext i16 %4 to i64
ret i64 %5
}
@@ -484,7 +484,7 @@ define i64 @load_extend_sext_i32_to_i64(i64 %a, i32 %b) {
%1 = sext i32 %b to i64
%2 = add i64 %a, %1
%3 = inttoptr i64 %2 to i32*
- %4 = load i32* %3
+ %4 = load i32, i32* %3
%5 = sext i32 %4 to i64
ret i64 %5
}
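The hunks above are one mechanical rewrite applied throughout: LLVM IR loads now spell the result type explicitly, so "load i64* %p" becomes "load i64, i64* %p". A minimal standalone sketch of the before/after form (hypothetical function name, not part of the patch):
; old form, kept only as a comment:
;   %v = load i64* %p
; new form with the explicit result type:
define i64 @load_syntax_sketch(i64* %p) {
  %v = load i64, i64* %p
  ret i64 %v
}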
diff --git a/test/CodeGen/AArch64/fast-isel-int-ext2.ll b/test/CodeGen/AArch64/fast-isel-int-ext2.ll
index 8df26b26971a..93741d6c12d6 100644
--- a/test/CodeGen/AArch64/fast-isel-int-ext2.ll
+++ b/test/CodeGen/AArch64/fast-isel-int-ext2.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=aarch64-apple-darwin -fast-isel -fast-isel-abort -aarch64-atomic-cfg-tidy=false -disable-cgp-branch-opts -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-apple-darwin -fast-isel -fast-isel-abort=1 -aarch64-atomic-cfg-tidy=false -disable-cgp-branch-opts -verify-machineinstrs < %s | FileCheck %s
;
; Test folding of the sign-/zero-extend into the load instruction.
@@ -11,7 +11,7 @@ define i32 @load_unscaled_zext_i8_to_i32(i64 %a) {
; CHECK-NOT: uxtb
%1 = sub i64 %a, 8
%2 = inttoptr i64 %1 to i8*
- %3 = load i8* %2
+ %3 = load i8, i8* %2
br label %bb2
bb2:
@@ -25,7 +25,7 @@ define i32 @load_unscaled_zext_i16_to_i32(i64 %a) {
; CHECK-NOT: uxth
%1 = sub i64 %a, 8
%2 = inttoptr i64 %1 to i16*
- %3 = load i16* %2
+ %3 = load i16, i16* %2
br label %bb2
bb2:
@@ -39,7 +39,7 @@ define i64 @load_unscaled_zext_i8_to_i64(i64 %a) {
; CHECK-NOT: uxtb
%1 = sub i64 %a, 8
%2 = inttoptr i64 %1 to i8*
- %3 = load i8* %2
+ %3 = load i8, i8* %2
br label %bb2
bb2:
@@ -53,7 +53,7 @@ define i64 @load_unscaled_zext_i16_to_i64(i64 %a) {
; CHECK-NOT: uxth
%1 = sub i64 %a, 8
%2 = inttoptr i64 %1 to i16*
- %3 = load i16* %2
+ %3 = load i16, i16* %2
br label %bb2
bb2:
@@ -67,7 +67,7 @@ define i64 @load_unscaled_zext_i32_to_i64(i64 %a) {
; CHECK-NOT: uxtw
%1 = sub i64 %a, 8
%2 = inttoptr i64 %1 to i32*
- %3 = load i32* %2
+ %3 = load i32, i32* %2
br label %bb2
bb2:
@@ -81,7 +81,7 @@ define i32 @load_unscaled_sext_i8_to_i32(i64 %a) {
; CHECK-NOT: sxtb
%1 = sub i64 %a, 8
%2 = inttoptr i64 %1 to i8*
- %3 = load i8* %2
+ %3 = load i8, i8* %2
br label %bb2
bb2:
@@ -95,7 +95,7 @@ define i32 @load_unscaled_sext_i16_to_i32(i64 %a) {
; CHECK-NOT: sxth
%1 = sub i64 %a, 8
%2 = inttoptr i64 %1 to i16*
- %3 = load i16* %2
+ %3 = load i16, i16* %2
br label %bb2
bb2:
@@ -109,7 +109,7 @@ define i64 @load_unscaled_sext_i8_to_i64(i64 %a) {
; CHECK-NOT: sxtb
%1 = sub i64 %a, 8
%2 = inttoptr i64 %1 to i8*
- %3 = load i8* %2
+ %3 = load i8, i8* %2
br label %bb2
bb2:
@@ -123,7 +123,7 @@ define i64 @load_unscaled_sext_i16_to_i64(i64 %a) {
; CHECK-NOT: sxth
%1 = sub i64 %a, 8
%2 = inttoptr i64 %1 to i16*
- %3 = load i16* %2
+ %3 = load i16, i16* %2
br label %bb2
bb2:
@@ -137,7 +137,7 @@ define i64 @load_unscaled_sext_i32_to_i64(i64 %a) {
; CHECK-NOT: sxtw
%1 = sub i64 %a, 8
%2 = inttoptr i64 %1 to i32*
- %3 = load i32* %2
+ %3 = load i32, i32* %2
br label %bb2
bb2:
@@ -152,7 +152,7 @@ define i32 @load_register_zext_i8_to_i32(i64 %a, i64 %b) {
; CHECK-NOT: uxtb
%1 = add i64 %a, %b
%2 = inttoptr i64 %1 to i8*
- %3 = load i8* %2
+ %3 = load i8, i8* %2
br label %bb2
bb2:
@@ -166,7 +166,7 @@ define i32 @load_register_zext_i16_to_i32(i64 %a, i64 %b) {
; CHECK-NOT: uxth
%1 = add i64 %a, %b
%2 = inttoptr i64 %1 to i16*
- %3 = load i16* %2
+ %3 = load i16, i16* %2
br label %bb2
bb2:
@@ -180,7 +180,7 @@ define i64 @load_register_zext_i8_to_i64(i64 %a, i64 %b) {
; CHECK-NOT: uxtb
%1 = add i64 %a, %b
%2 = inttoptr i64 %1 to i8*
- %3 = load i8* %2
+ %3 = load i8, i8* %2
br label %bb2
bb2:
@@ -194,7 +194,7 @@ define i64 @load_register_zext_i16_to_i64(i64 %a, i64 %b) {
; CHECK-NOT: uxth
%1 = add i64 %a, %b
%2 = inttoptr i64 %1 to i16*
- %3 = load i16* %2
+ %3 = load i16, i16* %2
br label %bb2
bb2:
@@ -208,7 +208,7 @@ define i64 @load_register_zext_i32_to_i64(i64 %a, i64 %b) {
; CHECK-NOT: uxtw
%1 = add i64 %a, %b
%2 = inttoptr i64 %1 to i32*
- %3 = load i32* %2
+ %3 = load i32, i32* %2
br label %bb2
bb2:
@@ -222,7 +222,7 @@ define i32 @load_register_sext_i8_to_i32(i64 %a, i64 %b) {
; CHECK-NOT: sxtb
%1 = add i64 %a, %b
%2 = inttoptr i64 %1 to i8*
- %3 = load i8* %2
+ %3 = load i8, i8* %2
br label %bb2
bb2:
@@ -236,7 +236,7 @@ define i32 @load_register_sext_i16_to_i32(i64 %a, i64 %b) {
; CHECK-NOT: sxth
%1 = add i64 %a, %b
%2 = inttoptr i64 %1 to i16*
- %3 = load i16* %2
+ %3 = load i16, i16* %2
br label %bb2
bb2:
@@ -250,7 +250,7 @@ define i64 @load_register_sext_i8_to_i64(i64 %a, i64 %b) {
; CHECK-NOT: sxtb
%1 = add i64 %a, %b
%2 = inttoptr i64 %1 to i8*
- %3 = load i8* %2
+ %3 = load i8, i8* %2
br label %bb2
bb2:
@@ -264,7 +264,7 @@ define i64 @load_register_sext_i16_to_i64(i64 %a, i64 %b) {
; CHECK-NOT: sxth
%1 = add i64 %a, %b
%2 = inttoptr i64 %1 to i16*
- %3 = load i16* %2
+ %3 = load i16, i16* %2
br label %bb2
bb2:
@@ -278,7 +278,7 @@ define i64 @load_register_sext_i32_to_i64(i64 %a, i64 %b) {
; CHECK-NOT: sxtw
%1 = add i64 %a, %b
%2 = inttoptr i64 %1 to i32*
- %3 = load i32* %2
+ %3 = load i32, i32* %2
br label %bb2
bb2:
@@ -294,7 +294,7 @@ define i32 @load_extend_zext_i8_to_i32(i64 %a, i32 %b) {
%1 = sext i32 %b to i64
%2 = add i64 %a, %1
%3 = inttoptr i64 %2 to i8*
- %4 = load i8* %3
+ %4 = load i8, i8* %3
br label %bb2
bb2:
@@ -309,7 +309,7 @@ define i32 @load_extend_zext_i16_to_i32(i64 %a, i32 %b) {
%1 = sext i32 %b to i64
%2 = add i64 %a, %1
%3 = inttoptr i64 %2 to i16*
- %4 = load i16* %3
+ %4 = load i16, i16* %3
br label %bb2
bb2:
@@ -324,7 +324,7 @@ define i64 @load_extend_zext_i8_to_i64(i64 %a, i32 %b) {
%1 = sext i32 %b to i64
%2 = add i64 %a, %1
%3 = inttoptr i64 %2 to i8*
- %4 = load i8* %3
+ %4 = load i8, i8* %3
br label %bb2
bb2:
@@ -339,7 +339,7 @@ define i64 @load_extend_zext_i16_to_i64(i64 %a, i32 %b) {
%1 = sext i32 %b to i64
%2 = add i64 %a, %1
%3 = inttoptr i64 %2 to i16*
- %4 = load i16* %3
+ %4 = load i16, i16* %3
br label %bb2
bb2:
@@ -354,7 +354,7 @@ define i64 @load_extend_zext_i32_to_i64(i64 %a, i32 %b) {
%1 = sext i32 %b to i64
%2 = add i64 %a, %1
%3 = inttoptr i64 %2 to i32*
- %4 = load i32* %3
+ %4 = load i32, i32* %3
br label %bb2
bb2:
@@ -369,7 +369,7 @@ define i32 @load_extend_sext_i8_to_i32(i64 %a, i32 %b) {
%1 = sext i32 %b to i64
%2 = add i64 %a, %1
%3 = inttoptr i64 %2 to i8*
- %4 = load i8* %3
+ %4 = load i8, i8* %3
br label %bb2
bb2:
@@ -384,7 +384,7 @@ define i32 @load_extend_sext_i16_to_i32(i64 %a, i32 %b) {
%1 = sext i32 %b to i64
%2 = add i64 %a, %1
%3 = inttoptr i64 %2 to i16*
- %4 = load i16* %3
+ %4 = load i16, i16* %3
br label %bb2
bb2:
@@ -399,7 +399,7 @@ define i64 @load_extend_sext_i8_to_i64(i64 %a, i32 %b) {
%1 = sext i32 %b to i64
%2 = add i64 %a, %1
%3 = inttoptr i64 %2 to i8*
- %4 = load i8* %3
+ %4 = load i8, i8* %3
br label %bb2
bb2:
@@ -414,7 +414,7 @@ define i64 @load_extend_sext_i16_to_i64(i64 %a, i32 %b) {
%1 = sext i32 %b to i64
%2 = add i64 %a, %1
%3 = inttoptr i64 %2 to i16*
- %4 = load i16* %3
+ %4 = load i16, i16* %3
br label %bb2
bb2:
@@ -429,7 +429,7 @@ define i64 @load_extend_sext_i32_to_i64(i64 %a, i32 %b) {
%1 = sext i32 %b to i64
%2 = add i64 %a, %1
%3 = inttoptr i64 %2 to i32*
- %4 = load i32* %3
+ %4 = load i32, i32* %3
br label %bb2
bb2:
diff --git a/test/CodeGen/AArch64/fast-isel-int-ext3.ll b/test/CodeGen/AArch64/fast-isel-int-ext3.ll
index 5d55a6b38f6b..83740c8af5f2 100644
--- a/test/CodeGen/AArch64/fast-isel-int-ext3.ll
+++ b/test/CodeGen/AArch64/fast-isel-int-ext3.ll
@@ -11,7 +11,7 @@ define i32 @load_unscaled_zext_i8_to_i32(i64 %a) {
; CHECK: uxtb w0, [[REG]]
%1 = sub i64 %a, 8
%2 = inttoptr i64 %1 to i8 addrspace(256)*
- %3 = load i8 addrspace(256)* %2
+ %3 = load i8, i8 addrspace(256)* %2
%4 = zext i8 %3 to i32
ret i32 %4
}
@@ -22,7 +22,7 @@ define i32 @load_unscaled_zext_i16_to_i32(i64 %a) {
; CHECK: uxth w0, [[REG]]
%1 = sub i64 %a, 8
%2 = inttoptr i64 %1 to i16 addrspace(256)*
- %3 = load i16 addrspace(256)* %2
+ %3 = load i16, i16 addrspace(256)* %2
%4 = zext i16 %3 to i32
ret i32 %4
}
@@ -33,7 +33,7 @@ define i64 @load_unscaled_zext_i8_to_i64(i64 %a) {
; CHECK: ubfx x0, x[[REG]], #0, #8
%1 = sub i64 %a, 8
%2 = inttoptr i64 %1 to i8 addrspace(256)*
- %3 = load i8 addrspace(256)* %2
+ %3 = load i8, i8 addrspace(256)* %2
%4 = zext i8 %3 to i64
ret i64 %4
}
@@ -44,7 +44,7 @@ define i64 @load_unscaled_zext_i16_to_i64(i64 %a) {
; CHECK: ubfx x0, x[[REG]], #0, #16
%1 = sub i64 %a, 8
%2 = inttoptr i64 %1 to i16 addrspace(256)*
- %3 = load i16 addrspace(256)* %2
+ %3 = load i16, i16 addrspace(256)* %2
%4 = zext i16 %3 to i64
ret i64 %4
}
@@ -55,7 +55,7 @@ define i64 @load_unscaled_zext_i32_to_i64(i64 %a) {
; CHECK: ubfx x0, x[[REG]], #0, #32
%1 = sub i64 %a, 8
%2 = inttoptr i64 %1 to i32 addrspace(256)*
- %3 = load i32 addrspace(256)* %2
+ %3 = load i32, i32 addrspace(256)* %2
%4 = zext i32 %3 to i64
ret i64 %4
}
@@ -66,7 +66,7 @@ define i32 @load_unscaled_sext_i8_to_i32(i64 %a) {
; CHECK: sxtb w0, [[REG]]
%1 = sub i64 %a, 8
%2 = inttoptr i64 %1 to i8 addrspace(256)*
- %3 = load i8 addrspace(256)* %2
+ %3 = load i8, i8 addrspace(256)* %2
%4 = sext i8 %3 to i32
ret i32 %4
}
@@ -77,7 +77,7 @@ define i32 @load_unscaled_sext_i16_to_i32(i64 %a) {
; CHECK: sxth w0, [[REG]]
%1 = sub i64 %a, 8
%2 = inttoptr i64 %1 to i16 addrspace(256)*
- %3 = load i16 addrspace(256)* %2
+ %3 = load i16, i16 addrspace(256)* %2
%4 = sext i16 %3 to i32
ret i32 %4
}
@@ -88,7 +88,7 @@ define i64 @load_unscaled_sext_i8_to_i64(i64 %a) {
; CHECK: sxtb x0, [[REG]]
%1 = sub i64 %a, 8
%2 = inttoptr i64 %1 to i8 addrspace(256)*
- %3 = load i8 addrspace(256)* %2
+ %3 = load i8, i8 addrspace(256)* %2
%4 = sext i8 %3 to i64
ret i64 %4
}
@@ -99,7 +99,7 @@ define i64 @load_unscaled_sext_i16_to_i64(i64 %a) {
; CHECK: sxth x0, [[REG]]
%1 = sub i64 %a, 8
%2 = inttoptr i64 %1 to i16 addrspace(256)*
- %3 = load i16 addrspace(256)* %2
+ %3 = load i16, i16 addrspace(256)* %2
%4 = sext i16 %3 to i64
ret i64 %4
}
@@ -110,7 +110,7 @@ define i64 @load_unscaled_sext_i32_to_i64(i64 %a) {
; CHECK: sxtw x0, [[REG]]
%1 = sub i64 %a, 8
%2 = inttoptr i64 %1 to i32 addrspace(256)*
- %3 = load i32 addrspace(256)* %2
+ %3 = load i32, i32 addrspace(256)* %2
%4 = sext i32 %3 to i64
ret i64 %4
}
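Unlike the two files above, the fast-isel-int-ext3.ll checks expect the explicit uxt*/sxt* (or ubfx) instructions to survive: these loads go through addrspace(256) pointers, so the extension is not folded into the load. A small contrasting sketch, assuming the behaviour the CHECK lines encode (hypothetical function names):
; address space 0: the extend is expected to fold into the load, no separate sxtb.
define i32 @fold_sketch(i8* %p) {
  %v = load i8, i8* %p
  %e = sext i8 %v to i32
  ret i32 %e
}
; addrspace(256): the tests above expect a plain load plus an explicit sxtb.
define i32 @nofold_sketch(i8 addrspace(256)* %p) {
  %v = load i8, i8 addrspace(256)* %p
  %e = sext i8 %v to i32
  ret i32 %e
}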
diff --git a/test/CodeGen/AArch64/fast-isel-int-ext4.ll b/test/CodeGen/AArch64/fast-isel-int-ext4.ll
index f25bb98af758..1ab951bf0712 100644
--- a/test/CodeGen/AArch64/fast-isel-int-ext4.ll
+++ b/test/CodeGen/AArch64/fast-isel-int-ext4.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=aarch64-apple-darwin -fast-isel -fast-isel-abort -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-apple-darwin -fast-isel -fast-isel-abort=1 -verify-machineinstrs < %s | FileCheck %s
define i32 @kill_flag(i16 signext %a) {
; CHECK-LABEL: kill_flag
diff --git a/test/CodeGen/AArch64/fast-isel-int-ext5.ll b/test/CodeGen/AArch64/fast-isel-int-ext5.ll
new file mode 100644
index 000000000000..0f9ec62811df
--- /dev/null
+++ b/test/CodeGen/AArch64/fast-isel-int-ext5.ll
@@ -0,0 +1,19 @@
+; RUN: llc -mtriple=aarch64-apple-darwin -O0 -fast-isel -fast-isel-abort=1 -verify-machineinstrs < %s | FileCheck %s
+
+; CHECK-LABEL: int_ext_opt
+define i64 @int_ext_opt(i8* %addr, i1 %c1, i1 %c2) {
+entry:
+ %0 = load i8, i8* %addr
+ br i1 %c1, label %bb1, label %bb2
+
+bb1:
+ %1 = zext i8 %0 to i64
+ br i1 %c2, label %bb2, label %exit
+
+bb2:
+ %2 = phi i64 [1, %entry], [%1, %bb1]
+ ret i64 %2
+
+exit:
+ ret i64 0
+}
diff --git a/test/CodeGen/AArch64/fast-isel-logic-op.ll b/test/CodeGen/AArch64/fast-isel-logic-op.ll
index 2c7486e4cf8a..89c5f2c48024 100644
--- a/test/CodeGen/AArch64/fast-isel-logic-op.ll
+++ b/test/CodeGen/AArch64/fast-isel-logic-op.ll
@@ -1,5 +1,5 @@
; RUN: llc -mtriple=aarch64-apple-darwin -fast-isel=0 -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=aarch64-apple-darwin -fast-isel=1 -fast-isel-abort -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-apple-darwin -fast-isel=1 -fast-isel-abort=1 -verify-machineinstrs < %s | FileCheck %s
; AND
define zeroext i1 @and_rr_i1(i1 signext %a, i1 signext %b) {
diff --git a/test/CodeGen/AArch64/fast-isel-memcpy.ll b/test/CodeGen/AArch64/fast-isel-memcpy.ll
index 9161dad249a9..07595a954db0 100644
--- a/test/CodeGen/AArch64/fast-isel-memcpy.ll
+++ b/test/CodeGen/AArch64/fast-isel-memcpy.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=aarch64-apple-darwin -fast-isel -fast-isel-abort -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-apple-darwin -fast-isel -fast-isel-abort=1 -verify-machineinstrs < %s | FileCheck %s
; Test that we don't segfault.
; CHECK-LABEL: test
diff --git a/test/CodeGen/AArch64/fast-isel-mul.ll b/test/CodeGen/AArch64/fast-isel-mul.ll
index f2fda27c2f7b..9d6fd00008e8 100644
--- a/test/CodeGen/AArch64/fast-isel-mul.ll
+++ b/test/CodeGen/AArch64/fast-isel-mul.ll
@@ -1,4 +1,4 @@
-; RUN: llc -fast-isel -fast-isel-abort -verify-machineinstrs -mtriple=aarch64-apple-darwin < %s | FileCheck %s
+; RUN: llc -fast-isel -fast-isel-abort=1 -verify-machineinstrs -mtriple=aarch64-apple-darwin < %s | FileCheck %s
define zeroext i8 @test_mul8(i8 %lhs, i8 %rhs) {
; CHECK-LABEL: test_mul8:
diff --git a/test/CodeGen/AArch64/fast-isel-runtime-libcall.ll b/test/CodeGen/AArch64/fast-isel-runtime-libcall.ll
index 8d2d39a1a1ff..34d7983ff5fa 100644
--- a/test/CodeGen/AArch64/fast-isel-runtime-libcall.ll
+++ b/test/CodeGen/AArch64/fast-isel-runtime-libcall.ll
@@ -1,5 +1,5 @@
-; RUN: llc -mtriple=aarch64-apple-darwin -fast-isel -fast-isel-abort -code-model=small -verify-machineinstrs < %s | FileCheck %s --check-prefix=SMALL
-; RUN: llc -mtriple=aarch64-apple-darwin -fast-isel -fast-isel-abort -code-model=large -verify-machineinstrs < %s | FileCheck %s --check-prefix=LARGE
+; RUN: llc -mtriple=aarch64-apple-darwin -fast-isel -fast-isel-abort=1 -code-model=small -verify-machineinstrs < %s | FileCheck %s --check-prefix=SMALL
+; RUN: llc -mtriple=aarch64-apple-darwin -fast-isel -fast-isel-abort=1 -code-model=large -verify-machineinstrs < %s | FileCheck %s --check-prefix=LARGE
define float @frem_f32(float %a, float %b) {
; SMALL-LABEL: frem_f32
diff --git a/test/CodeGen/AArch64/fast-isel-sdiv.ll b/test/CodeGen/AArch64/fast-isel-sdiv.ll
index 30807767fa79..3c8de43af6c1 100644
--- a/test/CodeGen/AArch64/fast-isel-sdiv.ll
+++ b/test/CodeGen/AArch64/fast-isel-sdiv.ll
@@ -1,5 +1,5 @@
; RUN: llc -mtriple=aarch64-apple-darwin -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=aarch64-apple-darwin -fast-isel -fast-isel-abort -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-apple-darwin -fast-isel -fast-isel-abort=1 -verify-machineinstrs < %s | FileCheck %s
define i32 @sdiv_i32_exact(i32 %a) {
; CHECK-LABEL: sdiv_i32_exact
diff --git a/test/CodeGen/AArch64/fast-isel-select.ll b/test/CodeGen/AArch64/fast-isel-select.ll
index 928e9d46741d..e06f74cb7fef 100644
--- a/test/CodeGen/AArch64/fast-isel-select.ll
+++ b/test/CodeGen/AArch64/fast-isel-select.ll
@@ -1,5 +1,5 @@
; RUN: llc -mtriple=aarch64-apple-darwin -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=aarch64-apple-darwin -fast-isel -fast-isel-abort -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-apple-darwin -fast-isel -fast-isel-abort=1 -verify-machineinstrs < %s | FileCheck %s
; First test the different supported value types for select.
define zeroext i1 @select_i1(i1 zeroext %c, i1 zeroext %a, i1 zeroext %b) {
diff --git a/test/CodeGen/AArch64/fast-isel-shift.ll b/test/CodeGen/AArch64/fast-isel-shift.ll
index ce4ba49f4999..36fab0d51ed5 100644
--- a/test/CodeGen/AArch64/fast-isel-shift.ll
+++ b/test/CodeGen/AArch64/fast-isel-shift.ll
@@ -1,4 +1,4 @@
-; RUN: llc -fast-isel -fast-isel-abort -mtriple=aarch64-apple-darwin -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -fast-isel -fast-isel-abort=1 -mtriple=aarch64-apple-darwin -verify-machineinstrs < %s | FileCheck %s
; CHECK-LABEL: asr_zext_i1_i16
; CHECK: uxth {{w[0-9]*}}, wzr
diff --git a/test/CodeGen/AArch64/fast-isel-sqrt.ll b/test/CodeGen/AArch64/fast-isel-sqrt.ll
index 1331d5c7de5b..80a0a469cd94 100644
--- a/test/CodeGen/AArch64/fast-isel-sqrt.ll
+++ b/test/CodeGen/AArch64/fast-isel-sqrt.ll
@@ -1,5 +1,5 @@
; RUN: llc -mtriple=arm64-apple-darwin -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=arm64-apple-darwin -fast-isel -fast-isel-abort -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=arm64-apple-darwin -fast-isel -fast-isel-abort=1 -verify-machineinstrs < %s | FileCheck %s
define float @test_sqrt_f32(float %a) {
; CHECK-LABEL: test_sqrt_f32
diff --git a/test/CodeGen/AArch64/fast-isel-tbz.ll b/test/CodeGen/AArch64/fast-isel-tbz.ll
index a5f02ffa39ac..598826763787 100644
--- a/test/CodeGen/AArch64/fast-isel-tbz.ll
+++ b/test/CodeGen/AArch64/fast-isel-tbz.ll
@@ -1,5 +1,5 @@
; RUN: llc -aarch64-atomic-cfg-tidy=0 -verify-machineinstrs -mtriple=aarch64-apple-darwin < %s | FileCheck --check-prefix=CHECK %s
-; RUN: llc -fast-isel -fast-isel-abort -aarch64-atomic-cfg-tidy=0 -verify-machineinstrs -mtriple=aarch64-apple-darwin < %s | FileCheck --check-prefix=CHECK --check-prefix=FAST %s
+; RUN: llc -fast-isel -fast-isel-abort=1 -aarch64-atomic-cfg-tidy=0 -verify-machineinstrs -mtriple=aarch64-apple-darwin < %s | FileCheck --check-prefix=CHECK --check-prefix=FAST %s
define i32 @icmp_eq_i8(i8 zeroext %a) {
; CHECK-LABEL: icmp_eq_i8
diff --git a/test/CodeGen/AArch64/fast-isel-trunc.ll b/test/CodeGen/AArch64/fast-isel-trunc.ll
index 55937eb76fa9..af58abe1f0c1 100644
--- a/test/CodeGen/AArch64/fast-isel-trunc.ll
+++ b/test/CodeGen/AArch64/fast-isel-trunc.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=aarch64-apple-darwin -fast-isel -fast-isel-abort -verify-machineinstrs < %s
+; RUN: llc -mtriple=aarch64-apple-darwin -fast-isel -fast-isel-abort=1 -verify-machineinstrs < %s
; Test that %1 doesn't get the kill flag set before its last use.
define i32 @test_trunc(i32 %a) {
diff --git a/test/CodeGen/AArch64/fast-isel-vector-arithmetic.ll b/test/CodeGen/AArch64/fast-isel-vector-arithmetic.ll
index eaa0db527949..82b8d47cd80e 100644
--- a/test/CodeGen/AArch64/fast-isel-vector-arithmetic.ll
+++ b/test/CodeGen/AArch64/fast-isel-vector-arithmetic.ll
@@ -1,5 +1,5 @@
; RUN: llc -mtriple=aarch64-apple-darwin -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=aarch64-apple-darwin -fast-isel -fast-isel-abort -fast-isel-abort-args -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-apple-darwin -fast-isel -fast-isel-abort=2 -verify-machineinstrs < %s | FileCheck %s
; Vector Integer Add
define <8 x i8> @add_v8i8_rr(<8 x i8> %a, <8 x i8> %b) {
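The RUN-line updates in these fast-isel tests track the -fast-isel-abort option becoming an integer level: the old boolean flag maps to -fast-isel-abort=1, and the pair -fast-isel-abort -fast-isel-abort-args collapses to -fast-isel-abort=2, as in the fast-isel-vector-arithmetic.ll hunk above. A sketch of an equivalent RUN line reusing the triple and flags already shown (hypothetical test file, not part of the patch):
; old: llc -mtriple=aarch64-apple-darwin -fast-isel -fast-isel-abort -fast-isel-abort-args -verify-machineinstrs < %s | FileCheck %s
; RUN: llc -mtriple=aarch64-apple-darwin -fast-isel -fast-isel-abort=2 -verify-machineinstrs < %s | FileCheck %s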
diff --git a/test/CodeGen/AArch64/fast-isel-vret.ll b/test/CodeGen/AArch64/fast-isel-vret.ll
index 9ad92273d3af..d8a56ce48d76 100644
--- a/test/CodeGen/AArch64/fast-isel-vret.ll
+++ b/test/CodeGen/AArch64/fast-isel-vret.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=aarch64-apple-darwin -fast-isel -fast-isel-abort -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-apple-darwin -fast-isel -fast-isel-abort=1 -verify-machineinstrs < %s | FileCheck %s
; Test that we don't abort fast-isel for ret
define <8 x i8> @ret_v8i8(<8 x i8> %a, <8 x i8> %b) {
diff --git a/test/CodeGen/AArch64/floatdp_1source.ll b/test/CodeGen/AArch64/floatdp_1source.ll
index 8c02787a2340..ec7d32dbfae6 100644
--- a/test/CodeGen/AArch64/floatdp_1source.ll
+++ b/test/CodeGen/AArch64/floatdp_1source.ll
@@ -27,7 +27,7 @@ declare double @nearbyint(double) readonly
define void @simple_float() {
; CHECK-LABEL: simple_float:
- %val1 = load volatile float* @varfloat
+ %val1 = load volatile float, float* @varfloat
%valabs = call float @fabsf(float %val1)
store volatile float %valabs, float* @varfloat
@@ -66,7 +66,7 @@ define void @simple_float() {
define void @simple_double() {
; CHECK-LABEL: simple_double:
- %val1 = load volatile double* @vardouble
+ %val1 = load volatile double, double* @vardouble
%valabs = call double @fabs(double %val1)
store volatile double %valabs, double* @vardouble
@@ -106,9 +106,9 @@ define void @simple_double() {
define void @converts() {
; CHECK-LABEL: converts:
- %val16 = load volatile half* @varhalf
- %val32 = load volatile float* @varfloat
- %val64 = load volatile double* @vardouble
+ %val16 = load volatile half, half* @varhalf
+ %val32 = load volatile float, float* @varfloat
+ %val64 = load volatile double, double* @vardouble
%val16to32 = fpext half %val16 to float
store volatile float %val16to32, float* @varfloat
diff --git a/test/CodeGen/AArch64/floatdp_2source.ll b/test/CodeGen/AArch64/floatdp_2source.ll
index 262271784ec6..30e2856a4f5b 100644
--- a/test/CodeGen/AArch64/floatdp_2source.ll
+++ b/test/CodeGen/AArch64/floatdp_2source.ll
@@ -5,7 +5,7 @@
define void @testfloat() {
; CHECK-LABEL: testfloat:
- %val1 = load float* @varfloat
+ %val1 = load float, float* @varfloat
%val2 = fadd float %val1, %val1
; CHECK: fadd {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
@@ -33,7 +33,7 @@ define void @testfloat() {
define void @testdouble() {
; CHECK-LABEL: testdouble:
- %val1 = load double* @vardouble
+ %val1 = load double, double* @vardouble
%val2 = fadd double %val1, %val1
; CHECK: fadd {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
diff --git a/test/CodeGen/AArch64/fold-constants.ll b/test/CodeGen/AArch64/fold-constants.ll
new file mode 100644
index 000000000000..2dd0d1245930
--- /dev/null
+++ b/test/CodeGen/AArch64/fold-constants.ll
@@ -0,0 +1,21 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -o - %s | FileCheck %s
+
+define i64 @dotests_616() {
+; CHECK-LABEL: dotests_616
+; CHECK: movi d0, #0000000000000000
+; CHECK-NEXT: umov w8, v0.b[2]
+; CHECK-NEXT: sbfx w8, w8, #0, #1
+; CHECK-NEXT: fmov s0, w8
+; CHECK-NEXT: fmov x0, d0
+; CHECK-NEXT: ret
+entry:
+ %0 = bitcast <2 x i64> zeroinitializer to <8 x i16>
+ %1 = and <8 x i16> zeroinitializer, %0
+ %2 = icmp ne <8 x i16> %1, zeroinitializer
+ %3 = extractelement <8 x i1> %2, i32 2
+ %vgetq_lane285 = sext i1 %3 to i16
+ %vset_lane = insertelement <4 x i16> undef, i16 %vgetq_lane285, i32 0
+ %4 = bitcast <4 x i16> %vset_lane to <1 x i64>
+ %vget_lane = extractelement <1 x i64> %4, i32 0
+ ret i64 %vget_lane
+}
diff --git a/test/CodeGen/AArch64/fp128-folding.ll b/test/CodeGen/AArch64/fp128-folding.ll
index 892b19c5cf33..5027e83c292a 100644
--- a/test/CodeGen/AArch64/fp128-folding.ll
+++ b/test/CodeGen/AArch64/fp128-folding.ll
@@ -8,7 +8,7 @@ define fp128 @test_folding() {
; CHECK-LABEL: test_folding:
%l = alloca i32
store i32 42, i32* %l
- %val = load i32* %l
+ %val = load i32, i32* %l
%fpval = sitofp i32 %val to fp128
; If the value is loaded from a constant pool into an fp128, it's been folded
; successfully.
diff --git a/test/CodeGen/AArch64/fp16-instructions.ll b/test/CodeGen/AArch64/fp16-instructions.ll
deleted file mode 100644
index 7a44cd128cb0..000000000000
--- a/test/CodeGen/AArch64/fp16-instructions.ll
+++ /dev/null
@@ -1,109 +0,0 @@
-; RUN: llc < %s -mtriple=aarch64-none-eabi | FileCheck %s
-
-define half @add_h(half %a, half %b) {
-entry:
-; CHECK-LABEL: add_h:
-; CHECK-DAG: fcvt [[OP1:s[0-9]+]], h0
-; CHECK-DAG: fcvt [[OP2:s[0-9]+]], h1
-; CHECK: fadd [[RES:s[0-9]+]], [[OP1]], [[OP2]]
-; CHECK: fcvt h0, [[RES]]
- %0 = fadd half %a, %b
- ret half %0
-}
-
-
-define half @sub_h(half %a, half %b) {
-entry:
-; CHECK-LABEL: sub_h:
-; CHECK-DAG: fcvt [[OP1:s[0-9]+]], h0
-; CHECK-DAG: fcvt [[OP2:s[0-9]+]], h1
-; CHECK: fsub [[RES:s[0-9]+]], [[OP1]], [[OP2]]
-; CHECK: fcvt h0, [[RES]]
- %0 = fsub half %a, %b
- ret half %0
-}
-
-
-define half @mul_h(half %a, half %b) {
-entry:
-; CHECK-LABEL: mul_h:
-; CHECK-DAG: fcvt [[OP1:s[0-9]+]], h0
-; CHECK-DAG: fcvt [[OP2:s[0-9]+]], h1
-; CHECK: fmul [[RES:s[0-9]+]], [[OP1]], [[OP2]]
-; CHECK: fcvt h0, [[RES]]
- %0 = fmul half %a, %b
- ret half %0
-}
-
-
-define half @div_h(half %a, half %b) {
-entry:
-; CHECK-LABEL: div_h:
-; CHECK-DAG: fcvt [[OP1:s[0-9]+]], h0
-; CHECK-DAG: fcvt [[OP2:s[0-9]+]], h1
-; CHECK: fdiv [[RES:s[0-9]+]], [[OP1]], [[OP2]]
-; CHECK: fcvt h0, [[RES]]
- %0 = fdiv half %a, %b
- ret half %0
-}
-
-
-define half @load_h(half* %a) {
-entry:
-; CHECK-LABEL: load_h:
-; CHECK: ldr h0, [x0]
- %0 = load half* %a, align 4
- ret half %0
-}
-
-
-define void @store_h(half* %a, half %b) {
-entry:
-; CHECK-LABEL: store_h:
-; CHECK: str h0, [x0]
- store half %b, half* %a, align 4
- ret void
-}
-
-define half @s_to_h(float %a) {
-; CHECK-LABEL: s_to_h:
-; CHECK: fcvt h0, s0
- %1 = fptrunc float %a to half
- ret half %1
-}
-
-define half @d_to_h(double %a) {
-; CHECK-LABEL: d_to_h:
-; CHECK: fcvt h0, d0
- %1 = fptrunc double %a to half
- ret half %1
-}
-
-define float @h_to_s(half %a) {
-; CHECK-LABEL: h_to_s:
-; CHECK: fcvt s0, h0
- %1 = fpext half %a to float
- ret float %1
-}
-
-define double @h_to_d(half %a) {
-; CHECK-LABEL: h_to_d:
-; CHECK: fcvt d0, h0
- %1 = fpext half %a to double
- ret double %1
-}
-
-define half @bitcast_i_to_h(i16 %a) {
-; CHECK-LABEL: bitcast_i_to_h:
-; CHECK: fmov s0, w0
- %1 = bitcast i16 %a to half
- ret half %1
-}
-
-
-define i16 @bitcast_h_to_i(half %a) {
-; CHECK-LABEL: bitcast_h_to_i:
-; CHECK: fmov w0, s0
- %1 = bitcast half %a to i16
- ret i16 %1
-}
diff --git a/test/CodeGen/AArch64/fp16-v16-instructions.ll b/test/CodeGen/AArch64/fp16-v16-instructions.ll
new file mode 100644
index 000000000000..1af2bd10912f
--- /dev/null
+++ b/test/CodeGen/AArch64/fp16-v16-instructions.ll
@@ -0,0 +1,105 @@
+; RUN: llc < %s -mtriple=aarch64-none-eabi | FileCheck %s
+
+
+define <16 x half> @sitofp_i32(<16 x i32> %a) #0 {
+; CHECK-LABEL: sitofp_i32:
+; CHECK-DAG: scvtf [[S0:v[0-9]+\.4s]], v0.4s
+; CHECK-DAG: scvtf [[S1:v[0-9]+\.4s]], v1.4s
+; CHECK-DAG: scvtf [[S2:v[0-9]+\.4s]], v2.4s
+; CHECK-DAG: scvtf [[S3:v[0-9]+\.4s]], v3.4s
+; CHECK-DAG: fcvtn v0.4h, [[S0]]
+; CHECK-DAG: fcvtn v1.4h, [[S2]]
+; CHECK-DAG: fcvtn v[[R1:[0-9]+]].4h, [[S1]]
+; CHECK-DAG: fcvtn v[[R3:[0-9]+]].4h, [[S3]]
+; CHECK-DAG: ins v0.d[1], v[[R1]].d[0]
+; CHECK-DAG: ins v1.d[1], v[[R3]].d[0]
+
+ %1 = sitofp <16 x i32> %a to <16 x half>
+ ret <16 x half> %1
+}
+
+
+define <16 x half> @sitofp_i64(<16 x i64> %a) #0 {
+; CHECK-LABEL: sitofp_i64:
+; CHECK-DAG: scvtf [[D0:v[0-9]+\.2d]], v0.2d
+; CHECK-DAG: scvtf [[D1:v[0-9]+\.2d]], v1.2d
+; CHECK-DAG: scvtf [[D2:v[0-9]+\.2d]], v2.2d
+; CHECK-DAG: scvtf [[D3:v[0-9]+\.2d]], v3.2d
+; CHECK-DAG: scvtf [[D4:v[0-9]+\.2d]], v4.2d
+; CHECK-DAG: scvtf [[D5:v[0-9]+\.2d]], v5.2d
+; CHECK-DAG: scvtf [[D6:v[0-9]+\.2d]], v6.2d
+; CHECK-DAG: scvtf [[D7:v[0-9]+\.2d]], v7.2d
+
+; CHECK-DAG: fcvtn [[S0:v[0-9]+]].2s, [[D0]]
+; CHECK-DAG: fcvtn [[S1:v[0-9]+]].2s, [[D2]]
+; CHECK-DAG: fcvtn [[S2:v[0-9]+]].2s, [[D4]]
+; CHECK-DAG: fcvtn [[S3:v[0-9]+]].2s, [[D6]]
+
+; CHECK-DAG: fcvtn2 [[S0]].4s, [[D1]]
+; CHECK-DAG: fcvtn2 [[S1]].4s, [[D3]]
+; CHECK-DAG: fcvtn2 [[S2]].4s, [[D5]]
+; CHECK-DAG: fcvtn2 [[S3]].4s, [[D7]]
+
+; CHECK-DAG: fcvtn v0.4h, [[S0]].4s
+; CHECK-DAG: fcvtn v1.4h, [[S2]].4s
+; CHECK-DAG: fcvtn v[[R1:[0-9]+]].4h, [[S1]].4s
+; CHECK-DAG: fcvtn v[[R3:[0-9]+]].4h, [[S3]].4s
+; CHECK-DAG: ins v0.d[1], v[[R1]].d[0]
+; CHECK-DAG: ins v1.d[1], v[[R3]].d[0]
+
+ %1 = sitofp <16 x i64> %a to <16 x half>
+ ret <16 x half> %1
+}
+
+
+define <16 x half> @uitofp_i32(<16 x i32> %a) #0 {
+; CHECK-LABEL: uitofp_i32:
+; CHECK-DAG: ucvtf [[S0:v[0-9]+\.4s]], v0.4s
+; CHECK-DAG: ucvtf [[S1:v[0-9]+\.4s]], v1.4s
+; CHECK-DAG: ucvtf [[S2:v[0-9]+\.4s]], v2.4s
+; CHECK-DAG: ucvtf [[S3:v[0-9]+\.4s]], v3.4s
+; CHECK-DAG: fcvtn v0.4h, [[S0]]
+; CHECK-DAG: fcvtn v1.4h, [[S2]]
+; CHECK-DAG: fcvtn v[[R1:[0-9]+]].4h, [[S1]]
+; CHECK-DAG: fcvtn v[[R3:[0-9]+]].4h, [[S3]]
+; CHECK-DAG: ins v0.d[1], v[[R1]].d[0]
+; CHECK-DAG: ins v1.d[1], v[[R3]].d[0]
+
+ %1 = uitofp <16 x i32> %a to <16 x half>
+ ret <16 x half> %1
+}
+
+
+define <16 x half> @uitofp_i64(<16 x i64> %a) #0 {
+; CHECK-LABEL: uitofp_i64:
+; CHECK-DAG: ucvtf [[D0:v[0-9]+\.2d]], v0.2d
+; CHECK-DAG: ucvtf [[D1:v[0-9]+\.2d]], v1.2d
+; CHECK-DAG: ucvtf [[D2:v[0-9]+\.2d]], v2.2d
+; CHECK-DAG: ucvtf [[D3:v[0-9]+\.2d]], v3.2d
+; CHECK-DAG: ucvtf [[D4:v[0-9]+\.2d]], v4.2d
+; CHECK-DAG: ucvtf [[D5:v[0-9]+\.2d]], v5.2d
+; CHECK-DAG: ucvtf [[D6:v[0-9]+\.2d]], v6.2d
+; CHECK-DAG: ucvtf [[D7:v[0-9]+\.2d]], v7.2d
+
+; CHECK-DAG: fcvtn [[S0:v[0-9]+]].2s, [[D0]]
+; CHECK-DAG: fcvtn [[S1:v[0-9]+]].2s, [[D2]]
+; CHECK-DAG: fcvtn [[S2:v[0-9]+]].2s, [[D4]]
+; CHECK-DAG: fcvtn [[S3:v[0-9]+]].2s, [[D6]]
+
+; CHECK-DAG: fcvtn2 [[S0]].4s, [[D1]]
+; CHECK-DAG: fcvtn2 [[S1]].4s, [[D3]]
+; CHECK-DAG: fcvtn2 [[S2]].4s, [[D5]]
+; CHECK-DAG: fcvtn2 [[S3]].4s, [[D7]]
+
+; CHECK-DAG: fcvtn v0.4h, [[S0]].4s
+; CHECK-DAG: fcvtn v1.4h, [[S2]].4s
+; CHECK-DAG: fcvtn v[[R1:[0-9]+]].4h, [[S1]].4s
+; CHECK-DAG: fcvtn v[[R3:[0-9]+]].4h, [[S3]].4s
+; CHECK-DAG: ins v0.d[1], v[[R1]].d[0]
+; CHECK-DAG: ins v1.d[1], v[[R3]].d[0]
+
+ %1 = uitofp <16 x i64> %a to <16 x half>
+ ret <16 x half> %1
+}
+
+attributes #0 = { nounwind }
diff --git a/test/CodeGen/AArch64/fp16-v4-instructions.ll b/test/CodeGen/AArch64/fp16-v4-instructions.ll
index 8e8968108c78..0dbda152fca9 100644
--- a/test/CodeGen/AArch64/fp16-v4-instructions.ll
+++ b/test/CodeGen/AArch64/fp16-v4-instructions.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=aarch64-none-eabi | FileCheck %s
+; RUN: llc < %s -asm-verbose=false -mtriple=aarch64-none-eabi | FileCheck %s
define <4 x half> @add_h(<4 x half> %a, <4 x half> %b) {
entry:
@@ -12,6 +12,15 @@ entry:
}
+define <4 x half> @build_h4(<4 x half> %a) {
+entry:
+; CHECK-LABEL: build_h4:
+; CHECK: movz [[GPR:w[0-9]+]], #0x3ccd
+; CHECK: dup v0.4h, [[GPR]]
+ ret <4 x half> <half 0xH3CCD, half 0xH3CCD, half 0xH3CCD, half 0xH3CCD>
+}
+
+
define <4 x half> @sub_h(<4 x half> %a, <4 x half> %b) {
entry:
; CHECK-LABEL: sub_h:
@@ -52,7 +61,7 @@ define <4 x half> @load_h(<4 x half>* %a) {
entry:
; CHECK-LABEL: load_h:
; CHECK: ldr d0, [x0]
- %0 = load <4 x half>* %a, align 4
+ %0 = load <4 x half>, <4 x half>* %a, align 4
ret <4 x half> %0
}
@@ -120,3 +129,93 @@ define <4 x i16> @bitcast_h_to_i(float, <4 x half> %a) {
%2 = bitcast <4 x half> %a to <4 x i16>
ret <4 x i16> %2
}
+
+
+define <4 x half> @sitofp_i8(<4 x i8> %a) #0 {
+; CHECK-LABEL: sitofp_i8:
+; CHECK-NEXT: shl [[OP1:v[0-9]+\.4h]], v0.4h, #8
+; CHECK-NEXT: sshr [[OP2:v[0-9]+\.4h]], [[OP1]], #8
+; CHECK-NEXT: sshll [[OP3:v[0-9]+\.4s]], [[OP2]], #0
+; CHECK-NEXT: scvtf [[OP4:v[0-9]+\.4s]], [[OP3]]
+; CHECK-NEXT: fcvtn v0.4h, [[OP4]]
+; CHECK-NEXT: ret
+ %1 = sitofp <4 x i8> %a to <4 x half>
+ ret <4 x half> %1
+}
+
+
+define <4 x half> @sitofp_i16(<4 x i16> %a) #0 {
+; CHECK-LABEL: sitofp_i16:
+; CHECK-NEXT: sshll [[OP1:v[0-9]+\.4s]], v0.4h, #0
+; CHECK-NEXT: scvtf [[OP2:v[0-9]+\.4s]], [[OP1]]
+; CHECK-NEXT: fcvtn v0.4h, [[OP2]]
+; CHECK-NEXT: ret
+ %1 = sitofp <4 x i16> %a to <4 x half>
+ ret <4 x half> %1
+}
+
+
+define <4 x half> @sitofp_i32(<4 x i32> %a) #0 {
+; CHECK-LABEL: sitofp_i32:
+; CHECK-NEXT: scvtf [[OP1:v[0-9]+\.4s]], v0.4s
+; CHECK-NEXT: fcvtn v0.4h, [[OP1]]
+ %1 = sitofp <4 x i32> %a to <4 x half>
+ ret <4 x half> %1
+}
+
+
+define <4 x half> @sitofp_i64(<4 x i64> %a) #0 {
+; CHECK-LABEL: sitofp_i64:
+; CHECK-DAG: scvtf [[OP1:v[0-9]+\.2d]], v0.2d
+; CHECK-DAG: scvtf [[OP2:v[0-9]+\.2d]], v1.2d
+; CHECK-DAG: fcvtn [[OP3:v[0-9]+]].2s, [[OP1]]
+; CHECK-NEXT: fcvtn2 [[OP3]].4s, [[OP2]]
+; CHECK-NEXT: fcvtn v0.4h, [[OP3]].4s
+ %1 = sitofp <4 x i64> %a to <4 x half>
+ ret <4 x half> %1
+}
+
+define <4 x half> @uitofp_i8(<4 x i8> %a) #0 {
+; CHECK-LABEL: uitofp_i8:
+; CHECK-NEXT: bic v0.4h, #0xff, lsl #8
+; CHECK-NEXT: ushll [[OP1:v[0-9]+\.4s]], v0.4h, #0
+; CHECK-NEXT: ucvtf [[OP2:v[0-9]+\.4s]], [[OP1]]
+; CHECK-NEXT: fcvtn v0.4h, [[OP2]]
+; CHECK-NEXT: ret
+ %1 = uitofp <4 x i8> %a to <4 x half>
+ ret <4 x half> %1
+}
+
+
+define <4 x half> @uitofp_i16(<4 x i16> %a) #0 {
+; CHECK-LABEL: uitofp_i16:
+; CHECK-NEXT: ushll [[OP1:v[0-9]+\.4s]], v0.4h, #0
+; CHECK-NEXT: ucvtf [[OP2:v[0-9]+\.4s]], [[OP1]]
+; CHECK-NEXT: fcvtn v0.4h, [[OP2]]
+; CHECK-NEXT: ret
+ %1 = uitofp <4 x i16> %a to <4 x half>
+ ret <4 x half> %1
+}
+
+
+define <4 x half> @uitofp_i32(<4 x i32> %a) #0 {
+; CHECK-LABEL: uitofp_i32:
+; CHECK-NEXT: ucvtf [[OP1:v[0-9]+\.4s]], v0.4s
+; CHECK-NEXT: fcvtn v0.4h, [[OP1]]
+ %1 = uitofp <4 x i32> %a to <4 x half>
+ ret <4 x half> %1
+}
+
+
+define <4 x half> @uitofp_i64(<4 x i64> %a) #0 {
+; CHECK-LABEL: uitofp_i64:
+; CHECK-DAG: ucvtf [[OP1:v[0-9]+\.2d]], v0.2d
+; CHECK-DAG: ucvtf [[OP2:v[0-9]+\.2d]], v1.2d
+; CHECK-DAG: fcvtn [[OP3:v[0-9]+]].2s, [[OP1]]
+; CHECK-NEXT: fcvtn2 [[OP3]].4s, [[OP2]]
+; CHECK-NEXT: fcvtn v0.4h, [[OP3]].4s
+ %1 = uitofp <4 x i64> %a to <4 x half>
+ ret <4 x half> %1
+}
+
+attributes #0 = { nounwind }
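The new sitofp/uitofp cases added to fp16-v4-instructions.ll describe a staged narrowing for wide sources: <4 x i64> is converted per 2-element half (scvtf/ucvtf on .2d), narrowed into one .4s register with fcvtn/fcvtn2, and narrowed once more to .4h. A compact sketch of the IR whose expected lowering those CHECK-DAG lines spell out (hypothetical function name):
define <4 x half> @sitofp_v4i64_sketch(<4 x i64> %a) nounwind {
; per the checks above, roughly:
;   scvtf v0.2d, v0.2d ; scvtf v1.2d, v1.2d
;   fcvtn vN.2s, v0.2d ; fcvtn2 vN.4s, v1.2d
;   fcvtn v0.4h, vN.4s
  %1 = sitofp <4 x i64> %a to <4 x half>
  ret <4 x half> %1
}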
diff --git a/test/CodeGen/AArch64/fp16-v8-instructions.ll b/test/CodeGen/AArch64/fp16-v8-instructions.ll
index 9ee2296ace83..10a8c22d6f7e 100644
--- a/test/CodeGen/AArch64/fp16-v8-instructions.ll
+++ b/test/CodeGen/AArch64/fp16-v8-instructions.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=aarch64-none-eabi | FileCheck %s
+; RUN: llc < %s -asm-verbose=false -mtriple=aarch64-none-eabi | FileCheck %s
define <8 x half> @add_h(<8 x half> %a, <8 x half> %b) {
entry:
@@ -164,7 +164,7 @@ define <8 x half> @load_h(<8 x half>* %a) {
entry:
; CHECK-LABEL: load_h:
; CHECK: ldr q0, [x0]
- %0 = load <8 x half>* %a, align 4
+ %0 = load <8 x half>, <8 x half>* %a, align 4
ret <8 x half> %0
}
@@ -188,10 +188,10 @@ define <8 x half> @s_to_h(<8 x float> %a) {
define <8 x half> @d_to_h(<8 x double> %a) {
; CHECK-LABEL: d_to_h:
-; CHECK-DAG: ins v{{[0-9]+}}.d
-; CHECK-DAG: ins v{{[0-9]+}}.d
-; CHECK-DAG: ins v{{[0-9]+}}.d
-; CHECK-DAG: ins v{{[0-9]+}}.d
+; CHECK-DAG: mov d{{[0-9]+}}, v{{[0-9]+}}.d[1]
+; CHECK-DAG: mov d{{[0-9]+}}, v{{[0-9]+}}.d[1]
+; CHECK-DAG: mov d{{[0-9]+}}, v{{[0-9]+}}.d[1]
+; CHECK-DAG: mov d{{[0-9]+}}, v{{[0-9]+}}.d[1]
; CHECK-DAG: fcvt h
; CHECK-DAG: fcvt h
; CHECK-DAG: fcvt h
@@ -253,3 +253,109 @@ define <8 x i16> @bitcast_h_to_i(float, <8 x half> %a) {
ret <8 x i16> %2
}
+
+define <8 x half> @sitofp_i8(<8 x i8> %a) #0 {
+; CHECK-LABEL: sitofp_i8:
+; CHECK-NEXT: sshll v[[REG1:[0-9]+]].8h, v0.8b, #0
+; CHECK-NEXT: sshll2 [[LO:v[0-9]+\.4s]], v[[REG1]].8h, #0
+; CHECK-NEXT: sshll [[HI:v[0-9]+\.4s]], v[[REG1]].4h, #0
+; CHECK-DAG: scvtf [[HIF:v[0-9]+\.4s]], [[HI]]
+; CHECK-DAG: scvtf [[LOF:v[0-9]+\.4s]], [[LO]]
+; CHECK-DAG: fcvtn v[[LOREG:[0-9]+]].4h, [[LOF]]
+; CHECK-DAG: fcvtn v0.4h, [[HIF]]
+; CHECK: ins v0.d[1], v[[LOREG]].d[0]
+ %1 = sitofp <8 x i8> %a to <8 x half>
+ ret <8 x half> %1
+}
+
+
+define <8 x half> @sitofp_i16(<8 x i16> %a) #0 {
+; CHECK-LABEL: sitofp_i16:
+; CHECK-NEXT: sshll2 [[LO:v[0-9]+\.4s]], v0.8h, #0
+; CHECK-NEXT: sshll [[HI:v[0-9]+\.4s]], v0.4h, #0
+; CHECK-DAG: scvtf [[HIF:v[0-9]+\.4s]], [[HI]]
+; CHECK-DAG: scvtf [[LOF:v[0-9]+\.4s]], [[LO]]
+; CHECK-DAG: fcvtn v[[LOREG:[0-9]+]].4h, [[LOF]]
+; CHECK-DAG: fcvtn v0.4h, [[HIF]]
+; CHECK: ins v0.d[1], v[[LOREG]].d[0]
+ %1 = sitofp <8 x i16> %a to <8 x half>
+ ret <8 x half> %1
+}
+
+
+define <8 x half> @sitofp_i32(<8 x i32> %a) #0 {
+; CHECK-LABEL: sitofp_i32:
+; CHECK-DAG: scvtf [[OP1:v[0-9]+\.4s]], v0.4s
+; CHECK-DAG: scvtf [[OP2:v[0-9]+\.4s]], v1.4s
+; CHECK-DAG: fcvtn v[[REG:[0-9]+]].4h, [[OP2]]
+; CHECK-DAG: fcvtn v0.4h, [[OP1]]
+; CHECK: ins v0.d[1], v[[REG]].d[0]
+ %1 = sitofp <8 x i32> %a to <8 x half>
+ ret <8 x half> %1
+}
+
+
+define <8 x half> @sitofp_i64(<8 x i64> %a) #0 {
+; CHECK-LABEL: sitofp_i64:
+; CHECK-DAG: scvtf [[OP1:v[0-9]+\.2d]], v0.2d
+; CHECK-DAG: scvtf [[OP2:v[0-9]+\.2d]], v1.2d
+; CHECK-DAG: fcvtn [[OP3:v[0-9]+]].2s, [[OP1]]
+; CHECK-DAG: fcvtn2 [[OP3]].4s, [[OP2]]
+; CHECK: fcvtn v0.4h, [[OP3]].4s
+ %1 = sitofp <8 x i64> %a to <8 x half>
+ ret <8 x half> %1
+}
+
+define <8 x half> @uitofp_i8(<8 x i8> %a) #0 {
+; CHECK-LABEL: uitofp_i8:
+; CHECK-NEXT: ushll v[[REG1:[0-9]+]].8h, v0.8b, #0
+; CHECK-NEXT: ushll2 [[LO:v[0-9]+\.4s]], v[[REG1]].8h, #0
+; CHECK-NEXT: ushll [[HI:v[0-9]+\.4s]], v[[REG1]].4h, #0
+; CHECK-DAG: ucvtf [[HIF:v[0-9]+\.4s]], [[HI]]
+; CHECK-DAG: ucvtf [[LOF:v[0-9]+\.4s]], [[LO]]
+; CHECK-DAG: fcvtn v[[LOREG:[0-9]+]].4h, [[LOF]]
+; CHECK-DAG: fcvtn v0.4h, [[HIF]]
+; CHECK: ins v0.d[1], v[[LOREG]].d[0]
+ %1 = uitofp <8 x i8> %a to <8 x half>
+ ret <8 x half> %1
+}
+
+
+define <8 x half> @uitofp_i16(<8 x i16> %a) #0 {
+; CHECK-LABEL: uitofp_i16:
+; CHECK-NEXT: ushll2 [[LO:v[0-9]+\.4s]], v0.8h, #0
+; CHECK-NEXT: ushll [[HI:v[0-9]+\.4s]], v0.4h, #0
+; CHECK-DAG: ucvtf [[HIF:v[0-9]+\.4s]], [[HI]]
+; CHECK-DAG: ucvtf [[LOF:v[0-9]+\.4s]], [[LO]]
+; CHECK-DAG: fcvtn v[[LOREG:[0-9]+]].4h, [[LOF]]
+; CHECK-DAG: fcvtn v0.4h, [[HIF]]
+; CHECK: ins v0.d[1], v[[LOREG]].d[0]
+ %1 = uitofp <8 x i16> %a to <8 x half>
+ ret <8 x half> %1
+}
+
+
+define <8 x half> @uitofp_i32(<8 x i32> %a) #0 {
+; CHECK-LABEL: uitofp_i32:
+; CHECK-DAG: ucvtf [[OP1:v[0-9]+\.4s]], v0.4s
+; CHECK-DAG: ucvtf [[OP2:v[0-9]+\.4s]], v1.4s
+; CHECK-DAG: fcvtn v[[REG:[0-9]+]].4h, [[OP2]]
+; CHECK-DAG: fcvtn v0.4h, [[OP1]]
+; CHECK: ins v0.d[1], v[[REG]].d[0]
+ %1 = uitofp <8 x i32> %a to <8 x half>
+ ret <8 x half> %1
+}
+
+
+define <8 x half> @uitofp_i64(<8 x i64> %a) #0 {
+; CHECK-LABEL: uitofp_i64:
+; CHECK-DAG: ucvtf [[OP1:v[0-9]+\.2d]], v0.2d
+; CHECK-DAG: ucvtf [[OP2:v[0-9]+\.2d]], v1.2d
+; CHECK-DAG: fcvtn [[OP3:v[0-9]+]].2s, [[OP1]]
+; CHECK-DAG: fcvtn2 [[OP3]].4s, [[OP2]]
+; CHECK: fcvtn v0.4h, [[OP3]].4s
+ %1 = uitofp <8 x i64> %a to <8 x half>
+ ret <8 x half> %1
+}
+
+attributes #0 = { nounwind }
diff --git a/test/CodeGen/AArch64/fp16-vector-load-store.ll b/test/CodeGen/AArch64/fp16-vector-load-store.ll
index edbbffe001c5..b71b39f1acd1 100644
--- a/test/CodeGen/AArch64/fp16-vector-load-store.ll
+++ b/test/CodeGen/AArch64/fp16-vector-load-store.ll
@@ -5,7 +5,7 @@ define <4 x half> @load_64(<4 x half>* nocapture readonly %a) #0 {
; CHECK-LABEL: load_64:
; CHECK: ldr d0, [x0]
entry:
- %0 = load <4 x half>* %a, align 8
+ %0 = load <4 x half>, <4 x half>* %a, align 8
ret <4 x half> %0
}
@@ -14,7 +14,7 @@ define <8 x half> @load_128(<8 x half>* nocapture readonly %a) #0 {
; CHECK-LABEL: load_128:
; CHECK: ldr q0, [x0]
entry:
- %0 = load <8 x half>* %a, align 16
+ %0 = load <8 x half>, <8 x half>* %a, align 16
ret <8 x half> %0
}
@@ -23,7 +23,7 @@ define <4 x half> @load_dup_64(half* nocapture readonly %a) #0 {
; CHECK-LABEL: load_dup_64:
; CHECK: ld1r { v0.4h }, [x0]
entry:
- %0 = load half* %a, align 2
+ %0 = load half, half* %a, align 2
%1 = insertelement <4 x half> undef, half %0, i32 0
%2 = shufflevector <4 x half> %1, <4 x half> undef, <4 x i32> zeroinitializer
ret <4 x half> %2
@@ -34,7 +34,7 @@ define <8 x half> @load_dup_128(half* nocapture readonly %a) #0 {
; CHECK-LABEL: load_dup_128:
; CHECK: ld1r { v0.8h }, [x0]
entry:
- %0 = load half* %a, align 2
+ %0 = load half, half* %a, align 2
%1 = insertelement <8 x half> undef, half %0, i32 0
%2 = shufflevector <8 x half> %1, <8 x half> undef, <8 x i32> zeroinitializer
ret <8 x half> %2
@@ -45,7 +45,7 @@ define <4 x half> @load_lane_64(half* nocapture readonly %a, <4 x half> %b) #0 {
; CHECK-LABEL: load_lane_64:
; CHECK: ld1 { v0.h }[2], [x0]
entry:
- %0 = load half* %a, align 2
+ %0 = load half, half* %a, align 2
%1 = insertelement <4 x half> %b, half %0, i32 2
ret <4 x half> %1
}
@@ -55,7 +55,7 @@ define <8 x half> @load_lane_128(half* nocapture readonly %a, <8 x half> %b) #0
; CHECK-LABEL: load_lane_128:
; CHECK: ld1 { v0.h }[5], [x0]
entry:
- %0 = load half* %a, align 2
+ %0 = load half, half* %a, align 2
%1 = insertelement <8 x half> %b, half %0, i32 5
ret <8 x half> %1
}
diff --git a/test/CodeGen/AArch64/fp16-vector-nvcast.ll b/test/CodeGen/AArch64/fp16-vector-nvcast.ll
new file mode 100644
index 000000000000..83e0df74c3da
--- /dev/null
+++ b/test/CodeGen/AArch64/fp16-vector-nvcast.ll
@@ -0,0 +1,89 @@
+; RUN: llc < %s -asm-verbose=false -mtriple=aarch64-none-eabi | FileCheck %s
+
+; Test pattern (v4f16 (AArch64NvCast (v2i32 FPR64:$src)))
+define void @nvcast_v2i32(<4 x half>* %a) #0 {
+; CHECK-LABEL: nvcast_v2i32:
+; CHECK-NEXT: movi v[[REG:[0-9]+]].2s, #0xab, lsl #16
+; CHECK-NEXT: str d[[REG]], [x0]
+; CHECK-NEXT: ret
+ store volatile <4 x half> <half 0xH0000, half 0xH00AB, half 0xH0000, half 0xH00AB>, <4 x half>* %a
+ ret void
+}
+
+
+; Test pattern (v4f16 (AArch64NvCast (v4i16 FPR64:$src)))
+define void @nvcast_v4i16(<4 x half>* %a) #0 {
+; CHECK-LABEL: nvcast_v4i16:
+; CHECK-NEXT: movi v[[REG:[0-9]+]].4h, #0xab
+; CHECK-NEXT: str d[[REG]], [x0]
+; CHECK-NEXT: ret
+ store volatile <4 x half> <half 0xH00AB, half 0xH00AB, half 0xH00AB, half 0xH00AB>, <4 x half>* %a
+ ret void
+}
+
+
+; Test pattern (v4f16 (AArch64NvCast (v8i8 FPR64:$src)))
+define void @nvcast_v8i8(<4 x half>* %a) #0 {
+; CHECK-LABEL: nvcast_v8i8:
+; CHECK-NEXT: movi v[[REG:[0-9]+]].8b, #0xab
+; CHECK-NEXT: str d[[REG]], [x0]
+; CHECK-NEXT: ret
+ store volatile <4 x half> <half 0xHABAB, half 0xHABAB, half 0xHABAB, half 0xHABAB>, <4 x half>* %a
+ ret void
+}
+
+
+; Test pattern (v4f16 (AArch64NvCast (f64 FPR64:$src)))
+define void @nvcast_f64(<4 x half>* %a) #0 {
+; CHECK-LABEL: nvcast_f64:
+; CHECK-NEXT: movi d[[REG:[0-9]+]], #0000000000000000
+; CHECK-NEXT: str d[[REG]], [x0]
+; CHECK-NEXT: ret
+ store volatile <4 x half> zeroinitializer, <4 x half>* %a
+ ret void
+}
+
+; Test pattern (v8f16 (AArch64NvCast (v4i32 FPR128:$src)))
+define void @nvcast_v4i32(<8 x half>* %a) #0 {
+; CHECK-LABEL: nvcast_v4i32:
+; CHECK-NEXT: movi v[[REG:[0-9]+]].4s, #0xab, lsl #16
+; CHECK-NEXT: str q[[REG]], [x0]
+; CHECK-NEXT: ret
+ store volatile <8 x half> <half 0xH0000, half 0xH00AB, half 0xH0000, half 0xH00AB, half 0xH0000, half 0xH00AB, half 0xH0000, half 0xH00AB>, <8 x half>* %a
+ ret void
+}
+
+
+; Test pattern (v8f16 (AArch64NvCast (v8i16 FPR128:$src)))
+define void @nvcast_v8i16(<8 x half>* %a) #0 {
+; CHECK-LABEL: nvcast_v8i16:
+; CHECK-NEXT: movi v[[REG:[0-9]+]].8h, #0xab
+; CHECK-NEXT: str q[[REG]], [x0]
+; CHECK-NEXT: ret
+ store volatile <8 x half> <half 0xH00AB, half 0xH00AB, half 0xH00AB, half 0xH00AB, half 0xH00AB, half 0xH00AB, half 0xH00AB, half 0xH00AB>, <8 x half>* %a
+ ret void
+}
+
+
+; Test pattern (v8f16 (AArch64NvCast (v16i8 FPR128:$src)))
+define void @nvcast_v16i8(<8 x half>* %a) #0 {
+; CHECK-LABEL: nvcast_v16i8:
+; CHECK-NEXT: movi v[[REG:[0-9]+]].16b, #0xab
+; CHECK-NEXT: str q[[REG]], [x0]
+; CHECK-NEXT: ret
+ store volatile <8 x half> <half 0xHABAB, half 0xHABAB, half 0xHABAB, half 0xHABAB, half 0xHABAB, half 0xHABAB, half 0xHABAB, half 0xHABAB>, <8 x half>* %a
+ ret void
+}
+
+
+; Test pattern (v8f16 (AArch64NvCast (v2i64 FPR128:$src)))
+define void @nvcast_v2i64(<8 x half>* %a) #0 {
+; CHECK-LABEL: nvcast_v2i64:
+; CHECK-NEXT: movi v[[REG:[0-9]+]].2d, #0000000000000000
+; CHECK-NEXT: str q[[REG]], [x0]
+; CHECK-NEXT: ret
+ store volatile <8 x half> zeroinitializer, <8 x half>* %a
+ ret void
+}
+
+attributes #0 = { nounwind }
diff --git a/test/CodeGen/AArch64/fpimm.ll b/test/CodeGen/AArch64/fpimm.ll
index b7db9182a393..6acb11108afc 100644
--- a/test/CodeGen/AArch64/fpimm.ll
+++ b/test/CodeGen/AArch64/fpimm.ll
@@ -1,6 +1,6 @@
; RUN: llc -mtriple=aarch64-linux-gnu -verify-machineinstrs < %s | FileCheck %s
; RUN: llc -mtriple=aarch64-apple-darwin -code-model=large -verify-machineinstrs < %s | FileCheck %s --check-prefix=LARGE
-; RUN: llc -mtriple=aarch64-apple-darwin -code-model=large -fast-isel -fast-isel-abort -verify-machineinstrs < %s | FileCheck %s --check-prefix=LARGE
+; RUN: llc -mtriple=aarch64-apple-darwin -code-model=large -fast-isel -fast-isel-abort=1 -verify-machineinstrs < %s | FileCheck %s --check-prefix=LARGE
@varf32 = global float 0.0
@varf64 = global double 0.0
@@ -8,7 +8,7 @@
define void @check_float() {
; CHECK-LABEL: check_float:
- %val = load float* @varf32
+ %val = load float, float* @varf32
%newval1 = fadd float %val, 8.5
store volatile float %newval1, float* @varf32
; CHECK-DAG: fmov [[EIGHT5:s[0-9]+]], #8.5
@@ -24,7 +24,7 @@ define void @check_float() {
define void @check_double() {
; CHECK-LABEL: check_double:
- %val = load double* @varf64
+ %val = load double, double* @varf64
%newval1 = fadd double %val, 8.5
store volatile double %newval1, double* @varf64
; CHECK-DAG: fmov {{d[0-9]+}}, #8.5
diff --git a/test/CodeGen/AArch64/frameaddr.ll b/test/CodeGen/AArch64/frameaddr.ll
index d6bb50e57a73..d965809d875e 100644
--- a/test/CodeGen/AArch64/frameaddr.ll
+++ b/test/CodeGen/AArch64/frameaddr.ll
@@ -1,5 +1,5 @@
; RUN: llc -mtriple=aarch64-apple-darwin -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=aarch64-apple-darwin -fast-isel -fast-isel-abort -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-apple-darwin -fast-isel -fast-isel-abort=1 -verify-machineinstrs < %s | FileCheck %s
define i8* @test_frameaddress0() nounwind {
entry:
diff --git a/test/CodeGen/AArch64/free-zext.ll b/test/CodeGen/AArch64/free-zext.ll
index d69105eec381..cff11f85bda4 100644
--- a/test/CodeGen/AArch64/free-zext.ll
+++ b/test/CodeGen/AArch64/free-zext.ll
@@ -5,9 +5,9 @@ define i64 @test_free_zext(i8* %a, i16* %b) {
; CHECK-DAG: ldrb w[[A:[0-9]+]], [x0]
; CHECK: ldrh w[[B:[0-9]+]], [x1]
; CHECK: add x0, x[[B]], x[[A]]
- %1 = load i8* %a, align 1
+ %1 = load i8, i8* %a, align 1
%conv = zext i8 %1 to i64
- %2 = load i16* %b, align 2
+ %2 = load i16, i16* %b, align 2
%conv1 = zext i16 %2 to i64
%add = add nsw i64 %conv1, %conv
ret i64 %add
diff --git a/test/CodeGen/AArch64/func-argpassing.ll b/test/CodeGen/AArch64/func-argpassing.ll
index 9fc9a5f0190f..9100ae39282b 100644
--- a/test/CodeGen/AArch64/func-argpassing.ll
+++ b/test/CodeGen/AArch64/func-argpassing.ll
@@ -34,16 +34,16 @@ define void @add_floats(float %val1, float %val2) {
; with memcpy.
define void @take_struct(%myStruct* byval %structval) {
; CHECK-LABEL: take_struct:
- %addr0 = getelementptr %myStruct* %structval, i64 0, i32 2
- %addr1 = getelementptr %myStruct* %structval, i64 0, i32 0
+ %addr0 = getelementptr %myStruct, %myStruct* %structval, i64 0, i32 2
+ %addr1 = getelementptr %myStruct, %myStruct* %structval, i64 0, i32 0
- %val0 = load volatile i32* %addr0
+ %val0 = load volatile i32, i32* %addr0
; Some weird move means x0 is used for one access
; CHECK: ldr [[REG32:w[0-9]+]], [{{x[0-9]+|sp}}, #12]
store volatile i32 %val0, i32* @var32
; CHECK: str [[REG32]], [{{x[0-9]+}}, {{#?}}:lo12:var32]
- %val1 = load volatile i64* %addr1
+ %val1 = load volatile i64, i64* %addr1
; CHECK: ldr [[REG64:x[0-9]+]], [{{x[0-9]+|sp}}]
store volatile i64 %val1, i64* @var64
; CHECK: str [[REG64]], [{{x[0-9]+}}, {{#?}}:lo12:var64]
@@ -55,16 +55,16 @@ define void @take_struct(%myStruct* byval %structval) {
define void @check_byval_align(i32* byval %ignore, %myStruct* byval align 16 %structval) {
; CHECK-LABEL: check_byval_align:
- %addr0 = getelementptr %myStruct* %structval, i64 0, i32 2
- %addr1 = getelementptr %myStruct* %structval, i64 0, i32 0
+ %addr0 = getelementptr %myStruct, %myStruct* %structval, i64 0, i32 2
+ %addr1 = getelementptr %myStruct, %myStruct* %structval, i64 0, i32 0
- %val0 = load volatile i32* %addr0
+ %val0 = load volatile i32, i32* %addr0
; Some weird move means x0 is used for one access
; CHECK: ldr [[REG32:w[0-9]+]], [sp, #28]
store i32 %val0, i32* @var32
; CHECK: str [[REG32]], [{{x[0-9]+}}, {{#?}}:lo12:var32]
- %val1 = load volatile i64* %addr1
+ %val1 = load volatile i64, i64* %addr1
; CHECK: ldr [[REG64:x[0-9]+]], [sp, #16]
store i64 %val1, i64* @var64
; CHECK: str [[REG64]], [{{x[0-9]+}}, {{#?}}:lo12:var64]
@@ -74,7 +74,7 @@ define void @check_byval_align(i32* byval %ignore, %myStruct* byval align 16 %st
define i32 @return_int() {
; CHECK-LABEL: return_int:
- %val = load i32* @var32
+ %val = load i32, i32* @var32
ret i32 %val
; CHECK: ldr w0, [{{x[0-9]+}}, {{#?}}:lo12:var32]
; Make sure epilogue follows
@@ -94,7 +94,7 @@ define double @return_double() {
define [2 x i64] @return_struct() {
; CHECK-LABEL: return_struct:
%addr = bitcast %myStruct* @varstruct to [2 x i64]*
- %val = load [2 x i64]* %addr
+ %val = load [2 x i64], [2 x i64]* %addr
ret [2 x i64] %val
; CHECK: add x[[VARSTRUCT:[0-9]+]], {{x[0-9]+}}, :lo12:varstruct
; CHECK: ldp x0, x1, [x[[VARSTRUCT]]]
@@ -108,9 +108,9 @@ define [2 x i64] @return_struct() {
; if LLVM does it to %myStruct too. So this is the simplest check
define void @return_large_struct(%myStruct* sret %retval) {
; CHECK-LABEL: return_large_struct:
- %addr0 = getelementptr %myStruct* %retval, i64 0, i32 0
- %addr1 = getelementptr %myStruct* %retval, i64 0, i32 1
- %addr2 = getelementptr %myStruct* %retval, i64 0, i32 2
+ %addr0 = getelementptr %myStruct, %myStruct* %retval, i64 0, i32 0
+ %addr1 = getelementptr %myStruct, %myStruct* %retval, i64 0, i32 1
+ %addr2 = getelementptr %myStruct, %myStruct* %retval, i64 0, i32 2
store i64 42, i64* %addr0
store i8 2, i8* %addr1
@@ -129,8 +129,8 @@ define i32 @struct_on_stack(i8 %var0, i16 %var1, i32 %var2, i64 %var3, i128 %var
i32* %var6, %myStruct* byval %struct, i32* byval %stacked,
double %notstacked) {
; CHECK-LABEL: struct_on_stack:
- %addr = getelementptr %myStruct* %struct, i64 0, i32 0
- %val64 = load volatile i64* %addr
+ %addr = getelementptr %myStruct, %myStruct* %struct, i64 0, i32 0
+ %val64 = load volatile i64, i64* %addr
store volatile i64 %val64, i64* @var64
; Currently nothing on local stack, so struct should be at sp
; CHECK: ldr [[VAL64:x[0-9]+]], [sp]
@@ -141,7 +141,7 @@ define i32 @struct_on_stack(i8 %var0, i16 %var1, i32 %var2, i64 %var3, i128 %var
; CHECK: str d0, [{{x[0-9]+}}, {{#?}}:lo12:vardouble
; CHECK-NOFP-NOT: str d0,
- %retval = load volatile i32* %stacked
+ %retval = load volatile i32, i32* %stacked
ret i32 %retval
; CHECK-LE: ldr w0, [sp, #16]
}
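The func-argpassing.ll hunks pair the load rewrite with the matching getelementptr change: GEP now names the source element type before the pointer operand, so "getelementptr %myStruct* %p, ..." becomes "getelementptr %myStruct, %myStruct* %p, ...". A minimal sketch with a hypothetical struct type (not the %myStruct used by the test):
%pair = type { i32, i64 }
define i64 @gep_syntax_sketch(%pair* %p) {
  ; old form: getelementptr %pair* %p, i64 0, i32 1
  %addr = getelementptr %pair, %pair* %p, i64 0, i32 1
  %val = load i64, i64* %addr
  ret i64 %val
}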
diff --git a/test/CodeGen/AArch64/func-calls.ll b/test/CodeGen/AArch64/func-calls.ll
index 16157f83aaca..22a33157fd55 100644
--- a/test/CodeGen/AArch64/func-calls.ll
+++ b/test/CodeGen/AArch64/func-calls.ll
@@ -21,15 +21,15 @@ declare void @take_floats(float %val1, float %val2)
define void @simple_args() {
; CHECK-LABEL: simple_args:
- %char1 = load i8* @var8
- %char2 = load i8* @var8_2
+ %char1 = load i8, i8* @var8
+ %char2 = load i8, i8* @var8_2
call void @take_i8s(i8 %char1, i8 %char2)
; CHECK-DAG: ldrb w0, [{{x[0-9]+}}, {{#?}}:lo12:var8]
; CHECK-DAG: ldrb w1, [{{x[0-9]+}}, {{#?}}:lo12:var8_2]
; CHECK: bl take_i8s
- %float1 = load float* @varfloat
- %float2 = load float* @varfloat_2
+ %float1 = load float, float* @varfloat
+ %float2 = load float, float* @varfloat_2
call void @take_floats(float %float1, float %float2)
; CHECK-DAG: ldr s1, [{{x[0-9]+}}, {{#?}}:lo12:varfloat_2]
; CHECK-DAG: ldr s0, [{{x[0-9]+}}, {{#?}}:lo12:varfloat]
@@ -124,7 +124,7 @@ declare void @check_i128_regalign(i32 %val0, i128 %val1)
define void @check_i128_align() {
; CHECK-LABEL: check_i128_align:
- %val = load i128* @var128
+ %val = load i128, i128* @var128
call void @check_i128_stackalign(i32 0, i32 1, i32 2, i32 3,
i32 4, i32 5, i32 6, i32 7,
i32 42, i128 %val)
@@ -152,7 +152,7 @@ define void @check_i128_align() {
define void @check_indirect_call() {
; CHECK-LABEL: check_indirect_call:
- %func = load void()** @fptr
+ %func = load void()*, void()** @fptr
call void %func()
; CHECK: ldr [[FPTR:x[0-9]+]], [{{x[0-9]+}}, {{#?}}:lo12:fptr]
; CHECK: blr [[FPTR]]
diff --git a/test/CodeGen/AArch64/funcptr_cast.ll b/test/CodeGen/AArch64/funcptr_cast.ll
index a00b7bcaf6a2..506485ec5b0c 100644
--- a/test/CodeGen/AArch64/funcptr_cast.ll
+++ b/test/CodeGen/AArch64/funcptr_cast.ll
@@ -6,7 +6,7 @@ define i8 @test() {
; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, :lo12:foo
; CHECK: ldrb w0, [{{x[0-9]+}}]
entry:
- %0 = load i8* bitcast (void (...)* @foo to i8*), align 1
+ %0 = load i8, i8* bitcast (void (...)* @foo to i8*), align 1
ret i8 %0
}
diff --git a/test/CodeGen/AArch64/function-subtarget-features.ll b/test/CodeGen/AArch64/function-subtarget-features.ll
new file mode 100644
index 000000000000..fca6c931655a
--- /dev/null
+++ b/test/CodeGen/AArch64/function-subtarget-features.ll
@@ -0,0 +1,21 @@
+; RUN: llc < %s | FileCheck %s
+target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64--linux-gnu"
+
+; This test verifies that we can enable subtarget features via
+; the function attributes and generate appropriate code (or,
+; in this case, select the instruction at all).
+
+; Function Attrs: nounwind
+define <16 x i8> @foo(<16 x i8> %data, <16 x i8> %key) #0 {
+ %vaeseq_v.i = call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> %data, <16 x i8> %key)
+ ret <16 x i8> %vaeseq_v.i
+}
+
+; CHECK: foo
+; CHECK: aese
+
+; Function Attrs: nounwind readnone
+declare <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8>, <16 x i8>)
+
+attributes #0 = { nounwind "target-features"="+neon,+crc,+crypto" }
diff --git a/test/CodeGen/AArch64/ghc-cc.ll b/test/CodeGen/AArch64/ghc-cc.ll
index 505bd5fca66d..01ed78558c79 100644
--- a/test/CodeGen/AArch64/ghc-cc.ll
+++ b/test/CodeGen/AArch64/ghc-cc.ll
@@ -51,7 +51,7 @@ entry:
; CHECK-NEXT: bl bar_i64
; CHECK-NEXT: ret
- %0 = load i64* @base
+ %0 = load i64, i64* @base
tail call ghccc void @bar_i64( i64 %0 ) nounwind
ret void
}
@@ -64,7 +64,7 @@ entry:
; CHECK-NEXT: bl bar_float
; CHECK-NEXT: ret
- %0 = load float* @f1
+ %0 = load float, float* @f1
tail call ghccc void @bar_float( float %0 ) nounwind
ret void
}
@@ -77,7 +77,7 @@ entry:
; CHECK-NEXT: bl bar_double
; CHECK-NEXT: ret
- %0 = load double* @d1
+ %0 = load double, double* @d1
tail call ghccc void @bar_double( double %0 ) nounwind
ret void
}
diff --git a/test/CodeGen/AArch64/global-alignment.ll b/test/CodeGen/AArch64/global-alignment.ll
index 451b9d6741ee..657778e34187 100644
--- a/test/CodeGen/AArch64/global-alignment.ll
+++ b/test/CodeGen/AArch64/global-alignment.ll
@@ -11,7 +11,7 @@ define i64 @test_align32() {
; Since @var32 is only guaranteed to be aligned to 32-bits, it's invalid to
; emit an "LDR x0, [x0, #:lo12:var32] instruction to implement this load.
- %val = load i64* %addr
+ %val = load i64, i64* %addr
; CHECK: adrp [[HIBITS:x[0-9]+]], var32
; CHECK: add x[[ADDR:[0-9]+]], [[HIBITS]], {{#?}}:lo12:var32
; CHECK: ldr x0, [x[[ADDR]]]
@@ -25,7 +25,7 @@ define i64 @test_align64() {
; However, var64 *is* properly aligned and emitting an adrp/add/ldr would be
; inefficient.
- %val = load i64* %addr
+ %val = load i64, i64* %addr
; CHECK: adrp x[[HIBITS:[0-9]+]], var64
; CHECK-NOT: add x[[HIBITS]]
; CHECK: ldr x0, [x[[HIBITS]], {{#?}}:lo12:var64]
@@ -39,7 +39,7 @@ define i64 @test_var32_align64() {
; Since @var32 is only guaranteed to be aligned to 32-bits, it's invalid to
; emit an "LDR x0, [x0, #:lo12:var32] instruction to implement this load.
- %val = load i64* %addr
+ %val = load i64, i64* %addr
; CHECK: adrp x[[HIBITS:[0-9]+]], var32_align64
; CHECK-NOT: add x[[HIBITS]]
; CHECK: ldr x0, [x[[HIBITS]], {{#?}}:lo12:var32_align64]
@@ -52,7 +52,7 @@ define i64 @test_var32_alias() {
%addr = bitcast [3 x i32]* @alias to i64*
; Test that we can find the alignment for aliases.
- %val = load i64* %addr
+ %val = load i64, i64* %addr
; CHECK: adrp x[[HIBITS:[0-9]+]], alias
; CHECK-NOT: add x[[HIBITS]]
; CHECK: ldr x0, [x[[HIBITS]], {{#?}}:lo12:alias]
@@ -68,7 +68,7 @@ define i64 @test_yet_another_var() {
; @yet_another_var has a preferred alignment of 8, but that's not enough if
; we're going to be linking against other things. Its ABI alignment is only 4
; so we can't fold the load.
- %val = load i64* bitcast({i32, i32}* @yet_another_var to i64*)
+ %val = load i64, i64* bitcast({i32, i32}* @yet_another_var to i64*)
; CHECK: adrp [[HIBITS:x[0-9]+]], yet_another_var
; CHECK: add x[[ADDR:[0-9]+]], [[HIBITS]], {{#?}}:lo12:yet_another_var
; CHECK: ldr x0, [x[[ADDR]]]
diff --git a/test/CodeGen/AArch64/global-merge-1.ll b/test/CodeGen/AArch64/global-merge-1.ll
index 7dc8da1c444b..14b04303ffb3 100644
--- a/test/CodeGen/AArch64/global-merge-1.ll
+++ b/test/CodeGen/AArch64/global-merge-1.ll
@@ -1,11 +1,11 @@
-; RUN: llc %s -mtriple=aarch64-none-linux-gnu -enable-global-merge -o - | FileCheck %s
-; RUN: llc %s -mtriple=aarch64-none-linux-gnu -enable-global-merge -global-merge-on-external -o - | FileCheck %s
+; RUN: llc %s -mtriple=aarch64-none-linux-gnu -aarch64-global-merge -o - | FileCheck %s
+; RUN: llc %s -mtriple=aarch64-none-linux-gnu -aarch64-global-merge -global-merge-on-external -o - | FileCheck %s
-; RUN: llc %s -mtriple=aarch64-linux-gnuabi -enable-global-merge -o - | FileCheck %s
-; RUN: llc %s -mtriple=aarch64-linux-gnuabi -enable-global-merge -global-merge-on-external -o - | FileCheck %s
+; RUN: llc %s -mtriple=aarch64-linux-gnuabi -aarch64-global-merge -o - | FileCheck %s
+; RUN: llc %s -mtriple=aarch64-linux-gnuabi -aarch64-global-merge -global-merge-on-external -o - | FileCheck %s
-; RUN: llc %s -mtriple=aarch64-apple-ios -enable-global-merge -o - | FileCheck %s --check-prefix=CHECK-APPLE-IOS
-; RUN: llc %s -mtriple=aarch64-apple-ios -enable-global-merge -global-merge-on-external -o - | FileCheck %s --check-prefix=CHECK-APPLE-IOS
+; RUN: llc %s -mtriple=aarch64-apple-ios -aarch64-global-merge -o - | FileCheck %s --check-prefix=CHECK-APPLE-IOS
+; RUN: llc %s -mtriple=aarch64-apple-ios -aarch64-global-merge -global-merge-on-external -o - | FileCheck %s --check-prefix=CHECK-APPLE-IOS
@m = internal global i32 0, align 4
@n = internal global i32 0, align 4
diff --git a/test/CodeGen/AArch64/global-merge-2.ll b/test/CodeGen/AArch64/global-merge-2.ll
index 70b700c7e57a..af684039bf10 100644
--- a/test/CodeGen/AArch64/global-merge-2.ll
+++ b/test/CodeGen/AArch64/global-merge-2.ll
@@ -1,6 +1,6 @@
-; RUN: llc %s -mtriple=aarch64-none-linux-gnu -enable-global-merge -global-merge-on-external -o - | FileCheck %s
-; RUN: llc %s -mtriple=aarch64-linux-gnuabi -enable-global-merge -global-merge-on-external -o - | FileCheck %s
-; RUN: llc %s -mtriple=aarch64-apple-ios -enable-global-merge -global-merge-on-external -o - | FileCheck %s --check-prefix=CHECK-APPLE-IOS
+; RUN: llc %s -mtriple=aarch64-none-linux-gnu -aarch64-global-merge -global-merge-on-external -o - | FileCheck %s
+; RUN: llc %s -mtriple=aarch64-linux-gnuabi -aarch64-global-merge -global-merge-on-external -o - | FileCheck %s
+; RUN: llc %s -mtriple=aarch64-apple-ios -aarch64-global-merge -global-merge-on-external -o - | FileCheck %s --check-prefix=CHECK-APPLE-IOS
@x = global i32 0, align 4
@y = global i32 0, align 4
diff --git a/test/CodeGen/AArch64/global-merge-3.ll b/test/CodeGen/AArch64/global-merge-3.ll
index d455d40edcc2..925108308e56 100644
--- a/test/CodeGen/AArch64/global-merge-3.ll
+++ b/test/CodeGen/AArch64/global-merge-3.ll
@@ -1,6 +1,6 @@
-; RUN: llc %s -mtriple=aarch64-none-linux-gnu -enable-global-merge -global-merge-on-external -o - | FileCheck %s
-; RUN: llc %s -mtriple=aarch64-linux-gnuabi -enable-global-merge -global-merge-on-external -o - | FileCheck %s
-; RUN: llc %s -mtriple=aarch64-apple-ios -enable-global-merge -global-merge-on-external -o - | FileCheck %s --check-prefix=CHECK-APPLE-IOS
+; RUN: llc %s -mtriple=aarch64-none-linux-gnu -aarch64-global-merge -global-merge-on-external -o - | FileCheck %s
+; RUN: llc %s -mtriple=aarch64-linux-gnuabi -aarch64-global-merge -global-merge-on-external -o - | FileCheck %s
+; RUN: llc %s -mtriple=aarch64-apple-ios -aarch64-global-merge -global-merge-on-external -o - | FileCheck %s --check-prefix=CHECK-APPLE-IOS
@x = global [1000 x i32] zeroinitializer, align 1
@y = global [1000 x i32] zeroinitializer, align 1
@@ -12,8 +12,8 @@ define void @f1(i32 %a1, i32 %a2, i32 %a3) {
;CHECK-APPLE-IOS: add x8, x8, __MergedGlobals_x@PAGEOFF
;CHECK-APPLE-IOS: adrp x9, __MergedGlobals_y@PAGE
;CHECK-APPLE-IOS: add x9, x9, __MergedGlobals_y@PAGEOFF
- %x3 = getelementptr inbounds [1000 x i32]* @x, i32 0, i64 3
- %y3 = getelementptr inbounds [1000 x i32]* @y, i32 0, i64 3
+ %x3 = getelementptr inbounds [1000 x i32], [1000 x i32]* @x, i32 0, i64 3
+ %y3 = getelementptr inbounds [1000 x i32], [1000 x i32]* @y, i32 0, i64 3
store i32 %a1, i32* %x3, align 4
store i32 %a2, i32* %y3, align 4
store i32 %a3, i32* @z, align 4
diff --git a/test/CodeGen/AArch64/global-merge-4.ll b/test/CodeGen/AArch64/global-merge-4.ll
index a525ccd8dee3..bc6b68a9c046 100644
--- a/test/CodeGen/AArch64/global-merge-4.ll
+++ b/test/CodeGen/AArch64/global-merge-4.ll
@@ -1,4 +1,4 @@
-; RUN: llc %s -mtriple=aarch64-linux-gnuabi -enable-global-merge -o - | FileCheck %s
+; RUN: llc %s -mtriple=aarch64-linux-gnuabi -aarch64-global-merge -o - | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n32:64-S128"
target triple = "arm64-apple-ios7.0.0"
@@ -10,25 +10,25 @@ target triple = "arm64-apple-ios7.0.0"
; Function Attrs: nounwind ssp
define internal void @initialize() #0 {
%1 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #2
- store i32 %1, i32* getelementptr inbounds ([5 x i32]* @bar, i64 0, i64 0), align 4
+ store i32 %1, i32* getelementptr inbounds ([5 x i32], [5 x i32]* @bar, i64 0, i64 0), align 4
%2 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #2
- store i32 %2, i32* getelementptr inbounds ([5 x i32]* @baz, i64 0, i64 0), align 4
+ store i32 %2, i32* getelementptr inbounds ([5 x i32], [5 x i32]* @baz, i64 0, i64 0), align 4
%3 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #2
- store i32 %3, i32* getelementptr inbounds ([5 x i32]* @bar, i64 0, i64 1), align 4
+ store i32 %3, i32* getelementptr inbounds ([5 x i32], [5 x i32]* @bar, i64 0, i64 1), align 4
%4 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #2
- store i32 %4, i32* getelementptr inbounds ([5 x i32]* @baz, i64 0, i64 1), align 4
+ store i32 %4, i32* getelementptr inbounds ([5 x i32], [5 x i32]* @baz, i64 0, i64 1), align 4
%5 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #2
- store i32 %5, i32* getelementptr inbounds ([5 x i32]* @bar, i64 0, i64 2), align 4
+ store i32 %5, i32* getelementptr inbounds ([5 x i32], [5 x i32]* @bar, i64 0, i64 2), align 4
%6 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #2
- store i32 %6, i32* getelementptr inbounds ([5 x i32]* @baz, i64 0, i64 2), align 4
+ store i32 %6, i32* getelementptr inbounds ([5 x i32], [5 x i32]* @baz, i64 0, i64 2), align 4
%7 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #2
- store i32 %7, i32* getelementptr inbounds ([5 x i32]* @bar, i64 0, i64 3), align 4
+ store i32 %7, i32* getelementptr inbounds ([5 x i32], [5 x i32]* @bar, i64 0, i64 3), align 4
%8 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #2
- store i32 %8, i32* getelementptr inbounds ([5 x i32]* @baz, i64 0, i64 3), align 4
+ store i32 %8, i32* getelementptr inbounds ([5 x i32], [5 x i32]* @baz, i64 0, i64 3), align 4
%9 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #2
- store i32 %9, i32* getelementptr inbounds ([5 x i32]* @bar, i64 0, i64 4), align 4
+ store i32 %9, i32* getelementptr inbounds ([5 x i32], [5 x i32]* @bar, i64 0, i64 4), align 4
%10 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #2
- store i32 %10, i32* getelementptr inbounds ([5 x i32]* @baz, i64 0, i64 4), align 4
+ store i32 %10, i32* getelementptr inbounds ([5 x i32], [5 x i32]* @baz, i64 0, i64 4), align 4
ret void
}
@@ -36,32 +36,32 @@ declare i32 @calc(...)
; Function Attrs: nounwind ssp
define internal void @calculate() #0 {
- %1 = load i32* getelementptr inbounds ([5 x i32]* @bar, i64 0, i64 0), align 4
- %2 = load i32* getelementptr inbounds ([5 x i32]* @baz, i64 0, i64 0), align 4
+ %1 = load i32, i32* getelementptr inbounds ([5 x i32], [5 x i32]* @bar, i64 0, i64 0), align 4
+ %2 = load i32, i32* getelementptr inbounds ([5 x i32], [5 x i32]* @baz, i64 0, i64 0), align 4
%3 = mul nsw i32 %2, %1
- store i32 %3, i32* getelementptr inbounds ([5 x i32]* @foo, i64 0, i64 0), align 4
- %4 = load i32* getelementptr inbounds ([5 x i32]* @bar, i64 0, i64 1), align 4
- %5 = load i32* getelementptr inbounds ([5 x i32]* @baz, i64 0, i64 1), align 4
+ store i32 %3, i32* getelementptr inbounds ([5 x i32], [5 x i32]* @foo, i64 0, i64 0), align 4
+ %4 = load i32, i32* getelementptr inbounds ([5 x i32], [5 x i32]* @bar, i64 0, i64 1), align 4
+ %5 = load i32, i32* getelementptr inbounds ([5 x i32], [5 x i32]* @baz, i64 0, i64 1), align 4
%6 = mul nsw i32 %5, %4
- store i32 %6, i32* getelementptr inbounds ([5 x i32]* @foo, i64 0, i64 1), align 4
- %7 = load i32* getelementptr inbounds ([5 x i32]* @bar, i64 0, i64 2), align 4
- %8 = load i32* getelementptr inbounds ([5 x i32]* @baz, i64 0, i64 2), align 4
+ store i32 %6, i32* getelementptr inbounds ([5 x i32], [5 x i32]* @foo, i64 0, i64 1), align 4
+ %7 = load i32, i32* getelementptr inbounds ([5 x i32], [5 x i32]* @bar, i64 0, i64 2), align 4
+ %8 = load i32, i32* getelementptr inbounds ([5 x i32], [5 x i32]* @baz, i64 0, i64 2), align 4
%9 = mul nsw i32 %8, %7
- store i32 %9, i32* getelementptr inbounds ([5 x i32]* @foo, i64 0, i64 2), align 4
- %10 = load i32* getelementptr inbounds ([5 x i32]* @bar, i64 0, i64 3), align 4
- %11 = load i32* getelementptr inbounds ([5 x i32]* @baz, i64 0, i64 3), align 4
+ store i32 %9, i32* getelementptr inbounds ([5 x i32], [5 x i32]* @foo, i64 0, i64 2), align 4
+ %10 = load i32, i32* getelementptr inbounds ([5 x i32], [5 x i32]* @bar, i64 0, i64 3), align 4
+ %11 = load i32, i32* getelementptr inbounds ([5 x i32], [5 x i32]* @baz, i64 0, i64 3), align 4
%12 = mul nsw i32 %11, %10
- store i32 %12, i32* getelementptr inbounds ([5 x i32]* @foo, i64 0, i64 3), align 4
- %13 = load i32* getelementptr inbounds ([5 x i32]* @bar, i64 0, i64 4), align 4
- %14 = load i32* getelementptr inbounds ([5 x i32]* @baz, i64 0, i64 4), align 4
+ store i32 %12, i32* getelementptr inbounds ([5 x i32], [5 x i32]* @foo, i64 0, i64 3), align 4
+ %13 = load i32, i32* getelementptr inbounds ([5 x i32], [5 x i32]* @bar, i64 0, i64 4), align 4
+ %14 = load i32, i32* getelementptr inbounds ([5 x i32], [5 x i32]* @baz, i64 0, i64 4), align 4
%15 = mul nsw i32 %14, %13
- store i32 %15, i32* getelementptr inbounds ([5 x i32]* @foo, i64 0, i64 4), align 4
+ store i32 %15, i32* getelementptr inbounds ([5 x i32], [5 x i32]* @foo, i64 0, i64 4), align 4
ret void
}
; Function Attrs: nounwind readnone ssp
define internal i32* @returnFoo() #1 {
- ret i32* getelementptr inbounds ([5 x i32]* @foo, i64 0, i64 0)
+ ret i32* getelementptr inbounds ([5 x i32], [5 x i32]* @foo, i64 0, i64 0)
}
;CHECK: .type _MergedGlobals,@object // @_MergedGlobals
diff --git a/test/CodeGen/AArch64/global-merge-group-by-use.ll b/test/CodeGen/AArch64/global-merge-group-by-use.ll
new file mode 100644
index 000000000000..ddc044ed9e08
--- /dev/null
+++ b/test/CodeGen/AArch64/global-merge-group-by-use.ll
@@ -0,0 +1,94 @@
+; RUN: llc -mtriple=aarch64-apple-ios -asm-verbose=false -aarch64-collect-loh=false \
+; RUN: -aarch64-global-merge -global-merge-group-by-use -global-merge-ignore-single-use=false \
+; RUN: %s -o - | FileCheck %s
+
+; We assume that globals of the same size aren't reordered inside a set.
+
+; Check that we create two MergedGlobal instances for two functions using
+; disjoint sets of globals.
+
+@m1 = internal global i32 0, align 4
+@n1 = internal global i32 0, align 4
+
+; CHECK-LABEL: f1:
+define void @f1(i32 %a1, i32 %a2) #0 {
+; CHECK-NEXT: adrp x8, [[SET1:__MergedGlobals.[0-9]*]]@PAGE
+; CHECK-NEXT: add x8, x8, [[SET1]]@PAGEOFF
+; CHECK-NEXT: stp w0, w1, [x8]
+; CHECK-NEXT: ret
+ store i32 %a1, i32* @m1, align 4
+ store i32 %a2, i32* @n1, align 4
+ ret void
+}
+
+@m2 = internal global i32 0, align 4
+@n2 = internal global i32 0, align 4
+@o2 = internal global i32 0, align 4
+
+; CHECK-LABEL: f2:
+define void @f2(i32 %a1, i32 %a2, i32 %a3) #0 {
+; CHECK-NEXT: adrp x8, [[SET2:__MergedGlobals.[0-9]*]]@PAGE
+; CHECK-NEXT: add x8, x8, [[SET2]]@PAGEOFF
+; CHECK-NEXT: stp w0, w1, [x8]
+; CHECK-NEXT: str w2, [x8, #8]
+; CHECK-NEXT: ret
+ store i32 %a1, i32* @m2, align 4
+ store i32 %a2, i32* @n2, align 4
+ store i32 %a3, i32* @o2, align 4
+ ret void
+}
+
+; Sanity-check (don't worry about cost models) that we pick the biggest subset
+; of all globals used "together" directly or indirectly. Here, that means
+; merging n3, m4, and n4 together, but ignoring m3.
+
+@m3 = internal global i32 0, align 4
+@n3 = internal global i32 0, align 4
+
+; CHECK-LABEL: f3:
+define void @f3(i32 %a1, i32 %a2) #0 {
+; CHECK-NEXT: adrp x8, _m3@PAGE
+; CHECK-NEXT: adrp x9, [[SET3:__MergedGlobals[0-9]*]]@PAGE
+; CHECK-NEXT: str w0, [x8, _m3@PAGEOFF]
+; CHECK-NEXT: str w1, [x9, [[SET3]]@PAGEOFF]
+; CHECK-NEXT: ret
+ store i32 %a1, i32* @m3, align 4
+ store i32 %a2, i32* @n3, align 4
+ ret void
+}
+
+@m4 = internal global i32 0, align 4
+@n4 = internal global i32 0, align 4
+
+; CHECK-LABEL: f4:
+define void @f4(i32 %a1, i32 %a2, i32 %a3) #0 {
+; CHECK-NEXT: adrp x8, [[SET3]]@PAGE
+; CHECK-NEXT: add x8, x8, [[SET3]]@PAGEOFF
+; CHECK-NEXT: stp w0, w1, [x8, #4]
+; CHECK-NEXT: str w2, [x8]
+; CHECK-NEXT: ret
+ store i32 %a1, i32* @m4, align 4
+ store i32 %a2, i32* @n4, align 4
+ store i32 %a3, i32* @n3, align 4
+ ret void
+}
+
+; Finally, check that we don't do anything with one-element global sets.
+@o5 = internal global i32 0, align 4
+
+; CHECK-LABEL: f5:
+define void @f5(i32 %a1) #0 {
+; CHECK-NEXT: adrp x8, _o5@PAGE
+; CHECK-NEXT: str w0, [x8, _o5@PAGEOFF]
+; CHECK-NEXT: ret
+ store i32 %a1, i32* @o5, align 4
+ ret void
+}
+
+; CHECK-DAG: .zerofill __DATA,__bss,_o5,4,2
+
+; CHECK-DAG: .zerofill __DATA,__bss,[[SET1]],8,3
+; CHECK-DAG: .zerofill __DATA,__bss,[[SET2]],12,3
+; CHECK-DAG: .zerofill __DATA,__bss,[[SET3]],12,3
+
+attributes #0 = { nounwind }
diff --git a/test/CodeGen/AArch64/global-merge-ignore-single-use.ll b/test/CodeGen/AArch64/global-merge-ignore-single-use.ll
new file mode 100644
index 000000000000..e6de4699132a
--- /dev/null
+++ b/test/CodeGen/AArch64/global-merge-ignore-single-use.ll
@@ -0,0 +1,63 @@
+; RUN: llc -mtriple=aarch64-apple-ios -asm-verbose=false -aarch64-collect-loh=false \
+; RUN: -aarch64-global-merge -global-merge-group-by-use -global-merge-ignore-single-use \
+; RUN: %s -o - | FileCheck %s
+
+; We assume that globals of the same size aren't reordered inside a set.
+
+@m1 = internal global i32 0, align 4
+@n1 = internal global i32 0, align 4
+@o1 = internal global i32 0, align 4
+
+; CHECK-LABEL: f1:
+define void @f1(i32 %a1, i32 %a2) #0 {
+; CHECK-NEXT: adrp x8, [[SET:__MergedGlobals]]@PAGE
+; CHECK-NEXT: add x8, x8, [[SET]]@PAGEOFF
+; CHECK-NEXT: stp w0, w1, [x8]
+; CHECK-NEXT: ret
+ store i32 %a1, i32* @m1, align 4
+ store i32 %a2, i32* @n1, align 4
+ ret void
+}
+
+@m2 = internal global i32 0, align 4
+@n2 = internal global i32 0, align 4
+
+; CHECK-LABEL: f2:
+define void @f2(i32 %a1, i32 %a2, i32 %a3) #0 {
+; CHECK-NEXT: adrp x8, [[SET]]@PAGE
+; CHECK-NEXT: add x8, x8, [[SET]]@PAGEOFF
+; CHECK-NEXT: stp w0, w1, [x8]
+; CHECK-NEXT: str w2, [x8, #8]
+; CHECK-NEXT: ret
+ store i32 %a1, i32* @m1, align 4
+ store i32 %a2, i32* @n1, align 4
+ store i32 %a3, i32* @o1, align 4
+ ret void
+}
+
+; CHECK-LABEL: f3:
+define void @f3(i32 %a1, i32 %a2) #0 {
+; CHECK-NEXT: adrp x8, [[SET]]@PAGE
+; CHECK-NEXT: add x8, x8, [[SET]]@PAGEOFF
+; CHECK-NEXT: stp w0, w1, [x8, #12]
+; CHECK-NEXT: ret
+ store i32 %a1, i32* @m2, align 4
+ store i32 %a2, i32* @n2, align 4
+ ret void
+}
+
+@o2 = internal global i32 0, align 4
+
+; CHECK-LABEL: f4:
+define void @f4(i32 %a1) #0 {
+; CHECK-NEXT: adrp x8, _o2@PAGE
+; CHECK-NEXT: str w0, [x8, _o2@PAGEOFF]
+; CHECK-NEXT: ret
+ store i32 %a1, i32* @o2, align 4
+ ret void
+}
+
+; CHECK-DAG: .zerofill __DATA,__bss,[[SET]],20,4
+; CHECK-DAG: .zerofill __DATA,__bss,_o2,4,2
+
+attributes #0 = { nounwind }
diff --git a/test/CodeGen/AArch64/half.ll b/test/CodeGen/AArch64/half.ll
index a46094b9fb85..d4cbbc918a84 100644
--- a/test/CodeGen/AArch64/half.ll
+++ b/test/CodeGen/AArch64/half.ll
@@ -4,7 +4,7 @@ define void @test_load_store(half* %in, half* %out) {
; CHECK-LABEL: test_load_store:
; CHECK: ldr [[TMP:h[0-9]+]], [x0]
; CHECK: str [[TMP]], [x1]
- %val = load half* %in
+ %val = load half, half* %in
store half %val, half* %out
ret void
}
@@ -12,7 +12,7 @@ define void @test_load_store(half* %in, half* %out) {
define i16 @test_bitcast_from_half(half* %addr) {
; CHECK-LABEL: test_bitcast_from_half:
; CHECK: ldrh w0, [x0]
- %val = load half* %addr
+ %val = load half, half* %addr
%val_int = bitcast half %val to i16
ret i16 %val_int
}
@@ -50,7 +50,7 @@ define float @test_extend32(half* %addr) {
; CHECK-LABEL: test_extend32:
; CHECK: fcvt {{s[0-9]+}}, {{h[0-9]+}}
- %val16 = load half* %addr
+ %val16 = load half, half* %addr
%val32 = fpext half %val16 to float
ret float %val32
}
@@ -59,7 +59,7 @@ define double @test_extend64(half* %addr) {
; CHECK-LABEL: test_extend64:
; CHECK: fcvt {{d[0-9]+}}, {{h[0-9]+}}
- %val16 = load half* %addr
+ %val16 = load half, half* %addr
%val32 = fpext half %val16 to double
ret double %val32
}
diff --git a/test/CodeGen/AArch64/i1-contents.ll b/test/CodeGen/AArch64/i1-contents.ll
index 7f133fc3ea83..a3830e41ec5b 100644
--- a/test/CodeGen/AArch64/i1-contents.ll
+++ b/test/CodeGen/AArch64/i1-contents.ll
@@ -32,7 +32,7 @@ define i1 @produce_i1_ret() {
; CHECK-LABEL: produce_i1_ret:
; CHECK: ldr [[VAR32:w[0-9]+]], [{{x[0-9]+}}, :lo12:var]
; CHECK: and w0, [[VAR32]], #{{0x1|0xff}}
- %val = load %big* @var
+ %val = load %big, %big* @var
%val1 = trunc %big %val to i1
ret i1 %val1
}
@@ -42,7 +42,7 @@ define void @produce_i1_arg() {
; CHECK: ldr [[VAR32:w[0-9]+]], [{{x[0-9]+}}, :lo12:var]
; CHECK: and w0, [[VAR32]], #{{0x1|0xff}}
; CHECK: bl consume_i1_arg
- %val = load %big* @var
+ %val = load %big, %big* @var
%val1 = trunc %big %val to i1
call void @consume_i1_arg(i1 %val1)
ret void
diff --git a/test/CodeGen/AArch64/i128-align.ll b/test/CodeGen/AArch64/i128-align.ll
index a1b4d6f5a446..c948739853bb 100644
--- a/test/CodeGen/AArch64/i128-align.ll
+++ b/test/CodeGen/AArch64/i128-align.ll
@@ -8,7 +8,7 @@ define i64 @check_size() {
; CHECK-LABEL: check_size:
%starti = ptrtoint %struct* @var to i64
- %endp = getelementptr %struct* @var, i64 1
+ %endp = getelementptr %struct, %struct* @var, i64 1
%endi = ptrtoint %struct* %endp to i64
%diff = sub i64 %endi, %starti
@@ -20,7 +20,7 @@ define i64 @check_field() {
; CHECK-LABEL: check_field:
%starti = ptrtoint %struct* @var to i64
- %endp = getelementptr %struct* @var, i64 0, i32 1
+ %endp = getelementptr %struct, %struct* @var, i64 0, i32 1
%endi = ptrtoint i128* %endp to i64
%diff = sub i64 %endi, %starti
diff --git a/test/CodeGen/AArch64/inline-asm-globaladdress.ll b/test/CodeGen/AArch64/inline-asm-globaladdress.ll
new file mode 100644
index 000000000000..adebabaa29be
--- /dev/null
+++ b/test/CodeGen/AArch64/inline-asm-globaladdress.ll
@@ -0,0 +1,20 @@
+; RUN: llc < %s -mtriple aarch64-gnu-linux | FileCheck %s
+; RUN: llc < %s -mtriple arm64-apple-darwin | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+
+; CHECK-LABEL: test_inlineasm_globaladdress:
+; CHECK: b {{_?}}test_symbol
+define void @test_inlineasm_globaladdress() {
+ call void asm sideeffect "b $0", "i"(void ()* @test_symbol)
+ ret void
+}
+
+; CHECK-LABEL: test_inlineasm_globaladdress_offset:
+; CHECK: b {{_?}}test_symbol+4
+define void @test_inlineasm_globaladdress_offset() {
+ call void asm sideeffect "b $0", "i"(void ()* bitcast (i8* getelementptr (i8, i8* bitcast (void ()* @test_symbol to i8*), i64 4) to void ()*))
+ ret void
+}
+
+declare void @test_symbol()
diff --git a/test/CodeGen/AArch64/intrinsics-memory-barrier.ll b/test/CodeGen/AArch64/intrinsics-memory-barrier.ll
index 09e34ae2d2ed..2a73fee7718a 100644
--- a/test/CodeGen/AArch64/intrinsics-memory-barrier.ll
+++ b/test/CodeGen/AArch64/intrinsics-memory-barrier.ll
@@ -22,7 +22,7 @@ define void @test_dmb_reordering(i32 %a, i32 %b, i32* %d) {
call void @llvm.aarch64.dmb(i32 15); CHECK: dmb sy
- %d1 = getelementptr i32* %d, i64 1
+ %d1 = getelementptr i32, i32* %d, i64 1
store i32 %b, i32* %d1 ; CHECK: str {{w[0-9]+}}, [{{x[0-9]+}}, #4]
ret void
@@ -34,7 +34,7 @@ define void @test_dsb_reordering(i32 %a, i32 %b, i32* %d) {
call void @llvm.aarch64.dsb(i32 15); CHECK: dsb sy
- %d1 = getelementptr i32* %d, i64 1
+ %d1 = getelementptr i32, i32* %d, i64 1
store i32 %b, i32* %d1 ; CHECK: str {{w[0-9]+}}, [{{x[0-9]+}}, #4]
ret void
@@ -46,7 +46,7 @@ define void @test_isb_reordering(i32 %a, i32 %b, i32* %d) {
call void @llvm.aarch64.isb(i32 15); CHECK: isb
- %d1 = getelementptr i32* %d, i64 1
+ %d1 = getelementptr i32, i32* %d, i64 1
store i32 %b, i32* %d1 ; CHECK: str {{w[0-9]+}}, [{{x[0-9]+}}, #4]
ret void
diff --git a/test/CodeGen/AArch64/large_shift.ll b/test/CodeGen/AArch64/large_shift.ll
new file mode 100644
index 000000000000..f72c97d25aa3
--- /dev/null
+++ b/test/CodeGen/AArch64/large_shift.ll
@@ -0,0 +1,21 @@
+; RUN: llc -march=aarch64 -o - %s
+target triple = "arm64-unknown-unknown"
+
+; Make sure we don't run into an assert in the aarch64 code selection when
+; DAGCombining fails.
+
+declare void @t()
+
+define void @foo() {
+ %c = bitcast i64 270458 to i64
+ %t0 = lshr i64 %c, 422383
+ %t1 = trunc i64 %t0 to i1
+ br i1 %t1, label %BB1, label %BB0
+
+BB0:
+ call void @t()
+ br label %BB1
+
+BB1:
+ ret void
+}
diff --git a/test/CodeGen/AArch64/ldst-opt.ll b/test/CodeGen/AArch64/ldst-opt.ll
index e4f4295c8503..b2c11c7517c0 100644
--- a/test/CodeGen/AArch64/ldst-opt.ll
+++ b/test/CodeGen/AArch64/ldst-opt.ll
@@ -30,11 +30,11 @@ define void @load-pre-indexed-word(%struct.word* %ptr) nounwind {
; CHECK-LABEL: load-pre-indexed-word
; CHECK: ldr w{{[0-9]+}}, [x{{[0-9]+}}, #32]!
entry:
- %a = getelementptr inbounds %struct.word* %ptr, i64 0, i32 1, i32 0
- %add = load i32* %a, align 4
+ %a = getelementptr inbounds %struct.word, %struct.word* %ptr, i64 0, i32 1, i32 0
+ %add = load i32, i32* %a, align 4
br label %bar
bar:
- %c = getelementptr inbounds %struct.word* %ptr, i64 0, i32 1
+ %c = getelementptr inbounds %struct.word, %struct.word* %ptr, i64 0, i32 1
tail call void @bar_word(%s.word* %c, i32 %add)
ret void
}
@@ -43,11 +43,11 @@ define void @store-pre-indexed-word(%struct.word* %ptr, i32 %val) nounwind {
; CHECK-LABEL: store-pre-indexed-word
; CHECK: str w{{[0-9]+}}, [x{{[0-9]+}}, #32]!
entry:
- %a = getelementptr inbounds %struct.word* %ptr, i64 0, i32 1, i32 0
+ %a = getelementptr inbounds %struct.word, %struct.word* %ptr, i64 0, i32 1, i32 0
store i32 %val, i32* %a, align 4
br label %bar
bar:
- %c = getelementptr inbounds %struct.word* %ptr, i64 0, i32 1
+ %c = getelementptr inbounds %struct.word, %struct.word* %ptr, i64 0, i32 1
tail call void @bar_word(%s.word* %c, i32 %val)
ret void
}
@@ -58,11 +58,11 @@ define void @load-pre-indexed-doubleword(%struct.doubleword* %ptr) nounwind {
; CHECK-LABEL: load-pre-indexed-doubleword
; CHECK: ldr x{{[0-9]+}}, [x{{[0-9]+}}, #32]!
entry:
- %a = getelementptr inbounds %struct.doubleword* %ptr, i64 0, i32 1, i32 0
- %add = load i64* %a, align 4
+ %a = getelementptr inbounds %struct.doubleword, %struct.doubleword* %ptr, i64 0, i32 1, i32 0
+ %add = load i64, i64* %a, align 4
br label %bar
bar:
- %c = getelementptr inbounds %struct.doubleword* %ptr, i64 0, i32 1
+ %c = getelementptr inbounds %struct.doubleword, %struct.doubleword* %ptr, i64 0, i32 1
tail call void @bar_doubleword(%s.doubleword* %c, i64 %add)
ret void
}
@@ -71,11 +71,11 @@ define void @store-pre-indexed-doubleword(%struct.doubleword* %ptr, i64 %val) no
; CHECK-LABEL: store-pre-indexed-doubleword
; CHECK: str x{{[0-9]+}}, [x{{[0-9]+}}, #32]!
entry:
- %a = getelementptr inbounds %struct.doubleword* %ptr, i64 0, i32 1, i32 0
+ %a = getelementptr inbounds %struct.doubleword, %struct.doubleword* %ptr, i64 0, i32 1, i32 0
store i64 %val, i64* %a, align 4
br label %bar
bar:
- %c = getelementptr inbounds %struct.doubleword* %ptr, i64 0, i32 1
+ %c = getelementptr inbounds %struct.doubleword, %struct.doubleword* %ptr, i64 0, i32 1
tail call void @bar_doubleword(%s.doubleword* %c, i64 %val)
ret void
}
@@ -86,11 +86,11 @@ define void @load-pre-indexed-quadword(%struct.quadword* %ptr) nounwind {
; CHECK-LABEL: load-pre-indexed-quadword
; CHECK: ldr q{{[0-9]+}}, [x{{[0-9]+}}, #32]!
entry:
- %a = getelementptr inbounds %struct.quadword* %ptr, i64 0, i32 1, i32 0
- %add = load fp128* %a, align 4
+ %a = getelementptr inbounds %struct.quadword, %struct.quadword* %ptr, i64 0, i32 1, i32 0
+ %add = load fp128, fp128* %a, align 4
br label %bar
bar:
- %c = getelementptr inbounds %struct.quadword* %ptr, i64 0, i32 1
+ %c = getelementptr inbounds %struct.quadword, %struct.quadword* %ptr, i64 0, i32 1
tail call void @bar_quadword(%s.quadword* %c, fp128 %add)
ret void
}
@@ -99,11 +99,11 @@ define void @store-pre-indexed-quadword(%struct.quadword* %ptr, fp128 %val) noun
; CHECK-LABEL: store-pre-indexed-quadword
; CHECK: str q{{[0-9]+}}, [x{{[0-9]+}}, #32]!
entry:
- %a = getelementptr inbounds %struct.quadword* %ptr, i64 0, i32 1, i32 0
+ %a = getelementptr inbounds %struct.quadword, %struct.quadword* %ptr, i64 0, i32 1, i32 0
store fp128 %val, fp128* %a, align 4
br label %bar
bar:
- %c = getelementptr inbounds %struct.quadword* %ptr, i64 0, i32 1
+ %c = getelementptr inbounds %struct.quadword, %struct.quadword* %ptr, i64 0, i32 1
tail call void @bar_quadword(%s.quadword* %c, fp128 %val)
ret void
}
@@ -114,11 +114,11 @@ define void @load-pre-indexed-float(%struct.float* %ptr) nounwind {
; CHECK-LABEL: load-pre-indexed-float
; CHECK: ldr s{{[0-9]+}}, [x{{[0-9]+}}, #32]!
entry:
- %a = getelementptr inbounds %struct.float* %ptr, i64 0, i32 1, i32 0
- %add = load float* %a, align 4
+ %a = getelementptr inbounds %struct.float, %struct.float* %ptr, i64 0, i32 1, i32 0
+ %add = load float, float* %a, align 4
br label %bar
bar:
- %c = getelementptr inbounds %struct.float* %ptr, i64 0, i32 1
+ %c = getelementptr inbounds %struct.float, %struct.float* %ptr, i64 0, i32 1
tail call void @bar_float(%s.float* %c, float %add)
ret void
}
@@ -127,11 +127,11 @@ define void @store-pre-indexed-float(%struct.float* %ptr, float %val) nounwind {
; CHECK-LABEL: store-pre-indexed-float
; CHECK: str s{{[0-9]+}}, [x{{[0-9]+}}, #32]!
entry:
- %a = getelementptr inbounds %struct.float* %ptr, i64 0, i32 1, i32 0
+ %a = getelementptr inbounds %struct.float, %struct.float* %ptr, i64 0, i32 1, i32 0
store float %val, float* %a, align 4
br label %bar
bar:
- %c = getelementptr inbounds %struct.float* %ptr, i64 0, i32 1
+ %c = getelementptr inbounds %struct.float, %struct.float* %ptr, i64 0, i32 1
tail call void @bar_float(%s.float* %c, float %val)
ret void
}
@@ -142,11 +142,11 @@ define void @load-pre-indexed-double(%struct.double* %ptr) nounwind {
; CHECK-LABEL: load-pre-indexed-double
; CHECK: ldr d{{[0-9]+}}, [x{{[0-9]+}}, #32]!
entry:
- %a = getelementptr inbounds %struct.double* %ptr, i64 0, i32 1, i32 0
- %add = load double* %a, align 4
+ %a = getelementptr inbounds %struct.double, %struct.double* %ptr, i64 0, i32 1, i32 0
+ %add = load double, double* %a, align 4
br label %bar
bar:
- %c = getelementptr inbounds %struct.double* %ptr, i64 0, i32 1
+ %c = getelementptr inbounds %struct.double, %struct.double* %ptr, i64 0, i32 1
tail call void @bar_double(%s.double* %c, double %add)
ret void
}
@@ -155,11 +155,11 @@ define void @store-pre-indexed-double(%struct.double* %ptr, double %val) nounwin
; CHECK-LABEL: store-pre-indexed-double
; CHECK: str d{{[0-9]+}}, [x{{[0-9]+}}, #32]!
entry:
- %a = getelementptr inbounds %struct.double* %ptr, i64 0, i32 1, i32 0
+ %a = getelementptr inbounds %struct.double, %struct.double* %ptr, i64 0, i32 1, i32 0
store double %val, double* %a, align 4
br label %bar
bar:
- %c = getelementptr inbounds %struct.double* %ptr, i64 0, i32 1
+ %c = getelementptr inbounds %struct.double, %struct.double* %ptr, i64 0, i32 1
tail call void @bar_double(%s.double* %c, double %val)
ret void
}
@@ -186,15 +186,15 @@ define i32 @load-pre-indexed-word2(%pre.struct.i32** %this, i1 %cond,
; CHECK: ldr w{{[0-9]+}}, [x{{[0-9]+}}, #4]!
br i1 %cond, label %if.then, label %if.end
if.then:
- %load1 = load %pre.struct.i32** %this
- %gep1 = getelementptr inbounds %pre.struct.i32* %load1, i64 0, i32 1
+ %load1 = load %pre.struct.i32*, %pre.struct.i32** %this
+ %gep1 = getelementptr inbounds %pre.struct.i32, %pre.struct.i32* %load1, i64 0, i32 1
br label %return
if.end:
- %gep2 = getelementptr inbounds %pre.struct.i32* %load2, i64 0, i32 2
+ %gep2 = getelementptr inbounds %pre.struct.i32, %pre.struct.i32* %load2, i64 0, i32 2
br label %return
return:
%retptr = phi i32* [ %gep1, %if.then ], [ %gep2, %if.end ]
- %ret = load i32* %retptr
+ %ret = load i32, i32* %retptr
ret i32 %ret
}
@@ -204,15 +204,15 @@ define i64 @load-pre-indexed-doubleword2(%pre.struct.i64** %this, i1 %cond,
; CHECK: ldr x{{[0-9]+}}, [x{{[0-9]+}}, #8]!
br i1 %cond, label %if.then, label %if.end
if.then:
- %load1 = load %pre.struct.i64** %this
- %gep1 = getelementptr inbounds %pre.struct.i64* %load1, i64 0, i32 1
+ %load1 = load %pre.struct.i64*, %pre.struct.i64** %this
+ %gep1 = getelementptr inbounds %pre.struct.i64, %pre.struct.i64* %load1, i64 0, i32 1
br label %return
if.end:
- %gep2 = getelementptr inbounds %pre.struct.i64* %load2, i64 0, i32 2
+ %gep2 = getelementptr inbounds %pre.struct.i64, %pre.struct.i64* %load2, i64 0, i32 2
br label %return
return:
%retptr = phi i64* [ %gep1, %if.then ], [ %gep2, %if.end ]
- %ret = load i64* %retptr
+ %ret = load i64, i64* %retptr
ret i64 %ret
}
@@ -222,15 +222,15 @@ define <2 x i64> @load-pre-indexed-quadword2(%pre.struct.i128** %this, i1 %cond,
; CHECK: ldr q{{[0-9]+}}, [x{{[0-9]+}}, #16]!
br i1 %cond, label %if.then, label %if.end
if.then:
- %load1 = load %pre.struct.i128** %this
- %gep1 = getelementptr inbounds %pre.struct.i128* %load1, i64 0, i32 1
+ %load1 = load %pre.struct.i128*, %pre.struct.i128** %this
+ %gep1 = getelementptr inbounds %pre.struct.i128, %pre.struct.i128* %load1, i64 0, i32 1
br label %return
if.end:
- %gep2 = getelementptr inbounds %pre.struct.i128* %load2, i64 0, i32 2
+ %gep2 = getelementptr inbounds %pre.struct.i128, %pre.struct.i128* %load2, i64 0, i32 2
br label %return
return:
%retptr = phi <2 x i64>* [ %gep1, %if.then ], [ %gep2, %if.end ]
- %ret = load <2 x i64>* %retptr
+ %ret = load <2 x i64>, <2 x i64>* %retptr
ret <2 x i64> %ret
}
@@ -240,15 +240,15 @@ define float @load-pre-indexed-float2(%pre.struct.float** %this, i1 %cond,
; CHECK: ldr s{{[0-9]+}}, [x{{[0-9]+}}, #4]!
br i1 %cond, label %if.then, label %if.end
if.then:
- %load1 = load %pre.struct.float** %this
- %gep1 = getelementptr inbounds %pre.struct.float* %load1, i64 0, i32 1
+ %load1 = load %pre.struct.float*, %pre.struct.float** %this
+ %gep1 = getelementptr inbounds %pre.struct.float, %pre.struct.float* %load1, i64 0, i32 1
br label %return
if.end:
- %gep2 = getelementptr inbounds %pre.struct.float* %load2, i64 0, i32 2
+ %gep2 = getelementptr inbounds %pre.struct.float, %pre.struct.float* %load2, i64 0, i32 2
br label %return
return:
%retptr = phi float* [ %gep1, %if.then ], [ %gep2, %if.end ]
- %ret = load float* %retptr
+ %ret = load float, float* %retptr
ret float %ret
}
@@ -258,15 +258,15 @@ define double @load-pre-indexed-double2(%pre.struct.double** %this, i1 %cond,
; CHECK: ldr d{{[0-9]+}}, [x{{[0-9]+}}, #8]!
br i1 %cond, label %if.then, label %if.end
if.then:
- %load1 = load %pre.struct.double** %this
- %gep1 = getelementptr inbounds %pre.struct.double* %load1, i64 0, i32 1
+ %load1 = load %pre.struct.double*, %pre.struct.double** %this
+ %gep1 = getelementptr inbounds %pre.struct.double, %pre.struct.double* %load1, i64 0, i32 1
br label %return
if.end:
- %gep2 = getelementptr inbounds %pre.struct.double* %load2, i64 0, i32 2
+ %gep2 = getelementptr inbounds %pre.struct.double, %pre.struct.double* %load2, i64 0, i32 2
br label %return
return:
%retptr = phi double* [ %gep1, %if.then ], [ %gep2, %if.end ]
- %ret = load double* %retptr
+ %ret = load double, double* %retptr
ret double %ret
}
@@ -287,11 +287,11 @@ define void @store-pre-indexed-word2(%pre.struct.i32** %this, i1 %cond,
; CHECK: str w{{[0-9]+}}, [x{{[0-9]+}}, #4]!
br i1 %cond, label %if.then, label %if.end
if.then:
- %load1 = load %pre.struct.i32** %this
- %gep1 = getelementptr inbounds %pre.struct.i32* %load1, i64 0, i32 1
+ %load1 = load %pre.struct.i32*, %pre.struct.i32** %this
+ %gep1 = getelementptr inbounds %pre.struct.i32, %pre.struct.i32* %load1, i64 0, i32 1
br label %return
if.end:
- %gep2 = getelementptr inbounds %pre.struct.i32* %load2, i64 0, i32 2
+ %gep2 = getelementptr inbounds %pre.struct.i32, %pre.struct.i32* %load2, i64 0, i32 2
br label %return
return:
%retptr = phi i32* [ %gep1, %if.then ], [ %gep2, %if.end ]
@@ -306,11 +306,11 @@ define void @store-pre-indexed-doubleword2(%pre.struct.i64** %this, i1 %cond,
; CHECK: str x{{[0-9]+}}, [x{{[0-9]+}}, #8]!
br i1 %cond, label %if.then, label %if.end
if.then:
- %load1 = load %pre.struct.i64** %this
- %gep1 = getelementptr inbounds %pre.struct.i64* %load1, i64 0, i32 1
+ %load1 = load %pre.struct.i64*, %pre.struct.i64** %this
+ %gep1 = getelementptr inbounds %pre.struct.i64, %pre.struct.i64* %load1, i64 0, i32 1
br label %return
if.end:
- %gep2 = getelementptr inbounds %pre.struct.i64* %load2, i64 0, i32 2
+ %gep2 = getelementptr inbounds %pre.struct.i64, %pre.struct.i64* %load2, i64 0, i32 2
br label %return
return:
%retptr = phi i64* [ %gep1, %if.then ], [ %gep2, %if.end ]
@@ -325,11 +325,11 @@ define void @store-pre-indexed-quadword2(%pre.struct.i128** %this, i1 %cond,
; CHECK: str q{{[0-9]+}}, [x{{[0-9]+}}, #16]!
br i1 %cond, label %if.then, label %if.end
if.then:
- %load1 = load %pre.struct.i128** %this
- %gep1 = getelementptr inbounds %pre.struct.i128* %load1, i64 0, i32 1
+ %load1 = load %pre.struct.i128*, %pre.struct.i128** %this
+ %gep1 = getelementptr inbounds %pre.struct.i128, %pre.struct.i128* %load1, i64 0, i32 1
br label %return
if.end:
- %gep2 = getelementptr inbounds %pre.struct.i128* %load2, i64 0, i32 2
+ %gep2 = getelementptr inbounds %pre.struct.i128, %pre.struct.i128* %load2, i64 0, i32 2
br label %return
return:
%retptr = phi <2 x i64>* [ %gep1, %if.then ], [ %gep2, %if.end ]
@@ -344,11 +344,11 @@ define void @store-pre-indexed-float2(%pre.struct.float** %this, i1 %cond,
; CHECK: str s{{[0-9]+}}, [x{{[0-9]+}}, #4]!
br i1 %cond, label %if.then, label %if.end
if.then:
- %load1 = load %pre.struct.float** %this
- %gep1 = getelementptr inbounds %pre.struct.float* %load1, i64 0, i32 1
+ %load1 = load %pre.struct.float*, %pre.struct.float** %this
+ %gep1 = getelementptr inbounds %pre.struct.float, %pre.struct.float* %load1, i64 0, i32 1
br label %return
if.end:
- %gep2 = getelementptr inbounds %pre.struct.float* %load2, i64 0, i32 2
+ %gep2 = getelementptr inbounds %pre.struct.float, %pre.struct.float* %load2, i64 0, i32 2
br label %return
return:
%retptr = phi float* [ %gep1, %if.then ], [ %gep2, %if.end ]
@@ -363,11 +363,11 @@ define void @store-pre-indexed-double2(%pre.struct.double** %this, i1 %cond,
; CHECK: str d{{[0-9]+}}, [x{{[0-9]+}}, #8]!
br i1 %cond, label %if.then, label %if.end
if.then:
- %load1 = load %pre.struct.double** %this
- %gep1 = getelementptr inbounds %pre.struct.double* %load1, i64 0, i32 1
+ %load1 = load %pre.struct.double*, %pre.struct.double** %this
+ %gep1 = getelementptr inbounds %pre.struct.double, %pre.struct.double* %load1, i64 0, i32 1
br label %return
if.end:
- %gep2 = getelementptr inbounds %pre.struct.double* %load2, i64 0, i32 2
+ %gep2 = getelementptr inbounds %pre.struct.double, %pre.struct.double* %load2, i64 0, i32 2
br label %return
return:
%retptr = phi double* [ %gep1, %if.then ], [ %gep2, %if.end ]
@@ -389,19 +389,19 @@ define void @load-post-indexed-word(i32* %array, i64 %count) nounwind {
; CHECK-LABEL: load-post-indexed-word
; CHECK: ldr w{{[0-9]+}}, [x{{[0-9]+}}], #16
entry:
- %gep1 = getelementptr i32* %array, i64 2
+ %gep1 = getelementptr i32, i32* %array, i64 2
br label %body
body:
%iv2 = phi i32* [ %gep3, %body ], [ %gep1, %entry ]
%iv = phi i64 [ %iv.next, %body ], [ %count, %entry ]
- %gep2 = getelementptr i32* %iv2, i64 -1
- %load = load i32* %gep2
+ %gep2 = getelementptr i32, i32* %iv2, i64 -1
+ %load = load i32, i32* %gep2
call void @use-word(i32 %load)
- %load2 = load i32* %iv2
+ %load2 = load i32, i32* %iv2
call void @use-word(i32 %load2)
%iv.next = add i64 %iv, -4
- %gep3 = getelementptr i32* %iv2, i64 4
+ %gep3 = getelementptr i32, i32* %iv2, i64 4
%cond = icmp eq i64 %iv.next, 0
br i1 %cond, label %exit, label %body
@@ -413,19 +413,19 @@ define void @load-post-indexed-doubleword(i64* %array, i64 %count) nounwind {
; CHECK-LABEL: load-post-indexed-doubleword
; CHECK: ldr x{{[0-9]+}}, [x{{[0-9]+}}], #32
entry:
- %gep1 = getelementptr i64* %array, i64 2
+ %gep1 = getelementptr i64, i64* %array, i64 2
br label %body
body:
%iv2 = phi i64* [ %gep3, %body ], [ %gep1, %entry ]
%iv = phi i64 [ %iv.next, %body ], [ %count, %entry ]
- %gep2 = getelementptr i64* %iv2, i64 -1
- %load = load i64* %gep2
+ %gep2 = getelementptr i64, i64* %iv2, i64 -1
+ %load = load i64, i64* %gep2
call void @use-doubleword(i64 %load)
- %load2 = load i64* %iv2
+ %load2 = load i64, i64* %iv2
call void @use-doubleword(i64 %load2)
%iv.next = add i64 %iv, -4
- %gep3 = getelementptr i64* %iv2, i64 4
+ %gep3 = getelementptr i64, i64* %iv2, i64 4
%cond = icmp eq i64 %iv.next, 0
br i1 %cond, label %exit, label %body
@@ -437,19 +437,19 @@ define void @load-post-indexed-quadword(<2 x i64>* %array, i64 %count) nounwind
; CHECK-LABEL: load-post-indexed-quadword
; CHECK: ldr q{{[0-9]+}}, [x{{[0-9]+}}], #64
entry:
- %gep1 = getelementptr <2 x i64>* %array, i64 2
+ %gep1 = getelementptr <2 x i64>, <2 x i64>* %array, i64 2
br label %body
body:
%iv2 = phi <2 x i64>* [ %gep3, %body ], [ %gep1, %entry ]
%iv = phi i64 [ %iv.next, %body ], [ %count, %entry ]
- %gep2 = getelementptr <2 x i64>* %iv2, i64 -1
- %load = load <2 x i64>* %gep2
+ %gep2 = getelementptr <2 x i64>, <2 x i64>* %iv2, i64 -1
+ %load = load <2 x i64>, <2 x i64>* %gep2
call void @use-quadword(<2 x i64> %load)
- %load2 = load <2 x i64>* %iv2
+ %load2 = load <2 x i64>, <2 x i64>* %iv2
call void @use-quadword(<2 x i64> %load2)
%iv.next = add i64 %iv, -4
- %gep3 = getelementptr <2 x i64>* %iv2, i64 4
+ %gep3 = getelementptr <2 x i64>, <2 x i64>* %iv2, i64 4
%cond = icmp eq i64 %iv.next, 0
br i1 %cond, label %exit, label %body
@@ -461,19 +461,19 @@ define void @load-post-indexed-float(float* %array, i64 %count) nounwind {
; CHECK-LABEL: load-post-indexed-float
; CHECK: ldr s{{[0-9]+}}, [x{{[0-9]+}}], #16
entry:
- %gep1 = getelementptr float* %array, i64 2
+ %gep1 = getelementptr float, float* %array, i64 2
br label %body
body:
%iv2 = phi float* [ %gep3, %body ], [ %gep1, %entry ]
%iv = phi i64 [ %iv.next, %body ], [ %count, %entry ]
- %gep2 = getelementptr float* %iv2, i64 -1
- %load = load float* %gep2
+ %gep2 = getelementptr float, float* %iv2, i64 -1
+ %load = load float, float* %gep2
call void @use-float(float %load)
- %load2 = load float* %iv2
+ %load2 = load float, float* %iv2
call void @use-float(float %load2)
%iv.next = add i64 %iv, -4
- %gep3 = getelementptr float* %iv2, i64 4
+ %gep3 = getelementptr float, float* %iv2, i64 4
%cond = icmp eq i64 %iv.next, 0
br i1 %cond, label %exit, label %body
@@ -485,19 +485,19 @@ define void @load-post-indexed-double(double* %array, i64 %count) nounwind {
; CHECK-LABEL: load-post-indexed-double
; CHECK: ldr d{{[0-9]+}}, [x{{[0-9]+}}], #32
entry:
- %gep1 = getelementptr double* %array, i64 2
+ %gep1 = getelementptr double, double* %array, i64 2
br label %body
body:
%iv2 = phi double* [ %gep3, %body ], [ %gep1, %entry ]
%iv = phi i64 [ %iv.next, %body ], [ %count, %entry ]
- %gep2 = getelementptr double* %iv2, i64 -1
- %load = load double* %gep2
+ %gep2 = getelementptr double, double* %iv2, i64 -1
+ %load = load double, double* %gep2
call void @use-double(double %load)
- %load2 = load double* %iv2
+ %load2 = load double, double* %iv2
call void @use-double(double %load2)
%iv.next = add i64 %iv, -4
- %gep3 = getelementptr double* %iv2, i64 4
+ %gep3 = getelementptr double, double* %iv2, i64 4
%cond = icmp eq i64 %iv.next, 0
br i1 %cond, label %exit, label %body
@@ -519,18 +519,18 @@ define void @store-post-indexed-word(i32* %array, i64 %count, i32 %val) nounwind
; CHECK-LABEL: store-post-indexed-word
; CHECK: str w{{[0-9]+}}, [x{{[0-9]+}}], #16
entry:
- %gep1 = getelementptr i32* %array, i64 2
+ %gep1 = getelementptr i32, i32* %array, i64 2
br label %body
body:
%iv2 = phi i32* [ %gep3, %body ], [ %gep1, %entry ]
%iv = phi i64 [ %iv.next, %body ], [ %count, %entry ]
- %gep2 = getelementptr i32* %iv2, i64 -1
- %load = load i32* %gep2
+ %gep2 = getelementptr i32, i32* %iv2, i64 -1
+ %load = load i32, i32* %gep2
call void @use-word(i32 %load)
store i32 %val, i32* %iv2
%iv.next = add i64 %iv, -4
- %gep3 = getelementptr i32* %iv2, i64 4
+ %gep3 = getelementptr i32, i32* %iv2, i64 4
%cond = icmp eq i64 %iv.next, 0
br i1 %cond, label %exit, label %body
@@ -542,18 +542,18 @@ define void @store-post-indexed-doubleword(i64* %array, i64 %count, i64 %val) no
; CHECK-LABEL: store-post-indexed-doubleword
; CHECK: str x{{[0-9]+}}, [x{{[0-9]+}}], #32
entry:
- %gep1 = getelementptr i64* %array, i64 2
+ %gep1 = getelementptr i64, i64* %array, i64 2
br label %body
body:
%iv2 = phi i64* [ %gep3, %body ], [ %gep1, %entry ]
%iv = phi i64 [ %iv.next, %body ], [ %count, %entry ]
- %gep2 = getelementptr i64* %iv2, i64 -1
- %load = load i64* %gep2
+ %gep2 = getelementptr i64, i64* %iv2, i64 -1
+ %load = load i64, i64* %gep2
call void @use-doubleword(i64 %load)
store i64 %val, i64* %iv2
%iv.next = add i64 %iv, -4
- %gep3 = getelementptr i64* %iv2, i64 4
+ %gep3 = getelementptr i64, i64* %iv2, i64 4
%cond = icmp eq i64 %iv.next, 0
br i1 %cond, label %exit, label %body
@@ -565,18 +565,18 @@ define void @store-post-indexed-quadword(<2 x i64>* %array, i64 %count, <2 x i64
; CHECK-LABEL: store-post-indexed-quadword
; CHECK: str q{{[0-9]+}}, [x{{[0-9]+}}], #64
entry:
- %gep1 = getelementptr <2 x i64>* %array, i64 2
+ %gep1 = getelementptr <2 x i64>, <2 x i64>* %array, i64 2
br label %body
body:
%iv2 = phi <2 x i64>* [ %gep3, %body ], [ %gep1, %entry ]
%iv = phi i64 [ %iv.next, %body ], [ %count, %entry ]
- %gep2 = getelementptr <2 x i64>* %iv2, i64 -1
- %load = load <2 x i64>* %gep2
+ %gep2 = getelementptr <2 x i64>, <2 x i64>* %iv2, i64 -1
+ %load = load <2 x i64>, <2 x i64>* %gep2
call void @use-quadword(<2 x i64> %load)
store <2 x i64> %val, <2 x i64>* %iv2
%iv.next = add i64 %iv, -4
- %gep3 = getelementptr <2 x i64>* %iv2, i64 4
+ %gep3 = getelementptr <2 x i64>, <2 x i64>* %iv2, i64 4
%cond = icmp eq i64 %iv.next, 0
br i1 %cond, label %exit, label %body
@@ -588,18 +588,18 @@ define void @store-post-indexed-float(float* %array, i64 %count, float %val) nou
; CHECK-LABEL: store-post-indexed-float
; CHECK: str s{{[0-9]+}}, [x{{[0-9]+}}], #16
entry:
- %gep1 = getelementptr float* %array, i64 2
+ %gep1 = getelementptr float, float* %array, i64 2
br label %body
body:
%iv2 = phi float* [ %gep3, %body ], [ %gep1, %entry ]
%iv = phi i64 [ %iv.next, %body ], [ %count, %entry ]
- %gep2 = getelementptr float* %iv2, i64 -1
- %load = load float* %gep2
+ %gep2 = getelementptr float, float* %iv2, i64 -1
+ %load = load float, float* %gep2
call void @use-float(float %load)
store float %val, float* %iv2
%iv.next = add i64 %iv, -4
- %gep3 = getelementptr float* %iv2, i64 4
+ %gep3 = getelementptr float, float* %iv2, i64 4
%cond = icmp eq i64 %iv.next, 0
br i1 %cond, label %exit, label %body
@@ -611,18 +611,18 @@ define void @store-post-indexed-double(double* %array, i64 %count, double %val)
; CHECK-LABEL: store-post-indexed-double
; CHECK: str d{{[0-9]+}}, [x{{[0-9]+}}], #32
entry:
- %gep1 = getelementptr double* %array, i64 2
+ %gep1 = getelementptr double, double* %array, i64 2
br label %body
body:
%iv2 = phi double* [ %gep3, %body ], [ %gep1, %entry ]
%iv = phi i64 [ %iv.next, %body ], [ %count, %entry ]
- %gep2 = getelementptr double* %iv2, i64 -1
- %load = load double* %gep2
+ %gep2 = getelementptr double, double* %iv2, i64 -1
+ %load = load double, double* %gep2
call void @use-double(double %load)
store double %val, double* %iv2
%iv.next = add i64 %iv, -4
- %gep3 = getelementptr double* %iv2, i64 4
+ %gep3 = getelementptr double, double* %iv2, i64 4
%cond = icmp eq i64 %iv.next, 0
br i1 %cond, label %exit, label %body
@@ -655,15 +655,15 @@ for.body:
%phi1 = phi i32* [ %gep4, %for.body ], [ %b, %0 ]
%phi2 = phi i32* [ %gep3, %for.body ], [ %a, %0 ]
%i = phi i64 [ %dec.i, %for.body], [ %count, %0 ]
- %gep1 = getelementptr i32* %phi1, i64 -1
- %load1 = load i32* %gep1
- %gep2 = getelementptr i32* %phi2, i64 -1
+ %gep1 = getelementptr i32, i32* %phi1, i64 -1
+ %load1 = load i32, i32* %gep1
+ %gep2 = getelementptr i32, i32* %phi2, i64 -1
store i32 %load1, i32* %gep2
- %load2 = load i32* %phi1
+ %load2 = load i32, i32* %phi1
store i32 %load2, i32* %phi2
%dec.i = add nsw i64 %i, -1
- %gep3 = getelementptr i32* %phi2, i64 -2
- %gep4 = getelementptr i32* %phi1, i64 -2
+ %gep3 = getelementptr i32, i32* %phi2, i64 -2
+ %gep4 = getelementptr i32, i32* %phi1, i64 -2
%cond = icmp sgt i64 %dec.i, 0
br i1 %cond, label %for.body, label %end
end:
@@ -679,15 +679,15 @@ for.body:
%phi1 = phi i64* [ %gep4, %for.body ], [ %b, %0 ]
%phi2 = phi i64* [ %gep3, %for.body ], [ %a, %0 ]
%i = phi i64 [ %dec.i, %for.body], [ %count, %0 ]
- %gep1 = getelementptr i64* %phi1, i64 -1
- %load1 = load i64* %gep1
- %gep2 = getelementptr i64* %phi2, i64 -1
+ %gep1 = getelementptr i64, i64* %phi1, i64 -1
+ %load1 = load i64, i64* %gep1
+ %gep2 = getelementptr i64, i64* %phi2, i64 -1
store i64 %load1, i64* %gep2
- %load2 = load i64* %phi1
+ %load2 = load i64, i64* %phi1
store i64 %load2, i64* %phi2
%dec.i = add nsw i64 %i, -1
- %gep3 = getelementptr i64* %phi2, i64 -2
- %gep4 = getelementptr i64* %phi1, i64 -2
+ %gep3 = getelementptr i64, i64* %phi2, i64 -2
+ %gep4 = getelementptr i64, i64* %phi1, i64 -2
%cond = icmp sgt i64 %dec.i, 0
br i1 %cond, label %for.body, label %end
end:
@@ -703,15 +703,15 @@ for.body:
%phi1 = phi <2 x i64>* [ %gep4, %for.body ], [ %b, %0 ]
%phi2 = phi <2 x i64>* [ %gep3, %for.body ], [ %a, %0 ]
%i = phi i64 [ %dec.i, %for.body], [ %count, %0 ]
- %gep1 = getelementptr <2 x i64>* %phi1, i64 -1
- %load1 = load <2 x i64>* %gep1
- %gep2 = getelementptr <2 x i64>* %phi2, i64 -1
+ %gep1 = getelementptr <2 x i64>, <2 x i64>* %phi1, i64 -1
+ %load1 = load <2 x i64>, <2 x i64>* %gep1
+ %gep2 = getelementptr <2 x i64>, <2 x i64>* %phi2, i64 -1
store <2 x i64> %load1, <2 x i64>* %gep2
- %load2 = load <2 x i64>* %phi1
+ %load2 = load <2 x i64>, <2 x i64>* %phi1
store <2 x i64> %load2, <2 x i64>* %phi2
%dec.i = add nsw i64 %i, -1
- %gep3 = getelementptr <2 x i64>* %phi2, i64 -2
- %gep4 = getelementptr <2 x i64>* %phi1, i64 -2
+ %gep3 = getelementptr <2 x i64>, <2 x i64>* %phi2, i64 -2
+ %gep4 = getelementptr <2 x i64>, <2 x i64>* %phi1, i64 -2
%cond = icmp sgt i64 %dec.i, 0
br i1 %cond, label %for.body, label %end
end:
@@ -727,15 +727,15 @@ for.body:
%phi1 = phi float* [ %gep4, %for.body ], [ %b, %0 ]
%phi2 = phi float* [ %gep3, %for.body ], [ %a, %0 ]
%i = phi i64 [ %dec.i, %for.body], [ %count, %0 ]
- %gep1 = getelementptr float* %phi1, i64 -1
- %load1 = load float* %gep1
- %gep2 = getelementptr float* %phi2, i64 -1
+ %gep1 = getelementptr float, float* %phi1, i64 -1
+ %load1 = load float, float* %gep1
+ %gep2 = getelementptr float, float* %phi2, i64 -1
store float %load1, float* %gep2
- %load2 = load float* %phi1
+ %load2 = load float, float* %phi1
store float %load2, float* %phi2
%dec.i = add nsw i64 %i, -1
- %gep3 = getelementptr float* %phi2, i64 -2
- %gep4 = getelementptr float* %phi1, i64 -2
+ %gep3 = getelementptr float, float* %phi2, i64 -2
+ %gep4 = getelementptr float, float* %phi1, i64 -2
%cond = icmp sgt i64 %dec.i, 0
br i1 %cond, label %for.body, label %end
end:
@@ -751,15 +751,15 @@ for.body:
%phi1 = phi double* [ %gep4, %for.body ], [ %b, %0 ]
%phi2 = phi double* [ %gep3, %for.body ], [ %a, %0 ]
%i = phi i64 [ %dec.i, %for.body], [ %count, %0 ]
- %gep1 = getelementptr double* %phi1, i64 -1
- %load1 = load double* %gep1
- %gep2 = getelementptr double* %phi2, i64 -1
+ %gep1 = getelementptr double, double* %phi1, i64 -1
+ %load1 = load double, double* %gep1
+ %gep2 = getelementptr double, double* %phi2, i64 -1
store double %load1, double* %gep2
- %load2 = load double* %phi1
+ %load2 = load double, double* %phi1
store double %load2, double* %phi2
%dec.i = add nsw i64 %i, -1
- %gep3 = getelementptr double* %phi2, i64 -2
- %gep4 = getelementptr double* %phi1, i64 -2
+ %gep3 = getelementptr double, double* %phi2, i64 -2
+ %gep4 = getelementptr double, double* %phi1, i64 -2
%cond = icmp sgt i64 %dec.i, 0
br i1 %cond, label %for.body, label %end
end:
diff --git a/test/CodeGen/AArch64/ldst-regoffset.ll b/test/CodeGen/AArch64/ldst-regoffset.ll
index e2fa08bcce69..85d6db104b66 100644
--- a/test/CodeGen/AArch64/ldst-regoffset.ll
+++ b/test/CodeGen/AArch64/ldst-regoffset.ll
@@ -12,14 +12,14 @@
define void @ldst_8bit(i8* %base, i32 %off32, i64 %off64) minsize {
; CHECK-LABEL: ldst_8bit:
- %addr8_sxtw = getelementptr i8* %base, i32 %off32
- %val8_sxtw = load volatile i8* %addr8_sxtw
+ %addr8_sxtw = getelementptr i8, i8* %base, i32 %off32
+ %val8_sxtw = load volatile i8, i8* %addr8_sxtw
%val32_signed = sext i8 %val8_sxtw to i32
store volatile i32 %val32_signed, i32* @var_32bit
; CHECK: ldrsb {{w[0-9]+}}, [{{x[0-9]+}}, {{[wx][0-9]+}}, sxtw]
- %addr_lsl = getelementptr i8* %base, i64 %off64
- %val8_lsl = load volatile i8* %addr_lsl
+ %addr_lsl = getelementptr i8, i8* %base, i64 %off64
+ %val8_lsl = load volatile i8, i8* %addr_lsl
%val32_unsigned = zext i8 %val8_lsl to i32
store volatile i32 %val32_unsigned, i32* @var_32bit
; CHECK: ldrb {{w[0-9]+}}, [{{x[0-9]+}}, {{x[0-9]+}}]
@@ -28,7 +28,7 @@ define void @ldst_8bit(i8* %base, i32 %off32, i64 %off64) minsize {
%offset_uxtw = zext i32 %off32 to i64
%addrint1_uxtw = add i64 %addrint_uxtw, %offset_uxtw
%addr_uxtw = inttoptr i64 %addrint1_uxtw to i8*
- %val8_uxtw = load volatile i8* %addr_uxtw
+ %val8_uxtw = load volatile i8, i8* %addr_uxtw
%newval8 = add i8 %val8_uxtw, 1
store volatile i8 %newval8, i8* @var_8bit
; CHECK: ldrb {{w[0-9]+}}, [{{x[0-9]+}}, {{[xw][0-9]+}}, uxtw]
@@ -40,14 +40,14 @@ define void @ldst_8bit(i8* %base, i32 %off32, i64 %off64) minsize {
define void @ldst_16bit(i16* %base, i32 %off32, i64 %off64) minsize {
; CHECK-LABEL: ldst_16bit:
- %addr8_sxtwN = getelementptr i16* %base, i32 %off32
- %val8_sxtwN = load volatile i16* %addr8_sxtwN
+ %addr8_sxtwN = getelementptr i16, i16* %base, i32 %off32
+ %val8_sxtwN = load volatile i16, i16* %addr8_sxtwN
%val32_signed = sext i16 %val8_sxtwN to i32
store volatile i32 %val32_signed, i32* @var_32bit
; CHECK: ldrsh {{w[0-9]+}}, [{{x[0-9]+}}, {{[xw][0-9]+}}, sxtw #1]
- %addr_lslN = getelementptr i16* %base, i64 %off64
- %val8_lslN = load volatile i16* %addr_lslN
+ %addr_lslN = getelementptr i16, i16* %base, i64 %off64
+ %val8_lslN = load volatile i16, i16* %addr_lslN
%val32_unsigned = zext i16 %val8_lslN to i32
store volatile i32 %val32_unsigned, i32* @var_32bit
; CHECK: ldrh {{w[0-9]+}}, [{{x[0-9]+}}, {{x[0-9]+}}, lsl #1]
@@ -56,7 +56,7 @@ define void @ldst_16bit(i16* %base, i32 %off32, i64 %off64) minsize {
%offset_uxtw = zext i32 %off32 to i64
%addrint1_uxtw = add i64 %addrint_uxtw, %offset_uxtw
%addr_uxtw = inttoptr i64 %addrint1_uxtw to i16*
- %val8_uxtw = load volatile i16* %addr_uxtw
+ %val8_uxtw = load volatile i16, i16* %addr_uxtw
%newval8 = add i16 %val8_uxtw, 1
store volatile i16 %newval8, i16* @var_16bit
; CHECK: ldrh {{w[0-9]+}}, [{{x[0-9]+}}, {{[xw][0-9]+}}, uxtw]
@@ -65,7 +65,7 @@ define void @ldst_16bit(i16* %base, i32 %off32, i64 %off64) minsize {
%offset_sxtw = sext i32 %off32 to i64
%addrint_sxtw = add i64 %base_sxtw, %offset_sxtw
%addr_sxtw = inttoptr i64 %addrint_sxtw to i16*
- %val16_sxtw = load volatile i16* %addr_sxtw
+ %val16_sxtw = load volatile i16, i16* %addr_sxtw
%val64_signed = sext i16 %val16_sxtw to i64
store volatile i64 %val64_signed, i64* @var_64bit
; CHECK: ldrsh {{x[0-9]+}}, [{{x[0-9]+}}, {{[wx][0-9]+}}, sxtw]
@@ -74,7 +74,7 @@ define void @ldst_16bit(i16* %base, i32 %off32, i64 %off64) minsize {
%base_lsl = ptrtoint i16* %base to i64
%addrint_lsl = add i64 %base_lsl, %off64
%addr_lsl = inttoptr i64 %addrint_lsl to i16*
- %val16_lsl = load volatile i16* %addr_lsl
+ %val16_lsl = load volatile i16, i16* %addr_lsl
%val64_unsigned = zext i16 %val16_lsl to i64
store volatile i64 %val64_unsigned, i64* @var_64bit
; CHECK: ldrh {{w[0-9]+}}, [{{x[0-9]+}}, {{x[0-9]+}}]
@@ -84,7 +84,7 @@ define void @ldst_16bit(i16* %base, i32 %off32, i64 %off64) minsize {
%offset2_uxtwN = shl i64 %offset_uxtwN, 1
%addrint_uxtwN = add i64 %base_uxtwN, %offset2_uxtwN
%addr_uxtwN = inttoptr i64 %addrint_uxtwN to i16*
- %val32 = load volatile i32* @var_32bit
+ %val32 = load volatile i32, i32* @var_32bit
%val16_trunc32 = trunc i32 %val32 to i16
store volatile i16 %val16_trunc32, i16* %addr_uxtwN
; CHECK: strh {{w[0-9]+}}, [{{x[0-9]+}}, {{[xw][0-9]+}}, uxtw #1]
@@ -94,13 +94,13 @@ define void @ldst_16bit(i16* %base, i32 %off32, i64 %off64) minsize {
define void @ldst_32bit(i32* %base, i32 %off32, i64 %off64) minsize {
; CHECK-LABEL: ldst_32bit:
- %addr_sxtwN = getelementptr i32* %base, i32 %off32
- %val_sxtwN = load volatile i32* %addr_sxtwN
+ %addr_sxtwN = getelementptr i32, i32* %base, i32 %off32
+ %val_sxtwN = load volatile i32, i32* %addr_sxtwN
store volatile i32 %val_sxtwN, i32* @var_32bit
; CHECK: ldr {{w[0-9]+}}, [{{x[0-9]+}}, {{[xw][0-9]+}}, sxtw #2]
- %addr_lslN = getelementptr i32* %base, i64 %off64
- %val_lslN = load volatile i32* %addr_lslN
+ %addr_lslN = getelementptr i32, i32* %base, i64 %off64
+ %val_lslN = load volatile i32, i32* %addr_lslN
store volatile i32 %val_lslN, i32* @var_32bit
; CHECK: ldr {{w[0-9]+}}, [{{x[0-9]+}}, {{x[0-9]+}}, lsl #2]
@@ -108,7 +108,7 @@ define void @ldst_32bit(i32* %base, i32 %off32, i64 %off64) minsize {
%offset_uxtw = zext i32 %off32 to i64
%addrint1_uxtw = add i64 %addrint_uxtw, %offset_uxtw
%addr_uxtw = inttoptr i64 %addrint1_uxtw to i32*
- %val_uxtw = load volatile i32* %addr_uxtw
+ %val_uxtw = load volatile i32, i32* %addr_uxtw
%newval8 = add i32 %val_uxtw, 1
store volatile i32 %newval8, i32* @var_32bit
; CHECK: ldr {{w[0-9]+}}, [{{x[0-9]+}}, {{[xw][0-9]+}}, uxtw]
@@ -118,7 +118,7 @@ define void @ldst_32bit(i32* %base, i32 %off32, i64 %off64) minsize {
%offset_sxtw = sext i32 %off32 to i64
%addrint_sxtw = add i64 %base_sxtw, %offset_sxtw
%addr_sxtw = inttoptr i64 %addrint_sxtw to i32*
- %val16_sxtw = load volatile i32* %addr_sxtw
+ %val16_sxtw = load volatile i32, i32* %addr_sxtw
%val64_signed = sext i32 %val16_sxtw to i64
store volatile i64 %val64_signed, i64* @var_64bit
; CHECK: ldrsw {{x[0-9]+}}, [{{x[0-9]+}}, {{[xw][0-9]+}}, sxtw]
@@ -127,7 +127,7 @@ define void @ldst_32bit(i32* %base, i32 %off32, i64 %off64) minsize {
%base_lsl = ptrtoint i32* %base to i64
%addrint_lsl = add i64 %base_lsl, %off64
%addr_lsl = inttoptr i64 %addrint_lsl to i32*
- %val16_lsl = load volatile i32* %addr_lsl
+ %val16_lsl = load volatile i32, i32* %addr_lsl
%val64_unsigned = zext i32 %val16_lsl to i64
store volatile i64 %val64_unsigned, i64* @var_64bit
; CHECK: ldr {{w[0-9]+}}, [{{x[0-9]+}}, {{x[0-9]+}}]
@@ -137,7 +137,7 @@ define void @ldst_32bit(i32* %base, i32 %off32, i64 %off64) minsize {
%offset2_uxtwN = shl i64 %offset_uxtwN, 2
%addrint_uxtwN = add i64 %base_uxtwN, %offset2_uxtwN
%addr_uxtwN = inttoptr i64 %addrint_uxtwN to i32*
- %val32 = load volatile i32* @var_32bit
+ %val32 = load volatile i32, i32* @var_32bit
store volatile i32 %val32, i32* %addr_uxtwN
; CHECK: str {{w[0-9]+}}, [{{x[0-9]+}}, {{[xw][0-9]+}}, uxtw #2]
ret void
@@ -146,13 +146,13 @@ define void @ldst_32bit(i32* %base, i32 %off32, i64 %off64) minsize {
define void @ldst_64bit(i64* %base, i32 %off32, i64 %off64) minsize {
; CHECK-LABEL: ldst_64bit:
- %addr_sxtwN = getelementptr i64* %base, i32 %off32
- %val_sxtwN = load volatile i64* %addr_sxtwN
+ %addr_sxtwN = getelementptr i64, i64* %base, i32 %off32
+ %val_sxtwN = load volatile i64, i64* %addr_sxtwN
store volatile i64 %val_sxtwN, i64* @var_64bit
; CHECK: ldr {{x[0-9]+}}, [{{x[0-9]+}}, {{[xw][0-9]+}}, sxtw #3]
- %addr_lslN = getelementptr i64* %base, i64 %off64
- %val_lslN = load volatile i64* %addr_lslN
+ %addr_lslN = getelementptr i64, i64* %base, i64 %off64
+ %val_lslN = load volatile i64, i64* %addr_lslN
store volatile i64 %val_lslN, i64* @var_64bit
; CHECK: ldr {{x[0-9]+}}, [{{x[0-9]+}}, {{x[0-9]+}}, lsl #3]
@@ -160,7 +160,7 @@ define void @ldst_64bit(i64* %base, i32 %off32, i64 %off64) minsize {
%offset_uxtw = zext i32 %off32 to i64
%addrint1_uxtw = add i64 %addrint_uxtw, %offset_uxtw
%addr_uxtw = inttoptr i64 %addrint1_uxtw to i64*
- %val8_uxtw = load volatile i64* %addr_uxtw
+ %val8_uxtw = load volatile i64, i64* %addr_uxtw
%newval8 = add i64 %val8_uxtw, 1
store volatile i64 %newval8, i64* @var_64bit
; CHECK: ldr {{x[0-9]+}}, [{{x[0-9]+}}, {{[xw][0-9]+}}, uxtw]
@@ -169,14 +169,14 @@ define void @ldst_64bit(i64* %base, i32 %off32, i64 %off64) minsize {
%offset_sxtw = sext i32 %off32 to i64
%addrint_sxtw = add i64 %base_sxtw, %offset_sxtw
%addr_sxtw = inttoptr i64 %addrint_sxtw to i64*
- %val64_sxtw = load volatile i64* %addr_sxtw
+ %val64_sxtw = load volatile i64, i64* %addr_sxtw
store volatile i64 %val64_sxtw, i64* @var_64bit
; CHECK: ldr {{x[0-9]+}}, [{{x[0-9]+}}, {{[xw][0-9]+}}, sxtw]
%base_lsl = ptrtoint i64* %base to i64
%addrint_lsl = add i64 %base_lsl, %off64
%addr_lsl = inttoptr i64 %addrint_lsl to i64*
- %val64_lsl = load volatile i64* %addr_lsl
+ %val64_lsl = load volatile i64, i64* %addr_lsl
store volatile i64 %val64_lsl, i64* @var_64bit
; CHECK: ldr {{x[0-9]+}}, [{{x[0-9]+}}, {{x[0-9]+}}]
@@ -185,7 +185,7 @@ define void @ldst_64bit(i64* %base, i32 %off32, i64 %off64) minsize {
%offset2_uxtwN = shl i64 %offset_uxtwN, 3
%addrint_uxtwN = add i64 %base_uxtwN, %offset2_uxtwN
%addr_uxtwN = inttoptr i64 %addrint_uxtwN to i64*
- %val64 = load volatile i64* @var_64bit
+ %val64 = load volatile i64, i64* @var_64bit
store volatile i64 %val64, i64* %addr_uxtwN
; CHECK: str {{x[0-9]+}}, [{{x[0-9]+}}, {{[xw][0-9]+}}, uxtw #3]
ret void
@@ -194,14 +194,14 @@ define void @ldst_64bit(i64* %base, i32 %off32, i64 %off64) minsize {
define void @ldst_float(float* %base, i32 %off32, i64 %off64) minsize {
; CHECK-LABEL: ldst_float:
- %addr_sxtwN = getelementptr float* %base, i32 %off32
- %val_sxtwN = load volatile float* %addr_sxtwN
+ %addr_sxtwN = getelementptr float, float* %base, i32 %off32
+ %val_sxtwN = load volatile float, float* %addr_sxtwN
store volatile float %val_sxtwN, float* @var_float
; CHECK: ldr {{s[0-9]+}}, [{{x[0-9]+}}, {{[xw][0-9]+}}, sxtw #2]
; CHECK-NOFP-NOT: ldr {{s[0-9]+}},
- %addr_lslN = getelementptr float* %base, i64 %off64
- %val_lslN = load volatile float* %addr_lslN
+ %addr_lslN = getelementptr float, float* %base, i64 %off64
+ %val_lslN = load volatile float, float* %addr_lslN
store volatile float %val_lslN, float* @var_float
; CHECK: ldr {{s[0-9]+}}, [{{x[0-9]+}}, {{x[0-9]+}}, lsl #2]
; CHECK-NOFP-NOT: ldr {{s[0-9]+}},
@@ -210,7 +210,7 @@ define void @ldst_float(float* %base, i32 %off32, i64 %off64) minsize {
%offset_uxtw = zext i32 %off32 to i64
%addrint1_uxtw = add i64 %addrint_uxtw, %offset_uxtw
%addr_uxtw = inttoptr i64 %addrint1_uxtw to float*
- %val_uxtw = load volatile float* %addr_uxtw
+ %val_uxtw = load volatile float, float* %addr_uxtw
store volatile float %val_uxtw, float* @var_float
; CHECK: ldr {{s[0-9]+}}, [{{x[0-9]+}}, {{[xw][0-9]+}}, uxtw]
; CHECK-NOFP-NOT: ldr {{s[0-9]+}},
@@ -219,7 +219,7 @@ define void @ldst_float(float* %base, i32 %off32, i64 %off64) minsize {
%offset_sxtw = sext i32 %off32 to i64
%addrint_sxtw = add i64 %base_sxtw, %offset_sxtw
%addr_sxtw = inttoptr i64 %addrint_sxtw to float*
- %val64_sxtw = load volatile float* %addr_sxtw
+ %val64_sxtw = load volatile float, float* %addr_sxtw
store volatile float %val64_sxtw, float* @var_float
; CHECK: ldr {{s[0-9]+}}, [{{x[0-9]+}}, {{[xw][0-9]+}}, sxtw]
; CHECK-NOFP-NOT: ldr {{s[0-9]+}},
@@ -227,7 +227,7 @@ define void @ldst_float(float* %base, i32 %off32, i64 %off64) minsize {
%base_lsl = ptrtoint float* %base to i64
%addrint_lsl = add i64 %base_lsl, %off64
%addr_lsl = inttoptr i64 %addrint_lsl to float*
- %val64_lsl = load volatile float* %addr_lsl
+ %val64_lsl = load volatile float, float* %addr_lsl
store volatile float %val64_lsl, float* @var_float
; CHECK: ldr {{s[0-9]+}}, [{{x[0-9]+}}, {{x[0-9]+}}]
; CHECK-NOFP-NOT: ldr {{s[0-9]+}},
@@ -237,7 +237,7 @@ define void @ldst_float(float* %base, i32 %off32, i64 %off64) minsize {
%offset2_uxtwN = shl i64 %offset_uxtwN, 2
%addrint_uxtwN = add i64 %base_uxtwN, %offset2_uxtwN
%addr_uxtwN = inttoptr i64 %addrint_uxtwN to float*
- %val64 = load volatile float* @var_float
+ %val64 = load volatile float, float* @var_float
store volatile float %val64, float* %addr_uxtwN
; CHECK: str {{s[0-9]+}}, [{{x[0-9]+}}, {{[xw][0-9]+}}, uxtw #2]
; CHECK-NOFP-NOT: ldr {{s[0-9]+}},
@@ -247,14 +247,14 @@ define void @ldst_float(float* %base, i32 %off32, i64 %off64) minsize {
define void @ldst_double(double* %base, i32 %off32, i64 %off64) minsize {
; CHECK-LABEL: ldst_double:
- %addr_sxtwN = getelementptr double* %base, i32 %off32
- %val_sxtwN = load volatile double* %addr_sxtwN
+ %addr_sxtwN = getelementptr double, double* %base, i32 %off32
+ %val_sxtwN = load volatile double, double* %addr_sxtwN
store volatile double %val_sxtwN, double* @var_double
; CHECK: ldr {{d[0-9]+}}, [{{x[0-9]+}}, {{[xw][0-9]+}}, sxtw #3]
; CHECK-NOFP-NOT: ldr {{d[0-9]+}},
- %addr_lslN = getelementptr double* %base, i64 %off64
- %val_lslN = load volatile double* %addr_lslN
+ %addr_lslN = getelementptr double, double* %base, i64 %off64
+ %val_lslN = load volatile double, double* %addr_lslN
store volatile double %val_lslN, double* @var_double
; CHECK: ldr {{d[0-9]+}}, [{{x[0-9]+}}, {{x[0-9]+}}, lsl #3]
; CHECK-NOFP-NOT: ldr {{d[0-9]+}},
@@ -263,7 +263,7 @@ define void @ldst_double(double* %base, i32 %off32, i64 %off64) minsize {
%offset_uxtw = zext i32 %off32 to i64
%addrint1_uxtw = add i64 %addrint_uxtw, %offset_uxtw
%addr_uxtw = inttoptr i64 %addrint1_uxtw to double*
- %val_uxtw = load volatile double* %addr_uxtw
+ %val_uxtw = load volatile double, double* %addr_uxtw
store volatile double %val_uxtw, double* @var_double
; CHECK: ldr {{d[0-9]+}}, [{{x[0-9]+}}, {{[xw][0-9]+}}, uxtw]
; CHECK-NOFP-NOT: ldr {{d[0-9]+}},
@@ -272,7 +272,7 @@ define void @ldst_double(double* %base, i32 %off32, i64 %off64) minsize {
%offset_sxtw = sext i32 %off32 to i64
%addrint_sxtw = add i64 %base_sxtw, %offset_sxtw
%addr_sxtw = inttoptr i64 %addrint_sxtw to double*
- %val64_sxtw = load volatile double* %addr_sxtw
+ %val64_sxtw = load volatile double, double* %addr_sxtw
store volatile double %val64_sxtw, double* @var_double
; CHECK: ldr {{d[0-9]+}}, [{{x[0-9]+}}, {{[xw][0-9]+}}, sxtw]
; CHECK-NOFP-NOT: ldr {{d[0-9]+}},
@@ -280,7 +280,7 @@ define void @ldst_double(double* %base, i32 %off32, i64 %off64) minsize {
%base_lsl = ptrtoint double* %base to i64
%addrint_lsl = add i64 %base_lsl, %off64
%addr_lsl = inttoptr i64 %addrint_lsl to double*
- %val64_lsl = load volatile double* %addr_lsl
+ %val64_lsl = load volatile double, double* %addr_lsl
store volatile double %val64_lsl, double* @var_double
; CHECK: ldr {{d[0-9]+}}, [{{x[0-9]+}}, {{x[0-9]+}}]
; CHECK-NOFP-NOT: ldr {{d[0-9]+}},
@@ -290,7 +290,7 @@ define void @ldst_double(double* %base, i32 %off32, i64 %off64) minsize {
%offset2_uxtwN = shl i64 %offset_uxtwN, 3
%addrint_uxtwN = add i64 %base_uxtwN, %offset2_uxtwN
%addr_uxtwN = inttoptr i64 %addrint_uxtwN to double*
- %val64 = load volatile double* @var_double
+ %val64 = load volatile double, double* @var_double
store volatile double %val64, double* %addr_uxtwN
; CHECK: str {{d[0-9]+}}, [{{x[0-9]+}}, {{[xw][0-9]+}}, uxtw #3]
; CHECK-NOFP-NOT: ldr {{d[0-9]+}},
@@ -301,14 +301,14 @@ define void @ldst_double(double* %base, i32 %off32, i64 %off64) minsize {
define void @ldst_128bit(fp128* %base, i32 %off32, i64 %off64) minsize {
; CHECK-LABEL: ldst_128bit:
- %addr_sxtwN = getelementptr fp128* %base, i32 %off32
- %val_sxtwN = load volatile fp128* %addr_sxtwN
+ %addr_sxtwN = getelementptr fp128, fp128* %base, i32 %off32
+ %val_sxtwN = load volatile fp128, fp128* %addr_sxtwN
store volatile fp128 %val_sxtwN, fp128* %base
; CHECK: ldr {{q[0-9]+}}, [{{x[0-9]+}}, {{[xw][0-9]+}}, sxtw #4]
; CHECK-NOFP-NOT: ldr {{q[0-9]+}}, [{{x[0-9]+}}, {{[xw][0-9]+}}, sxtw #4]
- %addr_lslN = getelementptr fp128* %base, i64 %off64
- %val_lslN = load volatile fp128* %addr_lslN
+ %addr_lslN = getelementptr fp128, fp128* %base, i64 %off64
+ %val_lslN = load volatile fp128, fp128* %addr_lslN
store volatile fp128 %val_lslN, fp128* %base
; CHECK: ldr {{q[0-9]+}}, [{{x[0-9]+}}, {{x[0-9]+}}, lsl #4]
; CHECK-NOFP-NOT: ldr {{q[0-9]+}}, [{{x[0-9]+}}, {{[xw][0-9]+}}, sxtw #4]
@@ -317,7 +317,7 @@ define void @ldst_128bit(fp128* %base, i32 %off32, i64 %off64) minsize {
%offset_uxtw = zext i32 %off32 to i64
%addrint1_uxtw = add i64 %addrint_uxtw, %offset_uxtw
%addr_uxtw = inttoptr i64 %addrint1_uxtw to fp128*
- %val_uxtw = load volatile fp128* %addr_uxtw
+ %val_uxtw = load volatile fp128, fp128* %addr_uxtw
store volatile fp128 %val_uxtw, fp128* %base
; CHECK: ldr {{q[0-9]+}}, [{{x[0-9]+}}, {{[xw][0-9]+}}, uxtw]
; CHECK-NOFP-NOT: ldr {{q[0-9]+}}, [{{x[0-9]+}}, {{[xw][0-9]+}}, sxtw #4]
@@ -326,7 +326,7 @@ define void @ldst_128bit(fp128* %base, i32 %off32, i64 %off64) minsize {
%offset_sxtw = sext i32 %off32 to i64
%addrint_sxtw = add i64 %base_sxtw, %offset_sxtw
%addr_sxtw = inttoptr i64 %addrint_sxtw to fp128*
- %val64_sxtw = load volatile fp128* %addr_sxtw
+ %val64_sxtw = load volatile fp128, fp128* %addr_sxtw
store volatile fp128 %val64_sxtw, fp128* %base
; CHECK: ldr {{q[0-9]+}}, [{{x[0-9]+}}, {{[xw][0-9]+}}, sxtw]
; CHECK-NOFP-NOT: ldr {{q[0-9]+}}, [{{x[0-9]+}}, {{[xw][0-9]+}}, sxtw #4]
@@ -334,7 +334,7 @@ define void @ldst_128bit(fp128* %base, i32 %off32, i64 %off64) minsize {
%base_lsl = ptrtoint fp128* %base to i64
%addrint_lsl = add i64 %base_lsl, %off64
%addr_lsl = inttoptr i64 %addrint_lsl to fp128*
- %val64_lsl = load volatile fp128* %addr_lsl
+ %val64_lsl = load volatile fp128, fp128* %addr_lsl
store volatile fp128 %val64_lsl, fp128* %base
; CHECK: ldr {{q[0-9]+}}, [{{x[0-9]+}}, {{x[0-9]+}}]
; CHECK-NOFP-NOT: ldr {{q[0-9]+}}, [{{x[0-9]+}}, {{[xw][0-9]+}}, sxtw #4]
@@ -344,7 +344,7 @@ define void @ldst_128bit(fp128* %base, i32 %off32, i64 %off64) minsize {
%offset2_uxtwN = shl i64 %offset_uxtwN, 4
%addrint_uxtwN = add i64 %base_uxtwN, %offset2_uxtwN
%addr_uxtwN = inttoptr i64 %addrint_uxtwN to fp128*
- %val64 = load volatile fp128* %base
+ %val64 = load volatile fp128, fp128* %base
store volatile fp128 %val64, fp128* %addr_uxtwN
; CHECK: str {{q[0-9]+}}, [{{x[0-9]+}}, {{[xw][0-9]+}}, uxtw #4]
; CHECK-NOFP-NOT: ldr {{q[0-9]+}}, [{{x[0-9]+}}, {{[xw][0-9]+}}, sxtw #4]
diff --git a/test/CodeGen/AArch64/ldst-unscaledimm.ll b/test/CodeGen/AArch64/ldst-unscaledimm.ll
index 1de8443d9ed2..a3625975822c 100644
--- a/test/CodeGen/AArch64/ldst-unscaledimm.ll
+++ b/test/CodeGen/AArch64/ldst-unscaledimm.ll
@@ -16,32 +16,32 @@ define void @ldst_8bit() {
; No architectural support for loads to 16-bit or 8-bit since we
; promote i8 during lowering.
- %addr_8bit = load i8** @varptr
+ %addr_8bit = load i8*, i8** @varptr
; match a sign-extending load 8-bit -> 32-bit
- %addr_sext32 = getelementptr i8* %addr_8bit, i64 -256
- %val8_sext32 = load volatile i8* %addr_sext32
+ %addr_sext32 = getelementptr i8, i8* %addr_8bit, i64 -256
+ %val8_sext32 = load volatile i8, i8* %addr_sext32
%val32_signed = sext i8 %val8_sext32 to i32
store volatile i32 %val32_signed, i32* @var_32bit
; CHECK: ldursb {{w[0-9]+}}, [{{x[0-9]+}}, #-256]
; match a zero-extending load volatile 8-bit -> 32-bit
- %addr_zext32 = getelementptr i8* %addr_8bit, i64 -12
- %val8_zext32 = load volatile i8* %addr_zext32
+ %addr_zext32 = getelementptr i8, i8* %addr_8bit, i64 -12
+ %val8_zext32 = load volatile i8, i8* %addr_zext32
%val32_unsigned = zext i8 %val8_zext32 to i32
store volatile i32 %val32_unsigned, i32* @var_32bit
; CHECK: ldurb {{w[0-9]+}}, [{{x[0-9]+}}, #-12]
; match an any-extending load volatile 8-bit -> 32-bit
- %addr_anyext = getelementptr i8* %addr_8bit, i64 -1
- %val8_anyext = load volatile i8* %addr_anyext
+ %addr_anyext = getelementptr i8, i8* %addr_8bit, i64 -1
+ %val8_anyext = load volatile i8, i8* %addr_anyext
%newval8 = add i8 %val8_anyext, 1
store volatile i8 %newval8, i8* @var_8bit
; CHECK: ldurb {{w[0-9]+}}, [{{x[0-9]+}}, #-1]
; match a sign-extending load volatile 8-bit -> 64-bit
- %addr_sext64 = getelementptr i8* %addr_8bit, i64 -5
- %val8_sext64 = load volatile i8* %addr_sext64
+ %addr_sext64 = getelementptr i8, i8* %addr_8bit, i64 -5
+ %val8_sext64 = load volatile i8, i8* %addr_sext64
%val64_signed = sext i8 %val8_sext64 to i64
store volatile i64 %val64_signed, i64* @var_64bit
; CHECK: ldursb {{x[0-9]+}}, [{{x[0-9]+}}, #-5]
@@ -49,22 +49,22 @@ define void @ldst_8bit() {
; match a zero-extending load volatile 8-bit -> 64-bit.
; This uses the fact that ldrb w0, [x0] will zero out the high 32-bits
; of x0 so it's identical to loading to 32-bits.
- %addr_zext64 = getelementptr i8* %addr_8bit, i64 -9
- %val8_zext64 = load volatile i8* %addr_zext64
+ %addr_zext64 = getelementptr i8, i8* %addr_8bit, i64 -9
+ %val8_zext64 = load volatile i8, i8* %addr_zext64
%val64_unsigned = zext i8 %val8_zext64 to i64
store volatile i64 %val64_unsigned, i64* @var_64bit
; CHECK: ldurb {{w[0-9]+}}, [{{x[0-9]+}}, #-9]
; truncating store volatile 32-bits to 8-bits
- %addr_trunc32 = getelementptr i8* %addr_8bit, i64 -256
- %val32 = load volatile i32* @var_32bit
+ %addr_trunc32 = getelementptr i8, i8* %addr_8bit, i64 -256
+ %val32 = load volatile i32, i32* @var_32bit
%val8_trunc32 = trunc i32 %val32 to i8
store volatile i8 %val8_trunc32, i8* %addr_trunc32
; CHECK: sturb {{w[0-9]+}}, [{{x[0-9]+}}, #-256]
; truncating store volatile 64-bits to 8-bits
- %addr_trunc64 = getelementptr i8* %addr_8bit, i64 -1
- %val64 = load volatile i64* @var_64bit
+ %addr_trunc64 = getelementptr i8, i8* %addr_8bit, i64 -1
+ %val64 = load volatile i64, i64* @var_64bit
%val8_trunc64 = trunc i64 %val64 to i8
store volatile i8 %val8_trunc64, i8* %addr_trunc64
; CHECK: sturb {{w[0-9]+}}, [{{x[0-9]+}}, #-1]
@@ -77,36 +77,36 @@ define void @ldst_16bit() {
; No architectural support for loads to 16-bit since we
; promote i16 during lowering.
- %addr_8bit = load i8** @varptr
+ %addr_8bit = load i8*, i8** @varptr
; match a sign-extending load 16-bit -> 32-bit
- %addr8_sext32 = getelementptr i8* %addr_8bit, i64 -256
+ %addr8_sext32 = getelementptr i8, i8* %addr_8bit, i64 -256
%addr_sext32 = bitcast i8* %addr8_sext32 to i16*
- %val16_sext32 = load volatile i16* %addr_sext32
+ %val16_sext32 = load volatile i16, i16* %addr_sext32
%val32_signed = sext i16 %val16_sext32 to i32
store volatile i32 %val32_signed, i32* @var_32bit
; CHECK: ldursh {{w[0-9]+}}, [{{x[0-9]+}}, #-256]
; match a zero-extending load volatile 16-bit -> 32-bit. With offset that would be unaligned.
- %addr8_zext32 = getelementptr i8* %addr_8bit, i64 15
+ %addr8_zext32 = getelementptr i8, i8* %addr_8bit, i64 15
%addr_zext32 = bitcast i8* %addr8_zext32 to i16*
- %val16_zext32 = load volatile i16* %addr_zext32
+ %val16_zext32 = load volatile i16, i16* %addr_zext32
%val32_unsigned = zext i16 %val16_zext32 to i32
store volatile i32 %val32_unsigned, i32* @var_32bit
; CHECK: ldurh {{w[0-9]+}}, [{{x[0-9]+}}, #15]
; match an any-extending load volatile 16-bit -> 32-bit
- %addr8_anyext = getelementptr i8* %addr_8bit, i64 -1
+ %addr8_anyext = getelementptr i8, i8* %addr_8bit, i64 -1
%addr_anyext = bitcast i8* %addr8_anyext to i16*
- %val16_anyext = load volatile i16* %addr_anyext
+ %val16_anyext = load volatile i16, i16* %addr_anyext
%newval16 = add i16 %val16_anyext, 1
store volatile i16 %newval16, i16* @var_16bit
; CHECK: ldurh {{w[0-9]+}}, [{{x[0-9]+}}, #-1]
; match a sign-extending load volatile 16-bit -> 64-bit
- %addr8_sext64 = getelementptr i8* %addr_8bit, i64 -5
+ %addr8_sext64 = getelementptr i8, i8* %addr_8bit, i64 -5
%addr_sext64 = bitcast i8* %addr8_sext64 to i16*
- %val16_sext64 = load volatile i16* %addr_sext64
+ %val16_sext64 = load volatile i16, i16* %addr_sext64
%val64_signed = sext i16 %val16_sext64 to i64
store volatile i64 %val64_signed, i64* @var_64bit
; CHECK: ldursh {{x[0-9]+}}, [{{x[0-9]+}}, #-5]
@@ -114,25 +114,25 @@ define void @ldst_16bit() {
; match a zero-extending load volatile 16-bit -> 64-bit.
; This uses the fact that ldrb w0, [x0] will zero out the high 32-bits
; of x0 so it's identical to loading to 32-bits.
- %addr8_zext64 = getelementptr i8* %addr_8bit, i64 9
+ %addr8_zext64 = getelementptr i8, i8* %addr_8bit, i64 9
%addr_zext64 = bitcast i8* %addr8_zext64 to i16*
- %val16_zext64 = load volatile i16* %addr_zext64
+ %val16_zext64 = load volatile i16, i16* %addr_zext64
%val64_unsigned = zext i16 %val16_zext64 to i64
store volatile i64 %val64_unsigned, i64* @var_64bit
; CHECK: ldurh {{w[0-9]+}}, [{{x[0-9]+}}, #9]
; truncating store volatile 32-bits to 16-bits
- %addr8_trunc32 = getelementptr i8* %addr_8bit, i64 -256
+ %addr8_trunc32 = getelementptr i8, i8* %addr_8bit, i64 -256
%addr_trunc32 = bitcast i8* %addr8_trunc32 to i16*
- %val32 = load volatile i32* @var_32bit
+ %val32 = load volatile i32, i32* @var_32bit
%val16_trunc32 = trunc i32 %val32 to i16
store volatile i16 %val16_trunc32, i16* %addr_trunc32
; CHECK: sturh {{w[0-9]+}}, [{{x[0-9]+}}, #-256]
; truncating store volatile 64-bits to 16-bits
- %addr8_trunc64 = getelementptr i8* %addr_8bit, i64 -1
+ %addr8_trunc64 = getelementptr i8, i8* %addr_8bit, i64 -1
%addr_trunc64 = bitcast i8* %addr8_trunc64 to i16*
- %val64 = load volatile i64* @var_64bit
+ %val64 = load volatile i64, i64* @var_64bit
%val16_trunc64 = trunc i64 %val64 to i16
store volatile i16 %val16_trunc64, i16* %addr_trunc64
; CHECK: sturh {{w[0-9]+}}, [{{x[0-9]+}}, #-1]
@@ -143,41 +143,41 @@ define void @ldst_16bit() {
define void @ldst_32bit() {
; CHECK-LABEL: ldst_32bit:
- %addr_8bit = load i8** @varptr
+ %addr_8bit = load i8*, i8** @varptr
; Straight 32-bit load/store
- %addr32_8_noext = getelementptr i8* %addr_8bit, i64 1
+ %addr32_8_noext = getelementptr i8, i8* %addr_8bit, i64 1
%addr32_noext = bitcast i8* %addr32_8_noext to i32*
- %val32_noext = load volatile i32* %addr32_noext
+ %val32_noext = load volatile i32, i32* %addr32_noext
store volatile i32 %val32_noext, i32* %addr32_noext
; CHECK: ldur {{w[0-9]+}}, [{{x[0-9]+}}, #1]
; CHECK: stur {{w[0-9]+}}, [{{x[0-9]+}}, #1]
; Zero-extension to 64-bits
- %addr32_8_zext = getelementptr i8* %addr_8bit, i64 -256
+ %addr32_8_zext = getelementptr i8, i8* %addr_8bit, i64 -256
%addr32_zext = bitcast i8* %addr32_8_zext to i32*
- %val32_zext = load volatile i32* %addr32_zext
+ %val32_zext = load volatile i32, i32* %addr32_zext
%val64_unsigned = zext i32 %val32_zext to i64
store volatile i64 %val64_unsigned, i64* @var_64bit
; CHECK: ldur {{w[0-9]+}}, [{{x[0-9]+}}, #-256]
; CHECK: str {{x[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:var_64bit]
; Sign-extension to 64-bits
- %addr32_8_sext = getelementptr i8* %addr_8bit, i64 -12
+ %addr32_8_sext = getelementptr i8, i8* %addr_8bit, i64 -12
%addr32_sext = bitcast i8* %addr32_8_sext to i32*
- %val32_sext = load volatile i32* %addr32_sext
+ %val32_sext = load volatile i32, i32* %addr32_sext
%val64_signed = sext i32 %val32_sext to i64
store volatile i64 %val64_signed, i64* @var_64bit
; CHECK: ldursw {{x[0-9]+}}, [{{x[0-9]+}}, #-12]
; CHECK: str {{x[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:var_64bit]
; Truncation from 64-bits
- %addr64_8_trunc = getelementptr i8* %addr_8bit, i64 255
+ %addr64_8_trunc = getelementptr i8, i8* %addr_8bit, i64 255
%addr64_trunc = bitcast i8* %addr64_8_trunc to i64*
- %addr32_8_trunc = getelementptr i8* %addr_8bit, i64 -20
+ %addr32_8_trunc = getelementptr i8, i8* %addr_8bit, i64 -20
%addr32_trunc = bitcast i8* %addr32_8_trunc to i32*
- %val64_trunc = load volatile i64* %addr64_trunc
+ %val64_trunc = load volatile i64, i64* %addr64_trunc
%val32_trunc = trunc i64 %val64_trunc to i32
store volatile i32 %val32_trunc, i32* %addr32_trunc
; CHECK: ldur {{x[0-9]+}}, [{{x[0-9]+}}, #255]
@@ -189,11 +189,11 @@ define void @ldst_32bit() {
define void @ldst_float() {
; CHECK-LABEL: ldst_float:
- %addr_8bit = load i8** @varptr
- %addrfp_8 = getelementptr i8* %addr_8bit, i64 -5
+ %addr_8bit = load i8*, i8** @varptr
+ %addrfp_8 = getelementptr i8, i8* %addr_8bit, i64 -5
%addrfp = bitcast i8* %addrfp_8 to float*
- %valfp = load volatile float* %addrfp
+ %valfp = load volatile float, float* %addrfp
; CHECK: ldur {{s[0-9]+}}, [{{x[0-9]+}}, #-5]
; CHECK-NOFP-NOT: ldur {{s[0-9]+}},
@@ -207,11 +207,11 @@ define void @ldst_float() {
define void @ldst_double() {
; CHECK-LABEL: ldst_double:
- %addr_8bit = load i8** @varptr
- %addrfp_8 = getelementptr i8* %addr_8bit, i64 4
+ %addr_8bit = load i8*, i8** @varptr
+ %addrfp_8 = getelementptr i8, i8* %addr_8bit, i64 4
%addrfp = bitcast i8* %addrfp_8 to double*
- %valfp = load volatile double* %addrfp
+ %valfp = load volatile double, double* %addrfp
; CHECK: ldur {{d[0-9]+}}, [{{x[0-9]+}}, #4]
; CHECK-NOFP-NOT: ldur {{d[0-9]+}},
diff --git a/test/CodeGen/AArch64/ldst-unsignedimm.ll b/test/CodeGen/AArch64/ldst-unsignedimm.ll
index e171d22b6c7c..9777d3ed0ff9 100644
--- a/test/CodeGen/AArch64/ldst-unsignedimm.ll
+++ b/test/CodeGen/AArch64/ldst-unsignedimm.ll
@@ -16,26 +16,26 @@ define void @ldst_8bit() {
; promote i8 during lowering.
; match a sign-extending load 8-bit -> 32-bit
- %val8_sext32 = load volatile i8* @var_8bit
+ %val8_sext32 = load volatile i8, i8* @var_8bit
%val32_signed = sext i8 %val8_sext32 to i32
store volatile i32 %val32_signed, i32* @var_32bit
; CHECK: adrp {{x[0-9]+}}, var_8bit
; CHECK: ldrsb {{w[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:var_8bit]
; match a zero-extending load volatile 8-bit -> 32-bit
- %val8_zext32 = load volatile i8* @var_8bit
+ %val8_zext32 = load volatile i8, i8* @var_8bit
%val32_unsigned = zext i8 %val8_zext32 to i32
store volatile i32 %val32_unsigned, i32* @var_32bit
; CHECK: ldrb {{w[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:var_8bit]
; match an any-extending load volatile 8-bit -> 32-bit
- %val8_anyext = load volatile i8* @var_8bit
+ %val8_anyext = load volatile i8, i8* @var_8bit
%newval8 = add i8 %val8_anyext, 1
store volatile i8 %newval8, i8* @var_8bit
; CHECK: ldrb {{w[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:var_8bit]
; match a sign-extending load volatile 8-bit -> 64-bit
- %val8_sext64 = load volatile i8* @var_8bit
+ %val8_sext64 = load volatile i8, i8* @var_8bit
%val64_signed = sext i8 %val8_sext64 to i64
store volatile i64 %val64_signed, i64* @var_64bit
; CHECK: ldrsb {{x[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:var_8bit]
@@ -43,19 +43,19 @@ define void @ldst_8bit() {
; match a zero-extending load volatile 8-bit -> 64-bit.
; This uses the fact that ldrb w0, [x0] will zero out the high 32-bits
; of x0 so it's identical to loading to 32-bits.
- %val8_zext64 = load volatile i8* @var_8bit
+ %val8_zext64 = load volatile i8, i8* @var_8bit
%val64_unsigned = zext i8 %val8_zext64 to i64
store volatile i64 %val64_unsigned, i64* @var_64bit
; CHECK: ldrb {{w[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:var_8bit]
; truncating store volatile 32-bits to 8-bits
- %val32 = load volatile i32* @var_32bit
+ %val32 = load volatile i32, i32* @var_32bit
%val8_trunc32 = trunc i32 %val32 to i8
store volatile i8 %val8_trunc32, i8* @var_8bit
; CHECK: strb {{w[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:var_8bit]
; truncating store volatile 64-bits to 8-bits
- %val64 = load volatile i64* @var_64bit
+ %val64 = load volatile i64, i64* @var_64bit
%val8_trunc64 = trunc i64 %val64 to i8
store volatile i8 %val8_trunc64, i8* @var_8bit
; CHECK: strb {{w[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:var_8bit]
@@ -70,26 +70,26 @@ define void @ldst_16bit() {
; lowering.
; match a sign-extending load volatile 16-bit -> 32-bit
- %val16_sext32 = load volatile i16* @var_16bit
+ %val16_sext32 = load volatile i16, i16* @var_16bit
%val32_signed = sext i16 %val16_sext32 to i32
store volatile i32 %val32_signed, i32* @var_32bit
; CHECK: adrp {{x[0-9]+}}, var_16bit
; CHECK: ldrsh {{w[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:var_16bit]
; match a zero-extending load volatile 16-bit -> 32-bit
- %val16_zext32 = load volatile i16* @var_16bit
+ %val16_zext32 = load volatile i16, i16* @var_16bit
%val32_unsigned = zext i16 %val16_zext32 to i32
store volatile i32 %val32_unsigned, i32* @var_32bit
; CHECK: ldrh {{w[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:var_16bit]
; match an any-extending load volatile 16-bit -> 32-bit
- %val16_anyext = load volatile i16* @var_16bit
+ %val16_anyext = load volatile i16, i16* @var_16bit
%newval16 = add i16 %val16_anyext, 1
store volatile i16 %newval16, i16* @var_16bit
; CHECK: ldrh {{w[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:var_16bit]
; match a sign-extending load volatile 16-bit -> 64-bit
- %val16_sext64 = load volatile i16* @var_16bit
+ %val16_sext64 = load volatile i16, i16* @var_16bit
%val64_signed = sext i16 %val16_sext64 to i64
store volatile i64 %val64_signed, i64* @var_64bit
; CHECK: ldrsh {{x[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:var_16bit]
@@ -97,19 +97,19 @@ define void @ldst_16bit() {
; match a zero-extending load volatile 16-bit -> 64-bit.
; This uses the fact that ldrb w0, [x0] will zero out the high 32-bits
; of x0 so it's identical to loading to 32-bits.
- %val16_zext64 = load volatile i16* @var_16bit
+ %val16_zext64 = load volatile i16, i16* @var_16bit
%val64_unsigned = zext i16 %val16_zext64 to i64
store volatile i64 %val64_unsigned, i64* @var_64bit
; CHECK: ldrh {{w[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:var_16bit]
; truncating store volatile 32-bits to 16-bits
- %val32 = load volatile i32* @var_32bit
+ %val32 = load volatile i32, i32* @var_32bit
%val16_trunc32 = trunc i32 %val32 to i16
store volatile i16 %val16_trunc32, i16* @var_16bit
; CHECK: strh {{w[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:var_16bit]
; truncating store volatile 64-bits to 16-bits
- %val64 = load volatile i64* @var_64bit
+ %val64 = load volatile i64, i64* @var_64bit
%val16_trunc64 = trunc i64 %val64 to i16
store volatile i16 %val16_trunc64, i16* @var_16bit
; CHECK: strh {{w[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:var_16bit]
@@ -121,28 +121,28 @@ define void @ldst_32bit() {
; CHECK-LABEL: ldst_32bit:
; Straight 32-bit load/store
- %val32_noext = load volatile i32* @var_32bit
+ %val32_noext = load volatile i32, i32* @var_32bit
store volatile i32 %val32_noext, i32* @var_32bit
; CHECK: adrp {{x[0-9]+}}, var_32bit
; CHECK: ldr {{w[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:var_32bit]
; CHECK: str {{w[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:var_32bit]
; Zero-extension to 64-bits
- %val32_zext = load volatile i32* @var_32bit
+ %val32_zext = load volatile i32, i32* @var_32bit
%val64_unsigned = zext i32 %val32_zext to i64
store volatile i64 %val64_unsigned, i64* @var_64bit
; CHECK: ldr {{w[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:var_32bit]
; CHECK: str {{x[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:var_64bit]
; Sign-extension to 64-bits
- %val32_sext = load volatile i32* @var_32bit
+ %val32_sext = load volatile i32, i32* @var_32bit
%val64_signed = sext i32 %val32_sext to i64
store volatile i64 %val64_signed, i64* @var_64bit
; CHECK: ldrsw {{x[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:var_32bit]
; CHECK: str {{x[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:var_64bit]
; Truncation from 64-bits
- %val64_trunc = load volatile i64* @var_64bit
+ %val64_trunc = load volatile i64, i64* @var_64bit
%val32_trunc = trunc i64 %val64_trunc to i32
store volatile i32 %val32_trunc, i32* @var_32bit
; CHECK: ldr {{x[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:var_64bit]
@@ -163,62 +163,62 @@ define void @ldst_32bit() {
define void @ldst_complex_offsets() {
; CHECK: ldst_complex_offsets
- %arr8_addr = load volatile i8** @arr8
+ %arr8_addr = load volatile i8*, i8** @arr8
; CHECK: adrp {{x[0-9]+}}, arr8
; CHECK: ldr {{x[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:arr8]
- %arr8_sub1_addr = getelementptr i8* %arr8_addr, i64 1
- %arr8_sub1 = load volatile i8* %arr8_sub1_addr
+ %arr8_sub1_addr = getelementptr i8, i8* %arr8_addr, i64 1
+ %arr8_sub1 = load volatile i8, i8* %arr8_sub1_addr
store volatile i8 %arr8_sub1, i8* @var_8bit
; CHECK: ldrb {{w[0-9]+}}, [{{x[0-9]+}}, #1]
- %arr8_sub4095_addr = getelementptr i8* %arr8_addr, i64 4095
- %arr8_sub4095 = load volatile i8* %arr8_sub4095_addr
+ %arr8_sub4095_addr = getelementptr i8, i8* %arr8_addr, i64 4095
+ %arr8_sub4095 = load volatile i8, i8* %arr8_sub4095_addr
store volatile i8 %arr8_sub4095, i8* @var_8bit
; CHECK: ldrb {{w[0-9]+}}, [{{x[0-9]+}}, #4095]
- %arr16_addr = load volatile i16** @arr16
+ %arr16_addr = load volatile i16*, i16** @arr16
; CHECK: adrp {{x[0-9]+}}, arr16
; CHECK: ldr {{x[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:arr16]
- %arr16_sub1_addr = getelementptr i16* %arr16_addr, i64 1
- %arr16_sub1 = load volatile i16* %arr16_sub1_addr
+ %arr16_sub1_addr = getelementptr i16, i16* %arr16_addr, i64 1
+ %arr16_sub1 = load volatile i16, i16* %arr16_sub1_addr
store volatile i16 %arr16_sub1, i16* @var_16bit
; CHECK: ldrh {{w[0-9]+}}, [{{x[0-9]+}}, #2]
- %arr16_sub4095_addr = getelementptr i16* %arr16_addr, i64 4095
- %arr16_sub4095 = load volatile i16* %arr16_sub4095_addr
+ %arr16_sub4095_addr = getelementptr i16, i16* %arr16_addr, i64 4095
+ %arr16_sub4095 = load volatile i16, i16* %arr16_sub4095_addr
store volatile i16 %arr16_sub4095, i16* @var_16bit
; CHECK: ldrh {{w[0-9]+}}, [{{x[0-9]+}}, #8190]
- %arr32_addr = load volatile i32** @arr32
+ %arr32_addr = load volatile i32*, i32** @arr32
; CHECK: adrp {{x[0-9]+}}, arr32
; CHECK: ldr {{x[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:arr32]
- %arr32_sub1_addr = getelementptr i32* %arr32_addr, i64 1
- %arr32_sub1 = load volatile i32* %arr32_sub1_addr
+ %arr32_sub1_addr = getelementptr i32, i32* %arr32_addr, i64 1
+ %arr32_sub1 = load volatile i32, i32* %arr32_sub1_addr
store volatile i32 %arr32_sub1, i32* @var_32bit
; CHECK: ldr {{w[0-9]+}}, [{{x[0-9]+}}, #4]
- %arr32_sub4095_addr = getelementptr i32* %arr32_addr, i64 4095
- %arr32_sub4095 = load volatile i32* %arr32_sub4095_addr
+ %arr32_sub4095_addr = getelementptr i32, i32* %arr32_addr, i64 4095
+ %arr32_sub4095 = load volatile i32, i32* %arr32_sub4095_addr
store volatile i32 %arr32_sub4095, i32* @var_32bit
; CHECK: ldr {{w[0-9]+}}, [{{x[0-9]+}}, #16380]
- %arr64_addr = load volatile i64** @arr64
+ %arr64_addr = load volatile i64*, i64** @arr64
; CHECK: adrp {{x[0-9]+}}, arr64
; CHECK: ldr {{x[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:arr64]
- %arr64_sub1_addr = getelementptr i64* %arr64_addr, i64 1
- %arr64_sub1 = load volatile i64* %arr64_sub1_addr
+ %arr64_sub1_addr = getelementptr i64, i64* %arr64_addr, i64 1
+ %arr64_sub1 = load volatile i64, i64* %arr64_sub1_addr
store volatile i64 %arr64_sub1, i64* @var_64bit
; CHECK: ldr {{x[0-9]+}}, [{{x[0-9]+}}, #8]
- %arr64_sub4095_addr = getelementptr i64* %arr64_addr, i64 4095
- %arr64_sub4095 = load volatile i64* %arr64_sub4095_addr
+ %arr64_sub4095_addr = getelementptr i64, i64* %arr64_addr, i64 4095
+ %arr64_sub4095 = load volatile i64, i64* %arr64_sub4095_addr
store volatile i64 %arr64_sub4095, i64* @var_64bit
; CHECK: ldr {{x[0-9]+}}, [{{x[0-9]+}}, #32760]
@@ -228,7 +228,7 @@ define void @ldst_complex_offsets() {
define void @ldst_float() {
; CHECK-LABEL: ldst_float:
- %valfp = load volatile float* @var_float
+ %valfp = load volatile float, float* @var_float
; CHECK: adrp {{x[0-9]+}}, var_float
; CHECK: ldr {{s[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:var_float]
; CHECK-NOFP-NOT: ldr {{s[0-9]+}},
@@ -243,7 +243,7 @@ define void @ldst_float() {
define void @ldst_double() {
; CHECK-LABEL: ldst_double:
- %valfp = load volatile double* @var_double
+ %valfp = load volatile double, double* @var_double
; CHECK: adrp {{x[0-9]+}}, var_double
; CHECK: ldr {{d[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:var_double]
; CHECK-NOFP-NOT: ldr {{d[0-9]+}},
diff --git a/test/CodeGen/AArch64/lit.local.cfg b/test/CodeGen/AArch64/lit.local.cfg
index 125995cebf11..f4626c7e0a43 100644
--- a/test/CodeGen/AArch64/lit.local.cfg
+++ b/test/CodeGen/AArch64/lit.local.cfg
@@ -6,5 +6,5 @@ if not 'AArch64' in config.root.targets:
config.unsupported = True
# For now we don't test arm64-win32.
-if re.search(r'cygwin|mingw32|win32', config.target_triple):
+if re.search(r'cygwin|mingw32|win32|windows-gnu|windows-msvc', config.target_triple):
config.unsupported = True
diff --git a/test/CodeGen/AArch64/literal_pools_float.ll b/test/CodeGen/AArch64/literal_pools_float.ll
index e53b8b62c6f3..f5d6a17f3a11 100644
--- a/test/CodeGen/AArch64/literal_pools_float.ll
+++ b/test/CodeGen/AArch64/literal_pools_float.ll
@@ -9,7 +9,7 @@
define void @floating_lits() {
; CHECK-LABEL: floating_lits:
- %floatval = load float* @varfloat
+ %floatval = load float, float* @varfloat
%newfloat = fadd float %floatval, 128.0
; CHECK: adrp x[[LITBASE:[0-9]+]], [[CURLIT:.LCPI[0-9]+_[0-9]+]]
; CHECK: ldr [[LIT128:s[0-9]+]], [x[[LITBASE]], {{#?}}:lo12:[[CURLIT]]]
@@ -26,7 +26,7 @@ define void @floating_lits() {
store float %newfloat, float* @varfloat
- %doubleval = load double* @vardouble
+ %doubleval = load double, double* @vardouble
%newdouble = fadd double %doubleval, 129.0
; CHECK: adrp x[[LITBASE:[0-9]+]], [[CURLIT:.LCPI[0-9]+_[0-9]+]]
; CHECK: ldr [[LIT129:d[0-9]+]], [x[[LITBASE]], {{#?}}:lo12:[[CURLIT]]]
diff --git a/test/CodeGen/AArch64/local_vars.ll b/test/CodeGen/AArch64/local_vars.ll
index 2f5b9f2adb48..332d660eef36 100644
--- a/test/CodeGen/AArch64/local_vars.ll
+++ b/test/CodeGen/AArch64/local_vars.ll
@@ -49,7 +49,7 @@ define void @stack_local() {
; CHECK-LABEL: stack_local:
; CHECK: sub sp, sp, #16
- %val = load i64* @var
+ %val = load i64, i64* @var
store i64 %val, i64* %local_var
; CHECK-DAG: str {{x[0-9]+}}, [sp, #{{[0-9]+}}]
diff --git a/test/CodeGen/AArch64/logical_shifted_reg.ll b/test/CodeGen/AArch64/logical_shifted_reg.ll
index b249d72e0f90..6b3246d1db8b 100644
--- a/test/CodeGen/AArch64/logical_shifted_reg.ll
+++ b/test/CodeGen/AArch64/logical_shifted_reg.ll
@@ -8,8 +8,8 @@
define void @logical_32bit() minsize {
; CHECK-LABEL: logical_32bit:
- %val1 = load i32* @var1_32
- %val2 = load i32* @var2_32
+ %val1 = load i32, i32* @var1_32
+ %val2 = load i32, i32* @var2_32
; First check basic and/bic/or/orn/eor/eon patterns with no shift
%neg_val2 = xor i32 -1, %val2
@@ -98,8 +98,8 @@ define void @logical_32bit() minsize {
define void @logical_64bit() minsize {
; CHECK-LABEL: logical_64bit:
- %val1 = load i64* @var1_64
- %val2 = load i64* @var2_64
+ %val1 = load i64, i64* @var1_64
+ %val2 = load i64, i64* @var2_64
; First check basic and/bic/or/orn/eor/eon patterns with no shift
%neg_val2 = xor i64 -1, %val2
@@ -191,8 +191,8 @@ define void @logical_64bit() minsize {
define void @flag_setting() {
; CHECK-LABEL: flag_setting:
- %val1 = load i64* @var1_64
- %val2 = load i64* @var2_64
+ %val1 = load i64, i64* @var1_64
+ %val2 = load i64, i64* @var2_64
; CHECK: tst {{x[0-9]+}}, {{x[0-9]+}}
; CHECK: b.gt .L
diff --git a/test/CodeGen/AArch64/machine-copy-prop.ll b/test/CodeGen/AArch64/machine-copy-prop.ll
index 92d877d40f59..6bacf852907e 100644
--- a/test/CodeGen/AArch64/machine-copy-prop.ll
+++ b/test/CodeGen/AArch64/machine-copy-prop.ll
@@ -47,7 +47,7 @@ if.end: ; preds = %if.then, %entry
%vld2_lane1.1.extract = extractvalue { <1 x i64>, <1 x i64> } %vld2_lane1, 1
%t3 = bitcast <2 x i64>* %val1 to i8*
call void @llvm.aarch64.neon.st2.v1i64.p0i8(<1 x i64> %vld2_lane1.0.extract, <1 x i64> %vld2_lane1.1.extract, i8* %t3)
- %t4 = load <2 x i64>* %val1, align 16
+ %t4 = load <2 x i64>, <2 x i64>* %val1, align 16
%vsubhn = sub <2 x i64> <i64 11, i64 0>, %t4
%vsubhn1 = lshr <2 x i64> %vsubhn, <i64 32, i64 32>
%vsubhn2 = trunc <2 x i64> %vsubhn1 to <2 x i32>
diff --git a/test/CodeGen/AArch64/machine-sink-kill-flags.ll b/test/CodeGen/AArch64/machine-sink-kill-flags.ll
new file mode 100644
index 000000000000..590e1692ef8b
--- /dev/null
+++ b/test/CodeGen/AArch64/machine-sink-kill-flags.ll
@@ -0,0 +1,29 @@
+; RUN: llc %s -o - -fast-isel=true -O1 -verify-machineinstrs | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+target triple = "arm64-apple-ios8.0.0"
+
+; The machine verifier was asserting on this test because the AND instruction was
+; sunk below the test which killed %tmp340.
+; The kill flags on the test had to be cleared because the AND was going to read
+; registers in a BB after the test instruction.
+
+; CHECK: %bb343
+; CHECK: and
+
+define i32 @test(i32* %ptr) {
+bb:
+ br label %.thread
+
+.thread: ; preds = %.thread, %bb
+ %loc = phi i32 [ %next_iter, %.thread ], [ 0, %bb ]
+ %next_iter = lshr i32 %loc, 1
+ %tmp340 = sub i32 %loc, 1
+ %tmp341 = and i32 %tmp340, 1
+ %tmp342 = icmp eq i32 %tmp341, 0
+ br i1 %tmp342, label %bb343, label %.thread
+
+bb343: ; preds = %.thread
+ store i32 %tmp341, i32* %ptr, align 4
+ ret i32 -1
+}
diff --git a/test/CodeGen/AArch64/machine_cse.ll b/test/CodeGen/AArch64/machine_cse.ll
index bc9ab1078759..032199e62181 100644
--- a/test/CodeGen/AArch64/machine_cse.ll
+++ b/test/CodeGen/AArch64/machine_cse.ll
@@ -14,11 +14,11 @@ define void @combine-sign-comparisons-by-cse(i32 *%arg) {
; CHECK: b.le
entry:
- %a = load i32* @a, align 4
- %b = load i32* @b, align 4
- %c = load i32* @c, align 4
- %d = load i32* @d, align 4
- %e = load i32* @e, align 4
+ %a = load i32, i32* @a, align 4
+ %b = load i32, i32* @b, align 4
+ %c = load i32, i32* @c, align 4
+ %d = load i32, i32* @d, align 4
+ %e = load i32, i32* @e, align 4
%cmp = icmp slt i32 %a, %e
br i1 %cmp, label %land.lhs.true, label %lor.lhs.false
diff --git a/test/CodeGen/AArch64/merge-store.ll b/test/CodeGen/AArch64/merge-store.ll
new file mode 100644
index 000000000000..18dbad4ce25b
--- /dev/null
+++ b/test/CodeGen/AArch64/merge-store.ll
@@ -0,0 +1,20 @@
+; RUN: llc -march aarch64 %s -o - | FileCheck %s
+
+@g0 = external global <3 x float>, align 16
+@g1 = external global <3 x float>, align 4
+
+; CHECK: ldr s[[R0:[0-9]+]], {{\[}}[[R1:x[0-9]+]]{{\]}}, #4
+; CHECK: ld1{{\.?s?}} { v[[R0]]{{\.?s?}} }[1], {{\[}}[[R1]]{{\]}}
+; CHECK: str d[[R0]]
+
+define void @blam() {
+ %tmp4 = getelementptr inbounds <3 x float>, <3 x float>* @g1, i64 0, i64 0
+ %tmp5 = load <3 x float>, <3 x float>* @g0, align 16
+ %tmp6 = extractelement <3 x float> %tmp5, i64 0
+ store float %tmp6, float* %tmp4
+ %tmp7 = getelementptr inbounds float, float* %tmp4, i64 1
+ %tmp8 = load <3 x float>, <3 x float>* @g0, align 16
+ %tmp9 = extractelement <3 x float> %tmp8, i64 1
+ store float %tmp9, float* %tmp7
+ ret void;
+}
diff --git a/test/CodeGen/AArch64/minmax.ll b/test/CodeGen/AArch64/minmax.ll
new file mode 100644
index 000000000000..a6b5adebe107
--- /dev/null
+++ b/test/CodeGen/AArch64/minmax.ll
@@ -0,0 +1,96 @@
+; RUN: llc < %s -mtriple=aarch64-linux-gnu | FileCheck %s
+
+; CHECK-LABEL: t1
+; CHECK: smax
+define <4 x i32> @t1(<4 x i32> %a, <4 x i32> %b) {
+ %t1 = icmp sgt <4 x i32> %a, %b
+ %t2 = select <4 x i1> %t1, <4 x i32> %a, <4 x i32> %b
+ ret <4 x i32> %t2
+}
+
+; CHECK-LABEL: t2
+; CHECK: smin
+define <4 x i32> @t2(<4 x i32> %a, <4 x i32> %b) {
+ %t1 = icmp slt <4 x i32> %a, %b
+ %t2 = select <4 x i1> %t1, <4 x i32> %a, <4 x i32> %b
+ ret <4 x i32> %t2
+}
+
+; CHECK-LABEL: t3
+; CHECK: umax
+define <4 x i32> @t3(<4 x i32> %a, <4 x i32> %b) {
+ %t1 = icmp ugt <4 x i32> %a, %b
+ %t2 = select <4 x i1> %t1, <4 x i32> %a, <4 x i32> %b
+ ret <4 x i32> %t2
+}
+
+; CHECK-LABEL: t4
+; CHECK: umin
+define <8 x i8> @t4(<8 x i8> %a, <8 x i8> %b) {
+ %t1 = icmp ult <8 x i8> %a, %b
+ %t2 = select <8 x i1> %t1, <8 x i8> %a, <8 x i8> %b
+ ret <8 x i8> %t2
+}
+
+; CHECK-LABEL: t5
+; CHECK: smin
+define <4 x i16> @t5(<4 x i16> %a, <4 x i16> %b) {
+ %t1 = icmp sgt <4 x i16> %b, %a
+ %t2 = select <4 x i1> %t1, <4 x i16> %a, <4 x i16> %b
+ ret <4 x i16> %t2
+}
+
+; CHECK-LABEL: t6
+; CHECK: smax
+define <2 x i32> @t6(<2 x i32> %a, <2 x i32> %b) {
+ %t1 = icmp slt <2 x i32> %b, %a
+ %t2 = select <2 x i1> %t1, <2 x i32> %a, <2 x i32> %b
+ ret <2 x i32> %t2
+}
+
+; CHECK-LABEL: t7
+; CHECK: umin
+define <16 x i8> @t7(<16 x i8> %a, <16 x i8> %b) {
+ %t1 = icmp ugt <16 x i8> %b, %a
+ %t2 = select <16 x i1> %t1, <16 x i8> %a, <16 x i8> %b
+ ret <16 x i8> %t2
+}
+
+; CHECK-LABEL: t8
+; CHECK: umax
+define <8 x i16> @t8(<8 x i16> %a, <8 x i16> %b) {
+ %t1 = icmp ult <8 x i16> %b, %a
+ %t2 = select <8 x i1> %t1, <8 x i16> %a, <8 x i16> %b
+ ret <8 x i16> %t2
+}
+
+; CHECK-LABEL: t9
+; CHECK: umin
+; CHECK: smax
+define <4 x i32> @t9(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
+ %t1 = icmp ugt <4 x i32> %b, %a
+ %t2 = select <4 x i1> %t1, <4 x i32> %a, <4 x i32> %b
+ %t3 = icmp sge <4 x i32> %t2, %c
+ %t4 = select <4 x i1> %t3, <4 x i32> %t2, <4 x i32> %c
+ ret <4 x i32> %t4
+}
+
+; CHECK-LABEL: t10
+; CHECK: smax
+; CHECK: smax
+define <8 x i32> @t10(<8 x i32> %a, <8 x i32> %b) {
+ %t1 = icmp sgt <8 x i32> %a, %b
+ %t2 = select <8 x i1> %t1, <8 x i32> %a, <8 x i32> %b
+ ret <8 x i32> %t2
+}
+
+; CHECK-LABEL: t11
+; CHECK: smin
+; CHECK: smin
+; CHECK: smin
+; CHECK: smin
+define <16 x i32> @t11(<16 x i32> %a, <16 x i32> %b) {
+ %t1 = icmp sle <16 x i32> %a, %b
+ %t2 = select <16 x i1> %t1, <16 x i32> %a, <16 x i32> %b
+ ret <16 x i32> %t2
+}
diff --git a/test/CodeGen/AArch64/mul_pow2.ll b/test/CodeGen/AArch64/mul_pow2.ll
index efc0ec8c40e3..b828223ef1c9 100644
--- a/test/CodeGen/AArch64/mul_pow2.ll
+++ b/test/CodeGen/AArch64/mul_pow2.ll
@@ -74,8 +74,7 @@ define i32 @ntest2(i32 %x) {
define i32 @ntest3(i32 %x) {
; CHECK-LABEL: ntest3
-; CHECK: add {{w[0-9]+}}, w0, w0, lsl #1
-; CHECK: neg w0, {{w[0-9]+}}
+; CHECK: sub w0, w0, w0, lsl #2
%mul = mul nsw i32 %x, -3
ret i32 %mul
diff --git a/test/CodeGen/AArch64/neon-fpround_f128.ll b/test/CodeGen/AArch64/neon-fpround_f128.ll
index a93f3f2723c3..265664ee9442 100644
--- a/test/CodeGen/AArch64/neon-fpround_f128.ll
+++ b/test/CodeGen/AArch64/neon-fpround_f128.ll
@@ -3,7 +3,7 @@
define <1 x double> @test_fpround_v1f128(<1 x fp128>* %a) {
; CHECK-LABEL: test_fpround_v1f128:
; CHECK: bl __trunctfdf2
- %b = load <1 x fp128>* %a
+ %b = load <1 x fp128>, <1 x fp128>* %a
%c = fptrunc <1 x fp128> %b to <1 x double>
ret <1 x double> %c
}
@@ -12,7 +12,7 @@ define <2 x double> @test_fpround_v2f128(<2 x fp128>* %a) {
; CHECK-LABEL: test_fpround_v2f128:
; CHECK: bl __trunctfdf2
; CHECK: bl __trunctfdf2
- %b = load <2 x fp128>* %a
+ %b = load <2 x fp128>, <2 x fp128>* %a
%c = fptrunc <2 x fp128> %b to <2 x double>
ret <2 x double> %c
}
diff --git a/test/CodeGen/AArch64/neon-scalar-copy.ll b/test/CodeGen/AArch64/neon-scalar-copy.ll
index 6afac315a961..3f770600ac59 100644
--- a/test/CodeGen/AArch64/neon-scalar-copy.ll
+++ b/test/CodeGen/AArch64/neon-scalar-copy.ll
@@ -1,101 +1,145 @@
-; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s --check-prefix=CHECK
+; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon -asm-verbose=false < %s | FileCheck %s
-
-define float @test_dup_sv2S(<2 x float> %v) {
- ; CHECK-LABEL: test_dup_sv2S
- ; CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
+define float @test_dup_sv2S(<2 x float> %v) #0 {
+ ; CHECK-LABEL: test_dup_sv2S:
+ ; CHECK-NEXT: mov s{{[0-9]+}}, {{v[0-9]+}}.s[1]
+ ; CHECK-NEXT: ret
%tmp1 = extractelement <2 x float> %v, i32 1
ret float %tmp1
}
-define float @test_dup_sv2S_0(<2 x float> %v) {
- ; CHECK-LABEL: test_dup_sv2S_0
+define float @test_dup_sv2S_0(<2 x float> %v) #0 {
+ ; CHECK-LABEL: test_dup_sv2S_0:
; CHECK-NOT: dup {{[vsd][0-9]+}}
; CHECK-NOT: ins {{[vsd][0-9]+}}
- ; CHECK: ret
+ ; CHECK-NEXT: ret
%tmp1 = extractelement <2 x float> %v, i32 0
ret float %tmp1
}
-define float @test_dup_sv4S(<4 x float> %v) {
- ; CHECK-LABEL: test_dup_sv4S
+define float @test_dup_sv4S(<4 x float> %v) #0 {
+ ; CHECK-LABEL: test_dup_sv4S:
+ ; CHECK-NEXT: mov s{{[0-9]+}}, {{v[0-9]+}}.s[1]
+ ; CHECK-NEXT: ret
+ %tmp1 = extractelement <4 x float> %v, i32 1
+ ret float %tmp1
+}
+
+define float @test_dup_sv4S_0(<4 x float> %v) #0 {
+ ; CHECK-LABEL: test_dup_sv4S_0:
; CHECK-NOT: dup {{[vsd][0-9]+}}
; CHECK-NOT: ins {{[vsd][0-9]+}}
- ; CHECK: ret
+ ; CHECK-NEXT: ret
%tmp1 = extractelement <4 x float> %v, i32 0
ret float %tmp1
}
-define double @test_dup_dvD(<1 x double> %v) {
- ; CHECK-LABEL: test_dup_dvD
+define double @test_dup_dvD(<1 x double> %v) #0 {
+ ; CHECK-LABEL: test_dup_dvD:
; CHECK-NOT: dup {{[vsd][0-9]+}}
; CHECK-NOT: ins {{[vsd][0-9]+}}
- ; CHECK: ret
+ ; CHECK-NEXT: ret
%tmp1 = extractelement <1 x double> %v, i32 0
ret double %tmp1
}
-define double @test_dup_dv2D(<2 x double> %v) {
- ; CHECK-LABEL: test_dup_dv2D
- ; CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1]
+define double @test_dup_dv2D(<2 x double> %v) #0 {
+ ; CHECK-LABEL: test_dup_dv2D:
+ ; CHECK-NEXT: mov d{{[0-9]+}}, {{v[0-9]+}}.d[1]
+ ; CHECK-NEXT: ret
%tmp1 = extractelement <2 x double> %v, i32 1
ret double %tmp1
}
-define double @test_dup_dv2D_0(<2 x double> %v) {
- ; CHECK-LABEL: test_dup_dv2D_0
- ; CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1]
- ; CHECK: ret
- %tmp1 = extractelement <2 x double> %v, i32 1
+define double @test_dup_dv2D_0(<2 x double> %v) #0 {
+ ; CHECK-LABEL: test_dup_dv2D_0:
+ ; CHECK-NOT: dup {{[vsd][0-9]+}}
+ ; CHECK-NOT: ins {{[vsd][0-9]+}}
+ ; CHECK-NEXT: ret
+ %tmp1 = extractelement <2 x double> %v, i32 0
ret double %tmp1
}
-define <1 x i8> @test_vector_dup_bv16B(<16 x i8> %v1) {
- ; CHECK-LABEL: test_vector_dup_bv16B
+define half @test_dup_hv8H(<8 x half> %v) #0 {
+ ; CHECK-LABEL: test_dup_hv8H:
+ ; CHECK-NEXT: mov h{{[0-9]+}}, {{v[0-9]+}}.h[1]
+ ; CHECK-NEXT: ret
+ %tmp1 = extractelement <8 x half> %v, i32 1
+ ret half %tmp1
+}
+
+define half @test_dup_hv8H_0(<8 x half> %v) #0 {
+ ; CHECK-LABEL: test_dup_hv8H_0:
+ ; CHECK-NOT: dup {{[vsdh][0-9]+}}
+ ; CHECK-NOT: ins {{[vsdh][0-9]+}}
+ ; CHECK-NEXT: ret
+ %tmp1 = extractelement <8 x half> %v, i32 0
+ ret half %tmp1
+}
+
+define <1 x i8> @test_vector_dup_bv16B(<16 x i8> %v1) #0 {
+ ; CHECK-LABEL: test_vector_dup_bv16B:
+ ; CHECK-NEXT: umov [[W:w[0-9]+]], v0.b[14]
+ ; CHECK-NEXT: fmov s0, [[W]]
+ ; CHECK-NEXT: ret
%shuffle.i = shufflevector <16 x i8> %v1, <16 x i8> undef, <1 x i32> <i32 14>
ret <1 x i8> %shuffle.i
}
-define <1 x i8> @test_vector_dup_bv8B(<8 x i8> %v1) {
- ; CHECK-LABEL: test_vector_dup_bv8B
+define <1 x i8> @test_vector_dup_bv8B(<8 x i8> %v1) #0 {
+ ; CHECK-LABEL: test_vector_dup_bv8B:
+ ; CHECK-NEXT: dup v0.8b, v0.b[7]
+ ; CHECK-NEXT: ret
%shuffle.i = shufflevector <8 x i8> %v1, <8 x i8> undef, <1 x i32> <i32 7>
ret <1 x i8> %shuffle.i
}
-define <1 x i16> @test_vector_dup_hv8H(<8 x i16> %v1) {
- ; CHECK-LABEL: test_vector_dup_hv8H
+define <1 x i16> @test_vector_dup_hv8H(<8 x i16> %v1) #0 {
+ ; CHECK-LABEL: test_vector_dup_hv8H:
+ ; CHECK-NEXT: umov [[W:w[0-9]+]], v0.h[7]
+ ; CHECK-NEXT: fmov s0, [[W]]
+ ; CHECK-NEXT: ret
%shuffle.i = shufflevector <8 x i16> %v1, <8 x i16> undef, <1 x i32> <i32 7>
ret <1 x i16> %shuffle.i
}
-define <1 x i16> @test_vector_dup_hv4H(<4 x i16> %v1) {
- ; CHECK-LABEL: test_vector_dup_hv4H
+define <1 x i16> @test_vector_dup_hv4H(<4 x i16> %v1) #0 {
+ ; CHECK-LABEL: test_vector_dup_hv4H:
+ ; CHECK-NEXT: dup v0.4h, v0.h[3]
+ ; CHECK-NEXT: ret
%shuffle.i = shufflevector <4 x i16> %v1, <4 x i16> undef, <1 x i32> <i32 3>
ret <1 x i16> %shuffle.i
}
-define <1 x i32> @test_vector_dup_sv4S(<4 x i32> %v1) {
- ; CHECK-LABEL: test_vector_dup_sv4S
+define <1 x i32> @test_vector_dup_sv4S(<4 x i32> %v1) #0 {
+ ; CHECK-LABEL: test_vector_dup_sv4S:
+ ; CHECK-NEXT: mov [[W:w[0-9]+]], v0.s[3]
+ ; CHECK-NEXT: fmov s0, [[W]]
+ ; CHECK-NEXT: ret
%shuffle = shufflevector <4 x i32> %v1, <4 x i32> undef, <1 x i32> <i32 3>
ret <1 x i32> %shuffle
}
-define <1 x i32> @test_vector_dup_sv2S(<2 x i32> %v1) {
- ; CHECK-LABEL: test_vector_dup_sv2S
+define <1 x i32> @test_vector_dup_sv2S(<2 x i32> %v1) #0 {
+ ; CHECK-LABEL: test_vector_dup_sv2S:
+ ; CHECK-NEXT: dup v0.2s, v0.s[1]
+ ; CHECK-NEXT: ret
%shuffle = shufflevector <2 x i32> %v1, <2 x i32> undef, <1 x i32> <i32 1>
ret <1 x i32> %shuffle
}
-define <1 x i64> @test_vector_dup_dv2D(<2 x i64> %v1) {
- ; CHECK-LABEL: test_vector_dup_dv2D
- ; CHECK: ext {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #8
+define <1 x i64> @test_vector_dup_dv2D(<2 x i64> %v1) #0 {
+ ; CHECK-LABEL: test_vector_dup_dv2D:
+ ; CHECK-NEXT: ext {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #8
+ ; CHECK-NEXT: ret
%shuffle.i = shufflevector <2 x i64> %v1, <2 x i64> undef, <1 x i32> <i32 1>
ret <1 x i64> %shuffle.i
}
-define <1 x i64> @test_vector_copy_dup_dv2D(<1 x i64> %a, <2 x i64> %c) {
- ; CHECK-LABEL: test_vector_copy_dup_dv2D
- ; CHECK: {{dup|mov}} {{d[0-9]+}}, {{v[0-9]+}}.d[1]
+define <1 x i64> @test_vector_copy_dup_dv2D(<1 x i64> %a, <2 x i64> %c) #0 {
+ ; CHECK-LABEL: test_vector_copy_dup_dv2D:
+ ; CHECK-NEXT: {{dup|mov}} {{d[0-9]+}}, {{v[0-9]+}}.d[1]
+ ; CHECK-NEXT: ret
%vget_lane = extractelement <2 x i64> %c, i32 1
%vset_lane = insertelement <1 x i64> undef, i64 %vget_lane, i32 0
ret <1 x i64> %vset_lane
@@ -118,3 +162,5 @@ define void @test_out_of_range_insert(<4 x i32> %vec, i32 %elt) {
insertelement <4 x i32> %vec, i32 %elt, i32 4
ret void
}
+
+attributes #0 = { nounwind }
diff --git a/test/CodeGen/AArch64/neon-truncStore-extLoad.ll b/test/CodeGen/AArch64/neon-truncStore-extLoad.ll
index 1df3719c8867..0d5ebb324ecb 100644
--- a/test/CodeGen/AArch64/neon-truncStore-extLoad.ll
+++ b/test/CodeGen/AArch64/neon-truncStore-extLoad.ll
@@ -34,7 +34,7 @@ define void @truncStore.v8i16(<8 x i16> %a, <8 x i8>* %result) {
define <4 x i32> @loadSExt.v4i8(<4 x i8>* %ref) {
; CHECK-LABEL: loadSExt.v4i8:
; CHECK: ldrsb
- %a = load <4 x i8>* %ref
+ %a = load <4 x i8>, <4 x i8>* %ref
%conv = sext <4 x i8> %a to <4 x i32>
ret <4 x i32> %conv
}
@@ -42,7 +42,7 @@ define <4 x i32> @loadSExt.v4i8(<4 x i8>* %ref) {
define <4 x i32> @loadZExt.v4i8(<4 x i8>* %ref) {
; CHECK-LABEL: loadZExt.v4i8:
; CHECK: ldrb
- %a = load <4 x i8>* %ref
+ %a = load <4 x i8>, <4 x i8>* %ref
%conv = zext <4 x i8> %a to <4 x i32>
ret <4 x i32> %conv
}
@@ -50,7 +50,7 @@ define <4 x i32> @loadZExt.v4i8(<4 x i8>* %ref) {
define i32 @loadExt.i32(<4 x i8>* %ref) {
; CHECK-LABEL: loadExt.i32:
; CHECK: ldrb
- %a = load <4 x i8>* %ref
+ %a = load <4 x i8>, <4 x i8>* %ref
%vecext = extractelement <4 x i8> %a, i32 0
%conv = zext i8 %vecext to i32
ret i32 %conv
diff --git a/test/CodeGen/AArch64/nzcv-save.ll b/test/CodeGen/AArch64/nzcv-save.ll
index 32baff3dbe64..f8f42ec9b1a9 100644
--- a/test/CodeGen/AArch64/nzcv-save.ll
+++ b/test/CodeGen/AArch64/nzcv-save.ll
@@ -7,8 +7,8 @@
; must be saved for later.
define void @f(i256* nocapture %a, i256* nocapture %b, i256* nocapture %cc, i256* nocapture %dd) nounwind uwtable noinline ssp {
entry:
- %c = load i256* %cc
- %d = load i256* %dd
+ %c = load i256, i256* %cc
+ %d = load i256, i256* %dd
%add = add nsw i256 %c, %d
store i256 %add, i256* %a, align 8
%or = or i256 %c, 1606938044258990275541962092341162602522202993782792835301376
diff --git a/test/CodeGen/AArch64/or-combine.ll b/test/CodeGen/AArch64/or-combine.ll
new file mode 100644
index 000000000000..c6c343a3f79c
--- /dev/null
+++ b/test/CodeGen/AArch64/or-combine.ll
@@ -0,0 +1,44 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -o - %s | FileCheck %s
+
+define i32 @test_consts(i32 %in) {
+; CHECK-LABEL: test_consts:
+; CHECK-NOT: bfxil
+; CHECK-NOT: and
+; CHECK-NOT: orr
+; CHECK: ret
+
+ %lo = and i32 %in, 65535
+ %hi = and i32 %in, -65536
+ %res = or i32 %lo, %hi
+ ret i32 %res
+}
+
+define i32 @test_generic(i32 %in, i32 %mask1, i32 %mask2) {
+; CHECK-LABEL: test_generic:
+; CHECK: orr [[FULL_MASK:w[0-9]+]], w1, w2
+; CHECK: and w0, w0, [[FULL_MASK]]
+
+ %lo = and i32 %in, %mask1
+ %hi = and i32 %in, %mask2
+ %res = or i32 %lo, %hi
+ ret i32 %res
+}
+
+; In this case the transformation isn't profitable, since %lo and %hi
+; are used more than once.
+define [3 x i32] @test_reuse(i32 %in, i32 %mask1, i32 %mask2) {
+; CHECK-LABEL: test_reuse:
+; CHECK-DAG: and w1, w0, w1
+; CHECK-DAG: and w2, w0, w2
+; CHECK-DAG: orr w0, w1, w2
+
+ %lo = and i32 %in, %mask1
+ %hi = and i32 %in, %mask2
+ %recombine = or i32 %lo, %hi
+
+ %res.tmp0 = insertvalue [3 x i32] undef, i32 %recombine, 0
+ %res.tmp1 = insertvalue [3 x i32] %res.tmp0, i32 %lo, 1
+ %res = insertvalue [3 x i32] %res.tmp1, i32 %hi, 2
+
+ ret [3 x i32] %res
+}
diff --git a/test/CodeGen/AArch64/paired-load.ll b/test/CodeGen/AArch64/paired-load.ll
index 3dddb9eff325..e6d2d4f51164 100644
--- a/test/CodeGen/AArch64/paired-load.ll
+++ b/test/CodeGen/AArch64/paired-load.ll
@@ -6,10 +6,10 @@ target triple = "aarch64-linux-gnu"
; CHECK: ldp
; CHECK: stp
define void @f(i64* %p, i64* %q) {
- %addr2 = getelementptr i64* %q, i32 1
- %addr = getelementptr i64* %p, i32 1
- %x = load i64* %p
- %y = load i64* %addr
+ %addr2 = getelementptr i64, i64* %q, i32 1
+ %addr = getelementptr i64, i64* %p, i32 1
+ %x = load i64, i64* %p
+ %y = load i64, i64* %addr
store i64 %x, i64* %q
store i64 %y, i64* %addr2
ret void
diff --git a/test/CodeGen/AArch64/pic-eh-stubs.ll b/test/CodeGen/AArch64/pic-eh-stubs.ll
index 93ee0e67b901..f761a87783ce 100644
--- a/test/CodeGen/AArch64/pic-eh-stubs.ll
+++ b/test/CodeGen/AArch64/pic-eh-stubs.ll
@@ -38,7 +38,7 @@ catch: ; preds = %lpad
%3 = extractvalue { i8*, i32 } %0, 0
%4 = tail call i8* @__cxa_begin_catch(i8* %3) nounwind
%5 = bitcast i8* %4 to i32*
- %exn.scalar = load i32* %5, align 4
+ %exn.scalar = load i32, i32* %5, align 4
tail call void @__cxa_end_catch() nounwind
br label %return
diff --git a/test/CodeGen/AArch64/print-mrs-system-register.ll b/test/CodeGen/AArch64/print-mrs-system-register.ll
new file mode 100644
index 000000000000..3411ed6161bd
--- /dev/null
+++ b/test/CodeGen/AArch64/print-mrs-system-register.ll
@@ -0,0 +1,11 @@
+; RUN: llc -mtriple=arm64-apple-darwin %s -o - | FileCheck %s
+
+; CHECK: mrs x0, CPM_IOACC_CTL_EL3
+
+define void @foo1() #0 {
+entry:
+ tail call void asm sideeffect "mrs x0, cpm_ioacc_ctl_el3", ""()
+ ret void
+}
+
+attributes #0 = { "target-cpu"="cyclone" }
diff --git a/test/CodeGen/AArch64/ragreedy-csr.ll b/test/CodeGen/AArch64/ragreedy-csr.ll
index 31ff543117ff..94eeba7c7954 100644
--- a/test/CodeGen/AArch64/ragreedy-csr.ll
+++ b/test/CodeGen/AArch64/ragreedy-csr.ll
@@ -35,31 +35,31 @@ declare i32 @__maskrune(i32, i64) #7
define fastcc i32 @prune_match(%struct.Connector_struct* nocapture readonly %a, %struct.Connector_struct* nocapture readonly %b) #9 {
entry:
%label56 = bitcast %struct.Connector_struct* %a to i16*
- %0 = load i16* %label56, align 2
+ %0 = load i16, i16* %label56, align 2
%label157 = bitcast %struct.Connector_struct* %b to i16*
- %1 = load i16* %label157, align 2
+ %1 = load i16, i16* %label157, align 2
%cmp = icmp eq i16 %0, %1
br i1 %cmp, label %if.end, label %return, !prof !988
if.end:
- %priority = getelementptr inbounds %struct.Connector_struct* %a, i64 0, i32 2
- %2 = load i8* %priority, align 1
- %priority5 = getelementptr inbounds %struct.Connector_struct* %b, i64 0, i32 2
- %3 = load i8* %priority5, align 1
- %string = getelementptr inbounds %struct.Connector_struct* %a, i64 0, i32 5
- %4 = load i8** %string, align 8
- %string7 = getelementptr inbounds %struct.Connector_struct* %b, i64 0, i32 5
- %5 = load i8** %string7, align 8
+ %priority = getelementptr inbounds %struct.Connector_struct, %struct.Connector_struct* %a, i64 0, i32 2
+ %2 = load i8, i8* %priority, align 1
+ %priority5 = getelementptr inbounds %struct.Connector_struct, %struct.Connector_struct* %b, i64 0, i32 2
+ %3 = load i8, i8* %priority5, align 1
+ %string = getelementptr inbounds %struct.Connector_struct, %struct.Connector_struct* %a, i64 0, i32 5
+ %4 = load i8*, i8** %string, align 8
+ %string7 = getelementptr inbounds %struct.Connector_struct, %struct.Connector_struct* %b, i64 0, i32 5
+ %5 = load i8*, i8** %string7, align 8
br label %while.cond
while.cond:
%lsr.iv27 = phi i64 [ %lsr.iv.next28, %if.end17 ], [ 0, %if.end ]
- %scevgep55 = getelementptr i8* %4, i64 %lsr.iv27
- %6 = load i8* %scevgep55, align 1
+ %scevgep55 = getelementptr i8, i8* %4, i64 %lsr.iv27
+ %6 = load i8, i8* %scevgep55, align 1
%idxprom.i.i = sext i8 %6 to i64
%isascii.i.i224 = icmp sgt i8 %6, -1
br i1 %isascii.i.i224, label %cond.true.i.i, label %cond.false.i.i, !prof !181
cond.true.i.i:
- %arrayidx.i.i = getelementptr inbounds %struct._RuneLocale* @_DefaultRuneLocale, i64 0, i32 5, i64 %idxprom.i.i
- %7 = load i32* %arrayidx.i.i, align 4
+ %arrayidx.i.i = getelementptr inbounds %struct._RuneLocale, %struct._RuneLocale* @_DefaultRuneLocale, i64 0, i32 5, i64 %idxprom.i.i
+ %7 = load i32, i32* %arrayidx.i.i, align 4
%and.i.i = and i32 %7, 32768
br label %isupper.exit
cond.false.i.i:
@@ -75,13 +75,13 @@ lor.rhs:
%sunkaddr = ptrtoint i8* %5 to i64
%sunkaddr58 = add i64 %sunkaddr, %lsr.iv27
%sunkaddr59 = inttoptr i64 %sunkaddr58 to i8*
- %9 = load i8* %sunkaddr59, align 1
+ %9 = load i8, i8* %sunkaddr59, align 1
%idxprom.i.i214 = sext i8 %9 to i64
%isascii.i.i213225 = icmp sgt i8 %9, -1
br i1 %isascii.i.i213225, label %cond.true.i.i217, label %cond.false.i.i219, !prof !181
cond.true.i.i217:
- %arrayidx.i.i215 = getelementptr inbounds %struct._RuneLocale* @_DefaultRuneLocale, i64 0, i32 5, i64 %idxprom.i.i214
- %10 = load i32* %arrayidx.i.i215, align 4
+ %arrayidx.i.i215 = getelementptr inbounds %struct._RuneLocale, %struct._RuneLocale* @_DefaultRuneLocale, i64 0, i32 5, i64 %idxprom.i.i214
+ %10 = load i32, i32* %arrayidx.i.i215, align 4
%and.i.i216 = and i32 %10, 32768
br label %isupper.exit223
cond.false.i.i219:
@@ -97,11 +97,11 @@ while.body:
%sunkaddr60 = ptrtoint i8* %4 to i64
%sunkaddr61 = add i64 %sunkaddr60, %lsr.iv27
%sunkaddr62 = inttoptr i64 %sunkaddr61 to i8*
- %12 = load i8* %sunkaddr62, align 1
+ %12 = load i8, i8* %sunkaddr62, align 1
%sunkaddr63 = ptrtoint i8* %5 to i64
%sunkaddr64 = add i64 %sunkaddr63, %lsr.iv27
%sunkaddr65 = inttoptr i64 %sunkaddr64 to i8*
- %13 = load i8* %sunkaddr65, align 1
+ %13 = load i8, i8* %sunkaddr65, align 1
%cmp14 = icmp eq i8 %12, %13
br i1 %cmp14, label %if.end17, label %return, !prof !991
if.end17:
@@ -115,13 +115,13 @@ if.then23:
%sunkaddr66 = ptrtoint %struct.Connector_struct* %a to i64
%sunkaddr67 = add i64 %sunkaddr66, 16
%sunkaddr68 = inttoptr i64 %sunkaddr67 to i8**
- %16 = load i8** %sunkaddr68, align 8
- %17 = load i8* %16, align 1
+ %16 = load i8*, i8** %sunkaddr68, align 8
+ %17 = load i8, i8* %16, align 1
%cmp26 = icmp eq i8 %17, 83
%sunkaddr69 = ptrtoint i8* %4 to i64
%sunkaddr70 = add i64 %sunkaddr69, %lsr.iv27
%sunkaddr71 = inttoptr i64 %sunkaddr70 to i8*
- %18 = load i8* %sunkaddr71, align 1
+ %18 = load i8, i8* %sunkaddr71, align 1
br i1 %cmp26, label %land.lhs.true28, label %while.cond59.preheader, !prof !993
land.lhs.true28:
switch i8 %18, label %land.rhs.preheader [
@@ -132,24 +132,24 @@ land.lhs.true35:
%sunkaddr72 = ptrtoint i8* %5 to i64
%sunkaddr73 = add i64 %sunkaddr72, %lsr.iv27
%sunkaddr74 = inttoptr i64 %sunkaddr73 to i8*
- %19 = load i8* %sunkaddr74, align 1
+ %19 = load i8, i8* %sunkaddr74, align 1
switch i8 %19, label %land.rhs.preheader [
i8 112, label %land.lhs.true43
], !prof !995
land.lhs.true43:
%20 = ptrtoint i8* %16 to i64
%21 = sub i64 0, %20
- %scevgep52 = getelementptr i8* %4, i64 %21
- %scevgep53 = getelementptr i8* %scevgep52, i64 %lsr.iv27
- %scevgep54 = getelementptr i8* %scevgep53, i64 -1
+ %scevgep52 = getelementptr i8, i8* %4, i64 %21
+ %scevgep53 = getelementptr i8, i8* %scevgep52, i64 %lsr.iv27
+ %scevgep54 = getelementptr i8, i8* %scevgep53, i64 -1
%cmp45 = icmp eq i8* %scevgep54, null
br i1 %cmp45, label %return, label %lor.lhs.false47, !prof !996
lor.lhs.false47:
%22 = ptrtoint i8* %16 to i64
%23 = sub i64 0, %22
- %scevgep47 = getelementptr i8* %4, i64 %23
- %scevgep48 = getelementptr i8* %scevgep47, i64 %lsr.iv27
- %scevgep49 = getelementptr i8* %scevgep48, i64 -2
+ %scevgep47 = getelementptr i8, i8* %4, i64 %23
+ %scevgep48 = getelementptr i8, i8* %scevgep47, i64 %lsr.iv27
+ %scevgep49 = getelementptr i8, i8* %scevgep48, i64 -2
%cmp50 = icmp eq i8* %scevgep49, null
br i1 %cmp50, label %land.lhs.true52, label %while.cond59.preheader, !prof !997
land.lhs.true52:
@@ -157,7 +157,7 @@ land.lhs.true52:
%sunkaddr76 = add i64 %sunkaddr75, %lsr.iv27
%sunkaddr77 = add i64 %sunkaddr76, -1
%sunkaddr78 = inttoptr i64 %sunkaddr77 to i8*
- %24 = load i8* %sunkaddr78, align 1
+ %24 = load i8, i8* %sunkaddr78, align 1
%cmp55 = icmp eq i8 %24, 73
%cmp61233 = icmp eq i8 %18, 0
%or.cond265 = or i1 %cmp55, %cmp61233
@@ -166,14 +166,14 @@ while.cond59.preheader:
%cmp61233.old = icmp eq i8 %18, 0
br i1 %cmp61233.old, label %return, label %land.rhs.preheader, !prof !999
land.rhs.preheader:
- %scevgep33 = getelementptr i8* %5, i64 %lsr.iv27
- %scevgep43 = getelementptr i8* %4, i64 %lsr.iv27
+ %scevgep33 = getelementptr i8, i8* %5, i64 %lsr.iv27
+ %scevgep43 = getelementptr i8, i8* %4, i64 %lsr.iv27
br label %land.rhs
land.rhs:
%lsr.iv = phi i64 [ 0, %land.rhs.preheader ], [ %lsr.iv.next, %if.then83 ]
%25 = phi i8 [ %27, %if.then83 ], [ %18, %land.rhs.preheader ]
- %scevgep34 = getelementptr i8* %scevgep33, i64 %lsr.iv
- %26 = load i8* %scevgep34, align 1
+ %scevgep34 = getelementptr i8, i8* %scevgep33, i64 %lsr.iv
+ %26 = load i8, i8* %scevgep34, align 1
%cmp64 = icmp eq i8 %26, 0
br i1 %cmp64, label %return, label %while.body66, !prof !1000
while.body66:
@@ -187,9 +187,9 @@ lor.lhs.false74:
%or.cond208 = or i1 %cmp77, %cmp81
br i1 %or.cond208, label %return, label %if.then83, !prof !1002
if.then83:
- %scevgep44 = getelementptr i8* %scevgep43, i64 %lsr.iv
- %scevgep45 = getelementptr i8* %scevgep44, i64 1
- %27 = load i8* %scevgep45, align 1
+ %scevgep44 = getelementptr i8, i8* %scevgep43, i64 %lsr.iv
+ %scevgep45 = getelementptr i8, i8* %scevgep44, i64 1
+ %27 = load i8, i8* %scevgep45, align 1
%cmp61 = icmp eq i8 %27, 0
%lsr.iv.next = add i64 %lsr.iv, 1
br i1 %cmp61, label %return, label %land.rhs, !prof !999
@@ -202,18 +202,18 @@ while.cond95.preheader:
%sunkaddr79 = ptrtoint i8* %4 to i64
%sunkaddr80 = add i64 %sunkaddr79, %lsr.iv27
%sunkaddr81 = inttoptr i64 %sunkaddr80 to i8*
- %28 = load i8* %sunkaddr81, align 1
+ %28 = load i8, i8* %sunkaddr81, align 1
%cmp97238 = icmp eq i8 %28, 0
br i1 %cmp97238, label %return, label %land.rhs99.preheader, !prof !1004
land.rhs99.preheader:
- %scevgep31 = getelementptr i8* %5, i64 %lsr.iv27
- %scevgep40 = getelementptr i8* %4, i64 %lsr.iv27
+ %scevgep31 = getelementptr i8, i8* %5, i64 %lsr.iv27
+ %scevgep40 = getelementptr i8, i8* %4, i64 %lsr.iv27
br label %land.rhs99
land.rhs99:
%lsr.iv17 = phi i64 [ 0, %land.rhs99.preheader ], [ %lsr.iv.next18, %if.then117 ]
%29 = phi i8 [ %31, %if.then117 ], [ %28, %land.rhs99.preheader ]
- %scevgep32 = getelementptr i8* %scevgep31, i64 %lsr.iv17
- %30 = load i8* %scevgep32, align 1
+ %scevgep32 = getelementptr i8, i8* %scevgep31, i64 %lsr.iv17
+ %30 = load i8, i8* %scevgep32, align 1
%cmp101 = icmp eq i8 %30, 0
br i1 %cmp101, label %return, label %while.body104, !prof !1005
while.body104:
@@ -224,9 +224,9 @@ while.body104:
%or.cond210 = or i1 %or.cond209, %cmp115
br i1 %or.cond210, label %if.then117, label %return, !prof !1006
if.then117:
- %scevgep41 = getelementptr i8* %scevgep40, i64 %lsr.iv17
- %scevgep42 = getelementptr i8* %scevgep41, i64 1
- %31 = load i8* %scevgep42, align 1
+ %scevgep41 = getelementptr i8, i8* %scevgep40, i64 %lsr.iv17
+ %scevgep42 = getelementptr i8, i8* %scevgep41, i64 1
+ %31 = load i8, i8* %scevgep42, align 1
%cmp97 = icmp eq i8 %31, 0
%lsr.iv.next18 = add i64 %lsr.iv17, 1
br i1 %cmp97, label %return, label %land.rhs99, !prof !1004
@@ -239,18 +239,18 @@ while.cond130.preheader:
%sunkaddr82 = ptrtoint i8* %4 to i64
%sunkaddr83 = add i64 %sunkaddr82, %lsr.iv27
%sunkaddr84 = inttoptr i64 %sunkaddr83 to i8*
- %32 = load i8* %sunkaddr84, align 1
+ %32 = load i8, i8* %sunkaddr84, align 1
%cmp132244 = icmp eq i8 %32, 0
br i1 %cmp132244, label %return, label %land.rhs134.preheader, !prof !1008
land.rhs134.preheader:
- %scevgep29 = getelementptr i8* %5, i64 %lsr.iv27
- %scevgep37 = getelementptr i8* %4, i64 %lsr.iv27
+ %scevgep29 = getelementptr i8, i8* %5, i64 %lsr.iv27
+ %scevgep37 = getelementptr i8, i8* %4, i64 %lsr.iv27
br label %land.rhs134
land.rhs134:
%lsr.iv22 = phi i64 [ 0, %land.rhs134.preheader ], [ %lsr.iv.next23, %if.then152 ]
%33 = phi i8 [ %35, %if.then152 ], [ %32, %land.rhs134.preheader ]
- %scevgep30 = getelementptr i8* %scevgep29, i64 %lsr.iv22
- %34 = load i8* %scevgep30, align 1
+ %scevgep30 = getelementptr i8, i8* %scevgep29, i64 %lsr.iv22
+ %34 = load i8, i8* %scevgep30, align 1
%cmp136 = icmp eq i8 %34, 0
br i1 %cmp136, label %return, label %while.body139, !prof !1009
while.body139:
@@ -261,9 +261,9 @@ while.body139:
%or.cond212 = or i1 %or.cond211, %cmp150
br i1 %or.cond212, label %if.then152, label %return, !prof !1010
if.then152:
- %scevgep38 = getelementptr i8* %scevgep37, i64 %lsr.iv22
- %scevgep39 = getelementptr i8* %scevgep38, i64 1
- %35 = load i8* %scevgep39, align 1
+ %scevgep38 = getelementptr i8, i8* %scevgep37, i64 %lsr.iv22
+ %scevgep39 = getelementptr i8, i8* %scevgep38, i64 1
+ %35 = load i8, i8* %scevgep39, align 1
%cmp132 = icmp eq i8 %35, 0
%lsr.iv.next23 = add i64 %lsr.iv22, 1
br i1 %cmp132, label %return, label %land.rhs134, !prof !1008
diff --git a/test/CodeGen/AArch64/regress-tail-livereg.ll b/test/CodeGen/AArch64/regress-tail-livereg.ll
index 03c3f33d9477..965aa0d062d5 100644
--- a/test/CodeGen/AArch64/regress-tail-livereg.ll
+++ b/test/CodeGen/AArch64/regress-tail-livereg.ll
@@ -5,7 +5,7 @@ declare void @bar()
define void @foo() {
; CHECK-LABEL: foo:
- %func = load void()** @var
+ %func = load void()*, void()** @var
; Calling a function encourages @foo to use a callee-saved register,
; which makes it a natural choice for the tail call itself. But we don't
diff --git a/test/CodeGen/AArch64/regress-tblgen-chains.ll b/test/CodeGen/AArch64/regress-tblgen-chains.ll
index 477d99625eec..0d301bbd502a 100644
--- a/test/CodeGen/AArch64/regress-tblgen-chains.ll
+++ b/test/CodeGen/AArch64/regress-tblgen-chains.ll
@@ -19,7 +19,7 @@ define i64 @test_chains() {
call void @bar(i8* %locvar)
; CHECK: bl {{_?bar}}
- %inc.1 = load i8* %locvar
+ %inc.1 = load i8, i8* %locvar
%inc.2 = zext i8 %inc.1 to i64
%inc.3 = add i64 %inc.2, 1
%inc.4 = trunc i64 %inc.3 to i8
@@ -30,7 +30,7 @@ define i64 @test_chains() {
; CHECK: sturb {{w[0-9]+}}, [x29, [[LOCADDR]]]
; CHECK: ldurb {{w[0-9]+}}, [x29, [[LOCADDR]]]
- %ret.1 = load i8* %locvar
+ %ret.1 = load i8, i8* %locvar
%ret.2 = zext i8 %ret.1 to i64
ret i64 %ret.2
; CHECK: ret
diff --git a/test/CodeGen/AArch64/regress-w29-reserved-with-fp.ll b/test/CodeGen/AArch64/regress-w29-reserved-with-fp.ll
index c3167e4f4bdd..66a5ed6e5277 100644
--- a/test/CodeGen/AArch64/regress-w29-reserved-with-fp.ll
+++ b/test/CodeGen/AArch64/regress-w29-reserved-with-fp.ll
@@ -7,15 +7,15 @@ define void @test_w29_reserved() {
; CHECK-LABEL: test_w29_reserved:
; CHECK: add x29, sp, #{{[0-9]+}}
- %val1 = load volatile i32* @var
- %val2 = load volatile i32* @var
- %val3 = load volatile i32* @var
- %val4 = load volatile i32* @var
- %val5 = load volatile i32* @var
- %val6 = load volatile i32* @var
- %val7 = load volatile i32* @var
- %val8 = load volatile i32* @var
- %val9 = load volatile i32* @var
+ %val1 = load volatile i32, i32* @var
+ %val2 = load volatile i32, i32* @var
+ %val3 = load volatile i32, i32* @var
+ %val4 = load volatile i32, i32* @var
+ %val5 = load volatile i32, i32* @var
+ %val6 = load volatile i32, i32* @var
+ %val7 = load volatile i32, i32* @var
+ %val8 = load volatile i32, i32* @var
+ %val9 = load volatile i32, i32* @var
; CHECK-NOT: ldr w29,
diff --git a/test/CodeGen/AArch64/remat-float0.ll b/test/CodeGen/AArch64/remat-float0.ll
new file mode 100644
index 000000000000..29af7818cf38
--- /dev/null
+++ b/test/CodeGen/AArch64/remat-float0.ll
@@ -0,0 +1,18 @@
+; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -verify-machineinstrs | FileCheck %s
+
+; Check that float 0 gets rematerialized with an fmov of zero reg instead
+; of being spilled/filled.
+
+declare void @bar(float)
+
+define void @foo() {
+; CHECK-LABEL: foo:
+; CHECK: fmov s0, wzr
+; CHECK: bl bar
+; CHECK: fmov s0, wzr
+; CHECK: bl bar
+ call void @bar(float 0.000000e+00)
+ call void asm sideeffect "", "~{s0},~{s1},~{s2},~{s3},~{s4},~{s5},~{s6},~{s7},~{s8},~{s9},~{s10},~{s11},~{s12},~{s13},~{s14},~{s15},~{s16},~{s17},~{s18},~{s19},~{s20},~{s21},~{s22},~{s23},~{s24},~{s25},~{s26},~{s27},~{s28},~{s29},~{s30},~{s31}"()
+ call void @bar(float 0.000000e+00)
+ ret void
+}
diff --git a/test/CodeGen/AArch64/remat.ll b/test/CodeGen/AArch64/remat.ll
index 32b3ed2d9ac4..8b3e6dd5ad92 100644
--- a/test/CodeGen/AArch64/remat.ll
+++ b/test/CodeGen/AArch64/remat.ll
@@ -1,5 +1,6 @@
; RUN: llc -mtriple=aarch64-linux-gnuabi -mcpu=cortex-a57 -o - %s | FileCheck %s
; RUN: llc -mtriple=aarch64-linux-gnuabi -mcpu=cortex-a53 -o - %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-linux-gnuabi -mcpu=cortex-a72 -o - %s | FileCheck %s
%X = type { i64, i64, i64 }
declare void @f(%X*)
diff --git a/test/CodeGen/AArch64/rm_redundant_cmp.ll b/test/CodeGen/AArch64/rm_redundant_cmp.ll
index 36dc118ed1a5..f66af7fd6270 100644
--- a/test/CodeGen/AArch64/rm_redundant_cmp.ll
+++ b/test/CodeGen/AArch64/rm_redundant_cmp.ll
@@ -15,13 +15,13 @@ define void @test_i16_2cmp_signed_1() {
; CHECK-NOT: cmp
; CHECK: b.ne
entry:
- %0 = load i16* getelementptr inbounds (%struct.s_signed_i16* @cost_s_i8_i16, i64 0, i32 1), align 2
- %1 = load i16* getelementptr inbounds (%struct.s_signed_i16* @cost_s_i8_i16, i64 0, i32 2), align 2
+ %0 = load i16, i16* getelementptr inbounds (%struct.s_signed_i16, %struct.s_signed_i16* @cost_s_i8_i16, i64 0, i32 1), align 2
+ %1 = load i16, i16* getelementptr inbounds (%struct.s_signed_i16, %struct.s_signed_i16* @cost_s_i8_i16, i64 0, i32 2), align 2
%cmp = icmp sgt i16 %0, %1
br i1 %cmp, label %if.then, label %if.else
if.then: ; preds = %entry
- store i16 %0, i16* getelementptr inbounds (%struct.s_signed_i16* @cost_s_i8_i16, i64 0, i32 0), align 2
+ store i16 %0, i16* getelementptr inbounds (%struct.s_signed_i16, %struct.s_signed_i16* @cost_s_i8_i16, i64 0, i32 0), align 2
br label %if.end8
if.else: ; preds = %entry
@@ -29,7 +29,7 @@ if.else: ; preds = %entry
br i1 %cmp5, label %if.then7, label %if.end8
if.then7: ; preds = %if.else
- store i16 %0, i16* getelementptr inbounds (%struct.s_signed_i16* @cost_s_i8_i16, i64 0, i32 0), align 2
+ store i16 %0, i16* getelementptr inbounds (%struct.s_signed_i16, %struct.s_signed_i16* @cost_s_i8_i16, i64 0, i32 0), align 2
br label %if.end8
if.end8: ; preds = %if.else, %if.then7, %if.then
@@ -43,13 +43,13 @@ define void @test_i16_2cmp_signed_2() {
; CHECK-NOT: cmp
; CHECK: b.ge
entry:
- %0 = load i16* getelementptr inbounds (%struct.s_signed_i16* @cost_s_i8_i16, i64 0, i32 1), align 2
- %1 = load i16* getelementptr inbounds (%struct.s_signed_i16* @cost_s_i8_i16, i64 0, i32 2), align 2
+ %0 = load i16, i16* getelementptr inbounds (%struct.s_signed_i16, %struct.s_signed_i16* @cost_s_i8_i16, i64 0, i32 1), align 2
+ %1 = load i16, i16* getelementptr inbounds (%struct.s_signed_i16, %struct.s_signed_i16* @cost_s_i8_i16, i64 0, i32 2), align 2
%cmp = icmp sgt i16 %0, %1
br i1 %cmp, label %if.then, label %if.else
if.then: ; preds = %entry
- store i16 %0, i16* getelementptr inbounds (%struct.s_signed_i16* @cost_s_i8_i16, i64 0, i32 0), align 2
+ store i16 %0, i16* getelementptr inbounds (%struct.s_signed_i16, %struct.s_signed_i16* @cost_s_i8_i16, i64 0, i32 0), align 2
br label %if.end8
if.else: ; preds = %entry
@@ -57,7 +57,7 @@ if.else: ; preds = %entry
br i1 %cmp5, label %if.then7, label %if.end8
if.then7: ; preds = %if.else
- store i16 %1, i16* getelementptr inbounds (%struct.s_signed_i16* @cost_s_i8_i16, i64 0, i32 0), align 2
+ store i16 %1, i16* getelementptr inbounds (%struct.s_signed_i16, %struct.s_signed_i16* @cost_s_i8_i16, i64 0, i32 0), align 2
br label %if.end8
if.end8: ; preds = %if.else, %if.then7, %if.then
@@ -71,13 +71,13 @@ define void @test_i16_2cmp_unsigned_1() {
; CHECK-NOT: cmp
; CHECK: b.ne
entry:
- %0 = load i16* getelementptr inbounds (%struct.s_unsigned_i16* @cost_u_i16, i64 0, i32 1), align 2
- %1 = load i16* getelementptr inbounds (%struct.s_unsigned_i16* @cost_u_i16, i64 0, i32 2), align 2
+ %0 = load i16, i16* getelementptr inbounds (%struct.s_unsigned_i16, %struct.s_unsigned_i16* @cost_u_i16, i64 0, i32 1), align 2
+ %1 = load i16, i16* getelementptr inbounds (%struct.s_unsigned_i16, %struct.s_unsigned_i16* @cost_u_i16, i64 0, i32 2), align 2
%cmp = icmp ugt i16 %0, %1
br i1 %cmp, label %if.then, label %if.else
if.then: ; preds = %entry
- store i16 %0, i16* getelementptr inbounds (%struct.s_unsigned_i16* @cost_u_i16, i64 0, i32 0), align 2
+ store i16 %0, i16* getelementptr inbounds (%struct.s_unsigned_i16, %struct.s_unsigned_i16* @cost_u_i16, i64 0, i32 0), align 2
br label %if.end8
if.else: ; preds = %entry
@@ -85,7 +85,7 @@ if.else: ; preds = %entry
br i1 %cmp5, label %if.then7, label %if.end8
if.then7: ; preds = %if.else
- store i16 %0, i16* getelementptr inbounds (%struct.s_unsigned_i16* @cost_u_i16, i64 0, i32 0), align 2
+ store i16 %0, i16* getelementptr inbounds (%struct.s_unsigned_i16, %struct.s_unsigned_i16* @cost_u_i16, i64 0, i32 0), align 2
br label %if.end8
if.end8: ; preds = %if.else, %if.then7, %if.then
@@ -99,13 +99,13 @@ define void @test_i16_2cmp_unsigned_2() {
; CHECK-NOT: cmp
; CHECK: b.hs
entry:
- %0 = load i16* getelementptr inbounds (%struct.s_unsigned_i16* @cost_u_i16, i64 0, i32 1), align 2
- %1 = load i16* getelementptr inbounds (%struct.s_unsigned_i16* @cost_u_i16, i64 0, i32 2), align 2
+ %0 = load i16, i16* getelementptr inbounds (%struct.s_unsigned_i16, %struct.s_unsigned_i16* @cost_u_i16, i64 0, i32 1), align 2
+ %1 = load i16, i16* getelementptr inbounds (%struct.s_unsigned_i16, %struct.s_unsigned_i16* @cost_u_i16, i64 0, i32 2), align 2
%cmp = icmp ugt i16 %0, %1
br i1 %cmp, label %if.then, label %if.else
if.then: ; preds = %entry
- store i16 %0, i16* getelementptr inbounds (%struct.s_unsigned_i16* @cost_u_i16, i64 0, i32 0), align 2
+ store i16 %0, i16* getelementptr inbounds (%struct.s_unsigned_i16, %struct.s_unsigned_i16* @cost_u_i16, i64 0, i32 0), align 2
br label %if.end8
if.else: ; preds = %entry
@@ -113,7 +113,7 @@ if.else: ; preds = %entry
br i1 %cmp5, label %if.then7, label %if.end8
if.then7: ; preds = %if.else
- store i16 %1, i16* getelementptr inbounds (%struct.s_unsigned_i16* @cost_u_i16, i64 0, i32 0), align 2
+ store i16 %1, i16* getelementptr inbounds (%struct.s_unsigned_i16, %struct.s_unsigned_i16* @cost_u_i16, i64 0, i32 0), align 2
br label %if.end8
if.end8: ; preds = %if.else, %if.then7, %if.then
@@ -136,13 +136,13 @@ define void @test_i8_2cmp_signed_1() {
; CHECK-NOT: cmp
; CHECK: b.ne
entry:
- %0 = load i8* getelementptr inbounds (%struct.s_signed_i8* @cost_s, i64 0, i32 1), align 2
- %1 = load i8* getelementptr inbounds (%struct.s_signed_i8* @cost_s, i64 0, i32 2), align 2
+ %0 = load i8, i8* getelementptr inbounds (%struct.s_signed_i8, %struct.s_signed_i8* @cost_s, i64 0, i32 1), align 2
+ %1 = load i8, i8* getelementptr inbounds (%struct.s_signed_i8, %struct.s_signed_i8* @cost_s, i64 0, i32 2), align 2
%cmp = icmp sgt i8 %0, %1
br i1 %cmp, label %if.then, label %if.else
if.then: ; preds = %entry
- store i8 %0, i8* getelementptr inbounds (%struct.s_signed_i8* @cost_s, i64 0, i32 0), align 2
+ store i8 %0, i8* getelementptr inbounds (%struct.s_signed_i8, %struct.s_signed_i8* @cost_s, i64 0, i32 0), align 2
br label %if.end8
if.else: ; preds = %entry
@@ -150,7 +150,7 @@ if.else: ; preds = %entry
br i1 %cmp5, label %if.then7, label %if.end8
if.then7: ; preds = %if.else
- store i8 %0, i8* getelementptr inbounds (%struct.s_signed_i8* @cost_s, i64 0, i32 0), align 2
+ store i8 %0, i8* getelementptr inbounds (%struct.s_signed_i8, %struct.s_signed_i8* @cost_s, i64 0, i32 0), align 2
br label %if.end8
if.end8: ; preds = %if.else, %if.then7, %if.then
@@ -164,13 +164,13 @@ define void @test_i8_2cmp_signed_2() {
; CHECK-NOT: cmp
; CHECK: b.ge
entry:
- %0 = load i8* getelementptr inbounds (%struct.s_signed_i8* @cost_s, i64 0, i32 1), align 2
- %1 = load i8* getelementptr inbounds (%struct.s_signed_i8* @cost_s, i64 0, i32 2), align 2
+ %0 = load i8, i8* getelementptr inbounds (%struct.s_signed_i8, %struct.s_signed_i8* @cost_s, i64 0, i32 1), align 2
+ %1 = load i8, i8* getelementptr inbounds (%struct.s_signed_i8, %struct.s_signed_i8* @cost_s, i64 0, i32 2), align 2
%cmp = icmp sgt i8 %0, %1
br i1 %cmp, label %if.then, label %if.else
if.then: ; preds = %entry
- store i8 %0, i8* getelementptr inbounds (%struct.s_signed_i8* @cost_s, i64 0, i32 0), align 2
+ store i8 %0, i8* getelementptr inbounds (%struct.s_signed_i8, %struct.s_signed_i8* @cost_s, i64 0, i32 0), align 2
br label %if.end8
if.else: ; preds = %entry
@@ -178,7 +178,7 @@ if.else: ; preds = %entry
br i1 %cmp5, label %if.then7, label %if.end8
if.then7: ; preds = %if.else
- store i8 %1, i8* getelementptr inbounds (%struct.s_signed_i8* @cost_s, i64 0, i32 0), align 2
+ store i8 %1, i8* getelementptr inbounds (%struct.s_signed_i8, %struct.s_signed_i8* @cost_s, i64 0, i32 0), align 2
br label %if.end8
if.end8: ; preds = %if.else, %if.then7, %if.then
@@ -192,13 +192,13 @@ define void @test_i8_2cmp_unsigned_1() {
; CHECK-NOT: cmp
; CHECK: b.ne
entry:
- %0 = load i8* getelementptr inbounds (%struct.s_unsigned_i8* @cost_u_i8, i64 0, i32 1), align 2
- %1 = load i8* getelementptr inbounds (%struct.s_unsigned_i8* @cost_u_i8, i64 0, i32 2), align 2
+ %0 = load i8, i8* getelementptr inbounds (%struct.s_unsigned_i8, %struct.s_unsigned_i8* @cost_u_i8, i64 0, i32 1), align 2
+ %1 = load i8, i8* getelementptr inbounds (%struct.s_unsigned_i8, %struct.s_unsigned_i8* @cost_u_i8, i64 0, i32 2), align 2
%cmp = icmp ugt i8 %0, %1
br i1 %cmp, label %if.then, label %if.else
if.then: ; preds = %entry
- store i8 %0, i8* getelementptr inbounds (%struct.s_unsigned_i8* @cost_u_i8, i64 0, i32 0), align 2
+ store i8 %0, i8* getelementptr inbounds (%struct.s_unsigned_i8, %struct.s_unsigned_i8* @cost_u_i8, i64 0, i32 0), align 2
br label %if.end8
if.else: ; preds = %entry
@@ -206,7 +206,7 @@ if.else: ; preds = %entry
br i1 %cmp5, label %if.then7, label %if.end8
if.then7: ; preds = %if.else
- store i8 %0, i8* getelementptr inbounds (%struct.s_unsigned_i8* @cost_u_i8, i64 0, i32 0), align 2
+ store i8 %0, i8* getelementptr inbounds (%struct.s_unsigned_i8, %struct.s_unsigned_i8* @cost_u_i8, i64 0, i32 0), align 2
br label %if.end8
if.end8: ; preds = %if.else, %if.then7, %if.then
@@ -220,13 +220,13 @@ define void @test_i8_2cmp_unsigned_2() {
; CHECK-NOT: cmp
; CHECK: b.hs
entry:
- %0 = load i8* getelementptr inbounds (%struct.s_unsigned_i8* @cost_u_i8, i64 0, i32 1), align 2
- %1 = load i8* getelementptr inbounds (%struct.s_unsigned_i8* @cost_u_i8, i64 0, i32 2), align 2
+ %0 = load i8, i8* getelementptr inbounds (%struct.s_unsigned_i8, %struct.s_unsigned_i8* @cost_u_i8, i64 0, i32 1), align 2
+ %1 = load i8, i8* getelementptr inbounds (%struct.s_unsigned_i8, %struct.s_unsigned_i8* @cost_u_i8, i64 0, i32 2), align 2
%cmp = icmp ugt i8 %0, %1
br i1 %cmp, label %if.then, label %if.else
if.then: ; preds = %entry
- store i8 %0, i8* getelementptr inbounds (%struct.s_unsigned_i8* @cost_u_i8, i64 0, i32 0), align 2
+ store i8 %0, i8* getelementptr inbounds (%struct.s_unsigned_i8, %struct.s_unsigned_i8* @cost_u_i8, i64 0, i32 0), align 2
br label %if.end8
if.else: ; preds = %entry
@@ -234,7 +234,7 @@ if.else: ; preds = %entry
br i1 %cmp5, label %if.then7, label %if.end8
if.then7: ; preds = %if.else
- store i8 %1, i8* getelementptr inbounds (%struct.s_unsigned_i8* @cost_u_i8, i64 0, i32 0), align 2
+ store i8 %1, i8* getelementptr inbounds (%struct.s_unsigned_i8, %struct.s_unsigned_i8* @cost_u_i8, i64 0, i32 0), align 2
br label %if.end8
if.end8: ; preds = %if.else, %if.then7, %if.then
diff --git a/test/CodeGen/AArch64/sibling-call.ll b/test/CodeGen/AArch64/sibling-call.ll
index 34e3bb410e8c..a68fdec4cfbc 100644
--- a/test/CodeGen/AArch64/sibling-call.ll
+++ b/test/CodeGen/AArch64/sibling-call.ll
@@ -75,8 +75,8 @@ define void @caller_to16_from16([8 x i32], i64 %a, i64 %b) {
; CHECK: ldr [[VAL0:x[0-9]+]],
; CHECK: ldr [[VAL1:x[0-9]+]],
-; CHECK: str [[VAL1]],
; CHECK: str [[VAL0]],
+; CHECK: str [[VAL1]],
; CHECK-NOT: add sp, sp,
; CHECK: b callee_stack16
@@ -88,7 +88,7 @@ define void @indirect_tail() {
; CHECK-LABEL: indirect_tail:
; CHECK-NOT: sub sp, sp
- %fptr = load void(i32)** @func
+ %fptr = load void(i32)*, void(i32)** @func
tail call void %fptr(i32 42)
ret void
; CHECK: ldr [[FPTR:x[1-9]+]], [{{x[0-9]+}}, {{#?}}:lo12:func]
diff --git a/test/CodeGen/AArch64/stack-guard-remat-bitcast.ll b/test/CodeGen/AArch64/stack-guard-remat-bitcast.ll
index bedbf5facbb7..eb4937e75f61 100644
--- a/test/CodeGen/AArch64/stack-guard-remat-bitcast.ll
+++ b/test/CodeGen/AArch64/stack-guard-remat-bitcast.ll
@@ -15,7 +15,7 @@
define i32 @test_stack_guard_remat2() {
entry:
%StackGuardSlot = alloca i8*
- %StackGuard = load i8** bitcast (i64** @__stack_chk_guard to i8**)
+ %StackGuard = load i8*, i8** bitcast (i64** @__stack_chk_guard to i8**)
call void @llvm.stackprotector(i8* %StackGuard, i8** %StackGuardSlot)
%container = alloca [32 x i8], align 1
call void @llvm.stackprotectorcheck(i8** bitcast (i64** @__stack_chk_guard to i8**))
diff --git a/test/CodeGen/AArch64/stack_guard_remat.ll b/test/CodeGen/AArch64/stack_guard_remat.ll
index cee7266abdcc..d6bae62e5edc 100644
--- a/test/CodeGen/AArch64/stack_guard_remat.ll
+++ b/test/CodeGen/AArch64/stack_guard_remat.ll
@@ -30,7 +30,7 @@ entry:
%a1 = alloca [256 x i32], align 4
%0 = bitcast [256 x i32]* %a1 to i8*
call void @llvm.lifetime.start(i64 1024, i8* %0)
- %arraydecay = getelementptr inbounds [256 x i32]* %a1, i64 0, i64 0
+ %arraydecay = getelementptr inbounds [256 x i32], [256 x i32]* %a1, i64 0, i64 0
call void @foo3(i32* %arraydecay)
call void asm sideeffect "foo2", "~{w0},~{w1},~{w2},~{w3},~{w4},~{w5},~{w6},~{w7},~{w8},~{w9},~{w10},~{w11},~{w12},~{w13},~{w14},~{w15},~{w16},~{w17},~{w18},~{w19},~{w20},~{w21},~{w22},~{w23},~{w24},~{w25},~{w26},~{w27},~{w28},~{w29},~{w30}"()
call void @llvm.lifetime.end(i64 1024, i8* %0)
diff --git a/test/CodeGen/AArch64/stackmap-liveness.ll b/test/CodeGen/AArch64/stackmap-liveness.ll
new file mode 100644
index 000000000000..6b37aac16f9e
--- /dev/null
+++ b/test/CodeGen/AArch64/stackmap-liveness.ll
@@ -0,0 +1,47 @@
+; RUN: llc < %s -mtriple=aarch64-apple-darwin | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+
+; CHECK-LABEL: .section __LLVM_STACKMAPS,__llvm_stackmaps
+; CHECK-NEXT: __LLVM_StackMaps:
+; Header
+; CHECK-NEXT: .byte 1
+; CHECK-NEXT: .byte 0
+; CHECK-NEXT: .short 0
+; Num Functions
+; CHECK-NEXT: .long 1
+; Num LargeConstants
+; CHECK-NEXT: .long 0
+; Num Callsites
+; CHECK-NEXT: .long 1
+
+; Functions and stack size
+; CHECK-NEXT: .quad _stackmap_liveness
+; CHECK-NEXT: .quad 16
+
+; Test that the return register is recognized as a live-out.
+define i64 @stackmap_liveness(i1 %c) {
+; CHECK-LABEL: .long L{{.*}}-_stackmap_liveness
+; CHECK-NEXT: .short 0
+; CHECK-NEXT: .short 0
+; Padding
+; CHECK-NEXT: .short 0
+; Num LiveOut Entries: 2
+; CHECK-NEXT: .short 2
+; LiveOut Entry 0: X0
+; CHECK-NEXT: .short 0
+; CHECK-NEXT: .byte 0
+; CHECK-NEXT: .byte 8
+; LiveOut Entry 1: SP
+; CHECK-NEXT: .short 31
+; CHECK-NEXT: .byte 0
+; CHECK-NEXT: .byte 8
+; Align
+; CHECK-NEXT: .align 3
+ %1 = select i1 %c, i64 1, i64 2
+ call anyregcc void (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.void(i64 1, i32 32, i8* null, i32 0)
+ ret i64 %1
+}
+
+declare void @llvm.experimental.patchpoint.void(i64, i32, i8*, i32, ...)
+
diff --git a/test/CodeGen/AArch64/tail-call.ll b/test/CodeGen/AArch64/tail-call.ll
index 7fb39545a32e..e5766154bb46 100644
--- a/test/CodeGen/AArch64/tail-call.ll
+++ b/test/CodeGen/AArch64/tail-call.ll
@@ -103,3 +103,41 @@ define fastcc void @caller_weak() {
tail call void @callee_weak()
ret void
}
+
+declare { [2 x float] } @get_vec2()
+
+define { [3 x float] } @test_add_elem() {
+; CHECK-LABEL: test_add_elem:
+; CHECK: bl get_vec2
+; CHECK: fmov s2, #1.0
+; CHECK: ret
+
+ %call = tail call { [2 x float] } @get_vec2()
+ %arr = extractvalue { [2 x float] } %call, 0
+ %arr.0 = extractvalue [2 x float] %arr, 0
+ %arr.1 = extractvalue [2 x float] %arr, 1
+
+ %res.0 = insertvalue { [3 x float] } undef, float %arr.0, 0, 0
+ %res.01 = insertvalue { [3 x float] } %res.0, float %arr.1, 0, 1
+ %res.012 = insertvalue { [3 x float] } %res.01, float 1.000000e+00, 0, 2
+ ret { [3 x float] } %res.012
+}
+
+declare double @get_double()
+define { double, [2 x double] } @test_mismatched_insert() {
+; CHECK-LABEL: test_mismatched_insert:
+; CHECK: bl get_double
+; CHECK: bl get_double
+; CHECK: bl get_double
+; CHECK: ret
+
+ %val0 = call double @get_double()
+ %val1 = call double @get_double()
+ %val2 = tail call double @get_double()
+
+ %res.0 = insertvalue { double, [2 x double] } undef, double %val0, 0
+ %res.01 = insertvalue { double, [2 x double] } %res.0, double %val1, 1, 0
+ %res.012 = insertvalue { double, [2 x double] } %res.01, double %val2, 1, 1
+
+ ret { double, [2 x double] } %res.012
+}
diff --git a/test/CodeGen/AArch64/tailcall-explicit-sret.ll b/test/CodeGen/AArch64/tailcall-explicit-sret.ll
index f4ad65584095..4d80f2ac5c12 100644
--- a/test/CodeGen/AArch64/tailcall-explicit-sret.ll
+++ b/test/CodeGen/AArch64/tailcall-explicit-sret.ll
@@ -42,7 +42,7 @@ define void @test_tailcall_explicit_sret_alloca_unused() #0 {
; CHECK: ret
define void @test_tailcall_explicit_sret_alloca_dummyusers(i1024* %ptr) #0 {
%l = alloca i1024, align 8
- %r = load i1024* %ptr, align 8
+ %r = load i1024, i1024* %ptr, align 8
store i1024 %r, i1024* %l, align 8
tail call void @test_explicit_sret(i1024* %l)
ret void
@@ -55,7 +55,7 @@ define void @test_tailcall_explicit_sret_alloca_dummyusers(i1024* %ptr) #0 {
; CHECK-NEXT: bl _test_explicit_sret
; CHECK: ret
define void @test_tailcall_explicit_sret_gep(i1024* %ptr) #0 {
- %ptr2 = getelementptr i1024* %ptr, i32 1
+ %ptr2 = getelementptr i1024, i1024* %ptr, i32 1
tail call void @test_explicit_sret(i1024* %ptr2)
ret void
}
@@ -70,7 +70,7 @@ define void @test_tailcall_explicit_sret_gep(i1024* %ptr) #0 {
define i1024 @test_tailcall_explicit_sret_alloca_returned() #0 {
%l = alloca i1024, align 8
tail call void @test_explicit_sret(i1024* %l)
- %r = load i1024* %l, align 8
+ %r = load i1024, i1024* %l, align 8
ret i1024 %r
}
@@ -85,7 +85,7 @@ define i1024 @test_tailcall_explicit_sret_alloca_returned() #0 {
define void @test_indirect_tailcall_explicit_sret_nosret_arg(i1024* sret %arg, void (i1024*)* %f) #0 {
%l = alloca i1024, align 8
tail call void %f(i1024* %l)
- %r = load i1024* %l, align 8
+ %r = load i1024, i1024* %l, align 8
store i1024 %r, i1024* %arg, align 8
ret void
}
diff --git a/test/CodeGen/AArch64/tailcall-mem-intrinsics.ll b/test/CodeGen/AArch64/tailcall-mem-intrinsics.ll
new file mode 100644
index 000000000000..b970fb124151
--- /dev/null
+++ b/test/CodeGen/AArch64/tailcall-mem-intrinsics.ll
@@ -0,0 +1,31 @@
+; RUN: llc -mtriple=aarch64-unknown-unknown < %s | FileCheck %s
+
+; CHECK-LABEL: tail_memcpy:
+; CHECK: b memcpy
+define void @tail_memcpy(i8* nocapture %p, i8* nocapture readonly %q, i32 %n) #0 {
+entry:
+ tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %p, i8* %q, i32 %n, i32 1, i1 false)
+ ret void
+}
+
+; CHECK-LABEL: tail_memmove:
+; CHECK: b memmove
+define void @tail_memmove(i8* nocapture %p, i8* nocapture readonly %q, i32 %n) #0 {
+entry:
+ tail call void @llvm.memmove.p0i8.p0i8.i32(i8* %p, i8* %q, i32 %n, i32 1, i1 false)
+ ret void
+}
+
+; CHECK-LABEL: tail_memset:
+; CHECK: b memset
+define void @tail_memset(i8* nocapture %p, i8 %c, i32 %n) #0 {
+entry:
+ tail call void @llvm.memset.p0i8.i32(i8* %p, i8 %c, i32 %n, i32 1, i1 false)
+ ret void
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i32, i1) #0
+declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i32, i1) #0
+declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) #0
+
+attributes #0 = { nounwind }
diff --git a/test/CodeGen/AArch64/tailcall_misched_graph.ll b/test/CodeGen/AArch64/tailcall_misched_graph.ll
new file mode 100644
index 000000000000..343ffab57e35
--- /dev/null
+++ b/test/CodeGen/AArch64/tailcall_misched_graph.ll
@@ -0,0 +1,42 @@
+; RUN: llc -mcpu=cyclone -debug-only=misched < %s 2>&1 | FileCheck %s
+
+; REQUIRES: asserts
+
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+target triple = "arm64-apple-ios7.0.0"
+
+define void @caller2(i8* %a0, i8* %a1, i8* %a2, i8* %a3, i8* %a4, i8* %a5, i8* %a6, i8* %a7, i8* %a8, i8* %a9) {
+entry:
+ tail call void @callee2(i8* %a1, i8* %a2, i8* %a3, i8* %a4, i8* %a5, i8* %a6, i8* %a7, i8* %a8, i8* %a9, i8* %a0)
+ ret void
+}
+
+declare void @callee2(i8*, i8*, i8*, i8*, i8*,
+ i8*, i8*, i8*, i8*, i8*)
+
+; Make sure there is a dependence between the load and store to the same stack
+; location during a tail call. Tail calls clobber the incoming argument area and
+; therefore it is not safe to assume argument locations are invariant.
+; PR23459 has a test case that we were miscompiling because of this at the
+; time.
+
+; CHECK: Frame Objects
+; CHECK: fi#-4: {{.*}} fixed, at location [SP+8]
+; CHECK: fi#-3: {{.*}} fixed, at location [SP]
+; CHECK: fi#-2: {{.*}} fixed, at location [SP+8]
+; CHECK: fi#-1: {{.*}} fixed, at location [SP]
+
+; CHECK: [[VRA:%vreg.*]]<def> = LDRXui <fi#-1>
+; CHECK: [[VRB:%vreg.*]]<def> = LDRXui <fi#-2>
+; CHECK: STRXui %vreg{{.*}}, <fi#-4>
+; CHECK: STRXui [[VRB]], <fi#-3>
+
+; Make sure that there is a dependence edge between fi#-2 and fi#-4.
+; Without this edge the scheduler would be free to move the store across the load.
+
+; CHECK: SU({{.*}}): [[VRB]]<def> = LDRXui <fi#-2>
+; CHECK-NOT: SU
+; CHECK: Successors:
+; CHECK: ch SU([[DEPSTORE:.*]]): Latency=0
+
+; CHECK: SU([[DEPSTORE]]): STRXui %vreg0, <fi#-4>
diff --git a/test/CodeGen/AArch64/tbz-tbnz.ll b/test/CodeGen/AArch64/tbz-tbnz.ll
index c77043cae94f..8863f70444d1 100644
--- a/test/CodeGen/AArch64/tbz-tbnz.ll
+++ b/test/CodeGen/AArch64/tbz-tbnz.ll
@@ -211,7 +211,7 @@ define void @test11(i64 %val1, i64* %ptr) {
; CHECK-NOT: cmp
; CHECK: tbz [[CMP]], #63
- %val = load i64* %ptr
+ %val = load i64, i64* %ptr
%tst = icmp slt i64 %val, 0
br i1 %tst, label %if.then, label %if.end
diff --git a/test/CodeGen/AArch64/tst-br.ll b/test/CodeGen/AArch64/tst-br.ll
index 5dc7b5df475a..345c4d9ba95a 100644
--- a/test/CodeGen/AArch64/tst-br.ll
+++ b/test/CodeGen/AArch64/tst-br.ll
@@ -9,8 +9,8 @@
define i32 @test_tbz() {
; CHECK-LABEL: test_tbz:
- %val = load i32* @var32
- %val64 = load i64* @var64
+ %val = load i32, i32* @var32
+ %val64 = load i64, i64* @var64
%tbit0 = and i32 %val, 32768
%tst0 = icmp ne i32 %tbit0, 0
diff --git a/test/CodeGen/AArch64/vcvt-oversize.ll b/test/CodeGen/AArch64/vcvt-oversize.ll
new file mode 100644
index 000000000000..066a4b666204
--- /dev/null
+++ b/test/CodeGen/AArch64/vcvt-oversize.ll
@@ -0,0 +1,16 @@
+; RUN: llc -mtriple=aarch64 < %s | FileCheck %s
+
+define <8 x i8> @float_to_i8(<8 x float>* %in) {
+; CHECK-LABEL: float_to_i8:
+; CHECK-DAG: fadd v[[LSB:[0-9]+]].4s, v0.4s, v0.4s
+; CHECK-DAG: fadd v[[MSB:[0-9]+]].4s, v1.4s, v1.4s
+; CHECK-DAG: fcvtzu v[[LSB2:[0-9]+]].4s, v[[LSB]].4s
+; CHECK-DAG: fcvtzu v[[MSB2:[0-9]+]].4s, v[[MSB]].4s
+; CHECK-DAG: xtn v[[TMP:[0-9]+]].4h, v[[LSB]].4s
+; CHECK-DAG: xtn2 v[[TMP]].8h, v[[MSB]].4s
+; CHECK-DAG: xtn v0.8b, v[[TMP]].8h
+ %l = load <8 x float>, <8 x float>* %in
+ %scale = fmul <8 x float> %l, <float 2.0, float 2.0, float 2.0, float 2.0, float 2.0, float 2.0, float 2.0, float 2.0>
+ %conv = fptoui <8 x float> %scale to <8 x i8>
+ ret <8 x i8> %conv
+}
diff --git a/test/CodeGen/AArch64/zero-reg.ll b/test/CodeGen/AArch64/zero-reg.ll
index bc112ab8db98..62b2ea34ade1 100644
--- a/test/CodeGen/AArch64/zero-reg.ll
+++ b/test/CodeGen/AArch64/zero-reg.ll
@@ -21,7 +21,7 @@ define void @test_sp(i32 %val) {
; Important correctness point here is that LLVM doesn't try to use xzr
; as an addressing register: "str w0, [xzr]" is not a valid A64
; instruction (0b11111 in the Rn field would mean "sp").
- %addr = getelementptr i32* null, i64 0
+ %addr = getelementptr i32, i32* null, i64 0
store i32 %val, i32* %addr
; CHECK: str {{w[0-9]+}}, [{{x[0-9]+|sp}}]
diff --git a/test/CodeGen/ARM/2006-11-10-CycleInDAG.ll b/test/CodeGen/ARM/2006-11-10-CycleInDAG.ll
index f8bd886aa9e1..b719f9f4d235 100644
--- a/test/CodeGen/ARM/2006-11-10-CycleInDAG.ll
+++ b/test/CodeGen/ARM/2006-11-10-CycleInDAG.ll
@@ -11,9 +11,9 @@ bb169.i: ; preds = %entry
ret void
cond_true11: ; preds = %entry
- %tmp.i32 = load %struct.layer_data** @ld ; <%struct.layer_data*> [#uses=2]
- %tmp3.i35 = getelementptr %struct.layer_data* %tmp.i32, i32 0, i32 1, i32 2048; <i8*> [#uses=2]
- %tmp.i36 = getelementptr %struct.layer_data* %tmp.i32, i32 0, i32 2 ; <i8**> [#uses=1]
+ %tmp.i32 = load %struct.layer_data*, %struct.layer_data** @ld ; <%struct.layer_data*> [#uses=2]
+ %tmp3.i35 = getelementptr %struct.layer_data, %struct.layer_data* %tmp.i32, i32 0, i32 1, i32 2048; <i8*> [#uses=2]
+ %tmp.i36 = getelementptr %struct.layer_data, %struct.layer_data* %tmp.i32, i32 0, i32 2 ; <i8**> [#uses=1]
store i8* %tmp3.i35, i8** %tmp.i36
store i8* %tmp3.i35, i8** null
ret void
diff --git a/test/CodeGen/ARM/2007-01-19-InfiniteLoop.ll b/test/CodeGen/ARM/2007-01-19-InfiniteLoop.ll
index e7c0129a7752..4ba81e01ad31 100644
--- a/test/CodeGen/ARM/2007-01-19-InfiniteLoop.ll
+++ b/test/CodeGen/ARM/2007-01-19-InfiniteLoop.ll
@@ -1,5 +1,9 @@
; RUN: llc < %s -mtriple=arm-apple-ios -mattr=+v6,+vfp2 | FileCheck %s
+; RUN: llc < %s -mtriple=arm-apple-ios -mattr=+v6,+vfp2 | FileCheck --check-prefix=DOMAIN %s
+; The execution domain checking code would translate vmovs to vorr whether or not
+; we had NEON instructions. Verify we don't when NEON is not enabled.
+; DOMAIN-NOT: vorr
@quant_coef = external global [6 x [4 x [4 x i32]]] ; <[6 x [4 x [4 x i32]]]*> [#uses=1]
@dequant_coef = external global [6 x [4 x [4 x i32]]] ; <[6 x [4 x [4 x i32]]]*> [#uses=1]
@A = external global [4 x [4 x i32]] ; <[4 x [4 x i32]]*> [#uses=1]
@@ -15,15 +19,15 @@ entry:
br label %cond_next489
cond_next489: ; preds = %cond_false, %bb471
- %j.7.in = load i8* null ; <i8> [#uses=1]
- %i.8.in = load i8* null ; <i8> [#uses=1]
+ %j.7.in = load i8, i8* null ; <i8> [#uses=1]
+ %i.8.in = load i8, i8* null ; <i8> [#uses=1]
%i.8 = zext i8 %i.8.in to i32 ; <i32> [#uses=4]
%j.7 = zext i8 %j.7.in to i32 ; <i32> [#uses=4]
- %tmp495 = getelementptr [4 x [4 x i32]]* %predicted_block, i32 0, i32 %i.8, i32 %j.7 ; <i32*> [#uses=2]
- %tmp496 = load i32* %tmp495 ; <i32> [#uses=2]
- %tmp502 = load i32* null ; <i32> [#uses=1]
- %tmp542 = getelementptr [6 x [4 x [4 x i32]]]* @quant_coef, i32 0, i32 0, i32 %i.8, i32 %j.7 ; <i32*> [#uses=1]
- %tmp543 = load i32* %tmp542 ; <i32> [#uses=1]
+ %tmp495 = getelementptr [4 x [4 x i32]], [4 x [4 x i32]]* %predicted_block, i32 0, i32 %i.8, i32 %j.7 ; <i32*> [#uses=2]
+ %tmp496 = load i32, i32* %tmp495 ; <i32> [#uses=2]
+ %tmp502 = load i32, i32* null ; <i32> [#uses=1]
+ %tmp542 = getelementptr [6 x [4 x [4 x i32]]], [6 x [4 x [4 x i32]]]* @quant_coef, i32 0, i32 0, i32 %i.8, i32 %j.7 ; <i32*> [#uses=1]
+ %tmp543 = load i32, i32* %tmp542 ; <i32> [#uses=1]
%tmp548 = ashr i32 0, 0 ; <i32> [#uses=3]
%tmp561 = sub i32 0, %tmp496 ; <i32> [#uses=3]
%abscond563 = icmp sgt i32 %tmp561, -1 ; <i1> [#uses=1]
@@ -35,10 +39,10 @@ cond_next489: ; preds = %cond_false, %bb471
br i1 %tmp579, label %bb712, label %cond_next589
cond_next589: ; preds = %cond_next489
- %tmp605 = getelementptr [6 x [4 x [4 x i32]]]* @dequant_coef, i32 0, i32 0, i32 %i.8, i32 %j.7 ; <i32*> [#uses=1]
- %tmp606 = load i32* %tmp605 ; <i32> [#uses=1]
- %tmp612 = load i32* null ; <i32> [#uses=1]
- %tmp629 = load i32* null ; <i32> [#uses=1]
+ %tmp605 = getelementptr [6 x [4 x [4 x i32]]], [6 x [4 x [4 x i32]]]* @dequant_coef, i32 0, i32 0, i32 %i.8, i32 %j.7 ; <i32*> [#uses=1]
+ %tmp606 = load i32, i32* %tmp605 ; <i32> [#uses=1]
+ %tmp612 = load i32, i32* null ; <i32> [#uses=1]
+ %tmp629 = load i32, i32* null ; <i32> [#uses=1]
%tmp629a = sitofp i32 %tmp629 to double ; <double> [#uses=1]
%tmp631 = fmul double %tmp629a, 0.000000e+00 ; <double> [#uses=1]
%tmp632 = fadd double 0.000000e+00, %tmp631 ; <double> [#uses=1]
@@ -85,9 +89,9 @@ bb737: ; preds = %cond_false689
cond_true740: ; preds = %bb737
%tmp761 = call fastcc i32 @sign( i32 %tmp576, i32 0 ) ; <i32> [#uses=1]
- %tmp780 = load i32* null ; <i32> [#uses=1]
- %tmp785 = getelementptr [4 x [4 x i32]]* @A, i32 0, i32 %i.8, i32 %j.7 ; <i32*> [#uses=1]
- %tmp786 = load i32* %tmp785 ; <i32> [#uses=1]
+ %tmp780 = load i32, i32* null ; <i32> [#uses=1]
+ %tmp785 = getelementptr [4 x [4 x i32]], [4 x [4 x i32]]* @A, i32 0, i32 %i.8, i32 %j.7 ; <i32*> [#uses=1]
+ %tmp786 = load i32, i32* %tmp785 ; <i32> [#uses=1]
%tmp781 = mul i32 %tmp780, %tmp761 ; <i32> [#uses=1]
%tmp787 = mul i32 %tmp781, %tmp786 ; <i32> [#uses=1]
%tmp789 = shl i32 %tmp787, 0 ; <i32> [#uses=1]
@@ -96,7 +100,7 @@ cond_true740: ; preds = %bb737
cond_next791: ; preds = %cond_true740, %bb737
%ilev.1 = phi i32 [ %tmp790, %cond_true740 ], [ 0, %bb737 ] ; <i32> [#uses=1]
- %tmp796 = load i32* %tmp495 ; <i32> [#uses=1]
+ %tmp796 = load i32, i32* %tmp495 ; <i32> [#uses=1]
%tmp798 = add i32 %tmp796, %ilev.1 ; <i32> [#uses=1]
%tmp812 = mul i32 0, %tmp502 ; <i32> [#uses=0]
%tmp818 = call fastcc i32 @sign( i32 0, i32 %tmp798 ) ; <i32> [#uses=0]
diff --git a/test/CodeGen/ARM/2007-03-07-CombinerCrash.ll b/test/CodeGen/ARM/2007-03-07-CombinerCrash.ll
index 83b26d340062..472a345a0d71 100644
--- a/test/CodeGen/ARM/2007-03-07-CombinerCrash.ll
+++ b/test/CodeGen/ARM/2007-03-07-CombinerCrash.ll
@@ -4,10 +4,10 @@ define fastcc i8* @read_sleb128(i8* %p, i32* %val) {
br label %bb
bb: ; preds = %bb, %0
- %p_addr.0 = getelementptr i8* %p, i32 0 ; <i8*> [#uses=1]
- %tmp2 = load i8* %p_addr.0 ; <i8> [#uses=2]
+ %p_addr.0 = getelementptr i8, i8* %p, i32 0 ; <i8*> [#uses=1]
+ %tmp2 = load i8, i8* %p_addr.0 ; <i8> [#uses=2]
%tmp4.rec = add i32 0, 1 ; <i32> [#uses=1]
- %tmp4 = getelementptr i8* %p, i32 %tmp4.rec ; <i8*> [#uses=1]
+ %tmp4 = getelementptr i8, i8* %p, i32 %tmp4.rec ; <i8*> [#uses=1]
%tmp56 = zext i8 %tmp2 to i32 ; <i32> [#uses=1]
%tmp7 = and i32 %tmp56, 127 ; <i32> [#uses=1]
%tmp9 = shl i32 %tmp7, 0 ; <i32> [#uses=1]
diff --git a/test/CodeGen/ARM/2007-03-13-InstrSched.ll b/test/CodeGen/ARM/2007-03-13-InstrSched.ll
index 4783f3707690..9c0143be06c3 100644
--- a/test/CodeGen/ARM/2007-03-13-InstrSched.ll
+++ b/test/CodeGen/ARM/2007-03-13-InstrSched.ll
@@ -23,30 +23,30 @@ bb74: ; preds = %bb26, %newFuncRoot
%d1.1 = phi i32 [ %tmp54, %bb26 ], [ 8192, %newFuncRoot ] ; <i32> [#uses=2]
%d2.1 = phi i32 [ %tmp64, %bb26 ], [ 8192, %newFuncRoot ] ; <i32> [#uses=2]
%d3.1 = phi i32 [ %tmp69, %bb26 ], [ 8192, %newFuncRoot ] ; <i32> [#uses=2]
- %fm.1 = load i32* %fm.1.in ; <i32> [#uses=4]
+ %fm.1 = load i32, i32* %fm.1.in ; <i32> [#uses=4]
icmp eq i32 %fp.1.rec, %tmp8 ; <i1>:0 [#uses=1]
br i1 %0, label %bb78.exitStub, label %bb26
bb26: ; preds = %bb74
- %tmp28 = getelementptr i32** %tmp1, i32 %fp.1.rec ; <i32**> [#uses=1]
- %tmp30 = load i32** %tmp28 ; <i32*> [#uses=4]
- %tmp33 = getelementptr i32* %tmp30, i32 %i.0196.0.ph ; <i32*> [#uses=1]
- %tmp34 = load i32* %tmp33 ; <i32> [#uses=1]
- %tmp38 = getelementptr i32* %tmp30, i32 %tmp36224 ; <i32*> [#uses=1]
- %tmp39 = load i32* %tmp38 ; <i32> [#uses=1]
+ %tmp28 = getelementptr i32*, i32** %tmp1, i32 %fp.1.rec ; <i32**> [#uses=1]
+ %tmp30 = load i32*, i32** %tmp28 ; <i32*> [#uses=4]
+ %tmp33 = getelementptr i32, i32* %tmp30, i32 %i.0196.0.ph ; <i32*> [#uses=1]
+ %tmp34 = load i32, i32* %tmp33 ; <i32> [#uses=1]
+ %tmp38 = getelementptr i32, i32* %tmp30, i32 %tmp36224 ; <i32*> [#uses=1]
+ %tmp39 = load i32, i32* %tmp38 ; <i32> [#uses=1]
%tmp42 = mul i32 %tmp34, %fm.1 ; <i32> [#uses=1]
%tmp44 = add i32 %tmp42, %d0.1 ; <i32> [#uses=1]
- %tmp48 = getelementptr i32* %tmp30, i32 %tmp46223 ; <i32*> [#uses=1]
- %tmp49 = load i32* %tmp48 ; <i32> [#uses=1]
+ %tmp48 = getelementptr i32, i32* %tmp30, i32 %tmp46223 ; <i32*> [#uses=1]
+ %tmp49 = load i32, i32* %tmp48 ; <i32> [#uses=1]
%tmp52 = mul i32 %tmp39, %fm.1 ; <i32> [#uses=1]
%tmp54 = add i32 %tmp52, %d1.1 ; <i32> [#uses=1]
- %tmp58 = getelementptr i32* %tmp30, i32 %tmp56222 ; <i32*> [#uses=1]
- %tmp59 = load i32* %tmp58 ; <i32> [#uses=1]
+ %tmp58 = getelementptr i32, i32* %tmp30, i32 %tmp56222 ; <i32*> [#uses=1]
+ %tmp59 = load i32, i32* %tmp58 ; <i32> [#uses=1]
%tmp62 = mul i32 %tmp49, %fm.1 ; <i32> [#uses=1]
%tmp64 = add i32 %tmp62, %d2.1 ; <i32> [#uses=1]
%tmp67 = mul i32 %tmp59, %fm.1 ; <i32> [#uses=1]
%tmp69 = add i32 %tmp67, %d3.1 ; <i32> [#uses=1]
%tmp71.rec = add i32 %fp.1.rec, 1 ; <i32> [#uses=2]
- %tmp71 = getelementptr i32* %tmp1011, i32 %tmp71.rec ; <i32*> [#uses=1]
+ %tmp71 = getelementptr i32, i32* %tmp1011, i32 %tmp71.rec ; <i32*> [#uses=1]
br label %bb74
}
diff --git a/test/CodeGen/ARM/2007-03-21-JoinIntervalsCrash.ll b/test/CodeGen/ARM/2007-03-21-JoinIntervalsCrash.ll
index b0953dc8b61f..7c425961958c 100644
--- a/test/CodeGen/ARM/2007-03-21-JoinIntervalsCrash.ll
+++ b/test/CodeGen/ARM/2007-03-21-JoinIntervalsCrash.ll
@@ -41,7 +41,7 @@ entry:
%spec.1961.adj.ins = or i64 %spec.1961.adj, 0 ; <i64> [#uses=2]
%tmp10959 = lshr i64 %spec.1961.adj.ins, 32 ; <i64> [#uses=2]
%tmp1920 = inttoptr i64 %tmp10959 to %struct.tree_common* ; <%struct.tree_common*> [#uses=1]
- %tmp21 = getelementptr %struct.tree_common* %tmp1920, i32 0, i32 3 ; <i8*> [#uses=1]
+ %tmp21 = getelementptr %struct.tree_common, %struct.tree_common* %tmp1920, i32 0, i32 3 ; <i8*> [#uses=1]
%tmp2122 = bitcast i8* %tmp21 to i32* ; <i32*> [#uses=1]
br i1 false, label %cond_next53, label %cond_true
@@ -71,7 +71,7 @@ cond_next856: ; preds = %cond_true851
ret void
bb866: ; preds = %cond_true851
- %tmp874 = load i32* %tmp2122 ; <i32> [#uses=1]
+ %tmp874 = load i32, i32* %tmp2122 ; <i32> [#uses=1]
%tmp876877 = trunc i32 %tmp874 to i8 ; <i8> [#uses=1]
icmp eq i8 %tmp876877, 1 ; <i1>:0 [#uses=1]
br i1 %0, label %cond_next881, label %cond_true878
@@ -81,9 +81,9 @@ cond_true878: ; preds = %bb866
cond_next881: ; preds = %bb866
%tmp884885 = inttoptr i64 %tmp10959 to %struct.tree_identifier* ; <%struct.tree_identifier*> [#uses=1]
- %tmp887 = getelementptr %struct.tree_identifier* %tmp884885, i32 0, i32 1, i32 0 ; <i8**> [#uses=1]
- %tmp888 = load i8** %tmp887 ; <i8*> [#uses=1]
- tail call void (i32, ...)* @error( i32 undef, i8* %tmp888 )
+ %tmp887 = getelementptr %struct.tree_identifier, %struct.tree_identifier* %tmp884885, i32 0, i32 1, i32 0 ; <i8**> [#uses=1]
+ %tmp888 = load i8*, i8** %tmp887 ; <i8*> [#uses=1]
+ tail call void (i32, ...) @error( i32 undef, i8* %tmp888 )
ret void
cond_true918: ; preds = %cond_false841
diff --git a/test/CodeGen/ARM/2007-03-27-RegScavengerAssert.ll b/test/CodeGen/ARM/2007-03-27-RegScavengerAssert.ll
index e4635f50279d..2a0ef770f3bc 100644
--- a/test/CodeGen/ARM/2007-03-27-RegScavengerAssert.ll
+++ b/test/CodeGen/ARM/2007-03-27-RegScavengerAssert.ll
@@ -13,7 +13,7 @@ cond_true340: ; preds = %entry
ret void
cond_next416: ; preds = %entry
- %tmp1085 = load %struct.rtx_def** %ad_addr ; <%struct.rtx_def*> [#uses=1]
+ %tmp1085 = load %struct.rtx_def*, %struct.rtx_def** %ad_addr ; <%struct.rtx_def*> [#uses=1]
br i1 false, label %bb1084, label %cond_true418
cond_true418: ; preds = %cond_next416
@@ -23,9 +23,9 @@ bb1084: ; preds = %cond_next416
br i1 false, label %cond_true1092, label %cond_next1102
cond_true1092: ; preds = %bb1084
- %tmp1094 = getelementptr %struct.rtx_def* %tmp1085, i32 0, i32 3 ; <%struct.u*> [#uses=1]
+ %tmp1094 = getelementptr %struct.rtx_def, %struct.rtx_def* %tmp1085, i32 0, i32 3 ; <%struct.u*> [#uses=1]
%tmp10981099 = bitcast %struct.u* %tmp1094 to %struct.rtx_def** ; <%struct.rtx_def**> [#uses=2]
- %tmp1101 = load %struct.rtx_def** %tmp10981099 ; <%struct.rtx_def*> [#uses=1]
+ %tmp1101 = load %struct.rtx_def*, %struct.rtx_def** %tmp10981099 ; <%struct.rtx_def*> [#uses=1]
store %struct.rtx_def* %tmp1101, %struct.rtx_def** %ad_addr
br label %cond_next1102
diff --git a/test/CodeGen/ARM/2007-04-02-RegScavengerAssert.ll b/test/CodeGen/ARM/2007-04-02-RegScavengerAssert.ll
index f24def31f97a..7b74e6ce948f 100644
--- a/test/CodeGen/ARM/2007-04-02-RegScavengerAssert.ll
+++ b/test/CodeGen/ARM/2007-04-02-RegScavengerAssert.ll
@@ -28,7 +28,7 @@ entry:
br i1 false, label %bb.preheader, label %return
bb.preheader: ; preds = %entry
- %tbl.014.us = load i32* null ; <i32> [#uses=1]
+ %tbl.014.us = load i32, i32* null ; <i32> [#uses=1]
br i1 false, label %cond_next.us, label %bb
cond_next51.us: ; preds = %cond_next.us, %cond_true33.us.cond_true46.us_crit_edge
@@ -40,8 +40,8 @@ cond_true33.us.cond_true46.us_crit_edge: ; preds = %cond_next.us
br label %cond_next51.us
cond_next.us: ; preds = %bb.preheader
- %tmp37.us = getelementptr %struct.X_Y* %cinfo, i32 0, i32 17, i32 %tbl.014.us ; <%struct.H_TBL**> [#uses=3]
- %tmp4524.us = load %struct.H_TBL** %tmp37.us ; <%struct.H_TBL*> [#uses=1]
+ %tmp37.us = getelementptr %struct.X_Y, %struct.X_Y* %cinfo, i32 0, i32 17, i32 %tbl.014.us ; <%struct.H_TBL**> [#uses=3]
+ %tmp4524.us = load %struct.H_TBL*, %struct.H_TBL** %tmp37.us ; <%struct.H_TBL*> [#uses=1]
icmp eq %struct.H_TBL* %tmp4524.us, null ; <i1>:0 [#uses=1]
br i1 %0, label %cond_true33.us.cond_true46.us_crit_edge, label %cond_next51.us
diff --git a/test/CodeGen/ARM/2007-04-03-PEIBug.ll b/test/CodeGen/ARM/2007-04-03-PEIBug.ll
index cf5094fb3800..87863bd3ec15 100644
--- a/test/CodeGen/ARM/2007-04-03-PEIBug.ll
+++ b/test/CodeGen/ARM/2007-04-03-PEIBug.ll
@@ -5,7 +5,7 @@ entry:
%A = alloca [1123 x i32], align 16 ; <[1123 x i32]*> [#uses=1]
%B = alloca [3123 x i32], align 16 ; <[3123 x i32]*> [#uses=1]
%C = alloca [12312 x i32], align 16 ; <[12312 x i32]*> [#uses=1]
- %tmp = call i32 (...)* @bar( [3123 x i32]* %B, [1123 x i32]* %A, [12312 x i32]* %C ) ; <i32> [#uses=0]
+ %tmp = call i32 (...) @bar( [3123 x i32]* %B, [1123 x i32]* %A, [12312 x i32]* %C ) ; <i32> [#uses=0]
ret i32 undef
}
diff --git a/test/CodeGen/ARM/2007-04-03-UndefinedSymbol.ll b/test/CodeGen/ARM/2007-04-03-UndefinedSymbol.ll
index e001cde8351b..11f3003e05b5 100644
--- a/test/CodeGen/ARM/2007-04-03-UndefinedSymbol.ll
+++ b/test/CodeGen/ARM/2007-04-03-UndefinedSymbol.ll
@@ -8,9 +8,9 @@
define internal void @_ZN1B1iEv(%struct.B* %this) {
entry:
- %tmp1 = getelementptr %struct.B* %this, i32 0, i32 0 ; <i32*> [#uses=1]
- %tmp2 = load i32* %tmp1 ; <i32> [#uses=1]
- %tmp4 = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([7 x i8]* @str, i32 0, i32 0), i32 %tmp2 ) ; <i32> [#uses=0]
+ %tmp1 = getelementptr %struct.B, %struct.B* %this, i32 0, i32 0 ; <i32*> [#uses=1]
+ %tmp2 = load i32, i32* %tmp1 ; <i32> [#uses=1]
+ %tmp4 = tail call i32 (i8*, ...) @printf( i8* getelementptr ([7 x i8], [7 x i8]* @str, i32 0, i32 0), i32 %tmp2 ) ; <i32> [#uses=0]
ret void
}
@@ -18,9 +18,9 @@ declare i32 @printf(i8*, ...)
define internal void @_ZN1B1jEv(%struct.B* %this) {
entry:
- %tmp1 = getelementptr %struct.B* %this, i32 0, i32 0 ; <i32*> [#uses=1]
- %tmp2 = load i32* %tmp1 ; <i32> [#uses=1]
- %tmp4 = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([7 x i8]* @str1, i32 0, i32 0), i32 %tmp2 ) ; <i32> [#uses=0]
+ %tmp1 = getelementptr %struct.B, %struct.B* %this, i32 0, i32 0 ; <i32*> [#uses=1]
+ %tmp2 = load i32, i32* %tmp1 ; <i32> [#uses=1]
+ %tmp4 = tail call i32 (i8*, ...) @printf( i8* getelementptr ([7 x i8], [7 x i8]* @str1, i32 0, i32 0), i32 %tmp2 ) ; <i32> [#uses=0]
ret void
}
@@ -29,70 +29,70 @@ entry:
%b.i29 = alloca %struct.B, align 4 ; <%struct.B*> [#uses=3]
%b.i1 = alloca %struct.B, align 4 ; <%struct.B*> [#uses=3]
%b.i = alloca %struct.B, align 4 ; <%struct.B*> [#uses=3]
- %tmp2.i = getelementptr %struct.B* %b.i, i32 0, i32 0 ; <i32*> [#uses=1]
+ %tmp2.i = getelementptr %struct.B, %struct.B* %b.i, i32 0, i32 0 ; <i32*> [#uses=1]
store i32 4, i32* %tmp2.i
br i1 icmp eq (i64 and (i64 zext (i32 ptrtoint (void (%struct.B*)* @_ZN1B1iEv to i32) to i64), i64 4294967296), i64 0), label %_Z3fooiM1BFvvE.exit, label %cond_true.i
cond_true.i: ; preds = %entry
%b2.i = bitcast %struct.B* %b.i to i8* ; <i8*> [#uses=1]
- %ctg23.i = getelementptr i8* %b2.i, i32 ashr (i32 trunc (i64 lshr (i64 zext (i32 ptrtoint (void (%struct.B*)* @_ZN1B1iEv to i32) to i64), i64 32) to i32), i32 1) ; <i8*> [#uses=1]
+ %ctg23.i = getelementptr i8, i8* %b2.i, i32 ashr (i32 trunc (i64 lshr (i64 zext (i32 ptrtoint (void (%struct.B*)* @_ZN1B1iEv to i32) to i64), i64 32) to i32), i32 1) ; <i8*> [#uses=1]
%tmp121314.i = bitcast i8* %ctg23.i to i32 (...)*** ; <i32 (...)***> [#uses=1]
- %tmp15.i = load i32 (...)*** %tmp121314.i ; <i32 (...)**> [#uses=1]
+ %tmp15.i = load i32 (...)**, i32 (...)*** %tmp121314.i ; <i32 (...)**> [#uses=1]
%tmp151.i = bitcast i32 (...)** %tmp15.i to i8* ; <i8*> [#uses=1]
- %ctg2.i = getelementptr i8* %tmp151.i, i32 ptrtoint (void (%struct.B*)* @_ZN1B1iEv to i32) ; <i8*> [#uses=1]
+ %ctg2.i = getelementptr i8, i8* %tmp151.i, i32 ptrtoint (void (%struct.B*)* @_ZN1B1iEv to i32) ; <i8*> [#uses=1]
%tmp2021.i = bitcast i8* %ctg2.i to i32 (...)** ; <i32 (...)**> [#uses=1]
- %tmp22.i = load i32 (...)** %tmp2021.i ; <i32 (...)*> [#uses=1]
+ %tmp22.i = load i32 (...)*, i32 (...)** %tmp2021.i ; <i32 (...)*> [#uses=1]
%tmp2223.i = bitcast i32 (...)* %tmp22.i to void (%struct.B*)* ; <void (%struct.B*)*> [#uses=1]
br label %_Z3fooiM1BFvvE.exit
_Z3fooiM1BFvvE.exit: ; preds = %cond_true.i, %entry
%iftmp.2.0.i = phi void (%struct.B*)* [ %tmp2223.i, %cond_true.i ], [ inttoptr (i32 ptrtoint (void (%struct.B*)* @_ZN1B1iEv to i32) to void (%struct.B*)*), %entry ] ; <void (%struct.B*)*> [#uses=1]
%b4.i = bitcast %struct.B* %b.i to i8* ; <i8*> [#uses=1]
- %ctg25.i = getelementptr i8* %b4.i, i32 ashr (i32 trunc (i64 lshr (i64 zext (i32 ptrtoint (void (%struct.B*)* @_ZN1B1iEv to i32) to i64), i64 32) to i32), i32 1) ; <i8*> [#uses=1]
+ %ctg25.i = getelementptr i8, i8* %b4.i, i32 ashr (i32 trunc (i64 lshr (i64 zext (i32 ptrtoint (void (%struct.B*)* @_ZN1B1iEv to i32) to i64), i64 32) to i32), i32 1) ; <i8*> [#uses=1]
%tmp3031.i = bitcast i8* %ctg25.i to %struct.B* ; <%struct.B*> [#uses=1]
call void %iftmp.2.0.i( %struct.B* %tmp3031.i )
- %tmp2.i30 = getelementptr %struct.B* %b.i29, i32 0, i32 0 ; <i32*> [#uses=1]
+ %tmp2.i30 = getelementptr %struct.B, %struct.B* %b.i29, i32 0, i32 0 ; <i32*> [#uses=1]
store i32 6, i32* %tmp2.i30
br i1 icmp eq (i64 and (i64 zext (i32 ptrtoint (void (%struct.B*)* @_ZN1B1jEv to i32) to i64), i64 4294967296), i64 0), label %_Z3fooiM1BFvvE.exit56, label %cond_true.i46
cond_true.i46: ; preds = %_Z3fooiM1BFvvE.exit
%b2.i35 = bitcast %struct.B* %b.i29 to i8* ; <i8*> [#uses=1]
- %ctg23.i36 = getelementptr i8* %b2.i35, i32 ashr (i32 trunc (i64 lshr (i64 zext (i32 ptrtoint (void (%struct.B*)* @_ZN1B1jEv to i32) to i64), i64 32) to i32), i32 1) ; <i8*> [#uses=1]
+ %ctg23.i36 = getelementptr i8, i8* %b2.i35, i32 ashr (i32 trunc (i64 lshr (i64 zext (i32 ptrtoint (void (%struct.B*)* @_ZN1B1jEv to i32) to i64), i64 32) to i32), i32 1) ; <i8*> [#uses=1]
%tmp121314.i37 = bitcast i8* %ctg23.i36 to i32 (...)*** ; <i32 (...)***> [#uses=1]
- %tmp15.i38 = load i32 (...)*** %tmp121314.i37 ; <i32 (...)**> [#uses=1]
+ %tmp15.i38 = load i32 (...)**, i32 (...)*** %tmp121314.i37 ; <i32 (...)**> [#uses=1]
%tmp151.i41 = bitcast i32 (...)** %tmp15.i38 to i8* ; <i8*> [#uses=1]
- %ctg2.i42 = getelementptr i8* %tmp151.i41, i32 ptrtoint (void (%struct.B*)* @_ZN1B1jEv to i32) ; <i8*> [#uses=1]
+ %ctg2.i42 = getelementptr i8, i8* %tmp151.i41, i32 ptrtoint (void (%struct.B*)* @_ZN1B1jEv to i32) ; <i8*> [#uses=1]
%tmp2021.i43 = bitcast i8* %ctg2.i42 to i32 (...)** ; <i32 (...)**> [#uses=1]
- %tmp22.i44 = load i32 (...)** %tmp2021.i43 ; <i32 (...)*> [#uses=1]
+ %tmp22.i44 = load i32 (...)*, i32 (...)** %tmp2021.i43 ; <i32 (...)*> [#uses=1]
%tmp2223.i45 = bitcast i32 (...)* %tmp22.i44 to void (%struct.B*)* ; <void (%struct.B*)*> [#uses=1]
br label %_Z3fooiM1BFvvE.exit56
_Z3fooiM1BFvvE.exit56: ; preds = %cond_true.i46, %_Z3fooiM1BFvvE.exit
%iftmp.2.0.i49 = phi void (%struct.B*)* [ %tmp2223.i45, %cond_true.i46 ], [ inttoptr (i32 ptrtoint (void (%struct.B*)* @_ZN1B1jEv to i32) to void (%struct.B*)*), %_Z3fooiM1BFvvE.exit ] ; <void (%struct.B*)*> [#uses=1]
%b4.i53 = bitcast %struct.B* %b.i29 to i8* ; <i8*> [#uses=1]
- %ctg25.i54 = getelementptr i8* %b4.i53, i32 ashr (i32 trunc (i64 lshr (i64 zext (i32 ptrtoint (void (%struct.B*)* @_ZN1B1jEv to i32) to i64), i64 32) to i32), i32 1) ; <i8*> [#uses=1]
+ %ctg25.i54 = getelementptr i8, i8* %b4.i53, i32 ashr (i32 trunc (i64 lshr (i64 zext (i32 ptrtoint (void (%struct.B*)* @_ZN1B1jEv to i32) to i64), i64 32) to i32), i32 1) ; <i8*> [#uses=1]
%tmp3031.i55 = bitcast i8* %ctg25.i54 to %struct.B* ; <%struct.B*> [#uses=1]
call void %iftmp.2.0.i49( %struct.B* %tmp3031.i55 )
- %tmp2.i2 = getelementptr %struct.B* %b.i1, i32 0, i32 0 ; <i32*> [#uses=1]
+ %tmp2.i2 = getelementptr %struct.B, %struct.B* %b.i1, i32 0, i32 0 ; <i32*> [#uses=1]
store i32 -1, i32* %tmp2.i2
br i1 icmp eq (i64 and (i64 zext (i32 ptrtoint (void (%struct.B*)* @_ZN1B1iEv to i32) to i64), i64 4294967296), i64 0), label %_Z3fooiM1BFvvE.exit28, label %cond_true.i18
cond_true.i18: ; preds = %_Z3fooiM1BFvvE.exit56
%b2.i7 = bitcast %struct.B* %b.i1 to i8* ; <i8*> [#uses=1]
- %ctg23.i8 = getelementptr i8* %b2.i7, i32 ashr (i32 trunc (i64 lshr (i64 zext (i32 ptrtoint (void (%struct.B*)* @_ZN1B1iEv to i32) to i64), i64 32) to i32), i32 1) ; <i8*> [#uses=1]
+ %ctg23.i8 = getelementptr i8, i8* %b2.i7, i32 ashr (i32 trunc (i64 lshr (i64 zext (i32 ptrtoint (void (%struct.B*)* @_ZN1B1iEv to i32) to i64), i64 32) to i32), i32 1) ; <i8*> [#uses=1]
%tmp121314.i9 = bitcast i8* %ctg23.i8 to i32 (...)*** ; <i32 (...)***> [#uses=1]
- %tmp15.i10 = load i32 (...)*** %tmp121314.i9 ; <i32 (...)**> [#uses=1]
+ %tmp15.i10 = load i32 (...)**, i32 (...)*** %tmp121314.i9 ; <i32 (...)**> [#uses=1]
%tmp151.i13 = bitcast i32 (...)** %tmp15.i10 to i8* ; <i8*> [#uses=1]
- %ctg2.i14 = getelementptr i8* %tmp151.i13, i32 ptrtoint (void (%struct.B*)* @_ZN1B1iEv to i32) ; <i8*> [#uses=1]
+ %ctg2.i14 = getelementptr i8, i8* %tmp151.i13, i32 ptrtoint (void (%struct.B*)* @_ZN1B1iEv to i32) ; <i8*> [#uses=1]
%tmp2021.i15 = bitcast i8* %ctg2.i14 to i32 (...)** ; <i32 (...)**> [#uses=1]
- %tmp22.i16 = load i32 (...)** %tmp2021.i15 ; <i32 (...)*> [#uses=1]
+ %tmp22.i16 = load i32 (...)*, i32 (...)** %tmp2021.i15 ; <i32 (...)*> [#uses=1]
%tmp2223.i17 = bitcast i32 (...)* %tmp22.i16 to void (%struct.B*)* ; <void (%struct.B*)*> [#uses=1]
br label %_Z3fooiM1BFvvE.exit28
_Z3fooiM1BFvvE.exit28: ; preds = %cond_true.i18, %_Z3fooiM1BFvvE.exit56
%iftmp.2.0.i21 = phi void (%struct.B*)* [ %tmp2223.i17, %cond_true.i18 ], [ inttoptr (i32 ptrtoint (void (%struct.B*)* @_ZN1B1iEv to i32) to void (%struct.B*)*), %_Z3fooiM1BFvvE.exit56 ] ; <void (%struct.B*)*> [#uses=1]
%b4.i25 = bitcast %struct.B* %b.i1 to i8* ; <i8*> [#uses=1]
- %ctg25.i26 = getelementptr i8* %b4.i25, i32 ashr (i32 trunc (i64 lshr (i64 zext (i32 ptrtoint (void (%struct.B*)* @_ZN1B1iEv to i32) to i64), i64 32) to i32), i32 1) ; <i8*> [#uses=1]
+ %ctg25.i26 = getelementptr i8, i8* %b4.i25, i32 ashr (i32 trunc (i64 lshr (i64 zext (i32 ptrtoint (void (%struct.B*)* @_ZN1B1iEv to i32) to i64), i64 32) to i32), i32 1) ; <i8*> [#uses=1]
%tmp3031.i27 = bitcast i8* %ctg25.i26 to %struct.B* ; <%struct.B*> [#uses=1]
call void %iftmp.2.0.i21( %struct.B* %tmp3031.i27 )
ret i32 0
diff --git a/test/CodeGen/ARM/2007-04-30-CombinerCrash.ll b/test/CodeGen/ARM/2007-04-30-CombinerCrash.ll
index a89e937d3e10..05c2ff4f7ca2 100644
--- a/test/CodeGen/ARM/2007-04-30-CombinerCrash.ll
+++ b/test/CodeGen/ARM/2007-04-30-CombinerCrash.ll
@@ -9,15 +9,15 @@ target triple = "arm-apple-darwin8"
define fastcc void @EvaluateDevelopment() {
entry:
- %tmp7 = load i64* getelementptr (%struct.CHESS_POSITION* @search, i32 0, i32 7) ; <i64> [#uses=1]
- %tmp50 = load i64* getelementptr (%struct.CHESS_POSITION* @search, i32 0, i32 0) ; <i64> [#uses=1]
- %tmp52 = load i64* getelementptr (%struct.CHESS_POSITION* @search, i32 0, i32 1) ; <i64> [#uses=1]
+ %tmp7 = load i64, i64* getelementptr (%struct.CHESS_POSITION, %struct.CHESS_POSITION* @search, i32 0, i32 7) ; <i64> [#uses=1]
+ %tmp50 = load i64, i64* getelementptr (%struct.CHESS_POSITION, %struct.CHESS_POSITION* @search, i32 0, i32 0) ; <i64> [#uses=1]
+ %tmp52 = load i64, i64* getelementptr (%struct.CHESS_POSITION, %struct.CHESS_POSITION* @search, i32 0, i32 1) ; <i64> [#uses=1]
%tmp53 = or i64 %tmp52, %tmp50 ; <i64> [#uses=1]
- %tmp57.b = load i1* @rank_mask.1.b ; <i1> [#uses=1]
+ %tmp57.b = load i1, i1* @rank_mask.1.b ; <i1> [#uses=1]
%tmp57 = select i1 %tmp57.b, i64 71776119061217280, i64 0 ; <i64> [#uses=1]
%tmp58 = and i64 %tmp57, %tmp7 ; <i64> [#uses=1]
%tmp59 = lshr i64 %tmp58, 8 ; <i64> [#uses=1]
- %tmp63 = load i64* getelementptr ([8 x i64]* @file_mask, i32 0, i32 4) ; <i64> [#uses=1]
+ %tmp63 = load i64, i64* getelementptr ([8 x i64], [8 x i64]* @file_mask, i32 0, i32 4) ; <i64> [#uses=1]
%tmp64 = or i64 %tmp63, 0 ; <i64> [#uses=1]
%tmp65 = and i64 %tmp59, %tmp53 ; <i64> [#uses=1]
%tmp66 = and i64 %tmp65, %tmp64 ; <i64> [#uses=1]
diff --git a/test/CodeGen/ARM/2007-05-03-BadPostIndexedLd.ll b/test/CodeGen/ARM/2007-05-03-BadPostIndexedLd.ll
index 25ac52e5ec77..50573b457c37 100644
--- a/test/CodeGen/ARM/2007-05-03-BadPostIndexedLd.ll
+++ b/test/CodeGen/ARM/2007-05-03-BadPostIndexedLd.ll
@@ -14,7 +14,7 @@
define fastcc void @Draw7(i32 %Option, i32* %Status) {
entry:
- %tmp115.b = load i1* @FirstTime.4637.b ; <i1> [#uses=1]
+ %tmp115.b = load i1, i1* @FirstTime.4637.b ; <i1> [#uses=1]
br i1 %tmp115.b, label %cond_next239, label %cond_next.i
cond_next.i: ; preds = %entry
@@ -88,19 +88,19 @@ cond_next1267: ; preds = %cond_next1235
br i1 %tmp1148, label %cond_next1275, label %cond_true1272
cond_true1272: ; preds = %cond_next1267
- %tmp1273 = load %struct.TestObj** null ; <%struct.TestObj*> [#uses=2]
+ %tmp1273 = load %struct.TestObj*, %struct.TestObj** null ; <%struct.TestObj*> [#uses=2]
%tmp2930.i = ptrtoint %struct.TestObj* %tmp1273 to i32 ; <i32> [#uses=1]
%tmp42.i348 = sub i32 0, %tmp2930.i ; <i32> [#uses=1]
- %tmp45.i = getelementptr %struct.TestObj* %tmp1273, i32 0, i32 0 ; <i8**> [#uses=2]
- %tmp48.i = load i8** %tmp45.i ; <i8*> [#uses=1]
- %tmp50.i350 = call i32 (i8*, i8*, ...)* @sprintf( i8* getelementptr ([256 x i8]* @Msg, i32 0, i32 0), i8* getelementptr ([48 x i8]* @.str53615, i32 0, i32 0), i8* null, i8** %tmp45.i, i8* %tmp48.i ) ; <i32> [#uses=0]
+ %tmp45.i = getelementptr %struct.TestObj, %struct.TestObj* %tmp1273, i32 0, i32 0 ; <i8**> [#uses=2]
+ %tmp48.i = load i8*, i8** %tmp45.i ; <i8*> [#uses=1]
+ %tmp50.i350 = call i32 (i8*, i8*, ...) @sprintf( i8* getelementptr ([256 x i8], [256 x i8]* @Msg, i32 0, i32 0), i8* getelementptr ([48 x i8], [48 x i8]* @.str53615, i32 0, i32 0), i8* null, i8** %tmp45.i, i8* %tmp48.i ) ; <i32> [#uses=0]
br i1 false, label %cond_true.i632.i, label %Ut_TraceMsg.exit648.i
cond_true.i632.i: ; preds = %cond_true1272
ret void
Ut_TraceMsg.exit648.i: ; preds = %cond_true1272
- %tmp57.i = getelementptr i8* null, i32 %tmp42.i348 ; <i8*> [#uses=0]
+ %tmp57.i = getelementptr i8, i8* null, i32 %tmp42.i348 ; <i8*> [#uses=0]
ret void
cond_next1275: ; preds = %cond_next1267
diff --git a/test/CodeGen/ARM/2007-05-07-tailmerge-1.ll b/test/CodeGen/ARM/2007-05-07-tailmerge-1.ll
index 90a3b372937e..f49c805469a0 100644
--- a/test/CodeGen/ARM/2007-05-07-tailmerge-1.ll
+++ b/test/CodeGen/ARM/2007-05-07-tailmerge-1.ll
@@ -17,45 +17,45 @@ entry:
%retval = alloca i32, align 4 ; <i32*> [#uses=1]
store i32 %i, i32* %i_addr
store i32 %q, i32* %q_addr
- %tmp = load i32* %i_addr ; <i32> [#uses=1]
+ %tmp = load i32, i32* %i_addr ; <i32> [#uses=1]
%tmp1 = icmp ne i32 %tmp, 0 ; <i1> [#uses=1]
%tmp12 = zext i1 %tmp1 to i8 ; <i8> [#uses=1]
%toBool = icmp ne i8 %tmp12, 0 ; <i1> [#uses=1]
br i1 %toBool, label %cond_true, label %cond_false
cond_true: ; preds = %entry
- %tmp3 = call i32 (...)* @bar( ) ; <i32> [#uses=0]
- %tmp4 = call i32 (...)* @baz( i32 5, i32 6 ) ; <i32> [#uses=0]
+ %tmp3 = call i32 (...) @bar( ) ; <i32> [#uses=0]
+ %tmp4 = call i32 (...) @baz( i32 5, i32 6 ) ; <i32> [#uses=0]
br label %cond_next
cond_false: ; preds = %entry
- %tmp5 = call i32 (...)* @foo( ) ; <i32> [#uses=0]
- %tmp6 = call i32 (...)* @baz( i32 5, i32 6 ) ; <i32> [#uses=0]
+ %tmp5 = call i32 (...) @foo( ) ; <i32> [#uses=0]
+ %tmp6 = call i32 (...) @baz( i32 5, i32 6 ) ; <i32> [#uses=0]
br label %cond_next
cond_next: ; preds = %cond_false, %cond_true
- %tmp7 = load i32* %q_addr ; <i32> [#uses=1]
+ %tmp7 = load i32, i32* %q_addr ; <i32> [#uses=1]
%tmp8 = icmp ne i32 %tmp7, 0 ; <i1> [#uses=1]
%tmp89 = zext i1 %tmp8 to i8 ; <i8> [#uses=1]
%toBool10 = icmp ne i8 %tmp89, 0 ; <i1> [#uses=1]
br i1 %toBool10, label %cond_true11, label %cond_false15
cond_true11: ; preds = %cond_next
- %tmp13 = call i32 (...)* @foo( ) ; <i32> [#uses=0]
- %tmp14 = call i32 (...)* @quux( i32 3, i32 4 ) ; <i32> [#uses=0]
+ %tmp13 = call i32 (...) @foo( ) ; <i32> [#uses=0]
+ %tmp14 = call i32 (...) @quux( i32 3, i32 4 ) ; <i32> [#uses=0]
br label %cond_next18
cond_false15: ; preds = %cond_next
- %tmp16 = call i32 (...)* @bar( ) ; <i32> [#uses=0]
- %tmp17 = call i32 (...)* @quux( i32 3, i32 4 ) ; <i32> [#uses=0]
+ %tmp16 = call i32 (...) @bar( ) ; <i32> [#uses=0]
+ %tmp17 = call i32 (...) @quux( i32 3, i32 4 ) ; <i32> [#uses=0]
br label %cond_next18
cond_next18: ; preds = %cond_false15, %cond_true11
- %tmp19 = call i32 (...)* @bar( ) ; <i32> [#uses=0]
+ %tmp19 = call i32 (...) @bar( ) ; <i32> [#uses=0]
br label %return
return: ; preds = %cond_next18
- %retval20 = load i32* %retval ; <i32> [#uses=1]
+ %retval20 = load i32, i32* %retval ; <i32> [#uses=1]
ret i32 %retval20
}
diff --git a/test/CodeGen/ARM/2007-05-09-tailmerge-2.ll b/test/CodeGen/ARM/2007-05-09-tailmerge-2.ll
index 37e41ecc4b1e..421d501a2ca9 100644
--- a/test/CodeGen/ARM/2007-05-09-tailmerge-2.ll
+++ b/test/CodeGen/ARM/2007-05-09-tailmerge-2.ll
@@ -19,46 +19,46 @@ entry:
%retval = alloca i32, align 4 ; <i32*> [#uses=1]
store i32 %i, i32* %i_addr
store i32 %q, i32* %q_addr
- %tmp = load i32* %i_addr ; <i32> [#uses=1]
+ %tmp = load i32, i32* %i_addr ; <i32> [#uses=1]
%tmp1 = icmp ne i32 %tmp, 0 ; <i1> [#uses=1]
%tmp12 = zext i1 %tmp1 to i8 ; <i8> [#uses=1]
%toBool = icmp ne i8 %tmp12, 0 ; <i1> [#uses=1]
br i1 %toBool, label %cond_true, label %cond_false
cond_true: ; preds = %entry
- %tmp3 = call i32 (...)* @bar( ) ; <i32> [#uses=0]
- %tmp4 = call i32 (...)* @baz( i32 5, i32 6 ) ; <i32> [#uses=0]
- %tmp7 = load i32* %q_addr ; <i32> [#uses=1]
+ %tmp3 = call i32 (...) @bar( ) ; <i32> [#uses=0]
+ %tmp4 = call i32 (...) @baz( i32 5, i32 6 ) ; <i32> [#uses=0]
+ %tmp7 = load i32, i32* %q_addr ; <i32> [#uses=1]
%tmp8 = icmp ne i32 %tmp7, 0 ; <i1> [#uses=1]
%tmp89 = zext i1 %tmp8 to i8 ; <i8> [#uses=1]
%toBool10 = icmp ne i8 %tmp89, 0 ; <i1> [#uses=1]
br i1 %toBool10, label %cond_true11, label %cond_false15
cond_false: ; preds = %entry
- %tmp5 = call i32 (...)* @foo( ) ; <i32> [#uses=0]
- %tmp6 = call i32 (...)* @baz( i32 5, i32 6 ) ; <i32> [#uses=0]
- %tmp27 = load i32* %q_addr ; <i32> [#uses=1]
+ %tmp5 = call i32 (...) @foo( ) ; <i32> [#uses=0]
+ %tmp6 = call i32 (...) @baz( i32 5, i32 6 ) ; <i32> [#uses=0]
+ %tmp27 = load i32, i32* %q_addr ; <i32> [#uses=1]
%tmp28 = icmp ne i32 %tmp27, 0 ; <i1> [#uses=1]
%tmp289 = zext i1 %tmp28 to i8 ; <i8> [#uses=1]
%toBool210 = icmp ne i8 %tmp289, 0 ; <i1> [#uses=1]
br i1 %toBool210, label %cond_true11, label %cond_false15
cond_true11: ; preds = %cond_next
- %tmp13 = call i32 (...)* @foo( ) ; <i32> [#uses=0]
- %tmp14 = call i32 (...)* @quux( i32 3, i32 4 ) ; <i32> [#uses=0]
+ %tmp13 = call i32 (...) @foo( ) ; <i32> [#uses=0]
+ %tmp14 = call i32 (...) @quux( i32 3, i32 4 ) ; <i32> [#uses=0]
br label %cond_next18
cond_false15: ; preds = %cond_next
- %tmp16 = call i32 (...)* @bar( ) ; <i32> [#uses=0]
- %tmp17 = call i32 (...)* @quux( i32 3, i32 4 ) ; <i32> [#uses=0]
+ %tmp16 = call i32 (...) @bar( ) ; <i32> [#uses=0]
+ %tmp17 = call i32 (...) @quux( i32 3, i32 4 ) ; <i32> [#uses=0]
br label %cond_next18
cond_next18: ; preds = %cond_false15, %cond_true11
- %tmp19 = call i32 (...)* @bar( ) ; <i32> [#uses=0]
+ %tmp19 = call i32 (...) @bar( ) ; <i32> [#uses=0]
br label %return
return: ; preds = %cond_next18
- %retval20 = load i32* %retval ; <i32> [#uses=1]
+ %retval20 = load i32, i32* %retval ; <i32> [#uses=1]
ret i32 %retval20
}
diff --git a/test/CodeGen/ARM/2007-05-22-tailmerge-3.ll b/test/CodeGen/ARM/2007-05-22-tailmerge-3.ll
index 30ae7237395e..52cc37e24084 100644
--- a/test/CodeGen/ARM/2007-05-22-tailmerge-3.ll
+++ b/test/CodeGen/ARM/2007-05-22-tailmerge-3.ll
@@ -29,46 +29,46 @@ entry:
%retval = alloca i32, align 4 ; <i32*> [#uses=1]
store i32 %i, i32* %i_addr
store i32 %q, i32* %q_addr
- %tmp = load i32* %i_addr ; <i32> [#uses=1]
+ %tmp = load i32, i32* %i_addr ; <i32> [#uses=1]
%tmp1 = icmp ne i32 %tmp, 0 ; <i1> [#uses=1]
%tmp12 = zext i1 %tmp1 to i8 ; <i8> [#uses=1]
%toBool = icmp ne i8 %tmp12, 0 ; <i1> [#uses=1]
br i1 %toBool, label %cond_true, label %cond_false
cond_true: ; preds = %entry
- %tmp3 = call i32 (...)* @bar( ) ; <i32> [#uses=0]
- %tmp4 = call i32 (...)* @baz( i32 5, i32 6 ) ; <i32> [#uses=0]
- %tmp7 = load i32* %q_addr ; <i32> [#uses=1]
+ %tmp3 = call i32 (...) @bar( ) ; <i32> [#uses=0]
+ %tmp4 = call i32 (...) @baz( i32 5, i32 6 ) ; <i32> [#uses=0]
+ %tmp7 = load i32, i32* %q_addr ; <i32> [#uses=1]
%tmp8 = icmp ne i32 %tmp7, 0 ; <i1> [#uses=1]
%tmp89 = zext i1 %tmp8 to i8 ; <i8> [#uses=1]
%toBool10 = icmp ne i8 %tmp89, 0 ; <i1> [#uses=1]
br i1 %toBool10, label %cond_true11, label %cond_false15
cond_false: ; preds = %entry
- %tmp5 = call i32 (...)* @foo( ) ; <i32> [#uses=0]
- %tmp6 = call i32 (...)* @baz( i32 5, i32 6 ) ; <i32> [#uses=0]
- %tmp27 = load i32* %q_addr ; <i32> [#uses=1]
+ %tmp5 = call i32 (...) @foo( ) ; <i32> [#uses=0]
+ %tmp6 = call i32 (...) @baz( i32 5, i32 6 ) ; <i32> [#uses=0]
+ %tmp27 = load i32, i32* %q_addr ; <i32> [#uses=1]
%tmp28 = icmp ne i32 %tmp27, 0 ; <i1> [#uses=1]
%tmp289 = zext i1 %tmp28 to i8 ; <i8> [#uses=1]
%toBool210 = icmp ne i8 %tmp289, 0 ; <i1> [#uses=1]
br i1 %toBool210, label %cond_true11, label %cond_false15
cond_true11: ; preds = %cond_next
- %tmp13 = call i32 (...)* @foo( ) ; <i32> [#uses=0]
- %tmp14 = call i32 (...)* @quux( i32 3, i32 4 ) ; <i32> [#uses=0]
+ %tmp13 = call i32 (...) @foo( ) ; <i32> [#uses=0]
+ %tmp14 = call i32 (...) @quux( i32 3, i32 4 ) ; <i32> [#uses=0]
br label %cond_next18
cond_false15: ; preds = %cond_next
- %tmp16 = call i32 (...)* @bar( ) ; <i32> [#uses=0]
- %tmp17 = call i32 (...)* @quux( i32 3, i32 4 ) ; <i32> [#uses=0]
+ %tmp16 = call i32 (...) @bar( ) ; <i32> [#uses=0]
+ %tmp17 = call i32 (...) @quux( i32 3, i32 4 ) ; <i32> [#uses=0]
br label %cond_next18
cond_next18: ; preds = %cond_false15, %cond_true11
- %tmp19 = call i32 (...)* @bar( ) ; <i32> [#uses=0]
+ %tmp19 = call i32 (...) @bar( ) ; <i32> [#uses=0]
br label %return
return: ; preds = %cond_next18
- %retval20 = load i32* %retval ; <i32> [#uses=1]
+ %retval20 = load i32, i32* %retval ; <i32> [#uses=1]
ret i32 %retval20
}
diff --git a/test/CodeGen/ARM/2007-05-23-BadPreIndexedStore.ll b/test/CodeGen/ARM/2007-05-23-BadPreIndexedStore.ll
index 5988c65dae6c..e0b28e874b90 100644
--- a/test/CodeGen/ARM/2007-05-23-BadPreIndexedStore.ll
+++ b/test/CodeGen/ARM/2007-05-23-BadPreIndexedStore.ll
@@ -17,10 +17,10 @@ cond_false: ; preds = %entry
bb140: ; preds = %bb140, %cond_false
%indvar = phi i32 [ 0, %cond_false ], [ %indvar.next, %bb140 ] ; <i32> [#uses=2]
%edge.230.0.rec = shl i32 %indvar, 1 ; <i32> [#uses=3]
- %edge.230.0 = getelementptr %struct.shape_edge_t* null, i32 %edge.230.0.rec ; <%struct.shape_edge_t*> [#uses=1]
+ %edge.230.0 = getelementptr %struct.shape_edge_t, %struct.shape_edge_t* null, i32 %edge.230.0.rec ; <%struct.shape_edge_t*> [#uses=1]
%edge.230.0.sum6970 = or i32 %edge.230.0.rec, 1 ; <i32> [#uses=2]
- %tmp154 = getelementptr %struct.shape_edge_t* null, i32 %edge.230.0.sum6970 ; <%struct.shape_edge_t*> [#uses=1]
- %tmp11.i5 = getelementptr %struct.shape_edge_t* null, i32 %edge.230.0.sum6970, i32 0 ; <%struct.shape_edge_t**> [#uses=1]
+ %tmp154 = getelementptr %struct.shape_edge_t, %struct.shape_edge_t* null, i32 %edge.230.0.sum6970 ; <%struct.shape_edge_t*> [#uses=1]
+ %tmp11.i5 = getelementptr %struct.shape_edge_t, %struct.shape_edge_t* null, i32 %edge.230.0.sum6970, i32 0 ; <%struct.shape_edge_t**> [#uses=1]
store %struct.shape_edge_t* %edge.230.0, %struct.shape_edge_t** %tmp11.i5
store %struct.shape_edge_t* %tmp154, %struct.shape_edge_t** null
%tmp16254.0.rec = add i32 %edge.230.0.rec, 2 ; <i32> [#uses=1]
diff --git a/test/CodeGen/ARM/2007-08-15-ReuseBug.ll b/test/CodeGen/ARM/2007-08-15-ReuseBug.ll
index 30b72e09a114..76b6221d0222 100644
--- a/test/CodeGen/ARM/2007-08-15-ReuseBug.ll
+++ b/test/CodeGen/ARM/2007-08-15-ReuseBug.ll
@@ -44,13 +44,13 @@ bb102.i: ; preds = %cond_next212.i
br i1 false, label %cond_true110.i, label %cond_next123.i
cond_true110.i: ; preds = %bb102.i
- %tmp116.i = getelementptr i8** %argv_addr.2321.0.i, i32 2 ; <i8**> [#uses=1]
- %tmp117.i = load i8** %tmp116.i ; <i8*> [#uses=1]
- %tmp126425.i = call %struct.FILE* @fopen( i8* %tmp117.i, i8* getelementptr ([2 x i8]* @.str44, i32 0, i32 0) ) ; <%struct.FILE*> [#uses=0]
+ %tmp116.i = getelementptr i8*, i8** %argv_addr.2321.0.i, i32 2 ; <i8**> [#uses=1]
+ %tmp117.i = load i8*, i8** %tmp116.i ; <i8*> [#uses=1]
+ %tmp126425.i = call %struct.FILE* @fopen( i8* %tmp117.i, i8* getelementptr ([2 x i8], [2 x i8]* @.str44, i32 0, i32 0) ) ; <%struct.FILE*> [#uses=0]
ret i32 0
cond_next123.i: ; preds = %bb102.i
- %tmp122.i = getelementptr i8* %tmp215.i, i32 2 ; <i8*> [#uses=0]
+ %tmp122.i = getelementptr i8, i8* %tmp215.i, i32 2 ; <i8*> [#uses=0]
ret i32 0
bb162.i: ; preds = %cond_next212.i
@@ -58,12 +58,12 @@ bb162.i: ; preds = %cond_next212.i
C_addcmd.exit120.i: ; preds = %cond_next212.i
%tmp3.i.i.i.i105.i = call i8* @calloc( i32 15, i32 1 ) ; <i8*> [#uses=1]
- %tmp1.i108.i = getelementptr [100 x i8*]* @_C_cmds, i32 0, i32 0 ; <i8**> [#uses=1]
+ %tmp1.i108.i = getelementptr [100 x i8*], [100 x i8*]* @_C_cmds, i32 0, i32 0 ; <i8**> [#uses=1]
store i8* %tmp3.i.i.i.i105.i, i8** %tmp1.i108.i, align 4
- %tmp.i91.i = load i32* @_C_nextcmd, align 4 ; <i32> [#uses=1]
+ %tmp.i91.i = load i32, i32* @_C_nextcmd, align 4 ; <i32> [#uses=1]
store i32 0, i32* @_C_nextcmd, align 4
%tmp3.i.i.i.i95.i = call i8* @calloc( i32 15, i32 1 ) ; <i8*> [#uses=1]
- %tmp1.i98.i = getelementptr [100 x i8*]* @_C_cmds, i32 0, i32 %tmp.i91.i ; <i8**> [#uses=1]
+ %tmp1.i98.i = getelementptr [100 x i8*], [100 x i8*]* @_C_cmds, i32 0, i32 %tmp.i91.i ; <i8**> [#uses=1]
store i8* %tmp3.i.i.i.i95.i, i8** %tmp1.i98.i, align 4
br label %cond_next212.i
@@ -77,8 +77,8 @@ cond_next212.i: ; preds = %cond_next212.i, %cond_next212.i, %cond_next212.i, %c
%max_d.3 = phi i32 [ -1, %entry ], [ %max_d.3, %bb30.i ], [ %max_d.3, %bb21.i ], [ %max_d.3, %C_addcmd.exit120.i ], [ 0, %bb192.i ], [ %max_d.3, %cond_next212.i ], [ %max_d.3, %cond_next212.i ], [ %max_d.3, %cond_next212.i ], [ %max_d.3, %cond_next212.i ] ; <i32> [#uses=7]
%argv_addr.2321.0.i = phi i8** [ %argv, %entry ], [ %tmp214.i, %bb192.i ], [ %tmp214.i, %C_addcmd.exit120.i ], [ %tmp214.i, %bb30.i ], [ %tmp214.i, %bb21.i ], [ %tmp214.i, %cond_next212.i ], [ %tmp214.i, %cond_next212.i ], [ %tmp214.i, %cond_next212.i ], [ %tmp214.i, %cond_next212.i ] ; <i8**> [#uses=2]
%argc_addr.2358.0.i = phi i32 [ %argc, %entry ], [ %tmp205399.i, %bb30.i ], [ 0, %bb21.i ], [ 0, %C_addcmd.exit120.i ], [ 0, %bb192.i ], [ 0, %cond_next212.i ], [ 0, %cond_next212.i ], [ 0, %cond_next212.i ], [ 0, %cond_next212.i ] ; <i32> [#uses=1]
- %tmp214.i = getelementptr i8** %argv_addr.2321.0.i, i32 1 ; <i8**> [#uses=9]
- %tmp215.i = load i8** %tmp214.i ; <i8*> [#uses=1]
+ %tmp214.i = getelementptr i8*, i8** %argv_addr.2321.0.i, i32 1 ; <i8**> [#uses=9]
+ %tmp215.i = load i8*, i8** %tmp214.i ; <i8*> [#uses=1]
%tmp1314.i = sext i8 0 to i32 ; <i32> [#uses=1]
switch i32 %tmp1314.i, label %bb192.i [
i32 76, label %C_addcmd.exit120.i
diff --git a/test/CodeGen/ARM/2008-02-04-LocalRegAllocBug.ll b/test/CodeGen/ARM/2008-02-04-LocalRegAllocBug.ll
index 3754db01fdd1..989410552f3f 100644
--- a/test/CodeGen/ARM/2008-02-04-LocalRegAllocBug.ll
+++ b/test/CodeGen/ARM/2008-02-04-LocalRegAllocBug.ll
@@ -10,7 +10,7 @@
define i32 @vorbis_staticbook_pack(%struct.static_codebook* %c, %struct.oggpack_buffer* %opb) {
entry:
%opb_addr = alloca %struct.oggpack_buffer* ; <%struct.oggpack_buffer**> [#uses=1]
- %tmp1 = load %struct.oggpack_buffer** %opb_addr, align 4 ; <%struct.oggpack_buffer*> [#uses=1]
+ %tmp1 = load %struct.oggpack_buffer*, %struct.oggpack_buffer** %opb_addr, align 4 ; <%struct.oggpack_buffer*> [#uses=1]
call void @oggpack_write( %struct.oggpack_buffer* %tmp1, i32 5653314, i32 24 ) nounwind
call void @oggpack_write( %struct.oggpack_buffer* null, i32 0, i32 24 ) nounwind
unreachable
diff --git a/test/CodeGen/ARM/2008-02-29-RegAllocLocal.ll b/test/CodeGen/ARM/2008-02-29-RegAllocLocal.ll
index 5fbed0da5ce8..03b473a4658a 100644
--- a/test/CodeGen/ARM/2008-02-29-RegAllocLocal.ll
+++ b/test/CodeGen/ARM/2008-02-29-RegAllocLocal.ll
@@ -8,7 +8,7 @@
define %"struct.kc::impl_ID"* @_ZN2kc18f_typeofunpsubtermEPNS_15impl_unpsubtermEPNS_7impl_IDE(%"struct.kc::impl_Ccode_option"* %a_unpsubterm, %"struct.kc::impl_ID"* %a_operator) {
entry:
- %tmp8 = getelementptr %"struct.kc::impl_Ccode_option"* %a_unpsubterm, i32 0, i32 0, i32 0 ; <i32 (...)***> [#uses=0]
+ %tmp8 = getelementptr %"struct.kc::impl_Ccode_option", %"struct.kc::impl_Ccode_option"* %a_unpsubterm, i32 0, i32 0, i32 0 ; <i32 (...)***> [#uses=0]
br i1 false, label %bb41, label %bb55
bb41: ; preds = %entry
diff --git a/test/CodeGen/ARM/2008-03-05-SxtInRegBug.ll b/test/CodeGen/ARM/2008-03-05-SxtInRegBug.ll
index dabe62003d9f..b0a50a49a76d 100644
--- a/test/CodeGen/ARM/2008-03-05-SxtInRegBug.ll
+++ b/test/CodeGen/ARM/2008-03-05-SxtInRegBug.ll
@@ -4,7 +4,7 @@ define i32 @main(i32 %argc, i8** %argv) {
entry:
br label %bb1
bb1: ; preds = %entry
- %tmp3.i.i = load i8* null, align 1 ; <i8> [#uses=1]
+ %tmp3.i.i = load i8, i8* null, align 1 ; <i8> [#uses=1]
%tmp4.i.i = icmp slt i8 %tmp3.i.i, 0 ; <i1> [#uses=1]
br i1 %tmp4.i.i, label %bb2, label %bb3
bb2: ; preds = %bb1
diff --git a/test/CodeGen/ARM/2008-03-07-RegScavengerAssert.ll b/test/CodeGen/ARM/2008-03-07-RegScavengerAssert.ll
index 94c562bf0129..753f9e3d1331 100644
--- a/test/CodeGen/ARM/2008-03-07-RegScavengerAssert.ll
+++ b/test/CodeGen/ARM/2008-03-07-RegScavengerAssert.ll
@@ -11,9 +11,9 @@ bb74.i: ; preds = %bb88.i, %bb74.i, %entry
bb88.i: ; preds = %bb74.i
br i1 false, label %mandel.exit, label %bb74.i
mandel.exit: ; preds = %bb88.i
- %tmp2 = load volatile double* getelementptr ({ double, double }* @accum, i32 0, i32 0), align 8 ; <double> [#uses=1]
+ %tmp2 = load volatile double, double* getelementptr ({ double, double }, { double, double }* @accum, i32 0, i32 0), align 8 ; <double> [#uses=1]
%tmp23 = fptosi double %tmp2 to i32 ; <i32> [#uses=1]
- %tmp5 = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([4 x i8]* @.str, i32 0, i32 0), i32 %tmp23 ) ; <i32> [#uses=0]
+ %tmp5 = tail call i32 (i8*, ...) @printf( i8* getelementptr ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i32 %tmp23 ) ; <i32> [#uses=0]
ret i32 0
}
diff --git a/test/CodeGen/ARM/2008-04-04-ScavengerAssert.ll b/test/CodeGen/ARM/2008-04-04-ScavengerAssert.ll
index 6b39a76d0d19..4b1aa19ef067 100644
--- a/test/CodeGen/ARM/2008-04-04-ScavengerAssert.ll
+++ b/test/CodeGen/ARM/2008-04-04-ScavengerAssert.ll
@@ -46,7 +46,7 @@ bb17.i: ; preds = %cond_next119.i
cond_true53.i: ; preds = %bb17.i
ret { i16, %struct.rnode* }* null
cond_false99.i: ; preds = %bb17.i
- %malloccall = tail call i8* @malloc(i32 trunc (i64 mul nuw (i64 ptrtoint (i1** getelementptr (i1** null, i32 1) to i64), i64 2) to i32))
+ %malloccall = tail call i8* @malloc(i32 trunc (i64 mul nuw (i64 ptrtoint (i1** getelementptr (i1*, i1** null, i32 1) to i64), i64 2) to i32))
%tmp106.i = bitcast i8* %malloccall to %struct.ch_set*
br i1 false, label %bb126.i, label %cond_next119.i
cond_next119.i: ; preds = %cond_false99.i, %bb42
diff --git a/test/CodeGen/ARM/2008-04-10-ScavengerAssert.ll b/test/CodeGen/ARM/2008-04-10-ScavengerAssert.ll
index c9a8a67ac5f9..1ededa3c3877 100644
--- a/test/CodeGen/ARM/2008-04-10-ScavengerAssert.ll
+++ b/test/CodeGen/ARM/2008-04-10-ScavengerAssert.ll
@@ -24,7 +24,7 @@ define void @main(i32 %argc, i8** %argv) noreturn {
entry:
br i1 false, label %cond_next48, label %cond_false674
cond_next48: ; preds = %entry
- %tmp61 = call %struct.FILE* @fopen( i8* null, i8* getelementptr ([2 x i8]* @.str127, i32 0, i32 0) ) ; <%struct.FILE*> [#uses=2]
+ %tmp61 = call %struct.FILE* @fopen( i8* null, i8* getelementptr ([2 x i8], [2 x i8]* @.str127, i32 0, i32 0) ) ; <%struct.FILE*> [#uses=2]
br i1 false, label %bb220.i.i.i, label %bb62.preheader.i.i.i
bb62.preheader.i.i.i: ; preds = %cond_next48
ret void
@@ -53,11 +53,11 @@ bb177.i393.i: ; preds = %bb40.i.i
bb192.i.i: ; preds = %bb177.i393.i
ret void
cond_false373.i.i: ; preds = %bb.i350.i
- %tmp376.i.i = call i32 @strcmp( i8* null, i8* getelementptr ([9 x i8]* @.str8115, i32 0, i32 0) ) ; <i32> [#uses=0]
+ %tmp376.i.i = call i32 @strcmp( i8* null, i8* getelementptr ([9 x i8], [9 x i8]* @.str8115, i32 0, i32 0) ) ; <i32> [#uses=0]
br i1 false, label %cond_true380.i.i, label %cond_next602.i.i
cond_true380.i.i: ; preds = %cond_false373.i.i
%tmp394.i418.i = add i32 %cell.0.i.i, 1 ; <i32> [#uses=1]
- %tmp397.i420.i = load %struct.cellbox** null, align 4 ; <%struct.cellbox*> [#uses=1]
+ %tmp397.i420.i = load %struct.cellbox*, %struct.cellbox** null, align 4 ; <%struct.cellbox*> [#uses=1]
br label %bb398.i.i
bb398.i.i: ; preds = %bb398.i.i, %cond_true380.i.i
br i1 false, label %bb414.i.i, label %bb398.i.i
@@ -73,10 +73,10 @@ bb609.i.i: ; preds = %cond_next602.i.i
br label %bb620.i.i
bb620.i.i: ; preds = %bb620.i.i, %bb609.i.i
%indvar166.i465.i = phi i32 [ %indvar.next167.i.i, %bb620.i.i ], [ 0, %bb609.i.i ] ; <i32> [#uses=1]
- %tmp640.i.i = call i32 (%struct.FILE*, i8*, ...)* @fscanf( %struct.FILE* %tmp61, i8* getelementptr ([5 x i8]* @.str584, i32 0, i32 0), [1024 x i8]* null ) ; <i32> [#uses=0]
- %tmp648.i.i = load i32* null, align 4 ; <i32> [#uses=1]
+ %tmp640.i.i = call i32 (%struct.FILE*, i8*, ...) @fscanf( %struct.FILE* %tmp61, i8* getelementptr ([5 x i8], [5 x i8]* @.str584, i32 0, i32 0), [1024 x i8]* null ) ; <i32> [#uses=0]
+ %tmp648.i.i = load i32, i32* null, align 4 ; <i32> [#uses=1]
%tmp650.i468.i = icmp sgt i32 0, %tmp648.i.i ; <i1> [#uses=1]
- %tmp624.i469.i = call i32 (%struct.FILE*, i8*, ...)* @fscanf( %struct.FILE* %tmp61, i8* getelementptr ([5 x i8]* @.str584, i32 0, i32 0), [1024 x i8]* null ) ; <i32> [#uses=0]
+ %tmp624.i469.i = call i32 (%struct.FILE*, i8*, ...) @fscanf( %struct.FILE* %tmp61, i8* getelementptr ([5 x i8], [5 x i8]* @.str584, i32 0, i32 0), [1024 x i8]* null ) ; <i32> [#uses=0]
%indvar.next167.i.i = add i32 %indvar166.i465.i, 1 ; <i32> [#uses=1]
br i1 %tmp650.i468.i, label %bb653.i.i.loopexit, label %bb620.i.i
bb653.i.i.loopexit: ; preds = %bb620.i.i
@@ -108,7 +108,7 @@ declare fastcc i32 @init_outer_loop(%struct.lame_global_flags*, double*, %struct
define fastcc void @outer_loop(%struct.lame_global_flags* %gfp, double* %xr, i32 %targ_bits, double* %best_noise, %struct.III_psy_xmin* %l3_xmin, i32* %l3_enc, %struct.III_scalefac_t* %scalefac, %struct.gr_info* %cod_info, i32 %ch) {
entry:
- %cod_info.182 = getelementptr %struct.gr_info* %cod_info, i32 0, i32 1 ; <i32*> [#uses=1]
+ %cod_info.182 = getelementptr %struct.gr_info, %struct.gr_info* %cod_info, i32 0, i32 1 ; <i32*> [#uses=1]
br label %bb
bb: ; preds = %bb226, %entry
%save_cod_info.1.1 = phi i32 [ undef, %entry ], [ %save_cod_info.1.1, %bb226 ] ; <i32> [#uses=2]
@@ -126,7 +126,7 @@ cond_true163: ; preds = %cond_next144
bb.i53: ; preds = %cond_true163
ret void
bb34.i: ; preds = %cond_true163
- %tmp37.i55 = load i32* null, align 4 ; <i32> [#uses=1]
+ %tmp37.i55 = load i32, i32* null, align 4 ; <i32> [#uses=1]
br i1 false, label %bb65.preheader.i, label %bb78.i
bb65.preheader.i: ; preds = %bb34.i
br label %bb65.outer.us.i
@@ -148,16 +148,16 @@ bb226.backedge.i: ; preds = %cond_next215.i, %bb151.i
bb155.i: ; preds = %cond_next215.i, %bb151.i
%indvar90.i = phi i32 [ %indvar.next91.i, %cond_next215.i ], [ 0, %bb151.i ] ; <i32> [#uses=2]
%sfb.3.reg2mem.0.i = add i32 %indvar90.i, %tmp37.i55 ; <i32> [#uses=4]
- %tmp161.i = getelementptr [4 x [21 x double]]* null, i32 0, i32 %tmp15747.i, i32 %sfb.3.reg2mem.0.i ; <double*> [#uses=1]
- %tmp162.i74 = load double* %tmp161.i, align 4 ; <double> [#uses=0]
+ %tmp161.i = getelementptr [4 x [21 x double]], [4 x [21 x double]]* null, i32 0, i32 %tmp15747.i, i32 %sfb.3.reg2mem.0.i ; <double*> [#uses=1]
+ %tmp162.i74 = load double, double* %tmp161.i, align 4 ; <double> [#uses=0]
br i1 false, label %cond_true167.i, label %cond_next215.i
cond_true167.i: ; preds = %bb155.i
- %tmp173.i = getelementptr %struct.III_scalefac_t* null, i32 0, i32 1, i32 %sfb.3.reg2mem.0.i, i32 %i.154.i ; <i32*> [#uses=1]
+ %tmp173.i = getelementptr %struct.III_scalefac_t, %struct.III_scalefac_t* null, i32 0, i32 1, i32 %sfb.3.reg2mem.0.i, i32 %i.154.i ; <i32*> [#uses=1]
store i32 0, i32* %tmp173.i, align 4
- %tmp182.1.i = getelementptr [14 x i32]* @scalefac_band.1, i32 0, i32 %sfb.3.reg2mem.0.i ; <i32*> [#uses=0]
+ %tmp182.1.i = getelementptr [14 x i32], [14 x i32]* @scalefac_band.1, i32 0, i32 %sfb.3.reg2mem.0.i ; <i32*> [#uses=0]
%tmp185.i78 = add i32 %sfb.3.reg2mem.0.i, 1 ; <i32> [#uses=1]
- %tmp187.1.i = getelementptr [14 x i32]* @scalefac_band.1, i32 0, i32 %tmp185.i78 ; <i32*> [#uses=1]
- %tmp188.i = load i32* %tmp187.1.i, align 4 ; <i32> [#uses=1]
+ %tmp187.1.i = getelementptr [14 x i32], [14 x i32]* @scalefac_band.1, i32 0, i32 %tmp185.i78 ; <i32*> [#uses=1]
+ %tmp188.i = load i32, i32* %tmp187.1.i, align 4 ; <i32> [#uses=1]
%tmp21153.i = icmp slt i32 0, %tmp188.i ; <i1> [#uses=1]
br i1 %tmp21153.i, label %bb190.preheader.i, label %cond_next215.i
bb190.preheader.i: ; preds = %cond_true167.i
@@ -190,9 +190,9 @@ bb231: ; preds = %bb226
define fastcc void @outer_loop2(%struct.lame_global_flags* %gfp, double* %xr, i32 %targ_bits, double* %best_noise, %struct.III_psy_xmin* %l3_xmin, i32* %l3_enc, %struct.III_scalefac_t* %scalefac, %struct.gr_info* %cod_info, i32 %ch) {
entry:
- %cod_info.20128.1 = getelementptr %struct.gr_info* %cod_info, i32 0, i32 20, i32 1 ; <i32*> [#uses=1]
- %cod_info.20128.2 = getelementptr %struct.gr_info* %cod_info, i32 0, i32 20, i32 2 ; <i32*> [#uses=1]
- %cod_info.20128.3 = getelementptr %struct.gr_info* %cod_info, i32 0, i32 20, i32 3 ; <i32*> [#uses=1]
+ %cod_info.20128.1 = getelementptr %struct.gr_info, %struct.gr_info* %cod_info, i32 0, i32 20, i32 1 ; <i32*> [#uses=1]
+ %cod_info.20128.2 = getelementptr %struct.gr_info, %struct.gr_info* %cod_info, i32 0, i32 20, i32 2 ; <i32*> [#uses=1]
+ %cod_info.20128.3 = getelementptr %struct.gr_info, %struct.gr_info* %cod_info, i32 0, i32 20, i32 3 ; <i32*> [#uses=1]
br label %bb
bb: ; preds = %bb226, %entry
%save_cod_info.19.1 = phi i32* [ undef, %entry ], [ %save_cod_info.19.0, %bb226 ] ; <i32*> [#uses=1]
@@ -224,7 +224,7 @@ cond_next144: ; preds = %cond_next104, %bb
%over.1 = phi i32 [ 0, %bb ], [ 0, %cond_next104 ] ; <i32> [#uses=1]
%best_over.0 = phi i32 [ 0, %bb ], [ 0, %cond_next104 ] ; <i32> [#uses=1]
%notdone.0 = phi i32 [ 0, %bb ], [ 0, %cond_next104 ] ; <i32> [#uses=1]
- %tmp147 = load i32* null, align 4 ; <i32> [#uses=1]
+ %tmp147 = load i32, i32* null, align 4 ; <i32> [#uses=1]
%tmp148 = icmp eq i32 %tmp147, 0 ; <i1> [#uses=1]
%tmp153 = icmp eq i32 %over.1, 0 ; <i1> [#uses=1]
%bothcond = and i1 %tmp148, %tmp153 ; <i1> [#uses=1]
diff --git a/test/CodeGen/ARM/2008-05-19-LiveIntervalsBug.ll b/test/CodeGen/ARM/2008-05-19-LiveIntervalsBug.ll
index 71aa6037a137..85ef8302a18a 100644
--- a/test/CodeGen/ARM/2008-05-19-LiveIntervalsBug.ll
+++ b/test/CodeGen/ARM/2008-05-19-LiveIntervalsBug.ll
@@ -21,14 +21,14 @@ entry:
br i1 false, label %init_orig_buffers.exit, label %cond_true.i29
cond_true.i29: ; preds = %entry
- %tmp17.i = load i32* getelementptr (%struct.ImageParameters* @images, i32 0, i32 20), align 8 ; <i32> [#uses=1]
- %tmp20.i27 = load i32* getelementptr (%struct.ImageParameters* @images, i32 0, i32 16), align 8 ; <i32> [#uses=1]
+ %tmp17.i = load i32, i32* getelementptr (%struct.ImageParameters, %struct.ImageParameters* @images, i32 0, i32 20), align 8 ; <i32> [#uses=1]
+ %tmp20.i27 = load i32, i32* getelementptr (%struct.ImageParameters, %struct.ImageParameters* @images, i32 0, i32 16), align 8 ; <i32> [#uses=1]
%tmp8.i.i = select i1 false, i32 1, i32 0 ; <i32> [#uses=1]
br label %bb.i8.us.i
bb.i8.us.i: ; preds = %get_mem2Dpel.exit.i.us.i, %cond_true.i29
%j.04.i.us.i = phi i32 [ %indvar.next39.i, %get_mem2Dpel.exit.i.us.i ], [ 0, %cond_true.i29 ] ; <i32> [#uses=2]
- %tmp13.i.us.i = getelementptr i16*** null, i32 %j.04.i.us.i ; <i16***> [#uses=0]
+ %tmp13.i.us.i = getelementptr i16**, i16*** null, i32 %j.04.i.us.i ; <i16***> [#uses=0]
%tmp15.i.i.us.i = tail call i8* @calloc( i32 0, i32 2 ) ; <i8*> [#uses=0]
store i16* null, i16** null, align 4
br label %bb.i.i.us.i
diff --git a/test/CodeGen/ARM/2008-05-19-ScavengerAssert.ll b/test/CodeGen/ARM/2008-05-19-ScavengerAssert.ll
index aa61d86e1389..3d69e4fefbc2 100644
--- a/test/CodeGen/ARM/2008-05-19-ScavengerAssert.ll
+++ b/test/CodeGen/ARM/2008-05-19-ScavengerAssert.ll
@@ -9,7 +9,7 @@ declare fastcc i32 @get_mem2Dint(i32***, i32, i32)
define fastcc void @init_global_buffers() nounwind {
entry:
- %tmp151 = tail call fastcc i32 @get_mem2Dint( i32*** getelementptr (%struct.Decoders* @decoders, i32 0, i32 0), i32 16, i32 16 ) ; <i32> [#uses=1]
+ %tmp151 = tail call fastcc i32 @get_mem2Dint( i32*** getelementptr (%struct.Decoders, %struct.Decoders* @decoders, i32 0, i32 0), i32 16, i32 16 ) ; <i32> [#uses=1]
%tmp158 = tail call i8* @calloc( i32 0, i32 4 ) ; <i8*> [#uses=0]
br i1 false, label %cond_true166, label %bb190.preheader
diff --git a/test/CodeGen/ARM/2008-07-24-CodeGenPrepCrash.ll b/test/CodeGen/ARM/2008-07-24-CodeGenPrepCrash.ll
index e86bc1ba5ccc..cf98d7f91df0 100644
--- a/test/CodeGen/ARM/2008-07-24-CodeGenPrepCrash.ll
+++ b/test/CodeGen/ARM/2008-07-24-CodeGenPrepCrash.ll
@@ -4,6 +4,6 @@
define void @main({ i32 }*) {
entry:
%sret1 = alloca { i32 } ; <{ i32 }*> [#uses=1]
- load { i32 }* %sret1 ; <{ i32 }>:1 [#uses=0]
+ load { i32 }, { i32 }* %sret1 ; <{ i32 }>:1 [#uses=0]
ret void
}
diff --git a/test/CodeGen/ARM/2008-08-07-AsmPrintBug.ll b/test/CodeGen/ARM/2008-08-07-AsmPrintBug.ll
index adb011277604..dfbbeb9968c4 100644
--- a/test/CodeGen/ARM/2008-08-07-AsmPrintBug.ll
+++ b/test/CodeGen/ARM/2008-08-07-AsmPrintBug.ll
@@ -8,6 +8,6 @@
define i32 @__gcov_close() nounwind {
entry:
- load i32* getelementptr (%struct.__gcov_var* @__gcov_var, i32 0, i32 5), align 4 ; <i32>:0 [#uses=1]
+ load i32, i32* getelementptr (%struct.__gcov_var, %struct.__gcov_var* @__gcov_var, i32 0, i32 5), align 4 ; <i32>:0 [#uses=1]
ret i32 %0
}
diff --git a/test/CodeGen/ARM/2009-02-16-SpillerBug.ll b/test/CodeGen/ARM/2009-02-16-SpillerBug.ll
index 4c0c59ccfbc6..cad5440bddc9 100644
--- a/test/CodeGen/ARM/2009-02-16-SpillerBug.ll
+++ b/test/CodeGen/ARM/2009-02-16-SpillerBug.ll
@@ -81,13 +81,13 @@ bb244: ; preds = %bb122, %bb122, %bb122, %bb122, %bb122, %bb122, %bb122, %bb122
br i1 %0, label %bb435, label %bb433
bb394: ; preds = %bb122
- call void (i32, i32, i8*, i32, %struct.FILE_POS*, ...)* @Error(i32 1, i32 3, i8* getelementptr ([23 x i8]* @"\01LC13423", i32 0, i32 0), i32 0, %struct.FILE_POS* @no_file_pos, i8* getelementptr ([13 x i8]* @"\01LC18972", i32 0, i32 0), i8* null) nounwind
+ call void (i32, i32, i8*, i32, %struct.FILE_POS*, ...) @Error(i32 1, i32 3, i8* getelementptr ([23 x i8], [23 x i8]* @"\01LC13423", i32 0, i32 0), i32 0, %struct.FILE_POS* @no_file_pos, i8* getelementptr ([13 x i8], [13 x i8]* @"\01LC18972", i32 0, i32 0), i8* null) nounwind
br label %bb396
bb396: ; preds = %bb394, %bb131, %bb122, %bb122, %bb122, %bb122, %RESUME
%stop_link.3 = phi %struct.rec* [ null, %RESUME ], [ %stop_link.3, %bb394 ], [ %stop_link.3, %bb122 ], [ %stop_link.3, %bb122 ], [ %stop_link.3, %bb122 ], [ %stop_link.3, %bb122 ], [ %link.1, %bb131 ] ; <%struct.rec*> [#uses=7]
%headers_seen.1 = phi i32 [ 0, %RESUME ], [ %headers_seen.1, %bb394 ], [ 1, %bb122 ], [ 1, %bb122 ], [ 1, %bb122 ], [ 1, %bb122 ], [ %headers_seen.1, %bb131 ] ; <i32> [#uses=2]
- %link.1 = load %struct.rec** null ; <%struct.rec*> [#uses=2]
+ %link.1 = load %struct.rec*, %struct.rec** null ; <%struct.rec*> [#uses=2]
%1 = icmp eq %struct.rec* %link.1, %hd ; <i1> [#uses=1]
br i1 %1, label %bb398, label %bb122
diff --git a/test/CodeGen/ARM/2009-02-22-SoftenFloatVaArg.ll b/test/CodeGen/ARM/2009-02-22-SoftenFloatVaArg.ll
index a48f0033acc8..a14589fa47d8 100644
--- a/test/CodeGen/ARM/2009-02-22-SoftenFloatVaArg.ll
+++ b/test/CodeGen/ARM/2009-02-22-SoftenFloatVaArg.ll
@@ -15,6 +15,6 @@ entry:
br label %return
return: ; preds = %entry
- %2 = load i32* %retval ; <i32> [#uses=1]
+ %2 = load i32, i32* %retval ; <i32> [#uses=1]
ret i32 %2
}
diff --git a/test/CodeGen/ARM/2009-02-27-SpillerBug.ll b/test/CodeGen/ARM/2009-02-27-SpillerBug.ll
index bc5e6023409f..d9ec4d28c5d7 100644
--- a/test/CodeGen/ARM/2009-02-27-SpillerBug.ll
+++ b/test/CodeGen/ARM/2009-02-27-SpillerBug.ll
@@ -28,12 +28,12 @@ bb53: ; preds = %bb52
br i1 %phitmp, label %bb55, label %bb52
bb55: ; preds = %bb53
- %4 = load double* @a, align 4 ; <double> [#uses=10]
+ %4 = load double, double* @a, align 4 ; <double> [#uses=10]
%5 = fadd double %4, 0.000000e+00 ; <double> [#uses=16]
%6 = fcmp ogt double %k.4, 0.000000e+00 ; <i1> [#uses=1]
%.pn404 = fmul double %4, %4 ; <double> [#uses=4]
%.pn402 = fmul double %5, %5 ; <double> [#uses=5]
- %.pn165.in = load double* @N ; <double> [#uses=5]
+ %.pn165.in = load double, double* @N ; <double> [#uses=5]
%.pn198 = fmul double 0.000000e+00, %5 ; <double> [#uses=1]
%.pn185 = fsub double -0.000000e+00, 0.000000e+00 ; <double> [#uses=1]
%.pn147 = fsub double -0.000000e+00, 0.000000e+00 ; <double> [#uses=1]
diff --git a/test/CodeGen/ARM/2009-03-07-SpillerBug.ll b/test/CodeGen/ARM/2009-03-07-SpillerBug.ll
index 377bbd211752..567400318ee0 100644
--- a/test/CodeGen/ARM/2009-03-07-SpillerBug.ll
+++ b/test/CodeGen/ARM/2009-03-07-SpillerBug.ll
@@ -42,7 +42,7 @@ bb3: ; preds = %entry
%17 = fdiv double %16, %0
%18 = fadd double 0.000000e+00, %17
%19 = call double @acos(double %18) nounwind readonly
- %20 = load double* null, align 4
+ %20 = load double, double* null, align 4
%21 = fmul double %20, 0x401921FB54442D18
%22 = call double @sin(double %19) nounwind readonly
%23 = fmul double %22, 0.000000e+00
diff --git a/test/CodeGen/ARM/2009-03-09-AddrModeBug.ll b/test/CodeGen/ARM/2009-03-09-AddrModeBug.ll
index 7bb1429872b9..a9d5480e72c9 100644
--- a/test/CodeGen/ARM/2009-03-09-AddrModeBug.ll
+++ b/test/CodeGen/ARM/2009-03-09-AddrModeBug.ll
@@ -6,7 +6,7 @@
define fastcc %struct.node_t* @_ZL6createP6node_tii3v_tS1_d(%struct.node_t* %n, i32 %lvl, i32 %dist, i64 %c.0.0, i64 %c.0.1, i64 %c.0.2, i64 %d.0.0, i64 %d.0.1, i64 %d.0.2, double %r) nounwind {
entry:
- %0 = getelementptr %struct.node_t* %n, i32 0, i32 1 ; <%struct.hit_t*> [#uses=1]
+ %0 = getelementptr %struct.node_t, %struct.node_t* %n, i32 0, i32 1 ; <%struct.hit_t*> [#uses=1]
%1 = bitcast %struct.hit_t* %0 to i256* ; <i256*> [#uses=1]
store i256 0, i256* %1, align 4
unreachable
diff --git a/test/CodeGen/ARM/2009-04-06-AsmModifier.ll b/test/CodeGen/ARM/2009-04-06-AsmModifier.ll
index e90c5b322db7..bc7dbd4f6953 100644
--- a/test/CodeGen/ARM/2009-04-06-AsmModifier.ll
+++ b/test/CodeGen/ARM/2009-04-06-AsmModifier.ll
@@ -8,14 +8,14 @@ entry:
store i32 0, i32* %retval
%res = alloca i32 ; <i32*> [#uses=0]
%fh = alloca i32 ; <i32*> [#uses=1]
- %1 = load i32* %fh ; <i32> [#uses=1]
- %2 = load i32* %ptr ; <i32> [#uses=1]
+ %1 = load i32, i32* %fh ; <i32> [#uses=1]
+ %2 = load i32, i32* %ptr ; <i32> [#uses=1]
%3 = call i32 asm "mov r0, $2; mov r1, $3; swi ${1:a}; mov $0, r0", "=r,i,r,r,~{r0},~{r1}"(i32 107, i32 %1, i32 %2) nounwind ; <i32> [#uses=1]
store i32 %3, i32* %retval
br label %return
return: ; preds = %entry
- %4 = load i32* %retval ; <i32> [#uses=1]
+ %4 = load i32, i32* %retval ; <i32> [#uses=1]
ret i32 %4
}
diff --git a/test/CodeGen/ARM/2009-04-08-AggregateAddr.ll b/test/CodeGen/ARM/2009-04-08-AggregateAddr.ll
index ade6a10afef6..edeae9b88bce 100644
--- a/test/CodeGen/ARM/2009-04-08-AggregateAddr.ll
+++ b/test/CodeGen/ARM/2009-04-08-AggregateAddr.ll
@@ -8,11 +8,11 @@ entry:
%b = alloca { double, double } ; <{ double, double }*> [#uses=1]
store { i32, { double, double }* } %d_arg, { i32, { double, double }* }* %d
store i32 %x_arg, i32* %x
- %tmp = load i32* %x ; <i32> [#uses=1]
- %tmp1 = getelementptr { i32, { double, double }* }* %d, i32 0, i32 1 ; <{ double, double }**> [#uses=1]
- %.ptr = load { double, double }** %tmp1 ; <{ double, double }*> [#uses=1]
- %tmp2 = getelementptr { double, double }* %.ptr, i32 %tmp ; <{ double, double }*> [#uses=1]
- %tmp3 = load { double, double }* %tmp2 ; <{ double, double }> [#uses=1]
+ %tmp = load i32, i32* %x ; <i32> [#uses=1]
+ %tmp1 = getelementptr { i32, { double, double }* }, { i32, { double, double }* }* %d, i32 0, i32 1 ; <{ double, double }**> [#uses=1]
+ %.ptr = load { double, double }*, { double, double }** %tmp1 ; <{ double, double }*> [#uses=1]
+ %tmp2 = getelementptr { double, double }, { double, double }* %.ptr, i32 %tmp ; <{ double, double }*> [#uses=1]
+ %tmp3 = load { double, double }, { double, double }* %tmp2 ; <{ double, double }> [#uses=1]
store { double, double } %tmp3, { double, double }* %b
ret void
}
diff --git a/test/CodeGen/ARM/2009-04-08-FREM.ll b/test/CodeGen/ARM/2009-04-08-FREM.ll
index 606c6b1471b4..e0f9485888d9 100644
--- a/test/CodeGen/ARM/2009-04-08-FREM.ll
+++ b/test/CodeGen/ARM/2009-04-08-FREM.ll
@@ -4,6 +4,6 @@ declare i32 @printf(i8*, ...)
define i32 @main() {
%rem_r = frem double 0.000000e+00, 0.000000e+00 ; <double> [#uses=1]
- %1 = call i32 (i8*, ...)* @printf(i8* null, double %rem_r) ; <i32> [#uses=0]
+ %1 = call i32 (i8*, ...) @printf(i8* null, double %rem_r) ; <i32> [#uses=0]
ret i32 0
}
diff --git a/test/CodeGen/ARM/2009-04-08-FloatUndef.ll b/test/CodeGen/ARM/2009-04-08-FloatUndef.ll
index 9e32e05b040b..949e1072b2b6 100644
--- a/test/CodeGen/ARM/2009-04-08-FloatUndef.ll
+++ b/test/CodeGen/ARM/2009-04-08-FloatUndef.ll
@@ -2,7 +2,7 @@
define void @execute_shader(<4 x float>* %OUT, <4 x float>* %IN, <4 x float>* %CONST) {
entry:
- %input2 = load <4 x float>* null, align 16 ; <<4 x float>> [#uses=2]
+ %input2 = load <4 x float>, <4 x float>* null, align 16 ; <<4 x float>> [#uses=2]
%shuffle7 = shufflevector <4 x float> %input2, <4 x float> <float 0.000000e+00, float 1.000000e+00, float 0.000000e+00, float 1.000000e+00>, <4 x i32> <i32 2, i32 2, i32 2, i32 2> ; <<4 x float>> [#uses=1]
%mul1 = fmul <4 x float> %shuffle7, zeroinitializer ; <<4 x float>> [#uses=1]
%add2 = fadd <4 x float> %mul1, %input2 ; <<4 x float>> [#uses=1]
diff --git a/test/CodeGen/ARM/2009-04-09-RegScavengerAsm.ll b/test/CodeGen/ARM/2009-04-09-RegScavengerAsm.ll
index 5b1746301f4e..f2532d798f83 100644
--- a/test/CodeGen/ARM/2009-04-09-RegScavengerAsm.ll
+++ b/test/CodeGen/ARM/2009-04-09-RegScavengerAsm.ll
@@ -4,7 +4,7 @@
define void @foo(...) nounwind {
entry:
%rr = alloca i32 ; <i32*> [#uses=2]
- %0 = load i32* %rr ; <i32> [#uses=1]
+ %0 = load i32, i32* %rr ; <i32> [#uses=1]
%1 = call i32 asm "nop", "=r,0"(i32 %0) nounwind ; <i32> [#uses=1]
store i32 %1, i32* %rr
br label %return
diff --git a/test/CodeGen/ARM/2009-05-07-RegAllocLocal.ll b/test/CodeGen/ARM/2009-05-07-RegAllocLocal.ll
index 524b5ebddc0a..ac641f99dbf9 100644
--- a/test/CodeGen/ARM/2009-05-07-RegAllocLocal.ll
+++ b/test/CodeGen/ARM/2009-05-07-RegAllocLocal.ll
@@ -5,7 +5,7 @@
define i16 @fn16(i16 %arg0.0, <2 x i16> %arg1, i16 %arg2.0) nounwind {
entry:
store <2 x i16> %arg1, <2 x i16>* null
- %0 = call i32 (i8*, ...)* @printf(i8* getelementptr ([30 x i8]* @.str, i32 0, i32 0), i32 0) nounwind ; <i32> [#uses=0]
+ %0 = call i32 (i8*, ...) @printf(i8* getelementptr ([30 x i8], [30 x i8]* @.str, i32 0, i32 0), i32 0) nounwind ; <i32> [#uses=0]
ret i16 0
}
diff --git a/test/CodeGen/ARM/2009-05-11-CodePlacementCrash.ll b/test/CodeGen/ARM/2009-05-11-CodePlacementCrash.ll
index 2bc7df028534..ae005dbf4b13 100644
--- a/test/CodeGen/ARM/2009-05-11-CodePlacementCrash.ll
+++ b/test/CodeGen/ARM/2009-05-11-CodePlacementCrash.ll
@@ -19,8 +19,8 @@ bb1: ; preds = %bb
bb3: ; preds = %bb1, %bb
%iftmp.0.0 = phi i32 [ 0, %bb1 ], [ -1, %bb ] ; <i32> [#uses=1]
- %1 = tail call i32 (i8*, ...)* @printf(i8* getelementptr ([7 x i8]* @"\01LC", i32 0, i32 0), i32 0, i32 %iftmp.0.0) nounwind ; <i32> [#uses=0]
- %2 = load %struct.List** null, align 4 ; <%struct.List*> [#uses=2]
+ %1 = tail call i32 (i8*, ...) @printf(i8* getelementptr ([7 x i8], [7 x i8]* @"\01LC", i32 0, i32 0), i32 0, i32 %iftmp.0.0) nounwind ; <i32> [#uses=0]
+ %2 = load %struct.List*, %struct.List** null, align 4 ; <%struct.List*> [#uses=2]
%phitmp = icmp eq %struct.List* %2, null ; <i1> [#uses=1]
br i1 %phitmp, label %bb5, label %bb
diff --git a/test/CodeGen/ARM/2009-06-02-ISelCrash.ll b/test/CodeGen/ARM/2009-06-02-ISelCrash.ll
index 403e3f6509f3..7bbb8090c849 100644
--- a/test/CodeGen/ARM/2009-06-02-ISelCrash.ll
+++ b/test/CodeGen/ARM/2009-06-02-ISelCrash.ll
@@ -57,6 +57,6 @@ Fft.exit.i: ; preds = %bb7.i.i
br i1 undef, label %bb5.i, label %bb1.outer2.i.i.outer
bb5.i: ; preds = %Fft.exit.i
- %0 = tail call i32 (i8*, ...)* @printf(i8* getelementptr ([15 x i8]* @"\01LC", i32 0, i32 0), double undef, double undef) nounwind ; <i32> [#uses=0]
+ %0 = tail call i32 (i8*, ...) @printf(i8* getelementptr ([15 x i8], [15 x i8]* @"\01LC", i32 0, i32 0), double undef, double undef) nounwind ; <i32> [#uses=0]
unreachable
}
diff --git a/test/CodeGen/ARM/2009-06-04-MissingLiveIn.ll b/test/CodeGen/ARM/2009-06-04-MissingLiveIn.ll
index 98e002302558..17beb3c25947 100644
--- a/test/CodeGen/ARM/2009-06-04-MissingLiveIn.ll
+++ b/test/CodeGen/ARM/2009-06-04-MissingLiveIn.ll
@@ -136,7 +136,7 @@ bb138: ; preds = %bb77
br label %bb141
bb139: ; preds = %bb141
- %scevgep441442881 = load i16* undef ; <i16> [#uses=1]
+ %scevgep441442881 = load i16, i16* undef ; <i16> [#uses=1]
%1 = icmp ugt i16 %scevgep441442881, %0 ; <i1> [#uses=1]
br i1 %1, label %bb141, label %bb142
diff --git a/test/CodeGen/ARM/2009-06-22-CoalescerBug.ll b/test/CodeGen/ARM/2009-06-22-CoalescerBug.ll
index 7e9b066984f9..4ab54c2e8faf 100644
--- a/test/CodeGen/ARM/2009-06-22-CoalescerBug.ll
+++ b/test/CodeGen/ARM/2009-06-22-CoalescerBug.ll
@@ -5,9 +5,9 @@
define void @simplify_unary_real(i8* nocapture %p) nounwind {
entry:
- %tmp121 = load i64* null, align 4 ; <i64> [#uses=1]
- %0 = getelementptr %struct.rtx_def* null, i32 0, i32 3, i32 3, i32 0 ; <i64*> [#uses=1]
- %tmp122 = load i64* %0, align 4 ; <i64> [#uses=1]
+ %tmp121 = load i64, i64* null, align 4 ; <i64> [#uses=1]
+ %0 = getelementptr %struct.rtx_def, %struct.rtx_def* null, i32 0, i32 3, i32 3, i32 0 ; <i64*> [#uses=1]
+ %tmp122 = load i64, i64* %0, align 4 ; <i64> [#uses=1]
%1 = zext i64 undef to i192 ; <i192> [#uses=2]
%2 = zext i64 %tmp121 to i192 ; <i192> [#uses=1]
%3 = shl i192 %2, 64 ; <i192> [#uses=2]
diff --git a/test/CodeGen/ARM/2009-06-30-RegScavengerAssert.ll b/test/CodeGen/ARM/2009-06-30-RegScavengerAssert.ll
index 812f0188f19a..e9c4b0335dc0 100644
--- a/test/CodeGen/ARM/2009-06-30-RegScavengerAssert.ll
+++ b/test/CodeGen/ARM/2009-06-30-RegScavengerAssert.ll
@@ -35,26 +35,26 @@ bb10: ; preds = %bb9
unreachable
bb11: ; preds = %bb9
- %0 = load i32* undef, align 4 ; <i32> [#uses=2]
+ %0 = load i32, i32* undef, align 4 ; <i32> [#uses=2]
%1 = add i32 %0, 1 ; <i32> [#uses=2]
store i32 %1, i32* undef, align 4
- %2 = load i32* undef, align 4 ; <i32> [#uses=1]
+ %2 = load i32, i32* undef, align 4 ; <i32> [#uses=1]
store i32 %2, i32* @nn, align 4
store i32 0, i32* @al_len, align 4
store i32 0, i32* @no_mat, align 4
store i32 0, i32* @no_mis, align 4
- %3 = getelementptr i8* %B, i32 %0 ; <i8*> [#uses=1]
+ %3 = getelementptr i8, i8* %B, i32 %0 ; <i8*> [#uses=1]
tail call void @diff(i8* undef, i8* %3, i32 undef, i32 undef, i32 undef, i32 undef) nounwind
%4 = sitofp i32 undef to double ; <double> [#uses=1]
%5 = fdiv double %4, 1.000000e+01 ; <double> [#uses=1]
- %6 = tail call i32 (i8*, ...)* @printf(i8* getelementptr ([29 x i8]* @"\01LC12", i32 0, i32 0), double %5) nounwind ; <i32> [#uses=0]
- %7 = load i32* @al_len, align 4 ; <i32> [#uses=1]
- %8 = load i32* @no_mat, align 4 ; <i32> [#uses=1]
- %9 = load i32* @no_mis, align 4 ; <i32> [#uses=1]
+ %6 = tail call i32 (i8*, ...) @printf(i8* getelementptr ([29 x i8], [29 x i8]* @"\01LC12", i32 0, i32 0), double %5) nounwind ; <i32> [#uses=0]
+ %7 = load i32, i32* @al_len, align 4 ; <i32> [#uses=1]
+ %8 = load i32, i32* @no_mat, align 4 ; <i32> [#uses=1]
+ %9 = load i32, i32* @no_mis, align 4 ; <i32> [#uses=1]
%10 = sub i32 %7, %8 ; <i32> [#uses=1]
%11 = sub i32 %10, %9 ; <i32> [#uses=1]
- %12 = tail call i32 (i8*, ...)* @printf(i8* getelementptr ([33 x i8]* @"\01LC16", i32 0, i32 0), i32 %11) nounwind ; <i32> [#uses=0]
- %13 = tail call i32 (i8*, ...)* @printf(i8* getelementptr ([47 x i8]* @"\01LC17", i32 0, i32 0), i32 undef, i32 %1, i32 undef, i32 undef) nounwind ; <i32> [#uses=0]
+ %12 = tail call i32 (i8*, ...) @printf(i8* getelementptr ([33 x i8], [33 x i8]* @"\01LC16", i32 0, i32 0), i32 %11) nounwind ; <i32> [#uses=0]
+ %13 = tail call i32 (i8*, ...) @printf(i8* getelementptr ([47 x i8], [47 x i8]* @"\01LC17", i32 0, i32 0), i32 undef, i32 %1, i32 undef, i32 undef) nounwind ; <i32> [#uses=0]
br i1 undef, label %bb15, label %bb12
bb12: ; preds = %bb11
diff --git a/test/CodeGen/ARM/2009-06-30-RegScavengerAssert2.ll b/test/CodeGen/ARM/2009-06-30-RegScavengerAssert2.ll
index f5fb97c0ef53..08291e62b65e 100644
--- a/test/CodeGen/ARM/2009-06-30-RegScavengerAssert2.ll
+++ b/test/CodeGen/ARM/2009-06-30-RegScavengerAssert2.ll
@@ -33,19 +33,19 @@ bb10: ; preds = %bb9
unreachable
bb11: ; preds = %bb9
- %0 = load i32* undef, align 4 ; <i32> [#uses=3]
+ %0 = load i32, i32* undef, align 4 ; <i32> [#uses=3]
%1 = add i32 %0, 1 ; <i32> [#uses=2]
store i32 %1, i32* undef, align 4
- %2 = load i32* undef, align 4 ; <i32> [#uses=2]
+ %2 = load i32, i32* undef, align 4 ; <i32> [#uses=2]
%3 = sub i32 %2, %0 ; <i32> [#uses=1]
store i32 0, i32* @no_mat, align 4
store i32 0, i32* @no_mis, align 4
- %4 = getelementptr i8* %B, i32 %0 ; <i8*> [#uses=1]
+ %4 = getelementptr i8, i8* %B, i32 %0 ; <i8*> [#uses=1]
tail call void @diff(i8* undef, i8* %4, i32 undef, i32 %3, i32 undef, i32 undef) nounwind
- %5 = tail call i32 (i8*, ...)* @printf(i8* getelementptr ([33 x i8]* @"\01LC11", i32 0, i32 0), i32 %tmp13) nounwind ; <i32> [#uses=0]
- %6 = load i32* @no_mis, align 4 ; <i32> [#uses=1]
- %7 = tail call i32 (i8*, ...)* @printf(i8* getelementptr ([33 x i8]* @"\01LC15", i32 0, i32 0), i32 %6) nounwind ; <i32> [#uses=0]
- %8 = tail call i32 (i8*, ...)* @printf(i8* getelementptr ([47 x i8]* @"\01LC17", i32 0, i32 0), i32 undef, i32 %1, i32 undef, i32 %2) nounwind ; <i32> [#uses=0]
+ %5 = tail call i32 (i8*, ...) @printf(i8* getelementptr ([33 x i8], [33 x i8]* @"\01LC11", i32 0, i32 0), i32 %tmp13) nounwind ; <i32> [#uses=0]
+ %6 = load i32, i32* @no_mis, align 4 ; <i32> [#uses=1]
+ %7 = tail call i32 (i8*, ...) @printf(i8* getelementptr ([33 x i8], [33 x i8]* @"\01LC15", i32 0, i32 0), i32 %6) nounwind ; <i32> [#uses=0]
+ %8 = tail call i32 (i8*, ...) @printf(i8* getelementptr ([47 x i8], [47 x i8]* @"\01LC17", i32 0, i32 0), i32 undef, i32 %1, i32 undef, i32 %2) nounwind ; <i32> [#uses=0]
br i1 undef, label %bb15, label %bb12
bb12: ; preds = %bb11
diff --git a/test/CodeGen/ARM/2009-06-30-RegScavengerAssert3.ll b/test/CodeGen/ARM/2009-06-30-RegScavengerAssert3.ll
index d7e4c90abb18..cf7325ddf895 100644
--- a/test/CodeGen/ARM/2009-06-30-RegScavengerAssert3.ll
+++ b/test/CodeGen/ARM/2009-06-30-RegScavengerAssert3.ll
@@ -28,7 +28,7 @@ bb11: ; preds = %bb9
br i1 undef, label %bb15, label %bb12
bb12: ; preds = %bb11
- %0 = load i32** @JJ, align 4 ; <i32*> [#uses=1]
+ %0 = load i32*, i32** @JJ, align 4 ; <i32*> [#uses=1]
br label %bb228.i
bb74.i: ; preds = %bb228.i
@@ -84,10 +84,10 @@ bb167.i: ; preds = %bb163.i
bb168.i: ; preds = %bb167.i, %bb163.i, %bb161.i, %bb160.i, %bb158.i
%fi.5.i = phi i32 [ undef, %bb167.i ], [ %ci.910.i, %bb158.i ], [ undef, %bb160.i ], [ %ci.910.i, %bb161.i ], [ undef, %bb163.i ] ; <i32> [#uses=1]
%fj.4.i = phi i32 [ undef, %bb167.i ], [ undef, %bb158.i ], [ %fj.515.i, %bb160.i ], [ undef, %bb161.i ], [ %fj.515.i, %bb163.i ] ; <i32> [#uses=2]
- %scevgep88.i = getelementptr i32* null, i32 %i.121.i ; <i32*> [#uses=3]
- %4 = load i32* %scevgep88.i, align 4 ; <i32> [#uses=2]
- %scevgep89.i = getelementptr i32* %0, i32 %i.121.i ; <i32*> [#uses=3]
- %5 = load i32* %scevgep89.i, align 4 ; <i32> [#uses=1]
+ %scevgep88.i = getelementptr i32, i32* null, i32 %i.121.i ; <i32*> [#uses=3]
+ %4 = load i32, i32* %scevgep88.i, align 4 ; <i32> [#uses=2]
+ %scevgep89.i = getelementptr i32, i32* %0, i32 %i.121.i ; <i32*> [#uses=3]
+ %5 = load i32, i32* %scevgep89.i, align 4 ; <i32> [#uses=1]
%ci.10.i = select i1 undef, i32 %pi.316.i, i32 %i.121.i ; <i32> [#uses=0]
%cj.9.i = select i1 undef, i32 %pj.317.i, i32 undef ; <i32> [#uses=0]
%6 = icmp slt i32 undef, 0 ; <i1> [#uses=3]
@@ -95,8 +95,8 @@ bb168.i: ; preds = %bb167.i, %bb163.i, %bb161.i, %bb160.i, %bb158.i
%cj.11.i100 = select i1 %6, i32 %fj.4.i, i32 %5 ; <i32> [#uses=1]
%c.14.i = select i1 %6, i32 0, i32 undef ; <i32> [#uses=2]
store i32 %c.14.i, i32* undef, align 4
- %7 = load i32* %scevgep88.i, align 4 ; <i32> [#uses=1]
- %8 = load i32* %scevgep89.i, align 4 ; <i32> [#uses=1]
+ %7 = load i32, i32* %scevgep88.i, align 4 ; <i32> [#uses=1]
+ %8 = load i32, i32* %scevgep89.i, align 4 ; <i32> [#uses=1]
store i32 %ci.12.i, i32* %scevgep88.i, align 4
store i32 %cj.11.i100, i32* %scevgep89.i, align 4
store i32 %4, i32* undef, align 4
diff --git a/test/CodeGen/ARM/2009-06-30-RegScavengerAssert4.ll b/test/CodeGen/ARM/2009-06-30-RegScavengerAssert4.ll
index 77c133a80f95..203608ac1d40 100644
--- a/test/CodeGen/ARM/2009-06-30-RegScavengerAssert4.ll
+++ b/test/CodeGen/ARM/2009-06-30-RegScavengerAssert4.ll
@@ -22,7 +22,7 @@ bb6: ; preds = %bb6, %bb5
br i1 undef, label %bb8, label %bb6
bb8: ; preds = %bb6, %bb5
- %0 = load i8** @name1, align 4 ; <i8*> [#uses=0]
+ %0 = load i8*, i8** @name1, align 4 ; <i8*> [#uses=0]
br label %bb15
bb9: ; preds = %bb15
@@ -33,15 +33,15 @@ bb10: ; preds = %bb9
bb11: ; preds = %bb9
store i32 0, i32* @no_mis, align 4
- %1 = getelementptr i8* %A, i32 0 ; <i8*> [#uses=1]
- %2 = getelementptr i8* %B, i32 0 ; <i8*> [#uses=1]
+ %1 = getelementptr i8, i8* %A, i32 0 ; <i8*> [#uses=1]
+ %2 = getelementptr i8, i8* %B, i32 0 ; <i8*> [#uses=1]
tail call void @diff(i8* %1, i8* %2, i32 undef, i32 undef, i32 undef, i32 undef) nounwind
br i1 undef, label %bb15, label %bb12
bb12: ; preds = %bb11
- %3 = load i32** @II, align 4 ; <i32*> [#uses=1]
- %4 = load i32* @r, align 4 ; <i32> [#uses=1]
- %5 = load i32* @qr, align 4 ; <i32> [#uses=1]
+ %3 = load i32*, i32** @II, align 4 ; <i32*> [#uses=1]
+ %4 = load i32, i32* @r, align 4 ; <i32> [#uses=1]
+ %5 = load i32, i32* @qr, align 4 ; <i32> [#uses=1]
br label %bb228.i
bb74.i: ; preds = %bb228.i
@@ -95,12 +95,12 @@ bb168.i: ; preds = %bb167.i, %bb163.i, %bb161.i, %bb160.i, %bb158.i
%fi.5.i = phi i32 [ %fi.614.i, %bb167.i ], [ %ci.910.i, %bb158.i ], [ %fi.614.i, %bb160.i ], [ %ci.910.i, %bb161.i ], [ %fi.614.i, %bb163.i ] ; <i32> [#uses=2]
%fj.4.i = phi i32 [ %cj.811.i, %bb167.i ], [ %cj.811.i, %bb158.i ], [ %fj.515.i, %bb160.i ], [ %cj.811.i, %bb161.i ], [ %fj.515.i, %bb163.i ] ; <i32> [#uses=2]
%f.5.i = phi i32 [ %7, %bb167.i ], [ %8, %bb158.i ], [ %7, %bb160.i ], [ %7, %bb161.i ], [ %7, %bb163.i ] ; <i32> [#uses=2]
- %scevgep88.i = getelementptr i32* %3, i32 undef ; <i32*> [#uses=1]
+ %scevgep88.i = getelementptr i32, i32* %3, i32 undef ; <i32*> [#uses=1]
%ci.10.i = select i1 undef, i32 %pi.316.i, i32 undef ; <i32> [#uses=0]
%ci.12.i = select i1 undef, i32 %fi.5.i, i32 undef ; <i32> [#uses=1]
%cj.11.i100 = select i1 undef, i32 %fj.4.i, i32 undef ; <i32> [#uses=1]
%c.14.i = select i1 undef, i32 %f.5.i, i32 undef ; <i32> [#uses=1]
- %10 = load i32* %scevgep88.i, align 4 ; <i32> [#uses=1]
+ %10 = load i32, i32* %scevgep88.i, align 4 ; <i32> [#uses=1]
br i1 undef, label %bb211.i, label %bb218.i
bb211.i: ; preds = %bb168.i
@@ -110,7 +110,7 @@ bb218.i: ; preds = %bb211.i, %bb168.i
br i1 undef, label %bb220.i, label %bb158.i
bb220.i: ; preds = %bb218.i, %bb153.i
- %11 = getelementptr i32* null, i32 %6 ; <i32*> [#uses=1]
+ %11 = getelementptr i32, i32* null, i32 %6 ; <i32*> [#uses=1]
store i32 undef, i32* %11, align 4
br i1 undef, label %bb221.i, label %bb228.i
diff --git a/test/CodeGen/ARM/2009-06-30-RegScavengerAssert5.ll b/test/CodeGen/ARM/2009-06-30-RegScavengerAssert5.ll
index 16f5d1dc150f..b3c91ed3fb0d 100644
--- a/test/CodeGen/ARM/2009-06-30-RegScavengerAssert5.ll
+++ b/test/CodeGen/ARM/2009-06-30-RegScavengerAssert5.ll
@@ -28,7 +28,7 @@ bb11: ; preds = %bb9
br i1 undef, label %bb15, label %bb12
bb12: ; preds = %bb11
- %0 = load i32** @XX, align 4 ; <i32*> [#uses=0]
+ %0 = load i32*, i32** @XX, align 4 ; <i32*> [#uses=0]
br label %bb228.i
bb74.i: ; preds = %bb228.i
diff --git a/test/CodeGen/ARM/2009-07-01-CommuteBug.ll b/test/CodeGen/ARM/2009-07-01-CommuteBug.ll
index f0d79ce25c97..55039dd7f57b 100644
--- a/test/CodeGen/ARM/2009-07-01-CommuteBug.ll
+++ b/test/CodeGen/ARM/2009-07-01-CommuteBug.ll
@@ -30,9 +30,9 @@ bb11: ; preds = %bb9
br i1 undef, label %bb15, label %bb12
bb12: ; preds = %bb11
- %0 = load i32** @II, align 4 ; <i32*> [#uses=1]
- %1 = load i32** @JJ, align 4 ; <i32*> [#uses=1]
- %2 = load i32* @qr, align 4 ; <i32> [#uses=1]
+ %0 = load i32*, i32** @II, align 4 ; <i32*> [#uses=1]
+ %1 = load i32*, i32** @JJ, align 4 ; <i32*> [#uses=1]
+ %2 = load i32, i32* @qr, align 4 ; <i32> [#uses=1]
br label %bb228.i
bb74.i: ; preds = %bb228.i
@@ -90,15 +90,15 @@ bb168.i: ; preds = %bb167.i, %bb163.i, %bb161.i, %bb160.i, %bb158.i
%fi.5.i = phi i32 [ %fi.614.i, %bb167.i ], [ %ci.910.i, %bb158.i ], [ %fi.614.i, %bb160.i ], [ %ci.910.i, %bb161.i ], [ %fi.614.i, %bb163.i ] ; <i32> [#uses=2]
%fj.4.i = phi i32 [ %cj.811.i, %bb167.i ], [ %cj.811.i, %bb158.i ], [ %fj.515.i, %bb160.i ], [ %cj.811.i, %bb161.i ], [ %fj.515.i, %bb163.i ] ; <i32> [#uses=2]
%f.5.i = phi i32 [ %3, %bb167.i ], [ %4, %bb158.i ], [ %3, %bb160.i ], [ %3, %bb161.i ], [ %3, %bb163.i ] ; <i32> [#uses=2]
- %scevgep88.i = getelementptr i32* %0, i32 undef ; <i32*> [#uses=2]
- %scevgep89.i = getelementptr i32* %1, i32 undef ; <i32*> [#uses=2]
+ %scevgep88.i = getelementptr i32, i32* %0, i32 undef ; <i32*> [#uses=2]
+ %scevgep89.i = getelementptr i32, i32* %1, i32 undef ; <i32*> [#uses=2]
%ci.10.i = select i1 undef, i32 %pi.316.i, i32 undef ; <i32> [#uses=0]
%cj.9.i = select i1 undef, i32 %pj.317.i, i32 undef ; <i32> [#uses=0]
%ci.12.i = select i1 undef, i32 %fi.5.i, i32 undef ; <i32> [#uses=2]
%cj.11.i100 = select i1 undef, i32 %fj.4.i, i32 undef ; <i32> [#uses=2]
%c.14.i = select i1 undef, i32 %f.5.i, i32 undef ; <i32> [#uses=1]
- %6 = load i32* %scevgep88.i, align 4 ; <i32> [#uses=1]
- %7 = load i32* %scevgep89.i, align 4 ; <i32> [#uses=1]
+ %6 = load i32, i32* %scevgep88.i, align 4 ; <i32> [#uses=1]
+ %7 = load i32, i32* %scevgep89.i, align 4 ; <i32> [#uses=1]
store i32 %ci.12.i, i32* %scevgep88.i, align 4
store i32 %cj.11.i100, i32* %scevgep89.i, align 4
br i1 undef, label %bb211.i, label %bb218.i
diff --git a/test/CodeGen/ARM/2009-07-18-RewriterBug.ll b/test/CodeGen/ARM/2009-07-18-RewriterBug.ll
index 454fee5c5ae1..2146dd330822 100644
--- a/test/CodeGen/ARM/2009-07-18-RewriterBug.ll
+++ b/test/CodeGen/ARM/2009-07-18-RewriterBug.ll
@@ -16,43 +16,43 @@ entry:
br i1 %0, label %bb8, label %bb
bb: ; preds = %entry
- %1 = getelementptr %struct.VERTEX* %tree, i32 0, i32 2 ; <%struct.VERTEX**> [#uses=1]
- %2 = load %struct.VERTEX** %1, align 4 ; <%struct.VERTEX*> [#uses=2]
+ %1 = getelementptr %struct.VERTEX, %struct.VERTEX* %tree, i32 0, i32 2 ; <%struct.VERTEX**> [#uses=1]
+ %2 = load %struct.VERTEX*, %struct.VERTEX** %1, align 4 ; <%struct.VERTEX*> [#uses=2]
%3 = icmp eq %struct.VERTEX* %2, null ; <i1> [#uses=1]
br i1 %3, label %bb7, label %bb1.i
bb1.i: ; preds = %bb1.i, %bb
%tree_addr.0.i = phi %struct.VERTEX* [ %5, %bb1.i ], [ %tree, %bb ] ; <%struct.VERTEX*> [#uses=3]
- %4 = getelementptr %struct.VERTEX* %tree_addr.0.i, i32 0, i32 1 ; <%struct.VERTEX**> [#uses=1]
- %5 = load %struct.VERTEX** %4, align 4 ; <%struct.VERTEX*> [#uses=2]
+ %4 = getelementptr %struct.VERTEX, %struct.VERTEX* %tree_addr.0.i, i32 0, i32 1 ; <%struct.VERTEX**> [#uses=1]
+ %5 = load %struct.VERTEX*, %struct.VERTEX** %4, align 4 ; <%struct.VERTEX*> [#uses=2]
%6 = icmp eq %struct.VERTEX* %5, null ; <i1> [#uses=1]
br i1 %6, label %get_low.exit, label %bb1.i
get_low.exit: ; preds = %bb1.i
call void @build_delaunay(%struct.EDGE_PAIR* noalias sret %delright, %struct.VERTEX* %2, %struct.VERTEX* %extra) nounwind
- %7 = getelementptr %struct.VERTEX* %tree, i32 0, i32 1 ; <%struct.VERTEX**> [#uses=1]
- %8 = load %struct.VERTEX** %7, align 4 ; <%struct.VERTEX*> [#uses=1]
+ %7 = getelementptr %struct.VERTEX, %struct.VERTEX* %tree, i32 0, i32 1 ; <%struct.VERTEX**> [#uses=1]
+ %8 = load %struct.VERTEX*, %struct.VERTEX** %7, align 4 ; <%struct.VERTEX*> [#uses=1]
call void @build_delaunay(%struct.EDGE_PAIR* noalias sret %delleft, %struct.VERTEX* %8, %struct.VERTEX* %tree) nounwind
- %9 = getelementptr %struct.EDGE_PAIR* %delleft, i32 0, i32 0 ; <%struct.edge_rec**> [#uses=1]
- %10 = load %struct.edge_rec** %9, align 8 ; <%struct.edge_rec*> [#uses=2]
- %11 = getelementptr %struct.EDGE_PAIR* %delleft, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
- %12 = load %struct.edge_rec** %11, align 4 ; <%struct.edge_rec*> [#uses=1]
- %13 = getelementptr %struct.EDGE_PAIR* %delright, i32 0, i32 0 ; <%struct.edge_rec**> [#uses=1]
- %14 = load %struct.edge_rec** %13, align 8 ; <%struct.edge_rec*> [#uses=1]
- %15 = getelementptr %struct.EDGE_PAIR* %delright, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
- %16 = load %struct.edge_rec** %15, align 4 ; <%struct.edge_rec*> [#uses=2]
+ %9 = getelementptr %struct.EDGE_PAIR, %struct.EDGE_PAIR* %delleft, i32 0, i32 0 ; <%struct.edge_rec**> [#uses=1]
+ %10 = load %struct.edge_rec*, %struct.edge_rec** %9, align 8 ; <%struct.edge_rec*> [#uses=2]
+ %11 = getelementptr %struct.EDGE_PAIR, %struct.EDGE_PAIR* %delleft, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ %12 = load %struct.edge_rec*, %struct.edge_rec** %11, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %13 = getelementptr %struct.EDGE_PAIR, %struct.EDGE_PAIR* %delright, i32 0, i32 0 ; <%struct.edge_rec**> [#uses=1]
+ %14 = load %struct.edge_rec*, %struct.edge_rec** %13, align 8 ; <%struct.edge_rec*> [#uses=1]
+ %15 = getelementptr %struct.EDGE_PAIR, %struct.EDGE_PAIR* %delright, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ %16 = load %struct.edge_rec*, %struct.edge_rec** %15, align 4 ; <%struct.edge_rec*> [#uses=2]
br label %bb.i
bb.i: ; preds = %bb4.i, %get_low.exit
%rdi_addr.0.i = phi %struct.edge_rec* [ %14, %get_low.exit ], [ %72, %bb4.i ] ; <%struct.edge_rec*> [#uses=2]
%ldi_addr.1.i = phi %struct.edge_rec* [ %12, %get_low.exit ], [ %ldi_addr.0.i, %bb4.i ] ; <%struct.edge_rec*> [#uses=3]
- %17 = getelementptr %struct.edge_rec* %rdi_addr.0.i, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
- %18 = load %struct.VERTEX** %17, align 4 ; <%struct.VERTEX*> [#uses=3]
+ %17 = getelementptr %struct.edge_rec, %struct.edge_rec* %rdi_addr.0.i, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
+ %18 = load %struct.VERTEX*, %struct.VERTEX** %17, align 4 ; <%struct.VERTEX*> [#uses=3]
%19 = ptrtoint %struct.edge_rec* %ldi_addr.1.i to i32 ; <i32> [#uses=1]
- %20 = getelementptr %struct.VERTEX* %18, i32 0, i32 0, i32 0 ; <double*> [#uses=1]
- %21 = load double* %20, align 4 ; <double> [#uses=3]
- %22 = getelementptr %struct.VERTEX* %18, i32 0, i32 0, i32 1 ; <double*> [#uses=1]
- %23 = load double* %22, align 4 ; <double> [#uses=3]
+ %20 = getelementptr %struct.VERTEX, %struct.VERTEX* %18, i32 0, i32 0, i32 0 ; <double*> [#uses=1]
+ %21 = load double, double* %20, align 4 ; <double> [#uses=3]
+ %22 = getelementptr %struct.VERTEX, %struct.VERTEX* %18, i32 0, i32 0, i32 1 ; <double*> [#uses=1]
+ %23 = load double, double* %22, align 4 ; <double> [#uses=3]
br label %bb2.i
bb1.i1: ; preds = %bb2.i
@@ -62,8 +62,8 @@ bb1.i1: ; preds = %bb2.i
%27 = and i32 %24, -64 ; <i32> [#uses=1]
%28 = or i32 %26, %27 ; <i32> [#uses=1]
%29 = inttoptr i32 %28 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
- %30 = getelementptr %struct.edge_rec* %29, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
- %31 = load %struct.edge_rec** %30, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %30 = getelementptr %struct.edge_rec, %struct.edge_rec* %29, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ %31 = load %struct.edge_rec*, %struct.edge_rec** %30, align 4 ; <%struct.edge_rec*> [#uses=1]
%32 = ptrtoint %struct.edge_rec* %31 to i32 ; <i32> [#uses=2]
%33 = add i32 %32, 16 ; <i32> [#uses=1]
%34 = and i32 %33, 63 ; <i32> [#uses=1]
@@ -78,18 +78,18 @@ bb2.i: ; preds = %bb1.i1, %bb.i
%ldi_addr.0.i = phi %struct.edge_rec* [ %ldi_addr.1.i, %bb.i ], [ %37, %bb1.i1 ] ; <%struct.edge_rec*> [#uses=4]
%.pn6.in.i = xor i32 %.pn6.in.in.i, 32 ; <i32> [#uses=1]
%.pn6.i = inttoptr i32 %.pn6.in.i to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
- %t1.0.in.i = getelementptr %struct.edge_rec* %ldi_addr.1.pn.i, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
- %t2.0.in.i = getelementptr %struct.edge_rec* %.pn6.i, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
- %t1.0.i = load %struct.VERTEX** %t1.0.in.i ; <%struct.VERTEX*> [#uses=2]
- %t2.0.i = load %struct.VERTEX** %t2.0.in.i ; <%struct.VERTEX*> [#uses=2]
- %38 = getelementptr %struct.VERTEX* %t1.0.i, i32 0, i32 0, i32 0 ; <double*> [#uses=1]
- %39 = load double* %38, align 4 ; <double> [#uses=3]
- %40 = getelementptr %struct.VERTEX* %t1.0.i, i32 0, i32 0, i32 1 ; <double*> [#uses=1]
- %41 = load double* %40, align 4 ; <double> [#uses=3]
- %42 = getelementptr %struct.VERTEX* %t2.0.i, i32 0, i32 0, i32 0 ; <double*> [#uses=1]
- %43 = load double* %42, align 4 ; <double> [#uses=1]
- %44 = getelementptr %struct.VERTEX* %t2.0.i, i32 0, i32 0, i32 1 ; <double*> [#uses=1]
- %45 = load double* %44, align 4 ; <double> [#uses=1]
+ %t1.0.in.i = getelementptr %struct.edge_rec, %struct.edge_rec* %ldi_addr.1.pn.i, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
+ %t2.0.in.i = getelementptr %struct.edge_rec, %struct.edge_rec* %.pn6.i, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
+ %t1.0.i = load %struct.VERTEX*, %struct.VERTEX** %t1.0.in.i ; <%struct.VERTEX*> [#uses=2]
+ %t2.0.i = load %struct.VERTEX*, %struct.VERTEX** %t2.0.in.i ; <%struct.VERTEX*> [#uses=2]
+ %38 = getelementptr %struct.VERTEX, %struct.VERTEX* %t1.0.i, i32 0, i32 0, i32 0 ; <double*> [#uses=1]
+ %39 = load double, double* %38, align 4 ; <double> [#uses=3]
+ %40 = getelementptr %struct.VERTEX, %struct.VERTEX* %t1.0.i, i32 0, i32 0, i32 1 ; <double*> [#uses=1]
+ %41 = load double, double* %40, align 4 ; <double> [#uses=3]
+ %42 = getelementptr %struct.VERTEX, %struct.VERTEX* %t2.0.i, i32 0, i32 0, i32 0 ; <double*> [#uses=1]
+ %43 = load double, double* %42, align 4 ; <double> [#uses=1]
+ %44 = getelementptr %struct.VERTEX, %struct.VERTEX* %t2.0.i, i32 0, i32 0, i32 1 ; <double*> [#uses=1]
+ %45 = load double, double* %44, align 4 ; <double> [#uses=1]
%46 = fsub double %39, %21 ; <double> [#uses=1]
%47 = fsub double %45, %23 ; <double> [#uses=1]
%48 = fmul double %46, %47 ; <double> [#uses=1]
@@ -104,12 +104,12 @@ bb3.i: ; preds = %bb2.i
%54 = ptrtoint %struct.edge_rec* %rdi_addr.0.i to i32 ; <i32> [#uses=1]
%55 = xor i32 %54, 32 ; <i32> [#uses=3]
%56 = inttoptr i32 %55 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=2]
- %57 = getelementptr %struct.edge_rec* %56, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
- %58 = load %struct.VERTEX** %57, align 4 ; <%struct.VERTEX*> [#uses=2]
- %59 = getelementptr %struct.VERTEX* %58, i32 0, i32 0, i32 0 ; <double*> [#uses=1]
- %60 = load double* %59, align 4 ; <double> [#uses=1]
- %61 = getelementptr %struct.VERTEX* %58, i32 0, i32 0, i32 1 ; <double*> [#uses=1]
- %62 = load double* %61, align 4 ; <double> [#uses=1]
+ %57 = getelementptr %struct.edge_rec, %struct.edge_rec* %56, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
+ %58 = load %struct.VERTEX*, %struct.VERTEX** %57, align 4 ; <%struct.VERTEX*> [#uses=2]
+ %59 = getelementptr %struct.VERTEX, %struct.VERTEX* %58, i32 0, i32 0, i32 0 ; <double*> [#uses=1]
+ %60 = load double, double* %59, align 4 ; <double> [#uses=1]
+ %61 = getelementptr %struct.VERTEX, %struct.VERTEX* %58, i32 0, i32 0, i32 1 ; <double*> [#uses=1]
+ %62 = load double, double* %61, align 4 ; <double> [#uses=1]
%63 = fsub double %60, %39 ; <double> [#uses=1]
%64 = fsub double %23, %41 ; <double> [#uses=1]
%65 = fmul double %63, %64 ; <double> [#uses=1]
@@ -121,8 +121,8 @@ bb3.i: ; preds = %bb2.i
br i1 %70, label %bb4.i, label %bb5.i
bb4.i: ; preds = %bb3.i
- %71 = getelementptr %struct.edge_rec* %56, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
- %72 = load %struct.edge_rec** %71, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %71 = getelementptr %struct.edge_rec, %struct.edge_rec* %56, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ %72 = load %struct.edge_rec*, %struct.edge_rec** %71, align 4 ; <%struct.edge_rec*> [#uses=1]
br label %bb.i
bb5.i: ; preds = %bb3.i
@@ -131,110 +131,110 @@ bb5.i: ; preds = %bb3.i
%75 = and i32 %55, -64 ; <i32> [#uses=1]
%76 = or i32 %74, %75 ; <i32> [#uses=1]
%77 = inttoptr i32 %76 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
- %78 = getelementptr %struct.edge_rec* %77, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
- %79 = load %struct.edge_rec** %78, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %78 = getelementptr %struct.edge_rec, %struct.edge_rec* %77, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ %79 = load %struct.edge_rec*, %struct.edge_rec** %78, align 4 ; <%struct.edge_rec*> [#uses=1]
%80 = ptrtoint %struct.edge_rec* %79 to i32 ; <i32> [#uses=2]
%81 = add i32 %80, 16 ; <i32> [#uses=1]
%82 = and i32 %81, 63 ; <i32> [#uses=1]
%83 = and i32 %80, -64 ; <i32> [#uses=1]
%84 = or i32 %82, %83 ; <i32> [#uses=1]
%85 = inttoptr i32 %84 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
- %86 = getelementptr %struct.edge_rec* %ldi_addr.0.i, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
- %87 = load %struct.VERTEX** %86, align 4 ; <%struct.VERTEX*> [#uses=1]
+ %86 = getelementptr %struct.edge_rec, %struct.edge_rec* %ldi_addr.0.i, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
+ %87 = load %struct.VERTEX*, %struct.VERTEX** %86, align 4 ; <%struct.VERTEX*> [#uses=1]
%88 = call %struct.edge_rec* @alloc_edge() nounwind ; <%struct.edge_rec*> [#uses=6]
- %89 = getelementptr %struct.edge_rec* %88, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=4]
+ %89 = getelementptr %struct.edge_rec, %struct.edge_rec* %88, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=4]
store %struct.edge_rec* %88, %struct.edge_rec** %89, align 4
- %90 = getelementptr %struct.edge_rec* %88, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=2]
+ %90 = getelementptr %struct.edge_rec, %struct.edge_rec* %88, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=2]
store %struct.VERTEX* %18, %struct.VERTEX** %90, align 4
%91 = ptrtoint %struct.edge_rec* %88 to i32 ; <i32> [#uses=5]
%92 = add i32 %91, 16 ; <i32> [#uses=2]
%93 = inttoptr i32 %92 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=2]
%94 = add i32 %91, 48 ; <i32> [#uses=1]
%95 = inttoptr i32 %94 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=2]
- %96 = getelementptr %struct.edge_rec* %93, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ %96 = getelementptr %struct.edge_rec, %struct.edge_rec* %93, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
store %struct.edge_rec* %95, %struct.edge_rec** %96, align 4
%97 = add i32 %91, 32 ; <i32> [#uses=1]
%98 = inttoptr i32 %97 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=3]
- %99 = getelementptr %struct.edge_rec* %98, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ %99 = getelementptr %struct.edge_rec, %struct.edge_rec* %98, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
store %struct.edge_rec* %98, %struct.edge_rec** %99, align 4
- %100 = getelementptr %struct.edge_rec* %98, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
+ %100 = getelementptr %struct.edge_rec, %struct.edge_rec* %98, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
store %struct.VERTEX* %87, %struct.VERTEX** %100, align 4
- %101 = getelementptr %struct.edge_rec* %95, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ %101 = getelementptr %struct.edge_rec, %struct.edge_rec* %95, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
store %struct.edge_rec* %93, %struct.edge_rec** %101, align 4
- %102 = load %struct.edge_rec** %89, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %102 = load %struct.edge_rec*, %struct.edge_rec** %89, align 4 ; <%struct.edge_rec*> [#uses=1]
%103 = ptrtoint %struct.edge_rec* %102 to i32 ; <i32> [#uses=2]
%104 = add i32 %103, 16 ; <i32> [#uses=1]
%105 = and i32 %104, 63 ; <i32> [#uses=1]
%106 = and i32 %103, -64 ; <i32> [#uses=1]
%107 = or i32 %105, %106 ; <i32> [#uses=1]
%108 = inttoptr i32 %107 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
- %109 = getelementptr %struct.edge_rec* %85, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=3]
- %110 = load %struct.edge_rec** %109, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %109 = getelementptr %struct.edge_rec, %struct.edge_rec* %85, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=3]
+ %110 = load %struct.edge_rec*, %struct.edge_rec** %109, align 4 ; <%struct.edge_rec*> [#uses=1]
%111 = ptrtoint %struct.edge_rec* %110 to i32 ; <i32> [#uses=2]
%112 = add i32 %111, 16 ; <i32> [#uses=1]
%113 = and i32 %112, 63 ; <i32> [#uses=1]
%114 = and i32 %111, -64 ; <i32> [#uses=1]
%115 = or i32 %113, %114 ; <i32> [#uses=1]
%116 = inttoptr i32 %115 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
- %117 = getelementptr %struct.edge_rec* %116, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2]
- %118 = load %struct.edge_rec** %117, align 4 ; <%struct.edge_rec*> [#uses=1]
- %119 = getelementptr %struct.edge_rec* %108, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2]
- %120 = load %struct.edge_rec** %119, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %117 = getelementptr %struct.edge_rec, %struct.edge_rec* %116, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2]
+ %118 = load %struct.edge_rec*, %struct.edge_rec** %117, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %119 = getelementptr %struct.edge_rec, %struct.edge_rec* %108, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2]
+ %120 = load %struct.edge_rec*, %struct.edge_rec** %119, align 4 ; <%struct.edge_rec*> [#uses=1]
store %struct.edge_rec* %118, %struct.edge_rec** %119, align 4
store %struct.edge_rec* %120, %struct.edge_rec** %117, align 4
- %121 = load %struct.edge_rec** %89, align 4 ; <%struct.edge_rec*> [#uses=1]
- %122 = load %struct.edge_rec** %109, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %121 = load %struct.edge_rec*, %struct.edge_rec** %89, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %122 = load %struct.edge_rec*, %struct.edge_rec** %109, align 4 ; <%struct.edge_rec*> [#uses=1]
store %struct.edge_rec* %121, %struct.edge_rec** %109, align 4
store %struct.edge_rec* %122, %struct.edge_rec** %89, align 4
%123 = xor i32 %91, 32 ; <i32> [#uses=1]
%124 = inttoptr i32 %123 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=3]
- %125 = getelementptr %struct.edge_rec* %124, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=3]
- %126 = load %struct.edge_rec** %125, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %125 = getelementptr %struct.edge_rec, %struct.edge_rec* %124, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=3]
+ %126 = load %struct.edge_rec*, %struct.edge_rec** %125, align 4 ; <%struct.edge_rec*> [#uses=1]
%127 = ptrtoint %struct.edge_rec* %126 to i32 ; <i32> [#uses=2]
%128 = add i32 %127, 16 ; <i32> [#uses=1]
%129 = and i32 %128, 63 ; <i32> [#uses=1]
%130 = and i32 %127, -64 ; <i32> [#uses=1]
%131 = or i32 %129, %130 ; <i32> [#uses=1]
%132 = inttoptr i32 %131 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
- %133 = getelementptr %struct.edge_rec* %ldi_addr.0.i, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=3]
- %134 = load %struct.edge_rec** %133, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %133 = getelementptr %struct.edge_rec, %struct.edge_rec* %ldi_addr.0.i, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=3]
+ %134 = load %struct.edge_rec*, %struct.edge_rec** %133, align 4 ; <%struct.edge_rec*> [#uses=1]
%135 = ptrtoint %struct.edge_rec* %134 to i32 ; <i32> [#uses=2]
%136 = add i32 %135, 16 ; <i32> [#uses=1]
%137 = and i32 %136, 63 ; <i32> [#uses=1]
%138 = and i32 %135, -64 ; <i32> [#uses=1]
%139 = or i32 %137, %138 ; <i32> [#uses=1]
%140 = inttoptr i32 %139 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
- %141 = getelementptr %struct.edge_rec* %140, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2]
- %142 = load %struct.edge_rec** %141, align 4 ; <%struct.edge_rec*> [#uses=1]
- %143 = getelementptr %struct.edge_rec* %132, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2]
- %144 = load %struct.edge_rec** %143, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %141 = getelementptr %struct.edge_rec, %struct.edge_rec* %140, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2]
+ %142 = load %struct.edge_rec*, %struct.edge_rec** %141, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %143 = getelementptr %struct.edge_rec, %struct.edge_rec* %132, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2]
+ %144 = load %struct.edge_rec*, %struct.edge_rec** %143, align 4 ; <%struct.edge_rec*> [#uses=1]
store %struct.edge_rec* %142, %struct.edge_rec** %143, align 4
store %struct.edge_rec* %144, %struct.edge_rec** %141, align 4
- %145 = load %struct.edge_rec** %125, align 4 ; <%struct.edge_rec*> [#uses=1]
- %146 = load %struct.edge_rec** %133, align 4 ; <%struct.edge_rec*> [#uses=2]
+ %145 = load %struct.edge_rec*, %struct.edge_rec** %125, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %146 = load %struct.edge_rec*, %struct.edge_rec** %133, align 4 ; <%struct.edge_rec*> [#uses=2]
store %struct.edge_rec* %145, %struct.edge_rec** %133, align 4
store %struct.edge_rec* %146, %struct.edge_rec** %125, align 4
%147 = and i32 %92, 63 ; <i32> [#uses=1]
%148 = and i32 %91, -64 ; <i32> [#uses=1]
%149 = or i32 %147, %148 ; <i32> [#uses=1]
%150 = inttoptr i32 %149 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
- %151 = getelementptr %struct.edge_rec* %150, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
- %152 = load %struct.edge_rec** %151, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %151 = getelementptr %struct.edge_rec, %struct.edge_rec* %150, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ %152 = load %struct.edge_rec*, %struct.edge_rec** %151, align 4 ; <%struct.edge_rec*> [#uses=1]
%153 = ptrtoint %struct.edge_rec* %152 to i32 ; <i32> [#uses=2]
%154 = add i32 %153, 16 ; <i32> [#uses=1]
%155 = and i32 %154, 63 ; <i32> [#uses=1]
%156 = and i32 %153, -64 ; <i32> [#uses=1]
%157 = or i32 %155, %156 ; <i32> [#uses=1]
%158 = inttoptr i32 %157 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
- %159 = load %struct.VERTEX** %90, align 4 ; <%struct.VERTEX*> [#uses=1]
- %160 = getelementptr %struct.edge_rec* %124, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
- %161 = load %struct.VERTEX** %160, align 4 ; <%struct.VERTEX*> [#uses=1]
- %162 = getelementptr %struct.edge_rec* %16, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
- %163 = load %struct.VERTEX** %162, align 4 ; <%struct.VERTEX*> [#uses=1]
+ %159 = load %struct.VERTEX*, %struct.VERTEX** %90, align 4 ; <%struct.VERTEX*> [#uses=1]
+ %160 = getelementptr %struct.edge_rec, %struct.edge_rec* %124, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
+ %161 = load %struct.VERTEX*, %struct.VERTEX** %160, align 4 ; <%struct.VERTEX*> [#uses=1]
+ %162 = getelementptr %struct.edge_rec, %struct.edge_rec* %16, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
+ %163 = load %struct.VERTEX*, %struct.VERTEX** %162, align 4 ; <%struct.VERTEX*> [#uses=1]
%164 = icmp eq %struct.VERTEX* %163, %159 ; <i1> [#uses=1]
%rdo_addr.0.i = select i1 %164, %struct.edge_rec* %88, %struct.edge_rec* %16 ; <%struct.edge_rec*> [#uses=3]
- %165 = getelementptr %struct.edge_rec* %10, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
- %166 = load %struct.VERTEX** %165, align 4 ; <%struct.VERTEX*> [#uses=1]
+ %165 = getelementptr %struct.edge_rec, %struct.edge_rec* %10, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
+ %166 = load %struct.VERTEX*, %struct.VERTEX** %165, align 4 ; <%struct.VERTEX*> [#uses=1]
%167 = icmp eq %struct.VERTEX* %166, %161 ; <i1> [#uses=1]
%ldo_addr.0.ph.i = select i1 %167, %struct.edge_rec* %124, %struct.edge_rec* %10 ; <%struct.edge_rec*> [#uses=3]
br label %bb9.i
@@ -243,32 +243,32 @@ bb9.i: ; preds = %bb25.i, %bb24.i, %bb5.i
%lcand.2.i = phi %struct.edge_rec* [ %146, %bb5.i ], [ %lcand.1.i, %bb24.i ], [ %739, %bb25.i ] ; <%struct.edge_rec*> [#uses=5]
%rcand.2.i = phi %struct.edge_rec* [ %158, %bb5.i ], [ %666, %bb24.i ], [ %rcand.1.i, %bb25.i ] ; <%struct.edge_rec*> [#uses=5]
%basel.0.i = phi %struct.edge_rec* [ %88, %bb5.i ], [ %595, %bb24.i ], [ %716, %bb25.i ] ; <%struct.edge_rec*> [#uses=2]
- %168 = getelementptr %struct.edge_rec* %lcand.2.i, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
- %169 = load %struct.edge_rec** %168, align 4 ; <%struct.edge_rec*> [#uses=3]
- %170 = getelementptr %struct.edge_rec* %basel.0.i, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=3]
- %171 = load %struct.VERTEX** %170, align 4 ; <%struct.VERTEX*> [#uses=4]
+ %168 = getelementptr %struct.edge_rec, %struct.edge_rec* %lcand.2.i, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ %169 = load %struct.edge_rec*, %struct.edge_rec** %168, align 4 ; <%struct.edge_rec*> [#uses=3]
+ %170 = getelementptr %struct.edge_rec, %struct.edge_rec* %basel.0.i, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=3]
+ %171 = load %struct.VERTEX*, %struct.VERTEX** %170, align 4 ; <%struct.VERTEX*> [#uses=4]
%172 = ptrtoint %struct.edge_rec* %basel.0.i to i32 ; <i32> [#uses=3]
%173 = xor i32 %172, 32 ; <i32> [#uses=1]
%174 = inttoptr i32 %173 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=2]
- %175 = getelementptr %struct.edge_rec* %174, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=3]
- %176 = load %struct.VERTEX** %175, align 4 ; <%struct.VERTEX*> [#uses=3]
+ %175 = getelementptr %struct.edge_rec, %struct.edge_rec* %174, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=3]
+ %176 = load %struct.VERTEX*, %struct.VERTEX** %175, align 4 ; <%struct.VERTEX*> [#uses=3]
%177 = ptrtoint %struct.edge_rec* %169 to i32 ; <i32> [#uses=1]
%178 = xor i32 %177, 32 ; <i32> [#uses=1]
%179 = inttoptr i32 %178 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
- %180 = getelementptr %struct.edge_rec* %179, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
- %181 = load %struct.VERTEX** %180, align 4 ; <%struct.VERTEX*> [#uses=2]
- %182 = getelementptr %struct.VERTEX* %171, i32 0, i32 0, i32 0 ; <double*> [#uses=2]
- %183 = load double* %182, align 4 ; <double> [#uses=2]
- %184 = getelementptr %struct.VERTEX* %171, i32 0, i32 0, i32 1 ; <double*> [#uses=2]
- %185 = load double* %184, align 4 ; <double> [#uses=2]
- %186 = getelementptr %struct.VERTEX* %181, i32 0, i32 0, i32 0 ; <double*> [#uses=1]
- %187 = load double* %186, align 4 ; <double> [#uses=1]
- %188 = getelementptr %struct.VERTEX* %181, i32 0, i32 0, i32 1 ; <double*> [#uses=1]
- %189 = load double* %188, align 4 ; <double> [#uses=1]
- %190 = getelementptr %struct.VERTEX* %176, i32 0, i32 0, i32 0 ; <double*> [#uses=1]
- %191 = load double* %190, align 4 ; <double> [#uses=2]
- %192 = getelementptr %struct.VERTEX* %176, i32 0, i32 0, i32 1 ; <double*> [#uses=1]
- %193 = load double* %192, align 4 ; <double> [#uses=2]
+ %180 = getelementptr %struct.edge_rec, %struct.edge_rec* %179, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
+ %181 = load %struct.VERTEX*, %struct.VERTEX** %180, align 4 ; <%struct.VERTEX*> [#uses=2]
+ %182 = getelementptr %struct.VERTEX, %struct.VERTEX* %171, i32 0, i32 0, i32 0 ; <double*> [#uses=2]
+ %183 = load double, double* %182, align 4 ; <double> [#uses=2]
+ %184 = getelementptr %struct.VERTEX, %struct.VERTEX* %171, i32 0, i32 0, i32 1 ; <double*> [#uses=2]
+ %185 = load double, double* %184, align 4 ; <double> [#uses=2]
+ %186 = getelementptr %struct.VERTEX, %struct.VERTEX* %181, i32 0, i32 0, i32 0 ; <double*> [#uses=1]
+ %187 = load double, double* %186, align 4 ; <double> [#uses=1]
+ %188 = getelementptr %struct.VERTEX, %struct.VERTEX* %181, i32 0, i32 0, i32 1 ; <double*> [#uses=1]
+ %189 = load double, double* %188, align 4 ; <double> [#uses=1]
+ %190 = getelementptr %struct.VERTEX, %struct.VERTEX* %176, i32 0, i32 0, i32 0 ; <double*> [#uses=1]
+ %191 = load double, double* %190, align 4 ; <double> [#uses=2]
+ %192 = getelementptr %struct.VERTEX, %struct.VERTEX* %176, i32 0, i32 0, i32 1 ; <double*> [#uses=1]
+ %193 = load double, double* %192, align 4 ; <double> [#uses=2]
%194 = fsub double %183, %191 ; <double> [#uses=1]
%195 = fsub double %189, %193 ; <double> [#uses=1]
%196 = fmul double %194, %195 ; <double> [#uses=1]
@@ -280,8 +280,8 @@ bb9.i: ; preds = %bb25.i, %bb24.i, %bb5.i
br i1 %201, label %bb10.i, label %bb13.i
bb10.i: ; preds = %bb9.i
- %202 = getelementptr %struct.VERTEX* %171, i32 0, i32 0, i32 2 ; <double*> [#uses=1]
- %avail_edge.promoted25 = load %struct.edge_rec** @avail_edge ; <%struct.edge_rec*> [#uses=1]
+ %202 = getelementptr %struct.VERTEX, %struct.VERTEX* %171, i32 0, i32 0, i32 2 ; <double*> [#uses=1]
+ %avail_edge.promoted25 = load %struct.edge_rec*, %struct.edge_rec** @avail_edge ; <%struct.edge_rec*> [#uses=1]
br label %bb12.i
bb11.i: ; preds = %bb12.i
@@ -291,38 +291,38 @@ bb11.i: ; preds = %bb12.i
%206 = and i32 %203, -64 ; <i32> [#uses=3]
%207 = or i32 %205, %206 ; <i32> [#uses=1]
%208 = inttoptr i32 %207 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
- %209 = getelementptr %struct.edge_rec* %208, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
- %210 = load %struct.edge_rec** %209, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %209 = getelementptr %struct.edge_rec, %struct.edge_rec* %208, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ %210 = load %struct.edge_rec*, %struct.edge_rec** %209, align 4 ; <%struct.edge_rec*> [#uses=1]
%211 = ptrtoint %struct.edge_rec* %210 to i32 ; <i32> [#uses=2]
%212 = add i32 %211, 16 ; <i32> [#uses=1]
%213 = and i32 %212, 63 ; <i32> [#uses=1]
%214 = and i32 %211, -64 ; <i32> [#uses=1]
%215 = or i32 %213, %214 ; <i32> [#uses=1]
%216 = inttoptr i32 %215 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
- %217 = getelementptr %struct.edge_rec* %lcand.0.i, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=3]
- %218 = load %struct.edge_rec** %217, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %217 = getelementptr %struct.edge_rec, %struct.edge_rec* %lcand.0.i, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=3]
+ %218 = load %struct.edge_rec*, %struct.edge_rec** %217, align 4 ; <%struct.edge_rec*> [#uses=1]
%219 = ptrtoint %struct.edge_rec* %218 to i32 ; <i32> [#uses=2]
%220 = add i32 %219, 16 ; <i32> [#uses=1]
%221 = and i32 %220, 63 ; <i32> [#uses=1]
%222 = and i32 %219, -64 ; <i32> [#uses=1]
%223 = or i32 %221, %222 ; <i32> [#uses=1]
%224 = inttoptr i32 %223 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
- %225 = getelementptr %struct.edge_rec* %216, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=3]
- %226 = load %struct.edge_rec** %225, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %225 = getelementptr %struct.edge_rec, %struct.edge_rec* %216, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=3]
+ %226 = load %struct.edge_rec*, %struct.edge_rec** %225, align 4 ; <%struct.edge_rec*> [#uses=1]
%227 = ptrtoint %struct.edge_rec* %226 to i32 ; <i32> [#uses=2]
%228 = add i32 %227, 16 ; <i32> [#uses=1]
%229 = and i32 %228, 63 ; <i32> [#uses=1]
%230 = and i32 %227, -64 ; <i32> [#uses=1]
%231 = or i32 %229, %230 ; <i32> [#uses=1]
%232 = inttoptr i32 %231 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
- %233 = getelementptr %struct.edge_rec* %232, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2]
- %234 = load %struct.edge_rec** %233, align 4 ; <%struct.edge_rec*> [#uses=1]
- %235 = getelementptr %struct.edge_rec* %224, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2]
- %236 = load %struct.edge_rec** %235, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %233 = getelementptr %struct.edge_rec, %struct.edge_rec* %232, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2]
+ %234 = load %struct.edge_rec*, %struct.edge_rec** %233, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %235 = getelementptr %struct.edge_rec, %struct.edge_rec* %224, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2]
+ %236 = load %struct.edge_rec*, %struct.edge_rec** %235, align 4 ; <%struct.edge_rec*> [#uses=1]
store %struct.edge_rec* %234, %struct.edge_rec** %235, align 4
store %struct.edge_rec* %236, %struct.edge_rec** %233, align 4
- %237 = load %struct.edge_rec** %217, align 4 ; <%struct.edge_rec*> [#uses=1]
- %238 = load %struct.edge_rec** %225, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %237 = load %struct.edge_rec*, %struct.edge_rec** %217, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %238 = load %struct.edge_rec*, %struct.edge_rec** %225, align 4 ; <%struct.edge_rec*> [#uses=1]
store %struct.edge_rec* %237, %struct.edge_rec** %225, align 4
store %struct.edge_rec* %238, %struct.edge_rec** %217, align 4
%239 = xor i32 %203, 32 ; <i32> [#uses=2]
@@ -330,8 +330,8 @@ bb11.i: ; preds = %bb12.i
%241 = and i32 %240, 63 ; <i32> [#uses=1]
%242 = or i32 %241, %206 ; <i32> [#uses=1]
%243 = inttoptr i32 %242 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
- %244 = getelementptr %struct.edge_rec* %243, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
- %245 = load %struct.edge_rec** %244, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %244 = getelementptr %struct.edge_rec, %struct.edge_rec* %243, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ %245 = load %struct.edge_rec*, %struct.edge_rec** %244, align 4 ; <%struct.edge_rec*> [#uses=1]
%246 = ptrtoint %struct.edge_rec* %245 to i32 ; <i32> [#uses=2]
%247 = add i32 %246, 16 ; <i32> [#uses=1]
%248 = and i32 %247, 63 ; <i32> [#uses=1]
@@ -339,39 +339,39 @@ bb11.i: ; preds = %bb12.i
%250 = or i32 %248, %249 ; <i32> [#uses=1]
%251 = inttoptr i32 %250 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
%252 = inttoptr i32 %239 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
- %253 = getelementptr %struct.edge_rec* %252, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=3]
- %254 = load %struct.edge_rec** %253, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %253 = getelementptr %struct.edge_rec, %struct.edge_rec* %252, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=3]
+ %254 = load %struct.edge_rec*, %struct.edge_rec** %253, align 4 ; <%struct.edge_rec*> [#uses=1]
%255 = ptrtoint %struct.edge_rec* %254 to i32 ; <i32> [#uses=2]
%256 = add i32 %255, 16 ; <i32> [#uses=1]
%257 = and i32 %256, 63 ; <i32> [#uses=1]
%258 = and i32 %255, -64 ; <i32> [#uses=1]
%259 = or i32 %257, %258 ; <i32> [#uses=1]
%260 = inttoptr i32 %259 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
- %261 = getelementptr %struct.edge_rec* %251, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=3]
- %262 = load %struct.edge_rec** %261, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %261 = getelementptr %struct.edge_rec, %struct.edge_rec* %251, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=3]
+ %262 = load %struct.edge_rec*, %struct.edge_rec** %261, align 4 ; <%struct.edge_rec*> [#uses=1]
%263 = ptrtoint %struct.edge_rec* %262 to i32 ; <i32> [#uses=2]
%264 = add i32 %263, 16 ; <i32> [#uses=1]
%265 = and i32 %264, 63 ; <i32> [#uses=1]
%266 = and i32 %263, -64 ; <i32> [#uses=1]
%267 = or i32 %265, %266 ; <i32> [#uses=1]
%268 = inttoptr i32 %267 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
- %269 = getelementptr %struct.edge_rec* %268, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2]
- %270 = load %struct.edge_rec** %269, align 4 ; <%struct.edge_rec*> [#uses=1]
- %271 = getelementptr %struct.edge_rec* %260, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2]
- %272 = load %struct.edge_rec** %271, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %269 = getelementptr %struct.edge_rec, %struct.edge_rec* %268, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2]
+ %270 = load %struct.edge_rec*, %struct.edge_rec** %269, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %271 = getelementptr %struct.edge_rec, %struct.edge_rec* %260, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2]
+ %272 = load %struct.edge_rec*, %struct.edge_rec** %271, align 4 ; <%struct.edge_rec*> [#uses=1]
store %struct.edge_rec* %270, %struct.edge_rec** %271, align 4
store %struct.edge_rec* %272, %struct.edge_rec** %269, align 4
- %273 = load %struct.edge_rec** %253, align 4 ; <%struct.edge_rec*> [#uses=1]
- %274 = load %struct.edge_rec** %261, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %273 = load %struct.edge_rec*, %struct.edge_rec** %253, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %274 = load %struct.edge_rec*, %struct.edge_rec** %261, align 4 ; <%struct.edge_rec*> [#uses=1]
store %struct.edge_rec* %273, %struct.edge_rec** %261, align 4
store %struct.edge_rec* %274, %struct.edge_rec** %253, align 4
%275 = inttoptr i32 %206 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=2]
- %276 = getelementptr %struct.edge_rec* %275, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ %276 = getelementptr %struct.edge_rec, %struct.edge_rec* %275, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
store %struct.edge_rec* %avail_edge.tmp.026, %struct.edge_rec** %276, align 4
- %277 = getelementptr %struct.edge_rec* %t.0.i, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
- %278 = load %struct.edge_rec** %277, align 4 ; <%struct.edge_rec*> [#uses=2]
- %.pre.i = load double* %182, align 4 ; <double> [#uses=1]
- %.pre22.i = load double* %184, align 4 ; <double> [#uses=1]
+ %277 = getelementptr %struct.edge_rec, %struct.edge_rec* %t.0.i, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ %278 = load %struct.edge_rec*, %struct.edge_rec** %277, align 4 ; <%struct.edge_rec*> [#uses=2]
+ %.pre.i = load double, double* %182, align 4 ; <double> [#uses=1]
+ %.pre22.i = load double, double* %184, align 4 ; <double> [#uses=1]
br label %bb12.i
bb12.i: ; preds = %bb11.i, %bb10.i
@@ -389,37 +389,37 @@ bb12.i: ; preds = %bb11.i, %bb10.i
%.pn4.in.i = xor i32 %.pn4.in.in.i, 32 ; <i32> [#uses=1]
%.pn5.i = inttoptr i32 %.pn5.in.i to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
%.pn4.i = inttoptr i32 %.pn4.in.i to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
- %v1.0.in.i = getelementptr %struct.edge_rec* %.pn5.i, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
- %v2.0.in.i = getelementptr %struct.edge_rec* %.pn4.i, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
- %v3.0.in.i = getelementptr %struct.edge_rec* %lcand.2.pn.i, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
- %v1.0.i = load %struct.VERTEX** %v1.0.in.i ; <%struct.VERTEX*> [#uses=3]
- %v2.0.i = load %struct.VERTEX** %v2.0.in.i ; <%struct.VERTEX*> [#uses=3]
- %v3.0.i = load %struct.VERTEX** %v3.0.in.i ; <%struct.VERTEX*> [#uses=3]
- %281 = load double* %202, align 4 ; <double> [#uses=3]
- %282 = getelementptr %struct.VERTEX* %v1.0.i, i32 0, i32 0, i32 0 ; <double*> [#uses=1]
- %283 = load double* %282, align 4 ; <double> [#uses=1]
+ %v1.0.in.i = getelementptr %struct.edge_rec, %struct.edge_rec* %.pn5.i, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
+ %v2.0.in.i = getelementptr %struct.edge_rec, %struct.edge_rec* %.pn4.i, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
+ %v3.0.in.i = getelementptr %struct.edge_rec, %struct.edge_rec* %lcand.2.pn.i, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
+ %v1.0.i = load %struct.VERTEX*, %struct.VERTEX** %v1.0.in.i ; <%struct.VERTEX*> [#uses=3]
+ %v2.0.i = load %struct.VERTEX*, %struct.VERTEX** %v2.0.in.i ; <%struct.VERTEX*> [#uses=3]
+ %v3.0.i = load %struct.VERTEX*, %struct.VERTEX** %v3.0.in.i ; <%struct.VERTEX*> [#uses=3]
+ %281 = load double, double* %202, align 4 ; <double> [#uses=3]
+ %282 = getelementptr %struct.VERTEX, %struct.VERTEX* %v1.0.i, i32 0, i32 0, i32 0 ; <double*> [#uses=1]
+ %283 = load double, double* %282, align 4 ; <double> [#uses=1]
%284 = fsub double %283, %280 ; <double> [#uses=2]
- %285 = getelementptr %struct.VERTEX* %v1.0.i, i32 0, i32 0, i32 1 ; <double*> [#uses=1]
- %286 = load double* %285, align 4 ; <double> [#uses=1]
+ %285 = getelementptr %struct.VERTEX, %struct.VERTEX* %v1.0.i, i32 0, i32 0, i32 1 ; <double*> [#uses=1]
+ %286 = load double, double* %285, align 4 ; <double> [#uses=1]
%287 = fsub double %286, %279 ; <double> [#uses=2]
- %288 = getelementptr %struct.VERTEX* %v1.0.i, i32 0, i32 0, i32 2 ; <double*> [#uses=1]
- %289 = load double* %288, align 4 ; <double> [#uses=1]
- %290 = getelementptr %struct.VERTEX* %v2.0.i, i32 0, i32 0, i32 0 ; <double*> [#uses=1]
- %291 = load double* %290, align 4 ; <double> [#uses=1]
+ %288 = getelementptr %struct.VERTEX, %struct.VERTEX* %v1.0.i, i32 0, i32 0, i32 2 ; <double*> [#uses=1]
+ %289 = load double, double* %288, align 4 ; <double> [#uses=1]
+ %290 = getelementptr %struct.VERTEX, %struct.VERTEX* %v2.0.i, i32 0, i32 0, i32 0 ; <double*> [#uses=1]
+ %291 = load double, double* %290, align 4 ; <double> [#uses=1]
%292 = fsub double %291, %280 ; <double> [#uses=2]
- %293 = getelementptr %struct.VERTEX* %v2.0.i, i32 0, i32 0, i32 1 ; <double*> [#uses=1]
- %294 = load double* %293, align 4 ; <double> [#uses=1]
+ %293 = getelementptr %struct.VERTEX, %struct.VERTEX* %v2.0.i, i32 0, i32 0, i32 1 ; <double*> [#uses=1]
+ %294 = load double, double* %293, align 4 ; <double> [#uses=1]
%295 = fsub double %294, %279 ; <double> [#uses=2]
- %296 = getelementptr %struct.VERTEX* %v2.0.i, i32 0, i32 0, i32 2 ; <double*> [#uses=1]
- %297 = load double* %296, align 4 ; <double> [#uses=1]
- %298 = getelementptr %struct.VERTEX* %v3.0.i, i32 0, i32 0, i32 0 ; <double*> [#uses=1]
- %299 = load double* %298, align 4 ; <double> [#uses=1]
+ %296 = getelementptr %struct.VERTEX, %struct.VERTEX* %v2.0.i, i32 0, i32 0, i32 2 ; <double*> [#uses=1]
+ %297 = load double, double* %296, align 4 ; <double> [#uses=1]
+ %298 = getelementptr %struct.VERTEX, %struct.VERTEX* %v3.0.i, i32 0, i32 0, i32 0 ; <double*> [#uses=1]
+ %299 = load double, double* %298, align 4 ; <double> [#uses=1]
%300 = fsub double %299, %280 ; <double> [#uses=2]
- %301 = getelementptr %struct.VERTEX* %v3.0.i, i32 0, i32 0, i32 1 ; <double*> [#uses=1]
- %302 = load double* %301, align 4 ; <double> [#uses=1]
+ %301 = getelementptr %struct.VERTEX, %struct.VERTEX* %v3.0.i, i32 0, i32 0, i32 1 ; <double*> [#uses=1]
+ %302 = load double, double* %301, align 4 ; <double> [#uses=1]
%303 = fsub double %302, %279 ; <double> [#uses=2]
- %304 = getelementptr %struct.VERTEX* %v3.0.i, i32 0, i32 0, i32 2 ; <double*> [#uses=1]
- %305 = load double* %304, align 4 ; <double> [#uses=1]
+ %304 = getelementptr %struct.VERTEX, %struct.VERTEX* %v3.0.i, i32 0, i32 0, i32 2 ; <double*> [#uses=1]
+ %305 = load double, double* %304, align 4 ; <double> [#uses=1]
%306 = fsub double %289, %281 ; <double> [#uses=1]
%307 = fmul double %292, %303 ; <double> [#uses=1]
%308 = fmul double %295, %300 ; <double> [#uses=1]
@@ -442,8 +442,8 @@ bb12.i: ; preds = %bb11.i, %bb10.i
bb13.loopexit.i: ; preds = %bb12.i
store %struct.edge_rec* %avail_edge.tmp.026, %struct.edge_rec** @avail_edge
- %.pre23.i = load %struct.VERTEX** %170, align 4 ; <%struct.VERTEX*> [#uses=1]
- %.pre24.i = load %struct.VERTEX** %175, align 4 ; <%struct.VERTEX*> [#uses=1]
+ %.pre23.i = load %struct.VERTEX*, %struct.VERTEX** %170, align 4 ; <%struct.VERTEX*> [#uses=1]
+ %.pre24.i = load %struct.VERTEX*, %struct.VERTEX** %175, align 4 ; <%struct.VERTEX*> [#uses=1]
br label %bb13.i
bb13.i: ; preds = %bb13.loopexit.i, %bb9.i
@@ -456,8 +456,8 @@ bb13.i: ; preds = %bb13.loopexit.i, %bb9.i
%329 = and i32 %326, -64 ; <i32> [#uses=1]
%330 = or i32 %328, %329 ; <i32> [#uses=1]
%331 = inttoptr i32 %330 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
- %332 = getelementptr %struct.edge_rec* %331, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
- %333 = load %struct.edge_rec** %332, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %332 = getelementptr %struct.edge_rec, %struct.edge_rec* %331, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ %333 = load %struct.edge_rec*, %struct.edge_rec** %332, align 4 ; <%struct.edge_rec*> [#uses=1]
%334 = ptrtoint %struct.edge_rec* %333 to i32 ; <i32> [#uses=2]
%335 = add i32 %334, 16 ; <i32> [#uses=1]
%336 = and i32 %335, 63 ; <i32> [#uses=1]
@@ -465,20 +465,20 @@ bb13.i: ; preds = %bb13.loopexit.i, %bb9.i
%338 = or i32 %336, %337 ; <i32> [#uses=3]
%339 = xor i32 %338, 32 ; <i32> [#uses=1]
%340 = inttoptr i32 %339 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
- %341 = getelementptr %struct.edge_rec* %340, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
- %342 = load %struct.VERTEX** %341, align 4 ; <%struct.VERTEX*> [#uses=2]
- %343 = getelementptr %struct.VERTEX* %325, i32 0, i32 0, i32 0 ; <double*> [#uses=1]
- %344 = load double* %343, align 4 ; <double> [#uses=1]
- %345 = getelementptr %struct.VERTEX* %325, i32 0, i32 0, i32 1 ; <double*> [#uses=1]
- %346 = load double* %345, align 4 ; <double> [#uses=1]
- %347 = getelementptr %struct.VERTEX* %342, i32 0, i32 0, i32 0 ; <double*> [#uses=1]
- %348 = load double* %347, align 4 ; <double> [#uses=1]
- %349 = getelementptr %struct.VERTEX* %342, i32 0, i32 0, i32 1 ; <double*> [#uses=1]
- %350 = load double* %349, align 4 ; <double> [#uses=1]
- %351 = getelementptr %struct.VERTEX* %324, i32 0, i32 0, i32 0 ; <double*> [#uses=2]
- %352 = load double* %351, align 4 ; <double> [#uses=3]
- %353 = getelementptr %struct.VERTEX* %324, i32 0, i32 0, i32 1 ; <double*> [#uses=2]
- %354 = load double* %353, align 4 ; <double> [#uses=3]
+ %341 = getelementptr %struct.edge_rec, %struct.edge_rec* %340, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
+ %342 = load %struct.VERTEX*, %struct.VERTEX** %341, align 4 ; <%struct.VERTEX*> [#uses=2]
+ %343 = getelementptr %struct.VERTEX, %struct.VERTEX* %325, i32 0, i32 0, i32 0 ; <double*> [#uses=1]
+ %344 = load double, double* %343, align 4 ; <double> [#uses=1]
+ %345 = getelementptr %struct.VERTEX, %struct.VERTEX* %325, i32 0, i32 0, i32 1 ; <double*> [#uses=1]
+ %346 = load double, double* %345, align 4 ; <double> [#uses=1]
+ %347 = getelementptr %struct.VERTEX, %struct.VERTEX* %342, i32 0, i32 0, i32 0 ; <double*> [#uses=1]
+ %348 = load double, double* %347, align 4 ; <double> [#uses=1]
+ %349 = getelementptr %struct.VERTEX, %struct.VERTEX* %342, i32 0, i32 0, i32 1 ; <double*> [#uses=1]
+ %350 = load double, double* %349, align 4 ; <double> [#uses=1]
+ %351 = getelementptr %struct.VERTEX, %struct.VERTEX* %324, i32 0, i32 0, i32 0 ; <double*> [#uses=2]
+ %352 = load double, double* %351, align 4 ; <double> [#uses=3]
+ %353 = getelementptr %struct.VERTEX, %struct.VERTEX* %324, i32 0, i32 0, i32 1 ; <double*> [#uses=2]
+ %354 = load double, double* %353, align 4 ; <double> [#uses=3]
%355 = fsub double %344, %352 ; <double> [#uses=1]
%356 = fsub double %350, %354 ; <double> [#uses=1]
%357 = fmul double %355, %356 ; <double> [#uses=1]
@@ -490,8 +490,8 @@ bb13.i: ; preds = %bb13.loopexit.i, %bb9.i
br i1 %362, label %bb14.i, label %bb17.i
bb14.i: ; preds = %bb13.i
- %363 = getelementptr %struct.VERTEX* %324, i32 0, i32 0, i32 2 ; <double*> [#uses=1]
- %avail_edge.promoted = load %struct.edge_rec** @avail_edge ; <%struct.edge_rec*> [#uses=1]
+ %363 = getelementptr %struct.VERTEX, %struct.VERTEX* %324, i32 0, i32 0, i32 2 ; <double*> [#uses=1]
+ %avail_edge.promoted = load %struct.edge_rec*, %struct.edge_rec** @avail_edge ; <%struct.edge_rec*> [#uses=1]
br label %bb16.i
bb15.i: ; preds = %bb16.i
@@ -501,38 +501,38 @@ bb15.i: ; preds = %bb16.i
%367 = and i32 %364, -64 ; <i32> [#uses=3]
%368 = or i32 %366, %367 ; <i32> [#uses=1]
%369 = inttoptr i32 %368 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
- %370 = getelementptr %struct.edge_rec* %369, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
- %371 = load %struct.edge_rec** %370, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %370 = getelementptr %struct.edge_rec, %struct.edge_rec* %369, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ %371 = load %struct.edge_rec*, %struct.edge_rec** %370, align 4 ; <%struct.edge_rec*> [#uses=1]
%372 = ptrtoint %struct.edge_rec* %371 to i32 ; <i32> [#uses=2]
%373 = add i32 %372, 16 ; <i32> [#uses=1]
%374 = and i32 %373, 63 ; <i32> [#uses=1]
%375 = and i32 %372, -64 ; <i32> [#uses=1]
%376 = or i32 %374, %375 ; <i32> [#uses=1]
%377 = inttoptr i32 %376 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
- %378 = getelementptr %struct.edge_rec* %rcand.0.i, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=3]
- %379 = load %struct.edge_rec** %378, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %378 = getelementptr %struct.edge_rec, %struct.edge_rec* %rcand.0.i, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=3]
+ %379 = load %struct.edge_rec*, %struct.edge_rec** %378, align 4 ; <%struct.edge_rec*> [#uses=1]
%380 = ptrtoint %struct.edge_rec* %379 to i32 ; <i32> [#uses=2]
%381 = add i32 %380, 16 ; <i32> [#uses=1]
%382 = and i32 %381, 63 ; <i32> [#uses=1]
%383 = and i32 %380, -64 ; <i32> [#uses=1]
%384 = or i32 %382, %383 ; <i32> [#uses=1]
%385 = inttoptr i32 %384 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
- %386 = getelementptr %struct.edge_rec* %377, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=3]
- %387 = load %struct.edge_rec** %386, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %386 = getelementptr %struct.edge_rec, %struct.edge_rec* %377, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=3]
+ %387 = load %struct.edge_rec*, %struct.edge_rec** %386, align 4 ; <%struct.edge_rec*> [#uses=1]
%388 = ptrtoint %struct.edge_rec* %387 to i32 ; <i32> [#uses=2]
%389 = add i32 %388, 16 ; <i32> [#uses=1]
%390 = and i32 %389, 63 ; <i32> [#uses=1]
%391 = and i32 %388, -64 ; <i32> [#uses=1]
%392 = or i32 %390, %391 ; <i32> [#uses=1]
%393 = inttoptr i32 %392 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
- %394 = getelementptr %struct.edge_rec* %393, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2]
- %395 = load %struct.edge_rec** %394, align 4 ; <%struct.edge_rec*> [#uses=1]
- %396 = getelementptr %struct.edge_rec* %385, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2]
- %397 = load %struct.edge_rec** %396, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %394 = getelementptr %struct.edge_rec, %struct.edge_rec* %393, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2]
+ %395 = load %struct.edge_rec*, %struct.edge_rec** %394, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %396 = getelementptr %struct.edge_rec, %struct.edge_rec* %385, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2]
+ %397 = load %struct.edge_rec*, %struct.edge_rec** %396, align 4 ; <%struct.edge_rec*> [#uses=1]
store %struct.edge_rec* %395, %struct.edge_rec** %396, align 4
store %struct.edge_rec* %397, %struct.edge_rec** %394, align 4
- %398 = load %struct.edge_rec** %378, align 4 ; <%struct.edge_rec*> [#uses=1]
- %399 = load %struct.edge_rec** %386, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %398 = load %struct.edge_rec*, %struct.edge_rec** %378, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %399 = load %struct.edge_rec*, %struct.edge_rec** %386, align 4 ; <%struct.edge_rec*> [#uses=1]
store %struct.edge_rec* %398, %struct.edge_rec** %386, align 4
store %struct.edge_rec* %399, %struct.edge_rec** %378, align 4
%400 = xor i32 %364, 32 ; <i32> [#uses=2]
@@ -540,8 +540,8 @@ bb15.i: ; preds = %bb16.i
%402 = and i32 %401, 63 ; <i32> [#uses=1]
%403 = or i32 %402, %367 ; <i32> [#uses=1]
%404 = inttoptr i32 %403 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
- %405 = getelementptr %struct.edge_rec* %404, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
- %406 = load %struct.edge_rec** %405, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %405 = getelementptr %struct.edge_rec, %struct.edge_rec* %404, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ %406 = load %struct.edge_rec*, %struct.edge_rec** %405, align 4 ; <%struct.edge_rec*> [#uses=1]
%407 = ptrtoint %struct.edge_rec* %406 to i32 ; <i32> [#uses=2]
%408 = add i32 %407, 16 ; <i32> [#uses=1]
%409 = and i32 %408, 63 ; <i32> [#uses=1]
@@ -549,49 +549,49 @@ bb15.i: ; preds = %bb16.i
%411 = or i32 %409, %410 ; <i32> [#uses=1]
%412 = inttoptr i32 %411 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
%413 = inttoptr i32 %400 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
- %414 = getelementptr %struct.edge_rec* %413, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=3]
- %415 = load %struct.edge_rec** %414, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %414 = getelementptr %struct.edge_rec, %struct.edge_rec* %413, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=3]
+ %415 = load %struct.edge_rec*, %struct.edge_rec** %414, align 4 ; <%struct.edge_rec*> [#uses=1]
%416 = ptrtoint %struct.edge_rec* %415 to i32 ; <i32> [#uses=2]
%417 = add i32 %416, 16 ; <i32> [#uses=1]
%418 = and i32 %417, 63 ; <i32> [#uses=1]
%419 = and i32 %416, -64 ; <i32> [#uses=1]
%420 = or i32 %418, %419 ; <i32> [#uses=1]
%421 = inttoptr i32 %420 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
- %422 = getelementptr %struct.edge_rec* %412, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=3]
- %423 = load %struct.edge_rec** %422, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %422 = getelementptr %struct.edge_rec, %struct.edge_rec* %412, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=3]
+ %423 = load %struct.edge_rec*, %struct.edge_rec** %422, align 4 ; <%struct.edge_rec*> [#uses=1]
%424 = ptrtoint %struct.edge_rec* %423 to i32 ; <i32> [#uses=2]
%425 = add i32 %424, 16 ; <i32> [#uses=1]
%426 = and i32 %425, 63 ; <i32> [#uses=1]
%427 = and i32 %424, -64 ; <i32> [#uses=1]
%428 = or i32 %426, %427 ; <i32> [#uses=1]
%429 = inttoptr i32 %428 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
- %430 = getelementptr %struct.edge_rec* %429, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2]
- %431 = load %struct.edge_rec** %430, align 4 ; <%struct.edge_rec*> [#uses=1]
- %432 = getelementptr %struct.edge_rec* %421, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2]
- %433 = load %struct.edge_rec** %432, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %430 = getelementptr %struct.edge_rec, %struct.edge_rec* %429, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2]
+ %431 = load %struct.edge_rec*, %struct.edge_rec** %430, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %432 = getelementptr %struct.edge_rec, %struct.edge_rec* %421, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2]
+ %433 = load %struct.edge_rec*, %struct.edge_rec** %432, align 4 ; <%struct.edge_rec*> [#uses=1]
store %struct.edge_rec* %431, %struct.edge_rec** %432, align 4
store %struct.edge_rec* %433, %struct.edge_rec** %430, align 4
- %434 = load %struct.edge_rec** %414, align 4 ; <%struct.edge_rec*> [#uses=1]
- %435 = load %struct.edge_rec** %422, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %434 = load %struct.edge_rec*, %struct.edge_rec** %414, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %435 = load %struct.edge_rec*, %struct.edge_rec** %422, align 4 ; <%struct.edge_rec*> [#uses=1]
store %struct.edge_rec* %434, %struct.edge_rec** %422, align 4
store %struct.edge_rec* %435, %struct.edge_rec** %414, align 4
%436 = inttoptr i32 %367 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=2]
- %437 = getelementptr %struct.edge_rec* %436, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ %437 = getelementptr %struct.edge_rec, %struct.edge_rec* %436, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
store %struct.edge_rec* %avail_edge.tmp.0, %struct.edge_rec** %437, align 4
%438 = add i32 %t.1.in.i, 16 ; <i32> [#uses=1]
%439 = and i32 %438, 63 ; <i32> [#uses=1]
%440 = and i32 %t.1.in.i, -64 ; <i32> [#uses=1]
%441 = or i32 %439, %440 ; <i32> [#uses=1]
%442 = inttoptr i32 %441 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
- %443 = getelementptr %struct.edge_rec* %442, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
- %444 = load %struct.edge_rec** %443, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %443 = getelementptr %struct.edge_rec, %struct.edge_rec* %442, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ %444 = load %struct.edge_rec*, %struct.edge_rec** %443, align 4 ; <%struct.edge_rec*> [#uses=1]
%445 = ptrtoint %struct.edge_rec* %444 to i32 ; <i32> [#uses=2]
%446 = add i32 %445, 16 ; <i32> [#uses=1]
%447 = and i32 %446, 63 ; <i32> [#uses=1]
%448 = and i32 %445, -64 ; <i32> [#uses=1]
%449 = or i32 %447, %448 ; <i32> [#uses=2]
- %.pre25.i = load double* %351, align 4 ; <double> [#uses=1]
- %.pre26.i = load double* %353, align 4 ; <double> [#uses=1]
+ %.pre25.i = load double, double* %351, align 4 ; <double> [#uses=1]
+ %.pre26.i = load double, double* %353, align 4 ; <double> [#uses=1]
br label %bb16.i
bb16.i: ; preds = %bb15.i, %bb14.i
@@ -609,37 +609,37 @@ bb16.i: ; preds = %bb15.i, %bb14.i
%.pn.in.i = xor i32 %.pn.in.in.i, 32 ; <i32> [#uses=1]
%.pn3.i = inttoptr i32 %.pn3.in.i to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
%.pn.i = inttoptr i32 %.pn.in.i to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
- %v1.1.in.i = getelementptr %struct.edge_rec* %.pn3.i, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
- %v2.1.in.i = getelementptr %struct.edge_rec* %.pn.i, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
- %v3.1.in.i = getelementptr %struct.edge_rec* %rcand.2.pn.i, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
- %v1.1.i = load %struct.VERTEX** %v1.1.in.i ; <%struct.VERTEX*> [#uses=3]
- %v2.1.i = load %struct.VERTEX** %v2.1.in.i ; <%struct.VERTEX*> [#uses=3]
- %v3.1.i = load %struct.VERTEX** %v3.1.in.i ; <%struct.VERTEX*> [#uses=3]
- %452 = load double* %363, align 4 ; <double> [#uses=3]
- %453 = getelementptr %struct.VERTEX* %v1.1.i, i32 0, i32 0, i32 0 ; <double*> [#uses=1]
- %454 = load double* %453, align 4 ; <double> [#uses=1]
+ %v1.1.in.i = getelementptr %struct.edge_rec, %struct.edge_rec* %.pn3.i, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
+ %v2.1.in.i = getelementptr %struct.edge_rec, %struct.edge_rec* %.pn.i, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
+ %v3.1.in.i = getelementptr %struct.edge_rec, %struct.edge_rec* %rcand.2.pn.i, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
+ %v1.1.i = load %struct.VERTEX*, %struct.VERTEX** %v1.1.in.i ; <%struct.VERTEX*> [#uses=3]
+ %v2.1.i = load %struct.VERTEX*, %struct.VERTEX** %v2.1.in.i ; <%struct.VERTEX*> [#uses=3]
+ %v3.1.i = load %struct.VERTEX*, %struct.VERTEX** %v3.1.in.i ; <%struct.VERTEX*> [#uses=3]
+ %452 = load double, double* %363, align 4 ; <double> [#uses=3]
+ %453 = getelementptr %struct.VERTEX, %struct.VERTEX* %v1.1.i, i32 0, i32 0, i32 0 ; <double*> [#uses=1]
+ %454 = load double, double* %453, align 4 ; <double> [#uses=1]
%455 = fsub double %454, %451 ; <double> [#uses=2]
- %456 = getelementptr %struct.VERTEX* %v1.1.i, i32 0, i32 0, i32 1 ; <double*> [#uses=1]
- %457 = load double* %456, align 4 ; <double> [#uses=1]
+ %456 = getelementptr %struct.VERTEX, %struct.VERTEX* %v1.1.i, i32 0, i32 0, i32 1 ; <double*> [#uses=1]
+ %457 = load double, double* %456, align 4 ; <double> [#uses=1]
%458 = fsub double %457, %450 ; <double> [#uses=2]
- %459 = getelementptr %struct.VERTEX* %v1.1.i, i32 0, i32 0, i32 2 ; <double*> [#uses=1]
- %460 = load double* %459, align 4 ; <double> [#uses=1]
- %461 = getelementptr %struct.VERTEX* %v2.1.i, i32 0, i32 0, i32 0 ; <double*> [#uses=1]
- %462 = load double* %461, align 4 ; <double> [#uses=1]
+ %459 = getelementptr %struct.VERTEX, %struct.VERTEX* %v1.1.i, i32 0, i32 0, i32 2 ; <double*> [#uses=1]
+ %460 = load double, double* %459, align 4 ; <double> [#uses=1]
+ %461 = getelementptr %struct.VERTEX, %struct.VERTEX* %v2.1.i, i32 0, i32 0, i32 0 ; <double*> [#uses=1]
+ %462 = load double, double* %461, align 4 ; <double> [#uses=1]
%463 = fsub double %462, %451 ; <double> [#uses=2]
- %464 = getelementptr %struct.VERTEX* %v2.1.i, i32 0, i32 0, i32 1 ; <double*> [#uses=1]
- %465 = load double* %464, align 4 ; <double> [#uses=1]
+ %464 = getelementptr %struct.VERTEX, %struct.VERTEX* %v2.1.i, i32 0, i32 0, i32 1 ; <double*> [#uses=1]
+ %465 = load double, double* %464, align 4 ; <double> [#uses=1]
%466 = fsub double %465, %450 ; <double> [#uses=2]
- %467 = getelementptr %struct.VERTEX* %v2.1.i, i32 0, i32 0, i32 2 ; <double*> [#uses=1]
- %468 = load double* %467, align 4 ; <double> [#uses=1]
- %469 = getelementptr %struct.VERTEX* %v3.1.i, i32 0, i32 0, i32 0 ; <double*> [#uses=1]
- %470 = load double* %469, align 4 ; <double> [#uses=1]
+ %467 = getelementptr %struct.VERTEX, %struct.VERTEX* %v2.1.i, i32 0, i32 0, i32 2 ; <double*> [#uses=1]
+ %468 = load double, double* %467, align 4 ; <double> [#uses=1]
+ %469 = getelementptr %struct.VERTEX, %struct.VERTEX* %v3.1.i, i32 0, i32 0, i32 0 ; <double*> [#uses=1]
+ %470 = load double, double* %469, align 4 ; <double> [#uses=1]
%471 = fsub double %470, %451 ; <double> [#uses=2]
- %472 = getelementptr %struct.VERTEX* %v3.1.i, i32 0, i32 0, i32 1 ; <double*> [#uses=1]
- %473 = load double* %472, align 4 ; <double> [#uses=1]
+ %472 = getelementptr %struct.VERTEX, %struct.VERTEX* %v3.1.i, i32 0, i32 0, i32 1 ; <double*> [#uses=1]
+ %473 = load double, double* %472, align 4 ; <double> [#uses=1]
%474 = fsub double %473, %450 ; <double> [#uses=2]
- %475 = getelementptr %struct.VERTEX* %v3.1.i, i32 0, i32 0, i32 2 ; <double*> [#uses=1]
- %476 = load double* %475, align 4 ; <double> [#uses=1]
+ %475 = getelementptr %struct.VERTEX, %struct.VERTEX* %v3.1.i, i32 0, i32 0, i32 2 ; <double*> [#uses=1]
+ %476 = load double, double* %475, align 4 ; <double> [#uses=1]
%477 = fsub double %460, %452 ; <double> [#uses=1]
%478 = fmul double %463, %474 ; <double> [#uses=1]
%479 = fmul double %466, %471 ; <double> [#uses=1]
@@ -662,8 +662,8 @@ bb16.i: ; preds = %bb15.i, %bb14.i
bb17.loopexit.i: ; preds = %bb16.i
store %struct.edge_rec* %avail_edge.tmp.0, %struct.edge_rec** @avail_edge
- %.pre27.i = load %struct.VERTEX** %170, align 4 ; <%struct.VERTEX*> [#uses=1]
- %.pre28.i = load %struct.VERTEX** %175, align 4 ; <%struct.VERTEX*> [#uses=1]
+ %.pre27.i = load %struct.VERTEX*, %struct.VERTEX** %170, align 4 ; <%struct.VERTEX*> [#uses=1]
+ %.pre28.i = load %struct.VERTEX*, %struct.VERTEX** %175, align 4 ; <%struct.VERTEX*> [#uses=1]
br label %bb17.i
bb17.i: ; preds = %bb17.loopexit.i, %bb13.i
@@ -673,20 +673,20 @@ bb17.i: ; preds = %bb17.loopexit.i, %bb13.i
%497 = ptrtoint %struct.edge_rec* %lcand.1.i to i32 ; <i32> [#uses=1]
%498 = xor i32 %497, 32 ; <i32> [#uses=1]
%499 = inttoptr i32 %498 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=2]
- %500 = getelementptr %struct.edge_rec* %499, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
- %501 = load %struct.VERTEX** %500, align 4 ; <%struct.VERTEX*> [#uses=4]
- %502 = getelementptr %struct.VERTEX* %496, i32 0, i32 0, i32 0 ; <double*> [#uses=1]
- %503 = load double* %502, align 4 ; <double> [#uses=1]
- %504 = getelementptr %struct.VERTEX* %496, i32 0, i32 0, i32 1 ; <double*> [#uses=1]
- %505 = load double* %504, align 4 ; <double> [#uses=1]
- %506 = getelementptr %struct.VERTEX* %501, i32 0, i32 0, i32 0 ; <double*> [#uses=1]
- %507 = load double* %506, align 4 ; <double> [#uses=2]
- %508 = getelementptr %struct.VERTEX* %501, i32 0, i32 0, i32 1 ; <double*> [#uses=1]
- %509 = load double* %508, align 4 ; <double> [#uses=2]
- %510 = getelementptr %struct.VERTEX* %495, i32 0, i32 0, i32 0 ; <double*> [#uses=1]
- %511 = load double* %510, align 4 ; <double> [#uses=3]
- %512 = getelementptr %struct.VERTEX* %495, i32 0, i32 0, i32 1 ; <double*> [#uses=1]
- %513 = load double* %512, align 4 ; <double> [#uses=3]
+ %500 = getelementptr %struct.edge_rec, %struct.edge_rec* %499, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
+ %501 = load %struct.VERTEX*, %struct.VERTEX** %500, align 4 ; <%struct.VERTEX*> [#uses=4]
+ %502 = getelementptr %struct.VERTEX, %struct.VERTEX* %496, i32 0, i32 0, i32 0 ; <double*> [#uses=1]
+ %503 = load double, double* %502, align 4 ; <double> [#uses=1]
+ %504 = getelementptr %struct.VERTEX, %struct.VERTEX* %496, i32 0, i32 0, i32 1 ; <double*> [#uses=1]
+ %505 = load double, double* %504, align 4 ; <double> [#uses=1]
+ %506 = getelementptr %struct.VERTEX, %struct.VERTEX* %501, i32 0, i32 0, i32 0 ; <double*> [#uses=1]
+ %507 = load double, double* %506, align 4 ; <double> [#uses=2]
+ %508 = getelementptr %struct.VERTEX, %struct.VERTEX* %501, i32 0, i32 0, i32 1 ; <double*> [#uses=1]
+ %509 = load double, double* %508, align 4 ; <double> [#uses=2]
+ %510 = getelementptr %struct.VERTEX, %struct.VERTEX* %495, i32 0, i32 0, i32 0 ; <double*> [#uses=1]
+ %511 = load double, double* %510, align 4 ; <double> [#uses=3]
+ %512 = getelementptr %struct.VERTEX, %struct.VERTEX* %495, i32 0, i32 0, i32 1 ; <double*> [#uses=1]
+ %513 = load double, double* %512, align 4 ; <double> [#uses=3]
%514 = fsub double %503, %511 ; <double> [#uses=2]
%515 = fsub double %509, %513 ; <double> [#uses=1]
%516 = fmul double %514, %515 ; <double> [#uses=1]
@@ -698,12 +698,12 @@ bb17.i: ; preds = %bb17.loopexit.i, %bb13.i
%522 = ptrtoint %struct.edge_rec* %rcand.1.i to i32 ; <i32> [#uses=3]
%523 = xor i32 %522, 32 ; <i32> [#uses=1]
%524 = inttoptr i32 %523 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
- %525 = getelementptr %struct.edge_rec* %524, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
- %526 = load %struct.VERTEX** %525, align 4 ; <%struct.VERTEX*> [#uses=4]
- %527 = getelementptr %struct.VERTEX* %526, i32 0, i32 0, i32 0 ; <double*> [#uses=1]
- %528 = load double* %527, align 4 ; <double> [#uses=4]
- %529 = getelementptr %struct.VERTEX* %526, i32 0, i32 0, i32 1 ; <double*> [#uses=1]
- %530 = load double* %529, align 4 ; <double> [#uses=4]
+ %525 = getelementptr %struct.edge_rec, %struct.edge_rec* %524, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
+ %526 = load %struct.VERTEX*, %struct.VERTEX** %525, align 4 ; <%struct.VERTEX*> [#uses=4]
+ %527 = getelementptr %struct.VERTEX, %struct.VERTEX* %526, i32 0, i32 0, i32 0 ; <double*> [#uses=1]
+ %528 = load double, double* %527, align 4 ; <double> [#uses=4]
+ %529 = getelementptr %struct.VERTEX, %struct.VERTEX* %526, i32 0, i32 0, i32 1 ; <double*> [#uses=1]
+ %530 = load double, double* %529, align 4 ; <double> [#uses=4]
%531 = fsub double %530, %513 ; <double> [#uses=1]
%532 = fmul double %514, %531 ; <double> [#uses=1]
%533 = fsub double %528, %511 ; <double> [#uses=1]
@@ -714,38 +714,38 @@ bb17.i: ; preds = %bb17.loopexit.i, %bb13.i
br i1 %537, label %bb21.i, label %do_merge.exit
bb21.i: ; preds = %bb17.i
- %538 = getelementptr %struct.edge_rec* %lcand.1.i, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
- %539 = load %struct.VERTEX** %538, align 4 ; <%struct.VERTEX*> [#uses=3]
- %540 = getelementptr %struct.edge_rec* %rcand.1.i, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
- %541 = load %struct.VERTEX** %540, align 4 ; <%struct.VERTEX*> [#uses=3]
+ %538 = getelementptr %struct.edge_rec, %struct.edge_rec* %lcand.1.i, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
+ %539 = load %struct.VERTEX*, %struct.VERTEX** %538, align 4 ; <%struct.VERTEX*> [#uses=3]
+ %540 = getelementptr %struct.edge_rec, %struct.edge_rec* %rcand.1.i, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
+ %541 = load %struct.VERTEX*, %struct.VERTEX** %540, align 4 ; <%struct.VERTEX*> [#uses=3]
br i1 %521, label %bb22.i, label %bb24.i
bb22.i: ; preds = %bb21.i
br i1 %536, label %bb23.i, label %bb25.i
bb23.i: ; preds = %bb22.i
- %542 = getelementptr %struct.VERTEX* %526, i32 0, i32 0, i32 2 ; <double*> [#uses=1]
- %543 = load double* %542, align 4 ; <double> [#uses=3]
+ %542 = getelementptr %struct.VERTEX, %struct.VERTEX* %526, i32 0, i32 0, i32 2 ; <double*> [#uses=1]
+ %543 = load double, double* %542, align 4 ; <double> [#uses=3]
%544 = fsub double %507, %528 ; <double> [#uses=2]
%545 = fsub double %509, %530 ; <double> [#uses=2]
- %546 = getelementptr %struct.VERTEX* %501, i32 0, i32 0, i32 2 ; <double*> [#uses=1]
- %547 = load double* %546, align 4 ; <double> [#uses=1]
- %548 = getelementptr %struct.VERTEX* %539, i32 0, i32 0, i32 0 ; <double*> [#uses=1]
- %549 = load double* %548, align 4 ; <double> [#uses=1]
+ %546 = getelementptr %struct.VERTEX, %struct.VERTEX* %501, i32 0, i32 0, i32 2 ; <double*> [#uses=1]
+ %547 = load double, double* %546, align 4 ; <double> [#uses=1]
+ %548 = getelementptr %struct.VERTEX, %struct.VERTEX* %539, i32 0, i32 0, i32 0 ; <double*> [#uses=1]
+ %549 = load double, double* %548, align 4 ; <double> [#uses=1]
%550 = fsub double %549, %528 ; <double> [#uses=2]
- %551 = getelementptr %struct.VERTEX* %539, i32 0, i32 0, i32 1 ; <double*> [#uses=1]
- %552 = load double* %551, align 4 ; <double> [#uses=1]
+ %551 = getelementptr %struct.VERTEX, %struct.VERTEX* %539, i32 0, i32 0, i32 1 ; <double*> [#uses=1]
+ %552 = load double, double* %551, align 4 ; <double> [#uses=1]
%553 = fsub double %552, %530 ; <double> [#uses=2]
- %554 = getelementptr %struct.VERTEX* %539, i32 0, i32 0, i32 2 ; <double*> [#uses=1]
- %555 = load double* %554, align 4 ; <double> [#uses=1]
- %556 = getelementptr %struct.VERTEX* %541, i32 0, i32 0, i32 0 ; <double*> [#uses=1]
- %557 = load double* %556, align 4 ; <double> [#uses=1]
+ %554 = getelementptr %struct.VERTEX, %struct.VERTEX* %539, i32 0, i32 0, i32 2 ; <double*> [#uses=1]
+ %555 = load double, double* %554, align 4 ; <double> [#uses=1]
+ %556 = getelementptr %struct.VERTEX, %struct.VERTEX* %541, i32 0, i32 0, i32 0 ; <double*> [#uses=1]
+ %557 = load double, double* %556, align 4 ; <double> [#uses=1]
%558 = fsub double %557, %528 ; <double> [#uses=2]
- %559 = getelementptr %struct.VERTEX* %541, i32 0, i32 0, i32 1 ; <double*> [#uses=1]
- %560 = load double* %559, align 4 ; <double> [#uses=1]
+ %559 = getelementptr %struct.VERTEX, %struct.VERTEX* %541, i32 0, i32 0, i32 1 ; <double*> [#uses=1]
+ %560 = load double, double* %559, align 4 ; <double> [#uses=1]
%561 = fsub double %560, %530 ; <double> [#uses=2]
- %562 = getelementptr %struct.VERTEX* %541, i32 0, i32 0, i32 2 ; <double*> [#uses=1]
- %563 = load double* %562, align 4 ; <double> [#uses=1]
+ %562 = getelementptr %struct.VERTEX, %struct.VERTEX* %541, i32 0, i32 0, i32 2 ; <double*> [#uses=1]
+ %563 = load double, double* %562, align 4 ; <double> [#uses=1]
%564 = fsub double %547, %543 ; <double> [#uses=1]
%565 = fmul double %550, %561 ; <double> [#uses=1]
%566 = fmul double %553, %558 ; <double> [#uses=1]
@@ -772,8 +772,8 @@ bb24.i: ; preds = %bb23.i, %bb21.i
%584 = and i32 %522, -64 ; <i32> [#uses=1]
%585 = or i32 %583, %584 ; <i32> [#uses=1]
%586 = inttoptr i32 %585 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
- %587 = getelementptr %struct.edge_rec* %586, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
- %588 = load %struct.edge_rec** %587, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %587 = getelementptr %struct.edge_rec, %struct.edge_rec* %586, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ %588 = load %struct.edge_rec*, %struct.edge_rec** %587, align 4 ; <%struct.edge_rec*> [#uses=1]
%589 = ptrtoint %struct.edge_rec* %588 to i32 ; <i32> [#uses=2]
%590 = add i32 %589, 16 ; <i32> [#uses=1]
%591 = and i32 %590, 63 ; <i32> [#uses=1]
@@ -781,76 +781,76 @@ bb24.i: ; preds = %bb23.i, %bb21.i
%593 = or i32 %591, %592 ; <i32> [#uses=1]
%594 = inttoptr i32 %593 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
%595 = call %struct.edge_rec* @alloc_edge() nounwind ; <%struct.edge_rec*> [#uses=5]
- %596 = getelementptr %struct.edge_rec* %595, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=4]
+ %596 = getelementptr %struct.edge_rec, %struct.edge_rec* %595, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=4]
store %struct.edge_rec* %595, %struct.edge_rec** %596, align 4
- %597 = getelementptr %struct.edge_rec* %595, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
+ %597 = getelementptr %struct.edge_rec, %struct.edge_rec* %595, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
store %struct.VERTEX* %526, %struct.VERTEX** %597, align 4
%598 = ptrtoint %struct.edge_rec* %595 to i32 ; <i32> [#uses=5]
%599 = add i32 %598, 16 ; <i32> [#uses=1]
%600 = inttoptr i32 %599 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=2]
%601 = add i32 %598, 48 ; <i32> [#uses=1]
%602 = inttoptr i32 %601 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=2]
- %603 = getelementptr %struct.edge_rec* %600, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ %603 = getelementptr %struct.edge_rec, %struct.edge_rec* %600, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
store %struct.edge_rec* %602, %struct.edge_rec** %603, align 4
%604 = add i32 %598, 32 ; <i32> [#uses=1]
%605 = inttoptr i32 %604 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=3]
- %606 = getelementptr %struct.edge_rec* %605, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ %606 = getelementptr %struct.edge_rec, %struct.edge_rec* %605, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
store %struct.edge_rec* %605, %struct.edge_rec** %606, align 4
- %607 = getelementptr %struct.edge_rec* %605, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
+ %607 = getelementptr %struct.edge_rec, %struct.edge_rec* %605, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
store %struct.VERTEX* %495, %struct.VERTEX** %607, align 4
- %608 = getelementptr %struct.edge_rec* %602, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ %608 = getelementptr %struct.edge_rec, %struct.edge_rec* %602, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
store %struct.edge_rec* %600, %struct.edge_rec** %608, align 4
- %609 = load %struct.edge_rec** %596, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %609 = load %struct.edge_rec*, %struct.edge_rec** %596, align 4 ; <%struct.edge_rec*> [#uses=1]
%610 = ptrtoint %struct.edge_rec* %609 to i32 ; <i32> [#uses=2]
%611 = add i32 %610, 16 ; <i32> [#uses=1]
%612 = and i32 %611, 63 ; <i32> [#uses=1]
%613 = and i32 %610, -64 ; <i32> [#uses=1]
%614 = or i32 %612, %613 ; <i32> [#uses=1]
%615 = inttoptr i32 %614 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
- %616 = getelementptr %struct.edge_rec* %594, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=3]
- %617 = load %struct.edge_rec** %616, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %616 = getelementptr %struct.edge_rec, %struct.edge_rec* %594, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=3]
+ %617 = load %struct.edge_rec*, %struct.edge_rec** %616, align 4 ; <%struct.edge_rec*> [#uses=1]
%618 = ptrtoint %struct.edge_rec* %617 to i32 ; <i32> [#uses=2]
%619 = add i32 %618, 16 ; <i32> [#uses=1]
%620 = and i32 %619, 63 ; <i32> [#uses=1]
%621 = and i32 %618, -64 ; <i32> [#uses=1]
%622 = or i32 %620, %621 ; <i32> [#uses=1]
%623 = inttoptr i32 %622 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
- %624 = getelementptr %struct.edge_rec* %623, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2]
- %625 = load %struct.edge_rec** %624, align 4 ; <%struct.edge_rec*> [#uses=1]
- %626 = getelementptr %struct.edge_rec* %615, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2]
- %627 = load %struct.edge_rec** %626, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %624 = getelementptr %struct.edge_rec, %struct.edge_rec* %623, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2]
+ %625 = load %struct.edge_rec*, %struct.edge_rec** %624, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %626 = getelementptr %struct.edge_rec, %struct.edge_rec* %615, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2]
+ %627 = load %struct.edge_rec*, %struct.edge_rec** %626, align 4 ; <%struct.edge_rec*> [#uses=1]
store %struct.edge_rec* %625, %struct.edge_rec** %626, align 4
store %struct.edge_rec* %627, %struct.edge_rec** %624, align 4
- %628 = load %struct.edge_rec** %596, align 4 ; <%struct.edge_rec*> [#uses=1]
- %629 = load %struct.edge_rec** %616, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %628 = load %struct.edge_rec*, %struct.edge_rec** %596, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %629 = load %struct.edge_rec*, %struct.edge_rec** %616, align 4 ; <%struct.edge_rec*> [#uses=1]
store %struct.edge_rec* %628, %struct.edge_rec** %616, align 4
store %struct.edge_rec* %629, %struct.edge_rec** %596, align 4
%630 = xor i32 %598, 32 ; <i32> [#uses=2]
%631 = inttoptr i32 %630 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
- %632 = getelementptr %struct.edge_rec* %631, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=3]
- %633 = load %struct.edge_rec** %632, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %632 = getelementptr %struct.edge_rec, %struct.edge_rec* %631, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=3]
+ %633 = load %struct.edge_rec*, %struct.edge_rec** %632, align 4 ; <%struct.edge_rec*> [#uses=1]
%634 = ptrtoint %struct.edge_rec* %633 to i32 ; <i32> [#uses=2]
%635 = add i32 %634, 16 ; <i32> [#uses=1]
%636 = and i32 %635, 63 ; <i32> [#uses=1]
%637 = and i32 %634, -64 ; <i32> [#uses=1]
%638 = or i32 %636, %637 ; <i32> [#uses=1]
%639 = inttoptr i32 %638 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
- %640 = getelementptr %struct.edge_rec* %174, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=3]
- %641 = load %struct.edge_rec** %640, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %640 = getelementptr %struct.edge_rec, %struct.edge_rec* %174, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=3]
+ %641 = load %struct.edge_rec*, %struct.edge_rec** %640, align 4 ; <%struct.edge_rec*> [#uses=1]
%642 = ptrtoint %struct.edge_rec* %641 to i32 ; <i32> [#uses=2]
%643 = add i32 %642, 16 ; <i32> [#uses=1]
%644 = and i32 %643, 63 ; <i32> [#uses=1]
%645 = and i32 %642, -64 ; <i32> [#uses=1]
%646 = or i32 %644, %645 ; <i32> [#uses=1]
%647 = inttoptr i32 %646 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
- %648 = getelementptr %struct.edge_rec* %647, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2]
- %649 = load %struct.edge_rec** %648, align 4 ; <%struct.edge_rec*> [#uses=1]
- %650 = getelementptr %struct.edge_rec* %639, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2]
- %651 = load %struct.edge_rec** %650, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %648 = getelementptr %struct.edge_rec, %struct.edge_rec* %647, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2]
+ %649 = load %struct.edge_rec*, %struct.edge_rec** %648, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %650 = getelementptr %struct.edge_rec, %struct.edge_rec* %639, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2]
+ %651 = load %struct.edge_rec*, %struct.edge_rec** %650, align 4 ; <%struct.edge_rec*> [#uses=1]
store %struct.edge_rec* %649, %struct.edge_rec** %650, align 4
store %struct.edge_rec* %651, %struct.edge_rec** %648, align 4
- %652 = load %struct.edge_rec** %632, align 4 ; <%struct.edge_rec*> [#uses=1]
- %653 = load %struct.edge_rec** %640, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %652 = load %struct.edge_rec*, %struct.edge_rec** %632, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %653 = load %struct.edge_rec*, %struct.edge_rec** %640, align 4 ; <%struct.edge_rec*> [#uses=1]
store %struct.edge_rec* %652, %struct.edge_rec** %640, align 4
store %struct.edge_rec* %653, %struct.edge_rec** %632, align 4
%654 = add i32 %630, 48 ; <i32> [#uses=1]
@@ -858,8 +858,8 @@ bb24.i: ; preds = %bb23.i, %bb21.i
%656 = and i32 %598, -64 ; <i32> [#uses=1]
%657 = or i32 %655, %656 ; <i32> [#uses=1]
%658 = inttoptr i32 %657 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
- %659 = getelementptr %struct.edge_rec* %658, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
- %660 = load %struct.edge_rec** %659, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %659 = getelementptr %struct.edge_rec, %struct.edge_rec* %658, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ %660 = load %struct.edge_rec*, %struct.edge_rec** %659, align 4 ; <%struct.edge_rec*> [#uses=1]
%661 = ptrtoint %struct.edge_rec* %660 to i32 ; <i32> [#uses=2]
%662 = add i32 %661, 16 ; <i32> [#uses=1]
%663 = and i32 %662, 63 ; <i32> [#uses=1]
@@ -874,8 +874,8 @@ bb25.i: ; preds = %bb23.i, %bb22.i
%669 = and i32 %172, -64 ; <i32> [#uses=1]
%670 = or i32 %668, %669 ; <i32> [#uses=1]
%671 = inttoptr i32 %670 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
- %672 = getelementptr %struct.edge_rec* %671, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
- %673 = load %struct.edge_rec** %672, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %672 = getelementptr %struct.edge_rec, %struct.edge_rec* %671, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ %673 = load %struct.edge_rec*, %struct.edge_rec** %672, align 4 ; <%struct.edge_rec*> [#uses=1]
%674 = ptrtoint %struct.edge_rec* %673 to i32 ; <i32> [#uses=2]
%675 = add i32 %674, 16 ; <i32> [#uses=1]
%676 = and i32 %675, 63 ; <i32> [#uses=1]
@@ -883,84 +883,84 @@ bb25.i: ; preds = %bb23.i, %bb22.i
%678 = or i32 %676, %677 ; <i32> [#uses=1]
%679 = inttoptr i32 %678 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
%680 = call %struct.edge_rec* @alloc_edge() nounwind ; <%struct.edge_rec*> [#uses=4]
- %681 = getelementptr %struct.edge_rec* %680, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=5]
+ %681 = getelementptr %struct.edge_rec, %struct.edge_rec* %680, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=5]
store %struct.edge_rec* %680, %struct.edge_rec** %681, align 4
- %682 = getelementptr %struct.edge_rec* %680, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
+ %682 = getelementptr %struct.edge_rec, %struct.edge_rec* %680, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
store %struct.VERTEX* %501, %struct.VERTEX** %682, align 4
%683 = ptrtoint %struct.edge_rec* %680 to i32 ; <i32> [#uses=4]
%684 = add i32 %683, 16 ; <i32> [#uses=1]
%685 = inttoptr i32 %684 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=2]
%686 = add i32 %683, 48 ; <i32> [#uses=1]
%687 = inttoptr i32 %686 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=2]
- %688 = getelementptr %struct.edge_rec* %685, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ %688 = getelementptr %struct.edge_rec, %struct.edge_rec* %685, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
store %struct.edge_rec* %687, %struct.edge_rec** %688, align 4
%689 = add i32 %683, 32 ; <i32> [#uses=1]
%690 = inttoptr i32 %689 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=3]
- %691 = getelementptr %struct.edge_rec* %690, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ %691 = getelementptr %struct.edge_rec, %struct.edge_rec* %690, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
store %struct.edge_rec* %690, %struct.edge_rec** %691, align 4
- %692 = getelementptr %struct.edge_rec* %690, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
+ %692 = getelementptr %struct.edge_rec, %struct.edge_rec* %690, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
store %struct.VERTEX* %496, %struct.VERTEX** %692, align 4
- %693 = getelementptr %struct.edge_rec* %687, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ %693 = getelementptr %struct.edge_rec, %struct.edge_rec* %687, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
store %struct.edge_rec* %685, %struct.edge_rec** %693, align 4
- %694 = load %struct.edge_rec** %681, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %694 = load %struct.edge_rec*, %struct.edge_rec** %681, align 4 ; <%struct.edge_rec*> [#uses=1]
%695 = ptrtoint %struct.edge_rec* %694 to i32 ; <i32> [#uses=2]
%696 = add i32 %695, 16 ; <i32> [#uses=1]
%697 = and i32 %696, 63 ; <i32> [#uses=1]
%698 = and i32 %695, -64 ; <i32> [#uses=1]
%699 = or i32 %697, %698 ; <i32> [#uses=1]
%700 = inttoptr i32 %699 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
- %701 = getelementptr %struct.edge_rec* %499, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=3]
- %702 = load %struct.edge_rec** %701, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %701 = getelementptr %struct.edge_rec, %struct.edge_rec* %499, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=3]
+ %702 = load %struct.edge_rec*, %struct.edge_rec** %701, align 4 ; <%struct.edge_rec*> [#uses=1]
%703 = ptrtoint %struct.edge_rec* %702 to i32 ; <i32> [#uses=2]
%704 = add i32 %703, 16 ; <i32> [#uses=1]
%705 = and i32 %704, 63 ; <i32> [#uses=1]
%706 = and i32 %703, -64 ; <i32> [#uses=1]
%707 = or i32 %705, %706 ; <i32> [#uses=1]
%708 = inttoptr i32 %707 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
- %709 = getelementptr %struct.edge_rec* %708, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2]
- %710 = load %struct.edge_rec** %709, align 4 ; <%struct.edge_rec*> [#uses=1]
- %711 = getelementptr %struct.edge_rec* %700, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2]
- %712 = load %struct.edge_rec** %711, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %709 = getelementptr %struct.edge_rec, %struct.edge_rec* %708, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2]
+ %710 = load %struct.edge_rec*, %struct.edge_rec** %709, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %711 = getelementptr %struct.edge_rec, %struct.edge_rec* %700, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2]
+ %712 = load %struct.edge_rec*, %struct.edge_rec** %711, align 4 ; <%struct.edge_rec*> [#uses=1]
store %struct.edge_rec* %710, %struct.edge_rec** %711, align 4
store %struct.edge_rec* %712, %struct.edge_rec** %709, align 4
- %713 = load %struct.edge_rec** %681, align 4 ; <%struct.edge_rec*> [#uses=1]
- %714 = load %struct.edge_rec** %701, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %713 = load %struct.edge_rec*, %struct.edge_rec** %681, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %714 = load %struct.edge_rec*, %struct.edge_rec** %701, align 4 ; <%struct.edge_rec*> [#uses=1]
store %struct.edge_rec* %713, %struct.edge_rec** %701, align 4
store %struct.edge_rec* %714, %struct.edge_rec** %681, align 4
%715 = xor i32 %683, 32 ; <i32> [#uses=1]
%716 = inttoptr i32 %715 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=2]
- %717 = getelementptr %struct.edge_rec* %716, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=3]
- %718 = load %struct.edge_rec** %717, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %717 = getelementptr %struct.edge_rec, %struct.edge_rec* %716, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=3]
+ %718 = load %struct.edge_rec*, %struct.edge_rec** %717, align 4 ; <%struct.edge_rec*> [#uses=1]
%719 = ptrtoint %struct.edge_rec* %718 to i32 ; <i32> [#uses=2]
%720 = add i32 %719, 16 ; <i32> [#uses=1]
%721 = and i32 %720, 63 ; <i32> [#uses=1]
%722 = and i32 %719, -64 ; <i32> [#uses=1]
%723 = or i32 %721, %722 ; <i32> [#uses=1]
%724 = inttoptr i32 %723 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
- %725 = getelementptr %struct.edge_rec* %679, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=3]
- %726 = load %struct.edge_rec** %725, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %725 = getelementptr %struct.edge_rec, %struct.edge_rec* %679, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=3]
+ %726 = load %struct.edge_rec*, %struct.edge_rec** %725, align 4 ; <%struct.edge_rec*> [#uses=1]
%727 = ptrtoint %struct.edge_rec* %726 to i32 ; <i32> [#uses=2]
%728 = add i32 %727, 16 ; <i32> [#uses=1]
%729 = and i32 %728, 63 ; <i32> [#uses=1]
%730 = and i32 %727, -64 ; <i32> [#uses=1]
%731 = or i32 %729, %730 ; <i32> [#uses=1]
%732 = inttoptr i32 %731 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
- %733 = getelementptr %struct.edge_rec* %732, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2]
- %734 = load %struct.edge_rec** %733, align 4 ; <%struct.edge_rec*> [#uses=1]
- %735 = getelementptr %struct.edge_rec* %724, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2]
- %736 = load %struct.edge_rec** %735, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %733 = getelementptr %struct.edge_rec, %struct.edge_rec* %732, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2]
+ %734 = load %struct.edge_rec*, %struct.edge_rec** %733, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %735 = getelementptr %struct.edge_rec, %struct.edge_rec* %724, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2]
+ %736 = load %struct.edge_rec*, %struct.edge_rec** %735, align 4 ; <%struct.edge_rec*> [#uses=1]
store %struct.edge_rec* %734, %struct.edge_rec** %735, align 4
store %struct.edge_rec* %736, %struct.edge_rec** %733, align 4
- %737 = load %struct.edge_rec** %717, align 4 ; <%struct.edge_rec*> [#uses=1]
- %738 = load %struct.edge_rec** %725, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %737 = load %struct.edge_rec*, %struct.edge_rec** %717, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %738 = load %struct.edge_rec*, %struct.edge_rec** %725, align 4 ; <%struct.edge_rec*> [#uses=1]
store %struct.edge_rec* %737, %struct.edge_rec** %725, align 4
store %struct.edge_rec* %738, %struct.edge_rec** %717, align 4
- %739 = load %struct.edge_rec** %681, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %739 = load %struct.edge_rec*, %struct.edge_rec** %681, align 4 ; <%struct.edge_rec*> [#uses=1]
br label %bb9.i
do_merge.exit: ; preds = %bb17.i
- %740 = getelementptr %struct.edge_rec* %ldo_addr.0.ph.i, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
- %741 = load %struct.VERTEX** %740, align 4 ; <%struct.VERTEX*> [#uses=1]
+ %740 = getelementptr %struct.edge_rec, %struct.edge_rec* %ldo_addr.0.ph.i, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
+ %741 = load %struct.VERTEX*, %struct.VERTEX** %740, align 4 ; <%struct.VERTEX*> [#uses=1]
%742 = icmp eq %struct.VERTEX* %741, %tree_addr.0.i ; <i1> [#uses=1]
br i1 %742, label %bb5.loopexit, label %bb2
@@ -969,29 +969,29 @@ bb2: ; preds = %bb2, %do_merge.exit
%743 = ptrtoint %struct.edge_rec* %ldo.07 to i32 ; <i32> [#uses=1]
%744 = xor i32 %743, 32 ; <i32> [#uses=1]
%745 = inttoptr i32 %744 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
- %746 = getelementptr %struct.edge_rec* %745, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
- %747 = load %struct.edge_rec** %746, align 4 ; <%struct.edge_rec*> [#uses=3]
- %748 = getelementptr %struct.edge_rec* %747, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
- %749 = load %struct.VERTEX** %748, align 4 ; <%struct.VERTEX*> [#uses=1]
+ %746 = getelementptr %struct.edge_rec, %struct.edge_rec* %745, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ %747 = load %struct.edge_rec*, %struct.edge_rec** %746, align 4 ; <%struct.edge_rec*> [#uses=3]
+ %748 = getelementptr %struct.edge_rec, %struct.edge_rec* %747, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
+ %749 = load %struct.VERTEX*, %struct.VERTEX** %748, align 4 ; <%struct.VERTEX*> [#uses=1]
%750 = icmp eq %struct.VERTEX* %749, %tree_addr.0.i ; <i1> [#uses=1]
br i1 %750, label %bb5.loopexit, label %bb2
bb4: ; preds = %bb5.loopexit, %bb4
%rdo.05 = phi %struct.edge_rec* [ %755, %bb4 ], [ %rdo_addr.0.i, %bb5.loopexit ] ; <%struct.edge_rec*> [#uses=1]
- %751 = getelementptr %struct.edge_rec* %rdo.05, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
- %752 = load %struct.edge_rec** %751, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %751 = getelementptr %struct.edge_rec, %struct.edge_rec* %rdo.05, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ %752 = load %struct.edge_rec*, %struct.edge_rec** %751, align 4 ; <%struct.edge_rec*> [#uses=1]
%753 = ptrtoint %struct.edge_rec* %752 to i32 ; <i32> [#uses=1]
%754 = xor i32 %753, 32 ; <i32> [#uses=1]
%755 = inttoptr i32 %754 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=3]
- %756 = getelementptr %struct.edge_rec* %755, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
- %757 = load %struct.VERTEX** %756, align 4 ; <%struct.VERTEX*> [#uses=1]
+ %756 = getelementptr %struct.edge_rec, %struct.edge_rec* %755, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
+ %757 = load %struct.VERTEX*, %struct.VERTEX** %756, align 4 ; <%struct.VERTEX*> [#uses=1]
%758 = icmp eq %struct.VERTEX* %757, %extra ; <i1> [#uses=1]
br i1 %758, label %bb6, label %bb4
bb5.loopexit: ; preds = %bb2, %do_merge.exit
%ldo.0.lcssa = phi %struct.edge_rec* [ %ldo_addr.0.ph.i, %do_merge.exit ], [ %747, %bb2 ] ; <%struct.edge_rec*> [#uses=1]
- %759 = getelementptr %struct.edge_rec* %rdo_addr.0.i, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
- %760 = load %struct.VERTEX** %759, align 4 ; <%struct.VERTEX*> [#uses=1]
+ %759 = getelementptr %struct.edge_rec, %struct.edge_rec* %rdo_addr.0.i, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
+ %760 = load %struct.VERTEX*, %struct.VERTEX** %759, align 4 ; <%struct.VERTEX*> [#uses=1]
%761 = icmp eq %struct.VERTEX* %760, %extra ; <i1> [#uses=1]
br i1 %761, label %bb6, label %bb4
@@ -1002,17 +1002,17 @@ bb6: ; preds = %bb5.loopexit, %bb4
br label %bb15
bb7: ; preds = %bb
- %762 = getelementptr %struct.VERTEX* %tree, i32 0, i32 1 ; <%struct.VERTEX**> [#uses=1]
- %763 = load %struct.VERTEX** %762, align 4 ; <%struct.VERTEX*> [#uses=4]
+ %762 = getelementptr %struct.VERTEX, %struct.VERTEX* %tree, i32 0, i32 1 ; <%struct.VERTEX**> [#uses=1]
+ %763 = load %struct.VERTEX*, %struct.VERTEX** %762, align 4 ; <%struct.VERTEX*> [#uses=4]
%764 = icmp eq %struct.VERTEX* %763, null ; <i1> [#uses=1]
%765 = call %struct.edge_rec* @alloc_edge() nounwind ; <%struct.edge_rec*> [#uses=5]
- %766 = getelementptr %struct.edge_rec* %765, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=4]
+ %766 = getelementptr %struct.edge_rec, %struct.edge_rec* %765, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=4]
store %struct.edge_rec* %765, %struct.edge_rec** %766, align 4
- %767 = getelementptr %struct.edge_rec* %765, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=3]
+ %767 = getelementptr %struct.edge_rec, %struct.edge_rec* %765, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=3]
br i1 %764, label %bb10, label %bb11
bb8: ; preds = %entry
- %768 = call i32 @puts(i8* getelementptr ([21 x i8]* @_2E_str7, i32 0, i32 0)) nounwind ; <i32> [#uses=0]
+ %768 = call i32 @puts(i8* getelementptr ([21 x i8], [21 x i8]* @_2E_str7, i32 0, i32 0)) nounwind ; <i32> [#uses=0]
call void @exit(i32 -1) noreturn nounwind
unreachable
@@ -1023,15 +1023,15 @@ bb10: ; preds = %bb7
%771 = inttoptr i32 %770 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=2]
%772 = add i32 %769, 48 ; <i32> [#uses=1]
%773 = inttoptr i32 %772 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=2]
- %774 = getelementptr %struct.edge_rec* %771, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ %774 = getelementptr %struct.edge_rec, %struct.edge_rec* %771, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
store %struct.edge_rec* %773, %struct.edge_rec** %774, align 4
%775 = add i32 %769, 32 ; <i32> [#uses=1]
%776 = inttoptr i32 %775 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=3]
- %777 = getelementptr %struct.edge_rec* %776, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ %777 = getelementptr %struct.edge_rec, %struct.edge_rec* %776, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
store %struct.edge_rec* %776, %struct.edge_rec** %777, align 4
- %778 = getelementptr %struct.edge_rec* %776, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
+ %778 = getelementptr %struct.edge_rec, %struct.edge_rec* %776, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
store %struct.VERTEX* %extra, %struct.VERTEX** %778, align 4
- %779 = getelementptr %struct.edge_rec* %773, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ %779 = getelementptr %struct.edge_rec, %struct.edge_rec* %773, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
store %struct.edge_rec* %771, %struct.edge_rec** %779, align 4
%780 = xor i32 %769, 32 ; <i32> [#uses=1]
br label %bb15
@@ -1043,164 +1043,164 @@ bb11: ; preds = %bb7
%783 = inttoptr i32 %782 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=2]
%784 = add i32 %781, 48 ; <i32> [#uses=1]
%785 = inttoptr i32 %784 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=2]
- %786 = getelementptr %struct.edge_rec* %783, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ %786 = getelementptr %struct.edge_rec, %struct.edge_rec* %783, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
store %struct.edge_rec* %785, %struct.edge_rec** %786, align 4
%787 = add i32 %781, 32 ; <i32> [#uses=1]
%788 = inttoptr i32 %787 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=3]
- %789 = getelementptr %struct.edge_rec* %788, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ %789 = getelementptr %struct.edge_rec, %struct.edge_rec* %788, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
store %struct.edge_rec* %788, %struct.edge_rec** %789, align 4
- %790 = getelementptr %struct.edge_rec* %788, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
+ %790 = getelementptr %struct.edge_rec, %struct.edge_rec* %788, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
store %struct.VERTEX* %tree, %struct.VERTEX** %790, align 4
- %791 = getelementptr %struct.edge_rec* %785, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ %791 = getelementptr %struct.edge_rec, %struct.edge_rec* %785, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
store %struct.edge_rec* %783, %struct.edge_rec** %791, align 4
%792 = call %struct.edge_rec* @alloc_edge() nounwind ; <%struct.edge_rec*> [#uses=4]
- %793 = getelementptr %struct.edge_rec* %792, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=4]
+ %793 = getelementptr %struct.edge_rec, %struct.edge_rec* %792, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=4]
store %struct.edge_rec* %792, %struct.edge_rec** %793, align 4
- %794 = getelementptr %struct.edge_rec* %792, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
+ %794 = getelementptr %struct.edge_rec, %struct.edge_rec* %792, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
store %struct.VERTEX* %tree, %struct.VERTEX** %794, align 4
%795 = ptrtoint %struct.edge_rec* %792 to i32 ; <i32> [#uses=5]
%796 = add i32 %795, 16 ; <i32> [#uses=1]
%797 = inttoptr i32 %796 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=2]
%798 = add i32 %795, 48 ; <i32> [#uses=2]
%799 = inttoptr i32 %798 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=2]
- %800 = getelementptr %struct.edge_rec* %797, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ %800 = getelementptr %struct.edge_rec, %struct.edge_rec* %797, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
store %struct.edge_rec* %799, %struct.edge_rec** %800, align 4
%801 = add i32 %795, 32 ; <i32> [#uses=1]
%802 = inttoptr i32 %801 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=3]
- %803 = getelementptr %struct.edge_rec* %802, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ %803 = getelementptr %struct.edge_rec, %struct.edge_rec* %802, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
store %struct.edge_rec* %802, %struct.edge_rec** %803, align 4
- %804 = getelementptr %struct.edge_rec* %802, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
+ %804 = getelementptr %struct.edge_rec, %struct.edge_rec* %802, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
store %struct.VERTEX* %extra, %struct.VERTEX** %804, align 4
- %805 = getelementptr %struct.edge_rec* %799, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ %805 = getelementptr %struct.edge_rec, %struct.edge_rec* %799, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
store %struct.edge_rec* %797, %struct.edge_rec** %805, align 4
%806 = xor i32 %781, 32 ; <i32> [#uses=1]
%807 = inttoptr i32 %806 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
- %808 = getelementptr %struct.edge_rec* %807, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=3]
- %809 = load %struct.edge_rec** %808, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %808 = getelementptr %struct.edge_rec, %struct.edge_rec* %807, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=3]
+ %809 = load %struct.edge_rec*, %struct.edge_rec** %808, align 4 ; <%struct.edge_rec*> [#uses=1]
%810 = ptrtoint %struct.edge_rec* %809 to i32 ; <i32> [#uses=2]
%811 = add i32 %810, 16 ; <i32> [#uses=1]
%812 = and i32 %811, 63 ; <i32> [#uses=1]
%813 = and i32 %810, -64 ; <i32> [#uses=1]
%814 = or i32 %812, %813 ; <i32> [#uses=1]
%815 = inttoptr i32 %814 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
- %816 = load %struct.edge_rec** %793, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %816 = load %struct.edge_rec*, %struct.edge_rec** %793, align 4 ; <%struct.edge_rec*> [#uses=1]
%817 = ptrtoint %struct.edge_rec* %816 to i32 ; <i32> [#uses=2]
%818 = add i32 %817, 16 ; <i32> [#uses=1]
%819 = and i32 %818, 63 ; <i32> [#uses=1]
%820 = and i32 %817, -64 ; <i32> [#uses=1]
%821 = or i32 %819, %820 ; <i32> [#uses=1]
%822 = inttoptr i32 %821 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
- %823 = getelementptr %struct.edge_rec* %822, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2]
- %824 = load %struct.edge_rec** %823, align 4 ; <%struct.edge_rec*> [#uses=1]
- %825 = getelementptr %struct.edge_rec* %815, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2]
- %826 = load %struct.edge_rec** %825, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %823 = getelementptr %struct.edge_rec, %struct.edge_rec* %822, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2]
+ %824 = load %struct.edge_rec*, %struct.edge_rec** %823, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %825 = getelementptr %struct.edge_rec, %struct.edge_rec* %815, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2]
+ %826 = load %struct.edge_rec*, %struct.edge_rec** %825, align 4 ; <%struct.edge_rec*> [#uses=1]
store %struct.edge_rec* %824, %struct.edge_rec** %825, align 4
store %struct.edge_rec* %826, %struct.edge_rec** %823, align 4
- %827 = load %struct.edge_rec** %808, align 4 ; <%struct.edge_rec*> [#uses=1]
- %828 = load %struct.edge_rec** %793, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %827 = load %struct.edge_rec*, %struct.edge_rec** %808, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %828 = load %struct.edge_rec*, %struct.edge_rec** %793, align 4 ; <%struct.edge_rec*> [#uses=1]
store %struct.edge_rec* %827, %struct.edge_rec** %793, align 4
store %struct.edge_rec* %828, %struct.edge_rec** %808, align 4
%829 = xor i32 %795, 32 ; <i32> [#uses=3]
%830 = inttoptr i32 %829 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
- %831 = getelementptr %struct.edge_rec* %830, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
- %832 = load %struct.VERTEX** %831, align 4 ; <%struct.VERTEX*> [#uses=1]
+ %831 = getelementptr %struct.edge_rec, %struct.edge_rec* %830, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
+ %832 = load %struct.VERTEX*, %struct.VERTEX** %831, align 4 ; <%struct.VERTEX*> [#uses=1]
%833 = and i32 %798, 63 ; <i32> [#uses=1]
%834 = and i32 %795, -64 ; <i32> [#uses=1]
%835 = or i32 %833, %834 ; <i32> [#uses=1]
%836 = inttoptr i32 %835 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
- %837 = getelementptr %struct.edge_rec* %836, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
- %838 = load %struct.edge_rec** %837, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %837 = getelementptr %struct.edge_rec, %struct.edge_rec* %836, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ %838 = load %struct.edge_rec*, %struct.edge_rec** %837, align 4 ; <%struct.edge_rec*> [#uses=1]
%839 = ptrtoint %struct.edge_rec* %838 to i32 ; <i32> [#uses=2]
%840 = add i32 %839, 16 ; <i32> [#uses=1]
%841 = and i32 %840, 63 ; <i32> [#uses=1]
%842 = and i32 %839, -64 ; <i32> [#uses=1]
%843 = or i32 %841, %842 ; <i32> [#uses=1]
%844 = inttoptr i32 %843 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
- %845 = load %struct.VERTEX** %767, align 4 ; <%struct.VERTEX*> [#uses=1]
+ %845 = load %struct.VERTEX*, %struct.VERTEX** %767, align 4 ; <%struct.VERTEX*> [#uses=1]
%846 = call %struct.edge_rec* @alloc_edge() nounwind ; <%struct.edge_rec*> [#uses=4]
- %847 = getelementptr %struct.edge_rec* %846, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=7]
+ %847 = getelementptr %struct.edge_rec, %struct.edge_rec* %846, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=7]
store %struct.edge_rec* %846, %struct.edge_rec** %847, align 4
- %848 = getelementptr %struct.edge_rec* %846, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
+ %848 = getelementptr %struct.edge_rec, %struct.edge_rec* %846, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
store %struct.VERTEX* %832, %struct.VERTEX** %848, align 4
%849 = ptrtoint %struct.edge_rec* %846 to i32 ; <i32> [#uses=6]
%850 = add i32 %849, 16 ; <i32> [#uses=2]
%851 = inttoptr i32 %850 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=2]
%852 = add i32 %849, 48 ; <i32> [#uses=1]
%853 = inttoptr i32 %852 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=2]
- %854 = getelementptr %struct.edge_rec* %851, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ %854 = getelementptr %struct.edge_rec, %struct.edge_rec* %851, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
store %struct.edge_rec* %853, %struct.edge_rec** %854, align 4
%855 = add i32 %849, 32 ; <i32> [#uses=1]
%856 = inttoptr i32 %855 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=3]
- %857 = getelementptr %struct.edge_rec* %856, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ %857 = getelementptr %struct.edge_rec, %struct.edge_rec* %856, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
store %struct.edge_rec* %856, %struct.edge_rec** %857, align 4
- %858 = getelementptr %struct.edge_rec* %856, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
+ %858 = getelementptr %struct.edge_rec, %struct.edge_rec* %856, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1]
store %struct.VERTEX* %845, %struct.VERTEX** %858, align 4
- %859 = getelementptr %struct.edge_rec* %853, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ %859 = getelementptr %struct.edge_rec, %struct.edge_rec* %853, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
store %struct.edge_rec* %851, %struct.edge_rec** %859, align 4
- %860 = load %struct.edge_rec** %847, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %860 = load %struct.edge_rec*, %struct.edge_rec** %847, align 4 ; <%struct.edge_rec*> [#uses=1]
%861 = ptrtoint %struct.edge_rec* %860 to i32 ; <i32> [#uses=2]
%862 = add i32 %861, 16 ; <i32> [#uses=1]
%863 = and i32 %862, 63 ; <i32> [#uses=1]
%864 = and i32 %861, -64 ; <i32> [#uses=1]
%865 = or i32 %863, %864 ; <i32> [#uses=1]
%866 = inttoptr i32 %865 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
- %867 = getelementptr %struct.edge_rec* %844, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=3]
- %868 = load %struct.edge_rec** %867, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %867 = getelementptr %struct.edge_rec, %struct.edge_rec* %844, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=3]
+ %868 = load %struct.edge_rec*, %struct.edge_rec** %867, align 4 ; <%struct.edge_rec*> [#uses=1]
%869 = ptrtoint %struct.edge_rec* %868 to i32 ; <i32> [#uses=2]
%870 = add i32 %869, 16 ; <i32> [#uses=1]
%871 = and i32 %870, 63 ; <i32> [#uses=1]
%872 = and i32 %869, -64 ; <i32> [#uses=1]
%873 = or i32 %871, %872 ; <i32> [#uses=1]
%874 = inttoptr i32 %873 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
- %875 = getelementptr %struct.edge_rec* %874, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2]
- %876 = load %struct.edge_rec** %875, align 4 ; <%struct.edge_rec*> [#uses=1]
- %877 = getelementptr %struct.edge_rec* %866, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2]
- %878 = load %struct.edge_rec** %877, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %875 = getelementptr %struct.edge_rec, %struct.edge_rec* %874, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2]
+ %876 = load %struct.edge_rec*, %struct.edge_rec** %875, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %877 = getelementptr %struct.edge_rec, %struct.edge_rec* %866, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2]
+ %878 = load %struct.edge_rec*, %struct.edge_rec** %877, align 4 ; <%struct.edge_rec*> [#uses=1]
store %struct.edge_rec* %876, %struct.edge_rec** %877, align 4
store %struct.edge_rec* %878, %struct.edge_rec** %875, align 4
- %879 = load %struct.edge_rec** %847, align 4 ; <%struct.edge_rec*> [#uses=1]
- %880 = load %struct.edge_rec** %867, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %879 = load %struct.edge_rec*, %struct.edge_rec** %847, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %880 = load %struct.edge_rec*, %struct.edge_rec** %867, align 4 ; <%struct.edge_rec*> [#uses=1]
store %struct.edge_rec* %879, %struct.edge_rec** %867, align 4
store %struct.edge_rec* %880, %struct.edge_rec** %847, align 4
%881 = xor i32 %849, 32 ; <i32> [#uses=3]
%882 = inttoptr i32 %881 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
- %883 = getelementptr %struct.edge_rec* %882, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=6]
- %884 = load %struct.edge_rec** %883, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %883 = getelementptr %struct.edge_rec, %struct.edge_rec* %882, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=6]
+ %884 = load %struct.edge_rec*, %struct.edge_rec** %883, align 4 ; <%struct.edge_rec*> [#uses=1]
%885 = ptrtoint %struct.edge_rec* %884 to i32 ; <i32> [#uses=2]
%886 = add i32 %885, 16 ; <i32> [#uses=1]
%887 = and i32 %886, 63 ; <i32> [#uses=1]
%888 = and i32 %885, -64 ; <i32> [#uses=1]
%889 = or i32 %887, %888 ; <i32> [#uses=1]
%890 = inttoptr i32 %889 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
- %891 = load %struct.edge_rec** %766, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %891 = load %struct.edge_rec*, %struct.edge_rec** %766, align 4 ; <%struct.edge_rec*> [#uses=1]
%892 = ptrtoint %struct.edge_rec* %891 to i32 ; <i32> [#uses=2]
%893 = add i32 %892, 16 ; <i32> [#uses=1]
%894 = and i32 %893, 63 ; <i32> [#uses=1]
%895 = and i32 %892, -64 ; <i32> [#uses=1]
%896 = or i32 %894, %895 ; <i32> [#uses=1]
%897 = inttoptr i32 %896 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
- %898 = getelementptr %struct.edge_rec* %897, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2]
- %899 = load %struct.edge_rec** %898, align 4 ; <%struct.edge_rec*> [#uses=1]
- %900 = getelementptr %struct.edge_rec* %890, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2]
- %901 = load %struct.edge_rec** %900, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %898 = getelementptr %struct.edge_rec, %struct.edge_rec* %897, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2]
+ %899 = load %struct.edge_rec*, %struct.edge_rec** %898, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %900 = getelementptr %struct.edge_rec, %struct.edge_rec* %890, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2]
+ %901 = load %struct.edge_rec*, %struct.edge_rec** %900, align 4 ; <%struct.edge_rec*> [#uses=1]
store %struct.edge_rec* %899, %struct.edge_rec** %900, align 4
store %struct.edge_rec* %901, %struct.edge_rec** %898, align 4
- %902 = load %struct.edge_rec** %883, align 4 ; <%struct.edge_rec*> [#uses=1]
- %903 = load %struct.edge_rec** %766, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %902 = load %struct.edge_rec*, %struct.edge_rec** %883, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %903 = load %struct.edge_rec*, %struct.edge_rec** %766, align 4 ; <%struct.edge_rec*> [#uses=1]
store %struct.edge_rec* %902, %struct.edge_rec** %766, align 4
store %struct.edge_rec* %903, %struct.edge_rec** %883, align 4
- %904 = getelementptr %struct.VERTEX* %763, i32 0, i32 0, i32 0 ; <double*> [#uses=1]
- %905 = load double* %904, align 4 ; <double> [#uses=2]
- %906 = getelementptr %struct.VERTEX* %763, i32 0, i32 0, i32 1 ; <double*> [#uses=1]
- %907 = load double* %906, align 4 ; <double> [#uses=2]
- %908 = getelementptr %struct.VERTEX* %extra, i32 0, i32 0, i32 0 ; <double*> [#uses=1]
- %909 = load double* %908, align 4 ; <double> [#uses=3]
- %910 = getelementptr %struct.VERTEX* %extra, i32 0, i32 0, i32 1 ; <double*> [#uses=1]
- %911 = load double* %910, align 4 ; <double> [#uses=3]
- %912 = getelementptr %struct.VERTEX* %tree, i32 0, i32 0, i32 0 ; <double*> [#uses=1]
- %913 = load double* %912, align 4 ; <double> [#uses=3]
- %914 = getelementptr %struct.VERTEX* %tree, i32 0, i32 0, i32 1 ; <double*> [#uses=1]
- %915 = load double* %914, align 4 ; <double> [#uses=3]
+ %904 = getelementptr %struct.VERTEX, %struct.VERTEX* %763, i32 0, i32 0, i32 0 ; <double*> [#uses=1]
+ %905 = load double, double* %904, align 4 ; <double> [#uses=2]
+ %906 = getelementptr %struct.VERTEX, %struct.VERTEX* %763, i32 0, i32 0, i32 1 ; <double*> [#uses=1]
+ %907 = load double, double* %906, align 4 ; <double> [#uses=2]
+ %908 = getelementptr %struct.VERTEX, %struct.VERTEX* %extra, i32 0, i32 0, i32 0 ; <double*> [#uses=1]
+ %909 = load double, double* %908, align 4 ; <double> [#uses=3]
+ %910 = getelementptr %struct.VERTEX, %struct.VERTEX* %extra, i32 0, i32 0, i32 1 ; <double*> [#uses=1]
+ %911 = load double, double* %910, align 4 ; <double> [#uses=3]
+ %912 = getelementptr %struct.VERTEX, %struct.VERTEX* %tree, i32 0, i32 0, i32 0 ; <double*> [#uses=1]
+ %913 = load double, double* %912, align 4 ; <double> [#uses=3]
+ %914 = getelementptr %struct.VERTEX, %struct.VERTEX* %tree, i32 0, i32 0, i32 1 ; <double*> [#uses=1]
+ %915 = load double, double* %914, align 4 ; <double> [#uses=3]
%916 = fsub double %905, %913 ; <double> [#uses=1]
%917 = fsub double %911, %915 ; <double> [#uses=1]
%918 = fmul double %916, %917 ; <double> [#uses=1]
@@ -1227,79 +1227,79 @@ bb14: ; preds = %bb13
%933 = and i32 %849, -64 ; <i32> [#uses=3]
%934 = or i32 %932, %933 ; <i32> [#uses=1]
%935 = inttoptr i32 %934 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
- %936 = getelementptr %struct.edge_rec* %935, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
- %937 = load %struct.edge_rec** %936, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %936 = getelementptr %struct.edge_rec, %struct.edge_rec* %935, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ %937 = load %struct.edge_rec*, %struct.edge_rec** %936, align 4 ; <%struct.edge_rec*> [#uses=1]
%938 = ptrtoint %struct.edge_rec* %937 to i32 ; <i32> [#uses=2]
%939 = add i32 %938, 16 ; <i32> [#uses=1]
%940 = and i32 %939, 63 ; <i32> [#uses=1]
%941 = and i32 %938, -64 ; <i32> [#uses=1]
%942 = or i32 %940, %941 ; <i32> [#uses=1]
%943 = inttoptr i32 %942 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
- %944 = load %struct.edge_rec** %847, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %944 = load %struct.edge_rec*, %struct.edge_rec** %847, align 4 ; <%struct.edge_rec*> [#uses=1]
%945 = ptrtoint %struct.edge_rec* %944 to i32 ; <i32> [#uses=2]
%946 = add i32 %945, 16 ; <i32> [#uses=1]
%947 = and i32 %946, 63 ; <i32> [#uses=1]
%948 = and i32 %945, -64 ; <i32> [#uses=1]
%949 = or i32 %947, %948 ; <i32> [#uses=1]
%950 = inttoptr i32 %949 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
- %951 = getelementptr %struct.edge_rec* %943, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=3]
- %952 = load %struct.edge_rec** %951, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %951 = getelementptr %struct.edge_rec, %struct.edge_rec* %943, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=3]
+ %952 = load %struct.edge_rec*, %struct.edge_rec** %951, align 4 ; <%struct.edge_rec*> [#uses=1]
%953 = ptrtoint %struct.edge_rec* %952 to i32 ; <i32> [#uses=2]
%954 = add i32 %953, 16 ; <i32> [#uses=1]
%955 = and i32 %954, 63 ; <i32> [#uses=1]
%956 = and i32 %953, -64 ; <i32> [#uses=1]
%957 = or i32 %955, %956 ; <i32> [#uses=1]
%958 = inttoptr i32 %957 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
- %959 = getelementptr %struct.edge_rec* %958, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2]
- %960 = load %struct.edge_rec** %959, align 4 ; <%struct.edge_rec*> [#uses=1]
- %961 = getelementptr %struct.edge_rec* %950, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2]
- %962 = load %struct.edge_rec** %961, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %959 = getelementptr %struct.edge_rec, %struct.edge_rec* %958, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2]
+ %960 = load %struct.edge_rec*, %struct.edge_rec** %959, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %961 = getelementptr %struct.edge_rec, %struct.edge_rec* %950, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2]
+ %962 = load %struct.edge_rec*, %struct.edge_rec** %961, align 4 ; <%struct.edge_rec*> [#uses=1]
store %struct.edge_rec* %960, %struct.edge_rec** %961, align 4
store %struct.edge_rec* %962, %struct.edge_rec** %959, align 4
- %963 = load %struct.edge_rec** %847, align 4 ; <%struct.edge_rec*> [#uses=1]
- %964 = load %struct.edge_rec** %951, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %963 = load %struct.edge_rec*, %struct.edge_rec** %847, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %964 = load %struct.edge_rec*, %struct.edge_rec** %951, align 4 ; <%struct.edge_rec*> [#uses=1]
store %struct.edge_rec* %963, %struct.edge_rec** %951, align 4
store %struct.edge_rec* %964, %struct.edge_rec** %847, align 4
%965 = add i32 %881, 16 ; <i32> [#uses=1]
%966 = and i32 %965, 63 ; <i32> [#uses=1]
%967 = or i32 %966, %933 ; <i32> [#uses=1]
%968 = inttoptr i32 %967 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
- %969 = getelementptr %struct.edge_rec* %968, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
- %970 = load %struct.edge_rec** %969, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %969 = getelementptr %struct.edge_rec, %struct.edge_rec* %968, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ %970 = load %struct.edge_rec*, %struct.edge_rec** %969, align 4 ; <%struct.edge_rec*> [#uses=1]
%971 = ptrtoint %struct.edge_rec* %970 to i32 ; <i32> [#uses=2]
%972 = add i32 %971, 16 ; <i32> [#uses=1]
%973 = and i32 %972, 63 ; <i32> [#uses=1]
%974 = and i32 %971, -64 ; <i32> [#uses=1]
%975 = or i32 %973, %974 ; <i32> [#uses=1]
%976 = inttoptr i32 %975 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
- %977 = load %struct.edge_rec** %883, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %977 = load %struct.edge_rec*, %struct.edge_rec** %883, align 4 ; <%struct.edge_rec*> [#uses=1]
%978 = ptrtoint %struct.edge_rec* %977 to i32 ; <i32> [#uses=2]
%979 = add i32 %978, 16 ; <i32> [#uses=1]
%980 = and i32 %979, 63 ; <i32> [#uses=1]
%981 = and i32 %978, -64 ; <i32> [#uses=1]
%982 = or i32 %980, %981 ; <i32> [#uses=1]
%983 = inttoptr i32 %982 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
- %984 = getelementptr %struct.edge_rec* %976, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=3]
- %985 = load %struct.edge_rec** %984, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %984 = getelementptr %struct.edge_rec, %struct.edge_rec* %976, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=3]
+ %985 = load %struct.edge_rec*, %struct.edge_rec** %984, align 4 ; <%struct.edge_rec*> [#uses=1]
%986 = ptrtoint %struct.edge_rec* %985 to i32 ; <i32> [#uses=2]
%987 = add i32 %986, 16 ; <i32> [#uses=1]
%988 = and i32 %987, 63 ; <i32> [#uses=1]
%989 = and i32 %986, -64 ; <i32> [#uses=1]
%990 = or i32 %988, %989 ; <i32> [#uses=1]
%991 = inttoptr i32 %990 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1]
- %992 = getelementptr %struct.edge_rec* %991, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2]
- %993 = load %struct.edge_rec** %992, align 4 ; <%struct.edge_rec*> [#uses=1]
- %994 = getelementptr %struct.edge_rec* %983, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2]
- %995 = load %struct.edge_rec** %994, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %992 = getelementptr %struct.edge_rec, %struct.edge_rec* %991, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2]
+ %993 = load %struct.edge_rec*, %struct.edge_rec** %992, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %994 = getelementptr %struct.edge_rec, %struct.edge_rec* %983, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=2]
+ %995 = load %struct.edge_rec*, %struct.edge_rec** %994, align 4 ; <%struct.edge_rec*> [#uses=1]
store %struct.edge_rec* %993, %struct.edge_rec** %994, align 4
store %struct.edge_rec* %995, %struct.edge_rec** %992, align 4
- %996 = load %struct.edge_rec** %883, align 4 ; <%struct.edge_rec*> [#uses=1]
- %997 = load %struct.edge_rec** %984, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %996 = load %struct.edge_rec*, %struct.edge_rec** %883, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %997 = load %struct.edge_rec*, %struct.edge_rec** %984, align 4 ; <%struct.edge_rec*> [#uses=1]
store %struct.edge_rec* %996, %struct.edge_rec** %984, align 4
store %struct.edge_rec* %997, %struct.edge_rec** %883, align 4
%998 = inttoptr i32 %933 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=2]
- %999 = load %struct.edge_rec** @avail_edge, align 4 ; <%struct.edge_rec*> [#uses=1]
- %1000 = getelementptr %struct.edge_rec* %998, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
+ %999 = load %struct.edge_rec*, %struct.edge_rec** @avail_edge, align 4 ; <%struct.edge_rec*> [#uses=1]
+ %1000 = getelementptr %struct.edge_rec, %struct.edge_rec* %998, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1]
store %struct.edge_rec* %999, %struct.edge_rec** %1000, align 4
store %struct.edge_rec* %998, %struct.edge_rec** @avail_edge, align 4
br label %bb15
diff --git a/test/CodeGen/ARM/2009-07-22-ScavengerAssert.ll b/test/CodeGen/ARM/2009-07-22-ScavengerAssert.ll
index d477ba9835be..d746b104baf8 100644
--- a/test/CodeGen/ARM/2009-07-22-ScavengerAssert.ll
+++ b/test/CodeGen/ARM/2009-07-22-ScavengerAssert.ll
@@ -83,7 +83,7 @@ bb45: ; preds = %bb43.preheader, %cli_calloc.exit54
br i1 undef, label %cli_calloc.exit70.thread, label %cli_calloc.exit70
bb52: ; preds = %cli_calloc.exit
- %0 = load i16* undef, align 4 ; <i16> [#uses=1]
+ %0 = load i16, i16* undef, align 4 ; <i16> [#uses=1]
%1 = icmp eq i16 %0, 0 ; <i1> [#uses=1]
%iftmp.20.0 = select i1 %1, i8* %hexsig, i8* null ; <i8*> [#uses=1]
%2 = tail call i32 @strlen(i8* %iftmp.20.0) nounwind readonly ; <i32> [#uses=0]
diff --git a/test/CodeGen/ARM/2009-07-22-SchedulerAssert.ll b/test/CodeGen/ARM/2009-07-22-SchedulerAssert.ll
index bc4a95c3e00b..156fd8843bcf 100644
--- a/test/CodeGen/ARM/2009-07-22-SchedulerAssert.ll
+++ b/test/CodeGen/ARM/2009-07-22-SchedulerAssert.ll
@@ -65,9 +65,9 @@ bb18: ; preds = %bb18, %bb.nph
br i1 undef, label %bb18, label %bb22
bb22: ; preds = %bb18, %bb17
- %0 = getelementptr i8* null, i32 10 ; <i8*> [#uses=1]
+ %0 = getelementptr i8, i8* null, i32 10 ; <i8*> [#uses=1]
%1 = bitcast i8* %0 to i16* ; <i16*> [#uses=1]
- %2 = load i16* %1, align 2 ; <i16> [#uses=1]
+ %2 = load i16, i16* %1, align 2 ; <i16> [#uses=1]
%3 = add i16 %2, 1 ; <i16> [#uses=1]
%4 = zext i16 %3 to i32 ; <i32> [#uses=1]
%5 = mul i32 %4, 3 ; <i32> [#uses=1]
diff --git a/test/CodeGen/ARM/2009-07-29-VFP3Registers.ll b/test/CodeGen/ARM/2009-07-29-VFP3Registers.ll
index 5003fbdedb27..01591c80362d 100644
--- a/test/CodeGen/ARM/2009-07-29-VFP3Registers.ll
+++ b/test/CodeGen/ARM/2009-07-29-VFP3Registers.ll
@@ -12,7 +12,7 @@ bb: ; preds = %bb, %entry
br i1 undef, label %bb28, label %bb
bb28: ; preds = %bb
- %0 = load double* @a, align 4 ; <double> [#uses=2]
+ %0 = load double, double* @a, align 4 ; <double> [#uses=2]
%1 = fadd double %0, undef ; <double> [#uses=2]
br i1 undef, label %bb59, label %bb60
diff --git a/test/CodeGen/ARM/2009-08-02-RegScavengerAssert-Neon.ll b/test/CodeGen/ARM/2009-08-02-RegScavengerAssert-Neon.ll
index a656c495f796..e277b4cf91a6 100644
--- a/test/CodeGen/ARM/2009-08-02-RegScavengerAssert-Neon.ll
+++ b/test/CodeGen/ARM/2009-08-02-RegScavengerAssert-Neon.ll
@@ -13,17 +13,17 @@ entry:
%"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
store <4 x i32> %v, <4 x i32>* %v_addr
store i32 %f, i32* %f_addr
- %1 = load <4 x i32>* %v_addr, align 16 ; <<4 x i32>> [#uses=1]
- %2 = load i32* %f_addr, align 4 ; <i32> [#uses=1]
+ %1 = load <4 x i32>, <4 x i32>* %v_addr, align 16 ; <<4 x i32>> [#uses=1]
+ %2 = load i32, i32* %f_addr, align 4 ; <i32> [#uses=1]
%3 = insertelement <4 x i32> undef, i32 %2, i32 0 ; <<4 x i32>> [#uses=1]
%4 = shufflevector <4 x i32> %3, <4 x i32> undef, <4 x i32> zeroinitializer ; <<4 x i32>> [#uses=1]
%5 = mul <4 x i32> %1, %4 ; <<4 x i32>> [#uses=1]
store <4 x i32> %5, <4 x i32>* %0, align 16
- %6 = load <4 x i32>* %0, align 16 ; <<4 x i32>> [#uses=1]
+ %6 = load <4 x i32>, <4 x i32>* %0, align 16 ; <<4 x i32>> [#uses=1]
store <4 x i32> %6, <4 x i32>* %retval, align 16
br label %return
return: ; preds = %entry
- %retval1 = load <4 x i32>* %retval ; <<4 x i32>> [#uses=1]
+ %retval1 = load <4 x i32>, <4 x i32>* %retval ; <<4 x i32>> [#uses=1]
ret <4 x i32> %retval1
}
diff --git a/test/CodeGen/ARM/2009-08-15-RegScavenger-EarlyClobber.ll b/test/CodeGen/ARM/2009-08-15-RegScavenger-EarlyClobber.ll
index 4b4101556f18..a5e9692a0082 100644
--- a/test/CodeGen/ARM/2009-08-15-RegScavenger-EarlyClobber.ll
+++ b/test/CodeGen/ARM/2009-08-15-RegScavenger-EarlyClobber.ll
@@ -28,8 +28,8 @@ bb7: ; preds = %bb2
bb8: ; preds = %bb7, %entry
%2 = phi i32 [ 0, %entry ], [ %1, %bb7 ] ; <i32> [#uses=3]
- %scevgep22 = getelementptr %struct.iovec* %iov, i32 %2, i32 0; <i8**> [#uses=0]
- %3 = load i32* %nr_segs, align 4 ; <i32> [#uses=1]
+ %scevgep22 = getelementptr %struct.iovec, %struct.iovec* %iov, i32 %2, i32 0; <i8**> [#uses=0]
+ %3 = load i32, i32* %nr_segs, align 4 ; <i32> [#uses=1]
%4 = icmp ult i32 %2, %3 ; <i1> [#uses=1]
br i1 %4, label %bb, label %bb9
diff --git a/test/CodeGen/ARM/2009-08-21-PostRAKill.ll b/test/CodeGen/ARM/2009-08-21-PostRAKill.ll
index c598fe6e2e1a..0d258e66b7e0 100644
--- a/test/CodeGen/ARM/2009-08-21-PostRAKill.ll
+++ b/test/CodeGen/ARM/2009-08-21-PostRAKill.ll
@@ -9,18 +9,18 @@ target triple = "armv7-apple-darwin9"
define %struct.tree* @tsp(%struct.tree* %t, i32 %nproc) nounwind {
entry:
- %t.idx51.val.i = load double* null ; <double> [#uses=1]
+ %t.idx51.val.i = load double, double* null ; <double> [#uses=1]
br i1 undef, label %bb4.i, label %bb.i
bb.i: ; preds = %entry
unreachable
bb4.i: ; preds = %entry
- %0 = load %struct.tree** @g, align 4 ; <%struct.tree*> [#uses=2]
- %.idx45.i = getelementptr %struct.tree* %0, i32 0, i32 1 ; <double*> [#uses=1]
- %.idx45.val.i = load double* %.idx45.i ; <double> [#uses=1]
- %.idx46.i = getelementptr %struct.tree* %0, i32 0, i32 2 ; <double*> [#uses=1]
- %.idx46.val.i = load double* %.idx46.i ; <double> [#uses=1]
+ %0 = load %struct.tree*, %struct.tree** @g, align 4 ; <%struct.tree*> [#uses=2]
+ %.idx45.i = getelementptr %struct.tree, %struct.tree* %0, i32 0, i32 1 ; <double*> [#uses=1]
+ %.idx45.val.i = load double, double* %.idx45.i ; <double> [#uses=1]
+ %.idx46.i = getelementptr %struct.tree, %struct.tree* %0, i32 0, i32 2 ; <double*> [#uses=1]
+ %.idx46.val.i = load double, double* %.idx46.i ; <double> [#uses=1]
%1 = fsub double 0.000000e+00, %.idx45.val.i ; <double> [#uses=2]
%2 = fmul double %1, %1 ; <double> [#uses=1]
%3 = fsub double %t.idx51.val.i, %.idx46.val.i ; <double> [#uses=2]
diff --git a/test/CodeGen/ARM/2009-08-21-PostRAKill2.ll b/test/CodeGen/ARM/2009-08-21-PostRAKill2.ll
index cc92c26aeece..489d4e45236c 100644
--- a/test/CodeGen/ARM/2009-08-21-PostRAKill2.ll
+++ b/test/CodeGen/ARM/2009-08-21-PostRAKill2.ll
@@ -32,7 +32,7 @@ bb9: ; preds = %bb7
br label %bb11
bb11: ; preds = %bb9, %bb7
- %1 = getelementptr %struct.icstruct* %agg.result, i32 0, i32 0, i32 0 ; <i32*> [#uses=1]
+ %1 = getelementptr %struct.icstruct, %struct.icstruct* %agg.result, i32 0, i32 0, i32 0 ; <i32*> [#uses=1]
store i32 0, i32* %1
ret void
}
diff --git a/test/CodeGen/ARM/2009-08-21-PostRAKill3.ll b/test/CodeGen/ARM/2009-08-21-PostRAKill3.ll
index 382038eb13ef..133fc0588a91 100644
--- a/test/CodeGen/ARM/2009-08-21-PostRAKill3.ll
+++ b/test/CodeGen/ARM/2009-08-21-PostRAKill3.ll
@@ -14,14 +14,14 @@ entry:
br i1 %p, label %bb8, label %bb1
bb1: ; preds = %entry
- %malloccall = tail call i8* @malloc(i32 ptrtoint (%struct.Village* getelementptr (%struct.Village* null, i32 1) to i32))
+ %malloccall = tail call i8* @malloc(i32 ptrtoint (%struct.Village* getelementptr (%struct.Village, %struct.Village* null, i32 1) to i32))
%0 = bitcast i8* %malloccall to %struct.Village*
%exp2 = call double @ldexp(double 1.000000e+00, i32 %level) nounwind ; <double> [#uses=1]
%.c = fptosi double %exp2 to i32 ; <i32> [#uses=1]
store i32 %.c, i32* null
- %1 = getelementptr %struct.Village* %0, i32 0, i32 3, i32 6, i32 0 ; <%struct.List**> [#uses=1]
+ %1 = getelementptr %struct.Village, %struct.Village* %0, i32 0, i32 3, i32 6, i32 0 ; <%struct.List**> [#uses=1]
store %struct.List* null, %struct.List** %1
- %2 = getelementptr %struct.Village* %0, i32 0, i32 3, i32 6, i32 2 ; <%struct.List**> [#uses=1]
+ %2 = getelementptr %struct.Village, %struct.Village* %0, i32 0, i32 3, i32 6, i32 2 ; <%struct.List**> [#uses=1]
store %struct.List* null, %struct.List** %2
ret %struct.Village* %0
diff --git a/test/CodeGen/ARM/2009-08-31-LSDA-Name.ll b/test/CodeGen/ARM/2009-08-31-LSDA-Name.ll
index a016809857e7..2f6e428351d7 100644
--- a/test/CodeGen/ARM/2009-08-31-LSDA-Name.ll
+++ b/test/CodeGen/ARM/2009-08-31-LSDA-Name.ll
@@ -1,7 +1,9 @@
; RUN: llc < %s -mtriple=arm-apple-darwin9 -march=arm | FileCheck %s
-; CHECK: L_LSDA_0:
-
+; CHECK: .cfi_lsda 16, [[LABEL:.*]]
+; CHECK: .long [[LABEL]]-
+; CHECK: [[LABEL]]:
+; CHECK: .byte 255 @ @LPStart Encoding = omit
%struct.A = type { i32* }
@@ -22,14 +24,14 @@ invcont: ; preds = %entry
br label %return
bb: ; preds = %ppad
- %eh_select = load i32* %eh_selector
+ %eh_select = load i32, i32* %eh_selector
store i32 %eh_select, i32* %save_filt.1, align 4
- %eh_value = load i8** %eh_exception
+ %eh_value = load i8*, i8** %eh_exception
store i8* %eh_value, i8** %save_eptr.0, align 4
call void @_ZN1AD1Ev(%struct.A* %a) nounwind
- %0 = load i8** %save_eptr.0, align 4
+ %0 = load i8*, i8** %save_eptr.0, align 4
store i8* %0, i8** %eh_exception, align 4
- %1 = load i32* %save_filt.1, align 4
+ %1 = load i32, i32* %save_filt.1, align 4
store i32 %1, i32* %eh_selector, align 4
br label %Unwind
@@ -49,7 +51,7 @@ ppad: ; preds = %lpad
br label %bb
Unwind: ; preds = %bb
- %eh_ptr3 = load i8** %eh_exception
+ %eh_ptr3 = load i8*, i8** %eh_exception
call void @_Unwind_SjLj_Resume(i8* %eh_ptr3)
unreachable
}
@@ -61,8 +63,8 @@ entry:
store %struct.A* %this, %struct.A** %this_addr
%0 = call i8* @_Znwm(i32 4)
%1 = bitcast i8* %0 to i32*
- %2 = load %struct.A** %this_addr, align 4
- %3 = getelementptr inbounds %struct.A* %2, i32 0, i32 0
+ %2 = load %struct.A*, %struct.A** %this_addr, align 4
+ %3 = getelementptr inbounds %struct.A, %struct.A* %2, i32 0, i32 0
store i32* %1, i32** %3, align 4
br label %return
@@ -77,9 +79,9 @@ entry:
%this_addr = alloca %struct.A*
%"alloca point" = bitcast i32 0 to i32
store %struct.A* %this, %struct.A** %this_addr
- %0 = load %struct.A** %this_addr, align 4
- %1 = getelementptr inbounds %struct.A* %0, i32 0, i32 0
- %2 = load i32** %1, align 4
+ %0 = load %struct.A*, %struct.A** %this_addr, align 4
+ %1 = getelementptr inbounds %struct.A, %struct.A* %0, i32 0, i32 0
+ %2 = load i32*, i32** %1, align 4
%3 = bitcast i32* %2 to i8*
call void @_ZdlPv(i8* %3) nounwind
br label %bb
diff --git a/test/CodeGen/ARM/2009-08-31-TwoRegShuffle.ll b/test/CodeGen/ARM/2009-08-31-TwoRegShuffle.ll
index b078ec06dbb8..a6d128d9e0ce 100644
--- a/test/CodeGen/ARM/2009-08-31-TwoRegShuffle.ll
+++ b/test/CodeGen/ARM/2009-08-31-TwoRegShuffle.ll
@@ -4,7 +4,7 @@
define <4 x i16> @v2regbug(<4 x i16>* %B) nounwind {
;CHECK-LABEL: v2regbug:
;CHECK: vzip.16
- %tmp1 = load <4 x i16>* %B
+ %tmp1 = load <4 x i16>, <4 x i16>* %B
%tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32><i32 0, i32 0, i32 1, i32 1>
ret <4 x i16> %tmp2
}
diff --git a/test/CodeGen/ARM/2009-09-09-fpcmp-ole.ll b/test/CodeGen/ARM/2009-09-09-fpcmp-ole.ll
index 426bd17590b7..4437d37e9f4a 100644
--- a/test/CodeGen/ARM/2009-09-09-fpcmp-ole.ll
+++ b/test/CodeGen/ARM/2009-09-09-fpcmp-ole.ll
@@ -2,8 +2,8 @@
; pr4939
define void @test(double* %x, double* %y) nounwind {
- %1 = load double* %x
- %2 = load double* %y
+ %1 = load double, double* %x
+ %2 = load double, double* %y
%3 = fsub double -0.000000e+00, %1
%4 = fcmp ugt double %2, %3
br i1 %4, label %bb1, label %bb2
diff --git a/test/CodeGen/ARM/2009-09-13-InvalidSubreg.ll b/test/CodeGen/ARM/2009-09-13-InvalidSubreg.ll
index 13adb24e2f6f..de927a8f8b66 100644
--- a/test/CodeGen/ARM/2009-09-13-InvalidSubreg.ll
+++ b/test/CodeGen/ARM/2009-09-13-InvalidSubreg.ll
@@ -13,10 +13,10 @@ declare <2 x float> @llvm.arm.neon.vpadd.v2f32(<2 x float>, <2 x float>) nounwin
define arm_aapcs_vfpcc i8 @foo(%struct.fr* nocapture %this, %struct.obb* %box) nounwind {
entry:
- %val.i.i = load <4 x float>* undef ; <<4 x float>> [#uses=1]
- %val2.i.i = load <4 x float>* null ; <<4 x float>> [#uses=1]
- %elt3.i.i = getelementptr inbounds %struct.obb* %box, i32 0, i32 0, i32 2, i32 0 ; <<4 x float>*> [#uses=1]
- %val4.i.i = load <4 x float>* %elt3.i.i ; <<4 x float>> [#uses=1]
+ %val.i.i = load <4 x float>, <4 x float>* undef ; <<4 x float>> [#uses=1]
+ %val2.i.i = load <4 x float>, <4 x float>* null ; <<4 x float>> [#uses=1]
+ %elt3.i.i = getelementptr inbounds %struct.obb, %struct.obb* %box, i32 0, i32 0, i32 2, i32 0 ; <<4 x float>*> [#uses=1]
+ %val4.i.i = load <4 x float>, <4 x float>* %elt3.i.i ; <<4 x float>> [#uses=1]
%0 = shufflevector <2 x float> undef, <2 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; <<4 x float>> [#uses=1]
%1 = fadd <4 x float> undef, zeroinitializer ; <<4 x float>> [#uses=1]
br label %bb33
diff --git a/test/CodeGen/ARM/2009-09-13-InvalidSuperReg.ll b/test/CodeGen/ARM/2009-09-13-InvalidSuperReg.ll
index dd9a6fd12d7e..b8a1479fd34c 100644
--- a/test/CodeGen/ARM/2009-09-13-InvalidSuperReg.ll
+++ b/test/CodeGen/ARM/2009-09-13-InvalidSuperReg.ll
@@ -16,13 +16,13 @@ define arm_aapcs_vfpcc <4 x float> @foo(i8* nocapture %pBuffer, i32 %numItems) n
%tmp3738 = inttoptr i32 %tmp37 to float*
%tmp39 = add i32 %1, 24
%tmp3940 = inttoptr i32 %tmp39 to float*
- %2 = load float* %lsr.iv2641, align 4
- %3 = load float* %tmp2930, align 4
- %4 = load float* %tmp3132, align 4
- %5 = load float* %tmp3334, align 4
- %6 = load float* %tmp3536, align 4
- %7 = load float* %tmp3738, align 4
- %8 = load float* %tmp3940, align 4
+ %2 = load float, float* %lsr.iv2641, align 4
+ %3 = load float, float* %tmp2930, align 4
+ %4 = load float, float* %tmp3132, align 4
+ %5 = load float, float* %tmp3334, align 4
+ %6 = load float, float* %tmp3536, align 4
+ %7 = load float, float* %tmp3738, align 4
+ %8 = load float, float* %tmp3940, align 4
%9 = insertelement <4 x float> undef, float %6, i32 0
%10 = shufflevector <4 x float> %9, <4 x float> undef, <4 x i32> zeroinitializer
%11 = insertelement <4 x float> %10, float %7, i32 1
diff --git a/test/CodeGen/ARM/2009-09-23-LiveVariablesBug.ll b/test/CodeGen/ARM/2009-09-23-LiveVariablesBug.ll
index 2ff479b21781..4bbd04705643 100644
--- a/test/CodeGen/ARM/2009-09-23-LiveVariablesBug.ll
+++ b/test/CodeGen/ARM/2009-09-23-LiveVariablesBug.ll
@@ -9,7 +9,7 @@ define arm_aapcs_vfpcc %struct.1* @hhh3(%struct.1* %this, <4 x float> %lenation.
entry:
%0 = call arm_aapcs_vfpcc %struct.4* @sss1(%struct.4* undef, float 0.000000e+00) nounwind ; <%struct.4*> [#uses=0]
%1 = call arm_aapcs_vfpcc %struct.4* @qqq1(%struct.4* null, float 5.000000e-01) nounwind ; <%struct.4*> [#uses=0]
- %val92 = load <4 x float>* null ; <<4 x float>> [#uses=1]
+ %val92 = load <4 x float>, <4 x float>* null ; <<4 x float>> [#uses=1]
%2 = call arm_aapcs_vfpcc %struct.4* @zzz2(%struct.4* undef, <4 x float> %val92) nounwind ; <%struct.4*> [#uses=0]
ret %struct.1* %this
}
diff --git a/test/CodeGen/ARM/2009-09-24-spill-align.ll b/test/CodeGen/ARM/2009-09-24-spill-align.ll
index 224bd019481d..4502542809f7 100644
--- a/test/CodeGen/ARM/2009-09-24-spill-align.ll
+++ b/test/CodeGen/ARM/2009-09-24-spill-align.ll
@@ -7,7 +7,7 @@ entry:
%out_poly16_t = alloca i16 ; <i16*> [#uses=1]
%"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
; CHECK: vldr
- %0 = load <4 x i16>* %arg0_poly16x4_t, align 8 ; <<4 x i16>> [#uses=1]
+ %0 = load <4 x i16>, <4 x i16>* %arg0_poly16x4_t, align 8 ; <<4 x i16>> [#uses=1]
%1 = extractelement <4 x i16> %0, i32 1 ; <i16> [#uses=1]
store i16 %1, i16* %out_poly16_t, align 2
br label %return
diff --git a/test/CodeGen/ARM/2009-09-28-LdStOptiBug.ll b/test/CodeGen/ARM/2009-09-28-LdStOptiBug.ll
index e2ff164502ce..287384fbc214 100644
--- a/test/CodeGen/ARM/2009-09-28-LdStOptiBug.ll
+++ b/test/CodeGen/ARM/2009-09-28-LdStOptiBug.ll
@@ -11,9 +11,9 @@ define void @foo(%0* noalias nocapture sret %agg.result, double %x.0, double %y.
%x76 = fmul double %y.0, 0.000000e+00 ; <double> [#uses=1]
%x77 = fadd double %y.0, 0.000000e+00 ; <double> [#uses=1]
%tmpr = fadd double %x.0, %x76 ; <double> [#uses=1]
- %agg.result.0 = getelementptr %0* %agg.result, i32 0, i32 0 ; <double*> [#uses=1]
+ %agg.result.0 = getelementptr %0, %0* %agg.result, i32 0, i32 0 ; <double*> [#uses=1]
store double %tmpr, double* %agg.result.0, align 8
- %agg.result.1 = getelementptr %0* %agg.result, i32 0, i32 1 ; <double*> [#uses=1]
+ %agg.result.1 = getelementptr %0, %0* %agg.result, i32 0, i32 1 ; <double*> [#uses=1]
store double %x77, double* %agg.result.1, align 8
ret void
}
diff --git a/test/CodeGen/ARM/2009-10-02-NEONSubregsBug.ll b/test/CodeGen/ARM/2009-10-02-NEONSubregsBug.ll
index 465368b0ba8d..641036f684b9 100644
--- a/test/CodeGen/ARM/2009-10-02-NEONSubregsBug.ll
+++ b/test/CodeGen/ARM/2009-10-02-NEONSubregsBug.ll
@@ -6,8 +6,8 @@ entry:
br i1 undef, label %return, label %bb
bb: ; preds = %bb, %entry
- %0 = load float* undef, align 4 ; <float> [#uses=1]
- %1 = load float* null, align 4 ; <float> [#uses=1]
+ %0 = load float, float* undef, align 4 ; <float> [#uses=1]
+ %1 = load float, float* null, align 4 ; <float> [#uses=1]
%2 = insertelement <4 x float> undef, float undef, i32 1 ; <<4 x float>> [#uses=1]
%3 = insertelement <4 x float> %2, float %1, i32 2 ; <<4 x float>> [#uses=2]
%4 = insertelement <4 x float> undef, float %0, i32 2 ; <<4 x float>> [#uses=1]
diff --git a/test/CodeGen/ARM/2009-10-16-Scope.ll b/test/CodeGen/ARM/2009-10-16-Scope.ll
index de05644fc901..3f47488372b8 100644
--- a/test/CodeGen/ARM/2009-10-16-Scope.ll
+++ b/test/CodeGen/ARM/2009-10-16-Scope.ll
@@ -9,7 +9,7 @@ entry:
br label %do.body, !dbg !0
do.body: ; preds = %entry
- call void @llvm.dbg.declare(metadata i32* %count_, metadata !4, metadata !{!"0x102"})
+ call void @llvm.dbg.declare(metadata i32* %count_, metadata !4, metadata !DIExpression()), !dbg !DILocation(scope: !5)
%conv = ptrtoint i32* %count_ to i32, !dbg !0 ; <i32> [#uses=1]
%call = call i32 @foo(i32 %conv) ssp, !dbg !0 ; <i32> [#uses=0]
br label %do.end, !dbg !0
@@ -22,13 +22,13 @@ declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
declare i32 @foo(i32) ssp
-!0 = !MDLocation(line: 5, column: 2, scope: !1)
-!1 = !{!"0xb\001\001\000", null, !2}; [DW_TAG_lexical_block ]
-!2 = !{!"0x2e\00bar\00bar\00bar\004\000\001\000\006\000\000\000", i32 0, !3, null, null, null, null, null, null}; [DW_TAG_subprogram ]
-!3 = !{!"0x11\0012\00clang 1.1\001\00\000\00\000", !8, null, !9, null, null, null}; [DW_TAG_compile_unit ]
-!4 = !{!"0x100\00count_\005\000", !5, !3, !6}; [ DW_TAG_auto_variable ]
-!5 = !{!"0xb\001\001\000", null, !1}; [DW_TAG_lexical_block ]
-!6 = !{!"0x24\00int\000\0032\0032\000\000\005", null, !3}; [DW_TAG_base_type ]
-!7 = !MDLocation(line: 6, column: 1, scope: !2)
-!8 = !{!"genmodes.i", !"/Users/yash/Downloads"}
+!0 = !DILocation(line: 5, column: 2, scope: !1)
+!1 = distinct !DILexicalBlock(line: 1, column: 1, file: null, scope: !2)
+!2 = !DISubprogram(name: "bar", linkageName: "bar", line: 4, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, scope: !3)
+!3 = !DICompileUnit(language: DW_LANG_C99, producer: "clang 1.1", isOptimized: true, emissionKind: 0, file: !8, retainedTypes: !9)
+!4 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "count_", line: 5, scope: !5, file: !3, type: !6)
+!5 = distinct !DILexicalBlock(line: 1, column: 1, file: null, scope: !1)
+!6 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!7 = !DILocation(line: 6, column: 1, scope: !2)
+!8 = !DIFile(filename: "genmodes.i", directory: "/Users/yash/Downloads")
!9 = !{i32 0}
diff --git a/test/CodeGen/ARM/2009-10-27-double-align.ll b/test/CodeGen/ARM/2009-10-27-double-align.ll
index b37de9dbbdfd..39f3292e260b 100644
--- a/test/CodeGen/ARM/2009-10-27-double-align.ll
+++ b/test/CodeGen/ARM/2009-10-27-double-align.ll
@@ -8,7 +8,7 @@ entry:
;CHECK: [sp, #8]
;CHECK: [sp, #12]
;CHECK: [sp]
- tail call void (i8*, ...)* @f(i8* getelementptr ([1 x i8]* @.str, i32 0, i32 0), i32 1, double 2.000000e+00, i32 3, double 4.000000e+00)
+ tail call void (i8*, ...) @f(i8* getelementptr ([1 x i8], [1 x i8]* @.str, i32 0, i32 0), i32 1, double 2.000000e+00, i32 3, double 4.000000e+00)
ret void
}
diff --git a/test/CodeGen/ARM/2009-11-01-NeonMoves.ll b/test/CodeGen/ARM/2009-11-01-NeonMoves.ll
index a18a8308044c..1fc10564a460 100644
--- a/test/CodeGen/ARM/2009-11-01-NeonMoves.ll
+++ b/test/CodeGen/ARM/2009-11-01-NeonMoves.ll
@@ -8,7 +8,7 @@ target triple = "armv7-eabi"
define arm_aapcs_vfpcc void @bar(%foo* noalias sret %agg.result, <4 x float> %quat.0) nounwind {
entry:
%quat_addr = alloca %foo, align 16 ; <%foo*> [#uses=2]
- %0 = getelementptr inbounds %foo* %quat_addr, i32 0, i32 0 ; <<4 x float>*> [#uses=1]
+ %0 = getelementptr inbounds %foo, %foo* %quat_addr, i32 0, i32 0 ; <<4 x float>*> [#uses=1]
store <4 x float> %quat.0, <4 x float>* %0
%1 = call arm_aapcs_vfpcc <4 x float> @quux(%foo* %quat_addr) nounwind ; <<4 x float>> [#uses=3]
%2 = fmul <4 x float> %1, %1 ; <<4 x float>> [#uses=2]
diff --git a/test/CodeGen/ARM/2009-11-02-NegativeLane.ll b/test/CodeGen/ARM/2009-11-02-NegativeLane.ll
index 2597b413ec7c..154cd65e4ec1 100644
--- a/test/CodeGen/ARM/2009-11-02-NegativeLane.ll
+++ b/test/CodeGen/ARM/2009-11-02-NegativeLane.ll
@@ -8,7 +8,7 @@ entry:
bb: ; preds = %bb, %entry
; CHECK: vld1.16 {d16[], d17[]}
- %0 = load i16* undef, align 2
+ %0 = load i16, i16* undef, align 2
%1 = insertelement <8 x i16> undef, i16 %0, i32 2
%2 = insertelement <8 x i16> %1, i16 undef, i32 3
%3 = mul <8 x i16> %2, %2
diff --git a/test/CodeGen/ARM/2009-11-07-SubRegAsmPrinting.ll b/test/CodeGen/ARM/2009-11-07-SubRegAsmPrinting.ll
index 38eb0ea2c891..9632c7730105 100644
--- a/test/CodeGen/ARM/2009-11-07-SubRegAsmPrinting.ll
+++ b/test/CodeGen/ARM/2009-11-07-SubRegAsmPrinting.ll
@@ -6,7 +6,7 @@ target triple = "armv7-eabi"
define arm_aapcs_vfpcc void @foo() {
entry:
- %0 = load float* null, align 4 ; <float> [#uses=2]
+ %0 = load float, float* null, align 4 ; <float> [#uses=2]
%1 = fmul float %0, undef ; <float> [#uses=2]
%2 = fmul float 0.000000e+00, %1 ; <float> [#uses=2]
%3 = fmul float %0, %1 ; <float> [#uses=1]
@@ -18,7 +18,7 @@ entry:
%7 = fsub float %2, undef ; <float> [#uses=1]
%8 = fsub float 0.000000e+00, undef ; <float> [#uses=3]
%9 = fadd float %2, undef ; <float> [#uses=3]
- %10 = load float* undef, align 8 ; <float> [#uses=3]
+ %10 = load float, float* undef, align 8 ; <float> [#uses=3]
%11 = fmul float %8, %10 ; <float> [#uses=1]
%12 = fadd float undef, %11 ; <float> [#uses=2]
%13 = fmul float undef, undef ; <float> [#uses=1]
@@ -30,10 +30,10 @@ entry:
%19 = fadd float %18, 0.000000e+00 ; <float> [#uses=1]
%20 = fmul float undef, %10 ; <float> [#uses=1]
%21 = fadd float %19, %20 ; <float> [#uses=1]
- %22 = load float* undef, align 8 ; <float> [#uses=1]
+ %22 = load float, float* undef, align 8 ; <float> [#uses=1]
%23 = fmul float %5, %22 ; <float> [#uses=1]
%24 = fadd float %23, undef ; <float> [#uses=1]
- %25 = load float* undef, align 8 ; <float> [#uses=2]
+ %25 = load float, float* undef, align 8 ; <float> [#uses=2]
%26 = fmul float %8, %25 ; <float> [#uses=1]
%27 = fadd float %24, %26 ; <float> [#uses=1]
%28 = fmul float %9, %25 ; <float> [#uses=1]
diff --git a/test/CodeGen/ARM/2009-11-13-ScavengerAssert.ll b/test/CodeGen/ARM/2009-11-13-ScavengerAssert.ll
index 6cce02dd48cf..07e910b3e07b 100644
--- a/test/CodeGen/ARM/2009-11-13-ScavengerAssert.ll
+++ b/test/CodeGen/ARM/2009-11-13-ScavengerAssert.ll
@@ -13,11 +13,11 @@ entry:
br i1 undef, label %bb85, label %bb
bb: ; preds = %entry
- %0 = getelementptr inbounds %bar* null, i32 0, i32 0, i32 0, i32 2 ; <float*> [#uses=2]
- %1 = load float* undef, align 4 ; <float> [#uses=1]
+ %0 = getelementptr inbounds %bar, %bar* null, i32 0, i32 0, i32 0, i32 2 ; <float*> [#uses=2]
+ %1 = load float, float* undef, align 4 ; <float> [#uses=1]
%2 = fsub float 0.000000e+00, undef ; <float> [#uses=2]
%3 = fmul float 0.000000e+00, undef ; <float> [#uses=1]
- %4 = load float* %0, align 4 ; <float> [#uses=3]
+ %4 = load float, float* %0, align 4 ; <float> [#uses=3]
%5 = fmul float %4, %2 ; <float> [#uses=1]
%6 = fsub float %3, %5 ; <float> [#uses=1]
%7 = fmul float %4, undef ; <float> [#uses=1]
diff --git a/test/CodeGen/ARM/2009-11-13-ScavengerAssert2.ll b/test/CodeGen/ARM/2009-11-13-ScavengerAssert2.ll
index 3ff663124819..9eddcf71cb3b 100644
--- a/test/CodeGen/ARM/2009-11-13-ScavengerAssert2.ll
+++ b/test/CodeGen/ARM/2009-11-13-ScavengerAssert2.ll
@@ -19,22 +19,22 @@ bb2.i: ; preds = %bb
br label %bb3.i
bb3.i: ; preds = %bb2.i, %bb
- %0 = getelementptr inbounds %quuz* %a, i32 0, i32 1, i32 0, i32 0 ; <float*> [#uses=0]
+ %0 = getelementptr inbounds %quuz, %quuz* %a, i32 0, i32 1, i32 0, i32 0 ; <float*> [#uses=0]
%1 = fsub float 0.000000e+00, undef ; <float> [#uses=1]
- %2 = getelementptr inbounds %quuz* %b, i32 0, i32 1, i32 0, i32 1 ; <float*> [#uses=2]
- %3 = load float* %2, align 4 ; <float> [#uses=1]
- %4 = getelementptr inbounds %quuz* %a, i32 0, i32 1, i32 0, i32 1 ; <float*> [#uses=1]
+ %2 = getelementptr inbounds %quuz, %quuz* %b, i32 0, i32 1, i32 0, i32 1 ; <float*> [#uses=2]
+ %3 = load float, float* %2, align 4 ; <float> [#uses=1]
+ %4 = getelementptr inbounds %quuz, %quuz* %a, i32 0, i32 1, i32 0, i32 1 ; <float*> [#uses=1]
%5 = fsub float %3, undef ; <float> [#uses=2]
- %6 = getelementptr inbounds %quuz* %b, i32 0, i32 1, i32 0, i32 2 ; <float*> [#uses=2]
- %7 = load float* %6, align 4 ; <float> [#uses=1]
+ %6 = getelementptr inbounds %quuz, %quuz* %b, i32 0, i32 1, i32 0, i32 2 ; <float*> [#uses=2]
+ %7 = load float, float* %6, align 4 ; <float> [#uses=1]
%8 = fsub float %7, undef ; <float> [#uses=1]
- %9 = getelementptr inbounds %quuz* %c, i32 0, i32 1, i32 0, i32 0 ; <float*> [#uses=2]
- %10 = load float* %9, align 4 ; <float> [#uses=1]
+ %9 = getelementptr inbounds %quuz, %quuz* %c, i32 0, i32 1, i32 0, i32 0 ; <float*> [#uses=2]
+ %10 = load float, float* %9, align 4 ; <float> [#uses=1]
%11 = fsub float %10, undef ; <float> [#uses=2]
- %12 = getelementptr inbounds %quuz* %c, i32 0, i32 1, i32 0, i32 1 ; <float*> [#uses=2]
- %13 = load float* %12, align 4 ; <float> [#uses=1]
+ %12 = getelementptr inbounds %quuz, %quuz* %c, i32 0, i32 1, i32 0, i32 1 ; <float*> [#uses=2]
+ %13 = load float, float* %12, align 4 ; <float> [#uses=1]
%14 = fsub float %13, undef ; <float> [#uses=1]
- %15 = load float* undef, align 4 ; <float> [#uses=1]
+ %15 = load float, float* undef, align 4 ; <float> [#uses=1]
%16 = fsub float %15, undef ; <float> [#uses=1]
%17 = fmul float %5, %16 ; <float> [#uses=1]
%18 = fsub float %17, 0.000000e+00 ; <float> [#uses=5]
@@ -44,19 +44,19 @@ bb3.i: ; preds = %bb2.i, %bb
%22 = fmul float %5, %11 ; <float> [#uses=1]
%23 = fsub float %21, %22 ; <float> [#uses=2]
store float %18, float* undef
- %24 = getelementptr inbounds %bar* null, i32 0, i32 0, i32 0, i32 1 ; <float*> [#uses=2]
+ %24 = getelementptr inbounds %bar, %bar* null, i32 0, i32 0, i32 0, i32 1 ; <float*> [#uses=2]
store float %20, float* %24
store float %23, float* undef
- %25 = getelementptr inbounds %bar* null, i32 0, i32 0, i32 0, i32 3 ; <float*> [#uses=0]
+ %25 = getelementptr inbounds %bar, %bar* null, i32 0, i32 0, i32 0, i32 3 ; <float*> [#uses=0]
%26 = fmul float %18, %18 ; <float> [#uses=1]
%27 = fadd float %26, undef ; <float> [#uses=1]
%28 = fadd float %27, undef ; <float> [#uses=1]
%29 = call arm_aapcs_vfpcc float @sqrtf(float %28) readnone ; <float> [#uses=1]
- %30 = load float* null, align 4 ; <float> [#uses=2]
- %31 = load float* %4, align 4 ; <float> [#uses=2]
- %32 = load float* %2, align 4 ; <float> [#uses=2]
- %33 = load float* null, align 4 ; <float> [#uses=3]
- %34 = load float* %6, align 4 ; <float> [#uses=2]
+ %30 = load float, float* null, align 4 ; <float> [#uses=2]
+ %31 = load float, float* %4, align 4 ; <float> [#uses=2]
+ %32 = load float, float* %2, align 4 ; <float> [#uses=2]
+ %33 = load float, float* null, align 4 ; <float> [#uses=3]
+ %34 = load float, float* %6, align 4 ; <float> [#uses=2]
%35 = fsub float %33, %34 ; <float> [#uses=2]
%36 = fmul float %20, %35 ; <float> [#uses=1]
%37 = fsub float %36, undef ; <float> [#uses=1]
@@ -71,12 +71,12 @@ bb3.i: ; preds = %bb2.i, %bb
%46 = fadd float %44, %45 ; <float> [#uses=1]
%47 = fmul float %33, %43 ; <float> [#uses=1]
%48 = fadd float %46, %47 ; <float> [#uses=2]
- %49 = load float* %9, align 4 ; <float> [#uses=2]
+ %49 = load float, float* %9, align 4 ; <float> [#uses=2]
%50 = fsub float %30, %49 ; <float> [#uses=1]
- %51 = load float* %12, align 4 ; <float> [#uses=3]
+ %51 = load float, float* %12, align 4 ; <float> [#uses=3]
%52 = fsub float %32, %51 ; <float> [#uses=2]
- %53 = load float* undef, align 4 ; <float> [#uses=2]
- %54 = load float* %24, align 4 ; <float> [#uses=2]
+ %53 = load float, float* undef, align 4 ; <float> [#uses=2]
+ %54 = load float, float* %24, align 4 ; <float> [#uses=2]
%55 = fmul float %54, undef ; <float> [#uses=1]
%56 = fmul float undef, %52 ; <float> [#uses=1]
%57 = fsub float %55, %56 ; <float> [#uses=1]
@@ -93,7 +93,7 @@ bb3.i: ; preds = %bb2.i, %bb
%68 = fsub float %51, %31 ; <float> [#uses=1]
%69 = fsub float %53, %33 ; <float> [#uses=1]
%70 = fmul float undef, %67 ; <float> [#uses=1]
- %71 = load float* undef, align 4 ; <float> [#uses=2]
+ %71 = load float, float* undef, align 4 ; <float> [#uses=2]
%72 = fmul float %71, %69 ; <float> [#uses=1]
%73 = fsub float %70, %72 ; <float> [#uses=1]
%74 = fmul float %71, %68 ; <float> [#uses=1]
diff --git a/test/CodeGen/ARM/2009-11-13-VRRewriterCrash.ll b/test/CodeGen/ARM/2009-11-13-VRRewriterCrash.ll
index 832ff4fa0987..8a14804dcf85 100644
--- a/test/CodeGen/ARM/2009-11-13-VRRewriterCrash.ll
+++ b/test/CodeGen/ARM/2009-11-13-VRRewriterCrash.ll
@@ -11,7 +11,7 @@
define arm_aapcs_vfpcc %bar* @aaa(%foo* nocapture %this, %quuz* %a, %quuz* %b, %quuz* %c, i8 zeroext %forced) {
entry:
- %0 = load %bar** undef, align 4 ; <%bar*> [#uses=2]
+ %0 = load %bar*, %bar** undef, align 4 ; <%bar*> [#uses=2]
br i1 false, label %bb85, label %bb
bb: ; preds = %entry
@@ -21,13 +21,13 @@ bb2.i: ; preds = %bb
br label %bb3.i
bb3.i: ; preds = %bb2.i, %bb
- %1 = getelementptr inbounds %quuz* %a, i32 0, i32 1, i32 0, i32 0 ; <float*> [#uses=1]
+ %1 = getelementptr inbounds %quuz, %quuz* %a, i32 0, i32 1, i32 0, i32 0 ; <float*> [#uses=1]
%2 = fsub float 0.000000e+00, undef ; <float> [#uses=1]
- %3 = getelementptr inbounds %quuz* %b, i32 0, i32 1, i32 0, i32 1 ; <float*> [#uses=1]
- %4 = getelementptr inbounds %quuz* %b, i32 0, i32 1, i32 0, i32 2 ; <float*> [#uses=1]
+ %3 = getelementptr inbounds %quuz, %quuz* %b, i32 0, i32 1, i32 0, i32 1 ; <float*> [#uses=1]
+ %4 = getelementptr inbounds %quuz, %quuz* %b, i32 0, i32 1, i32 0, i32 2 ; <float*> [#uses=1]
%5 = fsub float 0.000000e+00, undef ; <float> [#uses=1]
- %6 = getelementptr inbounds %quuz* %c, i32 0, i32 1, i32 0, i32 0 ; <float*> [#uses=1]
- %7 = getelementptr inbounds %quuz* %c, i32 0, i32 1, i32 0, i32 1 ; <float*> [#uses=1]
+ %6 = getelementptr inbounds %quuz, %quuz* %c, i32 0, i32 1, i32 0, i32 0 ; <float*> [#uses=1]
+ %7 = getelementptr inbounds %quuz, %quuz* %c, i32 0, i32 1, i32 0, i32 1 ; <float*> [#uses=1]
%8 = fsub float undef, undef ; <float> [#uses=1]
%9 = fmul float 0.000000e+00, %8 ; <float> [#uses=1]
%10 = fmul float %5, 0.000000e+00 ; <float> [#uses=1]
@@ -36,17 +36,17 @@ bb3.i: ; preds = %bb2.i, %bb
%13 = fmul float 0.000000e+00, undef ; <float> [#uses=1]
%14 = fsub float %12, %13 ; <float> [#uses=2]
store float %14, float* undef
- %15 = getelementptr inbounds %bar* %0, i32 0, i32 0, i32 0, i32 3 ; <float*> [#uses=1]
+ %15 = getelementptr inbounds %bar, %bar* %0, i32 0, i32 0, i32 0, i32 3 ; <float*> [#uses=1]
store float 0.000000e+00, float* %15
%16 = fmul float %11, %11 ; <float> [#uses=1]
%17 = fadd float %16, 0.000000e+00 ; <float> [#uses=1]
%18 = fadd float %17, undef ; <float> [#uses=1]
%19 = call arm_aapcs_vfpcc float @sqrtf(float %18) readnone ; <float> [#uses=2]
%20 = fcmp ogt float %19, 0x3F1A36E2E0000000 ; <i1> [#uses=1]
- %21 = load float* %1, align 4 ; <float> [#uses=2]
- %22 = load float* %3, align 4 ; <float> [#uses=2]
- %23 = load float* undef, align 4 ; <float> [#uses=2]
- %24 = load float* %4, align 4 ; <float> [#uses=2]
+ %21 = load float, float* %1, align 4 ; <float> [#uses=2]
+ %22 = load float, float* %3, align 4 ; <float> [#uses=2]
+ %23 = load float, float* undef, align 4 ; <float> [#uses=2]
+ %24 = load float, float* %4, align 4 ; <float> [#uses=2]
%25 = fsub float %23, %24 ; <float> [#uses=2]
%26 = fmul float 0.000000e+00, %25 ; <float> [#uses=1]
%27 = fsub float %26, undef ; <float> [#uses=1]
@@ -59,11 +59,11 @@ bb3.i: ; preds = %bb2.i, %bb
%34 = fadd float %32, %33 ; <float> [#uses=1]
%35 = fmul float %23, %31 ; <float> [#uses=1]
%36 = fadd float %34, %35 ; <float> [#uses=1]
- %37 = load float* %6, align 4 ; <float> [#uses=2]
- %38 = load float* %7, align 4 ; <float> [#uses=2]
+ %37 = load float, float* %6, align 4 ; <float> [#uses=2]
+ %38 = load float, float* %7, align 4 ; <float> [#uses=2]
%39 = fsub float %22, %38 ; <float> [#uses=2]
- %40 = load float* undef, align 4 ; <float> [#uses=1]
- %41 = load float* null, align 4 ; <float> [#uses=2]
+ %40 = load float, float* undef, align 4 ; <float> [#uses=1]
+ %41 = load float, float* null, align 4 ; <float> [#uses=2]
%42 = fmul float %41, undef ; <float> [#uses=1]
%43 = fmul float undef, %39 ; <float> [#uses=1]
%44 = fsub float %42, %43 ; <float> [#uses=1]
@@ -80,7 +80,7 @@ bb3.i: ; preds = %bb2.i, %bb
%55 = fmul float undef, undef ; <float> [#uses=1]
%56 = fsub float %54, %55 ; <float> [#uses=1]
%57 = fmul float undef, %53 ; <float> [#uses=1]
- %58 = load float* undef, align 4 ; <float> [#uses=2]
+ %58 = load float, float* undef, align 4 ; <float> [#uses=2]
%59 = fmul float %58, undef ; <float> [#uses=1]
%60 = fsub float %57, %59 ; <float> [#uses=1]
%61 = fmul float %58, undef ; <float> [#uses=1]
@@ -100,7 +100,7 @@ bb3.i: ; preds = %bb2.i, %bb
br i1 %72, label %bb4.i97, label %ccc.exit98
bb4.i97: ; preds = %bb3.i
- %73 = load %bar** undef, align 4 ; <%bar*> [#uses=0]
+ %73 = load %bar*, %bar** undef, align 4 ; <%bar*> [#uses=0]
br label %ccc.exit98
ccc.exit98: ; preds = %bb4.i97, %bb3.i
diff --git a/test/CodeGen/ARM/2009-12-02-vtrn-undef.ll b/test/CodeGen/ARM/2009-12-02-vtrn-undef.ll
index f89a5de77b3f..8cba9116481e 100644
--- a/test/CodeGen/ARM/2009-12-02-vtrn-undef.ll
+++ b/test/CodeGen/ARM/2009-12-02-vtrn-undef.ll
@@ -11,9 +11,9 @@ entry:
;CHECK: vtrn.16
%0 = shufflevector <8 x i16> %tmp.0, <8 x i16> undef, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
%1 = shufflevector <8 x i16> %tmp.0, <8 x i16> undef, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7>
- %agg.result1218.0 = getelementptr %struct.int16x8x2_t* %agg.result, i32 0, i32 0, i32 0, i32 0 ; <<8 x i16>*>
+ %agg.result1218.0 = getelementptr %struct.int16x8x2_t, %struct.int16x8x2_t* %agg.result, i32 0, i32 0, i32 0, i32 0 ; <<8 x i16>*>
store <8 x i16> %0, <8 x i16>* %agg.result1218.0, align 16
- %agg.result12.1.0 = getelementptr %struct.int16x8x2_t* %agg.result, i32 0, i32 0, i32 1, i32 0 ; <<8 x i16>*>
+ %agg.result12.1.0 = getelementptr %struct.int16x8x2_t, %struct.int16x8x2_t* %agg.result, i32 0, i32 0, i32 1, i32 0 ; <<8 x i16>*>
store <8 x i16> %1, <8 x i16>* %agg.result12.1.0, align 16
ret void
}
@@ -25,9 +25,9 @@ define void @t2(%struct.int16x8x2_t* nocapture %ptr, <4 x i16> %a.0, <4 x i16> %
entry:
%0 = shufflevector <4 x i16> %a.0, <4 x i16> undef, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 undef, i32 undef, i32 undef, i32 undef>
%1 = shufflevector <4 x i16> %a.0, <4 x i16> undef, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
- %ptr26.0 = getelementptr inbounds %struct.int16x8x2_t* %ptr, i32 0, i32 0, i32 0, i32 0
+ %ptr26.0 = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* %ptr, i32 0, i32 0, i32 0, i32 0
store <8 x i16> %0, <8 x i16>* %ptr26.0, align 16
- %ptr20.1.0 = getelementptr inbounds %struct.int16x8x2_t* %ptr, i32 0, i32 0, i32 1, i32 0
+ %ptr20.1.0 = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* %ptr, i32 0, i32 0, i32 1, i32 0
store <8 x i16> %1, <8 x i16>* %ptr20.1.0, align 16
ret void
}
diff --git a/test/CodeGen/ARM/2010-03-04-eabi-fp-spill.ll b/test/CodeGen/ARM/2010-03-04-eabi-fp-spill.ll
index f7adf73263ff..d21b488bb3a0 100644
--- a/test/CodeGen/ARM/2010-03-04-eabi-fp-spill.ll
+++ b/test/CodeGen/ARM/2010-03-04-eabi-fp-spill.ll
@@ -1,7 +1,7 @@
; RUN: llc < %s -mtriple=arm-unknown-linux-gnueabi
define void @"java.lang.String::getChars"([84 x i8]* %method, i32 %base_pc, [788 x i8]* %thread) {
- %1 = load i32* undef ; <i32> [#uses=1]
+ %1 = load i32, i32* undef ; <i32> [#uses=1]
%2 = sub i32 %1, 48 ; <i32> [#uses=1]
br i1 undef, label %stack_overflow, label %no_overflow
@@ -10,13 +10,13 @@ stack_overflow: ; preds = %0
no_overflow: ; preds = %0
%frame = inttoptr i32 %2 to [17 x i32]* ; <[17 x i32]*> [#uses=4]
- %3 = load i32* undef ; <i32> [#uses=1]
- %4 = load i32* null ; <i32> [#uses=1]
- %5 = getelementptr inbounds [17 x i32]* %frame, i32 0, i32 13 ; <i32*> [#uses=1]
+ %3 = load i32, i32* undef ; <i32> [#uses=1]
+ %4 = load i32, i32* null ; <i32> [#uses=1]
+ %5 = getelementptr inbounds [17 x i32], [17 x i32]* %frame, i32 0, i32 13 ; <i32*> [#uses=1]
%6 = bitcast i32* %5 to [8 x i8]** ; <[8 x i8]**> [#uses=1]
- %7 = load [8 x i8]** %6 ; <[8 x i8]*> [#uses=1]
- %8 = getelementptr inbounds [17 x i32]* %frame, i32 0, i32 12 ; <i32*> [#uses=1]
- %9 = load i32* %8 ; <i32> [#uses=1]
+ %7 = load [8 x i8]*, [8 x i8]** %6 ; <[8 x i8]*> [#uses=1]
+ %8 = getelementptr inbounds [17 x i32], [17 x i32]* %frame, i32 0, i32 12 ; <i32*> [#uses=1]
+ %9 = load i32, i32* %8 ; <i32> [#uses=1]
br i1 undef, label %bci_13, label %bci_4
bci_13: ; preds = %no_overflow
@@ -27,18 +27,18 @@ bci_30: ; preds = %bci_13
bci_46: ; preds = %bci_30
%10 = sub i32 %4, %3 ; <i32> [#uses=1]
- %11 = load [8 x i8]** null ; <[8 x i8]*> [#uses=1]
+ %11 = load [8 x i8]*, [8 x i8]** null ; <[8 x i8]*> [#uses=1]
%callee = bitcast [8 x i8]* %11 to [84 x i8]* ; <[84 x i8]*> [#uses=1]
%12 = bitcast i8* undef to i32* ; <i32*> [#uses=1]
- %base_pc7 = load i32* %12 ; <i32> [#uses=2]
+ %base_pc7 = load i32, i32* %12 ; <i32> [#uses=2]
%13 = add i32 %base_pc7, 0 ; <i32> [#uses=1]
%14 = inttoptr i32 %13 to void ([84 x i8]*, i32, [788 x i8]*)** ; <void ([84 x i8]*, i32, [788 x i8]*)**> [#uses=1]
- %entry_point = load void ([84 x i8]*, i32, [788 x i8]*)** %14 ; <void ([84 x i8]*, i32, [788 x i8]*)*> [#uses=1]
- %15 = getelementptr inbounds [17 x i32]* %frame, i32 0, i32 1 ; <i32*> [#uses=1]
+ %entry_point = load void ([84 x i8]*, i32, [788 x i8]*)*, void ([84 x i8]*, i32, [788 x i8]*)** %14 ; <void ([84 x i8]*, i32, [788 x i8]*)*> [#uses=1]
+ %15 = getelementptr inbounds [17 x i32], [17 x i32]* %frame, i32 0, i32 1 ; <i32*> [#uses=1]
%16 = ptrtoint i32* %15 to i32 ; <i32> [#uses=1]
%stack_pointer_addr9 = bitcast i8* undef to i32* ; <i32*> [#uses=1]
store i32 %16, i32* %stack_pointer_addr9
- %17 = getelementptr inbounds [17 x i32]* %frame, i32 0, i32 2 ; <i32*> [#uses=1]
+ %17 = getelementptr inbounds [17 x i32], [17 x i32]* %frame, i32 0, i32 2 ; <i32*> [#uses=1]
store i32 %9, i32* %17
store i32 %10, i32* undef
store [84 x i8]* %method, [84 x i8]** undef
diff --git a/test/CodeGen/ARM/2010-03-04-stm-undef-addr.ll b/test/CodeGen/ARM/2010-03-04-stm-undef-addr.ll
index 5e75d460aa7f..a1923ec2c3e0 100644
--- a/test/CodeGen/ARM/2010-03-04-stm-undef-addr.ll
+++ b/test/CodeGen/ARM/2010-03-04-stm-undef-addr.ll
@@ -9,10 +9,10 @@ stack_overflow: ; preds = %0
no_overflow: ; preds = %0
%frame = inttoptr i32 %1 to [17 x i32]* ; <[17 x i32]*> [#uses=4]
- %2 = load i32* null ; <i32> [#uses=2]
- %3 = getelementptr inbounds [17 x i32]* %frame, i32 0, i32 14 ; <i32*> [#uses=1]
- %4 = load i32* %3 ; <i32> [#uses=2]
- %5 = load [8 x i8]** undef ; <[8 x i8]*> [#uses=2]
+ %2 = load i32, i32* null ; <i32> [#uses=2]
+ %3 = getelementptr inbounds [17 x i32], [17 x i32]* %frame, i32 0, i32 14 ; <i32*> [#uses=1]
+ %4 = load i32, i32* %3 ; <i32> [#uses=2]
+ %5 = load [8 x i8]*, [8 x i8]** undef ; <[8 x i8]*> [#uses=2]
br i1 undef, label %bci_13, label %bci_4
bci_13: ; preds = %no_overflow
@@ -33,11 +33,11 @@ no_exception: ; preds = %bci_46
ret void
bci_35: ; preds = %bci_30
- %7 = getelementptr inbounds [17 x i32]* %frame, i32 0, i32 15 ; <i32*> [#uses=1]
+ %7 = getelementptr inbounds [17 x i32], [17 x i32]* %frame, i32 0, i32 15 ; <i32*> [#uses=1]
store i32 %2, i32* %7
- %8 = getelementptr inbounds [17 x i32]* %frame, i32 0, i32 14 ; <i32*> [#uses=1]
+ %8 = getelementptr inbounds [17 x i32], [17 x i32]* %frame, i32 0, i32 14 ; <i32*> [#uses=1]
store i32 %4, i32* %8
- %9 = getelementptr inbounds [17 x i32]* %frame, i32 0, i32 13 ; <i32*> [#uses=1]
+ %9 = getelementptr inbounds [17 x i32], [17 x i32]* %frame, i32 0, i32 13 ; <i32*> [#uses=1]
%10 = bitcast i32* %9 to [8 x i8]** ; <[8 x i8]**> [#uses=1]
store [8 x i8]* %5, [8 x i8]** %10
call void inttoptr (i32 13839116 to void ([788 x i8]*, i32)*)([788 x i8]* %thread, i32 7)
diff --git a/test/CodeGen/ARM/2010-04-15-ScavengerDebugValue.ll b/test/CodeGen/ARM/2010-04-15-ScavengerDebugValue.ll
index 6f7db9352188..638b26c73146 100644
--- a/test/CodeGen/ARM/2010-04-15-ScavengerDebugValue.ll
+++ b/test/CodeGen/ARM/2010-04-15-ScavengerDebugValue.ll
@@ -5,7 +5,7 @@ target triple = "armv4t-apple-darwin10"
define hidden i32 @__addvsi3(i32 %a, i32 %b) nounwind {
entry:
- tail call void @llvm.dbg.value(metadata i32 %b, i64 0, metadata !0, metadata !{!"0x102"})
+ tail call void @llvm.dbg.value(metadata i32 %b, i64 0, metadata !0, metadata !DIExpression()), !dbg !DILocation(scope: !1)
%0 = add nsw i32 %b, %a, !dbg !9 ; <i32> [#uses=1]
ret i32 %0, !dbg !11
}
@@ -14,19 +14,19 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnon
!llvm.dbg.cu = !{!3}
!llvm.module.flags = !{!15}
-!0 = !{!"0x101\00b\0093\000", !1, !2, !6} ; [ DW_TAG_arg_variable ]
-!1 = !{!"0x2e\00__addvsi3\00__addvsi3\00__addvsi3\0094\000\001\000\006\000\000\000", !12, null, !4, null, null, null, null, null} ; [ DW_TAG_subprogram ]
-!2 = !{!"0x29", !12} ; [ DW_TAG_file_type ]
-!12 = !{!"libgcc2.c", !"/Users/bwilson/local/nightly/test-2010-04-14/build/llvmgcc.roots/llvmgcc~obj/src/gcc"}
-!3 = !{!"0x11\001\004.2.1 (Based on Apple Inc. build 5658) (LLVM build 00)\001\00\000\00\000", !12, !13, !13, !14, null, null} ; [ DW_TAG_compile_unit ]
-!4 = !{!"0x15\00\000\000\000\000\000\000", !12, !2, null, !5, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!0 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "b", line: 93, arg: 0, scope: !1, file: !2, type: !6)
+!1 = !DISubprogram(name: "__addvsi3", linkageName: "__addvsi3", line: 94, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, file: !12, scope: null, type: !4)
+!2 = !DIFile(filename: "libgcc2.c", directory: "/Users/bwilson/local/nightly/test-2010-04-14/build/llvmgcc.roots/llvmgcc~obj/src/gcc")
+!12 = !DIFile(filename: "libgcc2.c", directory: "/Users/bwilson/local/nightly/test-2010-04-14/build/llvmgcc.roots/llvmgcc~obj/src/gcc")
+!3 = !DICompileUnit(language: DW_LANG_C89, producer: "4.2.1 (Based on Apple Inc. build 5658) (LLVM build 00)", isOptimized: true, emissionKind: 0, file: !12, enums: !13, retainedTypes: !13, subprograms: !14)
+!4 = !DISubroutineType(types: !5)
!5 = !{!6, !6, !6}
-!6 = !{!"0x16\00SItype\00152\000\000\000\000", !12, null, !8} ; [ DW_TAG_typedef ]
-!7 = !{!"0x29", !"libgcc2.h", !"/Users/bwilson/local/nightly/test-2010-04-14/build/llvmgcc.roots/llvmgcc~obj/src/gcc", !3} ; [ DW_TAG_file_type ]
-!8 = !{!"0x24\00int\000\0032\0032\000\000\005", !12, !2} ; [ DW_TAG_base_type ]
-!9 = !MDLocation(line: 95, scope: !10)
-!10 = !{!"0xb\0094\000\000", !12, !1} ; [ DW_TAG_lexical_block ]
-!11 = !MDLocation(line: 100, scope: !10)
-!13 = !{i32 0}
+!6 = !DIDerivedType(tag: DW_TAG_typedef, name: "SItype", line: 152, file: !12, baseType: !8)
+!7 = !DIFile(filename: "libgcc2.h", directory: "/Users/bwilson/local/nightly/test-2010-04-14/build/llvmgcc.roots/llvmgcc~obj/src/gcc")
+!8 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!9 = !DILocation(line: 95, scope: !10)
+!10 = distinct !DILexicalBlock(line: 94, column: 0, file: !12, scope: !1)
+!11 = !DILocation(line: 100, scope: !10)
+!13 = !{}
!14 = !{!1}
-!15 = !{i32 1, !"Debug Info Version", i32 2}
+!15 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/CodeGen/ARM/2010-05-17-FastAllocCrash.ll b/test/CodeGen/ARM/2010-05-17-FastAllocCrash.ll
index 7d4cc6e3a75a..5f5489a1ca74 100644
--- a/test/CodeGen/ARM/2010-05-17-FastAllocCrash.ll
+++ b/test/CodeGen/ARM/2010-05-17-FastAllocCrash.ll
@@ -82,17 +82,17 @@ cond_true1369.preheader: ; preds = %cond_true1254
ret void
bb1567: ; preds = %cond_true1254
- %tmp1591 = load i64* getelementptr inbounds (%struct.CHESS_POSITION* @search, i32 0, i32 4) ; <i64> [#uses=1]
+ %tmp1591 = load i64, i64* getelementptr inbounds (%struct.CHESS_POSITION, %struct.CHESS_POSITION* @search, i32 0, i32 4) ; <i64> [#uses=1]
%tmp1572 = tail call fastcc i32 @FirstOne() ; <i32> [#uses=1]
- %tmp1594 = load i32* undef ; <i32> [#uses=1]
+ %tmp1594 = load i32, i32* undef ; <i32> [#uses=1]
%tmp1594.upgrd.5 = trunc i32 %tmp1594 to i8 ; <i8> [#uses=1]
%shift.upgrd.6 = zext i8 %tmp1594.upgrd.5 to i64 ; <i64> [#uses=1]
%tmp1595 = lshr i64 %tmp1591, %shift.upgrd.6 ; <i64> [#uses=1]
%tmp1595.upgrd.7 = trunc i64 %tmp1595 to i32 ; <i32> [#uses=1]
%tmp1596 = and i32 %tmp1595.upgrd.7, 255 ; <i32> [#uses=1]
%gep.upgrd.8 = zext i32 %tmp1596 to i64 ; <i64> [#uses=1]
- %tmp1598 = getelementptr [64 x [256 x i32]]* @bishop_mobility_rr45, i32 0, i32 %tmp1572, i64 %gep.upgrd.8 ; <i32*> [#uses=1]
- %tmp1599 = load i32* %tmp1598 ; <i32> [#uses=1]
+ %tmp1598 = getelementptr [64 x [256 x i32]], [64 x [256 x i32]]* @bishop_mobility_rr45, i32 0, i32 %tmp1572, i64 %gep.upgrd.8 ; <i32*> [#uses=1]
+ %tmp1599 = load i32, i32* %tmp1598 ; <i32> [#uses=1]
%tmp1602 = sub i32 0, %tmp1599 ; <i32> [#uses=1]
br i1 undef, label %cond_next1637, label %cond_true1607
diff --git a/test/CodeGen/ARM/2010-05-18-LocalAllocCrash.ll b/test/CodeGen/ARM/2010-05-18-LocalAllocCrash.ll
index 946164321a2c..deb588403265 100644
--- a/test/CodeGen/ARM/2010-05-18-LocalAllocCrash.ll
+++ b/test/CodeGen/ARM/2010-05-18-LocalAllocCrash.ll
@@ -16,15 +16,15 @@ entry:
%vla10 = alloca i8, i32 undef, align 1 ; <i8*> [#uses=1]
%vla14 = alloca i8, i32 undef, align 1 ; <i8*> [#uses=1]
%vla18 = alloca i8, i32 undef, align 1 ; <i8*> [#uses=1]
- %tmp21 = load i32* undef ; <i32> [#uses=1]
+ %tmp21 = load i32, i32* undef ; <i32> [#uses=1]
%0 = mul i32 1, %tmp21 ; <i32> [#uses=1]
%vla22 = alloca i8, i32 %0, align 1 ; <i8*> [#uses=1]
- call void (...)* @zz(i8* getelementptr inbounds ([1 x i8]* @.str, i32 0, i32 0), i32 2, i32 1)
+ call void (...) @zz(i8* getelementptr inbounds ([1 x i8], [1 x i8]* @.str, i32 0, i32 0), i32 2, i32 1)
br i1 undef, label %if.then, label %if.end36
if.then: ; preds = %entry
- %call = call i32 (...)* @x(%struct.q* undef, i8* undef, i8* %vla6, i8* %vla10, i32 undef) ; <i32> [#uses=0]
- %call35 = call i32 (...)* @x(%struct.q* undef, i8* %vla14, i8* %vla18, i8* %vla22, i32 undef) ; <i32> [#uses=0]
+ %call = call i32 (...) @x(%struct.q* undef, i8* undef, i8* %vla6, i8* %vla10, i32 undef) ; <i32> [#uses=0]
+ %call35 = call i32 (...) @x(%struct.q* undef, i8* %vla14, i8* %vla18, i8* %vla22, i32 undef) ; <i32> [#uses=0]
unreachable
if.end36: ; preds = %entry
diff --git a/test/CodeGen/ARM/2010-05-18-PostIndexBug.ll b/test/CodeGen/ARM/2010-05-18-PostIndexBug.ll
index b040b2d91cd6..24469cc3717e 100644
--- a/test/CodeGen/ARM/2010-05-18-PostIndexBug.ll
+++ b/test/CodeGen/ARM/2010-05-18-PostIndexBug.ll
@@ -16,12 +16,12 @@ entry:
; THUMB-DAG: movs [[VAL:r[0-9]+]], #0
; THUMB-NOT: str {{[a-z0-9]+}}, [{{[a-z0-9]+}}], {{[a-z0-9]+}}
; THUMB: str [[VAL]], [r[[ADDR]]]
- %0 = getelementptr inbounds %struct.foo* %this, i32 0, i32 1 ; <i64*> [#uses=1]
+ %0 = getelementptr inbounds %struct.foo, %struct.foo* %this, i32 0, i32 1 ; <i64*> [#uses=1]
store i32 0, i32* inttoptr (i32 8 to i32*), align 8
br i1 %tst, label %bb.nph96, label %bb3
bb3: ; preds = %entry
- %1 = load i64* %0, align 4 ; <i64> [#uses=0]
+ %1 = load i64, i64* %0, align 4 ; <i64> [#uses=0]
ret i8 42
bb.nph96: ; preds = %entry
diff --git a/test/CodeGen/ARM/2010-05-19-Shuffles.ll b/test/CodeGen/ARM/2010-05-19-Shuffles.ll
index 587c0afcb714..94d0f4abfb7e 100644
--- a/test/CodeGen/ARM/2010-05-19-Shuffles.ll
+++ b/test/CodeGen/ARM/2010-05-19-Shuffles.ll
@@ -14,7 +14,7 @@ define <8 x i8> @f2(<8 x i8> %x) nounwind {
}
define void @f3(<4 x i64>* %xp) nounwind {
- %x = load <4 x i64>* %xp
+ %x = load <4 x i64>, <4 x i64>* %xp
%y = shufflevector <4 x i64> %x, <4 x i64> undef, <4 x i32> <i32 0, i32 3, i32 2, i32 1>
store <4 x i64> %y, <4 x i64>* %xp
ret void
diff --git a/test/CodeGen/ARM/2010-05-21-BuildVector.ll b/test/CodeGen/ARM/2010-05-21-BuildVector.ll
index 5bc08b037a1c..6a6ccf3d0a01 100644
--- a/test/CodeGen/ARM/2010-05-21-BuildVector.ll
+++ b/test/CodeGen/ARM/2010-05-21-BuildVector.ll
@@ -3,36 +3,36 @@
define void @test(float* %fltp, i32 %packedValue, float* %table) nounwind {
entry:
- %0 = load float* %fltp
+ %0 = load float, float* %fltp
%1 = insertelement <4 x float> undef, float %0, i32 0
%2 = shufflevector <4 x float> %1, <4 x float> undef, <4 x i32> zeroinitializer
%3 = shl i32 %packedValue, 16
%4 = ashr i32 %3, 30
%.sum = add i32 %4, 4
- %5 = getelementptr inbounds float* %table, i32 %.sum
+ %5 = getelementptr inbounds float, float* %table, i32 %.sum
;CHECK: vldr s
- %6 = load float* %5, align 4
+ %6 = load float, float* %5, align 4
%tmp11 = insertelement <4 x float> undef, float %6, i32 0
%7 = shl i32 %packedValue, 18
%8 = ashr i32 %7, 30
%.sum12 = add i32 %8, 4
- %9 = getelementptr inbounds float* %table, i32 %.sum12
+ %9 = getelementptr inbounds float, float* %table, i32 %.sum12
;CHECK: vldr s
- %10 = load float* %9, align 4
+ %10 = load float, float* %9, align 4
%tmp9 = insertelement <4 x float> %tmp11, float %10, i32 1
%11 = shl i32 %packedValue, 20
%12 = ashr i32 %11, 30
%.sum13 = add i32 %12, 4
- %13 = getelementptr inbounds float* %table, i32 %.sum13
+ %13 = getelementptr inbounds float, float* %table, i32 %.sum13
;CHECK: vldr s
- %14 = load float* %13, align 4
+ %14 = load float, float* %13, align 4
%tmp7 = insertelement <4 x float> %tmp9, float %14, i32 2
%15 = shl i32 %packedValue, 22
%16 = ashr i32 %15, 30
%.sum14 = add i32 %16, 4
- %17 = getelementptr inbounds float* %table, i32 %.sum14
+ %17 = getelementptr inbounds float, float* %table, i32 %.sum14
;CHECK: vldr s
- %18 = load float* %17, align 4
+ %18 = load float, float* %17, align 4
%tmp5 = insertelement <4 x float> %tmp7, float %18, i32 3
%19 = fmul <4 x float> %tmp5, %2
%20 = bitcast float* %fltp to i8*
diff --git a/test/CodeGen/ARM/2010-06-11-vmovdrr-bitcast.ll b/test/CodeGen/ARM/2010-06-11-vmovdrr-bitcast.ll
index f7ceb6e7e480..f86c3ba9ef6e 100644
--- a/test/CodeGen/ARM/2010-06-11-vmovdrr-bitcast.ll
+++ b/test/CodeGen/ARM/2010-06-11-vmovdrr-bitcast.ll
@@ -6,7 +6,7 @@
define void @foo(%struct.__int8x8x2_t* nocapture %a, i8* %b) nounwind {
entry:
%0 = bitcast %struct.__int8x8x2_t* %a to i128* ; <i128*> [#uses=1]
- %srcval = load i128* %0, align 8 ; <i128> [#uses=2]
+ %srcval = load i128, i128* %0, align 8 ; <i128> [#uses=2]
%tmp6 = trunc i128 %srcval to i64 ; <i64> [#uses=1]
%tmp8 = lshr i128 %srcval, 64 ; <i128> [#uses=1]
%tmp9 = trunc i128 %tmp8 to i64 ; <i64> [#uses=1]
diff --git a/test/CodeGen/ARM/2010-06-21-LdStMultipleBug.ll b/test/CodeGen/ARM/2010-06-21-LdStMultipleBug.ll
index 816a6d4f4b93..6f55ac058054 100644
--- a/test/CodeGen/ARM/2010-06-21-LdStMultipleBug.ll
+++ b/test/CodeGen/ARM/2010-06-21-LdStMultipleBug.ll
@@ -13,15 +13,15 @@
define void @TW_oldinput(%struct.FILE* nocapture %fp) nounwind {
entry:
%xcenter = alloca i32, align 4 ; <i32*> [#uses=2]
- %0 = call i32 (%struct.FILE*, i8*, ...)* @fscanf(%struct.FILE* %fp, i8* getelementptr inbounds ([14 x i8]* @.str2708, i32 0, i32 0), i32* undef, i32* undef, i32* %xcenter, i32* null) nounwind ; <i32> [#uses=1]
+ %0 = call i32 (%struct.FILE*, i8*, ...) @fscanf(%struct.FILE* %fp, i8* getelementptr inbounds ([14 x i8], [14 x i8]* @.str2708, i32 0, i32 0), i32* undef, i32* undef, i32* %xcenter, i32* null) nounwind ; <i32> [#uses=1]
%1 = icmp eq i32 %0, 4 ; <i1> [#uses=1]
br i1 %1, label %bb, label %return
bb: ; preds = %bb445, %entry
- %2 = load %struct.cellbox** undef, align 4 ; <%struct.cellbox*> [#uses=2]
- %3 = getelementptr inbounds %struct.cellbox* %2, i32 0, i32 3 ; <i32*> [#uses=1]
+ %2 = load %struct.cellbox*, %struct.cellbox** undef, align 4 ; <%struct.cellbox*> [#uses=2]
+ %3 = getelementptr inbounds %struct.cellbox, %struct.cellbox* %2, i32 0, i32 3 ; <i32*> [#uses=1]
store i32 undef, i32* %3, align 4
- %4 = load i32* undef, align 4 ; <i32> [#uses=3]
+ %4 = load i32, i32* undef, align 4 ; <i32> [#uses=3]
%5 = icmp eq i32 undef, 1 ; <i1> [#uses=1]
br i1 %5, label %bb10, label %bb445
@@ -29,12 +29,12 @@ bb10: ; preds = %bb
br i1 undef, label %bb11, label %bb445
bb11: ; preds = %bb10
- %6 = load %struct.tilebox** undef, align 4 ; <%struct.tilebox*> [#uses=3]
- %7 = load %struct.termbox** null, align 4 ; <%struct.termbox*> [#uses=1]
- %8 = getelementptr inbounds %struct.tilebox* %6, i32 0, i32 13 ; <i32*> [#uses=1]
- %9 = load i32* %8, align 4 ; <i32> [#uses=3]
- %10 = getelementptr inbounds %struct.tilebox* %6, i32 0, i32 15 ; <i32*> [#uses=1]
- %11 = load i32* %10, align 4 ; <i32> [#uses=1]
+ %6 = load %struct.tilebox*, %struct.tilebox** undef, align 4 ; <%struct.tilebox*> [#uses=3]
+ %7 = load %struct.termbox*, %struct.termbox** null, align 4 ; <%struct.termbox*> [#uses=1]
+ %8 = getelementptr inbounds %struct.tilebox, %struct.tilebox* %6, i32 0, i32 13 ; <i32*> [#uses=1]
+ %9 = load i32, i32* %8, align 4 ; <i32> [#uses=3]
+ %10 = getelementptr inbounds %struct.tilebox, %struct.tilebox* %6, i32 0, i32 15 ; <i32*> [#uses=1]
+ %11 = load i32, i32* %10, align 4 ; <i32> [#uses=1]
br i1 false, label %bb12, label %bb13
bb12: ; preds = %bb11
@@ -67,7 +67,7 @@ bb21: ; preds = %bb13
%25 = zext i1 %not.461 to i32 ; <i32> [#uses=1]
%iftmp.43.0 = add i32 %23, %iftmp.41.0.neg ; <i32> [#uses=1]
%26 = add i32 %iftmp.43.0, %25 ; <i32> [#uses=1]
- %27 = getelementptr inbounds %struct.tilebox* %6, i32 0, i32 10 ; <i32*> [#uses=1]
+ %27 = getelementptr inbounds %struct.tilebox, %struct.tilebox* %6, i32 0, i32 10 ; <i32*> [#uses=1]
store i32 %26, i32* %27, align 4
%28 = fptosi double undef to i32 ; <i32> [#uses=1]
%iftmp.45.0 = add i32 %28, %iftmp.40.0.neg ; <i32> [#uses=1]
@@ -77,7 +77,7 @@ bb21: ; preds = %bb13
bb36: ; preds = %bb43.loopexit, %bb36
%termptr.0478 = phi %struct.termbox* [ %42, %bb36 ], [ %7, %bb43.loopexit ] ; <%struct.termbox*> [#uses=1]
- %30 = load i32* undef, align 4 ; <i32> [#uses=1]
+ %30 = load i32, i32* undef, align 4 ; <i32> [#uses=1]
%31 = sub nsw i32 %30, %9 ; <i32> [#uses=1]
%32 = sitofp i32 %31 to double ; <double> [#uses=1]
%33 = fdiv double %32, 0.000000e+00 ; <double> [#uses=1]
@@ -92,8 +92,8 @@ bb36: ; preds = %bb43.loopexit, %bb3
%iftmp.47.0 = add i32 %39, %iftmp.40.0.neg ; <i32> [#uses=1]
%40 = add i32 %iftmp.47.0, 0 ; <i32> [#uses=1]
store i32 %40, i32* undef, align 4
- %41 = getelementptr inbounds %struct.termbox* %termptr.0478, i32 0, i32 0 ; <%struct.termbox**> [#uses=1]
- %42 = load %struct.termbox** %41, align 4 ; <%struct.termbox*> [#uses=2]
+ %41 = getelementptr inbounds %struct.termbox, %struct.termbox* %termptr.0478, i32 0, i32 0 ; <%struct.termbox**> [#uses=1]
+ %42 = load %struct.termbox*, %struct.termbox** %41, align 4 ; <%struct.termbox*> [#uses=2]
%43 = icmp eq %struct.termbox* %42, null ; <i1> [#uses=1]
br i1 %43, label %bb52.loopexit, label %bb36
@@ -127,9 +127,9 @@ bb248: ; preds = %bb322, %bb.nph485
br i1 %45, label %bb322, label %bb249
bb249: ; preds = %bb248
- %46 = getelementptr inbounds %struct.cellbox* %2, i32 0, i32 21, i32 undef ; <%struct.tilebox**> [#uses=1]
- %47 = load %struct.tilebox** %46, align 4 ; <%struct.tilebox*> [#uses=1]
- %48 = getelementptr inbounds %struct.tilebox* %47, i32 0, i32 11 ; <i32*> [#uses=1]
+ %46 = getelementptr inbounds %struct.cellbox, %struct.cellbox* %2, i32 0, i32 21, i32 undef ; <%struct.tilebox**> [#uses=1]
+ %47 = load %struct.tilebox*, %struct.tilebox** %46, align 4 ; <%struct.tilebox*> [#uses=1]
+ %48 = getelementptr inbounds %struct.tilebox, %struct.tilebox* %47, i32 0, i32 11 ; <i32*> [#uses=1]
store i32 undef, i32* %48, align 4
unreachable
@@ -137,7 +137,7 @@ bb322: ; preds = %bb248
br i1 undef, label %bb248, label %bb445
bb445: ; preds = %bb322, %bb10, %bb
- %49 = call i32 (%struct.FILE*, i8*, ...)* @fscanf(%struct.FILE* %fp, i8* getelementptr inbounds ([14 x i8]* @.str2708, i32 0, i32 0), i32* undef, i32* undef, i32* %xcenter, i32* null) nounwind ; <i32> [#uses=1]
+ %49 = call i32 (%struct.FILE*, i8*, ...) @fscanf(%struct.FILE* %fp, i8* getelementptr inbounds ([14 x i8], [14 x i8]* @.str2708, i32 0, i32 0), i32* undef, i32* undef, i32* %xcenter, i32* null) nounwind ; <i32> [#uses=1]
%50 = icmp eq i32 %49, 4 ; <i1> [#uses=1]
br i1 %50, label %bb, label %return
diff --git a/test/CodeGen/ARM/2010-06-21-nondarwin-tc.ll b/test/CodeGen/ARM/2010-06-21-nondarwin-tc.ll
index ac8e80904eda..bead8d9781e8 100755
--- a/test/CodeGen/ARM/2010-06-21-nondarwin-tc.ll
+++ b/test/CodeGen/ARM/2010-06-21-nondarwin-tc.ll
@@ -33,9 +33,9 @@ define %"struct.llvm::Init"* @_ZNK4llvm7VarInit12getFieldInitERNS_6RecordEPKNS_9
;CHECK: bx r12 @ TAILCALL
entry:
%.loc = alloca i32 ; <i32*> [#uses=2]
- %tmp.i = getelementptr inbounds %"class.llvm::StringInit"* %this, i32 0, i32 0, i32 4 ; <i8*> [#uses=1]
+ %tmp.i = getelementptr inbounds %"class.llvm::StringInit", %"class.llvm::StringInit"* %this, i32 0, i32 0, i32 4 ; <i8*> [#uses=1]
%0 = bitcast i8* %tmp.i to %"struct.llvm::Init"** ; <%"struct.llvm::Init"**> [#uses=1]
- %tmp2.i = load %"struct.llvm::Init"** %0 ; <%"struct.llvm::Init"*> [#uses=2]
+ %tmp2.i = load %"struct.llvm::Init"*, %"struct.llvm::Init"** %0 ; <%"struct.llvm::Init"*> [#uses=2]
%1 = icmp eq %"struct.llvm::Init"* %tmp2.i, null ; <i1> [#uses=1]
br i1 %1, label %entry.return_crit_edge, label %tmpbb
@@ -52,17 +52,17 @@ tmpbb: ; preds = %entry
br label %return
if.then: ; preds = %tmpbb
- %tmp2.i.i.i.i = getelementptr inbounds %"class.llvm::StringInit"* %this, i32 0, i32 1, i32 0, i32 0 ; <i8**> [#uses=1]
- %tmp3.i.i.i.i = load i8** %tmp2.i.i.i.i ; <i8*> [#uses=2]
- %arrayidx.i.i.i.i = getelementptr inbounds i8* %tmp3.i.i.i.i, i32 -12 ; <i8*> [#uses=1]
+ %tmp2.i.i.i.i = getelementptr inbounds %"class.llvm::StringInit", %"class.llvm::StringInit"* %this, i32 0, i32 1, i32 0, i32 0 ; <i8**> [#uses=1]
+ %tmp3.i.i.i.i = load i8*, i8** %tmp2.i.i.i.i ; <i8*> [#uses=2]
+ %arrayidx.i.i.i.i = getelementptr inbounds i8, i8* %tmp3.i.i.i.i, i32 -12 ; <i8*> [#uses=1]
%tmp.i.i.i = bitcast i8* %arrayidx.i.i.i.i to i32* ; <i32*> [#uses=1]
- %tmp2.i.i.i = load i32* %tmp.i.i.i ; <i32> [#uses=1]
- %tmp.i5 = getelementptr inbounds %"class.llvm::Record"* %R, i32 0, i32 4 ; <%"class.std::vector"*> [#uses=1]
- %tmp2.i.i = getelementptr inbounds %"class.llvm::Record"* %R, i32 0, i32 4, i32 0, i32 4 ; <i8*> [#uses=1]
+ %tmp2.i.i.i = load i32, i32* %tmp.i.i.i ; <i32> [#uses=1]
+ %tmp.i5 = getelementptr inbounds %"class.llvm::Record", %"class.llvm::Record"* %R, i32 0, i32 4 ; <%"class.std::vector"*> [#uses=1]
+ %tmp2.i.i = getelementptr inbounds %"class.llvm::Record", %"class.llvm::Record"* %R, i32 0, i32 4, i32 0, i32 4 ; <i8*> [#uses=1]
%4 = bitcast i8* %tmp2.i.i to %"class.llvm::RecordVal"** ; <%"class.llvm::RecordVal"**> [#uses=1]
- %tmp3.i.i6 = load %"class.llvm::RecordVal"** %4 ; <%"class.llvm::RecordVal"*> [#uses=1]
+ %tmp3.i.i6 = load %"class.llvm::RecordVal"*, %"class.llvm::RecordVal"** %4 ; <%"class.llvm::RecordVal"*> [#uses=1]
%tmp5.i.i = bitcast %"class.std::vector"* %tmp.i5 to %"class.llvm::RecordVal"** ; <%"class.llvm::RecordVal"**> [#uses=1]
- %tmp6.i.i = load %"class.llvm::RecordVal"** %tmp5.i.i ; <%"class.llvm::RecordVal"*> [#uses=5]
+ %tmp6.i.i = load %"class.llvm::RecordVal"*, %"class.llvm::RecordVal"** %tmp5.i.i ; <%"class.llvm::RecordVal"*> [#uses=5]
%sub.ptr.lhs.cast.i.i = ptrtoint %"class.llvm::RecordVal"* %tmp3.i.i6 to i32 ; <i32> [#uses=1]
%sub.ptr.rhs.cast.i.i = ptrtoint %"class.llvm::RecordVal"* %tmp6.i.i to i32 ; <i32> [#uses=1]
%sub.ptr.sub.i.i = sub i32 %sub.ptr.lhs.cast.i.i, %sub.ptr.rhs.cast.i.i ; <i32> [#uses=1]
@@ -71,14 +71,14 @@ if.then: ; preds = %tmpbb
codeRepl: ; preds = %if.then
%targetBlock = call i1 @_ZNK4llvm7VarInit12getFieldInitERNS_6RecordEPKNS_9RecordValERKSs_for.cond.i(i32 %sub.ptr.div.i.i, %"class.llvm::RecordVal"* %tmp6.i.i, i32 %tmp2.i.i.i, i8* %tmp3.i.i.i.i, i32* %.loc) ; <i1> [#uses=1]
- %.reload = load i32* %.loc ; <i32> [#uses=3]
+ %.reload = load i32, i32* %.loc ; <i32> [#uses=3]
br i1 %targetBlock, label %for.cond.i.return_crit_edge, label %_ZN4llvm6Record8getValueENS_9StringRefE.exit
for.cond.i.return_crit_edge: ; preds = %codeRepl
br label %return
_ZN4llvm6Record8getValueENS_9StringRefE.exit: ; preds = %codeRepl
- %add.ptr.i.i = getelementptr inbounds %"class.llvm::RecordVal"* %tmp6.i.i, i32 %.reload ; <%"class.llvm::RecordVal"*> [#uses=2]
+ %add.ptr.i.i = getelementptr inbounds %"class.llvm::RecordVal", %"class.llvm::RecordVal"* %tmp6.i.i, i32 %.reload ; <%"class.llvm::RecordVal"*> [#uses=2]
%tobool5 = icmp eq %"class.llvm::RecordVal"* %add.ptr.i.i, null ; <i1> [#uses=1]
br i1 %tobool5, label %_ZN4llvm6Record8getValueENS_9StringRefE.exit.return_crit_edge, label %if.then6
@@ -100,8 +100,8 @@ land.lhs.true.return_crit_edge: ; preds = %land.lhs.true
br label %return
lor.lhs.false: ; preds = %land.lhs.true
- %tmp.i3 = getelementptr inbounds %"class.llvm::RecordVal"* %tmp6.i.i, i32 %.reload, i32 3 ; <%"struct.llvm::Init"**> [#uses=1]
- %tmp2.i4 = load %"struct.llvm::Init"** %tmp.i3 ; <%"struct.llvm::Init"*> [#uses=2]
+ %tmp.i3 = getelementptr inbounds %"class.llvm::RecordVal", %"class.llvm::RecordVal"* %tmp6.i.i, i32 %.reload, i32 3 ; <%"struct.llvm::Init"**> [#uses=1]
+ %tmp2.i4 = load %"struct.llvm::Init"*, %"struct.llvm::Init"** %tmp.i3 ; <%"struct.llvm::Init"*> [#uses=2]
%5 = icmp eq %"struct.llvm::Init"* %tmp2.i4, null ; <i1> [#uses=1]
br i1 %5, label %lor.lhs.false.if.end_crit_edge, label %tmpbb1
@@ -121,21 +121,21 @@ tmpbb1: ; preds = %lor.lhs.false
br label %if.end
if.end: ; preds = %.if.end_crit_edge, %lor.lhs.false.if.end_crit_edge, %if.then6.if.end_crit_edge
- %tmp.i1 = getelementptr inbounds %"class.llvm::RecordVal"* %tmp6.i.i, i32 %.reload, i32 3 ; <%"struct.llvm::Init"**> [#uses=1]
- %tmp2.i2 = load %"struct.llvm::Init"** %tmp.i1 ; <%"struct.llvm::Init"*> [#uses=3]
+ %tmp.i1 = getelementptr inbounds %"class.llvm::RecordVal", %"class.llvm::RecordVal"* %tmp6.i.i, i32 %.reload, i32 3 ; <%"struct.llvm::Init"**> [#uses=1]
+ %tmp2.i2 = load %"struct.llvm::Init"*, %"struct.llvm::Init"** %tmp.i1 ; <%"struct.llvm::Init"*> [#uses=3]
%8 = bitcast %"class.llvm::StringInit"* %this to %"struct.llvm::Init"* ; <%"struct.llvm::Init"*> [#uses=1]
%cmp19 = icmp eq %"struct.llvm::Init"* %tmp2.i2, %8 ; <i1> [#uses=1]
br i1 %cmp19, label %cond.false, label %cond.end
cond.false: ; preds = %if.end
- tail call void @__assert_fail(i8* getelementptr inbounds ([45 x i8]* @.str51, i32 0, i32 0), i8* getelementptr inbounds ([47 x i8]* @.str8, i32 0, i32 0), i32 1141, i8* getelementptr inbounds ([116 x i8]* @__PRETTY_FUNCTION__._ZNK4llvm7VarInit12getFieldInitERNS_6RecordEPKNS_9RecordValERKSs, i32 0, i32 0)) noreturn
+ tail call void @__assert_fail(i8* getelementptr inbounds ([45 x i8], [45 x i8]* @.str51, i32 0, i32 0), i8* getelementptr inbounds ([47 x i8], [47 x i8]* @.str8, i32 0, i32 0), i32 1141, i8* getelementptr inbounds ([116 x i8], [116 x i8]* @__PRETTY_FUNCTION__._ZNK4llvm7VarInit12getFieldInitERNS_6RecordEPKNS_9RecordValERKSs, i32 0, i32 0)) noreturn
unreachable
cond.end: ; preds = %if.end
%9 = bitcast %"struct.llvm::Init"* %tmp2.i2 to %"struct.llvm::Init"* (%"struct.llvm::Init"*, %"class.llvm::Record"*, %"class.llvm::RecordVal"*, %"class.std::basic_string"*)*** ; <%"struct.llvm::Init"* (%"struct.llvm::Init"*, %"class.llvm::Record"*, %"class.llvm::RecordVal"*, %"class.std::basic_string"*)***> [#uses=1]
- %10 = load %"struct.llvm::Init"* (%"struct.llvm::Init"*, %"class.llvm::Record"*, %"class.llvm::RecordVal"*, %"class.std::basic_string"*)*** %9 ; <%"struct.llvm::Init"* (%"struct.llvm::Init"*, %"class.llvm::Record"*, %"class.llvm::RecordVal"*, %"class.std::basic_string"*)**> [#uses=1]
- %vfn = getelementptr inbounds %"struct.llvm::Init"* (%"struct.llvm::Init"*, %"class.llvm::Record"*, %"class.llvm::RecordVal"*, %"class.std::basic_string"*)** %10, i32 8 ; <%"struct.llvm::Init"* (%"struct.llvm::Init"*, %"class.llvm::Record"*, %"class.llvm::RecordVal"*, %"class.std::basic_string"*)**> [#uses=1]
- %11 = load %"struct.llvm::Init"* (%"struct.llvm::Init"*, %"class.llvm::Record"*, %"class.llvm::RecordVal"*, %"class.std::basic_string"*)** %vfn ; <%"struct.llvm::Init"* (%"struct.llvm::Init"*, %"class.llvm::Record"*, %"class.llvm::RecordVal"*, %"class.std::basic_string"*)*> [#uses=1]
+ %10 = load %"struct.llvm::Init"* (%"struct.llvm::Init"*, %"class.llvm::Record"*, %"class.llvm::RecordVal"*, %"class.std::basic_string"*)**, %"struct.llvm::Init"* (%"struct.llvm::Init"*, %"class.llvm::Record"*, %"class.llvm::RecordVal"*, %"class.std::basic_string"*)*** %9 ; <%"struct.llvm::Init"* (%"struct.llvm::Init"*, %"class.llvm::Record"*, %"class.llvm::RecordVal"*, %"class.std::basic_string"*)**> [#uses=1]
+ %vfn = getelementptr inbounds %"struct.llvm::Init"* (%"struct.llvm::Init"*, %"class.llvm::Record"*, %"class.llvm::RecordVal"*, %"class.std::basic_string"*)*, %"struct.llvm::Init"* (%"struct.llvm::Init"*, %"class.llvm::Record"*, %"class.llvm::RecordVal"*, %"class.std::basic_string"*)** %10, i32 8 ; <%"struct.llvm::Init"* (%"struct.llvm::Init"*, %"class.llvm::Record"*, %"class.llvm::RecordVal"*, %"class.std::basic_string"*)**> [#uses=1]
+ %11 = load %"struct.llvm::Init"* (%"struct.llvm::Init"*, %"class.llvm::Record"*, %"class.llvm::RecordVal"*, %"class.std::basic_string"*)*, %"struct.llvm::Init"* (%"struct.llvm::Init"*, %"class.llvm::Record"*, %"class.llvm::RecordVal"*, %"class.std::basic_string"*)** %vfn ; <%"struct.llvm::Init"* (%"struct.llvm::Init"*, %"class.llvm::Record"*, %"class.llvm::RecordVal"*, %"class.std::basic_string"*)*> [#uses=1]
%call25 = tail call %"struct.llvm::Init"* %11(%"struct.llvm::Init"* %tmp2.i2, %"class.llvm::Record"* %R, %"class.llvm::RecordVal"* %RV, %"class.std::basic_string"* %FieldName) ; <%"struct.llvm::Init"*> [#uses=1]
ret %"struct.llvm::Init"* %call25
diff --git a/test/CodeGen/ARM/2010-06-25-Thumb2ITInvalidIterator.ll b/test/CodeGen/ARM/2010-06-25-Thumb2ITInvalidIterator.ll
index 18b3be0aba5c..95bb2769759e 100644
--- a/test/CodeGen/ARM/2010-06-25-Thumb2ITInvalidIterator.ll
+++ b/test/CodeGen/ARM/2010-06-25-Thumb2ITInvalidIterator.ll
@@ -7,21 +7,21 @@ target triple = "thumbv7-apple-darwin3.0.0-iphoneos"
define void @x0(i8* nocapture %buf, i32 %nbytes) nounwind optsize {
entry:
- tail call void @llvm.dbg.value(metadata i8* %buf, i64 0, metadata !0, metadata !{!"0x102"}), !dbg !15
- tail call void @llvm.dbg.value(metadata i32 %nbytes, i64 0, metadata !8, metadata !{!"0x102"}), !dbg !16
- %tmp = load i32* @length, !dbg !17 ; <i32> [#uses=3]
+ tail call void @llvm.dbg.value(metadata i8* %buf, i64 0, metadata !0, metadata !DIExpression()), !dbg !15
+ tail call void @llvm.dbg.value(metadata i32 %nbytes, i64 0, metadata !8, metadata !DIExpression()), !dbg !16
+ %tmp = load i32, i32* @length, !dbg !17 ; <i32> [#uses=3]
%cmp = icmp eq i32 %tmp, -1, !dbg !17 ; <i1> [#uses=1]
%cmp.not = xor i1 %cmp, true ; <i1> [#uses=1]
%cmp3 = icmp ult i32 %tmp, %nbytes, !dbg !17 ; <i1> [#uses=1]
%or.cond = and i1 %cmp.not, %cmp3 ; <i1> [#uses=1]
- tail call void @llvm.dbg.value(metadata i32 %tmp, i64 0, metadata !8, metadata !{!"0x102"}), !dbg !17
+ tail call void @llvm.dbg.value(metadata i32 %tmp, i64 0, metadata !8, metadata !DIExpression()), !dbg !17
%nbytes.addr.0 = select i1 %or.cond, i32 %tmp, i32 %nbytes ; <i32> [#uses=1]
- tail call void @llvm.dbg.value(metadata i32 0, i64 0, metadata !10, metadata !{!"0x102"}), !dbg !19
+ tail call void @llvm.dbg.value(metadata i32 0, i64 0, metadata !10, metadata !DIExpression()), !dbg !19
br label %while.cond, !dbg !20
while.cond: ; preds = %while.body, %entry
%0 = phi i32 [ 0, %entry ], [ %inc, %while.body ] ; <i32> [#uses=3]
- %buf.addr.0 = getelementptr i8* %buf, i32 %0 ; <i8*> [#uses=1]
+ %buf.addr.0 = getelementptr i8, i8* %buf, i32 %0 ; <i8*> [#uses=1]
%cmp7 = icmp ult i32 %0, %nbytes.addr.0, !dbg !20 ; <i1> [#uses=1]
br i1 %cmp7, label %land.rhs, label %while.end, !dbg !20
@@ -47,30 +47,30 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnon
!llvm.dbg.lv.fn = !{!0, !8, !10, !12}
!llvm.dbg.gv = !{!14}
-!0 = !{!"0x101\00buf\004\000", !1, !2, !6} ; [ DW_TAG_arg_variable ]
-!1 = !{!"0x2e\00x0\00x0\00x0\005\000\001\000\006\000\000\000", !26, null, !4, null, null, null, null, null} ; [ DW_TAG_subprogram ]
-!2 = !{!"0x29", !26} ; [ DW_TAG_file_type ]
-!3 = !{!"0x11\0012\00clang 2.0\001\00\00\00\00", !26, null, null, null, null, null} ; [ DW_TAG_compile_unit ]
-!4 = !{!"0x15\00\000\000\000\000\000\000", !26, !2, null, !5, null} ; [ DW_TAG_subroutine_type ]
+!0 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "buf", line: 4, arg: 0, scope: !1, file: !2, type: !6)
+!1 = !DISubprogram(name: "x0", linkageName: "x0", line: 5, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, file: !26, scope: null, type: !4)
+!2 = !DIFile(filename: "t.c", directory: "/private/tmp")
+!3 = !DICompileUnit(language: DW_LANG_C99, producer: "clang 2.0", isOptimized: true, file: !26)
+!4 = !DISubroutineType(types: !5)
!5 = !{null}
-!6 = !{!"0xf\00\000\0032\0032\000\000", !26, !2, !7} ; [ DW_TAG_pointer_type ]
-!7 = !{!"0x24\00unsigned char\000\008\008\000\000\008", !26, !2} ; [ DW_TAG_base_type ]
-!8 = !{!"0x101\00nbytes\004\000", !1, !2, !9} ; [ DW_TAG_arg_variable ]
-!9 = !{!"0x24\00unsigned long\000\0032\0032\000\000\007", !26, !2} ; [ DW_TAG_base_type ]
-!10 = !{!"0x100\00nread\006\000", !11, !2, !9} ; [ DW_TAG_auto_variable ]
-!11 = !{!"0xb\005\001\000", !26, !1} ; [ DW_TAG_lexical_block ]
-!12 = !{!"0x100\00c\007\000", !11, !2, !13} ; [ DW_TAG_auto_variable ]
-!13 = !{!"0x24\00int\000\0032\0032\000\000\005", !26, !2} ; [ DW_TAG_base_type ]
-!14 = !{!"0x34\00length\00length\00length\001\000\001", !2, !2, !13, i32* @length} ; [ DW_TAG_variable ]
-!15 = !MDLocation(line: 4, column: 24, scope: !1)
-!16 = !MDLocation(line: 4, column: 43, scope: !1)
-!17 = !MDLocation(line: 9, column: 2, scope: !11)
+!6 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 32, align: 32, file: !26, scope: !2, baseType: !7)
+!7 = !DIBasicType(tag: DW_TAG_base_type, name: "unsigned char", size: 8, align: 8, encoding: DW_ATE_unsigned_char)
+!8 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "nbytes", line: 4, arg: 0, scope: !1, file: !2, type: !9)
+!9 = !DIBasicType(tag: DW_TAG_base_type, name: "unsigned long", size: 32, align: 32, encoding: DW_ATE_unsigned)
+!10 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "nread", line: 6, scope: !11, file: !2, type: !9)
+!11 = distinct !DILexicalBlock(line: 5, column: 1, file: !26, scope: !1)
+!12 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "c", line: 7, scope: !11, file: !2, type: !13)
+!13 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!14 = !DIGlobalVariable(name: "length", linkageName: "length", line: 1, isLocal: false, isDefinition: true, scope: !2, file: !2, type: !13, variable: i32* @length)
+!15 = !DILocation(line: 4, column: 24, scope: !1)
+!16 = !DILocation(line: 4, column: 43, scope: !1)
+!17 = !DILocation(line: 9, column: 2, scope: !11)
!18 = !{i32 0}
-!19 = !MDLocation(line: 10, column: 2, scope: !11)
-!20 = !MDLocation(line: 11, column: 2, scope: !11)
-!21 = !MDLocation(line: 12, column: 3, scope: !22)
-!22 = !{!"0xb\0011\0045\000", !26, !11} ; [ DW_TAG_lexical_block ]
-!23 = !MDLocation(line: 13, column: 3, scope: !22)
-!24 = !MDLocation(line: 14, column: 2, scope: !22)
-!25 = !MDLocation(line: 15, column: 1, scope: !11)
-!26 = !{!"t.c", !"/private/tmp"}
+!19 = !DILocation(line: 10, column: 2, scope: !11)
+!20 = !DILocation(line: 11, column: 2, scope: !11)
+!21 = !DILocation(line: 12, column: 3, scope: !22)
+!22 = distinct !DILexicalBlock(line: 11, column: 45, file: !26, scope: !11)
+!23 = !DILocation(line: 13, column: 3, scope: !22)
+!24 = !DILocation(line: 14, column: 2, scope: !22)
+!25 = !DILocation(line: 15, column: 1, scope: !11)
+!26 = !DIFile(filename: "t.c", directory: "/private/tmp")
diff --git a/test/CodeGen/ARM/2010-06-29-SubregImpDefs.ll b/test/CodeGen/ARM/2010-06-29-SubregImpDefs.ll
index fcabc900afa6..03362a8e51f9 100644
--- a/test/CodeGen/ARM/2010-06-29-SubregImpDefs.ll
+++ b/test/CodeGen/ARM/2010-06-29-SubregImpDefs.ll
@@ -7,8 +7,8 @@ define i32 @main(i32 %argc, i8** %argv) nounwind {
entry:
%0 = shufflevector <2 x i64> undef, <2 x i64> zeroinitializer, <2 x i32> <i32 1, i32 2> ; <<2 x i64>> [#uses=1]
store <2 x i64> %0, <2 x i64>* undef, align 16
- %val4723 = load <8 x i16>* undef ; <<8 x i16>> [#uses=1]
- call void @PrintShortX(i8* getelementptr inbounds ([21 x i8]* @.str271, i32 0, i32 0), <8 x i16> %val4723, i32 0) nounwind
+ %val4723 = load <8 x i16>, <8 x i16>* undef ; <<8 x i16>> [#uses=1]
+ call void @PrintShortX(i8* getelementptr inbounds ([21 x i8], [21 x i8]* @.str271, i32 0, i32 0), <8 x i16> %val4723, i32 0) nounwind
ret i32 undef
}
diff --git a/test/CodeGen/ARM/2010-07-26-GlobalMerge.ll b/test/CodeGen/ARM/2010-07-26-GlobalMerge.ll
index 80822c2c426b..b02efea929fa 100644
--- a/test/CodeGen/ARM/2010-07-26-GlobalMerge.ll
+++ b/test/CodeGen/ARM/2010-07-26-GlobalMerge.ll
@@ -9,7 +9,7 @@ target triple = "thumbv7-apple-darwin10.0.0"
@d = internal global i32 0, align 4 ; <i32*> [#uses=6]
@_ZTVN10__cxxabiv117__class_type_infoE = external global i8* ; <i8**> [#uses=1]
@_ZTS1A = internal constant [3 x i8] c"1A\00" ; <[3 x i8]*> [#uses=1]
-@_ZTI1A = internal constant %0 { i8* bitcast (i8** getelementptr inbounds (i8** @_ZTVN10__cxxabiv117__class_type_infoE, i32 2) to i8*), i8* getelementptr inbounds ([3 x i8]* @_ZTS1A, i32 0, i32 0) } ; <%0*> [#uses=1]
+@_ZTI1A = internal constant %0 { i8* bitcast (i8** getelementptr inbounds (i8*, i8** @_ZTVN10__cxxabiv117__class_type_infoE, i32 2) to i8*), i8* getelementptr inbounds ([3 x i8], [3 x i8]* @_ZTS1A, i32 0, i32 0) } ; <%0*> [#uses=1]
@.str2 = private constant [18 x i8] c"c == %d, d == %d\0A\00" ; <[18 x i8]*> [#uses=1]
@.str3 = private constant [16 x i8] c"A(const A&) %d\0A\00" ; <[16 x i8]*> [#uses=1]
@.str4 = private constant [9 x i8] c"~A() %d\0A\00" ; <[9 x i8]*> [#uses=1]
@@ -29,10 +29,10 @@ declare void @_Unwind_SjLj_Resume(i8*)
define internal void @_ZN1AD1Ev(%struct.A* nocapture %this) nounwind ssp align 2 {
entry:
- %tmp.i = getelementptr inbounds %struct.A* %this, i32 0, i32 0 ; <i32*> [#uses=1]
- %tmp2.i = load i32* %tmp.i ; <i32> [#uses=1]
- %call.i = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([9 x i8]* @.str4, i32 0, i32 0), i32 %tmp2.i) nounwind ; <i32> [#uses=0]
- %tmp3.i = load i32* @d ; <i32> [#uses=1]
+ %tmp.i = getelementptr inbounds %struct.A, %struct.A* %this, i32 0, i32 0 ; <i32*> [#uses=1]
+ %tmp2.i = load i32, i32* %tmp.i ; <i32> [#uses=1]
+ %call.i = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str4, i32 0, i32 0), i32 %tmp2.i) nounwind ; <i32> [#uses=0]
+ %tmp3.i = load i32, i32* @d ; <i32> [#uses=1]
%inc.i = add nsw i32 %tmp3.i, 1 ; <i32> [#uses=1]
store i32 %inc.i, i32* @d
ret void
@@ -42,11 +42,11 @@ declare void @__cxa_throw(i8*, i8*, i8*)
define i32 @main() ssp {
entry:
- %puts.i = tail call i32 @puts(i8* getelementptr inbounds ([14 x i8]* @str, i32 0, i32 0)) ; <i32> [#uses=0]
+ %puts.i = tail call i32 @puts(i8* getelementptr inbounds ([14 x i8], [14 x i8]* @str, i32 0, i32 0)) ; <i32> [#uses=0]
%exception.i = tail call i8* @__cxa_allocate_exception(i32 4) nounwind ; <i8*> [#uses=2]
%tmp2.i.i.i = bitcast i8* %exception.i to i32* ; <i32*> [#uses=1]
store i32 1, i32* %tmp2.i.i.i
- %call.i.i.i = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([8 x i8]* @.str5, i32 0, i32 0), i32 1) nounwind ; <i32> [#uses=0]
+ %call.i.i.i = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([8 x i8], [8 x i8]* @.str5, i32 0, i32 0), i32 1) nounwind ; <i32> [#uses=0]
invoke void @__cxa_throw(i8* %exception.i, i8* bitcast (%0* @_ZTI1A to i8*), i8* bitcast (void (%struct.A*)* @_ZN1AD1Ev to i8*)) noreturn
to label %.noexc unwind label %lpad
@@ -55,17 +55,17 @@ entry:
try.cont: ; preds = %lpad
%0 = tail call i8* @__cxa_get_exception_ptr(i8* %exn) nounwind ; <i8*> [#uses=0]
- %call.i.i = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([16 x i8]* @.str3, i32 0, i32 0), i32 2) nounwind ; <i32> [#uses=0]
+ %call.i.i = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([16 x i8], [16 x i8]* @.str3, i32 0, i32 0), i32 2) nounwind ; <i32> [#uses=0]
%1 = tail call i8* @__cxa_begin_catch(i8* %exn) nounwind ; <i8*> [#uses=0]
- %puts = tail call i32 @puts(i8* getelementptr inbounds ([8 x i8]* @str1, i32 0, i32 0)) ; <i32> [#uses=0]
- %call.i.i3 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([9 x i8]* @.str4, i32 0, i32 0), i32 2) nounwind ; <i32> [#uses=0]
- %tmp3.i.i = load i32* @d ; <i32> [#uses=1]
+ %puts = tail call i32 @puts(i8* getelementptr inbounds ([8 x i8], [8 x i8]* @str1, i32 0, i32 0)) ; <i32> [#uses=0]
+ %call.i.i3 = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str4, i32 0, i32 0), i32 2) nounwind ; <i32> [#uses=0]
+ %tmp3.i.i = load i32, i32* @d ; <i32> [#uses=1]
%inc.i.i4 = add nsw i32 %tmp3.i.i, 1 ; <i32> [#uses=1]
store i32 %inc.i.i4, i32* @d
tail call void @__cxa_end_catch()
- %tmp13 = load i32* @d ; <i32> [#uses=1]
- %call14 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([18 x i8]* @.str2, i32 0, i32 0), i32 2, i32 %tmp13) ; <i32> [#uses=0]
- %tmp16 = load i32* @d ; <i32> [#uses=1]
+ %tmp13 = load i32, i32* @d ; <i32> [#uses=1]
+ %call14 = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([18 x i8], [18 x i8]* @.str2, i32 0, i32 0), i32 2, i32 %tmp13) ; <i32> [#uses=0]
+ %tmp16 = load i32, i32* @d ; <i32> [#uses=1]
%cmp = icmp ne i32 %tmp16, 2 ; <i1> [#uses=1]
%conv = zext i1 %cmp to i32 ; <i32> [#uses=1]
ret i32 %conv
diff --git a/test/CodeGen/ARM/2010-08-04-EHCrash.ll b/test/CodeGen/ARM/2010-08-04-EHCrash.ll
index 4b47085afd5e..13214c521530 100644
--- a/test/CodeGen/ARM/2010-08-04-EHCrash.ll
+++ b/test/CodeGen/ARM/2010-08-04-EHCrash.ll
@@ -12,17 +12,17 @@ entry:
br label %return
bb: ; No predecessors!
- %eh_select = load i32* %eh_selector ; <i32> [#uses=1]
+ %eh_select = load i32, i32* %eh_selector ; <i32> [#uses=1]
store i32 %eh_select, i32* %save_filt.936, align 4
- %eh_value = load i8** %eh_exception ; <i8*> [#uses=1]
+ %eh_value = load i8*, i8** %eh_exception ; <i8*> [#uses=1]
store i8* %eh_value, i8** %save_eptr.935, align 4
invoke arm_apcscc void @func3()
to label %invcont unwind label %lpad
invcont: ; preds = %bb
- %tmp6 = load i8** %save_eptr.935, align 4 ; <i8*> [#uses=1]
+ %tmp6 = load i8*, i8** %save_eptr.935, align 4 ; <i8*> [#uses=1]
store i8* %tmp6, i8** %eh_exception, align 4
- %tmp7 = load i32* %save_filt.936, align 4 ; <i32> [#uses=1]
+ %tmp7 = load i32, i32* %save_filt.936, align 4 ; <i32> [#uses=1]
store i32 %tmp7, i32* %eh_selector, align 4
br label %Unwind
@@ -38,7 +38,7 @@ lpad: ; preds = %bb
cleanup
%exn = extractvalue { i8*, i32 } %eh_ptr, 0
store i8* %exn, i8** %eh_exception
- %eh_ptr13 = load i8** %eh_exception ; <i8*> [#uses=1]
+ %eh_ptr13 = load i8*, i8** %eh_exception ; <i8*> [#uses=1]
%eh_select14 = extractvalue { i8*, i32 } %eh_ptr, 1
store i32 %eh_select14, i32* %eh_selector
br label %ppad
@@ -47,7 +47,7 @@ ppad:
br label %bb12
Unwind:
- %eh_ptr15 = load i8** %eh_exception
+ %eh_ptr15 = load i8*, i8** %eh_exception
call arm_apcscc void @_Unwind_SjLj_Resume(i8* %eh_ptr15)
unreachable
}
diff --git a/test/CodeGen/ARM/2010-08-04-StackVariable.ll b/test/CodeGen/ARM/2010-08-04-StackVariable.ll
index f71a6c9732a6..953e2bbf291c 100644
--- a/test/CodeGen/ARM/2010-08-04-StackVariable.ll
+++ b/test/CodeGen/ARM/2010-08-04-StackVariable.ll
@@ -6,20 +6,20 @@
define i32 @_Z3fooi4SVal(i32 %i, %struct.SVal* noalias %location) nounwind ssp {
entry:
%"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
- call void @llvm.dbg.value(metadata i32 %i, i64 0, metadata !23, metadata !{!"0x102"}), !dbg !24
- call void @llvm.dbg.value(metadata %struct.SVal* %location, i64 0, metadata !25, metadata !{!"0x102"}), !dbg !24
+ call void @llvm.dbg.value(metadata i32 %i, i64 0, metadata !23, metadata !DIExpression()), !dbg !24
+ call void @llvm.dbg.value(metadata %struct.SVal* %location, i64 0, metadata !25, metadata !DIExpression()), !dbg !24
%0 = icmp ne i32 %i, 0, !dbg !27 ; <i1> [#uses=1]
br i1 %0, label %bb, label %bb1, !dbg !27
bb: ; preds = %entry
- %1 = getelementptr inbounds %struct.SVal* %location, i32 0, i32 1, !dbg !29 ; <i32*> [#uses=1]
- %2 = load i32* %1, align 8, !dbg !29 ; <i32> [#uses=1]
+ %1 = getelementptr inbounds %struct.SVal, %struct.SVal* %location, i32 0, i32 1, !dbg !29 ; <i32*> [#uses=1]
+ %2 = load i32, i32* %1, align 8, !dbg !29 ; <i32> [#uses=1]
%3 = add i32 %2, %i, !dbg !29 ; <i32> [#uses=1]
br label %bb2, !dbg !29
bb1: ; preds = %entry
- %4 = getelementptr inbounds %struct.SVal* %location, i32 0, i32 1, !dbg !30 ; <i32*> [#uses=1]
- %5 = load i32* %4, align 8, !dbg !30 ; <i32> [#uses=1]
+ %4 = getelementptr inbounds %struct.SVal, %struct.SVal* %location, i32 0, i32 1, !dbg !30 ; <i32*> [#uses=1]
+ %5 = load i32, i32* %4, align 8, !dbg !30 ; <i32> [#uses=1]
%6 = sub i32 %5, 1, !dbg !30 ; <i32> [#uses=1]
br label %bb2, !dbg !30
@@ -34,10 +34,10 @@ return: ; preds = %bb2
define linkonce_odr void @_ZN4SValC1Ev(%struct.SVal* %this) nounwind ssp align 2 {
entry:
%"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
- call void @llvm.dbg.value(metadata %struct.SVal* %this, i64 0, metadata !31, metadata !{!"0x102"}), !dbg !34
- %0 = getelementptr inbounds %struct.SVal* %this, i32 0, i32 0, !dbg !34 ; <i8**> [#uses=1]
+ call void @llvm.dbg.value(metadata %struct.SVal* %this, i64 0, metadata !31, metadata !DIExpression()), !dbg !34
+ %0 = getelementptr inbounds %struct.SVal, %struct.SVal* %this, i32 0, i32 0, !dbg !34 ; <i8**> [#uses=1]
store i8* null, i8** %0, align 8, !dbg !34
- %1 = getelementptr inbounds %struct.SVal* %this, i32 0, i32 1, !dbg !34 ; <i32*> [#uses=1]
+ %1 = getelementptr inbounds %struct.SVal, %struct.SVal* %this, i32 0, i32 1, !dbg !34 ; <i32*> [#uses=1]
store i32 0, i32* %1, align 8, !dbg !34
br label %return, !dbg !34
@@ -52,20 +52,20 @@ entry:
%0 = alloca %struct.SVal ; <%struct.SVal*> [#uses=3]
%v = alloca %struct.SVal ; <%struct.SVal*> [#uses=4]
%"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
- call void @llvm.dbg.declare(metadata %struct.SVal* %v, metadata !38, metadata !{!"0x102"}), !dbg !41
+ call void @llvm.dbg.declare(metadata %struct.SVal* %v, metadata !38, metadata !DIExpression()), !dbg !41
call void @_ZN4SValC1Ev(%struct.SVal* %v) nounwind, !dbg !41
- %1 = getelementptr inbounds %struct.SVal* %v, i32 0, i32 1, !dbg !42 ; <i32*> [#uses=1]
+ %1 = getelementptr inbounds %struct.SVal, %struct.SVal* %v, i32 0, i32 1, !dbg !42 ; <i32*> [#uses=1]
store i32 1, i32* %1, align 8, !dbg !42
- %2 = getelementptr inbounds %struct.SVal* %0, i32 0, i32 0, !dbg !43 ; <i8**> [#uses=1]
- %3 = getelementptr inbounds %struct.SVal* %v, i32 0, i32 0, !dbg !43 ; <i8**> [#uses=1]
- %4 = load i8** %3, align 8, !dbg !43 ; <i8*> [#uses=1]
+ %2 = getelementptr inbounds %struct.SVal, %struct.SVal* %0, i32 0, i32 0, !dbg !43 ; <i8**> [#uses=1]
+ %3 = getelementptr inbounds %struct.SVal, %struct.SVal* %v, i32 0, i32 0, !dbg !43 ; <i8**> [#uses=1]
+ %4 = load i8*, i8** %3, align 8, !dbg !43 ; <i8*> [#uses=1]
store i8* %4, i8** %2, align 8, !dbg !43
- %5 = getelementptr inbounds %struct.SVal* %0, i32 0, i32 1, !dbg !43 ; <i32*> [#uses=1]
- %6 = getelementptr inbounds %struct.SVal* %v, i32 0, i32 1, !dbg !43 ; <i32*> [#uses=1]
- %7 = load i32* %6, align 8, !dbg !43 ; <i32> [#uses=1]
+ %5 = getelementptr inbounds %struct.SVal, %struct.SVal* %0, i32 0, i32 1, !dbg !43 ; <i32*> [#uses=1]
+ %6 = getelementptr inbounds %struct.SVal, %struct.SVal* %v, i32 0, i32 1, !dbg !43 ; <i32*> [#uses=1]
+ %7 = load i32, i32* %6, align 8, !dbg !43 ; <i32> [#uses=1]
store i32 %7, i32* %5, align 8, !dbg !43
%8 = call i32 @_Z3fooi4SVal(i32 2, %struct.SVal* noalias %0) nounwind, !dbg !43 ; <i32> [#uses=0]
- call void @llvm.dbg.value(metadata i32 %8, i64 0, metadata !44, metadata !{!"0x102"}), !dbg !43
+ call void @llvm.dbg.value(metadata i32 %8, i64 0, metadata !44, metadata !DIExpression()), !dbg !43
br label %return, !dbg !45
return: ; preds = %entry
@@ -77,53 +77,53 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnon
!llvm.dbg.cu = !{!3}
!llvm.module.flags = !{!49}
-!0 = !{!"0x2e\00SVal\00SVal\00\0011\000\000\000\006\000\000\000", !48, !1, !14, null, null, null, null, null} ; [ DW_TAG_subprogram ]
-!1 = !{!"0x13\00SVal\001\00128\0064\000\000\000", !48, null, null, !4, null, null, null} ; [ DW_TAG_structure_type ] [SVal] [line 1, size 128, align 64, offset 0] [def] [from ]
-!2 = !{!"0x29", !48} ; [ DW_TAG_file_type ]
-!3 = !{!"0x11\004\004.2.1 (Based on Apple Inc. build 5658) (LLVM build)\000\00\000\00\001", !48, !47, !47, !46, !47, !47} ; [ DW_TAG_compile_unit ]
+!0 = !DISubprogram(name: "SVal", line: 11, isLocal: false, isDefinition: false, virtualIndex: 6, isOptimized: false, file: !48, scope: !1, type: !14)
+!1 = !DICompositeType(tag: DW_TAG_structure_type, name: "SVal", line: 1, size: 128, align: 64, file: !48, elements: !4)
+!2 = !DIFile(filename: "small.cc", directory: "/Users/manav/R8248330")
+!3 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", isOptimized: false, emissionKind: 1, file: !48, enums: !47, retainedTypes: !47, subprograms: !46, globals: !47, imports: !47)
!4 = !{!5, !7, !0, !9}
-!5 = !{!"0xd\00Data\007\0064\0064\000\000", !48, !1, !6} ; [ DW_TAG_member ]
-!6 = !{!"0xf\00\000\0064\0064\000\000", !48, null, null} ; [ DW_TAG_pointer_type ]
-!7 = !{!"0xd\00Kind\008\0032\0032\0064\000", !48, !1, !8} ; [ DW_TAG_member ]
-!8 = !{!"0x24\00unsigned int\000\0032\0032\000\000\007", !48, null} ; [ DW_TAG_base_type ]
-!9 = !{!"0x2e\00~SVal\00~SVal\00\0012\000\000\000\006\000\000\000", !48, !1, !10, null, null, null, null, null} ; [ DW_TAG_subprogram ]
-!10 = !{!"0x15\00\000\000\000\000\000\000", !48, null, null, !11, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!5 = !DIDerivedType(tag: DW_TAG_member, name: "Data", line: 7, size: 64, align: 64, file: !48, scope: !1, baseType: !6)
+!6 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, file: !48, baseType: null)
+!7 = !DIDerivedType(tag: DW_TAG_member, name: "Kind", line: 8, size: 32, align: 32, offset: 64, file: !48, scope: !1, baseType: !8)
+!8 = !DIBasicType(tag: DW_TAG_base_type, name: "unsigned int", size: 32, align: 32, encoding: DW_ATE_unsigned)
+!9 = !DISubprogram(name: "~SVal", line: 12, isLocal: false, isDefinition: false, virtualIndex: 6, isOptimized: false, file: !48, scope: !1, type: !10)
+!10 = !DISubroutineType(types: !11)
!11 = !{null, !12, !13}
-!12 = !{!"0xf\00\000\0064\0064\000\0064", !48, null, !1} ; [ DW_TAG_pointer_type ]
-!13 = !{!"0x24\00int\000\0032\0032\000\000\005", !48, null} ; [ DW_TAG_base_type ]
-!14 = !{!"0x15\00\000\000\000\000\000\000", !48, null, null, !15, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!12 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, flags: DIFlagArtificial, file: !48, baseType: !1)
+!13 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!14 = !DISubroutineType(types: !15)
!15 = !{null, !12}
-!16 = !{!"0x2e\00SVal\00SVal\00_ZN4SValC1Ev\0011\000\001\000\006\000\000\000", !48, !1, !14, null, void (%struct.SVal*)* @_ZN4SValC1Ev, null, null, null} ; [ DW_TAG_subprogram ]
-!17 = !{!"0x2e\00foo\00foo\00_Z3fooi4SVal\0016\000\001\000\006\000\000\000", !48, !2, !18, null, i32 (i32, %struct.SVal*)* @_Z3fooi4SVal, null, null, null} ; [ DW_TAG_subprogram ]
-!18 = !{!"0x15\00\000\000\000\000\000\000", !48, null, null, !19, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!16 = !DISubprogram(name: "SVal", linkageName: "_ZN4SValC1Ev", line: 11, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, file: !48, scope: !1, type: !14, function: void (%struct.SVal*)* @_ZN4SValC1Ev)
+!17 = !DISubprogram(name: "foo", linkageName: "_Z3fooi4SVal", line: 16, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, file: !48, scope: !2, type: !18, function: i32 (i32, %struct.SVal*)* @_Z3fooi4SVal)
+!18 = !DISubroutineType(types: !19)
!19 = !{!13, !13, !1}
-!20 = !{!"0x2e\00main\00main\00main\0023\000\001\000\006\000\000\000", !48, !2, !21, null, i32 ()* @main, null, null, null} ; [ DW_TAG_subprogram ]
-!21 = !{!"0x15\00\000\000\000\000\000\000", !48, null, null, !22, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!20 = !DISubprogram(name: "main", linkageName: "main", line: 23, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, file: !48, scope: !2, type: !21, function: i32 ()* @main)
+!21 = !DISubroutineType(types: !22)
!22 = !{!13}
-!23 = !{!"0x101\00i\0016\000", !17, !2, !13} ; [ DW_TAG_arg_variable ]
-!24 = !MDLocation(line: 16, scope: !17)
-!25 = !{!"0x101\00location\0016\000", !17, !2, !26} ; [ DW_TAG_arg_variable ]
-!26 = !{!"0x10\00SVal\000\0064\0064\000\000", !48, !2, !1} ; [ DW_TAG_reference_type ]
-!27 = !MDLocation(line: 17, scope: !28)
-!28 = !{!"0xb\0016\000\002", !2, !17} ; [ DW_TAG_lexical_block ]
-!29 = !MDLocation(line: 18, scope: !28)
-!30 = !MDLocation(line: 20, scope: !28)
-!31 = !{!"0x101\00this\0011\000", !16, !2, !32} ; [ DW_TAG_arg_variable ]
-!32 = !{!"0x26\00\000\0064\0064\000\0064", !48, !2, !33} ; [ DW_TAG_const_type ]
-!33 = !{!"0xf\00\000\0064\0064\000\000", !48, !2, !1} ; [ DW_TAG_pointer_type ]
-!34 = !MDLocation(line: 11, scope: !16)
-!35 = !MDLocation(line: 11, scope: !36)
-!36 = !{!"0xb\0011\000\001", !48, !37} ; [ DW_TAG_lexical_block ]
-!37 = !{!"0xb\0011\000\000", !48, !16} ; [ DW_TAG_lexical_block ]
-!38 = !{!"0x100\00v\0024\000", !39, !2, !1} ; [ DW_TAG_auto_variable ]
-!39 = !{!"0xb\0023\000\004", !48, !40} ; [ DW_TAG_lexical_block ]
-!40 = !{!"0xb\0023\000\003", !48, !20} ; [ DW_TAG_lexical_block ]
-!41 = !MDLocation(line: 24, scope: !39)
-!42 = !MDLocation(line: 25, scope: !39)
-!43 = !MDLocation(line: 26, scope: !39)
-!44 = !{!"0x100\00k\0026\000", !39, !2, !13} ; [ DW_TAG_auto_variable ]
-!45 = !MDLocation(line: 27, scope: !39)
+!23 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "i", line: 16, arg: 0, scope: !17, file: !2, type: !13)
+!24 = !DILocation(line: 16, scope: !17)
+!25 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "location", line: 16, arg: 0, scope: !17, file: !2, type: !26)
+!26 = !DIDerivedType(tag: DW_TAG_reference_type, name: "SVal", size: 64, align: 64, file: !48, scope: !2, baseType: !1)
+!27 = !DILocation(line: 17, scope: !28)
+!28 = distinct !DILexicalBlock(line: 16, column: 0, file: !2, scope: !17)
+!29 = !DILocation(line: 18, scope: !28)
+!30 = !DILocation(line: 20, scope: !28)
+!31 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "this", line: 11, arg: 0, scope: !16, file: !2, type: !32)
+!32 = !DIDerivedType(tag: DW_TAG_const_type, size: 64, align: 64, flags: DIFlagArtificial, file: !48, scope: !2, baseType: !33)
+!33 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, file: !48, scope: !2, baseType: !1)
+!34 = !DILocation(line: 11, scope: !16)
+!35 = !DILocation(line: 11, scope: !36)
+!36 = distinct !DILexicalBlock(line: 11, column: 0, file: !48, scope: !37)
+!37 = distinct !DILexicalBlock(line: 11, column: 0, file: !48, scope: !16)
+!38 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "v", line: 24, scope: !39, file: !2, type: !1)
+!39 = distinct !DILexicalBlock(line: 23, column: 0, file: !48, scope: !40)
+!40 = distinct !DILexicalBlock(line: 23, column: 0, file: !48, scope: !20)
+!41 = !DILocation(line: 24, scope: !39)
+!42 = !DILocation(line: 25, scope: !39)
+!43 = !DILocation(line: 26, scope: !39)
+!44 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "k", line: 26, scope: !39, file: !2, type: !13)
+!45 = !DILocation(line: 27, scope: !39)
!46 = !{!16, !17, !20}
!47 = !{}
-!48 = !{!"small.cc", !"/Users/manav/R8248330"}
-!49 = !{i32 1, !"Debug Info Version", i32 2}
+!48 = !DIFile(filename: "small.cc", directory: "/Users/manav/R8248330")
+!49 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/CodeGen/ARM/2010-11-15-SpillEarlyClobber.ll b/test/CodeGen/ARM/2010-11-15-SpillEarlyClobber.ll
index 80a19649635a..09428ce9c339 100644
--- a/test/CodeGen/ARM/2010-11-15-SpillEarlyClobber.ll
+++ b/test/CodeGen/ARM/2010-11-15-SpillEarlyClobber.ll
@@ -34,34 +34,34 @@ bb28.i: ; preds = %bb28.i, %bb27.i
presymmetry.exit: ; preds = %bb28.i
%tmp175387 = or i32 undef, 12
- %scevgep101.i = getelementptr i32* %in, i32 undef
+ %scevgep101.i = getelementptr i32, i32* %in, i32 undef
%tmp189401 = or i32 undef, 7
- %scevgep97.i = getelementptr i32* %in, i32 undef
+ %scevgep97.i = getelementptr i32, i32* %in, i32 undef
%tmp198410 = or i32 undef, 1
- %scevgep.i48 = getelementptr i32* %in, i32 undef
- %0 = load i32* %scevgep.i48, align 4
+ %scevgep.i48 = getelementptr i32, i32* %in, i32 undef
+ %0 = load i32, i32* %scevgep.i48, align 4
%1 = add nsw i32 %0, 0
store i32 %1, i32* undef, align 4
%asmtmp.i.i33.i.i.i = tail call %0 asm "smull\09$0, $1, $2, $3", "=&r,=&r,%r,r,~{cc}"(i32 undef, i32 1518500250) nounwind
%asmresult1.i.i34.i.i.i = extractvalue %0 %asmtmp.i.i33.i.i.i, 1
%2 = shl i32 %asmresult1.i.i34.i.i.i, 1
- %3 = load i32* null, align 4
- %4 = load i32* undef, align 4
+ %3 = load i32, i32* null, align 4
+ %4 = load i32, i32* undef, align 4
%5 = sub nsw i32 %3, %4
- %6 = load i32* undef, align 4
- %7 = load i32* null, align 4
+ %6 = load i32, i32* undef, align 4
+ %7 = load i32, i32* null, align 4
%8 = sub nsw i32 %6, %7
- %9 = load i32* %scevgep97.i, align 4
- %10 = load i32* undef, align 4
+ %9 = load i32, i32* %scevgep97.i, align 4
+ %10 = load i32, i32* undef, align 4
%11 = sub nsw i32 %9, %10
- %12 = load i32* null, align 4
- %13 = load i32* %scevgep101.i, align 4
+ %12 = load i32, i32* null, align 4
+ %13 = load i32, i32* %scevgep101.i, align 4
%14 = sub nsw i32 %12, %13
- %15 = load i32* %scevgep.i48, align 4
- %16 = load i32* null, align 4
+ %15 = load i32, i32* %scevgep.i48, align 4
+ %16 = load i32, i32* null, align 4
%17 = add nsw i32 %16, %15
%18 = sub nsw i32 %15, %16
- %19 = load i32* undef, align 4
+ %19 = load i32, i32* undef, align 4
%20 = add nsw i32 %19, %2
%21 = sub nsw i32 %19, %2
%22 = add nsw i32 %14, %5
diff --git a/test/CodeGen/ARM/2010-12-08-tpsoft.ll b/test/CodeGen/ARM/2010-12-08-tpsoft.ll
index 1351a26756ef..9dfe4da27677 100644
--- a/test/CodeGen/ARM/2010-12-08-tpsoft.ll
+++ b/test/CodeGen/ARM/2010-12-08-tpsoft.ll
@@ -12,7 +12,7 @@
define arm_aapcs_vfpcc i32 @main() nounwind {
entry:
- %0 = load i32* @i, align 4
+ %0 = load i32, i32* @i, align 4
switch i32 %0, label %bb2 [
i32 12, label %bb
i32 13, label %bb1
diff --git a/test/CodeGen/ARM/2010-12-15-elf-lcomm.ll b/test/CodeGen/ARM/2010-12-15-elf-lcomm.ll
index f13bc1214a5a..f04e053b2e04 100644
--- a/test/CodeGen/ARM/2010-12-15-elf-lcomm.ll
+++ b/test/CodeGen/ARM/2010-12-15-elf-lcomm.ll
@@ -12,14 +12,9 @@
; ASM: .type array00,%object @ @array00
; ASM-NEXT: .local array00
; ASM-NEXT: .comm array00,80,1
-; ASM-NEXT: .type _MergedGlobals,%object @ @_MergedGlobals
+; ASM-NEXT: .type sum,%object @ @sum
-; OBJ: Sections [
-; OBJ: Section {
-; OBJ: Index: 4
-; OBJ-NEXT: Name: .bss
-
; OBJ: Symbols [
; OBJ: Symbol {
; OBJ: Name: array00
@@ -31,9 +26,9 @@
; OBJ-NEXT: Section: .bss
define i32 @main(i32 %argc) nounwind {
- %1 = load i32* @sum, align 4
- %2 = getelementptr [80 x i8]* @array00, i32 0, i32 %argc
- %3 = load i8* %2
+ %1 = load i32, i32* @sum, align 4
+ %2 = getelementptr [80 x i8], [80 x i8]* @array00, i32 0, i32 %argc
+ %3 = load i8, i8* %2
%4 = zext i8 %3 to i32
%5 = add i32 %1, %4
ret i32 %5
diff --git a/test/CodeGen/ARM/2010-12-17-LocalStackSlotCrash.ll b/test/CodeGen/ARM/2010-12-17-LocalStackSlotCrash.ll
index a2f50b587b22..783c82eb88b7 100644
--- a/test/CodeGen/ARM/2010-12-17-LocalStackSlotCrash.ll
+++ b/test/CodeGen/ARM/2010-12-17-LocalStackSlotCrash.ll
@@ -9,7 +9,7 @@ entry:
br label %bb
bb:
- %p.2 = getelementptr [8096 x i8]* %buf, i32 0, i32 0
+ %p.2 = getelementptr [8096 x i8], [8096 x i8]* %buf, i32 0, i32 0
store i8 undef, i8* %p.2, align 1
ret void
}
diff --git a/test/CodeGen/ARM/2011-01-19-MergedGlobalDbg.ll b/test/CodeGen/ARM/2011-01-19-MergedGlobalDbg.ll
index 67dda672719c..9a5baf21b8fb 100644
--- a/test/CodeGen/ARM/2011-01-19-MergedGlobalDbg.ll
+++ b/test/CodeGen/ARM/2011-01-19-MergedGlobalDbg.ll
@@ -1,4 +1,4 @@
-; RUN: llc -filetype=obj < %s | llvm-dwarfdump -debug-dump=info - | FileCheck %s
+; RUN: llc -arm-global-merge -global-merge-group-by-use=false -filetype=obj < %s | llvm-dwarfdump -debug-dump=info - | FileCheck %s
target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32"
target triple = "thumbv7-apple-darwin10"
@@ -30,9 +30,9 @@ target triple = "thumbv7-apple-darwin10"
define zeroext i8 @get1(i8 zeroext %a) nounwind optsize {
entry:
- tail call void @llvm.dbg.value(metadata i8 %a, i64 0, metadata !10, metadata !{!"0x102"}), !dbg !30
- %0 = load i8* @x1, align 4, !dbg !30
- tail call void @llvm.dbg.value(metadata i8 %0, i64 0, metadata !11, metadata !{!"0x102"}), !dbg !30
+ tail call void @llvm.dbg.value(metadata i8 %a, i64 0, metadata !10, metadata !DIExpression()), !dbg !30
+ %0 = load i8, i8* @x1, align 4, !dbg !30
+ tail call void @llvm.dbg.value(metadata i8 %0, i64 0, metadata !11, metadata !DIExpression()), !dbg !30
store i8 %a, i8* @x1, align 4, !dbg !30
ret i8 %0, !dbg !31
}
@@ -41,36 +41,36 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnon
define zeroext i8 @get2(i8 zeroext %a) nounwind optsize {
entry:
- tail call void @llvm.dbg.value(metadata i8 %a, i64 0, metadata !18, metadata !{!"0x102"}), !dbg !32
- %0 = load i8* @x2, align 4, !dbg !32
- tail call void @llvm.dbg.value(metadata i8 %0, i64 0, metadata !19, metadata !{!"0x102"}), !dbg !32
+ tail call void @llvm.dbg.value(metadata i8 %a, i64 0, metadata !18, metadata !DIExpression()), !dbg !32
+ %0 = load i8, i8* @x2, align 4, !dbg !32
+ tail call void @llvm.dbg.value(metadata i8 %0, i64 0, metadata !19, metadata !DIExpression()), !dbg !32
store i8 %a, i8* @x2, align 4, !dbg !32
ret i8 %0, !dbg !33
}
define zeroext i8 @get3(i8 zeroext %a) nounwind optsize {
entry:
- tail call void @llvm.dbg.value(metadata i8 %a, i64 0, metadata !21, metadata !{!"0x102"}), !dbg !34
- %0 = load i8* @x3, align 4, !dbg !34
- tail call void @llvm.dbg.value(metadata i8 %0, i64 0, metadata !22, metadata !{!"0x102"}), !dbg !34
+ tail call void @llvm.dbg.value(metadata i8 %a, i64 0, metadata !21, metadata !DIExpression()), !dbg !34
+ %0 = load i8, i8* @x3, align 4, !dbg !34
+ tail call void @llvm.dbg.value(metadata i8 %0, i64 0, metadata !22, metadata !DIExpression()), !dbg !34
store i8 %a, i8* @x3, align 4, !dbg !34
ret i8 %0, !dbg !35
}
define zeroext i8 @get4(i8 zeroext %a) nounwind optsize {
entry:
- tail call void @llvm.dbg.value(metadata i8 %a, i64 0, metadata !24, metadata !{!"0x102"}), !dbg !36
- %0 = load i8* @x4, align 4, !dbg !36
- tail call void @llvm.dbg.value(metadata i8 %0, i64 0, metadata !25, metadata !{!"0x102"}), !dbg !36
+ tail call void @llvm.dbg.value(metadata i8 %a, i64 0, metadata !24, metadata !DIExpression()), !dbg !36
+ %0 = load i8, i8* @x4, align 4, !dbg !36
+ tail call void @llvm.dbg.value(metadata i8 %0, i64 0, metadata !25, metadata !DIExpression()), !dbg !36
store i8 %a, i8* @x4, align 4, !dbg !36
ret i8 %0, !dbg !37
}
define zeroext i8 @get5(i8 zeroext %a) nounwind optsize {
entry:
- tail call void @llvm.dbg.value(metadata i8 %a, i64 0, metadata !27, metadata !{!"0x102"}), !dbg !38
- %0 = load i8* @x5, align 4, !dbg !38
- tail call void @llvm.dbg.value(metadata i8 %0, i64 0, metadata !28, metadata !{!"0x102"}), !dbg !38
+ tail call void @llvm.dbg.value(metadata i8 %a, i64 0, metadata !27, metadata !DIExpression()), !dbg !38
+ %0 = load i8, i8* @x5, align 4, !dbg !38
+ tail call void @llvm.dbg.value(metadata i8 %0, i64 0, metadata !28, metadata !DIExpression()), !dbg !38
store i8 %a, i8* @x5, align 4, !dbg !38
ret i8 %0, !dbg !39
}
@@ -78,46 +78,46 @@ entry:
!llvm.dbg.cu = !{!2}
!llvm.module.flags = !{!49}
-!0 = !{!"0x2e\00get1\00get1\00get1\004\000\001\000\006\00256\001\004", !47, !1, !3, null, i8 (i8)* @get1, null, null, !42} ; [ DW_TAG_subprogram ]
-!1 = !{!"0x29", !47} ; [ DW_TAG_file_type ]
-!2 = !{!"0x11\001\004.2.1 (Based on Apple Inc. build 5658) (LLVM build 2369.8)\001\00\000\00\000", !47, !48, !48, !40, !41, !48} ; [ DW_TAG_compile_unit ]
-!3 = !{!"0x15\00\000\000\000\000\000\000", !47, !1, null, !4, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!0 = !DISubprogram(name: "get1", linkageName: "get1", line: 4, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 4, file: !47, scope: !1, type: !3, function: i8 (i8)* @get1, variables: !42)
+!1 = !DIFile(filename: "foo.c", directory: "/tmp/")
+!2 = !DICompileUnit(language: DW_LANG_C89, producer: "4.2.1 (Based on Apple Inc. build 5658) (LLVM build 2369.8)", isOptimized: true, emissionKind: 0, file: !47, enums: !48, retainedTypes: !48, subprograms: !40, globals: !41, imports: !48)
+!3 = !DISubroutineType(types: !4)
!4 = !{!5, !5}
-!5 = !{!"0x24\00_Bool\000\008\008\000\000\002", !47, !1} ; [ DW_TAG_base_type ]
-!6 = !{!"0x2e\00get2\00get2\00get2\007\000\001\000\006\00256\001\007", !47, !1, !3, null, i8 (i8)* @get2, null, null, !43} ; [ DW_TAG_subprogram ]
-!7 = !{!"0x2e\00get3\00get3\00get3\0010\000\001\000\006\00256\001\0010", !47, !1, !3, null, i8 (i8)* @get3, null, null, !44} ; [ DW_TAG_subprogram ]
-!8 = !{!"0x2e\00get4\00get4\00get4\0013\000\001\000\006\00256\001\0013", !47, !1, !3, null, i8 (i8)* @get4, null, null, !45} ; [ DW_TAG_subprogram ]
-!9 = !{!"0x2e\00get5\00get5\00get5\0016\000\001\000\006\00256\001\0016", !47, !1, !3, null, i8 (i8)* @get5, null, null, !46} ; [ DW_TAG_subprogram ]
-!10 = !{!"0x101\00a\004\000", !0, !1, !5} ; [ DW_TAG_arg_variable ]
-!11 = !{!"0x100\00b\004\000", !12, !1, !5} ; [ DW_TAG_auto_variable ]
-!12 = !{!"0xb\004\000\000", !47, !0} ; [ DW_TAG_lexical_block ]
-!13 = !{!"0x34\00x1\00x1\00\003\001\001", !1, !1, !5, i8* @x1, null} ; [ DW_TAG_variable ]
-!14 = !{!"0x34\00x2\00x2\00\006\001\001", !1, !1, !5, i8* @x2, null} ; [ DW_TAG_variable ]
-!15 = !{!"0x34\00x3\00x3\00\009\001\001", !1, !1, !5, i8* @x3, null} ; [ DW_TAG_variable ]
-!16 = !{!"0x34\00x4\00x4\00\0012\001\001", !1, !1, !5, i8* @x4, null} ; [ DW_TAG_variable ]
-!17 = !{!"0x34\00x5\00x5\00\0015\000\001", !1, !1, !5, i8* @x5, null} ; [ DW_TAG_variable ]
-!18 = !{!"0x101\00a\007\000", !6, !1, !5} ; [ DW_TAG_arg_variable ]
-!19 = !{!"0x100\00b\007\000", !20, !1, !5} ; [ DW_TAG_auto_variable ]
-!20 = !{!"0xb\007\000\001", !47, !6} ; [ DW_TAG_lexical_block ]
-!21 = !{!"0x101\00a\0010\000", !7, !1, !5} ; [ DW_TAG_arg_variable ]
-!22 = !{!"0x100\00b\0010\000", !23, !1, !5} ; [ DW_TAG_auto_variable ]
-!23 = !{!"0xb\0010\000\002", !47, !7} ; [ DW_TAG_lexical_block ]
-!24 = !{!"0x101\00a\0013\000", !8, !1, !5} ; [ DW_TAG_arg_variable ]
-!25 = !{!"0x100\00b\0013\000", !26, !1, !5} ; [ DW_TAG_auto_variable ]
-!26 = !{!"0xb\0013\000\003", !47, !8} ; [ DW_TAG_lexical_block ]
-!27 = !{!"0x101\00a\0016\000", !9, !1, !5} ; [ DW_TAG_arg_variable ]
-!28 = !{!"0x100\00b\0016\000", !29, !1, !5} ; [ DW_TAG_auto_variable ]
-!29 = !{!"0xb\0016\000\004", !47, !9} ; [ DW_TAG_lexical_block ]
-!30 = !MDLocation(line: 4, scope: !0)
-!31 = !MDLocation(line: 4, scope: !12)
-!32 = !MDLocation(line: 7, scope: !6)
-!33 = !MDLocation(line: 7, scope: !20)
-!34 = !MDLocation(line: 10, scope: !7)
-!35 = !MDLocation(line: 10, scope: !23)
-!36 = !MDLocation(line: 13, scope: !8)
-!37 = !MDLocation(line: 13, scope: !26)
-!38 = !MDLocation(line: 16, scope: !9)
-!39 = !MDLocation(line: 16, scope: !29)
+!5 = !DIBasicType(tag: DW_TAG_base_type, name: "_Bool", size: 8, align: 8, encoding: DW_ATE_boolean)
+!6 = !DISubprogram(name: "get2", linkageName: "get2", line: 7, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 7, file: !47, scope: !1, type: !3, function: i8 (i8)* @get2, variables: !43)
+!7 = !DISubprogram(name: "get3", linkageName: "get3", line: 10, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 10, file: !47, scope: !1, type: !3, function: i8 (i8)* @get3, variables: !44)
+!8 = !DISubprogram(name: "get4", linkageName: "get4", line: 13, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 13, file: !47, scope: !1, type: !3, function: i8 (i8)* @get4, variables: !45)
+!9 = !DISubprogram(name: "get5", linkageName: "get5", line: 16, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 16, file: !47, scope: !1, type: !3, function: i8 (i8)* @get5, variables: !46)
+!10 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "a", line: 4, arg: 0, scope: !0, file: !1, type: !5)
+!11 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "b", line: 4, scope: !12, file: !1, type: !5)
+!12 = distinct !DILexicalBlock(line: 4, column: 0, file: !47, scope: !0)
+!13 = !DIGlobalVariable(name: "x1", line: 3, isLocal: true, isDefinition: true, scope: !1, file: !1, type: !5, variable: i8* @x1)
+!14 = !DIGlobalVariable(name: "x2", line: 6, isLocal: true, isDefinition: true, scope: !1, file: !1, type: !5, variable: i8* @x2)
+!15 = !DIGlobalVariable(name: "x3", line: 9, isLocal: true, isDefinition: true, scope: !1, file: !1, type: !5, variable: i8* @x3)
+!16 = !DIGlobalVariable(name: "x4", line: 12, isLocal: true, isDefinition: true, scope: !1, file: !1, type: !5, variable: i8* @x4)
+!17 = !DIGlobalVariable(name: "x5", line: 15, isLocal: false, isDefinition: true, scope: !1, file: !1, type: !5, variable: i8* @x5)
+!18 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "a", line: 7, arg: 0, scope: !6, file: !1, type: !5)
+!19 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "b", line: 7, scope: !20, file: !1, type: !5)
+!20 = distinct !DILexicalBlock(line: 7, column: 0, file: !47, scope: !6)
+!21 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "a", line: 10, arg: 0, scope: !7, file: !1, type: !5)
+!22 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "b", line: 10, scope: !23, file: !1, type: !5)
+!23 = distinct !DILexicalBlock(line: 10, column: 0, file: !47, scope: !7)
+!24 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "a", line: 13, arg: 0, scope: !8, file: !1, type: !5)
+!25 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "b", line: 13, scope: !26, file: !1, type: !5)
+!26 = distinct !DILexicalBlock(line: 13, column: 0, file: !47, scope: !8)
+!27 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "a", line: 16, arg: 0, scope: !9, file: !1, type: !5)
+!28 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "b", line: 16, scope: !29, file: !1, type: !5)
+!29 = distinct !DILexicalBlock(line: 16, column: 0, file: !47, scope: !9)
+!30 = !DILocation(line: 4, scope: !0)
+!31 = !DILocation(line: 4, scope: !12)
+!32 = !DILocation(line: 7, scope: !6)
+!33 = !DILocation(line: 7, scope: !20)
+!34 = !DILocation(line: 10, scope: !7)
+!35 = !DILocation(line: 10, scope: !23)
+!36 = !DILocation(line: 13, scope: !8)
+!37 = !DILocation(line: 13, scope: !26)
+!38 = !DILocation(line: 16, scope: !9)
+!39 = !DILocation(line: 16, scope: !29)
!40 = !{!0, !6, !7, !8, !9}
!41 = !{!13, !14, !15, !16, !17}
!42 = !{!10, !11}
@@ -125,6 +125,6 @@ entry:
!44 = !{!21, !22}
!45 = !{!24, !25}
!46 = !{!27, !28}
-!47 = !{!"foo.c", !"/tmp/"}
+!47 = !DIFile(filename: "foo.c", directory: "/tmp/")
!48 = !{}
-!49 = !{i32 1, !"Debug Info Version", i32 2}
+!49 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/CodeGen/ARM/2011-02-04-AntidepMultidef.ll b/test/CodeGen/ARM/2011-02-04-AntidepMultidef.ll
index 3950c9e081f7..8403dd991360 100644
--- a/test/CodeGen/ARM/2011-02-04-AntidepMultidef.ll
+++ b/test/CodeGen/ARM/2011-02-04-AntidepMultidef.ll
@@ -19,7 +19,7 @@ bb3: ; preds = %bb, %entry
br i1 %tst, label %bb46, label %bb8
bb8: ; preds = %bb3
- %1 = getelementptr inbounds i8* %0, i32 0
+ %1 = getelementptr inbounds i8, i8* %0, i32 0
store i8 0, i8* %1, align 1
%2 = call i32 @ptou() nounwind
; CHECK: umull [[REGISTER:lr|r[0-9]+]],
diff --git a/test/CodeGen/ARM/2011-02-07-AntidepClobber.ll b/test/CodeGen/ARM/2011-02-07-AntidepClobber.ll
index f3d788818afc..e96641bf6671 100644
--- a/test/CodeGen/ARM/2011-02-07-AntidepClobber.ll
+++ b/test/CodeGen/ARM/2011-02-07-AntidepClobber.ll
@@ -39,12 +39,12 @@ bb134: ; preds = %bb131
unreachable
bb135: ; preds = %meshBB396
- %uriHash.1.phi.load = load i32* undef
- %.load120 = load i8*** %.SV116
- %.phi24 = load i8* null
- %.phi26 = load i8** null
+ %uriHash.1.phi.load = load i32, i32* undef
+ %.load120 = load i8**, i8*** %.SV116
+ %.phi24 = load i8, i8* null
+ %.phi26 = load i8*, i8** null
store i8 %.phi24, i8* %.phi26, align 1
- %0 = getelementptr inbounds i8* %.phi26, i32 1
+ %0 = getelementptr inbounds i8, i8* %.phi26, i32 1
store i8* %0, i8** %.load120, align 4
; CHECK: mul [[REGISTER:lr|r[0-9]+]],
; CHECK-NOT: [[REGISTER]],
@@ -52,7 +52,7 @@ bb135: ; preds = %meshBB396
%1 = mul i32 %uriHash.1.phi.load, 1000003
%2 = xor i32 0, %1
store i32 %2, i32* null
- %3 = load i8* null, align 1
+ %3 = load i8, i8* null, align 1
%4 = icmp eq i8 %3, 0
store i8* %0, i8** undef
br i1 %4, label %meshBB472, label %bb131
diff --git a/test/CodeGen/ARM/2011-03-10-DAGCombineCrash.ll b/test/CodeGen/ARM/2011-03-10-DAGCombineCrash.ll
index 81babe0b4b19..c447a1f25b65 100644
--- a/test/CodeGen/ARM/2011-03-10-DAGCombineCrash.ll
+++ b/test/CodeGen/ARM/2011-03-10-DAGCombineCrash.ll
@@ -17,10 +17,10 @@ bb: ; preds = %entry
bb1: ; preds = %entry
%0 = call %struct.ui* @vn_pp_to_ui(i32* undef) nounwind
call void @llvm.memset.p0i8.i32(i8* undef, i8 0, i32 40, i32 4, i1 false)
- %1 = getelementptr inbounds %struct.ui* %0, i32 0, i32 0
+ %1 = getelementptr inbounds %struct.ui, %struct.ui* %0, i32 0, i32 0
store %struct.mo* undef, %struct.mo** %1, align 4
- %2 = getelementptr inbounds %struct.ui* %0, i32 0, i32 5
- %3 = load i64* %2, align 4
+ %2 = getelementptr inbounds %struct.ui, %struct.ui* %0, i32 0, i32 5
+ %3 = load i64, i64* %2, align 4
%4 = call i32 @mo_create_nnm(%struct.mo* undef, i64 %3, i32** undef) nounwind
br i1 undef, label %bb3, label %bb2
diff --git a/test/CodeGen/ARM/2011-03-15-LdStMultipleBug.ll b/test/CodeGen/ARM/2011-03-15-LdStMultipleBug.ll
index 837feb6e85c2..92bdd19a7b3d 100644
--- a/test/CodeGen/ARM/2011-03-15-LdStMultipleBug.ll
+++ b/test/CodeGen/ARM/2011-03-15-LdStMultipleBug.ll
@@ -18,11 +18,11 @@ for.body.lr.ph:
for.body: ; preds = %_Z14printIsNotZeroi.exit17.for.body_crit_edge, %for.body.lr.ph
%tmp3 = phi i1 [ false, %for.body.lr.ph ], [ %phitmp27, %_Z14printIsNotZeroi.exit17.for.body_crit_edge ]
%i.022 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %_Z14printIsNotZeroi.exit17.for.body_crit_edge ]
- %x = getelementptr %struct.Outer* @oStruct, i32 0, i32 1, i32 %i.022, i32 0
- %y = getelementptr %struct.Outer* @oStruct, i32 0, i32 1, i32 %i.022, i32 1
+ %x = getelementptr %struct.Outer, %struct.Outer* @oStruct, i32 0, i32 1, i32 %i.022, i32 0
+ %y = getelementptr %struct.Outer, %struct.Outer* @oStruct, i32 0, i32 1, i32 %i.022, i32 1
%inc = add i32 %i.022, 1
- %tmp8 = load i32* %x, align 4
- %tmp11 = load i32* %y, align 4
+ %tmp8 = load i32, i32* %x, align 4
+ %tmp11 = load i32, i32* %y, align 4
%mul = mul nsw i32 %tmp11, %tmp8
%tobool.i14 = icmp eq i32 %mul, 0
br i1 %tobool.i14, label %_Z14printIsNotZeroi.exit17, label %if.then.i16
@@ -34,8 +34,8 @@ _Z14printIsNotZeroi.exit17: ; preds = %_Z14printIsNotZeroi
br label %_Z14printIsNotZeroi.exit17.for.body_crit_edge
_Z14printIsNotZeroi.exit17.for.body_crit_edge: ; preds = %_Z14printIsNotZeroi.exit17
- %b.phi.trans.insert = getelementptr %struct.Outer* @oStruct, i32 0, i32 1, i32 %inc, i32 3
- %tmp3.pre = load i8* %b.phi.trans.insert, align 1
+ %b.phi.trans.insert = getelementptr %struct.Outer, %struct.Outer* @oStruct, i32 0, i32 1, i32 %inc, i32 3
+ %tmp3.pre = load i8, i8* %b.phi.trans.insert, align 1
%phitmp27 = icmp eq i8 %val8, 0
br label %for.body
diff --git a/test/CodeGen/ARM/2011-04-07-schediv.ll b/test/CodeGen/ARM/2011-04-07-schediv.ll
index f3dd3dd5811e..7f603157c5d3 100644
--- a/test/CodeGen/ARM/2011-04-07-schediv.ll
+++ b/test/CodeGen/ARM/2011-04-07-schediv.ll
@@ -18,10 +18,10 @@ entry:
bb: ; preds = %entry, %bb
%j.05 = phi i32 [ %2, %bb ], [ 0, %entry ]
%tmp = mul i32 %j.05, %src_copy_start_index
- %uglygep = getelementptr i8* %src_copy_start6, i32 %tmp
+ %uglygep = getelementptr i8, i8* %src_copy_start6, i32 %tmp
%src_copy_start_addr.04 = bitcast i8* %uglygep to float*
- %dst_copy_start_addr.03 = getelementptr float* %dst_copy_start, i32 %j.05
- %1 = load float* %src_copy_start_addr.04, align 4
+ %dst_copy_start_addr.03 = getelementptr float, float* %dst_copy_start, i32 %j.05
+ %1 = load float, float* %src_copy_start_addr.04, align 4
store float %1, float* %dst_copy_start_addr.03, align 4
%2 = add i32 %j.05, 1
%exitcond = icmp eq i32 %2, %src_width
diff --git a/test/CodeGen/ARM/2011-04-11-MachineLICMBug.ll b/test/CodeGen/ARM/2011-04-11-MachineLICMBug.ll
index e30c9c615053..54fc9b049b8e 100644
--- a/test/CodeGen/ARM/2011-04-11-MachineLICMBug.ll
+++ b/test/CodeGen/ARM/2011-04-11-MachineLICMBug.ll
@@ -18,8 +18,8 @@ for.body:
; CHECK: mov{{.*}} r{{[0-9]+}}, #{{[01]}}
; CHECK: mov{{.*}} r{{[0-9]+}}, #{{[01]}}
; CHECK-NOT: mov r{{[0-9]+}}, #{{[01]}}
- %arrayidx = getelementptr i32* %A, i32 %0
- %tmp4 = load i32* %arrayidx, align 4
+ %arrayidx = getelementptr i32, i32* %A, i32 %0
+ %tmp4 = load i32, i32* %arrayidx, align 4
%cmp6 = icmp eq i32 %tmp4, %value
br i1 %cmp6, label %return, label %for.inc
diff --git a/test/CodeGen/ARM/2011-04-12-FastRegAlloc.ll b/test/CodeGen/ARM/2011-04-12-FastRegAlloc.ll
index e9a6793a768a..8ad654fc8f9d 100644
--- a/test/CodeGen/ARM/2011-04-12-FastRegAlloc.ll
+++ b/test/CodeGen/ARM/2011-04-12-FastRegAlloc.ll
@@ -7,7 +7,7 @@ target triple = "thumbv7-apple-darwin10.0.0"
define void @_Z8TestCasev() nounwind ssp {
entry:
%a = alloca float, align 4
- %tmp = load float* %a, align 4
+ %tmp = load float, float* %a, align 4
call void asm sideeffect "", "w,~{s0},~{s16}"(float %tmp) nounwind, !srcloc !0
ret void
}
diff --git a/test/CodeGen/ARM/2011-04-15-AndVFlagPeepholeBug.ll b/test/CodeGen/ARM/2011-04-15-AndVFlagPeepholeBug.ll
index e712e08ddb6a..f17884e0fa41 100644
--- a/test/CodeGen/ARM/2011-04-15-AndVFlagPeepholeBug.ll
+++ b/test/CodeGen/ARM/2011-04-15-AndVFlagPeepholeBug.ll
@@ -12,7 +12,7 @@ entry:
br i1 %cmp, label %if.then, label %if.end
if.then: ; preds = %entry
- tail call void (...)* @g(i32 %a, i32 %b) nounwind
+ tail call void (...) @g(i32 %a, i32 %b) nounwind
br label %if.end
if.end: ; preds = %if.then, %entry
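The one-line change above belongs to the same syntax cleanup: a call through a varargs declaration now writes the callee's function type rather than a pointer-to-function type. Illustrative sketch, assuming a hypothetical declaration declare void @g(...):

    tail call void (...)* @g(i32 %a, i32 %b) nounwind   ; older textual IR: pointer-to-function type
    tail call void (...) @g(i32 %a, i32 %b) nounwind    ; newer textual IR: function type only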
diff --git a/test/CodeGen/ARM/2011-04-15-RegisterCmpPeephole.ll b/test/CodeGen/ARM/2011-04-15-RegisterCmpPeephole.ll
index 5404cf57a59f..864e2917b7bb 100644
--- a/test/CodeGen/ARM/2011-04-15-RegisterCmpPeephole.ll
+++ b/test/CodeGen/ARM/2011-04-15-RegisterCmpPeephole.ll
@@ -12,7 +12,7 @@ entry:
br i1 %cmp, label %if.then, label %if.end
if.then: ; preds = %entry
- tail call void (...)* @h(i32 %a, i32 %b) nounwind
+ tail call void (...) @h(i32 %a, i32 %b) nounwind
br label %if.end
if.end: ; preds = %if.then, %entry
@@ -31,7 +31,7 @@ entry:
br i1 %cmp, label %if.then, label %if.end
if.then: ; preds = %entry
- tail call void (...)* @h(i32 %a, i32 %b) nounwind
+ tail call void (...) @h(i32 %a, i32 %b) nounwind
br label %if.end
if.end: ; preds = %if.then, %entry
diff --git a/test/CodeGen/ARM/2011-04-26-SchedTweak.ll b/test/CodeGen/ARM/2011-04-26-SchedTweak.ll
index 057c19948c35..ee6d79c39f2f 100644
--- a/test/CodeGen/ARM/2011-04-26-SchedTweak.ll
+++ b/test/CodeGen/ARM/2011-04-26-SchedTweak.ll
@@ -22,7 +22,7 @@ entry:
%block_count = alloca i32, align 4
%index_cache = alloca i32, align 4
store i32 0, i32* %index_cache, align 4
- %tmp = load i32* @G, align 4
+ %tmp = load i32, i32* @G, align 4
%tmp1 = call i32 @bar(i32 0, i32 0, i32 %tmp) nounwind
switch i32 %tmp1, label %bb8 [
i32 0, label %bb
@@ -31,7 +31,7 @@ entry:
]
bb:
- %tmp2 = load i32* @G, align 4
+ %tmp2 = load i32, i32* @G, align 4
%tmp4 = icmp eq i32 %tmp2, 0
br i1 %tmp4, label %bb1, label %bb8
@@ -41,14 +41,14 @@ bb1:
; CHECK: blx _Get
; CHECK: umull
; CHECK: blx _foo
- %tmp5 = load i32* %block_size, align 4
- %tmp6 = load i32* %block_count, align 4
+ %tmp5 = load i32, i32* %block_size, align 4
+ %tmp6 = load i32, i32* %block_count, align 4
%tmp7 = call %struct.FF* @Get() nounwind
store %struct.FF* %tmp7, %struct.FF** @FuncPtr, align 4
%tmp10 = zext i32 %tmp6 to i64
%tmp11 = zext i32 %tmp5 to i64
%tmp12 = mul nsw i64 %tmp10, %tmp11
- %tmp13 = call i32 @foo(i8* getelementptr inbounds ([6 x i8]* @.str1, i32 0, i32 0), i64 %tmp12, i32 %tmp5) nounwind
+ %tmp13 = call i32 @foo(i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str1, i32 0, i32 0), i64 %tmp12, i32 %tmp5) nounwind
br label %bb8
bb4:
diff --git a/test/CodeGen/ARM/2011-06-29-MergeGlobalsAlign.ll b/test/CodeGen/ARM/2011-06-29-MergeGlobalsAlign.ll
index 091d037356de..aac8f7b3a026 100644
--- a/test/CodeGen/ARM/2011-06-29-MergeGlobalsAlign.ll
+++ b/test/CodeGen/ARM/2011-06-29-MergeGlobalsAlign.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=thumbv7-apple-darwin10 | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin10 -arm-global-merge -global-merge-group-by-use=false | FileCheck %s
; CHECK: .zerofill __DATA,__bss,__MergedGlobals,16,2
@prev = external global [0 x i16]
diff --git a/test/CodeGen/ARM/2011-08-02-MergedGlobalDbg.ll b/test/CodeGen/ARM/2011-08-02-MergedGlobalDbg.ll
index 3edc946825bb..067c719f491c 100644
--- a/test/CodeGen/ARM/2011-08-02-MergedGlobalDbg.ll
+++ b/test/CodeGen/ARM/2011-08-02-MergedGlobalDbg.ll
@@ -1,4 +1,4 @@
-; RUN: llc -filetype=obj < %s | llvm-dwarfdump -debug-dump=info - | FileCheck %s
+; RUN: llc -arm-global-merge -global-merge-group-by-use=false -filetype=obj < %s | llvm-dwarfdump -debug-dump=info - | FileCheck %s
; Check debug info output for merged global.
; DW_AT_location
@@ -29,41 +29,41 @@ target triple = "thumbv7-apple-macosx10.7.0"
@x5 = global i32 0, align 4
define i32 @get1(i32 %a) nounwind optsize ssp {
- tail call void @llvm.dbg.value(metadata i32 %a, i64 0, metadata !10, metadata !{!"0x102"}), !dbg !30
- %1 = load i32* @x1, align 4, !dbg !31
- tail call void @llvm.dbg.value(metadata i32 %1, i64 0, metadata !11, metadata !{!"0x102"}), !dbg !31
+ tail call void @llvm.dbg.value(metadata i32 %a, i64 0, metadata !10, metadata !DIExpression()), !dbg !30
+ %1 = load i32, i32* @x1, align 4, !dbg !31
+ tail call void @llvm.dbg.value(metadata i32 %1, i64 0, metadata !11, metadata !DIExpression()), !dbg !31
store i32 %a, i32* @x1, align 4, !dbg !31
ret i32 %1, !dbg !31
}
define i32 @get2(i32 %a) nounwind optsize ssp {
- tail call void @llvm.dbg.value(metadata i32 %a, i64 0, metadata !13, metadata !{!"0x102"}), !dbg !32
- %1 = load i32* @x2, align 4, !dbg !33
- tail call void @llvm.dbg.value(metadata i32 %1, i64 0, metadata !14, metadata !{!"0x102"}), !dbg !33
+ tail call void @llvm.dbg.value(metadata i32 %a, i64 0, metadata !13, metadata !DIExpression()), !dbg !32
+ %1 = load i32, i32* @x2, align 4, !dbg !33
+ tail call void @llvm.dbg.value(metadata i32 %1, i64 0, metadata !14, metadata !DIExpression()), !dbg !33
store i32 %a, i32* @x2, align 4, !dbg !33
ret i32 %1, !dbg !33
}
define i32 @get3(i32 %a) nounwind optsize ssp {
- tail call void @llvm.dbg.value(metadata i32 %a, i64 0, metadata !16, metadata !{!"0x102"}), !dbg !34
- %1 = load i32* @x3, align 4, !dbg !35
- tail call void @llvm.dbg.value(metadata i32 %1, i64 0, metadata !17, metadata !{!"0x102"}), !dbg !35
+ tail call void @llvm.dbg.value(metadata i32 %a, i64 0, metadata !16, metadata !DIExpression()), !dbg !34
+ %1 = load i32, i32* @x3, align 4, !dbg !35
+ tail call void @llvm.dbg.value(metadata i32 %1, i64 0, metadata !17, metadata !DIExpression()), !dbg !35
store i32 %a, i32* @x3, align 4, !dbg !35
ret i32 %1, !dbg !35
}
define i32 @get4(i32 %a) nounwind optsize ssp {
- tail call void @llvm.dbg.value(metadata i32 %a, i64 0, metadata !19, metadata !{!"0x102"}), !dbg !36
- %1 = load i32* @x4, align 4, !dbg !37
- tail call void @llvm.dbg.value(metadata i32 %1, i64 0, metadata !20, metadata !{!"0x102"}), !dbg !37
+ tail call void @llvm.dbg.value(metadata i32 %a, i64 0, metadata !19, metadata !DIExpression()), !dbg !36
+ %1 = load i32, i32* @x4, align 4, !dbg !37
+ tail call void @llvm.dbg.value(metadata i32 %1, i64 0, metadata !20, metadata !DIExpression()), !dbg !37
store i32 %a, i32* @x4, align 4, !dbg !37
ret i32 %1, !dbg !37
}
define i32 @get5(i32 %a) nounwind optsize ssp {
- tail call void @llvm.dbg.value(metadata i32 %a, i64 0, metadata !27, metadata !{!"0x102"}), !dbg !38
- %1 = load i32* @x5, align 4, !dbg !39
- tail call void @llvm.dbg.value(metadata i32 %1, i64 0, metadata !28, metadata !{!"0x102"}), !dbg !39
+ tail call void @llvm.dbg.value(metadata i32 %a, i64 0, metadata !27, metadata !DIExpression()), !dbg !38
+ %1 = load i32, i32* @x5, align 4, !dbg !39
+ tail call void @llvm.dbg.value(metadata i32 %1, i64 0, metadata !28, metadata !DIExpression()), !dbg !39
store i32 %a, i32* @x5, align 4, !dbg !39
ret i32 %1, !dbg !39
}
@@ -73,43 +73,43 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnon
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!49}
-!0 = !{!"0x11\0012\00clang\001\00\000\00\001", !47, !48, !48, !40, !41, !48} ; [ DW_TAG_compile_unit ]
-!1 = !{!"0x2e\00get1\00get1\00\005\000\001\000\006\00256\001\005", !47, !2, !3, null, i32 (i32)* @get1, null, null, !42} ; [ DW_TAG_subprogram ] [line 5] [def] [get1]
-!2 = !{!"0x29", !47} ; [ DW_TAG_file_type ]
-!3 = !{!"0x15\00\000\000\000\000\000\000", !47, !2, null, !4, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!0 = !DICompileUnit(language: DW_LANG_C99, producer: "clang", isOptimized: true, emissionKind: 1, file: !47, enums: !48, retainedTypes: !48, subprograms: !40, globals: !41, imports: !48)
+!1 = !DISubprogram(name: "get1", line: 5, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 5, file: !47, scope: !2, type: !3, function: i32 (i32)* @get1, variables: !42)
+!2 = !DIFile(filename: "ss3.c", directory: "/private/tmp")
+!3 = !DISubroutineType(types: !4)
!4 = !{!5}
-!5 = !{!"0x24\00int\000\0032\0032\000\000\005", null, !0} ; [ DW_TAG_base_type ]
-!6 = !{!"0x2e\00get2\00get2\00\008\000\001\000\006\00256\001\008", !47, !2, !3, null, i32 (i32)* @get2, null, null, !43} ; [ DW_TAG_subprogram ] [line 8] [def] [get2]
-!7 = !{!"0x2e\00get3\00get3\00\0011\000\001\000\006\00256\001\0011", !47, !2, !3, null, i32 (i32)* @get3, null, null, !44} ; [ DW_TAG_subprogram ] [line 11] [def] [get3]
-!8 = !{!"0x2e\00get4\00get4\00\0014\000\001\000\006\00256\001\0014", !47, !2, !3, null, i32 (i32)* @get4, null, null, !45} ; [ DW_TAG_subprogram ] [line 14] [def] [get4]
-!9 = !{!"0x2e\00get5\00get5\00\0017\000\001\000\006\00256\001\0017", !47, !2, !3, null, i32 (i32)* @get5, null, null, !46} ; [ DW_TAG_subprogram ] [line 17] [def] [get5]
-!10 = !{!"0x101\00a\0016777221\000", !1, !2, !5} ; [ DW_TAG_arg_variable ]
-!11 = !{!"0x100\00b\005\000", !12, !2, !5} ; [ DW_TAG_auto_variable ]
-!12 = !{!"0xb\005\0019\000", !47, !1} ; [ DW_TAG_lexical_block ]
-!13 = !{!"0x101\00a\0016777224\000", !6, !2, !5} ; [ DW_TAG_arg_variable ]
-!14 = !{!"0x100\00b\008\000", !15, !2, !5} ; [ DW_TAG_auto_variable ]
-!15 = !{!"0xb\008\0017\001", !47, !6} ; [ DW_TAG_lexical_block ]
-!16 = !{!"0x101\00a\0016777227\000", !7, !2, !5} ; [ DW_TAG_arg_variable ]
-!17 = !{!"0x100\00b\0011\000", !18, !2, !5} ; [ DW_TAG_auto_variable ]
-!18 = !{!"0xb\0011\0019\002", !47, !7} ; [ DW_TAG_lexical_block ]
-!19 = !{!"0x101\00a\0016777230\000", !8, !2, !5} ; [ DW_TAG_arg_variable ]
-!20 = !{!"0x100\00b\0014\000", !21, !2, !5} ; [ DW_TAG_auto_variable ]
-!21 = !{!"0xb\0014\0019\003", !47, !8} ; [ DW_TAG_lexical_block ]
-!25 = !{!"0x34\00x1\00x1\00\004\001\001", !0, !2, !5, i32* @x1, null} ; [ DW_TAG_variable ]
-!26 = !{!"0x34\00x2\00x2\00\007\001\001", !0, !2, !5, i32* @x2, null} ; [ DW_TAG_variable ]
-!27 = !{!"0x101\00a\0016777233\000", !9, !2, !5} ; [ DW_TAG_arg_variable ]
-!28 = !{!"0x100\00b\0017\000", !29, !2, !5} ; [ DW_TAG_auto_variable ]
-!29 = !{!"0xb\0017\0019\004", !47, !9} ; [ DW_TAG_lexical_block ]
-!30 = !MDLocation(line: 5, column: 16, scope: !1)
-!31 = !MDLocation(line: 5, column: 32, scope: !12)
-!32 = !MDLocation(line: 8, column: 14, scope: !6)
-!33 = !MDLocation(line: 8, column: 29, scope: !15)
-!34 = !MDLocation(line: 11, column: 16, scope: !7)
-!35 = !MDLocation(line: 11, column: 32, scope: !18)
-!36 = !MDLocation(line: 14, column: 16, scope: !8)
-!37 = !MDLocation(line: 14, column: 32, scope: !21)
-!38 = !MDLocation(line: 17, column: 16, scope: !9)
-!39 = !MDLocation(line: 17, column: 32, scope: !29)
+!5 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!6 = !DISubprogram(name: "get2", line: 8, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 8, file: !47, scope: !2, type: !3, function: i32 (i32)* @get2, variables: !43)
+!7 = !DISubprogram(name: "get3", line: 11, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 11, file: !47, scope: !2, type: !3, function: i32 (i32)* @get3, variables: !44)
+!8 = !DISubprogram(name: "get4", line: 14, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 14, file: !47, scope: !2, type: !3, function: i32 (i32)* @get4, variables: !45)
+!9 = !DISubprogram(name: "get5", line: 17, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 17, file: !47, scope: !2, type: !3, function: i32 (i32)* @get5, variables: !46)
+!10 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "a", line: 5, arg: 1, scope: !1, file: !2, type: !5)
+!11 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "b", line: 5, scope: !12, file: !2, type: !5)
+!12 = distinct !DILexicalBlock(line: 5, column: 19, file: !47, scope: !1)
+!13 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "a", line: 8, arg: 1, scope: !6, file: !2, type: !5)
+!14 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "b", line: 8, scope: !15, file: !2, type: !5)
+!15 = distinct !DILexicalBlock(line: 8, column: 17, file: !47, scope: !6)
+!16 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "a", line: 11, arg: 1, scope: !7, file: !2, type: !5)
+!17 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "b", line: 11, scope: !18, file: !2, type: !5)
+!18 = distinct !DILexicalBlock(line: 11, column: 19, file: !47, scope: !7)
+!19 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "a", line: 14, arg: 1, scope: !8, file: !2, type: !5)
+!20 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "b", line: 14, scope: !21, file: !2, type: !5)
+!21 = distinct !DILexicalBlock(line: 14, column: 19, file: !47, scope: !8)
+!25 = !DIGlobalVariable(name: "x1", line: 4, isLocal: true, isDefinition: true, scope: !0, file: !2, type: !5, variable: i32* @x1)
+!26 = !DIGlobalVariable(name: "x2", line: 7, isLocal: true, isDefinition: true, scope: !0, file: !2, type: !5, variable: i32* @x2)
+!27 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "a", line: 17, arg: 1, scope: !9, file: !2, type: !5)
+!28 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "b", line: 17, scope: !29, file: !2, type: !5)
+!29 = distinct !DILexicalBlock(line: 17, column: 19, file: !47, scope: !9)
+!30 = !DILocation(line: 5, column: 16, scope: !1)
+!31 = !DILocation(line: 5, column: 32, scope: !12)
+!32 = !DILocation(line: 8, column: 14, scope: !6)
+!33 = !DILocation(line: 8, column: 29, scope: !15)
+!34 = !DILocation(line: 11, column: 16, scope: !7)
+!35 = !DILocation(line: 11, column: 32, scope: !18)
+!36 = !DILocation(line: 14, column: 16, scope: !8)
+!37 = !DILocation(line: 14, column: 32, scope: !21)
+!38 = !DILocation(line: 17, column: 16, scope: !9)
+!39 = !DILocation(line: 17, column: 32, scope: !29)
!40 = !{!1, !6, !7, !8, !9}
!41 = !{!25, !26}
!42 = !{!10, !11}
@@ -117,6 +117,6 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnon
!44 = !{!16, !17}
!45 = !{!19, !20}
!46 = !{!27, !28}
-!47 = !{!"ss3.c", !"/private/tmp"}
+!47 = !DIFile(filename: "ss3.c", directory: "/private/tmp")
!48 = !{}
-!49 = !{i32 1, !"Debug Info Version", i32 2}
+!49 = !{i32 1, !"Debug Info Version", i32 3}
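The metadata hunks above follow LLVM's switch from the old string-encoded debug-info tuples (and !MDLocation) to specialized nodes such as !DIFile, !DISubprogram, !DILocalVariable and !DILocation, with the "Debug Info Version" module flag raised from 2 to 3. A minimal sketch of the two spellings, mirroring lines from this hunk:

    !47 = !{!"ss3.c", !"/private/tmp"}                             ; old: generic tuple standing in for a file node
    !47 = !DIFile(filename: "ss3.c", directory: "/private/tmp")    ; new: dedicated DIFile node
    !30 = !MDLocation(line: 5, column: 16, scope: !1)              ; old location spelling
    !30 = !DILocation(line: 5, column: 16, scope: !1)              ; new location spelling
    !49 = !{i32 1, !"Debug Info Version", i32 3}                   ; module flag bumped from 2 to 3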
diff --git a/test/CodeGen/ARM/2011-08-29-SchedCycle.ll b/test/CodeGen/ARM/2011-08-29-SchedCycle.ll
index be188ef630f2..fb8454479508 100644
--- a/test/CodeGen/ARM/2011-08-29-SchedCycle.ll
+++ b/test/CodeGen/ARM/2011-08-29-SchedCycle.ll
@@ -32,7 +32,7 @@
define void @t() nounwind {
entry:
- %tmp = load i64* undef, align 4
+ %tmp = load i64, i64* undef, align 4
%tmp5 = udiv i64 %tmp, 30
%tmp13 = and i64 %tmp5, 64739244643450880
%tmp16 = sub i64 0, %tmp13
diff --git a/test/CodeGen/ARM/2011-08-29-ldr_pre_imm.ll b/test/CodeGen/ARM/2011-08-29-ldr_pre_imm.ll
index 6647ed8106d5..d9b38b5e5735 100644
--- a/test/CodeGen/ARM/2011-08-29-ldr_pre_imm.ll
+++ b/test/CodeGen/ARM/2011-08-29-ldr_pre_imm.ll
@@ -17,13 +17,13 @@ bb25.lr.ph: ; preds = %entry
bb.i: ; preds = %bb5.i
%1 = shl nsw i32 %k_addr.0.i, 1
%.sum8.i = add i32 %1, -1
- %2 = getelementptr inbounds [256 x i32]* %heap, i32 0, i32 %.sum8.i
- %3 = load i32* %2, align 4
+ %2 = getelementptr inbounds [256 x i32], [256 x i32]* %heap, i32 0, i32 %.sum8.i
+ %3 = load i32, i32* %2, align 4
br i1 false, label %bb5.i, label %bb4.i
bb4.i: ; preds = %bb.i
%.sum10.i = add i32 %k_addr.0.i, -1
- %4 = getelementptr inbounds [256 x i32]* %heap, i32 0, i32 %.sum10.i
+ %4 = getelementptr inbounds [256 x i32], [256 x i32]* %heap, i32 0, i32 %.sum10.i
store i32 %3, i32* %4, align 4
br label %bb5.i
diff --git a/test/CodeGen/ARM/2011-09-09-OddVectorDivision.ll b/test/CodeGen/ARM/2011-09-09-OddVectorDivision.ll
index 8fe91028e0c2..2561af707d75 100644
--- a/test/CodeGen/ARM/2011-09-09-OddVectorDivision.ll
+++ b/test/CodeGen/ARM/2011-09-09-OddVectorDivision.ll
@@ -11,12 +11,12 @@ target triple = "armv7-none-linux-gnueabi"
@z2 = common global <4 x i16> zeroinitializer
define void @f() {
- %1 = load <3 x i16>* @x1
- %2 = load <3 x i16>* @y1
+ %1 = load <3 x i16>, <3 x i16>* @x1
+ %2 = load <3 x i16>, <3 x i16>* @y1
%3 = sdiv <3 x i16> %1, %2
store <3 x i16> %3, <3 x i16>* @z1
- %4 = load <4 x i16>* @x2
- %5 = load <4 x i16>* @y2
+ %4 = load <4 x i16>, <4 x i16>* @x2
+ %5 = load <4 x i16>, <4 x i16>* @y2
%6 = sdiv <4 x i16> %4, %5
store <4 x i16> %6, <4 x i16>* @z2
ret void
diff --git a/test/CodeGen/ARM/2011-09-28-CMovCombineBug.ll b/test/CodeGen/ARM/2011-09-28-CMovCombineBug.ll
index c6f4a93def13..53e3bed53831 100644
--- a/test/CodeGen/ARM/2011-09-28-CMovCombineBug.ll
+++ b/test/CodeGen/ARM/2011-09-28-CMovCombineBug.ll
@@ -4,7 +4,7 @@
; ARM target specific dag combine created a cycle in DAG.
define void @t() nounwind ssp {
- %1 = load i64* undef, align 4
+ %1 = load i64, i64* undef, align 4
%2 = shl i32 5, 0
%3 = zext i32 %2 to i64
%4 = and i64 %1, %3
diff --git a/test/CodeGen/ARM/2011-10-26-ExpandUnalignedLoadCrash.ll b/test/CodeGen/ARM/2011-10-26-ExpandUnalignedLoadCrash.ll
index 09db740b7f76..86596d6282fd 100644
--- a/test/CodeGen/ARM/2011-10-26-ExpandUnalignedLoadCrash.ll
+++ b/test/CodeGen/ARM/2011-10-26-ExpandUnalignedLoadCrash.ll
@@ -4,16 +4,16 @@ target triple = "armv6-none-linux-gnueabi"
define void @sample_test(i8* %.T0348, i16* nocapture %sourceA, i16* nocapture %destValues) {
L.entry:
- %0 = call i32 (...)* @get_index(i8* %.T0348, i32 0)
+ %0 = call i32 (...) @get_index(i8* %.T0348, i32 0)
%1 = bitcast i16* %destValues to i8*
%2 = mul i32 %0, 6
- %3 = getelementptr i8* %1, i32 %2
+ %3 = getelementptr i8, i8* %1, i32 %2
%4 = bitcast i8* %3 to <3 x i16>*
- %5 = load <3 x i16>* %4, align 1
+ %5 = load <3 x i16>, <3 x i16>* %4, align 1
%6 = bitcast i16* %sourceA to i8*
- %7 = getelementptr i8* %6, i32 %2
+ %7 = getelementptr i8, i8* %6, i32 %2
%8 = bitcast i8* %7 to <3 x i16>*
- %9 = load <3 x i16>* %8, align 1
+ %9 = load <3 x i16>, <3 x i16>* %8, align 1
%10 = or <3 x i16> %9, %5
store <3 x i16> %10, <3 x i16>* %4, align 1
ret void
diff --git a/test/CodeGen/ARM/2011-11-07-PromoteVectorLoadStore.ll b/test/CodeGen/ARM/2011-11-07-PromoteVectorLoadStore.ll
index a707a92c9fa0..e7059716c49b 100644
--- a/test/CodeGen/ARM/2011-11-07-PromoteVectorLoadStore.ll
+++ b/test/CodeGen/ARM/2011-11-07-PromoteVectorLoadStore.ll
@@ -7,8 +7,8 @@
define void @test_neon_vector_add_2xi8() nounwind {
; CHECK-LABEL: test_neon_vector_add_2xi8:
- %1 = load <2 x i8>* @i8_src1
- %2 = load <2 x i8>* @i8_src2
+ %1 = load <2 x i8>, <2 x i8>* @i8_src1
+ %2 = load <2 x i8>, <2 x i8>* @i8_src2
%3 = add <2 x i8> %1, %2
store <2 x i8> %3, <2 x i8>* @i8_res
ret void
@@ -16,8 +16,8 @@ define void @test_neon_vector_add_2xi8() nounwind {
define void @test_neon_ld_st_volatile_with_ashr_2xi8() {
; CHECK-LABEL: test_neon_ld_st_volatile_with_ashr_2xi8:
- %1 = load volatile <2 x i8>* @i8_src1
- %2 = load volatile <2 x i8>* @i8_src2
+ %1 = load volatile <2 x i8>, <2 x i8>* @i8_src1
+ %2 = load volatile <2 x i8>, <2 x i8>* @i8_src2
%3 = ashr <2 x i8> %1, %2
store volatile <2 x i8> %3, <2 x i8>* @i8_res
ret void
diff --git a/test/CodeGen/ARM/2011-11-09-BitcastVectorDouble.ll b/test/CodeGen/ARM/2011-11-09-BitcastVectorDouble.ll
index c1554d848c44..6dc9d4b7025d 100644
--- a/test/CodeGen/ARM/2011-11-09-BitcastVectorDouble.ll
+++ b/test/CodeGen/ARM/2011-11-09-BitcastVectorDouble.ll
@@ -8,7 +8,7 @@ declare <2 x i16> @foo_v2i16(<2 x i16>) nounwind
define void @test_neon_call_return_v2i16() {
; CHECK-LABEL: test_neon_call_return_v2i16:
- %1 = load <2 x i16>* @src1_v2i16
+ %1 = load <2 x i16>, <2 x i16>* @src1_v2i16
%2 = call <2 x i16> @foo_v2i16(<2 x i16> %1) nounwind
store <2 x i16> %2, <2 x i16>* @res_v2i16
ret void
diff --git a/test/CodeGen/ARM/2011-11-09-IllegalVectorFPIntConvert.ll b/test/CodeGen/ARM/2011-11-09-IllegalVectorFPIntConvert.ll
index c50461a42d8b..1da93bdd7c94 100644
--- a/test/CodeGen/ARM/2011-11-09-IllegalVectorFPIntConvert.ll
+++ b/test/CodeGen/ARM/2011-11-09-IllegalVectorFPIntConvert.ll
@@ -4,7 +4,7 @@ define <2 x i32> @test1(<2 x double>* %A) {
; CHECK: test1
; CHECK: vcvt.s32.f64
; CHECK: vcvt.s32.f64
- %tmp1 = load <2 x double>* %A
+ %tmp1 = load <2 x double>, <2 x double>* %A
%tmp2 = fptosi <2 x double> %tmp1 to <2 x i32>
ret <2 x i32> %tmp2
}
@@ -13,7 +13,7 @@ define <2 x i32> @test2(<2 x double>* %A) {
; CHECK: test2
; CHECK: vcvt.u32.f64
; CHECK: vcvt.u32.f64
- %tmp1 = load <2 x double>* %A
+ %tmp1 = load <2 x double>, <2 x double>* %A
%tmp2 = fptoui <2 x double> %tmp1 to <2 x i32>
ret <2 x i32> %tmp2
}
@@ -22,7 +22,7 @@ define <2 x double> @test3(<2 x i32>* %A) {
; CHECK: test3
; CHECK: vcvt.f64.s32
; CHECK: vcvt.f64.s32
- %tmp1 = load <2 x i32>* %A
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
%tmp2 = sitofp <2 x i32> %tmp1 to <2 x double>
ret <2 x double> %tmp2
}
@@ -31,7 +31,7 @@ define <2 x double> @test4(<2 x i32>* %A) {
; CHECK: test4
; CHECK: vcvt.f64.u32
; CHECK: vcvt.f64.u32
- %tmp1 = load <2 x i32>* %A
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
%tmp2 = uitofp <2 x i32> %tmp1 to <2 x double>
ret <2 x double> %tmp2
}
diff --git a/test/CodeGen/ARM/2011-11-14-EarlyClobber.ll b/test/CodeGen/ARM/2011-11-14-EarlyClobber.ll
index 52aa0bfaa545..7f4057143a07 100644
--- a/test/CodeGen/ARM/2011-11-14-EarlyClobber.ll
+++ b/test/CodeGen/ARM/2011-11-14-EarlyClobber.ll
@@ -18,38 +18,38 @@ target triple = "thumbv7-apple-ios"
define void @Compute_Axis_Rotation_Transform(%struct.Transform_Struct.0.11.12.17.43.46.56.58.60* nocapture %transform, double* nocapture %V1, double %angle) nounwind {
entry:
store double 1.000000e+00, double* null, align 4
- %arrayidx5.1.i = getelementptr inbounds %struct.Transform_Struct.0.11.12.17.43.46.56.58.60* %transform, i32 0, i32 0, i32 0, i32 1
+ %arrayidx5.1.i = getelementptr inbounds %struct.Transform_Struct.0.11.12.17.43.46.56.58.60, %struct.Transform_Struct.0.11.12.17.43.46.56.58.60* %transform, i32 0, i32 0, i32 0, i32 1
store double 0.000000e+00, double* %arrayidx5.1.i, align 4
- %arrayidx5.2.i = getelementptr inbounds %struct.Transform_Struct.0.11.12.17.43.46.56.58.60* %transform, i32 0, i32 0, i32 0, i32 2
+ %arrayidx5.2.i = getelementptr inbounds %struct.Transform_Struct.0.11.12.17.43.46.56.58.60, %struct.Transform_Struct.0.11.12.17.43.46.56.58.60* %transform, i32 0, i32 0, i32 0, i32 2
store double 0.000000e+00, double* %arrayidx5.2.i, align 4
- %arrayidx5.114.i = getelementptr inbounds %struct.Transform_Struct.0.11.12.17.43.46.56.58.60* %transform, i32 0, i32 0, i32 1, i32 0
+ %arrayidx5.114.i = getelementptr inbounds %struct.Transform_Struct.0.11.12.17.43.46.56.58.60, %struct.Transform_Struct.0.11.12.17.43.46.56.58.60* %transform, i32 0, i32 0, i32 1, i32 0
store double 0.000000e+00, double* %arrayidx5.114.i, align 4
- %arrayidx5.1.1.i = getelementptr inbounds %struct.Transform_Struct.0.11.12.17.43.46.56.58.60* %transform, i32 0, i32 0, i32 1, i32 1
+ %arrayidx5.1.1.i = getelementptr inbounds %struct.Transform_Struct.0.11.12.17.43.46.56.58.60, %struct.Transform_Struct.0.11.12.17.43.46.56.58.60* %transform, i32 0, i32 0, i32 1, i32 1
store double 1.000000e+00, double* %arrayidx5.1.1.i, align 4
store double 0.000000e+00, double* null, align 4
store double 1.000000e+00, double* null, align 4
store double 0.000000e+00, double* null, align 4
%call = tail call double @cos(double %angle) nounwind readnone
%call1 = tail call double @sin(double %angle) nounwind readnone
- %0 = load double* %V1, align 4
- %arrayidx2 = getelementptr inbounds double* %V1, i32 1
- %1 = load double* %arrayidx2, align 4
+ %0 = load double, double* %V1, align 4
+ %arrayidx2 = getelementptr inbounds double, double* %V1, i32 1
+ %1 = load double, double* %arrayidx2, align 4
%mul = fmul double %0, %1
%sub = fsub double 1.000000e+00, %call
%mul3 = fmul double %mul, %sub
- %2 = load double* undef, align 4
+ %2 = load double, double* undef, align 4
%mul5 = fmul double %2, %call1
%add = fadd double %mul3, %mul5
store double %add, double* %arrayidx5.1.i, align 4
- %3 = load double* %V1, align 4
+ %3 = load double, double* %V1, align 4
%mul11 = fmul double %3, undef
%mul13 = fmul double %mul11, %sub
- %4 = load double* %arrayidx2, align 4
+ %4 = load double, double* %arrayidx2, align 4
%mul15 = fmul double %4, %call1
%sub16 = fsub double %mul13, %mul15
store double %sub16, double* %arrayidx5.2.i, align 4
- %5 = load double* %V1, align 4
- %6 = load double* %arrayidx2, align 4
+ %5 = load double, double* %V1, align 4
+ %6 = load double, double* %arrayidx2, align 4
%mul22 = fmul double %5, %6
%mul24 = fmul double %mul22, %sub
%sub27 = fsub double %mul24, undef
diff --git a/test/CodeGen/ARM/2011-11-28-DAGCombineBug.ll b/test/CodeGen/ARM/2011-11-28-DAGCombineBug.ll
index bc496b99f4a6..d820d688fde9 100644
--- a/test/CodeGen/ARM/2011-11-28-DAGCombineBug.ll
+++ b/test/CodeGen/ARM/2011-11-28-DAGCombineBug.ll
@@ -15,14 +15,14 @@ define hidden void @foo() {
; CHECK: ldr.w
; CHECK-NOT: ldm
entry:
- %tmp13 = load i32* getelementptr inbounds (%struct.InformationBlock* @infoBlock, i32 0, i32 1, i32 0, i32 0), align 1
- %tmp15 = load i32* getelementptr inbounds (%struct.InformationBlock* @infoBlock, i32 0, i32 1, i32 0, i32 1), align 1
- %tmp17 = load i32* getelementptr inbounds (%struct.InformationBlock* @infoBlock, i32 0, i32 1, i32 0, i32 2), align 1
- %tmp19 = load i32* getelementptr inbounds (%struct.InformationBlock* @infoBlock, i32 0, i32 1, i32 0, i32 3), align 1
- %tmp = load i32* getelementptr inbounds (%struct.InformationBlock* @infoBlock, i32 0, i32 2, i32 0, i32 0), align 1
- %tmp3 = load i32* getelementptr inbounds (%struct.InformationBlock* @infoBlock, i32 0, i32 2, i32 0, i32 1), align 1
- %tmp4 = load i32* getelementptr inbounds (%struct.InformationBlock* @infoBlock, i32 0, i32 2, i32 0, i32 2), align 1
- %tmp5 = load i32* getelementptr inbounds (%struct.InformationBlock* @infoBlock, i32 0, i32 2, i32 0, i32 3), align 1
+ %tmp13 = load i32, i32* getelementptr inbounds (%struct.InformationBlock, %struct.InformationBlock* @infoBlock, i32 0, i32 1, i32 0, i32 0), align 1
+ %tmp15 = load i32, i32* getelementptr inbounds (%struct.InformationBlock, %struct.InformationBlock* @infoBlock, i32 0, i32 1, i32 0, i32 1), align 1
+ %tmp17 = load i32, i32* getelementptr inbounds (%struct.InformationBlock, %struct.InformationBlock* @infoBlock, i32 0, i32 1, i32 0, i32 2), align 1
+ %tmp19 = load i32, i32* getelementptr inbounds (%struct.InformationBlock, %struct.InformationBlock* @infoBlock, i32 0, i32 1, i32 0, i32 3), align 1
+ %tmp = load i32, i32* getelementptr inbounds (%struct.InformationBlock, %struct.InformationBlock* @infoBlock, i32 0, i32 2, i32 0, i32 0), align 1
+ %tmp3 = load i32, i32* getelementptr inbounds (%struct.InformationBlock, %struct.InformationBlock* @infoBlock, i32 0, i32 2, i32 0, i32 1), align 1
+ %tmp4 = load i32, i32* getelementptr inbounds (%struct.InformationBlock, %struct.InformationBlock* @infoBlock, i32 0, i32 2, i32 0, i32 2), align 1
+ %tmp5 = load i32, i32* getelementptr inbounds (%struct.InformationBlock, %struct.InformationBlock* @infoBlock, i32 0, i32 2, i32 0, i32 3), align 1
%insert21 = insertvalue [4 x i32] undef, i32 %tmp13, 0
%insert23 = insertvalue [4 x i32] %insert21, i32 %tmp15, 1
%insert25 = insertvalue [4 x i32] %insert23, i32 %tmp17, 2
diff --git a/test/CodeGen/ARM/2011-11-29-128bitArithmetics.ll b/test/CodeGen/ARM/2011-11-29-128bitArithmetics.ll
index 86b58c8186b0..404aca13cdba 100644
--- a/test/CodeGen/ARM/2011-11-29-128bitArithmetics.ll
+++ b/test/CodeGen/ARM/2011-11-29-128bitArithmetics.ll
@@ -16,7 +16,7 @@ define void @test_sqrt(<4 x float>* %X) nounwind {
; CHECK: vst1.64 {{.*}}
L.entry:
- %0 = load <4 x float>* @A, align 16
+ %0 = load <4 x float>, <4 x float>* @A, align 16
%1 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %0)
store <4 x float> %1, <4 x float>* %X, align 16
ret void
@@ -48,7 +48,7 @@ define void @test_cos(<4 x float>* %X) nounwind {
; CHECK: vst1.64
L.entry:
- %0 = load <4 x float>* @A, align 16
+ %0 = load <4 x float>, <4 x float>* @A, align 16
%1 = call <4 x float> @llvm.cos.v4f32(<4 x float> %0)
store <4 x float> %1, <4 x float>* %X, align 16
ret void
@@ -79,7 +79,7 @@ define void @test_exp(<4 x float>* %X) nounwind {
; CHECK: vst1.64
L.entry:
- %0 = load <4 x float>* @A, align 16
+ %0 = load <4 x float>, <4 x float>* @A, align 16
%1 = call <4 x float> @llvm.exp.v4f32(<4 x float> %0)
store <4 x float> %1, <4 x float>* %X, align 16
ret void
@@ -110,7 +110,7 @@ define void @test_exp2(<4 x float>* %X) nounwind {
; CHECK: vst1.64
L.entry:
- %0 = load <4 x float>* @A, align 16
+ %0 = load <4 x float>, <4 x float>* @A, align 16
%1 = call <4 x float> @llvm.exp2.v4f32(<4 x float> %0)
store <4 x float> %1, <4 x float>* %X, align 16
ret void
@@ -141,7 +141,7 @@ define void @test_log10(<4 x float>* %X) nounwind {
; CHECK: vst1.64
L.entry:
- %0 = load <4 x float>* @A, align 16
+ %0 = load <4 x float>, <4 x float>* @A, align 16
%1 = call <4 x float> @llvm.log10.v4f32(<4 x float> %0)
store <4 x float> %1, <4 x float>* %X, align 16
ret void
@@ -172,7 +172,7 @@ define void @test_log(<4 x float>* %X) nounwind {
; CHECK: vst1.64
L.entry:
- %0 = load <4 x float>* @A, align 16
+ %0 = load <4 x float>, <4 x float>* @A, align 16
%1 = call <4 x float> @llvm.log.v4f32(<4 x float> %0)
store <4 x float> %1, <4 x float>* %X, align 16
ret void
@@ -203,7 +203,7 @@ define void @test_log2(<4 x float>* %X) nounwind {
; CHECK: vst1.64
L.entry:
- %0 = load <4 x float>* @A, align 16
+ %0 = load <4 x float>, <4 x float>* @A, align 16
%1 = call <4 x float> @llvm.log2.v4f32(<4 x float> %0)
store <4 x float> %1, <4 x float>* %X, align 16
ret void
@@ -236,7 +236,7 @@ define void @test_pow(<4 x float>* %X) nounwind {
L.entry:
- %0 = load <4 x float>* @A, align 16
+ %0 = load <4 x float>, <4 x float>* @A, align 16
%1 = call <4 x float> @llvm.pow.v4f32(<4 x float> %0, <4 x float> <float 2., float 2., float 2., float 2.>)
store <4 x float> %1, <4 x float>* %X, align 16
@@ -259,7 +259,7 @@ define void @test_powi(<4 x float>* %X) nounwind {
L.entry:
- %0 = load <4 x float>* @A, align 16
+ %0 = load <4 x float>, <4 x float>* @A, align 16
%1 = call <4 x float> @llvm.powi.v4f32(<4 x float> %0, i32 2)
store <4 x float> %1, <4 x float>* %X, align 16
@@ -292,7 +292,7 @@ define void @test_sin(<4 x float>* %X) nounwind {
; CHECK: vst1.64
L.entry:
- %0 = load <4 x float>* @A, align 16
+ %0 = load <4 x float>, <4 x float>* @A, align 16
%1 = call <4 x float> @llvm.sin.v4f32(<4 x float> %0)
store <4 x float> %1, <4 x float>* %X, align 16
ret void
@@ -323,7 +323,7 @@ define void @test_floor(<4 x float>* %X) nounwind {
; CHECK: vst1.64
L.entry:
- %0 = load <4 x float>* @A, align 16
+ %0 = load <4 x float>, <4 x float>* @A, align 16
%1 = call <4 x float> @llvm.floor.v4f32(<4 x float> %0)
store <4 x float> %1, <4 x float>* %X, align 16
ret void
diff --git a/test/CodeGen/ARM/2011-11-30-MergeAlignment.ll b/test/CodeGen/ARM/2011-11-30-MergeAlignment.ll
index 0c90f4cf949a..0d324404d7be 100644
--- a/test/CodeGen/ARM/2011-11-30-MergeAlignment.ll
+++ b/test/CodeGen/ARM/2011-11-30-MergeAlignment.ll
@@ -8,9 +8,9 @@ target triple = "thumbv7-apple-darwin10"
@x2 = internal global i64 12
define i64 @f() {
- %ax = load i32* @x1
+ %ax = load i32, i32* @x1
%a = zext i32 %ax to i64
- %b = load i64* @x2
+ %b = load i64, i64* @x2
%c = add i64 %a, %b
ret i64 %c
}
diff --git a/test/CodeGen/ARM/2011-12-14-machine-sink.ll b/test/CodeGen/ARM/2011-12-14-machine-sink.ll
index 9334bf36d805..88019f450e36 100644
--- a/test/CodeGen/ARM/2011-12-14-machine-sink.ll
+++ b/test/CodeGen/ARM/2011-12-14-machine-sink.ll
@@ -5,7 +5,7 @@ target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-
target triple = "thumbv7-apple-ios4.0.0"
; STATS-NOT: machine-sink
-define i32 @foo(i32 %h) nounwind readonly ssp {
+define i32 @foo(i32 %h, i32 %arg1) nounwind readonly ssp {
entry:
br label %for.cond
@@ -14,33 +14,26 @@ for.cond: ; preds = %for.body, %entry
br i1 %cmp, label %for.body, label %if.end299
for.body: ; preds = %for.cond
- %v.5 = select i1 undef, i32 undef, i32 0
- %0 = load i8* undef, align 1
+ %cond0 = icmp ne i32 %arg1, 42
+ %v.5 = select i1 %cond0, i32 undef, i32 0
+ %0 = load i8, i8* undef, align 1
%conv88 = zext i8 %0 to i32
%sub89 = sub nsw i32 0, %conv88
- %v.8 = select i1 undef, i32 undef, i32 %sub89
- %1 = load i8* null, align 1
+ %cond1 = icmp ne i32 %arg1, 23
+ %v.8 = select i1 %cond1, i32 undef, i32 %sub89
+ %1 = load i8, i8* null, align 1
%conv108 = zext i8 %1 to i32
- %2 = load i8* undef, align 1
+ %2 = load i8, i8* undef, align 1
%conv110 = zext i8 %2 to i32
%sub111 = sub nsw i32 %conv108, %conv110
%cmp112 = icmp slt i32 %sub111, 0
%sub115 = sub nsw i32 0, %sub111
- %v.10 = select i1 %cmp112, i32 %sub115, i32 %sub111
- %add62 = add i32 0, %v.5
- %add73 = add i32 %add62, 0
- %add84 = add i32 %add73, 0
- %add95 = add i32 %add84, %v.8
- %add106 = add i32 %add95, 0
- %add117 = add i32 %add106, %v.10
- %add128 = add i32 %add117, 0
- %add139 = add i32 %add128, 0
- %add150 = add i32 %add139, 0
- %add161 = add i32 %add150, 0
- %add172 = add i32 %add161, 0
+ %abs = select i1 %cmp112, i32 %sub115, i32 %sub111
+ %add95 = add i32 %v.5, %v.8
+ %add117 = add i32 %add95, %abs
br i1 undef, label %for.cond, label %if.end299
if.end299: ; preds = %for.body, %for.cond
- %s.10 = phi i32 [ %add172, %for.body ], [ 0, %for.cond ]
+ %s.10 = phi i32 [ %add117, %for.body ], [ 0, %for.cond ]
ret i32 %s.10
}
diff --git a/test/CodeGen/ARM/2011-12-19-sjlj-clobber.ll b/test/CodeGen/ARM/2011-12-19-sjlj-clobber.ll
index ddb76326947c..40d1f628aaae 100644
--- a/test/CodeGen/ARM/2011-12-19-sjlj-clobber.ll
+++ b/test/CodeGen/ARM/2011-12-19-sjlj-clobber.ll
@@ -22,13 +22,13 @@ bb:
store i32 %b, i32* %tmp1, align 4
store i8* %d, i8** %tmp2, align 4
store i1 false, i1* %tmp3
- %tmp7 = load i8** %c
+ %tmp7 = load i8*, i8** %c
%tmp10 = invoke %0* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to %0* (i8*, i8*, %0*)*)(i8* %tmp7, i8* %d, %0* null)
to label %bb11 unwind label %bb15
bb11: ; preds = %bb
store %0* %tmp10, %0** %myException, align 4
- %tmp12 = load %0** %myException, align 4
+ %tmp12 = load %0*, %0** %myException, align 4
%tmp13 = bitcast %0* %tmp12 to i8*
invoke void @objc_exception_throw(i8* %tmp13) noreturn
to label %bb14 unwind label %bb15
diff --git a/test/CodeGen/ARM/2012-01-23-PostRA-LICM.ll b/test/CodeGen/ARM/2012-01-23-PostRA-LICM.ll
index 0f1c452b8678..3f827f8e702f 100644
--- a/test/CodeGen/ARM/2012-01-23-PostRA-LICM.ll
+++ b/test/CodeGen/ARM/2012-01-23-PostRA-LICM.ll
@@ -18,7 +18,7 @@ bb3: ; preds = %bb4, %bb2
br i1 %tmp, label %bb4, label %bb67
bb4: ; preds = %bb3
- %tmp5 = load <4 x i32>* undef, align 16
+ %tmp5 = load <4 x i32>, <4 x i32>* undef, align 16
%tmp6 = and <4 x i32> %tmp5, <i32 8388607, i32 8388607, i32 8388607, i32 8388607>
%tmp7 = or <4 x i32> %tmp6, <i32 1065353216, i32 1065353216, i32 1065353216, i32 1065353216>
%tmp8 = bitcast <4 x i32> %tmp7 to <4 x float>
@@ -41,9 +41,9 @@ bb4: ; preds = %bb3
%tmp24 = trunc i128 %tmp23 to i64
%tmp25 = insertvalue [2 x i64] undef, i64 %tmp24, 0
%tmp26 = insertvalue [2 x i64] %tmp25, i64 0, 1
- %tmp27 = load float* undef, align 4
+ %tmp27 = load float, float* undef, align 4
%tmp28 = insertelement <4 x float> undef, float %tmp27, i32 3
- %tmp29 = load <4 x i32>* undef, align 16
+ %tmp29 = load <4 x i32>, <4 x i32>* undef, align 16
%tmp30 = and <4 x i32> %tmp29, <i32 8388607, i32 8388607, i32 8388607, i32 8388607>
%tmp31 = or <4 x i32> %tmp30, <i32 1065353216, i32 1065353216, i32 1065353216, i32 1065353216>
%tmp32 = bitcast <4 x i32> %tmp31 to <4 x float>
@@ -52,10 +52,10 @@ bb4: ; preds = %bb3
%tmp35 = fmul <4 x float> %tmp34, undef
%tmp36 = fmul <4 x float> %tmp35, undef
%tmp37 = call arm_aapcs_vfpcc i8* undef(i8* undef) nounwind
- %tmp38 = load float* undef, align 4
+ %tmp38 = load float, float* undef, align 4
%tmp39 = insertelement <2 x float> undef, float %tmp38, i32 0
%tmp40 = call arm_aapcs_vfpcc i8* undef(i8* undef) nounwind
- %tmp41 = load float* undef, align 4
+ %tmp41 = load float, float* undef, align 4
%tmp42 = insertelement <4 x float> undef, float %tmp41, i32 3
%tmp43 = shufflevector <2 x float> %tmp39, <2 x float> undef, <4 x i32> zeroinitializer
%tmp44 = fmul <4 x float> %tmp33, %tmp43
@@ -64,10 +64,10 @@ bb4: ; preds = %bb3
%tmp47 = fmul <4 x float> %tmp46, %tmp36
%tmp48 = fadd <4 x float> undef, %tmp47
%tmp49 = call arm_aapcs_vfpcc i8* undef(i8* undef) nounwind
- %tmp50 = load float* undef, align 4
+ %tmp50 = load float, float* undef, align 4
%tmp51 = insertelement <4 x float> undef, float %tmp50, i32 3
%tmp52 = call arm_aapcs_vfpcc float* null(i8* undef) nounwind
- %tmp54 = load float* %tmp52, align 4
+ %tmp54 = load float, float* %tmp52, align 4
%tmp55 = insertelement <4 x float> undef, float %tmp54, i32 3
%tmp56 = fsub <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, %tmp22
%tmp57 = call <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float> %tmp56, <4 x float> %tmp55) nounwind
diff --git a/test/CodeGen/ARM/2012-01-24-RegSequenceLiveRange.ll b/test/CodeGen/ARM/2012-01-24-RegSequenceLiveRange.ll
index 61623ec1b6a4..b70b7f6f3b2e 100644
--- a/test/CodeGen/ARM/2012-01-24-RegSequenceLiveRange.ll
+++ b/test/CodeGen/ARM/2012-01-24-RegSequenceLiveRange.ll
@@ -7,7 +7,7 @@ target triple = "armv7-none-linux-eabi"
; This test case is exercising REG_SEQUENCE, and chains of REG_SEQUENCE.
define arm_aapcs_vfpcc void @foo(i8* nocapture %arg, i8* %arg1) nounwind align 2 {
bb:
- %tmp = load <2 x float>* undef, align 8
+ %tmp = load <2 x float>, <2 x float>* undef, align 8
%tmp2 = extractelement <2 x float> %tmp, i32 0
%tmp3 = insertelement <4 x float> undef, float %tmp2, i32 0
%tmp4 = insertelement <4 x float> %tmp3, float 0.000000e+00, i32 1
diff --git a/test/CodeGen/ARM/2012-01-26-CopyPropKills.ll b/test/CodeGen/ARM/2012-01-26-CopyPropKills.ll
index a9e2ebb7fe12..2484f0d42ed3 100644
--- a/test/CodeGen/ARM/2012-01-26-CopyPropKills.ll
+++ b/test/CodeGen/ARM/2012-01-26-CopyPropKills.ll
@@ -56,9 +56,9 @@ bb3: ; preds = %bb2
%tmp39 = shufflevector <2 x i64> %tmp38, <2 x i64> undef, <1 x i32> zeroinitializer
%tmp40 = bitcast <1 x i64> %tmp39 to <2 x float>
%tmp41 = shufflevector <2 x float> %tmp40, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
- %tmp42 = load <4 x float>* null, align 16
+ %tmp42 = load <4 x float>, <4 x float>* null, align 16
%tmp43 = fmul <4 x float> %tmp42, %tmp41
- %tmp44 = load <4 x float>* undef, align 16
+ %tmp44 = load <4 x float>, <4 x float>* undef, align 16
%tmp45 = fadd <4 x float> undef, %tmp43
%tmp46 = fadd <4 x float> undef, %tmp45
%tmp47 = bitcast <4 x float> %tmp36 to <2 x i64>
@@ -76,7 +76,7 @@ bb3: ; preds = %bb2
%tmp59 = fmul <4 x float> undef, %tmp58
%tmp60 = fadd <4 x float> %tmp59, undef
%tmp61 = fadd <4 x float> %tmp60, zeroinitializer
- %tmp62 = load void (i8*, i8*)** undef, align 4
+ %tmp62 = load void (i8*, i8*)*, void (i8*, i8*)** undef, align 4
call arm_aapcs_vfpcc void %tmp62(i8* sret undef, i8* undef) nounwind
%tmp63 = bitcast <4 x float> %tmp46 to i128
%tmp64 = bitcast <4 x float> %tmp54 to i128
@@ -96,7 +96,7 @@ bb3: ; preds = %bb2
call arm_aapcs_vfpcc void @bar(i8* sret null, [8 x i64] %tmp77) nounwind
%tmp78 = call arm_aapcs_vfpcc i8* null(i8* null) nounwind
%tmp79 = bitcast i8* %tmp78 to i512*
- %tmp80 = load i512* %tmp79, align 16
+ %tmp80 = load i512, i512* %tmp79, align 16
%tmp81 = lshr i512 %tmp80, 128
%tmp82 = trunc i512 %tmp80 to i128
%tmp83 = trunc i512 %tmp81 to i128
diff --git a/test/CodeGen/ARM/2012-02-01-CoalescerBug.ll b/test/CodeGen/ARM/2012-02-01-CoalescerBug.ll
index 6c7aaad7c692..b0411384b96a 100644
--- a/test/CodeGen/ARM/2012-02-01-CoalescerBug.ll
+++ b/test/CodeGen/ARM/2012-02-01-CoalescerBug.ll
@@ -10,7 +10,7 @@ target triple = "armv7-none-linux-gnueabi"
@foo = external global %0, align 16
define arm_aapcs_vfpcc void @bar(float, i1 zeroext, i1 zeroext) nounwind {
- %4 = load <4 x float>* getelementptr inbounds (%0* @foo, i32 0, i32 0), align 16
+ %4 = load <4 x float>, <4 x float>* getelementptr inbounds (%0, %0* @foo, i32 0, i32 0), align 16
%5 = extractelement <4 x float> %4, i32 0
%6 = extractelement <4 x float> %4, i32 1
%7 = extractelement <4 x float> %4, i32 2
diff --git a/test/CodeGen/ARM/2012-03-13-DAGCombineBug.ll b/test/CodeGen/ARM/2012-03-13-DAGCombineBug.ll
index 6206cd74d584..6fb760c4bcc7 100644
--- a/test/CodeGen/ARM/2012-03-13-DAGCombineBug.ll
+++ b/test/CodeGen/ARM/2012-03-13-DAGCombineBug.ll
@@ -7,7 +7,7 @@
define void @test_hi_short3(<3 x i16> * nocapture %srcA, <2 x i16> * nocapture %dst) nounwind {
entry:
; CHECK: vst1.32
- %0 = load <3 x i16> * %srcA, align 8
+ %0 = load <3 x i16> , <3 x i16> * %srcA, align 8
%1 = shufflevector <3 x i16> %0, <3 x i16> undef, <2 x i32> <i32 2, i32 undef>
store <2 x i16> %1, <2 x i16> * %dst, align 4
ret void
diff --git a/test/CodeGen/ARM/2012-06-12-SchedMemLatency.ll b/test/CodeGen/ARM/2012-06-12-SchedMemLatency.ll
index a288015d6016..6f92613fa1ff 100644
--- a/test/CodeGen/ARM/2012-06-12-SchedMemLatency.ll
+++ b/test/CodeGen/ARM/2012-06-12-SchedMemLatency.ll
@@ -27,13 +27,13 @@
define i32 @f1(i32* nocapture %p1, i32* nocapture %p2) nounwind {
entry:
store volatile i32 65540, i32* %p1, align 4
- %0 = load volatile i32* %p2, align 4
+ %0 = load volatile i32, i32* %p2, align 4
ret i32 %0
}
define i32 @f2(i32* nocapture %p1, i32* nocapture %p2) nounwind {
entry:
store i32 65540, i32* %p1, align 4
- %0 = load i32* %p2, align 4
+ %0 = load i32, i32* %p2, align 4
ret i32 %0
}
diff --git a/test/CodeGen/ARM/2012-08-04-DtripleSpillReload.ll b/test/CodeGen/ARM/2012-08-04-DtripleSpillReload.ll
index 70e307934559..6e0b828ad24f 100644
--- a/test/CodeGen/ARM/2012-08-04-DtripleSpillReload.ll
+++ b/test/CodeGen/ARM/2012-08-04-DtripleSpillReload.ll
@@ -129,7 +129,7 @@ define arm_aapcs_vfpcc void @foo(float, i1 zeroext, i1 zeroext) nounwind uwtable
%45 = fmul <4 x float> undef, undef
%46 = fmul <4 x float> %45, %43
%47 = fmul <4 x float> undef, %44
- %48 = load <4 x float>* undef, align 8
+ %48 = load <4 x float>, <4 x float>* undef, align 8
%49 = bitcast <4 x float> %48 to <2 x i64>
%50 = shufflevector <2 x i64> %49, <2 x i64> undef, <1 x i32> <i32 1>
%51 = bitcast <1 x i64> %50 to <2 x float>
diff --git a/test/CodeGen/ARM/2012-08-08-legalize-unaligned.ll b/test/CodeGen/ARM/2012-08-08-legalize-unaligned.ll
index bdcd1b6ad4b7..576dff4d001e 100644
--- a/test/CodeGen/ARM/2012-08-08-legalize-unaligned.ll
+++ b/test/CodeGen/ARM/2012-08-08-legalize-unaligned.ll
@@ -6,7 +6,7 @@ target triple = "armv7-none-linux-gnueabi"
define void @test_hi_char8() noinline {
entry:
- %0 = load <4 x i8>* undef, align 1
+ %0 = load <4 x i8>, <4 x i8>* undef, align 1
store <4 x i8> %0, <4 x i8>* null, align 4
ret void
}
diff --git a/test/CodeGen/ARM/2012-08-09-neon-extload.ll b/test/CodeGen/ARM/2012-08-09-neon-extload.ll
index a7108253cb62..285a431a6ecf 100644
--- a/test/CodeGen/ARM/2012-08-09-neon-extload.ll
+++ b/test/CodeGen/ARM/2012-08-09-neon-extload.ll
@@ -14,7 +14,7 @@
define void @test_v2i8tov2i32() {
; CHECK-LABEL: test_v2i8tov2i32:
- %i8val = load <2 x i8>* @var_v2i8
+ %i8val = load <2 x i8>, <2 x i8>* @var_v2i8
%i32val = sext <2 x i8> %i8val to <2 x i32>
store <2 x i32> %i32val, <2 x i32>* @var_v2i32
@@ -28,7 +28,7 @@ define void @test_v2i8tov2i32() {
define void @test_v2i8tov2i64() {
; CHECK-LABEL: test_v2i8tov2i64:
- %i8val = load <2 x i8>* @var_v2i8
+ %i8val = load <2 x i8>, <2 x i8>* @var_v2i8
%i64val = sext <2 x i8> %i8val to <2 x i64>
store <2 x i64> %i64val, <2 x i64>* @var_v2i64
@@ -46,7 +46,7 @@ define void @test_v2i8tov2i64() {
define void @test_v4i8tov4i16() {
; CHECK-LABEL: test_v4i8tov4i16:
- %i8val = load <4 x i8>* @var_v4i8
+ %i8val = load <4 x i8>, <4 x i8>* @var_v4i8
%i16val = sext <4 x i8> %i8val to <4 x i16>
store <4 x i16> %i16val, <4 x i16>* @var_v4i16
@@ -61,7 +61,7 @@ define void @test_v4i8tov4i16() {
define void @test_v4i8tov4i32() {
; CHECK-LABEL: test_v4i8tov4i32:
- %i8val = load <4 x i8>* @var_v4i8
+ %i8val = load <4 x i8>, <4 x i8>* @var_v4i8
%i16val = sext <4 x i8> %i8val to <4 x i32>
store <4 x i32> %i16val, <4 x i32>* @var_v4i32
@@ -75,7 +75,7 @@ define void @test_v4i8tov4i32() {
define void @test_v2i16tov2i32() {
; CHECK-LABEL: test_v2i16tov2i32:
- %i16val = load <2 x i16>* @var_v2i16
+ %i16val = load <2 x i16>, <2 x i16>* @var_v2i16
%i32val = sext <2 x i16> %i16val to <2 x i32>
store <2 x i32> %i32val, <2 x i32>* @var_v2i32
@@ -90,7 +90,7 @@ define void @test_v2i16tov2i32() {
define void @test_v2i16tov2i64() {
; CHECK-LABEL: test_v2i16tov2i64:
- %i16val = load <2 x i16>* @var_v2i16
+ %i16val = load <2 x i16>, <2 x i16>* @var_v2i16
%i64val = sext <2 x i16> %i16val to <2 x i64>
store <2 x i64> %i64val, <2 x i64>* @var_v2i64
diff --git a/test/CodeGen/ARM/2012-08-23-legalize-vmull.ll b/test/CodeGen/ARM/2012-08-23-legalize-vmull.ll
index e8d4fb22a59f..3a851d68f0a4 100644
--- a/test/CodeGen/ARM/2012-08-23-legalize-vmull.ll
+++ b/test/CodeGen/ARM/2012-08-23-legalize-vmull.ll
@@ -15,7 +15,7 @@
define void @sextload_v4i8_c(<4 x i8>* %v) nounwind {
;CHECK-LABEL: sextload_v4i8_c:
entry:
- %0 = load <4 x i8>* %v, align 8
+ %0 = load <4 x i8>, <4 x i8>* %v, align 8
%v0 = sext <4 x i8> %0 to <4 x i32>
;CHECK: vmull
%v1 = mul <4 x i32> %v0, <i32 3, i32 3, i32 3, i32 3>
@@ -28,7 +28,7 @@ entry:
define void @sextload_v2i8_c(<2 x i8>* %v) nounwind {
;CHECK-LABEL: sextload_v2i8_c:
entry:
- %0 = load <2 x i8>* %v, align 8
+ %0 = load <2 x i8>, <2 x i8>* %v, align 8
%v0 = sext <2 x i8> %0 to <2 x i64>
;CHECK: vmull
%v1 = mul <2 x i64> %v0, <i64 3, i64 3>
@@ -41,7 +41,7 @@ entry:
define void @sextload_v2i16_c(<2 x i16>* %v) nounwind {
;CHECK-LABEL: sextload_v2i16_c:
entry:
- %0 = load <2 x i16>* %v, align 8
+ %0 = load <2 x i16>, <2 x i16>* %v, align 8
%v0 = sext <2 x i16> %0 to <2 x i64>
;CHECK: vmull
%v1 = mul <2 x i64> %v0, <i64 3, i64 3>
@@ -56,10 +56,10 @@ entry:
define void @sextload_v4i8_v(<4 x i8>* %v, <4 x i8>* %p) nounwind {
;CHECK-LABEL: sextload_v4i8_v:
entry:
- %0 = load <4 x i8>* %v, align 8
+ %0 = load <4 x i8>, <4 x i8>* %v, align 8
%v0 = sext <4 x i8> %0 to <4 x i32>
- %1 = load <4 x i8>* %p, align 8
+ %1 = load <4 x i8>, <4 x i8>* %p, align 8
%v2 = sext <4 x i8> %1 to <4 x i32>
;CHECK: vmull
%v1 = mul <4 x i32> %v0, %v2
@@ -72,10 +72,10 @@ entry:
define void @sextload_v2i8_v(<2 x i8>* %v, <2 x i8>* %p) nounwind {
;CHECK-LABEL: sextload_v2i8_v:
entry:
- %0 = load <2 x i8>* %v, align 8
+ %0 = load <2 x i8>, <2 x i8>* %v, align 8
%v0 = sext <2 x i8> %0 to <2 x i64>
- %1 = load <2 x i8>* %p, align 8
+ %1 = load <2 x i8>, <2 x i8>* %p, align 8
%v2 = sext <2 x i8> %1 to <2 x i64>
;CHECK: vmull
%v1 = mul <2 x i64> %v0, %v2
@@ -88,10 +88,10 @@ entry:
define void @sextload_v2i16_v(<2 x i16>* %v, <2 x i16>* %p) nounwind {
;CHECK-LABEL: sextload_v2i16_v:
entry:
- %0 = load <2 x i16>* %v, align 8
+ %0 = load <2 x i16>, <2 x i16>* %v, align 8
%v0 = sext <2 x i16> %0 to <2 x i64>
- %1 = load <2 x i16>* %p, align 8
+ %1 = load <2 x i16>, <2 x i16>* %p, align 8
%v2 = sext <2 x i16> %1 to <2 x i64>
;CHECK: vmull
%v1 = mul <2 x i64> %v0, %v2
@@ -106,10 +106,10 @@ entry:
define void @sextload_v4i8_vs(<4 x i8>* %v, <4 x i16>* %p) nounwind {
;CHECK-LABEL: sextload_v4i8_vs:
entry:
- %0 = load <4 x i8>* %v, align 8
+ %0 = load <4 x i8>, <4 x i8>* %v, align 8
%v0 = sext <4 x i8> %0 to <4 x i32>
- %1 = load <4 x i16>* %p, align 8
+ %1 = load <4 x i16>, <4 x i16>* %p, align 8
%v2 = sext <4 x i16> %1 to <4 x i32>
;CHECK: vmull
%v1 = mul <4 x i32> %v0, %v2
@@ -122,10 +122,10 @@ entry:
define void @sextload_v2i8_vs(<2 x i8>* %v, <2 x i16>* %p) nounwind {
;CHECK-LABEL: sextload_v2i8_vs:
entry:
- %0 = load <2 x i8>* %v, align 8
+ %0 = load <2 x i8>, <2 x i8>* %v, align 8
%v0 = sext <2 x i8> %0 to <2 x i64>
- %1 = load <2 x i16>* %p, align 8
+ %1 = load <2 x i16>, <2 x i16>* %p, align 8
%v2 = sext <2 x i16> %1 to <2 x i64>
;CHECK: vmull
%v1 = mul <2 x i64> %v0, %v2
@@ -138,10 +138,10 @@ entry:
define void @sextload_v2i16_vs(<2 x i16>* %v, <2 x i32>* %p) nounwind {
;CHECK-LABEL: sextload_v2i16_vs:
entry:
- %0 = load <2 x i16>* %v, align 8
+ %0 = load <2 x i16>, <2 x i16>* %v, align 8
%v0 = sext <2 x i16> %0 to <2 x i64>
- %1 = load <2 x i32>* %p, align 8
+ %1 = load <2 x i32>, <2 x i32>* %p, align 8
%v2 = sext <2 x i32> %1 to <2 x i64>
;CHECK: vmull
%v1 = mul <2 x i64> %v0, %v2
diff --git a/test/CodeGen/ARM/2012-08-27-CopyPhysRegCrash.ll b/test/CodeGen/ARM/2012-08-27-CopyPhysRegCrash.ll
index ec7f72d7c2e8..545bfc73c590 100644
--- a/test/CodeGen/ARM/2012-08-27-CopyPhysRegCrash.ll
+++ b/test/CodeGen/ARM/2012-08-27-CopyPhysRegCrash.ll
@@ -21,17 +21,17 @@ define void @findEdges(i8*) nounwind ssp {
%6 = phi i8* [ %19, %5 ], [ %0, %1 ]
%7 = tail call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld3.v16i8(i8* null, i32 1)
%8 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %7, 0
- %9 = getelementptr inbounds i8* null, i32 3
+ %9 = getelementptr inbounds i8, i8* null, i32 3
%10 = tail call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld3.v16i8(i8* %9, i32 1)
%11 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %10, 2
%12 = tail call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld3.v16i8(i8* %6, i32 1)
%13 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %12, 0
%14 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %12, 1
- %15 = getelementptr inbounds i8* %6, i32 3
+ %15 = getelementptr inbounds i8, i8* %6, i32 3
%16 = tail call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld3.v16i8(i8* %15, i32 1)
%17 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %16, 1
%18 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %16, 2
- %19 = getelementptr inbounds i8* %6, i32 48
+ %19 = getelementptr inbounds i8, i8* %6, i32 48
%20 = bitcast <16 x i8> %13 to <2 x i64>
%21 = bitcast <16 x i8> %8 to <2 x i64>
%22 = bitcast <16 x i8> %14 to <2 x i64>
diff --git a/test/CodeGen/ARM/2012-10-04-AAPCS-byval-align8.ll b/test/CodeGen/ARM/2012-10-04-AAPCS-byval-align8.ll
index f864c8cbfcb5..4a1341c4d6e7 100644
--- a/test/CodeGen/ARM/2012-10-04-AAPCS-byval-align8.ll
+++ b/test/CodeGen/ARM/2012-10-04-AAPCS-byval-align8.ll
@@ -10,7 +10,9 @@ declare void @llvm.va_end(i8*) nounwind
; CHECK-LABEL: test_byval_8_bytes_alignment:
define void @test_byval_8_bytes_alignment(i32 %i, ...) {
entry:
-; CHECK: stm r0, {r1, r2, r3}
+; CHECK: sub sp, sp, #12
+; CHECK: sub sp, sp, #4
+; CHECK: stmib sp, {r1, r2, r3}
%g = alloca i8*
%g1 = bitcast i8** %g to i8*
call void @llvm.va_start(i8* %g1)
@@ -31,7 +33,7 @@ entry:
; CHECK: movw r0, #555
define i32 @main() {
entry:
- call void (i32, ...)* @test_byval_8_bytes_alignment(i32 555, %struct_t* byval @static_val)
+ call void (i32, ...) @test_byval_8_bytes_alignment(i32 555, %struct_t* byval @static_val)
ret i32 0
}
@@ -44,9 +46,9 @@ declare void @f(double);
; CHECK-NOT: str r1
define void @test_byval_8_bytes_alignment_fixed_arg(i32 %n1, %struct_t* byval %val) nounwind {
entry:
- %a = getelementptr inbounds %struct_t* %val, i32 0, i32 0
- %0 = load double* %a
- call void (double)* @f(double %0)
+ %a = getelementptr inbounds %struct_t, %struct_t* %val, i32 0, i32 0
+ %0 = load double, double* %a
+ call void (double) @f(double %0)
ret void
}
@@ -58,6 +60,6 @@ entry:
; CHECK: movw r0, #555
define i32 @main_fixed_arg() {
entry:
- call void (i32, %struct_t*)* @test_byval_8_bytes_alignment_fixed_arg(i32 555, %struct_t* byval @static_val)
+ call void (i32, %struct_t*) @test_byval_8_bytes_alignment_fixed_arg(i32 555, %struct_t* byval @static_val)
ret i32 0
}
diff --git a/test/CodeGen/ARM/2012-10-04-FixedFrame-vs-byval.ll b/test/CodeGen/ARM/2012-10-04-FixedFrame-vs-byval.ll
index c9ccc103e2fa..34af9026b52e 100644
--- a/test/CodeGen/ARM/2012-10-04-FixedFrame-vs-byval.ll
+++ b/test/CodeGen/ARM/2012-10-04-FixedFrame-vs-byval.ll
@@ -12,8 +12,8 @@ declare i32 @printf(i8*, ...)
; CHECK: vldr d16, [sp, #8]
define void @test_byval_usage_scheduling(i32 %n1, i32 %n2, %struct_t* byval %val) nounwind {
entry:
- %a = getelementptr inbounds %struct_t* %val, i32 0, i32 0
- %0 = load double* %a
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([12 x i8]* @.str, i32 0, i32 0), double %0)
+ %a = getelementptr inbounds %struct_t, %struct_t* %val, i32 0, i32 0
+ %0 = load double, double* %a
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i32 0, i32 0), double %0)
ret void
}
diff --git a/test/CodeGen/ARM/2012-10-18-PR14099-ByvalFrameAddress.ll b/test/CodeGen/ARM/2012-10-18-PR14099-ByvalFrameAddress.ll
index 0028eec80f44..ff3b7e16188e 100644
--- a/test/CodeGen/ARM/2012-10-18-PR14099-ByvalFrameAddress.ll
+++ b/test/CodeGen/ARM/2012-10-18-PR14099-ByvalFrameAddress.ll
@@ -10,8 +10,9 @@ define void @t(i32 %a, %struct.s* byval %s) nounwind {
entry:
; Here we need to only check proper start address of restored %s argument.
-; CHECK: sub sp, sp, #16
+; CHECK: sub sp, sp, #12
; CHECK: push {r11, lr}
+; CHECK: sub sp, sp, #4
; CHECK: add r0, sp, #12
; CHECK: stm r0, {r1, r2, r3}
; CHECK: add r0, sp, #12
diff --git a/test/CodeGen/ARM/2013-01-21-PR14992.ll b/test/CodeGen/ARM/2013-01-21-PR14992.ll
index 014686feee0e..4c1f2a741e47 100644
--- a/test/CodeGen/ARM/2013-01-21-PR14992.ll
+++ b/test/CodeGen/ARM/2013-01-21-PR14992.ll
@@ -6,12 +6,12 @@
;CHECK-LABEL: foo:
define i32 @foo(i32* %a) nounwind optsize {
entry:
- %0 = load i32* %a, align 4
- %arrayidx1 = getelementptr inbounds i32* %a, i32 1
- %1 = load i32* %arrayidx1, align 4
- %arrayidx2 = getelementptr inbounds i32* %a, i32 2
- %2 = load i32* %arrayidx2, align 4
- %add.ptr = getelementptr inbounds i32* %a, i32 3
+ %0 = load i32, i32* %a, align 4
+ %arrayidx1 = getelementptr inbounds i32, i32* %a, i32 1
+ %1 = load i32, i32* %arrayidx1, align 4
+ %arrayidx2 = getelementptr inbounds i32, i32* %a, i32 2
+ %2 = load i32, i32* %arrayidx2, align 4
+ %add.ptr = getelementptr inbounds i32, i32* %a, i32 3
;Make sure we do not have a duplicated register in the front of the reg list
;EXPECTED: ldm [[BASE:r[0-9]+]]!, {[[REG:r[0-9]+]], {{r[0-9]+}},
;CHECK-NOT: ldm [[BASE:r[0-9]+]]!, {[[REG:r[0-9]+]], [[REG]],
diff --git a/test/CodeGen/ARM/2013-04-05-Small-ByVal-Structs-PR15293.ll b/test/CodeGen/ARM/2013-04-05-Small-ByVal-Structs-PR15293.ll
index c5eba7d4773c..c38dd16f0d27 100644
--- a/test/CodeGen/ARM/2013-04-05-Small-ByVal-Structs-PR15293.ll
+++ b/test/CodeGen/ARM/2013-04-05-Small-ByVal-Structs-PR15293.ll
@@ -2,26 +2,26 @@
;RUN: llc -mtriple=arm-linux-gnueabihf < %s | FileCheck %s
;CHECK-LABEL: foo:
-;CHECK: sub sp, sp, #8
+;CHECK: sub sp, sp, #16
;CHECK: push {r11, lr}
-;CHECK: str r0, [sp, #12]
-;CHECK: add r0, sp, #12
+;CHECK: str r0, [sp, #8]
+;CHECK: add r0, sp, #8
;CHECK: bl fooUseParam
;CHECK: pop {r11, lr}
-;CHECK: add sp, sp, #8
+;CHECK: add sp, sp, #16
;CHECK: mov pc, lr
;CHECK-LABEL: foo2:
-;CHECK: sub sp, sp, #8
+;CHECK: sub sp, sp, #16
;CHECK: push {r11, lr}
;CHECK: str r0, [sp, #8]
;CHECK: add r0, sp, #8
-;CHECK: str r2, [sp, #12]
+;CHECK: str r2, [sp, #16]
;CHECK: bl fooUseParam
-;CHECK: add r0, sp, #12
+;CHECK: add r0, sp, #16
;CHECK: bl fooUseParam
;CHECK: pop {r11, lr}
-;CHECK: add sp, sp, #8
+;CHECK: add sp, sp, #16
;CHECK: mov pc, lr
;CHECK-LABEL: doFoo:
diff --git a/test/CodeGen/ARM/2013-04-16-AAPCS-C5-vs-VFP.ll b/test/CodeGen/ARM/2013-04-16-AAPCS-C5-vs-VFP.ll
index 446403d79cac..5e82b0925b24 100644
--- a/test/CodeGen/ARM/2013-04-16-AAPCS-C5-vs-VFP.ll
+++ b/test/CodeGen/ARM/2013-04-16-AAPCS-C5-vs-VFP.ll
@@ -33,7 +33,7 @@ define void @foo2(double %p0, ; --> D0
%struct_t* byval %p10) ; --> Stack+8
{
entry:
-;CHECK: push.w {r11, lr}
+;CHECK: push {r7, lr}
;CHECK-NOT: stm
;CHECK: add r0, sp, #16
;CHECK: bl fooUseStruct
diff --git a/test/CodeGen/ARM/2013-04-18-load-overlap-PR14824.ll b/test/CodeGen/ARM/2013-04-18-load-overlap-PR14824.ll
index 459992818749..6c8b0ff2de19 100644
--- a/test/CodeGen/ARM/2013-04-18-load-overlap-PR14824.ll
+++ b/test/CodeGen/ARM/2013-04-18-load-overlap-PR14824.ll
@@ -9,9 +9,9 @@ define void @sample_test(<8 x i64> * %secondSource, <8 x i64> * %source, <8 x i6
entry:
; Load %source
- %s0 = load <8 x i64> * %source, align 64
- %arrayidx64 = getelementptr inbounds <8 x i64> * %source, i32 6
- %s120 = load <8 x i64> * %arrayidx64, align 64
+ %s0 = load <8 x i64> , <8 x i64> * %source, align 64
+ %arrayidx64 = getelementptr inbounds <8 x i64>, <8 x i64> * %source, i32 6
+ %s120 = load <8 x i64> , <8 x i64> * %arrayidx64, align 64
%s122 = bitcast <8 x i64> %s120 to i512
%data.i.i677.48.extract.shift = lshr i512 %s122, 384
%data.i.i677.48.extract.trunc = trunc i512 %data.i.i677.48.extract.shift to i64
@@ -33,9 +33,9 @@ entry:
%s130 = insertelement <8 x i64> %s129, i64 %data.i.i677.56.extract.trunc, i32 7
; Load %secondSource
- %s1 = load <8 x i64> * %secondSource, align 64
- %arrayidx67 = getelementptr inbounds <8 x i64> * %secondSource, i32 6
- %s121 = load <8 x i64> * %arrayidx67, align 64
+ %s1 = load <8 x i64> , <8 x i64> * %secondSource, align 64
+ %arrayidx67 = getelementptr inbounds <8 x i64>, <8 x i64> * %secondSource, i32 6
+ %s121 = load <8 x i64> , <8 x i64> * %arrayidx67, align 64
%s131 = bitcast <8 x i64> %s121 to i512
%data.i1.i676.48.extract.shift = lshr i512 %s131, 384
%data.i1.i676.48.extract.trunc = trunc i512 %data.i1.i676.48.extract.shift to i64
@@ -61,10 +61,10 @@ entry:
%vecinit35.i.i700 = shufflevector <8 x i64> %vecinit28.i.i699, <8 x i64> %s139, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 13, i32 undef, i32 undef>
%vecinit42.i.i701 = shufflevector <8 x i64> %vecinit35.i.i700, <8 x i64> %s139, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 14, i32 undef>
%vecinit49.i.i702 = shufflevector <8 x i64> %vecinit42.i.i701, <8 x i64> %s130, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 15>
- %arrayidx72 = getelementptr inbounds <8 x i64> * %dest, i32 6
+ %arrayidx72 = getelementptr inbounds <8 x i64>, <8 x i64> * %dest, i32 6
store <8 x i64> %vecinit49.i.i702, <8 x i64> * %arrayidx72, align 64
- %arrayidx78 = getelementptr inbounds <8 x i64> * %secondSource, i32 7
- %s141 = load <8 x i64> * %arrayidx78, align 64
+ %arrayidx78 = getelementptr inbounds <8 x i64>, <8 x i64> * %secondSource, i32 7
+ %s141 = load <8 x i64> , <8 x i64> * %arrayidx78, align 64
%s151 = bitcast <8 x i64> %s141 to i512
%data.i1.i649.32.extract.shift = lshr i512 %s151, 256
%data.i1.i649.32.extract.trunc = trunc i512 %data.i1.i649.32.extract.shift to i64
@@ -76,7 +76,7 @@ entry:
%data.i1.i649.8.extract.shift = lshr i512 %s151, 64
%data.i1.i649.8.extract.trunc = trunc i512 %data.i1.i649.8.extract.shift to i64
%s155 = insertelement <8 x i64> %s154, i64 %data.i1.i649.8.extract.trunc, i32 3
- %arrayidx83 = getelementptr inbounds <8 x i64> * %dest, i32 7
+ %arrayidx83 = getelementptr inbounds <8 x i64>, <8 x i64> * %dest, i32 7
store <8 x i64> %s155, <8 x i64> * %arrayidx83, align 64
ret void
}
diff --git a/test/CodeGen/ARM/2013-04-21-AAPCS-VA-C.1.cp.ll b/test/CodeGen/ARM/2013-04-21-AAPCS-VA-C.1.cp.ll
index 0e0537ec5bfc..d18dbd2db9b4 100644
--- a/test/CodeGen/ARM/2013-04-21-AAPCS-VA-C.1.cp.ll
+++ b/test/CodeGen/ARM/2013-04-21-AAPCS-VA-C.1.cp.ll
@@ -14,8 +14,8 @@ define void @printfn(i32 %a, i16 signext %b, double %C, i8 signext %E) {
entry:
%conv = sext i16 %b to i32
%conv1 = sext i8 %E to i32
- %call = tail call i32 (i8*, ...)* @printf(
- i8* getelementptr inbounds ([13 x i8]* @.str, i32 0, i32 0), ; --> R0
+ %call = tail call i32 (i8*, ...) @printf(
+ i8* getelementptr inbounds ([13 x i8], [13 x i8]* @.str, i32 0, i32 0), ; --> R0
i32 %a, ; --> R1
i32 %conv, ; --> R2
double %C, ; --> SP, NCRN := R4
diff --git a/test/CodeGen/ARM/2013-05-02-AAPCS-ByVal-Structs-C4-C5-VFP.ll b/test/CodeGen/ARM/2013-05-02-AAPCS-ByVal-Structs-C4-C5-VFP.ll
index e79a3ba741ec..c93d2a2d34fb 100644
--- a/test/CodeGen/ARM/2013-05-02-AAPCS-ByVal-Structs-C4-C5-VFP.ll
+++ b/test/CodeGen/ARM/2013-05-02-AAPCS-ByVal-Structs-C4-C5-VFP.ll
@@ -21,11 +21,12 @@ define void @foo(double %vfp0, ; --> D0, NSAA=SP
i32 %p2, ; --> R3, NSAA=SP+8
i32 %p3) #0 { ; --> SP+4, NSAA=SP+12
entry:
- ;CHECK: sub sp, #8
- ;CHECK: push.w {r11, lr}
- ;CHECK: add r0, sp, #8
- ;CHECK: str r2, [sp, #12]
- ;CHECK: str r1, [sp, #8]
+ ;CHECK: sub sp, #12
+ ;CHECK: push {r7, lr}
+ ;CHECK: sub sp, #4
+ ;CHECK: add r0, sp, #12
+ ;CHECK: str r2, [sp, #16]
+ ;CHECK: str r1, [sp, #12]
;CHECK: bl fooUseStruct
call void @fooUseStruct(%st_t* %p1)
ret void
diff --git a/test/CodeGen/ARM/2013-05-02-AAPCS-ByVal-Structs-C4-C5-VFP2.ll b/test/CodeGen/ARM/2013-05-02-AAPCS-ByVal-Structs-C4-C5-VFP2.ll
index 212bbc2ee9c8..944a60c06107 100644
--- a/test/CodeGen/ARM/2013-05-02-AAPCS-ByVal-Structs-C4-C5-VFP2.ll
+++ b/test/CodeGen/ARM/2013-05-02-AAPCS-ByVal-Structs-C4-C5-VFP2.ll
@@ -19,7 +19,7 @@ define void @foo(double %vfp0, ; --> D0, NSAA=SP
i32 %p2) #0 { ; --> SP+24, NSAA=SP+24
entry:
- ;CHECK: push.w {r11, lr}
+ ;CHECK: push {r7, lr}
;CHECK: ldr r0, [sp, #32]
;CHECK: bl fooUseI32
call void @fooUseI32(i32 %p2)
diff --git a/test/CodeGen/ARM/2013-05-07-ByteLoadSameAddress.ll b/test/CodeGen/ARM/2013-05-07-ByteLoadSameAddress.ll
index efb82027b70d..aabbfae8b879 100644
--- a/test/CodeGen/ARM/2013-05-07-ByteLoadSameAddress.ll
+++ b/test/CodeGen/ARM/2013-05-07-ByteLoadSameAddress.ll
@@ -7,46 +7,46 @@ entry:
%add = add nsw i32 %mul17, %w
%sub19 = sub i32 %add, %Width
%sub20 = add i32 %sub19, -1
- %arrayidx21 = getelementptr inbounds i8* %call1, i32 %sub20
- %0 = load i8* %arrayidx21, align 1
+ %arrayidx21 = getelementptr inbounds i8, i8* %call1, i32 %sub20
+ %0 = load i8, i8* %arrayidx21, align 1
%conv22 = zext i8 %0 to i32
- %arrayidx25 = getelementptr inbounds i8* %call1, i32 %sub19
- %1 = load i8* %arrayidx25, align 1
+ %arrayidx25 = getelementptr inbounds i8, i8* %call1, i32 %sub19
+ %1 = load i8, i8* %arrayidx25, align 1
%conv26 = zext i8 %1 to i32
%mul23189 = add i32 %conv26, %conv22
%add30 = add i32 %sub19, 1
- %arrayidx31 = getelementptr inbounds i8* %call1, i32 %add30
- %2 = load i8* %arrayidx31, align 1
+ %arrayidx31 = getelementptr inbounds i8, i8* %call1, i32 %add30
+ %2 = load i8, i8* %arrayidx31, align 1
%conv32 = zext i8 %2 to i32
; CHECK: ldrb r{{[0-9]*}}, [r{{[0-9]*}}, #-1]
; CHECK-NEXT: ldrb r{{[0-9]*}}, [r{{[0-9]*}}, #1]
%add28190 = add i32 %mul23189, %conv32
%sub35 = add i32 %add, -1
- %arrayidx36 = getelementptr inbounds i8* %call1, i32 %sub35
- %3 = load i8* %arrayidx36, align 1
+ %arrayidx36 = getelementptr inbounds i8, i8* %call1, i32 %sub35
+ %3 = load i8, i8* %arrayidx36, align 1
%conv37 = zext i8 %3 to i32
%add34191 = add i32 %add28190, %conv37
- %arrayidx40 = getelementptr inbounds i8* %call1, i32 %add
- %4 = load i8* %arrayidx40, align 1
+ %arrayidx40 = getelementptr inbounds i8, i8* %call1, i32 %add
+ %4 = load i8, i8* %arrayidx40, align 1
%conv41 = zext i8 %4 to i32
%mul42 = mul nsw i32 %conv41, 255
%add44 = add i32 %add, 1
- %arrayidx45 = getelementptr inbounds i8* %call1, i32 %add44
- %5 = load i8* %arrayidx45, align 1
+ %arrayidx45 = getelementptr inbounds i8, i8* %call1, i32 %add44
+ %5 = load i8, i8* %arrayidx45, align 1
%conv46 = zext i8 %5 to i32
; CHECK: ldrb r{{[0-9]*}}, [r{{[0-9]*}}, #-1]
; CHECK-NEXT: ldrb r{{[0-9]*}}, [r{{[0-9]*}}, #1]
%add49 = add i32 %add, %Width
%sub50 = add i32 %add49, -1
- %arrayidx51 = getelementptr inbounds i8* %call1, i32 %sub50
- %6 = load i8* %arrayidx51, align 1
+ %arrayidx51 = getelementptr inbounds i8, i8* %call1, i32 %sub50
+ %6 = load i8, i8* %arrayidx51, align 1
%conv52 = zext i8 %6 to i32
- %arrayidx56 = getelementptr inbounds i8* %call1, i32 %add49
- %7 = load i8* %arrayidx56, align 1
+ %arrayidx56 = getelementptr inbounds i8, i8* %call1, i32 %add49
+ %7 = load i8, i8* %arrayidx56, align 1
%conv57 = zext i8 %7 to i32
%add61 = add i32 %add49, 1
- %arrayidx62 = getelementptr inbounds i8* %call1, i32 %add61
- %8 = load i8* %arrayidx62, align 1
+ %arrayidx62 = getelementptr inbounds i8, i8* %call1, i32 %add61
+ %8 = load i8, i8* %arrayidx62, align 1
%conv63 = zext i8 %8 to i32
; CHECK: ldrb r{{[0-9]*}}, [r{{[0-9]*}}, #-1]
; CHECK-NEXT: ldrb{{[.w]*}} r{{[0-9]*}}, [r{{[0-9]*}}, #1]
@@ -58,7 +58,7 @@ entry:
%add65 = add i32 %tmp196, %mul42
%9 = lshr i32 %add65, 8
%conv68 = trunc i32 %9 to i8
- %arrayidx69 = getelementptr inbounds i8* %call3, i32 %add
+ %arrayidx69 = getelementptr inbounds i8, i8* %call3, i32 %add
store i8 %conv68, i8* %arrayidx69, align 1
ret i8 %conv68
}
diff --git a/test/CodeGen/ARM/2013-05-13-AAPCS-byval-padding.ll b/test/CodeGen/ARM/2013-05-13-AAPCS-byval-padding.ll
index 7bf03a16c6fb..3c20c6b53633 100644
--- a/test/CodeGen/ARM/2013-05-13-AAPCS-byval-padding.ll
+++ b/test/CodeGen/ARM/2013-05-13-AAPCS-byval-padding.ll
@@ -4,7 +4,7 @@
%struct.S227 = type { [49 x i32], i32 }
define void @check227(
- i32 %b,
+ i32 %b,
%struct.S227* byval nocapture %arg0,
%struct.S227* %arg1) {
; b --> R0
@@ -13,14 +13,16 @@ define void @check227(
entry:
-;CHECK: sub sp, sp, #16
+;CHECK: sub sp, sp, #12
;CHECK: push {r11, lr}
+;CHECK: sub sp, sp, #4
;CHECK: add r0, sp, #12
;CHECK: stm r0, {r1, r2, r3}
;CHECK: ldr r0, [sp, #212]
;CHECK: bl useInt
+;CHECK: add sp, sp, #4
;CHECK: pop {r11, lr}
-;CHECK: add sp, sp, #16
+;CHECK: add sp, sp, #12
%0 = ptrtoint %struct.S227* %arg1 to i32
tail call void @useInt(i32 %0)
diff --git a/test/CodeGen/ARM/2013-05-31-char-shift-crash.ll b/test/CodeGen/ARM/2013-05-31-char-shift-crash.ll
index 0130f7ab68f5..617271264b4f 100644
--- a/test/CodeGen/ARM/2013-05-31-char-shift-crash.ll
+++ b/test/CodeGen/ARM/2013-05-31-char-shift-crash.ll
@@ -10,7 +10,7 @@ define arm_aapcscc void @f2(i8 signext %a) #0 {
entry:
%a.addr = alloca i8, align 1
store i8 %a, i8* %a.addr, align 1
- %0 = load i8* %a.addr, align 1
+ %0 = load i8, i8* %a.addr, align 1
%conv = sext i8 %0 to i32
%shr = ashr i32 %conv, 56
%conv1 = trunc i32 %shr to i8
diff --git a/test/CodeGen/ARM/2013-07-29-vector-or-combine.ll b/test/CodeGen/ARM/2013-07-29-vector-or-combine.ll
index 05a4ef05e958..979df3072fbf 100644
--- a/test/CodeGen/ARM/2013-07-29-vector-or-combine.ll
+++ b/test/CodeGen/ARM/2013-07-29-vector-or-combine.ll
@@ -17,7 +17,7 @@ entry:
; CHECK: vorr q9, q9, q10
; CHECK: vst1.32 {d18, d19}, [r0]
vector.body:
- %wide.load = load <4 x i32>* undef, align 4
+ %wide.load = load <4 x i32>, <4 x i32>* undef, align 4
%0 = and <4 x i32> %wide.load, <i32 -16711936, i32 -16711936, i32 -16711936, i32 -16711936>
%1 = sub <4 x i32> %wide.load, zeroinitializer
%2 = and <4 x i32> %1, <i32 16711680, i32 16711680, i32 16711680, i32 16711680>
diff --git a/test/CodeGen/ARM/2013-10-11-select-stalls.ll b/test/CodeGen/ARM/2013-10-11-select-stalls.ll
index 33c0587226a8..d6045c7b8c8c 100644
--- a/test/CodeGen/ARM/2013-10-11-select-stalls.ll
+++ b/test/CodeGen/ARM/2013-10-11-select-stalls.ll
@@ -7,7 +7,7 @@ define <16 x i8> @multiselect(i32 %avail, i8* %foo, i8* %bar) {
entry:
%vld1 = call <16 x i8> @llvm.arm.neon.vld1.v16i8(i8* %foo, i32 1)
%vld2 = call <16 x i8> @llvm.arm.neon.vld1.v16i8(i8* %bar, i32 1)
- %and = and i32 %avail, 1
+ %and = and i32 %avail, 3
%tobool = icmp eq i32 %and, 0
%retv = select i1 %tobool, <16 x i8> %vld1, <16 x i8> %vld2
ret <16 x i8> %retv
diff --git a/test/CodeGen/ARM/2014-01-09-pseudo_expand_implicit_reg.ll b/test/CodeGen/ARM/2014-01-09-pseudo_expand_implicit_reg.ll
index 6c0fbd00bd1a..ef575f4c41ec 100644
--- a/test/CodeGen/ARM/2014-01-09-pseudo_expand_implicit_reg.ll
+++ b/test/CodeGen/ARM/2014-01-09-pseudo_expand_implicit_reg.ll
@@ -4,7 +4,7 @@
define void @vst(i8* %m, [4 x i64] %v) {
entry:
; CHECK: vst:
-; CHECK: VST1d64Q %R{{[0-9]+}}<kill>, 8, %D{{[0-9]+}}, pred:14, pred:%noreg, %Q{{[0-9]+}}_Q{{[0-9]+}}<imp-use>
+; CHECK: VST1d64Q %R{{[0-9]+}}<kill>, 8, %D{{[0-9]+}}, pred:14, pred:%noreg, %Q{{[0-9]+}}_Q{{[0-9]+}}<imp-use,kill>
%v0 = extractvalue [4 x i64] %v, 0
%v1 = extractvalue [4 x i64] %v, 1
@@ -38,13 +38,13 @@ entry:
define <8 x i8> @vtbx4(<8 x i8>* %A, %struct.__neon_int8x8x4_t* %B, <8 x i8>* %C) nounwind {
; CHECK: vtbx4:
; CHECK: VTBX4 {{.*}}, pred:14, pred:%noreg, %Q{{[0-9]+}}_Q{{[0-9]+}}<imp-use>
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load %struct.__neon_int8x8x4_t* %B
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
+ %tmp2 = load %struct.__neon_int8x8x4_t, %struct.__neon_int8x8x4_t* %B
%tmp3 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 0
%tmp4 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 1
%tmp5 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 2
%tmp6 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 3
- %tmp7 = load <8 x i8>* %C
+ %tmp7 = load <8 x i8>, <8 x i8>* %C
%tmp8 = call <8 x i8> @llvm.arm.neon.vtbx4(<8 x i8> %tmp1, <8 x i8> %tmp3, <8 x i8> %tmp4, <8 x i8> %tmp5, <8 x i8> %tmp6, <8 x i8> %tmp7)
call void @bar2(%struct.__neon_int8x8x4_t %tmp2, <8 x i8> %tmp8)
ret <8 x i8> %tmp8
diff --git a/test/CodeGen/ARM/2014-02-21-byval-reg-split-alignment.ll b/test/CodeGen/ARM/2014-02-21-byval-reg-split-alignment.ll
index 33bfa2fa61cd..5b2fc57359ad 100644
--- a/test/CodeGen/ARM/2014-02-21-byval-reg-split-alignment.ll
+++ b/test/CodeGen/ARM/2014-02-21-byval-reg-split-alignment.ll
@@ -13,15 +13,16 @@ declare void @usePtr(%struct8bytes8align*)
; c -> sp+0..sp+7
define void @foo1(i32 %a, %struct12bytes* byval %b, i64 %c) {
; CHECK-LABEL: foo1
-; CHECK: sub sp, sp, #16
+; CHECK: sub sp, sp, #12
; CHECK: push {r11, lr}
+; CHECK: sub sp, sp, #4
; CHECK: add [[SCRATCH:r[0-9]+]], sp, #12
; CHECK: stm [[SCRATCH]], {r1, r2, r3}
; CHECK: ldr r0, [sp, #24]
; CHECK: ldr r1, [sp, #28]
; CHECK: bl useLong
; CHECK: pop {r11, lr}
-; CHECK: add sp, sp, #16
+; CHECK: add sp, sp, #12
call void @useLong(i64 %c)
ret void
diff --git a/test/CodeGen/ARM/2014-07-18-earlyclobber-str-post.ll b/test/CodeGen/ARM/2014-07-18-earlyclobber-str-post.ll
index df7d2457e763..1abc40f2c16e 100644
--- a/test/CodeGen/ARM/2014-07-18-earlyclobber-str-post.ll
+++ b/test/CodeGen/ARM/2014-07-18-earlyclobber-str-post.ll
@@ -8,7 +8,7 @@ define i32* @earlyclobber-str-post(i32* %addr) nounwind {
; CHECK-NOT: str r[[REG:[0-9]+]], [r[[REG]]], #4
%val = ptrtoint i32* %addr to i32
store i32 %val, i32* %addr
- %new = getelementptr i32* %addr, i32 1
+ %new = getelementptr i32, i32* %addr, i32 1
ret i32* %new
}
@@ -18,7 +18,7 @@ define i16* @earlyclobber-strh-post(i16* %addr) nounwind {
%val = ptrtoint i16* %addr to i32
%tr = trunc i32 %val to i16
store i16 %tr, i16* %addr
- %new = getelementptr i16* %addr, i32 1
+ %new = getelementptr i16, i16* %addr, i32 1
ret i16* %new
}
@@ -28,6 +28,6 @@ define i8* @earlyclobber-strb-post(i8* %addr) nounwind {
%val = ptrtoint i8* %addr to i32
%tr = trunc i32 %val to i8
store i8 %tr, i8* %addr
- %new = getelementptr i8* %addr, i32 1
+ %new = getelementptr i8, i8* %addr, i32 1
ret i8* %new
}
diff --git a/test/CodeGen/ARM/2014-08-04-muls-it.ll b/test/CodeGen/ARM/2014-08-04-muls-it.ll
index 4636bff880a8..5ba1347433db 100644
--- a/test/CodeGen/ARM/2014-08-04-muls-it.ll
+++ b/test/CodeGen/ARM/2014-08-04-muls-it.ll
@@ -17,9 +17,7 @@ if.end: ; preds = %if.then, %entry
; CHECK-LABEL: function
; CHECK: cmp r0, r1
-; CHECK: bne [[LABEL:[.*]]]
; CHECK-NOT: mulseq r0, r0, r0
-; CHECK: [[LABEL]]
-; CHECK: muls r0, r0, r0
+; CHECK: muleq r0, r0, r0
; CHECK: bx lr
diff --git a/test/CodeGen/ARM/2015-01-21-thumbv4t-ldstr-opt.ll b/test/CodeGen/ARM/2015-01-21-thumbv4t-ldstr-opt.ll
index f3cc3d82121f..2efd91f503e5 100644
--- a/test/CodeGen/ARM/2015-01-21-thumbv4t-ldstr-opt.ll
+++ b/test/CodeGen/ARM/2015-01-21-thumbv4t-ldstr-opt.ll
@@ -1,55 +1,48 @@
; RUN: llc -mtriple=thumbv4t-none--eabi < %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-V4T
; RUN: llc -mtriple=thumbv6m-none--eabi < %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-V6M
-; CHECK-LABEL: foo
-define i32 @foo(i32 %z, ...) #0 {
-entry:
- %a = alloca i32, align 4
- %b = alloca i32, align 4
- %c = alloca i32, align 4
- %d = alloca i32, align 4
- %e = alloca i32, align 4
- %f = alloca i32, align 4
- %g = alloca i32, align 4
- %h = alloca i32, align 4
-
- store i32 1, i32* %a, align 4
- store i32 2, i32* %b, align 4
- store i32 3, i32* %c, align 4
- store i32 4, i32* %d, align 4
- store i32 5, i32* %e, align 4
- store i32 6, i32* %f, align 4
- store i32 7, i32* %g, align 4
- store i32 8, i32* %h, align 4
-
- %0 = load i32* %a, align 4
- %1 = load i32* %b, align 4
- %2 = load i32* %c, align 4
- %3 = load i32* %d, align 4
- %4 = load i32* %e, align 4
- %5 = load i32* %f, align 4
- %6 = load i32* %g, align 4
- %7 = load i32* %h, align 4
-
- %add = add nsw i32 %0, %1
- %add4 = add nsw i32 %add, %2
- %add5 = add nsw i32 %add4, %3
- %add6 = add nsw i32 %add5, %4
- %add7 = add nsw i32 %add6, %5
- %add8 = add nsw i32 %add7, %6
- %add9 = add nsw i32 %add8, %7
-
- %addz = add nsw i32 %add9, %z
- call void @llvm.va_start(i8* null)
- ret i32 %addz
-
-; CHECK: sub sp, #40
-; CHECK-NEXT: add [[BASE:r[0-9]]], sp, #8
-
-; CHECK-V4T: movs [[NEWBASE:r[0-9]]], [[BASE]]
-; CHECK-V6M: mov [[NEWBASE:r[0-9]]], [[BASE]]
-; CHECK-NEXT: adds [[NEWBASE]], #8
+; CHECK-LABEL: test1
+define i32 @test1(i32* %p) {
+
+; Offsets less than 8 can be generated in a single add
+; CHECK: adds [[NEWBASE:r[0-9]]], r0, #4
+ %1 = getelementptr inbounds i32, i32* %p, i32 1
+ %2 = getelementptr inbounds i32, i32* %p, i32 2
+ %3 = getelementptr inbounds i32, i32* %p, i32 3
+ %4 = getelementptr inbounds i32, i32* %p, i32 4
+
; CHECK-NEXT: ldm [[NEWBASE]],
+ %5 = load i32, i32* %1, align 4
+ %6 = load i32, i32* %2, align 4
+ %7 = load i32, i32* %3, align 4
+ %8 = load i32, i32* %4, align 4
+
+ %9 = add nsw i32 %5, %6
+ %10 = add nsw i32 %9, %7
+ %11 = add nsw i32 %10, %8
+ ret i32 %11
}
-declare void @llvm.va_start(i8*) nounwind
+; CHECK-LABEL: test2
+define i32 @test2(i32* %p) {
+
+; Offsets >=8 require a mov and an add
+; CHECK-V4T: movs [[NEWBASE:r[0-9]]], r0
+; CHECK-V6M: mov [[NEWBASE:r[0-9]]], r0
+; CHECK-NEXT: adds [[NEWBASE]], #8
+ %1 = getelementptr inbounds i32, i32* %p, i32 2
+ %2 = getelementptr inbounds i32, i32* %p, i32 3
+ %3 = getelementptr inbounds i32, i32* %p, i32 4
+ %4 = getelementptr inbounds i32, i32* %p, i32 5
+
+; CHECK-NEXT: ldm [[NEWBASE]],
+ %5 = load i32, i32* %1, align 4
+ %6 = load i32, i32* %2, align 4
+ %7 = load i32, i32* %3, align 4
+ %8 = load i32, i32* %4, align 4
+
+ %9 = add nsw i32 %5, %6
+ %10 = add nsw i32 %9, %7
+ %11 = add nsw i32 %10, %8
+ ret i32 %11
+}
diff --git a/test/CodeGen/ARM/MergeConsecutiveStores.ll b/test/CodeGen/ARM/MergeConsecutiveStores.ll
index 06c87e986a83..3f7d625244bd 100644
--- a/test/CodeGen/ARM/MergeConsecutiveStores.ll
+++ b/test/CodeGen/ARM/MergeConsecutiveStores.ll
@@ -12,17 +12,17 @@ define void @MergeLoadStoreBaseIndexOffset(i32* %a, i8* %b, i8* %c, i32 %n) {
%.09 = phi i32 [ %n, %0 ], [ %11, %1 ]
%.08 = phi i8* [ %b, %0 ], [ %10, %1 ]
%.0 = phi i32* [ %a, %0 ], [ %2, %1 ]
- %2 = getelementptr inbounds i32* %.0, i32 1
- %3 = load i32* %.0, align 1
- %4 = getelementptr inbounds i8* %c, i32 %3
- %5 = load i8* %4, align 1
+ %2 = getelementptr inbounds i32, i32* %.0, i32 1
+ %3 = load i32, i32* %.0, align 1
+ %4 = getelementptr inbounds i8, i8* %c, i32 %3
+ %5 = load i8, i8* %4, align 1
%6 = add i32 %3, 1
- %7 = getelementptr inbounds i8* %c, i32 %6
- %8 = load i8* %7, align 1
+ %7 = getelementptr inbounds i8, i8* %c, i32 %6
+ %8 = load i8, i8* %7, align 1
store i8 %5, i8* %.08, align 1
- %9 = getelementptr inbounds i8* %.08, i32 1
+ %9 = getelementptr inbounds i8, i8* %.08, i32 1
store i8 %8, i8* %9, align 1
- %10 = getelementptr inbounds i8* %.08, i32 2
+ %10 = getelementptr inbounds i8, i8* %.08, i32 2
%11 = add nsw i32 %.09, -1
%12 = icmp eq i32 %11, 0
br i1 %12, label %13, label %1
@@ -44,18 +44,18 @@ define void @MergeLoadStoreBaseIndexOffsetSext(i8* %a, i8* %b, i8* %c, i32 %n) {
%.09 = phi i32 [ %n, %0 ], [ %12, %1 ]
%.08 = phi i8* [ %b, %0 ], [ %11, %1 ]
%.0 = phi i8* [ %a, %0 ], [ %2, %1 ]
- %2 = getelementptr inbounds i8* %.0, i32 1
- %3 = load i8* %.0, align 1
+ %2 = getelementptr inbounds i8, i8* %.0, i32 1
+ %3 = load i8, i8* %.0, align 1
%4 = sext i8 %3 to i32
- %5 = getelementptr inbounds i8* %c, i32 %4
- %6 = load i8* %5, align 1
+ %5 = getelementptr inbounds i8, i8* %c, i32 %4
+ %6 = load i8, i8* %5, align 1
%7 = add i32 %4, 1
- %8 = getelementptr inbounds i8* %c, i32 %7
- %9 = load i8* %8, align 1
+ %8 = getelementptr inbounds i8, i8* %c, i32 %7
+ %9 = load i8, i8* %8, align 1
store i8 %6, i8* %.08, align 1
- %10 = getelementptr inbounds i8* %.08, i32 1
+ %10 = getelementptr inbounds i8, i8* %.08, i32 1
store i8 %9, i8* %10, align 1
- %11 = getelementptr inbounds i8* %.08, i32 2
+ %11 = getelementptr inbounds i8, i8* %.08, i32 2
%12 = add nsw i32 %.09, -1
%13 = icmp eq i32 %12, 0
br i1 %13, label %14, label %1
@@ -76,19 +76,19 @@ define void @loadStoreBaseIndexOffsetSextNoSex(i8* %a, i8* %b, i8* %c, i32 %n) {
%.09 = phi i32 [ %n, %0 ], [ %12, %1 ]
%.08 = phi i8* [ %b, %0 ], [ %11, %1 ]
%.0 = phi i8* [ %a, %0 ], [ %2, %1 ]
- %2 = getelementptr inbounds i8* %.0, i32 1
- %3 = load i8* %.0, align 1
+ %2 = getelementptr inbounds i8, i8* %.0, i32 1
+ %3 = load i8, i8* %.0, align 1
%4 = sext i8 %3 to i32
- %5 = getelementptr inbounds i8* %c, i32 %4
- %6 = load i8* %5, align 1
+ %5 = getelementptr inbounds i8, i8* %c, i32 %4
+ %6 = load i8, i8* %5, align 1
%7 = add i8 %3, 1
%wrap.4 = sext i8 %7 to i32
- %8 = getelementptr inbounds i8* %c, i32 %wrap.4
- %9 = load i8* %8, align 1
+ %8 = getelementptr inbounds i8, i8* %c, i32 %wrap.4
+ %9 = load i8, i8* %8, align 1
store i8 %6, i8* %.08, align 1
- %10 = getelementptr inbounds i8* %.08, i32 1
+ %10 = getelementptr inbounds i8, i8* %.08, i32 1
store i8 %9, i8* %10, align 1
- %11 = getelementptr inbounds i8* %.08, i32 2
+ %11 = getelementptr inbounds i8, i8* %.08, i32 2
%12 = add nsw i32 %.09, -1
%13 = icmp eq i32 %12, 0
br i1 %13, label %14, label %1
diff --git a/test/CodeGen/ARM/Windows/chkstk-movw-movt-isel.ll b/test/CodeGen/ARM/Windows/chkstk-movw-movt-isel.ll
index a82f6141dbb3..a314259e499e 100644
--- a/test/CodeGen/ARM/Windows/chkstk-movw-movt-isel.ll
+++ b/test/CodeGen/ARM/Windows/chkstk-movw-movt-isel.ll
@@ -10,10 +10,10 @@ entry:
%i.addr = alloca i32, align 4
%buffer = alloca [4096 x i8], align 1
store i32 %i, i32* %i.addr, align 4
- %0 = load i32* %i.addr, align 4
+ %0 = load i32, i32* %i.addr, align 4
%rem = urem i32 %0, 4096
- %arrayidx = getelementptr inbounds [4096 x i8]* %buffer, i32 0, i32 %rem
- %1 = load volatile i8* %arrayidx, align 1
+ %arrayidx = getelementptr inbounds [4096 x i8], [4096 x i8]* %buffer, i32 0, i32 %rem
+ %1 = load volatile i8, i8* %arrayidx, align 1
ret i8 %1
}
diff --git a/test/CodeGen/ARM/Windows/dllimport.ll b/test/CodeGen/ARM/Windows/dllimport.ll
index bc737bd41827..6786be3322e3 100644
--- a/test/CodeGen/ARM/Windows/dllimport.ll
+++ b/test/CodeGen/ARM/Windows/dllimport.ll
@@ -8,7 +8,7 @@ declare dllimport arm_aapcs_vfpcc i32 @external()
declare arm_aapcs_vfpcc i32 @internal()
define arm_aapcs_vfpcc i32 @get_var() {
- %1 = load i32* @var, align 4
+ %1 = load i32, i32* @var, align 4
ret i32 %1
}
@@ -20,7 +20,7 @@ define arm_aapcs_vfpcc i32 @get_var() {
; CHECK: bx lr
define arm_aapcs_vfpcc i32 @get_ext() {
- %1 = load i32* @ext, align 4
+ %1 = load i32, i32* @ext, align 4
ret i32 %1
}
diff --git a/test/CodeGen/ARM/Windows/frame-register.ll b/test/CodeGen/ARM/Windows/frame-register.ll
index 31167d7352e3..7ecfc1a71714 100644
--- a/test/CodeGen/ARM/Windows/frame-register.ll
+++ b/test/CodeGen/ARM/Windows/frame-register.ll
@@ -8,12 +8,12 @@ entry:
%i.addr = alloca i32, align 4
%j = alloca i32, align 4
store i32 %i, i32* %i.addr, align 4
- %0 = load i32* %i.addr, align 4
+ %0 = load i32, i32* %i.addr, align 4
%add = add nsw i32 %0, 1
store i32 %add, i32* %j, align 4
- %1 = load i32* %j, align 4
+ %1 = load i32, i32* %j, align 4
call void @callee(i32 %1)
- %2 = load i32* %j, align 4
+ %2 = load i32, i32* %j, align 4
%add1 = add nsw i32 %2, 1
ret i32 %add1
}
diff --git a/test/CodeGen/ARM/Windows/long-calls.ll b/test/CodeGen/ARM/Windows/long-calls.ll
index e35f414579af..21c95fac91c5 100644
--- a/test/CodeGen/ARM/Windows/long-calls.ll
+++ b/test/CodeGen/ARM/Windows/long-calls.ll
@@ -10,7 +10,7 @@ entry:
}
; CHECK-LABEL: caller
-; CHECK: ldr [[REG:r[0-9]+]], [[CPI:.LCPI[_0-9]+]]
+; CHECK: ldr [[REG:r[0-9]+]], [[CPI:LCPI[_0-9]+]]
; CHECK: bx [[REG]]
; CHECK: .align 2
; CHECK: [[CPI]]:
diff --git a/test/CodeGen/ARM/Windows/movw-movt-relocations.ll b/test/CodeGen/ARM/Windows/movw-movt-relocations.ll
index 3ae6428d3a6b..c21aee087cf4 100644
--- a/test/CodeGen/ARM/Windows/movw-movt-relocations.ll
+++ b/test/CodeGen/ARM/Windows/movw-movt-relocations.ll
@@ -10,8 +10,8 @@
; Function Attrs: nounwind optsize readonly
define i32 @relocation(i32 %j, i32 %k) {
entry:
- %0 = load i32* @i, align 4
- %1 = load i32* @j, align 4
+ %0 = load i32, i32* @i, align 4
+ %1 = load i32, i32* @j, align 4
%add = add nsw i32 %1, %0
ret i32 %add
}
diff --git a/test/CodeGen/ARM/Windows/pic.ll b/test/CodeGen/ARM/Windows/pic.ll
index 28d371f45217..9ef7c35c5530 100644
--- a/test/CodeGen/ARM/Windows/pic.ll
+++ b/test/CodeGen/ARM/Windows/pic.ll
@@ -5,7 +5,7 @@
define arm_aapcs_vfpcc i8 @return_external() {
entry:
- %0 = load i8* @external, align 1
+ %0 = load i8, i8* @external, align 1
ret i8 %0
}
diff --git a/test/CodeGen/ARM/Windows/read-only-data.ll b/test/CodeGen/ARM/Windows/read-only-data.ll
index 0438d68b55c6..c387d1d5137c 100644
--- a/test/CodeGen/ARM/Windows/read-only-data.ll
+++ b/test/CodeGen/ARM/Windows/read-only-data.ll
@@ -6,7 +6,7 @@ declare arm_aapcs_vfpcc void @callee(i8*)
define arm_aapcs_vfpcc void @function() {
entry:
- call arm_aapcs_vfpcc void @callee(i8* getelementptr inbounds ([7 x i8]* @.str, i32 0, i32 0))
+ call arm_aapcs_vfpcc void @callee(i8* getelementptr inbounds ([7 x i8], [7 x i8]* @.str, i32 0, i32 0))
ret void
}
diff --git a/test/CodeGen/ARM/Windows/stack-probe-non-default.ll b/test/CodeGen/ARM/Windows/stack-probe-non-default.ll
new file mode 100644
index 000000000000..d66e93ad34ee
--- /dev/null
+++ b/test/CodeGen/ARM/Windows/stack-probe-non-default.ll
@@ -0,0 +1,27 @@
+; RUN: llc -mtriple thumbv7-windows -mcpu cortex-a9 -o - %s \
+; RUN: | FileCheck %s -check-prefix CHECK-DEFAULT-CODE-MODEL
+
+; RUN: llc -mtriple thumbv7-windows -mcpu cortex-a9 -code-model large -o - %s \
+; RUN: | FileCheck %s -check-prefix CHECK-LARGE-CODE-MODEL
+
+declare dllimport arm_aapcs_vfpcc void @initialise(i8*)
+
+define dllexport arm_aapcs_vfpcc signext i8 @function(i32 %offset) #0 {
+entry:
+ %buffer = alloca [4096 x i8], align 1
+ %0 = getelementptr inbounds [4096 x i8], [4096 x i8]* %buffer, i32 0, i32 0
+ call arm_aapcs_vfpcc void @initialise(i8* %0)
+ %arrayidx = getelementptr inbounds [4096 x i8], [4096 x i8]* %buffer, i32 0, i32 %offset
+ %1 = load i8, i8* %arrayidx, align 1
+ ret i8 %1
+}
+
+attributes #0 = { "stack-probe-size"="8096" }
+
+; CHECK-DEFAULT-CODE-MODEL-NOT: __chkstk
+; CHECK-DEFAULT-CODE-MODEL: sub.w sp, sp, #4096
+
+; CHECK-LARGE-CODE-MODEL-NOT: movw r12, :lower16:__chkstk
+; CHECK-LARGE-CODE-MODEL-NOT: movt r12, :upper16:__chkstk
+; CHECK-LARGE-CODE-MODEL: sub.w sp, sp, #4096
+
diff --git a/test/CodeGen/ARM/Windows/vla.ll b/test/CodeGen/ARM/Windows/vla.ll
index 56901dee0dfa..1c0632e25e55 100644
--- a/test/CodeGen/ARM/Windows/vla.ll
+++ b/test/CodeGen/ARM/Windows/vla.ll
@@ -8,8 +8,8 @@
define arm_aapcs_vfpcc i8 @function(i32 %sz, i32 %idx) {
entry:
%vla = alloca i8, i32 %sz, align 1
- %arrayidx = getelementptr inbounds i8* %vla, i32 %idx
- %0 = load volatile i8* %arrayidx, align 1
+ %arrayidx = getelementptr inbounds i8, i8* %vla, i32 %idx
+ %0 = load volatile i8, i8* %arrayidx, align 1
ret i8 %0
}
diff --git a/test/CodeGen/ARM/a15-partial-update.ll b/test/CodeGen/ARM/a15-partial-update.ll
index 5747253d56b7..576eb7a24393 100644
--- a/test/CodeGen/ARM/a15-partial-update.ll
+++ b/test/CodeGen/ARM/a15-partial-update.ll
@@ -10,7 +10,7 @@ define <2 x float> @t1(float* %A, <2 x float> %B) {
; generated.
; CHECK-NOT: vmov.{{.*}} d{{[0-9]+}},
- %tmp2 = load float* %A, align 4
+ %tmp2 = load float, float* %A, align 4
%tmp3 = insertelement <2 x float> %B, float %tmp2, i32 1
ret <2 x float> %tmp3
}
@@ -27,9 +27,9 @@ loop:
; CHECK: vmov.{{.*}} d{{[0-9]+}},
%oldcount = phi i32 [0, %entry], [%newcount, %loop]
%newcount = add i32 %oldcount, 1
- %p1 = getelementptr <4 x i8> *%in, i32 %newcount
- %p2 = getelementptr <4 x i8> *%out, i32 %newcount
- %tmp1 = load <4 x i8> *%p1, align 4
+ %p1 = getelementptr <4 x i8>, <4 x i8> *%in, i32 %newcount
+ %p2 = getelementptr <4 x i8>, <4 x i8> *%out, i32 %newcount
+ %tmp1 = load <4 x i8> , <4 x i8> *%p1, align 4
store <4 x i8> %tmp1, <4 x i8> *%p2
%cmp = icmp eq i32 %newcount, %n
br i1 %cmp, label %loop, label %ret
diff --git a/test/CodeGen/ARM/addrmode.ll b/test/CodeGen/ARM/addrmode.ll
index 8fd1da791f1f..52bb9a20662e 100644
--- a/test/CodeGen/ARM/addrmode.ll
+++ b/test/CodeGen/ARM/addrmode.ll
@@ -4,14 +4,14 @@
define i32 @t1(i32 %a) {
%b = mul i32 %a, 9
%c = inttoptr i32 %b to i32*
- %d = load i32* %c
+ %d = load i32, i32* %c
ret i32 %d
}
define i32 @t2(i32 %a) {
%b = mul i32 %a, -7
%c = inttoptr i32 %b to i32*
- %d = load i32* %c
+ %d = load i32, i32* %c
ret i32 %d
}
diff --git a/test/CodeGen/ARM/aggregate-padding.ll b/test/CodeGen/ARM/aggregate-padding.ll
new file mode 100644
index 000000000000..bc46a9cdf913
--- /dev/null
+++ b/test/CodeGen/ARM/aggregate-padding.ll
@@ -0,0 +1,101 @@
+; RUN: llc -mtriple=armv7-linux-gnueabihf %s -o - | FileCheck %s
+
+; [2 x i64] should be contiguous when split (e.g. we shouldn't try to align all
+; i32 components to 64 bits). Also makes sure i64-based types are properly
+; aligned on the stack.
+define i64 @test_i64_contiguous_on_stack([8 x double], float, i32 %in, [2 x i64] %arg) nounwind {
+; CHECK-LABEL: test_i64_contiguous_on_stack:
+; CHECK-DAG: ldr [[LO0:r[0-9]+]], [sp, #8]
+; CHECK-DAG: ldr [[HI0:r[0-9]+]], [sp, #12]
+; CHECK-DAG: ldr [[LO1:r[0-9]+]], [sp, #16]
+; CHECK-DAG: ldr [[HI1:r[0-9]+]], [sp, #20]
+; CHECK: adds r0, [[LO0]], [[LO1]]
+; CHECK: adc r1, [[HI0]], [[HI1]]
+
+ %val1 = extractvalue [2 x i64] %arg, 0
+ %val2 = extractvalue [2 x i64] %arg, 1
+ %sum = add i64 %val1, %val2
+ ret i64 %sum
+}
+
+; [2 x i64] should use 4 regs, not 8 (which might happen if the
+; i64 -> i32, i32 split wasn't handled correctly).
+define i64 @test_2xi64_uses_4_regs([8 x double], float, [2 x i64] %arg) nounwind {
+; CHECK-LABEL: test_2xi64_uses_4_regs:
+; CHECK-DAG: mov r0, r2
+; CHECK-DAG: mov r1, r3
+
+ %val = extractvalue [2 x i64] %arg, 1
+ ret i64 %val
+}
+
+; An aggregate should be able to be split between registers and the stack if there is
+; nothing else on the stack.
+define i32 @test_aggregates_split([8 x double], i32, [4 x i32] %arg) nounwind {
+; CHECK-LABEL: test_aggregates_split:
+; CHECK: ldr [[VAL3:r[0-9]+]], [sp]
+; CHECK: add r0, r1, [[VAL3]]
+
+ %val0 = extractvalue [4 x i32] %arg, 0
+ %val3 = extractvalue [4 x i32] %arg, 3
+ %sum = add i32 %val0, %val3
+ ret i32 %sum
+}
+
+; If an aggregate has to be moved entirely onto the stack, nothing should be
+; able to use r0-r3 any more. Also checks that [2 x i64] is properly aligned when
+; it uses regs.
+define i32 @test_no_int_backfilling([8 x double], float, i32, [2 x i64], i32 %arg) nounwind {
+; CHECK-LABEL: test_no_int_backfilling:
+; CHECK: ldr r0, [sp, #24]
+ ret i32 %arg
+}
+
+; Even if the argument was successfully allocated as a reg block, there should be
+; no backfilling to r1.
+define i32 @test_no_int_backfilling_regsonly(i32, [1 x i64], i32 %arg) {
+; CHECK-LABEL: test_no_int_backfilling_regsonly:
+; CHECK: ldr r0, [sp]
+ ret i32 %arg
+}
+
+; If an aggregate has to be moved entirely onto the stack, nothing should be
+; able to use r0-r3 any more.
+define float @test_no_float_backfilling([7 x double], [4 x i32], i32, [4 x double], float %arg) nounwind {
+; CHECK-LABEL: test_no_float_backfilling:
+; CHECK: vldr s0, [sp, #40]
+ ret float %arg
+}
+
+; They're a bit pointless, but types like [N x i8] should work as well.
+define i8 @test_i8_in_regs(i32, [3 x i8] %arg) {
+; CHECK-LABEL: test_i8_in_regs:
+; CHECK: add r0, r1, r3
+ %val0 = extractvalue [3 x i8] %arg, 0
+ %val2 = extractvalue [3 x i8] %arg, 2
+ %sum = add i8 %val0, %val2
+ ret i8 %sum
+}
+
+define i16 @test_i16_split(i32, i32, [3 x i16] %arg) {
+; CHECK-LABEL: test_i16_split:
+; CHECK: ldrh [[VAL2:r[0-9]+]], [sp]
+; CHECK: add r0, r2, [[VAL2]]
+ %val0 = extractvalue [3 x i16] %arg, 0
+ %val2 = extractvalue [3 x i16] %arg, 2
+ %sum = add i16 %val0, %val2
+ ret i16 %sum
+}
+
+; Beware: on the stack each i16 still gets a 32-bit slot; the array is not
+; packed.
+define i16 @test_i16_forced_stack([8 x double], double, i32, i32, [3 x i16] %arg) {
+; CHECK-LABEL: test_i16_forced_stack:
+; CHECK-DAG: ldrh [[VAL0:r[0-9]+]], [sp, #8]
+; CHECK-DAG: ldrh [[VAL2:r[0-9]+]], [sp, #16]
+; CHECK: add r0, [[VAL0]], [[VAL2]]
+ %val0 = extractvalue [3 x i16] %arg, 0
+ %val2 = extractvalue [3 x i16] %arg, 2
+ %sum = add i16 %val0, %val2
+ ret i16 %sum
+}
diff --git a/test/CodeGen/ARM/aliases.ll b/test/CodeGen/ARM/aliases.ll
index 5a737ad995ac..04ca3e875487 100644
--- a/test/CodeGen/ARM/aliases.ll
+++ b/test/CodeGen/ARM/aliases.ll
@@ -33,12 +33,12 @@ define i32 @foo_f() {
define i32 @test() {
entry:
- %tmp = load i32* @foo1
- %tmp1 = load i32* @foo2
- %tmp0 = load i32* @bar_i
+ %tmp = load i32, i32* @foo1
+ %tmp1 = load i32, i32* @foo2
+ %tmp0 = load i32, i32* @bar_i
%tmp2 = call i32 @foo_f()
%tmp3 = add i32 %tmp, %tmp2
- %tmp4 = call %FunTy* @bar_f()
+ %tmp4 = call i32 @bar_f()
%tmp5 = add i32 %tmp3, %tmp4
%tmp6 = add i32 %tmp1, %tmp5
%tmp7 = add i32 %tmp6, %tmp0
diff --git a/test/CodeGen/ARM/alloc-no-stack-realign.ll b/test/CodeGen/ARM/alloc-no-stack-realign.ll
index 24c28baff881..600fb6aa44b1 100644
--- a/test/CodeGen/ARM/alloc-no-stack-realign.ll
+++ b/test/CodeGen/ARM/alloc-no-stack-realign.ll
@@ -9,8 +9,8 @@
define void @test1(<16 x float>* noalias sret %agg.result) nounwind ssp "no-realign-stack" {
entry:
; NO-REALIGN-LABEL: test1
-; NO-REALIGN: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1:[0-9]+]]:128]
-; NO-REALIGN: add r[[R2:[0-9]+]], r[[R1]], #16
+; NO-REALIGN: mov r[[R2:[0-9]+]], r[[R1:[0-9]+]]
+; NO-REALIGN: vld1.32 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]!
; NO-REALIGN: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]
; NO-REALIGN: add r[[R2:[0-9]+]], r[[R1]], #32
; NO-REALIGN: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]
@@ -21,21 +21,19 @@ entry:
; NO-REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]
; NO-REALIGN: add r[[R2:[0-9]+]], r[[R1]], #32
; NO-REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]
-; NO-REALIGN: add r[[R2:[0-9]+]], r[[R1]], #16
-; NO-REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]
+; NO-REALIGN: vst1.32 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128]!
; NO-REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128]
; NO-REALIGN: add r[[R2:[0-9]+]], r[[R0:0]], #48
; NO-REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]
; NO-REALIGN: add r[[R2:[0-9]+]], r[[R0]], #32
; NO-REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]
-; NO-REALIGN: add r[[R2:[0-9]+]], r[[R1]], #16
-; NO-REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]
+; NO-REALIGN: vst1.32 {{{d[0-9]+, d[0-9]+}}}, [r[[R0]]:128]!
; NO-REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R0]]:128]
%retval = alloca <16 x float>, align 16
- %0 = load <16 x float>* @T3_retval, align 16
+ %0 = load <16 x float>, <16 x float>* @T3_retval, align 16
store <16 x float> %0, <16 x float>* %retval
- %1 = load <16 x float>* %retval
+ %1 = load <16 x float>, <16 x float>* %retval
store <16 x float> %1, <16 x float>* %agg.result, align 16
ret void
}
@@ -44,8 +42,8 @@ define void @test2(<16 x float>* noalias sret %agg.result) nounwind ssp {
entry:
; REALIGN-LABEL: test2
; REALIGN: bfc sp, #0, #6
-; REALIGN: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1:[0-9]+]]:128]
-; REALIGN: add r[[R2:[0-9]+]], r[[R1]], #16
+; REALIGN: mov r[[R2:[0-9]+]], r[[R1:[0-9]+]]
+; REALIGN: vld1.32 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]!
; REALIGN: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]
; REALIGN: add r[[R2:[0-9]+]], r[[R1]], #32
; REALIGN: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]
@@ -65,13 +63,12 @@ entry:
; REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128]
; REALIGN: add r[[R1:[0-9]+]], r[[R0]], #32
; REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128]
-; REALIGN: add r[[R1:[0-9]+]], r[[R0]], #16
-; REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128]
+; REALIGN: vst1.32 {{{d[0-9]+, d[0-9]+}}}, [r[[R0]]:128]!
; REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R0]]:128]
%retval = alloca <16 x float>, align 16
- %0 = load <16 x float>* @T3_retval, align 16
+ %0 = load <16 x float>, <16 x float>* @T3_retval, align 16
store <16 x float> %0, <16 x float>* %retval
- %1 = load <16 x float>* %retval
+ %1 = load <16 x float>, <16 x float>* %retval
store <16 x float> %1, <16 x float>* %agg.result, align 16
ret void
}
diff --git a/test/CodeGen/ARM/arguments.ll b/test/CodeGen/ARM/arguments.ll
index e7fbf9f28eff..3b1d8dd09153 100644
--- a/test/CodeGen/ARM/arguments.ll
+++ b/test/CodeGen/ARM/arguments.ll
@@ -18,7 +18,7 @@ define i32 @f2() nounwind optsize {
; DARWIN-LABEL: f2:
; DARWIN: mov r3, #128
entry:
- %0 = tail call i32 (i32, ...)* @g2(i32 5, double 1.600000e+01, i32 128) nounwind optsize ; <i32> [#uses=1]
+ %0 = tail call i32 (i32, ...) @g2(i32 5, double 1.600000e+01, i32 128) nounwind optsize ; <i32> [#uses=1]
%not. = icmp ne i32 %0, 128 ; <i1> [#uses=1]
%.0 = zext i1 %not. to i32 ; <i32> [#uses=1]
ret i32 %.0
diff --git a/test/CodeGen/ARM/arm-and-tst-peephole.ll b/test/CodeGen/ARM/arm-and-tst-peephole.ll
index 14eef832e693..04eae8f9afec 100644
--- a/test/CodeGen/ARM/arm-and-tst-peephole.ll
+++ b/test/CodeGen/ARM/arm-and-tst-peephole.ll
@@ -8,12 +8,12 @@
%struct.Foo = type { i8* }
-; ARM: foo
-; THUMB: foo
-; T2: foo
+; ARM-LABEL: foo:
+; THUMB-LABEL: foo:
+; T2-LABEL: foo:
define %struct.Foo* @foo(%struct.Foo* %this, i32 %acc) nounwind readonly align 2 {
entry:
- %scevgep = getelementptr %struct.Foo* %this, i32 1
+ %scevgep = getelementptr %struct.Foo, %struct.Foo* %this, i32 1
br label %tailrecurse
tailrecurse: ; preds = %sw.bb, %entry
@@ -21,8 +21,8 @@ tailrecurse: ; preds = %sw.bb, %entry
%lsr.iv = phi i32 [ %lsr.iv.next, %sw.bb ], [ 1, %entry ]
%acc.tr = phi i32 [ %or, %sw.bb ], [ %acc, %entry ]
%lsr.iv24 = bitcast %struct.Foo* %lsr.iv2 to i8**
- %scevgep5 = getelementptr i8** %lsr.iv24, i32 -1
- %tmp2 = load i8** %scevgep5
+ %scevgep5 = getelementptr i8*, i8** %lsr.iv24, i32 -1
+ %tmp2 = load i8*, i8** %scevgep5
%0 = ptrtoint i8* %tmp2 to i32
; ARM: ands {{r[0-9]+}}, {{r[0-9]+}}, #3
@@ -49,20 +49,20 @@ tailrecurse.switch: ; preds = %tailrecurse
; V8-NEXT: beq
; V8-NEXT: %tailrecurse.switch
; V8: cmp
-; V8-NEXT: beq
+; V8-NEXT: bne
; V8-NEXT: b
; The trailing space in the last line checks that the branch is unconditional
switch i32 %and, label %sw.epilog [
i32 1, label %sw.bb
i32 3, label %sw.bb6
i32 2, label %sw.bb8
- ]
+ ], !prof !1
sw.bb: ; preds = %tailrecurse.switch, %tailrecurse
%shl = shl i32 %acc.tr, 1
%or = or i32 %and, %shl
%lsr.iv.next = add i32 %lsr.iv, 1
- %scevgep3 = getelementptr %struct.Foo* %lsr.iv2, i32 1
+ %scevgep3 = getelementptr %struct.Foo, %struct.Foo* %lsr.iv2, i32 1
br label %tailrecurse
sw.bb6: ; preds = %tailrecurse.switch
@@ -70,7 +70,7 @@ sw.bb6: ; preds = %tailrecurse.switch
sw.bb8: ; preds = %tailrecurse.switch
%tmp1 = add i32 %acc.tr, %lsr.iv
- %add.ptr11 = getelementptr inbounds %struct.Foo* %this, i32 %tmp1
+ %add.ptr11 = getelementptr inbounds %struct.Foo, %struct.Foo* %this, i32 %tmp1
ret %struct.Foo* %add.ptr11
sw.epilog: ; preds = %tailrecurse.switch
@@ -83,14 +83,14 @@ sw.epilog: ; preds = %tailrecurse.switch
%struct.S = type { i8* (i8*)*, [1 x i8] }
-; ARM: bar
-; THUMB: bar
-; T2: bar
+; ARM-LABEL: bar:
+; THUMB-LABEL: bar:
+; T2-LABEL: bar:
; V8-LABEL: bar:
define internal zeroext i8 @bar(%struct.S* %x, %struct.S* nocapture %y) nounwind readonly {
entry:
- %0 = getelementptr inbounds %struct.S* %x, i32 0, i32 1, i32 0
- %1 = load i8* %0, align 1
+ %0 = getelementptr inbounds %struct.S, %struct.S* %x, i32 0, i32 1, i32 0
+ %1 = load i8, i8* %0, align 1
%2 = zext i8 %1 to i32
; ARM: ands
; THUMB: ands
@@ -103,8 +103,8 @@ entry:
bb: ; preds = %entry
; V8-NEXT: %bb
- %5 = getelementptr inbounds %struct.S* %y, i32 0, i32 1, i32 0
- %6 = load i8* %5, align 1
+ %5 = getelementptr inbounds %struct.S, %struct.S* %y, i32 0, i32 1, i32 0
+ %6 = load i8, i8* %5, align 1
%7 = zext i8 %6 to i32
; ARM: andsne
; THUMB: ands
@@ -134,3 +134,27 @@ bb4: ; preds = %bb2
return: ; preds = %bb2, %bb, %entry
ret i8 1
}
+
+
+; We were looking through multiple COPY instructions to find an AND we might
+; fold into a TST, but in doing so we changed the register being tested, allowing
+; folding of unrelated tests (in this case, a TST against r1 was eliminated in
+; favour of an AND of r0).
+
+; ARM-LABEL: test_tst_assessment:
+; THUMB-LABEL: test_tst_assessment:
+; T2-LABEL: test_tst_assessment:
+; V8-LABEL: test_tst_assessment:
+define i32 @test_tst_assessment(i1 %lhs, i1 %rhs) {
+ %lhs32 = zext i1 %lhs to i32
+ %rhs32 = zext i1 %rhs to i32
+ %diff = sub nsw i32 %lhs32, %rhs32
+; ARM: tst r1, #1
+; THUMB: movs [[RTMP:r[0-9]+]], #1
+; THUMB: tst r1, [[RTMP]]
+; T2: tst.w r1, #1
+; V8: tst.w r1, #1
+ ret i32 %diff
+}
+
+!1 = !{!"branch_weights", i32 1, i32 1, i32 3, i32 2 }
diff --git a/test/CodeGen/ARM/arm-asm.ll b/test/CodeGen/ARM/arm-asm.ll
index e869abeb2dd6..f9199ff82b38 100644
--- a/test/CodeGen/ARM/arm-asm.ll
+++ b/test/CodeGen/ARM/arm-asm.ll
@@ -2,6 +2,6 @@
define void @frame_dummy() {
entry:
- %tmp1 = tail call void (i8*)* (void (i8*)*)* asm "", "=r,0,~{dirflag},~{fpsr},~{flags}"( void (i8*)* null ) ; <void (i8*)*> [#uses=0]
+ %tmp1 = tail call void (i8*)* (void (i8*)*) asm "", "=r,0,~{dirflag},~{fpsr},~{flags}"( void (i8*)* null ) ; <void (i8*)*> [#uses=0]
ret void
}
diff --git a/test/CodeGen/ARM/arm-modifier.ll b/test/CodeGen/ARM/arm-modifier.ll
index 580f7e7a90c7..67d468e8abd2 100644
--- a/test/CodeGen/ARM/arm-modifier.ll
+++ b/test/CodeGen/ARM/arm-modifier.ll
@@ -6,8 +6,8 @@ entry:
%scale2.addr = alloca float, align 4
store float %scale, float* %scale.addr, align 4
store float %scale2, float* %scale2.addr, align 4
- %tmp = load float* %scale.addr, align 4
- %tmp1 = load float* %scale2.addr, align 4
+ %tmp = load float, float* %scale.addr, align 4
+ %tmp1 = load float, float* %scale2.addr, align 4
call void asm sideeffect "vmul.f32 q0, q0, ${0:y} \0A\09vmul.f32 q1, q1, ${0:y} \0A\09vmul.f32 q1, q0, ${1:y} \0A\09", "w,w,~{q0},~{q1}"(float %tmp, float %tmp1) nounwind
ret i32 0
}
@@ -49,8 +49,8 @@ entry:
; CHECK: stm {{lr|r[0-9]+}}, {[[REG1:(r[0-9]+)]], r{{[0-9]+}}}
; CHECK: adds {{lr|r[0-9]+}}, [[REG1]]
; CHECK: ldm {{lr|r[0-9]+}}, {r{{[0-9]+}}, r{{[0-9]+}}}
-%tmp = load i64* @f3_var, align 4
-%tmp1 = load i64* @f3_var2, align 4
+%tmp = load i64, i64* @f3_var, align 4
+%tmp1 = load i64, i64* @f3_var2, align 4
%0 = call i64 asm sideeffect "stm ${0:m}, ${1:M}\0A\09adds $3, $1\0A\09", "=*m,=r,1,r"(i64** @f3_ptr, i64 %tmp, i64 %tmp1) nounwind
store i64 %0, i64* @f3_var, align 4
%1 = call i64 asm sideeffect "ldm ${1:m}, ${0:M}\0A\09", "=r,*m"(i64** @f3_ptr) nounwind
diff --git a/test/CodeGen/ARM/arm-negative-stride.ll b/test/CodeGen/ARM/arm-negative-stride.ll
index 7decb974e268..2ea15cf429e1 100644
--- a/test/CodeGen/ARM/arm-negative-stride.ll
+++ b/test/CodeGen/ARM/arm-negative-stride.ll
@@ -12,7 +12,7 @@ entry:
bb: ; preds = %bb, %entry
%indvar = phi i32 [ 0, %entry ], [ %indvar.next, %bb ] ; <i32> [#uses=2]
%i_addr.09.0 = sub i32 %i, %indvar ; <i32> [#uses=1]
- %tmp2 = getelementptr i32* %P, i32 %i_addr.09.0 ; <i32*> [#uses=1]
+ %tmp2 = getelementptr i32, i32* %P, i32 %i_addr.09.0 ; <i32*> [#uses=1]
store i32 %A, i32* %tmp2
%indvar.next = add i32 %indvar, 1 ; <i32> [#uses=2]
icmp eq i32 %indvar.next, %i ; <i1>:1 [#uses=1]
@@ -34,7 +34,7 @@ entry:
bb: ; preds = %bb, %entry
%indvar = phi i32 [ 0, %entry ], [ %indvar.next, %bb ] ; <i32> [#uses=2]
%i_addr.09.0 = sub i32 %i, %indvar ; <i32> [#uses=1]
- %tmp2 = getelementptr i32* %P, i32 %i_addr.09.0 ; <i32*> [#uses=1]
+ %tmp2 = getelementptr i32, i32* %P, i32 %i_addr.09.0 ; <i32*> [#uses=1]
store i32 %A, i32* %tmp2
store i32 %indvar, i32* null
%indvar.next = add i32 %indvar, 1 ; <i32> [#uses=2]
diff --git a/test/CodeGen/ARM/arm-ttype-target2.ll b/test/CodeGen/ARM/arm-ttype-target2.ll
index 4d61cb5bb3a5..754a16d90877 100644
--- a/test/CodeGen/ARM/arm-ttype-target2.ll
+++ b/test/CodeGen/ARM/arm-ttype-target2.ll
@@ -2,7 +2,7 @@
@_ZTVN10__cxxabiv117__class_type_infoE = external global i8*
@_ZTS3Foo = linkonce_odr constant [5 x i8] c"3Foo\00"
-@_ZTI3Foo = linkonce_odr unnamed_addr constant { i8*, i8* } { i8* bitcast (i8** getelementptr inbounds (i8** @_ZTVN10__cxxabiv117__class_type_infoE, i32 2) to i8*), i8* getelementptr inbounds ([5 x i8]* @_ZTS3Foo, i32 0, i32 0) }
+@_ZTI3Foo = linkonce_odr unnamed_addr constant { i8*, i8* } { i8* bitcast (i8** getelementptr inbounds (i8*, i8** @_ZTVN10__cxxabiv117__class_type_infoE, i32 2) to i8*), i8* getelementptr inbounds ([5 x i8], [5 x i8]* @_ZTS3Foo, i32 0, i32 0) }
define i32 @main() {
entry:
diff --git a/test/CodeGen/ARM/atomic-64bit.ll b/test/CodeGen/ARM/atomic-64bit.ll
index 0c0769f1b145..7510d6ccdc33 100644
--- a/test/CodeGen/ARM/atomic-64bit.ll
+++ b/test/CodeGen/ARM/atomic-64bit.ll
@@ -214,7 +214,7 @@ define i64 @test8(i64* %ptr) {
; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]]
; CHECK-THUMB: dmb {{ish$}}
- %r = load atomic i64* %ptr seq_cst, align 8
+ %r = load atomic i64, i64* %ptr seq_cst, align 8
ret i64 %r
}
diff --git a/test/CodeGen/ARM/atomic-load-store.ll b/test/CodeGen/ARM/atomic-load-store.ll
index af13dfc80d2d..5db81781a7f7 100644
--- a/test/CodeGen/ARM/atomic-load-store.ll
+++ b/test/CodeGen/ARM/atomic-load-store.ll
@@ -44,7 +44,7 @@ define i32 @test2(i32* %ptr) {
; THUMBM-LABEL: test2
; THUMBM: ldr
; THUMBM: dmb sy
- %val = load atomic i32* %ptr seq_cst, align 4
+ %val = load atomic i32, i32* %ptr seq_cst, align 4
ret i32 %val
}
@@ -76,7 +76,7 @@ define void @test3(i8* %ptr1, i8* %ptr2) {
; ARMV6-NOT: mcr
; THUMBM-LABEL: test3
; THUMBM-NOT: dmb sy
- %val = load atomic i8* %ptr1 unordered, align 1
+ %val = load atomic i8, i8* %ptr1 unordered, align 1
store atomic i8 %val, i8* %ptr2 unordered, align 1
ret void
}
@@ -87,7 +87,7 @@ define void @test4(i8* %ptr1, i8* %ptr2) {
; THUMBONE: ___sync_lock_test_and_set_1
; ARMV6-LABEL: test4
; THUMBM-LABEL: test4
- %val = load atomic i8* %ptr1 seq_cst, align 1
+ %val = load atomic i8, i8* %ptr1 seq_cst, align 1
store atomic i8 %val, i8* %ptr2 seq_cst, align 1
ret void
}
@@ -95,7 +95,7 @@ define void @test4(i8* %ptr1, i8* %ptr2) {
define i64 @test_old_load_64bit(i64* %p) {
; ARMV4-LABEL: test_old_load_64bit
; ARMV4: ___sync_val_compare_and_swap_8
- %1 = load atomic i64* %p seq_cst, align 8
+ %1 = load atomic i64, i64* %p seq_cst, align 8
ret i64 %1
}
diff --git a/test/CodeGen/ARM/atomic-op.ll b/test/CodeGen/ARM/atomic-op.ll
index 1ac86485c556..db32bffdd5d1 100644
--- a/test/CodeGen/ARM/atomic-op.ll
+++ b/test/CodeGen/ARM/atomic-op.ll
@@ -25,7 +25,7 @@ entry:
store i32 3855, i32* %ort
store i32 3855, i32* %xort
store i32 4, i32* %temp
- %tmp = load i32* %temp
+ %tmp = load i32, i32* %temp
; CHECK: ldrex
; CHECK: add
; CHECK: strex
@@ -308,8 +308,8 @@ define i32 @test_cmpxchg_fail_order1(i32 *%addr, i32 %desired, i32 %new) {
define i32 @load_load_add_acquire(i32* %mem1, i32* %mem2) nounwind {
; CHECK-LABEL: load_load_add_acquire
- %val1 = load atomic i32* %mem1 acquire, align 4
- %val2 = load atomic i32* %mem2 acquire, align 4
+ %val1 = load atomic i32, i32* %mem1 acquire, align 4
+ %val2 = load atomic i32, i32* %mem2 acquire, align 4
%tmp = add i32 %val1, %val2
; CHECK: ldr {{r[0-9]}}, [r0]
@@ -353,7 +353,7 @@ define void @store_store_release(i32* %mem1, i32 %val1, i32* %mem2, i32 %val2) {
define void @load_fence_store_monotonic(i32* %mem1, i32* %mem2) {
; CHECK-LABEL: load_fence_store_monotonic
- %val = load atomic i32* %mem1 monotonic, align 4
+ %val = load atomic i32, i32* %mem1 monotonic, align 4
fence seq_cst
store atomic i32 %val, i32* %mem2 monotonic, align 4
diff --git a/test/CodeGen/ARM/atomic-ops-v8.ll b/test/CodeGen/ARM/atomic-ops-v8.ll
index 7072aaaf733d..db5007b0758d 100644
--- a/test/CodeGen/ARM/atomic-ops-v8.ll
+++ b/test/CodeGen/ARM/atomic-ops-v8.ll
@@ -1166,7 +1166,7 @@ define void @test_atomic_cmpxchg_i64(i64 %wanted, i64 %new) nounwind {
define i8 @test_atomic_load_monotonic_i8() nounwind {
; CHECK-LABEL: test_atomic_load_monotonic_i8:
- %val = load atomic i8* @var8 monotonic, align 1
+ %val = load atomic i8, i8* @var8 monotonic, align 1
; CHECK-NOT: dmb
; CHECK-NOT: mcr
; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var8
@@ -1183,7 +1183,7 @@ define i8 @test_atomic_load_monotonic_regoff_i8(i64 %base, i64 %off) nounwind {
%addr_int = add i64 %base, %off
%addr = inttoptr i64 %addr_int to i8*
- %val = load atomic i8* %addr monotonic, align 1
+ %val = load atomic i8, i8* %addr monotonic, align 1
; CHECK-NOT: dmb
; CHECK-NOT: mcr
; CHECK-LE: ldrb r0, [r0, r2]
@@ -1196,7 +1196,7 @@ define i8 @test_atomic_load_monotonic_regoff_i8(i64 %base, i64 %off) nounwind {
define i8 @test_atomic_load_acquire_i8() nounwind {
; CHECK-LABEL: test_atomic_load_acquire_i8:
- %val = load atomic i8* @var8 acquire, align 1
+ %val = load atomic i8, i8* @var8 acquire, align 1
; CHECK-NOT: dmb
; CHECK-NOT: mcr
; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var8
@@ -1213,7 +1213,7 @@ define i8 @test_atomic_load_acquire_i8() nounwind {
define i8 @test_atomic_load_seq_cst_i8() nounwind {
; CHECK-LABEL: test_atomic_load_seq_cst_i8:
- %val = load atomic i8* @var8 seq_cst, align 1
+ %val = load atomic i8, i8* @var8 seq_cst, align 1
; CHECK-NOT: dmb
; CHECK-NOT: mcr
; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var8
@@ -1230,7 +1230,7 @@ define i8 @test_atomic_load_seq_cst_i8() nounwind {
define i16 @test_atomic_load_monotonic_i16() nounwind {
; CHECK-LABEL: test_atomic_load_monotonic_i16:
- %val = load atomic i16* @var16 monotonic, align 2
+ %val = load atomic i16, i16* @var16 monotonic, align 2
; CHECK-NOT: dmb
; CHECK-NOT: mcr
; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var16
@@ -1251,7 +1251,7 @@ define i32 @test_atomic_load_monotonic_regoff_i32(i64 %base, i64 %off) nounwind
%addr_int = add i64 %base, %off
%addr = inttoptr i64 %addr_int to i32*
- %val = load atomic i32* %addr monotonic, align 4
+ %val = load atomic i32, i32* %addr monotonic, align 4
; CHECK-NOT: dmb
; CHECK-NOT: mcr
; CHECK-LE: ldr r0, [r0, r2]
@@ -1264,7 +1264,7 @@ define i32 @test_atomic_load_monotonic_regoff_i32(i64 %base, i64 %off) nounwind
define i64 @test_atomic_load_seq_cst_i64() nounwind {
; CHECK-LABEL: test_atomic_load_seq_cst_i64:
- %val = load atomic i64* @var64 seq_cst, align 8
+ %val = load atomic i64, i64* @var64 seq_cst, align 8
; CHECK-NOT: dmb
; CHECK-NOT: mcr
; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var64
@@ -1296,7 +1296,7 @@ define void @test_atomic_store_monotonic_regoff_i8(i64 %base, i64 %off, i8 %val)
%addr = inttoptr i64 %addr_int to i8*
store atomic i8 %val, i8* %addr monotonic, align 1
-; CHECK-LE: ldrb{{(\.w)?}} [[VAL:r[0-9]+]], [sp]
+; CHECK-LE: ldr{{b?(\.w)?}} [[VAL:r[0-9]+]], [sp]
; CHECK-LE: strb [[VAL]], [r0, r2]
; CHECK-BE: ldrb{{(\.w)?}} [[VAL:r[0-9]+]], [sp, #3]
; CHECK-BE: strb [[VAL]], [r1, r3]
@@ -1399,7 +1399,7 @@ define i32 @not.barriers(i32* %var, i1 %cond) {
; CHECK-LABEL: not.barriers:
br i1 %cond, label %atomic_ver, label %simple_ver
simple_ver:
- %oldval = load i32* %var
+ %oldval = load i32, i32* %var
%newval = add nsw i32 %oldval, -1
store i32 %newval, i32* %var
br label %somewhere
diff --git a/test/CodeGen/ARM/available_externally.ll b/test/CodeGen/ARM/available_externally.ll
index d925b5c77706..055074738e5c 100644
--- a/test/CodeGen/ARM/available_externally.ll
+++ b/test/CodeGen/ARM/available_externally.ll
@@ -5,7 +5,7 @@
@B = external hidden constant i32
define i32 @t1() {
- %tmp = load i32* @A
+ %tmp = load i32, i32* @A
store i32 %tmp, i32* @B
ret i32 %tmp
}
diff --git a/test/CodeGen/ARM/avoid-cpsr-rmw.ll b/test/CodeGen/ARM/avoid-cpsr-rmw.ll
index 0217a4a8fb83..c3de07e03b6b 100644
--- a/test/CodeGen/ARM/avoid-cpsr-rmw.ll
+++ b/test/CodeGen/ARM/avoid-cpsr-rmw.ll
@@ -30,19 +30,19 @@ while.body:
; CHECK-NOT: muls
%ptr1.addr.09 = phi i32* [ %add.ptr, %while.body ], [ %ptr1, %entry ]
%ptr2.addr.08 = phi i32* [ %incdec.ptr, %while.body ], [ %ptr2, %entry ]
- %0 = load i32* %ptr1.addr.09, align 4
- %arrayidx1 = getelementptr inbounds i32* %ptr1.addr.09, i32 1
- %1 = load i32* %arrayidx1, align 4
- %arrayidx3 = getelementptr inbounds i32* %ptr1.addr.09, i32 2
- %2 = load i32* %arrayidx3, align 4
- %arrayidx4 = getelementptr inbounds i32* %ptr1.addr.09, i32 3
- %3 = load i32* %arrayidx4, align 4
- %add.ptr = getelementptr inbounds i32* %ptr1.addr.09, i32 4
+ %0 = load i32, i32* %ptr1.addr.09, align 4
+ %arrayidx1 = getelementptr inbounds i32, i32* %ptr1.addr.09, i32 1
+ %1 = load i32, i32* %arrayidx1, align 4
+ %arrayidx3 = getelementptr inbounds i32, i32* %ptr1.addr.09, i32 2
+ %2 = load i32, i32* %arrayidx3, align 4
+ %arrayidx4 = getelementptr inbounds i32, i32* %ptr1.addr.09, i32 3
+ %3 = load i32, i32* %arrayidx4, align 4
+ %add.ptr = getelementptr inbounds i32, i32* %ptr1.addr.09, i32 4
%mul = mul i32 %1, %0
%mul5 = mul i32 %mul, %2
%mul6 = mul i32 %mul5, %3
store i32 %mul6, i32* %ptr2.addr.08, align 4
- %incdec.ptr = getelementptr inbounds i32* %ptr2.addr.08, i32 -1
+ %incdec.ptr = getelementptr inbounds i32, i32* %ptr2.addr.08, i32 -1
%tobool = icmp eq i32* %incdec.ptr, null
br i1 %tobool, label %while.end, label %while.body
@@ -64,19 +64,19 @@ while.body:
; CHECK: muls
%ptr1.addr.09 = phi i32* [ %add.ptr, %while.body ], [ %ptr1, %entry ]
%ptr2.addr.08 = phi i32* [ %incdec.ptr, %while.body ], [ %ptr2, %entry ]
- %0 = load i32* %ptr1.addr.09, align 4
- %arrayidx1 = getelementptr inbounds i32* %ptr1.addr.09, i32 1
- %1 = load i32* %arrayidx1, align 4
- %arrayidx3 = getelementptr inbounds i32* %ptr1.addr.09, i32 2
- %2 = load i32* %arrayidx3, align 4
- %arrayidx4 = getelementptr inbounds i32* %ptr1.addr.09, i32 3
- %3 = load i32* %arrayidx4, align 4
- %add.ptr = getelementptr inbounds i32* %ptr1.addr.09, i32 4
+ %0 = load i32, i32* %ptr1.addr.09, align 4
+ %arrayidx1 = getelementptr inbounds i32, i32* %ptr1.addr.09, i32 1
+ %1 = load i32, i32* %arrayidx1, align 4
+ %arrayidx3 = getelementptr inbounds i32, i32* %ptr1.addr.09, i32 2
+ %2 = load i32, i32* %arrayidx3, align 4
+ %arrayidx4 = getelementptr inbounds i32, i32* %ptr1.addr.09, i32 3
+ %3 = load i32, i32* %arrayidx4, align 4
+ %add.ptr = getelementptr inbounds i32, i32* %ptr1.addr.09, i32 4
%mul = mul i32 %1, %0
%mul5 = mul i32 %mul, %2
%mul6 = mul i32 %mul5, %3
store i32 %mul6, i32* %ptr2.addr.08, align 4
- %incdec.ptr = getelementptr inbounds i32* %ptr2.addr.08, i32 -1
+ %incdec.ptr = getelementptr inbounds i32, i32* %ptr2.addr.08, i32 -1
%tobool = icmp eq i32* %incdec.ptr, null
br i1 %tobool, label %while.end, label %while.body
@@ -92,21 +92,21 @@ entry:
; CHECK: vmrs APSR_nzcv, fpscr
; CHECK: if.then
; CHECK-NOT: movs
- %0 = load double* %q, align 4
+ %0 = load double, double* %q, align 4
%cmp = fcmp olt double %0, 1.000000e+01
- %incdec.ptr1 = getelementptr inbounds i32* %p, i32 1
+ %incdec.ptr1 = getelementptr inbounds i32, i32* %p, i32 1
br i1 %cmp, label %if.then, label %if.else
if.then:
store i32 7, i32* %p, align 4
- %incdec.ptr2 = getelementptr inbounds i32* %p, i32 2
+ %incdec.ptr2 = getelementptr inbounds i32, i32* %p, i32 2
store i32 8, i32* %incdec.ptr1, align 4
store i32 9, i32* %incdec.ptr2, align 4
br label %if.end
if.else:
store i32 3, i32* %p, align 4
- %incdec.ptr5 = getelementptr inbounds i32* %p, i32 2
+ %incdec.ptr5 = getelementptr inbounds i32, i32* %p, i32 2
store i32 5, i32* %incdec.ptr1, align 4
store i32 6, i32* %incdec.ptr5, align 4
br label %if.end
diff --git a/test/CodeGen/ARM/bfi.ll b/test/CodeGen/ARM/bfi.ll
index bce09da7618a..0661960d1ae0 100644
--- a/test/CodeGen/ARM/bfi.ll
+++ b/test/CodeGen/ARM/bfi.ll
@@ -9,7 +9,7 @@ entry:
; CHECK: f1
; CHECK: mov r2, #10
; CHECK: bfi r1, r2, #22, #4
- %0 = load i32* bitcast (%struct.F* @X to i32*), align 4 ; <i32> [#uses=1]
+ %0 = load i32, i32* bitcast (%struct.F* @X to i32*), align 4 ; <i32> [#uses=1]
%1 = and i32 %0, -62914561 ; <i32> [#uses=1]
%2 = or i32 %1, 41943040 ; <i32> [#uses=1]
store i32 %2, i32* bitcast (%struct.F* @X to i32*), align 4
diff --git a/test/CodeGen/ARM/bfx.ll b/test/CodeGen/ARM/bfx.ll
index 46f49e9fab20..edb0c1a5a54a 100644
--- a/test/CodeGen/ARM/bfx.ll
+++ b/test/CodeGen/ARM/bfx.ll
@@ -41,13 +41,13 @@ entry:
%and1 = lshr i32 %x, 16
%shr2 = and i32 %and1, 255
%shr4 = lshr i32 %x, 24
- %arrayidx = getelementptr inbounds i32* %ctx, i32 %shr4
- %0 = load i32* %arrayidx, align 4
- %arrayidx5 = getelementptr inbounds i32* %ctx, i32 %shr2
- %1 = load i32* %arrayidx5, align 4
+ %arrayidx = getelementptr inbounds i32, i32* %ctx, i32 %shr4
+ %0 = load i32, i32* %arrayidx, align 4
+ %arrayidx5 = getelementptr inbounds i32, i32* %ctx, i32 %shr2
+ %1 = load i32, i32* %arrayidx5, align 4
%add = add i32 %1, %0
- %arrayidx6 = getelementptr inbounds i32* %ctx, i32 %shr
- %2 = load i32* %arrayidx6, align 4
+ %arrayidx6 = getelementptr inbounds i32, i32* %ctx, i32 %shr
+ %2 = load i32, i32* %arrayidx6, align 4
%add7 = add i32 %add, %2
ret i32 %add7
}
diff --git a/test/CodeGen/ARM/big-endian-neon-bitconv.ll b/test/CodeGen/ARM/big-endian-neon-bitconv.ll
index 427d2e731428..b5a840a48f7a 100644
--- a/test/CodeGen/ARM/big-endian-neon-bitconv.ll
+++ b/test/CodeGen/ARM/big-endian-neon-bitconv.ll
@@ -19,7 +19,7 @@ define void @conv_i64_to_v8i8( i64 %val, <8 x i8>* %store ) {
; CHECK-LABEL: conv_i64_to_v8i8:
; CHECK: vrev64.8
%v = bitcast i64 %val to <8 x i8>
- %w = load <8 x i8>* @v8i8
+ %w = load <8 x i8>, <8 x i8>* @v8i8
%a = add <8 x i8> %v, %w
store <8 x i8> %a, <8 x i8>* %store
ret void
@@ -28,8 +28,8 @@ define void @conv_i64_to_v8i8( i64 %val, <8 x i8>* %store ) {
define void @conv_v8i8_to_i64( <8 x i8>* %load, <8 x i8>* %store ) {
; CHECK-LABEL: conv_v8i8_to_i64:
; CHECK: vrev64.8
- %v = load <8 x i8>* %load
- %w = load <8 x i8>* @v8i8
+ %v = load <8 x i8>, <8 x i8>* %load
+ %w = load <8 x i8>, <8 x i8>* @v8i8
%a = add <8 x i8> %v, %w
%f = bitcast <8 x i8> %a to i64
call void @conv_i64_to_v8i8( i64 %f, <8 x i8>* %store )
@@ -40,7 +40,7 @@ define void @conv_i64_to_v4i16( i64 %val, <4 x i16>* %store ) {
; CHECK-LABEL: conv_i64_to_v4i16:
; CHECK: vrev64.16
%v = bitcast i64 %val to <4 x i16>
- %w = load <4 x i16>* @v4i16
+ %w = load <4 x i16>, <4 x i16>* @v4i16
%a = add <4 x i16> %v, %w
store <4 x i16> %a, <4 x i16>* %store
ret void
@@ -49,8 +49,8 @@ define void @conv_i64_to_v4i16( i64 %val, <4 x i16>* %store ) {
define void @conv_v4i16_to_i64( <4 x i16>* %load, <4 x i16>* %store ) {
; CHECK-LABEL: conv_v4i16_to_i64:
; CHECK: vrev64.16
- %v = load <4 x i16>* %load
- %w = load <4 x i16>* @v4i16
+ %v = load <4 x i16>, <4 x i16>* %load
+ %w = load <4 x i16>, <4 x i16>* @v4i16
%a = add <4 x i16> %v, %w
%f = bitcast <4 x i16> %a to i64
call void @conv_i64_to_v4i16( i64 %f, <4 x i16>* %store )
@@ -61,7 +61,7 @@ define void @conv_i64_to_v2i32( i64 %val, <2 x i32>* %store ) {
; CHECK-LABEL: conv_i64_to_v2i32:
; CHECK: vrev64.32
%v = bitcast i64 %val to <2 x i32>
- %w = load <2 x i32>* @v2i32
+ %w = load <2 x i32>, <2 x i32>* @v2i32
%a = add <2 x i32> %v, %w
store <2 x i32> %a, <2 x i32>* %store
ret void
@@ -70,8 +70,8 @@ define void @conv_i64_to_v2i32( i64 %val, <2 x i32>* %store ) {
define void @conv_v2i32_to_i64( <2 x i32>* %load, <2 x i32>* %store ) {
; CHECK-LABEL: conv_v2i32_to_i64:
; CHECK: vrev64.32
- %v = load <2 x i32>* %load
- %w = load <2 x i32>* @v2i32
+ %v = load <2 x i32>, <2 x i32>* %load
+ %w = load <2 x i32>, <2 x i32>* @v2i32
%a = add <2 x i32> %v, %w
%f = bitcast <2 x i32> %a to i64
call void @conv_i64_to_v2i32( i64 %f, <2 x i32>* %store )
@@ -82,7 +82,7 @@ define void @conv_i64_to_v2f32( i64 %val, <2 x float>* %store ) {
; CHECK-LABEL: conv_i64_to_v2f32:
; CHECK: vrev64.32
%v = bitcast i64 %val to <2 x float>
- %w = load <2 x float>* @v2f32
+ %w = load <2 x float>, <2 x float>* @v2f32
%a = fadd <2 x float> %v, %w
store <2 x float> %a, <2 x float>* %store
ret void
@@ -91,8 +91,8 @@ define void @conv_i64_to_v2f32( i64 %val, <2 x float>* %store ) {
define void @conv_v2f32_to_i64( <2 x float>* %load, <2 x float>* %store ) {
; CHECK-LABEL: conv_v2f32_to_i64:
; CHECK: vrev64.32
- %v = load <2 x float>* %load
- %w = load <2 x float>* @v2f32
+ %v = load <2 x float>, <2 x float>* %load
+ %w = load <2 x float>, <2 x float>* @v2f32
%a = fadd <2 x float> %v, %w
%f = bitcast <2 x float> %a to i64
call void @conv_i64_to_v2f32( i64 %f, <2 x float>* %store )
@@ -103,7 +103,7 @@ define void @conv_f64_to_v8i8( double %val, <8 x i8>* %store ) {
; CHECK-LABEL: conv_f64_to_v8i8:
; CHECK: vrev64.8
%v = bitcast double %val to <8 x i8>
- %w = load <8 x i8>* @v8i8
+ %w = load <8 x i8>, <8 x i8>* @v8i8
%a = add <8 x i8> %v, %w
store <8 x i8> %a, <8 x i8>* %store
ret void
@@ -112,8 +112,8 @@ define void @conv_f64_to_v8i8( double %val, <8 x i8>* %store ) {
define void @conv_v8i8_to_f64( <8 x i8>* %load, <8 x i8>* %store ) {
; CHECK-LABEL: conv_v8i8_to_f64:
; CHECK: vrev64.8
- %v = load <8 x i8>* %load
- %w = load <8 x i8>* @v8i8
+ %v = load <8 x i8>, <8 x i8>* %load
+ %w = load <8 x i8>, <8 x i8>* @v8i8
%a = add <8 x i8> %v, %w
%f = bitcast <8 x i8> %a to double
call void @conv_f64_to_v8i8( double %f, <8 x i8>* %store )
@@ -124,7 +124,7 @@ define void @conv_f64_to_v4i16( double %val, <4 x i16>* %store ) {
; CHECK-LABEL: conv_f64_to_v4i16:
; CHECK: vrev64.16
%v = bitcast double %val to <4 x i16>
- %w = load <4 x i16>* @v4i16
+ %w = load <4 x i16>, <4 x i16>* @v4i16
%a = add <4 x i16> %v, %w
store <4 x i16> %a, <4 x i16>* %store
ret void
@@ -133,8 +133,8 @@ define void @conv_f64_to_v4i16( double %val, <4 x i16>* %store ) {
define void @conv_v4i16_to_f64( <4 x i16>* %load, <4 x i16>* %store ) {
; CHECK-LABEL: conv_v4i16_to_f64:
; CHECK: vrev64.16
- %v = load <4 x i16>* %load
- %w = load <4 x i16>* @v4i16
+ %v = load <4 x i16>, <4 x i16>* %load
+ %w = load <4 x i16>, <4 x i16>* @v4i16
%a = add <4 x i16> %v, %w
%f = bitcast <4 x i16> %a to double
call void @conv_f64_to_v4i16( double %f, <4 x i16>* %store )
@@ -145,7 +145,7 @@ define void @conv_f64_to_v2i32( double %val, <2 x i32>* %store ) {
; CHECK-LABEL: conv_f64_to_v2i32:
; CHECK: vrev64.32
%v = bitcast double %val to <2 x i32>
- %w = load <2 x i32>* @v2i32
+ %w = load <2 x i32>, <2 x i32>* @v2i32
%a = add <2 x i32> %v, %w
store <2 x i32> %a, <2 x i32>* %store
ret void
@@ -154,8 +154,8 @@ define void @conv_f64_to_v2i32( double %val, <2 x i32>* %store ) {
define void @conv_v2i32_to_f64( <2 x i32>* %load, <2 x i32>* %store ) {
; CHECK-LABEL: conv_v2i32_to_f64:
; CHECK: vrev64.32
- %v = load <2 x i32>* %load
- %w = load <2 x i32>* @v2i32
+ %v = load <2 x i32>, <2 x i32>* %load
+ %w = load <2 x i32>, <2 x i32>* @v2i32
%a = add <2 x i32> %v, %w
%f = bitcast <2 x i32> %a to double
call void @conv_f64_to_v2i32( double %f, <2 x i32>* %store )
@@ -166,7 +166,7 @@ define void @conv_f64_to_v2f32( double %val, <2 x float>* %store ) {
; CHECK-LABEL: conv_f64_to_v2f32:
; CHECK: vrev64.32
%v = bitcast double %val to <2 x float>
- %w = load <2 x float>* @v2f32
+ %w = load <2 x float>, <2 x float>* @v2f32
%a = fadd <2 x float> %v, %w
store <2 x float> %a, <2 x float>* %store
ret void
@@ -175,8 +175,8 @@ define void @conv_f64_to_v2f32( double %val, <2 x float>* %store ) {
define void @conv_v2f32_to_f64( <2 x float>* %load, <2 x float>* %store ) {
; CHECK-LABEL: conv_v2f32_to_f64:
; CHECK: vrev64.32
- %v = load <2 x float>* %load
- %w = load <2 x float>* @v2f32
+ %v = load <2 x float>, <2 x float>* %load
+ %w = load <2 x float>, <2 x float>* @v2f32
%a = fadd <2 x float> %v, %w
%f = bitcast <2 x float> %a to double
call void @conv_f64_to_v2f32( double %f, <2 x float>* %store )
@@ -190,7 +190,7 @@ define void @conv_i128_to_v16i8( i128 %val, <16 x i8>* %store ) {
; CHECK-LABEL: conv_i128_to_v16i8:
; CHECK: vrev32.8
%v = bitcast i128 %val to <16 x i8>
- %w = load <16 x i8>* @v16i8
+ %w = load <16 x i8>, <16 x i8>* @v16i8
%a = add <16 x i8> %v, %w
store <16 x i8> %a, <16 x i8>* %store
ret void
@@ -199,8 +199,8 @@ define void @conv_i128_to_v16i8( i128 %val, <16 x i8>* %store ) {
define void @conv_v16i8_to_i128( <16 x i8>* %load, <16 x i8>* %store ) {
; CHECK-LABEL: conv_v16i8_to_i128:
; CHECK: vrev32.8
- %v = load <16 x i8>* %load
- %w = load <16 x i8>* @v16i8
+ %v = load <16 x i8>, <16 x i8>* %load
+ %w = load <16 x i8>, <16 x i8>* @v16i8
%a = add <16 x i8> %v, %w
%f = bitcast <16 x i8> %a to i128
call void @conv_i128_to_v16i8( i128 %f, <16 x i8>* %store )
@@ -211,7 +211,7 @@ define void @conv_i128_to_v8i16( i128 %val, <8 x i16>* %store ) {
; CHECK-LABEL: conv_i128_to_v8i16:
; CHECK: vrev32.16
%v = bitcast i128 %val to <8 x i16>
- %w = load <8 x i16>* @v8i16
+ %w = load <8 x i16>, <8 x i16>* @v8i16
%a = add <8 x i16> %v, %w
store <8 x i16> %a, <8 x i16>* %store
ret void
@@ -220,8 +220,8 @@ define void @conv_i128_to_v8i16( i128 %val, <8 x i16>* %store ) {
define void @conv_v8i16_to_i128( <8 x i16>* %load, <8 x i16>* %store ) {
; CHECK-LABEL: conv_v8i16_to_i128:
; CHECK: vrev32.16
- %v = load <8 x i16>* %load
- %w = load <8 x i16>* @v8i16
+ %v = load <8 x i16>, <8 x i16>* %load
+ %w = load <8 x i16>, <8 x i16>* @v8i16
%a = add <8 x i16> %v, %w
%f = bitcast <8 x i16> %a to i128
call void @conv_i128_to_v8i16( i128 %f, <8 x i16>* %store )
@@ -232,7 +232,7 @@ define void @conv_i128_to_v4i32( i128 %val, <4 x i32>* %store ) {
; CHECK-LABEL: conv_i128_to_v4i32:
; CHECK: vrev64.32
%v = bitcast i128 %val to <4 x i32>
- %w = load <4 x i32>* @v4i32
+ %w = load <4 x i32>, <4 x i32>* @v4i32
%a = add <4 x i32> %v, %w
store <4 x i32> %a, <4 x i32>* %store
ret void
@@ -241,8 +241,8 @@ define void @conv_i128_to_v4i32( i128 %val, <4 x i32>* %store ) {
define void @conv_v4i32_to_i128( <4 x i32>* %load, <4 x i32>* %store ) {
; CHECK-LABEL: conv_v4i32_to_i128:
; CHECK: vrev64.32
- %v = load <4 x i32>* %load
- %w = load <4 x i32>* @v4i32
+ %v = load <4 x i32>, <4 x i32>* %load
+ %w = load <4 x i32>, <4 x i32>* @v4i32
%a = add <4 x i32> %v, %w
%f = bitcast <4 x i32> %a to i128
call void @conv_i128_to_v4i32( i128 %f, <4 x i32>* %store )
@@ -253,7 +253,7 @@ define void @conv_i128_to_v4f32( i128 %val, <4 x float>* %store ) {
; CHECK-LABEL: conv_i128_to_v4f32:
; CHECK: vrev64.32
%v = bitcast i128 %val to <4 x float>
- %w = load <4 x float>* @v4f32
+ %w = load <4 x float>, <4 x float>* @v4f32
%a = fadd <4 x float> %v, %w
store <4 x float> %a, <4 x float>* %store
ret void
@@ -262,8 +262,8 @@ define void @conv_i128_to_v4f32( i128 %val, <4 x float>* %store ) {
define void @conv_v4f32_to_i128( <4 x float>* %load, <4 x float>* %store ) {
; CHECK-LABEL: conv_v4f32_to_i128:
; CHECK: vrev64.32
- %v = load <4 x float>* %load
- %w = load <4 x float>* @v4f32
+ %v = load <4 x float>, <4 x float>* %load
+ %w = load <4 x float>, <4 x float>* @v4f32
%a = fadd <4 x float> %v, %w
%f = bitcast <4 x float> %a to i128
call void @conv_i128_to_v4f32( i128 %f, <4 x float>* %store )
@@ -274,7 +274,7 @@ define void @conv_f128_to_v2f64( fp128 %val, <2 x double>* %store ) {
; CHECK-LABEL: conv_f128_to_v2f64:
; CHECK: vrev64.32
%v = bitcast fp128 %val to <2 x double>
- %w = load <2 x double>* @v2f64
+ %w = load <2 x double>, <2 x double>* @v2f64
%a = fadd <2 x double> %v, %w
store <2 x double> %a, <2 x double>* %store
ret void
@@ -283,8 +283,8 @@ define void @conv_f128_to_v2f64( fp128 %val, <2 x double>* %store ) {
define void @conv_v2f64_to_f128( <2 x double>* %load, <2 x double>* %store ) {
; CHECK-LABEL: conv_v2f64_to_f128:
; CHECK: vrev64.32
- %v = load <2 x double>* %load
- %w = load <2 x double>* @v2f64
+ %v = load <2 x double>, <2 x double>* %load
+ %w = load <2 x double>, <2 x double>* @v2f64
%a = fadd <2 x double> %v, %w
%f = bitcast <2 x double> %a to fp128
call void @conv_f128_to_v2f64( fp128 %f, <2 x double>* %store )
@@ -295,7 +295,7 @@ define void @conv_f128_to_v16i8( fp128 %val, <16 x i8>* %store ) {
; CHECK-LABEL: conv_f128_to_v16i8:
; CHECK: vrev32.8
%v = bitcast fp128 %val to <16 x i8>
- %w = load <16 x i8>* @v16i8
+ %w = load <16 x i8>, <16 x i8>* @v16i8
%a = add <16 x i8> %v, %w
store <16 x i8> %a, <16 x i8>* %store
ret void
@@ -304,8 +304,8 @@ define void @conv_f128_to_v16i8( fp128 %val, <16 x i8>* %store ) {
define void @conv_v16i8_to_f128( <16 x i8>* %load, <16 x i8>* %store ) {
; CHECK-LABEL: conv_v16i8_to_f128:
; CHECK: vrev32.8
- %v = load <16 x i8>* %load
- %w = load <16 x i8>* @v16i8
+ %v = load <16 x i8>, <16 x i8>* %load
+ %w = load <16 x i8>, <16 x i8>* @v16i8
%a = add <16 x i8> %v, %w
%f = bitcast <16 x i8> %a to fp128
call void @conv_f128_to_v16i8( fp128 %f, <16 x i8>* %store )
@@ -316,7 +316,7 @@ define void @conv_f128_to_v8i16( fp128 %val, <8 x i16>* %store ) {
; CHECK-LABEL: conv_f128_to_v8i16:
; CHECK: vrev32.16
%v = bitcast fp128 %val to <8 x i16>
- %w = load <8 x i16>* @v8i16
+ %w = load <8 x i16>, <8 x i16>* @v8i16
%a = add <8 x i16> %v, %w
store <8 x i16> %a, <8 x i16>* %store
ret void
@@ -325,8 +325,8 @@ define void @conv_f128_to_v8i16( fp128 %val, <8 x i16>* %store ) {
define void @conv_v8i16_to_f128( <8 x i16>* %load, <8 x i16>* %store ) {
; CHECK-LABEL: conv_v8i16_to_f128:
; CHECK: vrev32.16
- %v = load <8 x i16>* %load
- %w = load <8 x i16>* @v8i16
+ %v = load <8 x i16>, <8 x i16>* %load
+ %w = load <8 x i16>, <8 x i16>* @v8i16
%a = add <8 x i16> %v, %w
%f = bitcast <8 x i16> %a to fp128
call void @conv_f128_to_v8i16( fp128 %f, <8 x i16>* %store )
@@ -337,7 +337,7 @@ define void @conv_f128_to_v4f32( fp128 %val, <4 x float>* %store ) {
; CHECK-LABEL: conv_f128_to_v4f32:
; CHECK: vrev64.32
%v = bitcast fp128 %val to <4 x float>
- %w = load <4 x float>* @v4f32
+ %w = load <4 x float>, <4 x float>* @v4f32
%a = fadd <4 x float> %v, %w
store <4 x float> %a, <4 x float>* %store
ret void
@@ -346,8 +346,8 @@ define void @conv_f128_to_v4f32( fp128 %val, <4 x float>* %store ) {
define void @conv_v4f32_to_f128( <4 x float>* %load, <4 x float>* %store ) {
; CHECK-LABEL: conv_v4f32_to_f128:
; CHECK: vrev64.32
- %v = load <4 x float>* %load
- %w = load <4 x float>* @v4f32
+ %v = load <4 x float>, <4 x float>* %load
+ %w = load <4 x float>, <4 x float>* @v4f32
%a = fadd <4 x float> %v, %w
%f = bitcast <4 x float> %a to fp128
call void @conv_f128_to_v4f32( fp128 %f, <4 x float>* %store )
diff --git a/test/CodeGen/ARM/big-endian-neon-extend.ll b/test/CodeGen/ARM/big-endian-neon-extend.ll
index 931c6c3979c6..f8542b708b8a 100644
--- a/test/CodeGen/ARM/big-endian-neon-extend.ll
+++ b/test/CodeGen/ARM/big-endian-neon-extend.ll
@@ -2,11 +2,14 @@
define void @vector_ext_2i8_to_2i64( <2 x i8>* %loadaddr, <2 x i64>* %storeaddr ) {
; CHECK-LABEL: vector_ext_2i8_to_2i64:
-; CHECK: vld1.16 {[[REG:d[0-9]+]]
-; CHECK: vmov.i64 {{q[0-9]+}}, #0xff
-; CHECK: vrev16.8 [[REG]], [[REG]]
-; CHECK: vmovl.u8 {{q[0-9]+}}, [[REG]]
- %1 = load <2 x i8>* %loadaddr
+; CHECK: vld1.16 {[[REG:d[0-9]+]][0]}, [r0:16]
+; CHECK-NEXT: vrev16.8 [[REG]], [[REG]]
+; CHECK-NEXT: vmovl.u8 [[QREG:q[0-9]+]], [[REG]]
+; CHECK-NEXT: vmovl.u16 [[QREG]], [[REG]]
+; CHECK-NEXT: vmovl.u32 [[QREG]], [[REG]]
+; CHECK-NEXT: vst1.64 {[[REG]], {{d[0-9]+}}}, [r1]
+; CHECK-NEXT: bx lr
+ %1 = load <2 x i8>, <2 x i8>* %loadaddr
%2 = zext <2 x i8> %1 to <2 x i64>
store <2 x i64> %2, <2 x i64>* %storeaddr
ret void
@@ -14,11 +17,13 @@ define void @vector_ext_2i8_to_2i64( <2 x i8>* %loadaddr, <2 x i64>* %storeaddr
define void @vector_ext_2i16_to_2i64( <2 x i16>* %loadaddr, <2 x i64>* %storeaddr ) {
; CHECK-LABEL: vector_ext_2i16_to_2i64:
-; CHECK: vld1.32 {[[REG:d[0-9]+]]
-; CHECK: vmov.i64 {{q[0-9]+}}, #0xffff
-; CHECK: vrev32.16 [[REG]], [[REG]]
-; CHECK: vmovl.u16 {{q[0-9]+}}, [[REG]]
- %1 = load <2 x i16>* %loadaddr
+; CHECK: vld1.32 {[[REG:d[0-9]+]][0]}, [r0:32]
+; CHECK-NEXT: vrev32.16 [[REG]], [[REG]]
+; CHECK-NEXT: vmovl.u16 [[QREG:q[0-9]+]], [[REG]]
+; CHECK-NEXT: vmovl.u32 [[QREG]], [[REG]]
+; CHECK-NEXT: vst1.64 {[[REG]], {{d[0-9]+}}}, [r1]
+; CHECK-NEXT: bx lr
+ %1 = load <2 x i16>, <2 x i16>* %loadaddr
%2 = zext <2 x i16> %1 to <2 x i64>
store <2 x i64> %2, <2 x i64>* %storeaddr
ret void
@@ -27,9 +32,14 @@ define void @vector_ext_2i16_to_2i64( <2 x i16>* %loadaddr, <2 x i64>* %storeadd
define void @vector_ext_2i8_to_2i32( <2 x i8>* %loadaddr, <2 x i32>* %storeaddr ) {
; CHECK-LABEL: vector_ext_2i8_to_2i32:
-; CHECK: vld1.16 {[[REG:d[0-9]+]]
-; CHECK: vrev16.8 [[REG]], [[REG]]
- %1 = load <2 x i8>* %loadaddr
+; CHECK: vld1.16 {[[REG:d[0-9]+]][0]}, [r0:16]
+; CHECK-NEXT: vrev16.8 [[REG]], [[REG]]
+; CHECK-NEXT: vmovl.u8 [[QREG:q[0-9]+]], [[REG]]
+; CHECK-NEXT: vmovl.u16 [[QREG]], [[REG]]
+; CHECK-NEXT: vrev64.32 [[REG]], [[REG]]
+; CHECK-NEXT: vstr [[REG]], [r1]
+; CHECK-NEXT: bx lr
+ %1 = load <2 x i8>, <2 x i8>* %loadaddr
%2 = zext <2 x i8> %1 to <2 x i32>
store <2 x i32> %2, <2 x i32>* %storeaddr
ret void
@@ -37,10 +47,13 @@ define void @vector_ext_2i8_to_2i32( <2 x i8>* %loadaddr, <2 x i32>* %storeaddr
define void @vector_ext_2i16_to_2i32( <2 x i16>* %loadaddr, <2 x i32>* %storeaddr ) {
; CHECK-LABEL: vector_ext_2i16_to_2i32:
-; CHECK: vld1.32 {[[REG:d[0-9]+]]
-; CHECK: vrev32.16 [[REG]], [[REG]]
-; CHECK: vmovl.u16 {{q[0-9]+}}, [[REG]]
- %1 = load <2 x i16>* %loadaddr
+; CHECK: vld1.32 {[[REG:d[0-9]+]][0]}, [r0:32]
+; CHECK-NEXT: vrev32.16 [[REG]], [[REG]]
+; CHECK-NEXT: vmovl.u16 [[QREG:q[0-9]+]], [[REG]]
+; CHECK-NEXT: vrev64.32 [[REG]], [[REG]]
+; CHECK-NEXT: vstr [[REG]], [r1]
+; CHECK-NEXT: bx lr
+ %1 = load <2 x i16>, <2 x i16>* %loadaddr
%2 = zext <2 x i16> %1 to <2 x i32>
store <2 x i32> %2, <2 x i32>* %storeaddr
ret void
@@ -48,10 +61,16 @@ define void @vector_ext_2i16_to_2i32( <2 x i16>* %loadaddr, <2 x i32>* %storeadd
define void @vector_ext_2i8_to_2i16( <2 x i8>* %loadaddr, <2 x i16>* %storeaddr ) {
; CHECK-LABEL: vector_ext_2i8_to_2i16:
-; CHECK: vld1.16 {[[REG:d[0-9]+]]
-; CHECK: vrev16.8 [[REG]], [[REG]]
-; CHECK: vmovl.u8 {{q[0-9]+}}, [[REG]]
- %1 = load <2 x i8>* %loadaddr
+; CHECK: vld1.16 {[[REG:d[0-9]+]][0]}, [r0:16]
+; CHECK-NEXT: vrev16.8 [[REG]], [[REG]]
+; CHECK-NEXT: vmovl.u8 [[QREG:q[0-9]+]], [[REG]]
+; CHECK-NEXT: vmovl.u16 [[QREG]], [[REG]]
+; CHECK-NEXT: vrev32.16 [[REG]], [[REG]]
+; CHECK-NEXT: vuzp.16 [[REG]], {{d[0-9]+}}
+; CHECK-NEXT: vrev32.16 [[REG]], {{d[0-9]+}}
+; CHECK-NEXT: vst1.32 {[[REG]][0]}, [r1:32]
+; CHECK-NEXT: bx lr
+ %1 = load <2 x i8>, <2 x i8>* %loadaddr
%2 = zext <2 x i8> %1 to <2 x i16>
store <2 x i16> %2, <2 x i16>* %storeaddr
ret void
@@ -59,10 +78,14 @@ define void @vector_ext_2i8_to_2i16( <2 x i8>* %loadaddr, <2 x i16>* %storeaddr
define void @vector_ext_4i8_to_4i32( <4 x i8>* %loadaddr, <4 x i32>* %storeaddr ) {
; CHECK-LABEL: vector_ext_4i8_to_4i32:
-; CHECK: vld1.32 {[[REG:d[0-9]+]]
-; CHECK: vrev32.8 [[REG]], [[REG]]
-; CHECK: vmovl.u8 {{q[0-9]+}}, [[REG]]
- %1 = load <4 x i8>* %loadaddr
+; CHECK: vld1.32 {[[REG:d[0-9]+]][0]}, [r0:32]
+; CHECK-NEXT: vrev32.8 [[REG]], [[REG]]
+; CHECK-NEXT: vmovl.u8 [[QREG:q[0-9]+]], [[REG]]
+; CHECK-NEXT: vmovl.u16 [[QREG]], [[REG]]
+; CHECK-NEXT: vrev64.32 [[QREG]], [[QREG]]
+; CHECK-NEXT: vst1.64 {[[REG]], {{d[0-9]+}}}, [r1]
+; CHECK-NEXT: bx lr
+ %1 = load <4 x i8>, <4 x i8>* %loadaddr
%2 = zext <4 x i8> %1 to <4 x i32>
store <4 x i32> %2, <4 x i32>* %storeaddr
ret void
@@ -70,12 +93,14 @@ define void @vector_ext_4i8_to_4i32( <4 x i8>* %loadaddr, <4 x i32>* %storeaddr
define void @vector_ext_4i8_to_4i16( <4 x i8>* %loadaddr, <4 x i16>* %storeaddr ) {
; CHECK-LABEL: vector_ext_4i8_to_4i16:
-; CHECK: vld1.32 {[[REG:d[0-9]+]]
-; CHECK: vrev32.8 [[REG]], [[REG]]
-; CHECK: vmovl.u8 {{q[0-9]+}}, [[REG]]
- %1 = load <4 x i8>* %loadaddr
+; CHECK: vld1.32 {[[REG:d[0-9]+]][0]}, [r0:32]
+; CHECK-NEXT: vrev32.8 [[REG]], [[REG]]
+; CHECK-NEXT: vmovl.u8 [[QREG:q[0-9]+]], [[REG]]
+; CHECK-NEXT: vrev64.16 [[REG]], [[REG]]
+; CHECK-NEXT: vstr [[REG]], [r1]
+; CHECK-NEXT: bx lr
+ %1 = load <4 x i8>, <4 x i8>* %loadaddr
%2 = zext <4 x i8> %1 to <4 x i16>
store <4 x i16> %2, <4 x i16>* %storeaddr
ret void
}
-
diff --git a/test/CodeGen/ARM/big-endian-neon-trunc-store.ll b/test/CodeGen/ARM/big-endian-neon-trunc-store.ll
index 65147ad5d3f7..cbfc46ed255b 100644
--- a/test/CodeGen/ARM/big-endian-neon-trunc-store.ll
+++ b/test/CodeGen/ARM/big-endian-neon-trunc-store.ll
@@ -6,7 +6,7 @@ define void @vector_trunc_store_2i64_to_2i16( <2 x i64>* %loadaddr, <2 x i16>* %
; CHECK: vrev32.16 [[REG]], [[REG]]
; CHECK: vuzp.16 [[REG]], [[REG2:d[0-9]+]]
; CHECK: vrev32.16 [[REG]], [[REG2]]
- %1 = load <2 x i64>* %loadaddr
+ %1 = load <2 x i64>, <2 x i64>* %loadaddr
%2 = trunc <2 x i64> %1 to <2 x i16>
store <2 x i16> %2, <2 x i16>* %storeaddr
ret void
@@ -18,7 +18,7 @@ define void @vector_trunc_store_4i32_to_4i8( <4 x i32>* %loadaddr, <4 x i8>* %st
; CHECK: vrev16.8 [[REG]], [[REG]]
; CHECK: vuzp.8 [[REG]], [[REG2:d[0-9]+]]
; CHECK: vrev32.8 [[REG]], [[REG2]]
- %1 = load <4 x i32>* %loadaddr
+ %1 = load <4 x i32>, <4 x i32>* %loadaddr
%2 = trunc <4 x i32> %1 to <4 x i8>
store <4 x i8> %2, <4 x i8>* %storeaddr
ret void
diff --git a/test/CodeGen/ARM/big-endian-ret-f64.ll b/test/CodeGen/ARM/big-endian-ret-f64.ll
index 614bfc0a5b3a..f83e0864100c 100644
--- a/test/CodeGen/ARM/big-endian-ret-f64.ll
+++ b/test/CodeGen/ARM/big-endian-ret-f64.ll
@@ -6,7 +6,7 @@ define double @fn() {
; CHECK: ldr r0, [sp]
; CHECK: ldr r1, [sp, #4]
%r = alloca double, align 8
- %1 = load double* %r, align 8
+ %1 = load double, double* %r, align 8
ret double %1
}
diff --git a/test/CodeGen/ARM/big-endian-vector-caller.ll b/test/CodeGen/ARM/big-endian-vector-caller.ll
index d01b0a7c974f..54bda66c54a9 100644
--- a/test/CodeGen/ARM/big-endian-vector-caller.ll
+++ b/test/CodeGen/ARM/big-endian-vector-caller.ll
@@ -7,7 +7,7 @@ define void @test_i64_f64(double* %p, i64* %q) {
; SOFT: vadd.f64 [[REG:d[0-9]+]]
; SOFT: vmov r1, r0, [[REG]]
; HARD: vadd.f64 d0
- %1 = load double* %p
+ %1 = load double, double* %p
%2 = fadd double %1, %1
%3 = call i64 @test_i64_f64_helper(double %2)
%4 = add i64 %3, %3
@@ -23,7 +23,7 @@ define void @test_i64_v1i64(<1 x i64>* %p, i64* %q) {
; SOFT: vadd.i64 [[REG:d[0-9]+]]
; SOFT: vmov r1, r0, [[REG]]
; HARD: vadd.i64 d0
- %1 = load <1 x i64>* %p
+ %1 = load <1 x i64>, <1 x i64>* %p
%2 = add <1 x i64> %1, %1
%3 = call i64 @test_i64_v1i64_helper(<1 x i64> %2)
%4 = add i64 %3, %3
@@ -39,7 +39,7 @@ define void @test_i64_v2f32(<2 x float>* %p, i64* %q) {
; SOFT: vrev64.32 [[REG:d[0-9]+]]
; SOFT: vmov r1, r0, [[REG]]
; HARD: vrev64.32 d0
- %1 = load <2 x float>* %p
+ %1 = load <2 x float>, <2 x float>* %p
%2 = fadd <2 x float> %1, %1
%3 = call i64 @test_i64_v2f32_helper(<2 x float> %2)
%4 = add i64 %3, %3
@@ -55,7 +55,7 @@ define void @test_i64_v2i32(<2 x i32>* %p, i64* %q) {
; SOFT: vrev64.32 [[REG:d[0-9]+]]
; SOFT: vmov r1, r0, [[REG]]
; HARD: vrev64.32 d0
- %1 = load <2 x i32>* %p
+ %1 = load <2 x i32>, <2 x i32>* %p
%2 = add <2 x i32> %1, %1
%3 = call i64 @test_i64_v2i32_helper(<2 x i32> %2)
%4 = add i64 %3, %3
@@ -71,7 +71,7 @@ define void @test_i64_v4i16(<4 x i16>* %p, i64* %q) {
; SOFT: vrev64.16 [[REG:d[0-9]+]]
; SOFT: vmov r1, r0, [[REG]]
; HARD: vrev64.16 d0
- %1 = load <4 x i16>* %p
+ %1 = load <4 x i16>, <4 x i16>* %p
%2 = add <4 x i16> %1, %1
%3 = call i64 @test_i64_v4i16_helper(<4 x i16> %2)
%4 = add i64 %3, %3
@@ -87,7 +87,7 @@ define void @test_i64_v8i8(<8 x i8>* %p, i64* %q) {
; SOFT: vrev64.8 [[REG:d[0-9]+]]
; SOFT: vmov r1, r0, [[REG]]
; HARD: vrev64.8 d0
- %1 = load <8 x i8>* %p
+ %1 = load <8 x i8>, <8 x i8>* %p
%2 = add <8 x i8> %1, %1
%3 = call i64 @test_i64_v8i8_helper(<8 x i8> %2)
%4 = add i64 %3, %3
@@ -102,7 +102,7 @@ declare double @test_f64_i64_helper(i64 %p)
define void @test_f64_i64(i64* %p, double* %q) {
; CHECK: adds r1
; CHECK: adc r0
- %1 = load i64* %p
+ %1 = load i64, i64* %p
%2 = add i64 %1, %1
%3 = call double @test_f64_i64_helper(i64 %2)
%4 = fadd double %3, %3
@@ -119,7 +119,7 @@ define void @test_f64_v1i64(<1 x i64>* %p, double* %q) {
; SOFT: vadd.i64 [[REG:d[0-9]+]]
; SOFT: vmov r1, r0, [[REG]]
; HARD: vadd.i64 d0
- %1 = load <1 x i64>* %p
+ %1 = load <1 x i64>, <1 x i64>* %p
%2 = add <1 x i64> %1, %1
%3 = call double @test_f64_v1i64_helper(<1 x i64> %2)
%4 = fadd double %3, %3
@@ -136,7 +136,7 @@ define void @test_f64_v2f32(<2 x float>* %p, double* %q) {
; SOFT: vrev64.32 [[REG:d[0-9]+]]
; SOFT: vmov r1, r0, [[REG]]
; HARD: vrev64.32 d0
- %1 = load <2 x float>* %p
+ %1 = load <2 x float>, <2 x float>* %p
%2 = fadd <2 x float> %1, %1
%3 = call double @test_f64_v2f32_helper(<2 x float> %2)
%4 = fadd double %3, %3
@@ -153,7 +153,7 @@ define void @test_f64_v2i32(<2 x i32>* %p, double* %q) {
; SOFT: vrev64.32 [[REG:d[0-9]+]]
; SOFT: vmov r1, r0, [[REG]]
; HARD: vrev64.32 d0
- %1 = load <2 x i32>* %p
+ %1 = load <2 x i32>, <2 x i32>* %p
%2 = add <2 x i32> %1, %1
%3 = call double @test_f64_v2i32_helper(<2 x i32> %2)
%4 = fadd double %3, %3
@@ -170,7 +170,7 @@ define void @test_f64_v4i16(<4 x i16>* %p, double* %q) {
; SOFT: vrev64.16 [[REG:d[0-9]+]]
; SOFT: vmov r1, r0, [[REG]]
; HARD: vrev64.16 d0
- %1 = load <4 x i16>* %p
+ %1 = load <4 x i16>, <4 x i16>* %p
%2 = add <4 x i16> %1, %1
%3 = call double @test_f64_v4i16_helper(<4 x i16> %2)
%4 = fadd double %3, %3
@@ -187,7 +187,7 @@ define void @test_f64_v8i8(<8 x i8>* %p, double* %q) {
; SOFT: vrev64.8 [[REG:d[0-9]+]]
; SOFT: vmov r1, r0, [[REG]]
; HARD: vrev64.8 d0
- %1 = load <8 x i8>* %p
+ %1 = load <8 x i8>, <8 x i8>* %p
%2 = add <8 x i8> %1, %1
%3 = call double @test_f64_v8i8_helper(<8 x i8> %2)
%4 = fadd double %3, %3
@@ -203,7 +203,7 @@ declare <1 x i64> @test_v1i64_i64_helper(i64 %p)
define void @test_v1i64_i64(i64* %p, <1 x i64>* %q) {
; CHECK: adds r1
; CHECK: adc r0
- %1 = load i64* %p
+ %1 = load i64, i64* %p
%2 = add i64 %1, %1
%3 = call <1 x i64> @test_v1i64_i64_helper(i64 %2)
%4 = add <1 x i64> %3, %3
@@ -220,7 +220,7 @@ define void @test_v1i64_f64(double* %p, <1 x i64>* %q) {
; SOFT: vadd.f64 [[REG:d[0-9]+]]
; SOFT: vmov r1, r0, [[REG]]
; HARD: vadd.f64 d0
- %1 = load double* %p
+ %1 = load double, double* %p
%2 = fadd double %1, %1
%3 = call <1 x i64> @test_v1i64_f64_helper(double %2)
%4 = add <1 x i64> %3, %3
@@ -237,7 +237,7 @@ define void @test_v1i64_v2f32(<2 x float>* %p, <1 x i64>* %q) {
; HARD: vrev64.32 d0
; SOFT: vadd.f32 [[REG:d[0-9]+]]
; SOFT: vmov r1, r0, [[REG]]
- %1 = load <2 x float>* %p
+ %1 = load <2 x float>, <2 x float>* %p
%2 = fadd <2 x float> %1, %1
%3 = call <1 x i64> @test_v1i64_v2f32_helper(<2 x float> %2)
%4 = add <1 x i64> %3, %3
@@ -255,7 +255,7 @@ define void @test_v1i64_v2i32(<2 x i32>* %p, <1 x i64>* %q) {
; SOFT: vadd.i32 [[REG:d[0-9]+]]
; SOFT: vrev64.32 [[REG]]
; SOFT: vmov r1, r0, [[REG]]
- %1 = load <2 x i32>* %p
+ %1 = load <2 x i32>, <2 x i32>* %p
%2 = add <2 x i32> %1, %1
%3 = call <1 x i64> @test_v1i64_v2i32_helper(<2 x i32> %2)
%4 = add <1 x i64> %3, %3
@@ -272,7 +272,7 @@ define void @test_v1i64_v4i16(<4 x i16>* %p, <1 x i64>* %q) {
; SOFT: vrev64.16 [[REG:d[0-9]+]]
; SOFT: vmov r1, r0, [[REG]]
; HARD: vrev64.16 d0
- %1 = load <4 x i16>* %p
+ %1 = load <4 x i16>, <4 x i16>* %p
%2 = add <4 x i16> %1, %1
%3 = call <1 x i64> @test_v1i64_v4i16_helper(<4 x i16> %2)
%4 = add <1 x i64> %3, %3
@@ -289,7 +289,7 @@ define void @test_v1i64_v8i8(<8 x i8>* %p, <1 x i64>* %q) {
; SOFT: vrev64.8 [[REG:d[0-9]+]]
; SOFT: vmov r1, r0, [[REG]]
; HARD: vrev64.8 d0
- %1 = load <8 x i8>* %p
+ %1 = load <8 x i8>, <8 x i8>* %p
%2 = add <8 x i8> %1, %1
%3 = call <1 x i64> @test_v1i64_v8i8_helper(<8 x i8> %2)
%4 = add <1 x i64> %3, %3
@@ -305,7 +305,7 @@ declare <2 x float> @test_v2f32_i64_helper(i64 %p)
define void @test_v2f32_i64(i64* %p, <2 x float>* %q) {
; CHECK: adds r1
; CHECK: adc r0
- %1 = load i64* %p
+ %1 = load i64, i64* %p
%2 = add i64 %1, %1
%3 = call <2 x float> @test_v2f32_i64_helper(i64 %2)
%4 = fadd <2 x float> %3, %3
@@ -322,7 +322,7 @@ define void @test_v2f32_f64(double* %p, <2 x float>* %q) {
; SOFT: vadd.f64 [[REG:d[0-9]+]]
; SOFT: vmov r1, r0, [[REG]]
; HARD: vadd.f64 d0
- %1 = load double* %p
+ %1 = load double, double* %p
%2 = fadd double %1, %1
%3 = call <2 x float> @test_v2f32_f64_helper(double %2)
%4 = fadd <2 x float> %3, %3
@@ -339,7 +339,7 @@ define void @test_v2f32_v1i64(<1 x i64>* %p, <2 x float>* %q) {
; SOFT: vadd.i64 [[REG:d[0-9]+]]
; SOFT: vmov r1, r0, [[REG]]
; HARD: vadd.i64 d0
- %1 = load <1 x i64>* %p
+ %1 = load <1 x i64>, <1 x i64>* %p
%2 = add <1 x i64> %1, %1
%3 = call <2 x float> @test_v2f32_v1i64_helper(<1 x i64> %2)
%4 = fadd <2 x float> %3, %3
@@ -357,7 +357,7 @@ define void @test_v2f32_v2i32(<2 x i32>* %p, <2 x float>* %q) {
; SOFT: vadd.i32 [[REG:d[0-9]+]]
; SOFT: vrev64.32 [[REG]]
; SOFT: vmov r1, r0, [[REG]]
- %1 = load <2 x i32>* %p
+ %1 = load <2 x i32>, <2 x i32>* %p
%2 = add <2 x i32> %1, %1
%3 = call <2 x float> @test_v2f32_v2i32_helper(<2 x i32> %2)
%4 = fadd <2 x float> %3, %3
@@ -374,7 +374,7 @@ define void @test_v2f32_v4i16(<4 x i16>* %p, <2 x float>* %q) {
; SOFT: vrev64.16 [[REG:d[0-9]+]]
; SOFT: vmov r1, r0, [[REG]]
; HARD: vrev64.16 d0
- %1 = load <4 x i16>* %p
+ %1 = load <4 x i16>, <4 x i16>* %p
%2 = add <4 x i16> %1, %1
%3 = call <2 x float> @test_v2f32_v4i16_helper(<4 x i16> %2)
%4 = fadd <2 x float> %3, %3
@@ -391,7 +391,7 @@ define void @test_v2f32_v8i8(<8 x i8>* %p, <2 x float>* %q) {
; SOFT: vrev64.8 [[REG:d[0-9]+]]
; SOFT: vmov r1, r0, [[REG]]
; HARD: vrev64.8 d0
- %1 = load <8 x i8>* %p
+ %1 = load <8 x i8>, <8 x i8>* %p
%2 = add <8 x i8> %1, %1
%3 = call <2 x float> @test_v2f32_v8i8_helper(<8 x i8> %2)
%4 = fadd <2 x float> %3, %3
@@ -407,7 +407,7 @@ declare <2 x i32> @test_v2i32_i64_helper(i64 %p)
define void @test_v2i32_i64(i64* %p, <2 x i32>* %q) {
; CHECK: adds r1
; CHECK: adc r0
- %1 = load i64* %p
+ %1 = load i64, i64* %p
%2 = add i64 %1, %1
%3 = call <2 x i32> @test_v2i32_i64_helper(i64 %2)
%4 = add <2 x i32> %3, %3
@@ -424,7 +424,7 @@ define void @test_v2i32_f64(double* %p, <2 x i32>* %q) {
; SOFT: vadd.f64 [[REG:d[0-9]+]]
; SOFT: vmov r1, r0, [[REG]]
; HARD: vadd.f64 d0
- %1 = load double* %p
+ %1 = load double, double* %p
%2 = fadd double %1, %1
%3 = call <2 x i32> @test_v2i32_f64_helper(double %2)
%4 = add <2 x i32> %3, %3
@@ -441,7 +441,7 @@ define void @test_v2i32_v1i64(<1 x i64>* %p, <2 x i32>* %q) {
; SOFT: vadd.i64 [[REG:d[0-9]+]]
; SOFT: vmov r1, r0, [[REG]]
; HARD: vadd.i64 d0
- %1 = load <1 x i64>* %p
+ %1 = load <1 x i64>, <1 x i64>* %p
%2 = add <1 x i64> %1, %1
%3 = call <2 x i32> @test_v2i32_v1i64_helper(<1 x i64> %2)
%4 = add <2 x i32> %3, %3
@@ -460,7 +460,7 @@ define void @test_v2i32_v2f32(<2 x float>* %p, <2 x i32>* %q) {
; SOFT: vadd.f32 [[REG:d[0-9]+]]
; SOFT: vrev64.32 [[REG]]
; SOFT: vmov r1, r0, [[REG]]
- %1 = load <2 x float>* %p
+ %1 = load <2 x float>, <2 x float>* %p
%2 = fadd <2 x float> %1, %1
%3 = call <2 x i32> @test_v2i32_v2f32_helper(<2 x float> %2)
%4 = add <2 x i32> %3, %3
@@ -477,7 +477,7 @@ define void @test_v2i32_v4i16(<4 x i16>* %p, <2 x i32>* %q) {
; SOFT: vrev64.16 [[REG:d[0-9]+]]
; SOFT: vmov r1, r0, [[REG]]
; HARD: vrev64.16 d0
- %1 = load <4 x i16>* %p
+ %1 = load <4 x i16>, <4 x i16>* %p
%2 = add <4 x i16> %1, %1
%3 = call <2 x i32> @test_v2i32_v4i16_helper(<4 x i16> %2)
%4 = add <2 x i32> %3, %3
@@ -494,7 +494,7 @@ define void @test_v2i32_v8i8(<8 x i8>* %p, <2 x i32>* %q) {
; SOFT: vrev64.8 [[REG:d[0-9]+]]
; SOFT: vmov r1, r0, [[REG]]
; HARD: vrev64.8 d0
- %1 = load <8 x i8>* %p
+ %1 = load <8 x i8>, <8 x i8>* %p
%2 = add <8 x i8> %1, %1
%3 = call <2 x i32> @test_v2i32_v8i8_helper(<8 x i8> %2)
%4 = add <2 x i32> %3, %3
@@ -510,7 +510,7 @@ declare <4 x i16> @test_v4i16_i64_helper(i64 %p)
define void @test_v4i16_i64(i64* %p, <4 x i16>* %q) {
; CHECK: adds r1
; CHECK: adc r0
- %1 = load i64* %p
+ %1 = load i64, i64* %p
%2 = add i64 %1, %1
%3 = call <4 x i16> @test_v4i16_i64_helper(i64 %2)
%4 = add <4 x i16> %3, %3
@@ -527,7 +527,7 @@ define void @test_v4i16_f64(double* %p, <4 x i16>* %q) {
; SOFT: vadd.f64 [[REG:d[0-9]+]]
; SOFT: vmov r1, r0, [[REG]]
; HARD: vadd.f64 d0
- %1 = load double* %p
+ %1 = load double, double* %p
%2 = fadd double %1, %1
%3 = call <4 x i16> @test_v4i16_f64_helper(double %2)
%4 = add <4 x i16> %3, %3
@@ -544,7 +544,7 @@ define void @test_v4i16_v1i64(<1 x i64>* %p, <4 x i16>* %q) {
; SOFT: vadd.i64 [[REG:d[0-9]+]]
; SOFT: vmov r1, r0, [[REG]]
; HARD: vadd.i64 d0
- %1 = load <1 x i64>* %p
+ %1 = load <1 x i64>, <1 x i64>* %p
%2 = add <1 x i64> %1, %1
%3 = call <4 x i16> @test_v4i16_v1i64_helper(<1 x i64> %2)
%4 = add <4 x i16> %3, %3
@@ -563,7 +563,7 @@ define void @test_v4i16_v2f32(<2 x float>* %p, <4 x i16>* %q) {
; SOFT: vadd.f32 [[REG:d[0-9]+]]
; SOFT: vrev64.32 [[REG]]
; SOFT: vmov r1, r0, [[REG]]
- %1 = load <2 x float>* %p
+ %1 = load <2 x float>, <2 x float>* %p
%2 = fadd <2 x float> %1, %1
%3 = call <4 x i16> @test_v4i16_v2f32_helper(<2 x float> %2)
%4 = add <4 x i16> %3, %3
@@ -582,7 +582,7 @@ define void @test_v4i16_v2i32(<2 x i32>* %p, <4 x i16>* %q) {
; SOFT: vadd.i32 [[REG:d[0-9]+]]
; SOFT: vrev64.32 [[REG]]
; SOFT: vmov r1, r0, [[REG]]
- %1 = load <2 x i32>* %p
+ %1 = load <2 x i32>, <2 x i32>* %p
%2 = add <2 x i32> %1, %1
%3 = call <4 x i16> @test_v4i16_v2i32_helper(<2 x i32> %2)
%4 = add <4 x i16> %3, %3
@@ -599,7 +599,7 @@ define void @test_v4i16_v8i8(<8 x i8>* %p, <4 x i16>* %q) {
; SOFT: vrev64.8 [[REG:d[0-9]+]]
; SOFT: vmov r1, r0, [[REG]]
; HARD: vrev64.8 d0
- %1 = load <8 x i8>* %p
+ %1 = load <8 x i8>, <8 x i8>* %p
%2 = add <8 x i8> %1, %1
%3 = call <4 x i16> @test_v4i16_v8i8_helper(<8 x i8> %2)
%4 = add <4 x i16> %3, %3
@@ -615,7 +615,7 @@ declare <8 x i8> @test_v8i8_i64_helper(i64 %p)
define void @test_v8i8_i64(i64* %p, <8 x i8>* %q) {
; CHECK: adds r1
; CHECK: adc r0
- %1 = load i64* %p
+ %1 = load i64, i64* %p
%2 = add i64 %1, %1
%3 = call <8 x i8> @test_v8i8_i64_helper(i64 %2)
%4 = add <8 x i8> %3, %3
@@ -632,7 +632,7 @@ define void @test_v8i8_f64(double* %p, <8 x i8>* %q) {
; SOFT: vadd.f64 [[REG:d[0-9]+]]
; SOFT: vmov r1, r0, [[REG]]
; HARD: vadd.f64 d0
- %1 = load double* %p
+ %1 = load double, double* %p
%2 = fadd double %1, %1
%3 = call <8 x i8> @test_v8i8_f64_helper(double %2)
%4 = add <8 x i8> %3, %3
@@ -649,7 +649,7 @@ define void @test_v8i8_v1i64(<1 x i64>* %p, <8 x i8>* %q) {
; SOFT: vadd.i64 [[REG:d[0-9]+]]
; SOFT: vmov r1, r0, [[REG]]
; HARD: vadd.i64 d0
- %1 = load <1 x i64>* %p
+ %1 = load <1 x i64>, <1 x i64>* %p
%2 = add <1 x i64> %1, %1
%3 = call <8 x i8> @test_v8i8_v1i64_helper(<1 x i64> %2)
%4 = add <8 x i8> %3, %3
@@ -666,7 +666,7 @@ define void @test_v8i8_v2f32(<2 x float>* %p, <8 x i8>* %q) {
; SOFT: vrev64.32 [[REG:d[0-9]+]]
; SOFT: vmov r1, r0, [[REG]]
; HARD: vrev64.32 d0
- %1 = load <2 x float>* %p
+ %1 = load <2 x float>, <2 x float>* %p
%2 = fadd <2 x float> %1, %1
%3 = call <8 x i8> @test_v8i8_v2f32_helper(<2 x float> %2)
%4 = add <8 x i8> %3, %3
@@ -683,7 +683,7 @@ define void @test_v8i8_v2i32(<2 x i32>* %p, <8 x i8>* %q) {
; SOFT: vrev64.32 [[REG:d[0-9]+]]
; SOFT: vmov r1, r0, [[REG]]
; HARD: vrev64.32 d0
- %1 = load <2 x i32>* %p
+ %1 = load <2 x i32>, <2 x i32>* %p
%2 = add <2 x i32> %1, %1
%3 = call <8 x i8> @test_v8i8_v2i32_helper(<2 x i32> %2)
%4 = add <8 x i8> %3, %3
@@ -700,7 +700,7 @@ define void @test_v8i8_v4i16(<4 x i16>* %p, <8 x i8>* %q) {
; SOFT: vrev64.16 [[REG:d[0-9]+]]
; SOFT: vmov r1, r0, [[REG]]
; HARD: vrev64.16 d0
- %1 = load <4 x i16>* %p
+ %1 = load <4 x i16>, <4 x i16>* %p
%2 = add <4 x i16> %1, %1
%3 = call <8 x i8> @test_v8i8_v4i16_helper(<4 x i16> %2)
%4 = add <8 x i8> %3, %3
@@ -720,7 +720,7 @@ define void @test_f128_v2f64(<2 x double>* %p, fp128* %q) {
; SOFT: vmov r3, r2, [[REG2]]
; HARD: vadd.f64 d1
; HARD: vadd.f64 d0
- %1 = load <2 x double>* %p
+ %1 = load <2 x double>, <2 x double>* %p
%2 = fadd <2 x double> %1, %1
%3 = call fp128 @test_f128_v2f64_helper(<2 x double> %2)
%4 = fadd fp128 %3, %3
@@ -735,7 +735,7 @@ define void @test_f128_v2i64(<2 x i64>* %p, fp128* %q) {
; SOFT: vmov r1, r0
; SOFT: vmov r3, r2
; HARD: vadd.i64 q0
- %1 = load <2 x i64>* %p
+ %1 = load <2 x i64>, <2 x i64>* %p
%2 = add <2 x i64> %1, %1
%3 = call fp128 @test_f128_v2i64_helper(<2 x i64> %2)
%4 = fadd fp128 %3, %3
@@ -750,7 +750,7 @@ define void @test_f128_v4f32(<4 x float>* %p, fp128* %q) {
; SOFT: vmov r1, r0
; SOFT: vmov r3, r2
; HARD: vrev64.32 q0
- %1 = load <4 x float>* %p
+ %1 = load <4 x float>, <4 x float>* %p
%2 = fadd <4 x float> %1, %1
%3 = call fp128 @test_f128_v4f32_helper(<4 x float> %2)
%4 = fadd fp128 %3, %3
@@ -765,7 +765,7 @@ define void @test_f128_v4i32(<4 x i32>* %p, fp128* %q) {
; SOFT: vmov r1, r0
; SOFT: vmov r3, r2
; HARD: vrev64.32 q0
- %1 = load <4 x i32>* %p
+ %1 = load <4 x i32>, <4 x i32>* %p
%2 = add <4 x i32> %1, %1
%3 = call fp128 @test_f128_v4i32_helper(<4 x i32> %2)
%4 = fadd fp128 %3, %3
@@ -780,7 +780,7 @@ define void @test_f128_v8i16(<8 x i16>* %p, fp128* %q) {
; SOFT: vmov r1, r0
; SOFT: vmov r3, r2
; HARD: vrev64.16 q0
- %1 = load <8 x i16>* %p
+ %1 = load <8 x i16>, <8 x i16>* %p
%2 = add <8 x i16> %1, %1
%3 = call fp128 @test_f128_v8i16_helper(<8 x i16> %2)
%4 = fadd fp128 %3, %3
@@ -795,7 +795,7 @@ define void @test_f128_v16i8(<16 x i8>* %p, fp128* %q) {
; SOFT: vmov r1, r0
; SOFT: vmov r3, r2
; HARD: vrev64.8 q0
- %1 = load <16 x i8>* %p
+ %1 = load <16 x i8>, <16 x i8>* %p
%2 = add <16 x i8> %1, %1
%3 = call fp128 @test_f128_v16i8_helper(<16 x i8> %2)
%4 = fadd fp128 %3, %3
@@ -807,7 +807,7 @@ define void @test_f128_v16i8(<16 x i8>* %p, fp128* %q) {
; CHECK-LABEL: test_v2f64_f128:
declare <2 x double> @test_v2f64_f128_helper(fp128 %p)
define void @test_v2f64_f128(fp128* %p, <2 x double>* %q) {
- %1 = load fp128* %p
+ %1 = load fp128, fp128* %p
%2 = fadd fp128 %1, %1
%3 = call <2 x double> @test_v2f64_f128_helper(fp128 %2)
%4 = fadd <2 x double> %3, %3
@@ -824,7 +824,7 @@ define void @test_v2f64_v2i64(<2 x i64>* %p, <2 x double>* %q) {
; SOFT: vmov r1, r0
; SOFT: vmov r3, r2
; HARD: vadd.i64 q0
- %1 = load <2 x i64>* %p
+ %1 = load <2 x i64>, <2 x i64>* %p
%2 = add <2 x i64> %1, %1
%3 = call <2 x double> @test_v2f64_v2i64_helper(<2 x i64> %2)
%4 = fadd <2 x double> %3, %3
@@ -840,7 +840,7 @@ define void @test_v2f64_v4f32(<4 x float>* %p, <2 x double>* %q) {
; SOFT: vmov r1, r0
; SOFT: vmov r3, r2
; HARD: vrev64.32 q0
- %1 = load <4 x float>* %p
+ %1 = load <4 x float>, <4 x float>* %p
%2 = fadd <4 x float> %1, %1
%3 = call <2 x double> @test_v2f64_v4f32_helper(<4 x float> %2)
%4 = fadd <2 x double> %3, %3
@@ -856,7 +856,7 @@ define void @test_v2f64_v4i32(<4 x i32>* %p, <2 x double>* %q) {
; SOFT: vmov r1, r0
; SOFT: vmov r3, r2
; HARD: vrev64.32 q0
- %1 = load <4 x i32>* %p
+ %1 = load <4 x i32>, <4 x i32>* %p
%2 = add <4 x i32> %1, %1
%3 = call <2 x double> @test_v2f64_v4i32_helper(<4 x i32> %2)
%4 = fadd <2 x double> %3, %3
@@ -872,7 +872,7 @@ define void @test_v2f64_v8i16(<8 x i16>* %p, <2 x double>* %q) {
; SOFT: vmov r1, r0
; SOFT: vmov r3, r2
; HARD: vrev64.16 q0
- %1 = load <8 x i16>* %p
+ %1 = load <8 x i16>, <8 x i16>* %p
%2 = add <8 x i16> %1, %1
%3 = call <2 x double> @test_v2f64_v8i16_helper(<8 x i16> %2)
%4 = fadd <2 x double> %3, %3
@@ -888,7 +888,7 @@ define void @test_v2f64_v16i8(<16 x i8>* %p, <2 x double>* %q) {
; SOFT: vmov r1, r0
; SOFT: vmov r3, r2
; HARD: vrev64.8 q0
- %1 = load <16 x i8>* %p
+ %1 = load <16 x i8>, <16 x i8>* %p
%2 = add <16 x i8> %1, %1
%3 = call <2 x double> @test_v2f64_v16i8_helper(<16 x i8> %2)
%4 = fadd <2 x double> %3, %3
@@ -901,7 +901,7 @@ define void @test_v2f64_v16i8(<16 x i8>* %p, <2 x double>* %q) {
; CHECK-LABEL: test_v2i64_f128:
declare <2 x i64> @test_v2i64_f128_helper(fp128 %p)
define void @test_v2i64_f128(fp128* %p, <2 x i64>* %q) {
- %1 = load fp128* %p
+ %1 = load fp128, fp128* %p
%2 = fadd fp128 %1, %1
%3 = call <2 x i64> @test_v2i64_f128_helper(fp128 %2)
%4 = add <2 x i64> %3, %3
@@ -918,7 +918,7 @@ define void @test_v2i64_v2f64(<2 x double>* %p, <2 x i64>* %q) {
; SOFT: vmov r3, r2, [[REG2]]
; HARD: vadd.f64 d1
; HARD: vadd.f64 d0
- %1 = load <2 x double>* %p
+ %1 = load <2 x double>, <2 x double>* %p
%2 = fadd <2 x double> %1, %1
%3 = call <2 x i64> @test_v2i64_v2f64_helper(<2 x double> %2)
%4 = add <2 x i64> %3, %3
@@ -934,7 +934,7 @@ define void @test_v2i64_v4f32(<4 x float>* %p, <2 x i64>* %q) {
; SOFT: vmov r1, r0
; SOFT: vmov r3, r2
; HARD: vrev64.32 q0
- %1 = load <4 x float>* %p
+ %1 = load <4 x float>, <4 x float>* %p
%2 = fadd <4 x float> %1, %1
%3 = call <2 x i64> @test_v2i64_v4f32_helper(<4 x float> %2)
%4 = add <2 x i64> %3, %3
@@ -950,7 +950,7 @@ define void @test_v2i64_v4i32(<4 x i32>* %p, <2 x i64>* %q) {
; SOFT: vmov r1, r0
; SOFT: vmov r3, r2
; HARD: vrev64.32 q0
- %1 = load <4 x i32>* %p
+ %1 = load <4 x i32>, <4 x i32>* %p
%2 = add <4 x i32> %1, %1
%3 = call <2 x i64> @test_v2i64_v4i32_helper(<4 x i32> %2)
%4 = add <2 x i64> %3, %3
@@ -966,7 +966,7 @@ define void @test_v2i64_v8i16(<8 x i16>* %p, <2 x i64>* %q) {
; SOFT: vmov r1, r0
; SOFT: vmov r3, r2
; HARD: vrev64.16 q0
- %1 = load <8 x i16>* %p
+ %1 = load <8 x i16>, <8 x i16>* %p
%2 = add <8 x i16> %1, %1
%3 = call <2 x i64> @test_v2i64_v8i16_helper(<8 x i16> %2)
%4 = add <2 x i64> %3, %3
@@ -982,7 +982,7 @@ define void @test_v2i64_v16i8(<16 x i8>* %p, <2 x i64>* %q) {
; SOFT: vmov r1, r0
; SOFT: vmov r3, r2
; HARD: vrev64.8 q0
- %1 = load <16 x i8>* %p
+ %1 = load <16 x i8>, <16 x i8>* %p
%2 = add <16 x i8> %1, %1
%3 = call <2 x i64> @test_v2i64_v16i8_helper(<16 x i8> %2)
%4 = add <2 x i64> %3, %3
@@ -995,7 +995,7 @@ define void @test_v2i64_v16i8(<16 x i8>* %p, <2 x i64>* %q) {
; CHECK-LABEL: test_v4f32_f128:
declare <4 x float> @test_v4f32_f128_helper(fp128 %p)
define void @test_v4f32_f128(fp128* %p, <4 x float>* %q) {
- %1 = load fp128* %p
+ %1 = load fp128, fp128* %p
%2 = fadd fp128 %1, %1
%3 = call <4 x float> @test_v4f32_f128_helper(fp128 %2)
%4 = fadd <4 x float> %3, %3
@@ -1012,7 +1012,7 @@ define void @test_v4f32_v2f64(<2 x double>* %p, <4 x float>* %q) {
; SOFT: vmov r3, r2
; HARD: vadd.f64 d1
; HARD: vadd.f64 d0
- %1 = load <2 x double>* %p
+ %1 = load <2 x double>, <2 x double>* %p
%2 = fadd <2 x double> %1, %1
%3 = call <4 x float> @test_v4f32_v2f64_helper(<2 x double> %2)
%4 = fadd <4 x float> %3, %3
@@ -1028,7 +1028,7 @@ define void @test_v4f32_v2i64(<2 x i64>* %p, <4 x float>* %q) {
; SOFT: vmov r1, r0
; SOFT: vmov r3, r2
; HARD: vadd.i64 q0
- %1 = load <2 x i64>* %p
+ %1 = load <2 x i64>, <2 x i64>* %p
%2 = add <2 x i64> %1, %1
%3 = call <4 x float> @test_v4f32_v2i64_helper(<2 x i64> %2)
%4 = fadd <4 x float> %3, %3
@@ -1044,7 +1044,7 @@ define void @test_v4f32_v4i32(<4 x i32>* %p, <4 x float>* %q) {
; SOFT: vmov r1, r0
; SOFT: vmov r3, r2
; HARD: vrev64.32 q0
- %1 = load <4 x i32>* %p
+ %1 = load <4 x i32>, <4 x i32>* %p
%2 = add <4 x i32> %1, %1
%3 = call <4 x float> @test_v4f32_v4i32_helper(<4 x i32> %2)
%4 = fadd <4 x float> %3, %3
@@ -1060,7 +1060,7 @@ define void @test_v4f32_v8i16(<8 x i16>* %p, <4 x float>* %q) {
; SOFT: vmov r1, r0
; SOFT: vmov r3, r2
; HARD: vrev64.16 q0
- %1 = load <8 x i16>* %p
+ %1 = load <8 x i16>, <8 x i16>* %p
%2 = add <8 x i16> %1, %1
%3 = call <4 x float> @test_v4f32_v8i16_helper(<8 x i16> %2)
%4 = fadd <4 x float> %3, %3
@@ -1076,7 +1076,7 @@ define void @test_v4f32_v16i8(<16 x i8>* %p, <4 x float>* %q) {
; SOFT: vmov r1, r0
; SOFT: vmov r3, r2
; HARD: vrev64.8 q0
- %1 = load <16 x i8>* %p
+ %1 = load <16 x i8>, <16 x i8>* %p
%2 = add <16 x i8> %1, %1
%3 = call <4 x float> @test_v4f32_v16i8_helper(<16 x i8> %2)
%4 = fadd <4 x float> %3, %3
@@ -1089,7 +1089,7 @@ define void @test_v4f32_v16i8(<16 x i8>* %p, <4 x float>* %q) {
; CHECK-LABEL: test_v4i32_f128:
declare <4 x i32> @test_v4i32_f128_helper(fp128 %p)
define void @test_v4i32_f128(fp128* %p, <4 x i32>* %q) {
- %1 = load fp128* %p
+ %1 = load fp128, fp128* %p
%2 = fadd fp128 %1, %1
%3 = call <4 x i32> @test_v4i32_f128_helper(fp128 %2)
%4 = add <4 x i32> %3, %3
@@ -1106,7 +1106,7 @@ define void @test_v4i32_v2f64(<2 x double>* %p, <4 x i32>* %q) {
; SOFT: vmov r3, r2
; HARD: vadd.f64 d1
; HARD: vadd.f64 d0
- %1 = load <2 x double>* %p
+ %1 = load <2 x double>, <2 x double>* %p
%2 = fadd <2 x double> %1, %1
%3 = call <4 x i32> @test_v4i32_v2f64_helper(<2 x double> %2)
%4 = add <4 x i32> %3, %3
@@ -1122,7 +1122,7 @@ define void @test_v4i32_v2i64(<2 x i64>* %p, <4 x i32>* %q) {
; SOFT: vmov r1, r0
; SOFT: vmov r3, r2
; HARD: vadd.i64 q0
- %1 = load <2 x i64>* %p
+ %1 = load <2 x i64>, <2 x i64>* %p
%2 = add <2 x i64> %1, %1
%3 = call <4 x i32> @test_v4i32_v2i64_helper(<2 x i64> %2)
%4 = add <4 x i32> %3, %3
@@ -1138,7 +1138,7 @@ define void @test_v4i32_v4f32(<4 x float>* %p, <4 x i32>* %q) {
; SOFT: vmov r1, r0
; SOFT: vmov r3, r2
; HARD: vrev64.32 q0
- %1 = load <4 x float>* %p
+ %1 = load <4 x float>, <4 x float>* %p
%2 = fadd <4 x float> %1, %1
%3 = call <4 x i32> @test_v4i32_v4f32_helper(<4 x float> %2)
%4 = add <4 x i32> %3, %3
@@ -1154,7 +1154,7 @@ define void @test_v4i32_v8i16(<8 x i16>* %p, <4 x i32>* %q) {
; SOFT: vmov r1, r0
; SOFT: vmov r3, r2
; HARD: vrev64.16 q0
- %1 = load <8 x i16>* %p
+ %1 = load <8 x i16>, <8 x i16>* %p
%2 = add <8 x i16> %1, %1
%3 = call <4 x i32> @test_v4i32_v8i16_helper(<8 x i16> %2)
%4 = add <4 x i32> %3, %3
@@ -1170,7 +1170,7 @@ define void @test_v4i32_v16i8(<16 x i8>* %p, <4 x i32>* %q) {
; SOFT: vmov r1, r0
; SOFT: vmov r3, r2
; HARD: vrev64.8 q0
- %1 = load <16 x i8>* %p
+ %1 = load <16 x i8>, <16 x i8>* %p
%2 = add <16 x i8> %1, %1
%3 = call <4 x i32> @test_v4i32_v16i8_helper(<16 x i8> %2)
%4 = add <4 x i32> %3, %3
@@ -1183,7 +1183,7 @@ define void @test_v4i32_v16i8(<16 x i8>* %p, <4 x i32>* %q) {
; CHECK-LABEL: test_v8i16_f128:
declare <8 x i16> @test_v8i16_f128_helper(fp128 %p)
define void @test_v8i16_f128(fp128* %p, <8 x i16>* %q) {
- %1 = load fp128* %p
+ %1 = load fp128, fp128* %p
%2 = fadd fp128 %1, %1
%3 = call <8 x i16> @test_v8i16_f128_helper(fp128 %2)
%4 = add <8 x i16> %3, %3
@@ -1200,7 +1200,7 @@ define void @test_v8i16_v2f64(<2 x double>* %p, <8 x i16>* %q) {
; SOFT: vmov r3, r2
; HARD: vadd.f64 d1
; HARD: vadd.f64 d0
- %1 = load <2 x double>* %p
+ %1 = load <2 x double>, <2 x double>* %p
%2 = fadd <2 x double> %1, %1
%3 = call <8 x i16> @test_v8i16_v2f64_helper(<2 x double> %2)
%4 = add <8 x i16> %3, %3
@@ -1216,7 +1216,7 @@ define void @test_v8i16_v2i64(<2 x i64>* %p, <8 x i16>* %q) {
; SOFT: vmov r1, r0
; SOFT: vmov r3, r2
; HARD: vadd.i64 q0
- %1 = load <2 x i64>* %p
+ %1 = load <2 x i64>, <2 x i64>* %p
%2 = add <2 x i64> %1, %1
%3 = call <8 x i16> @test_v8i16_v2i64_helper(<2 x i64> %2)
%4 = add <8 x i16> %3, %3
@@ -1232,7 +1232,7 @@ define void @test_v8i16_v4f32(<4 x float>* %p, <8 x i16>* %q) {
; SOFT: vmov r1, r0
; SOFT: vmov r3, r2
; HARD: vrev64.32 q0
- %1 = load <4 x float>* %p
+ %1 = load <4 x float>, <4 x float>* %p
%2 = fadd <4 x float> %1, %1
%3 = call <8 x i16> @test_v8i16_v4f32_helper(<4 x float> %2)
%4 = add <8 x i16> %3, %3
@@ -1248,7 +1248,7 @@ define void @test_v8i16_v4i32(<4 x i32>* %p, <8 x i16>* %q) {
; SOFT: vmov r1, r0
; SOFT: vmov r3, r2
; HARD: vrev64.32 q0
- %1 = load <4 x i32>* %p
+ %1 = load <4 x i32>, <4 x i32>* %p
%2 = add <4 x i32> %1, %1
%3 = call <8 x i16> @test_v8i16_v4i32_helper(<4 x i32> %2)
%4 = add <8 x i16> %3, %3
@@ -1264,7 +1264,7 @@ define void @test_v8i16_v16i8(<16 x i8>* %p, <8 x i16>* %q) {
; SOFT: vmov r1, r0
; SOFT: vmov r3, r2
; HARD: vrev64.8 q0
- %1 = load <16 x i8>* %p
+ %1 = load <16 x i8>, <16 x i8>* %p
%2 = add <16 x i8> %1, %1
%3 = call <8 x i16> @test_v8i16_v16i8_helper(<16 x i8> %2)
%4 = add <8 x i16> %3, %3
@@ -1277,7 +1277,7 @@ define void @test_v8i16_v16i8(<16 x i8>* %p, <8 x i16>* %q) {
; CHECK-LABEL: test_v16i8_f128:
declare <16 x i8> @test_v16i8_f128_helper(fp128 %p)
define void @test_v16i8_f128(fp128* %p, <16 x i8>* %q) {
- %1 = load fp128* %p
+ %1 = load fp128, fp128* %p
%2 = fadd fp128 %1, %1
%3 = call <16 x i8> @test_v16i8_f128_helper(fp128 %2)
%4 = add <16 x i8> %3, %3
@@ -1294,7 +1294,7 @@ define void @test_v16i8_v2f64(<2 x double>* %p, <16 x i8>* %q) {
; SOFT: vmov r3, r2
; HARD: vadd.f64 d1
; HARD: vadd.f64 d0
- %1 = load <2 x double>* %p
+ %1 = load <2 x double>, <2 x double>* %p
%2 = fadd <2 x double> %1, %1
%3 = call <16 x i8> @test_v16i8_v2f64_helper(<2 x double> %2)
%4 = add <16 x i8> %3, %3
@@ -1310,7 +1310,7 @@ define void @test_v16i8_v2i64(<2 x i64>* %p, <16 x i8>* %q) {
; SOFT: vmov r1, r0
; SOFT: vmov r3, r2
; HARD: vadd.i64 q0
- %1 = load <2 x i64>* %p
+ %1 = load <2 x i64>, <2 x i64>* %p
%2 = add <2 x i64> %1, %1
%3 = call <16 x i8> @test_v16i8_v2i64_helper(<2 x i64> %2)
%4 = add <16 x i8> %3, %3
@@ -1326,7 +1326,7 @@ define void @test_v16i8_v4f32(<4 x float>* %p, <16 x i8>* %q) {
; SOFT: vmov r1, r0
; SOFT: vmov r3, r2
; HARD: vrev64.32 q0
- %1 = load <4 x float>* %p
+ %1 = load <4 x float>, <4 x float>* %p
%2 = fadd <4 x float> %1, %1
%3 = call <16 x i8> @test_v16i8_v4f32_helper(<4 x float> %2)
%4 = add <16 x i8> %3, %3
@@ -1342,7 +1342,7 @@ define void @test_v16i8_v4i32(<4 x i32>* %p, <16 x i8>* %q) {
; SOFT: vmov r1, r0
; SOFT: vmov r3, r2
; HARD: vrev64.32 q0
- %1 = load <4 x i32>* %p
+ %1 = load <4 x i32>, <4 x i32>* %p
%2 = add <4 x i32> %1, %1
%3 = call <16 x i8> @test_v16i8_v4i32_helper(<4 x i32> %2)
%4 = add <16 x i8> %3, %3
@@ -1358,7 +1358,7 @@ define void @test_v16i8_v8i16(<8 x i16>* %p, <16 x i8>* %q) {
; SOFT: vmov r1, r0
; SOFT: vmov r3, r2
; HARD: vrev64.16 q0
- %1 = load <8 x i16>* %p
+ %1 = load <8 x i16>, <8 x i16>* %p
%2 = add <8 x i16> %1, %1
%3 = call <16 x i8> @test_v16i8_v8i16_helper(<8 x i16> %2)
%4 = add <16 x i8> %3, %3
diff --git a/test/CodeGen/ARM/bswap16.ll b/test/CodeGen/ARM/bswap16.ll
index 70c62d294eec..dc0e468b72dc 100644
--- a/test/CodeGen/ARM/bswap16.ll
+++ b/test/CodeGen/ARM/bswap16.ll
@@ -4,7 +4,7 @@
define void @test1(i16* nocapture %data) {
entry:
- %0 = load i16* %data, align 2
+ %0 = load i16, i16* %data, align 2
%1 = tail call i16 @llvm.bswap.i16(i16 %0)
store i16 %1, i16* %data, align 2
ret void
@@ -30,7 +30,7 @@ entry:
define i16 @test3(i16* nocapture %data) {
entry:
- %0 = load i16* %data, align 2
+ %0 = load i16, i16* %data, align 2
%1 = tail call i16 @llvm.bswap.i16(i16 %0)
ret i16 %1
diff --git a/test/CodeGen/ARM/build-attributes.ll b/test/CodeGen/ARM/build-attributes.ll
index 2e382308bf51..1982fa98ef41 100644
--- a/test/CodeGen/ARM/build-attributes.ll
+++ b/test/CodeGen/ARM/build-attributes.ll
@@ -7,6 +7,8 @@
; RUN: llc < %s -mtriple=armv6-linux-gnueabi -enable-sign-dependent-rounding-fp-math | FileCheck %s --check-prefix=DYN-ROUNDING
; RUN: llc < %s -mtriple=thumbv6m-linux-gnueabi | FileCheck %s --check-prefix=V6M
; RUN: llc < %s -mtriple=thumbv6m-linux-gnueabi -enable-unsafe-fp-math -disable-fp-elim -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=V6M-FAST
+; RUN: llc < %s -mtriple=thumbv6sm-linux-gnueabi | FileCheck %s --check-prefix=V6M
+; RUN: llc < %s -mtriple=thumbv6sm-linux-gnueabi -enable-unsafe-fp-math -disable-fp-elim -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=V6M-FAST
; RUN: llc < %s -mtriple=armv6-linux-gnueabi -mcpu=arm1156t2f-s | FileCheck %s --check-prefix=ARM1156T2F-S
; RUN: llc < %s -mtriple=armv6-linux-gnueabi -mcpu=arm1156t2f-s -enable-unsafe-fp-math -disable-fp-elim -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=ARM1156T2F-S-FAST
; RUN: llc < %s -mtriple=armv6-linux-gnueabi -mcpu=arm1156t2f-s -enable-sign-dependent-rounding-fp-math | FileCheck %s --check-prefix=DYN-ROUNDING
@@ -53,9 +55,21 @@
; RUN: llc < %s -mtriple=thumbv6m-linux-gnueabi -mcpu=cortex-m0 | FileCheck %s --check-prefix=CORTEX-M0
; RUN: llc < %s -mtriple=thumbv6m-linux-gnueabi -mcpu=cortex-m0 -enable-unsafe-fp-math -disable-fp-elim -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=CORTEX-M0-FAST
; RUN: llc < %s -mtriple=thumbv6m-linux-gnueabi -mcpu=cortex-m0 -enable-sign-dependent-rounding-fp-math | FileCheck %s --check-prefix=DYN-ROUNDING
+; RUN: llc < %s -mtriple=thumbv6m-linux-gnueabi -mcpu=cortex-m0plus | FileCheck %s --check-prefix=CORTEX-M0PLUS
+; RUN: llc < %s -mtriple=thumbv6m-linux-gnueabi -mcpu=cortex-m0plus -enable-unsafe-fp-math -disable-fp-elim -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=CORTEX-M0PLUS-FAST
+; RUN: llc < %s -mtriple=thumbv6m-linux-gnueabi -mcpu=cortex-m0plus -enable-sign-dependent-rounding-fp-math | FileCheck %s --check-prefix=DYN-ROUNDING
+; RUN: llc < %s -mtriple=thumbv6m-linux-gnueabi -mcpu=cortex-m1 | FileCheck %s --check-prefix=CORTEX-M1
+; RUN: llc < %s -mtriple=thumbv6m-linux-gnueabi -mcpu=cortex-m1 -enable-unsafe-fp-math -disable-fp-elim -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=CORTEX-M1-FAST
+; RUN: llc < %s -mtriple=thumbv6m-linux-gnueabi -mcpu=cortex-m1 -enable-sign-dependent-rounding-fp-math | FileCheck %s --check-prefix=DYN-ROUNDING
+; RUN: llc < %s -mtriple=thumbv6m-linux-gnueabi -mcpu=sc000 | FileCheck %s --check-prefix=SC000
+; RUN: llc < %s -mtriple=thumbv6m-linux-gnueabi -mcpu=sc000 -enable-unsafe-fp-math -disable-fp-elim -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=SC000-FAST
+; RUN: llc < %s -mtriple=thumbv6m-linux-gnueabi -mcpu=sc000 -enable-sign-dependent-rounding-fp-math | FileCheck %s --check-prefix=DYN-ROUNDING
; RUN: llc < %s -mtriple=thumbv7m-linux-gnueabi -mcpu=cortex-m3 | FileCheck %s --check-prefix=CORTEX-M3
; RUN: llc < %s -mtriple=thumbv7m-linux-gnueabi -mcpu=cortex-m3 -enable-unsafe-fp-math -disable-fp-elim -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=CORTEX-M3-FAST
; RUN: llc < %s -mtriple=thumbv7m-linux-gnueabi -mcpu=cortex-m3 -enable-sign-dependent-rounding-fp-math | FileCheck %s --check-prefix=DYN-ROUNDING
+; RUN: llc < %s -mtriple=thumbv7m-linux-gnueabi -mcpu=sc300 | FileCheck %s --check-prefix=SC300
+; RUN: llc < %s -mtriple=thumbv7m-linux-gnueabi -mcpu=sc300 -enable-unsafe-fp-math -disable-fp-elim -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=SC300-FAST
+; RUN: llc < %s -mtriple=thumbv7m-linux-gnueabi -mcpu=sc300 -enable-sign-dependent-rounding-fp-math | FileCheck %s --check-prefix=DYN-ROUNDING
; RUN: llc < %s -mtriple=thumbv7m-linux-gnueabi -mcpu=cortex-m4 -float-abi=soft | FileCheck %s --check-prefix=CORTEX-M4-SOFT
; RUN: llc < %s -mtriple=thumbv7m-linux-gnueabi -mcpu=cortex-m4 -float-abi=soft -enable-unsafe-fp-math -disable-fp-elim -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=CORTEX-M4-SOFT-FAST
; RUN: llc < %s -mtriple=thumbv7m-linux-gnueabi -mcpu=cortex-m4 -float-abi=hard | FileCheck %s --check-prefix=CORTEX-M4-HARD
@@ -67,15 +81,26 @@
; RUN: llc < %s -mtriple=thumbv7em-linux-gnueabi -mcpu=cortex-m7 -mattr=+fp-only-sp -enable-unsafe-fp-math -disable-fp-elim -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=CORTEX-M7-FAST
; RUN: llc < %s -mtriple=thumbv7em-linux-gnueabi -mcpu=cortex-m7 | FileCheck %s --check-prefix=CORTEX-M7-DOUBLE
; RUN: llc < %s -mtriple=thumbv7em-linux-gnueabi -mcpu=cortex-m7 -enable-sign-dependent-rounding-fp-math | FileCheck %s --check-prefix=DYN-ROUNDING
+; RUN: llc < %s -mtriple=armv7r-linux-gnueabi -mcpu=cortex-r4 | FileCheck %s --check-prefix=CORTEX-R4
+; RUN: llc < %s -mtriple=armv7r-linux-gnueabi -mcpu=cortex-r4f | FileCheck %s --check-prefix=CORTEX-R4F
; RUN: llc < %s -mtriple=armv7r-linux-gnueabi -mcpu=cortex-r5 | FileCheck %s --check-prefix=CORTEX-R5
; RUN: llc < %s -mtriple=armv7r-linux-gnueabi -mcpu=cortex-r5 -enable-unsafe-fp-math -disable-fp-elim -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=CORTEX-R5-FAST
; RUN: llc < %s -mtriple=armv7r-linux-gnueabi -mcpu=cortex-r5 -enable-sign-dependent-rounding-fp-math | FileCheck %s --check-prefix=DYN-ROUNDING
+; RUN: llc < %s -mtriple=armv7r-linux-gnueabi -mcpu=cortex-r7 | FileCheck %s --check-prefix=CORTEX-R7
+; RUN: llc < %s -mtriple=armv7r-linux-gnueabi -mcpu=cortex-r7 -enable-unsafe-fp-math -disable-fp-elim -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=CORTEX-R7-FAST
+; RUN: llc < %s -mtriple=armv7r-linux-gnueabi -mcpu=cortex-r7 -enable-sign-dependent-rounding-fp-math | FileCheck %s --check-prefix=DYN-ROUNDING
; RUN: llc < %s -mtriple=armv8-linux-gnueabi -mcpu=cortex-a53 | FileCheck %s --check-prefix=CORTEX-A53
; RUN: llc < %s -mtriple=armv8-linux-gnueabi -mcpu=cortex-a53 -enable-unsafe-fp-math -disable-fp-elim -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=CORTEX-A53-FAST
; RUN: llc < %s -mtriple=armv8-linux-gnueabi -mcpu=cortex-a53 -enable-sign-dependent-rounding-fp-math | FileCheck %s --check-prefix=DYN-ROUNDING
; RUN: llc < %s -mtriple=armv8-linux-gnueabi -mcpu=cortex-a57 | FileCheck %s --check-prefix=CORTEX-A57
; RUN: llc < %s -mtriple=armv8-linux-gnueabi -mcpu=cortex-a57 -enable-unsafe-fp-math -disable-fp-elim -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=CORTEX-A57-FAST
; RUN: llc < %s -mtriple=armv8-linux-gnueabi -mcpu=cortex-a57 -enable-sign-dependent-rounding-fp-math | FileCheck %s --check-prefix=DYN-ROUNDING
+; RUN: llc < %s -mtriple=armv8-linux-gnueabi -mcpu=cortex-a72 | FileCheck %s --check-prefix=CORTEX-A72
+; RUN: llc < %s -mtriple=armv8-linux-gnueabi -mcpu=cortex-a72 -enable-unsafe-fp-math -disable-fp-elim -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=CORTEX-A72-FAST
+; RUN: llc < %s -mtriple=armv8-linux-gnueabi -mcpu=cortex-a72 -enable-sign-dependent-rounding-fp-math | FileCheck %s --check-prefix=DYN-ROUNDING
+; RUN: llc < %s -mtriple=armv8.1a-linux-gnueabi | FileCheck %s --check-prefix=GENERIC-ARMV8_1-A
+; RUN: llc < %s -mtriple=armv8.1a-linux-gnueabi -enable-unsafe-fp-math -disable-fp-elim -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=GENERIC-ARMV8_1-A-FAST
+; RUN: llc < %s -mtriple=armv8.1a-linux-gnueabi -enable-sign-dependent-rounding-fp-math | FileCheck %s --check-prefix=DYN-ROUNDING
; RUN: llc < %s -mtriple=armv7-none-linux-gnueabi -mcpu=cortex-a7 | FileCheck %s --check-prefix=CORTEX-A7-CHECK
; RUN: llc < %s -mtriple=armv7-none-linux-gnueabi -mcpu=cortex-a7 -enable-unsafe-fp-math -disable-fp-elim -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=CORTEX-A7-CHECK-FAST
; RUN: llc < %s -mtriple=armv7-none-linux-gnueabi -mcpu=cortex-a7 -mattr=-vfp2,-vfp3,-vfp4,-neon | FileCheck %s --check-prefix=CORTEX-A7-NOFPU
@@ -92,10 +117,17 @@
; RUN: llc < %s -mtriple=arm-none-linux-gnueabi | FileCheck %s --check-prefix=PCS-R9-USE
; RUN: llc < %s -mtriple=arm-none-linux-gnueabi -arm-reserve-r9 | FileCheck %s --check-prefix=PCS-R9-RESERVE
+; ARMv8.1a (AArch32)
+; RUN: llc < %s -mtriple=armv8.1a-none-linux-gnueabi -arm-no-strict-align | FileCheck %s --check-prefix=NO-STRICT-ALIGN
+; RUN: llc < %s -mtriple=armv8.1a-none-linux-gnueabi -arm-strict-align | FileCheck %s --check-prefix=STRICT-ALIGN
+; RUN: llc < %s -mtriple=armv8.1a-none-linux-gnueabi | FileCheck %s --check-prefix=NO-STRICT-ALIGN
; ARMv8a (AArch32)
; RUN: llc < %s -mtriple=armv8-none-linux-gnueabi -mcpu=cortex-a57 -arm-no-strict-align | FileCheck %s --check-prefix=NO-STRICT-ALIGN
; RUN: llc < %s -mtriple=armv8-none-linux-gnueabi -mcpu=cortex-a57 -arm-strict-align | FileCheck %s --check-prefix=STRICT-ALIGN
; RUN: llc < %s -mtriple=armv8-none-linux-gnueabi -mcpu=cortex-a57 | FileCheck %s --check-prefix=NO-STRICT-ALIGN
+; RUN: llc < %s -mtriple=armv8-none-linux-gnueabi -mcpu=cortex-a72 -arm-no-strict-align | FileCheck %s --check-prefix=NO-STRICT-ALIGN
+; RUN: llc < %s -mtriple=armv8-none-linux-gnueabi -mcpu=cortex-a72 -arm-strict-align | FileCheck %s --check-prefix=STRICT-ALIGN
+; RUN: llc < %s -mtriple=armv8-none-linux-gnueabi -mcpu=cortex-a72 | FileCheck %s --check-prefix=NO-STRICT-ALIGN
; ARMv7a
; RUN: llc < %s -mtriple=armv7-none-linux-gnueabi -mcpu=cortex-a7 -arm-no-strict-align | FileCheck %s --check-prefix=NO-STRICT-ALIGN
; RUN: llc < %s -mtriple=armv7-none-linux-gnueabi -mcpu=cortex-a7 -arm-strict-align | FileCheck %s --check-prefix=STRICT-ALIGN
@@ -113,6 +145,11 @@
; RUN: llc < %s -mtriple=armv6-none-linux-gnueabi -mcpu=arm1136j-s | FileCheck %s --check-prefix=STRICT-ALIGN
; RUN: llc < %s -mtriple=armv6-none-linux-gnueabi -mcpu=arm1136j-s -arm-no-strict-align | FileCheck %s --check-prefix=NO-STRICT-ALIGN
; RUN: llc < %s -mtriple=armv6-none-linux-gnueabi -mcpu=arm1136j-s -arm-strict-align | FileCheck %s --check-prefix=STRICT-ALIGN
+; ARMv6k
+; RUN: llc < %s -mtriple=armv6k-none-netbsd-gnueabi -mcpu=arm1176j-s | FileCheck %s --check-prefix=NO-STRICT-ALIGN
+; RUN: llc < %s -mtriple=armv6k-none-linux-gnueabi -mcpu=arm1176j-s | FileCheck %s --check-prefix=STRICT-ALIGN
+; RUN: llc < %s -mtriple=armv6k-none-linux-gnueabi -mcpu=arm1176j-s -arm-no-strict-align | FileCheck %s --check-prefix=NO-STRICT-ALIGN
+; RUN: llc < %s -mtriple=armv6k-none-linux-gnueabi -mcpu=arm1176j-s -arm-strict-align | FileCheck %s --check-prefix=STRICT-ALIGN
; ARMv6m
; RUN: llc < %s -mtriple=thumb-none-linux-gnueabi -arm-no-strict-align -mcpu=cortex-m0 | FileCheck %s --check-prefix=STRICT-ALIGN
; RUN: llc < %s -mtriple=thumb-none-linux-gnueabi -arm-strict-align -mcpu=cortex-m0 | FileCheck %s --check-prefix=STRICT-ALIGN
@@ -162,6 +199,8 @@
; V6-FAST-NOT: .eabi_attribute 22
; V6-FAST: .eabi_attribute 23, 1
+;; We emit 6, 12 for both v6-M and v6S-M; technically this is incorrect for
+;; v6-M, but since we don't model the OS extension this is fine.
; V6M: .eabi_attribute 6, 12
; V6M-NOT: .eabi_attribute 7
; V6M: .eabi_attribute 8, 0
@@ -723,6 +762,102 @@
; CORTEX-M0-FAST-NOT: .eabi_attribute 22
; CORTEX-M0-FAST: .eabi_attribute 23, 1
+; CORTEX-M0PLUS: .cpu cortex-m0plus
+; CORTEX-M0PLUS: .eabi_attribute 6, 12
+; CORTEX-M0PLUS-NOT: .eabi_attribute 7
+; CORTEX-M0PLUS: .eabi_attribute 8, 0
+; CORTEX-M0PLUS: .eabi_attribute 9, 1
+; CORTEX-M0PLUS-NOT: .eabi_attribute 19
+;; We default to IEEE 754 compliance
+; CORTEX-M0PLUS: .eabi_attribute 20, 1
+; CORTEX-M0PLUS: .eabi_attribute 21, 1
+; CORTEX-M0PLUS-NOT: .eabi_attribute 22
+; CORTEX-M0PLUS: .eabi_attribute 23, 3
+; CORTEX-M0PLUS: .eabi_attribute 24, 1
+; CORTEX-M0PLUS: .eabi_attribute 25, 1
+; CORTEX-M0PLUS-NOT: .eabi_attribute 27
+; CORTEX-M0PLUS-NOT: .eabi_attribute 28
+; CORTEX-M0PLUS-NOT: .eabi_attribute 36
+; CORTEX-M0PLUS: .eabi_attribute 38, 1
+; CORTEX-M0PLUS-NOT: .eabi_attribute 42
+; CORTEX-M0PLUS-NOT: .eabi_attribute 44
+; CORTEX-M0PLUS-NOT: .eabi_attribute 68
+
+; CORTEX-M0PLUS-FAST-NOT: .eabi_attribute 19
+;; Despite the M0+ CPU having no FPU in this scenario, we chose to
+;; flush to positive zero here. There is no hardware support for doing
+;; this, but the fast-maths software library might, and such behaviour
+;; would match hardware support on this architecture revision if it
+;; existed.
+; CORTEX-M0PLUS-FAST-NOT: .eabi_attribute 20
+; CORTEX-M0PLUS-FAST-NOT: .eabi_attribute 21
+; CORTEX-M0PLUS-FAST-NOT: .eabi_attribute 22
+; CORTEX-M0PLUS-FAST: .eabi_attribute 23, 1
+
+; CORTEX-M1: .cpu cortex-m1
+; CORTEX-M1: .eabi_attribute 6, 12
+; CORTEX-M1-NOT: .eabi_attribute 7
+; CORTEX-M1: .eabi_attribute 8, 0
+; CORTEX-M1: .eabi_attribute 9, 1
+; CORTEX-M1-NOT: .eabi_attribute 19
+;; We default to IEEE 754 compliance
+; CORTEX-M1: .eabi_attribute 20, 1
+; CORTEX-M1: .eabi_attribute 21, 1
+; CORTEX-M1-NOT: .eabi_attribute 22
+; CORTEX-M1: .eabi_attribute 23, 3
+; CORTEX-M1: .eabi_attribute 24, 1
+; CORTEX-M1: .eabi_attribute 25, 1
+; CORTEX-M1-NOT: .eabi_attribute 27
+; CORTEX-M1-NOT: .eabi_attribute 28
+; CORTEX-M1-NOT: .eabi_attribute 36
+; CORTEX-M1: .eabi_attribute 38, 1
+; CORTEX-M1-NOT: .eabi_attribute 42
+; CORTEX-M1-NOT: .eabi_attribute 44
+; CORTEX-M1-NOT: .eabi_attribute 68
+
+; CORTEX-M1-FAST-NOT: .eabi_attribute 19
+;; Despite the M1 CPU having no FPU in this scenario, we chose to
+;; flush to positive zero here. There is no hardware support for doing
+;; this, but the fast-maths software library might, and such behaviour
+;; would match hardware support on this architecture revision if it
+;; existed.
+; CORTEX-M1-FAST-NOT: .eabi_attribute 20
+; CORTEX-M1-FAST-NOT: .eabi_attribute 21
+; CORTEX-M1-FAST-NOT: .eabi_attribute 22
+; CORTEX-M1-FAST: .eabi_attribute 23, 1
+
+; SC000: .cpu sc000
+; SC000: .eabi_attribute 6, 12
+; SC000-NOT: .eabi_attribute 7
+; SC000: .eabi_attribute 8, 0
+; SC000: .eabi_attribute 9, 1
+; SC000-NOT: .eabi_attribute 19
+;; We default to IEEE 754 compliance
+; SC000: .eabi_attribute 20, 1
+; SC000: .eabi_attribute 21, 1
+; SC000-NOT: .eabi_attribute 22
+; SC000: .eabi_attribute 23, 3
+; SC000: .eabi_attribute 24, 1
+; SC000: .eabi_attribute 25, 1
+; SC000-NOT: .eabi_attribute 27
+; SC000-NOT: .eabi_attribute 28
+; SC000-NOT: .eabi_attribute 36
+; SC000: .eabi_attribute 38, 1
+; SC000-NOT: .eabi_attribute 42
+; SC000-NOT: .eabi_attribute 44
+; SC000-NOT: .eabi_attribute 68
+
+; SC000-FAST-NOT: .eabi_attribute 19
+;; Despite the SC000 CPU having no FPU in this scenario, we chose to
+;; flush to positive zero here. There is no hardware support for doing
+;; this, but the fast-maths software library might, and such behaviour
+;; would match hardware support on this architecture revision if it
+;; existed.
+; SC000-FAST-NOT: .eabi_attribute 20
+; SC000-FAST-NOT: .eabi_attribute 21
+; SC000-FAST-NOT: .eabi_attribute 22
+; SC000-FAST: .eabi_attribute 23, 1
+
; CORTEX-M3: .cpu cortex-m3
; CORTEX-M3: .eabi_attribute 6, 10
; CORTEX-M3: .eabi_attribute 7, 77
@@ -753,6 +888,36 @@
; CORTEX-M3-FAST-NOT: .eabi_attribute 22
; CORTEX-M3-FAST: .eabi_attribute 23, 1
+; SC300: .cpu sc300
+; SC300: .eabi_attribute 6, 10
+; SC300: .eabi_attribute 7, 77
+; SC300: .eabi_attribute 8, 0
+; SC300: .eabi_attribute 9, 2
+; SC300-NOT: .eabi_attribute 19
+;; We default to IEEE 754 compliance
+; SC300: .eabi_attribute 20, 1
+; SC300: .eabi_attribute 21, 1
+; SC300-NOT: .eabi_attribute 22
+; SC300: .eabi_attribute 23, 3
+; SC300: .eabi_attribute 24, 1
+; SC300: .eabi_attribute 25, 1
+; SC300-NOT: .eabi_attribute 27
+; SC300-NOT: .eabi_attribute 28
+; SC300-NOT: .eabi_attribute 36
+; SC300: .eabi_attribute 38, 1
+; SC300-NOT: .eabi_attribute 42
+; SC300-NOT: .eabi_attribute 44
+; SC300-NOT: .eabi_attribute 68
+
+; SC300-FAST-NOT: .eabi_attribute 19
+;; Despite there being no FPU, we chose to flush to zero, preserving
+;; sign. This matches what the hardware would do for this architecture
+;; revision.
+; SC300-FAST: .eabi_attribute 20, 2
+; SC300-FAST-NOT: .eabi_attribute 21
+; SC300-FAST-NOT: .eabi_attribute 22
+; SC300-FAST: .eabi_attribute 23, 1
+
; CORTEX-M4-SOFT: .cpu cortex-m4
; CORTEX-M4-SOFT: .eabi_attribute 6, 13
; CORTEX-M4-SOFT: .eabi_attribute 7, 77
@@ -835,6 +1000,7 @@
; CORTEX-M7-DOUBLE-NOT: .eabi_attribute 27
; CORTEX-M7: .eabi_attribute 36, 1
; CORTEX-M7: .eabi_attribute 38, 1
+; CORTEX-M7-NOT: .eabi_attribute 44
; CORTEX-M7: .eabi_attribute 14, 0
; CORTEX-M7-NOFPU-FAST-NOT: .eabi_attribute 19
@@ -848,6 +1014,49 @@
; CORTEX-M7-NOFPU-FAST-NOT: .eabi_attribute 22
; CORTEX-M7-NOFPU-FAST: .eabi_attribute 23, 1
+; CORTEX-R4: .cpu cortex-r4
+; CORTEX-R4: .eabi_attribute 6, 10
+; CORTEX-R4: .eabi_attribute 7, 82
+; CORTEX-R4: .eabi_attribute 8, 1
+; CORTEX-R4: .eabi_attribute 9, 2
+; CORTEX-R4-NOT: .fpu vfpv3-d16
+; CORTEX-R4-NOT: .eabi_attribute 19
+;; We default to IEEE 754 compliance
+; CORTEX-R4: .eabi_attribute 20, 1
+; CORTEX-R4: .eabi_attribute 21, 1
+; CORTEX-R4-NOT: .eabi_attribute 22
+; CORTEX-R4: .eabi_attribute 23, 3
+; CORTEX-R4: .eabi_attribute 24, 1
+; CORTEX-R4: .eabi_attribute 25, 1
+; CORTEX-R4-NOT: .eabi_attribute 28
+; CORTEX-R4-NOT: .eabi_attribute 36
+; CORTEX-R4: .eabi_attribute 38, 1
+; CORTEX-R4-NOT: .eabi_attribute 42
+; CORTEX-R4-NOT: .eabi_attribute 44
+; CORTEX-R4-NOT: .eabi_attribute 68
+
+; CORTEX-R4F: .cpu cortex-r4f
+; CORTEX-R4F: .eabi_attribute 6, 10
+; CORTEX-R4F: .eabi_attribute 7, 82
+; CORTEX-R4F: .eabi_attribute 8, 1
+; CORTEX-R4F: .eabi_attribute 9, 2
+; CORTEX-R4F: .fpu vfpv3-d16
+; CORTEX-R4F-NOT: .eabi_attribute 19
+;; We default to IEEE 754 compliance
+; CORTEX-R4F: .eabi_attribute 20, 1
+; CORTEX-R4F: .eabi_attribute 21, 1
+; CORTEX-R4F-NOT: .eabi_attribute 22
+; CORTEX-R4F: .eabi_attribute 23, 3
+; CORTEX-R4F: .eabi_attribute 24, 1
+; CORTEX-R4F: .eabi_attribute 25, 1
+; CORTEX-R4F: .eabi_attribute 27, 1
+; CORTEX-R4F-NOT: .eabi_attribute 28
+; CORTEX-R4F-NOT: .eabi_attribute 36
+; CORTEX-R4F: .eabi_attribute 38, 1
+; CORTEX-R4F-NOT: .eabi_attribute 42
+; CORTEX-R4F-NOT: .eabi_attribute 44
+; CORTEX-R4F-NOT: .eabi_attribute 68
+
; CORTEX-R5: .cpu cortex-r5
; CORTEX-R5: .eabi_attribute 6, 10
; CORTEX-R5: .eabi_attribute 7, 82
@@ -877,6 +1086,35 @@
; CORTEX-R5-FAST-NOT: .eabi_attribute 22
; CORTEX-R5-FAST: .eabi_attribute 23, 1
+; CORTEX-R7: .cpu cortex-r7
+; CORTEX-R7: .eabi_attribute 6, 10
+; CORTEX-R7: .eabi_attribute 7, 82
+; CORTEX-R7: .eabi_attribute 8, 1
+; CORTEX-R7: .eabi_attribute 9, 2
+; CORTEX-R7: .fpu vfpv3-d16
+; CORTEX-R7-NOT: .eabi_attribute 19
+;; We default to IEEE 754 compliance
+; CORTEX-R7: .eabi_attribute 20, 1
+; CORTEX-R7: .eabi_attribute 21, 1
+; CORTEX-R7-NOT: .eabi_attribute 22
+; CORTEX-R7: .eabi_attribute 23, 3
+; CORTEX-R7: .eabi_attribute 24, 1
+; CORTEX-R7: .eabi_attribute 25, 1
+; CORTEX-R7: .eabi_attribute 27, 1
+; CORTEX-R7-NOT: .eabi_attribute 28
+; CORTEX-R7-NOT: .eabi_attribute 36
+; CORTEX-R7: .eabi_attribute 38, 1
+; CORTEX-R7: .eabi_attribute 42, 1
+; CORTEX-R7: .eabi_attribute 44, 2
+; CORTEX-R7-NOT: .eabi_attribute 68
+
+; CORTEX-R7-FAST-NOT: .eabi_attribute 19
+;; The R7 has the VFPv3 FP unit, which always flushes preserving sign.
+; CORTEX-R7-FAST: .eabi_attribute 20, 2
+; CORTEX-R7-FAST-NOT: .eabi_attribute 21
+; CORTEX-R7-FAST-NOT: .eabi_attribute 22
+; CORTEX-R7-FAST: .eabi_attribute 23, 1
+
; CORTEX-A53: .cpu cortex-a53
; CORTEX-A53: .eabi_attribute 6, 14
; CORTEX-A53: .eabi_attribute 7, 65
@@ -937,6 +1175,65 @@
; CORTEX-A57-FAST-NOT: .eabi_attribute 22
; CORTEX-A57-FAST: .eabi_attribute 23, 1
+; CORTEX-A72: .cpu cortex-a72
+; CORTEX-A72: .eabi_attribute 6, 14
+; CORTEX-A72: .eabi_attribute 7, 65
+; CORTEX-A72: .eabi_attribute 8, 1
+; CORTEX-A72: .eabi_attribute 9, 2
+; CORTEX-A72: .fpu crypto-neon-fp-armv8
+; CORTEX-A72: .eabi_attribute 12, 3
+; CORTEX-A72-NOT: .eabi_attribute 19
+;; We default to IEEE 754 compliance
+; CORTEX-A72: .eabi_attribute 20, 1
+; CORTEX-A72: .eabi_attribute 21, 1
+; CORTEX-A72-NOT: .eabi_attribute 22
+; CORTEX-A72: .eabi_attribute 23, 3
+; CORTEX-A72: .eabi_attribute 24, 1
+; CORTEX-A72: .eabi_attribute 25, 1
+; CORTEX-A72-NOT: .eabi_attribute 27
+; CORTEX-A72-NOT: .eabi_attribute 28
+; CORTEX-A72: .eabi_attribute 36, 1
+; CORTEX-A72: .eabi_attribute 38, 1
+; CORTEX-A72: .eabi_attribute 42, 1
+; CORTEX-A72-NOT: .eabi_attribute 44
+; CORTEX-A72: .eabi_attribute 68, 3
+
+; CORTEX-A72-FAST-NOT: .eabi_attribute 19
+;; The A72 has the ARMv8 FP unit, which always flushes preserving sign.
+; CORTEX-A72-FAST: .eabi_attribute 20, 2
+; CORTEX-A72-FAST-NOT: .eabi_attribute 21
+; CORTEX-A72-FAST-NOT: .eabi_attribute 22
+; CORTEX-A72-FAST: .eabi_attribute 23, 1
+
+; GENERIC-ARMV8_1-A: .eabi_attribute 6, 14
+; GENERIC-ARMV8_1-A: .eabi_attribute 7, 65
+; GENERIC-ARMV8_1-A: .eabi_attribute 8, 1
+; GENERIC-ARMV8_1-A: .eabi_attribute 9, 2
+; GENERIC-ARMV8_1-A: .fpu crypto-neon-fp-armv8
+; GENERIC-ARMV8_1-A: .eabi_attribute 12, 4
+; GENERIC-ARMV8_1-A-NOT: .eabi_attribute 19
+;; We default to IEEE 754 compliance
+; GENERIC-ARMV8_1-A: .eabi_attribute 20, 1
+; GENERIC-ARMV8_1-A: .eabi_attribute 21, 1
+; GENERIC-ARMV8_1-A-NOT: .eabi_attribute 22
+; GENERIC-ARMV8_1-A: .eabi_attribute 23, 3
+; GENERIC-ARMV8_1-A: .eabi_attribute 24, 1
+; GENERIC-ARMV8_1-A: .eabi_attribute 25, 1
+; GENERIC-ARMV8_1-A-NOT: .eabi_attribute 27
+; GENERIC-ARMV8_1-A-NOT: .eabi_attribute 28
+; GENERIC-ARMV8_1-A: .eabi_attribute 36, 1
+; GENERIC-ARMV8_1-A: .eabi_attribute 38, 1
+; GENERIC-ARMV8_1-A: .eabi_attribute 42, 1
+; GENERIC-ARMV8_1-A-NOT: .eabi_attribute 44
+; GENERIC-ARMV8_1-A: .eabi_attribute 68, 3
+
+; GENERIC-ARMV8_1-A-FAST-NOT: .eabi_attribute 19
+;; GENERIC-ARMV8_1-A has the ARMv8 FP unit, which always flushes preserving sign.
+; GENERIC-ARMV8_1-A-FAST: .eabi_attribute 20, 2
+; GENERIC-ARMV8_1-A-FAST-NOT: .eabi_attribute 21
+; GENERIC-ARMV8_1-A-FAST-NOT: .eabi_attribute 22
+; GENERIC-ARMV8_1-A-FAST: .eabi_attribute 23, 1
+
; RELOC-PIC: .eabi_attribute 15, 1
; RELOC-PIC: .eabi_attribute 16, 1
; RELOC-PIC: .eabi_attribute 17, 2
diff --git a/test/CodeGen/ARM/bx_fold.ll b/test/CodeGen/ARM/bx_fold.ll
index 5533038fb828..f6651ae8004e 100644
--- a/test/CodeGen/ARM/bx_fold.ll
+++ b/test/CodeGen/ARM/bx_fold.ll
@@ -6,7 +6,7 @@ entry:
bb: ; preds = %bb1
%gep.upgrd.1 = zext i32 %indvar to i64 ; <i64> [#uses=1]
- %tmp7 = getelementptr i8* %L, i64 %gep.upgrd.1 ; <i8*> [#uses=1]
+ %tmp7 = getelementptr i8, i8* %L, i64 %gep.upgrd.1 ; <i8*> [#uses=1]
store i8 0, i8* %tmp7
%indvar.next = add i32 %indvar, 1 ; <i32> [#uses=1]
br label %bb1
@@ -14,7 +14,7 @@ bb: ; preds = %bb1
bb1: ; preds = %bb, %entry
%indvar = phi i32 [ 0, %entry ], [ %indvar.next, %bb ] ; <i32> [#uses=3]
%i.0 = bitcast i32 %indvar to i32 ; <i32> [#uses=2]
- %tmp = tail call i32 (...)* @bar( ) ; <i32> [#uses=1]
+ %tmp = tail call i32 (...) @bar( ) ; <i32> [#uses=1]
%tmp2 = add i32 %i.0, %tmp ; <i32> [#uses=1]
%Ptr_addr.0 = sub i32 %Ptr, %tmp2 ; <i32> [#uses=0]
%tmp12 = icmp eq i32 %i.0, %Ptr ; <i1> [#uses=1]
diff --git a/test/CodeGen/ARM/byval-align.ll b/test/CodeGen/ARM/byval-align.ll
new file mode 100644
index 000000000000..a26b5a795756
--- /dev/null
+++ b/test/CodeGen/ARM/byval-align.ll
@@ -0,0 +1,76 @@
+; RUN: llc -mtriple=thumbv7-apple-ios8.0 %s -o - | FileCheck %s
+
+; This checks that alignments greater than 4 are respected by APCS
+; targets. It is mostly here to make sure *some* correct code is created after
+; some simplifying refactoring; at the time of writing there were no actual
+; APCS users of byval alignments > 4, so there was no real call for ABI
+; stability.
+
+; "byval align 16" can't fit in any regs with an i8* taking up r0.
+define i32 @test_align16(i8*, [4 x i32]* byval align 16 %b) {
+; CHECK-LABEL: test_align16:
+; CHECK-NOT: sub sp
+; CHECK: push {r4, r7, lr}
+; CHECK: add r7, sp, #4
+
+; CHECK: ldr r0, [r7, #8]
+
+ call void @bar()
+ %valptr = getelementptr [4 x i32], [4 x i32]* %b, i32 0, i32 0
+ %val = load i32, i32* %valptr
+ ret i32 %val
+}
+
+; "byval align 8" can fit in registers, but we used to set r7 incorrectly
+; here (miscalculating the space taken up by the argument registers).
+define i32 @test_align8(i8*, [4 x i32]* byval align 8 %b) {
+; CHECK-LABEL: test_align8:
+; CHECK: sub sp, #8
+; CHECK: push {r4, r7, lr}
+; CHECK: add r7, sp, #4
+
+; CHECK-DAG: str r2, [r7, #8]
+; CHECK-DAG: str r3, [r7, #12]
+
+; CHECK: ldr r0, [r7, #8]
+
+ call void @bar()
+ %valptr = getelementptr [4 x i32], [4 x i32]* %b, i32 0, i32 0
+ %val = load i32, i32* %valptr
+ ret i32 %val
+}
+
+; "byval align 32" can't fit in regs no matter what: it would be misaligned
+; unless the incoming stack was deliberately misaligned.
+define i32 @test_align32(i8*, [4 x i32]* byval align 32 %b) {
+; CHECK-LABEL: test_align32:
+; CHECK-NOT: sub sp
+; CHECK: push {r4, r7, lr}
+; CHECK: add r7, sp, #4
+
+; CHECK: ldr r0, [r7, #8]
+
+ call void @bar()
+ %valptr = getelementptr [4 x i32], [4 x i32]* %b, i32 0, i32 0
+ %val = load i32, i32* %valptr
+ ret i32 %val
+}
+
+; When passing an object "byval align N", the stack must be at least N-aligned.
+define void @test_call_align16() {
+; CHECK-LABEL: test_call_align16:
+; CHECK: push {r4, r7, lr}
+; CHECK: add r7, sp, #4
+
+; CHECK: mov [[TMP:r[0-9]+]], sp
+; CHECK: bfc [[TMP]], #0, #4
+; CHECK: mov sp, [[TMP]]
+
+; While we're here, make sure the caller also puts it at sp
+ ; CHECK: mov r[[BASE:[0-9]+]], sp
+ ; CHECK: vst1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r[[BASE]]]
+ call i32 @test_align16(i8* null, [4 x i32]* byval align 16 @var)
+ ret void
+}
+
+@var = global [4 x i32] zeroinitializer
+declare void @bar()
diff --git a/test/CodeGen/ARM/cache-intrinsic.ll b/test/CodeGen/ARM/cache-intrinsic.ll
index 6048917ee9a3..12b55c7081db 100644
--- a/test/CodeGen/ARM/cache-intrinsic.ll
+++ b/test/CodeGen/ARM/cache-intrinsic.ll
@@ -10,10 +10,10 @@ define i32 @main() {
entry:
%retval = alloca i32, align 4
store i32 0, i32* %retval
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i8* getelementptr inbounds ([32 x i8]* @buffer, i32 0, i32 0))
- %call1 = call i8* @strcpy(i8* getelementptr inbounds ([32 x i8]* @buffer, i32 0, i32 0), i8* getelementptr inbounds ([25 x i8]* @.str1, i32 0, i32 0)) #3
- call void @llvm.clear_cache(i8* getelementptr inbounds ([32 x i8]* @buffer, i32 0, i32 0), i8* getelementptr inbounds (i8* getelementptr inbounds ([32 x i8]* @buffer, i32 0, i32 0), i32 32)) #3
- %call2 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i8* getelementptr inbounds ([32 x i8]* @buffer, i32 0, i32 0))
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i8* getelementptr inbounds ([32 x i8], [32 x i8]* @buffer, i32 0, i32 0))
+ %call1 = call i8* @strcpy(i8* getelementptr inbounds ([32 x i8], [32 x i8]* @buffer, i32 0, i32 0), i8* getelementptr inbounds ([25 x i8], [25 x i8]* @.str1, i32 0, i32 0)) #3
+ call void @llvm.clear_cache(i8* getelementptr inbounds ([32 x i8], [32 x i8]* @buffer, i32 0, i32 0), i8* getelementptr inbounds (i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @buffer, i32 0, i32 0), i32 32)) #3
+ %call2 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i8* getelementptr inbounds ([32 x i8], [32 x i8]* @buffer, i32 0, i32 0))
ret i32 0
}
diff --git a/test/CodeGen/ARM/call-tc.ll b/test/CodeGen/ARM/call-tc.ll
index a35fd7476465..b2b6aaec8131 100644
--- a/test/CodeGen/ARM/call-tc.ll
+++ b/test/CodeGen/ARM/call-tc.ll
@@ -24,7 +24,7 @@ define void @t2() {
; CHECKT2D: ldr
; CHECKT2D-NEXT: ldr
; CHECKT2D-NEXT: bx r0
- %tmp = load i32 ()** @t ; <i32 ()*> [#uses=1]
+ %tmp = load i32 ()*, i32 ()** @t ; <i32 ()*> [#uses=1]
%tmp.upgrd.2 = tail call i32 %tmp( ) ; <i32> [#uses=0]
ret void
}
@@ -153,7 +153,7 @@ define i32 @t9() nounwind {
; CHECKT2D: b.w ___divsi3
%lock = alloca %class.MutexLock, align 1
%1 = call %class.MutexLock* @_ZN9MutexLockC1Ev(%class.MutexLock* %lock)
- %2 = load i32* @x, align 4
+ %2 = load i32, i32* @x, align 4
%3 = sdiv i32 1000, %2
%4 = call %class.MutexLock* @_ZN9MutexLockD1Ev(%class.MutexLock* %lock)
ret i32 %3
@@ -170,7 +170,7 @@ define float @libcall_tc_test2(float* nocapture %a, float %b) {
; CHECKT2D-LABEL: libcall_tc_test2:
; CHECKT2D: blx _floorf
; CHECKT2D: b.w _truncf
- %1 = load float* %a, align 4
+ %1 = load float, float* %a, align 4
%call = tail call float @floorf(float %1)
store float %call, float* %a, align 4
%call1 = tail call float @truncf(float %b)
diff --git a/test/CodeGen/ARM/call.ll b/test/CodeGen/ARM/call.ll
index f6301cf02032..87252a91e1b0 100644
--- a/test/CodeGen/ARM/call.ll
+++ b/test/CodeGen/ARM/call.ll
@@ -20,7 +20,7 @@ define void @f() {
define void @g.upgrd.1() {
; CHECKV4: mov lr, pc
; CHECKV5: blx
- %tmp = load i32 ()** @t ; <i32 ()*> [#uses=1]
+ %tmp = load i32 ()*, i32 ()** @t ; <i32 ()*> [#uses=1]
%tmp.upgrd.2 = call i32 %tmp( ) ; <i32> [#uses=0]
ret void
}
@@ -30,10 +30,10 @@ define i32* @m_231b(i32, i32, i32*, i32*, i32*) nounwind {
; CHECKV4: bx r{{.*}}
BB0:
%5 = inttoptr i32 %0 to i32* ; <i32*> [#uses=1]
- %t35 = load volatile i32* %5 ; <i32> [#uses=1]
+ %t35 = load volatile i32, i32* %5 ; <i32> [#uses=1]
%6 = inttoptr i32 %t35 to i32** ; <i32**> [#uses=1]
- %7 = getelementptr i32** %6, i32 86 ; <i32**> [#uses=1]
- %8 = load i32** %7 ; <i32*> [#uses=1]
+ %7 = getelementptr i32*, i32** %6, i32 86 ; <i32**> [#uses=1]
+ %8 = load i32*, i32** %7 ; <i32*> [#uses=1]
%9 = bitcast i32* %8 to i32* (i32, i32*, i32, i32*, i32*, i32*)* ; <i32* (i32, i32*, i32, i32*, i32*, i32*)*> [#uses=1]
%10 = call i32* %9(i32 %0, i32* null, i32 %1, i32* %2, i32* %3, i32* %4) ; <i32*> [#uses=1]
ret i32* %10
diff --git a/test/CodeGen/ARM/call_nolink.ll b/test/CodeGen/ARM/call_nolink.ll
index 48fa3a62ffb0..0cd5bcd086c8 100644
--- a/test/CodeGen/ARM/call_nolink.ll
+++ b/test/CodeGen/ARM/call_nolink.ll
@@ -22,32 +22,32 @@ bb115.i.i.bb115.i.i_crit_edge: ; preds = %bb115.i.i
bb115.i.i: ; preds = %bb115.i.i.bb115.i.i_crit_edge, %newFuncRoot
%i_addr.3210.0.i.i = phi i32 [ %tmp166.i.i, %bb115.i.i.bb115.i.i_crit_edge ], [ 0, %newFuncRoot ] ; <i32> [#uses=7]
- %tmp124.i.i = getelementptr [2 x { i32, [3 x i32] }]* @pgm, i32 0, i32 %i_addr.3210.0.i.i, i32 1, i32 0 ; <i32*> [#uses=1]
- %tmp125.i.i = load i32* %tmp124.i.i ; <i32> [#uses=1]
- %tmp126.i.i = getelementptr [14 x i32]* @r, i32 0, i32 %tmp125.i.i ; <i32*> [#uses=1]
- %tmp127.i.i = load i32* %tmp126.i.i ; <i32> [#uses=1]
- %tmp131.i.i = getelementptr [2 x { i32, [3 x i32] }]* @pgm, i32 0, i32 %i_addr.3210.0.i.i, i32 1, i32 1 ; <i32*> [#uses=1]
- %tmp132.i.i = load i32* %tmp131.i.i ; <i32> [#uses=1]
- %tmp133.i.i = getelementptr [14 x i32]* @r, i32 0, i32 %tmp132.i.i ; <i32*> [#uses=1]
- %tmp134.i.i = load i32* %tmp133.i.i ; <i32> [#uses=1]
- %tmp138.i.i = getelementptr [2 x { i32, [3 x i32] }]* @pgm, i32 0, i32 %i_addr.3210.0.i.i, i32 1, i32 2 ; <i32*> [#uses=1]
- %tmp139.i.i = load i32* %tmp138.i.i ; <i32> [#uses=1]
- %tmp140.i.i = getelementptr [14 x i32]* @r, i32 0, i32 %tmp139.i.i ; <i32*> [#uses=1]
- %tmp141.i.i = load i32* %tmp140.i.i ; <i32> [#uses=1]
+ %tmp124.i.i = getelementptr [2 x { i32, [3 x i32] }], [2 x { i32, [3 x i32] }]* @pgm, i32 0, i32 %i_addr.3210.0.i.i, i32 1, i32 0 ; <i32*> [#uses=1]
+ %tmp125.i.i = load i32, i32* %tmp124.i.i ; <i32> [#uses=1]
+ %tmp126.i.i = getelementptr [14 x i32], [14 x i32]* @r, i32 0, i32 %tmp125.i.i ; <i32*> [#uses=1]
+ %tmp127.i.i = load i32, i32* %tmp126.i.i ; <i32> [#uses=1]
+ %tmp131.i.i = getelementptr [2 x { i32, [3 x i32] }], [2 x { i32, [3 x i32] }]* @pgm, i32 0, i32 %i_addr.3210.0.i.i, i32 1, i32 1 ; <i32*> [#uses=1]
+ %tmp132.i.i = load i32, i32* %tmp131.i.i ; <i32> [#uses=1]
+ %tmp133.i.i = getelementptr [14 x i32], [14 x i32]* @r, i32 0, i32 %tmp132.i.i ; <i32*> [#uses=1]
+ %tmp134.i.i = load i32, i32* %tmp133.i.i ; <i32> [#uses=1]
+ %tmp138.i.i = getelementptr [2 x { i32, [3 x i32] }], [2 x { i32, [3 x i32] }]* @pgm, i32 0, i32 %i_addr.3210.0.i.i, i32 1, i32 2 ; <i32*> [#uses=1]
+ %tmp139.i.i = load i32, i32* %tmp138.i.i ; <i32> [#uses=1]
+ %tmp140.i.i = getelementptr [14 x i32], [14 x i32]* @r, i32 0, i32 %tmp139.i.i ; <i32*> [#uses=1]
+ %tmp141.i.i = load i32, i32* %tmp140.i.i ; <i32> [#uses=1]
%tmp143.i.i = add i32 %i_addr.3210.0.i.i, 12 ; <i32> [#uses=1]
- %tmp146.i.i = getelementptr [2 x { i32, [3 x i32] }]* @pgm, i32 0, i32 %i_addr.3210.0.i.i, i32 0 ; <i32*> [#uses=1]
- %tmp147.i.i = load i32* %tmp146.i.i ; <i32> [#uses=1]
- %tmp149.i.i = getelementptr [13 x %struct.anon]* @isa, i32 0, i32 %tmp147.i.i, i32 0 ; <i32 (i32, i32, i32)**> [#uses=1]
- %tmp150.i.i = load i32 (i32, i32, i32)** %tmp149.i.i ; <i32 (i32, i32, i32)*> [#uses=1]
+ %tmp146.i.i = getelementptr [2 x { i32, [3 x i32] }], [2 x { i32, [3 x i32] }]* @pgm, i32 0, i32 %i_addr.3210.0.i.i, i32 0 ; <i32*> [#uses=1]
+ %tmp147.i.i = load i32, i32* %tmp146.i.i ; <i32> [#uses=1]
+ %tmp149.i.i = getelementptr [13 x %struct.anon], [13 x %struct.anon]* @isa, i32 0, i32 %tmp147.i.i, i32 0 ; <i32 (i32, i32, i32)**> [#uses=1]
+ %tmp150.i.i = load i32 (i32, i32, i32)*, i32 (i32, i32, i32)** %tmp149.i.i ; <i32 (i32, i32, i32)*> [#uses=1]
%tmp154.i.i = tail call i32 %tmp150.i.i( i32 %tmp127.i.i, i32 %tmp134.i.i, i32 %tmp141.i.i ) ; <i32> [#uses=1]
- %tmp155.i.i = getelementptr [14 x i32]* @r, i32 0, i32 %tmp143.i.i ; <i32*> [#uses=1]
+ %tmp155.i.i = getelementptr [14 x i32], [14 x i32]* @r, i32 0, i32 %tmp143.i.i ; <i32*> [#uses=1]
store i32 %tmp154.i.i, i32* %tmp155.i.i
- %tmp159.i.i = getelementptr [2 x i32]* @counter, i32 0, i32 %i_addr.3210.0.i.i ; <i32*> [#uses=2]
- %tmp160.i.i = load i32* %tmp159.i.i ; <i32> [#uses=1]
+ %tmp159.i.i = getelementptr [2 x i32], [2 x i32]* @counter, i32 0, i32 %i_addr.3210.0.i.i ; <i32*> [#uses=2]
+ %tmp160.i.i = load i32, i32* %tmp159.i.i ; <i32> [#uses=1]
%tmp161.i.i = add i32 %tmp160.i.i, 1 ; <i32> [#uses=1]
store i32 %tmp161.i.i, i32* %tmp159.i.i
%tmp166.i.i = add i32 %i_addr.3210.0.i.i, 1 ; <i32> [#uses=2]
- %tmp168.i.i = load i32* @numi ; <i32> [#uses=1]
+ %tmp168.i.i = load i32, i32* @numi ; <i32> [#uses=1]
icmp slt i32 %tmp166.i.i, %tmp168.i.i ; <i1>:0 [#uses=1]
br i1 %0, label %bb115.i.i.bb115.i.i_crit_edge, label %bb115.i.i.bb170.i.i_crit_edge.exitStub
}
diff --git a/test/CodeGen/ARM/coalesce-dbgvalue.ll b/test/CodeGen/ARM/coalesce-dbgvalue.ll
index 4e5fb5e5c60f..cd2ab257207a 100644
--- a/test/CodeGen/ARM/coalesce-dbgvalue.ll
+++ b/test/CodeGen/ARM/coalesce-dbgvalue.ll
@@ -27,11 +27,11 @@ for.cond1: ; preds = %for.end9, %for.cond
for.body2: ; preds = %for.cond1
store i32 %storemerge11, i32* @b, align 4, !dbg !26
- tail call void @llvm.dbg.value(metadata i32* null, i64 0, metadata !11, metadata !{!"0x102"}), !dbg !28
- %0 = load i64* @a, align 8, !dbg !29
+ tail call void @llvm.dbg.value(metadata i32* null, i64 0, metadata !11, metadata !DIExpression()), !dbg !28
+ %0 = load i64, i64* @a, align 8, !dbg !29
%xor = xor i64 %0, %e.1.ph, !dbg !29
%conv3 = trunc i64 %xor to i32, !dbg !29
- tail call void @llvm.dbg.value(metadata i32 %conv3, i64 0, metadata !10, metadata !{!"0x102"}), !dbg !29
+ tail call void @llvm.dbg.value(metadata i32 %conv3, i64 0, metadata !10, metadata !DIExpression()), !dbg !29
%tobool4 = icmp eq i32 %conv3, 0, !dbg !29
br i1 %tobool4, label %land.end, label %land.rhs, !dbg !29
@@ -44,7 +44,7 @@ land.end: ; preds = %land.rhs, %for.body
%1 = phi i1 [ false, %for.body2 ], [ %tobool5, %land.rhs ]
%land.ext = zext i1 %1 to i32
%call6 = tail call i32 bitcast (i32 (...)* @fn2 to i32 (i32, i32*)*)(i32 %land.ext, i32* null) #3
- %2 = load i32* @b, align 4, !dbg !26
+ %2 = load i32, i32* @b, align 4, !dbg !26
%inc8 = add nsw i32 %2, 1, !dbg !26
%phitmp = and i64 %xor, 4294967295, !dbg !26
br label %for.cond1.outer, !dbg !26
@@ -52,7 +52,7 @@ land.end: ; preds = %land.rhs, %for.body
for.cond1.outer: ; preds = %land.end, %for.cond1.preheader
%storemerge11.ph = phi i32 [ %inc8, %land.end ], [ 0, %for.cond1.preheader ]
%e.1.ph = phi i64 [ %phitmp, %land.end ], [ 0, %for.cond1.preheader ]
- %3 = load i32* @d, align 4, !dbg !31
+ %3 = load i32, i32* @d, align 4, !dbg !31
%tobool10 = icmp eq i32 %3, 0, !dbg !31
br label %for.cond1
@@ -79,33 +79,33 @@ attributes #3 = { nounwind }
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!33}
-!0 = !{!"0x11\0012\00clang version 3.4 (trunk 182024) (llvm/trunk 182023)\001\00\000\00\000", !1, !2, !2, !3, !15, !2} ; [ DW_TAG_compile_unit ] [/d/b/pr16110.c] [DW_LANG_C99]
-!1 = !{!"pr16110.c", !"/d/b"}
+!0 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.4 (trunk 182024) (llvm/trunk 182023)", isOptimized: true, emissionKind: 0, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !15, imports: !2)
+!1 = !DIFile(filename: "pr16110.c", directory: "/d/b")
!2 = !{}
!3 = !{!4}
-!4 = !{!"0x2e\00pr16110\00pr16110\00\007\000\001\000\006\000\001\007", !1, !5, !6, null, i32 ()* @pr16110, null, null, !9} ; [ DW_TAG_subprogram ] [line 7] [def] [pr16110]
-!5 = !{!"0x29", !1} ; [ DW_TAG_file_type ] [/d/b/pr16110.c]
-!6 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !7, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!4 = !DISubprogram(name: "pr16110", line: 7, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: true, scopeLine: 7, file: !1, scope: !5, type: !6, function: i32 ()* @pr16110, variables: !9)
+!5 = !DIFile(filename: "pr16110.c", directory: "/d/b")
+!6 = !DISubroutineType(types: !7)
!7 = !{!8}
-!8 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!8 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
!9 = !{!10, !11}
-!10 = !{!"0x100\00e\008\000", !4, !5, !8} ; [ DW_TAG_auto_variable ] [e] [line 8]
-!11 = !{!"0x100\00f\0013\000", !12, !5, !14} ; [ DW_TAG_auto_variable ] [f] [line 13]
-!12 = !{!"0xb\0012\000\002", !1, !13} ; [ DW_TAG_lexical_block ] [/d/b/pr16110.c]
-!13 = !{!"0xb\0012\000\001", !1, !4} ; [ DW_TAG_lexical_block ] [/d/b/pr16110.c]
-!14 = !{!"0xf\00\000\0032\0032\000\000", null, null, !8} ; [ DW_TAG_pointer_type ] [line 0, size 32, align 32, offset 0] [from int]
+!10 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "e", line: 8, scope: !4, file: !5, type: !8)
+!11 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "f", line: 13, scope: !12, file: !5, type: !14)
+!12 = distinct !DILexicalBlock(line: 12, column: 0, file: !1, scope: !13)
+!13 = distinct !DILexicalBlock(line: 12, column: 0, file: !1, scope: !4)
+!14 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 32, align: 32, baseType: !8)
!15 = !{!16, !18, !19, !20}
-!16 = !{!"0x34\00a\00a\00\001\000\001", null, !5, !17, i64* @a, null} ; [ DW_TAG_variable ] [a] [line 1] [def]
-!17 = !{!"0x24\00long long int\000\0064\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [long long int] [line 0, size 64, align 32, offset 0, enc DW_ATE_signed]
-!18 = !{!"0x34\00b\00b\00\002\000\001", null, !5, !8, i32* @b, null} ; [ DW_TAG_variable ] [b] [line 2] [def]
-!19 = !{!"0x34\00c\00c\00\003\000\001", null, !5, !8, i32* @c, null} ; [ DW_TAG_variable ] [c] [line 3] [def]
-!20 = !{!"0x34\00d\00d\00\004\000\001", null, !5, !8, i32* @d, null} ; [ DW_TAG_variable ] [d] [line 4] [def]
-!21 = !MDLocation(line: 10, scope: !22)
-!22 = !{!"0xb\0010\000\000", !1, !4} ; [ DW_TAG_lexical_block ] [/d/b/pr16110.c]
-!26 = !MDLocation(line: 12, scope: !13)
+!16 = !DIGlobalVariable(name: "a", line: 1, isLocal: false, isDefinition: true, scope: null, file: !5, type: !17, variable: i64* @a)
+!17 = !DIBasicType(tag: DW_TAG_base_type, name: "long long int", size: 64, align: 32, encoding: DW_ATE_signed)
+!18 = !DIGlobalVariable(name: "b", line: 2, isLocal: false, isDefinition: true, scope: null, file: !5, type: !8, variable: i32* @b)
+!19 = !DIGlobalVariable(name: "c", line: 3, isLocal: false, isDefinition: true, scope: null, file: !5, type: !8, variable: i32* @c)
+!20 = !DIGlobalVariable(name: "d", line: 4, isLocal: false, isDefinition: true, scope: null, file: !5, type: !8, variable: i32* @d)
+!21 = !DILocation(line: 10, scope: !22)
+!22 = distinct !DILexicalBlock(line: 10, column: 0, file: !1, scope: !4)
+!26 = !DILocation(line: 12, scope: !13)
!27 = !{i32* null}
-!28 = !MDLocation(line: 13, scope: !12)
-!29 = !MDLocation(line: 14, scope: !12)
-!31 = !MDLocation(line: 16, scope: !4)
-!32 = !MDLocation(line: 18, scope: !4)
-!33 = !{i32 1, !"Debug Info Version", i32 2}
+!28 = !DILocation(line: 13, scope: !12)
+!29 = !DILocation(line: 14, scope: !12)
+!31 = !DILocation(line: 16, scope: !4)
+!32 = !DILocation(line: 18, scope: !4)
+!33 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/CodeGen/ARM/coalesce-subregs.ll b/test/CodeGen/ARM/coalesce-subregs.ll
index e7bd5f41bb4b..72fefeacfc5b 100644
--- a/test/CodeGen/ARM/coalesce-subregs.ll
+++ b/test/CodeGen/ARM/coalesce-subregs.ll
@@ -16,7 +16,7 @@ entry:
%0 = bitcast float* %p to i8*
%vld2 = tail call { <4 x float>, <4 x float> } @llvm.arm.neon.vld2.v4f32(i8* %0, i32 4)
%vld221 = extractvalue { <4 x float>, <4 x float> } %vld2, 1
- %add.ptr = getelementptr inbounds float* %p, i32 8
+ %add.ptr = getelementptr inbounds float, float* %p, i32 8
%1 = bitcast float* %add.ptr to i8*
tail call void @llvm.arm.neon.vst2.v4f32(i8* %1, <4 x float> %vld221, <4 x float> undef, i32 4)
ret void
@@ -29,7 +29,7 @@ entry:
%0 = bitcast float* %p to i8*
%vld2 = tail call { <4 x float>, <4 x float> } @llvm.arm.neon.vld2.v4f32(i8* %0, i32 4)
%vld221 = extractvalue { <4 x float>, <4 x float> } %vld2, 1
- %add.ptr = getelementptr inbounds float* %p, i32 8
+ %add.ptr = getelementptr inbounds float, float* %p, i32 8
%1 = bitcast float* %add.ptr to i8*
%vld22 = tail call { <4 x float>, <4 x float> } @llvm.arm.neon.vld2.v4f32(i8* %1, i32 4)
%vld2215 = extractvalue { <4 x float>, <4 x float> } %vld22, 0
@@ -50,7 +50,7 @@ do.body: ; preds = %do.body, %entry
%qq0.0.1.0 = phi <4 x float> [ %vld224, %entry ], [ %vld2216, %do.body ]
%c.addr.0 = phi i32 [ %c, %entry ], [ %dec, %do.body ]
%p.addr.0 = phi float* [ %p, %entry ], [ %add.ptr, %do.body ]
- %add.ptr = getelementptr inbounds float* %p.addr.0, i32 8
+ %add.ptr = getelementptr inbounds float, float* %p.addr.0, i32 8
%1 = bitcast float* %add.ptr to i8*
%vld22 = tail call { <4 x float>, <4 x float> } @llvm.arm.neon.vld2.v4f32(i8* %1, i32 4)
%vld2215 = extractvalue { <4 x float>, <4 x float> } %vld22, 0
@@ -85,29 +85,29 @@ declare void @llvm.arm.neon.vst2.v4f32(i8*, <4 x float>, <4 x float>, i32) nounw
; CHECK-NOT: vorr
define void @f3(float* %p, float* %q) nounwind ssp {
entry:
- %arrayidx = getelementptr inbounds float* %p, i32 3
- %0 = load float* %arrayidx, align 4
+ %arrayidx = getelementptr inbounds float, float* %p, i32 3
+ %0 = load float, float* %arrayidx, align 4
%vecins = insertelement <2 x float> undef, float %0, i32 1
%tobool = icmp eq float* %q, null
br i1 %tobool, label %if.else, label %if.then
if.then: ; preds = %entry
- %1 = load float* %q, align 4
- %arrayidx2 = getelementptr inbounds float* %q, i32 1
- %2 = load float* %arrayidx2, align 4
+ %1 = load float, float* %q, align 4
+ %arrayidx2 = getelementptr inbounds float, float* %q, i32 1
+ %2 = load float, float* %arrayidx2, align 4
%add = fadd float %1, %2
%vecins3 = insertelement <2 x float> %vecins, float %add, i32 0
br label %if.end
if.else: ; preds = %entry
- %arrayidx4 = getelementptr inbounds float* %p, i32 2
- %3 = load float* %arrayidx4, align 4
+ %arrayidx4 = getelementptr inbounds float, float* %p, i32 2
+ %3 = load float, float* %arrayidx4, align 4
%vecins5 = insertelement <2 x float> %vecins, float %3, i32 0
br label %if.end
if.end: ; preds = %if.else, %if.then
%x.0 = phi <2 x float> [ %vecins3, %if.then ], [ %vecins5, %if.else ]
- %add.ptr = getelementptr inbounds float* %p, i32 4
+ %add.ptr = getelementptr inbounds float, float* %p, i32 4
%4 = bitcast float* %add.ptr to i8*
tail call void @llvm.arm.neon.vst1.v2f32(i8* %4, <2 x float> %x.0, i32 4)
ret void
@@ -129,9 +129,9 @@ entry:
br i1 %tobool, label %if.end, label %if.then
if.then: ; preds = %entry
- %1 = load float* %q, align 4
- %arrayidx1 = getelementptr inbounds float* %q, i32 1
- %2 = load float* %arrayidx1, align 4
+ %1 = load float, float* %q, align 4
+ %arrayidx1 = getelementptr inbounds float, float* %q, i32 1
+ %2 = load float, float* %arrayidx1, align 4
%add = fadd float %1, %2
%vecins = insertelement <2 x float> %vld1, float %add, i32 1
br label %if.end
@@ -164,13 +164,13 @@ entry:
br i1 %tobool, label %if.end, label %if.then
if.then: ; preds = %entry
- %arrayidx = getelementptr inbounds float* %q, i32 1
- %1 = load float* %arrayidx, align 4
+ %arrayidx = getelementptr inbounds float, float* %q, i32 1
+ %1 = load float, float* %arrayidx, align 4
%add4 = fadd float %vecext, %1
- %2 = load float* %q, align 4
+ %2 = load float, float* %q, align 4
%add6 = fadd float %vecext1, %2
- %arrayidx7 = getelementptr inbounds float* %q, i32 2
- %3 = load float* %arrayidx7, align 4
+ %arrayidx7 = getelementptr inbounds float, float* %q, i32 2
+ %3 = load float, float* %arrayidx7, align 4
%add8 = fadd float %vecext2, %3
br label %if.end
@@ -231,7 +231,7 @@ bb3: ; preds = %bb12, %bb
br i1 undef, label %bb10, label %bb12
bb10: ; preds = %bb3
- %tmp11 = load <4 x float>* undef, align 8
+ %tmp11 = load <4 x float>, <4 x float>* undef, align 8
br label %bb12
bb12: ; preds = %bb10, %bb3
@@ -293,7 +293,6 @@ bb:
; CHECK: adjustCopiesBackFrom
; The shuffle in if.else3 must be preserved even though adjustCopiesBackFrom
; is tempted to remove it.
-; CHECK: %if.else3
; CHECK: vorr d
define internal void @adjustCopiesBackFrom(<2 x i64>* noalias nocapture sret %agg.result, <2 x i64> %in) {
entry:
@@ -334,7 +333,7 @@ for.body: ; preds = %for.end, %entry
br i1 undef, label %for.body29, label %for.end
for.body29: ; preds = %for.body29, %for.body
- %0 = load <2 x double>* null, align 1
+ %0 = load <2 x double>, <2 x double>* null, align 1
%splat40 = shufflevector <2 x double> %0, <2 x double> undef, <2 x i32> zeroinitializer
%mul41 = fmul <2 x double> undef, %splat40
%add42 = fadd <2 x double> undef, %mul41
diff --git a/test/CodeGen/ARM/code-placement.ll b/test/CodeGen/ARM/code-placement.ll
index 70d85c91c8ca..bf5cf52d8b54 100644
--- a/test/CodeGen/ARM/code-placement.ll
+++ b/test/CodeGen/ARM/code-placement.ll
@@ -18,8 +18,8 @@ bb:
; CHECK: bx lr
%list_addr.05 = phi %struct.list_head* [ %2, %bb ], [ %list, %entry ]
%next.04 = phi %struct.list_head* [ %list_addr.05, %bb ], [ null, %entry ]
- %1 = getelementptr inbounds %struct.list_head* %list_addr.05, i32 0, i32 0
- %2 = load %struct.list_head** %1, align 4
+ %1 = getelementptr inbounds %struct.list_head, %struct.list_head* %list_addr.05, i32 0, i32 0
+ %2 = load %struct.list_head*, %struct.list_head** %1, align 4
store %struct.list_head* %next.04, %struct.list_head** %1, align 4
%3 = icmp eq %struct.list_head* %2, null
br i1 %3, label %bb2, label %bb
@@ -45,8 +45,8 @@ bb1: ; preds = %bb2.preheader, %bb1
%indvar = phi i32 [ %indvar.next, %bb1 ], [ 0, %bb2.preheader ] ; <i32> [#uses=2]
%sum.08 = phi i32 [ %2, %bb1 ], [ %sum.110, %bb2.preheader ] ; <i32> [#uses=1]
%tmp17 = sub i32 %i.07, %indvar ; <i32> [#uses=1]
- %scevgep = getelementptr i32* %src, i32 %tmp17 ; <i32*> [#uses=1]
- %1 = load i32* %scevgep, align 4 ; <i32> [#uses=1]
+ %scevgep = getelementptr i32, i32* %src, i32 %tmp17 ; <i32*> [#uses=1]
+ %1 = load i32, i32* %scevgep, align 4 ; <i32> [#uses=1]
%2 = add nsw i32 %1, %sum.08 ; <i32> [#uses=2]
%indvar.next = add i32 %indvar, 1 ; <i32> [#uses=2]
%exitcond = icmp eq i32 %indvar.next, %size ; <i1> [#uses=1]
diff --git a/test/CodeGen/ARM/combine-movc-sub.ll b/test/CodeGen/ARM/combine-movc-sub.ll
new file mode 100644
index 000000000000..6a5f7bdd38e9
--- /dev/null
+++ b/test/CodeGen/ARM/combine-movc-sub.ll
@@ -0,0 +1,63 @@
+; RUN: llc %s -o - -verify-machineinstrs | FileCheck %s
+
+target datalayout = "e-m:o-p:32:32-f64:32:64-v64:32:64-v128:32:128-a:0:32-n32-S32"
+target triple = "thumbv7s-apple-unknown"
+
+; The subtract instruction %3 will be optimized (combined and predicated) with the select
+; inside the loop. In this case, the kill flag on the subtract should be removed or else
+; it will fail verification.
+
+%struct.PROOFSEARCH_HELP = type { %struct.LIST_HELP*, %struct.LIST_HELP*, %struct.LIST_HELP*, %struct.LIST_HELP*, %struct.SHARED_INDEX_NODE*, %struct.LIST_HELP*, %struct.SHARED_INDEX_NODE*, %struct.LIST_HELP*, %struct.SORTTHEORY_HELP*, %struct.SORTTHEORY_HELP*, %struct.SORTTHEORY_HELP*, %struct.SHARED_INDEX_NODE*, %struct.LIST_HELP*, i32*, i32*, %struct.LIST_HELP*, i32, i32, i32, i32, i32, i32, i32, i32 }
+%struct.SORTTHEORY_HELP = type { %struct.st*, [4000 x %struct.NODE_HELP*], %struct.LIST_HELP*, %struct.LIST_HELP*, i32 }
+%struct.st = type { %struct.subst*, %struct.LIST_HELP*, %struct.LIST_HELP*, i16, i16 }
+%struct.subst = type { %struct.subst*, i32, %struct.term* }
+%struct.term = type { i32, %union.anon, %struct.LIST_HELP*, i32, i32 }
+%union.anon = type { %struct.LIST_HELP* }
+%struct.NODE_HELP = type { %struct.LIST_HELP*, i32, i32, i32, %struct.LIST_HELP*, i32 }
+%struct.SHARED_INDEX_NODE = type { %struct.st*, [3001 x %struct.term*], [4000 x %struct.term*], i32 }
+%struct.LIST_HELP = type { %struct.LIST_HELP*, i8* }
+%struct.CLAUSE_HELP = type { i32, i32, i32, i32, i32*, i32, %struct.LIST_HELP*, %struct.LIST_HELP*, i32, i32, %struct.LITERAL_HELP**, i32, i32, i32, i32 }
+%struct.LITERAL_HELP = type { i32, i32, i32, %struct.CLAUSE_HELP*, %struct.term* }
+
+declare void @foo(%struct.PROOFSEARCH_HELP*, %struct.CLAUSE_HELP*)
+
+; CHECK-LABEL: @test
+; CHECK: it
+; CHECK-NEXT: sub
+
+define hidden fastcc %struct.LIST_HELP* @test(%struct.PROOFSEARCH_HELP* %Search, %struct.LIST_HELP* %ClauseList, i32 %Level, %struct.LIST_HELP** nocapture %New) {
+entry:
+ %cmp4.i.i = icmp ugt i32 %Level, 31
+ %0 = add i32 %Level, -32
+ %1 = lshr i32 %0, 5
+ %2 = shl nuw i32 %1, 5
+ %3 = sub i32 %0, %2
+ %4 = add nuw nsw i32 %1, 1
+ br label %for.body
+
+for.body: ; preds = %for.inc, %entry
+ %Scan.038 = phi %struct.LIST_HELP* [ %ClauseList, %entry ], [ %9, %for.inc ]
+ %car.i33 = getelementptr inbounds %struct.LIST_HELP, %struct.LIST_HELP* %Scan.038, i32 0, i32 1
+ %5 = bitcast i8** %car.i33 to %struct.CLAUSE_HELP**
+ %6 = load %struct.CLAUSE_HELP*, %struct.CLAUSE_HELP** %5, align 4
+ %. = add i32 %4, 10
+ %.Level = select i1 %cmp4.i.i, i32 %3, i32 %Level
+ %splitfield.i = getelementptr inbounds %struct.CLAUSE_HELP, %struct.CLAUSE_HELP* %6, i32 0, i32 4
+ %7 = load i32*, i32** %splitfield.i, align 4
+ %arrayidx.i = getelementptr inbounds i32, i32* %7, i32 %.
+ %8 = load i32, i32* %arrayidx.i, align 4
+ %shl.i = shl i32 1, %.Level
+ %and.i = and i32 %8, %shl.i
+ %cmp4.i = icmp eq i32 %and.i, 0
+ br i1 %cmp4.i, label %for.inc, label %if.then
+
+if.then: ; preds = %for.body
+ tail call void @foo(%struct.PROOFSEARCH_HELP* %Search, %struct.CLAUSE_HELP* %6)
+ store i8* null, i8** %car.i33, align 4
+ br label %for.inc
+
+for.inc: ; preds = %if.then, %for.body
+ %cdr.i = getelementptr inbounds %struct.LIST_HELP, %struct.LIST_HELP* %Scan.038, i32 0, i32 0
+ %9 = load %struct.LIST_HELP*, %struct.LIST_HELP** %cdr.i, align 4
+ br label %for.body
+}
diff --git a/test/CodeGen/ARM/commute-movcc.ll b/test/CodeGen/ARM/commute-movcc.ll
index fbc25b45b6ff..2978d317ad78 100644
--- a/test/CodeGen/ARM/commute-movcc.ll
+++ b/test/CodeGen/ARM/commute-movcc.ll
@@ -31,8 +31,8 @@ for.body: ; preds = %entry, %if.end8
%i.012 = phi i32 [ 0, %entry ], [ %inc, %if.end8 ]
%BestCost.011 = phi i32 [ -1, %entry ], [ %BestCost.1, %if.end8 ]
%BestIdx.010 = phi i32 [ 0, %entry ], [ %BestIdx.1, %if.end8 ]
- %arrayidx = getelementptr inbounds i32* %a, i32 %i.012
- %0 = load i32* %arrayidx, align 4
+ %arrayidx = getelementptr inbounds i32, i32* %a, i32 %i.012
+ %0 = load i32, i32* %arrayidx, align 4
%mul = mul i32 %0, %0
%sub = add nsw i32 %i.012, -5
%cmp2 = icmp eq i32 %sub, %Pref
diff --git a/test/CodeGen/ARM/compare-call.ll b/test/CodeGen/ARM/compare-call.ll
index 323eb1f2dd3c..f45ed73adb71 100644
--- a/test/CodeGen/ARM/compare-call.ll
+++ b/test/CodeGen/ARM/compare-call.ll
@@ -2,14 +2,14 @@
define void @test3(float* %glob, i32 %X) {
entry:
- %tmp = load float* %glob ; <float> [#uses=1]
- %tmp2 = getelementptr float* %glob, i32 2 ; <float*> [#uses=1]
- %tmp3 = load float* %tmp2 ; <float> [#uses=1]
+ %tmp = load float, float* %glob ; <float> [#uses=1]
+ %tmp2 = getelementptr float, float* %glob, i32 2 ; <float*> [#uses=1]
+ %tmp3 = load float, float* %tmp2 ; <float> [#uses=1]
%tmp.upgrd.1 = fcmp ogt float %tmp, %tmp3 ; <i1> [#uses=1]
br i1 %tmp.upgrd.1, label %cond_true, label %UnifiedReturnBlock
cond_true: ; preds = %entry
- %tmp.upgrd.2 = tail call i32 (...)* @bar( ) ; <i32> [#uses=0]
+ %tmp.upgrd.2 = tail call i32 (...) @bar( ) ; <i32> [#uses=0]
ret void
UnifiedReturnBlock: ; preds = %entry
diff --git a/test/CodeGen/ARM/copy-paired-reg.ll b/test/CodeGen/ARM/copy-paired-reg.ll
index 17a4461c682b..453fac4b1504 100644
--- a/test/CodeGen/ARM/copy-paired-reg.ll
+++ b/test/CodeGen/ARM/copy-paired-reg.ll
@@ -11,7 +11,7 @@ define void @f() {
store atomic i64 0, i64* %c seq_cst, align 8
store atomic i64 0, i64* %d seq_cst, align 8
- %e = load atomic i64* %d seq_cst, align 8
+ %e = load atomic i64, i64* %d seq_cst, align 8
ret void
}
diff --git a/test/CodeGen/ARM/crash-O0.ll b/test/CodeGen/ARM/crash-O0.ll
index 8855bb99aaf9..f92af999be51 100644
--- a/test/CodeGen/ARM/crash-O0.ll
+++ b/test/CodeGen/ARM/crash-O0.ll
@@ -21,8 +21,8 @@ entry:
%letter = alloca i8 ; <i8*> [#uses=0]
%prodvers = alloca [256 x i8] ; <[256 x i8]*> [#uses=1]
%buildver = alloca [256 x i8] ; <[256 x i8]*> [#uses=0]
- call void @llvm.memcpy.p0i8.p0i8.i32(i8* undef, i8* getelementptr inbounds ([256 x i8]* @.str523, i32 0, i32 0), i32 256, i32 1, i1 false)
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* undef, i8* getelementptr inbounds ([256 x i8], [256 x i8]* @.str523, i32 0, i32 0), i32 256, i32 1, i1 false)
%prodvers2 = bitcast [256 x i8]* %prodvers to i8* ; <i8*> [#uses=1]
- call void @llvm.memcpy.p0i8.p0i8.i32(i8* %prodvers2, i8* getelementptr inbounds ([256 x i8]* @.str523, i32 0, i32 0), i32 256, i32 1, i1 false)
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %prodvers2, i8* getelementptr inbounds ([256 x i8], [256 x i8]* @.str523, i32 0, i32 0), i32 256, i32 1, i1 false)
unreachable
}
diff --git a/test/CodeGen/ARM/crash-greedy-v6.ll b/test/CodeGen/ARM/crash-greedy-v6.ll
index e165dbdf087a..287c081ac5ec 100644
--- a/test/CodeGen/ARM/crash-greedy-v6.ll
+++ b/test/CodeGen/ARM/crash-greedy-v6.ll
@@ -32,13 +32,13 @@ for.body: ; preds = %for.body, %for.body
; SOURCE-SCHED: cmp
; SOURCE-SCHED: bne
%i.031 = phi i32 [ 0, %for.body.lr.ph ], [ %0, %for.body ]
- %arrayidx11 = getelementptr float* %t, i32 %i.031
- %arrayidx15 = getelementptr float* %u, i32 %i.031
- %arrayidx19 = getelementptr i8* %red, i32 %i.031
- %arrayidx22 = getelementptr i8* %green, i32 %i.031
- %arrayidx25 = getelementptr i8* %blue, i32 %i.031
- %arrayidx28 = getelementptr i8* %alpha, i32 %i.031
- %tmp12 = load float* %arrayidx11, align 4
+ %arrayidx11 = getelementptr float, float* %t, i32 %i.031
+ %arrayidx15 = getelementptr float, float* %u, i32 %i.031
+ %arrayidx19 = getelementptr i8, i8* %red, i32 %i.031
+ %arrayidx22 = getelementptr i8, i8* %green, i32 %i.031
+ %arrayidx25 = getelementptr i8, i8* %blue, i32 %i.031
+ %arrayidx28 = getelementptr i8, i8* %alpha, i32 %i.031
+ %tmp12 = load float, float* %arrayidx11, align 4
tail call fastcc void @sample_3d_nearest(i8* %tObj, i8* undef, float undef, float %tmp12, float undef, i8* %arrayidx19, i8* %arrayidx22, i8* %arrayidx25, i8* %arrayidx28)
%0 = add i32 %i.031, 1
%exitcond = icmp eq i32 %0, %n
diff --git a/test/CodeGen/ARM/crash.ll b/test/CodeGen/ARM/crash.ll
index 4e3e2010b07a..3b01d8113b9c 100644
--- a/test/CodeGen/ARM/crash.ll
+++ b/test/CodeGen/ARM/crash.ll
@@ -5,7 +5,7 @@
define void @func() nounwind {
entry:
- %tmp = load i32* undef, align 4
+ %tmp = load i32, i32* undef, align 4
br label %bb1
bb1:
@@ -17,7 +17,7 @@ bb1:
bb2:
%tmp120 = add i32 %tmp119, 0
- %scevgep810.i = getelementptr %struct.foo* null, i32 %tmp120, i32 1
+ %scevgep810.i = getelementptr %struct.foo, %struct.foo* null, i32 %tmp120, i32 1
store i32 undef, i32* %scevgep810.i, align 4
br i1 undef, label %bb2, label %bb3
diff --git a/test/CodeGen/ARM/cse-call.ll b/test/CodeGen/ARM/cse-call.ll
index eff5de5a2428..4e61a3495b80 100644
--- a/test/CodeGen/ARM/cse-call.ll
+++ b/test/CodeGen/ARM/cse-call.ll
@@ -23,8 +23,8 @@ while.body37: ; preds = %while.body37, %entr
br i1 false, label %while.end42, label %while.body37
while.end42: ; preds = %while.body37, %entry
- %. = select i1 undef, i8* getelementptr inbounds ([200 x i8]* @F_floatmul.man1, i32 0, i32 0), i8* getelementptr inbounds ([200 x i8]* @F_floatmul.man2, i32 0, i32 0)
- %.92 = select i1 undef, i8* getelementptr inbounds ([200 x i8]* @F_floatmul.man2, i32 0, i32 0), i8* getelementptr inbounds ([200 x i8]* @F_floatmul.man1, i32 0, i32 0)
+ %. = select i1 undef, i8* getelementptr inbounds ([200 x i8], [200 x i8]* @F_floatmul.man1, i32 0, i32 0), i8* getelementptr inbounds ([200 x i8], [200 x i8]* @F_floatmul.man2, i32 0, i32 0)
+ %.92 = select i1 undef, i8* getelementptr inbounds ([200 x i8], [200 x i8]* @F_floatmul.man2, i32 0, i32 0), i8* getelementptr inbounds ([200 x i8], [200 x i8]* @F_floatmul.man1, i32 0, i32 0)
tail call void bitcast (void (...)* @S_trimzeros to void (i8*)*)(i8* %.92) nounwind
%call47 = tail call i32 @strlen(i8* %.) nounwind
unreachable
diff --git a/test/CodeGen/ARM/cse-ldrlit.ll b/test/CodeGen/ARM/cse-ldrlit.ll
index 3f5d4c2e3c29..4f369d0a78ea 100644
--- a/test/CodeGen/ARM/cse-ldrlit.ll
+++ b/test/CodeGen/ARM/cse-ldrlit.ll
@@ -9,11 +9,11 @@
declare void @bar(i32*)
define void @foo() {
- %flag = load i32* getelementptr inbounds([16 x i32]* @var, i32 0, i32 1)
+ %flag = load i32, i32* getelementptr inbounds([16 x i32], [16 x i32]* @var, i32 0, i32 1)
%tst = icmp eq i32 %flag, 0
br i1 %tst, label %true, label %false
true:
- tail call void @bar(i32* getelementptr inbounds([16 x i32]* @var, i32 0, i32 4))
+ tail call void @bar(i32* getelementptr inbounds([16 x i32], [16 x i32]* @var, i32 0, i32 4))
ret void
false:
ret void
diff --git a/test/CodeGen/ARM/cse-libcalls.ll b/test/CodeGen/ARM/cse-libcalls.ll
index 4f5b7592c844..1255ec5a78f1 100644
--- a/test/CodeGen/ARM/cse-libcalls.ll
+++ b/test/CodeGen/ARM/cse-libcalls.ll
@@ -10,7 +10,7 @@ target triple = "i386-apple-darwin8"
define double @u_f_nonbon(double %lambda) nounwind {
entry:
- %tmp19.i.i = load double* null, align 4 ; <double> [#uses=2]
+ %tmp19.i.i = load double, double* null, align 4 ; <double> [#uses=2]
%tmp6.i = fcmp olt double %tmp19.i.i, 1.000000e+00 ; <i1> [#uses=1]
%dielectric.0.i = select i1 %tmp6.i, double 1.000000e+00, double %tmp19.i.i ; <double> [#uses=1]
%tmp10.i4 = fdiv double 0x4074C2D71F36262D, %dielectric.0.i ; <double> [#uses=1]
diff --git a/test/CodeGen/ARM/dagcombine-anyexttozeroext.ll b/test/CodeGen/ARM/dagcombine-anyexttozeroext.ll
index 8950abdef6a3..8b7153503b1f 100644
--- a/test/CodeGen/ARM/dagcombine-anyexttozeroext.ll
+++ b/test/CodeGen/ARM/dagcombine-anyexttozeroext.ll
@@ -2,10 +2,10 @@
; CHECK-LABEL: f:
define float @f(<4 x i16>* nocapture %in) {
- ; CHECK: vldr
+ ; CHECK: vld1
; CHECK: vmovl.u16
; CHECK-NOT: vand
- %1 = load <4 x i16>* %in
+ %1 = load <4 x i16>, <4 x i16>* %in
; CHECK: vcvt.f32.u32
%2 = uitofp <4 x i16> %1 to <4 x float>
%3 = extractelement <4 x float> %2, i32 0
@@ -21,7 +21,7 @@ define float @f(<4 x i16>* nocapture %in) {
define float @g(<4 x i16>* nocapture %in) {
; CHECK: vldr
- %1 = load <4 x i16>* %in
+ %1 = load <4 x i16>, <4 x i16>* %in
; CHECK-NOT: uxth
%2 = extractelement <4 x i16> %1, i32 0
; CHECK: vcvt.f32.u32
diff --git a/test/CodeGen/ARM/darwin-section-order.ll b/test/CodeGen/ARM/darwin-section-order.ll
deleted file mode 100644
index 701028c0a537..000000000000
--- a/test/CodeGen/ARM/darwin-section-order.ll
+++ /dev/null
@@ -1,21 +0,0 @@
-; RUN: llc < %s -mtriple=armv7-apple-darwin | FileCheck %s
-
-; CHECK: .section __TEXT,__text,regular,pure_instructions
-; CHECK: .section __TEXT,myprecious
-; CHECK: .section __TEXT,__textcoal_nt,coalesced,pure_instructions
-; CHECK: .section __TEXT,__const_coal,coalesced
-; CHECK: .section __TEXT,__picsymbolstub4,symbol_stubs,none,16
-; CHECK: .section __TEXT,__StaticInit,regular,pure_instructions
-
-
-define void @normal() nounwind readnone {
-; CHECK: .section __TEXT,__text,regular,pure_instructions
-; CHECK: _normal:
- ret void
-}
-
-define void @special() nounwind readnone section "__TEXT,myprecious" {
-; CHECK: .section __TEXT,myprecious
-; CHECK: _special:
- ret void
-}
diff --git a/test/CodeGen/ARM/debug-frame-large-stack.ll b/test/CodeGen/ARM/debug-frame-large-stack.ll
index 1addf639bfe4..1f814e70d543 100644
--- a/test/CodeGen/ARM/debug-frame-large-stack.ll
+++ b/test/CodeGen/ARM/debug-frame-large-stack.ll
@@ -48,7 +48,7 @@ define i32 @test3() {
%tmp = alloca i32, align 4
%a = alloca [805306369 x i8], align 16
store i32 0, i32* %tmp
- %tmp1 = load i32* %tmp
+ %tmp1 = load i32, i32* %tmp
ret i32 %tmp1
}
diff --git a/test/CodeGen/ARM/debug-frame-vararg.ll b/test/CodeGen/ARM/debug-frame-vararg.ll
index 05521d80646c..19e55fe02354 100644
--- a/test/CodeGen/ARM/debug-frame-vararg.ll
+++ b/test/CodeGen/ARM/debug-frame-vararg.ll
@@ -25,90 +25,88 @@
!llvm.module.flags = !{!9, !10}
!llvm.ident = !{!11}
-!0 = !{!"0x11\0012\00clang version 3.5 \000\00\000\00\000", !1, !2, !2, !3, !2, !2} ; [ DW_TAG_compile_unit ] [/tmp/var.c] [DW_LANG_C99]
-!1 = !{!"var.c", !"/tmp"}
+!0 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.5 ", isOptimized: false, emissionKind: 0, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!1 = !DIFile(filename: "var.c", directory: "/tmp")
!2 = !{}
!3 = !{!4}
-!4 = !{!"0x2e\00sum\00sum\00\005\000\001\000\006\00256\000\005", !1, !5, !6, null, i32 (i32, ...)* @sum, null, null, !2} ; [ DW_TAG_subprogram ] [line 5] [def] [sum]
-!5 = !{!"0x29", !1} ; [ DW_TAG_file_type ] [/tmp/var.c]
-!6 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !7, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!4 = !DISubprogram(name: "sum", line: 5, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 5, file: !1, scope: !5, type: !6, function: i32 (i32, ...)* @sum, variables: !2)
+!5 = !DIFile(filename: "var.c", directory: "/tmp")
+!6 = !DISubroutineType(types: !7)
!7 = !{!8, !8}
-!8 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!8 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
!9 = !{i32 2, !"Dwarf Version", i32 4}
-!10 = !{i32 1, !"Debug Info Version", i32 2}
+!10 = !{i32 1, !"Debug Info Version", i32 3}
!11 = !{!"clang version 3.5 "}
-!12 = !{!"0x101\00count\0016777221\000", !4, !5, !8} ; [ DW_TAG_arg_variable ] [count] [line 5]
-!13 = !MDLocation(line: 5, scope: !4)
-!14 = !{!"0x100\00vl\006\000", !4, !5, !15} ; [ DW_TAG_auto_variable ] [vl] [line 6]
-!15 = !{!"0x16\00va_list\0030\000\000\000\000", !16, null, !17} ; [ DW_TAG_typedef ] [va_list] [line 30, size 0, align 0, offset 0] [from __builtin_va_list]
-!16 = !{!"/linux-x86_64-high/gcc_4.7.2/dbg/llvm/bin/../lib/clang/3.5/include/stdarg.h", !"/tmp"}
-!17 = !{!"0x16\00__builtin_va_list\006\000\000\000\000", !1, null, !18} ; [ DW_TAG_typedef ] [__builtin_va_list] [line 6, size 0, align 0, offset 0] [from __va_list]
-!18 = !{!"0x13\00__va_list\006\0032\0032\000\000\000", !1, null, null, !19, null, null, null} ; [ DW_TAG_structure_type ] [__va_list] [line 6, size 32, align 32, offset 0] [def] [from ]
+!12 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "count", line: 5, arg: 1, scope: !4, file: !5, type: !8)
+!13 = !DILocation(line: 5, scope: !4)
+!14 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "vl", line: 6, scope: !4, file: !5, type: !15)
+!15 = !DIDerivedType(tag: DW_TAG_typedef, name: "va_list", line: 30, file: !16, baseType: !17)
+!16 = !DIFile(filename: "/linux-x86_64-high/gcc_4.7.2/dbg/llvm/bin/../lib/clang/3.5/include/stdarg.h", directory: "/tmp")
+!17 = !DIDerivedType(tag: DW_TAG_typedef, name: "__builtin_va_list", line: 6, file: !1, baseType: !18)
+!18 = !DICompositeType(tag: DW_TAG_structure_type, name: "__va_list", line: 6, size: 32, align: 32, file: !1, elements: !19)
!19 = !{!20}
-!20 = !{!"0xd\00__ap\006\0032\0032\000\000", !1, !18, !21} ; [ DW_TAG_member ] [__ap] [line 6, size 32, align 32, offset 0] [from ]
-!21 = !{!"0xf\00\000\0032\0032\000\000", null, null, null} ; [ DW_TAG_pointer_type ] [line 0, size 32, align 32, offset 0] [from ]
-!22 = !MDLocation(line: 6, scope: !4)
-!23 = !MDLocation(line: 7, scope: !4)
-!24 = !{!"0x100\00sum\008\000", !4, !5, !8} ; [ DW_TAG_auto_variable ] [sum] [line 8]
-!25 = !MDLocation(line: 8, scope: !4)
-!26 = !{!"0x100\00i\009\000", !27, !5, !8} ; [ DW_TAG_auto_variable ] [i] [line 9]
-!27 = !{!"0xb\009\000\000", !1, !4} ; [ DW_TAG_lexical_block ] [/tmp/var.c]
-!28 = !MDLocation(line: 9, scope: !27)
-!29 = !MDLocation(line: 10, scope: !30)
-!30 = !{!"0xb\009\000\001", !1, !27} ; [ DW_TAG_lexical_block ] [/tmp/var.c]
-!31 = !MDLocation(line: 11, scope: !30)
-!32 = !MDLocation(line: 12, scope: !4)
-!33 = !MDLocation(line: 13, scope: !4)
+!20 = !DIDerivedType(tag: DW_TAG_member, name: "__ap", line: 6, size: 32, align: 32, file: !1, scope: !18, baseType: !21)
+!21 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 32, align: 32, baseType: null)
+!22 = !DILocation(line: 6, scope: !4)
+!23 = !DILocation(line: 7, scope: !4)
+!24 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "sum", line: 8, scope: !4, file: !5, type: !8)
+!25 = !DILocation(line: 8, scope: !4)
+!26 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "i", line: 9, scope: !27, file: !5, type: !8)
+!27 = distinct !DILexicalBlock(line: 9, column: 0, file: !1, scope: !4)
+!28 = !DILocation(line: 9, scope: !27)
+!29 = !DILocation(line: 10, scope: !30)
+!30 = distinct !DILexicalBlock(line: 9, column: 0, file: !1, scope: !27)
+!31 = !DILocation(line: 11, scope: !30)
+!32 = !DILocation(line: 12, scope: !4)
+!33 = !DILocation(line: 13, scope: !4)
; CHECK-FP-LABEL: sum
; CHECK-FP: .cfi_startproc
-; CHECK-FP: sub sp, sp, #16
-; CHECK-FP: .cfi_def_cfa_offset 16
+; CHECK-FP: sub sp, sp, #12
+; CHECK-FP: .cfi_def_cfa_offset 12
; CHECK-FP: push {r4, lr}
+; CHECK-FP: .cfi_def_cfa_offset 20
+; CHECK-FP: .cfi_offset lr, -16
+; CHECK-FP: .cfi_offset r4, -20
+; CHECK-FP: sub sp, sp, #4
; CHECK-FP: .cfi_def_cfa_offset 24
-; CHECK-FP: .cfi_offset lr, -20
-; CHECK-FP: .cfi_offset r4, -24
-; CHECK-FP: sub sp, sp, #8
-; CHECK-FP: .cfi_def_cfa_offset 32
; CHECK-FP-ELIM-LABEL: sum
; CHECK-FP-ELIM: .cfi_startproc
-; CHECK-FP-ELIM: sub sp, sp, #16
-; CHECK-FP-ELIM: .cfi_def_cfa_offset 16
+; CHECK-FP-ELIM: sub sp, sp, #12
+; CHECK-FP-ELIM: .cfi_def_cfa_offset 12
; CHECK-FP-ELIM: push {r4, r10, r11, lr}
-; CHECK-FP-ELIM: .cfi_def_cfa_offset 32
-; CHECK-FP-ELIM: .cfi_offset lr, -20
-; CHECK-FP-ELIM: .cfi_offset r11, -24
-; CHECK-FP-ELIM: .cfi_offset r10, -28
-; CHECK-FP-ELIM: .cfi_offset r4, -32
+; CHECK-FP-ELIM: .cfi_def_cfa_offset 28
+; CHECK-FP-ELIM: .cfi_offset lr, -16
+; CHECK-FP-ELIM: .cfi_offset r11, -20
+; CHECK-FP-ELIM: .cfi_offset r10, -24
+; CHECK-FP-ELIM: .cfi_offset r4, -28
; CHECK-FP-ELIM: add r11, sp, #8
-; CHECK-FP-ELIM: .cfi_def_cfa r11, 24
+; CHECK-FP-ELIM: .cfi_def_cfa r11, 20
; CHECK-THUMB-FP-LABEL: sum
; CHECK-THUMB-FP: .cfi_startproc
-; CHECK-THUMB-FP: sub sp, #16
-; CHECK-THUMB-FP: .cfi_def_cfa_offset 16
-; CHECK-THUMB-FP: push {r4, r5, r7, lr}
-; CHECK-THUMB-FP: .cfi_def_cfa_offset 32
-; CHECK-THUMB-FP: .cfi_offset lr, -20
-; CHECK-THUMB-FP: .cfi_offset r7, -24
-; CHECK-THUMB-FP: .cfi_offset r5, -28
-; CHECK-THUMB-FP: .cfi_offset r4, -32
-; CHECK-THUMB-FP: sub sp, #8
-; CHECK-THUMB-FP: .cfi_def_cfa_offset 40
+; CHECK-THUMB-FP: sub sp, #12
+; CHECK-THUMB-FP: .cfi_def_cfa_offset 12
+; CHECK-THUMB-FP: push {r4, lr}
+; CHECK-THUMB-FP: .cfi_def_cfa_offset 20
+; CHECK-THUMB-FP: .cfi_offset lr, -16
+; CHECK-THUMB-FP: .cfi_offset r4, -20
+; CHECK-THUMB-FP: sub sp, #4
+; CHECK-THUMB-FP: .cfi_def_cfa_offset 24
; CHECK-THUMB-FP-ELIM-LABEL: sum
; CHECK-THUMB-FP-ELIM: .cfi_startproc
-; CHECK-THUMB-FP-ELIM: sub sp, #16
-; CHECK-THUMB-FP-ELIM: .cfi_def_cfa_offset 16
-; CHECK-THUMB-FP-ELIM: push {r4, r5, r7, lr}
-; CHECK-THUMB-FP-ELIM: .cfi_def_cfa_offset 32
-; CHECK-THUMB-FP-ELIM: .cfi_offset lr, -20
-; CHECK-THUMB-FP-ELIM: .cfi_offset r7, -24
-; CHECK-THUMB-FP-ELIM: .cfi_offset r5, -28
-; CHECK-THUMB-FP-ELIM: .cfi_offset r4, -32
+; CHECK-THUMB-FP-ELIM: sub sp, #12
+; CHECK-THUMB-FP-ELIM: .cfi_def_cfa_offset 12
+; CHECK-THUMB-FP-ELIM: push {r4, r6, r7, lr}
+; CHECK-THUMB-FP-ELIM: .cfi_def_cfa_offset 28
+; CHECK-THUMB-FP-ELIM: .cfi_offset lr, -16
+; CHECK-THUMB-FP-ELIM: .cfi_offset r7, -20
+; CHECK-THUMB-FP-ELIM: .cfi_offset r6, -24
+; CHECK-THUMB-FP-ELIM: .cfi_offset r4, -28
; CHECK-THUMB-FP-ELIM: add r7, sp, #8
-; CHECK-THUMB-FP-ELIM: .cfi_def_cfa r7, 24
+; CHECK-THUMB-FP-ELIM: .cfi_def_cfa r7, 20
define i32 @sum(i32 %count, ...) {
entry:
@@ -120,11 +118,11 @@ entry:
for.body: ; preds = %entry, %for.body
%i.05 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
- %ap.cur = load i8** %vl, align 4
- %ap.next = getelementptr i8* %ap.cur, i32 4
+ %ap.cur = load i8*, i8** %vl, align 4
+ %ap.next = getelementptr i8, i8* %ap.cur, i32 4
store i8* %ap.next, i8** %vl, align 4
%0 = bitcast i8* %ap.cur to i32*
- %1 = load i32* %0, align 4
+ %1 = load i32, i32* %0, align 4
%call = call i32 @foo(i32 %1) #1
%inc = add nsw i32 %i.05, 1
%exitcond = icmp eq i32 %inc, %count
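The metadata rewrite above recurs throughout these tests: string-encoded debug nodes and !MDLocation give way to specialized nodes (!DICompileUnit, !DIFile, !DISubprogram, !DILocation, ...), and the "Debug Info Version" module flag moves from 2 to 3. A minimal before/after pair, echoing lines already shown in this hunk rather than adding anything new:

; old (Debug Info Version 2)
!1 = !{!"var.c", !"/tmp"}
!13 = !MDLocation(line: 5, scope: !4)
; new (Debug Info Version 3)
!1 = !DIFile(filename: "var.c", directory: "/tmp")
!13 = !DILocation(line: 5, scope: !4)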
diff --git a/test/CodeGen/ARM/debug-frame.ll b/test/CodeGen/ARM/debug-frame.ll
index 16e2c4c59f96..134829254e3f 100644
--- a/test/CodeGen/ARM/debug-frame.ll
+++ b/test/CodeGen/ARM/debug-frame.ll
@@ -128,41 +128,41 @@ declare void @_ZSt9terminatev()
!llvm.module.flags = !{!10, !11}
!llvm.ident = !{!12}
-!0 = !{!"0x11\004\00clang version 3.5 \000\00\000\00\000", !1, !2, !2, !3, !2, !2} ; [ DW_TAG_compile_unit ] [/tmp/exp.cpp] [DW_LANG_C_plus_plus]
-!1 = !{!"exp.cpp", !"/tmp"}
+!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5 ", isOptimized: false, emissionKind: 0, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!1 = !DIFile(filename: "exp.cpp", directory: "/tmp")
!2 = !{}
!3 = !{!4}
-!4 = !{!"0x2e\00test\00test\00_Z4testiiiiiddddd\004\000\001\000\006\00256\000\005", !1, !5, !6, null, void (i32, i32, i32, i32, i32, double, double, double, double, double)* @_Z4testiiiiiddddd, null, null, !2} ; [ DW_TAG_subprogram ] [line 4] [def] [scope 5] [test]
-!5 = !{!"0x29", !1} ; [ DW_TAG_file_type ] [/tmp/exp.cpp]
-!6 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !7, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!4 = !DISubprogram(name: "test", linkageName: "_Z4testiiiiiddddd", line: 4, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 5, file: !1, scope: !5, type: !6, function: void (i32, i32, i32, i32, i32, double, double, double, double, double)* @_Z4testiiiiiddddd, variables: !2)
+!5 = !DIFile(filename: "exp.cpp", directory: "/tmp")
+!6 = !DISubroutineType(types: !7)
!7 = !{null, !8, !8, !8, !8, !8, !9, !9, !9, !9, !9}
-!8 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
-!9 = !{!"0x24\00double\000\0064\0064\000\000\004", null, null} ; [ DW_TAG_base_type ] [double] [line 0, size 64, align 64, offset 0, enc DW_ATE_float]
+!8 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!9 = !DIBasicType(tag: DW_TAG_base_type, name: "double", size: 64, align: 64, encoding: DW_ATE_float)
!10 = !{i32 2, !"Dwarf Version", i32 4}
-!11 = !{i32 1, !"Debug Info Version", i32 2}
+!11 = !{i32 1, !"Debug Info Version", i32 3}
!12 = !{!"clang version 3.5 "}
-!13 = !{!"0x101\00a\0016777220\000", !4, !5, !8} ; [ DW_TAG_arg_variable ] [a] [line 4]
-!14 = !MDLocation(line: 4, scope: !4)
-!15 = !{!"0x101\00b\0033554436\000", !4, !5, !8} ; [ DW_TAG_arg_variable ] [b] [line 4]
-!16 = !{!"0x101\00c\0050331652\000", !4, !5, !8} ; [ DW_TAG_arg_variable ] [c] [line 4]
-!17 = !{!"0x101\00d\0067108868\000", !4, !5, !8} ; [ DW_TAG_arg_variable ] [d] [line 4]
-!18 = !{!"0x101\00e\0083886084\000", !4, !5, !8} ; [ DW_TAG_arg_variable ] [e] [line 4]
-!19 = !{!"0x101\00m\00100663301\000", !4, !5, !9} ; [ DW_TAG_arg_variable ] [m] [line 5]
-!20 = !MDLocation(line: 5, scope: !4)
-!21 = !{!"0x101\00n\00117440517\000", !4, !5, !9} ; [ DW_TAG_arg_variable ] [n] [line 5]
-!22 = !{!"0x101\00p\00134217733\000", !4, !5, !9} ; [ DW_TAG_arg_variable ] [p] [line 5]
-!23 = !{!"0x101\00q\00150994949\000", !4, !5, !9} ; [ DW_TAG_arg_variable ] [q] [line 5]
-!24 = !{!"0x101\00r\00167772165\000", !4, !5, !9} ; [ DW_TAG_arg_variable ] [r] [line 5]
-!25 = !MDLocation(line: 7, scope: !26)
-!26 = !{!"0xb\006\000\000", !1, !4} ; [ DW_TAG_lexical_block ] [/tmp/exp.cpp]
-!27 = !MDLocation(line: 8, scope: !26)
-!28 = !MDLocation(line: 11, scope: !26)
-!29 = !MDLocation(line: 9, scope: !30)
-!30 = !{!"0xb\008\000\001", !1, !4} ; [ DW_TAG_lexical_block ] [/tmp/exp.cpp]
-!31 = !MDLocation(line: 10, scope: !30)
-!32 = !MDLocation(line: 10, scope: !4)
-!33 = !MDLocation(line: 11, scope: !4)
-!34 = !MDLocation(line: 11, scope: !30)
+!13 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "a", line: 4, arg: 1, scope: !4, file: !5, type: !8)
+!14 = !DILocation(line: 4, scope: !4)
+!15 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "b", line: 4, arg: 2, scope: !4, file: !5, type: !8)
+!16 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "c", line: 4, arg: 3, scope: !4, file: !5, type: !8)
+!17 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "d", line: 4, arg: 4, scope: !4, file: !5, type: !8)
+!18 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "e", line: 4, arg: 5, scope: !4, file: !5, type: !8)
+!19 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "m", line: 5, arg: 6, scope: !4, file: !5, type: !9)
+!20 = !DILocation(line: 5, scope: !4)
+!21 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "n", line: 5, arg: 7, scope: !4, file: !5, type: !9)
+!22 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "p", line: 5, arg: 8, scope: !4, file: !5, type: !9)
+!23 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "q", line: 5, arg: 9, scope: !4, file: !5, type: !9)
+!24 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "r", line: 5, arg: 10, scope: !4, file: !5, type: !9)
+!25 = !DILocation(line: 7, scope: !26)
+!26 = distinct !DILexicalBlock(line: 6, column: 0, file: !1, scope: !4)
+!27 = !DILocation(line: 8, scope: !26)
+!28 = !DILocation(line: 11, scope: !26)
+!29 = !DILocation(line: 9, scope: !30)
+!30 = distinct !DILexicalBlock(line: 8, column: 0, file: !1, scope: !4)
+!31 = !DILocation(line: 10, scope: !30)
+!32 = !DILocation(line: 10, scope: !4)
+!33 = !DILocation(line: 11, scope: !4)
+!34 = !DILocation(line: 11, scope: !30)
; CHECK-FP-LABEL: _Z4testiiiiiddddd:
; CHECK-FP: .cfi_startproc
@@ -179,7 +179,7 @@ declare void @_ZSt9terminatev()
; CHECK-FP: .cfi_offset r4, -36
; CHECK-FP: add r11, sp, #28
; CHECK-FP: .cfi_def_cfa r11, 8
-; CHECK-FP: sub sp, sp, #28
+; CHECK-FP: sub sp, sp, #44
; CHECK-FP: .cfi_endproc
; CHECK-FP-ELIM-LABEL: _Z4testiiiiiddddd:
@@ -195,8 +195,8 @@ declare void @_ZSt9terminatev()
; CHECK-FP-ELIM: .cfi_offset r6, -28
; CHECK-FP-ELIM: .cfi_offset r5, -32
; CHECK-FP-ELIM: .cfi_offset r4, -36
-; CHECK-FP-ELIM: sub sp, sp, #28
-; CHECK-FP-ELIM: .cfi_def_cfa_offset 64
+; CHECK-FP-ELIM: sub sp, sp, #36
+; CHECK-FP-ELIM: .cfi_def_cfa_offset 72
; CHECK-FP-ELIM: .cfi_endproc
; CHECK-V7-FP-LABEL: _Z4testiiiiiddddd:
@@ -264,14 +264,14 @@ declare void @_ZSt9terminatev()
; CHECK-THUMB-V7-FP-LABEL: _Z4testiiiiiddddd:
; CHECK-THUMB-V7-FP: .cfi_startproc
-; CHECK-THUMB-V7-FP: push.w {r4, r7, r11, lr}
+; CHECK-THUMB-V7-FP: push {r4, r6, r7, lr}
; CHECK-THUMB-V7-FP: .cfi_def_cfa_offset 16
; CHECK-THUMB-V7-FP: .cfi_offset lr, -4
-; CHECK-THUMB-V7-FP: .cfi_offset r11, -8
-; CHECK-THUMB-V7-FP: .cfi_offset r7, -12
+; CHECK-THUMB-V7-FP: .cfi_offset r7, -8
+; CHECK-THUMB-V7-FP: .cfi_offset r6, -12
; CHECK-THUMB-V7-FP: .cfi_offset r4, -16
-; CHECK-THUMB-V7-FP: add r7, sp, #4
-; CHECK-THUMB-V7-FP: .cfi_def_cfa r7, 12
+; CHECK-THUMB-V7-FP: add r7, sp, #8
+; CHECK-THUMB-V7-FP: .cfi_def_cfa r7, 8
; CHECK-THUMB-V7-FP: vpush {d8, d9, d10, d11, d12}
; CHECK-THUMB-V7-FP: .cfi_offset d12, -24
; CHECK-THUMB-V7-FP: .cfi_offset d11, -32
@@ -300,14 +300,14 @@ declare void @_ZSt9terminatev()
; CHECK-THUMB-V7-FP-NOIAS-LABEL: _Z4testiiiiiddddd:
; CHECK-THUMB-V7-FP-NOIAS: .cfi_startproc
-; CHECK-THUMB-V7-FP-NOIAS: push.w {r4, r7, r11, lr}
+; CHECK-THUMB-V7-FP-NOIAS: push {r4, r6, r7, lr}
; CHECK-THUMB-V7-FP-NOIAS: .cfi_def_cfa_offset 16
; CHECK-THUMB-V7-FP-NOIAS: .cfi_offset 14, -4
-; CHECK-THUMB-V7-FP-NOIAS: .cfi_offset 11, -8
-; CHECK-THUMB-V7-FP-NOIAS: .cfi_offset 7, -12
+; CHECK-THUMB-V7-FP-NOIAS: .cfi_offset 7, -8
+; CHECK-THUMB-V7-FP-NOIAS: .cfi_offset 6, -12
; CHECK-THUMB-V7-FP-NOIAS: .cfi_offset 4, -16
-; CHECK-THUMB-V7-FP-NOIAS: add r7, sp, #4
-; CHECK-THUMB-V7-FP-NOIAS: .cfi_def_cfa 7, 12
+; CHECK-THUMB-V7-FP-NOIAS: add r7, sp, #8
+; CHECK-THUMB-V7-FP-NOIAS: .cfi_def_cfa 7, 8
; CHECK-THUMB-V7-FP-NOIAS: vpush {d8, d9, d10, d11, d12}
; CHECK-THUMB-V7-FP-NOIAS: .cfi_offset 268, -24
; CHECK-THUMB-V7-FP-NOIAS: .cfi_offset 267, -32
@@ -404,11 +404,11 @@ entry:
; CHECK-THUMB-V7-FP-ELIM-LABEL: test2:
; CHECK-THUMB-V7-FP-ELIM: .cfi_startproc
-; CHECK-THUMB-V7-FP-ELIM: push.w {r11, lr}
+; CHECK-THUMB-V7-FP-ELIM: push {r7, lr}
; CHECK-THUMB-V7-FP-ELIM: .cfi_def_cfa_offset 8
; CHECK-THUMB-V7-FP-ELIM: .cfi_offset lr, -4
-; CHECK-THUMB-V7-FP-ELIM: .cfi_offset r11, -8
-; CHECK-THUMB-V7-FP-ELIM: pop.w {r11, pc}
+; CHECK-THUMB-V7-FP-ELIM: .cfi_offset r7, -8
+; CHECK-THUMB-V7-FP-ELIM: pop {r7, pc}
; CHECK-THUMB-V7-FP-ELIM: .cfi_endproc
@@ -522,13 +522,13 @@ entry:
; CHECK-THUMB-V7-FP-ELIM-LABEL: test3:
; CHECK-THUMB-V7-FP-ELIM: .cfi_startproc
-; CHECK-THUMB-V7-FP-ELIM: push.w {r4, r5, r11, lr}
+; CHECK-THUMB-V7-FP-ELIM: push {r4, r5, r7, lr}
; CHECK-THUMB-V7-FP-ELIM: .cfi_def_cfa_offset 16
; CHECK-THUMB-V7-FP-ELIM: .cfi_offset lr, -4
-; CHECK-THUMB-V7-FP-ELIM: .cfi_offset r11, -8
+; CHECK-THUMB-V7-FP-ELIM: .cfi_offset r7, -8
; CHECK-THUMB-V7-FP-ELIM: .cfi_offset r5, -12
; CHECK-THUMB-V7-FP-ELIM: .cfi_offset r4, -16
-; CHECK-THUMB-V7-FP-ELIM: pop.w {r4, r5, r11, pc}
+; CHECK-THUMB-V7-FP-ELIM: pop {r4, r5, r7, pc}
; CHECK-THUMB-V7-FP-ELIM: .cfi_endproc
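The CFI expectations above are keyed to several FileCheck prefixes (CHECK-FP, CHECK-FP-ELIM, CHECK-THUMB-V7-FP, ...), each driven by its own RUN line at the top of the test, which this hunk does not show. A representative invocation, given only to illustrate the mechanism; the actual triples and flags in debug-frame.ll may differ:

; RUN: llc -mtriple=arm-none-eabi -disable-fp-elim < %s -o - | FileCheck %s --check-prefix=CHECK-FP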
diff --git a/test/CodeGen/ARM/debug-info-arg.ll b/test/CodeGen/ARM/debug-info-arg.ll
index 8679589a4865..84eae77794a4 100644
--- a/test/CodeGen/ARM/debug-info-arg.ll
+++ b/test/CodeGen/ARM/debug-info-arg.ll
@@ -7,13 +7,13 @@ target triple = "thumbv7-apple-ios"
%struct.tag_s = type { i32, i32, i32 }
define void @foo(%struct.tag_s* nocapture %this, %struct.tag_s* %c, i64 %x, i64 %y, %struct.tag_s* nocapture %ptr1, %struct.tag_s* nocapture %ptr2) nounwind ssp {
- tail call void @llvm.dbg.value(metadata %struct.tag_s* %this, i64 0, metadata !5, metadata !{!"0x102"}), !dbg !20
- tail call void @llvm.dbg.value(metadata %struct.tag_s* %c, i64 0, metadata !13, metadata !{!"0x102"}), !dbg !21
- tail call void @llvm.dbg.value(metadata i64 %x, i64 0, metadata !14, metadata !{!"0x102"}), !dbg !22
- tail call void @llvm.dbg.value(metadata i64 %y, i64 0, metadata !17, metadata !{!"0x102"}), !dbg !23
+ tail call void @llvm.dbg.value(metadata %struct.tag_s* %this, i64 0, metadata !5, metadata !DIExpression()), !dbg !20
+ tail call void @llvm.dbg.value(metadata %struct.tag_s* %c, i64 0, metadata !13, metadata !DIExpression()), !dbg !21
+ tail call void @llvm.dbg.value(metadata i64 %x, i64 0, metadata !14, metadata !DIExpression()), !dbg !22
+ tail call void @llvm.dbg.value(metadata i64 %y, i64 0, metadata !17, metadata !DIExpression()), !dbg !23
;CHECK: @DEBUG_VALUE: foo:y <- [R7+8]
- tail call void @llvm.dbg.value(metadata %struct.tag_s* %ptr1, i64 0, metadata !18, metadata !{!"0x102"}), !dbg !24
- tail call void @llvm.dbg.value(metadata %struct.tag_s* %ptr2, i64 0, metadata !19, metadata !{!"0x102"}), !dbg !25
+ tail call void @llvm.dbg.value(metadata %struct.tag_s* %ptr1, i64 0, metadata !18, metadata !DIExpression()), !dbg !24
+ tail call void @llvm.dbg.value(metadata %struct.tag_s* %ptr2, i64 0, metadata !19, metadata !DIExpression()), !dbg !25
%1 = icmp eq %struct.tag_s* %c, null, !dbg !26
br i1 %1, label %3, label %2, !dbg !26
@@ -32,37 +32,37 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnon
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!33}
-!0 = !{!"0x11\0012\00Apple clang version 3.0 (tags/Apple/clang-211.10.1) (based on LLVM 3.0svn)\001\00\000\00\001", !32, !4, !4, !30, null, null} ; [ DW_TAG_compile_unit ]
-!1 = !{!"0x2e\00foo\00foo\00\0011\000\001\000\006\00256\001\0011", !2, !2, !3, null, void (%struct.tag_s*, %struct.tag_s*, i64, i64, %struct.tag_s*, %struct.tag_s*)* @foo, null, null, !31} ; [ DW_TAG_subprogram ] [line 11] [def] [foo]
-!2 = !{!"0x29", !32} ; [ DW_TAG_file_type ]
-!3 = !{!"0x15\00\000\000\000\000\000\000", !32, !2, null, !4, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!0 = !DICompileUnit(language: DW_LANG_C99, producer: "Apple clang version 3.0 (tags/Apple/clang-211.10.1) (based on LLVM 3.0svn)", isOptimized: true, emissionKind: 1, file: !32, enums: !{}, retainedTypes: !{}, subprograms: !30, imports: null)
+!1 = !DISubprogram(name: "foo", line: 11, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 11, file: !2, scope: !2, type: !3, function: void (%struct.tag_s*, %struct.tag_s*, i64, i64, %struct.tag_s*, %struct.tag_s*)* @foo, variables: !31)
+!2 = !DIFile(filename: "one.c", directory: "/Volumes/Athwagate/R10048772")
+!3 = !DISubroutineType(types: !4)
!4 = !{null}
-!5 = !{!"0x101\00this\0016777227\000", !1, !2, !6} ; [ DW_TAG_arg_variable ]
-!6 = !{!"0xf\00\000\0032\0032\000\000", null, !0, !7} ; [ DW_TAG_pointer_type ]
-!7 = !{!"0x13\00tag_s\005\0096\0032\000\000\000", !32, !0, null, !8, null, null, null} ; [ DW_TAG_structure_type ] [tag_s] [line 5, size 96, align 32, offset 0] [def] [from ]
+!5 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "this", line: 11, arg: 1, scope: !1, file: !2, type: !6)
+!6 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 32, align: 32, scope: !0, baseType: !7)
+!7 = !DICompositeType(tag: DW_TAG_structure_type, name: "tag_s", line: 5, size: 96, align: 32, file: !32, scope: !0, elements: !8)
!8 = !{!9, !11, !12}
-!9 = !{!"0xd\00x\006\0032\0032\000\000", !32, !7, !10} ; [ DW_TAG_member ]
-!10 = !{!"0x24\00int\000\0032\0032\000\000\005", null, !0} ; [ DW_TAG_base_type ]
-!11 = !{!"0xd\00y\007\0032\0032\0032\000", !32, !7, !10} ; [ DW_TAG_member ]
-!12 = !{!"0xd\00z\008\0032\0032\0064\000", !32, !7, !10} ; [ DW_TAG_member ]
-!13 = !{!"0x101\00c\0033554443\000", !1, !2, !6} ; [ DW_TAG_arg_variable ]
-!14 = !{!"0x101\00x\0050331659\000", !1, !2, !15} ; [ DW_TAG_arg_variable ]
-!15 = !{!"0x16\00UInt64\001\000\000\000\000", !32, !0, !16} ; [ DW_TAG_typedef ]
-!16 = !{!"0x24\00long long unsigned int\000\0064\0032\000\000\007", null, !0} ; [ DW_TAG_base_type ]
-!17 = !{!"0x101\00y\0067108875\000", !1, !2, !15} ; [ DW_TAG_arg_variable ]
-!18 = !{!"0x101\00ptr1\0083886091\000", !1, !2, !6} ; [ DW_TAG_arg_variable ]
-!19 = !{!"0x101\00ptr2\00100663307\000", !1, !2, !6} ; [ DW_TAG_arg_variable ]
-!20 = !MDLocation(line: 11, column: 24, scope: !1)
-!21 = !MDLocation(line: 11, column: 44, scope: !1)
-!22 = !MDLocation(line: 11, column: 54, scope: !1)
-!23 = !MDLocation(line: 11, column: 64, scope: !1)
-!24 = !MDLocation(line: 11, column: 81, scope: !1)
-!25 = !MDLocation(line: 11, column: 101, scope: !1)
-!26 = !MDLocation(line: 12, column: 3, scope: !27)
-!27 = !{!"0xb\0011\00107\000", !2, !1} ; [ DW_TAG_lexical_block ]
-!28 = !MDLocation(line: 13, column: 5, scope: !27)
-!29 = !MDLocation(line: 14, column: 1, scope: !27)
+!9 = !DIDerivedType(tag: DW_TAG_member, name: "x", line: 6, size: 32, align: 32, file: !32, scope: !7, baseType: !10)
+!10 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!11 = !DIDerivedType(tag: DW_TAG_member, name: "y", line: 7, size: 32, align: 32, offset: 32, file: !32, scope: !7, baseType: !10)
+!12 = !DIDerivedType(tag: DW_TAG_member, name: "z", line: 8, size: 32, align: 32, offset: 64, file: !32, scope: !7, baseType: !10)
+!13 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "c", line: 11, arg: 2, scope: !1, file: !2, type: !6)
+!14 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "x", line: 11, arg: 3, scope: !1, file: !2, type: !15)
+!15 = !DIDerivedType(tag: DW_TAG_typedef, name: "UInt64", line: 1, file: !32, scope: !0, baseType: !16)
+!16 = !DIBasicType(tag: DW_TAG_base_type, name: "long long unsigned int", size: 64, align: 32, encoding: DW_ATE_unsigned)
+!17 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "y", line: 11, arg: 4, scope: !1, file: !2, type: !15)
+!18 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "ptr1", line: 11, arg: 5, scope: !1, file: !2, type: !6)
+!19 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "ptr2", line: 11, arg: 6, scope: !1, file: !2, type: !6)
+!20 = !DILocation(line: 11, column: 24, scope: !1)
+!21 = !DILocation(line: 11, column: 44, scope: !1)
+!22 = !DILocation(line: 11, column: 54, scope: !1)
+!23 = !DILocation(line: 11, column: 64, scope: !1)
+!24 = !DILocation(line: 11, column: 81, scope: !1)
+!25 = !DILocation(line: 11, column: 101, scope: !1)
+!26 = !DILocation(line: 12, column: 3, scope: !27)
+!27 = distinct !DILexicalBlock(line: 11, column: 107, file: !2, scope: !1)
+!28 = !DILocation(line: 13, column: 5, scope: !27)
+!29 = !DILocation(line: 14, column: 1, scope: !27)
!30 = !{!1}
!31 = !{!5, !13, !14, !17, !18, !19}
-!32 = !{!"one.c", !"/Volumes/Athwagate/R10048772"}
-!33 = !{i32 1, !"Debug Info Version", i32 2}
+!32 = !DIFile(filename: "one.c", directory: "/Volumes/Athwagate/R10048772")
+!33 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/CodeGen/ARM/debug-info-blocks.ll b/test/CodeGen/ARM/debug-info-blocks.ll
index 3bf6ad91c86e..366102755174 100644
--- a/test/CodeGen/ARM/debug-info-blocks.ll
+++ b/test/CodeGen/ARM/debug-info-blocks.ll
@@ -31,62 +31,62 @@ define hidden void @foobar_func_block_invoke_0(i8* %.block_descriptor, %0* %load
%1 = alloca %0*, align 4
%bounds = alloca %struct.CR, align 4
%data = alloca %struct.CR, align 4
- call void @llvm.dbg.value(metadata i8* %.block_descriptor, i64 0, metadata !27, metadata !{!"0x102"}), !dbg !129
+ call void @llvm.dbg.value(metadata i8* %.block_descriptor, i64 0, metadata !27, metadata !DIExpression()), !dbg !129
store %0* %loadedMydata, %0** %1, align 4
- call void @llvm.dbg.declare(metadata %0** %1, metadata !130, metadata !{!"0x102"}), !dbg !131
+ call void @llvm.dbg.declare(metadata %0** %1, metadata !130, metadata !DIExpression()), !dbg !131
%2 = bitcast %struct.CR* %bounds to %1*
- %3 = getelementptr %1* %2, i32 0, i32 0
+ %3 = getelementptr %1, %1* %2, i32 0, i32 0
store [4 x i32] %bounds.coerce0, [4 x i32]* %3
- call void @llvm.dbg.declare(metadata %struct.CR* %bounds, metadata !132, metadata !{!"0x102"}), !dbg !133
+ call void @llvm.dbg.declare(metadata %struct.CR* %bounds, metadata !132, metadata !DIExpression()), !dbg !133
%4 = bitcast %struct.CR* %data to %1*
- %5 = getelementptr %1* %4, i32 0, i32 0
+ %5 = getelementptr %1, %1* %4, i32 0, i32 0
store [4 x i32] %data.coerce0, [4 x i32]* %5
- call void @llvm.dbg.declare(metadata %struct.CR* %data, metadata !134, metadata !{!"0x102"}), !dbg !135
+ call void @llvm.dbg.declare(metadata %struct.CR* %data, metadata !134, metadata !DIExpression()), !dbg !135
%6 = bitcast i8* %.block_descriptor to %2*
- %7 = getelementptr inbounds %2* %6, i32 0, i32 6
+ %7 = getelementptr inbounds %2, %2* %6, i32 0, i32 6
call void @llvm.dbg.declare(metadata %2* %6, metadata !136, metadata !163), !dbg !137
call void @llvm.dbg.declare(metadata %2* %6, metadata !138, metadata !164), !dbg !137
call void @llvm.dbg.declare(metadata %2* %6, metadata !139, metadata !165), !dbg !140
- %8 = load %0** %1, align 4, !dbg !141
- %9 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_13", !dbg !141
+ %8 = load %0*, %0** %1, align 4, !dbg !141
+ %9 = load i8*, i8** @"\01L_OBJC_SELECTOR_REFERENCES_13", !dbg !141
%10 = bitcast %0* %8 to i8*, !dbg !141
%11 = call i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*)*)(i8* %10, i8* %9), !dbg !141
%12 = bitcast i8* %11 to %0*, !dbg !141
- %13 = getelementptr inbounds %2* %6, i32 0, i32 5, !dbg !141
- %14 = load i8** %13, !dbg !141
+ %13 = getelementptr inbounds %2, %2* %6, i32 0, i32 5, !dbg !141
+ %14 = load i8*, i8** %13, !dbg !141
%15 = bitcast i8* %14 to %struct.__block_byref_mydata*, !dbg !141
- %16 = getelementptr inbounds %struct.__block_byref_mydata* %15, i32 0, i32 1, !dbg !141
- %17 = load %struct.__block_byref_mydata** %16, !dbg !141
- %18 = getelementptr inbounds %struct.__block_byref_mydata* %17, i32 0, i32 6, !dbg !141
+ %16 = getelementptr inbounds %struct.__block_byref_mydata, %struct.__block_byref_mydata* %15, i32 0, i32 1, !dbg !141
+ %17 = load %struct.__block_byref_mydata*, %struct.__block_byref_mydata** %16, !dbg !141
+ %18 = getelementptr inbounds %struct.__block_byref_mydata, %struct.__block_byref_mydata* %17, i32 0, i32 6, !dbg !141
store %0* %12, %0** %18, align 4, !dbg !141
- %19 = getelementptr inbounds %2* %6, i32 0, i32 6, !dbg !143
- %20 = load %3** %19, align 4, !dbg !143
- %21 = load i32* @"OBJC_IVAR_$_MyWork._data", !dbg !143
+ %19 = getelementptr inbounds %2, %2* %6, i32 0, i32 6, !dbg !143
+ %20 = load %3*, %3** %19, align 4, !dbg !143
+ %21 = load i32, i32* @"OBJC_IVAR_$_MyWork._data", !dbg !143
%22 = bitcast %3* %20 to i8*, !dbg !143
- %23 = getelementptr inbounds i8* %22, i32 %21, !dbg !143
+ %23 = getelementptr inbounds i8, i8* %22, i32 %21, !dbg !143
%24 = bitcast i8* %23 to %struct.CR*, !dbg !143
%25 = bitcast %struct.CR* %24 to i8*, !dbg !143
%26 = bitcast %struct.CR* %data to i8*, !dbg !143
call void @llvm.memcpy.p0i8.p0i8.i32(i8* %25, i8* %26, i32 16, i32 4, i1 false), !dbg !143
- %27 = getelementptr inbounds %2* %6, i32 0, i32 6, !dbg !144
- %28 = load %3** %27, align 4, !dbg !144
- %29 = load i32* @"OBJC_IVAR_$_MyWork._bounds", !dbg !144
+ %27 = getelementptr inbounds %2, %2* %6, i32 0, i32 6, !dbg !144
+ %28 = load %3*, %3** %27, align 4, !dbg !144
+ %29 = load i32, i32* @"OBJC_IVAR_$_MyWork._bounds", !dbg !144
%30 = bitcast %3* %28 to i8*, !dbg !144
- %31 = getelementptr inbounds i8* %30, i32 %29, !dbg !144
+ %31 = getelementptr inbounds i8, i8* %30, i32 %29, !dbg !144
%32 = bitcast i8* %31 to %struct.CR*, !dbg !144
%33 = bitcast %struct.CR* %32 to i8*, !dbg !144
%34 = bitcast %struct.CR* %bounds to i8*, !dbg !144
call void @llvm.memcpy.p0i8.p0i8.i32(i8* %33, i8* %34, i32 16, i32 4, i1 false), !dbg !144
- %35 = getelementptr inbounds %2* %6, i32 0, i32 6, !dbg !145
- %36 = load %3** %35, align 4, !dbg !145
- %37 = getelementptr inbounds %2* %6, i32 0, i32 5, !dbg !145
- %38 = load i8** %37, !dbg !145
+ %35 = getelementptr inbounds %2, %2* %6, i32 0, i32 6, !dbg !145
+ %36 = load %3*, %3** %35, align 4, !dbg !145
+ %37 = getelementptr inbounds %2, %2* %6, i32 0, i32 5, !dbg !145
+ %38 = load i8*, i8** %37, !dbg !145
%39 = bitcast i8* %38 to %struct.__block_byref_mydata*, !dbg !145
- %40 = getelementptr inbounds %struct.__block_byref_mydata* %39, i32 0, i32 1, !dbg !145
- %41 = load %struct.__block_byref_mydata** %40, !dbg !145
- %42 = getelementptr inbounds %struct.__block_byref_mydata* %41, i32 0, i32 6, !dbg !145
- %43 = load %0** %42, align 4, !dbg !145
- %44 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_222", !dbg !145
+ %40 = getelementptr inbounds %struct.__block_byref_mydata, %struct.__block_byref_mydata* %39, i32 0, i32 1, !dbg !145
+ %41 = load %struct.__block_byref_mydata*, %struct.__block_byref_mydata** %40, !dbg !145
+ %42 = getelementptr inbounds %struct.__block_byref_mydata, %struct.__block_byref_mydata* %41, i32 0, i32 6, !dbg !145
+ %43 = load %0*, %0** %42, align 4, !dbg !145
+ %44 = load i8*, i8** @"\01L_OBJC_SELECTOR_REFERENCES_222", !dbg !145
%45 = bitcast %3* %36 to i8*, !dbg !145
call void bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to void (i8*, i8*, %0*)*)(i8* %45, i8* %44, %0* %43), !dbg !145
ret void, !dbg !146
@@ -95,169 +95,169 @@ define hidden void @foobar_func_block_invoke_0(i8* %.block_descriptor, %0* %load
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!162}
-!0 = !{!"0x11\0016\00Apple clang version 2.1\000\00\002\00\001", !153, !147, !26, !148, null, null} ; [ DW_TAG_compile_unit ]
-!1 = !{!"0x4\00\00248\0032\0032\000\000\000", !160, !0, null, !3, null, null, null} ; [ DW_TAG_enumeration_type ] [line 248, size 32, align 32, offset 0] [def] [from ]
-!2 = !{!"0x29", !160} ; [ DW_TAG_file_type ]
+!0 = !DICompileUnit(language: DW_LANG_ObjC, producer: "Apple clang version 2.1", isOptimized: false, runtimeVersion: 2, emissionKind: 1, file: !153, enums: !147, retainedTypes: !{}, subprograms: !148)
+!1 = !DICompositeType(tag: DW_TAG_enumeration_type, line: 248, size: 32, align: 32, file: !160, scope: !0, elements: !3)
+!2 = !DIFile(filename: "header.h", directory: "/Volumes/Sandbox/llvm")
!3 = !{!4}
-!4 = !{!"0x28\00Ver1\000"} ; [ DW_TAG_enumerator ]
-!5 = !{!"0x4\00Mode\0079\0032\0032\000\000\000", !160, !0, null, !7, null, null, null} ; [ DW_TAG_enumeration_type ] [Mode] [line 79, size 32, align 32, offset 0] [def] [from ]
-!6 = !{!"0x29", !161} ; [ DW_TAG_file_type ]
+!4 = !DIEnumerator(name: "Ver1", value: 0) ; [ DW_TAG_enumerator ]
+!5 = !DICompositeType(tag: DW_TAG_enumeration_type, name: "Mode", line: 79, size: 32, align: 32, file: !160, scope: !0, elements: !7)
+!6 = !DIFile(filename: "header2.h", directory: "/Volumes/Sandbox/llvm")
!7 = !{!8}
-!8 = !{!"0x28\00One\000"} ; [ DW_TAG_enumerator ]
-!9 = !{!"0x4\00\0015\0032\0032\000\000\000", !149, !0, null, !11, null, null, null} ; [ DW_TAG_enumeration_type ] [line 15, size 32, align 32, offset 0] [def] [from ]
-!10 = !{!"0x29", !149} ; [ DW_TAG_file_type ]
+!8 = !DIEnumerator(name: "One", value: 0) ; [ DW_TAG_enumerator ]
+!9 = !DICompositeType(tag: DW_TAG_enumeration_type, line: 15, size: 32, align: 32, file: !149, scope: !0, elements: !11)
+!10 = !DIFile(filename: "header3.h", directory: "/Volumes/Sandbox/llvm")
!11 = !{!12, !13}
-!12 = !{!"0x28\00Unknown\000"} ; [ DW_TAG_enumerator ]
-!13 = !{!"0x28\00Known\001"} ; [ DW_TAG_enumerator ]
-!14 = !{!"0x4\00\0020\0032\0032\000\000\000", !150, !0, null, !16, null, null, null} ; [ DW_TAG_enumeration_type ] [line 20, size 32, align 32, offset 0] [def] [from ]
-!15 = !{!"0x29", !150} ; [ DW_TAG_file_type ]
+!12 = !DIEnumerator(name: "Unknown", value: 0) ; [ DW_TAG_enumerator ]
+!13 = !DIEnumerator(name: "Known", value: 1) ; [ DW_TAG_enumerator ]
+!14 = !DICompositeType(tag: DW_TAG_enumeration_type, line: 20, size: 32, align: 32, file: !150, scope: !0, elements: !16)
+!15 = !DIFile(filename: "Private.h", directory: "/Volumes/Sandbox/llvm")
!16 = !{!17, !18}
-!17 = !{!"0x28\00Single\000"} ; [ DW_TAG_enumerator ]
-!18 = !{!"0x28\00Double\001"} ; [ DW_TAG_enumerator ]
-!19 = !{!"0x4\00\0014\0032\0032\000\000\000", !151, !0, null, !21, null, null, null} ; [ DW_TAG_enumeration_type ] [line 14, size 32, align 32, offset 0] [def] [from ]
-!20 = !{!"0x29", !151} ; [ DW_TAG_file_type ]
+!17 = !DIEnumerator(name: "Single", value: 0) ; [ DW_TAG_enumerator ]
+!18 = !DIEnumerator(name: "Double", value: 1) ; [ DW_TAG_enumerator ]
+!19 = !DICompositeType(tag: DW_TAG_enumeration_type, line: 14, size: 32, align: 32, file: !151, scope: !0, elements: !21)
+!20 = !DIFile(filename: "header4.h", directory: "/Volumes/Sandbox/llvm")
!21 = !{!22}
-!22 = !{!"0x28\00Eleven\000"} ; [ DW_TAG_enumerator ]
-!23 = !{!"0x2e\00foobar_func_block_invoke_0\00foobar_func_block_invoke_0\00\00609\001\001\000\006\00256\000\00609", !152, !24, !25, null, void (i8*, %0*, [4 x i32], [4 x i32])* @foobar_func_block_invoke_0, null, null, null} ; [ DW_TAG_subprogram ] [line 609] [local] [def] [foobar_func_block_invoke_0]
-!24 = !{!"0x29", !152} ; [ DW_TAG_file_type ]
-!25 = !{!"0x15\00\000\000\000\000\000\000", !152, !24, null, !26, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!22 = !DIEnumerator(name: "Eleven", value: 0) ; [ DW_TAG_enumerator ]
+!23 = !DISubprogram(name: "foobar_func_block_invoke_0", line: 609, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 609, file: !152, scope: !24, type: !25, function: void (i8*, %0*, [4 x i32], [4 x i32])* @foobar_func_block_invoke_0)
+!24 = !DIFile(filename: "MyLibrary.m", directory: "/Volumes/Sandbox/llvm")
+!25 = !DISubroutineType(types: !26)
!26 = !{null}
-!27 = !{!"0x101\00.block_descriptor\0016777825\0064", !23, !24, !28} ; [ DW_TAG_arg_variable ]
-!28 = !{!"0xf\00\000\0032\000\000\000", null, !0, !29} ; [ DW_TAG_pointer_type ]
-!29 = !{!"0x13\00__block_literal_14\00609\00256\0032\000\000\000", !152, !24, null, !30, null, null, null} ; [ DW_TAG_structure_type ] [__block_literal_14] [line 609, size 256, align 32, offset 0] [def] [from ]
+!27 = !DILocalVariable(tag: DW_TAG_arg_variable, name: ".block_descriptor", line: 609, arg: 1, flags: DIFlagArtificial, scope: !23, file: !24, type: !28)
+!28 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 32, scope: !0, baseType: !29)
+!29 = !DICompositeType(tag: DW_TAG_structure_type, name: "__block_literal_14", line: 609, size: 256, align: 32, file: !152, scope: !24, elements: !30)
!30 = !{!31, !33, !35, !36, !37, !48, !89, !124}
-!31 = !{!"0xd\00__isa\00609\0032\0032\000\000", !152, !24, !32} ; [ DW_TAG_member ]
-!32 = !{!"0xf\00\000\0032\0032\000\000", null, !0, null} ; [ DW_TAG_pointer_type ]
-!33 = !{!"0xd\00__flags\00609\0032\0032\0032\000", !152, !24, !34} ; [ DW_TAG_member ]
-!34 = !{!"0x24\00int\000\0032\0032\000\000\005", null, !0} ; [ DW_TAG_base_type ]
-!35 = !{!"0xd\00__reserved\00609\0032\0032\0064\000", !152, !24, !34} ; [ DW_TAG_member ]
-!36 = !{!"0xd\00__FuncPtr\00609\0032\0032\0096\000", !152, !24, !32} ; [ DW_TAG_member ]
-!37 = !{!"0xd\00__descriptor\00609\0032\0032\00128\000", !152, !24, !38} ; [ DW_TAG_member ]
-!38 = !{!"0xf\00\000\0032\0032\000\000", null, !0, !39} ; [ DW_TAG_pointer_type ]
-!39 = !{!"0x13\00__block_descriptor_withcopydispose\00307\00128\0032\000\000\000", !153, !0, null, !41, null, null, null} ; [ DW_TAG_structure_type ] [__block_descriptor_withcopydispose] [line 307, size 128, align 32, offset 0] [def] [from ]
-!40 = !{!"0x29", !153} ; [ DW_TAG_file_type ]
+!31 = !DIDerivedType(tag: DW_TAG_member, name: "__isa", line: 609, size: 32, align: 32, file: !152, scope: !24, baseType: !32)
+!32 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 32, align: 32, scope: !0, baseType: null)
+!33 = !DIDerivedType(tag: DW_TAG_member, name: "__flags", line: 609, size: 32, align: 32, offset: 32, file: !152, scope: !24, baseType: !34)
+!34 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!35 = !DIDerivedType(tag: DW_TAG_member, name: "__reserved", line: 609, size: 32, align: 32, offset: 64, file: !152, scope: !24, baseType: !34)
+!36 = !DIDerivedType(tag: DW_TAG_member, name: "__FuncPtr", line: 609, size: 32, align: 32, offset: 96, file: !152, scope: !24, baseType: !32)
+!37 = !DIDerivedType(tag: DW_TAG_member, name: "__descriptor", line: 609, size: 32, align: 32, offset: 128, file: !152, scope: !24, baseType: !38)
+!38 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 32, align: 32, scope: !0, baseType: !39)
+!39 = !DICompositeType(tag: DW_TAG_structure_type, name: "__block_descriptor_withcopydispose", line: 307, size: 128, align: 32, file: !153, scope: !0, elements: !41)
+!40 = !DIFile(filename: "MyLibrary.i", directory: "/Volumes/Sandbox/llvm")
!41 = !{!42, !44, !45, !47}
-!42 = !{!"0xd\00reserved\00307\0032\0032\000\000", !153, !40, !43} ; [ DW_TAG_member ]
-!43 = !{!"0x24\00long unsigned int\000\0032\0032\000\000\007", null, !0} ; [ DW_TAG_base_type ]
-!44 = !{!"0xd\00Size\00307\0032\0032\0032\000", !153, !40, !43} ; [ DW_TAG_member ]
-!45 = !{!"0xd\00CopyFuncPtr\00307\0032\0032\0064\000", !153, !40, !46} ; [ DW_TAG_member ]
-!46 = !{!"0xf\00\000\0032\0032\000\000", null, !0, !32} ; [ DW_TAG_pointer_type ]
-!47 = !{!"0xd\00DestroyFuncPtr\00307\0032\0032\0096\000", !153, !40, !46} ; [ DW_TAG_member ]
-!48 = !{!"0xd\00mydata\00609\0032\0032\00160\000", !152, !24, !49} ; [ DW_TAG_member ]
-!49 = !{!"0xf\00\000\0032\000\000\000", null, !0, !50} ; [ DW_TAG_pointer_type ]
-!50 = !{!"0x13\00\000\00224\000\000\0016\000", !152, !24, null, !51, null, null, null} ; [ DW_TAG_structure_type ] [line 0, size 224, align 0, offset 0] [def] [from ]
+!42 = !DIDerivedType(tag: DW_TAG_member, name: "reserved", line: 307, size: 32, align: 32, file: !153, scope: !40, baseType: !43)
+!43 = !DIBasicType(tag: DW_TAG_base_type, name: "long unsigned int", size: 32, align: 32, encoding: DW_ATE_unsigned)
+!44 = !DIDerivedType(tag: DW_TAG_member, name: "Size", line: 307, size: 32, align: 32, offset: 32, file: !153, scope: !40, baseType: !43)
+!45 = !DIDerivedType(tag: DW_TAG_member, name: "CopyFuncPtr", line: 307, size: 32, align: 32, offset: 64, file: !153, scope: !40, baseType: !46)
+!46 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 32, align: 32, scope: !0, baseType: !32)
+!47 = !DIDerivedType(tag: DW_TAG_member, name: "DestroyFuncPtr", line: 307, size: 32, align: 32, offset: 96, file: !153, scope: !40, baseType: !46)
+!48 = !DIDerivedType(tag: DW_TAG_member, name: "mydata", line: 609, size: 32, align: 32, offset: 160, file: !152, scope: !24, baseType: !49)
+!49 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 32, scope: !0, baseType: !50)
+!50 = !DICompositeType(tag: DW_TAG_structure_type, size: 224, flags: DIFlagBlockByrefStruct, file: !152, scope: !24, elements: !51)
!51 = !{!52, !53, !54, !55, !56, !57, !58}
-!52 = !{!"0xd\00__isa\000\0032\0032\000\000", !152, !24, !32} ; [ DW_TAG_member ]
-!53 = !{!"0xd\00__forwarding\000\0032\0032\0032\000", !152, !24, !32} ; [ DW_TAG_member ]
-!54 = !{!"0xd\00__flags\000\0032\0032\0064\000", !152, !24, !34} ; [ DW_TAG_member ]
-!55 = !{!"0xd\00__size\000\0032\0032\0096\000", !152, !24, !34} ; [ DW_TAG_member ]
-!56 = !{!"0xd\00__copy_helper\000\0032\0032\00128\000", !152, !24, !32} ; [ DW_TAG_member ]
-!57 = !{!"0xd\00__destroy_helper\000\0032\0032\00160\000", !152, !24, !32} ; [ DW_TAG_member ]
-!58 = !{!"0xd\00mydata\000\0032\0032\00192\000", !152, !24, !59} ; [ DW_TAG_member ]
-!59 = !{!"0xf\00\000\0032\0032\000\000", null, !0, !60} ; [ DW_TAG_pointer_type ]
-!60 = !{!"0x13\00UIMydata\0026\00128\0032\000\000\0016", !154, !24, null, !62, null, null, null} ; [ DW_TAG_structure_type ] [UIMydata] [line 26, size 128, align 32, offset 0] [def] [from ]
-!61 = !{!"0x29", !154} ; [ DW_TAG_file_type ]
+!52 = !DIDerivedType(tag: DW_TAG_member, name: "__isa", size: 32, align: 32, file: !152, scope: !24, baseType: !32)
+!53 = !DIDerivedType(tag: DW_TAG_member, name: "__forwarding", size: 32, align: 32, offset: 32, file: !152, scope: !24, baseType: !32)
+!54 = !DIDerivedType(tag: DW_TAG_member, name: "__flags", size: 32, align: 32, offset: 64, file: !152, scope: !24, baseType: !34)
+!55 = !DIDerivedType(tag: DW_TAG_member, name: "__size", size: 32, align: 32, offset: 96, file: !152, scope: !24, baseType: !34)
+!56 = !DIDerivedType(tag: DW_TAG_member, name: "__copy_helper", size: 32, align: 32, offset: 128, file: !152, scope: !24, baseType: !32)
+!57 = !DIDerivedType(tag: DW_TAG_member, name: "__destroy_helper", size: 32, align: 32, offset: 160, file: !152, scope: !24, baseType: !32)
+!58 = !DIDerivedType(tag: DW_TAG_member, name: "mydata", size: 32, align: 32, offset: 192, file: !152, scope: !24, baseType: !59)
+!59 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 32, align: 32, scope: !0, baseType: !60)
+!60 = !DICompositeType(tag: DW_TAG_structure_type, name: "UIMydata", line: 26, size: 128, align: 32, runtimeLang: DW_LANG_ObjC, file: !154, scope: !24, elements: !62)
+!61 = !DIFile(filename: "header11.h", directory: "/Volumes/Sandbox/llvm")
!62 = !{!63, !71, !75, !79}
-!63 = !{!"0x1c\00\000\000\000\000\000", !60, null, !64} ; [ DW_TAG_inheritance ]
-!64 = !{!"0x13\00NSO\0066\0032\0032\000\000\0016", !155, !40, null, !66, null, null, null} ; [ DW_TAG_structure_type ] [NSO] [line 66, size 32, align 32, offset 0] [def] [from ]
-!65 = !{!"0x29", !155} ; [ DW_TAG_file_type ]
+!63 = !DIDerivedType(tag: DW_TAG_inheritance, file: !61, baseType: !64)
+!64 = !DICompositeType(tag: DW_TAG_structure_type, name: "NSO", line: 66, size: 32, align: 32, runtimeLang: DW_LANG_ObjC, file: !155, scope: !40, elements: !66)
+!65 = !DIFile(filename: "NSO.h", directory: "/Volumes/Sandbox/llvm")
!66 = !{!67}
-!67 = !{!"0xd\00isa\0067\0032\0032\000\002", !155, !65, !68, !"", !"", !"", i32 0} ; [ DW_TAG_member ]
-!68 = !{!"0x16\00Class\00197\000\000\000\000", !153, !0, !69} ; [ DW_TAG_typedef ]
-!69 = !{!"0xf\00\000\0032\0032\000\000", null, !0, !70} ; [ DW_TAG_pointer_type ]
-!70 = !{!"0x13\00objc_class\000\000\000\000\004\000", !153, !0, null, null, null, null, null} ; [ DW_TAG_structure_type ] [objc_class] [line 0, size 0, align 0, offset 0] [decl] [from ]
-!71 = !{!"0xd\00_mydataRef\0028\0032\0032\0032\000", !154, !61, !72, !"", !"", !"", i32 0} ; [ DW_TAG_member ]
-!72 = !{!"0x16\00CFTypeRef\00313\000\000\000\000", !152, !0, !73} ; [ DW_TAG_typedef ]
-!73 = !{!"0xf\00\000\0032\0032\000\000", null, !0, !74} ; [ DW_TAG_pointer_type ]
-!74 = !{!"0x26\00\000\000\000\000\000", null, !0, null} ; [ DW_TAG_const_type ]
-!75 = !{!"0xd\00_scale\0029\0032\0032\0064\000", !154, !61, !76, !"", !"", !"", i32 0} ; [ DW_TAG_member ]
-!76 = !{!"0x16\00Float\0089\000\000\000\000", !156, !0, !78} ; [ DW_TAG_typedef ]
-!77 = !{!"0x29", !156} ; [ DW_TAG_file_type ]
-!78 = !{!"0x24\00float\000\0032\0032\000\000\004", null, !0} ; [ DW_TAG_base_type ]
-!79 = !{!"0xd\00_mydataFlags\0037\008\008\0096\000", !154, !61, !80, !"", !"", !"", i32 0} ; [ DW_TAG_member ]
-!80 = !{!"0x13\00\0030\008\008\000\000\000", !154, !0, null, !81, null, null, null} ; [ DW_TAG_structure_type ] [line 30, size 8, align 8, offset 0] [def] [from ]
+!67 = !DIDerivedType(tag: DW_TAG_member, name: "isa", line: 67, size: 32, align: 32, flags: DIFlagProtected, file: !155, scope: !65, baseType: !68, extraData: !"")
+!68 = !DIDerivedType(tag: DW_TAG_typedef, name: "Class", line: 197, file: !153, scope: !0, baseType: !69)
+!69 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 32, align: 32, scope: !0, baseType: !70)
+!70 = !DICompositeType(tag: DW_TAG_structure_type, name: "objc_class", flags: DIFlagFwdDecl, file: !153, scope: !0)
+!71 = !DIDerivedType(tag: DW_TAG_member, name: "_mydataRef", line: 28, size: 32, align: 32, offset: 32, file: !154, scope: !61, baseType: !72, extraData: !"")
+!72 = !DIDerivedType(tag: DW_TAG_typedef, name: "CFTypeRef", line: 313, file: !152, scope: !0, baseType: !73)
+!73 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 32, align: 32, scope: !0, baseType: !74)
+!74 = !DIDerivedType(tag: DW_TAG_const_type, scope: !0, baseType: null)
+!75 = !DIDerivedType(tag: DW_TAG_member, name: "_scale", line: 29, size: 32, align: 32, offset: 64, file: !154, scope: !61, baseType: !76, extraData: !"")
+!76 = !DIDerivedType(tag: DW_TAG_typedef, name: "Float", line: 89, file: !156, scope: !0, baseType: !78)
+!77 = !DIFile(filename: "header12.h", directory: "/Volumes/Sandbox/llvm")
+!78 = !DIBasicType(tag: DW_TAG_base_type, name: "float", size: 32, align: 32, encoding: DW_ATE_float)
+!79 = !DIDerivedType(tag: DW_TAG_member, name: "_mydataFlags", line: 37, size: 8, align: 8, offset: 96, file: !154, scope: !61, baseType: !80, extraData: !"")
+!80 = !DICompositeType(tag: DW_TAG_structure_type, line: 30, size: 8, align: 8, file: !154, scope: !0, elements: !81)
!81 = !{!82, !84, !85, !86, !87, !88}
-!82 = !{!"0xd\00named\0031\001\0032\000\000", !154, !61, !83} ; [ DW_TAG_member ]
-!83 = !{!"0x24\00unsigned int\000\0032\0032\000\000\007", null, !0} ; [ DW_TAG_base_type ]
-!84 = !{!"0xd\00mydataO\0032\003\0032\001\000", !154, !61, !83} ; [ DW_TAG_member ]
-!85 = !{!"0xd\00cached\0033\001\0032\004\000", !154, !61, !83} ; [ DW_TAG_member ]
-!86 = !{!"0xd\00hasBeenCached\0034\001\0032\005\000", !154, !61, !83} ; [ DW_TAG_member ]
-!87 = !{!"0xd\00hasPattern\0035\001\0032\006\000", !154, !61, !83} ; [ DW_TAG_member ]
-!88 = !{!"0xd\00isCIMydata\0036\001\0032\007\000", !154, !61, !83} ; [ DW_TAG_member ]
-!89 = !{!"0xd\00self\00609\0032\0032\00192\000", !152, !24, !90} ; [ DW_TAG_member ]
-!90 = !{!"0xf\00\000\0032\0032\000\000", null, !0, !91} ; [ DW_TAG_pointer_type ]
-!91 = !{!"0x13\00MyWork\0036\00384\0032\000\000\0016", !152, !40, null, !92, null, null, null} ; [ DW_TAG_structure_type ] [MyWork] [line 36, size 384, align 32, offset 0] [def] [from ]
+!82 = !DIDerivedType(tag: DW_TAG_member, name: "named", line: 31, size: 1, align: 32, file: !154, scope: !61, baseType: !83)
+!83 = !DIBasicType(tag: DW_TAG_base_type, name: "unsigned int", size: 32, align: 32, encoding: DW_ATE_unsigned)
+!84 = !DIDerivedType(tag: DW_TAG_member, name: "mydataO", line: 32, size: 3, align: 32, offset: 1, file: !154, scope: !61, baseType: !83)
+!85 = !DIDerivedType(tag: DW_TAG_member, name: "cached", line: 33, size: 1, align: 32, offset: 4, file: !154, scope: !61, baseType: !83)
+!86 = !DIDerivedType(tag: DW_TAG_member, name: "hasBeenCached", line: 34, size: 1, align: 32, offset: 5, file: !154, scope: !61, baseType: !83)
+!87 = !DIDerivedType(tag: DW_TAG_member, name: "hasPattern", line: 35, size: 1, align: 32, offset: 6, file: !154, scope: !61, baseType: !83)
+!88 = !DIDerivedType(tag: DW_TAG_member, name: "isCIMydata", line: 36, size: 1, align: 32, offset: 7, file: !154, scope: !61, baseType: !83)
+!89 = !DIDerivedType(tag: DW_TAG_member, name: "self", line: 609, size: 32, align: 32, offset: 192, file: !152, scope: !24, baseType: !90)
+!90 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 32, align: 32, scope: !0, baseType: !91)
+!91 = !DICompositeType(tag: DW_TAG_structure_type, name: "MyWork", line: 36, size: 384, align: 32, runtimeLang: DW_LANG_ObjC, file: !152, scope: !40, elements: !92)
!92 = !{!93, !98, !101, !107, !123}
-!93 = !{!"0x1c\00\000\000\000\000\000", !152, !91, !94} ; [ DW_TAG_inheritance ]
-!94 = !{!"0x13\00twork\0043\0032\0032\000\000\0016", !157, !40, null, !96, null, null, null} ; [ DW_TAG_structure_type ] [twork] [line 43, size 32, align 32, offset 0] [def] [from ]
-!95 = !{!"0x29", !157} ; [ DW_TAG_file_type ]
+!93 = !DIDerivedType(tag: DW_TAG_inheritance, file: !152, scope: !91, baseType: !94)
+!94 = !DICompositeType(tag: DW_TAG_structure_type, name: "twork", line: 43, size: 32, align: 32, runtimeLang: DW_LANG_ObjC, file: !157, scope: !40, elements: !96)
+!95 = !DIFile(filename: "header13.h", directory: "/Volumes/Sandbox/llvm")
!96 = !{!97}
-!97 = !{!"0x1c\00\000\000\000\000\000", !94, null, !64} ; [ DW_TAG_inheritance ]
-!98 = !{!"0xd\00_itemID\0038\0064\0032\0032\001", !152, !24, !99, !"", !"", !"", i32 0} ; [ DW_TAG_member ]
-!99 = !{!"0x16\00uint64_t\0055\000\000\000\000", !153, !0, !100} ; [ DW_TAG_typedef ]
-!100 = !{!"0x24\00long long unsigned int\000\0064\0032\000\000\007", null, !0} ; [ DW_TAG_base_type ]
-!101 = !{!"0xd\00_library\0039\0032\0032\0096\001", !152, !24, !102, !"", !"", !"", i32 0} ; [ DW_TAG_member ]
-!102 = !{!"0xf\00\000\0032\0032\000\000", null, !0, !103} ; [ DW_TAG_pointer_type ]
-!103 = !{!"0x13\00MyLibrary2\0022\0032\0032\000\000\0016", !158, !40, null, !105, null, null, null} ; [ DW_TAG_structure_type ] [MyLibrary2] [line 22, size 32, align 32, offset 0] [def] [from ]
-!104 = !{!"0x29", !158} ; [ DW_TAG_file_type ]
+!97 = !DIDerivedType(tag: DW_TAG_inheritance, file: !95, baseType: !64)
+!98 = !DIDerivedType(tag: DW_TAG_member, name: "_itemID", line: 38, size: 64, align: 32, offset: 32, flags: DIFlagPrivate, file: !152, scope: !24, baseType: !99, extraData: !"")
+!99 = !DIDerivedType(tag: DW_TAG_typedef, name: "uint64_t", line: 55, file: !153, scope: !0, baseType: !100)
+!100 = !DIBasicType(tag: DW_TAG_base_type, name: "long long unsigned int", size: 64, align: 32, encoding: DW_ATE_unsigned)
+!101 = !DIDerivedType(tag: DW_TAG_member, name: "_library", line: 39, size: 32, align: 32, offset: 96, flags: DIFlagPrivate, file: !152, scope: !24, baseType: !102, extraData: !"")
+!102 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 32, align: 32, scope: !0, baseType: !103)
+!103 = !DICompositeType(tag: DW_TAG_structure_type, name: "MyLibrary2", line: 22, size: 32, align: 32, runtimeLang: DW_LANG_ObjC, file: !158, scope: !40, elements: !105)
+!104 = !DIFile(filename: "header14.h", directory: "/Volumes/Sandbox/llvm")
!105 = !{!106}
-!106 = !{!"0x1c\00\000\000\000\000\000", !103, null, !64} ; [ DW_TAG_inheritance ]
-!107 = !{!"0xd\00_bounds\0040\00128\0032\00128\001", !152, !24, !108, !"", !"", !"", i32 0} ; [ DW_TAG_member ]
-!108 = !{!"0x16\00CR\0033\000\000\000\000", !153, !0, !109} ; [ DW_TAG_typedef ]
-!109 = !{!"0x13\00CR\0029\00128\0032\000\000\000", !156, !0, null, !110, null, null, null} ; [ DW_TAG_structure_type ] [CR] [line 29, size 128, align 32, offset 0] [def] [from ]
+!106 = !DIDerivedType(tag: DW_TAG_inheritance, file: !104, baseType: !64)
+!107 = !DIDerivedType(tag: DW_TAG_member, name: "_bounds", line: 40, size: 128, align: 32, offset: 128, flags: DIFlagPrivate, file: !152, scope: !24, baseType: !108, extraData: !"")
+!108 = !DIDerivedType(tag: DW_TAG_typedef, name: "CR", line: 33, file: !153, scope: !0, baseType: !109)
+!109 = !DICompositeType(tag: DW_TAG_structure_type, name: "CR", line: 29, size: 128, align: 32, file: !156, scope: !0, elements: !110)
!110 = !{!111, !117}
-!111 = !{!"0xd\00origin\0030\0064\0032\000\000", !156, !77, !112} ; [ DW_TAG_member ]
-!112 = !{!"0x16\00CP\0017\000\000\000\000", !156, !0, !113} ; [ DW_TAG_typedef ]
-!113 = !{!"0x13\00CP\0013\0064\0032\000\000\000", !156, !0, null, !114, null, null, null} ; [ DW_TAG_structure_type ] [CP] [line 13, size 64, align 32, offset 0] [def] [from ]
+!111 = !DIDerivedType(tag: DW_TAG_member, name: "origin", line: 30, size: 64, align: 32, file: !156, scope: !77, baseType: !112)
+!112 = !DIDerivedType(tag: DW_TAG_typedef, name: "CP", line: 17, file: !156, scope: !0, baseType: !113)
+!113 = !DICompositeType(tag: DW_TAG_structure_type, name: "CP", line: 13, size: 64, align: 32, file: !156, scope: !0, elements: !114)
!114 = !{!115, !116}
-!115 = !{!"0xd\00x\0014\0032\0032\000\000", !156, !77, !76} ; [ DW_TAG_member ]
-!116 = !{!"0xd\00y\0015\0032\0032\0032\000", !156, !77, !76} ; [ DW_TAG_member ]
-!117 = !{!"0xd\00size\0031\0064\0032\0064\000", !156, !77, !118} ; [ DW_TAG_member ]
-!118 = !{!"0x16\00Size\0025\000\000\000\000", !156, !0, !119} ; [ DW_TAG_typedef ]
-!119 = !{!"0x13\00Size\0021\0064\0032\000\000\000", !156, !0, null, !120, null, null, null} ; [ DW_TAG_structure_type ] [Size] [line 21, size 64, align 32, offset 0] [def] [from ]
+!115 = !DIDerivedType(tag: DW_TAG_member, name: "x", line: 14, size: 32, align: 32, file: !156, scope: !77, baseType: !76)
+!116 = !DIDerivedType(tag: DW_TAG_member, name: "y", line: 15, size: 32, align: 32, offset: 32, file: !156, scope: !77, baseType: !76)
+!117 = !DIDerivedType(tag: DW_TAG_member, name: "size", line: 31, size: 64, align: 32, offset: 64, file: !156, scope: !77, baseType: !118)
+!118 = !DIDerivedType(tag: DW_TAG_typedef, name: "Size", line: 25, file: !156, scope: !0, baseType: !119)
+!119 = !DICompositeType(tag: DW_TAG_structure_type, name: "Size", line: 21, size: 64, align: 32, file: !156, scope: !0, elements: !120)
!120 = !{!121, !122}
-!121 = !{!"0xd\00width\0022\0032\0032\000\000", !156, !77, !76} ; [ DW_TAG_member ]
-!122 = !{!"0xd\00height\0023\0032\0032\0032\000", !156, !77, !76} ; [ DW_TAG_member ]
-!123 = !{!"0xd\00_data\0040\00128\0032\00256\001", !152, !24, !108, !"", !"", !"", i32 0} ; [ DW_TAG_member ]
-!124 = !{!"0xd\00semi\00609\0032\0032\00224\000", !152, !24, !125} ; [ DW_TAG_member ]
-!125 = !{!"0x16\00d_t\0035\000\000\000\000", !152, !0, !126} ; [ DW_TAG_typedef ]
-!126 = !{!"0xf\00\000\0032\0032\000\000", null, !0, !127} ; [ DW_TAG_pointer_type ]
-!127 = !{!"0x13\00my_struct\0049\000\000\000\004\000", !159, !0, null, null, null, null, null} ; [ DW_TAG_structure_type ] [my_struct] [line 49, size 0, align 0, offset 0] [decl] [from ]
-!128 = !{!"0x29", !159} ; [ DW_TAG_file_type ]
-!129 = !MDLocation(line: 609, column: 144, scope: !23)
-!130 = !{!"0x101\00loadedMydata\0033555041\000", !23, !24, !59} ; [ DW_TAG_arg_variable ]
-!131 = !MDLocation(line: 609, column: 155, scope: !23)
-!132 = !{!"0x101\00bounds\0050332257\000", !23, !24, !108} ; [ DW_TAG_arg_variable ]
-!133 = !MDLocation(line: 609, column: 175, scope: !23)
-!134 = !{!"0x101\00data\0067109473\000", !23, !24, !108} ; [ DW_TAG_arg_variable ]
-!135 = !MDLocation(line: 609, column: 190, scope: !23)
-!136 = !{!"0x100\00mydata\00604\000", !23, !24, !50} ; [ DW_TAG_auto_variable ]
-!137 = !MDLocation(line: 604, column: 49, scope: !23)
-!138 = !{!"0x100\00self\00604\000", !23, !40, !90} ; [ DW_TAG_auto_variable ]
-!139 = !{!"0x100\00semi\00607\000", !23, !24, !125} ; [ DW_TAG_auto_variable ]
-!140 = !MDLocation(line: 607, column: 30, scope: !23)
-!141 = !MDLocation(line: 610, column: 17, scope: !142)
-!142 = !{!"0xb\00609\00200\0094", !152, !23} ; [ DW_TAG_lexical_block ]
-!143 = !MDLocation(line: 611, column: 17, scope: !142)
-!144 = !MDLocation(line: 612, column: 17, scope: !142)
-!145 = !MDLocation(line: 613, column: 17, scope: !142)
-!146 = !MDLocation(line: 615, column: 13, scope: !142)
+!121 = !DIDerivedType(tag: DW_TAG_member, name: "width", line: 22, size: 32, align: 32, file: !156, scope: !77, baseType: !76)
+!122 = !DIDerivedType(tag: DW_TAG_member, name: "height", line: 23, size: 32, align: 32, offset: 32, file: !156, scope: !77, baseType: !76)
+!123 = !DIDerivedType(tag: DW_TAG_member, name: "_data", line: 40, size: 128, align: 32, offset: 256, flags: DIFlagPrivate, file: !152, scope: !24, baseType: !108, extraData: !"")
+!124 = !DIDerivedType(tag: DW_TAG_member, name: "semi", line: 609, size: 32, align: 32, offset: 224, file: !152, scope: !24, baseType: !125)
+!125 = !DIDerivedType(tag: DW_TAG_typedef, name: "d_t", line: 35, file: !152, scope: !0, baseType: !126)
+!126 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 32, align: 32, scope: !0, baseType: !127)
+!127 = !DICompositeType(tag: DW_TAG_structure_type, name: "my_struct", line: 49, flags: DIFlagFwdDecl, file: !159, scope: !0)
+!128 = !DIFile(filename: "header15.h", directory: "/Volumes/Sandbox/llvm")
+!129 = !DILocation(line: 609, column: 144, scope: !23)
+!130 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "loadedMydata", line: 609, arg: 2, scope: !23, file: !24, type: !59)
+!131 = !DILocation(line: 609, column: 155, scope: !23)
+!132 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "bounds", line: 609, arg: 3, scope: !23, file: !24, type: !108)
+!133 = !DILocation(line: 609, column: 175, scope: !23)
+!134 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "data", line: 609, arg: 4, scope: !23, file: !24, type: !108)
+!135 = !DILocation(line: 609, column: 190, scope: !23)
+!136 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "mydata", line: 604, scope: !23, file: !24, type: !50)
+!137 = !DILocation(line: 604, column: 49, scope: !23)
+!138 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "self", line: 604, scope: !23, file: !40, type: !90)
+!139 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "semi", line: 607, scope: !23, file: !24, type: !125)
+!140 = !DILocation(line: 607, column: 30, scope: !23)
+!141 = !DILocation(line: 610, column: 17, scope: !142)
+!142 = distinct !DILexicalBlock(line: 609, column: 200, file: !152, scope: !23)
+!143 = !DILocation(line: 611, column: 17, scope: !142)
+!144 = !DILocation(line: 612, column: 17, scope: !142)
+!145 = !DILocation(line: 613, column: 17, scope: !142)
+!146 = !DILocation(line: 615, column: 13, scope: !142)
!147 = !{!1, !1, !5, !5, !9, !14, !19, !19, !14, !14, !14, !19, !19, !19}
!148 = !{!23}
-!149 = !{!"header3.h", !"/Volumes/Sandbox/llvm"}
-!150 = !{!"Private.h", !"/Volumes/Sandbox/llvm"}
-!151 = !{!"header4.h", !"/Volumes/Sandbox/llvm"}
-!152 = !{!"MyLibrary.m", !"/Volumes/Sandbox/llvm"}
-!153 = !{!"MyLibrary.i", !"/Volumes/Sandbox/llvm"}
-!154 = !{!"header11.h", !"/Volumes/Sandbox/llvm"}
-!155 = !{!"NSO.h", !"/Volumes/Sandbox/llvm"}
-!156 = !{!"header12.h", !"/Volumes/Sandbox/llvm"}
-!157 = !{!"header13.h", !"/Volumes/Sandbox/llvm"}
-!158 = !{!"header14.h", !"/Volumes/Sandbox/llvm"}
-!159 = !{!"header15.h", !"/Volumes/Sandbox/llvm"}
-!160 = !{!"header.h", !"/Volumes/Sandbox/llvm"}
+!149 = !DIFile(filename: "header3.h", directory: "/Volumes/Sandbox/llvm")
+!150 = !DIFile(filename: "Private.h", directory: "/Volumes/Sandbox/llvm")
+!151 = !DIFile(filename: "header4.h", directory: "/Volumes/Sandbox/llvm")
+!152 = !DIFile(filename: "MyLibrary.m", directory: "/Volumes/Sandbox/llvm")
+!153 = !DIFile(filename: "MyLibrary.i", directory: "/Volumes/Sandbox/llvm")
+!154 = !DIFile(filename: "header11.h", directory: "/Volumes/Sandbox/llvm")
+!155 = !DIFile(filename: "NSO.h", directory: "/Volumes/Sandbox/llvm")
+!156 = !DIFile(filename: "header12.h", directory: "/Volumes/Sandbox/llvm")
+!157 = !DIFile(filename: "header13.h", directory: "/Volumes/Sandbox/llvm")
+!158 = !DIFile(filename: "header14.h", directory: "/Volumes/Sandbox/llvm")
+!159 = !DIFile(filename: "header15.h", directory: "/Volumes/Sandbox/llvm")
+!160 = !DIFile(filename: "header.h", directory: "/Volumes/Sandbox/llvm")
!161 = !{!"header2.h", !"/Volumes/Sandbox/llvm"}
-!162 = !{i32 1, !"Debug Info Version", i32 2}
-!163 = !{!"0x102\0034\0020\006\0034\004\006\0034\0024"} ; [ DW_TAG_expression ] [DW_OP_plus 20 DW_OP_deref DW_OP_plus 4 DW_OP_deref DW_OP_plus 24]
-!164 = !{!"0x102\0034\0024"} ; [ DW_TAG_expression ] [DW_OP_plus 24]
-!165 = !{!"0x102\0034\0028"} ; [ DW_TAG_expression ] [DW_OP_plus 28]
+!162 = !{i32 1, !"Debug Info Version", i32 3}
+!163 = !DIExpression(DW_OP_plus, 20, DW_OP_deref, DW_OP_plus, 4, DW_OP_deref, DW_OP_plus, 24)
+!164 = !DIExpression(DW_OP_deref, DW_OP_plus, 24)
+!165 = !DIExpression(DW_OP_deref, DW_OP_plus, 28)
diff --git a/test/CodeGen/ARM/debug-info-branch-folding.ll b/test/CodeGen/ARM/debug-info-branch-folding.ll
index 94756953de0a..03b4d6b38151 100644
--- a/test/CodeGen/ARM/debug-info-branch-folding.ll
+++ b/test/CodeGen/ARM/debug-info-branch-folding.ll
@@ -20,18 +20,18 @@ entry:
for.body9: ; preds = %for.body9, %entry
%add19 = fadd <4 x float> undef, <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 1.000000e+00>, !dbg !39
- tail call void @llvm.dbg.value(metadata <4 x float> %add19, i64 0, metadata !27, metadata !{!"0x102"}), !dbg !39
+ tail call void @llvm.dbg.value(metadata <4 x float> %add19, i64 0, metadata !27, metadata !DIExpression()), !dbg !39
%add20 = fadd <4 x float> undef, <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 1.000000e+00>, !dbg !39
- tail call void @llvm.dbg.value(metadata <4 x float> %add20, i64 0, metadata !28, metadata !{!"0x102"}), !dbg !39
+ tail call void @llvm.dbg.value(metadata <4 x float> %add20, i64 0, metadata !28, metadata !DIExpression()), !dbg !39
br i1 %cond, label %for.end54, label %for.body9, !dbg !44
for.end54: ; preds = %for.body9
%tmp115 = extractelement <4 x float> %add19, i32 1
%conv6.i75 = fpext float %tmp115 to double, !dbg !45
- %call.i82 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([13 x i8]* @.str, i32 0, i32 0), double undef, double %conv6.i75, double undef, double undef) nounwind, !dbg !45
+ %call.i82 = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([13 x i8], [13 x i8]* @.str, i32 0, i32 0), double undef, double %conv6.i75, double undef, double undef) nounwind, !dbg !45
%tmp116 = extractelement <4 x float> %add20, i32 1
%conv6.i76 = fpext float %tmp116 to double, !dbg !45
- %call.i83 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([13 x i8]* @.str, i32 0, i32 0), double undef, double %conv6.i76, double undef, double undef) nounwind, !dbg !45
+ %call.i83 = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([13 x i8], [13 x i8]* @.str, i32 0, i32 0), double undef, double %conv6.i76, double undef, double undef) nounwind, !dbg !45
ret i32 0, !dbg !49
}
@@ -42,60 +42,60 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnon
!llvm.module.flags = !{!56}
!llvm.dbg.cu = !{!2}
-!0 = !{!"0x2e\00test0001\00test0001\00\003\000\001\000\006\00256\001\000", !54, null, !3, i32 0, <4 x float> (float)* @test0001, null, null, !51} ; [ DW_TAG_subprogram ]
-!1 = !{!"0x29", !54} ; [ DW_TAG_file_type ]
-!2 = !{!"0x11\0012\00clang version 3.0 (trunk 129915)\001\00\000\00\001", !54, !17, !17, !50, null, null} ; [ DW_TAG_compile_unit ]
-!3 = !{!"0x15\00\000\000\000\000\000\000", !54, !1, i32 0, !4, i32 0} ; [ DW_TAG_subroutine_type ]
+!0 = !DISubprogram(name: "test0001", line: 3, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, file: !54, scope: null, type: !3, function: <4 x float> (float)* @test0001, variables: !51)
+!1 = !DIFile(filename: "build2.c", directory: "/private/tmp")
+!2 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.0 (trunk 129915)", isOptimized: true, emissionKind: 1, file: !54, enums: !{}, retainedTypes: !{}, subprograms: !50, imports: null)
+!3 = !DISubroutineType(types: !4)
!4 = !{!5}
-!5 = !{!"0x16\00v4f32\0014\000\000\000\000", !54, !2, !6} ; [ DW_TAG_typedef ]
-!6 = !{!"0x1\00\000\00128\00128\000\000", !54, !2, !7, !8, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 128, align 128, offset 0] [from float]
-!7 = !{!"0x24\00float\000\0032\0032\000\000\004", null, !2} ; [ DW_TAG_base_type ]
+!5 = !DIDerivedType(tag: DW_TAG_typedef, name: "v4f32", line: 14, file: !54, scope: !2, baseType: !6)
+!6 = !DICompositeType(tag: DW_TAG_array_type, size: 128, align: 128, file: !54, scope: !2, baseType: !7, elements: !8)
+!7 = !DIBasicType(tag: DW_TAG_base_type, name: "float", size: 32, align: 32, encoding: DW_ATE_float)
!8 = !{!9}
-!9 = !{!"0x21\000\004"} ; [ DW_TAG_subrange_type ]
-!10 = !{!"0x2e\00main\00main\00\0059\000\001\000\006\00256\001\000", !54, null, !11, null, i32 (i32, i8**, i1)* @main, null, null, !52} ; [ DW_TAG_subprogram ] [line 59] [def] [scope 0] [main]
-!11 = !{!"0x15\00\000\000\000\000\000\000", !54, !1, null, !12, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!9 = !DISubrange(count: 4)
+!10 = !DISubprogram(name: "main", line: 59, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, file: !54, scope: null, type: !11, function: i32 (i32, i8**, i1)* @main, variables: !52)
+!11 = !DISubroutineType(types: !12)
!12 = !{!13}
-!13 = !{!"0x24\00int\000\0032\0032\000\000\005", null, !2} ; [ DW_TAG_base_type ]
-!14 = !{!"0x2e\00printFV\00printFV\00\0041\001\001\000\006\00256\001\000", !55, null, !16, null, null, null, null, !53} ; [ DW_TAG_subprogram ] [line 41] [local] [def] [scope 0] [printFV]
-!15 = !{!"0x29", !55} ; [ DW_TAG_file_type ]
-!16 = !{!"0x15\00\000\000\000\000\000\000", !55, !15, null, !17, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!13 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!14 = !DISubprogram(name: "printFV", line: 41, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, file: !55, scope: null, type: !16, variables: !53)
+!15 = !DIFile(filename: "/Volumes/Lalgate/work/llvm/projects/llvm-test/SingleSource/UnitTests/Vector/helpers.h", directory: "/private/tmp")
+!16 = !DISubroutineType(types: !17)
!17 = !{null}
-!18 = !{!"0x101\00a\0016777219\000", !0, !1, !7} ; [ DW_TAG_arg_variable ]
-!19 = !{!"0x101\00argc\0016777275\000", !10, !1, !13} ; [ DW_TAG_arg_variable ]
-!20 = !{!"0x101\00argv\0033554491\000", !10, !1, !21} ; [ DW_TAG_arg_variable ]
-!21 = !{!"0xf\00\000\0032\0032\000\000", null, !2, !22} ; [ DW_TAG_pointer_type ]
-!22 = !{!"0xf\00\000\0032\0032\000\000", null, !2, !23} ; [ DW_TAG_pointer_type ]
-!23 = !{!"0x24\00char\000\008\008\000\000\006", null, !2} ; [ DW_TAG_base_type ]
-!24 = !{!"0x100\00i\0060\000", !25, !1, !13} ; [ DW_TAG_auto_variable ]
-!25 = !{!"0xb\0059\0033\0014", !1, !10} ; [ DW_TAG_lexical_block ]
-!26 = !{!"0x100\00j\0060\000", !25, !1, !13} ; [ DW_TAG_auto_variable ]
-!27 = !{!"0x100\00x\0061\000", !25, !1, !5} ; [ DW_TAG_auto_variable ]
-!28 = !{!"0x100\00y\0062\000", !25, !1, !5} ; [ DW_TAG_auto_variable ]
-!29 = !{!"0x100\00z\0063\000", !25, !1, !5} ; [ DW_TAG_auto_variable ]
-!30 = !{!"0x101\00F\0016777257\000", !14, !15, !31} ; [ DW_TAG_arg_variable ]
-!31 = !{!"0xf\00\000\0032\0032\000\000", null, !2, !32} ; [ DW_TAG_pointer_type ]
-!32 = !{!"0x16\00FV\0025\000\000\000\000", !55, !2, !33} ; [ DW_TAG_typedef ]
-!33 = !{!"0x17\00\0022\00128\00128\000\000\000", !55, !2, i32 0, !34, null} ; [ DW_TAG_union_type ]
+!18 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "a", line: 3, arg: 1, scope: !0, file: !1, type: !7)
+!19 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "argc", line: 59, arg: 1, scope: !10, file: !1, type: !13)
+!20 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "argv", line: 59, arg: 2, scope: !10, file: !1, type: !21)
+!21 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 32, align: 32, scope: !2, baseType: !22)
+!22 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 32, align: 32, scope: !2, baseType: !23)
+!23 = !DIBasicType(tag: DW_TAG_base_type, name: "char", size: 8, align: 8, encoding: DW_ATE_signed_char)
+!24 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "i", line: 60, scope: !25, file: !1, type: !13)
+!25 = distinct !DILexicalBlock(line: 59, column: 33, file: !1, scope: !10)
+!26 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "j", line: 60, scope: !25, file: !1, type: !13)
+!27 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "x", line: 61, scope: !25, file: !1, type: !5)
+!28 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "y", line: 62, scope: !25, file: !1, type: !5)
+!29 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "z", line: 63, scope: !25, file: !1, type: !5)
+!30 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "F", line: 41, arg: 1, scope: !14, file: !15, type: !31)
+!31 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 32, align: 32, scope: !2, baseType: !32)
+!32 = !DIDerivedType(tag: DW_TAG_typedef, name: "FV", line: 25, file: !55, scope: !2, baseType: !33)
+!33 = !DICompositeType(tag: DW_TAG_union_type, line: 22, size: 128, align: 128, file: !55, scope: !2, elements: !34)
!34 = !{!35, !37}
-!35 = !{!"0xd\00V\0023\00128\00128\000\000", !55, !15, !36} ; [ DW_TAG_member ]
-!36 = !{!"0x16\00v4sf\003\000\000\000\000", !55, !2, !6} ; [ DW_TAG_typedef ]
-!37 = !{!"0xd\00A\0024\00128\0032\000\000", !55, !15, !38} ; [ DW_TAG_member ]
-!38 = !{!"0x1\00\000\00128\0032\000\000", null, !2, !7, !8, i32 0, i32 0} ; [ DW_TAG_array_type ]
-!39 = !MDLocation(line: 79, column: 7, scope: !40)
-!40 = !{!"0xb\0075\0035\0018", !1, !41} ; [ DW_TAG_lexical_block ]
-!41 = !{!"0xb\0075\005\0017", !1, !42} ; [ DW_TAG_lexical_block ]
-!42 = !{!"0xb\0071\0032\0016", !1, !43} ; [ DW_TAG_lexical_block ]
-!43 = !{!"0xb\0071\003\0015", !1, !25} ; [ DW_TAG_lexical_block ]
-!44 = !MDLocation(line: 75, column: 5, scope: !42)
-!45 = !MDLocation(line: 42, column: 2, scope: !46, inlinedAt: !48)
-!46 = !{!"0xb\0042\002\0020", !15, !47} ; [ DW_TAG_lexical_block ]
-!47 = !{!"0xb\0041\0028\0019", !15, !14} ; [ DW_TAG_lexical_block ]
-!48 = !MDLocation(line: 95, column: 3, scope: !25)
-!49 = !MDLocation(line: 99, column: 3, scope: !25)
+!35 = !DIDerivedType(tag: DW_TAG_member, name: "V", line: 23, size: 128, align: 128, file: !55, scope: !15, baseType: !36)
+!36 = !DIDerivedType(tag: DW_TAG_typedef, name: "v4sf", line: 3, file: !55, scope: !2, baseType: !6)
+!37 = !DIDerivedType(tag: DW_TAG_member, name: "A", line: 24, size: 128, align: 32, file: !55, scope: !15, baseType: !38)
+!38 = !DICompositeType(tag: DW_TAG_array_type, size: 128, align: 32, scope: !2, baseType: !7, elements: !8)
+!39 = !DILocation(line: 79, column: 7, scope: !40)
+!40 = distinct !DILexicalBlock(line: 75, column: 35, file: !1, scope: !41)
+!41 = distinct !DILexicalBlock(line: 75, column: 5, file: !1, scope: !42)
+!42 = distinct !DILexicalBlock(line: 71, column: 32, file: !1, scope: !43)
+!43 = distinct !DILexicalBlock(line: 71, column: 3, file: !1, scope: !25)
+!44 = !DILocation(line: 75, column: 5, scope: !42)
+!45 = !DILocation(line: 42, column: 2, scope: !46, inlinedAt: !48)
+!46 = distinct !DILexicalBlock(line: 42, column: 2, file: !15, scope: !47)
+!47 = distinct !DILexicalBlock(line: 41, column: 28, file: !15, scope: !14)
+!48 = !DILocation(line: 95, column: 3, scope: !25)
+!49 = !DILocation(line: 99, column: 3, scope: !25)
!50 = !{!0, !10, !14}
!51 = !{!18}
!52 = !{!19, !20, !24, !26, !27, !28, !29}
!53 = !{!30}
-!54 = !{!"build2.c", !"/private/tmp"}
-!55 = !{!"/Volumes/Lalgate/work/llvm/projects/llvm-test/SingleSource/UnitTests/Vector/helpers.h", !"/private/tmp"}
-!56 = !{i32 1, !"Debug Info Version", i32 2}
+!54 = !DIFile(filename: "build2.c", directory: "/private/tmp")
+!55 = !DIFile(filename: "/Volumes/Lalgate/work/llvm/projects/llvm-test/SingleSource/UnitTests/Vector/helpers.h", directory: "/private/tmp")
+!56 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/CodeGen/ARM/debug-info-d16-reg.ll b/test/CodeGen/ARM/debug-info-d16-reg.ll
index 85b510f175ee..27bd3b8639c4 100644
--- a/test/CodeGen/ARM/debug-info-d16-reg.ll
+++ b/test/CodeGen/ARM/debug-info-d16-reg.ll
@@ -12,21 +12,21 @@ target triple = "thumbv7-apple-darwin10"
define i32 @inlineprinter(i8* %ptr, double %val, i8 zeroext %c) nounwind optsize {
entry:
- tail call void @llvm.dbg.value(metadata i8* %ptr, i64 0, metadata !19, metadata !{!"0x102"}), !dbg !26
- tail call void @llvm.dbg.value(metadata double %val, i64 0, metadata !20, metadata !{!"0x102"}), !dbg !26
- tail call void @llvm.dbg.value(metadata i8 %c, i64 0, metadata !21, metadata !{!"0x102"}), !dbg !26
+ tail call void @llvm.dbg.value(metadata i8* %ptr, i64 0, metadata !19, metadata !DIExpression()), !dbg !26
+ tail call void @llvm.dbg.value(metadata double %val, i64 0, metadata !20, metadata !DIExpression()), !dbg !26
+ tail call void @llvm.dbg.value(metadata i8 %c, i64 0, metadata !21, metadata !DIExpression()), !dbg !26
%0 = zext i8 %c to i32, !dbg !27
- %1 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([11 x i8]* @.str, i32 0, i32 0), i8* %ptr, double %val, i32 %0) nounwind, !dbg !27
+ %1 = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i32 0, i32 0), i8* %ptr, double %val, i32 %0) nounwind, !dbg !27
ret i32 0, !dbg !29
}
define i32 @printer(i8* %ptr, double %val, i8 zeroext %c) nounwind optsize noinline {
entry:
- tail call void @llvm.dbg.value(metadata i8* %ptr, i64 0, metadata !16, metadata !{!"0x102"}), !dbg !30
- tail call void @llvm.dbg.value(metadata double %val, i64 0, metadata !17, metadata !{!"0x102"}), !dbg !30
- tail call void @llvm.dbg.value(metadata i8 %c, i64 0, metadata !18, metadata !{!"0x102"}), !dbg !30
+ tail call void @llvm.dbg.value(metadata i8* %ptr, i64 0, metadata !16, metadata !DIExpression()), !dbg !30
+ tail call void @llvm.dbg.value(metadata double %val, i64 0, metadata !17, metadata !DIExpression()), !dbg !30
+ tail call void @llvm.dbg.value(metadata i8 %c, i64 0, metadata !18, metadata !DIExpression()), !dbg !30
%0 = zext i8 %c to i32, !dbg !31
- %1 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([11 x i8]* @.str, i32 0, i32 0), i8* %ptr, double %val, i32 %0) nounwind, !dbg !31
+ %1 = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i32 0, i32 0), i8* %ptr, double %val, i32 %0) nounwind, !dbg !31
ret i32 0, !dbg !33
}
@@ -36,20 +36,20 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnon
define i32 @main(i32 %argc, i8** nocapture %argv) nounwind optsize {
entry:
- tail call void @llvm.dbg.value(metadata i32 %argc, i64 0, metadata !22, metadata !{!"0x102"}), !dbg !34
- tail call void @llvm.dbg.value(metadata i8** %argv, i64 0, metadata !23, metadata !{!"0x102"}), !dbg !34
+ tail call void @llvm.dbg.value(metadata i32 %argc, i64 0, metadata !22, metadata !DIExpression()), !dbg !34
+ tail call void @llvm.dbg.value(metadata i8** %argv, i64 0, metadata !23, metadata !DIExpression()), !dbg !34
%0 = sitofp i32 %argc to double, !dbg !35
%1 = fadd double %0, 5.555552e+05, !dbg !35
- tail call void @llvm.dbg.value(metadata double %1, i64 0, metadata !24, metadata !{!"0x102"}), !dbg !35
- %2 = tail call i32 @puts(i8* getelementptr inbounds ([6 x i8]* @.str1, i32 0, i32 0)) nounwind, !dbg !36
- %3 = getelementptr inbounds i8* bitcast (i32 (i32, i8**)* @main to i8*), i32 %argc, !dbg !37
+ tail call void @llvm.dbg.value(metadata double %1, i64 0, metadata !24, metadata !DIExpression()), !dbg !35
+ %2 = tail call i32 @puts(i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str1, i32 0, i32 0)) nounwind, !dbg !36
+ %3 = getelementptr inbounds i8, i8* bitcast (i32 (i32, i8**)* @main to i8*), i32 %argc, !dbg !37
%4 = trunc i32 %argc to i8, !dbg !37
%5 = add i8 %4, 97, !dbg !37
- tail call void @llvm.dbg.value(metadata i8* %3, i64 0, metadata !19, metadata !{!"0x102"}) nounwind, !dbg !38
- tail call void @llvm.dbg.value(metadata double %1, i64 0, metadata !20, metadata !{!"0x102"}) nounwind, !dbg !38
- tail call void @llvm.dbg.value(metadata i8 %5, i64 0, metadata !21, metadata !{!"0x102"}) nounwind, !dbg !38
+ tail call void @llvm.dbg.value(metadata i8* %3, i64 0, metadata !49, metadata !DIExpression()) nounwind, !dbg !38
+ tail call void @llvm.dbg.value(metadata double %1, i64 0, metadata !50, metadata !DIExpression()) nounwind, !dbg !38
+ tail call void @llvm.dbg.value(metadata i8 %5, i64 0, metadata !51, metadata !DIExpression()) nounwind, !dbg !38
%6 = zext i8 %5 to i32, !dbg !39
- %7 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([11 x i8]* @.str, i32 0, i32 0), i8* %3, double %1, i32 %6) nounwind, !dbg !39
+ %7 = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i32 0, i32 0), i8* %3, double %1, i32 %6) nounwind, !dbg !39
%8 = tail call i32 @printer(i8* %3, double %1, i8 zeroext %5) nounwind, !dbg !40
ret i32 0, !dbg !41
}
@@ -59,52 +59,57 @@ declare i32 @puts(i8* nocapture) nounwind
!llvm.dbg.cu = !{!2}
!llvm.module.flags = !{!48}
-!0 = !{!"0x2e\00printer\00printer\00printer\0012\000\001\000\006\00256\001\0012", !46, !1, !3, null, i32 (i8*, double, i8)* @printer, null, null, !43} ; [ DW_TAG_subprogram ]
-!1 = !{!"0x29", !46} ; [ DW_TAG_file_type ]
-!2 = !{!"0x11\001\00(LLVM build 00)\001\00\000\00\001", !46, !47, !47, !42, null, null} ; [ DW_TAG_compile_unit ]
-!3 = !{!"0x15\00\000\000\000\000\000\000", !46, !1, null, !4, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!0 = !DISubprogram(name: "printer", linkageName: "printer", line: 12, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 12, file: !46, scope: !1, type: !3, function: i32 (i8*, double, i8)* @printer, variables: !43)
+!1 = !DIFile(filename: "a.c", directory: "/tmp/")
+!2 = !DICompileUnit(language: DW_LANG_C89, producer: "(LLVM build 00)", isOptimized: true, emissionKind: 1, file: !46, enums: !47, retainedTypes: !47, subprograms: !42, imports: null)
+!3 = !DISubroutineType(types: !4)
!4 = !{!5, !6, !7, !8}
-!5 = !{!"0x24\00int\000\0032\0032\000\000\005", !46, !1} ; [ DW_TAG_base_type ]
-!6 = !{!"0xf\00\000\0032\0032\000\000", !46, !1, null} ; [ DW_TAG_pointer_type ]
-!7 = !{!"0x24\00double\000\0064\0032\000\000\004", !46, !1} ; [ DW_TAG_base_type ]
-!8 = !{!"0x24\00unsigned char\000\008\008\000\000\008", !46, !1} ; [ DW_TAG_base_type ]
-!9 = !{!"0x2e\00inlineprinter\00inlineprinter\00inlineprinter\005\000\001\000\006\00256\001\005", !46, !1, !3, null, i32 (i8*, double, i8)* @inlineprinter, null, null, !44} ; [ DW_TAG_subprogram ]
-!10 = !{!"0x2e\00main\00main\00main\0018\000\001\000\006\00256\001\0018", !46, !1, !11, null, i32 (i32, i8**)* @main, null, null, !45} ; [ DW_TAG_subprogram ]
-!11 = !{!"0x15\00\000\000\000\000\000\000", !46, !1, null, !12, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!5 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!6 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 32, align: 32, file: !46, scope: !1, baseType: null)
+!7 = !DIBasicType(tag: DW_TAG_base_type, name: "double", size: 64, align: 32, encoding: DW_ATE_float)
+!8 = !DIBasicType(tag: DW_TAG_base_type, name: "unsigned char", size: 8, align: 8, encoding: DW_ATE_unsigned_char)
+!9 = !DISubprogram(name: "inlineprinter", linkageName: "inlineprinter", line: 5, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 5, file: !46, scope: !1, type: !3, function: i32 (i8*, double, i8)* @inlineprinter, variables: !44)
+!10 = !DISubprogram(name: "main", linkageName: "main", line: 18, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 18, file: !46, scope: !1, type: !11, function: i32 (i32, i8**)* @main, variables: !45)
+!11 = !DISubroutineType(types: !12)
!12 = !{!5, !5, !13}
-!13 = !{!"0xf\00\000\0032\0032\000\000", !46, !1, !14} ; [ DW_TAG_pointer_type ]
-!14 = !{!"0xf\00\000\0032\0032\000\000", !46, !1, !15} ; [ DW_TAG_pointer_type ]
-!15 = !{!"0x24\00char\000\008\008\000\000\006", !46, !1} ; [ DW_TAG_base_type ]
-!16 = !{!"0x101\00ptr\0011\000", !0, !1, !6} ; [ DW_TAG_arg_variable ]
-!17 = !{!"0x101\00val\0011\000", !0, !1, !7} ; [ DW_TAG_arg_variable ]
-!18 = !{!"0x101\00c\0011\000", !0, !1, !8} ; [ DW_TAG_arg_variable ]
-!19 = !{!"0x101\00ptr\004\000", !9, !1, !6} ; [ DW_TAG_arg_variable ]
-!20 = !{!"0x101\00val\004\000", !9, !1, !7} ; [ DW_TAG_arg_variable ]
-!21 = !{!"0x101\00c\004\000", !9, !1, !8} ; [ DW_TAG_arg_variable ]
-!22 = !{!"0x101\00argc\0017\000", !10, !1, !5} ; [ DW_TAG_arg_variable ]
-!23 = !{!"0x101\00argv\0017\000", !10, !1, !13} ; [ DW_TAG_arg_variable ]
-!24 = !{!"0x100\00dval\0019\000", !25, !1, !7} ; [ DW_TAG_auto_variable ]
-!25 = !{!"0xb\0018\000\002", !46, !10} ; [ DW_TAG_lexical_block ]
-!26 = !MDLocation(line: 4, scope: !9)
-!27 = !MDLocation(line: 6, scope: !28)
-!28 = !{!"0xb\005\000\001", !46, !9} ; [ DW_TAG_lexical_block ]
-!29 = !MDLocation(line: 7, scope: !28)
-!30 = !MDLocation(line: 11, scope: !0)
-!31 = !MDLocation(line: 13, scope: !32)
-!32 = !{!"0xb\0012\000\000", !46, !0} ; [ DW_TAG_lexical_block ]
-!33 = !MDLocation(line: 14, scope: !32)
-!34 = !MDLocation(line: 17, scope: !10)
-!35 = !MDLocation(line: 19, scope: !25)
-!36 = !MDLocation(line: 20, scope: !25)
-!37 = !MDLocation(line: 21, scope: !25)
-!38 = !MDLocation(line: 4, scope: !9, inlinedAt: !37)
-!39 = !MDLocation(line: 6, scope: !28, inlinedAt: !37)
-!40 = !MDLocation(line: 22, scope: !25)
-!41 = !MDLocation(line: 23, scope: !25)
+!13 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 32, align: 32, file: !46, scope: !1, baseType: !14)
+!14 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 32, align: 32, file: !46, scope: !1, baseType: !15)
+!15 = !DIBasicType(tag: DW_TAG_base_type, name: "char", size: 8, align: 8, encoding: DW_ATE_signed_char)
+!16 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "ptr", line: 11, arg: 1, scope: !0, file: !1, type: !6)
+!17 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "val", line: 11, arg: 2, scope: !0, file: !1, type: !7)
+!18 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "c", line: 11, arg: 3, scope: !0, file: !1, type: !8)
+!19 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "ptr", line: 4, arg: 1, scope: !9, file: !1, type: !6)
+!20 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "val", line: 4, arg: 2, scope: !9, file: !1, type: !7)
+!21 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "c", line: 4, arg: 3, scope: !9, file: !1, type: !8)
+
+!49 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "ptr", line: 4, arg: 1, scope: !9, file: !1, type: !6)
+!50 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "val", line: 4, arg: 2, scope: !9, file: !1, type: !7)
+!51 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "c", line: 4, arg: 2, scope: !9, file: !1, type: !8)
+
+!22 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "argc", line: 17, arg: 0, scope: !10, file: !1, type: !5)
+!23 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "argv", line: 17, arg: 0, scope: !10, file: !1, type: !13)
+!24 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "dval", line: 19, scope: !25, file: !1, type: !7)
+!25 = distinct !DILexicalBlock(line: 18, column: 0, file: !46, scope: !10)
+!26 = !DILocation(line: 4, scope: !9)
+!27 = !DILocation(line: 6, scope: !28)
+!28 = distinct !DILexicalBlock(line: 5, column: 0, file: !46, scope: !9)
+!29 = !DILocation(line: 7, scope: !28)
+!30 = !DILocation(line: 11, scope: !0)
+!31 = !DILocation(line: 13, scope: !32)
+!32 = distinct !DILexicalBlock(line: 12, column: 0, file: !46, scope: !0)
+!33 = !DILocation(line: 14, scope: !32)
+!34 = !DILocation(line: 17, scope: !10)
+!35 = !DILocation(line: 19, scope: !25)
+!36 = !DILocation(line: 20, scope: !25)
+!37 = !DILocation(line: 21, scope: !25)
+!38 = !DILocation(line: 4, scope: !9, inlinedAt: !37)
+!39 = !DILocation(line: 6, scope: !28, inlinedAt: !37)
+!40 = !DILocation(line: 22, scope: !25)
+!41 = !DILocation(line: 23, scope: !25)
!42 = !{!0, !9, !10}
!43 = !{!16, !17, !18}
!44 = !{!19, !20, !21}
!45 = !{!22, !23, !24}
-!46 = !{!"a.c", !"/tmp/"}
-!47 = !{i32 0}
-!48 = !{i32 1, !"Debug Info Version", i32 2}
+!46 = !DIFile(filename: "a.c", directory: "/tmp/")
+!47 = !{}
+!48 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/CodeGen/ARM/debug-info-no-frame.ll b/test/CodeGen/ARM/debug-info-no-frame.ll
new file mode 100644
index 000000000000..e00563cc47c4
--- /dev/null
+++ b/test/CodeGen/ARM/debug-info-no-frame.ll
@@ -0,0 +1,36 @@
+; RUN: llc -mtriple=armv7-none-linux-gnueabihf < %s -o - | FileCheck %s
+
+; Function Attrs: nounwind
+define void @need_cfi_def_cfa_offset() #0 {
+; CHECK-LABEL: need_cfi_def_cfa_offset:
+; CHECK: sub sp, sp, #4
+; CHECK: .cfi_def_cfa_offset 4
+entry:
+ %Depth = alloca i32, align 4
+ call void @llvm.dbg.declare(metadata i32* %Depth, metadata !9, metadata !10), !dbg !11
+ store i32 2, i32* %Depth, align 4, !dbg !11
+ ret void, !dbg !12
+}
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readnone }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!7, !8}
+
+!0 = !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "", isOptimized: false)
+!1 = !DIFile(filename: "file.c", directory: "/dir")
+!2 = !{}
+!3 = !DISubprogram(name: "need_cfi_def_cfa_offset", scope: !1, file: !1, line: 1, type: !4, isLocal: false, isDefinition: true, scopeLine: 2, isOptimized: false, function: void ()* @need_cfi_def_cfa_offset, variables: !2)
+!4 = !DISubroutineType(types: !5)
+!5 = !{null}
+!6 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!7 = !{i32 2, !"Dwarf Version", i32 4}
+!8 = !{i32 2, !"Debug Info Version", i32 3}
+!9 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "Depth", scope: !3, file: !1, line: 3, type: !6)
+!10 = !DIExpression()
+!11 = !DILocation(line: 3, column: 9, scope: !3)
+!12 = !DILocation(line: 7, column: 5, scope: !3)
diff --git a/test/CodeGen/ARM/debug-info-qreg.ll b/test/CodeGen/ARM/debug-info-qreg.ll
index c05df6ab3d51..665818fc0b2e 100644
--- a/test/CodeGen/ARM/debug-info-qreg.ll
+++ b/test/CodeGen/ARM/debug-info-qreg.ll
@@ -3,13 +3,13 @@ target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-
target triple = "thumbv7-apple-macosx10.6.7"
;CHECK: sub-register DW_OP_regx
-;CHECK-NEXT: ascii
+;CHECK-NEXT: 256
;CHECK-NEXT: DW_OP_piece
-;CHECK-NEXT: byte 8
+;CHECK-NEXT: 8
;CHECK-NEXT: sub-register DW_OP_regx
-;CHECK-NEXT: ascii
+;CHECK-NEXT: 257
;CHECK-NEXT: DW_OP_piece
-;CHECK-NEXT: byte 8
+;CHECK-NEXT: 8
@.str = external constant [13 x i8]
@@ -24,10 +24,10 @@ for.body9: ; preds = %for.body9, %entry
br i1 undef, label %for.end54, label %for.body9, !dbg !44
for.end54: ; preds = %for.body9
- tail call void @llvm.dbg.value(metadata <4 x float> %add19, i64 0, metadata !27, metadata !{!"0x102"}), !dbg !39
+ tail call void @llvm.dbg.value(metadata <4 x float> %add19, i64 0, metadata !27, metadata !DIExpression()), !dbg !39
%tmp115 = extractelement <4 x float> %add19, i32 1
%conv6.i75 = fpext float %tmp115 to double, !dbg !45
- %call.i82 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([13 x i8]* @.str, i32 0, i32 0), double undef, double %conv6.i75, double undef, double undef) nounwind, !dbg !45
+ %call.i82 = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([13 x i8], [13 x i8]* @.str, i32 0, i32 0), double undef, double %conv6.i75, double undef, double undef) nounwind, !dbg !45
ret i32 0, !dbg !49
}
@@ -38,60 +38,60 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnon
!llvm.dbg.cu = !{!2}
!llvm.module.flags = !{!56}
-!0 = !{!"0x2e\00test0001\00test0001\00\003\000\001\000\006\00256\001\003", !54, !1, !3, null, <4 x float> (float)* @test0001, null, null, !51} ; [ DW_TAG_subprogram ] [line 3] [def] [test0001]
-!1 = !{!"0x29", !54} ; [ DW_TAG_file_type ]
-!2 = !{!"0x11\0012\00clang version 3.0 (trunk 129915)\001\00\000\00\001", !54, !17, !17, !50, null, null} ; [ DW_TAG_compile_unit ]
-!3 = !{!"0x15\00\000\000\000\000\000\000", !54, !1, null, !4, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!0 = !DISubprogram(name: "test0001", line: 3, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 3, file: !54, scope: !1, type: !3, function: <4 x float> (float)* @test0001, variables: !51)
+!1 = !DIFile(filename: "build2.c", directory: "/private/tmp")
+!2 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.0 (trunk 129915)", isOptimized: true, emissionKind: 1, file: !54, enums: !{}, retainedTypes: !{}, subprograms: !50, imports: null)
+!3 = !DISubroutineType(types: !4)
!4 = !{!5}
-!5 = !{!"0x16\00v4f32\0014\000\000\000\000", !54, !2, !6} ; [ DW_TAG_typedef ]
-!6 = !{!"0x1\00\000\00128\00128\000\000", !2, null, !7, !8, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 128, align 128, offset 0] [from float]
-!7 = !{!"0x24\00float\000\0032\0032\000\000\004", null, !2} ; [ DW_TAG_base_type ]
+!5 = !DIDerivedType(tag: DW_TAG_typedef, name: "v4f32", line: 14, file: !54, scope: !2, baseType: !6)
+!6 = !DICompositeType(tag: DW_TAG_array_type, size: 128, align: 128, file: !1, baseType: !7, elements: !8)
+!7 = !DIBasicType(tag: DW_TAG_base_type, name: "float", size: 32, align: 32, encoding: DW_ATE_float)
!8 = !{!9}
-!9 = !{!"0x21\000\004"} ; [ DW_TAG_subrange_type ]
-!10 = !{!"0x2e\00main\00main\00\0059\000\001\000\006\00256\001\0059", !54, !1, !11, null, i32 (i32, i8**)* @main, null, null, !52} ; [ DW_TAG_subprogram ] [line 59] [def] [main]
-!11 = !{!"0x15\00\000\000\000\000\000\000", !54, !1, null, !12, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!9 = !DISubrange(count: 4)
+!10 = !DISubprogram(name: "main", line: 59, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 59, file: !54, scope: !1, type: !11, function: i32 (i32, i8**)* @main, variables: !52)
+!11 = !DISubroutineType(types: !12)
!12 = !{!13}
-!13 = !{!"0x24\00int\000\0032\0032\000\000\005", null, !2} ; [ DW_TAG_base_type ]
-!14 = !{!"0x2e\00printFV\00printFV\00\0041\001\001\000\006\00256\001\0041", !55, !15, !16, null, null, null, null, !53} ; [ DW_TAG_subprogram ] [line 41] [local] [def] [printFV]
-!15 = !{!"0x29", !55} ; [ DW_TAG_file_type ]
-!16 = !{!"0x15\00\000\000\000\000\000\000", !55, !15, null, !17, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!13 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!14 = !DISubprogram(name: "printFV", line: 41, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 41, file: !55, scope: !15, type: !16, variables: !53)
+!15 = !DIFile(filename: "/Volumes/Lalgate/work/llvm/projects/llvm-test/SingleSource/UnitTests/Vector/helpers.h", directory: "/private/tmp")
+!16 = !DISubroutineType(types: !17)
!17 = !{null}
-!18 = !{!"0x101\00a\0016777219\000", !0, !1, !7} ; [ DW_TAG_arg_variable ]
-!19 = !{!"0x101\00argc\0016777275\000", !10, !1, !13} ; [ DW_TAG_arg_variable ]
-!20 = !{!"0x101\00argv\0033554491\000", !10, !1, !21} ; [ DW_TAG_arg_variable ]
-!21 = !{!"0xf\00\000\0032\0032\000\000", null, !2, !22} ; [ DW_TAG_pointer_type ]
-!22 = !{!"0xf\00\000\0032\0032\000\000", null, !2, !23} ; [ DW_TAG_pointer_type ]
-!23 = !{!"0x24\00char\000\008\008\000\000\006", null, !2} ; [ DW_TAG_base_type ]
-!24 = !{!"0x100\00i\0060\000", !25, !1, !13} ; [ DW_TAG_auto_variable ]
-!25 = !{!"0xb\0059\0033\0014", !54, !10} ; [ DW_TAG_lexical_block ]
-!26 = !{!"0x100\00j\0060\000", !25, !1, !13} ; [ DW_TAG_auto_variable ]
-!27 = !{!"0x100\00x\0061\000", !25, !1, !5} ; [ DW_TAG_auto_variable ]
-!28 = !{!"0x100\00y\0062\000", !25, !1, !5} ; [ DW_TAG_auto_variable ]
-!29 = !{!"0x100\00z\0063\000", !25, !1, !5} ; [ DW_TAG_auto_variable ]
-!30 = !{!"0x101\00F\0016777257\000", !14, !15, !31} ; [ DW_TAG_arg_variable ]
-!31 = !{!"0xf\00\000\0032\0032\000\000", null, !2, !32} ; [ DW_TAG_pointer_type ]
-!32 = !{!"0x16\00FV\0025\000\000\000\000", !55, !2, !33} ; [ DW_TAG_typedef ]
-!33 = !{!"0x17\00\0022\00128\00128\000\000\000", !55, !2, i32 0, !34, null} ; [ DW_TAG_union_type ]
+!18 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "a", line: 3, arg: 1, scope: !0, file: !1, type: !7)
+!19 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "argc", line: 59, arg: 1, scope: !10, file: !1, type: !13)
+!20 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "argv", line: 59, arg: 2, scope: !10, file: !1, type: !21)
+!21 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 32, align: 32, scope: !2, baseType: !22)
+!22 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 32, align: 32, scope: !2, baseType: !23)
+!23 = !DIBasicType(tag: DW_TAG_base_type, name: "char", size: 8, align: 8, encoding: DW_ATE_signed_char)
+!24 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "i", line: 60, scope: !25, file: !1, type: !13)
+!25 = distinct !DILexicalBlock(line: 59, column: 33, file: !54, scope: !10)
+!26 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "j", line: 60, scope: !25, file: !1, type: !13)
+!27 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "x", line: 61, scope: !25, file: !1, type: !5)
+!28 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "y", line: 62, scope: !25, file: !1, type: !5)
+!29 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "z", line: 63, scope: !25, file: !1, type: !5)
+!30 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "F", line: 41, arg: 1, scope: !14, file: !15, type: !31)
+!31 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 32, align: 32, scope: !2, baseType: !32)
+!32 = !DIDerivedType(tag: DW_TAG_typedef, name: "FV", line: 25, file: !55, scope: !2, baseType: !33)
+!33 = !DICompositeType(tag: DW_TAG_union_type, line: 22, size: 128, align: 128, file: !55, scope: !2, elements: !34)
!34 = !{!35, !37}
-!35 = !{!"0xd\00V\0023\00128\00128\000\000", !55, !15, !36} ; [ DW_TAG_member ]
-!36 = !{!"0x16\00v4sf\003\000\000\000\000", !55, !2, !6} ; [ DW_TAG_typedef ]
-!37 = !{!"0xd\00A\0024\00128\0032\000\000", !55, !15, !38} ; [ DW_TAG_member ]
-!38 = !{!"0x1\00\000\00128\0032\000\000", null, !2, !7, !8, i32 0, i32 0} ; [ DW_TAG_array_type ]
-!39 = !MDLocation(line: 79, column: 7, scope: !40)
-!40 = !{!"0xb\0075\0035\0018", !54, !41} ; [ DW_TAG_lexical_block ]
-!41 = !{!"0xb\0075\005\0017", !54, !42} ; [ DW_TAG_lexical_block ]
-!42 = !{!"0xb\0071\0032\0016", !54, !43} ; [ DW_TAG_lexical_block ]
-!43 = !{!"0xb\0071\003\0015", !54, !25} ; [ DW_TAG_lexical_block ]
-!44 = !MDLocation(line: 75, column: 5, scope: !42)
-!45 = !MDLocation(line: 42, column: 2, scope: !46, inlinedAt: !48)
-!46 = !{!"0xb\0042\002\0020", !55, !47} ; [ DW_TAG_lexical_block ]
-!47 = !{!"0xb\0041\0028\0019", !55, !14} ; [ DW_TAG_lexical_block ]
-!48 = !MDLocation(line: 95, column: 3, scope: !25)
-!49 = !MDLocation(line: 99, column: 3, scope: !25)
+!35 = !DIDerivedType(tag: DW_TAG_member, name: "V", line: 23, size: 128, align: 128, file: !55, scope: !15, baseType: !36)
+!36 = !DIDerivedType(tag: DW_TAG_typedef, name: "v4sf", line: 3, file: !55, scope: !2, baseType: !6)
+!37 = !DIDerivedType(tag: DW_TAG_member, name: "A", line: 24, size: 128, align: 32, file: !55, scope: !15, baseType: !38)
+!38 = !DICompositeType(tag: DW_TAG_array_type, size: 128, align: 32, scope: !2, baseType: !7, elements: !8)
+!39 = !DILocation(line: 79, column: 7, scope: !40)
+!40 = distinct !DILexicalBlock(line: 75, column: 35, file: !54, scope: !41)
+!41 = distinct !DILexicalBlock(line: 75, column: 5, file: !54, scope: !42)
+!42 = distinct !DILexicalBlock(line: 71, column: 32, file: !54, scope: !43)
+!43 = distinct !DILexicalBlock(line: 71, column: 3, file: !54, scope: !25)
+!44 = !DILocation(line: 75, column: 5, scope: !42)
+!45 = !DILocation(line: 42, column: 2, scope: !46, inlinedAt: !48)
+!46 = distinct !DILexicalBlock(line: 42, column: 2, file: !55, scope: !47)
+!47 = distinct !DILexicalBlock(line: 41, column: 28, file: !55, scope: !14)
+!48 = !DILocation(line: 95, column: 3, scope: !25)
+!49 = !DILocation(line: 99, column: 3, scope: !25)
!50 = !{!0, !10, !14}
!51 = !{!18}
!52 = !{!19, !20, !24, !26, !27, !28, !29}
!53 = !{!30}
-!54 = !{!"build2.c", !"/private/tmp"}
-!55 = !{!"/Volumes/Lalgate/work/llvm/projects/llvm-test/SingleSource/UnitTests/Vector/helpers.h", !"/private/tmp"}
-!56 = !{i32 1, !"Debug Info Version", i32 2}
+!54 = !DIFile(filename: "build2.c", directory: "/private/tmp")
+!55 = !DIFile(filename: "/Volumes/Lalgate/work/llvm/projects/llvm-test/SingleSource/UnitTests/Vector/helpers.h", directory: "/private/tmp")
+!56 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/CodeGen/ARM/debug-info-s16-reg.ll b/test/CodeGen/ARM/debug-info-s16-reg.ll
index 9b303dde2e35..ec080f20db9c 100644
--- a/test/CodeGen/ARM/debug-info-s16-reg.ll
+++ b/test/CodeGen/ARM/debug-info-s16-reg.ll
@@ -2,7 +2,7 @@
; Radar 9309221
; Test dwarf reg no for s16
;CHECK: super-register DW_OP_regx
-;CHECK-NEXT: ascii
+;CHECK-NEXT: 264
;CHECK-NEXT: DW_OP_piece
;CHECK-NEXT: 4
@@ -14,12 +14,12 @@ target triple = "thumbv7-apple-macosx10.6.7"
define i32 @inlineprinter(i8* %ptr, float %val, i8 zeroext %c) nounwind optsize ssp {
entry:
- tail call void @llvm.dbg.value(metadata i8* %ptr, i64 0, metadata !8, metadata !{!"0x102"}), !dbg !24
- tail call void @llvm.dbg.value(metadata float %val, i64 0, metadata !10, metadata !{!"0x102"}), !dbg !25
- tail call void @llvm.dbg.value(metadata i8 %c, i64 0, metadata !12, metadata !{!"0x102"}), !dbg !26
+ tail call void @llvm.dbg.value(metadata i8* %ptr, i64 0, metadata !8, metadata !DIExpression()), !dbg !24
+ tail call void @llvm.dbg.value(metadata float %val, i64 0, metadata !10, metadata !DIExpression()), !dbg !25
+ tail call void @llvm.dbg.value(metadata i8 %c, i64 0, metadata !12, metadata !DIExpression()), !dbg !26
%conv = fpext float %val to double, !dbg !27
%conv3 = zext i8 %c to i32, !dbg !27
- %call = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([11 x i8]* @.str, i32 0, i32 0), i8* %ptr, double %conv, i32 %conv3) nounwind optsize, !dbg !27
+ %call = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i32 0, i32 0), i8* %ptr, double %conv, i32 %conv3) nounwind optsize, !dbg !27
ret i32 0, !dbg !29
}
@@ -27,33 +27,33 @@ declare i32 @printf(i8* nocapture, ...) nounwind optsize
define i32 @printer(i8* %ptr, float %val, i8 zeroext %c) nounwind optsize noinline ssp {
entry:
- tail call void @llvm.dbg.value(metadata i8* %ptr, i64 0, metadata !14, metadata !{!"0x102"}), !dbg !30
- tail call void @llvm.dbg.value(metadata float %val, i64 0, metadata !15, metadata !{!"0x102"}), !dbg !31
- tail call void @llvm.dbg.value(metadata i8 %c, i64 0, metadata !16, metadata !{!"0x102"}), !dbg !32
+ tail call void @llvm.dbg.value(metadata i8* %ptr, i64 0, metadata !14, metadata !DIExpression()), !dbg !30
+ tail call void @llvm.dbg.value(metadata float %val, i64 0, metadata !15, metadata !DIExpression()), !dbg !31
+ tail call void @llvm.dbg.value(metadata i8 %c, i64 0, metadata !16, metadata !DIExpression()), !dbg !32
%conv = fpext float %val to double, !dbg !33
%conv3 = zext i8 %c to i32, !dbg !33
- %call = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([11 x i8]* @.str, i32 0, i32 0), i8* %ptr, double %conv, i32 %conv3) nounwind optsize, !dbg !33
+ %call = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i32 0, i32 0), i8* %ptr, double %conv, i32 %conv3) nounwind optsize, !dbg !33
ret i32 0, !dbg !35
}
define i32 @main(i32 %argc, i8** nocapture %argv) nounwind optsize ssp {
entry:
- tail call void @llvm.dbg.value(metadata i32 %argc, i64 0, metadata !17, metadata !{!"0x102"}), !dbg !36
- tail call void @llvm.dbg.value(metadata i8** %argv, i64 0, metadata !18, metadata !{!"0x102"}), !dbg !37
+ tail call void @llvm.dbg.value(metadata i32 %argc, i64 0, metadata !17, metadata !DIExpression()), !dbg !36
+ tail call void @llvm.dbg.value(metadata i8** %argv, i64 0, metadata !18, metadata !DIExpression()), !dbg !37
%conv = sitofp i32 %argc to double, !dbg !38
%add = fadd double %conv, 5.555552e+05, !dbg !38
%conv1 = fptrunc double %add to float, !dbg !38
- tail call void @llvm.dbg.value(metadata float %conv1, i64 0, metadata !22, metadata !{!"0x102"}), !dbg !38
- %call = tail call i32 @puts(i8* getelementptr inbounds ([6 x i8]* @.str1, i32 0, i32 0)) nounwind optsize, !dbg !39
- %add.ptr = getelementptr i8* bitcast (i32 (i32, i8**)* @main to i8*), i32 %argc, !dbg !40
+ tail call void @llvm.dbg.value(metadata float %conv1, i64 0, metadata !22, metadata !DIExpression()), !dbg !38
+ %call = tail call i32 @puts(i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str1, i32 0, i32 0)) nounwind optsize, !dbg !39
+ %add.ptr = getelementptr i8, i8* bitcast (i32 (i32, i8**)* @main to i8*), i32 %argc, !dbg !40
%add5 = add nsw i32 %argc, 97, !dbg !40
%conv6 = trunc i32 %add5 to i8, !dbg !40
- tail call void @llvm.dbg.value(metadata i8* %add.ptr, i64 0, metadata !8, metadata !{!"0x102"}) nounwind, !dbg !41
- tail call void @llvm.dbg.value(metadata float %conv1, i64 0, metadata !10, metadata !{!"0x102"}) nounwind, !dbg !42
- tail call void @llvm.dbg.value(metadata i8 %conv6, i64 0, metadata !12, metadata !{!"0x102"}) nounwind, !dbg !43
+ tail call void @llvm.dbg.value(metadata i8* %add.ptr, i64 0, metadata !58, metadata !DIExpression()) nounwind, !dbg !41
+ tail call void @llvm.dbg.value(metadata float %conv1, i64 0, metadata !60, metadata !DIExpression()) nounwind, !dbg !42
+ tail call void @llvm.dbg.value(metadata i8 %conv6, i64 0, metadata !62, metadata !DIExpression()) nounwind, !dbg !43
%conv.i = fpext float %conv1 to double, !dbg !44
%conv3.i = and i32 %add5, 255, !dbg !44
- %call.i = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([11 x i8]* @.str, i32 0, i32 0), i8* %add.ptr, double %conv.i, i32 %conv3.i) nounwind optsize, !dbg !44
+ %call.i = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i32 0, i32 0), i8* %add.ptr, double %conv.i, i32 %conv3.i) nounwind optsize, !dbg !44
%call14 = tail call i32 @printer(i8* %add.ptr, float %conv1, i8 zeroext %conv6) optsize, !dbg !45
ret i32 0, !dbg !46
}
@@ -65,57 +65,62 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnon
!llvm.dbg.cu = !{!2}
!llvm.module.flags = !{!53}
-!0 = !{!"0x2e\00inlineprinter\00inlineprinter\00\005\000\001\000\006\00256\001\005", !51, !1, !3, null, i32 (i8*, float, i8)* @inlineprinter, null, null, !48} ; [ DW_TAG_subprogram ] [line 5] [def] [inlineprinter]
-!1 = !{!"0x29", !51} ; [ DW_TAG_file_type ]
-!2 = !{!"0x11\0012\00clang version 3.0 (trunk 129915)\001\00\000\00\001", !51, !52, !52, !47, null, null} ; [ DW_TAG_compile_unit ]
-!3 = !{!"0x15\00\000\000\000\000\000\000", !51, !1, null, !4, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!0 = !DISubprogram(name: "inlineprinter", line: 5, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 5, file: !51, scope: !1, type: !3, function: i32 (i8*, float, i8)* @inlineprinter, variables: !48)
+!1 = !DIFile(filename: "a.c", directory: "/private/tmp")
+!2 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.0 (trunk 129915)", isOptimized: true, emissionKind: 1, file: !51, enums: !52, retainedTypes: !52, subprograms: !47, imports: null)
+!3 = !DISubroutineType(types: !4)
!4 = !{!5}
-!5 = !{!"0x24\00int\000\0032\0032\000\000\005", null, !2} ; [ DW_TAG_base_type ]
-!6 = !{!"0x2e\00printer\00printer\00\0012\000\001\000\006\00256\001\0012", !51, !1, !3, null, i32 (i8*, float, i8)* @printer, null, null, !49} ; [ DW_TAG_subprogram ] [line 12] [def] [printer]
-!7 = !{!"0x2e\00main\00main\00\0018\000\001\000\006\00256\001\0018", !51, !1, !3, null, i32 (i32, i8**)* @main, null, null, !50} ; [ DW_TAG_subprogram ] [line 18] [def] [main]
-!8 = !{!"0x101\00ptr\0016777220\000", !0, !1, !9} ; [ DW_TAG_arg_variable ]
-!9 = !{!"0xf\00\000\0032\0032\000\000", null, !2, null} ; [ DW_TAG_pointer_type ]
-!10 = !{!"0x101\00val\0033554436\000", !0, !1, !11} ; [ DW_TAG_arg_variable ]
-!11 = !{!"0x24\00float\000\0032\0032\000\000\004", null, !2} ; [ DW_TAG_base_type ]
-!12 = !{!"0x101\00c\0050331652\000", !0, !1, !13} ; [ DW_TAG_arg_variable ]
-!13 = !{!"0x24\00unsigned char\000\008\008\000\000\008", null, !2} ; [ DW_TAG_base_type ]
-!14 = !{!"0x101\00ptr\0016777227\000", !6, !1, !9} ; [ DW_TAG_arg_variable ]
-!15 = !{!"0x101\00val\0033554443\000", !6, !1, !11} ; [ DW_TAG_arg_variable ]
-!16 = !{!"0x101\00c\0050331659\000", !6, !1, !13} ; [ DW_TAG_arg_variable ]
-!17 = !{!"0x101\00argc\0016777233\000", !7, !1, !5} ; [ DW_TAG_arg_variable ]
-!18 = !{!"0x101\00argv\0033554449\000", !7, !1, !19} ; [ DW_TAG_arg_variable ]
-!19 = !{!"0xf\00\000\0032\0032\000\000", null, !2, !20} ; [ DW_TAG_pointer_type ]
-!20 = !{!"0xf\00\000\0032\0032\000\000", null, !2, !21} ; [ DW_TAG_pointer_type ]
-!21 = !{!"0x24\00char\000\008\008\000\000\006", null, !2} ; [ DW_TAG_base_type ]
-!22 = !{!"0x100\00dval\0019\000", !23, !1, !11} ; [ DW_TAG_auto_variable ]
-!23 = !{!"0xb\0018\001\002", !51, !7} ; [ DW_TAG_lexical_block ]
-!24 = !MDLocation(line: 4, column: 22, scope: !0)
-!25 = !MDLocation(line: 4, column: 33, scope: !0)
-!26 = !MDLocation(line: 4, column: 52, scope: !0)
-!27 = !MDLocation(line: 6, column: 3, scope: !28)
-!28 = !{!"0xb\005\001\000", !51, !0} ; [ DW_TAG_lexical_block ]
-!29 = !MDLocation(line: 7, column: 3, scope: !28)
-!30 = !MDLocation(line: 11, column: 42, scope: !6)
-!31 = !MDLocation(line: 11, column: 53, scope: !6)
-!32 = !MDLocation(line: 11, column: 72, scope: !6)
-!33 = !MDLocation(line: 13, column: 3, scope: !34)
-!34 = !{!"0xb\0012\001\001", !51, !6} ; [ DW_TAG_lexical_block ]
-!35 = !MDLocation(line: 14, column: 3, scope: !34)
-!36 = !MDLocation(line: 17, column: 15, scope: !7)
-!37 = !MDLocation(line: 17, column: 28, scope: !7)
-!38 = !MDLocation(line: 19, column: 31, scope: !23)
-!39 = !MDLocation(line: 20, column: 3, scope: !23)
-!40 = !MDLocation(line: 21, column: 3, scope: !23)
-!41 = !MDLocation(line: 4, column: 22, scope: !0, inlinedAt: !40)
-!42 = !MDLocation(line: 4, column: 33, scope: !0, inlinedAt: !40)
-!43 = !MDLocation(line: 4, column: 52, scope: !0, inlinedAt: !40)
-!44 = !MDLocation(line: 6, column: 3, scope: !28, inlinedAt: !40)
-!45 = !MDLocation(line: 22, column: 3, scope: !23)
-!46 = !MDLocation(line: 23, column: 1, scope: !23)
+!5 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!6 = !DISubprogram(name: "printer", line: 12, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 12, file: !51, scope: !1, type: !3, function: i32 (i8*, float, i8)* @printer, variables: !49)
+!7 = !DISubprogram(name: "main", line: 18, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 18, file: !51, scope: !1, type: !3, function: i32 (i32, i8**)* @main, variables: !50)
+!8 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "ptr", line: 4, arg: 1, scope: !0, file: !1, type: !9)
+!9 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 32, align: 32, scope: !2, baseType: null)
+!10 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "val", line: 4, arg: 2, scope: !0, file: !1, type: !11)
+!11 = !DIBasicType(tag: DW_TAG_base_type, name: "float", size: 32, align: 32, encoding: DW_ATE_float)
+!12 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "c", line: 4, arg: 3, scope: !0, file: !1, type: !13)
+!13 = !DIBasicType(tag: DW_TAG_base_type, name: "unsigned char", size: 8, align: 8, encoding: DW_ATE_unsigned_char)
+
+!58 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "ptr", line: 4, arg: 1, scope: !0, file: !1, type: !9)
+!60 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "val", line: 4, arg: 2, scope: !0, file: !1, type: !11)
+!62 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "c", line: 4, arg: 3, scope: !0, file: !1, type: !13)
+
+!14 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "ptr", line: 11, arg: 1, scope: !6, file: !1, type: !9)
+!15 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "val", line: 11, arg: 2, scope: !6, file: !1, type: !11)
+!16 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "c", line: 11, arg: 3, scope: !6, file: !1, type: !13)
+!17 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "argc", line: 17, arg: 1, scope: !7, file: !1, type: !5)
+!18 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "argv", line: 17, arg: 2, scope: !7, file: !1, type: !19)
+!19 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 32, align: 32, scope: !2, baseType: !20)
+!20 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 32, align: 32, scope: !2, baseType: !21)
+!21 = !DIBasicType(tag: DW_TAG_base_type, name: "char", size: 8, align: 8, encoding: DW_ATE_signed_char)
+!22 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "dval", line: 19, scope: !23, file: !1, type: !11)
+!23 = distinct !DILexicalBlock(line: 18, column: 1, file: !51, scope: !7)
+!24 = !DILocation(line: 4, column: 22, scope: !0)
+!25 = !DILocation(line: 4, column: 33, scope: !0)
+!26 = !DILocation(line: 4, column: 52, scope: !0)
+!27 = !DILocation(line: 6, column: 3, scope: !28)
+!28 = distinct !DILexicalBlock(line: 5, column: 1, file: !51, scope: !0)
+!29 = !DILocation(line: 7, column: 3, scope: !28)
+!30 = !DILocation(line: 11, column: 42, scope: !6)
+!31 = !DILocation(line: 11, column: 53, scope: !6)
+!32 = !DILocation(line: 11, column: 72, scope: !6)
+!33 = !DILocation(line: 13, column: 3, scope: !34)
+!34 = distinct !DILexicalBlock(line: 12, column: 1, file: !51, scope: !6)
+!35 = !DILocation(line: 14, column: 3, scope: !34)
+!36 = !DILocation(line: 17, column: 15, scope: !7)
+!37 = !DILocation(line: 17, column: 28, scope: !7)
+!38 = !DILocation(line: 19, column: 31, scope: !23)
+!39 = !DILocation(line: 20, column: 3, scope: !23)
+!40 = !DILocation(line: 21, column: 3, scope: !23)
+!41 = !DILocation(line: 4, column: 22, scope: !0, inlinedAt: !40)
+!42 = !DILocation(line: 4, column: 33, scope: !0, inlinedAt: !40)
+!43 = !DILocation(line: 4, column: 52, scope: !0, inlinedAt: !40)
+!44 = !DILocation(line: 6, column: 3, scope: !28, inlinedAt: !40)
+!45 = !DILocation(line: 22, column: 3, scope: !23)
+!46 = !DILocation(line: 23, column: 1, scope: !23)
!47 = !{!0, !6, !7}
!48 = !{!8, !10, !12}
!49 = !{!14, !15, !16}
!50 = !{!17, !18, !22}
-!51 = !{!"a.c", !"/private/tmp"}
-!52 = !{i32 0}
-!53 = !{i32 1, !"Debug Info Version", i32 2}
+!51 = !DIFile(filename: "a.c", directory: "/private/tmp")
+!52 = !{}
+!53 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/CodeGen/ARM/debug-info-sreg2.ll b/test/CodeGen/ARM/debug-info-sreg2.ll
index 977a6f27677c..f22559efad4d 100644
--- a/test/CodeGen/ARM/debug-info-sreg2.ll
+++ b/test/CodeGen/ARM/debug-info-sreg2.ll
@@ -15,7 +15,7 @@ target triple = "thumbv7-apple-macosx10.6.7"
define void @_Z3foov() optsize ssp {
entry:
%call = tail call float @_Z3barv() optsize, !dbg !11
- tail call void @llvm.dbg.value(metadata float %call, i64 0, metadata !5, metadata !{!"0x102"}), !dbg !11
+ tail call void @llvm.dbg.value(metadata float %call, i64 0, metadata !5, metadata !DIExpression()), !dbg !11
%call16 = tail call float @_Z2f2v() optsize, !dbg !12
%cmp7 = fcmp olt float %call, %call16, !dbg !12
br i1 %cmp7, label %for.body, label %for.end, !dbg !12
@@ -43,24 +43,24 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnon
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!20}
-!0 = !{!"0x11\004\00clang version 3.0 (trunk 130845)\001\00\000\00\001", !18, !19, !19, !16, null, null} ; [ DW_TAG_compile_unit ]
-!1 = !{!"0x2e\00foo\00foo\00_Z3foov\005\000\001\000\006\00256\001\005", !18, !2, !3, null, void ()* @_Z3foov, null, null, !17} ; [ DW_TAG_subprogram ] [line 5] [def] [foo]
-!2 = !{!"0x29", !18} ; [ DW_TAG_file_type ]
-!3 = !{!"0x15\00\000\000\000\000\000\000", !18, !2, null, !4, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.0 (trunk 130845)", isOptimized: true, emissionKind: 1, file: !18, enums: !19, retainedTypes: !19, subprograms: !16, imports: null)
+!1 = !DISubprogram(name: "foo", linkageName: "_Z3foov", line: 5, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 5, file: !18, scope: !2, type: !3, function: void ()* @_Z3foov, variables: !17)
+!2 = !DIFile(filename: "k.cc", directory: "/private/tmp")
+!3 = !DISubroutineType(types: !4)
!4 = !{null}
-!5 = !{!"0x100\00k\006\000", !6, !2, !7} ; [ DW_TAG_auto_variable ]
-!6 = !{!"0xb\005\0012\000", !18, !1} ; [ DW_TAG_lexical_block ]
-!7 = !{!"0x24\00float\000\0032\0032\000\000\004", null, !0} ; [ DW_TAG_base_type ]
-!8 = !{!"0x100\00y\008\000", !9, !2, !7} ; [ DW_TAG_auto_variable ]
-!9 = !{!"0xb\007\0025\002", !18, !10} ; [ DW_TAG_lexical_block ]
-!10 = !{!"0xb\007\003\001", !18, !6} ; [ DW_TAG_lexical_block ]
-!11 = !MDLocation(line: 6, column: 18, scope: !6)
-!12 = !MDLocation(line: 7, column: 3, scope: !6)
-!13 = !MDLocation(line: 8, column: 20, scope: !9)
-!14 = !MDLocation(line: 7, column: 20, scope: !10)
-!15 = !MDLocation(line: 10, column: 1, scope: !6)
+!5 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "k", line: 6, scope: !6, file: !2, type: !7)
+!6 = distinct !DILexicalBlock(line: 5, column: 12, file: !18, scope: !1)
+!7 = !DIBasicType(tag: DW_TAG_base_type, name: "float", size: 32, align: 32, encoding: DW_ATE_float)
+!8 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "y", line: 8, scope: !9, file: !2, type: !7)
+!9 = distinct !DILexicalBlock(line: 7, column: 25, file: !18, scope: !10)
+!10 = distinct !DILexicalBlock(line: 7, column: 3, file: !18, scope: !6)
+!11 = !DILocation(line: 6, column: 18, scope: !6)
+!12 = !DILocation(line: 7, column: 3, scope: !6)
+!13 = !DILocation(line: 8, column: 20, scope: !9)
+!14 = !DILocation(line: 7, column: 20, scope: !10)
+!15 = !DILocation(line: 10, column: 1, scope: !6)
!16 = !{!1}
!17 = !{!5, !8}
-!18 = !{!"k.cc", !"/private/tmp"}
-!19 = !{i32 0}
-!20 = !{i32 1, !"Debug Info Version", i32 2}
+!18 = !DIFile(filename: "k.cc", directory: "/private/tmp")
+!19 = !{}
+!20 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/CodeGen/ARM/debug-segmented-stacks.ll b/test/CodeGen/ARM/debug-segmented-stacks.ll
index 7ea5665a7a9b..47d366e49ded 100644
--- a/test/CodeGen/ARM/debug-segmented-stacks.ll
+++ b/test/CodeGen/ARM/debug-segmented-stacks.ll
@@ -39,40 +39,40 @@ define void @test_basic() #0 {
; ARM-linux .cfi_same_value r5
}
-!0 = !{!"0x11\0012\00clang version 3.5 \000\00\000\00\000", !1, !2, !2, !3, !2, !2} ; [ DW_TAG_compile_unit ] [/tmp/var.c] [DW_LANG_C99]
-!1 = !{!"var.c", !"/tmp"}
+!0 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.5 ", isOptimized: false, emissionKind: 0, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!1 = !DIFile(filename: "var.c", directory: "/tmp")
!2 = !{}
!3 = !{!4}
-!4 = !{!"0x2e\00test_basic\00test_basic\00\005\000\001\000\006\00256\000\005", !1, !5, !6, null, void ()* @test_basic, null, null, !2} ; [ DW_TAG_subprogram ] [line 5] [def] [sum]
-!5 = !{!"0x29", !1} ; [ DW_TAG_file_type ] [/tmp/var.c]
-!6 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !7, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!4 = !DISubprogram(name: "test_basic", line: 5, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 5, file: !1, scope: !5, type: !6, function: void ()* @test_basic, variables: !2)
+!5 = !DIFile(filename: "var.c", directory: "/tmp")
+!6 = !DISubroutineType(types: !7)
!7 = !{!8, !8}
-!8 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!8 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
!9 = !{i32 2, !"Dwarf Version", i32 4}
-!10 = !{i32 1, !"Debug Info Version", i32 2}
+!10 = !{i32 1, !"Debug Info Version", i32 3}
!11 = !{!"clang version 3.5 "}
-!12 = !{!"0x101\00count\0016777221\000", !4, !5, !8} ; [ DW_TAG_arg_variable ] [count] [line 5]
-!13 = !MDLocation(line: 5, scope: !4)
-!14 = !{!"0x100\00vl\006\000", !4, !5, !15} ; [ DW_TAG_auto_variable ] [vl] [line 6]
-!15 = !{!"0x16\00va_list\0030\000\000\000\000", !16, null, !17} ; [ DW_TAG_typedef ] [va_list] [line 30, size 0, align 0, offset 0] [from __builtin_va_list]
-!16 = !{!"/linux-x86_64-high/gcc_4.7.2/dbg/llvm/bin/../lib/clang/3.5/include/stdarg.h", !"/tmp"}
-!17 = !{!"0x16\00__builtin_va_list\006\000\000\000\000", !1, null, !18} ; [ DW_TAG_typedef ] [__builtin_va_list] [line 6, size 0, align 0, offset 0] [from __va_list]
-!18 = !{!"0x13\00__va_list\006\0032\0032\000\000\000", !1, null, null, !19, null, null, null} ; [ DW_TAG_structure_type ] [__va_list] [line 6, size 32, align 32, offset 0] [def] [from ]
+!12 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "count", line: 5, arg: 1, scope: !4, file: !5, type: !8)
+!13 = !DILocation(line: 5, scope: !4)
+!14 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "vl", line: 6, scope: !4, file: !5, type: !15)
+!15 = !DIDerivedType(tag: DW_TAG_typedef, name: "va_list", line: 30, file: !16, baseType: !17)
+!16 = !DIFile(filename: "/linux-x86_64-high/gcc_4.7.2/dbg/llvm/bin/../lib/clang/3.5/include/stdarg.h", directory: "/tmp")
+!17 = !DIDerivedType(tag: DW_TAG_typedef, name: "__builtin_va_list", line: 6, file: !1, baseType: !18)
+!18 = !DICompositeType(tag: DW_TAG_structure_type, name: "__va_list", line: 6, size: 32, align: 32, file: !1, elements: !19)
!19 = !{!20}
-!20 = !{!"0xd\00__ap\006\0032\0032\000\000", !1, !18, !21} ; [ DW_TAG_member ] [__ap] [line 6, size 32, align 32, offset 0] [from ]
-!21 = !{!"0xf\00\000\0032\0032\000\000", null, null, null} ; [ DW_TAG_pointer_type ] [line 0, size 32, align 32, offset 0] [from ]
-!22 = !MDLocation(line: 6, scope: !4)
-!23 = !MDLocation(line: 7, scope: !4)
-!24 = !{!"0x100\00test_basic\008\000", !4, !5, !8} ; [ DW_TAG_auto_variable ] [sum] [line 8]
-!25 = !MDLocation(line: 8, scope: !4)
-!26 = !{!"0x100\00i\009\000", !27, !5, !8} ; [ DW_TAG_auto_variable ] [i] [line 9]
-!27 = !{!"0xb\009\000\000", !1, !4} ; [ DW_TAG_lexical_block ] [/tmp/var.c]
-!28 = !MDLocation(line: 9, scope: !27)
-!29 = !MDLocation(line: 10, scope: !30)
-!30 = !{!"0xb\009\000\001", !1, !27} ; [ DW_TAG_lexical_block ] [/tmp/var.c]
-!31 = !MDLocation(line: 11, scope: !30)
-!32 = !MDLocation(line: 12, scope: !4)
-!33 = !MDLocation(line: 13, scope: !4)
+!20 = !DIDerivedType(tag: DW_TAG_member, name: "__ap", line: 6, size: 32, align: 32, file: !1, scope: !18, baseType: !21)
+!21 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 32, align: 32, baseType: null)
+!22 = !DILocation(line: 6, scope: !4)
+!23 = !DILocation(line: 7, scope: !4)
+!24 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "test_basic", line: 8, scope: !4, file: !5, type: !8)
+!25 = !DILocation(line: 8, scope: !4)
+!26 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "i", line: 9, scope: !27, file: !5, type: !8)
+!27 = distinct !DILexicalBlock(line: 9, column: 0, file: !1, scope: !4)
+!28 = !DILocation(line: 9, scope: !27)
+!29 = !DILocation(line: 10, scope: !30)
+!30 = distinct !DILexicalBlock(line: 9, column: 0, file: !1, scope: !27)
+!31 = !DILocation(line: 11, scope: !30)
+!32 = !DILocation(line: 12, scope: !4)
+!33 = !DILocation(line: 13, scope: !4)
; Just to prevent the alloca from being optimized away
declare void @dummy_use(i32*, i32)
diff --git a/test/CodeGen/ARM/disable-fp-elim.ll b/test/CodeGen/ARM/disable-fp-elim.ll
new file mode 100644
index 000000000000..dafeda2ac762
--- /dev/null
+++ b/test/CodeGen/ARM/disable-fp-elim.ll
@@ -0,0 +1,25 @@
+; RUN: llc < %s -mtriple armv7-none-linux-gnueabi -O1 | FileCheck %s --check-prefix=DISABLE-FP-ELIM
+; RUN: llc < %s -mtriple armv7-none-linux-gnueabi -disable-fp-elim -O1 | FileCheck %s --check-prefix=DISABLE-FP-ELIM
+; RUN: llc < %s -mtriple armv7-none-linux-gnueabi -disable-fp-elim=false -O1 | FileCheck %s --check-prefix=ENABLE-FP-ELIM
+; RUN: llc < %s -mtriple armv7-none-linux-gnueabi -disable-fp-elim=false -O0 | FileCheck %s --check-prefix=DISABLE-FP-ELIM
+
+; Check that command line option "-disable-fp-elim" overrides function attribute
+; "no-frame-pointer-elim". Also, check frame pointer elimination is disabled
+; when fast-isel is used.
+
+; ENABLE-FP-ELIM-NOT: .setfp
+; DISABLE-FP-ELIM: .setfp r11, sp
+
+define i32 @foo1(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e) #0 {
+entry:
+ %call = tail call i32 @foo2(i32 %a)
+ %add = add i32 %c, %b
+ %add1 = add i32 %add, %d
+ %add2 = add i32 %add1, %e
+ %add3 = add i32 %add2, %call
+ ret i32 %add3
+}
+
+declare i32 @foo2(i32)
+
+attributes #0 = { nounwind "no-frame-pointer-elim"="true" }
diff --git a/test/CodeGen/ARM/div.ll b/test/CodeGen/ARM/div.ll
index a339c816c578..7b298fee42a5 100644
--- a/test/CodeGen/ARM/div.ll
+++ b/test/CodeGen/ARM/div.ll
@@ -1,11 +1,13 @@
-; RUN: llc < %s -mtriple=arm-apple-ios -mcpu=cortex-a8 | FileCheck %s -check-prefix=CHECK-ARM
-; RUN: llc < %s -mtriple=arm-apple-ios -mcpu=swift | FileCheck %s -check-prefix=CHECK-HWDIV
-; RUN: llc < %s -mtriple=arm-apple-ios -mcpu=cortex-r5 | FileCheck %s -check-prefix=CHECK-HWDIV
+; RUN: llc < %s -mtriple=arm-apple-ios -mcpu=cortex-a8 | FileCheck %s -check-prefix=CHECK-SWDIV
+; RUN: llc < %s -mtriple=arm-apple-ios -mcpu=swift | FileCheck %s -check-prefix=CHECK-HWDIV
+; RUN: llc < %s -mtriple=arm-apple-ios -mcpu=cortex-r4 | FileCheck %s -check-prefix=CHECK-SWDIV
+; RUN: llc < %s -mtriple=arm-apple-ios -mcpu=cortex-r4f | FileCheck %s -check-prefix=CHECK-SWDIV
+; RUN: llc < %s -mtriple=arm-apple-ios -mcpu=cortex-r5 | FileCheck %s -check-prefix=CHECK-HWDIV
define i32 @f1(i32 %a, i32 %b) {
entry:
-; CHECK-ARM: f1
-; CHECK-ARM: __divsi3
+; CHECK-SWDIV: f1
+; CHECK-SWDIV: __divsi3
; CHECK-HWDIV: f1
; CHECK-HWDIV: sdiv
@@ -15,8 +17,8 @@ entry:
define i32 @f2(i32 %a, i32 %b) {
entry:
-; CHECK-ARM: f2
-; CHECK-ARM: __udivsi3
+; CHECK-SWDIV: f2
+; CHECK-SWDIV: __udivsi3
; CHECK-HWDIV: f2
; CHECK-HWDIV: udiv
@@ -26,8 +28,8 @@ entry:
define i32 @f3(i32 %a, i32 %b) {
entry:
-; CHECK-ARM: f3
-; CHECK-ARM: __modsi3
+; CHECK-SWDIV: f3
+; CHECK-SWDIV: __modsi3
; CHECK-HWDIV: f3
; CHECK-HWDIV: sdiv
@@ -38,8 +40,8 @@ entry:
define i32 @f4(i32 %a, i32 %b) {
entry:
-; CHECK-ARM: f4
-; CHECK-ARM: __umodsi3
+; CHECK-SWDIV: f4
+; CHECK-SWDIV: __umodsi3
; CHECK-HWDIV: f4
; CHECK-HWDIV: udiv
diff --git a/test/CodeGen/ARM/divmod.ll b/test/CodeGen/ARM/divmod.ll
index 7be0c796bd21..9336d0c477d1 100644
--- a/test/CodeGen/ARM/divmod.ll
+++ b/test/CodeGen/ARM/divmod.ll
@@ -16,7 +16,7 @@ entry:
%div = sdiv i32 %x, %y
store i32 %div, i32* %P, align 4
%rem = srem i32 %x, %y
- %arrayidx6 = getelementptr inbounds i32* %P, i32 1
+ %arrayidx6 = getelementptr inbounds i32, i32* %P, i32 1
store i32 %rem, i32* %arrayidx6, align 4
ret void
}
@@ -34,7 +34,7 @@ entry:
%div = udiv i32 %x, %y
store i32 %div, i32* %P, align 4
%rem = urem i32 %x, %y
- %arrayidx6 = getelementptr inbounds i32* %P, i32 1
+ %arrayidx6 = getelementptr inbounds i32, i32* %P, i32 1
store i32 %rem, i32* %arrayidx6, align 4
ret void
}
@@ -47,7 +47,7 @@ define void @do_indent(i32 %cols) nounwind {
entry:
; A8-LABEL: do_indent:
; SWIFT-LABEL: do_indent:
- %0 = load i32* @flags, align 4
+ %0 = load i32, i32* @flags, align 4
%1 = and i32 %0, 67108864
%2 = icmp eq i32 %1, 0
br i1 %2, label %bb1, label %bb
@@ -57,7 +57,7 @@ bb:
; SWIFT: sdiv
; SWIFT: mls
; SWIFT-NOT: bl __divmodsi4
- %3 = load i32* @tabsize, align 4
+ %3 = load i32, i32* @tabsize, align 4
%4 = srem i32 %cols, %3
%5 = sdiv i32 %cols, %3
%6 = tail call i32 @llvm.objectsize.i32.p0i8(i8* null, i1 false)
@@ -66,7 +66,7 @@ bb:
bb1:
%line_indent_len.0 = phi i32 [ %4, %bb ], [ 0, %entry ]
- %8 = getelementptr inbounds i8* null, i32 %line_indent_len.0
+ %8 = getelementptr inbounds i8, i8* null, i32 %line_indent_len.0
store i8 0, i8* %8, align 1
ret void
}
diff --git a/test/CodeGen/ARM/dwarf-eh.ll b/test/CodeGen/ARM/dwarf-eh.ll
index 0b8a072ec95c..c890206b3532 100644
--- a/test/CodeGen/ARM/dwarf-eh.ll
+++ b/test/CodeGen/ARM/dwarf-eh.ll
@@ -2,6 +2,10 @@
; RUN: FileCheck %s
; RUN: llc -mtriple=arm-netbsd-eabi -o - -filetype=asm %s \
; RUN: -relocation-model=pic | FileCheck -check-prefix=CHECK-PIC %s
+; RUN: llc -mtriple=armv7-bitrig-gnueabihf -o - -filetype=asm %s | \
+; RUN: FileCheck %s
+; RUN: llc -mtriple=armv7-bitrig-gnueabihf -o - -filetype=asm %s \
+; RUN: -relocation-model=pic | FileCheck -check-prefix=CHECK-PIC %s
; ModuleID = 'test.cc'
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64"
@@ -11,7 +15,7 @@ target triple = "armv5e--netbsd-eabi"
@_ZTVN10__cxxabiv117__class_type_infoE = external global i8*
@_ZTS9exception = linkonce_odr constant [11 x i8] c"9exception\00"
-@_ZTI9exception = linkonce_odr unnamed_addr constant { i8*, i8* } { i8* bitcast (i8** getelementptr inbounds (i8** @_ZTVN10__cxxabiv117__class_type_infoE, i32 2) to i8*), i8* getelementptr inbounds ([11 x i8]* @_ZTS9exception, i32 0, i32 0) }
+@_ZTI9exception = linkonce_odr unnamed_addr constant { i8*, i8* } { i8* bitcast (i8** getelementptr inbounds (i8*, i8** @_ZTVN10__cxxabiv117__class_type_infoE, i32 2) to i8*), i8* getelementptr inbounds ([11 x i8], [11 x i8]* @_ZTS9exception, i32 0, i32 0) }
define void @f() uwtable {
%1 = alloca i8*
@@ -30,12 +34,12 @@ define void @f() uwtable {
store i32 %7, i32* %2
br label %8
- %9 = load i32* %2
+ %9 = load i32, i32* %2
%10 = call i32 @llvm.eh.typeid.for(i8* bitcast ({ i8*, i8* }* @_ZTI9exception to i8*)) nounwind
%11 = icmp eq i32 %9, %10
br i1 %11, label %12, label %17
- %13 = load i8** %1
+ %13 = load i8*, i8** %1
%14 = call i8* @__cxa_begin_catch(i8* %13) #3
%15 = bitcast i8* %14 to %struct.exception*
store %struct.exception* %15, %struct.exception** %e
@@ -44,8 +48,8 @@ define void @f() uwtable {
ret void
- %18 = load i8** %1
- %19 = load i32* %2
+ %18 = load i8*, i8** %1
+ %19 = load i32, i32* %2
%20 = insertvalue { i8*, i32 } undef, i8* %18, 0
%21 = insertvalue { i8*, i32 } %20, i32 %19, 1
resume { i8*, i32 } %21
diff --git a/test/CodeGen/ARM/dyn-stackalloc.ll b/test/CodeGen/ARM/dyn-stackalloc.ll
index 4ac5b8a31e5f..5b963fd64dea 100644
--- a/test/CodeGen/ARM/dyn-stackalloc.ll
+++ b/test/CodeGen/ARM/dyn-stackalloc.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=arm-eabi %s -o /dev/null
+; RUN: llc -mcpu=generic -mtriple=arm-eabi -verify-machineinstrs < %s | FileCheck %s
%struct.comment = type { i8**, i32*, i32, i8* }
%struct.info = type { i32, i32, i32, i32, i32, i32, i32, i8* }
@@ -7,7 +7,19 @@
@str215 = external global [2 x i8]
define void @t1(%struct.state* %v) {
- %tmp6 = load i32* null
+
+; Make sure we generate:
+; sub sp, sp, r1
+; instead of:
+; sub r1, sp, r1
+; mov sp, r1
+
+; CHECK-LABEL: @t1
+; CHECK: bic [[REG1:r[0-9]+]],
+; CHECK-NOT: sub r{{[0-9]+}}, sp, [[REG1]]
+; CHECK: sub sp, sp, [[REG1]]
+
+ %tmp6 = load i32, i32* null
%tmp8 = alloca float, i32 %tmp6
store i32 1, i32* null
br i1 false, label %bb123.preheader, label %return
@@ -17,8 +29,8 @@ bb123.preheader: ; preds = %0
bb43: ; preds = %bb123.preheader
call fastcc void @f1(float* %tmp8, float* null, i32 0)
- %tmp70 = load i32* null
- %tmp85 = getelementptr float* %tmp8, i32 0
+ %tmp70 = load i32, i32* null
+ %tmp85 = getelementptr float, float* %tmp8, i32 0
call fastcc void @f2(float* null, float* null, float* %tmp85, i32 %tmp70)
ret void
@@ -38,8 +50,8 @@ define void @t2(%struct.comment* %vc, i8* %tag, i8* %contents) {
%tmp6 = alloca i8, i32 %tmp5
%tmp9 = call i8* @strcpy(i8* %tmp6, i8* %tag)
%tmp6.len = call i32 @strlen(i8* %tmp6)
- %tmp6.indexed = getelementptr i8* %tmp6, i32 %tmp6.len
- call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp6.indexed, i8* getelementptr inbounds ([2 x i8]* @str215, i32 0, i32 0), i32 2, i32 1, i1 false)
+ %tmp6.indexed = getelementptr i8, i8* %tmp6, i32 %tmp6.len
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp6.indexed, i8* getelementptr inbounds ([2 x i8], [2 x i8]* @str215, i32 0, i32 0), i32 2, i32 1, i1 false)
%tmp15 = call i8* @strcat(i8* %tmp6, i8* %contents)
call fastcc void @comment_add(%struct.comment* %vc, i8* %tmp6)
ret void
diff --git a/test/CodeGen/ARM/ehabi.ll b/test/CodeGen/ARM/ehabi.ll
index ebf0c2a00330..088e48d2d793 100644
--- a/test/CodeGen/ARM/ehabi.ll
+++ b/test/CodeGen/ARM/ehabi.ll
@@ -146,8 +146,8 @@ declare void @_ZSt9terminatev()
; CHECK-FP: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
; CHECK-FP: .setfp r11, sp, #28
; CHECK-FP: add r11, sp, #28
-; CHECK-FP: .pad #28
-; CHECK-FP: sub sp, sp, #28
+; CHECK-FP: .pad #44
+; CHECK-FP: sub sp, sp, #44
; CHECK-FP: .personality __gxx_personality_v0
; CHECK-FP: .handlerdata
; CHECK-FP: .fnend
@@ -156,8 +156,8 @@ declare void @_ZSt9terminatev()
; CHECK-FP-ELIM: .fnstart
; CHECK-FP-ELIM: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
; CHECK-FP-ELIM: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; CHECK-FP-ELIM: .pad #28
-; CHECK-FP-ELIM: sub sp, sp, #28
+; CHECK-FP-ELIM: .pad #36
+; CHECK-FP-ELIM: sub sp, sp, #36
; CHECK-FP-ELIM: .personality __gxx_personality_v0
; CHECK-FP-ELIM: .handlerdata
; CHECK-FP-ELIM: .fnend
@@ -205,7 +205,7 @@ declare void @_ZSt9terminatev()
; DWARF-FP: .cfi_offset r4, -36
; DWARF-FP: add r11, sp, #28
; DWARF-FP: .cfi_def_cfa r11, 8
-; DWARF-FP: sub sp, sp, #28
+; DWARF-FP: sub sp, sp, #44
; DWARF-FP: sub sp, r11, #28
; DWARF-FP: pop {r4, r5, r6, r7, r8, r9, r10, r11, lr}
; DWARF-FP: mov pc, lr
@@ -226,9 +226,9 @@ declare void @_ZSt9terminatev()
; DWARF-FP-ELIM: .cfi_offset r6, -28
; DWARF-FP-ELIM: .cfi_offset r5, -32
; DWARF-FP-ELIM: .cfi_offset r4, -36
-; DWARF-FP-ELIM: sub sp, sp, #28
-; DWARF-FP-ELIM: .cfi_def_cfa_offset 64
-; DWARF-FP-ELIM: add sp, sp, #28
+; DWARF-FP-ELIM: sub sp, sp, #36
+; DWARF-FP-ELIM: .cfi_def_cfa_offset 72
+; DWARF-FP-ELIM: add sp, sp, #36
; DWARF-FP-ELIM: pop {r4, r5, r6, r7, r8, r9, r10, r11, lr}
; DWARF-FP-ELIM: mov pc, lr
; DWARF-FP-ELIM: .cfi_endproc
diff --git a/test/CodeGen/ARM/emit-big-cst.ll b/test/CodeGen/ARM/emit-big-cst.ll
index 01d789c492fe..7453e8caa946 100644
--- a/test/CodeGen/ARM/emit-big-cst.ll
+++ b/test/CodeGen/ARM/emit-big-cst.ll
@@ -11,7 +11,7 @@
define void @accessBig(i64* %storage) {
%addr = bitcast i64* %storage to i82*
- %bigLoadedCst = load volatile i82* @bigCst
+ %bigLoadedCst = load volatile i82, i82* @bigCst
%tmp = add i82 %bigLoadedCst, 1
store i82 %tmp, i82* %addr
ret void
diff --git a/test/CodeGen/ARM/extload-knownzero.ll b/test/CodeGen/ARM/extload-knownzero.ll
index f55b95104b86..da340f7a9431 100644
--- a/test/CodeGen/ARM/extload-knownzero.ll
+++ b/test/CodeGen/ARM/extload-knownzero.ll
@@ -8,7 +8,7 @@ entry:
br i1 %tmp1, label %bb1, label %bb2
bb1:
; CHECK: ldrh
- %tmp2 = load i16* %ptr, align 2
+ %tmp2 = load i16, i16* %ptr, align 2
br label %bb2
bb2:
; CHECK-NOT: uxth
diff --git a/test/CodeGen/ARM/extloadi1.ll b/test/CodeGen/ARM/extloadi1.ll
index 2504c6c61e3e..a67859d60d19 100644
--- a/test/CodeGen/ARM/extloadi1.ll
+++ b/test/CodeGen/ARM/extloadi1.ll
@@ -4,7 +4,7 @@
define void @__mf_sigusr1_respond() {
entry:
- %tmp8.b = load i1* @handler_installed.6144.b ; <i1> [#uses=1]
+ %tmp8.b = load i1, i1* @handler_installed.6144.b ; <i1> [#uses=1]
br i1 false, label %cond_true7, label %cond_next
cond_next: ; preds = %entry
diff --git a/test/CodeGen/ARM/fast-isel-GEP-coalesce.ll b/test/CodeGen/ARM/fast-isel-GEP-coalesce.ll
index 05a6bab99dbf..d759d2d52c36 100644
--- a/test/CodeGen/ARM/fast-isel-GEP-coalesce.ll
+++ b/test/CodeGen/ARM/fast-isel-GEP-coalesce.ll
@@ -1,6 +1,6 @@
-; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-darwin | FileCheck %s --check-prefix=ARM
-; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-linux-gnueabi | FileCheck %s --check-prefix=ARM
-; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-darwin | FileCheck %s --check-prefix=THUMB
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort=1 -relocation-model=dynamic-no-pic -mtriple=armv7-apple-darwin | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort=1 -relocation-model=dynamic-no-pic -mtriple=armv7-linux-gnueabi | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort=1 -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-darwin | FileCheck %s --check-prefix=THUMB
%struct.A = type { i32, [2 x [2 x i32]], i8, [3 x [3 x [3 x i32]]] }
%struct.B = type { i32, [2 x [2 x [2 x %struct.A]]] }
@@ -14,10 +14,10 @@ entry:
; ARM: t1
; THUMB: t1
%addr = alloca i32*, align 4
- store i32* getelementptr inbounds ([2 x [2 x [2 x [2 x [2 x i32]]]]]* @arr, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1), i32** %addr, align 4
+ store i32* getelementptr inbounds ([2 x [2 x [2 x [2 x [2 x i32]]]]], [2 x [2 x [2 x [2 x [2 x i32]]]]]* @arr, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1), i32** %addr, align 4
; ARM: add r0, r0, #124
; THUMB: adds r0, #124
- %0 = load i32** %addr, align 4
+ %0 = load i32*, i32** %addr, align 4
ret i32* %0
}
@@ -26,11 +26,11 @@ entry:
; ARM: t2
; THUMB: t2
%addr = alloca i32*, align 4
- store i32* getelementptr inbounds ([3 x [3 x %struct.A]]* @A, i32 0, i32 2, i32 2, i32 3, i32 1, i32 2, i32 2), i32** %addr, align 4
+ store i32* getelementptr inbounds ([3 x [3 x %struct.A]], [3 x [3 x %struct.A]]* @A, i32 0, i32 2, i32 2, i32 3, i32 1, i32 2, i32 2), i32** %addr, align 4
; ARM: movw [[R:r[0-9]+]], #1148
; ARM: add r0, r{{[0-9]+}}, [[R]]
; THUMB: addw r0, r0, #1148
- %0 = load i32** %addr, align 4
+ %0 = load i32*, i32** %addr, align 4
ret i32* %0
}
@@ -39,10 +39,10 @@ entry:
; ARM: t3
; THUMB: t3
%addr = alloca i32*, align 4
- store i32* getelementptr inbounds ([3 x [3 x %struct.A]]* @A, i32 0, i32 0, i32 1, i32 1, i32 0, i32 1), i32** %addr, align 4
+ store i32* getelementptr inbounds ([3 x [3 x %struct.A]], [3 x [3 x %struct.A]]* @A, i32 0, i32 0, i32 1, i32 1, i32 0, i32 1), i32** %addr, align 4
; ARM: add r0, r0, #140
; THUMB: adds r0, #140
- %0 = load i32** %addr, align 4
+ %0 = load i32*, i32** %addr, align 4
ret i32* %0
}
@@ -51,7 +51,7 @@ entry:
; ARM: t4
; THUMB: t4
%addr = alloca i32*, align 4
- store i32* getelementptr inbounds ([2 x [2 x [2 x %struct.B]]]* @B, i32 0, i32 0, i32 0, i32 1, i32 1, i32 0, i32 0, i32 1, i32 3, i32 1, i32 2, i32 1), i32** %addr, align 4
+ store i32* getelementptr inbounds ([2 x [2 x [2 x %struct.B]]], [2 x [2 x [2 x %struct.B]]]* @B, i32 0, i32 0, i32 0, i32 1, i32 1, i32 0, i32 0, i32 1, i32 3, i32 1, i32 2, i32 1), i32** %addr, align 4
; ARM-NOT: movw r{{[0-9]}}, #1060
; ARM-NOT: add r{{[0-9]}}, r{{[0-9]}}, #4
; ARM-NOT: add r{{[0-9]}}, r{{[0-9]}}, #132
@@ -61,6 +61,6 @@ entry:
; ARM-NOT: add r{{[0-9]}}, r{{[0-9]}}, #4
; ARM: movw r{{[0-9]}}, #1284
; THUMB: addw r{{[0-9]}}, r{{[0-9]}}, #1284
- %0 = load i32** %addr, align 4
+ %0 = load i32*, i32** %addr, align 4
ret i32* %0
}
diff --git a/test/CodeGen/ARM/fast-isel-align.ll b/test/CodeGen/ARM/fast-isel-align.ll
index 9c9a18858289..39085db95316 100644
--- a/test/CodeGen/ARM/fast-isel-align.ll
+++ b/test/CodeGen/ARM/fast-isel-align.ll
@@ -1,20 +1,20 @@
-; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios -verify-machineinstrs | FileCheck %s --check-prefix=ARM
-; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios -verify-machineinstrs | FileCheck %s --check-prefix=THUMB
+; RUN: llc < %s -O0 -fast-isel-abort=1 -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios -verify-machineinstrs | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -fast-isel-abort=1 -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios -verify-machineinstrs | FileCheck %s --check-prefix=THUMB
; RUN: llc < %s -O0 -arm-strict-align -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios -verify-machineinstrs | FileCheck %s --check-prefix=ARM-STRICT-ALIGN
; RUN: llc < %s -O0 -arm-strict-align -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios -verify-machineinstrs | FileCheck %s --check-prefix=THUMB-STRICT-ALIGN
-; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-linux-gnueabi -verify-machineinstrs | FileCheck %s --check-prefix=ARM
-; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-linux-gnueabi -verify-machineinstrs | FileCheck %s --check-prefix=THUMB
+; RUN: llc < %s -O0 -fast-isel-abort=1 -relocation-model=dynamic-no-pic -mtriple=armv7-linux-gnueabi -verify-machineinstrs | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -fast-isel-abort=1 -relocation-model=dynamic-no-pic -mtriple=thumbv7-linux-gnueabi -verify-machineinstrs | FileCheck %s --check-prefix=THUMB
; RUN: llc < %s -O0 -arm-strict-align -relocation-model=dynamic-no-pic -mtriple=armv7-linux-gnueabi -verify-machineinstrs | FileCheck %s --check-prefix=ARM-STRICT-ALIGN
; RUN: llc < %s -O0 -arm-strict-align -relocation-model=dynamic-no-pic -mtriple=thumbv7-linux-gnueabi -verify-machineinstrs | FileCheck %s --check-prefix=THUMB-STRICT-ALIGN
-; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-unknown-nacl -verify-machineinstrs | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -fast-isel-abort=1 -relocation-model=dynamic-no-pic -mtriple=armv7-unknown-nacl -verify-machineinstrs | FileCheck %s --check-prefix=ARM
; RUN: llc < %s -O0 -arm-strict-align -relocation-model=dynamic-no-pic -mtriple=armv7-unknown-nacl -verify-machineinstrs | FileCheck %s --check-prefix=ARM-STRICT-ALIGN
-; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-unknown-unknown -verify-machineinstrs | FileCheck %s --check-prefix=ARM-STRICT-ALIGN
-; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-unknown-unknown -verify-machineinstrs | FileCheck %s --check-prefix=THUMB-STRICT-ALIGN
-; RUN: llc < %s -O0 -arm-no-strict-align -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-unknown-unknown -verify-machineinstrs | FileCheck %s --check-prefix=ARM
-; RUN: llc < %s -O0 -arm-no-strict-align -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-unknown-unknown -verify-machineinstrs | FileCheck %s --check-prefix=THUMB
+; RUN: llc < %s -O0 -fast-isel-abort=1 -relocation-model=dynamic-no-pic -mtriple=armv7-unknown-unknown -verify-machineinstrs | FileCheck %s --check-prefix=ARM-STRICT-ALIGN
+; RUN: llc < %s -O0 -fast-isel-abort=1 -relocation-model=dynamic-no-pic -mtriple=thumbv7-unknown-unknown -verify-machineinstrs | FileCheck %s --check-prefix=THUMB-STRICT-ALIGN
+; RUN: llc < %s -O0 -arm-no-strict-align -fast-isel-abort=1 -relocation-model=dynamic-no-pic -mtriple=armv7-unknown-unknown -verify-machineinstrs | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -arm-no-strict-align -fast-isel-abort=1 -relocation-model=dynamic-no-pic -mtriple=thumbv7-unknown-unknown -verify-machineinstrs | FileCheck %s --check-prefix=THUMB
; RUN: llc < %s -O0 -relocation-model=dynamic-no-pic -mtriple=armv7-unknown-unknown -verify-machineinstrs | FileCheck %s --check-prefix=ARM-STRICT-ALIGN
; RUN: llc < %s -O0 -relocation-model=dynamic-no-pic -mtriple=thumbv7-unknown-unknown -verify-machineinstrs | FileCheck %s --check-prefix=THUMB-STRICT-ALIGN
@@ -34,8 +34,8 @@ entry:
; THUMB: str r1, [r0]
%add = fadd float %x, %y
- %0 = load %struct.anon** @a, align 4
- %x1 = getelementptr inbounds %struct.anon* %0, i32 0, i32 0
+ %0 = load %struct.anon*, %struct.anon** @a, align 4
+ %x1 = getelementptr inbounds %struct.anon, %struct.anon* %0, i32 0, i32 0
store float %add, float* %x1, align 1
ret void
}
@@ -51,7 +51,7 @@ entry:
; ARM: @word_aligned_f64_store
; THUMB: @word_aligned_f64_store
%add = fadd double %a, %b
- store double %add, double* getelementptr inbounds (%struct.anon.0* @foo_unpacked, i32 0, i32 0), align 4
+ store double %add, double* getelementptr inbounds (%struct.anon.0, %struct.anon.0* @foo_unpacked, i32 0, i32 0), align 4
; ARM: vstr d16, [r0]
; THUMB: vstr d16, [r0]
ret void
@@ -66,9 +66,9 @@ entry:
; THUMB: @unaligned_f32_load
%0 = alloca %class.TAlignTest*, align 4
store %class.TAlignTest* %this, %class.TAlignTest** %0, align 4
- %1 = load %class.TAlignTest** %0
- %2 = getelementptr inbounds %class.TAlignTest* %1, i32 0, i32 1
- %3 = load float* %2, align 1
+ %1 = load %class.TAlignTest*, %class.TAlignTest** %0
+ %2 = getelementptr inbounds %class.TAlignTest, %class.TAlignTest* %1, i32 0, i32 1
+ %3 = load float, float* %2, align 1
%4 = fcmp une float %3, 0.000000e+00
; ARM: ldr r[[R:[0-9]+]], [r0, #2]
; ARM: vmov s0, r[[R]]
@@ -103,7 +103,7 @@ entry:
; THUMB-STRICT-ALIGN: ldrb
; THUMB-STRICT-ALIGN: ldrb
- %0 = load i16* %x, align 1
+ %0 = load i16, i16* %x, align 1
ret i16 %0
}
@@ -139,6 +139,6 @@ entry:
; THUMB-STRICT-ALIGN: ldrb
; THUMB-STRICT-ALIGN: ldrb
- %0 = load i32* %x, align 1
+ %0 = load i32, i32* %x, align 1
ret i32 %0
}
diff --git a/test/CodeGen/ARM/fast-isel-binary.ll b/test/CodeGen/ARM/fast-isel-binary.ll
index e1a2a4f33835..3211fd6f2422 100644
--- a/test/CodeGen/ARM/fast-isel-binary.ll
+++ b/test/CodeGen/ARM/fast-isel-binary.ll
@@ -1,6 +1,6 @@
-; RUN: llc < %s -O0 -fast-isel-abort -verify-machineinstrs -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM
-; RUN: llc < %s -O0 -fast-isel-abort -verify-machineinstrs -relocation-model=dynamic-no-pic -mtriple=armv7-linux-gnueabi | FileCheck %s --check-prefix=ARM
-; RUN: llc < %s -O0 -fast-isel-abort -verify-machineinstrs -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
+; RUN: llc < %s -O0 -fast-isel-abort=1 -verify-machineinstrs -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -fast-isel-abort=1 -verify-machineinstrs -relocation-model=dynamic-no-pic -mtriple=armv7-linux-gnueabi | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -fast-isel-abort=1 -verify-machineinstrs -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
; Test add with non-legal types
diff --git a/test/CodeGen/ARM/fast-isel-br-const.ll b/test/CodeGen/ARM/fast-isel-br-const.ll
index 2e28b08fc8d6..988664b86b80 100644
--- a/test/CodeGen/ARM/fast-isel-br-const.ll
+++ b/test/CodeGen/ARM/fast-isel-br-const.ll
@@ -1,6 +1,6 @@
-; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM
-; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-linux-gnueabi | FileCheck %s --check-prefix=ARM
-; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort=1 -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort=1 -relocation-model=dynamic-no-pic -mtriple=armv7-linux-gnueabi | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort=1 -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
define i32 @t1(i32 %a, i32 %b) nounwind uwtable ssp {
entry:
diff --git a/test/CodeGen/ARM/fast-isel-br-phi.ll b/test/CodeGen/ARM/fast-isel-br-phi.ll
index 3b9d4652b755..fb0f0153898b 100644
--- a/test/CodeGen/ARM/fast-isel-br-phi.ll
+++ b/test/CodeGen/ARM/fast-isel-br-phi.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort=1 -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios
; This test ensures HandlePHINodesInSuccessorBlocks() is able to promote basic
; non-legal integer types (i.e., i1, i8, i16).
diff --git a/test/CodeGen/ARM/fast-isel-call-multi-reg-return.ll b/test/CodeGen/ARM/fast-isel-call-multi-reg-return.ll
index da829e929ef0..2c660179aa3c 100644
--- a/test/CodeGen/ARM/fast-isel-call-multi-reg-return.ll
+++ b/test/CodeGen/ARM/fast-isel-call-multi-reg-return.ll
@@ -1,6 +1,6 @@
-; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM
-; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-linux-gnueabi | FileCheck %s --check-prefix=ARM
-; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort=1 -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort=1 -relocation-model=dynamic-no-pic -mtriple=armv7-linux-gnueabi | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort=1 -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
; Fast-isel can't handle non-double multi-reg retvals.
; This test just checks to make sure we don't hit the assert in FinishCall.
diff --git a/test/CodeGen/ARM/fast-isel-call.ll b/test/CodeGen/ARM/fast-isel-call.ll
index 74b31bd9e67f..bd170f30d979 100644
--- a/test/CodeGen/ARM/fast-isel-call.ll
+++ b/test/CodeGen/ARM/fast-isel-call.ll
@@ -1,12 +1,12 @@
-; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM
-; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-linux-gnueabi | FileCheck %s --check-prefix=ARM
-; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
-; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios -arm-long-calls | FileCheck %s --check-prefix=ARM-LONG
-; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-linux-gnueabi -arm-long-calls | FileCheck %s --check-prefix=ARM-LONG
-; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios -arm-long-calls | FileCheck %s --check-prefix=THUMB-LONG
-; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios -mattr=-vfp2 | FileCheck %s --check-prefix=ARM-NOVFP
-; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-linux-gnueabi -mattr=-vfp2 | FileCheck %s --check-prefix=ARM-NOVFP
-; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios -mattr=-vfp2 | FileCheck %s --check-prefix=THUMB-NOVFP
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort=1 -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort=1 -relocation-model=dynamic-no-pic -mtriple=armv7-linux-gnueabi | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort=1 -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort=1 -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios -arm-long-calls | FileCheck %s --check-prefix=ARM-LONG
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort=1 -relocation-model=dynamic-no-pic -mtriple=armv7-linux-gnueabi -arm-long-calls | FileCheck %s --check-prefix=ARM-LONG
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort=1 -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios -arm-long-calls | FileCheck %s --check-prefix=THUMB-LONG
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort=1 -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios -mattr=-vfp2 | FileCheck %s --check-prefix=ARM-NOVFP
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort=1 -relocation-model=dynamic-no-pic -mtriple=armv7-linux-gnueabi -mattr=-vfp2 | FileCheck %s --check-prefix=ARM-NOVFP
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort=1 -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios -mattr=-vfp2 | FileCheck %s --check-prefix=THUMB-NOVFP
; Note that some of these tests assume that relocations are either
; movw/movt or constant pool loads. Different platforms will select
@@ -157,7 +157,7 @@ define void @foo3() uwtable {
; THUMB: blx r1
%fptr = alloca i32 (i32)*, align 8
store i32 (i32)* @bar0, i32 (i32)** %fptr, align 8
- %1 = load i32 (i32)** %fptr, align 8
+ %1 = load i32 (i32)*, i32 (i32)** %fptr, align 8
%call = call i32 %1(i32 0)
ret void
}
diff --git a/test/CodeGen/ARM/fast-isel-cmp-imm.ll b/test/CodeGen/ARM/fast-isel-cmp-imm.ll
index 55baf488a425..a9d7e4580638 100644
--- a/test/CodeGen/ARM/fast-isel-cmp-imm.ll
+++ b/test/CodeGen/ARM/fast-isel-cmp-imm.ll
@@ -1,6 +1,6 @@
-; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios -verify-machineinstrs | FileCheck %s --check-prefix=ARM
-; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-linux-gnueabi -verify-machineinstrs | FileCheck %s --check-prefix=ARM
-; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios -verify-machineinstrs | FileCheck %s --check-prefix=THUMB
+; RUN: llc < %s -O0 -fast-isel-abort=1 -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios -verify-machineinstrs | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -fast-isel-abort=1 -relocation-model=dynamic-no-pic -mtriple=armv7-linux-gnueabi -verify-machineinstrs | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -fast-isel-abort=1 -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios -verify-machineinstrs | FileCheck %s --check-prefix=THUMB
define void @t1a(float %a) uwtable ssp {
entry:
diff --git a/test/CodeGen/ARM/fast-isel-conversion.ll b/test/CodeGen/ARM/fast-isel-conversion.ll
index 5983493a818b..46b5e78fb662 100644
--- a/test/CodeGen/ARM/fast-isel-conversion.ll
+++ b/test/CodeGen/ARM/fast-isel-conversion.ll
@@ -1,6 +1,6 @@
-; RUN: llc < %s -verify-machineinstrs -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM
-; RUN: llc < %s -verify-machineinstrs -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-linux-gnueabi | FileCheck %s --check-prefix=ARM
-; RUN: llc < %s -verify-machineinstrs -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
+; RUN: llc < %s -verify-machineinstrs -O0 -fast-isel-abort=1 -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -verify-machineinstrs -O0 -fast-isel-abort=1 -relocation-model=dynamic-no-pic -mtriple=armv7-linux-gnueabi | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -verify-machineinstrs -O0 -fast-isel-abort=1 -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
; Test sitofp
diff --git a/test/CodeGen/ARM/fast-isel-crash.ll b/test/CodeGen/ARM/fast-isel-crash.ll
index ec9cf8d95019..885ca69834d3 100644
--- a/test/CodeGen/ARM/fast-isel-crash.ll
+++ b/test/CodeGen/ARM/fast-isel-crash.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -mtriple=thumbv7-apple-darwin
-; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -mtriple=thumbv7-linux-gnueabi
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort=1 -mtriple=thumbv7-apple-darwin
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort=1 -mtriple=thumbv7-linux-gnueabi
%union.anon = type { <16 x i32> }
diff --git a/test/CodeGen/ARM/fast-isel-deadcode.ll b/test/CodeGen/ARM/fast-isel-deadcode.ll
index c3eed30692b8..e584c54b48a2 100644
--- a/test/CodeGen/ARM/fast-isel-deadcode.ll
+++ b/test/CodeGen/ARM/fast-isel-deadcode.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -O0 -fast-isel-abort -verify-machineinstrs -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
+; RUN: llc < %s -O0 -fast-isel-abort=1 -verify-machineinstrs -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
; Target-specific selector can't properly handle the double because it isn't
; being passed via a register, so the materialized arguments become dead code.
diff --git a/test/CodeGen/ARM/fast-isel-ext.ll b/test/CodeGen/ARM/fast-isel-ext.ll
index de0dd1917eb7..b792f7a90738 100644
--- a/test/CodeGen/ARM/fast-isel-ext.ll
+++ b/test/CodeGen/ARM/fast-isel-ext.ll
@@ -1,10 +1,10 @@
-; RUN: llc < %s -O0 -fast-isel-abort -mtriple=armv7-apple-ios -verify-machineinstrs | FileCheck %s --check-prefix=v7
-; RUN: llc < %s -O0 -fast-isel-abort -mtriple=armv7-linux-gnueabi -verify-machineinstrs | FileCheck %s --check-prefix=v7
-; RUN: llc < %s -O0 -fast-isel-abort -mtriple=armv4t-apple-ios -verify-machineinstrs | FileCheck %s --check-prefix=prev6
-; RUN: llc < %s -O0 -fast-isel-abort -mtriple=armv4t-linux-gnueabi -verify-machineinstrs | FileCheck %s --check-prefix=prev6
-; RUN: llc < %s -O0 -fast-isel-abort -mtriple=armv5-apple-ios -verify-machineinstrs | FileCheck %s --check-prefix=prev6
-; RUN: llc < %s -O0 -fast-isel-abort -mtriple=armv5-linux-gnueabi -verify-machineinstrs | FileCheck %s --check-prefix=prev6
-; RUN: llc < %s -O0 -fast-isel-abort -mtriple=thumbv7-apple-ios -verify-machineinstrs | FileCheck %s --check-prefix=v7
+; RUN: llc < %s -O0 -fast-isel-abort=1 -mtriple=armv7-apple-ios -verify-machineinstrs | FileCheck %s --check-prefix=v7
+; RUN: llc < %s -O0 -fast-isel-abort=1 -mtriple=armv7-linux-gnueabi -verify-machineinstrs | FileCheck %s --check-prefix=v7
+; RUN: llc < %s -O0 -fast-isel-abort=1 -mtriple=armv4t-apple-ios -verify-machineinstrs | FileCheck %s --check-prefix=prev6
+; RUN: llc < %s -O0 -fast-isel-abort=1 -mtriple=armv4t-linux-gnueabi -verify-machineinstrs | FileCheck %s --check-prefix=prev6
+; RUN: llc < %s -O0 -fast-isel-abort=1 -mtriple=armv5-apple-ios -verify-machineinstrs | FileCheck %s --check-prefix=prev6
+; RUN: llc < %s -O0 -fast-isel-abort=1 -mtriple=armv5-linux-gnueabi -verify-machineinstrs | FileCheck %s --check-prefix=prev6
+; RUN: llc < %s -O0 -fast-isel-abort=1 -mtriple=thumbv7-apple-ios -verify-machineinstrs | FileCheck %s --check-prefix=v7
; Can't test pre-ARMv6 Thumb because ARM FastISel currently only supports
; Thumb2. The ARMFastISel::ARMEmitIntExt code should work for Thumb by always
diff --git a/test/CodeGen/ARM/fast-isel-fold.ll b/test/CodeGen/ARM/fast-isel-fold.ll
index e8ed8cbf34e9..37e93c0a7018 100644
--- a/test/CodeGen/ARM/fast-isel-fold.ll
+++ b/test/CodeGen/ARM/fast-isel-fold.ll
@@ -1,6 +1,6 @@
-; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-darwin | FileCheck %s --check-prefix=ARM
-; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-linux-gnueabi | FileCheck %s --check-prefix=ARM
-; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-darwin | FileCheck %s --check-prefix=THUMB
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort=1 -relocation-model=dynamic-no-pic -mtriple=armv7-apple-darwin | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort=1 -relocation-model=dynamic-no-pic -mtriple=armv7-linux-gnueabi | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort=1 -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-darwin | FileCheck %s --check-prefix=THUMB
@a = global i8 1, align 1
@b = global i16 2, align 2
@@ -14,7 +14,7 @@ define void @t1() nounwind uwtable ssp {
; THUMB: ldrb
; THUMB-NOT: uxtb
; THUMB-NOT: and{{.*}}, #255
- %1 = load i8* @a, align 1
+ %1 = load i8, i8* @a, align 1
call void @foo1(i8 zeroext %1)
ret void
}
@@ -26,7 +26,7 @@ define void @t2() nounwind uwtable ssp {
; THUMB: t2
; THUMB: ldrh
; THUMB-NOT: uxth
- %1 = load i16* @b, align 2
+ %1 = load i16, i16* @b, align 2
call void @foo2(i16 zeroext %1)
ret void
}
@@ -43,7 +43,7 @@ define i32 @t3() nounwind uwtable ssp {
; THUMB: ldrb
; THUMB-NOT: uxtb
; THUMB-NOT: and{{.*}}, #255
- %1 = load i8* @a, align 1
+ %1 = load i8, i8* @a, align 1
%2 = zext i8 %1 to i32
ret i32 %2
}
@@ -55,7 +55,7 @@ define i32 @t4() nounwind uwtable ssp {
; THUMB: t4
; THUMB: ldrh
; THUMB-NOT: uxth
- %1 = load i16* @b, align 2
+ %1 = load i16, i16* @b, align 2
%2 = zext i16 %1 to i32
ret i32 %2
}
@@ -67,7 +67,7 @@ define i32 @t5() nounwind uwtable ssp {
; THUMB: t5
; THUMB: ldrsh
; THUMB-NOT: sxth
- %1 = load i16* @b, align 2
+ %1 = load i16, i16* @b, align 2
%2 = sext i16 %1 to i32
ret i32 %2
}
@@ -79,7 +79,7 @@ define i32 @t6() nounwind uwtable ssp {
; THUMB: t6
; THUMB: ldrsb
; THUMB-NOT: sxtb
- %1 = load i8* @a, align 2
+ %1 = load i8, i8* @a, align 2
%2 = sext i8 %1 to i32
ret i32 %2
}
diff --git a/test/CodeGen/ARM/fast-isel-frameaddr.ll b/test/CodeGen/ARM/fast-isel-frameaddr.ll
index 93cdbbbbd86e..75d582f4ee3a 100644
--- a/test/CodeGen/ARM/fast-isel-frameaddr.ll
+++ b/test/CodeGen/ARM/fast-isel-frameaddr.ll
@@ -1,7 +1,7 @@
-; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=DARWIN-ARM
-; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -mtriple=armv7-linux-gnueabi | FileCheck %s --check-prefix=LINUX-ARM
-; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=DARWIN-THUMB2
-; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -mtriple=thumbv7-linux-gnueabi | FileCheck %s --check-prefix=LINUX-THUMB2
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort=1 -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=DARWIN-ARM
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort=1 -mtriple=armv7-linux-gnueabi | FileCheck %s --check-prefix=LINUX-ARM
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort=1 -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=DARWIN-THUMB2
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort=1 -mtriple=thumbv7-linux-gnueabi | FileCheck %s --check-prefix=LINUX-THUMB2
define i8* @frameaddr_index0() nounwind {
entry:
diff --git a/test/CodeGen/ARM/fast-isel-icmp.ll b/test/CodeGen/ARM/fast-isel-icmp.ll
index 85f449e3d71d..bd7c2d096a8c 100644
--- a/test/CodeGen/ARM/fast-isel-icmp.ll
+++ b/test/CodeGen/ARM/fast-isel-icmp.ll
@@ -1,6 +1,6 @@
-; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios -verify-machineinstrs | FileCheck %s --check-prefix=ARM
-; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-linux-gnueabi -verify-machineinstrs | FileCheck %s --check-prefix=ARM
-; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios -verify-machineinstrs | FileCheck %s --check-prefix=THUMB
+; RUN: llc < %s -O0 -fast-isel-abort=1 -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios -verify-machineinstrs | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -fast-isel-abort=1 -relocation-model=dynamic-no-pic -mtriple=armv7-linux-gnueabi -verify-machineinstrs | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -fast-isel-abort=1 -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios -verify-machineinstrs | FileCheck %s --check-prefix=THUMB
define i32 @icmp_i16_signed(i16 %a, i16 %b) nounwind {
entry:
diff --git a/test/CodeGen/ARM/fast-isel-indirectbr.ll b/test/CodeGen/ARM/fast-isel-indirectbr.ll
index 2456ef442040..91648d36a694 100644
--- a/test/CodeGen/ARM/fast-isel-indirectbr.ll
+++ b/test/CodeGen/ARM/fast-isel-indirectbr.ll
@@ -1,6 +1,6 @@
-; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM
-; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-linux-gnueabi | FileCheck %s --check-prefix=ARM
-; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort=1 -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort=1 -relocation-model=dynamic-no-pic -mtriple=armv7-linux-gnueabi | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort=1 -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
define void @t1(i8* %x) {
entry:
diff --git a/test/CodeGen/ARM/fast-isel-intrinsic.ll b/test/CodeGen/ARM/fast-isel-intrinsic.ll
index b09931dc4e2f..6b434b74ca79 100644
--- a/test/CodeGen/ARM/fast-isel-intrinsic.ll
+++ b/test/CodeGen/ARM/fast-isel-intrinsic.ll
@@ -1,9 +1,9 @@
-; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios -verify-machineinstrs | FileCheck %s --check-prefix=ARM
-; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-linux-gnueabi -verify-machineinstrs | FileCheck %s --check-prefix=ARM
-; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios -verify-machineinstrs | FileCheck %s --check-prefix=THUMB
-; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios -arm-long-calls -verify-machineinstrs | FileCheck %s --check-prefix=ARM-LONG
-; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-linux-gnueabi -arm-long-calls -verify-machineinstrs | FileCheck %s --check-prefix=ARM-LONG
-; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios -arm-long-calls -verify-machineinstrs | FileCheck %s --check-prefix=THUMB-LONG
+; RUN: llc < %s -O0 -fast-isel-abort=1 -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios -verify-machineinstrs | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -fast-isel-abort=1 -relocation-model=dynamic-no-pic -mtriple=armv7-linux-gnueabi -verify-machineinstrs | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -fast-isel-abort=1 -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios -verify-machineinstrs | FileCheck %s --check-prefix=THUMB
+; RUN: llc < %s -O0 -fast-isel-abort=1 -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios -arm-long-calls -verify-machineinstrs | FileCheck %s --check-prefix=ARM-LONG
+; RUN: llc < %s -O0 -fast-isel-abort=1 -relocation-model=dynamic-no-pic -mtriple=armv7-linux-gnueabi -arm-long-calls -verify-machineinstrs | FileCheck %s --check-prefix=ARM-LONG
+; RUN: llc < %s -O0 -fast-isel-abort=1 -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios -arm-long-calls -verify-machineinstrs | FileCheck %s --check-prefix=THUMB-LONG
; Note that some of these tests assume that relocations are either
; movw/movt or constant pool loads. Different platforms will select
@@ -39,7 +39,7 @@ define void @t1() nounwind ssp {
; THUMB-LONG: movt r3, :upper16:L_memset$non_lazy_ptr
; THUMB-LONG: ldr r3, [r3]
; THUMB-LONG: blx r3
- call void @llvm.memset.p0i8.i32(i8* getelementptr inbounds ([60 x i8]* @message1, i32 0, i32 5), i8 64, i32 10, i32 4, i1 false)
+ call void @llvm.memset.p0i8.i32(i8* getelementptr inbounds ([60 x i8], [60 x i8]* @message1, i32 0, i32 5), i8 64, i32 10, i32 4, i1 false)
ret void
}
@@ -78,7 +78,7 @@ define void @t2() nounwind ssp {
; THUMB-LONG: movt r3, :upper16:L_memcpy$non_lazy_ptr
; THUMB-LONG: ldr r3, [r3]
; THUMB-LONG: blx r3
- call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds ([60 x i8]* @temp, i32 0, i32 4), i8* getelementptr inbounds ([60 x i8]* @temp, i32 0, i32 16), i32 17, i32 4, i1 false)
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds ([60 x i8], [60 x i8]* @temp, i32 0, i32 4), i8* getelementptr inbounds ([60 x i8], [60 x i8]* @temp, i32 0, i32 16), i32 17, i32 4, i1 false)
ret void
}
@@ -115,7 +115,7 @@ define void @t3() nounwind ssp {
; THUMB-LONG: movt r3, :upper16:L_memmove$non_lazy_ptr
; THUMB-LONG: ldr r3, [r3]
; THUMB-LONG: blx r3
- call void @llvm.memmove.p0i8.p0i8.i32(i8* getelementptr inbounds ([60 x i8]* @temp, i32 0, i32 4), i8* getelementptr inbounds ([60 x i8]* @temp, i32 0, i32 16), i32 10, i32 1, i1 false)
+ call void @llvm.memmove.p0i8.p0i8.i32(i8* getelementptr inbounds ([60 x i8], [60 x i8]* @temp, i32 0, i32 4), i8* getelementptr inbounds ([60 x i8], [60 x i8]* @temp, i32 0, i32 16), i32 10, i32 1, i1 false)
ret void
}
@@ -142,7 +142,7 @@ define void @t4() nounwind ssp {
; THUMB: ldrh r1, [r0, #24]
; THUMB: strh r1, [r0, #12]
; THUMB: bx lr
- call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds ([60 x i8]* @temp, i32 0, i32 4), i8* getelementptr inbounds ([60 x i8]* @temp, i32 0, i32 16), i32 10, i32 4, i1 false)
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds ([60 x i8], [60 x i8]* @temp, i32 0, i32 4), i8* getelementptr inbounds ([60 x i8], [60 x i8]* @temp, i32 0, i32 16), i32 10, i32 4, i1 false)
ret void
}
@@ -179,7 +179,7 @@ define void @t5() nounwind ssp {
; THUMB: ldrh r1, [r0, #24]
; THUMB: strh r1, [r0, #12]
; THUMB: bx lr
- call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds ([60 x i8]* @temp, i32 0, i32 4), i8* getelementptr inbounds ([60 x i8]* @temp, i32 0, i32 16), i32 10, i32 2, i1 false)
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds ([60 x i8], [60 x i8]* @temp, i32 0, i32 4), i8* getelementptr inbounds ([60 x i8], [60 x i8]* @temp, i32 0, i32 16), i32 10, i32 2, i1 false)
ret void
}
@@ -234,14 +234,14 @@ define void @t6() nounwind ssp {
; THUMB: ldrb r1, [r0, #25]
; THUMB: strb r1, [r0, #13]
; THUMB: bx lr
- call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds ([60 x i8]* @temp, i32 0, i32 4), i8* getelementptr inbounds ([60 x i8]* @temp, i32 0, i32 16), i32 10, i32 1, i1 false)
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds ([60 x i8], [60 x i8]* @temp, i32 0, i32 4), i8* getelementptr inbounds ([60 x i8], [60 x i8]* @temp, i32 0, i32 16), i32 10, i32 1, i1 false)
ret void
}
; rdar://13202135
define void @t7() nounwind ssp {
; Just make sure this doesn't assert when we have an odd length and an alignment of 2.
- call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds ([60 x i8]* @temp, i32 0, i32 4), i8* getelementptr inbounds ([60 x i8]* @temp, i32 0, i32 16), i32 3, i32 2, i1 false)
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds ([60 x i8], [60 x i8]* @temp, i32 0, i32 4), i8* getelementptr inbounds ([60 x i8], [60 x i8]* @temp, i32 0, i32 16), i32 3, i32 2, i1 false)
ret void
}
diff --git a/test/CodeGen/ARM/fast-isel-ldr-str-arm.ll b/test/CodeGen/ARM/fast-isel-ldr-str-arm.ll
index cf294bcfbece..cce914b094f7 100644
--- a/test/CodeGen/ARM/fast-isel-ldr-str-arm.ll
+++ b/test/CodeGen/ARM/fast-isel-ldr-str-arm.ll
@@ -1,10 +1,10 @@
-; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-darwin | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort=1 -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-darwin | FileCheck %s --check-prefix=ARM
define i32 @t1(i32* nocapture %ptr) nounwind readonly {
entry:
; ARM: t1
- %add.ptr = getelementptr inbounds i32* %ptr, i32 1
- %0 = load i32* %add.ptr, align 4
+ %add.ptr = getelementptr inbounds i32, i32* %ptr, i32 1
+ %0 = load i32, i32* %add.ptr, align 4
; ARM: ldr r{{[0-9]}}, [r0, #4]
ret i32 %0
}
@@ -12,8 +12,8 @@ entry:
define i32 @t2(i32* nocapture %ptr) nounwind readonly {
entry:
; ARM: t2
- %add.ptr = getelementptr inbounds i32* %ptr, i32 63
- %0 = load i32* %add.ptr, align 4
+ %add.ptr = getelementptr inbounds i32, i32* %ptr, i32 63
+ %0 = load i32, i32* %add.ptr, align 4
; ARM: ldr.w r{{[0-9]}}, [r0, #252]
ret i32 %0
}
@@ -21,8 +21,8 @@ entry:
define zeroext i16 @t3(i16* nocapture %ptr) nounwind readonly {
entry:
; ARM: t3
- %add.ptr = getelementptr inbounds i16* %ptr, i16 1
- %0 = load i16* %add.ptr, align 4
+ %add.ptr = getelementptr inbounds i16, i16* %ptr, i16 1
+ %0 = load i16, i16* %add.ptr, align 4
; ARM: ldrh r{{[0-9]}}, [r0, #2]
ret i16 %0
}
@@ -30,8 +30,8 @@ entry:
define zeroext i16 @t4(i16* nocapture %ptr) nounwind readonly {
entry:
; ARM: t4
- %add.ptr = getelementptr inbounds i16* %ptr, i16 63
- %0 = load i16* %add.ptr, align 4
+ %add.ptr = getelementptr inbounds i16, i16* %ptr, i16 63
+ %0 = load i16, i16* %add.ptr, align 4
; ARM: ldrh.w r{{[0-9]}}, [r0, #126]
ret i16 %0
}
@@ -39,8 +39,8 @@ entry:
define zeroext i8 @t5(i8* nocapture %ptr) nounwind readonly {
entry:
; ARM: t5
- %add.ptr = getelementptr inbounds i8* %ptr, i8 1
- %0 = load i8* %add.ptr, align 4
+ %add.ptr = getelementptr inbounds i8, i8* %ptr, i8 1
+ %0 = load i8, i8* %add.ptr, align 4
; ARM: ldrb r{{[0-9]}}, [r0, #1]
ret i8 %0
}
@@ -48,8 +48,8 @@ entry:
define zeroext i8 @t6(i8* nocapture %ptr) nounwind readonly {
entry:
; ARM: t6
- %add.ptr = getelementptr inbounds i8* %ptr, i8 63
- %0 = load i8* %add.ptr, align 4
+ %add.ptr = getelementptr inbounds i8, i8* %ptr, i8 63
+ %0 = load i8, i8* %add.ptr, align 4
; ARM: ldrb.w r{{[0-9]}}, [r0, #63]
ret i8 %0
}
diff --git a/test/CodeGen/ARM/fast-isel-ldr-str-thumb-neg-index.ll b/test/CodeGen/ARM/fast-isel-ldr-str-thumb-neg-index.ll
index d9c9cc459c7e..f24100b36db9 100644
--- a/test/CodeGen/ARM/fast-isel-ldr-str-thumb-neg-index.ll
+++ b/test/CodeGen/ARM/fast-isel-ldr-str-thumb-neg-index.ll
@@ -1,10 +1,10 @@
-; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios -verify-machineinstrs | FileCheck %s --check-prefix=THUMB
+; RUN: llc < %s -O0 -fast-isel-abort=1 -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios -verify-machineinstrs | FileCheck %s --check-prefix=THUMB
define i32 @t1(i32* nocapture %ptr) nounwind readonly {
entry:
; THUMB: t1
- %add.ptr = getelementptr inbounds i32* %ptr, i32 -1
- %0 = load i32* %add.ptr, align 4
+ %add.ptr = getelementptr inbounds i32, i32* %ptr, i32 -1
+ %0 = load i32, i32* %add.ptr, align 4
; THUMB: ldr r{{[0-9]}}, [r0, #-4]
ret i32 %0
}
@@ -12,8 +12,8 @@ entry:
define i32 @t2(i32* nocapture %ptr) nounwind readonly {
entry:
; THUMB: t2
- %add.ptr = getelementptr inbounds i32* %ptr, i32 -63
- %0 = load i32* %add.ptr, align 4
+ %add.ptr = getelementptr inbounds i32, i32* %ptr, i32 -63
+ %0 = load i32, i32* %add.ptr, align 4
; THUMB: ldr r{{[0-9]}}, [r0, #-252]
ret i32 %0
}
@@ -21,8 +21,8 @@ entry:
define i32 @t3(i32* nocapture %ptr) nounwind readonly {
entry:
; THUMB: t3
- %add.ptr = getelementptr inbounds i32* %ptr, i32 -64
- %0 = load i32* %add.ptr, align 4
+ %add.ptr = getelementptr inbounds i32, i32* %ptr, i32 -64
+ %0 = load i32, i32* %add.ptr, align 4
; THUMB: ldr r{{[0-9]}}, [r0]
ret i32 %0
}
@@ -30,8 +30,8 @@ entry:
define zeroext i16 @t4(i16* nocapture %ptr) nounwind readonly {
entry:
; THUMB: t4
- %add.ptr = getelementptr inbounds i16* %ptr, i32 -1
- %0 = load i16* %add.ptr, align 2
+ %add.ptr = getelementptr inbounds i16, i16* %ptr, i32 -1
+ %0 = load i16, i16* %add.ptr, align 2
; THUMB: ldrh r{{[0-9]}}, [r0, #-2]
ret i16 %0
}
@@ -39,8 +39,8 @@ entry:
define zeroext i16 @t5(i16* nocapture %ptr) nounwind readonly {
entry:
; THUMB: t5
- %add.ptr = getelementptr inbounds i16* %ptr, i32 -127
- %0 = load i16* %add.ptr, align 2
+ %add.ptr = getelementptr inbounds i16, i16* %ptr, i32 -127
+ %0 = load i16, i16* %add.ptr, align 2
; THUMB: ldrh r{{[0-9]}}, [r0, #-254]
ret i16 %0
}
@@ -48,8 +48,8 @@ entry:
define zeroext i16 @t6(i16* nocapture %ptr) nounwind readonly {
entry:
; THUMB: t6
- %add.ptr = getelementptr inbounds i16* %ptr, i32 -128
- %0 = load i16* %add.ptr, align 2
+ %add.ptr = getelementptr inbounds i16, i16* %ptr, i32 -128
+ %0 = load i16, i16* %add.ptr, align 2
; THUMB: ldrh r{{[0-9]}}, [r0]
ret i16 %0
}
@@ -57,8 +57,8 @@ entry:
define zeroext i8 @t7(i8* nocapture %ptr) nounwind readonly {
entry:
; THUMB: t7
- %add.ptr = getelementptr inbounds i8* %ptr, i32 -1
- %0 = load i8* %add.ptr, align 1
+ %add.ptr = getelementptr inbounds i8, i8* %ptr, i32 -1
+ %0 = load i8, i8* %add.ptr, align 1
; THUMB: ldrb r{{[0-9]}}, [r0, #-1]
ret i8 %0
}
@@ -66,8 +66,8 @@ entry:
define zeroext i8 @t8(i8* nocapture %ptr) nounwind readonly {
entry:
; THUMB: t8
- %add.ptr = getelementptr inbounds i8* %ptr, i32 -255
- %0 = load i8* %add.ptr, align 1
+ %add.ptr = getelementptr inbounds i8, i8* %ptr, i32 -255
+ %0 = load i8, i8* %add.ptr, align 1
; THUMB: ldrb r{{[0-9]}}, [r0, #-255]
ret i8 %0
}
@@ -75,8 +75,8 @@ entry:
define zeroext i8 @t9(i8* nocapture %ptr) nounwind readonly {
entry:
; THUMB: t9
- %add.ptr = getelementptr inbounds i8* %ptr, i32 -256
- %0 = load i8* %add.ptr, align 1
+ %add.ptr = getelementptr inbounds i8, i8* %ptr, i32 -256
+ %0 = load i8, i8* %add.ptr, align 1
; THUMB: ldrb r{{[0-9]}}, [r0]
ret i8 %0
}
@@ -84,7 +84,7 @@ entry:
define void @t10(i32* nocapture %ptr) nounwind {
entry:
; THUMB: t10
- %add.ptr = getelementptr inbounds i32* %ptr, i32 -1
+ %add.ptr = getelementptr inbounds i32, i32* %ptr, i32 -1
store i32 0, i32* %add.ptr, align 4
; THUMB: str r{{[0-9]}}, [r0, #-4]
ret void
@@ -93,7 +93,7 @@ entry:
define void @t11(i32* nocapture %ptr) nounwind {
entry:
; THUMB: t11
- %add.ptr = getelementptr inbounds i32* %ptr, i32 -63
+ %add.ptr = getelementptr inbounds i32, i32* %ptr, i32 -63
store i32 0, i32* %add.ptr, align 4
; THUMB: str r{{[0-9]}}, [r0, #-252]
ret void
@@ -102,7 +102,7 @@ entry:
define void @t12(i32* nocapture %ptr) nounwind {
entry:
; THUMB: t12
- %add.ptr = getelementptr inbounds i32* %ptr, i32 -64
+ %add.ptr = getelementptr inbounds i32, i32* %ptr, i32 -64
store i32 0, i32* %add.ptr, align 4
; THUMB: str r{{[0-9]}}, [r0]
ret void
@@ -111,7 +111,7 @@ entry:
define void @t13(i16* nocapture %ptr) nounwind {
entry:
; THUMB: t13
- %add.ptr = getelementptr inbounds i16* %ptr, i32 -1
+ %add.ptr = getelementptr inbounds i16, i16* %ptr, i32 -1
store i16 0, i16* %add.ptr, align 2
; THUMB: strh r{{[0-9]}}, [r0, #-2]
ret void
@@ -120,7 +120,7 @@ entry:
define void @t14(i16* nocapture %ptr) nounwind {
entry:
; THUMB: t14
- %add.ptr = getelementptr inbounds i16* %ptr, i32 -127
+ %add.ptr = getelementptr inbounds i16, i16* %ptr, i32 -127
store i16 0, i16* %add.ptr, align 2
; THUMB: strh r{{[0-9]}}, [r0, #-254]
ret void
@@ -129,7 +129,7 @@ entry:
define void @t15(i16* nocapture %ptr) nounwind {
entry:
; THUMB: t15
- %add.ptr = getelementptr inbounds i16* %ptr, i32 -128
+ %add.ptr = getelementptr inbounds i16, i16* %ptr, i32 -128
store i16 0, i16* %add.ptr, align 2
; THUMB: strh r{{[0-9]}}, [r0]
ret void
@@ -138,7 +138,7 @@ entry:
define void @t16(i8* nocapture %ptr) nounwind {
entry:
; THUMB: t16
- %add.ptr = getelementptr inbounds i8* %ptr, i32 -1
+ %add.ptr = getelementptr inbounds i8, i8* %ptr, i32 -1
store i8 0, i8* %add.ptr, align 1
; THUMB: strb r{{[0-9]}}, [r0, #-1]
ret void
@@ -147,7 +147,7 @@ entry:
define void @t17(i8* nocapture %ptr) nounwind {
entry:
; THUMB: t17
- %add.ptr = getelementptr inbounds i8* %ptr, i32 -255
+ %add.ptr = getelementptr inbounds i8, i8* %ptr, i32 -255
store i8 0, i8* %add.ptr, align 1
; THUMB: strb r{{[0-9]}}, [r0, #-255]
ret void
@@ -156,7 +156,7 @@ entry:
define void @t18(i8* nocapture %ptr) nounwind {
entry:
; THUMB: t18
- %add.ptr = getelementptr inbounds i8* %ptr, i32 -256
+ %add.ptr = getelementptr inbounds i8, i8* %ptr, i32 -256
store i8 0, i8* %add.ptr, align 1
; THUMB: strb r{{[0-9]}}, [r0]
ret void
diff --git a/test/CodeGen/ARM/fast-isel-ldrh-strh-arm.ll b/test/CodeGen/ARM/fast-isel-ldrh-strh-arm.ll
index c05ea398d72e..ca512970c9cf 100644
--- a/test/CodeGen/ARM/fast-isel-ldrh-strh-arm.ll
+++ b/test/CodeGen/ARM/fast-isel-ldrh-strh-arm.ll
@@ -1,12 +1,12 @@
-; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM
-; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-linux-gnueabi | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort=1 -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort=1 -relocation-model=dynamic-no-pic -mtriple=armv7-linux-gnueabi | FileCheck %s --check-prefix=ARM
; rdar://10418009
define zeroext i16 @t1(i16* nocapture %a) nounwind uwtable readonly ssp {
entry:
; ARM: t1
- %add.ptr = getelementptr inbounds i16* %a, i64 -8
- %0 = load i16* %add.ptr, align 2
+ %add.ptr = getelementptr inbounds i16, i16* %a, i64 -8
+ %0 = load i16, i16* %add.ptr, align 2
; ARM: ldrh r0, [r0, #-16]
ret i16 %0
}
@@ -14,8 +14,8 @@ entry:
define zeroext i16 @t2(i16* nocapture %a) nounwind uwtable readonly ssp {
entry:
; ARM: t2
- %add.ptr = getelementptr inbounds i16* %a, i64 -16
- %0 = load i16* %add.ptr, align 2
+ %add.ptr = getelementptr inbounds i16, i16* %a, i64 -16
+ %0 = load i16, i16* %add.ptr, align 2
; ARM: ldrh r0, [r0, #-32]
ret i16 %0
}
@@ -23,8 +23,8 @@ entry:
define zeroext i16 @t3(i16* nocapture %a) nounwind uwtable readonly ssp {
entry:
; ARM: t3
- %add.ptr = getelementptr inbounds i16* %a, i64 -127
- %0 = load i16* %add.ptr, align 2
+ %add.ptr = getelementptr inbounds i16, i16* %a, i64 -127
+ %0 = load i16, i16* %add.ptr, align 2
; ARM: ldrh r0, [r0, #-254]
ret i16 %0
}
@@ -32,8 +32,8 @@ entry:
define zeroext i16 @t4(i16* nocapture %a) nounwind uwtable readonly ssp {
entry:
; ARM: t4
- %add.ptr = getelementptr inbounds i16* %a, i64 -128
- %0 = load i16* %add.ptr, align 2
+ %add.ptr = getelementptr inbounds i16, i16* %a, i64 -128
+ %0 = load i16, i16* %add.ptr, align 2
; ARM: mvn r{{[1-9]}}, #255
; ARM: add r0, r0, r{{[1-9]}}
; ARM: ldrh r0, [r0]
@@ -43,8 +43,8 @@ entry:
define zeroext i16 @t5(i16* nocapture %a) nounwind uwtable readonly ssp {
entry:
; ARM: t5
- %add.ptr = getelementptr inbounds i16* %a, i64 8
- %0 = load i16* %add.ptr, align 2
+ %add.ptr = getelementptr inbounds i16, i16* %a, i64 8
+ %0 = load i16, i16* %add.ptr, align 2
; ARM: ldrh r0, [r0, #16]
ret i16 %0
}
@@ -52,8 +52,8 @@ entry:
define zeroext i16 @t6(i16* nocapture %a) nounwind uwtable readonly ssp {
entry:
; ARM: t6
- %add.ptr = getelementptr inbounds i16* %a, i64 16
- %0 = load i16* %add.ptr, align 2
+ %add.ptr = getelementptr inbounds i16, i16* %a, i64 16
+ %0 = load i16, i16* %add.ptr, align 2
; ARM: ldrh r0, [r0, #32]
ret i16 %0
}
@@ -61,8 +61,8 @@ entry:
define zeroext i16 @t7(i16* nocapture %a) nounwind uwtable readonly ssp {
entry:
; ARM: t7
- %add.ptr = getelementptr inbounds i16* %a, i64 127
- %0 = load i16* %add.ptr, align 2
+ %add.ptr = getelementptr inbounds i16, i16* %a, i64 127
+ %0 = load i16, i16* %add.ptr, align 2
; ARM: ldrh r0, [r0, #254]
ret i16 %0
}
@@ -70,8 +70,8 @@ entry:
define zeroext i16 @t8(i16* nocapture %a) nounwind uwtable readonly ssp {
entry:
; ARM: t8
- %add.ptr = getelementptr inbounds i16* %a, i64 128
- %0 = load i16* %add.ptr, align 2
+ %add.ptr = getelementptr inbounds i16, i16* %a, i64 128
+ %0 = load i16, i16* %add.ptr, align 2
; ARM: add r0, r0, #256
; ARM: ldrh r0, [r0]
ret i16 %0
@@ -80,7 +80,7 @@ entry:
define void @t9(i16* nocapture %a) nounwind uwtable ssp {
entry:
; ARM: t9
- %add.ptr = getelementptr inbounds i16* %a, i64 -8
+ %add.ptr = getelementptr inbounds i16, i16* %a, i64 -8
store i16 0, i16* %add.ptr, align 2
; ARM: strh r1, [r0, #-16]
ret void
@@ -91,7 +91,7 @@ entry:
define void @t10(i16* nocapture %a) nounwind uwtable ssp {
entry:
; ARM: t10
- %add.ptr = getelementptr inbounds i16* %a, i64 -128
+ %add.ptr = getelementptr inbounds i16, i16* %a, i64 -128
store i16 0, i16* %add.ptr, align 2
; ARM: mvn r{{[1-9]}}, #255
; ARM: add r0, r0, r{{[1-9]}}
@@ -102,7 +102,7 @@ entry:
define void @t11(i16* nocapture %a) nounwind uwtable ssp {
entry:
; ARM: t11
- %add.ptr = getelementptr inbounds i16* %a, i64 8
+ %add.ptr = getelementptr inbounds i16, i16* %a, i64 8
store i16 0, i16* %add.ptr, align 2
; ARM: strh r{{[1-9]}}, [r0, #16]
ret void
@@ -113,7 +113,7 @@ entry:
define void @t12(i16* nocapture %a) nounwind uwtable ssp {
entry:
; ARM: t12
- %add.ptr = getelementptr inbounds i16* %a, i64 128
+ %add.ptr = getelementptr inbounds i16, i16* %a, i64 128
store i16 0, i16* %add.ptr, align 2
; ARM: add r0, r0, #256
; ARM: strh r{{[1-9]}}, [r0]
@@ -123,8 +123,8 @@ entry:
define signext i8 @t13(i8* nocapture %a) nounwind uwtable readonly ssp {
entry:
; ARM: t13
- %add.ptr = getelementptr inbounds i8* %a, i64 -8
- %0 = load i8* %add.ptr, align 2
+ %add.ptr = getelementptr inbounds i8, i8* %a, i64 -8
+ %0 = load i8, i8* %add.ptr, align 2
; ARM: ldrsb r0, [r0, #-8]
ret i8 %0
}
@@ -132,8 +132,8 @@ entry:
define signext i8 @t14(i8* nocapture %a) nounwind uwtable readonly ssp {
entry:
; ARM: t14
- %add.ptr = getelementptr inbounds i8* %a, i64 -255
- %0 = load i8* %add.ptr, align 2
+ %add.ptr = getelementptr inbounds i8, i8* %a, i64 -255
+ %0 = load i8, i8* %add.ptr, align 2
; ARM: ldrsb r0, [r0, #-255]
ret i8 %0
}
@@ -141,8 +141,8 @@ entry:
define signext i8 @t15(i8* nocapture %a) nounwind uwtable readonly ssp {
entry:
; ARM: t15
- %add.ptr = getelementptr inbounds i8* %a, i64 -256
- %0 = load i8* %add.ptr, align 2
+ %add.ptr = getelementptr inbounds i8, i8* %a, i64 -256
+ %0 = load i8, i8* %add.ptr, align 2
; ARM: mvn r{{[1-9]}}, #255
; ARM: add r0, r0, r{{[1-9]}}
; ARM: ldrsb r0, [r0]
diff --git a/test/CodeGen/ARM/fast-isel-load-store-verify.ll b/test/CodeGen/ARM/fast-isel-load-store-verify.ll
index 710d88b3158c..acf10c8b719e 100644
--- a/test/CodeGen/ARM/fast-isel-load-store-verify.ll
+++ b/test/CodeGen/ARM/fast-isel-load-store-verify.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ALL
-; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-linux-gnueabi | FileCheck %s --check-prefix=ALL
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort=1 -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ALL
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort=1 -relocation-model=dynamic-no-pic -mtriple=armv7-linux-gnueabi | FileCheck %s --check-prefix=ALL
; FIXME Add tests for thumbv7, they currently fail MI verification because
; of a mismatch in register classes in uses.
@@ -17,7 +17,7 @@ define i8 @t1() nounwind uwtable ssp {
; ALL: @t1
; ALL: ldrb
; ALL: add
- %1 = load i8* @a, align 1
+ %1 = load i8, i8* @a, align 1
%2 = add nsw i8 %1, 1
ret i8 %2
}
@@ -26,7 +26,7 @@ define i16 @t2() nounwind uwtable ssp {
; ALL: @t2
; ALL: ldrh
; ALL: add
- %1 = load i16* @b, align 2
+ %1 = load i16, i16* @b, align 2
%2 = add nsw i16 %1, 1
ret i16 %2
}
@@ -35,7 +35,7 @@ define i32 @t3() nounwind uwtable ssp {
; ALL: @t3
; ALL: ldr
; ALL: add
- %1 = load i32* @c, align 4
+ %1 = load i32, i32* @c, align 4
%2 = add nsw i32 %1, 1
ret i32 %2
}
diff --git a/test/CodeGen/ARM/fast-isel-mvn.ll b/test/CodeGen/ARM/fast-isel-mvn.ll
index 886f2daa21dd..89b7c05158cd 100644
--- a/test/CodeGen/ARM/fast-isel-mvn.ll
+++ b/test/CodeGen/ARM/fast-isel-mvn.ll
@@ -1,8 +1,8 @@
-; RUN: llc -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -arm-use-movt=false -mtriple=armv7-apple-ios < %s | FileCheck %s --check-prefix=CHECK --check-prefix=ARM
-; RUN: llc -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -arm-use-movt=false -mtriple=armv7-linux-gnueabi < %s | FileCheck %s --check-prefix=CHECK --check-prefix=ARM
-; RUN: llc -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -arm-use-movt=false -mtriple=thumbv7-apple-ios < %s | FileCheck %s --check-prefix=CHECK --check-prefix=ARM
-; RUN: llc -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -arm-use-movt=true -mtriple=thumbv7-apple-ios < %s | FileCheck %s --check-prefix=CHECK --check-prefix=THUMB
-; RUN: llc -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -arm-use-movt=true -mtriple=armv7-apple-ios < %s | FileCheck %s --check-prefix=MOVT
+; RUN: llc -O0 -verify-machineinstrs -fast-isel-abort=1 -relocation-model=dynamic-no-pic -arm-use-movt=false -mtriple=armv7-apple-ios < %s | FileCheck %s --check-prefix=CHECK --check-prefix=ARM
+; RUN: llc -O0 -verify-machineinstrs -fast-isel-abort=1 -relocation-model=dynamic-no-pic -arm-use-movt=false -mtriple=armv7-linux-gnueabi < %s | FileCheck %s --check-prefix=CHECK --check-prefix=ARM
+; RUN: llc -O0 -verify-machineinstrs -fast-isel-abort=1 -relocation-model=dynamic-no-pic -arm-use-movt=false -mtriple=thumbv7-apple-ios < %s | FileCheck %s --check-prefix=CHECK --check-prefix=ARM
+; RUN: llc -O0 -verify-machineinstrs -fast-isel-abort=1 -relocation-model=dynamic-no-pic -arm-use-movt=true -mtriple=thumbv7-apple-ios < %s | FileCheck %s --check-prefix=CHECK --check-prefix=THUMB
+; RUN: llc -O0 -verify-machineinstrs -fast-isel-abort=1 -relocation-model=dynamic-no-pic -arm-use-movt=true -mtriple=armv7-apple-ios < %s | FileCheck %s --check-prefix=MOVT
; rdar://10412592
define void @t1() nounwind {
diff --git a/test/CodeGen/ARM/fast-isel-pic.ll b/test/CodeGen/ARM/fast-isel-pic.ll
index 838c103e7c09..70e15daaca62 100644
--- a/test/CodeGen/ARM/fast-isel-pic.ll
+++ b/test/CodeGen/ARM/fast-isel-pic.ll
@@ -1,8 +1,8 @@
-; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
-; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=pic -mtriple=arm-apple-ios | FileCheck %s --check-prefix=ARM
-; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARMv7
-; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=pic -mtriple=thumbv7-none-linux-gnueabi | FileCheck %s --check-prefix=THUMB-ELF
-; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=pic -mtriple=armv7-none-linux-gnueabi | FileCheck %s --check-prefix=ARMv7-ELF
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort=1 -relocation-model=pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort=1 -relocation-model=pic -mtriple=arm-apple-ios | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort=1 -relocation-model=pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARMv7
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort=1 -relocation-model=pic -mtriple=thumbv7-none-linux-gnueabi | FileCheck %s --check-prefix=THUMB-ELF
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort=1 -relocation-model=pic -mtriple=armv7-none-linux-gnueabi | FileCheck %s --check-prefix=ARMv7-ELF
@g = global i32 0, align 4
@@ -29,7 +29,7 @@ entry:
; ARMv7-ELF-NEXT: add r[[reg2]], pc
; ARMv7-ELF: ldr r[[reg3:[0-9]+]],
; ARMv7-ELF: ldr r[[reg2]], [r[[reg3]], r[[reg2]]]
- %tmp = load i32* @g
+ %tmp = load i32, i32* @g
ret i32 %tmp
}
@@ -60,6 +60,6 @@ entry:
; ARMv7-ELF-NEXT: add r[[reg5]], pc
; ARMv7-ELF: ldr r[[reg6:[0-9]+]],
; ARMv7-ELF: ldr r[[reg5]], [r[[reg6]], r[[reg5]]]
- %tmp = load i32* @i
+ %tmp = load i32, i32* @i
ret i32 %tmp
}
diff --git a/test/CodeGen/ARM/fast-isel-pred.ll b/test/CodeGen/ARM/fast-isel-pred.ll
index 48f93225b6b8..ae8b67d7157b 100644
--- a/test/CodeGen/ARM/fast-isel-pred.ll
+++ b/test/CodeGen/ARM/fast-isel-pred.ll
@@ -7,9 +7,9 @@ entry:
%X = alloca <4 x i32>, align 16
%Y = alloca <4 x float>, align 16
store i32 0, i32* %retval
- %tmp = load <4 x i32>* %X, align 16
+ %tmp = load <4 x i32>, <4 x i32>* %X, align 16
call void @__aa(<4 x i32> %tmp, i8* null, i32 3, <4 x float>* %Y)
- %0 = load i32* %retval
+ %0 = load i32, i32* %retval
ret i32 %0
}
@@ -24,15 +24,15 @@ entry:
store i8* %p, i8** %p.addr, align 4
store i32 %offset, i32* %offset.addr, align 4
store <4 x float>* %constants, <4 x float>** %constants.addr, align 4
- %tmp = load <4 x i32>* %v.addr, align 16
+ %tmp = load <4 x i32>, <4 x i32>* %v.addr, align 16
store <4 x i32> %tmp, <4 x i32>* %__a.addr.i, align 16
- %tmp.i = load <4 x i32>* %__a.addr.i, align 16
+ %tmp.i = load <4 x i32>, <4 x i32>* %__a.addr.i, align 16
%0 = bitcast <4 x i32> %tmp.i to <16 x i8>
%1 = bitcast <16 x i8> %0 to <4 x i32>
%vcvt.i = sitofp <4 x i32> %1 to <4 x float>
- %tmp1 = load i8** %p.addr, align 4
- %tmp2 = load i32* %offset.addr, align 4
- %tmp3 = load <4 x float>** %constants.addr, align 4
+ %tmp1 = load i8*, i8** %p.addr, align 4
+ %tmp2 = load i32, i32* %offset.addr, align 4
+ %tmp3 = load <4 x float>*, <4 x float>** %constants.addr, align 4
call void @__bb(<4 x float> %vcvt.i, i8* %tmp1, i32 %tmp2, <4 x float>* %tmp3)
ret void
}
@@ -48,12 +48,12 @@ entry:
store i8* %p, i8** %p.addr, align 4
store i32 %offset, i32* %offset.addr, align 4
store <4 x float>* %constants, <4 x float>** %constants.addr, align 4
- %tmp = load i64* %data, align 4
- %tmp1 = load i8** %p.addr, align 4
- %tmp2 = load i32* %offset.addr, align 4
- %add.ptr = getelementptr i8* %tmp1, i32 %tmp2
+ %tmp = load i64, i64* %data, align 4
+ %tmp1 = load i8*, i8** %p.addr, align 4
+ %tmp2 = load i32, i32* %offset.addr, align 4
+ %add.ptr = getelementptr i8, i8* %tmp1, i32 %tmp2
%0 = bitcast i8* %add.ptr to i64*
- %arrayidx = getelementptr inbounds i64* %0, i32 0
+ %arrayidx = getelementptr inbounds i64, i64* %0, i32 0
store i64 %tmp, i64* %arrayidx
ret void
}
diff --git a/test/CodeGen/ARM/fast-isel-redefinition.ll b/test/CodeGen/ARM/fast-isel-redefinition.ll
index ee150facac96..a1c8657cb811 100644
--- a/test/CodeGen/ARM/fast-isel-redefinition.ll
+++ b/test/CodeGen/ARM/fast-isel-redefinition.ll
@@ -1,11 +1,11 @@
-; RUN: llc -O0 -verify-machineinstrs -fast-isel-abort -optimize-regalloc -regalloc=basic < %s
+; RUN: llc -O0 -verify-machineinstrs -fast-isel-abort=1 -optimize-regalloc -regalloc=basic < %s
; This isn't exactly a useful set of command-line options, but check that it
; doesn't crash. (It was crashing because a register was getting redefined.)
target triple = "thumbv7-apple-macosx10.6.7"
define i32 @f(i32* %x) nounwind ssp {
- %y = getelementptr inbounds i32* %x, i32 5000
- %tmp103 = load i32* %y, align 4
+ %y = getelementptr inbounds i32, i32* %x, i32 5000
+ %tmp103 = load i32, i32* %y, align 4
ret i32 %tmp103
}
diff --git a/test/CodeGen/ARM/fast-isel-remat-same-constant.ll b/test/CodeGen/ARM/fast-isel-remat-same-constant.ll
new file mode 100644
index 000000000000..d64cf8022ed4
--- /dev/null
+++ b/test/CodeGen/ARM/fast-isel-remat-same-constant.ll
@@ -0,0 +1,29 @@
+; RUN: llc %s -o - -fast-isel=true -O0 -verify-machineinstrs | FileCheck %s
+
+target datalayout = "e-m:o-p:32:32-f64:32:64-v64:32:64-v128:32:128-a:0:32-n32-S32"
+target triple = "thumbv7-apple-ios8.0.0"
+
+; This test failed machine instruction verification due to incorrect kill flags on the add instructions
+; generated by the GEPs. The first add killed the vreg for the #6680 constant, which should
+; be correct. However, the second add is also a constant expression, and the local value save area grows
+; down, so the next use of the vreg for #6680 came after the first add, which had already killed it.
+
+; CHECK: #6680
+
+%struct.RD_8x8DATA = type { i32, [16 x [16 x i32]], [16 x [16 x i32]], [16 x [16 x i32]], [3 x [16 x [16 x i32]]], [4 x i16], [4 x i8], [4 x i8], [4 x i8], [16 x [16 x i16]], [16 x [16 x i16]], [16 x [16 x i32]] }
+
+@tr8x8 = external global %struct.RD_8x8DATA, align 4
+@tr4x4 = external global %struct.RD_8x8DATA, align 4
+
+; Function Attrs: noreturn
+declare void @foo(i16*, i16*) #0
+
+; Function Attrs: minsize
+define i32 @test() #1 {
+bb:
+ call void @foo(i16* getelementptr inbounds (%struct.RD_8x8DATA, %struct.RD_8x8DATA* @tr8x8, i32 0, i32 10, i32 0, i32 0), i16* getelementptr inbounds (%struct.RD_8x8DATA, %struct.RD_8x8DATA* @tr4x4, i32 0, i32 10, i32 0, i32 0))
+ unreachable
+}
+
+attributes #0 = { noreturn }
+attributes #1 = { minsize }
diff --git a/test/CodeGen/ARM/fast-isel-ret.ll b/test/CodeGen/ARM/fast-isel-ret.ll
index 8a68309dc831..f66479c2cfa3 100644
--- a/test/CodeGen/ARM/fast-isel-ret.ll
+++ b/test/CodeGen/ARM/fast-isel-ret.ll
@@ -1,6 +1,6 @@
-; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios -verify-machineinstrs | FileCheck %s
-; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-linux-gnueabi -verify-machineinstrs | FileCheck %s
-; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios -verify-machineinstrs | FileCheck %s
+; RUN: llc < %s -O0 -fast-isel-abort=1 -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios -verify-machineinstrs | FileCheck %s
+; RUN: llc < %s -O0 -fast-isel-abort=1 -relocation-model=dynamic-no-pic -mtriple=armv7-linux-gnueabi -verify-machineinstrs | FileCheck %s
+; RUN: llc < %s -O0 -fast-isel-abort=1 -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios -verify-machineinstrs | FileCheck %s
; Sign-extend of i1 currently not supported by fast-isel
;define signext i1 @ret0(i1 signext %a) nounwind uwtable ssp {
diff --git a/test/CodeGen/ARM/fast-isel-select.ll b/test/CodeGen/ARM/fast-isel-select.ll
index 549c97e24dcd..4eef1d6bb988 100644
--- a/test/CodeGen/ARM/fast-isel-select.ll
+++ b/test/CodeGen/ARM/fast-isel-select.ll
@@ -1,18 +1,18 @@
-; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM
-; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-linux-gnueabi | FileCheck %s --check-prefix=ARM
-; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
-; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv8-apple-ios | FileCheck %s --check-prefix=THUMB
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort=1 -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort=1 -relocation-model=dynamic-no-pic -mtriple=armv7-linux-gnueabi | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort=1 -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
+; RUN: llc < %s -O0 -fast-isel-abort=1 -relocation-model=dynamic-no-pic -mtriple=thumbv8-apple-ios | FileCheck %s --check-prefix=THUMB
define i32 @t1(i1 %c) nounwind readnone {
entry:
; ARM: t1
; ARM: movw r{{[1-9]}}, #10
-; ARM: cmp r0, #0
+; ARM: tst r0, #1
; ARM: moveq r{{[1-9]}}, #20
; ARM: mov r0, r{{[1-9]}}
; THUMB: t1
; THUMB: movs r{{[1-9]}}, #10
-; THUMB: cmp r0, #0
+; THUMB: tst.w r0, #1
; THUMB: it eq
; THUMB: moveq r{{[1-9]}}, #20
; THUMB: mov r0, r{{[1-9]}}
@@ -23,11 +23,11 @@ entry:
define i32 @t2(i1 %c, i32 %a) nounwind readnone {
entry:
; ARM: t2
-; ARM: cmp r0, #0
+; ARM: tst r0, #1
; ARM: moveq r{{[1-9]}}, #20
; ARM: mov r0, r{{[1-9]}}
; THUMB: t2
-; THUMB: cmp r0, #0
+; THUMB: tst.w r0, #1
; THUMB: it eq
; THUMB: moveq r{{[1-9]}}, #20
; THUMB: mov r0, r{{[1-9]}}
@@ -38,11 +38,11 @@ entry:
define i32 @t3(i1 %c, i32 %a, i32 %b) nounwind readnone {
entry:
; ARM: t3
-; ARM: cmp r0, #0
+; ARM: tst r0, #1
; ARM: movne r2, r1
; ARM: add r0, r2, r1
; THUMB: t3
-; THUMB: cmp r0, #0
+; THUMB: tst.w r0, #1
; THUMB: it ne
; THUMB: movne r2, r1
; THUMB: add.w r0, r2, r1
@@ -55,12 +55,12 @@ define i32 @t4(i1 %c) nounwind readnone {
entry:
; ARM: t4
; ARM: mvn r{{[1-9]}}, #9
-; ARM: cmp r0, #0
+; ARM: tst r0, #1
; ARM: mvneq r{{[1-9]}}, #0
; ARM: mov r0, r{{[1-9]}}
; THUMB-LABEL: t4
; THUMB: mvn [[REG:r[1-9]+]], #9
-; THUMB: cmp r0, #0
+; THUMB: tst.w r0, #1
; THUMB: it eq
; THUMB: mvneq [[REG]], #0
; THUMB: mov r0, [[REG]]
@@ -71,11 +71,11 @@ entry:
define i32 @t5(i1 %c, i32 %a) nounwind readnone {
entry:
; ARM: t5
-; ARM: cmp r0, #0
+; ARM: tst r0, #1
; ARM: mvneq r{{[1-9]}}, #1
; ARM: mov r0, r{{[1-9]}}
; THUMB: t5
-; THUMB: cmp r0, #0
+; THUMB: tst.w r0, #1
; THUMB: it eq
; THUMB: mvneq r{{[1-9]}}, #1
; THUMB: mov r0, r{{[1-9]}}
@@ -87,11 +87,11 @@ entry:
define i32 @t6(i1 %c, i32 %a) nounwind readnone {
entry:
; ARM: t6
-; ARM: cmp r0, #0
+; ARM: tst r0, #1
; ARM: mvneq r{{[1-9]}}, #978944
; ARM: mov r0, r{{[1-9]}}
; THUMB: t6
-; THUMB: cmp r0, #0
+; THUMB: tst.w r0, #1
; THUMB: it eq
; THUMB: mvneq r{{[1-9]}}, #978944
; THUMB: mov r0, r{{[1-9]}}
diff --git a/test/CodeGen/ARM/fast-isel-shift-materialize.ll b/test/CodeGen/ARM/fast-isel-shift-materialize.ll
new file mode 100644
index 000000000000..ab36a979d71b
--- /dev/null
+++ b/test/CodeGen/ARM/fast-isel-shift-materialize.ll
@@ -0,0 +1,25 @@
+; RUN: llc %s -o - -verify-machineinstrs -fast-isel=true | FileCheck %s
+
+target datalayout = "e-m:o-p:32:32-f64:32:64-v64:32:64-v128:32:128-a:0:32-n32-S32"
+target triple = "thumbv7-apple-ios8.0.0"
+
+; When materializing the '2' for the shifts below, the second shift kills the vreg
+; we materialize into. However, the first shift was also killing that vreg.
+; This fails the machine verifier because the second use then thinks it is reading an
+; undefined register.
+
+; CHECK: lsr.w
+; CHECK: lsr.w
+
+declare void @foo(i32, i32)
+
+; Function Attrs: minsize
+define i32 @test(i32 %tmp3, i32 %tmp9) #0 {
+bb:
+ %tmp4 = lshr i32 %tmp3, 2
+ %tmp10 = lshr i32 %tmp9, 2
+ call void @foo(i32 %tmp10, i32 %tmp4)
+ unreachable
+}
+
+attributes #0 = { minsize }
diff --git a/test/CodeGen/ARM/fast-isel-shifter.ll b/test/CodeGen/ARM/fast-isel-shifter.ll
index eb4b2b2ce0ae..87b861f35396 100644
--- a/test/CodeGen/ARM/fast-isel-shifter.ll
+++ b/test/CodeGen/ARM/fast-isel-shifter.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios -verify-machineinstrs | FileCheck %s --check-prefix=ARM
-; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-linux-gnueabi -verify-machineinstrs | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -fast-isel-abort=1 -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios -verify-machineinstrs | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -fast-isel-abort=1 -relocation-model=dynamic-no-pic -mtriple=armv7-linux-gnueabi -verify-machineinstrs | FileCheck %s --check-prefix=ARM
define i32 @shl() nounwind ssp {
entry:
diff --git a/test/CodeGen/ARM/fast-isel-static.ll b/test/CodeGen/ARM/fast-isel-static.ll
index 9bd0a51e7120..c3980cb51f67 100644
--- a/test/CodeGen/ARM/fast-isel-static.ll
+++ b/test/CodeGen/ARM/fast-isel-static.ll
@@ -1,7 +1,7 @@
-; RUN: llc < %s -mtriple=thumbv7-apple-ios -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=static -arm-long-calls | FileCheck -check-prefix=CHECK-LONG %s
-; RUN: llc < %s -mtriple=armv7-linux-gnueabi -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=static -arm-long-calls | FileCheck -check-prefix=CHECK-LONG %s
-; RUN: llc < %s -mtriple=thumbv7-apple-ios -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=static | FileCheck -check-prefix=CHECK-NORM %s
-; RUN: llc < %s -mtriple=armv7-linux-gnueabi -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=static | FileCheck -check-prefix=CHECK-NORM %s
+; RUN: llc < %s -mtriple=thumbv7-apple-ios -O0 -verify-machineinstrs -fast-isel-abort=1 -relocation-model=static -arm-long-calls | FileCheck -check-prefix=CHECK-LONG %s
+; RUN: llc < %s -mtriple=armv7-linux-gnueabi -O0 -verify-machineinstrs -fast-isel-abort=1 -relocation-model=static -arm-long-calls | FileCheck -check-prefix=CHECK-LONG %s
+; RUN: llc < %s -mtriple=thumbv7-apple-ios -O0 -verify-machineinstrs -fast-isel-abort=1 -relocation-model=static | FileCheck -check-prefix=CHECK-NORM %s
+; RUN: llc < %s -mtriple=armv7-linux-gnueabi -O0 -verify-machineinstrs -fast-isel-abort=1 -relocation-model=static | FileCheck -check-prefix=CHECK-NORM %s
define void @myadd(float* %sum, float* %addend) nounwind {
entry:
@@ -9,12 +9,12 @@ entry:
%addend.addr = alloca float*, align 4
store float* %sum, float** %sum.addr, align 4
store float* %addend, float** %addend.addr, align 4
- %tmp = load float** %sum.addr, align 4
- %tmp1 = load float* %tmp
- %tmp2 = load float** %addend.addr, align 4
- %tmp3 = load float* %tmp2
+ %tmp = load float*, float** %sum.addr, align 4
+ %tmp1 = load float, float* %tmp
+ %tmp2 = load float*, float** %addend.addr, align 4
+ %tmp3 = load float, float* %tmp2
%add = fadd float %tmp1, %tmp3
- %tmp4 = load float** %sum.addr, align 4
+ %tmp4 = load float*, float** %sum.addr, align 4
store float %add, float* %tmp4
ret void
}
diff --git a/test/CodeGen/ARM/fast-isel-update-valuemap-for-extract.ll b/test/CodeGen/ARM/fast-isel-update-valuemap-for-extract.ll
new file mode 100644
index 000000000000..18d60fee189f
--- /dev/null
+++ b/test/CodeGen/ARM/fast-isel-update-valuemap-for-extract.ll
@@ -0,0 +1,24 @@
+; RUN: llc %s -o - -fast-isel=true -O0 -verify-machineinstrs | FileCheck %s
+
+target datalayout = "e-m:o-p:32:32-f64:32:64-v64:32:64-v128:32:128-a:0:32-n32-S32"
+target triple = "thumbv7-apple-ios8.0.0"
+
+; This test ensures that when fast-isel rewrites uses of the vreg for %tmp29, it also
+; updates kill flags on the shift instruction generated as part of the gep.
+; This was failing instruction verification.
+
+; CHECK-LABEL: @test
+
+%struct.node = type opaque
+
+declare void @foo([4 x i32], %struct.node*)
+
+define void @test([4 x i32] %xpic.coerce, %struct.node* %t) {
+bb:
+ %tmp29 = extractvalue [4 x i32] %xpic.coerce, 0
+ %tmp40 = bitcast %struct.node* %t to [8 x %struct.node*]*
+ %tmp41 = getelementptr inbounds [8 x %struct.node*], [8 x %struct.node*]* %tmp40, i32 0, i32 %tmp29
+ %tmp42 = load %struct.node*, %struct.node** %tmp41, align 4
+ call void @foo([4 x i32] %xpic.coerce, %struct.node* %tmp42)
+ ret void
+}
diff --git a/test/CodeGen/ARM/fast-isel-vaddd.ll b/test/CodeGen/ARM/fast-isel-vaddd.ll
new file mode 100644
index 000000000000..2aa269a9774d
--- /dev/null
+++ b/test/CodeGen/ARM/fast-isel-vaddd.ll
@@ -0,0 +1,33 @@
+; RUN: llc %s -o - -verify-machineinstrs -fast-isel=true -mattr=+vfp4 -mattr=+neon | FileCheck %s
+
+target datalayout = "e-m:o-p:32:32-f64:32:64-v64:32:64-v128:32:128-a:0:32-n32-S32"
+target triple = "thumbv7s-apple-ios8.0.0"
+
+%union.DV = type { <2 x double> }
+
+; Fast-ISel was incorrectly trying to codegen <2 x double> adds and returning only a single vadds.
+; Check that we generate the 2 vaddd's we expect.
+
+; CHECK: vadd.f64
+; CHECK: vadd.f64
+
+define i32 @main(i32 %argc, i8** nocapture readnone %Argv, <2 x double> %tmp31) {
+bb:
+ %Ad = alloca %union.DV, align 16
+ %tmp32 = getelementptr inbounds %union.DV, %union.DV* %Ad, i32 0, i32 0
+ %tmp33 = fadd <2 x double> %tmp31, %tmp31
+ br label %bb37
+
+bb37: ; preds = %bb37, %bb
+ %i.02 = phi i32 [ 0, %bb ], [ %tmp38, %bb37 ]
+ store <2 x double> %tmp33, <2 x double>* %tmp32, align 16
+ %tmp38 = add nuw nsw i32 %i.02, 1
+ %exitcond = icmp eq i32 %tmp38, 500000
+ br i1 %exitcond, label %bb39, label %bb37
+
+bb39: ; preds = %bb37
+ call fastcc void @printDV(%union.DV* %Ad)
+ ret i32 0
+}
+
+declare hidden fastcc void @printDV(%union.DV* nocapture readonly)
diff --git a/test/CodeGen/ARM/fast-isel-vararg.ll b/test/CodeGen/ARM/fast-isel-vararg.ll
index 3ff2b151ab5f..35442eea1005 100644
--- a/test/CodeGen/ARM/fast-isel-vararg.ll
+++ b/test/CodeGen/ARM/fast-isel-vararg.ll
@@ -1,6 +1,6 @@
-; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM
-; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-linux-gnueabi | FileCheck %s --check-prefix=ARM
-; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort=1 -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort=1 -relocation-model=dynamic-no-pic -mtriple=armv7-linux-gnueabi | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort=1 -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
define i32 @VarArg() nounwind {
entry:
@@ -10,11 +10,11 @@ entry:
%m = alloca i32, align 4
%n = alloca i32, align 4
%tmp = alloca i32, align 4
- %0 = load i32* %i, align 4
- %1 = load i32* %j, align 4
- %2 = load i32* %k, align 4
- %3 = load i32* %m, align 4
- %4 = load i32* %n, align 4
+ %0 = load i32, i32* %i, align 4
+ %1 = load i32, i32* %j, align 4
+ %2 = load i32, i32* %k, align 4
+ %3 = load i32, i32* %m, align 4
+ %4 = load i32, i32* %n, align 4
; ARM: VarArg
; ARM: mov [[FP:r[0-9]+]], sp
; ARM: sub sp, sp, #32
@@ -37,9 +37,9 @@ entry:
; THUMB: str.w {{[a-z0-9]+}}, [sp]
; THUMB: str.w {{[a-z0-9]+}}, [sp, #4]
; THUMB: bl {{_?}}CallVariadic
- %call = call i32 (i32, ...)* @CallVariadic(i32 5, i32 %0, i32 %1, i32 %2, i32 %3, i32 %4)
+ %call = call i32 (i32, ...) @CallVariadic(i32 5, i32 %0, i32 %1, i32 %2, i32 %3, i32 %4)
store i32 %call, i32* %tmp, align 4
- %5 = load i32* %tmp, align 4
+ %5 = load i32, i32* %tmp, align 4
ret i32 %5
}
diff --git a/test/CodeGen/ARM/fast-isel.ll b/test/CodeGen/ARM/fast-isel.ll
index 5981cab7dcb1..49460220c47c 100644
--- a/test/CodeGen/ARM/fast-isel.ll
+++ b/test/CodeGen/ARM/fast-isel.ll
@@ -1,6 +1,6 @@
-; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios -verify-machineinstrs | FileCheck %s --check-prefix=ARM
-; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-linux-gnueabi -verify-machineinstrs | FileCheck %s --check-prefix=ARM
-; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios -verify-machineinstrs | FileCheck %s --check-prefix=THUMB
+; RUN: llc < %s -O0 -fast-isel-abort=1 -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios -verify-machineinstrs | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -fast-isel-abort=1 -relocation-model=dynamic-no-pic -mtriple=armv7-linux-gnueabi -verify-machineinstrs | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -fast-isel-abort=1 -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios -verify-machineinstrs | FileCheck %s --check-prefix=THUMB
; Very basic fast-isel functionality.
define i32 @test0(i32 %a, i32 %b) nounwind {
@@ -9,8 +9,8 @@ entry:
%b.addr = alloca i32, align 4
store i32 %a, i32* %a.addr
store i32 %b, i32* %b.addr
- %tmp = load i32* %a.addr
- %tmp1 = load i32* %b.addr
+ %tmp = load i32, i32* %a.addr
+ %tmp1 = load i32, i32* %b.addr
%add = add nsw i32 %tmp, %tmp1
ret i32 %add
}
@@ -110,9 +110,9 @@ bb2:
; ARM: sxth
bb3:
- %c1 = load i8* %ptr3
- %c2 = load i16* %ptr2
- %c3 = load i32* %ptr1
+ %c1 = load i8, i8* %ptr3
+ %c2 = load i16, i16* %ptr2
+ %c3 = load i32, i32* %ptr1
%c4 = zext i8 %c1 to i32
%c5 = sext i16 %c2 to i32
%c6 = add i32 %c4, %c5
@@ -138,7 +138,7 @@ bb3:
@test4g = external global i32
define void @test4() {
- %a = load i32* @test4g
+ %a = load i32, i32* @test4g
%b = add i32 %a, 1
store i32 %b, i32* @test4g
ret void
diff --git a/test/CodeGen/ARM/fastisel-gep-promote-before-add.ll b/test/CodeGen/ARM/fastisel-gep-promote-before-add.ll
index a32ab6d09317..232ab50c3ee5 100644
--- a/test/CodeGen/ARM/fastisel-gep-promote-before-add.ll
+++ b/test/CodeGen/ARM/fastisel-gep-promote-before-add.ll
@@ -6,13 +6,13 @@ define zeroext i8 @gep_promotion(i8* %ptr) nounwind uwtable ssp {
entry:
%ptr.addr = alloca i8*, align 8
%add = add i8 64, 64 ; 0x40 + 0x40
- %0 = load i8** %ptr.addr, align 8
+ %0 = load i8*, i8** %ptr.addr, align 8
; CHECK-LABEL: _gep_promotion:
; CHECK: ldrb {{r[0-9]+}}, {{\[r[0-9]+\]}}
- %arrayidx = getelementptr inbounds i8* %0, i8 %add
+ %arrayidx = getelementptr inbounds i8, i8* %0, i8 %add
- %1 = load i8* %arrayidx, align 1
+ %1 = load i8, i8* %arrayidx, align 1
ret i8 %1
}
diff --git a/test/CodeGen/ARM/fcopysign.ll b/test/CodeGen/ARM/fcopysign.ll
index 1de057208ce3..d013fbf8c15a 100644
--- a/test/CodeGen/ARM/fcopysign.ll
+++ b/test/CodeGen/ARM/fcopysign.ll
@@ -48,7 +48,7 @@ entry:
; SOFT: vmov.i32 [[REG6:(d[0-9]+)]], #0x80000000
; SOFT: vshr.u64 [[REG7]], [[REG7]], #32
; SOFT: vbsl [[REG6]], [[REG7]],
- %0 = tail call double (...)* @bar() nounwind
+ %0 = tail call double (...) @bar() nounwind
%1 = fptrunc double %0 to float
%2 = tail call float @copysignf(float 5.000000e-01, float %1) nounwind readnone
%3 = fadd float %1, %2
diff --git a/test/CodeGen/ARM/flag-crash.ll b/test/CodeGen/ARM/flag-crash.ll
index 9c61944a2154..66eb8a51c18a 100644
--- a/test/CodeGen/ARM/flag-crash.ll
+++ b/test/CodeGen/ARM/flag-crash.ll
@@ -5,13 +5,13 @@
define fastcc void @func(%struct.gs_matrix* nocapture %pm1) nounwind {
entry:
- %0 = getelementptr inbounds %struct.gs_matrix* %pm1, i32 0, i32 6
- %1 = load float* %0, align 4
- %2 = getelementptr inbounds %struct.gs_matrix* %pm1, i32 0, i32 8
- %3 = load float* %2, align 4
- %4 = getelementptr inbounds %struct.gs_matrix* %pm1, i32 0, i32 2
+ %0 = getelementptr inbounds %struct.gs_matrix, %struct.gs_matrix* %pm1, i32 0, i32 6
+ %1 = load float, float* %0, align 4
+ %2 = getelementptr inbounds %struct.gs_matrix, %struct.gs_matrix* %pm1, i32 0, i32 8
+ %3 = load float, float* %2, align 4
+ %4 = getelementptr inbounds %struct.gs_matrix, %struct.gs_matrix* %pm1, i32 0, i32 2
%5 = bitcast float* %4 to i32*
- %6 = load i32* %5, align 4
+ %6 = load i32, i32* %5, align 4
%7 = or i32 0, %6
%.mask = and i32 %7, 2147483647
%8 = icmp eq i32 %.mask, 0
diff --git a/test/CodeGen/ARM/fnegs.ll b/test/CodeGen/ARM/fnegs.ll
index 65fe9e36fa1d..3a4767e9173f 100644
--- a/test/CodeGen/ARM/fnegs.ll
+++ b/test/CodeGen/ARM/fnegs.ll
@@ -21,7 +21,7 @@
define float @test1(float* %a) {
entry:
- %0 = load float* %a, align 4 ; <float> [#uses=2]
+ %0 = load float, float* %a, align 4 ; <float> [#uses=2]
%1 = fsub float -0.000000e+00, %0 ; <float> [#uses=2]
%2 = fpext float %1 to double ; <double> [#uses=1]
%3 = fcmp olt double %2, 1.234000e+00 ; <i1> [#uses=1]
@@ -48,7 +48,7 @@ entry:
define float @test2(float* %a) {
entry:
- %0 = load float* %a, align 4 ; <float> [#uses=2]
+ %0 = load float, float* %a, align 4 ; <float> [#uses=2]
%1 = fmul float -1.000000e+00, %0 ; <float> [#uses=2]
%2 = fpext float %1 to double ; <double> [#uses=1]
%3 = fcmp olt double %2, 1.234000e+00 ; <i1> [#uses=1]
diff --git a/test/CodeGen/ARM/fold-stack-adjust.ll b/test/CodeGen/ARM/fold-stack-adjust.ll
index c5ff82eaf830..1fd9bd9e47a3 100644
--- a/test/CodeGen/ARM/fold-stack-adjust.ll
+++ b/test/CodeGen/ARM/fold-stack-adjust.ll
@@ -12,11 +12,11 @@ declare void @bar(i8*)
define void @check_simple() minsize {
; CHECK-LABEL: check_simple:
-; CHECK: push.w {r7, r8, r9, r10, r11, lr}
+; CHECK: push {r3, r4, r5, r6, r7, lr}
; CHECK-NOT: sub sp, sp,
; ...
; CHECK-NOT: add sp, sp,
-; CHECK: pop.w {r0, r1, r2, r3, r11, pc}
+; CHECK: pop {r0, r1, r2, r3, r7, pc}
; CHECK-T1-LABEL: check_simple:
; CHECK-T1: push {r3, r4, r5, r6, r7, lr}
@@ -44,11 +44,11 @@ define void @check_simple() minsize {
define void @check_simple_too_big() minsize {
; CHECK-LABEL: check_simple_too_big:
-; CHECK: push.w {r11, lr}
+; CHECK: push {r7, lr}
; CHECK: sub sp,
; ...
; CHECK: add sp,
-; CHECK: pop.w {r11, pc}
+; CHECK: pop {r7, pc}
%var = alloca i8, i32 64
call void @bar(i8* %var)
ret void
@@ -82,7 +82,7 @@ define void @check_vfp_fold() minsize {
%var = alloca i8, i32 16
- %tmp = load %bigVec* @var
+ %tmp = load %bigVec, %bigVec* @var
call void @bar(i8* %var)
store %bigVec %tmp, %bigVec* @var
@@ -93,11 +93,11 @@ define void @check_vfp_fold() minsize {
; folded in except that doing so would clobber the value being returned.
define i64 @check_no_return_clobber() minsize {
; CHECK-LABEL: check_no_return_clobber:
-; CHECK: push.w {r5, r6, r7, r8, r9, r10, r11, lr}
+; CHECK: push {r1, r2, r3, r4, r5, r6, r7, lr}
; CHECK-NOT: sub sp,
; ...
; CHECK: add sp, #24
-; CHECK: pop.w {r11, pc}
+; CHECK: pop {r7, pc}
; Just to keep iOS FileCheck within previous function:
; CHECK-IOS-LABEL: check_no_return_clobber:
@@ -119,7 +119,7 @@ define arm_aapcs_vfpcc double @check_vfp_no_return_clobber() minsize {
%var = alloca i8, i32 64
- %tmp = load %bigVec* @var
+ %tmp = load %bigVec, %bigVec* @var
call void @bar(i8* %var)
store %bigVec %tmp, %bigVec* @var
@@ -152,7 +152,7 @@ define void @test_fold_point(i1 %tst) minsize {
; We want a long-lived floating register so that a callee-saved dN is used and
; there's both a vpop and a pop.
- %live_val = load double* @dbl
+ %live_val = load double, double* @dbl
br i1 %tst, label %true, label %end
true:
call void @bar(i8* %var)
@@ -176,9 +176,9 @@ define void @test_varsize(...) minsize {
; CHECK-LABEL: test_varsize:
; CHECK: sub sp, #16
-; CHECK: push.w {r9, r10, r11, lr}
+; CHECK: push {r5, r6, r7, lr}
; ...
-; CHECK: pop.w {r2, r3, r11, lr}
+; CHECK: pop.w {r2, r3, r7, lr}
; CHECK: add sp, #16
; CHECK: bx lr
diff --git a/test/CodeGen/ARM/fp.ll b/test/CodeGen/ARM/fp.ll
index 7e1f000e88d9..cc47e3baddad 100644
--- a/test/CodeGen/ARM/fp.ll
+++ b/test/CodeGen/ARM/fp.ll
@@ -45,7 +45,7 @@ define double @h(double* %v) {
;CHECK: vldr
;CHECK-NEXT: vmov
entry:
- %tmp = load double* %v ; <double> [#uses=1]
+ %tmp = load double, double* %v ; <double> [#uses=1]
ret double %tmp
}
diff --git a/test/CodeGen/ARM/fp16-promote.ll b/test/CodeGen/ARM/fp16-promote.ll
new file mode 100644
index 000000000000..e691c2bb8a97
--- /dev/null
+++ b/test/CodeGen/ARM/fp16-promote.ll
@@ -0,0 +1,903 @@
+; RUN: llc -asm-verbose=false < %s -mattr=+vfp3,+fp16 | FileCheck %s -check-prefix=CHECK-FP16 -check-prefix=CHECK-ALL
+; RUN: llc -asm-verbose=false < %s | FileCheck %s -check-prefix=CHECK-LIBCALL -check-prefix=CHECK-ALL
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n32"
+target triple = "armv7-eabihf"
+
+; CHECK-FP16-LABEL: test_fadd:
+; CHECK-FP16: vcvtb.f32.f16
+; CHECK-FP16: vcvtb.f32.f16
+; CHECK-FP16: vadd.f32
+; CHECK-FP16: vcvtb.f16.f32
+; CHECK-LIBCALL-LABEL: test_fadd:
+; CHECK-LIBCALL: bl __gnu_h2f_ieee
+; CHECK-LIBCALL: bl __gnu_h2f_ieee
+; CHECK-LIBCALL: vadd.f32
+; CHECK-LIBCALL: bl __gnu_f2h_ieee
+define void @test_fadd(half* %p, half* %q) #0 {
+ %a = load half, half* %p, align 2
+ %b = load half, half* %q, align 2
+ %r = fadd half %a, %b
+ store half %r, half* %p
+ ret void
+}
+
+; CHECK-FP16-LABEL: test_fsub:
+; CHECK-FP16: vcvtb.f32.f16
+; CHECK-FP16: vcvtb.f32.f16
+; CHECK-FP16: vsub.f32
+; CHECK-FP16: vcvtb.f16.f32
+; CHECK-LIBCALL-LABEL: test_fsub:
+; CHECK-LIBCALL: bl __gnu_h2f_ieee
+; CHECK-LIBCALL: bl __gnu_h2f_ieee
+; CHECK-LIBCALL: vsub.f32
+; CHECK-LIBCALL: bl __gnu_f2h_ieee
+define void @test_fsub(half* %p, half* %q) #0 {
+ %a = load half, half* %p, align 2
+ %b = load half, half* %q, align 2
+ %r = fsub half %a, %b
+ store half %r, half* %p
+ ret void
+}
+
+; CHECK-FP16-LABEL: test_fmul:
+; CHECK-FP16: vcvtb.f32.f16
+; CHECK-FP16: vcvtb.f32.f16
+; CHECK-FP16: vmul.f32
+; CHECK-FP16: vcvtb.f16.f32
+; CHECK-LIBCALL-LABEL: test_fmul
+; CHECK-LIBCALL: bl __gnu_h2f_ieee
+; CHECK-LIBCALL: bl __gnu_h2f_ieee
+; CHECK-LIBCALL: vmul.f32
+; CHECK-LIBCALL: bl __gnu_f2h_ieee
+define void @test_fmul(half* %p, half* %q) #0 {
+ %a = load half, half* %p, align 2
+ %b = load half, half* %q, align 2
+ %r = fmul half %a, %b
+ store half %r, half* %p
+ ret void
+}
+
+; CHECK-FP16-LABEL: test_fdiv:
+; CHECK-FP16: vcvtb.f32.f16
+; CHECK-FP16: vcvtb.f32.f16
+; CHECK-FP16: vdiv.f32
+; CHECK-FP16: vcvtb.f16.f32
+; CHECK-LIBCALL-LABEL: test_fdiv
+; CHECK-LIBCALL: bl __gnu_h2f_ieee
+; CHECK-LIBCALL: bl __gnu_h2f_ieee
+; CHECK-LIBCALL: vdiv.f32
+; CHECK-LIBCALL: bl __gnu_f2h_ieee
+define void @test_fdiv(half* %p, half* %q) #0 {
+ %a = load half, half* %p, align 2
+ %b = load half, half* %q, align 2
+ %r = fdiv half %a, %b
+ store half %r, half* %p
+ ret void
+}
+
+; CHECK-FP16-LABEL: test_frem:
+; CHECK-FP16: vcvtb.f32.f16
+; CHECK-FP16: vcvtb.f32.f16
+; CHECK-FP16: bl fmodf
+; CHECK-FP16: vcvtb.f16.f32
+; CHECK-LIBCALL-LABEL: test_frem
+; CHECK-LIBCALL: bl __gnu_h2f_ieee
+; CHECK-LIBCALL: bl __gnu_h2f_ieee
+; CHECK-LIBCALL: bl fmodf
+; CHECK-LIBCALL: bl __gnu_f2h_ieee
+define void @test_frem(half* %p, half* %q) #0 {
+ %a = load half, half* %p, align 2
+ %b = load half, half* %q, align 2
+ %r = frem half %a, %b
+ store half %r, half* %p
+ ret void
+}
+
+; CHECK-ALL-LABEL: test_load_store:
+; CHECK-ALL-NEXT: .fnstart
+; CHECK-ALL-NEXT: ldrh r0, [r0]
+; CHECK-ALL-NEXT: strh r0, [r1]
+; CHECK-ALL-NEXT: bx lr
+define void @test_load_store(half* %p, half* %q) #0 {
+ %a = load half, half* %p, align 2
+ store half %a, half* %q
+ ret void
+}
+
+; Testing only successful compilation of function calls. In the ARM ABI, half
+; args and returns are handled as f32.
+
+declare half @test_callee(half %a, half %b) #0
+
+; CHECK-ALL-LABEL: test_call:
+; CHECK-ALL-NEXT: .fnstart
+; CHECK-ALL-NEXT: push {r11, lr}
+; CHECK-ALL-NEXT: bl test_callee
+; CHECK-ALL-NEXT: pop {r11, pc}
+define half @test_call(half %a, half %b) #0 {
+ %r = call half @test_callee(half %a, half %b)
+ ret half %r
+}
+
+; CHECK-ALL-LABEL: test_call_flipped:
+; CHECK-ALL-NEXT: .fnstart
+; CHECK-ALL-NEXT: push {r11, lr}
+; CHECK-ALL-NEXT: mov r2, r0
+; CHECK-ALL-NEXT: mov r0, r1
+; CHECK-ALL-NEXT: mov r1, r2
+; CHECK-ALL-NEXT: bl test_callee
+; CHECK-ALL-NEXT: pop {r11, pc}
+define half @test_call_flipped(half %a, half %b) #0 {
+ %r = call half @test_callee(half %b, half %a)
+ ret half %r
+}
+
+; CHECK-ALL-LABEL: test_tailcall_flipped:
+; CHECK-ALL-NEXT: .fnstart
+; CHECK-ALL-NEXT: mov r2, r0
+; CHECK-ALL-NEXT: mov r0, r1
+; CHECK-ALL-NEXT: mov r1, r2
+; CHECK-ALL-NEXT: b test_callee
+define half @test_tailcall_flipped(half %a, half %b) #0 {
+ %r = tail call half @test_callee(half %b, half %a)
+ ret half %r
+}
+
+; The optimizer picks %p or %q based on %c and only loads that value.
+; No conversion is needed.
+; CHECK-ALL-LABEL: test_select:
+; CHECK-ALL-NEXT: .fnstart
+; CHECK-ALL-NEXT: cmp r2, #0
+; CHECK-ALL-NEXT: movne r1, r0
+; CHECK-ALL-NEXT: ldrh r1, [r1]
+; CHECK-ALL-NEXT: strh r1, [r0]
+; CHECK-ALL-NEXT: bx lr
+define void @test_select(half* %p, half* %q, i1 zeroext %c) #0 {
+ %a = load half, half* %p, align 2
+ %b = load half, half* %q, align 2
+ %r = select i1 %c, half %a, half %b
+ store half %r, half* %p
+ ret void
+}
+
+; Test only two variants of fcmp. These get translated to f32 vcmpe
+; instructions anyway.
+; CHECK-FP16-LABEL: test_fcmp_une:
+; CHECK-FP16: vcvtb.f32.f16
+; CHECK-FP16: vcvtb.f32.f16
+; CHECK-FP16: vcmpe.f32
+; CHECK-FP16: vmrs APSR_nzcv, fpscr
+; CHECK-FP16: movwne
+; CHECK-LIBCALL-LABEL: test_fcmp_une:
+; CHECK-LIBCALL: bl __gnu_h2f_ieee
+; CHECK-LIBCALL: bl __gnu_h2f_ieee
+; CHECK-LIBCALL: vcmpe.f32
+; CHECK-LIBCALL: movwne
+define i1 @test_fcmp_une(half* %p, half* %q) #0 {
+ %a = load half, half* %p, align 2
+ %b = load half, half* %q, align 2
+ %r = fcmp une half %a, %b
+ ret i1 %r
+}
+
+; CHECK-FP16-LABEL: test_fcmp_ueq:
+; CHECK-FP16: vcvtb.f32.f16
+; CHECK-FP16: vcvtb.f32.f16
+; CHECK-FP16: vcmpe.f32
+; CHECK-FP16: vmrs APSR_nzcv, fpscr
+; CHECK-FP16: movweq
+; CHECK-FP16: movwvs
+; CHECK-LIBCALL-LABEL: test_fcmp_ueq:
+; CHECK-LIBCALL: bl __gnu_h2f_ieee
+; CHECK-LIBCALL: bl __gnu_h2f_ieee
+; CHECK-LIBCALL: vcmpe.f32
+; CHECK-LIBCALL: movweq
+define i1 @test_fcmp_ueq(half* %p, half* %q) #0 {
+ %a = load half, half* %p, align 2
+ %b = load half, half* %q, align 2
+ %r = fcmp ueq half %a, %b
+ ret i1 %r
+}
+
+; CHECK-FP16-LABEL: test_br_cc:
+; CHECK-FP16: vcvtb.f32.f16
+; CHECK-FP16: vcvtb.f32.f16
+; CHECK-FP16: vcmpe.f32
+; CHECK-FP16: vmrs APSR_nzcv, fpscr
+; CHECK-FP16: strmi
+; CHECK-FP16: strpl
+; CHECK-LIBCALL-LABEL: test_br_cc:
+; CHECK-LIBCALL: bl __gnu_h2f_ieee
+; CHECK-LIBCALL: bl __gnu_h2f_ieee
+; CHECK-LIBCALL: vcmpe.f32
+; CHECK-LIBCALL: strmi
+; CHECK-LIBCALL: strpl
+define void @test_br_cc(half* %p, half* %q, i32* %p1, i32* %p2) #0 {
+ %a = load half, half* %p, align 2
+ %b = load half, half* %q, align 2
+ %c = fcmp uge half %a, %b
+ br i1 %c, label %then, label %else
+then:
+ store i32 0, i32* %p1
+ ret void
+else:
+ store i32 0, i32* %p2
+ ret void
+}
+
+declare i1 @test_dummy(half* %p) #0
+; CHECK-FP16-LABEL: test_phi:
+; CHECK-FP16: vcvtb.f32.f16
+; CHECK-FP16: [[LOOP:.LBB[1-9_]+]]:
+; CHECK-FP16: vcvtb.f32.f16
+; CHECK-FP16: bl test_dummy
+; CHECK-FP16: bne [[LOOP]]
+; CHECK-FP16: vcvtb.f16.f32
+; CHECK-LIBCALL-LABEL: test_phi:
+; CHECK-LIBCALL: bl __gnu_h2f_ieee
+; CHECK-LIBCALL: [[LOOP:.LBB[1-9_]+]]:
+; CHECK-LIBCALL: bl __gnu_h2f_ieee
+; CHECK-LIBCALL: bl test_dummy
+; CHECK-LIBCALL: bne [[LOOP]]
+; CHECK-LIBCALL: bl __gnu_f2h_ieee
+define void @test_phi(half* %p) #0 {
+entry:
+ %a = load half, half* %p
+ br label %loop
+loop:
+ %r = phi half [%a, %entry], [%b, %loop]
+ %b = load half, half* %p
+ %c = call i1 @test_dummy(half* %p)
+ br i1 %c, label %loop, label %return
+return:
+ store half %r, half* %p
+ ret void
+}
+
+; CHECK-FP16-LABEL: test_fptosi_i32:
+; CHECK-FP16: vcvtb.f32.f16
+; CHECK-FP16: vcvt.s32.f32
+; CHECK-LIBCALL-LABEL: test_fptosi_i32:
+; CHECK-LIBCALL: bl __gnu_h2f_ieee
+; CHECK-LIBCALL: vcvt.s32.f32
+define i32 @test_fptosi_i32(half* %p) #0 {
+ %a = load half, half* %p, align 2
+ %r = fptosi half %a to i32
+ ret i32 %r
+}
+
+; CHECK-FP16-LABEL: test_fptosi_i64:
+; CHECK-FP16: vcvtb.f32.f16
+; CHECK-FP16: bl __aeabi_f2lz
+; CHECK-LIBCALL-LABEL: test_fptosi_i64:
+; CHECK-LIBCALL: bl __gnu_h2f_ieee
+; CHECK-LIBCALL: bl __aeabi_f2lz
+define i64 @test_fptosi_i64(half* %p) #0 {
+ %a = load half, half* %p, align 2
+ %r = fptosi half %a to i64
+ ret i64 %r
+}
+
+; CHECK-FP16-LABEL: test_fptoui_i32:
+; CHECK-FP16: vcvtb.f32.f16
+; CHECK-FP16: vcvt.u32.f32
+; CHECK-LIBCALL-LABEL: test_fptoui_i32:
+; CHECK-LIBCALL: bl __gnu_h2f_ieee
+; CHECK-LIBCALL: vcvt.u32.f32
+define i32 @test_fptoui_i32(half* %p) #0 {
+ %a = load half, half* %p, align 2
+ %r = fptoui half %a to i32
+ ret i32 %r
+}
+
+; CHECK-FP16-LABEL: test_fptoui_i64:
+; CHECK-FP16: vcvtb.f32.f16
+; CHECK-FP16: bl __aeabi_f2ulz
+; CHECK-LIBCALL-LABEL: test_fptoui_i64:
+; CHECK-LIBCALL: bl __gnu_h2f_ieee
+; CHECK-LIBCALL: bl __aeabi_f2ulz
+define i64 @test_fptoui_i64(half* %p) #0 {
+ %a = load half, half* %p, align 2
+ %r = fptoui half %a to i64
+ ret i64 %r
+}
+
+; CHECK-FP16-LABEL: test_sitofp_i32:
+; CHECK-FP16: vcvt.f32.s32
+; CHECK-FP16: vcvtb.f16.f32
+; CHECK-LIBCALL-LABEL: test_sitofp_i32:
+; CHECK-LIBCALL: vcvt.f32.s32
+; CHECK-LIBCALL: bl __gnu_f2h_ieee
+define void @test_sitofp_i32(i32 %a, half* %p) #0 {
+ %r = sitofp i32 %a to half
+ store half %r, half* %p
+ ret void
+}
+
+; CHECK-FP16-LABEL: test_uitofp_i32:
+; CHECK-FP16: vcvt.f32.u32
+; CHECK-FP16: vcvtb.f16.f32
+; CHECK-LIBCALL-LABEL: test_uitofp_i32:
+; CHECK-LIBCALL: vcvt.f32.u32
+; CHECK-LIBCALL: bl __gnu_f2h_ieee
+define void @test_uitofp_i32(i32 %a, half* %p) #0 {
+ %r = uitofp i32 %a to half
+ store half %r, half* %p
+ ret void
+}
+
+; CHECK-FP16-LABEL: test_sitofp_i64:
+; CHECK-FP16: bl __aeabi_l2f
+; CHECK-FP16: vcvtb.f16.f32
+; CHECK-LIBCALL-LABEL: test_sitofp_i64:
+; CHECK-LIBCALL: bl __aeabi_l2f
+; CHECK-LIBCALL: bl __gnu_f2h_ieee
+define void @test_sitofp_i64(i64 %a, half* %p) #0 {
+ %r = sitofp i64 %a to half
+ store half %r, half* %p
+ ret void
+}
+
+; CHECK-FP16-LABEL: test_uitofp_i64:
+; CHECK-FP16: bl __aeabi_ul2f
+; CHECK-FP16: vcvtb.f16.f32
+; CHECK-LIBCALL-LABEL: test_uitofp_i64:
+; CHECK-LIBCALL: bl __aeabi_ul2f
+; CHECK-LIBCALL: bl __gnu_f2h_ieee
+define void @test_uitofp_i64(i64 %a, half* %p) #0 {
+ %r = uitofp i64 %a to half
+ store half %r, half* %p
+ ret void
+}
+
+; CHECK-FP16-LABEL: test_fptrunc_float:
+; CHECK-FP16: vcvtb.f16.f32
+; CHECK-LIBCALL-LABEL: test_fptrunc_float:
+; CHECK-LIBCALL: bl __gnu_f2h_ieee
+define void @test_fptrunc_float(float %f, half* %p) #0 {
+ %a = fptrunc float %f to half
+ store half %a, half* %p
+ ret void
+}
+
+; CHECK-FP16-LABEL: test_fptrunc_double:
+; CHECK-FP16: bl __aeabi_d2h
+; CHECK-LIBCALL-LABEL: test_fptrunc_double:
+; CHECK-LIBCALL: bl __aeabi_d2h
+define void @test_fptrunc_double(double %d, half* %p) #0 {
+ %a = fptrunc double %d to half
+ store half %a, half* %p
+ ret void
+}
+
+; CHECK-FP16-LABEL: test_fpextend_float:
+; CHECK-FP16: vcvtb.f32.f16
+; CHECK-LIBCALL-LABEL: test_fpextend_float:
+; CHECK-LIBCALL: b __gnu_h2f_ieee
+define float @test_fpextend_float(half* %p) {
+ %a = load half, half* %p, align 2
+ %r = fpext half %a to float
+ ret float %r
+}
+
+; CHECK-FP16-LABEL: test_fpextend_double:
+; CHECK-FP16: vcvtb.f32.f16
+; CHECK-FP16: vcvt.f64.f32
+; CHECK-LIBCALL-LABEL: test_fpextend_double:
+; CHECK-LIBCALL: bl __gnu_h2f_ieee
+; CHECK-LIBCALL: vcvt.f64.f32
+define double @test_fpextend_double(half* %p) {
+ %a = load half, half* %p, align 2
+ %r = fpext half %a to double
+ ret double %r
+}
+
+; CHECK-ALL-LABEL: test_bitcast_halftoi16:
+; CHECK-ALL-NEXT: .fnstart
+; CHECK-ALL-NEXT: ldrh r0, [r0]
+; CHECK-ALL-NEXT: bx lr
+define i16 @test_bitcast_halftoi16(half* %p) #0 {
+ %a = load half, half* %p, align 2
+ %r = bitcast half %a to i16
+ ret i16 %r
+}
+
+; CHECK-ALL-LABEL: test_bitcast_i16tohalf:
+; CHECK-ALL-NEXT: .fnstart
+; CHECK-ALL-NEXT: strh r0, [r1]
+; CHECK-ALL-NEXT: bx lr
+define void @test_bitcast_i16tohalf(i16 %a, half* %p) #0 {
+ %r = bitcast i16 %a to half
+ store half %r, half* %p
+ ret void
+}
+
+declare half @llvm.sqrt.f16(half %a) #0
+declare half @llvm.powi.f16(half %a, i32 %b) #0
+declare half @llvm.sin.f16(half %a) #0
+declare half @llvm.cos.f16(half %a) #0
+declare half @llvm.pow.f16(half %a, half %b) #0
+declare half @llvm.exp.f16(half %a) #0
+declare half @llvm.exp2.f16(half %a) #0
+declare half @llvm.log.f16(half %a) #0
+declare half @llvm.log10.f16(half %a) #0
+declare half @llvm.log2.f16(half %a) #0
+declare half @llvm.fma.f16(half %a, half %b, half %c) #0
+declare half @llvm.fabs.f16(half %a) #0
+declare half @llvm.minnum.f16(half %a, half %b) #0
+declare half @llvm.maxnum.f16(half %a, half %b) #0
+declare half @llvm.copysign.f16(half %a, half %b) #0
+declare half @llvm.floor.f16(half %a) #0
+declare half @llvm.ceil.f16(half %a) #0
+declare half @llvm.trunc.f16(half %a) #0
+declare half @llvm.rint.f16(half %a) #0
+declare half @llvm.nearbyint.f16(half %a) #0
+declare half @llvm.round.f16(half %a) #0
+declare half @llvm.fmuladd.f16(half %a, half %b, half %c) #0
+
+; CHECK-FP16-LABEL: test_sqrt:
+; CHECK-FP16: vcvtb.f32.f16
+; CHECK-FP16: vsqrt.f32
+; CHECK-FP16: vcvtb.f16.f32
+; CHECK-LIBCALL-LABEL: test_sqrt:
+; CHECK-LIBCALL: bl __gnu_h2f_ieee
+; CHECK-LIBCALL: vsqrt.f32
+; CHECK-LIBCALL: bl __gnu_f2h_ieee
+define void @test_sqrt(half* %p) #0 {
+ %a = load half, half* %p, align 2
+ %r = call half @llvm.sqrt.f16(half %a)
+ store half %r, half* %p
+ ret void
+}
+
+; CHECK-FP16-LABEL: test_fpowi:
+; CHECK-FP16: vcvtb.f32.f16
+; CHECK-FP16: bl __powisf2
+; CHECK-FP16: vcvtb.f16.f32
+; CHECK-LIBCALL-LABEL: test_fpowi:
+; CHECK-LIBCALL: bl __gnu_h2f_ieee
+; CHECK-LIBCALL: bl __powisf2
+; CHECK-LIBCALL: bl __gnu_f2h_ieee
+define void @test_fpowi(half* %p, i32 %b) #0 {
+ %a = load half, half* %p, align 2
+ %r = call half @llvm.powi.f16(half %a, i32 %b)
+ store half %r, half* %p
+ ret void
+}
+
+; CHECK-FP16-LABEL: test_sin:
+; CHECK-FP16: vcvtb.f32.f16
+; CHECK-FP16: bl sinf
+; CHECK-FP16: vcvtb.f16.f32
+; CHECK-LIBCALL-LABEL: test_sin:
+; CHECK-LIBCALL: bl __gnu_h2f_ieee
+; CHECK-LIBCALL: bl sinf
+; CHECK-LIBCALL: bl __gnu_f2h_ieee
+define void @test_sin(half* %p) #0 {
+ %a = load half, half* %p, align 2
+ %r = call half @llvm.sin.f16(half %a)
+ store half %r, half* %p
+ ret void
+}
+
+; CHECK-FP16-LABEL: test_cos:
+; CHECK-FP16: vcvtb.f32.f16
+; CHECK-FP16: bl cosf
+; CHECK-FP16: vcvtb.f16.f32
+; CHECK-LIBCALL-LABEL: test_cos:
+; CHECK-LIBCALL: bl __gnu_h2f_ieee
+; CHECK-LIBCALL: bl cosf
+; CHECK-LIBCALL: bl __gnu_f2h_ieee
+define void @test_cos(half* %p) #0 {
+ %a = load half, half* %p, align 2
+ %r = call half @llvm.cos.f16(half %a)
+ store half %r, half* %p
+ ret void
+}
+
+; CHECK-FP16-LABEL: test_pow:
+; CHECK-FP16: vcvtb.f32.f16
+; CHECK-FP16: vcvtb.f32.f16
+; CHECK-FP16: bl powf
+; CHECK-FP16: vcvtb.f16.f32
+; CHECK-LIBCALL-LABEL: test_pow:
+; CHECK-LIBCALL: bl __gnu_h2f_ieee
+; CHECK-LIBCALL: bl __gnu_h2f_ieee
+; CHECK-LIBCALL: bl powf
+; CHECK-LIBCALL: bl __gnu_f2h_ieee
+define void @test_pow(half* %p, half* %q) #0 {
+ %a = load half, half* %p, align 2
+ %b = load half, half* %q, align 2
+ %r = call half @llvm.pow.f16(half %a, half %b)
+ store half %r, half* %p
+ ret void
+}
+
+; CHECK-FP16-LABEL: test_exp:
+; CHECK-FP16: vcvtb.f32.f16
+; CHECK-FP16: bl expf
+; CHECK-FP16: vcvtb.f16.f32
+; CHECK-LIBCALL-LABEL: test_exp:
+; CHECK-LIBCALL: bl __gnu_h2f_ieee
+; CHECK-LIBCALL: bl expf
+; CHECK-LIBCALL: bl __gnu_f2h_ieee
+define void @test_exp(half* %p) #0 {
+ %a = load half, half* %p, align 2
+ %r = call half @llvm.exp.f16(half %a)
+ store half %r, half* %p
+ ret void
+}
+
+; CHECK-FP16-LABEL: test_exp2:
+; CHECK-FP16: vcvtb.f32.f16
+; CHECK-FP16: bl exp2f
+; CHECK-FP16: vcvtb.f16.f32
+; CHECK-LIBCALL-LABEL: test_exp2:
+; CHECK-LIBCALL: bl __gnu_h2f_ieee
+; CHECK-LIBCALL: bl exp2f
+; CHECK-LIBCALL: bl __gnu_f2h_ieee
+define void @test_exp2(half* %p) #0 {
+ %a = load half, half* %p, align 2
+ %r = call half @llvm.exp2.f16(half %a)
+ store half %r, half* %p
+ ret void
+}
+
+; CHECK-FP16-LABEL: test_log:
+; CHECK-FP16: vcvtb.f32.f16
+; CHECK-FP16: bl logf
+; CHECK-FP16: vcvtb.f16.f32
+; CHECK-LIBCALL-LABEL: test_log:
+; CHECK-LIBCALL: bl __gnu_h2f_ieee
+; CHECK-LIBCALL: bl logf
+; CHECK-LIBCALL: bl __gnu_f2h_ieee
+define void @test_log(half* %p) #0 {
+ %a = load half, half* %p, align 2
+ %r = call half @llvm.log.f16(half %a)
+ store half %r, half* %p
+ ret void
+}
+
+; CHECK-FP16-LABEL: test_log10:
+; CHECK-FP16: vcvtb.f32.f16
+; CHECK-FP16: bl log10f
+; CHECK-FP16: vcvtb.f16.f32
+; CHECK-LIBCALL-LABEL: test_log10:
+; CHECK-LIBCALL: bl __gnu_h2f_ieee
+; CHECK-LIBCALL: bl log10f
+; CHECK-LIBCALL: bl __gnu_f2h_ieee
+define void @test_log10(half* %p) #0 {
+ %a = load half, half* %p, align 2
+ %r = call half @llvm.log10.f16(half %a)
+ store half %r, half* %p
+ ret void
+}
+
+; CHECK-FP16-LABEL: test_log2:
+; CHECK-FP16: vcvtb.f32.f16
+; CHECK-FP16: bl log2f
+; CHECK-FP16: vcvtb.f16.f32
+; CHECK-LIBCALL-LABEL: test_log2:
+; CHECK-LIBCALL: bl __gnu_h2f_ieee
+; CHECK-LIBCALL: bl log2f
+; CHECK-LIBCALL: bl __gnu_f2h_ieee
+define void @test_log2(half* %p) #0 {
+ %a = load half, half* %p, align 2
+ %r = call half @llvm.log2.f16(half %a)
+ store half %r, half* %p
+ ret void
+}
+
+; CHECK-FP16-LABEL: test_fma:
+; CHECK-FP16: vcvtb.f32.f16
+; CHECK-FP16: vcvtb.f32.f16
+; CHECK-FP16: vcvtb.f32.f16
+; CHECK-FP16: bl fmaf
+; CHECK-FP16: vcvtb.f16.f32
+; CHECK-LIBCALL-LABEL: test_fma:
+; CHECK-LIBCALL: bl __gnu_h2f_ieee
+; CHECK-LIBCALL: bl __gnu_h2f_ieee
+; CHECK-LIBCALL: bl __gnu_h2f_ieee
+; CHECK-LIBCALL: bl fmaf
+; CHECK-LIBCALL: bl __gnu_f2h_ieee
+define void @test_fma(half* %p, half* %q, half* %r) #0 {
+ %a = load half, half* %p, align 2
+ %b = load half, half* %q, align 2
+ %c = load half, half* %r, align 2
+ %v = call half @llvm.fma.f16(half %a, half %b, half %c)
+ store half %v, half* %p
+ ret void
+}
+
+; CHECK-FP16-LABEL: test_fabs:
+; CHECK-FP16: vcvtb.f32.f16
+; CHECK-FP16: vabs.f32
+; CHECK-FP16: vcvtb.f16.f32
+; CHECK-LIBCALL-LABEL: test_fabs:
+; CHECK-LIBCALL: bl __gnu_h2f_ieee
+; CHECK-LIBCALL: bfc
+; CHECK-LIBCALL: bl __gnu_f2h_ieee
+define void @test_fabs(half* %p) {
+ %a = load half, half* %p, align 2
+ %r = call half @llvm.fabs.f16(half %a)
+ store half %r, half* %p
+ ret void
+}
+
+; CHECK-FP16-LABEL: test_minnum:
+; CHECK-FP16: vcvtb.f32.f16
+; CHECK-FP16: vcvtb.f32.f16
+; CHECK-FP16: bl fminf
+; CHECK-FP16: vcvtb.f16.f32
+; CHECK-LIBCALL-LABEL: test_minnum:
+; CHECK-LIBCALL: bl __gnu_h2f_ieee
+; CHECK-LIBCALL: bl __gnu_h2f_ieee
+; CHECK-LIBCALL: bl fminf
+; CHECK-LIBCALL: bl __gnu_f2h_ieee
+define void @test_minnum(half* %p, half* %q) #0 {
+ %a = load half, half* %p, align 2
+ %b = load half, half* %q, align 2
+ %r = call half @llvm.minnum.f16(half %a, half %b)
+ store half %r, half* %p
+ ret void
+}
+
+; CHECK-FP16-LABEL: test_maxnum:
+; CHECK-FP16: vcvtb.f32.f16
+; CHECK-FP16: vcvtb.f32.f16
+; CHECK-FP16: bl fmaxf
+; CHECK-FP16: vcvtb.f16.f32
+; CHECK-LIBCALL-LABEL: test_maxnum:
+; CHECK-LIBCALL: bl __gnu_h2f_ieee
+; CHECK-LIBCALL: bl __gnu_h2f_ieee
+; CHECK-LIBCALL: bl fmaxf
+; CHECK-LIBCALL: bl __gnu_f2h_ieee
+define void @test_maxnum(half* %p, half* %q) #0 {
+ %a = load half, half* %p, align 2
+ %b = load half, half* %q, align 2
+ %r = call half @llvm.maxnum.f16(half %a, half %b)
+ store half %r, half* %p
+ ret void
+}
+
+; CHECK-FP16-LABEL: test_copysign:
+; CHECK-FP16: vcvtb.f32.f16
+; CHECK-FP16: vcvtb.f32.f16
+; CHECK-FP16: vbsl
+; CHECK-FP16: vcvtb.f16.f32
+; CHECK-LIBCALL-LABEL: test_copysign:
+; CHECK-LIBCALL: bl __gnu_h2f_ieee
+; CHECK-LIBCALL: bl __gnu_h2f_ieee
+; CHECK-LIBCALL: vbsl
+; CHECK-LIBCALL: bl __gnu_f2h_ieee
+define void @test_copysign(half* %p, half* %q) #0 {
+ %a = load half, half* %p, align 2
+ %b = load half, half* %q, align 2
+ %r = call half @llvm.copysign.f16(half %a, half %b)
+ store half %r, half* %p
+ ret void
+}
+
+; CHECK-FP16-LABEL: test_floor:
+; CHECK-FP16: vcvtb.f32.f16
+; CHECK-FP16: bl floorf
+; CHECK-FP16: vcvtb.f16.f32
+; CHECK-LIBCALL-LABEL: test_floor:
+; CHECK-LIBCALL: bl __gnu_h2f_ieee
+; CHECK-LIBCALL: bl floorf
+; CHECK-LIBCALL: bl __gnu_f2h_ieee
+define void @test_floor(half* %p) {
+ %a = load half, half* %p, align 2
+ %r = call half @llvm.floor.f16(half %a)
+ store half %r, half* %p
+ ret void
+}
+
+; CHECK-FP16-LABEL: test_ceil:
+; CHECK-FP16: vcvtb.f32.f16
+; CHECK-FP16: bl ceilf
+; CHECK-FP16: vcvtb.f16.f32
+; CHECK-LIBCALL-LABEL: test_ceil:
+; CHECK-LIBCALL: bl __gnu_h2f_ieee
+; CHECK-LIBCALL: bl ceilf
+; CHECK-LIBCALL: bl __gnu_f2h_ieee
+define void @test_ceil(half* %p) {
+ %a = load half, half* %p, align 2
+ %r = call half @llvm.ceil.f16(half %a)
+ store half %r, half* %p
+ ret void
+}
+
+; CHECK-FP16-LABEL: test_trunc:
+; CHECK-FP16: vcvtb.f32.f16
+; CHECK-FP16: bl truncf
+; CHECK-FP16: vcvtb.f16.f32
+; CHECK-LIBCALL-LABEL: test_trunc:
+; CHECK-LIBCALL: bl __gnu_h2f_ieee
+; CHECK-LIBCALL: bl truncf
+; CHECK-LIBCALL: bl __gnu_f2h_ieee
+define void @test_trunc(half* %p) {
+ %a = load half, half* %p, align 2
+ %r = call half @llvm.trunc.f16(half %a)
+ store half %r, half* %p
+ ret void
+}
+
+; CHECK-FP16-LABEL: test_rint:
+; CHECK-FP16: vcvtb.f32.f16
+; CHECK-FP16: bl rintf
+; CHECK-FP16: vcvtb.f16.f32
+; CHECK-LIBCALL-LABEL: test_rint:
+; CHECK-LIBCALL: bl __gnu_h2f_ieee
+; CHECK-LIBCALL: bl rintf
+; CHECK-LIBCALL: bl __gnu_f2h_ieee
+define void @test_rint(half* %p) {
+ %a = load half, half* %p, align 2
+ %r = call half @llvm.rint.f16(half %a)
+ store half %r, half* %p
+ ret void
+}
+
+; CHECK-FP16-LABEL: test_nearbyint:
+; CHECK-FP16: vcvtb.f32.f16
+; CHECK-FP16: bl nearbyintf
+; CHECK-FP16: vcvtb.f16.f32
+; CHECK-LIBCALL-LABEL: test_nearbyint:
+; CHECK-LIBCALL: bl __gnu_h2f_ieee
+; CHECK-LIBCALL: bl nearbyintf
+; CHECK-LIBCALL: bl __gnu_f2h_ieee
+define void @test_nearbyint(half* %p) {
+ %a = load half, half* %p, align 2
+ %r = call half @llvm.nearbyint.f16(half %a)
+ store half %r, half* %p
+ ret void
+}
+
+; CHECK-FP16-LABEL: test_round:
+; CHECK-FP16: vcvtb.f32.f16
+; CHECK-FP16: bl roundf
+; CHECK-FP16: vcvtb.f16.f32
+; CHECK-LIBCALL-LABEL: test_round:
+; CHECK-LIBCALL: bl __gnu_h2f_ieee
+; CHECK-LIBCALL: bl roundf
+; CHECK-LIBCALL: bl __gnu_f2h_ieee
+define void @test_round(half* %p) {
+ %a = load half, half* %p, align 2
+ %r = call half @llvm.round.f16(half %a)
+ store half %r, half* %p
+ ret void
+}
+
+; CHECK-FP16-LABEL: test_fmuladd:
+; CHECK-FP16: vcvtb.f32.f16
+; CHECK-FP16: vcvtb.f32.f16
+; CHECK-FP16: vcvtb.f32.f16
+; CHECK-FP16: vmla.f32
+; CHECK-FP16: vcvtb.f16.f32
+; CHECK-LIBCALL-LABEL: test_fmuladd:
+; CHECK-LIBCALL: bl __gnu_h2f_ieee
+; CHECK-LIBCALL: bl __gnu_h2f_ieee
+; CHECK-LIBCALL: bl __gnu_h2f_ieee
+; CHECK-LIBCALL: vmla.f32
+; CHECK-LIBCALL: bl __gnu_f2h_ieee
+define void @test_fmuladd(half* %p, half* %q, half* %r) #0 {
+ %a = load half, half* %p, align 2
+ %b = load half, half* %q, align 2
+ %c = load half, half* %r, align 2
+ %v = call half @llvm.fmuladd.f16(half %a, half %b, half %c)
+ store half %v, half* %p
+ ret void
+}
+
+; f16 vectors are not legal in the backend. Vector elements are not assigned
+; to registers, but are stored on the stack instead. Hence insertelement and
+; extractelement have these extra loads and stores.
+
+; CHECK-ALL-LABEL: test_insertelement:
+; CHECK-ALL-NEXT: .fnstart
+; CHECK-ALL-NEXT: sub sp, sp, #8
+; CHECK-ALL-NEXT: ldrh
+; CHECK-ALL-NEXT: strh
+; CHECK-ALL-NEXT: ldrh
+; CHECK-ALL-NEXT: strh
+; CHECK-ALL-NEXT: ldrh
+; CHECK-ALL-NEXT: strh
+; CHECK-ALL-NEXT: ldrh
+; CHECK-ALL-NEXT: strh
+; CHECK-ALL-NEXT: mov
+; CHECK-ALL-NEXT: ldrh
+; CHECK-ALL-NEXT: add
+; CHECK-ALL-NEXT: strh
+; CHECK-ALL-NEXT: ldrh
+; CHECK-ALL-NEXT: strh
+; CHECK-ALL-NEXT: ldrh
+; CHECK-ALL-NEXT: strh
+; CHECK-ALL-NEXT: ldrh
+; CHECK-ALL-NEXT: strh
+; CHECK-ALL-NEXT: ldrh
+; CHECK-ALL-NEXT: strh
+; CHECK-ALL-NEXT: add sp, sp, #8
+; CHECK-ALL-NEXT: bx lr
+define void @test_insertelement(half* %p, <4 x half>* %q, i32 %i) #0 {
+ %a = load half, half* %p, align 2
+ %b = load <4 x half>, <4 x half>* %q, align 8
+ %c = insertelement <4 x half> %b, half %a, i32 %i
+ store <4 x half> %c, <4 x half>* %q
+ ret void
+}
+
+; CHECK-ALL-LABEL: test_extractelement:
+; CHECK-ALL-NEXT: .fnstart
+; CHECK-ALL-NEXT: sub sp, sp, #8
+; CHECK-ALL-NEXT: ldrh
+; CHECK-ALL-NEXT: ldrh
+; CHECK-ALL-NEXT: orr
+; CHECK-ALL-NEXT: str
+; CHECK-ALL-NEXT: ldrh
+; CHECK-ALL-NEXT: ldrh
+; CHECK-ALL-NEXT: orr
+; CHECK-ALL-NEXT: str
+; CHECK-ALL-NEXT: mov
+; CHECK-ALL-NEXT: add
+; CHECK-ALL-NEXT: ldrh
+; CHECK-ALL-NEXT: strh
+; CHECK-ALL-NEXT: add sp, sp, #8
+; CHECK-ALL-NEXT: bx lr
+define void @test_extractelement(half* %p, <4 x half>* %q, i32 %i) #0 {
+ %a = load <4 x half>, <4 x half>* %q, align 8
+ %b = extractelement <4 x half> %a, i32 %i
+ store half %b, half* %p
+ ret void
+}
+
+; test struct operations
+
+%struct.dummy = type { i32, half }
+
+; CHECK-ALL-LABEL: test_insertvalue:
+; CHECK-ALL-NEXT: .fnstart
+; CHECK-ALL-NEXT: ldr
+; CHECK-ALL-NEXT: ldrh
+; CHECK-ALL-NEXT: strh
+; CHECK-ALL-NEXT: str
+; CHECK-ALL-NEXT: bx lr
+define void @test_insertvalue(%struct.dummy* %p, half* %q) {
+ %a = load %struct.dummy, %struct.dummy* %p
+ %b = load half, half* %q
+ %c = insertvalue %struct.dummy %a, half %b, 1
+ store %struct.dummy %c, %struct.dummy* %p
+ ret void
+}
+
+; CHECK-ALL-LABEL: test_extractvalue:
+; CHECK-ALL-NEXT: .fnstart
+; CHECK-ALL-NEXT: ldrh
+; CHECK-ALL-NEXT: strh
+; CHECK-ALL-NEXT: bx lr
+define void @test_extractvalue(%struct.dummy* %p, half* %q) {
+ %a = load %struct.dummy, %struct.dummy* %p
+ %b = extractvalue %struct.dummy %a, 1
+ store half %b, half* %q
+ ret void
+}
+
+; CHECK-FP16-LABEL: test_struct_return:
+; CHECK-FP16: vcvtb.f32.f16
+; CHECK-LIBCALL-LABEL: test_struct_return:
+; CHECK-LIBCALL: bl __gnu_h2f_ieee
+define %struct.dummy @test_struct_return(%struct.dummy* %p) {
+ %a = load %struct.dummy, %struct.dummy* %p
+ ret %struct.dummy %a
+}
+
+; CHECK-ALL-LABEL: test_struct_arg:
+; CHECK-ALL-NEXT: .fnstart
+; CHECK-ALL-NEXT: mov r0, r1
+; CHECK-ALL-NEXT: bx lr
+define half @test_struct_arg(%struct.dummy %p) {
+ %a = extractvalue %struct.dummy %p, 1
+ ret half %a
+}
+
+attributes #0 = { nounwind }
diff --git a/test/CodeGen/ARM/fp16.ll b/test/CodeGen/ARM/fp16.ll
index 5a926acc5430..25fbf9070cb6 100644
--- a/test/CodeGen/ARM/fp16.ll
+++ b/test/CodeGen/ARM/fp16.ll
@@ -16,8 +16,8 @@ define void @foo() nounwind {
; CHECK-ARMV8-LABEL: foo:
; CHECK-SOFTFLOAT-LABEL: foo:
entry:
- %0 = load i16* @x, align 2
- %1 = load i16* @y, align 2
+ %0 = load i16, i16* @x, align 2
+ %1 = load i16, i16* @y, align 2
%2 = tail call float @llvm.convert.from.fp16.f32(i16 %0)
; CHECK: __gnu_h2f_ieee
; CHECK-FP16: vcvtb.f32.f16
diff --git a/test/CodeGen/ARM/fpcmp-opt.ll b/test/CodeGen/ARM/fpcmp-opt.ll
index eab5988e3eb4..45bb6d2f702d 100644
--- a/test/CodeGen/ARM/fpcmp-opt.ll
+++ b/test/CodeGen/ARM/fpcmp-opt.ll
@@ -13,8 +13,8 @@ entry:
; CHECK: vcmpe.f32 [[S1]], [[S0]]
; CHECK: vmrs APSR_nzcv, fpscr
; CHECK: beq
- %0 = load float* %a
- %1 = load float* %b
+ %0 = load float, float* %a
+ %1 = load float, float* %b
%2 = fcmp une float %0, %1
br i1 %2, label %bb1, label %bb2
@@ -41,7 +41,7 @@ entry:
; CHECK-NOT: vcmpe.f32
; CHECK-NOT: vmrs
; CHECK: bne
- %0 = load double* %a
+ %0 = load double, double* %a
%1 = fcmp oeq double %0, 0.000000e+00
br i1 %1, label %bb1, label %bb2
@@ -64,7 +64,7 @@ entry:
; CHECK-NOT: vcmpe.f32
; CHECK-NOT: vmrs
; CHECK: bne
- %0 = load float* %a
+ %0 = load float, float* %a
%1 = fcmp oeq float %0, 0.000000e+00
br i1 %1, label %bb1, label %bb2
diff --git a/test/CodeGen/ARM/fpmem.ll b/test/CodeGen/ARM/fpmem.ll
index 3a454ed9631c..23fbea911e5e 100644
--- a/test/CodeGen/ARM/fpmem.ll
+++ b/test/CodeGen/ARM/fpmem.ll
@@ -9,7 +9,7 @@ define float @f1(float %a) {
define float @f2(float* %v, float %u) {
; CHECK-LABEL: f2:
; CHECK: vldr{{.*}}[
- %tmp = load float* %v ; <float> [#uses=1]
+ %tmp = load float, float* %v ; <float> [#uses=1]
%tmp1 = fadd float %tmp, %u ; <float> [#uses=1]
ret float %tmp1
}
@@ -17,8 +17,8 @@ define float @f2(float* %v, float %u) {
define float @f2offset(float* %v, float %u) {
; CHECK-LABEL: f2offset:
; CHECK: vldr{{.*}}, #4]
- %addr = getelementptr float* %v, i32 1
- %tmp = load float* %addr
+ %addr = getelementptr float, float* %v, i32 1
+ %tmp = load float, float* %addr
%tmp1 = fadd float %tmp, %u
ret float %tmp1
}
@@ -26,8 +26,8 @@ define float @f2offset(float* %v, float %u) {
define float @f2noffset(float* %v, float %u) {
; CHECK-LABEL: f2noffset:
; CHECK: vldr{{.*}}, #-4]
- %addr = getelementptr float* %v, i32 -1
- %tmp = load float* %addr
+ %addr = getelementptr float, float* %v, i32 -1
+ %tmp = load float, float* %addr
%tmp1 = fadd float %tmp, %u
ret float %tmp1
}
diff --git a/test/CodeGen/ARM/fptoint.ll b/test/CodeGen/ARM/fptoint.ll
index f50d0b96fe99..6cbb30b23fbe 100644
--- a/test/CodeGen/ARM/fptoint.ll
+++ b/test/CodeGen/ARM/fptoint.ll
@@ -4,13 +4,13 @@
@u = weak global i32 0 ; <i32*> [#uses=2]
define i32 @foo1(float *%x) {
- %tmp1 = load float* %x
+ %tmp1 = load float, float* %x
%tmp2 = bitcast float %tmp1 to i32
ret i32 %tmp2
}
define i64 @foo2(double *%x) {
- %tmp1 = load double* %x
+ %tmp1 = load double, double* %x
%tmp2 = bitcast double %tmp1 to i64
ret i64 %tmp2
}
diff --git a/test/CodeGen/ARM/frame-register.ll b/test/CodeGen/ARM/frame-register.ll
index e6a55bddaf1c..0cc5005ec488 100644
--- a/test/CodeGen/ARM/frame-register.ll
+++ b/test/CodeGen/ARM/frame-register.ll
@@ -17,12 +17,12 @@ entry:
%i.addr = alloca i32, align 4
%j = alloca i32, align 4
store i32 %i, i32* %i.addr, align 4
- %0 = load i32* %i.addr, align 4
+ %0 = load i32, i32* %i.addr, align 4
%add = add nsw i32 %0, 1
store i32 %add, i32* %j, align 4
- %1 = load i32* %j, align 4
+ %1 = load i32, i32* %j, align 4
call void @callee(i32 %1)
- %2 = load i32* %j, align 4
+ %2 = load i32, i32* %j, align 4
%add1 = add nsw i32 %2, 1
ret i32 %add1
}
@@ -30,9 +30,9 @@ entry:
; CHECK-ARM: push {r11, lr}
; CHECK-ARM: mov r11, sp
-; CHECK-THUMB: push {r4, r6, r7, lr}
-; CHECK-THUMB: add r7, sp, #8
+; CHECK-THUMB: push {r7, lr}
+; CHECK-THUMB: add r7, sp, #0
; CHECK-DARWIN-ARM: push {r7, lr}
-; CHECK-DARWIN-THUMB: push {r4, r7, lr}
+; CHECK-DARWIN-THUMB: push {r7, lr}
diff --git a/test/CodeGen/ARM/fusedMAC.ll b/test/CodeGen/ARM/fusedMAC.ll
index e29f291dc2c5..6f6cdc11491e 100644
--- a/test/CodeGen/ARM/fusedMAC.ll
+++ b/test/CodeGen/ARM/fusedMAC.ll
@@ -144,7 +144,7 @@ entry:
define float @test_fnms_f32(float %a, float %b, float* %c) nounwind readnone ssp {
; CHECK: test_fnms_f32
; CHECK: vfnms.f32
- %tmp1 = load float* %c, align 4
+ %tmp1 = load float, float* %c, align 4
%tmp2 = fsub float -0.0, %tmp1
%tmp3 = tail call float @llvm.fma.f32(float %a, float %b, float %tmp2) nounwind readnone
ret float %tmp3
diff --git a/test/CodeGen/ARM/ghc-tcreturn-lowered.ll b/test/CodeGen/ARM/ghc-tcreturn-lowered.ll
index 623b4220c21f..f34f8f1a66c1 100644
--- a/test/CodeGen/ARM/ghc-tcreturn-lowered.ll
+++ b/test/CodeGen/ARM/ghc-tcreturn-lowered.ll
@@ -15,7 +15,7 @@ define ghccc void @test_direct_tail() {
define ghccc void @test_indirect_tail() {
; CHECK-LABEL: test_indirect_tail:
; CHECK: bx {{r[0-9]+}}
- %func = load void()** @ind_func
- tail call ghccc void()* %func()
+ %func = load void()*, void()** @ind_func
+ tail call ghccc void() %func()
ret void
}
diff --git a/test/CodeGen/ARM/global-merge-1.ll b/test/CodeGen/ARM/global-merge-1.ll
index e5d4def938df..d4d9b0f9d1f3 100644
--- a/test/CodeGen/ARM/global-merge-1.ll
+++ b/test/CodeGen/ARM/global-merge-1.ll
@@ -1,9 +1,12 @@
; RUN: llc %s -O0 -o - | FileCheck -check-prefix=NO-MERGE %s
-; RUN: llc %s -O0 -o - -enable-global-merge=false | FileCheck -check-prefix=NO-MERGE %s
-; RUN: llc %s -O0 -o - -enable-global-merge=true | FileCheck -check-prefix=NO-MERGE %s
-; RUN: llc %s -O1 -o - | FileCheck -check-prefix=MERGE %s
-; RUN: llc %s -O1 -o - -enable-global-merge=false | FileCheck -check-prefix=NO-MERGE %s
-; RUN: llc %s -O1 -o - -enable-global-merge=true | FileCheck -check-prefix=MERGE %s
+; RUN: llc %s -O0 -o - -arm-global-merge=false | FileCheck -check-prefix=NO-MERGE %s
+; RUN: llc %s -O0 -o - -arm-global-merge=true | FileCheck -check-prefix=MERGE %s
+; RUN: llc %s -O1 -o - | FileCheck -check-prefix=NO-MERGE %s
+; RUN: llc %s -O1 -o - -arm-global-merge=false | FileCheck -check-prefix=NO-MERGE %s
+; RUN: llc %s -O1 -o - -arm-global-merge=true | FileCheck -check-prefix=MERGE %s
+; RUN: llc %s -O3 -o - | FileCheck -check-prefix=MERGE %s
+; RUN: llc %s -O3 -o - -arm-global-merge=false | FileCheck -check-prefix=NO-MERGE %s
+; RUN: llc %s -O3 -o - -arm-global-merge=true | FileCheck -check-prefix=MERGE %s
; MERGE-NOT: .zerofill __DATA,__bss,_bar,20,2
; MERGE-NOT: .zerofill __DATA,__bss,_baz,20,2
@@ -29,25 +32,25 @@ target triple = "thumbv7-apple-ios3.0.0"
; Function Attrs: nounwind ssp
define internal void @initialize() #0 {
%1 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #3
- store i32 %1, i32* getelementptr inbounds ([5 x i32]* @bar, i32 0, i32 0), align 4, !tbaa !1
+ store i32 %1, i32* getelementptr inbounds ([5 x i32], [5 x i32]* @bar, i32 0, i32 0), align 4, !tbaa !1
%2 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #3
- store i32 %2, i32* getelementptr inbounds ([5 x i32]* @baz, i32 0, i32 0), align 4, !tbaa !1
+ store i32 %2, i32* getelementptr inbounds ([5 x i32], [5 x i32]* @baz, i32 0, i32 0), align 4, !tbaa !1
%3 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #3
- store i32 %3, i32* getelementptr inbounds ([5 x i32]* @bar, i32 0, i32 1), align 4, !tbaa !1
+ store i32 %3, i32* getelementptr inbounds ([5 x i32], [5 x i32]* @bar, i32 0, i32 1), align 4, !tbaa !1
%4 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #3
- store i32 %4, i32* getelementptr inbounds ([5 x i32]* @baz, i32 0, i32 1), align 4, !tbaa !1
+ store i32 %4, i32* getelementptr inbounds ([5 x i32], [5 x i32]* @baz, i32 0, i32 1), align 4, !tbaa !1
%5 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #3
- store i32 %5, i32* getelementptr inbounds ([5 x i32]* @bar, i32 0, i32 2), align 4, !tbaa !1
+ store i32 %5, i32* getelementptr inbounds ([5 x i32], [5 x i32]* @bar, i32 0, i32 2), align 4, !tbaa !1
%6 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #3
- store i32 %6, i32* getelementptr inbounds ([5 x i32]* @baz, i32 0, i32 2), align 4, !tbaa !1
+ store i32 %6, i32* getelementptr inbounds ([5 x i32], [5 x i32]* @baz, i32 0, i32 2), align 4, !tbaa !1
%7 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #3
- store i32 %7, i32* getelementptr inbounds ([5 x i32]* @bar, i32 0, i32 3), align 4, !tbaa !1
+ store i32 %7, i32* getelementptr inbounds ([5 x i32], [5 x i32]* @bar, i32 0, i32 3), align 4, !tbaa !1
%8 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #3
- store i32 %8, i32* getelementptr inbounds ([5 x i32]* @baz, i32 0, i32 3), align 4, !tbaa !1
+ store i32 %8, i32* getelementptr inbounds ([5 x i32], [5 x i32]* @baz, i32 0, i32 3), align 4, !tbaa !1
%9 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #3
- store i32 %9, i32* getelementptr inbounds ([5 x i32]* @bar, i32 0, i32 4), align 4, !tbaa !1
+ store i32 %9, i32* getelementptr inbounds ([5 x i32], [5 x i32]* @bar, i32 0, i32 4), align 4, !tbaa !1
%10 = tail call i32 bitcast (i32 (...)* @calc to i32 ()*)() #3
- store i32 %10, i32* getelementptr inbounds ([5 x i32]* @baz, i32 0, i32 4), align 4, !tbaa !1
+ store i32 %10, i32* getelementptr inbounds ([5 x i32], [5 x i32]* @baz, i32 0, i32 4), align 4, !tbaa !1
ret void
}
@@ -55,20 +58,20 @@ declare i32 @calc(...) #1
; Function Attrs: nounwind ssp
define internal void @calculate() #0 {
- %1 = load <4 x i32>* bitcast ([5 x i32]* @bar to <4 x i32>*), align 4
- %2 = load <4 x i32>* bitcast ([5 x i32]* @baz to <4 x i32>*), align 4
+ %1 = load <4 x i32>, <4 x i32>* bitcast ([5 x i32]* @bar to <4 x i32>*), align 4
+ %2 = load <4 x i32>, <4 x i32>* bitcast ([5 x i32]* @baz to <4 x i32>*), align 4
%3 = mul <4 x i32> %2, %1
store <4 x i32> %3, <4 x i32>* bitcast ([5 x i32]* @foo to <4 x i32>*), align 4
- %4 = load i32* getelementptr inbounds ([5 x i32]* @bar, i32 0, i32 4), align 4, !tbaa !1
- %5 = load i32* getelementptr inbounds ([5 x i32]* @baz, i32 0, i32 4), align 4, !tbaa !1
+ %4 = load i32, i32* getelementptr inbounds ([5 x i32], [5 x i32]* @bar, i32 0, i32 4), align 4, !tbaa !1
+ %5 = load i32, i32* getelementptr inbounds ([5 x i32], [5 x i32]* @baz, i32 0, i32 4), align 4, !tbaa !1
%6 = mul nsw i32 %5, %4
- store i32 %6, i32* getelementptr inbounds ([5 x i32]* @foo, i32 0, i32 4), align 4, !tbaa !1
+ store i32 %6, i32* getelementptr inbounds ([5 x i32], [5 x i32]* @foo, i32 0, i32 4), align 4, !tbaa !1
ret void
}
; Function Attrs: nounwind readnone ssp
define internal i32* @returnFoo() #2 {
- ret i32* getelementptr inbounds ([5 x i32]* @foo, i32 0, i32 0)
+ ret i32* getelementptr inbounds ([5 x i32], [5 x i32]* @foo, i32 0, i32 0)
}
attributes #0 = { nounwind ssp "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
diff --git a/test/CodeGen/ARM/global-merge-addrspace.ll b/test/CodeGen/ARM/global-merge-addrspace.ll
index 0efa690bde28..8f40a4c28a76 100644
--- a/test/CodeGen/ARM/global-merge-addrspace.ll
+++ b/test/CodeGen/ARM/global-merge-addrspace.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=thumb-apple-darwin | FileCheck %s
+; RUN: llc < %s -mtriple=thumb-apple-darwin -arm-global-merge -global-merge-group-by-use=false | FileCheck %s
; Test the GlobalMerge pass. Check that the pass does not crash when using
; multiple address spaces.
@@ -7,6 +7,6 @@
@g2 = internal addrspace(1) global i32 2
-; CHECK: _MergedGlobals1:
+; CHECK: _MergedGlobals.1:
@g3 = internal addrspace(2) global i32 3
@g4 = internal addrspace(2) global i32 4
diff --git a/test/CodeGen/ARM/global-merge.ll b/test/CodeGen/ARM/global-merge.ll
index f88e92796196..e8c8289098a7 100644
--- a/test/CodeGen/ARM/global-merge.ll
+++ b/test/CodeGen/ARM/global-merge.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=thumb-apple-darwin -global-merge-on-const=true | FileCheck %s
+; RUN: llc < %s -mtriple=thumb-apple-darwin -arm-global-merge -global-merge-group-by-use=false -global-merge-on-const=true | FileCheck %s
; Test the ARMGlobalMerge pass. Use -march=thumb because it has a small
; value for the maximum offset (127).
@@ -59,16 +59,16 @@ declare void @__cxa_end_catch()
; Make sure that the complete variable fits within the range of the maximum
; offset. Having the starting offset in range is not sufficient.
; When this works properly, @g3 is placed in a separate chunk of merged globals.
-; CHECK: _MergedGlobals1:
+; CHECK: _MergedGlobals.1:
@g3 = internal global [30 x i32] [ i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10 ], align 4
; Global variables that can be placed in BSS should be kept together in a
; separate pool of merged globals.
-; CHECK: _MergedGlobals2
+; CHECK: _MergedGlobals.2
@g4 = internal global i32 0
@g5 = internal global i32 0
; Global variables that are constant can be merged together
-; CHECK: _MergedGlobals3
+; CHECK: _MergedGlobals.3
@g6 = internal constant [12 x i32] zeroinitializer, align 4
@g7 = internal constant [12 x i32] zeroinitializer, align 4
diff --git a/test/CodeGen/ARM/globals.ll b/test/CodeGen/ARM/globals.ll
index 2c599bf011a7..bab96dadce55 100644
--- a/test/CodeGen/ARM/globals.ll
+++ b/test/CodeGen/ARM/globals.ll
@@ -6,7 +6,7 @@
@G = external global i32
define i32 @test1() {
- %tmp = load i32* @G
+ %tmp = load i32, i32* @G
ret i32 %tmp
}
diff --git a/test/CodeGen/ARM/gv-stubs-crash.ll b/test/CodeGen/ARM/gv-stubs-crash.ll
index c4c4180a6200..6e82afeacf88 100644
--- a/test/CodeGen/ARM/gv-stubs-crash.ll
+++ b/test/CodeGen/ARM/gv-stubs-crash.ll
@@ -4,7 +4,7 @@
@Exn = external hidden unnamed_addr constant { i8*, i8* }
define hidden void @func(i32* %this, i32* %e) optsize align 2 {
- %e.ld = load i32* %e, align 4
+ %e.ld = load i32, i32* %e, align 4
%inv = invoke zeroext i1 @func2(i32* %this, i32 %e.ld) optsize
to label %ret unwind label %lpad
diff --git a/test/CodeGen/ARM/half.ll b/test/CodeGen/ARM/half.ll
index 10cebb38c565..b40eaf638519 100644
--- a/test/CodeGen/ARM/half.ll
+++ b/test/CodeGen/ARM/half.ll
@@ -6,7 +6,7 @@ define void @test_load_store(half* %in, half* %out) {
; CHECK-LABEL: test_load_store:
; CHECK: ldrh [[TMP:r[0-9]+]], [r0]
; CHECK: strh [[TMP]], [r1]
- %val = load half* %in
+ %val = load half, half* %in
store half %val, half* %out
ret void
}
@@ -14,7 +14,7 @@ define void @test_load_store(half* %in, half* %out) {
define i16 @test_bitcast_from_half(half* %addr) {
; CHECK-LABEL: test_bitcast_from_half:
; CHECK: ldrh r0, [r0]
- %val = load half* %addr
+ %val = load half, half* %addr
%val_int = bitcast half %val to i16
ret i16 %val_int
}
@@ -30,10 +30,10 @@ define void @test_bitcast_to_half(half* %addr, i16 %in) {
define float @test_extend32(half* %addr) {
; CHECK-LABEL: test_extend32:
-; CHECK-OLD: b.w ___gnu_h2f_ieee
+; CHECK-OLD: b.w ___extendhfsf2
; CHECK-F16: vcvtb.f32.f16
; CHECK-V8: vcvtb.f32.f16
- %val16 = load half* %addr
+ %val16 = load half, half* %addr
%val32 = fpext half %val16 to float
ret float %val32
}
@@ -41,12 +41,12 @@ define float @test_extend32(half* %addr) {
define double @test_extend64(half* %addr) {
; CHECK-LABEL: test_extend64:
-; CHECK-OLD: blx ___gnu_h2f_ieee
+; CHECK-OLD: blx ___extendhfsf2
; CHECK-OLD: vcvt.f64.f32
; CHECK-F16: vcvtb.f32.f16
; CHECK-F16: vcvt.f64.f32
; CHECK-V8: vcvtb.f64.f16
- %val16 = load half* %addr
+ %val16 = load half, half* %addr
%val32 = fpext half %val16 to double
ret double %val32
}
@@ -54,7 +54,7 @@ define double @test_extend64(half* %addr) {
define void @test_trunc32(float %in, half* %addr) {
; CHECK-LABEL: test_trunc32:
-; CHECK-OLD: blx ___gnu_f2h_ieee
+; CHECK-OLD: blx ___truncsfhf2
; CHECK-F16: vcvtb.f16.f32
; CHECK-V8: vcvtb.f16.f32
%val16 = fptrunc float %in to half
diff --git a/test/CodeGen/ARM/hello.ll b/test/CodeGen/ARM/hello.ll
index d2685854dba0..08e6104bbcf6 100644
--- a/test/CodeGen/ARM/hello.ll
+++ b/test/CodeGen/ARM/hello.ll
@@ -10,7 +10,7 @@
@str = internal constant [12 x i8] c"Hello World\00"
define i32 @main() {
- %tmp = call i32 @puts( i8* getelementptr ([12 x i8]* @str, i32 0, i64 0) ) ; <i32> [#uses=0]
+ %tmp = call i32 @puts( i8* getelementptr ([12 x i8], [12 x i8]* @str, i32 0, i64 0) ) ; <i32> [#uses=0]
ret i32 0
}
diff --git a/test/CodeGen/ARM/hidden-vis-2.ll b/test/CodeGen/ARM/hidden-vis-2.ll
index 18d38d40072c..a104f354295d 100644
--- a/test/CodeGen/ARM/hidden-vis-2.ll
+++ b/test/CodeGen/ARM/hidden-vis-2.ll
@@ -7,6 +7,6 @@ entry:
; CHECK-LABEL: t:
; CHECK: ldr
; CHECK-NEXT: ldr
- %0 = load i32* @x, align 4 ; <i32> [#uses=1]
+ %0 = load i32, i32* @x, align 4 ; <i32> [#uses=1]
ret i32 %0
}
diff --git a/test/CodeGen/ARM/hidden-vis-3.ll b/test/CodeGen/ARM/hidden-vis-3.ll
index 3bc3312e9c4e..0cf2f779704d 100644
--- a/test/CodeGen/ARM/hidden-vis-3.ll
+++ b/test/CodeGen/ARM/hidden-vis-3.ll
@@ -10,8 +10,8 @@ entry:
; CHECK: LCPI0_1:
; CHECK-NEXT: .long _y
- %0 = load i32* @x, align 4 ; <i32> [#uses=1]
- %1 = load i32* @y, align 4 ; <i32> [#uses=1]
+ %0 = load i32, i32* @x, align 4 ; <i32> [#uses=1]
+ %1 = load i32, i32* @y, align 4 ; <i32> [#uses=1]
%2 = add i32 %1, %0 ; <i32> [#uses=1]
ret i32 %2
}
diff --git a/test/CodeGen/ARM/ifconv-kills.ll b/test/CodeGen/ARM/ifconv-kills.ll
index de80c927cea5..3a458e481936 100644
--- a/test/CodeGen/ARM/ifconv-kills.ll
+++ b/test/CodeGen/ARM/ifconv-kills.ll
@@ -10,15 +10,15 @@ entry:
; present something which can be easily if-converted
if.then:
; %R0 should be killed here
- %valt = load i32* %ptr, align 4
+ %valt = load i32, i32* %ptr, align 4
br label %return
if.else:
; %R0 should be killed here, however after if-conversion the %R0 kill
; has to be removed because if.then will follow after this and still
; read it.
- %addr = getelementptr inbounds i32* %ptr, i32 4
- %vale = load i32* %addr, align 4
+ %addr = getelementptr inbounds i32, i32* %ptr, i32 4
+ %vale = load i32, i32* %addr, align 4
br label %return
return:
diff --git a/test/CodeGen/ARM/ifconv-regmask.ll b/test/CodeGen/ARM/ifconv-regmask.ll
index d45f65f9567f..2144ca6e4074 100644
--- a/test/CodeGen/ARM/ifconv-regmask.ll
+++ b/test/CodeGen/ARM/ifconv-regmask.ll
@@ -7,7 +7,7 @@
; Function Attrs: nounwind ssp
define i32 @sfu() {
entry:
- %bf.load = load i32* getelementptr inbounds (%union.opcode* @opcode, i32 0, i32 0), align 4
+ %bf.load = load i32, i32* getelementptr inbounds (%union.opcode, %union.opcode* @opcode, i32 0, i32 0), align 4
%bf.lshr = lshr i32 %bf.load, 26
%bf.clear = and i32 %bf.lshr, 7
switch i32 %bf.clear, label %return [
diff --git a/test/CodeGen/ARM/ifcvt-branch-weight-bug.ll b/test/CodeGen/ARM/ifcvt-branch-weight-bug.ll
index f76fd302774b..4e6924fe5b6b 100644
--- a/test/CodeGen/ARM/ifcvt-branch-weight-bug.ll
+++ b/test/CodeGen/ARM/ifcvt-branch-weight-bug.ll
@@ -50,7 +50,7 @@ if.else.i.i:
br label %_ZN1M6spliceEv.exit
_ZN1M6spliceEv.exit:
- %LIS = getelementptr inbounds %classK* %this, i32 0, i32 1
+ %LIS = getelementptr inbounds %classK, %classK* %this, i32 0, i32 1
call void @_ZN1F10handleMoveEb(%classF* %LIS, i1 zeroext false)
unreachable
}
diff --git a/test/CodeGen/ARM/ifcvt-branch-weight.ll b/test/CodeGen/ARM/ifcvt-branch-weight.ll
index 2d12a899f4b3..41d78e53acc7 100644
--- a/test/CodeGen/ARM/ifcvt-branch-weight.ll
+++ b/test/CodeGen/ARM/ifcvt-branch-weight.ll
@@ -3,16 +3,16 @@
%struct.S = type { i8* (i8*)*, [1 x i8] }
define internal zeroext i8 @bar(%struct.S* %x, %struct.S* nocapture %y) nounwind readonly {
entry:
- %0 = getelementptr inbounds %struct.S* %x, i32 0, i32 1, i32 0
- %1 = load i8* %0, align 1
+ %0 = getelementptr inbounds %struct.S, %struct.S* %x, i32 0, i32 1, i32 0
+ %1 = load i8, i8* %0, align 1
%2 = zext i8 %1 to i32
%3 = and i32 %2, 112
%4 = icmp eq i32 %3, 0
br i1 %4, label %return, label %bb
bb:
- %5 = getelementptr inbounds %struct.S* %y, i32 0, i32 1, i32 0
- %6 = load i8* %5, align 1
+ %5 = getelementptr inbounds %struct.S, %struct.S* %y, i32 0, i32 1, i32 0
+ %6 = load i8, i8* %5, align 1
%7 = zext i8 %6 to i32
%8 = and i32 %7, 112
%9 = icmp eq i32 %8, 0
diff --git a/test/CodeGen/ARM/ifcvt-dead-def.ll b/test/CodeGen/ARM/ifcvt-dead-def.ll
new file mode 100644
index 000000000000..77a3f5c0961f
--- /dev/null
+++ b/test/CodeGen/ARM/ifcvt-dead-def.ll
@@ -0,0 +1,55 @@
+; RUN: llc %s -o - -verify-machineinstrs | FileCheck %s
+
+target datalayout = "e-m:o-p:32:32-f64:32:64-v64:32:64-v128:32:128-a:0:32-n32-S32"
+target triple = "thumbv7-unknown-unknown"
+
+%struct.ref_s = type { %union.v, i16, i16 }
+%union.v = type { i32 }
+%struct.gs_color_s = type { i16, i16, i16, i16, i8, i8 }
+
+; In this case, the if converter was cloning the return instruction so that we had
+; r2<def> = ...
+; return [pred] r2<dead,def>
+; ldr <r2, kill>
+; return
+; The problem here was that the dead def on the first return was making the machine verifier
+; think that the load read from an undefined register. We need to remove the dead flag from
+; the return, and also add an implicit use of the prior value of r2.
+
+; CHECK: ldrh
+
+; Function Attrs: minsize nounwind optsize ssp
+define i32 @test(%struct.ref_s* %pref1, %struct.ref_s* %pref2, %struct.gs_color_s** %tmp152) #0 {
+bb:
+ %nref = alloca %struct.ref_s, align 4
+ %tmp46 = call %struct.ref_s* @name_string_ref(%struct.ref_s* %pref1, %struct.ref_s* %nref) #2
+ %tmp153 = load %struct.gs_color_s*, %struct.gs_color_s** %tmp152, align 4
+ %tmp154 = bitcast %struct.ref_s* %pref2 to %struct.gs_color_s**
+ %tmp155 = load %struct.gs_color_s*, %struct.gs_color_s** %tmp154, align 4
+ %tmp162 = getelementptr inbounds %struct.gs_color_s, %struct.gs_color_s* %tmp153, i32 0, i32 1
+ %tmp163 = load i16, i16* %tmp162, align 2
+ %tmp164 = getelementptr inbounds %struct.gs_color_s, %struct.gs_color_s* %tmp155, i32 0, i32 1
+ %tmp165 = load i16, i16* %tmp164, align 2
+ %tmp166 = icmp eq i16 %tmp163, %tmp165
+ br i1 %tmp166, label %bb167, label %bb173
+
+bb167: ; preds = %bb
+ %tmp168 = getelementptr inbounds %struct.gs_color_s, %struct.gs_color_s* %tmp153, i32 0, i32 2
+ %tmp169 = load i16, i16* %tmp168, align 2
+ %tmp170 = getelementptr inbounds %struct.gs_color_s, %struct.gs_color_s* %tmp155, i32 0, i32 2
+ %tmp171 = load i16, i16* %tmp170, align 2
+ %tmp172 = icmp eq i16 %tmp169, %tmp171
+ br label %bb173
+
+bb173: ; preds = %bb167, %bb
+ %tmp174 = phi i1 [ false, %bb ], [ %tmp172, %bb167 ]
+ %tmp175 = zext i1 %tmp174 to i32
+ ret i32 %tmp175
+}
+
+; Function Attrs: minsize optsize
+declare %struct.ref_s* @name_string_ref(%struct.ref_s*, %struct.ref_s*) #1
+
+attributes #0 = { minsize nounwind optsize }
+attributes #1 = { minsize optsize }
+attributes #2 = { minsize nounwind optsize }
diff --git a/test/CodeGen/ARM/ifcvt-iter-indbr.ll b/test/CodeGen/ARM/ifcvt-iter-indbr.ll
new file mode 100644
index 000000000000..75e9d77d7920
--- /dev/null
+++ b/test/CodeGen/ARM/ifcvt-iter-indbr.ll
@@ -0,0 +1,56 @@
+; RUN: llc < %s -mtriple thumbv7s-apple-darwin -asm-verbose=false | FileCheck %s
+
+declare i32 @foo(i32)
+declare i8* @bar(i32, i8*, i8*)
+
+; Verify that we don't try to iteratively re-ifconvert a block with a
+; (predicated) indirectbr terminator.
+; If we did, we would ignore its fallthrough successor.
+
+
+; CHECK-LABEL: test:
+; CHECK: cmp {{.*}}, #21
+; CHECK-NEXT: itt eq
+; CHECK-NEXT: streq.w
+; CHECK-NEXT: moveq pc
+; CHECK-NEXT: LBB{{[0-9_]+}}:
+; CHECK-NEXT: cmp {{.*}}, #42
+; CHECK-NEXT: itt ne
+; CHECK-NEXT: strne.w
+; CHECK-NEXT: movne pc
+; CHECK-NEXT: Ltmp
+; CHECK-NEXT: LBB0_2:
+; CHECK-NEXT: movw r0, #1234
+; CHECK-NEXT: b [[FOOCALL:LBB[0-9_]+]]
+; CHECK-NEXT: Ltmp
+; CHECK-NEXT: LBB{{[0-9_]+}}:
+; CHECK-NEXT: movw r0, #4567
+; CHECK-NEXT: [[FOOCALL]]:
+; CHECK-NEXT: blx _foo
+
+define i32 @test(i32 %a, i32 %a2, i32* %p, i32* %p2) {
+entry:
+ %dst1 = call i8* @bar(i32 1, i8* blockaddress(@test, %bb1), i8* blockaddress(@test, %bb2))
+ %dst2 = call i8* @bar(i32 2, i8* blockaddress(@test, %bb1), i8* blockaddress(@test, %bb2))
+ %dst3 = call i8* @bar(i32 3, i8* blockaddress(@test, %bb1), i8* blockaddress(@test, %bb2))
+ %cc1 = icmp eq i32 %a, 21
+ br i1 %cc1, label %cc1t, label %cc1f
+
+cc1t:
+ store i32 %a, i32* %p
+ indirectbr i8* %dst3, [label %bb1, label %bb2]
+
+cc1f:
+ %cc2 = icmp ne i32 %a2, 42
+ br i1 %cc2, label %cc2t, label %bb1
+cc2t:
+ store i32 %a, i32* %p2
+ indirectbr i8* %dst1, [label %bb1, label %bb2]
+
+bb1:
+ %ret_bb1 = call i32 @foo(i32 1234)
+ ret i32 %ret_bb1
+bb2:
+ %ret_bb2 = call i32 @foo(i32 4567)
+ ret i32 %ret_bb2
+}
diff --git a/test/CodeGen/ARM/ifcvt-regmask-noreturn.ll b/test/CodeGen/ARM/ifcvt-regmask-noreturn.ll
new file mode 100644
index 000000000000..cf04fc90504a
--- /dev/null
+++ b/test/CodeGen/ARM/ifcvt-regmask-noreturn.ll
@@ -0,0 +1,45 @@
+; RUN: llc %s -o - -verify-machineinstrs | FileCheck %s
+
+target datalayout = "e-m:o-p:32:32-f64:32:64-v64:32:64-v128:32:128-a:0:32-n32-S32"
+target triple = "thumbv7s-apple-ios8.0.0"
+
+@debw = external global i8*, align 4
+
+; This test ensures that the stack_chk call correctly puts implicit uses/defs for the registers
+; live across it when if-converting. This will be R0, which is passed to the call to free at the end
+; of the function.
+; Prior to this change, the stack_chk call (which does not return) would clobber R0 in its regmask,
+; leading to verifier errors because R0 was no longer live at its later use in the call to free().
+
+; CHECK-LABEL: @test
+; CHECK: stack_chk_fail
+
+; Function Attrs: ssp
+define void @test(i32 %argc, i8** nocapture readonly %argv, i32* %ptr, i32 %val) #0 {
+entry:
+ %count.i = alloca [256 x i32], align 4
+ %cmp284.i = icmp eq i32 %val, 0
+ br i1 %cmp284.i, label %for.end31.i, label %for.body21.i
+
+for.body21.i: ; preds = %entry
+ %arrayidx23.i = getelementptr inbounds [256 x i32], [256 x i32]* %count.i, i32 0, i32 1
+ %tmp20 = load i32, i32* %arrayidx23.i, align 4, !tbaa !0
+ store i32 %tmp20, i32* %ptr, align 4, !tbaa !0
+ br label %for.end31.i
+
+for.end31.i: ; preds = %for.body21.i, %entry
+ %tmp21 = load i8*, i8** @debw, align 4, !tbaa !4
+ tail call void @free(i8* %tmp21)
+ ret void
+}
+
+declare void @free(i8* nocapture)
+
+attributes #0 = { ssp "stack-protector-buffer-size"="8" }
+
+!0 = !{!1, !1, i64 0}
+!1 = !{!"int", !2, i64 0}
+!2 = !{!"omnipotent char", !3, i64 0}
+!3 = !{!"Simple C/C++ TBAA"}
+!4 = !{!5, !5, i64 0}
+!5 = !{!"any pointer", !2, i64 0}
diff --git a/test/CodeGen/ARM/ifcvt11.ll b/test/CodeGen/ARM/ifcvt11.ll
index dba8a3f1a6af..eae41e21c610 100644
--- a/test/CodeGen/ARM/ifcvt11.ll
+++ b/test/CodeGen/ARM/ifcvt11.ll
@@ -21,10 +21,10 @@ bb: ; preds = %bb4, %bb.nph
; CHECK: vmrs APSR_nzcv, fpscr
%r.19 = phi i32 [ 0, %bb.nph ], [ %r.0, %bb4 ]
%n.08 = phi i32 [ 0, %bb.nph ], [ %10, %bb4 ]
- %scevgep10 = getelementptr inbounds %struct.xyz_t* %p, i32 %n.08, i32 0
- %scevgep11 = getelementptr %struct.xyz_t* %p, i32 %n.08, i32 1
- %3 = load double* %scevgep10, align 4
- %4 = load double* %scevgep11, align 4
+ %scevgep10 = getelementptr inbounds %struct.xyz_t, %struct.xyz_t* %p, i32 %n.08, i32 0
+ %scevgep11 = getelementptr %struct.xyz_t, %struct.xyz_t* %p, i32 %n.08, i32 1
+ %3 = load double, double* %scevgep10, align 4
+ %4 = load double, double* %scevgep11, align 4
%5 = fcmp uge double %3, %4
br i1 %5, label %bb3, label %bb1
@@ -34,8 +34,8 @@ bb1: ; preds = %bb
; CHECK-NOT: vmrsmi
; CHECK: vcmpe.f64
; CHECK: vmrs APSR_nzcv, fpscr
- %scevgep12 = getelementptr %struct.xyz_t* %p, i32 %n.08, i32 2
- %6 = load double* %scevgep12, align 4
+ %scevgep12 = getelementptr %struct.xyz_t, %struct.xyz_t* %p, i32 %n.08, i32 2
+ %6 = load double, double* %scevgep12, align 4
%7 = fcmp uge double %3, %6
br i1 %7, label %bb3, label %bb2
diff --git a/test/CodeGen/ARM/ifcvt3.ll b/test/CodeGen/ARM/ifcvt3.ll
index 5da63dc5f022..e53d989ad529 100644
--- a/test/CodeGen/ARM/ifcvt3.ll
+++ b/test/CodeGen/ARM/ifcvt3.ll
@@ -4,8 +4,8 @@
define i32 @t1(i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK-LABEL: t1:
-; CHECK: cmp r2, #1
-; CHECK: cmpne r2, #7
+; CHECK: cmp r2, #7
+; CHECK: cmpne r2, #1
switch i32 %c, label %cond_next [
i32 1, label %cond_true
i32 7, label %cond_true
diff --git a/test/CodeGen/ARM/ifcvt5.ll b/test/CodeGen/ARM/ifcvt5.ll
index 31e3e00c468e..3aa2139cc03a 100644
--- a/test/CodeGen/ARM/ifcvt5.ll
+++ b/test/CodeGen/ARM/ifcvt5.ll
@@ -6,7 +6,7 @@
define void @foo(i32 %a) {
entry:
- %tmp = load i32** @x ; <i32*> [#uses=1]
+ %tmp = load i32*, i32** @x ; <i32*> [#uses=1]
store i32 %a, i32* %tmp
ret void
}
diff --git a/test/CodeGen/ARM/ifcvt6.ll b/test/CodeGen/ARM/ifcvt6.ll
index a00dedaee670..78901930e4b2 100644
--- a/test/CodeGen/ARM/ifcvt6.ll
+++ b/test/CodeGen/ARM/ifcvt6.ll
@@ -10,7 +10,7 @@ entry:
br i1 %tmp7, label %cond_true, label %UnifiedReturnBlock
cond_true: ; preds = %entry
- %tmp10 = call i32 (...)* @bar( ) ; <i32> [#uses=0]
+ %tmp10 = call i32 (...) @bar( ) ; <i32> [#uses=0]
ret void
UnifiedReturnBlock: ; preds = %entry
diff --git a/test/CodeGen/ARM/ifcvt7.ll b/test/CodeGen/ARM/ifcvt7.ll
index 476ed4d47c64..e0d2b7cffb44 100644
--- a/test/CodeGen/ARM/ifcvt7.ll
+++ b/test/CodeGen/ARM/ifcvt7.ll
@@ -11,9 +11,9 @@ entry:
br label %tailrecurse
tailrecurse: ; preds = %bb, %entry
- %tmp6 = load %struct.quad_struct** null ; <%struct.quad_struct*> [#uses=1]
- %tmp9 = load %struct.quad_struct** null ; <%struct.quad_struct*> [#uses=2]
- %tmp12 = load %struct.quad_struct** null ; <%struct.quad_struct*> [#uses=1]
+ %tmp6 = load %struct.quad_struct*, %struct.quad_struct** null ; <%struct.quad_struct*> [#uses=1]
+ %tmp9 = load %struct.quad_struct*, %struct.quad_struct** null ; <%struct.quad_struct*> [#uses=2]
+ %tmp12 = load %struct.quad_struct*, %struct.quad_struct** null ; <%struct.quad_struct*> [#uses=1]
%tmp14 = icmp eq %struct.quad_struct* null, null ; <i1> [#uses=1]
%tmp17 = icmp eq %struct.quad_struct* %tmp6, null ; <i1> [#uses=1]
%tmp23 = icmp eq %struct.quad_struct* %tmp9, null ; <i1> [#uses=1]
diff --git a/test/CodeGen/ARM/illegal-vector-bitcast.ll b/test/CodeGen/ARM/illegal-vector-bitcast.ll
index 7208fffbcc85..766b3d7ca433 100644
--- a/test/CodeGen/ARM/illegal-vector-bitcast.ll
+++ b/test/CodeGen/ARM/illegal-vector-bitcast.ll
@@ -3,10 +3,10 @@
define void @foo(<8 x float>* %f, <8 x float>* %g, <4 x i64>* %y)
{
- %h = load <8 x float>* %f
+ %h = load <8 x float>, <8 x float>* %f
%i = fmul <8 x float> %h, <float 0x3FF19999A0000000, float 0x400A666660000000, float 0x40119999A0000000, float 0x40159999A0000000, float 0.5, float 0x3FE3333340000000, float 0x3FE6666660000000, float 0x3FE99999A0000000>
%m = bitcast <8 x float> %i to <4 x i64>
- %z = load <4 x i64>* %y
+ %z = load <4 x i64>, <4 x i64>* %y
%n = mul <4 x i64> %z, %m
%p = bitcast <4 x i64> %n to <8 x float>
store <8 x float> %p, <8 x float>* %g
diff --git a/test/CodeGen/ARM/indirect-reg-input.ll b/test/CodeGen/ARM/indirect-reg-input.ll
index 17f6a9c96f68..e82e1dee9428 100644
--- a/test/CodeGen/ARM/indirect-reg-input.ll
+++ b/test/CodeGen/ARM/indirect-reg-input.ll
@@ -8,7 +8,7 @@
define void @switch_to_stack(%struct.my_stack* %stack) nounwind {
entry:
- %regs = getelementptr inbounds %struct.my_stack* %stack, i32 0, i32 0
+ %regs = getelementptr inbounds %struct.my_stack, %struct.my_stack* %stack, i32 0, i32 0
tail call void asm "\0A", "=*r,*0"(%struct.myjmp_buf* %regs, %struct.myjmp_buf* %regs)
ret void
}
diff --git a/test/CodeGen/ARM/indirectbr-2.ll b/test/CodeGen/ARM/indirectbr-2.ll
index 0c41da658009..ca068db1db0e 100644
--- a/test/CodeGen/ARM/indirectbr-2.ll
+++ b/test/CodeGen/ARM/indirectbr-2.ll
@@ -15,7 +15,7 @@
define i32 @func() nounwind ssp {
%1 = alloca i32, align 4
- %2 = load i32* @foo, align 4
+ %2 = load i32, i32* @foo, align 4
%3 = icmp eq i32 %2, 34879
br label %4
@@ -23,17 +23,17 @@ define i32 @func() nounwind ssp {
%5 = zext i1 %3 to i32
%6 = mul i32 %5, 287
%7 = add i32 %6, 2
- %8 = getelementptr [2 x i32]* @DWJumpTable2808, i32 0, i32 %5
- %9 = load i32* %8
+ %8 = getelementptr [2 x i32], [2 x i32]* @DWJumpTable2808, i32 0, i32 %5
+ %9 = load i32, i32* %8
%10 = add i32 %9, ptrtoint (i8* blockaddress(@func, %4) to i32)
%11 = inttoptr i32 %10 to i8*
- %12 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([45 x i8]* @0, i32 0, i32 0))
+ %12 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([45 x i8], [45 x i8]* @0, i32 0, i32 0))
indirectbr i8* %11, [label %13, label %14]
; <label>:13 ; preds = %4
%tmp14 = phi i32 [ %7, %4 ]
store i32 23958, i32* @foo, align 4
- %tmp15 = load i32* %1, align 4
+ %tmp15 = load i32, i32* %1, align 4
%tmp16 = icmp eq i32 %tmp15, 0
%tmp17 = zext i1 %tmp16 to i32
%tmp21 = add i32 %tmp17, %tmp14
diff --git a/test/CodeGen/ARM/indirectbr.ll b/test/CodeGen/ARM/indirectbr.ll
index 7c49cb310f39..d15ef14b4493 100644
--- a/test/CodeGen/ARM/indirectbr.ll
+++ b/test/CodeGen/ARM/indirectbr.ll
@@ -16,7 +16,7 @@ entry:
; THUMB: [[NEXTADDR_PCBASE:LPC0_[0-9]]]:
; THUMB: add r[[NEXTADDR_REG]], pc
- %0 = load i8** @nextaddr, align 4 ; <i8*> [#uses=2]
+ %0 = load i8*, i8** @nextaddr, align 4 ; <i8*> [#uses=2]
%1 = icmp eq i8* %0, null ; <i1> [#uses=1]
; indirect branch gets duplicated here
; ARM: bx
@@ -31,8 +31,8 @@ bb2: ; preds = %entry, %bb3
indirectbr i8* %gotovar.4.0, [label %L5, label %L4, label %L3, label %L2, label %L1]
bb3: ; preds = %entry
- %2 = getelementptr inbounds [5 x i8*]* @C.0.2070, i32 0, i32 %i ; <i8**> [#uses=1]
- %gotovar.4.0.pre = load i8** %2, align 4 ; <i8*> [#uses=1]
+ %2 = getelementptr inbounds [5 x i8*], [5 x i8*]* @C.0.2070, i32 0, i32 %i ; <i8**> [#uses=1]
+ %gotovar.4.0.pre = load i8*, i8** %2, align 4 ; <i8*> [#uses=1]
br label %bb2
L5: ; preds = %bb2
diff --git a/test/CodeGen/ARM/inline-diagnostics.ll b/test/CodeGen/ARM/inline-diagnostics.ll
index 0276abf2f5fa..3f5b73c5a211 100644
--- a/test/CodeGen/ARM/inline-diagnostics.ll
+++ b/test/CodeGen/ARM/inline-diagnostics.ll
@@ -8,8 +8,8 @@ define float @inline_func(float %f1, float %f2) #0 {
%c2 = alloca %struct.float4, align 4
%c3 = alloca %struct.float4, align 4
call void asm sideeffect "vmul.f32 ${2:q}, ${0:q}, ${1:q}", "=*r,=*r,*w"(%struct.float4* %c1, %struct.float4* %c2, %struct.float4* %c3) #1, !srcloc !1
- %x = getelementptr inbounds %struct.float4* %c3, i32 0, i32 0
- %1 = load float* %x, align 4
+ %x = getelementptr inbounds %struct.float4, %struct.float4* %c3, i32 0, i32 0
+ %1 = load float, float* %x, align 4
ret float %1
}
diff --git a/test/CodeGen/ARM/inlineasm-64bit.ll b/test/CodeGen/ARM/inlineasm-64bit.ll
index d098a4383bc6..8e747c5eb650 100644
--- a/test/CodeGen/ARM/inlineasm-64bit.ll
+++ b/test/CodeGen/ARM/inlineasm-64bit.ll
@@ -35,7 +35,7 @@ entry:
; check: strexd {{r[0-9]?[02468]}}, {{r[0-9]?[13579]}}, [r{{[0-9]+}}]
tail call void asm sideeffect " strexd $1, ${1:H}, [$0]\0A strexd $2, ${2:H}, [$0]\0A strexd $3, ${3:H}, [$0]\0A strexd $4, ${4:H}, [$0]\0A strexd $5, ${5:H}, [$0]\0A strexd $6, ${6:H}, [$0]\0A", "r,r,r,r,r,r,r"(i64* %p, i64 %val1, i64 %val2, i64 %val3, i64 %val4, i64 %val5, i64 %val6) nounwind
- %incdec.ptr = getelementptr inbounds i64* %p, i32 1
+ %incdec.ptr = getelementptr inbounds i64, i64* %p, i32 1
tail call void asm sideeffect " strexd $1, ${1:H}, [$0]\0A strexd $2, ${2:H}, [$0]\0A strexd $3, ${3:H}, [$0]\0A strexd $4, ${4:H}, [$0]\0A strexd $5, ${5:H}, [$0]\0A strexd $6, ${6:H}, [$0]\0A", "r,r,r,r,r,r,r"(i64* %incdec.ptr, i64 %val1, i64 %val2, i64 %val3, i64 %val4, i64 %val5, i64 %val6) nounwind
tail call void asm sideeffect " strexd $1, ${1:H}, [$0]\0A strexd $2, ${2:H}, [$0]\0A strexd $3, ${3:H}, [$0]\0A strexd $4, ${4:H}, [$0]\0A strexd $5, ${5:H}, [$0]\0A strexd $6, ${6:H}, [$0]\0A", "r,r,r,r,r,r,r"(i64* %incdec.ptr, i64 %val1, i64 %val2, i64 %val3, i64 %val4, i64 %val5, i64 %val6) nounwind
ret void
diff --git a/test/CodeGen/ARM/interrupt-attr.ll b/test/CodeGen/ARM/interrupt-attr.ll
index c6da09d156b7..43bd5815a558 100644
--- a/test/CodeGen/ARM/interrupt-attr.ll
+++ b/test/CodeGen/ARM/interrupt-attr.ll
@@ -35,7 +35,7 @@ define arm_aapcscc void @irq_fn() alignstack(8) "interrupt"="IRQ" {
; Normal AAPCS function (r0-r3 pushed onto stack by hardware, lr set to
; appropriate sentinel so no special return needed).
; CHECK-M-LABEL: irq_fn:
-; CHECK-M: push.w {r4, r10, r11, lr}
+; CHECK-M: push.w {r4, r7, r11, lr}
; CHECK-M: add.w r11, sp, #8
; CHECK-M: mov r4, sp
; CHECK-M: bfc r4, #0, #3
@@ -43,7 +43,7 @@ define arm_aapcscc void @irq_fn() alignstack(8) "interrupt"="IRQ" {
; CHECK-M: bl _bar
; CHECK-M: sub.w r4, r11, #8
; CHECK-M: mov sp, r4
-; CHECK-M: pop.w {r4, r10, r11, pc}
+; CHECK-M: pop.w {r4, r7, r11, pc}
call arm_aapcscc void @bar()
ret void
@@ -65,7 +65,7 @@ define arm_aapcscc void @fiq_fn() alignstack(8) "interrupt"="FIQ" {
; CHECK-A-THUMB-LABEL: fiq_fn:
; CHECK-M-LABEL: fiq_fn:
- %val = load volatile [16 x i32]* @bigvar
+ %val = load volatile [16 x i32], [16 x i32]* @bigvar
store volatile [16 x i32] %val, [16 x i32]* @bigvar
ret void
}
@@ -81,7 +81,7 @@ define arm_aapcscc void @swi_fn() alignstack(8) "interrupt"="SWI" {
; CHECK-A: pop {r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, lr}
; CHECK-A: subs pc, lr, #0
- %val = load volatile [16 x i32]* @bigvar
+ %val = load volatile [16 x i32], [16 x i32]* @bigvar
store volatile [16 x i32] %val, [16 x i32]* @bigvar
ret void
}
@@ -126,8 +126,8 @@ define arm_aapcscc void @floating_fn() alignstack(8) "interrupt"="IRQ" {
; CHECK-A-NOT: vstr
; CHECK-A-NOT: vstm
; CHECK-A: vadd.f64 {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
- %lhs = load volatile double* @var
- %rhs = load volatile double* @var
+ %lhs = load volatile double, double* @var
+ %rhs = load volatile double, double* @var
%sum = fadd double %lhs, %rhs
store double %sum, double* @var
ret void
diff --git a/test/CodeGen/ARM/intrinsics-crypto.ll b/test/CodeGen/ARM/intrinsics-crypto.ll
index 96413d341e4c..6e5efd85a347 100644
--- a/test/CodeGen/ARM/intrinsics-crypto.ll
+++ b/test/CodeGen/ARM/intrinsics-crypto.ll
@@ -1,8 +1,8 @@
; RUN: llc < %s -mtriple=armv8 -mattr=+crypto | FileCheck %s
define arm_aapcs_vfpcc <16 x i8> @test_aesde(<16 x i8>* %a, <16 x i8> *%b) {
- %tmp = load <16 x i8>* %a
- %tmp2 = load <16 x i8>* %b
+ %tmp = load <16 x i8>, <16 x i8>* %a
+ %tmp2 = load <16 x i8>, <16 x i8>* %b
%tmp3 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %tmp, <16 x i8> %tmp2)
; CHECK: aesd.8 q{{[0-9]+}}, q{{[0-9]+}}
%tmp4 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %tmp3, <16 x i8> %tmp2)
@@ -15,9 +15,9 @@ define arm_aapcs_vfpcc <16 x i8> @test_aesde(<16 x i8>* %a, <16 x i8> *%b) {
}
define arm_aapcs_vfpcc <4 x i32> @test_sha(<4 x i32> *%a, <4 x i32> *%b, <4 x i32> *%c) {
- %tmp = load <4 x i32>* %a
- %tmp2 = load <4 x i32>* %b
- %tmp3 = load <4 x i32>* %c
+ %tmp = load <4 x i32>, <4 x i32>* %a
+ %tmp2 = load <4 x i32>, <4 x i32>* %b
+ %tmp3 = load <4 x i32>, <4 x i32>* %c
%scalar = extractelement <4 x i32> %tmp, i32 0
%resscalar = call i32 @llvm.arm.neon.sha1h(i32 %scalar)
%res1 = insertelement <4 x i32> undef, i32 %resscalar, i32 0
diff --git a/test/CodeGen/ARM/intrinsics-memory-barrier.ll b/test/CodeGen/ARM/intrinsics-memory-barrier.ll
index 5ee0b3e59902..5626d38aae4b 100644
--- a/test/CodeGen/ARM/intrinsics-memory-barrier.ll
+++ b/test/CodeGen/ARM/intrinsics-memory-barrier.ll
@@ -18,7 +18,7 @@ define void @test_dmb_reordering(i32 %a, i32 %b, i32* %d) {
call void @llvm.arm.dmb(i32 15) ; CHECK: dmb sy
- %d1 = getelementptr i32* %d, i32 1
+ %d1 = getelementptr i32, i32* %d, i32 1
store i32 %b, i32* %d1 ; CHECK: str {{r[0-9]+}}, [{{r[0-9]+}}, #4]
ret void
@@ -31,7 +31,7 @@ define void @test_dsb_reordering(i32 %a, i32 %b, i32* %d) {
call void @llvm.arm.dsb(i32 15) ; CHECK: dsb sy
- %d1 = getelementptr i32* %d, i32 1
+ %d1 = getelementptr i32, i32* %d, i32 1
store i32 %b, i32* %d1 ; CHECK: str {{r[0-9]+}}, [{{r[0-9]+}}, #4]
ret void
@@ -44,7 +44,7 @@ define void @test_isb_reordering(i32 %a, i32 %b, i32* %d) {
call void @llvm.arm.isb(i32 15) ; CHECK: isb sy
- %d1 = getelementptr i32* %d, i32 1
+ %d1 = getelementptr i32, i32* %d, i32 1
store i32 %b, i32* %d1 ; CHECK: str {{r[0-9]+}}, [{{r[0-9]+}}, #4]
ret void
diff --git a/test/CodeGen/ARM/invoke-donothing-assert.ll b/test/CodeGen/ARM/invoke-donothing-assert.ll
index 0b607f7edf38..aab3556c5477 100644
--- a/test/CodeGen/ARM/invoke-donothing-assert.ll
+++ b/test/CodeGen/ARM/invoke-donothing-assert.ll
@@ -46,7 +46,7 @@ new.notnull.i.i:
br label %cleanup
cleanup:
- %0 = load i32* %a, align 4
+ %0 = load i32, i32* %a, align 4
%inc294 = add nsw i32 %0, 4
store i32 %inc294, i32* %a, align 4
br i1 false, label %_ZN3lol5ArrayIivvvvvvvED1Ev.exit, label %delete.notnull.i.i.i1409
diff --git a/test/CodeGen/ARM/isel-v8i32-crash.ll b/test/CodeGen/ARM/isel-v8i32-crash.ll
index 0116fe8de7cc..f1745bd47279 100644
--- a/test/CodeGen/ARM/isel-v8i32-crash.ll
+++ b/test/CodeGen/ARM/isel-v8i32-crash.ll
@@ -15,7 +15,7 @@ target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
define void @func(i16* nocapture %pb, float* nocapture readonly %pf) #0 {
entry:
%0 = bitcast float* %pf to <8 x float>*
- %1 = load <8 x float>* %0, align 4
+ %1 = load <8 x float>, <8 x float>* %0, align 4
%2 = fmul <8 x float> %1, <float 8.000000e+00, float 8.000000e+00, float 8.000000e+00, float 8.000000e+00, float 8.000000e+00, float 8.000000e+00, float 8.000000e+00, float 8.000000e+00>
%3 = fptosi <8 x float> %2 to <8 x i16>
%4 = bitcast i16* %pb to <8 x i16>*
diff --git a/test/CodeGen/ARM/krait-cpu-div-attribute.ll b/test/CodeGen/ARM/krait-cpu-div-attribute.ll
new file mode 100644
index 000000000000..222664a904ca
--- /dev/null
+++ b/test/CodeGen/ARM/krait-cpu-div-attribute.ll
@@ -0,0 +1,36 @@
+; Tests the generation of ".arch_extension" attribute for hardware
+; division on krait CPU. For now, krait is recognized as "cortex-a9" + hwdiv
+; Also, tests for the hwdiv instruction on krait CPU
+
+; check for arch_extension/cpu directive
+; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=krait | FileCheck %s --check-prefix=DIV_EXTENSION
+; RUN: llc < %s -mtriple=thumbv7-linux-gnueabi -mcpu=krait | FileCheck %s --check-prefix=DIV_EXTENSION
+; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a9 | FileCheck %s --check-prefix=NODIV_KRAIT
+; RUN: llc < %s -mtriple=thumbv7-linux-gnueabi -mcpu=cortex-a9 | FileCheck %s --check-prefix=NODIV_KRAIT
+; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=krait -mattr=-hwdiv,-hwdiv-arm | FileCheck %s --check-prefix=NODIV_KRAIT
+
+; check if correct instruction is emitted by integrated assembler
+; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=krait -filetype=obj | llvm-objdump -mcpu=krait -triple armv7-linux-gnueabi -d - | FileCheck %s --check-prefix=HWDIV
+; RUN: llc < %s -mtriple=thumbv7-linux-gnueabi -mcpu=krait -filetype=obj | llvm-objdump -mcpu=krait -triple thumbv7-linux-gnueabi -d - | FileCheck %s --check-prefix=HWDIV
+
+; arch_extension attribute
+; DIV_EXTENSION: .cpu cortex-a9
+; DIV_EXTENSION: .arch_extension idiv
+; NODIV_KRAIT-NOT: .arch_extension idiv
+; HWDIV: sdiv
+
+define i32 @main() #0 {
+entry:
+ %retval = alloca i32, align 4
+ %a = alloca i32, align 4
+ %b = alloca i32, align 4
+ %c = alloca i32, align 4
+ store i32 0, i32* %retval
+ store volatile i32 100, i32* %b, align 4
+ store volatile i32 32, i32* %c, align 4
+ %0 = load volatile i32, i32* %b, align 4
+ %1 = load volatile i32, i32* %c, align 4
+ %div = sdiv i32 %0, %1
+ store volatile i32 %div, i32* %a, align 4
+ ret i32 0
+}
diff --git a/test/CodeGen/ARM/large-stack.ll b/test/CodeGen/ARM/large-stack.ll
index 1a9a1fadeebe..e2d4de341ec2 100644
--- a/test/CodeGen/ARM/large-stack.ll
+++ b/test/CodeGen/ARM/large-stack.ll
@@ -15,6 +15,6 @@ define i32 @test3() {
%tmp = alloca i32, align 4
%a = alloca [805306369 x i8], align 16
store i32 0, i32* %tmp
- %tmp1 = load i32* %tmp
+ %tmp1 = load i32, i32* %tmp
ret i32 %tmp1
}
diff --git a/test/CodeGen/ARM/ldm.ll b/test/CodeGen/ARM/ldm.ll
index 3977da6da9cb..65e972f4e1b4 100644
--- a/test/CodeGen/ARM/ldm.ll
+++ b/test/CodeGen/ARM/ldm.ll
@@ -8,8 +8,8 @@ define i32 @t1() {
; CHECK: pop
; V4T-LABEL: t1:
; V4T: pop
- %tmp = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 0) ; <i32> [#uses=1]
- %tmp3 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 1) ; <i32> [#uses=1]
+ %tmp = load i32, i32* getelementptr ([0 x i32], [0 x i32]* @X, i32 0, i32 0) ; <i32> [#uses=1]
+ %tmp3 = load i32, i32* getelementptr ([0 x i32], [0 x i32]* @X, i32 0, i32 1) ; <i32> [#uses=1]
%tmp4 = tail call i32 @f1( i32 %tmp, i32 %tmp3 ) ; <i32> [#uses=1]
ret i32 %tmp4
}
@@ -19,9 +19,9 @@ define i32 @t2() {
; CHECK: pop
; V4T-LABEL: t2:
; V4T: pop
- %tmp = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 2) ; <i32> [#uses=1]
- %tmp3 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 3) ; <i32> [#uses=1]
- %tmp5 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 4) ; <i32> [#uses=1]
+ %tmp = load i32, i32* getelementptr ([0 x i32], [0 x i32]* @X, i32 0, i32 2) ; <i32> [#uses=1]
+ %tmp3 = load i32, i32* getelementptr ([0 x i32], [0 x i32]* @X, i32 0, i32 3) ; <i32> [#uses=1]
+ %tmp5 = load i32, i32* getelementptr ([0 x i32], [0 x i32]* @X, i32 0, i32 4) ; <i32> [#uses=1]
%tmp6 = tail call i32 @f2( i32 %tmp, i32 %tmp3, i32 %tmp5 ) ; <i32> [#uses=1]
ret i32 %tmp6
}
@@ -34,9 +34,9 @@ define i32 @t3() {
; V4T: ldmib
; V4T: pop
; V4T-NEXT: bx lr
- %tmp = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 1) ; <i32> [#uses=1]
- %tmp3 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 2) ; <i32> [#uses=1]
- %tmp5 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 3) ; <i32> [#uses=1]
+ %tmp = load i32, i32* getelementptr ([0 x i32], [0 x i32]* @X, i32 0, i32 1) ; <i32> [#uses=1]
+ %tmp3 = load i32, i32* getelementptr ([0 x i32], [0 x i32]* @X, i32 0, i32 2) ; <i32> [#uses=1]
+ %tmp5 = load i32, i32* getelementptr ([0 x i32], [0 x i32]* @X, i32 0, i32 3) ; <i32> [#uses=1]
%tmp6 = call i32 @f2( i32 %tmp, i32 %tmp3, i32 %tmp5 ) ; <i32> [#uses=1]
ret i32 %tmp6
}
diff --git a/test/CodeGen/ARM/ldr.ll b/test/CodeGen/ARM/ldr.ll
index 57e9977ff872..bd4de5de669c 100644
--- a/test/CodeGen/ARM/ldr.ll
+++ b/test/CodeGen/ARM/ldr.ll
@@ -4,7 +4,7 @@ define i32 @f1(i32* %v) {
; CHECK-LABEL: f1:
; CHECK: ldr r0
entry:
- %tmp = load i32* %v
+ %tmp = load i32, i32* %v
ret i32 %tmp
}
@@ -12,8 +12,8 @@ define i32 @f2(i32* %v) {
; CHECK-LABEL: f2:
; CHECK: ldr r0
entry:
- %tmp2 = getelementptr i32* %v, i32 1023
- %tmp = load i32* %tmp2
+ %tmp2 = getelementptr i32, i32* %v, i32 1023
+ %tmp = load i32, i32* %tmp2
ret i32 %tmp
}
@@ -22,8 +22,8 @@ define i32 @f3(i32* %v) {
; CHECK: mov
; CHECK: ldr r0
entry:
- %tmp2 = getelementptr i32* %v, i32 1024
- %tmp = load i32* %tmp2
+ %tmp2 = getelementptr i32, i32* %v, i32 1024
+ %tmp = load i32, i32* %tmp2
ret i32 %tmp
}
@@ -34,7 +34,7 @@ define i32 @f4(i32 %base) {
entry:
%tmp1 = sub i32 %base, 128
%tmp2 = inttoptr i32 %tmp1 to i32*
- %tmp3 = load i32* %tmp2
+ %tmp3 = load i32, i32* %tmp2
ret i32 %tmp3
}
@@ -44,7 +44,7 @@ define i32 @f5(i32 %base, i32 %offset) {
entry:
%tmp1 = add i32 %base, %offset
%tmp2 = inttoptr i32 %tmp1 to i32*
- %tmp3 = load i32* %tmp2
+ %tmp3 = load i32, i32* %tmp2
ret i32 %tmp3
}
@@ -55,7 +55,7 @@ entry:
%tmp1 = shl i32 %offset, 2
%tmp2 = add i32 %base, %tmp1
%tmp3 = inttoptr i32 %tmp2 to i32*
- %tmp4 = load i32* %tmp3
+ %tmp4 = load i32, i32* %tmp3
ret i32 %tmp4
}
@@ -66,6 +66,6 @@ entry:
%tmp1 = lshr i32 %offset, 2
%tmp2 = add i32 %base, %tmp1
%tmp3 = inttoptr i32 %tmp2 to i32*
- %tmp4 = load i32* %tmp3
+ %tmp4 = load i32, i32* %tmp3
ret i32 %tmp4
}
diff --git a/test/CodeGen/ARM/ldr_ext.ll b/test/CodeGen/ARM/ldr_ext.ll
index 31aaba5c3c21..15efb50c9a9a 100644
--- a/test/CodeGen/ARM/ldr_ext.ll
+++ b/test/CodeGen/ARM/ldr_ext.ll
@@ -2,28 +2,28 @@
define i32 @test1(i8* %t1) nounwind {
; CHECK: ldrb
- %tmp.u = load i8* %t1
+ %tmp.u = load i8, i8* %t1
%tmp1.s = zext i8 %tmp.u to i32
ret i32 %tmp1.s
}
define i32 @test2(i16* %t1) nounwind {
; CHECK: ldrh
- %tmp.u = load i16* %t1
+ %tmp.u = load i16, i16* %t1
%tmp1.s = zext i16 %tmp.u to i32
ret i32 %tmp1.s
}
define i32 @test3(i8* %t0) nounwind {
; CHECK: ldrsb
- %tmp.s = load i8* %t0
+ %tmp.s = load i8, i8* %t0
%tmp1.s = sext i8 %tmp.s to i32
ret i32 %tmp1.s
}
define i32 @test4(i16* %t0) nounwind {
; CHECK: ldrsh
- %tmp.s = load i16* %t0
+ %tmp.s = load i16, i16* %t0
%tmp1.s = sext i16 %tmp.s to i32
ret i32 %tmp1.s
}
@@ -31,7 +31,7 @@ define i32 @test4(i16* %t0) nounwind {
define i32 @test5() nounwind {
; CHECK: mov r0, #0
; CHECK: ldrsh
- %tmp.s = load i16* null
+ %tmp.s = load i16, i16* null
%tmp1.s = sext i16 %tmp.s to i32
ret i32 %tmp1.s
}
diff --git a/test/CodeGen/ARM/ldr_frame.ll b/test/CodeGen/ARM/ldr_frame.ll
index ed964ecd3f83..01b18bccc337 100644
--- a/test/CodeGen/ARM/ldr_frame.ll
+++ b/test/CodeGen/ARM/ldr_frame.ll
@@ -2,30 +2,30 @@
define i32 @f1() {
%buf = alloca [32 x i32], align 4
- %tmp = getelementptr [32 x i32]* %buf, i32 0, i32 0
- %tmp1 = load i32* %tmp
+ %tmp = getelementptr [32 x i32], [32 x i32]* %buf, i32 0, i32 0
+ %tmp1 = load i32, i32* %tmp
ret i32 %tmp1
}
define i32 @f2() {
%buf = alloca [32 x i8], align 4
- %tmp = getelementptr [32 x i8]* %buf, i32 0, i32 0
- %tmp1 = load i8* %tmp
+ %tmp = getelementptr [32 x i8], [32 x i8]* %buf, i32 0, i32 0
+ %tmp1 = load i8, i8* %tmp
%tmp2 = zext i8 %tmp1 to i32
ret i32 %tmp2
}
define i32 @f3() {
%buf = alloca [32 x i32], align 4
- %tmp = getelementptr [32 x i32]* %buf, i32 0, i32 32
- %tmp1 = load i32* %tmp
+ %tmp = getelementptr [32 x i32], [32 x i32]* %buf, i32 0, i32 32
+ %tmp1 = load i32, i32* %tmp
ret i32 %tmp1
}
define i32 @f4() {
%buf = alloca [32 x i8], align 4
- %tmp = getelementptr [32 x i8]* %buf, i32 0, i32 2
- %tmp1 = load i8* %tmp
+ %tmp = getelementptr [32 x i8], [32 x i8]* %buf, i32 0, i32 2
+ %tmp1 = load i8, i8* %tmp
%tmp2 = zext i8 %tmp1 to i32
ret i32 %tmp2
}
diff --git a/test/CodeGen/ARM/ldr_post.ll b/test/CodeGen/ARM/ldr_post.ll
index 2558b16f3d3c..139c6f45e520 100644
--- a/test/CodeGen/ARM/ldr_post.ll
+++ b/test/CodeGen/ARM/ldr_post.ll
@@ -7,7 +7,7 @@
define i32 @test1(i32 %a, i32 %b, i32 %c) {
%tmp1 = mul i32 %a, %b ; <i32> [#uses=2]
%tmp2 = inttoptr i32 %tmp1 to i32* ; <i32*> [#uses=1]
- %tmp3 = load i32* %tmp2 ; <i32> [#uses=1]
+ %tmp3 = load i32, i32* %tmp2 ; <i32> [#uses=1]
%tmp4 = sub i32 %tmp1, %c ; <i32> [#uses=1]
%tmp5 = mul i32 %tmp4, %tmp3 ; <i32> [#uses=1]
ret i32 %tmp5
@@ -19,7 +19,7 @@ define i32 @test1(i32 %a, i32 %b, i32 %c) {
define i32 @test2(i32 %a, i32 %b) {
%tmp1 = mul i32 %a, %b ; <i32> [#uses=2]
%tmp2 = inttoptr i32 %tmp1 to i32* ; <i32*> [#uses=1]
- %tmp3 = load i32* %tmp2 ; <i32> [#uses=1]
+ %tmp3 = load i32, i32* %tmp2 ; <i32> [#uses=1]
%tmp4 = sub i32 %tmp1, 16 ; <i32> [#uses=1]
%tmp5 = mul i32 %tmp4, %tmp3 ; <i32> [#uses=1]
ret i32 %tmp5
diff --git a/test/CodeGen/ARM/ldr_pre.ll b/test/CodeGen/ARM/ldr_pre.ll
index a97927a20ab1..c6c76e222836 100644
--- a/test/CodeGen/ARM/ldr_pre.ll
+++ b/test/CodeGen/ARM/ldr_pre.ll
@@ -5,8 +5,8 @@
; CHECK: ldr {{.*!}}
; CHECK-NOT: ldr
define i32* @test1(i32* %X, i32* %dest) {
- %Y = getelementptr i32* %X, i32 4 ; <i32*> [#uses=2]
- %A = load i32* %Y ; <i32> [#uses=1]
+ %Y = getelementptr i32, i32* %X, i32 4 ; <i32*> [#uses=2]
+ %A = load i32, i32* %Y ; <i32> [#uses=1]
store i32 %A, i32* %dest
ret i32* %Y
}
@@ -17,7 +17,7 @@ define i32* @test1(i32* %X, i32* %dest) {
define i32 @test2(i32 %a, i32 %b, i32 %c) {
%tmp1 = sub i32 %a, %b ; <i32> [#uses=2]
%tmp2 = inttoptr i32 %tmp1 to i32* ; <i32*> [#uses=1]
- %tmp3 = load i32* %tmp2 ; <i32> [#uses=1]
+ %tmp3 = load i32, i32* %tmp2 ; <i32> [#uses=1]
%tmp4 = sub i32 %tmp1, %c ; <i32> [#uses=1]
%tmp5 = add i32 %tmp4, %tmp3 ; <i32> [#uses=1]
ret i32 %tmp5
diff --git a/test/CodeGen/ARM/ldrd-memoper.ll b/test/CodeGen/ARM/ldrd-memoper.ll
index f1a1121f6aa4..744fbd5efb86 100644
--- a/test/CodeGen/ARM/ldrd-memoper.ll
+++ b/test/CodeGen/ARM/ldrd-memoper.ll
@@ -8,8 +8,8 @@
; CHECK: Formed {{.*}} t2LDRD{{.*}} mem:LD4[%0] LD4[%0+4]
define i64 @t(i64 %a) nounwind readonly {
entry:
- %0 = load i64** @b, align 4
- %1 = load i64* %0, align 4
+ %0 = load i64*, i64** @b, align 4
+ %1 = load i64, i64* %0, align 4
%2 = mul i64 %1, %a
ret i64 %2
}
diff --git a/test/CodeGen/ARM/ldrd.ll b/test/CodeGen/ARM/ldrd.ll
index caef2e78bbfa..7ce846844e05 100644
--- a/test/CodeGen/ARM/ldrd.ll
+++ b/test/CodeGen/ARM/ldrd.ll
@@ -19,8 +19,8 @@ entry:
; M3-LABEL: t:
; M3-NOT: ldrd
- %0 = load i64** @b, align 4
- %1 = load i64* %0, align 4
+ %0 = load i64*, i64** @b, align 4
+ %1 = load i64, i64* %0, align 4
%2 = mul i64 %1, %a
ret i64 %2
}
@@ -53,12 +53,12 @@ entry:
bb: ; preds = %bb, %entry
%i.03 = phi i32 [ %tmp, %bb ], [ 0, %entry ] ; <i32> [#uses=3]
- %scevgep = getelementptr i32* %a, i32 %i.03 ; <i32*> [#uses=1]
- %scevgep4 = getelementptr i32* %b, i32 %i.03 ; <i32*> [#uses=1]
+ %scevgep = getelementptr i32, i32* %a, i32 %i.03 ; <i32*> [#uses=1]
+ %scevgep4 = getelementptr i32, i32* %b, i32 %i.03 ; <i32*> [#uses=1]
%tmp = add i32 %i.03, 1 ; <i32> [#uses=3]
- %scevgep5 = getelementptr i32* %a, i32 %tmp ; <i32*> [#uses=1]
- %2 = load i32* %scevgep, align 4 ; <i32> [#uses=1]
- %3 = load i32* %scevgep5, align 4 ; <i32> [#uses=1]
+ %scevgep5 = getelementptr i32, i32* %a, i32 %tmp ; <i32*> [#uses=1]
+ %2 = load i32, i32* %scevgep, align 4 ; <i32> [#uses=1]
+ %3 = load i32, i32* %scevgep5, align 4 ; <i32> [#uses=1]
%4 = add nsw i32 %3, %2 ; <i32> [#uses=1]
store i32 %4, i32* %scevgep4, align 4
%exitcond = icmp eq i32 %tmp, %0 ; <i1> [#uses=1]
@@ -84,10 +84,10 @@ entry:
; A8-NEXT: str [[FIELD1]], {{\[}}[[BASE]]{{\]}}
%orig_blocks = alloca [256 x i16], align 2
%0 = bitcast [256 x i16]* %orig_blocks to i8*
call void @llvm.lifetime.start(i64 512, i8* %0) nounwind
- %tmp1 = load i32* getelementptr inbounds (%struct.Test* @TestVar, i32 0, i32 1), align 4
- %tmp2 = load i32* getelementptr inbounds (%struct.Test* @TestVar, i32 0, i32 2), align 4
+ %tmp1 = load i32, i32* getelementptr inbounds (%struct.Test, %struct.Test* @TestVar, i32 0, i32 1), align 4
+ %tmp2 = load i32, i32* getelementptr inbounds (%struct.Test, %struct.Test* @TestVar, i32 0, i32 2), align 4
%add = add nsw i32 %tmp2, %tmp1
- store i32 %add, i32* getelementptr inbounds (%struct.Test* @TestVar, i32 0, i32 0), align 4
+ store i32 %add, i32* getelementptr inbounds (%struct.Test, %struct.Test* @TestVar, i32 0, i32 0), align 4
call void @llvm.lifetime.end(i64 512, i8* %0) nounwind
ret void
}
diff --git a/test/CodeGen/ARM/ldst-f32-2-i32.ll b/test/CodeGen/ARM/ldst-f32-2-i32.ll
index 61c459c7435e..d00f44e8e885 100644
--- a/test/CodeGen/ARM/ldst-f32-2-i32.ll
+++ b/test/CodeGen/ARM/ldst-f32-2-i32.ll
@@ -14,10 +14,10 @@ bb:
; CHECK: str [[REGISTER]], [{{r[0-9]+}}], #4
%j.05 = phi i32 [ %2, %bb ], [ 0, %entry ]
%tmp = mul i32 %j.05, %index
- %uglygep = getelementptr i8* %src6, i32 %tmp
+ %uglygep = getelementptr i8, i8* %src6, i32 %tmp
%src_addr.04 = bitcast i8* %uglygep to float*
- %dst_addr.03 = getelementptr float* %dst, i32 %j.05
- %1 = load float* %src_addr.04, align 4
+ %dst_addr.03 = getelementptr float, float* %dst, i32 %j.05
+ %1 = load float, float* %src_addr.04, align 4
store float %1, float* %dst_addr.03, align 4
%2 = add i32 %j.05, 1
%exitcond = icmp eq i32 %2, %width
diff --git a/test/CodeGen/ARM/ldstrex-m.ll b/test/CodeGen/ARM/ldstrex-m.ll
index b50699f4cde6..3d83a9d78e35 100644
--- a/test/CodeGen/ARM/ldstrex-m.ll
+++ b/test/CodeGen/ARM/ldstrex-m.ll
@@ -4,7 +4,7 @@
; CHECK-NOT: ldrexd
define i64 @f0(i64* %p) nounwind readonly {
entry:
- %0 = load atomic i64* %p seq_cst, align 8
+ %0 = load atomic i64, i64* %p seq_cst, align 8
ret i64 %0
}
@@ -29,7 +29,7 @@ entry:
; CHECK: ldr
define i32 @f3(i32* %p) nounwind readonly {
entry:
- %0 = load atomic i32* %p seq_cst, align 4
+ %0 = load atomic i32, i32* %p seq_cst, align 4
ret i32 %0
}
@@ -37,7 +37,7 @@ entry:
; CHECK: ldrb
define i8 @f4(i8* %p) nounwind readonly {
entry:
- %0 = load atomic i8* %p seq_cst, align 4
+ %0 = load atomic i8, i8* %p seq_cst, align 4
ret i8 %0
}
diff --git a/test/CodeGen/ARM/ldstrex.ll b/test/CodeGen/ARM/ldstrex.ll
index a40e255e83ea..59349f72a8fe 100644
--- a/test/CodeGen/ARM/ldstrex.ll
+++ b/test/CodeGen/ARM/ldstrex.ll
@@ -106,24 +106,24 @@ declare void @llvm.arm.clrex() nounwind
define void @excl_addrmode() {
; CHECK-T2ADDRMODE-LABEL: excl_addrmode:
- %base1020 = load i32** @base
- %offset1020 = getelementptr i32* %base1020, i32 255
+ %base1020 = load i32*, i32** @base
+ %offset1020 = getelementptr i32, i32* %base1020, i32 255
call i32 @llvm.arm.ldrex.p0i32(i32* %offset1020)
call i32 @llvm.arm.strex.p0i32(i32 0, i32* %offset1020)
; CHECK-T2ADDRMODE: ldrex {{r[0-9]+}}, [{{r[0-9]+}}, #1020]
; CHECK-T2ADDRMODE: strex {{r[0-9]+}}, {{r[0-9]+}}, [{{r[0-9]+}}, #1020]
- %base1024 = load i32** @base
- %offset1024 = getelementptr i32* %base1024, i32 256
+ %base1024 = load i32*, i32** @base
+ %offset1024 = getelementptr i32, i32* %base1024, i32 256
call i32 @llvm.arm.ldrex.p0i32(i32* %offset1024)
call i32 @llvm.arm.strex.p0i32(i32 0, i32* %offset1024)
; CHECK-T2ADDRMODE: add.w r[[ADDR:[0-9]+]], {{r[0-9]+}}, #1024
; CHECK-T2ADDRMODE: ldrex {{r[0-9]+}}, [r[[ADDR]]]
; CHECK-T2ADDRMODE: strex {{r[0-9]+}}, {{r[0-9]+}}, [r[[ADDR]]]
- %base1 = load i32** @base
+ %base1 = load i32*, i32** @base
%addr8 = bitcast i32* %base1 to i8*
- %offset1_8 = getelementptr i8* %addr8, i32 1
+ %offset1_8 = getelementptr i8, i8* %addr8, i32 1
%offset1 = bitcast i8* %offset1_8 to i32*
call i32 @llvm.arm.ldrex.p0i32(i32* %offset1)
call i32 @llvm.arm.strex.p0i32(i32 0, i32* %offset1)
diff --git a/test/CodeGen/ARM/load-global.ll b/test/CodeGen/ARM/load-global.ll
index 00ca2e8b1b75..34748bc848bd 100644
--- a/test/CodeGen/ARM/load-global.ll
+++ b/test/CodeGen/ARM/load-global.ll
@@ -45,6 +45,6 @@ define i32 @test1() {
; LINUX: ldr r0, [r1, r0]
; LINUX: ldr r0, [r0]
; LINUX: .long G(GOT)
- %tmp = load i32* @G
+ %tmp = load i32, i32* @G
ret i32 %tmp
}
diff --git a/test/CodeGen/ARM/load.ll b/test/CodeGen/ARM/load.ll
index ca16adc00822..3b2d637cb26e 100644
--- a/test/CodeGen/ARM/load.ll
+++ b/test/CodeGen/ARM/load.ll
@@ -2,28 +2,28 @@
define i32 @f1(i8* %p) {
entry:
- %tmp = load i8* %p ; <i8> [#uses=1]
+ %tmp = load i8, i8* %p ; <i8> [#uses=1]
%tmp1 = sext i8 %tmp to i32 ; <i32> [#uses=1]
ret i32 %tmp1
}
define i32 @f2(i8* %p) {
entry:
- %tmp = load i8* %p ; <i8> [#uses=1]
+ %tmp = load i8, i8* %p ; <i8> [#uses=1]
%tmp2 = zext i8 %tmp to i32 ; <i32> [#uses=1]
ret i32 %tmp2
}
define i32 @f3(i16* %p) {
entry:
- %tmp = load i16* %p ; <i16> [#uses=1]
+ %tmp = load i16, i16* %p ; <i16> [#uses=1]
%tmp3 = sext i16 %tmp to i32 ; <i32> [#uses=1]
ret i32 %tmp3
}
define i32 @f4(i16* %p) {
entry:
- %tmp = load i16* %p ; <i16> [#uses=1]
+ %tmp = load i16, i16* %p ; <i16> [#uses=1]
%tmp4 = zext i16 %tmp to i32 ; <i32> [#uses=1]
ret i32 %tmp4
}
diff --git a/test/CodeGen/ARM/load_i1_select.ll b/test/CodeGen/ARM/load_i1_select.ll
index 7a208ea41752..48cd4eae58fa 100644
--- a/test/CodeGen/ARM/load_i1_select.ll
+++ b/test/CodeGen/ARM/load_i1_select.ll
@@ -11,7 +11,7 @@ target triple = "thumbv7-apple-ios0.0.0"
; CHECK: tst.w r[[R0]], #1
define void @foo(i8* %call, double* %p) nounwind {
entry:
- %tmp2 = load i8* %call
+ %tmp2 = load i8, i8* %call
%tmp3 = trunc i8 %tmp2 to i1
%cond = select i1 %tmp3, double 2.000000e+00, double 1.000000e+00
store double %cond, double* %p
diff --git a/test/CodeGen/ARM/long.ll b/test/CodeGen/ARM/long.ll
index d0bff4a906e0..1807813d9305 100644
--- a/test/CodeGen/ARM/long.ll
+++ b/test/CodeGen/ARM/long.ll
@@ -85,6 +85,6 @@ define i64 @f10() {
; CHECK-LABEL: f10:
entry:
%a = alloca i64, align 8 ; <i64*> [#uses=1]
- %retval = load i64* %a ; <i64> [#uses=1]
+ %retval = load i64, i64* %a ; <i64> [#uses=1]
ret i64 %retval
}
diff --git a/test/CodeGen/ARM/longMAC.ll b/test/CodeGen/ARM/longMAC.ll
index fed6ec02f32d..3f30fd40b7e7 100644
--- a/test/CodeGen/ARM/longMAC.ll
+++ b/test/CodeGen/ARM/longMAC.ll
@@ -75,3 +75,44 @@ define i64 @MACLongTest5(i64 %c, i32 %a, i32 %b) {
%add = add i64 %mul, %c
ret i64 %add
}
+
+define i64 @MACLongTest6(i32 %a, i32 %b, i32 %c, i32 %d) {
+;CHECK-LABEL: MACLongTest6:
+;CHECK: smull r12, lr, r1, r0
+;CHECK: smlal r12, lr, r3, r2
+ %conv = sext i32 %a to i64
+ %conv1 = sext i32 %b to i64
+ %mul = mul nsw i64 %conv1, %conv
+ %conv2 = sext i32 %c to i64
+ %conv3 = sext i32 %d to i64
+ %mul4 = mul nsw i64 %conv3, %conv2
+ %add = add nsw i64 %mul4, %mul
+ ret i64 %add
+}
+
+define i64 @MACLongTest7(i64 %acc, i32 %lhs, i32 %rhs) {
+;CHECK-LABEL: MACLongTest7:
+;CHECK-NOT: smlal
+ %conv = sext i32 %lhs to i64
+ %conv1 = sext i32 %rhs to i64
+ %mul = mul nsw i64 %conv1, %conv
+ %shl = shl i64 %mul, 32
+ %shr = lshr i64 %mul, 32
+ %or = or i64 %shl, %shr
+ %add = add i64 %or, %acc
+ ret i64 %add
+}
+
+define i64 @MACLongTest8(i64 %acc, i32 %lhs, i32 %rhs) {
+;CHECK-LABEL: MACLongTest8:
+;CHECK-NOT: smlal
+ %conv = zext i32 %lhs to i64
+ %conv1 = zext i32 %rhs to i64
+ %mul = mul nuw i64 %conv1, %conv
+ %and = and i64 %mul, 4294967295
+ %shl = shl i64 %mul, 32
+ %or = or i64 %and, %shl
+ %add = add i64 %or, %acc
+ ret i64 %add
+}
+
diff --git a/test/CodeGen/ARM/lsr-code-insertion.ll b/test/CodeGen/ARM/lsr-code-insertion.ll
index 153fd8fe34e4..aa2b2d26d121 100644
--- a/test/CodeGen/ARM/lsr-code-insertion.ll
+++ b/test/CodeGen/ARM/lsr-code-insertion.ll
@@ -22,17 +22,17 @@ entry:
bb: ; preds = %cond_next59, %entry
%indvar = phi i32 [ 0, %entry ], [ %k.069.0, %cond_next59 ] ; <i32> [#uses=6]
%k.069.0 = add i32 %indvar, 1 ; <i32> [#uses=3]
- %tmp3 = getelementptr i32* %mpp, i32 %indvar ; <i32*> [#uses=1]
- %tmp4 = load i32* %tmp3 ; <i32> [#uses=1]
- %tmp8 = getelementptr i32* %tpmm, i32 %indvar ; <i32*> [#uses=1]
- %tmp9 = load i32* %tmp8 ; <i32> [#uses=1]
+ %tmp3 = getelementptr i32, i32* %mpp, i32 %indvar ; <i32*> [#uses=1]
+ %tmp4 = load i32, i32* %tmp3 ; <i32> [#uses=1]
+ %tmp8 = getelementptr i32, i32* %tpmm, i32 %indvar ; <i32*> [#uses=1]
+ %tmp9 = load i32, i32* %tmp8 ; <i32> [#uses=1]
%tmp10 = add i32 %tmp9, %tmp4 ; <i32> [#uses=2]
- %tmp13 = getelementptr i32* %mc, i32 %k.069.0 ; <i32*> [#uses=5]
+ %tmp13 = getelementptr i32, i32* %mc, i32 %k.069.0 ; <i32*> [#uses=5]
store i32 %tmp10, i32* %tmp13
- %tmp17 = getelementptr i32* %ip, i32 %indvar ; <i32*> [#uses=1]
- %tmp18 = load i32* %tmp17 ; <i32> [#uses=1]
- %tmp22 = getelementptr i32* %tpim, i32 %indvar ; <i32*> [#uses=1]
- %tmp23 = load i32* %tmp22 ; <i32> [#uses=1]
+ %tmp17 = getelementptr i32, i32* %ip, i32 %indvar ; <i32*> [#uses=1]
+ %tmp18 = load i32, i32* %tmp17 ; <i32> [#uses=1]
+ %tmp22 = getelementptr i32, i32* %tpim, i32 %indvar ; <i32*> [#uses=1]
+ %tmp23 = load i32, i32* %tmp22 ; <i32> [#uses=1]
%tmp24 = add i32 %tmp23, %tmp18 ; <i32> [#uses=2]
%tmp30 = icmp sgt i32 %tmp24, %tmp10 ; <i1> [#uses=1]
br i1 %tmp30, label %cond_true, label %cond_next
@@ -42,9 +42,9 @@ cond_true: ; preds = %bb
br label %cond_next
cond_next: ; preds = %cond_true, %bb
- %tmp39 = load i32* %tmp13 ; <i32> [#uses=1]
- %tmp42 = getelementptr i32* %ms, i32 %k.069.0 ; <i32*> [#uses=1]
- %tmp43 = load i32* %tmp42 ; <i32> [#uses=1]
+ %tmp39 = load i32, i32* %tmp13 ; <i32> [#uses=1]
+ %tmp42 = getelementptr i32, i32* %ms, i32 %k.069.0 ; <i32*> [#uses=1]
+ %tmp43 = load i32, i32* %tmp42 ; <i32> [#uses=1]
%tmp44 = add i32 %tmp43, %tmp39 ; <i32> [#uses=2]
store i32 %tmp44, i32* %tmp13
%tmp52 = icmp slt i32 %tmp44, -987654321 ; <i1> [#uses=1]
diff --git a/test/CodeGen/ARM/lsr-icmp-imm.ll b/test/CodeGen/ARM/lsr-icmp-imm.ll
index 103642b8b72f..ef98727344e5 100644
--- a/test/CodeGen/ARM/lsr-icmp-imm.ll
+++ b/test/CodeGen/ARM/lsr-icmp-imm.ll
@@ -18,8 +18,8 @@ for.body: ; preds = %entry, %for.body
%bi.06 = phi i32 [ %i.addr.0.bi.0, %for.body ], [ 0, %entry ]
%i.addr.05 = phi i32 [ %sub, %for.body ], [ %i, %entry ]
%b.04 = phi i32 [ %.b.0, %for.body ], [ 0, %entry ]
- %arrayidx = getelementptr inbounds i32* %a, i32 %i.addr.05
- %0 = load i32* %arrayidx, align 4
+ %arrayidx = getelementptr inbounds i32, i32* %a, i32 %i.addr.05
+ %0 = load i32, i32* %arrayidx, align 4
%cmp1 = icmp sgt i32 %0, %b.04
%.b.0 = select i1 %cmp1, i32 %0, i32 %b.04
%i.addr.0.bi.0 = select i1 %cmp1, i32 %i.addr.05, i32 %bi.06
diff --git a/test/CodeGen/ARM/lsr-scale-addr-mode.ll b/test/CodeGen/ARM/lsr-scale-addr-mode.ll
index 948024163ba7..063ef76d95c9 100644
--- a/test/CodeGen/ARM/lsr-scale-addr-mode.ll
+++ b/test/CodeGen/ARM/lsr-scale-addr-mode.ll
@@ -9,7 +9,7 @@ entry:
cond_next: ; preds = %cond_next, %entry
%indvar = phi i32 [ 0, %entry ], [ %tmp25, %cond_next ] ; <i32> [#uses=1]
%tmp25 = add i32 %indvar, 1 ; <i32> [#uses=3]
- %tmp36 = getelementptr i32* %a, i32 %tmp25 ; <i32*> [#uses=1]
+ %tmp36 = getelementptr i32, i32* %a, i32 %tmp25 ; <i32*> [#uses=1]
store i32 0, i32* %tmp36
icmp eq i32 %tmp25, -1 ; <i1>:0 [#uses=1]
br i1 %0, label %return, label %cond_next
diff --git a/test/CodeGen/ARM/lsr-unfolded-offset.ll b/test/CodeGen/ARM/lsr-unfolded-offset.ll
index 3ad60d47b53b..17292cfe2896 100644
--- a/test/CodeGen/ARM/lsr-unfolded-offset.ll
+++ b/test/CodeGen/ARM/lsr-unfolded-offset.ll
@@ -23,10 +23,10 @@ entry:
outer.loop: ; preds = %for.inc69, %entry
%overlap.081 = phi i32 [ %overlap.4, %for.inc69 ], [ 0, %entry ]
%0 = phi i32 [ %inc71, %for.inc69 ], [ 0, %entry ]
- %offset = getelementptr %struct.partition_entry* %part, i32 %0, i32 2
- %len = getelementptr %struct.partition_entry* %part, i32 %0, i32 3
- %tmp5 = load i64* %offset, align 4
- %tmp15 = load i64* %len, align 4
+ %offset = getelementptr %struct.partition_entry, %struct.partition_entry* %part, i32 %0, i32 2
+ %len = getelementptr %struct.partition_entry, %struct.partition_entry* %part, i32 %0, i32 3
+ %tmp5 = load i64, i64* %offset, align 4
+ %tmp15 = load i64, i64* %len, align 4
%add = add nsw i64 %tmp15, %tmp5
br label %inner.loop
@@ -37,10 +37,10 @@ inner.loop: ; preds = %for.inc, %outer.loo
br i1 %cmp23, label %for.inc, label %if.end
if.end: ; preds = %inner.loop
- %len39 = getelementptr %struct.partition_entry* %part, i32 %1, i32 3
- %offset28 = getelementptr %struct.partition_entry* %part, i32 %1, i32 2
- %tmp29 = load i64* %offset28, align 4
- %tmp40 = load i64* %len39, align 4
+ %len39 = getelementptr %struct.partition_entry, %struct.partition_entry* %part, i32 %1, i32 3
+ %offset28 = getelementptr %struct.partition_entry, %struct.partition_entry* %part, i32 %1, i32 2
+ %tmp29 = load i64, i64* %offset28, align 4
+ %tmp40 = load i64, i64* %len39, align 4
%add41 = add nsw i64 %tmp40, %tmp29
%cmp44 = icmp sge i64 %tmp29, %tmp5
%cmp47 = icmp slt i64 %tmp29, %add
diff --git a/test/CodeGen/ARM/machine-cse-cmp.ll b/test/CodeGen/ARM/machine-cse-cmp.ll
index 7e4b309fd9d1..1f92ff4e1192 100644
--- a/test/CodeGen/ARM/machine-cse-cmp.ll
+++ b/test/CodeGen/ARM/machine-cse-cmp.ll
@@ -30,14 +30,14 @@ entry:
; CHECK: poplt
; CHECK-NOT: cmp
; CHECK: movle
- %0 = load i32* @foo, align 4
+ %0 = load i32, i32* @foo, align 4
%cmp28 = icmp sgt i32 %0, 0
br i1 %cmp28, label %for.body.lr.ph, label %for.cond1.preheader
for.body.lr.ph: ; preds = %entry
%1 = icmp sgt i32 %0, 1
%smax = select i1 %1, i32 %0, i32 1
- call void @llvm.memset.p0i8.i32(i8* getelementptr inbounds ([250 x i8]* @bar, i32 0, i32 0), i8 0, i32 %smax, i32 1, i1 false)
+ call void @llvm.memset.p0i8.i32(i8* getelementptr inbounds ([250 x i8], [250 x i8]* @bar, i32 0, i32 0), i8 0, i32 %smax, i32 1, i1 false)
unreachable
for.cond1.preheader: ; preds = %entry
@@ -53,7 +53,7 @@ entry:
; CHECK-NOT: sub
; CHECK: cmp
; CHECK: blt
-%0 = load i32* %offset, align 4
+%0 = load i32, i32* %offset, align 4
%cmp = icmp slt i32 %0, %size
%s = sub nsw i32 %0, %size
%size2 = sub nsw i32 %size, 0
@@ -70,7 +70,7 @@ if.end:
; CHECK-NOT: sub
; CHECK: str
store i32 %s3, i32* %offset, align 4
-%add.ptr = getelementptr inbounds i8* %base, i32 %sub
+%add.ptr = getelementptr inbounds i8, i8* %base, i32 %sub
br label %return
return:
diff --git a/test/CodeGen/ARM/machine-licm.ll b/test/CodeGen/ARM/machine-licm.ll
index ca6550178f92..a1eec78e453f 100644
--- a/test/CodeGen/ARM/machine-licm.ll
+++ b/test/CodeGen/ARM/machine-licm.ll
@@ -39,14 +39,14 @@ bb.nph: ; preds = %entry
; THUMB: LCPI0_0:
; THUMB-NOT: LCPI0_1:
; THUMB: .section
- %.pre = load i32* @GV, align 4 ; <i32> [#uses=1]
+ %.pre = load i32, i32* @GV, align 4 ; <i32> [#uses=1]
br label %bb
bb: ; preds = %bb, %bb.nph
%1 = phi i32 [ %.pre, %bb.nph ], [ %3, %bb ] ; <i32> [#uses=1]
%i.03 = phi i32 [ 0, %bb.nph ], [ %4, %bb ] ; <i32> [#uses=2]
- %scevgep = getelementptr i32* %vals, i32 %i.03 ; <i32*> [#uses=1]
- %2 = load i32* %scevgep, align 4 ; <i32> [#uses=1]
+ %scevgep = getelementptr i32, i32* %vals, i32 %i.03 ; <i32*> [#uses=1]
+ %2 = load i32, i32* %scevgep, align 4 ; <i32> [#uses=1]
%3 = add nsw i32 %1, %2 ; <i32> [#uses=2]
store i32 %3, i32* @GV, align 4
%4 = add i32 %i.03, 1 ; <i32> [#uses=2]
diff --git a/test/CodeGen/ARM/memcpy-inline.ll b/test/CodeGen/ARM/memcpy-inline.ll
index 84ce4a7f0e79..4ea26e1c59a3 100644
--- a/test/CodeGen/ARM/memcpy-inline.ll
+++ b/test/CodeGen/ARM/memcpy-inline.ll
@@ -23,20 +23,20 @@ entry:
; CHECK-T1: strb [[TREG1]],
; CHECK-T1: ldrh [[TREG2:r[0-9]]],
; CHECK-T1: strh [[TREG2]]
- call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds (%struct.x* @dst, i32 0, i32 0), i8* getelementptr inbounds (%struct.x* @src, i32 0, i32 0), i32 11, i32 8, i1 false)
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds (%struct.x, %struct.x* @dst, i32 0, i32 0), i8* getelementptr inbounds (%struct.x, %struct.x* @src, i32 0, i32 0), i32 11, i32 8, i1 false)
ret i32 0
}
define void @t1(i8* nocapture %C) nounwind {
entry:
; CHECK-LABEL: t1:
-; CHECK: vld1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1]
+; CHECK: vld1.64 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1]
; CHECK: vst1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r0]
; CHECK: adds r0, #15
; CHECK: adds r1, #15
; CHECK: vld1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1]
; CHECK: vst1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r0]
- tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([31 x i8]* @.str1, i64 0, i64 0), i64 31, i32 1, i1 false)
+ tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([31 x i8], [31 x i8]* @.str1, i64 0, i64 0), i64 31, i32 1, i1 false)
ret void
}
@@ -46,35 +46,32 @@ entry:
; CHECK: movw [[REG2:r[0-9]+]], #16716
; CHECK: movt [[REG2:r[0-9]+]], #72
; CHECK: str [[REG2]], [r0, #32]
-; CHECK: vld1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1]
-; CHECK: vst1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r0]
-; CHECK: adds r0, #16
-; CHECK: adds r1, #16
-; CHECK: vld1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1]
+; CHECK: vld1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1]!
+; CHECK: vst1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r0]!
+; CHECK: vld1.64 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1]
; CHECK: vst1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r0]
- tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([36 x i8]* @.str2, i64 0, i64 0), i64 36, i32 1, i1 false)
+ tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([36 x i8], [36 x i8]* @.str2, i64 0, i64 0), i64 36, i32 1, i1 false)
ret void
}
define void @t3(i8* nocapture %C) nounwind {
entry:
; CHECK-LABEL: t3:
-; CHECK: vld1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1]
-; CHECK: vst1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r0]
-; CHECK: adds r0, #16
-; CHECK: adds r1, #16
-; CHECK: vld1.8 {d{{[0-9]+}}}, [r1]
+; CHECK: vld1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1]!
+; CHECK: vst1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r0]!
+; CHECK: vldr d{{[0-9]+}}, [r1]
; CHECK: vst1.8 {d{{[0-9]+}}}, [r0]
- tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([24 x i8]* @.str3, i64 0, i64 0), i64 24, i32 1, i1 false)
+ tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([24 x i8], [24 x i8]* @.str3, i64 0, i64 0), i64 24, i32 1, i1 false)
ret void
}
define void @t4(i8* nocapture %C) nounwind {
entry:
; CHECK-LABEL: t4:
-; CHECK: vld1.8 {[[REG3:d[0-9]+]], [[REG4:d[0-9]+]]}, [r1]
-; CHECK: vst1.8 {[[REG3]], [[REG4]]}, [r0]
- tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([18 x i8]* @.str4, i64 0, i64 0), i64 18, i32 1, i1 false)
+; CHECK: vld1.64 {[[REG3:d[0-9]+]], [[REG4:d[0-9]+]]}, [r1]
+; CHECK: vst1.8 {[[REG3]], [[REG4]]}, [r0]!
+; CHECK: strh [[REG5:r[0-9]+]], [r0]
+ tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([18 x i8], [18 x i8]* @.str4, i64 0, i64 0), i64 18, i32 1, i1 false)
ret void
}
@@ -93,25 +90,25 @@ entry:
; CHECK-T1: strb [[TREG3]],
; CHECK-T1: movs [[TREG4:r[0-9]]],
; CHECK-T1: strb [[TREG4]],
- tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([7 x i8]* @.str5, i64 0, i64 0), i64 7, i32 1, i1 false)
+ tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([7 x i8], [7 x i8]* @.str5, i64 0, i64 0), i64 7, i32 1, i1 false)
ret void
}
define void @t6() nounwind {
entry:
; CHECK-LABEL: t6:
-; CHECK: vld1.8 {[[REG9:d[0-9]+]]}, [r0]
+; CHECK: vldr [[REG9:d[0-9]+]], [r0]
; CHECK: vstr [[REG9]], [r1]
; CHECK: adds r1, #6
; CHECK: adds r0, #6
-; CHECK: vld1.8
+; CHECK: vld1.16
; CHECK: vst1.16
; CHECK-T1-LABEL: t6:
; CHECK-T1: movs [[TREG5:r[0-9]]],
; CHECK-T1: strh [[TREG5]],
; CHECK-T1: ldr [[TREG6:r[0-9]]],
; CHECK-T1: str [[TREG6]]
- call void @llvm.memcpy.p0i8.p0i8.i64(i8* getelementptr inbounds ([512 x i8]* @spool.splbuf, i64 0, i64 0), i8* getelementptr inbounds ([14 x i8]* @.str6, i64 0, i64 0), i64 14, i32 1, i1 false)
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* getelementptr inbounds ([512 x i8], [512 x i8]* @spool.splbuf, i64 0, i64 0), i8* getelementptr inbounds ([14 x i8], [14 x i8]* @.str6, i64 0, i64 0), i64 14, i32 1, i1 false)
ret void
}
diff --git a/test/CodeGen/ARM/memfunc.ll b/test/CodeGen/ARM/memfunc.ll
index 8d3800b43c14..5223983a7f30 100644
--- a/test/CodeGen/ARM/memfunc.ll
+++ b/test/CodeGen/ARM/memfunc.ll
@@ -1,33 +1,375 @@
-; RUN: llc < %s -mtriple=armv7-apple-ios -o - | FileCheck %s
-; RUN: llc < %s -mtriple=thumbv7m-none-macho -o - | FileCheck %s --check-prefix=DARWIN
-; RUN: llc < %s -mtriple=arm-none-eabi -o - | FileCheck --check-prefix=EABI %s
-; RUN: llc < %s -mtriple=arm-none-eabihf -o - | FileCheck --check-prefix=EABI %s
+; RUN: llc < %s -mtriple=armv7-apple-ios -disable-post-ra -o - | FileCheck %s --check-prefix=CHECK-IOS --check-prefix=CHECK
+; RUN: llc < %s -mtriple=thumbv7m-none-macho -disable-post-ra -o - | FileCheck %s --check-prefix=CHECK-DARWIN --check-prefix=CHECK
+; RUN: llc < %s -mtriple=arm-none-eabi -disable-post-ra -o - | FileCheck %s --check-prefix=CHECK-EABI --check-prefix=CHECK
+; RUN: llc < %s -mtriple=arm-none-eabihf -disable-post-ra -o - | FileCheck %s --check-prefix=CHECK-EABI --check-prefix=CHECK
-@from = common global [500 x i32] zeroinitializer, align 4
-@to = common global [500 x i32] zeroinitializer, align 4
+define void @f1(i8* %dest, i8* %src) {
+entry:
+ ; CHECK-LABEL: f1
+
+ ; CHECK-IOS: memmove
+ ; CHECK-DARWIN: memmove
+ ; CHECK-EABI: __aeabi_memmove
+ call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 500, i32 0, i1 false)
+
+ ; CHECK-IOS: memcpy
+ ; CHECK-DARWIN: memcpy
+ ; CHECK-EABI: __aeabi_memcpy
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 500, i32 0, i1 false)
+
+ ; EABI memset swaps arguments
+ ; CHECK-IOS: mov r1, #1
+ ; CHECK-IOS: memset
+ ; CHECK-DARWIN: movs r1, #1
+ ; CHECK-DARWIN: memset
+ ; CHECK-EABI: mov r2, #1
+ ; CHECK-EABI: __aeabi_memset
+ call void @llvm.memset.p0i8.i32(i8* %dest, i8 1, i32 500, i32 0, i1 false)
+
+ ; EABI uses memclr if value set to 0
+ ; CHECK-IOS: mov r1, #0
+ ; CHECK-IOS: memset
+ ; CHECK-DARWIN: movs r1, #0
+ ; CHECK-DARWIN: memset
+ ; CHECK-EABI: __aeabi_memclr
+ call void @llvm.memset.p0i8.i32(i8* %dest, i8 0, i32 500, i32 0, i1 false)
+
+ ; EABI uses aligned function variants if possible
+
+ ; CHECK-IOS: memmove
+ ; CHECK-DARWIN: memmove
+ ; CHECK-EABI: __aeabi_memmove4
+ call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 500, i32 4, i1 false)
+
+ ; CHECK-IOS: memcpy
+ ; CHECK-DARWIN: memcpy
+ ; CHECK-EABI: __aeabi_memcpy4
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 500, i32 4, i1 false)
+
+ ; CHECK-IOS: memset
+ ; CHECK-DARWIN: memset
+ ; CHECK-EABI: __aeabi_memset4
+ call void @llvm.memset.p0i8.i32(i8* %dest, i8 1, i32 500, i32 4, i1 false)
+
+ ; CHECK-IOS: memset
+ ; CHECK-DARWIN: memset
+ ; CHECK-EABI: __aeabi_memclr4
+ call void @llvm.memset.p0i8.i32(i8* %dest, i8 0, i32 500, i32 4, i1 false)
+
+ ; CHECK-IOS: memmove
+ ; CHECK-DARWIN: memmove
+ ; CHECK-EABI: __aeabi_memmove8
+ call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 500, i32 8, i1 false)
+
+ ; CHECK-IOS: memcpy
+ ; CHECK-DARWIN: memcpy
+ ; CHECK-EABI: __aeabi_memcpy8
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 500, i32 8, i1 false)
+
+ ; CHECK-IOS: memset
+ ; CHECK-DARWIN: memset
+ ; CHECK-EABI: __aeabi_memset8
+ call void @llvm.memset.p0i8.i32(i8* %dest, i8 1, i32 500, i32 8, i1 false)
+
+ ; CHECK-IOS: memset
+ ; CHECK-DARWIN: memset
+ ; CHECK-EABI: __aeabi_memclr8
+ call void @llvm.memset.p0i8.i32(i8* %dest, i8 0, i32 500, i32 8, i1 false)
+
+ unreachable
+}
+
+; Check that alloca arguments to memory intrinsics are automatically aligned if at least 8 bytes in size
+define void @f2(i8* %dest, i32 %n) {
+entry:
+ ; CHECK-LABEL: f2
+
+ ; IOS (ARMv7) should 8-byte align, others should 4-byte align
+ ; CHECK-IOS: add r1, sp, #32
+ ; CHECK-IOS: memmove
+ ; CHECK-DARWIN: add r1, sp, #28
+ ; CHECK-DARWIN: memmove
+ ; CHECK-EABI: add r1, sp, #28
+ ; CHECK-EABI: __aeabi_memmove
+ %arr0 = alloca [9 x i8], align 1
+ %0 = bitcast [9 x i8]* %arr0 to i8*
+ call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %0, i32 %n, i32 0, i1 false)
+
+ ; CHECK: add r1, sp, #16
+ ; CHECK-IOS: memcpy
+ ; CHECK-DARWIN: memcpy
+ ; CHECK-EABI: __aeabi_memcpy
+ %arr1 = alloca [9 x i8], align 1
+ %1 = bitcast [9 x i8]* %arr1 to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %1, i32 %n, i32 0, i1 false)
+
+ ; CHECK-IOS: mov r0, sp
+ ; CHECK-IOS: mov r1, #1
+ ; CHECK-IOS: memset
+ ; CHECK-DARWIN: add r0, sp, #4
+ ; CHECK-DARWIN: movs r1, #1
+ ; CHECK-DARWIN: memset
+ ; CHECK-EABI: add r0, sp, #4
+ ; CHECK-EABI: mov r2, #1
+ ; CHECK-EABI: __aeabi_memset
+ %arr2 = alloca [9 x i8], align 1
+ %2 = bitcast [9 x i8]* %arr2 to i8*
+ call void @llvm.memset.p0i8.i32(i8* %2, i8 1, i32 %n, i32 0, i1 false)
+
+ unreachable
+}
+
+; Check that alloca arguments are not aligned if less than 8 bytes in size
+define void @f3(i8* %dest, i32 %n) {
+entry:
+ ; CHECK-LABEL: f3
-define void @f() {
+ ; CHECK: {{add(.w)? r1, sp, #17|sub(.w)? r1, r7, #15}}
+ ; CHECK-IOS: memmove
+ ; CHECK-DARWIN: memmove
+ ; CHECK-EABI: __aeabi_memmove
+ %arr0 = alloca [7 x i8], align 1
+ %0 = bitcast [7 x i8]* %arr0 to i8*
+ call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %0, i32 %n, i32 0, i1 false)
+
+ ; CHECK: {{add(.w)? r1, sp, #10}}
+ ; CHECK-IOS: memcpy
+ ; CHECK-DARWIN: memcpy
+ ; CHECK-EABI: __aeabi_memcpy
+ %arr1 = alloca [7 x i8], align 1
+ %1 = bitcast [7 x i8]* %arr1 to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %1, i32 %n, i32 0, i1 false)
+
+ ; CHECK: {{add(.w)? r0, sp, #3}}
+ ; CHECK-IOS: mov r1, #1
+ ; CHECK-IOS: memset
+ ; CHECK-DARWIN: movs r1, #1
+ ; CHECK-DARWIN: memset
+ ; CHECK-EABI: mov r2, #1
+ ; CHECK-EABI: __aeabi_memset
+ %arr2 = alloca [7 x i8], align 1
+ %2 = bitcast [7 x i8]* %arr2 to i8*
+ call void @llvm.memset.p0i8.i32(i8* %2, i8 1, i32 %n, i32 0, i1 false)
+
+ unreachable
+}
+
+; Check that alloca arguments are not aligned if size+offset is less than 8 bytes
+define void @f4(i8* %dest, i32 %n) {
entry:
+ ; CHECK-LABEL: f4
+
+ ; CHECK: {{add(.w)? r., sp, #23|sub(.w)? r., r7, #17}}
+ ; CHECK-IOS: memmove
+ ; CHECK-DARWIN: memmove
+ ; CHECK-EABI: __aeabi_memmove
+ %arr0 = alloca [9 x i8], align 1
+ %0 = getelementptr inbounds [9 x i8], [9 x i8]* %arr0, i32 0, i32 4
+ call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %0, i32 %n, i32 0, i1 false)
- ; CHECK: memmove
- ; EABI: __aeabi_memmove
- call void @llvm.memmove.p0i8.p0i8.i32(i8* bitcast ([500 x i32]* @from to i8*), i8* bitcast ([500 x i32]* @to to i8*), i32 500, i32 0, i1 false)
+ ; CHECK: {{add(.w)? r., sp, #(10|14)}}
+ ; CHECK-IOS: memcpy
+ ; CHECK-DARWIN: memcpy
+ ; CHECK-EABI: __aeabi_memcpy
+ %arr1 = alloca [9 x i8], align 1
+ %1 = getelementptr inbounds [9 x i8], [9 x i8]* %arr1, i32 0, i32 4
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %1, i32 %n, i32 0, i1 false)
- ; CHECK: memcpy
- ; EABI: __aeabi_memcpy
- call void @llvm.memcpy.p0i8.p0i8.i32(i8* bitcast ([500 x i32]* @from to i8*), i8* bitcast ([500 x i32]* @to to i8*), i32 500, i32 0, i1 false)
+ ; CHECK: {{add(.w)? r., sp, #(1|5)}}
+ ; CHECK-IOS: mov r1, #1
+ ; CHECK-IOS: memset
+ ; CHECK-DARWIN: movs r1, #1
+ ; CHECK-DARWIN: memset
+ ; CHECK-EABI: mov r2, #1
+ ; CHECK-EABI: __aeabi_memset
+ %arr2 = alloca [9 x i8], align 1
+ %2 = getelementptr inbounds [9 x i8], [9 x i8]* %arr2, i32 0, i32 4
+ call void @llvm.memset.p0i8.i32(i8* %2, i8 1, i32 %n, i32 0, i1 false)
- ; EABI memset swaps arguments
- ; CHECK: mov r1, #0
- ; CHECK: memset
- ; DARWIN: movs r1, #0
- ; DARWIN: memset
- ; EABI: mov r2, #0
- ; EABI: __aeabi_memset
- call void @llvm.memset.p0i8.i32(i8* bitcast ([500 x i32]* @from to i8*), i8 0, i32 500, i32 0, i1 false)
- unreachable
+ unreachable
}
+; Check that alloca arguments are not aligned if the offset is not a multiple of 4
+define void @f5(i8* %dest, i32 %n) {
+entry:
+ ; CHECK-LABEL: f5
+
+ ; CHECK: {{add(.w)? r., sp, #27|sub(.w)? r., r7, #21}}
+ ; CHECK-IOS: memmove
+ ; CHECK-DARWIN: memmove
+ ; CHECK-EABI: __aeabi_memmove
+ %arr0 = alloca [13 x i8], align 1
+ %0 = getelementptr inbounds [13 x i8], [13 x i8]* %arr0, i32 0, i32 1
+ call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %0, i32 %n, i32 0, i1 false)
+
+ ; CHECK: {{add(.w)? r., sp, #(10|14)}}
+ ; CHECK-IOS: memcpy
+ ; CHECK-DARWIN: memcpy
+ ; CHECK-EABI: __aeabi_memcpy
+ %arr1 = alloca [13 x i8], align 1
+ %1 = getelementptr inbounds [13 x i8], [13 x i8]* %arr1, i32 0, i32 1
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %1, i32 %n, i32 0, i1 false)
+
+ ; CHECK: {{add(.w)? r., sp, #(1|5)}}
+ ; CHECK-IOS: mov r1, #1
+ ; CHECK-IOS: memset
+ ; CHECK-DARWIN: movs r1, #1
+ ; CHECK-DARWIN: memset
+ ; CHECK-EABI: mov r2, #1
+ ; CHECK-EABI: __aeabi_memset
+ %arr2 = alloca [13 x i8], align 1
+ %2 = getelementptr inbounds [13 x i8], [13 x i8]* %arr2, i32 0, i32 1
+ call void @llvm.memset.p0i8.i32(i8* %2, i8 1, i32 %n, i32 0, i1 false)
+
+ unreachable
+}
+
+; Check that alloca arguments are not aligned if the offset is unknown
+define void @f6(i8* %dest, i32 %n, i32 %i) {
+entry:
+ ; CHECK-LABEL: f6
+
+ ; CHECK: {{add(.w)? r., sp, #27|sub(.w)? r., r7, #25}}
+ ; CHECK-IOS: memmove
+ ; CHECK-DARWIN: memmove
+ ; CHECK-EABI: __aeabi_memmove
+ %arr0 = alloca [13 x i8], align 1
+ %0 = getelementptr inbounds [13 x i8], [13 x i8]* %arr0, i32 0, i32 %i
+ call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %0, i32 %n, i32 0, i1 false)
+
+ ; CHECK: {{add(.w)? r., sp, #(10|14)}}
+ ; CHECK-IOS: memcpy
+ ; CHECK-DARWIN: memcpy
+ ; CHECK-EABI: __aeabi_memcpy
+ %arr1 = alloca [13 x i8], align 1
+ %1 = getelementptr inbounds [13 x i8], [13 x i8]* %arr1, i32 0, i32 %i
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %1, i32 %n, i32 0, i1 false)
+
+ ; CHECK: {{add(.w)? r., sp, #(1|5)}}
+ ; CHECK-IOS: mov r1, #1
+ ; CHECK-IOS: memset
+ ; CHECK-DARWIN: movs r1, #1
+ ; CHECK-DARWIN: memset
+ ; CHECK-EABI: mov r2, #1
+ ; CHECK-EABI: __aeabi_memset
+ %arr2 = alloca [13 x i8], align 1
+ %2 = getelementptr inbounds [13 x i8], [13 x i8]* %arr2, i32 0, i32 %i
+ call void @llvm.memset.p0i8.i32(i8* %2, i8 1, i32 %n, i32 0, i1 false)
+
+ unreachable
+}
+
+; Check that alloca arguments are not aligned if the GEP is not inbounds
+define void @f7(i8* %dest, i32 %n) {
+entry:
+ ; CHECK-LABEL: f7
+
+ ; CHECK: {{add(.w)? r., sp, #27|sub(.w)? r., r7, #21}}
+ ; CHECK-IOS: memmove
+ ; CHECK-DARWIN: memmove
+ ; CHECK-EABI: __aeabi_memmove
+ %arr0 = alloca [13 x i8], align 1
+ %0 = getelementptr [13 x i8], [13 x i8]* %arr0, i32 0, i32 4
+ call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %0, i32 %n, i32 0, i1 false)
+
+ ; CHECK: {{add(.w)? r., sp, #(10|14)}}
+ ; CHECK-IOS: memcpy
+ ; CHECK-DARWIN: memcpy
+ ; CHECK-EABI: __aeabi_memcpy
+ %arr1 = alloca [13 x i8], align 1
+ %1 = getelementptr [13 x i8], [13 x i8]* %arr1, i32 0, i32 4
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %1, i32 %n, i32 0, i1 false)
+
+ ; CHECK: {{add(.w)? r., sp, #(1|5)}}
+ ; CHECK-IOS: mov r1, #1
+ ; CHECK-IOS: memset
+ ; CHECK-DARWIN: movs r1, #1
+ ; CHECK-DARWIN: memset
+ ; CHECK-EABI: mov r2, #1
+ ; CHECK-EABI: __aeabi_memset
+ %arr2 = alloca [13 x i8], align 1
+ %2 = getelementptr [13 x i8], [13 x i8]* %arr2, i32 0, i32 4
+ call void @llvm.memset.p0i8.i32(i8* %2, i8 1, i32 %n, i32 0, i1 false)
+
+ unreachable
+}
+
+; Check that alloca arguments are not aligned when the offset is past the end of the allocation
+define void @f8(i8* %dest, i32 %n) {
+entry:
+ ; CHECK-LABEL: f8
+
+ ; CHECK: {{add(.w)? r., sp, #27|sub(.w)? r., r7, #21}}
+ ; CHECK-IOS: memmove
+ ; CHECK-DARWIN: memmove
+ ; CHECK-EABI: __aeabi_memmove
+ %arr0 = alloca [13 x i8], align 1
+ %0 = getelementptr inbounds [13 x i8], [13 x i8]* %arr0, i32 0, i32 16
+ call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %0, i32 %n, i32 0, i1 false)
+
+ ; CHECK: {{add(.w)? r., sp, #(10|14)}}
+ ; CHECK-IOS: memcpy
+ ; CHECK-DARWIN: memcpy
+ ; CHECK-EABI: __aeabi_memcpy
+ %arr1 = alloca [13 x i8], align 1
+ %1 = getelementptr inbounds [13 x i8], [13 x i8]* %arr1, i32 0, i32 16
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %1, i32 %n, i32 0, i1 false)
+
+ ; CHECK: {{add(.w)? r., sp, #(1|5)}}
+ ; CHECK-IOS: mov r1, #1
+ ; CHECK-IOS: memset
+ ; CHECK-DARWIN: movs r1, #1
+ ; CHECK-DARWIN: memset
+ ; CHECK-EABI: mov r2, #1
+ ; CHECK-EABI: __aeabi_memset
+ %arr2 = alloca [13 x i8], align 1
+ %2 = getelementptr inbounds [13 x i8], [13 x i8]* %arr2, i32 0, i32 16
+ call void @llvm.memset.p0i8.i32(i8* %2, i8 1, i32 %n, i32 0, i1 false)
+
+ unreachable
+}
+
+; Check that global variables are aligned if they are large enough, but only if
+; they are defined in this object and don't have an explicit section.
+@arr1 = global [7 x i8] c"\01\02\03\04\05\06\07", align 1
+@arr2 = global [8 x i8] c"\01\02\03\04\05\06\07\08", align 1
+@arr3 = global [7 x i8] c"\01\02\03\04\05\06\07", section "foo,bar", align 1
+@arr4 = global [8 x i8] c"\01\02\03\04\05\06\07\08", section "foo,bar", align 1
+@arr5 = weak global [7 x i8] c"\01\02\03\04\05\06\07", align 1
+@arr6 = weak_odr global [7 x i8] c"\01\02\03\04\05\06\07", align 1
+@arr7 = external global [7 x i8], align 1
+define void @f9(i8* %dest, i32 %n) {
+entry:
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* getelementptr inbounds ([7 x i8], [7 x i8]* @arr1, i32 0, i32 0), i32 %n, i32 1, i1 false)
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* getelementptr inbounds ([8 x i8], [8 x i8]* @arr2, i32 0, i32 0), i32 %n, i32 1, i1 false)
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* getelementptr inbounds ([7 x i8], [7 x i8]* @arr3, i32 0, i32 0), i32 %n, i32 1, i1 false)
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* getelementptr inbounds ([8 x i8], [8 x i8]* @arr4, i32 0, i32 0), i32 %n, i32 1, i1 false)
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* getelementptr inbounds ([7 x i8], [7 x i8]* @arr5, i32 0, i32 0), i32 %n, i32 1, i1 false)
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* getelementptr inbounds ([7 x i8], [7 x i8]* @arr6, i32 0, i32 0), i32 %n, i32 1, i1 false)
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* getelementptr inbounds ([7 x i8], [7 x i8]* @arr7, i32 0, i32 0), i32 %n, i32 1, i1 false)
+
+ unreachable
+}
+
+; CHECK: {{\.data|\.section.+data}}
+; CHECK-NOT: .align
+; CHECK: arr1:
+; CHECK-IOS: .align 3
+; CHECK-DARWIN: .align 2
+; CHECK-EABI: .align 2
+; CHECK: arr2:
+; CHECK: {{\.section.+foo,bar}}
+; CHECK-NOT: .align
+; CHECK: arr3:
+; CHECK-NOT: .align
+; CHECK: arr4:
+; CHECK: {{\.data|\.section.+data}}
+; CHECK-NOT: .align
+; CHECK: arr5:
+; CHECK-NOT: .align
+; CHECK: arr6:
+; CHECK-NOT: arr7:
+
declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind
diff --git a/test/CodeGen/ARM/memset-inline.ll b/test/CodeGen/ARM/memset-inline.ll
index 4e86d05b0a1c..191db1e20a25 100644
--- a/test/CodeGen/ARM/memset-inline.ll
+++ b/test/CodeGen/ARM/memset-inline.ll
@@ -17,9 +17,9 @@ entry:
; CHECK: add.w r1, r0, #10
; CHECK: vmov.i32 {{q[0-9]+}}, #0x0
; CHECK: vst1.16 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1]
-; CHECK: vst1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r0]
+; CHECK: vst1.64 {d{{[0-9]+}}, d{{[0-9]+}}}, [r0]
%buf = alloca [26 x i8], align 1
- %0 = getelementptr inbounds [26 x i8]* %buf, i32 0, i32 0
+ %0 = getelementptr inbounds [26 x i8], [26 x i8]* %buf, i32 0, i32 0
call void @llvm.memset.p0i8.i32(i8* %0, i8 0, i32 26, i32 1, i1 false)
call void @something(i8* %0) nounwind
ret void
diff --git a/test/CodeGen/ARM/minsize-litpools.ll b/test/CodeGen/ARM/minsize-litpools.ll
index d5cd2a9b72e1..d5452ed0f909 100644
--- a/test/CodeGen/ARM/minsize-litpools.ll
+++ b/test/CodeGen/ARM/minsize-litpools.ll
@@ -12,7 +12,7 @@ define i32 @small_global() minsize {
; CHECK: ldr r[[GLOBDEST:[0-9]+]], {{.?LCPI0_0}}
; CHECK: ldr r0, [r[[GLOBDEST]]]
- %val = load i32* @var
+ %val = load i32, i32* @var
ret i32 %val
}
@@ -21,6 +21,6 @@ define i32 @big_global() {
; CHECK: movw [[GLOBDEST:r[0-9]+]], :lower16:var
; CHECK: movt [[GLOBDEST]], :upper16:var
- %val = load i32* @var
+ %val = load i32, i32* @var
ret i32 %val
}
diff --git a/test/CodeGen/ARM/misched-copy-arm.ll b/test/CodeGen/ARM/misched-copy-arm.ll
index bb2d42ca9ede..330252a90d7c 100644
--- a/test/CodeGen/ARM/misched-copy-arm.ll
+++ b/test/CodeGen/ARM/misched-copy-arm.ll
@@ -18,8 +18,8 @@ for.body: ; preds = %entry, %for.body
%indvars.iv = phi i32 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
%s.05 = phi i32 [ %mul, %for.body ], [ 0, %entry ]
%indvars.iv.next = add i32 %indvars.iv, %s
- %arrayidx = getelementptr inbounds i32* %d, i32 %indvars.iv
- %0 = load i32* %arrayidx, align 4
+ %arrayidx = getelementptr inbounds i32, i32* %d, i32 %indvars.iv
+ %0 = load i32, i32* %arrayidx, align 4
%mul = mul nsw i32 %0, %s.05
%exitcond = icmp eq i32 %indvars.iv.next, %a
br i1 %exitcond, label %for.end, label %for.body
@@ -63,9 +63,9 @@ if.then24: ; preds = %while.cond
if.end28: ; preds = %if.then24, %while.cond, %while.cond
%dst.1 = phi %struct.rtx_def* [ undef, %if.then24 ], [ %dst.0, %while.cond ], [ %dst.0, %while.cond ]
- %arrayidx30 = getelementptr inbounds %struct.rtx_def* %dst.1, i32 0, i32 1, i32 0
+ %arrayidx30 = getelementptr inbounds %struct.rtx_def, %struct.rtx_def* %dst.1, i32 0, i32 1, i32 0
%rtx31 = bitcast %union.rtunion_def* %arrayidx30 to %struct.rtx_def**
- %0 = load %struct.rtx_def** %rtx31, align 4
+ %0 = load %struct.rtx_def*, %struct.rtx_def** %rtx31, align 4
br label %while.cond
if.then46: ; preds = %while.cond
diff --git a/test/CodeGen/ARM/movcc-double.ll b/test/CodeGen/ARM/movcc-double.ll
new file mode 100644
index 000000000000..9ce708d9bd36
--- /dev/null
+++ b/test/CodeGen/ARM/movcc-double.ll
@@ -0,0 +1,50 @@
+; RUN: llc -o - %s | FileCheck %s
+target triple = "arm-unknown-unknown"
+
+; select with and i1/or i1 condition should be implemented as a series of 2
+; cmovs, not by producing two conditions and using and on them.
+
+define i32 @select_and(i32 %a0, i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5) {
+; CHECK-LABEL: select_and
+; CHECK-NOT: tst
+; CHECK-NOT: movne
+; CHECK: mov{{lo|hs}}
+; CHECK: mov{{lo|hs}}
+ %cmp0 = icmp ult i32 %a0, %a1
+ %cmp1 = icmp ult i32 %a2, %a3
+ %and = and i1 %cmp0, %cmp1
+ %res = select i1 %and, i32 %a4, i32 %a5
+ ret i32 %res
+}
+
+define i32 @select_or(i32 %a0, i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5) {
+; select with or i1 condition should be implemented as a series of 2 cmovs, not
+; by producing two conditions and using or on them.
+; CHECK-LABEL: select_or
+; CHECK-NOT: orss
+; CHECK-NOT: tst
+; CHECK: mov{{lo|hs}}
+; CHECK: mov{{lo|hs}}
+ %cmp0 = icmp ult i32 %a0, %a1
+ %cmp1 = icmp ult i32 %a2, %a3
+ %and = or i1 %cmp0, %cmp1
+ %res = select i1 %and, i32 %a4, i32 %a5
+ ret i32 %res
+}
+
+; If one of the conditions is materialized as a 0/1 value anyway, then the
+; sequence of 2 cmovs should not be used.
+
+@var32 = global i32 0
+define i32 @select_noopt(i32 %a0, i32 %a1, i32 %a2, i32 %a3, i32 %a4) {
+; CHECK-LABEL: select_noopt
+; CHECK: orrs
+; CHECK: movne
+ %cmp0 = icmp ult i32 %a0, %a1
+ %cmp1 = icmp ult i32 %a1, %a2
+ %or = or i1 %cmp0, %cmp1
+ %zero_one = zext i1 %or to i32
+ store volatile i32 %zero_one, i32* @var32
+ %res = select i1 %or, i32 %a3, i32 %a4
+ ret i32 %res
+}
diff --git a/test/CodeGen/ARM/mult-alt-generic-arm.ll b/test/CodeGen/ARM/mult-alt-generic-arm.ll
index 05e9b0facd6c..208b05475870 100644
--- a/test/CodeGen/ARM/mult-alt-generic-arm.ll
+++ b/test/CodeGen/ARM/mult-alt-generic-arm.ll
@@ -33,10 +33,10 @@ entry:
%in1 = alloca i32, align 4
store i32 0, i32* %out0, align 4
store i32 1, i32* %in1, align 4
- %tmp = load i32* %in1, align 4
+ %tmp = load i32, i32* %in1, align 4
%0 = call i32 asm "foo $1,$0", "=r,<r"(i32 %tmp) nounwind
store i32 %0, i32* %out0, align 4
- %tmp1 = load i32* %in1, align 4
+ %tmp1 = load i32, i32* %in1, align 4
%1 = call i32 asm "foo $1,$0", "=r,r<"(i32 %tmp1) nounwind
store i32 %1, i32* %out0, align 4
ret void
@@ -48,10 +48,10 @@ entry:
%in1 = alloca i32, align 4
store i32 0, i32* %out0, align 4
store i32 1, i32* %in1, align 4
- %tmp = load i32* %in1, align 4
+ %tmp = load i32, i32* %in1, align 4
%0 = call i32 asm "foo $1,$0", "=r,>r"(i32 %tmp) nounwind
store i32 %0, i32* %out0, align 4
- %tmp1 = load i32* %in1, align 4
+ %tmp1 = load i32, i32* %in1, align 4
%1 = call i32 asm "foo $1,$0", "=r,r>"(i32 %tmp1) nounwind
store i32 %1, i32* %out0, align 4
ret void
@@ -63,7 +63,7 @@ entry:
%in1 = alloca i32, align 4
store i32 0, i32* %out0, align 4
store i32 1, i32* %in1, align 4
- %tmp = load i32* %in1, align 4
+ %tmp = load i32, i32* %in1, align 4
%0 = call i32 asm "foo $1,$0", "=r,r"(i32 %tmp) nounwind
store i32 %0, i32* %out0, align 4
ret void
@@ -120,10 +120,10 @@ entry:
%in1 = alloca i32, align 4
store i32 0, i32* %out0, align 4
store i32 1, i32* %in1, align 4
- %tmp = load i32* %in1, align 4
+ %tmp = load i32, i32* %in1, align 4
%0 = call i32 asm "foo $1,$0", "=r,imr"(i32 %tmp) nounwind
store i32 %0, i32* %out0, align 4
- %tmp1 = load i32* @min1, align 4
+ %tmp1 = load i32, i32* @min1, align 4
%1 = call i32 asm "foo $1,$0", "=r,imr"(i32 %tmp1) nounwind
store i32 %1, i32* %out0, align 4
%2 = call i32 asm "foo $1,$0", "=r,imr"(i32 1) nounwind
@@ -137,15 +137,15 @@ entry:
%in1 = alloca i32, align 4
store i32 0, i32* %out0, align 4
store i32 1, i32* %in1, align 4
- %tmp = load i32* %in1, align 4
+ %tmp = load i32, i32* %in1, align 4
%0 = call i32 asm "foo $1,$0", "=r,X"(i32 %tmp) nounwind
store i32 %0, i32* %out0, align 4
- %tmp1 = load i32* @min1, align 4
+ %tmp1 = load i32, i32* @min1, align 4
%1 = call i32 asm "foo $1,$0", "=r,X"(i32 %tmp1) nounwind
store i32 %1, i32* %out0, align 4
%2 = call i32 asm "foo $1,$0", "=r,X"(i32 1) nounwind
store i32 %2, i32* %out0, align 4
- %3 = call i32 asm "foo $1,$0", "=r,X"(i32* getelementptr inbounds ([2 x i32]* @marray, i32 0, i32 0)) nounwind
+ %3 = call i32 asm "foo $1,$0", "=r,X"(i32* getelementptr inbounds ([2 x i32], [2 x i32]* @marray, i32 0, i32 0)) nounwind
store i32 %3, i32* %out0, align 4
; No lowering support.
; %4 = call i32 asm "foo $1,$0", "=r,X"(double 1.000000e+001) nounwind
@@ -159,14 +159,14 @@ define arm_aapcscc void @single_p() nounwind {
entry:
%out0 = alloca i32, align 4
store i32 0, i32* %out0, align 4
- %0 = call i32 asm "foo $1,$0", "=r,r"(i32* getelementptr inbounds ([2 x i32]* @marray, i32 0, i32 0)) nounwind
+ %0 = call i32 asm "foo $1,$0", "=r,r"(i32* getelementptr inbounds ([2 x i32], [2 x i32]* @marray, i32 0, i32 0)) nounwind
store i32 %0, i32* %out0, align 4
ret void
}
define arm_aapcscc void @multi_m() nounwind {
entry:
- %tmp = load i32* @min1, align 4
+ %tmp = load i32, i32* @min1, align 4
call void asm "foo $1,$0", "=*m|r,m|r"(i32* @mout0, i32 %tmp) nounwind
ret void
}
@@ -191,10 +191,10 @@ entry:
%in1 = alloca i32, align 4
store i32 0, i32* %out0, align 4
store i32 1, i32* %in1, align 4
- %tmp = load i32* %in1, align 4
+ %tmp = load i32, i32* %in1, align 4
%0 = call i32 asm "foo $1,$0", "=r|r,r|<r"(i32 %tmp) nounwind
store i32 %0, i32* %out0, align 4
- %tmp1 = load i32* %in1, align 4
+ %tmp1 = load i32, i32* %in1, align 4
%1 = call i32 asm "foo $1,$0", "=r|r,r|r<"(i32 %tmp1) nounwind
store i32 %1, i32* %out0, align 4
ret void
@@ -206,10 +206,10 @@ entry:
%in1 = alloca i32, align 4
store i32 0, i32* %out0, align 4
store i32 1, i32* %in1, align 4
- %tmp = load i32* %in1, align 4
+ %tmp = load i32, i32* %in1, align 4
%0 = call i32 asm "foo $1,$0", "=r|r,r|>r"(i32 %tmp) nounwind
store i32 %0, i32* %out0, align 4
- %tmp1 = load i32* %in1, align 4
+ %tmp1 = load i32, i32* %in1, align 4
%1 = call i32 asm "foo $1,$0", "=r|r,r|r>"(i32 %tmp1) nounwind
store i32 %1, i32* %out0, align 4
ret void
@@ -221,7 +221,7 @@ entry:
%in1 = alloca i32, align 4
store i32 0, i32* %out0, align 4
store i32 1, i32* %in1, align 4
- %tmp = load i32* %in1, align 4
+ %tmp = load i32, i32* %in1, align 4
%0 = call i32 asm "foo $1,$0", "=r|r,r|m"(i32 %tmp) nounwind
store i32 %0, i32* %out0, align 4
ret void
@@ -278,10 +278,10 @@ entry:
%in1 = alloca i32, align 4
store i32 0, i32* %out0, align 4
store i32 1, i32* %in1, align 4
- %tmp = load i32* %in1, align 4
+ %tmp = load i32, i32* %in1, align 4
%0 = call i32 asm "foo $1,$0", "=r|r,r|imr"(i32 %tmp) nounwind
store i32 %0, i32* %out0, align 4
- %tmp1 = load i32* @min1, align 4
+ %tmp1 = load i32, i32* @min1, align 4
%1 = call i32 asm "foo $1,$0", "=r|r,r|imr"(i32 %tmp1) nounwind
store i32 %1, i32* %out0, align 4
%2 = call i32 asm "foo $1,$0", "=r|r,r|imr"(i32 1) nounwind
@@ -295,15 +295,15 @@ entry:
%in1 = alloca i32, align 4
store i32 0, i32* %out0, align 4
store i32 1, i32* %in1, align 4
- %tmp = load i32* %in1, align 4
+ %tmp = load i32, i32* %in1, align 4
%0 = call i32 asm "foo $1,$0", "=r|r,r|X"(i32 %tmp) nounwind
store i32 %0, i32* %out0, align 4
- %tmp1 = load i32* @min1, align 4
+ %tmp1 = load i32, i32* @min1, align 4
%1 = call i32 asm "foo $1,$0", "=r|r,r|X"(i32 %tmp1) nounwind
store i32 %1, i32* %out0, align 4
%2 = call i32 asm "foo $1,$0", "=r|r,r|X"(i32 1) nounwind
store i32 %2, i32* %out0, align 4
- %3 = call i32 asm "foo $1,$0", "=r|r,r|X"(i32* getelementptr inbounds ([2 x i32]* @marray, i32 0, i32 0)) nounwind
+ %3 = call i32 asm "foo $1,$0", "=r|r,r|X"(i32* getelementptr inbounds ([2 x i32], [2 x i32]* @marray, i32 0, i32 0)) nounwind
store i32 %3, i32* %out0, align 4
; No lowering support.
; %4 = call i32 asm "foo $1,$0", "=r|r,r|X"(double 1.000000e+001) nounwind
@@ -317,7 +317,7 @@ define arm_aapcscc void @multi_p() nounwind {
entry:
%out0 = alloca i32, align 4
store i32 0, i32* %out0, align 4
- %0 = call i32 asm "foo $1,$0", "=r|r,r|r"(i32* getelementptr inbounds ([2 x i32]* @marray, i32 0, i32 0)) nounwind
+ %0 = call i32 asm "foo $1,$0", "=r|r,r|r"(i32* getelementptr inbounds ([2 x i32], [2 x i32]* @marray, i32 0, i32 0)) nounwind
store i32 %0, i32* %out0, align 4
ret void
}
diff --git a/test/CodeGen/ARM/negative-offset.ll b/test/CodeGen/ARM/negative-offset.ll
index 7b949fd71fe1..bafc9645471c 100644
--- a/test/CodeGen/ARM/negative-offset.ll
+++ b/test/CodeGen/ARM/negative-offset.ll
@@ -7,10 +7,10 @@ entry:
;CHECK-NOT: sub
;CHECK: ldr r{{.*}}, [r0, #-16]
;CHECK: ldr r{{.*}}, [r0, #-8]
- %arrayidx = getelementptr inbounds i32* %p, i32 -4
- %0 = load i32* %arrayidx, align 4
- %arrayidx1 = getelementptr inbounds i32* %p, i32 -2
- %1 = load i32* %arrayidx1, align 4
+ %arrayidx = getelementptr inbounds i32, i32* %p, i32 -4
+ %0 = load i32, i32* %arrayidx, align 4
+ %arrayidx1 = getelementptr inbounds i32, i32* %p, i32 -2
+ %1 = load i32, i32* %arrayidx1, align 4
%add = add nsw i32 %1, %0
ret i32 %add
}
diff --git a/test/CodeGen/ARM/neon-spfp.ll b/test/CodeGen/ARM/neon-spfp.ll
index dd2e67fe7753..4eeaa8abfab2 100644
--- a/test/CodeGen/ARM/neon-spfp.ll
+++ b/test/CodeGen/ARM/neon-spfp.ll
@@ -64,7 +64,7 @@ for.body: ; preds = %for.body, %entry
; CHECK-DARWINA15: vmul.f32 s{{[0-9]*}}
; CHECK-DARWINSWIFT: vmul.f32 d{{[0-9]*}}
%conv = fpext float %mul to double
- %call = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([12 x i8]* @.str, i32 0, i32 0), double %conv) #1
+ %call = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i32 0, i32 0), double %conv) #1
%inc = add nsw i32 %i.04, 1
%exitcond = icmp eq i32 %inc, 16000
br i1 %exitcond, label %for.end, label %for.body
diff --git a/test/CodeGen/ARM/neon-v8.1a.ll b/test/CodeGen/ARM/neon-v8.1a.ll
new file mode 100644
index 000000000000..91259139d446
--- /dev/null
+++ b/test/CodeGen/ARM/neon-v8.1a.ll
@@ -0,0 +1,166 @@
+; RUN: llc < %s -mtriple=armv8 -mattr=+v8.1a | FileCheck %s
+
+;-----------------------------------------------------------------------------
+; RDMA Vector
+
+declare <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16>, <4 x i16>)
+declare <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16>, <8 x i16>)
+declare <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32>, <2 x i32>)
+declare <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32>, <4 x i32>)
+
+declare <4 x i16> @llvm.arm.neon.vqadds.v4i16(<4 x i16>, <4 x i16>)
+declare <8 x i16> @llvm.arm.neon.vqadds.v8i16(<8 x i16>, <8 x i16>)
+declare <2 x i32> @llvm.arm.neon.vqadds.v2i32(<2 x i32>, <2 x i32>)
+declare <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32>, <4 x i32>)
+
+declare <4 x i16> @llvm.arm.neon.vqsubs.v4i16(<4 x i16>, <4 x i16>)
+declare <8 x i16> @llvm.arm.neon.vqsubs.v8i16(<8 x i16>, <8 x i16>)
+declare <2 x i32> @llvm.arm.neon.vqsubs.v2i32(<2 x i32>, <2 x i32>)
+declare <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32>, <4 x i32>)
+
+define <4 x i16> @test_vqrdmlah_v4i16(<4 x i16> %acc, <4 x i16> %mhs, <4 x i16> %rhs) {
+; CHECK-LABEL: test_vqrdmlah_v4i16:
+ %prod = call <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16> %mhs, <4 x i16> %rhs)
+ %retval = call <4 x i16> @llvm.arm.neon.vqadds.v4i16(<4 x i16> %acc, <4 x i16> %prod)
+; CHECK: vqrdmlah.s16 {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+ ret <4 x i16> %retval
+}
+
+define <8 x i16> @test_vqrdmlah_v8i16(<8 x i16> %acc, <8 x i16> %mhs, <8 x i16> %rhs) {
+; CHECK-LABEL: test_vqrdmlah_v8i16:
+ %prod = call <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16> %mhs, <8 x i16> %rhs)
+ %retval = call <8 x i16> @llvm.arm.neon.vqadds.v8i16(<8 x i16> %acc, <8 x i16> %prod)
+; CHECK: vqrdmlah.s16 {{q[0-9]+}}, {{q[0-9]+}}, {{q[0-9]+}}
+ ret <8 x i16> %retval
+}
+
+define <2 x i32> @test_vqrdmlah_v2i32(<2 x i32> %acc, <2 x i32> %mhs, <2 x i32> %rhs) {
+; CHECK-LABEL: test_vqrdmlah_v2i32:
+ %prod = call <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32> %mhs, <2 x i32> %rhs)
+ %retval = call <2 x i32> @llvm.arm.neon.vqadds.v2i32(<2 x i32> %acc, <2 x i32> %prod)
+; CHECK: vqrdmlah.s32 {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+ ret <2 x i32> %retval
+}
+
+define <4 x i32> @test_vqrdmlah_v4i32(<4 x i32> %acc, <4 x i32> %mhs, <4 x i32> %rhs) {
+; CHECK-LABEL: test_vqrdmlah_v4i32:
+ %prod = call <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32> %mhs, <4 x i32> %rhs)
+ %retval = call <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32> %acc, <4 x i32> %prod)
+; CHECK: vqrdmlah.s32 {{q[0-9]+}}, {{q[0-9]+}}, {{q[0-9]+}}
+ ret <4 x i32> %retval
+}
+
+define <4 x i16> @test_vqrdmlsh_v4i16(<4 x i16> %acc, <4 x i16> %mhs, <4 x i16> %rhs) {
+; CHECK-LABEL: test_vqrdmlsh_v4i16:
+ %prod = call <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16> %mhs, <4 x i16> %rhs)
+ %retval = call <4 x i16> @llvm.arm.neon.vqsubs.v4i16(<4 x i16> %acc, <4 x i16> %prod)
+; CHECK: vqrdmlsh.s16 {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+ ret <4 x i16> %retval
+}
+
+define <8 x i16> @test_vqrdmlsh_v8i16(<8 x i16> %acc, <8 x i16> %mhs, <8 x i16> %rhs) {
+; CHECK-LABEL: test_vqrdmlsh_v8i16:
+ %prod = call <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16> %mhs, <8 x i16> %rhs)
+ %retval = call <8 x i16> @llvm.arm.neon.vqsubs.v8i16(<8 x i16> %acc, <8 x i16> %prod)
+; CHECK: vqrdmlsh.s16 {{q[0-9]+}}, {{q[0-9]+}}, {{q[0-9]+}}
+ ret <8 x i16> %retval
+}
+
+define <2 x i32> @test_vqrdmlsh_v2i32(<2 x i32> %acc, <2 x i32> %mhs, <2 x i32> %rhs) {
+; CHECK-LABEL: test_vqrdmlsh_v2i32:
+ %prod = call <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32> %mhs, <2 x i32> %rhs)
+ %retval = call <2 x i32> @llvm.arm.neon.vqsubs.v2i32(<2 x i32> %acc, <2 x i32> %prod)
+; CHECK: vqrdmlsh.s32 {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+ ret <2 x i32> %retval
+}
+
+define <4 x i32> @test_vqrdmlsh_v4i32(<4 x i32> %acc, <4 x i32> %mhs, <4 x i32> %rhs) {
+; CHECK-LABEL: test_vqrdmlsh_v4i32:
+ %prod = call <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32> %mhs, <4 x i32> %rhs)
+ %retval = call <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32> %acc, <4 x i32> %prod)
+; CHECK: vqrdmlsh.s32 {{q[0-9]+}}, {{q[0-9]+}}, {{q[0-9]+}}
+ ret <4 x i32> %retval
+}
+
+;-----------------------------------------------------------------------------
+; RDMA Scalar
+
+define <4 x i16> @test_vqrdmlah_lane_s16(<4 x i16> %acc, <4 x i16> %x, <4 x i16> %v) {
+; CHECK-LABEL: test_vqrdmlah_lane_s16:
+entry:
+ %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+ %prod = call <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16> %x, <4 x i16> %shuffle)
+ %retval = call <4 x i16> @llvm.arm.neon.vqadds.v4i16(<4 x i16> %acc, <4 x i16> %prod)
+; CHECK: vqrdmlah.s16 {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}[3]
+ ret <4 x i16> %retval
+}
+
+define <8 x i16> @test_vqrdmlahq_lane_s16(<8 x i16> %acc, <8 x i16> %x, <4 x i16> %v) {
+; CHECK-LABEL: test_vqrdmlahq_lane_s16:
+entry:
+ %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
+ %prod = call <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16> %x, <8 x i16> %shuffle)
+ %retval = call <8 x i16> @llvm.arm.neon.vqadds.v8i16(<8 x i16> %acc, <8 x i16> %prod)
+; CHECK: vqrdmlah.s16 {{q[0-9]+}}, {{q[0-9]+}}, {{d[0-9]+}}[2]
+ ret <8 x i16> %retval
+}
+
+define <2 x i32> @test_vqrdmlah_lane_s32(<2 x i32> %acc, <2 x i32> %x, <2 x i32> %v) {
+; CHECK-LABEL: test_vqrdmlah_lane_s32:
+entry:
+ %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
+ %prod = tail call <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32> %x, <2 x i32> %shuffle)
+ %retval = call <2 x i32> @llvm.arm.neon.vqadds.v2i32(<2 x i32> %acc, <2 x i32> %prod)
+; CHECK: vqrdmlah.s32 {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}[1]
+ ret <2 x i32> %retval
+}
+
+define <4 x i32> @test_vqrdmlahq_lane_s32(<4 x i32> %acc,<4 x i32> %x, <2 x i32> %v) {
+; CHECK-LABEL: test_vqrdmlahq_lane_s32:
+entry:
+ %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> zeroinitializer
+ %prod = tail call <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32> %x, <4 x i32> %shuffle)
+ %retval = call <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32> %acc, <4 x i32> %prod)
+; CHECK: vqrdmlah.s32 {{q[0-9]+}}, {{q[0-9]+}}, {{d[0-9]+}}[0]
+ ret <4 x i32> %retval
+}
+
+define <4 x i16> @test_vqrdmlsh_lane_s16(<4 x i16> %acc, <4 x i16> %x, <4 x i16> %v) {
+; CHECK-LABEL: test_vqrdmlsh_lane_s16:
+entry:
+ %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+ %prod = call <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16> %x, <4 x i16> %shuffle)
+ %retval = call <4 x i16> @llvm.arm.neon.vqsubs.v4i16(<4 x i16> %acc, <4 x i16> %prod)
+; CHECK: vqrdmlsh.s16 {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}[3]
+ ret <4 x i16> %retval
+}
+
+define <8 x i16> @test_vqrdmlshq_lane_s16(<8 x i16> %acc, <8 x i16> %x, <4 x i16> %v) {
+; CHECK-LABEL: test_vqrdmlshq_lane_s16:
+entry:
+ %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
+ %prod = call <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16> %x, <8 x i16> %shuffle)
+ %retval = call <8 x i16> @llvm.arm.neon.vqsubs.v8i16(<8 x i16> %acc, <8 x i16> %prod)
+; CHECK: vqrdmlsh.s16 {{q[0-9]+}}, {{q[0-9]+}}, {{d[0-9]+}}[2]
+ ret <8 x i16> %retval
+}
+
+define <2 x i32> @test_vqrdmlsh_lane_s32(<2 x i32> %acc, <2 x i32> %x, <2 x i32> %v) {
+; CHECK-LABEL: test_vqrdmlsh_lane_s32:
+entry:
+ %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
+ %prod = tail call <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32> %x, <2 x i32> %shuffle)
+ %retval = call <2 x i32> @llvm.arm.neon.vqsubs.v2i32(<2 x i32> %acc, <2 x i32> %prod)
+; CHECK: vqrdmlsh.s32 {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}[1]
+ ret <2 x i32> %retval
+}
+
+define <4 x i32> @test_vqrdmlshq_lane_s32(<4 x i32> %acc,<4 x i32> %x, <2 x i32> %v) {
+; CHECK-LABEL: test_vqrdmlshq_lane_s32:
+entry:
+ %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> zeroinitializer
+ %prod = tail call <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32> %x, <4 x i32> %shuffle)
+ %retval = call <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32> %acc, <4 x i32> %prod)
+; CHECK: vqrdmlsh.s32 {{q[0-9]+}}, {{q[0-9]+}}, {{d[0-9]+}}[0]
+ ret <4 x i32> %retval
+}
diff --git a/test/CodeGen/ARM/neon_cmp.ll b/test/CodeGen/ARM/neon_cmp.ll
index e1662c43c647..fcf4486fa46d 100644
--- a/test/CodeGen/ARM/neon_cmp.ll
+++ b/test/CodeGen/ARM/neon_cmp.ll
@@ -4,8 +4,8 @@
; radar://13191881
; CHECK: vfcmp
define void @vfcmp(<2 x double>* %a, <2 x double>* %b) {
- %wide.load = load <2 x double>* %a, align 4
- %wide.load2 = load <2 x double>* %b, align 4
+ %wide.load = load <2 x double>, <2 x double>* %a, align 4
+ %wide.load2 = load <2 x double>, <2 x double>* %b, align 4
; CHECK-NOT: vdup.32
; CHECK-NOT: vmovn.i64
%v1 = fcmp olt <2 x double> %wide.load, %wide.load2
diff --git a/test/CodeGen/ARM/neon_div.ll b/test/CodeGen/ARM/neon_div.ll
index 4f1607ed5baf..e185c2a8afbc 100644
--- a/test/CodeGen/ARM/neon_div.ll
+++ b/test/CodeGen/ARM/neon_div.ll
@@ -7,8 +7,8 @@ define <8 x i8> @sdivi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK: vrecpe.f32
;CHECK: vmovn.i32
;CHECK: vmovn.i16
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
+ %tmp2 = load <8 x i8>, <8 x i8>* %B
%tmp3 = sdiv <8 x i8> %tmp1, %tmp2
ret <8 x i8> %tmp3
}
@@ -21,8 +21,8 @@ define <8 x i8> @udivi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK: vrecps.f32
;CHECK: vmovn.i32
;CHECK: vqmovun.s16
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
+ %tmp2 = load <8 x i8>, <8 x i8>* %B
%tmp3 = udiv <8 x i8> %tmp1, %tmp2
ret <8 x i8> %tmp3
}
@@ -31,8 +31,8 @@ define <4 x i16> @sdivi16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK: vrecpe.f32
;CHECK: vrecps.f32
;CHECK: vmovn.i32
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
%tmp3 = sdiv <4 x i16> %tmp1, %tmp2
ret <4 x i16> %tmp3
}
@@ -42,8 +42,8 @@ define <4 x i16> @udivi16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK: vrecps.f32
;CHECK: vrecps.f32
;CHECK: vmovn.i32
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
%tmp3 = udiv <4 x i16> %tmp1, %tmp2
ret <4 x i16> %tmp3
}
diff --git a/test/CodeGen/ARM/neon_fpconv.ll b/test/CodeGen/ARM/neon_fpconv.ll
index 8e37ce778182..61ac4098a6dd 100644
--- a/test/CodeGen/ARM/neon_fpconv.ll
+++ b/test/CodeGen/ARM/neon_fpconv.ll
@@ -19,7 +19,7 @@ define <2 x double> @vextend(<2 x float> %a) {
; CHECK: vsitofp_double
define void @vsitofp_double(<2 x i32>* %loadaddr,
<2 x double>* %storeaddr) {
- %v0 = load <2 x i32>* %loadaddr
+ %v0 = load <2 x i32>, <2 x i32>* %loadaddr
; CHECK: vldr
; CHECK-NEXT: vcvt.f64.s32
; CHECK-NEXT: vcvt.f64.s32
@@ -31,7 +31,7 @@ define void @vsitofp_double(<2 x i32>* %loadaddr,
; CHECK: vuitofp_double
define void @vuitofp_double(<2 x i32>* %loadaddr,
<2 x double>* %storeaddr) {
- %v0 = load <2 x i32>* %loadaddr
+ %v0 = load <2 x i32>, <2 x i32>* %loadaddr
; CHECK: vldr
; CHECK-NEXT: vcvt.f64.u32
; CHECK-NEXT: vcvt.f64.u32
diff --git a/test/CodeGen/ARM/neon_ld1.ll b/test/CodeGen/ARM/neon_ld1.ll
index 9fd3fc5f341a..f4d601905537 100644
--- a/test/CodeGen/ARM/neon_ld1.ll
+++ b/test/CodeGen/ARM/neon_ld1.ll
@@ -7,8 +7,8 @@
; CHECK: vstr d
define void @t1(<2 x i32>* %r, <4 x i16>* %a, <4 x i16>* %b) nounwind {
entry:
- %0 = load <4 x i16>* %a, align 8 ; <<4 x i16>> [#uses=1]
- %1 = load <4 x i16>* %b, align 8 ; <<4 x i16>> [#uses=1]
+ %0 = load <4 x i16>, <4 x i16>* %a, align 8 ; <<4 x i16>> [#uses=1]
+ %1 = load <4 x i16>, <4 x i16>* %b, align 8 ; <<4 x i16>> [#uses=1]
%2 = add <4 x i16> %0, %1 ; <<4 x i16>> [#uses=1]
%3 = bitcast <4 x i16> %2 to <2 x i32> ; <<2 x i32>> [#uses=1]
store <2 x i32> %3, <2 x i32>* %r, align 8
@@ -22,8 +22,8 @@ entry:
; CHECK: vmov r0, r1, d
define <2 x i32> @t2(<4 x i16>* %a, <4 x i16>* %b) nounwind readonly {
entry:
- %0 = load <4 x i16>* %a, align 8 ; <<4 x i16>> [#uses=1]
- %1 = load <4 x i16>* %b, align 8 ; <<4 x i16>> [#uses=1]
+ %0 = load <4 x i16>, <4 x i16>* %a, align 8 ; <<4 x i16>> [#uses=1]
+ %1 = load <4 x i16>, <4 x i16>* %b, align 8 ; <<4 x i16>> [#uses=1]
%2 = sub <4 x i16> %0, %1 ; <<4 x i16>> [#uses=1]
%3 = bitcast <4 x i16> %2 to <2 x i32> ; <<2 x i32>> [#uses=1]
ret <2 x i32> %3
diff --git a/test/CodeGen/ARM/neon_ld2.ll b/test/CodeGen/ARM/neon_ld2.ll
index 571a16a061df..5bd6ae6d2a98 100644
--- a/test/CodeGen/ARM/neon_ld2.ll
+++ b/test/CodeGen/ARM/neon_ld2.ll
@@ -13,8 +13,8 @@
; SWIFT: vst1.64 {{.d[0-9]+, d[0-9]+}, \[r[0-9]+:128\]}}
define void @t1(<4 x i32>* %r, <2 x i64>* %a, <2 x i64>* %b) nounwind {
entry:
- %0 = load <2 x i64>* %a, align 16 ; <<2 x i64>> [#uses=1]
- %1 = load <2 x i64>* %b, align 16 ; <<2 x i64>> [#uses=1]
+ %0 = load <2 x i64>, <2 x i64>* %a, align 16 ; <<2 x i64>> [#uses=1]
+ %1 = load <2 x i64>, <2 x i64>* %b, align 16 ; <<2 x i64>> [#uses=1]
%2 = add <2 x i64> %0, %1 ; <<2 x i64>> [#uses=1]
%3 = bitcast <2 x i64> %2 to <4 x i32> ; <<4 x i32>> [#uses=1]
store <4 x i32> %3, <4 x i32>* %r, align 16
@@ -35,8 +35,8 @@ entry:
; SWIFT: vmov r2, r3, d
define <4 x i32> @t2(<2 x i64>* %a, <2 x i64>* %b) nounwind readonly {
entry:
- %0 = load <2 x i64>* %a, align 16 ; <<2 x i64>> [#uses=1]
- %1 = load <2 x i64>* %b, align 16 ; <<2 x i64>> [#uses=1]
+ %0 = load <2 x i64>, <2 x i64>* %a, align 16 ; <<2 x i64>> [#uses=1]
+ %1 = load <2 x i64>, <2 x i64>* %b, align 16 ; <<2 x i64>> [#uses=1]
%2 = sub <2 x i64> %0, %1 ; <<2 x i64>> [#uses=1]
%3 = bitcast <2 x i64> %2 to <4 x i32> ; <<4 x i32>> [#uses=1]
ret <4 x i32> %3
@@ -50,8 +50,8 @@ entry:
; SWIFT: vst1.64 {{.d[0-9]+, d[0-9]+}, \[r[0-9]+}}
define void @t3(<4 x i32>* %r, <2 x i64>* %a, <2 x i64>* %b) nounwind {
entry:
- %0 = load <2 x i64>* %a, align 8
- %1 = load <2 x i64>* %b, align 8
+ %0 = load <2 x i64>, <2 x i64>* %a, align 8
+ %1 = load <2 x i64>, <2 x i64>* %b, align 8
%2 = add <2 x i64> %0, %1
%3 = bitcast <2 x i64> %2 to <4 x i32>
store <4 x i32> %3, <4 x i32>* %r, align 8
diff --git a/test/CodeGen/ARM/neon_spill.ll b/test/CodeGen/ARM/neon_spill.ll
index d286d16486c1..04f08f43840f 100644
--- a/test/CodeGen/ARM/neon_spill.ll
+++ b/test/CodeGen/ARM/neon_spill.ll
@@ -24,7 +24,7 @@ declare arm_aapcs_vfpcc %2** @func4()
define arm_aapcs_vfpcc void @foo(%3* nocapture) nounwind align 2 {
call void @llvm.arm.neon.vst4.v4i32(i8* undef, <4 x i32> <i32 0, i32 1065353216, i32 1073741824, i32 1077936128>, <4 x i32> <i32 1082130432, i32 1084227584, i32 1086324736, i32 1088421888>, <4 x i32> <i32 1090519040, i32 1091567616, i32 1092616192, i32 1093664768>, <4 x i32> <i32 1094713344, i32 1095761920, i32 1096810496, i32 1097859072>, i32 16) nounwind
%2 = call arm_aapcs_vfpcc %0** @func2() nounwind
- %3 = load %0** %2, align 4
+ %3 = load %0*, %0** %2, align 4
store float 0.000000e+00, float* undef, align 4
%4 = call arm_aapcs_vfpcc %2* @func3(%2* undef, %2* undef, i32 2956) nounwind
call arm_aapcs_vfpcc void @func1(%0* %3, float* undef, float* undef, %2* undef)
diff --git a/test/CodeGen/ARM/no-fpu.ll b/test/CodeGen/ARM/no-fpu.ll
index fff4bccb80e9..c5d1f1951d7c 100644
--- a/test/CodeGen/ARM/no-fpu.ll
+++ b/test/CodeGen/ARM/no-fpu.ll
@@ -6,7 +6,7 @@
; Check no NEON instructions are selected when feature is disabled.
define void @neonop(i64* nocapture readonly %a, i64* nocapture %b) #0 {
%1 = bitcast i64* %a to <2 x i64>*
- %wide.load = load <2 x i64>* %1, align 8
+ %wide.load = load <2 x i64>, <2 x i64>* %1, align 8
; NONEON-NOVFP-NOT: vld1.64
; NONEON-NOT: vld1.64
%add = add <2 x i64> %wide.load, %wide.load
diff --git a/test/CodeGen/ARM/no-tail-call.ll b/test/CodeGen/ARM/no-tail-call.ll
index 3a8cb21bee92..5a5d43c28714 100644
--- a/test/CodeGen/ARM/no-tail-call.ll
+++ b/test/CodeGen/ARM/no-tail-call.ll
@@ -17,28 +17,28 @@ entry:
%0 = alloca %foo, align 4
%1 = alloca %foo, align 4
%2 = alloca %foo, align 4
- %.native = getelementptr inbounds %foo* %0, i32 0, i32 0
- %.native.value = getelementptr inbounds %Sf* %.native, i32 0, i32 0
+ %.native = getelementptr inbounds %foo, %foo* %0, i32 0, i32 0
+ %.native.value = getelementptr inbounds %Sf, %Sf* %.native, i32 0, i32 0
store float 0.000000e+00, float* %.native.value, align 4
- %.native1 = getelementptr inbounds %foo* %1, i32 0, i32 0
- %.native1.value = getelementptr inbounds %Sf* %.native1, i32 0, i32 0
+ %.native1 = getelementptr inbounds %foo, %foo* %1, i32 0, i32 0
+ %.native1.value = getelementptr inbounds %Sf, %Sf* %.native1, i32 0, i32 0
store float 1.000000e+00, float* %.native1.value, align 4
- %.native2 = getelementptr inbounds %foo* %2, i32 0, i32 0
- %.native2.value = getelementptr inbounds %Sf* %.native2, i32 0, i32 0
+ %.native2 = getelementptr inbounds %foo, %foo* %2, i32 0, i32 0
+ %.native2.value = getelementptr inbounds %Sf, %Sf* %.native2, i32 0, i32 0
store float 5.000000e+00, float* %.native2.value, align 4
br i1 true, label %3, label %4
; <label>:3 ; preds = %entry
- %.native4 = getelementptr inbounds %foo* %1, i32 0, i32 0
- %.native4.value = getelementptr inbounds %Sf* %.native4, i32 0, i32 0
+ %.native4 = getelementptr inbounds %foo, %foo* %1, i32 0, i32 0
+ %.native4.value = getelementptr inbounds %Sf, %Sf* %.native4, i32 0, i32 0
store float 2.000000e+00, float* %.native4.value, align 4
br label %4
; <label>:4 ; preds = %3, %entry
%5 = call float @llvm.ceil.f32(float 5.000000e+00)
- %.native3 = getelementptr inbounds %foo* %1, i32 0, i32 0
- %.native3.value = getelementptr inbounds %Sf* %.native3, i32 0, i32 0
- %6 = load float* %.native3.value, align 4
+ %.native3 = getelementptr inbounds %foo, %foo* %1, i32 0, i32 0
+ %.native3.value = getelementptr inbounds %Sf, %Sf* %.native3, i32 0, i32 0
+ %6 = load float, float* %.native3.value, align 4
%7 = call float @llvm.ceil.f32(float %6)
%8 = insertvalue { float, float, float } { float 0.000000e+00, float undef, float undef }, float %5, 1
%9 = insertvalue { float, float, float } %8, float %7, 2
@@ -55,28 +55,28 @@ entry:
%0 = alloca %foo, align 4
%1 = alloca %foo, align 4
%2 = alloca %foo, align 4
- %.native = getelementptr inbounds %foo* %0, i32 0, i32 0
- %.native.value = getelementptr inbounds %Sf* %.native, i32 0, i32 0
+ %.native = getelementptr inbounds %foo, %foo* %0, i32 0, i32 0
+ %.native.value = getelementptr inbounds %Sf, %Sf* %.native, i32 0, i32 0
store float 0.000000e+00, float* %.native.value, align 4
- %.native1 = getelementptr inbounds %foo* %1, i32 0, i32 0
- %.native1.value = getelementptr inbounds %Sf* %.native1, i32 0, i32 0
+ %.native1 = getelementptr inbounds %foo, %foo* %1, i32 0, i32 0
+ %.native1.value = getelementptr inbounds %Sf, %Sf* %.native1, i32 0, i32 0
store float 1.000000e+00, float* %.native1.value, align 4
- %.native2 = getelementptr inbounds %foo* %2, i32 0, i32 0
- %.native2.value = getelementptr inbounds %Sf* %.native2, i32 0, i32 0
+ %.native2 = getelementptr inbounds %foo, %foo* %2, i32 0, i32 0
+ %.native2.value = getelementptr inbounds %Sf, %Sf* %.native2, i32 0, i32 0
store float 5.000000e+00, float* %.native2.value, align 4
br i1 true, label %3, label %4
; <label>:3 ; preds = %entry
- %.native4 = getelementptr inbounds %foo* %1, i32 0, i32 0
- %.native4.value = getelementptr inbounds %Sf* %.native4, i32 0, i32 0
+ %.native4 = getelementptr inbounds %foo, %foo* %1, i32 0, i32 0
+ %.native4.value = getelementptr inbounds %Sf, %Sf* %.native4, i32 0, i32 0
store float 2.000000e+00, float* %.native4.value, align 4
br label %4
; <label>:4 ; preds = %3, %entry
%5 = call float @llvm.ceil.f32(float 5.000000e+00)
- %.native3 = getelementptr inbounds %foo* %1, i32 0, i32 0
- %.native3.value = getelementptr inbounds %Sf* %.native3, i32 0, i32 0
- %6 = load float* %.native3.value, align 4
+ %.native3 = getelementptr inbounds %foo, %foo* %1, i32 0, i32 0
+ %.native3.value = getelementptr inbounds %Sf, %Sf* %.native3, i32 0, i32 0
+ %6 = load float, float* %.native3.value, align 4
%7 = call float @llvm.ceil.f32(float %6)
%8 = insertvalue { float, float } { float 0.000000e+00, float undef }, float %7, 1
ret { float, float } %8
diff --git a/test/CodeGen/ARM/none-macho.ll b/test/CodeGen/ARM/none-macho.ll
index 2a7878fee300..733ba4ba2d2c 100644
--- a/test/CodeGen/ARM/none-macho.ll
+++ b/test/CodeGen/ARM/none-macho.ll
@@ -2,16 +2,11 @@
; RUN: llc -mtriple=thumbv7m-none-macho -O0 %s -o - -relocation-model=pic -disable-fp-elim | FileCheck %s
; RUN: llc -mtriple=thumbv7m-none-macho -filetype=obj %s -o /dev/null
- ; Bare-metal should probably "declare" segments just like normal MachO
-; CHECK: __picsymbolstub4
-; CHECK: __StaticInit
-; CHECK: __text
-
@var = external global i32
define i32 @test_litpool() minsize {
; CHECK-LABEL: test_litpool:
- %val = load i32* @var
+ %val = load i32, i32* @var
ret i32 %val
; Lit-pool entries need to produce a "$non_lazy_ptr" version of the symbol.
@@ -21,7 +16,7 @@ define i32 @test_litpool() minsize {
define i32 @test_movw_movt() {
; CHECK-LABEL: test_movw_movt:
- %val = load i32* @var
+ %val = load i32, i32* @var
ret i32 %val
; movw/movt should also address their symbols MachO-style
@@ -56,7 +51,7 @@ define i32 @test_frame_ptr() {
%big_arr = type [8 x i32]
define void @test_two_areas(%big_arr* %addr) {
; CHECK-LABEL: test_two_areas:
- %val = load %big_arr* %addr
+ %val = load %big_arr, %big_arr* %addr
call void @test_trap()
store %big_arr %val, %big_arr* %addr
diff --git a/test/CodeGen/ARM/noopt-dmb-v7.ll b/test/CodeGen/ARM/noopt-dmb-v7.ll
new file mode 100644
index 000000000000..56a29c8a17e8
--- /dev/null
+++ b/test/CodeGen/ARM/noopt-dmb-v7.ll
@@ -0,0 +1,15 @@
+; Ensure that adjacent duplicated barriers are not removed at -O0.
+; RUN: llc -O0 < %s -mtriple=armv7 -mattr=+db | FileCheck %s
+
+define i32 @t1() {
+entry:
+ fence seq_cst
+ fence seq_cst
+ fence seq_cst
+ ret i32 0
+}
+
+; CHECK: @ BB#0: @ %entry
+; CHECK-NEXT: dmb ish
+; CHECK-NEXT: dmb ish
+; CHECK-NEXT: dmb ish
diff --git a/test/CodeGen/ARM/nop_concat_vectors.ll b/test/CodeGen/ARM/nop_concat_vectors.ll
index c81090095a99..fa0e892f9e24 100644
--- a/test/CodeGen/ARM/nop_concat_vectors.ll
+++ b/test/CodeGen/ARM/nop_concat_vectors.ll
@@ -5,7 +5,7 @@
;CHECK-NOT: vst1.32
;CHECK: bx
define void @foo(<16 x i8>* %J) {
- %A = load <16 x i8>* %J
+ %A = load <16 x i8>, <16 x i8>* %J
%T1 = shufflevector <16 x i8> %A, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%T2 = shufflevector <8 x i8> %T1, <8 x i8> undef, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
store <16 x i8> %T2, <16 x i8>* %J
diff --git a/test/CodeGen/ARM/null-streamer.ll b/test/CodeGen/ARM/null-streamer.ll
index 350c45e5bebe..19ad22a4bdb8 100644
--- a/test/CodeGen/ARM/null-streamer.ll
+++ b/test/CodeGen/ARM/null-streamer.ll
@@ -5,3 +5,5 @@ define i32 @main() {
entry:
ret i32 0
}
+
+module asm ".fnstart"
diff --git a/test/CodeGen/ARM/odr_comdat.ll b/test/CodeGen/ARM/odr_comdat.ll
deleted file mode 100644
index e28b5788ef6d..000000000000
--- a/test/CodeGen/ARM/odr_comdat.ll
+++ /dev/null
@@ -1,16 +0,0 @@
-; RUN: llc < %s -mtriple=arm-linux-gnueabi | FileCheck %s -check-prefix=ARMGNUEABI
-
-; Checking that a comdat group gets generated correctly for a static member
-; of instantiated C++ templates.
-; see http://sourcery.mentor.com/public/cxx-abi/abi.html#vague-itemplate
-; section 5.2.6 Instantiated templates
-; "Any static member data object is emitted in a COMDAT identified by its mangled
-; name, in any object file with a reference to its name symbol."
-
-; Case 1: variable is not explicitly initialized, and ends up in a .bss section
-; ARMGNUEABI: .section .bss._ZN1CIiE1iE,"aGw",%nobits,_ZN1CIiE1iE,comdat
-@_ZN1CIiE1iE = weak_odr global i32 0, align 4
-
-; Case 2: variable is explicitly initialized, and ends up in a .data section
-; ARMGNUEABI: .section .data._ZN1CIiE1jE,"aGw",%progbits,_ZN1CIiE1jE,comdat
-@_ZN1CIiE1jE = weak_odr global i32 12, align 4
diff --git a/test/CodeGen/ARM/optimize-dmbs-v7.ll b/test/CodeGen/ARM/optimize-dmbs-v7.ll
index 64f5e202d36a..34a55aa718a3 100644
--- a/test/CodeGen/ARM/optimize-dmbs-v7.ll
+++ b/test/CodeGen/ARM/optimize-dmbs-v7.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=armv7 -mattr=+db | FileCheck %s
+; RUN: llc -O1 < %s -mtriple=armv7 -mattr=+db | FileCheck %s
@x1 = global i32 0, align 4
@x2 = global i32 0, align 4
diff --git a/test/CodeGen/ARM/optselect-regclass.ll b/test/CodeGen/ARM/optselect-regclass.ll
index 0acb2f2f0ecc..4c5d44c352b2 100644
--- a/test/CodeGen/ARM/optselect-regclass.ll
+++ b/test/CodeGen/ARM/optselect-regclass.ll
@@ -9,7 +9,7 @@
; Function Attrs: nounwind ssp
define void @xfr() {
entry:
- %bf.load4 = load i32* getelementptr inbounds (%union.opcode.0.2.5.8.15.28* @opcode, i32 0, i32 0), align 4
+ %bf.load4 = load i32, i32* getelementptr inbounds (%union.opcode.0.2.5.8.15.28, %union.opcode.0.2.5.8.15.28* @opcode, i32 0, i32 0), align 4
%bf.clear10 = and i32 %bf.load4, 65535
%and11 = and i32 %bf.load4, 32768
%tobool12 = icmp ne i32 %and11, 0
@@ -17,7 +17,7 @@ entry:
%or = or i32 %cond13, %bf.clear10
%shl = shl nuw i32 %or, 2
%add = add i32 0, %shl
- tail call void (i8*, i32, i32, i8*, ...)* @__sprintf_chk(i8* getelementptr inbounds ([50 x i8]* @operands, i32 0, i32 0), i32 0, i32 50, i8* getelementptr inbounds ([13 x i8]* @.str86, i32 0, i32 0), i32 undef, i32 undef, i32 %add)
+ tail call void (i8*, i32, i32, i8*, ...) @__sprintf_chk(i8* getelementptr inbounds ([50 x i8], [50 x i8]* @operands, i32 0, i32 0), i32 0, i32 50, i8* getelementptr inbounds ([13 x i8], [13 x i8]* @.str86, i32 0, i32 0), i32 undef, i32 undef, i32 %add)
ret void
}
diff --git a/test/CodeGen/ARM/phi.ll b/test/CodeGen/ARM/phi.ll
index 5a8f623e6f50..ff85052175c8 100644
--- a/test/CodeGen/ARM/phi.ll
+++ b/test/CodeGen/ARM/phi.ll
@@ -9,17 +9,17 @@ entry:
br i1 %a, label %lblock, label %rblock
lblock:
- %lbranch = getelementptr i32* %b, i32 1
+ %lbranch = getelementptr i32, i32* %b, i32 1
br label %end
rblock:
- %rbranch = getelementptr i32* %b, i32 1
+ %rbranch = getelementptr i32, i32* %b, i32 1
br label %end
end:
; CHECK: ldr r0, [r1, #4]
%gep = phi i32* [%lbranch, %lblock], [%rbranch, %rblock]
- %r = load i32* %gep
+ %r = load i32, i32* %gep
; CHECK-NEXT: bx lr
ret i32 %r
}
diff --git a/test/CodeGen/ARM/popcnt.ll b/test/CodeGen/ARM/popcnt.ll
index 7ace640c6b61..74f90640ca6e 100644
--- a/test/CodeGen/ARM/popcnt.ll
+++ b/test/CodeGen/ARM/popcnt.ll
@@ -4,7 +4,7 @@
define <8 x i8> @vcnt8(<8 x i8>* %A) nounwind {
;CHECK-LABEL: vcnt8:
;CHECK: vcnt.8 {{d[0-9]+}}, {{d[0-9]+}}
- %tmp1 = load <8 x i8>* %A
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
%tmp2 = call <8 x i8> @llvm.ctpop.v8i8(<8 x i8> %tmp1)
ret <8 x i8> %tmp2
}
@@ -12,7 +12,7 @@ define <8 x i8> @vcnt8(<8 x i8>* %A) nounwind {
define <16 x i8> @vcntQ8(<16 x i8>* %A) nounwind {
;CHECK-LABEL: vcntQ8:
;CHECK: vcnt.8 {{q[0-9]+}}, {{q[0-9]+}}
- %tmp1 = load <16 x i8>* %A
+ %tmp1 = load <16 x i8>, <16 x i8>* %A
%tmp2 = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %tmp1)
ret <16 x i8> %tmp2
}
@@ -24,7 +24,7 @@ define <4 x i16> @vcnt16(<4 x i16>* %A) nounwind {
; CHECK: vadd.i8 {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
; CHECK: vuzp.8 {{d[0-9]+}}, {{d[0-9]+}}
; CHECK: vmovl.u8 {{q[0-9]+}}, {{d[0-9]+}}
- %tmp1 = load <4 x i16>* %A
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
%tmp2 = call <4 x i16> @llvm.ctpop.v4i16(<4 x i16> %tmp1)
ret <4 x i16> %tmp2
}
@@ -36,7 +36,7 @@ define <8 x i16> @vcntQ16(<8 x i16>* %A) nounwind {
; CHECK: vadd.i8 {{q[0-9]+}}, {{q[0-9]+}}, {{q[0-9]+}}
; CHECK: vuzp.8 {{q[0-9]+}}, {{q[0-9]+}}
; CHECK: vmovl.u8 {{q[0-9]+}}, {{d[0-9]+}}
- %tmp1 = load <8 x i16>* %A
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
%tmp2 = call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %tmp1)
ret <8 x i16> %tmp2
}
@@ -51,7 +51,7 @@ define <2 x i32> @vcnt32(<2 x i32>* %A) nounwind {
; CHECK: vrev32.16 {{d[0-9]+}}, {{d[0-9]+}}
; CHECK: vuzp.16 {{d[0-9]+}}, {{d[0-9]+}}
; CHECK: vmovl.u16 {{q[0-9]+}}, {{d[0-9]+}}
- %tmp1 = load <2 x i32>* %A
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
%tmp2 = call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> %tmp1)
ret <2 x i32> %tmp2
}
@@ -66,7 +66,7 @@ define <4 x i32> @vcntQ32(<4 x i32>* %A) nounwind {
; CHECK: vrev32.16 {{q[0-9]+}}, {{q[0-9]+}}
; CHECK: vuzp.16 {{q[0-9]+}}, {{q[0-9]+}}
; CHECK: vmovl.u16 {{q[0-9]+}}, {{d[0-9]+}}
- %tmp1 = load <4 x i32>* %A
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
%tmp2 = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %tmp1)
ret <4 x i32> %tmp2
}
@@ -81,7 +81,7 @@ declare <4 x i32> @llvm.ctpop.v4i32(<4 x i32>) nounwind readnone
define <8 x i8> @vclz8(<8 x i8>* %A) nounwind {
;CHECK-LABEL: vclz8:
;CHECK: vclz.i8 {{d[0-9]+}}, {{d[0-9]+}}
- %tmp1 = load <8 x i8>* %A
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
%tmp2 = call <8 x i8> @llvm.ctlz.v8i8(<8 x i8> %tmp1, i1 0)
ret <8 x i8> %tmp2
}
@@ -89,7 +89,7 @@ define <8 x i8> @vclz8(<8 x i8>* %A) nounwind {
define <4 x i16> @vclz16(<4 x i16>* %A) nounwind {
;CHECK-LABEL: vclz16:
;CHECK: vclz.i16 {{d[0-9]+}}, {{d[0-9]+}}
- %tmp1 = load <4 x i16>* %A
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
%tmp2 = call <4 x i16> @llvm.ctlz.v4i16(<4 x i16> %tmp1, i1 0)
ret <4 x i16> %tmp2
}
@@ -97,7 +97,7 @@ define <4 x i16> @vclz16(<4 x i16>* %A) nounwind {
define <2 x i32> @vclz32(<2 x i32>* %A) nounwind {
;CHECK-LABEL: vclz32:
;CHECK: vclz.i32 {{d[0-9]+}}, {{d[0-9]+}}
- %tmp1 = load <2 x i32>* %A
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
%tmp2 = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %tmp1, i1 0)
ret <2 x i32> %tmp2
}
@@ -105,7 +105,7 @@ define <2 x i32> @vclz32(<2 x i32>* %A) nounwind {
define <16 x i8> @vclzQ8(<16 x i8>* %A) nounwind {
;CHECK-LABEL: vclzQ8:
;CHECK: vclz.i8 {{q[0-9]+}}, {{q[0-9]+}}
- %tmp1 = load <16 x i8>* %A
+ %tmp1 = load <16 x i8>, <16 x i8>* %A
%tmp2 = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %tmp1, i1 0)
ret <16 x i8> %tmp2
}
@@ -113,7 +113,7 @@ define <16 x i8> @vclzQ8(<16 x i8>* %A) nounwind {
define <8 x i16> @vclzQ16(<8 x i16>* %A) nounwind {
;CHECK-LABEL: vclzQ16:
;CHECK: vclz.i16 {{q[0-9]+}}, {{q[0-9]+}}
- %tmp1 = load <8 x i16>* %A
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
%tmp2 = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %tmp1, i1 0)
ret <8 x i16> %tmp2
}
@@ -121,7 +121,7 @@ define <8 x i16> @vclzQ16(<8 x i16>* %A) nounwind {
define <4 x i32> @vclzQ32(<4 x i32>* %A) nounwind {
;CHECK-LABEL: vclzQ32:
;CHECK: vclz.i32 {{q[0-9]+}}, {{q[0-9]+}}
- %tmp1 = load <4 x i32>* %A
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
%tmp2 = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %tmp1, i1 0)
ret <4 x i32> %tmp2
}
@@ -137,7 +137,7 @@ declare <4 x i32> @llvm.ctlz.v4i32(<4 x i32>, i1) nounwind readnone
define <8 x i8> @vclss8(<8 x i8>* %A) nounwind {
;CHECK-LABEL: vclss8:
;CHECK: vcls.s8
- %tmp1 = load <8 x i8>* %A
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
%tmp2 = call <8 x i8> @llvm.arm.neon.vcls.v8i8(<8 x i8> %tmp1)
ret <8 x i8> %tmp2
}
@@ -145,7 +145,7 @@ define <8 x i8> @vclss8(<8 x i8>* %A) nounwind {
define <4 x i16> @vclss16(<4 x i16>* %A) nounwind {
;CHECK-LABEL: vclss16:
;CHECK: vcls.s16
- %tmp1 = load <4 x i16>* %A
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
%tmp2 = call <4 x i16> @llvm.arm.neon.vcls.v4i16(<4 x i16> %tmp1)
ret <4 x i16> %tmp2
}
@@ -153,7 +153,7 @@ define <4 x i16> @vclss16(<4 x i16>* %A) nounwind {
define <2 x i32> @vclss32(<2 x i32>* %A) nounwind {
;CHECK-LABEL: vclss32:
;CHECK: vcls.s32
- %tmp1 = load <2 x i32>* %A
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
%tmp2 = call <2 x i32> @llvm.arm.neon.vcls.v2i32(<2 x i32> %tmp1)
ret <2 x i32> %tmp2
}
@@ -161,7 +161,7 @@ define <2 x i32> @vclss32(<2 x i32>* %A) nounwind {
define <16 x i8> @vclsQs8(<16 x i8>* %A) nounwind {
;CHECK-LABEL: vclsQs8:
;CHECK: vcls.s8
- %tmp1 = load <16 x i8>* %A
+ %tmp1 = load <16 x i8>, <16 x i8>* %A
%tmp2 = call <16 x i8> @llvm.arm.neon.vcls.v16i8(<16 x i8> %tmp1)
ret <16 x i8> %tmp2
}
@@ -169,7 +169,7 @@ define <16 x i8> @vclsQs8(<16 x i8>* %A) nounwind {
define <8 x i16> @vclsQs16(<8 x i16>* %A) nounwind {
;CHECK-LABEL: vclsQs16:
;CHECK: vcls.s16
- %tmp1 = load <8 x i16>* %A
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
%tmp2 = call <8 x i16> @llvm.arm.neon.vcls.v8i16(<8 x i16> %tmp1)
ret <8 x i16> %tmp2
}
@@ -177,7 +177,7 @@ define <8 x i16> @vclsQs16(<8 x i16>* %A) nounwind {
define <4 x i32> @vclsQs32(<4 x i32>* %A) nounwind {
;CHECK-LABEL: vclsQs32:
;CHECK: vcls.s32
- %tmp1 = load <4 x i32>* %A
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
%tmp2 = call <4 x i32> @llvm.arm.neon.vcls.v4i32(<4 x i32> %tmp1)
ret <4 x i32> %tmp2
}
diff --git a/test/CodeGen/ARM/pr13249.ll b/test/CodeGen/ARM/pr13249.ll
index 4bc881035497..cede6007ba16 100644
--- a/test/CodeGen/ARM/pr13249.ll
+++ b/test/CodeGen/ARM/pr13249.ll
@@ -6,19 +6,19 @@ bb:
bb3: ; preds = %bb3, %bb
%tmp = phi i8* [ %tmp5, %bb3 ], [ %arg, %bb ]
- %tmp4 = load i8* %tmp, align 1
- %tmp5 = getelementptr inbounds i8* %tmp, i32 1
+ %tmp4 = load i8, i8* %tmp, align 1
+ %tmp5 = getelementptr inbounds i8, i8* %tmp, i32 1
br i1 undef, label %bb3, label %bb7
bb7: ; preds = %bb13, %bb3
%tmp8 = phi i8 [ %tmp14, %bb13 ], [ %tmp4, %bb3 ]
%tmp9 = phi i8* [ %tmp12, %bb13 ], [ %tmp, %bb3 ]
%tmp10 = icmp ne i8 %tmp8, %arg1
- %tmp12 = getelementptr inbounds i8* %tmp9, i32 1
+ %tmp12 = getelementptr inbounds i8, i8* %tmp9, i32 1
br i1 %tmp10, label %bb13, label %bb15
bb13: ; preds = %bb7
- %tmp14 = load i8* %tmp12, align 1
+ %tmp14 = load i8, i8* %tmp12, align 1
br label %bb7
bb15: ; preds = %bb7
diff --git a/test/CodeGen/ARM/pr18364-movw.ll b/test/CodeGen/ARM/pr18364-movw.ll
index fdcf15485f1c..b783522c42b8 100644
--- a/test/CodeGen/ARM/pr18364-movw.ll
+++ b/test/CodeGen/ARM/pr18364-movw.ll
@@ -14,8 +14,8 @@ entry:
%z = alloca i64, align 8
store i64 1, i64* %y, align 8
store i64 11579764786944, i64* %z, align 8
- %0 = load i64* %y, align 8
- %1 = load i64* %z, align 8
+ %0 = load i64, i64* %y, align 8
+ %1 = load i64, i64* %z, align 8
%sub = sub i64 %0, %1
ret i64 %sub
}
diff --git a/test/CodeGen/ARM/pr3502.ll b/test/CodeGen/ARM/pr3502.ll
index 606d9698b977..4ec982ebea2b 100644
--- a/test/CodeGen/ARM/pr3502.ll
+++ b/test/CodeGen/ARM/pr3502.ll
@@ -11,7 +11,7 @@ define void @SomeCall(i32 %num) nounwind {
entry:
tail call void asm sideeffect "mcr p15, 0, $0, c7, c10, 4 \0A\09", "r,~{memory}"(i32 0) nounwind
tail call void asm sideeffect "mcr p15,0,$0,c7,c14,0", "r,~{memory}"(i32 0) nounwind
- %0 = load %struct.SHARED_AREA** null, align 4 ; <%struct.SHARED_AREA*> [#uses=1]
+ %0 = load %struct.SHARED_AREA*, %struct.SHARED_AREA** null, align 4 ; <%struct.SHARED_AREA*> [#uses=1]
%1 = ptrtoint %struct.SHARED_AREA* %0 to i32 ; <i32> [#uses=1]
%2 = lshr i32 %1, 20 ; <i32> [#uses=1]
%3 = tail call i32 @SetCurrEntry(i32 %2, i32 0) nounwind ; <i32> [#uses=0]
diff --git a/test/CodeGen/ARM/prefetch.ll b/test/CodeGen/ARM/prefetch.ll
index 7fdc5b65c70e..447411d510e9 100644
--- a/test/CodeGen/ARM/prefetch.ll
+++ b/test/CodeGen/ARM/prefetch.ll
@@ -31,7 +31,7 @@ entry:
; THUMB2-LABEL: t2:
; THUMB2: pld [r0, #1023]
- %tmp = getelementptr i8* %ptr, i32 1023
+ %tmp = getelementptr i8, i8* %ptr, i32 1023
tail call void @llvm.prefetch( i8* %tmp, i32 0, i32 3, i32 1 )
ret void
}
@@ -89,8 +89,8 @@ entry:
;THUMB2: pld [sp, #50]
%red = alloca [100 x i8], align 1
-%0 = getelementptr inbounds [100 x i8]* %red, i32 0, i32 0
-%1 = getelementptr inbounds [100 x i8]* %red, i32 0, i32 50
+%0 = getelementptr inbounds [100 x i8], [100 x i8]* %red, i32 0, i32 0
+%1 = getelementptr inbounds [100 x i8], [100 x i8]* %red, i32 0, i32 50
call void @llvm.prefetch(i8* %0, i32 0, i32 3, i32 1)
call void @llvm.prefetch(i8* %1, i32 0, i32 3, i32 1)
ret void
diff --git a/test/CodeGen/ARM/print-memb-operand.ll b/test/CodeGen/ARM/print-memb-operand.ll
new file mode 100644
index 000000000000..7748efb3a08a
--- /dev/null
+++ b/test/CodeGen/ARM/print-memb-operand.ll
@@ -0,0 +1,12 @@
+; RUN: llc -mtriple=armv7 %s -o - | FileCheck %s
+
+; CHECK: dmb ld
+
+define void @test2() #0 {
+ call void @llvm.arm.dmb(i32 13)
+ ret void
+}
+
+declare void @llvm.arm.dmb(i32)
+
+attributes #0 = { "target-cpu"="cyclone" }
diff --git a/test/CodeGen/ARM/private.ll b/test/CodeGen/ARM/private.ll
index e48c292db466..fab8f37f989a 100644
--- a/test/CodeGen/ARM/private.ll
+++ b/test/CodeGen/ARM/private.ll
@@ -15,7 +15,7 @@ define private void @foo() {
define i32 @bar() {
call void @foo()
- %1 = load i32* @baz, align 4
+ %1 = load i32, i32* @baz, align 4
ret i32 %1
}
diff --git a/test/CodeGen/ARM/reg_sequence.ll b/test/CodeGen/ARM/reg_sequence.ll
index feed5ad2830a..507ee48cafc2 100644
--- a/test/CodeGen/ARM/reg_sequence.ll
+++ b/test/CodeGen/ARM/reg_sequence.ll
@@ -19,10 +19,10 @@ entry:
; CHECK: vshrn.i32
; CHECK-NOT: vmov d
; CHECK-NEXT: vst1.16
- %0 = getelementptr inbounds %struct.int32x4_t* %vT0ptr, i32 0, i32 0 ; <<4 x i32>*> [#uses=1]
- %1 = load <4 x i32>* %0, align 16 ; <<4 x i32>> [#uses=1]
- %2 = getelementptr inbounds %struct.int32x4_t* %vT1ptr, i32 0, i32 0 ; <<4 x i32>*> [#uses=1]
- %3 = load <4 x i32>* %2, align 16 ; <<4 x i32>> [#uses=1]
+ %0 = getelementptr inbounds %struct.int32x4_t, %struct.int32x4_t* %vT0ptr, i32 0, i32 0 ; <<4 x i32>*> [#uses=1]
+ %1 = load <4 x i32>, <4 x i32>* %0, align 16 ; <<4 x i32>> [#uses=1]
+ %2 = getelementptr inbounds %struct.int32x4_t, %struct.int32x4_t* %vT1ptr, i32 0, i32 0 ; <<4 x i32>*> [#uses=1]
+ %3 = load <4 x i32>, <4 x i32>* %2, align 16 ; <<4 x i32>> [#uses=1]
%4 = bitcast i16* %i_ptr to i8* ; <i8*> [#uses=1]
%5 = tail call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %4, i32 1) ; <<8 x i16>> [#uses=1]
%6 = bitcast <8 x i16> %5 to <2 x double> ; <<2 x double>> [#uses=2]
@@ -55,20 +55,20 @@ entry:
; CHECK-NOT: vmov
; CHECK: vst1.16
; CHECK: vst1.16
- %0 = getelementptr inbounds %struct.int16x8_t* %vT0ptr, i32 0, i32 0 ; <<8 x i16>*> [#uses=1]
- %1 = load <8 x i16>* %0, align 16 ; <<8 x i16>> [#uses=1]
- %2 = getelementptr inbounds %struct.int16x8_t* %vT1ptr, i32 0, i32 0 ; <<8 x i16>*> [#uses=1]
- %3 = load <8 x i16>* %2, align 16 ; <<8 x i16>> [#uses=1]
+ %0 = getelementptr inbounds %struct.int16x8_t, %struct.int16x8_t* %vT0ptr, i32 0, i32 0 ; <<8 x i16>*> [#uses=1]
+ %1 = load <8 x i16>, <8 x i16>* %0, align 16 ; <<8 x i16>> [#uses=1]
+ %2 = getelementptr inbounds %struct.int16x8_t, %struct.int16x8_t* %vT1ptr, i32 0, i32 0 ; <<8 x i16>*> [#uses=1]
+ %3 = load <8 x i16>, <8 x i16>* %2, align 16 ; <<8 x i16>> [#uses=1]
%4 = bitcast i16* %i_ptr to i8* ; <i8*> [#uses=1]
%5 = tail call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %4, i32 1) ; <<8 x i16>> [#uses=1]
- %6 = getelementptr inbounds i16* %i_ptr, i32 8 ; <i16*> [#uses=1]
+ %6 = getelementptr inbounds i16, i16* %i_ptr, i32 8 ; <i16*> [#uses=1]
%7 = bitcast i16* %6 to i8* ; <i8*> [#uses=1]
%8 = tail call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %7, i32 1) ; <<8 x i16>> [#uses=1]
%9 = mul <8 x i16> %1, %5 ; <<8 x i16>> [#uses=1]
%10 = mul <8 x i16> %3, %8 ; <<8 x i16>> [#uses=1]
%11 = bitcast i16* %o_ptr to i8* ; <i8*> [#uses=1]
tail call void @llvm.arm.neon.vst1.v8i16(i8* %11, <8 x i16> %9, i32 1)
- %12 = getelementptr inbounds i16* %o_ptr, i32 8 ; <i16*> [#uses=1]
+ %12 = getelementptr inbounds i16, i16* %o_ptr, i32 8 ; <i16*> [#uses=1]
%13 = bitcast i16* %12 to i8* ; <i8*> [#uses=1]
tail call void @llvm.arm.neon.vst1.v8i16(i8* %13, <8 x i16> %10, i32 1)
ret void
@@ -102,7 +102,7 @@ entry:
; CHECK: bne
%tmp1 = bitcast i32* %in to i8* ; <i8*> [#uses=1]
%tmp2 = tail call %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2.v4i32(i8* %tmp1, i32 1) ; <%struct.__neon_int32x4x2_t> [#uses=2]
- %tmp3 = getelementptr inbounds i32* %in, i32 8 ; <i32*> [#uses=1]
+ %tmp3 = getelementptr inbounds i32, i32* %in, i32 8 ; <i32*> [#uses=1]
%tmp4 = bitcast i32* %tmp3 to i8* ; <i8*> [#uses=1]
%tmp5 = tail call %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2.v4i32(i8* %tmp4, i32 1) ; <%struct.__neon_int32x4x2_t> [#uses=2]
%tmp8 = bitcast i32* %out to i8* ; <i8*> [#uses=1]
@@ -146,7 +146,7 @@ define <8 x i16> @t5(i16* %A, <8 x i16>* %B) nounwind {
; CHECK-NOT: vmov
; CHECK: vadd.i16
%tmp0 = bitcast i16* %A to i8* ; <i8*> [#uses=1]
- %tmp1 = load <8 x i16>* %B ; <<8 x i16>> [#uses=2]
+ %tmp1 = load <8 x i16>, <8 x i16>* %B ; <<8 x i16>> [#uses=2]
%tmp2 = call %struct.__neon_int16x8x2_t @llvm.arm.neon.vld2lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1, i32 1) ; <%struct.__neon_int16x8x2_t> [#uses=2]
%tmp3 = extractvalue %struct.__neon_int16x8x2_t %tmp2, 0 ; <<8 x i16>> [#uses=1]
%tmp4 = extractvalue %struct.__neon_int16x8x2_t %tmp2, 1 ; <<8 x i16>> [#uses=1]
@@ -159,7 +159,7 @@ define <8 x i8> @t6(i8* %A, <8 x i8>* %B) nounwind {
; CHECK: vldr
; CHECK: vorr d[[D0:[0-9]+]], d[[D1:[0-9]+]]
; CHECK-NEXT: vld2.8 {d[[D1]][1], d[[D0]][1]}
- %tmp1 = load <8 x i8>* %B ; <<8 x i8>> [#uses=2]
+ %tmp1 = load <8 x i8>, <8 x i8>* %B ; <<8 x i8>> [#uses=2]
%tmp2 = call %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 1) ; <%struct.__neon_int8x8x2_t> [#uses=2]
%tmp3 = extractvalue %struct.__neon_int8x8x2_t %tmp2, 0 ; <<8 x i8>> [#uses=1]
%tmp4 = extractvalue %struct.__neon_int8x8x2_t %tmp2, 1 ; <<8 x i8>> [#uses=1]
diff --git a/test/CodeGen/ARM/regpair_hint_phys.ll b/test/CodeGen/ARM/regpair_hint_phys.ll
new file mode 100644
index 000000000000..8585a4c207c1
--- /dev/null
+++ b/test/CodeGen/ARM/regpair_hint_phys.ll
@@ -0,0 +1,22 @@
+; RUN: llc -o - %s
+; ARM target used to fail an assertion if RegPair{Odd|Even} hint pointed to a
+; physreg.
+target datalayout = "e-m:o-p:32:32-f64:32:64-v64:32:64-v128:32:128-a:0:32-n32-S32"
+target triple = "thumbv7-apple-tvos8.3.0"
+
+declare i8* @llvm.frameaddress(i32) #1
+declare i8* @llvm.returnaddress(i32) #1
+
+@somevar = global [2 x i32] [i32 0, i32 0]
+
+define void @__ubsan_handle_shift_out_of_bounds() #0 {
+entry:
+ %0 = tail call i8* @llvm.frameaddress(i32 0)
+ %1 = ptrtoint i8* %0 to i32
+ %2 = tail call i8* @llvm.returnaddress(i32 0)
+ %3 = ptrtoint i8* %2 to i32
+ %val0 = insertvalue [2 x i32] [i32 undef, i32 undef], i32 %3, 0
+ %val1 = insertvalue [2 x i32] %val0, i32 %1, 1
+ store [2 x i32] %val1, [2 x i32]* @somevar, align 8
+ ret void
+}
diff --git a/test/CodeGen/ARM/saxpy10-a9.ll b/test/CodeGen/ARM/saxpy10-a9.ll
index f8f5e18fcf5e..91610f1a232d 100644
--- a/test/CodeGen/ARM/saxpy10-a9.ll
+++ b/test/CodeGen/ARM/saxpy10-a9.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm -mtriple=thumbv7-apple-ios7.0.0 -float-abi=hard -mcpu=cortex-a9 -misched-postra -misched-bench -scheditins=false | FileCheck %s
+; RUN: llc < %s -march=arm -mtriple=thumbv7-apple-ios7.0.0 -float-abi=hard -mcpu=cortex-a9 -misched-postra -enable-misched -pre-RA-sched=source -scheditins=false | FileCheck %s
;
; Test MI-Sched support for latency based stalls on an in-order pipeline
; using the new machine model.
@@ -14,15 +14,12 @@ target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-
; CHECK: vldr
; CHECK: vldr
; CHECK: vldr
-; CHECK: vldr
+; CHECK-NEXT: vldr
+; CHECK-NEXT: vmul
; CHECK-NEXT: vadd
; CHECK-NEXT: vadd
; CHECK-NEXT: vldr
; CHECK-NEXT: vldr
-; CHECK-NEXT: vldr
-; CHECK-NEXT: vadd
-; CHECK-NEXT: vmul
-; CHECK-NEXT: vldr
; CHECK-NEXT: vadd
; CHECK-NEXT: vadd
; CHECK-NEXT: vmul
@@ -31,6 +28,7 @@ target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-
; CHECK-NEXT: vadd
; CHECK-NEXT: vldr
; CHECK-NEXT: vmul
+; CHECK-NEXT: vldr
; CHECK-NEXT: vadd
; CHECK-NEXT: vldr
; CHECK-NEXT: vadd
@@ -48,6 +46,8 @@ target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-
; CHECK-NEXT: vmul
; CHECK-NEXT: vadd
; CHECK-NEXT: vldr
+; CHECK-NEXT: vadd
+; CHECK-NEXT: vldr
; CHECK-NEXT: vmul
; CHECK-NEXT: vadd
; CHECK-NEXT: vldr
@@ -63,72 +63,72 @@ target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-
; This accumulates a sum rather than storing each result.
define float @saxpy10(float* nocapture readonly %data1, float* nocapture readonly %data2, float %a) {
entry:
- %0 = load float* %data1, align 4
+ %0 = load float, float* %data1, align 4
%mul = fmul float %0, %a
- %1 = load float* %data2, align 4
+ %1 = load float, float* %data2, align 4
%add = fadd float %mul, %1
%add2 = fadd float %add, 0.000000e+00
- %arrayidx.1 = getelementptr inbounds float* %data1, i32 1
- %2 = load float* %arrayidx.1, align 4
+ %arrayidx.1 = getelementptr inbounds float, float* %data1, i32 1
+ %2 = load float, float* %arrayidx.1, align 4
%mul.1 = fmul float %2, %a
- %arrayidx1.1 = getelementptr inbounds float* %data2, i32 1
- %3 = load float* %arrayidx1.1, align 4
+ %arrayidx1.1 = getelementptr inbounds float, float* %data2, i32 1
+ %3 = load float, float* %arrayidx1.1, align 4
%add.1 = fadd float %mul.1, %3
%add2.1 = fadd float %add2, %add.1
- %arrayidx.2 = getelementptr inbounds float* %data1, i32 2
- %4 = load float* %arrayidx.2, align 4
+ %arrayidx.2 = getelementptr inbounds float, float* %data1, i32 2
+ %4 = load float, float* %arrayidx.2, align 4
%mul.2 = fmul float %4, %a
- %arrayidx1.2 = getelementptr inbounds float* %data2, i32 2
- %5 = load float* %arrayidx1.2, align 4
+ %arrayidx1.2 = getelementptr inbounds float, float* %data2, i32 2
+ %5 = load float, float* %arrayidx1.2, align 4
%add.2 = fadd float %mul.2, %5
%add2.2 = fadd float %add2.1, %add.2
- %arrayidx.3 = getelementptr inbounds float* %data1, i32 3
- %6 = load float* %arrayidx.3, align 4
+ %arrayidx.3 = getelementptr inbounds float, float* %data1, i32 3
+ %6 = load float, float* %arrayidx.3, align 4
%mul.3 = fmul float %6, %a
- %arrayidx1.3 = getelementptr inbounds float* %data2, i32 3
- %7 = load float* %arrayidx1.3, align 4
+ %arrayidx1.3 = getelementptr inbounds float, float* %data2, i32 3
+ %7 = load float, float* %arrayidx1.3, align 4
%add.3 = fadd float %mul.3, %7
%add2.3 = fadd float %add2.2, %add.3
- %arrayidx.4 = getelementptr inbounds float* %data1, i32 4
- %8 = load float* %arrayidx.4, align 4
+ %arrayidx.4 = getelementptr inbounds float, float* %data1, i32 4
+ %8 = load float, float* %arrayidx.4, align 4
%mul.4 = fmul float %8, %a
- %arrayidx1.4 = getelementptr inbounds float* %data2, i32 4
- %9 = load float* %arrayidx1.4, align 4
+ %arrayidx1.4 = getelementptr inbounds float, float* %data2, i32 4
+ %9 = load float, float* %arrayidx1.4, align 4
%add.4 = fadd float %mul.4, %9
%add2.4 = fadd float %add2.3, %add.4
- %arrayidx.5 = getelementptr inbounds float* %data1, i32 5
- %10 = load float* %arrayidx.5, align 4
+ %arrayidx.5 = getelementptr inbounds float, float* %data1, i32 5
+ %10 = load float, float* %arrayidx.5, align 4
%mul.5 = fmul float %10, %a
- %arrayidx1.5 = getelementptr inbounds float* %data2, i32 5
- %11 = load float* %arrayidx1.5, align 4
+ %arrayidx1.5 = getelementptr inbounds float, float* %data2, i32 5
+ %11 = load float, float* %arrayidx1.5, align 4
%add.5 = fadd float %mul.5, %11
%add2.5 = fadd float %add2.4, %add.5
- %arrayidx.6 = getelementptr inbounds float* %data1, i32 6
- %12 = load float* %arrayidx.6, align 4
+ %arrayidx.6 = getelementptr inbounds float, float* %data1, i32 6
+ %12 = load float, float* %arrayidx.6, align 4
%mul.6 = fmul float %12, %a
- %arrayidx1.6 = getelementptr inbounds float* %data2, i32 6
- %13 = load float* %arrayidx1.6, align 4
+ %arrayidx1.6 = getelementptr inbounds float, float* %data2, i32 6
+ %13 = load float, float* %arrayidx1.6, align 4
%add.6 = fadd float %mul.6, %13
%add2.6 = fadd float %add2.5, %add.6
- %arrayidx.7 = getelementptr inbounds float* %data1, i32 7
- %14 = load float* %arrayidx.7, align 4
+ %arrayidx.7 = getelementptr inbounds float, float* %data1, i32 7
+ %14 = load float, float* %arrayidx.7, align 4
%mul.7 = fmul float %14, %a
- %arrayidx1.7 = getelementptr inbounds float* %data2, i32 7
- %15 = load float* %arrayidx1.7, align 4
+ %arrayidx1.7 = getelementptr inbounds float, float* %data2, i32 7
+ %15 = load float, float* %arrayidx1.7, align 4
%add.7 = fadd float %mul.7, %15
%add2.7 = fadd float %add2.6, %add.7
- %arrayidx.8 = getelementptr inbounds float* %data1, i32 8
- %16 = load float* %arrayidx.8, align 4
+ %arrayidx.8 = getelementptr inbounds float, float* %data1, i32 8
+ %16 = load float, float* %arrayidx.8, align 4
%mul.8 = fmul float %16, %a
- %arrayidx1.8 = getelementptr inbounds float* %data2, i32 8
- %17 = load float* %arrayidx1.8, align 4
+ %arrayidx1.8 = getelementptr inbounds float, float* %data2, i32 8
+ %17 = load float, float* %arrayidx1.8, align 4
%add.8 = fadd float %mul.8, %17
%add2.8 = fadd float %add2.7, %add.8
- %arrayidx.9 = getelementptr inbounds float* %data1, i32 9
- %18 = load float* %arrayidx.9, align 4
+ %arrayidx.9 = getelementptr inbounds float, float* %data1, i32 9
+ %18 = load float, float* %arrayidx.9, align 4
%mul.9 = fmul float %18, %a
- %arrayidx1.9 = getelementptr inbounds float* %data2, i32 9
- %19 = load float* %arrayidx1.9, align 4
+ %arrayidx1.9 = getelementptr inbounds float, float* %data2, i32 9
+ %19 = load float, float* %arrayidx1.9, align 4
%add.9 = fadd float %mul.9, %19
%add2.9 = fadd float %add2.8, %add.9
ret float %add2.9
diff --git a/test/CodeGen/ARM/sched-it-debug-nodes.ll b/test/CodeGen/ARM/sched-it-debug-nodes.ll
new file mode 100644
index 000000000000..7906e7c7581e
--- /dev/null
+++ b/test/CodeGen/ARM/sched-it-debug-nodes.ll
@@ -0,0 +1,88 @@
+; RUN: llc -mtriple thumbv7 -print-before=post-RA-sched -print-after=post-RA-sched %s -o /dev/null 2>&1 | FileCheck %s
+
+; ModuleID = '<stdin>'
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
+target triple = "thumbv7"
+
+%struct.s = type opaque
+
+; Function Attrs: nounwind
+define arm_aapcscc i32 @f(%struct.s* %s, i32 %u, i8* %b, i32 %n) #0 {
+entry:
+ tail call void @llvm.dbg.value(metadata %struct.s* %s, i64 0, metadata !19, metadata !28), !dbg !29
+ tail call void @llvm.dbg.value(metadata i32 %u, i64 0, metadata !20, metadata !28), !dbg !29
+ tail call void @llvm.dbg.value(metadata i8* %b, i64 0, metadata !21, metadata !28), !dbg !29
+ tail call void @llvm.dbg.value(metadata i32 %n, i64 0, metadata !22, metadata !28), !dbg !29
+ %cmp = icmp ult i32 %n, 4, !dbg !30
+ br i1 %cmp, label %return, label %if.end, !dbg !32
+
+if.end: ; preds = %entry
+ tail call arm_aapcscc void @g(%struct.s* %s, i8* %b, i32 %n) #3, !dbg !33
+ br label %return, !dbg !34
+
+return: ; preds = %entry, %if.end
+ %retval.0 = phi i32 [ 0, %if.end ], [ -1, %entry ]
+ ret i32 %retval.0, !dbg !35
+}
+
+; NOTE: This is checking that the register in the DEBUG_VALUE node is not
+; accidentally being marked as KILL. The DBG_VALUE node gets introduced in
+; If-Conversion, and gets bundled into the IT block. The Post RA Scheduler
+; attempts to schedule the Machine Instr, and tries to tag the register in the
+; debug value as KILL'ed, resulting in a DEBUG_VALUE node changing codegen! (or
+; hopefully, triggering an assert).
+
+; CHECK: BUNDLE %ITSTATE<imp-def,dead>
+; CHECK: * DBG_VALUE %R1, %noreg, !"u"
+; CHECK-NOT: * DBG_VALUE %R1<kill>, %noreg, !"u"
+
+declare arm_aapcscc void @g(%struct.s*, i8*, i32) #1
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #2
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { nounwind readnone }
+attributes #3 = { nounwind }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!23, !24, !25, !26}
+!llvm.ident = !{!27}
+
+!0 = !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.7.0 (llvm/trunk 237059)", isOptimized: true, runtimeVersion: 0, emissionKind: 1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!1 = !DIFile(filename: "<stdin>", directory: "/Users/compnerd/Source/llvm")
+!2 = !{}
+!3 = !{!4}
+!4 = !DISubprogram(name: "f", scope: !5, file: !5, line: 9, type: !6, isLocal: false, isDefinition: true, scopeLine: 9, flags: DIFlagPrototyped, isOptimized: true, function: i32 (%struct.s*, i32, i8*, i32)* @f, variables: !18)
+!5 = !DIFile(filename: "<stdin>", directory: "/Users/compnerd/Source/llvm")
+!6 = !DISubroutineType(types: !7)
+!7 = !{!8, !9, !12, !13, !17}
+!8 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!9 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !10, size: 32, align: 32)
+!10 = !DIDerivedType(tag: DW_TAG_typedef, name: "s", file: !5, line: 5, baseType: !11)
+!11 = !DICompositeType(tag: DW_TAG_structure_type, name: "s", file: !5, line: 5, flags: DIFlagFwdDecl)
+!12 = !DIBasicType(name: "unsigned int", size: 32, align: 32, encoding: DW_ATE_unsigned)
+!13 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !14, size: 32, align: 32)
+!14 = !DIDerivedType(tag: DW_TAG_const_type, baseType: !15)
+!15 = !DIDerivedType(tag: DW_TAG_typedef, name: "uint8_t", file: !5, line: 2, baseType: !16)
+!16 = !DIBasicType(name: "unsigned char", size: 8, align: 8, encoding: DW_ATE_unsigned_char)
+!17 = !DIDerivedType(tag: DW_TAG_typedef, name: "size_t", file: !5, line: 3, baseType: !12)
+!18 = !{!19, !20, !21, !22}
+!19 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "s", arg: 1, scope: !4, file: !5, line: 9, type: !9)
+!20 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "u", arg: 2, scope: !4, file: !5, line: 9, type: !12)
+!21 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "b", arg: 3, scope: !4, file: !5, line: 9, type: !13)
+!22 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "n", arg: 4, scope: !4, file: !5, line: 9, type: !17)
+!23 = !{i32 2, !"Dwarf Version", i32 4}
+!24 = !{i32 2, !"Debug Info Version", i32 3}
+!25 = !{i32 1, !"wchar_size", i32 4}
+!26 = !{i32 1, !"min_enum_size", i32 4}
+!27 = !{!"clang version 3.7.0 (llvm/trunk 237059)"}
+!28 = !DIExpression()
+!29 = !DILocation(line: 9, scope: !4)
+!30 = !DILocation(line: 10, scope: !31)
+!31 = distinct !DILexicalBlock(scope: !4, file: !5, line: 10)
+!32 = !DILocation(line: 10, scope: !4)
+!33 = !DILocation(line: 13, scope: !4)
+!34 = !DILocation(line: 14, scope: !4)
+!35 = !DILocation(line: 15, scope: !4)
diff --git a/test/CodeGen/ARM/section-name.ll b/test/CodeGen/ARM/section-name.ll
index a0aad4733bc8..a4c6054197f3 100644
--- a/test/CodeGen/ARM/section-name.ll
+++ b/test/CodeGen/ARM/section-name.ll
@@ -16,7 +16,7 @@ entry:
ret void
}
-; CHECK: .section .text.test3,"axG",%progbits,test3,comdat
+; CHECK: .text
; CHECK: .weak test3
; CHECK: .type test3,%function
define linkonce_odr void @test3() {
diff --git a/test/CodeGen/ARM/segmented-stacks.ll b/test/CodeGen/ARM/segmented-stacks.ll
index 9873bf332948..cbb124de11c3 100644
--- a/test/CodeGen/ARM/segmented-stacks.ll
+++ b/test/CodeGen/ARM/segmented-stacks.ll
@@ -55,7 +55,7 @@ define void @test_basic() #0 {
}
define i32 @test_nested(i32 * nest %closure, i32 %other) #0 {
- %addend = load i32 * %closure
+ %addend = load i32 , i32 * %closure
%result = add i32 %other, %addend
%mem = alloca i32, i32 10
call void @dummy_use (i32* %mem, i32 10)
diff --git a/test/CodeGen/ARM/select_xform.ll b/test/CodeGen/ARM/select_xform.ll
index 326eb512d858..460ca8f18405 100644
--- a/test/CodeGen/ARM/select_xform.ll
+++ b/test/CodeGen/ARM/select_xform.ll
@@ -183,7 +183,7 @@ define i32 @t12(i32 %a, i32 %b) nounwind {
; Handle frame index operands.
define void @pr13628() nounwind uwtable align 2 {
%x3 = alloca i8, i32 256, align 8
- %x4 = load i8* undef, align 1
+ %x4 = load i8, i8* undef, align 1
%x5 = icmp ne i8 %x4, 0
%x6 = select i1 %x5, i8* %x3, i8* null
call void @bar(i8* %x6) nounwind
diff --git a/test/CodeGen/ARM/shifter_operand.ll b/test/CodeGen/ARM/shifter_operand.ll
index f14adcae663c..6f5c0e8279a9 100644
--- a/test/CodeGen/ARM/shifter_operand.ll
+++ b/test/CodeGen/ARM/shifter_operand.ll
@@ -43,8 +43,8 @@ entry:
%tmp3 = inttoptr i32 %tmp2 to i32*
%tmp4 = add i32 %base2, %tmp1
%tmp5 = inttoptr i32 %tmp4 to i32*
- %tmp6 = load i32* %tmp3
- %tmp7 = load i32* %tmp5
+ %tmp6 = load i32, i32* %tmp3
+ %tmp7 = load i32, i32* %tmp5
%tmp8 = add i32 %tmp7, %tmp6
ret i32 %tmp8
}
@@ -64,11 +64,11 @@ entry:
; A9-NOT: ldr [[REG:r[0-9]+]], [r0, r1, lsl #2]!
; A9: str [[REG]], [r0, r1, lsl #2]
; A9-NOT: str [[REG]], [r0]
- %0 = tail call i8* (...)* @malloc(i32 undef) nounwind
+ %0 = tail call i8* (...) @malloc(i32 undef) nounwind
%1 = bitcast i8* %0 to i32*
%2 = sext i16 %addr to i32
- %3 = getelementptr inbounds i32* %1, i32 %2
- %4 = load i32* %3, align 4
+ %3 = getelementptr inbounds i32, i32* %1, i32 %2
+ %4 = load i32, i32* %3, align 4
%5 = add nsw i32 %4, 1
store i32 %5, i32* %3, align 4
ret void
diff --git a/test/CodeGen/ARM/sjlj-prepare-critical-edge.ll b/test/CodeGen/ARM/sjlj-prepare-critical-edge.ll
index d8241d0dc380..f85203e381b9 100644
--- a/test/CodeGen/ARM/sjlj-prepare-critical-edge.ll
+++ b/test/CodeGen/ARM/sjlj-prepare-critical-edge.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -O1 -mtriple thumbv7-apple-ios6
+; RUN: llc < %s -O1 -mtriple thumbv7-apple-ios6 | FileCheck %s
; Just make sure no one tries to make the assumption that the normal edge of an
; invoke is never a critical edge. Previously, this code would assert.
@@ -65,3 +65,129 @@ declare i32 @__gxx_personality_sj0(...)
declare void @release(i8*)
declare void @terminatev()
+
+; Make sure that the instruction DemoteRegToStack inserts to reload
+; %call.i.i.i14.i.i follows the instruction that saves the value to the stack in
+; basic block %entry.do.body.i.i.i_crit_edge.
+; Previously, DemoteRegToStack would insert a load instruction into the entry
+; block to reload %call.i.i.i14.i.i before the phi instruction (%0) in block
+; %do.body.i.i.i.
+
+; CHECK-LABEL: __Z4foo1c:
+; CHECK: blx __Znwm
+; CHECK: {{.*}}@ %entry.do.body.i.i.i_crit_edge
+; CHECK: str r0, [sp, [[OFFSET:#[0-9]+]]]
+; CHECK: ldr [[R0:r[0-9]+]], [sp, [[OFFSET]]]
+; CHECK: {{.*}}@ %do.body.i.i.i
+; CHECK: cbz [[R0]]
+
+%"class.std::__1::basic_string" = type { %"class.std::__1::__compressed_pair" }
+%"class.std::__1::__compressed_pair" = type { %"class.std::__1::__libcpp_compressed_pair_imp" }
+%"class.std::__1::__libcpp_compressed_pair_imp" = type { %"struct.std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >::__rep" }
+%"struct.std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >::__rep" = type { %union.anon }
+%union.anon = type { %"struct.std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >::__long" }
+%"struct.std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >::__long" = type { i32, i32, i8* }
+
+@.str = private unnamed_addr constant [12 x i8] c"some_string\00", align 1
+
+define void @_Z4foo1c(i8 signext %a) {
+entry:
+ %s1 = alloca %"class.std::__1::basic_string", align 4
+ call void @_ZNSt3__112basic_stringIcNS_11char_traitsIcEENS_9allocatorIcEEE6__initEPKcm(%"class.std::__1::basic_string"* %s1, i8* getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i32 0, i32 0), i32 11)
+ %call.i.i.i14.i.i = invoke noalias i8* @_Znwm(i32 1024)
+ to label %do.body.i.i.i unwind label %lpad.body
+
+do.body.i.i.i: ; preds = %entry, %_ZNSt3__116allocator_traitsINS_9allocatorIcEEE9constructIccEEvRS2_PT_RKT0_.exit.i.i.i
+ %lsr.iv = phi i32 [ %lsr.iv.next, %_ZNSt3__116allocator_traitsINS_9allocatorIcEEE9constructIccEEvRS2_PT_RKT0_.exit.i.i.i ], [ -1024, %entry ]
+ %0 = phi i8* [ %incdec.ptr.i.i.i, %_ZNSt3__116allocator_traitsINS_9allocatorIcEEE9constructIccEEvRS2_PT_RKT0_.exit.i.i.i ], [ %call.i.i.i14.i.i, %entry ]
+ %new.isnull.i.i.i.i = icmp eq i8* %0, null
+ br i1 %new.isnull.i.i.i.i, label %_ZNSt3__116allocator_traitsINS_9allocatorIcEEE9constructIccEEvRS2_PT_RKT0_.exit.i.i.i, label %new.notnull.i.i.i.i
+
+new.notnull.i.i.i.i: ; preds = %do.body.i.i.i
+ store i8 %a, i8* %0, align 1
+ br label %_ZNSt3__116allocator_traitsINS_9allocatorIcEEE9constructIccEEvRS2_PT_RKT0_.exit.i.i.i
+
+_ZNSt3__116allocator_traitsINS_9allocatorIcEEE9constructIccEEvRS2_PT_RKT0_.exit.i.i.i: ; preds = %new.notnull.i.i.i.i, %do.body.i.i.i
+ %1 = phi i8* [ null, %do.body.i.i.i ], [ %0, %new.notnull.i.i.i.i ]
+ %incdec.ptr.i.i.i = getelementptr inbounds i8, i8* %1, i32 1
+ %lsr.iv.next = add i32 %lsr.iv, 1
+ %cmp.i16.i.i = icmp eq i32 %lsr.iv.next, 0
+ br i1 %cmp.i16.i.i, label %invoke.cont, label %do.body.i.i.i
+
+invoke.cont: ; preds = %_ZNSt3__116allocator_traitsINS_9allocatorIcEEE9constructIccEEvRS2_PT_RKT0_.exit.i.i.i
+ invoke void @_Z4foo2Pci(i8* %call.i.i.i14.i.i, i32 1024)
+ to label %invoke.cont5 unwind label %lpad2
+
+invoke.cont5: ; preds = %invoke.cont
+ %cmp.i.i.i15 = icmp eq i8* %call.i.i.i14.i.i, null
+ br i1 %cmp.i.i.i15, label %invoke.cont6, label %_ZNSt3__113__vector_baseIcNS_9allocatorIcEEE5clearEv.exit.i.i.i19
+
+_ZNSt3__113__vector_baseIcNS_9allocatorIcEEE5clearEv.exit.i.i.i19: ; preds = %invoke.cont5
+ call void @_ZdlPv(i8* %call.i.i.i14.i.i)
+ br label %invoke.cont6
+
+invoke.cont6: ; preds = %_ZNSt3__113__vector_baseIcNS_9allocatorIcEEE5clearEv.exit.i.i.i19, %invoke.cont5
+ %call10 = call %"class.std::__1::basic_string"* @_ZNSt3__112basic_stringIcNS_11char_traitsIcEENS_9allocatorIcEEED1Ev(%"class.std::__1::basic_string"* %s1)
+ ret void
+
+lpad.body: ; preds = %entry
+ %2 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+ cleanup
+ %3 = extractvalue { i8*, i32 } %2, 0
+ %4 = extractvalue { i8*, i32 } %2, 1
+ br label %ehcleanup
+
+lpad2: ; preds = %invoke.cont
+ %5 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+ cleanup
+ %6 = extractvalue { i8*, i32 } %5, 0
+ %7 = extractvalue { i8*, i32 } %5, 1
+ %cmp.i.i.i21 = icmp eq i8* %call.i.i.i14.i.i, null
+ br i1 %cmp.i.i.i21, label %ehcleanup, label %_ZNSt3__113__vector_baseIcNS_9allocatorIcEEE5clearEv.exit.i.i.i26
+
+_ZNSt3__113__vector_baseIcNS_9allocatorIcEEE5clearEv.exit.i.i.i26: ; preds = %lpad2
+ call void @_ZdlPv(i8* %call.i.i.i14.i.i)
+ br label %ehcleanup
+
+ehcleanup: ; preds = %_ZNSt3__113__vector_baseIcNS_9allocatorIcEEE5clearEv.exit.i.i.i26, %lpad2, %lpad.body
+ %exn.slot.0 = phi i8* [ %3, %lpad.body ], [ %6, %lpad2 ], [ %6, %_ZNSt3__113__vector_baseIcNS_9allocatorIcEEE5clearEv.exit.i.i.i26 ]
+ %ehselector.slot.0 = phi i32 [ %4, %lpad.body ], [ %7, %lpad2 ], [ %7, %_ZNSt3__113__vector_baseIcNS_9allocatorIcEEE5clearEv.exit.i.i.i26 ]
+ %call12 = invoke %"class.std::__1::basic_string"* @_ZNSt3__112basic_stringIcNS_11char_traitsIcEENS_9allocatorIcEEED1Ev(%"class.std::__1::basic_string"* %s1)
+ to label %eh.resume unwind label %terminate.lpad
+
+eh.resume: ; preds = %ehcleanup
+ %lpad.val = insertvalue { i8*, i32 } undef, i8* %exn.slot.0, 0
+ %lpad.val13 = insertvalue { i8*, i32 } %lpad.val, i32 %ehselector.slot.0, 1
+ resume { i8*, i32 } %lpad.val13
+
+terminate.lpad: ; preds = %ehcleanup
+ %8 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+ catch i8* null
+ %9 = extractvalue { i8*, i32 } %8, 0
+ call void @__clang_call_terminate(i8* %9)
+ unreachable
+}
+
+declare void @_Z4foo2Pci(i8*, i32)
+
+define linkonce_odr hidden void @__clang_call_terminate(i8*) {
+ %2 = tail call i8* @__cxa_begin_catch(i8* %0)
+ tail call void @_ZSt9terminatev()
+ unreachable
+}
+
+declare i8* @__cxa_begin_catch(i8*)
+declare void @_ZSt9terminatev()
+declare %"class.std::__1::basic_string"* @_ZNSt3__112basic_stringIcNS_11char_traitsIcEENS_9allocatorIcEEED1Ev(%"class.std::__1::basic_string"* returned)
+declare void @_ZdlPv(i8*) #3
+declare noalias i8* @_Znwm(i32)
+declare void @_ZNSt3__112basic_stringIcNS_11char_traitsIcEENS_9allocatorIcEEE6__initEPKcm(%"class.std::__1::basic_string"*, i8*, i32)
+declare void @_Unwind_SjLj_Register({ i8*, i32, [4 x i32], i8*, i8*, [5 x i8*] }*)
+declare void @_Unwind_SjLj_Unregister({ i8*, i32, [4 x i32], i8*, i8*, [5 x i8*] }*)
+declare i8* @llvm.frameaddress(i32)
+declare i8* @llvm.stacksave()
+declare void @llvm.stackrestore(i8*)
+declare i32 @llvm.eh.sjlj.setjmp(i8*)
+declare i8* @llvm.eh.sjlj.lsda()
+declare void @llvm.eh.sjlj.callsite(i32)
+declare void @llvm.eh.sjlj.functioncontext(i8*)
diff --git a/test/CodeGen/ARM/smul.ll b/test/CodeGen/ARM/smul.ll
index b7ddd10a5682..13873f511e1f 100644
--- a/test/CodeGen/ARM/smul.ll
+++ b/test/CodeGen/ARM/smul.ll
@@ -7,7 +7,7 @@
define i32 @f1(i32 %y) {
; CHECK: f1
; CHECK: smulbt
- %tmp = load i16* @x ; <i16> [#uses=1]
+ %tmp = load i16, i16* @x ; <i16> [#uses=1]
%tmp1 = add i16 %tmp, 2 ; <i16> [#uses=1]
%tmp2 = sext i16 %tmp1 to i32 ; <i32> [#uses=1]
%tmp3 = ashr i32 %y, 16 ; <i32> [#uses=1]
diff --git a/test/CodeGen/ARM/space-directive.ll b/test/CodeGen/ARM/space-directive.ll
index 55be1991fe82..24f0d0aadf60 100644
--- a/test/CodeGen/ARM/space-directive.ll
+++ b/test/CodeGen/ARM/space-directive.ll
@@ -11,7 +11,7 @@ define i32 @test_space() minsize {
; CHECK: [[PAST_CP]]:
; CHECK: .zero 10000
%addr = inttoptr i32 12345678 to i32*
- %val = load i32* %addr
+ %val = load i32, i32* %addr
call i32 @llvm.arm.space(i32 10000, i32 undef)
ret i32 %val
}
diff --git a/test/CodeGen/ARM/spill-q.ll b/test/CodeGen/ARM/spill-q.ll
index 425fc12755cd..1a102e3d971f 100644
--- a/test/CodeGen/ARM/spill-q.ll
+++ b/test/CodeGen/ARM/spill-q.ll
@@ -43,7 +43,7 @@ entry:
store float 0.000000e+00, float* undef, align 4
%ld12 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind
store float 0.000000e+00, float* undef, align 4
- %val173 = load <4 x float>* undef ; <<4 x float>> [#uses=1]
+ %val173 = load <4 x float>, <4 x float>* undef ; <<4 x float>> [#uses=1]
br label %bb4
bb4: ; preds = %bb193, %entry
diff --git a/test/CodeGen/ARM/ssp-data-layout.ll b/test/CodeGen/ARM/ssp-data-layout.ll
index e7dafac7020d..92fa0809ed2d 100644
--- a/test/CodeGen/ARM/ssp-data-layout.ll
+++ b/test/CodeGen/ARM/ssp-data-layout.ll
@@ -21,13 +21,13 @@
define void @layout_ssp() ssp {
entry:
; Expected stack layout for ssp is
-; 180 large_char . Group 1, nested arrays, arrays >= ssp-buffer-size
-; 172 struct_large_char .
-; 168 scalar1 | Everything else
-; 164 scalar2
-; 160 scalar3
-; 156 addr-of
-; 152 small_nonchar (84+68)
+; 176 large_char . Group 1, nested arrays, arrays >= ssp-buffer-size
+; 168 struct_large_char .
+; 164 scalar1 | Everything else
+; 160 scalar2
+; 156 scalar3
+; 152 addr-of
+; 148 small_nonchar
; 112 large_nonchar
; 110 small_char
; 108 struct_small_char
@@ -35,27 +35,25 @@ entry:
; 68 struct_small_nonchar
; CHECK: layout_ssp:
-; r[[SP]] is used as an offset into the stack later
-; CHECK: add r[[SP:[0-9]+]], sp, #68
; CHECK: bl get_scalar1
-; CHECK: str r0, [sp, #168]
+; CHECK: str r0, [sp, #164]
; CHECK: bl end_scalar1
; CHECK: bl get_scalar2
-; CHECK: str r0, [sp, #164]
+; CHECK: str r0, [sp, #160]
; CHECK: bl end_scalar2
; CHECK: bl get_scalar3
-; CHECK: str r0, [sp, #160]
+; CHECK: str r0, [sp, #156]
; CHECK: bl end_scalar3
; CHECK: bl get_addrof
-; CHECK: str r0, [sp, #156]
+; CHECK: str r0, [sp, #152]
; CHECK: bl end_addrof
; CHECK: get_small_nonchar
-; CHECK: strh r0, [r[[SP]], #84]
+; CHECK: strh r0, [sp, #148]
; CHECK: bl end_small_nonchar
; CHECK: bl get_large_nonchar
@@ -67,11 +65,11 @@ entry:
; CHECK: bl end_small_char
; CHECK: bl get_large_char
-; CHECK: strb r0, [sp, #180]
+; CHECK: strb r0, [sp, #176]
; CHECK: bl end_large_char
; CHECK: bl get_struct_large_char
-; CHECK: strb r0, [sp, #172]
+; CHECK: strb r0, [sp, #168]
; CHECK: bl end_struct_large_char
; CHECK: bl get_struct_small_char
@@ -83,7 +81,7 @@ entry:
; CHECK: bl end_struct_large_nonchar
; CHECK: bl get_struct_small_nonchar
-; CHECK: strh r0, [r[[SP]]]
+; CHECK: strh r0, [sp, #68]
; CHECK: bl end_struct_small_nonchar
%x = alloca i32, align 4
%y = alloca i32, align 4
@@ -110,58 +108,58 @@ entry:
store i32 %call3, i32* %ptr, align 4
call void @end_addrof()
%call4 = call signext i16 @get_small_nonchar()
- %arrayidx = getelementptr inbounds [2 x i16]* %small2, i32 0, i64 0
+ %arrayidx = getelementptr inbounds [2 x i16], [2 x i16]* %small2, i32 0, i64 0
store i16 %call4, i16* %arrayidx, align 2
call void @end_small_nonchar()
%call5 = call i32 @get_large_nonchar()
- %arrayidx6 = getelementptr inbounds [8 x i32]* %large2, i32 0, i64 0
+ %arrayidx6 = getelementptr inbounds [8 x i32], [8 x i32]* %large2, i32 0, i64 0
store i32 %call5, i32* %arrayidx6, align 4
call void @end_large_nonchar()
%call7 = call signext i8 @get_small_char()
- %arrayidx8 = getelementptr inbounds [2 x i8]* %small, i32 0, i64 0
+ %arrayidx8 = getelementptr inbounds [2 x i8], [2 x i8]* %small, i32 0, i64 0
store i8 %call7, i8* %arrayidx8, align 1
call void @end_small_char()
%call9 = call signext i8 @get_large_char()
- %arrayidx10 = getelementptr inbounds [8 x i8]* %large, i32 0, i64 0
+ %arrayidx10 = getelementptr inbounds [8 x i8], [8 x i8]* %large, i32 0, i64 0
store i8 %call9, i8* %arrayidx10, align 1
call void @end_large_char()
%call11 = call signext i8 @get_struct_large_char()
- %foo = getelementptr inbounds %struct.struct_large_char* %a, i32 0, i32 0
- %arrayidx12 = getelementptr inbounds [8 x i8]* %foo, i32 0, i64 0
+ %foo = getelementptr inbounds %struct.struct_large_char, %struct.struct_large_char* %a, i32 0, i32 0
+ %arrayidx12 = getelementptr inbounds [8 x i8], [8 x i8]* %foo, i32 0, i64 0
store i8 %call11, i8* %arrayidx12, align 1
call void @end_struct_large_char()
%call13 = call signext i8 @get_struct_small_char()
- %foo14 = getelementptr inbounds %struct.struct_small_char* %b, i32 0, i32 0
- %arrayidx15 = getelementptr inbounds [2 x i8]* %foo14, i32 0, i64 0
+ %foo14 = getelementptr inbounds %struct.struct_small_char, %struct.struct_small_char* %b, i32 0, i32 0
+ %arrayidx15 = getelementptr inbounds [2 x i8], [2 x i8]* %foo14, i32 0, i64 0
store i8 %call13, i8* %arrayidx15, align 1
call void @end_struct_small_char()
%call16 = call i32 @get_struct_large_nonchar()
- %foo17 = getelementptr inbounds %struct.struct_large_nonchar* %c, i32 0, i32 0
- %arrayidx18 = getelementptr inbounds [8 x i32]* %foo17, i32 0, i64 0
+ %foo17 = getelementptr inbounds %struct.struct_large_nonchar, %struct.struct_large_nonchar* %c, i32 0, i32 0
+ %arrayidx18 = getelementptr inbounds [8 x i32], [8 x i32]* %foo17, i32 0, i64 0
store i32 %call16, i32* %arrayidx18, align 4
call void @end_struct_large_nonchar()
%call19 = call signext i16 @get_struct_small_nonchar()
- %foo20 = getelementptr inbounds %struct.struct_small_nonchar* %d, i32 0, i32 0
- %arrayidx21 = getelementptr inbounds [2 x i16]* %foo20, i32 0, i64 0
+ %foo20 = getelementptr inbounds %struct.struct_small_nonchar, %struct.struct_small_nonchar* %d, i32 0, i32 0
+ %arrayidx21 = getelementptr inbounds [2 x i16], [2 x i16]* %foo20, i32 0, i64 0
store i16 %call19, i16* %arrayidx21, align 2
call void @end_struct_small_nonchar()
- %arraydecay = getelementptr inbounds [8 x i8]* %large, i32 0, i32 0
- %arraydecay22 = getelementptr inbounds [2 x i8]* %small, i32 0, i32 0
- %arraydecay23 = getelementptr inbounds [8 x i32]* %large2, i32 0, i32 0
- %arraydecay24 = getelementptr inbounds [2 x i16]* %small2, i32 0, i32 0
- %0 = load i32* %x, align 4
- %1 = load i32* %y, align 4
- %2 = load i32* %z, align 4
- %coerce.dive = getelementptr %struct.struct_large_char* %a, i32 0, i32 0
+ %arraydecay = getelementptr inbounds [8 x i8], [8 x i8]* %large, i32 0, i32 0
+ %arraydecay22 = getelementptr inbounds [2 x i8], [2 x i8]* %small, i32 0, i32 0
+ %arraydecay23 = getelementptr inbounds [8 x i32], [8 x i32]* %large2, i32 0, i32 0
+ %arraydecay24 = getelementptr inbounds [2 x i16], [2 x i16]* %small2, i32 0, i32 0
+ %0 = load i32, i32* %x, align 4
+ %1 = load i32, i32* %y, align 4
+ %2 = load i32, i32* %z, align 4
+ %coerce.dive = getelementptr %struct.struct_large_char, %struct.struct_large_char* %a, i32 0, i32 0
%3 = bitcast [8 x i8]* %coerce.dive to i64*
- %4 = load i64* %3, align 1
- %coerce.dive25 = getelementptr %struct.struct_small_char* %b, i32 0, i32 0
+ %4 = load i64, i64* %3, align 1
+ %coerce.dive25 = getelementptr %struct.struct_small_char, %struct.struct_small_char* %b, i32 0, i32 0
%5 = bitcast [2 x i8]* %coerce.dive25 to i16*
- %6 = load i16* %5, align 1
- %coerce.dive26 = getelementptr %struct.struct_small_nonchar* %d, i32 0, i32 0
+ %6 = load i16, i16* %5, align 1
+ %coerce.dive26 = getelementptr %struct.struct_small_nonchar, %struct.struct_small_nonchar* %d, i32 0, i32 0
%7 = bitcast [2 x i16]* %coerce.dive26 to i32*
- %8 = load i32* %7, align 1
- call void @takes_all(i64 %4, i16 %6, %struct.struct_large_nonchar* byval align 8 %c, i32 %8, i8* %arraydecay, i8* %arraydecay22, i32* %arraydecay23, i16* %arraydecay24, i32* %ptr, i32 %0, i32 %1, i32 %2)
+ %8 = load i32, i32* %7, align 1
+ call void @takes_all(i64 %4, i16 %6, %struct.struct_large_nonchar* byval align 4 %c, i32 %8, i8* %arraydecay, i8* %arraydecay22, i32* %arraydecay23, i16* %arraydecay24, i32* %ptr, i32 %0, i32 %1, i32 %2)
ret void
}
@@ -182,8 +180,6 @@ entry:
; 68 scalar3 +
;
; CHECK: layout_sspstrong:
-; r[[SP]] is used as an offset into the stack later
-; CHECK: add r[[SP:[0-9]+]], sp, #84
; CHECK: bl get_scalar1
; CHECK: str r0, [sp, #76]
@@ -202,7 +198,7 @@ entry:
; CHECK: bl end_addrof
; CHECK: get_small_nonchar
-; CHECK: strh r0, [r[[SP]], #8]
+; CHECK: strh r0, [sp, #92]
; CHECK: bl end_small_nonchar
; CHECK: bl get_large_nonchar
@@ -230,7 +226,7 @@ entry:
; CHECK: bl end_struct_large_nonchar
; CHECK: bl get_struct_small_nonchar
-; CHECK: strh r0, [r[[SP]]]
+; CHECK: strh r0, [sp, #84]
; CHECK: bl end_struct_small_nonchar
%x = alloca i32, align 4
%y = alloca i32, align 4
@@ -257,58 +253,58 @@ entry:
store i32 %call3, i32* %ptr, align 4
call void @end_addrof()
%call4 = call signext i16 @get_small_nonchar()
- %arrayidx = getelementptr inbounds [2 x i16]* %small2, i32 0, i64 0
+ %arrayidx = getelementptr inbounds [2 x i16], [2 x i16]* %small2, i32 0, i64 0
store i16 %call4, i16* %arrayidx, align 2
call void @end_small_nonchar()
%call5 = call i32 @get_large_nonchar()
- %arrayidx6 = getelementptr inbounds [8 x i32]* %large2, i32 0, i64 0
+ %arrayidx6 = getelementptr inbounds [8 x i32], [8 x i32]* %large2, i32 0, i64 0
store i32 %call5, i32* %arrayidx6, align 4
call void @end_large_nonchar()
%call7 = call signext i8 @get_small_char()
- %arrayidx8 = getelementptr inbounds [2 x i8]* %small, i32 0, i64 0
+ %arrayidx8 = getelementptr inbounds [2 x i8], [2 x i8]* %small, i32 0, i64 0
store i8 %call7, i8* %arrayidx8, align 1
call void @end_small_char()
%call9 = call signext i8 @get_large_char()
- %arrayidx10 = getelementptr inbounds [8 x i8]* %large, i32 0, i64 0
+ %arrayidx10 = getelementptr inbounds [8 x i8], [8 x i8]* %large, i32 0, i64 0
store i8 %call9, i8* %arrayidx10, align 1
call void @end_large_char()
%call11 = call signext i8 @get_struct_large_char()
- %foo = getelementptr inbounds %struct.struct_large_char* %a, i32 0, i32 0
- %arrayidx12 = getelementptr inbounds [8 x i8]* %foo, i32 0, i64 0
+ %foo = getelementptr inbounds %struct.struct_large_char, %struct.struct_large_char* %a, i32 0, i32 0
+ %arrayidx12 = getelementptr inbounds [8 x i8], [8 x i8]* %foo, i32 0, i64 0
store i8 %call11, i8* %arrayidx12, align 1
call void @end_struct_large_char()
%call13 = call signext i8 @get_struct_small_char()
- %foo14 = getelementptr inbounds %struct.struct_small_char* %b, i32 0, i32 0
- %arrayidx15 = getelementptr inbounds [2 x i8]* %foo14, i32 0, i64 0
+ %foo14 = getelementptr inbounds %struct.struct_small_char, %struct.struct_small_char* %b, i32 0, i32 0
+ %arrayidx15 = getelementptr inbounds [2 x i8], [2 x i8]* %foo14, i32 0, i64 0
store i8 %call13, i8* %arrayidx15, align 1
call void @end_struct_small_char()
%call16 = call i32 @get_struct_large_nonchar()
- %foo17 = getelementptr inbounds %struct.struct_large_nonchar* %c, i32 0, i32 0
- %arrayidx18 = getelementptr inbounds [8 x i32]* %foo17, i32 0, i64 0
+ %foo17 = getelementptr inbounds %struct.struct_large_nonchar, %struct.struct_large_nonchar* %c, i32 0, i32 0
+ %arrayidx18 = getelementptr inbounds [8 x i32], [8 x i32]* %foo17, i32 0, i64 0
store i32 %call16, i32* %arrayidx18, align 4
call void @end_struct_large_nonchar()
%call19 = call signext i16 @get_struct_small_nonchar()
- %foo20 = getelementptr inbounds %struct.struct_small_nonchar* %d, i32 0, i32 0
- %arrayidx21 = getelementptr inbounds [2 x i16]* %foo20, i32 0, i64 0
+ %foo20 = getelementptr inbounds %struct.struct_small_nonchar, %struct.struct_small_nonchar* %d, i32 0, i32 0
+ %arrayidx21 = getelementptr inbounds [2 x i16], [2 x i16]* %foo20, i32 0, i64 0
store i16 %call19, i16* %arrayidx21, align 2
call void @end_struct_small_nonchar()
- %arraydecay = getelementptr inbounds [8 x i8]* %large, i32 0, i32 0
- %arraydecay22 = getelementptr inbounds [2 x i8]* %small, i32 0, i32 0
- %arraydecay23 = getelementptr inbounds [8 x i32]* %large2, i32 0, i32 0
- %arraydecay24 = getelementptr inbounds [2 x i16]* %small2, i32 0, i32 0
- %0 = load i32* %x, align 4
- %1 = load i32* %y, align 4
- %2 = load i32* %z, align 4
- %coerce.dive = getelementptr %struct.struct_large_char* %a, i32 0, i32 0
+ %arraydecay = getelementptr inbounds [8 x i8], [8 x i8]* %large, i32 0, i32 0
+ %arraydecay22 = getelementptr inbounds [2 x i8], [2 x i8]* %small, i32 0, i32 0
+ %arraydecay23 = getelementptr inbounds [8 x i32], [8 x i32]* %large2, i32 0, i32 0
+ %arraydecay24 = getelementptr inbounds [2 x i16], [2 x i16]* %small2, i32 0, i32 0
+ %0 = load i32, i32* %x, align 4
+ %1 = load i32, i32* %y, align 4
+ %2 = load i32, i32* %z, align 4
+ %coerce.dive = getelementptr %struct.struct_large_char, %struct.struct_large_char* %a, i32 0, i32 0
%3 = bitcast [8 x i8]* %coerce.dive to i64*
- %4 = load i64* %3, align 1
- %coerce.dive25 = getelementptr %struct.struct_small_char* %b, i32 0, i32 0
+ %4 = load i64, i64* %3, align 1
+ %coerce.dive25 = getelementptr %struct.struct_small_char, %struct.struct_small_char* %b, i32 0, i32 0
%5 = bitcast [2 x i8]* %coerce.dive25 to i16*
- %6 = load i16* %5, align 1
- %coerce.dive26 = getelementptr %struct.struct_small_nonchar* %d, i32 0, i32 0
+ %6 = load i16, i16* %5, align 1
+ %coerce.dive26 = getelementptr %struct.struct_small_nonchar, %struct.struct_small_nonchar* %d, i32 0, i32 0
%7 = bitcast [2 x i16]* %coerce.dive26 to i32*
- %8 = load i32* %7, align 1
- call void @takes_all(i64 %4, i16 %6, %struct.struct_large_nonchar* byval align 8 %c, i32 %8, i8* %arraydecay, i8* %arraydecay22, i32* %arraydecay23, i16* %arraydecay24, i32* %ptr, i32 %0, i32 %1, i32 %2)
+ %8 = load i32, i32* %7, align 1
+ call void @takes_all(i64 %4, i16 %6, %struct.struct_large_nonchar* byval align 4 %c, i32 %8, i8* %arraydecay, i8* %arraydecay22, i32* %arraydecay23, i16* %arraydecay24, i32* %ptr, i32 %0, i32 %1, i32 %2)
ret void
}
@@ -317,8 +313,6 @@ entry:
; Expected stack layout for sspreq is the same as sspstrong
;
; CHECK: layout_sspreq:
-; r[[SP]] is used as an offset into the stack later
-; CHECK: add r[[SP:[0-9]+]], sp, #84
; CHECK: bl get_scalar1
; CHECK: str r0, [sp, #76]
@@ -337,7 +331,7 @@ entry:
; CHECK: bl end_addrof
; CHECK: get_small_nonchar
-; CHECK: strh r0, [r[[SP]], #8]
+; CHECK: strh r0, [sp, #92]
; CHECK: bl end_small_nonchar
; CHECK: bl get_large_nonchar
@@ -365,7 +359,7 @@ entry:
; CHECK: bl end_struct_large_nonchar
; CHECK: bl get_struct_small_nonchar
-; CHECK: strh r0, [r[[SP]]]
+; CHECK: strh r0, [sp, #84]
; CHECK: bl end_struct_small_nonchar
%x = alloca i32, align 4
%y = alloca i32, align 4
@@ -392,58 +386,58 @@ entry:
store i32 %call3, i32* %ptr, align 4
call void @end_addrof()
%call4 = call signext i16 @get_small_nonchar()
- %arrayidx = getelementptr inbounds [2 x i16]* %small2, i32 0, i64 0
+ %arrayidx = getelementptr inbounds [2 x i16], [2 x i16]* %small2, i32 0, i64 0
store i16 %call4, i16* %arrayidx, align 2
call void @end_small_nonchar()
%call5 = call i32 @get_large_nonchar()
- %arrayidx6 = getelementptr inbounds [8 x i32]* %large2, i32 0, i64 0
+ %arrayidx6 = getelementptr inbounds [8 x i32], [8 x i32]* %large2, i32 0, i64 0
store i32 %call5, i32* %arrayidx6, align 4
call void @end_large_nonchar()
%call7 = call signext i8 @get_small_char()
- %arrayidx8 = getelementptr inbounds [2 x i8]* %small, i32 0, i64 0
+ %arrayidx8 = getelementptr inbounds [2 x i8], [2 x i8]* %small, i32 0, i64 0
store i8 %call7, i8* %arrayidx8, align 1
call void @end_small_char()
%call9 = call signext i8 @get_large_char()
- %arrayidx10 = getelementptr inbounds [8 x i8]* %large, i32 0, i64 0
+ %arrayidx10 = getelementptr inbounds [8 x i8], [8 x i8]* %large, i32 0, i64 0
store i8 %call9, i8* %arrayidx10, align 1
call void @end_large_char()
%call11 = call signext i8 @get_struct_large_char()
- %foo = getelementptr inbounds %struct.struct_large_char* %a, i32 0, i32 0
- %arrayidx12 = getelementptr inbounds [8 x i8]* %foo, i32 0, i64 0
+ %foo = getelementptr inbounds %struct.struct_large_char, %struct.struct_large_char* %a, i32 0, i32 0
+ %arrayidx12 = getelementptr inbounds [8 x i8], [8 x i8]* %foo, i32 0, i64 0
store i8 %call11, i8* %arrayidx12, align 1
call void @end_struct_large_char()
%call13 = call signext i8 @get_struct_small_char()
- %foo14 = getelementptr inbounds %struct.struct_small_char* %b, i32 0, i32 0
- %arrayidx15 = getelementptr inbounds [2 x i8]* %foo14, i32 0, i64 0
+ %foo14 = getelementptr inbounds %struct.struct_small_char, %struct.struct_small_char* %b, i32 0, i32 0
+ %arrayidx15 = getelementptr inbounds [2 x i8], [2 x i8]* %foo14, i32 0, i64 0
store i8 %call13, i8* %arrayidx15, align 1
call void @end_struct_small_char()
%call16 = call i32 @get_struct_large_nonchar()
- %foo17 = getelementptr inbounds %struct.struct_large_nonchar* %c, i32 0, i32 0
- %arrayidx18 = getelementptr inbounds [8 x i32]* %foo17, i32 0, i64 0
+ %foo17 = getelementptr inbounds %struct.struct_large_nonchar, %struct.struct_large_nonchar* %c, i32 0, i32 0
+ %arrayidx18 = getelementptr inbounds [8 x i32], [8 x i32]* %foo17, i32 0, i64 0
store i32 %call16, i32* %arrayidx18, align 4
call void @end_struct_large_nonchar()
%call19 = call signext i16 @get_struct_small_nonchar()
- %foo20 = getelementptr inbounds %struct.struct_small_nonchar* %d, i32 0, i32 0
- %arrayidx21 = getelementptr inbounds [2 x i16]* %foo20, i32 0, i64 0
+ %foo20 = getelementptr inbounds %struct.struct_small_nonchar, %struct.struct_small_nonchar* %d, i32 0, i32 0
+ %arrayidx21 = getelementptr inbounds [2 x i16], [2 x i16]* %foo20, i32 0, i64 0
store i16 %call19, i16* %arrayidx21, align 2
call void @end_struct_small_nonchar()
- %arraydecay = getelementptr inbounds [8 x i8]* %large, i32 0, i32 0
- %arraydecay22 = getelementptr inbounds [2 x i8]* %small, i32 0, i32 0
- %arraydecay23 = getelementptr inbounds [8 x i32]* %large2, i32 0, i32 0
- %arraydecay24 = getelementptr inbounds [2 x i16]* %small2, i32 0, i32 0
- %0 = load i32* %x, align 4
- %1 = load i32* %y, align 4
- %2 = load i32* %z, align 4
- %coerce.dive = getelementptr %struct.struct_large_char* %a, i32 0, i32 0
+ %arraydecay = getelementptr inbounds [8 x i8], [8 x i8]* %large, i32 0, i32 0
+ %arraydecay22 = getelementptr inbounds [2 x i8], [2 x i8]* %small, i32 0, i32 0
+ %arraydecay23 = getelementptr inbounds [8 x i32], [8 x i32]* %large2, i32 0, i32 0
+ %arraydecay24 = getelementptr inbounds [2 x i16], [2 x i16]* %small2, i32 0, i32 0
+ %0 = load i32, i32* %x, align 4
+ %1 = load i32, i32* %y, align 4
+ %2 = load i32, i32* %z, align 4
+ %coerce.dive = getelementptr %struct.struct_large_char, %struct.struct_large_char* %a, i32 0, i32 0
%3 = bitcast [8 x i8]* %coerce.dive to i64*
- %4 = load i64* %3, align 1
- %coerce.dive25 = getelementptr %struct.struct_small_char* %b, i32 0, i32 0
+ %4 = load i64, i64* %3, align 1
+ %coerce.dive25 = getelementptr %struct.struct_small_char, %struct.struct_small_char* %b, i32 0, i32 0
%5 = bitcast [2 x i8]* %coerce.dive25 to i16*
- %6 = load i16* %5, align 1
- %coerce.dive26 = getelementptr %struct.struct_small_nonchar* %d, i32 0, i32 0
+ %6 = load i16, i16* %5, align 1
+ %coerce.dive26 = getelementptr %struct.struct_small_nonchar, %struct.struct_small_nonchar* %d, i32 0, i32 0
%7 = bitcast [2 x i16]* %coerce.dive26 to i32*
- %8 = load i32* %7, align 1
- call void @takes_all(i64 %4, i16 %6, %struct.struct_large_nonchar* byval align 8 %c, i32 %8, i8* %arraydecay, i8* %arraydecay22, i32* %arraydecay23, i16* %arraydecay24, i32* %ptr, i32 %0, i32 %1, i32 %2)
+ %8 = load i32, i32* %7, align 1
+ call void @takes_all(i64 %4, i16 %6, %struct.struct_large_nonchar* byval align 4 %c, i32 %8, i8* %arraydecay, i8* %arraydecay22, i32* %arraydecay23, i16* %arraydecay24, i32* %ptr, i32 %0, i32 %1, i32 %2)
ret void
}
@@ -463,26 +457,26 @@ entry:
%d1 = alloca %struct.struct_large_nonchar, align 8
%d2 = alloca %struct.struct_small_nonchar, align 2
%call = call signext i8 @get_struct_small_char()
- %foo = getelementptr inbounds %struct.struct_small_char* %a, i32 0, i32 0
- %arrayidx = getelementptr inbounds [2 x i8]* %foo, i32 0, i64 0
+ %foo = getelementptr inbounds %struct.struct_small_char, %struct.struct_small_char* %a, i32 0, i32 0
+ %arrayidx = getelementptr inbounds [2 x i8], [2 x i8]* %foo, i32 0, i64 0
store i8 %call, i8* %arrayidx, align 1
call void @end_struct_small_char()
%call1 = call signext i8 @get_struct_large_char2()
- %foo2 = getelementptr inbounds %struct.struct_large_char2* %b, i32 0, i32 1
- %arrayidx3 = getelementptr inbounds [8 x i8]* %foo2, i32 0, i64 0
+ %foo2 = getelementptr inbounds %struct.struct_large_char2, %struct.struct_large_char2* %b, i32 0, i32 1
+ %arrayidx3 = getelementptr inbounds [8 x i8], [8 x i8]* %foo2, i32 0, i64 0
store i8 %call1, i8* %arrayidx3, align 1
call void @end_struct_large_char2()
%0 = bitcast %struct.struct_large_char2* %b to %struct.struct_large_char*
- %coerce.dive = getelementptr %struct.struct_large_char* %0, i32 0, i32 0
+ %coerce.dive = getelementptr %struct.struct_large_char, %struct.struct_large_char* %0, i32 0, i32 0
%1 = bitcast [8 x i8]* %coerce.dive to i64*
- %2 = load i64* %1, align 1
- %coerce.dive4 = getelementptr %struct.struct_small_char* %a, i32 0, i32 0
+ %2 = load i64, i64* %1, align 1
+ %coerce.dive4 = getelementptr %struct.struct_small_char, %struct.struct_small_char* %a, i32 0, i32 0
%3 = bitcast [2 x i8]* %coerce.dive4 to i16*
- %4 = load i16* %3, align 1
- %coerce.dive5 = getelementptr %struct.struct_small_nonchar* %d2, i32 0, i32 0
+ %4 = load i16, i16* %3, align 1
+ %coerce.dive5 = getelementptr %struct.struct_small_nonchar, %struct.struct_small_nonchar* %d2, i32 0, i32 0
%5 = bitcast [2 x i16]* %coerce.dive5 to i32*
- %6 = load i32* %5, align 1
- call void @takes_all(i64 %2, i16 %4, %struct.struct_large_nonchar* byval align 8 %d1, i32 %6, i8* null, i8* null, i32* null, i16* null, i32* null, i32 0, i32 0, i32 0)
+ %6 = load i32, i32* %5, align 1
+ call void @takes_all(i64 %2, i16 %4, %struct.struct_large_nonchar* byval align 4 %d1, i32 %6, i8* null, i8* null, i32* null, i16* null, i32* null, i32 0, i32 0, i32 0)
ret void
}
diff --git a/test/CodeGen/ARM/stack-alignment.ll b/test/CodeGen/ARM/stack-alignment.ll
index 153f92e25f62..ac14a5959d1f 100644
--- a/test/CodeGen/ARM/stack-alignment.ll
+++ b/test/CodeGen/ARM/stack-alignment.ll
@@ -40,65 +40,65 @@ entry:
; CHECK-THUMB2: bfc r4, #0, #12
; CHECK-THUMB2-NEXT: mov sp, r4
%a = alloca i8, align 4096
- %0 = load double* %d, align 4
- %arrayidx1 = getelementptr inbounds double* %d, i32 1
- %1 = load double* %arrayidx1, align 4
- %arrayidx2 = getelementptr inbounds double* %d, i32 2
- %2 = load double* %arrayidx2, align 4
- %arrayidx3 = getelementptr inbounds double* %d, i32 3
- %3 = load double* %arrayidx3, align 4
- %arrayidx4 = getelementptr inbounds double* %d, i32 4
- %4 = load double* %arrayidx4, align 4
- %arrayidx5 = getelementptr inbounds double* %d, i32 5
- %5 = load double* %arrayidx5, align 4
- %arrayidx6 = getelementptr inbounds double* %d, i32 6
- %6 = load double* %arrayidx6, align 4
- %arrayidx7 = getelementptr inbounds double* %d, i32 7
- %7 = load double* %arrayidx7, align 4
- %arrayidx8 = getelementptr inbounds double* %d, i32 8
- %8 = load double* %arrayidx8, align 4
- %arrayidx9 = getelementptr inbounds double* %d, i32 9
- %9 = load double* %arrayidx9, align 4
- %arrayidx10 = getelementptr inbounds double* %d, i32 10
- %10 = load double* %arrayidx10, align 4
- %arrayidx11 = getelementptr inbounds double* %d, i32 11
- %11 = load double* %arrayidx11, align 4
- %arrayidx12 = getelementptr inbounds double* %d, i32 12
- %12 = load double* %arrayidx12, align 4
- %arrayidx13 = getelementptr inbounds double* %d, i32 13
- %13 = load double* %arrayidx13, align 4
- %arrayidx14 = getelementptr inbounds double* %d, i32 14
- %14 = load double* %arrayidx14, align 4
- %arrayidx15 = getelementptr inbounds double* %d, i32 15
- %15 = load double* %arrayidx15, align 4
- %arrayidx16 = getelementptr inbounds double* %d, i32 16
- %16 = load double* %arrayidx16, align 4
- %arrayidx17 = getelementptr inbounds double* %d, i32 17
- %17 = load double* %arrayidx17, align 4
- %arrayidx18 = getelementptr inbounds double* %d, i32 18
- %18 = load double* %arrayidx18, align 4
- %arrayidx19 = getelementptr inbounds double* %d, i32 19
- %19 = load double* %arrayidx19, align 4
- %arrayidx20 = getelementptr inbounds double* %d, i32 20
- %20 = load double* %arrayidx20, align 4
- %arrayidx21 = getelementptr inbounds double* %d, i32 21
- %21 = load double* %arrayidx21, align 4
- %arrayidx22 = getelementptr inbounds double* %d, i32 22
- %22 = load double* %arrayidx22, align 4
- %arrayidx23 = getelementptr inbounds double* %d, i32 23
- %23 = load double* %arrayidx23, align 4
- %arrayidx24 = getelementptr inbounds double* %d, i32 24
- %24 = load double* %arrayidx24, align 4
- %arrayidx25 = getelementptr inbounds double* %d, i32 25
- %25 = load double* %arrayidx25, align 4
- %arrayidx26 = getelementptr inbounds double* %d, i32 26
- %26 = load double* %arrayidx26, align 4
- %arrayidx27 = getelementptr inbounds double* %d, i32 27
- %27 = load double* %arrayidx27, align 4
- %arrayidx28 = getelementptr inbounds double* %d, i32 28
- %28 = load double* %arrayidx28, align 4
- %arrayidx29 = getelementptr inbounds double* %d, i32 29
- %29 = load double* %arrayidx29, align 4
+ %0 = load double, double* %d, align 4
+ %arrayidx1 = getelementptr inbounds double, double* %d, i32 1
+ %1 = load double, double* %arrayidx1, align 4
+ %arrayidx2 = getelementptr inbounds double, double* %d, i32 2
+ %2 = load double, double* %arrayidx2, align 4
+ %arrayidx3 = getelementptr inbounds double, double* %d, i32 3
+ %3 = load double, double* %arrayidx3, align 4
+ %arrayidx4 = getelementptr inbounds double, double* %d, i32 4
+ %4 = load double, double* %arrayidx4, align 4
+ %arrayidx5 = getelementptr inbounds double, double* %d, i32 5
+ %5 = load double, double* %arrayidx5, align 4
+ %arrayidx6 = getelementptr inbounds double, double* %d, i32 6
+ %6 = load double, double* %arrayidx6, align 4
+ %arrayidx7 = getelementptr inbounds double, double* %d, i32 7
+ %7 = load double, double* %arrayidx7, align 4
+ %arrayidx8 = getelementptr inbounds double, double* %d, i32 8
+ %8 = load double, double* %arrayidx8, align 4
+ %arrayidx9 = getelementptr inbounds double, double* %d, i32 9
+ %9 = load double, double* %arrayidx9, align 4
+ %arrayidx10 = getelementptr inbounds double, double* %d, i32 10
+ %10 = load double, double* %arrayidx10, align 4
+ %arrayidx11 = getelementptr inbounds double, double* %d, i32 11
+ %11 = load double, double* %arrayidx11, align 4
+ %arrayidx12 = getelementptr inbounds double, double* %d, i32 12
+ %12 = load double, double* %arrayidx12, align 4
+ %arrayidx13 = getelementptr inbounds double, double* %d, i32 13
+ %13 = load double, double* %arrayidx13, align 4
+ %arrayidx14 = getelementptr inbounds double, double* %d, i32 14
+ %14 = load double, double* %arrayidx14, align 4
+ %arrayidx15 = getelementptr inbounds double, double* %d, i32 15
+ %15 = load double, double* %arrayidx15, align 4
+ %arrayidx16 = getelementptr inbounds double, double* %d, i32 16
+ %16 = load double, double* %arrayidx16, align 4
+ %arrayidx17 = getelementptr inbounds double, double* %d, i32 17
+ %17 = load double, double* %arrayidx17, align 4
+ %arrayidx18 = getelementptr inbounds double, double* %d, i32 18
+ %18 = load double, double* %arrayidx18, align 4
+ %arrayidx19 = getelementptr inbounds double, double* %d, i32 19
+ %19 = load double, double* %arrayidx19, align 4
+ %arrayidx20 = getelementptr inbounds double, double* %d, i32 20
+ %20 = load double, double* %arrayidx20, align 4
+ %arrayidx21 = getelementptr inbounds double, double* %d, i32 21
+ %21 = load double, double* %arrayidx21, align 4
+ %arrayidx22 = getelementptr inbounds double, double* %d, i32 22
+ %22 = load double, double* %arrayidx22, align 4
+ %arrayidx23 = getelementptr inbounds double, double* %d, i32 23
+ %23 = load double, double* %arrayidx23, align 4
+ %arrayidx24 = getelementptr inbounds double, double* %d, i32 24
+ %24 = load double, double* %arrayidx24, align 4
+ %arrayidx25 = getelementptr inbounds double, double* %d, i32 25
+ %25 = load double, double* %arrayidx25, align 4
+ %arrayidx26 = getelementptr inbounds double, double* %d, i32 26
+ %26 = load double, double* %arrayidx26, align 4
+ %arrayidx27 = getelementptr inbounds double, double* %d, i32 27
+ %27 = load double, double* %arrayidx27, align 4
+ %arrayidx28 = getelementptr inbounds double, double* %d, i32 28
+ %28 = load double, double* %arrayidx28, align 4
+ %arrayidx29 = getelementptr inbounds double, double* %d, i32 29
+ %29 = load double, double* %arrayidx29, align 4
%div = fdiv double %29, %28
%div30 = fdiv double %div, %27
%div31 = fdiv double %div30, %26
@@ -159,6 +159,6 @@ entry:
%div86 = fdiv double %div85, %29
%mul = fmul double %div57, %div86
%conv = fptosi double %mul to i32
- %add.ptr = getelementptr inbounds i8* %a, i32 %conv
+ %add.ptr = getelementptr inbounds i8, i8* %a, i32 %conv
ret i8* %add.ptr
}
diff --git a/test/CodeGen/ARM/stack-protector-bmovpcb_call.ll b/test/CodeGen/ARM/stack-protector-bmovpcb_call.ll
index f5cda14861af..2a7a82da8f69 100644
--- a/test/CodeGen/ARM/stack-protector-bmovpcb_call.ll
+++ b/test/CodeGen/ARM/stack-protector-bmovpcb_call.ll
@@ -14,9 +14,9 @@ target triple = "armv7s-apple-ios6.0.0"
define i32 @main() #0 {
entry:
%title = alloca [15 x i8], align 1
- %0 = getelementptr inbounds [15 x i8]* %title, i32 0, i32 0
- call void @llvm.memcpy.p0i8.p0i8.i32(i8* %0, i8* getelementptr inbounds ([15 x i8]* @main.title, i32 0, i32 0), i32 15, i32 1, i1 false)
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([3 x i8]* @.str, i32 0, i32 0), i8* %0) #3
+ %0 = getelementptr inbounds [15 x i8], [15 x i8]* %title, i32 0, i32 0
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %0, i8* getelementptr inbounds ([15 x i8], [15 x i8]* @main.title, i32 0, i32 0), i32 15, i32 1, i1 false)
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str, i32 0, i32 0), i8* %0) #3
ret i32 0
}
diff --git a/test/CodeGen/ARM/stack_guard_remat.ll b/test/CodeGen/ARM/stack_guard_remat.ll
index 7c89b99b8f97..99d499498450 100644
--- a/test/CodeGen/ARM/stack_guard_remat.ll
+++ b/test/CodeGen/ARM/stack_guard_remat.ll
@@ -52,7 +52,7 @@ define i32 @test_stack_guard_remat() #0 {
%a1 = alloca [256 x i32], align 4
%1 = bitcast [256 x i32]* %a1 to i8*
call void @llvm.lifetime.start(i64 1024, i8* %1)
- %2 = getelementptr inbounds [256 x i32]* %a1, i32 0, i32 0
+ %2 = getelementptr inbounds [256 x i32], [256 x i32]* %a1, i32 0, i32 0
call void @foo3(i32* %2) #3
call void asm sideeffect "foo2", "~{r0},~{r1},~{r2},~{r3},~{r4},~{r5},~{r6},~{r7},~{r8},~{r9},~{r10},~{r11},~{r12},~{sp},~{lr}"()
call void @llvm.lifetime.end(i64 1024, i8* %1)
diff --git a/test/CodeGen/ARM/stm.ll b/test/CodeGen/ARM/stm.ll
index 82dc14d14646..88207e6be105 100644
--- a/test/CodeGen/ARM/stm.ll
+++ b/test/CodeGen/ARM/stm.ll
@@ -10,7 +10,7 @@ entry:
; CHECK: main
; CHECK: push
; CHECK: stm
- %0 = tail call i32 (i8*, ...)* @printf(i8* getelementptr ([26 x i8]* @"\01LC1", i32 0, i32 0), i32 -2, i32 -3, i32 2, i32 -6) nounwind ; <i32> [#uses=0]
- %1 = tail call i32 (i8*, ...)* @printf(i8* getelementptr ([32 x i8]* @"\01LC", i32 0, i32 0), i32 0, i32 1, i32 0, i32 1, i32 0, i32 1) nounwind ; <i32> [#uses=0]
+ %0 = tail call i32 (i8*, ...) @printf(i8* getelementptr ([26 x i8], [26 x i8]* @"\01LC1", i32 0, i32 0), i32 -2, i32 -3, i32 2, i32 -6) nounwind ; <i32> [#uses=0]
+ %1 = tail call i32 (i8*, ...) @printf(i8* getelementptr ([32 x i8], [32 x i8]* @"\01LC", i32 0, i32 0), i32 0, i32 1, i32 0, i32 1, i32 0, i32 1) nounwind ; <i32> [#uses=0]
ret i32 0
}
diff --git a/test/CodeGen/ARM/str_post.ll b/test/CodeGen/ARM/str_post.ll
index a4f864065d5e..0933e15dab4e 100644
--- a/test/CodeGen/ARM/str_post.ll
+++ b/test/CodeGen/ARM/str_post.ll
@@ -3,7 +3,7 @@
define i16 @test1(i32* %X, i16* %A) {
; CHECK-LABEL: test1:
; CHECK: strh {{.*}}[{{.*}}], #-4
- %Y = load i32* %X ; <i32> [#uses=1]
+ %Y = load i32, i32* %X ; <i32> [#uses=1]
%tmp1 = trunc i32 %Y to i16 ; <i16> [#uses=1]
store i16 %tmp1, i16* %A
%tmp2 = ptrtoint i16* %A to i16 ; <i16> [#uses=1]
@@ -14,7 +14,7 @@ define i16 @test1(i32* %X, i16* %A) {
define i32 @test2(i32* %X, i32* %A) {
; CHECK-LABEL: test2:
; CHECK: str {{.*}}[{{.*}}],
- %Y = load i32* %X ; <i32> [#uses=1]
+ %Y = load i32, i32* %X ; <i32> [#uses=1]
store i32 %Y, i32* %A
%tmp1 = ptrtoint i32* %A to i32 ; <i32> [#uses=1]
%tmp2 = sub i32 %tmp1, 4 ; <i32> [#uses=1]
diff --git a/test/CodeGen/ARM/str_pre-2.ll b/test/CodeGen/ARM/str_pre-2.ll
index 5ce2bcecb476..4b8b4c6bca72 100644
--- a/test/CodeGen/ARM/str_pre-2.ll
+++ b/test/CodeGen/ARM/str_pre-2.ll
@@ -7,8 +7,8 @@ entry:
; CHECK: push {r4, r5, lr}
; CHECK: pop {r4, r5, pc}
call void asm sideeffect "", "~{r4},~{r5}"() nounwind
- %0 = load i64** @b, align 4
- %1 = load i64* %0, align 4
+ %0 = load i64*, i64** @b, align 4
+ %1 = load i64, i64* %0, align 4
%2 = mul i64 %1, %a
ret i64 %2
}
diff --git a/test/CodeGen/ARM/str_pre.ll b/test/CodeGen/ARM/str_pre.ll
index 60e6e9ecfeb3..848261f83e31 100644
--- a/test/CodeGen/ARM/str_pre.ll
+++ b/test/CodeGen/ARM/str_pre.ll
@@ -1,16 +1,16 @@
; RUN: llc -mtriple=arm-eabi %s -o - | FileCheck %s
define void @test1(i32* %X, i32* %A, i32** %dest) {
- %B = load i32* %A ; <i32> [#uses=1]
- %Y = getelementptr i32* %X, i32 4 ; <i32*> [#uses=2]
+ %B = load i32, i32* %A ; <i32> [#uses=1]
+ %Y = getelementptr i32, i32* %X, i32 4 ; <i32*> [#uses=2]
store i32 %B, i32* %Y
store i32* %Y, i32** %dest
ret void
}
define i16* @test2(i16* %X, i32* %A) {
- %B = load i32* %A ; <i32> [#uses=1]
- %Y = getelementptr i16* %X, i32 4 ; <i16*> [#uses=2]
+ %B = load i32, i32* %A ; <i32> [#uses=1]
+ %Y = getelementptr i16, i16* %X, i32 4 ; <i16*> [#uses=2]
%tmp = trunc i32 %B to i16 ; <i16> [#uses=1]
store i16 %tmp, i16* %Y
ret i16* %Y
diff --git a/test/CodeGen/ARM/struct-byval-frame-index.ll b/test/CodeGen/ARM/struct-byval-frame-index.ll
index 0fd55ec6c943..52f70fe1e0f8 100644
--- a/test/CodeGen/ARM/struct-byval-frame-index.ll
+++ b/test/CodeGen/ARM/struct-byval-frame-index.ll
@@ -72,10 +72,10 @@ declare void @SetMotionVectorsMB(%structK* nocapture, i32) #1
; Function Attrs: nounwind
define void @set_stored_macroblock_parameters() #1 {
entry:
- %0 = load %structB** @img, align 4
- %1 = load i32* undef, align 4
- %mb_data = getelementptr inbounds %structB* %0, i32 0, i32 61
- %2 = load %structK** %mb_data, align 4
+ %0 = load %structB*, %structB** @img, align 4
+ %1 = load i32, i32* undef, align 4
+ %mb_data = getelementptr inbounds %structB, %structB* %0, i32 0, i32 61
+ %2 = load %structK*, %structK** %mb_data, align 4
br label %for.body
for.body: ; preds = %for.body, %entry
@@ -110,7 +110,7 @@ for.body119: ; preds = %for.body119, %for.c
if.end164: ; preds = %for.body119, %for.cond47.preheader, %if.end43
store i32*** null, i32**** @cofDC, align 4
- %mb_type = getelementptr inbounds %structK* %2, i32 %1, i32 8
+ %mb_type = getelementptr inbounds %structK, %structK* %2, i32 %1, i32 8
br i1 undef, label %if.end230, label %if.then169
if.then169: ; preds = %if.end164
@@ -124,7 +124,7 @@ for.cond210.preheader: ; preds = %if.then169
if.end230: ; preds = %if.end164
tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* undef, i8* bitcast ([4 x i32]* @b8mode to i8*), i32 16, i32 4, i1 false)
- %b8pdir = getelementptr inbounds %structK* %2, i32 %1, i32 15
+ %b8pdir = getelementptr inbounds %structK, %structK* %2, i32 %1, i32 15
%3 = bitcast [4 x i32]* %b8pdir to i8*
tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %3, i8* bitcast ([4 x i32]* @b8pdir to i8*), i32 16, i32 4, i1 false)
br i1 undef, label %if.end236, label %if.then233
@@ -134,7 +134,7 @@ if.then233: ; preds = %if.end230
if.end236: ; preds = %if.end230
%cmp242 = icmp ne i16 undef, 8
- %4 = load i32* @luma_transform_size_8x8_flag, align 4
+ %4 = load i32, i32* @luma_transform_size_8x8_flag, align 4
%tobool245 = icmp ne i32 %4, 0
%or.cond812 = or i1 %cmp242, %tobool245
br i1 %or.cond812, label %if.end249, label %land.lhs.true246
@@ -150,51 +150,51 @@ if.then248: ; preds = %land.lhs.true246
br label %if.end249
if.end249: ; preds = %if.then248, %land.lhs.true246, %if.end236
- %5 = load i32* @luma_transform_size_8x8_flag, align 4
- %6 = load %structA** @rdopt, align 4
- %luma_transform_size_8x8_flag264 = getelementptr inbounds %structA* %6, i32 0, i32 21
+ %5 = load i32, i32* @luma_transform_size_8x8_flag, align 4
+ %6 = load %structA*, %structA** @rdopt, align 4
+ %luma_transform_size_8x8_flag264 = getelementptr inbounds %structA, %structA* %6, i32 0, i32 21
store i32 %5, i32* %luma_transform_size_8x8_flag264, align 4
- %7 = load i32* undef, align 4
+ %7 = load i32, i32* undef, align 4
%add281 = add nsw i32 %7, 0
br label %for.body285
for.body285: ; preds = %for.inc503, %if.end249
%8 = phi %structB* [ undef, %if.end249 ], [ %.pre1155, %for.inc503 ]
%i.21103 = phi i32 [ 0, %if.end249 ], [ %inc504, %for.inc503 ]
- %block_x286 = getelementptr inbounds %structB* %8, i32 0, i32 37
- %9 = load i32* %block_x286, align 4
+ %block_x286 = getelementptr inbounds %structB, %structB* %8, i32 0, i32 37
+ %9 = load i32, i32* %block_x286, align 4
%add287 = add nsw i32 %9, %i.21103
%shr289 = ashr i32 %i.21103, 1
%add290 = add nsw i32 %shr289, 0
- %arrayidx292 = getelementptr inbounds %structK* %2, i32 %1, i32 15, i32 %add290
- %10 = load %structM** @enc_picture, align 4
- %ref_idx = getelementptr inbounds %structM* %10, i32 0, i32 35
- %11 = load i8**** %ref_idx, align 4
- %12 = load i8*** %11, align 4
- %arrayidx313 = getelementptr inbounds i8** %12, i32 %add281
- %13 = load i8** %arrayidx313, align 4
- %arrayidx314 = getelementptr inbounds i8* %13, i32 %add287
+ %arrayidx292 = getelementptr inbounds %structK, %structK* %2, i32 %1, i32 15, i32 %add290
+ %10 = load %structM*, %structM** @enc_picture, align 4
+ %ref_idx = getelementptr inbounds %structM, %structM* %10, i32 0, i32 35
+ %11 = load i8***, i8**** %ref_idx, align 4
+ %12 = load i8**, i8*** %11, align 4
+ %arrayidx313 = getelementptr inbounds i8*, i8** %12, i32 %add281
+ %13 = load i8*, i8** %arrayidx313, align 4
+ %arrayidx314 = getelementptr inbounds i8, i8* %13, i32 %add287
store i8 -1, i8* %arrayidx314, align 1
- %14 = load %structB** @img, align 4
- %MbaffFrameFlag327 = getelementptr inbounds %structB* %14, i32 0, i32 100
- %15 = load i32* %MbaffFrameFlag327, align 4
+ %14 = load %structB*, %structB** @img, align 4
+ %MbaffFrameFlag327 = getelementptr inbounds %structB, %structB* %14, i32 0, i32 100
+ %15 = load i32, i32* %MbaffFrameFlag327, align 4
%tobool328 = icmp eq i32 %15, 0
br i1 %tobool328, label %if.end454, label %if.then329
if.then329: ; preds = %for.body285
- %16 = load %structA** @rdopt, align 4
+ %16 = load %structA*, %structA** @rdopt, align 4
br label %if.end454
if.end454: ; preds = %if.then329, %for.body285
- %17 = load i32* %arrayidx292, align 4
+ %17 = load i32, i32* %arrayidx292, align 4
%cmp457 = icmp eq i32 %17, 0
br i1 %cmp457, label %if.then475, label %lor.lhs.false459
lor.lhs.false459: ; preds = %if.end454
- %18 = load i32* %mb_type, align 4
+ %18 = load i32, i32* %mb_type, align 4
switch i32 %18, label %for.inc503 [
i32 9, label %if.then475
- i32 10, label %if.then475
+ i32 11, label %if.then475
i32 13, label %if.then475
i32 14, label %if.then475
]
@@ -205,7 +205,7 @@ if.then475: ; preds = %lor.lhs.false459, %
for.inc503: ; preds = %if.then475, %lor.lhs.false459
%inc504 = add nsw i32 %i.21103, 1
- %.pre1155 = load %structB** @img, align 4
+ %.pre1155 = load %structB*, %structB** @img, align 4
br label %for.body285
}
diff --git a/test/CodeGen/ARM/struct_byval.ll b/test/CodeGen/ARM/struct_byval.ll
index 130925a0c237..d7b9b477ec1e 100644
--- a/test/CodeGen/ARM/struct_byval.ll
+++ b/test/CodeGen/ARM/struct_byval.ll
@@ -1,5 +1,9 @@
; RUN: llc < %s -mtriple=armv7-apple-ios6.0 | FileCheck %s
; RUN: llc < %s -mtriple=thumbv7-apple-ios6.0 | FileCheck %s -check-prefix=THUMB
+; RUN: llc < %s -mtriple=armv7-unknown-nacl-gnueabi | FileCheck %s -check-prefix=NACL
+; RUN: llc < %s -mtriple=armv5-none-linux-gnueabi | FileCheck %s -check-prefix=NOMOVT
+
+; NOMOVT-NOT: movt
; rdar://9877866
%struct.SmallStruct = type { i32, [8 x i32], [37 x i8] }
@@ -33,6 +37,14 @@ entry:
; THUMB: sub
; THUMB: str
; THUMB: bne
+; NACL-LABEL: g:
+; Ensure that we use movw instead of a constant pool for the loop trip count,
+; but don't match the __stack_chk_guard movw.
+; NACL: movw r{{[1-9]}}, #
+; NACL: ldr
+; NACL: sub
+; NACL: str
+; NACL: bne
%st = alloca %struct.LargeStruct, align 4
%call = call i32 @e2(%struct.LargeStruct* byval %st)
ret i32 0
@@ -51,6 +63,11 @@ entry:
; THUMB: sub
; THUMB: vst1
; THUMB: bne
+; NACL: movw r{{[1-9]}}, #
+; NACL: vld1
+; NACL: sub
+; NACL: vst1
+; NACL: bne
%st = alloca %struct.LargeStruct, align 16
%call = call i32 @e3(%struct.LargeStruct* byval align 16 %st)
ret i32 0
@@ -80,7 +97,7 @@ define void @f4(%struct.SmallStruct* nocapture byval %s) nounwind optsize {
; THUMB-LABEL: f4
; THUMB: blx _consumestruct
entry:
- %addr = getelementptr inbounds %struct.SmallStruct* %s, i32 0, i32 0
+ %addr = getelementptr inbounds %struct.SmallStruct, %struct.SmallStruct* %s, i32 0, i32 0
%0 = bitcast i32* %addr to i8*
tail call void @consumestruct(i8* %0, i32 80) optsize
ret void
@@ -104,7 +121,7 @@ define void @f6(i32 %a, i32 %b, i32 %c, i32 %d, %struct.SmallStruct* nocapture b
; THUMB-LABEL: f6
; THUMB: b.w _consumestruct
entry:
- %addr = getelementptr inbounds %struct.SmallStruct* %s, i32 0, i32 0
+ %addr = getelementptr inbounds %struct.SmallStruct, %struct.SmallStruct* %s, i32 0, i32 0
%0 = bitcast i32* %addr to i8*
tail call void @consumestruct(i8* %0, i32 80) optsize
ret void
diff --git a/test/CodeGen/ARM/sub-cmp-peephole.ll b/test/CodeGen/ARM/sub-cmp-peephole.ll
index f7328dc580ef..32038883469b 100644
--- a/test/CodeGen/ARM/sub-cmp-peephole.ll
+++ b/test/CodeGen/ARM/sub-cmp-peephole.ll
@@ -169,7 +169,7 @@ entry:
; CHECK: sub
; CHECK: cmp
; CHECK: bge
- %load = load i32* @t, align 4
+ %load = load i32, i32* @t, align 4
%sub = sub i32 %load, 17
%cmp = icmp slt i32 %sub, 0
br i1 %cmp, label %if.then, label %if.else
@@ -191,7 +191,7 @@ entry:
; CHECK: sub
; CHECK: cmp
; CHECK: bhs
- %load = load i32* @t, align 4
+ %load = load i32, i32* @t, align 4
%sub = sub i32 %load, 17
%cmp = icmp ult i32 %sub, 0
br i1 %cmp, label %if.then, label %if.else
diff --git a/test/CodeGen/ARM/swift-atomics.ll b/test/CodeGen/ARM/swift-atomics.ll
index 8b100f1f41f2..ca7e7fb299bf 100644
--- a/test/CodeGen/ARM/swift-atomics.ll
+++ b/test/CodeGen/ARM/swift-atomics.ll
@@ -33,7 +33,7 @@ define i32 @test_seq_cst(i32* %p, i32 %v) {
; CHECK-STRICT-ATOMIC: dmb {{ish$}}
store atomic i32 %v, i32* %p seq_cst, align 4
- %val = load atomic i32* %p seq_cst, align 4
+ %val = load atomic i32, i32* %p seq_cst, align 4
ret i32 %val
}
@@ -46,6 +46,6 @@ define i32 @test_acq(i32* %addr) {
; CHECK-STRICT-ATOMIC-LABEL: test_acq:
; CHECK-STRICT-ATOMIC: dmb {{ish$}}
- %val = load atomic i32* %addr acquire, align 4
+ %val = load atomic i32, i32* %addr acquire, align 4
ret i32 %val
}
diff --git a/test/CodeGen/ARM/swift-vldm.ll b/test/CodeGen/ARM/swift-vldm.ll
index 67ae00ad7db8..9e507279fa09 100644
--- a/test/CodeGen/ARM/swift-vldm.ll
+++ b/test/CodeGen/ARM/swift-vldm.ll
@@ -12,14 +12,14 @@ declare fastcc void @force_register(double %d0, double %d1, double %d2, double %
define void @test_vldm(double* %x, double * %y) {
entry:
- %addr1 = getelementptr double * %x, i32 1
- %addr2 = getelementptr double * %x, i32 2
- %addr3 = getelementptr double * %x, i32 3
- %d0 = load double * %y
- %d1 = load double * %x
- %d2 = load double * %addr1
- %d3 = load double * %addr2
- %d4 = load double * %addr3
+ %addr1 = getelementptr double, double * %x, i32 1
+ %addr2 = getelementptr double, double * %x, i32 2
+ %addr3 = getelementptr double, double * %x, i32 3
+ %d0 = load double , double * %y
+ %d1 = load double , double * %x
+ %d2 = load double , double * %addr1
+ %d3 = load double , double * %addr2
+ %d4 = load double , double * %addr3
; We are trying to force x[0-3] in registers d1 to d4 so that we can test we
; don't form a "vldmia rX, {d1, d2, d3, d4}".
; We are relying on the calling convention and that register allocation
diff --git a/test/CodeGen/ARM/t2abs-killflags.ll b/test/CodeGen/ARM/t2abs-killflags.ll
new file mode 100644
index 000000000000..eed8f6719aec
--- /dev/null
+++ b/test/CodeGen/ARM/t2abs-killflags.ll
@@ -0,0 +1,23 @@
+; RUN: llc %s -o - -verify-machineinstrs | FileCheck %s
+
+target datalayout = "e-m:o-p:32:32-f64:32:64-v64:32:64-v128:32:128-a:0:32-n32-S32"
+target triple = "thumbv7s-unknown-unknown"
+
+; Test that we don't crash the machine verifier when expanding t2ABS.
+; It was applying the kill flag to its source, even if the original t2ABS didn't
+; kill the operand prior to pseudo expansion.
+
+; Function Attrs: noreturn
+declare void @foo(i32, i32) #0
+
+; CHECK-LABEL: @test
+; CHECK: rsbmi
+define void @test(i32 %arg) {
+ %cmp58 = icmp slt i32 %arg, 0
+ %sub62 = sub nsw i32 0, %arg
+ %l.1 = select i1 %cmp58, i32 %sub62, i32 %arg
+ call void @foo(i32 %l.1, i32 %arg) #0
+ unreachable
+}
+
+attributes #0 = { noreturn }
diff --git a/test/CodeGen/ARM/tail-dup-kill-flags.ll b/test/CodeGen/ARM/tail-dup-kill-flags.ll
new file mode 100644
index 000000000000..bce6cdc4e28d
--- /dev/null
+++ b/test/CodeGen/ARM/tail-dup-kill-flags.ll
@@ -0,0 +1,54 @@
+; RUN: llc %s -o - -fast-isel=true -O1 -verify-machineinstrs | FileCheck %s
+
+target datalayout = "e-m:o-p:32:32-f64:32:64-v64:32:64-v128:32:128-a:0:32-n32-S32"
+target triple = "thumbv7-apple-ios8.0.0"
+
+; Tail duplication was incorrectly leaving kill flags on the duplicated instructions.
+; The machine verifier is able to spot this error, so this test should pass if it passes verification.
+
+; CHECK-LABEL: @test
+
+%struct.cdiff_ctx = type { i8*, %struct.cdiff_node*, %struct.cdiff_node*, %struct.cdiff_node*, %struct.cdiff_node* }
+%struct.cdiff_node = type { i32, i8*, i8*, %struct.cdiff_node* }
+
+declare i32 @logg(i32)
+
+define hidden i32 @test(%struct.cdiff_ctx* nocapture %ctx, %struct.cdiff_node* %tmp10) {
+bb:
+ br label %.outer
+
+bb33: ; preds = %bb92, %.outer
+ %lines.0 = phi i32 [ %tmp37, %bb92 ], [ %lines.0.ph, %.outer ]
+ %tmp37 = add i32 %lines.0, 1
+ %tmp39 = load i32, i32* %tmp57, align 4
+ %tmp40 = icmp eq i32 %tmp39, %tmp37
+ br i1 %tmp40, label %bb41, label %bb92
+
+bb41: ; preds = %bb33
+ %tmp45 = call i32 @strncmp()
+ %tmp46 = icmp eq i32 %tmp45, 0
+ br i1 %tmp46, label %bb53, label %bb47
+
+bb47: ; preds = %bb41
+ %tmp52 = call i32 @logg(i32 %tmp37)
+ ret i32 -1
+
+bb53: ; preds = %bb41
+ %tmp54 = getelementptr inbounds %struct.cdiff_node, %struct.cdiff_node* %del.0.ph, i32 0, i32 3
+ %tmp55 = load %struct.cdiff_node*, %struct.cdiff_node** %tmp54, align 4
+ br label %.outer
+
+.outer: ; preds = %bb53, %bb
+ %del.0.ph = phi %struct.cdiff_node* [ %tmp55, %bb53 ], [ null, %bb ]
+ %lines.0.ph = phi i32 [ 1, %bb53 ], [ 0, %bb ]
+ %tmp57 = getelementptr inbounds %struct.cdiff_node, %struct.cdiff_node* %del.0.ph, i32 0, i32 0
+ br label %bb33
+
+bb92: ; preds = %bb33
+ %tmp93 = call i32 @puts()
+ br label %bb33
+}
+
+declare i32 @strncmp()
+
+declare i32 @puts()
diff --git a/test/CodeGen/ARM/tail-dup.ll b/test/CodeGen/ARM/tail-dup.ll
index d654056eaf3d..407bdf7524b1 100644
--- a/test/CodeGen/ARM/tail-dup.ll
+++ b/test/CodeGen/ARM/tail-dup.ll
@@ -11,28 +11,28 @@
define i32 @fn(i32* nocapture %opcodes) nounwind readonly ssp {
entry:
- %0 = load i32* %opcodes, align 4
- %arrayidx = getelementptr inbounds [3 x i8*]* @fn.codetable, i32 0, i32 %0
+ %0 = load i32, i32* %opcodes, align 4
+ %arrayidx = getelementptr inbounds [3 x i8*], [3 x i8*]* @fn.codetable, i32 0, i32 %0
br label %indirectgoto
INCREMENT: ; preds = %indirectgoto
%inc = add nsw i32 %result.0, 1
- %1 = load i32* %opcodes.addr.0, align 4
- %arrayidx2 = getelementptr inbounds [3 x i8*]* @fn.codetable, i32 0, i32 %1
+ %1 = load i32, i32* %opcodes.addr.0, align 4
+ %arrayidx2 = getelementptr inbounds [3 x i8*], [3 x i8*]* @fn.codetable, i32 0, i32 %1
br label %indirectgoto
DECREMENT: ; preds = %indirectgoto
%dec = add nsw i32 %result.0, -1
- %2 = load i32* %opcodes.addr.0, align 4
- %arrayidx4 = getelementptr inbounds [3 x i8*]* @fn.codetable, i32 0, i32 %2
+ %2 = load i32, i32* %opcodes.addr.0, align 4
+ %arrayidx4 = getelementptr inbounds [3 x i8*], [3 x i8*]* @fn.codetable, i32 0, i32 %2
br label %indirectgoto
indirectgoto: ; preds = %DECREMENT, %INCREMENT, %entry
%result.0 = phi i32 [ 0, %entry ], [ %dec, %DECREMENT ], [ %inc, %INCREMENT ]
%opcodes.pn = phi i32* [ %opcodes, %entry ], [ %opcodes.addr.0, %DECREMENT ], [ %opcodes.addr.0, %INCREMENT ]
%indirect.goto.dest.in = phi i8** [ %arrayidx, %entry ], [ %arrayidx4, %DECREMENT ], [ %arrayidx2, %INCREMENT ]
- %opcodes.addr.0 = getelementptr inbounds i32* %opcodes.pn, i32 1
- %indirect.goto.dest = load i8** %indirect.goto.dest.in, align 4
+ %opcodes.addr.0 = getelementptr inbounds i32, i32* %opcodes.pn, i32 1
+ %indirect.goto.dest = load i8*, i8** %indirect.goto.dest.in, align 4
indirectbr i8* %indirect.goto.dest, [label %RETURN, label %INCREMENT, label %DECREMENT]
RETURN: ; preds = %indirectgoto
diff --git a/test/CodeGen/ARM/test-sharedidx.ll b/test/CodeGen/ARM/test-sharedidx.ll
index 9203f166ffa9..377996c4c3c8 100644
--- a/test/CodeGen/ARM/test-sharedidx.ll
+++ b/test/CodeGen/ARM/test-sharedidx.ll
@@ -23,15 +23,15 @@ for.body: ; preds = %entry, %for.body.3
; CHECK: ldrb {{r[0-9]|lr}}, [{{r[0-9]|lr}}, {{r[0-9]|lr}}]!
; CHECK: ldrb {{r[0-9]|lr}}, [{{r[0-9]|lr}}, {{r[0-9]|lr}}]!
%i.09 = phi i32 [ %add5.3, %for.body.3 ], [ 0, %entry ]
- %arrayidx = getelementptr inbounds i8* %a, i32 %i.09
- %0 = load i8* %arrayidx, align 1
+ %arrayidx = getelementptr inbounds i8, i8* %a, i32 %i.09
+ %0 = load i8, i8* %arrayidx, align 1
%conv6 = zext i8 %0 to i32
- %arrayidx1 = getelementptr inbounds i8* %b, i32 %i.09
- %1 = load i8* %arrayidx1, align 1
+ %arrayidx1 = getelementptr inbounds i8, i8* %b, i32 %i.09
+ %1 = load i8, i8* %arrayidx1, align 1
%conv27 = zext i8 %1 to i32
%add = add nsw i32 %conv27, %conv6
%conv3 = trunc i32 %add to i8
- %arrayidx4 = getelementptr inbounds i8* %c, i32 %i.09
+ %arrayidx4 = getelementptr inbounds i8, i8* %c, i32 %i.09
store i8 %conv3, i8* %arrayidx4, align 1
%add5 = add i32 %i.09, %s
%cmp = icmp ult i32 %add5, %len
@@ -44,15 +44,15 @@ for.body.1: ; preds = %for.body
; CHECK: %for.body.1
; CHECK: ldrb {{r[0-9]|lr}}, [{{r[0-9]|lr}}, {{r[0-9]|lr}}]!
; CHECK: ldrb {{r[0-9]|lr}}, [{{r[0-9]|lr}}, {{r[0-9]|lr}}]!
- %arrayidx.1 = getelementptr inbounds i8* %a, i32 %add5
- %2 = load i8* %arrayidx.1, align 1
+ %arrayidx.1 = getelementptr inbounds i8, i8* %a, i32 %add5
+ %2 = load i8, i8* %arrayidx.1, align 1
%conv6.1 = zext i8 %2 to i32
- %arrayidx1.1 = getelementptr inbounds i8* %b, i32 %add5
- %3 = load i8* %arrayidx1.1, align 1
+ %arrayidx1.1 = getelementptr inbounds i8, i8* %b, i32 %add5
+ %3 = load i8, i8* %arrayidx1.1, align 1
%conv27.1 = zext i8 %3 to i32
%add.1 = add nsw i32 %conv27.1, %conv6.1
%conv3.1 = trunc i32 %add.1 to i8
- %arrayidx4.1 = getelementptr inbounds i8* %c, i32 %add5
+ %arrayidx4.1 = getelementptr inbounds i8, i8* %c, i32 %add5
store i8 %conv3.1, i8* %arrayidx4.1, align 1
%add5.1 = add i32 %add5, %s
%cmp.1 = icmp ult i32 %add5.1, %len
@@ -62,15 +62,15 @@ for.body.2: ; preds = %for.body.1
; CHECK: %for.body.2
; CHECK: ldrb {{r[0-9]|lr}}, [{{r[0-9]|lr}}, {{r[0-9]|lr}}]!
; CHECK: ldrb {{r[0-9]|lr}}, [{{r[0-9]|lr}}, {{r[0-9]|lr}}]!
- %arrayidx.2 = getelementptr inbounds i8* %a, i32 %add5.1
- %4 = load i8* %arrayidx.2, align 1
+ %arrayidx.2 = getelementptr inbounds i8, i8* %a, i32 %add5.1
+ %4 = load i8, i8* %arrayidx.2, align 1
%conv6.2 = zext i8 %4 to i32
- %arrayidx1.2 = getelementptr inbounds i8* %b, i32 %add5.1
- %5 = load i8* %arrayidx1.2, align 1
+ %arrayidx1.2 = getelementptr inbounds i8, i8* %b, i32 %add5.1
+ %5 = load i8, i8* %arrayidx1.2, align 1
%conv27.2 = zext i8 %5 to i32
%add.2 = add nsw i32 %conv27.2, %conv6.2
%conv3.2 = trunc i32 %add.2 to i8
- %arrayidx4.2 = getelementptr inbounds i8* %c, i32 %add5.1
+ %arrayidx4.2 = getelementptr inbounds i8, i8* %c, i32 %add5.1
store i8 %conv3.2, i8* %arrayidx4.2, align 1
%add5.2 = add i32 %add5.1, %s
%cmp.2 = icmp ult i32 %add5.2, %len
@@ -80,15 +80,15 @@ for.body.3: ; preds = %for.body.2
; CHECK: %for.body.3
; CHECK: ldrb {{r[0-9]|lr}}, [{{r[0-9]|lr}}, {{r[0-9]|lr}}]!
; CHECK: ldrb {{r[0-9]|lr}}, [{{r[0-9]|lr}}, {{r[0-9]|lr}}]!
- %arrayidx.3 = getelementptr inbounds i8* %a, i32 %add5.2
- %6 = load i8* %arrayidx.3, align 1
+ %arrayidx.3 = getelementptr inbounds i8, i8* %a, i32 %add5.2
+ %6 = load i8, i8* %arrayidx.3, align 1
%conv6.3 = zext i8 %6 to i32
- %arrayidx1.3 = getelementptr inbounds i8* %b, i32 %add5.2
- %7 = load i8* %arrayidx1.3, align 1
+ %arrayidx1.3 = getelementptr inbounds i8, i8* %b, i32 %add5.2
+ %7 = load i8, i8* %arrayidx1.3, align 1
%conv27.3 = zext i8 %7 to i32
%add.3 = add nsw i32 %conv27.3, %conv6.3
%conv3.3 = trunc i32 %add.3 to i8
- %arrayidx4.3 = getelementptr inbounds i8* %c, i32 %add5.2
+ %arrayidx4.3 = getelementptr inbounds i8, i8* %c, i32 %add5.2
store i8 %conv3.3, i8* %arrayidx4.3, align 1
%add5.3 = add i32 %add5.2, %s
%cmp.3 = icmp ult i32 %add5.3, %len
diff --git a/test/CodeGen/ARM/this-return.ll b/test/CodeGen/ARM/this-return.ll
index c681a1c80958..802f880c1380 100644
--- a/test/CodeGen/ARM/this-return.ll
+++ b/test/CodeGen/ARM/this-return.ll
@@ -29,7 +29,7 @@ entry:
; CHECKT2D: b.w _B_ctor_base
%0 = bitcast %struct.C* %this to %struct.A*
%call = tail call %struct.A* @A_ctor_base(%struct.A* %0)
- %1 = getelementptr inbounds %struct.C* %this, i32 0, i32 0
+ %1 = getelementptr inbounds %struct.C, %struct.C* %this, i32 0, i32 0
%call2 = tail call %struct.B* @B_ctor_base(%struct.B* %1, i32 %x)
ret %struct.C* %this
}
@@ -48,7 +48,7 @@ entry:
; CHECKT2D-NOT: b.w _B_ctor_base_nothisret
%0 = bitcast %struct.C* %this to %struct.A*
%call = tail call %struct.A* @A_ctor_base_nothisret(%struct.A* %0)
- %1 = getelementptr inbounds %struct.C* %this, i32 0, i32 0
+ %1 = getelementptr inbounds %struct.C, %struct.C* %this, i32 0, i32 0
%call2 = tail call %struct.B* @B_ctor_base_nothisret(%struct.B* %1, i32 %x)
ret %struct.C* %this
}
@@ -85,7 +85,7 @@ entry:
; CHECKT2D: blx _B_ctor_complete
; CHECKT2D-NOT: mov r0, {{r[0-9]+}}
; CHECKT2D: b.w _B_ctor_complete
- %b = getelementptr inbounds %struct.D* %this, i32 0, i32 0
+ %b = getelementptr inbounds %struct.D, %struct.D* %this, i32 0, i32 0
%call = tail call %struct.B* @B_ctor_complete(%struct.B* %b, i32 %x)
%call2 = tail call %struct.B* @B_ctor_complete(%struct.B* %b, i32 %x)
ret %struct.D* %this
@@ -97,9 +97,9 @@ entry:
; CHECKELF-NOT: b B_ctor_complete
; CHECKT2D-LABEL: E_ctor_base:
; CHECKT2D-NOT: b.w _B_ctor_complete
- %b = getelementptr inbounds %struct.E* %this, i32 0, i32 0
+ %b = getelementptr inbounds %struct.E, %struct.E* %this, i32 0, i32 0
%call = tail call %struct.B* @B_ctor_complete(%struct.B* %b, i32 %x)
- %b2 = getelementptr inbounds %struct.E* %this, i32 0, i32 1
+ %b2 = getelementptr inbounds %struct.E, %struct.E* %this, i32 0, i32 1
%call2 = tail call %struct.B* @B_ctor_complete(%struct.B* %b2, i32 %x)
ret %struct.E* %this
}
diff --git a/test/CodeGen/ARM/thumb-alignment.ll b/test/CodeGen/ARM/thumb-alignment.ll
new file mode 100644
index 000000000000..c11d4b6da3c9
--- /dev/null
+++ b/test/CodeGen/ARM/thumb-alignment.ll
@@ -0,0 +1,54 @@
+; RUN: llc -relocation-model=pic -mtriple=thumbv7-unknown-linux -o - %s | FileCheck %s
+
+@x = external global i32
+
+; CHECK: .globl foo
+; CHECK-NEXT: .align 2
+define i32* @foo() {
+ ret i32* @x
+}
+
+; CHECK: .globl bar
+; CHECK-NEXT: .align 1
+define i32* @bar() {
+ ret i32* zeroinitializer
+}
+
+@a = external global i32
+@b = external global i32
+@c = external global i32
+@d = external global i32
+
+; Create a Thumb-2 jump table, which should force alignment to 4 bytes.
+
+; CHECK: .globl baz
+; CHECK-NEXT: .align 2
+; CHECK: adr.w
+define i32 @baz() {
+ %1 = load i32, i32* @c, align 4
+ switch i32 %1, label %7 [
+ i32 1, label %2
+ i32 4, label %5
+ i32 9, label %5
+ i32 3, label %8
+ ]
+
+; <label>:2
+ %3 = load i32, i32* @a, align 4
+ %4 = tail call i32 bitcast (i32 (...)* @fn2 to i32 (i32 (...)*, i32, i32)*)(i32 (...)* bitcast (i32 ()* @baz to i32 (...)*), i32 0, i32 %3) #2
+ br label %8
+
+; <label>:5
+ %6 = load i32, i32* @d, align 4
+ store i32 %6, i32* @b, align 4
+ br label %8
+
+; <label>:7
+ br label %8
+
+; <label>:8
+ %e.0 = phi i32 [ 1, %7 ], [ 1, %2 ], [ 0, %0 ], [ 0, %5 ]
+ ret i32 %e.0
+}
+
+declare i32 @fn2(...)
diff --git a/test/CodeGen/ARM/thumb-big-stack.ll b/test/CodeGen/ARM/thumb-big-stack.ll
new file mode 100644
index 000000000000..e5cbb9747a7e
--- /dev/null
+++ b/test/CodeGen/ARM/thumb-big-stack.ll
@@ -0,0 +1,6844 @@
+; RUN: llc %s -O0 -verify-machineinstrs -o - | FileCheck %s
+; This file used to trigger a machine verifier error because we
+; were generating a stack adjustment with SP as the second argument,
+; which is unpredictable behavior for t2ADDrr.
+; This file has been generated from the constpool test of the test-suite.
+target datalayout = "e-m:o-p:32:32-f64:32:64-v64:32:64-v128:32:128-a:0:32-n32-S32"
+target triple = "thumbv7s-apple-ios"
+
+@.str = external unnamed_addr constant [21 x i8], align 1
+
+; CHECK-LABEL: f:
+; CHECK: movw [[ADDR:(r[0-9]+|lr)]], #
+; CHECK-NEXT: add [[ADDR]], sp
+; CHECK-NEXT: vst1.64 {d{{[0-9]+}}, d{{[0-9]+}}}, {{\[}}[[ADDR]]:128]
+define <4 x float> @f(<4 x float> %x) {
+entry:
+ %.compoundliteral7837 = alloca <4 x float>, align 16
+ %.compoundliteral7865 = alloca <4 x float>, align 16
+ %.compoundliteral7991 = alloca <4 x float>, align 16
+ %.compoundliteral8019 = alloca <4 x float>, align 16
+ %.compoundliteral8061 = alloca <4 x float>, align 16
+ %.compoundliteral8075 = alloca <4 x float>, align 16
+ %.compoundliteral8089 = alloca <4 x float>, align 16
+ %.compoundliteral8103 = alloca <4 x float>, align 16
+ %.compoundliteral8117 = alloca <4 x float>, align 16
+ %.compoundliteral8145 = alloca <4 x float>, align 16
+ %.compoundliteral8243 = alloca <4 x float>, align 16
+ %.compoundliteral8285 = alloca <4 x float>, align 16
+ %.compoundliteral8299 = alloca <4 x float>, align 16
+ %.compoundliteral8313 = alloca <4 x float>, align 16
+ %.compoundliteral8327 = alloca <4 x float>, align 16
+ %.compoundliteral9601 = alloca <4 x float>, align 16
+ %.compoundliteral9615 = alloca <4 x float>, align 16
+ %.compoundliteral9629 = alloca <4 x float>, align 16
+ %.compoundliteral9657 = alloca <4 x float>, align 16
+ %.compoundliteral9755 = alloca <4 x float>, align 16
+ %.compoundliteral9769 = alloca <4 x float>, align 16
+ %.compoundliteral9853 = alloca <4 x float>, align 16
+ %.compoundliteral9867 = alloca <4 x float>, align 16
+ %.compoundliteral9895 = alloca <4 x float>, align 16
+ %.compoundliteral9909 = alloca <4 x float>, align 16
+ %.compoundliteral9923 = alloca <4 x float>, align 16
+ %.compoundliteral9937 = alloca <4 x float>, align 16
+ %.compoundliteral9951 = alloca <4 x float>, align 16
+ %.compoundliteral9979 = alloca <4 x float>, align 16
+ %.compoundliteral10021 = alloca <4 x float>, align 16
+ %.compoundliteral10049 = alloca <4 x float>, align 16
+ %.compoundliteral10063 = alloca <4 x float>, align 16
+ %.compoundliteral10077 = alloca <4 x float>, align 16
+ %.compoundliteral10091 = alloca <4 x float>, align 16
+ %.compoundliteral10119 = alloca <4 x float>, align 16
+ %.compoundliteral10133 = alloca <4 x float>, align 16
+ %.compoundliteral10147 = alloca <4 x float>, align 16
+ %.compoundliteral10161 = alloca <4 x float>, align 16
+ %.compoundliteral10203 = alloca <4 x float>, align 16
+ %.compoundliteral10231 = alloca <4 x float>, align 16
+ %.compoundliteral10385 = alloca <4 x float>, align 16
+ %.compoundliteral10399 = alloca <4 x float>, align 16
+ %.compoundliteral10413 = alloca <4 x float>, align 16
+ %.compoundliteral10539 = alloca <4 x float>, align 16
+ %.compoundliteral10553 = alloca <4 x float>, align 16
+ %.compoundliteral10567 = alloca <4 x float>, align 16
+ %.compoundliteral10581 = alloca <4 x float>, align 16
+ %.compoundliteral10595 = alloca <4 x float>, align 16
+ %.compoundliteral10609 = alloca <4 x float>, align 16
+ %.compoundliteral10623 = alloca <4 x float>, align 16
+ %.compoundliteral10637 = alloca <4 x float>, align 16
+ %.compoundliteral10665 = alloca <4 x float>, align 16
+ %.compoundliteral10693 = alloca <4 x float>, align 16
+ %.compoundliteral10707 = alloca <4 x float>, align 16
+ %.compoundliteral10721 = alloca <4 x float>, align 16
+ %.compoundliteral10735 = alloca <4 x float>, align 16
+ %.compoundliteral10749 = alloca <4 x float>, align 16
+ %.compoundliteral10763 = alloca <4 x float>, align 16
+ %.compoundliteral10945 = alloca <4 x float>, align 16
+ %.compoundliteral10959 = alloca <4 x float>, align 16
+ %.compoundliteral10987 = alloca <4 x float>, align 16
+ %.compoundliteral11001 = alloca <4 x float>, align 16
+ %.compoundliteral11015 = alloca <4 x float>, align 16
+ %.compoundliteral11197 = alloca <4 x float>, align 16
+ %.compoundliteral11421 = alloca <4 x float>, align 16
+ %.compoundliteral11435 = alloca <4 x float>, align 16
+ %.compoundliteral11463 = alloca <4 x float>, align 16
+ %.compoundliteral11477 = alloca <4 x float>, align 16
+ %.compoundliteral11491 = alloca <4 x float>, align 16
+ %.compoundliteral11519 = alloca <4 x float>, align 16
+ %.compoundliteral11533 = alloca <4 x float>, align 16
+ %.compoundliteral11547 = alloca <4 x float>, align 16
+ %.compoundliteral11631 = alloca <4 x float>, align 16
+ %.compoundliteral11645 = alloca <4 x float>, align 16
+ %.compoundliteral11659 = alloca <4 x float>, align 16
+ %.compoundliteral11701 = alloca <4 x float>, align 16
+ %.compoundliteral11743 = alloca <4 x float>, align 16
+ %.compoundliteral11757 = alloca <4 x float>, align 16
+ %.compoundliteral11771 = alloca <4 x float>, align 16
+ %.compoundliteral11785 = alloca <4 x float>, align 16
+ %.compoundliteral11799 = alloca <4 x float>, align 16
+ %.compoundliteral11827 = alloca <4 x float>, align 16
+ %.compoundliteral11841 = alloca <4 x float>, align 16
+ %.compoundliteral11855 = alloca <4 x float>, align 16
+ %.compoundliteral11869 = alloca <4 x float>, align 16
+ %.compoundliteral11939 = alloca <4 x float>, align 16
+ %.compoundliteral11953 = alloca <4 x float>, align 16
+ %.compoundliteral11967 = alloca <4 x float>, align 16
+ %.compoundliteral11981 = alloca <4 x float>, align 16
+ %.compoundliteral11995 = alloca <4 x float>, align 16
+ %.compoundliteral12023 = alloca <4 x float>, align 16
+ %.compoundliteral12051 = alloca <4 x float>, align 16
+ %.compoundliteral12065 = alloca <4 x float>, align 16
+ %.compoundliteral12247 = alloca <4 x float>, align 16
+ %.compoundliteral12261 = alloca <4 x float>, align 16
+ %.compoundliteral12275 = alloca <4 x float>, align 16
+ %.compoundliteral12499 = alloca <4 x float>, align 16
+ %.compoundliteral12541 = alloca <4 x float>, align 16
+ %.compoundliteral12555 = alloca <4 x float>, align 16
+ %.compoundliteral12751 = alloca <4 x float>, align 16
+ %.compoundliteral12891 = alloca <4 x float>, align 16
+ %.compoundliteral12905 = alloca <4 x float>, align 16
+ %.compoundliteral12919 = alloca <4 x float>, align 16
+ %.compoundliteral12933 = alloca <4 x float>, align 16
+ %.compoundliteral12947 = alloca <4 x float>, align 16
+ %.compoundliteral12961 = alloca <4 x float>, align 16
+ %.compoundliteral12975 = alloca <4 x float>, align 16
+ %.compoundliteral12989 = alloca <4 x float>, align 16
+ %.compoundliteral13003 = alloca <4 x float>, align 16
+ %.compoundliteral13017 = alloca <4 x float>, align 16
+ %.compoundliteral13031 = alloca <4 x float>, align 16
+ %.compoundliteral13423 = alloca <4 x float>, align 16
+ %.compoundliteral13437 = alloca <4 x float>, align 16
+ %.compoundliteral13493 = alloca <4 x float>, align 16
+ %.compoundliteral13535 = alloca <4 x float>, align 16
+ %.compoundliteral13549 = alloca <4 x float>, align 16
+ %.compoundliteral13647 = alloca <4 x float>, align 16
+ %.compoundliteral13675 = alloca <4 x float>, align 16
+ %.compoundliteral13689 = alloca <4 x float>, align 16
+ %.compoundliteral13703 = alloca <4 x float>, align 16
+ %.compoundliteral13717 = alloca <4 x float>, align 16
+ %.compoundliteral13745 = alloca <4 x float>, align 16
+ %.compoundliteral13759 = alloca <4 x float>, align 16
+ %.compoundliteral13773 = alloca <4 x float>, align 16
+ %.compoundliteral13787 = alloca <4 x float>, align 16
+ %.compoundliteral13941 = alloca <4 x float>, align 16
+ %.compoundliteral13969 = alloca <4 x float>, align 16
+ %.compoundliteral13983 = alloca <4 x float>, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> <float 0x40746999A0000000, float 0xC0719B3340000000, float 0xC070B66660000000, float 0xC07404CCC0000000>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> <float 0x40701B3340000000, float 0x405B866660000000, float 0xC0763999A0000000, float 4.895000e+02>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp = load <4 x float>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp1 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add68 = fadd <4 x float> %tmp1, %tmp
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %add68, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp2 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add76 = fadd float undef, 0x4074C999A0000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp3 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins77 = insertelement <4 x float> %tmp3, float %add76, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins77, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp4 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext78 = extractelement <4 x float> %tmp4, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add79 = fadd float %vecext78, 0x40776E6660000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp5 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins80 = insertelement <4 x float> %tmp5, float %add79, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins80, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> <float 0x40678CCCC0000000, float 0xC03E4CCCC0000000, float -4.170000e+02, float -1.220000e+02>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp6 = load <4 x float>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add82 = fadd <4 x float> undef, %tmp6
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %add82, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp7 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext83 = extractelement <4 x float> %tmp7, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add84 = fadd float %vecext83, 1.300000e+02
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp8 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins85 = insertelement <4 x float> %tmp8, float %add84, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins85, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp9 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext86 = extractelement <4 x float> %tmp9, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add93 = fadd float undef, 0xC076C66660000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp10 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins94 = insertelement <4 x float> %tmp10, float %add93, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> <float 0x406C2999A0000000, float 8.050000e+01, float 0xC0794999A0000000, float 0xC073E4CCC0000000>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp11 = load <4 x float>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp12 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add96 = fadd <4 x float> %tmp12, %tmp11
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp13 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext97 = extractelement <4 x float> %tmp13, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add98 = fadd float %vecext97, 0x4079E66660000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp14 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins102 = insertelement <4 x float> undef, float undef, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins102, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp15 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add104 = fadd float undef, 0x406AB999A0000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp16 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> <float 0xC0531999A0000000, float 0xC0737999A0000000, float 0x407CB33340000000, float 0xC06DCCCCC0000000>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext579 = extractelement <4 x float> undef, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add580 = fadd float %vecext579, 0xC07424CCC0000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp17 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins581 = insertelement <4 x float> %tmp17, float %add580, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins581, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp18 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext582 = extractelement <4 x float> %tmp18, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add583 = fadd float %vecext582, 0x40444CCCC0000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> undef, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp19 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext590 = extractelement <4 x float> %tmp19, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add591 = fadd float %vecext590, 1.725000e+02
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins592 = insertelement <4 x float> undef, float %add591, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins592, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp20 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add594 = fadd float undef, 0xC05B466660000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add605 = fadd float undef, 0x407164CCC0000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp21 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add616 = fadd float undef, 1.885000e+02
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp22 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp23 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins620 = insertelement <4 x float> undef, float undef, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins620, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext621 = extractelement <4 x float> undef, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add622 = fadd float %vecext621, 0x40709B3340000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins623 = insertelement <4 x float> undef, float %add622, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins623, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp24 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext624 = extractelement <4 x float> %tmp24, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add625 = fadd float %vecext624, 0xC064033340000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp25 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins626 = insertelement <4 x float> %tmp25, float %add625, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins626, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> <float 0x404D0CCCC0000000, float 3.955000e+02, float 0xC0334CCCC0000000, float 0x40754E6660000000>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp26 = load <4 x float>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp27 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add628 = fadd <4 x float> %tmp27, %tmp26
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %add628, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp28 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext629 = extractelement <4 x float> %tmp28, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add630 = fadd float %vecext629, 0x40730CCCC0000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp29 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins631 = insertelement <4 x float> %tmp29, float %add630, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins631, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp30 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext632 = extractelement <4 x float> %tmp30, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add633 = fadd float %vecext632, 0xC0630999A0000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp31 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins634 = insertelement <4 x float> %tmp31, float %add633, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins634, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp32 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext635 = extractelement <4 x float> %tmp32, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add636 = fadd float %vecext635, 0xC078833340000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp33 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp34 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp35 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add658 = fadd float undef, 0xC04A4CCCC0000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext663 = extractelement <4 x float> undef, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp36 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins665 = insertelement <4 x float> %tmp36, float undef, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext694 = extractelement <4 x float> undef, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add695 = fadd float %vecext694, 0xC03CCCCCC0000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp37 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins696 = insertelement <4 x float> %tmp37, float %add695, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins696, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> <float 0xC069FCCCC0000000, float 0xC07C6E6660000000, float 0x4067E33340000000, float 0x4078DB3340000000>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp38 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> undef, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext699 = extractelement <4 x float> undef, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add703 = fadd float undef, 0x4068F33340000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins704 = insertelement <4 x float> undef, float %add703, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins704, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp39 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp40 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins710 = insertelement <4 x float> %tmp40, float undef, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins710, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> <float 0xC05D9999A0000000, float 0x405D6CCCC0000000, float 0x40765CCCC0000000, float 0xC07C64CCC0000000>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp41 = load <4 x float>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp42 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add712 = fadd <4 x float> %tmp42, %tmp41
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %add712, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp43 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext713 = extractelement <4 x float> %tmp43, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp44 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins715 = insertelement <4 x float> %tmp44, float undef, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp45 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext716 = extractelement <4 x float> %tmp45, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add717 = fadd float %vecext716, -4.315000e+02
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp46 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins718 = insertelement <4 x float> %tmp46, float %add717, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins718, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp47 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext719 = extractelement <4 x float> %tmp47, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add723 = fadd float undef, 0xC06A6CCCC0000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins724 = insertelement <4 x float> undef, float %add723, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add726 = fadd <4 x float> undef, undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> undef, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext730 = extractelement <4 x float> undef, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add731 = fadd float %vecext730, 0xC0759CCCC0000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp48 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins732 = insertelement <4 x float> %tmp48, float %add731, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins732, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp49 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext733 = extractelement <4 x float> %tmp49, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> undef, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp50 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins738 = insertelement <4 x float> %tmp50, float undef, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> <float 0x406E6CCCC0000000, float 0xC07A766660000000, float 0xC0608CCCC0000000, float 0xC063333340000000>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp51 = load <4 x float>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add740 = fadd <4 x float> undef, %tmp51
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp52 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext741 = extractelement <4 x float> %tmp52, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add742 = fadd float %vecext741, 0xC07984CCC0000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp53 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins743 = insertelement <4 x float> %tmp53, float %add742, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins743, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp54 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp55 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add754 = fadd <4 x float> %tmp55, undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %add754, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp56 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext755 = extractelement <4 x float> %tmp56, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add756 = fadd float %vecext755, 0xC070ACCCC0000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp57 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins757 = insertelement <4 x float> %tmp57, float %add756, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add765 = fadd float undef, 0x405BA66660000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp58 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins766 = insertelement <4 x float> %tmp58, float %add765, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp59 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext769 = extractelement <4 x float> %tmp59, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add770 = fadd float %vecext769, 0x40797199A0000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp60 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins771 = insertelement <4 x float> %tmp60, float %add770, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins771, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp61 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add776 = fadd float undef, 0xC055F33340000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins777 = insertelement <4 x float> undef, float %add776, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp62 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp63 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add782 = fadd <4 x float> %tmp63, undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %add782, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp64 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext783 = extractelement <4 x float> %tmp64, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add784 = fadd float %vecext783, -3.455000e+02
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> <float 0xC07A866660000000, float 0xC05CF999A0000000, float 0xC0757199A0000000, float -3.845000e+02>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add796 = fadd <4 x float> undef, undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %add796, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp65 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add801 = fadd float undef, 3.045000e+02
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp66 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins802 = insertelement <4 x float> %tmp66, float %add801, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins802, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext803 = extractelement <4 x float> undef, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> undef, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp67 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp68 = load <4 x float>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add810 = fadd <4 x float> undef, %tmp68
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %add810, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp69 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext811 = extractelement <4 x float> %tmp69, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp70 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins813 = insertelement <4 x float> %tmp70, float undef, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext817 = extractelement <4 x float> undef, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add818 = fadd float %vecext817, -4.830000e+02
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins822 = insertelement <4 x float> undef, float undef, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins822, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> <float 2.700000e+01, float 0xC05F666660000000, float 0xC07D0199A0000000, float 0x407A6CCCC0000000>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp71 = load <4 x float>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp72 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add824 = fadd <4 x float> %tmp72, %tmp71
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add838 = fadd <4 x float> undef, undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %add838, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp73 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext839 = extractelement <4 x float> %tmp73, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add849 = fadd float undef, 0xC07C266660000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> undef, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> <float 0xC07D566660000000, float 0xC06D233340000000, float 0x4068B33340000000, float 0xC07ADCCCC0000000>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp74 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add852 = fadd <4 x float> %tmp74, undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext856 = extractelement <4 x float> undef, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add857 = fadd float %vecext856, 0xC070666660000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp75 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp76 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext859 = extractelement <4 x float> %tmp76, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add860 = fadd float %vecext859, 4.705000e+02
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp77 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins861 = insertelement <4 x float> %tmp77, float %add860, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins889 = insertelement <4 x float> undef, float undef, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins889, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp78 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext890 = extractelement <4 x float> %tmp78, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add891 = fadd float %vecext890, 0xC070633340000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp79 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins892 = insertelement <4 x float> %tmp79, float %add891, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins892, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> <float 0x4063D33340000000, float 0xC076433340000000, float 0x407C966660000000, float 0xC07B5199A0000000>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp80 = load <4 x float>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp81 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add894 = fadd <4 x float> %tmp81, %tmp80
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %add894, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext895 = extractelement <4 x float> undef, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add896 = fadd float %vecext895, 0xC070F33340000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins897 = insertelement <4 x float> undef, float %add896, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp82 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext898 = extractelement <4 x float> %tmp82, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add899 = fadd float %vecext898, 0xC076F33340000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins900 = insertelement <4 x float> undef, float %add899, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp83 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext901 = extractelement <4 x float> %tmp83, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add902 = fadd float %vecext901, 0xC054ECCCC0000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp84 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins903 = insertelement <4 x float> %tmp84, float %add902, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins903, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext904 = extractelement <4 x float> undef, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add905 = fadd float %vecext904, 0x4056A66660000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp85 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins906 = insertelement <4 x float> %tmp85, float %add905, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> <float 0xC07EFCCCC0000000, float 1.795000e+02, float 0x407E3E6660000000, float 0x4070633340000000>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp86 = load <4 x float>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp87 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add908 = fadd <4 x float> %tmp87, %tmp86
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %add908, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp88 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp89 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> undef, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp90 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext912 = extractelement <4 x float> %tmp90, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add913 = fadd float %vecext912, 2.575000e+02
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins914 = insertelement <4 x float> undef, float %add913, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp91 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext915 = extractelement <4 x float> %tmp91, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add916 = fadd float %vecext915, -3.115000e+02
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp92 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins917 = insertelement <4 x float> %tmp92, float %add916, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins917, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp93 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext918 = extractelement <4 x float> %tmp93, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add919 = fadd float %vecext918, 2.950000e+01
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp94 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins920 = insertelement <4 x float> %tmp94, float %add919, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins920, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp95 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins925 = insertelement <4 x float> %tmp95, float undef, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins925, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp96 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add927 = fadd float undef, 0xC0501999A0000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp97 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins928 = insertelement <4 x float> %tmp97, float %add927, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext929 = extractelement <4 x float> undef, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add930 = fadd float %vecext929, 0xC07C8B3340000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp98 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins931 = insertelement <4 x float> %tmp98, float %add930, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> <float 0xC047B33340000000, float 0x404ACCCCC0000000, float 0x40708E6660000000, float 0x4060F999A0000000>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp99 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp100 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext937 = extractelement <4 x float> %tmp100, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add941 = fadd float undef, -4.665000e+02
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins942 = insertelement <4 x float> undef, float %add941, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins942, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp101 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext943 = extractelement <4 x float> %tmp101, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add944 = fadd float %vecext943, 4.580000e+02
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp102 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins945 = insertelement <4 x float> %tmp102, float %add944, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins945, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp103 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add947 = fadd float undef, 0xC051933340000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp104 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins948 = insertelement <4 x float> %tmp104, float %add947, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins948, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> <float 0x4060CCCCC0000000, float 0xC07BAB3340000000, float 0xC061233340000000, float 0xC076C199A0000000>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp105 = load <4 x float>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add955 = fadd float undef, 0x4077F4CCC0000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp106 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins956 = insertelement <4 x float> %tmp106, float %add955, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins956, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext971 = extractelement <4 x float> undef, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add972 = fadd float %vecext971, 0x4024333340000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp107 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins973 = insertelement <4 x float> %tmp107, float %add972, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins973, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp108 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext974 = extractelement <4 x float> %tmp108, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins976 = insertelement <4 x float> undef, float undef, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins976, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> <float 0x407E266660000000, float -1.225000e+02, float 0x407EB199A0000000, float 0x407BA199A0000000>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp109 = load <4 x float>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp110 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add978 = fadd <4 x float> %tmp110, %tmp109
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp111 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp112 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> undef, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext982 = extractelement <4 x float> undef, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add983 = fadd float %vecext982, 0x407E1B3340000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins984 = insertelement <4 x float> undef, float %add983, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins984, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp113 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext985 = extractelement <4 x float> %tmp113, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add986 = fadd float %vecext985, 0x406C8CCCC0000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp114 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins987 = insertelement <4 x float> %tmp114, float %add986, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins987, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp115 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp116 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins995 = insertelement <4 x float> %tmp116, float undef, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins995, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp117 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add997 = fadd float undef, 0xC0798999A0000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp118 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins998 = insertelement <4 x float> %tmp118, float %add997, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins998, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp119 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext1013 = extractelement <4 x float> undef, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add1014 = fadd float %vecext1013, 3.105000e+02
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp120 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> undef, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp121 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext1016 = extractelement <4 x float> %tmp121, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add1017 = fadd float %vecext1016, 0x406A1999A0000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp122 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext1030 = extractelement <4 x float> undef, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add1031 = fadd float %vecext1030, 2.010000e+02
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> undef, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp123 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp124 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins1085 = insertelement <4 x float> %tmp124, float undef, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp125 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext1086 = extractelement <4 x float> %tmp125, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add1087 = fadd float %vecext1086, -1.575000e+02
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp126 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins1088 = insertelement <4 x float> %tmp126, float %add1087, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins1088, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp127 = load <4 x float>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add1090 = fadd <4 x float> undef, %tmp127
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> undef, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp128 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext1094 = extractelement <4 x float> %tmp128, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add1095 = fadd float %vecext1094, 0x4072C999A0000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp129 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins1096 = insertelement <4 x float> %tmp129, float %add1095, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins1096, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp130 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext1097 = extractelement <4 x float> %tmp130, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add1098 = fadd float %vecext1097, 0xC073E999A0000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp131 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins1099 = insertelement <4 x float> %tmp131, float %add1098, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins1099, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp132 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext1100 = extractelement <4 x float> %tmp132, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add1101 = fadd float %vecext1100, 2.885000e+02
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp133 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins1102 = insertelement <4 x float> %tmp133, float %add1101, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins1102, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> <float 0x4059866660000000, float 0x4072466660000000, float 0xC078FE6660000000, float 0xC058ACCCC0000000>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp134 = load <4 x float>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add1104 = fadd <4 x float> undef, %tmp134
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp135 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext1105 = extractelement <4 x float> %tmp135, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add1106 = fadd float %vecext1105, 0xC078A999A0000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp136 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins1107 = insertelement <4 x float> %tmp136, float %add1106, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext1108 = extractelement <4 x float> undef, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp137 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins1110 = insertelement <4 x float> %tmp137, float undef, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins1110, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp138 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext1111 = extractelement <4 x float> %tmp138, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add1112 = fadd float %vecext1111, 0x407D566660000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp139 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins1113 = insertelement <4 x float> %tmp139, float %add1112, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins1113, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add1115 = fadd float undef, 0x4072B33340000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins1116 = insertelement <4 x float> undef, float %add1115, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins1116, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> <float 0xC0721999A0000000, float 0x4075633340000000, float 0x40794199A0000000, float 0x4061066660000000>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp140 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add1118 = fadd <4 x float> %tmp140, undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %add1118, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp141 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext1119 = extractelement <4 x float> %tmp141, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add1120 = fadd float %vecext1119, 0xC065A66660000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins1121 = insertelement <4 x float> undef, float %add1120, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins1121, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp142 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext1122 = extractelement <4 x float> %tmp142, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add1123 = fadd float %vecext1122, 0x4072533340000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp143 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins1124 = insertelement <4 x float> %tmp143, float %add1123, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext1125 = extractelement <4 x float> undef, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins1127 = insertelement <4 x float> undef, float undef, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins1127, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp144 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext1128 = extractelement <4 x float> %tmp144, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add1129 = fadd float %vecext1128, 0x405C866660000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp145 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins1130 = insertelement <4 x float> %tmp145, float %add1129, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> <float 0xC06D6CCCC0000000, float 0xC032E66660000000, float -1.005000e+02, float 0x40765B3340000000>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp146 = load <4 x float>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp147 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add1132 = fadd <4 x float> %tmp147, %tmp146
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp148 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext1133 = extractelement <4 x float> %tmp148, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add1134 = fadd float %vecext1133, 0xC07EB999A0000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp149 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins1135 = insertelement <4 x float> %tmp149, float %add1134, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins1135, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp150 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext1136 = extractelement <4 x float> %tmp150, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp151 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins1138 = insertelement <4 x float> %tmp151, float undef, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins1138, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp152 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add1140 = fadd float undef, 0x407AE999A0000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp153 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins1141 = insertelement <4 x float> %tmp153, float %add1140, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext1142 = extractelement <4 x float> undef, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add1143 = fadd float %vecext1142, 0x407A24CCC0000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp154 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins1144 = insertelement <4 x float> %tmp154, float %add1143, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins1144, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp155 = load <4 x float>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp156 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add1146 = fadd <4 x float> %tmp156, %tmp155
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %add1146, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp157 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add1148 = fadd float undef, 4.145000e+02
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp158 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins1158 = insertelement <4 x float> undef, float undef, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins1158, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> <float 0x40603999A0000000, float -9.150000e+01, float 0xC051E66660000000, float -4.825000e+02>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add1218 = fadd float undef, 0xC078733340000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins1219 = insertelement <4 x float> undef, float %add1218, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> <float 0xC0655CCCC0000000, float -4.900000e+01, float -4.525000e+02, float 4.205000e+02>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> undef, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp159 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext1279 = extractelement <4 x float> %tmp159, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add1280 = fadd float %vecext1279, 0xC062D999A0000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp160 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins1281 = insertelement <4 x float> %tmp160, float %add1280, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins1281, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp161 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext1282 = extractelement <4 x float> %tmp161, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add1283 = fadd float %vecext1282, 4.365000e+02
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp162 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins1284 = insertelement <4 x float> %tmp162, float %add1283, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins1284, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp163 = load <4 x float>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp164 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add1286 = fadd <4 x float> %tmp164, %tmp163
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %add1286, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp165 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add1288 = fadd float undef, 0xC0731199A0000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp166 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> undef, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp167 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext1444 = extractelement <4 x float> %tmp167, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins1460 = insertelement <4 x float> undef, float undef, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins1460, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp168 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add1462 = fadd float undef, -1.670000e+02
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins1463 = insertelement <4 x float> undef, float %add1462, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp169 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext1464 = extractelement <4 x float> %tmp169, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add1465 = fadd float %vecext1464, 0xC066333340000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp170 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins1466 = insertelement <4 x float> %tmp170, float %add1465, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins1466, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> <float 3.885000e+02, float 0x4054266660000000, float -9.500000e+01, float 8.500000e+01>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp171 = load <4 x float>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp172 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add1468 = fadd <4 x float> %tmp172, %tmp171
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %add1468, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp173 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add1470 = fadd float undef, 0x4033B33340000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp174 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins1471 = insertelement <4 x float> %tmp174, float %add1470, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins1471, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp175 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext1472 = extractelement <4 x float> %tmp175, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add1473 = fadd float %vecext1472, 0xC05F666660000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp176 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins1474 = insertelement <4 x float> %tmp176, float %add1473, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp177 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext1475 = extractelement <4 x float> %tmp177, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp178 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins1477 = insertelement <4 x float> %tmp178, float undef, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins1477, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp179 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext1478 = extractelement <4 x float> %tmp179, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add1479 = fadd float %vecext1478, 0x407E2E6660000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp180 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins1480 = insertelement <4 x float> %tmp180, float %add1479, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins1480, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> <float 0xC061B33340000000, float 3.290000e+02, float 0xC067766660000000, float 0x407DB33340000000>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp181 = load <4 x float>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp182 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> undef, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp183 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext1483 = extractelement <4 x float> %tmp183, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add1484 = fadd float %vecext1483, 0xC053D999A0000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp184 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp185 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext1486 = extractelement <4 x float> %tmp185, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins1502 = insertelement <4 x float> undef, float undef, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins1502, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext1503 = extractelement <4 x float> undef, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add1504 = fadd float %vecext1503, -2.475000e+02
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp186 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins1505 = insertelement <4 x float> %tmp186, float %add1504, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins1505, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp187 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext1506 = extractelement <4 x float> %tmp187, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add1507 = fadd float %vecext1506, 0x40715199A0000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp188 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins1508 = insertelement <4 x float> %tmp188, float %add1507, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins1508, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> <float 0x40762B3340000000, float 0xC074566660000000, float 0xC07C74CCC0000000, float 0xC053F999A0000000>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp189 = load <4 x float>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp190 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add1510 = fadd <4 x float> %tmp190, %tmp189
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %add1510, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp191 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp192 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext1654 = extractelement <4 x float> %tmp192, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add1655 = fadd float %vecext1654, 0xC07D8CCCC0000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp193 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins1656 = insertelement <4 x float> %tmp193, float %add1655, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins1656, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add1658 = fadd float undef, 0x40709999A0000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp194 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> undef, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext1660 = extractelement <4 x float> undef, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add1661 = fadd float %vecext1660, 0xC06F166660000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp195 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins1662 = insertelement <4 x float> %tmp195, float %add1661, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins1662, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> <float 0xC075266660000000, float 0xC072C4CCC0000000, float 0x407C4E6660000000, float -4.485000e+02>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins1676 = insertelement <4 x float> undef, float undef, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> undef, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp196 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add1692 = fadd <4 x float> %tmp196, undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %add1692, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp197 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext1693 = extractelement <4 x float> %tmp197, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add1694 = fadd float %vecext1693, 0x407A1999A0000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp198 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins1695 = insertelement <4 x float> %tmp198, float %add1694, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins1695, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp199 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext1696 = extractelement <4 x float> %tmp199, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add1697 = fadd float %vecext1696, 2.850000e+01
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp200 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins1698 = insertelement <4 x float> %tmp200, float %add1697, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins1698, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp201 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext1699 = extractelement <4 x float> %tmp201, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp202 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins1701 = insertelement <4 x float> %tmp202, float undef, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins1701, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp203 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins1704 = insertelement <4 x float> undef, float undef, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> <float 0xC075933340000000, float 0xC0489999A0000000, float 0xC078AB3340000000, float 0x406DFCCCC0000000>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp204 = load <4 x float>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp205 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp206 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins1709 = insertelement <4 x float> %tmp206, float undef, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins1709, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp207 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext1713 = extractelement <4 x float> undef, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add1714 = fadd float %vecext1713, 0xC0703199A0000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins1723 = insertelement <4 x float> undef, float undef, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp208 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> undef, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext1730 = extractelement <4 x float> undef, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add1731 = fadd float %vecext1730, 4.130000e+02
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp209 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins1732 = insertelement <4 x float> %tmp209, float %add1731, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins1732, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> <float 0x40551999A0000000, float 0xC0708999A0000000, float 0xC054F33340000000, float 0xC07C5999A0000000>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp210 = load <4 x float>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add1734 = fadd <4 x float> undef, %tmp210
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp211 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add1736 = fadd float undef, 0x407C3999A0000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp212 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins1737 = insertelement <4 x float> %tmp212, float %add1736, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp213 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext1738 = extractelement <4 x float> %tmp213, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add1739 = fadd float %vecext1738, 0xC0711E6660000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp214 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins1740 = insertelement <4 x float> %tmp214, float %add1739, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins1740, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp215 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext1741 = extractelement <4 x float> %tmp215, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add1742 = fadd float %vecext1741, -2.545000e+02
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp216 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins1743 = insertelement <4 x float> %tmp216, float %add1742, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins1743, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext1744 = extractelement <4 x float> undef, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp217 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins1746 = insertelement <4 x float> %tmp217, float undef, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> <float 0xC076466660000000, float 0x4060BCCCC0000000, float 0x405EF999A0000000, float 0x4074766660000000>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp218 = load <4 x float>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add1748 = fadd <4 x float> undef, %tmp218
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %add1748, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp219 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add1750 = fadd float undef, 0x407C6B3340000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins1751 = insertelement <4 x float> undef, float %add1750, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp220 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext1752 = extractelement <4 x float> %tmp220, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add1753 = fadd float %vecext1752, 0x40730CCCC0000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp221 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins1754 = insertelement <4 x float> %tmp221, float %add1753, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp222 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext1755 = extractelement <4 x float> %tmp222, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add1756 = fadd float %vecext1755, 0xC059F33340000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp223 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add1759 = fadd float undef, 0x40678999A0000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp224 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins1760 = insertelement <4 x float> %tmp224, float %add1759, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins1760, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> <float 0x405E333340000000, float 0x40571999A0000000, float 0xC02E333340000000, float 0x4053A66660000000>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp225 = load <4 x float>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add1762 = fadd <4 x float> undef, %tmp225
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %add1762, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp226 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext1763 = extractelement <4 x float> %tmp226, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add1764 = fadd float %vecext1763, 0xC0299999A0000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp227 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins1765 = insertelement <4 x float> %tmp227, float %add1764, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins1765, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp228 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext1766 = extractelement <4 x float> %tmp228, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add1767 = fadd float %vecext1766, 0x407DDE6660000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp229 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins1768 = insertelement <4 x float> %tmp229, float %add1767, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins1768, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext1769 = extractelement <4 x float> undef, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add1770 = fadd float %vecext1769, 0x407A1B3340000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp230 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins1771 = insertelement <4 x float> %tmp230, float %add1770, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins1771, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp231 = load <4 x float>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp232 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp233 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp234 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins1779 = insertelement <4 x float> %tmp234, float undef, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins1779, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp235 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> undef, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp236 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext1783 = extractelement <4 x float> %tmp236, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add1784 = fadd float %vecext1783, 0x405E933340000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins1785 = insertelement <4 x float> undef, float %add1784, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins1785, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> <float 0xC07074CCC0000000, float 0xC04D666660000000, float 3.235000e+02, float 0xC0724199A0000000>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp237 = load <4 x float>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add1790 = fadd <4 x float> undef, %tmp237
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp238 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext1791 = extractelement <4 x float> %tmp238, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add1792 = fadd float %vecext1791, 0x4077DE6660000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp239 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins1793 = insertelement <4 x float> %tmp239, float %add1792, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins1793, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp240 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add1795 = fadd float undef, 0x4055266660000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp241 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins1796 = insertelement <4 x float> %tmp241, float %add1795, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins1799 = insertelement <4 x float> undef, float undef, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext1800 = extractelement <4 x float> undef, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp242 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> undef, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> <float -6.600000e+01, float 0xC07B2199A0000000, float 0x4011333340000000, float 0xC0635CCCC0000000>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp243 = load <4 x float>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp244 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp245 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp246 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add1865 = fadd float undef, -2.235000e+02
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp247 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins1866 = insertelement <4 x float> %tmp247, float %add1865, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp248 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp249 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins1872 = insertelement <4 x float> %tmp249, float undef, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> <float 0x406B8999A0000000, float 0xC0696CCCC0000000, float 0xC07A34CCC0000000, float 0x407654CCC0000000>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp250 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add1874 = fadd <4 x float> %tmp250, undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %add1874, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext1875 = extractelement <4 x float> undef, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp251 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins1894 = insertelement <4 x float> %tmp251, float undef, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp252 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext1895 = extractelement <4 x float> %tmp252, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins1900 = insertelement <4 x float> undef, float undef, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins1900, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins1905 = insertelement <4 x float> undef, float undef, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins1905, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp253 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext1906 = extractelement <4 x float> %tmp253, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add1907 = fadd float %vecext1906, 0xC07E5E6660000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins1908 = insertelement <4 x float> undef, float %add1907, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins1908, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext1909 = extractelement <4 x float> undef, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp254 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext1912 = extractelement <4 x float> %tmp254, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add1913 = fadd float %vecext1912, 0xC063ECCCC0000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp255 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp256 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add1916 = fadd <4 x float> %tmp256, undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %add1916, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> undef, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext1923 = extractelement <4 x float> undef, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp257 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add1927 = fadd float undef, 0x40761999A0000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp258 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins1928 = insertelement <4 x float> %tmp258, float %add1927, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins1928, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> <float 7.100000e+01, float 0xC0634999A0000000, float 0x407B0B3340000000, float 0xC07DE999A0000000>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp259 = load <4 x float>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp260 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add1930 = fadd <4 x float> %tmp260, %tmp259
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp261 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp262 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins1933 = insertelement <4 x float> %tmp262, float undef, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins1933, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp263 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext1934 = extractelement <4 x float> %tmp263, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add1935 = fadd float %vecext1934, 0xC07D3199A0000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp264 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins1936 = insertelement <4 x float> %tmp264, float %add1935, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext1940 = extractelement <4 x float> undef, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins1942 = insertelement <4 x float> undef, float undef, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> <float -8.200000e+01, float 0xC04C733340000000, float 0xC077ACCCC0000000, float 0x4074566660000000>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp265 = load <4 x float>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp266 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> undef, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp267 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext1945 = extractelement <4 x float> %tmp267, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add1946 = fadd float %vecext1945, 0xC074866660000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins1953 = insertelement <4 x float> undef, float undef, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins1953, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp268 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> undef, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp269 = load <4 x float>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp270 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add1958 = fadd <4 x float> %tmp270, %tmp269
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp271 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext1959 = extractelement <4 x float> %tmp271, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add1960 = fadd float %vecext1959, 0x4065ACCCC0000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext1962 = extractelement <4 x float> undef, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add1963 = fadd float %vecext1962, 0xC07134CCC0000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp272 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins1964 = insertelement <4 x float> %tmp272, float %add1963, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins1964, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext1965 = extractelement <4 x float> undef, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp273 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins1967 = insertelement <4 x float> %tmp273, float undef, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins1967, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp274 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext1968 = extractelement <4 x float> %tmp274, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add1969 = fadd float %vecext1968, 7.100000e+01
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp275 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins1970 = insertelement <4 x float> %tmp275, float %add1969, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins1970, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> <float 0x402E9999A0000000, float 0x407344CCC0000000, float -4.165000e+02, float 0x4078FCCCC0000000>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp276 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp277 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp278 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins1975 = insertelement <4 x float> %tmp278, float undef, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins1975, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp279 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext1976 = extractelement <4 x float> %tmp279, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins1978 = insertelement <4 x float> undef, float undef, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins1978, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext1979 = extractelement <4 x float> undef, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins1981 = insertelement <4 x float> undef, float undef, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins1981, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins1984 = insertelement <4 x float> undef, float undef, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins1984, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> <float 0xC06A766660000000, float 0xC07CE4CCC0000000, float -1.055000e+02, float 0x40786E6660000000>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> undef, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext1990 = extractelement <4 x float> undef, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext1996 = extractelement <4 x float> undef, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add1997 = fadd float %vecext1996, -1.400000e+02
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp280 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins1998 = insertelement <4 x float> %tmp280, float %add1997, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins1998, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> <float 0xC0794E6660000000, float 0xC073CCCCC0000000, float 0x407994CCC0000000, float 6.500000e+01>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> undef, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext2004 = extractelement <4 x float> undef, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add2005 = fadd float %vecext2004, -1.970000e+02
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp281 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins2006 = insertelement <4 x float> %tmp281, float %add2005, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins2006, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp282 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext2007 = extractelement <4 x float> %tmp282, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp283 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins2009 = insertelement <4 x float> %tmp283, float undef, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp284 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext2010 = extractelement <4 x float> %tmp284, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add2011 = fadd float %vecext2010, 0xC074533340000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp285 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins2012 = insertelement <4 x float> %tmp285, float %add2011, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins2012, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> <float 0xC04E733340000000, float 0xC074566660000000, float 0x4079F66660000000, float 0xC0705B3340000000>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp286 = load <4 x float>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp287 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> undef, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp288 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext2015 = extractelement <4 x float> %tmp288, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add2016 = fadd float %vecext2015, 0xC060633340000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp289 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins2017 = insertelement <4 x float> %tmp289, float %add2016, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add2022 = fadd float undef, 8.350000e+01
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp290 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins2023 = insertelement <4 x float> %tmp290, float %add2022, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp291 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext2024 = extractelement <4 x float> %tmp291, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp292 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add2028 = fadd <4 x float> %tmp292, undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %add2028, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext2029 = extractelement <4 x float> undef, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add2030 = fadd float %vecext2029, -9.450000e+01
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp293 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> undef, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp294 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add2036 = fadd float undef, 0x407DE66660000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp295 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp296 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp297 = load <4 x float>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp298 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp299 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins2045 = insertelement <4 x float> %tmp299, float undef, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins2045, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp300 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext2046 = extractelement <4 x float> %tmp300, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add2047 = fadd float %vecext2046, 0xC065433340000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> undef, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext2052 = extractelement <4 x float> undef, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp301 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins2054 = insertelement <4 x float> %tmp301, float undef, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins2054, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> <float 0x4024666660000000, float 0x4079366660000000, float 0x40721B3340000000, float 0x406E533340000000>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp302 = load <4 x float>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add2056 = fadd <4 x float> undef, %tmp302
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %add2056, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp303 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp304 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins2062 = insertelement <4 x float> %tmp304, float undef, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins2062, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp305 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> undef, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp306 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext2066 = extractelement <4 x float> %tmp306, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add2067 = fadd float %vecext2066, 0x40690999A0000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins2068 = insertelement <4 x float> undef, float %add2067, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins2068, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> <float 0xC07EFCCCC0000000, float -3.420000e+02, float 0xC07BC999A0000000, float 0x40751999A0000000>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp307 = load <4 x float>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp308 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add2070 = fadd <4 x float> %tmp308, %tmp307
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %add2070, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp309 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext2071 = extractelement <4 x float> %tmp309, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add2072 = fadd float %vecext2071, 0x4057733340000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp310 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins2073 = insertelement <4 x float> %tmp310, float %add2072, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins2073, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp311 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext2074 = extractelement <4 x float> %tmp311, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp312 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins2076 = insertelement <4 x float> %tmp312, float undef, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp313 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext2077 = extractelement <4 x float> %tmp313, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add2078 = fadd float %vecext2077, 0x4061F999A0000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp314 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins2079 = insertelement <4 x float> %tmp314, float %add2078, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins2079, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp315 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext2080 = extractelement <4 x float> %tmp315, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add2081 = fadd float %vecext2080, 0x407A1B3340000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp316 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins2082 = insertelement <4 x float> %tmp316, float %add2081, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins2082, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> <float 0x40764E6660000000, float 0x40501999A0000000, float 0xC079A4CCC0000000, float 0x4050533340000000>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp317 = load <4 x float>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp318 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> undef, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp319 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext2085 = extractelement <4 x float> %tmp319, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add2086 = fadd float %vecext2085, 0x406E666660000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins2087 = insertelement <4 x float> undef, float %add2086, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins2087, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext2480 = extractelement <4 x float> undef, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add2481 = fadd float %vecext2480, 0x4039666660000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext2483 = extractelement <4 x float> undef, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add2484 = fadd float %vecext2483, 0xC06A3999A0000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp320 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins2485 = insertelement <4 x float> %tmp320, float %add2484, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins2485, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp321 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add2487 = fadd float undef, 2.030000e+02
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp322 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> undef, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> <float 0x4073DE6660000000, float 0x4067CCCCC0000000, float 0xC03F1999A0000000, float 4.350000e+01>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext2491 = extractelement <4 x float> undef, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp323 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> undef, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp324 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext2494 = extractelement <4 x float> %tmp324, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add2495 = fadd float %vecext2494, 0xC0743CCCC0000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp325 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins2499 = insertelement <4 x float> undef, float undef, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins2499, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext2500 = extractelement <4 x float> undef, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add2501 = fadd float %vecext2500, 0x40796E6660000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp326 = load <4 x float>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp327 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext2508 = extractelement <4 x float> undef, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add2518 = fadd <4 x float> undef, undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp328 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext2519 = extractelement <4 x float> %tmp328, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add2520 = fadd float %vecext2519, 0xC0399999A0000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp329 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add2534 = fadd float undef, 0x4072C66660000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext2536 = extractelement <4 x float> undef, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add2537 = fadd float %vecext2536, 0x407D066660000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp330 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins2538 = insertelement <4 x float> %tmp330, float %add2537, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins2538, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext2539 = extractelement <4 x float> undef, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add2540 = fadd float %vecext2539, 0x406F9999A0000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins2580 = insertelement <4 x float> undef, float undef, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins2580, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp331 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext2581 = extractelement <4 x float> %tmp331, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add2582 = fadd float %vecext2581, 0x406BE66660000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins2583 = insertelement <4 x float> undef, float %add2582, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins2583, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext2584 = extractelement <4 x float> undef, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add2585 = fadd float %vecext2584, 3.585000e+02
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp332 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> undef, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> <float 0x40773199A0000000, float 0x407D7999A0000000, float 0xC0717199A0000000, float 0xC07E9CCCC0000000>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add2590 = fadd float undef, 0x407B1999A0000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp333 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp334 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> undef, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add2672 = fadd <4 x float> undef, undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %add2672, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp335 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext2676 = extractelement <4 x float> undef, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add2677 = fadd float %vecext2676, 0x406D6999A0000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp336 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins2678 = insertelement <4 x float> %tmp336, float %add2677, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins2678, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp337 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext2679 = extractelement <4 x float> %tmp337, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins2681 = insertelement <4 x float> undef, float undef, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins2681, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp338 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext2682 = extractelement <4 x float> %tmp338, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins2684 = insertelement <4 x float> undef, float undef, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp339 = load <4 x float>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp340 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> undef, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp341 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add2688 = fadd float undef, 0x4063266660000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> undef, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins2692 = insertelement <4 x float> undef, float undef, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins2692, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp342 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext2696 = extractelement <4 x float> undef, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add2697 = fadd float %vecext2696, 4.140000e+02
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp343 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins2698 = insertelement <4 x float> %tmp343, float %add2697, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins2698, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> <float 0x40547999A0000000, float 0xC060633340000000, float 0x4075766660000000, float 0x4072D33340000000>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp344 = load <4 x float>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp345 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add2700 = fadd <4 x float> %tmp345, %tmp344
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %add2700, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp346 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp347 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp348 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext2704 = extractelement <4 x float> %tmp348, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add2705 = fadd float %vecext2704, 4.700000e+02
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp349 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> undef, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext3121 = extractelement <4 x float> undef, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add3125 = fadd float undef, 0xC06F266660000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins3126 = insertelement <4 x float> undef, float %add3125, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins3126, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp350 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext3127 = extractelement <4 x float> %tmp350, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add3128 = fadd float %vecext3127, 0x40638999A0000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp351 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins3129 = insertelement <4 x float> %tmp351, float %add3128, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins3129, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp352 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add3131 = fadd float undef, 3.215000e+02
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp353 = load <4 x float>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp354 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add3134 = fadd <4 x float> %tmp354, %tmp353
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %add3134, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp355 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add3136 = fadd float undef, 0x4074333340000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins3140 = insertelement <4 x float> undef, float undef, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins3140, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp356 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext3141 = extractelement <4 x float> %tmp356, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add3142 = fadd float %vecext3141, 2.425000e+02
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp357 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins3143 = insertelement <4 x float> %tmp357, float %add3142, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins3143, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp358 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext3144 = extractelement <4 x float> %tmp358, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add3145 = fadd float %vecext3144, -3.760000e+02
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp359 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins3146 = insertelement <4 x float> %tmp359, float %add3145, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins3146, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp360 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins3272 = insertelement <4 x float> undef, float undef, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins3272, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> <float 0x407B4999A0000000, float 0x40695CCCC0000000, float 0xC05C0CCCC0000000, float 0x407EB33340000000>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp361 = load <4 x float>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp362 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add3274 = fadd <4 x float> %tmp362, %tmp361
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %add3274, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp363 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext3275 = extractelement <4 x float> %tmp363, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add3276 = fadd float %vecext3275, 0x4058066660000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp364 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins3277 = insertelement <4 x float> %tmp364, float %add3276, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins3277, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp365 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext3278 = extractelement <4 x float> %tmp365, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add3279 = fadd float %vecext3278, 0xC053666660000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins3280 = insertelement <4 x float> undef, float %add3279, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins3280, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp366 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext3281 = extractelement <4 x float> %tmp366, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add3282 = fadd float %vecext3281, 0xC0650CCCC0000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp367 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins3283 = insertelement <4 x float> %tmp367, float %add3282, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins3283, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp368 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext3284 = extractelement <4 x float> %tmp368, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add3285 = fadd float %vecext3284, 0x4062533340000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins3286 = insertelement <4 x float> undef, float %add3285, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp369 = load <4 x float>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> undef, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp370 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext3289 = extractelement <4 x float> %tmp370, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add3290 = fadd float %vecext3289, 0xC07E133340000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp371 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins3291 = insertelement <4 x float> %tmp371, float %add3290, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins3291, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext3292 = extractelement <4 x float> undef, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp372 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp373 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins3328 = insertelement <4 x float> %tmp373, float undef, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add3330 = fadd <4 x float> undef, undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %add3330, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext3331 = extractelement <4 x float> undef, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add3332 = fadd float %vecext3331, 0x4061633340000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp374 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins3333 = insertelement <4 x float> %tmp374, float %add3332, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins3333, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext3334 = extractelement <4 x float> undef, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add3335 = fadd float %vecext3334, 0x401B333340000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins3336 = insertelement <4 x float> undef, float %add3335, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp375 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext3337 = extractelement <4 x float> %tmp375, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add3338 = fadd float %vecext3337, 0x403C4CCCC0000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp376 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins3339 = insertelement <4 x float> %tmp376, float %add3338, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins3339, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp377 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext3340 = extractelement <4 x float> %tmp377, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp378 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins3342 = insertelement <4 x float> %tmp378, float undef, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp379 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add3344 = fadd <4 x float> %tmp379, undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %add3344, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp380 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext3345 = extractelement <4 x float> %tmp380, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add3346 = fadd float %vecext3345, 0x407E7E6660000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp381 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins3347 = insertelement <4 x float> %tmp381, float %add3346, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext3348 = extractelement <4 x float> undef, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add3349 = fadd float %vecext3348, 0xC05F666660000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp382 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins3350 = insertelement <4 x float> %tmp382, float %add3349, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins3350, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add3352 = fadd float undef, 0xC06ACCCCC0000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp383 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins3423 = insertelement <4 x float> undef, float undef, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins3423, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext3424 = extractelement <4 x float> undef, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add3425 = fadd float %vecext3424, 0xC05DB33340000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp384 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins3426 = insertelement <4 x float> %tmp384, float %add3425, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins3426, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> <float 2.795000e+02, float -4.065000e+02, float 0xC05CD999A0000000, float 1.825000e+02>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp385 = load <4 x float>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp386 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add3428 = fadd <4 x float> %tmp386, %tmp385
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp387 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext3429 = extractelement <4 x float> %tmp387, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add3430 = fadd float %vecext3429, 0x40695CCCC0000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp388 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins3431 = insertelement <4 x float> %tmp388, float %add3430, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins3431, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp389 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext3432 = extractelement <4 x float> %tmp389, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add3433 = fadd float %vecext3432, 0x4052A66660000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp390 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins3434 = insertelement <4 x float> %tmp390, float %add3433, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins3434, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext3435 = extractelement <4 x float> undef, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp391 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins3437 = insertelement <4 x float> %tmp391, float undef, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins3437, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp392 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext3438 = extractelement <4 x float> %tmp392, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add3439 = fadd float %vecext3438, 0xC071D999A0000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> <float 0xC0798199A0000000, float -3.385000e+02, float 0xC050066660000000, float 0xC075E999A0000000>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp393 = load <4 x float>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp394 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add3442 = fadd <4 x float> %tmp394, %tmp393
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %add3442, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext3443 = extractelement <4 x float> undef, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add3444 = fadd float %vecext3443, 0xC07CF999A0000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp395 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext3446 = extractelement <4 x float> %tmp395, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add3447 = fadd float %vecext3446, 0xC06E4999A0000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp396 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins3448 = insertelement <4 x float> %tmp396, float %add3447, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins3448, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp397 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext3449 = extractelement <4 x float> %tmp397, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add3450 = fadd float %vecext3449, 0x40779B3340000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp398 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins3451 = insertelement <4 x float> %tmp398, float %add3450, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins3451, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add3453 = fadd float undef, 0xC07ADCCCC0000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp399 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins3454 = insertelement <4 x float> %tmp399, float %add3453, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins3454, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp400 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext3457 = extractelement <4 x float> %tmp400, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add3458 = fadd float %vecext3457, -4.440000e+02
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins3459 = insertelement <4 x float> undef, float %add3458, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins3459, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp401 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext3460 = extractelement <4 x float> %tmp401, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp402 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins3462 = insertelement <4 x float> %tmp402, float undef, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins3462, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp403 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add3464 = fadd float undef, 0xC057B999A0000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp404 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins3465 = insertelement <4 x float> %tmp404, float %add3464, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins3465, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp405 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext3466 = extractelement <4 x float> %tmp405, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add3467 = fadd float %vecext3466, 0xC07A9CCCC0000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp406 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> undef, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> <float 0x405C3999A0000000, float 0xC07C6B3340000000, float 0x407ACB3340000000, float 0xC06E0999A0000000>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp407 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> undef, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp408 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext3477 = extractelement <4 x float> %tmp408, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins3479 = insertelement <4 x float> undef, float undef, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins3479, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext3480 = extractelement <4 x float> undef, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add3481 = fadd float %vecext3480, 0xC053F33340000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp409 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins3482 = insertelement <4 x float> %tmp409, float %add3481, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins3482, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> <float 3.565000e+02, float 0xC0464CCCC0000000, float 0x4037666660000000, float 0xC0788CCCC0000000>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp410 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add3484 = fadd <4 x float> %tmp410, undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %add3484, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp411 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add3486 = fadd float undef, -1.415000e+02
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins3487 = insertelement <4 x float> undef, float %add3486, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins3487, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp412 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext3488 = extractelement <4 x float> %tmp412, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add3489 = fadd float %vecext3488, 0x405A1999A0000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp413 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins3490 = insertelement <4 x float> %tmp413, float %add3489, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins3490, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add3492 = fadd float undef, 0x4078066660000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp414 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins3493 = insertelement <4 x float> %tmp414, float %add3492, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins3493, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp415 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add3495 = fadd float undef, 0xC0798999A0000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp416 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins3496 = insertelement <4 x float> %tmp416, float %add3495, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins3496, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp417 = load <4 x float>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp418 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add3498 = fadd <4 x float> %tmp418, %tmp417
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %add3498, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext3499 = extractelement <4 x float> undef, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add3500 = fadd float %vecext3499, -1.605000e+02
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins3501 = insertelement <4 x float> undef, float %add3500, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp419 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext3502 = extractelement <4 x float> %tmp419, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add3503 = fadd float %vecext3502, 0x4058C66660000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp420 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add3506 = fadd float undef, 0xC074DB3340000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp421 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins3507 = insertelement <4 x float> %tmp421, float %add3506, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins3507, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add3509 = fadd float undef, 0xC066033340000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp422 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> undef, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> <float 0x404B333340000000, float 4.680000e+02, float 0x40577999A0000000, float 0xC07D9999A0000000>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp423 = load <4 x float>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> undef, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext3513 = extractelement <4 x float> undef, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add3514 = fadd float %vecext3513, 2.300000e+02
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp424 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp425 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext3516 = extractelement <4 x float> %tmp425, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins5414 = insertelement <4 x float> undef, float undef, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins5414, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp426 = load <4 x float>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp427 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add5416 = fadd <4 x float> %tmp427, %tmp426
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %add5416, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp428 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add5418 = fadd float undef, 0xC07ED999A0000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp429 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins5419 = insertelement <4 x float> %tmp429, float %add5418, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins5624 = insertelement <4 x float> undef, float undef, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins5624, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> <float 0xC07B4999A0000000, float 0x4078B33340000000, float 0xC07674CCC0000000, float 0xC07C533340000000>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add5626 = fadd <4 x float> undef, undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %add5626, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext5627 = extractelement <4 x float> undef, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp430 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins5629 = insertelement <4 x float> %tmp430, float undef, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins5629, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp431 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext5630 = extractelement <4 x float> %tmp431, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add5631 = fadd float %vecext5630, 0x405EECCCC0000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins5632 = insertelement <4 x float> undef, float %add5631, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins5632, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp432 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins5688 = insertelement <4 x float> %tmp432, float undef, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins5688, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp433 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext5689 = extractelement <4 x float> %tmp433, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp434 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins5691 = insertelement <4 x float> %tmp434, float undef, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins5691, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext5692 = extractelement <4 x float> undef, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> <float -4.350000e+02, float 0xC0775CCCC0000000, float 0xC0714999A0000000, float 0xC0661999A0000000>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp435 = load <4 x float>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add5696 = fadd <4 x float> undef, %tmp435
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %add5696, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add5701 = fadd float undef, 0x4077D4CCC0000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp436 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins5702 = insertelement <4 x float> %tmp436, float %add5701, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins5702, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp437 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp438 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins5705 = insertelement <4 x float> %tmp438, float undef, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins5705, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp439 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext5706 = extractelement <4 x float> %tmp439, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add5707 = fadd float %vecext5706, 0xC0780B3340000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp440 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins5708 = insertelement <4 x float> %tmp440, float %add5707, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins5708, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> <float 0x405D666660000000, float 0xC069333340000000, float 0x407B6B3340000000, float 0xC06EB33340000000>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp441 = load <4 x float>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp442 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add5710 = fadd <4 x float> %tmp442, %tmp441
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %add5710, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp443 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext5711 = extractelement <4 x float> %tmp443, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add5712 = fadd float %vecext5711, 1.850000e+01
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp444 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins5713 = insertelement <4 x float> %tmp444, float %add5712, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins5713, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp445 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp446 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins5716 = insertelement <4 x float> %tmp446, float undef, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp447 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add5724 = fadd <4 x float> %tmp447, undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %add5724, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp448 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext5748 = extractelement <4 x float> undef, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp449 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins5750 = insertelement <4 x float> %tmp449, float undef, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> <float 0x40692999A0000000, float 0xC07C4CCCC0000000, float 0x407D1E6660000000, float 0x407B4199A0000000>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp450 = load <4 x float>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add5752 = fadd <4 x float> undef, %tmp450
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add5754 = fadd float undef, 0xC064033340000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp451 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins5755 = insertelement <4 x float> %tmp451, float %add5754, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins5755, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp452 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext5756 = extractelement <4 x float> %tmp452, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add5757 = fadd float %vecext5756, 0x40787B3340000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp453 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins5758 = insertelement <4 x float> %tmp453, float %add5757, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins5758, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp454 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext5759 = extractelement <4 x float> %tmp454, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp455 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins5761 = insertelement <4 x float> %tmp455, float undef, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins5761, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp456 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext5762 = extractelement <4 x float> %tmp456, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add5763 = fadd float %vecext5762, 0x40703E6660000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp457 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins5764 = insertelement <4 x float> %tmp457, float %add5763, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins5764, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> <float 0x407A6B3340000000, float 0x40470CCCC0000000, float 0xC076F4CCC0000000, float 0x40791999A0000000>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add5766 = fadd <4 x float> undef, undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %add5766, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp458 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext5767 = extractelement <4 x float> %tmp458, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add5768 = fadd float %vecext5767, 0x4065533340000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp459 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins5769 = insertelement <4 x float> %tmp459, float %add5768, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins5769, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add5771 = fadd float undef, 8.000000e+00
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp460 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins5772 = insertelement <4 x float> %tmp460, float %add5771, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp461 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add5796 = fadd float undef, 0x4058ECCCC0000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins5797 = insertelement <4 x float> undef, float %add5796, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins5797, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp462 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext5798 = extractelement <4 x float> %tmp462, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp463 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins5800 = insertelement <4 x float> %tmp463, float undef, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp464 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext5801 = extractelement <4 x float> %tmp464, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add5802 = fadd float %vecext5801, 0xC072A199A0000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp465 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins5803 = insertelement <4 x float> %tmp465, float %add5802, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins5803, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp466 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext5804 = extractelement <4 x float> %tmp466, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add5805 = fadd float %vecext5804, 0x40785999A0000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp467 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins5806 = insertelement <4 x float> %tmp467, float %add5805, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins5806, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp468 = load <4 x float>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> undef, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp469 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext5809 = extractelement <4 x float> %tmp469, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add5810 = fadd float %vecext5809, 0x407B7B3340000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp470 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> undef, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp471 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext5818 = extractelement <4 x float> undef, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add5819 = fadd float %vecext5818, 0x4071733340000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp472 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins5820 = insertelement <4 x float> %tmp472, float %add5819, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins5820, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> <float 0x40514CCCC0000000, float 0x406A7999A0000000, float 0xC078766660000000, float 0xC0522CCCC0000000>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp473 = load <4 x float>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp474 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add5822 = fadd <4 x float> %tmp474, %tmp473
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %add5822, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp475 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext5823 = extractelement <4 x float> %tmp475, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp476 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins5825 = insertelement <4 x float> %tmp476, float undef, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp477 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext5826 = extractelement <4 x float> %tmp477, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add5827 = fadd float %vecext5826, 0x407F14CCC0000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp478 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins5828 = insertelement <4 x float> %tmp478, float %add5827, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins5828, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp479 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext5829 = extractelement <4 x float> %tmp479, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add5830 = fadd float %vecext5829, 3.350000e+02
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp480 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins5831 = insertelement <4 x float> %tmp480, float %add5830, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> undef, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> <float -3.370000e+02, float 0xC072DE6660000000, float -2.670000e+02, float 0x4062333340000000>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> undef, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp481 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext5837 = extractelement <4 x float> %tmp481, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins5839 = insertelement <4 x float> undef, float undef, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins5839, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp482 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext5840 = extractelement <4 x float> %tmp482, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp483 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins5842 = insertelement <4 x float> %tmp483, float undef, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins5842, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp484 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp485 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins5845 = insertelement <4 x float> %tmp485, float undef, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins5845, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> <float 0xC06EC999A0000000, float 0x406D5999A0000000, float 0x4056F33340000000, float 0xC07E14CCC0000000>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add5850 = fadd <4 x float> undef, undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %add5850, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp486 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add5852 = fadd float undef, 2.985000e+02
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp487 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins5853 = insertelement <4 x float> %tmp487, float %add5852, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins5853, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp488 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext5854 = extractelement <4 x float> %tmp488, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add5855 = fadd float %vecext5854, 0xC053F999A0000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp489 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins5856 = insertelement <4 x float> %tmp489, float %add5855, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins5856, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp490 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add5858 = fadd float undef, 0x4071666660000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp491 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins5859 = insertelement <4 x float> %tmp491, float %add5858, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins5859, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp492 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext5860 = extractelement <4 x float> %tmp492, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp493 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext5899 = extractelement <4 x float> %tmp493, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add5900 = fadd float %vecext5899, -2.700000e+01
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp494 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins5901 = insertelement <4 x float> %tmp494, float %add5900, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins5901, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add5914 = fadd float undef, 0x40786E6660000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins5918 = insertelement <4 x float> undef, float undef, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins5918, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> <float 0x406F266660000000, float 7.900000e+01, float -4.695000e+02, float -4.880000e+02>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add5920 = fadd <4 x float> undef, undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %add5920, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add5934 = fadd <4 x float> undef, undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext5935 = extractelement <4 x float> undef, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add5936 = fadd float %vecext5935, 0xC056B999A0000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp495 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> undef, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp496 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext5994 = extractelement <4 x float> undef, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add5995 = fadd float %vecext5994, 0x4051666660000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins5996 = insertelement <4 x float> undef, float %add5995, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins5996, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp497 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext5997 = extractelement <4 x float> %tmp497, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> undef, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp498 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext6000 = extractelement <4 x float> %tmp498, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add6001 = fadd float %vecext6000, -7.600000e+01
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp499 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins6002 = insertelement <4 x float> %tmp499, float %add6001, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins6002, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> <float 0xC07EA199A0000000, float 0x407DC33340000000, float 0xC0753199A0000000, float -3.895000e+02>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp500 = load <4 x float>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add6004 = fadd <4 x float> undef, %tmp500
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %add6004, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp501 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext6005 = extractelement <4 x float> %tmp501, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp502 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins6007 = insertelement <4 x float> %tmp502, float undef, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp503 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext6008 = extractelement <4 x float> %tmp503, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp504 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins6024 = insertelement <4 x float> %tmp504, float undef, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins6024, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp505 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext6025 = extractelement <4 x float> %tmp505, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add6026 = fadd float %vecext6025, 3.700000e+02
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp506 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins6027 = insertelement <4 x float> %tmp506, float %add6026, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins6027, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext6028 = extractelement <4 x float> undef, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add6029 = fadd float %vecext6028, 0x4071666660000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp507 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins6030 = insertelement <4 x float> %tmp507, float %add6029, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins6030, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> <float 0xC0527999A0000000, float 0xC06AD999A0000000, float 0x3FF6666660000000, float 0xC03F666660000000>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp508 = load <4 x float>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp509 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> undef, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp510 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext6033 = extractelement <4 x float> %tmp510, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp511 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> undef, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext6036 = extractelement <4 x float> undef, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add6037 = fadd float %vecext6036, 0xC075CB3340000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins6038 = insertelement <4 x float> undef, float %add6037, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins6038, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp512 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add6040 = fadd float undef, 0x4071ECCCC0000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp513 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins6041 = insertelement <4 x float> %tmp513, float %add6040, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins6041, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp514 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext6042 = extractelement <4 x float> %tmp514, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add6043 = fadd float %vecext6042, 0xC07DD33340000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp515 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins6044 = insertelement <4 x float> %tmp515, float %add6043, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins6044, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> <float 0xC065FCCCC0000000, float 0x40767CCCC0000000, float 0x4079D4CCC0000000, float 0xC07314CCC0000000>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp516 = load <4 x float>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp517 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add6046 = fadd <4 x float> %tmp517, %tmp516
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %add6046, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext6047 = extractelement <4 x float> undef, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp518 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins6049 = insertelement <4 x float> %tmp518, float undef, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins6049, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp519 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext6050 = extractelement <4 x float> %tmp519, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add6051 = fadd float %vecext6050, 0x407E4E6660000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins6055 = insertelement <4 x float> undef, float undef, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext6056 = extractelement <4 x float> undef, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp520 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> undef, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext6061 = extractelement <4 x float> undef, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp521 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> undef, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp522 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext6067 = extractelement <4 x float> %tmp522, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add6068 = fadd float %vecext6067, 0x40768E6660000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext6070 = extractelement <4 x float> undef, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add6071 = fadd float %vecext6070, 0xC07C6CCCC0000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins6072 = insertelement <4 x float> undef, float %add6071, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins6072, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> <float 0x40546CCCC0000000, float 0x4067D66660000000, float 0xC060E33340000000, float 0x4061533340000000>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp523 = load <4 x float>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp524 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add6074 = fadd <4 x float> %tmp524, %tmp523
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %add6074, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp525 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext6075 = extractelement <4 x float> %tmp525, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add6076 = fadd float %vecext6075, 0x405D733340000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp526 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins6077 = insertelement <4 x float> %tmp526, float %add6076, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins6077, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp527 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add6079 = fadd float undef, 0xC07E9B3340000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp528 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> undef, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp529 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add6082 = fadd float undef, 0x407DCE6660000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins6083 = insertelement <4 x float> undef, float %add6082, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins6083, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp530 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext6084 = extractelement <4 x float> %tmp530, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add6085 = fadd float %vecext6084, 0xC061A33340000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins6086 = insertelement <4 x float> undef, float %add6085, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins6086, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> <float 0x4055C66660000000, float 0x40735199A0000000, float 0xC0713199A0000000, float 0x40729B3340000000>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp531 = load <4 x float>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp532 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add6088 = fadd <4 x float> %tmp532, %tmp531
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %add6088, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp533 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext6089 = extractelement <4 x float> %tmp533, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add6107 = fadd float undef, 0xC06A166660000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp534 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins6108 = insertelement <4 x float> %tmp534, float %add6107, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins6108, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp535 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext6109 = extractelement <4 x float> %tmp535, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add6110 = fadd float %vecext6109, 0x4070FB3340000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp536 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> undef, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp537 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext6112 = extractelement <4 x float> %tmp537, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add6113 = fadd float %vecext6112, 0xC04AF33340000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp538 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp539 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext6117 = extractelement <4 x float> %tmp539, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add6118 = fadd float %vecext6117, 0x407AB33340000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp540 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins6119 = insertelement <4 x float> %tmp540, float %add6118, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins6119, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp541 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext6120 = extractelement <4 x float> %tmp541, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add6121 = fadd float %vecext6120, 0x405AE66660000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp542 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins6122 = insertelement <4 x float> %tmp542, float %add6121, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins6122, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext6123 = extractelement <4 x float> undef, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add6124 = fadd float %vecext6123, -4.385000e+02
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp543 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> undef, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext6126 = extractelement <4 x float> undef, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp544 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins6128 = insertelement <4 x float> %tmp544, float undef, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins6128, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> <float -2.980000e+02, float 0xC06F0CCCC0000000, float 0xC054A66660000000, float 0xC040CCCCC0000000>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp545 = load <4 x float>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp546 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add6130 = fadd <4 x float> %tmp546, %tmp545
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp547 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext6131 = extractelement <4 x float> %tmp547, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add6132 = fadd float %vecext6131, 0x407BDE6660000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins6133 = insertelement <4 x float> undef, float %add6132, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins6133, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext6134 = extractelement <4 x float> undef, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add6135 = fadd float %vecext6134, 0xC06B7999A0000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp548 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins6136 = insertelement <4 x float> %tmp548, float %add6135, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext6137 = extractelement <4 x float> undef, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add6138 = fadd float %vecext6137, 0x40752199A0000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp549 = load <4 x float>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add6172 = fadd <4 x float> undef, %tmp549
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp550 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp551 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins6178 = insertelement <4 x float> %tmp551, float undef, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins6178, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp552 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext6179 = extractelement <4 x float> %tmp552, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add6180 = fadd float %vecext6179, -3.905000e+02
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp553 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins6181 = insertelement <4 x float> %tmp553, float %add6180, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp554 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext6182 = extractelement <4 x float> %tmp554, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add6183 = fadd float %vecext6182, 1.515000e+02
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp555 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins6184 = insertelement <4 x float> %tmp555, float %add6183, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins6184, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp556 = load <4 x float>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins6189 = insertelement <4 x float> undef, float undef, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins6189, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp557 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext6190 = extractelement <4 x float> %tmp557, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add6191 = fadd float %vecext6190, 0xC07BD33340000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp558 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins6192 = insertelement <4 x float> %tmp558, float %add6191, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins6192, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp559 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp560 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext6196 = extractelement <4 x float> undef, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add6197 = fadd float %vecext6196, -4.070000e+02
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp561 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins6198 = insertelement <4 x float> %tmp561, float %add6197, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> <float 0x407904CCC0000000, float 0x406A833340000000, float 4.895000e+02, float 0x40648999A0000000>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp562 = load <4 x float>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp563 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add6200 = fadd <4 x float> %tmp563, %tmp562
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %add6200, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp564 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext6201 = extractelement <4 x float> %tmp564, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp565 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins6203 = insertelement <4 x float> %tmp565, float undef, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp566 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext6204 = extractelement <4 x float> %tmp566, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add6205 = fadd float %vecext6204, 1.740000e+02
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp567 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins6206 = insertelement <4 x float> %tmp567, float %add6205, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp568 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins6209 = insertelement <4 x float> %tmp568, float undef, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins6209, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp569 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext6210 = extractelement <4 x float> %tmp569, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp570 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add6219 = fadd float undef, 0xC0596CCCC0000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp571 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins6220 = insertelement <4 x float> %tmp571, float %add6219, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext6224 = extractelement <4 x float> undef, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add6225 = fadd float %vecext6224, 0xC074533340000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp572 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add6228 = fadd <4 x float> undef, undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %add6228, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext6229 = extractelement <4 x float> undef, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add6230 = fadd float %vecext6229, 1.695000e+02
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp573 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins6231 = insertelement <4 x float> %tmp573, float %add6230, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins6231, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp574 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext6232 = extractelement <4 x float> %tmp574, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add6233 = fadd float %vecext6232, 0x4079C33340000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp575 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins6234 = insertelement <4 x float> %tmp575, float %add6233, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins6234, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext6235 = extractelement <4 x float> undef, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add6236 = fadd float %vecext6235, 0xC07D8199A0000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins6237 = insertelement <4 x float> undef, float %add6236, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins6237, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp576 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins6245 = insertelement <4 x float> undef, float undef, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins6245, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp577 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext6246 = extractelement <4 x float> %tmp577, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add6247 = fadd float %vecext6246, 0x40631999A0000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp578 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins6251 = insertelement <4 x float> undef, float undef, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp579 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add6253 = fadd float undef, 0xC0692999A0000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins6254 = insertelement <4 x float> undef, float %add6253, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins6254, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> <float 4.600000e+02, float 0xC0777B3340000000, float 0x40351999A0000000, float 0xC06E433340000000>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp580 = load <4 x float>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp581 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add6256 = fadd <4 x float> %tmp581, %tmp580
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %add6256, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp582 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext6257 = extractelement <4 x float> %tmp582, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add6258 = fadd float %vecext6257, 4.670000e+02
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp583 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins6259 = insertelement <4 x float> %tmp583, float %add6258, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins6259, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp584 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext6260 = extractelement <4 x float> %tmp584, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add6261 = fadd float %vecext6260, 0xC05F733340000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp585 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins6262 = insertelement <4 x float> %tmp585, float %add6261, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins6262, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp586 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext6263 = extractelement <4 x float> %tmp586, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp587 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins6265 = insertelement <4 x float> %tmp587, float undef, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins6265, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp588 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext6266 = extractelement <4 x float> %tmp588, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add6267 = fadd float %vecext6266, 0x407174CCC0000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp589 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins6268 = insertelement <4 x float> %tmp589, float %add6267, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins6268, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> <float -3.130000e+02, float 0xC079733340000000, float -4.660000e+02, float 0xC064E66660000000>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp590 = load <4 x float>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp591 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add6270 = fadd <4 x float> %tmp591, %tmp590
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %add6270, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp592 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext6271 = extractelement <4 x float> %tmp592, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add6272 = fadd float %vecext6271, 1.765000e+02
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp593 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins6273 = insertelement <4 x float> %tmp593, float %add6272, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins6273, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp594 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext6274 = extractelement <4 x float> %tmp594, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add6275 = fadd float %vecext6274, 0x402C666660000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp595 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins6276 = insertelement <4 x float> %tmp595, float %add6275, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins6276, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp596 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext6277 = extractelement <4 x float> %tmp596, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add6278 = fadd float %vecext6277, -8.450000e+01
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp597 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins6279 = insertelement <4 x float> %tmp597, float %add6278, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins6279, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp598 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext6280 = extractelement <4 x float> %tmp598, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add6281 = fadd float %vecext6280, 0xC07A133340000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins6282 = insertelement <4 x float> undef, float %add6281, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins6282, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> <float 0x4067ECCCC0000000, float 0xC040CCCCC0000000, float 0xC0762E6660000000, float -4.750000e+02>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add6284 = fadd <4 x float> undef, undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext6285 = extractelement <4 x float> undef, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add6289 = fadd float undef, 0xC0738999A0000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp599 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins6293 = insertelement <4 x float> %tmp599, float undef, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins6293, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp600 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext6294 = extractelement <4 x float> %tmp600, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add6295 = fadd float %vecext6294, 0xC01CCCCCC0000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins6296 = insertelement <4 x float> undef, float %add6295, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins6296, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> <float 0x40704199A0000000, float 0x40753CCCC0000000, float 0xC07E2199A0000000, float 0xC068833340000000>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp601 = load <4 x float>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add6298 = fadd <4 x float> undef, %tmp601
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %add6298, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp602 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext6299 = extractelement <4 x float> %tmp602, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add6300 = fadd float %vecext6299, 0x4074B33340000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp603 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins6301 = insertelement <4 x float> %tmp603, float %add6300, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins6301, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp604 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext6302 = extractelement <4 x float> %tmp604, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add6303 = fadd float %vecext6302, 0xC05B333340000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp605 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins6304 = insertelement <4 x float> %tmp605, float %add6303, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins6304, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp606 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext6305 = extractelement <4 x float> %tmp606, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add6306 = fadd float %vecext6305, 0x4077E999A0000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins6307 = insertelement <4 x float> undef, float %add6306, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins6307, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp607 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext6308 = extractelement <4 x float> %tmp607, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add6309 = fadd float %vecext6308, 0x40707E6660000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp608 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins6310 = insertelement <4 x float> %tmp608, float %add6309, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins6310, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> <float 0x407A233340000000, float 0x406DA33340000000, float 3.725000e+02, float 0x40761199A0000000>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp609 = load <4 x float>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp610 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add6312 = fadd <4 x float> %tmp610, %tmp609
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %add6312, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp611 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext6313 = extractelement <4 x float> %tmp611, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add6314 = fadd float %vecext6313, 0xC07CF33340000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp612 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins6315 = insertelement <4 x float> %tmp612, float %add6314, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp613 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext6655 = extractelement <4 x float> %tmp613, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add6656 = fadd float %vecext6655, 2.185000e+02
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp614 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins6657 = insertelement <4 x float> %tmp614, float %add6656, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins6657, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins6660 = insertelement <4 x float> undef, float undef, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins6660, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> <float 0xC064E33340000000, float 0xC064833340000000, float 0xC0673CCCC0000000, float 0xC074266660000000>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp615 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext6663 = extractelement <4 x float> undef, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add6664 = fadd float %vecext6663, 0xC05B7999A0000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp616 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins6665 = insertelement <4 x float> %tmp616, float %add6664, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> undef, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp617 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext6669 = extractelement <4 x float> %tmp617, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp618 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> undef, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> <float 0xC07CC4CCC0000000, float 0x404EE66660000000, float 0xC0754CCCC0000000, float 0xC0744B3340000000>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp619 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add6676 = fadd <4 x float> %tmp619, undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %add6676, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp620 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext6677 = extractelement <4 x float> %tmp620, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add6678 = fadd float %vecext6677, 0x4077F4CCC0000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp621 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins6679 = insertelement <4 x float> %tmp621, float %add6678, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext6680 = extractelement <4 x float> undef, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add6681 = fadd float %vecext6680, 0x4061766660000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp622 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> undef, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp623 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext6683 = extractelement <4 x float> %tmp623, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add6684 = fadd float %vecext6683, 0x40718999A0000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp624 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins6685 = insertelement <4 x float> %tmp624, float %add6684, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins6685, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp625 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext6686 = extractelement <4 x float> %tmp625, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add6687 = fadd float %vecext6686, 0x4076D66660000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp626 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins6688 = insertelement <4 x float> %tmp626, float %add6687, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins6688, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> <float 7.500000e+00, float 0x4077E33340000000, float 0xC0596CCCC0000000, float 0xC07D4E6660000000>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp627 = load <4 x float>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add6690 = fadd <4 x float> undef, %tmp627
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %add6690, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp628 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext6691 = extractelement <4 x float> %tmp628, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add6692 = fadd float %vecext6691, 3.250000e+02
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp629 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins6693 = insertelement <4 x float> %tmp629, float %add6692, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins6693, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp630 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext6694 = extractelement <4 x float> %tmp630, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add6695 = fadd float %vecext6694, 0x407DF999A0000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp631 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins6696 = insertelement <4 x float> %tmp631, float %add6695, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins6696, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp632 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext6697 = extractelement <4 x float> %tmp632, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add6698 = fadd float %vecext6697, 0xC075FE6660000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp633 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins6699 = insertelement <4 x float> %tmp633, float %add6698, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins6699, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp634 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext6700 = extractelement <4 x float> %tmp634, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add6701 = fadd float %vecext6700, 0xC07BCE6660000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp635 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins6702 = insertelement <4 x float> %tmp635, float %add6701, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins6702, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> <float 0x40772CCCC0000000, float 0xC0625CCCC0000000, float 6.200000e+01, float 0xC06ADCCCC0000000>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp636 = load <4 x float>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp637 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins6707 = insertelement <4 x float> undef, float undef, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins6707, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp638 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext6708 = extractelement <4 x float> %tmp638, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp639 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> undef, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp640 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext6714 = extractelement <4 x float> %tmp640, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add6715 = fadd float %vecext6714, 0xC0537999A0000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp641 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins6716 = insertelement <4 x float> %tmp641, float %add6715, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext6719 = extractelement <4 x float> undef, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add6720 = fadd float %vecext6719, 2.870000e+02
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp642 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins6721 = insertelement <4 x float> %tmp642, float %add6720, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp643 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext6722 = extractelement <4 x float> %tmp643, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add6723 = fadd float %vecext6722, 0xC07704CCC0000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp644 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins6724 = insertelement <4 x float> %tmp644, float %add6723, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp645 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add6726 = fadd float undef, 0x4059B999A0000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp646 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins6727 = insertelement <4 x float> %tmp646, float %add6726, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins6727, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext6728 = extractelement <4 x float> undef, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add6729 = fadd float %vecext6728, 0xC073466660000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> undef, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> <float 0xC0309999A0000000, float -2.715000e+02, float 1.620000e+02, float 0x40674CCCC0000000>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp647 = load <4 x float>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp648 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add6732 = fadd <4 x float> %tmp648, %tmp647
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %add6732, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp649 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext6733 = extractelement <4 x float> %tmp649, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add6734 = fadd float %vecext6733, 0x4040733340000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp650 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins6735 = insertelement <4 x float> %tmp650, float %add6734, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins6735, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp651 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext6736 = extractelement <4 x float> %tmp651, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add6737 = fadd float %vecext6736, 0xC07B74CCC0000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp652 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins6738 = insertelement <4 x float> %tmp652, float %add6737, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins6738, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp653 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext6739 = extractelement <4 x float> %tmp653, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add6740 = fadd float %vecext6739, 0x40699CCCC0000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp654 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins6741 = insertelement <4 x float> %tmp654, float %add6740, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins6741, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp655 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext6742 = extractelement <4 x float> %tmp655, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add6743 = fadd float %vecext6742, 0x4078533340000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp656 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins6744 = insertelement <4 x float> %tmp656, float %add6743, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins6744, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp657 = load <4 x float>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp658 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add6746 = fadd <4 x float> %tmp658, %tmp657
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %add6746, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp659 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins6749 = insertelement <4 x float> undef, float undef, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins6749, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp660 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add6751 = fadd float undef, 0x4075DE6660000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins6752 = insertelement <4 x float> undef, float %add6751, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins6752, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp661 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext6753 = extractelement <4 x float> %tmp661, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add6754 = fadd float %vecext6753, 0xC008CCCCC0000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins6755 = insertelement <4 x float> undef, float %add6754, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins6755, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp662 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext6756 = extractelement <4 x float> %tmp662, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add6757 = fadd float %vecext6756, 0x406CA999A0000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp663 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins6758 = insertelement <4 x float> %tmp663, float %add6757, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins6758, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> <float 0x403D1999A0000000, float 0xC05F533340000000, float 3.945000e+02, float 3.950000e+01>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp664 = load <4 x float>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add6760 = fadd <4 x float> undef, %tmp664
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %add6760, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp665 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext6761 = extractelement <4 x float> %tmp665, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add6762 = fadd float %vecext6761, 2.860000e+02
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins6763 = insertelement <4 x float> undef, float %add6762, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp666 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> undef, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> <float 0xC079BE6660000000, float 4.930000e+02, float 0x406CC33340000000, float 0xC062E999A0000000>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp667 = load <4 x float>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add6774 = fadd <4 x float> undef, %tmp667
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp668 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext6775 = extractelement <4 x float> %tmp668, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add6776 = fadd float %vecext6775, 0x407B8199A0000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp669 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins6777 = insertelement <4 x float> %tmp669, float %add6776, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins6777, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp670 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext6778 = extractelement <4 x float> %tmp670, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add6779 = fadd float %vecext6778, 0x401C666660000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp671 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext6784 = extractelement <4 x float> %tmp671, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins6875 = insertelement <4 x float> undef, float undef, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins6875, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp672 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext6876 = extractelement <4 x float> %tmp672, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add6877 = fadd float %vecext6876, 0x4073A66660000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins6878 = insertelement <4 x float> undef, float %add6877, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins6878, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add6888 = fadd float undef, 0x4057CCCCC0000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp673 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins6889 = insertelement <4 x float> %tmp673, float %add6888, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins6889, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp674 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext6890 = extractelement <4 x float> %tmp674, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add6891 = fadd float %vecext6890, -4.430000e+02
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp675 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins6892 = insertelement <4 x float> %tmp675, float %add6891, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins6892, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp676 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext6893 = extractelement <4 x float> %tmp676, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add6894 = fadd float %vecext6893, -3.280000e+02
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp677 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins6895 = insertelement <4 x float> %tmp677, float %add6894, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins6895, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp678 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp679 = load <4 x float>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp680 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add6900 = fadd <4 x float> %tmp680, %tmp679
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %add6900, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp681 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext6901 = extractelement <4 x float> %tmp681, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add6902 = fadd float %vecext6901, 0x4079DCCCC0000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp682 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins6903 = insertelement <4 x float> %tmp682, float %add6902, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins6903, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add6905 = fadd float undef, 0x4031B33340000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp683 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins6906 = insertelement <4 x float> %tmp683, float %add6905, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp684 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins6912 = insertelement <4 x float> %tmp684, float undef, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> <float 3.315000e+02, float 0xC066C999A0000000, float 0xC061F33340000000, float 0x4071166660000000>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp685 = load <4 x float>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp686 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add6914 = fadd <4 x float> %tmp686, %tmp685
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %add6914, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext6915 = extractelement <4 x float> undef, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins6920 = insertelement <4 x float> undef, float undef, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins6920, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext6921 = extractelement <4 x float> undef, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add6922 = fadd float %vecext6921, 0xC064066660000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp687 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins6926 = insertelement <4 x float> %tmp687, float undef, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins6926, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> <float 0xC03C4CCCC0000000, float 0xC07E5199A0000000, float -8.250000e+01, float 0xC043B33340000000>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp688 = load <4 x float>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp689 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add6928 = fadd <4 x float> %tmp689, %tmp688
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %add6928, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add6930 = fadd float undef, -4.590000e+02
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins6931 = insertelement <4 x float> undef, float %add6930, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins6931, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp690 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext6932 = extractelement <4 x float> %tmp690, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add6933 = fadd float %vecext6932, 0xC063F999A0000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp691 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> undef, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp692 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext6935 = extractelement <4 x float> %tmp692, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add6936 = fadd float %vecext6935, -3.335000e+02
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp693 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins6937 = insertelement <4 x float> %tmp693, float %add6936, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp694 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext6938 = extractelement <4 x float> %tmp694, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add6939 = fadd float %vecext6938, 0x405F3999A0000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add6942 = fadd <4 x float> undef, undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext6943 = extractelement <4 x float> undef, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add6944 = fadd float %vecext6943, 0x40530CCCC0000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp695 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> undef, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add6950 = fadd float undef, 0xC078F33340000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp696 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins6951 = insertelement <4 x float> %tmp696, float %add6950, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins6951, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp697 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext6952 = extractelement <4 x float> %tmp697, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add6953 = fadd float %vecext6952, 0xC06E5999A0000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp698 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins6954 = insertelement <4 x float> %tmp698, float %add6953, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins6954, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp699 = load <4 x float>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp700 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add6956 = fadd <4 x float> %tmp700, %tmp699
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %add6956, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp701 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext6957 = extractelement <4 x float> %tmp701, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add6958 = fadd float %vecext6957, 0xC077633340000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp702 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins6959 = insertelement <4 x float> %tmp702, float %add6958, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins6959, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp703 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext6963 = extractelement <4 x float> %tmp703, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add6964 = fadd float %vecext6963, 0x4068666660000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp704 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins6965 = insertelement <4 x float> %tmp704, float %add6964, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins6965, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add6975 = fadd float undef, 0x406AF33340000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp705 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins6976 = insertelement <4 x float> %tmp705, float %add6975, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins6976, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp706 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp707 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add6984 = fadd <4 x float> %tmp707, undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %add6984, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp708 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext6985 = extractelement <4 x float> %tmp708, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add6986 = fadd float %vecext6985, 0xC05E266660000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp709 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins6987 = insertelement <4 x float> %tmp709, float %add6986, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins6987, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp710 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext6988 = extractelement <4 x float> %tmp710, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add6989 = fadd float %vecext6988, 0x40706E6660000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp711 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins6996 = insertelement <4 x float> %tmp711, float undef, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins6996, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> <float 0x4077A4CCC0000000, float 0xC0757199A0000000, float 0xC072F4CCC0000000, float 0xC071DCCCC0000000>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp712 = load <4 x float>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp713 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add6998 = fadd <4 x float> %tmp713, %tmp712
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %add6998, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp714 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext6999 = extractelement <4 x float> %tmp714, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add7000 = fadd float %vecext6999, 0x4076233340000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp715 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins7001 = insertelement <4 x float> %tmp715, float %add7000, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins7001, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp716 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext7002 = extractelement <4 x float> %tmp716, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add7003 = fadd float %vecext7002, 0x403BCCCCC0000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp717 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins7004 = insertelement <4 x float> %tmp717, float %add7003, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> undef, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp718 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add7140 = fadd float undef, 0x403D333340000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins7141 = insertelement <4 x float> undef, float %add7140, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext7142 = extractelement <4 x float> undef, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add7143 = fadd float %vecext7142, 0xC058F999A0000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins7144 = insertelement <4 x float> undef, float %add7143, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> undef, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp719 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext7148 = extractelement <4 x float> %tmp719, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add7149 = fadd float %vecext7148, 0x4075333340000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp720 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins7150 = insertelement <4 x float> %tmp720, float %add7149, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins7150, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> <float 1.700000e+02, float 0xC077B4CCC0000000, float 0x40625999A0000000, float 0x406C166660000000>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp721 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add7152 = fadd <4 x float> %tmp721, undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %add7152, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext7156 = extractelement <4 x float> undef, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add7157 = fadd float %vecext7156, 0xC05F533340000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp722 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins7158 = insertelement <4 x float> %tmp722, float %add7157, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins7158, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp723 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext7159 = extractelement <4 x float> %tmp723, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add7160 = fadd float %vecext7159, 0x407A5999A0000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp724 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins7161 = insertelement <4 x float> %tmp724, float %add7160, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins7161, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add7168 = fadd float undef, 0xC072F199A0000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp725 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> undef, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext7170 = extractelement <4 x float> undef, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add7171 = fadd float %vecext7170, 0x406AACCCC0000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins7172 = insertelement <4 x float> undef, float %add7171, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins7172, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext7173 = extractelement <4 x float> undef, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> undef, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp726 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext7419 = extractelement <4 x float> %tmp726, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add7420 = fadd float %vecext7419, 0x404EA66660000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins7421 = insertelement <4 x float> undef, float %add7420, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins7421, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp727 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext7422 = extractelement <4 x float> %tmp727, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add7423 = fadd float %vecext7422, 4.800000e+02
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp728 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins7424 = insertelement <4 x float> %tmp728, float %add7423, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins7424, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp729 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext7425 = extractelement <4 x float> %tmp729, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add7426 = fadd float %vecext7425, 0xC072C999A0000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp730 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins7427 = insertelement <4 x float> %tmp730, float %add7426, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins7427, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext7428 = extractelement <4 x float> undef, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> undef, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp731 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext7568 = extractelement <4 x float> %tmp731, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add7569 = fadd float %vecext7568, 1.090000e+02
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp732 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins7570 = insertelement <4 x float> %tmp732, float %add7569, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins7570, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> <float 0x40745199A0000000, float 0xC0411999A0000000, float -5.650000e+01, float -4.005000e+02>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp733 = load <4 x float>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp734 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add7572 = fadd <4 x float> %tmp734, %tmp733
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %add7572, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext7573 = extractelement <4 x float> undef, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add7574 = fadd float %vecext7573, -3.920000e+02
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp735 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins7575 = insertelement <4 x float> %tmp735, float %add7574, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins7575, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp736 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add7577 = fadd float undef, 0xC051666660000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp737 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp738 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext7579 = extractelement <4 x float> %tmp738, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add7580 = fadd float %vecext7579, 0x407E9199A0000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins7581 = insertelement <4 x float> undef, float %add7580, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins7581, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp739 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext7582 = extractelement <4 x float> %tmp739, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add7583 = fadd float %vecext7582, 2.760000e+02
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp740 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins7584 = insertelement <4 x float> %tmp740, float %add7583, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> <float 0xC057533340000000, float 0x4060A33340000000, float 0x40791E6660000000, float 2.455000e+02>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp741 = load <4 x float>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp742 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add7586 = fadd <4 x float> %tmp742, %tmp741
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %add7586, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp743 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext7587 = extractelement <4 x float> %tmp743, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add7588 = fadd float %vecext7587, 6.100000e+01
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp744 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> undef, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp745 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext7590 = extractelement <4 x float> %tmp745, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add7591 = fadd float %vecext7590, -3.935000e+02
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp746 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins7592 = insertelement <4 x float> %tmp746, float %add7591, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins7592, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp747 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext7593 = extractelement <4 x float> %tmp747, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins7595 = insertelement <4 x float> undef, float undef, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins7595, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp748 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext7596 = extractelement <4 x float> %tmp748, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add7597 = fadd float %vecext7596, 0x407E666660000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> <float 0x406A766660000000, float 0xBFC99999A0000000, float 0xC0751B3340000000, float -4.075000e+02>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp749 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add7616 = fadd float undef, 0xC04DE66660000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp750 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins7617 = insertelement <4 x float> %tmp750, float %add7616, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins7617, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp751 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext7618 = extractelement <4 x float> %tmp751, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add7619 = fadd float %vecext7618, 6.050000e+01
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp752 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins7620 = insertelement <4 x float> %tmp752, float %add7619, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins7620, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp753 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add7622 = fadd float undef, 0xC054B999A0000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp754 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins7626 = insertelement <4 x float> undef, float undef, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins7626, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp755 = load <4 x float>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp756 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add7628 = fadd <4 x float> %tmp756, %tmp755
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %add7628, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp757 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext7629 = extractelement <4 x float> %tmp757, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add7630 = fadd float %vecext7629, 0xC05E2CCCC0000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp758 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins7631 = insertelement <4 x float> %tmp758, float %add7630, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add7639 = fadd float undef, 0x407C5999A0000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp759 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins7640 = insertelement <4 x float> %tmp759, float %add7639, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> <float 0x406AA66660000000, float 0x4067C66660000000, float 0xC054866660000000, float -2.400000e+01>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp760 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add7642 = fadd <4 x float> %tmp760, undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp761 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add7644 = fadd float undef, 0xC0758999A0000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> undef, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp762 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext7646 = extractelement <4 x float> %tmp762, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add7647 = fadd float %vecext7646, 0xC07A3B3340000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp763 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins7648 = insertelement <4 x float> %tmp763, float %add7647, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins7648, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp764 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext7649 = extractelement <4 x float> %tmp764, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add7650 = fadd float %vecext7649, 0x40760CCCC0000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp765 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins7651 = insertelement <4 x float> %tmp765, float %add7650, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins7651, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp766 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext7652 = extractelement <4 x float> %tmp766, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add7653 = fadd float %vecext7652, 0x40620CCCC0000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp767 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins7654 = insertelement <4 x float> %tmp767, float %add7653, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins7654, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp768 = load <4 x float>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp769 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add7656 = fadd <4 x float> %tmp769, %tmp768
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %add7656, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp770 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext7657 = extractelement <4 x float> %tmp770, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add7658 = fadd float %vecext7657, 0xC06EF999A0000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp771 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins7659 = insertelement <4 x float> %tmp771, float %add7658, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins7659, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp772 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext7660 = extractelement <4 x float> %tmp772, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add7661 = fadd float %vecext7660, 0x404B9999A0000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp773 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins7662 = insertelement <4 x float> %tmp773, float %add7661, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins7662, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp774 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext7663 = extractelement <4 x float> %tmp774, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add7664 = fadd float %vecext7663, 0x4074B66660000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp775 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins7665 = insertelement <4 x float> %tmp775, float %add7664, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins7665, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp776 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext7666 = extractelement <4 x float> %tmp776, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add7667 = fadd float %vecext7666, 0x4074166660000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins7668 = insertelement <4 x float> undef, float %add7667, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins7668, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp777 = load <4 x float>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp778 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add7670 = fadd <4 x float> %tmp778, %tmp777
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp779 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext7671 = extractelement <4 x float> %tmp779, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add7672 = fadd float %vecext7671, 0x406F166660000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add7726 = fadd <4 x float> undef, undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp780 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext7727 = extractelement <4 x float> %tmp780, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp781 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> undef, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp782 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add7731 = fadd float undef, 1.900000e+02
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp783 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins7732 = insertelement <4 x float> %tmp783, float %add7731, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins7732, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp784 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins7735 = insertelement <4 x float> %tmp784, float undef, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins7735, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp785 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext7736 = extractelement <4 x float> %tmp785, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add7737 = fadd float %vecext7736, 0xC06AF66660000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins7850 = insertelement <4 x float> undef, float undef, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins7850, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> <float 0x4062A33340000000, float 2.290000e+02, float 0x40509999A0000000, float 0xC078BE6660000000>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp786 = load <4 x float>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp787 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add7852 = fadd <4 x float> %tmp787, %tmp786
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %add7852, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp788 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext9396 = extractelement <4 x float> undef, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add9397 = fadd float %vecext9396, 0xC074533340000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp789 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins9398 = insertelement <4 x float> %tmp789, float %add9397, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins9398, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext9399 = extractelement <4 x float> undef, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp790 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins9401 = insertelement <4 x float> %tmp790, float undef, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp791 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext9402 = extractelement <4 x float> %tmp791, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add9403 = fadd float %vecext9402, 0xC03E4CCCC0000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp792 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins9404 = insertelement <4 x float> %tmp792, float %add9403, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins9404, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp793 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> undef, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp794 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext9407 = extractelement <4 x float> %tmp794, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add9408 = fadd float %vecext9407, 0x407B2999A0000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp795 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext9410 = extractelement <4 x float> %tmp795, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add9411 = fadd float %vecext9410, 0x40726E6660000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp796 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> undef, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp797 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext9413 = extractelement <4 x float> %tmp797, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add9414 = fadd float %vecext9413, 0xC057ECCCC0000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp798 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins9415 = insertelement <4 x float> %tmp798, float %add9414, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins9415, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp799 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext9416 = extractelement <4 x float> %tmp799, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add9417 = fadd float %vecext9416, 0x406B0CCCC0000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp800 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins9418 = insertelement <4 x float> %tmp800, float %add9417, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins9418, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> <float 3.555000e+02, float 0xC062E33340000000, float 0x4065C66660000000, float -3.645000e+02>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp801 = load <4 x float>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp802 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add9420 = fadd <4 x float> %tmp802, %tmp801
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %add9420, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp803 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext9421 = extractelement <4 x float> %tmp803, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp804 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins9423 = insertelement <4 x float> %tmp804, float undef, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins9423, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp805 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext9424 = extractelement <4 x float> %tmp805, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add9425 = fadd float %vecext9424, 0x4079C199A0000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp806 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins9426 = insertelement <4 x float> %tmp806, float %add9425, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins9426, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp807 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add9428 = fadd float undef, 0xC065466660000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp808 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins9429 = insertelement <4 x float> %tmp808, float %add9428, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins9429, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp809 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext9430 = extractelement <4 x float> %tmp809, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add9431 = fadd float %vecext9430, 0xC0742CCCC0000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp810 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins9432 = insertelement <4 x float> %tmp810, float %add9431, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> <float 0xC07C7E6660000000, float 1.205000e+02, float 0x4050D999A0000000, float 0xC06B233340000000>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp811 = load <4 x float>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp812 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add9434 = fadd <4 x float> %tmp812, %tmp811
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add9436 = fadd float undef, -3.185000e+02
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp813 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins9437 = insertelement <4 x float> %tmp813, float %add9436, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp814 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> undef, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp815 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext9441 = extractelement <4 x float> %tmp815, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add9442 = fadd float %vecext9441, 0xC079CE6660000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp816 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins9443 = insertelement <4 x float> %tmp816, float %add9442, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins9443, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp817 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext9444 = extractelement <4 x float> %tmp817, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add9445 = fadd float %vecext9444, 0xC06F533340000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp818 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins9446 = insertelement <4 x float> %tmp818, float %add9445, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins9446, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp819 = load <4 x float>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp820 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add9448 = fadd <4 x float> %tmp820, %tmp819
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %add9448, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add9450 = fadd float undef, 0xC0718199A0000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp821 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins9451 = insertelement <4 x float> %tmp821, float %add9450, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins9451, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp822 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp823 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins9454 = insertelement <4 x float> %tmp823, float undef, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins9454, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp824 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext9455 = extractelement <4 x float> %tmp824, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add9456 = fadd float %vecext9455, -3.380000e+02
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp825 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins9457 = insertelement <4 x float> %tmp825, float %add9456, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins9457, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext9458 = extractelement <4 x float> undef, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp826 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins9460 = insertelement <4 x float> %tmp826, float undef, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins9460, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> <float 0x407B5E6660000000, float 0x40648999A0000000, float 0xC06B966660000000, float 0x40341999A0000000>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp827 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add9462 = fadd <4 x float> %tmp827, undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %add9462, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp828 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext9463 = extractelement <4 x float> %tmp828, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp829 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins9465 = insertelement <4 x float> %tmp829, float undef, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add9467 = fadd float undef, 0x405D666660000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp830 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins9468 = insertelement <4 x float> %tmp830, float %add9467, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins9468, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp831 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add9470 = fadd float undef, 0x4077033340000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp832 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> undef, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext9472 = extractelement <4 x float> undef, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add9473 = fadd float %vecext9472, 0x402DCCCCC0000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp833 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins9474 = insertelement <4 x float> %tmp833, float %add9473, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins9474, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> <float 0x404F733340000000, float 0x407AB4CCC0000000, float 0x40605999A0000000, float 0xC03E4CCCC0000000>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp834 = load <4 x float>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp835 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add9476 = fadd <4 x float> %tmp835, %tmp834
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %add9476, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp836 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext9477 = extractelement <4 x float> %tmp836, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add9478 = fadd float %vecext9477, 0xC07F266660000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp837 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins9479 = insertelement <4 x float> %tmp837, float %add9478, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins9479, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp838 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add9481 = fadd float undef, 0x407BE33340000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp839 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins9482 = insertelement <4 x float> %tmp839, float %add9481, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins9482, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext9483 = extractelement <4 x float> undef, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add9484 = fadd float %vecext9483, 0xC073E999A0000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp840 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins9485 = insertelement <4 x float> %tmp840, float %add9484, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins9485, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp841 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext9486 = extractelement <4 x float> %tmp841, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add9487 = fadd float %vecext9486, 0x4076E66660000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp842 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> undef, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> <float 0xC076B999A0000000, float 0xC0706CCCC0000000, float 0x407904CCC0000000, float 0x407EE199A0000000>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp843 = load <4 x float>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> undef, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp844 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext9491 = extractelement <4 x float> %tmp844, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add9492 = fadd float %vecext9491, 0x407C166660000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> undef, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add9495 = fadd float undef, 0x407DBB3340000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp845 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins9496 = insertelement <4 x float> %tmp845, float %add9495, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins9496, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp846 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext9497 = extractelement <4 x float> %tmp846, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add9498 = fadd float %vecext9497, 0x4042CCCCC0000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp847 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins9499 = insertelement <4 x float> %tmp847, float %add9498, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins9499, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp848 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add9501 = fadd float undef, 0x407D5CCCC0000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp849 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins9502 = insertelement <4 x float> %tmp849, float %add9501, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins9502, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp850 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add9504 = fadd <4 x float> %tmp850, undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %add9504, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp851 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add9506 = fadd float undef, 0x4076EE6660000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp852 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins9507 = insertelement <4 x float> %tmp852, float %add9506, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins9507, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp853 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add9509 = fadd float undef, 0xC0535999A0000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp854 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> undef, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp855 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext9511 = extractelement <4 x float> %tmp855, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add9512 = fadd float %vecext9511, 0xC076766660000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp856 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins9513 = insertelement <4 x float> %tmp856, float %add9512, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins9513, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp857 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext9514 = extractelement <4 x float> %tmp857, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp858 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins9516 = insertelement <4 x float> %tmp858, float undef, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins9516, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> <float 0x407254CCC0000000, float 0x407844CCC0000000, float 0xC04D9999A0000000, float 0xC0550CCCC0000000>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp859 = load <4 x float>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp860 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add9518 = fadd <4 x float> %tmp860, %tmp859
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp861 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp862 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins9521 = insertelement <4 x float> %tmp862, float undef, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins9521, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp863 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext9522 = extractelement <4 x float> %tmp863, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add9523 = fadd float %vecext9522, 0x4029333340000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp864 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins9524 = insertelement <4 x float> %tmp864, float %add9523, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins9524, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp865 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add9526 = fadd float undef, 0x4072833340000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp866 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins9527 = insertelement <4 x float> %tmp866, float %add9526, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins9527, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp867 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins9530 = insertelement <4 x float> undef, float undef, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins9530, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> <float 0x4072F4CCC0000000, float 0x4065CCCCC0000000, float 0x4051D33340000000, float 0x40680CCCC0000000>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp868 = load <4 x float>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp869 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add9532 = fadd <4 x float> %tmp869, %tmp868
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext9533 = extractelement <4 x float> undef, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp870 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins9535 = insertelement <4 x float> %tmp870, float undef, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins9535, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp871 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext9536 = extractelement <4 x float> %tmp871, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add9537 = fadd float %vecext9536, 0xC079F199A0000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp872 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins9538 = insertelement <4 x float> %tmp872, float %add9537, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins9538, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp873 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext9542 = extractelement <4 x float> %tmp873, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add9543 = fadd float %vecext9542, 0x4050D999A0000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add9576 = fadd float undef, 0x40219999A0000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins9577 = insertelement <4 x float> undef, float %add9576, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins9577, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp874 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins9580 = insertelement <4 x float> undef, float undef, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins9580, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp875 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext9581 = extractelement <4 x float> %tmp875, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add9582 = fadd float %vecext9581, 0xC07EF33340000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp876 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins9583 = insertelement <4 x float> %tmp876, float %add9582, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins9583, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp877 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> undef, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext9673 = extractelement <4 x float> undef, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add9674 = fadd float %vecext9673, 0xC04CF33340000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp878 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins9675 = insertelement <4 x float> %tmp878, float %add9674, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins9675, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext9676 = extractelement <4 x float> undef, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add9677 = fadd float %vecext9676, 1.455000e+02
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp879 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins9678 = insertelement <4 x float> %tmp879, float %add9677, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp880 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext9679 = extractelement <4 x float> %tmp880, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add9680 = fadd float %vecext9679, 0x4073A33340000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp881 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins9681 = insertelement <4 x float> %tmp881, float %add9680, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins9681, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp882 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext9682 = extractelement <4 x float> %tmp882, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp883 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add9686 = fadd <4 x float> %tmp883, undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %add9686, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp884 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext9687 = extractelement <4 x float> %tmp884, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add9688 = fadd float %vecext9687, 0xC046666660000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp885 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins9689 = insertelement <4 x float> %tmp885, float %add9688, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext9690 = extractelement <4 x float> undef, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add9691 = fadd float %vecext9690, 0x4034CCCCC0000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp886 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins9692 = insertelement <4 x float> %tmp886, float %add9691, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp887 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext9693 = extractelement <4 x float> %tmp887, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add9694 = fadd float %vecext9693, -3.710000e+02
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp888 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins9695 = insertelement <4 x float> %tmp888, float %add9694, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins9695, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp889 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add9697 = fadd float undef, 0x4058D33340000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp890 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins9698 = insertelement <4 x float> %tmp890, float %add9697, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins9698, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> <float 0x4062CCCCC0000000, float 0x407AD999A0000000, float 0x40582CCCC0000000, float 0xC0712B3340000000>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp891 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add9700 = fadd <4 x float> %tmp891, undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp892 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext9701 = extractelement <4 x float> %tmp892, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add9702 = fadd float %vecext9701, 0x406DC33340000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp893 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins9703 = insertelement <4 x float> %tmp893, float %add9702, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins9703, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp894 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext9704 = extractelement <4 x float> %tmp894, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add9705 = fadd float %vecext9704, 0xC073B33340000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp895 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins9706 = insertelement <4 x float> %tmp895, float %add9705, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins9706, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext9707 = extractelement <4 x float> undef, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add9708 = fadd float %vecext9707, 0xC0729999A0000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp896 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins9709 = insertelement <4 x float> %tmp896, float %add9708, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins9709, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp897 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext9710 = extractelement <4 x float> %tmp897, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins9712 = insertelement <4 x float> undef, float undef, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins9712, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> <float 0x4069F33340000000, float 0xC048266660000000, float 0x40638CCCC0000000, float 0xC07EC199A0000000>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp898 = load <4 x float>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add9714 = fadd <4 x float> undef, %tmp898
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %add9714, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp899 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext9715 = extractelement <4 x float> %tmp899, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp900 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins9717 = insertelement <4 x float> %tmp900, float undef, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins9717, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp901 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext9718 = extractelement <4 x float> %tmp901, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add9719 = fadd float %vecext9718, 0x406BC66660000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp902 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins9720 = insertelement <4 x float> %tmp902, float %add9719, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins9720, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp903 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext9721 = extractelement <4 x float> %tmp903, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add9722 = fadd float %vecext9721, -3.860000e+02
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp904 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins9723 = insertelement <4 x float> %tmp904, float %add9722, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins9723, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp905 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext9724 = extractelement <4 x float> %tmp905, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add9725 = fadd float %vecext9724, 0x407CF199A0000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp906 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins9726 = insertelement <4 x float> %tmp906, float %add9725, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins9726, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> <float -4.575000e+02, float 0x40713E6660000000, float 0x407D133340000000, float -1.425000e+02>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp907 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add9728 = fadd <4 x float> %tmp907, undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %add9728, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp908 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext9729 = extractelement <4 x float> %tmp908, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add9730 = fadd float %vecext9729, 0x4079FB3340000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp909 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins9731 = insertelement <4 x float> %tmp909, float %add9730, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins9731, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp910 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add9733 = fadd float undef, 0xC050F33340000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp911 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins9734 = insertelement <4 x float> %tmp911, float %add9733, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins9734, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp912 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext9735 = extractelement <4 x float> %tmp912, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add9736 = fadd float %vecext9735, 0x40582CCCC0000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp913 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins9737 = insertelement <4 x float> %tmp913, float %add9736, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins9737, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp914 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext9738 = extractelement <4 x float> %tmp914, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins9740 = insertelement <4 x float> undef, float undef, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins9740, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> <float 2.150000e+02, float 0x405A2CCCC0000000, float 2.310000e+02, float 0x404E1999A0000000>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp915 = load <4 x float>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp916 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> undef, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp917 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext9743 = extractelement <4 x float> %tmp917, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add9744 = fadd float %vecext9743, -2.510000e+02
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins9745 = insertelement <4 x float> undef, float %add9744, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins9745, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp918 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext9746 = extractelement <4 x float> %tmp918, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add9747 = fadd float %vecext9746, 4.685000e+02
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp919 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins9748 = insertelement <4 x float> %tmp919, float %add9747, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins9748, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp920 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext9749 = extractelement <4 x float> %tmp920, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add9750 = fadd float %vecext9749, 1.600000e+01
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp921 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins9751 = insertelement <4 x float> %tmp921, float %add9750, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins9751, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp922 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext9752 = extractelement <4 x float> %tmp922, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add9753 = fadd float %vecext9752, -2.600000e+01
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp923 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins9754 = insertelement <4 x float> %tmp923, float %add9753, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins9754, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> <float 2.590000e+02, float 0x407B7199A0000000, float 0xC07ED199A0000000, float 0xC064FCCCC0000000>, <4 x float>* %.compoundliteral9755
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp924 = load <4 x float>, <4 x float>* %.compoundliteral9755
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp925 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add9756 = fadd <4 x float> %tmp925, %tmp924
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp926 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext9757 = extractelement <4 x float> %tmp926, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add9758 = fadd float %vecext9757, -1.810000e+02
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp927 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins9759 = insertelement <4 x float> %tmp927, float %add9758, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins9759, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp928 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext9760 = extractelement <4 x float> %tmp928, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add9761 = fadd float %vecext9760, 0xC07C3E6660000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp929 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins9762 = insertelement <4 x float> %tmp929, float %add9761, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins9762, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp930 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add9764 = fadd float undef, 0xC060E66660000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp931 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins9765 = insertelement <4 x float> %tmp931, float %add9764, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins9765, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp932 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext9766 = extractelement <4 x float> %tmp932, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add9767 = fadd float %vecext9766, 0xC0753E6660000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp933 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins9768 = insertelement <4 x float> %tmp933, float %add9767, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins9768, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> <float 0x4032CCCCC0000000, float -9.600000e+01, float -5.000000e+02, float 0x4078EE6660000000>, <4 x float>* %.compoundliteral9769
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp934 = load <4 x float>, <4 x float>* %.compoundliteral9769
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp935 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add9770 = fadd <4 x float> %tmp935, %tmp934
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %add9770, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp936 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext9771 = extractelement <4 x float> %tmp936, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add9772 = fadd float %vecext9771, 0xC0733E6660000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp937 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins9773 = insertelement <4 x float> %tmp937, float %add9772, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins9773, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp938 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext9774 = extractelement <4 x float> %tmp938, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add9775 = fadd float %vecext9774, 1.715000e+02
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp939 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins9776 = insertelement <4 x float> %tmp939, float %add9775, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins9776, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext9816 = extractelement <4 x float> undef, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp940 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins9818 = insertelement <4 x float> %tmp940, float undef, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> undef, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp941 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add10388 = fadd float undef, 4.755000e+02
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp942 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins10389 = insertelement <4 x float> %tmp942, float %add10388, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins10389, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp943 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext10390 = extractelement <4 x float> %tmp943, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add10391 = fadd float %vecext10390, 0xC05AECCCC0000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp944 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins10392 = insertelement <4 x float> %tmp944, float %add10391, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins10392, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp945 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp946 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add10405 = fadd float undef, -5.650000e+01
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp947 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins10406 = insertelement <4 x float> %tmp947, float %add10405, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins10406, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp948 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext10407 = extractelement <4 x float> %tmp948, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add10408 = fadd float %vecext10407, 0xC06A633340000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp949 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins10409 = insertelement <4 x float> %tmp949, float %add10408, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins10409, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp950 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext10410 = extractelement <4 x float> %tmp950, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add10411 = fadd float %vecext10410, 0xC078D66660000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp951 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> undef, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> <float -2.340000e+02, float -4.720000e+02, float 4.350000e+02, float 0xC059A66660000000>, <4 x float>* %.compoundliteral10413
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp952 = load <4 x float>, <4 x float>* %.compoundliteral10413
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp953 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add10414 = fadd <4 x float> %tmp953, %tmp952
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %add10414, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp954 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext10415 = extractelement <4 x float> %tmp954, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add10416 = fadd float %vecext10415, 3.450000e+02
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp955 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins10417 = insertelement <4 x float> %tmp955, float %add10416, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins10417, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp956 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext10418 = extractelement <4 x float> %tmp956, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add10419 = fadd float %vecext10418, -6.000000e+00
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp957 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins10420 = insertelement <4 x float> %tmp957, float %add10419, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins10420, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add10422 = fadd float undef, 0xC0662CCCC0000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext10424 = extractelement <4 x float> undef, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> undef, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> <float 0x402B333340000000, float 0x40735E6660000000, float 0xC0567999A0000000, float 2.050000e+02>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp958 = load <4 x float>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp959 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add10428 = fadd <4 x float> %tmp959, %tmp958
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %add10428, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp960 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext10429 = extractelement <4 x float> %tmp960, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add10430 = fadd float %vecext10429, 0xC075166660000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp961 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add10436 = fadd float undef, 0xC06AF33340000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp962 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins10437 = insertelement <4 x float> %tmp962, float %add10436, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins10437, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext10438 = extractelement <4 x float> undef, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add10439 = fadd float %vecext10438, 0x405C7999A0000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp963 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins10440 = insertelement <4 x float> %tmp963, float %add10439, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins10440, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> <float 0xC065E999A0000000, float 0x4067D33340000000, float 0xC070133340000000, float 0x406B666660000000>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp964 = load <4 x float>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp965 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext10443 = extractelement <4 x float> %tmp965, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add10444 = fadd float %vecext10443, 0xC06CA999A0000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp966 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins10445 = insertelement <4 x float> %tmp966, float %add10444, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins10445, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp967 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext10446 = extractelement <4 x float> %tmp967, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add10447 = fadd float %vecext10446, 0x4064B999A0000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp968 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins10448 = insertelement <4 x float> %tmp968, float %add10447, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins10448, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp969 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext10449 = extractelement <4 x float> %tmp969, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add10450 = fadd float %vecext10449, 0x407B3CCCC0000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp970 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins10451 = insertelement <4 x float> %tmp970, float %add10450, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins10451, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp971 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext10452 = extractelement <4 x float> %tmp971, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add10453 = fadd float %vecext10452, -2.225000e+02
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins10454 = insertelement <4 x float> undef, float %add10453, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> <float 0x406AFCCCC0000000, float 0xC07604CCC0000000, float 6.900000e+01, float 0xC060A66660000000>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp972 = load <4 x float>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp973 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add10456 = fadd <4 x float> %tmp973, %tmp972
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %add10456, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp974 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext10457 = extractelement <4 x float> %tmp974, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add10458 = fadd float %vecext10457, 2.375000e+02
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins10459 = insertelement <4 x float> undef, float %add10458, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins10459, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp975 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext10460 = extractelement <4 x float> %tmp975, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add10461 = fadd float %vecext10460, 0xC06B3999A0000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp976 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins10462 = insertelement <4 x float> %tmp976, float %add10461, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp977 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext10463 = extractelement <4 x float> %tmp977, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add10464 = fadd float %vecext10463, 0x40655999A0000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp978 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins10465 = insertelement <4 x float> %tmp978, float %add10464, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins10465, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp979 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext10466 = extractelement <4 x float> %tmp979, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add10467 = fadd float %vecext10466, 0xC07B6999A0000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp980 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins10468 = insertelement <4 x float> %tmp980, float %add10467, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins10468, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> <float 0x4078833340000000, float 0x40786CCCC0000000, float 0xC0468CCCC0000000, float 0xC0793199A0000000>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp981 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add10470 = fadd <4 x float> %tmp981, undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp982 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext10471 = extractelement <4 x float> %tmp982, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add10472 = fadd float %vecext10471, 0x40710CCCC0000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp983 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins10473 = insertelement <4 x float> %tmp983, float %add10472, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins10473, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp984 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext10474 = extractelement <4 x float> %tmp984, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add10475 = fadd float %vecext10474, 0x40709B3340000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp985 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins10476 = insertelement <4 x float> %tmp985, float %add10475, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins10476, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add10489 = fadd float undef, 0x4074666660000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp986 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins10490 = insertelement <4 x float> %tmp986, float %add10489, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins10490, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp987 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp988 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext10508 = extractelement <4 x float> %tmp988, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add10509 = fadd float %vecext10508, 0xC027333340000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp989 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins10510 = insertelement <4 x float> %tmp989, float %add10509, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins10510, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> <float 0x40656999A0000000, float 0xC073766660000000, float 1.685000e+02, float 0x40765199A0000000>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp990 = load <4 x float>, <4 x float>* undef
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add10512 = fadd <4 x float> undef, %tmp990
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp991 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext10513 = extractelement <4 x float> %tmp991, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add10514 = fadd float %vecext10513, 0x405BB999A0000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp992 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins10515 = insertelement <4 x float> %tmp992, float %add10514, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins10515, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp993 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add10562 = fadd float undef, 2.035000e+02
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp994 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins10563 = insertelement <4 x float> %tmp994, float %add10562, i32 2
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins10563, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp995 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext10564 = extractelement <4 x float> %tmp995, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add10565 = fadd float %vecext10564, 0x407AE4CCC0000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp996 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins10566 = insertelement <4 x float> %tmp996, float %add10565, i32 3
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins10566, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> <float 0xC068B999A0000000, float 0xC050E66660000000, float 0xC0725999A0000000, float 0xC054D33340000000>, <4 x float>* %.compoundliteral10567
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp997 = load <4 x float>, <4 x float>* %.compoundliteral10567
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp998 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add10568 = fadd <4 x float> %tmp998, %tmp997
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp999 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext10569 = extractelement <4 x float> %tmp999, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add10570 = fadd float %vecext10569, 0x4074C33340000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp1000 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins10571 = insertelement <4 x float> %tmp1000, float %add10570, i32 0
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins10571, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp1001 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecext10572 = extractelement <4 x float> %tmp1001, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %add10573 = fadd float %vecext10572, 0x407DF33340000000
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %tmp1002 = load <4 x float>, <4 x float>* undef, align 16
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ %vecins10574 = insertelement <4 x float> %tmp1002, float %add10573, i32 1
+ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"()
+ store <4 x float> %vecins10574, <4 x float>* undef, align 16
+ %tmp1003 = load <4 x float>, <4 x float>* undef, align 16
+ %vecext10575 = extractelement <4 x float> %tmp1003, i32 2
+ %tmp1004 = load <4 x float>, <4 x float>* undef, align 16
+ %vecins10577 = insertelement <4 x float> %tmp1004, float undef, i32 2
+ store <4 x float> %vecins10577, <4 x float>* undef, align 16
+ %tmp1005 = load <4 x float>, <4 x float>* undef, align 16
+ %vecext10578 = extractelement <4 x float> %tmp1005, i32 3
+ %add10579 = fadd float %vecext10578, 0x4076566660000000
+ %tmp1006 = load <4 x float>, <4 x float>* undef, align 16
+ %vecins10580 = insertelement <4 x float> %tmp1006, float %add10579, i32 3
+ store <4 x float> %vecins10580, <4 x float>* undef, align 16
+ store <4 x float> <float 0x407CAB3340000000, float 1.685000e+02, float 0xC07B866660000000, float 0xC061ACCCC0000000>, <4 x float>* %.compoundliteral10581
+ %tmp1007 = load <4 x float>, <4 x float>* %.compoundliteral10581
+ store <4 x float> undef, <4 x float>* undef, align 16
+ %tmp1008 = load <4 x float>, <4 x float>* undef, align 16
+ %vecext10583 = extractelement <4 x float> %tmp1008, i32 0
+ %add10584 = fadd float %vecext10583, 0xC060533340000000
+ %tmp1009 = load <4 x float>, <4 x float>* undef, align 16
+ %vecins10585 = insertelement <4 x float> %tmp1009, float %add10584, i32 0
+ store <4 x float> %vecins10585, <4 x float>* undef, align 16
+ %tmp1010 = load <4 x float>, <4 x float>* undef, align 16
+ %vecext10586 = extractelement <4 x float> %tmp1010, i32 1
+ %add10587 = fadd float %vecext10586, 0xC0694CCCC0000000
+ %tmp1011 = load <4 x float>, <4 x float>* undef, align 16
+ %vecins10588 = insertelement <4 x float> %tmp1011, float %add10587, i32 1
+ store <4 x float> %vecins10588, <4 x float>* undef, align 16
+ %tmp1012 = load <4 x float>, <4 x float>* undef, align 16
+ %vecext10589 = extractelement <4 x float> %tmp1012, i32 2
+ %add10590 = fadd float %vecext10589, 0xC0541999A0000000
+ %tmp1013 = load <4 x float>, <4 x float>* undef, align 16
+ %vecins10591 = insertelement <4 x float> %tmp1013, float %add10590, i32 2
+ store <4 x float> %vecins10591, <4 x float>* undef, align 16
+ %tmp1014 = load <4 x float>, <4 x float>* undef, align 16
+ %vecext10592 = extractelement <4 x float> %tmp1014, i32 3
+ %add10593 = fadd float %vecext10592, 0xC06C566660000000
+ %tmp1015 = load <4 x float>, <4 x float>* undef, align 16
+ %vecins10594 = insertelement <4 x float> %tmp1015, float %add10593, i32 3
+ store <4 x float> %vecins10594, <4 x float>* undef, align 16
+ store <4 x float> <float 0x407A3199A0000000, float 0xC0659999A0000000, float 0x407E0999A0000000, float 0xC0334CCCC0000000>, <4 x float>* %.compoundliteral10595
+ %tmp1016 = load <4 x float>, <4 x float>* %.compoundliteral10595
+ %tmp1017 = load <4 x float>, <4 x float>* undef, align 16
+ %add10596 = fadd <4 x float> %tmp1017, %tmp1016
+ store <4 x float> %add10596, <4 x float>* undef, align 16
+ %tmp1018 = load <4 x float>, <4 x float>* undef, align 16
+ %vecext10597 = extractelement <4 x float> %tmp1018, i32 0
+ %add10598 = fadd float %vecext10597, 0x40640999A0000000
+ %tmp1019 = load <4 x float>, <4 x float>* undef, align 16
+ %vecins10599 = insertelement <4 x float> %tmp1019, float %add10598, i32 0
+ store <4 x float> %vecins10599, <4 x float>* undef, align 16
+ %tmp1020 = load <4 x float>, <4 x float>* undef, align 16
+ %vecext10600 = extractelement <4 x float> %tmp1020, i32 1
+ %add10601 = fadd float %vecext10600, 0xC073966660000000
+ %tmp1021 = load <4 x float>, <4 x float>* undef, align 16
+ %vecins10602 = insertelement <4 x float> %tmp1021, float %add10601, i32 1
+ %tmp1022 = load <4 x float>, <4 x float>* undef, align 16
+ %vecext10603 = extractelement <4 x float> %tmp1022, i32 2
+ %add10604 = fadd float %vecext10603, 1.780000e+02
+ %tmp1023 = load <4 x float>, <4 x float>* undef, align 16
+ %vecins10605 = insertelement <4 x float> %tmp1023, float %add10604, i32 2
+ store <4 x float> %vecins10605, <4 x float>* undef, align 16
+ %tmp1024 = load <4 x float>, <4 x float>* undef, align 16
+ %add10607 = fadd float undef, 0x4070A33340000000
+ %tmp1025 = load <4 x float>, <4 x float>* undef, align 16
+ store <4 x float> <float 0x407C5999A0000000, float 0x4046733340000000, float 0xC06E6CCCC0000000, float 0xC063C33340000000>, <4 x float>* %.compoundliteral10609
+ %tmp1026 = load <4 x float>, <4 x float>* %.compoundliteral10609
+ %tmp1027 = load <4 x float>, <4 x float>* undef, align 16
+ %tmp1028 = load <4 x float>, <4 x float>* undef, align 16
+ %vecext10611 = extractelement <4 x float> %tmp1028, i32 0
+ %add10612 = fadd float %vecext10611, 0x40757199A0000000
+ %vecins10613 = insertelement <4 x float> undef, float %add10612, i32 0
+ store <4 x float> %vecins10613, <4 x float>* undef, align 16
+ %tmp1029 = load <4 x float>, <4 x float>* undef, align 16
+ %vecext10614 = extractelement <4 x float> %tmp1029, i32 1
+ %add10615 = fadd float %vecext10614, 0x40740CCCC0000000
+ %tmp1030 = load <4 x float>, <4 x float>* undef, align 16
+ %vecins10616 = insertelement <4 x float> %tmp1030, float %add10615, i32 1
+ store <4 x float> %vecins10616, <4 x float>* undef, align 16
+ %tmp1031 = load <4 x float>, <4 x float>* undef, align 16
+ %vecext10617 = extractelement <4 x float> %tmp1031, i32 2
+ %add10618 = fadd float %vecext10617, 0xC012CCCCC0000000
+ %tmp1032 = load <4 x float>, <4 x float>* undef, align 16
+ %vecins10619 = insertelement <4 x float> %tmp1032, float %add10618, i32 2
+ store <4 x float> %vecins10619, <4 x float>* undef, align 16
+ %tmp1033 = load <4 x float>, <4 x float>* undef, align 16
+ %vecext10620 = extractelement <4 x float> %tmp1033, i32 3
+ %add10621 = fadd float %vecext10620, 0x406E566660000000
+ %tmp1034 = load <4 x float>, <4 x float>* undef, align 16
+ store <4 x float> <float 0x407B2199A0000000, float 0xC07D9CCCC0000000, float -4.350000e+01, float 0xC07D3B3340000000>, <4 x float>* %.compoundliteral10623
+ %tmp1035 = load <4 x float>, <4 x float>* %.compoundliteral10623
+ %add10624 = fadd <4 x float> undef, %tmp1035
+ %tmp1036 = load <4 x float>, <4 x float>* undef, align 16
+ %vecext10625 = extractelement <4 x float> %tmp1036, i32 0
+ %tmp1037 = load <4 x float>, <4 x float>* undef, align 16
+ %vecins10627 = insertelement <4 x float> %tmp1037, float undef, i32 0
+ store <4 x float> %vecins10627, <4 x float>* undef, align 16
+ %tmp1038 = load <4 x float>, <4 x float>* undef, align 16
+ %vecext10628 = extractelement <4 x float> %tmp1038, i32 1
+ %add10629 = fadd float %vecext10628, 0x407E3CCCC0000000
+ %tmp1039 = load <4 x float>, <4 x float>* undef, align 16
+ %vecins10630 = insertelement <4 x float> %tmp1039, float %add10629, i32 1
+ store <4 x float> %vecins10630, <4 x float>* undef, align 16
+ %tmp1040 = load <4 x float>, <4 x float>* undef, align 16
+ %vecext10631 = extractelement <4 x float> %tmp1040, i32 2
+ %tmp1041 = load <4 x float>, <4 x float>* undef, align 16
+ %tmp1042 = load <4 x float>, <4 x float>* undef, align 16
+ %vecext10634 = extractelement <4 x float> %tmp1042, i32 3
+ %add10635 = fadd float %vecext10634, 0xC067533340000000
+ %tmp1043 = load <4 x float>, <4 x float>* undef, align 16
+ %vecins10636 = insertelement <4 x float> %tmp1043, float %add10635, i32 3
+ store <4 x float> %vecins10636, <4 x float>* undef, align 16
+ store <4 x float> <float 1.950000e+02, float 0x407E8E6660000000, float 0x407D7CCCC0000000, float 0x407E166660000000>, <4 x float>* %.compoundliteral10637
+ %tmp1044 = load <4 x float>, <4 x float>* undef, align 16
+ %add10638 = fadd <4 x float> %tmp1044, undef
+ %tmp1045 = load <4 x float>, <4 x float>* undef, align 16
+ %vecext10639 = extractelement <4 x float> %tmp1045, i32 0
+ %add10640 = fadd float %vecext10639, 0x406CA33340000000
+ %tmp1046 = load <4 x float>, <4 x float>* undef, align 16
+ %vecins10641 = insertelement <4 x float> %tmp1046, float %add10640, i32 0
+ store <4 x float> %vecins10641, <4 x float>* undef, align 16
+ %tmp1047 = load <4 x float>, <4 x float>* undef, align 16
+ %vecext10642 = extractelement <4 x float> %tmp1047, i32 1
+ %add10643 = fadd float %vecext10642, 0xC07C8999A0000000
+ %tmp1048 = load <4 x float>, <4 x float>* undef, align 16
+ %vecins10644 = insertelement <4 x float> %tmp1048, float %add10643, i32 1
+ store <4 x float> %vecins10644, <4 x float>* undef, align 16
+ %tmp1049 = load <4 x float>, <4 x float>* undef, align 16
+ %vecext10645 = extractelement <4 x float> %tmp1049, i32 2
+ %tmp1050 = load <4 x float>, <4 x float>* undef, align 16
+ %tmp1051 = load <4 x float>, <4 x float>* undef, align 16
+ %vecins10748 = insertelement <4 x float> undef, float undef, i32 3
+ %tmp1052 = load <4 x float>, <4 x float>* %.compoundliteral10749
+ %add10750 = fadd <4 x float> undef, %tmp1052
+ store <4 x float> %add10750, <4 x float>* undef, align 16
+ %tmp1053 = load <4 x float>, <4 x float>* undef, align 16
+ %vecext10751 = extractelement <4 x float> %tmp1053, i32 0
+ %add10752 = fadd float %vecext10751, 0x4071B33340000000
+ %tmp1054 = load <4 x float>, <4 x float>* undef, align 16
+ %vecins10753 = insertelement <4 x float> %tmp1054, float %add10752, i32 0
+ store <4 x float> %vecins10753, <4 x float>* undef, align 16
+ %tmp1055 = load <4 x float>, <4 x float>* undef, align 16
+ %vecext10754 = extractelement <4 x float> %tmp1055, i32 1
+ %add10755 = fadd float %vecext10754, 0xC076A66660000000
+ %tmp1056 = load <4 x float>, <4 x float>* undef, align 16
+ %vecins10756 = insertelement <4 x float> %tmp1056, float %add10755, i32 1
+ store <4 x float> %vecins10756, <4 x float>* undef, align 16
+ %tmp1057 = load <4 x float>, <4 x float>* undef, align 16
+ %vecext10757 = extractelement <4 x float> %tmp1057, i32 2
+ %add10758 = fadd float %vecext10757, 3.800000e+01
+ %tmp1058 = load <4 x float>, <4 x float>* undef, align 16
+ %vecins10759 = insertelement <4 x float> %tmp1058, float %add10758, i32 2
+ store <4 x float> %vecins10759, <4 x float>* undef, align 16
+ %tmp1059 = load <4 x float>, <4 x float>* undef, align 16
+ %vecext10760 = extractelement <4 x float> %tmp1059, i32 3
+ store <4 x float> undef, <4 x float>* undef, align 16
+ store <4 x float> <float 0xC075BB3340000000, float 0x4074D4CCC0000000, float 0xC07A466660000000, float 0xC0691CCCC0000000>, <4 x float>* %.compoundliteral10763
+ %tmp1060 = load <4 x float>, <4 x float>* %.compoundliteral10763
+ %tmp1061 = load <4 x float>, <4 x float>* undef, align 16
+ %tmp1062 = load <4 x float>, <4 x float>* undef, align 16
+ %add10985 = fadd float undef, 0x405E933340000000
+ %tmp1063 = load <4 x float>, <4 x float>* undef, align 16
+ %vecins10986 = insertelement <4 x float> %tmp1063, float %add10985, i32 3
+ store <4 x float> %vecins10986, <4 x float>* undef, align 16
+ store <4 x float> <float 0xC0721E6660000000, float -4.180000e+02, float 0x406F366660000000, float 0xC055F999A0000000>, <4 x float>* %.compoundliteral10987
+ %tmp1064 = load <4 x float>, <4 x float>* %.compoundliteral10987
+ %tmp1065 = load <4 x float>, <4 x float>* undef, align 16
+ %vecins10994 = insertelement <4 x float> %tmp1065, float undef, i32 1
+ %tmp1066 = load <4 x float>, <4 x float>* undef, align 16
+ %vecext10995 = extractelement <4 x float> %tmp1066, i32 2
+ %add10996 = fadd float %vecext10995, 0x406F9999A0000000
+ %tmp1067 = load <4 x float>, <4 x float>* undef, align 16
+ %vecins10997 = insertelement <4 x float> %tmp1067, float %add10996, i32 2
+ store <4 x float> %vecins10997, <4 x float>* undef, align 16
+ %tmp1068 = load <4 x float>, <4 x float>* undef, align 16
+ %vecext10998 = extractelement <4 x float> %tmp1068, i32 3
+ %add10999 = fadd float %vecext10998, -2.765000e+02
+ %tmp1069 = load <4 x float>, <4 x float>* undef, align 16
+ %vecins11000 = insertelement <4 x float> %tmp1069, float %add10999, i32 3
+ store <4 x float> %vecins11000, <4 x float>* undef, align 16
+ store <4 x float> <float 0x4078F999A0000000, float 0xC06D166660000000, float 0x40501999A0000000, float 0x406FC999A0000000>, <4 x float>* %.compoundliteral11001
+ %tmp1070 = load <4 x float>, <4 x float>* undef, align 16
+ %add11002 = fadd <4 x float> %tmp1070, undef
+ %vecext11003 = extractelement <4 x float> undef, i32 0
+ %vecext11009 = extractelement <4 x float> undef, i32 2
+ %tmp1071 = load <4 x float>, <4 x float>* undef, align 16
+ %vecins11033 = insertelement <4 x float> %tmp1071, float undef, i32 0
+ store <4 x float> %vecins11033, <4 x float>* undef, align 16
+ %tmp1072 = load <4 x float>, <4 x float>* undef, align 16
+ %vecext11034 = extractelement <4 x float> %tmp1072, i32 1
+ %add11035 = fadd float %vecext11034, 0x4056D33340000000
+ %tmp1073 = load <4 x float>, <4 x float>* undef, align 16
+ %vecins11036 = insertelement <4 x float> %tmp1073, float %add11035, i32 1
+ store <4 x float> %vecins11036, <4 x float>* undef, align 16
+ %tmp1074 = load <4 x float>, <4 x float>* undef, align 16
+ %vecext11037 = extractelement <4 x float> %tmp1074, i32 2
+ %add11038 = fadd float %vecext11037, 0xC06EA33340000000
+ %tmp1075 = load <4 x float>, <4 x float>* undef, align 16
+ store <4 x float> undef, <4 x float>* undef, align 16
+ %tmp1076 = load <4 x float>, <4 x float>* undef, align 16
+ %vecext11040 = extractelement <4 x float> %tmp1076, i32 3
+ %add11041 = fadd float %vecext11040, 0x40746CCCC0000000
+ %tmp1077 = load <4 x float>, <4 x float>* undef, align 16
+ %vecins11042 = insertelement <4 x float> %tmp1077, float %add11041, i32 3
+ store <4 x float> <float 0x405DD999A0000000, float -3.775000e+02, float -1.265000e+02, float 0xC065C66660000000>, <4 x float>* undef
+ %tmp1078 = load <4 x float>, <4 x float>* undef, align 16
+ %add11044 = fadd <4 x float> %tmp1078, undef
+ store <4 x float> %add11044, <4 x float>* undef, align 16
+ %tmp1079 = load <4 x float>, <4 x float>* undef, align 16
+ %vecext11045 = extractelement <4 x float> %tmp1079, i32 0
+ %add11046 = fadd float %vecext11045, 0xC076E66660000000
+ %tmp1080 = load <4 x float>, <4 x float>* undef, align 16
+ %vecins11047 = insertelement <4 x float> %tmp1080, float %add11046, i32 0
+ %tmp1081 = load <4 x float>, <4 x float>* undef, align 16
+ %vecext11048 = extractelement <4 x float> %tmp1081, i32 1
+ %add11049 = fadd float %vecext11048, 4.100000e+02
+ %vecins11064 = insertelement <4 x float> undef, float undef, i32 1
+ %add11074 = fadd float undef, 0xC06FF999A0000000
+ %tmp1082 = load <4 x float>, <4 x float>* undef, align 16
+ %vecins11075 = insertelement <4 x float> %tmp1082, float %add11074, i32 0
+ store <4 x float> %vecins11075, <4 x float>* undef, align 16
+ %add11077 = fadd float undef, 0xC075D33340000000
+ %tmp1083 = load <4 x float>, <4 x float>* undef, align 16
+ %tmp1084 = load <4 x float>, <4 x float>* undef, align 16
+ store <4 x float> undef, <4 x float>* undef, align 16
+ %tmp1085 = load <4 x float>, <4 x float>* undef, align 16
+ %vecext11093 = extractelement <4 x float> %tmp1085, i32 2
+ %add11094 = fadd float %vecext11093, 0xC07CD66660000000
+ %tmp1086 = load <4 x float>, <4 x float>* undef, align 16
+ %vecins11095 = insertelement <4 x float> %tmp1086, float %add11094, i32 2
+ store <4 x float> %vecins11095, <4 x float>* undef, align 16
+ store <4 x float> undef, <4 x float>* undef, align 16
+ store <4 x float> <float 0x4061F66660000000, float 0xC076DB3340000000, float 0xC055A66660000000, float 2.415000e+02>, <4 x float>* undef
+ %tmp1087 = load <4 x float>, <4 x float>* undef
+ store <4 x float> undef, <4 x float>* undef, align 16
+ %tmp1088 = load <4 x float>, <4 x float>* undef, align 16
+ %vecext11513 = extractelement <4 x float> %tmp1088, i32 2
+ %add11514 = fadd float %vecext11513, 0xC07C7199A0000000
+ %vecins11515 = insertelement <4 x float> undef, float %add11514, i32 2
+ store <4 x float> %vecins11515, <4 x float>* undef, align 16
+ %add11520 = fadd <4 x float> undef, undef
+ store <4 x float> %add11520, <4 x float>* undef, align 16
+ %vecext11521 = extractelement <4 x float> undef, i32 0
+ %add11522 = fadd float %vecext11521, 0x4041733340000000
+ %tmp1089 = load <4 x float>, <4 x float>* undef, align 16
+ store <4 x float> undef, <4 x float>* undef, align 16
+ %tmp1090 = load <4 x float>, <4 x float>* undef
+ %tmp1091 = load <4 x float>, <4 x float>* undef, align 16
+ %add11562 = fadd <4 x float> %tmp1091, %tmp1090
+ %tmp1092 = load <4 x float>, <4 x float>* undef, align 16
+ %add11564 = fadd float undef, 0xC0411999A0000000
+ %tmp1093 = load <4 x float>, <4 x float>* undef, align 16
+ %vecins11565 = insertelement <4 x float> %tmp1093, float %add11564, i32 0
+ store <4 x float> undef, <4 x float>* undef, align 16
+ %vecext11586 = extractelement <4 x float> undef, i32 3
+ %add11587 = fadd float %vecext11586, 3.760000e+02
+ %tmp1094 = load <4 x float>, <4 x float>* undef, align 16
+ store <4 x float> undef, <4 x float>* undef, align 16
+ store <4 x float> <float 0xC06ED999A0000000, float 1.380000e+02, float 0xC073AB3340000000, float 0x4078A66660000000>, <4 x float>* undef
+ %tmp1095 = load <4 x float>, <4 x float>* undef
+ %tmp1096 = load <4 x float>, <4 x float>* undef, align 16
+ %tmp1097 = load <4 x float>, <4 x float>* undef, align 16
+ %tmp1098 = load <4 x float>, <4 x float>* undef, align 16
+ %vecins11593 = insertelement <4 x float> %tmp1098, float undef, i32 0
+ %vecext11594 = extractelement <4 x float> undef, i32 1
+ %tmp1099 = load <4 x float>, <4 x float>* undef, align 16
+ %vecins11596 = insertelement <4 x float> %tmp1099, float undef, i32 1
+ store <4 x float> %vecins11596, <4 x float>* undef, align 16
+ %tmp1100 = load <4 x float>, <4 x float>* undef, align 16
+ %vecext11597 = extractelement <4 x float> %tmp1100, i32 2
+ %add11598 = fadd float %vecext11597, 0x40430CCCC0000000
+ %tmp1101 = load <4 x float>, <4 x float>* undef, align 16
+ %vecins11599 = insertelement <4 x float> %tmp1101, float %add11598, i32 2
+ %tmp1102 = load <4 x float>, <4 x float>* undef, align 16
+ %vecext11600 = extractelement <4 x float> %tmp1102, i32 3
+ %tmp1103 = load <4 x float>, <4 x float>* undef, align 16
+ %vecins11602 = insertelement <4 x float> %tmp1103, float undef, i32 3
+ store <4 x float> %vecins11602, <4 x float>* undef, align 16
+ %tmp1104 = load <4 x float>, <4 x float>* undef
+ %tmp1105 = load <4 x float>, <4 x float>* undef, align 16
+ %add11604 = fadd <4 x float> %tmp1105, %tmp1104
+ %tmp1106 = load <4 x float>, <4 x float>* undef, align 16
+ %vecext11605 = extractelement <4 x float> %tmp1106, i32 0
+ %tmp1107 = load <4 x float>, <4 x float>* undef, align 16
+ %vecins11607 = insertelement <4 x float> %tmp1107, float undef, i32 0
+ %vecins11621 = insertelement <4 x float> undef, float undef, i32 0
+ %vecins11630 = insertelement <4 x float> undef, float undef, i32 3
+ store <4 x float> %vecins11630, <4 x float>* undef, align 16
+ store <4 x float> <float -1.190000e+02, float 0x402F666660000000, float 0xC07BD33340000000, float -1.595000e+02>, <4 x float>* %.compoundliteral11631
+ %tmp1108 = load <4 x float>, <4 x float>* %.compoundliteral11631
+ %tmp1109 = load <4 x float>, <4 x float>* undef, align 16
+ store <4 x float> undef, <4 x float>* undef, align 16
+ %add11634 = fadd float undef, -1.075000e+02
+ %vecext11647 = extractelement <4 x float> undef, i32 0
+ %add11648 = fadd float %vecext11647, 0x40775999A0000000
+ %tmp1110 = load <4 x float>, <4 x float>* undef, align 16
+ %vecext11650 = extractelement <4 x float> undef, i32 1
+ %tmp1111 = load <4 x float>, <4 x float>* undef, align 16
+ %vecins11784 = insertelement <4 x float> %tmp1111, float undef, i32 3
+ store <4 x float> %vecins11784, <4 x float>* undef, align 16
+ store <4 x float> <float 1.605000e+02, float 0x4068366660000000, float 2.820000e+02, float 0x407CF66660000000>, <4 x float>* %.compoundliteral11785
+ %tmp1112 = load <4 x float>, <4 x float>* %.compoundliteral11785
+ %add11786 = fadd <4 x float> undef, %tmp1112
+ store <4 x float> %add11786, <4 x float>* undef, align 16
+ %tmp1113 = load <4 x float>, <4 x float>* undef, align 16
+ %vecext11787 = extractelement <4 x float> %tmp1113, i32 0
+ %vecext11807 = extractelement <4 x float> undef, i32 2
+ %add11808 = fadd float %vecext11807, 4.535000e+02
+ %tmp1114 = load <4 x float>, <4 x float>* undef, align 16
+ %vecext11810 = extractelement <4 x float> undef, i32 3
+ %add11811 = fadd float %vecext11810, 0x4068F66660000000
+ %tmp1115 = load <4 x float>, <4 x float>* undef, align 16
+ %vecins11812 = insertelement <4 x float> %tmp1115, float %add11811, i32 3
+ store <4 x float> %vecins11812, <4 x float>* undef, align 16
+ %tmp1116 = load <4 x float>, <4 x float>* undef
+ %tmp1117 = load <4 x float>, <4 x float>* undef, align 16
+ %vecext11958 = extractelement <4 x float> undef, i32 1
+ store <4 x float> undef, <4 x float>* undef, align 16
+ %vecext11961 = extractelement <4 x float> undef, i32 2
+ %add11962 = fadd float %vecext11961, -3.680000e+02
+ %tmp1118 = load <4 x float>, <4 x float>* undef, align 16
+ store <4 x float> undef, <4 x float>* undef, align 16
+ %add11965 = fadd float undef, 0x4061133340000000
+ store <4 x float> undef, <4 x float>* undef, align 16
+ %tmp1119 = load <4 x float>, <4 x float>* undef, align 16
+ %vecext11975 = extractelement <4 x float> %tmp1119, i32 2
+ %tmp1120 = load <4 x float>, <4 x float>* undef, align 16
+ %vecins11977 = insertelement <4 x float> %tmp1120, float undef, i32 2
+ store <4 x float> %vecins11977, <4 x float>* undef, align 16
+ %vecext11978 = extractelement <4 x float> undef, i32 3
+ %add11979 = fadd float %vecext11978, 0xC0688999A0000000
+ %tmp1121 = load <4 x float>, <4 x float>* undef, align 16
+ %vecins11980 = insertelement <4 x float> %tmp1121, float %add11979, i32 3
+ store <4 x float> %vecins11980, <4 x float>* undef, align 16
+ %add11982 = fadd <4 x float> undef, undef
+ store <4 x float> %add11982, <4 x float>* undef, align 16
+ %tmp1122 = load <4 x float>, <4 x float>* undef, align 16
+ %vecext11983 = extractelement <4 x float> %tmp1122, i32 0
+ %add11984 = fadd float %vecext11983, 0xC075966660000000
+ %tmp1123 = load <4 x float>, <4 x float>* undef, align 16
+ %vecins12005 = insertelement <4 x float> undef, float undef, i32 2
+ store <4 x float> %vecins12005, <4 x float>* undef, align 16
+ %tmp1124 = load <4 x float>, <4 x float>* undef, align 16
+ %add12007 = fadd float undef, 0xC07124CCC0000000
+ %vecins12008 = insertelement <4 x float> undef, float %add12007, i32 3
+ store <4 x float> %vecins12008, <4 x float>* undef, align 16
+ %tmp1125 = load <4 x float>, <4 x float>* undef, align 16
+ store <4 x float> undef, <4 x float>* undef, align 16
+ %tmp1126 = load <4 x float>, <4 x float>* undef, align 16
+ %add12012 = fadd float undef, 0xC0750CCCC0000000
+ %tmp1127 = load <4 x float>, <4 x float>* undef, align 16
+ %vecins12013 = insertelement <4 x float> %tmp1127, float %add12012, i32 0
+ store <4 x float> %vecins12013, <4 x float>* undef, align 16
+ %tmp1128 = load <4 x float>, <4 x float>* undef, align 16
+ %add12015 = fadd float undef, 0x4079CE6660000000
+ %tmp1129 = load <4 x float>, <4 x float>* undef, align 16
+ %vecins12016 = insertelement <4 x float> %tmp1129, float %add12015, i32 1
+ store <4 x float> %vecins12016, <4 x float>* undef, align 16
+ %add12018 = fadd float undef, 3.555000e+02
+ %tmp1130 = load <4 x float>, <4 x float>* undef, align 16
+ %vecins12019 = insertelement <4 x float> %tmp1130, float %add12018, i32 2
+ %tmp1131 = load <4 x float>, <4 x float>* undef, align 16
+ %vecext12020 = extractelement <4 x float> %tmp1131, i32 3
+ store <4 x float> undef, <4 x float>* undef, align 16
+ %vecext12028 = extractelement <4 x float> undef, i32 1
+ store <4 x float> undef, <4 x float>* undef, align 16
+ store <4 x float> <float 0x40791999A0000000, float 0x407C7CCCC0000000, float 0x4070F33340000000, float 0xC056ECCCC0000000>, <4 x float>* undef
+ %tmp1132 = load <4 x float>, <4 x float>* undef, align 16
+ %add12038 = fadd <4 x float> %tmp1132, undef
+ %tmp1133 = load <4 x float>, <4 x float>* undef, align 16
+ %vecext12042 = extractelement <4 x float> %tmp1133, i32 1
+ %add12043 = fadd float %vecext12042, 0x402F9999A0000000
+ %tmp1134 = load <4 x float>, <4 x float>* undef, align 16
+ %vecins12044 = insertelement <4 x float> %tmp1134, float %add12043, i32 1
+ store <4 x float> %vecins12044, <4 x float>* undef, align 16
+ %vecext12045 = extractelement <4 x float> undef, i32 2
+ %add12046 = fadd float %vecext12045, 0xC07EF33340000000
+ %tmp1135 = load <4 x float>, <4 x float>* undef, align 16
+ %vecins12047 = insertelement <4 x float> %tmp1135, float %add12046, i32 2
+ store <4 x float> %vecins12047, <4 x float>* undef, align 16
+ store <4 x float> undef, <4 x float>* undef, align 16
+ %tmp1136 = load <4 x float>, <4 x float>* undef, align 16
+ %vecext12112 = extractelement <4 x float> %tmp1136, i32 1
+ %tmp1137 = load <4 x float>, <4 x float>* undef, align 16
+ store <4 x float> undef, <4 x float>* undef, align 16
+ %add12116 = fadd float undef, 0xC074F4CCC0000000
+ %tmp1138 = load <4 x float>, <4 x float>* undef, align 16
+ %vecins12117 = insertelement <4 x float> %tmp1138, float %add12116, i32 2
+ store <4 x float> %vecins12117, <4 x float>* undef, align 16
+ %tmp1139 = load <4 x float>, <4 x float>* undef, align 16
+ %vecext12118 = extractelement <4 x float> %tmp1139, i32 3
+ %add12119 = fadd float %vecext12118, 0xC0638CCCC0000000
+ %tmp1140 = load <4 x float>, <4 x float>* undef, align 16
+ %vecins12120 = insertelement <4 x float> %tmp1140, float %add12119, i32 3
+ %add12152 = fadd float undef, 0x4039333340000000
+ %tmp1141 = load <4 x float>, <4 x float>* undef, align 16
+ %vecins12153 = insertelement <4 x float> %tmp1141, float %add12152, i32 0
+ %vecext12154 = extractelement <4 x float> undef, i32 1
+ %add12155 = fadd float %vecext12154, 0xC07BBB3340000000
+ %tmp1142 = load <4 x float>, <4 x float>* undef, align 16
+ %vecins12156 = insertelement <4 x float> %tmp1142, float %add12155, i32 1
+ %tmp1143 = load <4 x float>, <4 x float>* undef, align 16
+ %vecext12157 = extractelement <4 x float> %tmp1143, i32 2
+ %add12158 = fadd float %vecext12157, 0xC0428CCCC0000000
+ %tmp1144 = load <4 x float>, <4 x float>* undef, align 16
+ %vecins12159 = insertelement <4 x float> %tmp1144, float %add12158, i32 2
+ %tmp1145 = load <4 x float>, <4 x float>* undef, align 16
+ %vecext12160 = extractelement <4 x float> %tmp1145, i32 3
+ %add12161 = fadd float %vecext12160, 0x407B1999A0000000
+ %tmp1146 = load <4 x float>, <4 x float>* undef, align 16
+ %vecins12162 = insertelement <4 x float> %tmp1146, float %add12161, i32 3
+ store <4 x float> %vecins12162, <4 x float>* undef, align 16
+ %tmp1147 = load <4 x float>, <4 x float>* undef
+ %tmp1148 = load <4 x float>, <4 x float>* undef, align 16
+ %tmp1149 = load <4 x float>, <4 x float>* undef, align 16
+ %vecext12182 = extractelement <4 x float> %tmp1149, i32 1
+ %tmp1150 = load <4 x float>, <4 x float>* undef, align 16
+ store <4 x float> undef, <4 x float>* undef, align 16
+ store <4 x float> <float 0x4061833340000000, float 0x405CA66660000000, float -1.275000e+02, float 0x405BC66660000000>, <4 x float>* undef
+ %add12208 = fadd float undef, 0x407854CCC0000000
+ %tmp1151 = load <4 x float>, <4 x float>* undef, align 16
+ store <4 x float> undef, <4 x float>* undef, align 16
+ %tmp1152 = load <4 x float>, <4 x float>* undef, align 16
+ %tmp1153 = load <4 x float>, <4 x float>* undef, align 16
+ %vecins12218 = insertelement <4 x float> undef, float undef, i32 3
+ store <4 x float> %vecins12218, <4 x float>* undef, align 16
+ store <4 x float> <float 0x407C3CCCC0000000, float 0xC057C66660000000, float 2.605000e+02, float 0xC07974CCC0000000>, <4 x float>* undef
+ %tmp1154 = load <4 x float>, <4 x float>* undef
+ %tmp1155 = load <4 x float>, <4 x float>* undef, align 16
+ %add12220 = fadd <4 x float> %tmp1155, %tmp1154
+ %tmp1156 = load <4 x float>, <4 x float>* undef, align 16
+ %tmp1157 = load <4 x float>, <4 x float>* undef, align 16
+ %vecins12223 = insertelement <4 x float> %tmp1157, float undef, i32 0
+ store <4 x float> %vecins12223, <4 x float>* undef, align 16
+ %tmp1158 = load <4 x float>, <4 x float>* undef, align 16
+ %add12242 = fadd float undef, 0x4067E33340000000
+ %tmp1159 = load <4 x float>, <4 x float>* undef, align 16
+ %vecins12243 = insertelement <4 x float> %tmp1159, float %add12242, i32 2
+ store <4 x float> %vecins12243, <4 x float>* undef, align 16
+ %tmp1160 = load <4 x float>, <4 x float>* undef, align 16
+ %vecext12244 = extractelement <4 x float> %tmp1160, i32 3
+ %add12245 = fadd float %vecext12244, 0x4071AE6660000000
+ %tmp1161 = load <4 x float>, <4 x float>* undef, align 16
+ %vecins12246 = insertelement <4 x float> %tmp1161, float %add12245, i32 3
+ store <4 x float> %vecins12246, <4 x float>* undef, align 16
+ store <4 x float> <float -4.880000e+02, float 0xC079966660000000, float -8.450000e+01, float 0xC0464CCCC0000000>, <4 x float>* %.compoundliteral12247
+ %tmp1162 = load <4 x float>, <4 x float>* %.compoundliteral12247
+ %tmp1163 = load <4 x float>, <4 x float>* undef, align 16
+ %add12248 = fadd <4 x float> %tmp1163, %tmp1162
+ store <4 x float> %add12248, <4 x float>* undef, align 16
+ %tmp1164 = load <4 x float>, <4 x float>* undef, align 16
+ %vecext12249 = extractelement <4 x float> %tmp1164, i32 0
+ %add12250 = fadd float %vecext12249, 1.075000e+02
+ %tmp1165 = load <4 x float>, <4 x float>* undef, align 16
+ store <4 x float> undef, <4 x float>* undef, align 16
+ %tmp1166 = load <4 x float>, <4 x float>* undef, align 16
+ %vecext12252 = extractelement <4 x float> %tmp1166, i32 1
+ %add12253 = fadd float %vecext12252, 0xC0662CCCC0000000
+ %tmp1167 = load <4 x float>, <4 x float>* undef, align 16
+ %vecins12254 = insertelement <4 x float> %tmp1167, float %add12253, i32 1
+ store <4 x float> %vecins12254, <4 x float>* undef, align 16
+ %tmp1168 = load <4 x float>, <4 x float>* undef, align 16
+ %vecext12255 = extractelement <4 x float> %tmp1168, i32 2
+ %add12256 = fadd float %vecext12255, 0x40554CCCC0000000
+ store <4 x float> undef, <4 x float>* undef, align 16
+ %add13141 = fadd float undef, 0x40768999A0000000
+ %tmp1169 = load <4 x float>, <4 x float>* undef, align 16
+ %vecins13142 = insertelement <4 x float> %tmp1169, float %add13141, i32 3
+ store <4 x float> %vecins13142, <4 x float>* undef, align 16
+ %tmp1170 = load <4 x float>, <4 x float>* undef
+ %add13144 = fadd <4 x float> undef, %tmp1170
+ store <4 x float> %add13144, <4 x float>* undef, align 16
+ %tmp1171 = load <4 x float>, <4 x float>* undef, align 16
+ %vecext13145 = extractelement <4 x float> %tmp1171, i32 0
+ %add13146 = fadd float %vecext13145, 3.975000e+02
+ %tmp1172 = load <4 x float>, <4 x float>* undef, align 16
+ %vecext13378 = extractelement <4 x float> %tmp1172, i32 3
+ %add13379 = fadd float %vecext13378, 0xC053B33340000000
+ %tmp1173 = load <4 x float>, <4 x float>* undef, align 16
+ %vecins13380 = insertelement <4 x float> %tmp1173, float %add13379, i32 3
+ store <4 x float> %vecins13380, <4 x float>* undef, align 16
+ %tmp1174 = load <4 x float>, <4 x float>* undef, align 16
+ %vecins13408 = insertelement <4 x float> %tmp1174, float undef, i32 3
+ store <4 x float> %vecins13408, <4 x float>* undef, align 16
+ store <4 x float> <float 0xC0455999A0000000, float 0xC07D366660000000, float 4.240000e+02, float -1.670000e+02>, <4 x float>* undef
+ %tmp1175 = load <4 x float>, <4 x float>* undef
+ %tmp1176 = load <4 x float>, <4 x float>* undef, align 16
+ %add13410 = fadd <4 x float> %tmp1176, %tmp1175
+ store <4 x float> %add13410, <4 x float>* undef, align 16
+ %tmp1177 = load <4 x float>, <4 x float>* undef, align 16
+ %add13412 = fadd float undef, 0xC0708999A0000000
+ %tmp1178 = load <4 x float>, <4 x float>* undef, align 16
+ %vecins13413 = insertelement <4 x float> %tmp1178, float %add13412, i32 0
+ store <4 x float> undef, <4 x float>* undef, align 16
+ %vecext13428 = extractelement <4 x float> undef, i32 1
+ %add13429 = fadd float %vecext13428, 0xC063BCCCC0000000
+ %tmp1179 = load <4 x float>, <4 x float>* undef, align 16
+ %vecins13430 = insertelement <4 x float> %tmp1179, float %add13429, i32 1
+ store <4 x float> %vecins13430, <4 x float>* undef, align 16
+ %tmp1180 = load <4 x float>, <4 x float>* undef, align 16
+ %vecext13431 = extractelement <4 x float> %tmp1180, i32 2
+ %vecins13433 = insertelement <4 x float> undef, float undef, i32 2
+ store <4 x float> undef, <4 x float>* undef, align 16
+ %add13449 = fadd float undef, 4.590000e+02
+ %tmp1181 = load <4 x float>, <4 x float>* undef, align 16
+ %vecins13450 = insertelement <4 x float> %tmp1181, float %add13449, i32 3
+ store <4 x float> %vecins13450, <4 x float>* undef, align 16
+ store <4 x float> <float 0xC073A66660000000, float 0xC041B33340000000, float 0x4066233340000000, float 0x4071C33340000000>, <4 x float>* undef
+ %tmp1182 = load <4 x float>, <4 x float>* undef
+ %tmp1183 = load <4 x float>, <4 x float>* undef, align 16
+ %add13452 = fadd <4 x float> %tmp1183, %tmp1182
+ store <4 x float> %add13452, <4 x float>* undef, align 16
+ %tmp1184 = load <4 x float>, <4 x float>* undef, align 16
+ %vecext13453 = extractelement <4 x float> %tmp1184, i32 0
+ %add13454 = fadd float %vecext13453, 0xC072866660000000
+ %tmp1185 = load <4 x float>, <4 x float>* undef, align 16
+ %vecins13455 = insertelement <4 x float> %tmp1185, float %add13454, i32 0
+ %add13471 = fadd float undef, 0xC0556CCCC0000000
+ %tmp1186 = load <4 x float>, <4 x float>* undef, align 16
+ %vecins13472 = insertelement <4 x float> %tmp1186, float %add13471, i32 1
+ store <4 x float> %vecins13472, <4 x float>* undef, align 16
+ %tmp1187 = load <4 x float>, <4 x float>* undef, align 16
+ %vecext13473 = extractelement <4 x float> %tmp1187, i32 2
+ %add13474 = fadd float %vecext13473, 0xC0786999A0000000
+ %tmp1188 = load <4 x float>, <4 x float>* undef, align 16
+ %vecins13475 = insertelement <4 x float> %tmp1188, float %add13474, i32 2
+ store <4 x float> %vecins13475, <4 x float>* undef, align 16
+ %add13477 = fadd float undef, 0xC07C3E6660000000
+ %tmp1189 = load <4 x float>, <4 x float>* undef, align 16
+ %vecins13478 = insertelement <4 x float> %tmp1189, float %add13477, i32 3
+ store <4 x float> %vecins13478, <4 x float>* undef, align 16
+ store <4 x float> <float -4.740000e+02, float 0x4023CCCCC0000000, float 0xC05C266660000000, float 0x407B7199A0000000>, <4 x float>* undef
+ %tmp1190 = load <4 x float>, <4 x float>* undef, align 16
+ %add13480 = fadd <4 x float> %tmp1190, undef
+ store <4 x float> %add13480, <4 x float>* undef, align 16
+ %tmp1191 = load <4 x float>, <4 x float>* undef, align 16
+ %vecext13481 = extractelement <4 x float> %tmp1191, i32 0
+ %add13482 = fadd float %vecext13481, 0xC07BA4CCC0000000
+ %tmp1192 = load <4 x float>, <4 x float>* undef, align 16
+ %vecins13483 = insertelement <4 x float> %tmp1192, float %add13482, i32 0
+ store <4 x float> %vecins13483, <4 x float>* undef, align 16
+ %tmp1193 = load <4 x float>, <4 x float>* undef, align 16
+ %add13485 = fadd float undef, 0x406B1999A0000000
+ %tmp1194 = load <4 x float>, <4 x float>* undef, align 16
+ %vecins13486 = insertelement <4 x float> %tmp1194, float %add13485, i32 1
+ store <4 x float> %vecins13486, <4 x float>* undef, align 16
+ %tmp1195 = load <4 x float>, <4 x float>* undef, align 16
+ %vecext13487 = extractelement <4 x float> %tmp1195, i32 2
+ %add13488 = fadd float %vecext13487, 0x40647999A0000000
+ %tmp1196 = load <4 x float>, <4 x float>* undef, align 16
+ %vecins13489 = insertelement <4 x float> %tmp1196, float %add13488, i32 2
+ store <4 x float> %vecins13489, <4 x float>* undef, align 16
+ %tmp1197 = load <4 x float>, <4 x float>* undef, align 16
+ %vecext13490 = extractelement <4 x float> %tmp1197, i32 3
+ %tmp1198 = load <4 x float>, <4 x float>* undef, align 16
+ %vecins13492 = insertelement <4 x float> %tmp1198, float undef, i32 3
+ store <4 x float> %vecins13492, <4 x float>* undef, align 16
+ %tmp1199 = load <4 x float>, <4 x float>* %.compoundliteral13493
+ %tmp1200 = load <4 x float>, <4 x float>* undef, align 16
+ store <4 x float> undef, <4 x float>* undef, align 16
+ %vecins13548 = insertelement <4 x float> undef, float undef, i32 3
+ store <4 x float> <float 4.540000e+02, float 3.760000e+02, float 0x406EA33340000000, float 0x405AACCCC0000000>, <4 x float>* %.compoundliteral13549
+ %tmp1201 = load <4 x float>, <4 x float>* undef, align 16
+ %add13552 = fadd float undef, 3.230000e+02
+ %tmp1202 = load <4 x float>, <4 x float>* undef, align 16
+ %vecins13553 = insertelement <4 x float> %tmp1202, float %add13552, i32 0
+ %tmp1203 = load <4 x float>, <4 x float>* undef, align 16
+ %vecext13554 = extractelement <4 x float> %tmp1203, i32 1
+ %tmp1204 = load <4 x float>, <4 x float>* undef, align 16
+ %vecins13556 = insertelement <4 x float> %tmp1204, float undef, i32 1
+ store <4 x float> %vecins13556, <4 x float>* undef, align 16
+ %tmp1205 = load <4 x float>, <4 x float>* undef, align 16
+ %add13558 = fadd float undef, 2.625000e+02
+ %tmp1206 = load <4 x float>, <4 x float>* undef, align 16
+ %vecins13559 = insertelement <4 x float> %tmp1206, float %add13558, i32 2
+ store <4 x float> %vecins13559, <4 x float>* undef, align 16
+ %add13575 = fadd float undef, -4.725000e+02
+ %tmp1207 = load <4 x float>, <4 x float>* undef, align 16
+ %vecins13576 = insertelement <4 x float> %tmp1207, float %add13575, i32 3
+ store <4 x float> %vecins13576, <4 x float>* undef, align 16
+ store <4 x float> <float 0x40334CCCC0000000, float 0xC0785CCCC0000000, float 0xC078D66660000000, float 3.745000e+02>, <4 x float>* undef
+ %tmp1208 = load <4 x float>, <4 x float>* undef
+ %tmp1209 = load <4 x float>, <4 x float>* undef, align 16
+ %add13578 = fadd <4 x float> %tmp1209, %tmp1208
+ store <4 x float> %add13578, <4 x float>* undef, align 16
+ %tmp1210 = load <4 x float>, <4 x float>* undef, align 16
+ %tmp1211 = load <4 x float>, <4 x float>* undef, align 16
+ %add13592 = fadd <4 x float> %tmp1211, undef
+ store <4 x float> %add13592, <4 x float>* undef, align 16
+ %tmp1212 = load <4 x float>, <4 x float>* undef, align 16
+ %vecext13593 = extractelement <4 x float> %tmp1212, i32 0
+ %add13594 = fadd float %vecext13593, 0xC0708B3340000000
+ %tmp1213 = load <4 x float>, <4 x float>* undef, align 16
+ store <4 x float> undef, <4 x float>* undef, align 16
+ %tmp1214 = load <4 x float>, <4 x float>* undef, align 16
+ %vecext13596 = extractelement <4 x float> %tmp1214, i32 1
+ %add13597 = fadd float %vecext13596, 0x40660999A0000000
+ %vecins13604 = insertelement <4 x float> undef, float undef, i32 3
+ store <4 x float> %vecins13604, <4 x float>* undef, align 16
+ store <4 x float> <float 0x407B4999A0000000, float 0xC067F66660000000, float 0xC068F999A0000000, float 0xC079233340000000>, <4 x float>* undef
+ %tmp1215 = load <4 x float>, <4 x float>* undef, align 16
+ %add13606 = fadd <4 x float> %tmp1215, undef
+ %tmp1216 = load <4 x float>, <4 x float>* undef, align 16
+ %vecext13607 = extractelement <4 x float> %tmp1216, i32 0
+ %vecins13609 = insertelement <4 x float> undef, float undef, i32 0
+ %tmp1217 = load <4 x float>, <4 x float>* undef, align 16
+ store <4 x float> undef, <4 x float>* undef, align 16
+ %tmp1218 = load <4 x float>, <4 x float>* undef, align 16
+ %add13622 = fadd float undef, -3.390000e+02
+ %vecins13623 = insertelement <4 x float> undef, float %add13622, i32 0
+ store <4 x float> %vecins13623, <4 x float>* undef, align 16
+ %tmp1219 = load <4 x float>, <4 x float>* undef, align 16
+ %vecext13624 = extractelement <4 x float> %tmp1219, i32 1
+ %add13625 = fadd float %vecext13624, 0x405C3999A0000000
+ %vecext13627 = extractelement <4 x float> undef, i32 2
+ %add13628 = fadd float %vecext13627, 0xC067033340000000
+ %tmp1220 = load <4 x float>, <4 x float>* undef, align 16
+ %tmp1221 = load <4 x float>, <4 x float>* undef, align 16
+ %vecext13630 = extractelement <4 x float> %tmp1221, i32 3
+ %add13631 = fadd float %vecext13630, 0xC060333340000000
+ %tmp1222 = load <4 x float>, <4 x float>* undef, align 16
+ %vecins13632 = insertelement <4 x float> %tmp1222, float %add13631, i32 3
+ store <4 x float> %vecins13632, <4 x float>* undef, align 16
+ store <4 x float> <float 0x4078D66660000000, float 0x4048B33340000000, float 0x4051466660000000, float -2.965000e+02>, <4 x float>* undef
+ %tmp1223 = load <4 x float>, <4 x float>* undef
+ %tmp1224 = load <4 x float>, <4 x float>* undef, align 16
+ %add13634 = fadd <4 x float> %tmp1224, %tmp1223
+ store <4 x float> %add13634, <4 x float>* undef, align 16
+ %vecext13635 = extractelement <4 x float> undef, i32 0
+ %add13636 = fadd float %vecext13635, 0x406A5999A0000000
+ %tmp1225 = load <4 x float>, <4 x float>* undef, align 16
+ %vecins13637 = insertelement <4 x float> %tmp1225, float %add13636, i32 0
+ store <4 x float> %vecins13637, <4 x float>* undef, align 16
+ %tmp1226 = load <4 x float>, <4 x float>* undef, align 16
+ %tmp1227 = load <4 x float>, <4 x float>* undef, align 16
+ %vecins13643 = insertelement <4 x float> %tmp1227, float undef, i32 2
+ store <4 x float> undef, <4 x float>* undef, align 16
+ %tmp1228 = load <4 x float>, <4 x float>* undef, align 16
+ %add13785 = fadd float undef, 0x4068866660000000
+ %tmp1229 = load <4 x float>, <4 x float>* undef, align 16
+ %vecins13786 = insertelement <4 x float> %tmp1229, float %add13785, i32 3
+ store <4 x float> %vecins13786, <4 x float>* undef, align 16
+ store <4 x float> <float 0x407704CCC0000000, float 0x4047B33340000000, float 0x40797B3340000000, float 0xC0652CCCC0000000>, <4 x float>* %.compoundliteral13787
+ %tmp1230 = load <4 x float>, <4 x float>* undef, align 16
+ %add13788 = fadd <4 x float> %tmp1230, undef
+ %tmp1231 = load <4 x float>, <4 x float>* undef
+ %tmp1232 = load <4 x float>, <4 x float>* undef, align 16
+ %add13802 = fadd <4 x float> %tmp1232, %tmp1231
+ store <4 x float> %add13802, <4 x float>* undef, align 16
+ %tmp1233 = load <4 x float>, <4 x float>* undef, align 16
+ %vecext13803 = extractelement <4 x float> %tmp1233, i32 0
+ %add13804 = fadd float %vecext13803, -2.900000e+01
+ %tmp1234 = load <4 x float>, <4 x float>* undef, align 16
+ %vecins13805 = insertelement <4 x float> %tmp1234, float %add13804, i32 0
+ store <4 x float> %vecins13805, <4 x float>* undef, align 16
+ %tmp1235 = load <4 x float>, <4 x float>* undef, align 16
+ %add13807 = fadd float undef, 6.400000e+01
+ %tmp1236 = load <4 x float>, <4 x float>* undef, align 16
+ %tmp1237 = load <4 x float>, <4 x float>* undef, align 16
+ %vecext13809 = extractelement <4 x float> %tmp1237, i32 2
+ %tmp1238 = load <4 x float>, <4 x float>* undef, align 16
+ %vecext13812 = extractelement <4 x float> %tmp1238, i32 3
+ %add13813 = fadd float %vecext13812, -3.615000e+02
+ %vecins13814 = insertelement <4 x float> undef, float %add13813, i32 3
+ store <4 x float> %vecins13814, <4 x float>* undef, align 16
+ store <4 x float> <float -2.270000e+02, float -1.500000e+01, float 0x407084CCC0000000, float -1.425000e+02>, <4 x float>* undef
+ %tmp1239 = load <4 x float>, <4 x float>* undef
+ store <4 x float> undef, <4 x float>* undef, align 16
+ %tmp1240 = load <4 x float>, <4 x float>* undef, align 16
+ %vecext13817 = extractelement <4 x float> %tmp1240, i32 0
+ %vecins13856 = insertelement <4 x float> undef, float undef, i32 3
+ store <4 x float> %vecins13856, <4 x float>* undef, align 16
+ store <4 x float> <float 0x40656CCCC0000000, float 0xC0656999A0000000, float 0x40778E6660000000, float 0x407ECE6660000000>, <4 x float>* undef
+ %tmp1241 = load <4 x float>, <4 x float>* undef
+ %tmp1242 = load <4 x float>, <4 x float>* undef, align 16
+ store <4 x float> undef, <4 x float>* undef, align 16
+ %tmp1243 = load <4 x float>, <4 x float>* undef, align 16
+ %vecext13859 = extractelement <4 x float> %tmp1243, i32 0
+ %tmp1244 = load <4 x float>, <4 x float>* undef, align 16
+ %vecins13861 = insertelement <4 x float> %tmp1244, float undef, i32 0
+ %tmp1245 = load <4 x float>, <4 x float>* undef, align 16
+ %vecext13862 = extractelement <4 x float> %tmp1245, i32 1
+ %add13863 = fadd float %vecext13862, -1.380000e+02
+ %vecins13864 = insertelement <4 x float> undef, float %add13863, i32 1
+ %vecins13867 = insertelement <4 x float> undef, float undef, i32 2
+ store <4 x float> %vecins13867, <4 x float>* undef, align 16
+ %tmp1246 = load <4 x float>, <4 x float>* undef, align 16
+ %tmp1247 = load <4 x float>, <4 x float>* undef, align 16
+ ret <4 x float> undef
+}
+
+declare i32 @printf(i8*, ...)
diff --git a/test/CodeGen/ARM/thumb1-varalloc.ll b/test/CodeGen/ARM/thumb1-varalloc.ll
index 8d5888d38f97..0637be03d565 100644
--- a/test/CodeGen/ARM/thumb1-varalloc.ll
+++ b/test/CodeGen/ARM/thumb1-varalloc.ll
@@ -12,7 +12,7 @@ entry:
; CHECK-LABEL: foo:
%size = alloca i32, align 4
- %0 = load i8** @__bar, align 4
+ %0 = load i8*, i8** @__bar, align 4
%1 = icmp eq i8* %0, null
br i1 %1, label %bb1, label %bb3
; CHECK: bne
@@ -22,7 +22,7 @@ bb1:
%2 = alloca [1026 x i8], align 1
; CHECK: mov [[R0:r[0-9]+]], sp
; CHECK: adds {{r[0-9]+}}, [[R0]], {{r[0-9]+}}
- %3 = getelementptr inbounds [1026 x i8]* %2, i32 0, i32 0
+ %3 = getelementptr inbounds [1026 x i8], [1026 x i8]* %2, i32 0, i32 0
%4 = call i32 @_called_func(i8* %3, i32* %size) nounwind
%5 = icmp eq i32 %4, 0
br i1 %5, label %bb2, label %bb3
@@ -43,26 +43,6 @@ bb3:
declare noalias i8* @strdup(i8* nocapture) nounwind
declare i32 @_called_func(i8*, i32*) nounwind
-; Variable ending up at unaligned offset from sp (i.e. not a multiple of 4)
-define void @test_local_var_addr() {
-; CHECK-LABEL: test_local_var_addr:
-
- %addr1 = alloca i8
- %addr2 = alloca i8
-
-; CHECK: mov r0, sp
-; CHECK: adds r0, #{{[0-9]+}}
-; CHECK: blx
- call void @take_ptr(i8* %addr1)
-
-; CHECK: mov r0, sp
-; CHECK: adds r0, #{{[0-9]+}}
-; CHECK: blx
- call void @take_ptr(i8* %addr2)
-
- ret void
-}
-
; Simple variable ending up *at* sp.
define void @test_simple_var() {
; CHECK-LABEL: test_simple_var:
@@ -126,14 +106,16 @@ define void @test_local_var_offset_1020() {
ret void
}
-; Max range addressable with tADDrSPi + tADDi8
-define void @test_local_var_offset_1275() {
-; CHECK-LABEL: test_local_var_offset_1275
+; Max range addressable with tADDrSPi + tADDi8 is 1275, however the automatic
+; 4-byte aligning of objects on the stack combined with 8-byte stack alignment
+; means that 1268 is the max offset we can use.
+define void @test_local_var_offset_1268() {
+; CHECK-LABEL: test_local_var_offset_1268
%addr1 = alloca i8, i32 1
- %addr2 = alloca i8, i32 1275
+ %addr2 = alloca i8, i32 1268
; CHECK: add r0, sp, #1020
-; CHECK: adds r0, #255
+; CHECK: adds r0, #248
; CHECK-NEXT: blx
call void @take_ptr(i8* %addr1)
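A quick sanity check of the numbers in the rewritten test above, assuming the usual Thumb1 encodings (tADDrSPi takes an 8-bit immediate scaled by 4, so it reaches at most 1020 bytes; tADDi8 takes an unscaled 8-bit immediate, so at most 255 more):

    1020 + 255 = 1275    ; theoretical reach of tADDrSPi followed by tADDi8
    1020 + 248 = 1268    ; offset the updated CHECK lines accept (add r0, sp, #1020; adds r0, #248)

The 7-byte gap between the two is what the new comment attributes to the 4-byte rounding of stack objects combined with 8-byte stack alignment; per that comment, 1268 is the largest offset still addressable with the two-instruction sequence.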
diff --git a/test/CodeGen/ARM/thumb1_return_sequence.ll b/test/CodeGen/ARM/thumb1_return_sequence.ll
index 318e6e402370..9c62faeaa684 100644
--- a/test/CodeGen/ARM/thumb1_return_sequence.ll
+++ b/test/CodeGen/ARM/thumb1_return_sequence.ll
@@ -3,7 +3,7 @@
; CHECK-V4T-LABEL: clobberframe
; CHECK-V5T-LABEL: clobberframe
-define <4 x i32> @clobberframe() #0 {
+define <4 x i32> @clobberframe(<6 x i32>* %p) #0 {
entry:
; Prologue
; --------
@@ -11,11 +11,12 @@ entry:
; CHECK-V4T: sub sp,
; CHECK-V5T: push {[[SAVED:(r[4567](, )?)+]], lr}
- %b = alloca <4 x i32>, align 16
+ %b = alloca <6 x i32>, align 16
%a = alloca <4 x i32>, align 16
- store <4 x i32> <i32 42, i32 42, i32 42, i32 42>, <4 x i32>* %b, align 16
+ %stuff = load <6 x i32>, <6 x i32>* %p, align 16
+ store <6 x i32> %stuff, <6 x i32>* %b, align 16
store <4 x i32> <i32 0, i32 1, i32 2, i32 3>, <4 x i32>* %a, align 16
- %0 = load <4 x i32>* %a, align 16
+ %0 = load <4 x i32>, <4 x i32>* %a, align 16
ret <4 x i32> %0
; Epilogue
@@ -45,7 +46,7 @@ entry:
%a = alloca <4 x i32>, align 16
store <4 x i32> <i32 42, i32 42, i32 42, i32 42>, <4 x i32>* %b, align 16
store <4 x i32> <i32 0, i32 1, i32 2, i32 3>, <4 x i32>* %a, align 16
- %0 = load <4 x i32>* %a, align 16
+ %0 = load <4 x i32>, <4 x i32>* %a, align 16
call void @llvm.va_start(i8* null)
ret <4 x i32> %0
@@ -70,40 +71,25 @@ entry:
; CHECK-V4T-LABEL: simpleframe
; CHECK-V5T-LABEL: simpleframe
-define i32 @simpleframe() #0 {
+define i32 @simpleframe(<6 x i32>* %p) #0 {
entry:
; Prologue
; --------
; CHECK-V4T: push {[[SAVED:(r[4567](, )?)+]], lr}
; CHECK-V5T: push {[[SAVED:(r[4567](, )?)+]], lr}
- %a = alloca i32, align 4
- %b = alloca i32, align 4
- %c = alloca i32, align 4
- %d = alloca i32, align 4
- store i32 1, i32* %a, align 4
- store i32 2, i32* %b, align 4
- store i32 3, i32* %c, align 4
- store i32 4, i32* %d, align 4
- %0 = load i32* %a, align 4
- %inc = add nsw i32 %0, 1
- store i32 %inc, i32* %a, align 4
- %1 = load i32* %b, align 4
- %inc1 = add nsw i32 %1, 1
- store i32 %inc1, i32* %b, align 4
- %2 = load i32* %c, align 4
- %inc2 = add nsw i32 %2, 1
- store i32 %inc2, i32* %c, align 4
- %3 = load i32* %d, align 4
- %inc3 = add nsw i32 %3, 1
- store i32 %inc3, i32* %d, align 4
- %4 = load i32* %a, align 4
- %5 = load i32* %b, align 4
- %add = add nsw i32 %4, %5
- %6 = load i32* %c, align 4
- %add4 = add nsw i32 %add, %6
- %7 = load i32* %d, align 4
- %add5 = add nsw i32 %add4, %7
+ %0 = load <6 x i32>, <6 x i32>* %p, align 16
+ %1 = extractelement <6 x i32> %0, i32 0
+ %2 = extractelement <6 x i32> %0, i32 1
+ %3 = extractelement <6 x i32> %0, i32 2
+ %4 = extractelement <6 x i32> %0, i32 3
+ %5 = extractelement <6 x i32> %0, i32 4
+ %6 = extractelement <6 x i32> %0, i32 5
+ %add1 = add nsw i32 %1, %2
+ %add2 = add nsw i32 %add1, %3
+ %add3 = add nsw i32 %add2, %4
+ %add4 = add nsw i32 %add3, %5
+ %add5 = add nsw i32 %add4, %6
ret i32 %add5
; Epilogue
@@ -135,24 +121,24 @@ entry:
store i32 2, i32* %b, align 4
store i32 3, i32* %c, align 4
store i32 4, i32* %d, align 4
- %0 = load i32* %a, align 4
+ %0 = load i32, i32* %a, align 4
%inc = add nsw i32 %0, 1
store i32 %inc, i32* %a, align 4
- %1 = load i32* %b, align 4
+ %1 = load i32, i32* %b, align 4
%inc1 = add nsw i32 %1, 1
store i32 %inc1, i32* %b, align 4
- %2 = load i32* %c, align 4
+ %2 = load i32, i32* %c, align 4
%inc2 = add nsw i32 %2, 1
store i32 %inc2, i32* %c, align 4
- %3 = load i32* %d, align 4
+ %3 = load i32, i32* %d, align 4
%inc3 = add nsw i32 %3, 1
store i32 %inc3, i32* %d, align 4
- %4 = load i32* %a, align 4
- %5 = load i32* %b, align 4
+ %4 = load i32, i32* %a, align 4
+ %5 = load i32, i32* %b, align 4
%add = add nsw i32 %4, %5
- %6 = load i32* %c, align 4
+ %6 = load i32, i32* %c, align 4
%add4 = add nsw i32 %add, %6
- %7 = load i32* %d, align 4
+ %7 = load i32, i32* %d, align 4
%add5 = add nsw i32 %add4, %7
%add6 = add nsw i32 %add5, %i
call void @llvm.va_start(i8* null)
diff --git a/test/CodeGen/ARM/thumb2-size-reduction-internal-flags.ll b/test/CodeGen/ARM/thumb2-size-reduction-internal-flags.ll
new file mode 100644
index 000000000000..578777f97c50
--- /dev/null
+++ b/test/CodeGen/ARM/thumb2-size-reduction-internal-flags.ll
@@ -0,0 +1,173 @@
+; RUN: llc %s -o - -verify-machineinstrs | FileCheck %s
+
+target datalayout = "e-m:o-p:32:32-f64:32:64-v64:32:64-v128:32:128-a:0:32-n32-S32"
+target triple = "thumbv7s-apple-ios8.0.0"
+
+%struct.cells = type { i32, i32, %struct.cells* }
+
+@reg_len = external global i32, align 4
+
+; The thumb2 size reduction pass commutes arguments to make the first src of an add the same as the dest.
+; It needs to also move the internal flag when commuting arguments.
+
+; CHECK-LABEL: @simulate
+
+; Function Attrs: nounwind optsize ssp
+define i32 @simulate(i32 %iterations, %struct.cells* nocapture %present, double %prob, i8* nocapture readonly %structure) {
+entry:
+ %0 = load i32, i32* @reg_len, align 4, !tbaa !3
+ %sub = add nsw i32 %0, -1
+ %div = sdiv i32 %sub, 31
+ %rem2 = srem i32 %sub, 31
+ %cmp35202 = icmp sgt i32 %rem2, 0
+ br label %for.cond3.preheader
+
+for.cond3.preheader: ; preds = %if.end85, %entry
+ %call192 = tail call i32 @lrand48() #2
+ br label %for.cond6.preheader
+
+for.cond34.preheader: ; preds = %for.inc30
+ br i1 %cmp35202, label %for.body37, label %for.end73
+
+for.cond6.preheader: ; preds = %for.inc30, %for.cond3.preheader
+ %call197 = phi i32 [ %call, %for.inc30 ], [ %call192, %for.cond3.preheader ]
+ %i.0196 = phi i32 [ %inc31, %for.inc30 ], [ 0, %for.cond3.preheader ]
+ %temp.1195 = phi %struct.cells* [ %5, %for.inc30 ], [ %present, %for.cond3.preheader ]
+ %savefaulty.0194 = phi i32 [ %add12, %for.inc30 ], [ 0, %for.cond3.preheader ]
+ %savef_free.0193 = phi i32 [ %add11, %for.inc30 ], [ 0, %for.cond3.preheader ]
+ br label %for.body8
+
+for.body8: ; preds = %for.body8, %for.cond6.preheader
+ %randv.0190 = phi i32 [ %call197, %for.cond6.preheader ], [ %shr, %for.body8 ]
+ %j.0189 = phi i32 [ 0, %for.cond6.preheader ], [ %inc, %for.body8 ]
+ %temp.2188 = phi %struct.cells* [ %temp.1195, %for.cond6.preheader ], [ %5, %for.body8 ]
+ %savefaulty.1187 = phi i32 [ %savefaulty.0194, %for.cond6.preheader ], [ %add12, %for.body8 ]
+ %savef_free.1186 = phi i32 [ %savef_free.0193, %for.cond6.preheader ], [ %add11, %for.body8 ]
+ %f_free = getelementptr inbounds %struct.cells, %struct.cells* %temp.2188, i32 0, i32 0
+ %1 = load i32, i32* %f_free, align 4, !tbaa !7
+ %add11 = add nsw i32 %1, %savef_free.1186
+ %faulty = getelementptr inbounds %struct.cells, %struct.cells* %temp.2188, i32 0, i32 1
+ %2 = load i32, i32* %faulty, align 4, !tbaa !10
+ %add12 = add nsw i32 %2, %savefaulty.1187
+ %next = getelementptr inbounds %struct.cells, %struct.cells* %temp.2188, i32 0, i32 2
+ %3 = load %struct.cells*, %struct.cells** %next, align 4, !tbaa !11
+ %f_free13 = getelementptr inbounds %struct.cells, %struct.cells* %3, i32 0, i32 0
+ %4 = load i32, i32* %f_free13, align 4, !tbaa !7
+ %add14 = add nsw i32 %4, %randv.0190
+ %and = and i32 %add14, 1
+ store i32 %and, i32* %f_free, align 4, !tbaa !7
+ %call16 = tail call i32 @lrand48() #2
+ %rem17 = srem i32 %call16, 1000
+ %conv18 = sitofp i32 %rem17 to double
+ %div19 = fdiv double %conv18, 1.000000e+03
+ %cmp20 = fcmp olt double %div19, %prob
+ %xor = zext i1 %cmp20 to i32
+ %randv.1 = xor i32 %xor, %randv.0190
+ %5 = load %struct.cells*, %struct.cells** %next, align 4, !tbaa !11
+ %faulty25 = getelementptr inbounds %struct.cells, %struct.cells* %5, i32 0, i32 1
+ %6 = load i32, i32* %faulty25, align 4, !tbaa !10
+ %add26 = add nsw i32 %randv.1, %6
+ %and27 = and i32 %add26, 1
+ store i32 %and27, i32* %faulty, align 4, !tbaa !10
+ %shr = ashr i32 %randv.0190, 1
+ %inc = add nuw nsw i32 %j.0189, 1
+ %exitcond = icmp eq i32 %inc, 31
+ br i1 %exitcond, label %for.inc30, label %for.body8
+
+for.inc30: ; preds = %for.body8
+ %inc31 = add nuw nsw i32 %i.0196, 1
+ %cmp4 = icmp slt i32 %inc31, %div
+ %call = tail call i32 @lrand48() #2
+ br i1 %cmp4, label %for.cond6.preheader, label %for.cond34.preheader
+
+for.body37: ; preds = %for.body37, %for.cond34.preheader
+ %randv.2207 = phi i32 [ %shr70, %for.body37 ], [ %call, %for.cond34.preheader ]
+ %temp.3205 = phi %struct.cells* [ %9, %for.body37 ], [ %5, %for.cond34.preheader ]
+ %f_free45 = getelementptr inbounds %struct.cells, %struct.cells* %temp.3205, i32 0, i32 0
+ %.pre220 = getelementptr inbounds %struct.cells, %struct.cells* %temp.3205, i32 0, i32 1
+ %next50 = getelementptr inbounds %struct.cells, %struct.cells* %temp.3205, i32 0, i32 2
+ %7 = load %struct.cells*, %struct.cells** %next50, align 4, !tbaa !11
+ %f_free51 = getelementptr inbounds %struct.cells, %struct.cells* %7, i32 0, i32 0
+ %8 = load i32, i32* %f_free51, align 4, !tbaa !7
+ %add52 = add nsw i32 %8, %randv.2207
+ %and53 = and i32 %add52, 1
+ store i32 %and53, i32* %f_free45, align 4, !tbaa !7
+ %call55 = tail call i32 @lrand48() #2
+ %rem56 = srem i32 %call55, 1000
+ %conv57 = sitofp i32 %rem56 to double
+ %div58 = fdiv double %conv57, 1.000000e+03
+ %cmp59 = fcmp olt double %div58, %prob
+ %xor62 = zext i1 %cmp59 to i32
+ %randv.3 = xor i32 %xor62, %randv.2207
+ %9 = load %struct.cells*, %struct.cells** %next50, align 4, !tbaa !11
+ %faulty65 = getelementptr inbounds %struct.cells, %struct.cells* %9, i32 0, i32 1
+ %10 = load i32, i32* %faulty65, align 4, !tbaa !10
+ %add66 = add nsw i32 %randv.3, %10
+ %and67 = and i32 %add66, 1
+ store i32 %and67, i32* %.pre220, align 4, !tbaa !10
+ %shr70 = ashr i32 %randv.2207, 1
+ br label %for.body37
+
+for.end73: ; preds = %for.cond34.preheader
+ %call74 = tail call i32 @lrand48() #2
+ %11 = load i32, i32* @reg_len, align 4, !tbaa !3
+ %sub75 = add nsw i32 %11, -1
+ %arrayidx76 = getelementptr inbounds i8, i8* %structure, i32 %sub75
+ %12 = load i8, i8* %arrayidx76, align 1, !tbaa !12
+ %cmp78 = icmp eq i8 %12, 49
+ %f_free81 = getelementptr inbounds %struct.cells, %struct.cells* %5, i32 0, i32 0
+ br i1 %cmp78, label %if.then80, label %for.end73.if.end85_crit_edge
+
+for.end73.if.end85_crit_edge: ; preds = %for.end73
+ %.pre222 = getelementptr inbounds %struct.cells, %struct.cells* %5, i32 0, i32 1
+ br label %if.end85
+
+if.then80: ; preds = %for.end73
+ %13 = load i32, i32* %f_free81, align 4, !tbaa !7
+ %add82 = add nsw i32 %13, %add11
+ %faulty83 = getelementptr inbounds %struct.cells, %struct.cells* %5, i32 0, i32 1
+ %14 = load i32, i32* %faulty83, align 4, !tbaa !10
+ %add84 = add nsw i32 %14, %add12
+ br label %if.end85
+
+if.end85: ; preds = %if.then80, %for.end73.if.end85_crit_edge
+ %faulty100.pre-phi = phi i32* [ %.pre222, %for.end73.if.end85_crit_edge ], [ %faulty83, %if.then80 ]
+ %savef_free.5 = phi i32 [ %add11, %for.end73.if.end85_crit_edge ], [ %add82, %if.then80 ]
+ %savefaulty.5 = phi i32 [ %add12, %for.end73.if.end85_crit_edge ], [ %add84, %if.then80 ]
+ %add86 = add nsw i32 %savef_free.5, %call74
+ %and87 = and i32 %add86, 1
+ store i32 %and87, i32* %f_free81, align 4, !tbaa !7
+ %call89 = tail call i32 @lrand48() #2
+ %rem90 = srem i32 %call89, 10000
+ %conv91 = sitofp i32 %rem90 to double
+ %div92 = fdiv double %conv91, 1.000000e+04
+ %cmp93 = fcmp olt double %div92, %prob
+ %xor96 = zext i1 %cmp93 to i32
+ %randv.4 = xor i32 %xor96, %call74
+ %add98 = add nsw i32 %randv.4, %savefaulty.5
+ %and99 = and i32 %add98, 1
+ store i32 %and99, i32* %faulty100.pre-phi, align 4, !tbaa !10
+ br label %for.cond3.preheader
+}
+
+; Function Attrs: optsize
+declare i32 @lrand48()
+
+attributes #2 = { nounwind optsize }
+
+!llvm.module.flags = !{!0, !1}
+!llvm.ident = !{!2}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{i32 1, !"min_enum_size", i32 4}
+!2 = !{!"clang version 3.7.0 (trunk 236243)"}
+!3 = !{!4, !4, i64 0}
+!4 = !{!"int", !5, i64 0}
+!5 = !{!"omnipotent char", !6, i64 0}
+!6 = !{!"Simple C/C++ TBAA"}
+!7 = !{!8, !4, i64 0}
+!8 = !{!"cells", !4, i64 0, !4, i64 4, !9, i64 8}
+!9 = !{!"any pointer", !5, i64 0}
+!10 = !{!8, !4, i64 4}
+!11 = !{!8, !9, i64 8}
+!12 = !{!5, !5, i64 0}
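The behaviour described by the new test's leading comment is easiest to picture at the machine-instruction level. A minimal, purely illustrative sketch (not output from this test) of what the Thumb2 size-reduction pass does to an add whose destination matches the second source rather than the first:

    add.w r0, r1, r0    @ 32-bit encoding; destination equals the second source
    add.w r0, r0, r1    @ operands commuted so the destination equals the first source
    add   r0, r1        @ now reducible to the 16-bit two-address encoding

Any per-operand flags on the commuted registers (in particular the internal-read flag used inside instruction bundles) have to travel with their operands; the -verify-machineinstrs in the RUN line is there to catch exactly that kind of misplaced flag.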
diff --git a/test/CodeGen/ARM/thumb_indirect_calls.ll b/test/CodeGen/ARM/thumb_indirect_calls.ll
index 16a55a882d9a..9f1950c743c0 100644
--- a/test/CodeGen/ARM/thumb_indirect_calls.ll
+++ b/test/CodeGen/ARM/thumb_indirect_calls.ll
@@ -6,7 +6,7 @@
; CHECK-LABEL foo:
define void @foo(i32 %x) {
entry:
- %0 = load void (i32)** @f, align 4
+ %0 = load void (i32)*, void (i32)** @f, align 4
tail call void %0(i32 %x)
ret void
diff --git a/test/CodeGen/ARM/tls1.ll b/test/CodeGen/ARM/tls1.ll
index b03f76b6ef08..d492522955ec 100644
--- a/test/CodeGen/ARM/tls1.ll
+++ b/test/CodeGen/ARM/tls1.ll
@@ -12,7 +12,7 @@
define i32 @f() {
entry:
- %tmp1 = load i32* @i ; <i32> [#uses=1]
+ %tmp1 = load i32, i32* @i ; <i32> [#uses=1]
ret i32 %tmp1
}
diff --git a/test/CodeGen/ARM/tls2.ll b/test/CodeGen/ARM/tls2.ll
index 24b4794b061a..d522da8f5714 100644
--- a/test/CodeGen/ARM/tls2.ll
+++ b/test/CodeGen/ARM/tls2.ll
@@ -12,7 +12,7 @@ define i32 @f() {
; CHECK-PIC-LABEL: f:
; CHECK-PIC: __tls_get_addr
entry:
- %tmp1 = load i32* @i ; <i32> [#uses=1]
+ %tmp1 = load i32, i32* @i ; <i32> [#uses=1]
ret i32 %tmp1
}
diff --git a/test/CodeGen/ARM/tls3.ll b/test/CodeGen/ARM/tls3.ll
index e0e944f70c5d..7e17b13a3c99 100644
--- a/test/CodeGen/ARM/tls3.ll
+++ b/test/CodeGen/ARM/tls3.ll
@@ -6,6 +6,6 @@
define i32 @main() {
entry:
- %tmp2 = load i32* getelementptr (%struct.anon* @teste, i32 0, i32 0), align 8 ; <i32> [#uses=1]
+ %tmp2 = load i32, i32* getelementptr (%struct.anon, %struct.anon* @teste, i32 0, i32 0), align 8 ; <i32> [#uses=1]
ret i32 %tmp2
}
diff --git a/test/CodeGen/ARM/trunc_ldr.ll b/test/CodeGen/ARM/trunc_ldr.ll
index 2ce9b894d55a..ca7ad9a2fc62 100644
--- a/test/CodeGen/ARM/trunc_ldr.ll
+++ b/test/CodeGen/ARM/trunc_ldr.ll
@@ -4,18 +4,18 @@
%struct.B = type { float, float, i32, i32, i32, [0 x i8] }
define i8 @f1(%struct.A* %d) {
- %tmp2 = getelementptr %struct.A* %d, i32 0, i32 4
+ %tmp2 = getelementptr %struct.A, %struct.A* %d, i32 0, i32 4
%tmp23 = bitcast i16* %tmp2 to i32*
- %tmp4 = load i32* %tmp23
+ %tmp4 = load i32, i32* %tmp23
%tmp512 = lshr i32 %tmp4, 24
%tmp56 = trunc i32 %tmp512 to i8
ret i8 %tmp56
}
define i32 @f2(%struct.A* %d) {
- %tmp2 = getelementptr %struct.A* %d, i32 0, i32 4
+ %tmp2 = getelementptr %struct.A, %struct.A* %d, i32 0, i32 4
%tmp23 = bitcast i16* %tmp2 to i32*
- %tmp4 = load i32* %tmp23
+ %tmp4 = load i32, i32* %tmp23
%tmp512 = lshr i32 %tmp4, 24
%tmp56 = trunc i32 %tmp512 to i8
%tmp57 = sext i8 %tmp56 to i32
diff --git a/test/CodeGen/ARM/truncstore-dag-combine.ll b/test/CodeGen/ARM/truncstore-dag-combine.ll
index 360e3e13f59e..11fa022451f6 100644
--- a/test/CodeGen/ARM/truncstore-dag-combine.ll
+++ b/test/CodeGen/ARM/truncstore-dag-combine.ll
@@ -3,7 +3,7 @@
define void @bar(i8* %P, i16* %Q) {
entry:
%P1 = bitcast i8* %P to i16* ; <i16*> [#uses=1]
- %tmp = load i16* %Q, align 1 ; <i16> [#uses=1]
+ %tmp = load i16, i16* %Q, align 1 ; <i16> [#uses=1]
store i16 %tmp, i16* %P1, align 1
ret void
}
@@ -11,7 +11,7 @@ entry:
define void @foo(i8* %P, i32* %Q) {
entry:
%P1 = bitcast i8* %P to i32* ; <i32*> [#uses=1]
- %tmp = load i32* %Q, align 1 ; <i32> [#uses=1]
+ %tmp = load i32, i32* %Q, align 1 ; <i32> [#uses=1]
store i32 %tmp, i32* %P1, align 1
ret void
}
diff --git a/test/CodeGen/ARM/twoaddrinstr.ll b/test/CodeGen/ARM/twoaddrinstr.ll
index 01df3b42d107..97a49334b742 100644
--- a/test/CodeGen/ARM/twoaddrinstr.ll
+++ b/test/CodeGen/ARM/twoaddrinstr.ll
@@ -12,7 +12,7 @@ define void @PR13378() nounwind {
; CHECK-NEXT: vst1.32
entry:
- %0 = load <4 x float>* undef, align 4
+ %0 = load <4 x float>, <4 x float>* undef, align 4
store <4 x float> zeroinitializer, <4 x float>* undef, align 4
store <4 x float> %0, <4 x float>* undef, align 4
%1 = insertelement <4 x float> %0, float 1.000000e+00, i32 3
diff --git a/test/CodeGen/ARM/uint64tof64.ll b/test/CodeGen/ARM/uint64tof64.ll
index 32eb225a2ad6..cd35ce74d8ee 100644
--- a/test/CodeGen/ARM/uint64tof64.ll
+++ b/test/CodeGen/ARM/uint64tof64.ll
@@ -7,10 +7,10 @@
define fastcc void @t() {
entry:
- %0 = load i64* null, align 4 ; <i64> [#uses=1]
+ %0 = load i64, i64* null, align 4 ; <i64> [#uses=1]
%1 = uitofp i64 %0 to double ; <double> [#uses=1]
%2 = fdiv double 0.000000e+00, %1 ; <double> [#uses=1]
- %3 = call i32 (%struct.FILE*, i8*, ...)* @fprintf(%struct.FILE* null, i8* getelementptr ([54 x i8]* @"\01LC10", i32 0, i32 0), i64 0, double %2) ; <i32> [#uses=0]
+ %3 = call i32 (%struct.FILE*, i8*, ...) @fprintf(%struct.FILE* null, i8* getelementptr ([54 x i8], [54 x i8]* @"\01LC10", i32 0, i32 0), i64 0, double %2) ; <i32> [#uses=0]
ret void
}
diff --git a/test/CodeGen/ARM/umulo-32.ll b/test/CodeGen/ARM/umulo-32.ll
index 19875ce94071..1c8357314c28 100644
--- a/test/CodeGen/ARM/umulo-32.ll
+++ b/test/CodeGen/ARM/umulo-32.ll
@@ -28,7 +28,7 @@ store i32 0, i32* %1
store i32 %argc, i32* %2, align 4
store i8** %argv, i8*** %3, align 4
store i32 10, i32* %m_degree, align 4
-%4 = load i32* %m_degree, align 4
+%4 = load i32, i32* %m_degree, align 4
%5 = call %umul.ty @llvm.umul.with.overflow.i32(i32 %4, i32 8)
%6 = extractvalue %umul.ty %5, 1
%7 = extractvalue %umul.ty %5, 0
diff --git a/test/CodeGen/ARM/unaligned_load_store.ll b/test/CodeGen/ARM/unaligned_load_store.ll
index 72163ae30c38..0be3917ffa26 100644
--- a/test/CodeGen/ARM/unaligned_load_store.ll
+++ b/test/CodeGen/ARM/unaligned_load_store.ll
@@ -28,7 +28,7 @@ entry:
%__src1.i = bitcast i8* %b to i32* ; <i32*> [#uses=1]
%__dest2.i = bitcast i8* %a to i32* ; <i32*> [#uses=1]
- %tmp.i = load i32* %__src1.i, align 1 ; <i32> [#uses=1]
+ %tmp.i = load i32, i32* %__src1.i, align 1 ; <i32> [#uses=1]
store i32 %tmp.i, i32* %__dest2.i, align 1
ret void
}
@@ -44,7 +44,7 @@ entry:
; UNALIGNED-LABEL: hword:
; UNALIGNED: vld1.16
; UNALIGNED: vst1.16
- %tmp = load double* %a, align 2
+ %tmp = load double, double* %a, align 2
store double %tmp, double* %b, align 2
ret void
}
@@ -60,7 +60,7 @@ entry:
; UNALIGNED-LABEL: byte:
; UNALIGNED: vld1.8
; UNALIGNED: vst1.8
- %tmp = load double* %a, align 1
+ %tmp = load double, double* %a, align 1
store double %tmp, double* %b, align 1
ret void
}
@@ -76,7 +76,7 @@ entry:
; UNALIGNED: ldr
; UNALIGNED-NOT: strb
; UNALIGNED: str
- %tmp = load i32* %a, align 1
+ %tmp = load i32, i32* %a, align 1
store i32 %tmp, i32* %b, align 1
ret void
}
diff --git a/test/CodeGen/ARM/unaligned_load_store_vector.ll b/test/CodeGen/ARM/unaligned_load_store_vector.ll
index 000ed489c4c0..abb523c22757 100644
--- a/test/CodeGen/ARM/unaligned_load_store_vector.ll
+++ b/test/CodeGen/ARM/unaligned_load_store_vector.ll
@@ -6,12 +6,12 @@
define void @v64_v8i8_1(i8* noalias nocapture %out, i8* noalias nocapture %in) nounwind {
;CHECK-LABEL: v64_v8i8_1:
entry:
- %po = getelementptr i8* %out, i32 0
- %pi = getelementptr i8* %in, i32 0
+ %po = getelementptr i8, i8* %out, i32 0
+ %pi = getelementptr i8, i8* %in, i32 0
%vi = bitcast i8* %pi to <8 x i8>*
%vo = bitcast i8* %po to <8 x i8>*
;CHECK: vld1.8
- %v1 = load <8 x i8>* %vi, align 1
+ %v1 = load <8 x i8>, <8 x i8>* %vi, align 1
;CHECK: vst1.8
store <8 x i8> %v1, <8 x i8>* %vo, align 1
ret void
@@ -24,12 +24,12 @@ entry:
define void @v64_v4i16_1(i8* noalias nocapture %out, i8* noalias nocapture %in) nounwind {
;CHECK-LABEL: v64_v4i16_1:
entry:
- %po = getelementptr i8* %out, i32 0
- %pi = getelementptr i8* %in, i32 0
+ %po = getelementptr i8, i8* %out, i32 0
+ %pi = getelementptr i8, i8* %in, i32 0
%vi = bitcast i8* %pi to <4 x i16>*
%vo = bitcast i8* %po to <4 x i16>*
;CHECK: vld1.8
- %v1 = load <4 x i16>* %vi, align 1
+ %v1 = load <4 x i16>, <4 x i16>* %vi, align 1
;CHECK: vst1.8
store <4 x i16> %v1, <4 x i16>* %vo, align 1
ret void
@@ -42,12 +42,12 @@ entry:
define void @v64_v2i32_1(i8* noalias nocapture %out, i8* noalias nocapture %in) nounwind {
;CHECK-LABEL: v64_v2i32_1:
entry:
- %po = getelementptr i8* %out, i32 0
- %pi = getelementptr i8* %in, i32 0
+ %po = getelementptr i8, i8* %out, i32 0
+ %pi = getelementptr i8, i8* %in, i32 0
%vi = bitcast i8* %pi to <2 x i32>*
%vo = bitcast i8* %po to <2 x i32>*
;CHECK: vld1.8
- %v1 = load <2 x i32>* %vi, align 1
+ %v1 = load <2 x i32>, <2 x i32>* %vi, align 1
;CHECK: vst1.8
store <2 x i32> %v1, <2 x i32>* %vo, align 1
ret void
@@ -60,12 +60,12 @@ entry:
define void @v64_v2f32_1(i8* noalias nocapture %out, i8* noalias nocapture %in) nounwind {
;CHECK-LABEL: v64_v2f32_1:
entry:
- %po = getelementptr i8* %out, i32 0
- %pi = getelementptr i8* %in, i32 0
+ %po = getelementptr i8, i8* %out, i32 0
+ %pi = getelementptr i8, i8* %in, i32 0
%vi = bitcast i8* %pi to <2 x float>*
%vo = bitcast i8* %po to <2 x float>*
;CHECK: vld1.8
- %v1 = load <2 x float>* %vi, align 1
+ %v1 = load <2 x float>, <2 x float>* %vi, align 1
;CHECK: vst1.8
store <2 x float> %v1, <2 x float>* %vo, align 1
ret void
@@ -78,12 +78,12 @@ entry:
define void @v128_v16i8_1(i8* noalias nocapture %out, i8* noalias nocapture %in) nounwind {
;CHECK-LABEL: v128_v16i8_1:
entry:
- %po = getelementptr i8* %out, i32 0
- %pi = getelementptr i8* %in, i32 0
+ %po = getelementptr i8, i8* %out, i32 0
+ %pi = getelementptr i8, i8* %in, i32 0
%vi = bitcast i8* %pi to <16 x i8>*
%vo = bitcast i8* %po to <16 x i8>*
;CHECK: vld1.8
- %v1 = load <16 x i8>* %vi, align 1
+ %v1 = load <16 x i8>, <16 x i8>* %vi, align 1
;CHECK: vst1.8
store <16 x i8> %v1, <16 x i8>* %vo, align 1
ret void
@@ -96,12 +96,12 @@ entry:
define void @v128_v8i16_1(i8* noalias nocapture %out, i8* noalias nocapture %in) nounwind {
;CHECK-LABEL: v128_v8i16_1:
entry:
- %po = getelementptr i8* %out, i32 0
- %pi = getelementptr i8* %in, i32 0
+ %po = getelementptr i8, i8* %out, i32 0
+ %pi = getelementptr i8, i8* %in, i32 0
%vi = bitcast i8* %pi to <8 x i16>*
%vo = bitcast i8* %po to <8 x i16>*
;CHECK: vld1.8
- %v1 = load <8 x i16>* %vi, align 1
+ %v1 = load <8 x i16>, <8 x i16>* %vi, align 1
;CHECK: vst1.8
store <8 x i16> %v1, <8 x i16>* %vo, align 1
ret void
@@ -114,12 +114,12 @@ entry:
define void @v128_v4i32_1(i8* noalias nocapture %out, i8* noalias nocapture %in) nounwind {
;CHECK-LABEL: v128_v4i32_1:
entry:
- %po = getelementptr i8* %out, i32 0
- %pi = getelementptr i8* %in, i32 0
+ %po = getelementptr i8, i8* %out, i32 0
+ %pi = getelementptr i8, i8* %in, i32 0
%vi = bitcast i8* %pi to <4 x i32>*
%vo = bitcast i8* %po to <4 x i32>*
;CHECK: vld1.8
- %v1 = load <4 x i32>* %vi, align 1
+ %v1 = load <4 x i32>, <4 x i32>* %vi, align 1
;CHECK: vst1.8
store <4 x i32> %v1, <4 x i32>* %vo, align 1
ret void
@@ -132,12 +132,12 @@ entry:
define void @v128_v2i64_1(i8* noalias nocapture %out, i8* noalias nocapture %in) nounwind {
;CHECK-LABEL: v128_v2i64_1:
entry:
- %po = getelementptr i8* %out, i32 0
- %pi = getelementptr i8* %in, i32 0
+ %po = getelementptr i8, i8* %out, i32 0
+ %pi = getelementptr i8, i8* %in, i32 0
%vi = bitcast i8* %pi to <2 x i64>*
%vo = bitcast i8* %po to <2 x i64>*
;CHECK: vld1.8
- %v1 = load <2 x i64>* %vi, align 1
+ %v1 = load <2 x i64>, <2 x i64>* %vi, align 1
;CHECK: vst1.8
store <2 x i64> %v1, <2 x i64>* %vo, align 1
ret void
@@ -150,12 +150,12 @@ entry:
define void @v128_v4f32_1(i8* noalias nocapture %out, i8* noalias nocapture %in) nounwind {
;CHECK-LABEL: v128_v4f32_1:
entry:
- %po = getelementptr i8* %out, i32 0
- %pi = getelementptr i8* %in, i32 0
+ %po = getelementptr i8, i8* %out, i32 0
+ %pi = getelementptr i8, i8* %in, i32 0
%vi = bitcast i8* %pi to <4 x float>*
%vo = bitcast i8* %po to <4 x float>*
;CHECK: vld1.8
- %v1 = load <4 x float>* %vi, align 1
+ %v1 = load <4 x float>, <4 x float>* %vi, align 1
;CHECK: vst1.8
store <4 x float> %v1, <4 x float>* %vo, align 1
ret void
@@ -168,12 +168,12 @@ entry:
define void @v64_v8i8_2(i8* noalias nocapture %out, i8* noalias nocapture %in) nounwind {
;CHECK-LABEL: v64_v8i8_2:
entry:
- %po = getelementptr i8* %out, i32 0
- %pi = getelementptr i8* %in, i32 0
+ %po = getelementptr i8, i8* %out, i32 0
+ %pi = getelementptr i8, i8* %in, i32 0
%vi = bitcast i8* %pi to <8 x i8>*
%vo = bitcast i8* %po to <8 x i8>*
;CHECK: vld1.16
- %v1 = load <8 x i8>* %vi, align 2
+ %v1 = load <8 x i8>, <8 x i8>* %vi, align 2
;CHECK: vst1.16
store <8 x i8> %v1, <8 x i8>* %vo, align 2
ret void
@@ -186,12 +186,12 @@ entry:
define void @v64_v4i16_2(i8* noalias nocapture %out, i8* noalias nocapture %in) nounwind {
;CHECK-LABEL: v64_v4i16_2:
entry:
- %po = getelementptr i8* %out, i32 0
- %pi = getelementptr i8* %in, i32 0
+ %po = getelementptr i8, i8* %out, i32 0
+ %pi = getelementptr i8, i8* %in, i32 0
%vi = bitcast i8* %pi to <4 x i16>*
%vo = bitcast i8* %po to <4 x i16>*
;CHECK: vld1.16
- %v1 = load <4 x i16>* %vi, align 2
+ %v1 = load <4 x i16>, <4 x i16>* %vi, align 2
;CHECK: vst1.16
store <4 x i16> %v1, <4 x i16>* %vo, align 2
ret void
@@ -204,12 +204,12 @@ entry:
define void @v64_v2i32_2(i8* noalias nocapture %out, i8* noalias nocapture %in) nounwind {
;CHECK-LABEL: v64_v2i32_2:
entry:
- %po = getelementptr i8* %out, i32 0
- %pi = getelementptr i8* %in, i32 0
+ %po = getelementptr i8, i8* %out, i32 0
+ %pi = getelementptr i8, i8* %in, i32 0
%vi = bitcast i8* %pi to <2 x i32>*
%vo = bitcast i8* %po to <2 x i32>*
;CHECK: vld1.16
- %v1 = load <2 x i32>* %vi, align 2
+ %v1 = load <2 x i32>, <2 x i32>* %vi, align 2
;CHECK: vst1.16
store <2 x i32> %v1, <2 x i32>* %vo, align 2
ret void
@@ -222,12 +222,12 @@ entry:
define void @v64_v2f32_2(i8* noalias nocapture %out, i8* noalias nocapture %in) nounwind {
;CHECK-LABEL: v64_v2f32_2:
entry:
- %po = getelementptr i8* %out, i32 0
- %pi = getelementptr i8* %in, i32 0
+ %po = getelementptr i8, i8* %out, i32 0
+ %pi = getelementptr i8, i8* %in, i32 0
%vi = bitcast i8* %pi to <2 x float>*
%vo = bitcast i8* %po to <2 x float>*
;CHECK: vld1.16
- %v1 = load <2 x float>* %vi, align 2
+ %v1 = load <2 x float>, <2 x float>* %vi, align 2
;CHECK: vst1.16
store <2 x float> %v1, <2 x float>* %vo, align 2
ret void
@@ -240,12 +240,12 @@ entry:
define void @v128_v16i8_2(i8* noalias nocapture %out, i8* noalias nocapture %in) nounwind {
;CHECK-LABEL: v128_v16i8_2:
entry:
- %po = getelementptr i8* %out, i32 0
- %pi = getelementptr i8* %in, i32 0
+ %po = getelementptr i8, i8* %out, i32 0
+ %pi = getelementptr i8, i8* %in, i32 0
%vi = bitcast i8* %pi to <16 x i8>*
%vo = bitcast i8* %po to <16 x i8>*
;CHECK: vld1.16
- %v1 = load <16 x i8>* %vi, align 2
+ %v1 = load <16 x i8>, <16 x i8>* %vi, align 2
;CHECK: vst1.16
store <16 x i8> %v1, <16 x i8>* %vo, align 2
ret void
@@ -258,12 +258,12 @@ entry:
define void @v128_v8i16_2(i8* noalias nocapture %out, i8* noalias nocapture %in) nounwind {
;CHECK-LABEL: v128_v8i16_2:
entry:
- %po = getelementptr i8* %out, i32 0
- %pi = getelementptr i8* %in, i32 0
+ %po = getelementptr i8, i8* %out, i32 0
+ %pi = getelementptr i8, i8* %in, i32 0
%vi = bitcast i8* %pi to <8 x i16>*
%vo = bitcast i8* %po to <8 x i16>*
;CHECK: vld1.16
- %v1 = load <8 x i16>* %vi, align 2
+ %v1 = load <8 x i16>, <8 x i16>* %vi, align 2
;CHECK: vst1.16
store <8 x i16> %v1, <8 x i16>* %vo, align 2
ret void
@@ -276,12 +276,12 @@ entry:
define void @v128_v4i32_2(i8* noalias nocapture %out, i8* noalias nocapture %in) nounwind {
;CHECK-LABEL: v128_v4i32_2:
entry:
- %po = getelementptr i8* %out, i32 0
- %pi = getelementptr i8* %in, i32 0
+ %po = getelementptr i8, i8* %out, i32 0
+ %pi = getelementptr i8, i8* %in, i32 0
%vi = bitcast i8* %pi to <4 x i32>*
%vo = bitcast i8* %po to <4 x i32>*
;CHECK: vld1.16
- %v1 = load <4 x i32>* %vi, align 2
+ %v1 = load <4 x i32>, <4 x i32>* %vi, align 2
;CHECK: vst1.16
store <4 x i32> %v1, <4 x i32>* %vo, align 2
ret void
@@ -294,12 +294,12 @@ entry:
define void @v128_v2i64_2(i8* noalias nocapture %out, i8* noalias nocapture %in) nounwind {
;CHECK-LABEL: v128_v2i64_2:
entry:
- %po = getelementptr i8* %out, i32 0
- %pi = getelementptr i8* %in, i32 0
+ %po = getelementptr i8, i8* %out, i32 0
+ %pi = getelementptr i8, i8* %in, i32 0
%vi = bitcast i8* %pi to <2 x i64>*
%vo = bitcast i8* %po to <2 x i64>*
;CHECK: vld1.16
- %v1 = load <2 x i64>* %vi, align 2
+ %v1 = load <2 x i64>, <2 x i64>* %vi, align 2
;CHECK: vst1.16
store <2 x i64> %v1, <2 x i64>* %vo, align 2
ret void
@@ -312,12 +312,12 @@ entry:
define void @v128_v4f32_2(i8* noalias nocapture %out, i8* noalias nocapture %in) nounwind {
;CHECK-LABEL: v128_v4f32_2:
entry:
- %po = getelementptr i8* %out, i32 0
- %pi = getelementptr i8* %in, i32 0
+ %po = getelementptr i8, i8* %out, i32 0
+ %pi = getelementptr i8, i8* %in, i32 0
%vi = bitcast i8* %pi to <4 x float>*
%vo = bitcast i8* %po to <4 x float>*
;CHECK: vld1.16
- %v1 = load <4 x float>* %vi, align 2
+ %v1 = load <4 x float>, <4 x float>* %vi, align 2
;CHECK: vst1.16
store <4 x float> %v1, <4 x float>* %vo, align 2
ret void
@@ -330,12 +330,12 @@ entry:
define void @v64_v8i8_4(i8* noalias nocapture %out, i8* noalias nocapture %in) nounwind {
;CHECK-LABEL: v64_v8i8_4:
entry:
- %po = getelementptr i8* %out, i32 0
- %pi = getelementptr i8* %in, i32 0
+ %po = getelementptr i8, i8* %out, i32 0
+ %pi = getelementptr i8, i8* %in, i32 0
%vi = bitcast i8* %pi to <8 x i8>*
%vo = bitcast i8* %po to <8 x i8>*
;CHECK: vldr
- %v1 = load <8 x i8>* %vi, align 4
+ %v1 = load <8 x i8>, <8 x i8>* %vi, align 4
;CHECK: vstr
store <8 x i8> %v1, <8 x i8>* %vo, align 4
ret void
@@ -348,12 +348,12 @@ entry:
define void @v64_v4i16_4(i8* noalias nocapture %out, i8* noalias nocapture %in) nounwind {
;CHECK-LABEL: v64_v4i16_4:
entry:
- %po = getelementptr i8* %out, i32 0
- %pi = getelementptr i8* %in, i32 0
+ %po = getelementptr i8, i8* %out, i32 0
+ %pi = getelementptr i8, i8* %in, i32 0
%vi = bitcast i8* %pi to <4 x i16>*
%vo = bitcast i8* %po to <4 x i16>*
;CHECK: vldr
- %v1 = load <4 x i16>* %vi, align 4
+ %v1 = load <4 x i16>, <4 x i16>* %vi, align 4
;CHECK: vstr
store <4 x i16> %v1, <4 x i16>* %vo, align 4
ret void
@@ -366,12 +366,12 @@ entry:
define void @v64_v2i32_4(i8* noalias nocapture %out, i8* noalias nocapture %in) nounwind {
;CHECK-LABEL: v64_v2i32_4:
entry:
- %po = getelementptr i8* %out, i32 0
- %pi = getelementptr i8* %in, i32 0
+ %po = getelementptr i8, i8* %out, i32 0
+ %pi = getelementptr i8, i8* %in, i32 0
%vi = bitcast i8* %pi to <2 x i32>*
%vo = bitcast i8* %po to <2 x i32>*
;CHECK: vldr
- %v1 = load <2 x i32>* %vi, align 4
+ %v1 = load <2 x i32>, <2 x i32>* %vi, align 4
;CHECK: vstr
store <2 x i32> %v1, <2 x i32>* %vo, align 4
ret void
@@ -384,12 +384,12 @@ entry:
define void @v64_v2f32_4(i8* noalias nocapture %out, i8* noalias nocapture %in) nounwind {
;CHECK-LABEL: v64_v2f32_4:
entry:
- %po = getelementptr i8* %out, i32 0
- %pi = getelementptr i8* %in, i32 0
+ %po = getelementptr i8, i8* %out, i32 0
+ %pi = getelementptr i8, i8* %in, i32 0
%vi = bitcast i8* %pi to <2 x float>*
%vo = bitcast i8* %po to <2 x float>*
;CHECK: vldr
- %v1 = load <2 x float>* %vi, align 4
+ %v1 = load <2 x float>, <2 x float>* %vi, align 4
;CHECK: vstr
store <2 x float> %v1, <2 x float>* %vo, align 4
ret void
@@ -402,12 +402,12 @@ entry:
define void @v128_v16i8_4(i8* noalias nocapture %out, i8* noalias nocapture %in) nounwind {
;CHECK-LABEL: v128_v16i8_4:
entry:
- %po = getelementptr i8* %out, i32 0
- %pi = getelementptr i8* %in, i32 0
+ %po = getelementptr i8, i8* %out, i32 0
+ %pi = getelementptr i8, i8* %in, i32 0
%vi = bitcast i8* %pi to <16 x i8>*
%vo = bitcast i8* %po to <16 x i8>*
;CHECK: vld1.32
- %v1 = load <16 x i8>* %vi, align 4
+ %v1 = load <16 x i8>, <16 x i8>* %vi, align 4
;CHECK: vst1.32
store <16 x i8> %v1, <16 x i8>* %vo, align 4
ret void
@@ -420,12 +420,12 @@ entry:
define void @v128_v8i16_4(i8* noalias nocapture %out, i8* noalias nocapture %in) nounwind {
;CHECK-LABEL: v128_v8i16_4:
entry:
- %po = getelementptr i8* %out, i32 0
- %pi = getelementptr i8* %in, i32 0
+ %po = getelementptr i8, i8* %out, i32 0
+ %pi = getelementptr i8, i8* %in, i32 0
%vi = bitcast i8* %pi to <8 x i16>*
%vo = bitcast i8* %po to <8 x i16>*
;CHECK: vld1.32
- %v1 = load <8 x i16>* %vi, align 4
+ %v1 = load <8 x i16>, <8 x i16>* %vi, align 4
;CHECK: vst1.32
store <8 x i16> %v1, <8 x i16>* %vo, align 4
ret void
@@ -438,12 +438,12 @@ entry:
define void @v128_v4i32_4(i8* noalias nocapture %out, i8* noalias nocapture %in) nounwind {
;CHECK-LABEL: v128_v4i32_4:
entry:
- %po = getelementptr i8* %out, i32 0
- %pi = getelementptr i8* %in, i32 0
+ %po = getelementptr i8, i8* %out, i32 0
+ %pi = getelementptr i8, i8* %in, i32 0
%vi = bitcast i8* %pi to <4 x i32>*
%vo = bitcast i8* %po to <4 x i32>*
;CHECK: vld1.32
- %v1 = load <4 x i32>* %vi, align 4
+ %v1 = load <4 x i32>, <4 x i32>* %vi, align 4
;CHECK: vst1.32
store <4 x i32> %v1, <4 x i32>* %vo, align 4
ret void
@@ -456,12 +456,12 @@ entry:
define void @v128_v2i64_4(i8* noalias nocapture %out, i8* noalias nocapture %in) nounwind {
;CHECK-LABEL: v128_v2i64_4:
entry:
- %po = getelementptr i8* %out, i32 0
- %pi = getelementptr i8* %in, i32 0
+ %po = getelementptr i8, i8* %out, i32 0
+ %pi = getelementptr i8, i8* %in, i32 0
%vi = bitcast i8* %pi to <2 x i64>*
%vo = bitcast i8* %po to <2 x i64>*
;CHECK: vld1.32
- %v1 = load <2 x i64>* %vi, align 4
+ %v1 = load <2 x i64>, <2 x i64>* %vi, align 4
;CHECK: vst1.32
store <2 x i64> %v1, <2 x i64>* %vo, align 4
ret void
@@ -474,12 +474,12 @@ entry:
define void @v128_v4f32_4(i8* noalias nocapture %out, i8* noalias nocapture %in) nounwind {
;CHECK-LABEL: v128_v4f32_4:
entry:
- %po = getelementptr i8* %out, i32 0
- %pi = getelementptr i8* %in, i32 0
+ %po = getelementptr i8, i8* %out, i32 0
+ %pi = getelementptr i8, i8* %in, i32 0
%vi = bitcast i8* %pi to <4 x float>*
%vo = bitcast i8* %po to <4 x float>*
;CHECK: vld1.32
- %v1 = load <4 x float>* %vi, align 4
+ %v1 = load <4 x float>, <4 x float>* %vi, align 4
;CHECK: vst1.32
store <4 x float> %v1, <4 x float>* %vo, align 4
ret void
diff --git a/test/CodeGen/ARM/undef-sext.ll b/test/CodeGen/ARM/undef-sext.ll
index c6d76d0017df..bb06bcbaf44c 100644
--- a/test/CodeGen/ARM/undef-sext.ll
+++ b/test/CodeGen/ARM/undef-sext.ll
@@ -8,7 +8,7 @@ entry:
; CHECK: ldr r0, [r0]
; CHECK: bx lr
%0 = sext i16 undef to i32
- %1 = getelementptr inbounds i32* %a, i32 %0
- %2 = load i32* %1, align 4
+ %1 = getelementptr inbounds i32, i32* %a, i32 %0
+ %2 = load i32, i32* %1, align 4
ret i32 %2
}
diff --git a/test/CodeGen/ARM/vaba.ll b/test/CodeGen/ARM/vaba.ll
index 6478b1843c69..4323f3184469 100644
--- a/test/CodeGen/ARM/vaba.ll
+++ b/test/CodeGen/ARM/vaba.ll
@@ -3,9 +3,9 @@
define <8 x i8> @vabas8(<8 x i8>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
;CHECK-LABEL: vabas8:
;CHECK: vaba.s8
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
- %tmp3 = load <8 x i8>* %C
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
+ %tmp2 = load <8 x i8>, <8 x i8>* %B
+ %tmp3 = load <8 x i8>, <8 x i8>* %C
%tmp4 = call <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8> %tmp2, <8 x i8> %tmp3)
%tmp5 = add <8 x i8> %tmp1, %tmp4
ret <8 x i8> %tmp5
@@ -14,9 +14,9 @@ define <8 x i8> @vabas8(<8 x i8>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
define <4 x i16> @vabas16(<4 x i16>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
;CHECK-LABEL: vabas16:
;CHECK: vaba.s16
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
- %tmp3 = load <4 x i16>* %C
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
+ %tmp3 = load <4 x i16>, <4 x i16>* %C
%tmp4 = call <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16> %tmp2, <4 x i16> %tmp3)
%tmp5 = add <4 x i16> %tmp1, %tmp4
ret <4 x i16> %tmp5
@@ -25,9 +25,9 @@ define <4 x i16> @vabas16(<4 x i16>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind
define <2 x i32> @vabas32(<2 x i32>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
;CHECK-LABEL: vabas32:
;CHECK: vaba.s32
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
- %tmp3 = load <2 x i32>* %C
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
+ %tmp3 = load <2 x i32>, <2 x i32>* %C
%tmp4 = call <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32> %tmp2, <2 x i32> %tmp3)
%tmp5 = add <2 x i32> %tmp1, %tmp4
ret <2 x i32> %tmp5
@@ -36,9 +36,9 @@ define <2 x i32> @vabas32(<2 x i32>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind
define <8 x i8> @vabau8(<8 x i8>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
;CHECK-LABEL: vabau8:
;CHECK: vaba.u8
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
- %tmp3 = load <8 x i8>* %C
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
+ %tmp2 = load <8 x i8>, <8 x i8>* %B
+ %tmp3 = load <8 x i8>, <8 x i8>* %C
%tmp4 = call <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8> %tmp2, <8 x i8> %tmp3)
%tmp5 = add <8 x i8> %tmp1, %tmp4
ret <8 x i8> %tmp5
@@ -47,9 +47,9 @@ define <8 x i8> @vabau8(<8 x i8>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
define <4 x i16> @vabau16(<4 x i16>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
;CHECK-LABEL: vabau16:
;CHECK: vaba.u16
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
- %tmp3 = load <4 x i16>* %C
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
+ %tmp3 = load <4 x i16>, <4 x i16>* %C
%tmp4 = call <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16> %tmp2, <4 x i16> %tmp3)
%tmp5 = add <4 x i16> %tmp1, %tmp4
ret <4 x i16> %tmp5
@@ -58,9 +58,9 @@ define <4 x i16> @vabau16(<4 x i16>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind
define <2 x i32> @vabau32(<2 x i32>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
;CHECK-LABEL: vabau32:
;CHECK: vaba.u32
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
- %tmp3 = load <2 x i32>* %C
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
+ %tmp3 = load <2 x i32>, <2 x i32>* %C
%tmp4 = call <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32> %tmp2, <2 x i32> %tmp3)
%tmp5 = add <2 x i32> %tmp1, %tmp4
ret <2 x i32> %tmp5
@@ -69,9 +69,9 @@ define <2 x i32> @vabau32(<2 x i32>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind
define <16 x i8> @vabaQs8(<16 x i8>* %A, <16 x i8>* %B, <16 x i8>* %C) nounwind {
;CHECK-LABEL: vabaQs8:
;CHECK: vaba.s8
- %tmp1 = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
- %tmp3 = load <16 x i8>* %C
+ %tmp1 = load <16 x i8>, <16 x i8>* %A
+ %tmp2 = load <16 x i8>, <16 x i8>* %B
+ %tmp3 = load <16 x i8>, <16 x i8>* %C
%tmp4 = call <16 x i8> @llvm.arm.neon.vabds.v16i8(<16 x i8> %tmp2, <16 x i8> %tmp3)
%tmp5 = add <16 x i8> %tmp1, %tmp4
ret <16 x i8> %tmp5
@@ -80,9 +80,9 @@ define <16 x i8> @vabaQs8(<16 x i8>* %A, <16 x i8>* %B, <16 x i8>* %C) nounwind
define <8 x i16> @vabaQs16(<8 x i16>* %A, <8 x i16>* %B, <8 x i16>* %C) nounwind {
;CHECK-LABEL: vabaQs16:
;CHECK: vaba.s16
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
- %tmp3 = load <8 x i16>* %C
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
+ %tmp2 = load <8 x i16>, <8 x i16>* %B
+ %tmp3 = load <8 x i16>, <8 x i16>* %C
%tmp4 = call <8 x i16> @llvm.arm.neon.vabds.v8i16(<8 x i16> %tmp2, <8 x i16> %tmp3)
%tmp5 = add <8 x i16> %tmp1, %tmp4
ret <8 x i16> %tmp5
@@ -91,9 +91,9 @@ define <8 x i16> @vabaQs16(<8 x i16>* %A, <8 x i16>* %B, <8 x i16>* %C) nounwind
define <4 x i32> @vabaQs32(<4 x i32>* %A, <4 x i32>* %B, <4 x i32>* %C) nounwind {
;CHECK-LABEL: vabaQs32:
;CHECK: vaba.s32
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
- %tmp3 = load <4 x i32>* %C
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
+ %tmp2 = load <4 x i32>, <4 x i32>* %B
+ %tmp3 = load <4 x i32>, <4 x i32>* %C
%tmp4 = call <4 x i32> @llvm.arm.neon.vabds.v4i32(<4 x i32> %tmp2, <4 x i32> %tmp3)
%tmp5 = add <4 x i32> %tmp1, %tmp4
ret <4 x i32> %tmp5
@@ -102,9 +102,9 @@ define <4 x i32> @vabaQs32(<4 x i32>* %A, <4 x i32>* %B, <4 x i32>* %C) nounwind
define <16 x i8> @vabaQu8(<16 x i8>* %A, <16 x i8>* %B, <16 x i8>* %C) nounwind {
;CHECK-LABEL: vabaQu8:
;CHECK: vaba.u8
- %tmp1 = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
- %tmp3 = load <16 x i8>* %C
+ %tmp1 = load <16 x i8>, <16 x i8>* %A
+ %tmp2 = load <16 x i8>, <16 x i8>* %B
+ %tmp3 = load <16 x i8>, <16 x i8>* %C
%tmp4 = call <16 x i8> @llvm.arm.neon.vabdu.v16i8(<16 x i8> %tmp2, <16 x i8> %tmp3)
%tmp5 = add <16 x i8> %tmp1, %tmp4
ret <16 x i8> %tmp5
@@ -113,9 +113,9 @@ define <16 x i8> @vabaQu8(<16 x i8>* %A, <16 x i8>* %B, <16 x i8>* %C) nounwind
define <8 x i16> @vabaQu16(<8 x i16>* %A, <8 x i16>* %B, <8 x i16>* %C) nounwind {
;CHECK-LABEL: vabaQu16:
;CHECK: vaba.u16
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
- %tmp3 = load <8 x i16>* %C
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
+ %tmp2 = load <8 x i16>, <8 x i16>* %B
+ %tmp3 = load <8 x i16>, <8 x i16>* %C
%tmp4 = call <8 x i16> @llvm.arm.neon.vabdu.v8i16(<8 x i16> %tmp2, <8 x i16> %tmp3)
%tmp5 = add <8 x i16> %tmp1, %tmp4
ret <8 x i16> %tmp5
@@ -124,9 +124,9 @@ define <8 x i16> @vabaQu16(<8 x i16>* %A, <8 x i16>* %B, <8 x i16>* %C) nounwind
define <4 x i32> @vabaQu32(<4 x i32>* %A, <4 x i32>* %B, <4 x i32>* %C) nounwind {
;CHECK-LABEL: vabaQu32:
;CHECK: vaba.u32
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
- %tmp3 = load <4 x i32>* %C
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
+ %tmp2 = load <4 x i32>, <4 x i32>* %B
+ %tmp3 = load <4 x i32>, <4 x i32>* %C
%tmp4 = call <4 x i32> @llvm.arm.neon.vabdu.v4i32(<4 x i32> %tmp2, <4 x i32> %tmp3)
%tmp5 = add <4 x i32> %tmp1, %tmp4
ret <4 x i32> %tmp5
@@ -151,9 +151,9 @@ declare <4 x i32> @llvm.arm.neon.vabdu.v4i32(<4 x i32>, <4 x i32>) nounwind read
define <8 x i16> @vabals8(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
;CHECK-LABEL: vabals8:
;CHECK: vabal.s8
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i8>* %B
- %tmp3 = load <8 x i8>* %C
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
+ %tmp2 = load <8 x i8>, <8 x i8>* %B
+ %tmp3 = load <8 x i8>, <8 x i8>* %C
%tmp4 = call <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8> %tmp2, <8 x i8> %tmp3)
%tmp5 = zext <8 x i8> %tmp4 to <8 x i16>
%tmp6 = add <8 x i16> %tmp1, %tmp5
@@ -163,9 +163,9 @@ define <8 x i16> @vabals8(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
define <4 x i32> @vabals16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
;CHECK-LABEL: vabals16:
;CHECK: vabal.s16
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i16>* %B
- %tmp3 = load <4 x i16>* %C
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
+ %tmp3 = load <4 x i16>, <4 x i16>* %C
%tmp4 = call <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16> %tmp2, <4 x i16> %tmp3)
%tmp5 = zext <4 x i16> %tmp4 to <4 x i32>
%tmp6 = add <4 x i32> %tmp1, %tmp5
@@ -175,9 +175,9 @@ define <4 x i32> @vabals16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind
define <2 x i64> @vabals32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
;CHECK-LABEL: vabals32:
;CHECK: vabal.s32
- %tmp1 = load <2 x i64>* %A
- %tmp2 = load <2 x i32>* %B
- %tmp3 = load <2 x i32>* %C
+ %tmp1 = load <2 x i64>, <2 x i64>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
+ %tmp3 = load <2 x i32>, <2 x i32>* %C
%tmp4 = call <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32> %tmp2, <2 x i32> %tmp3)
%tmp5 = zext <2 x i32> %tmp4 to <2 x i64>
%tmp6 = add <2 x i64> %tmp1, %tmp5
@@ -187,9 +187,9 @@ define <2 x i64> @vabals32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind
define <8 x i16> @vabalu8(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
;CHECK-LABEL: vabalu8:
;CHECK: vabal.u8
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i8>* %B
- %tmp3 = load <8 x i8>* %C
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
+ %tmp2 = load <8 x i8>, <8 x i8>* %B
+ %tmp3 = load <8 x i8>, <8 x i8>* %C
%tmp4 = call <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8> %tmp2, <8 x i8> %tmp3)
%tmp5 = zext <8 x i8> %tmp4 to <8 x i16>
%tmp6 = add <8 x i16> %tmp1, %tmp5
@@ -199,9 +199,9 @@ define <8 x i16> @vabalu8(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
define <4 x i32> @vabalu16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
;CHECK-LABEL: vabalu16:
;CHECK: vabal.u16
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i16>* %B
- %tmp3 = load <4 x i16>* %C
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
+ %tmp3 = load <4 x i16>, <4 x i16>* %C
%tmp4 = call <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16> %tmp2, <4 x i16> %tmp3)
%tmp5 = zext <4 x i16> %tmp4 to <4 x i32>
%tmp6 = add <4 x i32> %tmp1, %tmp5
@@ -211,9 +211,9 @@ define <4 x i32> @vabalu16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind
define <2 x i64> @vabalu32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
;CHECK-LABEL: vabalu32:
;CHECK: vabal.u32
- %tmp1 = load <2 x i64>* %A
- %tmp2 = load <2 x i32>* %B
- %tmp3 = load <2 x i32>* %C
+ %tmp1 = load <2 x i64>, <2 x i64>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
+ %tmp3 = load <2 x i32>, <2 x i32>* %C
%tmp4 = call <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32> %tmp2, <2 x i32> %tmp3)
%tmp5 = zext <2 x i32> %tmp4 to <2 x i64>
%tmp6 = add <2 x i64> %tmp1, %tmp5
diff --git a/test/CodeGen/ARM/vabd.ll b/test/CodeGen/ARM/vabd.ll
index 9ba8be28c776..548b8a340461 100644
--- a/test/CodeGen/ARM/vabd.ll
+++ b/test/CodeGen/ARM/vabd.ll
@@ -3,8 +3,8 @@
define <8 x i8> @vabds8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: vabds8:
;CHECK: vabd.s8
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
+ %tmp2 = load <8 x i8>, <8 x i8>* %B
%tmp3 = call <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
ret <8 x i8> %tmp3
}
@@ -12,8 +12,8 @@ define <8 x i8> @vabds8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
define <4 x i16> @vabds16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: vabds16:
;CHECK: vabd.s16
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
%tmp3 = call <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
ret <4 x i16> %tmp3
}
@@ -21,8 +21,8 @@ define <4 x i16> @vabds16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
define <2 x i32> @vabds32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: vabds32:
;CHECK: vabd.s32
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
%tmp3 = call <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
ret <2 x i32> %tmp3
}
@@ -30,8 +30,8 @@ define <2 x i32> @vabds32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
define <8 x i8> @vabdu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: vabdu8:
;CHECK: vabd.u8
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
+ %tmp2 = load <8 x i8>, <8 x i8>* %B
%tmp3 = call <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
ret <8 x i8> %tmp3
}
@@ -39,8 +39,8 @@ define <8 x i8> @vabdu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
define <4 x i16> @vabdu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: vabdu16:
;CHECK: vabd.u16
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
%tmp3 = call <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
ret <4 x i16> %tmp3
}
@@ -48,8 +48,8 @@ define <4 x i16> @vabdu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
define <2 x i32> @vabdu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: vabdu32:
;CHECK: vabd.u32
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
%tmp3 = call <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
ret <2 x i32> %tmp3
}
@@ -57,8 +57,8 @@ define <2 x i32> @vabdu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
define <2 x float> @vabdf32(<2 x float>* %A, <2 x float>* %B) nounwind {
;CHECK-LABEL: vabdf32:
;CHECK: vabd.f32
- %tmp1 = load <2 x float>* %A
- %tmp2 = load <2 x float>* %B
+ %tmp1 = load <2 x float>, <2 x float>* %A
+ %tmp2 = load <2 x float>, <2 x float>* %B
%tmp3 = call <2 x float> @llvm.arm.neon.vabds.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
ret <2 x float> %tmp3
}
@@ -66,8 +66,8 @@ define <2 x float> @vabdf32(<2 x float>* %A, <2 x float>* %B) nounwind {
define <16 x i8> @vabdQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: vabdQs8:
;CHECK: vabd.s8
- %tmp1 = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
+ %tmp1 = load <16 x i8>, <16 x i8>* %A
+ %tmp2 = load <16 x i8>, <16 x i8>* %B
%tmp3 = call <16 x i8> @llvm.arm.neon.vabds.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
ret <16 x i8> %tmp3
}
@@ -75,8 +75,8 @@ define <16 x i8> @vabdQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
define <8 x i16> @vabdQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: vabdQs16:
;CHECK: vabd.s16
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
+ %tmp2 = load <8 x i16>, <8 x i16>* %B
%tmp3 = call <8 x i16> @llvm.arm.neon.vabds.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
ret <8 x i16> %tmp3
}
@@ -84,8 +84,8 @@ define <8 x i16> @vabdQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
define <4 x i32> @vabdQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: vabdQs32:
;CHECK: vabd.s32
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
+ %tmp2 = load <4 x i32>, <4 x i32>* %B
%tmp3 = call <4 x i32> @llvm.arm.neon.vabds.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
ret <4 x i32> %tmp3
}
@@ -93,8 +93,8 @@ define <4 x i32> @vabdQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
define <16 x i8> @vabdQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: vabdQu8:
;CHECK: vabd.u8
- %tmp1 = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
+ %tmp1 = load <16 x i8>, <16 x i8>* %A
+ %tmp2 = load <16 x i8>, <16 x i8>* %B
%tmp3 = call <16 x i8> @llvm.arm.neon.vabdu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
ret <16 x i8> %tmp3
}
@@ -102,8 +102,8 @@ define <16 x i8> @vabdQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
define <8 x i16> @vabdQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: vabdQu16:
;CHECK: vabd.u16
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
+ %tmp2 = load <8 x i16>, <8 x i16>* %B
%tmp3 = call <8 x i16> @llvm.arm.neon.vabdu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
ret <8 x i16> %tmp3
}
@@ -111,8 +111,8 @@ define <8 x i16> @vabdQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
define <4 x i32> @vabdQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: vabdQu32:
;CHECK: vabd.u32
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
+ %tmp2 = load <4 x i32>, <4 x i32>* %B
%tmp3 = call <4 x i32> @llvm.arm.neon.vabdu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
ret <4 x i32> %tmp3
}
@@ -120,8 +120,8 @@ define <4 x i32> @vabdQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
define <4 x float> @vabdQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
;CHECK-LABEL: vabdQf32:
;CHECK: vabd.f32
- %tmp1 = load <4 x float>* %A
- %tmp2 = load <4 x float>* %B
+ %tmp1 = load <4 x float>, <4 x float>* %A
+ %tmp2 = load <4 x float>, <4 x float>* %B
%tmp3 = call <4 x float> @llvm.arm.neon.vabds.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
ret <4 x float> %tmp3
}
@@ -149,8 +149,8 @@ declare <4 x float> @llvm.arm.neon.vabds.v4f32(<4 x float>, <4 x float>) nounwin
define <8 x i16> @vabdls8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: vabdls8:
;CHECK: vabdl.s8
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
+ %tmp2 = load <8 x i8>, <8 x i8>* %B
%tmp3 = call <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
%tmp4 = zext <8 x i8> %tmp3 to <8 x i16>
ret <8 x i16> %tmp4
@@ -159,8 +159,8 @@ define <8 x i16> @vabdls8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
define <4 x i32> @vabdls16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: vabdls16:
;CHECK: vabdl.s16
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
%tmp3 = call <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
%tmp4 = zext <4 x i16> %tmp3 to <4 x i32>
ret <4 x i32> %tmp4
@@ -169,8 +169,8 @@ define <4 x i32> @vabdls16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
define <2 x i64> @vabdls32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: vabdls32:
;CHECK: vabdl.s32
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
%tmp3 = call <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
%tmp4 = zext <2 x i32> %tmp3 to <2 x i64>
ret <2 x i64> %tmp4
@@ -179,8 +179,8 @@ define <2 x i64> @vabdls32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
define <8 x i16> @vabdlu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: vabdlu8:
;CHECK: vabdl.u8
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
+ %tmp2 = load <8 x i8>, <8 x i8>* %B
%tmp3 = call <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
%tmp4 = zext <8 x i8> %tmp3 to <8 x i16>
ret <8 x i16> %tmp4
@@ -189,8 +189,8 @@ define <8 x i16> @vabdlu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
define <4 x i32> @vabdlu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: vabdlu16:
;CHECK: vabdl.u16
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
%tmp3 = call <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
%tmp4 = zext <4 x i16> %tmp3 to <4 x i32>
ret <4 x i32> %tmp4
@@ -199,8 +199,8 @@ define <4 x i32> @vabdlu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
define <2 x i64> @vabdlu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: vabdlu32:
;CHECK: vabdl.u32
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
%tmp3 = call <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
%tmp4 = zext <2 x i32> %tmp3 to <2 x i64>
ret <2 x i64> %tmp4
diff --git a/test/CodeGen/ARM/vabs.ll b/test/CodeGen/ARM/vabs.ll
index 3a1aec86edfe..38c6d6c28aed 100644
--- a/test/CodeGen/ARM/vabs.ll
+++ b/test/CodeGen/ARM/vabs.ll
@@ -3,7 +3,7 @@
define <8 x i8> @vabss8(<8 x i8>* %A) nounwind {
;CHECK-LABEL: vabss8:
;CHECK: vabs.s8
- %tmp1 = load <8 x i8>* %A
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
%tmp2 = call <8 x i8> @llvm.arm.neon.vabs.v8i8(<8 x i8> %tmp1)
ret <8 x i8> %tmp2
}
@@ -11,7 +11,7 @@ define <8 x i8> @vabss8(<8 x i8>* %A) nounwind {
define <4 x i16> @vabss16(<4 x i16>* %A) nounwind {
;CHECK-LABEL: vabss16:
;CHECK: vabs.s16
- %tmp1 = load <4 x i16>* %A
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
%tmp2 = call <4 x i16> @llvm.arm.neon.vabs.v4i16(<4 x i16> %tmp1)
ret <4 x i16> %tmp2
}
@@ -19,7 +19,7 @@ define <4 x i16> @vabss16(<4 x i16>* %A) nounwind {
define <2 x i32> @vabss32(<2 x i32>* %A) nounwind {
;CHECK-LABEL: vabss32:
;CHECK: vabs.s32
- %tmp1 = load <2 x i32>* %A
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
%tmp2 = call <2 x i32> @llvm.arm.neon.vabs.v2i32(<2 x i32> %tmp1)
ret <2 x i32> %tmp2
}
@@ -27,7 +27,7 @@ define <2 x i32> @vabss32(<2 x i32>* %A) nounwind {
define <2 x float> @vabsf32(<2 x float>* %A) nounwind {
;CHECK-LABEL: vabsf32:
;CHECK: vabs.f32
- %tmp1 = load <2 x float>* %A
+ %tmp1 = load <2 x float>, <2 x float>* %A
%tmp2 = call <2 x float> @llvm.fabs.v2f32(<2 x float> %tmp1)
ret <2 x float> %tmp2
}
@@ -35,7 +35,7 @@ define <2 x float> @vabsf32(<2 x float>* %A) nounwind {
define <16 x i8> @vabsQs8(<16 x i8>* %A) nounwind {
;CHECK-LABEL: vabsQs8:
;CHECK: vabs.s8
- %tmp1 = load <16 x i8>* %A
+ %tmp1 = load <16 x i8>, <16 x i8>* %A
%tmp2 = call <16 x i8> @llvm.arm.neon.vabs.v16i8(<16 x i8> %tmp1)
ret <16 x i8> %tmp2
}
@@ -43,7 +43,7 @@ define <16 x i8> @vabsQs8(<16 x i8>* %A) nounwind {
define <8 x i16> @vabsQs16(<8 x i16>* %A) nounwind {
;CHECK-LABEL: vabsQs16:
;CHECK: vabs.s16
- %tmp1 = load <8 x i16>* %A
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
%tmp2 = call <8 x i16> @llvm.arm.neon.vabs.v8i16(<8 x i16> %tmp1)
ret <8 x i16> %tmp2
}
@@ -51,7 +51,7 @@ define <8 x i16> @vabsQs16(<8 x i16>* %A) nounwind {
define <4 x i32> @vabsQs32(<4 x i32>* %A) nounwind {
;CHECK-LABEL: vabsQs32:
;CHECK: vabs.s32
- %tmp1 = load <4 x i32>* %A
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
%tmp2 = call <4 x i32> @llvm.arm.neon.vabs.v4i32(<4 x i32> %tmp1)
ret <4 x i32> %tmp2
}
@@ -59,7 +59,7 @@ define <4 x i32> @vabsQs32(<4 x i32>* %A) nounwind {
define <4 x float> @vabsQf32(<4 x float>* %A) nounwind {
;CHECK-LABEL: vabsQf32:
;CHECK: vabs.f32
- %tmp1 = load <4 x float>* %A
+ %tmp1 = load <4 x float>, <4 x float>* %A
%tmp2 = call <4 x float> @llvm.fabs.v4f32(<4 x float> %tmp1)
ret <4 x float> %tmp2
}
@@ -77,7 +77,7 @@ declare <4 x float> @llvm.fabs.v4f32(<4 x float>) nounwind readnone
define <8 x i8> @vqabss8(<8 x i8>* %A) nounwind {
;CHECK-LABEL: vqabss8:
;CHECK: vqabs.s8
- %tmp1 = load <8 x i8>* %A
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
%tmp2 = call <8 x i8> @llvm.arm.neon.vqabs.v8i8(<8 x i8> %tmp1)
ret <8 x i8> %tmp2
}
@@ -85,7 +85,7 @@ define <8 x i8> @vqabss8(<8 x i8>* %A) nounwind {
define <4 x i16> @vqabss16(<4 x i16>* %A) nounwind {
;CHECK-LABEL: vqabss16:
;CHECK: vqabs.s16
- %tmp1 = load <4 x i16>* %A
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
%tmp2 = call <4 x i16> @llvm.arm.neon.vqabs.v4i16(<4 x i16> %tmp1)
ret <4 x i16> %tmp2
}
@@ -93,7 +93,7 @@ define <4 x i16> @vqabss16(<4 x i16>* %A) nounwind {
define <2 x i32> @vqabss32(<2 x i32>* %A) nounwind {
;CHECK-LABEL: vqabss32:
;CHECK: vqabs.s32
- %tmp1 = load <2 x i32>* %A
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
%tmp2 = call <2 x i32> @llvm.arm.neon.vqabs.v2i32(<2 x i32> %tmp1)
ret <2 x i32> %tmp2
}
@@ -101,7 +101,7 @@ define <2 x i32> @vqabss32(<2 x i32>* %A) nounwind {
define <16 x i8> @vqabsQs8(<16 x i8>* %A) nounwind {
;CHECK-LABEL: vqabsQs8:
;CHECK: vqabs.s8
- %tmp1 = load <16 x i8>* %A
+ %tmp1 = load <16 x i8>, <16 x i8>* %A
%tmp2 = call <16 x i8> @llvm.arm.neon.vqabs.v16i8(<16 x i8> %tmp1)
ret <16 x i8> %tmp2
}
@@ -109,7 +109,7 @@ define <16 x i8> @vqabsQs8(<16 x i8>* %A) nounwind {
define <8 x i16> @vqabsQs16(<8 x i16>* %A) nounwind {
;CHECK-LABEL: vqabsQs16:
;CHECK: vqabs.s16
- %tmp1 = load <8 x i16>* %A
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
%tmp2 = call <8 x i16> @llvm.arm.neon.vqabs.v8i16(<8 x i16> %tmp1)
ret <8 x i16> %tmp2
}
@@ -117,7 +117,7 @@ define <8 x i16> @vqabsQs16(<8 x i16>* %A) nounwind {
define <4 x i32> @vqabsQs32(<4 x i32>* %A) nounwind {
;CHECK-LABEL: vqabsQs32:
;CHECK: vqabs.s32
- %tmp1 = load <4 x i32>* %A
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
%tmp2 = call <4 x i32> @llvm.arm.neon.vqabs.v4i32(<4 x i32> %tmp1)
ret <4 x i32> %tmp2
}
diff --git a/test/CodeGen/ARM/vadd.ll b/test/CodeGen/ARM/vadd.ll
index 86b0d0297018..dd35dd1ccfb9 100644
--- a/test/CodeGen/ARM/vadd.ll
+++ b/test/CodeGen/ARM/vadd.ll
@@ -3,8 +3,8 @@
define <8 x i8> @vaddi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: vaddi8:
;CHECK: vadd.i8
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
+ %tmp2 = load <8 x i8>, <8 x i8>* %B
%tmp3 = add <8 x i8> %tmp1, %tmp2
ret <8 x i8> %tmp3
}
@@ -12,8 +12,8 @@ define <8 x i8> @vaddi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
define <4 x i16> @vaddi16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: vaddi16:
;CHECK: vadd.i16
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
%tmp3 = add <4 x i16> %tmp1, %tmp2
ret <4 x i16> %tmp3
}
@@ -21,8 +21,8 @@ define <4 x i16> @vaddi16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
define <2 x i32> @vaddi32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: vaddi32:
;CHECK: vadd.i32
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
%tmp3 = add <2 x i32> %tmp1, %tmp2
ret <2 x i32> %tmp3
}
@@ -30,8 +30,8 @@ define <2 x i32> @vaddi32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
define <1 x i64> @vaddi64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
;CHECK-LABEL: vaddi64:
;CHECK: vadd.i64
- %tmp1 = load <1 x i64>* %A
- %tmp2 = load <1 x i64>* %B
+ %tmp1 = load <1 x i64>, <1 x i64>* %A
+ %tmp2 = load <1 x i64>, <1 x i64>* %B
%tmp3 = add <1 x i64> %tmp1, %tmp2
ret <1 x i64> %tmp3
}
@@ -39,8 +39,8 @@ define <1 x i64> @vaddi64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
define <2 x float> @vaddf32(<2 x float>* %A, <2 x float>* %B) nounwind {
;CHECK-LABEL: vaddf32:
;CHECK: vadd.f32
- %tmp1 = load <2 x float>* %A
- %tmp2 = load <2 x float>* %B
+ %tmp1 = load <2 x float>, <2 x float>* %A
+ %tmp2 = load <2 x float>, <2 x float>* %B
%tmp3 = fadd <2 x float> %tmp1, %tmp2
ret <2 x float> %tmp3
}
@@ -48,8 +48,8 @@ define <2 x float> @vaddf32(<2 x float>* %A, <2 x float>* %B) nounwind {
define <16 x i8> @vaddQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: vaddQi8:
;CHECK: vadd.i8
- %tmp1 = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
+ %tmp1 = load <16 x i8>, <16 x i8>* %A
+ %tmp2 = load <16 x i8>, <16 x i8>* %B
%tmp3 = add <16 x i8> %tmp1, %tmp2
ret <16 x i8> %tmp3
}
@@ -57,8 +57,8 @@ define <16 x i8> @vaddQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
define <8 x i16> @vaddQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: vaddQi16:
;CHECK: vadd.i16
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
+ %tmp2 = load <8 x i16>, <8 x i16>* %B
%tmp3 = add <8 x i16> %tmp1, %tmp2
ret <8 x i16> %tmp3
}
@@ -66,8 +66,8 @@ define <8 x i16> @vaddQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
define <4 x i32> @vaddQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: vaddQi32:
;CHECK: vadd.i32
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
+ %tmp2 = load <4 x i32>, <4 x i32>* %B
%tmp3 = add <4 x i32> %tmp1, %tmp2
ret <4 x i32> %tmp3
}
@@ -75,8 +75,8 @@ define <4 x i32> @vaddQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
define <2 x i64> @vaddQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
;CHECK-LABEL: vaddQi64:
;CHECK: vadd.i64
- %tmp1 = load <2 x i64>* %A
- %tmp2 = load <2 x i64>* %B
+ %tmp1 = load <2 x i64>, <2 x i64>* %A
+ %tmp2 = load <2 x i64>, <2 x i64>* %B
%tmp3 = add <2 x i64> %tmp1, %tmp2
ret <2 x i64> %tmp3
}
@@ -84,8 +84,8 @@ define <2 x i64> @vaddQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
define <4 x float> @vaddQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
;CHECK-LABEL: vaddQf32:
;CHECK: vadd.f32
- %tmp1 = load <4 x float>* %A
- %tmp2 = load <4 x float>* %B
+ %tmp1 = load <4 x float>, <4 x float>* %A
+ %tmp2 = load <4 x float>, <4 x float>* %B
%tmp3 = fadd <4 x float> %tmp1, %tmp2
ret <4 x float> %tmp3
}
@@ -93,8 +93,8 @@ define <4 x float> @vaddQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
define <8 x i8> @vraddhni16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: vraddhni16:
;CHECK: vraddhn.i16
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
+ %tmp2 = load <8 x i16>, <8 x i16>* %B
%tmp3 = call <8 x i8> @llvm.arm.neon.vraddhn.v8i8(<8 x i16> %tmp1, <8 x i16> %tmp2)
ret <8 x i8> %tmp3
}
@@ -102,8 +102,8 @@ define <8 x i8> @vraddhni16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
define <4 x i16> @vraddhni32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: vraddhni32:
;CHECK: vraddhn.i32
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
+ %tmp2 = load <4 x i32>, <4 x i32>* %B
%tmp3 = call <4 x i16> @llvm.arm.neon.vraddhn.v4i16(<4 x i32> %tmp1, <4 x i32> %tmp2)
ret <4 x i16> %tmp3
}
@@ -111,8 +111,8 @@ define <4 x i16> @vraddhni32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
define <2 x i32> @vraddhni64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
;CHECK-LABEL: vraddhni64:
;CHECK: vraddhn.i64
- %tmp1 = load <2 x i64>* %A
- %tmp2 = load <2 x i64>* %B
+ %tmp1 = load <2 x i64>, <2 x i64>* %A
+ %tmp2 = load <2 x i64>, <2 x i64>* %B
%tmp3 = call <2 x i32> @llvm.arm.neon.vraddhn.v2i32(<2 x i64> %tmp1, <2 x i64> %tmp2)
ret <2 x i32> %tmp3
}
@@ -151,8 +151,8 @@ define <2 x i32> @vaddhni64_natural(<2 x i64> %A, <2 x i64> %B) nounwind {
define <8 x i16> @vaddls8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: vaddls8:
;CHECK: vaddl.s8
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
+ %tmp2 = load <8 x i8>, <8 x i8>* %B
%tmp3 = sext <8 x i8> %tmp1 to <8 x i16>
%tmp4 = sext <8 x i8> %tmp2 to <8 x i16>
%tmp5 = add <8 x i16> %tmp3, %tmp4
@@ -162,8 +162,8 @@ define <8 x i16> @vaddls8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
define <4 x i32> @vaddls16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: vaddls16:
;CHECK: vaddl.s16
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
%tmp3 = sext <4 x i16> %tmp1 to <4 x i32>
%tmp4 = sext <4 x i16> %tmp2 to <4 x i32>
%tmp5 = add <4 x i32> %tmp3, %tmp4
@@ -173,8 +173,8 @@ define <4 x i32> @vaddls16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
define <2 x i64> @vaddls32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: vaddls32:
;CHECK: vaddl.s32
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
%tmp3 = sext <2 x i32> %tmp1 to <2 x i64>
%tmp4 = sext <2 x i32> %tmp2 to <2 x i64>
%tmp5 = add <2 x i64> %tmp3, %tmp4
@@ -184,8 +184,8 @@ define <2 x i64> @vaddls32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
define <8 x i16> @vaddlu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: vaddlu8:
;CHECK: vaddl.u8
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
+ %tmp2 = load <8 x i8>, <8 x i8>* %B
%tmp3 = zext <8 x i8> %tmp1 to <8 x i16>
%tmp4 = zext <8 x i8> %tmp2 to <8 x i16>
%tmp5 = add <8 x i16> %tmp3, %tmp4
@@ -195,8 +195,8 @@ define <8 x i16> @vaddlu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
define <4 x i32> @vaddlu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: vaddlu16:
;CHECK: vaddl.u16
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
%tmp3 = zext <4 x i16> %tmp1 to <4 x i32>
%tmp4 = zext <4 x i16> %tmp2 to <4 x i32>
%tmp5 = add <4 x i32> %tmp3, %tmp4
@@ -206,8 +206,8 @@ define <4 x i32> @vaddlu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
define <2 x i64> @vaddlu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: vaddlu32:
;CHECK: vaddl.u32
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
%tmp3 = zext <2 x i32> %tmp1 to <2 x i64>
%tmp4 = zext <2 x i32> %tmp2 to <2 x i64>
%tmp5 = add <2 x i64> %tmp3, %tmp4
@@ -217,8 +217,8 @@ define <2 x i64> @vaddlu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
define <8 x i16> @vaddws8(<8 x i16>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: vaddws8:
;CHECK: vaddw.s8
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i8>* %B
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
+ %tmp2 = load <8 x i8>, <8 x i8>* %B
%tmp3 = sext <8 x i8> %tmp2 to <8 x i16>
%tmp4 = add <8 x i16> %tmp1, %tmp3
ret <8 x i16> %tmp4
@@ -227,8 +227,8 @@ define <8 x i16> @vaddws8(<8 x i16>* %A, <8 x i8>* %B) nounwind {
define <4 x i32> @vaddws16(<4 x i32>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: vaddws16:
;CHECK: vaddw.s16
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i16>* %B
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
%tmp3 = sext <4 x i16> %tmp2 to <4 x i32>
%tmp4 = add <4 x i32> %tmp1, %tmp3
ret <4 x i32> %tmp4
@@ -237,8 +237,8 @@ define <4 x i32> @vaddws16(<4 x i32>* %A, <4 x i16>* %B) nounwind {
define <2 x i64> @vaddws32(<2 x i64>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: vaddws32:
;CHECK: vaddw.s32
- %tmp1 = load <2 x i64>* %A
- %tmp2 = load <2 x i32>* %B
+ %tmp1 = load <2 x i64>, <2 x i64>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
%tmp3 = sext <2 x i32> %tmp2 to <2 x i64>
%tmp4 = add <2 x i64> %tmp1, %tmp3
ret <2 x i64> %tmp4
@@ -247,8 +247,8 @@ define <2 x i64> @vaddws32(<2 x i64>* %A, <2 x i32>* %B) nounwind {
define <8 x i16> @vaddwu8(<8 x i16>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: vaddwu8:
;CHECK: vaddw.u8
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i8>* %B
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
+ %tmp2 = load <8 x i8>, <8 x i8>* %B
%tmp3 = zext <8 x i8> %tmp2 to <8 x i16>
%tmp4 = add <8 x i16> %tmp1, %tmp3
ret <8 x i16> %tmp4
@@ -257,8 +257,8 @@ define <8 x i16> @vaddwu8(<8 x i16>* %A, <8 x i8>* %B) nounwind {
define <4 x i32> @vaddwu16(<4 x i32>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: vaddwu16:
;CHECK: vaddw.u16
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i16>* %B
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
%tmp3 = zext <4 x i16> %tmp2 to <4 x i32>
%tmp4 = add <4 x i32> %tmp1, %tmp3
ret <4 x i32> %tmp4
@@ -267,8 +267,8 @@ define <4 x i32> @vaddwu16(<4 x i32>* %A, <4 x i16>* %B) nounwind {
define <2 x i64> @vaddwu32(<2 x i64>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: vaddwu32:
;CHECK: vaddw.u32
- %tmp1 = load <2 x i64>* %A
- %tmp2 = load <2 x i32>* %B
+ %tmp1 = load <2 x i64>, <2 x i64>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
%tmp3 = zext <2 x i32> %tmp2 to <2 x i64>
%tmp4 = add <2 x i64> %tmp1, %tmp3
ret <2 x i64> %tmp4
diff --git a/test/CodeGen/ARM/varargs-spill-stack-align-nacl.ll b/test/CodeGen/ARM/varargs-spill-stack-align-nacl.ll
index 148a79df0cb8..4879d73894d6 100644
--- a/test/CodeGen/ARM/varargs-spill-stack-align-nacl.ll
+++ b/test/CodeGen/ARM/varargs-spill-stack-align-nacl.ll
@@ -21,10 +21,10 @@ define void @varargs_func(i32 %arg1, ...) {
; CHECK-LABEL: varargs_func:
; Reserve space for the varargs save area. This currently reserves
; more than enough (16 bytes rather than the 12 bytes needed).
-; CHECK: sub sp, sp, #16
+; CHECK: sub sp, sp, #12
; CHECK: push {r11, lr}
; Align the stack pointer to a multiple of 16.
-; CHECK: sub sp, sp, #8
+; CHECK: sub sp, sp, #12
; Calculate the address of the varargs save area and save varargs
; arguments into it.
; CHECK-NEXT: add r0, sp, #20
diff --git a/test/CodeGen/ARM/vargs.ll b/test/CodeGen/ARM/vargs.ll
index 3b810f36cc79..41ec03857f08 100644
--- a/test/CodeGen/ARM/vargs.ll
+++ b/test/CodeGen/ARM/vargs.ll
@@ -4,8 +4,8 @@
define i32 @main() {
entry:
- %tmp = call i32 (i8*, ...)* @printf( i8* getelementptr ([43 x i8]* @str, i32 0, i64 0), i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10 ) ; <i32> [#uses=0]
- %tmp2 = call i32 (i8*, ...)* @printf( i8* getelementptr ([43 x i8]* @str, i32 0, i64 0), i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1 ) ; <i32> [#uses=0]
+ %tmp = call i32 (i8*, ...) @printf( i8* getelementptr ([43 x i8], [43 x i8]* @str, i32 0, i64 0), i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10 ) ; <i32> [#uses=0]
+ %tmp2 = call i32 (i8*, ...) @printf( i8* getelementptr ([43 x i8], [43 x i8]* @str, i32 0, i64 0), i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1 ) ; <i32> [#uses=0]
ret i32 11
}
diff --git a/test/CodeGen/ARM/vargs_align.ll b/test/CodeGen/ARM/vargs_align.ll
index 3abb57ee51f8..6dc71352214b 100644
--- a/test/CodeGen/ARM/vargs_align.ll
+++ b/test/CodeGen/ARM/vargs_align.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm -mtriple=arm-linux-gnueabi | FileCheck %s -check-prefix=EABI
+; RUN: llc < %s -mtriple=armv7-linux-gnueabihf | FileCheck %s -check-prefix=EABI
; RUN: llc < %s -march=arm -mtriple=arm-linux-gnu | FileCheck %s -check-prefix=OABI
define i32 @f(i32 %a, ...) {
@@ -8,16 +8,20 @@ entry:
%tmp = alloca i32, align 4 ; <i32*> [#uses=2]
store i32 %a, i32* %a_addr
store i32 0, i32* %tmp
- %tmp1 = load i32* %tmp ; <i32> [#uses=1]
+ %tmp1 = load i32, i32* %tmp ; <i32> [#uses=1]
store i32 %tmp1, i32* %retval
call void @llvm.va_start(i8* null)
+ call void asm sideeffect "", "~{d8}"()
br label %return
return: ; preds = %entry
- %retval2 = load i32* %retval ; <i32> [#uses=1]
+ %retval2 = load i32, i32* %retval ; <i32> [#uses=1]
ret i32 %retval2
-; EABI: add sp, sp, #12
; EABI: add sp, sp, #16
+; EABI: vpop {d8}
+; EABI: add sp, sp, #4
+; EABI: add sp, sp, #12
+
; OABI: add sp, sp, #12
; OABI: add sp, sp, #12
}
diff --git a/test/CodeGen/ARM/vbits.ll b/test/CodeGen/ARM/vbits.ll
index dfeaacf2085f..db9bc6ccdd0c 100644
--- a/test/CodeGen/ARM/vbits.ll
+++ b/test/CodeGen/ARM/vbits.ll
@@ -3,8 +3,8 @@
define <8 x i8> @v_andi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: v_andi8:
;CHECK: vand
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
+ %tmp2 = load <8 x i8>, <8 x i8>* %B
%tmp3 = and <8 x i8> %tmp1, %tmp2
ret <8 x i8> %tmp3
}
@@ -12,8 +12,8 @@ define <8 x i8> @v_andi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
define <4 x i16> @v_andi16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: v_andi16:
;CHECK: vand
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
%tmp3 = and <4 x i16> %tmp1, %tmp2
ret <4 x i16> %tmp3
}
@@ -21,8 +21,8 @@ define <4 x i16> @v_andi16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
define <2 x i32> @v_andi32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: v_andi32:
;CHECK: vand
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
%tmp3 = and <2 x i32> %tmp1, %tmp2
ret <2 x i32> %tmp3
}
@@ -30,8 +30,8 @@ define <2 x i32> @v_andi32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
define <1 x i64> @v_andi64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
;CHECK-LABEL: v_andi64:
;CHECK: vand
- %tmp1 = load <1 x i64>* %A
- %tmp2 = load <1 x i64>* %B
+ %tmp1 = load <1 x i64>, <1 x i64>* %A
+ %tmp2 = load <1 x i64>, <1 x i64>* %B
%tmp3 = and <1 x i64> %tmp1, %tmp2
ret <1 x i64> %tmp3
}
@@ -39,8 +39,8 @@ define <1 x i64> @v_andi64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
define <16 x i8> @v_andQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: v_andQi8:
;CHECK: vand
- %tmp1 = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
+ %tmp1 = load <16 x i8>, <16 x i8>* %A
+ %tmp2 = load <16 x i8>, <16 x i8>* %B
%tmp3 = and <16 x i8> %tmp1, %tmp2
ret <16 x i8> %tmp3
}
@@ -48,8 +48,8 @@ define <16 x i8> @v_andQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
define <8 x i16> @v_andQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: v_andQi16:
;CHECK: vand
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
+ %tmp2 = load <8 x i16>, <8 x i16>* %B
%tmp3 = and <8 x i16> %tmp1, %tmp2
ret <8 x i16> %tmp3
}
@@ -57,8 +57,8 @@ define <8 x i16> @v_andQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
define <4 x i32> @v_andQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: v_andQi32:
;CHECK: vand
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
+ %tmp2 = load <4 x i32>, <4 x i32>* %B
%tmp3 = and <4 x i32> %tmp1, %tmp2
ret <4 x i32> %tmp3
}
@@ -66,8 +66,8 @@ define <4 x i32> @v_andQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
define <2 x i64> @v_andQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
;CHECK-LABEL: v_andQi64:
;CHECK: vand
- %tmp1 = load <2 x i64>* %A
- %tmp2 = load <2 x i64>* %B
+ %tmp1 = load <2 x i64>, <2 x i64>* %A
+ %tmp2 = load <2 x i64>, <2 x i64>* %B
%tmp3 = and <2 x i64> %tmp1, %tmp2
ret <2 x i64> %tmp3
}
@@ -75,8 +75,8 @@ define <2 x i64> @v_andQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
define <8 x i8> @v_bici8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: v_bici8:
;CHECK: vbic
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
+ %tmp2 = load <8 x i8>, <8 x i8>* %B
%tmp3 = xor <8 x i8> %tmp2, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 >
%tmp4 = and <8 x i8> %tmp1, %tmp3
ret <8 x i8> %tmp4
@@ -85,8 +85,8 @@ define <8 x i8> @v_bici8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
define <4 x i16> @v_bici16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: v_bici16:
;CHECK: vbic
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
%tmp3 = xor <4 x i16> %tmp2, < i16 -1, i16 -1, i16 -1, i16 -1 >
%tmp4 = and <4 x i16> %tmp1, %tmp3
ret <4 x i16> %tmp4
@@ -95,8 +95,8 @@ define <4 x i16> @v_bici16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
define <2 x i32> @v_bici32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: v_bici32:
;CHECK: vbic
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
%tmp3 = xor <2 x i32> %tmp2, < i32 -1, i32 -1 >
%tmp4 = and <2 x i32> %tmp1, %tmp3
ret <2 x i32> %tmp4
@@ -105,8 +105,8 @@ define <2 x i32> @v_bici32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
define <1 x i64> @v_bici64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
;CHECK-LABEL: v_bici64:
;CHECK: vbic
- %tmp1 = load <1 x i64>* %A
- %tmp2 = load <1 x i64>* %B
+ %tmp1 = load <1 x i64>, <1 x i64>* %A
+ %tmp2 = load <1 x i64>, <1 x i64>* %B
%tmp3 = xor <1 x i64> %tmp2, < i64 -1 >
%tmp4 = and <1 x i64> %tmp1, %tmp3
ret <1 x i64> %tmp4
@@ -115,8 +115,8 @@ define <1 x i64> @v_bici64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
define <16 x i8> @v_bicQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: v_bicQi8:
;CHECK: vbic
- %tmp1 = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
+ %tmp1 = load <16 x i8>, <16 x i8>* %A
+ %tmp2 = load <16 x i8>, <16 x i8>* %B
%tmp3 = xor <16 x i8> %tmp2, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 >
%tmp4 = and <16 x i8> %tmp1, %tmp3
ret <16 x i8> %tmp4
@@ -125,8 +125,8 @@ define <16 x i8> @v_bicQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
define <8 x i16> @v_bicQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: v_bicQi16:
;CHECK: vbic
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
+ %tmp2 = load <8 x i16>, <8 x i16>* %B
%tmp3 = xor <8 x i16> %tmp2, < i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1 >
%tmp4 = and <8 x i16> %tmp1, %tmp3
ret <8 x i16> %tmp4
@@ -135,8 +135,8 @@ define <8 x i16> @v_bicQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
define <4 x i32> @v_bicQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: v_bicQi32:
;CHECK: vbic
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
+ %tmp2 = load <4 x i32>, <4 x i32>* %B
%tmp3 = xor <4 x i32> %tmp2, < i32 -1, i32 -1, i32 -1, i32 -1 >
%tmp4 = and <4 x i32> %tmp1, %tmp3
ret <4 x i32> %tmp4
@@ -145,8 +145,8 @@ define <4 x i32> @v_bicQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
define <2 x i64> @v_bicQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
;CHECK-LABEL: v_bicQi64:
;CHECK: vbic
- %tmp1 = load <2 x i64>* %A
- %tmp2 = load <2 x i64>* %B
+ %tmp1 = load <2 x i64>, <2 x i64>* %A
+ %tmp2 = load <2 x i64>, <2 x i64>* %B
%tmp3 = xor <2 x i64> %tmp2, < i64 -1, i64 -1 >
%tmp4 = and <2 x i64> %tmp1, %tmp3
ret <2 x i64> %tmp4
@@ -155,8 +155,8 @@ define <2 x i64> @v_bicQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
define <8 x i8> @v_eori8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: v_eori8:
;CHECK: veor
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
+ %tmp2 = load <8 x i8>, <8 x i8>* %B
%tmp3 = xor <8 x i8> %tmp1, %tmp2
ret <8 x i8> %tmp3
}
@@ -164,8 +164,8 @@ define <8 x i8> @v_eori8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
define <4 x i16> @v_eori16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: v_eori16:
;CHECK: veor
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
%tmp3 = xor <4 x i16> %tmp1, %tmp2
ret <4 x i16> %tmp3
}
@@ -173,8 +173,8 @@ define <4 x i16> @v_eori16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
define <2 x i32> @v_eori32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: v_eori32:
;CHECK: veor
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
%tmp3 = xor <2 x i32> %tmp1, %tmp2
ret <2 x i32> %tmp3
}
@@ -182,8 +182,8 @@ define <2 x i32> @v_eori32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
define <1 x i64> @v_eori64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
;CHECK-LABEL: v_eori64:
;CHECK: veor
- %tmp1 = load <1 x i64>* %A
- %tmp2 = load <1 x i64>* %B
+ %tmp1 = load <1 x i64>, <1 x i64>* %A
+ %tmp2 = load <1 x i64>, <1 x i64>* %B
%tmp3 = xor <1 x i64> %tmp1, %tmp2
ret <1 x i64> %tmp3
}
@@ -191,8 +191,8 @@ define <1 x i64> @v_eori64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
define <16 x i8> @v_eorQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: v_eorQi8:
;CHECK: veor
- %tmp1 = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
+ %tmp1 = load <16 x i8>, <16 x i8>* %A
+ %tmp2 = load <16 x i8>, <16 x i8>* %B
%tmp3 = xor <16 x i8> %tmp1, %tmp2
ret <16 x i8> %tmp3
}
@@ -200,8 +200,8 @@ define <16 x i8> @v_eorQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
define <8 x i16> @v_eorQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: v_eorQi16:
;CHECK: veor
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
+ %tmp2 = load <8 x i16>, <8 x i16>* %B
%tmp3 = xor <8 x i16> %tmp1, %tmp2
ret <8 x i16> %tmp3
}
@@ -209,8 +209,8 @@ define <8 x i16> @v_eorQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
define <4 x i32> @v_eorQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: v_eorQi32:
;CHECK: veor
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
+ %tmp2 = load <4 x i32>, <4 x i32>* %B
%tmp3 = xor <4 x i32> %tmp1, %tmp2
ret <4 x i32> %tmp3
}
@@ -218,8 +218,8 @@ define <4 x i32> @v_eorQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
define <2 x i64> @v_eorQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
;CHECK-LABEL: v_eorQi64:
;CHECK: veor
- %tmp1 = load <2 x i64>* %A
- %tmp2 = load <2 x i64>* %B
+ %tmp1 = load <2 x i64>, <2 x i64>* %A
+ %tmp2 = load <2 x i64>, <2 x i64>* %B
%tmp3 = xor <2 x i64> %tmp1, %tmp2
ret <2 x i64> %tmp3
}
@@ -227,7 +227,7 @@ define <2 x i64> @v_eorQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
define <8 x i8> @v_mvni8(<8 x i8>* %A) nounwind {
;CHECK-LABEL: v_mvni8:
;CHECK: vmvn
- %tmp1 = load <8 x i8>* %A
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
%tmp2 = xor <8 x i8> %tmp1, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 >
ret <8 x i8> %tmp2
}
@@ -235,7 +235,7 @@ define <8 x i8> @v_mvni8(<8 x i8>* %A) nounwind {
define <4 x i16> @v_mvni16(<4 x i16>* %A) nounwind {
;CHECK-LABEL: v_mvni16:
;CHECK: vmvn
- %tmp1 = load <4 x i16>* %A
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
%tmp2 = xor <4 x i16> %tmp1, < i16 -1, i16 -1, i16 -1, i16 -1 >
ret <4 x i16> %tmp2
}
@@ -243,7 +243,7 @@ define <4 x i16> @v_mvni16(<4 x i16>* %A) nounwind {
define <2 x i32> @v_mvni32(<2 x i32>* %A) nounwind {
;CHECK-LABEL: v_mvni32:
;CHECK: vmvn
- %tmp1 = load <2 x i32>* %A
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
%tmp2 = xor <2 x i32> %tmp1, < i32 -1, i32 -1 >
ret <2 x i32> %tmp2
}
@@ -251,7 +251,7 @@ define <2 x i32> @v_mvni32(<2 x i32>* %A) nounwind {
define <1 x i64> @v_mvni64(<1 x i64>* %A) nounwind {
;CHECK-LABEL: v_mvni64:
;CHECK: vmvn
- %tmp1 = load <1 x i64>* %A
+ %tmp1 = load <1 x i64>, <1 x i64>* %A
%tmp2 = xor <1 x i64> %tmp1, < i64 -1 >
ret <1 x i64> %tmp2
}
@@ -259,7 +259,7 @@ define <1 x i64> @v_mvni64(<1 x i64>* %A) nounwind {
define <16 x i8> @v_mvnQi8(<16 x i8>* %A) nounwind {
;CHECK-LABEL: v_mvnQi8:
;CHECK: vmvn
- %tmp1 = load <16 x i8>* %A
+ %tmp1 = load <16 x i8>, <16 x i8>* %A
%tmp2 = xor <16 x i8> %tmp1, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 >
ret <16 x i8> %tmp2
}
@@ -267,7 +267,7 @@ define <16 x i8> @v_mvnQi8(<16 x i8>* %A) nounwind {
define <8 x i16> @v_mvnQi16(<8 x i16>* %A) nounwind {
;CHECK-LABEL: v_mvnQi16:
;CHECK: vmvn
- %tmp1 = load <8 x i16>* %A
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
%tmp2 = xor <8 x i16> %tmp1, < i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1 >
ret <8 x i16> %tmp2
}
@@ -275,7 +275,7 @@ define <8 x i16> @v_mvnQi16(<8 x i16>* %A) nounwind {
define <4 x i32> @v_mvnQi32(<4 x i32>* %A) nounwind {
;CHECK-LABEL: v_mvnQi32:
;CHECK: vmvn
- %tmp1 = load <4 x i32>* %A
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
%tmp2 = xor <4 x i32> %tmp1, < i32 -1, i32 -1, i32 -1, i32 -1 >
ret <4 x i32> %tmp2
}
@@ -283,7 +283,7 @@ define <4 x i32> @v_mvnQi32(<4 x i32>* %A) nounwind {
define <2 x i64> @v_mvnQi64(<2 x i64>* %A) nounwind {
;CHECK-LABEL: v_mvnQi64:
;CHECK: vmvn
- %tmp1 = load <2 x i64>* %A
+ %tmp1 = load <2 x i64>, <2 x i64>* %A
%tmp2 = xor <2 x i64> %tmp1, < i64 -1, i64 -1 >
ret <2 x i64> %tmp2
}
@@ -291,8 +291,8 @@ define <2 x i64> @v_mvnQi64(<2 x i64>* %A) nounwind {
define <8 x i8> @v_orri8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: v_orri8:
;CHECK: vorr
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
+ %tmp2 = load <8 x i8>, <8 x i8>* %B
%tmp3 = or <8 x i8> %tmp1, %tmp2
ret <8 x i8> %tmp3
}
@@ -300,8 +300,8 @@ define <8 x i8> @v_orri8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
define <4 x i16> @v_orri16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: v_orri16:
;CHECK: vorr
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
%tmp3 = or <4 x i16> %tmp1, %tmp2
ret <4 x i16> %tmp3
}
@@ -309,8 +309,8 @@ define <4 x i16> @v_orri16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
define <2 x i32> @v_orri32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: v_orri32:
;CHECK: vorr
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
%tmp3 = or <2 x i32> %tmp1, %tmp2
ret <2 x i32> %tmp3
}
@@ -318,8 +318,8 @@ define <2 x i32> @v_orri32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
define <1 x i64> @v_orri64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
;CHECK-LABEL: v_orri64:
;CHECK: vorr
- %tmp1 = load <1 x i64>* %A
- %tmp2 = load <1 x i64>* %B
+ %tmp1 = load <1 x i64>, <1 x i64>* %A
+ %tmp2 = load <1 x i64>, <1 x i64>* %B
%tmp3 = or <1 x i64> %tmp1, %tmp2
ret <1 x i64> %tmp3
}
@@ -327,8 +327,8 @@ define <1 x i64> @v_orri64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
define <16 x i8> @v_orrQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: v_orrQi8:
;CHECK: vorr
- %tmp1 = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
+ %tmp1 = load <16 x i8>, <16 x i8>* %A
+ %tmp2 = load <16 x i8>, <16 x i8>* %B
%tmp3 = or <16 x i8> %tmp1, %tmp2
ret <16 x i8> %tmp3
}
@@ -336,8 +336,8 @@ define <16 x i8> @v_orrQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
define <8 x i16> @v_orrQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: v_orrQi16:
;CHECK: vorr
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
+ %tmp2 = load <8 x i16>, <8 x i16>* %B
%tmp3 = or <8 x i16> %tmp1, %tmp2
ret <8 x i16> %tmp3
}
@@ -345,8 +345,8 @@ define <8 x i16> @v_orrQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
define <4 x i32> @v_orrQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: v_orrQi32:
;CHECK: vorr
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
+ %tmp2 = load <4 x i32>, <4 x i32>* %B
%tmp3 = or <4 x i32> %tmp1, %tmp2
ret <4 x i32> %tmp3
}
@@ -354,8 +354,8 @@ define <4 x i32> @v_orrQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
define <2 x i64> @v_orrQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
;CHECK-LABEL: v_orrQi64:
;CHECK: vorr
- %tmp1 = load <2 x i64>* %A
- %tmp2 = load <2 x i64>* %B
+ %tmp1 = load <2 x i64>, <2 x i64>* %A
+ %tmp2 = load <2 x i64>, <2 x i64>* %B
%tmp3 = or <2 x i64> %tmp1, %tmp2
ret <2 x i64> %tmp3
}
@@ -363,8 +363,8 @@ define <2 x i64> @v_orrQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
define <8 x i8> @v_orni8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: v_orni8:
;CHECK: vorn
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
+ %tmp2 = load <8 x i8>, <8 x i8>* %B
%tmp3 = xor <8 x i8> %tmp2, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 >
%tmp4 = or <8 x i8> %tmp1, %tmp3
ret <8 x i8> %tmp4
@@ -373,8 +373,8 @@ define <8 x i8> @v_orni8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
define <4 x i16> @v_orni16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: v_orni16:
;CHECK: vorn
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
%tmp3 = xor <4 x i16> %tmp2, < i16 -1, i16 -1, i16 -1, i16 -1 >
%tmp4 = or <4 x i16> %tmp1, %tmp3
ret <4 x i16> %tmp4
@@ -383,8 +383,8 @@ define <4 x i16> @v_orni16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
define <2 x i32> @v_orni32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: v_orni32:
;CHECK: vorn
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
%tmp3 = xor <2 x i32> %tmp2, < i32 -1, i32 -1 >
%tmp4 = or <2 x i32> %tmp1, %tmp3
ret <2 x i32> %tmp4
@@ -393,8 +393,8 @@ define <2 x i32> @v_orni32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
define <1 x i64> @v_orni64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
;CHECK-LABEL: v_orni64:
;CHECK: vorn
- %tmp1 = load <1 x i64>* %A
- %tmp2 = load <1 x i64>* %B
+ %tmp1 = load <1 x i64>, <1 x i64>* %A
+ %tmp2 = load <1 x i64>, <1 x i64>* %B
%tmp3 = xor <1 x i64> %tmp2, < i64 -1 >
%tmp4 = or <1 x i64> %tmp1, %tmp3
ret <1 x i64> %tmp4
@@ -403,8 +403,8 @@ define <1 x i64> @v_orni64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
define <16 x i8> @v_ornQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: v_ornQi8:
;CHECK: vorn
- %tmp1 = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
+ %tmp1 = load <16 x i8>, <16 x i8>* %A
+ %tmp2 = load <16 x i8>, <16 x i8>* %B
%tmp3 = xor <16 x i8> %tmp2, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 >
%tmp4 = or <16 x i8> %tmp1, %tmp3
ret <16 x i8> %tmp4
@@ -413,8 +413,8 @@ define <16 x i8> @v_ornQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
define <8 x i16> @v_ornQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: v_ornQi16:
;CHECK: vorn
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
+ %tmp2 = load <8 x i16>, <8 x i16>* %B
%tmp3 = xor <8 x i16> %tmp2, < i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1 >
%tmp4 = or <8 x i16> %tmp1, %tmp3
ret <8 x i16> %tmp4
@@ -423,8 +423,8 @@ define <8 x i16> @v_ornQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
define <4 x i32> @v_ornQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: v_ornQi32:
;CHECK: vorn
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
+ %tmp2 = load <4 x i32>, <4 x i32>* %B
%tmp3 = xor <4 x i32> %tmp2, < i32 -1, i32 -1, i32 -1, i32 -1 >
%tmp4 = or <4 x i32> %tmp1, %tmp3
ret <4 x i32> %tmp4
@@ -433,8 +433,8 @@ define <4 x i32> @v_ornQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
define <2 x i64> @v_ornQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
;CHECK-LABEL: v_ornQi64:
;CHECK: vorn
- %tmp1 = load <2 x i64>* %A
- %tmp2 = load <2 x i64>* %B
+ %tmp1 = load <2 x i64>, <2 x i64>* %A
+ %tmp2 = load <2 x i64>, <2 x i64>* %B
%tmp3 = xor <2 x i64> %tmp2, < i64 -1, i64 -1 >
%tmp4 = or <2 x i64> %tmp1, %tmp3
ret <2 x i64> %tmp4
@@ -443,8 +443,8 @@ define <2 x i64> @v_ornQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
define <8 x i8> @vtsti8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: vtsti8:
;CHECK: vtst.8
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
+ %tmp2 = load <8 x i8>, <8 x i8>* %B
%tmp3 = and <8 x i8> %tmp1, %tmp2
%tmp4 = icmp ne <8 x i8> %tmp3, zeroinitializer
%tmp5 = sext <8 x i1> %tmp4 to <8 x i8>
@@ -454,8 +454,8 @@ define <8 x i8> @vtsti8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
define <4 x i16> @vtsti16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: vtsti16:
;CHECK: vtst.16
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
%tmp3 = and <4 x i16> %tmp1, %tmp2
%tmp4 = icmp ne <4 x i16> %tmp3, zeroinitializer
%tmp5 = sext <4 x i1> %tmp4 to <4 x i16>
@@ -465,8 +465,8 @@ define <4 x i16> @vtsti16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
define <2 x i32> @vtsti32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: vtsti32:
;CHECK: vtst.32
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
%tmp3 = and <2 x i32> %tmp1, %tmp2
%tmp4 = icmp ne <2 x i32> %tmp3, zeroinitializer
%tmp5 = sext <2 x i1> %tmp4 to <2 x i32>
@@ -476,8 +476,8 @@ define <2 x i32> @vtsti32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
define <16 x i8> @vtstQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: vtstQi8:
;CHECK: vtst.8
- %tmp1 = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
+ %tmp1 = load <16 x i8>, <16 x i8>* %A
+ %tmp2 = load <16 x i8>, <16 x i8>* %B
%tmp3 = and <16 x i8> %tmp1, %tmp2
%tmp4 = icmp ne <16 x i8> %tmp3, zeroinitializer
%tmp5 = sext <16 x i1> %tmp4 to <16 x i8>
@@ -487,8 +487,8 @@ define <16 x i8> @vtstQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
define <8 x i16> @vtstQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: vtstQi16:
;CHECK: vtst.16
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
+ %tmp2 = load <8 x i16>, <8 x i16>* %B
%tmp3 = and <8 x i16> %tmp1, %tmp2
%tmp4 = icmp ne <8 x i16> %tmp3, zeroinitializer
%tmp5 = sext <8 x i1> %tmp4 to <8 x i16>
@@ -498,8 +498,8 @@ define <8 x i16> @vtstQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
define <4 x i32> @vtstQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: vtstQi32:
;CHECK: vtst.32
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
+ %tmp2 = load <4 x i32>, <4 x i32>* %B
%tmp3 = and <4 x i32> %tmp1, %tmp2
%tmp4 = icmp ne <4 x i32> %tmp3, zeroinitializer
%tmp5 = sext <4 x i1> %tmp4 to <4 x i32>
@@ -511,7 +511,7 @@ define <8 x i8> @v_orrimm(<8 x i8>* %A) nounwind {
; CHECK-NOT: vmov
; CHECK-NOT: vmvn
; CHECK: vorr
- %tmp1 = load <8 x i8>* %A
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
%tmp3 = or <8 x i8> %tmp1, <i8 0, i8 0, i8 0, i8 1, i8 0, i8 0, i8 0, i8 1>
ret <8 x i8> %tmp3
}
@@ -521,7 +521,7 @@ define <16 x i8> @v_orrimmQ(<16 x i8>* %A) nounwind {
; CHECK-NOT: vmov
; CHECK-NOT: vmvn
; CHECK: vorr
- %tmp1 = load <16 x i8>* %A
+ %tmp1 = load <16 x i8>, <16 x i8>* %A
%tmp3 = or <16 x i8> %tmp1, <i8 0, i8 0, i8 0, i8 1, i8 0, i8 0, i8 0, i8 1, i8 0, i8 0, i8 0, i8 1, i8 0, i8 0, i8 0, i8 1>
ret <16 x i8> %tmp3
}
@@ -531,7 +531,7 @@ define <8 x i8> @v_bicimm(<8 x i8>* %A) nounwind {
; CHECK-NOT: vmov
; CHECK-NOT: vmvn
; CHECK: vbic
- %tmp1 = load <8 x i8>* %A
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
%tmp3 = and <8 x i8> %tmp1, < i8 -1, i8 -1, i8 -1, i8 0, i8 -1, i8 -1, i8 -1, i8 0 >
ret <8 x i8> %tmp3
}
@@ -541,7 +541,7 @@ define <16 x i8> @v_bicimmQ(<16 x i8>* %A) nounwind {
; CHECK-NOT: vmov
; CHECK-NOT: vmvn
; CHECK: vbic
- %tmp1 = load <16 x i8>* %A
+ %tmp1 = load <16 x i8>, <16 x i8>* %A
%tmp3 = and <16 x i8> %tmp1, < i8 -1, i8 -1, i8 -1, i8 0, i8 -1, i8 -1, i8 -1, i8 0, i8 -1, i8 -1, i8 -1, i8 0, i8 -1, i8 -1, i8 -1, i8 0 >
ret <16 x i8> %tmp3
}
diff --git a/test/CodeGen/ARM/vbsl-constant.ll b/test/CodeGen/ARM/vbsl-constant.ll
index 5e033fe2a647..6bcbbc8fa878 100644
--- a/test/CodeGen/ARM/vbsl-constant.ll
+++ b/test/CodeGen/ARM/vbsl-constant.ll
@@ -5,9 +5,9 @@ define <8 x i8> @v_bsli8(<8 x i8>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
;CHECK: vldr
;CHECK: vldr
;CHECK: vbsl
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
- %tmp3 = load <8 x i8>* %C
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
+ %tmp2 = load <8 x i8>, <8 x i8>* %B
+ %tmp3 = load <8 x i8>, <8 x i8>* %C
%tmp4 = and <8 x i8> %tmp1, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
%tmp6 = and <8 x i8> %tmp3, <i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4>
%tmp7 = or <8 x i8> %tmp4, %tmp6
@@ -19,9 +19,9 @@ define <4 x i16> @v_bsli16(<4 x i16>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind
;CHECK: vldr
;CHECK: vldr
;CHECK: vbsl
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
- %tmp3 = load <4 x i16>* %C
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
+ %tmp3 = load <4 x i16>, <4 x i16>* %C
%tmp4 = and <4 x i16> %tmp1, <i16 3, i16 3, i16 3, i16 3>
%tmp6 = and <4 x i16> %tmp3, <i16 -4, i16 -4, i16 -4, i16 -4>
%tmp7 = or <4 x i16> %tmp4, %tmp6
@@ -33,9 +33,9 @@ define <2 x i32> @v_bsli32(<2 x i32>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind
;CHECK: vldr
;CHECK: vldr
;CHECK: vbsl
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
- %tmp3 = load <2 x i32>* %C
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
+ %tmp3 = load <2 x i32>, <2 x i32>* %C
%tmp4 = and <2 x i32> %tmp1, <i32 3, i32 3>
%tmp6 = and <2 x i32> %tmp3, <i32 -4, i32 -4>
%tmp7 = or <2 x i32> %tmp4, %tmp6
@@ -48,9 +48,9 @@ define <1 x i64> @v_bsli64(<1 x i64>* %A, <1 x i64>* %B, <1 x i64>* %C) nounwind
;CHECK: vldr
;CHECK: vldr
;CHECK: vbsl
- %tmp1 = load <1 x i64>* %A
- %tmp2 = load <1 x i64>* %B
- %tmp3 = load <1 x i64>* %C
+ %tmp1 = load <1 x i64>, <1 x i64>* %A
+ %tmp2 = load <1 x i64>, <1 x i64>* %B
+ %tmp3 = load <1 x i64>, <1 x i64>* %C
%tmp4 = and <1 x i64> %tmp1, <i64 3>
%tmp6 = and <1 x i64> %tmp3, <i64 -4>
%tmp7 = or <1 x i64> %tmp4, %tmp6
@@ -62,9 +62,9 @@ define <16 x i8> @v_bslQi8(<16 x i8>* %A, <16 x i8>* %B, <16 x i8>* %C) nounwind
;CHECK: vld1.32
;CHECK: vld1.32
;CHECK: vbsl
- %tmp1 = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
- %tmp3 = load <16 x i8>* %C
+ %tmp1 = load <16 x i8>, <16 x i8>* %A
+ %tmp2 = load <16 x i8>, <16 x i8>* %B
+ %tmp3 = load <16 x i8>, <16 x i8>* %C
%tmp4 = and <16 x i8> %tmp1, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
%tmp6 = and <16 x i8> %tmp3, <i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4>
%tmp7 = or <16 x i8> %tmp4, %tmp6
@@ -76,9 +76,9 @@ define <8 x i16> @v_bslQi16(<8 x i16>* %A, <8 x i16>* %B, <8 x i16>* %C) nounwin
;CHECK: vld1.32
;CHECK: vld1.32
;CHECK: vbsl
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
- %tmp3 = load <8 x i16>* %C
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
+ %tmp2 = load <8 x i16>, <8 x i16>* %B
+ %tmp3 = load <8 x i16>, <8 x i16>* %C
%tmp4 = and <8 x i16> %tmp1, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
%tmp6 = and <8 x i16> %tmp3, <i16 -4, i16 -4, i16 -4, i16 -4, i16 -4, i16 -4, i16 -4, i16 -4>
%tmp7 = or <8 x i16> %tmp4, %tmp6
@@ -90,9 +90,9 @@ define <4 x i32> @v_bslQi32(<4 x i32>* %A, <4 x i32>* %B, <4 x i32>* %C) nounwin
;CHECK: vld1.32
;CHECK: vld1.32
;CHECK: vbsl
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
- %tmp3 = load <4 x i32>* %C
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
+ %tmp2 = load <4 x i32>, <4 x i32>* %B
+ %tmp3 = load <4 x i32>, <4 x i32>* %C
%tmp4 = and <4 x i32> %tmp1, <i32 3, i32 3, i32 3, i32 3>
%tmp6 = and <4 x i32> %tmp3, <i32 -4, i32 -4, i32 -4, i32 -4>
%tmp7 = or <4 x i32> %tmp4, %tmp6
@@ -105,9 +105,9 @@ define <2 x i64> @v_bslQi64(<2 x i64>* %A, <2 x i64>* %B, <2 x i64>* %C) nounwin
;CHECK: vld1.32
;CHECK: vld1.64
;CHECK: vbsl
- %tmp1 = load <2 x i64>* %A
- %tmp2 = load <2 x i64>* %B
- %tmp3 = load <2 x i64>* %C
+ %tmp1 = load <2 x i64>, <2 x i64>* %A
+ %tmp2 = load <2 x i64>, <2 x i64>* %B
+ %tmp3 = load <2 x i64>, <2 x i64>* %C
%tmp4 = and <2 x i64> %tmp1, <i64 3, i64 3>
%tmp6 = and <2 x i64> %tmp3, <i64 -4, i64 -4>
%tmp7 = or <2 x i64> %tmp4, %tmp6
diff --git a/test/CodeGen/ARM/vbsl.ll b/test/CodeGen/ARM/vbsl.ll
index ddc37cc82441..6812dd90a100 100644
--- a/test/CodeGen/ARM/vbsl.ll
+++ b/test/CodeGen/ARM/vbsl.ll
@@ -5,9 +5,9 @@
define <8 x i8> @v_bsli8(<8 x i8>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
;CHECK-LABEL: v_bsli8:
;CHECK: vbsl
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
- %tmp3 = load <8 x i8>* %C
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
+ %tmp2 = load <8 x i8>, <8 x i8>* %B
+ %tmp3 = load <8 x i8>, <8 x i8>* %C
%tmp4 = and <8 x i8> %tmp1, %tmp2
%tmp5 = xor <8 x i8> %tmp1, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 >
%tmp6 = and <8 x i8> %tmp5, %tmp3
@@ -18,9 +18,9 @@ define <8 x i8> @v_bsli8(<8 x i8>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
define <4 x i16> @v_bsli16(<4 x i16>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
;CHECK-LABEL: v_bsli16:
;CHECK: vbsl
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
- %tmp3 = load <4 x i16>* %C
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
+ %tmp3 = load <4 x i16>, <4 x i16>* %C
%tmp4 = and <4 x i16> %tmp1, %tmp2
%tmp5 = xor <4 x i16> %tmp1, < i16 -1, i16 -1, i16 -1, i16 -1 >
%tmp6 = and <4 x i16> %tmp5, %tmp3
@@ -31,9 +31,9 @@ define <4 x i16> @v_bsli16(<4 x i16>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind
define <2 x i32> @v_bsli32(<2 x i32>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
;CHECK-LABEL: v_bsli32:
;CHECK: vbsl
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
- %tmp3 = load <2 x i32>* %C
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
+ %tmp3 = load <2 x i32>, <2 x i32>* %C
%tmp4 = and <2 x i32> %tmp1, %tmp2
%tmp5 = xor <2 x i32> %tmp1, < i32 -1, i32 -1 >
%tmp6 = and <2 x i32> %tmp5, %tmp3
@@ -44,9 +44,9 @@ define <2 x i32> @v_bsli32(<2 x i32>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind
define <1 x i64> @v_bsli64(<1 x i64>* %A, <1 x i64>* %B, <1 x i64>* %C) nounwind {
;CHECK-LABEL: v_bsli64:
;CHECK: vbsl
- %tmp1 = load <1 x i64>* %A
- %tmp2 = load <1 x i64>* %B
- %tmp3 = load <1 x i64>* %C
+ %tmp1 = load <1 x i64>, <1 x i64>* %A
+ %tmp2 = load <1 x i64>, <1 x i64>* %B
+ %tmp3 = load <1 x i64>, <1 x i64>* %C
%tmp4 = and <1 x i64> %tmp1, %tmp2
%tmp5 = xor <1 x i64> %tmp1, < i64 -1 >
%tmp6 = and <1 x i64> %tmp5, %tmp3
@@ -57,9 +57,9 @@ define <1 x i64> @v_bsli64(<1 x i64>* %A, <1 x i64>* %B, <1 x i64>* %C) nounwind
define <16 x i8> @v_bslQi8(<16 x i8>* %A, <16 x i8>* %B, <16 x i8>* %C) nounwind {
;CHECK-LABEL: v_bslQi8:
;CHECK: vbsl
- %tmp1 = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
- %tmp3 = load <16 x i8>* %C
+ %tmp1 = load <16 x i8>, <16 x i8>* %A
+ %tmp2 = load <16 x i8>, <16 x i8>* %B
+ %tmp3 = load <16 x i8>, <16 x i8>* %C
%tmp4 = and <16 x i8> %tmp1, %tmp2
%tmp5 = xor <16 x i8> %tmp1, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 >
%tmp6 = and <16 x i8> %tmp5, %tmp3
@@ -70,9 +70,9 @@ define <16 x i8> @v_bslQi8(<16 x i8>* %A, <16 x i8>* %B, <16 x i8>* %C) nounwind
define <8 x i16> @v_bslQi16(<8 x i16>* %A, <8 x i16>* %B, <8 x i16>* %C) nounwind {
;CHECK-LABEL: v_bslQi16:
;CHECK: vbsl
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
- %tmp3 = load <8 x i16>* %C
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
+ %tmp2 = load <8 x i16>, <8 x i16>* %B
+ %tmp3 = load <8 x i16>, <8 x i16>* %C
%tmp4 = and <8 x i16> %tmp1, %tmp2
%tmp5 = xor <8 x i16> %tmp1, < i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1 >
%tmp6 = and <8 x i16> %tmp5, %tmp3
@@ -83,9 +83,9 @@ define <8 x i16> @v_bslQi16(<8 x i16>* %A, <8 x i16>* %B, <8 x i16>* %C) nounwin
define <4 x i32> @v_bslQi32(<4 x i32>* %A, <4 x i32>* %B, <4 x i32>* %C) nounwind {
;CHECK-LABEL: v_bslQi32:
;CHECK: vbsl
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
- %tmp3 = load <4 x i32>* %C
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
+ %tmp2 = load <4 x i32>, <4 x i32>* %B
+ %tmp3 = load <4 x i32>, <4 x i32>* %C
%tmp4 = and <4 x i32> %tmp1, %tmp2
%tmp5 = xor <4 x i32> %tmp1, < i32 -1, i32 -1, i32 -1, i32 -1 >
%tmp6 = and <4 x i32> %tmp5, %tmp3
@@ -96,9 +96,9 @@ define <4 x i32> @v_bslQi32(<4 x i32>* %A, <4 x i32>* %B, <4 x i32>* %C) nounwin
define <2 x i64> @v_bslQi64(<2 x i64>* %A, <2 x i64>* %B, <2 x i64>* %C) nounwind {
;CHECK-LABEL: v_bslQi64:
;CHECK: vbsl
- %tmp1 = load <2 x i64>* %A
- %tmp2 = load <2 x i64>* %B
- %tmp3 = load <2 x i64>* %C
+ %tmp1 = load <2 x i64>, <2 x i64>* %A
+ %tmp2 = load <2 x i64>, <2 x i64>* %B
+ %tmp3 = load <2 x i64>, <2 x i64>* %C
%tmp4 = and <2 x i64> %tmp1, %tmp2
%tmp5 = xor <2 x i64> %tmp1, < i64 -1, i64 -1 >
%tmp6 = and <2 x i64> %tmp5, %tmp3
diff --git a/test/CodeGen/ARM/vceq.ll b/test/CodeGen/ARM/vceq.ll
index e3202e402cc7..3772401a2f95 100644
--- a/test/CodeGen/ARM/vceq.ll
+++ b/test/CodeGen/ARM/vceq.ll
@@ -3,8 +3,8 @@
define <8 x i8> @vceqi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: vceqi8:
;CHECK: vceq.i8
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
+ %tmp2 = load <8 x i8>, <8 x i8>* %B
%tmp3 = icmp eq <8 x i8> %tmp1, %tmp2
%tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
ret <8 x i8> %tmp4
@@ -13,8 +13,8 @@ define <8 x i8> @vceqi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
define <4 x i16> @vceqi16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: vceqi16:
;CHECK: vceq.i16
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
%tmp3 = icmp eq <4 x i16> %tmp1, %tmp2
%tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
ret <4 x i16> %tmp4
@@ -23,8 +23,8 @@ define <4 x i16> @vceqi16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
define <2 x i32> @vceqi32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: vceqi32:
;CHECK: vceq.i32
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
%tmp3 = icmp eq <2 x i32> %tmp1, %tmp2
%tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
ret <2 x i32> %tmp4
@@ -33,8 +33,8 @@ define <2 x i32> @vceqi32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
define <2 x i32> @vceqf32(<2 x float>* %A, <2 x float>* %B) nounwind {
;CHECK-LABEL: vceqf32:
;CHECK: vceq.f32
- %tmp1 = load <2 x float>* %A
- %tmp2 = load <2 x float>* %B
+ %tmp1 = load <2 x float>, <2 x float>* %A
+ %tmp2 = load <2 x float>, <2 x float>* %B
%tmp3 = fcmp oeq <2 x float> %tmp1, %tmp2
%tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
ret <2 x i32> %tmp4
@@ -43,8 +43,8 @@ define <2 x i32> @vceqf32(<2 x float>* %A, <2 x float>* %B) nounwind {
define <16 x i8> @vceqQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: vceqQi8:
;CHECK: vceq.i8
- %tmp1 = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
+ %tmp1 = load <16 x i8>, <16 x i8>* %A
+ %tmp2 = load <16 x i8>, <16 x i8>* %B
%tmp3 = icmp eq <16 x i8> %tmp1, %tmp2
%tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
ret <16 x i8> %tmp4
@@ -53,8 +53,8 @@ define <16 x i8> @vceqQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
define <8 x i16> @vceqQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: vceqQi16:
;CHECK: vceq.i16
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
+ %tmp2 = load <8 x i16>, <8 x i16>* %B
%tmp3 = icmp eq <8 x i16> %tmp1, %tmp2
%tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
ret <8 x i16> %tmp4
@@ -63,8 +63,8 @@ define <8 x i16> @vceqQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
define <4 x i32> @vceqQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: vceqQi32:
;CHECK: vceq.i32
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
+ %tmp2 = load <4 x i32>, <4 x i32>* %B
%tmp3 = icmp eq <4 x i32> %tmp1, %tmp2
%tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
ret <4 x i32> %tmp4
@@ -73,8 +73,8 @@ define <4 x i32> @vceqQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
define <4 x i32> @vceqQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
;CHECK-LABEL: vceqQf32:
;CHECK: vceq.f32
- %tmp1 = load <4 x float>* %A
- %tmp2 = load <4 x float>* %B
+ %tmp1 = load <4 x float>, <4 x float>* %A
+ %tmp2 = load <4 x float>, <4 x float>* %B
%tmp3 = fcmp oeq <4 x float> %tmp1, %tmp2
%tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
ret <4 x i32> %tmp4
@@ -85,7 +85,7 @@ define <8 x i8> @vceqi8Z(<8 x i8>* %A) nounwind {
;CHECK-NOT: vmov
;CHECK-NOT: vmvn
;CHECK: vceq.i8
- %tmp1 = load <8 x i8>* %A
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
%tmp3 = icmp eq <8 x i8> %tmp1, <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>
%tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
ret <8 x i8> %tmp4
diff --git a/test/CodeGen/ARM/vcge.ll b/test/CodeGen/ARM/vcge.ll
index 3739f5ee8c51..2cd33cf3a422 100644
--- a/test/CodeGen/ARM/vcge.ll
+++ b/test/CodeGen/ARM/vcge.ll
@@ -3,8 +3,8 @@
define <8 x i8> @vcges8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: vcges8:
;CHECK: vcge.s8
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
+ %tmp2 = load <8 x i8>, <8 x i8>* %B
%tmp3 = icmp sge <8 x i8> %tmp1, %tmp2
%tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
ret <8 x i8> %tmp4
@@ -13,8 +13,8 @@ define <8 x i8> @vcges8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
define <4 x i16> @vcges16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: vcges16:
;CHECK: vcge.s16
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
%tmp3 = icmp sge <4 x i16> %tmp1, %tmp2
%tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
ret <4 x i16> %tmp4
@@ -23,8 +23,8 @@ define <4 x i16> @vcges16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
define <2 x i32> @vcges32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: vcges32:
;CHECK: vcge.s32
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
%tmp3 = icmp sge <2 x i32> %tmp1, %tmp2
%tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
ret <2 x i32> %tmp4
@@ -33,8 +33,8 @@ define <2 x i32> @vcges32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
define <8 x i8> @vcgeu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: vcgeu8:
;CHECK: vcge.u8
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
+ %tmp2 = load <8 x i8>, <8 x i8>* %B
%tmp3 = icmp uge <8 x i8> %tmp1, %tmp2
%tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
ret <8 x i8> %tmp4
@@ -43,8 +43,8 @@ define <8 x i8> @vcgeu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
define <4 x i16> @vcgeu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: vcgeu16:
;CHECK: vcge.u16
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
%tmp3 = icmp uge <4 x i16> %tmp1, %tmp2
%tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
ret <4 x i16> %tmp4
@@ -53,8 +53,8 @@ define <4 x i16> @vcgeu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
define <2 x i32> @vcgeu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: vcgeu32:
;CHECK: vcge.u32
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
%tmp3 = icmp uge <2 x i32> %tmp1, %tmp2
%tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
ret <2 x i32> %tmp4
@@ -63,8 +63,8 @@ define <2 x i32> @vcgeu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
define <2 x i32> @vcgef32(<2 x float>* %A, <2 x float>* %B) nounwind {
;CHECK-LABEL: vcgef32:
;CHECK: vcge.f32
- %tmp1 = load <2 x float>* %A
- %tmp2 = load <2 x float>* %B
+ %tmp1 = load <2 x float>, <2 x float>* %A
+ %tmp2 = load <2 x float>, <2 x float>* %B
%tmp3 = fcmp oge <2 x float> %tmp1, %tmp2
%tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
ret <2 x i32> %tmp4
@@ -73,8 +73,8 @@ define <2 x i32> @vcgef32(<2 x float>* %A, <2 x float>* %B) nounwind {
define <16 x i8> @vcgeQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: vcgeQs8:
;CHECK: vcge.s8
- %tmp1 = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
+ %tmp1 = load <16 x i8>, <16 x i8>* %A
+ %tmp2 = load <16 x i8>, <16 x i8>* %B
%tmp3 = icmp sge <16 x i8> %tmp1, %tmp2
%tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
ret <16 x i8> %tmp4
@@ -83,8 +83,8 @@ define <16 x i8> @vcgeQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
define <8 x i16> @vcgeQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: vcgeQs16:
;CHECK: vcge.s16
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
+ %tmp2 = load <8 x i16>, <8 x i16>* %B
%tmp3 = icmp sge <8 x i16> %tmp1, %tmp2
%tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
ret <8 x i16> %tmp4
@@ -93,8 +93,8 @@ define <8 x i16> @vcgeQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
define <4 x i32> @vcgeQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: vcgeQs32:
;CHECK: vcge.s32
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
+ %tmp2 = load <4 x i32>, <4 x i32>* %B
%tmp3 = icmp sge <4 x i32> %tmp1, %tmp2
%tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
ret <4 x i32> %tmp4
@@ -103,8 +103,8 @@ define <4 x i32> @vcgeQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
define <16 x i8> @vcgeQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: vcgeQu8:
;CHECK: vcge.u8
- %tmp1 = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
+ %tmp1 = load <16 x i8>, <16 x i8>* %A
+ %tmp2 = load <16 x i8>, <16 x i8>* %B
%tmp3 = icmp uge <16 x i8> %tmp1, %tmp2
%tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
ret <16 x i8> %tmp4
@@ -113,8 +113,8 @@ define <16 x i8> @vcgeQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
define <8 x i16> @vcgeQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: vcgeQu16:
;CHECK: vcge.u16
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
+ %tmp2 = load <8 x i16>, <8 x i16>* %B
%tmp3 = icmp uge <8 x i16> %tmp1, %tmp2
%tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
ret <8 x i16> %tmp4
@@ -123,8 +123,8 @@ define <8 x i16> @vcgeQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
define <4 x i32> @vcgeQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: vcgeQu32:
;CHECK: vcge.u32
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
+ %tmp2 = load <4 x i32>, <4 x i32>* %B
%tmp3 = icmp uge <4 x i32> %tmp1, %tmp2
%tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
ret <4 x i32> %tmp4
@@ -133,8 +133,8 @@ define <4 x i32> @vcgeQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
define <4 x i32> @vcgeQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
;CHECK-LABEL: vcgeQf32:
;CHECK: vcge.f32
- %tmp1 = load <4 x float>* %A
- %tmp2 = load <4 x float>* %B
+ %tmp1 = load <4 x float>, <4 x float>* %A
+ %tmp2 = load <4 x float>, <4 x float>* %B
%tmp3 = fcmp oge <4 x float> %tmp1, %tmp2
%tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
ret <4 x i32> %tmp4
@@ -143,8 +143,8 @@ define <4 x i32> @vcgeQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
define <2 x i32> @vacgef32(<2 x float>* %A, <2 x float>* %B) nounwind {
;CHECK-LABEL: vacgef32:
;CHECK: vacge.f32
- %tmp1 = load <2 x float>* %A
- %tmp2 = load <2 x float>* %B
+ %tmp1 = load <2 x float>, <2 x float>* %A
+ %tmp2 = load <2 x float>, <2 x float>* %B
%tmp3 = call <2 x i32> @llvm.arm.neon.vacge.v2i32.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
ret <2 x i32> %tmp3
}
@@ -152,8 +152,8 @@ define <2 x i32> @vacgef32(<2 x float>* %A, <2 x float>* %B) nounwind {
define <4 x i32> @vacgeQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
;CHECK-LABEL: vacgeQf32:
;CHECK: vacge.f32
- %tmp1 = load <4 x float>* %A
- %tmp2 = load <4 x float>* %B
+ %tmp1 = load <4 x float>, <4 x float>* %A
+ %tmp2 = load <4 x float>, <4 x float>* %B
%tmp3 = call <4 x i32> @llvm.arm.neon.vacge.v4i32.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
ret <4 x i32> %tmp3
}
@@ -166,7 +166,7 @@ define <8 x i8> @vcgei8Z(<8 x i8>* %A) nounwind {
;CHECK-NOT: vmov
;CHECK-NOT: vmvn
;CHECK: vcge.s8
- %tmp1 = load <8 x i8>* %A
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
%tmp3 = icmp sge <8 x i8> %tmp1, <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>
%tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
ret <8 x i8> %tmp4
@@ -177,7 +177,7 @@ define <8 x i8> @vclei8Z(<8 x i8>* %A) nounwind {
;CHECK-NOT: vmov
;CHECK-NOT: vmvn
;CHECK: vcle.s8
- %tmp1 = load <8 x i8>* %A
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
%tmp3 = icmp sle <8 x i8> %tmp1, <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>
%tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
ret <8 x i8> %tmp4
diff --git a/test/CodeGen/ARM/vcgt.ll b/test/CodeGen/ARM/vcgt.ll
index 2f736f689ab1..c39c939d6c95 100644
--- a/test/CodeGen/ARM/vcgt.ll
+++ b/test/CodeGen/ARM/vcgt.ll
@@ -4,8 +4,8 @@
define <8 x i8> @vcgts8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: vcgts8:
;CHECK: vcgt.s8
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
+ %tmp2 = load <8 x i8>, <8 x i8>* %B
%tmp3 = icmp sgt <8 x i8> %tmp1, %tmp2
%tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
ret <8 x i8> %tmp4
@@ -14,8 +14,8 @@ define <8 x i8> @vcgts8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
define <4 x i16> @vcgts16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: vcgts16:
;CHECK: vcgt.s16
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
%tmp3 = icmp sgt <4 x i16> %tmp1, %tmp2
%tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
ret <4 x i16> %tmp4
@@ -24,8 +24,8 @@ define <4 x i16> @vcgts16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
define <2 x i32> @vcgts32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: vcgts32:
;CHECK: vcgt.s32
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
%tmp3 = icmp sgt <2 x i32> %tmp1, %tmp2
%tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
ret <2 x i32> %tmp4
@@ -34,8 +34,8 @@ define <2 x i32> @vcgts32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
define <8 x i8> @vcgtu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: vcgtu8:
;CHECK: vcgt.u8
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
+ %tmp2 = load <8 x i8>, <8 x i8>* %B
%tmp3 = icmp ugt <8 x i8> %tmp1, %tmp2
%tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
ret <8 x i8> %tmp4
@@ -44,8 +44,8 @@ define <8 x i8> @vcgtu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
define <4 x i16> @vcgtu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: vcgtu16:
;CHECK: vcgt.u16
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
%tmp3 = icmp ugt <4 x i16> %tmp1, %tmp2
%tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
ret <4 x i16> %tmp4
@@ -54,8 +54,8 @@ define <4 x i16> @vcgtu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
define <2 x i32> @vcgtu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: vcgtu32:
;CHECK: vcgt.u32
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
%tmp3 = icmp ugt <2 x i32> %tmp1, %tmp2
%tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
ret <2 x i32> %tmp4
@@ -64,8 +64,8 @@ define <2 x i32> @vcgtu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
define <2 x i32> @vcgtf32(<2 x float>* %A, <2 x float>* %B) nounwind {
;CHECK-LABEL: vcgtf32:
;CHECK: vcgt.f32
- %tmp1 = load <2 x float>* %A
- %tmp2 = load <2 x float>* %B
+ %tmp1 = load <2 x float>, <2 x float>* %A
+ %tmp2 = load <2 x float>, <2 x float>* %B
%tmp3 = fcmp ogt <2 x float> %tmp1, %tmp2
%tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
ret <2 x i32> %tmp4
@@ -74,8 +74,8 @@ define <2 x i32> @vcgtf32(<2 x float>* %A, <2 x float>* %B) nounwind {
define <16 x i8> @vcgtQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: vcgtQs8:
;CHECK: vcgt.s8
- %tmp1 = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
+ %tmp1 = load <16 x i8>, <16 x i8>* %A
+ %tmp2 = load <16 x i8>, <16 x i8>* %B
%tmp3 = icmp sgt <16 x i8> %tmp1, %tmp2
%tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
ret <16 x i8> %tmp4
@@ -84,8 +84,8 @@ define <16 x i8> @vcgtQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
define <8 x i16> @vcgtQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: vcgtQs16:
;CHECK: vcgt.s16
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
+ %tmp2 = load <8 x i16>, <8 x i16>* %B
%tmp3 = icmp sgt <8 x i16> %tmp1, %tmp2
%tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
ret <8 x i16> %tmp4
@@ -94,8 +94,8 @@ define <8 x i16> @vcgtQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
define <4 x i32> @vcgtQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: vcgtQs32:
;CHECK: vcgt.s32
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
+ %tmp2 = load <4 x i32>, <4 x i32>* %B
%tmp3 = icmp sgt <4 x i32> %tmp1, %tmp2
%tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
ret <4 x i32> %tmp4
@@ -104,8 +104,8 @@ define <4 x i32> @vcgtQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
define <16 x i8> @vcgtQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: vcgtQu8:
;CHECK: vcgt.u8
- %tmp1 = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
+ %tmp1 = load <16 x i8>, <16 x i8>* %A
+ %tmp2 = load <16 x i8>, <16 x i8>* %B
%tmp3 = icmp ugt <16 x i8> %tmp1, %tmp2
%tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
ret <16 x i8> %tmp4
@@ -114,8 +114,8 @@ define <16 x i8> @vcgtQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
define <8 x i16> @vcgtQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: vcgtQu16:
;CHECK: vcgt.u16
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
+ %tmp2 = load <8 x i16>, <8 x i16>* %B
%tmp3 = icmp ugt <8 x i16> %tmp1, %tmp2
%tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
ret <8 x i16> %tmp4
@@ -124,8 +124,8 @@ define <8 x i16> @vcgtQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
define <4 x i32> @vcgtQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: vcgtQu32:
;CHECK: vcgt.u32
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
+ %tmp2 = load <4 x i32>, <4 x i32>* %B
%tmp3 = icmp ugt <4 x i32> %tmp1, %tmp2
%tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
ret <4 x i32> %tmp4
@@ -134,8 +134,8 @@ define <4 x i32> @vcgtQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
define <4 x i32> @vcgtQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
;CHECK-LABEL: vcgtQf32:
;CHECK: vcgt.f32
- %tmp1 = load <4 x float>* %A
- %tmp2 = load <4 x float>* %B
+ %tmp1 = load <4 x float>, <4 x float>* %A
+ %tmp2 = load <4 x float>, <4 x float>* %B
%tmp3 = fcmp ogt <4 x float> %tmp1, %tmp2
%tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
ret <4 x i32> %tmp4
@@ -144,8 +144,8 @@ define <4 x i32> @vcgtQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
define <2 x i32> @vacgtf32(<2 x float>* %A, <2 x float>* %B) nounwind {
;CHECK-LABEL: vacgtf32:
;CHECK: vacgt.f32
- %tmp1 = load <2 x float>* %A
- %tmp2 = load <2 x float>* %B
+ %tmp1 = load <2 x float>, <2 x float>* %A
+ %tmp2 = load <2 x float>, <2 x float>* %B
%tmp3 = call <2 x i32> @llvm.arm.neon.vacgt.v2i32.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
ret <2 x i32> %tmp3
}
@@ -153,8 +153,8 @@ define <2 x i32> @vacgtf32(<2 x float>* %A, <2 x float>* %B) nounwind {
define <4 x i32> @vacgtQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
;CHECK-LABEL: vacgtQf32:
;CHECK: vacgt.f32
- %tmp1 = load <4 x float>* %A
- %tmp2 = load <4 x float>* %B
+ %tmp1 = load <4 x float>, <4 x float>* %A
+ %tmp2 = load <4 x float>, <4 x float>* %B
%tmp3 = call <4 x i32> @llvm.arm.neon.vacgt.v4i32.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
ret <4 x i32> %tmp3
}
@@ -165,8 +165,8 @@ define <4 x i32> @vcgt_zext(<4 x float>* %A, <4 x float>* %B) nounwind {
;CHECK: vmov.i32 [[Q0:q[0-9]+]], #0x1
;CHECK: vcgt.f32 [[Q1:q[0-9]+]]
;CHECK: vand [[Q2:q[0-9]+]], [[Q1]], [[Q0]]
- %tmp1 = load <4 x float>* %A
- %tmp2 = load <4 x float>* %B
+ %tmp1 = load <4 x float>, <4 x float>* %A
+ %tmp2 = load <4 x float>, <4 x float>* %B
%tmp3 = fcmp ogt <4 x float> %tmp1, %tmp2
%tmp4 = zext <4 x i1> %tmp3 to <4 x i32>
ret <4 x i32> %tmp4
@@ -180,7 +180,7 @@ define <8 x i8> @vcgti8Z(<8 x i8>* %A) nounwind {
;CHECK-NOT: vmov
;CHECK-NOT: vmvn
;CHECK: vcgt.s8
- %tmp1 = load <8 x i8>* %A
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
%tmp3 = icmp sgt <8 x i8> %tmp1, <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>
%tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
ret <8 x i8> %tmp4
@@ -191,7 +191,7 @@ define <8 x i8> @vclti8Z(<8 x i8>* %A) nounwind {
;CHECK-NOT: vmov
;CHECK-NOT: vmvn
;CHECK: vclt.s8
- %tmp1 = load <8 x i8>* %A
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
%tmp3 = icmp slt <8 x i8> %tmp1, <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>
%tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
ret <8 x i8> %tmp4
diff --git a/test/CodeGen/ARM/vcnt.ll b/test/CodeGen/ARM/vcnt.ll
index 390559b82807..de251c58e6b9 100644
--- a/test/CodeGen/ARM/vcnt.ll
+++ b/test/CodeGen/ARM/vcnt.ll
@@ -4,7 +4,7 @@
define <8 x i8> @vcnt8(<8 x i8>* %A) nounwind {
;CHECK-LABEL: vcnt8:
;CHECK: vcnt.8 {{d[0-9]+}}, {{d[0-9]+}}
- %tmp1 = load <8 x i8>* %A
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
%tmp2 = call <8 x i8> @llvm.ctpop.v8i8(<8 x i8> %tmp1)
ret <8 x i8> %tmp2
}
@@ -12,7 +12,7 @@ define <8 x i8> @vcnt8(<8 x i8>* %A) nounwind {
define <16 x i8> @vcntQ8(<16 x i8>* %A) nounwind {
;CHECK-LABEL: vcntQ8:
;CHECK: vcnt.8 {{q[0-9]+}}, {{q[0-9]+}}
- %tmp1 = load <16 x i8>* %A
+ %tmp1 = load <16 x i8>, <16 x i8>* %A
%tmp2 = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %tmp1)
ret <16 x i8> %tmp2
}
@@ -23,7 +23,7 @@ declare <16 x i8> @llvm.ctpop.v16i8(<16 x i8>) nounwind readnone
define <8 x i8> @vclz8(<8 x i8>* %A) nounwind {
;CHECK-LABEL: vclz8:
;CHECK: vclz.i8 {{d[0-9]+}}, {{d[0-9]+}}
- %tmp1 = load <8 x i8>* %A
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
%tmp2 = call <8 x i8> @llvm.ctlz.v8i8(<8 x i8> %tmp1, i1 0)
ret <8 x i8> %tmp2
}
@@ -31,7 +31,7 @@ define <8 x i8> @vclz8(<8 x i8>* %A) nounwind {
define <4 x i16> @vclz16(<4 x i16>* %A) nounwind {
;CHECK-LABEL: vclz16:
;CHECK: vclz.i16 {{d[0-9]+}}, {{d[0-9]+}}
- %tmp1 = load <4 x i16>* %A
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
%tmp2 = call <4 x i16> @llvm.ctlz.v4i16(<4 x i16> %tmp1, i1 0)
ret <4 x i16> %tmp2
}
@@ -39,7 +39,7 @@ define <4 x i16> @vclz16(<4 x i16>* %A) nounwind {
define <2 x i32> @vclz32(<2 x i32>* %A) nounwind {
;CHECK-LABEL: vclz32:
;CHECK: vclz.i32 {{d[0-9]+}}, {{d[0-9]+}}
- %tmp1 = load <2 x i32>* %A
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
%tmp2 = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %tmp1, i1 0)
ret <2 x i32> %tmp2
}
@@ -47,7 +47,7 @@ define <2 x i32> @vclz32(<2 x i32>* %A) nounwind {
define <16 x i8> @vclzQ8(<16 x i8>* %A) nounwind {
;CHECK-LABEL: vclzQ8:
;CHECK: vclz.i8 {{q[0-9]+}}, {{q[0-9]+}}
- %tmp1 = load <16 x i8>* %A
+ %tmp1 = load <16 x i8>, <16 x i8>* %A
%tmp2 = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %tmp1, i1 0)
ret <16 x i8> %tmp2
}
@@ -55,7 +55,7 @@ define <16 x i8> @vclzQ8(<16 x i8>* %A) nounwind {
define <8 x i16> @vclzQ16(<8 x i16>* %A) nounwind {
;CHECK-LABEL: vclzQ16:
;CHECK: vclz.i16 {{q[0-9]+}}, {{q[0-9]+}}
- %tmp1 = load <8 x i16>* %A
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
%tmp2 = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %tmp1, i1 0)
ret <8 x i16> %tmp2
}
@@ -63,7 +63,7 @@ define <8 x i16> @vclzQ16(<8 x i16>* %A) nounwind {
define <4 x i32> @vclzQ32(<4 x i32>* %A) nounwind {
;CHECK-LABEL: vclzQ32:
;CHECK: vclz.i32 {{q[0-9]+}}, {{q[0-9]+}}
- %tmp1 = load <4 x i32>* %A
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
%tmp2 = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %tmp1, i1 0)
ret <4 x i32> %tmp2
}
@@ -79,7 +79,7 @@ declare <4 x i32> @llvm.ctlz.v4i32(<4 x i32>, i1) nounwind readnone
define <8 x i8> @vclss8(<8 x i8>* %A) nounwind {
;CHECK-LABEL: vclss8:
;CHECK: vcls.s8
- %tmp1 = load <8 x i8>* %A
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
%tmp2 = call <8 x i8> @llvm.arm.neon.vcls.v8i8(<8 x i8> %tmp1)
ret <8 x i8> %tmp2
}
@@ -87,7 +87,7 @@ define <8 x i8> @vclss8(<8 x i8>* %A) nounwind {
define <4 x i16> @vclss16(<4 x i16>* %A) nounwind {
;CHECK-LABEL: vclss16:
;CHECK: vcls.s16
- %tmp1 = load <4 x i16>* %A
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
%tmp2 = call <4 x i16> @llvm.arm.neon.vcls.v4i16(<4 x i16> %tmp1)
ret <4 x i16> %tmp2
}
@@ -95,7 +95,7 @@ define <4 x i16> @vclss16(<4 x i16>* %A) nounwind {
define <2 x i32> @vclss32(<2 x i32>* %A) nounwind {
;CHECK-LABEL: vclss32:
;CHECK: vcls.s32
- %tmp1 = load <2 x i32>* %A
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
%tmp2 = call <2 x i32> @llvm.arm.neon.vcls.v2i32(<2 x i32> %tmp1)
ret <2 x i32> %tmp2
}
@@ -103,7 +103,7 @@ define <2 x i32> @vclss32(<2 x i32>* %A) nounwind {
define <16 x i8> @vclsQs8(<16 x i8>* %A) nounwind {
;CHECK-LABEL: vclsQs8:
;CHECK: vcls.s8
- %tmp1 = load <16 x i8>* %A
+ %tmp1 = load <16 x i8>, <16 x i8>* %A
%tmp2 = call <16 x i8> @llvm.arm.neon.vcls.v16i8(<16 x i8> %tmp1)
ret <16 x i8> %tmp2
}
@@ -111,7 +111,7 @@ define <16 x i8> @vclsQs8(<16 x i8>* %A) nounwind {
define <8 x i16> @vclsQs16(<8 x i16>* %A) nounwind {
;CHECK-LABEL: vclsQs16:
;CHECK: vcls.s16
- %tmp1 = load <8 x i16>* %A
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
%tmp2 = call <8 x i16> @llvm.arm.neon.vcls.v8i16(<8 x i16> %tmp1)
ret <8 x i16> %tmp2
}
@@ -119,7 +119,7 @@ define <8 x i16> @vclsQs16(<8 x i16>* %A) nounwind {
define <4 x i32> @vclsQs32(<4 x i32>* %A) nounwind {
;CHECK-LABEL: vclsQs32:
;CHECK: vcls.s32
- %tmp1 = load <4 x i32>* %A
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
%tmp2 = call <4 x i32> @llvm.arm.neon.vcls.v4i32(<4 x i32> %tmp1)
ret <4 x i32> %tmp2
}
diff --git a/test/CodeGen/ARM/vcombine.ll b/test/CodeGen/ARM/vcombine.ll
index 33aa71df0be3..9491c15aef58 100644
--- a/test/CodeGen/ARM/vcombine.ll
+++ b/test/CodeGen/ARM/vcombine.ll
@@ -7,8 +7,8 @@ define <16 x i8> @vcombine8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
; CHECK-LE: vmov r2, r3, d17
; CHECK-BE: vmov r1, r0, d16
; CHECK-BE: vmov r3, r2, d17
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
+ %tmp2 = load <8 x i8>, <8 x i8>* %B
%tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
ret <16 x i8> %tmp3
}
@@ -19,8 +19,8 @@ define <8 x i16> @vcombine16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
; CHECK-LE: vmov r2, r3, d17
; CHECK-BE: vmov r1, r0, d16
; CHECK-BE: vmov r3, r2, d17
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
%tmp3 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
ret <8 x i16> %tmp3
}
@@ -31,8 +31,8 @@ define <4 x i32> @vcombine32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
; CHECK-LE: vmov r2, r3, d17
; CHECK-BE: vmov r1, r0, d16
; CHECK-BE: vmov r3, r2, d17
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
%tmp3 = shufflevector <2 x i32> %tmp1, <2 x i32> %tmp2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
ret <4 x i32> %tmp3
}
@@ -43,8 +43,8 @@ define <4 x float> @vcombinefloat(<2 x float>* %A, <2 x float>* %B) nounwind {
; CHECK-LE: vmov r2, r3, d17
; CHECK-BE: vmov r1, r0, d16
; CHECK-BE: vmov r3, r2, d17
- %tmp1 = load <2 x float>* %A
- %tmp2 = load <2 x float>* %B
+ %tmp1 = load <2 x float>, <2 x float>* %A
+ %tmp2 = load <2 x float>, <2 x float>* %B
%tmp3 = shufflevector <2 x float> %tmp1, <2 x float> %tmp2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
ret <4 x float> %tmp3
}
@@ -55,8 +55,8 @@ define <2 x i64> @vcombine64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
; CHECK-LE: vmov r2, r3, d17
; CHECK-BE: vmov r1, r0, d16
; CHECK-BE: vmov r3, r2, d17
- %tmp1 = load <1 x i64>* %A
- %tmp2 = load <1 x i64>* %B
+ %tmp1 = load <1 x i64>, <1 x i64>* %A
+ %tmp2 = load <1 x i64>, <1 x i64>* %B
%tmp3 = shufflevector <1 x i64> %tmp1, <1 x i64> %tmp2, <2 x i32> <i32 0, i32 1>
ret <2 x i64> %tmp3
}
@@ -69,7 +69,7 @@ define <4 x i16> @vget_low16(<8 x i16>* %A) nounwind {
; CHECK-NOT: vst
; CHECK-LE: vmov r0, r1, d16
; CHECK-BE: vmov r1, r0, d16
- %tmp1 = load <8 x i16>* %A
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
%tmp2 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
ret <4 x i16> %tmp2
}
@@ -79,7 +79,7 @@ define <8 x i8> @vget_high8(<16 x i8>* %A) nounwind {
; CHECK-NOT: vst
; CHECK-LE: vmov r0, r1, d17
; CHECK-BE: vmov r1, r0, d16
- %tmp1 = load <16 x i8>* %A
+ %tmp1 = load <16 x i8>, <16 x i8>* %A
%tmp2 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
ret <8 x i8> %tmp2
}
diff --git a/test/CodeGen/ARM/vcvt-cost.ll b/test/CodeGen/ARM/vcvt-cost.ll
index 5e56a5b34cf0..c80450a28400 100644
--- a/test/CodeGen/ARM/vcvt-cost.ll
+++ b/test/CodeGen/ARM/vcvt-cost.ll
@@ -9,7 +9,7 @@ define void @func_cvt5(%T0_5* %loadaddr, %T1_5* %storeaddr) {
; CHECK: vmovl.s8
; CHECK: vmovl.s16
; CHECK: vmovl.s16
- %v0 = load %T0_5* %loadaddr
+ %v0 = load %T0_5, %T0_5* %loadaddr
; COST: func_cvt5
; COST: cost of 3 {{.*}} sext
%r = sext %T0_5 %v0 to %T1_5
@@ -25,7 +25,7 @@ define void @func_cvt1(%TA0_5* %loadaddr, %TA1_5* %storeaddr) {
; CHECK: vmovl.u8
; CHECK: vmovl.u16
; CHECK: vmovl.u16
- %v0 = load %TA0_5* %loadaddr
+ %v0 = load %TA0_5, %TA0_5* %loadaddr
; COST: func_cvt1
; COST: cost of 3 {{.*}} zext
%r = zext %TA0_5 %v0 to %TA1_5
@@ -40,7 +40,7 @@ define void @func_cvt51(%T0_51* %loadaddr, %T1_51* %storeaddr) {
; CHECK: vmovn.i32
; CHECK: vmovn.i32
; CHECK: vmovn.i16
- %v0 = load %T0_51* %loadaddr
+ %v0 = load %T0_51, %T0_51* %loadaddr
; COST: func_cvt51
; COST: cost of 3 {{.*}} trunc
%r = trunc %T0_51 %v0 to %T1_51
@@ -56,7 +56,7 @@ define void @func_cvt52(%TT0_5* %loadaddr, %TT1_5* %storeaddr) {
; CHECK: vmovl.s16
; CHECK: vmovl.s16
; CHECK: vmovl.s16
- %v0 = load %TT0_5* %loadaddr
+ %v0 = load %TT0_5, %TT0_5* %loadaddr
; COST: func_cvt52
; COST: cost of 6 {{.*}} sext
%r = sext %TT0_5 %v0 to %TT1_5
@@ -73,7 +73,7 @@ define void @func_cvt12(%TTA0_5* %loadaddr, %TTA1_5* %storeaddr) {
; CHECK: vmovl.u16
; CHECK: vmovl.u16
; CHECK: vmovl.u16
- %v0 = load %TTA0_5* %loadaddr
+ %v0 = load %TTA0_5, %TTA0_5* %loadaddr
; COST: func_cvt12
; COST: cost of 6 {{.*}} zext
%r = zext %TTA0_5 %v0 to %TTA1_5
@@ -91,7 +91,7 @@ define void @func_cvt512(%TT0_51* %loadaddr, %TT1_51* %storeaddr) {
; CHECK: vmovn.i32
; CHECK: vmovn.i16
; CHECK: vmovn.i16
- %v0 = load %TT0_51* %loadaddr
+ %v0 = load %TT0_51, %TT0_51* %loadaddr
; COST: func_cvt512
; COST: cost of 6 {{.*}} trunc
%r = trunc %TT0_51 %v0 to %TT1_51
@@ -103,7 +103,7 @@ define void @func_cvt512(%TT0_51* %loadaddr, %TT1_51* %storeaddr) {
define void @sext_v4i16_v4i64(<4 x i16>* %loadaddr, <4 x i64>* %storeaddr) {
; CHECK: vmovl.s32
; CHECK: vmovl.s32
- %v0 = load <4 x i16>* %loadaddr
+ %v0 = load <4 x i16>, <4 x i16>* %loadaddr
; COST: sext_v4i16_v4i64
; COST: cost of 3 {{.*}} sext
%r = sext <4 x i16> %v0 to <4 x i64>
@@ -115,7 +115,7 @@ define void @sext_v4i16_v4i64(<4 x i16>* %loadaddr, <4 x i64>* %storeaddr) {
define void @zext_v4i16_v4i64(<4 x i16>* %loadaddr, <4 x i64>* %storeaddr) {
; CHECK: vmovl.u32
; CHECK: vmovl.u32
- %v0 = load <4 x i16>* %loadaddr
+ %v0 = load <4 x i16>, <4 x i16>* %loadaddr
; COST: zext_v4i16_v4i64
; COST: cost of 3 {{.*}} zext
%r = zext <4 x i16> %v0 to <4 x i64>
@@ -129,7 +129,7 @@ define void @sext_v8i16_v8i64(<8 x i16>* %loadaddr, <8 x i64>* %storeaddr) {
; CHECK: vmovl.s32
; CHECK: vmovl.s32
; CHECK: vmovl.s32
- %v0 = load <8 x i16>* %loadaddr
+ %v0 = load <8 x i16>, <8 x i16>* %loadaddr
; COST: sext_v8i16_v8i64
; COST: cost of 6 {{.*}} sext
%r = sext <8 x i16> %v0 to <8 x i64>
@@ -143,7 +143,7 @@ define void @zext_v8i16_v8i64(<8 x i16>* %loadaddr, <8 x i64>* %storeaddr) {
; CHECK: vmovl.u32
; CHECK: vmovl.u32
; CHECK: vmovl.u32
- %v0 = load <8 x i16>* %loadaddr
+ %v0 = load <8 x i16>, <8 x i16>* %loadaddr
; COST: zext_v8i16_v8i64
; COST: cost of 6 {{.*}} zext
%r = zext <8 x i16> %v0 to <8 x i64>
diff --git a/test/CodeGen/ARM/vcvt-v8.ll b/test/CodeGen/ARM/vcvt-v8.ll
index c449009e1e1f..9d5972fa4da5 100644
--- a/test/CodeGen/ARM/vcvt-v8.ll
+++ b/test/CodeGen/ARM/vcvt-v8.ll
@@ -2,7 +2,7 @@
define <4 x i32> @vcvtasq(<4 x float>* %A) {
; CHECK: vcvtasq
; CHECK: vcvta.s32.f32 q{{[0-9]+}}, q{{[0-9]+}}
- %tmp1 = load <4 x float>* %A
+ %tmp1 = load <4 x float>, <4 x float>* %A
%tmp2 = call <4 x i32> @llvm.arm.neon.vcvtas.v4i32.v4f32(<4 x float> %tmp1)
ret <4 x i32> %tmp2
}
@@ -10,7 +10,7 @@ define <4 x i32> @vcvtasq(<4 x float>* %A) {
define <2 x i32> @vcvtasd(<2 x float>* %A) {
; CHECK: vcvtasd
; CHECK: vcvta.s32.f32 d{{[0-9]+}}, d{{[0-9]+}}
- %tmp1 = load <2 x float>* %A
+ %tmp1 = load <2 x float>, <2 x float>* %A
%tmp2 = call <2 x i32> @llvm.arm.neon.vcvtas.v2i32.v2f32(<2 x float> %tmp1)
ret <2 x i32> %tmp2
}
@@ -18,7 +18,7 @@ define <2 x i32> @vcvtasd(<2 x float>* %A) {
define <4 x i32> @vcvtnsq(<4 x float>* %A) {
; CHECK: vcvtnsq
; CHECK: vcvtn.s32.f32 q{{[0-9]+}}, q{{[0-9]+}}
- %tmp1 = load <4 x float>* %A
+ %tmp1 = load <4 x float>, <4 x float>* %A
%tmp2 = call <4 x i32> @llvm.arm.neon.vcvtns.v4i32.v4f32(<4 x float> %tmp1)
ret <4 x i32> %tmp2
}
@@ -26,7 +26,7 @@ define <4 x i32> @vcvtnsq(<4 x float>* %A) {
define <2 x i32> @vcvtnsd(<2 x float>* %A) {
; CHECK: vcvtnsd
; CHECK: vcvtn.s32.f32 d{{[0-9]+}}, d{{[0-9]+}}
- %tmp1 = load <2 x float>* %A
+ %tmp1 = load <2 x float>, <2 x float>* %A
%tmp2 = call <2 x i32> @llvm.arm.neon.vcvtns.v2i32.v2f32(<2 x float> %tmp1)
ret <2 x i32> %tmp2
}
@@ -34,7 +34,7 @@ define <2 x i32> @vcvtnsd(<2 x float>* %A) {
define <4 x i32> @vcvtpsq(<4 x float>* %A) {
; CHECK: vcvtpsq
; CHECK: vcvtp.s32.f32 q{{[0-9]+}}, q{{[0-9]+}}
- %tmp1 = load <4 x float>* %A
+ %tmp1 = load <4 x float>, <4 x float>* %A
%tmp2 = call <4 x i32> @llvm.arm.neon.vcvtps.v4i32.v4f32(<4 x float> %tmp1)
ret <4 x i32> %tmp2
}
@@ -42,7 +42,7 @@ define <4 x i32> @vcvtpsq(<4 x float>* %A) {
define <2 x i32> @vcvtpsd(<2 x float>* %A) {
; CHECK: vcvtpsd
; CHECK: vcvtp.s32.f32 d{{[0-9]+}}, d{{[0-9]+}}
- %tmp1 = load <2 x float>* %A
+ %tmp1 = load <2 x float>, <2 x float>* %A
%tmp2 = call <2 x i32> @llvm.arm.neon.vcvtps.v2i32.v2f32(<2 x float> %tmp1)
ret <2 x i32> %tmp2
}
@@ -50,7 +50,7 @@ define <2 x i32> @vcvtpsd(<2 x float>* %A) {
define <4 x i32> @vcvtmsq(<4 x float>* %A) {
; CHECK: vcvtmsq
; CHECK: vcvtm.s32.f32 q{{[0-9]+}}, q{{[0-9]+}}
- %tmp1 = load <4 x float>* %A
+ %tmp1 = load <4 x float>, <4 x float>* %A
%tmp2 = call <4 x i32> @llvm.arm.neon.vcvtms.v4i32.v4f32(<4 x float> %tmp1)
ret <4 x i32> %tmp2
}
@@ -58,7 +58,7 @@ define <4 x i32> @vcvtmsq(<4 x float>* %A) {
define <2 x i32> @vcvtmsd(<2 x float>* %A) {
; CHECK: vcvtmsd
; CHECK: vcvtm.s32.f32 d{{[0-9]+}}, d{{[0-9]+}}
- %tmp1 = load <2 x float>* %A
+ %tmp1 = load <2 x float>, <2 x float>* %A
%tmp2 = call <2 x i32> @llvm.arm.neon.vcvtms.v2i32.v2f32(<2 x float> %tmp1)
ret <2 x i32> %tmp2
}
@@ -66,7 +66,7 @@ define <2 x i32> @vcvtmsd(<2 x float>* %A) {
define <4 x i32> @vcvtauq(<4 x float>* %A) {
; CHECK: vcvtauq
; CHECK: vcvta.u32.f32 q{{[0-9]+}}, q{{[0-9]+}}
- %tmp1 = load <4 x float>* %A
+ %tmp1 = load <4 x float>, <4 x float>* %A
%tmp2 = call <4 x i32> @llvm.arm.neon.vcvtau.v4i32.v4f32(<4 x float> %tmp1)
ret <4 x i32> %tmp2
}
@@ -74,7 +74,7 @@ define <4 x i32> @vcvtauq(<4 x float>* %A) {
define <2 x i32> @vcvtaud(<2 x float>* %A) {
; CHECK: vcvtaud
; CHECK: vcvta.u32.f32 d{{[0-9]+}}, d{{[0-9]+}}
- %tmp1 = load <2 x float>* %A
+ %tmp1 = load <2 x float>, <2 x float>* %A
%tmp2 = call <2 x i32> @llvm.arm.neon.vcvtau.v2i32.v2f32(<2 x float> %tmp1)
ret <2 x i32> %tmp2
}
@@ -82,7 +82,7 @@ define <2 x i32> @vcvtaud(<2 x float>* %A) {
define <4 x i32> @vcvtnuq(<4 x float>* %A) {
; CHECK: vcvtnuq
; CHECK: vcvtn.u32.f32 q{{[0-9]+}}, q{{[0-9]+}}
- %tmp1 = load <4 x float>* %A
+ %tmp1 = load <4 x float>, <4 x float>* %A
%tmp2 = call <4 x i32> @llvm.arm.neon.vcvtnu.v4i32.v4f32(<4 x float> %tmp1)
ret <4 x i32> %tmp2
}
@@ -90,7 +90,7 @@ define <4 x i32> @vcvtnuq(<4 x float>* %A) {
define <2 x i32> @vcvtnud(<2 x float>* %A) {
; CHECK: vcvtnud
; CHECK: vcvtn.u32.f32 d{{[0-9]+}}, d{{[0-9]+}}
- %tmp1 = load <2 x float>* %A
+ %tmp1 = load <2 x float>, <2 x float>* %A
%tmp2 = call <2 x i32> @llvm.arm.neon.vcvtnu.v2i32.v2f32(<2 x float> %tmp1)
ret <2 x i32> %tmp2
}
@@ -98,7 +98,7 @@ define <2 x i32> @vcvtnud(<2 x float>* %A) {
define <4 x i32> @vcvtpuq(<4 x float>* %A) {
; CHECK: vcvtpuq
; CHECK: vcvtp.u32.f32 q{{[0-9]+}}, q{{[0-9]+}}
- %tmp1 = load <4 x float>* %A
+ %tmp1 = load <4 x float>, <4 x float>* %A
%tmp2 = call <4 x i32> @llvm.arm.neon.vcvtpu.v4i32.v4f32(<4 x float> %tmp1)
ret <4 x i32> %tmp2
}
@@ -106,7 +106,7 @@ define <4 x i32> @vcvtpuq(<4 x float>* %A) {
define <2 x i32> @vcvtpud(<2 x float>* %A) {
; CHECK: vcvtpud
; CHECK: vcvtp.u32.f32 d{{[0-9]+}}, d{{[0-9]+}}
- %tmp1 = load <2 x float>* %A
+ %tmp1 = load <2 x float>, <2 x float>* %A
%tmp2 = call <2 x i32> @llvm.arm.neon.vcvtpu.v2i32.v2f32(<2 x float> %tmp1)
ret <2 x i32> %tmp2
}
@@ -114,7 +114,7 @@ define <2 x i32> @vcvtpud(<2 x float>* %A) {
define <4 x i32> @vcvtmuq(<4 x float>* %A) {
; CHECK: vcvtmuq
; CHECK: vcvtm.u32.f32 q{{[0-9]+}}, q{{[0-9]+}}
- %tmp1 = load <4 x float>* %A
+ %tmp1 = load <4 x float>, <4 x float>* %A
%tmp2 = call <4 x i32> @llvm.arm.neon.vcvtmu.v4i32.v4f32(<4 x float> %tmp1)
ret <4 x i32> %tmp2
}
@@ -122,7 +122,7 @@ define <4 x i32> @vcvtmuq(<4 x float>* %A) {
define <2 x i32> @vcvtmud(<2 x float>* %A) {
; CHECK: vcvtmud
; CHECK: vcvtm.u32.f32 d{{[0-9]+}}, d{{[0-9]+}}
- %tmp1 = load <2 x float>* %A
+ %tmp1 = load <2 x float>, <2 x float>* %A
%tmp2 = call <2 x i32> @llvm.arm.neon.vcvtmu.v2i32.v2f32(<2 x float> %tmp1)
ret <2 x i32> %tmp2
}
diff --git a/test/CodeGen/ARM/vcvt.ll b/test/CodeGen/ARM/vcvt.ll
index af4e6a3b0465..78105f7e0ad1 100644
--- a/test/CodeGen/ARM/vcvt.ll
+++ b/test/CodeGen/ARM/vcvt.ll
@@ -3,7 +3,7 @@
define <2 x i32> @vcvt_f32tos32(<2 x float>* %A) nounwind {
;CHECK-LABEL: vcvt_f32tos32:
;CHECK: vcvt.s32.f32
- %tmp1 = load <2 x float>* %A
+ %tmp1 = load <2 x float>, <2 x float>* %A
%tmp2 = fptosi <2 x float> %tmp1 to <2 x i32>
ret <2 x i32> %tmp2
}
@@ -11,7 +11,7 @@ define <2 x i32> @vcvt_f32tos32(<2 x float>* %A) nounwind {
define <2 x i32> @vcvt_f32tou32(<2 x float>* %A) nounwind {
;CHECK-LABEL: vcvt_f32tou32:
;CHECK: vcvt.u32.f32
- %tmp1 = load <2 x float>* %A
+ %tmp1 = load <2 x float>, <2 x float>* %A
%tmp2 = fptoui <2 x float> %tmp1 to <2 x i32>
ret <2 x i32> %tmp2
}
@@ -19,7 +19,7 @@ define <2 x i32> @vcvt_f32tou32(<2 x float>* %A) nounwind {
define <2 x float> @vcvt_s32tof32(<2 x i32>* %A) nounwind {
;CHECK-LABEL: vcvt_s32tof32:
;CHECK: vcvt.f32.s32
- %tmp1 = load <2 x i32>* %A
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
%tmp2 = sitofp <2 x i32> %tmp1 to <2 x float>
ret <2 x float> %tmp2
}
@@ -27,7 +27,7 @@ define <2 x float> @vcvt_s32tof32(<2 x i32>* %A) nounwind {
define <2 x float> @vcvt_u32tof32(<2 x i32>* %A) nounwind {
;CHECK-LABEL: vcvt_u32tof32:
;CHECK: vcvt.f32.u32
- %tmp1 = load <2 x i32>* %A
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
%tmp2 = uitofp <2 x i32> %tmp1 to <2 x float>
ret <2 x float> %tmp2
}
@@ -35,7 +35,7 @@ define <2 x float> @vcvt_u32tof32(<2 x i32>* %A) nounwind {
define <4 x i32> @vcvtQ_f32tos32(<4 x float>* %A) nounwind {
;CHECK-LABEL: vcvtQ_f32tos32:
;CHECK: vcvt.s32.f32
- %tmp1 = load <4 x float>* %A
+ %tmp1 = load <4 x float>, <4 x float>* %A
%tmp2 = fptosi <4 x float> %tmp1 to <4 x i32>
ret <4 x i32> %tmp2
}
@@ -43,7 +43,7 @@ define <4 x i32> @vcvtQ_f32tos32(<4 x float>* %A) nounwind {
define <4 x i32> @vcvtQ_f32tou32(<4 x float>* %A) nounwind {
;CHECK-LABEL: vcvtQ_f32tou32:
;CHECK: vcvt.u32.f32
- %tmp1 = load <4 x float>* %A
+ %tmp1 = load <4 x float>, <4 x float>* %A
%tmp2 = fptoui <4 x float> %tmp1 to <4 x i32>
ret <4 x i32> %tmp2
}
@@ -51,7 +51,7 @@ define <4 x i32> @vcvtQ_f32tou32(<4 x float>* %A) nounwind {
define <4 x float> @vcvtQ_s32tof32(<4 x i32>* %A) nounwind {
;CHECK-LABEL: vcvtQ_s32tof32:
;CHECK: vcvt.f32.s32
- %tmp1 = load <4 x i32>* %A
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
%tmp2 = sitofp <4 x i32> %tmp1 to <4 x float>
ret <4 x float> %tmp2
}
@@ -59,7 +59,7 @@ define <4 x float> @vcvtQ_s32tof32(<4 x i32>* %A) nounwind {
define <4 x float> @vcvtQ_u32tof32(<4 x i32>* %A) nounwind {
;CHECK-LABEL: vcvtQ_u32tof32:
;CHECK: vcvt.f32.u32
- %tmp1 = load <4 x i32>* %A
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
%tmp2 = uitofp <4 x i32> %tmp1 to <4 x float>
ret <4 x float> %tmp2
}
@@ -67,7 +67,7 @@ define <4 x float> @vcvtQ_u32tof32(<4 x i32>* %A) nounwind {
define <2 x i32> @vcvt_n_f32tos32(<2 x float>* %A) nounwind {
;CHECK-LABEL: vcvt_n_f32tos32:
;CHECK: vcvt.s32.f32
- %tmp1 = load <2 x float>* %A
+ %tmp1 = load <2 x float>, <2 x float>* %A
%tmp2 = call <2 x i32> @llvm.arm.neon.vcvtfp2fxs.v2i32.v2f32(<2 x float> %tmp1, i32 1)
ret <2 x i32> %tmp2
}
@@ -75,7 +75,7 @@ define <2 x i32> @vcvt_n_f32tos32(<2 x float>* %A) nounwind {
define <2 x i32> @vcvt_n_f32tou32(<2 x float>* %A) nounwind {
;CHECK-LABEL: vcvt_n_f32tou32:
;CHECK: vcvt.u32.f32
- %tmp1 = load <2 x float>* %A
+ %tmp1 = load <2 x float>, <2 x float>* %A
%tmp2 = call <2 x i32> @llvm.arm.neon.vcvtfp2fxu.v2i32.v2f32(<2 x float> %tmp1, i32 1)
ret <2 x i32> %tmp2
}
@@ -83,7 +83,7 @@ define <2 x i32> @vcvt_n_f32tou32(<2 x float>* %A) nounwind {
define <2 x float> @vcvt_n_s32tof32(<2 x i32>* %A) nounwind {
;CHECK-LABEL: vcvt_n_s32tof32:
;CHECK: vcvt.f32.s32
- %tmp1 = load <2 x i32>* %A
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
%tmp2 = call <2 x float> @llvm.arm.neon.vcvtfxs2fp.v2f32.v2i32(<2 x i32> %tmp1, i32 1)
ret <2 x float> %tmp2
}
@@ -91,7 +91,7 @@ define <2 x float> @vcvt_n_s32tof32(<2 x i32>* %A) nounwind {
define <2 x float> @vcvt_n_u32tof32(<2 x i32>* %A) nounwind {
;CHECK-LABEL: vcvt_n_u32tof32:
;CHECK: vcvt.f32.u32
- %tmp1 = load <2 x i32>* %A
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
%tmp2 = call <2 x float> @llvm.arm.neon.vcvtfxu2fp.v2f32.v2i32(<2 x i32> %tmp1, i32 1)
ret <2 x float> %tmp2
}
@@ -104,7 +104,7 @@ declare <2 x float> @llvm.arm.neon.vcvtfxu2fp.v2f32.v2i32(<2 x i32>, i32) nounwi
define <4 x i32> @vcvtQ_n_f32tos32(<4 x float>* %A) nounwind {
;CHECK-LABEL: vcvtQ_n_f32tos32:
;CHECK: vcvt.s32.f32
- %tmp1 = load <4 x float>* %A
+ %tmp1 = load <4 x float>, <4 x float>* %A
%tmp2 = call <4 x i32> @llvm.arm.neon.vcvtfp2fxs.v4i32.v4f32(<4 x float> %tmp1, i32 1)
ret <4 x i32> %tmp2
}
@@ -112,7 +112,7 @@ define <4 x i32> @vcvtQ_n_f32tos32(<4 x float>* %A) nounwind {
define <4 x i32> @vcvtQ_n_f32tou32(<4 x float>* %A) nounwind {
;CHECK-LABEL: vcvtQ_n_f32tou32:
;CHECK: vcvt.u32.f32
- %tmp1 = load <4 x float>* %A
+ %tmp1 = load <4 x float>, <4 x float>* %A
%tmp2 = call <4 x i32> @llvm.arm.neon.vcvtfp2fxu.v4i32.v4f32(<4 x float> %tmp1, i32 1)
ret <4 x i32> %tmp2
}
@@ -120,7 +120,7 @@ define <4 x i32> @vcvtQ_n_f32tou32(<4 x float>* %A) nounwind {
define <4 x float> @vcvtQ_n_s32tof32(<4 x i32>* %A) nounwind {
;CHECK-LABEL: vcvtQ_n_s32tof32:
;CHECK: vcvt.f32.s32
- %tmp1 = load <4 x i32>* %A
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
%tmp2 = call <4 x float> @llvm.arm.neon.vcvtfxs2fp.v4f32.v4i32(<4 x i32> %tmp1, i32 1)
ret <4 x float> %tmp2
}
@@ -128,7 +128,7 @@ define <4 x float> @vcvtQ_n_s32tof32(<4 x i32>* %A) nounwind {
define <4 x float> @vcvtQ_n_u32tof32(<4 x i32>* %A) nounwind {
;CHECK-LABEL: vcvtQ_n_u32tof32:
;CHECK: vcvt.f32.u32
- %tmp1 = load <4 x i32>* %A
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
%tmp2 = call <4 x float> @llvm.arm.neon.vcvtfxu2fp.v4f32.v4i32(<4 x i32> %tmp1, i32 1)
ret <4 x float> %tmp2
}
@@ -141,7 +141,7 @@ declare <4 x float> @llvm.arm.neon.vcvtfxu2fp.v4f32.v4i32(<4 x i32>, i32) nounwi
define <4 x float> @vcvt_f16tof32(<4 x i16>* %A) nounwind {
;CHECK-LABEL: vcvt_f16tof32:
;CHECK: vcvt.f32.f16
- %tmp1 = load <4 x i16>* %A
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
%tmp2 = call <4 x float> @llvm.arm.neon.vcvthf2fp(<4 x i16> %tmp1)
ret <4 x float> %tmp2
}
@@ -149,7 +149,7 @@ define <4 x float> @vcvt_f16tof32(<4 x i16>* %A) nounwind {
define <4 x i16> @vcvt_f32tof16(<4 x float>* %A) nounwind {
;CHECK-LABEL: vcvt_f32tof16:
;CHECK: vcvt.f16.f32
- %tmp1 = load <4 x float>* %A
+ %tmp1 = load <4 x float>, <4 x float>* %A
%tmp2 = call <4 x i16> @llvm.arm.neon.vcvtfp2hf(<4 x float> %tmp1)
ret <4 x i16> %tmp2
}
@@ -180,8 +180,8 @@ define <2 x i64> @fix_float_to_i64(<2 x float> %in) {
define <4 x i16> @fix_double_to_i16(<4 x double> %in) {
; CHECK-LABEL: fix_double_to_i16:
-; CHECK: vcvt.s32.f64
-; CHECK: vcvt.s32.f64
+; CHECK: vcvt.u32.f64
+; CHECK: vcvt.u32.f64
%scale = fmul <4 x double> %in, <double 2.0, double 2.0, double 2.0, double 2.0>
%conv = fptoui <4 x double> %scale to <4 x i16>
diff --git a/test/CodeGen/ARM/vcvt_combine.ll b/test/CodeGen/ARM/vcvt_combine.ll
index 07ba230757be..0c856e8d7617 100644
--- a/test/CodeGen/ARM/vcvt_combine.ll
+++ b/test/CodeGen/ARM/vcvt_combine.ll
@@ -7,7 +7,7 @@
; CHECK-NOT: vmul
define void @t0() nounwind {
entry:
- %tmp = load float* @in, align 4
+ %tmp = load float, float* @in, align 4
%vecinit.i = insertelement <2 x float> undef, float %tmp, i32 0
%vecinit2.i = insertelement <2 x float> %vecinit.i, float %tmp, i32 1
%mul.i = fmul <2 x float> %vecinit2.i, <float 8.000000e+00, float 8.000000e+00>
@@ -23,7 +23,7 @@ declare void @foo_int32x2_t(<2 x i32>)
; CHECK-NOT: vmul
define void @t1() nounwind {
entry:
- %tmp = load float* @in, align 4
+ %tmp = load float, float* @in, align 4
%vecinit.i = insertelement <2 x float> undef, float %tmp, i32 0
%vecinit2.i = insertelement <2 x float> %vecinit.i, float %tmp, i32 1
%mul.i = fmul <2 x float> %vecinit2.i, <float 8.000000e+00, float 8.000000e+00>
@@ -39,7 +39,7 @@ declare void @foo_uint32x2_t(<2 x i32>)
; CHECK: vmul
define void @t2() nounwind {
entry:
- %tmp = load float* @in, align 4
+ %tmp = load float, float* @in, align 4
%vecinit.i = insertelement <2 x float> undef, float %tmp, i32 0
%vecinit2.i = insertelement <2 x float> %vecinit.i, float %tmp, i32 1
%mul.i = fmul <2 x float> %vecinit2.i, <float 0x401B333340000000, float 0x401B333340000000>
@@ -53,7 +53,7 @@ entry:
; CHECK: vmul
define void @t3() nounwind {
entry:
- %tmp = load float* @in, align 4
+ %tmp = load float, float* @in, align 4
%vecinit.i = insertelement <2 x float> undef, float %tmp, i32 0
%vecinit2.i = insertelement <2 x float> %vecinit.i, float %tmp, i32 1
%mul.i = fmul <2 x float> %vecinit2.i, <float 0x4200000000000000, float 0x4200000000000000>
@@ -67,7 +67,7 @@ entry:
; CHECK-NOT: vmul
define void @t4() nounwind {
entry:
- %tmp = load float* @in, align 4
+ %tmp = load float, float* @in, align 4
%vecinit.i = insertelement <2 x float> undef, float %tmp, i32 0
%vecinit2.i = insertelement <2 x float> %vecinit.i, float %tmp, i32 1
%mul.i = fmul <2 x float> %vecinit2.i, <float 0x41F0000000000000, float 0x41F0000000000000>
@@ -81,7 +81,7 @@ entry:
; CHECK-NOT: vmul
define void @t5() nounwind {
entry:
- %tmp = load float* @in, align 4
+ %tmp = load float, float* @in, align 4
%vecinit.i = insertelement <4 x float> undef, float %tmp, i32 0
%vecinit2.i = insertelement <4 x float> %vecinit.i, float %tmp, i32 1
%vecinit4.i = insertelement <4 x float> %vecinit2.i, float %tmp, i32 2
diff --git a/test/CodeGen/ARM/vdiv_combine.ll b/test/CodeGen/ARM/vdiv_combine.ll
index 96807f7280f8..8c6e4ba35054 100644
--- a/test/CodeGen/ARM/vdiv_combine.ll
+++ b/test/CodeGen/ARM/vdiv_combine.ll
@@ -11,7 +11,7 @@ declare void @foo_int32x4_t(<4 x i32>)
; CHECK-NOT: {{vdiv|vmul}}
define void @t1() nounwind {
entry:
- %tmp = load i32* @iin, align 4
+ %tmp = load i32, i32* @iin, align 4
%vecinit.i = insertelement <2 x i32> undef, i32 %tmp, i32 0
%vecinit2.i = insertelement <2 x i32> %vecinit.i, i32 %tmp, i32 1
%vcvt.i = sitofp <2 x i32> %vecinit2.i to <2 x float>
@@ -27,7 +27,7 @@ declare void @foo_float32x2_t(<2 x float>)
; CHECK-NOT: {{vdiv|vmul}}
define void @t2() nounwind {
entry:
- %tmp = load i32* @uin, align 4
+ %tmp = load i32, i32* @uin, align 4
%vecinit.i = insertelement <2 x i32> undef, i32 %tmp, i32 0
%vecinit2.i = insertelement <2 x i32> %vecinit.i, i32 %tmp, i32 1
%vcvt.i = uitofp <2 x i32> %vecinit2.i to <2 x float>
@@ -41,7 +41,7 @@ entry:
; CHECK: {{vdiv|vmul}}
define void @t3() nounwind {
entry:
- %tmp = load i32* @iin, align 4
+ %tmp = load i32, i32* @iin, align 4
%vecinit.i = insertelement <2 x i32> undef, i32 %tmp, i32 0
%vecinit2.i = insertelement <2 x i32> %vecinit.i, i32 %tmp, i32 1
%vcvt.i = sitofp <2 x i32> %vecinit2.i to <2 x float>
@@ -55,7 +55,7 @@ entry:
; CHECK: {{vdiv|vmul}}
define void @t4() nounwind {
entry:
- %tmp = load i32* @iin, align 4
+ %tmp = load i32, i32* @iin, align 4
%vecinit.i = insertelement <2 x i32> undef, i32 %tmp, i32 0
%vecinit2.i = insertelement <2 x i32> %vecinit.i, i32 %tmp, i32 1
%vcvt.i = sitofp <2 x i32> %vecinit2.i to <2 x float>
@@ -69,7 +69,7 @@ entry:
; CHECK-NOT: {{vdiv|vmul}}
define void @t5() nounwind {
entry:
- %tmp = load i32* @iin, align 4
+ %tmp = load i32, i32* @iin, align 4
%vecinit.i = insertelement <2 x i32> undef, i32 %tmp, i32 0
%vecinit2.i = insertelement <2 x i32> %vecinit.i, i32 %tmp, i32 1
%vcvt.i = sitofp <2 x i32> %vecinit2.i to <2 x float>
@@ -83,7 +83,7 @@ entry:
; CHECK-NOT: {{vdiv|vmul}}
define void @t6() nounwind {
entry:
- %tmp = load i32* @iin, align 4
+ %tmp = load i32, i32* @iin, align 4
%vecinit.i = insertelement <4 x i32> undef, i32 %tmp, i32 0
%vecinit2.i = insertelement <4 x i32> %vecinit.i, i32 %tmp, i32 1
%vecinit4.i = insertelement <4 x i32> %vecinit2.i, i32 %tmp, i32 2
diff --git a/test/CodeGen/ARM/vdup.ll b/test/CodeGen/ARM/vdup.ll
index 89f355c68751..36eebbfc4650 100644
--- a/test/CodeGen/ARM/vdup.ll
+++ b/test/CodeGen/ARM/vdup.ll
@@ -166,7 +166,7 @@ define <4 x float> @v_shuffledupQfloat(float %A) nounwind {
define <8 x i8> @vduplane8(<8 x i8>* %A) nounwind {
;CHECK-LABEL: vduplane8:
;CHECK: vdup.8
- %tmp1 = load <8 x i8>* %A
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
%tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> < i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1 >
ret <8 x i8> %tmp2
}
@@ -174,7 +174,7 @@ define <8 x i8> @vduplane8(<8 x i8>* %A) nounwind {
define <4 x i16> @vduplane16(<4 x i16>* %A) nounwind {
;CHECK-LABEL: vduplane16:
;CHECK: vdup.16
- %tmp1 = load <4 x i16>* %A
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
%tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 >
ret <4 x i16> %tmp2
}
@@ -182,7 +182,7 @@ define <4 x i16> @vduplane16(<4 x i16>* %A) nounwind {
define <2 x i32> @vduplane32(<2 x i32>* %A) nounwind {
;CHECK-LABEL: vduplane32:
;CHECK: vdup.32
- %tmp1 = load <2 x i32>* %A
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
%tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <2 x i32> < i32 1, i32 1 >
ret <2 x i32> %tmp2
}
@@ -190,7 +190,7 @@ define <2 x i32> @vduplane32(<2 x i32>* %A) nounwind {
define <2 x float> @vduplanefloat(<2 x float>* %A) nounwind {
;CHECK-LABEL: vduplanefloat:
;CHECK: vdup.32
- %tmp1 = load <2 x float>* %A
+ %tmp1 = load <2 x float>, <2 x float>* %A
%tmp2 = shufflevector <2 x float> %tmp1, <2 x float> undef, <2 x i32> < i32 1, i32 1 >
ret <2 x float> %tmp2
}
@@ -198,7 +198,7 @@ define <2 x float> @vduplanefloat(<2 x float>* %A) nounwind {
define <16 x i8> @vduplaneQ8(<8 x i8>* %A) nounwind {
;CHECK-LABEL: vduplaneQ8:
;CHECK: vdup.8
- %tmp1 = load <8 x i8>* %A
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
%tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <16 x i32> < i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1 >
ret <16 x i8> %tmp2
}
@@ -206,7 +206,7 @@ define <16 x i8> @vduplaneQ8(<8 x i8>* %A) nounwind {
define <8 x i16> @vduplaneQ16(<4 x i16>* %A) nounwind {
;CHECK-LABEL: vduplaneQ16:
;CHECK: vdup.16
- %tmp1 = load <4 x i16>* %A
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
%tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <8 x i32> < i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1 >
ret <8 x i16> %tmp2
}
@@ -214,7 +214,7 @@ define <8 x i16> @vduplaneQ16(<4 x i16>* %A) nounwind {
define <4 x i32> @vduplaneQ32(<2 x i32>* %A) nounwind {
;CHECK-LABEL: vduplaneQ32:
;CHECK: vdup.32
- %tmp1 = load <2 x i32>* %A
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
%tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 >
ret <4 x i32> %tmp2
}
@@ -222,7 +222,7 @@ define <4 x i32> @vduplaneQ32(<2 x i32>* %A) nounwind {
define <4 x float> @vduplaneQfloat(<2 x float>* %A) nounwind {
;CHECK-LABEL: vduplaneQfloat:
;CHECK: vdup.32
- %tmp1 = load <2 x float>* %A
+ %tmp1 = load <2 x float>, <2 x float>* %A
%tmp2 = shufflevector <2 x float> %tmp1, <2 x float> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 >
ret <4 x float> %tmp2
}
@@ -347,17 +347,17 @@ define <2 x float> @check_spr_splat2(<2 x float> %p, i16 %q) {
define <4 x float> @check_spr_splat4(<4 x float> %p, i16 %q) {
;CHECK-LABEL: check_spr_splat4:
-;CHECK: vdup.32 q
+;CHECK: vld1.16
%conv = sitofp i16 %q to float
%splat.splatinsert = insertelement <4 x float> undef, float %conv, i32 0
%splat.splat = shufflevector <4 x float> %splat.splatinsert, <4 x float> undef, <4 x i32> zeroinitializer
%sub = fsub <4 x float> %splat.splat, %p
ret <4 x float> %sub
}
-
+; Same codegen as above test; scalar is splatted using vld1, so shuffle index is irrelevant.
define <4 x float> @check_spr_splat4_lane1(<4 x float> %p, i16 %q) {
;CHECK-LABEL: check_spr_splat4_lane1:
-;CHECK: vdup.32 q{{.*}}, d{{.*}}[1]
+;CHECK: vld1.16
%conv = sitofp i16 %q to float
%splat.splatinsert = insertelement <4 x float> undef, float %conv, i32 1
%splat.splat = shufflevector <4 x float> %splat.splatinsert, <4 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
diff --git a/test/CodeGen/ARM/vector-DAGCombine.ll b/test/CodeGen/ARM/vector-DAGCombine.ll
index 759da2235e41..29f4bb972a24 100644
--- a/test/CodeGen/ARM/vector-DAGCombine.ll
+++ b/test/CodeGen/ARM/vector-DAGCombine.ll
@@ -21,12 +21,20 @@ bb.i19: ; preds = %bb.i19, %bb3
define void @test_illegal_build_vector() nounwind {
entry:
store <2 x i64> undef, <2 x i64>* undef, align 16
- %0 = load <16 x i8>* undef, align 16 ; <<16 x i8>> [#uses=1]
+ %0 = load <16 x i8>, <16 x i8>* undef, align 16 ; <<16 x i8>> [#uses=1]
%1 = or <16 x i8> zeroinitializer, %0 ; <<16 x i8>> [#uses=1]
store <16 x i8> %1, <16 x i8>* undef, align 16
ret void
}
+; PR22678
+; Check that the CONCAT_VECTORS DAG combiner pass doesn't introduce illegal types.
+define void @test_pr22678() {
+ %1 = fptoui <16 x float> undef to <16 x i8>
+ store <16 x i8> %1, <16 x i8>* undef
+ ret void
+}
+
; Radar 8407927: Make sure that VMOVRRD gets optimized away when the result is
; converted back to be used as a vector type.
; CHECK-LABEL: test_vmovrrd_combine:
@@ -55,7 +63,7 @@ bb2:
; Test trying to do a ShiftCombine on illegal types.
; The vector should be split first.
define void @lshrIllegalType(<8 x i32>* %A) nounwind {
- %tmp1 = load <8 x i32>* %A
+ %tmp1 = load <8 x i32>, <8 x i32>* %A
%tmp2 = lshr <8 x i32> %tmp1, < i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
store <8 x i32> %tmp2, <8 x i32>* %A
ret void
@@ -81,7 +89,7 @@ declare void @llvm.arm.neon.vst1.v8i8(i8*, <8 x i8>, i32) nounwind
define void @i64_buildvector(i64* %ptr, <2 x i64>* %vp) nounwind {
; CHECK: i64_buildvector
; CHECK: vldr
- %t0 = load i64* %ptr, align 4
+ %t0 = load i64, i64* %ptr, align 4
%t1 = insertelement <2 x i64> undef, i64 %t0, i32 0
store <2 x i64> %t1, <2 x i64>* %vp
ret void
@@ -90,8 +98,8 @@ define void @i64_buildvector(i64* %ptr, <2 x i64>* %vp) nounwind {
define void @i64_insertelement(i64* %ptr, <2 x i64>* %vp) nounwind {
; CHECK: i64_insertelement
; CHECK: vldr
- %t0 = load i64* %ptr, align 4
- %vec = load <2 x i64>* %vp
+ %t0 = load i64, i64* %ptr, align 4
+ %vec = load <2 x i64>, <2 x i64>* %vp
%t1 = insertelement <2 x i64> %vec, i64 %t0, i32 0
store <2 x i64> %t1, <2 x i64>* %vp
ret void
@@ -100,7 +108,7 @@ define void @i64_insertelement(i64* %ptr, <2 x i64>* %vp) nounwind {
define void @i64_extractelement(i64* %ptr, <2 x i64>* %vp) nounwind {
; CHECK: i64_extractelement
; CHECK: vstr
- %vec = load <2 x i64>* %vp
+ %vec = load <2 x i64>, <2 x i64>* %vp
%t1 = extractelement <2 x i64> %vec, i32 0
store i64 %t1, i64* %ptr
ret void
@@ -108,7 +116,7 @@ define void @i64_extractelement(i64* %ptr, <2 x i64>* %vp) nounwind {
; Test trying to do a AND Combine on illegal types.
define void @andVec(<3 x i8>* %A) nounwind {
- %tmp = load <3 x i8>* %A, align 4
+ %tmp = load <3 x i8>, <3 x i8>* %A, align 4
%and = and <3 x i8> %tmp, <i8 7, i8 7, i8 7>
store <3 x i8> %and, <3 x i8>* %A
ret void
@@ -117,7 +125,7 @@ define void @andVec(<3 x i8>* %A) nounwind {
; Test trying to do an OR Combine on illegal types.
define void @orVec(<3 x i8>* %A) nounwind {
- %tmp = load <3 x i8>* %A, align 4
+ %tmp = load <3 x i8>, <3 x i8>* %A, align 4
%or = or <3 x i8> %tmp, <i8 7, i8 7, i8 7>
store <3 x i8> %or, <3 x i8>* %A
ret void
@@ -138,7 +146,7 @@ define i16 @foldBuildVectors() {
; shuffles.
; CHECK-LABEL: reverse_v8i16:
define void @reverse_v8i16(<8 x i16>* %loadaddr, <8 x i16>* %storeaddr) {
- %v0 = load <8 x i16>* %loadaddr
+ %v0 = load <8 x i16>, <8 x i16>* %loadaddr
; CHECK: vrev64.16
; CHECK: vext.16
%v1 = shufflevector <8 x i16> %v0, <8 x i16> undef,
@@ -151,7 +159,7 @@ define void @reverse_v8i16(<8 x i16>* %loadaddr, <8 x i16>* %storeaddr) {
; shuffles.
; CHECK-LABEL: reverse_v16i8:
define void @reverse_v16i8(<16 x i8>* %loadaddr, <16 x i8>* %storeaddr) {
- %v0 = load <16 x i8>* %loadaddr
+ %v0 = load <16 x i8>, <16 x i8>* %loadaddr
; CHECK: vrev64.8
; CHECK: vext.8
%v1 = shufflevector <16 x i8> %v0, <16 x i8> undef,
@@ -172,9 +180,9 @@ define void @reverse_v16i8(<16 x i8>* %loadaddr, <16 x i8>* %storeaddr) {
define <8 x i16> @t3(i8 zeroext %xf, i8* nocapture %sp0, i8* nocapture %sp1, i32* nocapture %outp) {
entry:
%pix_sp0.0.cast = bitcast i8* %sp0 to i32*
- %pix_sp0.0.copyload = load i32* %pix_sp0.0.cast, align 1
+ %pix_sp0.0.copyload = load i32, i32* %pix_sp0.0.cast, align 1
%pix_sp1.0.cast = bitcast i8* %sp1 to i32*
- %pix_sp1.0.copyload = load i32* %pix_sp1.0.cast, align 1
+ %pix_sp1.0.copyload = load i32, i32* %pix_sp1.0.cast, align 1
%vecinit = insertelement <2 x i32> undef, i32 %pix_sp0.0.copyload, i32 0
%vecinit1 = insertelement <2 x i32> %vecinit, i32 %pix_sp1.0.copyload, i32 1
%0 = bitcast <2 x i32> %vecinit1 to <8 x i8>
@@ -192,7 +200,7 @@ define <8 x i16> @t4(i8* nocapture %sp0) {
; CHECK: vld1.32 {{{d[0-9]+}}[0]}, [r0]
entry:
%pix_sp0.0.cast = bitcast i8* %sp0 to i32*
- %pix_sp0.0.copyload = load i32* %pix_sp0.0.cast, align 1
+ %pix_sp0.0.copyload = load i32, i32* %pix_sp0.0.cast, align 1
%vec = insertelement <2 x i32> undef, i32 %pix_sp0.0.copyload, i32 0
%0 = bitcast <2 x i32> %vec to <8 x i8>
%vmull.i = tail call <8 x i16> @llvm.arm.neon.vmullu.v8i16(<8 x i8> %0, <8 x i8> %0)
@@ -211,11 +219,11 @@ entry:
define <8 x i16> @t5(i8* nocapture %sp0, i8* nocapture %sp1, i8* nocapture %sp2) {
entry:
%pix_sp0.0.cast = bitcast i8* %sp0 to i32*
- %pix_sp0.0.copyload = load i32* %pix_sp0.0.cast, align 1
+ %pix_sp0.0.copyload = load i32, i32* %pix_sp0.0.cast, align 1
%pix_sp1.0.cast = bitcast i8* %sp1 to i32*
- %pix_sp1.0.copyload = load i32* %pix_sp1.0.cast, align 1
+ %pix_sp1.0.copyload = load i32, i32* %pix_sp1.0.cast, align 1
%pix_sp2.0.cast = bitcast i8* %sp2 to i32*
- %pix_sp2.0.copyload = load i32* %pix_sp2.0.cast, align 1
+ %pix_sp2.0.copyload = load i32, i32* %pix_sp2.0.cast, align 1
%vec = insertelement <2 x i32> undef, i32 %pix_sp0.0.copyload, i32 1
%vecinit1 = insertelement <2 x i32> %vec, i32 %pix_sp1.0.copyload, i32 0
%vecinit2 = insertelement <2 x i32> %vec, i32 %pix_sp2.0.copyload, i32 0
diff --git a/test/CodeGen/ARM/vector-extend-narrow.ll b/test/CodeGen/ARM/vector-extend-narrow.ll
index f3218969c78e..d054bfda615e 100644
--- a/test/CodeGen/ARM/vector-extend-narrow.ll
+++ b/test/CodeGen/ARM/vector-extend-narrow.ll
@@ -2,9 +2,9 @@
; CHECK-LABEL: f:
define float @f(<4 x i16>* nocapture %in) {
- ; CHECK: vldr
+ ; CHECK: vld1
; CHECK: vmovl.u16
- %1 = load <4 x i16>* %in
+ %1 = load <4 x i16>, <4 x i16>* %in
; CHECK: vcvt.f32.u32
%2 = uitofp <4 x i16> %1 to <4 x float>
%3 = extractelement <4 x float> %2, i32 0
@@ -25,7 +25,7 @@ define float @g(<4 x i8>* nocapture %in) {
; CHECK: vld1
; CHECK: vmovl.u8
; CHECK: vmovl.u16
- %1 = load <4 x i8>* %in
+ %1 = load <4 x i8>, <4 x i8>* %in
; CHECK: vcvt.f32.u32
%2 = uitofp <4 x i8> %1 to <4 x float>
%3 = extractelement <4 x float> %2, i32 0
@@ -58,7 +58,7 @@ define <4 x i8> @i(<4 x i8>* %x) {
; CHECK: vrecps
; CHECK: vmul
; CHECK: vmovn
- %1 = load <4 x i8>* %x, align 4
+ %1 = load <4 x i8>, <4 x i8>* %x, align 4
%2 = sdiv <4 x i8> zeroinitializer, %1
ret <4 x i8> %2
}
@@ -68,7 +68,7 @@ define <4 x i32> @j(<4 x i8>* %in) nounwind {
; CHECK: vmovl.u8
; CHECK: vmovl.u16
; CHECK-NOT: vand
- %1 = load <4 x i8>* %in, align 4
+ %1 = load <4 x i8>, <4 x i8>* %in, align 4
%2 = zext <4 x i8> %1 to <4 x i32>
ret <4 x i32> %2
}
diff --git a/test/CodeGen/ARM/vector-load.ll b/test/CodeGen/ARM/vector-load.ll
new file mode 100644
index 000000000000..17f134f458a2
--- /dev/null
+++ b/test/CodeGen/ARM/vector-load.ll
@@ -0,0 +1,253 @@
+; RUN: llc < %s | FileCheck %s
+
+target datalayout = "e-m:o-p:32:32-i1:8:32-i8:8:32-i16:16:32-f64:32:64-v64:32:64-v128:32:128-a:0:32-n32-S32"
+target triple = "thumbv7s-apple-ios8.0.0"
+
+define <8 x i8> @load_v8i8(<8 x i8>** %ptr) {
+;CHECK-LABEL: load_v8i8:
+;CHECK: vld1.8 {{{d[0-9]+}}}, [{{r[0-9]+}}]
+ %A = load <8 x i8>*, <8 x i8>** %ptr
+ %lA = load <8 x i8>, <8 x i8>* %A, align 1
+ ret <8 x i8> %lA
+}
+
+define <8 x i8> @load_v8i8_update(<8 x i8>** %ptr) {
+;CHECK-LABEL: load_v8i8_update:
+;CHECK: vld1.8 {{{d[0-9]+}}}, [{{r[0-9]+}}]!
+ %A = load <8 x i8>*, <8 x i8>** %ptr
+ %lA = load <8 x i8>, <8 x i8>* %A, align 1
+ %inc = getelementptr <8 x i8>, <8 x i8>* %A, i38 1
+ store <8 x i8>* %inc, <8 x i8>** %ptr
+ ret <8 x i8> %lA
+}
+
+define <4 x i16> @load_v4i16(<4 x i16>** %ptr) {
+;CHECK-LABEL: load_v4i16:
+;CHECK: vld1.8 {{{d[0-9]+}}}, [{{r[0-9]+}}]
+ %A = load <4 x i16>*, <4 x i16>** %ptr
+ %lA = load <4 x i16>, <4 x i16>* %A, align 1
+ ret <4 x i16> %lA
+}
+
+define <4 x i16> @load_v4i16_update(<4 x i16>** %ptr) {
+;CHECK-LABEL: load_v4i16_update:
+;CHECK: vld1.8 {{{d[0-9]+}}}, [{{r[0-9]+}}]!
+ %A = load <4 x i16>*, <4 x i16>** %ptr
+ %lA = load <4 x i16>, <4 x i16>* %A, align 1
+ %inc = getelementptr <4 x i16>, <4 x i16>* %A, i34 1
+ store <4 x i16>* %inc, <4 x i16>** %ptr
+ ret <4 x i16> %lA
+}
+
+define <2 x i32> @load_v2i32(<2 x i32>** %ptr) {
+;CHECK-LABEL: load_v2i32:
+;CHECK: vld1.8 {{{d[0-9]+}}}, [{{r[0-9]+}}]
+ %A = load <2 x i32>*, <2 x i32>** %ptr
+ %lA = load <2 x i32>, <2 x i32>* %A, align 1
+ ret <2 x i32> %lA
+}
+
+define <2 x i32> @load_v2i32_update(<2 x i32>** %ptr) {
+;CHECK-LABEL: load_v2i32_update:
+;CHECK: vld1.8 {{{d[0-9]+}}}, [{{r[0-9]+}}]!
+ %A = load <2 x i32>*, <2 x i32>** %ptr
+ %lA = load <2 x i32>, <2 x i32>* %A, align 1
+ %inc = getelementptr <2 x i32>, <2 x i32>* %A, i32 1
+ store <2 x i32>* %inc, <2 x i32>** %ptr
+ ret <2 x i32> %lA
+}
+
+define <2 x float> @load_v2f32(<2 x float>** %ptr) {
+;CHECK-LABEL: load_v2f32:
+;CHECK: vld1.8 {{{d[0-9]+}}}, [{{r[0-9]+}}]
+ %A = load <2 x float>*, <2 x float>** %ptr
+ %lA = load <2 x float>, <2 x float>* %A, align 1
+ ret <2 x float> %lA
+}
+
+define <2 x float> @load_v2f32_update(<2 x float>** %ptr) {
+;CHECK-LABEL: load_v2f32_update:
+;CHECK: vld1.8 {{{d[0-9]+}}}, [{{r[0-9]+}}]!
+ %A = load <2 x float>*, <2 x float>** %ptr
+ %lA = load <2 x float>, <2 x float>* %A, align 1
+ %inc = getelementptr <2 x float>, <2 x float>* %A, i32 1
+ store <2 x float>* %inc, <2 x float>** %ptr
+ ret <2 x float> %lA
+}
+
+define <1 x i64> @load_v1i64(<1 x i64>** %ptr) {
+;CHECK-LABEL: load_v1i64:
+;CHECK: vld1.8 {{{d[0-9]+}}}, [{{r[0-9]+}}]
+ %A = load <1 x i64>*, <1 x i64>** %ptr
+ %lA = load <1 x i64>, <1 x i64>* %A, align 1
+ ret <1 x i64> %lA
+}
+
+define <1 x i64> @load_v1i64_update(<1 x i64>** %ptr) {
+;CHECK-LABEL: load_v1i64_update:
+;CHECK: vld1.8 {{{d[0-9]+}}}, [{{r[0-9]+}}]!
+ %A = load <1 x i64>*, <1 x i64>** %ptr
+ %lA = load <1 x i64>, <1 x i64>* %A, align 1
+ %inc = getelementptr <1 x i64>, <1 x i64>* %A, i31 1
+ store <1 x i64>* %inc, <1 x i64>** %ptr
+ ret <1 x i64> %lA
+}
+
+define <16 x i8> @load_v16i8(<16 x i8>** %ptr) {
+;CHECK-LABEL: load_v16i8:
+;CHECK: vld1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]
+ %A = load <16 x i8>*, <16 x i8>** %ptr
+ %lA = load <16 x i8>, <16 x i8>* %A, align 1
+ ret <16 x i8> %lA
+}
+
+define <16 x i8> @load_v16i8_update(<16 x i8>** %ptr) {
+;CHECK-LABEL: load_v16i8_update:
+;CHECK: vld1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]!
+ %A = load <16 x i8>*, <16 x i8>** %ptr
+ %lA = load <16 x i8>, <16 x i8>* %A, align 1
+ %inc = getelementptr <16 x i8>, <16 x i8>* %A, i316 1
+ store <16 x i8>* %inc, <16 x i8>** %ptr
+ ret <16 x i8> %lA
+}
+
+define <8 x i16> @load_v8i16(<8 x i16>** %ptr) {
+;CHECK-LABEL: load_v8i16:
+;CHECK: vld1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]
+ %A = load <8 x i16>*, <8 x i16>** %ptr
+ %lA = load <8 x i16>, <8 x i16>* %A, align 1
+ ret <8 x i16> %lA
+}
+
+define <8 x i16> @load_v8i16_update(<8 x i16>** %ptr) {
+;CHECK-LABEL: load_v8i16_update:
+;CHECK: vld1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]!
+ %A = load <8 x i16>*, <8 x i16>** %ptr
+ %lA = load <8 x i16>, <8 x i16>* %A, align 1
+ %inc = getelementptr <8 x i16>, <8 x i16>* %A, i38 1
+ store <8 x i16>* %inc, <8 x i16>** %ptr
+ ret <8 x i16> %lA
+}
+
+define <4 x i32> @load_v4i32(<4 x i32>** %ptr) {
+;CHECK-LABEL: load_v4i32:
+;CHECK: vld1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]
+ %A = load <4 x i32>*, <4 x i32>** %ptr
+ %lA = load <4 x i32>, <4 x i32>* %A, align 1
+ ret <4 x i32> %lA
+}
+
+define <4 x i32> @load_v4i32_update(<4 x i32>** %ptr) {
+;CHECK-LABEL: load_v4i32_update:
+;CHECK: vld1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]!
+ %A = load <4 x i32>*, <4 x i32>** %ptr
+ %lA = load <4 x i32>, <4 x i32>* %A, align 1
+ %inc = getelementptr <4 x i32>, <4 x i32>* %A, i34 1
+ store <4 x i32>* %inc, <4 x i32>** %ptr
+ ret <4 x i32> %lA
+}
+
+define <4 x float> @load_v4f32(<4 x float>** %ptr) {
+;CHECK-LABEL: load_v4f32:
+;CHECK: vld1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]
+ %A = load <4 x float>*, <4 x float>** %ptr
+ %lA = load <4 x float>, <4 x float>* %A, align 1
+ ret <4 x float> %lA
+}
+
+define <4 x float> @load_v4f32_update(<4 x float>** %ptr) {
+;CHECK-LABEL: load_v4f32_update:
+;CHECK: vld1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]!
+ %A = load <4 x float>*, <4 x float>** %ptr
+ %lA = load <4 x float>, <4 x float>* %A, align 1
+ %inc = getelementptr <4 x float>, <4 x float>* %A, i34 1
+ store <4 x float>* %inc, <4 x float>** %ptr
+ ret <4 x float> %lA
+}
+
+define <2 x i64> @load_v2i64(<2 x i64>** %ptr) {
+;CHECK-LABEL: load_v2i64:
+;CHECK: vld1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]
+ %A = load <2 x i64>*, <2 x i64>** %ptr
+ %lA = load <2 x i64>, <2 x i64>* %A, align 1
+ ret <2 x i64> %lA
+}
+
+define <2 x i64> @load_v2i64_update(<2 x i64>** %ptr) {
+;CHECK-LABEL: load_v2i64_update:
+;CHECK: vld1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]!
+ %A = load <2 x i64>*, <2 x i64>** %ptr
+ %lA = load <2 x i64>, <2 x i64>* %A, align 1
+ %inc = getelementptr <2 x i64>, <2 x i64>* %A, i32 1
+ store <2 x i64>* %inc, <2 x i64>** %ptr
+ ret <2 x i64> %lA
+}
+
+; Make sure we change the type to match alignment if necessary.
+define <2 x i64> @load_v2i64_update_aligned2(<2 x i64>** %ptr) {
+;CHECK-LABEL: load_v2i64_update_aligned2:
+;CHECK: vld1.16 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]!
+ %A = load <2 x i64>*, <2 x i64>** %ptr
+ %lA = load <2 x i64>, <2 x i64>* %A, align 2
+ %inc = getelementptr <2 x i64>, <2 x i64>* %A, i32 1
+ store <2 x i64>* %inc, <2 x i64>** %ptr
+ ret <2 x i64> %lA
+}
+
+define <2 x i64> @load_v2i64_update_aligned4(<2 x i64>** %ptr) {
+;CHECK-LABEL: load_v2i64_update_aligned4:
+;CHECK: vld1.32 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]!
+ %A = load <2 x i64>*, <2 x i64>** %ptr
+ %lA = load <2 x i64>, <2 x i64>* %A, align 4
+ %inc = getelementptr <2 x i64>, <2 x i64>* %A, i32 1
+ store <2 x i64>* %inc, <2 x i64>** %ptr
+ ret <2 x i64> %lA
+}
+
+define <2 x i64> @load_v2i64_update_aligned8(<2 x i64>** %ptr) {
+;CHECK-LABEL: load_v2i64_update_aligned8:
+;CHECK: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]!
+ %A = load <2 x i64>*, <2 x i64>** %ptr
+ %lA = load <2 x i64>, <2 x i64>* %A, align 8
+ %inc = getelementptr <2 x i64>, <2 x i64>* %A, i32 1
+ store <2 x i64>* %inc, <2 x i64>** %ptr
+ ret <2 x i64> %lA
+}
+
+define <2 x i64> @load_v2i64_update_aligned16(<2 x i64>** %ptr) {
+;CHECK-LABEL: load_v2i64_update_aligned16:
+;CHECK: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}:128]!
+ %A = load <2 x i64>*, <2 x i64>** %ptr
+ %lA = load <2 x i64>, <2 x i64>* %A, align 16
+ %inc = getelementptr <2 x i64>, <2 x i64>* %A, i32 1
+ store <2 x i64>* %inc, <2 x i64>** %ptr
+ ret <2 x i64> %lA
+}
+
+; Make sure we don't break smaller-than-dreg extloads.
+define <4 x i32> @zextload_v8i8tov8i32(<4 x i8>** %ptr) {
+;CHECK-LABEL: zextload_v8i8tov8i32:
+;CHECK: vld1.32 {{{d[0-9]+}}[0]}, [{{r[0-9]+}}:32]
+;CHECK: vmovl.u8 {{q[0-9]+}}, {{d[0-9]+}}
+;CHECK: vmovl.u16 {{q[0-9]+}}, {{d[0-9]+}}
+ %A = load <4 x i8>*, <4 x i8>** %ptr
+ %lA = load <4 x i8>, <4 x i8>* %A, align 4
+ %zlA = zext <4 x i8> %lA to <4 x i32>
+ ret <4 x i32> %zlA
+}
+
+define <4 x i32> @zextload_v8i8tov8i32_fake_update(<4 x i8>** %ptr) {
+;CHECK-LABEL: zextload_v8i8tov8i32_fake_update:
+;CHECK: ldr.w r[[PTRREG:[0-9]+]], [r0]
+;CHECK: vld1.32 {{{d[0-9]+}}[0]}, [r[[PTRREG]]:32]
+;CHECK: add.w r[[INCREG:[0-9]+]], r[[PTRREG]], #16
+;CHECK: str.w r[[INCREG]], [r0]
+;CHECK: vmovl.u8 {{q[0-9]+}}, {{d[0-9]+}}
+;CHECK: vmovl.u16 {{q[0-9]+}}, {{d[0-9]+}}
+ %A = load <4 x i8>*, <4 x i8>** %ptr
+ %lA = load <4 x i8>, <4 x i8>* %A, align 4
+ %inc = getelementptr <4 x i8>, <4 x i8>* %A, i38 4
+ store <4 x i8>* %inc, <4 x i8>** %ptr
+ %zlA = zext <4 x i8> %lA to <4 x i32>
+ ret <4 x i32> %zlA
+}
diff --git a/test/CodeGen/ARM/vector-promotion.ll b/test/CodeGen/ARM/vector-promotion.ll
index 42ceb60c47f5..1dabee386089 100644
--- a/test/CodeGen/ARM/vector-promotion.ll
+++ b/test/CodeGen/ARM/vector-promotion.ll
@@ -3,7 +3,7 @@
; RUN: llc -mtriple=thumbv7-apple-ios %s -o - -mattr=+neon | FileCheck --check-prefix=ASM %s
; IR-BOTH-LABEL: @simpleOneInstructionPromotion
-; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <2 x i32>* %addr1
+; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <2 x i32>, <2 x i32>* %addr1
; IR-BOTH-NEXT: [[VECTOR_OR:%[a-zA-Z_0-9-]+]] = or <2 x i32> [[LOAD]], <i32 undef, i32 1>
; IR-BOTH-NEXT: [[EXTRACT:%[a-zA-Z_0-9-]+]] = extractelement <2 x i32> [[VECTOR_OR]], i32 1
; IR-BOTH-NEXT: store i32 [[EXTRACT]], i32* %dest
@@ -16,7 +16,7 @@
; ASM-NEXT: vst1.32 {[[LOAD]][1]}, [r1:32]
; ASM-NEXT: bx
define void @simpleOneInstructionPromotion(<2 x i32>* %addr1, i32* %dest) {
- %in1 = load <2 x i32>* %addr1, align 8
+ %in1 = load <2 x i32>, <2 x i32>* %addr1, align 8
%extract = extractelement <2 x i32> %in1, i32 1
%out = or i32 %extract, 1
store i32 %out, i32* %dest, align 4
@@ -24,7 +24,7 @@ define void @simpleOneInstructionPromotion(<2 x i32>* %addr1, i32* %dest) {
}
; IR-BOTH-LABEL: @unsupportedInstructionForPromotion
-; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <2 x i32>* %addr1
+; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <2 x i32>, <2 x i32>* %addr1
; IR-BOTH-NEXT: [[EXTRACT:%[a-zA-Z_0-9-]+]] = extractelement <2 x i32> [[LOAD]], i32 0
; IR-BOTH-NEXT: [[CMP:%[a-zA-Z_0-9-]+]] = icmp eq i32 [[EXTRACT]], %in2
; IR-BOTH-NEXT: store i1 [[CMP]], i1* %dest
@@ -35,7 +35,7 @@ define void @simpleOneInstructionPromotion(<2 x i32>* %addr1, i32* %dest) {
; ASM: vmov.32 {{r[0-9]+}}, [[LOAD]]
; ASM: bx
define void @unsupportedInstructionForPromotion(<2 x i32>* %addr1, i32 %in2, i1* %dest) {
- %in1 = load <2 x i32>* %addr1, align 8
+ %in1 = load <2 x i32>, <2 x i32>* %addr1, align 8
%extract = extractelement <2 x i32> %in1, i32 0
%out = icmp eq i32 %extract, %in2
store i1 %out, i1* %dest, align 4
@@ -44,7 +44,7 @@ define void @unsupportedInstructionForPromotion(<2 x i32>* %addr1, i32 %in2, i1*
; IR-BOTH-LABEL: @unsupportedChainInDifferentBBs
-; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <2 x i32>* %addr1
+; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <2 x i32>, <2 x i32>* %addr1
; IR-BOTH-NEXT: [[EXTRACT:%[a-zA-Z_0-9-]+]] = extractelement <2 x i32> [[LOAD]], i32 0
; IR-BOTH-NEXT: br i1 %bool, label %bb2, label %end
; BB2
@@ -58,7 +58,7 @@ define void @unsupportedInstructionForPromotion(<2 x i32>* %addr1, i32 %in2, i1*
; ASM: bx
define void @unsupportedChainInDifferentBBs(<2 x i32>* %addr1, i32* %dest, i1 %bool) {
bb1:
- %in1 = load <2 x i32>* %addr1, align 8
+ %in1 = load <2 x i32>, <2 x i32>* %addr1, align 8
%extract = extractelement <2 x i32> %in1, i32 0
br i1 %bool, label %bb2, label %end
bb2:
@@ -70,7 +70,7 @@ end:
}
; IR-LABEL: @chainOfInstructionsToPromote
-; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <2 x i32>* %addr1
+; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <2 x i32>, <2 x i32>* %addr1
; IR-BOTH-NEXT: [[VECTOR_OR1:%[a-zA-Z_0-9-]+]] = or <2 x i32> [[LOAD]], <i32 1, i32 undef>
; IR-BOTH-NEXT: [[VECTOR_OR2:%[a-zA-Z_0-9-]+]] = or <2 x i32> [[VECTOR_OR1]], <i32 1, i32 undef>
; IR-BOTH-NEXT: [[VECTOR_OR3:%[a-zA-Z_0-9-]+]] = or <2 x i32> [[VECTOR_OR2]], <i32 1, i32 undef>
@@ -87,7 +87,7 @@ end:
; ASM-NOT: vmov.32 {{r[0-9]+}}, [[LOAD]]
; ASM: bx
define void @chainOfInstructionsToPromote(<2 x i32>* %addr1, i32* %dest) {
- %in1 = load <2 x i32>* %addr1, align 8
+ %in1 = load <2 x i32>, <2 x i32>* %addr1, align 8
%extract = extractelement <2 x i32> %in1, i32 0
%out1 = or i32 %extract, 1
%out2 = or i32 %out1, 1
@@ -101,7 +101,7 @@ define void @chainOfInstructionsToPromote(<2 x i32>* %addr1, i32* %dest) {
}
; IR-BOTH-LABEL: @unsupportedMultiUses
-; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <2 x i32>* %addr1
+; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <2 x i32>, <2 x i32>* %addr1
; IR-BOTH-NEXT: [[EXTRACT:%[a-zA-Z_0-9-]+]] = extractelement <2 x i32> [[LOAD]], i32 1
; IR-BOTH-NEXT: [[OR:%[a-zA-Z_0-9-]+]] = or i32 [[EXTRACT]], 1
; IR-BOTH-NEXT: store i32 [[OR]], i32* %dest
@@ -112,7 +112,7 @@ define void @chainOfInstructionsToPromote(<2 x i32>* %addr1, i32* %dest) {
; ASM: vmov.32 {{r[0-9]+}}, [[LOAD]]
; ASM: bx
define i32 @unsupportedMultiUses(<2 x i32>* %addr1, i32* %dest) {
- %in1 = load <2 x i32>* %addr1, align 8
+ %in1 = load <2 x i32>, <2 x i32>* %addr1, align 8
%extract = extractelement <2 x i32> %in1, i32 1
%out = or i32 %extract, 1
store i32 %out, i32* %dest, align 4
@@ -122,7 +122,7 @@ define i32 @unsupportedMultiUses(<2 x i32>* %addr1, i32* %dest) {
; Check that we promote with a splat constant when this is a division.
; The NORMAL mode does not promote anything as divisions are not legal.
; IR-BOTH-LABEL: @udivCase
-; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <2 x i32>* %addr1
+; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <2 x i32>, <2 x i32>* %addr1
; Scalar version:
; IR-NORMAL-NEXT: [[EXTRACT:%[a-zA-Z_0-9-]+]] = extractelement <2 x i32> [[LOAD]], i32 1
; IR-NORMAL-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = udiv i32 [[EXTRACT]], 7
@@ -133,7 +133,7 @@ define i32 @unsupportedMultiUses(<2 x i32>* %addr1, i32* %dest) {
; IR-BOTH-NEXT: store i32 [[RES]], i32* %dest
; IR-BOTH-NEXT: ret
define void @udivCase(<2 x i32>* %addr1, i32* %dest) {
- %in1 = load <2 x i32>* %addr1, align 8
+ %in1 = load <2 x i32>, <2 x i32>* %addr1, align 8
%extract = extractelement <2 x i32> %in1, i32 1
%out = udiv i32 %extract, 7
store i32 %out, i32* %dest, align 4
@@ -141,7 +141,7 @@ define void @udivCase(<2 x i32>* %addr1, i32* %dest) {
}
; IR-BOTH-LABEL: @uremCase
-; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <2 x i32>* %addr1
+; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <2 x i32>, <2 x i32>* %addr1
; Scalar version:
; IR-NORMAL-NEXT: [[EXTRACT:%[a-zA-Z_0-9-]+]] = extractelement <2 x i32> [[LOAD]], i32 1
; IR-NORMAL-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = urem i32 [[EXTRACT]], 7
@@ -152,7 +152,7 @@ define void @udivCase(<2 x i32>* %addr1, i32* %dest) {
; IR-BOTH-NEXT: store i32 [[RES]], i32* %dest
; IR-BOTH-NEXT: ret
define void @uremCase(<2 x i32>* %addr1, i32* %dest) {
- %in1 = load <2 x i32>* %addr1, align 8
+ %in1 = load <2 x i32>, <2 x i32>* %addr1, align 8
%extract = extractelement <2 x i32> %in1, i32 1
%out = urem i32 %extract, 7
store i32 %out, i32* %dest, align 4
@@ -160,7 +160,7 @@ define void @uremCase(<2 x i32>* %addr1, i32* %dest) {
}
; IR-BOTH-LABEL: @sdivCase
-; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <2 x i32>* %addr1
+; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <2 x i32>, <2 x i32>* %addr1
; Scalar version:
; IR-NORMAL-NEXT: [[EXTRACT:%[a-zA-Z_0-9-]+]] = extractelement <2 x i32> [[LOAD]], i32 1
; IR-NORMAL-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = sdiv i32 [[EXTRACT]], 7
@@ -171,7 +171,7 @@ define void @uremCase(<2 x i32>* %addr1, i32* %dest) {
; IR-BOTH-NEXT: store i32 [[RES]], i32* %dest
; IR-BOTH-NEXT: ret
define void @sdivCase(<2 x i32>* %addr1, i32* %dest) {
- %in1 = load <2 x i32>* %addr1, align 8
+ %in1 = load <2 x i32>, <2 x i32>* %addr1, align 8
%extract = extractelement <2 x i32> %in1, i32 1
%out = sdiv i32 %extract, 7
store i32 %out, i32* %dest, align 4
@@ -179,7 +179,7 @@ define void @sdivCase(<2 x i32>* %addr1, i32* %dest) {
}
; IR-BOTH-LABEL: @sremCase
-; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <2 x i32>* %addr1
+; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <2 x i32>, <2 x i32>* %addr1
; Scalar version:
; IR-NORMAL-NEXT: [[EXTRACT:%[a-zA-Z_0-9-]+]] = extractelement <2 x i32> [[LOAD]], i32 1
; IR-NORMAL-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = srem i32 [[EXTRACT]], 7
@@ -190,7 +190,7 @@ define void @sdivCase(<2 x i32>* %addr1, i32* %dest) {
; IR-BOTH-NEXT: store i32 [[RES]], i32* %dest
; IR-BOTH-NEXT: ret
define void @sremCase(<2 x i32>* %addr1, i32* %dest) {
- %in1 = load <2 x i32>* %addr1, align 8
+ %in1 = load <2 x i32>, <2 x i32>* %addr1, align 8
%extract = extractelement <2 x i32> %in1, i32 1
%out = srem i32 %extract, 7
store i32 %out, i32* %dest, align 4
@@ -198,7 +198,7 @@ define void @sremCase(<2 x i32>* %addr1, i32* %dest) {
}
; IR-BOTH-LABEL: @fdivCase
-; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <2 x float>* %addr1
+; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <2 x float>, <2 x float>* %addr1
; Scalar version:
; IR-NORMAL-NEXT: [[EXTRACT:%[a-zA-Z_0-9-]+]] = extractelement <2 x float> [[LOAD]], i32 1
; IR-NORMAL-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = fdiv float [[EXTRACT]], 7.0
@@ -209,7 +209,7 @@ define void @sremCase(<2 x i32>* %addr1, i32* %dest) {
; IR-BOTH-NEXT: store float [[RES]], float* %dest
; IR-BOTH-NEXT: ret
define void @fdivCase(<2 x float>* %addr1, float* %dest) {
- %in1 = load <2 x float>* %addr1, align 8
+ %in1 = load <2 x float>, <2 x float>* %addr1, align 8
%extract = extractelement <2 x float> %in1, i32 1
%out = fdiv float %extract, 7.0
store float %out, float* %dest, align 4
@@ -217,7 +217,7 @@ define void @fdivCase(<2 x float>* %addr1, float* %dest) {
}
; IR-BOTH-LABEL: @fremCase
-; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <2 x float>* %addr1
+; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <2 x float>, <2 x float>* %addr1
; Scalar version:
; IR-NORMAL-NEXT: [[EXTRACT:%[a-zA-Z_0-9-]+]] = extractelement <2 x float> [[LOAD]], i32 1
; IR-NORMAL-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = frem float [[EXTRACT]], 7.0
@@ -228,7 +228,7 @@ define void @fdivCase(<2 x float>* %addr1, float* %dest) {
; IR-BOTH-NEXT: store float [[RES]], float* %dest
; IR-BOTH-NEXT: ret
define void @fremCase(<2 x float>* %addr1, float* %dest) {
- %in1 = load <2 x float>* %addr1, align 8
+ %in1 = load <2 x float>, <2 x float>* %addr1, align 8
%extract = extractelement <2 x float> %in1, i32 1
%out = frem float %extract, 7.0
store float %out, float* %dest, align 4
@@ -238,13 +238,13 @@ define void @fremCase(<2 x float>* %addr1, float* %dest) {
; Check that we do not promote when we may introduce undefined behavior
; like division by zero.
; IR-BOTH-LABEL: @undefDivCase
-; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <2 x i32>* %addr1
+; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <2 x i32>, <2 x i32>* %addr1
; IR-BOTH-NEXT: [[EXTRACT:%[a-zA-Z_0-9-]+]] = extractelement <2 x i32> [[LOAD]], i32 1
; IR-BOTH-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = udiv i32 7, [[EXTRACT]]
; IR-BOTH-NEXT: store i32 [[RES]], i32* %dest
; IR-BOTH-NEXT: ret
define void @undefDivCase(<2 x i32>* %addr1, i32* %dest) {
- %in1 = load <2 x i32>* %addr1, align 8
+ %in1 = load <2 x i32>, <2 x i32>* %addr1, align 8
%extract = extractelement <2 x i32> %in1, i32 1
%out = udiv i32 7, %extract
store i32 %out, i32* %dest, align 4
@@ -255,13 +255,13 @@ define void @undefDivCase(<2 x i32>* %addr1, i32* %dest) {
; Check that we do not promote when we may introduce undefined behavior
; like division by zero.
; IR-BOTH-LABEL: @undefRemCase
-; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <2 x i32>* %addr1
+; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <2 x i32>, <2 x i32>* %addr1
; IR-BOTH-NEXT: [[EXTRACT:%[a-zA-Z_0-9-]+]] = extractelement <2 x i32> [[LOAD]], i32 1
; IR-BOTH-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = srem i32 7, [[EXTRACT]]
; IR-BOTH-NEXT: store i32 [[RES]], i32* %dest
; IR-BOTH-NEXT: ret
define void @undefRemCase(<2 x i32>* %addr1, i32* %dest) {
- %in1 = load <2 x i32>* %addr1, align 8
+ %in1 = load <2 x i32>, <2 x i32>* %addr1, align 8
%extract = extractelement <2 x i32> %in1, i32 1
%out = srem i32 7, %extract
store i32 %out, i32* %dest, align 4
@@ -271,7 +271,7 @@ define void @undefRemCase(<2 x i32>* %addr1, i32* %dest) {
; Check that we use an undef mask for undefined behavior if the fast-math
; flag is set.
; IR-BOTH-LABEL: @undefConstantFRemCaseWithFastMath
-; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <2 x float>* %addr1
+; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <2 x float>, <2 x float>* %addr1
; Scalar version:
; IR-NORMAL-NEXT: [[EXTRACT:%[a-zA-Z_0-9-]+]] = extractelement <2 x float> [[LOAD]], i32 1
; IR-NORMAL-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = frem nnan float [[EXTRACT]], 7.0
@@ -282,7 +282,7 @@ define void @undefRemCase(<2 x i32>* %addr1, i32* %dest) {
; IR-BOTH-NEXT: store float [[RES]], float* %dest
; IR-BOTH-NEXT: ret
define void @undefConstantFRemCaseWithFastMath(<2 x float>* %addr1, float* %dest) {
- %in1 = load <2 x float>* %addr1, align 8
+ %in1 = load <2 x float>, <2 x float>* %addr1, align 8
%extract = extractelement <2 x float> %in1, i32 1
%out = frem nnan float %extract, 7.0
store float %out, float* %dest, align 4
@@ -292,7 +292,7 @@ define void @undefConstantFRemCaseWithFastMath(<2 x float>* %addr1, float* %dest
; Check that we use an undef mask for undefined behavior if the fast-math
; flag is set.
; IR-BOTH-LABEL: @undefVectorFRemCaseWithFastMath
-; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <2 x float>* %addr1
+; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <2 x float>, <2 x float>* %addr1
; Scalar version:
; IR-NORMAL-NEXT: [[EXTRACT:%[a-zA-Z_0-9-]+]] = extractelement <2 x float> [[LOAD]], i32 1
; IR-NORMAL-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = frem nnan float 7.000000e+00, [[EXTRACT]]
@@ -303,7 +303,7 @@ define void @undefConstantFRemCaseWithFastMath(<2 x float>* %addr1, float* %dest
; IR-BOTH-NEXT: store float [[RES]], float* %dest
; IR-BOTH-NEXT: ret
define void @undefVectorFRemCaseWithFastMath(<2 x float>* %addr1, float* %dest) {
- %in1 = load <2 x float>* %addr1, align 8
+ %in1 = load <2 x float>, <2 x float>* %addr1, align 8
%extract = extractelement <2 x float> %in1, i32 1
%out = frem nnan float 7.0, %extract
store float %out, float* %dest, align 4
@@ -314,7 +314,7 @@ define void @undefVectorFRemCaseWithFastMath(<2 x float>* %addr1, float* %dest)
; This requires the STRESS mode, as floating point values are
; not promoted on armv7.
; IR-BOTH-LABEL: @simpleOneInstructionPromotionFloat
-; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <2 x float>* %addr1
+; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <2 x float>, <2 x float>* %addr1
; Scalar version:
; IR-NORMAL-NEXT: [[EXTRACT:%[a-zA-Z_0-9-]+]] = extractelement <2 x float> [[LOAD]], i32 1
; IR-NORMAL-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = fadd float [[EXTRACT]], 1.0
@@ -325,7 +325,7 @@ define void @undefVectorFRemCaseWithFastMath(<2 x float>* %addr1, float* %dest)
; IR-BOTH-NEXT: store float [[RES]], float* %dest
; IR-BOTH-NEXT: ret
define void @simpleOneInstructionPromotionFloat(<2 x float>* %addr1, float* %dest) {
- %in1 = load <2 x float>* %addr1, align 8
+ %in1 = load <2 x float>, <2 x float>* %addr1, align 8
%extract = extractelement <2 x float> %in1, i32 1
%out = fadd float %extract, 1.0
store float %out, float* %dest, align 4
@@ -337,7 +337,7 @@ define void @simpleOneInstructionPromotionFloat(<2 x float>* %addr1, float* %des
; This requires the STRESS mode, as variable indices are expensive
; to lower.
; IR-BOTH-LABEL: @simpleOneInstructionPromotionVariableIdx
-; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <2 x i32>* %addr1
+; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <2 x i32>, <2 x i32>* %addr1
; Scalar version:
; IR-NORMAL-NEXT: [[EXTRACT:%[a-zA-Z_0-9-]+]] = extractelement <2 x i32> [[LOAD]], i32 %idx
; IR-NORMAL-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = or i32 [[EXTRACT]], 1
@@ -348,7 +348,7 @@ define void @simpleOneInstructionPromotionFloat(<2 x float>* %addr1, float* %des
; IR-BOTH-NEXT: store i32 [[RES]], i32* %dest
; IR-BOTH-NEXT: ret
define void @simpleOneInstructionPromotionVariableIdx(<2 x i32>* %addr1, i32* %dest, i32 %idx) {
- %in1 = load <2 x i32>* %addr1, align 8
+ %in1 = load <2 x i32>, <2 x i32>* %addr1, align 8
%extract = extractelement <2 x i32> %in1, i32 %idx
%out = or i32 %extract, 1
store i32 %out, i32* %dest, align 4
@@ -360,7 +360,7 @@ define void @simpleOneInstructionPromotionVariableIdx(<2 x i32>* %addr1, i32* %d
; as legal or custom, although the actual assembly is better if we were
; promoting it.
; IR-BOTH-LABEL: @simpleOneInstructionPromotion8x8
-; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <8 x i8>* %addr1
+; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <8 x i8>, <8 x i8>* %addr1
; Scalar version:
; IR-NORMAL-NEXT: [[EXTRACT:%[a-zA-Z_0-9-]+]] = extractelement <8 x i8> [[LOAD]], i32 1
; IR-NORMAL-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = or i8 [[EXTRACT]], 1
@@ -371,7 +371,7 @@ define void @simpleOneInstructionPromotionVariableIdx(<2 x i32>* %addr1, i32* %d
; IR-BOTH-NEXT: store i8 [[RES]], i8* %dest
; IR-BOTH-NEXT: ret
define void @simpleOneInstructionPromotion8x8(<8 x i8>* %addr1, i8* %dest) {
- %in1 = load <8 x i8>* %addr1, align 8
+ %in1 = load <8 x i8>, <8 x i8>* %addr1, align 8
%extract = extractelement <8 x i8> %in1, i32 1
%out = or i8 %extract, 1
store i8 %out, i8* %dest, align 4
@@ -381,7 +381,7 @@ define void @simpleOneInstructionPromotion8x8(<8 x i8>* %addr1, i8* %dest) {
; Check that we optimize the sequence correctly when it can be
; lowered on a Q register.
; IR-BOTH-LABEL: @simpleOneInstructionPromotion
-; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <4 x i32>* %addr1
+; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <4 x i32>, <4 x i32>* %addr1
; IR-BOTH-NEXT: [[VECTOR_OR:%[a-zA-Z_0-9-]+]] = or <4 x i32> [[LOAD]], <i32 undef, i32 1, i32 undef, i32 undef>
; IR-BOTH-NEXT: [[EXTRACT:%[a-zA-Z_0-9-]+]] = extractelement <4 x i32> [[VECTOR_OR]], i32 1
; IR-BOTH-NEXT: store i32 [[EXTRACT]], i32* %dest
@@ -395,7 +395,7 @@ define void @simpleOneInstructionPromotion8x8(<8 x i8>* %addr1, i8* %dest) {
; ASM-NEXT: vst1.32 {[[LOAD]][1]}, [r1]
; ASM-NEXT: bx
define void @simpleOneInstructionPromotion4x32(<4 x i32>* %addr1, i32* %dest) {
- %in1 = load <4 x i32>* %addr1, align 8
+ %in1 = load <4 x i32>, <4 x i32>* %addr1, align 8
%extract = extractelement <4 x i32> %in1, i32 1
%out = or i32 %extract, 1
store i32 %out, i32* %dest, align 1
diff --git a/test/CodeGen/ARM/vector-spilling.ll b/test/CodeGen/ARM/vector-spilling.ll
index 746c6dfcd114..9e3225ebcda0 100644
--- a/test/CodeGen/ARM/vector-spilling.ll
+++ b/test/CodeGen/ARM/vector-spilling.ll
@@ -10,22 +10,22 @@ target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
; CHECK: vldmia
define void @test(<8 x i64>* %src) #0 {
entry:
- %0 = getelementptr inbounds <8 x i64>* %src, i32 0
- %1 = load <8 x i64>* %0, align 8
+ %0 = getelementptr inbounds <8 x i64>, <8 x i64>* %src, i32 0
+ %1 = load <8 x i64>, <8 x i64>* %0, align 8
- %2 = getelementptr inbounds <8 x i64>* %src, i32 1
- %3 = load <8 x i64>* %2, align 8
+ %2 = getelementptr inbounds <8 x i64>, <8 x i64>* %src, i32 1
+ %3 = load <8 x i64>, <8 x i64>* %2, align 8
- %4 = getelementptr inbounds <8 x i64>* %src, i32 2
- %5 = load <8 x i64>* %4, align 8
+ %4 = getelementptr inbounds <8 x i64>, <8 x i64>* %src, i32 2
+ %5 = load <8 x i64>, <8 x i64>* %4, align 8
- %6 = getelementptr inbounds <8 x i64>* %src, i32 3
- %7 = load <8 x i64>* %6, align 8
+ %6 = getelementptr inbounds <8 x i64>, <8 x i64>* %src, i32 3
+ %7 = load <8 x i64>, <8 x i64>* %6, align 8
%8 = shufflevector <8 x i64> %1, <8 x i64> %3, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
%9 = shufflevector <8 x i64> %1, <8 x i64> %3, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
- tail call void(<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>)* @foo(<8 x i64> %1, <8 x i64> %3, <8 x i64> %5, <8 x i64> %7, <8 x i64> %8, <8 x i64> %9)
+ tail call void(<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>) @foo(<8 x i64> %1, <8 x i64> %3, <8 x i64> %5, <8 x i64> %7, <8 x i64> %8, <8 x i64> %9)
ret void
}
diff --git a/test/CodeGen/ARM/vector-store.ll b/test/CodeGen/ARM/vector-store.ll
new file mode 100644
index 000000000000..30baa9a20ddc
--- /dev/null
+++ b/test/CodeGen/ARM/vector-store.ll
@@ -0,0 +1,258 @@
+; RUN: llc < %s | FileCheck %s
+
+target datalayout = "e-m:o-p:32:32-i1:8:32-i8:8:32-i16:16:32-f64:32:64-v64:32:64-v128:32:128-a:0:32-n32-S32"
+target triple = "thumbv7s-apple-ios8.0.0"
+
+define void @store_v8i8(<8 x i8>** %ptr, <8 x i8> %val) {
+;CHECK-LABEL: store_v8i8:
+;CHECK: str r1, [r0]
+ %A = load <8 x i8>*, <8 x i8>** %ptr
+ store <8 x i8> %val, <8 x i8>* %A, align 1
+ ret void
+}
+
+define void @store_v8i8_update(<8 x i8>** %ptr, <8 x i8> %val) {
+;CHECK-LABEL: store_v8i8_update:
+;CHECK: vst1.8 {{{d[0-9]+}}}, [{{r[0-9]+}}]!
+ %A = load <8 x i8>*, <8 x i8>** %ptr
+ store <8 x i8> %val, <8 x i8>* %A, align 1
+ %inc = getelementptr <8 x i8>, <8 x i8>* %A, i38 1
+ store <8 x i8>* %inc, <8 x i8>** %ptr
+ ret void
+}
+
+define void @store_v4i16(<4 x i16>** %ptr, <4 x i16> %val) {
+;CHECK-LABEL: store_v4i16:
+;CHECK: str r1, [r0]
+ %A = load <4 x i16>*, <4 x i16>** %ptr
+ store <4 x i16> %val, <4 x i16>* %A, align 1
+ ret void
+}
+
+define void @store_v4i16_update(<4 x i16>** %ptr, <4 x i16> %val) {
+;CHECK-LABEL: store_v4i16_update:
+;CHECK: vst1.8 {{{d[0-9]+}}}, [{{r[0-9]+}}]!
+ %A = load <4 x i16>*, <4 x i16>** %ptr
+ store <4 x i16> %val, <4 x i16>* %A, align 1
+ %inc = getelementptr <4 x i16>, <4 x i16>* %A, i34 1
+ store <4 x i16>* %inc, <4 x i16>** %ptr
+ ret void
+}
+
+define void @store_v2i32(<2 x i32>** %ptr, <2 x i32> %val) {
+;CHECK-LABEL: store_v2i32:
+;CHECK: str r1, [r0]
+ %A = load <2 x i32>*, <2 x i32>** %ptr
+ store <2 x i32> %val, <2 x i32>* %A, align 1
+ ret void
+}
+
+define void @store_v2i32_update(<2 x i32>** %ptr, <2 x i32> %val) {
+;CHECK-LABEL: store_v2i32_update:
+;CHECK: vst1.8 {{{d[0-9]+}}}, [{{r[0-9]+}}]!
+ %A = load <2 x i32>*, <2 x i32>** %ptr
+ store <2 x i32> %val, <2 x i32>* %A, align 1
+ %inc = getelementptr <2 x i32>, <2 x i32>* %A, i32 1
+ store <2 x i32>* %inc, <2 x i32>** %ptr
+ ret void
+}
+
+define void @store_v2f32(<2 x float>** %ptr, <2 x float> %val) {
+;CHECK-LABEL: store_v2f32:
+;CHECK: str r1, [r0]
+ %A = load <2 x float>*, <2 x float>** %ptr
+ store <2 x float> %val, <2 x float>* %A, align 1
+ ret void
+}
+
+define void @store_v2f32_update(<2 x float>** %ptr, <2 x float> %val) {
+;CHECK-LABEL: store_v2f32_update:
+;CHECK: vst1.8 {{{d[0-9]+}}}, [{{r[0-9]+}}]!
+ %A = load <2 x float>*, <2 x float>** %ptr
+ store <2 x float> %val, <2 x float>* %A, align 1
+ %inc = getelementptr <2 x float>, <2 x float>* %A, i32 1
+ store <2 x float>* %inc, <2 x float>** %ptr
+ ret void
+}
+
+define void @store_v1i64(<1 x i64>** %ptr, <1 x i64> %val) {
+;CHECK-LABEL: store_v1i64:
+;CHECK: str r1, [r0]
+ %A = load <1 x i64>*, <1 x i64>** %ptr
+ store <1 x i64> %val, <1 x i64>* %A, align 1
+ ret void
+}
+
+define void @store_v1i64_update(<1 x i64>** %ptr, <1 x i64> %val) {
+;CHECK-LABEL: store_v1i64_update:
+;CHECK: vst1.8 {{{d[0-9]+}}}, [{{r[0-9]+}}]!
+ %A = load <1 x i64>*, <1 x i64>** %ptr
+ store <1 x i64> %val, <1 x i64>* %A, align 1
+ %inc = getelementptr <1 x i64>, <1 x i64>* %A, i31 1
+ store <1 x i64>* %inc, <1 x i64>** %ptr
+ ret void
+}
+
+define void @store_v16i8(<16 x i8>** %ptr, <16 x i8> %val) {
+;CHECK-LABEL: store_v16i8:
+;CHECK: vst1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]
+ %A = load <16 x i8>*, <16 x i8>** %ptr
+ store <16 x i8> %val, <16 x i8>* %A, align 1
+ ret void
+}
+
+define void @store_v16i8_update(<16 x i8>** %ptr, <16 x i8> %val) {
+;CHECK-LABEL: store_v16i8_update:
+;CHECK: vst1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]!
+ %A = load <16 x i8>*, <16 x i8>** %ptr
+ store <16 x i8> %val, <16 x i8>* %A, align 1
+ %inc = getelementptr <16 x i8>, <16 x i8>* %A, i316 1
+ store <16 x i8>* %inc, <16 x i8>** %ptr
+ ret void
+}
+
+define void @store_v8i16(<8 x i16>** %ptr, <8 x i16> %val) {
+;CHECK-LABEL: store_v8i16:
+;CHECK: vst1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]
+ %A = load <8 x i16>*, <8 x i16>** %ptr
+ store <8 x i16> %val, <8 x i16>* %A, align 1
+ ret void
+}
+
+define void @store_v8i16_update(<8 x i16>** %ptr, <8 x i16> %val) {
+;CHECK-LABEL: store_v8i16_update:
+;CHECK: vst1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]!
+ %A = load <8 x i16>*, <8 x i16>** %ptr
+ store <8 x i16> %val, <8 x i16>* %A, align 1
+ %inc = getelementptr <8 x i16>, <8 x i16>* %A, i38 1
+ store <8 x i16>* %inc, <8 x i16>** %ptr
+ ret void
+}
+
+define void @store_v4i32(<4 x i32>** %ptr, <4 x i32> %val) {
+;CHECK-LABEL: store_v4i32:
+;CHECK: vst1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]
+ %A = load <4 x i32>*, <4 x i32>** %ptr
+ store <4 x i32> %val, <4 x i32>* %A, align 1
+ ret void
+}
+
+define void @store_v4i32_update(<4 x i32>** %ptr, <4 x i32> %val) {
+;CHECK-LABEL: store_v4i32_update:
+;CHECK: vst1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]!
+ %A = load <4 x i32>*, <4 x i32>** %ptr
+ store <4 x i32> %val, <4 x i32>* %A, align 1
+ %inc = getelementptr <4 x i32>, <4 x i32>* %A, i34 1
+ store <4 x i32>* %inc, <4 x i32>** %ptr
+ ret void
+}
+
+define void @store_v4f32(<4 x float>** %ptr, <4 x float> %val) {
+;CHECK-LABEL: store_v4f32:
+;CHECK: vst1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]
+ %A = load <4 x float>*, <4 x float>** %ptr
+ store <4 x float> %val, <4 x float>* %A, align 1
+ ret void
+}
+
+define void @store_v4f32_update(<4 x float>** %ptr, <4 x float> %val) {
+;CHECK-LABEL: store_v4f32_update:
+;CHECK: vst1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]!
+ %A = load <4 x float>*, <4 x float>** %ptr
+ store <4 x float> %val, <4 x float>* %A, align 1
+ %inc = getelementptr <4 x float>, <4 x float>* %A, i34 1
+ store <4 x float>* %inc, <4 x float>** %ptr
+ ret void
+}
+
+define void @store_v2i64(<2 x i64>** %ptr, <2 x i64> %val) {
+;CHECK-LABEL: store_v2i64:
+;CHECK: vst1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]
+ %A = load <2 x i64>*, <2 x i64>** %ptr
+ store <2 x i64> %val, <2 x i64>* %A, align 1
+ ret void
+}
+
+define void @store_v2i64_update(<2 x i64>** %ptr, <2 x i64> %val) {
+;CHECK-LABEL: store_v2i64_update:
+;CHECK: vst1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]!
+ %A = load <2 x i64>*, <2 x i64>** %ptr
+ store <2 x i64> %val, <2 x i64>* %A, align 1
+ %inc = getelementptr <2 x i64>, <2 x i64>* %A, i32 1
+ store <2 x i64>* %inc, <2 x i64>** %ptr
+ ret void
+}
+
+define void @store_v2i64_update_aligned2(<2 x i64>** %ptr, <2 x i64> %val) {
+;CHECK-LABEL: store_v2i64_update_aligned2:
+;CHECK: vst1.16 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]!
+ %A = load <2 x i64>*, <2 x i64>** %ptr
+ store <2 x i64> %val, <2 x i64>* %A, align 2
+ %inc = getelementptr <2 x i64>, <2 x i64>* %A, i32 1
+ store <2 x i64>* %inc, <2 x i64>** %ptr
+ ret void
+}
+
+define void @store_v2i64_update_aligned4(<2 x i64>** %ptr, <2 x i64> %val) {
+;CHECK-LABEL: store_v2i64_update_aligned4:
+;CHECK: vst1.32 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]!
+ %A = load <2 x i64>*, <2 x i64>** %ptr
+ store <2 x i64> %val, <2 x i64>* %A, align 4
+ %inc = getelementptr <2 x i64>, <2 x i64>* %A, i32 1
+ store <2 x i64>* %inc, <2 x i64>** %ptr
+ ret void
+}
+
+define void @store_v2i64_update_aligned8(<2 x i64>** %ptr, <2 x i64> %val) {
+;CHECK-LABEL: store_v2i64_update_aligned8:
+;CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]!
+ %A = load <2 x i64>*, <2 x i64>** %ptr
+ store <2 x i64> %val, <2 x i64>* %A, align 8
+ %inc = getelementptr <2 x i64>, <2 x i64>* %A, i32 1
+ store <2 x i64>* %inc, <2 x i64>** %ptr
+ ret void
+}
+
+define void @store_v2i64_update_aligned16(<2 x i64>** %ptr, <2 x i64> %val) {
+;CHECK-LABEL: store_v2i64_update_aligned16:
+;CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}:128]!
+ %A = load <2 x i64>*, <2 x i64>** %ptr
+ store <2 x i64> %val, <2 x i64>* %A, align 16
+ %inc = getelementptr <2 x i64>, <2 x i64>* %A, i32 1
+ store <2 x i64>* %inc, <2 x i64>** %ptr
+ ret void
+}
+
+define void @truncstore_v4i32tov4i8(<4 x i8>** %ptr, <4 x i32> %val) {
+;CHECK-LABEL: truncstore_v4i32tov4i8:
+;CHECK: ldr.w r9, [sp]
+;CHECK: vmov {{d[0-9]+}}, r3, r9
+;CHECK: vmov {{d[0-9]+}}, r1, r2
+;CHECK: vmovn.i32 [[VECLO:d[0-9]+]], {{q[0-9]+}}
+;CHECK: vuzp.8 [[VECLO]], {{d[0-9]+}}
+;CHECK: ldr r[[PTRREG:[0-9]+]], [r0]
+;CHECK: vst1.32 {[[VECLO]][0]}, [r[[PTRREG]]:32]
+ %A = load <4 x i8>*, <4 x i8>** %ptr
+ %trunc = trunc <4 x i32> %val to <4 x i8>
+ store <4 x i8> %trunc, <4 x i8>* %A, align 4
+ ret void
+}
+
+define void @truncstore_v4i32tov4i8_fake_update(<4 x i8>** %ptr, <4 x i32> %val) {
+;CHECK-LABEL: truncstore_v4i32tov4i8_fake_update:
+;CHECK: ldr.w r9, [sp]
+;CHECK: vmov {{d[0-9]+}}, r3, r9
+;CHECK: vmov {{d[0-9]+}}, r1, r2
+;CHECK: movs [[IMM16:r[0-9]+]], #16
+;CHECK: vmovn.i32 [[VECLO:d[0-9]+]], {{q[0-9]+}}
+;CHECK: vuzp.8 [[VECLO]], {{d[0-9]+}}
+;CHECK: ldr r[[PTRREG:[0-9]+]], [r0]
+;CHECK: vst1.32 {[[VECLO]][0]}, [r[[PTRREG]]:32], [[IMM16]]
+;CHECK: str r[[PTRREG]], [r0]
+ %A = load <4 x i8>*, <4 x i8>** %ptr
+ %trunc = trunc <4 x i32> %val to <4 x i8>
+ store <4 x i8> %trunc, <4 x i8>* %A, align 4
+ %inc = getelementptr <4 x i8>, <4 x i8>* %A, i38 4
+ store <4 x i8>* %inc, <4 x i8>** %ptr
+ ret void
+}
diff --git a/test/CodeGen/ARM/vext.ll b/test/CodeGen/ARM/vext.ll
index 4407451244e9..72ecf0ef0626 100644
--- a/test/CodeGen/ARM/vext.ll
+++ b/test/CodeGen/ARM/vext.ll
@@ -3,8 +3,8 @@
define <8 x i8> @test_vextd(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: test_vextd:
;CHECK: vext
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
+ %tmp2 = load <8 x i8>, <8 x i8>* %B
%tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10>
ret <8 x i8> %tmp3
}
@@ -12,8 +12,8 @@ define <8 x i8> @test_vextd(<8 x i8>* %A, <8 x i8>* %B) nounwind {
define <8 x i8> @test_vextRd(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: test_vextRd:
;CHECK: vext
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
+ %tmp2 = load <8 x i8>, <8 x i8>* %B
%tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 13, i32 14, i32 15, i32 0, i32 1, i32 2, i32 3, i32 4>
ret <8 x i8> %tmp3
}
@@ -21,8 +21,8 @@ define <8 x i8> @test_vextRd(<8 x i8>* %A, <8 x i8>* %B) nounwind {
define <16 x i8> @test_vextq(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: test_vextq:
;CHECK: vext
- %tmp1 = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
+ %tmp1 = load <16 x i8>, <16 x i8>* %A
+ %tmp2 = load <16 x i8>, <16 x i8>* %B
%tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18>
ret <16 x i8> %tmp3
}
@@ -30,8 +30,8 @@ define <16 x i8> @test_vextq(<16 x i8>* %A, <16 x i8>* %B) nounwind {
define <16 x i8> @test_vextRq(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: test_vextRq:
;CHECK: vext
- %tmp1 = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
+ %tmp1 = load <16 x i8>, <16 x i8>* %A
+ %tmp2 = load <16 x i8>, <16 x i8>* %B
%tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6>
ret <16 x i8> %tmp3
}
@@ -39,8 +39,8 @@ define <16 x i8> @test_vextRq(<16 x i8>* %A, <16 x i8>* %B) nounwind {
define <4 x i16> @test_vextd16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: test_vextd16:
;CHECK: vext
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
%tmp3 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i32> <i32 3, i32 4, i32 5, i32 6>
ret <4 x i16> %tmp3
}
@@ -48,8 +48,8 @@ define <4 x i16> @test_vextd16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
define <4 x i32> @test_vextq32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: test_vextq32:
;CHECK: vext
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
+ %tmp2 = load <4 x i32>, <4 x i32>* %B
%tmp3 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> <i32 3, i32 4, i32 5, i32 6>
ret <4 x i32> %tmp3
}
@@ -59,8 +59,8 @@ define <4 x i32> @test_vextq32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
define <8 x i8> @test_vextd_undef(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: test_vextd_undef:
;CHECK: vext
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
+ %tmp2 = load <8 x i8>, <8 x i8>* %B
%tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 3, i32 undef, i32 undef, i32 6, i32 7, i32 8, i32 9, i32 10>
ret <8 x i8> %tmp3
}
@@ -68,8 +68,8 @@ define <8 x i8> @test_vextd_undef(<8 x i8>* %A, <8 x i8>* %B) nounwind {
define <16 x i8> @test_vextRq_undef(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: test_vextRq_undef:
;CHECK: vext
- %tmp1 = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
+ %tmp1 = load <16 x i8>, <16 x i8>* %A
+ %tmp2 = load <16 x i8>, <16 x i8>* %B
%tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 23, i32 24, i32 25, i32 26, i32 undef, i32 undef, i32 29, i32 30, i32 31, i32 0, i32 1, i32 2, i32 3, i32 4, i32 undef, i32 6>
ret <16 x i8> %tmp3
}
@@ -118,8 +118,8 @@ define <4 x i16> @test_interleaved(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK: vext.16
;CHECK-NOT: vext.16
;CHECK: vzip.16
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
+ %tmp2 = load <8 x i16>, <8 x i16>* %B
%tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <4 x i32> <i32 3, i32 8, i32 5, i32 9>
ret <4 x i16> %tmp3
}
@@ -128,8 +128,8 @@ define <4 x i16> @test_interleaved(<8 x i16>* %A, <8 x i16>* %B) nounwind {
define <4 x i16> @test_undef(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: test_undef:
;CHECK: vzip.16
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
+ %tmp2 = load <8 x i16>, <8 x i16>* %B
%tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <4 x i32> <i32 undef, i32 8, i32 5, i32 9>
ret <4 x i16> %tmp3
}
@@ -143,7 +143,7 @@ define <4 x i16> @test_multisource(<32 x i16>* %B) nounwind {
;CHECK: vmov.16 [[REG]][1]
;CHECK: vmov.16 [[REG]][2]
;CHECK: vmov.16 [[REG]][3]
- %tmp1 = load <32 x i16>* %B
+ %tmp1 = load <32 x i16>, <32 x i16>* %B
%tmp2 = shufflevector <32 x i16> %tmp1, <32 x i16> undef, <4 x i32> <i32 0, i32 8, i32 16, i32 24>
ret <4 x i16> %tmp2
}
@@ -156,7 +156,7 @@ define <4 x i16> @test_largespan(<8 x i16>* %B) nounwind {
;CHECK: vmov.16 [[REG]][1]
;CHECK: vmov.16 [[REG]][2]
;CHECK: vmov.16 [[REG]][3]
- %tmp1 = load <8 x i16>* %B
+ %tmp1 = load <8 x i16>, <8 x i16>* %B
%tmp2 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
ret <4 x i16> %tmp2
}
@@ -174,8 +174,8 @@ define <8 x i16> @test_illegal(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK: vmov.16 [[REG2]][1]
;CHECK: vmov.16 [[REG2]][2]
;CHECK: vmov.16 [[REG2]][3]
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
+ %tmp2 = load <8 x i16>, <8 x i16>* %B
%tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 0, i32 7, i32 5, i32 13, i32 3, i32 2, i32 2, i32 9>
ret <8 x i16> %tmp3
}
@@ -185,7 +185,7 @@ define <8 x i16> @test_illegal(<8 x i16>* %A, <8 x i16>* %B) nounwind {
define arm_aapcscc void @test_elem_mismatch(<2 x i64>* nocapture %src, <4 x i16>* nocapture %dest) nounwind {
; CHECK-LABEL: test_elem_mismatch:
; CHECK: vstr
- %tmp0 = load <2 x i64>* %src, align 16
+ %tmp0 = load <2 x i64>, <2 x i64>* %src, align 16
%tmp1 = bitcast <2 x i64> %tmp0 to <4 x i32>
%tmp2 = extractelement <4 x i32> %tmp1, i32 0
%tmp3 = extractelement <4 x i32> %tmp1, i32 2
diff --git a/test/CodeGen/ARM/vfcmp.ll b/test/CodeGen/ARM/vfcmp.ll
index 4b2fea9baa09..8673b7d639d6 100644
--- a/test/CodeGen/ARM/vfcmp.ll
+++ b/test/CodeGen/ARM/vfcmp.ll
@@ -7,8 +7,8 @@ define <2 x i32> @vcunef32(<2 x float>* %A, <2 x float>* %B) nounwind {
;CHECK-LABEL: vcunef32:
;CHECK: vceq.f32
;CHECK-NEXT: vmvn
- %tmp1 = load <2 x float>* %A
- %tmp2 = load <2 x float>* %B
+ %tmp1 = load <2 x float>, <2 x float>* %A
+ %tmp2 = load <2 x float>, <2 x float>* %B
%tmp3 = fcmp une <2 x float> %tmp1, %tmp2
%tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
ret <2 x i32> %tmp4
@@ -18,8 +18,8 @@ define <2 x i32> @vcunef32(<2 x float>* %A, <2 x float>* %B) nounwind {
define <2 x i32> @vcoltf32(<2 x float>* %A, <2 x float>* %B) nounwind {
;CHECK-LABEL: vcoltf32:
;CHECK: vcgt.f32
- %tmp1 = load <2 x float>* %A
- %tmp2 = load <2 x float>* %B
+ %tmp1 = load <2 x float>, <2 x float>* %A
+ %tmp2 = load <2 x float>, <2 x float>* %B
%tmp3 = fcmp olt <2 x float> %tmp1, %tmp2
%tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
ret <2 x i32> %tmp4
@@ -29,8 +29,8 @@ define <2 x i32> @vcoltf32(<2 x float>* %A, <2 x float>* %B) nounwind {
define <2 x i32> @vcolef32(<2 x float>* %A, <2 x float>* %B) nounwind {
;CHECK-LABEL: vcolef32:
;CHECK: vcge.f32
- %tmp1 = load <2 x float>* %A
- %tmp2 = load <2 x float>* %B
+ %tmp1 = load <2 x float>, <2 x float>* %A
+ %tmp2 = load <2 x float>, <2 x float>* %B
%tmp3 = fcmp ole <2 x float> %tmp1, %tmp2
%tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
ret <2 x i32> %tmp4
@@ -41,8 +41,8 @@ define <2 x i32> @vcugef32(<2 x float>* %A, <2 x float>* %B) nounwind {
;CHECK-LABEL: vcugef32:
;CHECK: vcgt.f32
;CHECK-NEXT: vmvn
- %tmp1 = load <2 x float>* %A
- %tmp2 = load <2 x float>* %B
+ %tmp1 = load <2 x float>, <2 x float>* %A
+ %tmp2 = load <2 x float>, <2 x float>* %B
%tmp3 = fcmp uge <2 x float> %tmp1, %tmp2
%tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
ret <2 x i32> %tmp4
@@ -53,8 +53,8 @@ define <2 x i32> @vculef32(<2 x float>* %A, <2 x float>* %B) nounwind {
;CHECK-LABEL: vculef32:
;CHECK: vcgt.f32
;CHECK-NEXT: vmvn
- %tmp1 = load <2 x float>* %A
- %tmp2 = load <2 x float>* %B
+ %tmp1 = load <2 x float>, <2 x float>* %A
+ %tmp2 = load <2 x float>, <2 x float>* %B
%tmp3 = fcmp ule <2 x float> %tmp1, %tmp2
%tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
ret <2 x i32> %tmp4
@@ -65,8 +65,8 @@ define <2 x i32> @vcugtf32(<2 x float>* %A, <2 x float>* %B) nounwind {
;CHECK-LABEL: vcugtf32:
;CHECK: vcge.f32
;CHECK-NEXT: vmvn
- %tmp1 = load <2 x float>* %A
- %tmp2 = load <2 x float>* %B
+ %tmp1 = load <2 x float>, <2 x float>* %A
+ %tmp2 = load <2 x float>, <2 x float>* %B
%tmp3 = fcmp ugt <2 x float> %tmp1, %tmp2
%tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
ret <2 x i32> %tmp4
@@ -77,8 +77,8 @@ define <2 x i32> @vcultf32(<2 x float>* %A, <2 x float>* %B) nounwind {
;CHECK-LABEL: vcultf32:
;CHECK: vcge.f32
;CHECK-NEXT: vmvn
- %tmp1 = load <2 x float>* %A
- %tmp2 = load <2 x float>* %B
+ %tmp1 = load <2 x float>, <2 x float>* %A
+ %tmp2 = load <2 x float>, <2 x float>* %B
%tmp3 = fcmp ult <2 x float> %tmp1, %tmp2
%tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
ret <2 x i32> %tmp4
@@ -91,8 +91,8 @@ define <2 x i32> @vcueqf32(<2 x float>* %A, <2 x float>* %B) nounwind {
;CHECK-NEXT: vcgt.f32
;CHECK-NEXT: vorr
;CHECK-NEXT: vmvn
- %tmp1 = load <2 x float>* %A
- %tmp2 = load <2 x float>* %B
+ %tmp1 = load <2 x float>, <2 x float>* %A
+ %tmp2 = load <2 x float>, <2 x float>* %B
%tmp3 = fcmp ueq <2 x float> %tmp1, %tmp2
%tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
ret <2 x i32> %tmp4
@@ -104,8 +104,8 @@ define <2 x i32> @vconef32(<2 x float>* %A, <2 x float>* %B) nounwind {
;CHECK: vcgt.f32
;CHECK-NEXT: vcgt.f32
;CHECK-NEXT: vorr
- %tmp1 = load <2 x float>* %A
- %tmp2 = load <2 x float>* %B
+ %tmp1 = load <2 x float>, <2 x float>* %A
+ %tmp2 = load <2 x float>, <2 x float>* %B
%tmp3 = fcmp one <2 x float> %tmp1, %tmp2
%tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
ret <2 x i32> %tmp4
@@ -118,8 +118,8 @@ define <2 x i32> @vcunof32(<2 x float>* %A, <2 x float>* %B) nounwind {
;CHECK-NEXT: vcgt.f32
;CHECK-NEXT: vorr
;CHECK-NEXT: vmvn
- %tmp1 = load <2 x float>* %A
- %tmp2 = load <2 x float>* %B
+ %tmp1 = load <2 x float>, <2 x float>* %A
+ %tmp2 = load <2 x float>, <2 x float>* %B
%tmp3 = fcmp uno <2 x float> %tmp1, %tmp2
%tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
ret <2 x i32> %tmp4
@@ -131,8 +131,8 @@ define <2 x i32> @vcordf32(<2 x float>* %A, <2 x float>* %B) nounwind {
;CHECK: vcge.f32
;CHECK-NEXT: vcgt.f32
;CHECK-NEXT: vorr
- %tmp1 = load <2 x float>* %A
- %tmp2 = load <2 x float>* %B
+ %tmp1 = load <2 x float>, <2 x float>* %A
+ %tmp2 = load <2 x float>, <2 x float>* %B
%tmp3 = fcmp ord <2 x float> %tmp1, %tmp2
%tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
ret <2 x i32> %tmp4
diff --git a/test/CodeGen/ARM/vfp-libcalls.ll b/test/CodeGen/ARM/vfp-libcalls.ll
index 9d4e194e90ee..b08073ab62b3 100644
--- a/test/CodeGen/ARM/vfp-libcalls.ll
+++ b/test/CodeGen/ARM/vfp-libcalls.ll
@@ -1,6 +1,6 @@
; RUN: llc -mtriple=armv6-apple-ios -mcpu=arm1136jf-s -o - %s | FileCheck %s --check-prefix=CHECK-HARD
; RUN: llc -mtriple=thumbv6-apple-ios -mcpu=arm1136jf-s -o - %s | FileCheck %s --check-prefix=CHECK-SOFTISH
-; RUN: llc -mtriple=armv7s-apple-ios -soft-float -mcpu=arm1136jf-s -o - %s | FileCheck %s --check-prefix=CHECK-SOFT
+; RUN: llc -mtriple=armv7s-apple-ios -mattr=+soft-float -mcpu=arm1136jf-s -o - %s | FileCheck %s --check-prefix=CHECK-SOFT
define float @test_call(float %a, float %b) {
; CHECK-HARD: vadd.f32 {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
@@ -8,4 +8,4 @@ define float @test_call(float %a, float %b) {
; CHECK-SOFT: bl ___addsf3{{$}}
%sum = fadd float %a, %b
ret float %sum
-}
\ No newline at end of file
+}
diff --git a/test/CodeGen/ARM/vfp-regs-dwarf.ll b/test/CodeGen/ARM/vfp-regs-dwarf.ll
index b67f770bfd13..eca0c26e5562 100644
--- a/test/CodeGen/ARM/vfp-regs-dwarf.ll
+++ b/test/CodeGen/ARM/vfp-regs-dwarf.ll
@@ -31,14 +31,14 @@ define void @stack_offsets() {
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!8, !9}
-!0 = !{!"0x11\0012\00clang version 3.5.0 \000\00\000\00\001", !1, !2, !2, !3, !2, !2} ; [ DW_TAG_compile_unit ] [/Users/tim/llvm/build/tmp.c] [DW_LANG_C99]
-!1 = !{!"tmp.c", !"/Users/tim/llvm/build"}
+!0 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.5.0 ", isOptimized: false, emissionKind: 1, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!1 = !DIFile(filename: "tmp.c", directory: "/Users/tim/llvm/build")
!2 = !{}
!3 = !{!4}
-!4 = !{!"0x2e\00bar\00bar\00\001\000\001\000\006\000\000\001", !1, !5, !6, null, void ()* @stack_offsets, null, null, !2} ; [ DW_TAG_subprogram ] [line 1] [def] [bar]
-!5 = !{!"0x29", !1} ; [ DW_TAG_file_type ] [/Users/tim/llvm/build/tmp.c]
-!6 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !7, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!4 = !DISubprogram(name: "bar", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, scopeLine: 1, file: !1, scope: !5, type: !6, function: void ()* @stack_offsets, variables: !2)
+!5 = !DIFile(filename: "tmp.c", directory: "/Users/tim/llvm/build")
+!6 = !DISubroutineType(types: !7)
!7 = !{null}
!8 = !{i32 2, !"Dwarf Version", i32 4}
-!9 = !{i32 1, !"Debug Info Version", i32 2}
+!9 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/CodeGen/ARM/vfp.ll b/test/CodeGen/ARM/vfp.ll
index 5d2943cbfd2f..03c0354aa1df 100644
--- a/test/CodeGen/ARM/vfp.ll
+++ b/test/CodeGen/ARM/vfp.ll
@@ -2,8 +2,8 @@
; RUN: llc < %s -mtriple=arm-apple-ios -mattr=+vfp2 -disable-post-ra -regalloc=basic | FileCheck %s
define void @test(float* %P, double* %D) {
- %A = load float* %P ; <float> [#uses=1]
- %B = load double* %D ; <double> [#uses=1]
+ %A = load float, float* %P ; <float> [#uses=1]
+ %B = load double, double* %D ; <double> [#uses=1]
store float %A, float* %P
store double %B, double* %D
ret void
@@ -15,11 +15,11 @@ declare double @fabs(double)
define void @test_abs(float* %P, double* %D) {
;CHECK-LABEL: test_abs:
- %a = load float* %P ; <float> [#uses=1]
+ %a = load float, float* %P ; <float> [#uses=1]
;CHECK: vabs.f32
%b = call float @fabsf( float %a ) readnone ; <float> [#uses=1]
store float %b, float* %P
- %A = load double* %D ; <double> [#uses=1]
+ %A = load double, double* %D ; <double> [#uses=1]
;CHECK: vabs.f64
%B = call double @fabs( double %A ) readnone ; <double> [#uses=1]
store double %B, double* %D
@@ -28,10 +28,10 @@ define void @test_abs(float* %P, double* %D) {
define void @test_add(float* %P, double* %D) {
;CHECK-LABEL: test_add:
- %a = load float* %P ; <float> [#uses=2]
+ %a = load float, float* %P ; <float> [#uses=2]
%b = fadd float %a, %a ; <float> [#uses=1]
store float %b, float* %P
- %A = load double* %D ; <double> [#uses=2]
+ %A = load double, double* %D ; <double> [#uses=2]
%B = fadd double %A, %A ; <double> [#uses=1]
store double %B, double* %D
ret void
@@ -39,11 +39,11 @@ define void @test_add(float* %P, double* %D) {
define void @test_ext_round(float* %P, double* %D) {
;CHECK-LABEL: test_ext_round:
- %a = load float* %P ; <float> [#uses=1]
+ %a = load float, float* %P ; <float> [#uses=1]
;CHECK: vcvt.f64.f32
;CHECK: vcvt.f32.f64
%b = fpext float %a to double ; <double> [#uses=1]
- %A = load double* %D ; <double> [#uses=1]
+ %A = load double, double* %D ; <double> [#uses=1]
%B = fptrunc double %A to float ; <float> [#uses=1]
store double %b, double* %D
store float %B, float* %P
@@ -52,9 +52,9 @@ define void @test_ext_round(float* %P, double* %D) {
define void @test_fma(float* %P1, float* %P2, float* %P3) {
;CHECK-LABEL: test_fma:
- %a1 = load float* %P1 ; <float> [#uses=1]
- %a2 = load float* %P2 ; <float> [#uses=1]
- %a3 = load float* %P3 ; <float> [#uses=1]
+ %a1 = load float, float* %P1 ; <float> [#uses=1]
+ %a2 = load float, float* %P2 ; <float> [#uses=1]
+ %a3 = load float, float* %P3 ; <float> [#uses=1]
;CHECK: vnmls.f32
%X = fmul float %a1, %a2 ; <float> [#uses=1]
%Y = fsub float %X, %a3 ; <float> [#uses=1]
@@ -64,7 +64,7 @@ define void @test_fma(float* %P1, float* %P2, float* %P3) {
define i32 @test_ftoi(float* %P1) {
;CHECK-LABEL: test_ftoi:
- %a1 = load float* %P1 ; <float> [#uses=1]
+ %a1 = load float, float* %P1 ; <float> [#uses=1]
;CHECK: vcvt.s32.f32
%b1 = fptosi float %a1 to i32 ; <i32> [#uses=1]
ret i32 %b1
@@ -72,7 +72,7 @@ define i32 @test_ftoi(float* %P1) {
define i32 @test_ftou(float* %P1) {
;CHECK-LABEL: test_ftou:
- %a1 = load float* %P1 ; <float> [#uses=1]
+ %a1 = load float, float* %P1 ; <float> [#uses=1]
;CHECK: vcvt.u32.f32
%b1 = fptoui float %a1 to i32 ; <i32> [#uses=1]
ret i32 %b1
@@ -80,7 +80,7 @@ define i32 @test_ftou(float* %P1) {
define i32 @test_dtoi(double* %P1) {
;CHECK-LABEL: test_dtoi:
- %a1 = load double* %P1 ; <double> [#uses=1]
+ %a1 = load double, double* %P1 ; <double> [#uses=1]
;CHECK: vcvt.s32.f64
%b1 = fptosi double %a1 to i32 ; <i32> [#uses=1]
ret i32 %b1
@@ -88,7 +88,7 @@ define i32 @test_dtoi(double* %P1) {
define i32 @test_dtou(double* %P1) {
;CHECK-LABEL: test_dtou:
- %a1 = load double* %P1 ; <double> [#uses=1]
+ %a1 = load double, double* %P1 ; <double> [#uses=1]
;CHECK: vcvt.u32.f64
%b1 = fptoui double %a1 to i32 ; <i32> [#uses=1]
ret i32 %b1
@@ -113,9 +113,9 @@ define void @test_utod2(double* %P1, i8 %X) {
define void @test_cmp(float* %glob, i32 %X) {
;CHECK-LABEL: test_cmp:
entry:
- %tmp = load float* %glob ; <float> [#uses=2]
- %tmp3 = getelementptr float* %glob, i32 2 ; <float*> [#uses=1]
- %tmp4 = load float* %tmp3 ; <float> [#uses=2]
+ %tmp = load float, float* %glob ; <float> [#uses=2]
+ %tmp3 = getelementptr float, float* %glob, i32 2 ; <float*> [#uses=1]
+ %tmp4 = load float, float* %tmp3 ; <float> [#uses=2]
%tmp.upgrd.1 = fcmp oeq float %tmp, %tmp4 ; <i1> [#uses=1]
%tmp5 = fcmp uno float %tmp, %tmp4 ; <i1> [#uses=1]
%tmp6 = or i1 %tmp.upgrd.1, %tmp5 ; <i1> [#uses=1]
@@ -124,11 +124,11 @@ entry:
br i1 %tmp6, label %cond_true, label %cond_false
cond_true: ; preds = %entry
- %tmp.upgrd.2 = tail call i32 (...)* @bar( ) ; <i32> [#uses=0]
+ %tmp.upgrd.2 = tail call i32 (...) @bar( ) ; <i32> [#uses=0]
ret void
cond_false: ; preds = %entry
- %tmp7 = tail call i32 (...)* @baz( ) ; <i32> [#uses=0]
+ %tmp7 = tail call i32 (...) @baz( ) ; <i32> [#uses=0]
ret void
}
@@ -141,16 +141,16 @@ declare i32 @baz(...)
define void @test_cmpfp0(float* %glob, i32 %X) {
;CHECK-LABEL: test_cmpfp0:
entry:
- %tmp = load float* %glob ; <float> [#uses=1]
+ %tmp = load float, float* %glob ; <float> [#uses=1]
;CHECK: vcmpe.f32
%tmp.upgrd.3 = fcmp ogt float %tmp, 0.000000e+00 ; <i1> [#uses=1]
br i1 %tmp.upgrd.3, label %cond_true, label %cond_false
cond_true: ; preds = %entry
- %tmp.upgrd.4 = tail call i32 (...)* @bar( ) ; <i32> [#uses=0]
+ %tmp.upgrd.4 = tail call i32 (...) @bar( ) ; <i32> [#uses=0]
ret void
cond_false: ; preds = %entry
- %tmp1 = tail call i32 (...)* @baz( ) ; <i32> [#uses=0]
+ %tmp1 = tail call i32 (...) @baz( ) ; <i32> [#uses=0]
ret void
}
diff --git a/test/CodeGen/ARM/vget_lane.ll b/test/CodeGen/ARM/vget_lane.ll
index 2518ee2278cc..d4cbfad5be6f 100644
--- a/test/CodeGen/ARM/vget_lane.ll
+++ b/test/CodeGen/ARM/vget_lane.ll
@@ -5,7 +5,7 @@ target triple = "thumbv7-elf"
define i32 @vget_lanes8(<8 x i8>* %A) nounwind {
;CHECK-LABEL: vget_lanes8:
;CHECK: vmov.s8
- %tmp1 = load <8 x i8>* %A
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
%tmp2 = extractelement <8 x i8> %tmp1, i32 1
%tmp3 = sext i8 %tmp2 to i32
ret i32 %tmp3
@@ -14,7 +14,7 @@ define i32 @vget_lanes8(<8 x i8>* %A) nounwind {
define i32 @vget_lanes16(<4 x i16>* %A) nounwind {
;CHECK-LABEL: vget_lanes16:
;CHECK: vmov.s16
- %tmp1 = load <4 x i16>* %A
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
%tmp2 = extractelement <4 x i16> %tmp1, i32 1
%tmp3 = sext i16 %tmp2 to i32
ret i32 %tmp3
@@ -23,7 +23,7 @@ define i32 @vget_lanes16(<4 x i16>* %A) nounwind {
define i32 @vget_laneu8(<8 x i8>* %A) nounwind {
;CHECK-LABEL: vget_laneu8:
;CHECK: vmov.u8
- %tmp1 = load <8 x i8>* %A
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
%tmp2 = extractelement <8 x i8> %tmp1, i32 1
%tmp3 = zext i8 %tmp2 to i32
ret i32 %tmp3
@@ -32,7 +32,7 @@ define i32 @vget_laneu8(<8 x i8>* %A) nounwind {
define i32 @vget_laneu16(<4 x i16>* %A) nounwind {
;CHECK-LABEL: vget_laneu16:
;CHECK: vmov.u16
- %tmp1 = load <4 x i16>* %A
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
%tmp2 = extractelement <4 x i16> %tmp1, i32 1
%tmp3 = zext i16 %tmp2 to i32
ret i32 %tmp3
@@ -42,7 +42,7 @@ define i32 @vget_laneu16(<4 x i16>* %A) nounwind {
define i32 @vget_lanei32(<2 x i32>* %A) nounwind {
;CHECK-LABEL: vget_lanei32:
;CHECK: vmov.32
- %tmp1 = load <2 x i32>* %A
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
%tmp2 = add <2 x i32> %tmp1, %tmp1
%tmp3 = extractelement <2 x i32> %tmp2, i32 1
ret i32 %tmp3
@@ -51,7 +51,7 @@ define i32 @vget_lanei32(<2 x i32>* %A) nounwind {
define i32 @vgetQ_lanes8(<16 x i8>* %A) nounwind {
;CHECK-LABEL: vgetQ_lanes8:
;CHECK: vmov.s8
- %tmp1 = load <16 x i8>* %A
+ %tmp1 = load <16 x i8>, <16 x i8>* %A
%tmp2 = extractelement <16 x i8> %tmp1, i32 1
%tmp3 = sext i8 %tmp2 to i32
ret i32 %tmp3
@@ -60,7 +60,7 @@ define i32 @vgetQ_lanes8(<16 x i8>* %A) nounwind {
define i32 @vgetQ_lanes16(<8 x i16>* %A) nounwind {
;CHECK-LABEL: vgetQ_lanes16:
;CHECK: vmov.s16
- %tmp1 = load <8 x i16>* %A
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
%tmp2 = extractelement <8 x i16> %tmp1, i32 1
%tmp3 = sext i16 %tmp2 to i32
ret i32 %tmp3
@@ -69,7 +69,7 @@ define i32 @vgetQ_lanes16(<8 x i16>* %A) nounwind {
define i32 @vgetQ_laneu8(<16 x i8>* %A) nounwind {
;CHECK-LABEL: vgetQ_laneu8:
;CHECK: vmov.u8
- %tmp1 = load <16 x i8>* %A
+ %tmp1 = load <16 x i8>, <16 x i8>* %A
%tmp2 = extractelement <16 x i8> %tmp1, i32 1
%tmp3 = zext i8 %tmp2 to i32
ret i32 %tmp3
@@ -78,7 +78,7 @@ define i32 @vgetQ_laneu8(<16 x i8>* %A) nounwind {
define i32 @vgetQ_laneu16(<8 x i16>* %A) nounwind {
;CHECK-LABEL: vgetQ_laneu16:
;CHECK: vmov.u16
- %tmp1 = load <8 x i16>* %A
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
%tmp2 = extractelement <8 x i16> %tmp1, i32 1
%tmp3 = zext i16 %tmp2 to i32
ret i32 %tmp3
@@ -88,7 +88,7 @@ define i32 @vgetQ_laneu16(<8 x i16>* %A) nounwind {
define i32 @vgetQ_lanei32(<4 x i32>* %A) nounwind {
;CHECK-LABEL: vgetQ_lanei32:
;CHECK: vmov.32
- %tmp1 = load <4 x i32>* %A
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
%tmp2 = add <4 x i32> %tmp1, %tmp1
%tmp3 = extractelement <4 x i32> %tmp2, i32 1
ret i32 %tmp3
@@ -100,7 +100,7 @@ entry:
%arg0_uint16x4_t = alloca <4 x i16> ; <<4 x i16>*> [#uses=1]
%out_uint16_t = alloca i16 ; <i16*> [#uses=1]
%"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
- %0 = load <4 x i16>* %arg0_uint16x4_t, align 8 ; <<4 x i16>> [#uses=1]
+ %0 = load <4 x i16>, <4 x i16>* %arg0_uint16x4_t, align 8 ; <<4 x i16>> [#uses=1]
%1 = extractelement <4 x i16> %0, i32 1 ; <i16> [#uses=1]
%2 = add i16 %1, %1
store i16 %2, i16* %out_uint16_t, align 2
@@ -116,7 +116,7 @@ entry:
%arg0_uint8x8_t = alloca <8 x i8> ; <<8 x i8>*> [#uses=1]
%out_uint8_t = alloca i8 ; <i8*> [#uses=1]
%"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
- %0 = load <8 x i8>* %arg0_uint8x8_t, align 8 ; <<8 x i8>> [#uses=1]
+ %0 = load <8 x i8>, <8 x i8>* %arg0_uint8x8_t, align 8 ; <<8 x i8>> [#uses=1]
%1 = extractelement <8 x i8> %0, i32 1 ; <i8> [#uses=1]
%2 = add i8 %1, %1
store i8 %2, i8* %out_uint8_t, align 1
@@ -132,7 +132,7 @@ entry:
%arg0_uint16x8_t = alloca <8 x i16> ; <<8 x i16>*> [#uses=1]
%out_uint16_t = alloca i16 ; <i16*> [#uses=1]
%"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
- %0 = load <8 x i16>* %arg0_uint16x8_t, align 16 ; <<8 x i16>> [#uses=1]
+ %0 = load <8 x i16>, <8 x i16>* %arg0_uint16x8_t, align 16 ; <<8 x i16>> [#uses=1]
%1 = extractelement <8 x i16> %0, i32 1 ; <i16> [#uses=1]
%2 = add i16 %1, %1
store i16 %2, i16* %out_uint16_t, align 2
@@ -148,7 +148,7 @@ entry:
%arg0_uint8x16_t = alloca <16 x i8> ; <<16 x i8>*> [#uses=1]
%out_uint8_t = alloca i8 ; <i8*> [#uses=1]
%"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
- %0 = load <16 x i8>* %arg0_uint8x16_t, align 16 ; <<16 x i8>> [#uses=1]
+ %0 = load <16 x i8>, <16 x i8>* %arg0_uint8x16_t, align 16 ; <<16 x i8>> [#uses=1]
%1 = extractelement <16 x i8> %0, i32 1 ; <i8> [#uses=1]
%2 = add i8 %1, %1
store i8 %2, i8* %out_uint8_t, align 1
@@ -161,7 +161,7 @@ return: ; preds = %entry
define <8 x i8> @vset_lane8(<8 x i8>* %A, i8 %B) nounwind {
;CHECK-LABEL: vset_lane8:
;CHECK: vmov.8
- %tmp1 = load <8 x i8>* %A
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
%tmp2 = insertelement <8 x i8> %tmp1, i8 %B, i32 1
ret <8 x i8> %tmp2
}
@@ -169,7 +169,7 @@ define <8 x i8> @vset_lane8(<8 x i8>* %A, i8 %B) nounwind {
define <4 x i16> @vset_lane16(<4 x i16>* %A, i16 %B) nounwind {
;CHECK-LABEL: vset_lane16:
;CHECK: vmov.16
- %tmp1 = load <4 x i16>* %A
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
%tmp2 = insertelement <4 x i16> %tmp1, i16 %B, i32 1
ret <4 x i16> %tmp2
}
@@ -177,7 +177,7 @@ define <4 x i16> @vset_lane16(<4 x i16>* %A, i16 %B) nounwind {
define <2 x i32> @vset_lane32(<2 x i32>* %A, i32 %B) nounwind {
;CHECK-LABEL: vset_lane32:
;CHECK: vmov.32
- %tmp1 = load <2 x i32>* %A
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
%tmp2 = insertelement <2 x i32> %tmp1, i32 %B, i32 1
ret <2 x i32> %tmp2
}
@@ -185,7 +185,7 @@ define <2 x i32> @vset_lane32(<2 x i32>* %A, i32 %B) nounwind {
define <16 x i8> @vsetQ_lane8(<16 x i8>* %A, i8 %B) nounwind {
;CHECK-LABEL: vsetQ_lane8:
;CHECK: vmov.8
- %tmp1 = load <16 x i8>* %A
+ %tmp1 = load <16 x i8>, <16 x i8>* %A
%tmp2 = insertelement <16 x i8> %tmp1, i8 %B, i32 1
ret <16 x i8> %tmp2
}
@@ -193,7 +193,7 @@ define <16 x i8> @vsetQ_lane8(<16 x i8>* %A, i8 %B) nounwind {
define <8 x i16> @vsetQ_lane16(<8 x i16>* %A, i16 %B) nounwind {
;CHECK-LABEL: vsetQ_lane16:
;CHECK: vmov.16
- %tmp1 = load <8 x i16>* %A
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
%tmp2 = insertelement <8 x i16> %tmp1, i16 %B, i32 1
ret <8 x i16> %tmp2
}
@@ -201,7 +201,7 @@ define <8 x i16> @vsetQ_lane16(<8 x i16>* %A, i16 %B) nounwind {
define <4 x i32> @vsetQ_lane32(<4 x i32>* %A, i32 %B) nounwind {
;CHECK-LABEL: vsetQ_lane32:
;CHECK: vmov.32 d{{.*}}[1], r1
- %tmp1 = load <4 x i32>* %A
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
%tmp2 = insertelement <4 x i32> %tmp1, i32 %B, i32 1
ret <4 x i32> %tmp2
}
@@ -219,14 +219,14 @@ entry:
; be an immediate constant. Make sure a variable lane number is handled.
define i32 @vget_variable_lanes8(<8 x i8>* %A, i32 %B) nounwind {
- %tmp1 = load <8 x i8>* %A
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
%tmp2 = extractelement <8 x i8> %tmp1, i32 %B
%tmp3 = sext i8 %tmp2 to i32
ret i32 %tmp3
}
define i32 @vgetQ_variable_lanei32(<4 x i32>* %A, i32 %B) nounwind {
- %tmp1 = load <4 x i32>* %A
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
%tmp2 = add <4 x i32> %tmp1, %tmp1
%tmp3 = extractelement <4 x i32> %tmp2, i32 %B
ret i32 %tmp3
diff --git a/test/CodeGen/ARM/vhadd.ll b/test/CodeGen/ARM/vhadd.ll
index 6183db3702b3..01e239d5c73a 100644
--- a/test/CodeGen/ARM/vhadd.ll
+++ b/test/CodeGen/ARM/vhadd.ll
@@ -3,8 +3,8 @@
define <8 x i8> @vhadds8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: vhadds8:
;CHECK: vhadd.s8
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
+ %tmp2 = load <8 x i8>, <8 x i8>* %B
%tmp3 = call <8 x i8> @llvm.arm.neon.vhadds.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
ret <8 x i8> %tmp3
}
@@ -12,8 +12,8 @@ define <8 x i8> @vhadds8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
define <4 x i16> @vhadds16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: vhadds16:
;CHECK: vhadd.s16
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
%tmp3 = call <4 x i16> @llvm.arm.neon.vhadds.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
ret <4 x i16> %tmp3
}
@@ -21,8 +21,8 @@ define <4 x i16> @vhadds16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
define <2 x i32> @vhadds32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: vhadds32:
;CHECK: vhadd.s32
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
%tmp3 = call <2 x i32> @llvm.arm.neon.vhadds.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
ret <2 x i32> %tmp3
}
@@ -30,8 +30,8 @@ define <2 x i32> @vhadds32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
define <8 x i8> @vhaddu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: vhaddu8:
;CHECK: vhadd.u8
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
+ %tmp2 = load <8 x i8>, <8 x i8>* %B
%tmp3 = call <8 x i8> @llvm.arm.neon.vhaddu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
ret <8 x i8> %tmp3
}
@@ -39,8 +39,8 @@ define <8 x i8> @vhaddu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
define <4 x i16> @vhaddu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: vhaddu16:
;CHECK: vhadd.u16
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
%tmp3 = call <4 x i16> @llvm.arm.neon.vhaddu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
ret <4 x i16> %tmp3
}
@@ -48,8 +48,8 @@ define <4 x i16> @vhaddu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
define <2 x i32> @vhaddu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: vhaddu32:
;CHECK: vhadd.u32
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
%tmp3 = call <2 x i32> @llvm.arm.neon.vhaddu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
ret <2 x i32> %tmp3
}
@@ -57,8 +57,8 @@ define <2 x i32> @vhaddu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
define <16 x i8> @vhaddQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: vhaddQs8:
;CHECK: vhadd.s8
- %tmp1 = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
+ %tmp1 = load <16 x i8>, <16 x i8>* %A
+ %tmp2 = load <16 x i8>, <16 x i8>* %B
%tmp3 = call <16 x i8> @llvm.arm.neon.vhadds.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
ret <16 x i8> %tmp3
}
@@ -66,8 +66,8 @@ define <16 x i8> @vhaddQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
define <8 x i16> @vhaddQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: vhaddQs16:
;CHECK: vhadd.s16
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
+ %tmp2 = load <8 x i16>, <8 x i16>* %B
%tmp3 = call <8 x i16> @llvm.arm.neon.vhadds.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
ret <8 x i16> %tmp3
}
@@ -75,8 +75,8 @@ define <8 x i16> @vhaddQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
define <4 x i32> @vhaddQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: vhaddQs32:
;CHECK: vhadd.s32
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
+ %tmp2 = load <4 x i32>, <4 x i32>* %B
%tmp3 = call <4 x i32> @llvm.arm.neon.vhadds.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
ret <4 x i32> %tmp3
}
@@ -84,8 +84,8 @@ define <4 x i32> @vhaddQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
define <16 x i8> @vhaddQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: vhaddQu8:
;CHECK: vhadd.u8
- %tmp1 = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
+ %tmp1 = load <16 x i8>, <16 x i8>* %A
+ %tmp2 = load <16 x i8>, <16 x i8>* %B
%tmp3 = call <16 x i8> @llvm.arm.neon.vhaddu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
ret <16 x i8> %tmp3
}
@@ -93,8 +93,8 @@ define <16 x i8> @vhaddQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
define <8 x i16> @vhaddQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: vhaddQu16:
;CHECK: vhadd.u16
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
+ %tmp2 = load <8 x i16>, <8 x i16>* %B
%tmp3 = call <8 x i16> @llvm.arm.neon.vhaddu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
ret <8 x i16> %tmp3
}
@@ -102,8 +102,8 @@ define <8 x i16> @vhaddQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
define <4 x i32> @vhaddQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: vhaddQu32:
;CHECK: vhadd.u32
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
+ %tmp2 = load <4 x i32>, <4 x i32>* %B
%tmp3 = call <4 x i32> @llvm.arm.neon.vhaddu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
ret <4 x i32> %tmp3
}
@@ -127,8 +127,8 @@ declare <4 x i32> @llvm.arm.neon.vhaddu.v4i32(<4 x i32>, <4 x i32>) nounwind rea
define <8 x i8> @vrhadds8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: vrhadds8:
;CHECK: vrhadd.s8
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
+ %tmp2 = load <8 x i8>, <8 x i8>* %B
%tmp3 = call <8 x i8> @llvm.arm.neon.vrhadds.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
ret <8 x i8> %tmp3
}
@@ -136,8 +136,8 @@ define <8 x i8> @vrhadds8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
define <4 x i16> @vrhadds16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: vrhadds16:
;CHECK: vrhadd.s16
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
%tmp3 = call <4 x i16> @llvm.arm.neon.vrhadds.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
ret <4 x i16> %tmp3
}
@@ -145,8 +145,8 @@ define <4 x i16> @vrhadds16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
define <2 x i32> @vrhadds32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: vrhadds32:
;CHECK: vrhadd.s32
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
%tmp3 = call <2 x i32> @llvm.arm.neon.vrhadds.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
ret <2 x i32> %tmp3
}
@@ -154,8 +154,8 @@ define <2 x i32> @vrhadds32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
define <8 x i8> @vrhaddu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: vrhaddu8:
;CHECK: vrhadd.u8
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
+ %tmp2 = load <8 x i8>, <8 x i8>* %B
%tmp3 = call <8 x i8> @llvm.arm.neon.vrhaddu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
ret <8 x i8> %tmp3
}
@@ -163,8 +163,8 @@ define <8 x i8> @vrhaddu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
define <4 x i16> @vrhaddu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: vrhaddu16:
;CHECK: vrhadd.u16
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
%tmp3 = call <4 x i16> @llvm.arm.neon.vrhaddu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
ret <4 x i16> %tmp3
}
@@ -172,8 +172,8 @@ define <4 x i16> @vrhaddu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
define <2 x i32> @vrhaddu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: vrhaddu32:
;CHECK: vrhadd.u32
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
%tmp3 = call <2 x i32> @llvm.arm.neon.vrhaddu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
ret <2 x i32> %tmp3
}
@@ -181,8 +181,8 @@ define <2 x i32> @vrhaddu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
define <16 x i8> @vrhaddQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: vrhaddQs8:
;CHECK: vrhadd.s8
- %tmp1 = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
+ %tmp1 = load <16 x i8>, <16 x i8>* %A
+ %tmp2 = load <16 x i8>, <16 x i8>* %B
%tmp3 = call <16 x i8> @llvm.arm.neon.vrhadds.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
ret <16 x i8> %tmp3
}
@@ -190,8 +190,8 @@ define <16 x i8> @vrhaddQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
define <8 x i16> @vrhaddQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: vrhaddQs16:
;CHECK: vrhadd.s16
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
+ %tmp2 = load <8 x i16>, <8 x i16>* %B
%tmp3 = call <8 x i16> @llvm.arm.neon.vrhadds.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
ret <8 x i16> %tmp3
}
@@ -199,8 +199,8 @@ define <8 x i16> @vrhaddQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
define <4 x i32> @vrhaddQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: vrhaddQs32:
;CHECK: vrhadd.s32
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
+ %tmp2 = load <4 x i32>, <4 x i32>* %B
%tmp3 = call <4 x i32> @llvm.arm.neon.vrhadds.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
ret <4 x i32> %tmp3
}
@@ -208,8 +208,8 @@ define <4 x i32> @vrhaddQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
define <16 x i8> @vrhaddQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: vrhaddQu8:
;CHECK: vrhadd.u8
- %tmp1 = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
+ %tmp1 = load <16 x i8>, <16 x i8>* %A
+ %tmp2 = load <16 x i8>, <16 x i8>* %B
%tmp3 = call <16 x i8> @llvm.arm.neon.vrhaddu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
ret <16 x i8> %tmp3
}
@@ -217,8 +217,8 @@ define <16 x i8> @vrhaddQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
define <8 x i16> @vrhaddQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: vrhaddQu16:
;CHECK: vrhadd.u16
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
+ %tmp2 = load <8 x i16>, <8 x i16>* %B
%tmp3 = call <8 x i16> @llvm.arm.neon.vrhaddu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
ret <8 x i16> %tmp3
}
@@ -226,8 +226,8 @@ define <8 x i16> @vrhaddQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
define <4 x i32> @vrhaddQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: vrhaddQu32:
;CHECK: vrhadd.u32
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
+ %tmp2 = load <4 x i32>, <4 x i32>* %B
%tmp3 = call <4 x i32> @llvm.arm.neon.vrhaddu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
ret <4 x i32> %tmp3
}
diff --git a/test/CodeGen/ARM/vhsub.ll b/test/CodeGen/ARM/vhsub.ll
index f1a0cb27f576..7b3b29ac6e1a 100644
--- a/test/CodeGen/ARM/vhsub.ll
+++ b/test/CodeGen/ARM/vhsub.ll
@@ -3,8 +3,8 @@
define <8 x i8> @vhsubs8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: vhsubs8:
;CHECK: vhsub.s8
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
+ %tmp2 = load <8 x i8>, <8 x i8>* %B
%tmp3 = call <8 x i8> @llvm.arm.neon.vhsubs.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
ret <8 x i8> %tmp3
}
@@ -12,8 +12,8 @@ define <8 x i8> @vhsubs8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
define <4 x i16> @vhsubs16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: vhsubs16:
;CHECK: vhsub.s16
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
%tmp3 = call <4 x i16> @llvm.arm.neon.vhsubs.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
ret <4 x i16> %tmp3
}
@@ -21,8 +21,8 @@ define <4 x i16> @vhsubs16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
define <2 x i32> @vhsubs32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: vhsubs32:
;CHECK: vhsub.s32
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
%tmp3 = call <2 x i32> @llvm.arm.neon.vhsubs.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
ret <2 x i32> %tmp3
}
@@ -30,8 +30,8 @@ define <2 x i32> @vhsubs32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
define <8 x i8> @vhsubu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: vhsubu8:
;CHECK: vhsub.u8
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
+ %tmp2 = load <8 x i8>, <8 x i8>* %B
%tmp3 = call <8 x i8> @llvm.arm.neon.vhsubu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
ret <8 x i8> %tmp3
}
@@ -39,8 +39,8 @@ define <8 x i8> @vhsubu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
define <4 x i16> @vhsubu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: vhsubu16:
;CHECK: vhsub.u16
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
%tmp3 = call <4 x i16> @llvm.arm.neon.vhsubu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
ret <4 x i16> %tmp3
}
@@ -48,8 +48,8 @@ define <4 x i16> @vhsubu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
define <2 x i32> @vhsubu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: vhsubu32:
;CHECK: vhsub.u32
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
%tmp3 = call <2 x i32> @llvm.arm.neon.vhsubu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
ret <2 x i32> %tmp3
}
@@ -57,8 +57,8 @@ define <2 x i32> @vhsubu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
define <16 x i8> @vhsubQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: vhsubQs8:
;CHECK: vhsub.s8
- %tmp1 = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
+ %tmp1 = load <16 x i8>, <16 x i8>* %A
+ %tmp2 = load <16 x i8>, <16 x i8>* %B
%tmp3 = call <16 x i8> @llvm.arm.neon.vhsubs.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
ret <16 x i8> %tmp3
}
@@ -66,8 +66,8 @@ define <16 x i8> @vhsubQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
define <8 x i16> @vhsubQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: vhsubQs16:
;CHECK: vhsub.s16
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
+ %tmp2 = load <8 x i16>, <8 x i16>* %B
%tmp3 = call <8 x i16> @llvm.arm.neon.vhsubs.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
ret <8 x i16> %tmp3
}
@@ -75,8 +75,8 @@ define <8 x i16> @vhsubQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
define <4 x i32> @vhsubQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: vhsubQs32:
;CHECK: vhsub.s32
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
+ %tmp2 = load <4 x i32>, <4 x i32>* %B
%tmp3 = call <4 x i32> @llvm.arm.neon.vhsubs.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
ret <4 x i32> %tmp3
}
@@ -84,8 +84,8 @@ define <4 x i32> @vhsubQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
define <16 x i8> @vhsubQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: vhsubQu8:
;CHECK: vhsub.u8
- %tmp1 = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
+ %tmp1 = load <16 x i8>, <16 x i8>* %A
+ %tmp2 = load <16 x i8>, <16 x i8>* %B
%tmp3 = call <16 x i8> @llvm.arm.neon.vhsubu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
ret <16 x i8> %tmp3
}
@@ -93,8 +93,8 @@ define <16 x i8> @vhsubQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
define <8 x i16> @vhsubQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: vhsubQu16:
;CHECK: vhsub.u16
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
+ %tmp2 = load <8 x i16>, <8 x i16>* %B
%tmp3 = call <8 x i16> @llvm.arm.neon.vhsubu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
ret <8 x i16> %tmp3
}
@@ -102,8 +102,8 @@ define <8 x i16> @vhsubQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
define <4 x i32> @vhsubQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: vhsubQu32:
;CHECK: vhsub.u32
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
+ %tmp2 = load <4 x i32>, <4 x i32>* %B
%tmp3 = call <4 x i32> @llvm.arm.neon.vhsubu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
ret <4 x i32> %tmp3
}
diff --git a/test/CodeGen/ARM/vicmp.ll b/test/CodeGen/ARM/vicmp.ll
index bebb32062f71..21b104a0d045 100644
--- a/test/CodeGen/ARM/vicmp.ll
+++ b/test/CodeGen/ARM/vicmp.ll
@@ -10,8 +10,8 @@ define <8 x i8> @vcnei8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: vcnei8:
;CHECK: vceq.i8
;CHECK-NEXT: vmvn
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
+ %tmp2 = load <8 x i8>, <8 x i8>* %B
%tmp3 = icmp ne <8 x i8> %tmp1, %tmp2
%tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
ret <8 x i8> %tmp4
@@ -21,8 +21,8 @@ define <4 x i16> @vcnei16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: vcnei16:
;CHECK: vceq.i16
;CHECK-NEXT: vmvn
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
%tmp3 = icmp ne <4 x i16> %tmp1, %tmp2
%tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
ret <4 x i16> %tmp4
@@ -32,8 +32,8 @@ define <2 x i32> @vcnei32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: vcnei32:
;CHECK: vceq.i32
;CHECK-NEXT: vmvn
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
%tmp3 = icmp ne <2 x i32> %tmp1, %tmp2
%tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
ret <2 x i32> %tmp4
@@ -43,8 +43,8 @@ define <16 x i8> @vcneQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: vcneQi8:
;CHECK: vceq.i8
;CHECK-NEXT: vmvn
- %tmp1 = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
+ %tmp1 = load <16 x i8>, <16 x i8>* %A
+ %tmp2 = load <16 x i8>, <16 x i8>* %B
%tmp3 = icmp ne <16 x i8> %tmp1, %tmp2
%tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
ret <16 x i8> %tmp4
@@ -54,8 +54,8 @@ define <8 x i16> @vcneQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: vcneQi16:
;CHECK: vceq.i16
;CHECK-NEXT: vmvn
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
+ %tmp2 = load <8 x i16>, <8 x i16>* %B
%tmp3 = icmp ne <8 x i16> %tmp1, %tmp2
%tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
ret <8 x i16> %tmp4
@@ -65,8 +65,8 @@ define <4 x i32> @vcneQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: vcneQi32:
;CHECK: vceq.i32
;CHECK-NEXT: vmvn
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
+ %tmp2 = load <4 x i32>, <4 x i32>* %B
%tmp3 = icmp ne <4 x i32> %tmp1, %tmp2
%tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
ret <4 x i32> %tmp4
@@ -75,8 +75,8 @@ define <4 x i32> @vcneQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
define <16 x i8> @vcltQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: vcltQs8:
;CHECK: vcgt.s8
- %tmp1 = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
+ %tmp1 = load <16 x i8>, <16 x i8>* %A
+ %tmp2 = load <16 x i8>, <16 x i8>* %B
%tmp3 = icmp slt <16 x i8> %tmp1, %tmp2
%tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
ret <16 x i8> %tmp4
@@ -85,8 +85,8 @@ define <16 x i8> @vcltQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
define <4 x i16> @vcles16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: vcles16:
;CHECK: vcge.s16
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
%tmp3 = icmp sle <4 x i16> %tmp1, %tmp2
%tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
ret <4 x i16> %tmp4
@@ -95,8 +95,8 @@ define <4 x i16> @vcles16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
define <4 x i16> @vcltu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: vcltu16:
;CHECK: vcgt.u16
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
%tmp3 = icmp ult <4 x i16> %tmp1, %tmp2
%tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
ret <4 x i16> %tmp4
@@ -105,8 +105,8 @@ define <4 x i16> @vcltu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
define <4 x i32> @vcleQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: vcleQu32:
;CHECK: vcge.u32
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
+ %tmp2 = load <4 x i32>, <4 x i32>* %B
%tmp3 = icmp ule <4 x i32> %tmp1, %tmp2
%tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
ret <4 x i32> %tmp4
diff --git a/test/CodeGen/ARM/vld1.ll b/test/CodeGen/ARM/vld1.ll
index db640f54b0e6..8064ea4a320a 100644
--- a/test/CodeGen/ARM/vld1.ll
+++ b/test/CodeGen/ARM/vld1.ll
@@ -23,10 +23,10 @@ define <4 x i16> @vld1i16(i16* %A) nounwind {
define <4 x i16> @vld1i16_update(i16** %ptr) nounwind {
;CHECK-LABEL: vld1i16_update:
;CHECK: vld1.16 {d16}, [{{r[0-9]+}}]!
- %A = load i16** %ptr
+ %A = load i16*, i16** %ptr
%tmp0 = bitcast i16* %A to i8*
%tmp1 = call <4 x i16> @llvm.arm.neon.vld1.v4i16(i8* %tmp0, i32 1)
- %tmp2 = getelementptr i16* %A, i32 4
+ %tmp2 = getelementptr i16, i16* %A, i32 4
store i16* %tmp2, i16** %ptr
ret <4 x i16> %tmp1
}
@@ -43,10 +43,10 @@ define <2 x i32> @vld1i32(i32* %A) nounwind {
define <2 x i32> @vld1i32_update(i32** %ptr, i32 %inc) nounwind {
;CHECK-LABEL: vld1i32_update:
;CHECK: vld1.32 {d16}, [{{r[0-9]+}}], {{r[0-9]+}}
- %A = load i32** %ptr
+ %A = load i32*, i32** %ptr
%tmp0 = bitcast i32* %A to i8*
%tmp1 = call <2 x i32> @llvm.arm.neon.vld1.v2i32(i8* %tmp0, i32 1)
- %tmp2 = getelementptr i32* %A, i32 %inc
+ %tmp2 = getelementptr i32, i32* %A, i32 %inc
store i32* %tmp2, i32** %ptr
ret <2 x i32> %tmp1
}
@@ -79,9 +79,9 @@ define <16 x i8> @vld1Qi8(i8* %A) nounwind {
define <16 x i8> @vld1Qi8_update(i8** %ptr) nounwind {
;CHECK-LABEL: vld1Qi8_update:
;CHECK: vld1.8 {d16, d17}, [{{r[0-9]+}}:64]!
- %A = load i8** %ptr
+ %A = load i8*, i8** %ptr
%tmp1 = call <16 x i8> @llvm.arm.neon.vld1.v16i8(i8* %A, i32 8)
- %tmp2 = getelementptr i8* %A, i32 16
+ %tmp2 = getelementptr i8, i8* %A, i32 16
store i8* %tmp2, i8** %ptr
ret <16 x i8> %tmp1
}
diff --git a/test/CodeGen/ARM/vld2.ll b/test/CodeGen/ARM/vld2.ll
index 7ac5cc709b33..391b49152cd9 100644
--- a/test/CodeGen/ARM/vld2.ll
+++ b/test/CodeGen/ARM/vld2.ll
@@ -60,13 +60,13 @@ define <2 x float> @vld2f(float* %A) nounwind {
define <2 x float> @vld2f_update(float** %ptr) nounwind {
;CHECK-LABEL: vld2f_update:
;CHECK: vld2.32 {d16, d17}, [r1]!
- %A = load float** %ptr
+ %A = load float*, float** %ptr
%tmp0 = bitcast float* %A to i8*
%tmp1 = call %struct.__neon_float32x2x2_t @llvm.arm.neon.vld2.v2f32(i8* %tmp0, i32 1)
%tmp2 = extractvalue %struct.__neon_float32x2x2_t %tmp1, 0
%tmp3 = extractvalue %struct.__neon_float32x2x2_t %tmp1, 1
%tmp4 = fadd <2 x float> %tmp2, %tmp3
- %tmp5 = getelementptr float* %A, i32 4
+ %tmp5 = getelementptr float, float* %A, i32 4
store float* %tmp5, float** %ptr
ret <2 x float> %tmp4
}
@@ -98,12 +98,12 @@ define <16 x i8> @vld2Qi8(i8* %A) nounwind {
define <16 x i8> @vld2Qi8_update(i8** %ptr, i32 %inc) nounwind {
;CHECK-LABEL: vld2Qi8_update:
;CHECK: vld2.8 {d16, d17, d18, d19}, [r2:128], r1
- %A = load i8** %ptr
+ %A = load i8*, i8** %ptr
%tmp1 = call %struct.__neon_int8x16x2_t @llvm.arm.neon.vld2.v16i8(i8* %A, i32 16)
%tmp2 = extractvalue %struct.__neon_int8x16x2_t %tmp1, 0
%tmp3 = extractvalue %struct.__neon_int8x16x2_t %tmp1, 1
%tmp4 = add <16 x i8> %tmp2, %tmp3
- %tmp5 = getelementptr i8* %A, i32 %inc
+ %tmp5 = getelementptr i8, i8* %A, i32 %inc
store i8* %tmp5, i8** %ptr
ret <16 x i8> %tmp4
}
diff --git a/test/CodeGen/ARM/vld3.ll b/test/CodeGen/ARM/vld3.ll
index 171a03c24da1..0d14179ba73a 100644
--- a/test/CodeGen/ARM/vld3.ll
+++ b/test/CodeGen/ARM/vld3.ll
@@ -38,13 +38,13 @@ define <4 x i16> @vld3i16(i16* %A) nounwind {
define <4 x i16> @vld3i16_update(i16** %ptr, i32 %inc) nounwind {
;CHECK-LABEL: vld3i16_update:
;CHECK: vld3.16 {d16, d17, d18}, [{{r[0-9]+}}], {{r[0-9]+}}
- %A = load i16** %ptr
+ %A = load i16*, i16** %ptr
%tmp0 = bitcast i16* %A to i8*
%tmp1 = call %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3.v4i16(i8* %tmp0, i32 1)
%tmp2 = extractvalue %struct.__neon_int16x4x3_t %tmp1, 0
%tmp3 = extractvalue %struct.__neon_int16x4x3_t %tmp1, 2
%tmp4 = add <4 x i16> %tmp2, %tmp3
- %tmp5 = getelementptr i16* %A, i32 %inc
+ %tmp5 = getelementptr i16, i16* %A, i32 %inc
store i16* %tmp5, i16** %ptr
ret <4 x i16> %tmp4
}
@@ -88,7 +88,7 @@ define <1 x i64> @vld3i64_update(i64** %ptr, i64* %A) nounwind {
;CHECK: vld1.64 {d16, d17, d18}, [r1:64]!
%tmp0 = bitcast i64* %A to i8*
%tmp1 = call %struct.__neon_int64x1x3_t @llvm.arm.neon.vld3.v1i64(i8* %tmp0, i32 16)
- %tmp5 = getelementptr i64* %A, i32 3
+ %tmp5 = getelementptr i64, i64* %A, i32 3
store i64* %tmp5, i64** %ptr
%tmp2 = extractvalue %struct.__neon_int64x1x3_t %tmp1, 0
%tmp3 = extractvalue %struct.__neon_int64x1x3_t %tmp1, 2
@@ -137,13 +137,13 @@ define <4 x i32> @vld3Qi32_update(i32** %ptr) nounwind {
;CHECK-LABEL: vld3Qi32_update:
;CHECK: vld3.32 {d16, d18, d20}, [r[[R:[0-9]+]]]!
;CHECK: vld3.32 {d17, d19, d21}, [r[[R]]]!
- %A = load i32** %ptr
+ %A = load i32*, i32** %ptr
%tmp0 = bitcast i32* %A to i8*
%tmp1 = call %struct.__neon_int32x4x3_t @llvm.arm.neon.vld3.v4i32(i8* %tmp0, i32 1)
%tmp2 = extractvalue %struct.__neon_int32x4x3_t %tmp1, 0
%tmp3 = extractvalue %struct.__neon_int32x4x3_t %tmp1, 2
%tmp4 = add <4 x i32> %tmp2, %tmp3
- %tmp5 = getelementptr i32* %A, i32 12
+ %tmp5 = getelementptr i32, i32* %A, i32 12
store i32* %tmp5, i32** %ptr
ret <4 x i32> %tmp4
}
diff --git a/test/CodeGen/ARM/vld4.ll b/test/CodeGen/ARM/vld4.ll
index 94ad143ae0fd..575e0fa717fb 100644
--- a/test/CodeGen/ARM/vld4.ll
+++ b/test/CodeGen/ARM/vld4.ll
@@ -26,12 +26,12 @@ define <8 x i8> @vld4i8(i8* %A) nounwind {
define <8 x i8> @vld4i8_update(i8** %ptr, i32 %inc) nounwind {
;CHECK-LABEL: vld4i8_update:
;CHECK: vld4.8 {d16, d17, d18, d19}, [r2:128], r1
- %A = load i8** %ptr
+ %A = load i8*, i8** %ptr
%tmp1 = call %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4.v8i8(i8* %A, i32 16)
%tmp2 = extractvalue %struct.__neon_int8x8x4_t %tmp1, 0
%tmp3 = extractvalue %struct.__neon_int8x8x4_t %tmp1, 2
%tmp4 = add <8 x i8> %tmp2, %tmp3
- %tmp5 = getelementptr i8* %A, i32 %inc
+ %tmp5 = getelementptr i8, i8* %A, i32 %inc
store i8* %tmp5, i8** %ptr
ret <8 x i8> %tmp4
}
@@ -88,7 +88,7 @@ define <1 x i64> @vld4i64_update(i64** %ptr, i64* %A) nounwind {
;CHECK: vld1.64 {d16, d17, d18, d19}, [r1:256]!
%tmp0 = bitcast i64* %A to i8*
%tmp1 = call %struct.__neon_int64x1x4_t @llvm.arm.neon.vld4.v1i64(i8* %tmp0, i32 64)
- %tmp5 = getelementptr i64* %A, i32 4
+ %tmp5 = getelementptr i64, i64* %A, i32 4
store i64* %tmp5, i64** %ptr
%tmp2 = extractvalue %struct.__neon_int64x1x4_t %tmp1, 0
%tmp3 = extractvalue %struct.__neon_int64x1x4_t %tmp1, 2
@@ -126,13 +126,13 @@ define <8 x i16> @vld4Qi16_update(i16** %ptr) nounwind {
;CHECK-LABEL: vld4Qi16_update:
;CHECK: vld4.16 {d16, d18, d20, d22}, [r1:64]!
;CHECK: vld4.16 {d17, d19, d21, d23}, [r1:64]!
- %A = load i16** %ptr
+ %A = load i16*, i16** %ptr
%tmp0 = bitcast i16* %A to i8*
%tmp1 = call %struct.__neon_int16x8x4_t @llvm.arm.neon.vld4.v8i16(i8* %tmp0, i32 8)
%tmp2 = extractvalue %struct.__neon_int16x8x4_t %tmp1, 0
%tmp3 = extractvalue %struct.__neon_int16x8x4_t %tmp1, 2
%tmp4 = add <8 x i16> %tmp2, %tmp3
- %tmp5 = getelementptr i16* %A, i32 32
+ %tmp5 = getelementptr i16, i16* %A, i32 32
store i16* %tmp5, i16** %ptr
ret <8 x i16> %tmp4
}
diff --git a/test/CodeGen/ARM/vlddup.ll b/test/CodeGen/ARM/vlddup.ll
index 64aac562c1eb..09304d87d53b 100644
--- a/test/CodeGen/ARM/vlddup.ll
+++ b/test/CodeGen/ARM/vlddup.ll
@@ -4,7 +4,7 @@ define <8 x i8> @vld1dupi8(i8* %A) nounwind {
;CHECK-LABEL: vld1dupi8:
;Check the (default) alignment value.
;CHECK: vld1.8 {d16[]}, [r0]
- %tmp1 = load i8* %A, align 8
+ %tmp1 = load i8, i8* %A, align 8
%tmp2 = insertelement <8 x i8> undef, i8 %tmp1, i32 0
%tmp3 = shufflevector <8 x i8> %tmp2, <8 x i8> undef, <8 x i32> zeroinitializer
ret <8 x i8> %tmp3
@@ -14,7 +14,7 @@ define <4 x i16> @vld1dupi16(i16* %A) nounwind {
;CHECK-LABEL: vld1dupi16:
;Check the alignment value. Max for this instruction is 16 bits:
;CHECK: vld1.16 {d16[]}, [r0:16]
- %tmp1 = load i16* %A, align 8
+ %tmp1 = load i16, i16* %A, align 8
%tmp2 = insertelement <4 x i16> undef, i16 %tmp1, i32 0
%tmp3 = shufflevector <4 x i16> %tmp2, <4 x i16> undef, <4 x i32> zeroinitializer
ret <4 x i16> %tmp3
@@ -24,7 +24,7 @@ define <2 x i32> @vld1dupi32(i32* %A) nounwind {
;CHECK-LABEL: vld1dupi32:
;Check the alignment value. Max for this instruction is 32 bits:
;CHECK: vld1.32 {d16[]}, [r0:32]
- %tmp1 = load i32* %A, align 8
+ %tmp1 = load i32, i32* %A, align 8
%tmp2 = insertelement <2 x i32> undef, i32 %tmp1, i32 0
%tmp3 = shufflevector <2 x i32> %tmp2, <2 x i32> undef, <2 x i32> zeroinitializer
ret <2 x i32> %tmp3
@@ -33,7 +33,7 @@ define <2 x i32> @vld1dupi32(i32* %A) nounwind {
define <2 x float> @vld1dupf(float* %A) nounwind {
;CHECK-LABEL: vld1dupf:
;CHECK: vld1.32 {d16[]}, [r0:32]
- %tmp0 = load float* %A
+ %tmp0 = load float, float* %A
%tmp1 = insertelement <2 x float> undef, float %tmp0, i32 0
%tmp2 = shufflevector <2 x float> %tmp1, <2 x float> undef, <2 x i32> zeroinitializer
ret <2 x float> %tmp2
@@ -43,7 +43,7 @@ define <16 x i8> @vld1dupQi8(i8* %A) nounwind {
;CHECK-LABEL: vld1dupQi8:
;Check the (default) alignment value.
;CHECK: vld1.8 {d16[], d17[]}, [r0]
- %tmp1 = load i8* %A, align 8
+ %tmp1 = load i8, i8* %A, align 8
%tmp2 = insertelement <16 x i8> undef, i8 %tmp1, i32 0
%tmp3 = shufflevector <16 x i8> %tmp2, <16 x i8> undef, <16 x i32> zeroinitializer
ret <16 x i8> %tmp3
@@ -52,7 +52,7 @@ define <16 x i8> @vld1dupQi8(i8* %A) nounwind {
define <4 x float> @vld1dupQf(float* %A) nounwind {
;CHECK-LABEL: vld1dupQf:
;CHECK: vld1.32 {d16[], d17[]}, [r0:32]
- %tmp0 = load float* %A
+ %tmp0 = load float, float* %A
%tmp1 = insertelement <4 x float> undef, float %tmp0, i32 0
%tmp2 = shufflevector <4 x float> %tmp1, <4 x float> undef, <4 x i32> zeroinitializer
ret <4 x float> %tmp2
@@ -93,7 +93,7 @@ define <4 x i16> @vld2dupi16(i8* %A) nounwind {
define <4 x i16> @vld2dupi16_update(i16** %ptr) nounwind {
;CHECK-LABEL: vld2dupi16_update:
;CHECK: vld2.16 {d16[], d17[]}, [r1]!
- %A = load i16** %ptr
+ %A = load i16*, i16** %ptr
%A2 = bitcast i16* %A to i8*
%tmp0 = tail call %struct.__neon_int4x16x2_t @llvm.arm.neon.vld2lane.v4i16(i8* %A2, <4 x i16> undef, <4 x i16> undef, i32 0, i32 2)
%tmp1 = extractvalue %struct.__neon_int4x16x2_t %tmp0, 0
@@ -101,7 +101,7 @@ define <4 x i16> @vld2dupi16_update(i16** %ptr) nounwind {
%tmp3 = extractvalue %struct.__neon_int4x16x2_t %tmp0, 1
%tmp4 = shufflevector <4 x i16> %tmp3, <4 x i16> undef, <4 x i32> zeroinitializer
%tmp5 = add <4 x i16> %tmp2, %tmp4
- %tmp6 = getelementptr i16* %A, i32 2
+ %tmp6 = getelementptr i16, i16* %A, i32 2
store i16* %tmp6, i16** %ptr
ret <4 x i16> %tmp5
}
@@ -130,7 +130,7 @@ declare %struct.__neon_int2x32x2_t @llvm.arm.neon.vld2lane.v2i32(i8*, <2 x i32>,
define <8 x i8> @vld3dupi8_update(i8** %ptr, i32 %inc) nounwind {
;CHECK-LABEL: vld3dupi8_update:
;CHECK: vld3.8 {d16[], d17[], d18[]}, [r2], r1
- %A = load i8** %ptr
+ %A = load i8*, i8** %ptr
%tmp0 = tail call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3lane.v8i8(i8* %A, <8 x i8> undef, <8 x i8> undef, <8 x i8> undef, i32 0, i32 8)
%tmp1 = extractvalue %struct.__neon_int8x8x3_t %tmp0, 0
%tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> zeroinitializer
@@ -140,7 +140,7 @@ define <8 x i8> @vld3dupi8_update(i8** %ptr, i32 %inc) nounwind {
%tmp6 = shufflevector <8 x i8> %tmp5, <8 x i8> undef, <8 x i32> zeroinitializer
%tmp7 = add <8 x i8> %tmp2, %tmp4
%tmp8 = add <8 x i8> %tmp7, %tmp6
- %tmp9 = getelementptr i8* %A, i32 %inc
+ %tmp9 = getelementptr i8, i8* %A, i32 %inc
store i8* %tmp9, i8** %ptr
ret <8 x i8> %tmp8
}
@@ -171,7 +171,7 @@ declare %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3lane.v4i16(i8*, <4 x i16>,
define <4 x i16> @vld4dupi16_update(i16** %ptr) nounwind {
;CHECK-LABEL: vld4dupi16_update:
;CHECK: vld4.16 {d16[], d17[], d18[], d19[]}, [r1]!
- %A = load i16** %ptr
+ %A = load i16*, i16** %ptr
%A2 = bitcast i16* %A to i8*
%tmp0 = tail call %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4lane.v4i16(i8* %A2, <4 x i16> undef, <4 x i16> undef, <4 x i16> undef, <4 x i16> undef, i32 0, i32 1)
%tmp1 = extractvalue %struct.__neon_int16x4x4_t %tmp0, 0
@@ -185,7 +185,7 @@ define <4 x i16> @vld4dupi16_update(i16** %ptr) nounwind {
%tmp9 = add <4 x i16> %tmp2, %tmp4
%tmp10 = add <4 x i16> %tmp6, %tmp8
%tmp11 = add <4 x i16> %tmp9, %tmp10
- %tmp12 = getelementptr i16* %A, i32 4
+ %tmp12 = getelementptr i16, i16* %A, i32 4
store i16* %tmp12, i16** %ptr
ret <4 x i16> %tmp11
}
diff --git a/test/CodeGen/ARM/vldlane.ll b/test/CodeGen/ARM/vldlane.ll
index c7d69ff9780a..ac2be7f87f53 100644
--- a/test/CodeGen/ARM/vldlane.ll
+++ b/test/CodeGen/ARM/vldlane.ll
@@ -7,8 +7,8 @@ define <8 x i8> @vld1lanei8(i8* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: vld1lanei8:
;Check the (default) alignment value.
;CHECK: vld1.8 {d16[3]}, [r0]
- %tmp1 = load <8 x i8>* %B
- %tmp2 = load i8* %A, align 8
+ %tmp1 = load <8 x i8>, <8 x i8>* %B
+ %tmp2 = load i8, i8* %A, align 8
%tmp3 = insertelement <8 x i8> %tmp1, i8 %tmp2, i32 3
ret <8 x i8> %tmp3
}
@@ -17,8 +17,8 @@ define <4 x i16> @vld1lanei16(i16* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: vld1lanei16:
;Check the alignment value. Max for this instruction is 16 bits:
;CHECK: vld1.16 {d16[2]}, [r0:16]
- %tmp1 = load <4 x i16>* %B
- %tmp2 = load i16* %A, align 8
+ %tmp1 = load <4 x i16>, <4 x i16>* %B
+ %tmp2 = load i16, i16* %A, align 8
%tmp3 = insertelement <4 x i16> %tmp1, i16 %tmp2, i32 2
ret <4 x i16> %tmp3
}
@@ -27,8 +27,8 @@ define <2 x i32> @vld1lanei32(i32* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: vld1lanei32:
;Check the alignment value. Max for this instruction is 32 bits:
;CHECK: vld1.32 {d16[1]}, [r0:32]
- %tmp1 = load <2 x i32>* %B
- %tmp2 = load i32* %A, align 8
+ %tmp1 = load <2 x i32>, <2 x i32>* %B
+ %tmp2 = load i32, i32* %A, align 8
%tmp3 = insertelement <2 x i32> %tmp1, i32 %tmp2, i32 1
ret <2 x i32> %tmp3
}
@@ -37,8 +37,8 @@ define <2 x i32> @vld1lanei32a32(i32* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: vld1lanei32a32:
;Check the alignment value. Legal values are none or :32.
;CHECK: vld1.32 {d16[1]}, [r0:32]
- %tmp1 = load <2 x i32>* %B
- %tmp2 = load i32* %A, align 4
+ %tmp1 = load <2 x i32>, <2 x i32>* %B
+ %tmp2 = load i32, i32* %A, align 4
%tmp3 = insertelement <2 x i32> %tmp1, i32 %tmp2, i32 1
ret <2 x i32> %tmp3
}
@@ -46,8 +46,8 @@ define <2 x i32> @vld1lanei32a32(i32* %A, <2 x i32>* %B) nounwind {
define <2 x float> @vld1lanef(float* %A, <2 x float>* %B) nounwind {
;CHECK-LABEL: vld1lanef:
;CHECK: vld1.32 {d16[1]}, [r0:32]
- %tmp1 = load <2 x float>* %B
- %tmp2 = load float* %A, align 4
+ %tmp1 = load <2 x float>, <2 x float>* %B
+ %tmp2 = load float, float* %A, align 4
%tmp3 = insertelement <2 x float> %tmp1, float %tmp2, i32 1
ret <2 x float> %tmp3
}
@@ -55,8 +55,8 @@ define <2 x float> @vld1lanef(float* %A, <2 x float>* %B) nounwind {
define <16 x i8> @vld1laneQi8(i8* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: vld1laneQi8:
;CHECK: vld1.8 {d17[1]}, [r0]
- %tmp1 = load <16 x i8>* %B
- %tmp2 = load i8* %A, align 8
+ %tmp1 = load <16 x i8>, <16 x i8>* %B
+ %tmp2 = load i8, i8* %A, align 8
%tmp3 = insertelement <16 x i8> %tmp1, i8 %tmp2, i32 9
ret <16 x i8> %tmp3
}
@@ -64,8 +64,8 @@ define <16 x i8> @vld1laneQi8(i8* %A, <16 x i8>* %B) nounwind {
define <8 x i16> @vld1laneQi16(i16* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: vld1laneQi16:
;CHECK: vld1.16 {d17[1]}, [r0:16]
- %tmp1 = load <8 x i16>* %B
- %tmp2 = load i16* %A, align 8
+ %tmp1 = load <8 x i16>, <8 x i16>* %B
+ %tmp2 = load i16, i16* %A, align 8
%tmp3 = insertelement <8 x i16> %tmp1, i16 %tmp2, i32 5
ret <8 x i16> %tmp3
}
@@ -73,8 +73,8 @@ define <8 x i16> @vld1laneQi16(i16* %A, <8 x i16>* %B) nounwind {
define <4 x i32> @vld1laneQi32(i32* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: vld1laneQi32:
;CHECK: vld1.32 {d17[1]}, [r0:32]
- %tmp1 = load <4 x i32>* %B
- %tmp2 = load i32* %A, align 8
+ %tmp1 = load <4 x i32>, <4 x i32>* %B
+ %tmp2 = load i32, i32* %A, align 8
%tmp3 = insertelement <4 x i32> %tmp1, i32 %tmp2, i32 3
ret <4 x i32> %tmp3
}
@@ -82,8 +82,8 @@ define <4 x i32> @vld1laneQi32(i32* %A, <4 x i32>* %B) nounwind {
define <4 x float> @vld1laneQf(float* %A, <4 x float>* %B) nounwind {
;CHECK-LABEL: vld1laneQf:
;CHECK: vld1.32 {d16[0]}, [r0:32]
- %tmp1 = load <4 x float>* %B
- %tmp2 = load float* %A
+ %tmp1 = load <4 x float>, <4 x float>* %B
+ %tmp2 = load float, float* %A
%tmp3 = insertelement <4 x float> %tmp1, float %tmp2, i32 0
ret <4 x float> %tmp3
}
@@ -101,7 +101,7 @@ define <8 x i8> @vld2lanei8(i8* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: vld2lanei8:
;Check the alignment value. Max for this instruction is 16 bits:
;CHECK: vld2.8 {d16[1], d17[1]}, [r0:16]
- %tmp1 = load <8 x i8>* %B
+ %tmp1 = load <8 x i8>, <8 x i8>* %B
%tmp2 = call %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 4)
%tmp3 = extractvalue %struct.__neon_int8x8x2_t %tmp2, 0
%tmp4 = extractvalue %struct.__neon_int8x8x2_t %tmp2, 1
@@ -114,7 +114,7 @@ define <4 x i16> @vld2lanei16(i16* %A, <4 x i16>* %B) nounwind {
;Check the alignment value. Max for this instruction is 32 bits:
;CHECK: vld2.16 {d16[1], d17[1]}, [r0:32]
%tmp0 = bitcast i16* %A to i8*
- %tmp1 = load <4 x i16>* %B
+ %tmp1 = load <4 x i16>, <4 x i16>* %B
%tmp2 = call %struct.__neon_int16x4x2_t @llvm.arm.neon.vld2lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1, i32 8)
%tmp3 = extractvalue %struct.__neon_int16x4x2_t %tmp2, 0
%tmp4 = extractvalue %struct.__neon_int16x4x2_t %tmp2, 1
@@ -126,7 +126,7 @@ define <2 x i32> @vld2lanei32(i32* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: vld2lanei32:
;CHECK: vld2.32
%tmp0 = bitcast i32* %A to i8*
- %tmp1 = load <2 x i32>* %B
+ %tmp1 = load <2 x i32>, <2 x i32>* %B
%tmp2 = call %struct.__neon_int32x2x2_t @llvm.arm.neon.vld2lane.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1, i32 1)
%tmp3 = extractvalue %struct.__neon_int32x2x2_t %tmp2, 0
%tmp4 = extractvalue %struct.__neon_int32x2x2_t %tmp2, 1
@@ -138,14 +138,14 @@ define <2 x i32> @vld2lanei32(i32* %A, <2 x i32>* %B) nounwind {
define <2 x i32> @vld2lanei32_update(i32** %ptr, <2 x i32>* %B) nounwind {
;CHECK-LABEL: vld2lanei32_update:
;CHECK: vld2.32 {d16[1], d17[1]}, [{{r[0-9]+}}]!
- %A = load i32** %ptr
+ %A = load i32*, i32** %ptr
%tmp0 = bitcast i32* %A to i8*
- %tmp1 = load <2 x i32>* %B
+ %tmp1 = load <2 x i32>, <2 x i32>* %B
%tmp2 = call %struct.__neon_int32x2x2_t @llvm.arm.neon.vld2lane.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1, i32 1)
%tmp3 = extractvalue %struct.__neon_int32x2x2_t %tmp2, 0
%tmp4 = extractvalue %struct.__neon_int32x2x2_t %tmp2, 1
%tmp5 = add <2 x i32> %tmp3, %tmp4
- %tmp6 = getelementptr i32* %A, i32 2
+ %tmp6 = getelementptr i32, i32* %A, i32 2
store i32* %tmp6, i32** %ptr
ret <2 x i32> %tmp5
}
@@ -154,7 +154,7 @@ define <2 x float> @vld2lanef(float* %A, <2 x float>* %B) nounwind {
;CHECK-LABEL: vld2lanef:
;CHECK: vld2.32
%tmp0 = bitcast float* %A to i8*
- %tmp1 = load <2 x float>* %B
+ %tmp1 = load <2 x float>, <2 x float>* %B
%tmp2 = call %struct.__neon_float32x2x2_t @llvm.arm.neon.vld2lane.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, i32 1, i32 1)
%tmp3 = extractvalue %struct.__neon_float32x2x2_t %tmp2, 0
%tmp4 = extractvalue %struct.__neon_float32x2x2_t %tmp2, 1
@@ -167,7 +167,7 @@ define <8 x i16> @vld2laneQi16(i16* %A, <8 x i16>* %B) nounwind {
;Check the (default) alignment.
;CHECK: vld2.16 {d17[1], d19[1]}, [{{r[0-9]+}}]
%tmp0 = bitcast i16* %A to i8*
- %tmp1 = load <8 x i16>* %B
+ %tmp1 = load <8 x i16>, <8 x i16>* %B
%tmp2 = call %struct.__neon_int16x8x2_t @llvm.arm.neon.vld2lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 5, i32 1)
%tmp3 = extractvalue %struct.__neon_int16x8x2_t %tmp2, 0
%tmp4 = extractvalue %struct.__neon_int16x8x2_t %tmp2, 1
@@ -180,7 +180,7 @@ define <4 x i32> @vld2laneQi32(i32* %A, <4 x i32>* %B) nounwind {
;Check the alignment value. Max for this instruction is 64 bits:
;CHECK: vld2.32 {d17[0], d19[0]}, [{{r[0-9]+}}:64]
%tmp0 = bitcast i32* %A to i8*
- %tmp1 = load <4 x i32>* %B
+ %tmp1 = load <4 x i32>, <4 x i32>* %B
%tmp2 = call %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 2, i32 16)
%tmp3 = extractvalue %struct.__neon_int32x4x2_t %tmp2, 0
%tmp4 = extractvalue %struct.__neon_int32x4x2_t %tmp2, 1
@@ -192,7 +192,7 @@ define <4 x float> @vld2laneQf(float* %A, <4 x float>* %B) nounwind {
;CHECK-LABEL: vld2laneQf:
;CHECK: vld2.32
%tmp0 = bitcast float* %A to i8*
- %tmp1 = load <4 x float>* %B
+ %tmp1 = load <4 x float>, <4 x float>* %B
%tmp2 = call %struct.__neon_float32x4x2_t @llvm.arm.neon.vld2lane.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, i32 1, i32 1)
%tmp3 = extractvalue %struct.__neon_float32x4x2_t %tmp2, 0
%tmp4 = extractvalue %struct.__neon_float32x4x2_t %tmp2, 1
@@ -221,7 +221,7 @@ declare %struct.__neon_float32x4x2_t @llvm.arm.neon.vld2lane.v4f32(i8*, <4 x flo
define <8 x i8> @vld3lanei8(i8* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: vld3lanei8:
;CHECK: vld3.8
- %tmp1 = load <8 x i8>* %B
+ %tmp1 = load <8 x i8>, <8 x i8>* %B
%tmp2 = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 1)
%tmp3 = extractvalue %struct.__neon_int8x8x3_t %tmp2, 0
%tmp4 = extractvalue %struct.__neon_int8x8x3_t %tmp2, 1
@@ -236,7 +236,7 @@ define <4 x i16> @vld3lanei16(i16* %A, <4 x i16>* %B) nounwind {
;Check the (default) alignment value. VLD3 does not support alignment.
;CHECK: vld3.16 {d{{.*}}[1], d{{.*}}[1], d{{.*}}[1]}, [{{r[0-9]+}}]
%tmp0 = bitcast i16* %A to i8*
- %tmp1 = load <4 x i16>* %B
+ %tmp1 = load <4 x i16>, <4 x i16>* %B
%tmp2 = call %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1, i32 8)
%tmp3 = extractvalue %struct.__neon_int16x4x3_t %tmp2, 0
%tmp4 = extractvalue %struct.__neon_int16x4x3_t %tmp2, 1
@@ -250,7 +250,7 @@ define <2 x i32> @vld3lanei32(i32* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: vld3lanei32:
;CHECK: vld3.32
%tmp0 = bitcast i32* %A to i8*
- %tmp1 = load <2 x i32>* %B
+ %tmp1 = load <2 x i32>, <2 x i32>* %B
%tmp2 = call %struct.__neon_int32x2x3_t @llvm.arm.neon.vld3lane.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1, i32 1)
%tmp3 = extractvalue %struct.__neon_int32x2x3_t %tmp2, 0
%tmp4 = extractvalue %struct.__neon_int32x2x3_t %tmp2, 1
@@ -264,7 +264,7 @@ define <2 x float> @vld3lanef(float* %A, <2 x float>* %B) nounwind {
;CHECK-LABEL: vld3lanef:
;CHECK: vld3.32
%tmp0 = bitcast float* %A to i8*
- %tmp1 = load <2 x float>* %B
+ %tmp1 = load <2 x float>, <2 x float>* %B
%tmp2 = call %struct.__neon_float32x2x3_t @llvm.arm.neon.vld3lane.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, i32 1, i32 1)
%tmp3 = extractvalue %struct.__neon_float32x2x3_t %tmp2, 0
%tmp4 = extractvalue %struct.__neon_float32x2x3_t %tmp2, 1
@@ -279,7 +279,7 @@ define <8 x i16> @vld3laneQi16(i16* %A, <8 x i16>* %B) nounwind {
;Check the (default) alignment value. VLD3 does not support alignment.
;CHECK: vld3.16 {d{{.*}}[1], d{{.*}}[1], d{{.*}}[1]}, [{{r[0-9]+}}]
%tmp0 = bitcast i16* %A to i8*
- %tmp1 = load <8 x i16>* %B
+ %tmp1 = load <8 x i16>, <8 x i16>* %B
%tmp2 = call %struct.__neon_int16x8x3_t @llvm.arm.neon.vld3lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1, i32 8)
%tmp3 = extractvalue %struct.__neon_int16x8x3_t %tmp2, 0
%tmp4 = extractvalue %struct.__neon_int16x8x3_t %tmp2, 1
@@ -293,16 +293,16 @@ define <8 x i16> @vld3laneQi16(i16* %A, <8 x i16>* %B) nounwind {
define <8 x i16> @vld3laneQi16_update(i16** %ptr, <8 x i16>* %B, i32 %inc) nounwind {
;CHECK-LABEL: vld3laneQi16_update:
;CHECK: vld3.16 {d{{.*}}[1], d{{.*}}[1], d{{.*}}[1]}, [{{r[0-9]+}}], {{r[0-9]+}}
- %A = load i16** %ptr
+ %A = load i16*, i16** %ptr
%tmp0 = bitcast i16* %A to i8*
- %tmp1 = load <8 x i16>* %B
+ %tmp1 = load <8 x i16>, <8 x i16>* %B
%tmp2 = call %struct.__neon_int16x8x3_t @llvm.arm.neon.vld3lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1, i32 8)
%tmp3 = extractvalue %struct.__neon_int16x8x3_t %tmp2, 0
%tmp4 = extractvalue %struct.__neon_int16x8x3_t %tmp2, 1
%tmp5 = extractvalue %struct.__neon_int16x8x3_t %tmp2, 2
%tmp6 = add <8 x i16> %tmp3, %tmp4
%tmp7 = add <8 x i16> %tmp5, %tmp6
- %tmp8 = getelementptr i16* %A, i32 %inc
+ %tmp8 = getelementptr i16, i16* %A, i32 %inc
store i16* %tmp8, i16** %ptr
ret <8 x i16> %tmp7
}
@@ -311,7 +311,7 @@ define <4 x i32> @vld3laneQi32(i32* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: vld3laneQi32:
;CHECK: vld3.32
%tmp0 = bitcast i32* %A to i8*
- %tmp1 = load <4 x i32>* %B
+ %tmp1 = load <4 x i32>, <4 x i32>* %B
%tmp2 = call %struct.__neon_int32x4x3_t @llvm.arm.neon.vld3lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 3, i32 1)
%tmp3 = extractvalue %struct.__neon_int32x4x3_t %tmp2, 0
%tmp4 = extractvalue %struct.__neon_int32x4x3_t %tmp2, 1
@@ -325,7 +325,7 @@ define <4 x float> @vld3laneQf(float* %A, <4 x float>* %B) nounwind {
;CHECK-LABEL: vld3laneQf:
;CHECK: vld3.32
%tmp0 = bitcast float* %A to i8*
- %tmp1 = load <4 x float>* %B
+ %tmp1 = load <4 x float>, <4 x float>* %B
%tmp2 = call %struct.__neon_float32x4x3_t @llvm.arm.neon.vld3lane.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, i32 1, i32 1)
%tmp3 = extractvalue %struct.__neon_float32x4x3_t %tmp2, 0
%tmp4 = extractvalue %struct.__neon_float32x4x3_t %tmp2, 1
@@ -357,7 +357,7 @@ define <8 x i8> @vld4lanei8(i8* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: vld4lanei8:
;Check the alignment value. Max for this instruction is 32 bits:
;CHECK: vld4.8 {d{{.*}}[1], d{{.*}}[1], d{{.*}}[1], d{{.*}}[1]}, [{{r[0-9]+}}:32]
- %tmp1 = load <8 x i8>* %B
+ %tmp1 = load <8 x i8>, <8 x i8>* %B
%tmp2 = call %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 8)
%tmp3 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 0
%tmp4 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 1
@@ -373,8 +373,8 @@ define <8 x i8> @vld4lanei8(i8* %A, <8 x i8>* %B) nounwind {
define <8 x i8> @vld4lanei8_update(i8** %ptr, <8 x i8>* %B) nounwind {
;CHECK-LABEL: vld4lanei8_update:
;CHECK: vld4.8 {d16[1], d17[1], d18[1], d19[1]}, [{{r[0-9]+}}:32]!
- %A = load i8** %ptr
- %tmp1 = load <8 x i8>* %B
+ %A = load i8*, i8** %ptr
+ %tmp1 = load <8 x i8>, <8 x i8>* %B
%tmp2 = call %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 8)
%tmp3 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 0
%tmp4 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 1
@@ -383,7 +383,7 @@ define <8 x i8> @vld4lanei8_update(i8** %ptr, <8 x i8>* %B) nounwind {
%tmp7 = add <8 x i8> %tmp3, %tmp4
%tmp8 = add <8 x i8> %tmp5, %tmp6
%tmp9 = add <8 x i8> %tmp7, %tmp8
- %tmp10 = getelementptr i8* %A, i32 4
+ %tmp10 = getelementptr i8, i8* %A, i32 4
store i8* %tmp10, i8** %ptr
ret <8 x i8> %tmp9
}
@@ -394,7 +394,7 @@ define <4 x i16> @vld4lanei16(i16* %A, <4 x i16>* %B) nounwind {
;being loaded is ignored.
;CHECK: vld4.16 {d16[1], d17[1], d18[1], d19[1]}, [{{r[0-9]+}}]
%tmp0 = bitcast i16* %A to i8*
- %tmp1 = load <4 x i16>* %B
+ %tmp1 = load <4 x i16>, <4 x i16>* %B
%tmp2 = call %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1, i32 4)
%tmp3 = extractvalue %struct.__neon_int16x4x4_t %tmp2, 0
%tmp4 = extractvalue %struct.__neon_int16x4x4_t %tmp2, 1
@@ -412,7 +412,7 @@ define <2 x i32> @vld4lanei32(i32* %A, <2 x i32>* %B) nounwind {
;it is smaller than the total size of the memory being loaded.
;CHECK: vld4.32 {d16[1], d17[1], d18[1], d19[1]}, [{{r[0-9]+}}:64]
%tmp0 = bitcast i32* %A to i8*
- %tmp1 = load <2 x i32>* %B
+ %tmp1 = load <2 x i32>, <2 x i32>* %B
%tmp2 = call %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4lane.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1, i32 8)
%tmp3 = extractvalue %struct.__neon_int32x2x4_t %tmp2, 0
%tmp4 = extractvalue %struct.__neon_int32x2x4_t %tmp2, 1
@@ -428,7 +428,7 @@ define <2 x float> @vld4lanef(float* %A, <2 x float>* %B) nounwind {
;CHECK-LABEL: vld4lanef:
;CHECK: vld4.32
%tmp0 = bitcast float* %A to i8*
- %tmp1 = load <2 x float>* %B
+ %tmp1 = load <2 x float>, <2 x float>* %B
%tmp2 = call %struct.__neon_float32x2x4_t @llvm.arm.neon.vld4lane.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, i32 1, i32 1)
%tmp3 = extractvalue %struct.__neon_float32x2x4_t %tmp2, 0
%tmp4 = extractvalue %struct.__neon_float32x2x4_t %tmp2, 1
@@ -445,7 +445,7 @@ define <8 x i16> @vld4laneQi16(i16* %A, <8 x i16>* %B) nounwind {
;Check the alignment value. Max for this instruction is 64 bits:
;CHECK: vld4.16 {d16[1], d18[1], d20[1], d22[1]}, [{{r[0-9]+}}:64]
%tmp0 = bitcast i16* %A to i8*
- %tmp1 = load <8 x i16>* %B
+ %tmp1 = load <8 x i16>, <8 x i16>* %B
%tmp2 = call %struct.__neon_int16x8x4_t @llvm.arm.neon.vld4lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1, i32 16)
%tmp3 = extractvalue %struct.__neon_int16x8x4_t %tmp2, 0
%tmp4 = extractvalue %struct.__neon_int16x8x4_t %tmp2, 1
@@ -462,7 +462,7 @@ define <4 x i32> @vld4laneQi32(i32* %A, <4 x i32>* %B) nounwind {
;Check the (default) alignment.
;CHECK: vld4.32 {d17[0], d19[0], d21[0], d23[0]}, [{{r[0-9]+}}]
%tmp0 = bitcast i32* %A to i8*
- %tmp1 = load <4 x i32>* %B
+ %tmp1 = load <4 x i32>, <4 x i32>* %B
%tmp2 = call %struct.__neon_int32x4x4_t @llvm.arm.neon.vld4lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 2, i32 1)
%tmp3 = extractvalue %struct.__neon_int32x4x4_t %tmp2, 0
%tmp4 = extractvalue %struct.__neon_int32x4x4_t %tmp2, 1
@@ -478,7 +478,7 @@ define <4 x float> @vld4laneQf(float* %A, <4 x float>* %B) nounwind {
;CHECK-LABEL: vld4laneQf:
;CHECK: vld4.32
%tmp0 = bitcast float* %A to i8*
- %tmp1 = load <4 x float>* %B
+ %tmp1 = load <4 x float>, <4 x float>* %B
%tmp2 = call %struct.__neon_float32x4x4_t @llvm.arm.neon.vld4lane.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, i32 1, i32 1)
%tmp3 = extractvalue %struct.__neon_float32x4x4_t %tmp2, 0
%tmp4 = extractvalue %struct.__neon_float32x4x4_t %tmp2, 1
diff --git a/test/CodeGen/ARM/vldm-liveness.ll b/test/CodeGen/ARM/vldm-liveness.ll
index 751f447077be..e114e6970a32 100644
--- a/test/CodeGen/ARM/vldm-liveness.ll
+++ b/test/CodeGen/ARM/vldm-liveness.ll
@@ -22,14 +22,14 @@ define arm_aapcs_vfpcc <4 x float> @foo(float* %ptr) {
; CHECK: vldr s3, [r0, #8]
; CHECK: vldmia r0, {s0, s1}
; CHECK: vldr s2, [r0, #16]
- %off0 = getelementptr float* %ptr, i32 0
- %val0 = load float* %off0
- %off1 = getelementptr float* %ptr, i32 1
- %val1 = load float* %off1
- %off4 = getelementptr float* %ptr, i32 4
- %val4 = load float* %off4
- %off2 = getelementptr float* %ptr, i32 2
- %val2 = load float* %off2
+ %off0 = getelementptr float, float* %ptr, i32 0
+ %val0 = load float, float* %off0
+ %off1 = getelementptr float, float* %ptr, i32 1
+ %val1 = load float, float* %off1
+ %off4 = getelementptr float, float* %ptr, i32 4
+ %val4 = load float, float* %off4
+ %off2 = getelementptr float, float* %ptr, i32 2
+ %val2 = load float, float* %off2
%vec1 = insertelement <4 x float> undef, float %val0, i32 0
%vec2 = insertelement <4 x float> %vec1, float %val1, i32 1
diff --git a/test/CodeGen/ARM/vldm-sched-a9.ll b/test/CodeGen/ARM/vldm-sched-a9.ll
index e5e7bc08fa4a..0e0cf97d4365 100644
--- a/test/CodeGen/ARM/vldm-sched-a9.ll
+++ b/test/CodeGen/ARM/vldm-sched-a9.ll
@@ -10,12 +10,12 @@ target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
; CHECK: vldmia
define void @test(i64* %src) #0 {
entry:
- %arrayidx39 = getelementptr inbounds i64* %src, i32 13
+ %arrayidx39 = getelementptr inbounds i64, i64* %src, i32 13
%vecinit285 = shufflevector <16 x i64> undef, <16 x i64> <i64 15, i64 16, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef>, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 16, i32 17>
store <16 x i64> %vecinit285, <16 x i64>* undef, align 128
- %0 = load i64* undef, align 8
+ %0 = load i64, i64* undef, align 8
%vecinit379 = insertelement <16 x i64> undef, i64 %0, i32 9
- %1 = load i64* undef, align 8
+ %1 = load i64, i64* undef, align 8
%vecinit419 = insertelement <16 x i64> undef, i64 %1, i32 15
store <16 x i64> %vecinit419, <16 x i64>* undef, align 128
%vecinit579 = insertelement <16 x i64> undef, i64 0, i32 4
@@ -23,14 +23,14 @@ entry:
%vecinit584 = insertelement <16 x i64> %vecinit582, i64 undef, i32 9
%vecinit586 = insertelement <16 x i64> %vecinit584, i64 0, i32 10
%vecinit589 = shufflevector <16 x i64> %vecinit586, <16 x i64> <i64 12, i64 13, i64 14, i64 15, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef>, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 16, i32 17, i32 18, i32 19, i32 undef>
- %2 = load i64* undef, align 8
+ %2 = load i64, i64* undef, align 8
%vecinit591 = insertelement <16 x i64> %vecinit589, i64 %2, i32 15
store <16 x i64> %vecinit591, <16 x i64>* undef, align 128
%vecinit694 = shufflevector <16 x i64> undef, <16 x i64> <i64 13, i64 14, i64 15, i64 16, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef>, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19>
store <16 x i64> %vecinit694, <16 x i64>* undef, align 128
- %3 = load i64* undef, align 8
+ %3 = load i64, i64* undef, align 8
%vecinit1331 = insertelement <16 x i64> undef, i64 %3, i32 14
- %4 = load i64* undef, align 8
+ %4 = load i64, i64* undef, align 8
%vecinit1468 = insertelement <16 x i64> undef, i64 %4, i32 11
%vecinit1471 = shufflevector <16 x i64> %vecinit1468, <16 x i64> <i64 13, i64 14, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef>, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 undef, i32 undef>
%vecinit1474 = shufflevector <16 x i64> %vecinit1471, <16 x i64> <i64 15, i64 16, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef>, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 16, i32 17>
@@ -45,14 +45,14 @@ entry:
%vecinit1599 = insertelement <16 x i64> %vecinit1597, i64 undef, i32 8
%vecinit1601 = insertelement <16 x i64> %vecinit1599, i64 undef, i32 9
%vecinit1603 = insertelement <16 x i64> %vecinit1601, i64 undef, i32 10
- %5 = load i64* undef, align 8
+ %5 = load i64, i64* undef, align 8
%vecinit1605 = insertelement <16 x i64> %vecinit1603, i64 %5, i32 11
%vecinit1608 = shufflevector <16 x i64> %vecinit1605, <16 x i64> <i64 13, i64 14, i64 15, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef>, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 undef>
- %6 = load i64* undef, align 8
+ %6 = load i64, i64* undef, align 8
%vecinit1610 = insertelement <16 x i64> %vecinit1608, i64 %6, i32 15
store <16 x i64> %vecinit1610, <16 x i64>* undef, align 128
%vecinit2226 = shufflevector <16 x i64> undef, <16 x i64> <i64 6, i64 7, i64 8, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef>, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 16, i32 17, i32 18, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
- %7 = load i64* undef, align 8
+ %7 = load i64, i64* undef, align 8
%vecinit2228 = insertelement <16 x i64> %vecinit2226, i64 %7, i32 8
%vecinit2230 = insertelement <16 x i64> %vecinit2228, i64 undef, i32 9
%vecinit2233 = shufflevector <16 x i64> %vecinit2230, <16 x i64> <i64 11, i64 12, i64 13, i64 14, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef>, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 16, i32 17, i32 18, i32 19, i32 undef, i32 undef>
@@ -62,7 +62,7 @@ entry:
%vecinit2249 = shufflevector <16 x i64> %vecinit2246, <16 x i64> <i64 7, i64 8, i64 9, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef>, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 16, i32 17, i32 18, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
%vecinit2252 = shufflevector <16 x i64> %vecinit2249, <16 x i64> <i64 10, i64 11, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef>, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 16, i32 17, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
%vecinit2255 = shufflevector <16 x i64> %vecinit2252, <16 x i64> <i64 12, i64 13, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef>, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 16, i32 17, i32 undef, i32 undef, i32 undef>
- %8 = load i64* %arrayidx39, align 8
+ %8 = load i64, i64* %arrayidx39, align 8
%vecinit2257 = insertelement <16 x i64> %vecinit2255, i64 %8, i32 13
%vecinit2260 = shufflevector <16 x i64> %vecinit2257, <16 x i64> <i64 15, i64 16, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef>, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 16, i32 17>
store <16 x i64> %vecinit2260, <16 x i64>* null, align 128
diff --git a/test/CodeGen/ARM/vminmax.ll b/test/CodeGen/ARM/vminmax.ll
index 1167ebe06717..011bfd7ff88e 100644
--- a/test/CodeGen/ARM/vminmax.ll
+++ b/test/CodeGen/ARM/vminmax.ll
@@ -3,8 +3,8 @@
define <8 x i8> @vmins8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: vmins8:
;CHECK: vmin.s8
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
+ %tmp2 = load <8 x i8>, <8 x i8>* %B
%tmp3 = call <8 x i8> @llvm.arm.neon.vmins.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
ret <8 x i8> %tmp3
}
@@ -12,8 +12,8 @@ define <8 x i8> @vmins8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
define <4 x i16> @vmins16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: vmins16:
;CHECK: vmin.s16
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
%tmp3 = call <4 x i16> @llvm.arm.neon.vmins.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
ret <4 x i16> %tmp3
}
@@ -21,8 +21,8 @@ define <4 x i16> @vmins16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
define <2 x i32> @vmins32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: vmins32:
;CHECK: vmin.s32
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
%tmp3 = call <2 x i32> @llvm.arm.neon.vmins.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
ret <2 x i32> %tmp3
}
@@ -30,8 +30,8 @@ define <2 x i32> @vmins32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
define <8 x i8> @vminu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: vminu8:
;CHECK: vmin.u8
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
+ %tmp2 = load <8 x i8>, <8 x i8>* %B
%tmp3 = call <8 x i8> @llvm.arm.neon.vminu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
ret <8 x i8> %tmp3
}
@@ -39,8 +39,8 @@ define <8 x i8> @vminu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
define <4 x i16> @vminu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: vminu16:
;CHECK: vmin.u16
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
%tmp3 = call <4 x i16> @llvm.arm.neon.vminu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
ret <4 x i16> %tmp3
}
@@ -48,8 +48,8 @@ define <4 x i16> @vminu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
define <2 x i32> @vminu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: vminu32:
;CHECK: vmin.u32
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
%tmp3 = call <2 x i32> @llvm.arm.neon.vminu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
ret <2 x i32> %tmp3
}
@@ -57,8 +57,8 @@ define <2 x i32> @vminu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
define <2 x float> @vminf32(<2 x float>* %A, <2 x float>* %B) nounwind {
;CHECK-LABEL: vminf32:
;CHECK: vmin.f32
- %tmp1 = load <2 x float>* %A
- %tmp2 = load <2 x float>* %B
+ %tmp1 = load <2 x float>, <2 x float>* %A
+ %tmp2 = load <2 x float>, <2 x float>* %B
%tmp3 = call <2 x float> @llvm.arm.neon.vmins.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
ret <2 x float> %tmp3
}
@@ -66,8 +66,8 @@ define <2 x float> @vminf32(<2 x float>* %A, <2 x float>* %B) nounwind {
define <16 x i8> @vminQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: vminQs8:
;CHECK: vmin.s8
- %tmp1 = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
+ %tmp1 = load <16 x i8>, <16 x i8>* %A
+ %tmp2 = load <16 x i8>, <16 x i8>* %B
%tmp3 = call <16 x i8> @llvm.arm.neon.vmins.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
ret <16 x i8> %tmp3
}
@@ -75,8 +75,8 @@ define <16 x i8> @vminQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
define <8 x i16> @vminQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: vminQs16:
;CHECK: vmin.s16
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
+ %tmp2 = load <8 x i16>, <8 x i16>* %B
%tmp3 = call <8 x i16> @llvm.arm.neon.vmins.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
ret <8 x i16> %tmp3
}
@@ -84,8 +84,8 @@ define <8 x i16> @vminQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
define <4 x i32> @vminQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: vminQs32:
;CHECK: vmin.s32
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
+ %tmp2 = load <4 x i32>, <4 x i32>* %B
%tmp3 = call <4 x i32> @llvm.arm.neon.vmins.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
ret <4 x i32> %tmp3
}
@@ -93,8 +93,8 @@ define <4 x i32> @vminQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
define <16 x i8> @vminQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: vminQu8:
;CHECK: vmin.u8
- %tmp1 = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
+ %tmp1 = load <16 x i8>, <16 x i8>* %A
+ %tmp2 = load <16 x i8>, <16 x i8>* %B
%tmp3 = call <16 x i8> @llvm.arm.neon.vminu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
ret <16 x i8> %tmp3
}
@@ -102,8 +102,8 @@ define <16 x i8> @vminQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
define <8 x i16> @vminQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: vminQu16:
;CHECK: vmin.u16
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
+ %tmp2 = load <8 x i16>, <8 x i16>* %B
%tmp3 = call <8 x i16> @llvm.arm.neon.vminu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
ret <8 x i16> %tmp3
}
@@ -111,8 +111,8 @@ define <8 x i16> @vminQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
define <4 x i32> @vminQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: vminQu32:
;CHECK: vmin.u32
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
+ %tmp2 = load <4 x i32>, <4 x i32>* %B
%tmp3 = call <4 x i32> @llvm.arm.neon.vminu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
ret <4 x i32> %tmp3
}
@@ -120,8 +120,8 @@ define <4 x i32> @vminQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
define <4 x float> @vminQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
;CHECK-LABEL: vminQf32:
;CHECK: vmin.f32
- %tmp1 = load <4 x float>* %A
- %tmp2 = load <4 x float>* %B
+ %tmp1 = load <4 x float>, <4 x float>* %A
+ %tmp2 = load <4 x float>, <4 x float>* %B
%tmp3 = call <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
ret <4 x float> %tmp3
}
@@ -149,8 +149,8 @@ declare <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float>, <4 x float>) nounwin
define <8 x i8> @vmaxs8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: vmaxs8:
;CHECK: vmax.s8
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
+ %tmp2 = load <8 x i8>, <8 x i8>* %B
%tmp3 = call <8 x i8> @llvm.arm.neon.vmaxs.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
ret <8 x i8> %tmp3
}
@@ -158,8 +158,8 @@ define <8 x i8> @vmaxs8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
define <4 x i16> @vmaxs16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: vmaxs16:
;CHECK: vmax.s16
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
%tmp3 = call <4 x i16> @llvm.arm.neon.vmaxs.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
ret <4 x i16> %tmp3
}
@@ -167,8 +167,8 @@ define <4 x i16> @vmaxs16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
define <2 x i32> @vmaxs32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: vmaxs32:
;CHECK: vmax.s32
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
%tmp3 = call <2 x i32> @llvm.arm.neon.vmaxs.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
ret <2 x i32> %tmp3
}
@@ -176,8 +176,8 @@ define <2 x i32> @vmaxs32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
define <8 x i8> @vmaxu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: vmaxu8:
;CHECK: vmax.u8
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
+ %tmp2 = load <8 x i8>, <8 x i8>* %B
%tmp3 = call <8 x i8> @llvm.arm.neon.vmaxu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
ret <8 x i8> %tmp3
}
@@ -185,8 +185,8 @@ define <8 x i8> @vmaxu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
define <4 x i16> @vmaxu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: vmaxu16:
;CHECK: vmax.u16
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
%tmp3 = call <4 x i16> @llvm.arm.neon.vmaxu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
ret <4 x i16> %tmp3
}
@@ -194,8 +194,8 @@ define <4 x i16> @vmaxu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
define <2 x i32> @vmaxu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: vmaxu32:
;CHECK: vmax.u32
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
%tmp3 = call <2 x i32> @llvm.arm.neon.vmaxu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
ret <2 x i32> %tmp3
}
@@ -203,8 +203,8 @@ define <2 x i32> @vmaxu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
define <2 x float> @vmaxf32(<2 x float>* %A, <2 x float>* %B) nounwind {
;CHECK-LABEL: vmaxf32:
;CHECK: vmax.f32
- %tmp1 = load <2 x float>* %A
- %tmp2 = load <2 x float>* %B
+ %tmp1 = load <2 x float>, <2 x float>* %A
+ %tmp2 = load <2 x float>, <2 x float>* %B
%tmp3 = call <2 x float> @llvm.arm.neon.vmaxs.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
ret <2 x float> %tmp3
}
@@ -212,8 +212,8 @@ define <2 x float> @vmaxf32(<2 x float>* %A, <2 x float>* %B) nounwind {
define <16 x i8> @vmaxQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: vmaxQs8:
;CHECK: vmax.s8
- %tmp1 = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
+ %tmp1 = load <16 x i8>, <16 x i8>* %A
+ %tmp2 = load <16 x i8>, <16 x i8>* %B
%tmp3 = call <16 x i8> @llvm.arm.neon.vmaxs.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
ret <16 x i8> %tmp3
}
@@ -221,8 +221,8 @@ define <16 x i8> @vmaxQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
define <8 x i16> @vmaxQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: vmaxQs16:
;CHECK: vmax.s16
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
+ %tmp2 = load <8 x i16>, <8 x i16>* %B
%tmp3 = call <8 x i16> @llvm.arm.neon.vmaxs.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
ret <8 x i16> %tmp3
}
@@ -230,8 +230,8 @@ define <8 x i16> @vmaxQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
define <4 x i32> @vmaxQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: vmaxQs32:
;CHECK: vmax.s32
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
+ %tmp2 = load <4 x i32>, <4 x i32>* %B
%tmp3 = call <4 x i32> @llvm.arm.neon.vmaxs.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
ret <4 x i32> %tmp3
}
@@ -239,8 +239,8 @@ define <4 x i32> @vmaxQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
define <16 x i8> @vmaxQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: vmaxQu8:
;CHECK: vmax.u8
- %tmp1 = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
+ %tmp1 = load <16 x i8>, <16 x i8>* %A
+ %tmp2 = load <16 x i8>, <16 x i8>* %B
%tmp3 = call <16 x i8> @llvm.arm.neon.vmaxu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
ret <16 x i8> %tmp3
}
@@ -248,8 +248,8 @@ define <16 x i8> @vmaxQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
define <8 x i16> @vmaxQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: vmaxQu16:
;CHECK: vmax.u16
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
+ %tmp2 = load <8 x i16>, <8 x i16>* %B
%tmp3 = call <8 x i16> @llvm.arm.neon.vmaxu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
ret <8 x i16> %tmp3
}
@@ -257,8 +257,8 @@ define <8 x i16> @vmaxQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
define <4 x i32> @vmaxQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: vmaxQu32:
;CHECK: vmax.u32
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
+ %tmp2 = load <4 x i32>, <4 x i32>* %B
%tmp3 = call <4 x i32> @llvm.arm.neon.vmaxu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
ret <4 x i32> %tmp3
}
@@ -266,8 +266,8 @@ define <4 x i32> @vmaxQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
define <4 x float> @vmaxQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
;CHECK-LABEL: vmaxQf32:
;CHECK: vmax.f32
- %tmp1 = load <4 x float>* %A
- %tmp2 = load <4 x float>* %B
+ %tmp1 = load <4 x float>, <4 x float>* %A
+ %tmp2 = load <4 x float>, <4 x float>* %B
%tmp3 = call <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
ret <4 x float> %tmp3
}
diff --git a/test/CodeGen/ARM/vminmaxnm.ll b/test/CodeGen/ARM/vminmaxnm.ll
index 39289a0bafb3..3632ffd00213 100644
--- a/test/CodeGen/ARM/vminmaxnm.ll
+++ b/test/CodeGen/ARM/vminmaxnm.ll
@@ -1,11 +1,14 @@
-; RUN: llc < %s -mtriple armv8 -mattr=+neon | FileCheck %s
-; RUN: llc < %s -mtriple armv8 -mattr=+neon,+fp-armv8 -enable-unsafe-fp-math | FileCheck %s --check-prefix=CHECK-FAST
+; RUN: llc < %s -mtriple armv8 -mattr=+neon,+fp-armv8 | FileCheck %s
+; RUN: llc < %s -mtriple armv8 -mattr=+neon,+fp-armv8 \
+; RUN: -enable-no-nans-fp-math -enable-unsafe-fp-math | FileCheck %s --check-prefix=CHECK-FAST
+
+; vectors
define <4 x float> @vmaxnmq(<4 x float>* %A, <4 x float>* %B) nounwind {
; CHECK-LABEL: vmaxnmq:
; CHECK: vmaxnm.f32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
- %tmp1 = load <4 x float>* %A
- %tmp2 = load <4 x float>* %B
+ %tmp1 = load <4 x float>, <4 x float>* %A
+ %tmp2 = load <4 x float>, <4 x float>* %B
%tmp3 = call <4 x float> @llvm.arm.neon.vmaxnm.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
ret <4 x float> %tmp3
}
@@ -13,8 +16,8 @@ define <4 x float> @vmaxnmq(<4 x float>* %A, <4 x float>* %B) nounwind {
define <2 x float> @vmaxnmd(<2 x float>* %A, <2 x float>* %B) nounwind {
; CHECK-LABEL: vmaxnmd:
; CHECK: vmaxnm.f32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
- %tmp1 = load <2 x float>* %A
- %tmp2 = load <2 x float>* %B
+ %tmp1 = load <2 x float>, <2 x float>* %A
+ %tmp2 = load <2 x float>, <2 x float>* %B
%tmp3 = call <2 x float> @llvm.arm.neon.vmaxnm.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
ret <2 x float> %tmp3
}
@@ -22,8 +25,8 @@ define <2 x float> @vmaxnmd(<2 x float>* %A, <2 x float>* %B) nounwind {
define <4 x float> @vminnmq(<4 x float>* %A, <4 x float>* %B) nounwind {
; CHECK-LABEL: vminnmq:
; CHECK: vminnm.f32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
- %tmp1 = load <4 x float>* %A
- %tmp2 = load <4 x float>* %B
+ %tmp1 = load <4 x float>, <4 x float>* %A
+ %tmp2 = load <4 x float>, <4 x float>* %B
%tmp3 = call <4 x float> @llvm.arm.neon.vminnm.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
ret <4 x float> %tmp3
}
@@ -31,12 +34,14 @@ define <4 x float> @vminnmq(<4 x float>* %A, <4 x float>* %B) nounwind {
define <2 x float> @vminnmd(<2 x float>* %A, <2 x float>* %B) nounwind {
; CHECK-LABEL: vminnmd:
; CHECK: vminnm.f32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
- %tmp1 = load <2 x float>* %A
- %tmp2 = load <2 x float>* %B
+ %tmp1 = load <2 x float>, <2 x float>* %A
+ %tmp2 = load <2 x float>, <2 x float>* %B
%tmp3 = call <2 x float> @llvm.arm.neon.vminnm.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
ret <2 x float> %tmp3
}
+; scalars
+
define float @fp-armv8_vminnm_o(float %a, float %b) {
; CHECK-FAST-LABEL: "fp-armv8_vminnm_o":
; CHECK-FAST-NOT: vcmp
@@ -48,6 +53,17 @@ define float @fp-armv8_vminnm_o(float %a, float %b) {
ret float %cond
}
+define double @fp-armv8_vminnm_ole(double %a, double %b) {
+; CHECK-FAST-LABEL: "fp-armv8_vminnm_ole":
+; CHECK-FAST-NOT: vcmp
+; CHECK-FAST: vminnm.f64
+; CHECK-LABEL: "fp-armv8_vminnm_ole":
+; CHECK-NOT: vminnm.f64
+ %cmp = fcmp ole double %a, %b
+ %cond = select i1 %cmp, double %a, double %b
+ ret double %cond
+}
+
define float @fp-armv8_vminnm_o_rev(float %a, float %b) {
; CHECK-FAST-LABEL: "fp-armv8_vminnm_o_rev":
; CHECK-FAST-NOT: vcmp
@@ -59,6 +75,17 @@ define float @fp-armv8_vminnm_o_rev(float %a, float %b) {
ret float %cond
}
+define double @fp-armv8_vminnm_oge_rev(double %a, double %b) {
+; CHECK-FAST-LABEL: "fp-armv8_vminnm_oge_rev":
+; CHECK-FAST-NOT: vcmp
+; CHECK-FAST: vminnm.f64
+; CHECK-LABEL: "fp-armv8_vminnm_oge_rev":
+; CHECK-NOT: vminnm.f64
+ %cmp = fcmp oge double %a, %b
+ %cond = select i1 %cmp, double %b, double %a
+ ret double %cond
+}
+
define float @fp-armv8_vminnm_u(float %a, float %b) {
; CHECK-FAST-LABEL: "fp-armv8_vminnm_u":
; CHECK-FAST-NOT: vcmp
@@ -70,6 +97,17 @@ define float @fp-armv8_vminnm_u(float %a, float %b) {
ret float %cond
}
+define float @fp-armv8_vminnm_ule(float %a, float %b) {
+; CHECK-FAST-LABEL: "fp-armv8_vminnm_ule":
+; CHECK-FAST-NOT: vcmp
+; CHECK-FAST: vminnm.f32
+; CHECK-LABEL: "fp-armv8_vminnm_ule":
+; CHECK-NOT: vminnm.f32
+ %cmp = fcmp ule float %a, %b
+ %cond = select i1 %cmp, float %a, float %b
+ ret float %cond
+}
+
define float @fp-armv8_vminnm_u_rev(float %a, float %b) {
; CHECK-FAST-LABEL: "fp-armv8_vminnm_u_rev":
; CHECK-FAST-NOT: vcmp
@@ -81,6 +119,17 @@ define float @fp-armv8_vminnm_u_rev(float %a, float %b) {
ret float %cond
}
+define double @fp-armv8_vminnm_uge_rev(double %a, double %b) {
+; CHECK-FAST-LABEL: "fp-armv8_vminnm_uge_rev":
+; CHECK-FAST-NOT: vcmp
+; CHECK-FAST: vminnm.f64
+; CHECK-LABEL: "fp-armv8_vminnm_uge_rev":
+; CHECK-NOT: vminnm.f64
+ %cmp = fcmp uge double %a, %b
+ %cond = select i1 %cmp, double %b, double %a
+ ret double %cond
+}
+
define float @fp-armv8_vmaxnm_o(float %a, float %b) {
; CHECK-FAST-LABEL: "fp-armv8_vmaxnm_o":
; CHECK-FAST-NOT: vcmp
@@ -92,6 +141,17 @@ define float @fp-armv8_vmaxnm_o(float %a, float %b) {
ret float %cond
}
+define float @fp-armv8_vmaxnm_oge(float %a, float %b) {
+; CHECK-FAST-LABEL: "fp-armv8_vmaxnm_oge":
+; CHECK-FAST-NOT: vcmp
+; CHECK-FAST: vmaxnm.f32
+; CHECK-LABEL: "fp-armv8_vmaxnm_oge":
+; CHECK-NOT: vmaxnm.f32
+ %cmp = fcmp oge float %a, %b
+ %cond = select i1 %cmp, float %a, float %b
+ ret float %cond
+}
+
define float @fp-armv8_vmaxnm_o_rev(float %a, float %b) {
; CHECK-FAST-LABEL: "fp-armv8_vmaxnm_o_rev":
; CHECK-FAST-NOT: vcmp
@@ -103,6 +163,17 @@ define float @fp-armv8_vmaxnm_o_rev(float %a, float %b) {
ret float %cond
}
+define float @fp-armv8_vmaxnm_ole_rev(float %a, float %b) {
+; CHECK-FAST-LABEL: "fp-armv8_vmaxnm_ole_rev":
+; CHECK-FAST-NOT: vcmp
+; CHECK-FAST: vmaxnm.f32
+; CHECK-LABEL: "fp-armv8_vmaxnm_ole_rev":
+; CHECK-NOT: vmaxnm.f32
+ %cmp = fcmp ole float %a, %b
+ %cond = select i1 %cmp, float %b, float %a
+ ret float %cond
+}
+
define float @fp-armv8_vmaxnm_u(float %a, float %b) {
; CHECK-FAST-LABEL: "fp-armv8_vmaxnm_u":
; CHECK-FAST-NOT: vcmp
@@ -114,6 +185,17 @@ define float @fp-armv8_vmaxnm_u(float %a, float %b) {
ret float %cond
}
+define float @fp-armv8_vmaxnm_uge(float %a, float %b) {
+; CHECK-FAST-LABEL: "fp-armv8_vmaxnm_uge":
+; CHECK-FAST-NOT: vcmp
+; CHECK-FAST: vmaxnm.f32
+; CHECK-LABEL: "fp-armv8_vmaxnm_uge":
+; CHECK-NOT: vmaxnm.f32
+ %cmp = fcmp uge float %a, %b
+ %cond = select i1 %cmp, float %a, float %b
+ ret float %cond
+}
+
define float @fp-armv8_vmaxnm_u_rev(float %a, float %b) {
; CHECK-FAST-LABEL: "fp-armv8_vmaxnm_u_rev":
; CHECK-FAST-NOT: vcmp
@@ -125,6 +207,302 @@ define float @fp-armv8_vmaxnm_u_rev(float %a, float %b) {
ret float %cond
}
+define double @fp-armv8_vmaxnm_ule_rev(double %a, double %b) {
+; CHECK-FAST-LABEL: "fp-armv8_vmaxnm_ule_rev":
+; CHECK-FAST-NOT: vcmp
+; CHECK-FAST: vmaxnm.f64
+; CHECK-LABEL: "fp-armv8_vmaxnm_ule_rev":
+; CHECK-NOT: vmaxnm.f64
+ %cmp = fcmp ule double %a, %b
+ %cond = select i1 %cmp, double %b, double %a
+ ret double %cond
+}
+
+; known non-NaNs
+
+define float @fp-armv8_vminnm_NNNo(float %a) {
+; CHECK-FAST-LABEL: "fp-armv8_vminnm_NNNo":
+; CHECK-FAST: vminnm.f32
+; CHECK-FAST: vminnm.f32
+; CHECK-LABEL: "fp-armv8_vminnm_NNNo":
+; CHECK: vminnm.f32
+; CHECK-NOT: vminnm.f32
+ %cmp1 = fcmp olt float %a, 12.
+ %cond1 = select i1 %cmp1, float %a, float 12.
+ %cmp2 = fcmp olt float 34., %cond1
+ %cond2 = select i1 %cmp2, float 34., float %cond1
+ ret float %cond2
+}
+
+define double @fp-armv8_vminnm_NNNole(double %a) {
+; CHECK-FAST-LABEL: "fp-armv8_vminnm_NNNole":
+; CHECK-FAST: vminnm.f64
+; CHECK-FAST: vminnm.f64
+; CHECK-LABEL: "fp-armv8_vminnm_NNNole":
+; CHECK: vminnm.f64
+; CHECK-NOT: vminnm.f64
+ %cmp1 = fcmp ole double %a, 34.
+ %cond1 = select i1 %cmp1, double %a, double 34.
+ %cmp2 = fcmp ole double 56., %cond1
+ %cond2 = select i1 %cmp2, double 56., double %cond1
+ ret double %cond2
+}
+
+define float @fp-armv8_vminnm_NNNo_rev(float %a) {
+; CHECK-FAST-LABEL: "fp-armv8_vminnm_NNNo_rev":
+; CHECK-FAST: vminnm.f32
+; CHECK-FAST: vminnm.f32
+; CHECK-LABEL: "fp-armv8_vminnm_NNNo_rev":
+; CHECK: vminnm.f32
+; CHECK-NOT: vminnm.f32
+ %cmp1 = fcmp ogt float %a, 56.
+ %cond1 = select i1 %cmp1, float 56., float %a
+ %cmp2 = fcmp ogt float 78., %cond1
+ %cond2 = select i1 %cmp2, float %cond1, float 78.
+ ret float %cond2
+}
+
+define double @fp-armv8_vminnm_NNNoge_rev(double %a) {
+; CHECK-FAST-LABEL: "fp-armv8_vminnm_NNNoge_rev":
+; CHECK-FAST: vminnm.f64
+; CHECK-FAST: vminnm.f64
+; CHECK-LABEL: "fp-armv8_vminnm_NNNoge_rev":
+; CHECK: vminnm.f64
+; CHECK-NOT: vminnm.f64
+ %cmp1 = fcmp oge double %a, 78.
+ %cond1 = select i1 %cmp1, double 78., double %a
+ %cmp2 = fcmp oge double 90., %cond1
+ %cond2 = select i1 %cmp2, double %cond1, double 90.
+ ret double %cond2
+}
+
+define float @fp-armv8_vminnm_NNNu(float %b) {
+; CHECK-FAST-LABEL: "fp-armv8_vminnm_NNNu":
+; CHECK-FAST: vminnm.f32
+; CHECK-FAST: vminnm.f32
+; CHECK-LABEL: "fp-armv8_vminnm_NNNu":
+; CHECK: vminnm.f32
+; CHECK-NOT: vminnm.f32
+ %cmp1 = fcmp ult float 12., %b
+ %cond1 = select i1 %cmp1, float 12., float %b
+ %cmp2 = fcmp ult float %cond1, 34.
+ %cond2 = select i1 %cmp2, float %cond1, float 34.
+ ret float %cond2
+}
+
+define float @fp-armv8_vminnm_NNNule(float %b) {
+; CHECK-FAST-LABEL: "fp-armv8_vminnm_NNNule":
+; CHECK-FAST: vminnm.f32
+; CHECK-FAST: vminnm.f32
+; CHECK-LABEL: "fp-armv8_vminnm_NNNule":
+; CHECK: vminnm.f32
+; CHECK-NOT: vminnm.f32
+ %cmp1 = fcmp ule float 34., %b
+ %cond1 = select i1 %cmp1, float 34., float %b
+ %cmp2 = fcmp ule float %cond1, 56.
+ %cond2 = select i1 %cmp2, float %cond1, float 56.
+ ret float %cond2
+}
+
+define float @fp-armv8_vminnm_NNNu_rev(float %b) {
+; CHECK-FAST-LABEL: "fp-armv8_vminnm_NNNu_rev":
+; CHECK-FAST: vminnm.f32
+; CHECK-FAST: vminnm.f32
+; CHECK-LABEL: "fp-armv8_vminnm_NNNu_rev":
+; CHECK: vminnm.f32
+; CHECK-NOT: vminnm.f32
+ %cmp1 = fcmp ugt float 56., %b
+ %cond1 = select i1 %cmp1, float %b, float 56.
+ %cmp2 = fcmp ugt float %cond1, 78.
+ %cond2 = select i1 %cmp2, float 78., float %cond1
+ ret float %cond2
+}
+
+define double @fp-armv8_vminnm_NNNuge_rev(double %b) {
+; CHECK-FAST-LABEL: "fp-armv8_vminnm_NNNuge_rev":
+; CHECK-FAST: vminnm.f64
+; CHECK-FAST: vminnm.f64
+; CHECK-LABEL: "fp-armv8_vminnm_NNNuge_rev":
+; CHECK: vminnm.f64
+; CHECK-NOT: vminnm.f64
+ %cmp1 = fcmp uge double 78., %b
+ %cond1 = select i1 %cmp1, double %b, double 78.
+ %cmp2 = fcmp uge double %cond1, 90.
+ %cond2 = select i1 %cmp2, double 90., double %cond1
+ ret double %cond2
+}
+
+define float @fp-armv8_vmaxnm_NNNo(float %a) {
+; CHECK-FAST-LABEL: "fp-armv8_vmaxnm_NNNo":
+; CHECK-FAST: vmaxnm.f32
+; CHECK-FAST: vmaxnm.f32
+; CHECK-LABEL: "fp-armv8_vmaxnm_NNNo":
+; CHECK: vmaxnm.f32
+; CHECK-NOT: vmaxnm.f32
+ %cmp1 = fcmp ogt float %a, 12.
+ %cond1 = select i1 %cmp1, float %a, float 12.
+ %cmp2 = fcmp ogt float 34., %cond1
+ %cond2 = select i1 %cmp2, float 34., float %cond1
+ ret float %cond2
+}
+
+define float @fp-armv8_vmaxnm_NNNoge(float %a) {
+; CHECK-FAST-LABEL: "fp-armv8_vmaxnm_NNNoge":
+; CHECK-FAST: vmaxnm.f32
+; CHECK-FAST: vmaxnm.f32
+; CHECK-LABEL: "fp-armv8_vmaxnm_NNNoge":
+; CHECK: vmaxnm.f32
+; CHECK-NOT: vmaxnm.f32
+ %cmp1 = fcmp oge float %a, 34.
+ %cond1 = select i1 %cmp1, float %a, float 34.
+ %cmp2 = fcmp oge float 56., %cond1
+ %cond2 = select i1 %cmp2, float 56., float %cond1
+ ret float %cond2
+}
+
+define float @fp-armv8_vmaxnm_NNNo_rev(float %a) {
+; CHECK-FAST-LABEL: "fp-armv8_vmaxnm_NNNo_rev":
+; CHECK-FAST: vmaxnm.f32
+; CHECK-FAST: vmaxnm.f32
+; CHECK-LABEL: "fp-armv8_vmaxnm_NNNo_rev":
+; CHECK: vmaxnm.f32
+; CHECK-NOT: vmaxnm.f32
+ %cmp1 = fcmp olt float %a, 56.
+ %cond1 = select i1 %cmp1, float 56., float %a
+ %cmp2 = fcmp olt float 78., %cond1
+ %cond2 = select i1 %cmp2, float %cond1, float 78.
+ ret float %cond2
+}
+
+define float @fp-armv8_vmaxnm_NNNole_rev(float %a) {
+; CHECK-FAST-LABEL: "fp-armv8_vmaxnm_NNNole_rev":
+; CHECK-FAST: vmaxnm.f32
+; CHECK-FAST: vmaxnm.f32
+; CHECK-LABEL: "fp-armv8_vmaxnm_NNNole_rev":
+; CHECK: vmaxnm.f32
+; CHECK-NOT: vmaxnm.f32
+ %cmp1 = fcmp ole float %a, 78.
+ %cond1 = select i1 %cmp1, float 78., float %a
+ %cmp2 = fcmp ole float 90., %cond1
+ %cond2 = select i1 %cmp2, float %cond1, float 90.
+ ret float %cond2
+}
+
+define float @fp-armv8_vmaxnm_NNNu(float %b) {
+; CHECK-FAST-LABEL: "fp-armv8_vmaxnm_NNNu":
+; CHECK-FAST: vmaxnm.f32
+; CHECK-FAST: vmaxnm.f32
+; CHECK-LABEL: "fp-armv8_vmaxnm_NNNu":
+; CHECK: vmaxnm.f32
+; CHECK-NOT: vmaxnm.f32
+ %cmp1 = fcmp ugt float 12., %b
+ %cond1 = select i1 %cmp1, float 12., float %b
+ %cmp2 = fcmp ugt float %cond1, 34.
+ %cond2 = select i1 %cmp2, float %cond1, float 34.
+ ret float %cond2
+}
+
+define float @fp-armv8_vmaxnm_NNNuge(float %b) {
+; CHECK-FAST-LABEL: "fp-armv8_vmaxnm_NNNuge":
+; CHECK-FAST: vmaxnm.f32
+; CHECK-FAST: vmaxnm.f32
+; CHECK-LABEL: "fp-armv8_vmaxnm_NNNuge":
+; CHECK: vmaxnm.f32
+; CHECK-NOT: vmaxnm.f32
+ %cmp1 = fcmp uge float 34., %b
+ %cond1 = select i1 %cmp1, float 34., float %b
+ %cmp2 = fcmp uge float %cond1, 56.
+ %cond2 = select i1 %cmp2, float %cond1, float 56.
+ ret float %cond2
+}
+
+define float @fp-armv8_vmaxnm_NNNu_rev(float %b) {
+; CHECK-FAST-LABEL: "fp-armv8_vmaxnm_NNNu_rev":
+; CHECK-FAST: vmaxnm.f32
+; CHECK-FAST: vmaxnm.f32
+; CHECK-LABEL: "fp-armv8_vmaxnm_NNNu_rev":
+; CHECK: vmaxnm.f32
+; CHECK-NOT: vmaxnm.f32
+ %cmp1 = fcmp ult float 56., %b
+ %cond1 = select i1 %cmp1, float %b, float 56.
+ %cmp2 = fcmp ult float %cond1, 78.
+ %cond2 = select i1 %cmp2, float 78., float %cond1
+ ret float %cond2
+}
+
+define double @fp-armv8_vmaxnm_NNNule_rev(double %b) {
+; CHECK-FAST-LABEL: "fp-armv8_vmaxnm_NNNule_rev":
+; CHECK-FAST: vmaxnm.f64
+; CHECK-FAST: vmaxnm.f64
+; CHECK-LABEL: "fp-armv8_vmaxnm_NNNule_rev":
+; CHECK: vmaxnm.f64
+; CHECK-NOT: vmaxnm.f64
+ %cmp1 = fcmp ule double 78., %b
+ %cond1 = select i1 %cmp1, double %b, double 78.
+ %cmp2 = fcmp ule double %cond1, 90.
+ %cond2 = select i1 %cmp2, double 90., double %cond1
+ ret double %cond2
+}
+
+define float @fp-armv8_vminmaxnm_0(float %a) {
+; CHECK-FAST-LABEL: "fp-armv8_vminmaxnm_0":
+; CHECK-FAST-NOT: vcmp
+; CHECK-FAST: vminnm.f32
+; CHECK-FAST: vmaxnm.f32
+; CHECK-LABEL: "fp-armv8_vminmaxnm_0":
+; CHECK-NOT: vminnm.f32
+; CHECK: vmaxnm.f32
+ %cmp1 = fcmp olt float %a, 0.
+ %cond1 = select i1 %cmp1, float %a, float 0.
+ %cmp2 = fcmp ogt float %cond1, 0.
+ %cond2 = select i1 %cmp2, float %cond1, float 0.
+ ret float %cond2
+}
+
+define float @fp-armv8_vminmaxnm_neg0(float %a) {
+; CHECK-FAST-LABEL: "fp-armv8_vminmaxnm_neg0":
+; CHECK-FAST-NOT: vcmp
+; CHECK-FAST: vminnm.f32
+; CHECK-FAST: vmaxnm.f32
+; CHECK-LABEL: "fp-armv8_vminmaxnm_neg0":
+; CHECK: vminnm.f32
+; CHECK-NOT: vmaxnm.f32
+ %cmp1 = fcmp olt float %a, -0.
+ %cond1 = select i1 %cmp1, float %a, float -0.
+ %cmp2 = fcmp ogt float %cond1, -0.
+ %cond2 = select i1 %cmp2, float %cond1, float -0.
+ ret float %cond2
+}
+
+define float @fp-armv8_vminmaxnm_e_0(float %a) {
+; CHECK-FAST-LABEL: "fp-armv8_vminmaxnm_e_0":
+; CHECK-FAST-NOT: vcmp
+; CHECK-FAST: vminnm.f32
+; CHECK-FAST: vmaxnm.f32
+; CHECK-LABEL: "fp-armv8_vminmaxnm_e_0":
+; CHECK-NOT: vminnm.f32
+; CHECK: vmaxnm.f32
+ %cmp1 = fcmp ule float 0., %a
+ %cond1 = select i1 %cmp1, float 0., float %a
+ %cmp2 = fcmp uge float 0., %cond1
+ %cond2 = select i1 %cmp2, float 0., float %cond1
+ ret float %cond2
+}
+
+define float @fp-armv8_vminmaxnm_e_neg0(float %a) {
+; CHECK-FAST-LABEL: "fp-armv8_vminmaxnm_e_neg0":
+; CHECK-FAST-NOT: vcmp
+; CHECK-FAST: vminnm.f32
+; CHECK-FAST: vmaxnm.f32
+; CHECK-LABEL: "fp-armv8_vminmaxnm_e_neg0":
+; CHECK: vminnm.f32
+; CHECK-NOT: vmaxnm.f32
+ %cmp1 = fcmp ule float -0., %a
+ %cond1 = select i1 %cmp1, float -0., float %a
+ %cmp2 = fcmp uge float -0., %cond1
+ %cond2 = select i1 %cmp2, float -0., float %cond1
+ ret float %cond2
+}
declare <4 x float> @llvm.arm.neon.vminnm.v4f32(<4 x float>, <4 x float>) nounwind readnone
declare <2 x float> @llvm.arm.neon.vminnm.v2f32(<2 x float>, <2 x float>) nounwind readnone
diff --git a/test/CodeGen/ARM/vmla.ll b/test/CodeGen/ARM/vmla.ll
index 6073fc5566fd..8ca33a9eecac 100644
--- a/test/CodeGen/ARM/vmla.ll
+++ b/test/CodeGen/ARM/vmla.ll
@@ -3,9 +3,9 @@
define <8 x i8> @vmlai8(<8 x i8>* %A, <8 x i8>* %B, <8 x i8> * %C) nounwind {
;CHECK-LABEL: vmlai8:
;CHECK: vmla.i8
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
- %tmp3 = load <8 x i8>* %C
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
+ %tmp2 = load <8 x i8>, <8 x i8>* %B
+ %tmp3 = load <8 x i8>, <8 x i8>* %C
%tmp4 = mul <8 x i8> %tmp2, %tmp3
%tmp5 = add <8 x i8> %tmp1, %tmp4
ret <8 x i8> %tmp5
@@ -14,9 +14,9 @@ define <8 x i8> @vmlai8(<8 x i8>* %A, <8 x i8>* %B, <8 x i8> * %C) nounwind {
define <4 x i16> @vmlai16(<4 x i16>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
;CHECK-LABEL: vmlai16:
;CHECK: vmla.i16
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
- %tmp3 = load <4 x i16>* %C
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
+ %tmp3 = load <4 x i16>, <4 x i16>* %C
%tmp4 = mul <4 x i16> %tmp2, %tmp3
%tmp5 = add <4 x i16> %tmp1, %tmp4
ret <4 x i16> %tmp5
@@ -25,9 +25,9 @@ define <4 x i16> @vmlai16(<4 x i16>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind
define <2 x i32> @vmlai32(<2 x i32>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
;CHECK-LABEL: vmlai32:
;CHECK: vmla.i32
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
- %tmp3 = load <2 x i32>* %C
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
+ %tmp3 = load <2 x i32>, <2 x i32>* %C
%tmp4 = mul <2 x i32> %tmp2, %tmp3
%tmp5 = add <2 x i32> %tmp1, %tmp4
ret <2 x i32> %tmp5
@@ -36,9 +36,9 @@ define <2 x i32> @vmlai32(<2 x i32>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind
define <2 x float> @vmlaf32(<2 x float>* %A, <2 x float>* %B, <2 x float>* %C) nounwind {
;CHECK-LABEL: vmlaf32:
;CHECK: vmla.f32
- %tmp1 = load <2 x float>* %A
- %tmp2 = load <2 x float>* %B
- %tmp3 = load <2 x float>* %C
+ %tmp1 = load <2 x float>, <2 x float>* %A
+ %tmp2 = load <2 x float>, <2 x float>* %B
+ %tmp3 = load <2 x float>, <2 x float>* %C
%tmp4 = fmul <2 x float> %tmp2, %tmp3
%tmp5 = fadd <2 x float> %tmp1, %tmp4
ret <2 x float> %tmp5
@@ -47,9 +47,9 @@ define <2 x float> @vmlaf32(<2 x float>* %A, <2 x float>* %B, <2 x float>* %C) n
define <16 x i8> @vmlaQi8(<16 x i8>* %A, <16 x i8>* %B, <16 x i8> * %C) nounwind {
;CHECK-LABEL: vmlaQi8:
;CHECK: vmla.i8
- %tmp1 = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
- %tmp3 = load <16 x i8>* %C
+ %tmp1 = load <16 x i8>, <16 x i8>* %A
+ %tmp2 = load <16 x i8>, <16 x i8>* %B
+ %tmp3 = load <16 x i8>, <16 x i8>* %C
%tmp4 = mul <16 x i8> %tmp2, %tmp3
%tmp5 = add <16 x i8> %tmp1, %tmp4
ret <16 x i8> %tmp5
@@ -58,9 +58,9 @@ define <16 x i8> @vmlaQi8(<16 x i8>* %A, <16 x i8>* %B, <16 x i8> * %C) nounwind
define <8 x i16> @vmlaQi16(<8 x i16>* %A, <8 x i16>* %B, <8 x i16>* %C) nounwind {
;CHECK-LABEL: vmlaQi16:
;CHECK: vmla.i16
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
- %tmp3 = load <8 x i16>* %C
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
+ %tmp2 = load <8 x i16>, <8 x i16>* %B
+ %tmp3 = load <8 x i16>, <8 x i16>* %C
%tmp4 = mul <8 x i16> %tmp2, %tmp3
%tmp5 = add <8 x i16> %tmp1, %tmp4
ret <8 x i16> %tmp5
@@ -69,9 +69,9 @@ define <8 x i16> @vmlaQi16(<8 x i16>* %A, <8 x i16>* %B, <8 x i16>* %C) nounwind
define <4 x i32> @vmlaQi32(<4 x i32>* %A, <4 x i32>* %B, <4 x i32>* %C) nounwind {
;CHECK-LABEL: vmlaQi32:
;CHECK: vmla.i32
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
- %tmp3 = load <4 x i32>* %C
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
+ %tmp2 = load <4 x i32>, <4 x i32>* %B
+ %tmp3 = load <4 x i32>, <4 x i32>* %C
%tmp4 = mul <4 x i32> %tmp2, %tmp3
%tmp5 = add <4 x i32> %tmp1, %tmp4
ret <4 x i32> %tmp5
@@ -80,9 +80,9 @@ define <4 x i32> @vmlaQi32(<4 x i32>* %A, <4 x i32>* %B, <4 x i32>* %C) nounwind
define <4 x float> @vmlaQf32(<4 x float>* %A, <4 x float>* %B, <4 x float>* %C) nounwind {
;CHECK-LABEL: vmlaQf32:
;CHECK: vmla.f32
- %tmp1 = load <4 x float>* %A
- %tmp2 = load <4 x float>* %B
- %tmp3 = load <4 x float>* %C
+ %tmp1 = load <4 x float>, <4 x float>* %A
+ %tmp2 = load <4 x float>, <4 x float>* %B
+ %tmp3 = load <4 x float>, <4 x float>* %C
%tmp4 = fmul <4 x float> %tmp2, %tmp3
%tmp5 = fadd <4 x float> %tmp1, %tmp4
ret <4 x float> %tmp5
@@ -91,9 +91,9 @@ define <4 x float> @vmlaQf32(<4 x float>* %A, <4 x float>* %B, <4 x float>* %C)
define <8 x i16> @vmlals8(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
;CHECK-LABEL: vmlals8:
;CHECK: vmlal.s8
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i8>* %B
- %tmp3 = load <8 x i8>* %C
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
+ %tmp2 = load <8 x i8>, <8 x i8>* %B
+ %tmp3 = load <8 x i8>, <8 x i8>* %C
%tmp4 = sext <8 x i8> %tmp2 to <8 x i16>
%tmp5 = sext <8 x i8> %tmp3 to <8 x i16>
%tmp6 = mul <8 x i16> %tmp4, %tmp5
@@ -104,9 +104,9 @@ define <8 x i16> @vmlals8(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
define <4 x i32> @vmlals16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
;CHECK-LABEL: vmlals16:
;CHECK: vmlal.s16
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i16>* %B
- %tmp3 = load <4 x i16>* %C
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
+ %tmp3 = load <4 x i16>, <4 x i16>* %C
%tmp4 = sext <4 x i16> %tmp2 to <4 x i32>
%tmp5 = sext <4 x i16> %tmp3 to <4 x i32>
%tmp6 = mul <4 x i32> %tmp4, %tmp5
@@ -117,9 +117,9 @@ define <4 x i32> @vmlals16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind
define <2 x i64> @vmlals32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
;CHECK-LABEL: vmlals32:
;CHECK: vmlal.s32
- %tmp1 = load <2 x i64>* %A
- %tmp2 = load <2 x i32>* %B
- %tmp3 = load <2 x i32>* %C
+ %tmp1 = load <2 x i64>, <2 x i64>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
+ %tmp3 = load <2 x i32>, <2 x i32>* %C
%tmp4 = sext <2 x i32> %tmp2 to <2 x i64>
%tmp5 = sext <2 x i32> %tmp3 to <2 x i64>
%tmp6 = mul <2 x i64> %tmp4, %tmp5
@@ -130,9 +130,9 @@ define <2 x i64> @vmlals32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind
define <8 x i16> @vmlalu8(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
;CHECK-LABEL: vmlalu8:
;CHECK: vmlal.u8
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i8>* %B
- %tmp3 = load <8 x i8>* %C
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
+ %tmp2 = load <8 x i8>, <8 x i8>* %B
+ %tmp3 = load <8 x i8>, <8 x i8>* %C
%tmp4 = zext <8 x i8> %tmp2 to <8 x i16>
%tmp5 = zext <8 x i8> %tmp3 to <8 x i16>
%tmp6 = mul <8 x i16> %tmp4, %tmp5
@@ -143,9 +143,9 @@ define <8 x i16> @vmlalu8(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
define <4 x i32> @vmlalu16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
;CHECK-LABEL: vmlalu16:
;CHECK: vmlal.u16
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i16>* %B
- %tmp3 = load <4 x i16>* %C
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
+ %tmp3 = load <4 x i16>, <4 x i16>* %C
%tmp4 = zext <4 x i16> %tmp2 to <4 x i32>
%tmp5 = zext <4 x i16> %tmp3 to <4 x i32>
%tmp6 = mul <4 x i32> %tmp4, %tmp5
@@ -156,9 +156,9 @@ define <4 x i32> @vmlalu16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind
define <2 x i64> @vmlalu32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
;CHECK-LABEL: vmlalu32:
;CHECK: vmlal.u32
- %tmp1 = load <2 x i64>* %A
- %tmp2 = load <2 x i32>* %B
- %tmp3 = load <2 x i32>* %C
+ %tmp1 = load <2 x i64>, <2 x i64>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
+ %tmp3 = load <2 x i32>, <2 x i32>* %C
%tmp4 = zext <2 x i32> %tmp2 to <2 x i64>
%tmp5 = zext <2 x i32> %tmp3 to <2 x i64>
%tmp6 = mul <2 x i64> %tmp4, %tmp5
diff --git a/test/CodeGen/ARM/vmls.ll b/test/CodeGen/ARM/vmls.ll
index f86739cea3f1..d14928147a36 100644
--- a/test/CodeGen/ARM/vmls.ll
+++ b/test/CodeGen/ARM/vmls.ll
@@ -3,9 +3,9 @@
define <8 x i8> @vmlsi8(<8 x i8>* %A, <8 x i8>* %B, <8 x i8> * %C) nounwind {
;CHECK-LABEL: vmlsi8:
;CHECK: vmls.i8
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
- %tmp3 = load <8 x i8>* %C
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
+ %tmp2 = load <8 x i8>, <8 x i8>* %B
+ %tmp3 = load <8 x i8>, <8 x i8>* %C
%tmp4 = mul <8 x i8> %tmp2, %tmp3
%tmp5 = sub <8 x i8> %tmp1, %tmp4
ret <8 x i8> %tmp5
@@ -14,9 +14,9 @@ define <8 x i8> @vmlsi8(<8 x i8>* %A, <8 x i8>* %B, <8 x i8> * %C) nounwind {
define <4 x i16> @vmlsi16(<4 x i16>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
;CHECK-LABEL: vmlsi16:
;CHECK: vmls.i16
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
- %tmp3 = load <4 x i16>* %C
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
+ %tmp3 = load <4 x i16>, <4 x i16>* %C
%tmp4 = mul <4 x i16> %tmp2, %tmp3
%tmp5 = sub <4 x i16> %tmp1, %tmp4
ret <4 x i16> %tmp5
@@ -25,9 +25,9 @@ define <4 x i16> @vmlsi16(<4 x i16>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind
define <2 x i32> @vmlsi32(<2 x i32>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
;CHECK-LABEL: vmlsi32:
;CHECK: vmls.i32
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
- %tmp3 = load <2 x i32>* %C
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
+ %tmp3 = load <2 x i32>, <2 x i32>* %C
%tmp4 = mul <2 x i32> %tmp2, %tmp3
%tmp5 = sub <2 x i32> %tmp1, %tmp4
ret <2 x i32> %tmp5
@@ -36,9 +36,9 @@ define <2 x i32> @vmlsi32(<2 x i32>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind
define <2 x float> @vmlsf32(<2 x float>* %A, <2 x float>* %B, <2 x float>* %C) nounwind {
;CHECK-LABEL: vmlsf32:
;CHECK: vmls.f32
- %tmp1 = load <2 x float>* %A
- %tmp2 = load <2 x float>* %B
- %tmp3 = load <2 x float>* %C
+ %tmp1 = load <2 x float>, <2 x float>* %A
+ %tmp2 = load <2 x float>, <2 x float>* %B
+ %tmp3 = load <2 x float>, <2 x float>* %C
%tmp4 = fmul <2 x float> %tmp2, %tmp3
%tmp5 = fsub <2 x float> %tmp1, %tmp4
ret <2 x float> %tmp5
@@ -47,9 +47,9 @@ define <2 x float> @vmlsf32(<2 x float>* %A, <2 x float>* %B, <2 x float>* %C) n
define <16 x i8> @vmlsQi8(<16 x i8>* %A, <16 x i8>* %B, <16 x i8> * %C) nounwind {
;CHECK-LABEL: vmlsQi8:
;CHECK: vmls.i8
- %tmp1 = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
- %tmp3 = load <16 x i8>* %C
+ %tmp1 = load <16 x i8>, <16 x i8>* %A
+ %tmp2 = load <16 x i8>, <16 x i8>* %B
+ %tmp3 = load <16 x i8>, <16 x i8>* %C
%tmp4 = mul <16 x i8> %tmp2, %tmp3
%tmp5 = sub <16 x i8> %tmp1, %tmp4
ret <16 x i8> %tmp5
@@ -58,9 +58,9 @@ define <16 x i8> @vmlsQi8(<16 x i8>* %A, <16 x i8>* %B, <16 x i8> * %C) nounwind
define <8 x i16> @vmlsQi16(<8 x i16>* %A, <8 x i16>* %B, <8 x i16>* %C) nounwind {
;CHECK-LABEL: vmlsQi16:
;CHECK: vmls.i16
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
- %tmp3 = load <8 x i16>* %C
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
+ %tmp2 = load <8 x i16>, <8 x i16>* %B
+ %tmp3 = load <8 x i16>, <8 x i16>* %C
%tmp4 = mul <8 x i16> %tmp2, %tmp3
%tmp5 = sub <8 x i16> %tmp1, %tmp4
ret <8 x i16> %tmp5
@@ -69,9 +69,9 @@ define <8 x i16> @vmlsQi16(<8 x i16>* %A, <8 x i16>* %B, <8 x i16>* %C) nounwind
define <4 x i32> @vmlsQi32(<4 x i32>* %A, <4 x i32>* %B, <4 x i32>* %C) nounwind {
;CHECK-LABEL: vmlsQi32:
;CHECK: vmls.i32
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
- %tmp3 = load <4 x i32>* %C
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
+ %tmp2 = load <4 x i32>, <4 x i32>* %B
+ %tmp3 = load <4 x i32>, <4 x i32>* %C
%tmp4 = mul <4 x i32> %tmp2, %tmp3
%tmp5 = sub <4 x i32> %tmp1, %tmp4
ret <4 x i32> %tmp5
@@ -80,9 +80,9 @@ define <4 x i32> @vmlsQi32(<4 x i32>* %A, <4 x i32>* %B, <4 x i32>* %C) nounwind
define <4 x float> @vmlsQf32(<4 x float>* %A, <4 x float>* %B, <4 x float>* %C) nounwind {
;CHECK-LABEL: vmlsQf32:
;CHECK: vmls.f32
- %tmp1 = load <4 x float>* %A
- %tmp2 = load <4 x float>* %B
- %tmp3 = load <4 x float>* %C
+ %tmp1 = load <4 x float>, <4 x float>* %A
+ %tmp2 = load <4 x float>, <4 x float>* %B
+ %tmp3 = load <4 x float>, <4 x float>* %C
%tmp4 = fmul <4 x float> %tmp2, %tmp3
%tmp5 = fsub <4 x float> %tmp1, %tmp4
ret <4 x float> %tmp5
@@ -91,9 +91,9 @@ define <4 x float> @vmlsQf32(<4 x float>* %A, <4 x float>* %B, <4 x float>* %C)
define <8 x i16> @vmlsls8(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
;CHECK-LABEL: vmlsls8:
;CHECK: vmlsl.s8
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i8>* %B
- %tmp3 = load <8 x i8>* %C
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
+ %tmp2 = load <8 x i8>, <8 x i8>* %B
+ %tmp3 = load <8 x i8>, <8 x i8>* %C
%tmp4 = sext <8 x i8> %tmp2 to <8 x i16>
%tmp5 = sext <8 x i8> %tmp3 to <8 x i16>
%tmp6 = mul <8 x i16> %tmp4, %tmp5
@@ -104,9 +104,9 @@ define <8 x i16> @vmlsls8(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
define <4 x i32> @vmlsls16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
;CHECK-LABEL: vmlsls16:
;CHECK: vmlsl.s16
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i16>* %B
- %tmp3 = load <4 x i16>* %C
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
+ %tmp3 = load <4 x i16>, <4 x i16>* %C
%tmp4 = sext <4 x i16> %tmp2 to <4 x i32>
%tmp5 = sext <4 x i16> %tmp3 to <4 x i32>
%tmp6 = mul <4 x i32> %tmp4, %tmp5
@@ -117,9 +117,9 @@ define <4 x i32> @vmlsls16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind
define <2 x i64> @vmlsls32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
;CHECK-LABEL: vmlsls32:
;CHECK: vmlsl.s32
- %tmp1 = load <2 x i64>* %A
- %tmp2 = load <2 x i32>* %B
- %tmp3 = load <2 x i32>* %C
+ %tmp1 = load <2 x i64>, <2 x i64>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
+ %tmp3 = load <2 x i32>, <2 x i32>* %C
%tmp4 = sext <2 x i32> %tmp2 to <2 x i64>
%tmp5 = sext <2 x i32> %tmp3 to <2 x i64>
%tmp6 = mul <2 x i64> %tmp4, %tmp5
@@ -130,9 +130,9 @@ define <2 x i64> @vmlsls32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind
define <8 x i16> @vmlslu8(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
;CHECK-LABEL: vmlslu8:
;CHECK: vmlsl.u8
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i8>* %B
- %tmp3 = load <8 x i8>* %C
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
+ %tmp2 = load <8 x i8>, <8 x i8>* %B
+ %tmp3 = load <8 x i8>, <8 x i8>* %C
%tmp4 = zext <8 x i8> %tmp2 to <8 x i16>
%tmp5 = zext <8 x i8> %tmp3 to <8 x i16>
%tmp6 = mul <8 x i16> %tmp4, %tmp5
@@ -143,9 +143,9 @@ define <8 x i16> @vmlslu8(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
define <4 x i32> @vmlslu16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
;CHECK-LABEL: vmlslu16:
;CHECK: vmlsl.u16
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i16>* %B
- %tmp3 = load <4 x i16>* %C
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
+ %tmp3 = load <4 x i16>, <4 x i16>* %C
%tmp4 = zext <4 x i16> %tmp2 to <4 x i32>
%tmp5 = zext <4 x i16> %tmp3 to <4 x i32>
%tmp6 = mul <4 x i32> %tmp4, %tmp5
@@ -156,9 +156,9 @@ define <4 x i32> @vmlslu16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind
define <2 x i64> @vmlslu32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
;CHECK-LABEL: vmlslu32:
;CHECK: vmlsl.u32
- %tmp1 = load <2 x i64>* %A
- %tmp2 = load <2 x i32>* %B
- %tmp3 = load <2 x i32>* %C
+ %tmp1 = load <2 x i64>, <2 x i64>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
+ %tmp3 = load <2 x i32>, <2 x i32>* %C
%tmp4 = zext <2 x i32> %tmp2 to <2 x i64>
%tmp5 = zext <2 x i32> %tmp3 to <2 x i64>
%tmp6 = mul <2 x i64> %tmp4, %tmp5
diff --git a/test/CodeGen/ARM/vmov.ll b/test/CodeGen/ARM/vmov.ll
index 7900af44ef08..b7a23b7bb59c 100644
--- a/test/CodeGen/ARM/vmov.ll
+++ b/test/CodeGen/ARM/vmov.ll
@@ -174,7 +174,7 @@ define void @vdupn128(%struct.int8x8_t* noalias nocapture sret %agg.result) noun
entry:
;CHECK-LABEL: vdupn128:
;CHECK: vmov.i8 d{{.*}}, #0x80
- %0 = getelementptr inbounds %struct.int8x8_t* %agg.result, i32 0, i32 0 ; <<8 x i8>*> [#uses=1]
+ %0 = getelementptr inbounds %struct.int8x8_t, %struct.int8x8_t* %agg.result, i32 0, i32 0 ; <<8 x i8>*> [#uses=1]
store <8 x i8> <i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128>, <8 x i8>* %0, align 8
ret void
}
@@ -183,7 +183,7 @@ define void @vdupnneg75(%struct.int8x8_t* noalias nocapture sret %agg.result) no
entry:
;CHECK-LABEL: vdupnneg75:
;CHECK: vmov.i8 d{{.*}}, #0xb5
- %0 = getelementptr inbounds %struct.int8x8_t* %agg.result, i32 0, i32 0 ; <<8 x i8>*> [#uses=1]
+ %0 = getelementptr inbounds %struct.int8x8_t, %struct.int8x8_t* %agg.result, i32 0, i32 0 ; <<8 x i8>*> [#uses=1]
store <8 x i8> <i8 -75, i8 -75, i8 -75, i8 -75, i8 -75, i8 -75, i8 -75, i8 -75>, <8 x i8>* %0, align 8
ret void
}
@@ -191,7 +191,7 @@ entry:
define <8 x i16> @vmovls8(<8 x i8>* %A) nounwind {
;CHECK-LABEL: vmovls8:
;CHECK: vmovl.s8
- %tmp1 = load <8 x i8>* %A
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
%tmp2 = sext <8 x i8> %tmp1 to <8 x i16>
ret <8 x i16> %tmp2
}
@@ -199,7 +199,7 @@ define <8 x i16> @vmovls8(<8 x i8>* %A) nounwind {
define <4 x i32> @vmovls16(<4 x i16>* %A) nounwind {
;CHECK-LABEL: vmovls16:
;CHECK: vmovl.s16
- %tmp1 = load <4 x i16>* %A
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
%tmp2 = sext <4 x i16> %tmp1 to <4 x i32>
ret <4 x i32> %tmp2
}
@@ -207,7 +207,7 @@ define <4 x i32> @vmovls16(<4 x i16>* %A) nounwind {
define <2 x i64> @vmovls32(<2 x i32>* %A) nounwind {
;CHECK-LABEL: vmovls32:
;CHECK: vmovl.s32
- %tmp1 = load <2 x i32>* %A
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
%tmp2 = sext <2 x i32> %tmp1 to <2 x i64>
ret <2 x i64> %tmp2
}
@@ -215,7 +215,7 @@ define <2 x i64> @vmovls32(<2 x i32>* %A) nounwind {
define <8 x i16> @vmovlu8(<8 x i8>* %A) nounwind {
;CHECK-LABEL: vmovlu8:
;CHECK: vmovl.u8
- %tmp1 = load <8 x i8>* %A
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
%tmp2 = zext <8 x i8> %tmp1 to <8 x i16>
ret <8 x i16> %tmp2
}
@@ -223,7 +223,7 @@ define <8 x i16> @vmovlu8(<8 x i8>* %A) nounwind {
define <4 x i32> @vmovlu16(<4 x i16>* %A) nounwind {
;CHECK-LABEL: vmovlu16:
;CHECK: vmovl.u16
- %tmp1 = load <4 x i16>* %A
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
%tmp2 = zext <4 x i16> %tmp1 to <4 x i32>
ret <4 x i32> %tmp2
}
@@ -231,7 +231,7 @@ define <4 x i32> @vmovlu16(<4 x i16>* %A) nounwind {
define <2 x i64> @vmovlu32(<2 x i32>* %A) nounwind {
;CHECK-LABEL: vmovlu32:
;CHECK: vmovl.u32
- %tmp1 = load <2 x i32>* %A
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
%tmp2 = zext <2 x i32> %tmp1 to <2 x i64>
ret <2 x i64> %tmp2
}
@@ -239,7 +239,7 @@ define <2 x i64> @vmovlu32(<2 x i32>* %A) nounwind {
define <8 x i8> @vmovni16(<8 x i16>* %A) nounwind {
;CHECK-LABEL: vmovni16:
;CHECK: vmovn.i16
- %tmp1 = load <8 x i16>* %A
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
%tmp2 = trunc <8 x i16> %tmp1 to <8 x i8>
ret <8 x i8> %tmp2
}
@@ -247,7 +247,7 @@ define <8 x i8> @vmovni16(<8 x i16>* %A) nounwind {
define <4 x i16> @vmovni32(<4 x i32>* %A) nounwind {
;CHECK-LABEL: vmovni32:
;CHECK: vmovn.i32
- %tmp1 = load <4 x i32>* %A
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
%tmp2 = trunc <4 x i32> %tmp1 to <4 x i16>
ret <4 x i16> %tmp2
}
@@ -255,7 +255,7 @@ define <4 x i16> @vmovni32(<4 x i32>* %A) nounwind {
define <2 x i32> @vmovni64(<2 x i64>* %A) nounwind {
;CHECK-LABEL: vmovni64:
;CHECK: vmovn.i64
- %tmp1 = load <2 x i64>* %A
+ %tmp1 = load <2 x i64>, <2 x i64>* %A
%tmp2 = trunc <2 x i64> %tmp1 to <2 x i32>
ret <2 x i32> %tmp2
}
@@ -263,7 +263,7 @@ define <2 x i32> @vmovni64(<2 x i64>* %A) nounwind {
define <8 x i8> @vqmovns16(<8 x i16>* %A) nounwind {
;CHECK-LABEL: vqmovns16:
;CHECK: vqmovn.s16
- %tmp1 = load <8 x i16>* %A
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
%tmp2 = call <8 x i8> @llvm.arm.neon.vqmovns.v8i8(<8 x i16> %tmp1)
ret <8 x i8> %tmp2
}
@@ -271,7 +271,7 @@ define <8 x i8> @vqmovns16(<8 x i16>* %A) nounwind {
define <4 x i16> @vqmovns32(<4 x i32>* %A) nounwind {
;CHECK-LABEL: vqmovns32:
;CHECK: vqmovn.s32
- %tmp1 = load <4 x i32>* %A
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
%tmp2 = call <4 x i16> @llvm.arm.neon.vqmovns.v4i16(<4 x i32> %tmp1)
ret <4 x i16> %tmp2
}
@@ -279,7 +279,7 @@ define <4 x i16> @vqmovns32(<4 x i32>* %A) nounwind {
define <2 x i32> @vqmovns64(<2 x i64>* %A) nounwind {
;CHECK-LABEL: vqmovns64:
;CHECK: vqmovn.s64
- %tmp1 = load <2 x i64>* %A
+ %tmp1 = load <2 x i64>, <2 x i64>* %A
%tmp2 = call <2 x i32> @llvm.arm.neon.vqmovns.v2i32(<2 x i64> %tmp1)
ret <2 x i32> %tmp2
}
@@ -287,7 +287,7 @@ define <2 x i32> @vqmovns64(<2 x i64>* %A) nounwind {
define <8 x i8> @vqmovnu16(<8 x i16>* %A) nounwind {
;CHECK-LABEL: vqmovnu16:
;CHECK: vqmovn.u16
- %tmp1 = load <8 x i16>* %A
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
%tmp2 = call <8 x i8> @llvm.arm.neon.vqmovnu.v8i8(<8 x i16> %tmp1)
ret <8 x i8> %tmp2
}
@@ -295,7 +295,7 @@ define <8 x i8> @vqmovnu16(<8 x i16>* %A) nounwind {
define <4 x i16> @vqmovnu32(<4 x i32>* %A) nounwind {
;CHECK-LABEL: vqmovnu32:
;CHECK: vqmovn.u32
- %tmp1 = load <4 x i32>* %A
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
%tmp2 = call <4 x i16> @llvm.arm.neon.vqmovnu.v4i16(<4 x i32> %tmp1)
ret <4 x i16> %tmp2
}
@@ -303,7 +303,7 @@ define <4 x i16> @vqmovnu32(<4 x i32>* %A) nounwind {
define <2 x i32> @vqmovnu64(<2 x i64>* %A) nounwind {
;CHECK-LABEL: vqmovnu64:
;CHECK: vqmovn.u64
- %tmp1 = load <2 x i64>* %A
+ %tmp1 = load <2 x i64>, <2 x i64>* %A
%tmp2 = call <2 x i32> @llvm.arm.neon.vqmovnu.v2i32(<2 x i64> %tmp1)
ret <2 x i32> %tmp2
}
@@ -311,7 +311,7 @@ define <2 x i32> @vqmovnu64(<2 x i64>* %A) nounwind {
define <8 x i8> @vqmovuns16(<8 x i16>* %A) nounwind {
;CHECK-LABEL: vqmovuns16:
;CHECK: vqmovun.s16
- %tmp1 = load <8 x i16>* %A
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
%tmp2 = call <8 x i8> @llvm.arm.neon.vqmovnsu.v8i8(<8 x i16> %tmp1)
ret <8 x i8> %tmp2
}
@@ -319,7 +319,7 @@ define <8 x i8> @vqmovuns16(<8 x i16>* %A) nounwind {
define <4 x i16> @vqmovuns32(<4 x i32>* %A) nounwind {
;CHECK-LABEL: vqmovuns32:
;CHECK: vqmovun.s32
- %tmp1 = load <4 x i32>* %A
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
%tmp2 = call <4 x i16> @llvm.arm.neon.vqmovnsu.v4i16(<4 x i32> %tmp1)
ret <4 x i16> %tmp2
}
@@ -327,7 +327,7 @@ define <4 x i16> @vqmovuns32(<4 x i32>* %A) nounwind {
define <2 x i32> @vqmovuns64(<2 x i64>* %A) nounwind {
;CHECK-LABEL: vqmovuns64:
;CHECK: vqmovun.s64
- %tmp1 = load <2 x i64>* %A
+ %tmp1 = load <2 x i64>, <2 x i64>* %A
%tmp2 = call <2 x i32> @llvm.arm.neon.vqmovnsu.v2i32(<2 x i64> %tmp1)
ret <2 x i32> %tmp2
}
@@ -348,7 +348,7 @@ declare <2 x i32> @llvm.arm.neon.vqmovnsu.v2i32(<2 x i64>) nounwind readnone
; Radar 8598391.
define void @noTruncStore(<4 x i32>* %a, <4 x i16>* %b) nounwind {
;CHECK: vmovn
- %tmp1 = load <4 x i32>* %a, align 16
+ %tmp1 = load <4 x i32>, <4 x i32>* %a, align 16
%tmp2 = trunc <4 x i32> %tmp1 to <4 x i16>
store <4 x i16> %tmp2, <4 x i16>* %b, align 8
ret void
@@ -376,7 +376,7 @@ define void @v_mov_v4f32_undef(<4 x float> * nocapture %p) nounwind {
entry:
;CHECK-LABEL: v_mov_v4f32_undef:
;CHECK: vmov.f32 q{{.*}}, #1.000000e+00
- %a = load <4 x float> *%p
+ %a = load <4 x float> , <4 x float> *%p
%b = fadd <4 x float> %a, <float undef, float 1.0, float 1.0, float 1.0>
store <4 x float> %b, <4 x float> *%p
ret void
diff --git a/test/CodeGen/ARM/vmul.ll b/test/CodeGen/ARM/vmul.ll
index 0fa43d801bbe..c3e41cacde4c 100644
--- a/test/CodeGen/ARM/vmul.ll
+++ b/test/CodeGen/ARM/vmul.ll
@@ -3,8 +3,8 @@
define <8 x i8> @vmuli8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: vmuli8:
;CHECK: vmul.i8
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
+ %tmp2 = load <8 x i8>, <8 x i8>* %B
%tmp3 = mul <8 x i8> %tmp1, %tmp2
ret <8 x i8> %tmp3
}
@@ -12,8 +12,8 @@ define <8 x i8> @vmuli8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
define <4 x i16> @vmuli16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: vmuli16:
;CHECK: vmul.i16
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
%tmp3 = mul <4 x i16> %tmp1, %tmp2
ret <4 x i16> %tmp3
}
@@ -21,8 +21,8 @@ define <4 x i16> @vmuli16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
define <2 x i32> @vmuli32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: vmuli32:
;CHECK: vmul.i32
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
%tmp3 = mul <2 x i32> %tmp1, %tmp2
ret <2 x i32> %tmp3
}
@@ -30,8 +30,8 @@ define <2 x i32> @vmuli32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
define <2 x float> @vmulf32(<2 x float>* %A, <2 x float>* %B) nounwind {
;CHECK-LABEL: vmulf32:
;CHECK: vmul.f32
- %tmp1 = load <2 x float>* %A
- %tmp2 = load <2 x float>* %B
+ %tmp1 = load <2 x float>, <2 x float>* %A
+ %tmp2 = load <2 x float>, <2 x float>* %B
%tmp3 = fmul <2 x float> %tmp1, %tmp2
ret <2 x float> %tmp3
}
@@ -39,8 +39,8 @@ define <2 x float> @vmulf32(<2 x float>* %A, <2 x float>* %B) nounwind {
define <8 x i8> @vmulp8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: vmulp8:
;CHECK: vmul.p8
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
+ %tmp2 = load <8 x i8>, <8 x i8>* %B
%tmp3 = call <8 x i8> @llvm.arm.neon.vmulp.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
ret <8 x i8> %tmp3
}
@@ -48,8 +48,8 @@ define <8 x i8> @vmulp8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
define <16 x i8> @vmulQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: vmulQi8:
;CHECK: vmul.i8
- %tmp1 = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
+ %tmp1 = load <16 x i8>, <16 x i8>* %A
+ %tmp2 = load <16 x i8>, <16 x i8>* %B
%tmp3 = mul <16 x i8> %tmp1, %tmp2
ret <16 x i8> %tmp3
}
@@ -57,8 +57,8 @@ define <16 x i8> @vmulQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
define <8 x i16> @vmulQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: vmulQi16:
;CHECK: vmul.i16
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
+ %tmp2 = load <8 x i16>, <8 x i16>* %B
%tmp3 = mul <8 x i16> %tmp1, %tmp2
ret <8 x i16> %tmp3
}
@@ -66,8 +66,8 @@ define <8 x i16> @vmulQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
define <4 x i32> @vmulQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: vmulQi32:
;CHECK: vmul.i32
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
+ %tmp2 = load <4 x i32>, <4 x i32>* %B
%tmp3 = mul <4 x i32> %tmp1, %tmp2
ret <4 x i32> %tmp3
}
@@ -75,8 +75,8 @@ define <4 x i32> @vmulQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
define <4 x float> @vmulQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
;CHECK-LABEL: vmulQf32:
;CHECK: vmul.f32
- %tmp1 = load <4 x float>* %A
- %tmp2 = load <4 x float>* %B
+ %tmp1 = load <4 x float>, <4 x float>* %A
+ %tmp2 = load <4 x float>, <4 x float>* %B
%tmp3 = fmul <4 x float> %tmp1, %tmp2
ret <4 x float> %tmp3
}
@@ -84,8 +84,8 @@ define <4 x float> @vmulQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
define <16 x i8> @vmulQp8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: vmulQp8:
;CHECK: vmul.p8
- %tmp1 = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
+ %tmp1 = load <16 x i8>, <16 x i8>* %A
+ %tmp2 = load <16 x i8>, <16 x i8>* %B
%tmp3 = call <16 x i8> @llvm.arm.neon.vmulp.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
ret <16 x i8> %tmp3
}
@@ -150,8 +150,8 @@ entry:
define <8 x i16> @vmulls8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: vmulls8:
;CHECK: vmull.s8
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
+ %tmp2 = load <8 x i8>, <8 x i8>* %B
%tmp3 = sext <8 x i8> %tmp1 to <8 x i16>
%tmp4 = sext <8 x i8> %tmp2 to <8 x i16>
%tmp5 = mul <8 x i16> %tmp3, %tmp4
@@ -161,8 +161,8 @@ define <8 x i16> @vmulls8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
define <8 x i16> @vmulls8_int(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: vmulls8_int:
;CHECK: vmull.s8
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
+ %tmp2 = load <8 x i8>, <8 x i8>* %B
%tmp3 = call <8 x i16> @llvm.arm.neon.vmulls.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2)
ret <8 x i16> %tmp3
}
@@ -170,8 +170,8 @@ define <8 x i16> @vmulls8_int(<8 x i8>* %A, <8 x i8>* %B) nounwind {
define <4 x i32> @vmulls16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: vmulls16:
;CHECK: vmull.s16
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
%tmp3 = sext <4 x i16> %tmp1 to <4 x i32>
%tmp4 = sext <4 x i16> %tmp2 to <4 x i32>
%tmp5 = mul <4 x i32> %tmp3, %tmp4
@@ -181,8 +181,8 @@ define <4 x i32> @vmulls16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
define <4 x i32> @vmulls16_int(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: vmulls16_int:
;CHECK: vmull.s16
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
%tmp3 = call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
ret <4 x i32> %tmp3
}
@@ -190,8 +190,8 @@ define <4 x i32> @vmulls16_int(<4 x i16>* %A, <4 x i16>* %B) nounwind {
define <2 x i64> @vmulls32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: vmulls32:
;CHECK: vmull.s32
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
%tmp3 = sext <2 x i32> %tmp1 to <2 x i64>
%tmp4 = sext <2 x i32> %tmp2 to <2 x i64>
%tmp5 = mul <2 x i64> %tmp3, %tmp4
@@ -201,8 +201,8 @@ define <2 x i64> @vmulls32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
define <2 x i64> @vmulls32_int(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: vmulls32_int:
;CHECK: vmull.s32
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
%tmp3 = call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
ret <2 x i64> %tmp3
}
@@ -210,8 +210,8 @@ define <2 x i64> @vmulls32_int(<2 x i32>* %A, <2 x i32>* %B) nounwind {
define <8 x i16> @vmullu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: vmullu8:
;CHECK: vmull.u8
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
+ %tmp2 = load <8 x i8>, <8 x i8>* %B
%tmp3 = zext <8 x i8> %tmp1 to <8 x i16>
%tmp4 = zext <8 x i8> %tmp2 to <8 x i16>
%tmp5 = mul <8 x i16> %tmp3, %tmp4
@@ -221,8 +221,8 @@ define <8 x i16> @vmullu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
define <8 x i16> @vmullu8_int(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: vmullu8_int:
;CHECK: vmull.u8
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
+ %tmp2 = load <8 x i8>, <8 x i8>* %B
%tmp3 = call <8 x i16> @llvm.arm.neon.vmullu.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2)
ret <8 x i16> %tmp3
}
@@ -230,8 +230,8 @@ define <8 x i16> @vmullu8_int(<8 x i8>* %A, <8 x i8>* %B) nounwind {
define <4 x i32> @vmullu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: vmullu16:
;CHECK: vmull.u16
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
%tmp3 = zext <4 x i16> %tmp1 to <4 x i32>
%tmp4 = zext <4 x i16> %tmp2 to <4 x i32>
%tmp5 = mul <4 x i32> %tmp3, %tmp4
@@ -241,8 +241,8 @@ define <4 x i32> @vmullu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
define <4 x i32> @vmullu16_int(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: vmullu16_int:
;CHECK: vmull.u16
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
%tmp3 = call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
ret <4 x i32> %tmp3
}
@@ -250,8 +250,8 @@ define <4 x i32> @vmullu16_int(<4 x i16>* %A, <4 x i16>* %B) nounwind {
define <2 x i64> @vmullu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: vmullu32:
;CHECK: vmull.u32
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
%tmp3 = zext <2 x i32> %tmp1 to <2 x i64>
%tmp4 = zext <2 x i32> %tmp2 to <2 x i64>
%tmp5 = mul <2 x i64> %tmp3, %tmp4
@@ -261,8 +261,8 @@ define <2 x i64> @vmullu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
define <2 x i64> @vmullu32_int(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: vmullu32_int:
;CHECK: vmull.u32
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
%tmp3 = call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
ret <2 x i64> %tmp3
}
@@ -270,8 +270,8 @@ define <2 x i64> @vmullu32_int(<2 x i32>* %A, <2 x i32>* %B) nounwind {
define <8 x i16> @vmullp8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: vmullp8:
;CHECK: vmull.p8
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
+ %tmp2 = load <8 x i8>, <8 x i8>* %B
%tmp3 = call <8 x i16> @llvm.arm.neon.vmullp.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2)
ret <8 x i16> %tmp3
}
@@ -488,7 +488,7 @@ entry:
%8 = bitcast double %7 to <8 x i8>
%9 = add <8 x i8> %6, %8
%10 = mul <8 x i8> %9, %2
- %11 = getelementptr inbounds %struct.uint8x8_t* %dst, i32 0, i32 0
+ %11 = getelementptr inbounds %struct.uint8x8_t, %struct.uint8x8_t* %dst, i32 0, i32 0
store <8 x i8> %10, <8 x i8>* %11, align 8
ret void
}
@@ -510,7 +510,7 @@ entry:
%8 = bitcast double %7 to <8 x i8>
%9 = add <8 x i8> %6, %8
%10 = mul <8 x i8> %2, %9
- %11 = getelementptr inbounds %struct.uint8x8_t* %dst, i32 0, i32 0
+ %11 = getelementptr inbounds %struct.uint8x8_t, %struct.uint8x8_t* %dst, i32 0, i32 0
store <8 x i8> %10, <8 x i8>* %11, align 8
ret void
}
@@ -560,7 +560,7 @@ for.body33.lr.ph: ; preds = %for.body
for.body33: ; preds = %for.body33, %for.body33.lr.ph
%add45 = add i32 undef, undef
%vld155 = tail call <16 x i8> @llvm.arm.neon.vld1.v16i8(i8* undef, i32 1)
- %0 = load i32** undef, align 4
+ %0 = load i32*, i32** undef, align 4
%shuffle.i250 = shufflevector <2 x i64> undef, <2 x i64> undef, <1 x i32> zeroinitializer
%1 = bitcast <1 x i64> %shuffle.i250 to <8 x i8>
%vmovl.i249 = zext <8 x i8> %1 to <8 x i16>
@@ -616,7 +616,7 @@ declare <8 x i8> @llvm.arm.neon.vqmovnu.v8i8(<8 x i16>) nounwind readnone
; PR15970
define void @no_illegal_types_vmull_sext(<4 x i32> %a) {
entry:
- %wide.load283.i = load <4 x i8>* undef, align 1
+ %wide.load283.i = load <4 x i8>, <4 x i8>* undef, align 1
%0 = sext <4 x i8> %wide.load283.i to <4 x i32>
%1 = sub nsw <4 x i32> %0, %a
%2 = mul nsw <4 x i32> %1, %1
@@ -626,7 +626,7 @@ entry:
}
define void @no_illegal_types_vmull_zext(<4 x i32> %a) {
entry:
- %wide.load283.i = load <4 x i8>* undef, align 1
+ %wide.load283.i = load <4 x i8>, <4 x i8>* undef, align 1
%0 = zext <4 x i8> %wide.load283.i to <4 x i32>
%1 = sub nsw <4 x i32> %0, %a
%2 = mul nsw <4 x i32> %1, %1
@@ -642,8 +642,8 @@ define void @foo(<4 x float> * %a, <4 x float>* nocapture %dst, float* nocapture
; and used a vector * scalar instruction.
; CHECK: vldr {{s[0-9]+}}, [r2]
; CHECK: vmul.f32 q8, q8, d0[0]
- %tmp = load float* %src, align 4
- %tmp5 = load <4 x float>* %a, align 4
+ %tmp = load float, float* %src, align 4
+ %tmp5 = load <4 x float>, <4 x float>* %a, align 4
%tmp6 = insertelement <4 x float> undef, float %tmp, i32 0
%tmp7 = insertelement <4 x float> %tmp6, float %tmp, i32 1
%tmp8 = insertelement <4 x float> %tmp7, float %tmp, i32 2
diff --git a/test/CodeGen/ARM/vneg.ll b/test/CodeGen/ARM/vneg.ll
index 4d548ddf8141..24a585f65a4b 100644
--- a/test/CodeGen/ARM/vneg.ll
+++ b/test/CodeGen/ARM/vneg.ll
@@ -3,7 +3,7 @@
define <8 x i8> @vnegs8(<8 x i8>* %A) nounwind {
;CHECK-LABEL: vnegs8:
;CHECK: vneg.s8
- %tmp1 = load <8 x i8>* %A
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
%tmp2 = sub <8 x i8> zeroinitializer, %tmp1
ret <8 x i8> %tmp2
}
@@ -11,7 +11,7 @@ define <8 x i8> @vnegs8(<8 x i8>* %A) nounwind {
define <4 x i16> @vnegs16(<4 x i16>* %A) nounwind {
;CHECK-LABEL: vnegs16:
;CHECK: vneg.s16
- %tmp1 = load <4 x i16>* %A
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
%tmp2 = sub <4 x i16> zeroinitializer, %tmp1
ret <4 x i16> %tmp2
}
@@ -19,7 +19,7 @@ define <4 x i16> @vnegs16(<4 x i16>* %A) nounwind {
define <2 x i32> @vnegs32(<2 x i32>* %A) nounwind {
;CHECK-LABEL: vnegs32:
;CHECK: vneg.s32
- %tmp1 = load <2 x i32>* %A
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
%tmp2 = sub <2 x i32> zeroinitializer, %tmp1
ret <2 x i32> %tmp2
}
@@ -27,7 +27,7 @@ define <2 x i32> @vnegs32(<2 x i32>* %A) nounwind {
define <2 x float> @vnegf32(<2 x float>* %A) nounwind {
;CHECK-LABEL: vnegf32:
;CHECK: vneg.f32
- %tmp1 = load <2 x float>* %A
+ %tmp1 = load <2 x float>, <2 x float>* %A
%tmp2 = fsub <2 x float> < float -0.000000e+00, float -0.000000e+00 >, %tmp1
ret <2 x float> %tmp2
}
@@ -35,7 +35,7 @@ define <2 x float> @vnegf32(<2 x float>* %A) nounwind {
define <16 x i8> @vnegQs8(<16 x i8>* %A) nounwind {
;CHECK-LABEL: vnegQs8:
;CHECK: vneg.s8
- %tmp1 = load <16 x i8>* %A
+ %tmp1 = load <16 x i8>, <16 x i8>* %A
%tmp2 = sub <16 x i8> zeroinitializer, %tmp1
ret <16 x i8> %tmp2
}
@@ -43,7 +43,7 @@ define <16 x i8> @vnegQs8(<16 x i8>* %A) nounwind {
define <8 x i16> @vnegQs16(<8 x i16>* %A) nounwind {
;CHECK-LABEL: vnegQs16:
;CHECK: vneg.s16
- %tmp1 = load <8 x i16>* %A
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
%tmp2 = sub <8 x i16> zeroinitializer, %tmp1
ret <8 x i16> %tmp2
}
@@ -51,7 +51,7 @@ define <8 x i16> @vnegQs16(<8 x i16>* %A) nounwind {
define <4 x i32> @vnegQs32(<4 x i32>* %A) nounwind {
;CHECK-LABEL: vnegQs32:
;CHECK: vneg.s32
- %tmp1 = load <4 x i32>* %A
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
%tmp2 = sub <4 x i32> zeroinitializer, %tmp1
ret <4 x i32> %tmp2
}
@@ -59,7 +59,7 @@ define <4 x i32> @vnegQs32(<4 x i32>* %A) nounwind {
define <4 x float> @vnegQf32(<4 x float>* %A) nounwind {
;CHECK-LABEL: vnegQf32:
;CHECK: vneg.f32
- %tmp1 = load <4 x float>* %A
+ %tmp1 = load <4 x float>, <4 x float>* %A
%tmp2 = fsub <4 x float> < float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00 >, %tmp1
ret <4 x float> %tmp2
}
@@ -67,7 +67,7 @@ define <4 x float> @vnegQf32(<4 x float>* %A) nounwind {
define <8 x i8> @vqnegs8(<8 x i8>* %A) nounwind {
;CHECK-LABEL: vqnegs8:
;CHECK: vqneg.s8
- %tmp1 = load <8 x i8>* %A
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
%tmp2 = call <8 x i8> @llvm.arm.neon.vqneg.v8i8(<8 x i8> %tmp1)
ret <8 x i8> %tmp2
}
@@ -75,7 +75,7 @@ define <8 x i8> @vqnegs8(<8 x i8>* %A) nounwind {
define <4 x i16> @vqnegs16(<4 x i16>* %A) nounwind {
;CHECK-LABEL: vqnegs16:
;CHECK: vqneg.s16
- %tmp1 = load <4 x i16>* %A
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
%tmp2 = call <4 x i16> @llvm.arm.neon.vqneg.v4i16(<4 x i16> %tmp1)
ret <4 x i16> %tmp2
}
@@ -83,7 +83,7 @@ define <4 x i16> @vqnegs16(<4 x i16>* %A) nounwind {
define <2 x i32> @vqnegs32(<2 x i32>* %A) nounwind {
;CHECK-LABEL: vqnegs32:
;CHECK: vqneg.s32
- %tmp1 = load <2 x i32>* %A
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
%tmp2 = call <2 x i32> @llvm.arm.neon.vqneg.v2i32(<2 x i32> %tmp1)
ret <2 x i32> %tmp2
}
@@ -91,7 +91,7 @@ define <2 x i32> @vqnegs32(<2 x i32>* %A) nounwind {
define <16 x i8> @vqnegQs8(<16 x i8>* %A) nounwind {
;CHECK-LABEL: vqnegQs8:
;CHECK: vqneg.s8
- %tmp1 = load <16 x i8>* %A
+ %tmp1 = load <16 x i8>, <16 x i8>* %A
%tmp2 = call <16 x i8> @llvm.arm.neon.vqneg.v16i8(<16 x i8> %tmp1)
ret <16 x i8> %tmp2
}
@@ -99,7 +99,7 @@ define <16 x i8> @vqnegQs8(<16 x i8>* %A) nounwind {
define <8 x i16> @vqnegQs16(<8 x i16>* %A) nounwind {
;CHECK-LABEL: vqnegQs16:
;CHECK: vqneg.s16
- %tmp1 = load <8 x i16>* %A
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
%tmp2 = call <8 x i16> @llvm.arm.neon.vqneg.v8i16(<8 x i16> %tmp1)
ret <8 x i16> %tmp2
}
@@ -107,7 +107,7 @@ define <8 x i16> @vqnegQs16(<8 x i16>* %A) nounwind {
define <4 x i32> @vqnegQs32(<4 x i32>* %A) nounwind {
;CHECK-LABEL: vqnegQs32:
;CHECK: vqneg.s32
- %tmp1 = load <4 x i32>* %A
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
%tmp2 = call <4 x i32> @llvm.arm.neon.vqneg.v4i32(<4 x i32> %tmp1)
ret <4 x i32> %tmp2
}
diff --git a/test/CodeGen/ARM/vpadal.ll b/test/CodeGen/ARM/vpadal.ll
index ffeac737fa36..ffb69243b884 100644
--- a/test/CodeGen/ARM/vpadal.ll
+++ b/test/CodeGen/ARM/vpadal.ll
@@ -3,8 +3,8 @@
define <4 x i16> @vpadals8(<4 x i16>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: vpadals8:
;CHECK: vpadal.s8
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <8 x i8>* %B
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
+ %tmp2 = load <8 x i8>, <8 x i8>* %B
%tmp3 = call <4 x i16> @llvm.arm.neon.vpadals.v4i16.v8i8(<4 x i16> %tmp1, <8 x i8> %tmp2)
ret <4 x i16> %tmp3
}
@@ -12,8 +12,8 @@ define <4 x i16> @vpadals8(<4 x i16>* %A, <8 x i8>* %B) nounwind {
define <2 x i32> @vpadals16(<2 x i32>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: vpadals16:
;CHECK: vpadal.s16
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <4 x i16>* %B
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
%tmp3 = call <2 x i32> @llvm.arm.neon.vpadals.v2i32.v4i16(<2 x i32> %tmp1, <4 x i16> %tmp2)
ret <2 x i32> %tmp3
}
@@ -21,8 +21,8 @@ define <2 x i32> @vpadals16(<2 x i32>* %A, <4 x i16>* %B) nounwind {
define <1 x i64> @vpadals32(<1 x i64>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: vpadals32:
;CHECK: vpadal.s32
- %tmp1 = load <1 x i64>* %A
- %tmp2 = load <2 x i32>* %B
+ %tmp1 = load <1 x i64>, <1 x i64>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
%tmp3 = call <1 x i64> @llvm.arm.neon.vpadals.v1i64.v2i32(<1 x i64> %tmp1, <2 x i32> %tmp2)
ret <1 x i64> %tmp3
}
@@ -30,8 +30,8 @@ define <1 x i64> @vpadals32(<1 x i64>* %A, <2 x i32>* %B) nounwind {
define <4 x i16> @vpadalu8(<4 x i16>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: vpadalu8:
;CHECK: vpadal.u8
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <8 x i8>* %B
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
+ %tmp2 = load <8 x i8>, <8 x i8>* %B
%tmp3 = call <4 x i16> @llvm.arm.neon.vpadalu.v4i16.v8i8(<4 x i16> %tmp1, <8 x i8> %tmp2)
ret <4 x i16> %tmp3
}
@@ -39,8 +39,8 @@ define <4 x i16> @vpadalu8(<4 x i16>* %A, <8 x i8>* %B) nounwind {
define <2 x i32> @vpadalu16(<2 x i32>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: vpadalu16:
;CHECK: vpadal.u16
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <4 x i16>* %B
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
%tmp3 = call <2 x i32> @llvm.arm.neon.vpadalu.v2i32.v4i16(<2 x i32> %tmp1, <4 x i16> %tmp2)
ret <2 x i32> %tmp3
}
@@ -48,8 +48,8 @@ define <2 x i32> @vpadalu16(<2 x i32>* %A, <4 x i16>* %B) nounwind {
define <1 x i64> @vpadalu32(<1 x i64>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: vpadalu32:
;CHECK: vpadal.u32
- %tmp1 = load <1 x i64>* %A
- %tmp2 = load <2 x i32>* %B
+ %tmp1 = load <1 x i64>, <1 x i64>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
%tmp3 = call <1 x i64> @llvm.arm.neon.vpadalu.v1i64.v2i32(<1 x i64> %tmp1, <2 x i32> %tmp2)
ret <1 x i64> %tmp3
}
@@ -57,8 +57,8 @@ define <1 x i64> @vpadalu32(<1 x i64>* %A, <2 x i32>* %B) nounwind {
define <8 x i16> @vpadalQs8(<8 x i16>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: vpadalQs8:
;CHECK: vpadal.s8
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <16 x i8>* %B
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
+ %tmp2 = load <16 x i8>, <16 x i8>* %B
%tmp3 = call <8 x i16> @llvm.arm.neon.vpadals.v8i16.v16i8(<8 x i16> %tmp1, <16 x i8> %tmp2)
ret <8 x i16> %tmp3
}
@@ -66,8 +66,8 @@ define <8 x i16> @vpadalQs8(<8 x i16>* %A, <16 x i8>* %B) nounwind {
define <4 x i32> @vpadalQs16(<4 x i32>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: vpadalQs16:
;CHECK: vpadal.s16
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <8 x i16>* %B
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
+ %tmp2 = load <8 x i16>, <8 x i16>* %B
%tmp3 = call <4 x i32> @llvm.arm.neon.vpadals.v4i32.v8i16(<4 x i32> %tmp1, <8 x i16> %tmp2)
ret <4 x i32> %tmp3
}
@@ -75,8 +75,8 @@ define <4 x i32> @vpadalQs16(<4 x i32>* %A, <8 x i16>* %B) nounwind {
define <2 x i64> @vpadalQs32(<2 x i64>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: vpadalQs32:
;CHECK: vpadal.s32
- %tmp1 = load <2 x i64>* %A
- %tmp2 = load <4 x i32>* %B
+ %tmp1 = load <2 x i64>, <2 x i64>* %A
+ %tmp2 = load <4 x i32>, <4 x i32>* %B
%tmp3 = call <2 x i64> @llvm.arm.neon.vpadals.v2i64.v4i32(<2 x i64> %tmp1, <4 x i32> %tmp2)
ret <2 x i64> %tmp3
}
@@ -84,8 +84,8 @@ define <2 x i64> @vpadalQs32(<2 x i64>* %A, <4 x i32>* %B) nounwind {
define <8 x i16> @vpadalQu8(<8 x i16>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: vpadalQu8:
;CHECK: vpadal.u8
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <16 x i8>* %B
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
+ %tmp2 = load <16 x i8>, <16 x i8>* %B
%tmp3 = call <8 x i16> @llvm.arm.neon.vpadalu.v8i16.v16i8(<8 x i16> %tmp1, <16 x i8> %tmp2)
ret <8 x i16> %tmp3
}
@@ -93,8 +93,8 @@ define <8 x i16> @vpadalQu8(<8 x i16>* %A, <16 x i8>* %B) nounwind {
define <4 x i32> @vpadalQu16(<4 x i32>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: vpadalQu16:
;CHECK: vpadal.u16
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <8 x i16>* %B
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
+ %tmp2 = load <8 x i16>, <8 x i16>* %B
%tmp3 = call <4 x i32> @llvm.arm.neon.vpadalu.v4i32.v8i16(<4 x i32> %tmp1, <8 x i16> %tmp2)
ret <4 x i32> %tmp3
}
@@ -102,8 +102,8 @@ define <4 x i32> @vpadalQu16(<4 x i32>* %A, <8 x i16>* %B) nounwind {
define <2 x i64> @vpadalQu32(<2 x i64>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: vpadalQu32:
;CHECK: vpadal.u32
- %tmp1 = load <2 x i64>* %A
- %tmp2 = load <4 x i32>* %B
+ %tmp1 = load <2 x i64>, <2 x i64>* %A
+ %tmp2 = load <4 x i32>, <4 x i32>* %B
%tmp3 = call <2 x i64> @llvm.arm.neon.vpadalu.v2i64.v4i32(<2 x i64> %tmp1, <4 x i32> %tmp2)
ret <2 x i64> %tmp3
}
diff --git a/test/CodeGen/ARM/vpadd.ll b/test/CodeGen/ARM/vpadd.ll
index 01cb1c74e38e..e362ce36f8ba 100644
--- a/test/CodeGen/ARM/vpadd.ll
+++ b/test/CodeGen/ARM/vpadd.ll
@@ -3,8 +3,8 @@
define <8 x i8> @vpaddi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: vpaddi8:
;CHECK: vpadd.i8
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
+ %tmp2 = load <8 x i8>, <8 x i8>* %B
%tmp3 = call <8 x i8> @llvm.arm.neon.vpadd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
ret <8 x i8> %tmp3
}
@@ -12,8 +12,8 @@ define <8 x i8> @vpaddi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
define <4 x i16> @vpaddi16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: vpaddi16:
;CHECK: vpadd.i16
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
%tmp3 = call <4 x i16> @llvm.arm.neon.vpadd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
ret <4 x i16> %tmp3
}
@@ -21,8 +21,8 @@ define <4 x i16> @vpaddi16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
define <2 x i32> @vpaddi32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: vpaddi32:
;CHECK: vpadd.i32
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
%tmp3 = call <2 x i32> @llvm.arm.neon.vpadd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
ret <2 x i32> %tmp3
}
@@ -30,8 +30,8 @@ define <2 x i32> @vpaddi32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
define <2 x float> @vpaddf32(<2 x float>* %A, <2 x float>* %B) nounwind {
;CHECK-LABEL: vpaddf32:
;CHECK: vpadd.f32
- %tmp1 = load <2 x float>* %A
- %tmp2 = load <2 x float>* %B
+ %tmp1 = load <2 x float>, <2 x float>* %A
+ %tmp2 = load <2 x float>, <2 x float>* %B
%tmp3 = call <2 x float> @llvm.arm.neon.vpadd.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
ret <2 x float> %tmp3
}
@@ -45,7 +45,7 @@ declare <2 x float> @llvm.arm.neon.vpadd.v2f32(<2 x float>, <2 x float>) nounwin
define <4 x i16> @vpaddls8(<8 x i8>* %A) nounwind {
;CHECK-LABEL: vpaddls8:
;CHECK: vpaddl.s8
- %tmp1 = load <8 x i8>* %A
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
%tmp2 = call <4 x i16> @llvm.arm.neon.vpaddls.v4i16.v8i8(<8 x i8> %tmp1)
ret <4 x i16> %tmp2
}
@@ -53,7 +53,7 @@ define <4 x i16> @vpaddls8(<8 x i8>* %A) nounwind {
define <2 x i32> @vpaddls16(<4 x i16>* %A) nounwind {
;CHECK-LABEL: vpaddls16:
;CHECK: vpaddl.s16
- %tmp1 = load <4 x i16>* %A
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
%tmp2 = call <2 x i32> @llvm.arm.neon.vpaddls.v2i32.v4i16(<4 x i16> %tmp1)
ret <2 x i32> %tmp2
}
@@ -61,7 +61,7 @@ define <2 x i32> @vpaddls16(<4 x i16>* %A) nounwind {
define <1 x i64> @vpaddls32(<2 x i32>* %A) nounwind {
;CHECK-LABEL: vpaddls32:
;CHECK: vpaddl.s32
- %tmp1 = load <2 x i32>* %A
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
%tmp2 = call <1 x i64> @llvm.arm.neon.vpaddls.v1i64.v2i32(<2 x i32> %tmp1)
ret <1 x i64> %tmp2
}
@@ -69,7 +69,7 @@ define <1 x i64> @vpaddls32(<2 x i32>* %A) nounwind {
define <4 x i16> @vpaddlu8(<8 x i8>* %A) nounwind {
;CHECK-LABEL: vpaddlu8:
;CHECK: vpaddl.u8
- %tmp1 = load <8 x i8>* %A
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
%tmp2 = call <4 x i16> @llvm.arm.neon.vpaddlu.v4i16.v8i8(<8 x i8> %tmp1)
ret <4 x i16> %tmp2
}
@@ -77,7 +77,7 @@ define <4 x i16> @vpaddlu8(<8 x i8>* %A) nounwind {
define <2 x i32> @vpaddlu16(<4 x i16>* %A) nounwind {
;CHECK-LABEL: vpaddlu16:
;CHECK: vpaddl.u16
- %tmp1 = load <4 x i16>* %A
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
%tmp2 = call <2 x i32> @llvm.arm.neon.vpaddlu.v2i32.v4i16(<4 x i16> %tmp1)
ret <2 x i32> %tmp2
}
@@ -85,7 +85,7 @@ define <2 x i32> @vpaddlu16(<4 x i16>* %A) nounwind {
define <1 x i64> @vpaddlu32(<2 x i32>* %A) nounwind {
;CHECK-LABEL: vpaddlu32:
;CHECK: vpaddl.u32
- %tmp1 = load <2 x i32>* %A
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
%tmp2 = call <1 x i64> @llvm.arm.neon.vpaddlu.v1i64.v2i32(<2 x i32> %tmp1)
ret <1 x i64> %tmp2
}
@@ -93,7 +93,7 @@ define <1 x i64> @vpaddlu32(<2 x i32>* %A) nounwind {
define <8 x i16> @vpaddlQs8(<16 x i8>* %A) nounwind {
;CHECK-LABEL: vpaddlQs8:
;CHECK: vpaddl.s8
- %tmp1 = load <16 x i8>* %A
+ %tmp1 = load <16 x i8>, <16 x i8>* %A
%tmp2 = call <8 x i16> @llvm.arm.neon.vpaddls.v8i16.v16i8(<16 x i8> %tmp1)
ret <8 x i16> %tmp2
}
@@ -101,7 +101,7 @@ define <8 x i16> @vpaddlQs8(<16 x i8>* %A) nounwind {
define <4 x i32> @vpaddlQs16(<8 x i16>* %A) nounwind {
;CHECK-LABEL: vpaddlQs16:
;CHECK: vpaddl.s16
- %tmp1 = load <8 x i16>* %A
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
%tmp2 = call <4 x i32> @llvm.arm.neon.vpaddls.v4i32.v8i16(<8 x i16> %tmp1)
ret <4 x i32> %tmp2
}
@@ -109,7 +109,7 @@ define <4 x i32> @vpaddlQs16(<8 x i16>* %A) nounwind {
define <2 x i64> @vpaddlQs32(<4 x i32>* %A) nounwind {
;CHECK-LABEL: vpaddlQs32:
;CHECK: vpaddl.s32
- %tmp1 = load <4 x i32>* %A
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
%tmp2 = call <2 x i64> @llvm.arm.neon.vpaddls.v2i64.v4i32(<4 x i32> %tmp1)
ret <2 x i64> %tmp2
}
@@ -117,7 +117,7 @@ define <2 x i64> @vpaddlQs32(<4 x i32>* %A) nounwind {
define <8 x i16> @vpaddlQu8(<16 x i8>* %A) nounwind {
;CHECK-LABEL: vpaddlQu8:
;CHECK: vpaddl.u8
- %tmp1 = load <16 x i8>* %A
+ %tmp1 = load <16 x i8>, <16 x i8>* %A
%tmp2 = call <8 x i16> @llvm.arm.neon.vpaddlu.v8i16.v16i8(<16 x i8> %tmp1)
ret <8 x i16> %tmp2
}
@@ -125,7 +125,7 @@ define <8 x i16> @vpaddlQu8(<16 x i8>* %A) nounwind {
define <4 x i32> @vpaddlQu16(<8 x i16>* %A) nounwind {
;CHECK-LABEL: vpaddlQu16:
;CHECK: vpaddl.u16
- %tmp1 = load <8 x i16>* %A
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
%tmp2 = call <4 x i32> @llvm.arm.neon.vpaddlu.v4i32.v8i16(<8 x i16> %tmp1)
ret <4 x i32> %tmp2
}
@@ -133,7 +133,7 @@ define <4 x i32> @vpaddlQu16(<8 x i16>* %A) nounwind {
define <2 x i64> @vpaddlQu32(<4 x i32>* %A) nounwind {
;CHECK-LABEL: vpaddlQu32:
;CHECK: vpaddl.u32
- %tmp1 = load <4 x i32>* %A
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
%tmp2 = call <2 x i64> @llvm.arm.neon.vpaddlu.v2i64.v4i32(<4 x i32> %tmp1)
ret <2 x i64> %tmp2
}
@@ -143,9 +143,9 @@ define void @addCombineToVPADDL() nounwind ssp {
; CHECK: vpaddl.s8
%cbcr = alloca <16 x i8>, align 16
%X = alloca <8 x i8>, align 8
- %tmp = load <16 x i8>* %cbcr
+ %tmp = load <16 x i8>, <16 x i8>* %cbcr
%tmp1 = shufflevector <16 x i8> %tmp, <16 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
- %tmp2 = load <16 x i8>* %cbcr
+ %tmp2 = load <16 x i8>, <16 x i8>* %cbcr
%tmp3 = shufflevector <16 x i8> %tmp2, <16 x i8> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
%add = add <8 x i8> %tmp3, %tmp1
store <8 x i8> %add, <8 x i8>* %X, align 8
diff --git a/test/CodeGen/ARM/vpminmax.ll b/test/CodeGen/ARM/vpminmax.ll
index 0b893e5bc892..9ea8c69612c5 100644
--- a/test/CodeGen/ARM/vpminmax.ll
+++ b/test/CodeGen/ARM/vpminmax.ll
@@ -3,8 +3,8 @@
define <8 x i8> @vpmins8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: vpmins8:
;CHECK: vpmin.s8
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
+ %tmp2 = load <8 x i8>, <8 x i8>* %B
%tmp3 = call <8 x i8> @llvm.arm.neon.vpmins.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
ret <8 x i8> %tmp3
}
@@ -12,8 +12,8 @@ define <8 x i8> @vpmins8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
define <4 x i16> @vpmins16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: vpmins16:
;CHECK: vpmin.s16
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
%tmp3 = call <4 x i16> @llvm.arm.neon.vpmins.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
ret <4 x i16> %tmp3
}
@@ -21,8 +21,8 @@ define <4 x i16> @vpmins16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
define <2 x i32> @vpmins32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: vpmins32:
;CHECK: vpmin.s32
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
%tmp3 = call <2 x i32> @llvm.arm.neon.vpmins.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
ret <2 x i32> %tmp3
}
@@ -30,8 +30,8 @@ define <2 x i32> @vpmins32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
define <8 x i8> @vpminu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: vpminu8:
;CHECK: vpmin.u8
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
+ %tmp2 = load <8 x i8>, <8 x i8>* %B
%tmp3 = call <8 x i8> @llvm.arm.neon.vpminu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
ret <8 x i8> %tmp3
}
@@ -39,8 +39,8 @@ define <8 x i8> @vpminu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
define <4 x i16> @vpminu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: vpminu16:
;CHECK: vpmin.u16
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
%tmp3 = call <4 x i16> @llvm.arm.neon.vpminu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
ret <4 x i16> %tmp3
}
@@ -48,8 +48,8 @@ define <4 x i16> @vpminu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
define <2 x i32> @vpminu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: vpminu32:
;CHECK: vpmin.u32
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
%tmp3 = call <2 x i32> @llvm.arm.neon.vpminu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
ret <2 x i32> %tmp3
}
@@ -57,8 +57,8 @@ define <2 x i32> @vpminu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
define <2 x float> @vpminf32(<2 x float>* %A, <2 x float>* %B) nounwind {
;CHECK-LABEL: vpminf32:
;CHECK: vpmin.f32
- %tmp1 = load <2 x float>* %A
- %tmp2 = load <2 x float>* %B
+ %tmp1 = load <2 x float>, <2 x float>* %A
+ %tmp2 = load <2 x float>, <2 x float>* %B
%tmp3 = call <2 x float> @llvm.arm.neon.vpmins.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
ret <2 x float> %tmp3
}
@@ -76,8 +76,8 @@ declare <2 x float> @llvm.arm.neon.vpmins.v2f32(<2 x float>, <2 x float>) nounwi
define <8 x i8> @vpmaxs8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: vpmaxs8:
;CHECK: vpmax.s8
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
+ %tmp2 = load <8 x i8>, <8 x i8>* %B
%tmp3 = call <8 x i8> @llvm.arm.neon.vpmaxs.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
ret <8 x i8> %tmp3
}
@@ -85,8 +85,8 @@ define <8 x i8> @vpmaxs8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
define <4 x i16> @vpmaxs16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: vpmaxs16:
;CHECK: vpmax.s16
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
%tmp3 = call <4 x i16> @llvm.arm.neon.vpmaxs.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
ret <4 x i16> %tmp3
}
@@ -94,8 +94,8 @@ define <4 x i16> @vpmaxs16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
define <2 x i32> @vpmaxs32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: vpmaxs32:
;CHECK: vpmax.s32
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
%tmp3 = call <2 x i32> @llvm.arm.neon.vpmaxs.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
ret <2 x i32> %tmp3
}
@@ -103,8 +103,8 @@ define <2 x i32> @vpmaxs32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
define <8 x i8> @vpmaxu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: vpmaxu8:
;CHECK: vpmax.u8
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
+ %tmp2 = load <8 x i8>, <8 x i8>* %B
%tmp3 = call <8 x i8> @llvm.arm.neon.vpmaxu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
ret <8 x i8> %tmp3
}
@@ -112,8 +112,8 @@ define <8 x i8> @vpmaxu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
define <4 x i16> @vpmaxu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: vpmaxu16:
;CHECK: vpmax.u16
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
%tmp3 = call <4 x i16> @llvm.arm.neon.vpmaxu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
ret <4 x i16> %tmp3
}
@@ -121,8 +121,8 @@ define <4 x i16> @vpmaxu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
define <2 x i32> @vpmaxu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: vpmaxu32:
;CHECK: vpmax.u32
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
%tmp3 = call <2 x i32> @llvm.arm.neon.vpmaxu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
ret <2 x i32> %tmp3
}
@@ -130,8 +130,8 @@ define <2 x i32> @vpmaxu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
define <2 x float> @vpmaxf32(<2 x float>* %A, <2 x float>* %B) nounwind {
;CHECK-LABEL: vpmaxf32:
;CHECK: vpmax.f32
- %tmp1 = load <2 x float>* %A
- %tmp2 = load <2 x float>* %B
+ %tmp1 = load <2 x float>, <2 x float>* %A
+ %tmp2 = load <2 x float>, <2 x float>* %B
%tmp3 = call <2 x float> @llvm.arm.neon.vpmaxs.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
ret <2 x float> %tmp3
}
diff --git a/test/CodeGen/ARM/vqadd.ll b/test/CodeGen/ARM/vqadd.ll
index 81acc8bc5abb..d1e90cb20944 100644
--- a/test/CodeGen/ARM/vqadd.ll
+++ b/test/CodeGen/ARM/vqadd.ll
@@ -3,8 +3,8 @@
define <8 x i8> @vqadds8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: vqadds8:
;CHECK: vqadd.s8
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
+ %tmp2 = load <8 x i8>, <8 x i8>* %B
%tmp3 = call <8 x i8> @llvm.arm.neon.vqadds.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
ret <8 x i8> %tmp3
}
@@ -12,8 +12,8 @@ define <8 x i8> @vqadds8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
define <4 x i16> @vqadds16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: vqadds16:
;CHECK: vqadd.s16
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
%tmp3 = call <4 x i16> @llvm.arm.neon.vqadds.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
ret <4 x i16> %tmp3
}
@@ -21,8 +21,8 @@ define <4 x i16> @vqadds16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
define <2 x i32> @vqadds32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: vqadds32:
;CHECK: vqadd.s32
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
%tmp3 = call <2 x i32> @llvm.arm.neon.vqadds.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
ret <2 x i32> %tmp3
}
@@ -30,8 +30,8 @@ define <2 x i32> @vqadds32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
define <1 x i64> @vqadds64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
;CHECK-LABEL: vqadds64:
;CHECK: vqadd.s64
- %tmp1 = load <1 x i64>* %A
- %tmp2 = load <1 x i64>* %B
+ %tmp1 = load <1 x i64>, <1 x i64>* %A
+ %tmp2 = load <1 x i64>, <1 x i64>* %B
%tmp3 = call <1 x i64> @llvm.arm.neon.vqadds.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2)
ret <1 x i64> %tmp3
}
@@ -39,8 +39,8 @@ define <1 x i64> @vqadds64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
define <8 x i8> @vqaddu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: vqaddu8:
;CHECK: vqadd.u8
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
+ %tmp2 = load <8 x i8>, <8 x i8>* %B
%tmp3 = call <8 x i8> @llvm.arm.neon.vqaddu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
ret <8 x i8> %tmp3
}
@@ -48,8 +48,8 @@ define <8 x i8> @vqaddu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
define <4 x i16> @vqaddu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: vqaddu16:
;CHECK: vqadd.u16
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
%tmp3 = call <4 x i16> @llvm.arm.neon.vqaddu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
ret <4 x i16> %tmp3
}
@@ -57,8 +57,8 @@ define <4 x i16> @vqaddu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
define <2 x i32> @vqaddu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: vqaddu32:
;CHECK: vqadd.u32
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
%tmp3 = call <2 x i32> @llvm.arm.neon.vqaddu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
ret <2 x i32> %tmp3
}
@@ -66,8 +66,8 @@ define <2 x i32> @vqaddu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
define <1 x i64> @vqaddu64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
;CHECK-LABEL: vqaddu64:
;CHECK: vqadd.u64
- %tmp1 = load <1 x i64>* %A
- %tmp2 = load <1 x i64>* %B
+ %tmp1 = load <1 x i64>, <1 x i64>* %A
+ %tmp2 = load <1 x i64>, <1 x i64>* %B
%tmp3 = call <1 x i64> @llvm.arm.neon.vqaddu.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2)
ret <1 x i64> %tmp3
}
@@ -75,8 +75,8 @@ define <1 x i64> @vqaddu64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
define <16 x i8> @vqaddQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: vqaddQs8:
;CHECK: vqadd.s8
- %tmp1 = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
+ %tmp1 = load <16 x i8>, <16 x i8>* %A
+ %tmp2 = load <16 x i8>, <16 x i8>* %B
%tmp3 = call <16 x i8> @llvm.arm.neon.vqadds.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
ret <16 x i8> %tmp3
}
@@ -84,8 +84,8 @@ define <16 x i8> @vqaddQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
define <8 x i16> @vqaddQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: vqaddQs16:
;CHECK: vqadd.s16
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
+ %tmp2 = load <8 x i16>, <8 x i16>* %B
%tmp3 = call <8 x i16> @llvm.arm.neon.vqadds.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
ret <8 x i16> %tmp3
}
@@ -93,8 +93,8 @@ define <8 x i16> @vqaddQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
define <4 x i32> @vqaddQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: vqaddQs32:
;CHECK: vqadd.s32
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
+ %tmp2 = load <4 x i32>, <4 x i32>* %B
%tmp3 = call <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
ret <4 x i32> %tmp3
}
@@ -102,8 +102,8 @@ define <4 x i32> @vqaddQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
define <2 x i64> @vqaddQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
;CHECK-LABEL: vqaddQs64:
;CHECK: vqadd.s64
- %tmp1 = load <2 x i64>* %A
- %tmp2 = load <2 x i64>* %B
+ %tmp1 = load <2 x i64>, <2 x i64>* %A
+ %tmp2 = load <2 x i64>, <2 x i64>* %B
%tmp3 = call <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
ret <2 x i64> %tmp3
}
@@ -111,8 +111,8 @@ define <2 x i64> @vqaddQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
define <16 x i8> @vqaddQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: vqaddQu8:
;CHECK: vqadd.u8
- %tmp1 = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
+ %tmp1 = load <16 x i8>, <16 x i8>* %A
+ %tmp2 = load <16 x i8>, <16 x i8>* %B
%tmp3 = call <16 x i8> @llvm.arm.neon.vqaddu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
ret <16 x i8> %tmp3
}
@@ -120,8 +120,8 @@ define <16 x i8> @vqaddQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
define <8 x i16> @vqaddQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: vqaddQu16:
;CHECK: vqadd.u16
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
+ %tmp2 = load <8 x i16>, <8 x i16>* %B
%tmp3 = call <8 x i16> @llvm.arm.neon.vqaddu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
ret <8 x i16> %tmp3
}
@@ -129,8 +129,8 @@ define <8 x i16> @vqaddQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
define <4 x i32> @vqaddQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: vqaddQu32:
;CHECK: vqadd.u32
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
+ %tmp2 = load <4 x i32>, <4 x i32>* %B
%tmp3 = call <4 x i32> @llvm.arm.neon.vqaddu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
ret <4 x i32> %tmp3
}
@@ -138,8 +138,8 @@ define <4 x i32> @vqaddQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
define <2 x i64> @vqaddQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
;CHECK-LABEL: vqaddQu64:
;CHECK: vqadd.u64
- %tmp1 = load <2 x i64>* %A
- %tmp2 = load <2 x i64>* %B
+ %tmp1 = load <2 x i64>, <2 x i64>* %A
+ %tmp2 = load <2 x i64>, <2 x i64>* %B
%tmp3 = call <2 x i64> @llvm.arm.neon.vqaddu.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
ret <2 x i64> %tmp3
}
diff --git a/test/CodeGen/ARM/vqdmul.ll b/test/CodeGen/ARM/vqdmul.ll
index d298167d3a91..6da080012a1e 100644
--- a/test/CodeGen/ARM/vqdmul.ll
+++ b/test/CodeGen/ARM/vqdmul.ll
@@ -5,8 +5,8 @@ target triple = "thumbv7-elf"
define <4 x i16> @vqdmulhs16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: vqdmulhs16:
;CHECK: vqdmulh.s16
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
%tmp3 = call <4 x i16> @llvm.arm.neon.vqdmulh.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
ret <4 x i16> %tmp3
}
@@ -14,8 +14,8 @@ define <4 x i16> @vqdmulhs16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
define <2 x i32> @vqdmulhs32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: vqdmulhs32:
;CHECK: vqdmulh.s32
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
%tmp3 = call <2 x i32> @llvm.arm.neon.vqdmulh.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
ret <2 x i32> %tmp3
}
@@ -23,8 +23,8 @@ define <2 x i32> @vqdmulhs32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
define <8 x i16> @vqdmulhQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: vqdmulhQs16:
;CHECK: vqdmulh.s16
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
+ %tmp2 = load <8 x i16>, <8 x i16>* %B
%tmp3 = call <8 x i16> @llvm.arm.neon.vqdmulh.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
ret <8 x i16> %tmp3
}
@@ -32,8 +32,8 @@ define <8 x i16> @vqdmulhQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
define <4 x i32> @vqdmulhQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: vqdmulhQs32:
;CHECK: vqdmulh.s32
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
+ %tmp2 = load <4 x i32>, <4 x i32>* %B
%tmp3 = call <4 x i32> @llvm.arm.neon.vqdmulh.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
ret <4 x i32> %tmp3
}
@@ -83,8 +83,8 @@ declare <4 x i32> @llvm.arm.neon.vqdmulh.v4i32(<4 x i32>, <4 x i32>) nounwind re
define <4 x i16> @vqrdmulhs16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: vqrdmulhs16:
;CHECK: vqrdmulh.s16
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
%tmp3 = call <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
ret <4 x i16> %tmp3
}
@@ -92,8 +92,8 @@ define <4 x i16> @vqrdmulhs16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
define <2 x i32> @vqrdmulhs32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: vqrdmulhs32:
;CHECK: vqrdmulh.s32
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
%tmp3 = call <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
ret <2 x i32> %tmp3
}
@@ -101,8 +101,8 @@ define <2 x i32> @vqrdmulhs32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
define <8 x i16> @vqrdmulhQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: vqrdmulhQs16:
;CHECK: vqrdmulh.s16
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
+ %tmp2 = load <8 x i16>, <8 x i16>* %B
%tmp3 = call <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
ret <8 x i16> %tmp3
}
@@ -110,8 +110,8 @@ define <8 x i16> @vqrdmulhQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
define <4 x i32> @vqrdmulhQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: vqrdmulhQs32:
;CHECK: vqrdmulh.s32
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
+ %tmp2 = load <4 x i32>, <4 x i32>* %B
%tmp3 = call <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
ret <4 x i32> %tmp3
}
@@ -161,8 +161,8 @@ declare <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32>, <4 x i32>) nounwind r
define <4 x i32> @vqdmulls16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: vqdmulls16:
;CHECK: vqdmull.s16
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
%tmp3 = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
ret <4 x i32> %tmp3
}
@@ -170,8 +170,8 @@ define <4 x i32> @vqdmulls16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
define <2 x i64> @vqdmulls32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: vqdmulls32:
;CHECK: vqdmull.s32
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
%tmp3 = call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
ret <2 x i64> %tmp3
}
@@ -200,9 +200,9 @@ declare <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32>, <2 x i32>) nounwind r
define <4 x i32> @vqdmlals16_natural(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
;CHECK-LABEL: vqdmlals16_natural:
;CHECK: vqdmlal.s16
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i16>* %B
- %tmp3 = load <4 x i16>* %C
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
+ %tmp3 = load <4 x i16>, <4 x i16>* %C
%tmp4 = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %tmp2, <4 x i16> %tmp3)
%tmp5 = call <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp4)
ret <4 x i32> %tmp5
@@ -211,9 +211,9 @@ define <4 x i32> @vqdmlals16_natural(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C
define <2 x i64> @vqdmlals32_natural(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
;CHECK-LABEL: vqdmlals32_natural:
;CHECK: vqdmlal.s32
- %tmp1 = load <2 x i64>* %A
- %tmp2 = load <2 x i32>* %B
- %tmp3 = load <2 x i32>* %C
+ %tmp1 = load <2 x i64>, <2 x i64>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
+ %tmp3 = load <2 x i32>, <2 x i32>* %C
%tmp4 = call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %tmp2, <2 x i32> %tmp3)
%tmp5 = call <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp4)
ret <2 x i64> %tmp5
@@ -245,9 +245,9 @@ declare <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64>, <2 x i64>) nounwind re
define <4 x i32> @vqdmlsls16_natural(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
;CHECK-LABEL: vqdmlsls16_natural:
;CHECK: vqdmlsl.s16
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i16>* %B
- %tmp3 = load <4 x i16>* %C
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
+ %tmp3 = load <4 x i16>, <4 x i16>* %C
%tmp4 = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %tmp2, <4 x i16> %tmp3)
%tmp5 = call <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp4)
ret <4 x i32> %tmp5
@@ -256,9 +256,9 @@ define <4 x i32> @vqdmlsls16_natural(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C
define <2 x i64> @vqdmlsls32_natural(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
;CHECK-LABEL: vqdmlsls32_natural:
;CHECK: vqdmlsl.s32
- %tmp1 = load <2 x i64>* %A
- %tmp2 = load <2 x i32>* %B
- %tmp3 = load <2 x i32>* %C
+ %tmp1 = load <2 x i64>, <2 x i64>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
+ %tmp3 = load <2 x i32>, <2 x i32>* %C
%tmp4 = call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %tmp2, <2 x i32> %tmp3)
%tmp5 = call <2 x i64> @llvm.arm.neon.vqsubs.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp4)
ret <2 x i64> %tmp5
diff --git a/test/CodeGen/ARM/vqshl.ll b/test/CodeGen/ARM/vqshl.ll
index 4afef6dbd658..6a6d9af7a2b3 100644
--- a/test/CodeGen/ARM/vqshl.ll
+++ b/test/CodeGen/ARM/vqshl.ll
@@ -3,8 +3,8 @@
define <8 x i8> @vqshls8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: vqshls8:
;CHECK: vqshl.s8
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
+ %tmp2 = load <8 x i8>, <8 x i8>* %B
%tmp3 = call <8 x i8> @llvm.arm.neon.vqshifts.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
ret <8 x i8> %tmp3
}
@@ -12,8 +12,8 @@ define <8 x i8> @vqshls8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
define <4 x i16> @vqshls16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: vqshls16:
;CHECK: vqshl.s16
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
%tmp3 = call <4 x i16> @llvm.arm.neon.vqshifts.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
ret <4 x i16> %tmp3
}
@@ -21,8 +21,8 @@ define <4 x i16> @vqshls16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
define <2 x i32> @vqshls32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: vqshls32:
;CHECK: vqshl.s32
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
%tmp3 = call <2 x i32> @llvm.arm.neon.vqshifts.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
ret <2 x i32> %tmp3
}
@@ -30,8 +30,8 @@ define <2 x i32> @vqshls32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
define <1 x i64> @vqshls64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
;CHECK-LABEL: vqshls64:
;CHECK: vqshl.s64
- %tmp1 = load <1 x i64>* %A
- %tmp2 = load <1 x i64>* %B
+ %tmp1 = load <1 x i64>, <1 x i64>* %A
+ %tmp2 = load <1 x i64>, <1 x i64>* %B
%tmp3 = call <1 x i64> @llvm.arm.neon.vqshifts.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2)
ret <1 x i64> %tmp3
}
@@ -39,8 +39,8 @@ define <1 x i64> @vqshls64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
define <8 x i8> @vqshlu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: vqshlu8:
;CHECK: vqshl.u8
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
+ %tmp2 = load <8 x i8>, <8 x i8>* %B
%tmp3 = call <8 x i8> @llvm.arm.neon.vqshiftu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
ret <8 x i8> %tmp3
}
@@ -48,8 +48,8 @@ define <8 x i8> @vqshlu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
define <4 x i16> @vqshlu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: vqshlu16:
;CHECK: vqshl.u16
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
%tmp3 = call <4 x i16> @llvm.arm.neon.vqshiftu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
ret <4 x i16> %tmp3
}
@@ -57,8 +57,8 @@ define <4 x i16> @vqshlu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
define <2 x i32> @vqshlu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: vqshlu32:
;CHECK: vqshl.u32
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
%tmp3 = call <2 x i32> @llvm.arm.neon.vqshiftu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
ret <2 x i32> %tmp3
}
@@ -66,8 +66,8 @@ define <2 x i32> @vqshlu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
define <1 x i64> @vqshlu64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
;CHECK-LABEL: vqshlu64:
;CHECK: vqshl.u64
- %tmp1 = load <1 x i64>* %A
- %tmp2 = load <1 x i64>* %B
+ %tmp1 = load <1 x i64>, <1 x i64>* %A
+ %tmp2 = load <1 x i64>, <1 x i64>* %B
%tmp3 = call <1 x i64> @llvm.arm.neon.vqshiftu.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2)
ret <1 x i64> %tmp3
}
@@ -75,8 +75,8 @@ define <1 x i64> @vqshlu64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
define <16 x i8> @vqshlQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: vqshlQs8:
;CHECK: vqshl.s8
- %tmp1 = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
+ %tmp1 = load <16 x i8>, <16 x i8>* %A
+ %tmp2 = load <16 x i8>, <16 x i8>* %B
%tmp3 = call <16 x i8> @llvm.arm.neon.vqshifts.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
ret <16 x i8> %tmp3
}
@@ -84,8 +84,8 @@ define <16 x i8> @vqshlQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
define <8 x i16> @vqshlQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: vqshlQs16:
;CHECK: vqshl.s16
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
+ %tmp2 = load <8 x i16>, <8 x i16>* %B
%tmp3 = call <8 x i16> @llvm.arm.neon.vqshifts.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
ret <8 x i16> %tmp3
}
@@ -93,8 +93,8 @@ define <8 x i16> @vqshlQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
define <4 x i32> @vqshlQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: vqshlQs32:
;CHECK: vqshl.s32
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
+ %tmp2 = load <4 x i32>, <4 x i32>* %B
%tmp3 = call <4 x i32> @llvm.arm.neon.vqshifts.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
ret <4 x i32> %tmp3
}
@@ -102,8 +102,8 @@ define <4 x i32> @vqshlQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
define <2 x i64> @vqshlQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
;CHECK-LABEL: vqshlQs64:
;CHECK: vqshl.s64
- %tmp1 = load <2 x i64>* %A
- %tmp2 = load <2 x i64>* %B
+ %tmp1 = load <2 x i64>, <2 x i64>* %A
+ %tmp2 = load <2 x i64>, <2 x i64>* %B
%tmp3 = call <2 x i64> @llvm.arm.neon.vqshifts.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
ret <2 x i64> %tmp3
}
@@ -111,8 +111,8 @@ define <2 x i64> @vqshlQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
define <16 x i8> @vqshlQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: vqshlQu8:
;CHECK: vqshl.u8
- %tmp1 = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
+ %tmp1 = load <16 x i8>, <16 x i8>* %A
+ %tmp2 = load <16 x i8>, <16 x i8>* %B
%tmp3 = call <16 x i8> @llvm.arm.neon.vqshiftu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
ret <16 x i8> %tmp3
}
@@ -120,8 +120,8 @@ define <16 x i8> @vqshlQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
define <8 x i16> @vqshlQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: vqshlQu16:
;CHECK: vqshl.u16
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
+ %tmp2 = load <8 x i16>, <8 x i16>* %B
%tmp3 = call <8 x i16> @llvm.arm.neon.vqshiftu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
ret <8 x i16> %tmp3
}
@@ -129,8 +129,8 @@ define <8 x i16> @vqshlQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
define <4 x i32> @vqshlQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: vqshlQu32:
;CHECK: vqshl.u32
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
+ %tmp2 = load <4 x i32>, <4 x i32>* %B
%tmp3 = call <4 x i32> @llvm.arm.neon.vqshiftu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
ret <4 x i32> %tmp3
}
@@ -138,8 +138,8 @@ define <4 x i32> @vqshlQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
define <2 x i64> @vqshlQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
;CHECK-LABEL: vqshlQu64:
;CHECK: vqshl.u64
- %tmp1 = load <2 x i64>* %A
- %tmp2 = load <2 x i64>* %B
+ %tmp1 = load <2 x i64>, <2 x i64>* %A
+ %tmp2 = load <2 x i64>, <2 x i64>* %B
%tmp3 = call <2 x i64> @llvm.arm.neon.vqshiftu.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
ret <2 x i64> %tmp3
}
@@ -147,7 +147,7 @@ define <2 x i64> @vqshlQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
define <8 x i8> @vqshls_n8(<8 x i8>* %A) nounwind {
;CHECK-LABEL: vqshls_n8:
;CHECK: vqshl.s8{{.*#7}}
- %tmp1 = load <8 x i8>* %A
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
%tmp2 = call <8 x i8> @llvm.arm.neon.vqshifts.v8i8(<8 x i8> %tmp1, <8 x i8> < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >)
ret <8 x i8> %tmp2
}
@@ -155,7 +155,7 @@ define <8 x i8> @vqshls_n8(<8 x i8>* %A) nounwind {
define <4 x i16> @vqshls_n16(<4 x i16>* %A) nounwind {
;CHECK-LABEL: vqshls_n16:
;CHECK: vqshl.s16{{.*#15}}
- %tmp1 = load <4 x i16>* %A
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
%tmp2 = call <4 x i16> @llvm.arm.neon.vqshifts.v4i16(<4 x i16> %tmp1, <4 x i16> < i16 15, i16 15, i16 15, i16 15 >)
ret <4 x i16> %tmp2
}
@@ -163,7 +163,7 @@ define <4 x i16> @vqshls_n16(<4 x i16>* %A) nounwind {
define <2 x i32> @vqshls_n32(<2 x i32>* %A) nounwind {
;CHECK-LABEL: vqshls_n32:
;CHECK: vqshl.s32{{.*#31}}
- %tmp1 = load <2 x i32>* %A
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
%tmp2 = call <2 x i32> @llvm.arm.neon.vqshifts.v2i32(<2 x i32> %tmp1, <2 x i32> < i32 31, i32 31 >)
ret <2 x i32> %tmp2
}
@@ -171,7 +171,7 @@ define <2 x i32> @vqshls_n32(<2 x i32>* %A) nounwind {
define <1 x i64> @vqshls_n64(<1 x i64>* %A) nounwind {
;CHECK-LABEL: vqshls_n64:
;CHECK: vqshl.s64{{.*#63}}
- %tmp1 = load <1 x i64>* %A
+ %tmp1 = load <1 x i64>, <1 x i64>* %A
%tmp2 = call <1 x i64> @llvm.arm.neon.vqshifts.v1i64(<1 x i64> %tmp1, <1 x i64> < i64 63 >)
ret <1 x i64> %tmp2
}
@@ -179,7 +179,7 @@ define <1 x i64> @vqshls_n64(<1 x i64>* %A) nounwind {
define <8 x i8> @vqshlu_n8(<8 x i8>* %A) nounwind {
;CHECK-LABEL: vqshlu_n8:
;CHECK: vqshl.u8{{.*#7}}
- %tmp1 = load <8 x i8>* %A
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
%tmp2 = call <8 x i8> @llvm.arm.neon.vqshiftu.v8i8(<8 x i8> %tmp1, <8 x i8> < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >)
ret <8 x i8> %tmp2
}
@@ -187,7 +187,7 @@ define <8 x i8> @vqshlu_n8(<8 x i8>* %A) nounwind {
define <4 x i16> @vqshlu_n16(<4 x i16>* %A) nounwind {
;CHECK-LABEL: vqshlu_n16:
;CHECK: vqshl.u16{{.*#15}}
- %tmp1 = load <4 x i16>* %A
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
%tmp2 = call <4 x i16> @llvm.arm.neon.vqshiftu.v4i16(<4 x i16> %tmp1, <4 x i16> < i16 15, i16 15, i16 15, i16 15 >)
ret <4 x i16> %tmp2
}
@@ -195,7 +195,7 @@ define <4 x i16> @vqshlu_n16(<4 x i16>* %A) nounwind {
define <2 x i32> @vqshlu_n32(<2 x i32>* %A) nounwind {
;CHECK-LABEL: vqshlu_n32:
;CHECK: vqshl.u32{{.*#31}}
- %tmp1 = load <2 x i32>* %A
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
%tmp2 = call <2 x i32> @llvm.arm.neon.vqshiftu.v2i32(<2 x i32> %tmp1, <2 x i32> < i32 31, i32 31 >)
ret <2 x i32> %tmp2
}
@@ -203,7 +203,7 @@ define <2 x i32> @vqshlu_n32(<2 x i32>* %A) nounwind {
define <1 x i64> @vqshlu_n64(<1 x i64>* %A) nounwind {
;CHECK-LABEL: vqshlu_n64:
;CHECK: vqshl.u64{{.*#63}}
- %tmp1 = load <1 x i64>* %A
+ %tmp1 = load <1 x i64>, <1 x i64>* %A
%tmp2 = call <1 x i64> @llvm.arm.neon.vqshiftu.v1i64(<1 x i64> %tmp1, <1 x i64> < i64 63 >)
ret <1 x i64> %tmp2
}
@@ -211,7 +211,7 @@ define <1 x i64> @vqshlu_n64(<1 x i64>* %A) nounwind {
define <8 x i8> @vqshlsu_n8(<8 x i8>* %A) nounwind {
;CHECK-LABEL: vqshlsu_n8:
;CHECK: vqshlu.s8
- %tmp1 = load <8 x i8>* %A
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
%tmp2 = call <8 x i8> @llvm.arm.neon.vqshiftsu.v8i8(<8 x i8> %tmp1, <8 x i8> < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >)
ret <8 x i8> %tmp2
}
@@ -219,7 +219,7 @@ define <8 x i8> @vqshlsu_n8(<8 x i8>* %A) nounwind {
define <4 x i16> @vqshlsu_n16(<4 x i16>* %A) nounwind {
;CHECK-LABEL: vqshlsu_n16:
;CHECK: vqshlu.s16
- %tmp1 = load <4 x i16>* %A
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
%tmp2 = call <4 x i16> @llvm.arm.neon.vqshiftsu.v4i16(<4 x i16> %tmp1, <4 x i16> < i16 15, i16 15, i16 15, i16 15 >)
ret <4 x i16> %tmp2
}
@@ -227,7 +227,7 @@ define <4 x i16> @vqshlsu_n16(<4 x i16>* %A) nounwind {
define <2 x i32> @vqshlsu_n32(<2 x i32>* %A) nounwind {
;CHECK-LABEL: vqshlsu_n32:
;CHECK: vqshlu.s32
- %tmp1 = load <2 x i32>* %A
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
%tmp2 = call <2 x i32> @llvm.arm.neon.vqshiftsu.v2i32(<2 x i32> %tmp1, <2 x i32> < i32 31, i32 31 >)
ret <2 x i32> %tmp2
}
@@ -235,7 +235,7 @@ define <2 x i32> @vqshlsu_n32(<2 x i32>* %A) nounwind {
define <1 x i64> @vqshlsu_n64(<1 x i64>* %A) nounwind {
;CHECK-LABEL: vqshlsu_n64:
;CHECK: vqshlu.s64
- %tmp1 = load <1 x i64>* %A
+ %tmp1 = load <1 x i64>, <1 x i64>* %A
%tmp2 = call <1 x i64> @llvm.arm.neon.vqshiftsu.v1i64(<1 x i64> %tmp1, <1 x i64> < i64 63 >)
ret <1 x i64> %tmp2
}
@@ -243,7 +243,7 @@ define <1 x i64> @vqshlsu_n64(<1 x i64>* %A) nounwind {
define <16 x i8> @vqshlQs_n8(<16 x i8>* %A) nounwind {
;CHECK-LABEL: vqshlQs_n8:
;CHECK: vqshl.s8{{.*#7}}
- %tmp1 = load <16 x i8>* %A
+ %tmp1 = load <16 x i8>, <16 x i8>* %A
%tmp2 = call <16 x i8> @llvm.arm.neon.vqshifts.v16i8(<16 x i8> %tmp1, <16 x i8> < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >)
ret <16 x i8> %tmp2
}
@@ -251,7 +251,7 @@ define <16 x i8> @vqshlQs_n8(<16 x i8>* %A) nounwind {
define <8 x i16> @vqshlQs_n16(<8 x i16>* %A) nounwind {
;CHECK-LABEL: vqshlQs_n16:
;CHECK: vqshl.s16{{.*#15}}
- %tmp1 = load <8 x i16>* %A
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
%tmp2 = call <8 x i16> @llvm.arm.neon.vqshifts.v8i16(<8 x i16> %tmp1, <8 x i16> < i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15 >)
ret <8 x i16> %tmp2
}
@@ -259,7 +259,7 @@ define <8 x i16> @vqshlQs_n16(<8 x i16>* %A) nounwind {
define <4 x i32> @vqshlQs_n32(<4 x i32>* %A) nounwind {
;CHECK-LABEL: vqshlQs_n32:
;CHECK: vqshl.s32{{.*#31}}
- %tmp1 = load <4 x i32>* %A
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
%tmp2 = call <4 x i32> @llvm.arm.neon.vqshifts.v4i32(<4 x i32> %tmp1, <4 x i32> < i32 31, i32 31, i32 31, i32 31 >)
ret <4 x i32> %tmp2
}
@@ -267,7 +267,7 @@ define <4 x i32> @vqshlQs_n32(<4 x i32>* %A) nounwind {
define <2 x i64> @vqshlQs_n64(<2 x i64>* %A) nounwind {
;CHECK-LABEL: vqshlQs_n64:
;CHECK: vqshl.s64{{.*#63}}
- %tmp1 = load <2 x i64>* %A
+ %tmp1 = load <2 x i64>, <2 x i64>* %A
%tmp2 = call <2 x i64> @llvm.arm.neon.vqshifts.v2i64(<2 x i64> %tmp1, <2 x i64> < i64 63, i64 63 >)
ret <2 x i64> %tmp2
}
@@ -275,7 +275,7 @@ define <2 x i64> @vqshlQs_n64(<2 x i64>* %A) nounwind {
define <16 x i8> @vqshlQu_n8(<16 x i8>* %A) nounwind {
;CHECK-LABEL: vqshlQu_n8:
;CHECK: vqshl.u8{{.*#7}}
- %tmp1 = load <16 x i8>* %A
+ %tmp1 = load <16 x i8>, <16 x i8>* %A
%tmp2 = call <16 x i8> @llvm.arm.neon.vqshiftu.v16i8(<16 x i8> %tmp1, <16 x i8> < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >)
ret <16 x i8> %tmp2
}
@@ -283,7 +283,7 @@ define <16 x i8> @vqshlQu_n8(<16 x i8>* %A) nounwind {
define <8 x i16> @vqshlQu_n16(<8 x i16>* %A) nounwind {
;CHECK-LABEL: vqshlQu_n16:
;CHECK: vqshl.u16{{.*#15}}
- %tmp1 = load <8 x i16>* %A
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
%tmp2 = call <8 x i16> @llvm.arm.neon.vqshiftu.v8i16(<8 x i16> %tmp1, <8 x i16> < i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15 >)
ret <8 x i16> %tmp2
}
@@ -291,7 +291,7 @@ define <8 x i16> @vqshlQu_n16(<8 x i16>* %A) nounwind {
define <4 x i32> @vqshlQu_n32(<4 x i32>* %A) nounwind {
;CHECK-LABEL: vqshlQu_n32:
;CHECK: vqshl.u32{{.*#31}}
- %tmp1 = load <4 x i32>* %A
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
%tmp2 = call <4 x i32> @llvm.arm.neon.vqshiftu.v4i32(<4 x i32> %tmp1, <4 x i32> < i32 31, i32 31, i32 31, i32 31 >)
ret <4 x i32> %tmp2
}
@@ -299,7 +299,7 @@ define <4 x i32> @vqshlQu_n32(<4 x i32>* %A) nounwind {
define <2 x i64> @vqshlQu_n64(<2 x i64>* %A) nounwind {
;CHECK-LABEL: vqshlQu_n64:
;CHECK: vqshl.u64{{.*#63}}
- %tmp1 = load <2 x i64>* %A
+ %tmp1 = load <2 x i64>, <2 x i64>* %A
%tmp2 = call <2 x i64> @llvm.arm.neon.vqshiftu.v2i64(<2 x i64> %tmp1, <2 x i64> < i64 63, i64 63 >)
ret <2 x i64> %tmp2
}
@@ -307,7 +307,7 @@ define <2 x i64> @vqshlQu_n64(<2 x i64>* %A) nounwind {
define <16 x i8> @vqshlQsu_n8(<16 x i8>* %A) nounwind {
;CHECK-LABEL: vqshlQsu_n8:
;CHECK: vqshlu.s8
- %tmp1 = load <16 x i8>* %A
+ %tmp1 = load <16 x i8>, <16 x i8>* %A
%tmp2 = call <16 x i8> @llvm.arm.neon.vqshiftsu.v16i8(<16 x i8> %tmp1, <16 x i8> < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >)
ret <16 x i8> %tmp2
}
@@ -315,7 +315,7 @@ define <16 x i8> @vqshlQsu_n8(<16 x i8>* %A) nounwind {
define <8 x i16> @vqshlQsu_n16(<8 x i16>* %A) nounwind {
;CHECK-LABEL: vqshlQsu_n16:
;CHECK: vqshlu.s16
- %tmp1 = load <8 x i16>* %A
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
%tmp2 = call <8 x i16> @llvm.arm.neon.vqshiftsu.v8i16(<8 x i16> %tmp1, <8 x i16> < i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15 >)
ret <8 x i16> %tmp2
}
@@ -323,7 +323,7 @@ define <8 x i16> @vqshlQsu_n16(<8 x i16>* %A) nounwind {
define <4 x i32> @vqshlQsu_n32(<4 x i32>* %A) nounwind {
;CHECK-LABEL: vqshlQsu_n32:
;CHECK: vqshlu.s32
- %tmp1 = load <4 x i32>* %A
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
%tmp2 = call <4 x i32> @llvm.arm.neon.vqshiftsu.v4i32(<4 x i32> %tmp1, <4 x i32> < i32 31, i32 31, i32 31, i32 31 >)
ret <4 x i32> %tmp2
}
@@ -331,7 +331,7 @@ define <4 x i32> @vqshlQsu_n32(<4 x i32>* %A) nounwind {
define <2 x i64> @vqshlQsu_n64(<2 x i64>* %A) nounwind {
;CHECK-LABEL: vqshlQsu_n64:
;CHECK: vqshlu.s64
- %tmp1 = load <2 x i64>* %A
+ %tmp1 = load <2 x i64>, <2 x i64>* %A
%tmp2 = call <2 x i64> @llvm.arm.neon.vqshiftsu.v2i64(<2 x i64> %tmp1, <2 x i64> < i64 63, i64 63 >)
ret <2 x i64> %tmp2
}
@@ -369,8 +369,8 @@ declare <2 x i64> @llvm.arm.neon.vqshiftsu.v2i64(<2 x i64>, <2 x i64>) nounwind
define <8 x i8> @vqrshls8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: vqrshls8:
;CHECK: vqrshl.s8
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
+ %tmp2 = load <8 x i8>, <8 x i8>* %B
%tmp3 = call <8 x i8> @llvm.arm.neon.vqrshifts.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
ret <8 x i8> %tmp3
}
@@ -378,8 +378,8 @@ define <8 x i8> @vqrshls8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
define <4 x i16> @vqrshls16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: vqrshls16:
;CHECK: vqrshl.s16
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
%tmp3 = call <4 x i16> @llvm.arm.neon.vqrshifts.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
ret <4 x i16> %tmp3
}
@@ -387,8 +387,8 @@ define <4 x i16> @vqrshls16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
define <2 x i32> @vqrshls32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: vqrshls32:
;CHECK: vqrshl.s32
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
%tmp3 = call <2 x i32> @llvm.arm.neon.vqrshifts.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
ret <2 x i32> %tmp3
}
@@ -396,8 +396,8 @@ define <2 x i32> @vqrshls32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
define <1 x i64> @vqrshls64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
;CHECK-LABEL: vqrshls64:
;CHECK: vqrshl.s64
- %tmp1 = load <1 x i64>* %A
- %tmp2 = load <1 x i64>* %B
+ %tmp1 = load <1 x i64>, <1 x i64>* %A
+ %tmp2 = load <1 x i64>, <1 x i64>* %B
%tmp3 = call <1 x i64> @llvm.arm.neon.vqrshifts.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2)
ret <1 x i64> %tmp3
}
@@ -405,8 +405,8 @@ define <1 x i64> @vqrshls64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
define <8 x i8> @vqrshlu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: vqrshlu8:
;CHECK: vqrshl.u8
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
+ %tmp2 = load <8 x i8>, <8 x i8>* %B
%tmp3 = call <8 x i8> @llvm.arm.neon.vqrshiftu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
ret <8 x i8> %tmp3
}
@@ -414,8 +414,8 @@ define <8 x i8> @vqrshlu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
define <4 x i16> @vqrshlu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: vqrshlu16:
;CHECK: vqrshl.u16
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
%tmp3 = call <4 x i16> @llvm.arm.neon.vqrshiftu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
ret <4 x i16> %tmp3
}
@@ -423,8 +423,8 @@ define <4 x i16> @vqrshlu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
define <2 x i32> @vqrshlu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: vqrshlu32:
;CHECK: vqrshl.u32
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
%tmp3 = call <2 x i32> @llvm.arm.neon.vqrshiftu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
ret <2 x i32> %tmp3
}
@@ -432,8 +432,8 @@ define <2 x i32> @vqrshlu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
define <1 x i64> @vqrshlu64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
;CHECK-LABEL: vqrshlu64:
;CHECK: vqrshl.u64
- %tmp1 = load <1 x i64>* %A
- %tmp2 = load <1 x i64>* %B
+ %tmp1 = load <1 x i64>, <1 x i64>* %A
+ %tmp2 = load <1 x i64>, <1 x i64>* %B
%tmp3 = call <1 x i64> @llvm.arm.neon.vqrshiftu.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2)
ret <1 x i64> %tmp3
}
@@ -441,8 +441,8 @@ define <1 x i64> @vqrshlu64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
define <16 x i8> @vqrshlQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: vqrshlQs8:
;CHECK: vqrshl.s8
- %tmp1 = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
+ %tmp1 = load <16 x i8>, <16 x i8>* %A
+ %tmp2 = load <16 x i8>, <16 x i8>* %B
%tmp3 = call <16 x i8> @llvm.arm.neon.vqrshifts.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
ret <16 x i8> %tmp3
}
@@ -450,8 +450,8 @@ define <16 x i8> @vqrshlQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
define <8 x i16> @vqrshlQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: vqrshlQs16:
;CHECK: vqrshl.s16
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
+ %tmp2 = load <8 x i16>, <8 x i16>* %B
%tmp3 = call <8 x i16> @llvm.arm.neon.vqrshifts.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
ret <8 x i16> %tmp3
}
@@ -459,8 +459,8 @@ define <8 x i16> @vqrshlQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
define <4 x i32> @vqrshlQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: vqrshlQs32:
;CHECK: vqrshl.s32
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
+ %tmp2 = load <4 x i32>, <4 x i32>* %B
%tmp3 = call <4 x i32> @llvm.arm.neon.vqrshifts.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
ret <4 x i32> %tmp3
}
@@ -468,8 +468,8 @@ define <4 x i32> @vqrshlQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
define <2 x i64> @vqrshlQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
;CHECK-LABEL: vqrshlQs64:
;CHECK: vqrshl.s64
- %tmp1 = load <2 x i64>* %A
- %tmp2 = load <2 x i64>* %B
+ %tmp1 = load <2 x i64>, <2 x i64>* %A
+ %tmp2 = load <2 x i64>, <2 x i64>* %B
%tmp3 = call <2 x i64> @llvm.arm.neon.vqrshifts.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
ret <2 x i64> %tmp3
}
@@ -477,8 +477,8 @@ define <2 x i64> @vqrshlQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
define <16 x i8> @vqrshlQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: vqrshlQu8:
;CHECK: vqrshl.u8
- %tmp1 = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
+ %tmp1 = load <16 x i8>, <16 x i8>* %A
+ %tmp2 = load <16 x i8>, <16 x i8>* %B
%tmp3 = call <16 x i8> @llvm.arm.neon.vqrshiftu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
ret <16 x i8> %tmp3
}
@@ -486,8 +486,8 @@ define <16 x i8> @vqrshlQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
define <8 x i16> @vqrshlQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: vqrshlQu16:
;CHECK: vqrshl.u16
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
+ %tmp2 = load <8 x i16>, <8 x i16>* %B
%tmp3 = call <8 x i16> @llvm.arm.neon.vqrshiftu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
ret <8 x i16> %tmp3
}
@@ -495,8 +495,8 @@ define <8 x i16> @vqrshlQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
define <4 x i32> @vqrshlQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: vqrshlQu32:
;CHECK: vqrshl.u32
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
+ %tmp2 = load <4 x i32>, <4 x i32>* %B
%tmp3 = call <4 x i32> @llvm.arm.neon.vqrshiftu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
ret <4 x i32> %tmp3
}
@@ -504,8 +504,8 @@ define <4 x i32> @vqrshlQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
define <2 x i64> @vqrshlQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
;CHECK-LABEL: vqrshlQu64:
;CHECK: vqrshl.u64
- %tmp1 = load <2 x i64>* %A
- %tmp2 = load <2 x i64>* %B
+ %tmp1 = load <2 x i64>, <2 x i64>* %A
+ %tmp2 = load <2 x i64>, <2 x i64>* %B
%tmp3 = call <2 x i64> @llvm.arm.neon.vqrshiftu.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
ret <2 x i64> %tmp3
}
diff --git a/test/CodeGen/ARM/vqshrn.ll b/test/CodeGen/ARM/vqshrn.ll
index f02482c0f77c..b4b5e96d4579 100644
--- a/test/CodeGen/ARM/vqshrn.ll
+++ b/test/CodeGen/ARM/vqshrn.ll
@@ -3,7 +3,7 @@
define <8 x i8> @vqshrns8(<8 x i16>* %A) nounwind {
;CHECK-LABEL: vqshrns8:
;CHECK: vqshrn.s16
- %tmp1 = load <8 x i16>* %A
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
%tmp2 = call <8 x i8> @llvm.arm.neon.vqshiftns.v8i8(<8 x i16> %tmp1, <8 x i16> < i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8 >)
ret <8 x i8> %tmp2
}
@@ -11,7 +11,7 @@ define <8 x i8> @vqshrns8(<8 x i16>* %A) nounwind {
define <4 x i16> @vqshrns16(<4 x i32>* %A) nounwind {
;CHECK-LABEL: vqshrns16:
;CHECK: vqshrn.s32
- %tmp1 = load <4 x i32>* %A
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
%tmp2 = call <4 x i16> @llvm.arm.neon.vqshiftns.v4i16(<4 x i32> %tmp1, <4 x i32> < i32 -16, i32 -16, i32 -16, i32 -16 >)
ret <4 x i16> %tmp2
}
@@ -19,7 +19,7 @@ define <4 x i16> @vqshrns16(<4 x i32>* %A) nounwind {
define <2 x i32> @vqshrns32(<2 x i64>* %A) nounwind {
;CHECK-LABEL: vqshrns32:
;CHECK: vqshrn.s64
- %tmp1 = load <2 x i64>* %A
+ %tmp1 = load <2 x i64>, <2 x i64>* %A
%tmp2 = call <2 x i32> @llvm.arm.neon.vqshiftns.v2i32(<2 x i64> %tmp1, <2 x i64> < i64 -32, i64 -32 >)
ret <2 x i32> %tmp2
}
@@ -27,7 +27,7 @@ define <2 x i32> @vqshrns32(<2 x i64>* %A) nounwind {
define <8 x i8> @vqshrnu8(<8 x i16>* %A) nounwind {
;CHECK-LABEL: vqshrnu8:
;CHECK: vqshrn.u16
- %tmp1 = load <8 x i16>* %A
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
%tmp2 = call <8 x i8> @llvm.arm.neon.vqshiftnu.v8i8(<8 x i16> %tmp1, <8 x i16> < i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8 >)
ret <8 x i8> %tmp2
}
@@ -35,7 +35,7 @@ define <8 x i8> @vqshrnu8(<8 x i16>* %A) nounwind {
define <4 x i16> @vqshrnu16(<4 x i32>* %A) nounwind {
;CHECK-LABEL: vqshrnu16:
;CHECK: vqshrn.u32
- %tmp1 = load <4 x i32>* %A
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
%tmp2 = call <4 x i16> @llvm.arm.neon.vqshiftnu.v4i16(<4 x i32> %tmp1, <4 x i32> < i32 -16, i32 -16, i32 -16, i32 -16 >)
ret <4 x i16> %tmp2
}
@@ -43,7 +43,7 @@ define <4 x i16> @vqshrnu16(<4 x i32>* %A) nounwind {
define <2 x i32> @vqshrnu32(<2 x i64>* %A) nounwind {
;CHECK-LABEL: vqshrnu32:
;CHECK: vqshrn.u64
- %tmp1 = load <2 x i64>* %A
+ %tmp1 = load <2 x i64>, <2 x i64>* %A
%tmp2 = call <2 x i32> @llvm.arm.neon.vqshiftnu.v2i32(<2 x i64> %tmp1, <2 x i64> < i64 -32, i64 -32 >)
ret <2 x i32> %tmp2
}
@@ -51,7 +51,7 @@ define <2 x i32> @vqshrnu32(<2 x i64>* %A) nounwind {
define <8 x i8> @vqshruns8(<8 x i16>* %A) nounwind {
;CHECK-LABEL: vqshruns8:
;CHECK: vqshrun.s16
- %tmp1 = load <8 x i16>* %A
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
%tmp2 = call <8 x i8> @llvm.arm.neon.vqshiftnsu.v8i8(<8 x i16> %tmp1, <8 x i16> < i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8 >)
ret <8 x i8> %tmp2
}
@@ -59,7 +59,7 @@ define <8 x i8> @vqshruns8(<8 x i16>* %A) nounwind {
define <4 x i16> @vqshruns16(<4 x i32>* %A) nounwind {
;CHECK-LABEL: vqshruns16:
;CHECK: vqshrun.s32
- %tmp1 = load <4 x i32>* %A
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
%tmp2 = call <4 x i16> @llvm.arm.neon.vqshiftnsu.v4i16(<4 x i32> %tmp1, <4 x i32> < i32 -16, i32 -16, i32 -16, i32 -16 >)
ret <4 x i16> %tmp2
}
@@ -67,7 +67,7 @@ define <4 x i16> @vqshruns16(<4 x i32>* %A) nounwind {
define <2 x i32> @vqshruns32(<2 x i64>* %A) nounwind {
;CHECK-LABEL: vqshruns32:
;CHECK: vqshrun.s64
- %tmp1 = load <2 x i64>* %A
+ %tmp1 = load <2 x i64>, <2 x i64>* %A
%tmp2 = call <2 x i32> @llvm.arm.neon.vqshiftnsu.v2i32(<2 x i64> %tmp1, <2 x i64> < i64 -32, i64 -32 >)
ret <2 x i32> %tmp2
}
@@ -87,7 +87,7 @@ declare <2 x i32> @llvm.arm.neon.vqshiftnsu.v2i32(<2 x i64>, <2 x i64>) nounwind
define <8 x i8> @vqrshrns8(<8 x i16>* %A) nounwind {
;CHECK-LABEL: vqrshrns8:
;CHECK: vqrshrn.s16
- %tmp1 = load <8 x i16>* %A
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
%tmp2 = call <8 x i8> @llvm.arm.neon.vqrshiftns.v8i8(<8 x i16> %tmp1, <8 x i16> < i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8 >)
ret <8 x i8> %tmp2
}
@@ -95,7 +95,7 @@ define <8 x i8> @vqrshrns8(<8 x i16>* %A) nounwind {
define <4 x i16> @vqrshrns16(<4 x i32>* %A) nounwind {
;CHECK-LABEL: vqrshrns16:
;CHECK: vqrshrn.s32
- %tmp1 = load <4 x i32>* %A
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
%tmp2 = call <4 x i16> @llvm.arm.neon.vqrshiftns.v4i16(<4 x i32> %tmp1, <4 x i32> < i32 -16, i32 -16, i32 -16, i32 -16 >)
ret <4 x i16> %tmp2
}
@@ -103,7 +103,7 @@ define <4 x i16> @vqrshrns16(<4 x i32>* %A) nounwind {
define <2 x i32> @vqrshrns32(<2 x i64>* %A) nounwind {
;CHECK-LABEL: vqrshrns32:
;CHECK: vqrshrn.s64
- %tmp1 = load <2 x i64>* %A
+ %tmp1 = load <2 x i64>, <2 x i64>* %A
%tmp2 = call <2 x i32> @llvm.arm.neon.vqrshiftns.v2i32(<2 x i64> %tmp1, <2 x i64> < i64 -32, i64 -32 >)
ret <2 x i32> %tmp2
}
@@ -111,7 +111,7 @@ define <2 x i32> @vqrshrns32(<2 x i64>* %A) nounwind {
define <8 x i8> @vqrshrnu8(<8 x i16>* %A) nounwind {
;CHECK-LABEL: vqrshrnu8:
;CHECK: vqrshrn.u16
- %tmp1 = load <8 x i16>* %A
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
%tmp2 = call <8 x i8> @llvm.arm.neon.vqrshiftnu.v8i8(<8 x i16> %tmp1, <8 x i16> < i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8 >)
ret <8 x i8> %tmp2
}
@@ -119,7 +119,7 @@ define <8 x i8> @vqrshrnu8(<8 x i16>* %A) nounwind {
define <4 x i16> @vqrshrnu16(<4 x i32>* %A) nounwind {
;CHECK-LABEL: vqrshrnu16:
;CHECK: vqrshrn.u32
- %tmp1 = load <4 x i32>* %A
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
%tmp2 = call <4 x i16> @llvm.arm.neon.vqrshiftnu.v4i16(<4 x i32> %tmp1, <4 x i32> < i32 -16, i32 -16, i32 -16, i32 -16 >)
ret <4 x i16> %tmp2
}
@@ -127,7 +127,7 @@ define <4 x i16> @vqrshrnu16(<4 x i32>* %A) nounwind {
define <2 x i32> @vqrshrnu32(<2 x i64>* %A) nounwind {
;CHECK-LABEL: vqrshrnu32:
;CHECK: vqrshrn.u64
- %tmp1 = load <2 x i64>* %A
+ %tmp1 = load <2 x i64>, <2 x i64>* %A
%tmp2 = call <2 x i32> @llvm.arm.neon.vqrshiftnu.v2i32(<2 x i64> %tmp1, <2 x i64> < i64 -32, i64 -32 >)
ret <2 x i32> %tmp2
}
@@ -135,7 +135,7 @@ define <2 x i32> @vqrshrnu32(<2 x i64>* %A) nounwind {
define <8 x i8> @vqrshruns8(<8 x i16>* %A) nounwind {
;CHECK-LABEL: vqrshruns8:
;CHECK: vqrshrun.s16
- %tmp1 = load <8 x i16>* %A
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
%tmp2 = call <8 x i8> @llvm.arm.neon.vqrshiftnsu.v8i8(<8 x i16> %tmp1, <8 x i16> < i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8 >)
ret <8 x i8> %tmp2
}
@@ -143,7 +143,7 @@ define <8 x i8> @vqrshruns8(<8 x i16>* %A) nounwind {
define <4 x i16> @vqrshruns16(<4 x i32>* %A) nounwind {
;CHECK-LABEL: vqrshruns16:
;CHECK: vqrshrun.s32
- %tmp1 = load <4 x i32>* %A
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
%tmp2 = call <4 x i16> @llvm.arm.neon.vqrshiftnsu.v4i16(<4 x i32> %tmp1, <4 x i32> < i32 -16, i32 -16, i32 -16, i32 -16 >)
ret <4 x i16> %tmp2
}
@@ -151,7 +151,7 @@ define <4 x i16> @vqrshruns16(<4 x i32>* %A) nounwind {
define <2 x i32> @vqrshruns32(<2 x i64>* %A) nounwind {
;CHECK-LABEL: vqrshruns32:
;CHECK: vqrshrun.s64
- %tmp1 = load <2 x i64>* %A
+ %tmp1 = load <2 x i64>, <2 x i64>* %A
%tmp2 = call <2 x i32> @llvm.arm.neon.vqrshiftnsu.v2i32(<2 x i64> %tmp1, <2 x i64> < i64 -32, i64 -32 >)
ret <2 x i32> %tmp2
}
diff --git a/test/CodeGen/ARM/vqsub.ll b/test/CodeGen/ARM/vqsub.ll
index 4af438019208..40963ce82486 100644
--- a/test/CodeGen/ARM/vqsub.ll
+++ b/test/CodeGen/ARM/vqsub.ll
@@ -3,8 +3,8 @@
define <8 x i8> @vqsubs8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: vqsubs8:
;CHECK: vqsub.s8
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
+ %tmp2 = load <8 x i8>, <8 x i8>* %B
%tmp3 = call <8 x i8> @llvm.arm.neon.vqsubs.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
ret <8 x i8> %tmp3
}
@@ -12,8 +12,8 @@ define <8 x i8> @vqsubs8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
define <4 x i16> @vqsubs16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: vqsubs16:
;CHECK: vqsub.s16
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
%tmp3 = call <4 x i16> @llvm.arm.neon.vqsubs.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
ret <4 x i16> %tmp3
}
@@ -21,8 +21,8 @@ define <4 x i16> @vqsubs16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
define <2 x i32> @vqsubs32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: vqsubs32:
;CHECK: vqsub.s32
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
%tmp3 = call <2 x i32> @llvm.arm.neon.vqsubs.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
ret <2 x i32> %tmp3
}
@@ -30,8 +30,8 @@ define <2 x i32> @vqsubs32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
define <1 x i64> @vqsubs64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
;CHECK-LABEL: vqsubs64:
;CHECK: vqsub.s64
- %tmp1 = load <1 x i64>* %A
- %tmp2 = load <1 x i64>* %B
+ %tmp1 = load <1 x i64>, <1 x i64>* %A
+ %tmp2 = load <1 x i64>, <1 x i64>* %B
%tmp3 = call <1 x i64> @llvm.arm.neon.vqsubs.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2)
ret <1 x i64> %tmp3
}
@@ -39,8 +39,8 @@ define <1 x i64> @vqsubs64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
define <8 x i8> @vqsubu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: vqsubu8:
;CHECK: vqsub.u8
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
+ %tmp2 = load <8 x i8>, <8 x i8>* %B
%tmp3 = call <8 x i8> @llvm.arm.neon.vqsubu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
ret <8 x i8> %tmp3
}
@@ -48,8 +48,8 @@ define <8 x i8> @vqsubu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
define <4 x i16> @vqsubu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: vqsubu16:
;CHECK: vqsub.u16
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
%tmp3 = call <4 x i16> @llvm.arm.neon.vqsubu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
ret <4 x i16> %tmp3
}
@@ -57,8 +57,8 @@ define <4 x i16> @vqsubu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
define <2 x i32> @vqsubu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: vqsubu32:
;CHECK: vqsub.u32
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
%tmp3 = call <2 x i32> @llvm.arm.neon.vqsubu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
ret <2 x i32> %tmp3
}
@@ -66,8 +66,8 @@ define <2 x i32> @vqsubu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
define <1 x i64> @vqsubu64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
;CHECK-LABEL: vqsubu64:
;CHECK: vqsub.u64
- %tmp1 = load <1 x i64>* %A
- %tmp2 = load <1 x i64>* %B
+ %tmp1 = load <1 x i64>, <1 x i64>* %A
+ %tmp2 = load <1 x i64>, <1 x i64>* %B
%tmp3 = call <1 x i64> @llvm.arm.neon.vqsubu.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2)
ret <1 x i64> %tmp3
}
@@ -75,8 +75,8 @@ define <1 x i64> @vqsubu64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
define <16 x i8> @vqsubQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: vqsubQs8:
;CHECK: vqsub.s8
- %tmp1 = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
+ %tmp1 = load <16 x i8>, <16 x i8>* %A
+ %tmp2 = load <16 x i8>, <16 x i8>* %B
%tmp3 = call <16 x i8> @llvm.arm.neon.vqsubs.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
ret <16 x i8> %tmp3
}
@@ -84,8 +84,8 @@ define <16 x i8> @vqsubQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
define <8 x i16> @vqsubQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: vqsubQs16:
;CHECK: vqsub.s16
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
+ %tmp2 = load <8 x i16>, <8 x i16>* %B
%tmp3 = call <8 x i16> @llvm.arm.neon.vqsubs.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
ret <8 x i16> %tmp3
}
@@ -93,8 +93,8 @@ define <8 x i16> @vqsubQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
define <4 x i32> @vqsubQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: vqsubQs32:
;CHECK: vqsub.s32
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
+ %tmp2 = load <4 x i32>, <4 x i32>* %B
%tmp3 = call <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
ret <4 x i32> %tmp3
}
@@ -102,8 +102,8 @@ define <4 x i32> @vqsubQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
define <2 x i64> @vqsubQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
;CHECK-LABEL: vqsubQs64:
;CHECK: vqsub.s64
- %tmp1 = load <2 x i64>* %A
- %tmp2 = load <2 x i64>* %B
+ %tmp1 = load <2 x i64>, <2 x i64>* %A
+ %tmp2 = load <2 x i64>, <2 x i64>* %B
%tmp3 = call <2 x i64> @llvm.arm.neon.vqsubs.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
ret <2 x i64> %tmp3
}
@@ -111,8 +111,8 @@ define <2 x i64> @vqsubQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
define <16 x i8> @vqsubQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: vqsubQu8:
;CHECK: vqsub.u8
- %tmp1 = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
+ %tmp1 = load <16 x i8>, <16 x i8>* %A
+ %tmp2 = load <16 x i8>, <16 x i8>* %B
%tmp3 = call <16 x i8> @llvm.arm.neon.vqsubu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
ret <16 x i8> %tmp3
}
@@ -120,8 +120,8 @@ define <16 x i8> @vqsubQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
define <8 x i16> @vqsubQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: vqsubQu16:
;CHECK: vqsub.u16
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
+ %tmp2 = load <8 x i16>, <8 x i16>* %B
%tmp3 = call <8 x i16> @llvm.arm.neon.vqsubu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
ret <8 x i16> %tmp3
}
@@ -129,8 +129,8 @@ define <8 x i16> @vqsubQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
define <4 x i32> @vqsubQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: vqsubQu32:
;CHECK: vqsub.u32
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
+ %tmp2 = load <4 x i32>, <4 x i32>* %B
%tmp3 = call <4 x i32> @llvm.arm.neon.vqsubu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
ret <4 x i32> %tmp3
}
@@ -138,8 +138,8 @@ define <4 x i32> @vqsubQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
define <2 x i64> @vqsubQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
;CHECK-LABEL: vqsubQu64:
;CHECK: vqsub.u64
- %tmp1 = load <2 x i64>* %A
- %tmp2 = load <2 x i64>* %B
+ %tmp1 = load <2 x i64>, <2 x i64>* %A
+ %tmp2 = load <2 x i64>, <2 x i64>* %B
%tmp3 = call <2 x i64> @llvm.arm.neon.vqsubu.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
ret <2 x i64> %tmp3
}
diff --git a/test/CodeGen/ARM/vrec.ll b/test/CodeGen/ARM/vrec.ll
index 91979e5a3343..a7ebd79289d8 100644
--- a/test/CodeGen/ARM/vrec.ll
+++ b/test/CodeGen/ARM/vrec.ll
@@ -3,7 +3,7 @@
define <2 x i32> @vrecpei32(<2 x i32>* %A) nounwind {
;CHECK-LABEL: vrecpei32:
;CHECK: vrecpe.u32
- %tmp1 = load <2 x i32>* %A
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
%tmp2 = call <2 x i32> @llvm.arm.neon.vrecpe.v2i32(<2 x i32> %tmp1)
ret <2 x i32> %tmp2
}
@@ -11,7 +11,7 @@ define <2 x i32> @vrecpei32(<2 x i32>* %A) nounwind {
define <4 x i32> @vrecpeQi32(<4 x i32>* %A) nounwind {
;CHECK-LABEL: vrecpeQi32:
;CHECK: vrecpe.u32
- %tmp1 = load <4 x i32>* %A
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
%tmp2 = call <4 x i32> @llvm.arm.neon.vrecpe.v4i32(<4 x i32> %tmp1)
ret <4 x i32> %tmp2
}
@@ -19,7 +19,7 @@ define <4 x i32> @vrecpeQi32(<4 x i32>* %A) nounwind {
define <2 x float> @vrecpef32(<2 x float>* %A) nounwind {
;CHECK-LABEL: vrecpef32:
;CHECK: vrecpe.f32
- %tmp1 = load <2 x float>* %A
+ %tmp1 = load <2 x float>, <2 x float>* %A
%tmp2 = call <2 x float> @llvm.arm.neon.vrecpe.v2f32(<2 x float> %tmp1)
ret <2 x float> %tmp2
}
@@ -27,7 +27,7 @@ define <2 x float> @vrecpef32(<2 x float>* %A) nounwind {
define <4 x float> @vrecpeQf32(<4 x float>* %A) nounwind {
;CHECK-LABEL: vrecpeQf32:
;CHECK: vrecpe.f32
- %tmp1 = load <4 x float>* %A
+ %tmp1 = load <4 x float>, <4 x float>* %A
%tmp2 = call <4 x float> @llvm.arm.neon.vrecpe.v4f32(<4 x float> %tmp1)
ret <4 x float> %tmp2
}
@@ -41,8 +41,8 @@ declare <4 x float> @llvm.arm.neon.vrecpe.v4f32(<4 x float>) nounwind readnone
define <2 x float> @vrecpsf32(<2 x float>* %A, <2 x float>* %B) nounwind {
;CHECK-LABEL: vrecpsf32:
;CHECK: vrecps.f32
- %tmp1 = load <2 x float>* %A
- %tmp2 = load <2 x float>* %B
+ %tmp1 = load <2 x float>, <2 x float>* %A
+ %tmp2 = load <2 x float>, <2 x float>* %B
%tmp3 = call <2 x float> @llvm.arm.neon.vrecps.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
ret <2 x float> %tmp3
}
@@ -50,8 +50,8 @@ define <2 x float> @vrecpsf32(<2 x float>* %A, <2 x float>* %B) nounwind {
define <4 x float> @vrecpsQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
;CHECK-LABEL: vrecpsQf32:
;CHECK: vrecps.f32
- %tmp1 = load <4 x float>* %A
- %tmp2 = load <4 x float>* %B
+ %tmp1 = load <4 x float>, <4 x float>* %A
+ %tmp2 = load <4 x float>, <4 x float>* %B
%tmp3 = call <4 x float> @llvm.arm.neon.vrecps.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
ret <4 x float> %tmp3
}
@@ -62,7 +62,7 @@ declare <4 x float> @llvm.arm.neon.vrecps.v4f32(<4 x float>, <4 x float>) nounwi
define <2 x i32> @vrsqrtei32(<2 x i32>* %A) nounwind {
;CHECK-LABEL: vrsqrtei32:
;CHECK: vrsqrte.u32
- %tmp1 = load <2 x i32>* %A
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
%tmp2 = call <2 x i32> @llvm.arm.neon.vrsqrte.v2i32(<2 x i32> %tmp1)
ret <2 x i32> %tmp2
}
@@ -70,7 +70,7 @@ define <2 x i32> @vrsqrtei32(<2 x i32>* %A) nounwind {
define <4 x i32> @vrsqrteQi32(<4 x i32>* %A) nounwind {
;CHECK-LABEL: vrsqrteQi32:
;CHECK: vrsqrte.u32
- %tmp1 = load <4 x i32>* %A
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
%tmp2 = call <4 x i32> @llvm.arm.neon.vrsqrte.v4i32(<4 x i32> %tmp1)
ret <4 x i32> %tmp2
}
@@ -78,7 +78,7 @@ define <4 x i32> @vrsqrteQi32(<4 x i32>* %A) nounwind {
define <2 x float> @vrsqrtef32(<2 x float>* %A) nounwind {
;CHECK-LABEL: vrsqrtef32:
;CHECK: vrsqrte.f32
- %tmp1 = load <2 x float>* %A
+ %tmp1 = load <2 x float>, <2 x float>* %A
%tmp2 = call <2 x float> @llvm.arm.neon.vrsqrte.v2f32(<2 x float> %tmp1)
ret <2 x float> %tmp2
}
@@ -86,7 +86,7 @@ define <2 x float> @vrsqrtef32(<2 x float>* %A) nounwind {
define <4 x float> @vrsqrteQf32(<4 x float>* %A) nounwind {
;CHECK-LABEL: vrsqrteQf32:
;CHECK: vrsqrte.f32
- %tmp1 = load <4 x float>* %A
+ %tmp1 = load <4 x float>, <4 x float>* %A
%tmp2 = call <4 x float> @llvm.arm.neon.vrsqrte.v4f32(<4 x float> %tmp1)
ret <4 x float> %tmp2
}
@@ -100,8 +100,8 @@ declare <4 x float> @llvm.arm.neon.vrsqrte.v4f32(<4 x float>) nounwind readnone
define <2 x float> @vrsqrtsf32(<2 x float>* %A, <2 x float>* %B) nounwind {
;CHECK-LABEL: vrsqrtsf32:
;CHECK: vrsqrts.f32
- %tmp1 = load <2 x float>* %A
- %tmp2 = load <2 x float>* %B
+ %tmp1 = load <2 x float>, <2 x float>* %A
+ %tmp2 = load <2 x float>, <2 x float>* %B
%tmp3 = call <2 x float> @llvm.arm.neon.vrsqrts.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
ret <2 x float> %tmp3
}
@@ -109,8 +109,8 @@ define <2 x float> @vrsqrtsf32(<2 x float>* %A, <2 x float>* %B) nounwind {
define <4 x float> @vrsqrtsQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
;CHECK-LABEL: vrsqrtsQf32:
;CHECK: vrsqrts.f32
- %tmp1 = load <4 x float>* %A
- %tmp2 = load <4 x float>* %B
+ %tmp1 = load <4 x float>, <4 x float>* %A
+ %tmp2 = load <4 x float>, <4 x float>* %B
%tmp3 = call <4 x float> @llvm.arm.neon.vrsqrts.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
ret <4 x float> %tmp3
}
diff --git a/test/CodeGen/ARM/vrev.ll b/test/CodeGen/ARM/vrev.ll
index 7215ad615e81..a20d4b6baf29 100644
--- a/test/CodeGen/ARM/vrev.ll
+++ b/test/CodeGen/ARM/vrev.ll
@@ -3,7 +3,7 @@
define <8 x i8> @test_vrev64D8(<8 x i8>* %A) nounwind {
;CHECK-LABEL: test_vrev64D8:
;CHECK: vrev64.8
- %tmp1 = load <8 x i8>* %A
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
%tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
ret <8 x i8> %tmp2
}
@@ -11,7 +11,7 @@ define <8 x i8> @test_vrev64D8(<8 x i8>* %A) nounwind {
define <4 x i16> @test_vrev64D16(<4 x i16>* %A) nounwind {
;CHECK-LABEL: test_vrev64D16:
;CHECK: vrev64.16
- %tmp1 = load <4 x i16>* %A
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
%tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
ret <4 x i16> %tmp2
}
@@ -19,7 +19,7 @@ define <4 x i16> @test_vrev64D16(<4 x i16>* %A) nounwind {
define <2 x i32> @test_vrev64D32(<2 x i32>* %A) nounwind {
;CHECK-LABEL: test_vrev64D32:
;CHECK: vrev64.32
- %tmp1 = load <2 x i32>* %A
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
%tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <2 x i32> <i32 1, i32 0>
ret <2 x i32> %tmp2
}
@@ -27,7 +27,7 @@ define <2 x i32> @test_vrev64D32(<2 x i32>* %A) nounwind {
define <2 x float> @test_vrev64Df(<2 x float>* %A) nounwind {
;CHECK-LABEL: test_vrev64Df:
;CHECK: vrev64.32
- %tmp1 = load <2 x float>* %A
+ %tmp1 = load <2 x float>, <2 x float>* %A
%tmp2 = shufflevector <2 x float> %tmp1, <2 x float> undef, <2 x i32> <i32 1, i32 0>
ret <2 x float> %tmp2
}
@@ -35,7 +35,7 @@ define <2 x float> @test_vrev64Df(<2 x float>* %A) nounwind {
define <16 x i8> @test_vrev64Q8(<16 x i8>* %A) nounwind {
;CHECK-LABEL: test_vrev64Q8:
;CHECK: vrev64.8
- %tmp1 = load <16 x i8>* %A
+ %tmp1 = load <16 x i8>, <16 x i8>* %A
%tmp2 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <16 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>
ret <16 x i8> %tmp2
}
@@ -43,7 +43,7 @@ define <16 x i8> @test_vrev64Q8(<16 x i8>* %A) nounwind {
define <8 x i16> @test_vrev64Q16(<8 x i16>* %A) nounwind {
;CHECK-LABEL: test_vrev64Q16:
;CHECK: vrev64.16
- %tmp1 = load <8 x i16>* %A
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
%tmp2 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
ret <8 x i16> %tmp2
}
@@ -51,7 +51,7 @@ define <8 x i16> @test_vrev64Q16(<8 x i16>* %A) nounwind {
define <4 x i32> @test_vrev64Q32(<4 x i32>* %A) nounwind {
;CHECK-LABEL: test_vrev64Q32:
;CHECK: vrev64.32
- %tmp1 = load <4 x i32>* %A
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
%tmp2 = shufflevector <4 x i32> %tmp1, <4 x i32> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
ret <4 x i32> %tmp2
}
@@ -59,7 +59,7 @@ define <4 x i32> @test_vrev64Q32(<4 x i32>* %A) nounwind {
define <4 x float> @test_vrev64Qf(<4 x float>* %A) nounwind {
;CHECK-LABEL: test_vrev64Qf:
;CHECK: vrev64.32
- %tmp1 = load <4 x float>* %A
+ %tmp1 = load <4 x float>, <4 x float>* %A
%tmp2 = shufflevector <4 x float> %tmp1, <4 x float> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
ret <4 x float> %tmp2
}
@@ -67,7 +67,7 @@ define <4 x float> @test_vrev64Qf(<4 x float>* %A) nounwind {
define <8 x i8> @test_vrev32D8(<8 x i8>* %A) nounwind {
;CHECK-LABEL: test_vrev32D8:
;CHECK: vrev32.8
- %tmp1 = load <8 x i8>* %A
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
%tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
ret <8 x i8> %tmp2
}
@@ -75,7 +75,7 @@ define <8 x i8> @test_vrev32D8(<8 x i8>* %A) nounwind {
define <4 x i16> @test_vrev32D16(<4 x i16>* %A) nounwind {
;CHECK-LABEL: test_vrev32D16:
;CHECK: vrev32.16
- %tmp1 = load <4 x i16>* %A
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
%tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
ret <4 x i16> %tmp2
}
@@ -83,7 +83,7 @@ define <4 x i16> @test_vrev32D16(<4 x i16>* %A) nounwind {
define <16 x i8> @test_vrev32Q8(<16 x i8>* %A) nounwind {
;CHECK-LABEL: test_vrev32Q8:
;CHECK: vrev32.8
- %tmp1 = load <16 x i8>* %A
+ %tmp1 = load <16 x i8>, <16 x i8>* %A
%tmp2 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
ret <16 x i8> %tmp2
}
@@ -91,7 +91,7 @@ define <16 x i8> @test_vrev32Q8(<16 x i8>* %A) nounwind {
define <8 x i16> @test_vrev32Q16(<8 x i16>* %A) nounwind {
;CHECK-LABEL: test_vrev32Q16:
;CHECK: vrev32.16
- %tmp1 = load <8 x i16>* %A
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
%tmp2 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
ret <8 x i16> %tmp2
}
@@ -99,7 +99,7 @@ define <8 x i16> @test_vrev32Q16(<8 x i16>* %A) nounwind {
define <8 x i8> @test_vrev16D8(<8 x i8>* %A) nounwind {
;CHECK-LABEL: test_vrev16D8:
;CHECK: vrev16.8
- %tmp1 = load <8 x i8>* %A
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
%tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
ret <8 x i8> %tmp2
}
@@ -107,7 +107,7 @@ define <8 x i8> @test_vrev16D8(<8 x i8>* %A) nounwind {
define <16 x i8> @test_vrev16Q8(<16 x i8>* %A) nounwind {
;CHECK-LABEL: test_vrev16Q8:
;CHECK: vrev16.8
- %tmp1 = load <16 x i8>* %A
+ %tmp1 = load <16 x i8>, <16 x i8>* %A
%tmp2 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
ret <16 x i8> %tmp2
}
@@ -117,7 +117,7 @@ define <16 x i8> @test_vrev16Q8(<16 x i8>* %A) nounwind {
define <8 x i8> @test_vrev64D8_undef(<8 x i8>* %A) nounwind {
;CHECK-LABEL: test_vrev64D8_undef:
;CHECK: vrev64.8
- %tmp1 = load <8 x i8>* %A
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
%tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> <i32 7, i32 undef, i32 undef, i32 4, i32 3, i32 2, i32 1, i32 0>
ret <8 x i8> %tmp2
}
@@ -125,7 +125,7 @@ define <8 x i8> @test_vrev64D8_undef(<8 x i8>* %A) nounwind {
define <8 x i16> @test_vrev32Q16_undef(<8 x i16>* %A) nounwind {
;CHECK-LABEL: test_vrev32Q16_undef:
;CHECK: vrev32.16
- %tmp1 = load <8 x i16>* %A
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
%tmp2 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <8 x i32> <i32 undef, i32 0, i32 undef, i32 2, i32 5, i32 4, i32 7, i32 undef>
ret <8 x i16> %tmp2
}
@@ -136,7 +136,7 @@ define void @test_with_vcombine(<4 x float>* %v) nounwind {
;CHECK-LABEL: test_with_vcombine:
;CHECK-NOT: vext
;CHECK: vrev64.32
- %tmp1 = load <4 x float>* %v, align 16
+ %tmp1 = load <4 x float>, <4 x float>* %v, align 16
%tmp2 = bitcast <4 x float> %tmp1 to <2 x double>
%tmp3 = extractelement <2 x double> %tmp2, i32 0
%tmp4 = bitcast double %tmp3 to <2 x float>
@@ -155,7 +155,7 @@ define void @test_vrev64(<4 x i16>* nocapture %source, <2 x i16>* nocapture %dst
; CHECK: vst1.32
entry:
%0 = bitcast <4 x i16>* %source to <8 x i16>*
- %tmp2 = load <8 x i16>* %0, align 4
+ %tmp2 = load <8 x i16>, <8 x i16>* %0, align 4
%tmp3 = extractelement <8 x i16> %tmp2, i32 6
%tmp5 = insertelement <2 x i16> undef, i16 %tmp3, i32 0
%tmp9 = extractelement <8 x i16> %tmp2, i32 5
@@ -171,9 +171,9 @@ define void @float_vrev64(float* nocapture %source, <4 x float>* nocapture %dest
; CHECK: vrev64.32
entry:
%0 = bitcast float* %source to <4 x float>*
- %tmp2 = load <4 x float>* %0, align 4
+ %tmp2 = load <4 x float>, <4 x float>* %0, align 4
%tmp5 = shufflevector <4 x float> <float 0.000000e+00, float undef, float undef, float undef>, <4 x float> %tmp2, <4 x i32> <i32 0, i32 7, i32 0, i32 0>
- %arrayidx8 = getelementptr inbounds <4 x float>* %dest, i32 11
+ %arrayidx8 = getelementptr inbounds <4 x float>, <4 x float>* %dest, i32 11
store <4 x float> %tmp5, <4 x float>* %arrayidx8, align 4
ret void
}
diff --git a/test/CodeGen/ARM/vselect_imax.ll b/test/CodeGen/ARM/vselect_imax.ll
index e999034fa47e..0eb051036d99 100644
--- a/test/CodeGen/ARM/vselect_imax.ll
+++ b/test/CodeGen/ARM/vselect_imax.ll
@@ -18,8 +18,8 @@ define void @vmax_v4i32(<4 x i32>* %m, <4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: func_blend10:
define void @func_blend10(%T0_10* %loadaddr, %T0_10* %loadaddr2,
%T1_10* %blend, %T0_10* %storeaddr) {
- %v0 = load %T0_10* %loadaddr
- %v1 = load %T0_10* %loadaddr2
+ %v0 = load %T0_10, %T0_10* %loadaddr
+ %v1 = load %T0_10, %T0_10* %loadaddr2
%c = icmp slt %T0_10 %v0, %v1
; CHECK: vbsl
; CHECK: vbsl
@@ -34,8 +34,8 @@ define void @func_blend10(%T0_10* %loadaddr, %T0_10* %loadaddr2,
; CHECK-LABEL: func_blend14:
define void @func_blend14(%T0_14* %loadaddr, %T0_14* %loadaddr2,
%T1_14* %blend, %T0_14* %storeaddr) {
- %v0 = load %T0_14* %loadaddr
- %v1 = load %T0_14* %loadaddr2
+ %v0 = load %T0_14, %T0_14* %loadaddr
+ %v1 = load %T0_14, %T0_14* %loadaddr2
%c = icmp slt %T0_14 %v0, %v1
; CHECK: vbsl
; CHECK: vbsl
@@ -52,8 +52,8 @@ define void @func_blend15(%T0_15* %loadaddr, %T0_15* %loadaddr2,
%T1_15* %blend, %T0_15* %storeaddr) {
; CHECK: vbsl
; CHECK: vbsl
- %v0 = load %T0_15* %loadaddr
- %v1 = load %T0_15* %loadaddr2
+ %v0 = load %T0_15, %T0_15* %loadaddr
+ %v1 = load %T0_15, %T0_15* %loadaddr2
%c = icmp slt %T0_15 %v0, %v1
; COST: func_blend15
; COST: cost of 82 {{.*}} select
@@ -68,8 +68,8 @@ define void @func_blend18(%T0_18* %loadaddr, %T0_18* %loadaddr2,
%T1_18* %blend, %T0_18* %storeaddr) {
; CHECK: vbsl
; CHECK: vbsl
- %v0 = load %T0_18* %loadaddr
- %v1 = load %T0_18* %loadaddr2
+ %v0 = load %T0_18, %T0_18* %loadaddr
+ %v1 = load %T0_18, %T0_18* %loadaddr2
%c = icmp slt %T0_18 %v0, %v1
; COST: func_blend18
; COST: cost of 19 {{.*}} select
@@ -86,8 +86,8 @@ define void @func_blend19(%T0_19* %loadaddr, %T0_19* %loadaddr2,
; CHECK: vbsl
; CHECK: vbsl
; CHECK: vbsl
- %v0 = load %T0_19* %loadaddr
- %v1 = load %T0_19* %loadaddr2
+ %v0 = load %T0_19, %T0_19* %loadaddr
+ %v1 = load %T0_19, %T0_19* %loadaddr2
%c = icmp slt %T0_19 %v0, %v1
; COST: func_blend19
; COST: cost of 50 {{.*}} select
@@ -108,8 +108,8 @@ define void @func_blend20(%T0_20* %loadaddr, %T0_20* %loadaddr2,
; CHECK: vbsl
; CHECK: vbsl
; CHECK: vbsl
- %v0 = load %T0_20* %loadaddr
- %v1 = load %T0_20* %loadaddr2
+ %v0 = load %T0_20, %T0_20* %loadaddr
+ %v1 = load %T0_20, %T0_20* %loadaddr2
%c = icmp slt %T0_20 %v0, %v1
; COST: func_blend20
; COST: cost of 100 {{.*}} select
diff --git a/test/CodeGen/ARM/vshift.ll b/test/CodeGen/ARM/vshift.ll
index 618a137b5b05..31e4cb05dd20 100644
--- a/test/CodeGen/ARM/vshift.ll
+++ b/test/CodeGen/ARM/vshift.ll
@@ -3,8 +3,8 @@
define <8 x i8> @vshls8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: vshls8:
;CHECK: vshl.u8
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
+ %tmp2 = load <8 x i8>, <8 x i8>* %B
%tmp3 = shl <8 x i8> %tmp1, %tmp2
ret <8 x i8> %tmp3
}
@@ -12,8 +12,8 @@ define <8 x i8> @vshls8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
define <4 x i16> @vshls16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: vshls16:
;CHECK: vshl.u16
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
%tmp3 = shl <4 x i16> %tmp1, %tmp2
ret <4 x i16> %tmp3
}
@@ -21,8 +21,8 @@ define <4 x i16> @vshls16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
define <2 x i32> @vshls32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: vshls32:
;CHECK: vshl.u32
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
%tmp3 = shl <2 x i32> %tmp1, %tmp2
ret <2 x i32> %tmp3
}
@@ -30,8 +30,8 @@ define <2 x i32> @vshls32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
define <1 x i64> @vshls64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
;CHECK-LABEL: vshls64:
;CHECK: vshl.u64
- %tmp1 = load <1 x i64>* %A
- %tmp2 = load <1 x i64>* %B
+ %tmp1 = load <1 x i64>, <1 x i64>* %A
+ %tmp2 = load <1 x i64>, <1 x i64>* %B
%tmp3 = shl <1 x i64> %tmp1, %tmp2
ret <1 x i64> %tmp3
}
@@ -39,7 +39,7 @@ define <1 x i64> @vshls64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
define <8 x i8> @vshli8(<8 x i8>* %A) nounwind {
;CHECK-LABEL: vshli8:
;CHECK: vshl.i8
- %tmp1 = load <8 x i8>* %A
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
%tmp2 = shl <8 x i8> %tmp1, < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >
ret <8 x i8> %tmp2
}
@@ -47,7 +47,7 @@ define <8 x i8> @vshli8(<8 x i8>* %A) nounwind {
define <4 x i16> @vshli16(<4 x i16>* %A) nounwind {
;CHECK-LABEL: vshli16:
;CHECK: vshl.i16
- %tmp1 = load <4 x i16>* %A
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
%tmp2 = shl <4 x i16> %tmp1, < i16 15, i16 15, i16 15, i16 15 >
ret <4 x i16> %tmp2
}
@@ -55,7 +55,7 @@ define <4 x i16> @vshli16(<4 x i16>* %A) nounwind {
define <2 x i32> @vshli32(<2 x i32>* %A) nounwind {
;CHECK-LABEL: vshli32:
;CHECK: vshl.i32
- %tmp1 = load <2 x i32>* %A
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
%tmp2 = shl <2 x i32> %tmp1, < i32 31, i32 31 >
ret <2 x i32> %tmp2
}
@@ -63,7 +63,7 @@ define <2 x i32> @vshli32(<2 x i32>* %A) nounwind {
define <1 x i64> @vshli64(<1 x i64>* %A) nounwind {
;CHECK-LABEL: vshli64:
;CHECK: vshl.i64
- %tmp1 = load <1 x i64>* %A
+ %tmp1 = load <1 x i64>, <1 x i64>* %A
%tmp2 = shl <1 x i64> %tmp1, < i64 63 >
ret <1 x i64> %tmp2
}
@@ -71,8 +71,8 @@ define <1 x i64> @vshli64(<1 x i64>* %A) nounwind {
define <16 x i8> @vshlQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: vshlQs8:
;CHECK: vshl.u8
- %tmp1 = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
+ %tmp1 = load <16 x i8>, <16 x i8>* %A
+ %tmp2 = load <16 x i8>, <16 x i8>* %B
%tmp3 = shl <16 x i8> %tmp1, %tmp2
ret <16 x i8> %tmp3
}
@@ -80,8 +80,8 @@ define <16 x i8> @vshlQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
define <8 x i16> @vshlQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: vshlQs16:
;CHECK: vshl.u16
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
+ %tmp2 = load <8 x i16>, <8 x i16>* %B
%tmp3 = shl <8 x i16> %tmp1, %tmp2
ret <8 x i16> %tmp3
}
@@ -89,8 +89,8 @@ define <8 x i16> @vshlQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
define <4 x i32> @vshlQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: vshlQs32:
;CHECK: vshl.u32
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
+ %tmp2 = load <4 x i32>, <4 x i32>* %B
%tmp3 = shl <4 x i32> %tmp1, %tmp2
ret <4 x i32> %tmp3
}
@@ -98,8 +98,8 @@ define <4 x i32> @vshlQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
define <2 x i64> @vshlQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
;CHECK-LABEL: vshlQs64:
;CHECK: vshl.u64
- %tmp1 = load <2 x i64>* %A
- %tmp2 = load <2 x i64>* %B
+ %tmp1 = load <2 x i64>, <2 x i64>* %A
+ %tmp2 = load <2 x i64>, <2 x i64>* %B
%tmp3 = shl <2 x i64> %tmp1, %tmp2
ret <2 x i64> %tmp3
}
@@ -107,7 +107,7 @@ define <2 x i64> @vshlQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
define <16 x i8> @vshlQi8(<16 x i8>* %A) nounwind {
;CHECK-LABEL: vshlQi8:
;CHECK: vshl.i8
- %tmp1 = load <16 x i8>* %A
+ %tmp1 = load <16 x i8>, <16 x i8>* %A
%tmp2 = shl <16 x i8> %tmp1, < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >
ret <16 x i8> %tmp2
}
@@ -115,7 +115,7 @@ define <16 x i8> @vshlQi8(<16 x i8>* %A) nounwind {
define <8 x i16> @vshlQi16(<8 x i16>* %A) nounwind {
;CHECK-LABEL: vshlQi16:
;CHECK: vshl.i16
- %tmp1 = load <8 x i16>* %A
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
%tmp2 = shl <8 x i16> %tmp1, < i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15 >
ret <8 x i16> %tmp2
}
@@ -123,7 +123,7 @@ define <8 x i16> @vshlQi16(<8 x i16>* %A) nounwind {
define <4 x i32> @vshlQi32(<4 x i32>* %A) nounwind {
;CHECK-LABEL: vshlQi32:
;CHECK: vshl.i32
- %tmp1 = load <4 x i32>* %A
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
%tmp2 = shl <4 x i32> %tmp1, < i32 31, i32 31, i32 31, i32 31 >
ret <4 x i32> %tmp2
}
@@ -131,7 +131,7 @@ define <4 x i32> @vshlQi32(<4 x i32>* %A) nounwind {
define <2 x i64> @vshlQi64(<2 x i64>* %A) nounwind {
;CHECK-LABEL: vshlQi64:
;CHECK: vshl.i64
- %tmp1 = load <2 x i64>* %A
+ %tmp1 = load <2 x i64>, <2 x i64>* %A
%tmp2 = shl <2 x i64> %tmp1, < i64 63, i64 63 >
ret <2 x i64> %tmp2
}
@@ -140,8 +140,8 @@ define <8 x i8> @vlshru8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: vlshru8:
;CHECK: vneg.s8
;CHECK: vshl.u8
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
+ %tmp2 = load <8 x i8>, <8 x i8>* %B
%tmp3 = lshr <8 x i8> %tmp1, %tmp2
ret <8 x i8> %tmp3
}
@@ -150,8 +150,8 @@ define <4 x i16> @vlshru16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: vlshru16:
;CHECK: vneg.s16
;CHECK: vshl.u16
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
%tmp3 = lshr <4 x i16> %tmp1, %tmp2
ret <4 x i16> %tmp3
}
@@ -160,8 +160,8 @@ define <2 x i32> @vlshru32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: vlshru32:
;CHECK: vneg.s32
;CHECK: vshl.u32
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
%tmp3 = lshr <2 x i32> %tmp1, %tmp2
ret <2 x i32> %tmp3
}
@@ -170,8 +170,8 @@ define <1 x i64> @vlshru64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
;CHECK-LABEL: vlshru64:
;CHECK: vsub.i64
;CHECK: vshl.u64
- %tmp1 = load <1 x i64>* %A
- %tmp2 = load <1 x i64>* %B
+ %tmp1 = load <1 x i64>, <1 x i64>* %A
+ %tmp2 = load <1 x i64>, <1 x i64>* %B
%tmp3 = lshr <1 x i64> %tmp1, %tmp2
ret <1 x i64> %tmp3
}
@@ -179,7 +179,7 @@ define <1 x i64> @vlshru64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
define <8 x i8> @vlshri8(<8 x i8>* %A) nounwind {
;CHECK-LABEL: vlshri8:
;CHECK: vshr.u8
- %tmp1 = load <8 x i8>* %A
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
%tmp2 = lshr <8 x i8> %tmp1, < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >
ret <8 x i8> %tmp2
}
@@ -187,7 +187,7 @@ define <8 x i8> @vlshri8(<8 x i8>* %A) nounwind {
define <4 x i16> @vlshri16(<4 x i16>* %A) nounwind {
;CHECK-LABEL: vlshri16:
;CHECK: vshr.u16
- %tmp1 = load <4 x i16>* %A
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
%tmp2 = lshr <4 x i16> %tmp1, < i16 15, i16 15, i16 15, i16 15 >
ret <4 x i16> %tmp2
}
@@ -195,7 +195,7 @@ define <4 x i16> @vlshri16(<4 x i16>* %A) nounwind {
define <2 x i32> @vlshri32(<2 x i32>* %A) nounwind {
;CHECK-LABEL: vlshri32:
;CHECK: vshr.u32
- %tmp1 = load <2 x i32>* %A
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
%tmp2 = lshr <2 x i32> %tmp1, < i32 31, i32 31 >
ret <2 x i32> %tmp2
}
@@ -203,7 +203,7 @@ define <2 x i32> @vlshri32(<2 x i32>* %A) nounwind {
define <1 x i64> @vlshri64(<1 x i64>* %A) nounwind {
;CHECK-LABEL: vlshri64:
;CHECK: vshr.u64
- %tmp1 = load <1 x i64>* %A
+ %tmp1 = load <1 x i64>, <1 x i64>* %A
%tmp2 = lshr <1 x i64> %tmp1, < i64 63 >
ret <1 x i64> %tmp2
}
@@ -212,8 +212,8 @@ define <16 x i8> @vlshrQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: vlshrQu8:
;CHECK: vneg.s8
;CHECK: vshl.u8
- %tmp1 = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
+ %tmp1 = load <16 x i8>, <16 x i8>* %A
+ %tmp2 = load <16 x i8>, <16 x i8>* %B
%tmp3 = lshr <16 x i8> %tmp1, %tmp2
ret <16 x i8> %tmp3
}
@@ -222,8 +222,8 @@ define <8 x i16> @vlshrQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: vlshrQu16:
;CHECK: vneg.s16
;CHECK: vshl.u16
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
+ %tmp2 = load <8 x i16>, <8 x i16>* %B
%tmp3 = lshr <8 x i16> %tmp1, %tmp2
ret <8 x i16> %tmp3
}
@@ -232,8 +232,8 @@ define <4 x i32> @vlshrQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: vlshrQu32:
;CHECK: vneg.s32
;CHECK: vshl.u32
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
+ %tmp2 = load <4 x i32>, <4 x i32>* %B
%tmp3 = lshr <4 x i32> %tmp1, %tmp2
ret <4 x i32> %tmp3
}
@@ -242,8 +242,8 @@ define <2 x i64> @vlshrQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
;CHECK-LABEL: vlshrQu64:
;CHECK: vsub.i64
;CHECK: vshl.u64
- %tmp1 = load <2 x i64>* %A
- %tmp2 = load <2 x i64>* %B
+ %tmp1 = load <2 x i64>, <2 x i64>* %A
+ %tmp2 = load <2 x i64>, <2 x i64>* %B
%tmp3 = lshr <2 x i64> %tmp1, %tmp2
ret <2 x i64> %tmp3
}
@@ -251,7 +251,7 @@ define <2 x i64> @vlshrQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
define <16 x i8> @vlshrQi8(<16 x i8>* %A) nounwind {
;CHECK-LABEL: vlshrQi8:
;CHECK: vshr.u8
- %tmp1 = load <16 x i8>* %A
+ %tmp1 = load <16 x i8>, <16 x i8>* %A
%tmp2 = lshr <16 x i8> %tmp1, < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >
ret <16 x i8> %tmp2
}
@@ -259,7 +259,7 @@ define <16 x i8> @vlshrQi8(<16 x i8>* %A) nounwind {
define <8 x i16> @vlshrQi16(<8 x i16>* %A) nounwind {
;CHECK-LABEL: vlshrQi16:
;CHECK: vshr.u16
- %tmp1 = load <8 x i16>* %A
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
%tmp2 = lshr <8 x i16> %tmp1, < i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15 >
ret <8 x i16> %tmp2
}
@@ -267,7 +267,7 @@ define <8 x i16> @vlshrQi16(<8 x i16>* %A) nounwind {
define <4 x i32> @vlshrQi32(<4 x i32>* %A) nounwind {
;CHECK-LABEL: vlshrQi32:
;CHECK: vshr.u32
- %tmp1 = load <4 x i32>* %A
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
%tmp2 = lshr <4 x i32> %tmp1, < i32 31, i32 31, i32 31, i32 31 >
ret <4 x i32> %tmp2
}
@@ -275,7 +275,7 @@ define <4 x i32> @vlshrQi32(<4 x i32>* %A) nounwind {
define <2 x i64> @vlshrQi64(<2 x i64>* %A) nounwind {
;CHECK-LABEL: vlshrQi64:
;CHECK: vshr.u64
- %tmp1 = load <2 x i64>* %A
+ %tmp1 = load <2 x i64>, <2 x i64>* %A
%tmp2 = lshr <2 x i64> %tmp1, < i64 63, i64 63 >
ret <2 x i64> %tmp2
}
@@ -291,8 +291,8 @@ define <8 x i8> @vashrs8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: vashrs8:
;CHECK: vneg.s8
;CHECK: vshl.s8
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
+ %tmp2 = load <8 x i8>, <8 x i8>* %B
%tmp3 = ashr <8 x i8> %tmp1, %tmp2
ret <8 x i8> %tmp3
}
@@ -301,8 +301,8 @@ define <4 x i16> @vashrs16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: vashrs16:
;CHECK: vneg.s16
;CHECK: vshl.s16
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
%tmp3 = ashr <4 x i16> %tmp1, %tmp2
ret <4 x i16> %tmp3
}
@@ -311,8 +311,8 @@ define <2 x i32> @vashrs32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: vashrs32:
;CHECK: vneg.s32
;CHECK: vshl.s32
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
%tmp3 = ashr <2 x i32> %tmp1, %tmp2
ret <2 x i32> %tmp3
}
@@ -321,8 +321,8 @@ define <1 x i64> @vashrs64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
;CHECK-LABEL: vashrs64:
;CHECK: vsub.i64
;CHECK: vshl.s64
- %tmp1 = load <1 x i64>* %A
- %tmp2 = load <1 x i64>* %B
+ %tmp1 = load <1 x i64>, <1 x i64>* %A
+ %tmp2 = load <1 x i64>, <1 x i64>* %B
%tmp3 = ashr <1 x i64> %tmp1, %tmp2
ret <1 x i64> %tmp3
}
@@ -330,7 +330,7 @@ define <1 x i64> @vashrs64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
define <8 x i8> @vashri8(<8 x i8>* %A) nounwind {
;CHECK-LABEL: vashri8:
;CHECK: vshr.s8
- %tmp1 = load <8 x i8>* %A
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
%tmp2 = ashr <8 x i8> %tmp1, < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >
ret <8 x i8> %tmp2
}
@@ -338,7 +338,7 @@ define <8 x i8> @vashri8(<8 x i8>* %A) nounwind {
define <4 x i16> @vashri16(<4 x i16>* %A) nounwind {
;CHECK-LABEL: vashri16:
;CHECK: vshr.s16
- %tmp1 = load <4 x i16>* %A
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
%tmp2 = ashr <4 x i16> %tmp1, < i16 15, i16 15, i16 15, i16 15 >
ret <4 x i16> %tmp2
}
@@ -346,7 +346,7 @@ define <4 x i16> @vashri16(<4 x i16>* %A) nounwind {
define <2 x i32> @vashri32(<2 x i32>* %A) nounwind {
;CHECK-LABEL: vashri32:
;CHECK: vshr.s32
- %tmp1 = load <2 x i32>* %A
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
%tmp2 = ashr <2 x i32> %tmp1, < i32 31, i32 31 >
ret <2 x i32> %tmp2
}
@@ -354,7 +354,7 @@ define <2 x i32> @vashri32(<2 x i32>* %A) nounwind {
define <1 x i64> @vashri64(<1 x i64>* %A) nounwind {
;CHECK-LABEL: vashri64:
;CHECK: vshr.s64
- %tmp1 = load <1 x i64>* %A
+ %tmp1 = load <1 x i64>, <1 x i64>* %A
%tmp2 = ashr <1 x i64> %tmp1, < i64 63 >
ret <1 x i64> %tmp2
}
@@ -363,8 +363,8 @@ define <16 x i8> @vashrQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: vashrQs8:
;CHECK: vneg.s8
;CHECK: vshl.s8
- %tmp1 = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
+ %tmp1 = load <16 x i8>, <16 x i8>* %A
+ %tmp2 = load <16 x i8>, <16 x i8>* %B
%tmp3 = ashr <16 x i8> %tmp1, %tmp2
ret <16 x i8> %tmp3
}
@@ -373,8 +373,8 @@ define <8 x i16> @vashrQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: vashrQs16:
;CHECK: vneg.s16
;CHECK: vshl.s16
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
+ %tmp2 = load <8 x i16>, <8 x i16>* %B
%tmp3 = ashr <8 x i16> %tmp1, %tmp2
ret <8 x i16> %tmp3
}
@@ -383,8 +383,8 @@ define <4 x i32> @vashrQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: vashrQs32:
;CHECK: vneg.s32
;CHECK: vshl.s32
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
+ %tmp2 = load <4 x i32>, <4 x i32>* %B
%tmp3 = ashr <4 x i32> %tmp1, %tmp2
ret <4 x i32> %tmp3
}
@@ -393,8 +393,8 @@ define <2 x i64> @vashrQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
;CHECK-LABEL: vashrQs64:
;CHECK: vsub.i64
;CHECK: vshl.s64
- %tmp1 = load <2 x i64>* %A
- %tmp2 = load <2 x i64>* %B
+ %tmp1 = load <2 x i64>, <2 x i64>* %A
+ %tmp2 = load <2 x i64>, <2 x i64>* %B
%tmp3 = ashr <2 x i64> %tmp1, %tmp2
ret <2 x i64> %tmp3
}
@@ -402,7 +402,7 @@ define <2 x i64> @vashrQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
define <16 x i8> @vashrQi8(<16 x i8>* %A) nounwind {
;CHECK-LABEL: vashrQi8:
;CHECK: vshr.s8
- %tmp1 = load <16 x i8>* %A
+ %tmp1 = load <16 x i8>, <16 x i8>* %A
%tmp2 = ashr <16 x i8> %tmp1, < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >
ret <16 x i8> %tmp2
}
@@ -410,7 +410,7 @@ define <16 x i8> @vashrQi8(<16 x i8>* %A) nounwind {
define <8 x i16> @vashrQi16(<8 x i16>* %A) nounwind {
;CHECK-LABEL: vashrQi16:
;CHECK: vshr.s16
- %tmp1 = load <8 x i16>* %A
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
%tmp2 = ashr <8 x i16> %tmp1, < i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15 >
ret <8 x i16> %tmp2
}
@@ -418,7 +418,7 @@ define <8 x i16> @vashrQi16(<8 x i16>* %A) nounwind {
define <4 x i32> @vashrQi32(<4 x i32>* %A) nounwind {
;CHECK-LABEL: vashrQi32:
;CHECK: vshr.s32
- %tmp1 = load <4 x i32>* %A
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
%tmp2 = ashr <4 x i32> %tmp1, < i32 31, i32 31, i32 31, i32 31 >
ret <4 x i32> %tmp2
}
@@ -426,7 +426,7 @@ define <4 x i32> @vashrQi32(<4 x i32>* %A) nounwind {
define <2 x i64> @vashrQi64(<2 x i64>* %A) nounwind {
;CHECK-LABEL: vashrQi64:
;CHECK: vshr.s64
- %tmp1 = load <2 x i64>* %A
+ %tmp1 = load <2 x i64>, <2 x i64>* %A
%tmp2 = ashr <2 x i64> %tmp1, < i64 63, i64 63 >
ret <2 x i64> %tmp2
}
diff --git a/test/CodeGen/ARM/vshiftins.ll b/test/CodeGen/ARM/vshiftins.ll
index 9526c3222017..29487378317d 100644
--- a/test/CodeGen/ARM/vshiftins.ll
+++ b/test/CodeGen/ARM/vshiftins.ll
@@ -3,8 +3,8 @@
define <8 x i8> @vsli8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: vsli8:
;CHECK: vsli.8
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
+ %tmp2 = load <8 x i8>, <8 x i8>* %B
%tmp3 = call <8 x i8> @llvm.arm.neon.vshiftins.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i8> < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >)
ret <8 x i8> %tmp3
}
@@ -12,8 +12,8 @@ define <8 x i8> @vsli8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
define <4 x i16> @vsli16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: vsli16:
;CHECK: vsli.16
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
%tmp3 = call <4 x i16> @llvm.arm.neon.vshiftins.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i16> < i16 15, i16 15, i16 15, i16 15 >)
ret <4 x i16> %tmp3
}
@@ -21,8 +21,8 @@ define <4 x i16> @vsli16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
define <2 x i32> @vsli32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: vsli32:
;CHECK: vsli.32
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
%tmp3 = call <2 x i32> @llvm.arm.neon.vshiftins.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2, <2 x i32> < i32 31, i32 31 >)
ret <2 x i32> %tmp3
}
@@ -30,8 +30,8 @@ define <2 x i32> @vsli32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
define <1 x i64> @vsli64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
;CHECK-LABEL: vsli64:
;CHECK: vsli.64
- %tmp1 = load <1 x i64>* %A
- %tmp2 = load <1 x i64>* %B
+ %tmp1 = load <1 x i64>, <1 x i64>* %A
+ %tmp2 = load <1 x i64>, <1 x i64>* %B
%tmp3 = call <1 x i64> @llvm.arm.neon.vshiftins.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2, <1 x i64> < i64 63 >)
ret <1 x i64> %tmp3
}
@@ -39,8 +39,8 @@ define <1 x i64> @vsli64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
define <16 x i8> @vsliQ8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: vsliQ8:
;CHECK: vsli.8
- %tmp1 = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
+ %tmp1 = load <16 x i8>, <16 x i8>* %A
+ %tmp2 = load <16 x i8>, <16 x i8>* %B
%tmp3 = call <16 x i8> @llvm.arm.neon.vshiftins.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i8> < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >)
ret <16 x i8> %tmp3
}
@@ -48,8 +48,8 @@ define <16 x i8> @vsliQ8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
define <8 x i16> @vsliQ16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: vsliQ16:
;CHECK: vsli.16
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
+ %tmp2 = load <8 x i16>, <8 x i16>* %B
%tmp3 = call <8 x i16> @llvm.arm.neon.vshiftins.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i16> < i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15 >)
ret <8 x i16> %tmp3
}
@@ -57,8 +57,8 @@ define <8 x i16> @vsliQ16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
define <4 x i32> @vsliQ32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: vsliQ32:
;CHECK: vsli.32
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
+ %tmp2 = load <4 x i32>, <4 x i32>* %B
%tmp3 = call <4 x i32> @llvm.arm.neon.vshiftins.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> < i32 31, i32 31, i32 31, i32 31 >)
ret <4 x i32> %tmp3
}
@@ -66,8 +66,8 @@ define <4 x i32> @vsliQ32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
define <2 x i64> @vsliQ64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
;CHECK-LABEL: vsliQ64:
;CHECK: vsli.64
- %tmp1 = load <2 x i64>* %A
- %tmp2 = load <2 x i64>* %B
+ %tmp1 = load <2 x i64>, <2 x i64>* %A
+ %tmp2 = load <2 x i64>, <2 x i64>* %B
%tmp3 = call <2 x i64> @llvm.arm.neon.vshiftins.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2, <2 x i64> < i64 63, i64 63 >)
ret <2 x i64> %tmp3
}
@@ -75,8 +75,8 @@ define <2 x i64> @vsliQ64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
define <8 x i8> @vsri8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: vsri8:
;CHECK: vsri.8
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
+ %tmp2 = load <8 x i8>, <8 x i8>* %B
%tmp3 = call <8 x i8> @llvm.arm.neon.vshiftins.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i8> < i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8 >)
ret <8 x i8> %tmp3
}
@@ -84,8 +84,8 @@ define <8 x i8> @vsri8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
define <4 x i16> @vsri16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: vsri16:
;CHECK: vsri.16
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
%tmp3 = call <4 x i16> @llvm.arm.neon.vshiftins.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i16> < i16 -16, i16 -16, i16 -16, i16 -16 >)
ret <4 x i16> %tmp3
}
@@ -93,8 +93,8 @@ define <4 x i16> @vsri16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
define <2 x i32> @vsri32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: vsri32:
;CHECK: vsri.32
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
%tmp3 = call <2 x i32> @llvm.arm.neon.vshiftins.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2, <2 x i32> < i32 -32, i32 -32 >)
ret <2 x i32> %tmp3
}
@@ -102,8 +102,8 @@ define <2 x i32> @vsri32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
define <1 x i64> @vsri64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
;CHECK-LABEL: vsri64:
;CHECK: vsri.64
- %tmp1 = load <1 x i64>* %A
- %tmp2 = load <1 x i64>* %B
+ %tmp1 = load <1 x i64>, <1 x i64>* %A
+ %tmp2 = load <1 x i64>, <1 x i64>* %B
%tmp3 = call <1 x i64> @llvm.arm.neon.vshiftins.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2, <1 x i64> < i64 -64 >)
ret <1 x i64> %tmp3
}
@@ -111,8 +111,8 @@ define <1 x i64> @vsri64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
define <16 x i8> @vsriQ8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: vsriQ8:
;CHECK: vsri.8
- %tmp1 = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
+ %tmp1 = load <16 x i8>, <16 x i8>* %A
+ %tmp2 = load <16 x i8>, <16 x i8>* %B
%tmp3 = call <16 x i8> @llvm.arm.neon.vshiftins.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i8> < i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8 >)
ret <16 x i8> %tmp3
}
@@ -120,8 +120,8 @@ define <16 x i8> @vsriQ8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
define <8 x i16> @vsriQ16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: vsriQ16:
;CHECK: vsri.16
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
+ %tmp2 = load <8 x i16>, <8 x i16>* %B
%tmp3 = call <8 x i16> @llvm.arm.neon.vshiftins.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i16> < i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16 >)
ret <8 x i16> %tmp3
}
@@ -129,8 +129,8 @@ define <8 x i16> @vsriQ16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
define <4 x i32> @vsriQ32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: vsriQ32:
;CHECK: vsri.32
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
+ %tmp2 = load <4 x i32>, <4 x i32>* %B
%tmp3 = call <4 x i32> @llvm.arm.neon.vshiftins.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> < i32 -32, i32 -32, i32 -32, i32 -32 >)
ret <4 x i32> %tmp3
}
@@ -138,8 +138,8 @@ define <4 x i32> @vsriQ32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
define <2 x i64> @vsriQ64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
;CHECK-LABEL: vsriQ64:
;CHECK: vsri.64
- %tmp1 = load <2 x i64>* %A
- %tmp2 = load <2 x i64>* %B
+ %tmp1 = load <2 x i64>, <2 x i64>* %A
+ %tmp2 = load <2 x i64>, <2 x i64>* %B
%tmp3 = call <2 x i64> @llvm.arm.neon.vshiftins.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2, <2 x i64> < i64 -64, i64 -64 >)
ret <2 x i64> %tmp3
}
diff --git a/test/CodeGen/ARM/vshl.ll b/test/CodeGen/ARM/vshl.ll
index 6228652fc715..ef76e3d9a36c 100644
--- a/test/CodeGen/ARM/vshl.ll
+++ b/test/CodeGen/ARM/vshl.ll
@@ -3,8 +3,8 @@
define <8 x i8> @vshls8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: vshls8:
;CHECK: vshl.s8
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
+ %tmp2 = load <8 x i8>, <8 x i8>* %B
%tmp3 = call <8 x i8> @llvm.arm.neon.vshifts.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
ret <8 x i8> %tmp3
}
@@ -12,8 +12,8 @@ define <8 x i8> @vshls8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
define <4 x i16> @vshls16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: vshls16:
;CHECK: vshl.s16
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
%tmp3 = call <4 x i16> @llvm.arm.neon.vshifts.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
ret <4 x i16> %tmp3
}
@@ -21,8 +21,8 @@ define <4 x i16> @vshls16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
define <2 x i32> @vshls32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: vshls32:
;CHECK: vshl.s32
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
%tmp3 = call <2 x i32> @llvm.arm.neon.vshifts.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
ret <2 x i32> %tmp3
}
@@ -30,8 +30,8 @@ define <2 x i32> @vshls32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
define <1 x i64> @vshls64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
;CHECK-LABEL: vshls64:
;CHECK: vshl.s64
- %tmp1 = load <1 x i64>* %A
- %tmp2 = load <1 x i64>* %B
+ %tmp1 = load <1 x i64>, <1 x i64>* %A
+ %tmp2 = load <1 x i64>, <1 x i64>* %B
%tmp3 = call <1 x i64> @llvm.arm.neon.vshifts.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2)
ret <1 x i64> %tmp3
}
@@ -39,8 +39,8 @@ define <1 x i64> @vshls64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
define <8 x i8> @vshlu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: vshlu8:
;CHECK: vshl.u8
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
+ %tmp2 = load <8 x i8>, <8 x i8>* %B
%tmp3 = call <8 x i8> @llvm.arm.neon.vshiftu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
ret <8 x i8> %tmp3
}
@@ -48,8 +48,8 @@ define <8 x i8> @vshlu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
define <4 x i16> @vshlu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: vshlu16:
;CHECK: vshl.u16
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
%tmp3 = call <4 x i16> @llvm.arm.neon.vshiftu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
ret <4 x i16> %tmp3
}
@@ -57,8 +57,8 @@ define <4 x i16> @vshlu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
define <2 x i32> @vshlu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: vshlu32:
;CHECK: vshl.u32
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
%tmp3 = call <2 x i32> @llvm.arm.neon.vshiftu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
ret <2 x i32> %tmp3
}
@@ -66,8 +66,8 @@ define <2 x i32> @vshlu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
define <1 x i64> @vshlu64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
;CHECK-LABEL: vshlu64:
;CHECK: vshl.u64
- %tmp1 = load <1 x i64>* %A
- %tmp2 = load <1 x i64>* %B
+ %tmp1 = load <1 x i64>, <1 x i64>* %A
+ %tmp2 = load <1 x i64>, <1 x i64>* %B
%tmp3 = call <1 x i64> @llvm.arm.neon.vshiftu.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2)
ret <1 x i64> %tmp3
}
@@ -75,8 +75,8 @@ define <1 x i64> @vshlu64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
define <16 x i8> @vshlQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: vshlQs8:
;CHECK: vshl.s8
- %tmp1 = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
+ %tmp1 = load <16 x i8>, <16 x i8>* %A
+ %tmp2 = load <16 x i8>, <16 x i8>* %B
%tmp3 = call <16 x i8> @llvm.arm.neon.vshifts.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
ret <16 x i8> %tmp3
}
@@ -84,8 +84,8 @@ define <16 x i8> @vshlQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
define <8 x i16> @vshlQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: vshlQs16:
;CHECK: vshl.s16
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
+ %tmp2 = load <8 x i16>, <8 x i16>* %B
%tmp3 = call <8 x i16> @llvm.arm.neon.vshifts.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
ret <8 x i16> %tmp3
}
@@ -93,8 +93,8 @@ define <8 x i16> @vshlQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
define <4 x i32> @vshlQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: vshlQs32:
;CHECK: vshl.s32
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
+ %tmp2 = load <4 x i32>, <4 x i32>* %B
%tmp3 = call <4 x i32> @llvm.arm.neon.vshifts.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
ret <4 x i32> %tmp3
}
@@ -102,8 +102,8 @@ define <4 x i32> @vshlQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
define <2 x i64> @vshlQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
;CHECK-LABEL: vshlQs64:
;CHECK: vshl.s64
- %tmp1 = load <2 x i64>* %A
- %tmp2 = load <2 x i64>* %B
+ %tmp1 = load <2 x i64>, <2 x i64>* %A
+ %tmp2 = load <2 x i64>, <2 x i64>* %B
%tmp3 = call <2 x i64> @llvm.arm.neon.vshifts.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
ret <2 x i64> %tmp3
}
@@ -111,8 +111,8 @@ define <2 x i64> @vshlQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
define <16 x i8> @vshlQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: vshlQu8:
;CHECK: vshl.u8
- %tmp1 = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
+ %tmp1 = load <16 x i8>, <16 x i8>* %A
+ %tmp2 = load <16 x i8>, <16 x i8>* %B
%tmp3 = call <16 x i8> @llvm.arm.neon.vshiftu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
ret <16 x i8> %tmp3
}
@@ -120,8 +120,8 @@ define <16 x i8> @vshlQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
define <8 x i16> @vshlQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: vshlQu16:
;CHECK: vshl.u16
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
+ %tmp2 = load <8 x i16>, <8 x i16>* %B
%tmp3 = call <8 x i16> @llvm.arm.neon.vshiftu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
ret <8 x i16> %tmp3
}
@@ -129,8 +129,8 @@ define <8 x i16> @vshlQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
define <4 x i32> @vshlQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: vshlQu32:
;CHECK: vshl.u32
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
+ %tmp2 = load <4 x i32>, <4 x i32>* %B
%tmp3 = call <4 x i32> @llvm.arm.neon.vshiftu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
ret <4 x i32> %tmp3
}
@@ -138,8 +138,8 @@ define <4 x i32> @vshlQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
define <2 x i64> @vshlQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
;CHECK-LABEL: vshlQu64:
;CHECK: vshl.u64
- %tmp1 = load <2 x i64>* %A
- %tmp2 = load <2 x i64>* %B
+ %tmp1 = load <2 x i64>, <2 x i64>* %A
+ %tmp2 = load <2 x i64>, <2 x i64>* %B
%tmp3 = call <2 x i64> @llvm.arm.neon.vshiftu.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
ret <2 x i64> %tmp3
}
@@ -150,7 +150,7 @@ define <2 x i64> @vshlQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
define <8 x i8> @vshli8(<8 x i8>* %A) nounwind {
;CHECK-LABEL: vshli8:
;CHECK: vshl.i8
- %tmp1 = load <8 x i8>* %A
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
%tmp2 = call <8 x i8> @llvm.arm.neon.vshifts.v8i8(<8 x i8> %tmp1, <8 x i8> < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >)
ret <8 x i8> %tmp2
}
@@ -158,7 +158,7 @@ define <8 x i8> @vshli8(<8 x i8>* %A) nounwind {
define <4 x i16> @vshli16(<4 x i16>* %A) nounwind {
;CHECK-LABEL: vshli16:
;CHECK: vshl.i16
- %tmp1 = load <4 x i16>* %A
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
%tmp2 = call <4 x i16> @llvm.arm.neon.vshiftu.v4i16(<4 x i16> %tmp1, <4 x i16> < i16 15, i16 15, i16 15, i16 15 >)
ret <4 x i16> %tmp2
}
@@ -166,7 +166,7 @@ define <4 x i16> @vshli16(<4 x i16>* %A) nounwind {
define <2 x i32> @vshli32(<2 x i32>* %A) nounwind {
;CHECK-LABEL: vshli32:
;CHECK: vshl.i32
- %tmp1 = load <2 x i32>* %A
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
%tmp2 = call <2 x i32> @llvm.arm.neon.vshifts.v2i32(<2 x i32> %tmp1, <2 x i32> < i32 31, i32 31 >)
ret <2 x i32> %tmp2
}
@@ -174,7 +174,7 @@ define <2 x i32> @vshli32(<2 x i32>* %A) nounwind {
define <1 x i64> @vshli64(<1 x i64>* %A) nounwind {
;CHECK-LABEL: vshli64:
;CHECK: vshl.i64
- %tmp1 = load <1 x i64>* %A
+ %tmp1 = load <1 x i64>, <1 x i64>* %A
%tmp2 = call <1 x i64> @llvm.arm.neon.vshiftu.v1i64(<1 x i64> %tmp1, <1 x i64> < i64 63 >)
ret <1 x i64> %tmp2
}
@@ -182,7 +182,7 @@ define <1 x i64> @vshli64(<1 x i64>* %A) nounwind {
define <16 x i8> @vshlQi8(<16 x i8>* %A) nounwind {
;CHECK-LABEL: vshlQi8:
;CHECK: vshl.i8
- %tmp1 = load <16 x i8>* %A
+ %tmp1 = load <16 x i8>, <16 x i8>* %A
%tmp2 = call <16 x i8> @llvm.arm.neon.vshifts.v16i8(<16 x i8> %tmp1, <16 x i8> < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >)
ret <16 x i8> %tmp2
}
@@ -190,7 +190,7 @@ define <16 x i8> @vshlQi8(<16 x i8>* %A) nounwind {
define <8 x i16> @vshlQi16(<8 x i16>* %A) nounwind {
;CHECK-LABEL: vshlQi16:
;CHECK: vshl.i16
- %tmp1 = load <8 x i16>* %A
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
%tmp2 = call <8 x i16> @llvm.arm.neon.vshiftu.v8i16(<8 x i16> %tmp1, <8 x i16> < i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15 >)
ret <8 x i16> %tmp2
}
@@ -198,7 +198,7 @@ define <8 x i16> @vshlQi16(<8 x i16>* %A) nounwind {
define <4 x i32> @vshlQi32(<4 x i32>* %A) nounwind {
;CHECK-LABEL: vshlQi32:
;CHECK: vshl.i32
- %tmp1 = load <4 x i32>* %A
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
%tmp2 = call <4 x i32> @llvm.arm.neon.vshifts.v4i32(<4 x i32> %tmp1, <4 x i32> < i32 31, i32 31, i32 31, i32 31 >)
ret <4 x i32> %tmp2
}
@@ -206,7 +206,7 @@ define <4 x i32> @vshlQi32(<4 x i32>* %A) nounwind {
define <2 x i64> @vshlQi64(<2 x i64>* %A) nounwind {
;CHECK-LABEL: vshlQi64:
;CHECK: vshl.i64
- %tmp1 = load <2 x i64>* %A
+ %tmp1 = load <2 x i64>, <2 x i64>* %A
%tmp2 = call <2 x i64> @llvm.arm.neon.vshiftu.v2i64(<2 x i64> %tmp1, <2 x i64> < i64 63, i64 63 >)
ret <2 x i64> %tmp2
}
@@ -216,7 +216,7 @@ define <2 x i64> @vshlQi64(<2 x i64>* %A) nounwind {
define <8 x i8> @vshrs8(<8 x i8>* %A) nounwind {
;CHECK-LABEL: vshrs8:
;CHECK: vshr.s8
- %tmp1 = load <8 x i8>* %A
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
%tmp2 = call <8 x i8> @llvm.arm.neon.vshifts.v8i8(<8 x i8> %tmp1, <8 x i8> < i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8 >)
ret <8 x i8> %tmp2
}
@@ -224,7 +224,7 @@ define <8 x i8> @vshrs8(<8 x i8>* %A) nounwind {
define <4 x i16> @vshrs16(<4 x i16>* %A) nounwind {
;CHECK-LABEL: vshrs16:
;CHECK: vshr.s16
- %tmp1 = load <4 x i16>* %A
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
%tmp2 = call <4 x i16> @llvm.arm.neon.vshifts.v4i16(<4 x i16> %tmp1, <4 x i16> < i16 -16, i16 -16, i16 -16, i16 -16 >)
ret <4 x i16> %tmp2
}
@@ -232,7 +232,7 @@ define <4 x i16> @vshrs16(<4 x i16>* %A) nounwind {
define <2 x i32> @vshrs32(<2 x i32>* %A) nounwind {
;CHECK-LABEL: vshrs32:
;CHECK: vshr.s32
- %tmp1 = load <2 x i32>* %A
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
%tmp2 = call <2 x i32> @llvm.arm.neon.vshifts.v2i32(<2 x i32> %tmp1, <2 x i32> < i32 -32, i32 -32 >)
ret <2 x i32> %tmp2
}
@@ -240,7 +240,7 @@ define <2 x i32> @vshrs32(<2 x i32>* %A) nounwind {
define <1 x i64> @vshrs64(<1 x i64>* %A) nounwind {
;CHECK-LABEL: vshrs64:
;CHECK: vshr.s64
- %tmp1 = load <1 x i64>* %A
+ %tmp1 = load <1 x i64>, <1 x i64>* %A
%tmp2 = call <1 x i64> @llvm.arm.neon.vshifts.v1i64(<1 x i64> %tmp1, <1 x i64> < i64 -64 >)
ret <1 x i64> %tmp2
}
@@ -248,7 +248,7 @@ define <1 x i64> @vshrs64(<1 x i64>* %A) nounwind {
define <8 x i8> @vshru8(<8 x i8>* %A) nounwind {
;CHECK-LABEL: vshru8:
;CHECK: vshr.u8
- %tmp1 = load <8 x i8>* %A
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
%tmp2 = call <8 x i8> @llvm.arm.neon.vshiftu.v8i8(<8 x i8> %tmp1, <8 x i8> < i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8 >)
ret <8 x i8> %tmp2
}
@@ -256,7 +256,7 @@ define <8 x i8> @vshru8(<8 x i8>* %A) nounwind {
define <4 x i16> @vshru16(<4 x i16>* %A) nounwind {
;CHECK-LABEL: vshru16:
;CHECK: vshr.u16
- %tmp1 = load <4 x i16>* %A
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
%tmp2 = call <4 x i16> @llvm.arm.neon.vshiftu.v4i16(<4 x i16> %tmp1, <4 x i16> < i16 -16, i16 -16, i16 -16, i16 -16 >)
ret <4 x i16> %tmp2
}
@@ -264,7 +264,7 @@ define <4 x i16> @vshru16(<4 x i16>* %A) nounwind {
define <2 x i32> @vshru32(<2 x i32>* %A) nounwind {
;CHECK-LABEL: vshru32:
;CHECK: vshr.u32
- %tmp1 = load <2 x i32>* %A
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
%tmp2 = call <2 x i32> @llvm.arm.neon.vshiftu.v2i32(<2 x i32> %tmp1, <2 x i32> < i32 -32, i32 -32 >)
ret <2 x i32> %tmp2
}
@@ -272,7 +272,7 @@ define <2 x i32> @vshru32(<2 x i32>* %A) nounwind {
define <1 x i64> @vshru64(<1 x i64>* %A) nounwind {
;CHECK-LABEL: vshru64:
;CHECK: vshr.u64
- %tmp1 = load <1 x i64>* %A
+ %tmp1 = load <1 x i64>, <1 x i64>* %A
%tmp2 = call <1 x i64> @llvm.arm.neon.vshiftu.v1i64(<1 x i64> %tmp1, <1 x i64> < i64 -64 >)
ret <1 x i64> %tmp2
}
@@ -280,7 +280,7 @@ define <1 x i64> @vshru64(<1 x i64>* %A) nounwind {
define <16 x i8> @vshrQs8(<16 x i8>* %A) nounwind {
;CHECK-LABEL: vshrQs8:
;CHECK: vshr.s8
- %tmp1 = load <16 x i8>* %A
+ %tmp1 = load <16 x i8>, <16 x i8>* %A
%tmp2 = call <16 x i8> @llvm.arm.neon.vshifts.v16i8(<16 x i8> %tmp1, <16 x i8> < i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8 >)
ret <16 x i8> %tmp2
}
@@ -288,7 +288,7 @@ define <16 x i8> @vshrQs8(<16 x i8>* %A) nounwind {
define <8 x i16> @vshrQs16(<8 x i16>* %A) nounwind {
;CHECK-LABEL: vshrQs16:
;CHECK: vshr.s16
- %tmp1 = load <8 x i16>* %A
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
%tmp2 = call <8 x i16> @llvm.arm.neon.vshifts.v8i16(<8 x i16> %tmp1, <8 x i16> < i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16 >)
ret <8 x i16> %tmp2
}
@@ -296,7 +296,7 @@ define <8 x i16> @vshrQs16(<8 x i16>* %A) nounwind {
define <4 x i32> @vshrQs32(<4 x i32>* %A) nounwind {
;CHECK-LABEL: vshrQs32:
;CHECK: vshr.s32
- %tmp1 = load <4 x i32>* %A
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
%tmp2 = call <4 x i32> @llvm.arm.neon.vshifts.v4i32(<4 x i32> %tmp1, <4 x i32> < i32 -32, i32 -32, i32 -32, i32 -32 >)
ret <4 x i32> %tmp2
}
@@ -304,7 +304,7 @@ define <4 x i32> @vshrQs32(<4 x i32>* %A) nounwind {
define <2 x i64> @vshrQs64(<2 x i64>* %A) nounwind {
;CHECK-LABEL: vshrQs64:
;CHECK: vshr.s64
- %tmp1 = load <2 x i64>* %A
+ %tmp1 = load <2 x i64>, <2 x i64>* %A
%tmp2 = call <2 x i64> @llvm.arm.neon.vshifts.v2i64(<2 x i64> %tmp1, <2 x i64> < i64 -64, i64 -64 >)
ret <2 x i64> %tmp2
}
@@ -312,7 +312,7 @@ define <2 x i64> @vshrQs64(<2 x i64>* %A) nounwind {
define <16 x i8> @vshrQu8(<16 x i8>* %A) nounwind {
;CHECK-LABEL: vshrQu8:
;CHECK: vshr.u8
- %tmp1 = load <16 x i8>* %A
+ %tmp1 = load <16 x i8>, <16 x i8>* %A
%tmp2 = call <16 x i8> @llvm.arm.neon.vshiftu.v16i8(<16 x i8> %tmp1, <16 x i8> < i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8 >)
ret <16 x i8> %tmp2
}
@@ -320,7 +320,7 @@ define <16 x i8> @vshrQu8(<16 x i8>* %A) nounwind {
define <8 x i16> @vshrQu16(<8 x i16>* %A) nounwind {
;CHECK-LABEL: vshrQu16:
;CHECK: vshr.u16
- %tmp1 = load <8 x i16>* %A
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
%tmp2 = call <8 x i16> @llvm.arm.neon.vshiftu.v8i16(<8 x i16> %tmp1, <8 x i16> < i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16 >)
ret <8 x i16> %tmp2
}
@@ -328,7 +328,7 @@ define <8 x i16> @vshrQu16(<8 x i16>* %A) nounwind {
define <4 x i32> @vshrQu32(<4 x i32>* %A) nounwind {
;CHECK-LABEL: vshrQu32:
;CHECK: vshr.u32
- %tmp1 = load <4 x i32>* %A
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
%tmp2 = call <4 x i32> @llvm.arm.neon.vshiftu.v4i32(<4 x i32> %tmp1, <4 x i32> < i32 -32, i32 -32, i32 -32, i32 -32 >)
ret <4 x i32> %tmp2
}
@@ -336,7 +336,7 @@ define <4 x i32> @vshrQu32(<4 x i32>* %A) nounwind {
define <2 x i64> @vshrQu64(<2 x i64>* %A) nounwind {
;CHECK-LABEL: vshrQu64:
;CHECK: vshr.u64
- %tmp1 = load <2 x i64>* %A
+ %tmp1 = load <2 x i64>, <2 x i64>* %A
%tmp2 = call <2 x i64> @llvm.arm.neon.vshiftu.v2i64(<2 x i64> %tmp1, <2 x i64> < i64 -64, i64 -64 >)
ret <2 x i64> %tmp2
}
@@ -364,8 +364,8 @@ declare <2 x i64> @llvm.arm.neon.vshiftu.v2i64(<2 x i64>, <2 x i64>) nounwind re
define <8 x i8> @vrshls8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: vrshls8:
;CHECK: vrshl.s8
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
+ %tmp2 = load <8 x i8>, <8 x i8>* %B
%tmp3 = call <8 x i8> @llvm.arm.neon.vrshifts.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
ret <8 x i8> %tmp3
}
@@ -373,8 +373,8 @@ define <8 x i8> @vrshls8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
define <4 x i16> @vrshls16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: vrshls16:
;CHECK: vrshl.s16
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
%tmp3 = call <4 x i16> @llvm.arm.neon.vrshifts.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
ret <4 x i16> %tmp3
}
@@ -382,8 +382,8 @@ define <4 x i16> @vrshls16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
define <2 x i32> @vrshls32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: vrshls32:
;CHECK: vrshl.s32
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
%tmp3 = call <2 x i32> @llvm.arm.neon.vrshifts.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
ret <2 x i32> %tmp3
}
@@ -391,8 +391,8 @@ define <2 x i32> @vrshls32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
define <1 x i64> @vrshls64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
;CHECK-LABEL: vrshls64:
;CHECK: vrshl.s64
- %tmp1 = load <1 x i64>* %A
- %tmp2 = load <1 x i64>* %B
+ %tmp1 = load <1 x i64>, <1 x i64>* %A
+ %tmp2 = load <1 x i64>, <1 x i64>* %B
%tmp3 = call <1 x i64> @llvm.arm.neon.vrshifts.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2)
ret <1 x i64> %tmp3
}
@@ -400,8 +400,8 @@ define <1 x i64> @vrshls64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
define <8 x i8> @vrshlu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: vrshlu8:
;CHECK: vrshl.u8
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
+ %tmp2 = load <8 x i8>, <8 x i8>* %B
%tmp3 = call <8 x i8> @llvm.arm.neon.vrshiftu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
ret <8 x i8> %tmp3
}
@@ -409,8 +409,8 @@ define <8 x i8> @vrshlu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
define <4 x i16> @vrshlu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: vrshlu16:
;CHECK: vrshl.u16
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
%tmp3 = call <4 x i16> @llvm.arm.neon.vrshiftu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
ret <4 x i16> %tmp3
}
@@ -418,8 +418,8 @@ define <4 x i16> @vrshlu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
define <2 x i32> @vrshlu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: vrshlu32:
;CHECK: vrshl.u32
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
%tmp3 = call <2 x i32> @llvm.arm.neon.vrshiftu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
ret <2 x i32> %tmp3
}
@@ -427,8 +427,8 @@ define <2 x i32> @vrshlu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
define <1 x i64> @vrshlu64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
;CHECK-LABEL: vrshlu64:
;CHECK: vrshl.u64
- %tmp1 = load <1 x i64>* %A
- %tmp2 = load <1 x i64>* %B
+ %tmp1 = load <1 x i64>, <1 x i64>* %A
+ %tmp2 = load <1 x i64>, <1 x i64>* %B
%tmp3 = call <1 x i64> @llvm.arm.neon.vrshiftu.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2)
ret <1 x i64> %tmp3
}
@@ -436,8 +436,8 @@ define <1 x i64> @vrshlu64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
define <16 x i8> @vrshlQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: vrshlQs8:
;CHECK: vrshl.s8
- %tmp1 = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
+ %tmp1 = load <16 x i8>, <16 x i8>* %A
+ %tmp2 = load <16 x i8>, <16 x i8>* %B
%tmp3 = call <16 x i8> @llvm.arm.neon.vrshifts.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
ret <16 x i8> %tmp3
}
@@ -445,8 +445,8 @@ define <16 x i8> @vrshlQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
define <8 x i16> @vrshlQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: vrshlQs16:
;CHECK: vrshl.s16
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
+ %tmp2 = load <8 x i16>, <8 x i16>* %B
%tmp3 = call <8 x i16> @llvm.arm.neon.vrshifts.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
ret <8 x i16> %tmp3
}
@@ -454,8 +454,8 @@ define <8 x i16> @vrshlQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
define <4 x i32> @vrshlQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: vrshlQs32:
;CHECK: vrshl.s32
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
+ %tmp2 = load <4 x i32>, <4 x i32>* %B
%tmp3 = call <4 x i32> @llvm.arm.neon.vrshifts.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
ret <4 x i32> %tmp3
}
@@ -463,8 +463,8 @@ define <4 x i32> @vrshlQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
define <2 x i64> @vrshlQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
;CHECK-LABEL: vrshlQs64:
;CHECK: vrshl.s64
- %tmp1 = load <2 x i64>* %A
- %tmp2 = load <2 x i64>* %B
+ %tmp1 = load <2 x i64>, <2 x i64>* %A
+ %tmp2 = load <2 x i64>, <2 x i64>* %B
%tmp3 = call <2 x i64> @llvm.arm.neon.vrshifts.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
ret <2 x i64> %tmp3
}
@@ -472,8 +472,8 @@ define <2 x i64> @vrshlQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
define <16 x i8> @vrshlQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: vrshlQu8:
;CHECK: vrshl.u8
- %tmp1 = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
+ %tmp1 = load <16 x i8>, <16 x i8>* %A
+ %tmp2 = load <16 x i8>, <16 x i8>* %B
%tmp3 = call <16 x i8> @llvm.arm.neon.vrshiftu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
ret <16 x i8> %tmp3
}
@@ -481,8 +481,8 @@ define <16 x i8> @vrshlQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
define <8 x i16> @vrshlQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: vrshlQu16:
;CHECK: vrshl.u16
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
+ %tmp2 = load <8 x i16>, <8 x i16>* %B
%tmp3 = call <8 x i16> @llvm.arm.neon.vrshiftu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
ret <8 x i16> %tmp3
}
@@ -490,8 +490,8 @@ define <8 x i16> @vrshlQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
define <4 x i32> @vrshlQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: vrshlQu32:
;CHECK: vrshl.u32
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
+ %tmp2 = load <4 x i32>, <4 x i32>* %B
%tmp3 = call <4 x i32> @llvm.arm.neon.vrshiftu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
ret <4 x i32> %tmp3
}
@@ -499,8 +499,8 @@ define <4 x i32> @vrshlQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
define <2 x i64> @vrshlQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
;CHECK-LABEL: vrshlQu64:
;CHECK: vrshl.u64
- %tmp1 = load <2 x i64>* %A
- %tmp2 = load <2 x i64>* %B
+ %tmp1 = load <2 x i64>, <2 x i64>* %A
+ %tmp2 = load <2 x i64>, <2 x i64>* %B
%tmp3 = call <2 x i64> @llvm.arm.neon.vrshiftu.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
ret <2 x i64> %tmp3
}
@@ -508,7 +508,7 @@ define <2 x i64> @vrshlQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
define <8 x i8> @vrshrs8(<8 x i8>* %A) nounwind {
;CHECK-LABEL: vrshrs8:
;CHECK: vrshr.s8
- %tmp1 = load <8 x i8>* %A
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
%tmp2 = call <8 x i8> @llvm.arm.neon.vrshifts.v8i8(<8 x i8> %tmp1, <8 x i8> < i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8 >)
ret <8 x i8> %tmp2
}
@@ -516,7 +516,7 @@ define <8 x i8> @vrshrs8(<8 x i8>* %A) nounwind {
define <4 x i16> @vrshrs16(<4 x i16>* %A) nounwind {
;CHECK-LABEL: vrshrs16:
;CHECK: vrshr.s16
- %tmp1 = load <4 x i16>* %A
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
%tmp2 = call <4 x i16> @llvm.arm.neon.vrshifts.v4i16(<4 x i16> %tmp1, <4 x i16> < i16 -16, i16 -16, i16 -16, i16 -16 >)
ret <4 x i16> %tmp2
}
@@ -524,7 +524,7 @@ define <4 x i16> @vrshrs16(<4 x i16>* %A) nounwind {
define <2 x i32> @vrshrs32(<2 x i32>* %A) nounwind {
;CHECK-LABEL: vrshrs32:
;CHECK: vrshr.s32
- %tmp1 = load <2 x i32>* %A
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
%tmp2 = call <2 x i32> @llvm.arm.neon.vrshifts.v2i32(<2 x i32> %tmp1, <2 x i32> < i32 -32, i32 -32 >)
ret <2 x i32> %tmp2
}
@@ -532,7 +532,7 @@ define <2 x i32> @vrshrs32(<2 x i32>* %A) nounwind {
define <1 x i64> @vrshrs64(<1 x i64>* %A) nounwind {
;CHECK-LABEL: vrshrs64:
;CHECK: vrshr.s64
- %tmp1 = load <1 x i64>* %A
+ %tmp1 = load <1 x i64>, <1 x i64>* %A
%tmp2 = call <1 x i64> @llvm.arm.neon.vrshifts.v1i64(<1 x i64> %tmp1, <1 x i64> < i64 -64 >)
ret <1 x i64> %tmp2
}
@@ -540,7 +540,7 @@ define <1 x i64> @vrshrs64(<1 x i64>* %A) nounwind {
define <8 x i8> @vrshru8(<8 x i8>* %A) nounwind {
;CHECK-LABEL: vrshru8:
;CHECK: vrshr.u8
- %tmp1 = load <8 x i8>* %A
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
%tmp2 = call <8 x i8> @llvm.arm.neon.vrshiftu.v8i8(<8 x i8> %tmp1, <8 x i8> < i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8 >)
ret <8 x i8> %tmp2
}
@@ -548,7 +548,7 @@ define <8 x i8> @vrshru8(<8 x i8>* %A) nounwind {
define <4 x i16> @vrshru16(<4 x i16>* %A) nounwind {
;CHECK-LABEL: vrshru16:
;CHECK: vrshr.u16
- %tmp1 = load <4 x i16>* %A
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
%tmp2 = call <4 x i16> @llvm.arm.neon.vrshiftu.v4i16(<4 x i16> %tmp1, <4 x i16> < i16 -16, i16 -16, i16 -16, i16 -16 >)
ret <4 x i16> %tmp2
}
@@ -556,7 +556,7 @@ define <4 x i16> @vrshru16(<4 x i16>* %A) nounwind {
define <2 x i32> @vrshru32(<2 x i32>* %A) nounwind {
;CHECK-LABEL: vrshru32:
;CHECK: vrshr.u32
- %tmp1 = load <2 x i32>* %A
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
%tmp2 = call <2 x i32> @llvm.arm.neon.vrshiftu.v2i32(<2 x i32> %tmp1, <2 x i32> < i32 -32, i32 -32 >)
ret <2 x i32> %tmp2
}
@@ -564,7 +564,7 @@ define <2 x i32> @vrshru32(<2 x i32>* %A) nounwind {
define <1 x i64> @vrshru64(<1 x i64>* %A) nounwind {
;CHECK-LABEL: vrshru64:
;CHECK: vrshr.u64
- %tmp1 = load <1 x i64>* %A
+ %tmp1 = load <1 x i64>, <1 x i64>* %A
%tmp2 = call <1 x i64> @llvm.arm.neon.vrshiftu.v1i64(<1 x i64> %tmp1, <1 x i64> < i64 -64 >)
ret <1 x i64> %tmp2
}
@@ -572,7 +572,7 @@ define <1 x i64> @vrshru64(<1 x i64>* %A) nounwind {
define <16 x i8> @vrshrQs8(<16 x i8>* %A) nounwind {
;CHECK-LABEL: vrshrQs8:
;CHECK: vrshr.s8
- %tmp1 = load <16 x i8>* %A
+ %tmp1 = load <16 x i8>, <16 x i8>* %A
%tmp2 = call <16 x i8> @llvm.arm.neon.vrshifts.v16i8(<16 x i8> %tmp1, <16 x i8> < i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8 >)
ret <16 x i8> %tmp2
}
@@ -580,7 +580,7 @@ define <16 x i8> @vrshrQs8(<16 x i8>* %A) nounwind {
define <8 x i16> @vrshrQs16(<8 x i16>* %A) nounwind {
;CHECK-LABEL: vrshrQs16:
;CHECK: vrshr.s16
- %tmp1 = load <8 x i16>* %A
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
%tmp2 = call <8 x i16> @llvm.arm.neon.vrshifts.v8i16(<8 x i16> %tmp1, <8 x i16> < i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16 >)
ret <8 x i16> %tmp2
}
@@ -588,7 +588,7 @@ define <8 x i16> @vrshrQs16(<8 x i16>* %A) nounwind {
define <4 x i32> @vrshrQs32(<4 x i32>* %A) nounwind {
;CHECK-LABEL: vrshrQs32:
;CHECK: vrshr.s32
- %tmp1 = load <4 x i32>* %A
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
%tmp2 = call <4 x i32> @llvm.arm.neon.vrshifts.v4i32(<4 x i32> %tmp1, <4 x i32> < i32 -32, i32 -32, i32 -32, i32 -32 >)
ret <4 x i32> %tmp2
}
@@ -596,7 +596,7 @@ define <4 x i32> @vrshrQs32(<4 x i32>* %A) nounwind {
define <2 x i64> @vrshrQs64(<2 x i64>* %A) nounwind {
;CHECK-LABEL: vrshrQs64:
;CHECK: vrshr.s64
- %tmp1 = load <2 x i64>* %A
+ %tmp1 = load <2 x i64>, <2 x i64>* %A
%tmp2 = call <2 x i64> @llvm.arm.neon.vrshifts.v2i64(<2 x i64> %tmp1, <2 x i64> < i64 -64, i64 -64 >)
ret <2 x i64> %tmp2
}
@@ -604,7 +604,7 @@ define <2 x i64> @vrshrQs64(<2 x i64>* %A) nounwind {
define <16 x i8> @vrshrQu8(<16 x i8>* %A) nounwind {
;CHECK-LABEL: vrshrQu8:
;CHECK: vrshr.u8
- %tmp1 = load <16 x i8>* %A
+ %tmp1 = load <16 x i8>, <16 x i8>* %A
%tmp2 = call <16 x i8> @llvm.arm.neon.vrshiftu.v16i8(<16 x i8> %tmp1, <16 x i8> < i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8 >)
ret <16 x i8> %tmp2
}
@@ -612,7 +612,7 @@ define <16 x i8> @vrshrQu8(<16 x i8>* %A) nounwind {
define <8 x i16> @vrshrQu16(<8 x i16>* %A) nounwind {
;CHECK-LABEL: vrshrQu16:
;CHECK: vrshr.u16
- %tmp1 = load <8 x i16>* %A
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
%tmp2 = call <8 x i16> @llvm.arm.neon.vrshiftu.v8i16(<8 x i16> %tmp1, <8 x i16> < i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16 >)
ret <8 x i16> %tmp2
}
@@ -620,7 +620,7 @@ define <8 x i16> @vrshrQu16(<8 x i16>* %A) nounwind {
define <4 x i32> @vrshrQu32(<4 x i32>* %A) nounwind {
;CHECK-LABEL: vrshrQu32:
;CHECK: vrshr.u32
- %tmp1 = load <4 x i32>* %A
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
%tmp2 = call <4 x i32> @llvm.arm.neon.vrshiftu.v4i32(<4 x i32> %tmp1, <4 x i32> < i32 -32, i32 -32, i32 -32, i32 -32 >)
ret <4 x i32> %tmp2
}
@@ -628,7 +628,7 @@ define <4 x i32> @vrshrQu32(<4 x i32>* %A) nounwind {
define <2 x i64> @vrshrQu64(<2 x i64>* %A) nounwind {
;CHECK-LABEL: vrshrQu64:
;CHECK: vrshr.u64
- %tmp1 = load <2 x i64>* %A
+ %tmp1 = load <2 x i64>, <2 x i64>* %A
%tmp2 = call <2 x i64> @llvm.arm.neon.vrshiftu.v2i64(<2 x i64> %tmp1, <2 x i64> < i64 -64, i64 -64 >)
ret <2 x i64> %tmp2
}
diff --git a/test/CodeGen/ARM/vshll.ll b/test/CodeGen/ARM/vshll.ll
index 27873eb72753..a8230134d91f 100644
--- a/test/CodeGen/ARM/vshll.ll
+++ b/test/CodeGen/ARM/vshll.ll
@@ -3,7 +3,7 @@
define <8 x i16> @vshlls8(<8 x i8>* %A) nounwind {
;CHECK-LABEL: vshlls8:
;CHECK: vshll.s8
- %tmp1 = load <8 x i8>* %A
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
%sext = sext <8 x i8> %tmp1 to <8 x i16>
%shift = shl <8 x i16> %sext, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
ret <8 x i16> %shift
@@ -12,7 +12,7 @@ define <8 x i16> @vshlls8(<8 x i8>* %A) nounwind {
define <4 x i32> @vshlls16(<4 x i16>* %A) nounwind {
;CHECK-LABEL: vshlls16:
;CHECK: vshll.s16
- %tmp1 = load <4 x i16>* %A
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
%sext = sext <4 x i16> %tmp1 to <4 x i32>
%shift = shl <4 x i32> %sext, <i32 15, i32 15, i32 15, i32 15>
ret <4 x i32> %shift
@@ -21,7 +21,7 @@ define <4 x i32> @vshlls16(<4 x i16>* %A) nounwind {
define <2 x i64> @vshlls32(<2 x i32>* %A) nounwind {
;CHECK-LABEL: vshlls32:
;CHECK: vshll.s32
- %tmp1 = load <2 x i32>* %A
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
%sext = sext <2 x i32> %tmp1 to <2 x i64>
%shift = shl <2 x i64> %sext, <i64 31, i64 31>
ret <2 x i64> %shift
@@ -30,7 +30,7 @@ define <2 x i64> @vshlls32(<2 x i32>* %A) nounwind {
define <8 x i16> @vshllu8(<8 x i8>* %A) nounwind {
;CHECK-LABEL: vshllu8:
;CHECK: vshll.u8
- %tmp1 = load <8 x i8>* %A
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
%zext = zext <8 x i8> %tmp1 to <8 x i16>
%shift = shl <8 x i16> %zext, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
ret <8 x i16> %shift
@@ -39,7 +39,7 @@ define <8 x i16> @vshllu8(<8 x i8>* %A) nounwind {
define <4 x i32> @vshllu16(<4 x i16>* %A) nounwind {
;CHECK-LABEL: vshllu16:
;CHECK: vshll.u16
- %tmp1 = load <4 x i16>* %A
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
%zext = zext <4 x i16> %tmp1 to <4 x i32>
%shift = shl <4 x i32> %zext, <i32 15, i32 15, i32 15, i32 15>
ret <4 x i32> %shift
@@ -48,7 +48,7 @@ define <4 x i32> @vshllu16(<4 x i16>* %A) nounwind {
define <2 x i64> @vshllu32(<2 x i32>* %A) nounwind {
;CHECK-LABEL: vshllu32:
;CHECK: vshll.u32
- %tmp1 = load <2 x i32>* %A
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
%zext = zext <2 x i32> %tmp1 to <2 x i64>
%shift = shl <2 x i64> %zext, <i64 31, i64 31>
ret <2 x i64> %shift
@@ -59,7 +59,7 @@ define <2 x i64> @vshllu32(<2 x i32>* %A) nounwind {
define <8 x i16> @vshlli8(<8 x i8>* %A) nounwind {
;CHECK-LABEL: vshlli8:
;CHECK: vshll.i8
- %tmp1 = load <8 x i8>* %A
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
%sext = sext <8 x i8> %tmp1 to <8 x i16>
%shift = shl <8 x i16> %sext, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
ret <8 x i16> %shift
@@ -68,7 +68,7 @@ define <8 x i16> @vshlli8(<8 x i8>* %A) nounwind {
define <4 x i32> @vshlli16(<4 x i16>* %A) nounwind {
;CHECK-LABEL: vshlli16:
;CHECK: vshll.i16
- %tmp1 = load <4 x i16>* %A
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
%zext = zext <4 x i16> %tmp1 to <4 x i32>
%shift = shl <4 x i32> %zext, <i32 16, i32 16, i32 16, i32 16>
ret <4 x i32> %shift
@@ -77,7 +77,7 @@ define <4 x i32> @vshlli16(<4 x i16>* %A) nounwind {
define <2 x i64> @vshlli32(<2 x i32>* %A) nounwind {
;CHECK-LABEL: vshlli32:
;CHECK: vshll.i32
- %tmp1 = load <2 x i32>* %A
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
%zext = zext <2 x i32> %tmp1 to <2 x i64>
%shift = shl <2 x i64> %zext, <i64 32, i64 32>
ret <2 x i64> %shift
@@ -89,7 +89,7 @@ define <8 x i16> @vshllu8_bad(<8 x i8>* %A) nounwind {
; CHECK-LABEL: vshllu8_bad:
; CHECK: vmovl.u8
; CHECK: vshl.i16
- %tmp1 = load <8 x i8>* %A
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
%zext = zext <8 x i8> %tmp1 to <8 x i16>
%shift = shl <8 x i16> %zext, <i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9>
ret <8 x i16> %shift
@@ -99,7 +99,7 @@ define <4 x i32> @vshlls16_bad(<4 x i16>* %A) nounwind {
; CHECK-LABEL: vshlls16_bad:
; CHECK: vmovl.s16
; CHECK: vshl.i32
- %tmp1 = load <4 x i16>* %A
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
%sext = sext <4 x i16> %tmp1 to <4 x i32>
%shift = shl <4 x i32> %sext, <i32 17, i32 17, i32 17, i32 17>
ret <4 x i32> %shift
@@ -109,7 +109,7 @@ define <2 x i64> @vshllu32_bad(<2 x i32>* %A) nounwind {
; CHECK-LABEL: vshllu32_bad:
; CHECK: vmovl.u32
; CHECK: vshl.i64
- %tmp1 = load <2 x i32>* %A
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
%zext = zext <2 x i32> %tmp1 to <2 x i64>
%shift = shl <2 x i64> %zext, <i64 33, i64 33>
ret <2 x i64> %shift
diff --git a/test/CodeGen/ARM/vshrn.ll b/test/CodeGen/ARM/vshrn.ll
index 8aa009ab823e..e033486562c6 100644
--- a/test/CodeGen/ARM/vshrn.ll
+++ b/test/CodeGen/ARM/vshrn.ll
@@ -3,7 +3,7 @@
define <8 x i8> @vshrns8(<8 x i16>* %A) nounwind {
;CHECK-LABEL: vshrns8:
;CHECK: vshrn.i16
- %tmp1 = load <8 x i16>* %A
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
%tmp2 = lshr <8 x i16> %tmp1, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
%tmp3 = trunc <8 x i16> %tmp2 to <8 x i8>
ret <8 x i8> %tmp3
@@ -12,7 +12,7 @@ define <8 x i8> @vshrns8(<8 x i16>* %A) nounwind {
define <4 x i16> @vshrns16(<4 x i32>* %A) nounwind {
;CHECK-LABEL: vshrns16:
;CHECK: vshrn.i32
- %tmp1 = load <4 x i32>* %A
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
%tmp2 = ashr <4 x i32> %tmp1, <i32 16, i32 16, i32 16, i32 16>
%tmp3 = trunc <4 x i32> %tmp2 to <4 x i16>
ret <4 x i16> %tmp3
@@ -21,7 +21,7 @@ define <4 x i16> @vshrns16(<4 x i32>* %A) nounwind {
define <2 x i32> @vshrns32(<2 x i64>* %A) nounwind {
;CHECK-LABEL: vshrns32:
;CHECK: vshrn.i64
- %tmp1 = load <2 x i64>* %A
+ %tmp1 = load <2 x i64>, <2 x i64>* %A
%tmp2 = ashr <2 x i64> %tmp1, <i64 32, i64 32>
%tmp3 = trunc <2 x i64> %tmp2 to <2 x i32>
ret <2 x i32> %tmp3
@@ -31,7 +31,7 @@ define <8 x i8> @vshrns8_bad(<8 x i16>* %A) nounwind {
; CHECK-LABEL: vshrns8_bad:
; CHECK: vshr.s16
; CHECK: vmovn.i16
- %tmp1 = load <8 x i16>* %A
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
%tmp2 = ashr <8 x i16> %tmp1, <i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9>
%tmp3 = trunc <8 x i16> %tmp2 to <8 x i8>
ret <8 x i8> %tmp3
@@ -41,7 +41,7 @@ define <4 x i16> @vshrns16_bad(<4 x i32>* %A) nounwind {
; CHECK-LABEL: vshrns16_bad:
; CHECK: vshr.u32
; CHECK: vmovn.i32
- %tmp1 = load <4 x i32>* %A
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
%tmp2 = lshr <4 x i32> %tmp1, <i32 17, i32 17, i32 17, i32 17>
%tmp3 = trunc <4 x i32> %tmp2 to <4 x i16>
ret <4 x i16> %tmp3
@@ -51,7 +51,7 @@ define <2 x i32> @vshrns32_bad(<2 x i64>* %A) nounwind {
; CHECK-LABEL: vshrns32_bad:
; CHECK: vshr.u64
; CHECK: vmovn.i64
- %tmp1 = load <2 x i64>* %A
+ %tmp1 = load <2 x i64>, <2 x i64>* %A
%tmp2 = lshr <2 x i64> %tmp1, <i64 33, i64 33>
%tmp3 = trunc <2 x i64> %tmp2 to <2 x i32>
ret <2 x i32> %tmp3
@@ -60,7 +60,7 @@ define <2 x i32> @vshrns32_bad(<2 x i64>* %A) nounwind {
define <8 x i8> @vrshrns8(<8 x i16>* %A) nounwind {
;CHECK-LABEL: vrshrns8:
;CHECK: vrshrn.i16
- %tmp1 = load <8 x i16>* %A
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
%tmp2 = call <8 x i8> @llvm.arm.neon.vrshiftn.v8i8(<8 x i16> %tmp1, <8 x i16> < i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8 >)
ret <8 x i8> %tmp2
}
@@ -68,7 +68,7 @@ define <8 x i8> @vrshrns8(<8 x i16>* %A) nounwind {
define <4 x i16> @vrshrns16(<4 x i32>* %A) nounwind {
;CHECK-LABEL: vrshrns16:
;CHECK: vrshrn.i32
- %tmp1 = load <4 x i32>* %A
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
%tmp2 = call <4 x i16> @llvm.arm.neon.vrshiftn.v4i16(<4 x i32> %tmp1, <4 x i32> < i32 -16, i32 -16, i32 -16, i32 -16 >)
ret <4 x i16> %tmp2
}
@@ -76,7 +76,7 @@ define <4 x i16> @vrshrns16(<4 x i32>* %A) nounwind {
define <2 x i32> @vrshrns32(<2 x i64>* %A) nounwind {
;CHECK-LABEL: vrshrns32:
;CHECK: vrshrn.i64
- %tmp1 = load <2 x i64>* %A
+ %tmp1 = load <2 x i64>, <2 x i64>* %A
%tmp2 = call <2 x i32> @llvm.arm.neon.vrshiftn.v2i32(<2 x i64> %tmp1, <2 x i64> < i64 -32, i64 -32 >)
ret <2 x i32> %tmp2
}
diff --git a/test/CodeGen/ARM/vsra.ll b/test/CodeGen/ARM/vsra.ll
index fa5985a330c4..cb758fa2f386 100644
--- a/test/CodeGen/ARM/vsra.ll
+++ b/test/CodeGen/ARM/vsra.ll
@@ -3,8 +3,8 @@
define <8 x i8> @vsras8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: vsras8:
;CHECK: vsra.s8
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
+ %tmp2 = load <8 x i8>, <8 x i8>* %B
%tmp3 = ashr <8 x i8> %tmp2, < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >
%tmp4 = add <8 x i8> %tmp1, %tmp3
ret <8 x i8> %tmp4
@@ -13,8 +13,8 @@ define <8 x i8> @vsras8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
define <4 x i16> @vsras16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: vsras16:
;CHECK: vsra.s16
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
%tmp3 = ashr <4 x i16> %tmp2, < i16 15, i16 15, i16 15, i16 15 >
%tmp4 = add <4 x i16> %tmp1, %tmp3
ret <4 x i16> %tmp4
@@ -23,8 +23,8 @@ define <4 x i16> @vsras16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
define <2 x i32> @vsras32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: vsras32:
;CHECK: vsra.s32
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
%tmp3 = ashr <2 x i32> %tmp2, < i32 31, i32 31 >
%tmp4 = add <2 x i32> %tmp1, %tmp3
ret <2 x i32> %tmp4
@@ -33,8 +33,8 @@ define <2 x i32> @vsras32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
define <1 x i64> @vsras64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
;CHECK-LABEL: vsras64:
;CHECK: vsra.s64
- %tmp1 = load <1 x i64>* %A
- %tmp2 = load <1 x i64>* %B
+ %tmp1 = load <1 x i64>, <1 x i64>* %A
+ %tmp2 = load <1 x i64>, <1 x i64>* %B
%tmp3 = ashr <1 x i64> %tmp2, < i64 63 >
%tmp4 = add <1 x i64> %tmp1, %tmp3
ret <1 x i64> %tmp4
@@ -43,8 +43,8 @@ define <1 x i64> @vsras64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
define <16 x i8> @vsraQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: vsraQs8:
;CHECK: vsra.s8
- %tmp1 = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
+ %tmp1 = load <16 x i8>, <16 x i8>* %A
+ %tmp2 = load <16 x i8>, <16 x i8>* %B
%tmp3 = ashr <16 x i8> %tmp2, < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >
%tmp4 = add <16 x i8> %tmp1, %tmp3
ret <16 x i8> %tmp4
@@ -53,8 +53,8 @@ define <16 x i8> @vsraQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
define <8 x i16> @vsraQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: vsraQs16:
;CHECK: vsra.s16
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
+ %tmp2 = load <8 x i16>, <8 x i16>* %B
%tmp3 = ashr <8 x i16> %tmp2, < i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15 >
%tmp4 = add <8 x i16> %tmp1, %tmp3
ret <8 x i16> %tmp4
@@ -63,8 +63,8 @@ define <8 x i16> @vsraQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
define <4 x i32> @vsraQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: vsraQs32:
;CHECK: vsra.s32
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
+ %tmp2 = load <4 x i32>, <4 x i32>* %B
%tmp3 = ashr <4 x i32> %tmp2, < i32 31, i32 31, i32 31, i32 31 >
%tmp4 = add <4 x i32> %tmp1, %tmp3
ret <4 x i32> %tmp4
@@ -73,8 +73,8 @@ define <4 x i32> @vsraQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
define <2 x i64> @vsraQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
;CHECK-LABEL: vsraQs64:
;CHECK: vsra.s64
- %tmp1 = load <2 x i64>* %A
- %tmp2 = load <2 x i64>* %B
+ %tmp1 = load <2 x i64>, <2 x i64>* %A
+ %tmp2 = load <2 x i64>, <2 x i64>* %B
%tmp3 = ashr <2 x i64> %tmp2, < i64 63, i64 63 >
%tmp4 = add <2 x i64> %tmp1, %tmp3
ret <2 x i64> %tmp4
@@ -83,8 +83,8 @@ define <2 x i64> @vsraQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
define <8 x i8> @vsrau8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: vsrau8:
;CHECK: vsra.u8
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
+ %tmp2 = load <8 x i8>, <8 x i8>* %B
%tmp3 = lshr <8 x i8> %tmp2, < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >
%tmp4 = add <8 x i8> %tmp1, %tmp3
ret <8 x i8> %tmp4
@@ -93,8 +93,8 @@ define <8 x i8> @vsrau8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
define <4 x i16> @vsrau16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: vsrau16:
;CHECK: vsra.u16
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
%tmp3 = lshr <4 x i16> %tmp2, < i16 15, i16 15, i16 15, i16 15 >
%tmp4 = add <4 x i16> %tmp1, %tmp3
ret <4 x i16> %tmp4
@@ -103,8 +103,8 @@ define <4 x i16> @vsrau16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
define <2 x i32> @vsrau32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: vsrau32:
;CHECK: vsra.u32
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
%tmp3 = lshr <2 x i32> %tmp2, < i32 31, i32 31 >
%tmp4 = add <2 x i32> %tmp1, %tmp3
ret <2 x i32> %tmp4
@@ -113,8 +113,8 @@ define <2 x i32> @vsrau32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
define <1 x i64> @vsrau64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
;CHECK-LABEL: vsrau64:
;CHECK: vsra.u64
- %tmp1 = load <1 x i64>* %A
- %tmp2 = load <1 x i64>* %B
+ %tmp1 = load <1 x i64>, <1 x i64>* %A
+ %tmp2 = load <1 x i64>, <1 x i64>* %B
%tmp3 = lshr <1 x i64> %tmp2, < i64 63 >
%tmp4 = add <1 x i64> %tmp1, %tmp3
ret <1 x i64> %tmp4
@@ -123,8 +123,8 @@ define <1 x i64> @vsrau64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
define <16 x i8> @vsraQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: vsraQu8:
;CHECK: vsra.u8
- %tmp1 = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
+ %tmp1 = load <16 x i8>, <16 x i8>* %A
+ %tmp2 = load <16 x i8>, <16 x i8>* %B
%tmp3 = lshr <16 x i8> %tmp2, < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >
%tmp4 = add <16 x i8> %tmp1, %tmp3
ret <16 x i8> %tmp4
@@ -133,8 +133,8 @@ define <16 x i8> @vsraQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
define <8 x i16> @vsraQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: vsraQu16:
;CHECK: vsra.u16
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
+ %tmp2 = load <8 x i16>, <8 x i16>* %B
%tmp3 = lshr <8 x i16> %tmp2, < i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15 >
%tmp4 = add <8 x i16> %tmp1, %tmp3
ret <8 x i16> %tmp4
@@ -143,8 +143,8 @@ define <8 x i16> @vsraQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
define <4 x i32> @vsraQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: vsraQu32:
;CHECK: vsra.u32
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
+ %tmp2 = load <4 x i32>, <4 x i32>* %B
%tmp3 = lshr <4 x i32> %tmp2, < i32 31, i32 31, i32 31, i32 31 >
%tmp4 = add <4 x i32> %tmp1, %tmp3
ret <4 x i32> %tmp4
@@ -153,8 +153,8 @@ define <4 x i32> @vsraQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
define <2 x i64> @vsraQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
;CHECK-LABEL: vsraQu64:
;CHECK: vsra.u64
- %tmp1 = load <2 x i64>* %A
- %tmp2 = load <2 x i64>* %B
+ %tmp1 = load <2 x i64>, <2 x i64>* %A
+ %tmp2 = load <2 x i64>, <2 x i64>* %B
%tmp3 = lshr <2 x i64> %tmp2, < i64 63, i64 63 >
%tmp4 = add <2 x i64> %tmp1, %tmp3
ret <2 x i64> %tmp4
@@ -163,8 +163,8 @@ define <2 x i64> @vsraQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
define <8 x i8> @vrsras8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: vrsras8:
;CHECK: vrsra.s8
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
+ %tmp2 = load <8 x i8>, <8 x i8>* %B
%tmp3 = call <8 x i8> @llvm.arm.neon.vrshifts.v8i8(<8 x i8> %tmp2, <8 x i8> < i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8 >)
%tmp4 = add <8 x i8> %tmp1, %tmp3
ret <8 x i8> %tmp4
@@ -173,8 +173,8 @@ define <8 x i8> @vrsras8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
define <4 x i16> @vrsras16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: vrsras16:
;CHECK: vrsra.s16
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
%tmp3 = call <4 x i16> @llvm.arm.neon.vrshifts.v4i16(<4 x i16> %tmp2, <4 x i16> < i16 -16, i16 -16, i16 -16, i16 -16 >)
%tmp4 = add <4 x i16> %tmp1, %tmp3
ret <4 x i16> %tmp4
@@ -183,8 +183,8 @@ define <4 x i16> @vrsras16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
define <2 x i32> @vrsras32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: vrsras32:
;CHECK: vrsra.s32
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
%tmp3 = call <2 x i32> @llvm.arm.neon.vrshifts.v2i32(<2 x i32> %tmp2, <2 x i32> < i32 -32, i32 -32 >)
%tmp4 = add <2 x i32> %tmp1, %tmp3
ret <2 x i32> %tmp4
@@ -193,8 +193,8 @@ define <2 x i32> @vrsras32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
define <1 x i64> @vrsras64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
;CHECK-LABEL: vrsras64:
;CHECK: vrsra.s64
- %tmp1 = load <1 x i64>* %A
- %tmp2 = load <1 x i64>* %B
+ %tmp1 = load <1 x i64>, <1 x i64>* %A
+ %tmp2 = load <1 x i64>, <1 x i64>* %B
%tmp3 = call <1 x i64> @llvm.arm.neon.vrshifts.v1i64(<1 x i64> %tmp2, <1 x i64> < i64 -64 >)
%tmp4 = add <1 x i64> %tmp1, %tmp3
ret <1 x i64> %tmp4
@@ -203,8 +203,8 @@ define <1 x i64> @vrsras64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
define <8 x i8> @vrsrau8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: vrsrau8:
;CHECK: vrsra.u8
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
+ %tmp2 = load <8 x i8>, <8 x i8>* %B
%tmp3 = call <8 x i8> @llvm.arm.neon.vrshiftu.v8i8(<8 x i8> %tmp2, <8 x i8> < i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8 >)
%tmp4 = add <8 x i8> %tmp1, %tmp3
ret <8 x i8> %tmp4
@@ -213,8 +213,8 @@ define <8 x i8> @vrsrau8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
define <4 x i16> @vrsrau16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: vrsrau16:
;CHECK: vrsra.u16
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
%tmp3 = call <4 x i16> @llvm.arm.neon.vrshiftu.v4i16(<4 x i16> %tmp2, <4 x i16> < i16 -16, i16 -16, i16 -16, i16 -16 >)
%tmp4 = add <4 x i16> %tmp1, %tmp3
ret <4 x i16> %tmp4
@@ -223,8 +223,8 @@ define <4 x i16> @vrsrau16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
define <2 x i32> @vrsrau32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: vrsrau32:
;CHECK: vrsra.u32
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
%tmp3 = call <2 x i32> @llvm.arm.neon.vrshiftu.v2i32(<2 x i32> %tmp2, <2 x i32> < i32 -32, i32 -32 >)
%tmp4 = add <2 x i32> %tmp1, %tmp3
ret <2 x i32> %tmp4
@@ -233,8 +233,8 @@ define <2 x i32> @vrsrau32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
define <1 x i64> @vrsrau64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
;CHECK-LABEL: vrsrau64:
;CHECK: vrsra.u64
- %tmp1 = load <1 x i64>* %A
- %tmp2 = load <1 x i64>* %B
+ %tmp1 = load <1 x i64>, <1 x i64>* %A
+ %tmp2 = load <1 x i64>, <1 x i64>* %B
%tmp3 = call <1 x i64> @llvm.arm.neon.vrshiftu.v1i64(<1 x i64> %tmp2, <1 x i64> < i64 -64 >)
%tmp4 = add <1 x i64> %tmp1, %tmp3
ret <1 x i64> %tmp4
@@ -243,8 +243,8 @@ define <1 x i64> @vrsrau64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
define <16 x i8> @vrsraQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: vrsraQs8:
;CHECK: vrsra.s8
- %tmp1 = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
+ %tmp1 = load <16 x i8>, <16 x i8>* %A
+ %tmp2 = load <16 x i8>, <16 x i8>* %B
%tmp3 = call <16 x i8> @llvm.arm.neon.vrshifts.v16i8(<16 x i8> %tmp2, <16 x i8> < i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8 >)
%tmp4 = add <16 x i8> %tmp1, %tmp3
ret <16 x i8> %tmp4
@@ -253,8 +253,8 @@ define <16 x i8> @vrsraQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
define <8 x i16> @vrsraQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: vrsraQs16:
;CHECK: vrsra.s16
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
+ %tmp2 = load <8 x i16>, <8 x i16>* %B
%tmp3 = call <8 x i16> @llvm.arm.neon.vrshifts.v8i16(<8 x i16> %tmp2, <8 x i16> < i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16 >)
%tmp4 = add <8 x i16> %tmp1, %tmp3
ret <8 x i16> %tmp4
@@ -263,8 +263,8 @@ define <8 x i16> @vrsraQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
define <4 x i32> @vrsraQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: vrsraQs32:
;CHECK: vrsra.s32
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
+ %tmp2 = load <4 x i32>, <4 x i32>* %B
%tmp3 = call <4 x i32> @llvm.arm.neon.vrshifts.v4i32(<4 x i32> %tmp2, <4 x i32> < i32 -32, i32 -32, i32 -32, i32 -32 >)
%tmp4 = add <4 x i32> %tmp1, %tmp3
ret <4 x i32> %tmp4
@@ -273,8 +273,8 @@ define <4 x i32> @vrsraQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
define <2 x i64> @vrsraQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
;CHECK-LABEL: vrsraQs64:
;CHECK: vrsra.s64
- %tmp1 = load <2 x i64>* %A
- %tmp2 = load <2 x i64>* %B
+ %tmp1 = load <2 x i64>, <2 x i64>* %A
+ %tmp2 = load <2 x i64>, <2 x i64>* %B
%tmp3 = call <2 x i64> @llvm.arm.neon.vrshifts.v2i64(<2 x i64> %tmp2, <2 x i64> < i64 -64, i64 -64 >)
%tmp4 = add <2 x i64> %tmp1, %tmp3
ret <2 x i64> %tmp4
@@ -283,8 +283,8 @@ define <2 x i64> @vrsraQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
define <16 x i8> @vrsraQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: vrsraQu8:
;CHECK: vrsra.u8
- %tmp1 = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
+ %tmp1 = load <16 x i8>, <16 x i8>* %A
+ %tmp2 = load <16 x i8>, <16 x i8>* %B
%tmp3 = call <16 x i8> @llvm.arm.neon.vrshiftu.v16i8(<16 x i8> %tmp2, <16 x i8> < i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8 >)
%tmp4 = add <16 x i8> %tmp1, %tmp3
ret <16 x i8> %tmp4
@@ -293,8 +293,8 @@ define <16 x i8> @vrsraQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
define <8 x i16> @vrsraQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: vrsraQu16:
;CHECK: vrsra.u16
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
+ %tmp2 = load <8 x i16>, <8 x i16>* %B
%tmp3 = call <8 x i16> @llvm.arm.neon.vrshiftu.v8i16(<8 x i16> %tmp2, <8 x i16> < i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16 >)
%tmp4 = add <8 x i16> %tmp1, %tmp3
ret <8 x i16> %tmp4
@@ -303,8 +303,8 @@ define <8 x i16> @vrsraQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
define <4 x i32> @vrsraQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: vrsraQu32:
;CHECK: vrsra.u32
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
+ %tmp2 = load <4 x i32>, <4 x i32>* %B
%tmp3 = call <4 x i32> @llvm.arm.neon.vrshiftu.v4i32(<4 x i32> %tmp2, <4 x i32> < i32 -32, i32 -32, i32 -32, i32 -32 >)
%tmp4 = add <4 x i32> %tmp1, %tmp3
ret <4 x i32> %tmp4
@@ -313,8 +313,8 @@ define <4 x i32> @vrsraQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
define <2 x i64> @vrsraQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
;CHECK-LABEL: vrsraQu64:
;CHECK: vrsra.u64
- %tmp1 = load <2 x i64>* %A
- %tmp2 = load <2 x i64>* %B
+ %tmp1 = load <2 x i64>, <2 x i64>* %A
+ %tmp2 = load <2 x i64>, <2 x i64>* %B
%tmp3 = call <2 x i64> @llvm.arm.neon.vrshiftu.v2i64(<2 x i64> %tmp2, <2 x i64> < i64 -64, i64 -64 >)
%tmp4 = add <2 x i64> %tmp1, %tmp3
ret <2 x i64> %tmp4
diff --git a/test/CodeGen/ARM/vst1.ll b/test/CodeGen/ARM/vst1.ll
index a6bcf7d8ead3..f605fa4d6003 100644
--- a/test/CodeGen/ARM/vst1.ll
+++ b/test/CodeGen/ARM/vst1.ll
@@ -4,7 +4,7 @@ define void @vst1i8(i8* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: vst1i8:
;Check the alignment value. Max for this instruction is 64 bits:
;CHECK: vst1.8 {d16}, [r0:64]
- %tmp1 = load <8 x i8>* %B
+ %tmp1 = load <8 x i8>, <8 x i8>* %B
call void @llvm.arm.neon.vst1.v8i8(i8* %A, <8 x i8> %tmp1, i32 16)
ret void
}
@@ -13,7 +13,7 @@ define void @vst1i16(i16* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: vst1i16:
;CHECK: vst1.16
%tmp0 = bitcast i16* %A to i8*
- %tmp1 = load <4 x i16>* %B
+ %tmp1 = load <4 x i16>, <4 x i16>* %B
call void @llvm.arm.neon.vst1.v4i16(i8* %tmp0, <4 x i16> %tmp1, i32 1)
ret void
}
@@ -22,7 +22,7 @@ define void @vst1i32(i32* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: vst1i32:
;CHECK: vst1.32
%tmp0 = bitcast i32* %A to i8*
- %tmp1 = load <2 x i32>* %B
+ %tmp1 = load <2 x i32>, <2 x i32>* %B
call void @llvm.arm.neon.vst1.v2i32(i8* %tmp0, <2 x i32> %tmp1, i32 1)
ret void
}
@@ -31,7 +31,7 @@ define void @vst1f(float* %A, <2 x float>* %B) nounwind {
;CHECK-LABEL: vst1f:
;CHECK: vst1.32
%tmp0 = bitcast float* %A to i8*
- %tmp1 = load <2 x float>* %B
+ %tmp1 = load <2 x float>, <2 x float>* %B
call void @llvm.arm.neon.vst1.v2f32(i8* %tmp0, <2 x float> %tmp1, i32 1)
ret void
}
@@ -40,11 +40,11 @@ define void @vst1f(float* %A, <2 x float>* %B) nounwind {
define void @vst1f_update(float** %ptr, <2 x float>* %B) nounwind {
;CHECK-LABEL: vst1f_update:
;CHECK: vst1.32 {d16}, [r1]!
- %A = load float** %ptr
+ %A = load float*, float** %ptr
%tmp0 = bitcast float* %A to i8*
- %tmp1 = load <2 x float>* %B
+ %tmp1 = load <2 x float>, <2 x float>* %B
call void @llvm.arm.neon.vst1.v2f32(i8* %tmp0, <2 x float> %tmp1, i32 1)
- %tmp2 = getelementptr float* %A, i32 2
+ %tmp2 = getelementptr float, float* %A, i32 2
store float* %tmp2, float** %ptr
ret void
}
@@ -53,7 +53,7 @@ define void @vst1i64(i64* %A, <1 x i64>* %B) nounwind {
;CHECK-LABEL: vst1i64:
;CHECK: vst1.64
%tmp0 = bitcast i64* %A to i8*
- %tmp1 = load <1 x i64>* %B
+ %tmp1 = load <1 x i64>, <1 x i64>* %B
call void @llvm.arm.neon.vst1.v1i64(i8* %tmp0, <1 x i64> %tmp1, i32 1)
ret void
}
@@ -62,7 +62,7 @@ define void @vst1Qi8(i8* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: vst1Qi8:
;Check the alignment value. Max for this instruction is 128 bits:
;CHECK: vst1.8 {d16, d17}, [r0:64]
- %tmp1 = load <16 x i8>* %B
+ %tmp1 = load <16 x i8>, <16 x i8>* %B
call void @llvm.arm.neon.vst1.v16i8(i8* %A, <16 x i8> %tmp1, i32 8)
ret void
}
@@ -72,7 +72,7 @@ define void @vst1Qi16(i16* %A, <8 x i16>* %B) nounwind {
;Check the alignment value. Max for this instruction is 128 bits:
;CHECK: vst1.16 {d16, d17}, [r0:128]
%tmp0 = bitcast i16* %A to i8*
- %tmp1 = load <8 x i16>* %B
+ %tmp1 = load <8 x i16>, <8 x i16>* %B
call void @llvm.arm.neon.vst1.v8i16(i8* %tmp0, <8 x i16> %tmp1, i32 32)
ret void
}
@@ -81,11 +81,11 @@ define void @vst1Qi16(i16* %A, <8 x i16>* %B) nounwind {
define void @vst1Qi16_update(i16** %ptr, <8 x i16>* %B, i32 %inc) nounwind {
;CHECK-LABEL: vst1Qi16_update:
;CHECK: vst1.16 {d16, d17}, [r1:64], r2
- %A = load i16** %ptr
+ %A = load i16*, i16** %ptr
%tmp0 = bitcast i16* %A to i8*
- %tmp1 = load <8 x i16>* %B
+ %tmp1 = load <8 x i16>, <8 x i16>* %B
call void @llvm.arm.neon.vst1.v8i16(i8* %tmp0, <8 x i16> %tmp1, i32 8)
- %tmp2 = getelementptr i16* %A, i32 %inc
+ %tmp2 = getelementptr i16, i16* %A, i32 %inc
store i16* %tmp2, i16** %ptr
ret void
}
@@ -94,7 +94,7 @@ define void @vst1Qi32(i32* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: vst1Qi32:
;CHECK: vst1.32
%tmp0 = bitcast i32* %A to i8*
- %tmp1 = load <4 x i32>* %B
+ %tmp1 = load <4 x i32>, <4 x i32>* %B
call void @llvm.arm.neon.vst1.v4i32(i8* %tmp0, <4 x i32> %tmp1, i32 1)
ret void
}
@@ -103,7 +103,7 @@ define void @vst1Qf(float* %A, <4 x float>* %B) nounwind {
;CHECK-LABEL: vst1Qf:
;CHECK: vst1.32
%tmp0 = bitcast float* %A to i8*
- %tmp1 = load <4 x float>* %B
+ %tmp1 = load <4 x float>, <4 x float>* %B
call void @llvm.arm.neon.vst1.v4f32(i8* %tmp0, <4 x float> %tmp1, i32 1)
ret void
}
@@ -112,7 +112,7 @@ define void @vst1Qi64(i64* %A, <2 x i64>* %B) nounwind {
;CHECK-LABEL: vst1Qi64:
;CHECK: vst1.64
%tmp0 = bitcast i64* %A to i8*
- %tmp1 = load <2 x i64>* %B
+ %tmp1 = load <2 x i64>, <2 x i64>* %B
call void @llvm.arm.neon.vst1.v2i64(i8* %tmp0, <2 x i64> %tmp1, i32 1)
ret void
}
@@ -121,7 +121,7 @@ define void @vst1Qf64(double* %A, <2 x double>* %B) nounwind {
;CHECK-LABEL: vst1Qf64:
;CHECK: vst1.64
%tmp0 = bitcast double* %A to i8*
- %tmp1 = load <2 x double>* %B
+ %tmp1 = load <2 x double>, <2 x double>* %B
call void @llvm.arm.neon.vst1.v2f64(i8* %tmp0, <2 x double> %tmp1, i32 1)
ret void
}
diff --git a/test/CodeGen/ARM/vst2.ll b/test/CodeGen/ARM/vst2.ll
index 2180259d57f7..17c8a4bdad9b 100644
--- a/test/CodeGen/ARM/vst2.ll
+++ b/test/CodeGen/ARM/vst2.ll
@@ -4,7 +4,7 @@ define void @vst2i8(i8* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: vst2i8:
;Check the alignment value. Max for this instruction is 128 bits:
;CHECK: vst2.8 {d16, d17}, [r0:64]
- %tmp1 = load <8 x i8>* %B
+ %tmp1 = load <8 x i8>, <8 x i8>* %B
call void @llvm.arm.neon.vst2.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 8)
ret void
}
@@ -13,10 +13,10 @@ define void @vst2i8(i8* %A, <8 x i8>* %B) nounwind {
define void @vst2i8_update(i8** %ptr, <8 x i8>* %B, i32 %inc) nounwind {
;CHECK-LABEL: vst2i8_update:
;CHECK: vst2.8 {d16, d17}, [r1], r2
- %A = load i8** %ptr
- %tmp1 = load <8 x i8>* %B
+ %A = load i8*, i8** %ptr
+ %tmp1 = load <8 x i8>, <8 x i8>* %B
call void @llvm.arm.neon.vst2.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 4)
- %tmp2 = getelementptr i8* %A, i32 %inc
+ %tmp2 = getelementptr i8, i8* %A, i32 %inc
store i8* %tmp2, i8** %ptr
ret void
}
@@ -26,7 +26,7 @@ define void @vst2i16(i16* %A, <4 x i16>* %B) nounwind {
;Check the alignment value. Max for this instruction is 128 bits:
;CHECK: vst2.16 {d16, d17}, [r0:128]
%tmp0 = bitcast i16* %A to i8*
- %tmp1 = load <4 x i16>* %B
+ %tmp1 = load <4 x i16>, <4 x i16>* %B
call void @llvm.arm.neon.vst2.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 32)
ret void
}
@@ -35,7 +35,7 @@ define void @vst2i32(i32* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: vst2i32:
;CHECK: vst2.32
%tmp0 = bitcast i32* %A to i8*
- %tmp1 = load <2 x i32>* %B
+ %tmp1 = load <2 x i32>, <2 x i32>* %B
call void @llvm.arm.neon.vst2.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1)
ret void
}
@@ -44,7 +44,7 @@ define void @vst2f(float* %A, <2 x float>* %B) nounwind {
;CHECK-LABEL: vst2f:
;CHECK: vst2.32
%tmp0 = bitcast float* %A to i8*
- %tmp1 = load <2 x float>* %B
+ %tmp1 = load <2 x float>, <2 x float>* %B
call void @llvm.arm.neon.vst2.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, i32 1)
ret void
}
@@ -54,7 +54,7 @@ define void @vst2i64(i64* %A, <1 x i64>* %B) nounwind {
;Check the alignment value. Max for this instruction is 128 bits:
;CHECK: vst1.64 {d16, d17}, [r0:128]
%tmp0 = bitcast i64* %A to i8*
- %tmp1 = load <1 x i64>* %B
+ %tmp1 = load <1 x i64>, <1 x i64>* %B
call void @llvm.arm.neon.vst2.v1i64(i8* %tmp0, <1 x i64> %tmp1, <1 x i64> %tmp1, i32 32)
ret void
}
@@ -63,11 +63,11 @@ define void @vst2i64(i64* %A, <1 x i64>* %B) nounwind {
define void @vst2i64_update(i64** %ptr, <1 x i64>* %B) nounwind {
;CHECK-LABEL: vst2i64_update:
;CHECK: vst1.64 {d16, d17}, [r1:64]!
- %A = load i64** %ptr
+ %A = load i64*, i64** %ptr
%tmp0 = bitcast i64* %A to i8*
- %tmp1 = load <1 x i64>* %B
+ %tmp1 = load <1 x i64>, <1 x i64>* %B
call void @llvm.arm.neon.vst2.v1i64(i8* %tmp0, <1 x i64> %tmp1, <1 x i64> %tmp1, i32 8)
- %tmp2 = getelementptr i64* %A, i32 2
+ %tmp2 = getelementptr i64, i64* %A, i32 2
store i64* %tmp2, i64** %ptr
ret void
}
@@ -76,7 +76,7 @@ define void @vst2Qi8(i8* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: vst2Qi8:
;Check the alignment value. Max for this instruction is 256 bits:
;CHECK: vst2.8 {d16, d17, d18, d19}, [r0:64]
- %tmp1 = load <16 x i8>* %B
+ %tmp1 = load <16 x i8>, <16 x i8>* %B
call void @llvm.arm.neon.vst2.v16i8(i8* %A, <16 x i8> %tmp1, <16 x i8> %tmp1, i32 8)
ret void
}
@@ -86,7 +86,7 @@ define void @vst2Qi16(i16* %A, <8 x i16>* %B) nounwind {
;Check the alignment value. Max for this instruction is 256 bits:
;CHECK: vst2.16 {d16, d17, d18, d19}, [r0:128]
%tmp0 = bitcast i16* %A to i8*
- %tmp1 = load <8 x i16>* %B
+ %tmp1 = load <8 x i16>, <8 x i16>* %B
call void @llvm.arm.neon.vst2.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 16)
ret void
}
@@ -96,7 +96,7 @@ define void @vst2Qi32(i32* %A, <4 x i32>* %B) nounwind {
;Check the alignment value. Max for this instruction is 256 bits:
;CHECK: vst2.32 {d16, d17, d18, d19}, [r0:256]
%tmp0 = bitcast i32* %A to i8*
- %tmp1 = load <4 x i32>* %B
+ %tmp1 = load <4 x i32>, <4 x i32>* %B
call void @llvm.arm.neon.vst2.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 64)
ret void
}
@@ -105,7 +105,7 @@ define void @vst2Qf(float* %A, <4 x float>* %B) nounwind {
;CHECK-LABEL: vst2Qf:
;CHECK: vst2.32
%tmp0 = bitcast float* %A to i8*
- %tmp1 = load <4 x float>* %B
+ %tmp1 = load <4 x float>, <4 x float>* %B
call void @llvm.arm.neon.vst2.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, i32 1)
ret void
}
@@ -113,18 +113,18 @@ define void @vst2Qf(float* %A, <4 x float>* %B) nounwind {
define i8* @vst2update(i8* %out, <4 x i16>* %B) nounwind {
;CHECK-LABEL: vst2update:
;CHECK: vst2.16 {d16, d17}, [r0]!
- %tmp1 = load <4 x i16>* %B
+ %tmp1 = load <4 x i16>, <4 x i16>* %B
tail call void @llvm.arm.neon.vst2.v4i16(i8* %out, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 2)
- %t5 = getelementptr inbounds i8* %out, i32 16
+ %t5 = getelementptr inbounds i8, i8* %out, i32 16
ret i8* %t5
}
define i8* @vst2update2(i8 * %out, <4 x float> * %this) nounwind optsize ssp align 2 {
;CHECK-LABEL: vst2update2:
;CHECK: vst2.32 {d16, d17, d18, d19}, [r0]!
- %tmp1 = load <4 x float>* %this
+ %tmp1 = load <4 x float>, <4 x float>* %this
call void @llvm.arm.neon.vst2.v4f32(i8* %out, <4 x float> %tmp1, <4 x float> %tmp1, i32 4) nounwind
- %tmp2 = getelementptr inbounds i8* %out, i32 32
+ %tmp2 = getelementptr inbounds i8, i8* %out, i32 32
ret i8* %tmp2
}
diff --git a/test/CodeGen/ARM/vst3.ll b/test/CodeGen/ARM/vst3.ll
index 5f150edf31da..691ee3bd28f3 100644
--- a/test/CodeGen/ARM/vst3.ll
+++ b/test/CodeGen/ARM/vst3.ll
@@ -5,7 +5,7 @@ define void @vst3i8(i8* %A, <8 x i8>* %B) nounwind {
;Check the alignment value. Max for this instruction is 64 bits:
;This test runs at -O0 so do not check for specific register numbers.
;CHECK: vst3.8 {d{{.*}}, d{{.*}}, d{{.*}}}, [r{{.*}}:64]
- %tmp1 = load <8 x i8>* %B
+ %tmp1 = load <8 x i8>, <8 x i8>* %B
call void @llvm.arm.neon.vst3.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 32)
ret void
}
@@ -14,7 +14,7 @@ define void @vst3i16(i16* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: vst3i16:
;CHECK: vst3.16
%tmp0 = bitcast i16* %A to i8*
- %tmp1 = load <4 x i16>* %B
+ %tmp1 = load <4 x i16>, <4 x i16>* %B
call void @llvm.arm.neon.vst3.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1)
ret void
}
@@ -23,7 +23,7 @@ define void @vst3i32(i32* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: vst3i32:
;CHECK: vst3.32
%tmp0 = bitcast i32* %A to i8*
- %tmp1 = load <2 x i32>* %B
+ %tmp1 = load <2 x i32>, <2 x i32>* %B
call void @llvm.arm.neon.vst3.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1)
ret void
}
@@ -32,11 +32,11 @@ define void @vst3i32(i32* %A, <2 x i32>* %B) nounwind {
define void @vst3i32_update(i32** %ptr, <2 x i32>* %B) nounwind {
;CHECK-LABEL: vst3i32_update:
;CHECK: vst3.32 {d{{.*}}, d{{.*}}, d{{.*}}}, [r{{.*}}]!
- %A = load i32** %ptr
+ %A = load i32*, i32** %ptr
%tmp0 = bitcast i32* %A to i8*
- %tmp1 = load <2 x i32>* %B
+ %tmp1 = load <2 x i32>, <2 x i32>* %B
call void @llvm.arm.neon.vst3.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1)
- %tmp2 = getelementptr i32* %A, i32 6
+ %tmp2 = getelementptr i32, i32* %A, i32 6
store i32* %tmp2, i32** %ptr
ret void
}
@@ -45,7 +45,7 @@ define void @vst3f(float* %A, <2 x float>* %B) nounwind {
;CHECK-LABEL: vst3f:
;CHECK: vst3.32
%tmp0 = bitcast float* %A to i8*
- %tmp1 = load <2 x float>* %B
+ %tmp1 = load <2 x float>, <2 x float>* %B
call void @llvm.arm.neon.vst3.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, i32 1)
ret void
}
@@ -56,7 +56,7 @@ define void @vst3i64(i64* %A, <1 x i64>* %B) nounwind {
;This test runs at -O0 so do not check for specific register numbers.
;CHECK: vst1.64 {d{{.*}}, d{{.*}}, d{{.*}}}, [r{{.*}}:64]
%tmp0 = bitcast i64* %A to i8*
- %tmp1 = load <1 x i64>* %B
+ %tmp1 = load <1 x i64>, <1 x i64>* %B
call void @llvm.arm.neon.vst3.v1i64(i8* %tmp0, <1 x i64> %tmp1, <1 x i64> %tmp1, <1 x i64> %tmp1, i32 16)
ret void
}
@@ -64,11 +64,11 @@ define void @vst3i64(i64* %A, <1 x i64>* %B) nounwind {
define void @vst3i64_update(i64** %ptr, <1 x i64>* %B) nounwind {
;CHECK-LABEL: vst3i64_update
;CHECK: vst1.64 {d{{.*}}, d{{.*}}, d{{.*}}}, [r{{.*}}]!
- %A = load i64** %ptr
+ %A = load i64*, i64** %ptr
%tmp0 = bitcast i64* %A to i8*
- %tmp1 = load <1 x i64>* %B
+ %tmp1 = load <1 x i64>, <1 x i64>* %B
call void @llvm.arm.neon.vst3.v1i64(i8* %tmp0, <1 x i64> %tmp1, <1 x i64> %tmp1, <1 x i64> %tmp1, i32 1)
- %tmp2 = getelementptr i64* %A, i32 3
+ %tmp2 = getelementptr i64, i64* %A, i32 3
store i64* %tmp2, i64** %ptr
ret void
}
@@ -79,7 +79,7 @@ define void @vst3Qi8(i8* %A, <16 x i8>* %B) nounwind {
;This test runs at -O0 so do not check for specific register numbers.
;CHECK: vst3.8 {d{{.*}}, d{{.*}}, d{{.*}}}, [r{{.*}}:64]!
;CHECK: vst3.8 {d{{.*}}, d{{.*}}, d{{.*}}}, [r{{.*}}:64]
- %tmp1 = load <16 x i8>* %B
+ %tmp1 = load <16 x i8>, <16 x i8>* %B
call void @llvm.arm.neon.vst3.v16i8(i8* %A, <16 x i8> %tmp1, <16 x i8> %tmp1, <16 x i8> %tmp1, i32 32)
ret void
}
@@ -89,7 +89,7 @@ define void @vst3Qi16(i16* %A, <8 x i16>* %B) nounwind {
;CHECK: vst3.16
;CHECK: vst3.16
%tmp0 = bitcast i16* %A to i8*
- %tmp1 = load <8 x i16>* %B
+ %tmp1 = load <8 x i16>, <8 x i16>* %B
call void @llvm.arm.neon.vst3.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1)
ret void
}
@@ -99,11 +99,11 @@ define void @vst3Qi16_update(i16** %ptr, <8 x i16>* %B) nounwind {
;CHECK-LABEL: vst3Qi16_update:
;CHECK: vst3.16 {d{{.*}}, d{{.*}}, d{{.*}}}, [r{{.*}}]!
;CHECK: vst3.16 {d{{.*}}, d{{.*}}, d{{.*}}}, [r{{.*}}]!
- %A = load i16** %ptr
+ %A = load i16*, i16** %ptr
%tmp0 = bitcast i16* %A to i8*
- %tmp1 = load <8 x i16>* %B
+ %tmp1 = load <8 x i16>, <8 x i16>* %B
call void @llvm.arm.neon.vst3.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1)
- %tmp2 = getelementptr i16* %A, i32 24
+ %tmp2 = getelementptr i16, i16* %A, i32 24
store i16* %tmp2, i16** %ptr
ret void
}
@@ -113,7 +113,7 @@ define void @vst3Qi32(i32* %A, <4 x i32>* %B) nounwind {
;CHECK: vst3.32
;CHECK: vst3.32
%tmp0 = bitcast i32* %A to i8*
- %tmp1 = load <4 x i32>* %B
+ %tmp1 = load <4 x i32>, <4 x i32>* %B
call void @llvm.arm.neon.vst3.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 1)
ret void
}
@@ -123,7 +123,7 @@ define void @vst3Qf(float* %A, <4 x float>* %B) nounwind {
;CHECK: vst3.32
;CHECK: vst3.32
%tmp0 = bitcast float* %A to i8*
- %tmp1 = load <4 x float>* %B
+ %tmp1 = load <4 x float>, <4 x float>* %B
call void @llvm.arm.neon.vst3.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, i32 1)
ret void
}
diff --git a/test/CodeGen/ARM/vst4.ll b/test/CodeGen/ARM/vst4.ll
index 44c76b5ed189..c343c6c86959 100644
--- a/test/CodeGen/ARM/vst4.ll
+++ b/test/CodeGen/ARM/vst4.ll
@@ -4,7 +4,7 @@ define void @vst4i8(i8* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: vst4i8:
;Check the alignment value. Max for this instruction is 256 bits:
;CHECK: vst4.8 {d16, d17, d18, d19}, [r0:64]
- %tmp1 = load <8 x i8>* %B
+ %tmp1 = load <8 x i8>, <8 x i8>* %B
call void @llvm.arm.neon.vst4.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 8)
ret void
}
@@ -13,10 +13,10 @@ define void @vst4i8(i8* %A, <8 x i8>* %B) nounwind {
define void @vst4i8_update(i8** %ptr, <8 x i8>* %B, i32 %inc) nounwind {
;CHECK-LABEL: vst4i8_update:
;CHECK: vst4.8 {d16, d17, d18, d19}, [r1:128], r2
- %A = load i8** %ptr
- %tmp1 = load <8 x i8>* %B
+ %A = load i8*, i8** %ptr
+ %tmp1 = load <8 x i8>, <8 x i8>* %B
call void @llvm.arm.neon.vst4.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 16)
- %tmp2 = getelementptr i8* %A, i32 %inc
+ %tmp2 = getelementptr i8, i8* %A, i32 %inc
store i8* %tmp2, i8** %ptr
ret void
}
@@ -26,7 +26,7 @@ define void @vst4i16(i16* %A, <4 x i16>* %B) nounwind {
;Check the alignment value. Max for this instruction is 256 bits:
;CHECK: vst4.16 {d16, d17, d18, d19}, [r0:128]
%tmp0 = bitcast i16* %A to i8*
- %tmp1 = load <4 x i16>* %B
+ %tmp1 = load <4 x i16>, <4 x i16>* %B
call void @llvm.arm.neon.vst4.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 16)
ret void
}
@@ -36,7 +36,7 @@ define void @vst4i32(i32* %A, <2 x i32>* %B) nounwind {
;Check the alignment value. Max for this instruction is 256 bits:
;CHECK: vst4.32 {d16, d17, d18, d19}, [r0:256]
%tmp0 = bitcast i32* %A to i8*
- %tmp1 = load <2 x i32>* %B
+ %tmp1 = load <2 x i32>, <2 x i32>* %B
call void @llvm.arm.neon.vst4.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 32)
ret void
}
@@ -45,7 +45,7 @@ define void @vst4f(float* %A, <2 x float>* %B) nounwind {
;CHECK-LABEL: vst4f:
;CHECK: vst4.32
%tmp0 = bitcast float* %A to i8*
- %tmp1 = load <2 x float>* %B
+ %tmp1 = load <2 x float>, <2 x float>* %B
call void @llvm.arm.neon.vst4.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, i32 1)
ret void
}
@@ -55,7 +55,7 @@ define void @vst4i64(i64* %A, <1 x i64>* %B) nounwind {
;Check the alignment value. Max for this instruction is 256 bits:
;CHECK: vst1.64 {d16, d17, d18, d19}, [r0:256]
%tmp0 = bitcast i64* %A to i8*
- %tmp1 = load <1 x i64>* %B
+ %tmp1 = load <1 x i64>, <1 x i64>* %B
call void @llvm.arm.neon.vst4.v1i64(i8* %tmp0, <1 x i64> %tmp1, <1 x i64> %tmp1, <1 x i64> %tmp1, <1 x i64> %tmp1, i32 64)
ret void
}
@@ -63,11 +63,11 @@ define void @vst4i64(i64* %A, <1 x i64>* %B) nounwind {
define void @vst4i64_update(i64** %ptr, <1 x i64>* %B) nounwind {
;CHECK-LABEL: vst4i64_update:
;CHECK: vst1.64 {d16, d17, d18, d19}, [r1]!
- %A = load i64** %ptr
+ %A = load i64*, i64** %ptr
%tmp0 = bitcast i64* %A to i8*
- %tmp1 = load <1 x i64>* %B
+ %tmp1 = load <1 x i64>, <1 x i64>* %B
call void @llvm.arm.neon.vst4.v1i64(i8* %tmp0, <1 x i64> %tmp1, <1 x i64> %tmp1, <1 x i64> %tmp1, <1 x i64> %tmp1, i32 1)
- %tmp2 = getelementptr i64* %A, i32 4
+ %tmp2 = getelementptr i64, i64* %A, i32 4
store i64* %tmp2, i64** %ptr
ret void
}
@@ -77,7 +77,7 @@ define void @vst4Qi8(i8* %A, <16 x i8>* %B) nounwind {
;Check the alignment value. Max for this instruction is 256 bits:
;CHECK: vst4.8 {d16, d18, d20, d22}, [r0:256]!
;CHECK: vst4.8 {d17, d19, d21, d23}, [r0:256]
- %tmp1 = load <16 x i8>* %B
+ %tmp1 = load <16 x i8>, <16 x i8>* %B
call void @llvm.arm.neon.vst4.v16i8(i8* %A, <16 x i8> %tmp1, <16 x i8> %tmp1, <16 x i8> %tmp1, <16 x i8> %tmp1, i32 64)
ret void
}
@@ -88,7 +88,7 @@ define void @vst4Qi16(i16* %A, <8 x i16>* %B) nounwind {
;CHECK: vst4.16 {d16, d18, d20, d22}, [r0]!
;CHECK: vst4.16 {d17, d19, d21, d23}, [r0]
%tmp0 = bitcast i16* %A to i8*
- %tmp1 = load <8 x i16>* %B
+ %tmp1 = load <8 x i16>, <8 x i16>* %B
call void @llvm.arm.neon.vst4.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1)
ret void
}
@@ -98,7 +98,7 @@ define void @vst4Qi32(i32* %A, <4 x i32>* %B) nounwind {
;CHECK: vst4.32
;CHECK: vst4.32
%tmp0 = bitcast i32* %A to i8*
- %tmp1 = load <4 x i32>* %B
+ %tmp1 = load <4 x i32>, <4 x i32>* %B
call void @llvm.arm.neon.vst4.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 1)
ret void
}
@@ -108,7 +108,7 @@ define void @vst4Qf(float* %A, <4 x float>* %B) nounwind {
;CHECK: vst4.32
;CHECK: vst4.32
%tmp0 = bitcast float* %A to i8*
- %tmp1 = load <4 x float>* %B
+ %tmp1 = load <4 x float>, <4 x float>* %B
call void @llvm.arm.neon.vst4.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, i32 1)
ret void
}
@@ -118,11 +118,11 @@ define void @vst4Qf_update(float** %ptr, <4 x float>* %B) nounwind {
;CHECK-LABEL: vst4Qf_update:
;CHECK: vst4.32 {d16, d18, d20, d22}, [r1]!
;CHECK: vst4.32 {d17, d19, d21, d23}, [r1]!
- %A = load float** %ptr
+ %A = load float*, float** %ptr
%tmp0 = bitcast float* %A to i8*
- %tmp1 = load <4 x float>* %B
+ %tmp1 = load <4 x float>, <4 x float>* %B
call void @llvm.arm.neon.vst4.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, i32 1)
- %tmp2 = getelementptr float* %A, i32 16
+ %tmp2 = getelementptr float, float* %A, i32 16
store float* %tmp2, float** %ptr
ret void
}
diff --git a/test/CodeGen/ARM/vstlane.ll b/test/CodeGen/ARM/vstlane.ll
index 7dd6e7b439ea..a4575417bce5 100644
--- a/test/CodeGen/ARM/vstlane.ll
+++ b/test/CodeGen/ARM/vstlane.ll
@@ -4,7 +4,7 @@ define void @vst1lanei8(i8* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: vst1lanei8:
;Check the (default) alignment.
;CHECK: vst1.8 {d16[3]}, [r0]
- %tmp1 = load <8 x i8>* %B
+ %tmp1 = load <8 x i8>, <8 x i8>* %B
%tmp2 = extractelement <8 x i8> %tmp1, i32 3
store i8 %tmp2, i8* %A, align 8
ret void
@@ -14,11 +14,11 @@ define void @vst1lanei8(i8* %A, <8 x i8>* %B) nounwind {
define void @vst1lanei8_update(i8** %ptr, <8 x i8>* %B) nounwind {
;CHECK-LABEL: vst1lanei8_update:
;CHECK: vst1.8 {d16[3]}, [{{r[0-9]}}]!
- %A = load i8** %ptr
- %tmp1 = load <8 x i8>* %B
+ %A = load i8*, i8** %ptr
+ %tmp1 = load <8 x i8>, <8 x i8>* %B
%tmp2 = extractelement <8 x i8> %tmp1, i32 3
store i8 %tmp2, i8* %A, align 8
- %tmp3 = getelementptr i8* %A, i32 1
+ %tmp3 = getelementptr i8, i8* %A, i32 1
store i8* %tmp3, i8** %ptr
ret void
}
@@ -27,7 +27,7 @@ define void @vst1lanei16(i16* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: vst1lanei16:
;Check the alignment value. Max for this instruction is 16 bits:
;CHECK: vst1.16 {d16[2]}, [r0:16]
- %tmp1 = load <4 x i16>* %B
+ %tmp1 = load <4 x i16>, <4 x i16>* %B
%tmp2 = extractelement <4 x i16> %tmp1, i32 2
store i16 %tmp2, i16* %A, align 8
ret void
@@ -37,7 +37,7 @@ define void @vst1lanei32(i32* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: vst1lanei32:
;Check the alignment value. Max for this instruction is 32 bits:
;CHECK: vst1.32 {d16[1]}, [r0:32]
- %tmp1 = load <2 x i32>* %B
+ %tmp1 = load <2 x i32>, <2 x i32>* %B
%tmp2 = extractelement <2 x i32> %tmp1, i32 1
store i32 %tmp2, i32* %A, align 8
ret void
@@ -46,7 +46,7 @@ define void @vst1lanei32(i32* %A, <2 x i32>* %B) nounwind {
define void @vst1lanef(float* %A, <2 x float>* %B) nounwind {
;CHECK-LABEL: vst1lanef:
;CHECK: vst1.32 {d16[1]}, [r0:32]
- %tmp1 = load <2 x float>* %B
+ %tmp1 = load <2 x float>, <2 x float>* %B
%tmp2 = extractelement <2 x float> %tmp1, i32 1
store float %tmp2, float* %A
ret void
@@ -56,7 +56,7 @@ define void @vst1laneQi8(i8* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: vst1laneQi8:
; // Can use scalar load. No need to use vectors.
; // CHE-CK: vst1.8 {d17[1]}, [r0]
- %tmp1 = load <16 x i8>* %B
+ %tmp1 = load <16 x i8>, <16 x i8>* %B
%tmp2 = extractelement <16 x i8> %tmp1, i32 9
store i8 %tmp2, i8* %A, align 8
ret void
@@ -65,7 +65,7 @@ define void @vst1laneQi8(i8* %A, <16 x i8>* %B) nounwind {
define void @vst1laneQi16(i16* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: vst1laneQi16:
;CHECK: vst1.16 {d17[1]}, [r0:16]
- %tmp1 = load <8 x i16>* %B
+ %tmp1 = load <8 x i16>, <8 x i16>* %B
%tmp2 = extractelement <8 x i16> %tmp1, i32 5
store i16 %tmp2, i16* %A, align 8
ret void
@@ -75,7 +75,7 @@ define void @vst1laneQi32(i32* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: vst1laneQi32:
; // Can use scalar load. No need to use vectors.
; // CHE-CK: vst1.32 {d17[1]}, [r0:32]
- %tmp1 = load <4 x i32>* %B
+ %tmp1 = load <4 x i32>, <4 x i32>* %B
%tmp2 = extractelement <4 x i32> %tmp1, i32 3
store i32 %tmp2, i32* %A, align 8
ret void
@@ -86,11 +86,11 @@ define void @vst1laneQi32_update(i32** %ptr, <4 x i32>* %B) nounwind {
;CHECK-LABEL: vst1laneQi32_update:
; // Can use scalar load. No need to use vectors.
; // CHE-CK: vst1.32 {d17[1]}, [r1:32]!
- %A = load i32** %ptr
- %tmp1 = load <4 x i32>* %B
+ %A = load i32*, i32** %ptr
+ %tmp1 = load <4 x i32>, <4 x i32>* %B
%tmp2 = extractelement <4 x i32> %tmp1, i32 3
store i32 %tmp2, i32* %A, align 8
- %tmp3 = getelementptr i32* %A, i32 1
+ %tmp3 = getelementptr i32, i32* %A, i32 1
store i32* %tmp3, i32** %ptr
ret void
}
@@ -99,7 +99,7 @@ define void @vst1laneQf(float* %A, <4 x float>* %B) nounwind {
;CHECK-LABEL: vst1laneQf:
; // Can use scalar load. No need to use vectors.
; // CHE-CK: vst1.32 {d17[1]}, [r0]
- %tmp1 = load <4 x float>* %B
+ %tmp1 = load <4 x float>, <4 x float>* %B
%tmp2 = extractelement <4 x float> %tmp1, i32 3
store float %tmp2, float* %A
ret void
@@ -109,7 +109,7 @@ define void @vst2lanei8(i8* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: vst2lanei8:
;Check the alignment value. Max for this instruction is 16 bits:
;CHECK: vst2.8 {d16[1], d17[1]}, [r0:16]
- %tmp1 = load <8 x i8>* %B
+ %tmp1 = load <8 x i8>, <8 x i8>* %B
call void @llvm.arm.neon.vst2lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 4)
ret void
}
@@ -119,7 +119,7 @@ define void @vst2lanei16(i16* %A, <4 x i16>* %B) nounwind {
;Check the alignment value. Max for this instruction is 32 bits:
;CHECK: vst2.16 {d16[1], d17[1]}, [r0:32]
%tmp0 = bitcast i16* %A to i8*
- %tmp1 = load <4 x i16>* %B
+ %tmp1 = load <4 x i16>, <4 x i16>* %B
call void @llvm.arm.neon.vst2lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1, i32 8)
ret void
}
@@ -128,11 +128,11 @@ define void @vst2lanei16(i16* %A, <4 x i16>* %B) nounwind {
define void @vst2lanei16_update(i16** %ptr, <4 x i16>* %B, i32 %inc) nounwind {
;CHECK-LABEL: vst2lanei16_update:
;CHECK: vst2.16 {d16[1], d17[1]}, [r1], r2
- %A = load i16** %ptr
+ %A = load i16*, i16** %ptr
%tmp0 = bitcast i16* %A to i8*
- %tmp1 = load <4 x i16>* %B
+ %tmp1 = load <4 x i16>, <4 x i16>* %B
call void @llvm.arm.neon.vst2lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1, i32 2)
- %tmp2 = getelementptr i16* %A, i32 %inc
+ %tmp2 = getelementptr i16, i16* %A, i32 %inc
store i16* %tmp2, i16** %ptr
ret void
}
@@ -141,7 +141,7 @@ define void @vst2lanei32(i32* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: vst2lanei32:
;CHECK: vst2.32
%tmp0 = bitcast i32* %A to i8*
- %tmp1 = load <2 x i32>* %B
+ %tmp1 = load <2 x i32>, <2 x i32>* %B
call void @llvm.arm.neon.vst2lane.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1, i32 1)
ret void
}
@@ -150,7 +150,7 @@ define void @vst2lanef(float* %A, <2 x float>* %B) nounwind {
;CHECK-LABEL: vst2lanef:
;CHECK: vst2.32
%tmp0 = bitcast float* %A to i8*
- %tmp1 = load <2 x float>* %B
+ %tmp1 = load <2 x float>, <2 x float>* %B
call void @llvm.arm.neon.vst2lane.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, i32 1, i32 1)
ret void
}
@@ -160,7 +160,7 @@ define void @vst2laneQi16(i16* %A, <8 x i16>* %B) nounwind {
;Check the (default) alignment.
;CHECK: vst2.16 {d17[1], d19[1]}, [r0]
%tmp0 = bitcast i16* %A to i8*
- %tmp1 = load <8 x i16>* %B
+ %tmp1 = load <8 x i16>, <8 x i16>* %B
call void @llvm.arm.neon.vst2lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 5, i32 1)
ret void
}
@@ -170,7 +170,7 @@ define void @vst2laneQi32(i32* %A, <4 x i32>* %B) nounwind {
;Check the alignment value. Max for this instruction is 64 bits:
;CHECK: vst2.32 {d17[0], d19[0]}, [r0:64]
%tmp0 = bitcast i32* %A to i8*
- %tmp1 = load <4 x i32>* %B
+ %tmp1 = load <4 x i32>, <4 x i32>* %B
call void @llvm.arm.neon.vst2lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 2, i32 16)
ret void
}
@@ -179,7 +179,7 @@ define void @vst2laneQf(float* %A, <4 x float>* %B) nounwind {
;CHECK-LABEL: vst2laneQf:
;CHECK: vst2.32
%tmp0 = bitcast float* %A to i8*
- %tmp1 = load <4 x float>* %B
+ %tmp1 = load <4 x float>, <4 x float>* %B
call void @llvm.arm.neon.vst2lane.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, i32 3, i32 1)
ret void
}
@@ -196,7 +196,7 @@ declare void @llvm.arm.neon.vst2lane.v4f32(i8*, <4 x float>, <4 x float>, i32, i
define void @vst3lanei8(i8* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: vst3lanei8:
;CHECK: vst3.8
- %tmp1 = load <8 x i8>* %B
+ %tmp1 = load <8 x i8>, <8 x i8>* %B
call void @llvm.arm.neon.vst3lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 1)
ret void
}
@@ -206,7 +206,7 @@ define void @vst3lanei16(i16* %A, <4 x i16>* %B) nounwind {
;Check the (default) alignment value. VST3 does not support alignment.
;CHECK: vst3.16 {d16[1], d17[1], d18[1]}, [r0]
%tmp0 = bitcast i16* %A to i8*
- %tmp1 = load <4 x i16>* %B
+ %tmp1 = load <4 x i16>, <4 x i16>* %B
call void @llvm.arm.neon.vst3lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1, i32 8)
ret void
}
@@ -215,7 +215,7 @@ define void @vst3lanei32(i32* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: vst3lanei32:
;CHECK: vst3.32
%tmp0 = bitcast i32* %A to i8*
- %tmp1 = load <2 x i32>* %B
+ %tmp1 = load <2 x i32>, <2 x i32>* %B
call void @llvm.arm.neon.vst3lane.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1, i32 1)
ret void
}
@@ -224,7 +224,7 @@ define void @vst3lanef(float* %A, <2 x float>* %B) nounwind {
;CHECK-LABEL: vst3lanef:
;CHECK: vst3.32
%tmp0 = bitcast float* %A to i8*
- %tmp1 = load <2 x float>* %B
+ %tmp1 = load <2 x float>, <2 x float>* %B
call void @llvm.arm.neon.vst3lane.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, i32 1, i32 1)
ret void
}
@@ -234,7 +234,7 @@ define void @vst3laneQi16(i16* %A, <8 x i16>* %B) nounwind {
;Check the (default) alignment value. VST3 does not support alignment.
;CHECK: vst3.16 {d17[2], d19[2], d21[2]}, [r0]
%tmp0 = bitcast i16* %A to i8*
- %tmp1 = load <8 x i16>* %B
+ %tmp1 = load <8 x i16>, <8 x i16>* %B
call void @llvm.arm.neon.vst3lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 6, i32 8)
ret void
}
@@ -243,7 +243,7 @@ define void @vst3laneQi32(i32* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: vst3laneQi32:
;CHECK: vst3.32
%tmp0 = bitcast i32* %A to i8*
- %tmp1 = load <4 x i32>* %B
+ %tmp1 = load <4 x i32>, <4 x i32>* %B
call void @llvm.arm.neon.vst3lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 0, i32 1)
ret void
}
@@ -252,11 +252,11 @@ define void @vst3laneQi32(i32* %A, <4 x i32>* %B) nounwind {
define void @vst3laneQi32_update(i32** %ptr, <4 x i32>* %B) nounwind {
;CHECK-LABEL: vst3laneQi32_update:
;CHECK: vst3.32 {d16[0], d18[0], d20[0]}, [r1]!
- %A = load i32** %ptr
+ %A = load i32*, i32** %ptr
%tmp0 = bitcast i32* %A to i8*
- %tmp1 = load <4 x i32>* %B
+ %tmp1 = load <4 x i32>, <4 x i32>* %B
call void @llvm.arm.neon.vst3lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 0, i32 1)
- %tmp2 = getelementptr i32* %A, i32 3
+ %tmp2 = getelementptr i32, i32* %A, i32 3
store i32* %tmp2, i32** %ptr
ret void
}
@@ -265,7 +265,7 @@ define void @vst3laneQf(float* %A, <4 x float>* %B) nounwind {
;CHECK-LABEL: vst3laneQf:
;CHECK: vst3.32
%tmp0 = bitcast float* %A to i8*
- %tmp1 = load <4 x float>* %B
+ %tmp1 = load <4 x float>, <4 x float>* %B
call void @llvm.arm.neon.vst3lane.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, i32 1, i32 1)
ret void
}
@@ -284,7 +284,7 @@ define void @vst4lanei8(i8* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: vst4lanei8:
;Check the alignment value. Max for this instruction is 32 bits:
;CHECK: vst4.8 {d16[1], d17[1], d18[1], d19[1]}, [r0:32]
- %tmp1 = load <8 x i8>* %B
+ %tmp1 = load <8 x i8>, <8 x i8>* %B
call void @llvm.arm.neon.vst4lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 8)
ret void
}
@@ -293,10 +293,10 @@ define void @vst4lanei8(i8* %A, <8 x i8>* %B) nounwind {
define void @vst4lanei8_update(i8** %ptr, <8 x i8>* %B) nounwind {
;CHECK-LABEL: vst4lanei8_update:
;CHECK: vst4.8 {d16[1], d17[1], d18[1], d19[1]}, [r1:32]!
- %A = load i8** %ptr
- %tmp1 = load <8 x i8>* %B
+ %A = load i8*, i8** %ptr
+ %tmp1 = load <8 x i8>, <8 x i8>* %B
call void @llvm.arm.neon.vst4lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 8)
- %tmp2 = getelementptr i8* %A, i32 4
+ %tmp2 = getelementptr i8, i8* %A, i32 4
store i8* %tmp2, i8** %ptr
ret void
}
@@ -305,7 +305,7 @@ define void @vst4lanei16(i16* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: vst4lanei16:
;CHECK: vst4.16
%tmp0 = bitcast i16* %A to i8*
- %tmp1 = load <4 x i16>* %B
+ %tmp1 = load <4 x i16>, <4 x i16>* %B
call void @llvm.arm.neon.vst4lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1, i32 1)
ret void
}
@@ -315,7 +315,7 @@ define void @vst4lanei32(i32* %A, <2 x i32>* %B) nounwind {
;Check the alignment value. Max for this instruction is 128 bits:
;CHECK: vst4.32 {d16[1], d17[1], d18[1], d19[1]}, [r0:128]
%tmp0 = bitcast i32* %A to i8*
- %tmp1 = load <2 x i32>* %B
+ %tmp1 = load <2 x i32>, <2 x i32>* %B
call void @llvm.arm.neon.vst4lane.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1, i32 16)
ret void
}
@@ -324,7 +324,7 @@ define void @vst4lanef(float* %A, <2 x float>* %B) nounwind {
;CHECK-LABEL: vst4lanef:
;CHECK: vst4.32
%tmp0 = bitcast float* %A to i8*
- %tmp1 = load <2 x float>* %B
+ %tmp1 = load <2 x float>, <2 x float>* %B
call void @llvm.arm.neon.vst4lane.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, i32 1, i32 1)
ret void
}
@@ -334,7 +334,7 @@ define void @vst4laneQi16(i16* %A, <8 x i16>* %B) nounwind {
;Check the alignment value. Max for this instruction is 64 bits:
;CHECK: vst4.16 {d17[3], d19[3], d21[3], d23[3]}, [r0:64]
%tmp0 = bitcast i16* %A to i8*
- %tmp1 = load <8 x i16>* %B
+ %tmp1 = load <8 x i16>, <8 x i16>* %B
call void @llvm.arm.neon.vst4lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 7, i32 16)
ret void
}
@@ -344,7 +344,7 @@ define void @vst4laneQi32(i32* %A, <4 x i32>* %B) nounwind {
;Check the (default) alignment.
;CHECK: vst4.32 {d17[0], d19[0], d21[0], d23[0]}, [r0]
%tmp0 = bitcast i32* %A to i8*
- %tmp1 = load <4 x i32>* %B
+ %tmp1 = load <4 x i32>, <4 x i32>* %B
call void @llvm.arm.neon.vst4lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 2, i32 1)
ret void
}
@@ -353,7 +353,7 @@ define void @vst4laneQf(float* %A, <4 x float>* %B) nounwind {
;CHECK-LABEL: vst4laneQf:
;CHECK: vst4.32
%tmp0 = bitcast float* %A to i8*
- %tmp1 = load <4 x float>* %B
+ %tmp1 = load <4 x float>, <4 x float>* %B
call void @llvm.arm.neon.vst4lane.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, i32 1, i32 1)
ret void
}
diff --git a/test/CodeGen/ARM/vsub.ll b/test/CodeGen/ARM/vsub.ll
index d1a094b92755..75fb7d493a59 100644
--- a/test/CodeGen/ARM/vsub.ll
+++ b/test/CodeGen/ARM/vsub.ll
@@ -3,8 +3,8 @@
define <8 x i8> @vsubi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: vsubi8:
;CHECK: vsub.i8
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
+ %tmp2 = load <8 x i8>, <8 x i8>* %B
%tmp3 = sub <8 x i8> %tmp1, %tmp2
ret <8 x i8> %tmp3
}
@@ -12,8 +12,8 @@ define <8 x i8> @vsubi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
define <4 x i16> @vsubi16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: vsubi16:
;CHECK: vsub.i16
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
%tmp3 = sub <4 x i16> %tmp1, %tmp2
ret <4 x i16> %tmp3
}
@@ -21,8 +21,8 @@ define <4 x i16> @vsubi16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
define <2 x i32> @vsubi32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: vsubi32:
;CHECK: vsub.i32
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
%tmp3 = sub <2 x i32> %tmp1, %tmp2
ret <2 x i32> %tmp3
}
@@ -30,8 +30,8 @@ define <2 x i32> @vsubi32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
define <1 x i64> @vsubi64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
;CHECK-LABEL: vsubi64:
;CHECK: vsub.i64
- %tmp1 = load <1 x i64>* %A
- %tmp2 = load <1 x i64>* %B
+ %tmp1 = load <1 x i64>, <1 x i64>* %A
+ %tmp2 = load <1 x i64>, <1 x i64>* %B
%tmp3 = sub <1 x i64> %tmp1, %tmp2
ret <1 x i64> %tmp3
}
@@ -39,8 +39,8 @@ define <1 x i64> @vsubi64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
define <2 x float> @vsubf32(<2 x float>* %A, <2 x float>* %B) nounwind {
;CHECK-LABEL: vsubf32:
;CHECK: vsub.f32
- %tmp1 = load <2 x float>* %A
- %tmp2 = load <2 x float>* %B
+ %tmp1 = load <2 x float>, <2 x float>* %A
+ %tmp2 = load <2 x float>, <2 x float>* %B
%tmp3 = fsub <2 x float> %tmp1, %tmp2
ret <2 x float> %tmp3
}
@@ -48,8 +48,8 @@ define <2 x float> @vsubf32(<2 x float>* %A, <2 x float>* %B) nounwind {
define <16 x i8> @vsubQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: vsubQi8:
;CHECK: vsub.i8
- %tmp1 = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
+ %tmp1 = load <16 x i8>, <16 x i8>* %A
+ %tmp2 = load <16 x i8>, <16 x i8>* %B
%tmp3 = sub <16 x i8> %tmp1, %tmp2
ret <16 x i8> %tmp3
}
@@ -57,8 +57,8 @@ define <16 x i8> @vsubQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
define <8 x i16> @vsubQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: vsubQi16:
;CHECK: vsub.i16
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
+ %tmp2 = load <8 x i16>, <8 x i16>* %B
%tmp3 = sub <8 x i16> %tmp1, %tmp2
ret <8 x i16> %tmp3
}
@@ -66,8 +66,8 @@ define <8 x i16> @vsubQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
define <4 x i32> @vsubQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: vsubQi32:
;CHECK: vsub.i32
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
+ %tmp2 = load <4 x i32>, <4 x i32>* %B
%tmp3 = sub <4 x i32> %tmp1, %tmp2
ret <4 x i32> %tmp3
}
@@ -75,8 +75,8 @@ define <4 x i32> @vsubQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
define <2 x i64> @vsubQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
;CHECK-LABEL: vsubQi64:
;CHECK: vsub.i64
- %tmp1 = load <2 x i64>* %A
- %tmp2 = load <2 x i64>* %B
+ %tmp1 = load <2 x i64>, <2 x i64>* %A
+ %tmp2 = load <2 x i64>, <2 x i64>* %B
%tmp3 = sub <2 x i64> %tmp1, %tmp2
ret <2 x i64> %tmp3
}
@@ -84,8 +84,8 @@ define <2 x i64> @vsubQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
define <4 x float> @vsubQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
;CHECK-LABEL: vsubQf32:
;CHECK: vsub.f32
- %tmp1 = load <4 x float>* %A
- %tmp2 = load <4 x float>* %B
+ %tmp1 = load <4 x float>, <4 x float>* %A
+ %tmp2 = load <4 x float>, <4 x float>* %B
%tmp3 = fsub <4 x float> %tmp1, %tmp2
ret <4 x float> %tmp3
}
@@ -120,8 +120,8 @@ define <2 x i32> @vsubhni64_natural(<2 x i64> %A, <2 x i64> %B) nounwind {
define <8 x i8> @vrsubhni16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: vrsubhni16:
;CHECK: vrsubhn.i16
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
+ %tmp2 = load <8 x i16>, <8 x i16>* %B
%tmp3 = call <8 x i8> @llvm.arm.neon.vrsubhn.v8i8(<8 x i16> %tmp1, <8 x i16> %tmp2)
ret <8 x i8> %tmp3
}
@@ -129,8 +129,8 @@ define <8 x i8> @vrsubhni16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
define <4 x i16> @vrsubhni32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: vrsubhni32:
;CHECK: vrsubhn.i32
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
+ %tmp2 = load <4 x i32>, <4 x i32>* %B
%tmp3 = call <4 x i16> @llvm.arm.neon.vrsubhn.v4i16(<4 x i32> %tmp1, <4 x i32> %tmp2)
ret <4 x i16> %tmp3
}
@@ -138,8 +138,8 @@ define <4 x i16> @vrsubhni32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
define <2 x i32> @vrsubhni64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
;CHECK-LABEL: vrsubhni64:
;CHECK: vrsubhn.i64
- %tmp1 = load <2 x i64>* %A
- %tmp2 = load <2 x i64>* %B
+ %tmp1 = load <2 x i64>, <2 x i64>* %A
+ %tmp2 = load <2 x i64>, <2 x i64>* %B
%tmp3 = call <2 x i32> @llvm.arm.neon.vrsubhn.v2i32(<2 x i64> %tmp1, <2 x i64> %tmp2)
ret <2 x i32> %tmp3
}
@@ -151,8 +151,8 @@ declare <2 x i32> @llvm.arm.neon.vrsubhn.v2i32(<2 x i64>, <2 x i64>) nounwind re
define <8 x i16> @vsubls8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: vsubls8:
;CHECK: vsubl.s8
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
+ %tmp2 = load <8 x i8>, <8 x i8>* %B
%tmp3 = sext <8 x i8> %tmp1 to <8 x i16>
%tmp4 = sext <8 x i8> %tmp2 to <8 x i16>
%tmp5 = sub <8 x i16> %tmp3, %tmp4
@@ -162,8 +162,8 @@ define <8 x i16> @vsubls8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
define <4 x i32> @vsubls16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: vsubls16:
;CHECK: vsubl.s16
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
%tmp3 = sext <4 x i16> %tmp1 to <4 x i32>
%tmp4 = sext <4 x i16> %tmp2 to <4 x i32>
%tmp5 = sub <4 x i32> %tmp3, %tmp4
@@ -173,8 +173,8 @@ define <4 x i32> @vsubls16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
define <2 x i64> @vsubls32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: vsubls32:
;CHECK: vsubl.s32
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
%tmp3 = sext <2 x i32> %tmp1 to <2 x i64>
%tmp4 = sext <2 x i32> %tmp2 to <2 x i64>
%tmp5 = sub <2 x i64> %tmp3, %tmp4
@@ -184,8 +184,8 @@ define <2 x i64> @vsubls32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
define <8 x i16> @vsublu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: vsublu8:
;CHECK: vsubl.u8
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
+ %tmp2 = load <8 x i8>, <8 x i8>* %B
%tmp3 = zext <8 x i8> %tmp1 to <8 x i16>
%tmp4 = zext <8 x i8> %tmp2 to <8 x i16>
%tmp5 = sub <8 x i16> %tmp3, %tmp4
@@ -195,8 +195,8 @@ define <8 x i16> @vsublu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
define <4 x i32> @vsublu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: vsublu16:
;CHECK: vsubl.u16
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
%tmp3 = zext <4 x i16> %tmp1 to <4 x i32>
%tmp4 = zext <4 x i16> %tmp2 to <4 x i32>
%tmp5 = sub <4 x i32> %tmp3, %tmp4
@@ -206,8 +206,8 @@ define <4 x i32> @vsublu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
define <2 x i64> @vsublu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: vsublu32:
;CHECK: vsubl.u32
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
%tmp3 = zext <2 x i32> %tmp1 to <2 x i64>
%tmp4 = zext <2 x i32> %tmp2 to <2 x i64>
%tmp5 = sub <2 x i64> %tmp3, %tmp4
@@ -217,8 +217,8 @@ define <2 x i64> @vsublu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
define <8 x i16> @vsubws8(<8 x i16>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: vsubws8:
;CHECK: vsubw.s8
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i8>* %B
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
+ %tmp2 = load <8 x i8>, <8 x i8>* %B
%tmp3 = sext <8 x i8> %tmp2 to <8 x i16>
%tmp4 = sub <8 x i16> %tmp1, %tmp3
ret <8 x i16> %tmp4
@@ -227,8 +227,8 @@ define <8 x i16> @vsubws8(<8 x i16>* %A, <8 x i8>* %B) nounwind {
define <4 x i32> @vsubws16(<4 x i32>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: vsubws16:
;CHECK: vsubw.s16
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i16>* %B
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
%tmp3 = sext <4 x i16> %tmp2 to <4 x i32>
%tmp4 = sub <4 x i32> %tmp1, %tmp3
ret <4 x i32> %tmp4
@@ -237,8 +237,8 @@ define <4 x i32> @vsubws16(<4 x i32>* %A, <4 x i16>* %B) nounwind {
define <2 x i64> @vsubws32(<2 x i64>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: vsubws32:
;CHECK: vsubw.s32
- %tmp1 = load <2 x i64>* %A
- %tmp2 = load <2 x i32>* %B
+ %tmp1 = load <2 x i64>, <2 x i64>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
%tmp3 = sext <2 x i32> %tmp2 to <2 x i64>
%tmp4 = sub <2 x i64> %tmp1, %tmp3
ret <2 x i64> %tmp4
@@ -247,8 +247,8 @@ define <2 x i64> @vsubws32(<2 x i64>* %A, <2 x i32>* %B) nounwind {
define <8 x i16> @vsubwu8(<8 x i16>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: vsubwu8:
;CHECK: vsubw.u8
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i8>* %B
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
+ %tmp2 = load <8 x i8>, <8 x i8>* %B
%tmp3 = zext <8 x i8> %tmp2 to <8 x i16>
%tmp4 = sub <8 x i16> %tmp1, %tmp3
ret <8 x i16> %tmp4
@@ -257,8 +257,8 @@ define <8 x i16> @vsubwu8(<8 x i16>* %A, <8 x i8>* %B) nounwind {
define <4 x i32> @vsubwu16(<4 x i32>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: vsubwu16:
;CHECK: vsubw.u16
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i16>* %B
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
%tmp3 = zext <4 x i16> %tmp2 to <4 x i32>
%tmp4 = sub <4 x i32> %tmp1, %tmp3
ret <4 x i32> %tmp4
@@ -267,8 +267,8 @@ define <4 x i32> @vsubwu16(<4 x i32>* %A, <4 x i16>* %B) nounwind {
define <2 x i64> @vsubwu32(<2 x i64>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: vsubwu32:
;CHECK: vsubw.u32
- %tmp1 = load <2 x i64>* %A
- %tmp2 = load <2 x i32>* %B
+ %tmp1 = load <2 x i64>, <2 x i64>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
%tmp3 = zext <2 x i32> %tmp2 to <2 x i64>
%tmp4 = sub <2 x i64> %tmp1, %tmp3
ret <2 x i64> %tmp4
diff --git a/test/CodeGen/ARM/vtbl.ll b/test/CodeGen/ARM/vtbl.ll
index 32258a30da96..e4dd572a41b4 100644
--- a/test/CodeGen/ARM/vtbl.ll
+++ b/test/CodeGen/ARM/vtbl.ll
@@ -7,8 +7,8 @@
define <8 x i8> @vtbl1(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: vtbl1:
;CHECK: vtbl.8
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
+ %tmp2 = load <8 x i8>, <8 x i8>* %B
%tmp3 = call <8 x i8> @llvm.arm.neon.vtbl1(<8 x i8> %tmp1, <8 x i8> %tmp2)
ret <8 x i8> %tmp3
}
@@ -16,8 +16,8 @@ define <8 x i8> @vtbl1(<8 x i8>* %A, <8 x i8>* %B) nounwind {
define <8 x i8> @vtbl2(<8 x i8>* %A, %struct.__neon_int8x8x2_t* %B) nounwind {
;CHECK-LABEL: vtbl2:
;CHECK: vtbl.8
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load %struct.__neon_int8x8x2_t* %B
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
+ %tmp2 = load %struct.__neon_int8x8x2_t, %struct.__neon_int8x8x2_t* %B
%tmp3 = extractvalue %struct.__neon_int8x8x2_t %tmp2, 0
%tmp4 = extractvalue %struct.__neon_int8x8x2_t %tmp2, 1
%tmp5 = call <8 x i8> @llvm.arm.neon.vtbl2(<8 x i8> %tmp1, <8 x i8> %tmp3, <8 x i8> %tmp4)
@@ -27,8 +27,8 @@ define <8 x i8> @vtbl2(<8 x i8>* %A, %struct.__neon_int8x8x2_t* %B) nounwind {
define <8 x i8> @vtbl3(<8 x i8>* %A, %struct.__neon_int8x8x3_t* %B) nounwind {
;CHECK-LABEL: vtbl3:
;CHECK: vtbl.8
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load %struct.__neon_int8x8x3_t* %B
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
+ %tmp2 = load %struct.__neon_int8x8x3_t, %struct.__neon_int8x8x3_t* %B
%tmp3 = extractvalue %struct.__neon_int8x8x3_t %tmp2, 0
%tmp4 = extractvalue %struct.__neon_int8x8x3_t %tmp2, 1
%tmp5 = extractvalue %struct.__neon_int8x8x3_t %tmp2, 2
@@ -39,8 +39,8 @@ define <8 x i8> @vtbl3(<8 x i8>* %A, %struct.__neon_int8x8x3_t* %B) nounwind {
define <8 x i8> @vtbl4(<8 x i8>* %A, %struct.__neon_int8x8x4_t* %B) nounwind {
;CHECK-LABEL: vtbl4:
;CHECK: vtbl.8
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load %struct.__neon_int8x8x4_t* %B
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
+ %tmp2 = load %struct.__neon_int8x8x4_t, %struct.__neon_int8x8x4_t* %B
%tmp3 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 0
%tmp4 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 1
%tmp5 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 2
@@ -52,9 +52,9 @@ define <8 x i8> @vtbl4(<8 x i8>* %A, %struct.__neon_int8x8x4_t* %B) nounwind {
define <8 x i8> @vtbx1(<8 x i8>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
;CHECK-LABEL: vtbx1:
;CHECK: vtbx.8
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
- %tmp3 = load <8 x i8>* %C
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
+ %tmp2 = load <8 x i8>, <8 x i8>* %B
+ %tmp3 = load <8 x i8>, <8 x i8>* %C
%tmp4 = call <8 x i8> @llvm.arm.neon.vtbx1(<8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i8> %tmp3)
ret <8 x i8> %tmp4
}
@@ -62,11 +62,11 @@ define <8 x i8> @vtbx1(<8 x i8>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
define <8 x i8> @vtbx2(<8 x i8>* %A, %struct.__neon_int8x8x2_t* %B, <8 x i8>* %C) nounwind {
;CHECK-LABEL: vtbx2:
;CHECK: vtbx.8
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load %struct.__neon_int8x8x2_t* %B
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
+ %tmp2 = load %struct.__neon_int8x8x2_t, %struct.__neon_int8x8x2_t* %B
%tmp3 = extractvalue %struct.__neon_int8x8x2_t %tmp2, 0
%tmp4 = extractvalue %struct.__neon_int8x8x2_t %tmp2, 1
- %tmp5 = load <8 x i8>* %C
+ %tmp5 = load <8 x i8>, <8 x i8>* %C
%tmp6 = call <8 x i8> @llvm.arm.neon.vtbx2(<8 x i8> %tmp1, <8 x i8> %tmp3, <8 x i8> %tmp4, <8 x i8> %tmp5)
ret <8 x i8> %tmp6
}
@@ -74,12 +74,12 @@ define <8 x i8> @vtbx2(<8 x i8>* %A, %struct.__neon_int8x8x2_t* %B, <8 x i8>* %C
define <8 x i8> @vtbx3(<8 x i8>* %A, %struct.__neon_int8x8x3_t* %B, <8 x i8>* %C) nounwind {
;CHECK-LABEL: vtbx3:
;CHECK: vtbx.8
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load %struct.__neon_int8x8x3_t* %B
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
+ %tmp2 = load %struct.__neon_int8x8x3_t, %struct.__neon_int8x8x3_t* %B
%tmp3 = extractvalue %struct.__neon_int8x8x3_t %tmp2, 0
%tmp4 = extractvalue %struct.__neon_int8x8x3_t %tmp2, 1
%tmp5 = extractvalue %struct.__neon_int8x8x3_t %tmp2, 2
- %tmp6 = load <8 x i8>* %C
+ %tmp6 = load <8 x i8>, <8 x i8>* %C
%tmp7 = call <8 x i8> @llvm.arm.neon.vtbx3(<8 x i8> %tmp1, <8 x i8> %tmp3, <8 x i8> %tmp4, <8 x i8> %tmp5, <8 x i8> %tmp6)
ret <8 x i8> %tmp7
}
@@ -87,13 +87,13 @@ define <8 x i8> @vtbx3(<8 x i8>* %A, %struct.__neon_int8x8x3_t* %B, <8 x i8>* %C
define <8 x i8> @vtbx4(<8 x i8>* %A, %struct.__neon_int8x8x4_t* %B, <8 x i8>* %C) nounwind {
;CHECK-LABEL: vtbx4:
;CHECK: vtbx.8
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load %struct.__neon_int8x8x4_t* %B
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
+ %tmp2 = load %struct.__neon_int8x8x4_t, %struct.__neon_int8x8x4_t* %B
%tmp3 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 0
%tmp4 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 1
%tmp5 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 2
%tmp6 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 3
- %tmp7 = load <8 x i8>* %C
+ %tmp7 = load <8 x i8>, <8 x i8>* %C
%tmp8 = call <8 x i8> @llvm.arm.neon.vtbx4(<8 x i8> %tmp1, <8 x i8> %tmp3, <8 x i8> %tmp4, <8 x i8> %tmp5, <8 x i8> %tmp6, <8 x i8> %tmp7)
ret <8 x i8> %tmp8
}
diff --git a/test/CodeGen/ARM/vtrn.ll b/test/CodeGen/ARM/vtrn.ll
index cdae7f8ec370..caa5becac1d9 100644
--- a/test/CodeGen/ARM/vtrn.ll
+++ b/test/CodeGen/ARM/vtrn.ll
@@ -4,8 +4,8 @@ define <8 x i8> @vtrni8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: vtrni8:
;CHECK: vtrn.8
;CHECK-NEXT: vadd.i8
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
+ %tmp2 = load <8 x i8>, <8 x i8>* %B
%tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
%tmp4 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
%tmp5 = add <8 x i8> %tmp3, %tmp4
@@ -16,8 +16,8 @@ define <4 x i16> @vtrni16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: vtrni16:
;CHECK: vtrn.16
;CHECK-NEXT: vadd.i16
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
%tmp3 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
%tmp4 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
%tmp5 = add <4 x i16> %tmp3, %tmp4
@@ -28,8 +28,8 @@ define <2 x i32> @vtrni32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: vtrni32:
;CHECK: vtrn.32
;CHECK-NEXT: vadd.i32
- %tmp1 = load <2 x i32>* %A
- %tmp2 = load <2 x i32>* %B
+ %tmp1 = load <2 x i32>, <2 x i32>* %A
+ %tmp2 = load <2 x i32>, <2 x i32>* %B
%tmp3 = shufflevector <2 x i32> %tmp1, <2 x i32> %tmp2, <2 x i32> <i32 0, i32 2>
%tmp4 = shufflevector <2 x i32> %tmp1, <2 x i32> %tmp2, <2 x i32> <i32 1, i32 3>
%tmp5 = add <2 x i32> %tmp3, %tmp4
@@ -40,8 +40,8 @@ define <2 x float> @vtrnf(<2 x float>* %A, <2 x float>* %B) nounwind {
;CHECK-LABEL: vtrnf:
;CHECK: vtrn.32
;CHECK-NEXT: vadd.f32
- %tmp1 = load <2 x float>* %A
- %tmp2 = load <2 x float>* %B
+ %tmp1 = load <2 x float>, <2 x float>* %A
+ %tmp2 = load <2 x float>, <2 x float>* %B
%tmp3 = shufflevector <2 x float> %tmp1, <2 x float> %tmp2, <2 x i32> <i32 0, i32 2>
%tmp4 = shufflevector <2 x float> %tmp1, <2 x float> %tmp2, <2 x i32> <i32 1, i32 3>
%tmp5 = fadd <2 x float> %tmp3, %tmp4
@@ -52,8 +52,8 @@ define <16 x i8> @vtrnQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: vtrnQi8:
;CHECK: vtrn.8
;CHECK-NEXT: vadd.i8
- %tmp1 = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
+ %tmp1 = load <16 x i8>, <16 x i8>* %A
+ %tmp2 = load <16 x i8>, <16 x i8>* %B
%tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
%tmp4 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
%tmp5 = add <16 x i8> %tmp3, %tmp4
@@ -64,8 +64,8 @@ define <8 x i16> @vtrnQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: vtrnQi16:
;CHECK: vtrn.16
;CHECK-NEXT: vadd.i16
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
+ %tmp2 = load <8 x i16>, <8 x i16>* %B
%tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
%tmp4 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
%tmp5 = add <8 x i16> %tmp3, %tmp4
@@ -76,8 +76,8 @@ define <4 x i32> @vtrnQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: vtrnQi32:
;CHECK: vtrn.32
;CHECK-NEXT: vadd.i32
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
+ %tmp2 = load <4 x i32>, <4 x i32>* %B
%tmp3 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
%tmp4 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
%tmp5 = add <4 x i32> %tmp3, %tmp4
@@ -88,8 +88,8 @@ define <4 x float> @vtrnQf(<4 x float>* %A, <4 x float>* %B) nounwind {
;CHECK-LABEL: vtrnQf:
;CHECK: vtrn.32
;CHECK-NEXT: vadd.f32
- %tmp1 = load <4 x float>* %A
- %tmp2 = load <4 x float>* %B
+ %tmp1 = load <4 x float>, <4 x float>* %A
+ %tmp2 = load <4 x float>, <4 x float>* %B
%tmp3 = shufflevector <4 x float> %tmp1, <4 x float> %tmp2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
%tmp4 = shufflevector <4 x float> %tmp1, <4 x float> %tmp2, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
%tmp5 = fadd <4 x float> %tmp3, %tmp4
@@ -102,8 +102,8 @@ define <8 x i8> @vtrni8_undef(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: vtrni8_undef:
;CHECK: vtrn.8
;CHECK-NEXT: vadd.i8
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
+ %tmp2 = load <8 x i8>, <8 x i8>* %B
%tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 0, i32 undef, i32 2, i32 10, i32 undef, i32 12, i32 6, i32 14>
%tmp4 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 undef, i32 undef, i32 15>
%tmp5 = add <8 x i8> %tmp3, %tmp4
@@ -114,8 +114,8 @@ define <8 x i16> @vtrnQi16_undef(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: vtrnQi16_undef:
;CHECK: vtrn.16
;CHECK-NEXT: vadd.i16
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
+ %tmp2 = load <8 x i16>, <8 x i16>* %B
%tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 0, i32 8, i32 undef, i32 undef, i32 4, i32 12, i32 6, i32 14>
%tmp4 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 1, i32 undef, i32 3, i32 11, i32 5, i32 13, i32 undef, i32 undef>
%tmp5 = add <8 x i16> %tmp3, %tmp4
diff --git a/test/CodeGen/ARM/vuzp.ll b/test/CodeGen/ARM/vuzp.ll
index 832be6c3daf1..7a7306a26593 100644
--- a/test/CodeGen/ARM/vuzp.ll
+++ b/test/CodeGen/ARM/vuzp.ll
@@ -4,8 +4,8 @@ define <8 x i8> @vuzpi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: vuzpi8:
;CHECK: vuzp.8
;CHECK-NEXT: vadd.i8
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
+ %tmp2 = load <8 x i8>, <8 x i8>* %B
%tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
%tmp4 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
%tmp5 = add <8 x i8> %tmp3, %tmp4
@@ -16,8 +16,8 @@ define <4 x i16> @vuzpi16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: vuzpi16:
;CHECK: vuzp.16
;CHECK-NEXT: vadd.i16
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
%tmp3 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
%tmp4 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
%tmp5 = add <4 x i16> %tmp3, %tmp4
@@ -30,8 +30,8 @@ define <16 x i8> @vuzpQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: vuzpQi8:
;CHECK: vuzp.8
;CHECK-NEXT: vadd.i8
- %tmp1 = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
+ %tmp1 = load <16 x i8>, <16 x i8>* %A
+ %tmp2 = load <16 x i8>, <16 x i8>* %B
%tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
%tmp4 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
%tmp5 = add <16 x i8> %tmp3, %tmp4
@@ -42,8 +42,8 @@ define <8 x i16> @vuzpQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: vuzpQi16:
;CHECK: vuzp.16
;CHECK-NEXT: vadd.i16
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
+ %tmp2 = load <8 x i16>, <8 x i16>* %B
%tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
%tmp4 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
%tmp5 = add <8 x i16> %tmp3, %tmp4
@@ -54,8 +54,8 @@ define <4 x i32> @vuzpQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: vuzpQi32:
;CHECK: vuzp.32
;CHECK-NEXT: vadd.i32
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
+ %tmp2 = load <4 x i32>, <4 x i32>* %B
%tmp3 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
%tmp4 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
%tmp5 = add <4 x i32> %tmp3, %tmp4
@@ -66,8 +66,8 @@ define <4 x float> @vuzpQf(<4 x float>* %A, <4 x float>* %B) nounwind {
;CHECK-LABEL: vuzpQf:
;CHECK: vuzp.32
;CHECK-NEXT: vadd.f32
- %tmp1 = load <4 x float>* %A
- %tmp2 = load <4 x float>* %B
+ %tmp1 = load <4 x float>, <4 x float>* %A
+ %tmp2 = load <4 x float>, <4 x float>* %B
%tmp3 = shufflevector <4 x float> %tmp1, <4 x float> %tmp2, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
%tmp4 = shufflevector <4 x float> %tmp1, <4 x float> %tmp2, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
%tmp5 = fadd <4 x float> %tmp3, %tmp4
@@ -80,8 +80,8 @@ define <8 x i8> @vuzpi8_undef(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: vuzpi8_undef:
;CHECK: vuzp.8
;CHECK-NEXT: vadd.i8
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
+ %tmp2 = load <8 x i8>, <8 x i8>* %B
%tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 0, i32 2, i32 undef, i32 undef, i32 8, i32 10, i32 12, i32 14>
%tmp4 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 undef, i32 undef, i32 13, i32 15>
%tmp5 = add <8 x i8> %tmp3, %tmp4
@@ -92,8 +92,8 @@ define <8 x i16> @vuzpQi16_undef(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: vuzpQi16_undef:
;CHECK: vuzp.16
;CHECK-NEXT: vadd.i16
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
+ %tmp2 = load <8 x i16>, <8 x i16>* %B
%tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 0, i32 undef, i32 4, i32 undef, i32 8, i32 10, i32 12, i32 14>
%tmp4 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 1, i32 3, i32 5, i32 undef, i32 undef, i32 11, i32 13, i32 15>
%tmp5 = add <8 x i16> %tmp3, %tmp4
diff --git a/test/CodeGen/ARM/vzip.ll b/test/CodeGen/ARM/vzip.ll
index f74dc62599cf..a1b5b4549ac2 100644
--- a/test/CodeGen/ARM/vzip.ll
+++ b/test/CodeGen/ARM/vzip.ll
@@ -4,8 +4,8 @@ define <8 x i8> @vzipi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: vzipi8:
;CHECK: vzip.8
;CHECK-NEXT: vadd.i8
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
+ %tmp2 = load <8 x i8>, <8 x i8>* %B
%tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
%tmp4 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
%tmp5 = add <8 x i8> %tmp3, %tmp4
@@ -16,8 +16,8 @@ define <4 x i16> @vzipi16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: vzipi16:
;CHECK: vzip.16
;CHECK-NEXT: vadd.i16
- %tmp1 = load <4 x i16>* %A
- %tmp2 = load <4 x i16>* %B
+ %tmp1 = load <4 x i16>, <4 x i16>* %A
+ %tmp2 = load <4 x i16>, <4 x i16>* %B
%tmp3 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
%tmp4 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
%tmp5 = add <4 x i16> %tmp3, %tmp4
@@ -30,8 +30,8 @@ define <16 x i8> @vzipQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: vzipQi8:
;CHECK: vzip.8
;CHECK-NEXT: vadd.i8
- %tmp1 = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
+ %tmp1 = load <16 x i8>, <16 x i8>* %A
+ %tmp2 = load <16 x i8>, <16 x i8>* %B
%tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
%tmp4 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
%tmp5 = add <16 x i8> %tmp3, %tmp4
@@ -42,8 +42,8 @@ define <8 x i16> @vzipQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: vzipQi16:
;CHECK: vzip.16
;CHECK-NEXT: vadd.i16
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
+ %tmp2 = load <8 x i16>, <8 x i16>* %B
%tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
%tmp4 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
%tmp5 = add <8 x i16> %tmp3, %tmp4
@@ -54,8 +54,8 @@ define <4 x i32> @vzipQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: vzipQi32:
;CHECK: vzip.32
;CHECK-NEXT: vadd.i32
- %tmp1 = load <4 x i32>* %A
- %tmp2 = load <4 x i32>* %B
+ %tmp1 = load <4 x i32>, <4 x i32>* %A
+ %tmp2 = load <4 x i32>, <4 x i32>* %B
%tmp3 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
%tmp4 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
%tmp5 = add <4 x i32> %tmp3, %tmp4
@@ -66,8 +66,8 @@ define <4 x float> @vzipQf(<4 x float>* %A, <4 x float>* %B) nounwind {
;CHECK-LABEL: vzipQf:
;CHECK: vzip.32
;CHECK-NEXT: vadd.f32
- %tmp1 = load <4 x float>* %A
- %tmp2 = load <4 x float>* %B
+ %tmp1 = load <4 x float>, <4 x float>* %A
+ %tmp2 = load <4 x float>, <4 x float>* %B
%tmp3 = shufflevector <4 x float> %tmp1, <4 x float> %tmp2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
%tmp4 = shufflevector <4 x float> %tmp1, <4 x float> %tmp2, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
%tmp5 = fadd <4 x float> %tmp3, %tmp4
@@ -80,8 +80,8 @@ define <8 x i8> @vzipi8_undef(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: vzipi8_undef:
;CHECK: vzip.8
;CHECK-NEXT: vadd.i8
- %tmp1 = load <8 x i8>* %A
- %tmp2 = load <8 x i8>* %B
+ %tmp1 = load <8 x i8>, <8 x i8>* %A
+ %tmp2 = load <8 x i8>, <8 x i8>* %B
%tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 0, i32 undef, i32 1, i32 9, i32 undef, i32 10, i32 3, i32 11>
%tmp4 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 undef, i32 undef, i32 15>
%tmp5 = add <8 x i8> %tmp3, %tmp4
@@ -92,8 +92,8 @@ define <16 x i8> @vzipQi8_undef(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: vzipQi8_undef:
;CHECK: vzip.8
;CHECK-NEXT: vadd.i8
- %tmp1 = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
+ %tmp1 = load <16 x i8>, <16 x i8>* %A
+ %tmp2 = load <16 x i8>, <16 x i8>* %B
%tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 0, i32 16, i32 1, i32 undef, i32 undef, i32 undef, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
%tmp4 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 8, i32 24, i32 9, i32 undef, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 undef, i32 14, i32 30, i32 undef, i32 31>
%tmp5 = add <16 x i8> %tmp3, %tmp4
diff --git a/test/CodeGen/ARM/warn-stack.ll b/test/CodeGen/ARM/warn-stack.ll
index 90a3e1f798ed..474dc1dfb447 100644
--- a/test/CodeGen/ARM/warn-stack.ll
+++ b/test/CodeGen/ARM/warn-stack.ll
@@ -7,7 +7,7 @@
define void @nowarn() nounwind ssp {
entry:
%buffer = alloca [12 x i8], align 1
- %arraydecay = getelementptr inbounds [12 x i8]* %buffer, i64 0, i64 0
+ %arraydecay = getelementptr inbounds [12 x i8], [12 x i8]* %buffer, i64 0, i64 0
call void @doit(i8* %arraydecay) nounwind
ret void
}
@@ -16,7 +16,7 @@ entry:
define void @warn() nounwind ssp {
entry:
%buffer = alloca [80 x i8], align 1
- %arraydecay = getelementptr inbounds [80 x i8]* %buffer, i64 0, i64 0
+ %arraydecay = getelementptr inbounds [80 x i8], [80 x i8]* %buffer, i64 0, i64 0
call void @doit(i8* %arraydecay) nounwind
ret void
}
diff --git a/test/CodeGen/ARM/weak2.ll b/test/CodeGen/ARM/weak2.ll
index 82ab90efb118..a2911d780fef 100644
--- a/test/CodeGen/ARM/weak2.ll
+++ b/test/CodeGen/ARM/weak2.ll
@@ -8,7 +8,7 @@ entry:
br i1 %tmp5, label %UnifiedReturnBlock, label %cond_true8
cond_true8: ; preds = %entry
- %tmp10 = tail call i32 (...)* %t.0( ) ; <i32> [#uses=1]
+ %tmp10 = tail call i32 (...) %t.0( ) ; <i32> [#uses=1]
ret i32 %tmp10
UnifiedReturnBlock: ; preds = %entry
diff --git a/test/CodeGen/ARM/wrong-t2stmia-size-opt.ll b/test/CodeGen/ARM/wrong-t2stmia-size-opt.ll
index 7ecd25283108..4b274d2aedc2 100644
--- a/test/CodeGen/ARM/wrong-t2stmia-size-opt.ll
+++ b/test/CodeGen/ARM/wrong-t2stmia-size-opt.ll
@@ -7,11 +7,11 @@ declare i8* @llvm.returnaddress(i32)
define i32* @wrong-t2stmia-size-reduction(i32* %addr, i32 %val0) minsize {
store i32 %val0, i32* %addr
- %addr1 = getelementptr i32* %addr, i32 1
+ %addr1 = getelementptr i32, i32* %addr, i32 1
%lr = call i8* @llvm.returnaddress(i32 0)
%lr32 = ptrtoint i8* %lr to i32
store i32 %lr32, i32* %addr1
- %addr2 = getelementptr i32* %addr1, i32 1
+ %addr2 = getelementptr i32, i32* %addr1, i32 1
ret i32* %addr2
}
diff --git a/test/CodeGen/ARM/zextload_demandedbits.ll b/test/CodeGen/ARM/zextload_demandedbits.ll
index 6b6ce97ed9e2..ba7393c2dc20 100644
--- a/test/CodeGen/ARM/zextload_demandedbits.ll
+++ b/test/CodeGen/ARM/zextload_demandedbits.ll
@@ -14,8 +14,8 @@ target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-
; CHECK: pop
define void @quux(%struct.eggs* %arg) {
bb:
- %tmp1 = getelementptr inbounds %struct.eggs* %arg, i32 0, i32 1
- %0 = load i16* %tmp1, align 2
+ %tmp1 = getelementptr inbounds %struct.eggs, %struct.eggs* %arg, i32 0, i32 1
+ %0 = load i16, i16* %tmp1, align 2
%tobool = icmp eq i16 %0, 0
br i1 %tobool, label %bb16, label %bb3
@@ -24,7 +24,7 @@ bb3: ; preds = %bb
%tmp5 = ptrtoint i16* %tmp1 to i32
%tmp6 = shl i32 %tmp5, 20
%tmp7 = ashr exact i32 %tmp6, 20
- %tmp14 = getelementptr inbounds %struct.barney* undef, i32 %tmp7
+ %tmp14 = getelementptr inbounds %struct.barney, %struct.barney* undef, i32 %tmp7
%tmp15 = tail call i32 @widget(%struct.barney* %tmp14, i8* %tmp4, i32 %tmp7)
br label %bb16
diff --git a/test/CodeGen/BPF/alu8.ll b/test/CodeGen/BPF/alu8.ll
new file mode 100644
index 000000000000..0233225f81b5
--- /dev/null
+++ b/test/CodeGen/BPF/alu8.ll
@@ -0,0 +1,46 @@
+; RUN: llc -march=bpf -show-mc-encoding < %s | FileCheck %s
+; test little endian only for now
+
+define i8 @mov(i8 %a, i8 %b) nounwind {
+; CHECK-LABEL: mov:
+; CHECK: mov r0, r2 # encoding: [0xbf,0x20,0x00,0x00,0x00,0x00,0x00,0x00]
+; CHECK: ret # encoding: [0x95,0x00,0x00,0x00,0x00,0x00,0x00,0x00]
+ ret i8 %b
+}
+
+define i8 @add(i8 %a, i8 %b) nounwind {
+; CHECK-LABEL: add:
+; CHECK: add r1, r2 # encoding: [0x0f,0x21,0x00,0x00,0x00,0x00,0x00,0x00]
+; CHECK: mov r0, r1 # encoding: [0xbf,0x10,0x00,0x00,0x00,0x00,0x00,0x00]
+ %1 = add i8 %a, %b
+ ret i8 %1
+}
+
+define i8 @and(i8 %a, i8 %b) nounwind {
+; CHECK-LABEL: and:
+; CHECK: and r1, r2 # encoding: [0x5f,0x21,0x00,0x00,0x00,0x00,0x00,0x00]
+ %1 = and i8 %a, %b
+ ret i8 %1
+}
+
+define i8 @bis(i8 %a, i8 %b) nounwind {
+; CHECK-LABEL: bis:
+; CHECK: or r1, r2 # encoding: [0x4f,0x21,0x00,0x00,0x00,0x00,0x00,0x00]
+ %1 = or i8 %a, %b
+ ret i8 %1
+}
+
+define i8 @xorand(i8 %a, i8 %b) nounwind {
+; CHECK-LABEL: xorand:
+; CHECK: xori r2, -1 # encoding: [0xa7,0x02,0x00,0x00,0xff,0xff,0xff,0xff]
+ %1 = xor i8 %b, -1
+ %2 = and i8 %a, %1
+ ret i8 %2
+}
+
+define i8 @xor(i8 %a, i8 %b) nounwind {
+; CHECK-LABEL: xor:
+; CHECK: xor r1, r2 # encoding: [0xaf,0x21,0x00,0x00,0x00,0x00,0x00,0x00]
+ %1 = xor i8 %a, %b
+ ret i8 %1
+}
diff --git a/test/CodeGen/BPF/atomics.ll b/test/CodeGen/BPF/atomics.ll
new file mode 100644
index 000000000000..2f9730ddddef
--- /dev/null
+++ b/test/CodeGen/BPF/atomics.ll
@@ -0,0 +1,20 @@
+; RUN: llc < %s -march=bpf -verify-machineinstrs -show-mc-encoding | FileCheck %s
+; test little endian only for now
+
+; CHECK-LABEL: test_load_add_32
+; CHECK: xadd32
+; CHECK: encoding: [0xc3
+define void @test_load_add_32(i32* %p, i32 zeroext %v) {
+entry:
+ atomicrmw add i32* %p, i32 %v seq_cst
+ ret void
+}
+
+; CHECK-LABEL: test_load_add_64
+; CHECK: xadd64
+; CHECK: encoding: [0xdb
+define void @test_load_add_64(i64* %p, i64 zeroext %v) {
+entry:
+ atomicrmw add i64* %p, i64 %v seq_cst
+ ret void
+}
diff --git a/test/CodeGen/BPF/basictest.ll b/test/CodeGen/BPF/basictest.ll
new file mode 100644
index 000000000000..2a2d49878a63
--- /dev/null
+++ b/test/CodeGen/BPF/basictest.ll
@@ -0,0 +1,28 @@
+; RUN: llc < %s -march=bpf | FileCheck %s
+
+define i32 @test0(i32 %X) {
+ %tmp.1 = add i32 %X, 1
+ ret i32 %tmp.1
+; CHECK-LABEL: test0:
+; CHECK: addi r1, 1
+}
+
+; CHECK-LABEL: store_imm:
+; CHECK: stw 0(r1), r{{[03]}}
+; CHECK: stw 4(r2), r{{[03]}}
+define i32 @store_imm(i32* %a, i32* %b) {
+entry:
+ store i32 0, i32* %a, align 4
+ %0 = getelementptr inbounds i32, i32* %b, i32 1
+ store i32 0, i32* %0, align 4
+ ret i32 0
+}
+
+@G = external global i8
+define zeroext i8 @loadG() {
+ %tmp = load i8, i8* @G
+ ret i8 %tmp
+; CHECK-LABEL: loadG:
+; CHECK: ld_64 r1
+; CHECK: ldb r0, 0(r1)
+}
diff --git a/test/CodeGen/BPF/byval.ll b/test/CodeGen/BPF/byval.ll
new file mode 100644
index 000000000000..25ba909d9cd7
--- /dev/null
+++ b/test/CodeGen/BPF/byval.ll
@@ -0,0 +1,27 @@
+; RUN: not llc -march=bpf < %s 2> %t1
+; RUN: FileCheck %s < %t1
+; CHECK: by value not supported
+
+%struct.S = type { [10 x i32] }
+
+; Function Attrs: nounwind uwtable
+define void @bar(i32 %a) #0 {
+entry:
+ %.compoundliteral = alloca %struct.S, align 8
+ %arrayinit.begin = getelementptr inbounds %struct.S, %struct.S* %.compoundliteral, i64 0, i32 0, i64 0
+ store i32 1, i32* %arrayinit.begin, align 8
+ %arrayinit.element = getelementptr inbounds %struct.S, %struct.S* %.compoundliteral, i64 0, i32 0, i64 1
+ store i32 2, i32* %arrayinit.element, align 4
+ %arrayinit.element2 = getelementptr inbounds %struct.S, %struct.S* %.compoundliteral, i64 0, i32 0, i64 2
+ store i32 3, i32* %arrayinit.element2, align 8
+ %arrayinit.start = getelementptr inbounds %struct.S, %struct.S* %.compoundliteral, i64 0, i32 0, i64 3
+ %scevgep4 = bitcast i32* %arrayinit.start to i8*
+ call void @llvm.memset.p0i8.i64(i8* %scevgep4, i8 0, i64 28, i32 4, i1 false)
+ call void @foo(i32 %a, %struct.S* byval align 8 %.compoundliteral) #3
+ ret void
+}
+
+declare void @foo(i32, %struct.S* byval align 8) #1
+
+; Function Attrs: nounwind
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) #3
diff --git a/test/CodeGen/BPF/cc_args.ll b/test/CodeGen/BPF/cc_args.ll
new file mode 100644
index 000000000000..5085fe5684eb
--- /dev/null
+++ b/test/CodeGen/BPF/cc_args.ll
@@ -0,0 +1,96 @@
+; RUN: llc < %s -march=bpf -show-mc-encoding | FileCheck %s
+; test little endian only for now
+
+define void @test() #0 {
+entry:
+; CHECK: test:
+
+; CHECK: mov r1, 123 # encoding: [0xb7,0x01,0x00,0x00,0x7b,0x00,0x00,0x00]
+; CHECK: call f_i16
+ call void @f_i16(i16 123)
+
+; CHECK: mov r1, 12345678 # encoding: [0xb7,0x01,0x00,0x00,0x4e,0x61,0xbc,0x00]
+; CHECK: call f_i32
+ call void @f_i32(i32 12345678)
+
+; CHECK: ld_64 r1, 72623859790382856 # encoding: [0x18,0x01,0x00,0x00,0x08,0x07,0x06,0x05,0x00,0x00,0x00,0x00,0x04,0x03,0x02,0x01]
+; CHECK: call f_i64
+ call void @f_i64(i64 72623859790382856)
+
+; CHECK: mov r1, 1234
+; CHECK: mov r2, 5678
+; CHECK: call f_i32_i32
+ call void @f_i32_i32(i32 1234, i32 5678)
+
+; CHECK: mov r1, 2
+; CHECK: mov r2, 3
+; CHECK: mov r3, 4
+; CHECK: call f_i16_i32_i16
+ call void @f_i16_i32_i16(i16 2, i32 3, i16 4)
+
+; CHECK: mov r1, 5
+; CHECK: ld_64 r2, 7262385979038285
+; CHECK: mov r3, 6
+; CHECK: call f_i16_i64_i16
+ call void @f_i16_i64_i16(i16 5, i64 7262385979038285, i16 6)
+
+ ret void
+}
+
+@g_i16 = common global i16 0, align 2
+@g_i32 = common global i32 0, align 2
+@g_i64 = common global i64 0, align 4
+
+define void @f_i16(i16 %a) #0 {
+; CHECK: f_i16:
+; CHECK: sth 0(r2), r1 # encoding: [0x6b,0x12,0x00,0x00,0x00,0x00,0x00,0x00]
+ store volatile i16 %a, i16* @g_i16, align 2
+ ret void
+}
+
+define void @f_i32(i32 %a) #0 {
+; CHECK: f_i32:
+; CHECK: sth 0(r2), r1 # encoding: [0x6b,0x12,0x00,0x00,0x00,0x00,0x00,0x00]
+; CHECK: sth 2(r2), r1 # encoding: [0x6b,0x12,0x02,0x00,0x00,0x00,0x00,0x00]
+ store volatile i32 %a, i32* @g_i32, align 2
+ ret void
+}
+
+define void @f_i64(i64 %a) #0 {
+; CHECK: f_i64:
+; CHECK: stw 0(r2), r1
+; CHECK: stw 4(r2), r1 # encoding: [0x63,0x12,0x04,0x00,0x00,0x00,0x00,0x00]
+ store volatile i64 %a, i64* @g_i64, align 2
+ ret void
+}
+
+define void @f_i32_i32(i32 %a, i32 %b) #0 {
+; CHECK: f_i32_i32:
+; CHECK: stw 0(r3), r1
+ store volatile i32 %a, i32* @g_i32, align 4
+; CHECK: stw 0(r3), r2
+ store volatile i32 %b, i32* @g_i32, align 4
+ ret void
+}
+
+define void @f_i16_i32_i16(i16 %a, i32 %b, i16 %c) #0 {
+; CHECK: f_i16_i32_i16:
+; CHECK: sth 0(r4), r1
+ store volatile i16 %a, i16* @g_i16, align 2
+; CHECK: stw 0(r1), r2
+ store volatile i32 %b, i32* @g_i32, align 4
+; CHECK: sth 0(r4), r3
+ store volatile i16 %c, i16* @g_i16, align 2
+ ret void
+}
+
+define void @f_i16_i64_i16(i16 %a, i64 %b, i16 %c) #0 {
+; CHECK: f_i16_i64_i16:
+; CHECK: sth 0(r4), r1
+ store volatile i16 %a, i16* @g_i16, align 2
+; CHECK: std 0(r1), r2 # encoding: [0x7b,0x21,0x00,0x00,0x00,0x00,0x00,0x00]
+ store volatile i64 %b, i64* @g_i64, align 8
+; CHECK: sth 0(r4), r3
+ store volatile i16 %c, i16* @g_i16, align 2
+ ret void
+}
diff --git a/test/CodeGen/BPF/cc_ret.ll b/test/CodeGen/BPF/cc_ret.ll
new file mode 100644
index 000000000000..e32b17bcc61c
--- /dev/null
+++ b/test/CodeGen/BPF/cc_ret.ll
@@ -0,0 +1,48 @@
+; RUN: llc < %s -march=bpf | FileCheck %s
+
+define void @test() #0 {
+entry:
+; CHECK: test:
+
+; CHECK: call f_i16
+; CHECK: sth 0(r1), r0
+ %0 = call i16 @f_i16()
+ store volatile i16 %0, i16* @g_i16
+
+; CHECK: call f_i32
+; CHECK: stw 0(r1), r0
+ %1 = call i32 @f_i32()
+ store volatile i32 %1, i32* @g_i32
+
+; CHECK: call f_i64
+; CHECK: std 0(r1), r0
+ %2 = call i64 @f_i64()
+ store volatile i64 %2, i64* @g_i64
+
+ ret void
+}
+
+@g_i16 = common global i16 0, align 2
+@g_i32 = common global i32 0, align 2
+@g_i64 = common global i64 0, align 2
+
+define i16 @f_i16() #0 {
+; CHECK: f_i16:
+; CHECK: mov r0, 1
+; CHECK: ret
+ ret i16 1
+}
+
+define i32 @f_i32() #0 {
+; CHECK: f_i32:
+; CHECK: mov r0, 16909060
+; CHECK: ret
+ ret i32 16909060
+}
+
+define i64 @f_i64() #0 {
+; CHECK: f_i64:
+; CHECK: ld_64 r0, 72623859790382856
+; CHECK: ret
+ ret i64 72623859790382856
+}
diff --git a/test/CodeGen/BPF/cmp.ll b/test/CodeGen/BPF/cmp.ll
new file mode 100644
index 000000000000..b353f90ab566
--- /dev/null
+++ b/test/CodeGen/BPF/cmp.ll
@@ -0,0 +1,119 @@
+; RUN: llc < %s -march=bpf | FileCheck %s
+
+; Function Attrs: nounwind readnone uwtable
+define signext i8 @foo_cmp1(i8 signext %a, i8 signext %b) #0 {
+ %1 = icmp sgt i8 %a, %b
+ br i1 %1, label %2, label %4
+
+; <label>:2 ; preds = %0
+ %3 = mul i8 %b, %a
+ br label %6
+
+; <label>:4 ; preds = %0
+ %5 = shl i8 %b, 3
+ br label %6
+
+; <label>:6 ; preds = %4, %2
+ %.0 = phi i8 [ %3, %2 ], [ %5, %4 ]
+ ret i8 %.0
+; CHECK-LABEL:foo_cmp1:
+; CHECK: jsge r2, r1
+}
+
+; Function Attrs: nounwind readnone uwtable
+define signext i8 @foo_cmp2(i8 signext %a, i8 signext %b) #0 {
+ %1 = icmp slt i8 %a, %b
+ br i1 %1, label %4, label %2
+
+; <label>:2 ; preds = %0
+ %3 = mul i8 %b, %a
+ br label %6
+
+; <label>:4 ; preds = %0
+ %5 = shl i8 %b, 3
+ br label %6
+
+; <label>:6 ; preds = %4, %2
+ %.0 = phi i8 [ %3, %2 ], [ %5, %4 ]
+ ret i8 %.0
+; CHECK-LABEL:foo_cmp2:
+; CHECK: jsgt r2, r1
+}
+
+; Function Attrs: nounwind readnone uwtable
+define signext i8 @foo_cmp3(i8 signext %a, i8 signext %b) #0 {
+ %1 = icmp slt i8 %a, %b
+ br i1 %1, label %2, label %4
+
+; <label>:2 ; preds = %0
+ %3 = mul i8 %b, %a
+ br label %6
+
+; <label>:4 ; preds = %0
+ %5 = shl i8 %b, 3
+ br label %6
+
+; <label>:6 ; preds = %4, %2
+ %.0 = phi i8 [ %3, %2 ], [ %5, %4 ]
+ ret i8 %.0
+; CHECK-LABEL:foo_cmp3:
+; CHECK: jsge r1, r2
+}
+
+; Function Attrs: nounwind readnone uwtable
+define signext i8 @foo_cmp4(i8 signext %a, i8 signext %b) #0 {
+ %1 = icmp sgt i8 %a, %b
+ br i1 %1, label %4, label %2
+
+; <label>:2 ; preds = %0
+ %3 = mul i8 %b, %a
+ br label %6
+
+; <label>:4 ; preds = %0
+ %5 = shl i8 %b, 3
+ br label %6
+
+; <label>:6 ; preds = %4, %2
+ %.0 = phi i8 [ %3, %2 ], [ %5, %4 ]
+ ret i8 %.0
+; CHECK-LABEL:foo_cmp4:
+; CHECK: jsgt r1, r2
+}
+
+; Function Attrs: nounwind readnone uwtable
+define signext i8 @min(i8 signext %a, i8 signext %b) #0 {
+ %1 = icmp slt i8 %a, %b
+ %a.b = select i1 %1, i8 %a, i8 %b
+ ret i8 %a.b
+; CHECK-LABEL:min:
+; CHECK: jsgt r2, r1
+; CHECK: mov r1, r2
+; CHECK: mov r0, r1
+}
+
+; Function Attrs: nounwind readnone uwtable
+define zeroext i8 @minu(i8 zeroext %a, i8 zeroext %b) #0 {
+ %1 = icmp ult i8 %a, 100
+ %a.b = select i1 %1, i8 %a, i8 %b
+ ret i8 %a.b
+; CHECK-LABEL:minu:
+; CHECK: jgt r3, r1
+}
+
+; Function Attrs: nounwind readnone uwtable
+define signext i8 @max(i8 signext %a, i8 signext %b) #0 {
+ %1 = icmp sgt i8 %a, %b
+ %a.b = select i1 %1, i8 %a, i8 %b
+ ret i8 %a.b
+; CHECK-LABEL:max:
+; CHECK: jsgt r1, r2
+}
+
+; Function Attrs: nounwind readnone uwtable
+define signext i8 @meq(i8 signext %a, i8 signext %b, i8 signext %c) #0 {
+ %1 = icmp eq i8 %a, %b
+ %c.a = select i1 %1, i8 %c, i8 %a
+ ret i8 %c.a
+; CHECK-LABEL:meq:
+; CHECK: jeq r1, r2
+}
diff --git a/test/CodeGen/BPF/ex1.ll b/test/CodeGen/BPF/ex1.ll
new file mode 100644
index 000000000000..be038e9a3d8c
--- /dev/null
+++ b/test/CodeGen/BPF/ex1.ll
@@ -0,0 +1,46 @@
+; RUN: llc < %s -march=bpf | FileCheck %s
+
+%struct.bpf_context = type { i64, i64, i64, i64, i64, i64, i64 }
+%struct.sk_buff = type { i64, i64, i64, i64, i64, i64, i64 }
+%struct.net_device = type { i64, i64, i64, i64, i64, i64, i64 }
+
+@bpf_prog1.devname = private unnamed_addr constant [3 x i8] c"lo\00", align 1
+@bpf_prog1.fmt = private unnamed_addr constant [15 x i8] c"skb %x dev %x\0A\00", align 1
+
+; Function Attrs: nounwind uwtable
+define i32 @bpf_prog1(%struct.bpf_context* nocapture %ctx) #0 section "events/net/netif_receive_skb" {
+ %devname = alloca [3 x i8], align 1
+ %fmt = alloca [15 x i8], align 1
+ %1 = getelementptr inbounds [3 x i8], [3 x i8]* %devname, i64 0, i64 0
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* getelementptr inbounds ([3 x i8], [3 x i8]* @bpf_prog1.devname, i64 0, i64 0), i64 3, i32 1, i1 false)
+ %2 = getelementptr inbounds %struct.bpf_context, %struct.bpf_context* %ctx, i64 0, i32 0
+ %3 = load i64, i64* %2, align 8
+ %4 = inttoptr i64 %3 to %struct.sk_buff*
+ %5 = getelementptr inbounds %struct.sk_buff, %struct.sk_buff* %4, i64 0, i32 2
+ %6 = bitcast i64* %5 to i8*
+ %7 = call i8* inttoptr (i64 4 to i8* (i8*)*)(i8* %6) #1
+ %8 = call i32 inttoptr (i64 9 to i32 (i8*, i8*, i32)*)(i8* %7, i8* %1, i32 2) #1
+ %9 = icmp eq i32 %8, 0
+ br i1 %9, label %10, label %13
+
+; <label>:10 ; preds = %0
+ %11 = getelementptr inbounds [15 x i8], [15 x i8]* %fmt, i64 0, i64 0
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* %11, i8* getelementptr inbounds ([15 x i8], [15 x i8]* @bpf_prog1.fmt, i64 0, i64 0), i64 15, i32 1, i1 false)
+ %12 = call i32 (i8*, i32, ...) inttoptr (i64 11 to i32 (i8*, i32, ...)*)(i8* %11, i32 15, %struct.sk_buff* %4, i8* %7) #1
+; CHECK-LABEL: bpf_prog1:
+; CHECK: call 4
+; CHECK: call 9
+; CHECK: jnei r0, 0
+; CHECK: mov r1, 622884453
+; CHECK: ld_64 r1, 7214898703899978611
+; CHECK: call 11
+; CHECK: mov r0, 0
+; CHECK: ret
+ br label %13
+
+; <label>:13 ; preds = %10, %0
+ ret i32 0
+}
+
+; Function Attrs: nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) #1
diff --git a/test/CodeGen/BPF/intrinsics.ll b/test/CodeGen/BPF/intrinsics.ll
new file mode 100644
index 000000000000..98b57deb7c8d
--- /dev/null
+++ b/test/CodeGen/BPF/intrinsics.ll
@@ -0,0 +1,88 @@
+; RUN: llc < %s -march=bpf -show-mc-encoding | FileCheck %s
+
+; Function Attrs: nounwind uwtable
+define i32 @ld_b(i64 %foo, i64* nocapture %bar, i8* %ctx, i8* %ctx2) #0 {
+ %1 = tail call i64 @llvm.bpf.load.byte(i8* %ctx, i64 123) #2
+ %2 = add i64 %1, %foo
+ %3 = load volatile i64, i64* %bar, align 8
+ %4 = add i64 %2, %3
+ %5 = tail call i64 @llvm.bpf.load.byte(i8* %ctx2, i64 %foo) #2
+ %6 = add i64 %4, %5
+ %7 = load volatile i64, i64* %bar, align 8
+ %8 = add i64 %6, %7
+ %9 = trunc i64 %8 to i32
+ ret i32 %9
+; CHECK-LABEL: ld_b:
+; CHECK: ldabs_b r0, r6.data + 123
+; CHECK: ldind_b r0, r6.data
+}
+
+declare i64 @llvm.bpf.load.byte(i8*, i64) #1
+
+; Function Attrs: nounwind uwtable
+define i32 @ld_h(i8* %ctx, i8* %ctx2, i32 %foo) #0 {
+ %1 = tail call i64 @llvm.bpf.load.half(i8* %ctx, i64 123) #2
+ %2 = sext i32 %foo to i64
+ %3 = tail call i64 @llvm.bpf.load.half(i8* %ctx2, i64 %2) #2
+ %4 = add i64 %3, %1
+ %5 = trunc i64 %4 to i32
+ ret i32 %5
+; CHECK-LABEL: ld_h:
+; CHECK: ldind_h r0, r6.data
+; CHECK: ldabs_h r0, r6.data + 123
+}
+
+declare i64 @llvm.bpf.load.half(i8*, i64) #1
+
+; Function Attrs: nounwind uwtable
+define i32 @ld_w(i8* %ctx, i8* %ctx2, i32 %foo) #0 {
+ %1 = tail call i64 @llvm.bpf.load.word(i8* %ctx, i64 123) #2
+ %2 = sext i32 %foo to i64
+ %3 = tail call i64 @llvm.bpf.load.word(i8* %ctx2, i64 %2) #2
+ %4 = add i64 %3, %1
+ %5 = trunc i64 %4 to i32
+ ret i32 %5
+; CHECK-LABEL: ld_w:
+; CHECK: ldind_w r0, r6.data
+; CHECK: ldabs_w r0, r6.data + 123
+}
+
+declare i64 @llvm.bpf.load.word(i8*, i64) #1
+
+define i32 @ld_pseudo() #0 {
+entry:
+ %call = tail call i64 @llvm.bpf.pseudo(i64 2, i64 3)
+ tail call void @bar(i64 %call, i32 4) #2
+ ret i32 0
+; CHECK-LABEL: ld_pseudo:
+; CHECK: ld_pseudo r1, 2, 3 # encoding: [0x18,0x21,0x00,0x00,0x03,0x00
+}
+
+declare void @bar(i64, i32) #1
+
+declare i64 @llvm.bpf.pseudo(i64, i64) #2
+
+define i32 @bswap(i64 %a, i64 %b, i64 %c) #0 {
+entry:
+ %0 = tail call i64 @llvm.bswap.i64(i64 %a)
+ %conv = trunc i64 %b to i32
+ %1 = tail call i32 @llvm.bswap.i32(i32 %conv)
+ %conv1 = zext i32 %1 to i64
+ %add = add i64 %conv1, %0
+ %conv2 = trunc i64 %c to i16
+ %2 = tail call i16 @llvm.bswap.i16(i16 %conv2)
+ %conv3 = zext i16 %2 to i64
+ %add4 = add i64 %add, %conv3
+ %conv5 = trunc i64 %add4 to i32
+ ret i32 %conv5
+; CHECK-LABEL: bswap:
+; CHECK: bswap64 r1 # encoding: [0xdc,0x01,0x00,0x00,0x40,0x00,0x00,0x00]
+; CHECK: bswap32 r2 # encoding: [0xdc,0x02,0x00,0x00,0x20,0x00,0x00,0x00]
+; CHECK: add r2, r1 # encoding: [0x0f,0x12,0x00,0x00,0x00,0x00,0x00,0x00]
+; CHECK: bswap16 r3 # encoding: [0xdc,0x03,0x00,0x00,0x10,0x00,0x00,0x00]
+; CHECK: add r2, r3 # encoding: [0x0f,0x32,0x00,0x00,0x00,0x00,0x00,0x00]
+}
+
+declare i64 @llvm.bswap.i64(i64) #1
+declare i32 @llvm.bswap.i32(i32) #1
+declare i16 @llvm.bswap.i16(i16) #1
diff --git a/test/CodeGen/BPF/lit.local.cfg b/test/CodeGen/BPF/lit.local.cfg
new file mode 100644
index 000000000000..a4ab2624af61
--- /dev/null
+++ b/test/CodeGen/BPF/lit.local.cfg
@@ -0,0 +1,2 @@
+if 'BPF' not in config.root.targets:
+    config.unsupported = True
diff --git a/test/CodeGen/BPF/load.ll b/test/CodeGen/BPF/load.ll
new file mode 100644
index 000000000000..03fb17c965b5
--- /dev/null
+++ b/test/CodeGen/BPF/load.ll
@@ -0,0 +1,43 @@
+; RUN: llc < %s -march=bpf | FileCheck %s
+
+define i16 @am1(i16* %a) nounwind {
+ %1 = load i16, i16* %a
+ ret i16 %1
+}
+; CHECK-LABEL: am1:
+; CHECK: ldh r0, 0(r1)
+
+@foo = external global i16
+
+define i16 @am2() nounwind {
+ %1 = load i16, i16* @foo
+ ret i16 %1
+}
+; CHECK-LABEL: am2:
+; CHECK: ldh r0, 0(r1)
+
+define i16 @am4() nounwind {
+ %1 = load volatile i16, i16* inttoptr(i16 32 to i16*)
+ ret i16 %1
+}
+; CHECK-LABEL: am4:
+; CHECK: mov r1, 32
+; CHECK: ldh r0, 0(r1)
+
+define i16 @am5(i16* %a) nounwind {
+ %1 = getelementptr i16, i16* %a, i16 2
+ %2 = load i16, i16* %1
+ ret i16 %2
+}
+; CHECK-LABEL: am5:
+; CHECK: ldh r0, 4(r1)
+
+%S = type { i16, i16 }
+@baz = common global %S zeroinitializer, align 1
+
+define i16 @am6() nounwind {
+ %1 = load i16, i16* getelementptr (%S, %S* @baz, i32 0, i32 1)
+ ret i16 %1
+}
+; CHECK-LABEL: am6:
+; CHECK: ldh r0, 2(r1)
diff --git a/test/CodeGen/BPF/loops.ll b/test/CodeGen/BPF/loops.ll
new file mode 100644
index 000000000000..4798d78842ca
--- /dev/null
+++ b/test/CodeGen/BPF/loops.ll
@@ -0,0 +1,111 @@
+; RUN: llc < %s -march=bpf | FileCheck %s
+
+define zeroext i16 @add(i16* nocapture %a, i16 zeroext %n) nounwind readonly {
+entry:
+ %cmp8 = icmp eq i16 %n, 0 ; <i1> [#uses=1]
+ br i1 %cmp8, label %for.end, label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %i.010 = phi i16 [ 0, %entry ], [ %inc, %for.body ] ; <i16> [#uses=2]
+ %sum.09 = phi i16 [ 0, %entry ], [ %add, %for.body ] ; <i16> [#uses=1]
+ %arrayidx = getelementptr i16, i16* %a, i16 %i.010 ; <i16*> [#uses=1]
+; CHECK-LABEL: add:
+; CHECK: add r{{[0-9]+}}, r{{[0-9]+}}
+ %tmp4 = load i16, i16* %arrayidx ; <i16> [#uses=1]
+ %add = add i16 %tmp4, %sum.09 ; <i16> [#uses=2]
+ %inc = add i16 %i.010, 1 ; <i16> [#uses=2]
+ %exitcond = icmp eq i16 %inc, %n ; <i1> [#uses=1]
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ %sum.0.lcssa = phi i16 [ 0, %entry ], [ %add, %for.body ] ; <i16> [#uses=1]
+ ret i16 %sum.0.lcssa
+}
+
+define zeroext i16 @sub(i16* nocapture %a, i16 zeroext %n) nounwind readonly {
+entry:
+ %cmp8 = icmp eq i16 %n, 0 ; <i1> [#uses=1]
+ br i1 %cmp8, label %for.end, label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %i.010 = phi i16 [ 0, %entry ], [ %inc, %for.body ] ; <i16> [#uses=2]
+ %sum.09 = phi i16 [ 0, %entry ], [ %add, %for.body ] ; <i16> [#uses=1]
+ %arrayidx = getelementptr i16, i16* %a, i16 %i.010 ; <i16*> [#uses=1]
+; CHECK-LABEL: sub:
+; CHECK: sub r{{[0-9]+}}, r{{[0-9]+}}
+ %tmp4 = load i16, i16* %arrayidx ; <i16> [#uses=1]
+ %add = sub i16 %tmp4, %sum.09 ; <i16> [#uses=2]
+ %inc = add i16 %i.010, 1 ; <i16> [#uses=2]
+ %exitcond = icmp eq i16 %inc, %n ; <i1> [#uses=1]
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ %sum.0.lcssa = phi i16 [ 0, %entry ], [ %add, %for.body ] ; <i16> [#uses=1]
+ ret i16 %sum.0.lcssa
+}
+
+define zeroext i16 @or(i16* nocapture %a, i16 zeroext %n) nounwind readonly {
+entry:
+ %cmp8 = icmp eq i16 %n, 0 ; <i1> [#uses=1]
+ br i1 %cmp8, label %for.end, label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %i.010 = phi i16 [ 0, %entry ], [ %inc, %for.body ] ; <i16> [#uses=2]
+ %sum.09 = phi i16 [ 0, %entry ], [ %add, %for.body ] ; <i16> [#uses=1]
+ %arrayidx = getelementptr i16, i16* %a, i16 %i.010 ; <i16*> [#uses=1]
+; CHECK-LABEL: or:
+; CHECK: or r{{[0-9]+}}, r{{[0-9]+}}
+ %tmp4 = load i16, i16* %arrayidx ; <i16> [#uses=1]
+ %add = or i16 %tmp4, %sum.09 ; <i16> [#uses=2]
+ %inc = add i16 %i.010, 1 ; <i16> [#uses=2]
+ %exitcond = icmp eq i16 %inc, %n ; <i1> [#uses=1]
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ %sum.0.lcssa = phi i16 [ 0, %entry ], [ %add, %for.body ] ; <i16> [#uses=1]
+ ret i16 %sum.0.lcssa
+}
+
+define zeroext i16 @xor(i16* nocapture %a, i16 zeroext %n) nounwind readonly {
+entry:
+ %cmp8 = icmp eq i16 %n, 0 ; <i1> [#uses=1]
+ br i1 %cmp8, label %for.end, label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %i.010 = phi i16 [ 0, %entry ], [ %inc, %for.body ] ; <i16> [#uses=2]
+ %sum.09 = phi i16 [ 0, %entry ], [ %add, %for.body ] ; <i16> [#uses=1]
+ %arrayidx = getelementptr i16, i16* %a, i16 %i.010 ; <i16*> [#uses=1]
+; CHECK-LABEL: xor:
+; CHECK: xor r{{[0-9]+}}, r{{[0-9]+}}
+ %tmp4 = load i16, i16* %arrayidx ; <i16> [#uses=1]
+ %add = xor i16 %tmp4, %sum.09 ; <i16> [#uses=2]
+ %inc = add i16 %i.010, 1 ; <i16> [#uses=2]
+ %exitcond = icmp eq i16 %inc, %n ; <i1> [#uses=1]
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ %sum.0.lcssa = phi i16 [ 0, %entry ], [ %add, %for.body ] ; <i16> [#uses=1]
+ ret i16 %sum.0.lcssa
+}
+
+define zeroext i16 @and(i16* nocapture %a, i16 zeroext %n) nounwind readonly {
+entry:
+ %cmp8 = icmp eq i16 %n, 0 ; <i1> [#uses=1]
+ br i1 %cmp8, label %for.end, label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %i.010 = phi i16 [ 0, %entry ], [ %inc, %for.body ] ; <i16> [#uses=2]
+ %sum.09 = phi i16 [ 0, %entry ], [ %add, %for.body ] ; <i16> [#uses=1]
+ %arrayidx = getelementptr i16, i16* %a, i16 %i.010 ; <i16*> [#uses=1]
+; CHECK-LABEL: and:
+; CHECK: and r{{[0-9]+}}, r{{[0-9]+}}
+ %tmp4 = load i16, i16* %arrayidx ; <i16> [#uses=1]
+ %add = and i16 %tmp4, %sum.09 ; <i16> [#uses=2]
+ %inc = add i16 %i.010, 1 ; <i16> [#uses=2]
+ %exitcond = icmp eq i16 %inc, %n ; <i1> [#uses=1]
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ %sum.0.lcssa = phi i16 [ 0, %entry ], [ %add, %for.body ] ; <i16> [#uses=1]
+ ret i16 %sum.0.lcssa
+}
diff --git a/test/CodeGen/BPF/many_args1.ll b/test/CodeGen/BPF/many_args1.ll
new file mode 100644
index 000000000000..08218f452d06
--- /dev/null
+++ b/test/CodeGen/BPF/many_args1.ll
@@ -0,0 +1,12 @@
+; RUN: not llc -march=bpf < %s 2> %t1
+; RUN: FileCheck %s < %t1
+; CHECK: too many args
+
+; Function Attrs: nounwind uwtable
+define i32 @foo(i32 %a, i32 %b, i32 %c) #0 {
+entry:
+ %call = tail call i32 @bar(i32 %a, i32 %b, i32 %c, i32 1, i32 2, i32 3) #3
+ ret i32 %call
+}
+
+declare i32 @bar(i32, i32, i32, i32, i32, i32) #1
diff --git a/test/CodeGen/BPF/many_args2.ll b/test/CodeGen/BPF/many_args2.ll
new file mode 100644
index 000000000000..a69886c2b208
--- /dev/null
+++ b/test/CodeGen/BPF/many_args2.ll
@@ -0,0 +1,15 @@
+; RUN: not llc -march=bpf < %s 2> %t1
+; RUN: FileCheck %s < %t1
+; CHECK: too many args
+
+; Function Attrs: nounwind readnone uwtable
+define i32 @bar(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f) #0 {
+entry:
+ ret i32 1
+}
+
+; Function Attrs: nounwind readnone uwtable
+define i32 @foo(i32 %a, i32 %b, i32 %c) #0 {
+entry:
+ ret i32 1
+}
diff --git a/test/CodeGen/BPF/sanity.ll b/test/CodeGen/BPF/sanity.ll
new file mode 100644
index 000000000000..09a6b65d0854
--- /dev/null
+++ b/test/CodeGen/BPF/sanity.ll
@@ -0,0 +1,117 @@
+; RUN: llc < %s -march=bpf | FileCheck %s
+
+@foo_printf.fmt = private unnamed_addr constant [9 x i8] c"hello \0A\00", align 1
+
+; Function Attrs: nounwind readnone uwtable
+define i32 @foo_int(i32 %a, i32 %b) #0 {
+ %1 = add nsw i32 %b, %a
+ ret i32 %1
+; CHECK-LABEL: foo_int:
+; CHECK: add r2, r1
+}
+
+; Function Attrs: nounwind readnone uwtable
+define signext i8 @foo_char(i8 signext %a, i8 signext %b) #0 {
+ %1 = add i8 %b, %a
+ ret i8 %1
+; CHECK-LABEL: foo_char:
+; CHECK: add r2, r1
+; CHECK: slli r2, 56
+; CHECK: srai r2, 56
+}
+
+; Function Attrs: nounwind readnone uwtable
+define i64 @foo_ll(i64 %a, i64 %b, i64 %c) #0 {
+ %1 = add nsw i64 %b, %a
+ %2 = sub i64 %1, %c
+ ret i64 %2
+; CHECK-LABEL: foo_ll:
+; CHECK: add r2, r1
+; CHECK: sub r2, r3
+; CHECK: mov r0, r2
+}
+
+; Function Attrs: nounwind uwtable
+define void @foo_call2(i32 %a, i32 %b) #1 {
+ %1 = trunc i32 %b to i8
+ tail call void @foo_2arg(i8 signext %1, i32 %a) #3
+ ret void
+; CHECK-LABEL: foo_call2:
+; CHECK: slli r2, 56
+; CHECK: srai r2, 56
+; CHECK: mov r1, r2
+}
+
+declare void @foo_2arg(i8 signext, i32) #2
+
+; Function Attrs: nounwind uwtable
+define i32 @foo_call5(i8 signext %a, i16 signext %b, i32 %c, i64 %d) #1 {
+ %1 = tail call i32 @bar(i8 signext %a, i16 signext %b, i32 %c, i64 %d) #3
+ ret i32 0
+; CHECK-LABEL: foo_call5:
+; CHECK: call bar
+}
+
+declare i32 @bar(i8 signext, i16 signext, i32, i64) #2
+
+; Function Attrs: nounwind readnone uwtable
+define signext i8 @foo_cmp(i8 signext %a, i8 signext %b) #0 {
+ %1 = icmp slt i8 %a, %b
+ %a.b = select i1 %1, i8 %a, i8 %b
+ ret i8 %a.b
+; CHECK-LABEL: foo_cmp:
+; CHECK: jsgt r2, r1
+}
+
+; Function Attrs: nounwind readnone uwtable
+define i32 @foo_muldiv(i8 signext %a, i16 signext %b, i32 %c, i64 %d) #0 {
+ %1 = icmp eq i8 %a, 0
+ br i1 %1, label %5, label %2
+
+; <label>:2 ; preds = %0
+ %3 = sext i16 %b to i32
+ %4 = mul nsw i32 %3, %c
+ br label %8
+
+; <label>:5 ; preds = %0
+ %6 = trunc i64 %d to i32
+ %7 = udiv i32 %6, %c
+ br label %8
+
+; <label>:8 ; preds = %5, %2
+ %.0 = phi i32 [ %4, %2 ], [ %7, %5 ]
+ ret i32 %.0
+; CHECK-LABEL: foo_muldiv:
+; CHECK: mul r2, r3
+}
+
+; Function Attrs: nounwind uwtable
+define i32 @foo_optimized() #1 {
+ %1 = tail call i32 @manyarg(i32 1, i32 2, i32 3, i32 4, i32 5) #3
+ ret i32 %1
+; CHECK-LABEL: foo_optimized:
+; CHECK: mov r1, 1
+; CHECK: mov r2, 2
+; CHECK: mov r3, 3
+; CHECK: mov r4, 4
+; CHECK: mov r5, 5
+}
+
+declare i32 @manyarg(i32, i32, i32, i32, i32) #2
+
+; Function Attrs: nounwind uwtable
+define void @foo_printf() #1 {
+ %fmt = alloca [9 x i8], align 1
+ %1 = getelementptr inbounds [9 x i8], [9 x i8]* %fmt, i64 0, i64 0
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* getelementptr inbounds ([9 x i8], [9 x i8]* @foo_printf.fmt, i64 0, i64 0), i64 9, i32 1, i1 false)
+; CHECK-LABEL: foo_printf:
+; CHECK: ld_64 r1, 729618802566522216
+ %2 = call i32 (i8*, ...) @printf(i8* %1) #3
+ ret void
+}
+
+; Function Attrs: nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) #3
+
+; Function Attrs: nounwind
+declare i32 @printf(i8* nocapture, ...) #4
diff --git a/test/CodeGen/BPF/setcc.ll b/test/CodeGen/BPF/setcc.ll
new file mode 100644
index 000000000000..eabb6c9bf2d6
--- /dev/null
+++ b/test/CodeGen/BPF/setcc.ll
@@ -0,0 +1,99 @@
+; RUN: llc -march=bpf < %s | FileCheck %s
+
+define i16 @sccweqand(i16 %a, i16 %b) nounwind {
+ %t1 = and i16 %a, %b
+ %t2 = icmp eq i16 %t1, 0
+ %t3 = zext i1 %t2 to i16
+ ret i16 %t3
+}
+; CHECK-LABEL: sccweqand:
+; CHECK: jeq r1, r2
+
+define i16 @sccwneand(i16 %a, i16 %b) nounwind {
+ %t1 = and i16 %a, %b
+ %t2 = icmp ne i16 %t1, 0
+ %t3 = zext i1 %t2 to i16
+ ret i16 %t3
+}
+; CHECK-LABEL: sccwneand:
+; CHECK: jne r1, r2
+
+define i16 @sccwne(i16 %a, i16 %b) nounwind {
+ %t1 = icmp ne i16 %a, %b
+ %t2 = zext i1 %t1 to i16
+ ret i16 %t2
+}
+; CHECK-LABEL: sccwne:
+; CHECK: jne r1, r2
+
+define i16 @sccweq(i16 %a, i16 %b) nounwind {
+ %t1 = icmp eq i16 %a, %b
+ %t2 = zext i1 %t1 to i16
+ ret i16 %t2
+}
+; CHECK-LABEL: sccweq:
+; CHECK: jeq r1, r2
+
+define i16 @sccwugt(i16 %a, i16 %b) nounwind {
+ %t1 = icmp ugt i16 %a, %b
+ %t2 = zext i1 %t1 to i16
+ ret i16 %t2
+}
+; CHECK-LABEL: sccwugt:
+; CHECK: jgt r1, r2
+
+define i16 @sccwuge(i16 %a, i16 %b) nounwind {
+ %t1 = icmp uge i16 %a, %b
+ %t2 = zext i1 %t1 to i16
+ ret i16 %t2
+}
+; CHECK-LABEL: sccwuge:
+; CHECK: jge r1, r2
+
+define i16 @sccwult(i16 %a, i16 %b) nounwind {
+ %t1 = icmp ult i16 %a, %b
+ %t2 = zext i1 %t1 to i16
+ ret i16 %t2
+}
+; CHECK-LABEL: sccwult:
+; CHECK: jgt r2, r1
+
+define i16 @sccwule(i16 %a, i16 %b) nounwind {
+ %t1 = icmp ule i16 %a, %b
+ %t2 = zext i1 %t1 to i16
+ ret i16 %t2
+}
+; CHECK-LABEL: sccwule:
+; CHECK: jge r2, r1
+
+define i16 @sccwsgt(i16 %a, i16 %b) nounwind {
+ %t1 = icmp sgt i16 %a, %b
+ %t2 = zext i1 %t1 to i16
+ ret i16 %t2
+}
+; CHECK-LABEL: sccwsgt:
+; CHECK: jsgt r1, r2
+
+define i16 @sccwsge(i16 %a, i16 %b) nounwind {
+ %t1 = icmp sge i16 %a, %b
+ %t2 = zext i1 %t1 to i16
+ ret i16 %t2
+}
+; CHECK-LABEL: sccwsge:
+; CHECK: jsge r1, r2
+
+define i16 @sccwslt(i16 %a, i16 %b) nounwind {
+ %t1 = icmp slt i16 %a, %b
+ %t2 = zext i1 %t1 to i16
+ ret i16 %t2
+}
+; CHECK-LABEL: sccwslt:
+; CHECK: jsgt r2, r1
+
+define i16 @sccwsle(i16 %a, i16 %b) nounwind {
+ %t1 = icmp sle i16 %a, %b
+ %t2 = zext i1 %t1 to i16
+ ret i16 %t2
+}
+; CHECK-LABEL: sccwsle:
+; CHECK: jsge r2, r1
diff --git a/test/CodeGen/BPF/shifts.ll b/test/CodeGen/BPF/shifts.ll
new file mode 100644
index 000000000000..898ae2d46123
--- /dev/null
+++ b/test/CodeGen/BPF/shifts.ll
@@ -0,0 +1,101 @@
+; RUN: llc < %s -march=bpf -show-mc-encoding | FileCheck %s
+; test little endian only for now
+
+define zeroext i8 @lshr8(i8 zeroext %a, i8 zeroext %cnt) nounwind readnone {
+entry:
+; CHECK-LABEL: lshr8:
+; CHECK: srl r1, r2 # encoding: [0x7f,0x21,0x00,0x00,0x00,0x00,0x00,0x00]
+ %shr = lshr i8 %a, %cnt
+ ret i8 %shr
+}
+
+define signext i8 @ashr8(i8 signext %a, i8 zeroext %cnt) nounwind readnone {
+entry:
+; CHECK-LABEL: ashr8:
+; CHECK: sra r1, r2 # encoding: [0xcf,0x21,0x00,0x00,0x00,0x00,0x00,0x00]
+ %shr = ashr i8 %a, %cnt
+ ret i8 %shr
+}
+
+define zeroext i8 @shl8(i8 zeroext %a, i8 zeroext %cnt) nounwind readnone {
+entry:
+; CHECK: shl8
+; CHECK: sll r1, r2 # encoding: [0x6f,0x21,0x00,0x00,0x00,0x00,0x00,0x00]
+ %shl = shl i8 %a, %cnt
+ ret i8 %shl
+}
+
+define zeroext i16 @lshr16(i16 zeroext %a, i16 zeroext %cnt) nounwind readnone {
+entry:
+; CHECK-LABEL: lshr16:
+; CHECK: srl r1, r2 # encoding: [0x7f,0x21,0x00,0x00,0x00,0x00,0x00,0x00]
+ %shr = lshr i16 %a, %cnt
+ ret i16 %shr
+}
+
+define signext i16 @ashr16(i16 signext %a, i16 zeroext %cnt) nounwind readnone {
+entry:
+; CHECK-LABEL: ashr16:
+; CHECK: sra r1, r2 # encoding: [0xcf,0x21,0x00,0x00,0x00,0x00,0x00,0x00]
+ %shr = ashr i16 %a, %cnt
+ ret i16 %shr
+}
+
+define zeroext i16 @shl16(i16 zeroext %a, i16 zeroext %cnt) nounwind readnone {
+entry:
+; CHECK-LABEL: shl16:
+; CHECK: sll r1, r2 # encoding: [0x6f,0x21,0x00,0x00,0x00,0x00,0x00,0x00]
+ %shl = shl i16 %a, %cnt
+ ret i16 %shl
+}
+
+define zeroext i32 @lshr32(i32 zeroext %a, i32 zeroext %cnt) nounwind readnone {
+entry:
+; CHECK-LABEL: lshr32:
+; CHECK: srl r1, r2 # encoding: [0x7f,0x21,0x00,0x00,0x00,0x00,0x00,0x00]
+; CHECK: slli r1, 32 # encoding: [0x67,0x01,0x00,0x00,0x20,0x00,0x00,0x00]
+ %shr = lshr i32 %a, %cnt
+ ret i32 %shr
+}
+
+define signext i32 @ashr32(i32 signext %a, i32 zeroext %cnt) nounwind readnone {
+entry:
+; CHECK-LABEL: ashr32:
+; CHECK: sra r1, r2 # encoding: [0xcf,0x21,0x00,0x00,0x00,0x00,0x00,0x00]
+ %shr = ashr i32 %a, %cnt
+ ret i32 %shr
+}
+
+define zeroext i32 @shl32(i32 zeroext %a, i32 zeroext %cnt) nounwind readnone {
+entry:
+; CHECK-LABEL: shl32:
+; CHECK: sll r1, r2 # encoding: [0x6f,0x21,0x00,0x00,0x00,0x00,0x00,0x00]
+ %shl = shl i32 %a, %cnt
+ ret i32 %shl
+}
+
+define zeroext i64 @lshr64(i64 zeroext %a, i64 zeroext %cnt) nounwind readnone {
+entry:
+; CHECK-LABEL: lshr64:
+; CHECK: srl r1, r2 # encoding: [0x7f,0x21,0x00,0x00,0x00,0x00,0x00,0x00]
+ %shr = lshr i64 %a, %cnt
+ ret i64 %shr
+}
+
+define signext i64 @ashr64(i64 signext %a, i64 zeroext %cnt) nounwind readnone {
+entry:
+; CHECK-LABEL: ashr64:
+; CHECK: sra r1, r2 # encoding: [0xcf,0x21,0x00,0x00,0x00,0x00,0x00,0x00]
+ %shr = ashr i64 %a, %cnt
+ ret i64 %shr
+}
+
+define zeroext i64 @shl64(i64 zeroext %a, i64 zeroext %cnt) nounwind readnone {
+entry:
+; CHECK-LABEL: shl64:
+; CHECK: sll r1, r2 # encoding: [0x6f,0x21,0x00,0x00,0x00,0x00,0x00,0x00]
+; CHECK: mov r0, r1 # encoding: [0xbf,0x10,0x00,0x00,0x00,0x00,0x00,0x00]
+; CHECK: ret # encoding: [0x95,0x00,0x00,0x00,0x00,0x00,0x00,0x00]
+ %shl = shl i64 %a, %cnt
+ ret i64 %shl
+}
diff --git a/test/CodeGen/BPF/sockex2.ll b/test/CodeGen/BPF/sockex2.ll
new file mode 100644
index 000000000000..6ae5e1c8d6bf
--- /dev/null
+++ b/test/CodeGen/BPF/sockex2.ll
@@ -0,0 +1,326 @@
+; RUN: llc < %s -march=bpf -show-mc-encoding | FileCheck %s
+; test little endian only for now
+
+%struct.bpf_map_def = type { i32, i32, i32, i32 }
+%struct.sk_buff = type opaque
+
+@hash_map = global %struct.bpf_map_def { i32 1, i32 4, i32 8, i32 1024 }, section "maps", align 4
+
+; Function Attrs: nounwind uwtable
+define i32 @bpf_prog2(%struct.sk_buff* %skb) #0 section "socket2" {
+ %key = alloca i32, align 4
+ %val = alloca i64, align 8
+ %1 = bitcast %struct.sk_buff* %skb to i8*
+ %2 = call i64 @llvm.bpf.load.half(i8* %1, i64 12) #2
+ %3 = icmp eq i64 %2, 34984
+ br i1 %3, label %4, label %6
+
+; <label>:4 ; preds = %0
+ %5 = call i64 @llvm.bpf.load.half(i8* %1, i64 16) #2
+ br label %6
+
+; <label>:6 ; preds = %4, %0
+ %proto.0.i = phi i64 [ %5, %4 ], [ %2, %0 ]
+ %nhoff.0.i = phi i64 [ 18, %4 ], [ 14, %0 ]
+ %7 = icmp eq i64 %proto.0.i, 33024
+ br i1 %7, label %8, label %12
+
+; <label>:8 ; preds = %6
+ %9 = add i64 %nhoff.0.i, 2
+ %10 = call i64 @llvm.bpf.load.half(i8* %1, i64 %9) #2
+ %11 = add i64 %nhoff.0.i, 4
+ br label %12
+
+; <label>:12 ; preds = %8, %6
+ %proto.1.i = phi i64 [ %10, %8 ], [ %proto.0.i, %6 ]
+ %nhoff.1.i = phi i64 [ %11, %8 ], [ %nhoff.0.i, %6 ]
+ switch i64 %proto.1.i, label %flow_dissector.exit.thread [
+ i64 2048, label %13
+ i64 34525, label %39
+ ]
+
+; <label>:13 ; preds = %12
+ %14 = add i64 %nhoff.1.i, 6
+ %15 = call i64 @llvm.bpf.load.half(i8* %1, i64 %14) #2
+ %16 = and i64 %15, 16383
+ %17 = icmp eq i64 %16, 0
+ br i1 %17, label %18, label %.thread.i.i
+
+; <label>:18 ; preds = %13
+ %19 = add i64 %nhoff.1.i, 9
+ %20 = call i64 @llvm.bpf.load.byte(i8* %1, i64 %19) #2
+ %21 = icmp eq i64 %20, 47
+ br i1 %21, label %28, label %.thread.i.i
+
+.thread.i.i: ; preds = %18, %13
+ %22 = phi i64 [ %20, %18 ], [ 0, %13 ]
+ %23 = add i64 %nhoff.1.i, 12
+ %24 = call i64 @llvm.bpf.load.word(i8* %1, i64 %23) #2
+ %25 = add i64 %nhoff.1.i, 16
+ %26 = call i64 @llvm.bpf.load.word(i8* %1, i64 %25) #2
+ %27 = trunc i64 %26 to i32
+ br label %28
+
+; <label>:28 ; preds = %.thread.i.i, %18
+ %29 = phi i32 [ %27, %.thread.i.i ], [ undef, %18 ]
+ %30 = phi i64 [ %22, %.thread.i.i ], [ 47, %18 ]
+ %31 = call i64 @llvm.bpf.load.byte(i8* %1, i64 %nhoff.1.i) #2
+ %32 = icmp eq i64 %31, 69
+ br i1 %32, label %33, label %35
+
+; <label>:33 ; preds = %28
+ %34 = add i64 %nhoff.1.i, 20
+ br label %parse_ip.exit.i
+
+; <label>:35 ; preds = %28
+ %36 = shl i64 %31, 2
+ %37 = and i64 %36, 60
+ %38 = add i64 %37, %nhoff.1.i
+ br label %parse_ip.exit.i
+
+; <label>:39 ; preds = %12
+ %40 = add i64 %nhoff.1.i, 6
+ %41 = call i64 @llvm.bpf.load.byte(i8* %1, i64 %40) #2
+ %42 = add i64 %nhoff.1.i, 8
+ %43 = call i64 @llvm.bpf.load.word(i8* %1, i64 %42) #2
+ %44 = add i64 %nhoff.1.i, 12
+ %45 = call i64 @llvm.bpf.load.word(i8* %1, i64 %44) #2
+ %46 = add i64 %nhoff.1.i, 16
+ %47 = call i64 @llvm.bpf.load.word(i8* %1, i64 %46) #2
+ %48 = add i64 %nhoff.1.i, 20
+ %49 = call i64 @llvm.bpf.load.word(i8* %1, i64 %48) #2
+ %50 = add i64 %nhoff.1.i, 24
+ %51 = call i64 @llvm.bpf.load.word(i8* %1, i64 %50) #2
+ %52 = add i64 %nhoff.1.i, 28
+ %53 = call i64 @llvm.bpf.load.word(i8* %1, i64 %52) #2
+ %54 = add i64 %nhoff.1.i, 32
+ %55 = call i64 @llvm.bpf.load.word(i8* %1, i64 %54) #2
+ %56 = add i64 %nhoff.1.i, 36
+ %57 = call i64 @llvm.bpf.load.word(i8* %1, i64 %56) #2
+ %58 = xor i64 %53, %51
+ %59 = xor i64 %58, %55
+ %60 = xor i64 %59, %57
+ %61 = trunc i64 %60 to i32
+ %62 = add i64 %nhoff.1.i, 40
+ br label %parse_ip.exit.i
+
+parse_ip.exit.i: ; preds = %39, %35, %33
+ %63 = phi i32 [ %61, %39 ], [ %29, %33 ], [ %29, %35 ]
+ %64 = phi i64 [ %41, %39 ], [ %30, %33 ], [ %30, %35 ]
+ %nhoff.2.i = phi i64 [ %62, %39 ], [ %34, %33 ], [ %38, %35 ]
+ switch i64 %64, label %187 [
+ i64 47, label %65
+ i64 4, label %137
+ i64 41, label %163
+ ]
+
+; <label>:65 ; preds = %parse_ip.exit.i
+ %66 = call i64 @llvm.bpf.load.half(i8* %1, i64 %nhoff.2.i) #2
+ %67 = add i64 %nhoff.2.i, 2
+ %68 = call i64 @llvm.bpf.load.half(i8* %1, i64 %67) #2
+ %69 = and i64 %66, 1856
+ %70 = icmp eq i64 %69, 0
+ br i1 %70, label %71, label %187
+
+; <label>:71 ; preds = %65
+ %72 = lshr i64 %66, 5
+ %73 = and i64 %72, 4
+ %74 = add i64 %nhoff.2.i, 4
+ %..i = add i64 %74, %73
+ %75 = and i64 %66, 32
+ %76 = icmp eq i64 %75, 0
+ %77 = add i64 %..i, 4
+ %nhoff.4.i = select i1 %76, i64 %..i, i64 %77
+ %78 = and i64 %66, 16
+ %79 = icmp eq i64 %78, 0
+ %80 = add i64 %nhoff.4.i, 4
+ %nhoff.4..i = select i1 %79, i64 %nhoff.4.i, i64 %80
+ %81 = icmp eq i64 %68, 33024
+ br i1 %81, label %82, label %86
+
+; <label>:82 ; preds = %71
+ %83 = add i64 %nhoff.4..i, 2
+ %84 = call i64 @llvm.bpf.load.half(i8* %1, i64 %83) #2
+ %85 = add i64 %nhoff.4..i, 4
+ br label %86
+
+; <label>:86 ; preds = %82, %71
+ %proto.2.i = phi i64 [ %84, %82 ], [ %68, %71 ]
+ %nhoff.6.i = phi i64 [ %85, %82 ], [ %nhoff.4..i, %71 ]
+ switch i64 %proto.2.i, label %flow_dissector.exit.thread [
+ i64 2048, label %87
+ i64 34525, label %113
+ ]
+
+; <label>:87 ; preds = %86
+ %88 = add i64 %nhoff.6.i, 6
+ %89 = call i64 @llvm.bpf.load.half(i8* %1, i64 %88) #2
+ %90 = and i64 %89, 16383
+ %91 = icmp eq i64 %90, 0
+ br i1 %91, label %92, label %.thread.i4.i
+
+; <label>:92 ; preds = %87
+ %93 = add i64 %nhoff.6.i, 9
+ %94 = call i64 @llvm.bpf.load.byte(i8* %1, i64 %93) #2
+ %95 = icmp eq i64 %94, 47
+ br i1 %95, label %102, label %.thread.i4.i
+
+.thread.i4.i: ; preds = %92, %87
+ %96 = phi i64 [ %94, %92 ], [ 0, %87 ]
+ %97 = add i64 %nhoff.6.i, 12
+ %98 = call i64 @llvm.bpf.load.word(i8* %1, i64 %97) #2
+ %99 = add i64 %nhoff.6.i, 16
+ %100 = call i64 @llvm.bpf.load.word(i8* %1, i64 %99) #2
+ %101 = trunc i64 %100 to i32
+ br label %102
+
+; <label>:102 ; preds = %.thread.i4.i, %92
+ %103 = phi i32 [ %101, %.thread.i4.i ], [ %63, %92 ]
+ %104 = phi i64 [ %96, %.thread.i4.i ], [ 47, %92 ]
+ %105 = call i64 @llvm.bpf.load.byte(i8* %1, i64 %nhoff.6.i) #2
+ %106 = icmp eq i64 %105, 69
+ br i1 %106, label %107, label %109
+
+; <label>:107 ; preds = %102
+ %108 = add i64 %nhoff.6.i, 20
+ br label %187
+
+; <label>:109 ; preds = %102
+ %110 = shl i64 %105, 2
+ %111 = and i64 %110, 60
+ %112 = add i64 %111, %nhoff.6.i
+ br label %187
+
+; <label>:113 ; preds = %86
+ %114 = add i64 %nhoff.6.i, 6
+ %115 = call i64 @llvm.bpf.load.byte(i8* %1, i64 %114) #2
+ %116 = add i64 %nhoff.6.i, 8
+ %117 = call i64 @llvm.bpf.load.word(i8* %1, i64 %116) #2
+ %118 = add i64 %nhoff.6.i, 12
+ %119 = call i64 @llvm.bpf.load.word(i8* %1, i64 %118) #2
+ %120 = add i64 %nhoff.6.i, 16
+ %121 = call i64 @llvm.bpf.load.word(i8* %1, i64 %120) #2
+ %122 = add i64 %nhoff.6.i, 20
+ %123 = call i64 @llvm.bpf.load.word(i8* %1, i64 %122) #2
+ %124 = add i64 %nhoff.6.i, 24
+ %125 = call i64 @llvm.bpf.load.word(i8* %1, i64 %124) #2
+ %126 = add i64 %nhoff.6.i, 28
+ %127 = call i64 @llvm.bpf.load.word(i8* %1, i64 %126) #2
+ %128 = add i64 %nhoff.6.i, 32
+ %129 = call i64 @llvm.bpf.load.word(i8* %1, i64 %128) #2
+ %130 = add i64 %nhoff.6.i, 36
+ %131 = call i64 @llvm.bpf.load.word(i8* %1, i64 %130) #2
+ %132 = xor i64 %127, %125
+ %133 = xor i64 %132, %129
+ %134 = xor i64 %133, %131
+ %135 = trunc i64 %134 to i32
+ %136 = add i64 %nhoff.6.i, 40
+ br label %187
+
+; <label>:137 ; preds = %parse_ip.exit.i
+ %138 = add i64 %nhoff.2.i, 6
+ %139 = call i64 @llvm.bpf.load.half(i8* %1, i64 %138) #2
+ %140 = and i64 %139, 16383
+ %141 = icmp eq i64 %140, 0
+ br i1 %141, label %142, label %.thread.i1.i
+
+; <label>:142 ; preds = %137
+ %143 = add i64 %nhoff.2.i, 9
+ %144 = call i64 @llvm.bpf.load.byte(i8* %1, i64 %143) #2
+ %145 = icmp eq i64 %144, 47
+ br i1 %145, label %152, label %.thread.i1.i
+
+.thread.i1.i: ; preds = %142, %137
+ %146 = phi i64 [ %144, %142 ], [ 0, %137 ]
+ %147 = add i64 %nhoff.2.i, 12
+ %148 = call i64 @llvm.bpf.load.word(i8* %1, i64 %147) #2
+ %149 = add i64 %nhoff.2.i, 16
+ %150 = call i64 @llvm.bpf.load.word(i8* %1, i64 %149) #2
+ %151 = trunc i64 %150 to i32
+ br label %152
+
+; <label>:152 ; preds = %.thread.i1.i, %142
+ %153 = phi i32 [ %151, %.thread.i1.i ], [ %63, %142 ]
+ %154 = phi i64 [ %146, %.thread.i1.i ], [ 47, %142 ]
+ %155 = call i64 @llvm.bpf.load.byte(i8* %1, i64 %nhoff.2.i) #2
+ %156 = icmp eq i64 %155, 69
+ br i1 %156, label %157, label %159
+
+; <label>:157 ; preds = %152
+ %158 = add i64 %nhoff.2.i, 20
+ br label %187
+
+; <label>:159 ; preds = %152
+ %160 = shl i64 %155, 2
+ %161 = and i64 %160, 60
+ %162 = add i64 %161, %nhoff.2.i
+ br label %187
+
+; <label>:163 ; preds = %parse_ip.exit.i
+ %164 = add i64 %nhoff.2.i, 6
+ %165 = call i64 @llvm.bpf.load.byte(i8* %1, i64 %164) #2
+ %166 = add i64 %nhoff.2.i, 8
+ %167 = call i64 @llvm.bpf.load.word(i8* %1, i64 %166) #2
+ %168 = add i64 %nhoff.2.i, 12
+ %169 = call i64 @llvm.bpf.load.word(i8* %1, i64 %168) #2
+ %170 = add i64 %nhoff.2.i, 16
+ %171 = call i64 @llvm.bpf.load.word(i8* %1, i64 %170) #2
+ %172 = add i64 %nhoff.2.i, 20
+ %173 = call i64 @llvm.bpf.load.word(i8* %1, i64 %172) #2
+ %174 = add i64 %nhoff.2.i, 24
+ %175 = call i64 @llvm.bpf.load.word(i8* %1, i64 %174) #2
+ %176 = add i64 %nhoff.2.i, 28
+ %177 = call i64 @llvm.bpf.load.word(i8* %1, i64 %176) #2
+ %178 = add i64 %nhoff.2.i, 32
+ %179 = call i64 @llvm.bpf.load.word(i8* %1, i64 %178) #2
+ %180 = add i64 %nhoff.2.i, 36
+ %181 = call i64 @llvm.bpf.load.word(i8* %1, i64 %180) #2
+ %182 = xor i64 %177, %175
+ %183 = xor i64 %182, %179
+ %184 = xor i64 %183, %181
+ %185 = trunc i64 %184 to i32
+ %186 = add i64 %nhoff.2.i, 40
+ br label %187
+
+; <label>:187 ; preds = %163, %159, %157, %113, %109, %107, %65, %parse_ip.exit.i
+ %188 = phi i32 [ %63, %parse_ip.exit.i ], [ %185, %163 ], [ %63, %65 ], [ %135, %113 ], [ %103, %107 ], [ %103, %109 ], [ %153, %157 ], [ %153, %159 ]
+ %189 = phi i64 [ %64, %parse_ip.exit.i ], [ %165, %163 ], [ 47, %65 ], [ %115, %113 ], [ %104, %107 ], [ %104, %109 ], [ %154, %157 ], [ %154, %159 ]
+ %nhoff.7.i = phi i64 [ %nhoff.2.i, %parse_ip.exit.i ], [ %186, %163 ], [ %nhoff.2.i, %65 ], [ %136, %113 ], [ %108, %107 ], [ %112, %109 ], [ %158, %157 ], [ %162, %159 ]
+ %cond.i.i = icmp eq i64 %189, 51
+ %190 = select i1 %cond.i.i, i64 4, i64 0
+ %191 = add i64 %190, %nhoff.7.i
+ %192 = call i64 @llvm.bpf.load.word(i8* %1, i64 %191) #2
+ store i32 %188, i32* %key, align 4
+ %193 = bitcast i32* %key to i8*
+ %194 = call i8* inttoptr (i64 1 to i8* (i8*, i8*)*)(i8* bitcast (%struct.bpf_map_def* @hash_map to i8*), i8* %193) #2
+ %195 = icmp eq i8* %194, null
+ br i1 %195, label %199, label %196
+
+; <label>:196 ; preds = %187
+ %197 = bitcast i8* %194 to i64*
+ %198 = atomicrmw add i64* %197, i64 1 seq_cst
+ br label %flow_dissector.exit.thread
+
+; <label>:199 ; preds = %187
+ store i64 1, i64* %val, align 8
+ %200 = bitcast i64* %val to i8*
+ %201 = call i32 inttoptr (i64 2 to i32 (i8*, i8*, i8*, i64)*)(i8* bitcast (%struct.bpf_map_def* @hash_map to i8*), i8* %193, i8* %200, i64 0) #2
+ br label %flow_dissector.exit.thread
+
+flow_dissector.exit.thread: ; preds = %86, %12, %196, %199
+ ret i32 0
+; CHECK-LABEL: bpf_prog2:
+; CHECK: ldabs_h r0, r6.data + 12 # encoding: [0x28,0x00,0x00,0x00,0x0c,0x00,0x00,0x00]
+; CHECK: ldabs_h r0, r6.data + 16 # encoding: [0x28,0x00,0x00,0x00,0x10,0x00,0x00,0x00]
+; CHECK-NOT: implicit
+; CHECK: ld_64 r1
+; CHECK-NOT: ori
+; CHECK: call 1 # encoding: [0x85,0x00,0x00,0x00,0x01,0x00,0x00,0x00]
+; CHECK: call 2 # encoding: [0x85,0x00,0x00,0x00,0x02,0x00,0x00,0x00]
+}
+
+declare i64 @llvm.bpf.load.half(i8*, i64) #1
+
+declare i64 @llvm.bpf.load.word(i8*, i64) #1
+
+declare i64 @llvm.bpf.load.byte(i8*, i64) #1
diff --git a/test/CodeGen/BPF/struct_ret1.ll b/test/CodeGen/BPF/struct_ret1.ll
new file mode 100644
index 000000000000..29486b56a272
--- /dev/null
+++ b/test/CodeGen/BPF/struct_ret1.ll
@@ -0,0 +1,17 @@
+; RUN: not llc -march=bpf < %s 2> %t1
+; RUN: FileCheck %s < %t1
+; CHECK: only integer returns
+
+%struct.S = type { i32, i32, i32 }
+
+@s = common global %struct.S zeroinitializer, align 4
+
+; Function Attrs: nounwind readonly uwtable
+define { i64, i32 } @bar(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e) #0 {
+entry:
+ %retval.sroa.0.0.copyload = load i64, i64* bitcast (%struct.S* @s to i64*), align 4
+ %retval.sroa.2.0.copyload = load i32, i32* getelementptr inbounds (%struct.S, %struct.S* @s, i64 0, i32 2), align 4
+ %.fca.0.insert = insertvalue { i64, i32 } undef, i64 %retval.sroa.0.0.copyload, 0
+ %.fca.1.insert = insertvalue { i64, i32 } %.fca.0.insert, i32 %retval.sroa.2.0.copyload, 1
+ ret { i64, i32 } %.fca.1.insert
+}
diff --git a/test/CodeGen/BPF/struct_ret2.ll b/test/CodeGen/BPF/struct_ret2.ll
new file mode 100644
index 000000000000..90461205f7cf
--- /dev/null
+++ b/test/CodeGen/BPF/struct_ret2.ll
@@ -0,0 +1,12 @@
+; RUN: not llc -march=bpf < %s 2> %t1
+; RUN: FileCheck %s < %t1
+; CHECK: only small returns
+
+; Function Attrs: nounwind uwtable
+define { i64, i32 } @foo(i32 %a, i32 %b, i32 %c) #0 {
+entry:
+ %call = tail call { i64, i32 } @bar(i32 %a, i32 %b, i32 %c, i32 1, i32 2) #3
+ ret { i64, i32 } %call
+}
+
+declare { i64, i32 } @bar(i32, i32, i32, i32, i32) #1
diff --git a/test/CodeGen/BPF/vararg1.ll b/test/CodeGen/BPF/vararg1.ll
new file mode 100644
index 000000000000..4a22db65e692
--- /dev/null
+++ b/test/CodeGen/BPF/vararg1.ll
@@ -0,0 +1,9 @@
+; RUN: not llc -march=bpf < %s 2> %t1
+; RUN: FileCheck %s < %t1
+; CHECK: with VarArgs
+
+; Function Attrs: nounwind readnone uwtable
+define void @foo(i32 %a, ...) #0 {
+entry:
+ ret void
+}
diff --git a/test/CodeGen/CPP/2009-05-01-Long-Double.ll b/test/CodeGen/CPP/2009-05-01-Long-Double.ll
index 0b2d882971a3..470303d6bb05 100644
--- a/test/CodeGen/CPP/2009-05-01-Long-Double.ll
+++ b/test/CodeGen/CPP/2009-05-01-Long-Double.ll
@@ -3,10 +3,10 @@
define x86_fp80 @some_func() nounwind {
entry:
%retval = alloca x86_fp80 ; <x86_fp80*> [#uses=2]
- %call = call i32 (...)* @other_func() ; <i32> [#uses=1]
+ %call = call i32 (...) @other_func() ; <i32> [#uses=1]
%conv = sitofp i32 %call to x86_fp80 ; <x86_fp80> [#uses=1]
store x86_fp80 %conv, x86_fp80* %retval
- %0 = load x86_fp80* %retval ; <x86_fp80> [#uses=1]
+ %0 = load x86_fp80, x86_fp80* %retval ; <x86_fp80> [#uses=1]
ret x86_fp80 %0
}
diff --git a/test/CodeGen/CPP/2009-05-04-CondBr.ll b/test/CodeGen/CPP/2009-05-04-CondBr.ll
index feb2cf765e7d..9ce1e5f02042 100644
--- a/test/CodeGen/CPP/2009-05-04-CondBr.ll
+++ b/test/CodeGen/CPP/2009-05-04-CondBr.ll
@@ -6,10 +6,10 @@ entry:
%retval = alloca i32 ; <i32*> [#uses=2]
%a.addr = alloca i32 ; <i32*> [#uses=8]
store i32 %a, i32* %a.addr
- %tmp = load i32* %a.addr ; <i32> [#uses=1]
+ %tmp = load i32, i32* %a.addr ; <i32> [#uses=1]
%inc = add i32 %tmp, 1 ; <i32> [#uses=1]
store i32 %inc, i32* %a.addr
- %tmp1 = load i32* %a.addr ; <i32> [#uses=1]
+ %tmp1 = load i32, i32* %a.addr ; <i32> [#uses=1]
%cmp = icmp slt i32 %tmp1, 3 ; <i1> [#uses=1]
br i1 %cmp, label %if.then, label %if.end
@@ -18,11 +18,11 @@ if.then: ; preds = %entry
br label %if.end
if.end: ; preds = %if.then, %entry
- %tmp2 = load i32* %a.addr ; <i32> [#uses=1]
+ %tmp2 = load i32, i32* %a.addr ; <i32> [#uses=1]
%inc3 = add i32 %tmp2, 1 ; <i32> [#uses=1]
store i32 %inc3, i32* %a.addr
- %tmp4 = load i32* %a.addr ; <i32> [#uses=1]
+ %tmp4 = load i32, i32* %a.addr ; <i32> [#uses=1]
store i32 %tmp4, i32* %retval
- %0 = load i32* %retval ; <i32> [#uses=1]
+ %0 = load i32, i32* %retval ; <i32> [#uses=1]
ret i32 %0
}
diff --git a/test/CodeGen/Generic/2003-05-28-ManyArgs.ll b/test/CodeGen/Generic/2003-05-28-ManyArgs.ll
index c6fbdaef8293..c2ffc7933572 100644
--- a/test/CodeGen/Generic/2003-05-28-ManyArgs.ll
+++ b/test/CodeGen/Generic/2003-05-28-ManyArgs.ll
@@ -42,102 +42,102 @@ entry:
%det_routing_arch = alloca %struct..s_det_routing_arch ; <%struct..s_det_routing_arch*> [#uses=11]
%segment_inf = alloca %struct..s_segment_inf* ; <%struct..s_segment_inf**> [#uses=1]
%timing_inf = alloca { i32, float, float, float, float, float, float, float, float, float, float } ; <{ i32, float, float, float, float, float, float, float, float, float, float }*> [#uses=11]
- %tmp.101 = getelementptr %struct..s_placer_opts* %placer_opts, i64 0, i32 4 ; <i8**> [#uses=1]
- %tmp.105 = getelementptr [300 x i8]* %net_file, i64 0, i64 0 ; <i8*> [#uses=1]
- %tmp.106 = getelementptr [300 x i8]* %arch_file, i64 0, i64 0 ; <i8*> [#uses=1]
- %tmp.107 = getelementptr [300 x i8]* %place_file, i64 0, i64 0 ; <i8*> [#uses=1]
- %tmp.108 = getelementptr [300 x i8]* %route_file, i64 0, i64 0 ; <i8*> [#uses=1]
- %tmp.109 = getelementptr { i32, float, float, float, float, float, float, float, float, float, float }* %timing_inf, i64 0, i32 0 ; <i32*> [#uses=1]
- %tmp.112 = getelementptr %struct..s_placer_opts* %placer_opts, i64 0, i32 0 ; <i32*> [#uses=1]
- %tmp.114 = getelementptr %struct..s_placer_opts* %placer_opts, i64 0, i32 6 ; <i32*> [#uses=1]
- %tmp.118 = getelementptr %struct..s_router_opts* %router_opts, i64 0, i32 7 ; <i32*> [#uses=1]
- %tmp.135 = load i32* %operation ; <i32> [#uses=1]
- %tmp.137 = load i32* %tmp.112 ; <i32> [#uses=1]
- %tmp.138 = getelementptr %struct..s_placer_opts* %placer_opts, i64 0, i32 1 ; <float*> [#uses=1]
- %tmp.139 = load float* %tmp.138 ; <float> [#uses=1]
- %tmp.140 = getelementptr %struct..s_placer_opts* %placer_opts, i64 0, i32 2 ; <i32*> [#uses=1]
- %tmp.141 = load i32* %tmp.140 ; <i32> [#uses=1]
- %tmp.142 = getelementptr %struct..s_placer_opts* %placer_opts, i64 0, i32 3 ; <i32*> [#uses=1]
- %tmp.143 = load i32* %tmp.142 ; <i32> [#uses=1]
- %tmp.145 = load i8** %tmp.101 ; <i8*> [#uses=1]
- %tmp.146 = getelementptr %struct..s_placer_opts* %placer_opts, i64 0, i32 5 ; <i32*> [#uses=1]
- %tmp.147 = load i32* %tmp.146 ; <i32> [#uses=1]
- %tmp.149 = load i32* %tmp.114 ; <i32> [#uses=1]
- %tmp.154 = load i32* %full_stats ; <i32> [#uses=1]
- %tmp.155 = load i32* %verify_binary_search ; <i32> [#uses=1]
- %tmp.156 = getelementptr %struct..s_annealing_sched* %annealing_sched, i64 0, i32 0 ; <i32*> [#uses=1]
- %tmp.157 = load i32* %tmp.156 ; <i32> [#uses=1]
- %tmp.158 = getelementptr %struct..s_annealing_sched* %annealing_sched, i64 0, i32 1 ; <float*> [#uses=1]
- %tmp.159 = load float* %tmp.158 ; <float> [#uses=1]
- %tmp.160 = getelementptr %struct..s_annealing_sched* %annealing_sched, i64 0, i32 2 ; <float*> [#uses=1]
- %tmp.161 = load float* %tmp.160 ; <float> [#uses=1]
- %tmp.162 = getelementptr %struct..s_annealing_sched* %annealing_sched, i64 0, i32 3 ; <float*> [#uses=1]
- %tmp.163 = load float* %tmp.162 ; <float> [#uses=1]
- %tmp.164 = getelementptr %struct..s_annealing_sched* %annealing_sched, i64 0, i32 4 ; <float*> [#uses=1]
- %tmp.165 = load float* %tmp.164 ; <float> [#uses=1]
- %tmp.166 = getelementptr %struct..s_router_opts* %router_opts, i64 0, i32 0 ; <float*> [#uses=1]
- %tmp.167 = load float* %tmp.166 ; <float> [#uses=1]
- %tmp.168 = getelementptr %struct..s_router_opts* %router_opts, i64 0, i32 1 ; <float*> [#uses=1]
- %tmp.169 = load float* %tmp.168 ; <float> [#uses=1]
- %tmp.170 = getelementptr %struct..s_router_opts* %router_opts, i64 0, i32 2 ; <float*> [#uses=1]
- %tmp.171 = load float* %tmp.170 ; <float> [#uses=1]
- %tmp.172 = getelementptr %struct..s_router_opts* %router_opts, i64 0, i32 3 ; <float*> [#uses=1]
- %tmp.173 = load float* %tmp.172 ; <float> [#uses=1]
- %tmp.174 = getelementptr %struct..s_router_opts* %router_opts, i64 0, i32 4 ; <float*> [#uses=1]
- %tmp.175 = load float* %tmp.174 ; <float> [#uses=1]
- %tmp.176 = getelementptr %struct..s_router_opts* %router_opts, i64 0, i32 5 ; <i32*> [#uses=1]
- %tmp.177 = load i32* %tmp.176 ; <i32> [#uses=1]
- %tmp.178 = getelementptr %struct..s_router_opts* %router_opts, i64 0, i32 6 ; <i32*> [#uses=1]
- %tmp.179 = load i32* %tmp.178 ; <i32> [#uses=1]
- %tmp.181 = load i32* %tmp.118 ; <i32> [#uses=1]
- %tmp.182 = getelementptr %struct..s_router_opts* %router_opts, i64 0, i32 8 ; <i32*> [#uses=1]
- %tmp.183 = load i32* %tmp.182 ; <i32> [#uses=1]
- %tmp.184 = getelementptr %struct..s_det_routing_arch* %det_routing_arch, i64 0, i32 0 ; <i32*> [#uses=1]
- %tmp.185 = load i32* %tmp.184 ; <i32> [#uses=1]
- %tmp.186 = getelementptr %struct..s_det_routing_arch* %det_routing_arch, i64 0, i32 1 ; <float*> [#uses=1]
- %tmp.187 = load float* %tmp.186 ; <float> [#uses=1]
- %tmp.188 = getelementptr %struct..s_det_routing_arch* %det_routing_arch, i64 0, i32 2 ; <float*> [#uses=1]
- %tmp.189 = load float* %tmp.188 ; <float> [#uses=1]
- %tmp.190 = getelementptr %struct..s_det_routing_arch* %det_routing_arch, i64 0, i32 3 ; <float*> [#uses=1]
- %tmp.191 = load float* %tmp.190 ; <float> [#uses=1]
- %tmp.192 = getelementptr %struct..s_det_routing_arch* %det_routing_arch, i64 0, i32 4 ; <i32*> [#uses=1]
- %tmp.193 = load i32* %tmp.192 ; <i32> [#uses=1]
- %tmp.194 = getelementptr %struct..s_det_routing_arch* %det_routing_arch, i64 0, i32 5 ; <i32*> [#uses=1]
- %tmp.195 = load i32* %tmp.194 ; <i32> [#uses=1]
- %tmp.196 = getelementptr %struct..s_det_routing_arch* %det_routing_arch, i64 0, i32 6 ; <i16*> [#uses=1]
- %tmp.197 = load i16* %tmp.196 ; <i16> [#uses=1]
- %tmp.198 = getelementptr %struct..s_det_routing_arch* %det_routing_arch, i64 0, i32 7 ; <i16*> [#uses=1]
- %tmp.199 = load i16* %tmp.198 ; <i16> [#uses=1]
- %tmp.200 = getelementptr %struct..s_det_routing_arch* %det_routing_arch, i64 0, i32 8 ; <i16*> [#uses=1]
- %tmp.201 = load i16* %tmp.200 ; <i16> [#uses=1]
- %tmp.202 = getelementptr %struct..s_det_routing_arch* %det_routing_arch, i64 0, i32 9 ; <float*> [#uses=1]
- %tmp.203 = load float* %tmp.202 ; <float> [#uses=1]
- %tmp.204 = getelementptr %struct..s_det_routing_arch* %det_routing_arch, i64 0, i32 10 ; <float*> [#uses=1]
- %tmp.205 = load float* %tmp.204 ; <float> [#uses=1]
- %tmp.206 = load %struct..s_segment_inf** %segment_inf ; <%struct..s_segment_inf*> [#uses=1]
- %tmp.208 = load i32* %tmp.109 ; <i32> [#uses=1]
- %tmp.209 = getelementptr { i32, float, float, float, float, float, float, float, float, float, float }* %timing_inf, i64 0, i32 1 ; <float*> [#uses=1]
- %tmp.210 = load float* %tmp.209 ; <float> [#uses=1]
- %tmp.211 = getelementptr { i32, float, float, float, float, float, float, float, float, float, float }* %timing_inf, i64 0, i32 2 ; <float*> [#uses=1]
- %tmp.212 = load float* %tmp.211 ; <float> [#uses=1]
- %tmp.213 = getelementptr { i32, float, float, float, float, float, float, float, float, float, float }* %timing_inf, i64 0, i32 3 ; <float*> [#uses=1]
- %tmp.214 = load float* %tmp.213 ; <float> [#uses=1]
- %tmp.215 = getelementptr { i32, float, float, float, float, float, float, float, float, float, float }* %timing_inf, i64 0, i32 4 ; <float*> [#uses=1]
- %tmp.216 = load float* %tmp.215 ; <float> [#uses=1]
- %tmp.217 = getelementptr { i32, float, float, float, float, float, float, float, float, float, float }* %timing_inf, i64 0, i32 5 ; <float*> [#uses=1]
- %tmp.218 = load float* %tmp.217 ; <float> [#uses=1]
- %tmp.219 = getelementptr { i32, float, float, float, float, float, float, float, float, float, float }* %timing_inf, i64 0, i32 6 ; <float*> [#uses=1]
- %tmp.220 = load float* %tmp.219 ; <float> [#uses=1]
- %tmp.221 = getelementptr { i32, float, float, float, float, float, float, float, float, float, float }* %timing_inf, i64 0, i32 7 ; <float*> [#uses=1]
- %tmp.222 = load float* %tmp.221 ; <float> [#uses=1]
- %tmp.223 = getelementptr { i32, float, float, float, float, float, float, float, float, float, float }* %timing_inf, i64 0, i32 8 ; <float*> [#uses=1]
- %tmp.224 = load float* %tmp.223 ; <float> [#uses=1]
- %tmp.225 = getelementptr { i32, float, float, float, float, float, float, float, float, float, float }* %timing_inf, i64 0, i32 9 ; <float*> [#uses=1]
- %tmp.226 = load float* %tmp.225 ; <float> [#uses=1]
- %tmp.227 = getelementptr { i32, float, float, float, float, float, float, float, float, float, float }* %timing_inf, i64 0, i32 10 ; <float*> [#uses=1]
- %tmp.228 = load float* %tmp.227 ; <float> [#uses=1]
+ %tmp.101 = getelementptr %struct..s_placer_opts, %struct..s_placer_opts* %placer_opts, i64 0, i32 4 ; <i8**> [#uses=1]
+ %tmp.105 = getelementptr [300 x i8], [300 x i8]* %net_file, i64 0, i64 0 ; <i8*> [#uses=1]
+ %tmp.106 = getelementptr [300 x i8], [300 x i8]* %arch_file, i64 0, i64 0 ; <i8*> [#uses=1]
+ %tmp.107 = getelementptr [300 x i8], [300 x i8]* %place_file, i64 0, i64 0 ; <i8*> [#uses=1]
+ %tmp.108 = getelementptr [300 x i8], [300 x i8]* %route_file, i64 0, i64 0 ; <i8*> [#uses=1]
+ %tmp.109 = getelementptr { i32, float, float, float, float, float, float, float, float, float, float }, { i32, float, float, float, float, float, float, float, float, float, float }* %timing_inf, i64 0, i32 0 ; <i32*> [#uses=1]
+ %tmp.112 = getelementptr %struct..s_placer_opts, %struct..s_placer_opts* %placer_opts, i64 0, i32 0 ; <i32*> [#uses=1]
+ %tmp.114 = getelementptr %struct..s_placer_opts, %struct..s_placer_opts* %placer_opts, i64 0, i32 6 ; <i32*> [#uses=1]
+ %tmp.118 = getelementptr %struct..s_router_opts, %struct..s_router_opts* %router_opts, i64 0, i32 7 ; <i32*> [#uses=1]
+ %tmp.135 = load i32, i32* %operation ; <i32> [#uses=1]
+ %tmp.137 = load i32, i32* %tmp.112 ; <i32> [#uses=1]
+ %tmp.138 = getelementptr %struct..s_placer_opts, %struct..s_placer_opts* %placer_opts, i64 0, i32 1 ; <float*> [#uses=1]
+ %tmp.139 = load float, float* %tmp.138 ; <float> [#uses=1]
+ %tmp.140 = getelementptr %struct..s_placer_opts, %struct..s_placer_opts* %placer_opts, i64 0, i32 2 ; <i32*> [#uses=1]
+ %tmp.141 = load i32, i32* %tmp.140 ; <i32> [#uses=1]
+ %tmp.142 = getelementptr %struct..s_placer_opts, %struct..s_placer_opts* %placer_opts, i64 0, i32 3 ; <i32*> [#uses=1]
+ %tmp.143 = load i32, i32* %tmp.142 ; <i32> [#uses=1]
+ %tmp.145 = load i8*, i8** %tmp.101 ; <i8*> [#uses=1]
+ %tmp.146 = getelementptr %struct..s_placer_opts, %struct..s_placer_opts* %placer_opts, i64 0, i32 5 ; <i32*> [#uses=1]
+ %tmp.147 = load i32, i32* %tmp.146 ; <i32> [#uses=1]
+ %tmp.149 = load i32, i32* %tmp.114 ; <i32> [#uses=1]
+ %tmp.154 = load i32, i32* %full_stats ; <i32> [#uses=1]
+ %tmp.155 = load i32, i32* %verify_binary_search ; <i32> [#uses=1]
+ %tmp.156 = getelementptr %struct..s_annealing_sched, %struct..s_annealing_sched* %annealing_sched, i64 0, i32 0 ; <i32*> [#uses=1]
+ %tmp.157 = load i32, i32* %tmp.156 ; <i32> [#uses=1]
+ %tmp.158 = getelementptr %struct..s_annealing_sched, %struct..s_annealing_sched* %annealing_sched, i64 0, i32 1 ; <float*> [#uses=1]
+ %tmp.159 = load float, float* %tmp.158 ; <float> [#uses=1]
+ %tmp.160 = getelementptr %struct..s_annealing_sched, %struct..s_annealing_sched* %annealing_sched, i64 0, i32 2 ; <float*> [#uses=1]
+ %tmp.161 = load float, float* %tmp.160 ; <float> [#uses=1]
+ %tmp.162 = getelementptr %struct..s_annealing_sched, %struct..s_annealing_sched* %annealing_sched, i64 0, i32 3 ; <float*> [#uses=1]
+ %tmp.163 = load float, float* %tmp.162 ; <float> [#uses=1]
+ %tmp.164 = getelementptr %struct..s_annealing_sched, %struct..s_annealing_sched* %annealing_sched, i64 0, i32 4 ; <float*> [#uses=1]
+ %tmp.165 = load float, float* %tmp.164 ; <float> [#uses=1]
+ %tmp.166 = getelementptr %struct..s_router_opts, %struct..s_router_opts* %router_opts, i64 0, i32 0 ; <float*> [#uses=1]
+ %tmp.167 = load float, float* %tmp.166 ; <float> [#uses=1]
+ %tmp.168 = getelementptr %struct..s_router_opts, %struct..s_router_opts* %router_opts, i64 0, i32 1 ; <float*> [#uses=1]
+ %tmp.169 = load float, float* %tmp.168 ; <float> [#uses=1]
+ %tmp.170 = getelementptr %struct..s_router_opts, %struct..s_router_opts* %router_opts, i64 0, i32 2 ; <float*> [#uses=1]
+ %tmp.171 = load float, float* %tmp.170 ; <float> [#uses=1]
+ %tmp.172 = getelementptr %struct..s_router_opts, %struct..s_router_opts* %router_opts, i64 0, i32 3 ; <float*> [#uses=1]
+ %tmp.173 = load float, float* %tmp.172 ; <float> [#uses=1]
+ %tmp.174 = getelementptr %struct..s_router_opts, %struct..s_router_opts* %router_opts, i64 0, i32 4 ; <float*> [#uses=1]
+ %tmp.175 = load float, float* %tmp.174 ; <float> [#uses=1]
+ %tmp.176 = getelementptr %struct..s_router_opts, %struct..s_router_opts* %router_opts, i64 0, i32 5 ; <i32*> [#uses=1]
+ %tmp.177 = load i32, i32* %tmp.176 ; <i32> [#uses=1]
+ %tmp.178 = getelementptr %struct..s_router_opts, %struct..s_router_opts* %router_opts, i64 0, i32 6 ; <i32*> [#uses=1]
+ %tmp.179 = load i32, i32* %tmp.178 ; <i32> [#uses=1]
+ %tmp.181 = load i32, i32* %tmp.118 ; <i32> [#uses=1]
+ %tmp.182 = getelementptr %struct..s_router_opts, %struct..s_router_opts* %router_opts, i64 0, i32 8 ; <i32*> [#uses=1]
+ %tmp.183 = load i32, i32* %tmp.182 ; <i32> [#uses=1]
+ %tmp.184 = getelementptr %struct..s_det_routing_arch, %struct..s_det_routing_arch* %det_routing_arch, i64 0, i32 0 ; <i32*> [#uses=1]
+ %tmp.185 = load i32, i32* %tmp.184 ; <i32> [#uses=1]
+ %tmp.186 = getelementptr %struct..s_det_routing_arch, %struct..s_det_routing_arch* %det_routing_arch, i64 0, i32 1 ; <float*> [#uses=1]
+ %tmp.187 = load float, float* %tmp.186 ; <float> [#uses=1]
+ %tmp.188 = getelementptr %struct..s_det_routing_arch, %struct..s_det_routing_arch* %det_routing_arch, i64 0, i32 2 ; <float*> [#uses=1]
+ %tmp.189 = load float, float* %tmp.188 ; <float> [#uses=1]
+ %tmp.190 = getelementptr %struct..s_det_routing_arch, %struct..s_det_routing_arch* %det_routing_arch, i64 0, i32 3 ; <float*> [#uses=1]
+ %tmp.191 = load float, float* %tmp.190 ; <float> [#uses=1]
+ %tmp.192 = getelementptr %struct..s_det_routing_arch, %struct..s_det_routing_arch* %det_routing_arch, i64 0, i32 4 ; <i32*> [#uses=1]
+ %tmp.193 = load i32, i32* %tmp.192 ; <i32> [#uses=1]
+ %tmp.194 = getelementptr %struct..s_det_routing_arch, %struct..s_det_routing_arch* %det_routing_arch, i64 0, i32 5 ; <i32*> [#uses=1]
+ %tmp.195 = load i32, i32* %tmp.194 ; <i32> [#uses=1]
+ %tmp.196 = getelementptr %struct..s_det_routing_arch, %struct..s_det_routing_arch* %det_routing_arch, i64 0, i32 6 ; <i16*> [#uses=1]
+ %tmp.197 = load i16, i16* %tmp.196 ; <i16> [#uses=1]
+ %tmp.198 = getelementptr %struct..s_det_routing_arch, %struct..s_det_routing_arch* %det_routing_arch, i64 0, i32 7 ; <i16*> [#uses=1]
+ %tmp.199 = load i16, i16* %tmp.198 ; <i16> [#uses=1]
+ %tmp.200 = getelementptr %struct..s_det_routing_arch, %struct..s_det_routing_arch* %det_routing_arch, i64 0, i32 8 ; <i16*> [#uses=1]
+ %tmp.201 = load i16, i16* %tmp.200 ; <i16> [#uses=1]
+ %tmp.202 = getelementptr %struct..s_det_routing_arch, %struct..s_det_routing_arch* %det_routing_arch, i64 0, i32 9 ; <float*> [#uses=1]
+ %tmp.203 = load float, float* %tmp.202 ; <float> [#uses=1]
+ %tmp.204 = getelementptr %struct..s_det_routing_arch, %struct..s_det_routing_arch* %det_routing_arch, i64 0, i32 10 ; <float*> [#uses=1]
+ %tmp.205 = load float, float* %tmp.204 ; <float> [#uses=1]
+ %tmp.206 = load %struct..s_segment_inf*, %struct..s_segment_inf** %segment_inf ; <%struct..s_segment_inf*> [#uses=1]
+ %tmp.208 = load i32, i32* %tmp.109 ; <i32> [#uses=1]
+ %tmp.209 = getelementptr { i32, float, float, float, float, float, float, float, float, float, float }, { i32, float, float, float, float, float, float, float, float, float, float }* %timing_inf, i64 0, i32 1 ; <float*> [#uses=1]
+ %tmp.210 = load float, float* %tmp.209 ; <float> [#uses=1]
+ %tmp.211 = getelementptr { i32, float, float, float, float, float, float, float, float, float, float }, { i32, float, float, float, float, float, float, float, float, float, float }* %timing_inf, i64 0, i32 2 ; <float*> [#uses=1]
+ %tmp.212 = load float, float* %tmp.211 ; <float> [#uses=1]
+ %tmp.213 = getelementptr { i32, float, float, float, float, float, float, float, float, float, float }, { i32, float, float, float, float, float, float, float, float, float, float }* %timing_inf, i64 0, i32 3 ; <float*> [#uses=1]
+ %tmp.214 = load float, float* %tmp.213 ; <float> [#uses=1]
+ %tmp.215 = getelementptr { i32, float, float, float, float, float, float, float, float, float, float }, { i32, float, float, float, float, float, float, float, float, float, float }* %timing_inf, i64 0, i32 4 ; <float*> [#uses=1]
+ %tmp.216 = load float, float* %tmp.215 ; <float> [#uses=1]
+ %tmp.217 = getelementptr { i32, float, float, float, float, float, float, float, float, float, float }, { i32, float, float, float, float, float, float, float, float, float, float }* %timing_inf, i64 0, i32 5 ; <float*> [#uses=1]
+ %tmp.218 = load float, float* %tmp.217 ; <float> [#uses=1]
+ %tmp.219 = getelementptr { i32, float, float, float, float, float, float, float, float, float, float }, { i32, float, float, float, float, float, float, float, float, float, float }* %timing_inf, i64 0, i32 6 ; <float*> [#uses=1]
+ %tmp.220 = load float, float* %tmp.219 ; <float> [#uses=1]
+ %tmp.221 = getelementptr { i32, float, float, float, float, float, float, float, float, float, float }, { i32, float, float, float, float, float, float, float, float, float, float }* %timing_inf, i64 0, i32 7 ; <float*> [#uses=1]
+ %tmp.222 = load float, float* %tmp.221 ; <float> [#uses=1]
+ %tmp.223 = getelementptr { i32, float, float, float, float, float, float, float, float, float, float }, { i32, float, float, float, float, float, float, float, float, float, float }* %timing_inf, i64 0, i32 8 ; <float*> [#uses=1]
+ %tmp.224 = load float, float* %tmp.223 ; <float> [#uses=1]
+ %tmp.225 = getelementptr { i32, float, float, float, float, float, float, float, float, float, float }, { i32, float, float, float, float, float, float, float, float, float, float }* %timing_inf, i64 0, i32 9 ; <float*> [#uses=1]
+ %tmp.226 = load float, float* %tmp.225 ; <float> [#uses=1]
+ %tmp.227 = getelementptr { i32, float, float, float, float, float, float, float, float, float, float }, { i32, float, float, float, float, float, float, float, float, float, float }* %timing_inf, i64 0, i32 10 ; <float*> [#uses=1]
+ %tmp.228 = load float, float* %tmp.227 ; <float> [#uses=1]
call void @place_and_route( i32 %tmp.135, i32 %tmp.137, float %tmp.139, i32 %tmp.141, i32 %tmp.143, i8* %tmp.145, i32 %tmp.147, i32 %tmp.149, i8* %tmp.107, i8* %tmp.105, i8* %tmp.106, i8* %tmp.108, i32 %tmp.154, i32 %tmp.155, i32 %tmp.157, float %tmp.159, float %tmp.161, float %tmp.163, float %tmp.165, float %tmp.167, float %tmp.169, float %tmp.171, float %tmp.173, float %tmp.175, i32 %tmp.177, i32 %tmp.179, i32 %tmp.181, i32 %tmp.183, i32 %tmp.185, float %tmp.187, float %tmp.189, float %tmp.191, i32 %tmp.193, i32 %tmp.195, i16 %tmp.197, i16 %tmp.199, i16 %tmp.201, float %tmp.203, float %tmp.205, %struct..s_segment_inf* %tmp.206, i32 %tmp.208, float %tmp.210, float %tmp.212, float %tmp.214, float %tmp.216, float %tmp.218, float %tmp.220, float %tmp.222, float %tmp.224, float %tmp.226, float %tmp.228 )
- %tmp.231 = load i32* %show_graphics ; <i32> [#uses=1]
+ %tmp.231 = load i32, i32* %show_graphics ; <i32> [#uses=1]
%tmp.232 = icmp ne i32 %tmp.231, 0 ; <i1> [#uses=1]
br i1 %tmp.232, label %then.2, label %endif.2
diff --git a/test/CodeGen/Generic/2003-05-30-BadFoldGEP.ll b/test/CodeGen/Generic/2003-05-30-BadFoldGEP.ll
index 10d3a11a5190..9e3d254264a5 100644
--- a/test/CodeGen/Generic/2003-05-30-BadFoldGEP.ll
+++ b/test/CodeGen/Generic/2003-05-30-BadFoldGEP.ll
@@ -22,11 +22,11 @@
define internal i32 @OpenOutput(i8* %filename.1) {
entry:
- %tmp.0 = load %FileType** @Output ; <%FileType*> [#uses=1]
- %tmp.4 = getelementptr %FileType* %tmp.0, i64 1 ; <%FileType*> [#uses=1]
- %addrOfGlobal = getelementptr [16 x %FileType]* @OutputFiles, i64 0 ; <[16 x %FileType]*> [#uses=1]
- %constantGEP = getelementptr [16 x %FileType]* %addrOfGlobal, i64 1 ; <[16 x %FileType]*> [#uses=1]
- %constantGEP.upgrd.1 = getelementptr [16 x %FileType]* %constantGEP, i64 0, i64 0 ; <%FileType*> [#uses=1]
+ %tmp.0 = load %FileType*, %FileType** @Output ; <%FileType*> [#uses=1]
+ %tmp.4 = getelementptr %FileType, %FileType* %tmp.0, i64 1 ; <%FileType*> [#uses=1]
+ %addrOfGlobal = getelementptr [16 x %FileType], [16 x %FileType]* @OutputFiles, i64 0 ; <[16 x %FileType]*> [#uses=1]
+ %constantGEP = getelementptr [16 x %FileType], [16 x %FileType]* %addrOfGlobal, i64 1 ; <[16 x %FileType]*> [#uses=1]
+ %constantGEP.upgrd.1 = getelementptr [16 x %FileType], [16 x %FileType]* %constantGEP, i64 0, i64 0 ; <%FileType*> [#uses=1]
%tmp.10 = icmp eq %FileType* %tmp.4, %constantGEP.upgrd.1 ; <i1> [#uses=1]
br i1 %tmp.10, label %return, label %endif.0
diff --git a/test/CodeGen/Generic/2003-07-06-BadIntCmp.ll b/test/CodeGen/Generic/2003-07-06-BadIntCmp.ll
index 1d1aad5f27e2..7e402f595809 100644
--- a/test/CodeGen/Generic/2003-07-06-BadIntCmp.ll
+++ b/test/CodeGen/Generic/2003-07-06-BadIntCmp.ll
@@ -31,11 +31,11 @@ entry:
br i1 %tmp.8, label %then, label %else
then: ; preds = %entry
- %tmp.11 = call i32 (i8*, ...)* @printf( i8* getelementptr ([6 x i8]* @.str_1, i64 0, i64 0) ) ; <i32> [#uses=0]
+ %tmp.11 = call i32 (i8*, ...) @printf( i8* getelementptr ([6 x i8], [6 x i8]* @.str_1, i64 0, i64 0) ) ; <i32> [#uses=0]
br label %UnifiedExitNode
else: ; preds = %entry
- %tmp.13 = call i32 (i8*, ...)* @printf( i8* getelementptr ([7 x i8]* @.str_2, i64 0, i64 0) ) ; <i32> [#uses=0]
+ %tmp.13 = call i32 (i8*, ...) @printf( i8* getelementptr ([7 x i8], [7 x i8]* @.str_2, i64 0, i64 0) ) ; <i32> [#uses=0]
br label %UnifiedExitNode
UnifiedExitNode: ; preds = %else, %then
diff --git a/test/CodeGen/Generic/2003-07-07-BadLongConst.ll b/test/CodeGen/Generic/2003-07-07-BadLongConst.ll
index 64312ba09a50..928b57efda16 100644
--- a/test/CodeGen/Generic/2003-07-07-BadLongConst.ll
+++ b/test/CodeGen/Generic/2003-07-07-BadLongConst.ll
@@ -14,7 +14,7 @@ entry:
%tmp.11 = call i64 @getL( ) ; <i64> [#uses=2]
%tmp.5 = trunc i64 %tmp.11 to i32 ; <i32> [#uses=2]
%tmp.23 = and i64 %tmp.11, -4294967296 ; <i64> [#uses=2]
- %tmp.16 = call i32 (i8*, ...)* @printf( i8* getelementptr ([42 x i8]* @.str_1, i64 0, i64 0), i32 %tmp.5, i32 %tmp.5, i64 %tmp.23, i64 %tmp.23 ) ; <i32> [#uses=0]
+ %tmp.16 = call i32 (i8*, ...) @printf( i8* getelementptr ([42 x i8], [42 x i8]* @.str_1, i64 0, i64 0), i32 %tmp.5, i32 %tmp.5, i64 %tmp.23, i64 %tmp.23 ) ; <i32> [#uses=0]
ret i32 0
}
diff --git a/test/CodeGen/Generic/2003-07-08-BadCastToBool.ll b/test/CodeGen/Generic/2003-07-08-BadCastToBool.ll
index 8019caa832d7..73ad186be551 100644
--- a/test/CodeGen/Generic/2003-07-08-BadCastToBool.ll
+++ b/test/CodeGen/Generic/2003-07-08-BadCastToBool.ll
@@ -28,7 +28,7 @@ entry:
define i32 @main() {
entry:
%result = call i32 @adj( i32 3, i32 2 ) ; <i32> [#uses=1]
- %tmp.0 = call i32 (i8*, ...)* @printf( i8* getelementptr ([30 x i8]* @.str_1, i64 0, i64 0), i32 3, i32 2, i32 %result ) ; <i32> [#uses=0]
+ %tmp.0 = call i32 (i8*, ...) @printf( i8* getelementptr ([30 x i8], [30 x i8]* @.str_1, i64 0, i64 0), i32 3, i32 2, i32 %result ) ; <i32> [#uses=0]
ret i32 0
}
diff --git a/test/CodeGen/Generic/2003-07-29-BadConstSbyte.ll b/test/CodeGen/Generic/2003-07-29-BadConstSbyte.ll
index 4e6fe1cf8bf5..010c0c553638 100644
--- a/test/CodeGen/Generic/2003-07-29-BadConstSbyte.ll
+++ b/test/CodeGen/Generic/2003-07-29-BadConstSbyte.ll
@@ -26,10 +26,10 @@ entry:
loopentry: ; preds = %loopentry, %entry
%i = phi i64 [ 0, %entry ], [ %inc.i, %loopentry ] ; <i64> [#uses=3]
- %cptr = getelementptr [6 x i8]* @yy_ec, i64 0, i64 %i ; <i8*> [#uses=1]
- %c = load i8* %cptr ; <i8> [#uses=1]
- %ignore = call i32 (i8*, ...)* @printf( i8* getelementptr ([8 x i8]* @.str_3, i64 0, i64 0), i64 %i ) ; <i32> [#uses=0]
- %ignore2 = call i32 (i8*, ...)* @printf( i8* getelementptr ([4 x i8]* @.str_4, i64 0, i64 0), i8 %c ) ; <i32> [#uses=0]
+ %cptr = getelementptr [6 x i8], [6 x i8]* @yy_ec, i64 0, i64 %i ; <i8*> [#uses=1]
+ %c = load i8, i8* %cptr ; <i8> [#uses=1]
+ %ignore = call i32 (i8*, ...) @printf( i8* getelementptr ([8 x i8], [8 x i8]* @.str_3, i64 0, i64 0), i64 %i ) ; <i32> [#uses=0]
+ %ignore2 = call i32 (i8*, ...) @printf( i8* getelementptr ([4 x i8], [4 x i8]* @.str_4, i64 0, i64 0), i8 %c ) ; <i32> [#uses=0]
%inc.i = add i64 %i, 1 ; <i64> [#uses=2]
%done = icmp sle i64 %inc.i, 5 ; <i1> [#uses=1]
br i1 %done, label %loopentry, label %exit.1
diff --git a/test/CodeGen/Generic/2004-05-09-LiveVarPartialRegister.ll b/test/CodeGen/Generic/2004-05-09-LiveVarPartialRegister.ll
index d4a4cf88ce0c..8dfdd0172c29 100644
--- a/test/CodeGen/Generic/2004-05-09-LiveVarPartialRegister.ll
+++ b/test/CodeGen/Generic/2004-05-09-LiveVarPartialRegister.ll
@@ -3,8 +3,8 @@
@global_long_2 = linkonce global i64 49 ; <i64*> [#uses=1]
define i32 @main() {
- %l1 = load i64* @global_long_1 ; <i64> [#uses=1]
- %l2 = load i64* @global_long_2 ; <i64> [#uses=1]
+ %l1 = load i64, i64* @global_long_1 ; <i64> [#uses=1]
+ %l2 = load i64, i64* @global_long_2 ; <i64> [#uses=1]
%cond = icmp sle i64 %l1, %l2 ; <i1> [#uses=1]
%cast2 = zext i1 %cond to i32 ; <i32> [#uses=1]
%RV = sub i32 1, %cast2 ; <i32> [#uses=1]
diff --git a/test/CodeGen/Generic/2005-04-09-GlobalInPHI.ll b/test/CodeGen/Generic/2005-04-09-GlobalInPHI.ll
index 353e411b0887..b54f737b90aa 100644
--- a/test/CodeGen/Generic/2005-04-09-GlobalInPHI.ll
+++ b/test/CodeGen/Generic/2005-04-09-GlobalInPHI.ll
@@ -12,7 +12,7 @@ cond_true: ; preds = %entry
else.0: ; preds = %cond_true, %entry
%tmp.167.1 = phi i32 [ ptrtoint ([17 x i8]* @.str_87 to i32), %entry ], [ 0, %cond_true ] ; <i32> [#uses=0]
- call void @Pr( i8* getelementptr ([4 x i8]* @.str_67, i32 0, i32 0), i32 0, i32 0 )
+ call void @Pr( i8* getelementptr ([4 x i8], [4 x i8]* @.str_67, i32 0, i32 0), i32 0, i32 0 )
ret void
}
diff --git a/test/CodeGen/Generic/2005-12-01-Crash.ll b/test/CodeGen/Generic/2005-12-01-Crash.ll
index a9eeddedc54d..e6ab9d280c73 100644
--- a/test/CodeGen/Generic/2005-12-01-Crash.ll
+++ b/test/CodeGen/Generic/2005-12-01-Crash.ll
@@ -11,7 +11,7 @@
define void @printArgsNoRet(i32 %a1, float %a2, i8 %a3, double %a4, i8* %a5, i32 %a6, float %a7, i8 %a8, double %a9, i8* %a10, i32 %a11, float %a12, i8 %a13, double %a14, i8* %a15) {
entry:
%tmp17 = sext i8 %a13 to i32 ; <i32> [#uses=1]
- %tmp23 = call i32 (i8*, ...)* @printf( i8* getelementptr ([29 x i8]* @str2, i32 0, i64 0), i32 %a11, double 0.000000e+00, i32 %tmp17, double %a14, i32 0 ) ; <i32> [#uses=0]
+ %tmp23 = call i32 (i8*, ...) @printf( i8* getelementptr ([29 x i8], [29 x i8]* @str2, i32 0, i64 0), i32 %a11, double 0.000000e+00, i32 %tmp17, double %a14, i32 0 ) ; <i32> [#uses=0]
ret void
}
diff --git a/test/CodeGen/Generic/2006-02-12-InsertLibcall.ll b/test/CodeGen/Generic/2006-02-12-InsertLibcall.ll
index 5508272b5551..8a43b6ab2aa5 100644
--- a/test/CodeGen/Generic/2006-02-12-InsertLibcall.ll
+++ b/test/CodeGen/Generic/2006-02-12-InsertLibcall.ll
@@ -42,7 +42,7 @@ then.1.i52: ; preds = %then.0.i40
ret void
else.1.i56: ; preds = %then.0.i40
- %tmp.28.i = load i32* @G ; <i32> [#uses=1]
+ %tmp.28.i = load i32, i32* @G ; <i32> [#uses=1]
%tmp.29.i = icmp eq i32 %tmp.28.i, 1 ; <i1> [#uses=1]
br i1 %tmp.29.i, label %shortcirc_next.i, label %shortcirc_done.i
diff --git a/test/CodeGen/Generic/2006-03-01-dagcombineinfloop.ll b/test/CodeGen/Generic/2006-03-01-dagcombineinfloop.ll
index 2a6cc0c9cdd2..554cd2eef581 100644
--- a/test/CodeGen/Generic/2006-03-01-dagcombineinfloop.ll
+++ b/test/CodeGen/Generic/2006-03-01-dagcombineinfloop.ll
@@ -36,16 +36,16 @@ cond_next12: ; preds = %cond_true92
cond_next18: ; preds = %cond_next12, %cond_true
%tmp20 = bitcast %struct.tree_node* %tmp2 to %struct.tree_type* ; <%struct.tree_type*> [#uses=1]
- %tmp21 = getelementptr %struct.tree_type* %tmp20, i32 0, i32 17 ; <%struct.tree_node**> [#uses=1]
- %tmp22 = load %struct.tree_node** %tmp21 ; <%struct.tree_node*> [#uses=6]
+ %tmp21 = getelementptr %struct.tree_type, %struct.tree_type* %tmp20, i32 0, i32 17 ; <%struct.tree_node**> [#uses=1]
+ %tmp22 = load %struct.tree_node*, %struct.tree_node** %tmp21 ; <%struct.tree_node*> [#uses=6]
%tmp24 = icmp eq %struct.tree_node* %tmp22, %tmp23 ; <i1> [#uses=1]
br i1 %tmp24, label %return, label %cond_next28
cond_next28: ; preds = %cond_next18
%tmp30 = bitcast %struct.tree_node* %tmp2 to %struct.tree_common* ; <%struct.tree_common*> [#uses=1]
- %tmp = getelementptr %struct.tree_common* %tmp30, i32 0, i32 2 ; <i8*> [#uses=1]
+ %tmp = getelementptr %struct.tree_common, %struct.tree_common* %tmp30, i32 0, i32 2 ; <i8*> [#uses=1]
%tmp.upgrd.1 = bitcast i8* %tmp to i32* ; <i32*> [#uses=1]
- %tmp.upgrd.2 = load i32* %tmp.upgrd.1 ; <i32> [#uses=1]
+ %tmp.upgrd.2 = load i32, i32* %tmp.upgrd.1 ; <i32> [#uses=1]
%tmp32 = trunc i32 %tmp.upgrd.2 to i8 ; <i8> [#uses=1]
%tmp33 = icmp eq i8 %tmp32, 7 ; <i1> [#uses=1]
br i1 %tmp33, label %cond_true34, label %cond_next84
@@ -69,23 +69,23 @@ cond_next84: ; preds = %cond_next28
br i1 %tmp.upgrd.6, label %return, label %cond_true92
cond_true92.preheader: ; preds = %entry
- %tmp7 = load %struct.tree_node** @void_type_node ; <%struct.tree_node*> [#uses=1]
- %tmp23 = load %struct.tree_node** @float_type_node ; <%struct.tree_node*> [#uses=1]
- %tmp39 = load %struct.tree_node** @char_type_node ; <%struct.tree_node*> [#uses=1]
- %tmp48 = load %struct.tree_node** @signed_char_type_node ; <%struct.tree_node*> [#uses=1]
- %tmp57 = load %struct.tree_node** @unsigned_char_type_node ; <%struct.tree_node*> [#uses=1]
- %tmp66 = load %struct.tree_node** @short_integer_type_node ; <%struct.tree_node*> [#uses=1]
- %tmp75 = load %struct.tree_node** @short_unsigned_type_node ; <%struct.tree_node*> [#uses=1]
+ %tmp7 = load %struct.tree_node*, %struct.tree_node** @void_type_node ; <%struct.tree_node*> [#uses=1]
+ %tmp23 = load %struct.tree_node*, %struct.tree_node** @float_type_node ; <%struct.tree_node*> [#uses=1]
+ %tmp39 = load %struct.tree_node*, %struct.tree_node** @char_type_node ; <%struct.tree_node*> [#uses=1]
+ %tmp48 = load %struct.tree_node*, %struct.tree_node** @signed_char_type_node ; <%struct.tree_node*> [#uses=1]
+ %tmp57 = load %struct.tree_node*, %struct.tree_node** @unsigned_char_type_node ; <%struct.tree_node*> [#uses=1]
+ %tmp66 = load %struct.tree_node*, %struct.tree_node** @short_integer_type_node ; <%struct.tree_node*> [#uses=1]
+ %tmp75 = load %struct.tree_node*, %struct.tree_node** @short_unsigned_type_node ; <%struct.tree_node*> [#uses=1]
br label %cond_true92
cond_true92: ; preds = %cond_true92.preheader, %cond_next84, %cond_true34
%t.0.0 = phi %struct.tree_node* [ %parms, %cond_true92.preheader ], [ %tmp6, %cond_true34 ], [ %tmp6, %cond_next84 ] ; <%struct.tree_node*> [#uses=2]
%tmp.upgrd.4 = bitcast %struct.tree_node* %t.0.0 to %struct.tree_list* ; <%struct.tree_list*> [#uses=1]
- %tmp.upgrd.5 = getelementptr %struct.tree_list* %tmp.upgrd.4, i32 0, i32 2 ; <%struct.tree_node**> [#uses=1]
- %tmp2 = load %struct.tree_node** %tmp.upgrd.5 ; <%struct.tree_node*> [#uses=5]
+ %tmp.upgrd.5 = getelementptr %struct.tree_list, %struct.tree_list* %tmp.upgrd.4, i32 0, i32 2 ; <%struct.tree_node**> [#uses=1]
+ %tmp2 = load %struct.tree_node*, %struct.tree_node** %tmp.upgrd.5 ; <%struct.tree_node*> [#uses=5]
%tmp4 = bitcast %struct.tree_node* %t.0.0 to %struct.tree_common* ; <%struct.tree_common*> [#uses=1]
- %tmp5 = getelementptr %struct.tree_common* %tmp4, i32 0, i32 0 ; <%struct.tree_node**> [#uses=1]
- %tmp6 = load %struct.tree_node** %tmp5 ; <%struct.tree_node*> [#uses=3]
+ %tmp5 = getelementptr %struct.tree_common, %struct.tree_common* %tmp4, i32 0, i32 0 ; <%struct.tree_node**> [#uses=1]
+ %tmp6 = load %struct.tree_node*, %struct.tree_node** %tmp5 ; <%struct.tree_node*> [#uses=3]
%tmp.upgrd.6 = icmp eq %struct.tree_node* %tmp6, null ; <i1> [#uses=3]
br i1 %tmp.upgrd.6, label %cond_true, label %cond_next12
diff --git a/test/CodeGen/Generic/2006-04-26-SetCCAnd.ll b/test/CodeGen/Generic/2006-04-26-SetCCAnd.ll
index 8465b829e29f..f68dc32c5eb5 100644
--- a/test/CodeGen/Generic/2006-04-26-SetCCAnd.ll
+++ b/test/CodeGen/Generic/2006-04-26-SetCCAnd.ll
@@ -22,8 +22,8 @@ else.3: ; preds = %endif.4
]
then.10: ; preds = %else.3, %else.3
- %tmp.112 = load i16* null ; <i16> [#uses=2]
- %tmp.113 = load i16* @G ; <i16> [#uses=2]
+ %tmp.112 = load i16, i16* null ; <i16> [#uses=2]
+ %tmp.113 = load i16, i16* @G ; <i16> [#uses=2]
%tmp.114 = icmp ugt i16 %tmp.112, %tmp.113 ; <i1> [#uses=1]
%tmp.120 = icmp ult i16 %tmp.112, %tmp.113 ; <i1> [#uses=1]
%bothcond = and i1 %tmp.114, %tmp.120 ; <i1> [#uses=1]
diff --git a/test/CodeGen/Generic/2006-05-06-GEP-Cast-Sink-Crash.ll b/test/CodeGen/Generic/2006-05-06-GEP-Cast-Sink-Crash.ll
index 1a9fa9f5de6b..12a40116c59b 100644
--- a/test/CodeGen/Generic/2006-05-06-GEP-Cast-Sink-Crash.ll
+++ b/test/CodeGen/Generic/2006-05-06-GEP-Cast-Sink-Crash.ll
@@ -9,7 +9,7 @@ declare void @fprintf(i32, ...)
define void @OUTPUT_TABLE(%struct.SYMBOL_TABLE_ENTRY* %SYM_TAB) {
entry:
- %tmp11 = getelementptr %struct.SYMBOL_TABLE_ENTRY* %SYM_TAB, i32 0, i32 1, i32 0 ; <i8*> [#uses=2]
+ %tmp11 = getelementptr %struct.SYMBOL_TABLE_ENTRY, %struct.SYMBOL_TABLE_ENTRY* %SYM_TAB, i32 0, i32 1, i32 0 ; <i8*> [#uses=2]
%tmp.i = bitcast i8* %tmp11 to i8* ; <i8*> [#uses=1]
br label %bb.i
@@ -18,7 +18,7 @@ bb.i: ; preds = %cond_next.i, %entry
br i1 false, label %cond_true.i31, label %cond_next.i
cond_true.i31: ; preds = %bb.i
- call void (i32, ...)* @fprintf( i32 0, i8* %tmp11, i8* null )
+ call void (i32, ...) @fprintf( i32 0, i8* %tmp11, i8* null )
ret void
cond_next.i: ; preds = %bb.i
diff --git a/test/CodeGen/Generic/2006-06-13-ComputeMaskedBitsCrash.ll b/test/CodeGen/Generic/2006-06-13-ComputeMaskedBitsCrash.ll
index bd922b3aa851..80be64c7cf91 100644
--- a/test/CodeGen/Generic/2006-06-13-ComputeMaskedBitsCrash.ll
+++ b/test/CodeGen/Generic/2006-06-13-ComputeMaskedBitsCrash.ll
@@ -9,17 +9,17 @@ entry:
br i1 %tmp22, label %cond_true23, label %cond_next159
cond_true23: ; preds = %entry
- %tmp138 = getelementptr %struct.cl_perfunc_opts* @cl_pf_opts, i32 0, i32 8 ; <i8*> [#uses=1]
+ %tmp138 = getelementptr %struct.cl_perfunc_opts, %struct.cl_perfunc_opts* @cl_pf_opts, i32 0, i32 8 ; <i8*> [#uses=1]
%tmp138.upgrd.1 = bitcast i8* %tmp138 to i32* ; <i32*> [#uses=2]
- %tmp139 = load i32* %tmp138.upgrd.1 ; <i32> [#uses=1]
+ %tmp139 = load i32, i32* %tmp138.upgrd.1 ; <i32> [#uses=1]
%tmp140 = shl i32 1, 27 ; <i32> [#uses=1]
%tmp141 = and i32 %tmp140, 134217728 ; <i32> [#uses=1]
%tmp142 = and i32 %tmp139, -134217729 ; <i32> [#uses=1]
%tmp143 = or i32 %tmp142, %tmp141 ; <i32> [#uses=1]
store i32 %tmp143, i32* %tmp138.upgrd.1
- %tmp144 = getelementptr %struct.cl_perfunc_opts* @cl_pf_opts, i32 0, i32 8 ; <i8*> [#uses=1]
+ %tmp144 = getelementptr %struct.cl_perfunc_opts, %struct.cl_perfunc_opts* @cl_pf_opts, i32 0, i32 8 ; <i8*> [#uses=1]
%tmp144.upgrd.2 = bitcast i8* %tmp144 to i32* ; <i32*> [#uses=1]
- %tmp145 = load i32* %tmp144.upgrd.2 ; <i32> [#uses=1]
+ %tmp145 = load i32, i32* %tmp144.upgrd.2 ; <i32> [#uses=1]
%tmp146 = shl i32 %tmp145, 22 ; <i32> [#uses=1]
%tmp147 = lshr i32 %tmp146, 31 ; <i32> [#uses=1]
%tmp147.upgrd.3 = trunc i32 %tmp147 to i8 ; <i8> [#uses=1]
diff --git a/test/CodeGen/Generic/2006-06-28-SimplifySetCCCrash.ll b/test/CodeGen/Generic/2006-06-28-SimplifySetCCCrash.ll
index c4f2fb0c4726..bdd9787c6ab3 100644
--- a/test/CodeGen/Generic/2006-06-28-SimplifySetCCCrash.ll
+++ b/test/CodeGen/Generic/2006-06-28-SimplifySetCCCrash.ll
@@ -169,7 +169,7 @@ cond_next778: ; preds = %cond_next6.i119
br i1 %tmp781, label %cond_next784, label %bb790
cond_next784: ; preds = %cond_next778
- %tmp785 = load i32* @ix86_cpu ; <i32> [#uses=1]
+ %tmp785 = load i32, i32* @ix86_cpu ; <i32> [#uses=1]
%tmp786 = icmp eq i32 %tmp785, 5 ; <i1> [#uses=1]
br i1 %tmp786, label %UnifiedReturnBlock, label %bb790
@@ -208,7 +208,7 @@ bb1419: ; preds = %cond_true.i
ret void
bb1648: ; preds = %cond_true.i, %cond_true.i, %cond_true.i, %cond_true.i
- %tmp1650 = load i32* @which_alternative ; <i32> [#uses=1]
+ %tmp1650 = load i32, i32* @which_alternative ; <i32> [#uses=1]
switch i32 %tmp1650, label %bb1701 [
i32 0, label %cond_next1675
i32 1, label %cond_next1675
@@ -219,7 +219,7 @@ cond_next1675: ; preds = %bb1648, %bb1648, %bb1648
ret void
bb1701: ; preds = %bb1648
- %tmp1702 = load i32* @which_alternative ; <i32> [#uses=1]
+ %tmp1702 = load i32, i32* @which_alternative ; <i32> [#uses=1]
switch i32 %tmp1702, label %bb1808 [
i32 0, label %cond_next1727
i32 1, label %cond_next1727
@@ -237,7 +237,7 @@ cond_next1834: ; preds = %bb1808
ret void
bb1876: ; preds = %bb1808
- %tmp1877signed = load i32* @which_alternative ; <i32> [#uses=4]
+ %tmp1877signed = load i32, i32* @which_alternative ; <i32> [#uses=4]
%tmp1877 = bitcast i32 %tmp1877signed to i32 ; <i32> [#uses=1]
%bothcond699 = icmp ult i32 %tmp1877, 2 ; <i1> [#uses=1]
%tmp1888 = icmp eq i32 %tmp1877signed, 2 ; <i1> [#uses=1]
diff --git a/test/CodeGen/Generic/2006-09-02-LocalAllocCrash.ll b/test/CodeGen/Generic/2006-09-02-LocalAllocCrash.ll
index 2dc5c162cd96..0c4a9c452cd9 100644
--- a/test/CodeGen/Generic/2006-09-02-LocalAllocCrash.ll
+++ b/test/CodeGen/Generic/2006-09-02-LocalAllocCrash.ll
@@ -78,33 +78,33 @@ cond_true1369.preheader: ; preds = %cond_true1254
ret void
bb1567: ; preds = %cond_true1254
- %tmp1580 = load i64* getelementptr (%struct.CHESS_POSITION* @search, i32 0, i32 3) ; <i64> [#uses=1]
- %tmp1591 = load i64* getelementptr (%struct.CHESS_POSITION* @search, i32 0, i32 4) ; <i64> [#uses=1]
+ %tmp1580 = load i64, i64* getelementptr (%struct.CHESS_POSITION, %struct.CHESS_POSITION* @search, i32 0, i32 3) ; <i64> [#uses=1]
+ %tmp1591 = load i64, i64* getelementptr (%struct.CHESS_POSITION, %struct.CHESS_POSITION* @search, i32 0, i32 4) ; <i64> [#uses=1]
%tmp1572 = tail call fastcc i32 @FirstOne( ) ; <i32> [#uses=5]
- %tmp1582 = getelementptr [64 x i32]* @bishop_shift_rl45, i32 0, i32 %tmp1572 ; <i32*> [#uses=1]
- %tmp1583 = load i32* %tmp1582 ; <i32> [#uses=1]
+ %tmp1582 = getelementptr [64 x i32], [64 x i32]* @bishop_shift_rl45, i32 0, i32 %tmp1572 ; <i32*> [#uses=1]
+ %tmp1583 = load i32, i32* %tmp1582 ; <i32> [#uses=1]
%tmp1583.upgrd.1 = trunc i32 %tmp1583 to i8 ; <i8> [#uses=1]
%shift.upgrd.2 = zext i8 %tmp1583.upgrd.1 to i64 ; <i64> [#uses=1]
%tmp1584 = lshr i64 %tmp1580, %shift.upgrd.2 ; <i64> [#uses=1]
%tmp1584.upgrd.3 = trunc i64 %tmp1584 to i32 ; <i32> [#uses=1]
%tmp1585 = and i32 %tmp1584.upgrd.3, 255 ; <i32> [#uses=1]
%gep.upgrd.4 = zext i32 %tmp1585 to i64 ; <i64> [#uses=1]
- %tmp1587 = getelementptr [64 x [256 x i32]]* @bishop_mobility_rl45, i32 0, i32 %tmp1572, i64 %gep.upgrd.4 ; <i32*> [#uses=1]
- %tmp1588 = load i32* %tmp1587 ; <i32> [#uses=1]
- %tmp1593 = getelementptr [64 x i32]* @bishop_shift_rr45, i32 0, i32 %tmp1572 ; <i32*> [#uses=1]
- %tmp1594 = load i32* %tmp1593 ; <i32> [#uses=1]
+ %tmp1587 = getelementptr [64 x [256 x i32]], [64 x [256 x i32]]* @bishop_mobility_rl45, i32 0, i32 %tmp1572, i64 %gep.upgrd.4 ; <i32*> [#uses=1]
+ %tmp1588 = load i32, i32* %tmp1587 ; <i32> [#uses=1]
+ %tmp1593 = getelementptr [64 x i32], [64 x i32]* @bishop_shift_rr45, i32 0, i32 %tmp1572 ; <i32*> [#uses=1]
+ %tmp1594 = load i32, i32* %tmp1593 ; <i32> [#uses=1]
%tmp1594.upgrd.5 = trunc i32 %tmp1594 to i8 ; <i8> [#uses=1]
%shift.upgrd.6 = zext i8 %tmp1594.upgrd.5 to i64 ; <i64> [#uses=1]
%tmp1595 = lshr i64 %tmp1591, %shift.upgrd.6 ; <i64> [#uses=1]
%tmp1595.upgrd.7 = trunc i64 %tmp1595 to i32 ; <i32> [#uses=1]
%tmp1596 = and i32 %tmp1595.upgrd.7, 255 ; <i32> [#uses=1]
%gep.upgrd.8 = zext i32 %tmp1596 to i64 ; <i64> [#uses=1]
- %tmp1598 = getelementptr [64 x [256 x i32]]* @bishop_mobility_rr45, i32 0, i32 %tmp1572, i64 %gep.upgrd.8 ; <i32*> [#uses=1]
- %tmp1599 = load i32* %tmp1598 ; <i32> [#uses=1]
+ %tmp1598 = getelementptr [64 x [256 x i32]], [64 x [256 x i32]]* @bishop_mobility_rr45, i32 0, i32 %tmp1572, i64 %gep.upgrd.8 ; <i32*> [#uses=1]
+ %tmp1599 = load i32, i32* %tmp1598 ; <i32> [#uses=1]
%tmp1600.neg = sub i32 0, %tmp1588 ; <i32> [#uses=1]
%tmp1602 = sub i32 %tmp1600.neg, %tmp1599 ; <i32> [#uses=1]
- %tmp1604 = getelementptr [64 x i8]* @black_outpost, i32 0, i32 %tmp1572 ; <i8*> [#uses=1]
- %tmp1605 = load i8* %tmp1604 ; <i8> [#uses=1]
+ %tmp1604 = getelementptr [64 x i8], [64 x i8]* @black_outpost, i32 0, i32 %tmp1572 ; <i8*> [#uses=1]
+ %tmp1605 = load i8, i8* %tmp1604 ; <i8> [#uses=1]
%tmp1606 = icmp eq i8 %tmp1605, 0 ; <i1> [#uses=1]
br i1 %tmp1606, label %cond_next1637, label %cond_true1607
diff --git a/test/CodeGen/Generic/2006-11-20-DAGCombineCrash.ll b/test/CodeGen/Generic/2006-11-20-DAGCombineCrash.ll
index 26d0f4f96ae8..40f91b235ab5 100644
--- a/test/CodeGen/Generic/2006-11-20-DAGCombineCrash.ll
+++ b/test/CodeGen/Generic/2006-11-20-DAGCombineCrash.ll
@@ -10,9 +10,9 @@ bb.preheader: ; preds = %entry
br i1 false, label %cond_true48, label %cond_next80
cond_true48: ; preds = %bb.preheader
- %tmp = load i8* null ; <i8> [#uses=1]
+ %tmp = load i8, i8* null ; <i8> [#uses=1]
%tmp51 = zext i8 %tmp to i16 ; <i16> [#uses=1]
- %tmp99 = load i8* null ; <i8> [#uses=1]
+ %tmp99 = load i8, i8* null ; <i8> [#uses=1]
%tmp54 = bitcast i8 %tmp99 to i8 ; <i8> [#uses=1]
%tmp54.upgrd.1 = zext i8 %tmp54 to i32 ; <i32> [#uses=1]
%tmp55 = lshr i32 %tmp54.upgrd.1, 3 ; <i32> [#uses=1]
diff --git a/test/CodeGen/Generic/2007-01-15-LoadSelectCycle.ll b/test/CodeGen/Generic/2007-01-15-LoadSelectCycle.ll
index 255b12092a77..aa6793b9c688 100644
--- a/test/CodeGen/Generic/2007-01-15-LoadSelectCycle.ll
+++ b/test/CodeGen/Generic/2007-01-15-LoadSelectCycle.ll
@@ -4,8 +4,8 @@
declare i1 @foo()
define i32 @test(i32* %A, i32* %B) {
- %a = load i32* %A
- %b = load i32* %B
+ %a = load i32, i32* %A
+ %b = load i32, i32* %B
%cond = call i1 @foo()
%c = select i1 %cond, i32 %a, i32 %b
ret i32 %c
diff --git a/test/CodeGen/Generic/2007-05-03-EHTypeInfo.ll b/test/CodeGen/Generic/2007-05-03-EHTypeInfo.ll
index 81347a23b864..7d77a2eed3b3 100644
--- a/test/CodeGen/Generic/2007-05-03-EHTypeInfo.ll
+++ b/test/CodeGen/Generic/2007-05-03-EHTypeInfo.ll
@@ -5,8 +5,8 @@
define void @typeinfo() {
entry:
- %eh_typeid = tail call i32 @llvm.eh.typeid.for.i32( i8* getelementptr (%struct.exception* @program_error, i32 0, i32 0) ) ; <i32> [#uses=0]
+ %eh_typeid = tail call i32 @llvm.eh.typeid.for( i8* getelementptr (%struct.exception, %struct.exception* @program_error, i32 0, i32 0) ) ; <i32> [#uses=0]
ret void
}
-declare i32 @llvm.eh.typeid.for.i32(i8*)
+declare i32 @llvm.eh.typeid.for(i8*)
diff --git a/test/CodeGen/Generic/2008-01-25-dag-combine-mul.ll b/test/CodeGen/Generic/2008-01-25-dag-combine-mul.ll
index 314bb05c6784..4558f09c4b96 100644
--- a/test/CodeGen/Generic/2008-01-25-dag-combine-mul.ll
+++ b/test/CodeGen/Generic/2008-01-25-dag-combine-mul.ll
@@ -4,24 +4,24 @@
define i32 @f(i16* %pc) {
entry:
%acc = alloca i64, align 8 ; <i64*> [#uses=4]
- %tmp97 = load i64* %acc, align 8 ; <i64> [#uses=1]
+ %tmp97 = load i64, i64* %acc, align 8 ; <i64> [#uses=1]
%tmp98 = and i64 %tmp97, 4294967295 ; <i64> [#uses=1]
- %tmp99 = load i64* null, align 8 ; <i64> [#uses=1]
+ %tmp99 = load i64, i64* null, align 8 ; <i64> [#uses=1]
%tmp100 = and i64 %tmp99, 4294967295 ; <i64> [#uses=1]
%tmp101 = mul i64 %tmp98, %tmp100 ; <i64> [#uses=1]
%tmp103 = lshr i64 %tmp101, 0 ; <i64> [#uses=1]
- %tmp104 = load i64* %acc, align 8 ; <i64> [#uses=1]
+ %tmp104 = load i64, i64* %acc, align 8 ; <i64> [#uses=1]
%.cast105 = zext i32 32 to i64 ; <i64> [#uses=1]
%tmp106 = lshr i64 %tmp104, %.cast105 ; <i64> [#uses=1]
- %tmp107 = load i64* null, align 8 ; <i64> [#uses=1]
+ %tmp107 = load i64, i64* null, align 8 ; <i64> [#uses=1]
%tmp108 = and i64 %tmp107, 4294967295 ; <i64> [#uses=1]
%tmp109 = mul i64 %tmp106, %tmp108 ; <i64> [#uses=1]
%tmp112 = add i64 %tmp109, 0 ; <i64> [#uses=1]
%tmp116 = add i64 %tmp112, 0 ; <i64> [#uses=1]
%tmp117 = add i64 %tmp103, %tmp116 ; <i64> [#uses=1]
- %tmp118 = load i64* %acc, align 8 ; <i64> [#uses=1]
+ %tmp118 = load i64, i64* %acc, align 8 ; <i64> [#uses=1]
%tmp120 = lshr i64 %tmp118, 0 ; <i64> [#uses=1]
- %tmp121 = load i64* null, align 8 ; <i64> [#uses=1]
+ %tmp121 = load i64, i64* null, align 8 ; <i64> [#uses=1]
%tmp123 = lshr i64 %tmp121, 0 ; <i64> [#uses=1]
%tmp124 = mul i64 %tmp120, %tmp123 ; <i64> [#uses=1]
%tmp126 = shl i64 %tmp124, 0 ; <i64> [#uses=1]
diff --git a/test/CodeGen/Generic/2008-01-30-LoadCrash.ll b/test/CodeGen/Generic/2008-01-30-LoadCrash.ll
index 70c3aaabedc1..f24d1bcad5f7 100644
--- a/test/CodeGen/Generic/2008-01-30-LoadCrash.ll
+++ b/test/CodeGen/Generic/2008-01-30-LoadCrash.ll
@@ -8,12 +8,12 @@ bb20:
bb41: ; preds = %bb20
%tmp8182 = trunc i64 %tmp42.rle to i32 ; <i32> [#uses=1]
- %tmp83 = getelementptr [63 x i8]* @letters.3100, i32 0, i32 %tmp8182 ; <i8*> [#uses=1]
- %tmp84 = load i8* %tmp83, align 1 ; <i8> [#uses=1]
+ %tmp83 = getelementptr [63 x i8], [63 x i8]* @letters.3100, i32 0, i32 %tmp8182 ; <i8*> [#uses=1]
+ %tmp84 = load i8, i8* %tmp83, align 1 ; <i8> [#uses=1]
store i8 %tmp84, i8* null, align 1
%tmp90 = urem i64 %tmp42.rle, 62 ; <i64> [#uses=1]
%tmp9091 = trunc i64 %tmp90 to i32 ; <i32> [#uses=1]
- %tmp92 = getelementptr [63 x i8]* @letters.3100, i32 0, i32 %tmp9091 ; <i8*> [#uses=1]
+ %tmp92 = getelementptr [63 x i8], [63 x i8]* @letters.3100, i32 0, i32 %tmp9091 ; <i8*> [#uses=1]
store i8* %tmp92, i8** null, align 1
ret i32 -1
}
diff --git a/test/CodeGen/Generic/2008-02-04-Ctlz.ll b/test/CodeGen/Generic/2008-02-04-Ctlz.ll
index 9f102066f2bb..3244e5c6f4ce 100644
--- a/test/CodeGen/Generic/2008-02-04-Ctlz.ll
+++ b/test/CodeGen/Generic/2008-02-04-Ctlz.ll
@@ -10,7 +10,7 @@ entry:
%tmp38 = trunc i64 %tmp37 to i32 ; <i32>:0 [#uses=1]
%tmp48 = trunc i64 %tmp47 to i32 ; <i32>:0 [#uses=1]
%tmp58 = trunc i64 %tmp57 to i32 ; <i32>:0 [#uses=1]
- %tmp40 = tail call i32 (i8*, ...)* @printf( i8* noalias getelementptr ([14 x i8]* @.str, i32 0, i32 0), i64 %arg, i32 %tmp38, i32 %tmp48, i32 %tmp58 ) nounwind ; <i32> [#uses=0]
+ %tmp40 = tail call i32 (i8*, ...) @printf( i8* noalias getelementptr ([14 x i8], [14 x i8]* @.str, i32 0, i32 0), i64 %arg, i32 %tmp38, i32 %tmp48, i32 %tmp58 ) nounwind ; <i32> [#uses=0]
ret i32 0
}
diff --git a/test/CodeGen/Generic/2008-02-20-MatchingMem.ll b/test/CodeGen/Generic/2008-02-20-MatchingMem.ll
index 5ddb515bb75a..20f3dcc2971d 100644
--- a/test/CodeGen/Generic/2008-02-20-MatchingMem.ll
+++ b/test/CodeGen/Generic/2008-02-20-MatchingMem.ll
@@ -1,9 +1,8 @@
; RUN: llc -no-integrated-as < %s
; PR1133
-; XFAIL: hexagon
define void @test(i32* %X) nounwind {
entry:
- %tmp1 = getelementptr i32* %X, i32 10 ; <i32*> [#uses=2]
+ %tmp1 = getelementptr i32, i32* %X, i32 10 ; <i32*> [#uses=2]
tail call void asm sideeffect " $0 $1 ", "=*im,*im,~{memory}"( i32* %tmp1, i32* %tmp1 ) nounwind
ret void
}
diff --git a/test/CodeGen/Generic/2008-02-25-NegateZero.ll b/test/CodeGen/Generic/2008-02-25-NegateZero.ll
index 97db667dc13a..14800ce5b458 100644
--- a/test/CodeGen/Generic/2008-02-25-NegateZero.ll
+++ b/test/CodeGen/Generic/2008-02-25-NegateZero.ll
@@ -3,11 +3,11 @@
define void @test() {
entry:
- %tmp98 = load float* null, align 4 ; <float> [#uses=1]
- %tmp106 = load float* null, align 4 ; <float> [#uses=1]
+ %tmp98 = load float, float* null, align 4 ; <float> [#uses=1]
+ %tmp106 = load float, float* null, align 4 ; <float> [#uses=1]
%tmp113 = fadd float %tmp98, %tmp106 ; <float> [#uses=1]
%tmp119 = fsub float %tmp113, 0.000000e+00 ; <float> [#uses=1]
- call void (i32, ...)* @foo( i32 0, float 0.000000e+00, float %tmp119 ) nounwind
+ call void (i32, ...) @foo( i32 0, float 0.000000e+00, float %tmp119 ) nounwind
ret void
}
diff --git a/test/CodeGen/Generic/2008-08-07-PtrToInt-SmallerInt.ll b/test/CodeGen/Generic/2008-08-07-PtrToInt-SmallerInt.ll
index 00ca8c756b42..01923dde2874 100644
--- a/test/CodeGen/Generic/2008-08-07-PtrToInt-SmallerInt.ll
+++ b/test/CodeGen/Generic/2008-08-07-PtrToInt-SmallerInt.ll
@@ -2,4 +2,4 @@
; PR2603
%struct.A = type { i8 }
%struct.B = type { i8, [1 x i8] }
-@Foo = constant %struct.A { i8 ptrtoint (i8* getelementptr ([1 x i8]* inttoptr (i32 17 to [1 x i8]*), i32 0, i32 -16) to i8) } ; <%struct.A*> [#uses=0]
+@Foo = constant %struct.A { i8 ptrtoint (i8* getelementptr ([1 x i8], [1 x i8]* inttoptr (i32 17 to [1 x i8]*), i32 0, i32 -16) to i8) } ; <%struct.A*> [#uses=0]
diff --git a/test/CodeGen/Generic/2009-03-29-SoftFloatVectorExtract.ll b/test/CodeGen/Generic/2009-03-29-SoftFloatVectorExtract.ll
index 45b561affffa..f614db00da48 100644
--- a/test/CodeGen/Generic/2009-03-29-SoftFloatVectorExtract.ll
+++ b/test/CodeGen/Generic/2009-03-29-SoftFloatVectorExtract.ll
@@ -1,10 +1,14 @@
-; RUN: llc < %s -soft-float
+; RUN: llc < %s
; PR3899
@m = external global <2 x double>
-define double @vector_ex() nounwind {
- %v = load <2 x double>* @m
+define double @vector_ex() nounwind #0 {
+ %v = load <2 x double>, <2 x double>* @m
%x = extractelement <2 x double> %v, i32 1
ret double %x
}
+
+; Soft-float attribute so that targets that pay attention to soft float will
+; make floating point types illegal and we'll exercise the legalizer code.
+attributes #0 = { "use-soft-float" = "true" }
diff --git a/test/CodeGen/Generic/2009-04-28-i128-cmp-crash.ll b/test/CodeGen/Generic/2009-04-28-i128-cmp-crash.ll
index b62f811e8d16..78f97eea77d0 100644
--- a/test/CodeGen/Generic/2009-04-28-i128-cmp-crash.ll
+++ b/test/CodeGen/Generic/2009-04-28-i128-cmp-crash.ll
@@ -5,7 +5,7 @@
define i32 @test(i128* %P) nounwind {
entry:
- %tmp48 = load i128* %P
+ %tmp48 = load i128, i128* %P
%and49 = and i128 %tmp48, 18446744073709551616 ; <i128> [#uses=1]
%tobool = icmp ne i128 %and49, 0 ; <i1> [#uses=1]
br i1 %tobool, label %if.then50, label %if.end61
@@ -19,7 +19,7 @@ if.end61: ; preds = %if.then50, %if.then20, %entry
define i32 @test2(i320* %P) nounwind {
entry:
- %tmp48 = load i320* %P
+ %tmp48 = load i320, i320* %P
%and49 = and i320 %tmp48, 25108406941546723055343157692830665664409421777856138051584
%tobool = icmp ne i320 %and49, 0 ; <i1> [#uses=1]
br i1 %tobool, label %if.then50, label %if.end61
diff --git a/test/CodeGen/Generic/2011-07-07-ScheduleDAGCrash.ll b/test/CodeGen/Generic/2011-07-07-ScheduleDAGCrash.ll
index cd446d57d8a5..5cc48c212c40 100644
--- a/test/CodeGen/Generic/2011-07-07-ScheduleDAGCrash.ll
+++ b/test/CodeGen/Generic/2011-07-07-ScheduleDAGCrash.ll
@@ -5,8 +5,8 @@
define void @f(i256* nocapture %a, i256* nocapture %b, i256* nocapture %cc, i256* nocapture %dd) nounwind uwtable noinline ssp {
entry:
- %c = load i256* %cc
- %d = load i256* %dd
+ %c = load i256, i256* %cc
+ %d = load i256, i256* %dd
%add = add nsw i256 %c, %d
store i256 %add, i256* %a, align 8
%or = or i256 %c, 1606938044258990275541962092341162602522202993782792835301376
diff --git a/test/CodeGen/Generic/2012-06-08-APIntCrash.ll b/test/CodeGen/Generic/2012-06-08-APIntCrash.ll
index 2c096bf42182..88ca9369ea46 100644
--- a/test/CodeGen/Generic/2012-06-08-APIntCrash.ll
+++ b/test/CodeGen/Generic/2012-06-08-APIntCrash.ll
@@ -2,7 +2,7 @@
define void @test1(<8 x i32>* %ptr)
{
- %1 = load <8 x i32>* %ptr, align 32
+ %1 = load <8 x i32>, <8 x i32>* %ptr, align 32
%2 = and <8 x i32> %1, <i32 0, i32 0, i32 0, i32 -1, i32 0, i32 0, i32 0, i32 -1>
store <8 x i32> %2, <8 x i32>* %ptr, align 16
ret void
diff --git a/test/CodeGen/Generic/2014-02-05-OpaqueConstants.ll b/test/CodeGen/Generic/2014-02-05-OpaqueConstants.ll
index 5c1cd0532511..1497bbb0c970 100644
--- a/test/CodeGen/Generic/2014-02-05-OpaqueConstants.ll
+++ b/test/CodeGen/Generic/2014-02-05-OpaqueConstants.ll
@@ -9,11 +9,11 @@
; Function Attrs: nounwind ssp uwtable
define void @fn() {
store i32* inttoptr (i64 68719476735 to i32*), i32** @a, align 8
- %1 = load i32* @c, align 4
+ %1 = load i32, i32* @c, align 4
%2 = sext i32 %1 to i64
%3 = lshr i64 %2, 12
%4 = and i64 %3, 68719476735
- %5 = getelementptr inbounds i32* null, i64 %4
+ %5 = getelementptr inbounds i32, i32* null, i64 %4
store i32* %5, i32** @b, align 8
ret void
}
diff --git a/test/CodeGen/Generic/APIntLoadStore.ll b/test/CodeGen/Generic/APIntLoadStore.ll
index 7c71a33fc3fd..f8d22f5b9122 100644
--- a/test/CodeGen/Generic/APIntLoadStore.ll
+++ b/test/CodeGen/Generic/APIntLoadStore.ll
@@ -513,1537 +513,1537 @@
@i256_s = external global i256 ; <i256*> [#uses=1]
define void @i1_ls() nounwind {
- %tmp = load i1* @i1_l ; <i1> [#uses=1]
+ %tmp = load i1, i1* @i1_l ; <i1> [#uses=1]
store i1 %tmp, i1* @i1_s
ret void
}
define void @i2_ls() nounwind {
- %tmp = load i2* @i2_l ; <i2> [#uses=1]
+ %tmp = load i2, i2* @i2_l ; <i2> [#uses=1]
store i2 %tmp, i2* @i2_s
ret void
}
define void @i3_ls() nounwind {
- %tmp = load i3* @i3_l ; <i3> [#uses=1]
+ %tmp = load i3, i3* @i3_l ; <i3> [#uses=1]
store i3 %tmp, i3* @i3_s
ret void
}
define void @i4_ls() nounwind {
- %tmp = load i4* @i4_l ; <i4> [#uses=1]
+ %tmp = load i4, i4* @i4_l ; <i4> [#uses=1]
store i4 %tmp, i4* @i4_s
ret void
}
define void @i5_ls() nounwind {
- %tmp = load i5* @i5_l ; <i5> [#uses=1]
+ %tmp = load i5, i5* @i5_l ; <i5> [#uses=1]
store i5 %tmp, i5* @i5_s
ret void
}
define void @i6_ls() nounwind {
- %tmp = load i6* @i6_l ; <i6> [#uses=1]
+ %tmp = load i6, i6* @i6_l ; <i6> [#uses=1]
store i6 %tmp, i6* @i6_s
ret void
}
define void @i7_ls() nounwind {
- %tmp = load i7* @i7_l ; <i7> [#uses=1]
+ %tmp = load i7, i7* @i7_l ; <i7> [#uses=1]
store i7 %tmp, i7* @i7_s
ret void
}
define void @i8_ls() nounwind {
- %tmp = load i8* @i8_l ; <i8> [#uses=1]
+ %tmp = load i8, i8* @i8_l ; <i8> [#uses=1]
store i8 %tmp, i8* @i8_s
ret void
}
define void @i9_ls() nounwind {
- %tmp = load i9* @i9_l ; <i9> [#uses=1]
+ %tmp = load i9, i9* @i9_l ; <i9> [#uses=1]
store i9 %tmp, i9* @i9_s
ret void
}
define void @i10_ls() nounwind {
- %tmp = load i10* @i10_l ; <i10> [#uses=1]
+ %tmp = load i10, i10* @i10_l ; <i10> [#uses=1]
store i10 %tmp, i10* @i10_s
ret void
}
define void @i11_ls() nounwind {
- %tmp = load i11* @i11_l ; <i11> [#uses=1]
+ %tmp = load i11, i11* @i11_l ; <i11> [#uses=1]
store i11 %tmp, i11* @i11_s
ret void
}
define void @i12_ls() nounwind {
- %tmp = load i12* @i12_l ; <i12> [#uses=1]
+ %tmp = load i12, i12* @i12_l ; <i12> [#uses=1]
store i12 %tmp, i12* @i12_s
ret void
}
define void @i13_ls() nounwind {
- %tmp = load i13* @i13_l ; <i13> [#uses=1]
+ %tmp = load i13, i13* @i13_l ; <i13> [#uses=1]
store i13 %tmp, i13* @i13_s
ret void
}
define void @i14_ls() nounwind {
- %tmp = load i14* @i14_l ; <i14> [#uses=1]
+ %tmp = load i14, i14* @i14_l ; <i14> [#uses=1]
store i14 %tmp, i14* @i14_s
ret void
}
define void @i15_ls() nounwind {
- %tmp = load i15* @i15_l ; <i15> [#uses=1]
+ %tmp = load i15, i15* @i15_l ; <i15> [#uses=1]
store i15 %tmp, i15* @i15_s
ret void
}
define void @i16_ls() nounwind {
- %tmp = load i16* @i16_l ; <i16> [#uses=1]
+ %tmp = load i16, i16* @i16_l ; <i16> [#uses=1]
store i16 %tmp, i16* @i16_s
ret void
}
define void @i17_ls() nounwind {
- %tmp = load i17* @i17_l ; <i17> [#uses=1]
+ %tmp = load i17, i17* @i17_l ; <i17> [#uses=1]
store i17 %tmp, i17* @i17_s
ret void
}
define void @i18_ls() nounwind {
- %tmp = load i18* @i18_l ; <i18> [#uses=1]
+ %tmp = load i18, i18* @i18_l ; <i18> [#uses=1]
store i18 %tmp, i18* @i18_s
ret void
}
define void @i19_ls() nounwind {
- %tmp = load i19* @i19_l ; <i19> [#uses=1]
+ %tmp = load i19, i19* @i19_l ; <i19> [#uses=1]
store i19 %tmp, i19* @i19_s
ret void
}
define void @i20_ls() nounwind {
- %tmp = load i20* @i20_l ; <i20> [#uses=1]
+ %tmp = load i20, i20* @i20_l ; <i20> [#uses=1]
store i20 %tmp, i20* @i20_s
ret void
}
define void @i21_ls() nounwind {
- %tmp = load i21* @i21_l ; <i21> [#uses=1]
+ %tmp = load i21, i21* @i21_l ; <i21> [#uses=1]
store i21 %tmp, i21* @i21_s
ret void
}
define void @i22_ls() nounwind {
- %tmp = load i22* @i22_l ; <i22> [#uses=1]
+ %tmp = load i22, i22* @i22_l ; <i22> [#uses=1]
store i22 %tmp, i22* @i22_s
ret void
}
define void @i23_ls() nounwind {
- %tmp = load i23* @i23_l ; <i23> [#uses=1]
+ %tmp = load i23, i23* @i23_l ; <i23> [#uses=1]
store i23 %tmp, i23* @i23_s
ret void
}
define void @i24_ls() nounwind {
- %tmp = load i24* @i24_l ; <i24> [#uses=1]
+ %tmp = load i24, i24* @i24_l ; <i24> [#uses=1]
store i24 %tmp, i24* @i24_s
ret void
}
define void @i25_ls() nounwind {
- %tmp = load i25* @i25_l ; <i25> [#uses=1]
+ %tmp = load i25, i25* @i25_l ; <i25> [#uses=1]
store i25 %tmp, i25* @i25_s
ret void
}
define void @i26_ls() nounwind {
- %tmp = load i26* @i26_l ; <i26> [#uses=1]
+ %tmp = load i26, i26* @i26_l ; <i26> [#uses=1]
store i26 %tmp, i26* @i26_s
ret void
}
define void @i27_ls() nounwind {
- %tmp = load i27* @i27_l ; <i27> [#uses=1]
+ %tmp = load i27, i27* @i27_l ; <i27> [#uses=1]
store i27 %tmp, i27* @i27_s
ret void
}
define void @i28_ls() nounwind {
- %tmp = load i28* @i28_l ; <i28> [#uses=1]
+ %tmp = load i28, i28* @i28_l ; <i28> [#uses=1]
store i28 %tmp, i28* @i28_s
ret void
}
define void @i29_ls() nounwind {
- %tmp = load i29* @i29_l ; <i29> [#uses=1]
+ %tmp = load i29, i29* @i29_l ; <i29> [#uses=1]
store i29 %tmp, i29* @i29_s
ret void
}
define void @i30_ls() nounwind {
- %tmp = load i30* @i30_l ; <i30> [#uses=1]
+ %tmp = load i30, i30* @i30_l ; <i30> [#uses=1]
store i30 %tmp, i30* @i30_s
ret void
}
define void @i31_ls() nounwind {
- %tmp = load i31* @i31_l ; <i31> [#uses=1]
+ %tmp = load i31, i31* @i31_l ; <i31> [#uses=1]
store i31 %tmp, i31* @i31_s
ret void
}
define void @i32_ls() nounwind {
- %tmp = load i32* @i32_l ; <i32> [#uses=1]
+ %tmp = load i32, i32* @i32_l ; <i32> [#uses=1]
store i32 %tmp, i32* @i32_s
ret void
}
define void @i33_ls() nounwind {
- %tmp = load i33* @i33_l ; <i33> [#uses=1]
+ %tmp = load i33, i33* @i33_l ; <i33> [#uses=1]
store i33 %tmp, i33* @i33_s
ret void
}
define void @i34_ls() nounwind {
- %tmp = load i34* @i34_l ; <i34> [#uses=1]
+ %tmp = load i34, i34* @i34_l ; <i34> [#uses=1]
store i34 %tmp, i34* @i34_s
ret void
}
define void @i35_ls() nounwind {
- %tmp = load i35* @i35_l ; <i35> [#uses=1]
+ %tmp = load i35, i35* @i35_l ; <i35> [#uses=1]
store i35 %tmp, i35* @i35_s
ret void
}
define void @i36_ls() nounwind {
- %tmp = load i36* @i36_l ; <i36> [#uses=1]
+ %tmp = load i36, i36* @i36_l ; <i36> [#uses=1]
store i36 %tmp, i36* @i36_s
ret void
}
define void @i37_ls() nounwind {
- %tmp = load i37* @i37_l ; <i37> [#uses=1]
+ %tmp = load i37, i37* @i37_l ; <i37> [#uses=1]
store i37 %tmp, i37* @i37_s
ret void
}
define void @i38_ls() nounwind {
- %tmp = load i38* @i38_l ; <i38> [#uses=1]
+ %tmp = load i38, i38* @i38_l ; <i38> [#uses=1]
store i38 %tmp, i38* @i38_s
ret void
}
define void @i39_ls() nounwind {
- %tmp = load i39* @i39_l ; <i39> [#uses=1]
+ %tmp = load i39, i39* @i39_l ; <i39> [#uses=1]
store i39 %tmp, i39* @i39_s
ret void
}
define void @i40_ls() nounwind {
- %tmp = load i40* @i40_l ; <i40> [#uses=1]
+ %tmp = load i40, i40* @i40_l ; <i40> [#uses=1]
store i40 %tmp, i40* @i40_s
ret void
}
define void @i41_ls() nounwind {
- %tmp = load i41* @i41_l ; <i41> [#uses=1]
+ %tmp = load i41, i41* @i41_l ; <i41> [#uses=1]
store i41 %tmp, i41* @i41_s
ret void
}
define void @i42_ls() nounwind {
- %tmp = load i42* @i42_l ; <i42> [#uses=1]
+ %tmp = load i42, i42* @i42_l ; <i42> [#uses=1]
store i42 %tmp, i42* @i42_s
ret void
}
define void @i43_ls() nounwind {
- %tmp = load i43* @i43_l ; <i43> [#uses=1]
+ %tmp = load i43, i43* @i43_l ; <i43> [#uses=1]
store i43 %tmp, i43* @i43_s
ret void
}
define void @i44_ls() nounwind {
- %tmp = load i44* @i44_l ; <i44> [#uses=1]
+ %tmp = load i44, i44* @i44_l ; <i44> [#uses=1]
store i44 %tmp, i44* @i44_s
ret void
}
define void @i45_ls() nounwind {
- %tmp = load i45* @i45_l ; <i45> [#uses=1]
+ %tmp = load i45, i45* @i45_l ; <i45> [#uses=1]
store i45 %tmp, i45* @i45_s
ret void
}
define void @i46_ls() nounwind {
- %tmp = load i46* @i46_l ; <i46> [#uses=1]
+ %tmp = load i46, i46* @i46_l ; <i46> [#uses=1]
store i46 %tmp, i46* @i46_s
ret void
}
define void @i47_ls() nounwind {
- %tmp = load i47* @i47_l ; <i47> [#uses=1]
+ %tmp = load i47, i47* @i47_l ; <i47> [#uses=1]
store i47 %tmp, i47* @i47_s
ret void
}
define void @i48_ls() nounwind {
- %tmp = load i48* @i48_l ; <i48> [#uses=1]
+ %tmp = load i48, i48* @i48_l ; <i48> [#uses=1]
store i48 %tmp, i48* @i48_s
ret void
}
define void @i49_ls() nounwind {
- %tmp = load i49* @i49_l ; <i49> [#uses=1]
+ %tmp = load i49, i49* @i49_l ; <i49> [#uses=1]
store i49 %tmp, i49* @i49_s
ret void
}
define void @i50_ls() nounwind {
- %tmp = load i50* @i50_l ; <i50> [#uses=1]
+ %tmp = load i50, i50* @i50_l ; <i50> [#uses=1]
store i50 %tmp, i50* @i50_s
ret void
}
define void @i51_ls() nounwind {
- %tmp = load i51* @i51_l ; <i51> [#uses=1]
+ %tmp = load i51, i51* @i51_l ; <i51> [#uses=1]
store i51 %tmp, i51* @i51_s
ret void
}
define void @i52_ls() nounwind {
- %tmp = load i52* @i52_l ; <i52> [#uses=1]
+ %tmp = load i52, i52* @i52_l ; <i52> [#uses=1]
store i52 %tmp, i52* @i52_s
ret void
}
define void @i53_ls() nounwind {
- %tmp = load i53* @i53_l ; <i53> [#uses=1]
+ %tmp = load i53, i53* @i53_l ; <i53> [#uses=1]
store i53 %tmp, i53* @i53_s
ret void
}
define void @i54_ls() nounwind {
- %tmp = load i54* @i54_l ; <i54> [#uses=1]
+ %tmp = load i54, i54* @i54_l ; <i54> [#uses=1]
store i54 %tmp, i54* @i54_s
ret void
}
define void @i55_ls() nounwind {
- %tmp = load i55* @i55_l ; <i55> [#uses=1]
+ %tmp = load i55, i55* @i55_l ; <i55> [#uses=1]
store i55 %tmp, i55* @i55_s
ret void
}
define void @i56_ls() nounwind {
- %tmp = load i56* @i56_l ; <i56> [#uses=1]
+ %tmp = load i56, i56* @i56_l ; <i56> [#uses=1]
store i56 %tmp, i56* @i56_s
ret void
}
define void @i57_ls() nounwind {
- %tmp = load i57* @i57_l ; <i57> [#uses=1]
+ %tmp = load i57, i57* @i57_l ; <i57> [#uses=1]
store i57 %tmp, i57* @i57_s
ret void
}
define void @i58_ls() nounwind {
- %tmp = load i58* @i58_l ; <i58> [#uses=1]
+ %tmp = load i58, i58* @i58_l ; <i58> [#uses=1]
store i58 %tmp, i58* @i58_s
ret void
}
define void @i59_ls() nounwind {
- %tmp = load i59* @i59_l ; <i59> [#uses=1]
+ %tmp = load i59, i59* @i59_l ; <i59> [#uses=1]
store i59 %tmp, i59* @i59_s
ret void
}
define void @i60_ls() nounwind {
- %tmp = load i60* @i60_l ; <i60> [#uses=1]
+ %tmp = load i60, i60* @i60_l ; <i60> [#uses=1]
store i60 %tmp, i60* @i60_s
ret void
}
define void @i61_ls() nounwind {
- %tmp = load i61* @i61_l ; <i61> [#uses=1]
+ %tmp = load i61, i61* @i61_l ; <i61> [#uses=1]
store i61 %tmp, i61* @i61_s
ret void
}
define void @i62_ls() nounwind {
- %tmp = load i62* @i62_l ; <i62> [#uses=1]
+ %tmp = load i62, i62* @i62_l ; <i62> [#uses=1]
store i62 %tmp, i62* @i62_s
ret void
}
define void @i63_ls() nounwind {
- %tmp = load i63* @i63_l ; <i63> [#uses=1]
+ %tmp = load i63, i63* @i63_l ; <i63> [#uses=1]
store i63 %tmp, i63* @i63_s
ret void
}
define void @i64_ls() nounwind {
- %tmp = load i64* @i64_l ; <i64> [#uses=1]
+ %tmp = load i64, i64* @i64_l ; <i64> [#uses=1]
store i64 %tmp, i64* @i64_s
ret void
}
define void @i65_ls() nounwind {
- %tmp = load i65* @i65_l ; <i65> [#uses=1]
+ %tmp = load i65, i65* @i65_l ; <i65> [#uses=1]
store i65 %tmp, i65* @i65_s
ret void
}
define void @i66_ls() nounwind {
- %tmp = load i66* @i66_l ; <i66> [#uses=1]
+ %tmp = load i66, i66* @i66_l ; <i66> [#uses=1]
store i66 %tmp, i66* @i66_s
ret void
}
define void @i67_ls() nounwind {
- %tmp = load i67* @i67_l ; <i67> [#uses=1]
+ %tmp = load i67, i67* @i67_l ; <i67> [#uses=1]
store i67 %tmp, i67* @i67_s
ret void
}
define void @i68_ls() nounwind {
- %tmp = load i68* @i68_l ; <i68> [#uses=1]
+ %tmp = load i68, i68* @i68_l ; <i68> [#uses=1]
store i68 %tmp, i68* @i68_s
ret void
}
define void @i69_ls() nounwind {
- %tmp = load i69* @i69_l ; <i69> [#uses=1]
+ %tmp = load i69, i69* @i69_l ; <i69> [#uses=1]
store i69 %tmp, i69* @i69_s
ret void
}
define void @i70_ls() nounwind {
- %tmp = load i70* @i70_l ; <i70> [#uses=1]
+ %tmp = load i70, i70* @i70_l ; <i70> [#uses=1]
store i70 %tmp, i70* @i70_s
ret void
}
define void @i71_ls() nounwind {
- %tmp = load i71* @i71_l ; <i71> [#uses=1]
+ %tmp = load i71, i71* @i71_l ; <i71> [#uses=1]
store i71 %tmp, i71* @i71_s
ret void
}
define void @i72_ls() nounwind {
- %tmp = load i72* @i72_l ; <i72> [#uses=1]
+ %tmp = load i72, i72* @i72_l ; <i72> [#uses=1]
store i72 %tmp, i72* @i72_s
ret void
}
define void @i73_ls() nounwind {
- %tmp = load i73* @i73_l ; <i73> [#uses=1]
+ %tmp = load i73, i73* @i73_l ; <i73> [#uses=1]
store i73 %tmp, i73* @i73_s
ret void
}
define void @i74_ls() nounwind {
- %tmp = load i74* @i74_l ; <i74> [#uses=1]
+ %tmp = load i74, i74* @i74_l ; <i74> [#uses=1]
store i74 %tmp, i74* @i74_s
ret void
}
define void @i75_ls() nounwind {
- %tmp = load i75* @i75_l ; <i75> [#uses=1]
+ %tmp = load i75, i75* @i75_l ; <i75> [#uses=1]
store i75 %tmp, i75* @i75_s
ret void
}
define void @i76_ls() nounwind {
- %tmp = load i76* @i76_l ; <i76> [#uses=1]
+ %tmp = load i76, i76* @i76_l ; <i76> [#uses=1]
store i76 %tmp, i76* @i76_s
ret void
}
define void @i77_ls() nounwind {
- %tmp = load i77* @i77_l ; <i77> [#uses=1]
+ %tmp = load i77, i77* @i77_l ; <i77> [#uses=1]
store i77 %tmp, i77* @i77_s
ret void
}
define void @i78_ls() nounwind {
- %tmp = load i78* @i78_l ; <i78> [#uses=1]
+ %tmp = load i78, i78* @i78_l ; <i78> [#uses=1]
store i78 %tmp, i78* @i78_s
ret void
}
define void @i79_ls() nounwind {
- %tmp = load i79* @i79_l ; <i79> [#uses=1]
+ %tmp = load i79, i79* @i79_l ; <i79> [#uses=1]
store i79 %tmp, i79* @i79_s
ret void
}
define void @i80_ls() nounwind {
- %tmp = load i80* @i80_l ; <i80> [#uses=1]
+ %tmp = load i80, i80* @i80_l ; <i80> [#uses=1]
store i80 %tmp, i80* @i80_s
ret void
}
define void @i81_ls() nounwind {
- %tmp = load i81* @i81_l ; <i81> [#uses=1]
+ %tmp = load i81, i81* @i81_l ; <i81> [#uses=1]
store i81 %tmp, i81* @i81_s
ret void
}
define void @i82_ls() nounwind {
- %tmp = load i82* @i82_l ; <i82> [#uses=1]
+ %tmp = load i82, i82* @i82_l ; <i82> [#uses=1]
store i82 %tmp, i82* @i82_s
ret void
}
define void @i83_ls() nounwind {
- %tmp = load i83* @i83_l ; <i83> [#uses=1]
+ %tmp = load i83, i83* @i83_l ; <i83> [#uses=1]
store i83 %tmp, i83* @i83_s
ret void
}
define void @i84_ls() nounwind {
- %tmp = load i84* @i84_l ; <i84> [#uses=1]
+ %tmp = load i84, i84* @i84_l ; <i84> [#uses=1]
store i84 %tmp, i84* @i84_s
ret void
}
define void @i85_ls() nounwind {
- %tmp = load i85* @i85_l ; <i85> [#uses=1]
+ %tmp = load i85, i85* @i85_l ; <i85> [#uses=1]
store i85 %tmp, i85* @i85_s
ret void
}
define void @i86_ls() nounwind {
- %tmp = load i86* @i86_l ; <i86> [#uses=1]
+ %tmp = load i86, i86* @i86_l ; <i86> [#uses=1]
store i86 %tmp, i86* @i86_s
ret void
}
define void @i87_ls() nounwind {
- %tmp = load i87* @i87_l ; <i87> [#uses=1]
+ %tmp = load i87, i87* @i87_l ; <i87> [#uses=1]
store i87 %tmp, i87* @i87_s
ret void
}
define void @i88_ls() nounwind {
- %tmp = load i88* @i88_l ; <i88> [#uses=1]
+ %tmp = load i88, i88* @i88_l ; <i88> [#uses=1]
store i88 %tmp, i88* @i88_s
ret void
}
define void @i89_ls() nounwind {
- %tmp = load i89* @i89_l ; <i89> [#uses=1]
+ %tmp = load i89, i89* @i89_l ; <i89> [#uses=1]
store i89 %tmp, i89* @i89_s
ret void
}
define void @i90_ls() nounwind {
- %tmp = load i90* @i90_l ; <i90> [#uses=1]
+ %tmp = load i90, i90* @i90_l ; <i90> [#uses=1]
store i90 %tmp, i90* @i90_s
ret void
}
define void @i91_ls() nounwind {
- %tmp = load i91* @i91_l ; <i91> [#uses=1]
+ %tmp = load i91, i91* @i91_l ; <i91> [#uses=1]
store i91 %tmp, i91* @i91_s
ret void
}
define void @i92_ls() nounwind {
- %tmp = load i92* @i92_l ; <i92> [#uses=1]
+ %tmp = load i92, i92* @i92_l ; <i92> [#uses=1]
store i92 %tmp, i92* @i92_s
ret void
}
define void @i93_ls() nounwind {
- %tmp = load i93* @i93_l ; <i93> [#uses=1]
+ %tmp = load i93, i93* @i93_l ; <i93> [#uses=1]
store i93 %tmp, i93* @i93_s
ret void
}
define void @i94_ls() nounwind {
- %tmp = load i94* @i94_l ; <i94> [#uses=1]
+ %tmp = load i94, i94* @i94_l ; <i94> [#uses=1]
store i94 %tmp, i94* @i94_s
ret void
}
define void @i95_ls() nounwind {
- %tmp = load i95* @i95_l ; <i95> [#uses=1]
+ %tmp = load i95, i95* @i95_l ; <i95> [#uses=1]
store i95 %tmp, i95* @i95_s
ret void
}
define void @i96_ls() nounwind {
- %tmp = load i96* @i96_l ; <i96> [#uses=1]
+ %tmp = load i96, i96* @i96_l ; <i96> [#uses=1]
store i96 %tmp, i96* @i96_s
ret void
}
define void @i97_ls() nounwind {
- %tmp = load i97* @i97_l ; <i97> [#uses=1]
+ %tmp = load i97, i97* @i97_l ; <i97> [#uses=1]
store i97 %tmp, i97* @i97_s
ret void
}
define void @i98_ls() nounwind {
- %tmp = load i98* @i98_l ; <i98> [#uses=1]
+ %tmp = load i98, i98* @i98_l ; <i98> [#uses=1]
store i98 %tmp, i98* @i98_s
ret void
}
define void @i99_ls() nounwind {
- %tmp = load i99* @i99_l ; <i99> [#uses=1]
+ %tmp = load i99, i99* @i99_l ; <i99> [#uses=1]
store i99 %tmp, i99* @i99_s
ret void
}
define void @i100_ls() nounwind {
- %tmp = load i100* @i100_l ; <i100> [#uses=1]
+ %tmp = load i100, i100* @i100_l ; <i100> [#uses=1]
store i100 %tmp, i100* @i100_s
ret void
}
define void @i101_ls() nounwind {
- %tmp = load i101* @i101_l ; <i101> [#uses=1]
+ %tmp = load i101, i101* @i101_l ; <i101> [#uses=1]
store i101 %tmp, i101* @i101_s
ret void
}
define void @i102_ls() nounwind {
- %tmp = load i102* @i102_l ; <i102> [#uses=1]
+ %tmp = load i102, i102* @i102_l ; <i102> [#uses=1]
store i102 %tmp, i102* @i102_s
ret void
}
define void @i103_ls() nounwind {
- %tmp = load i103* @i103_l ; <i103> [#uses=1]
+ %tmp = load i103, i103* @i103_l ; <i103> [#uses=1]
store i103 %tmp, i103* @i103_s
ret void
}
define void @i104_ls() nounwind {
- %tmp = load i104* @i104_l ; <i104> [#uses=1]
+ %tmp = load i104, i104* @i104_l ; <i104> [#uses=1]
store i104 %tmp, i104* @i104_s
ret void
}
define void @i105_ls() nounwind {
- %tmp = load i105* @i105_l ; <i105> [#uses=1]
+ %tmp = load i105, i105* @i105_l ; <i105> [#uses=1]
store i105 %tmp, i105* @i105_s
ret void
}
define void @i106_ls() nounwind {
- %tmp = load i106* @i106_l ; <i106> [#uses=1]
+ %tmp = load i106, i106* @i106_l ; <i106> [#uses=1]
store i106 %tmp, i106* @i106_s
ret void
}
define void @i107_ls() nounwind {
- %tmp = load i107* @i107_l ; <i107> [#uses=1]
+ %tmp = load i107, i107* @i107_l ; <i107> [#uses=1]
store i107 %tmp, i107* @i107_s
ret void
}
define void @i108_ls() nounwind {
- %tmp = load i108* @i108_l ; <i108> [#uses=1]
+ %tmp = load i108, i108* @i108_l ; <i108> [#uses=1]
store i108 %tmp, i108* @i108_s
ret void
}
define void @i109_ls() nounwind {
- %tmp = load i109* @i109_l ; <i109> [#uses=1]
+ %tmp = load i109, i109* @i109_l ; <i109> [#uses=1]
store i109 %tmp, i109* @i109_s
ret void
}
define void @i110_ls() nounwind {
- %tmp = load i110* @i110_l ; <i110> [#uses=1]
+ %tmp = load i110, i110* @i110_l ; <i110> [#uses=1]
store i110 %tmp, i110* @i110_s
ret void
}
define void @i111_ls() nounwind {
- %tmp = load i111* @i111_l ; <i111> [#uses=1]
+ %tmp = load i111, i111* @i111_l ; <i111> [#uses=1]
store i111 %tmp, i111* @i111_s
ret void
}
define void @i112_ls() nounwind {
- %tmp = load i112* @i112_l ; <i112> [#uses=1]
+ %tmp = load i112, i112* @i112_l ; <i112> [#uses=1]
store i112 %tmp, i112* @i112_s
ret void
}
define void @i113_ls() nounwind {
- %tmp = load i113* @i113_l ; <i113> [#uses=1]
+ %tmp = load i113, i113* @i113_l ; <i113> [#uses=1]
store i113 %tmp, i113* @i113_s
ret void
}
define void @i114_ls() nounwind {
- %tmp = load i114* @i114_l ; <i114> [#uses=1]
+ %tmp = load i114, i114* @i114_l ; <i114> [#uses=1]
store i114 %tmp, i114* @i114_s
ret void
}
define void @i115_ls() nounwind {
- %tmp = load i115* @i115_l ; <i115> [#uses=1]
+ %tmp = load i115, i115* @i115_l ; <i115> [#uses=1]
store i115 %tmp, i115* @i115_s
ret void
}
define void @i116_ls() nounwind {
- %tmp = load i116* @i116_l ; <i116> [#uses=1]
+ %tmp = load i116, i116* @i116_l ; <i116> [#uses=1]
store i116 %tmp, i116* @i116_s
ret void
}
define void @i117_ls() nounwind {
- %tmp = load i117* @i117_l ; <i117> [#uses=1]
+ %tmp = load i117, i117* @i117_l ; <i117> [#uses=1]
store i117 %tmp, i117* @i117_s
ret void
}
define void @i118_ls() nounwind {
- %tmp = load i118* @i118_l ; <i118> [#uses=1]
+ %tmp = load i118, i118* @i118_l ; <i118> [#uses=1]
store i118 %tmp, i118* @i118_s
ret void
}
define void @i119_ls() nounwind {
- %tmp = load i119* @i119_l ; <i119> [#uses=1]
+ %tmp = load i119, i119* @i119_l ; <i119> [#uses=1]
store i119 %tmp, i119* @i119_s
ret void
}
define void @i120_ls() nounwind {
- %tmp = load i120* @i120_l ; <i120> [#uses=1]
+ %tmp = load i120, i120* @i120_l ; <i120> [#uses=1]
store i120 %tmp, i120* @i120_s
ret void
}
define void @i121_ls() nounwind {
- %tmp = load i121* @i121_l ; <i121> [#uses=1]
+ %tmp = load i121, i121* @i121_l ; <i121> [#uses=1]
store i121 %tmp, i121* @i121_s
ret void
}
define void @i122_ls() nounwind {
- %tmp = load i122* @i122_l ; <i122> [#uses=1]
+ %tmp = load i122, i122* @i122_l ; <i122> [#uses=1]
store i122 %tmp, i122* @i122_s
ret void
}
define void @i123_ls() nounwind {
- %tmp = load i123* @i123_l ; <i123> [#uses=1]
+ %tmp = load i123, i123* @i123_l ; <i123> [#uses=1]
store i123 %tmp, i123* @i123_s
ret void
}
define void @i124_ls() nounwind {
- %tmp = load i124* @i124_l ; <i124> [#uses=1]
+ %tmp = load i124, i124* @i124_l ; <i124> [#uses=1]
store i124 %tmp, i124* @i124_s
ret void
}
define void @i125_ls() nounwind {
- %tmp = load i125* @i125_l ; <i125> [#uses=1]
+ %tmp = load i125, i125* @i125_l ; <i125> [#uses=1]
store i125 %tmp, i125* @i125_s
ret void
}
define void @i126_ls() nounwind {
- %tmp = load i126* @i126_l ; <i126> [#uses=1]
+ %tmp = load i126, i126* @i126_l ; <i126> [#uses=1]
store i126 %tmp, i126* @i126_s
ret void
}
define void @i127_ls() nounwind {
- %tmp = load i127* @i127_l ; <i127> [#uses=1]
+ %tmp = load i127, i127* @i127_l ; <i127> [#uses=1]
store i127 %tmp, i127* @i127_s
ret void
}
define void @i128_ls() nounwind {
- %tmp = load i128* @i128_l ; <i128> [#uses=1]
+ %tmp = load i128, i128* @i128_l ; <i128> [#uses=1]
store i128 %tmp, i128* @i128_s
ret void
}
define void @i129_ls() nounwind {
- %tmp = load i129* @i129_l ; <i129> [#uses=1]
+ %tmp = load i129, i129* @i129_l ; <i129> [#uses=1]
store i129 %tmp, i129* @i129_s
ret void
}
define void @i130_ls() nounwind {
- %tmp = load i130* @i130_l ; <i130> [#uses=1]
+ %tmp = load i130, i130* @i130_l ; <i130> [#uses=1]
store i130 %tmp, i130* @i130_s
ret void
}
define void @i131_ls() nounwind {
- %tmp = load i131* @i131_l ; <i131> [#uses=1]
+ %tmp = load i131, i131* @i131_l ; <i131> [#uses=1]
store i131 %tmp, i131* @i131_s
ret void
}
define void @i132_ls() nounwind {
- %tmp = load i132* @i132_l ; <i132> [#uses=1]
+ %tmp = load i132, i132* @i132_l ; <i132> [#uses=1]
store i132 %tmp, i132* @i132_s
ret void
}
define void @i133_ls() nounwind {
- %tmp = load i133* @i133_l ; <i133> [#uses=1]
+ %tmp = load i133, i133* @i133_l ; <i133> [#uses=1]
store i133 %tmp, i133* @i133_s
ret void
}
define void @i134_ls() nounwind {
- %tmp = load i134* @i134_l ; <i134> [#uses=1]
+ %tmp = load i134, i134* @i134_l ; <i134> [#uses=1]
store i134 %tmp, i134* @i134_s
ret void
}
define void @i135_ls() nounwind {
- %tmp = load i135* @i135_l ; <i135> [#uses=1]
+ %tmp = load i135, i135* @i135_l ; <i135> [#uses=1]
store i135 %tmp, i135* @i135_s
ret void
}
define void @i136_ls() nounwind {
- %tmp = load i136* @i136_l ; <i136> [#uses=1]
+ %tmp = load i136, i136* @i136_l ; <i136> [#uses=1]
store i136 %tmp, i136* @i136_s
ret void
}
define void @i137_ls() nounwind {
- %tmp = load i137* @i137_l ; <i137> [#uses=1]
+ %tmp = load i137, i137* @i137_l ; <i137> [#uses=1]
store i137 %tmp, i137* @i137_s
ret void
}
define void @i138_ls() nounwind {
- %tmp = load i138* @i138_l ; <i138> [#uses=1]
+ %tmp = load i138, i138* @i138_l ; <i138> [#uses=1]
store i138 %tmp, i138* @i138_s
ret void
}
define void @i139_ls() nounwind {
- %tmp = load i139* @i139_l ; <i139> [#uses=1]
+ %tmp = load i139, i139* @i139_l ; <i139> [#uses=1]
store i139 %tmp, i139* @i139_s
ret void
}
define void @i140_ls() nounwind {
- %tmp = load i140* @i140_l ; <i140> [#uses=1]
+ %tmp = load i140, i140* @i140_l ; <i140> [#uses=1]
store i140 %tmp, i140* @i140_s
ret void
}
define void @i141_ls() nounwind {
- %tmp = load i141* @i141_l ; <i141> [#uses=1]
+ %tmp = load i141, i141* @i141_l ; <i141> [#uses=1]
store i141 %tmp, i141* @i141_s
ret void
}
define void @i142_ls() nounwind {
- %tmp = load i142* @i142_l ; <i142> [#uses=1]
+ %tmp = load i142, i142* @i142_l ; <i142> [#uses=1]
store i142 %tmp, i142* @i142_s
ret void
}
define void @i143_ls() nounwind {
- %tmp = load i143* @i143_l ; <i143> [#uses=1]
+ %tmp = load i143, i143* @i143_l ; <i143> [#uses=1]
store i143 %tmp, i143* @i143_s
ret void
}
define void @i144_ls() nounwind {
- %tmp = load i144* @i144_l ; <i144> [#uses=1]
+ %tmp = load i144, i144* @i144_l ; <i144> [#uses=1]
store i144 %tmp, i144* @i144_s
ret void
}
define void @i145_ls() nounwind {
- %tmp = load i145* @i145_l ; <i145> [#uses=1]
+ %tmp = load i145, i145* @i145_l ; <i145> [#uses=1]
store i145 %tmp, i145* @i145_s
ret void
}
define void @i146_ls() nounwind {
- %tmp = load i146* @i146_l ; <i146> [#uses=1]
+ %tmp = load i146, i146* @i146_l ; <i146> [#uses=1]
store i146 %tmp, i146* @i146_s
ret void
}
define void @i147_ls() nounwind {
- %tmp = load i147* @i147_l ; <i147> [#uses=1]
+ %tmp = load i147, i147* @i147_l ; <i147> [#uses=1]
store i147 %tmp, i147* @i147_s
ret void
}
define void @i148_ls() nounwind {
- %tmp = load i148* @i148_l ; <i148> [#uses=1]
+ %tmp = load i148, i148* @i148_l ; <i148> [#uses=1]
store i148 %tmp, i148* @i148_s
ret void
}
define void @i149_ls() nounwind {
- %tmp = load i149* @i149_l ; <i149> [#uses=1]
+ %tmp = load i149, i149* @i149_l ; <i149> [#uses=1]
store i149 %tmp, i149* @i149_s
ret void
}
define void @i150_ls() nounwind {
- %tmp = load i150* @i150_l ; <i150> [#uses=1]
+ %tmp = load i150, i150* @i150_l ; <i150> [#uses=1]
store i150 %tmp, i150* @i150_s
ret void
}
define void @i151_ls() nounwind {
- %tmp = load i151* @i151_l ; <i151> [#uses=1]
+ %tmp = load i151, i151* @i151_l ; <i151> [#uses=1]
store i151 %tmp, i151* @i151_s
ret void
}
define void @i152_ls() nounwind {
- %tmp = load i152* @i152_l ; <i152> [#uses=1]
+ %tmp = load i152, i152* @i152_l ; <i152> [#uses=1]
store i152 %tmp, i152* @i152_s
ret void
}
define void @i153_ls() nounwind {
- %tmp = load i153* @i153_l ; <i153> [#uses=1]
+ %tmp = load i153, i153* @i153_l ; <i153> [#uses=1]
store i153 %tmp, i153* @i153_s
ret void
}
define void @i154_ls() nounwind {
- %tmp = load i154* @i154_l ; <i154> [#uses=1]
+ %tmp = load i154, i154* @i154_l ; <i154> [#uses=1]
store i154 %tmp, i154* @i154_s
ret void
}
define void @i155_ls() nounwind {
- %tmp = load i155* @i155_l ; <i155> [#uses=1]
+ %tmp = load i155, i155* @i155_l ; <i155> [#uses=1]
store i155 %tmp, i155* @i155_s
ret void
}
define void @i156_ls() nounwind {
- %tmp = load i156* @i156_l ; <i156> [#uses=1]
+ %tmp = load i156, i156* @i156_l ; <i156> [#uses=1]
store i156 %tmp, i156* @i156_s
ret void
}
define void @i157_ls() nounwind {
- %tmp = load i157* @i157_l ; <i157> [#uses=1]
+ %tmp = load i157, i157* @i157_l ; <i157> [#uses=1]
store i157 %tmp, i157* @i157_s
ret void
}
define void @i158_ls() nounwind {
- %tmp = load i158* @i158_l ; <i158> [#uses=1]
+ %tmp = load i158, i158* @i158_l ; <i158> [#uses=1]
store i158 %tmp, i158* @i158_s
ret void
}
define void @i159_ls() nounwind {
- %tmp = load i159* @i159_l ; <i159> [#uses=1]
+ %tmp = load i159, i159* @i159_l ; <i159> [#uses=1]
store i159 %tmp, i159* @i159_s
ret void
}
define void @i160_ls() nounwind {
- %tmp = load i160* @i160_l ; <i160> [#uses=1]
+ %tmp = load i160, i160* @i160_l ; <i160> [#uses=1]
store i160 %tmp, i160* @i160_s
ret void
}
define void @i161_ls() nounwind {
- %tmp = load i161* @i161_l ; <i161> [#uses=1]
+ %tmp = load i161, i161* @i161_l ; <i161> [#uses=1]
store i161 %tmp, i161* @i161_s
ret void
}
define void @i162_ls() nounwind {
- %tmp = load i162* @i162_l ; <i162> [#uses=1]
+ %tmp = load i162, i162* @i162_l ; <i162> [#uses=1]
store i162 %tmp, i162* @i162_s
ret void
}
define void @i163_ls() nounwind {
- %tmp = load i163* @i163_l ; <i163> [#uses=1]
+ %tmp = load i163, i163* @i163_l ; <i163> [#uses=1]
store i163 %tmp, i163* @i163_s
ret void
}
define void @i164_ls() nounwind {
- %tmp = load i164* @i164_l ; <i164> [#uses=1]
+ %tmp = load i164, i164* @i164_l ; <i164> [#uses=1]
store i164 %tmp, i164* @i164_s
ret void
}
define void @i165_ls() nounwind {
- %tmp = load i165* @i165_l ; <i165> [#uses=1]
+ %tmp = load i165, i165* @i165_l ; <i165> [#uses=1]
store i165 %tmp, i165* @i165_s
ret void
}
define void @i166_ls() nounwind {
- %tmp = load i166* @i166_l ; <i166> [#uses=1]
+ %tmp = load i166, i166* @i166_l ; <i166> [#uses=1]
store i166 %tmp, i166* @i166_s
ret void
}
define void @i167_ls() nounwind {
- %tmp = load i167* @i167_l ; <i167> [#uses=1]
+ %tmp = load i167, i167* @i167_l ; <i167> [#uses=1]
store i167 %tmp, i167* @i167_s
ret void
}
define void @i168_ls() nounwind {
- %tmp = load i168* @i168_l ; <i168> [#uses=1]
+ %tmp = load i168, i168* @i168_l ; <i168> [#uses=1]
store i168 %tmp, i168* @i168_s
ret void
}
define void @i169_ls() nounwind {
- %tmp = load i169* @i169_l ; <i169> [#uses=1]
+ %tmp = load i169, i169* @i169_l ; <i169> [#uses=1]
store i169 %tmp, i169* @i169_s
ret void
}
define void @i170_ls() nounwind {
- %tmp = load i170* @i170_l ; <i170> [#uses=1]
+ %tmp = load i170, i170* @i170_l ; <i170> [#uses=1]
store i170 %tmp, i170* @i170_s
ret void
}
define void @i171_ls() nounwind {
- %tmp = load i171* @i171_l ; <i171> [#uses=1]
+ %tmp = load i171, i171* @i171_l ; <i171> [#uses=1]
store i171 %tmp, i171* @i171_s
ret void
}
define void @i172_ls() nounwind {
- %tmp = load i172* @i172_l ; <i172> [#uses=1]
+ %tmp = load i172, i172* @i172_l ; <i172> [#uses=1]
store i172 %tmp, i172* @i172_s
ret void
}
define void @i173_ls() nounwind {
- %tmp = load i173* @i173_l ; <i173> [#uses=1]
+ %tmp = load i173, i173* @i173_l ; <i173> [#uses=1]
store i173 %tmp, i173* @i173_s
ret void
}
define void @i174_ls() nounwind {
- %tmp = load i174* @i174_l ; <i174> [#uses=1]
+ %tmp = load i174, i174* @i174_l ; <i174> [#uses=1]
store i174 %tmp, i174* @i174_s
ret void
}
define void @i175_ls() nounwind {
- %tmp = load i175* @i175_l ; <i175> [#uses=1]
+ %tmp = load i175, i175* @i175_l ; <i175> [#uses=1]
store i175 %tmp, i175* @i175_s
ret void
}
define void @i176_ls() nounwind {
- %tmp = load i176* @i176_l ; <i176> [#uses=1]
+ %tmp = load i176, i176* @i176_l ; <i176> [#uses=1]
store i176 %tmp, i176* @i176_s
ret void
}
define void @i177_ls() nounwind {
- %tmp = load i177* @i177_l ; <i177> [#uses=1]
+ %tmp = load i177, i177* @i177_l ; <i177> [#uses=1]
store i177 %tmp, i177* @i177_s
ret void
}
define void @i178_ls() nounwind {
- %tmp = load i178* @i178_l ; <i178> [#uses=1]
+ %tmp = load i178, i178* @i178_l ; <i178> [#uses=1]
store i178 %tmp, i178* @i178_s
ret void
}
define void @i179_ls() nounwind {
- %tmp = load i179* @i179_l ; <i179> [#uses=1]
+ %tmp = load i179, i179* @i179_l ; <i179> [#uses=1]
store i179 %tmp, i179* @i179_s
ret void
}
define void @i180_ls() nounwind {
- %tmp = load i180* @i180_l ; <i180> [#uses=1]
+ %tmp = load i180, i180* @i180_l ; <i180> [#uses=1]
store i180 %tmp, i180* @i180_s
ret void
}
define void @i181_ls() nounwind {
- %tmp = load i181* @i181_l ; <i181> [#uses=1]
+ %tmp = load i181, i181* @i181_l ; <i181> [#uses=1]
store i181 %tmp, i181* @i181_s
ret void
}
define void @i182_ls() nounwind {
- %tmp = load i182* @i182_l ; <i182> [#uses=1]
+ %tmp = load i182, i182* @i182_l ; <i182> [#uses=1]
store i182 %tmp, i182* @i182_s
ret void
}
define void @i183_ls() nounwind {
- %tmp = load i183* @i183_l ; <i183> [#uses=1]
+ %tmp = load i183, i183* @i183_l ; <i183> [#uses=1]
store i183 %tmp, i183* @i183_s
ret void
}
define void @i184_ls() nounwind {
- %tmp = load i184* @i184_l ; <i184> [#uses=1]
+ %tmp = load i184, i184* @i184_l ; <i184> [#uses=1]
store i184 %tmp, i184* @i184_s
ret void
}
define void @i185_ls() nounwind {
- %tmp = load i185* @i185_l ; <i185> [#uses=1]
+ %tmp = load i185, i185* @i185_l ; <i185> [#uses=1]
store i185 %tmp, i185* @i185_s
ret void
}
define void @i186_ls() nounwind {
- %tmp = load i186* @i186_l ; <i186> [#uses=1]
+ %tmp = load i186, i186* @i186_l ; <i186> [#uses=1]
store i186 %tmp, i186* @i186_s
ret void
}
define void @i187_ls() nounwind {
- %tmp = load i187* @i187_l ; <i187> [#uses=1]
+ %tmp = load i187, i187* @i187_l ; <i187> [#uses=1]
store i187 %tmp, i187* @i187_s
ret void
}
define void @i188_ls() nounwind {
- %tmp = load i188* @i188_l ; <i188> [#uses=1]
+ %tmp = load i188, i188* @i188_l ; <i188> [#uses=1]
store i188 %tmp, i188* @i188_s
ret void
}
define void @i189_ls() nounwind {
- %tmp = load i189* @i189_l ; <i189> [#uses=1]
+ %tmp = load i189, i189* @i189_l ; <i189> [#uses=1]
store i189 %tmp, i189* @i189_s
ret void
}
define void @i190_ls() nounwind {
- %tmp = load i190* @i190_l ; <i190> [#uses=1]
+ %tmp = load i190, i190* @i190_l ; <i190> [#uses=1]
store i190 %tmp, i190* @i190_s
ret void
}
define void @i191_ls() nounwind {
- %tmp = load i191* @i191_l ; <i191> [#uses=1]
+ %tmp = load i191, i191* @i191_l ; <i191> [#uses=1]
store i191 %tmp, i191* @i191_s
ret void
}
define void @i192_ls() nounwind {
- %tmp = load i192* @i192_l ; <i192> [#uses=1]
+ %tmp = load i192, i192* @i192_l ; <i192> [#uses=1]
store i192 %tmp, i192* @i192_s
ret void
}
define void @i193_ls() nounwind {
- %tmp = load i193* @i193_l ; <i193> [#uses=1]
+ %tmp = load i193, i193* @i193_l ; <i193> [#uses=1]
store i193 %tmp, i193* @i193_s
ret void
}
define void @i194_ls() nounwind {
- %tmp = load i194* @i194_l ; <i194> [#uses=1]
+ %tmp = load i194, i194* @i194_l ; <i194> [#uses=1]
store i194 %tmp, i194* @i194_s
ret void
}
define void @i195_ls() nounwind {
- %tmp = load i195* @i195_l ; <i195> [#uses=1]
+ %tmp = load i195, i195* @i195_l ; <i195> [#uses=1]
store i195 %tmp, i195* @i195_s
ret void
}
define void @i196_ls() nounwind {
- %tmp = load i196* @i196_l ; <i196> [#uses=1]
+ %tmp = load i196, i196* @i196_l ; <i196> [#uses=1]
store i196 %tmp, i196* @i196_s
ret void
}
define void @i197_ls() nounwind {
- %tmp = load i197* @i197_l ; <i197> [#uses=1]
+ %tmp = load i197, i197* @i197_l ; <i197> [#uses=1]
store i197 %tmp, i197* @i197_s
ret void
}
define void @i198_ls() nounwind {
- %tmp = load i198* @i198_l ; <i198> [#uses=1]
+ %tmp = load i198, i198* @i198_l ; <i198> [#uses=1]
store i198 %tmp, i198* @i198_s
ret void
}
define void @i199_ls() nounwind {
- %tmp = load i199* @i199_l ; <i199> [#uses=1]
+ %tmp = load i199, i199* @i199_l ; <i199> [#uses=1]
store i199 %tmp, i199* @i199_s
ret void
}
define void @i200_ls() nounwind {
- %tmp = load i200* @i200_l ; <i200> [#uses=1]
+ %tmp = load i200, i200* @i200_l ; <i200> [#uses=1]
store i200 %tmp, i200* @i200_s
ret void
}
define void @i201_ls() nounwind {
- %tmp = load i201* @i201_l ; <i201> [#uses=1]
+ %tmp = load i201, i201* @i201_l ; <i201> [#uses=1]
store i201 %tmp, i201* @i201_s
ret void
}
define void @i202_ls() nounwind {
- %tmp = load i202* @i202_l ; <i202> [#uses=1]
+ %tmp = load i202, i202* @i202_l ; <i202> [#uses=1]
store i202 %tmp, i202* @i202_s
ret void
}
define void @i203_ls() nounwind {
- %tmp = load i203* @i203_l ; <i203> [#uses=1]
+ %tmp = load i203, i203* @i203_l ; <i203> [#uses=1]
store i203 %tmp, i203* @i203_s
ret void
}
define void @i204_ls() nounwind {
- %tmp = load i204* @i204_l ; <i204> [#uses=1]
+ %tmp = load i204, i204* @i204_l ; <i204> [#uses=1]
store i204 %tmp, i204* @i204_s
ret void
}
define void @i205_ls() nounwind {
- %tmp = load i205* @i205_l ; <i205> [#uses=1]
+ %tmp = load i205, i205* @i205_l ; <i205> [#uses=1]
store i205 %tmp, i205* @i205_s
ret void
}
define void @i206_ls() nounwind {
- %tmp = load i206* @i206_l ; <i206> [#uses=1]
+ %tmp = load i206, i206* @i206_l ; <i206> [#uses=1]
store i206 %tmp, i206* @i206_s
ret void
}
define void @i207_ls() nounwind {
- %tmp = load i207* @i207_l ; <i207> [#uses=1]
+ %tmp = load i207, i207* @i207_l ; <i207> [#uses=1]
store i207 %tmp, i207* @i207_s
ret void
}
define void @i208_ls() nounwind {
- %tmp = load i208* @i208_l ; <i208> [#uses=1]
+ %tmp = load i208, i208* @i208_l ; <i208> [#uses=1]
store i208 %tmp, i208* @i208_s
ret void
}
define void @i209_ls() nounwind {
- %tmp = load i209* @i209_l ; <i209> [#uses=1]
+ %tmp = load i209, i209* @i209_l ; <i209> [#uses=1]
store i209 %tmp, i209* @i209_s
ret void
}
define void @i210_ls() nounwind {
- %tmp = load i210* @i210_l ; <i210> [#uses=1]
+ %tmp = load i210, i210* @i210_l ; <i210> [#uses=1]
store i210 %tmp, i210* @i210_s
ret void
}
define void @i211_ls() nounwind {
- %tmp = load i211* @i211_l ; <i211> [#uses=1]
+ %tmp = load i211, i211* @i211_l ; <i211> [#uses=1]
store i211 %tmp, i211* @i211_s
ret void
}
define void @i212_ls() nounwind {
- %tmp = load i212* @i212_l ; <i212> [#uses=1]
+ %tmp = load i212, i212* @i212_l ; <i212> [#uses=1]
store i212 %tmp, i212* @i212_s
ret void
}
define void @i213_ls() nounwind {
- %tmp = load i213* @i213_l ; <i213> [#uses=1]
+ %tmp = load i213, i213* @i213_l ; <i213> [#uses=1]
store i213 %tmp, i213* @i213_s
ret void
}
define void @i214_ls() nounwind {
- %tmp = load i214* @i214_l ; <i214> [#uses=1]
+ %tmp = load i214, i214* @i214_l ; <i214> [#uses=1]
store i214 %tmp, i214* @i214_s
ret void
}
define void @i215_ls() nounwind {
- %tmp = load i215* @i215_l ; <i215> [#uses=1]
+ %tmp = load i215, i215* @i215_l ; <i215> [#uses=1]
store i215 %tmp, i215* @i215_s
ret void
}
define void @i216_ls() nounwind {
- %tmp = load i216* @i216_l ; <i216> [#uses=1]
+ %tmp = load i216, i216* @i216_l ; <i216> [#uses=1]
store i216 %tmp, i216* @i216_s
ret void
}
define void @i217_ls() nounwind {
- %tmp = load i217* @i217_l ; <i217> [#uses=1]
+ %tmp = load i217, i217* @i217_l ; <i217> [#uses=1]
store i217 %tmp, i217* @i217_s
ret void
}
define void @i218_ls() nounwind {
- %tmp = load i218* @i218_l ; <i218> [#uses=1]
+ %tmp = load i218, i218* @i218_l ; <i218> [#uses=1]
store i218 %tmp, i218* @i218_s
ret void
}
define void @i219_ls() nounwind {
- %tmp = load i219* @i219_l ; <i219> [#uses=1]
+ %tmp = load i219, i219* @i219_l ; <i219> [#uses=1]
store i219 %tmp, i219* @i219_s
ret void
}
define void @i220_ls() nounwind {
- %tmp = load i220* @i220_l ; <i220> [#uses=1]
+ %tmp = load i220, i220* @i220_l ; <i220> [#uses=1]
store i220 %tmp, i220* @i220_s
ret void
}
define void @i221_ls() nounwind {
- %tmp = load i221* @i221_l ; <i221> [#uses=1]
+ %tmp = load i221, i221* @i221_l ; <i221> [#uses=1]
store i221 %tmp, i221* @i221_s
ret void
}
define void @i222_ls() nounwind {
- %tmp = load i222* @i222_l ; <i222> [#uses=1]
+ %tmp = load i222, i222* @i222_l ; <i222> [#uses=1]
store i222 %tmp, i222* @i222_s
ret void
}
define void @i223_ls() nounwind {
- %tmp = load i223* @i223_l ; <i223> [#uses=1]
+ %tmp = load i223, i223* @i223_l ; <i223> [#uses=1]
store i223 %tmp, i223* @i223_s
ret void
}
define void @i224_ls() nounwind {
- %tmp = load i224* @i224_l ; <i224> [#uses=1]
+ %tmp = load i224, i224* @i224_l ; <i224> [#uses=1]
store i224 %tmp, i224* @i224_s
ret void
}
define void @i225_ls() nounwind {
- %tmp = load i225* @i225_l ; <i225> [#uses=1]
+ %tmp = load i225, i225* @i225_l ; <i225> [#uses=1]
store i225 %tmp, i225* @i225_s
ret void
}
define void @i226_ls() nounwind {
- %tmp = load i226* @i226_l ; <i226> [#uses=1]
+ %tmp = load i226, i226* @i226_l ; <i226> [#uses=1]
store i226 %tmp, i226* @i226_s
ret void
}
define void @i227_ls() nounwind {
- %tmp = load i227* @i227_l ; <i227> [#uses=1]
+ %tmp = load i227, i227* @i227_l ; <i227> [#uses=1]
store i227 %tmp, i227* @i227_s
ret void
}
define void @i228_ls() nounwind {
- %tmp = load i228* @i228_l ; <i228> [#uses=1]
+ %tmp = load i228, i228* @i228_l ; <i228> [#uses=1]
store i228 %tmp, i228* @i228_s
ret void
}
define void @i229_ls() nounwind {
- %tmp = load i229* @i229_l ; <i229> [#uses=1]
+ %tmp = load i229, i229* @i229_l ; <i229> [#uses=1]
store i229 %tmp, i229* @i229_s
ret void
}
define void @i230_ls() nounwind {
- %tmp = load i230* @i230_l ; <i230> [#uses=1]
+ %tmp = load i230, i230* @i230_l ; <i230> [#uses=1]
store i230 %tmp, i230* @i230_s
ret void
}
define void @i231_ls() nounwind {
- %tmp = load i231* @i231_l ; <i231> [#uses=1]
+ %tmp = load i231, i231* @i231_l ; <i231> [#uses=1]
store i231 %tmp, i231* @i231_s
ret void
}
define void @i232_ls() nounwind {
- %tmp = load i232* @i232_l ; <i232> [#uses=1]
+ %tmp = load i232, i232* @i232_l ; <i232> [#uses=1]
store i232 %tmp, i232* @i232_s
ret void
}
define void @i233_ls() nounwind {
- %tmp = load i233* @i233_l ; <i233> [#uses=1]
+ %tmp = load i233, i233* @i233_l ; <i233> [#uses=1]
store i233 %tmp, i233* @i233_s
ret void
}
define void @i234_ls() nounwind {
- %tmp = load i234* @i234_l ; <i234> [#uses=1]
+ %tmp = load i234, i234* @i234_l ; <i234> [#uses=1]
store i234 %tmp, i234* @i234_s
ret void
}
define void @i235_ls() nounwind {
- %tmp = load i235* @i235_l ; <i235> [#uses=1]
+ %tmp = load i235, i235* @i235_l ; <i235> [#uses=1]
store i235 %tmp, i235* @i235_s
ret void
}
define void @i236_ls() nounwind {
- %tmp = load i236* @i236_l ; <i236> [#uses=1]
+ %tmp = load i236, i236* @i236_l ; <i236> [#uses=1]
store i236 %tmp, i236* @i236_s
ret void
}
define void @i237_ls() nounwind {
- %tmp = load i237* @i237_l ; <i237> [#uses=1]
+ %tmp = load i237, i237* @i237_l ; <i237> [#uses=1]
store i237 %tmp, i237* @i237_s
ret void
}
define void @i238_ls() nounwind {
- %tmp = load i238* @i238_l ; <i238> [#uses=1]
+ %tmp = load i238, i238* @i238_l ; <i238> [#uses=1]
store i238 %tmp, i238* @i238_s
ret void
}
define void @i239_ls() nounwind {
- %tmp = load i239* @i239_l ; <i239> [#uses=1]
+ %tmp = load i239, i239* @i239_l ; <i239> [#uses=1]
store i239 %tmp, i239* @i239_s
ret void
}
define void @i240_ls() nounwind {
- %tmp = load i240* @i240_l ; <i240> [#uses=1]
+ %tmp = load i240, i240* @i240_l ; <i240> [#uses=1]
store i240 %tmp, i240* @i240_s
ret void
}
define void @i241_ls() nounwind {
- %tmp = load i241* @i241_l ; <i241> [#uses=1]
+ %tmp = load i241, i241* @i241_l ; <i241> [#uses=1]
store i241 %tmp, i241* @i241_s
ret void
}
define void @i242_ls() nounwind {
- %tmp = load i242* @i242_l ; <i242> [#uses=1]
+ %tmp = load i242, i242* @i242_l ; <i242> [#uses=1]
store i242 %tmp, i242* @i242_s
ret void
}
define void @i243_ls() nounwind {
- %tmp = load i243* @i243_l ; <i243> [#uses=1]
+ %tmp = load i243, i243* @i243_l ; <i243> [#uses=1]
store i243 %tmp, i243* @i243_s
ret void
}
define void @i244_ls() nounwind {
- %tmp = load i244* @i244_l ; <i244> [#uses=1]
+ %tmp = load i244, i244* @i244_l ; <i244> [#uses=1]
store i244 %tmp, i244* @i244_s
ret void
}
define void @i245_ls() nounwind {
- %tmp = load i245* @i245_l ; <i245> [#uses=1]
+ %tmp = load i245, i245* @i245_l ; <i245> [#uses=1]
store i245 %tmp, i245* @i245_s
ret void
}
define void @i246_ls() nounwind {
- %tmp = load i246* @i246_l ; <i246> [#uses=1]
+ %tmp = load i246, i246* @i246_l ; <i246> [#uses=1]
store i246 %tmp, i246* @i246_s
ret void
}
define void @i247_ls() nounwind {
- %tmp = load i247* @i247_l ; <i247> [#uses=1]
+ %tmp = load i247, i247* @i247_l ; <i247> [#uses=1]
store i247 %tmp, i247* @i247_s
ret void
}
define void @i248_ls() nounwind {
- %tmp = load i248* @i248_l ; <i248> [#uses=1]
+ %tmp = load i248, i248* @i248_l ; <i248> [#uses=1]
store i248 %tmp, i248* @i248_s
ret void
}
define void @i249_ls() nounwind {
- %tmp = load i249* @i249_l ; <i249> [#uses=1]
+ %tmp = load i249, i249* @i249_l ; <i249> [#uses=1]
store i249 %tmp, i249* @i249_s
ret void
}
define void @i250_ls() nounwind {
- %tmp = load i250* @i250_l ; <i250> [#uses=1]
+ %tmp = load i250, i250* @i250_l ; <i250> [#uses=1]
store i250 %tmp, i250* @i250_s
ret void
}
define void @i251_ls() nounwind {
- %tmp = load i251* @i251_l ; <i251> [#uses=1]
+ %tmp = load i251, i251* @i251_l ; <i251> [#uses=1]
store i251 %tmp, i251* @i251_s
ret void
}
define void @i252_ls() nounwind {
- %tmp = load i252* @i252_l ; <i252> [#uses=1]
+ %tmp = load i252, i252* @i252_l ; <i252> [#uses=1]
store i252 %tmp, i252* @i252_s
ret void
}
define void @i253_ls() nounwind {
- %tmp = load i253* @i253_l ; <i253> [#uses=1]
+ %tmp = load i253, i253* @i253_l ; <i253> [#uses=1]
store i253 %tmp, i253* @i253_s
ret void
}
define void @i254_ls() nounwind {
- %tmp = load i254* @i254_l ; <i254> [#uses=1]
+ %tmp = load i254, i254* @i254_l ; <i254> [#uses=1]
store i254 %tmp, i254* @i254_s
ret void
}
define void @i255_ls() nounwind {
- %tmp = load i255* @i255_l ; <i255> [#uses=1]
+ %tmp = load i255, i255* @i255_l ; <i255> [#uses=1]
store i255 %tmp, i255* @i255_s
ret void
}
define void @i256_ls() nounwind {
- %tmp = load i256* @i256_l ; <i256> [#uses=1]
+ %tmp = load i256, i256* @i256_l ; <i256> [#uses=1]
store i256 %tmp, i256* @i256_s
ret void
}
diff --git a/test/CodeGen/Generic/ConstantExprLowering.ll b/test/CodeGen/Generic/ConstantExprLowering.ll
index 428d712462d6..3119dfae0aa0 100644
--- a/test/CodeGen/Generic/ConstantExprLowering.ll
+++ b/test/CodeGen/Generic/ConstantExprLowering.ll
@@ -16,7 +16,7 @@ less: ; preds = %entry
not_less: ; preds = %less, %entry
%t2 = phi i32 [ sub (i32 ptrtoint (i32* @XA to i32), i32 ptrtoint (i32* @XB to i32)), %less ], [ sub (i32 ptrtoint (i32* @XA to i32), i32 ptrtoint (i32* @XB to i32)), %entry ] ; <i32> [#uses=1]
- %tmp.39 = call i32 (i8*, ...)* @printf( i8* getelementptr ([16 x i8]* @.str_1, i64 0, i64 0), i32 %t2 ) ; <i32> [#uses=0]
+ %tmp.39 = call i32 (i8*, ...) @printf( i8* getelementptr ([16 x i8], [16 x i8]* @.str_1, i64 0, i64 0), i32 %t2 ) ; <i32> [#uses=0]
ret void
}
diff --git a/test/CodeGen/Generic/MachineBranchProb.ll b/test/CodeGen/Generic/MachineBranchProb.ll
index 83277c98989d..8288e45ee509 100644
--- a/test/CodeGen/Generic/MachineBranchProb.ll
+++ b/test/CodeGen/Generic/MachineBranchProb.ll
@@ -1,11 +1,12 @@
; RUN: llc < %s -print-machineinstrs=expand-isel-pseudos -o /dev/null 2>&1 | FileCheck %s
; ARM & AArch64 run an extra SimplifyCFG which disrupts this test.
-; XFAIL: arm,aarch64
+; Hexagon crashes (PR23377)
+; XFAIL: arm,aarch64,hexagon
; Make sure we have the correct weight attached to each successor.
define i32 @test2(i32 %x) nounwind uwtable readnone ssp {
-; CHECK: Machine code for function test2:
+; CHECK-LABEL: Machine code for function test2:
entry:
%conv = sext i32 %x to i64
switch i64 %conv, label %return [
@@ -33,3 +34,41 @@ return:
}
!0 = !{!"branch_weights", i32 7, i32 6, i32 4, i32 4, i32 64}
+
+
+declare void @g(i32)
+define void @left_leaning_weight_balanced_tree(i32 %x) {
+entry:
+ switch i32 %x, label %return [
+ i32 0, label %bb0
+ i32 10, label %bb1
+ i32 20, label %bb2
+ i32 30, label %bb3
+ i32 40, label %bb4
+ i32 50, label %bb5
+ ], !prof !1
+bb0: tail call void @g(i32 0) br label %return
+bb1: tail call void @g(i32 1) br label %return
+bb2: tail call void @g(i32 2) br label %return
+bb3: tail call void @g(i32 3) br label %return
+bb4: tail call void @g(i32 4) br label %return
+bb5: tail call void @g(i32 5) br label %return
+return: ret void
+
+; Check that we set branch weights on the pivot cmp instruction correctly.
+; Cases {0,10,20,30} go on the left with weight 13; cases {40,50} go on the
+; right with weight 20.
+;
+; CHECK-LABEL: Machine code for function left_leaning_weight_balanced_tree:
+; CHECK: BB#0: derived from LLVM BB %entry
+; CHECK-NOT: Successors
+; CHECK: Successors according to CFG: BB#8(13) BB#9(20)
+}
+
+!1 = !{!"branch_weights",
+ ; Default:
+ i32 1,
+ ; Case 0, 10, 20:
+ i32 10, i32 1, i32 1,
+ ; Case 30, 40, 50:
+ i32 1, i32 10, i32 10}
diff --git a/test/CodeGen/Generic/PBQP.ll b/test/CodeGen/Generic/PBQP.ll
index 91fcfba1a905..31fc4e653d7b 100644
--- a/test/CodeGen/Generic/PBQP.ll
+++ b/test/CodeGen/Generic/PBQP.ll
@@ -2,23 +2,23 @@
define i32 @foo() {
entry:
- %call = tail call i32 (...)* @baz()
- %call1 = tail call i32 (...)* @baz()
- %call2 = tail call i32 (...)* @baz()
- %call3 = tail call i32 (...)* @baz()
- %call4 = tail call i32 (...)* @baz()
- %call5 = tail call i32 (...)* @baz()
- %call6 = tail call i32 (...)* @baz()
- %call7 = tail call i32 (...)* @baz()
- %call8 = tail call i32 (...)* @baz()
- %call9 = tail call i32 (...)* @baz()
- %call10 = tail call i32 (...)* @baz()
- %call11 = tail call i32 (...)* @baz()
- %call12 = tail call i32 (...)* @baz()
- %call13 = tail call i32 (...)* @baz()
- %call14 = tail call i32 (...)* @baz()
- %call15 = tail call i32 (...)* @baz()
- %call16 = tail call i32 (...)* @baz()
+ %call = tail call i32 (...) @baz()
+ %call1 = tail call i32 (...) @baz()
+ %call2 = tail call i32 (...) @baz()
+ %call3 = tail call i32 (...) @baz()
+ %call4 = tail call i32 (...) @baz()
+ %call5 = tail call i32 (...) @baz()
+ %call6 = tail call i32 (...) @baz()
+ %call7 = tail call i32 (...) @baz()
+ %call8 = tail call i32 (...) @baz()
+ %call9 = tail call i32 (...) @baz()
+ %call10 = tail call i32 (...) @baz()
+ %call11 = tail call i32 (...) @baz()
+ %call12 = tail call i32 (...) @baz()
+ %call13 = tail call i32 (...) @baz()
+ %call14 = tail call i32 (...) @baz()
+ %call15 = tail call i32 (...) @baz()
+ %call16 = tail call i32 (...) @baz()
%call17 = tail call i32 @bar(i32 %call, i32 %call1, i32 %call2, i32 %call3, i32 %call4, i32 %call5, i32 %call6, i32 %call7, i32 %call8, i32 %call9, i32 %call10, i32 %call11, i32 %call12, i32 %call13, i32 %call14, i32 %call15, i32 %call16)
ret i32 %call17
}
diff --git a/test/CodeGen/Generic/add-with-overflow-128.ll b/test/CodeGen/Generic/add-with-overflow-128.ll
index 33f44d6e4436..2a7456cbc211 100644
--- a/test/CodeGen/Generic/add-with-overflow-128.ll
+++ b/test/CodeGen/Generic/add-with-overflow-128.ll
@@ -14,11 +14,11 @@ entry:
br i1 %obit, label %carry, label %normal
normal:
- %t1 = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([4 x i8]* @ok, i32 0, i32 0), i32 %sum32 ) nounwind
+ %t1 = tail call i32 (i8*, ...) @printf( i8* getelementptr ([4 x i8], [4 x i8]* @ok, i32 0, i32 0), i32 %sum32 ) nounwind
ret i1 true
carry:
- %t2 = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([4 x i8]* @no, i32 0, i32 0) ) nounwind
+ %t2 = tail call i32 (i8*, ...) @printf( i8* getelementptr ([4 x i8], [4 x i8]* @no, i32 0, i32 0) ) nounwind
ret i1 false
}
diff --git a/test/CodeGen/Generic/add-with-overflow-24.ll b/test/CodeGen/Generic/add-with-overflow-24.ll
index 63f5a222a003..6f06ae6b2afe 100644
--- a/test/CodeGen/Generic/add-with-overflow-24.ll
+++ b/test/CodeGen/Generic/add-with-overflow-24.ll
@@ -12,11 +12,11 @@ entry:
br i1 %obit, label %overflow, label %normal
normal:
- %t1 = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([4 x i8]* @ok, i32 0, i32 0), i32 %sum32 ) nounwind
+ %t1 = tail call i32 (i8*, ...) @printf( i8* getelementptr ([4 x i8], [4 x i8]* @ok, i32 0, i32 0), i32 %sum32 ) nounwind
ret i1 true
overflow:
- %t2 = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([4 x i8]* @no, i32 0, i32 0) ) nounwind
+ %t2 = tail call i32 (i8*, ...) @printf( i8* getelementptr ([4 x i8], [4 x i8]* @no, i32 0, i32 0) ) nounwind
ret i1 false
}
@@ -29,11 +29,11 @@ entry:
br i1 %obit, label %carry, label %normal
normal:
- %t1 = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([4 x i8]* @ok, i32 0, i32 0), i32 %sum32 ) nounwind
+ %t1 = tail call i32 (i8*, ...) @printf( i8* getelementptr ([4 x i8], [4 x i8]* @ok, i32 0, i32 0), i32 %sum32 ) nounwind
ret i1 true
carry:
- %t2 = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([4 x i8]* @no, i32 0, i32 0) ) nounwind
+ %t2 = tail call i32 (i8*, ...) @printf( i8* getelementptr ([4 x i8], [4 x i8]* @no, i32 0, i32 0) ) nounwind
ret i1 false
}
diff --git a/test/CodeGen/Generic/add-with-overflow.ll b/test/CodeGen/Generic/add-with-overflow.ll
index 0c2c9608deb9..b6bbaa1dc963 100644
--- a/test/CodeGen/Generic/add-with-overflow.ll
+++ b/test/CodeGen/Generic/add-with-overflow.ll
@@ -12,11 +12,11 @@ entry:
br i1 %obit, label %overflow, label %normal
normal:
- %t1 = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([4 x i8]* @ok, i32 0, i32 0), i32 %sum ) nounwind
+ %t1 = tail call i32 (i8*, ...) @printf( i8* getelementptr ([4 x i8], [4 x i8]* @ok, i32 0, i32 0), i32 %sum ) nounwind
ret i1 true
overflow:
- %t2 = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([4 x i8]* @no, i32 0, i32 0) ) nounwind
+ %t2 = tail call i32 (i8*, ...) @printf( i8* getelementptr ([4 x i8], [4 x i8]* @no, i32 0, i32 0) ) nounwind
ret i1 false
}
@@ -28,11 +28,11 @@ entry:
br i1 %obit, label %overflow, label %normal
normal:
- %t1 = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([4 x i8]* @ok, i32 0, i32 0), i32 %sum ) nounwind
+ %t1 = tail call i32 (i8*, ...) @printf( i8* getelementptr ([4 x i8], [4 x i8]* @ok, i32 0, i32 0), i32 %sum ) nounwind
ret i1 true
overflow:
- %t2 = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([4 x i8]* @no, i32 0, i32 0) ) nounwind
+ %t2 = tail call i32 (i8*, ...) @printf( i8* getelementptr ([4 x i8], [4 x i8]* @no, i32 0, i32 0) ) nounwind
ret i1 false
}
diff --git a/test/CodeGen/Generic/annotate.ll b/test/CodeGen/Generic/annotate.ll
index c617eb09258c..8dcf67edfb7d 100644
--- a/test/CodeGen/Generic/annotate.ll
+++ b/test/CodeGen/Generic/annotate.ll
@@ -8,7 +8,7 @@
define i32 @foo(i32 %a) {
entry:
- %0 = call i32 @llvm.annotation.i32(i32 %a, i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i8* getelementptr inbounds ([4 x i8]* @.str1, i32 0, i32 0), i32 2)
+ %0 = call i32 @llvm.annotation.i32(i32 %a, i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str1, i32 0, i32 0), i32 2)
ret i32 %0
}
diff --git a/test/CodeGen/Generic/badFoldGEP.ll b/test/CodeGen/Generic/badFoldGEP.ll
index 2d4474bdf930..8150390d7752 100644
--- a/test/CodeGen/Generic/badFoldGEP.ll
+++ b/test/CodeGen/Generic/badFoldGEP.ll
@@ -19,9 +19,9 @@ define i32 @main(i32 %argc, i8** %argv) {
bb0:
call void @opaque( [497 x %Domain]* @domain_array )
%cann-indvar-idxcast = sext i32 %argc to i64 ; <i64> [#uses=1]
- %reg841 = getelementptr [497 x %Domain]* @domain_array, i64 0, i64 %cann-indvar-idxcast, i32 3 ; <i32*> [#uses=1]
- %reg846 = getelementptr i32* %reg841, i64 1 ; <i32*> [#uses=1]
- %reg820 = load i32* %reg846 ; <i32> [#uses=1]
+ %reg841 = getelementptr [497 x %Domain], [497 x %Domain]* @domain_array, i64 0, i64 %cann-indvar-idxcast, i32 3 ; <i32*> [#uses=1]
+ %reg846 = getelementptr i32, i32* %reg841, i64 1 ; <i32*> [#uses=1]
+ %reg820 = load i32, i32* %reg846 ; <i32> [#uses=1]
ret i32 %reg820
}
diff --git a/test/CodeGen/Generic/badarg6.ll b/test/CodeGen/Generic/badarg6.ll
index d6e5ac5791e4..34736ec0b5f5 100644
--- a/test/CodeGen/Generic/badarg6.ll
+++ b/test/CodeGen/Generic/badarg6.ll
@@ -27,6 +27,6 @@ bb43: ; preds = %bb42, %bb25
%reg323 = phi double [ -1.000000e+00, %bb25 ], [ %reg317, %bb42 ] ; <double> [#uses=1]
%reg324 = phi double [ -1.000000e+00, %bb25 ], [ %reg318, %bb42 ] ; <double> [#uses=1]
%reg325 = phi double [ 1.000000e+00, %bb25 ], [ %reg319, %bb42 ] ; <double> [#uses=1]
- %reg609 = call i32 (i8*, ...)* @printf( i8* getelementptr ([44 x i8]* @.LC12, i64 0, i64 0), double %reg325, double %reg324, double %reg323, double %reg322, double %reg321 ) ; <i32> [#uses=0]
+ %reg609 = call i32 (i8*, ...) @printf( i8* getelementptr ([44 x i8], [44 x i8]* @.LC12, i64 0, i64 0), double %reg325, double %reg324, double %reg323, double %reg322, double %reg321 ) ; <i32> [#uses=0]
ret i32 0
}
diff --git a/test/CodeGen/Generic/builtin-expect.ll b/test/CodeGen/Generic/builtin-expect.ll
index e8cd07badf0c..def687ed183f 100644
--- a/test/CodeGen/Generic/builtin-expect.ll
+++ b/test/CodeGen/Generic/builtin-expect.ll
@@ -5,7 +5,7 @@ entry:
%retval = alloca i32, align 4
%x.addr = alloca i32, align 4
store i32 %x, i32* %x.addr, align 4
- %tmp = load i32* %x.addr, align 4
+ %tmp = load i32, i32* %x.addr, align 4
%cmp = icmp sgt i32 %tmp, 1
%conv = zext i1 %cmp to i32
%conv1 = sext i32 %conv to i64
@@ -14,7 +14,7 @@ entry:
br i1 %tobool, label %if.then, label %if.end
if.then: ; preds = %entry
- %call = call i32 (...)* @f()
+ %call = call i32 (...) @f()
store i32 %call, i32* %retval
br label %return
@@ -23,7 +23,7 @@ if.end: ; preds = %entry
br label %return
return: ; preds = %if.end, %if.then
- %0 = load i32* %retval
+ %0 = load i32, i32* %retval
ret i32 %0
}
@@ -36,14 +36,14 @@ entry:
%retval = alloca i32, align 4
%x.addr = alloca i32, align 4
store i32 %x, i32* %x.addr, align 4
- %tmp = load i32* %x.addr, align 4
+ %tmp = load i32, i32* %x.addr, align 4
%conv = sext i32 %tmp to i64
%expval = call i64 @llvm.expect.i64(i64 %conv, i64 1)
%tobool = icmp ne i64 %expval, 0
br i1 %tobool, label %if.then, label %if.end
if.then: ; preds = %entry
- %call = call i32 (...)* @f()
+ %call = call i32 (...) @f()
store i32 %call, i32* %retval
br label %return
@@ -52,7 +52,7 @@ if.end: ; preds = %entry
br label %return
return: ; preds = %if.end, %if.then
- %0 = load i32* %retval
+ %0 = load i32, i32* %retval
ret i32 %0
}
@@ -61,7 +61,7 @@ entry:
%retval = alloca i32, align 4
%x.addr = alloca i32, align 4
store i32 %x, i32* %x.addr, align 4
- %tmp = load i32* %x.addr, align 4
+ %tmp = load i32, i32* %x.addr, align 4
%tobool = icmp ne i32 %tmp, 0
%lnot = xor i1 %tobool, true
%lnot.ext = zext i1 %lnot to i32
@@ -71,7 +71,7 @@ entry:
br i1 %tobool1, label %if.then, label %if.end
if.then: ; preds = %entry
- %call = call i32 (...)* @f()
+ %call = call i32 (...) @f()
store i32 %call, i32* %retval
br label %return
@@ -80,7 +80,7 @@ if.end: ; preds = %entry
br label %return
return: ; preds = %if.end, %if.then
- %0 = load i32* %retval
+ %0 = load i32, i32* %retval
ret i32 %0
}
@@ -89,7 +89,7 @@ entry:
%retval = alloca i32, align 4
%x.addr = alloca i32, align 4
store i32 %x, i32* %x.addr, align 4
- %tmp = load i32* %x.addr, align 4
+ %tmp = load i32, i32* %x.addr, align 4
%tobool = icmp ne i32 %tmp, 0
%lnot = xor i1 %tobool, true
%lnot1 = xor i1 %lnot, true
@@ -100,7 +100,7 @@ entry:
br i1 %tobool2, label %if.then, label %if.end
if.then: ; preds = %entry
- %call = call i32 (...)* @f()
+ %call = call i32 (...) @f()
store i32 %call, i32* %retval
br label %return
@@ -109,7 +109,7 @@ if.end: ; preds = %entry
br label %return
return: ; preds = %if.end, %if.then
- %0 = load i32* %retval
+ %0 = load i32, i32* %retval
ret i32 %0
}
@@ -118,7 +118,7 @@ entry:
%retval = alloca i32, align 4
%x.addr = alloca i32, align 4
store i32 %x, i32* %x.addr, align 4
- %tmp = load i32* %x.addr, align 4
+ %tmp = load i32, i32* %x.addr, align 4
%cmp = icmp slt i32 %tmp, 0
%conv = zext i1 %cmp to i32
%conv1 = sext i32 %conv to i64
@@ -127,7 +127,7 @@ entry:
br i1 %tobool, label %if.then, label %if.end
if.then: ; preds = %entry
- %call = call i32 (...)* @f()
+ %call = call i32 (...) @f()
store i32 %call, i32* %retval
br label %return
@@ -136,7 +136,7 @@ if.end: ; preds = %entry
br label %return
return: ; preds = %if.end, %if.then
- %0 = load i32* %retval
+ %0 = load i32, i32* %retval
ret i32 %0
}
@@ -145,7 +145,7 @@ entry:
%retval = alloca i32, align 4
%x.addr = alloca i32, align 4
store i32 %x, i32* %x.addr, align 4
- %tmp = load i32* %x.addr, align 4
+ %tmp = load i32, i32* %x.addr, align 4
%conv = sext i32 %tmp to i64
%expval = call i64 @llvm.expect.i64(i64 %conv, i64 1)
switch i64 %expval, label %sw.epilog [
@@ -162,7 +162,7 @@ sw.epilog: ; preds = %entry
br label %return
return: ; preds = %sw.epilog, %sw.bb
- %0 = load i32* %retval
+ %0 = load i32, i32* %retval
ret i32 %0
}
@@ -171,7 +171,7 @@ entry:
%retval = alloca i32, align 4
%x.addr = alloca i32, align 4
store i32 %x, i32* %x.addr, align 4
- %tmp = load i32* %x.addr, align 4
+ %tmp = load i32, i32* %x.addr, align 4
%conv = sext i32 %tmp to i64
%expval = call i64 @llvm.expect.i64(i64 %conv, i64 1)
switch i64 %expval, label %sw.epilog [
@@ -180,7 +180,7 @@ entry:
]
sw.bb: ; preds = %entry, %entry
- %tmp1 = load i32* %x.addr, align 4
+ %tmp1 = load i32, i32* %x.addr, align 4
store i32 %tmp1, i32* %retval
br label %return
@@ -189,7 +189,7 @@ sw.epilog: ; preds = %entry
br label %return
return: ; preds = %sw.epilog, %sw.bb
- %0 = load i32* %retval
+ %0 = load i32, i32* %retval
ret i32 %0
}
@@ -198,7 +198,7 @@ entry:
%retval = alloca i32, align 4
%x.addr = alloca i32, align 4
store i32 %x, i32* %x.addr, align 4
- %tmp = load i32* %x.addr, align 4
+ %tmp = load i32, i32* %x.addr, align 4
%cmp = icmp sgt i32 %tmp, 1
%conv = zext i1 %cmp to i32
%expval = call i32 @llvm.expect.i32(i32 %conv, i32 1)
@@ -206,7 +206,7 @@ entry:
br i1 %tobool, label %if.then, label %if.end
if.then: ; preds = %entry
- %call = call i32 (...)* @f()
+ %call = call i32 (...) @f()
store i32 %call, i32* %retval
br label %return
@@ -215,7 +215,7 @@ if.end: ; preds = %entry
br label %return
return: ; preds = %if.end, %if.then
- %0 = load i32* %retval
+ %0 = load i32, i32* %retval
ret i32 %0
}
diff --git a/test/CodeGen/Generic/cast-fp.ll b/test/CodeGen/Generic/cast-fp.ll
index 590b7ceee4bf..a2611f55dbdf 100644
--- a/test/CodeGen/Generic/cast-fp.ll
+++ b/test/CodeGen/Generic/cast-fp.ll
@@ -10,24 +10,24 @@
declare i32 @printf(i8*, ...)
define i32 @main() {
- %a = load double* @A ; <double> [#uses=4]
- %a_fs = getelementptr [8 x i8]* @a_fstr, i64 0, i64 0 ; <i8*> [#uses=1]
- call i32 (i8*, ...)* @printf( i8* %a_fs, double %a ) ; <i32>:1 [#uses=0]
+ %a = load double, double* @A ; <double> [#uses=4]
+ %a_fs = getelementptr [8 x i8], [8 x i8]* @a_fstr, i64 0, i64 0 ; <i8*> [#uses=1]
+ call i32 (i8*, ...) @printf( i8* %a_fs, double %a ) ; <i32>:1 [#uses=0]
%a_d2l = fptosi double %a to i64 ; <i64> [#uses=1]
- %a_ls = getelementptr [10 x i8]* @a_lstr, i64 0, i64 0 ; <i8*> [#uses=1]
- call i32 (i8*, ...)* @printf( i8* %a_ls, i64 %a_d2l ) ; <i32>:2 [#uses=0]
+ %a_ls = getelementptr [10 x i8], [10 x i8]* @a_lstr, i64 0, i64 0 ; <i8*> [#uses=1]
+ call i32 (i8*, ...) @printf( i8* %a_ls, i64 %a_d2l ) ; <i32>:2 [#uses=0]
%a_d2i = fptosi double %a to i32 ; <i32> [#uses=2]
- %a_ds = getelementptr [8 x i8]* @a_dstr, i64 0, i64 0 ; <i8*> [#uses=3]
- call i32 (i8*, ...)* @printf( i8* %a_ds, i32 %a_d2i ) ; <i32>:3 [#uses=0]
+ %a_ds = getelementptr [8 x i8], [8 x i8]* @a_dstr, i64 0, i64 0 ; <i8*> [#uses=3]
+ call i32 (i8*, ...) @printf( i8* %a_ds, i32 %a_d2i ) ; <i32>:3 [#uses=0]
%a_d2sb = fptosi double %a to i8 ; <i8> [#uses=1]
- call i32 (i8*, ...)* @printf( i8* %a_ds, i8 %a_d2sb ) ; <i32>:4 [#uses=0]
+ call i32 (i8*, ...) @printf( i8* %a_ds, i8 %a_d2sb ) ; <i32>:4 [#uses=0]
%a_d2i2sb = trunc i32 %a_d2i to i8 ; <i8> [#uses=1]
- call i32 (i8*, ...)* @printf( i8* %a_ds, i8 %a_d2i2sb ) ; <i32>:5 [#uses=0]
- %b = load i32* @B ; <i32> [#uses=2]
- %b_ds = getelementptr [8 x i8]* @b_dstr, i64 0, i64 0 ; <i8*> [#uses=1]
- call i32 (i8*, ...)* @printf( i8* %b_ds, i32 %b ) ; <i32>:6 [#uses=0]
+ call i32 (i8*, ...) @printf( i8* %a_ds, i8 %a_d2i2sb ) ; <i32>:5 [#uses=0]
+ %b = load i32, i32* @B ; <i32> [#uses=2]
+ %b_ds = getelementptr [8 x i8], [8 x i8]* @b_dstr, i64 0, i64 0 ; <i8*> [#uses=1]
+ call i32 (i8*, ...) @printf( i8* %b_ds, i32 %b ) ; <i32>:6 [#uses=0]
%b_i2d = sitofp i32 %b to double ; <double> [#uses=1]
- %b_fs = getelementptr [8 x i8]* @b_fstr, i64 0, i64 0 ; <i8*> [#uses=1]
- call i32 (i8*, ...)* @printf( i8* %b_fs, double %b_i2d ) ; <i32>:7 [#uses=0]
+ %b_fs = getelementptr [8 x i8], [8 x i8]* @b_fstr, i64 0, i64 0 ; <i8*> [#uses=1]
+ call i32 (i8*, ...) @printf( i8* %b_fs, double %b_i2d ) ; <i32>:7 [#uses=0]
ret i32 0
}
diff --git a/test/CodeGen/Generic/constindices.ll b/test/CodeGen/Generic/constindices.ll
index 7deb30f43d12..837836fb29ca 100644
--- a/test/CodeGen/Generic/constindices.ll
+++ b/test/CodeGen/Generic/constindices.ll
@@ -14,31 +14,31 @@ define i32 @main() {
%ScalarB = alloca %MixedB ; <%MixedB*> [#uses=1]
%ArrayA = alloca %MixedA, i32 4 ; <%MixedA*> [#uses=3]
%ArrayB = alloca %MixedB, i32 3 ; <%MixedB*> [#uses=2]
- %I1 = getelementptr %MixedA* %ScalarA, i64 0, i32 0 ; <float*> [#uses=2]
+ %I1 = getelementptr %MixedA, %MixedA* %ScalarA, i64 0, i32 0 ; <float*> [#uses=2]
store float 0x3FF6A09020000000, float* %I1
- %I2 = getelementptr %MixedB* %ScalarB, i64 0, i32 1, i32 0 ; <float*> [#uses=2]
+ %I2 = getelementptr %MixedB, %MixedB* %ScalarB, i64 0, i32 1, i32 0 ; <float*> [#uses=2]
store float 0x4005BF1420000000, float* %I2
- %fptrA = getelementptr %MixedA* %ArrayA, i64 1, i32 0 ; <float*> [#uses=1]
- %fptrB = getelementptr %MixedB* %ArrayB, i64 2, i32 1, i32 0 ; <float*> [#uses=1]
+ %fptrA = getelementptr %MixedA, %MixedA* %ArrayA, i64 1, i32 0 ; <float*> [#uses=1]
+ %fptrB = getelementptr %MixedB, %MixedB* %ArrayB, i64 2, i32 1, i32 0 ; <float*> [#uses=1]
store float 0x400921CAC0000000, float* %fptrA
store float 5.000000e+00, float* %fptrB
;; Test that a sequence of GEPs with constant indices are folded right
- %fptrA1 = getelementptr %MixedA* %ArrayA, i64 3 ; <%MixedA*> [#uses=1]
- %fptrA2 = getelementptr %MixedA* %fptrA1, i64 0, i32 1 ; <[15 x i32]*> [#uses=1]
- %fptrA3 = getelementptr [15 x i32]* %fptrA2, i64 0, i64 8 ; <i32*> [#uses=1]
+ %fptrA1 = getelementptr %MixedA, %MixedA* %ArrayA, i64 3 ; <%MixedA*> [#uses=1]
+ %fptrA2 = getelementptr %MixedA, %MixedA* %fptrA1, i64 0, i32 1 ; <[15 x i32]*> [#uses=1]
+ %fptrA3 = getelementptr [15 x i32], [15 x i32]* %fptrA2, i64 0, i64 8 ; <i32*> [#uses=1]
store i32 5, i32* %fptrA3
- %sqrtTwo = load float* %I1 ; <float> [#uses=1]
- %exp = load float* %I2 ; <float> [#uses=1]
- %I3 = getelementptr %MixedA* %ArrayA, i64 1, i32 0 ; <float*> [#uses=1]
- %pi = load float* %I3 ; <float> [#uses=1]
- %I4 = getelementptr %MixedB* %ArrayB, i64 2, i32 1, i32 0 ; <float*> [#uses=1]
- %five = load float* %I4 ; <float> [#uses=1]
+ %sqrtTwo = load float, float* %I1 ; <float> [#uses=1]
+ %exp = load float, float* %I2 ; <float> [#uses=1]
+ %I3 = getelementptr %MixedA, %MixedA* %ArrayA, i64 1, i32 0 ; <float*> [#uses=1]
+ %pi = load float, float* %I3 ; <float> [#uses=1]
+ %I4 = getelementptr %MixedB, %MixedB* %ArrayB, i64 2, i32 1, i32 0 ; <float*> [#uses=1]
+ %five = load float, float* %I4 ; <float> [#uses=1]
%dsqrtTwo = fpext float %sqrtTwo to double ; <double> [#uses=1]
%dexp = fpext float %exp to double ; <double> [#uses=1]
%dpi = fpext float %pi to double ; <double> [#uses=1]
%dfive = fpext float %five to double ; <double> [#uses=1]
- %castFmt = getelementptr [44 x i8]* @fmtArg, i64 0, i64 0 ; <i8*> [#uses=1]
- call i32 (i8*, ...)* @printf( i8* %castFmt, double %dsqrtTwo, double %dexp, double %dpi, double %dfive ) ; <i32>:1 [#uses=0]
+ %castFmt = getelementptr [44 x i8], [44 x i8]* @fmtArg, i64 0, i64 0 ; <i8*> [#uses=1]
+ call i32 (i8*, ...) @printf( i8* %castFmt, double %dsqrtTwo, double %dexp, double %dpi, double %dfive ) ; <i32>:1 [#uses=0]
ret i32 0
}
diff --git a/test/CodeGen/Generic/crash.ll b/test/CodeGen/Generic/crash.ll
index 8de6b0d4bd31..81de762ba8c0 100644
--- a/test/CodeGen/Generic/crash.ll
+++ b/test/CodeGen/Generic/crash.ll
@@ -4,7 +4,7 @@
%struct.AVCodecTag = type {}
@ff_codec_bmp_tags = external global [0 x %struct.AVCodecTag]
@tags = global [1 x %struct.AVCodecTag*] [%struct.AVCodecTag* getelementptr
-inbounds ([0 x %struct.AVCodecTag]* @ff_codec_bmp_tags, i32 0, i32 0)]
+inbounds ([0 x %struct.AVCodecTag], [0 x %struct.AVCodecTag]* @ff_codec_bmp_tags, i32 0, i32 0)]
; rdar://8878965
@@ -13,15 +13,15 @@ inbounds ([0 x %struct.AVCodecTag]* @ff_codec_bmp_tags, i32 0, i32 0)]
define void @Parse_Camera(%struct.CAMERA** nocapture %Camera_Ptr) nounwind {
entry:
-%.pre = load %struct.CAMERA** %Camera_Ptr, align 4
-%0 = getelementptr inbounds %struct.CAMERA* %.pre, i32 0, i32 1, i32 0
-%1 = getelementptr inbounds %struct.CAMERA* %.pre, i32 0, i32 1, i32 2
+%.pre = load %struct.CAMERA*, %struct.CAMERA** %Camera_Ptr, align 4
+%0 = getelementptr inbounds %struct.CAMERA, %struct.CAMERA* %.pre, i32 0, i32 1, i32 0
+%1 = getelementptr inbounds %struct.CAMERA, %struct.CAMERA* %.pre, i32 0, i32 1, i32 2
br label %bb32
bb32: ; preds = %bb6
-%2 = load double* %0, align 4
-%3 = load double* %1, align 4
-%4 = load double* %0, align 4
+%2 = load double, double* %0, align 4
+%3 = load double, double* %1, align 4
+%4 = load double, double* %0, align 4
call void @Parse_Vector(double* %0) nounwind
%5 = call i32 @llvm.objectsize.i32.p0i8(i8* undef, i1 false)
%6 = icmp eq i32 %5, -1
@@ -50,14 +50,14 @@ for.body.i: ; preds = %for.body.i, %entry
br i1 undef, label %func_74.exit.for.cond29.thread_crit_edge, label %for.body.i
func_74.exit.for.cond29.thread_crit_edge: ; preds = %for.body.i
- %f13576.pre = getelementptr inbounds %struct.S0* undef, i64 0, i32 1
+ %f13576.pre = getelementptr inbounds %struct.S0, %struct.S0* undef, i64 0, i32 1
store i8 0, i8* %f13576.pre, align 4
br label %lbl_468
lbl_468: ; preds = %lbl_468, %func_74.exit.for.cond29.thread_crit_edge
%f13577.ph = phi i8* [ %f13576.pre, %func_74.exit.for.cond29.thread_crit_edge ], [ %f135.pre, %lbl_468 ]
store i8 1, i8* %f13577.ph, align 1
- %f135.pre = getelementptr inbounds %struct.S0* undef, i64 0, i32 1
+ %f135.pre = getelementptr inbounds %struct.S0, %struct.S0* undef, i64 0, i32 1
br i1 undef, label %lbl_468, label %for.end74
for.end74: ; preds = %lbl_468
diff --git a/test/CodeGen/Generic/dag-combine-crash.ll b/test/CodeGen/Generic/dag-combine-crash.ll
index a7810b5c05e2..45abd1d92edc 100644
--- a/test/CodeGen/Generic/dag-combine-crash.ll
+++ b/test/CodeGen/Generic/dag-combine-crash.ll
@@ -5,7 +5,7 @@ if.end:
br label %block.i.i
block.i.i:
- %tmpbb = load i8* undef
+ %tmpbb = load i8, i8* undef
%tmp54 = zext i8 %tmpbb to i64
%tmp59 = and i64 %tmp54, 8
%tmp60 = add i64 %tmp59, 3691045929300498764
diff --git a/test/CodeGen/Generic/dbg_value.ll b/test/CodeGen/Generic/dbg_value.ll
index ed7bdbad1822..4038086cbb4e 100644
--- a/test/CodeGen/Generic/dbg_value.ll
+++ b/test/CodeGen/Generic/dbg_value.ll
@@ -4,11 +4,11 @@
%0 = type { i32, i32 }
define void @t(%0*, i32, i32, i32, i32) nounwind {
- tail call void @llvm.dbg.value(metadata %0* %0, i64 0, metadata !0, metadata !{!"0x102"})
+ tail call void @llvm.dbg.value(metadata %0* %0, i64 0, metadata !0, metadata !DIExpression()), !dbg !DILocation(scope: !DISubprogram())
unreachable
}
declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnone
; !0 should conform to the format of DIVariable.
-!0 = !{!"0x101\00a\000\000", null, null, null} ; [ DW_TAG_arg_variable ]
+!0 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "a", arg: 0, scope: !DISubprogram())
diff --git a/test/CodeGen/Generic/empty-load-store.ll b/test/CodeGen/Generic/empty-load-store.ll
index bca73054447c..32ece8b9f07b 100644
--- a/test/CodeGen/Generic/empty-load-store.ll
+++ b/test/CodeGen/Generic/empty-load-store.ll
@@ -8,11 +8,11 @@ entry:
%retval = alloca i32
store i32 0, i32* %retval
%local_foo = alloca { }
- load { }* @current_foo
+ load { }, { }* @current_foo
store { } %0, { }* %local_foo
br label %return
return:
- load i32* %retval
+ load i32, i32* %retval
ret i32 %1
}
diff --git a/test/CodeGen/Generic/empty-phi.ll b/test/CodeGen/Generic/empty-phi.ll
index 8d5f3b96941c..f9191b95f42e 100644
--- a/test/CodeGen/Generic/empty-phi.ll
+++ b/test/CodeGen/Generic/empty-phi.ll
@@ -11,7 +11,7 @@ bb1:
br i1 %1, label %bb2, label %bb3
bb2:
- %load = load [0 x { i8*, i64, i64 }]* undef, align 8
+ %load = load [0 x { i8*, i64, i64 }], [0 x { i8*, i64, i64 }]* undef, align 8
br label %bb1
bb3:
diff --git a/test/CodeGen/Generic/fastcall.ll b/test/CodeGen/Generic/fastcall.ll
index 35e04f1863a3..b78bb1940c2b 100644
--- a/test/CodeGen/Generic/fastcall.ll
+++ b/test/CodeGen/Generic/fastcall.ll
@@ -7,7 +7,7 @@
define fastcc void @gcov_read_words(i32 %words) {
entry:
- store i32 %words, i32* getelementptr (%struct.__gcov_var*
+ store i32 %words, i32* getelementptr (%struct.__gcov_var, %struct.__gcov_var*
@__gcov_var,
i32 0, i32 0)
ret void
diff --git a/test/CodeGen/Generic/fp-to-int-invalid.ll b/test/CodeGen/Generic/fp-to-int-invalid.ll
index cdcc3a277b6e..6a37660d68dc 100644
--- a/test/CodeGen/Generic/fp-to-int-invalid.ll
+++ b/test/CodeGen/Generic/fp-to-int-invalid.ll
@@ -7,9 +7,9 @@ entry:
%"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
store i8* %result, i8** %result_addr
store float 0x40B2AFA160000000, float* %test, align 4
- %0 = load float* %test, align 4 ; <float> [#uses=1]
+ %0 = load float, float* %test, align 4 ; <float> [#uses=1]
%1 = fptosi float %0 to i8 ; <i8> [#uses=1]
- %2 = load i8** %result_addr, align 4 ; <i8*> [#uses=1]
+ %2 = load i8*, i8** %result_addr, align 4 ; <i8*> [#uses=1]
store i8 %1, i8* %2, align 1
br label %return
diff --git a/test/CodeGen/Generic/fwdtwice.ll b/test/CodeGen/Generic/fwdtwice.ll
index 6b38f04673de..0c0bfad7d4c7 100644
--- a/test/CodeGen/Generic/fwdtwice.ll
+++ b/test/CodeGen/Generic/fwdtwice.ll
@@ -18,7 +18,7 @@ define i32 @SumArray(i32 %Num) {
Top: ; preds = %Top, %0
store i32 %Num, i32* %Num.upgrd.1
- %reg108 = load i32* %Num.upgrd.1 ; <i32> [#uses=1]
+ %reg108 = load i32, i32* %Num.upgrd.1 ; <i32> [#uses=1]
%cast1006 = bitcast i32 %reg108 to i32 ; <i32> [#uses=1]
%cond1001 = icmp ule i32 %cast1006, 0 ; <i1> [#uses=1]
br i1 %cond1001, label %bb6, label %Top
diff --git a/test/CodeGen/Generic/global-ret0.ll b/test/CodeGen/Generic/global-ret0.ll
index 74bff876f882..a2a24b06127b 100644
--- a/test/CodeGen/Generic/global-ret0.ll
+++ b/test/CodeGen/Generic/global-ret0.ll
@@ -3,6 +3,6 @@
@g = global i32 0 ; <i32*> [#uses=1]
define i32 @main() {
- %h = load i32* @g ; <i32> [#uses=1]
+ %h = load i32, i32* @g ; <i32> [#uses=1]
ret i32 %h
}
diff --git a/test/CodeGen/Generic/hello.ll b/test/CodeGen/Generic/hello.ll
index 705945cf1983..a8147da744d6 100644
--- a/test/CodeGen/Generic/hello.ll
+++ b/test/CodeGen/Generic/hello.ll
@@ -5,7 +5,7 @@
declare i32 @printf(i8*, ...)
define i32 @main() {
- %s = getelementptr [7 x i8]* @.str_1, i64 0, i64 0 ; <i8*> [#uses=1]
- call i32 (i8*, ...)* @printf( i8* %s ) ; <i32>:1 [#uses=0]
+ %s = getelementptr [7 x i8], [7 x i8]* @.str_1, i64 0, i64 0 ; <i8*> [#uses=1]
+ call i32 (i8*, ...) @printf( i8* %s ) ; <i32>:1 [#uses=0]
ret i32 0
}
diff --git a/test/CodeGen/Generic/icmp-illegal.ll b/test/CodeGen/Generic/icmp-illegal.ll
new file mode 100644
index 000000000000..23d20c04652f
--- /dev/null
+++ b/test/CodeGen/Generic/icmp-illegal.ll
@@ -0,0 +1,50 @@
+
+; RUN: llc < %s | FileCheck %s
+
+; CHECK-LABEL: test_ult
+define i1 @test_ult(i256 %a) nounwind {
+ %1 = icmp ult i256 %a, -6432394258550908438
+ ret i1 %1
+}
+
+; CHECK-LABEL: test_ule
+define i1 @test_ule(i256 %a) nounwind {
+ %1 = icmp ule i256 %a, -6432394258550908438
+ ret i1 %1
+}
+
+; CHECK-LABEL: test_ugt
+define i1 @test_ugt(i256 %a) nounwind {
+ %1 = icmp ugt i256 %a, -6432394258550908438
+ ret i1 %1
+}
+
+; CHECK-LABEL: test_uge
+define i1 @test_uge(i256 %a) nounwind {
+ %1 = icmp uge i256 %a, -6432394258550908438
+ ret i1 %1
+}
+
+; CHECK-LABEL: test_slt
+define i1 @test_slt(i256 %a) nounwind {
+ %1 = icmp slt i256 %a, -6432394258550908438
+ ret i1 %1
+}
+
+; CHECK-LABEL: test_sle
+define i1 @test_sle(i256 %a) nounwind {
+ %1 = icmp sle i256 %a, -6432394258550908438
+ ret i1 %1
+}
+
+; CHECK-LABEL: test_sgt
+define i1 @test_sgt(i256 %a) nounwind {
+ %1 = icmp sgt i256 %a, -6432394258550908438
+ ret i1 %1
+}
+
+; CHECK-LABEL: test_sge
+define i1 @test_sge(i256 %a) nounwind {
+ %1 = icmp sge i256 %a, -6432394258550908438
+ ret i1 %1
+}
diff --git a/test/CodeGen/Generic/inline-asm-mem-clobber.ll b/test/CodeGen/Generic/inline-asm-mem-clobber.ll
index 5aa827a0ab88..be1e0a39b3b0 100644
--- a/test/CodeGen/Generic/inline-asm-mem-clobber.ll
+++ b/test/CodeGen/Generic/inline-asm-mem-clobber.ll
@@ -8,13 +8,13 @@ entry:
%rv = alloca i32, align 4
store i8* %p, i8** %p.addr, align 8
store i32 0, i32* @G, align 4
- %0 = load i8** %p.addr, align 8
+ %0 = load i8*, i8** %p.addr, align 8
; CHECK: blah
%1 = call i32 asm "blah", "=r,r,~{memory}"(i8* %0) nounwind
; CHECK: @G
store i32 %1, i32* %rv, align 4
- %2 = load i32* %rv, align 4
- %3 = load i32* @G, align 4
+ %2 = load i32, i32* %rv, align 4
+ %3 = load i32, i32* @G, align 4
%add = add nsw i32 %2, %3
ret i32 %add
}
diff --git a/test/CodeGen/Generic/invalid-memcpy.ll b/test/CodeGen/Generic/invalid-memcpy.ll
index 2dfa28bac482..d4252bc9d988 100644
--- a/test/CodeGen/Generic/invalid-memcpy.ll
+++ b/test/CodeGen/Generic/invalid-memcpy.ll
@@ -10,7 +10,7 @@ define void @Bork() {
entry:
%Qux = alloca [33 x i8]
%Qux1 = bitcast [33 x i8]* %Qux to i8*
- call void @llvm.memcpy.p0i8.p0i8.i64(i8* %Qux1, i8* getelementptr inbounds ([33 x i8]* @C.0.1173, i32 0, i32 0), i64 33, i32 8, i1 false)
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* %Qux1, i8* getelementptr inbounds ([33 x i8], [33 x i8]* @C.0.1173, i32 0, i32 0), i64 33, i32 8, i1 false)
ret void
}
diff --git a/test/CodeGen/Generic/negintconst.ll b/test/CodeGen/Generic/negintconst.ll
index 67d775e16882..4c0a654a87c2 100644
--- a/test/CodeGen/Generic/negintconst.ll
+++ b/test/CodeGen/Generic/negintconst.ll
@@ -39,9 +39,9 @@ define i32 @main() {
%iscale = mul i32 %i, -1 ; <i32> [#uses=1]
%ioff = add i32 %iscale, 3 ; <i32> [#uses=2]
%ioff.upgrd.1 = zext i32 %ioff to i64 ; <i64> [#uses=1]
- %fptr = getelementptr %Results* %fval, i64 %ioff.upgrd.1 ; <%Results*> [#uses=1]
- %castFmt = getelementptr [39 x i8]* @fmtArg, i64 0, i64 0 ; <i8*> [#uses=1]
- call i32 (i8*, ...)* @printf( i8* %castFmt, i32 %ioff, %Results* %fval, %Results* %fptr ) ; <i32>:1 [#uses=0]
+ %fptr = getelementptr %Results, %Results* %fval, i64 %ioff.upgrd.1 ; <%Results*> [#uses=1]
+ %castFmt = getelementptr [39 x i8], [39 x i8]* @fmtArg, i64 0, i64 0 ; <i8*> [#uses=1]
+ call i32 (i8*, ...) @printf( i8* %castFmt, i32 %ioff, %Results* %fval, %Results* %fptr ) ; <i32>:1 [#uses=0]
ret i32 0
}
diff --git a/test/CodeGen/Generic/overloaded-intrinsic-name.ll b/test/CodeGen/Generic/overloaded-intrinsic-name.ll
new file mode 100644
index 000000000000..979bc772f75f
--- /dev/null
+++ b/test/CodeGen/Generic/overloaded-intrinsic-name.ll
@@ -0,0 +1,57 @@
+; RUN: opt -verify -S < %s
+
+; Tests the name mangling performed by the codepath following
+; getMangledTypeStr(). Only tests that code with the various manglings
+; run fine: doesn't actually test the mangling with the type of the
+; arguments. Meant to serve as an example-document on how the user
+; should do name manglings.
+
+; Exercise the most general case, llvm_anyptr_type, using gc.relocate
+; and gc.statepoint. Note that it has nothing to do with gc.*
+; functions specifically: any function that accepts llvm_anyptr_type
+; will serve the purpose.
+
+; function and integer
+define i32* @test_iAny(i32* %v) gc "statepoint-example" {
+ %tok = call i32 (i64, i32, i1 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i1f(i64 0, i32 0, i1 ()* @return_i1, i32 0, i32 0, i32 0, i32 0, i32* %v)
+ %v-new = call i32* @llvm.experimental.gc.relocate.p0i32(i32 %tok, i32 7, i32 7)
+ ret i32* %v-new
+}
+
+; float
+define float* @test_fAny(float* %v) gc "statepoint-example" {
+ %tok = call i32 (i64, i32, i1 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i1f(i64 0, i32 0, i1 ()* @return_i1, i32 0, i32 0, i32 0, i32 0, float* %v)
+ %v-new = call float* @llvm.experimental.gc.relocate.p0f32(i32 %tok, i32 7, i32 7)
+ ret float* %v-new
+}
+
+; array of integers
+define [3 x i32]* @test_aAny([3 x i32]* %v) gc "statepoint-example" {
+ %tok = call i32 (i64, i32, i1 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i1f(i64 0, i32 0, i1 ()* @return_i1, i32 0, i32 0, i32 0, i32 0, [3 x i32]* %v)
+ %v-new = call [3 x i32]* @llvm.experimental.gc.relocate.p0a3i32(i32 %tok, i32 7, i32 7)
+ ret [3 x i32]* %v-new
+}
+
+; vector of integers
+define <3 x i32>* @test_vAny(<3 x i32>* %v) gc "statepoint-example" {
+ %tok = call i32 (i64, i32, i1 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i1f(i64 0, i32 0, i1 ()* @return_i1, i32 0, i32 0, i32 0, i32 0, <3 x i32>* %v)
+ %v-new = call <3 x i32>* @llvm.experimental.gc.relocate.p0v3i32(i32 %tok, i32 7, i32 7)
+ ret <3 x i32>* %v-new
+}
+
+%struct.test = type { i32, i1 }
+
+; struct
+define %struct.test* @test_struct(%struct.test* %v) gc "statepoint-example" {
+ %tok = call i32 (i64, i32, i1 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i1f(i64 0, i32 0, i1 ()* @return_i1, i32 0, i32 0, i32 0, i32 0, %struct.test* %v)
+ %v-new = call %struct.test* @llvm.experimental.gc.relocate.p0struct.test(i32 %tok, i32 7, i32 7)
+ ret %struct.test* %v-new
+}
+
+declare zeroext i1 @return_i1()
+declare i32 @llvm.experimental.gc.statepoint.p0f_i1f(i64, i32, i1 ()*, i32, i32, ...)
+declare i32* @llvm.experimental.gc.relocate.p0i32(i32, i32, i32)
+declare float* @llvm.experimental.gc.relocate.p0f32(i32, i32, i32)
+declare [3 x i32]* @llvm.experimental.gc.relocate.p0a3i32(i32, i32, i32)
+declare <3 x i32>* @llvm.experimental.gc.relocate.p0v3i32(i32, i32, i32)
+declare %struct.test* @llvm.experimental.gc.relocate.p0struct.test(i32, i32, i32)
diff --git a/test/CodeGen/Generic/pr2625.ll b/test/CodeGen/Generic/pr2625.ll
index 3e3dc4b2d2bc..c745603c08a2 100644
--- a/test/CodeGen/Generic/pr2625.ll
+++ b/test/CodeGen/Generic/pr2625.ll
@@ -7,11 +7,11 @@ entry:
store { i32, { i32 } }* %0, { i32, { i32 } }** %state
%retval = alloca i32 ; <i32*> [#uses=2]
store i32 0, i32* %retval
- load { i32, { i32 } }** %state ; <{ i32, { i32 } }*>:1 [#uses=1]
+ load { i32, { i32 } }*, { i32, { i32 } }** %state ; <{ i32, { i32 } }*>:1 [#uses=1]
store { i32, { i32 } } zeroinitializer, { i32, { i32 } }* %1
br label %return
return: ; preds = %entry
- load i32* %retval ; <i32>:2 [#uses=1]
+ load i32, i32* %retval ; <i32>:2 [#uses=1]
ret i32 %2
}
diff --git a/test/CodeGen/Generic/print-add.ll b/test/CodeGen/Generic/print-add.ll
index 95608dc60b50..0507aba317b5 100644
--- a/test/CodeGen/Generic/print-add.ll
+++ b/test/CodeGen/Generic/print-add.ll
@@ -5,14 +5,14 @@
declare i32 @printf(i8*, ...)
define i32 @main() {
- %f = getelementptr [4 x i8]* @.str_1, i64 0, i64 0 ; <i8*> [#uses=3]
+ %f = getelementptr [4 x i8], [4 x i8]* @.str_1, i64 0, i64 0 ; <i8*> [#uses=3]
%d = add i32 1, 0 ; <i32> [#uses=3]
- call i32 (i8*, ...)* @printf( i8* %f, i32 %d ) ; <i32>:1 [#uses=0]
+ call i32 (i8*, ...) @printf( i8* %f, i32 %d ) ; <i32>:1 [#uses=0]
%e = add i32 38, 2 ; <i32> [#uses=2]
- call i32 (i8*, ...)* @printf( i8* %f, i32 %e ) ; <i32>:2 [#uses=0]
+ call i32 (i8*, ...) @printf( i8* %f, i32 %e ) ; <i32>:2 [#uses=0]
%g = add i32 %d, %d ; <i32> [#uses=1]
%h = add i32 %e, %g ; <i32> [#uses=1]
- call i32 (i8*, ...)* @printf( i8* %f, i32 %h ) ; <i32>:3 [#uses=0]
+ call i32 (i8*, ...) @printf( i8* %f, i32 %h ) ; <i32>:3 [#uses=0]
ret i32 0
}
diff --git a/test/CodeGen/Generic/print-arith-fp.ll b/test/CodeGen/Generic/print-arith-fp.ll
index d129ff85870e..93b158e10343 100644
--- a/test/CodeGen/Generic/print-arith-fp.ll
+++ b/test/CodeGen/Generic/print-arith-fp.ll
@@ -18,44 +18,44 @@
declare i32 @printf(i8*, ...)
define i32 @main() {
- %a = load double* @A ; <double> [#uses=12]
- %b = load double* @B ; <double> [#uses=12]
- %a_s = getelementptr [8 x i8]* @a_str, i64 0, i64 0 ; <i8*> [#uses=1]
- %b_s = getelementptr [8 x i8]* @b_str, i64 0, i64 0 ; <i8*> [#uses=1]
- call i32 (i8*, ...)* @printf( i8* %a_s, double %a ) ; <i32>:1 [#uses=0]
- call i32 (i8*, ...)* @printf( i8* %b_s, double %b ) ; <i32>:2 [#uses=0]
+ %a = load double, double* @A ; <double> [#uses=12]
+ %b = load double, double* @B ; <double> [#uses=12]
+ %a_s = getelementptr [8 x i8], [8 x i8]* @a_str, i64 0, i64 0 ; <i8*> [#uses=1]
+ %b_s = getelementptr [8 x i8], [8 x i8]* @b_str, i64 0, i64 0 ; <i8*> [#uses=1]
+ call i32 (i8*, ...) @printf( i8* %a_s, double %a ) ; <i32>:1 [#uses=0]
+ call i32 (i8*, ...) @printf( i8* %b_s, double %b ) ; <i32>:2 [#uses=0]
%add_r = fadd double %a, %b ; <double> [#uses=1]
%sub_r = fsub double %a, %b ; <double> [#uses=1]
%mul_r = fmul double %a, %b ; <double> [#uses=1]
%div_r = fdiv double %b, %a ; <double> [#uses=1]
%rem_r = frem double %b, %a ; <double> [#uses=1]
- %add_s = getelementptr [12 x i8]* @add_str, i64 0, i64 0 ; <i8*> [#uses=1]
- %sub_s = getelementptr [12 x i8]* @sub_str, i64 0, i64 0 ; <i8*> [#uses=1]
- %mul_s = getelementptr [12 x i8]* @mul_str, i64 0, i64 0 ; <i8*> [#uses=1]
- %div_s = getelementptr [12 x i8]* @div_str, i64 0, i64 0 ; <i8*> [#uses=1]
- %rem_s = getelementptr [13 x i8]* @rem_str, i64 0, i64 0 ; <i8*> [#uses=1]
- call i32 (i8*, ...)* @printf( i8* %add_s, double %add_r ) ; <i32>:3 [#uses=0]
- call i32 (i8*, ...)* @printf( i8* %sub_s, double %sub_r ) ; <i32>:4 [#uses=0]
- call i32 (i8*, ...)* @printf( i8* %mul_s, double %mul_r ) ; <i32>:5 [#uses=0]
- call i32 (i8*, ...)* @printf( i8* %div_s, double %div_r ) ; <i32>:6 [#uses=0]
- call i32 (i8*, ...)* @printf( i8* %rem_s, double %rem_r ) ; <i32>:7 [#uses=0]
+ %add_s = getelementptr [12 x i8], [12 x i8]* @add_str, i64 0, i64 0 ; <i8*> [#uses=1]
+ %sub_s = getelementptr [12 x i8], [12 x i8]* @sub_str, i64 0, i64 0 ; <i8*> [#uses=1]
+ %mul_s = getelementptr [12 x i8], [12 x i8]* @mul_str, i64 0, i64 0 ; <i8*> [#uses=1]
+ %div_s = getelementptr [12 x i8], [12 x i8]* @div_str, i64 0, i64 0 ; <i8*> [#uses=1]
+ %rem_s = getelementptr [13 x i8], [13 x i8]* @rem_str, i64 0, i64 0 ; <i8*> [#uses=1]
+ call i32 (i8*, ...) @printf( i8* %add_s, double %add_r ) ; <i32>:3 [#uses=0]
+ call i32 (i8*, ...) @printf( i8* %sub_s, double %sub_r ) ; <i32>:4 [#uses=0]
+ call i32 (i8*, ...) @printf( i8* %mul_s, double %mul_r ) ; <i32>:5 [#uses=0]
+ call i32 (i8*, ...) @printf( i8* %div_s, double %div_r ) ; <i32>:6 [#uses=0]
+ call i32 (i8*, ...) @printf( i8* %rem_s, double %rem_r ) ; <i32>:7 [#uses=0]
%lt_r = fcmp olt double %a, %b ; <i1> [#uses=1]
%le_r = fcmp ole double %a, %b ; <i1> [#uses=1]
%gt_r = fcmp ogt double %a, %b ; <i1> [#uses=1]
%ge_r = fcmp oge double %a, %b ; <i1> [#uses=1]
%eq_r = fcmp oeq double %a, %b ; <i1> [#uses=1]
%ne_r = fcmp une double %a, %b ; <i1> [#uses=1]
- %lt_s = getelementptr [12 x i8]* @lt_str, i64 0, i64 0 ; <i8*> [#uses=1]
- %le_s = getelementptr [13 x i8]* @le_str, i64 0, i64 0 ; <i8*> [#uses=1]
- %gt_s = getelementptr [12 x i8]* @gt_str, i64 0, i64 0 ; <i8*> [#uses=1]
- %ge_s = getelementptr [13 x i8]* @ge_str, i64 0, i64 0 ; <i8*> [#uses=1]
- %eq_s = getelementptr [13 x i8]* @eq_str, i64 0, i64 0 ; <i8*> [#uses=1]
- %ne_s = getelementptr [13 x i8]* @ne_str, i64 0, i64 0 ; <i8*> [#uses=1]
- call i32 (i8*, ...)* @printf( i8* %lt_s, i1 %lt_r ) ; <i32>:8 [#uses=0]
- call i32 (i8*, ...)* @printf( i8* %le_s, i1 %le_r ) ; <i32>:9 [#uses=0]
- call i32 (i8*, ...)* @printf( i8* %gt_s, i1 %gt_r ) ; <i32>:10 [#uses=0]
- call i32 (i8*, ...)* @printf( i8* %ge_s, i1 %ge_r ) ; <i32>:11 [#uses=0]
- call i32 (i8*, ...)* @printf( i8* %eq_s, i1 %eq_r ) ; <i32>:12 [#uses=0]
- call i32 (i8*, ...)* @printf( i8* %ne_s, i1 %ne_r ) ; <i32>:13 [#uses=0]
+ %lt_s = getelementptr [12 x i8], [12 x i8]* @lt_str, i64 0, i64 0 ; <i8*> [#uses=1]
+ %le_s = getelementptr [13 x i8], [13 x i8]* @le_str, i64 0, i64 0 ; <i8*> [#uses=1]
+ %gt_s = getelementptr [12 x i8], [12 x i8]* @gt_str, i64 0, i64 0 ; <i8*> [#uses=1]
+ %ge_s = getelementptr [13 x i8], [13 x i8]* @ge_str, i64 0, i64 0 ; <i8*> [#uses=1]
+ %eq_s = getelementptr [13 x i8], [13 x i8]* @eq_str, i64 0, i64 0 ; <i8*> [#uses=1]
+ %ne_s = getelementptr [13 x i8], [13 x i8]* @ne_str, i64 0, i64 0 ; <i8*> [#uses=1]
+ call i32 (i8*, ...) @printf( i8* %lt_s, i1 %lt_r ) ; <i32>:8 [#uses=0]
+ call i32 (i8*, ...) @printf( i8* %le_s, i1 %le_r ) ; <i32>:9 [#uses=0]
+ call i32 (i8*, ...) @printf( i8* %gt_s, i1 %gt_r ) ; <i32>:10 [#uses=0]
+ call i32 (i8*, ...) @printf( i8* %ge_s, i1 %ge_r ) ; <i32>:11 [#uses=0]
+ call i32 (i8*, ...) @printf( i8* %eq_s, i1 %eq_r ) ; <i32>:12 [#uses=0]
+ call i32 (i8*, ...) @printf( i8* %ne_s, i1 %ne_r ) ; <i32>:13 [#uses=0]
ret i32 0
}
diff --git a/test/CodeGen/Generic/print-arith-int.ll b/test/CodeGen/Generic/print-arith-int.ll
index ce938cf05b98..a5c519c0c7fc 100644
--- a/test/CodeGen/Generic/print-arith-int.ll
+++ b/test/CodeGen/Generic/print-arith-int.ll
@@ -23,45 +23,45 @@
declare i32 @printf(i8*, ...)
define i32 @main() {
- %a = load i32* @A ; <i32> [#uses=16]
- %b = load i32* @B ; <i32> [#uses=17]
- %a_s = getelementptr [8 x i8]* @a_str, i64 0, i64 0 ; <i8*> [#uses=1]
- %b_s = getelementptr [8 x i8]* @b_str, i64 0, i64 0 ; <i8*> [#uses=1]
- call i32 (i8*, ...)* @printf( i8* %a_s, i32 %a ) ; <i32>:1 [#uses=0]
- call i32 (i8*, ...)* @printf( i8* %b_s, i32 %b ) ; <i32>:2 [#uses=0]
+ %a = load i32, i32* @A ; <i32> [#uses=16]
+ %b = load i32, i32* @B ; <i32> [#uses=17]
+ %a_s = getelementptr [8 x i8], [8 x i8]* @a_str, i64 0, i64 0 ; <i8*> [#uses=1]
+ %b_s = getelementptr [8 x i8], [8 x i8]* @b_str, i64 0, i64 0 ; <i8*> [#uses=1]
+ call i32 (i8*, ...) @printf( i8* %a_s, i32 %a ) ; <i32>:1 [#uses=0]
+ call i32 (i8*, ...) @printf( i8* %b_s, i32 %b ) ; <i32>:2 [#uses=0]
%add_r = add i32 %a, %b ; <i32> [#uses=1]
%sub_r = sub i32 %a, %b ; <i32> [#uses=1]
%mul_r = mul i32 %a, %b ; <i32> [#uses=1]
%div_r = sdiv i32 %b, %a ; <i32> [#uses=1]
%rem_r = srem i32 %b, %a ; <i32> [#uses=1]
- %add_s = getelementptr [12 x i8]* @add_str, i64 0, i64 0 ; <i8*> [#uses=1]
- %sub_s = getelementptr [12 x i8]* @sub_str, i64 0, i64 0 ; <i8*> [#uses=1]
- %mul_s = getelementptr [12 x i8]* @mul_str, i64 0, i64 0 ; <i8*> [#uses=1]
- %div_s = getelementptr [12 x i8]* @div_str, i64 0, i64 0 ; <i8*> [#uses=1]
- %rem_s = getelementptr [13 x i8]* @rem_str, i64 0, i64 0 ; <i8*> [#uses=1]
- call i32 (i8*, ...)* @printf( i8* %add_s, i32 %add_r ) ; <i32>:3 [#uses=0]
- call i32 (i8*, ...)* @printf( i8* %sub_s, i32 %sub_r ) ; <i32>:4 [#uses=0]
- call i32 (i8*, ...)* @printf( i8* %mul_s, i32 %mul_r ) ; <i32>:5 [#uses=0]
- call i32 (i8*, ...)* @printf( i8* %div_s, i32 %div_r ) ; <i32>:6 [#uses=0]
- call i32 (i8*, ...)* @printf( i8* %rem_s, i32 %rem_r ) ; <i32>:7 [#uses=0]
+ %add_s = getelementptr [12 x i8], [12 x i8]* @add_str, i64 0, i64 0 ; <i8*> [#uses=1]
+ %sub_s = getelementptr [12 x i8], [12 x i8]* @sub_str, i64 0, i64 0 ; <i8*> [#uses=1]
+ %mul_s = getelementptr [12 x i8], [12 x i8]* @mul_str, i64 0, i64 0 ; <i8*> [#uses=1]
+ %div_s = getelementptr [12 x i8], [12 x i8]* @div_str, i64 0, i64 0 ; <i8*> [#uses=1]
+ %rem_s = getelementptr [13 x i8], [13 x i8]* @rem_str, i64 0, i64 0 ; <i8*> [#uses=1]
+ call i32 (i8*, ...) @printf( i8* %add_s, i32 %add_r ) ; <i32>:3 [#uses=0]
+ call i32 (i8*, ...) @printf( i8* %sub_s, i32 %sub_r ) ; <i32>:4 [#uses=0]
+ call i32 (i8*, ...) @printf( i8* %mul_s, i32 %mul_r ) ; <i32>:5 [#uses=0]
+ call i32 (i8*, ...) @printf( i8* %div_s, i32 %div_r ) ; <i32>:6 [#uses=0]
+ call i32 (i8*, ...) @printf( i8* %rem_s, i32 %rem_r ) ; <i32>:7 [#uses=0]
%lt_r = icmp slt i32 %a, %b ; <i1> [#uses=1]
%le_r = icmp sle i32 %a, %b ; <i1> [#uses=1]
%gt_r = icmp sgt i32 %a, %b ; <i1> [#uses=1]
%ge_r = icmp sge i32 %a, %b ; <i1> [#uses=1]
%eq_r = icmp eq i32 %a, %b ; <i1> [#uses=1]
%ne_r = icmp ne i32 %a, %b ; <i1> [#uses=1]
- %lt_s = getelementptr [12 x i8]* @lt_str, i64 0, i64 0 ; <i8*> [#uses=1]
- %le_s = getelementptr [13 x i8]* @le_str, i64 0, i64 0 ; <i8*> [#uses=1]
- %gt_s = getelementptr [12 x i8]* @gt_str, i64 0, i64 0 ; <i8*> [#uses=1]
- %ge_s = getelementptr [13 x i8]* @ge_str, i64 0, i64 0 ; <i8*> [#uses=1]
- %eq_s = getelementptr [13 x i8]* @eq_str, i64 0, i64 0 ; <i8*> [#uses=1]
- %ne_s = getelementptr [13 x i8]* @ne_str, i64 0, i64 0 ; <i8*> [#uses=1]
- call i32 (i8*, ...)* @printf( i8* %lt_s, i1 %lt_r ) ; <i32>:8 [#uses=0]
- call i32 (i8*, ...)* @printf( i8* %le_s, i1 %le_r ) ; <i32>:9 [#uses=0]
- call i32 (i8*, ...)* @printf( i8* %gt_s, i1 %gt_r ) ; <i32>:10 [#uses=0]
- call i32 (i8*, ...)* @printf( i8* %ge_s, i1 %ge_r ) ; <i32>:11 [#uses=0]
- call i32 (i8*, ...)* @printf( i8* %eq_s, i1 %eq_r ) ; <i32>:12 [#uses=0]
- call i32 (i8*, ...)* @printf( i8* %ne_s, i1 %ne_r ) ; <i32>:13 [#uses=0]
+ %lt_s = getelementptr [12 x i8], [12 x i8]* @lt_str, i64 0, i64 0 ; <i8*> [#uses=1]
+ %le_s = getelementptr [13 x i8], [13 x i8]* @le_str, i64 0, i64 0 ; <i8*> [#uses=1]
+ %gt_s = getelementptr [12 x i8], [12 x i8]* @gt_str, i64 0, i64 0 ; <i8*> [#uses=1]
+ %ge_s = getelementptr [13 x i8], [13 x i8]* @ge_str, i64 0, i64 0 ; <i8*> [#uses=1]
+ %eq_s = getelementptr [13 x i8], [13 x i8]* @eq_str, i64 0, i64 0 ; <i8*> [#uses=1]
+ %ne_s = getelementptr [13 x i8], [13 x i8]* @ne_str, i64 0, i64 0 ; <i8*> [#uses=1]
+ call i32 (i8*, ...) @printf( i8* %lt_s, i1 %lt_r ) ; <i32>:8 [#uses=0]
+ call i32 (i8*, ...) @printf( i8* %le_s, i1 %le_r ) ; <i32>:9 [#uses=0]
+ call i32 (i8*, ...) @printf( i8* %gt_s, i1 %gt_r ) ; <i32>:10 [#uses=0]
+ call i32 (i8*, ...) @printf( i8* %ge_s, i1 %ge_r ) ; <i32>:11 [#uses=0]
+ call i32 (i8*, ...) @printf( i8* %eq_s, i1 %eq_r ) ; <i32>:12 [#uses=0]
+ call i32 (i8*, ...) @printf( i8* %ne_s, i1 %ne_r ) ; <i32>:13 [#uses=0]
%and_r = and i32 %a, %b ; <i32> [#uses=1]
%or_r = or i32 %a, %b ; <i32> [#uses=1]
%xor_r = xor i32 %a, %b ; <i32> [#uses=1]
@@ -70,15 +70,15 @@ define i32 @main() {
%shl_r = shl i32 %b, %shift.upgrd.1 ; <i32> [#uses=1]
%shift.upgrd.2 = zext i8 %u to i32 ; <i32> [#uses=1]
%shr_r = ashr i32 %b, %shift.upgrd.2 ; <i32> [#uses=1]
- %and_s = getelementptr [12 x i8]* @and_str, i64 0, i64 0 ; <i8*> [#uses=1]
- %or_s = getelementptr [12 x i8]* @or_str, i64 0, i64 0 ; <i8*> [#uses=1]
- %xor_s = getelementptr [12 x i8]* @xor_str, i64 0, i64 0 ; <i8*> [#uses=1]
- %shl_s = getelementptr [13 x i8]* @shl_str, i64 0, i64 0 ; <i8*> [#uses=1]
- %shr_s = getelementptr [13 x i8]* @shr_str, i64 0, i64 0 ; <i8*> [#uses=1]
- call i32 (i8*, ...)* @printf( i8* %and_s, i32 %and_r ) ; <i32>:14 [#uses=0]
- call i32 (i8*, ...)* @printf( i8* %or_s, i32 %or_r ) ; <i32>:15 [#uses=0]
- call i32 (i8*, ...)* @printf( i8* %xor_s, i32 %xor_r ) ; <i32>:16 [#uses=0]
- call i32 (i8*, ...)* @printf( i8* %shl_s, i32 %shl_r ) ; <i32>:17 [#uses=0]
- call i32 (i8*, ...)* @printf( i8* %shr_s, i32 %shr_r ) ; <i32>:18 [#uses=0]
+ %and_s = getelementptr [12 x i8], [12 x i8]* @and_str, i64 0, i64 0 ; <i8*> [#uses=1]
+ %or_s = getelementptr [12 x i8], [12 x i8]* @or_str, i64 0, i64 0 ; <i8*> [#uses=1]
+ %xor_s = getelementptr [12 x i8], [12 x i8]* @xor_str, i64 0, i64 0 ; <i8*> [#uses=1]
+ %shl_s = getelementptr [13 x i8], [13 x i8]* @shl_str, i64 0, i64 0 ; <i8*> [#uses=1]
+ %shr_s = getelementptr [13 x i8], [13 x i8]* @shr_str, i64 0, i64 0 ; <i8*> [#uses=1]
+ call i32 (i8*, ...) @printf( i8* %and_s, i32 %and_r ) ; <i32>:14 [#uses=0]
+ call i32 (i8*, ...) @printf( i8* %or_s, i32 %or_r ) ; <i32>:15 [#uses=0]
+ call i32 (i8*, ...) @printf( i8* %xor_s, i32 %xor_r ) ; <i32>:16 [#uses=0]
+ call i32 (i8*, ...) @printf( i8* %shl_s, i32 %shl_r ) ; <i32>:17 [#uses=0]
+ call i32 (i8*, ...) @printf( i8* %shr_s, i32 %shr_r ) ; <i32>:18 [#uses=0]
ret i32 0
}
diff --git a/test/CodeGen/Generic/print-int.ll b/test/CodeGen/Generic/print-int.ll
index 7ca4b3de48a3..85b40c0e24f7 100644
--- a/test/CodeGen/Generic/print-int.ll
+++ b/test/CodeGen/Generic/print-int.ll
@@ -5,9 +5,9 @@
declare i32 @printf(i8*, ...)
define i32 @main() {
- %f = getelementptr [4 x i8]* @.str_1, i64 0, i64 0 ; <i8*> [#uses=1]
+ %f = getelementptr [4 x i8], [4 x i8]* @.str_1, i64 0, i64 0 ; <i8*> [#uses=1]
%d = add i32 0, 0 ; <i32> [#uses=1]
- %tmp.0 = call i32 (i8*, ...)* @printf( i8* %f, i32 %d ) ; <i32> [#uses=0]
+ %tmp.0 = call i32 (i8*, ...) @printf( i8* %f, i32 %d ) ; <i32> [#uses=0]
ret i32 0
}
diff --git a/test/CodeGen/Generic/print-mul-exp.ll b/test/CodeGen/Generic/print-mul-exp.ll
index 90fc55b25838..91c8147aaad9 100644
--- a/test/CodeGen/Generic/print-mul-exp.ll
+++ b/test/CodeGen/Generic/print-mul-exp.ll
@@ -7,10 +7,10 @@
declare i32 @printf(i8*, ...)
define i32 @main() {
- %a = load i32* @A ; <i32> [#uses=21]
- %a_s = getelementptr [8 x i8]* @a_str, i64 0, i64 0 ; <i8*> [#uses=1]
- %a_mul_s = getelementptr [13 x i8]* @a_mul_str, i64 0, i64 0 ; <i8*> [#uses=20]
- call i32 (i8*, ...)* @printf( i8* %a_s, i32 %a ) ; <i32>:1 [#uses=0]
+ %a = load i32, i32* @A ; <i32> [#uses=21]
+ %a_s = getelementptr [8 x i8], [8 x i8]* @a_str, i64 0, i64 0 ; <i8*> [#uses=1]
+ %a_mul_s = getelementptr [13 x i8], [13 x i8]* @a_mul_str, i64 0, i64 0 ; <i8*> [#uses=20]
+ call i32 (i8*, ...) @printf( i8* %a_s, i32 %a ) ; <i32>:1 [#uses=0]
%r_0 = mul i32 %a, 0 ; <i32> [#uses=1]
%r_1 = mul i32 %a, 1 ; <i32> [#uses=1]
%r_2 = mul i32 %a, 2 ; <i32> [#uses=1]
@@ -31,25 +31,25 @@ define i32 @main() {
%r_17 = mul i32 %a, 17 ; <i32> [#uses=1]
%r_18 = mul i32 %a, 18 ; <i32> [#uses=1]
%r_19 = mul i32 %a, 19 ; <i32> [#uses=1]
- call i32 (i8*, ...)* @printf( i8* %a_mul_s, i32 0, i32 %r_0 ) ; <i32>:2 [#uses=0]
- call i32 (i8*, ...)* @printf( i8* %a_mul_s, i32 1, i32 %r_1 ) ; <i32>:3 [#uses=0]
- call i32 (i8*, ...)* @printf( i8* %a_mul_s, i32 2, i32 %r_2 ) ; <i32>:4 [#uses=0]
- call i32 (i8*, ...)* @printf( i8* %a_mul_s, i32 3, i32 %r_3 ) ; <i32>:5 [#uses=0]
- call i32 (i8*, ...)* @printf( i8* %a_mul_s, i32 4, i32 %r_4 ) ; <i32>:6 [#uses=0]
- call i32 (i8*, ...)* @printf( i8* %a_mul_s, i32 5, i32 %r_5 ) ; <i32>:7 [#uses=0]
- call i32 (i8*, ...)* @printf( i8* %a_mul_s, i32 6, i32 %r_6 ) ; <i32>:8 [#uses=0]
- call i32 (i8*, ...)* @printf( i8* %a_mul_s, i32 7, i32 %r_7 ) ; <i32>:9 [#uses=0]
- call i32 (i8*, ...)* @printf( i8* %a_mul_s, i32 8, i32 %r_8 ) ; <i32>:10 [#uses=0]
- call i32 (i8*, ...)* @printf( i8* %a_mul_s, i32 9, i32 %r_9 ) ; <i32>:11 [#uses=0]
- call i32 (i8*, ...)* @printf( i8* %a_mul_s, i32 10, i32 %r_10 ) ; <i32>:12 [#uses=0]
- call i32 (i8*, ...)* @printf( i8* %a_mul_s, i32 11, i32 %r_11 ) ; <i32>:13 [#uses=0]
- call i32 (i8*, ...)* @printf( i8* %a_mul_s, i32 12, i32 %r_12 ) ; <i32>:14 [#uses=0]
- call i32 (i8*, ...)* @printf( i8* %a_mul_s, i32 13, i32 %r_13 ) ; <i32>:15 [#uses=0]
- call i32 (i8*, ...)* @printf( i8* %a_mul_s, i32 14, i32 %r_14 ) ; <i32>:16 [#uses=0]
- call i32 (i8*, ...)* @printf( i8* %a_mul_s, i32 15, i32 %r_15 ) ; <i32>:17 [#uses=0]
- call i32 (i8*, ...)* @printf( i8* %a_mul_s, i32 16, i32 %r_16 ) ; <i32>:18 [#uses=0]
- call i32 (i8*, ...)* @printf( i8* %a_mul_s, i32 17, i32 %r_17 ) ; <i32>:19 [#uses=0]
- call i32 (i8*, ...)* @printf( i8* %a_mul_s, i32 18, i32 %r_18 ) ; <i32>:20 [#uses=0]
- call i32 (i8*, ...)* @printf( i8* %a_mul_s, i32 19, i32 %r_19 ) ; <i32>:21 [#uses=0]
+ call i32 (i8*, ...) @printf( i8* %a_mul_s, i32 0, i32 %r_0 ) ; <i32>:2 [#uses=0]
+ call i32 (i8*, ...) @printf( i8* %a_mul_s, i32 1, i32 %r_1 ) ; <i32>:3 [#uses=0]
+ call i32 (i8*, ...) @printf( i8* %a_mul_s, i32 2, i32 %r_2 ) ; <i32>:4 [#uses=0]
+ call i32 (i8*, ...) @printf( i8* %a_mul_s, i32 3, i32 %r_3 ) ; <i32>:5 [#uses=0]
+ call i32 (i8*, ...) @printf( i8* %a_mul_s, i32 4, i32 %r_4 ) ; <i32>:6 [#uses=0]
+ call i32 (i8*, ...) @printf( i8* %a_mul_s, i32 5, i32 %r_5 ) ; <i32>:7 [#uses=0]
+ call i32 (i8*, ...) @printf( i8* %a_mul_s, i32 6, i32 %r_6 ) ; <i32>:8 [#uses=0]
+ call i32 (i8*, ...) @printf( i8* %a_mul_s, i32 7, i32 %r_7 ) ; <i32>:9 [#uses=0]
+ call i32 (i8*, ...) @printf( i8* %a_mul_s, i32 8, i32 %r_8 ) ; <i32>:10 [#uses=0]
+ call i32 (i8*, ...) @printf( i8* %a_mul_s, i32 9, i32 %r_9 ) ; <i32>:11 [#uses=0]
+ call i32 (i8*, ...) @printf( i8* %a_mul_s, i32 10, i32 %r_10 ) ; <i32>:12 [#uses=0]
+ call i32 (i8*, ...) @printf( i8* %a_mul_s, i32 11, i32 %r_11 ) ; <i32>:13 [#uses=0]
+ call i32 (i8*, ...) @printf( i8* %a_mul_s, i32 12, i32 %r_12 ) ; <i32>:14 [#uses=0]
+ call i32 (i8*, ...) @printf( i8* %a_mul_s, i32 13, i32 %r_13 ) ; <i32>:15 [#uses=0]
+ call i32 (i8*, ...) @printf( i8* %a_mul_s, i32 14, i32 %r_14 ) ; <i32>:16 [#uses=0]
+ call i32 (i8*, ...) @printf( i8* %a_mul_s, i32 15, i32 %r_15 ) ; <i32>:17 [#uses=0]
+ call i32 (i8*, ...) @printf( i8* %a_mul_s, i32 16, i32 %r_16 ) ; <i32>:18 [#uses=0]
+ call i32 (i8*, ...) @printf( i8* %a_mul_s, i32 17, i32 %r_17 ) ; <i32>:19 [#uses=0]
+ call i32 (i8*, ...) @printf( i8* %a_mul_s, i32 18, i32 %r_18 ) ; <i32>:20 [#uses=0]
+ call i32 (i8*, ...) @printf( i8* %a_mul_s, i32 19, i32 %r_19 ) ; <i32>:21 [#uses=0]
ret i32 0
}
diff --git a/test/CodeGen/Generic/print-mul.ll b/test/CodeGen/Generic/print-mul.ll
index 0707f3c2318c..4b60d759278a 100644
--- a/test/CodeGen/Generic/print-mul.ll
+++ b/test/CodeGen/Generic/print-mul.ll
@@ -10,19 +10,19 @@ declare i32 @printf(i8*, ...)
define i32 @main() {
entry:
- %a = load i32* @A ; <i32> [#uses=2]
- %b = load i32* @B ; <i32> [#uses=1]
- %a_s = getelementptr [8 x i8]* @a_str, i64 0, i64 0 ; <i8*> [#uses=1]
- %b_s = getelementptr [8 x i8]* @b_str, i64 0, i64 0 ; <i8*> [#uses=1]
- %a_mul_s = getelementptr [13 x i8]* @a_mul_str, i64 0, i64 0 ; <i8*> [#uses=1]
- call i32 (i8*, ...)* @printf( i8* %a_s, i32 %a ) ; <i32>:0 [#uses=0]
- call i32 (i8*, ...)* @printf( i8* %b_s, i32 %b ) ; <i32>:1 [#uses=0]
+ %a = load i32, i32* @A ; <i32> [#uses=2]
+ %b = load i32, i32* @B ; <i32> [#uses=1]
+ %a_s = getelementptr [8 x i8], [8 x i8]* @a_str, i64 0, i64 0 ; <i8*> [#uses=1]
+ %b_s = getelementptr [8 x i8], [8 x i8]* @b_str, i64 0, i64 0 ; <i8*> [#uses=1]
+ %a_mul_s = getelementptr [13 x i8], [13 x i8]* @a_mul_str, i64 0, i64 0 ; <i8*> [#uses=1]
+ call i32 (i8*, ...) @printf( i8* %a_s, i32 %a ) ; <i32>:0 [#uses=0]
+ call i32 (i8*, ...) @printf( i8* %b_s, i32 %b ) ; <i32>:1 [#uses=0]
br label %shl_test
shl_test: ; preds = %shl_test, %entry
%s = phi i32 [ 0, %entry ], [ %s_inc, %shl_test ] ; <i32> [#uses=4]
%result = mul i32 %a, %s ; <i32> [#uses=1]
- call i32 (i8*, ...)* @printf( i8* %a_mul_s, i32 %s, i32 %result ) ; <i32>:2 [#uses=0]
+ call i32 (i8*, ...) @printf( i8* %a_mul_s, i32 %s, i32 %result ) ; <i32>:2 [#uses=0]
%s_inc = add i32 %s, 1 ; <i32> [#uses=1]
%done = icmp eq i32 %s, 256 ; <i1> [#uses=1]
br i1 %done, label %fini, label %shl_test
diff --git a/test/CodeGen/Generic/print-shift.ll b/test/CodeGen/Generic/print-shift.ll
index 6c5d222209be..56b3ec1df760 100644
--- a/test/CodeGen/Generic/print-shift.ll
+++ b/test/CodeGen/Generic/print-shift.ll
@@ -10,20 +10,20 @@ declare i32 @printf(i8*, ...)
define i32 @main() {
entry:
- %a = load i32* @A ; <i32> [#uses=2]
- %b = load i32* @B ; <i32> [#uses=1]
- %a_s = getelementptr [8 x i8]* @a_str, i64 0, i64 0 ; <i8*> [#uses=1]
- %b_s = getelementptr [8 x i8]* @b_str, i64 0, i64 0 ; <i8*> [#uses=1]
- %a_shl_s = getelementptr [14 x i8]* @a_shl_str, i64 0, i64 0 ; <i8*> [#uses=1]
- call i32 (i8*, ...)* @printf( i8* %a_s, i32 %a ) ; <i32>:0 [#uses=0]
- call i32 (i8*, ...)* @printf( i8* %b_s, i32 %b ) ; <i32>:1 [#uses=0]
+ %a = load i32, i32* @A ; <i32> [#uses=2]
+ %b = load i32, i32* @B ; <i32> [#uses=1]
+ %a_s = getelementptr [8 x i8], [8 x i8]* @a_str, i64 0, i64 0 ; <i8*> [#uses=1]
+ %b_s = getelementptr [8 x i8], [8 x i8]* @b_str, i64 0, i64 0 ; <i8*> [#uses=1]
+ %a_shl_s = getelementptr [14 x i8], [14 x i8]* @a_shl_str, i64 0, i64 0 ; <i8*> [#uses=1]
+ call i32 (i8*, ...) @printf( i8* %a_s, i32 %a ) ; <i32>:0 [#uses=0]
+ call i32 (i8*, ...) @printf( i8* %b_s, i32 %b ) ; <i32>:1 [#uses=0]
br label %shl_test
shl_test: ; preds = %shl_test, %entry
%s = phi i8 [ 0, %entry ], [ %s_inc, %shl_test ] ; <i8> [#uses=4]
%shift.upgrd.1 = zext i8 %s to i32 ; <i32> [#uses=1]
%result = shl i32 %a, %shift.upgrd.1 ; <i32> [#uses=1]
- call i32 (i8*, ...)* @printf( i8* %a_shl_s, i8 %s, i32 %result ) ; <i32>:2 [#uses=0]
+ call i32 (i8*, ...) @printf( i8* %a_shl_s, i8 %s, i32 %result ) ; <i32>:2 [#uses=0]
%s_inc = add i8 %s, 1 ; <i8> [#uses=1]
%done = icmp eq i8 %s, 32 ; <i1> [#uses=1]
br i1 %done, label %fini, label %shl_test
diff --git a/test/CodeGen/Generic/ptr-annotate.ll b/test/CodeGen/Generic/ptr-annotate.ll
index ac5bd5533e9e..4c10daa8223f 100644
--- a/test/CodeGen/Generic/ptr-annotate.ll
+++ b/test/CodeGen/Generic/ptr-annotate.ll
@@ -10,7 +10,7 @@
define void @foo() {
entry:
%m = alloca i8, align 4
- %0 = call i8* @llvm.ptr.annotation.p0i8(i8* %m, i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i8* getelementptr inbounds ([4 x i8]* @.str1, i32 0, i32 0), i32 2)
+ %0 = call i8* @llvm.ptr.annotation.p0i8(i8* %m, i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str1, i32 0, i32 0), i32 2)
store i8 1, i8* %0, align 4
ret void
}
diff --git a/test/CodeGen/Generic/select.ll b/test/CodeGen/Generic/select.ll
index c4841b79acb6..1958cd9d71cb 100644
--- a/test/CodeGen/Generic/select.ll
+++ b/test/CodeGen/Generic/select.ll
@@ -70,7 +70,7 @@ define i32* @castconst(float) {
%castsmall = trunc i64 1 to i32 ; <i32> [#uses=1]
%usebig = add i32 %castbig, %castsmall ; <i32> [#uses=0]
%castglob = bitcast i32* @AConst to i64* ; <i64*> [#uses=1]
- %dummyl = load i64* %castglob ; <i64> [#uses=0]
+ %dummyl = load i64, i64* %castglob ; <i64> [#uses=0]
%castnull = inttoptr i64 0 to i32* ; <i32*> [#uses=1]
ret i32* %castnull
}
@@ -155,7 +155,7 @@ bb2:
%cast116 = ptrtoint i32* %A to i64 ; <i64> [#uses=1]
%reg116 = add i64 %cast116, %cast115 ; <i64> [#uses=1]
%castPtr = inttoptr i64 %reg116 to i32* ; <i32*> [#uses=1]
- %reg118 = load i32* %castPtr ; <i32> [#uses=1]
+ %reg118 = load i32, i32* %castPtr ; <i32> [#uses=1]
%cast117 = sext i32 %reg118 to i64 ; <i64> [#uses=2]
%reg159 = add i64 1234567, %cast117 ; <i64> [#uses=0]
%reg160 = add i64 7654321, %cast117 ; <i64> [#uses=0]
@@ -180,8 +180,8 @@ define void @checkNot(i1 %b, i32 %i) {
; Test case for folding getelementptr into a load/store
;
define i32 @checkFoldGEP(%Domain* %D, i64 %idx) {
- %reg841 = getelementptr %Domain* %D, i64 0, i32 1 ; <i32*> [#uses=1]
- %reg820 = load i32* %reg841 ; <i32> [#uses=1]
+ %reg841 = getelementptr %Domain, %Domain* %D, i64 0, i32 1 ; <i32*> [#uses=1]
+ %reg820 = load i32, i32* %reg841 ; <i32> [#uses=1]
ret i32 %reg820
}
diff --git a/test/CodeGen/Generic/stop-after.ll b/test/CodeGen/Generic/stop-after.ll
index 5e0e350bc7fe..557e097840af 100644
--- a/test/CodeGen/Generic/stop-after.ll
+++ b/test/CodeGen/Generic/stop-after.ll
@@ -5,6 +5,6 @@
; STOP: Loop Strength Reduction
; STOP-NEXT: Machine Function Analysis
-; START: -machine-branch-prob -jump-instr-tables -gc-lowering
+; START: -machine-branch-prob -gc-lowering
; START: FunctionPass Manager
; START-NEXT: Lower Garbage Collection Instructions
diff --git a/test/CodeGen/Generic/undef-phi.ll b/test/CodeGen/Generic/undef-phi.ll
index 10899f9fa2db..03f3a6a2d1ba 100644
--- a/test/CodeGen/Generic/undef-phi.ll
+++ b/test/CodeGen/Generic/undef-phi.ll
@@ -13,14 +13,14 @@ entry:
for.body:
%stack.addr.02 = phi %struct.xx_stack* [ %0, %for.body ], [ %stack, %entry ]
- %next = getelementptr inbounds %struct.xx_stack* %stack.addr.02, i64 0, i32 1
- %0 = load %struct.xx_stack** %next, align 8
+ %next = getelementptr inbounds %struct.xx_stack, %struct.xx_stack* %stack.addr.02, i64 0, i32 1
+ %0 = load %struct.xx_stack*, %struct.xx_stack** %next, align 8
%tobool = icmp eq %struct.xx_stack* %0, null
br i1 %tobool, label %for.end, label %for.body
for.end:
%top.0.lcssa = phi %struct.xx_stack* [ undef, %entry ], [ %stack.addr.02, %for.body ]
- %first = getelementptr inbounds %struct.xx_stack* %top.0.lcssa, i64 0, i32 0
- %1 = load i32* %first, align 4
+ %first = getelementptr inbounds %struct.xx_stack, %struct.xx_stack* %top.0.lcssa, i64 0, i32 0
+ %1 = load i32, i32* %first, align 4
ret i32 %1
}
diff --git a/test/CodeGen/Generic/v-split.ll b/test/CodeGen/Generic/v-split.ll
index 634b5621aa99..00c62f389520 100644
--- a/test/CodeGen/Generic/v-split.ll
+++ b/test/CodeGen/Generic/v-split.ll
@@ -2,8 +2,8 @@
%f8 = type <8 x float>
define void @test_f8(%f8 *%P, %f8* %Q, %f8 *%S) {
- %p = load %f8* %P
- %q = load %f8* %Q
+ %p = load %f8, %f8* %P
+ %q = load %f8, %f8* %Q
%R = fadd %f8 %p, %q
store %f8 %R, %f8 *%S
ret void
diff --git a/test/CodeGen/Generic/vector-casts.ll b/test/CodeGen/Generic/vector-casts.ll
index a26918b8f242..fee72b6b6585 100644
--- a/test/CodeGen/Generic/vector-casts.ll
+++ b/test/CodeGen/Generic/vector-casts.ll
@@ -2,43 +2,43 @@
; PR2671
define void @a(<2 x double>* %p, <2 x i8>* %q) {
- %t = load <2 x double>* %p
+ %t = load <2 x double>, <2 x double>* %p
%r = fptosi <2 x double> %t to <2 x i8>
store <2 x i8> %r, <2 x i8>* %q
ret void
}
define void @b(<2 x double>* %p, <2 x i8>* %q) {
- %t = load <2 x double>* %p
+ %t = load <2 x double>, <2 x double>* %p
%r = fptoui <2 x double> %t to <2 x i8>
store <2 x i8> %r, <2 x i8>* %q
ret void
}
define void @c(<2 x i8>* %p, <2 x double>* %q) {
- %t = load <2 x i8>* %p
+ %t = load <2 x i8>, <2 x i8>* %p
%r = sitofp <2 x i8> %t to <2 x double>
store <2 x double> %r, <2 x double>* %q
ret void
}
define void @d(<2 x i8>* %p, <2 x double>* %q) {
- %t = load <2 x i8>* %p
+ %t = load <2 x i8>, <2 x i8>* %p
%r = uitofp <2 x i8> %t to <2 x double>
store <2 x double> %r, <2 x double>* %q
ret void
}
define void @e(<2 x i8>* %p, <2 x i16>* %q) {
- %t = load <2 x i8>* %p
+ %t = load <2 x i8>, <2 x i8>* %p
%r = sext <2 x i8> %t to <2 x i16>
store <2 x i16> %r, <2 x i16>* %q
ret void
}
define void @f(<2 x i8>* %p, <2 x i16>* %q) {
- %t = load <2 x i8>* %p
+ %t = load <2 x i8>, <2 x i8>* %p
%r = zext <2 x i8> %t to <2 x i16>
store <2 x i16> %r, <2 x i16>* %q
ret void
}
define void @g(<2 x i16>* %p, <2 x i8>* %q) {
- %t = load <2 x i16>* %p
+ %t = load <2 x i16>, <2 x i16>* %p
%r = trunc <2 x i16> %t to <2 x i8>
store <2 x i8> %r, <2 x i8>* %q
ret void
diff --git a/test/CodeGen/Generic/vector-identity-shuffle.ll b/test/CodeGen/Generic/vector-identity-shuffle.ll
index 332d6d8c2536..d933f222320d 100644
--- a/test/CodeGen/Generic/vector-identity-shuffle.ll
+++ b/test/CodeGen/Generic/vector-identity-shuffle.ll
@@ -2,7 +2,7 @@
define void @test(<4 x float>* %tmp2.i) {
- %tmp2.i.upgrd.1 = load <4 x float>* %tmp2.i ; <<4 x float>> [#uses=4]
+ %tmp2.i.upgrd.1 = load <4 x float>, <4 x float>* %tmp2.i ; <<4 x float>> [#uses=4]
%xFloat0.48 = extractelement <4 x float> %tmp2.i.upgrd.1, i32 0 ; <float> [#uses=1]
%inFloat0.49 = insertelement <4 x float> undef, float %xFloat0.48, i32 0 ; <<4 x float>> [#uses=1]
%xFloat1.50 = extractelement <4 x float> %tmp2.i.upgrd.1, i32 1 ; <float> [#uses=1]
diff --git a/test/CodeGen/Generic/vector.ll b/test/CodeGen/Generic/vector.ll
index bc7c7d00a11c..962b1295b5de 100644
--- a/test/CodeGen/Generic/vector.ll
+++ b/test/CodeGen/Generic/vector.ll
@@ -1,6 +1,5 @@
; Test that vectors are scalarized/lowered correctly.
; RUN: llc < %s
-; XFAIL: hexagon
%d8 = type <8 x double>
%f1 = type <1 x float>
@@ -12,48 +11,48 @@
;;; TEST HANDLING OF VARIOUS VECTOR SIZES
define void @test_f1(%f1* %P, %f1* %Q, %f1* %S) {
- %p = load %f1* %P ; <%f1> [#uses=1]
- %q = load %f1* %Q ; <%f1> [#uses=1]
+ %p = load %f1, %f1* %P ; <%f1> [#uses=1]
+ %q = load %f1, %f1* %Q ; <%f1> [#uses=1]
%R = fadd %f1 %p, %q ; <%f1> [#uses=1]
store %f1 %R, %f1* %S
ret void
}
define void @test_f2(%f2* %P, %f2* %Q, %f2* %S) {
- %p = load %f2* %P ; <%f2> [#uses=1]
- %q = load %f2* %Q ; <%f2> [#uses=1]
+ %p = load %f2, %f2* %P ; <%f2> [#uses=1]
+ %q = load %f2, %f2* %Q ; <%f2> [#uses=1]
%R = fadd %f2 %p, %q ; <%f2> [#uses=1]
store %f2 %R, %f2* %S
ret void
}
define void @test_f4(%f4* %P, %f4* %Q, %f4* %S) {
- %p = load %f4* %P ; <%f4> [#uses=1]
- %q = load %f4* %Q ; <%f4> [#uses=1]
+ %p = load %f4, %f4* %P ; <%f4> [#uses=1]
+ %q = load %f4, %f4* %Q ; <%f4> [#uses=1]
%R = fadd %f4 %p, %q ; <%f4> [#uses=1]
store %f4 %R, %f4* %S
ret void
}
define void @test_f8(%f8* %P, %f8* %Q, %f8* %S) {
- %p = load %f8* %P ; <%f8> [#uses=1]
- %q = load %f8* %Q ; <%f8> [#uses=1]
+ %p = load %f8, %f8* %P ; <%f8> [#uses=1]
+ %q = load %f8, %f8* %Q ; <%f8> [#uses=1]
%R = fadd %f8 %p, %q ; <%f8> [#uses=1]
store %f8 %R, %f8* %S
ret void
}
define void @test_fmul(%f8* %P, %f8* %Q, %f8* %S) {
- %p = load %f8* %P ; <%f8> [#uses=1]
- %q = load %f8* %Q ; <%f8> [#uses=1]
+ %p = load %f8, %f8* %P ; <%f8> [#uses=1]
+ %q = load %f8, %f8* %Q ; <%f8> [#uses=1]
%R = fmul %f8 %p, %q ; <%f8> [#uses=1]
store %f8 %R, %f8* %S
ret void
}
define void @test_div(%f8* %P, %f8* %Q, %f8* %S) {
- %p = load %f8* %P ; <%f8> [#uses=1]
- %q = load %f8* %Q ; <%f8> [#uses=1]
+ %p = load %f8, %f8* %P ; <%f8> [#uses=1]
+ %q = load %f8, %f8* %Q ; <%f8> [#uses=1]
%R = fdiv %f8 %p, %q ; <%f8> [#uses=1]
store %f8 %R, %f8* %S
ret void
@@ -63,21 +62,21 @@ define void @test_div(%f8* %P, %f8* %Q, %f8* %S) {
define void @test_cst(%f4* %P, %f4* %S) {
- %p = load %f4* %P ; <%f4> [#uses=1]
+ %p = load %f4, %f4* %P ; <%f4> [#uses=1]
%R = fadd %f4 %p, < float 0x3FB99999A0000000, float 1.000000e+00, float 2.000000e+00, float 4.500000e+00 > ; <%f4> [#uses=1]
store %f4 %R, %f4* %S
ret void
}
define void @test_zero(%f4* %P, %f4* %S) {
- %p = load %f4* %P ; <%f4> [#uses=1]
+ %p = load %f4, %f4* %P ; <%f4> [#uses=1]
%R = fadd %f4 %p, zeroinitializer ; <%f4> [#uses=1]
store %f4 %R, %f4* %S
ret void
}
define void @test_undef(%f4* %P, %f4* %S) {
- %p = load %f4* %P ; <%f4> [#uses=1]
+ %p = load %f4, %f4* %P ; <%f4> [#uses=1]
%R = fadd %f4 %p, undef ; <%f4> [#uses=1]
store %f4 %R, %f4* %S
ret void
@@ -102,19 +101,19 @@ define void @test_scalar_to_vector(float %F, %f4* %S) {
}
define float @test_extract_elt(%f8* %P) {
- %p = load %f8* %P ; <%f8> [#uses=1]
+ %p = load %f8, %f8* %P ; <%f8> [#uses=1]
%R = extractelement %f8 %p, i32 3 ; <float> [#uses=1]
ret float %R
}
define double @test_extract_elt2(%d8* %P) {
- %p = load %d8* %P ; <%d8> [#uses=1]
+ %p = load %d8, %d8* %P ; <%d8> [#uses=1]
%R = extractelement %d8 %p, i32 3 ; <double> [#uses=1]
ret double %R
}
define void @test_cast_1(%f4* %b, %i4* %a) {
- %tmp = load %f4* %b ; <%f4> [#uses=1]
+ %tmp = load %f4, %f4* %b ; <%f4> [#uses=1]
%tmp2 = fadd %f4 %tmp, < float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00 > ; <%f4> [#uses=1]
%tmp3 = bitcast %f4 %tmp2 to %i4 ; <%i4> [#uses=1]
%tmp4 = add %i4 %tmp3, < i32 1, i32 2, i32 3, i32 4 > ; <%i4> [#uses=1]
@@ -123,7 +122,7 @@ define void @test_cast_1(%f4* %b, %i4* %a) {
}
define void @test_cast_2(%f8* %a, <8 x i32>* %b) {
- %T = load %f8* %a ; <%f8> [#uses=1]
+ %T = load %f8, %f8* %a ; <%f8> [#uses=1]
%T2 = bitcast %f8 %T to <8 x i32> ; <<8 x i32>> [#uses=1]
store <8 x i32> %T2, <8 x i32>* %b
ret void
@@ -136,7 +135,7 @@ define void @splat(%f4* %P, %f4* %Q, float %X) {
%tmp2 = insertelement %f4 %tmp, float %X, i32 1 ; <%f4> [#uses=1]
%tmp4 = insertelement %f4 %tmp2, float %X, i32 2 ; <%f4> [#uses=1]
%tmp6 = insertelement %f4 %tmp4, float %X, i32 3 ; <%f4> [#uses=1]
- %q = load %f4* %Q ; <%f4> [#uses=1]
+ %q = load %f4, %f4* %Q ; <%f4> [#uses=1]
%R = fadd %f4 %q, %tmp6 ; <%f4> [#uses=1]
store %f4 %R, %f4* %P
ret void
@@ -147,13 +146,13 @@ define void @splat_i4(%i4* %P, %i4* %Q, i32 %X) {
%tmp2 = insertelement %i4 %tmp, i32 %X, i32 1 ; <%i4> [#uses=1]
%tmp4 = insertelement %i4 %tmp2, i32 %X, i32 2 ; <%i4> [#uses=1]
%tmp6 = insertelement %i4 %tmp4, i32 %X, i32 3 ; <%i4> [#uses=1]
- %q = load %i4* %Q ; <%i4> [#uses=1]
+ %q = load %i4, %i4* %Q ; <%i4> [#uses=1]
%R = add %i4 %q, %tmp6 ; <%i4> [#uses=1]
store %i4 %R, %i4* %P
ret void
}
define <2 x i32*> @vector_gep(<2 x [3 x {i32, i32}]*> %a) {
- %w = getelementptr <2 x [3 x {i32, i32}]*> %a, <2 x i32> <i32 1, i32 2>, <2 x i32> <i32 2, i32 3>, <2 x i32> <i32 1, i32 1>
+ %w = getelementptr [3 x {i32, i32}], <2 x [3 x {i32, i32}]*> %a, <2 x i32> <i32 1, i32 2>, <2 x i32> <i32 2, i32 3>, <2 x i32> <i32 1, i32 1>
ret <2 x i32*> %w
}
diff --git a/test/CodeGen/Hexagon/BranchPredict.ll b/test/CodeGen/Hexagon/BranchPredict.ll
index 5d564493e507..17d169974e5a 100644
--- a/test/CodeGen/Hexagon/BranchPredict.ll
+++ b/test/CodeGen/Hexagon/BranchPredict.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+; RUN: llc -march=hexagon -ifcvt-limit=0 < %s | FileCheck %s
; Check if the branch probabilities are reflected in the instructions:
; The basic block placement pass should place the more probable successor
@@ -53,7 +53,7 @@ return: ; preds = %if.else, %if.then
define i32 @foo_bar(i32 %a, i16 signext %b) nounwind {
; CHECK: if{{ *}}(!cmp.eq(r{{[0-9]*}}.new, #0)) jump:nt
entry:
- %0 = load i32* @j, align 4
+ %0 = load i32, i32* @j, align 4
%tobool = icmp eq i32 %0, 0
br i1 %tobool, label %if.else, label %if.then, !prof !0
diff --git a/test/CodeGen/Hexagon/absaddr-store.ll b/test/CodeGen/Hexagon/absaddr-store.ll
index 5c2554df8aeb..3be4b1cc2614 100644
--- a/test/CodeGen/Hexagon/absaddr-store.ll
+++ b/test/CodeGen/Hexagon/absaddr-store.ll
@@ -9,7 +9,7 @@
define zeroext i8 @absStoreByte() nounwind {
; CHECK: memb(##b){{ *}}={{ *}}r{{[0-9]+}}
entry:
- %0 = load i8* @b, align 1
+ %0 = load i8, i8* @b, align 1
%conv = zext i8 %0 to i32
%mul = mul nsw i32 100, %conv
%conv1 = trunc i32 %mul to i8
@@ -20,7 +20,7 @@ entry:
define signext i16 @absStoreHalf() nounwind {
; CHECK: memh(##c){{ *}}={{ *}}r{{[0-9]+}}
entry:
- %0 = load i16* @c, align 2
+ %0 = load i16, i16* @c, align 2
%conv = sext i16 %0 to i32
%mul = mul nsw i32 100, %conv
%conv1 = trunc i32 %mul to i16
@@ -31,7 +31,7 @@ entry:
define i32 @absStoreWord() nounwind {
; CHECK: memw(##a){{ *}}={{ *}}r{{[0-9]+}}
entry:
- %0 = load i32* @a, align 4
+ %0 = load i32, i32* @a, align 4
%mul = mul nsw i32 100, %0
store i32 %mul, i32* @a, align 4
ret i32 %mul
diff --git a/test/CodeGen/Hexagon/absimm.ll b/test/CodeGen/Hexagon/absimm.ll
index b8f5edc26470..07adb3fe49d5 100644
--- a/test/CodeGen/Hexagon/absimm.ll
+++ b/test/CodeGen/Hexagon/absimm.ll
@@ -12,7 +12,7 @@ entry:
define i32* @f2(i32* nocapture %i) nounwind {
entry:
; CHECK: r{{[0-9]+}}{{ *}}={{ *}}memw(##786432)
- %0 = load volatile i32* inttoptr (i32 786432 to i32*), align 262144
+ %0 = load volatile i32, i32* inttoptr (i32 786432 to i32*), align 262144
%1 = inttoptr i32 %0 to i32*
ret i32* %1
}
diff --git a/test/CodeGen/Hexagon/adde.ll b/test/CodeGen/Hexagon/adde.ll
index 6d060c1b9e26..7b29e7ad8a0f 100644
--- a/test/CodeGen/Hexagon/adde.ll
+++ b/test/CodeGen/Hexagon/adde.ll
@@ -1,7 +1,7 @@
-; RUN: llc -march=hexagon < %s | FileCheck %s
+; RUN: llc -march=hexagon -hexagon-expand-condsets=0 < %s | FileCheck %s
-; CHECK: r{{[0-9]+:[0-9]+}} = #0
; CHECK: r{{[0-9]+:[0-9]+}} = #1
+; CHECK: r{{[0-9]+:[0-9]+}} = #0
; CHECK: r{{[0-9]+:[0-9]+}} = add(r{{[0-9]+:[0-9]+}}, r{{[0-9]+:[0-9]+}})
; CHECK: p{{[0-9]+}} = cmp.gtu(r{{[0-9]+:[0-9]+}}, r{{[0-9]+:[0-9]+}})
; CHECK: p{{[0-9]+}} = cmp.gtu(r{{[0-9]+:[0-9]+}}, r{{[0-9]+:[0-9]+}})
diff --git a/test/CodeGen/Hexagon/alu64.ll b/test/CodeGen/Hexagon/alu64.ll
new file mode 100644
index 000000000000..d0824a4ecadc
--- /dev/null
+++ b/test/CodeGen/Hexagon/alu64.ll
@@ -0,0 +1,599 @@
+; RUN: llc -march=hexagon < %s | FileCheck %s
+
+; CHECK-LABEL: @test00
+; CHECK: p0 = cmp.eq(r1:0, r3:2)
+define i32 @test00(i64 %Rs, i64 %Rt) #0 {
+entry:
+ %0 = tail call i32 @llvm.hexagon.C2.cmpeqp(i64 %Rs, i64 %Rt)
+ ret i32 %0
+}
+
+; CHECK-LABEL: @test01
+; CHECK: p0 = cmp.gt(r1:0, r3:2)
+define i32 @test01(i64 %Rs, i64 %Rt) #0 {
+entry:
+ %0 = tail call i32 @llvm.hexagon.C2.cmpgtp(i64 %Rs, i64 %Rt)
+ ret i32 %0
+}
+
+; CHECK-LABEL: @test02
+; CHECK: p0 = cmp.gtu(r1:0, r3:2)
+define i32 @test02(i64 %Rs, i64 %Rt) #0 {
+entry:
+ %0 = tail call i32 @llvm.hexagon.C2.cmpgtup(i64 %Rs, i64 %Rt)
+ ret i32 %0
+}
+
+; CHECK-LABEL: @test10
+; CHECK: r0 = cmp.eq(r0, r1)
+define i32 @test10(i32 %Rs, i32 %Rt) #0 {
+entry:
+ %0 = tail call i32 @llvm.hexagon.A4.rcmpeq(i32 %Rs, i32 %Rt)
+ ret i32 %0
+}
+
+; CHECK-LABEL: @test11
+; CHECK: r0 = !cmp.eq(r0, r1)
+define i32 @test11(i32 %Rs, i32 %Rt) #0 {
+entry:
+ %0 = tail call i32 @llvm.hexagon.A4.rcmpneq(i32 %Rs, i32 %Rt)
+ ret i32 %0
+}
+
+; CHECK-LABEL: @test12
+; CHECK: r0 = cmp.eq(r0, #23)
+define i32 @test12(i32 %Rs) #0 {
+entry:
+ %0 = tail call i32 @llvm.hexagon.A4.rcmpeqi(i32 %Rs, i32 23)
+ ret i32 %0
+}
+
+; CHECK-LABEL: @test13
+; CHECK: r0 = !cmp.eq(r0, #47)
+define i32 @test13(i32 %Rs) #0 {
+entry:
+ %0 = tail call i32 @llvm.hexagon.A4.rcmpneqi(i32 %Rs, i32 47)
+ ret i32 %0
+}
+
+; CHECK-LABEL: @test20
+; CHECK: p0 = cmpb.eq(r0, r1)
+define i32 @test20(i32 %Rs, i32 %Rt) #0 {
+entry:
+ %0 = tail call i32 @llvm.hexagon.A4.cmpbeq(i32 %Rs, i32 %Rt)
+ ret i32 %0
+}
+
+; CHECK-LABEL: @test21
+; CHECK: p0 = cmpb.gt(r0, r1)
+define i32 @test21(i32 %Rs, i32 %Rt) #0 {
+entry:
+ %0 = tail call i32 @llvm.hexagon.A4.cmpbgt(i32 %Rs, i32 %Rt)
+ ret i32 %0
+}
+
+; CHECK-LABEL: @test22
+; CHECK: p0 = cmpb.gtu(r0, r1)
+define i32 @test22(i32 %Rs, i32 %Rt) #0 {
+entry:
+ %0 = tail call i32 @llvm.hexagon.A4.cmpbgtu(i32 %Rs, i32 %Rt)
+ ret i32 %0
+}
+
+; CHECK-LABEL: @test23
+; CHECK: p0 = cmpb.eq(r0, #56)
+define i32 @test23(i32 %Rs) #0 {
+entry:
+ %0 = tail call i32 @llvm.hexagon.A4.cmpbeqi(i32 %Rs, i32 56)
+ ret i32 %0
+}
+
+; CHECK-LABEL: @test24
+; CHECK: p0 = cmpb.gt(r0, #29)
+define i32 @test24(i32 %Rs) #0 {
+entry:
+ %0 = tail call i32 @llvm.hexagon.A4.cmpbgti(i32 %Rs, i32 29)
+ ret i32 %0
+}
+
+; CHECK-LABEL: @test25
+; CHECK: p0 = cmpb.gtu(r0, #111)
+define i32 @test25(i32 %Rs) #0 {
+entry:
+ %0 = tail call i32 @llvm.hexagon.A4.cmpbgtui(i32 %Rs, i32 111)
+ ret i32 %0
+}
+
+; CHECK-LABEL: @test30
+; CHECK: p0 = cmph.eq(r0, r1)
+define i32 @test30(i32 %Rs, i32 %Rt) #0 {
+entry:
+ %0 = tail call i32 @llvm.hexagon.A4.cmpheq(i32 %Rs, i32 %Rt)
+ ret i32 %0
+}
+
+; CHECK-LABEL: @test31
+; CHECK: p0 = cmph.gt(r0, r1)
+define i32 @test31(i32 %Rs, i32 %Rt) #0 {
+entry:
+ %0 = tail call i32 @llvm.hexagon.A4.cmphgt(i32 %Rs, i32 %Rt)
+ ret i32 %0
+}
+
+; CHECK-LABEL: @test32
+; CHECK: p0 = cmph.gtu(r0, r1)
+define i32 @test32(i32 %Rs, i32 %Rt) #0 {
+entry:
+ %0 = tail call i32 @llvm.hexagon.A4.cmphgtu(i32 %Rs, i32 %Rt)
+ ret i32 %0
+}
+
+; CHECK-LABEL: @test33
+; CHECK: p0 = cmph.eq(r0, #-123)
+define i32 @test33(i32 %Rs) #0 {
+entry:
+ %0 = tail call i32 @llvm.hexagon.A4.cmpheqi(i32 %Rs, i32 -123)
+ ret i32 %0
+}
+
+; CHECK-LABEL: @test34
+; CHECK: p0 = cmph.gt(r0, #-3)
+define i32 @test34(i32 %Rs) #0 {
+entry:
+ %0 = tail call i32 @llvm.hexagon.A4.cmphgti(i32 %Rs, i32 -3)
+ ret i32 %0
+}
+
+; CHECK-LABEL: @test35
+; CHECK: p0 = cmph.gtu(r0, #13)
+define i32 @test35(i32 %Rs) #0 {
+entry:
+ %0 = tail call i32 @llvm.hexagon.A4.cmphgtui(i32 %Rs, i32 13)
+ ret i32 %0
+}
+
+; CHECK-LABEL: @test40
+; CHECK: r1:0 = vmux(p0, r3:2, r5:4)
+define i64 @test40(i32 %Pu, i64 %Rs, i64 %Rt) #0 {
+entry:
+ %0 = tail call i64 @llvm.hexagon.C2.vmux(i32 %Pu, i64 %Rs, i64 %Rt)
+ ret i64 %0
+}
+
+; CHECK-LABEL: @test41
+; CHECK: p0 = any8(vcmpb.eq(r1:0, r3:2))
+define i32 @test41(i64 %Rs, i64 %Rt) #0 {
+entry:
+ %0 = tail call i32 @llvm.hexagon.A4.vcmpbeq.any(i64 %Rs, i64 %Rt)
+ ret i32 %0
+}
+
+; CHECK-LABEL: @test50
+; CHECK: r1:0 = add(r1:0, r3:2)
+define i64 @test50(i64 %Rs, i64 %Rt) #0 {
+entry:
+ %0 = tail call i64 @llvm.hexagon.A2.addp(i64 %Rs, i64 %Rt)
+ ret i64 %0
+}
+
+; CHECK-LABEL: @test51
+; CHECK: r1:0 = add(r1:0, r3:2):sat
+define i64 @test51(i64 %Rs, i64 %Rt) #0 {
+entry:
+ %0 = tail call i64 @llvm.hexagon.A2.addpsat(i64 %Rs, i64 %Rt)
+ ret i64 %0
+}
+
+; CHECK-LABEL: @test52
+; CHECK: r1:0 = sub(r1:0, r3:2)
+define i64 @test52(i64 %Rs, i64 %Rt) #0 {
+entry:
+ %0 = tail call i64 @llvm.hexagon.A2.subp(i64 %Rs, i64 %Rt)
+ ret i64 %0
+}
+
+; CHECK-LABEL: @test53
+; CHECK: r1:0 = add(r0, r3:2)
+define i64 @test53(i32 %Rs, i64 %Rt) #0 {
+entry:
+ %0 = tail call i64 @llvm.hexagon.A2.addsp(i32 %Rs, i64 %Rt)
+ ret i64 %0
+}
+
+; CHECK-LABEL: @test54
+; CHECK: r1:0 = and(r1:0, r3:2)
+define i64 @test54(i64 %Rs, i64 %Rt) #0 {
+entry:
+ %0 = tail call i64 @llvm.hexagon.A2.andp(i64 %Rs, i64 %Rt)
+ ret i64 %0
+}
+
+; CHECK-LABEL: @test55
+; CHECK: r1:0 = or(r1:0, r3:2)
+define i64 @test55(i64 %Rs, i64 %Rt) #0 {
+entry:
+ %0 = tail call i64 @llvm.hexagon.A2.orp(i64 %Rs, i64 %Rt)
+ ret i64 %0
+}
+
+; CHECK-LABEL: @test56
+; CHECK: r1:0 = xor(r1:0, r3:2)
+define i64 @test56(i64 %Rs, i64 %Rt) #0 {
+entry:
+ %0 = tail call i64 @llvm.hexagon.A2.xorp(i64 %Rs, i64 %Rt)
+ ret i64 %0
+}
+
+; CHECK-LABEL: @test57
+; CHECK: r1:0 = and(r1:0, ~r3:2)
+define i64 @test57(i64 %Rs, i64 %Rt) #0 {
+entry:
+ %0 = tail call i64 @llvm.hexagon.A4.andnp(i64 %Rs, i64 %Rt)
+ ret i64 %0
+}
+
+; CHECK-LABEL: @test58
+; CHECK: r1:0 = or(r1:0, ~r3:2)
+define i64 @test58(i64 %Rs, i64 %Rt) #0 {
+entry:
+ %0 = tail call i64 @llvm.hexagon.A4.ornp(i64 %Rs, i64 %Rt)
+ ret i64 %0
+}
+
+; CHECK-LABEL: @test60
+; CHECK: r0 = add(r0.l, r1.l)
+define i32 @test60(i32 %Rs, i32 %Rt) #0 {
+entry:
+ %0 = tail call i32 @llvm.hexagon.A2.addh.l16.ll(i32 %Rs, i32 %Rt)
+ ret i32 %0
+}
+
+; CHECK-LABEL: @test61
+; CHECK: r0 = add(r0.l, r1.h)
+define i32 @test61(i32 %Rs, i32 %Rt) #0 {
+entry:
+ %0 = tail call i32 @llvm.hexagon.A2.addh.l16.hl(i32 %Rs, i32 %Rt)
+ ret i32 %0
+}
+
+; CHECK-LABEL: @test62
+; CHECK: r0 = add(r0.l, r1.l):sat
+define i32 @test62(i32 %Rs, i32 %Rt) #0 {
+entry:
+ %0 = tail call i32 @llvm.hexagon.A2.addh.l16.sat.ll(i32 %Rs, i32 %Rt)
+ ret i32 %0
+}
+
+; CHECK-LABEL: @test63
+; CHECK: r0 = add(r0.l, r1.h):sat
+define i32 @test63(i32 %Rs, i32 %Rt) #0 {
+entry:
+ %0 = tail call i32 @llvm.hexagon.A2.addh.l16.sat.hl(i32 %Rs, i32 %Rt)
+ ret i32 %0
+}
+
+; CHECK-LABEL: @test64
+; CHECK: r0 = add(r0.l, r1.l):<<16
+define i32 @test64(i32 %Rs, i32 %Rt) #0 {
+entry:
+ %0 = tail call i32 @llvm.hexagon.A2.addh.h16.ll(i32 %Rs, i32 %Rt)
+ ret i32 %0
+}
+
+; CHECK-LABEL: @test65
+; CHECK: r0 = add(r0.l, r1.h):<<16
+define i32 @test65(i32 %Rs, i32 %Rt) #0 {
+entry:
+ %0 = tail call i32 @llvm.hexagon.A2.addh.h16.lh(i32 %Rs, i32 %Rt)
+ ret i32 %0
+}
+
+; CHECK-LABEL: @test66
+; CHECK: r0 = add(r0.h, r1.l):<<16
+define i32 @test66(i32 %Rs, i32 %Rt) #0 {
+entry:
+ %0 = tail call i32 @llvm.hexagon.A2.addh.h16.hl(i32 %Rs, i32 %Rt)
+ ret i32 %0
+}
+
+; CHECK-LABEL: @test67
+; CHECK: r0 = add(r0.h, r1.h):<<16
+define i32 @test67(i32 %Rs, i32 %Rt) #0 {
+entry:
+ %0 = tail call i32 @llvm.hexagon.A2.addh.h16.hh(i32 %Rs, i32 %Rt)
+ ret i32 %0
+}
+
+; CHECK-LABEL: @test68
+; CHECK: r0 = add(r0.l, r1.l):sat:<<16
+define i32 @test68(i32 %Rs, i32 %Rt) #0 {
+entry:
+ %0 = tail call i32 @llvm.hexagon.A2.addh.h16.sat.ll(i32 %Rs, i32 %Rt)
+ ret i32 %0
+}
+
+; CHECK-LABEL: @test69
+; CHECK: r0 = add(r0.l, r1.h):sat:<<16
+define i32 @test69(i32 %Rs, i32 %Rt) #0 {
+entry:
+ %0 = tail call i32 @llvm.hexagon.A2.addh.h16.sat.lh(i32 %Rs, i32 %Rt)
+ ret i32 %0
+}
+
+; CHECK-LABEL: @test6A
+; CHECK: r0 = add(r0.h, r1.l):sat:<<16
+define i32 @test6A(i32 %Rs, i32 %Rt) #0 {
+entry:
+ %0 = tail call i32 @llvm.hexagon.A2.addh.h16.sat.hl(i32 %Rs, i32 %Rt)
+ ret i32 %0
+}
+
+; CHECK-LABEL: @test6B
+; CHECK: r0 = add(r0.h, r1.h):sat:<<16
+define i32 @test6B(i32 %Rs, i32 %Rt) #0 {
+entry:
+ %0 = tail call i32 @llvm.hexagon.A2.addh.h16.sat.hh(i32 %Rs, i32 %Rt)
+ ret i32 %0
+}
+
+; CHECK-LABEL: @test70
+; CHECK: r0 = sub(r0.l, r1.l)
+define i32 @test70(i32 %Rs, i32 %Rt) #0 {
+entry:
+ %0 = tail call i32 @llvm.hexagon.A2.subh.l16.ll(i32 %Rs, i32 %Rt)
+ ret i32 %0
+}
+
+; CHECK-LABEL: @test71
+; CHECK: r0 = sub(r0.l, r1.h)
+define i32 @test71(i32 %Rs, i32 %Rt) #0 {
+entry:
+ %0 = tail call i32 @llvm.hexagon.A2.subh.l16.hl(i32 %Rs, i32 %Rt)
+ ret i32 %0
+}
+
+; CHECK-LABEL: @test72
+; CHECK: r0 = sub(r0.l, r1.l):sat
+define i32 @test72(i32 %Rs, i32 %Rt) #0 {
+entry:
+ %0 = tail call i32 @llvm.hexagon.A2.subh.l16.sat.ll(i32 %Rs, i32 %Rt)
+ ret i32 %0
+}
+
+; CHECK-LABEL: @test73
+; CHECK: r0 = sub(r0.l, r1.h):sat
+define i32 @test73(i32 %Rs, i32 %Rt) #0 {
+entry:
+ %0 = tail call i32 @llvm.hexagon.A2.subh.l16.sat.hl(i32 %Rs, i32 %Rt)
+ ret i32 %0
+}
+
+; CHECK-LABEL: @test74
+; CHECK: r0 = sub(r0.l, r1.l):<<16
+define i32 @test74(i32 %Rs, i32 %Rt) #0 {
+entry:
+ %0 = tail call i32 @llvm.hexagon.A2.subh.h16.ll(i32 %Rs, i32 %Rt)
+ ret i32 %0
+}
+
+; CHECK-LABEL: @test75
+; CHECK: r0 = sub(r0.l, r1.h):<<16
+define i32 @test75(i32 %Rs, i32 %Rt) #0 {
+entry:
+ %0 = tail call i32 @llvm.hexagon.A2.subh.h16.lh(i32 %Rs, i32 %Rt)
+ ret i32 %0
+}
+
+; CHECK-LABEL: @test76
+; CHECK: r0 = sub(r0.h, r1.l):<<16
+define i32 @test76(i32 %Rs, i32 %Rt) #0 {
+entry:
+ %0 = tail call i32 @llvm.hexagon.A2.subh.h16.hl(i32 %Rs, i32 %Rt)
+ ret i32 %0
+}
+
+; CHECK-LABEL: @test77
+; CHECK: r0 = sub(r0.h, r1.h):<<16
+define i32 @test77(i32 %Rs, i32 %Rt) #0 {
+entry:
+ %0 = tail call i32 @llvm.hexagon.A2.subh.h16.hh(i32 %Rs, i32 %Rt)
+ ret i32 %0
+}
+
+; CHECK-LABEL: @test78
+; CHECK: r0 = sub(r0.l, r1.l):sat:<<16
+define i32 @test78(i32 %Rs, i32 %Rt) #0 {
+entry:
+ %0 = tail call i32 @llvm.hexagon.A2.subh.h16.sat.ll(i32 %Rs, i32 %Rt)
+ ret i32 %0
+}
+
+; CHECK-LABEL: @test79
+; CHECK: r0 = sub(r0.l, r1.h):sat:<<16
+define i32 @test79(i32 %Rs, i32 %Rt) #0 {
+entry:
+ %0 = tail call i32 @llvm.hexagon.A2.subh.h16.sat.lh(i32 %Rs, i32 %Rt)
+ ret i32 %0
+}
+
+; CHECK-LABEL: @test7A
+; CHECK: r0 = sub(r0.h, r1.l):sat:<<16
+define i32 @test7A(i32 %Rs, i32 %Rt) #0 {
+entry:
+ %0 = tail call i32 @llvm.hexagon.A2.subh.h16.sat.hl(i32 %Rs, i32 %Rt)
+ ret i32 %0
+}
+
+; CHECK-LABEL: @test7B
+; CHECK: r0 = sub(r0.h, r1.h):sat:<<16
+define i32 @test7B(i32 %Rs, i32 %Rt) #0 {
+entry:
+ %0 = tail call i32 @llvm.hexagon.A2.subh.h16.sat.hh(i32 %Rs, i32 %Rt)
+ ret i32 %0
+}
+
+; CHECK-LABEL: @test90
+; CHECK: r0 = and(#1, asl(r0, #2))
+define i32 @test90(i32 %Rs) #0 {
+entry:
+ %0 = tail call i32 @llvm.hexagon.S4.andi.asl.ri(i32 1, i32 %Rs, i32 2)
+ ret i32 %0
+}
+
+; CHECK-LABEL: @test91
+; CHECK: r0 = or(#1, asl(r0, #2))
+define i32 @test91(i32 %Rs) #0 {
+entry:
+ %0 = tail call i32 @llvm.hexagon.S4.ori.asl.ri(i32 1, i32 %Rs, i32 2)
+ ret i32 %0
+}
+
+; CHECK-LABEL: @test92
+; CHECK: r0 = add(#1, asl(r0, #2))
+define i32 @test92(i32 %Rs) #0 {
+entry:
+ %0 = tail call i32 @llvm.hexagon.S4.addi.asl.ri(i32 1, i32 %Rs, i32 2)
+ ret i32 %0
+}
+
+; CHECK-LABEL: @test93
+; CHECK: r0 = sub(#1, asl(r0, #2))
+define i32 @test93(i32 %Rs) #0 {
+entry:
+ %0 = tail call i32 @llvm.hexagon.S4.subi.asl.ri(i32 1, i32 %Rs, i32 2)
+ ret i32 %0
+}
+
+; CHECK-LABEL: @test94
+; CHECK: r0 = and(#1, lsr(r0, #2))
+define i32 @test94(i32 %Rs) #0 {
+entry:
+ %0 = tail call i32 @llvm.hexagon.S4.andi.lsr.ri(i32 1, i32 %Rs, i32 2)
+ ret i32 %0
+}
+
+; CHECK-LABEL: @test95
+; CHECK: r0 = or(#1, lsr(r0, #2))
+define i32 @test95(i32 %Rs) #0 {
+entry:
+ %0 = tail call i32 @llvm.hexagon.S4.ori.lsr.ri(i32 1, i32 %Rs, i32 2)
+ ret i32 %0
+}
+
+; CHECK-LABEL: @test96
+; CHECK: r0 = add(#1, lsr(r0, #2))
+define i32 @test96(i32 %Rs) #0 {
+entry:
+ %0 = tail call i32 @llvm.hexagon.S4.addi.lsr.ri(i32 1, i32 %Rs, i32 2)
+ ret i32 %0
+}
+
+; CHECK-LABEL: @test97
+; CHECK: r0 = sub(#1, lsr(r0, #2))
+define i32 @test97(i32 %Rs) #0 {
+entry:
+ %0 = tail call i32 @llvm.hexagon.S4.subi.lsr.ri(i32 1, i32 %Rs, i32 2)
+ ret i32 %0
+}
+
+; CHECK-LABEL: @test100
+; CHECK: r1:0 = bitsplit(r0, r1)
+define i64 @test100(i32 %Rs, i32 %Rt) #0 {
+entry:
+ %0 = tail call i64 @llvm.hexagon.A4.bitsplit(i32 %Rs, i32 %Rt)
+ ret i64 %0
+}
+
+; CHECK-LABEL: @test101
+; CHECK: r0 = modwrap(r0, r1)
+define i32 @test101(i32 %Rs, i32 %Rt) #0 {
+entry:
+ %0 = tail call i32 @llvm.hexagon.A4.modwrapu(i32 %Rs, i32 %Rt)
+ ret i32 %0
+}
+
+; CHECK-LABEL: @test102
+; CHECK: r0 = parity(r1:0, r3:2)
+define i32 @test102(i64 %Rs, i64 %Rt) #0 {
+entry:
+ %0 = tail call i32 @llvm.hexagon.S2.parityp(i64 %Rs, i64 %Rt)
+ ret i32 %0
+}
+
+; CHECK-LABEL: @test103
+; CHECK: r0 = parity(r0, r1)
+define i32 @test103(i32 %Rs, i32 %Rt) #0 {
+entry:
+ %0 = tail call i32 @llvm.hexagon.S4.parity(i32 %Rs, i32 %Rt)
+ ret i32 %0
+}
+
+declare i32 @llvm.hexagon.C2.cmpeqp(i64, i64) #1
+declare i32 @llvm.hexagon.C2.cmpgtp(i64, i64) #1
+declare i32 @llvm.hexagon.C2.cmpgtup(i64, i64) #1
+declare i32 @llvm.hexagon.A4.rcmpeq(i32, i32) #1
+declare i32 @llvm.hexagon.A4.rcmpneq(i32, i32) #1
+declare i32 @llvm.hexagon.A4.rcmpeqi(i32, i32) #1
+declare i32 @llvm.hexagon.A4.rcmpneqi(i32, i32) #1
+declare i32 @llvm.hexagon.A4.cmpbeq(i32, i32) #1
+declare i32 @llvm.hexagon.A4.cmpbgt(i32, i32) #1
+declare i32 @llvm.hexagon.A4.cmpbgtu(i32, i32) #1
+declare i32 @llvm.hexagon.A4.cmpbeqi(i32, i32) #1
+declare i32 @llvm.hexagon.A4.cmpbgti(i32, i32) #1
+declare i32 @llvm.hexagon.A4.cmpbgtui(i32, i32) #1
+declare i32 @llvm.hexagon.A4.cmpheq(i32, i32) #1
+declare i32 @llvm.hexagon.A4.cmphgt(i32, i32) #1
+declare i32 @llvm.hexagon.A4.cmphgtu(i32, i32) #1
+declare i32 @llvm.hexagon.A4.cmpheqi(i32, i32) #1
+declare i32 @llvm.hexagon.A4.cmphgti(i32, i32) #1
+declare i32 @llvm.hexagon.A4.cmphgtui(i32, i32) #1
+declare i64 @llvm.hexagon.C2.vmux(i32, i64, i64) #1
+declare i32 @llvm.hexagon.A4.vcmpbeq.any(i64, i64) #1
+declare i64 @llvm.hexagon.A2.addp(i64, i64) #1
+declare i64 @llvm.hexagon.A2.addpsat(i64, i64) #1
+declare i64 @llvm.hexagon.A2.subp(i64, i64) #1
+declare i64 @llvm.hexagon.A2.addsp(i32, i64) #1
+declare i64 @llvm.hexagon.A2.andp(i64, i64) #1
+declare i64 @llvm.hexagon.A2.orp(i64, i64) #1
+declare i64 @llvm.hexagon.A2.xorp(i64, i64) #1
+declare i64 @llvm.hexagon.A4.ornp(i64, i64) #1
+declare i64 @llvm.hexagon.A4.andnp(i64, i64) #1
+declare i32 @llvm.hexagon.A2.addh.l16.ll(i32, i32) #1
+declare i32 @llvm.hexagon.A2.addh.l16.hl(i32, i32) #1
+declare i32 @llvm.hexagon.A2.addh.l16.sat.ll(i32, i32) #1
+declare i32 @llvm.hexagon.A2.addh.l16.sat.hl(i32, i32) #1
+declare i32 @llvm.hexagon.A2.addh.h16.ll(i32, i32) #1
+declare i32 @llvm.hexagon.A2.addh.h16.lh(i32, i32) #1
+declare i32 @llvm.hexagon.A2.addh.h16.hl(i32, i32) #1
+declare i32 @llvm.hexagon.A2.addh.h16.hh(i32, i32) #1
+declare i32 @llvm.hexagon.A2.addh.h16.sat.ll(i32, i32) #1
+declare i32 @llvm.hexagon.A2.addh.h16.sat.lh(i32, i32) #1
+declare i32 @llvm.hexagon.A2.addh.h16.sat.hl(i32, i32) #1
+declare i32 @llvm.hexagon.A2.addh.h16.sat.hh(i32, i32) #1
+declare i32 @llvm.hexagon.A2.subh.l16.ll(i32, i32) #1
+declare i32 @llvm.hexagon.A2.subh.l16.hl(i32, i32) #1
+declare i32 @llvm.hexagon.A2.subh.l16.sat.ll(i32, i32) #1
+declare i32 @llvm.hexagon.A2.subh.l16.sat.hl(i32, i32) #1
+declare i32 @llvm.hexagon.A2.subh.h16.ll(i32, i32) #1
+declare i32 @llvm.hexagon.A2.subh.h16.lh(i32, i32) #1
+declare i32 @llvm.hexagon.A2.subh.h16.hl(i32, i32) #1
+declare i32 @llvm.hexagon.A2.subh.h16.hh(i32, i32) #1
+declare i32 @llvm.hexagon.A2.subh.h16.sat.ll(i32, i32) #1
+declare i32 @llvm.hexagon.A2.subh.h16.sat.lh(i32, i32) #1
+declare i32 @llvm.hexagon.A2.subh.h16.sat.hl(i32, i32) #1
+declare i32 @llvm.hexagon.A2.subh.h16.sat.hh(i32, i32) #1
+declare i64 @llvm.hexagon.A4.bitsplit(i32, i32) #1
+declare i32 @llvm.hexagon.A4.modwrapu(i32, i32) #1
+declare i32 @llvm.hexagon.S2.parityp(i64, i64) #1
+declare i32 @llvm.hexagon.S4.parity(i32, i32) #1
+declare i32 @llvm.hexagon.S4.andi.asl.ri(i32, i32, i32) #1
+declare i32 @llvm.hexagon.S4.ori.asl.ri(i32, i32, i32) #1
+declare i32 @llvm.hexagon.S4.addi.asl.ri(i32, i32, i32) #1
+declare i32 @llvm.hexagon.S4.subi.asl.ri(i32, i32, i32) #1
+declare i32 @llvm.hexagon.S4.andi.lsr.ri(i32, i32, i32) #1
+declare i32 @llvm.hexagon.S4.ori.lsr.ri(i32, i32, i32) #1
+declare i32 @llvm.hexagon.S4.addi.lsr.ri(i32, i32, i32) #1
+declare i32 @llvm.hexagon.S4.subi.lsr.ri(i32, i32, i32) #1
+
+attributes #0 = { nounwind readnone "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone }
diff --git a/test/CodeGen/Hexagon/always-ext.ll b/test/CodeGen/Hexagon/always-ext.ll
index 9c8d708ba877..8b4b2f5bf4f2 100644
--- a/test/CodeGen/Hexagon/always-ext.ll
+++ b/test/CodeGen/Hexagon/always-ext.ll
@@ -1,3 +1,4 @@
+; XFAIL:
; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
; Check that we don't generate an invalid packet with too many instructions
@@ -7,7 +8,7 @@
; CHECK: {
; CHECK-NOT: call abort
; CHECK: memw(##0)
-; CHECK: memw(r{{[0-9+]}}<<#2+##4)
+; CHECK: memw(r{{[0-9+]}}<<#2 + ##4)
; CHECK: }
%struct.CuTest.1.28.31.37.40.43.52.55.67.85.111 = type { i8*, void (%struct.CuTest.1.28.31.37.40.43.52.55.67.85.111*)*, i32, i32, i8*, [23 x i32]* }
@@ -23,8 +24,8 @@ entry:
br i1 undef, label %for.body.us, label %for.end
for.body.us: ; preds = %entry
- %0 = load %struct.CuTest.1.28.31.37.40.43.52.55.67.85.111** null, align 4
- %1 = load i32* undef, align 4
+ %0 = load %struct.CuTest.1.28.31.37.40.43.52.55.67.85.111*, %struct.CuTest.1.28.31.37.40.43.52.55.67.85.111** null, align 4
+ %1 = load i32, i32* undef, align 4
%cmp.i.us = icmp slt i32 %1, 1024
br i1 %cmp.i.us, label %CuSuiteAdd.exit.us, label %cond.false6.i.us
@@ -33,7 +34,7 @@ cond.false6.i.us: ; preds = %for.body.us
unreachable
CuSuiteAdd.exit.us: ; preds = %for.body.us
- %arrayidx.i.us = getelementptr inbounds %struct.CuSuite.2.29.32.38.41.44.53.56.68.86.112* null, i32 0, i32 1, i32 %1
+ %arrayidx.i.us = getelementptr inbounds %struct.CuSuite.2.29.32.38.41.44.53.56.68.86.112, %struct.CuSuite.2.29.32.38.41.44.53.56.68.86.112* null, i32 0, i32 1, i32 %1
store %struct.CuTest.1.28.31.37.40.43.52.55.67.85.111* %0, %struct.CuTest.1.28.31.37.40.43.52.55.67.85.111** %arrayidx.i.us, align 4
call void @llvm.trap()
unreachable
diff --git a/test/CodeGen/Hexagon/block-addr.ll b/test/CodeGen/Hexagon/block-addr.ll
index dc0d6e60fd28..eda167a67f28 100644
--- a/test/CodeGen/Hexagon/block-addr.ll
+++ b/test/CodeGen/Hexagon/block-addr.ll
@@ -1,7 +1,8 @@
; RUN: llc -march=hexagon < %s | FileCheck %s
-; CHECK: r{{[0-9]+}} = CONST32(#.LJTI{{[0-9]+_[0-9]+}})
-; CHECK: r{{[0-9]+}} = memw(r{{[0-9]+}} + r{{[0-9]+<<#[0-9]+}})
+; Allow combine(..##JTI..):
+; CHECK: r{{[0-9]+}}{{.*}} = {{.*}}#.LJTI
+; CHECK: r{{[0-9]+}} = memw(r{{[0-9]+}}{{ *}}+{{ *}}r{{[0-9]+<<#[0-9]+}})
; CHECK: jumpr r{{[0-9]+}}
define void @main() #0 {
@@ -10,7 +11,7 @@ entry:
br label %while.body
while.body:
- %ret.0.load17 = load volatile i32* %ret, align 4
+ %ret.0.load17 = load volatile i32, i32* %ret, align 4
switch i32 %ret.0.load17, label %label6 [
i32 0, label %label0
i32 1, label %label1
@@ -21,37 +22,37 @@ while.body:
]
label0:
- %ret.0.load18 = load volatile i32* %ret, align 4
+ %ret.0.load18 = load volatile i32, i32* %ret, align 4
%inc = add nsw i32 %ret.0.load18, 1
store volatile i32 %inc, i32* %ret, align 4
br label %while.body
label1:
- %ret.0.load19 = load volatile i32* %ret, align 4
+ %ret.0.load19 = load volatile i32, i32* %ret, align 4
%inc2 = add nsw i32 %ret.0.load19, 1
store volatile i32 %inc2, i32* %ret, align 4
br label %while.body
label2:
- %ret.0.load20 = load volatile i32* %ret, align 4
+ %ret.0.load20 = load volatile i32, i32* %ret, align 4
%inc4 = add nsw i32 %ret.0.load20, 1
store volatile i32 %inc4, i32* %ret, align 4
br label %while.body
label3:
- %ret.0.load21 = load volatile i32* %ret, align 4
+ %ret.0.load21 = load volatile i32, i32* %ret, align 4
%inc6 = add nsw i32 %ret.0.load21, 1
store volatile i32 %inc6, i32* %ret, align 4
br label %while.body
label4:
- %ret.0.load22 = load volatile i32* %ret, align 4
+ %ret.0.load22 = load volatile i32, i32* %ret, align 4
%inc8 = add nsw i32 %ret.0.load22, 1
store volatile i32 %inc8, i32* %ret, align 4
br label %while.body
label5:
- %ret.0.load23 = load volatile i32* %ret, align 4
+ %ret.0.load23 = load volatile i32, i32* %ret, align 4
%inc10 = add nsw i32 %ret.0.load23, 1
store volatile i32 %inc10, i32* %ret, align 4
br label %while.body
diff --git a/test/CodeGen/Hexagon/brev_ld.ll b/test/CodeGen/Hexagon/brev_ld.ll
new file mode 100644
index 000000000000..12edb4c2b8f7
--- /dev/null
+++ b/test/CodeGen/Hexagon/brev_ld.ll
@@ -0,0 +1,140 @@
+; RUN: llc -march=hexagon < %s | FileCheck %s
+; RUN: llc -march=hexagon -verify-machineinstrs=true < %s | FileCheck %s
+; Testing bitreverse load intrinsics:
+; Q6_bitrev_load_update_D(inputLR, pDelay, nConvLength);
+; Q6_bitrev_load_update_W(inputLR, pDelay, nConvLength);
+; Q6_bitrev_load_update_H(inputLR, pDelay, nConvLength);
+; Q6_bitrev_load_update_UH(inputLR, pDelay, nConvLength);
+; Q6_bitrev_load_update_UB(inputLR, pDelay, nConvLength);
+; Q6_bitrev_load_update_B(inputLR, pDelay, nConvLength);
+; producing these instructions:
+; r3:2 = memd(r0++m0:brev)
+; r1 = memw(r0++m0:brev)
+; r1 = memh(r0++m0:brev)
+; r1 = memuh(r0++m0:brev)
+; r1 = memub(r0++m0:brev)
+; r1 = memb(r0++m0:brev)
+
+target datalayout = "e-p:32:32:32-i64:64:64-i32:32:32-i16:16:16-i1:32:32-f64:64:64-f32:32:32-v64:64:64-v32:32:32-a0:0-n16:32"
+target triple = "hexagon"
+
+define i64 @foo(i16 zeroext %filtMemLen, i16* %filtMemLR, i16 signext %filtMemIndex) nounwind {
+entry:
+ %inputLR = alloca i64, align 8
+ %conv = zext i16 %filtMemLen to i32
+ %shr1 = lshr i32 %conv, 1
+ %idxprom = sext i16 %filtMemIndex to i32
+ %arrayidx = getelementptr inbounds i16, i16* %filtMemLR, i32 %idxprom
+ %0 = bitcast i16* %arrayidx to i8*
+ %1 = bitcast i64* %inputLR to i8*
+ %sub = sub i32 13, %shr1
+ %shl = shl i32 1, %sub
+; CHECK: memd(r{{[0-9]*}} ++ m{{[0-1]}}:brev)
+ %2 = call i8* @llvm.hexagon.brev.ldd(i8* %0, i8* %1, i32 %shl)
+ %3 = bitcast i8* %2 to i64*
+ %4 = load i64, i64* %3, align 8, !tbaa !0
+ ret i64 %4
+}
+
+declare i8* @llvm.hexagon.brev.ldd(i8*, i8*, i32) nounwind
+
+define i32 @foo1(i16 zeroext %filtMemLen, i16* %filtMemLR, i16 signext %filtMemIndex) nounwind {
+entry:
+ %inputLR = alloca i32, align 4
+ %conv = zext i16 %filtMemLen to i32
+ %shr1 = lshr i32 %conv, 1
+ %idxprom = sext i16 %filtMemIndex to i32
+ %arrayidx = getelementptr inbounds i16, i16* %filtMemLR, i32 %idxprom
+ %0 = bitcast i16* %arrayidx to i8*
+ %1 = bitcast i32* %inputLR to i8*
+ %sub = sub i32 14, %shr1
+ %shl = shl i32 1, %sub
+; CHECK: memw(r{{[0-9]*}} ++ m{{[0-1]}}:brev)
+ %2 = call i8* @llvm.hexagon.brev.ldw(i8* %0, i8* %1, i32 %shl)
+ %3 = bitcast i8* %2 to i32*
+ %4 = load i32, i32* %3, align 4, !tbaa !2
+ ret i32 %4
+}
+
+declare i8* @llvm.hexagon.brev.ldw(i8*, i8*, i32) nounwind
+
+define signext i16 @foo2(i16 zeroext %filtMemLen, i16* %filtMemLR, i16 signext %filtMemIndex) nounwind {
+entry:
+ %inputLR = alloca i16, align 2
+ %conv = zext i16 %filtMemLen to i32
+ %shr1 = lshr i32 %conv, 1
+ %idxprom = sext i16 %filtMemIndex to i32
+ %arrayidx = getelementptr inbounds i16, i16* %filtMemLR, i32 %idxprom
+ %0 = bitcast i16* %arrayidx to i8*
+ %1 = bitcast i16* %inputLR to i8*
+ %sub = sub i32 15, %shr1
+ %shl = shl i32 1, %sub
+; CHECK: memh(r{{[0-9]*}} ++ m0:brev)
+ %2 = call i8* @llvm.hexagon.brev.ldh(i8* %0, i8* %1, i32 %shl)
+ %3 = bitcast i8* %2 to i16*
+ %4 = load i16, i16* %3, align 2, !tbaa !3
+ ret i16 %4
+}
+
+declare i8* @llvm.hexagon.brev.ldh(i8*, i8*, i32) nounwind
+
+define zeroext i16 @foo3(i16 zeroext %filtMemLen, i16* %filtMemLR, i16 signext %filtMemIndex) nounwind {
+entry:
+ %inputLR = alloca i16, align 2
+ %conv = zext i16 %filtMemLen to i32
+ %shr1 = lshr i32 %conv, 1
+ %idxprom = sext i16 %filtMemIndex to i32
+ %arrayidx = getelementptr inbounds i16, i16* %filtMemLR, i32 %idxprom
+ %0 = bitcast i16* %arrayidx to i8*
+ %1 = bitcast i16* %inputLR to i8*
+ %sub = sub i32 15, %shr1
+ %shl = shl i32 1, %sub
+; CHECK: memuh(r{{[0-9]*}} ++ m0:brev)
+ %2 = call i8* @llvm.hexagon.brev.lduh(i8* %0, i8* %1, i32 %shl)
+ %3 = bitcast i8* %2 to i16*
+ %4 = load i16, i16* %3, align 2, !tbaa !3
+ ret i16 %4
+}
+
+declare i8* @llvm.hexagon.brev.lduh(i8*, i8*, i32) nounwind
+
+define zeroext i8 @foo4(i16 zeroext %filtMemLen, i16* %filtMemLR, i16 signext %filtMemIndex) nounwind {
+entry:
+ %inputLR = alloca i8, align 1
+ %conv = zext i16 %filtMemLen to i32
+ %shr1 = lshr i32 %conv, 1
+ %idxprom = sext i16 %filtMemIndex to i32
+ %arrayidx = getelementptr inbounds i16, i16* %filtMemLR, i32 %idxprom
+ %0 = bitcast i16* %arrayidx to i8*
+ %sub = sub nsw i32 16, %shr1
+ %shl = shl i32 1, %sub
+; CHECK: memub(r{{[0-9]*}} ++ m{{[0-1]}}:brev)
+ %1 = call i8* @llvm.hexagon.brev.ldub(i8* %0, i8* %inputLR, i32 %shl)
+ %2 = load i8, i8* %1, align 1, !tbaa !0
+ ret i8 %2
+}
+
+declare i8* @llvm.hexagon.brev.ldub(i8*, i8*, i32) nounwind
+
+define zeroext i8 @foo5(i16 zeroext %filtMemLen, i16* %filtMemLR, i16 signext %filtMemIndex) nounwind {
+entry:
+ %inputLR = alloca i8, align 1
+ %conv = zext i16 %filtMemLen to i32
+ %shr1 = lshr i32 %conv, 1
+ %idxprom = sext i16 %filtMemIndex to i32
+ %arrayidx = getelementptr inbounds i16, i16* %filtMemLR, i32 %idxprom
+ %0 = bitcast i16* %arrayidx to i8*
+ %sub = sub nsw i32 16, %shr1
+ %shl = shl i32 1, %sub
+; CHECK: memb(r{{[0-9]*}} ++ m{{[0-1]}}:brev)
+ %1 = call i8* @llvm.hexagon.brev.ldb(i8* %0, i8* %inputLR, i32 %shl)
+ %2 = load i8, i8* %1, align 1, !tbaa !0
+ ret i8 %2
+}
+
+declare i8* @llvm.hexagon.brev.ldb(i8*, i8*, i32) nounwind
+
+!0 = !{!"omnipotent char", !1}
+!1 = !{!"Simple C/C++ TBAA"}
+!2 = !{!"int", !0}
+!3 = !{!"short", !0}
diff --git a/test/CodeGen/Hexagon/brev_st.ll b/test/CodeGen/Hexagon/brev_st.ll
new file mode 100644
index 000000000000..b80579185317
--- /dev/null
+++ b/test/CodeGen/Hexagon/brev_st.ll
@@ -0,0 +1,112 @@
+; RUN: llc -march=hexagon < %s | FileCheck %s
+; RUN: llc -march=hexagon -verify-machineinstrs=true < %s | FileCheck %s
+; Test these 5 bitreverse store intrinsics:
+; Q6_bitrev_store_update_D(inputLR, pDelay, nConvLength);
+; Q6_bitrev_store_update_W(inputLR, pDelay, nConvLength);
+; Q6_bitrev_store_update_HL(inputLR, pDelay, nConvLength);
+; Q6_bitrev_store_update_HH(inputLR, pDelay, nConvLength);
+; Q6_bitrev_store_update_B(inputLR, pDelay, nConvLength);
+; producing these instructions:
+; memd(r0++m0:brev) = r1:0
+; memw(r0++m0:brev) = r0
+; memh(r0++m0:brev) = r3
+; memh(r0++m0:brev) = r3.h
+; memb(r0++m0:brev) = r3
+
+; ModuleID = 'brev_st.i'
+target datalayout = "e-p:32:32:32-i64:64:64-i32:32:32-i16:16:16-i1:32:32-f64:64:64-f32:32:32-v64:64:64-v32:32:32-a0:0-n16:32"
+target triple = "hexagon"
+
+define i64 @foo(i16 zeroext %filtMemLen, i16* %filtMemLR, i16 signext %filtMemIndex) nounwind {
+entry:
+ %conv = zext i16 %filtMemLen to i32
+ %shr2 = lshr i32 %conv, 1
+ %idxprom = sext i16 %filtMemIndex to i32
+ %arrayidx = getelementptr inbounds i16, i16* %filtMemLR, i32 %idxprom
+ %0 = bitcast i16* %arrayidx to i8*
+ %sub = sub i32 13, %shr2
+ %shl = shl i32 1, %sub
+; CHECK: memd(r{{[0-9]*}} ++ m{{[0-1]}}:brev)
+ %1 = tail call i8* @llvm.hexagon.brev.std(i8* %0, i64 undef, i32 %shl)
+ %2 = bitcast i8* %1 to i64*
+ %3 = load i64, i64* %2, align 8, !tbaa !0
+ ret i64 %3
+}
+
+declare i8* @llvm.hexagon.brev.std(i8*, i64, i32) nounwind
+
+define i32 @foo1(i16 zeroext %filtMemLen, i16* %filtMemLR, i16 signext %filtMemIndex) nounwind {
+entry:
+ %conv = zext i16 %filtMemLen to i32
+ %shr1 = lshr i32 %conv, 1
+ %idxprom = sext i16 %filtMemIndex to i32
+ %arrayidx = getelementptr inbounds i16, i16* %filtMemLR, i32 %idxprom
+ %0 = bitcast i16* %arrayidx to i8*
+ %sub = sub i32 14, %shr1
+ %shl = shl i32 1, %sub
+; CHECK: memw(r{{[0-9]*}} ++ m{{[0-1]}}:brev)
+ %1 = tail call i8* @llvm.hexagon.brev.stw(i8* %0, i32 undef, i32 %shl)
+ %2 = bitcast i8* %1 to i32*
+ %3 = load i32, i32* %2, align 4, !tbaa !2
+ ret i32 %3
+}
+
+declare i8* @llvm.hexagon.brev.stw(i8*, i32, i32) nounwind
+
+define signext i16 @foo2(i16 zeroext %filtMemLen, i16* %filtMemLR, i16 signext %filtMemIndex) nounwind {
+entry:
+ %conv = zext i16 %filtMemLen to i32
+ %shr2 = lshr i32 %conv, 1
+ %idxprom = sext i16 %filtMemIndex to i32
+ %arrayidx = getelementptr inbounds i16, i16* %filtMemLR, i32 %idxprom
+ %0 = bitcast i16* %arrayidx to i8*
+ %sub = sub i32 15, %shr2
+ %shl = shl i32 1, %sub
+; CHECK: memh(r{{[0-9]*}} ++ m{{[0-1]}}:brev)
+ %1 = tail call i8* @llvm.hexagon.brev.sth(i8* %0, i32 0, i32 %shl)
+ %2 = bitcast i8* %1 to i16*
+ %3 = load i16, i16* %2, align 2, !tbaa !3
+ ret i16 %3
+}
+
+declare i8* @llvm.hexagon.brev.sth(i8*, i32, i32) nounwind
+
+define signext i16 @foo3(i16 zeroext %filtMemLen, i16* %filtMemLR, i16 signext %filtMemIndex) nounwind {
+entry:
+ %conv = zext i16 %filtMemLen to i32
+ %shr2 = lshr i32 %conv, 1
+ %idxprom = sext i16 %filtMemIndex to i32
+ %arrayidx = getelementptr inbounds i16, i16* %filtMemLR, i32 %idxprom
+ %0 = bitcast i16* %arrayidx to i8*
+ %sub = sub i32 15, %shr2
+ %shl = shl i32 1, %sub
+; CHECK: memh(r{{[0-9]*}} ++ m{{[0-1]}}:brev){{ *}}={{ *}}r{{[0-9]*}}.h
+ %1 = tail call i8* @llvm.hexagon.brev.sthhi(i8* %0, i32 0, i32 %shl)
+ %2 = bitcast i8* %1 to i16*
+ %3 = load i16, i16* %2, align 2, !tbaa !3
+ ret i16 %3
+}
+
+declare i8* @llvm.hexagon.brev.sthhi(i8*, i32, i32) nounwind
+
+define zeroext i8 @foo5(i16 zeroext %filtMemLen, i16* %filtMemLR, i16 signext %filtMemIndex) nounwind {
+entry:
+ %conv = zext i16 %filtMemLen to i32
+ %shr2 = lshr i32 %conv, 1
+ %idxprom = sext i16 %filtMemIndex to i32
+ %arrayidx = getelementptr inbounds i16, i16* %filtMemLR, i32 %idxprom
+ %0 = bitcast i16* %arrayidx to i8*
+ %sub = sub nsw i32 16, %shr2
+ ; CHECK: memb(r{{[0-9]*}} ++ m{{[0-1]}}:brev)
+ %shl = shl i32 1, %sub
+ %1 = tail call i8* @llvm.hexagon.brev.stb(i8* %0, i32 0, i32 %shl)
+ %2 = load i8, i8* %1, align 1, !tbaa !0
+ ret i8 %2
+}
+
+declare i8* @llvm.hexagon.brev.stb(i8*, i32, i32) nounwind
+
+!0 = !{!"omnipotent char", !1}
+!1 = !{!"Simple C/C++ TBAA"}
+!2 = !{!"int", !0}
+!3 = !{!"short", !0}
diff --git a/test/CodeGen/Hexagon/calling-conv-2.ll b/test/CodeGen/Hexagon/calling-conv-2.ll
new file mode 100644
index 000000000000..3c68c88bd711
--- /dev/null
+++ b/test/CodeGen/Hexagon/calling-conv-2.ll
@@ -0,0 +1,13 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv5 <%s | \
+; RUN: FileCheck %s --check-prefix=CHECK-ONE
+
+%struct.test_struct = type { i32, i8, i64 }
+
+; CHECK-ONE: r1 = #45
+define void @foo(%struct.test_struct* noalias nocapture sret %agg.result, i32 %a) #0 {
+entry:
+ call void @bar(%struct.test_struct* sret %agg.result, i32 45) #2
+ ret void
+}
+
+declare void @bar(%struct.test_struct* sret, i32) #1
diff --git a/test/CodeGen/Hexagon/calling-conv.ll b/test/CodeGen/Hexagon/calling-conv.ll
new file mode 100644
index 000000000000..7133c1ae7aad
--- /dev/null
+++ b/test/CodeGen/Hexagon/calling-conv.ll
@@ -0,0 +1,73 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv5 <%s | \
+; RUN: FileCheck %s --check-prefix=CHECK-ONE
+; RUN: llc -march=hexagon -mcpu=hexagonv5 <%s | \
+; RUN: FileCheck %s --check-prefix=CHECK-TWO
+; RUN: llc -march=hexagon -mcpu=hexagonv5 <%s | \
+; RUN: FileCheck %s --check-prefix=CHECK-THREE
+
+%struct.test_struct = type { i32, i8, i64 }
+%struct.test_struct_long = type { i8, i64 }
+
+@mystruct = external global %struct.test_struct*, align 4
+
+; CHECK-ONE: memw(r29+#48) = r2
+; CHECK-TWO: memw(r29+#52) = r2
+; CHECK-THREE: memw(r29+#56) = r2
+; Function Attrs: nounwind
+define void @foo(%struct.test_struct* noalias sret %agg.result, i32 %a, i8 zeroext %c, %struct.test_struct* byval %s, %struct.test_struct_long* byval %t) #0 {
+entry:
+ %a.addr = alloca i32, align 4
+ %c.addr = alloca i8, align 1
+ %z = alloca i32, align 4
+ %ret = alloca %struct.test_struct, align 8
+ store i32 %a, i32* %a.addr, align 4
+ store i8 %c, i8* %c.addr, align 1
+ %0 = bitcast i32* %z to i8*
+ call void @llvm.lifetime.start(i64 4, i8* %0) #1
+ store i32 45, i32* %z, align 4
+ %1 = bitcast %struct.test_struct* %ret to i8*
+ call void @llvm.lifetime.start(i64 16, i8* %1) #1
+ %2 = load i32, i32* %z, align 4
+ %3 = load %struct.test_struct*, %struct.test_struct** @mystruct, align 4
+ %4 = load %struct.test_struct*, %struct.test_struct** @mystruct, align 4
+ %5 = load i8, i8* %c.addr, align 1
+ %6 = load i32, i32* %a.addr, align 4
+ %conv = sext i32 %6 to i64
+ %add = add nsw i64 %conv, 1
+ %7 = load i32, i32* %a.addr, align 4
+ %add1 = add nsw i32 %7, 2
+ %8 = load i32, i32* %a.addr, align 4
+ %conv2 = sext i32 %8 to i64
+ %add3 = add nsw i64 %conv2, 3
+ %9 = load i8, i8* %c.addr, align 1
+ %10 = load i8, i8* %c.addr, align 1
+ %11 = load i8, i8* %c.addr, align 1
+ %12 = load i32, i32* %z, align 4
+ call void @bar(%struct.test_struct* sret %ret, i32 %2, %struct.test_struct* byval %3, %struct.test_struct* byval %4, i8 zeroext %5, i64 %add, i32 %add1, i64 %add3, i8 zeroext %9, i8 zeroext %10, i8 zeroext %11, i32 %12)
+ %x = getelementptr inbounds %struct.test_struct, %struct.test_struct* %ret, i32 0, i32 0
+ store i32 20, i32* %x, align 4
+ %13 = bitcast %struct.test_struct* %agg.result to i8*
+ %14 = bitcast %struct.test_struct* %ret to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %13, i8* %14, i32 16, i32 8, i1 false)
+ %15 = bitcast %struct.test_struct* %ret to i8*
+ call void @llvm.lifetime.end(i64 16, i8* %15) #1
+ %16 = bitcast i32* %z to i8*
+ call void @llvm.lifetime.end(i64 4, i8* %16) #1
+ ret void
+}
+
+; Function Attrs: nounwind
+declare void @llvm.lifetime.start(i64, i8* nocapture) #1
+
+declare void @bar(%struct.test_struct* sret, i32, %struct.test_struct* byval, %struct.test_struct* byval, i8 zeroext, i64, i32, i64, i8 zeroext, i8 zeroext, i8 zeroext, i32) #2
+
+; Function Attrs: nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i32, i1) #1
+
+; Function Attrs: nounwind
+declare void @llvm.lifetime.end(i64, i8* nocapture) #1
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="hexagonv4" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind }
+attributes #2 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="hexagonv4" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
diff --git a/test/CodeGen/Hexagon/cext-check.ll b/test/CodeGen/Hexagon/cext-check.ll
index b7181d803f71..19b91c5245b2 100644
--- a/test/CodeGen/Hexagon/cext-check.ll
+++ b/test/CodeGen/Hexagon/cext-check.ll
@@ -7,19 +7,19 @@ define i32 @cext_test1(i32* %a) nounwind {
; CHECK-NOT: r{{[0-9]+}}{{ *}}={{ *}}memw(r{{[0-9]+}}{{ *}}+{{ *}}##4092)
; CHECK-NOT: r{{[0-9]+}}{{ *}}={{ *}}add(r{{[0-9]+}}{{ *}},{{ *}}##300)
entry:
- %0 = load i32* %a, align 4
+ %0 = load i32, i32* %a, align 4
%tobool = icmp ne i32 %0, 0
br i1 %tobool, label %if.then, label %if.end
if.then:
- %arrayidx1 = getelementptr inbounds i32* %a, i32 2000
- %1 = load i32* %arrayidx1, align 4
+ %arrayidx1 = getelementptr inbounds i32, i32* %a, i32 2000
+ %1 = load i32, i32* %arrayidx1, align 4
%add = add nsw i32 %1, 300000
br label %return
if.end:
- %arrayidx2 = getelementptr inbounds i32* %a, i32 1023
- %2 = load i32* %arrayidx2, align 4
+ %arrayidx2 = getelementptr inbounds i32, i32* %a, i32 1023
+ %2 = load i32, i32* %arrayidx2, align 4
%add3 = add nsw i32 %2, 300
br label %return
@@ -38,15 +38,15 @@ entry:
br i1 %tobool, label %if.then, label %if.end
if.then:
- %arrayidx = getelementptr inbounds i8* %a, i32 1023
- %0 = load i8* %arrayidx, align 1
+ %arrayidx = getelementptr inbounds i8, i8* %a, i32 1023
+ %0 = load i8, i8* %arrayidx, align 1
%conv = zext i8 %0 to i32
%add = add nsw i32 %conv, 300000
br label %return
if.end:
- %arrayidx1 = getelementptr inbounds i8* %a, i32 1024
- %1 = load i8* %arrayidx1, align 1
+ %arrayidx1 = getelementptr inbounds i8, i8* %a, i32 1024
+ %1 = load i8, i8* %arrayidx1, align 1
%conv2 = zext i8 %1 to i32
%add3 = add nsw i32 %conv2, 6000
br label %return
diff --git a/test/CodeGen/Hexagon/cext-valid-packet2.ll b/test/CodeGen/Hexagon/cext-valid-packet2.ll
index 2788a6b1c865..2eba74329960 100644
--- a/test/CodeGen/Hexagon/cext-valid-packet2.ll
+++ b/test/CodeGen/Hexagon/cext-valid-packet2.ll
@@ -10,31 +10,31 @@
define i32 @test(i32* nocapture %a, i32* nocapture %b, i32 %c) nounwind {
entry:
%add = add nsw i32 %c, 200002
- %0 = load i32* %a, align 4
+ %0 = load i32, i32* %a, align 4
%add1 = add nsw i32 %0, 200000
- %arrayidx2 = getelementptr inbounds i32* %a, i32 3000
+ %arrayidx2 = getelementptr inbounds i32, i32* %a, i32 3000
store i32 %add1, i32* %arrayidx2, align 4
- %1 = load i32* %b, align 4
+ %1 = load i32, i32* %b, align 4
%add4 = add nsw i32 %1, 200001
- %arrayidx5 = getelementptr inbounds i32* %a, i32 1
+ %arrayidx5 = getelementptr inbounds i32, i32* %a, i32 1
store i32 %add4, i32* %arrayidx5, align 4
- %arrayidx7 = getelementptr inbounds i32* %b, i32 1
- %2 = load i32* %arrayidx7, align 4
+ %arrayidx7 = getelementptr inbounds i32, i32* %b, i32 1
+ %2 = load i32, i32* %arrayidx7, align 4
%cmp = icmp sgt i32 %add4, %2
br i1 %cmp, label %if.then, label %if.else
if.then: ; preds = %entry
- %arrayidx8 = getelementptr inbounds i32* %a, i32 2
- %3 = load i32* %arrayidx8, align 4
- %arrayidx9 = getelementptr inbounds i32* %b, i32 2000
- %4 = load i32* %arrayidx9, align 4
+ %arrayidx8 = getelementptr inbounds i32, i32* %a, i32 2
+ %3 = load i32, i32* %arrayidx8, align 4
+ %arrayidx9 = getelementptr inbounds i32, i32* %b, i32 2000
+ %4 = load i32, i32* %arrayidx9, align 4
%sub = sub nsw i32 %3, %4
- %arrayidx10 = getelementptr inbounds i32* %a, i32 4000
+ %arrayidx10 = getelementptr inbounds i32, i32* %a, i32 4000
store i32 %sub, i32* %arrayidx10, align 4
br label %if.end
if.else: ; preds = %entry
- %arrayidx11 = getelementptr inbounds i32* %b, i32 3200
+ %arrayidx11 = getelementptr inbounds i32, i32* %b, i32 3200
store i32 %add, i32* %arrayidx11, align 4
br label %if.end
diff --git a/test/CodeGen/Hexagon/circ_ld.ll b/test/CodeGen/Hexagon/circ_ld.ll
new file mode 100644
index 000000000000..6d372403ca7a
--- /dev/null
+++ b/test/CodeGen/Hexagon/circ_ld.ll
@@ -0,0 +1,135 @@
+; RUN: llc -march=hexagon < %s | FileCheck %s
+; Testing for these 6 variants of circular load:
+; Q6_circ_load_update_B(inputLR, pDelay, -1, nConvLength, 4);
+; Q6_circ_load_update_D(inputLR, pDelay, -1, nConvLength, 4);
+; Q6_circ_load_update_H(inputLR, pDelay, -1, nConvLength, 4);
+; Q6_circ_load_update_UB(inputLR, pDelay, -1, nConvLength, 4);
+; Q6_circ_load_update_UH(inputLR, pDelay, -1, nConvLength, 4);
+; Q6_circ_load_update_W(inputLR, pDelay, -1, nConvLength, 4);
+; producing these:
+; r0 = memb(r1++#-1:circ(m0))
+; r3:2 = memd(r1++#-8:circ(m0))
+; r0 = memh(r1++#-2:circ(m0))
+; r0 = memub(r1++#-1:circ(m0))
+; r0 = memuh(r1++#-2:circ(m0))
+; r0 = memw(r1++#-4:circ(m0))
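+;
+; For reference, a rough C-level sketch of how one of these intrinsics is used.
+; The exact prototype of Q6_circ_load_update_W (and the SDK header that provides
+; it) is an assumption here; the call shape simply mirrors the comments above.
+;
+;   enum { NTAPS = 16 };
+;   static int delay_buf[NTAPS];     /* circular delay line                    */
+;   int read_one_tap(void) {
+;     int *pDelay = delay_buf;       /* current position, updated by the macro */
+;     int inputLR = 0;               /* destination of the loaded word         */
+;     int nConvLength = NTAPS;       /* circular buffer length in elements     */
+;     /* Load one word and post-update pDelay by -1 element, wrapping inside   */
+;     /* the buffer; this is the memw(rX++#-4:circ(m0)) form checked below.    */
+;     Q6_circ_load_update_W(inputLR, pDelay, -1, nConvLength, 4);
+;     return inputLR;
+;   }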
+
+target datalayout = "e-p:32:32:32-i64:64:64-i32:32:32-i16:16:16-i1:32:32-f64:64:64-f32:32:32-v64:64:64-v32:32:32-a0:0-n16:32"
+target triple = "hexagon"
+
+define zeroext i8 @foo1(i16 zeroext %filtMemLen, i16* %filtMemLR, i16 signext %filtMemIndex) nounwind {
+entry:
+ %inputLR = alloca i8, align 1
+ %conv = zext i16 %filtMemLen to i32
+ %shr1 = lshr i32 %conv, 1
+ %idxprom = sext i16 %filtMemIndex to i32
+ %arrayidx = getelementptr inbounds i16, i16* %filtMemLR, i32 %idxprom
+ %0 = bitcast i16* %arrayidx to i8*
+ %or = or i32 %shr1, 33554432
+; CHECK: memb(r{{[0-9]*.}}++{{.}}#-1:circ(m{{[0-1]}}))
+ %1 = call i8* @llvm.hexagon.circ.ldb(i8* %0, i8* %inputLR, i32 %or, i32 -1)
+ %2 = load i8, i8* %1, align 1, !tbaa !0
+ ret i8 %2
+}
+
+declare i8* @llvm.hexagon.circ.ldb(i8*, i8*, i32, i32) nounwind
+
+define i64 @foo2(i16 zeroext %filtMemLen, i16* %filtMemLR, i16 signext %filtMemIndex) nounwind {
+entry:
+ %inputLR = alloca i64, align 8
+ %conv = zext i16 %filtMemLen to i32
+ %shr1 = lshr i32 %conv, 1
+ %idxprom = sext i16 %filtMemIndex to i32
+ %arrayidx = getelementptr inbounds i16, i16* %filtMemLR, i32 %idxprom
+ %0 = bitcast i16* %arrayidx to i8*
+ %1 = bitcast i64* %inputLR to i8*
+ %shl = shl nuw nsw i32 %shr1, 3
+ %or = or i32 %shl, 83886080
+; CHECK: memd(r{{[0-9]*.}}++{{.}}#-8:circ(m{{[0-1]}}))
+ %2 = call i8* @llvm.hexagon.circ.ldd(i8* %0, i8* %1, i32 %or, i32 -8)
+ %3 = bitcast i8* %2 to i64*
+ %4 = load i64, i64* %3, align 8, !tbaa !0
+ ret i64 %4
+}
+
+declare i8* @llvm.hexagon.circ.ldd(i8*, i8*, i32, i32) nounwind
+
+define signext i16 @foo3(i16 zeroext %filtMemLen, i16* %filtMemLR, i16 signext %filtMemIndex) nounwind {
+entry:
+ %inputLR = alloca i16, align 2
+ %conv = zext i16 %filtMemLen to i32
+ %shr1 = and i32 %conv, 65534
+ %idxprom = sext i16 %filtMemIndex to i32
+ %arrayidx = getelementptr inbounds i16, i16* %filtMemLR, i32 %idxprom
+ %0 = bitcast i16* %arrayidx to i8*
+ %1 = bitcast i16* %inputLR to i8*
+ %or = or i32 %shr1, 50331648
+; CHECK: memh(r{{[0-9]*.}}++{{.}}#-2:circ(m{{[0-1]}}))
+ %2 = call i8* @llvm.hexagon.circ.ldh(i8* %0, i8* %1, i32 %or, i32 -2)
+ %3 = bitcast i8* %2 to i16*
+ %4 = load i16, i16* %3, align 2, !tbaa !2
+ ret i16 %4
+}
+
+declare i8* @llvm.hexagon.circ.ldh(i8*, i8*, i32, i32) nounwind
+
+define zeroext i8 @foo4(i16 zeroext %filtMemLen, i16* %filtMemLR, i16 signext %filtMemIndex) nounwind {
+entry:
+ %inputLR = alloca i8, align 1
+ %conv = zext i16 %filtMemLen to i32
+ %shr1 = lshr i32 %conv, 1
+ %idxprom = sext i16 %filtMemIndex to i32
+ %arrayidx = getelementptr inbounds i16, i16* %filtMemLR, i32 %idxprom
+ %0 = bitcast i16* %arrayidx to i8*
+ %or = or i32 %shr1, 33554432
+; CHECK: memub(r{{[0-9]*.}}++{{.}}#-1:circ(m{{[0-1]}}))
+ %1 = call i8* @llvm.hexagon.circ.ldub(i8* %0, i8* %inputLR, i32 %or, i32 -1)
+ %2 = load i8, i8* %1, align 1, !tbaa !0
+ ret i8 %2
+}
+
+declare i8* @llvm.hexagon.circ.ldub(i8*, i8*, i32, i32) nounwind
+
+define zeroext i16 @foo5(i16 zeroext %filtMemLen, i16* %filtMemLR, i16 signext %filtMemIndex) nounwind {
+entry:
+ %inputLR = alloca i16, align 2
+ %conv = zext i16 %filtMemLen to i32
+ %shr1 = and i32 %conv, 65534
+ %idxprom = sext i16 %filtMemIndex to i32
+ %arrayidx = getelementptr inbounds i16, i16* %filtMemLR, i32 %idxprom
+ %0 = bitcast i16* %arrayidx to i8*
+ %1 = bitcast i16* %inputLR to i8*
+ %or = or i32 %shr1, 50331648
+; CHECK: memuh(r{{[0-9]*.}}++{{.}}#-2:circ(m{{[0-1]}}))
+ %2 = call i8* @llvm.hexagon.circ.lduh(i8* %0, i8* %1, i32 %or, i32 -2)
+ %3 = bitcast i8* %2 to i16*
+ %4 = load i16, i16* %3, align 2, !tbaa !2
+ ret i16 %4
+}
+
+declare i8* @llvm.hexagon.circ.lduh(i8*, i8*, i32, i32) nounwind
+
+define i32 @foo6(i16 zeroext %filtMemLen, i16* %filtMemLR, i16 signext %filtMemIndex) nounwind {
+entry:
+ %inputLR = alloca i32, align 4
+ %conv = zext i16 %filtMemLen to i32
+ %shr1 = lshr i32 %conv, 1
+ %idxprom = sext i16 %filtMemIndex to i32
+ %arrayidx = getelementptr inbounds i16, i16* %filtMemLR, i32 %idxprom
+ %0 = bitcast i16* %arrayidx to i8*
+ %1 = bitcast i32* %inputLR to i8*
+ %shl = shl nuw nsw i32 %shr1, 2
+ %or = or i32 %shl, 67108864
+; CHECK: memw(r{{[0-9]*.}}++{{.}}#-4:circ(m{{[0-1]}}))
+ %2 = call i8* @llvm.hexagon.circ.ldw(i8* %0, i8* %1, i32 %or, i32 -4)
+ %3 = bitcast i8* %2 to i32*
+ %4 = load i32, i32* %3, align 4, !tbaa !3
+ ret i32 %4
+}
+
+declare i8* @llvm.hexagon.circ.ldw(i8*, i8*, i32, i32) nounwind
+
+!0 = !{!"omnipotent char", !1}
+!1 = !{!"Simple C/C++ TBAA"}
+!2 = !{!"short", !0}
+!3 = !{!"int", !0}
diff --git a/test/CodeGen/Hexagon/circ_ldd_bug.ll b/test/CodeGen/Hexagon/circ_ldd_bug.ll
new file mode 100644
index 000000000000..d15b5c964eb7
--- /dev/null
+++ b/test/CodeGen/Hexagon/circ_ldd_bug.ll
@@ -0,0 +1,255 @@
+; RUN: llc -O2 < %s
+target datalayout = "e-p:32:32:32-i64:64:64-i32:32:32-i16:16:16-i1:32:32-f64:64:64-f32:32:32-v64:64:64-v32:32:32-a0:0-n16:32"
+target triple = "hexagon"
+
+; We would fail on this file with:
+; Unimplemented
+; UNREACHABLE executed at llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp:615!
+; This happened because, after unrolling a loop with an ldd_circ instruction, we
+; would have several TFCR and ldd_circ instruction sequences.
+; %vreg0 (CRRegs) = TFCR %vreg0 (IntRegs)
+; = ldd_circ( , , vreg0)
+; %vreg1 (CRRegs) = TFCR %vreg1 (IntRegs)
+; = ldd_circ( , , vreg0)
+; The scheduler would move the CRRegs to the top of the loop. The allocator
+; would try to spill the CRRegs after running out of them. We don't have code to
+; spill CRRegs and the above assertion would be triggered.
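+;
+; A rough C-level sketch of the kind of loop this IR was reduced from (the
+; Q6_circ_load_update_D / Q6_P_vdmacs_S1 intrinsic names are assumed from the
+; Hexagon SDK and are illustrative only); unrolling it replicates the
+; TFCR + ldd_circ pair shown above:
+;
+;   static short delay_line[64];             /* circular history buffer       */
+;   long long filter(const long long *coefs, int nTaps) {
+;     short *pDelay = delay_line;
+;     long long acc = 0, samples = 0;
+;     for (int i = 0; i < nTaps; ++i) {
+;       /* one circular 64-bit load per iteration ...                         */
+;       Q6_circ_load_update_D(samples, pDelay, -1, nTaps, 4);
+;       /* ... feeding one dual multiply-accumulate                           */
+;       acc = Q6_P_vdmacs_S1(acc, coefs[i], samples);
+;     }
+;     return acc;
+;   }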
+declare i8* @llvm.hexagon.circ.ldd(i8*, i8*, i32, i32) nounwind
+
+define i32 @test(i16 zeroext %var0, i16* %var1, i16 signext %var2, i16* nocapture %var3) nounwind {
+entry:
+ %var4 = alloca i64, align 8
+ %conv = zext i16 %var0 to i32
+ %shr5 = lshr i32 %conv, 1
+ %idxprom = sext i16 %var2 to i32
+ %arrayidx = getelementptr inbounds i16, i16* %var1, i32 %idxprom
+ %0 = bitcast i16* %var3 to i64*
+ %1 = load i64, i64* %0, align 8, !tbaa !1
+ %2 = bitcast i16* %arrayidx to i8*
+ %3 = bitcast i64* %var4 to i8*
+ %shl = shl nuw nsw i32 %shr5, 3
+ %or = or i32 %shl, 83886080
+ %4 = call i8* @llvm.hexagon.circ.ldd(i8* %2, i8* %3, i32 %or, i32 -8)
+ %sub = add nsw i32 %shr5, -1
+ %cmp6 = icmp sgt i32 %sub, 0
+ %5 = load i64, i64* %var4, align 8, !tbaa !1
+ %6 = call i64 @llvm.hexagon.M2.vdmacs.s1(i64 0, i64 %1, i64 %5)
+ br i1 %cmp6, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph: ; preds = %entry
+ %incdec.ptr = getelementptr inbounds i16, i16* %var3, i32 4
+ %7 = bitcast i16* %incdec.ptr to i64*
+ %8 = zext i16 %var0 to i32
+ %9 = lshr i32 %8, 1
+ %10 = add i32 %9, -1
+ %xtraiter = urem i32 %10, 8
+ %lcmp = icmp ne i32 %xtraiter, 0
+ br i1 %lcmp, label %unr.cmp60, label %for.body.lr.ph.split.split
+
+unr.cmp60: ; preds = %for.body.lr.ph
+ %un.tmp61 = icmp eq i32 %xtraiter, 1
+ br i1 %un.tmp61, label %for.body.unr53, label %unr.cmp51
+
+unr.cmp51: ; preds = %unr.cmp60
+ %un.tmp52 = icmp eq i32 %xtraiter, 2
+ br i1 %un.tmp52, label %for.body.unr44, label %unr.cmp42
+
+unr.cmp42: ; preds = %unr.cmp51
+ %un.tmp43 = icmp eq i32 %xtraiter, 3
+ br i1 %un.tmp43, label %for.body.unr35, label %unr.cmp33
+
+unr.cmp33: ; preds = %unr.cmp42
+ %un.tmp34 = icmp eq i32 %xtraiter, 4
+ br i1 %un.tmp34, label %for.body.unr26, label %unr.cmp24
+
+unr.cmp24: ; preds = %unr.cmp33
+ %un.tmp25 = icmp eq i32 %xtraiter, 5
+ br i1 %un.tmp25, label %for.body.unr17, label %unr.cmp
+
+unr.cmp: ; preds = %unr.cmp24
+ %un.tmp = icmp eq i32 %xtraiter, 6
+ br i1 %un.tmp, label %for.body.unr13, label %for.body.unr
+
+for.body.unr: ; preds = %unr.cmp
+ %11 = call i8* @llvm.hexagon.circ.ldd(i8* %4, i8* %3, i32 %or, i32 -8)
+ %12 = load i64, i64* %7, align 8, !tbaa !1
+ %inc.unr = add nsw i32 0, 1
+ %incdec.ptr4.unr = getelementptr inbounds i64, i64* %7, i32 1
+ %cmp.unr = icmp slt i32 %inc.unr, %sub
+ %13 = load i64, i64* %var4, align 8, !tbaa !1
+ %14 = call i64 @llvm.hexagon.M2.vdmacs.s1(i64 %6, i64 %12, i64 %13)
+ br label %for.body.unr13
+
+for.body.unr13: ; preds = %for.body.unr, %unr.cmp
+ %15 = phi i64 [ %6, %unr.cmp ], [ %14, %for.body.unr ]
+ %pvar6.09.unr = phi i64* [ %7, %unr.cmp ], [ %incdec.ptr4.unr, %for.body.unr ]
+ %var8.0.in8.unr = phi i8* [ %4, %unr.cmp ], [ %11, %for.body.unr ]
+ %i.07.unr = phi i32 [ 0, %unr.cmp ], [ %inc.unr, %for.body.unr ]
+ %16 = call i8* @llvm.hexagon.circ.ldd(i8* %var8.0.in8.unr, i8* %3, i32 %or, i32 -8)
+ %17 = load i64, i64* %pvar6.09.unr, align 8, !tbaa !1
+ %inc.unr14 = add nsw i32 %i.07.unr, 1
+ %incdec.ptr4.unr15 = getelementptr inbounds i64, i64* %pvar6.09.unr, i32 1
+ %cmp.unr16 = icmp slt i32 %inc.unr14, %sub
+ %18 = load i64, i64* %var4, align 8, !tbaa !1
+ %19 = call i64 @llvm.hexagon.M2.vdmacs.s1(i64 %15, i64 %17, i64 %18)
+ br label %for.body.unr17
+
+for.body.unr17: ; preds = %for.body.unr13, %unr.cmp24
+ %20 = phi i64 [ %6, %unr.cmp24 ], [ %19, %for.body.unr13 ]
+ %pvar6.09.unr18 = phi i64* [ %7, %unr.cmp24 ], [ %incdec.ptr4.unr15, %for.body.unr13 ]
+ %var8.0.in8.unr19 = phi i8* [ %4, %unr.cmp24 ], [ %16, %for.body.unr13 ]
+ %i.07.unr20 = phi i32 [ 0, %unr.cmp24 ], [ %inc.unr14, %for.body.unr13 ]
+ %21 = call i8* @llvm.hexagon.circ.ldd(i8* %var8.0.in8.unr19, i8* %3, i32 %or, i32 -8)
+ %22 = load i64, i64* %pvar6.09.unr18, align 8, !tbaa !1
+ %inc.unr21 = add nsw i32 %i.07.unr20, 1
+ %incdec.ptr4.unr22 = getelementptr inbounds i64, i64* %pvar6.09.unr18, i32 1
+ %cmp.unr23 = icmp slt i32 %inc.unr21, %sub
+ %23 = load i64, i64* %var4, align 8, !tbaa !1
+ %24 = call i64 @llvm.hexagon.M2.vdmacs.s1(i64 %20, i64 %22, i64 %23)
+ br label %for.body.unr26
+
+for.body.unr26: ; preds = %for.body.unr17, %unr.cmp33
+ %25 = phi i64 [ %6, %unr.cmp33 ], [ %24, %for.body.unr17 ]
+ %pvar6.09.unr27 = phi i64* [ %7, %unr.cmp33 ], [ %incdec.ptr4.unr22, %for.body.unr17 ]
+ %var8.0.in8.unr28 = phi i8* [ %4, %unr.cmp33 ], [ %21, %for.body.unr17 ]
+ %i.07.unr29 = phi i32 [ 0, %unr.cmp33 ], [ %inc.unr21, %for.body.unr17 ]
+ %26 = call i8* @llvm.hexagon.circ.ldd(i8* %var8.0.in8.unr28, i8* %3, i32 %or, i32 -8)
+ %27 = load i64, i64* %pvar6.09.unr27, align 8, !tbaa !1
+ %inc.unr30 = add nsw i32 %i.07.unr29, 1
+ %incdec.ptr4.unr31 = getelementptr inbounds i64, i64* %pvar6.09.unr27, i32 1
+ %cmp.unr32 = icmp slt i32 %inc.unr30, %sub
+ %28 = load i64, i64* %var4, align 8, !tbaa !1
+ %29 = call i64 @llvm.hexagon.M2.vdmacs.s1(i64 %25, i64 %27, i64 %28)
+ br label %for.body.unr35
+
+for.body.unr35: ; preds = %for.body.unr26, %unr.cmp42
+ %30 = phi i64 [ %6, %unr.cmp42 ], [ %29, %for.body.unr26 ]
+ %pvar6.09.unr36 = phi i64* [ %7, %unr.cmp42 ], [ %incdec.ptr4.unr31, %for.body.unr26 ]
+ %var8.0.in8.unr37 = phi i8* [ %4, %unr.cmp42 ], [ %26, %for.body.unr26 ]
+ %i.07.unr38 = phi i32 [ 0, %unr.cmp42 ], [ %inc.unr30, %for.body.unr26 ]
+ %31 = call i8* @llvm.hexagon.circ.ldd(i8* %var8.0.in8.unr37, i8* %3, i32 %or, i32 -8)
+ %32 = load i64, i64* %pvar6.09.unr36, align 8, !tbaa !1
+ %inc.unr39 = add nsw i32 %i.07.unr38, 1
+ %incdec.ptr4.unr40 = getelementptr inbounds i64, i64* %pvar6.09.unr36, i32 1
+ %cmp.unr41 = icmp slt i32 %inc.unr39, %sub
+ %33 = load i64, i64* %var4, align 8, !tbaa !1
+ %34 = call i64 @llvm.hexagon.M2.vdmacs.s1(i64 %30, i64 %32, i64 %33)
+ br label %for.body.unr44
+
+for.body.unr44: ; preds = %for.body.unr35, %unr.cmp51
+ %35 = phi i64 [ %6, %unr.cmp51 ], [ %34, %for.body.unr35 ]
+ %pvar6.09.unr45 = phi i64* [ %7, %unr.cmp51 ], [ %incdec.ptr4.unr40, %for.body.unr35 ]
+ %var8.0.in8.unr46 = phi i8* [ %4, %unr.cmp51 ], [ %31, %for.body.unr35 ]
+ %i.07.unr47 = phi i32 [ 0, %unr.cmp51 ], [ %inc.unr39, %for.body.unr35 ]
+ %36 = call i8* @llvm.hexagon.circ.ldd(i8* %var8.0.in8.unr46, i8* %3, i32 %or, i32 -8)
+ %37 = load i64, i64* %pvar6.09.unr45, align 8, !tbaa !1
+ %inc.unr48 = add nsw i32 %i.07.unr47, 1
+ %incdec.ptr4.unr49 = getelementptr inbounds i64, i64* %pvar6.09.unr45, i32 1
+ %cmp.unr50 = icmp slt i32 %inc.unr48, %sub
+ %38 = load i64, i64* %var4, align 8, !tbaa !1
+ %39 = call i64 @llvm.hexagon.M2.vdmacs.s1(i64 %35, i64 %37, i64 %38)
+ br label %for.body.unr53
+
+for.body.unr53: ; preds = %for.body.unr44, %unr.cmp60
+ %40 = phi i64 [ %6, %unr.cmp60 ], [ %39, %for.body.unr44 ]
+ %pvar6.09.unr54 = phi i64* [ %7, %unr.cmp60 ], [ %incdec.ptr4.unr49, %for.body.unr44 ]
+ %var8.0.in8.unr55 = phi i8* [ %4, %unr.cmp60 ], [ %36, %for.body.unr44 ]
+ %i.07.unr56 = phi i32 [ 0, %unr.cmp60 ], [ %inc.unr48, %for.body.unr44 ]
+ %41 = call i8* @llvm.hexagon.circ.ldd(i8* %var8.0.in8.unr55, i8* %3, i32 %or, i32 -8)
+ %42 = load i64, i64* %pvar6.09.unr54, align 8, !tbaa !1
+ %inc.unr57 = add nsw i32 %i.07.unr56, 1
+ %incdec.ptr4.unr58 = getelementptr inbounds i64, i64* %pvar6.09.unr54, i32 1
+ %cmp.unr59 = icmp slt i32 %inc.unr57, %sub
+ %43 = load i64, i64* %var4, align 8, !tbaa !1
+ %44 = call i64 @llvm.hexagon.M2.vdmacs.s1(i64 %40, i64 %42, i64 %43)
+ br label %for.body.lr.ph.split
+
+for.body.lr.ph.split: ; preds = %for.body.unr53
+ %45 = icmp ult i32 %10, 8
+ br i1 %45, label %for.end.loopexit, label %for.body.lr.ph.split.split
+
+for.body.lr.ph.split.split: ; preds = %for.body.lr.ph.split, %for.body.lr.ph
+ %.unr = phi i64 [ %44, %for.body.lr.ph.split ], [ %6, %for.body.lr.ph ]
+ %pvar6.09.unr62 = phi i64* [ %incdec.ptr4.unr58, %for.body.lr.ph.split ], [ %7, %for.body.lr.ph ]
+ %var8.0.in8.unr63 = phi i8* [ %41, %for.body.lr.ph.split ], [ %4, %for.body.lr.ph ]
+ %i.07.unr64 = phi i32 [ %inc.unr57, %for.body.lr.ph.split ], [ 0, %for.body.lr.ph ]
+ %.lcssa12.unr = phi i64 [ %44, %for.body.lr.ph.split ], [ 0, %for.body.lr.ph ]
+ br label %for.body
+
+for.body: ; preds = %for.body, %for.body.lr.ph.split.split
+ %46 = phi i64 [ %.unr, %for.body.lr.ph.split.split ], [ %78, %for.body ]
+ %pvar6.09 = phi i64* [ %pvar6.09.unr62, %for.body.lr.ph.split.split ], [ %scevgep71, %for.body ]
+ %var8.0.in8 = phi i8* [ %var8.0.in8.unr63, %for.body.lr.ph.split.split ], [ %75, %for.body ]
+ %i.07 = phi i32 [ %i.07.unr64, %for.body.lr.ph.split.split ], [ %inc.7, %for.body ]
+ %47 = call i8* @llvm.hexagon.circ.ldd(i8* %var8.0.in8, i8* %3, i32 %or, i32 -8)
+ %48 = load i64, i64* %pvar6.09, align 8, !tbaa !1
+ %inc = add nsw i32 %i.07, 1
+ %49 = load i64, i64* %var4, align 8, !tbaa !1
+ %50 = call i64 @llvm.hexagon.M2.vdmacs.s1(i64 %46, i64 %48, i64 %49)
+ %51 = call i8* @llvm.hexagon.circ.ldd(i8* %47, i8* %3, i32 %or, i32 -8)
+ %scevgep = getelementptr i64, i64* %pvar6.09, i32 1
+ %52 = load i64, i64* %scevgep, align 8, !tbaa !1
+ %inc.1 = add nsw i32 %inc, 1
+ %53 = load i64, i64* %var4, align 8, !tbaa !1
+ %54 = call i64 @llvm.hexagon.M2.vdmacs.s1(i64 %50, i64 %52, i64 %53)
+ %55 = call i8* @llvm.hexagon.circ.ldd(i8* %51, i8* %3, i32 %or, i32 -8)
+ %scevgep65 = getelementptr i64, i64* %scevgep, i32 1
+ %56 = load i64, i64* %scevgep65, align 8, !tbaa !1
+ %inc.2 = add nsw i32 %inc.1, 1
+ %57 = load i64, i64* %var4, align 8, !tbaa !1
+ %58 = call i64 @llvm.hexagon.M2.vdmacs.s1(i64 %54, i64 %56, i64 %57)
+ %59 = call i8* @llvm.hexagon.circ.ldd(i8* %55, i8* %3, i32 %or, i32 -8)
+ %scevgep66 = getelementptr i64, i64* %scevgep65, i32 1
+ %60 = load i64, i64* %scevgep66, align 8, !tbaa !1
+ %inc.3 = add nsw i32 %inc.2, 1
+ %61 = load i64, i64* %var4, align 8, !tbaa !1
+ %62 = call i64 @llvm.hexagon.M2.vdmacs.s1(i64 %58, i64 %60, i64 %61)
+ %63 = call i8* @llvm.hexagon.circ.ldd(i8* %59, i8* %3, i32 %or, i32 -8)
+ %scevgep67 = getelementptr i64, i64* %scevgep66, i32 1
+ %64 = load i64, i64* %scevgep67, align 8, !tbaa !1
+ %inc.4 = add nsw i32 %inc.3, 1
+ %65 = load i64, i64* %var4, align 8, !tbaa !1
+ %66 = call i64 @llvm.hexagon.M2.vdmacs.s1(i64 %62, i64 %64, i64 %65)
+ %67 = call i8* @llvm.hexagon.circ.ldd(i8* %63, i8* %3, i32 %or, i32 -8)
+ %scevgep68 = getelementptr i64, i64* %scevgep67, i32 1
+ %68 = load i64, i64* %scevgep68, align 8, !tbaa !1
+ %inc.5 = add nsw i32 %inc.4, 1
+ %69 = load i64, i64* %var4, align 8, !tbaa !1
+ %70 = call i64 @llvm.hexagon.M2.vdmacs.s1(i64 %66, i64 %68, i64 %69)
+ %71 = call i8* @llvm.hexagon.circ.ldd(i8* %67, i8* %3, i32 %or, i32 -8)
+ %scevgep69 = getelementptr i64, i64* %scevgep68, i32 1
+ %72 = load i64, i64* %scevgep69, align 8, !tbaa !1
+ %inc.6 = add nsw i32 %inc.5, 1
+ %73 = load i64, i64* %var4, align 8, !tbaa !1
+ %74 = call i64 @llvm.hexagon.M2.vdmacs.s1(i64 %70, i64 %72, i64 %73)
+ %75 = call i8* @llvm.hexagon.circ.ldd(i8* %71, i8* %3, i32 %or, i32 -8)
+ %scevgep70 = getelementptr i64, i64* %scevgep69, i32 1
+ %76 = load i64, i64* %scevgep70, align 8, !tbaa !1
+ %inc.7 = add nsw i32 %inc.6, 1
+ %77 = load i64, i64* %var4, align 8, !tbaa !1
+ %78 = call i64 @llvm.hexagon.M2.vdmacs.s1(i64 %74, i64 %76, i64 %77)
+ %cmp.7 = icmp slt i32 %inc.7, %sub
+ %scevgep71 = getelementptr i64, i64* %scevgep70, i32 1
+ br i1 %cmp.7, label %for.body, label %for.end.loopexit.unr-lcssa
+
+for.end.loopexit.unr-lcssa: ; preds = %for.body
+ %.lcssa12.ph = phi i64 [ %78, %for.body ]
+ br label %for.end.loopexit
+
+for.end.loopexit: ; preds = %for.end.loopexit.unr-lcssa, %for.body.lr.ph.split
+ %.lcssa12 = phi i64 [ %44, %for.body.lr.ph.split ], [ %.lcssa12.ph, %for.end.loopexit.unr-lcssa ]
+ br label %for.end
+
+for.end: ; preds = %for.end.loopexit, %entry
+ %.lcssa = phi i64 [ %6, %entry ], [ %.lcssa12, %for.end.loopexit ]
+ %79 = call i32 @llvm.hexagon.S2.vrndpackwhs(i64 %.lcssa)
+ ret i32 %79
+}
+
+declare i64 @llvm.hexagon.M2.vdmacs.s1(i64, i64, i64) nounwind readnone
+
+declare i32 @llvm.hexagon.S2.vrndpackwhs(i64) nounwind readnone
+
+!0 = !{!"long long", !1}
+!1 = !{!"omnipotent char", !2}
+!2 = !{!"Simple C/C++ TBAA"}
diff --git a/test/CodeGen/Hexagon/circ_ldw.ll b/test/CodeGen/Hexagon/circ_ldw.ll
new file mode 100644
index 000000000000..4511a9cf69da
--- /dev/null
+++ b/test/CodeGen/Hexagon/circ_ldw.ll
@@ -0,0 +1,18 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv5 < %s | FileCheck %s
+; CHECK: r{{[0-9]*}} = memw(r{{[0-9]*.}}++{{.}}#-4:circ(m0))
+
+
+%union.vect64 = type { i64 }
+%union.vect32 = type { i32 }
+
+define i32* @HallowedBeThyName(%union.vect64* nocapture %pRx, %union.vect32* %pLut, %union.vect64* nocapture %pOut, i64 %dc.coerce, i32 %shift, i32 %numSamples) nounwind {
+entry:
+ %vLutNext = alloca i32, align 4
+ %0 = bitcast %union.vect32* %pLut to i8*
+ %1 = bitcast i32* %vLutNext to i8*
+ %2 = call i8* @llvm.hexagon.circ.ldw(i8* %0, i8* %1, i32 83886144, i32 -4)
+ %3 = bitcast i8* %2 to i32*
+ ret i32* %3
+}
+
+declare i8* @llvm.hexagon.circ.ldw(i8*, i8*, i32, i32) nounwind
diff --git a/test/CodeGen/Hexagon/circ_st.ll b/test/CodeGen/Hexagon/circ_st.ll
new file mode 100644
index 000000000000..244ca3bae714
--- /dev/null
+++ b/test/CodeGen/Hexagon/circ_st.ll
@@ -0,0 +1,108 @@
+; RUN: llc -march=hexagon -verify-machineinstrs=true < %s | FileCheck %s
+; Testing for these 5 variants of circular store:
+; Q6_circ_store_update_B(inputLR, pDelay, -1, nConvLength, 4);
+; Q6_circ_store_update_D(inputLR, pDelay, -1, nConvLength, 4);
+; Q6_circ_store_update_HL(inputLR, pDelay, -1, nConvLength, 4);
+; Q6_circ_store_update_HH(inputLR, pDelay, -1, nConvLength, 4);
+; Q6_circ_store_update_W(inputLR, pDelay, -1, nConvLength, 4);
+; producing these:
+; memb(r1++#-1:circ(m0)) = r3
+; memd(r1++#-8:circ(m0)) = r1:0
+; memh(r1++#-2:circ(m0)) = r3
+; memh(r1++#-2:circ(m0)) = r3.h
+; memw(r1++#-4:circ(m0)) = r0
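+;
+; For reference, a rough C-level sketch of one store variant; the call shape
+; mirrors the list above, and the exact prototype of Q6_circ_store_update_W is
+; assumed to come from the Hexagon SDK intrinsic headers.
+;
+;   enum { NTAPS = 16 };
+;   static int delay_buf[NTAPS];     /* circular delay line                    */
+;   void write_one_tap(int inputLR) {
+;     int *pDelay = delay_buf;       /* current position, updated by the macro */
+;     int nConvLength = NTAPS;       /* circular buffer length in elements     */
+;     /* Store one word and post-update pDelay by -1 element, wrapping inside  */
+;     /* the buffer; this is the memw(rX++#-4:circ(m0)) = rY form checked      */
+;     /* below.                                                                */
+;     Q6_circ_store_update_W(inputLR, pDelay, -1, nConvLength, 4);
+;   }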
+
+; ModuleID = 'circ_st.i'
+target datalayout = "e-p:32:32:32-i64:64:64-i32:32:32-i16:16:16-i1:32:32-f64:64:64-f32:32:32-v64:64:64-v32:32:32-a0:0-n16:32"
+target triple = "hexagon"
+
+define zeroext i8 @foo1(i16 zeroext %filtMemLen, i16* %filtMemLR, i16 signext %filtMemIndex) nounwind {
+entry:
+ %conv = zext i16 %filtMemLen to i32
+ %shr2 = lshr i32 %conv, 1
+ %idxprom = sext i16 %filtMemIndex to i32
+ %arrayidx = getelementptr inbounds i16, i16* %filtMemLR, i32 %idxprom
+ %0 = bitcast i16* %arrayidx to i8*
+ %or = or i32 %shr2, 33554432
+; CHECK: memb(r{{[0-9]*}}{{.}}++{{.}}#-1:circ(m{{[0-1]}}))
+ %1 = tail call i8* @llvm.hexagon.circ.stb(i8* %0, i32 0, i32 %or, i32 -1)
+ %2 = load i8, i8* %1, align 1, !tbaa !0
+ ret i8 %2
+}
+
+declare i8* @llvm.hexagon.circ.stb(i8*, i32, i32, i32) nounwind
+
+define i64 @foo2(i16 zeroext %filtMemLen, i16* %filtMemLR, i16 signext %filtMemIndex) nounwind {
+entry:
+ %conv = zext i16 %filtMemLen to i32
+ %shr1 = lshr i32 %conv, 1
+ %idxprom = sext i16 %filtMemIndex to i32
+ %arrayidx = getelementptr inbounds i16, i16* %filtMemLR, i32 %idxprom
+ %0 = bitcast i16* %arrayidx to i8*
+ %shl = shl nuw nsw i32 %shr1, 3
+ %or = or i32 %shl, 83886080
+; CHECK: memd(r{{[0-9]*}}{{.}}++{{.}}#-8:circ(m{{[0-1]}}))
+ %1 = tail call i8* @llvm.hexagon.circ.std(i8* %0, i64 undef, i32 %or, i32 -8)
+ %2 = bitcast i8* %1 to i64*
+ %3 = load i64, i64* %2, align 8, !tbaa !0
+ ret i64 %3
+}
+
+declare i8* @llvm.hexagon.circ.std(i8*, i64, i32, i32) nounwind
+
+define signext i16 @foo3(i16 zeroext %filtMemLen, i16* %filtMemLR, i16 signext %filtMemIndex) nounwind {
+entry:
+ %conv = zext i16 %filtMemLen to i32
+ %shr2 = and i32 %conv, 65534
+ %idxprom = sext i16 %filtMemIndex to i32
+ %arrayidx = getelementptr inbounds i16, i16* %filtMemLR, i32 %idxprom
+ %0 = bitcast i16* %arrayidx to i8*
+ %or = or i32 %shr2, 50331648
+; CHECK: memh(r{{[0-9]*}}{{.}}++{{.}}#-2:circ(m{{[0-1]}}))
+ %1 = tail call i8* @llvm.hexagon.circ.sth(i8* %0, i32 0, i32 %or, i32 -2)
+ %2 = bitcast i8* %1 to i16*
+ %3 = load i16, i16* %2, align 2, !tbaa !2
+ ret i16 %3
+}
+
+declare i8* @llvm.hexagon.circ.sth(i8*, i32, i32, i32) nounwind
+
+define signext i16 @foo5(i16 zeroext %filtMemLen, i16* %filtMemLR, i16 signext %filtMemIndex) nounwind {
+entry:
+ %conv = zext i16 %filtMemLen to i32
+ %shr2 = and i32 %conv, 65534
+ %idxprom = sext i16 %filtMemIndex to i32
+ %arrayidx = getelementptr inbounds i16, i16* %filtMemLR, i32 %idxprom
+ %0 = bitcast i16* %arrayidx to i8*
+ %or = or i32 %shr2, 50331648
+; CHECK: memh(r{{[0-9]*}}{{.}}++{{.}}#-2:circ(m{{[0-1]}})){{ *}}={{ *}}r{{[0-9]*}}.h
+ %1 = tail call i8* @llvm.hexagon.circ.sthhi(i8* %0, i32 0, i32 %or, i32 -2)
+ %2 = bitcast i8* %1 to i16*
+ %3 = load i16, i16* %2, align 2, !tbaa !2
+ ret i16 %3
+}
+
+declare i8* @llvm.hexagon.circ.sthhi(i8*, i32, i32, i32) nounwind
+
+define i32 @foo6(i16 zeroext %filtMemLen, i16* %filtMemLR, i16 signext %filtMemIndex) nounwind {
+entry:
+ %conv = zext i16 %filtMemLen to i32
+ %shr1 = lshr i32 %conv, 1
+ %idxprom = sext i16 %filtMemIndex to i32
+ %arrayidx = getelementptr inbounds i16, i16* %filtMemLR, i32 %idxprom
+ %0 = bitcast i16* %arrayidx to i8*
+ %shl = shl nuw nsw i32 %shr1, 2
+ %or = or i32 %shl, 67108864
+; CHECK: memw(r{{[0-9]*}}{{.}}++{{.}}#-4:circ(m{{[0-1]}}))
+ %1 = tail call i8* @llvm.hexagon.circ.stw(i8* %0, i32 undef, i32 %or, i32 -4)
+ %2 = bitcast i8* %1 to i32*
+ %3 = load i32, i32* %2, align 4, !tbaa !3
+ ret i32 %3
+}
+
+declare i8* @llvm.hexagon.circ.stw(i8*, i32, i32, i32) nounwind
+
+!0 = !{!"omnipotent char", !1}
+!1 = !{!"Simple C/C++ TBAA"}
+!2 = !{!"short", !0}
+!3 = !{!"int", !0}
diff --git a/test/CodeGen/Hexagon/clr_set_toggle.ll b/test/CodeGen/Hexagon/clr_set_toggle.ll
new file mode 100644
index 000000000000..87c52956129e
--- /dev/null
+++ b/test/CodeGen/Hexagon/clr_set_toggle.ll
@@ -0,0 +1,160 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv5 < %s | FileCheck %s
+; Check that single-bit AND/OR/XOR masks are selected as clrbit/setbit/togglebit.
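+;
+; Roughly the C-level patterns exercised below (a sketch; the functions in this
+; file go through allocas, but the intent is the same):
+;
+;   unsigned clr31(unsigned x) { return x & 0x7fffffffu; } /* -> clrbit(r, #31)    */
+;   unsigned set15(unsigned x) { return x | 0x00008000u; } /* -> setbit(r, #15)    */
+;   unsigned tgl15(unsigned x) { return x ^ 0x00008000u; } /* -> togglebit(r, #15) */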
+
+define i32 @my_clrbit(i32 %x) nounwind {
+entry:
+; CHECK: r{{[0-9]+}} = clrbit(r{{[0-9]+}}, #31)
+ %x.addr = alloca i32, align 4
+ store i32 %x, i32* %x.addr, align 4
+ %0 = load i32, i32* %x.addr, align 4
+ %and = and i32 %0, 2147483647
+ ret i32 %and
+}
+
+define i64 @my_clrbit2(i64 %x) nounwind {
+entry:
+; CHECK: r{{[0-9]+}} = clrbit(r{{[0-9]+}}, #31)
+ %x.addr = alloca i64, align 8
+ store i64 %x, i64* %x.addr, align 8
+ %0 = load i64, i64* %x.addr, align 8
+ %and = and i64 %0, -2147483649
+ ret i64 %and
+}
+
+define i64 @my_clrbit3(i64 %x) nounwind {
+entry:
+; CHECK: r{{[0-9]+}} = clrbit(r{{[0-9]+}}, #31)
+ %x.addr = alloca i64, align 8
+ store i64 %x, i64* %x.addr, align 8
+ %0 = load i64, i64* %x.addr, align 8
+ %and = and i64 %0, 9223372036854775807
+ ret i64 %and
+}
+
+define i32 @my_clrbit4(i32 %x) nounwind {
+entry:
+; CHECK: r{{[0-9]+}} = clrbit(r{{[0-9]+}}, #13)
+ %x.addr = alloca i32, align 4
+ store i32 %x, i32* %x.addr, align 4
+ %0 = load i32, i32* %x.addr, align 4
+ %and = and i32 %0, -8193
+ ret i32 %and
+}
+
+define i64 @my_clrbit5(i64 %x) nounwind {
+entry:
+; CHECK: r{{[0-9]+}} = clrbit(r{{[0-9]+}}, #13)
+ %x.addr = alloca i64, align 8
+ store i64 %x, i64* %x.addr, align 8
+ %0 = load i64, i64* %x.addr, align 8
+ %and = and i64 %0, -8193
+ ret i64 %and
+}
+
+define i64 @my_clrbit6(i64 %x) nounwind {
+entry:
+; CHECK: r{{[0-9]+}} = clrbit(r{{[0-9]+}}, #27)
+ %x.addr = alloca i64, align 8
+ store i64 %x, i64* %x.addr, align 8
+ %0 = load i64, i64* %x.addr, align 8
+ %and = and i64 %0, -576460752303423489
+ ret i64 %and
+}
+
+define zeroext i16 @my_setbit(i16 zeroext %crc) nounwind {
+entry:
+; CHECK: memh(r{{[0-9]+}}+#0){{ *}}={{ *}}setbit(#15)
+ %crc.addr = alloca i16, align 2
+ store i16 %crc, i16* %crc.addr, align 2
+ %0 = load i16, i16* %crc.addr, align 2
+ %conv = zext i16 %0 to i32
+ %or = or i32 %conv, 32768
+ %conv1 = trunc i32 %or to i16
+ store i16 %conv1, i16* %crc.addr, align 2
+ %1 = load i16, i16* %crc.addr, align 2
+ ret i16 %1
+}
+
+define i32 @my_setbit2(i32 %x) nounwind {
+entry:
+; CHECK: r{{[0-9]+}}{{ *}}={{ *}}setbit(r{{[0-9]+}}, #15)
+ %x.addr = alloca i32, align 4
+ store i32 %x, i32* %x.addr, align 4
+ %0 = load i32, i32* %x.addr, align 4
+ %or = or i32 %0, 32768
+ ret i32 %or
+}
+
+define i64 @my_setbit3(i64 %x) nounwind {
+entry:
+; CHECK: r{{[0-9]+}}{{ *}}={{ *}}setbit(r{{[0-9]+}}, #15)
+ %x.addr = alloca i64, align 8
+ store i64 %x, i64* %x.addr, align 8
+ %0 = load i64, i64* %x.addr, align 8
+ %or = or i64 %0, 32768
+ ret i64 %or
+}
+
+define i32 @my_setbit4(i32 %x) nounwind {
+entry:
+; CHECK: r{{[0-9]+}}{{ *}}={{ *}}setbit(r{{[0-9]+}}, #31)
+ %x.addr = alloca i32, align 4
+ store i32 %x, i32* %x.addr, align 4
+ %0 = load i32, i32* %x.addr, align 4
+ %or = or i32 %0, -2147483648
+ ret i32 %or
+}
+
+define i64 @my_setbit5(i64 %x) nounwind {
+entry:
+; CHECK: r{{[0-9]+}}{{ *}}={{ *}}setbit(r{{[0-9]+}}, #13)
+ %x.addr = alloca i64, align 8
+ store i64 %x, i64* %x.addr, align 8
+ %0 = load i64, i64* %x.addr, align 8
+ %or = or i64 %0, 35184372088832
+ ret i64 %or
+}
+
+define zeroext i16 @my_togglebit(i16 zeroext %crc) nounwind {
+entry:
+; CHECK: r{{[0-9]+}} = togglebit(r{{[0-9]+}}, #15)
+ %crc.addr = alloca i16, align 2
+ store i16 %crc, i16* %crc.addr, align 2
+ %0 = load i16, i16* %crc.addr, align 2
+ %conv = zext i16 %0 to i32
+ %xor = xor i32 %conv, 32768
+ %conv1 = trunc i32 %xor to i16
+ store i16 %conv1, i16* %crc.addr, align 2
+ %1 = load i16, i16* %crc.addr, align 2
+ ret i16 %1
+}
+
+define i32 @my_togglebit2(i32 %x) nounwind {
+entry:
+; CHECK: r{{[0-9]+}} = togglebit(r{{[0-9]+}}, #15)
+ %x.addr = alloca i32, align 4
+ store i32 %x, i32* %x.addr, align 4
+ %0 = load i32, i32* %x.addr, align 4
+ %xor = xor i32 %0, 32768
+ ret i32 %xor
+}
+
+define i64 @my_togglebit3(i64 %x) nounwind {
+entry:
+; CHECK: r{{[0-9]+}} = togglebit(r{{[0-9]+}}, #15)
+ %x.addr = alloca i64, align 8
+ store i64 %x, i64* %x.addr, align 8
+ %0 = load i64, i64* %x.addr, align 8
+ %xor = xor i64 %0, 32768
+ ret i64 %xor
+}
+
+define i64 @my_togglebit4(i64 %x) nounwind {
+entry:
+; CHECK: r{{[0-9]+}} = togglebit(r{{[0-9]+}}, #20)
+ %x.addr = alloca i64, align 8
+ store i64 %x, i64* %x.addr, align 8
+ %0 = load i64, i64* %x.addr, align 8
+ %xor = xor i64 %0, 4503599627370496
+ ret i64 %xor
+}
diff --git a/test/CodeGen/Hexagon/cmp-not.ll b/test/CodeGen/Hexagon/cmp-not.ll
deleted file mode 100644
index abcddc38b23b..000000000000
--- a/test/CodeGen/Hexagon/cmp-not.ll
+++ /dev/null
@@ -1,50 +0,0 @@
-; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
-; Check that we generate matching compare insn.
-
-; Function Attrs: nounwind
-define i32 @neqi(i32 %argc) #0 {
-entry:
- %p = alloca i8, align 1
- %0 = tail call i1 @llvm.hexagon.C4.cmpneqi(i32 %argc, i32 512)
- %conv = zext i1 %0 to i8
- store volatile i8 %conv, i8* %p, align 1
- %p.0.p.0. = load volatile i8* %p, align 1
- %conv1 = zext i8 %p.0.p.0. to i32
- ret i32 %conv1
-}
-; CHECK: p{{[0-3]}}{{ *}} = !cmp.eq(r{{[0-9]+}}, ##512)
-
-; Function Attrs: nounwind readnone
-declare i1 @llvm.hexagon.C4.cmpneqi(i32, i32) #1
-
-; Function Attrs: nounwind
-define i32 @ngti(i32 %argc) #0 {
-entry:
- %p = alloca i8, align 1
- %0 = tail call i1 @llvm.hexagon.C4.cmpltei(i32 %argc, i32 4)
- %conv = zext i1 %0 to i8
- store volatile i8 %conv, i8* %p, align 1
- %p.0.p.0. = load volatile i8* %p, align 1
- %conv1 = zext i8 %p.0.p.0. to i32
- ret i32 %conv1
-}
-; CHECK: p{{[0-3]}}{{ *}} = !cmp.gt(r{{[0-9]+}}, #4)
-
-; Function Attrs: nounwind readnone
-declare i1 @llvm.hexagon.C4.cmpltei(i32, i32) #1
-
-; Function Attrs: nounwind
-define i32 @ngtui(i32 %argc) #0 {
-entry:
- %p = alloca i8, align 1
- %0 = tail call i1 @llvm.hexagon.C4.cmplteui(i32 %argc, i32 4)
- %conv = zext i1 %0 to i8
- store volatile i8 %conv, i8* %p, align 1
- %p.0.p.0. = load volatile i8* %p, align 1
- %conv1 = zext i8 %p.0.p.0. to i32
- ret i32 %conv1
-}
-; CHECK: p{{[0-3]}}{{ *}} = !cmp.gtu(r{{[0-9]+}}, #4)
-
-; Function Attrs: nounwind readnone
-declare i1 @llvm.hexagon.C4.cmplteui(i32, i32) #1
diff --git a/test/CodeGen/Hexagon/cmp-to-predreg.ll b/test/CodeGen/Hexagon/cmp-to-predreg.ll
index d430b901866d..2b65343ab2cf 100644
--- a/test/CodeGen/Hexagon/cmp-to-predreg.ll
+++ b/test/CodeGen/Hexagon/cmp-to-predreg.ll
@@ -2,7 +2,7 @@
; Check that we generate compare to predicate register.
define i32 @compare1(i32 %a, i32 %b) nounwind {
-; CHECK: p{{[0-3]}}{{ *}}={{ *}}!cmp.eq(r{{[0-9]+}},{{ *}}r{{[0-9]+}})
+; CHECK: p{{[0-3]}}{{ *}}={{ *[!]?}}cmp.eq(r{{[0-9]+}},{{ *}}r{{[0-9]+}})
entry:
%cmp = icmp ne i32 %a, %b
%add = add nsw i32 %a, %b
@@ -12,7 +12,7 @@ entry:
}
define i32 @compare2(i32 %a) nounwind {
-; CHECK: p{{[0-3]}}{{ *}}={{ *}}!cmp.eq(r{{[0-9]+}},{{ *}}#10)
+; CHECK: p{{[0-3]}}{{ *}}={{ *[!]?}}cmp.eq(r{{[0-9]+}},{{ *}}#10)
entry:
%cmp = icmp ne i32 %a, 10
%add = add nsw i32 %a, 10
diff --git a/test/CodeGen/Hexagon/cmp_pred.ll b/test/CodeGen/Hexagon/cmp_pred.ll
index 37db3b499f63..39549a1f2d54 100644
--- a/test/CodeGen/Hexagon/cmp_pred.ll
+++ b/test/CodeGen/Hexagon/cmp_pred.ll
@@ -1,3 +1,4 @@
+; XFAIL:
; RUN: llc -march=hexagon -mcpu=hexagonv5 < %s | FileCheck %s
 ; Generate various cmpb instructions followed by if (p0) .. if (!p0)...
target triple = "hexagon"
diff --git a/test/CodeGen/Hexagon/cmp_pred2.ll b/test/CodeGen/Hexagon/cmp_pred2.ll
index a20b9f09b6e0..28f3e1bac8d1 100644
--- a/test/CodeGen/Hexagon/cmp_pred2.ll
+++ b/test/CodeGen/Hexagon/cmp_pred2.ll
@@ -11,7 +11,7 @@ entry:
br i1 %cmp, label %if.then, label %entry.if.end_crit_edge
entry.if.end_crit_edge:
- %.pre = load i32* @c, align 4
+ %.pre = load i32, i32* @c, align 4
br label %if.end
if.then:
@@ -32,7 +32,7 @@ entry:
br i1 %cmp, label %entry.if.end_crit_edge, label %if.then
entry.if.end_crit_edge:
- %.pre = load i32* @c, align 4
+ %.pre = load i32, i32* @c, align 4
br label %if.end
if.then:
@@ -53,7 +53,7 @@ entry:
br i1 %cmp, label %entry.if.end_crit_edge, label %if.then
entry.if.end_crit_edge:
- %.pre = load i32* @c, align 4
+ %.pre = load i32, i32* @c, align 4
br label %if.end
if.then:
@@ -73,7 +73,7 @@ entry:
br i1 %cmp, label %if.then, label %entry.if.end_crit_edge
entry.if.end_crit_edge:
- %.pre = load i32* @c, align 4
+ %.pre = load i32, i32* @c, align 4
br label %if.end
if.then:
diff --git a/test/CodeGen/Hexagon/cmp_pred_reg.ll b/test/CodeGen/Hexagon/cmp_pred_reg.ll
index 37db3b499f63..39549a1f2d54 100644
--- a/test/CodeGen/Hexagon/cmp_pred_reg.ll
+++ b/test/CodeGen/Hexagon/cmp_pred_reg.ll
@@ -1,3 +1,4 @@
+; XFAIL:
; RUN: llc -march=hexagon -mcpu=hexagonv5 < %s | FileCheck %s
 ; Generate various cmpb instructions followed by if (p0) .. if (!p0)...
target triple = "hexagon"
diff --git a/test/CodeGen/Hexagon/cmpb_pred.ll b/test/CodeGen/Hexagon/cmpb_pred.ll
index 0960da1fa060..1a43e6291696 100644
--- a/test/CodeGen/Hexagon/cmpb_pred.ll
+++ b/test/CodeGen/Hexagon/cmpb_pred.ll
@@ -1,3 +1,4 @@
+; XFAIL:
; RUN: llc -march=hexagon -mcpu=hexagonv5 < %s | FileCheck %s
 ; Generate various cmpb instructions followed by if (p0) .. if (!p0)...
target triple = "hexagon"
@@ -16,7 +17,7 @@ entry:
define i32 @Func_3b(i32) nounwind readonly {
entry:
; CHECK-NOT: mux
- %1 = load i8* @Enum_global, align 1
+ %1 = load i8, i8* @Enum_global, align 1
%2 = trunc i32 %0 to i8
%cmp = icmp ne i8 %1, %2
%selv = zext i1 %cmp to i32
@@ -35,7 +36,7 @@ entry:
define i32 @Func_3d(i32) nounwind readonly {
entry:
; CHECK-NOT: mux
- %1 = load i8* @Enum_global, align 1
+ %1 = load i8, i8* @Enum_global, align 1
%2 = trunc i32 %0 to i8
%cmp = icmp eq i8 %1, %2
%selv = zext i1 %cmp to i32
@@ -45,7 +46,7 @@ entry:
define i32 @Func_3e(i32) nounwind readonly {
entry:
; CHECK-NOT: mux
- %1 = load i8* @Enum_global, align 1
+ %1 = load i8, i8* @Enum_global, align 1
%2 = trunc i32 %0 to i8
%cmp = icmp eq i8 %1, %2
%selv = zext i1 %cmp to i32
diff --git a/test/CodeGen/Hexagon/combine.ll b/test/CodeGen/Hexagon/combine.ll
index 721998596c81..2e320d977d62 100644
--- a/test/CodeGen/Hexagon/combine.ll
+++ b/test/CodeGen/Hexagon/combine.ll
@@ -6,8 +6,8 @@
define void @foo() nounwind {
entry:
- %0 = load i32* @j, align 4
- %1 = load i64* @k, align 8
+ %0 = load i32, i32* @j, align 4
+ %1 = load i64, i64* @k, align 8
%conv = trunc i64 %1 to i32
%2 = call i64 @llvm.hexagon.A2.combinew(i32 %0, i32 %conv)
store i64 %2, i64* @k, align 8
diff --git a/test/CodeGen/Hexagon/combine_ir.ll b/test/CodeGen/Hexagon/combine_ir.ll
index e100cf7196f1..634a5c82a916 100644
--- a/test/CodeGen/Hexagon/combine_ir.ll
+++ b/test/CodeGen/Hexagon/combine_ir.ll
@@ -4,7 +4,7 @@
define void @word(i32* nocapture %a) nounwind {
entry:
- %0 = load i32* %a, align 4
+ %0 = load i32, i32* %a, align 4
%1 = zext i32 %0 to i64
tail call void @bar(i64 %1) nounwind
ret void
@@ -17,10 +17,10 @@ declare void @bar(i64)
define void @halfword(i16* nocapture %a) nounwind {
entry:
- %0 = load i16* %a, align 2
+ %0 = load i16, i16* %a, align 2
%1 = zext i16 %0 to i64
- %add.ptr = getelementptr inbounds i16* %a, i32 1
- %2 = load i16* %add.ptr, align 2
+ %add.ptr = getelementptr inbounds i16, i16* %a, i32 1
+ %2 = load i16, i16* %add.ptr, align 2
%3 = zext i16 %2 to i64
%4 = shl nuw nsw i64 %3, 16
%ins = or i64 %4, %1
@@ -33,10 +33,10 @@ entry:
define void @byte(i8* nocapture %a) nounwind {
entry:
- %0 = load i8* %a, align 1
+ %0 = load i8, i8* %a, align 1
%1 = zext i8 %0 to i64
- %add.ptr = getelementptr inbounds i8* %a, i32 1
- %2 = load i8* %add.ptr, align 1
+ %add.ptr = getelementptr inbounds i8, i8* %a, i32 1
+ %2 = load i8, i8* %add.ptr, align 1
%3 = zext i8 %2 to i64
%4 = shl nuw nsw i64 %3, 8
%ins = or i64 %4, %1
diff --git a/test/CodeGen/Hexagon/convertdptoint.ll b/test/CodeGen/Hexagon/convertdptoint.ll
index fa068c4c8a51..a09c2fd14b12 100644
--- a/test/CodeGen/Hexagon/convertdptoint.ll
+++ b/test/CodeGen/Hexagon/convertdptoint.ll
@@ -14,13 +14,13 @@ entry:
store i32 0, i32* %retval
store double 1.540000e+01, double* %a, align 8
store double 9.100000e+00, double* %b, align 8
- %0 = load double* %a, align 8
- %1 = load double* %b, align 8
+ %0 = load double, double* %a, align 8
+ %1 = load double, double* %b, align 8
%add = fadd double %0, %1
store double %add, double* %c, align 8
- %2 = load double* %c, align 8
+ %2 = load double, double* %c, align 8
%conv = fptosi double %2 to i32
store i32 %conv, i32* %i, align 4
- %3 = load i32* %i, align 4
+ %3 = load i32, i32* %i, align 4
ret i32 %3
}
diff --git a/test/CodeGen/Hexagon/convertdptoll.ll b/test/CodeGen/Hexagon/convertdptoll.ll
index 1b4dd86bd01b..f46d46cf76b1 100644
--- a/test/CodeGen/Hexagon/convertdptoll.ll
+++ b/test/CodeGen/Hexagon/convertdptoll.ll
@@ -14,14 +14,14 @@ entry:
store i32 0, i32* %retval
store double 1.540000e+01, double* %a, align 8
store double 9.100000e+00, double* %b, align 8
- %0 = load double* %a, align 8
- %1 = load double* %b, align 8
+ %0 = load double, double* %a, align 8
+ %1 = load double, double* %b, align 8
%add = fadd double %0, %1
store double %add, double* %c, align 8
- %2 = load double* %c, align 8
+ %2 = load double, double* %c, align 8
%conv = fptosi double %2 to i64
store i64 %conv, i64* %i, align 8
- %3 = load i64* %i, align 8
+ %3 = load i64, i64* %i, align 8
%conv1 = trunc i64 %3 to i32
ret i32 %conv1
}
diff --git a/test/CodeGen/Hexagon/convertsptoint.ll b/test/CodeGen/Hexagon/convertsptoint.ll
index b8a9d6c8083c..7593e57d852f 100644
--- a/test/CodeGen/Hexagon/convertsptoint.ll
+++ b/test/CodeGen/Hexagon/convertsptoint.ll
@@ -14,13 +14,13 @@ entry:
store i32 0, i32* %retval
store float 0x402ECCCCC0000000, float* %a, align 4
store float 0x4022333340000000, float* %b, align 4
- %0 = load float* %a, align 4
- %1 = load float* %b, align 4
+ %0 = load float, float* %a, align 4
+ %1 = load float, float* %b, align 4
%add = fadd float %0, %1
store float %add, float* %c, align 4
- %2 = load float* %c, align 4
+ %2 = load float, float* %c, align 4
%conv = fptosi float %2 to i32
store i32 %conv, i32* %i, align 4
- %3 = load i32* %i, align 4
+ %3 = load i32, i32* %i, align 4
ret i32 %3
}
diff --git a/test/CodeGen/Hexagon/convertsptoll.ll b/test/CodeGen/Hexagon/convertsptoll.ll
index 1c4df94784aa..d8432cbc812b 100644
--- a/test/CodeGen/Hexagon/convertsptoll.ll
+++ b/test/CodeGen/Hexagon/convertsptoll.ll
@@ -14,14 +14,14 @@ entry:
store i32 0, i32* %retval
store float 0x402ECCCCC0000000, float* %a, align 4
store float 0x4022333340000000, float* %b, align 4
- %0 = load float* %a, align 4
- %1 = load float* %b, align 4
+ %0 = load float, float* %a, align 4
+ %1 = load float, float* %b, align 4
%add = fadd float %0, %1
store float %add, float* %c, align 4
- %2 = load float* %c, align 4
+ %2 = load float, float* %c, align 4
%conv = fptosi float %2 to i64
store i64 %conv, i64* %i, align 8
- %3 = load i64* %i, align 8
+ %3 = load i64, i64* %i, align 8
%conv1 = trunc i64 %3 to i32
ret i32 %conv1
}
diff --git a/test/CodeGen/Hexagon/ctlz-cttz-ctpop.ll b/test/CodeGen/Hexagon/ctlz-cttz-ctpop.ll
index e942f8d0c5dd..b8f483298f8c 100644
--- a/test/CodeGen/Hexagon/ctlz-cttz-ctpop.ll
+++ b/test/CodeGen/Hexagon/ctlz-cttz-ctpop.ll
@@ -1,8 +1,10 @@
; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
-; CHECK: r{{[0-9]+}}:{{[0-9]+}} |= lsr(r{{[0-9]+}}:{{[0-9]+}}, #4)
-; CHECK: r{{[0-9]+}}:{{[0-9]+}} &= lsr(r{{[0-9]+}}:{{[0-9]+}}, #2)
-; CHECK: r{{[0-9]+}} += lsr(r{{[0-9]+}}, #4)
+; CHECK-DAG: ct0({{r[0-9]*:[0-9]*}})
+; CHECK-DAG: cl0({{r[0-9]*:[0-9]*}})
+; CHECK-DAG: ct0({{r[0-9]*}})
+; CHECK-DAG: cl0({{r[0-9]*}})
+; CHECK-DAG: r{{[0-9]+}} += lsr(r{{[0-9]+}}, #4)
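+; A C-level sketch of code whose lowering would match these patterns (the test
+; body below takes an i64 and an i32, as in the function signature that follows;
+; the exact expression is illustrative only):
+;   int foo(unsigned long long a, unsigned b) {
+;     return __builtin_clzll(a) + __builtin_ctzll(a)  /* cl0/ct0 on a reg pair */
+;          + __builtin_clz(b)   + __builtin_ctz(b)    /* cl0/ct0 on a register */
+;          + __builtin_popcount(b);                   /* expanded via lsr/adds */
+;   }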
define i32 @foo(i64 %a, i32 %b) nounwind {
entry:
diff --git a/test/CodeGen/Hexagon/dadd.ll b/test/CodeGen/Hexagon/dadd.ll
index 602978ac01d3..5fcd705bab23 100644
--- a/test/CodeGen/Hexagon/dadd.ll
+++ b/test/CodeGen/Hexagon/dadd.ll
@@ -1,7 +1,7 @@
; RUN: llc -march=hexagon -mcpu=hexagonv5 < %s | FileCheck %s
; Check that we generate double precision floating point add in V5.
-; CHECK: r{{[0-9]+}}:{{[0-9]+}} = dfadd(r{{[0-9]+}}:{{[0-9]+}}, r{{[0-9]+}}:{{[0-9]+}})
+; CHECK: call __hexagon_adddf3
define i32 @main() nounwind {
@@ -11,8 +11,8 @@ entry:
%c = alloca double, align 8
store double 1.540000e+01, double* %a, align 8
store double 9.100000e+00, double* %b, align 8
- %0 = load double* %a, align 8
- %1 = load double* %b, align 8
+ %0 = load double, double* %a, align 8
+ %1 = load double, double* %b, align 8
%add = fadd double %0, %1
store double %add, double* %c, align 8
ret i32 0
diff --git a/test/CodeGen/Hexagon/dmul.ll b/test/CodeGen/Hexagon/dmul.ll
index d7437739ee90..1b79e0aa7d70 100644
--- a/test/CodeGen/Hexagon/dmul.ll
+++ b/test/CodeGen/Hexagon/dmul.ll
@@ -1,7 +1,7 @@
; RUN: llc -march=hexagon -mcpu=hexagonv5 < %s | FileCheck %s
; Check that we generate double precision floating point multiply in V5.
-; CHECK: r{{[0-9]+}}:{{[0-9]+}} = dfmpy(r{{[0-9]+}}:{{[0-9]+}}, r{{[0-9]+}}:{{[0-9]+}})
+; CHECK: call __hexagon_muldf3
define i32 @main() nounwind {
entry:
@@ -10,8 +10,8 @@ entry:
%c = alloca double, align 8
store double 1.540000e+01, double* %a, align 8
store double 9.100000e+00, double* %b, align 8
- %0 = load double* %b, align 8
- %1 = load double* %a, align 8
+ %0 = load double, double* %b, align 8
+ %1 = load double, double* %a, align 8
%mul = fmul double %0, %1
store double %mul, double* %c, align 8
ret i32 0
diff --git a/test/CodeGen/Hexagon/double.ll b/test/CodeGen/Hexagon/double.ll
index c3b6f378ec8a..b4d025cd7fd0 100644
--- a/test/CodeGen/Hexagon/double.ll
+++ b/test/CodeGen/Hexagon/double.ll
@@ -10,13 +10,13 @@ entry:
store double* %acc, double** %acc.addr, align 4
store double %num, double* %num.addr, align 8
store double %num2, double* %num2.addr, align 8
- %0 = load double** %acc.addr, align 4
- %1 = load double* %0
- %2 = load double* %num.addr, align 8
+ %0 = load double*, double** %acc.addr, align 4
+ %1 = load double, double* %0
+ %2 = load double, double* %num.addr, align 8
%add = fadd double %1, %2
- %3 = load double* %num2.addr, align 8
+ %3 = load double, double* %num2.addr, align 8
%sub = fsub double %add, %3
- %4 = load double** %acc.addr, align 4
+ %4 = load double*, double** %acc.addr, align 4
store double %sub, double* %4
ret void
}
diff --git a/test/CodeGen/Hexagon/doubleconvert-ieee-rnd-near.ll b/test/CodeGen/Hexagon/doubleconvert-ieee-rnd-near.ll
index 54e7ce3bcdd3..6bf8224904ec 100644
--- a/test/CodeGen/Hexagon/doubleconvert-ieee-rnd-near.ll
+++ b/test/CodeGen/Hexagon/doubleconvert-ieee-rnd-near.ll
@@ -14,13 +14,13 @@ entry:
store i32 0, i32* %retval
store double 1.540000e+01, double* %a, align 8
store double 9.100000e+00, double* %b, align 8
- %0 = load double* %a, align 8
- %1 = load double* %b, align 8
+ %0 = load double, double* %a, align 8
+ %1 = load double, double* %b, align 8
%add = fadd double %0, %1
store double %add, double* %c, align 8
- %2 = load double* %c, align 8
+ %2 = load double, double* %c, align 8
%conv = fptosi double %2 to i32
store i32 %conv, i32* %i, align 4
- %3 = load i32* %i, align 4
+ %3 = load i32, i32* %i, align 4
ret i32 %3
}
diff --git a/test/CodeGen/Hexagon/dsub.ll b/test/CodeGen/Hexagon/dsub.ll
index 4f9d39ed0b24..8b37301d84fb 100644
--- a/test/CodeGen/Hexagon/dsub.ll
+++ b/test/CodeGen/Hexagon/dsub.ll
@@ -1,7 +1,7 @@
; RUN: llc -march=hexagon -mcpu=hexagonv5 < %s | FileCheck %s
; Check that we generate double precision floating point subtract in V5.
-; CHECK: r{{[0-9]+}}:{{[0-9]+}} = dfsub(r{{[0-9]+}}:{{[0-9]+}}, r{{[0-9]+}}:{{[0-9]+}})
+; CHECK: call __hexagon_subdf3
define i32 @main() nounwind {
entry:
@@ -10,8 +10,8 @@ entry:
%c = alloca double, align 8
store double 1.540000e+01, double* %a, align 8
store double 9.100000e+00, double* %b, align 8
- %0 = load double* %b, align 8
- %1 = load double* %a, align 8
+ %0 = load double, double* %b, align 8
+ %1 = load double, double* %a, align 8
%sub = fsub double %0, %1
store double %sub, double* %c, align 8
ret i32 0
diff --git a/test/CodeGen/Hexagon/dualstore.ll b/test/CodeGen/Hexagon/dualstore.ll
index f7d7e8bbe75d..33d9ce9b9351 100644
--- a/test/CodeGen/Hexagon/dualstore.ll
+++ b/test/CodeGen/Hexagon/dualstore.ll
@@ -1,17 +1,12 @@
-; RUN: llc -march=hexagon -mcpu=hexagonv4 -disable-hexagon-misched < %s | FileCheck %s
+; RUN: llc -march=hexagon -disable-hexagon-misched < %s | FileCheck %s
; Check that we generate dual stores in one packet in V4
-; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#{{[0-9]+}}){{ *}}={{ *}}##500000
-; CHECK-NEXT: memw(r{{[0-9]+}}{{ *}}+{{ *}}#{{[0-9]+}}){{ *}}={{ *}}##100000
-; CHECK-NEXT: }
+; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#{{[0-9]+}}){{ *}}=
+; CHECK-NEXT: memw(r{{[0-9]+}}{{ *}}+{{ *}}#{{[0-9]+}}){{ *}}=
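+; The source pattern being compiled is essentially (a sketch):
+;   void f(int v, int *p1, int *p2) { *p1 = v; *p2 = v; }
+; and the two word stores are expected to issue in a single packet.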
-@Reg = global i32 0, align 4
-define i32 @main() nounwind {
+define i32 @main(i32 %v, i32* %p1, i32* %p2) nounwind {
entry:
- %number= alloca i32, align 4
- store i32 500000, i32* %number, align 4
- %number1= alloca i32, align 4
- store i32 100000, i32* %number1, align 4
+ store i32 %v, i32* %p1, align 4
+ store i32 %v, i32* %p2, align 4
ret i32 0
}
-
diff --git a/test/CodeGen/Hexagon/expand-condsets-basic.ll b/test/CodeGen/Hexagon/expand-condsets-basic.ll
new file mode 100644
index 000000000000..16fe8af47b13
--- /dev/null
+++ b/test/CodeGen/Hexagon/expand-condsets-basic.ll
@@ -0,0 +1,11 @@
+; RUN: llc -march=hexagon < %s | FileCheck %s
+; CHECK: if{{.*}}add
+; CHECK: if{{.*}}sub
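+; Roughly the C source this corresponds to (a sketch):
+;   int foo(int a, int b, int c, int d) { return a ? b + d : c - d; }
+; The select below is expected to become a predicated (if-ed) add and sub.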
+
+define i32 @foo (i1 %a, i32 %b, i32 %c, i32 %d) nounwind {
+ %1 = add i32 %b, %d
+ %2 = sub i32 %c, %d
+ %3 = select i1 %a, i32 %1, i32 %2
+ ret i32 %3
+}
+
diff --git a/test/CodeGen/Hexagon/expand-condsets-rm-segment.ll b/test/CodeGen/Hexagon/expand-condsets-rm-segment.ll
new file mode 100644
index 000000000000..cde7e6a09e1d
--- /dev/null
+++ b/test/CodeGen/Hexagon/expand-condsets-rm-segment.ll
@@ -0,0 +1,131 @@
+; RUN: llc -O2 < %s
+; REQUIRES: asserts
+
+target datalayout = "e-m:e-p:32:32-i1:32-i64:64-a:0-v32:32-n16:32"
+target triple = "hexagon-unknown--elf"
+
+%struct.cpumask = type { [1 x i32] }
+%struct.load_weight = type { i32, i32 }
+
+@sysctl_sched_latency = global i32 6000000, align 4
+@normalized_sysctl_sched_latency = global i32 6000000, align 4
+@sysctl_sched_tunable_scaling = global i8 1, align 1
+@sysctl_sched_min_granularity = global i32 750000, align 4
+@normalized_sysctl_sched_min_granularity = global i32 750000, align 4
+@sysctl_sched_wakeup_granularity = global i32 1000000, align 4
+@normalized_sysctl_sched_wakeup_granularity = global i32 1000000, align 4
+@sysctl_sched_migration_cost = constant i32 500000, align 4
+@sysctl_sched_shares_window = global i32 10000000, align 4
+@sysctl_sched_child_runs_first = common global i32 0, align 4
+@cpu_online_mask = external constant %struct.cpumask*
+
+; Function Attrs: noinline nounwind
+define void @sched_init_granularity() #0 {
+entry:
+ tail call fastcc void @update_sysctl()
+ ret void
+}
+
+; Function Attrs: noinline nounwind
+define internal fastcc void @update_sysctl() #0 {
+entry:
+ %call = tail call i32 @get_update_sysctl_factor()
+ %0 = load i32, i32* @normalized_sysctl_sched_min_granularity, align 4, !tbaa !1
+ %mul = mul i32 %0, %call
+ store i32 %mul, i32* @sysctl_sched_min_granularity, align 4, !tbaa !1
+ %1 = load i32, i32* @normalized_sysctl_sched_latency, align 4, !tbaa !1
+ %mul1 = mul i32 %1, %call
+ store i32 %mul1, i32* @sysctl_sched_latency, align 4, !tbaa !1
+ %2 = load i32, i32* @normalized_sysctl_sched_wakeup_granularity, align 4, !tbaa !1
+ %mul2 = mul i32 %2, %call
+ store i32 %mul2, i32* @sysctl_sched_wakeup_granularity, align 4, !tbaa !1
+ ret void
+}
+
+; Function Attrs: noinline nounwind
+define i32 @calc_delta_mine(i32 %delta_exec, i32 %weight, %struct.load_weight* nocapture %lw) #0 {
+entry:
+ %cmp = icmp ugt i32 %weight, 1
+ %conv = zext i32 %delta_exec to i64
+ br i1 %cmp, label %if.then, label %if.end, !prof !5
+
+if.then: ; preds = %entry
+ %conv2 = zext i32 %weight to i64
+ %mul = mul i64 %conv2, %conv
+ br label %if.end
+
+if.end: ; preds = %entry, %if.then
+ %tmp.0 = phi i64 [ %mul, %if.then ], [ %conv, %entry ]
+ %inv_weight = getelementptr inbounds %struct.load_weight, %struct.load_weight* %lw, i32 0, i32 1
+ %0 = load i32, i32* %inv_weight, align 4, !tbaa !6
+ %tobool4 = icmp eq i32 %0, 0
+ br i1 %tobool4, label %if.then5, label %if.end22
+
+if.then5: ; preds = %if.end
+ %weight7 = getelementptr inbounds %struct.load_weight, %struct.load_weight* %lw, i32 0, i32 0
+ %1 = load i32, i32* %weight7, align 4, !tbaa !9
+ %lnot9 = icmp eq i32 %1, 0
+ br i1 %lnot9, label %if.then17, label %if.else19, !prof !10
+
+if.then17: ; preds = %if.then5
+ store i32 -1, i32* %inv_weight, align 4, !tbaa !6
+ br label %if.end22
+
+if.else19: ; preds = %if.then5
+ %div = udiv i32 -1, %1
+ store i32 %div, i32* %inv_weight, align 4, !tbaa !6
+ br label %if.end22
+
+if.end22: ; preds = %if.end, %if.then17, %if.else19
+ %2 = phi i32 [ %0, %if.end ], [ -1, %if.then17 ], [ %div, %if.else19 ]
+ %cmp23 = icmp ugt i64 %tmp.0, 4294967295
+ br i1 %cmp23, label %if.then31, label %if.else37, !prof !10
+
+if.then31: ; preds = %if.end22
+ %add = add i64 %tmp.0, 32768
+ %shr = lshr i64 %add, 16
+ %conv33 = zext i32 %2 to i64
+ %mul34 = mul i64 %conv33, %shr
+ %add35 = add i64 %mul34, 32768
+ %shr36 = lshr i64 %add35, 16
+ br label %if.end43
+
+if.else37: ; preds = %if.end22
+ %conv39 = zext i32 %2 to i64
+ %mul40 = mul i64 %conv39, %tmp.0
+ %add41 = add i64 %mul40, 2147483648
+ %shr42 = lshr i64 %add41, 32
+ br label %if.end43
+
+if.end43: ; preds = %if.else37, %if.then31
+ %tmp.1 = phi i64 [ %shr36, %if.then31 ], [ %shr42, %if.else37 ]
+ %cmp49 = icmp ult i64 %tmp.1, 2147483647
+ %3 = trunc i64 %tmp.1 to i32
+ %conv51 = select i1 %cmp49, i32 %3, i32 2147483647
+ ret i32 %conv51
+}
+
+declare i32 @get_update_sysctl_factor() #0
+declare i32 @__bitmap_weight(i32*, i32) #1
+
+attributes #0 = { noinline nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { nounwind }
+
+!llvm.ident = !{!0}
+
+!0 = !{!"Clang 3.1"}
+!1 = !{!2, !2, i64 0}
+!2 = !{!"int", !3, i64 0}
+!3 = !{!"omnipotent char", !4, i64 0}
+!4 = !{!"Simple C/C++ TBAA"}
+!5 = !{!"branch_weights", i32 64, i32 4}
+!6 = !{!7, !8, i64 4}
+!7 = !{!"load_weight", !8, i64 0, !8, i64 4}
+!8 = !{!"long", !3, i64 0}
+!9 = !{!7, !8, i64 0}
+!10 = !{!"branch_weights", i32 4, i32 64}
+!11 = !{!12, !12, i64 0}
+!12 = !{!"any pointer", !3, i64 0}
+!13 = !{!3, !3, i64 0}
+!14 = !{i32 45854, i32 45878}
diff --git a/test/CodeGen/Hexagon/expand-condsets-undef.ll b/test/CodeGen/Hexagon/expand-condsets-undef.ll
new file mode 100644
index 000000000000..85e72aa22f0a
--- /dev/null
+++ b/test/CodeGen/Hexagon/expand-condsets-undef.ll
@@ -0,0 +1,28 @@
+; RUN: llc -O2 < %s
+; REQUIRES: asserts
+
+target datalayout = "e-m:e-p:32:32-i64:64-a:0-v32:32-n16:32"
+target triple = "hexagon"
+
+; Function Attrs: nounwind optsize ssp
+define internal fastcc void @foo() nounwind {
+if.else473:
+ %0 = load i64, i64* undef, align 8
+ %sub = sub nsw i64 undef, %0
+ %conv476 = sitofp i64 %sub to double
+ %mul477 = fmul double %conv476, 0x3F50624DE0000000
+ br i1 undef, label %cond.true540, label %cond.end548
+
+cond.true540:
+ %1 = fptrunc double %mul477 to float
+ %2 = fptosi float %1 to i32
+ br label %cond.end548
+
+cond.end548:
+ %cond549 = phi i32 [ %2, %cond.true540 ], [ undef, %if.else473 ]
+ call void @bar(i32 %cond549) nounwind
+ unreachable
+}
+
+declare void @bar(i32) nounwind
+
diff --git a/test/CodeGen/Hexagon/extload-combine.ll b/test/CodeGen/Hexagon/extload-combine.ll
index b3b8bf07032a..519177fc75fc 100644
--- a/test/CodeGen/Hexagon/extload-combine.ll
+++ b/test/CodeGen/Hexagon/extload-combine.ll
@@ -19,7 +19,7 @@ define i64 @short_test1() #0 {
; CHECK: combine(#0, [[VAR]])
entry:
store i16 0, i16* @a, align 2
- %0 = load i16* @b, align 2
+ %0 = load i16, i16* @b, align 2
%conv2 = zext i16 %0 to i64
ret i64 %conv2
}
@@ -30,7 +30,7 @@ define i64 @short_test2() #0 {
; CHECK: sxtw([[VAR1]])
entry:
store i16 0, i16* @a, align 2
- %0 = load i16* @c, align 2
+ %0 = load i16, i16* @c, align 2
%conv2 = sext i16 %0 to i64
ret i64 %conv2
}
@@ -41,7 +41,7 @@ define i64 @char_test1() #0 {
; CHECK: combine(#0, [[VAR2]])
entry:
store i8 0, i8* @char_a, align 1
- %0 = load i8* @char_b, align 1
+ %0 = load i8, i8* @char_b, align 1
%conv2 = zext i8 %0 to i64
ret i64 %conv2
}
@@ -52,7 +52,7 @@ define i64 @char_test2() #0 {
; CHECK: sxtw([[VAR3]])
entry:
store i8 0, i8* @char_a, align 1
- %0 = load i8* @char_c, align 1
+ %0 = load i8, i8* @char_c, align 1
%conv2 = sext i8 %0 to i64
ret i64 %conv2
}
@@ -63,7 +63,7 @@ define i64 @int_test1() #0 {
; CHECK: combine(#0, [[VAR4]])
entry:
store i32 0, i32* @int_a, align 4
- %0 = load i32* @int_b, align 4
+ %0 = load i32, i32* @int_b, align 4
%conv = zext i32 %0 to i64
ret i64 %conv
}
@@ -74,7 +74,7 @@ define i64 @int_test2() #0 {
; CHECK: sxtw([[VAR5]])
entry:
store i32 0, i32* @int_a, align 4
- %0 = load i32* @int_c, align 4
+ %0 = load i32, i32* @int_c, align 4
%conv = sext i32 %0 to i64
ret i64 %conv
}
diff --git a/test/CodeGen/Hexagon/fadd.ll b/test/CodeGen/Hexagon/fadd.ll
index b95e1475ff73..6cf0fbbccf73 100644
--- a/test/CodeGen/Hexagon/fadd.ll
+++ b/test/CodeGen/Hexagon/fadd.ll
@@ -10,8 +10,8 @@ entry:
%c = alloca float, align 4
store float 0x402ECCCCC0000000, float* %a, align 4
store float 0x4022333340000000, float* %b, align 4
- %0 = load float* %a, align 4
- %1 = load float* %b, align 4
+ %0 = load float, float* %a, align 4
+ %1 = load float, float* %b, align 4
%add = fadd float %0, %1
store float %add, float* %c, align 4
ret i32 0
diff --git a/test/CodeGen/Hexagon/fcmp.ll b/test/CodeGen/Hexagon/fcmp.ll
index e7b649e2b8c0..5cf3c57b5e9c 100644
--- a/test/CodeGen/Hexagon/fcmp.ll
+++ b/test/CodeGen/Hexagon/fcmp.ll
@@ -8,7 +8,7 @@ entry:
%retval = alloca i32, align 4
%y.addr = alloca float, align 4
store float %y, float* %y.addr, align 4
- %0 = load float* %y.addr, align 4
+ %0 = load float, float* %y.addr, align 4
%cmp = fcmp ogt float %0, 0x406AD7EFA0000000
br i1 %cmp, label %if.then, label %if.else
@@ -21,7 +21,7 @@ if.else: ; preds = %entry
br label %return
return: ; preds = %if.else, %if.then
- %1 = load i32* %retval
+ %1 = load i32, i32* %retval
ret i32 %1
}
@@ -31,7 +31,7 @@ entry:
%a = alloca float, align 4
store i32 0, i32* %retval
store float 0x40012E0A00000000, float* %a, align 4
- %0 = load float* %a, align 4
+ %0 = load float, float* %a, align 4
%call = call i32 @foo(float %0)
ret i32 %call
}
diff --git a/test/CodeGen/Hexagon/float.ll b/test/CodeGen/Hexagon/float.ll
index bec9f5852e3c..03d1fbf44cb6 100644
--- a/test/CodeGen/Hexagon/float.ll
+++ b/test/CodeGen/Hexagon/float.ll
@@ -10,13 +10,13 @@ entry:
store float* %acc, float** %acc.addr, align 4
store float %num, float* %num.addr, align 4
store float %num2, float* %num2.addr, align 4
- %0 = load float** %acc.addr, align 4
- %1 = load float* %0
- %2 = load float* %num.addr, align 4
+ %0 = load float*, float** %acc.addr, align 4
+ %1 = load float, float* %0
+ %2 = load float, float* %num.addr, align 4
%add = fadd float %1, %2
- %3 = load float* %num2.addr, align 4
+ %3 = load float, float* %num2.addr, align 4
%sub = fsub float %add, %3
- %4 = load float** %acc.addr, align 4
+ %4 = load float*, float** %acc.addr, align 4
store float %sub, float* %4
ret void
}
diff --git a/test/CodeGen/Hexagon/floatconvert-ieee-rnd-near.ll b/test/CodeGen/Hexagon/floatconvert-ieee-rnd-near.ll
index bec9f5852e3c..03d1fbf44cb6 100644
--- a/test/CodeGen/Hexagon/floatconvert-ieee-rnd-near.ll
+++ b/test/CodeGen/Hexagon/floatconvert-ieee-rnd-near.ll
@@ -10,13 +10,13 @@ entry:
store float* %acc, float** %acc.addr, align 4
store float %num, float* %num.addr, align 4
store float %num2, float* %num2.addr, align 4
- %0 = load float** %acc.addr, align 4
- %1 = load float* %0
- %2 = load float* %num.addr, align 4
+ %0 = load float*, float** %acc.addr, align 4
+ %1 = load float, float* %0
+ %2 = load float, float* %num.addr, align 4
%add = fadd float %1, %2
- %3 = load float* %num2.addr, align 4
+ %3 = load float, float* %num2.addr, align 4
%sub = fsub float %add, %3
- %4 = load float** %acc.addr, align 4
+ %4 = load float*, float** %acc.addr, align 4
store float %sub, float* %4
ret void
}
diff --git a/test/CodeGen/Hexagon/fmul.ll b/test/CodeGen/Hexagon/fmul.ll
index 4766845b1143..4f55d0bec471 100644
--- a/test/CodeGen/Hexagon/fmul.ll
+++ b/test/CodeGen/Hexagon/fmul.ll
@@ -11,8 +11,8 @@ entry:
%c = alloca float, align 4
store float 0x402ECCCCC0000000, float* %a, align 4
store float 0x4022333340000000, float* %b, align 4
- %0 = load float* %b, align 4
- %1 = load float* %a, align 4
+ %0 = load float, float* %b, align 4
+ %1 = load float, float* %a, align 4
%mul = fmul float %0, %1
store float %mul, float* %c, align 4
ret i32 0
diff --git a/test/CodeGen/Hexagon/frame.ll b/test/CodeGen/Hexagon/frame.ll
index dc87c732d6fe..e87acb8cd796 100644
--- a/test/CodeGen/Hexagon/frame.ll
+++ b/test/CodeGen/Hexagon/frame.ll
@@ -10,14 +10,14 @@
define i32 @foo() nounwind {
entry:
%i = alloca i32, align 4
- %0 = load i32* @num, align 4
+ %0 = load i32, i32* @num, align 4
store i32 %0, i32* %i, align 4
- %1 = load i32* %i, align 4
- %2 = load i32* @acc, align 4
+ %1 = load i32, i32* %i, align 4
+ %2 = load i32, i32* @acc, align 4
%mul = mul nsw i32 %1, %2
- %3 = load i32* @num2, align 4
+ %3 = load i32, i32* @num2, align 4
%add = add nsw i32 %mul, %3
store i32 %add, i32* %i, align 4
- %4 = load i32* %i, align 4
+ %4 = load i32, i32* %i, align 4
ret i32 %4
}
diff --git a/test/CodeGen/Hexagon/fsub.ll b/test/CodeGen/Hexagon/fsub.ll
index 07c866f4c2e2..ca7bdc4d0b38 100644
--- a/test/CodeGen/Hexagon/fsub.ll
+++ b/test/CodeGen/Hexagon/fsub.ll
@@ -10,8 +10,8 @@ entry:
%c = alloca float, align 4
store float 0x402ECCCCC0000000, float* %a, align 4
store float 0x4022333340000000, float* %b, align 4
- %0 = load float* %b, align 4
- %1 = load float* %a, align 4
+ %0 = load float, float* %b, align 4
+ %1 = load float, float* %a, align 4
%sub = fsub float %0, %1
store float %sub, float* %c, align 4
ret i32 0
diff --git a/test/CodeGen/Hexagon/fusedandshift.ll b/test/CodeGen/Hexagon/fusedandshift.ll
index 022b3c673458..59a1e1d84fcc 100644
--- a/test/CodeGen/Hexagon/fusedandshift.ll
+++ b/test/CodeGen/Hexagon/fusedandshift.ll
@@ -5,7 +5,7 @@
define i32 @main(i16* %a, i16* %b) nounwind {
entry:
- %0 = load i16* %a, align 2
+ %0 = load i16, i16* %a, align 2
%conv1 = sext i16 %0 to i32
%shr1 = ashr i32 %conv1, 3
%and1 = and i32 %shr1, 15
diff --git a/test/CodeGen/Hexagon/gp-plus-offset-load.ll b/test/CodeGen/Hexagon/gp-plus-offset-load.ll
index a1b80a65f82a..cd1aacc2318a 100644
--- a/test/CodeGen/Hexagon/gp-plus-offset-load.ll
+++ b/test/CodeGen/Hexagon/gp-plus-offset-load.ll
@@ -12,7 +12,7 @@ entry:
br i1 %cmp, label %if.then, label %if.end
if.then: ; preds = %entry
- %0 = load i32* getelementptr inbounds (%struct.struc* @foo, i32 0, i32 3), align 4
+ %0 = load i32, i32* getelementptr inbounds (%struct.struc, %struct.struc* @foo, i32 0, i32 3), align 4
store i32 %0, i32* %ival, align 4
br label %if.end
@@ -27,7 +27,7 @@ entry:
br i1 %cmp, label %if.then, label %if.end
if.then: ; preds = %entry
- %0 = load i8* getelementptr inbounds (%struct.struc* @foo, i32 0, i32 1), align 1
+ %0 = load i8, i8* getelementptr inbounds (%struct.struc, %struct.struc* @foo, i32 0, i32 1), align 1
store i8 %0, i8* %ival, align 1
br label %if.end
@@ -42,7 +42,7 @@ entry:
br i1 %cmp, label %if.then, label %if.end
if.then: ; preds = %entry
- %0 = load i16* getelementptr inbounds (%struct.struc* @foo, i32 0, i32 2), align 2
+ %0 = load i16, i16* getelementptr inbounds (%struct.struc, %struct.struc* @foo, i32 0, i32 2), align 2
store i16 %0, i16* %ival, align 2
br label %if.end
diff --git a/test/CodeGen/Hexagon/gp-plus-offset-store.ll b/test/CodeGen/Hexagon/gp-plus-offset-store.ll
index c782b30920ea..6b181cabe475 100644
--- a/test/CodeGen/Hexagon/gp-plus-offset-store.ll
+++ b/test/CodeGen/Hexagon/gp-plus-offset-store.ll
@@ -12,7 +12,7 @@ entry:
br i1 %cmp, label %if.then, label %if.end
if.then: ; preds = %entry
- store i8 %ival, i8* getelementptr inbounds (%struct.struc* @foo, i32 0, i32 1), align 1
+ store i8 %ival, i8* getelementptr inbounds (%struct.struc, %struct.struc* @foo, i32 0, i32 1), align 1
br label %if.end
if.end: ; preds = %if.then, %entry
@@ -26,7 +26,7 @@ entry:
br i1 %cmp, label %if.then, label %if.end
if.then: ; preds = %entry
- store i16 %ival, i16* getelementptr inbounds (%struct.struc* @foo, i32 0, i32 2), align 2
+ store i16 %ival, i16* getelementptr inbounds (%struct.struc, %struct.struc* @foo, i32 0, i32 2), align 2
br label %if.end
if.end: ; preds = %if.then, %entry
diff --git a/test/CodeGen/Hexagon/gp-rel.ll b/test/CodeGen/Hexagon/gp-rel.ll
index 561869e8ef35..bb7cb182bf1b 100644
--- a/test/CodeGen/Hexagon/gp-rel.ll
+++ b/test/CodeGen/Hexagon/gp-rel.ll
@@ -10,14 +10,14 @@ entry:
; CHECK: r{{[0-9]+}}{{ *}}={{ *}}memw(#a)
; CHECK: r{{[0-9]+}}{{ *}}={{ *}}memw(#b)
; CHECK: if{{ *}}(p{{[0-3]}}) memw(##c){{ *}}={{ *}}r{{[0-9]+}}
- %0 = load i32* @a, align 4
- %1 = load i32* @b, align 4
+ %0 = load i32, i32* @a, align 4
+ %1 = load i32, i32* @b, align 4
%add = add nsw i32 %1, %0
%cmp = icmp eq i32 %0, %1
br i1 %cmp, label %if.then, label %entry.if.end_crit_edge
entry.if.end_crit_edge:
- %.pre = load i32* @c, align 4
+ %.pre = load i32, i32* @c, align 4
br label %if.end
if.then:
diff --git a/test/CodeGen/Hexagon/hwloop-cleanup.ll b/test/CodeGen/Hexagon/hwloop-cleanup.ll
index 6456ebff16d3..c04966a5a4b2 100644
--- a/test/CodeGen/Hexagon/hwloop-cleanup.ll
+++ b/test/CodeGen/Hexagon/hwloop-cleanup.ll
@@ -1,4 +1,5 @@
-; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+; RUN: llc -march=hexagon -mcpu=hexagonv4 -no-phi-elim-live-out-early-exit \
+; RUN: < %s | FileCheck %s
; Check that we remove the compare and induction variable instructions
; after generating hardware loops.
; Bug 6685.
@@ -20,11 +21,11 @@ for.body: ; preds = %for.body.preheader,
%sum.03 = phi i32 [ %add, %for.body ], [ 0, %for.body.preheader ]
%arrayidx.phi = phi i32* [ %arrayidx.inc, %for.body ], [ %b, %for.body.preheader ]
%i.02 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ]
- %0 = load i32* %arrayidx.phi, align 4
+ %0 = load i32, i32* %arrayidx.phi, align 4
%add = add nsw i32 %0, %sum.03
%inc = add nsw i32 %i.02, 1
%exitcond = icmp eq i32 %inc, %n
- %arrayidx.inc = getelementptr i32* %arrayidx.phi, i32 1
+ %arrayidx.inc = getelementptr i32, i32* %arrayidx.phi, i32 1
br i1 %exitcond, label %for.end.loopexit, label %for.body
for.end.loopexit:
@@ -50,11 +51,11 @@ for.body:
%sum.02 = phi i32 [ 0, %entry ], [ %add, %for.body ]
%arrayidx.phi = phi i32* [ %b, %entry ], [ %arrayidx.inc, %for.body ]
%i.01 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
- %0 = load i32* %arrayidx.phi, align 4
+ %0 = load i32, i32* %arrayidx.phi, align 4
%add = add nsw i32 %0, %sum.02
%inc = add nsw i32 %i.01, 1
%exitcond = icmp eq i32 %inc, 40
- %arrayidx.inc = getelementptr i32* %arrayidx.phi, i32 1
+ %arrayidx.inc = getelementptr i32, i32* %arrayidx.phi, i32 1
br i1 %exitcond, label %for.end, label %for.body
for.end:
@@ -76,7 +77,7 @@ for.body:
store i32 %i.01, i32* %arrayidx.phi, align 4
%inc = add nsw i32 %i.01, 1
%exitcond = icmp eq i32 %inc, 40
- %arrayidx.inc = getelementptr i32* %arrayidx.phi, i32 1
+ %arrayidx.inc = getelementptr i32, i32* %arrayidx.phi, i32 1
br i1 %exitcond, label %for.end, label %for.body
for.end:
diff --git a/test/CodeGen/Hexagon/hwloop-const.ll b/test/CodeGen/Hexagon/hwloop-const.ll
index 8204ddea3490..d549c1fef8c0 100644
--- a/test/CodeGen/Hexagon/hwloop-const.ll
+++ b/test/CodeGen/Hexagon/hwloop-const.ll
@@ -14,9 +14,9 @@ entry:
; CHECK: endloop
for.body: ; preds = %for.body, %entry
%i.02 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
- %arrayidx = getelementptr inbounds [25000 x i32]* @b, i32 0, i32 %i.02
+ %arrayidx = getelementptr inbounds [25000 x i32], [25000 x i32]* @b, i32 0, i32 %i.02
store i32 %i.02, i32* %arrayidx, align 4
- %arrayidx1 = getelementptr inbounds [25000 x i32]* @a, i32 0, i32 %i.02
+ %arrayidx1 = getelementptr inbounds [25000 x i32], [25000 x i32]* @a, i32 0, i32 %i.02
store i32 %i.02, i32* %arrayidx1, align 4
%inc = add nsw i32 %i.02, 1
%exitcond = icmp eq i32 %inc, 25000
diff --git a/test/CodeGen/Hexagon/hwloop-crit-edge.ll b/test/CodeGen/Hexagon/hwloop-crit-edge.ll
new file mode 100644
index 000000000000..4de4540c142e
--- /dev/null
+++ b/test/CodeGen/Hexagon/hwloop-crit-edge.ll
@@ -0,0 +1,58 @@
+; RUN: llc -O3 -march=hexagon -mcpu=hexagonv5 < %s | FileCheck %s
+;
+; Generate a hardware loop when the loop 'latch' block is different
+; from the loop 'exiting' block.
+
+; CHECK: loop0(.LBB{{.}}_{{.}}, r{{[0-9]+}})
+; CHECK: endloop0
+
+define void @test(i32* nocapture %pFL, i16 signext %nBS, i16* nocapture readonly %pHT) #0 {
+entry:
+ %0 = load i32, i32* %pFL, align 4
+ %1 = tail call i64 @llvm.hexagon.M2.dpmpyss.s0(i32 %0, i32 246)
+ %2 = tail call i64 @llvm.hexagon.S2.asl.r.p(i64 %1, i32 -13)
+ %3 = tail call i32 @llvm.hexagon.A2.sat(i64 %2)
+ store i32 %3, i32* %pFL, align 4
+ %cmp16 = icmp sgt i16 %nBS, 0
+ br i1 %cmp16, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:
+ %4 = sext i16 %nBS to i32
+ br label %for.body
+
+for.body:
+ %5 = phi i32 [ %3, %for.body.lr.ph ], [ %.pre, %for.body.for.body_crit_edge ]
+ %arrayidx3.phi = phi i32* [ %pFL, %for.body.lr.ph ], [ %arrayidx3.inc, %for.body.for.body_crit_edge ]
+ %arrayidx5.phi = phi i16* [ %pHT, %for.body.lr.ph ], [ %arrayidx5.inc, %for.body.for.body_crit_edge ]
+ %i.017.pmt = phi i32 [ 1, %for.body.lr.ph ], [ %phitmp, %for.body.for.body_crit_edge ]
+ %6 = load i16, i16* %arrayidx5.phi, align 2
+ %conv6 = sext i16 %6 to i32
+ %7 = tail call i64 @llvm.hexagon.M2.dpmpyss.s0(i32 %5, i32 %conv6)
+ %8 = tail call i64 @llvm.hexagon.S2.asl.r.p(i64 %7, i32 -13)
+ %9 = tail call i32 @llvm.hexagon.A2.sat(i64 %8)
+ store i32 %9, i32* %arrayidx3.phi, align 4
+ %exitcond = icmp eq i32 %i.017.pmt, %4
+ %arrayidx3.inc = getelementptr i32, i32* %arrayidx3.phi, i32 1
+ br i1 %exitcond, label %for.end.loopexit, label %for.body.for.body_crit_edge
+
+for.body.for.body_crit_edge:
+ %arrayidx5.inc = getelementptr i16, i16* %arrayidx5.phi, i32 1
+ %.pre = load i32, i32* %arrayidx3.inc, align 4
+ %phitmp = add i32 %i.017.pmt, 1
+ br label %for.body
+
+for.end.loopexit:
+ br label %for.end
+
+for.end:
+ ret void
+}
+
+declare i32 @llvm.hexagon.A2.sat(i64) #1
+
+declare i64 @llvm.hexagon.S2.asl.r.p(i64, i32) #1
+
+declare i64 @llvm.hexagon.M2.dpmpyss.s0(i32, i32) #1
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "ssp-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone }
diff --git a/test/CodeGen/Hexagon/hwloop-dbg.ll b/test/CodeGen/Hexagon/hwloop-dbg.ll
index 3c05884f6a7d..66c6662f735a 100644
--- a/test/CodeGen/Hexagon/hwloop-dbg.ll
+++ b/test/CodeGen/Hexagon/hwloop-dbg.ll
@@ -5,9 +5,9 @@ target triple = "hexagon"
define void @foo(i32* nocapture %a, i32* nocapture %b) nounwind {
entry:
- tail call void @llvm.dbg.value(metadata i32* %a, i64 0, metadata !13, metadata !{!"0x102"}), !dbg !17
- tail call void @llvm.dbg.value(metadata i32* %b, i64 0, metadata !14, metadata !{!"0x102"}), !dbg !18
- tail call void @llvm.dbg.value(metadata i32 0, i64 0, metadata !15, metadata !{!"0x102"}), !dbg !19
+ tail call void @llvm.dbg.value(metadata i32* %a, i64 0, metadata !13, metadata !DIExpression()), !dbg !17
+ tail call void @llvm.dbg.value(metadata i32* %b, i64 0, metadata !14, metadata !DIExpression()), !dbg !18
+ tail call void @llvm.dbg.value(metadata i32 0, i64 0, metadata !15, metadata !DIExpression()), !dbg !19
br label %for.body, !dbg !19
for.body: ; preds = %for.body, %entry
@@ -17,14 +17,14 @@ for.body: ; preds = %for.body, %entry
%arrayidx.phi = phi i32* [ %a, %entry ], [ %arrayidx.inc, %for.body ]
%i.02 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
%b.addr.01 = phi i32* [ %b, %entry ], [ %incdec.ptr, %for.body ]
- %incdec.ptr = getelementptr inbounds i32* %b.addr.01, i32 1, !dbg !21
- tail call void @llvm.dbg.value(metadata i32* %incdec.ptr, i64 0, metadata !14, metadata !{!"0x102"}), !dbg !21
- %0 = load i32* %b.addr.01, align 4, !dbg !21
+ %incdec.ptr = getelementptr inbounds i32, i32* %b.addr.01, i32 1, !dbg !21
+ tail call void @llvm.dbg.value(metadata i32* %incdec.ptr, i64 0, metadata !14, metadata !DIExpression()), !dbg !21
+ %0 = load i32, i32* %b.addr.01, align 4, !dbg !21
store i32 %0, i32* %arrayidx.phi, align 4, !dbg !21
%inc = add nsw i32 %i.02, 1, !dbg !26
- tail call void @llvm.dbg.value(metadata i32 %inc, i64 0, metadata !15, metadata !{!"0x102"}), !dbg !26
+ tail call void @llvm.dbg.value(metadata i32 %inc, i64 0, metadata !15, metadata !DIExpression()), !dbg !26
%exitcond = icmp eq i32 %inc, 10, !dbg !19
- %arrayidx.inc = getelementptr i32* %arrayidx.phi, i32 1
+ %arrayidx.inc = getelementptr i32, i32* %arrayidx.phi, i32 1
br i1 %exitcond, label %for.end, label %for.body, !dbg !19
for.end: ; preds = %for.body
@@ -37,28 +37,28 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnon
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!29}
-!0 = !{!"0x11\0012\00QuIC LLVM Hexagon Clang version 6.1-pre-unknown, (git://git-hexagon-aus.quicinc.com/llvm/clang-mainline.git e9382867661454cdf44addb39430741578e9765c) (llvm/llvm-mainline.git 36412bb1fcf03ed426d4437b41198bae066675ac)\001\00\000\00\001", !28, !2, !2, !3, !2, null} ; [ DW_TAG_compile_unit ] [/usr2/kparzysz/s.hex/t/hwloop-dbg.c] [DW_LANG_C99]
+!0 = !DICompileUnit(language: DW_LANG_C99, producer: "QuIC LLVM Hexagon Clang version 6.1-pre-unknown, (git://git-hexagon-aus.quicinc.com/llvm/clang-mainline.git e9382867661454cdf44addb39430741578e9765c) (llvm/llvm-mainline.git 36412bb1fcf03ed426d4437b41198bae066675ac)", isOptimized: true, emissionKind: 1, file: !28, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2)
!2 = !{}
!3 = !{!5}
-!5 = !{!"0x2e\00foo\00foo\00\001\000\001\000\006\00256\001\001", !28, null, !7, null, void (i32*, i32*)* @foo, null, null, !11} ; [ DW_TAG_subprogram ] [line 1] [def] [foo]
-!6 = !{!"0x29", !28} ; [ DW_TAG_file_type ]
-!7 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !8, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!5 = !DISubprogram(name: "foo", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 1, file: !28, scope: null, type: !7, function: void (i32*, i32*)* @foo, variables: !11)
+!6 = !DIFile(filename: "hwloop-dbg.c", directory: "/usr2/kparzysz/s.hex/t")
+!7 = !DISubroutineType(types: !8)
!8 = !{null, !9, !9}
-!9 = !{!"0xf\00\000\0032\0032\000\000", null, null, !10} ; [ DW_TAG_pointer_type ] [line 0, size 32, align 32, offset 0] [from int]
-!10 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!9 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 32, align: 32, baseType: !10)
+!10 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
!11 = !{!13, !14, !15}
-!13 = !{!"0x101\00a\0016777217\000", !5, !6, !9} ; [ DW_TAG_arg_variable ] [a] [line 1]
-!14 = !{!"0x101\00b\0033554433\000", !5, !6, !9} ; [ DW_TAG_arg_variable ] [b] [line 1]
-!15 = !{!"0x100\00i\002\000", !16, !6, !10} ; [ DW_TAG_auto_variable ] [i] [line 2]
-!16 = !{!"0xb\001\0026\000", !28, !5} ; [ DW_TAG_lexical_block ] [/usr2/kparzysz/s.hex/t/hwloop-dbg.c]
-!17 = !MDLocation(line: 1, column: 15, scope: !5)
-!18 = !MDLocation(line: 1, column: 23, scope: !5)
-!19 = !MDLocation(line: 3, column: 8, scope: !20)
-!20 = !{!"0xb\003\003\001", !28, !16} ; [ DW_TAG_lexical_block ] [/usr2/kparzysz/s.hex/t/hwloop-dbg.c]
-!21 = !MDLocation(line: 4, column: 5, scope: !22)
-!22 = !{!"0xb\003\0028\002", !28, !20} ; [ DW_TAG_lexical_block ] [/usr2/kparzysz/s.hex/t/hwloop-dbg.c]
-!26 = !MDLocation(line: 3, column: 23, scope: !20)
-!27 = !MDLocation(line: 6, column: 1, scope: !16)
-!28 = !{!"hwloop-dbg.c", !"/usr2/kparzysz/s.hex/t"}
-!29 = !{i32 1, !"Debug Info Version", i32 2}
+!13 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "a", line: 1, arg: 1, scope: !5, file: !6, type: !9)
+!14 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "b", line: 1, arg: 2, scope: !5, file: !6, type: !9)
+!15 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "i", line: 2, scope: !16, file: !6, type: !10)
+!16 = distinct !DILexicalBlock(line: 1, column: 26, file: !28, scope: !5)
+!17 = !DILocation(line: 1, column: 15, scope: !5)
+!18 = !DILocation(line: 1, column: 23, scope: !5)
+!19 = !DILocation(line: 3, column: 8, scope: !20)
+!20 = distinct !DILexicalBlock(line: 3, column: 3, file: !28, scope: !16)
+!21 = !DILocation(line: 4, column: 5, scope: !22)
+!22 = distinct !DILexicalBlock(line: 3, column: 28, file: !28, scope: !20)
+!26 = !DILocation(line: 3, column: 23, scope: !20)
+!27 = !DILocation(line: 6, column: 1, scope: !16)
+!28 = !DIFile(filename: "hwloop-dbg.c", directory: "/usr2/kparzysz/s.hex/t")
+!29 = !{i32 1, !"Debug Info Version", i32 3}
!30 = !{i32 0}
diff --git a/test/CodeGen/Hexagon/hwloop-le.ll b/test/CodeGen/Hexagon/hwloop-le.ll
index 9c8cec7c2a1b..85a1b3db673b 100644
--- a/test/CodeGen/Hexagon/hwloop-le.ll
+++ b/test/CodeGen/Hexagon/hwloop-le.ll
@@ -14,8 +14,8 @@ for.body.lr.ph: ; preds = %entry
for.body: ; preds = %for.body.lr.ph, %for.body
%i.04 = phi i32 [ 28395, %for.body.lr.ph ], [ %inc, %for.body ]
- %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
- %0 = load i8* %arrayidx, align 1
+ %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
+ %0 = load i8, i8* %arrayidx, align 1
%conv = zext i8 %0 to i32
%add = add nsw i32 %conv, 1
%conv1 = trunc i32 %add to i8
@@ -43,8 +43,8 @@ for.body.lr.ph: ; preds = %entry
for.body: ; preds = %for.body.lr.ph, %for.body
%i.04 = phi i32 [ 9073, %for.body.lr.ph ], [ %inc, %for.body ]
- %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
- %0 = load i8* %arrayidx, align 1
+ %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
+ %0 = load i8, i8* %arrayidx, align 1
%conv = zext i8 %0 to i32
%add = add nsw i32 %conv, 1
%conv1 = trunc i32 %add to i8
@@ -72,8 +72,8 @@ for.body.lr.ph: ; preds = %entry
for.body: ; preds = %for.body.lr.ph, %for.body
%i.04 = phi i32 [ 21956, %for.body.lr.ph ], [ %inc, %for.body ]
- %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
- %0 = load i8* %arrayidx, align 1
+ %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
+ %0 = load i8, i8* %arrayidx, align 1
%conv = zext i8 %0 to i32
%add = add nsw i32 %conv, 1
%conv1 = trunc i32 %add to i8
@@ -101,8 +101,8 @@ for.body.lr.ph: ; preds = %entry
for.body: ; preds = %for.body.lr.ph, %for.body
%i.04 = phi i32 [ 16782, %for.body.lr.ph ], [ %inc, %for.body ]
- %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
- %0 = load i8* %arrayidx, align 1
+ %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
+ %0 = load i8, i8* %arrayidx, align 1
%conv = zext i8 %0 to i32
%add = add nsw i32 %conv, 1
%conv1 = trunc i32 %add to i8
@@ -130,8 +130,8 @@ for.body.lr.ph: ; preds = %entry
for.body: ; preds = %for.body.lr.ph, %for.body
%i.04 = phi i32 [ 19097, %for.body.lr.ph ], [ %inc, %for.body ]
- %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
- %0 = load i8* %arrayidx, align 1
+ %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
+ %0 = load i8, i8* %arrayidx, align 1
%conv = zext i8 %0 to i32
%add = add nsw i32 %conv, 1
%conv1 = trunc i32 %add to i8
@@ -159,8 +159,8 @@ for.body.lr.ph: ; preds = %entry
for.body: ; preds = %for.body.lr.ph, %for.body
%i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
- %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
- %0 = load i8* %arrayidx, align 1
+ %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
+ %0 = load i8, i8* %arrayidx, align 1
%conv = zext i8 %0 to i32
%add = add nsw i32 %conv, 1
%conv1 = trunc i32 %add to i8
@@ -188,8 +188,8 @@ for.body.lr.ph: ; preds = %entry
for.body: ; preds = %for.body.lr.ph, %for.body
%i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
- %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
- %0 = load i8* %arrayidx, align 1
+ %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
+ %0 = load i8, i8* %arrayidx, align 1
%conv = zext i8 %0 to i32
%add = add nsw i32 %conv, 1
%conv1 = trunc i32 %add to i8
@@ -217,8 +217,8 @@ for.body.lr.ph: ; preds = %entry
for.body: ; preds = %for.body.lr.ph, %for.body
%i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
- %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
- %0 = load i8* %arrayidx, align 1
+ %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
+ %0 = load i8, i8* %arrayidx, align 1
%conv = zext i8 %0 to i32
%add = add nsw i32 %conv, 1
%conv1 = trunc i32 %add to i8
@@ -246,8 +246,8 @@ for.body.lr.ph: ; preds = %entry
for.body: ; preds = %for.body.lr.ph, %for.body
%i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
- %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
- %0 = load i8* %arrayidx, align 1
+ %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
+ %0 = load i8, i8* %arrayidx, align 1
%conv = zext i8 %0 to i32
%add = add nsw i32 %conv, 1
%conv1 = trunc i32 %add to i8
@@ -275,8 +275,8 @@ for.body.lr.ph: ; preds = %entry
for.body: ; preds = %for.body.lr.ph, %for.body
%i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
- %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
- %0 = load i8* %arrayidx, align 1
+ %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
+ %0 = load i8, i8* %arrayidx, align 1
%conv = zext i8 %0 to i32
%add = add nsw i32 %conv, 1
%conv1 = trunc i32 %add to i8
@@ -304,8 +304,8 @@ for.body.lr.ph: ; preds = %entry
for.body: ; preds = %for.body.lr.ph, %for.body
%i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
- %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
- %0 = load i8* %arrayidx, align 1
+ %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
+ %0 = load i8, i8* %arrayidx, align 1
%conv = zext i8 %0 to i32
%add = add nsw i32 %conv, 1
%conv1 = trunc i32 %add to i8
@@ -333,8 +333,8 @@ for.body.lr.ph: ; preds = %entry
for.body: ; preds = %for.body.lr.ph, %for.body
%i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
- %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
- %0 = load i8* %arrayidx, align 1
+ %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
+ %0 = load i8, i8* %arrayidx, align 1
%conv = zext i8 %0 to i32
%add = add nsw i32 %conv, 1
%conv1 = trunc i32 %add to i8
@@ -362,8 +362,8 @@ for.body.lr.ph: ; preds = %entry
for.body: ; preds = %for.body.lr.ph, %for.body
%i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
- %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
- %0 = load i8* %arrayidx, align 1
+ %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
+ %0 = load i8, i8* %arrayidx, align 1
%conv = zext i8 %0 to i32
%add = add nsw i32 %conv, 1
%conv1 = trunc i32 %add to i8
@@ -391,8 +391,8 @@ for.body.lr.ph: ; preds = %entry
for.body: ; preds = %for.body.lr.ph, %for.body
%i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
- %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
- %0 = load i8* %arrayidx, align 1
+ %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
+ %0 = load i8, i8* %arrayidx, align 1
%conv = zext i8 %0 to i32
%add = add nsw i32 %conv, 1
%conv1 = trunc i32 %add to i8
@@ -420,8 +420,8 @@ for.body.lr.ph: ; preds = %entry
for.body: ; preds = %for.body.lr.ph, %for.body
%i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
- %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
- %0 = load i8* %arrayidx, align 1
+ %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
+ %0 = load i8, i8* %arrayidx, align 1
%conv = zext i8 %0 to i32
%add = add nsw i32 %conv, 1
%conv1 = trunc i32 %add to i8
diff --git a/test/CodeGen/Hexagon/hwloop-loop1.ll b/test/CodeGen/Hexagon/hwloop-loop1.ll
new file mode 100644
index 000000000000..8b02736e0374
--- /dev/null
+++ b/test/CodeGen/Hexagon/hwloop-loop1.ll
@@ -0,0 +1,68 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv5 < %s | FileCheck %s
+;
+; Generate a loop1 instruction for a double loop sequence.
+
+; CHECK: loop0(.LBB{{.}}_{{.}}, #100)
+; CHECK: endloop0
+; CHECK: loop1(.LBB{{.}}_{{.}}, #100)
+; CHECK: loop0(.LBB{{.}}_{{.}}, #100)
+; CHECK: endloop0
+; CHECK: endloop1
+
+define i32 @main() #0 {
+entry:
+ %array = alloca [100 x i32], align 8
+ %doublearray = alloca [100 x [100 x i32]], align 8
+ %0 = bitcast [100 x i32]* %array to i8*
+ call void @llvm.lifetime.start(i64 400, i8* %0) #1
+ %1 = bitcast [100 x [100 x i32]]* %doublearray to i8*
+ call void @llvm.lifetime.start(i64 40000, i8* %1) #1
+ %arrayidx1 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* %doublearray, i32 0, i32 10, i32 10
+ %arrayidx2.gep = getelementptr [100 x i32], [100 x i32]* %array, i32 0, i32 0
+ br label %for.body
+
+for.body:
+ %2 = phi i32 [ undef, %entry ], [ %.pre, %for.body.for.body_crit_edge ]
+ %sum.031 = phi i32 [ undef, %entry ], [ %add, %for.body.for.body_crit_edge ]
+ %arrayidx2.phi = phi i32* [ %arrayidx2.gep, %entry ], [ %arrayidx2.inc, %for.body.for.body_crit_edge ]
+ %i.030 = phi i32 [ 1, %entry ], [ %phitmp, %for.body.for.body_crit_edge ]
+ %add = add nsw i32 %2, %sum.031
+ %exitcond33 = icmp eq i32 %i.030, 100
+ %arrayidx2.inc = getelementptr i32, i32* %arrayidx2.phi, i32 1
+ br i1 %exitcond33, label %for.cond7.preheader.preheader, label %for.body.for.body_crit_edge
+
+for.cond7.preheader.preheader:
+ br label %for.cond7.preheader
+
+for.body.for.body_crit_edge:
+ %.pre = load i32, i32* %arrayidx2.inc, align 4
+ %phitmp = add i32 %i.030, 1
+ br label %for.body
+
+for.cond7.preheader:
+ %i.129 = phi i32 [ %inc16, %for.inc15 ], [ 0, %for.cond7.preheader.preheader ]
+ br label %for.body9
+
+for.body9:
+ %j.028 = phi i32 [ 0, %for.cond7.preheader ], [ %inc13, %for.body9 ]
+ %arrayidx11 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* %doublearray, i32 0, i32 %i.129, i32 %j.028
+ store i32 %add, i32* %arrayidx11, align 4
+ %inc13 = add nsw i32 %j.028, 1
+ %exitcond = icmp eq i32 %inc13, 100
+ br i1 %exitcond, label %for.inc15, label %for.body9
+
+for.inc15:
+ %inc16 = add nsw i32 %i.129, 1
+ %exitcond32 = icmp eq i32 %inc16, 100
+ br i1 %exitcond32, label %for.end17, label %for.cond7.preheader
+
+for.end17:
+ %3 = load i32, i32* %arrayidx1, align 8
+ call void @llvm.lifetime.end(i64 40000, i8* %1) #1
+ call void @llvm.lifetime.end(i64 400, i8* %0) #1
+ ret i32 %3
+}
+
+declare void @llvm.lifetime.start(i64, i8* nocapture) #1
+
+declare void @llvm.lifetime.end(i64, i8* nocapture) #1
diff --git a/test/CodeGen/Hexagon/hwloop-lt.ll b/test/CodeGen/Hexagon/hwloop-lt.ll
index 7e43733da2a6..7e2ad2a4678e 100644
--- a/test/CodeGen/Hexagon/hwloop-lt.ll
+++ b/test/CodeGen/Hexagon/hwloop-lt.ll
@@ -1,7 +1,6 @@
; RUN: llc -march=hexagon -mcpu=hexagonv4 -O3 < %s | FileCheck %s
-
-; CHECK: test_pos1_ir_slt
+; CHECK-LABEL: @test_pos1_ir_slt
; CHECK: loop0
; a < b
define void @test_pos1_ir_slt(i8* nocapture %p, i32 %a, i32 %b) nounwind {
@@ -9,13 +8,13 @@ entry:
%cmp3 = icmp slt i32 8531, %b
br i1 %cmp3, label %for.body.lr.ph, label %for.end
-for.body.lr.ph: ; preds = %entry
+for.body.lr.ph:
br label %for.body
-for.body: ; preds = %for.body.lr.ph, %for.body
+for.body:
%i.04 = phi i32 [ 8531, %for.body.lr.ph ], [ %inc, %for.body ]
- %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
- %0 = load i8* %arrayidx, align 1
+ %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
+ %0 = load i8, i8* %arrayidx, align 1
%conv = zext i8 %0 to i32
%add = add nsw i32 %conv, 1
%conv1 = trunc i32 %add to i8
@@ -24,13 +23,11 @@ for.body: ; preds = %for.body.lr.ph, %fo
%cmp = icmp slt i32 %inc, %b
br i1 %cmp, label %for.body, label %for.end
-for.end: ; preds = %for.body, %entry
+for.end:
ret void
}
-
-
-; CHECK: test_pos2_ir_slt
+; CHECK-LABEL: @test_pos2_ir_slt
; CHECK: loop0
; a < b
define void @test_pos2_ir_slt(i8* nocapture %p, i32 %a, i32 %b) nounwind {
@@ -38,13 +35,13 @@ entry:
%cmp3 = icmp slt i32 9152, %b
br i1 %cmp3, label %for.body.lr.ph, label %for.end
-for.body.lr.ph: ; preds = %entry
+for.body.lr.ph:
br label %for.body
-for.body: ; preds = %for.body.lr.ph, %for.body
+for.body:
%i.04 = phi i32 [ 9152, %for.body.lr.ph ], [ %inc, %for.body ]
- %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
- %0 = load i8* %arrayidx, align 1
+ %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
+ %0 = load i8, i8* %arrayidx, align 1
%conv = zext i8 %0 to i32
%add = add nsw i32 %conv, 1
%conv1 = trunc i32 %add to i8
@@ -53,13 +50,11 @@ for.body: ; preds = %for.body.lr.ph, %fo
%cmp = icmp slt i32 %inc, %b
br i1 %cmp, label %for.body, label %for.end
-for.end: ; preds = %for.body, %entry
+for.end:
ret void
}
-
-
-; CHECK: test_pos4_ir_slt
+; CHECK-LABEL: @test_pos4_ir_slt
; CHECK: loop0
; a < b
define void @test_pos4_ir_slt(i8* nocapture %p, i32 %a, i32 %b) nounwind {
@@ -67,13 +62,13 @@ entry:
%cmp3 = icmp slt i32 18851, %b
br i1 %cmp3, label %for.body.lr.ph, label %for.end
-for.body.lr.ph: ; preds = %entry
+for.body.lr.ph:
br label %for.body
-for.body: ; preds = %for.body.lr.ph, %for.body
+for.body:
%i.04 = phi i32 [ 18851, %for.body.lr.ph ], [ %inc, %for.body ]
- %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
- %0 = load i8* %arrayidx, align 1
+ %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
+ %0 = load i8, i8* %arrayidx, align 1
%conv = zext i8 %0 to i32
%add = add nsw i32 %conv, 1
%conv1 = trunc i32 %add to i8
@@ -82,13 +77,11 @@ for.body: ; preds = %for.body.lr.ph, %fo
%cmp = icmp slt i32 %inc, %b
br i1 %cmp, label %for.body, label %for.end
-for.end: ; preds = %for.body, %entry
+for.end:
ret void
}
-
-
-; CHECK: test_pos8_ir_slt
+; CHECK-LABEL: @test_pos8_ir_slt
; CHECK: loop0
; a < b
define void @test_pos8_ir_slt(i8* nocapture %p, i32 %a, i32 %b) nounwind {
@@ -96,13 +89,13 @@ entry:
%cmp3 = icmp slt i32 25466, %b
br i1 %cmp3, label %for.body.lr.ph, label %for.end
-for.body.lr.ph: ; preds = %entry
+for.body.lr.ph:
br label %for.body
-for.body: ; preds = %for.body.lr.ph, %for.body
+for.body:
%i.04 = phi i32 [ 25466, %for.body.lr.ph ], [ %inc, %for.body ]
- %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
- %0 = load i8* %arrayidx, align 1
+ %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
+ %0 = load i8, i8* %arrayidx, align 1
%conv = zext i8 %0 to i32
%add = add nsw i32 %conv, 1
%conv1 = trunc i32 %add to i8
@@ -111,13 +104,11 @@ for.body: ; preds = %for.body.lr.ph, %fo
%cmp = icmp slt i32 %inc, %b
br i1 %cmp, label %for.body, label %for.end
-for.end: ; preds = %for.body, %entry
+for.end:
ret void
}
-
-
-; CHECK: test_pos16_ir_slt
+; CHECK-LABEL: @test_pos16_ir_slt
; CHECK: loop0
; a < b
define void @test_pos16_ir_slt(i8* nocapture %p, i32 %a, i32 %b) nounwind {
@@ -125,13 +116,13 @@ entry:
%cmp3 = icmp slt i32 9295, %b
br i1 %cmp3, label %for.body.lr.ph, label %for.end
-for.body.lr.ph: ; preds = %entry
+for.body.lr.ph:
br label %for.body
-for.body: ; preds = %for.body.lr.ph, %for.body
+for.body:
%i.04 = phi i32 [ 9295, %for.body.lr.ph ], [ %inc, %for.body ]
- %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
- %0 = load i8* %arrayidx, align 1
+ %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
+ %0 = load i8, i8* %arrayidx, align 1
%conv = zext i8 %0 to i32
%add = add nsw i32 %conv, 1
%conv1 = trunc i32 %add to i8
@@ -140,13 +131,11 @@ for.body: ; preds = %for.body.lr.ph, %fo
%cmp = icmp slt i32 %inc, %b
br i1 %cmp, label %for.body, label %for.end
-for.end: ; preds = %for.body, %entry
+for.end:
ret void
}
-
-
-; CHECK: test_pos1_ri_slt
+; CHECK-LABEL: @test_pos1_ri_slt
; CHECK: loop0
; a < b
define void @test_pos1_ri_slt(i8* nocapture %p, i32 %a, i32 %b) nounwind {
@@ -154,13 +143,13 @@ entry:
%cmp3 = icmp slt i32 %a, 31236
br i1 %cmp3, label %for.body.lr.ph, label %for.end
-for.body.lr.ph: ; preds = %entry
+for.body.lr.ph:
br label %for.body
-for.body: ; preds = %for.body.lr.ph, %for.body
+for.body:
%i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
- %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
- %0 = load i8* %arrayidx, align 1
+ %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
+ %0 = load i8, i8* %arrayidx, align 1
%conv = zext i8 %0 to i32
%add = add nsw i32 %conv, 1
%conv1 = trunc i32 %add to i8
@@ -169,13 +158,11 @@ for.body: ; preds = %for.body.lr.ph, %fo
%cmp = icmp slt i32 %inc, 31236
br i1 %cmp, label %for.body, label %for.end
-for.end: ; preds = %for.body, %entry
+for.end:
ret void
}
-
-
-; CHECK: test_pos2_ri_slt
+; CHECK-LABEL: @test_pos2_ri_slt
; CHECK: loop0
; a < b
define void @test_pos2_ri_slt(i8* nocapture %p, i32 %a, i32 %b) nounwind {
@@ -183,13 +170,13 @@ entry:
%cmp3 = icmp slt i32 %a, 22653
br i1 %cmp3, label %for.body.lr.ph, label %for.end
-for.body.lr.ph: ; preds = %entry
+for.body.lr.ph:
br label %for.body
-for.body: ; preds = %for.body.lr.ph, %for.body
+for.body:
%i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
- %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
- %0 = load i8* %arrayidx, align 1
+ %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
+ %0 = load i8, i8* %arrayidx, align 1
%conv = zext i8 %0 to i32
%add = add nsw i32 %conv, 1
%conv1 = trunc i32 %add to i8
@@ -198,13 +185,11 @@ for.body: ; preds = %for.body.lr.ph, %fo
%cmp = icmp slt i32 %inc, 22653
br i1 %cmp, label %for.body, label %for.end
-for.end: ; preds = %for.body, %entry
+for.end:
ret void
}
-
-
-; CHECK: test_pos4_ri_slt
+; CHECK-LABEL: @test_pos4_ri_slt
; CHECK: loop0
; a < b
define void @test_pos4_ri_slt(i8* nocapture %p, i32 %a, i32 %b) nounwind {
@@ -212,13 +197,13 @@ entry:
%cmp3 = icmp slt i32 %a, 1431
br i1 %cmp3, label %for.body.lr.ph, label %for.end
-for.body.lr.ph: ; preds = %entry
+for.body.lr.ph:
br label %for.body
-for.body: ; preds = %for.body.lr.ph, %for.body
+for.body:
%i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
- %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
- %0 = load i8* %arrayidx, align 1
+ %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
+ %0 = load i8, i8* %arrayidx, align 1
%conv = zext i8 %0 to i32
%add = add nsw i32 %conv, 1
%conv1 = trunc i32 %add to i8
@@ -227,13 +212,11 @@ for.body: ; preds = %for.body.lr.ph, %fo
%cmp = icmp slt i32 %inc, 1431
br i1 %cmp, label %for.body, label %for.end
-for.end: ; preds = %for.body, %entry
+for.end:
ret void
}
-
-
-; CHECK: test_pos8_ri_slt
+; CHECK-LABEL: @test_pos8_ri_slt
; CHECK: loop0
; a < b
define void @test_pos8_ri_slt(i8* nocapture %p, i32 %a, i32 %b) nounwind {
@@ -241,13 +224,13 @@ entry:
%cmp3 = icmp slt i32 %a, 22403
br i1 %cmp3, label %for.body.lr.ph, label %for.end
-for.body.lr.ph: ; preds = %entry
+for.body.lr.ph:
br label %for.body
-for.body: ; preds = %for.body.lr.ph, %for.body
+for.body:
%i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
- %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
- %0 = load i8* %arrayidx, align 1
+ %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
+ %0 = load i8, i8* %arrayidx, align 1
%conv = zext i8 %0 to i32
%add = add nsw i32 %conv, 1
%conv1 = trunc i32 %add to i8
@@ -256,13 +239,11 @@ for.body: ; preds = %for.body.lr.ph, %fo
%cmp = icmp slt i32 %inc, 22403
br i1 %cmp, label %for.body, label %for.end
-for.end: ; preds = %for.body, %entry
+for.end:
ret void
}
-
-
-; CHECK: test_pos16_ri_slt
+; CHECK-LABEL: @test_pos16_ri_slt
; CHECK: loop0
; a < b
define void @test_pos16_ri_slt(i8* nocapture %p, i32 %a, i32 %b) nounwind {
@@ -270,13 +251,13 @@ entry:
%cmp3 = icmp slt i32 %a, 21715
br i1 %cmp3, label %for.body.lr.ph, label %for.end
-for.body.lr.ph: ; preds = %entry
+for.body.lr.ph:
br label %for.body
-for.body: ; preds = %for.body.lr.ph, %for.body
+for.body:
%i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
- %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
- %0 = load i8* %arrayidx, align 1
+ %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
+ %0 = load i8, i8* %arrayidx, align 1
%conv = zext i8 %0 to i32
%add = add nsw i32 %conv, 1
%conv1 = trunc i32 %add to i8
@@ -285,13 +266,11 @@ for.body: ; preds = %for.body.lr.ph, %fo
%cmp = icmp slt i32 %inc, 21715
br i1 %cmp, label %for.body, label %for.end
-for.end: ; preds = %for.body, %entry
+for.end:
ret void
}
-
-
-; CHECK: test_pos1_rr_slt
+; CHECK-LABEL: @test_pos1_rr_slt
; CHECK: loop0
; a < b
define void @test_pos1_rr_slt(i8* nocapture %p, i32 %a, i32 %b) nounwind {
@@ -299,13 +278,13 @@ entry:
%cmp3 = icmp slt i32 %a, %b
br i1 %cmp3, label %for.body.lr.ph, label %for.end
-for.body.lr.ph: ; preds = %entry
+for.body.lr.ph:
br label %for.body
-for.body: ; preds = %for.body.lr.ph, %for.body
+for.body:
%i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
- %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
- %0 = load i8* %arrayidx, align 1
+ %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
+ %0 = load i8, i8* %arrayidx, align 1
%conv = zext i8 %0 to i32
%add = add nsw i32 %conv, 1
%conv1 = trunc i32 %add to i8
@@ -314,13 +293,11 @@ for.body: ; preds = %for.body.lr.ph, %fo
%cmp = icmp slt i32 %inc, %b
br i1 %cmp, label %for.body, label %for.end
-for.end: ; preds = %for.body, %entry
+for.end:
ret void
}
-
-
-; CHECK: test_pos2_rr_slt
+; CHECK-LABEL: @test_pos2_rr_slt
; CHECK: loop0
; a < b
define void @test_pos2_rr_slt(i8* nocapture %p, i32 %a, i32 %b) nounwind {
@@ -328,13 +305,13 @@ entry:
%cmp3 = icmp slt i32 %a, %b
br i1 %cmp3, label %for.body.lr.ph, label %for.end
-for.body.lr.ph: ; preds = %entry
+for.body.lr.ph:
br label %for.body
-for.body: ; preds = %for.body.lr.ph, %for.body
+for.body:
%i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
- %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
- %0 = load i8* %arrayidx, align 1
+ %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
+ %0 = load i8, i8* %arrayidx, align 1
%conv = zext i8 %0 to i32
%add = add nsw i32 %conv, 1
%conv1 = trunc i32 %add to i8
@@ -343,13 +320,11 @@ for.body: ; preds = %for.body.lr.ph, %fo
%cmp = icmp slt i32 %inc, %b
br i1 %cmp, label %for.body, label %for.end
-for.end: ; preds = %for.body, %entry
+for.end:
ret void
}
-
-
-; CHECK: test_pos4_rr_slt
+; CHECK-LABEL: @test_pos4_rr_slt
; CHECK: loop0
; a < b
define void @test_pos4_rr_slt(i8* nocapture %p, i32 %a, i32 %b) nounwind {
@@ -357,13 +332,13 @@ entry:
%cmp3 = icmp slt i32 %a, %b
br i1 %cmp3, label %for.body.lr.ph, label %for.end
-for.body.lr.ph: ; preds = %entry
+for.body.lr.ph:
br label %for.body
-for.body: ; preds = %for.body.lr.ph, %for.body
+for.body:
%i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
- %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
- %0 = load i8* %arrayidx, align 1
+ %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
+ %0 = load i8, i8* %arrayidx, align 1
%conv = zext i8 %0 to i32
%add = add nsw i32 %conv, 1
%conv1 = trunc i32 %add to i8
@@ -372,13 +347,11 @@ for.body: ; preds = %for.body.lr.ph, %fo
%cmp = icmp slt i32 %inc, %b
br i1 %cmp, label %for.body, label %for.end
-for.end: ; preds = %for.body, %entry
+for.end:
ret void
}
-
-
-; CHECK: test_pos8_rr_slt
+; CHECK-LABEL: @test_pos8_rr_slt
; CHECK: loop0
; a < b
define void @test_pos8_rr_slt(i8* nocapture %p, i32 %a, i32 %b) nounwind {
@@ -386,13 +359,13 @@ entry:
%cmp3 = icmp slt i32 %a, %b
br i1 %cmp3, label %for.body.lr.ph, label %for.end
-for.body.lr.ph: ; preds = %entry
+for.body.lr.ph:
br label %for.body
-for.body: ; preds = %for.body.lr.ph, %for.body
+for.body:
%i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
- %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
- %0 = load i8* %arrayidx, align 1
+ %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
+ %0 = load i8, i8* %arrayidx, align 1
%conv = zext i8 %0 to i32
%add = add nsw i32 %conv, 1
%conv1 = trunc i32 %add to i8
@@ -401,13 +374,11 @@ for.body: ; preds = %for.body.lr.ph, %fo
%cmp = icmp slt i32 %inc, %b
br i1 %cmp, label %for.body, label %for.end
-for.end: ; preds = %for.body, %entry
+for.end:
ret void
}
-
-
-; CHECK: test_pos16_rr_slt
+; CHECK-LABEL: @test_pos16_rr_slt
; CHECK: loop0
; a < b
define void @test_pos16_rr_slt(i8* nocapture %p, i32 %a, i32 %b) nounwind {
@@ -415,13 +386,13 @@ entry:
%cmp3 = icmp slt i32 %a, %b
br i1 %cmp3, label %for.body.lr.ph, label %for.end
-for.body.lr.ph: ; preds = %entry
+for.body.lr.ph:
br label %for.body
-for.body: ; preds = %for.body.lr.ph, %for.body
+for.body:
%i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
- %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
- %0 = load i8* %arrayidx, align 1
+ %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
+ %0 = load i8, i8* %arrayidx, align 1
%conv = zext i8 %0 to i32
%add = add nsw i32 %conv, 1
%conv1 = trunc i32 %add to i8
@@ -430,7 +401,7 @@ for.body: ; preds = %for.body.lr.ph, %fo
%cmp = icmp slt i32 %inc, %b
br i1 %cmp, label %for.body, label %for.end
-for.end: ; preds = %for.body, %entry
+for.end:
ret void
}
diff --git a/test/CodeGen/Hexagon/hwloop-lt1.ll b/test/CodeGen/Hexagon/hwloop-lt1.ll
index cf5874011ee0..16fe728fa7bc 100644
--- a/test/CodeGen/Hexagon/hwloop-lt1.ll
+++ b/test/CodeGen/Hexagon/hwloop-lt1.ll
@@ -19,10 +19,10 @@ polly.loop_body: ; preds = %entry, %polly.loop_
%p_vector_iv14 = or i32 %polly.loopiv16, 1
%p_vector_iv3 = add i32 %p_vector_iv14, 1
%p_vector_iv415 = or i32 %polly.loopiv16, 3
- %p_arrayidx = getelementptr [400 x i8]* @A, i32 0, i32 %polly.loopiv16
- %p_arrayidx5 = getelementptr [400 x i8]* @A, i32 0, i32 %p_vector_iv14
- %p_arrayidx6 = getelementptr [400 x i8]* @A, i32 0, i32 %p_vector_iv3
- %p_arrayidx7 = getelementptr [400 x i8]* @A, i32 0, i32 %p_vector_iv415
+ %p_arrayidx = getelementptr [400 x i8], [400 x i8]* @A, i32 0, i32 %polly.loopiv16
+ %p_arrayidx5 = getelementptr [400 x i8], [400 x i8]* @A, i32 0, i32 %p_vector_iv14
+ %p_arrayidx6 = getelementptr [400 x i8], [400 x i8]* @A, i32 0, i32 %p_vector_iv3
+ %p_arrayidx7 = getelementptr [400 x i8], [400 x i8]* @A, i32 0, i32 %p_vector_iv415
store i8 123, i8* %p_arrayidx, align 1
store i8 123, i8* %p_arrayidx5, align 1
store i8 123, i8* %p_arrayidx6, align 1
diff --git a/test/CodeGen/Hexagon/hwloop-missed.ll b/test/CodeGen/Hexagon/hwloop-missed.ll
new file mode 100644
index 000000000000..bcc800652294
--- /dev/null
+++ b/test/CodeGen/Hexagon/hwloop-missed.ll
@@ -0,0 +1,49 @@
+; RUN: llc -march=hexagon -hexagon-hwloop-preheader < %s | FileCheck %s
+
+; Generate hardware loops when we also need to add a new preheader.
+; We should generate two hardware loops for this test case.
+
+; CHECK: loop0
+; CHECK: endloop0
+; CHECK: loop0
+; CHECK: endloop0
+
+@g = external global i32
+
+define void @test(i32* nocapture %a, i32* nocapture %b, i32 %n) nounwind {
+entry:
+ %tobool = icmp eq i32 %n, 0
+ br i1 %tobool, label %for.body4.preheader, label %for.body.preheader
+
+for.body.preheader:
+ br label %for.body
+
+for.body:
+ %arrayidx.phi = phi i32* [ %arrayidx.inc, %for.body ], [ %a, %for.body.preheader ]
+ %i.014 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ]
+ %0 = load i32, i32* @g, align 4
+ store i32 %0, i32* %arrayidx.phi, align 4
+ %inc = add nsw i32 %i.014, 1
+ %exitcond15 = icmp eq i32 %inc, 3
+ %arrayidx.inc = getelementptr i32, i32* %arrayidx.phi, i32 1
+ br i1 %exitcond15, label %for.body4.preheader.loopexit, label %for.body
+
+for.body4.preheader.loopexit:
+ br label %for.body4.preheader
+
+for.body4.preheader:
+ br label %for.body4
+
+for.body4:
+ %arrayidx5.phi = phi i32* [ %arrayidx5.inc, %for.body4 ], [ %b, %for.body4.preheader ]
+ %i1.013 = phi i32 [ %inc7, %for.body4 ], [ 0, %for.body4.preheader ]
+ %1 = load i32, i32* @g, align 4
+ store i32 %1, i32* %arrayidx5.phi, align 4
+ %inc7 = add nsw i32 %i1.013, 1
+ %exitcond = icmp eq i32 %inc7, 3
+ %arrayidx5.inc = getelementptr i32, i32* %arrayidx5.phi, i32 1
+ br i1 %exitcond, label %for.end8, label %for.body4
+
+for.end8:
+ ret void
+}
diff --git a/test/CodeGen/Hexagon/hwloop-ne.ll b/test/CodeGen/Hexagon/hwloop-ne.ll
index bceef2a16955..12ef3b5dd0bc 100644
--- a/test/CodeGen/Hexagon/hwloop-ne.ll
+++ b/test/CodeGen/Hexagon/hwloop-ne.ll
@@ -14,8 +14,8 @@ for.body.lr.ph: ; preds = %entry
for.body: ; preds = %for.body.lr.ph, %for.body
%i.04 = phi i32 [ 32623, %for.body.lr.ph ], [ %inc, %for.body ]
- %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
- %0 = load i8* %arrayidx, align 1
+ %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
+ %0 = load i8, i8* %arrayidx, align 1
%conv = zext i8 %0 to i32
%add = add nsw i32 %conv, 1
%conv1 = trunc i32 %add to i8
@@ -43,8 +43,8 @@ for.body.lr.ph: ; preds = %entry
for.body: ; preds = %for.body.lr.ph, %for.body
%i.04 = phi i32 [ 29554, %for.body.lr.ph ], [ %inc, %for.body ]
- %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
- %0 = load i8* %arrayidx, align 1
+ %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
+ %0 = load i8, i8* %arrayidx, align 1
%conv = zext i8 %0 to i32
%add = add nsw i32 %conv, 1
%conv1 = trunc i32 %add to i8
@@ -72,8 +72,8 @@ for.body.lr.ph: ; preds = %entry
for.body: ; preds = %for.body.lr.ph, %for.body
%i.04 = phi i32 [ 15692, %for.body.lr.ph ], [ %inc, %for.body ]
- %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
- %0 = load i8* %arrayidx, align 1
+ %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
+ %0 = load i8, i8* %arrayidx, align 1
%conv = zext i8 %0 to i32
%add = add nsw i32 %conv, 1
%conv1 = trunc i32 %add to i8
@@ -101,8 +101,8 @@ for.body.lr.ph: ; preds = %entry
for.body: ; preds = %for.body.lr.ph, %for.body
%i.04 = phi i32 [ 10449, %for.body.lr.ph ], [ %inc, %for.body ]
- %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
- %0 = load i8* %arrayidx, align 1
+ %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
+ %0 = load i8, i8* %arrayidx, align 1
%conv = zext i8 %0 to i32
%add = add nsw i32 %conv, 1
%conv1 = trunc i32 %add to i8
@@ -130,8 +130,8 @@ for.body.lr.ph: ; preds = %entry
for.body: ; preds = %for.body.lr.ph, %for.body
%i.04 = phi i32 [ 32087, %for.body.lr.ph ], [ %inc, %for.body ]
- %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
- %0 = load i8* %arrayidx, align 1
+ %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
+ %0 = load i8, i8* %arrayidx, align 1
%conv = zext i8 %0 to i32
%add = add nsw i32 %conv, 1
%conv1 = trunc i32 %add to i8
@@ -159,8 +159,8 @@ for.body.lr.ph: ; preds = %entry
for.body: ; preds = %for.body.lr.ph, %for.body
%i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
- %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
- %0 = load i8* %arrayidx, align 1
+ %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
+ %0 = load i8, i8* %arrayidx, align 1
%conv = zext i8 %0 to i32
%add = add nsw i32 %conv, 1
%conv1 = trunc i32 %add to i8
@@ -188,8 +188,8 @@ for.body.lr.ph: ; preds = %entry
for.body: ; preds = %for.body.lr.ph, %for.body
%i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
- %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
- %0 = load i8* %arrayidx, align 1
+ %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
+ %0 = load i8, i8* %arrayidx, align 1
%conv = zext i8 %0 to i32
%add = add nsw i32 %conv, 1
%conv1 = trunc i32 %add to i8
@@ -217,8 +217,8 @@ for.body.lr.ph: ; preds = %entry
for.body: ; preds = %for.body.lr.ph, %for.body
%i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
- %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
- %0 = load i8* %arrayidx, align 1
+ %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
+ %0 = load i8, i8* %arrayidx, align 1
%conv = zext i8 %0 to i32
%add = add nsw i32 %conv, 1
%conv1 = trunc i32 %add to i8
@@ -246,8 +246,8 @@ for.body.lr.ph: ; preds = %entry
for.body: ; preds = %for.body.lr.ph, %for.body
%i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
- %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
- %0 = load i8* %arrayidx, align 1
+ %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
+ %0 = load i8, i8* %arrayidx, align 1
%conv = zext i8 %0 to i32
%add = add nsw i32 %conv, 1
%conv1 = trunc i32 %add to i8
@@ -275,8 +275,8 @@ for.body.lr.ph: ; preds = %entry
for.body: ; preds = %for.body.lr.ph, %for.body
%i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
- %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
- %0 = load i8* %arrayidx, align 1
+ %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
+ %0 = load i8, i8* %arrayidx, align 1
%conv = zext i8 %0 to i32
%add = add nsw i32 %conv, 1
%conv1 = trunc i32 %add to i8
@@ -304,8 +304,8 @@ for.body.lr.ph: ; preds = %entry
for.body: ; preds = %for.body.lr.ph, %for.body
%i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
- %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
- %0 = load i8* %arrayidx, align 1
+ %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
+ %0 = load i8, i8* %arrayidx, align 1
%conv = zext i8 %0 to i32
%add = add nsw i32 %conv, 1
%conv1 = trunc i32 %add to i8
@@ -333,8 +333,8 @@ for.body.lr.ph: ; preds = %entry
for.body: ; preds = %for.body.lr.ph, %for.body
%i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
- %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
- %0 = load i8* %arrayidx, align 1
+ %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
+ %0 = load i8, i8* %arrayidx, align 1
%conv = zext i8 %0 to i32
%add = add nsw i32 %conv, 1
%conv1 = trunc i32 %add to i8
@@ -362,8 +362,8 @@ for.body.lr.ph: ; preds = %entry
for.body: ; preds = %for.body.lr.ph, %for.body
%i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
- %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
- %0 = load i8* %arrayidx, align 1
+ %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
+ %0 = load i8, i8* %arrayidx, align 1
%conv = zext i8 %0 to i32
%add = add nsw i32 %conv, 1
%conv1 = trunc i32 %add to i8
@@ -391,8 +391,8 @@ for.body.lr.ph: ; preds = %entry
for.body: ; preds = %for.body.lr.ph, %for.body
%i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
- %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
- %0 = load i8* %arrayidx, align 1
+ %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
+ %0 = load i8, i8* %arrayidx, align 1
%conv = zext i8 %0 to i32
%add = add nsw i32 %conv, 1
%conv1 = trunc i32 %add to i8
@@ -420,8 +420,8 @@ for.body.lr.ph: ; preds = %entry
for.body: ; preds = %for.body.lr.ph, %for.body
%i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
- %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
- %0 = load i8* %arrayidx, align 1
+ %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
+ %0 = load i8, i8* %arrayidx, align 1
%conv = zext i8 %0 to i32
%add = add nsw i32 %conv, 1
%conv1 = trunc i32 %add to i8
diff --git a/test/CodeGen/Hexagon/hwloop-ph-deadcode.ll b/test/CodeGen/Hexagon/hwloop-ph-deadcode.ll
new file mode 100644
index 000000000000..06e6db420f8f
--- /dev/null
+++ b/test/CodeGen/Hexagon/hwloop-ph-deadcode.ll
@@ -0,0 +1,23 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv5 -O2 -disable-block-placement=0 < %s | FileCheck %s
+
+; Test that there is no redundant register assignment in the hardware loop
+; preheader.
+
+; CHECK-NOT: r{{.*}} = #5
+
+@g = external global i32
+
+define void @foo() #0 {
+entry:
+ br i1 undef, label %if.end38, label %for.body
+
+for.body:
+ %loopIdx.051 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
+ store i32 1, i32* @g, align 4
+ %inc = add i32 %loopIdx.051, 1
+ %cmp9 = icmp ult i32 %inc, 5
+ br i1 %cmp9, label %for.body, label %if.end38
+
+if.end38:
+ ret void
+}
diff --git a/test/CodeGen/Hexagon/hwloop-pos-ivbump1.ll b/test/CodeGen/Hexagon/hwloop-pos-ivbump1.ll
new file mode 100644
index 000000000000..7c5ea031ffae
--- /dev/null
+++ b/test/CodeGen/Hexagon/hwloop-pos-ivbump1.ll
@@ -0,0 +1,45 @@
+;RUN: llc -march=hexagon < %s | FileCheck %s
+
+; Test that a hardware loop is not generated due to a potential
+; underflow.
+
+; CHECK-NOT: loop0
+
+define i32 @main() #0 {
+entry:
+ br label %while.cond.outer
+
+while.cond.outer.loopexit:
+ %.lcssa = phi i32 [ %0, %for.body.preheader ]
+ br label %while.cond.outer
+
+while.cond.outer:
+ %i.0.ph = phi i32 [ 0, %entry ], [ 3, %while.cond.outer.loopexit ]
+ %j.0.ph = phi i32 [ 0, %entry ], [ %.lcssa, %while.cond.outer.loopexit ]
+ %k.0.ph = phi i32 [ 0, %entry ], [ 1, %while.cond.outer.loopexit ]
+ br label %while.cond
+
+while.cond:
+ %i.0 = phi i32 [ %i.0.ph, %while.cond.outer ], [ %inc, %for.body.preheader ]
+ %j.0 = phi i32 [ %j.0.ph, %while.cond.outer ], [ %0, %for.body.preheader ]
+ %inc = add nsw i32 %i.0, 1
+ %cmp = icmp slt i32 %i.0, 4
+ br i1 %cmp, label %for.body.preheader, label %while.end
+
+for.body.preheader:
+ %0 = add i32 %j.0, 3
+ %cmp5 = icmp eq i32 %inc, 3
+ br i1 %cmp5, label %while.cond.outer.loopexit, label %while.cond
+
+while.end:
+ %k.0.ph.lcssa = phi i32 [ %k.0.ph, %while.cond ]
+ %inc.lcssa = phi i32 [ %inc, %while.cond ]
+ %j.0.lcssa = phi i32 [ %j.0, %while.cond ]
+ %cmp6 = icmp ne i32 %inc.lcssa, 5
+ %cmp7 = icmp ne i32 %j.0.lcssa, 12
+ %or.cond = or i1 %cmp6, %cmp7
+ %cmp9 = icmp ne i32 %k.0.ph.lcssa, 1
+ %or.cond12 = or i1 %or.cond, %cmp9
+ %locflg.0 = zext i1 %or.cond12 to i32
+ ret i32 %locflg.0
+}
diff --git a/test/CodeGen/Hexagon/hwloop-preheader.ll b/test/CodeGen/Hexagon/hwloop-preheader.ll
new file mode 100644
index 000000000000..66efd2089fce
--- /dev/null
+++ b/test/CodeGen/Hexagon/hwloop-preheader.ll
@@ -0,0 +1,40 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv5 -hexagon-hwloop-preheader < %s
+; REQUIRES: asserts
+
+; Test that the preheader is added to the parent loop; otherwise,
+; we generate an invalid hardware loop.
+
+; Function Attrs: nounwind readonly
+define void @test(i16 signext %n) #0 {
+entry:
+ br i1 undef, label %for.cond4.preheader.preheader.split.us, label %for.end22
+
+for.cond4.preheader.preheader.split.us:
+ %0 = sext i16 %n to i32
+ br label %for.body9.preheader.us
+
+for.body9.us:
+ %indvars.iv = phi i32 [ %indvars.iv.next.7, %for.body9.us ], [ 0, %for.body9.preheader.us ]
+ %indvars.iv.next.7 = add i32 %indvars.iv, 8
+ %lftr.wideiv.7 = trunc i32 %indvars.iv.next.7 to i16
+ %exitcond.7 = icmp slt i16 %lftr.wideiv.7, 0
+ br i1 %exitcond.7, label %for.body9.us, label %for.body9.us.ur
+
+for.body9.preheader.us:
+ %i.030.us.pmt = phi i32 [ %inc21.us.pmt, %for.end.loopexit.us ], [ 0, %for.cond4.preheader.preheader.split.us ]
+ br i1 undef, label %for.body9.us, label %for.body9.us.ur
+
+for.body9.us.ur:
+ %exitcond.ur.old = icmp eq i16 undef, %n
+ br i1 %exitcond.ur.old, label %for.end.loopexit.us, label %for.body9.us.ur
+
+for.end.loopexit.us:
+ %inc21.us.pmt = add i32 %i.030.us.pmt, 1
+ %exitcond33 = icmp eq i32 %inc21.us.pmt, %0
+ br i1 %exitcond33, label %for.end22, label %for.body9.preheader.us
+
+for.end22:
+ ret void
+}
+
+attributes #0 = { nounwind readonly "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
diff --git a/test/CodeGen/Hexagon/hwloop-range.ll b/test/CodeGen/Hexagon/hwloop-range.ll
new file mode 100644
index 000000000000..5e6fe78d0e0b
--- /dev/null
+++ b/test/CodeGen/Hexagon/hwloop-range.ll
@@ -0,0 +1,36 @@
+; RUN: llc -march=hexagon -hexagon-loop-range=0 < %s | FileCheck %s
+
+; Test that the loop start address operand uses a constant extender
+; if the offset is out of range.
+
+; CHECK: loop0(##.LBB
+; CHECK: endloop0
+
+@g = external global i32, align 4
+
+define void @test(i32* nocapture %a, i32* nocapture readonly %b, i32 %n) #0 {
+entry:
+ %cmp6 = icmp slt i32 %n, 1
+ br i1 %cmp6, label %for.end, label %for.body.preheader
+
+for.body.preheader:
+ br label %for.body
+
+for.body:
+ %i.07 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ]
+ %arrayidx = getelementptr inbounds i32, i32* %b, i32 %i.07
+ %0 = load i32, i32* %arrayidx, align 4
+ %1 = load i32, i32* @g, align 4
+ %mul = mul nsw i32 %1, %0
+ %arrayidx1 = getelementptr inbounds i32, i32* %a, i32 %i.07
+ store i32 %mul, i32* %arrayidx1, align 4
+ %inc = add nuw nsw i32 %i.07, 1
+ %exitcond = icmp eq i32 %inc, %n
+ br i1 %exitcond, label %for.end.loopexit, label %for.body
+
+for.end.loopexit:
+ br label %for.end
+
+for.end:
+ ret void
+}
diff --git a/test/CodeGen/Hexagon/hwloop-recursion.ll b/test/CodeGen/Hexagon/hwloop-recursion.ll
new file mode 100644
index 000000000000..8ab2dc37d021
--- /dev/null
+++ b/test/CodeGen/Hexagon/hwloop-recursion.ll
@@ -0,0 +1,64 @@
+; RUN: llc -O2 -march=hexagon -mcpu=hexagonv5 < %s
+; REQUIRES: asserts
+; Check for successful compilation.
+
+@c = common global i32 0, align 4
+@e = common global i32 0, align 4
+@g = common global i32* null, align 4
+@a = common global i32 0, align 4
+@b = common global i32 0, align 4
+@h = common global i32* null, align 4
+@d = common global i32 0, align 4
+@f = common global i32 0, align 4
+
+define i32 @fn1([0 x i32]* nocapture readnone %p1) #0 {
+entry:
+ %0 = load i32*, i32** @h, align 4
+ %1 = load i32*, i32** @g, align 4
+ %.pre = load i32, i32* @c, align 4
+ br label %for.cond
+
+for.cond:
+ %2 = phi i32 [ %10, %if.end ], [ %.pre, %entry ]
+ store i32 %2, i32* @e, align 4
+ %tobool5 = icmp eq i32 %2, 0
+ br i1 %tobool5, label %for.end, label %for.body.lr.ph
+
+for.body.lr.ph:
+ %3 = sub i32 -5, %2
+ %4 = urem i32 %3, 5
+ %5 = sub i32 %3, %4
+ br label %for.body
+
+for.body:
+ %add6 = phi i32 [ %2, %for.body.lr.ph ], [ %add, %for.body ]
+ %6 = load i32, i32* %1, align 4
+ store i32 %6, i32* @a, align 4
+ %add = add nsw i32 %add6, 5
+ %tobool = icmp eq i32 %add, 0
+ br i1 %tobool, label %for.cond1.for.end_crit_edge, label %for.body
+
+for.cond1.for.end_crit_edge:
+ %7 = add i32 %2, 5
+ %8 = add i32 %7, %5
+ store i32 %8, i32* @e, align 4
+ br label %for.end
+
+for.end:
+ %9 = load i32, i32* @b, align 4
+ %tobool2 = icmp eq i32 %9, 0
+ br i1 %tobool2, label %if.end, label %if.then
+
+if.then:
+ store i32 0, i32* %0, align 4
+ %.pre7 = load i32, i32* @c, align 4
+ br label %if.end
+
+if.end:
+ %10 = phi i32 [ %2, %for.end ], [ %.pre7, %if.then ]
+ store i32 %10, i32* @d, align 4
+ %11 = load i32, i32* @f, align 4
+ %inc = add nsw i32 %11, 1
+ store i32 %inc, i32* @f, align 4
+ br label %for.cond
+}
diff --git a/test/CodeGen/Hexagon/hwloop-wrap.ll b/test/CodeGen/Hexagon/hwloop-wrap.ll
new file mode 100644
index 000000000000..e0f6a87fd2e4
--- /dev/null
+++ b/test/CodeGen/Hexagon/hwloop-wrap.ll
@@ -0,0 +1,22 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv5 < %s | FileCheck %s
+
+; We shouldn't generate a hardware loop in this case because the initial
+; value may be zero, which means the endloop instruction will not decrement
+; the loop counter, and the loop will execute only once.
+
+; CHECK-NOT: loop0
+
+define void @foo(i32 %count, i32 %v) #0 {
+entry:
+ br label %do.body
+
+do.body:
+ %count.addr.0 = phi i32 [ %count, %entry ], [ %dec, %do.body ]
+ tail call void asm sideeffect "nop", ""() #1
+ %dec = add i32 %count.addr.0, -1
+ %cmp = icmp eq i32 %dec, 0
+ br i1 %cmp, label %do.end, label %do.body
+
+do.end:
+ ret void
+}
diff --git a/test/CodeGen/Hexagon/hwloop-wrap2.ll b/test/CodeGen/Hexagon/hwloop-wrap2.ll
new file mode 100644
index 000000000000..50675d6b681b
--- /dev/null
+++ b/test/CodeGen/Hexagon/hwloop-wrap2.ll
@@ -0,0 +1,67 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv5 -O3 < %s | FileCheck %s
+
+; Test that we do not generate a hardware loop due to a potential underflow.
+
+; CHECK-NOT: loop0
+
+%struct.3 = type { i8*, i8, i8, i32, i32, i16, i16, i16, i16, i16, i16, i16, %struct.2* }
+%struct.2 = type { i16, i16, i16, i16, %struct.1* }
+%struct.1 = type { %struct.1*, %struct.0*, i32, i32, i16, [2 x i16], [2 x i16], i16 }
+%struct.0 = type { %struct.0*, i32, i32, i32, i32, i32, i32, i16, i16, i16, i8, i8, i8, i8 }
+
+@pairArray = external global i32**
+@carray = external global %struct.3**
+
+define void @test() #0 {
+entry:
+ %0 = load i32**, i32*** @pairArray, align 4
+ %1 = load %struct.3**, %struct.3*** @carray, align 4
+ br i1 undef, label %for.end110, label %for.body
+
+for.body:
+ %row.0199 = phi i32 [ %inc109, %for.inc108 ], [ 1, %entry ]
+ %arrayidx = getelementptr inbounds i32*, i32** %0, i32 %row.0199
+ %2 = load i32*, i32** %arrayidx, align 4
+ br i1 undef, label %for.body48, label %for.inc108
+
+for.cond45:
+ %cmp46 = icmp sgt i32 %dec58, 0
+ br i1 %cmp46, label %for.body48, label %for.inc108
+
+for.body48:
+ %i.1190 = phi i32 [ %dec58, %for.cond45 ], [ 0, %for.body ]
+ %arrayidx50 = getelementptr inbounds i32, i32* %2, i32 %i.1190
+ %3 = load i32, i32* %arrayidx50, align 4
+ %cmp53 = icmp slt i32 %3, 0
+ %dec58 = add nsw i32 %i.1190, -1
+ br i1 %cmp53, label %for.end59, label %for.cond45
+
+for.end59:
+ %cmp60 = icmp slt i32 %i.1190, 0
+ br i1 %cmp60, label %if.then65, label %for.inc108
+
+if.then65:
+ br label %for.body80
+
+for.body80:
+ %j.1196.in = phi i32 [ %j.1196, %for.body80 ], [ %i.1190, %if.then65 ]
+ %j.1196 = add nsw i32 %j.1196.in, 1
+ %arrayidx81 = getelementptr inbounds i32, i32* %2, i32 %j.1196
+ %4 = load i32, i32* %arrayidx81, align 4
+ %arrayidx82 = getelementptr inbounds %struct.3*, %struct.3** %1, i32 %4
+ %5 = load %struct.3*, %struct.3** %arrayidx82, align 4
+ %cxcenter83 = getelementptr inbounds %struct.3, %struct.3* %5, i32 0, i32 3
+ store i32 0, i32* %cxcenter83, align 4
+ %6 = load i32, i32* %arrayidx81, align 4
+ %arrayidx87 = getelementptr inbounds i32, i32* %2, i32 %j.1196.in
+ store i32 %6, i32* %arrayidx87, align 4
+ %exitcond = icmp eq i32 %j.1196, 0
+ br i1 %exitcond, label %for.inc108, label %for.body80
+
+for.inc108:
+ %inc109 = add nsw i32 %row.0199, 1
+ br i1 undef, label %for.body, label %for.end110
+
+for.end110:
+ ret void
+}
diff --git a/test/CodeGen/Hexagon/hwloop1.ll b/test/CodeGen/Hexagon/hwloop1.ll
new file mode 100644
index 000000000000..97b779cf9628
--- /dev/null
+++ b/test/CodeGen/Hexagon/hwloop1.ll
@@ -0,0 +1,161 @@
+; RUN: llc -march=hexagon < %s | FileCheck %s
+; Check that we generate hardware loop instructions.
+
+; Case 1 : Loop with a constant number of iterations.
+; CHECK-LABEL: @hwloop1
+; CHECK: loop0(.LBB{{.}}_{{.}}, #10)
+; CHECK: endloop0
+
+@a = common global [10 x i32] zeroinitializer, align 4
+define i32 @hwloop1() nounwind {
+entry:
+ br label %for.body
+for.body:
+ %i.01 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+ %arrayidx = getelementptr inbounds [10 x i32], [10 x i32]* @a, i32 0, i32 %i.01
+ store i32 %i.01, i32* %arrayidx, align 4
+ %inc = add nsw i32 %i.01, 1
+ %exitcond = icmp eq i32 %inc, 10
+ br i1 %exitcond, label %for.end, label %for.body
+for.end:
+ ret i32 0
+}
+
+; Case 2 : Loop with a run-time number of iterations.
+; CHECK-LABEL: @hwloop2
+; CHECK: loop0(.LBB{{.}}_{{.}}, r{{[0-9]+}})
+; CHECK: endloop0
+
+define i32 @hwloop2(i32 %n, i32* nocapture %b) nounwind {
+entry:
+ %cmp1 = icmp sgt i32 %n, 0
+ br i1 %cmp1, label %for.body.preheader, label %for.end
+
+for.body.preheader:
+ br label %for.body
+
+for.body:
+ %a.03 = phi i32 [ %add, %for.body ], [ 0, %for.body.preheader ]
+ %i.02 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ]
+ %arrayidx = getelementptr inbounds i32, i32* %b, i32 %i.02
+ %0 = load i32, i32* %arrayidx, align 4
+ %add = add nsw i32 %0, %a.03
+ %inc = add nsw i32 %i.02, 1
+ %exitcond = icmp eq i32 %inc, %n
+ br i1 %exitcond, label %for.end.loopexit, label %for.body
+
+for.end.loopexit:
+ br label %for.end
+
+for.end:
+ %a.0.lcssa = phi i32 [ 0, %entry ], [ %add, %for.end.loopexit ]
+ ret i32 %a.0.lcssa
+}
+
+; Case 3 : Induction variable increment more than 1.
+; CHECK-LABEL: @hwloop3
+; CHECK: lsr(r{{[0-9]+}}, #2)
+; CHECK: loop0(.LBB{{.}}_{{.}}, r{{[0-9]+}})
+; CHECK: endloop0
+
+define i32 @hwloop3(i32 %n, i32* nocapture %b) nounwind {
+entry:
+ %cmp1 = icmp sgt i32 %n, 0
+ br i1 %cmp1, label %for.body.preheader, label %for.end
+
+for.body.preheader:
+ br label %for.body
+
+for.body:
+ %a.03 = phi i32 [ %add, %for.body ], [ 0, %for.body.preheader ]
+ %i.02 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ]
+ %arrayidx = getelementptr inbounds i32, i32* %b, i32 %i.02
+ %0 = load i32, i32* %arrayidx, align 4
+ %add = add nsw i32 %0, %a.03
+ %inc = add nsw i32 %i.02, 4
+ %exitcond = icmp eq i32 %inc, %n
+ br i1 %exitcond, label %for.end.loopexit, label %for.body
+
+for.end.loopexit:
+ br label %for.end
+
+for.end:
+ %a.0.lcssa = phi i32 [ 0, %entry ], [ %add, %for.end.loopexit ]
+ ret i32 %a.0.lcssa
+}
+
+; Case 4 : Loop exit compare uses register instead of immediate value.
+; CHECK-LABEL: @hwloop4
+; CHECK: loop0(.LBB{{.}}_{{.}}, r{{[0-9]+}})
+; CHECK: endloop0
+
+define i32 @hwloop4(i32 %n, i32* nocapture %b) nounwind {
+entry:
+ %cmp1 = icmp sgt i32 %n, 0
+ br i1 %cmp1, label %for.body.preheader, label %for.end
+
+for.body.preheader:
+ br label %for.body
+
+for.body:
+ %i.02 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ]
+ %arrayidx = getelementptr inbounds i32, i32* %b, i32 %i.02
+ store i32 %i.02, i32* %arrayidx, align 4
+ %inc = add nsw i32 %i.02, 1
+ %exitcond = icmp eq i32 %inc, %n
+ br i1 %exitcond, label %for.end.loopexit, label %for.body
+
+for.end.loopexit:
+ br label %for.end
+
+for.end:
+ ret i32 0
+}
+
+; Case 5: After LSR, the initial value is 100 and the iv decrements to 0.
+; CHECK-LABEL: @hwloop5
+; CHECK: loop0(.LBB{{.}}_{{.}}, #100)
+; CHECK: endloop0
+
+define void @hwloop5(i32* nocapture %a, i32* nocapture %res) nounwind {
+entry:
+ br label %for.body
+
+for.body:
+ %i.03 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+ %arrayidx = getelementptr inbounds i32, i32* %a, i32 %i.03
+ %0 = load i32, i32* %arrayidx, align 4
+ %mul = mul nsw i32 %0, %0
+ %arrayidx2 = getelementptr inbounds i32, i32* %res, i32 %i.03
+ store i32 %mul, i32* %arrayidx2, align 4
+ %inc = add nsw i32 %i.03, 1
+ %exitcond = icmp eq i32 %inc, 100
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+ ret void
+}
+
+; Case 6: Large immediate offset
+; CHECK-LABEL: @hwloop6
+; CHECK-NOT: loop0(.LBB{{.}}_{{.}}, #1024)
+; CHECK: loop0(.LBB{{.}}_{{.}}, r{{[0-9]+}})
+; CHECK: endloop0
+
+define void @hwloop6(i32* nocapture %a, i32* nocapture %res) nounwind {
+entry:
+ br label %for.body
+
+for.body:
+ %i.02 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+ %arrayidx = getelementptr inbounds i32, i32* %a, i32 %i.02
+ %0 = load i32, i32* %arrayidx, align 4
+ %arrayidx1 = getelementptr inbounds i32, i32* %res, i32 %i.02
+ store i32 %0, i32* %arrayidx1, align 4
+ %inc = add nsw i32 %i.02, 1
+ %exitcond = icmp eq i32 %inc, 1024
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+ ret void
+}
diff --git a/test/CodeGen/Hexagon/hwloop2.ll b/test/CodeGen/Hexagon/hwloop2.ll
new file mode 100644
index 000000000000..d411d979904e
--- /dev/null
+++ b/test/CodeGen/Hexagon/hwloop2.ll
@@ -0,0 +1,37 @@
+; RUN: llc -disable-lsr -march=hexagon < %s | FileCheck %s
+
+; Test for multiple phis with induction variables.
+
+; CHECK: loop0(.LBB{{.}}_{{.}}, r{{[0-9]+}})
+; CHECK: endloop0
+
+define i32 @hwloop4(i32* nocapture %s, i32* nocapture %a, i32 %n) {
+entry:
+ %cmp3 = icmp eq i32 %n, 0
+ br i1 %cmp3, label %for.end, label %for.body.lr.ph
+
+for.body.lr.ph:
+ %.pre = load i32, i32* %s, align 4
+ br label %for.body
+
+for.body:
+ %0 = phi i32 [ %.pre, %for.body.lr.ph ], [ %add1, %for.body ]
+ %j.05 = phi i32 [ 0, %for.body.lr.ph ], [ %add2, %for.body ]
+ %lsr.iv = phi i32 [ %lsr.iv.next, %for.body ], [ %n, %for.body.lr.ph ]
+ %lsr.iv1 = phi i32* [ %scevgep, %for.body ], [ %a, %for.body.lr.ph ]
+ %1 = load i32, i32* %lsr.iv1, align 4
+ %add1 = add nsw i32 %0, %1
+ store i32 %add1, i32* %s, align 4
+ %add2 = add nsw i32 %j.05, 1
+ %lsr.iv.next = add i32 %lsr.iv, -1
+ %scevgep = getelementptr i32, i32* %lsr.iv1, i32 1
+ %cmp = icmp eq i32 %lsr.iv.next, 0
+ br i1 %cmp, label %for.end.loopexit, label %for.body
+
+for.end.loopexit:
+ br label %for.end
+
+for.end:
+ %j.0.lcssa = phi i32 [ 0, %entry ], [ %add2, %for.end.loopexit ]
+ ret i32 %j.0.lcssa
+}
diff --git a/test/CodeGen/Hexagon/hwloop3.ll b/test/CodeGen/Hexagon/hwloop3.ll
new file mode 100644
index 000000000000..1135e06a0c43
--- /dev/null
+++ b/test/CodeGen/Hexagon/hwloop3.ll
@@ -0,0 +1,27 @@
+; RUN: llc -march=hexagon < %s | FileCheck %s
+;
+; Remove the unconditional jump to the following instruction.
+
+; CHECK: endloop0
+; CHECK-NOT: jump [[L1:.]]
+; CHECK-NOT: [[L1]]
+
+define void @test(i32* nocapture %a, i32 %n) nounwind {
+entry:
+ br label %for.body
+
+for.body:
+ %arrayidx.phi = phi i32* [ %a, %entry ], [ %arrayidx.inc, %for.body ]
+ %i.02 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+ %0 = load i32, i32* %arrayidx.phi, align 4
+ %add = add nsw i32 %0, 1
+ store i32 %add, i32* %arrayidx.phi, align 4
+ %inc = add nsw i32 %i.02, 1
+ %exitcond = icmp eq i32 %inc, 100
+ %arrayidx.inc = getelementptr i32, i32* %arrayidx.phi, i32 1
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+ ret void
+}
+
diff --git a/test/CodeGen/Hexagon/hwloop4.ll b/test/CodeGen/Hexagon/hwloop4.ll
new file mode 100644
index 000000000000..d159c45e3fb8
--- /dev/null
+++ b/test/CodeGen/Hexagon/hwloop4.ll
@@ -0,0 +1,76 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv5 < %s | FileCheck %s
+;
+; Remove the unnecessary 'add' instruction used for the hardware loop setup.
+
+; CHECK: [[OP0:r[0-9]+]] = add([[OP1:r[0-9]+]], #-[[OP2:[0-9]+]]
+; CHECK-NOT: add([[OP0]], #[[OP2]])
+; CHECK: lsr([[OP1]], #{{[0-9]+}})
+; CHECK: loop0
+
+define void @matrix_mul_matrix(i32 %N, i32* nocapture %C, i16* nocapture readnone %A, i16* nocapture readnone %B) #0 {
+entry:
+ %cmp4 = icmp eq i32 %N, 0
+ br i1 %cmp4, label %for.end, label %for.body.preheader
+
+for.body.preheader:
+ %maxval = add i32 %N, -7
+ %0 = icmp sgt i32 %maxval, 0
+ br i1 %0, label %for.body.preheader9, label %for.body.ur.preheader
+
+for.body.preheader9:
+ br label %for.body
+
+for.body:
+ %arrayidx.phi = phi i32* [ %arrayidx.inc.7, %for.body ], [ %C, %for.body.preheader9 ]
+ %i.05 = phi i32 [ %inc.7, %for.body ], [ 0, %for.body.preheader9 ]
+ store i32 %i.05, i32* %arrayidx.phi, align 4
+ %inc = add i32 %i.05, 1
+ %arrayidx.inc = getelementptr i32, i32* %arrayidx.phi, i32 1
+ store i32 %inc, i32* %arrayidx.inc, align 4
+ %inc.1 = add i32 %i.05, 2
+ %arrayidx.inc.1 = getelementptr i32, i32* %arrayidx.phi, i32 2
+ store i32 %inc.1, i32* %arrayidx.inc.1, align 4
+ %inc.2 = add i32 %i.05, 3
+ %arrayidx.inc.2 = getelementptr i32, i32* %arrayidx.phi, i32 3
+ store i32 %inc.2, i32* %arrayidx.inc.2, align 4
+ %inc.3 = add i32 %i.05, 4
+ %arrayidx.inc.3 = getelementptr i32, i32* %arrayidx.phi, i32 4
+ store i32 %inc.3, i32* %arrayidx.inc.3, align 4
+ %inc.4 = add i32 %i.05, 5
+ %arrayidx.inc.4 = getelementptr i32, i32* %arrayidx.phi, i32 5
+ store i32 %inc.4, i32* %arrayidx.inc.4, align 4
+ %inc.5 = add i32 %i.05, 6
+ %arrayidx.inc.5 = getelementptr i32, i32* %arrayidx.phi, i32 6
+ store i32 %inc.5, i32* %arrayidx.inc.5, align 4
+ %inc.6 = add i32 %i.05, 7
+ %arrayidx.inc.6 = getelementptr i32, i32* %arrayidx.phi, i32 7
+ store i32 %inc.6, i32* %arrayidx.inc.6, align 4
+ %inc.7 = add i32 %i.05, 8
+ %exitcond.7 = icmp slt i32 %inc.7, %maxval
+ %arrayidx.inc.7 = getelementptr i32, i32* %arrayidx.phi, i32 8
+ br i1 %exitcond.7, label %for.body, label %for.end.loopexit.ur-lcssa
+
+for.end.loopexit.ur-lcssa:
+ %1 = icmp eq i32 %inc.7, %N
+ br i1 %1, label %for.end, label %for.body.ur.preheader
+
+for.body.ur.preheader:
+ %arrayidx.phi.ur.ph = phi i32* [ %C, %for.body.preheader ], [ %arrayidx.inc.7, %for.end.loopexit.ur-lcssa ]
+ %i.05.ur.ph = phi i32 [ 0, %for.body.preheader ], [ %inc.7, %for.end.loopexit.ur-lcssa ]
+ br label %for.body.ur
+
+for.body.ur:
+ %arrayidx.phi.ur = phi i32* [ %arrayidx.inc.ur, %for.body.ur ], [ %arrayidx.phi.ur.ph, %for.body.ur.preheader ]
+ %i.05.ur = phi i32 [ %inc.ur, %for.body.ur ], [ %i.05.ur.ph, %for.body.ur.preheader ]
+ store i32 %i.05.ur, i32* %arrayidx.phi.ur, align 4
+ %inc.ur = add i32 %i.05.ur, 1
+ %exitcond.ur = icmp eq i32 %inc.ur, %N
+ %arrayidx.inc.ur = getelementptr i32, i32* %arrayidx.phi.ur, i32 1
+ br i1 %exitcond.ur, label %for.end.loopexit, label %for.body.ur
+
+for.end.loopexit:
+ br label %for.end
+
+for.end:
+ ret void
+}
diff --git a/test/CodeGen/Hexagon/hwloop5.ll b/test/CodeGen/Hexagon/hwloop5.ll
new file mode 100644
index 000000000000..0886b03cc754
--- /dev/null
+++ b/test/CodeGen/Hexagon/hwloop5.ll
@@ -0,0 +1,93 @@
+; RUN: llc -O3 -march=hexagon -mcpu=hexagonv5 < %s | FileCheck %s
+;
+; Generate a hardware loop when a loop with an unknown trip count is vectorized.
+
+; CHECK: loop0(.LBB{{[0-9]*}}_{{[0-9]*}}, r{{[0-9]+}})
+; CHECK: endloop0
+; CHECK: loop0(.LBB{{[0-9]*}}_{{[0-9]*}}, r{{[0-9]+}})
+; CHECK: endloop0
+
+@A = common global [1000 x i32] zeroinitializer, align 8
+@B = common global [1000 x i32] zeroinitializer, align 8
+
+define i32 @dotprod2(i32 %count) #0 {
+entry.split:
+ %cmp6 = icmp sgt i32 %count, 0
+ br i1 %cmp6, label %polly.cond, label %for.end
+
+for.end.loopexit:
+ br label %for.end
+
+for.end:
+ %sum.0.lcssa.reg2mem.0.load37 = phi i32 [ 0, %entry.split ], [ %p_add34, %polly.loop_if13 ], [ %p_add, %for.end.loopexit ]
+ ret i32 %sum.0.lcssa.reg2mem.0.load37
+
+polly.cond:
+ %0 = icmp sgt i32 %count, 1
+ br i1 %0, label %polly.loop_if, label %polly.loop_if13
+
+polly.loop_exit.loopexit:
+ br label %polly.loop_exit
+
+polly.loop_exit:
+ %1 = phi <2 x i32> [ zeroinitializer, %polly.loop_if ], [ %addp_vec, %polly.loop_exit.loopexit ]
+ %2 = extractelement <2 x i32> %1, i32 0
+ %3 = extractelement <2 x i32> %1, i32 1
+ %add_sum = add i32 %2, %3
+ br label %polly.loop_if13
+
+polly.loop_if:
+ %4 = add i32 %count, -1
+ %leftover_lb = and i32 %4, -2
+ %polly.loop_guard = icmp eq i32 %leftover_lb, 0
+ br i1 %polly.loop_guard, label %polly.loop_exit, label %polly.loop_preheader
+
+polly.stmt.for.body:
+ %addp_vec28 = phi <2 x i32> [ zeroinitializer, %polly.loop_preheader ], [ %addp_vec, %polly.stmt.for.body ]
+ %scevgep.phi = phi i32* [ getelementptr inbounds ([1000 x i32], [1000 x i32]* @A, i32 0, i32 0), %polly.loop_preheader ], [ %scevgep.inc, %polly.stmt.for.body ]
+ %scevgep9.phi = phi i32* [ getelementptr inbounds ([1000 x i32], [1000 x i32]* @B, i32 0, i32 0), %polly.loop_preheader ], [ %scevgep9.inc, %polly.stmt.for.body ]
+ %polly.indvar = phi i32 [ 0, %polly.loop_preheader ], [ %polly.indvar_next, %polly.stmt.for.body ]
+ %vector_ptr = bitcast i32* %scevgep.phi to <2 x i32>*
+ %_p_vec_full = load <2 x i32>, <2 x i32>* %vector_ptr, align 8
+ %vector_ptr10 = bitcast i32* %scevgep9.phi to <2 x i32>*
+ %_p_vec_full11 = load <2 x i32>, <2 x i32>* %vector_ptr10, align 8
+ %mulp_vec = mul <2 x i32> %_p_vec_full11, %_p_vec_full
+ %addp_vec = add <2 x i32> %mulp_vec, %addp_vec28
+ %polly.indvar_next = add nsw i32 %polly.indvar, 2
+ %polly.loop_cond = icmp eq i32 %polly.indvar, %polly.adjust_ub
+ %scevgep.inc = getelementptr i32, i32* %scevgep.phi, i32 2
+ %scevgep9.inc = getelementptr i32, i32* %scevgep9.phi, i32 2
+ br i1 %polly.loop_cond, label %polly.loop_exit.loopexit, label %polly.stmt.for.body
+
+polly.loop_preheader:
+ %polly.adjust_ub = add i32 %leftover_lb, -2
+ br label %polly.stmt.for.body
+
+polly.loop_if13:
+ %p_add34 = phi i32 [ 0, %polly.cond ], [ %add_sum, %polly.loop_exit ]
+ %merge.lb = phi i32 [ 0, %polly.cond ], [ %leftover_lb, %polly.loop_exit ]
+ %polly.loop_guard17 = icmp slt i32 %merge.lb, %count
+ br i1 %polly.loop_guard17, label %polly.loop_preheader15, label %for.end
+
+polly.stmt.for.body22:
+ %p_add30 = phi i32 [ %p_add34, %polly.loop_preheader15 ], [ %p_add, %polly.stmt.for.body22 ]
+ %polly.indvar18 = phi i32 [ %merge.lb, %polly.loop_preheader15 ], [ %polly.indvar_next19, %polly.stmt.for.body22 ]
+ %5 = tail call i32 @llvm.annotation.i32(i32 %polly.indvar18, i8* null, i8* null, i32 0), !polly.loop.smallTripCount !0
+ %scevgep23 = getelementptr [1000 x i32], [1000 x i32]* @A, i32 0, i32 %polly.indvar18
+ %_p_scalar_ = load i32, i32* %scevgep23, align 4
+ %scevgep24 = getelementptr [1000 x i32], [1000 x i32]* @B, i32 0, i32 %polly.indvar18
+ %_p_scalar_25 = load i32, i32* %scevgep24, align 4
+ %p_mul = mul nsw i32 %_p_scalar_25, %_p_scalar_
+ %p_add = add nsw i32 %p_mul, %p_add30
+ %polly.indvar_next19 = add nsw i32 %polly.indvar18, 1
+ %polly.loop_cond21 = icmp slt i32 %polly.indvar18, %polly.adjust_ub20
+ br i1 %polly.loop_cond21, label %polly.stmt.for.body22, label %for.end.loopexit
+
+polly.loop_preheader15:
+ %polly.adjust_ub20 = add i32 %count, -1
+ br label %polly.stmt.for.body22
+}
+
+declare i32 @llvm.annotation.i32(i32, i8*, i8*, i32) #1
+
+!0 = !{}
diff --git a/test/CodeGen/Hexagon/i16_VarArg.ll b/test/CodeGen/Hexagon/i16_VarArg.ll
index c5d05a5e6ed8..ba98f6226683 100644
--- a/test/CodeGen/Hexagon/i16_VarArg.ll
+++ b/test/CodeGen/Hexagon/i16_VarArg.ll
@@ -20,8 +20,8 @@
declare i32 @printf(i8*, ...)
define i32 @main() {
- %a = load double* @A
- %b = load double* @B
+ %a = load double, double* @A
+ %b = load double, double* @B
%lt_r = fcmp olt double %a, %b
%le_r = fcmp ole double %a, %b
%gt_r = fcmp ogt double %a, %b
@@ -29,12 +29,12 @@ define i32 @main() {
%eq_r = fcmp oeq double %a, %b
%ne_r = fcmp une double %a, %b
%val1 = zext i1 %lt_r to i16
- %lt_s = getelementptr [12 x i8]* @lt_str, i64 0, i64 0
- %le_s = getelementptr [13 x i8]* @le_str, i64 0, i64 0
- %gt_s = getelementptr [12 x i8]* @gt_str, i64 0, i64 0
- %ge_s = getelementptr [13 x i8]* @ge_str, i64 0, i64 0
- %eq_s = getelementptr [13 x i8]* @eq_str, i64 0, i64 0
- %ne_s = getelementptr [13 x i8]* @ne_str, i64 0, i64 0
- call i32 (i8*, ...)* @printf( i8* %lt_s, i16 %val1 )
+ %lt_s = getelementptr [12 x i8], [12 x i8]* @lt_str, i64 0, i64 0
+ %le_s = getelementptr [13 x i8], [13 x i8]* @le_str, i64 0, i64 0
+ %gt_s = getelementptr [12 x i8], [12 x i8]* @gt_str, i64 0, i64 0
+ %ge_s = getelementptr [13 x i8], [13 x i8]* @ge_str, i64 0, i64 0
+ %eq_s = getelementptr [13 x i8], [13 x i8]* @eq_str, i64 0, i64 0
+ %ne_s = getelementptr [13 x i8], [13 x i8]* @ne_str, i64 0, i64 0
+ call i32 (i8*, ...) @printf( i8* %lt_s, i16 %val1 )
ret i32 0
}
diff --git a/test/CodeGen/Hexagon/i1_VarArg.ll b/test/CodeGen/Hexagon/i1_VarArg.ll
index 37f27787c186..1908b3c71f3f 100644
--- a/test/CodeGen/Hexagon/i1_VarArg.ll
+++ b/test/CodeGen/Hexagon/i1_VarArg.ll
@@ -20,25 +20,25 @@
declare i32 @printf(i8*, ...)
define i32 @main() {
- %a = load double* @A
- %b = load double* @B
+ %a = load double, double* @A
+ %b = load double, double* @B
%lt_r = fcmp olt double %a, %b
%le_r = fcmp ole double %a, %b
%gt_r = fcmp ogt double %a, %b
%ge_r = fcmp oge double %a, %b
%eq_r = fcmp oeq double %a, %b
%ne_r = fcmp une double %a, %b
- %lt_s = getelementptr [12 x i8]* @lt_str, i64 0, i64 0
- %le_s = getelementptr [13 x i8]* @le_str, i64 0, i64 0
- %gt_s = getelementptr [12 x i8]* @gt_str, i64 0, i64 0
- %ge_s = getelementptr [13 x i8]* @ge_str, i64 0, i64 0
- %eq_s = getelementptr [13 x i8]* @eq_str, i64 0, i64 0
- %ne_s = getelementptr [13 x i8]* @ne_str, i64 0, i64 0
- call i32 (i8*, ...)* @printf( i8* %lt_s, i1 %lt_r )
- call i32 (i8*, ...)* @printf( i8* %le_s, i1 %le_r )
- call i32 (i8*, ...)* @printf( i8* %gt_s, i1 %gt_r )
- call i32 (i8*, ...)* @printf( i8* %ge_s, i1 %ge_r )
- call i32 (i8*, ...)* @printf( i8* %eq_s, i1 %eq_r )
- call i32 (i8*, ...)* @printf( i8* %ne_s, i1 %ne_r )
+ %lt_s = getelementptr [12 x i8], [12 x i8]* @lt_str, i64 0, i64 0
+ %le_s = getelementptr [13 x i8], [13 x i8]* @le_str, i64 0, i64 0
+ %gt_s = getelementptr [12 x i8], [12 x i8]* @gt_str, i64 0, i64 0
+ %ge_s = getelementptr [13 x i8], [13 x i8]* @ge_str, i64 0, i64 0
+ %eq_s = getelementptr [13 x i8], [13 x i8]* @eq_str, i64 0, i64 0
+ %ne_s = getelementptr [13 x i8], [13 x i8]* @ne_str, i64 0, i64 0
+ call i32 (i8*, ...) @printf( i8* %lt_s, i1 %lt_r )
+ call i32 (i8*, ...) @printf( i8* %le_s, i1 %le_r )
+ call i32 (i8*, ...) @printf( i8* %gt_s, i1 %gt_r )
+ call i32 (i8*, ...) @printf( i8* %ge_s, i1 %ge_r )
+ call i32 (i8*, ...) @printf( i8* %eq_s, i1 %eq_r )
+ call i32 (i8*, ...) @printf( i8* %ne_s, i1 %ne_r )
ret i32 0
}
diff --git a/test/CodeGen/Hexagon/i8_VarArg.ll b/test/CodeGen/Hexagon/i8_VarArg.ll
index 6f056ff417af..c40a6a957270 100644
--- a/test/CodeGen/Hexagon/i8_VarArg.ll
+++ b/test/CodeGen/Hexagon/i8_VarArg.ll
@@ -20,8 +20,8 @@
declare i32 @printf(i8*, ...)
define i32 @main() {
- %a = load double* @A
- %b = load double* @B
+ %a = load double, double* @A
+ %b = load double, double* @B
%lt_r = fcmp olt double %a, %b
%le_r = fcmp ole double %a, %b
%gt_r = fcmp ogt double %a, %b
@@ -29,12 +29,12 @@ define i32 @main() {
%eq_r = fcmp oeq double %a, %b
%ne_r = fcmp une double %a, %b
%val1 = zext i1 %lt_r to i8
- %lt_s = getelementptr [12 x i8]* @lt_str, i64 0, i64 0
- %le_s = getelementptr [13 x i8]* @le_str, i64 0, i64 0
- %gt_s = getelementptr [12 x i8]* @gt_str, i64 0, i64 0
- %ge_s = getelementptr [13 x i8]* @ge_str, i64 0, i64 0
- %eq_s = getelementptr [13 x i8]* @eq_str, i64 0, i64 0
- %ne_s = getelementptr [13 x i8]* @ne_str, i64 0, i64 0
- call i32 (i8*, ...)* @printf( i8* %lt_s, i8 %val1 )
+ %lt_s = getelementptr [12 x i8], [12 x i8]* @lt_str, i64 0, i64 0
+ %le_s = getelementptr [13 x i8], [13 x i8]* @le_str, i64 0, i64 0
+ %gt_s = getelementptr [12 x i8], [12 x i8]* @gt_str, i64 0, i64 0
+ %ge_s = getelementptr [13 x i8], [13 x i8]* @ge_str, i64 0, i64 0
+ %eq_s = getelementptr [13 x i8], [13 x i8]* @eq_str, i64 0, i64 0
+ %ne_s = getelementptr [13 x i8], [13 x i8]* @ne_str, i64 0, i64 0
+ call i32 (i8*, ...) @printf( i8* %lt_s, i8 %val1 )
ret i32 0
}
diff --git a/test/CodeGen/Hexagon/idxload-with-zero-offset.ll b/test/CodeGen/Hexagon/idxload-with-zero-offset.ll
index 729d79f55a6e..f1a9d38f1b1c 100644
--- a/test/CodeGen/Hexagon/idxload-with-zero-offset.ll
+++ b/test/CodeGen/Hexagon/idxload-with-zero-offset.ll
@@ -1,70 +1,70 @@
-; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
-; Check that we generate load instruction with (base + register offset << 0)
+; RUN: llc -march=hexagon < %s | FileCheck %s
+; Check that we generate load instructions with (base + register offset << x)
; load word
-define i32 @load_w(i32* nocapture %a, i32 %n) nounwind {
-; CHECK: r{{[0-9]+}}{{ *}}={{ *}}memw(r{{[0-9]+}}{{ *}}+{{ *}}r{{[0-9]+}}<<#0)
+define i32 @load_w(i32* nocapture %a, i32 %n, i32 %m) nounwind {
+; CHECK: r{{[0-9]+}}{{ *}}={{ *}}memw(r{{[0-9]+}}{{ *}}+{{ *}}r{{[0-9]+}}{{ *}}<<{{ *}}#2)
entry:
- %tmp = shl i32 %n, 4
- %scevgep9 = getelementptr i32* %a, i32 %tmp
- %val = load i32* %scevgep9, align 4
+ %tmp = add i32 %n, %m
+ %scevgep9 = getelementptr i32, i32* %a, i32 %tmp
+ %val = load i32, i32* %scevgep9, align 4
ret i32 %val
}
; load unsigned half word
-define i16 @load_uh(i16* nocapture %a, i32 %n) nounwind {
-; CHECK: r{{[0-9]+}}{{ *}}={{ *}}memuh(r{{[0-9]+}}{{ *}}+{{ *}}r{{[0-9]+}}<<#0)
+define i16 @load_uh(i16* nocapture %a, i32 %n, i32 %m) nounwind {
+; CHECK: r{{[0-9]+}}{{ *}}={{ *}}memuh(r{{[0-9]+}}{{ *}}+{{ *}}r{{[0-9]+}}{{ *}}<<#1)
entry:
- %tmp = shl i32 %n, 4
- %scevgep9 = getelementptr i16* %a, i32 %tmp
- %val = load i16* %scevgep9, align 2
+ %tmp = add i32 %n, %m
+ %scevgep9 = getelementptr i16, i16* %a, i32 %tmp
+ %val = load i16, i16* %scevgep9, align 2
ret i16 %val
}
; load signed half word
-define i32 @load_h(i16* nocapture %a, i32 %n) nounwind {
-; CHECK: r{{[0-9]+}}{{ *}}={{ *}}memh(r{{[0-9]+}}{{ *}}+{{ *}}r{{[0-9]+}}<<#0)
+define i32 @load_h(i16* nocapture %a, i32 %n, i32 %m) nounwind {
+; CHECK: r{{[0-9]+}}{{ *}}={{ *}}memh(r{{[0-9]+}}{{ *}}+{{ *}}r{{[0-9]+}}{{ *}}<<#1)
entry:
- %tmp = shl i32 %n, 4
- %scevgep9 = getelementptr i16* %a, i32 %tmp
- %val = load i16* %scevgep9, align 2
+ %tmp = add i32 %n, %m
+ %scevgep9 = getelementptr i16, i16* %a, i32 %tmp
+ %val = load i16, i16* %scevgep9, align 2
%conv = sext i16 %val to i32
ret i32 %conv
}
; load unsigned byte
-define i8 @load_ub(i8* nocapture %a, i32 %n) nounwind {
-; CHECK: r{{[0-9]+}}{{ *}}={{ *}}memub(r{{[0-9]+}}{{ *}}+{{ *}}r{{[0-9]+}}<<#0)
+define i8 @load_ub(i8* nocapture %a, i32 %n, i32 %m) nounwind {
+; CHECK: r{{[0-9]+}}{{ *}}={{ *}}memub(r{{[0-9]+}}{{ *}}+{{ *}}r{{[0-9]+}}{{ *}}<<#0)
entry:
- %tmp = shl i32 %n, 4
- %scevgep9 = getelementptr i8* %a, i32 %tmp
- %val = load i8* %scevgep9, align 1
+ %tmp = add i32 %n, %m
+ %scevgep9 = getelementptr i8, i8* %a, i32 %tmp
+ %val = load i8, i8* %scevgep9, align 1
ret i8 %val
}
; load signed byte
-define i32 @foo_2(i8* nocapture %a, i32 %n) nounwind {
-; CHECK: r{{[0-9]+}}{{ *}}={{ *}}memb(r{{[0-9]+}}{{ *}}+{{ *}}r{{[0-9]+}}<<#0)
+define i32 @foo_2(i8* nocapture %a, i32 %n, i32 %m) nounwind {
+; CHECK: r{{[0-9]+}}{{ *}}={{ *}}memb(r{{[0-9]+}}{{ *}}+{{ *}}r{{[0-9]+}}{{ *}}<<{{ *}}#0)
entry:
- %tmp = shl i32 %n, 4
- %scevgep9 = getelementptr i8* %a, i32 %tmp
- %val = load i8* %scevgep9, align 1
+ %tmp = add i32 %n, %m
+ %scevgep9 = getelementptr i8, i8* %a, i32 %tmp
+ %val = load i8, i8* %scevgep9, align 1
%conv = sext i8 %val to i32
ret i32 %conv
}
; load doubleword
-define i64 @load_d(i64* nocapture %a, i32 %n) nounwind {
-; CHECK: r{{[0-9]+}}:{{[0-9]+}}{{ *}}={{ *}}memd(r{{[0-9]+}}{{ *}}+{{ *}}r{{[0-9]+}}<<#0)
+define i64 @load_d(i64* nocapture %a, i32 %n, i32 %m) nounwind {
+; CHECK: r{{[0-9]+}}:{{[0-9]+}}{{ *}}={{ *}}memd(r{{[0-9]+}}{{ *}}+{{ *}}r{{[0-9]+}}{{ *}}<<{{ *}}#3)
entry:
- %tmp = shl i32 %n, 4
- %scevgep9 = getelementptr i64* %a, i32 %tmp
- %val = load i64* %scevgep9, align 8
+ %tmp = add i32 %n, %m
+ %scevgep9 = getelementptr i64, i64* %a, i32 %tmp
+ %val = load i64, i64* %scevgep9, align 8
ret i64 %val
}
diff --git a/test/CodeGen/Hexagon/intrinsics/alu32_alu.ll b/test/CodeGen/Hexagon/intrinsics/alu32_alu.ll
new file mode 100644
index 000000000000..37f9f4007b67
--- /dev/null
+++ b/test/CodeGen/Hexagon/intrinsics/alu32_alu.ll
@@ -0,0 +1,202 @@
+; RUN: llc -march=hexagon -O0 < %s | FileCheck %s
+; Hexagon Programmer's Reference Manual 11.1.1 ALU32/ALU
+
+; Add
+declare i32 @llvm.hexagon.A2.addi(i32, i32)
+define i32 @A2_addi(i32 %a) {
+ %z = call i32 @llvm.hexagon.A2.addi(i32 %a, i32 0)
+ ret i32 %z
+}
+; CHECK: r0 = add(r0, #0)
+
+declare i32 @llvm.hexagon.A2.add(i32, i32)
+define i32 @A2_add(i32 %a, i32 %b) {
+ %z = call i32 @llvm.hexagon.A2.add(i32 %a, i32 %b)
+ ret i32 %z
+}
+; CHECK: r0 = add(r0, r1)
+
+declare i32 @llvm.hexagon.A2.addsat(i32, i32)
+define i32 @A2_addsat(i32 %a, i32 %b) {
+ %z = call i32 @llvm.hexagon.A2.addsat(i32 %a, i32 %b)
+ ret i32 %z
+}
+; CHECK: r0 = add(r0, r1):sat
+
+; Logical operations
+declare i32 @llvm.hexagon.A2.and(i32, i32)
+define i32 @A2_and(i32 %a, i32 %b) {
+ %z = call i32 @llvm.hexagon.A2.and(i32 %a, i32 %b)
+ ret i32 %z
+}
+; CHECK: r0 = and(r0, r1)
+
+declare i32 @llvm.hexagon.A2.or(i32, i32)
+define i32 @A2_or(i32 %a, i32 %b) {
+ %z = call i32 @llvm.hexagon.A2.or(i32 %a, i32 %b)
+ ret i32 %z
+}
+; CHECK: r0 = or(r0, r1)
+
+declare i32 @llvm.hexagon.A2.xor(i32, i32)
+define i32 @A2_xor(i32 %a, i32 %b) {
+ %z = call i32 @llvm.hexagon.A2.xor(i32 %a, i32 %b)
+ ret i32 %z
+}
+; CHECK: r0 = xor(r0, r1)
+
+declare i32 @llvm.hexagon.A4.andn(i32, i32)
+define i32 @A4_andn(i32 %a, i32 %b) {
+ %z = call i32 @llvm.hexagon.A4.andn(i32 %a, i32 %b)
+ ret i32 %z
+}
+; CHECK: r0 = and(r0, ~r1)
+
+declare i32 @llvm.hexagon.A4.orn(i32, i32)
+define i32 @A4_orn(i32 %a, i32 %b) {
+ %z = call i32 @llvm.hexagon.A4.orn(i32 %a, i32 %b)
+ ret i32 %z
+}
+; CHECK: r0 = or(r0, ~r1)
+
+; Nop
+declare void @llvm.hexagon.A2.nop()
+define void @A2_nop(i32 %a, i32 %b) {
+ call void @llvm.hexagon.A2.nop()
+ ret void
+}
+; CHECK: nop
+
+; Subtract
+declare i32 @llvm.hexagon.A2.sub(i32, i32)
+define i32 @A2_sub(i32 %a, i32 %b) {
+ %z = call i32 @llvm.hexagon.A2.sub(i32 %a, i32 %b)
+ ret i32 %z
+}
+; CHECK: r0 = sub(r0, r1)
+
+declare i32 @llvm.hexagon.A2.subsat(i32, i32)
+define i32 @A2_subsat(i32 %a, i32 %b) {
+ %z = call i32 @llvm.hexagon.A2.subsat(i32 %a, i32 %b)
+ ret i32 %z
+}
+; CHECK: r0 = sub(r0, r1):sat
+
+; Sign extend
+declare i32 @llvm.hexagon.A2.sxtb(i32)
+define i32 @A2_sxtb(i32 %a) {
+ %z = call i32 @llvm.hexagon.A2.sxtb(i32 %a)
+ ret i32 %z
+}
+; CHECK: r0 = sxtb(r0)
+
+declare i32 @llvm.hexagon.A2.sxth(i32)
+define i32 @A2_sxth(i32 %a) {
+ %z = call i32 @llvm.hexagon.A2.sxth(i32 %a)
+ ret i32 %z
+}
+; CHECK: r0 = sxth(r0)
+
+; Transfer immediate
+declare i32 @llvm.hexagon.A2.tfril(i32, i32)
+define i32 @A2_tfril(i32 %a) {
+ %z = call i32 @llvm.hexagon.A2.tfril(i32 %a, i32 0)
+ ret i32 %z
+}
+; CHECK: r0.l = #0
+
+declare i32 @llvm.hexagon.A2.tfrih(i32, i32)
+define i32 @A2_tfrih(i32 %a) {
+ %z = call i32 @llvm.hexagon.A2.tfrih(i32 %a, i32 0)
+ ret i32 %z
+}
+; CHECK: r0.h = #0
+
+declare i32 @llvm.hexagon.A2.tfrsi(i32)
+define i32 @A2_tfrsi() {
+ %z = call i32 @llvm.hexagon.A2.tfrsi(i32 0)
+ ret i32 %z
+}
+; CHECK: r0 = #0
+
+; Transfer register
+declare i32 @llvm.hexagon.A2.tfr(i32)
+define i32 @A2_tfr(i32 %a) {
+ %z = call i32 @llvm.hexagon.A2.tfr(i32 %a)
+ ret i32 %z
+}
+; CHECK: r0 = r0
+
+; Vector add halfwords
+declare i32 @llvm.hexagon.A2.svaddh(i32, i32)
+define i32 @A2_svaddh(i32 %a, i32 %b) {
+ %z = call i32 @llvm.hexagon.A2.svaddh(i32 %a, i32 %b)
+ ret i32 %z
+}
+; CHECK: r0 = vaddh(r0, r1)
+
+declare i32 @llvm.hexagon.A2.svaddhs(i32, i32)
+define i32 @A2_svaddhs(i32 %a, i32 %b) {
+ %z = call i32 @llvm.hexagon.A2.svaddhs(i32 %a, i32 %b)
+ ret i32 %z
+}
+; CHECK: r0 = vaddh(r0, r1):sat
+
+declare i32 @llvm.hexagon.A2.svadduhs(i32, i32)
+define i32 @A2_svadduhs(i32 %a, i32 %b) {
+ %z = call i32 @llvm.hexagon.A2.svadduhs(i32 %a, i32 %b)
+ ret i32 %z
+}
+; CHECK: r0 = vadduh(r0, r1):sat
+
+; Vector average halfwords
+declare i32 @llvm.hexagon.A2.svavgh(i32, i32)
+define i32 @A2_svavgh(i32 %a, i32 %b) {
+ %z = call i32 @llvm.hexagon.A2.svavgh(i32 %a, i32 %b)
+ ret i32 %z
+}
+; CHECK: r0 = vavgh(r0, r1)
+
+declare i32 @llvm.hexagon.A2.svavghs(i32, i32)
+define i32 @A2_svavghs(i32 %a, i32 %b) {
+ %z = call i32 @llvm.hexagon.A2.svavghs(i32 %a, i32 %b)
+ ret i32 %z
+}
+; CHECK: r0 = vavgh(r0, r1):rnd
+
+declare i32 @llvm.hexagon.A2.svnavgh(i32, i32)
+define i32 @A2_svnavgh(i32 %a, i32 %b) {
+ %z = call i32 @llvm.hexagon.A2.svnavgh(i32 %a, i32 %b)
+ ret i32 %z
+}
+; CHECK: r0 = vnavgh(r0, r1)
+
+; Vector subtract halfwords
+declare i32 @llvm.hexagon.A2.svsubh(i32, i32)
+define i32 @A2_svsubh(i32 %a, i32 %b) {
+ %z = call i32 @llvm.hexagon.A2.svsubh(i32 %a, i32 %b)
+ ret i32 %z
+}
+; CHECK: r0 = vsubh(r0, r1)
+
+declare i32 @llvm.hexagon.A2.svsubhs(i32, i32)
+define i32 @A2_svsubhs(i32 %a, i32 %b) {
+ %z = call i32 @llvm.hexagon.A2.svsubhs(i32 %a, i32 %b)
+ ret i32 %z
+}
+; CHECK: r0 = vsubh(r0, r1):sat
+
+declare i32 @llvm.hexagon.A2.svsubuhs(i32, i32)
+define i32 @A2_svsubuhs(i32 %a, i32 %b) {
+ %z = call i32 @llvm.hexagon.A2.svsubuhs(i32 %a, i32 %b)
+ ret i32 %z
+}
+; CHECK: r0 = vsubuh(r0, r1):sat
+
+; Zero extend
+declare i32 @llvm.hexagon.A2.zxth(i32)
+define i32 @A2_zxth(i32 %a) {
+ %z = call i32 @llvm.hexagon.A2.zxth(i32 %a)
+ ret i32 %z
+}
+; CHECK: r0 = zxth(r0)
diff --git a/test/CodeGen/Hexagon/intrinsics/alu32_perm.ll b/test/CodeGen/Hexagon/intrinsics/alu32_perm.ll
new file mode 100644
index 000000000000..a9cc01c5dcb0
--- /dev/null
+++ b/test/CodeGen/Hexagon/intrinsics/alu32_perm.ll
@@ -0,0 +1,104 @@
+; RUN: llc -march=hexagon -O0 < %s | FileCheck %s
+; Hexagon Programmer's Reference Manual 11.1.2 ALU32/PERM
+
+; Combine words into doubleword
+declare i64 @llvm.hexagon.A4.combineri(i32, i32)
+define i64 @A4_combineri(i32 %a) {
+ %z = call i64 @llvm.hexagon.A4.combineri(i32 %a, i32 0)
+ ret i64 %z
+}
+; CHECK: = combine(r0, #0)
+
+declare i64 @llvm.hexagon.A4.combineir(i32, i32)
+define i64 @A4_combineir(i32 %a) {
+ %z = call i64 @llvm.hexagon.A4.combineir(i32 0, i32 %a)
+ ret i64 %z
+}
+; CHECK: = combine(#0, r0)
+
+declare i64 @llvm.hexagon.A2.combineii(i32, i32)
+define i64 @A2_combineii() {
+ %z = call i64 @llvm.hexagon.A2.combineii(i32 0, i32 0)
+ ret i64 %z
+}
+; CHECK: r1:0 = combine(#0, #0)
+
+declare i32 @llvm.hexagon.A2.combine.hh(i32, i32)
+define i32 @A2_combine_hh(i32 %a, i32 %b) {
+ %z = call i32 @llvm.hexagon.A2.combine.hh(i32 %a, i32 %b)
+ ret i32 %z
+}
+; CHECK: r0 = combine(r0.h, r1.h)
+
+declare i32 @llvm.hexagon.A2.combine.hl(i32, i32)
+define i32 @A2_combine_hl(i32 %a, i32 %b) {
+ %z = call i32 @llvm.hexagon.A2.combine.hl(i32 %a, i32 %b)
+ ret i32 %z
+}
+; CHECK: r0 = combine(r0.h, r1.l)
+
+declare i32 @llvm.hexagon.A2.combine.lh(i32, i32)
+define i32 @A2_combine_lh(i32 %a, i32 %b) {
+ %z = call i32 @llvm.hexagon.A2.combine.lh(i32 %a, i32 %b)
+ ret i32 %z
+}
+; CHECK: r0 = combine(r0.l, r1.h)
+
+declare i32 @llvm.hexagon.A2.combine.ll(i32, i32)
+define i32 @A2_combine_ll(i32 %a, i32 %b) {
+ %z = call i32 @llvm.hexagon.A2.combine.ll(i32 %a, i32 %b)
+ ret i32 %z
+}
+; CHECK: r0 = combine(r0.l, r1.l)
+
+declare i64 @llvm.hexagon.A2.combinew(i32, i32)
+define i64 @A2_combinew(i32 %a, i32 %b) {
+ %z = call i64 @llvm.hexagon.A2.combinew(i32 %a, i32 %b)
+ ret i64 %z
+}
+; CHECK: r1:0 = combine(r0, r1)
+
+; Mux
+declare i32 @llvm.hexagon.C2.muxri(i32, i32, i32)
+define i32 @C2_muxri(i32 %a, i32 %b) {
+ %z = call i32 @llvm.hexagon.C2.muxri(i32 %a, i32 0, i32 %b)
+ ret i32 %z
+}
+; CHECK: r0 = mux(p0, #0, r1)
+
+declare i32 @llvm.hexagon.C2.muxir(i32, i32, i32)
+define i32 @C2_muxir(i32 %a, i32 %b) {
+ %z = call i32 @llvm.hexagon.C2.muxir(i32 %a, i32 %b, i32 0)
+ ret i32 %z
+}
+; CHECK: r0 = mux(p0, r1, #0)
+
+declare i32 @llvm.hexagon.C2.mux(i32, i32, i32)
+define i32 @C2_mux(i32 %a, i32 %b, i32 %c) {
+ %z = call i32 @llvm.hexagon.C2.mux(i32 %a, i32 %b, i32 %c)
+ ret i32 %z
+}
+; CHECK: r0 = mux(p0, r1, r2)
+
+; Shift word by 16
+declare i32 @llvm.hexagon.A2.aslh(i32)
+define i32 @A2_aslh(i32 %a) {
+ %z = call i32 @llvm.hexagon.A2.aslh(i32 %a)
+ ret i32 %z
+}
+; CHECK: r0 = aslh(r0)
+
+declare i32 @llvm.hexagon.A2.asrh(i32)
+define i32 @A2_asrh(i32 %a) {
+ %z = call i32 @llvm.hexagon.A2.asrh(i32 %a)
+ ret i32 %z
+}
+; CHECK: r0 = asrh(r0)
+
+; Pack high and low halfwords
+declare i64 @llvm.hexagon.S2.packhl(i32, i32)
+define i64 @S2_packhl(i32 %a, i32 %b) {
+ %z = call i64 @llvm.hexagon.S2.packhl(i32 %a, i32 %b)
+ ret i64 %z
+}
+; CHECK: r1:0 = packhl(r0, r1)
diff --git a/test/CodeGen/Hexagon/intrinsics/cr.ll b/test/CodeGen/Hexagon/intrinsics/cr.ll
new file mode 100644
index 000000000000..9bdcb253fe2f
--- /dev/null
+++ b/test/CodeGen/Hexagon/intrinsics/cr.ll
@@ -0,0 +1,132 @@
+; RUN: llc -march=hexagon < %s | FileCheck %s
+; Hexagon Programmer's Reference Manual 11.2 CR
+
+; Corner detection acceleration
+declare i32 @llvm.hexagon.C4.fastcorner9(i32, i32)
+define i32 @C4_fastcorner9(i32 %a, i32 %b) {
+  %z = call i32 @llvm.hexagon.C4.fastcorner9(i32 %a, i32 %b)
+ ret i32 %z
+}
+; CHECK: p0 = fastcorner9(p0, p1)
+
+declare i32 @llvm.hexagon.C4.fastcorner9.not(i32, i32)
+define i32 @C4_fastcorner9_not(i32 %a, i32 %b) {
+  %z = call i32 @llvm.hexagon.C4.fastcorner9.not(i32 %a, i32 %b)
+ ret i32 %z
+}
+; CHECK: p0 = !fastcorner9(p0, p1)
+
+; Logical reductions on predicates
+declare i32 @llvm.hexagon.C2.any8(i32)
+define i32 @C2_any8(i32 %a) {
+  %z = call i32 @llvm.hexagon.C2.any8(i32 %a)
+ ret i32 %z
+}
+; CHECK: p0 = any8(p0)
+
+declare i32 @llvm.hexagon.C2.all8(i32)
+define i32 @C2_all8(i32 %a) {
+  %z = call i32 @llvm.hexagon.C2.all8(i32 %a)
+ ret i32 %z
+}
+
+; CHECK: p0 = all8(p0)
+
+; Logical operations on predicates
+declare i32 @llvm.hexagon.C2.and(i32, i32)
+define i32 @C2_and(i32 %a, i32 %b) {
+  %z = call i32 @llvm.hexagon.C2.and(i32 %a, i32 %b)
+ ret i32 %z
+}
+; CHECK: p0 = and(p0, p1)
+
+declare i32 @llvm.hexagon.C4.and.and(i32, i32, i32)
+define i32 @C4_and_and(i32 %a, i32 %b, i32 %c) {
+  %z = call i32 @llvm.hexagon.C4.and.and(i32 %a, i32 %b, i32 %c)
+ ret i32 %z
+}
+; CHECK: p0 = and(p0, and(p1, p2))
+
+declare i32 @llvm.hexagon.C2.or(i32, i32)
+define i32 @C2_or(i32 %a, i32 %b) {
+  %z = call i32 @llvm.hexagon.C2.or(i32 %a, i32 %b)
+ ret i32 %z
+}
+; CHECK: p0 = or(p0, p1)
+
+declare i32 @llvm.hexagon.C4.and.or(i32, i32, i32)
+define i32 @C4_and_or(i32 %a, i32 %b, i32 %c) {
+  %z = call i32 @llvm.hexagon.C4.and.or(i32 %a, i32 %b, i32 %c)
+ ret i32 %z
+}
+; CHECK: p0 = and(p0, or(p1, p2))
+
+declare i32 @llvm.hexagon.C2.xor(i32, i32)
+define i32 @C2_xor(i32 %a, i32 %b) {
+  %z = call i32 @llvm.hexagon.C2.xor(i32 %a, i32 %b)
+ ret i32 %z
+}
+; CHECK: p0 = xor(p0, p1)
+
+declare i32 @llvm.hexagon.C4.or.and(i32, i32, i32)
+define i32 @C4_or_and(i32 %a, i32 %b, i32 %c) {
+  %z = call i32 @llvm.hexagon.C4.or.and(i32 %a, i32 %b, i32 %c)
+ ret i32 %z
+}
+; CHECK: p0 = or(p0, and(p1, p2))
+
+declare i32 @llvm.hexagon.C2.andn(i32, i32)
+define i32 @C2_andn(i32 %a, i32 %b) {
+  %z = call i32 @llvm.hexagon.C2.andn(i32 %a, i32 %b)
+ ret i32 %z
+}
+; CHECK: p0 = and(p0, !p1)
+
+declare i32 @llvm.hexagon.C4.or.or(i32, i32, i32)
+define i32 @C4_or_or(i32 %a, i32 %b, i32 %c) {
+  %z = call i32 @llvm.hexagon.C4.or.or(i32 %a, i32 %b, i32 %c)
+ ret i32 %z
+}
+; CHECK: p0 = or(p0, or(p1, p2))
+
+declare i32 @llvm.hexagon.C4.and.andn(i32, i32, i32)
+define i32 @C4_and_andn(i32 %a, i32 %b, i32 %c) {
+  %z = call i32 @llvm.hexagon.C4.and.andn(i32 %a, i32 %b, i32 %c)
+ ret i32 %z
+}
+; CHECK: p0 = and(p0, and(p1, !p2))
+
+declare i32 @llvm.hexagon.C4.and.orn(i32, i32, i32)
+define i32 @C4_and_orn(i32 %a, i32 %b, i32 %c) {
+  %z = call i32 @llvm.hexagon.C4.and.orn(i32 %a, i32 %b, i32 %c)
+ ret i32 %z
+}
+; CHECK: p0 = and(p0, or(p1, !p2))
+
+declare i32 @llvm.hexagon.C2.not(i32)
+define i32 @C2_not(i32 %a) {
+  %z = call i32 @llvm.hexagon.C2.not(i32 %a)
+ ret i32 %z
+}
+; CHECK: p0 = not(p0)
+
+declare i32 @llvm.hexagon.C4.or.andn(i32, i32, i32)
+define i32 @C4_or_andn(i32 %a, i32 %b, i32 %c) {
+  %z = call i32 @llvm.hexagon.C4.or.andn(i32 %a, i32 %b, i32 %c)
+ ret i32 %z
+}
+; CHECK: p0 = or(p0, and(p1, !p2))
+
+declare i32 @llvm.hexagon.C2.orn(i32, i32)
+define i32 @C2_orn(i32 %a, i32 %b) {
+  %z = call i32 @llvm.hexagon.C2.orn(i32 %a, i32 %b)
+ ret i32 %z
+}
+; CHECK: p0 = or(p0, !p1)
+
+declare i32 @llvm.hexagon.C4.or.orn(i32, i32, i32)
+define i32 @C4_or_orn(i32 %a, i32 %b, i32 %c) {
+  %z = call i32 @llvm.hexagon.C4.or.orn(i32 %a, i32 %b, i32 %c)
+ ret i32 %z
+}
+; CHECK: p0 = or(p0, or(p1, !p2))
diff --git a/test/CodeGen/Hexagon/intrinsics/xtype_alu.ll b/test/CodeGen/Hexagon/intrinsics/xtype_alu.ll
new file mode 100644
index 000000000000..4a11112d73a9
--- /dev/null
+++ b/test/CodeGen/Hexagon/intrinsics/xtype_alu.ll
@@ -0,0 +1,1020 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv5 -O0 < %s | FileCheck %s
+; Hexagon Programmer's Reference Manual 11.10.1 XTYPE/ALU
+
+; Absolute value doubleword
+declare i64 @llvm.hexagon.A2.absp(i64)
+define i64 @A2_absp(i64 %a) {
+ %z = call i64 @llvm.hexagon.A2.absp(i64 %a)
+ ret i64 %z
+}
+; CHECK: r1:0 = abs(r1:0)
+
+; Absolute value word
+declare i32 @llvm.hexagon.A2.abs(i32)
+define i32 @A2_abs(i32 %a) {
+ %z = call i32 @llvm.hexagon.A2.abs(i32 %a)
+ ret i32 %z
+}
+; CHECK: r0 = abs(r0)
+
+declare i32 @llvm.hexagon.A2.abssat(i32)
+define i32 @A2_abssat(i32 %a) {
+ %z = call i32 @llvm.hexagon.A2.abssat(i32 %a)
+ ret i32 %z
+}
+; CHECK: r0 = abs(r0):sat
+
+; Add and accumulate
+declare i32 @llvm.hexagon.S4.addaddi(i32, i32, i32)
+define i32 @S4_addaddi(i32 %a, i32 %b) {
+ %z = call i32 @llvm.hexagon.S4.addaddi(i32 %a, i32 %b, i32 0)
+ ret i32 %z
+}
+; CHECK: r0 = add(r0, add(r1, #0))
+
+declare i32 @llvm.hexagon.S4.subaddi(i32, i32, i32)
+define i32 @S4_subaddi(i32 %a, i32 %b) {
+ %z = call i32 @llvm.hexagon.S4.subaddi(i32 %a, i32 0, i32 %b)
+ ret i32 %z
+}
+; CHECK: r0 = add(r0, sub(#0, r1))
+
+declare i32 @llvm.hexagon.M2.accii(i32, i32, i32)
+define i32 @M2_accii(i32 %a, i32 %b) {
+ %z = call i32 @llvm.hexagon.M2.accii(i32 %a, i32 %b, i32 0)
+ ret i32 %z
+}
+; CHECK: r0 += add(r1, #0)
+
+declare i32 @llvm.hexagon.M2.naccii(i32, i32, i32)
+define i32 @M2_naccii(i32 %a, i32 %b) {
+ %z = call i32 @llvm.hexagon.M2.naccii(i32 %a, i32 %b, i32 0)
+ ret i32 %z
+}
+; CHECK: r0 -= add(r1, #0)
+
+declare i32 @llvm.hexagon.M2.acci(i32, i32, i32)
+define i32 @M2_acci(i32 %a, i32 %b, i32 %c) {
+ %z = call i32 @llvm.hexagon.M2.acci(i32 %a, i32 %b, i32 %c)
+ ret i32 %z
+}
+; CHECK: r0 += add(r1, r2)
+
+declare i32 @llvm.hexagon.M2.nacci(i32, i32, i32)
+define i32 @M2_nacci(i32 %a, i32 %b, i32 %c) {
+ %z = call i32 @llvm.hexagon.M2.nacci(i32 %a, i32 %b, i32 %c)
+ ret i32 %z
+}
+; CHECK: r0 -= add(r1, r2)
+
+; Add doublewords
+declare i64 @llvm.hexagon.A2.addp(i64, i64)
+define i64 @A2_addp(i64 %a, i64 %b) {
+ %z = call i64 @llvm.hexagon.A2.addp(i64 %a, i64 %b)
+ ret i64 %z
+}
+; CHECK: r1:0 = add(r1:0, r3:2)
+
+declare i64 @llvm.hexagon.A2.addpsat(i64, i64)
+define i64 @A2_addpsat(i64 %a, i64 %b) {
+ %z = call i64 @llvm.hexagon.A2.addpsat(i64 %a, i64 %b)
+ ret i64 %z
+}
+; CHECK: r1:0 = add(r1:0, r3:2):sat
+
+; Add halfword
+declare i32 @llvm.hexagon.A2.addh.l16.ll(i32, i32)
+define i32 @A2_addh_l16_ll(i32 %a, i32 %b) {
+ %z = call i32 @llvm.hexagon.A2.addh.l16.ll(i32 %a, i32 %b)
+ ret i32 %z
+}
+; CHECK: r0 = add(r0.l, r1.l)
+
+declare i32 @llvm.hexagon.A2.addh.l16.hl(i32, i32)
+define i32 @A2_addh_l16_hl(i32 %a, i32 %b) {
+ %z = call i32 @llvm.hexagon.A2.addh.l16.hl(i32 %a, i32 %b)
+ ret i32 %z
+}
+; CHECK: r0 = add(r0.l, r1.h)
+
+declare i32 @llvm.hexagon.A2.addh.l16.sat.ll(i32, i32)
+define i32 @A2_addh_l16_sat.ll(i32 %a, i32 %b) {
+ %z = call i32 @llvm.hexagon.A2.addh.l16.sat.ll(i32 %a, i32 %b)
+ ret i32 %z
+}
+; CHECK: r0 = add(r0.l, r1.l):sat
+
+declare i32 @llvm.hexagon.A2.addh.l16.sat.hl(i32, i32)
+define i32 @A2_addh_l16_sat.hl(i32 %a, i32 %b) {
+ %z = call i32 @llvm.hexagon.A2.addh.l16.sat.hl(i32 %a, i32 %b)
+ ret i32 %z
+}
+; CHECK: r0 = add(r0.l, r1.h):sat
+
+declare i32 @llvm.hexagon.A2.addh.h16.ll(i32, i32)
+define i32 @A2_addh_h16_ll(i32 %a, i32 %b) {
+ %z = call i32 @llvm.hexagon.A2.addh.h16.ll(i32 %a, i32 %b)
+ ret i32 %z
+}
+; CHECK: r0 = add(r0.l, r1.l):<<16
+
+declare i32 @llvm.hexagon.A2.addh.h16.lh(i32, i32)
+define i32 @A2_addh_h16_lh(i32 %a, i32 %b) {
+ %z = call i32 @llvm.hexagon.A2.addh.h16.lh(i32 %a, i32 %b)
+ ret i32 %z
+}
+; CHECK: r0 = add(r0.l, r1.h):<<16
+
+declare i32 @llvm.hexagon.A2.addh.h16.hl(i32, i32)
+define i32 @A2_addh_h16_hl(i32 %a, i32 %b) {
+ %z = call i32 @llvm.hexagon.A2.addh.h16.hl(i32 %a, i32 %b)
+ ret i32 %z
+}
+; CHECK: r0 = add(r0.h, r1.l):<<16
+
+declare i32 @llvm.hexagon.A2.addh.h16.hh(i32, i32)
+define i32 @A2_addh_h16_hh(i32 %a, i32 %b) {
+ %z = call i32 @llvm.hexagon.A2.addh.h16.hh(i32 %a, i32 %b)
+ ret i32 %z
+}
+; CHECK: r0 = add(r0.h, r1.h):<<16
+
+declare i32 @llvm.hexagon.A2.addh.h16.sat.ll(i32, i32)
+define i32 @A2_addh_h16_sat_ll(i32 %a, i32 %b) {
+ %z = call i32 @llvm.hexagon.A2.addh.h16.sat.ll(i32 %a, i32 %b)
+ ret i32 %z
+}
+; CHECK: r0 = add(r0.l, r1.l):sat:<<16
+
+declare i32 @llvm.hexagon.A2.addh.h16.sat.lh(i32, i32)
+define i32 @A2_addh_h16_sat_lh(i32 %a, i32 %b) {
+ %z = call i32 @llvm.hexagon.A2.addh.h16.sat.lh(i32 %a, i32 %b)
+ ret i32 %z
+}
+; CHECK: r0 = add(r0.l, r1.h):sat:<<16
+
+declare i32 @llvm.hexagon.A2.addh.h16.sat.hl(i32, i32)
+define i32 @A2_addh_h16_sat_hl(i32 %a, i32 %b) {
+ %z = call i32 @llvm.hexagon.A2.addh.h16.sat.hl(i32 %a, i32 %b)
+ ret i32 %z
+}
+; CHECK: r0 = add(r0.h, r1.l):sat:<<16
+
+declare i32 @llvm.hexagon.A2.addh.h16.sat.hh(i32, i32)
+define i32 @A2_addh_h16_sat_hh(i32 %a, i32 %b) {
+ %z = call i32 @llvm.hexagon.A2.addh.h16.sat.hh(i32 %a, i32 %b)
+ ret i32 %z
+}
+; CHECK: r0 = add(r0.h, r1.h):sat:<<16
+
+; Logical doublewords
+declare i64 @llvm.hexagon.A2.notp(i64)
+define i64 @A2_notp(i64 %a) {
+ %z = call i64 @llvm.hexagon.A2.notp(i64 %a)
+ ret i64 %z
+}
+; CHECK: r1:0 = not(r1:0)
+
+declare i64 @llvm.hexagon.A2.andp(i64, i64)
+define i64 @A2_andp(i64 %a, i64 %b) {
+ %z = call i64 @llvm.hexagon.A2.andp(i64 %a, i64 %b)
+ ret i64 %z
+}
+; CHECK: r1:0 = and(r1:0, r3:2)
+
+declare i64 @llvm.hexagon.A4.andnp(i64, i64)
+define i64 @A2_andnp(i64 %a, i64 %b) {
+ %z = call i64 @llvm.hexagon.A4.andnp(i64 %a, i64 %b)
+ ret i64 %z
+}
+; CHECK: r1:0 = and(r1:0, ~r3:2)
+
+declare i64 @llvm.hexagon.A2.orp(i64, i64)
+define i64 @A2_orp(i64 %a, i64 %b) {
+ %z = call i64 @llvm.hexagon.A2.orp(i64 %a, i64 %b)
+ ret i64 %z
+}
+; CHECK: r1:0 = or(r1:0, r3:2)
+
+declare i64 @llvm.hexagon.A4.ornp(i64, i64)
+define i64 @A2_ornp(i64 %a, i64 %b) {
+ %z = call i64 @llvm.hexagon.A4.ornp(i64 %a, i64 %b)
+ ret i64 %z
+}
+; CHECK: r1:0 = or(r1:0, ~r3:2)
+
+declare i64 @llvm.hexagon.A2.xorp(i64, i64)
+define i64 @A2_xorp(i64 %a, i64 %b) {
+ %z = call i64 @llvm.hexagon.A2.xorp(i64 %a, i64 %b)
+ ret i64 %z
+}
+; CHECK: r1:0 = xor(r1:0, r3:2)
+
+; Logical-logical doublewords
+declare i64 @llvm.hexagon.M4.xor.xacc(i64, i64, i64)
+define i64 @M4_xor_xacc(i64 %a, i64 %b, i64 %c) {
+ %z = call i64 @llvm.hexagon.M4.xor.xacc(i64 %a, i64 %b, i64 %c)
+ ret i64 %z
+}
+; CHECK: r1:0 ^= xor(r3:2, r5:4)
+
+; Logical-logical words
+declare i32 @llvm.hexagon.S4.or.andi(i32, i32, i32)
+define i32 @S4_or_andi(i32 %a, i32 %b) {
+ %z = call i32 @llvm.hexagon.S4.or.andi(i32 %a, i32 %b, i32 0)
+ ret i32 %z
+}
+; CHECK: r0 |= and(r1, #0)
+
+declare i32 @llvm.hexagon.S4.or.andix(i32, i32, i32)
+define i32 @S4_or_andix(i32 %a, i32 %b) {
+ %z = call i32 @llvm.hexagon.S4.or.andix(i32 %a, i32 %b, i32 0)
+ ret i32 %z
+}
+; CHECK: r1 = or(r0, and(r1, #0))
+
+declare i32 @llvm.hexagon.M4.or.andn(i32, i32, i32)
+define i32 @M4_or_andn(i32 %a, i32 %b, i32 %c) {
+ %z = call i32 @llvm.hexagon.M4.or.andn(i32 %a, i32 %b, i32 %c)
+ ret i32 %z
+}
+; CHECK: r0 |= and(r1, ~r2)
+
+declare i32 @llvm.hexagon.M4.and.andn(i32, i32, i32)
+define i32 @M4_and_andn(i32 %a, i32 %b, i32 %c) {
+ %z = call i32 @llvm.hexagon.M4.and.andn(i32 %a, i32 %b, i32 %c)
+ ret i32 %z
+}
+; CHECK: r0 &= and(r1, ~r2)
+
+declare i32 @llvm.hexagon.M4.xor.andn(i32, i32, i32)
+define i32 @M4_xor_andn(i32 %a, i32 %b, i32 %c) {
+ %z = call i32 @llvm.hexagon.M4.xor.andn(i32 %a, i32 %b, i32 %c)
+ ret i32 %z
+}
+; CHECK: r0 ^= and(r1, ~r2)
+
+declare i32 @llvm.hexagon.M4.and.and(i32, i32, i32)
+define i32 @M4_and_and(i32 %a, i32 %b, i32 %c) {
+ %z = call i32 @llvm.hexagon.M4.and.and(i32 %a, i32 %b, i32 %c)
+ ret i32 %z
+}
+; CHECK: r0 &= and(r1, r2)
+
+declare i32 @llvm.hexagon.M4.and.or(i32, i32, i32)
+define i32 @M4_and_or(i32 %a, i32 %b, i32 %c) {
+ %z = call i32 @llvm.hexagon.M4.and.or(i32 %a, i32 %b, i32 %c)
+ ret i32 %z
+}
+; CHECK: r0 &= or(r1, r2)
+
+declare i32 @llvm.hexagon.M4.and.xor(i32, i32, i32)
+define i32 @M4_and_xor(i32 %a, i32 %b, i32 %c) {
+ %z = call i32 @llvm.hexagon.M4.and.xor(i32 %a, i32 %b, i32 %c)
+ ret i32 %z
+}
+; CHECK: r0 &= xor(r1, r2)
+
+declare i32 @llvm.hexagon.M4.or.and(i32, i32, i32)
+define i32 @M4_or_and(i32 %a, i32 %b, i32 %c) {
+ %z = call i32 @llvm.hexagon.M4.or.and(i32 %a, i32 %b, i32 %c)
+ ret i32 %z
+}
+; CHECK: r0 |= and(r1, r2)
+
+declare i32 @llvm.hexagon.M4.or.or(i32, i32, i32)
+define i32 @M4_or_or(i32 %a, i32 %b, i32 %c) {
+ %z = call i32 @llvm.hexagon.M4.or.or(i32 %a, i32 %b, i32 %c)
+ ret i32 %z
+}
+; CHECK: r0 |= or(r1, r2)
+
+declare i32 @llvm.hexagon.M4.or.xor(i32, i32, i32)
+define i32 @M4_or_xor(i32 %a, i32 %b, i32 %c) {
+ %z = call i32 @llvm.hexagon.M4.or.xor(i32 %a, i32 %b, i32 %c)
+ ret i32 %z
+}
+; CHECK: r0 |= xor(r1, r2)
+
+declare i32 @llvm.hexagon.M4.xor.and(i32, i32, i32)
+define i32 @M4_xor_and(i32 %a, i32 %b, i32 %c) {
+ %z = call i32 @llvm.hexagon.M4.xor.and(i32 %a, i32 %b, i32 %c)
+ ret i32 %z
+}
+; CHECK: r0 ^= and(r1, r2)
+
+declare i32 @llvm.hexagon.M4.xor.or(i32, i32, i32)
+define i32 @M4_xor_or(i32 %a, i32 %b, i32 %c) {
+ %z = call i32 @llvm.hexagon.M4.xor.or(i32 %a, i32 %b, i32 %c)
+ ret i32 %z
+}
+; CHECK: r0 ^= or(r1, r2)
+
+; Maximum words
+declare i32 @llvm.hexagon.A2.max(i32, i32)
+define i32 @A2_max(i32 %a, i32 %b) {
+ %z = call i32 @llvm.hexagon.A2.max(i32 %a, i32 %b)
+ ret i32 %z
+}
+; CHECK: r0 = max(r0, r1)
+
+declare i32 @llvm.hexagon.A2.maxu(i32, i32)
+define i32 @A2_maxu(i32 %a, i32 %b) {
+ %z = call i32 @llvm.hexagon.A2.maxu(i32 %a, i32 %b)
+ ret i32 %z
+}
+; CHECK: r0 = maxu(r0, r1)
+
+; Maximum doublewords
+declare i64 @llvm.hexagon.A2.maxp(i64, i64)
+define i64 @A2_maxp(i64 %a, i64 %b) {
+ %z = call i64 @llvm.hexagon.A2.maxp(i64 %a, i64 %b)
+ ret i64 %z
+}
+; CHECK: r1:0 = max(r1:0, r3:2)
+
+declare i64 @llvm.hexagon.A2.maxup(i64, i64)
+define i64 @A2_maxup(i64 %a, i64 %b) {
+ %z = call i64 @llvm.hexagon.A2.maxup(i64 %a, i64 %b)
+ ret i64 %z
+}
+; CHECK: r1:0 = maxu(r1:0, r3:2)
+
+; Minimum words
+declare i32 @llvm.hexagon.A2.min(i32, i32)
+define i32 @A2_min(i32 %a, i32 %b) {
+ %z = call i32 @llvm.hexagon.A2.min(i32 %a, i32 %b)
+ ret i32 %z
+}
+; CHECK: r0 = min(r0, r1)
+
+declare i32 @llvm.hexagon.A2.minu(i32, i32)
+define i32 @A2_minu(i32 %a, i32 %b) {
+ %z = call i32 @llvm.hexagon.A2.minu(i32 %a, i32 %b)
+ ret i32 %z
+}
+; CHECK: r0 = minu(r0, r1)
+
+; Minimum doublewords
+declare i64 @llvm.hexagon.A2.minp(i64, i64)
+define i64 @A2_minp(i64 %a, i64 %b) {
+ %z = call i64 @llvm.hexagon.A2.minp(i64 %a, i64 %b)
+ ret i64 %z
+}
+; CHECK: r1:0 = min(r1:0, r3:2)
+
+declare i64 @llvm.hexagon.A2.minup(i64, i64)
+define i64 @A2_minup(i64 %a, i64 %b) {
+ %z = call i64 @llvm.hexagon.A2.minup(i64 %a, i64 %b)
+ ret i64 %z
+}
+; CHECK: r1:0 = minu(r1:0, r3:2)
+
+; Module wrap
+declare i32 @llvm.hexagon.A4.modwrapu(i32, i32)
+define i32 @A4_modwrapu(i32 %a, i32 %b) {
+ %z = call i32 @llvm.hexagon.A4.modwrapu(i32 %a, i32 %b)
+ ret i32 %z
+}
+; CHECK: r0 = modwrap(r0, r1)
+
+; Negate
+declare i64 @llvm.hexagon.A2.negp(i64)
+define i64 @A2_negp(i64 %a) {
+ %z = call i64 @llvm.hexagon.A2.negp(i64 %a)
+ ret i64 %z
+}
+; CHECK: r1:0 = neg(r1:0)
+
+declare i32 @llvm.hexagon.A2.negsat(i32)
+define i32 @A2_negsat(i32 %a) {
+ %z = call i32 @llvm.hexagon.A2.negsat(i32 %a)
+ ret i32 %z
+}
+; CHECK: r0 = neg(r0):sat
+
+; Round
+declare i32 @llvm.hexagon.A2.roundsat(i64)
+define i32 @A2_roundsat(i64 %a) {
+ %z = call i32 @llvm.hexagon.A2.roundsat(i64 %a)
+ ret i32 %z
+}
+; CHECK: r0 = round(r1:0):sat
+
+declare i32 @llvm.hexagon.A4.cround.ri(i32, i32)
+define i32 @A4_cround_ri(i32 %a) {
+ %z = call i32 @llvm.hexagon.A4.cround.ri(i32 %a, i32 0)
+ ret i32 %z
+}
+; CHECK: r0 = cround(r0, #0)
+
+declare i32 @llvm.hexagon.A4.round.ri(i32, i32)
+define i32 @A4_round_ri(i32 %a) {
+ %z = call i32 @llvm.hexagon.A4.round.ri(i32 %a, i32 0)
+ ret i32 %z
+}
+; CHECK: r0 = round(r0, #0)
+
+declare i32 @llvm.hexagon.A4.round.ri.sat(i32, i32)
+define i32 @A4_round_ri_sat(i32 %a) {
+ %z = call i32 @llvm.hexagon.A4.round.ri.sat(i32 %a, i32 0)
+ ret i32 %z
+}
+; CHECK: r0 = round(r0, #0):sat
+
+declare i32 @llvm.hexagon.A4.cround.rr(i32, i32)
+define i32 @A4_cround_rr(i32 %a, i32 %b) {
+ %z = call i32 @llvm.hexagon.A4.cround.rr(i32 %a, i32 %b)
+ ret i32 %z
+}
+; CHECK: r0 = cround(r0, r1)
+
+declare i32 @llvm.hexagon.A4.round.rr(i32, i32)
+define i32 @A4_round_rr(i32 %a, i32 %b) {
+ %z = call i32 @llvm.hexagon.A4.round.rr(i32 %a, i32 %b)
+ ret i32 %z
+}
+; CHECK: r0 = round(r0, r1)
+
+declare i32 @llvm.hexagon.A4.round.rr.sat(i32, i32)
+define i32 @A4_round_rr_sat(i32 %a, i32 %b) {
+ %z = call i32 @llvm.hexagon.A4.round.rr.sat(i32 %a, i32 %b)
+ ret i32 %z
+}
+; CHECK: r0 = round(r0, r1):sat
+
+; Subtract doublewords
+declare i64 @llvm.hexagon.A2.subp(i64, i64)
+define i64 @A2_subp(i64 %a, i64 %b) {
+ %z = call i64 @llvm.hexagon.A2.subp(i64 %a, i64 %b)
+ ret i64 %z
+}
+; CHECK: r1:0 = sub(r1:0, r3:2)
+
+; Subtract and accumulate
+declare i32 @llvm.hexagon.M2.subacc(i32, i32, i32)
+define i32 @M2_subacc(i32 %a, i32 %b, i32 %c) {
+ %z = call i32 @llvm.hexagon.M2.subacc(i32 %a, i32 %b, i32 %c)
+ ret i32 %z
+}
+; CHECK: r0 += sub(r1, r2)
+
+; Subtract halfwords
+declare i32 @llvm.hexagon.A2.subh.l16.ll(i32, i32)
+define i32 @A2_subh_l16_ll(i32 %a, i32 %b) {
+ %z = call i32 @llvm.hexagon.A2.subh.l16.ll(i32 %a, i32 %b)
+ ret i32 %z
+}
+; CHECK: r0 = sub(r0.l, r1.l)
+
+declare i32 @llvm.hexagon.A2.subh.l16.hl(i32, i32)
+define i32 @A2_subh_l16_hl(i32 %a, i32 %b) {
+ %z = call i32 @llvm.hexagon.A2.subh.l16.hl(i32 %a, i32 %b)
+ ret i32 %z
+}
+; CHECK: r0 = sub(r0.l, r1.h)
+
+declare i32 @llvm.hexagon.A2.subh.l16.sat.ll(i32, i32)
+define i32 @A2_subh_l16_sat_ll(i32 %a, i32 %b) {
+ %z = call i32 @llvm.hexagon.A2.subh.l16.sat.ll(i32 %a, i32 %b)
+ ret i32 %z
+}
+; CHECK: r0 = sub(r0.l, r1.l):sat
+
+declare i32 @llvm.hexagon.A2.subh.l16.sat.hl(i32, i32)
+define i32 @A2_subh_l16_sat_hl(i32 %a, i32 %b) {
+ %z = call i32 @llvm.hexagon.A2.subh.l16.sat.hl(i32 %a, i32 %b)
+ ret i32 %z
+}
+; CHECK: r0 = sub(r0.l, r1.h):sat
+
+declare i32 @llvm.hexagon.A2.subh.h16.ll(i32, i32)
+define i32 @A2_subh_h16_ll(i32 %a, i32 %b) {
+ %z = call i32 @llvm.hexagon.A2.subh.h16.ll(i32 %a, i32 %b)
+ ret i32 %z
+}
+; CHECK: r0 = sub(r0.l, r1.l):<<16
+
+declare i32 @llvm.hexagon.A2.subh.h16.lh(i32, i32)
+define i32 @A2_subh_h16_lh(i32 %a, i32 %b) {
+ %z = call i32 @llvm.hexagon.A2.subh.h16.lh(i32 %a, i32 %b)
+ ret i32 %z
+}
+; CHECK: r0 = sub(r0.l, r1.h):<<16
+
+declare i32 @llvm.hexagon.A2.subh.h16.hl(i32, i32)
+define i32 @A2_subh_h16_hl(i32 %a, i32 %b) {
+ %z = call i32 @llvm.hexagon.A2.subh.h16.hl(i32 %a, i32 %b)
+ ret i32 %z
+}
+; CHECK: r0 = sub(r0.h, r1.l):<<16
+
+declare i32 @llvm.hexagon.A2.subh.h16.hh(i32, i32)
+define i32 @A2_subh_h16_hh(i32 %a, i32 %b) {
+ %z = call i32 @llvm.hexagon.A2.subh.h16.hh(i32 %a, i32 %b)
+ ret i32 %z
+}
+; CHECK: r0 = sub(r0.h, r1.h):<<16
+
+declare i32 @llvm.hexagon.A2.subh.h16.sat.ll(i32, i32)
+define i32 @A2_subh_h16_sat_ll(i32 %a, i32 %b) {
+ %z = call i32 @llvm.hexagon.A2.subh.h16.sat.ll(i32 %a, i32 %b)
+ ret i32 %z
+}
+; CHECK: r0 = sub(r0.l, r1.l):sat:<<16
+
+declare i32 @llvm.hexagon.A2.subh.h16.sat.lh(i32, i32)
+define i32 @A2_subh_h16_sat_lh(i32 %a, i32 %b) {
+ %z = call i32 @llvm.hexagon.A2.subh.h16.sat.lh(i32 %a, i32 %b)
+ ret i32 %z
+}
+; CHECK: r0 = sub(r0.l, r1.h):sat:<<16
+
+declare i32 @llvm.hexagon.A2.subh.h16.sat.hl(i32, i32)
+define i32 @A2_subh_h16_sat_hl(i32 %a, i32 %b) {
+ %z = call i32 @llvm.hexagon.A2.subh.h16.sat.hl(i32 %a, i32 %b)
+ ret i32 %z
+}
+; CHECK: r0 = sub(r0.h, r1.l):sat:<<16
+
+declare i32 @llvm.hexagon.A2.subh.h16.sat.hh(i32, i32)
+define i32 @A2_subh_h16_sat_hh(i32 %a, i32 %b) {
+ %z = call i32 @llvm.hexagon.A2.subh.h16.sat.hh(i32 %a, i32 %b)
+ ret i32 %z
+}
+; CHECK: r0 = sub(r0.h, r1.h):sat:<<16
+
+; Sign extend word to doubleword
+declare i64 @llvm.hexagon.A2.sxtw(i32)
+define i64 @A2_sxtw(i32 %a) {
+ %z = call i64 @llvm.hexagon.A2.sxtw(i32 %a)
+ ret i64 %z
+}
+; CHECK: = sxtw(r0)
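+; (The destination register pair is omitted from the CHECK line above,
+; presumably so the match does not depend on which pair the result is
+; produced in at -O0; the same convention is used elsewhere in these tests.)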
+
+; Vector absolute value halfwords
+declare i64 @llvm.hexagon.A2.vabsh(i64)
+define i64 @A2_vabsh(i64 %a) {
+ %z = call i64 @llvm.hexagon.A2.vabsh(i64 %a)
+ ret i64 %z
+}
+; CHECK: r1:0 = vabsh(r1:0)
+
+declare i64 @llvm.hexagon.A2.vabshsat(i64)
+define i64 @A2_vabshsat(i64 %a) {
+ %z = call i64 @llvm.hexagon.A2.vabshsat(i64 %a)
+ ret i64 %z
+}
+; CHECK: r1:0 = vabsh(r1:0):sat
+
+; Vector absolute value words
+declare i64 @llvm.hexagon.A2.vabsw(i64)
+define i64 @A2_vabsw(i64 %a) {
+ %z = call i64 @llvm.hexagon.A2.vabsw(i64 %a)
+ ret i64 %z
+}
+; CHECK: r1:0 = vabsw(r1:0)
+
+declare i64 @llvm.hexagon.A2.vabswsat(i64)
+define i64 @A2_vabswsat(i64 %a) {
+ %z = call i64 @llvm.hexagon.A2.vabswsat(i64 %a)
+ ret i64 %z
+}
+; CHECK: r1:0 = vabsw(r1:0):sat
+
+; Vector absolute difference halfwords
+declare i64 @llvm.hexagon.M2.vabsdiffh(i64, i64)
+define i64 @M2_vabsdiffh(i64 %a, i64 %b) {
+ %z = call i64 @llvm.hexagon.M2.vabsdiffh(i64 %a, i64 %b)
+ ret i64 %z
+}
+; CHECK: r1:0 = vabsdiffh(r1:0, r3:2)
+
+; Vector absolute difference words
+declare i64 @llvm.hexagon.M2.vabsdiffw(i64, i64)
+define i64 @M2_vabsdiffw(i64 %a, i64 %b) {
+ %z = call i64 @llvm.hexagon.M2.vabsdiffw(i64 %a, i64 %b)
+ ret i64 %z
+}
+; CHECK: r1:0 = vabsdiffw(r1:0, r3:2)
+
+; Vector add halfwords
+declare i64 @llvm.hexagon.A2.vaddh(i64, i64)
+define i64 @A2_vaddh(i64 %a, i64 %b) {
+ %z = call i64 @llvm.hexagon.A2.vaddh(i64 %a, i64 %b)
+ ret i64 %z
+}
+; CHECK: r1:0 = vaddh(r1:0, r3:2)
+
+declare i64 @llvm.hexagon.A2.vaddhs(i64, i64)
+define i64 @A2_vaddhs(i64 %a, i64 %b) {
+ %z = call i64 @llvm.hexagon.A2.vaddhs(i64 %a, i64 %b)
+ ret i64 %z
+}
+; CHECK: r1:0 = vaddh(r1:0, r3:2):sat
+
+declare i64 @llvm.hexagon.A2.vadduhs(i64, i64)
+define i64 @A2_vadduhs(i64 %a, i64 %b) {
+ %z = call i64 @llvm.hexagon.A2.vadduhs(i64 %a, i64 %b)
+ ret i64 %z
+}
+; CHECK: r1:0 = vadduh(r1:0, r3:2):sat
+
+; Vector add halfwords with saturate and pack to unsigned bytes
+declare i32 @llvm.hexagon.A5.vaddhubs(i64, i64)
+define i32 @A5_vaddhubs(i64 %a, i64 %b) {
+ %z = call i32 @llvm.hexagon.A5.vaddhubs(i64 %a, i64 %b)
+ ret i32 %z
+}
+; CHECK: r0 = vaddhub(r1:0, r3:2):sat
+
+; Vector reduce add unsigned bytes
+declare i64 @llvm.hexagon.A2.vraddub(i64, i64)
+define i64 @A2_vraddub(i64 %a, i64 %b) {
+ %z = call i64 @llvm.hexagon.A2.vraddub(i64 %a, i64 %b)
+ ret i64 %z
+}
+; CHECK: r1:0 = vraddub(r1:0, r3:2)
+
+declare i64 @llvm.hexagon.A2.vraddub.acc(i64, i64, i64)
+define i64 @A2_vraddub_acc(i64 %a, i64 %b, i64 %c) {
+ %z = call i64 @llvm.hexagon.A2.vraddub.acc(i64 %a, i64 %b, i64 %c)
+ ret i64 %z
+}
+; CHECK: r1:0 += vraddub(r3:2, r5:4)
+
+; Vector reduce add halfwords
+declare i32 @llvm.hexagon.M2.vradduh(i64, i64)
+define i32 @M2_vradduh(i64 %a, i64 %b) {
+ %z = call i32 @llvm.hexagon.M2.vradduh(i64 %a, i64 %b)
+ ret i32 %z
+}
+; CHECK: r0 = vradduh(r1:0, r3:2)
+
+declare i32 @llvm.hexagon.M2.vraddh(i64, i64)
+define i32 @M2_vraddh(i64 %a, i64 %b) {
+ %z = call i32 @llvm.hexagon.M2.vraddh(i64 %a, i64 %b)
+ ret i32 %z
+}
+; CHECK: r0 = vraddh(r1:0, r3:2)
+
+; Vector add bytes
+declare i64 @llvm.hexagon.A2.vaddub(i64, i64)
+define i64 @A2_vaddub(i64 %a, i64 %b) {
+ %z = call i64 @llvm.hexagon.A2.vaddub(i64 %a, i64 %b)
+ ret i64 %z
+}
+; CHECK: r1:0 = vaddub(r1:0, r3:2)
+
+declare i64 @llvm.hexagon.A2.vaddubs(i64, i64)
+define i64 @A2_vaddubs(i64 %a, i64 %b) {
+ %z = call i64 @llvm.hexagon.A2.vaddubs(i64 %a, i64 %b)
+ ret i64 %z
+}
+; CHECK: r1:0 = vaddub(r1:0, r3:2):sat
+
+; Vector add words
+declare i64 @llvm.hexagon.A2.vaddw(i64, i64)
+define i64 @A2_vaddw(i64 %a, i64 %b) {
+ %z = call i64 @llvm.hexagon.A2.vaddw(i64 %a, i64 %b)
+ ret i64 %z
+}
+; CHECK: r1:0 = vaddw(r1:0, r3:2)
+
+declare i64 @llvm.hexagon.A2.vaddws(i64, i64)
+define i64 @A2_vaddws(i64 %a, i64 %b) {
+ %z = call i64 @llvm.hexagon.A2.vaddws(i64 %a, i64 %b)
+ ret i64 %z
+}
+; CHECK: r1:0 = vaddw(r1:0, r3:2):sat
+
+; Vector average halfwords
+declare i64 @llvm.hexagon.A2.vavgh(i64, i64)
+define i64 @A2_vavgh(i64 %a, i64 %b) {
+ %z = call i64 @llvm.hexagon.A2.vavgh(i64 %a, i64 %b)
+ ret i64 %z
+}
+; CHECK: r1:0 = vavgh(r1:0, r3:2)
+
+declare i64 @llvm.hexagon.A2.vavghr(i64, i64)
+define i64 @A2_vavghr(i64 %a, i64 %b) {
+ %z = call i64 @llvm.hexagon.A2.vavghr(i64 %a, i64 %b)
+ ret i64 %z
+}
+; CHECK: r1:0 = vavgh(r1:0, r3:2):rnd
+
+declare i64 @llvm.hexagon.A2.vavghcr(i64, i64)
+define i64 @A2_vavghcr(i64 %a, i64 %b) {
+ %z = call i64 @llvm.hexagon.A2.vavghcr(i64 %a, i64 %b)
+ ret i64 %z
+}
+; CHECK: r1:0 = vavgh(r1:0, r3:2):crnd
+
+declare i64 @llvm.hexagon.A2.vavguh(i64, i64)
+define i64 @A2_vavguh(i64 %a, i64 %b) {
+ %z = call i64 @llvm.hexagon.A2.vavguh(i64 %a, i64 %b)
+ ret i64 %z
+}
+; CHECK: r1:0 = vavguh(r1:0, r3:2)
+
+declare i64 @llvm.hexagon.A2.vavguhr(i64, i64)
+define i64 @A2_vavguhr(i64 %a, i64 %b) {
+ %z = call i64 @llvm.hexagon.A2.vavguhr(i64 %a, i64 %b)
+ ret i64 %z
+}
+; CHECK: r1:0 = vavguh(r1:0, r3:2):rnd
+
+declare i64 @llvm.hexagon.A2.vnavgh(i64, i64)
+define i64 @A2_vnavgh(i64 %a, i64 %b) {
+ %z = call i64 @llvm.hexagon.A2.vnavgh(i64 %a, i64 %b)
+ ret i64 %z
+}
+; CHECK: r1:0 = vnavgh(r1:0, r3:2)
+
+declare i64 @llvm.hexagon.A2.vnavghr(i64, i64)
+define i64 @A2_vnavghr(i64 %a, i64 %b) {
+ %z = call i64 @llvm.hexagon.A2.vnavghr(i64 %a, i64 %b)
+ ret i64 %z
+}
+; CHECK: r1:0 = vnavgh(r1:0, r3:2):rnd
+
+declare i64 @llvm.hexagon.A2.vnavghcr(i64, i64)
+define i64 @A2_vnavghcr(i64 %a, i64 %b) {
+ %z = call i64 @llvm.hexagon.A2.vnavghcr(i64 %a, i64 %b)
+ ret i64 %z
+}
+; CHECK: r1:0 = vnavgh(r1:0, r3:2):crnd
+
+; Vector average unsigned bytes
+declare i64 @llvm.hexagon.A2.vavgub(i64, i64)
+define i64 @A2_vavgub(i64 %a, i64 %b) {
+ %z = call i64 @llvm.hexagon.A2.vavgub(i64 %a, i64 %b)
+ ret i64 %z
+}
+; CHECK: r1:0 = vavgub(r1:0, r3:2)
+
+declare i64 @llvm.hexagon.A2.vavgubr(i64, i64)
+define i64 @A2_vavgubr(i64 %a, i64 %b) {
+ %z = call i64 @llvm.hexagon.A2.vavgubr(i64 %a, i64 %b)
+ ret i64 %z
+}
+; CHECK: r1:0 = vavgub(r1:0, r3:2):rnd
+
+; Vector average words
+declare i64 @llvm.hexagon.A2.vavgw(i64, i64)
+define i64 @A2_vavgw(i64 %a, i64 %b) {
+ %z = call i64 @llvm.hexagon.A2.vavgw(i64 %a, i64 %b)
+ ret i64 %z
+}
+; CHECK: r1:0 = vavgw(r1:0, r3:2)
+
+declare i64 @llvm.hexagon.A2.vavgwr(i64, i64)
+define i64 @A2_vavgwr(i64 %a, i64 %b) {
+ %z = call i64 @llvm.hexagon.A2.vavgwr(i64 %a, i64 %b)
+ ret i64 %z
+}
+; CHECK: r1:0 = vavgw(r1:0, r3:2):rnd
+
+declare i64 @llvm.hexagon.A2.vavgwcr(i64, i64)
+define i64 @A2_vavgwcr(i64 %a, i64 %b) {
+ %z = call i64 @llvm.hexagon.A2.vavgwcr(i64 %a, i64 %b)
+ ret i64 %z
+}
+; CHECK: r1:0 = vavgw(r1:0, r3:2):crnd
+
+declare i64 @llvm.hexagon.A2.vavguw(i64, i64)
+define i64 @A2_vavguw(i64 %a, i64 %b) {
+ %z = call i64 @llvm.hexagon.A2.vavguw(i64 %a, i64 %b)
+ ret i64 %z
+}
+; CHECK: r1:0 = vavguw(r1:0, r3:2)
+
+declare i64 @llvm.hexagon.A2.vavguwr(i64, i64)
+define i64 @A2_vavguwr(i64 %a, i64 %b) {
+ %z = call i64 @llvm.hexagon.A2.vavguwr(i64 %a, i64 %b)
+ ret i64 %z
+}
+; CHECK: r1:0 = vavguw(r1:0, r3:2):rnd
+
+declare i64 @llvm.hexagon.A2.vnavgw(i64, i64)
+define i64 @A2_vnavgw(i64 %a, i64 %b) {
+ %z = call i64 @llvm.hexagon.A2.vnavgw(i64 %a, i64 %b)
+ ret i64 %z
+}
+; CHECK: r1:0 = vnavgw(r1:0, r3:2)
+
+declare i64 @llvm.hexagon.A2.vnavgwr(i64, i64)
+define i64 @A2_vnavgwr(i64 %a, i64 %b) {
+ %z = call i64 @llvm.hexagon.A2.vnavgwr(i64 %a, i64 %b)
+ ret i64 %z
+}
+; CHECK: r1:0 = vnavgw(r1:0, r3:2):rnd
+
+declare i64 @llvm.hexagon.A2.vnavgwcr(i64, i64)
+define i64 @A2_vnavgwcr(i64 %a, i64 %b) {
+ %z = call i64 @llvm.hexagon.A2.vnavgwcr(i64 %a, i64 %b)
+ ret i64 %z
+}
+; CHECK: r1:0 = vnavgw(r1:0, r3:2):crnd
+
+; Vector conditional negate
+declare i64 @llvm.hexagon.S2.vcnegh(i64, i32)
+define i64 @S2_vcnegh(i64 %a, i32 %b) {
+ %z = call i64 @llvm.hexagon.S2.vcnegh(i64 %a, i32 %b)
+ ret i64 %z
+}
+; CHECK: r1:0 = vcnegh(r1:0, r2)
+
+declare i64 @llvm.hexagon.S2.vrcnegh(i64, i64, i32)
+define i64 @S2_vrcnegh(i64 %a, i64 %b, i32 %c) {
+ %z = call i64 @llvm.hexagon.S2.vrcnegh(i64 %a, i64 %b, i32 %c)
+ ret i64 %z
+}
+; CHECK: r1:0 += vrcnegh(r3:2, r4)
+
+; Vector maximum bytes
+declare i64 @llvm.hexagon.A2.vmaxub(i64, i64)
+define i64 @A2_vmaxub(i64 %a, i64 %b) {
+ %z = call i64 @llvm.hexagon.A2.vmaxub(i64 %a, i64 %b)
+ ret i64 %z
+}
+; CHECK: r1:0 = vmaxub(r1:0, r3:2)
+
+declare i64 @llvm.hexagon.A2.vmaxb(i64, i64)
+define i64 @A2_vmaxb(i64 %a, i64 %b) {
+ %z = call i64 @llvm.hexagon.A2.vmaxb(i64 %a, i64 %b)
+ ret i64 %z
+}
+; CHECK: r1:0 = vmaxb(r1:0, r3:2)
+
+; Vector maximum halfwords
+declare i64 @llvm.hexagon.A2.vmaxh(i64, i64)
+define i64 @A2_vmaxh(i64 %a, i64 %b) {
+ %z = call i64 @llvm.hexagon.A2.vmaxh(i64 %a, i64 %b)
+ ret i64 %z
+}
+; CHECK: r1:0 = vmaxh(r1:0, r3:2)
+
+declare i64 @llvm.hexagon.A2.vmaxuh(i64, i64)
+define i64 @A2_vmaxuh(i64 %a, i64 %b) {
+ %z = call i64 @llvm.hexagon.A2.vmaxuh(i64 %a, i64 %b)
+ ret i64 %z
+}
+; CHECK: r1:0 = vmaxuh(r1:0, r3:2)
+
+; Vector reduce maximum halfwords
+declare i64 @llvm.hexagon.A4.vrmaxh(i64, i64, i32)
+define i64 @A4_vrmaxh(i64 %a, i64 %b, i32 %c) {
+ %z = call i64 @llvm.hexagon.A4.vrmaxh(i64 %a, i64 %b, i32 %c)
+ ret i64 %z
+}
+; CHECK: r1:0 = vrmaxh(r3:2, r4)
+
+declare i64 @llvm.hexagon.A4.vrmaxuh(i64, i64, i32)
+define i64 @A4_vrmaxuh(i64 %a, i64 %b, i32 %c) {
+ %z = call i64 @llvm.hexagon.A4.vrmaxuh(i64 %a, i64 %b, i32 %c)
+ ret i64 %z
+}
+; CHECK: r1:0 = vrmaxuh(r3:2, r4)
+
+; Vector reduce maximum words
+declare i64 @llvm.hexagon.A4.vrmaxw(i64, i64, i32)
+define i64 @A4_vrmaxw(i64 %a, i64 %b, i32 %c) {
+ %z = call i64 @llvm.hexagon.A4.vrmaxw(i64 %a, i64 %b, i32 %c)
+ ret i64 %z
+}
+; CHECK: r1:0 = vrmaxw(r3:2, r4)
+
+declare i64 @llvm.hexagon.A4.vrmaxuw(i64, i64, i32)
+define i64 @A4_vrmaxuw(i64 %a, i64 %b, i32 %c) {
+ %z = call i64 @llvm.hexagon.A4.vrmaxuw(i64 %a, i64 %b, i32 %c)
+ ret i64 %z
+}
+; CHECK: r1:0 = vrmaxuw(r3:2, r4)
+
+; Vector minimum bytes
+declare i64 @llvm.hexagon.A2.vminub(i64, i64)
+define i64 @A2_vminub(i64 %a, i64 %b) {
+ %z = call i64 @llvm.hexagon.A2.vminub(i64 %a, i64 %b)
+ ret i64 %z
+}
+; CHECK: r1:0 = vminub(r1:0, r3:2)
+
+declare i64 @llvm.hexagon.A2.vminb(i64, i64)
+define i64 @A2_vminb(i64 %a, i64 %b) {
+ %z = call i64 @llvm.hexagon.A2.vminb(i64 %a, i64 %b)
+ ret i64 %z
+}
+; CHECK: r1:0 = vminb(r1:0, r3:2)
+
+; Vector minimum halfwords
+declare i64 @llvm.hexagon.A2.vminh(i64, i64)
+define i64 @A2_vminh(i64 %a, i64 %b) {
+ %z = call i64 @llvm.hexagon.A2.vminh(i64 %a, i64 %b)
+ ret i64 %z
+}
+; CHECK: r1:0 = vminh(r1:0, r3:2)
+
+declare i64 @llvm.hexagon.A2.vminuh(i64, i64)
+define i64 @A2_vminuh(i64 %a, i64 %b) {
+ %z = call i64 @llvm.hexagon.A2.vminuh(i64 %a, i64 %b)
+ ret i64 %z
+}
+; CHECK: r1:0 = vminuh(r1:0, r3:2)
+
+; Vector reduce minimum halfwords
+declare i64 @llvm.hexagon.A4.vrminh(i64, i64, i32)
+define i64 @A4_vrminh(i64 %a, i64 %b, i32 %c) {
+ %z = call i64 @llvm.hexagon.A4.vrminh(i64 %a, i64 %b, i32 %c)
+ ret i64 %z
+}
+; CHECK: r1:0 = vrminh(r3:2, r4)
+
+declare i64 @llvm.hexagon.A4.vrminuh(i64, i64, i32)
+define i64 @A4_vrminuh(i64 %a, i64 %b, i32 %c) {
+ %z = call i64 @llvm.hexagon.A4.vrminuh(i64 %a, i64 %b, i32 %c)
+ ret i64 %z
+}
+; CHECK: r1:0 = vrminuh(r3:2, r4)
+
+; Vector reduce minimum words
+declare i64 @llvm.hexagon.A4.vrminw(i64, i64, i32)
+define i64 @A4_vrminw(i64 %a, i64 %b, i32 %c) {
+ %z = call i64 @llvm.hexagon.A4.vrminw(i64 %a, i64 %b, i32 %c)
+ ret i64 %z
+}
+; CHECK: r1:0 = vrminw(r3:2, r4)
+
+declare i64 @llvm.hexagon.A4.vrminuw(i64, i64, i32)
+define i64 @A4_vrminuw(i64 %a, i64 %b, i32 %c) {
+ %z = call i64 @llvm.hexagon.A4.vrminuw(i64 %a, i64 %b, i32 %c)
+ ret i64 %z
+}
+; CHECK: r1:0 = vrminuw(r3:2, r4)
+
+; Vector sum of absolute differences unsigned bytes
+declare i64 @llvm.hexagon.A2.vrsadub(i64, i64)
+define i64 @A2_vrsadub(i64 %a, i64 %b) {
+ %z = call i64 @llvm.hexagon.A2.vrsadub(i64 %a, i64 %b)
+ ret i64 %z
+}
+; CHECK: r1:0 = vrsadub(r1:0, r3:2)
+
+declare i64 @llvm.hexagon.A2.vrsadub.acc(i64, i64, i64)
+define i64 @A2_vrsadub_acc(i64 %a, i64 %b, i64 %c) {
+ %z = call i64 @llvm.hexagon.A2.vrsadub.acc(i64 %a, i64 %b, i64 %c)
+ ret i64 %z
+}
+; CHECK: r1:0 += vrsadub(r3:2, r5:4)
+
+; Vector subtract halfwords
+declare i64 @llvm.hexagon.A2.vsubh(i64, i64)
+define i64 @A2_vsubh(i64 %a, i64 %b) {
+ %z = call i64 @llvm.hexagon.A2.vsubh(i64 %a, i64 %b)
+ ret i64 %z
+}
+; CHECK: r1:0 = vsubh(r1:0, r3:2)
+
+declare i64 @llvm.hexagon.A2.vsubhs(i64, i64)
+define i64 @A2_vsubhs(i64 %a, i64 %b) {
+ %z = call i64 @llvm.hexagon.A2.vsubhs(i64 %a, i64 %b)
+ ret i64 %z
+}
+; CHECK: r1:0 = vsubh(r1:0, r3:2):sat
+
+declare i64 @llvm.hexagon.A2.vsubuhs(i64, i64)
+define i64 @A2_vsubuhs(i64 %a, i64 %b) {
+ %z = call i64 @llvm.hexagon.A2.vsubuhs(i64 %a, i64 %b)
+ ret i64 %z
+}
+; CHECK: r1:0 = vsubuh(r1:0, r3:2):sat
+
+; Vector subtract bytes
+declare i64 @llvm.hexagon.A2.vsubub(i64, i64)
+define i64 @A2_vsubub(i64 %a, i64 %b) {
+ %z = call i64 @llvm.hexagon.A2.vsubub(i64 %a, i64 %b)
+ ret i64 %z
+}
+; CHECK: r1:0 = vsubub(r1:0, r3:2)
+
+declare i64 @llvm.hexagon.A2.vsububs(i64, i64)
+define i64 @A2_vsububs(i64 %a, i64 %b) {
+ %z = call i64 @llvm.hexagon.A2.vsububs(i64 %a, i64 %b)
+ ret i64 %z
+}
+; CHECK: r1:0 = vsubub(r1:0, r3:2):sat
+
+; Vector subtract words
+declare i64 @llvm.hexagon.A2.vsubw(i64, i64)
+define i64 @A2_vsubw(i64 %a, i64 %b) {
+ %z = call i64 @llvm.hexagon.A2.vsubw(i64 %a, i64 %b)
+ ret i64 %z
+}
+; CHECK: r1:0 = vsubw(r1:0, r3:2)
+
+declare i64 @llvm.hexagon.A2.vsubws(i64, i64)
+define i64 @A2_vsubws(i64 %a, i64 %b) {
+ %z = call i64 @llvm.hexagon.A2.vsubws(i64 %a, i64 %b)
+ ret i64 %z
+}
+; CHECK: r1:0 = vsubw(r1:0, r3:2):sat
diff --git a/test/CodeGen/Hexagon/intrinsics/xtype_bit.ll b/test/CodeGen/Hexagon/intrinsics/xtype_bit.ll
new file mode 100644
index 000000000000..8531b2f9334b
--- /dev/null
+++ b/test/CodeGen/Hexagon/intrinsics/xtype_bit.ll
@@ -0,0 +1,329 @@
+; RUN: llc -march=hexagon -O0 < %s | FileCheck %s
+; Hexagon Programmer's Reference Manual 11.10.2 XTYPE/BIT
+
+; Count leading
+declare i32 @llvm.hexagon.S2.clbp(i64)
+define i32 @S2_clbp(i64 %a) {
+ %z = call i32 @llvm.hexagon.S2.clbp(i64 %a)
+ ret i32 %z
+}
+; CHECK: r0 = clb(r1:0)
+
+declare i32 @llvm.hexagon.S2.cl0p(i64)
+define i32 @S2_cl0p(i64 %a) {
+ %z = call i32 @llvm.hexagon.S2.cl0p(i64 %a)
+ ret i32 %z
+}
+; CHECK: r0 = cl0(r1:0)
+
+declare i32 @llvm.hexagon.S2.cl1p(i64)
+define i32 @S2_cl1p(i64 %a) {
+ %z = call i32 @llvm.hexagon.S2.cl1p(i64 %a)
+ ret i32 %z
+}
+; CHECK: r0 = cl1(r1:0)
+
+declare i32 @llvm.hexagon.S4.clbpnorm(i64)
+define i32 @S4_clbpnorm(i64 %a) {
+ %z = call i32 @llvm.hexagon.S4.clbpnorm(i64 %a)
+ ret i32 %z
+}
+; CHECK: r0 = normamt(r1:0)
+
+declare i32 @llvm.hexagon.S4.clbpaddi(i64, i32)
+define i32 @S4_clbpaddi(i64 %a) {
+ %z = call i32 @llvm.hexagon.S4.clbpaddi(i64 %a, i32 0)
+ ret i32 %z
+}
+; CHECK: r0 = add(clb(r1:0), #0)
+
+declare i32 @llvm.hexagon.S4.clbaddi(i32, i32)
+define i32 @S4_clbaddi(i32 %a) {
+ %z = call i32 @llvm.hexagon.S4.clbaddi(i32 %a, i32 0)
+ ret i32 %z
+}
+; CHECK: r0 = add(clb(r0), #0)
+
+declare i32 @llvm.hexagon.S2.cl0(i32)
+define i32 @S2_cl0(i32 %a) {
+ %z = call i32 @llvm.hexagon.S2.cl0(i32 %a)
+ ret i32 %z
+}
+; CHECK: r0 = cl0(r0)
+
+declare i32 @llvm.hexagon.S2.cl1(i32)
+define i32 @S2_cl1(i32 %a) {
+ %z = call i32 @llvm.hexagon.S2.cl1(i32 %a)
+ ret i32 %z
+}
+; CHECK: r0 = cl1(r0)
+
+declare i32 @llvm.hexagon.S2.clbnorm(i32)
+define i32 @S2_clbnorm(i32 %a) {
+ %z = call i32 @llvm.hexagon.S2.clbnorm(i32 %a)
+ ret i32 %z
+}
+; CHECK: r0 = normamt(r0)
+
+; Count population
+declare i32 @llvm.hexagon.S5.popcountp(i64)
+define i32 @S5_popcountp(i64 %a) {
+ %z = call i32 @llvm.hexagon.S5.popcountp(i64 %a)
+ ret i32 %z
+}
+; CHECK: r0 = popcount(r1:0)
+
+; Count trailing
+declare i32 @llvm.hexagon.S2.ct0p(i64)
+define i32 @S2_ct0p(i64 %a) {
+ %z = call i32 @llvm.hexagon.S2.ct0p(i64 %a)
+ ret i32 %z
+}
+; CHECK: r0 = ct0(r1:0)
+
+declare i32 @llvm.hexagon.S2.ct1p(i64)
+define i32 @S2_ct1p(i64 %a) {
+ %z = call i32 @llvm.hexagon.S2.ct1p(i64 %a)
+ ret i32 %z
+}
+; CHECK: r0 = ct1(r1:0)
+
+declare i32 @llvm.hexagon.S2.ct0(i32)
+define i32 @S2_ct0(i32 %a) {
+ %z = call i32 @llvm.hexagon.S2.ct0(i32 %a)
+ ret i32 %z
+}
+; CHECK: r0 = ct0(r0)
+
+declare i32 @llvm.hexagon.S2.ct1(i32)
+define i32 @S2_ct1(i32 %a) {
+ %z = call i32 @llvm.hexagon.S2.ct1(i32 %a)
+ ret i32 %z
+}
+; CHECK: r0 = ct1(r0)
+
+; Extract bitfield
+declare i64 @llvm.hexagon.S2.extractup(i64, i32, i32)
+define i64 @S2_extractup(i64 %a) {
+ %z = call i64 @llvm.hexagon.S2.extractup(i64 %a, i32 0, i32 0)
+ ret i64 %z
+}
+; CHECK: r1:0 = extractu(r1:0, #0, #0)
+
+declare i64 @llvm.hexagon.S4.extractp(i64, i32, i32)
+define i64 @S4_extractp(i64 %a) {
+ %z = call i64 @llvm.hexagon.S4.extractp(i64 %a, i32 0, i32 0)
+ ret i64 %z
+}
+; CHECK: r1:0 = extract(r1:0, #0, #0)
+
+declare i32 @llvm.hexagon.S2.extractu(i32, i32, i32)
+define i32 @S2_extractu(i32 %a) {
+ %z = call i32 @llvm.hexagon.S2.extractu(i32 %a, i32 0, i32 0)
+ ret i32 %z
+}
+; CHECK: r0 = extractu(r0, #0, #0)
+
+declare i32 @llvm.hexagon.S4.extract(i32, i32, i32)
+define i32 @S4_extract(i32 %a) {
+ %z = call i32 @llvm.hexagon.S4.extract(i32 %a, i32 0, i32 0)
+ ret i32 %z
+}
+; CHECK: r0 = extract(r0, #0, #0)
+
+declare i64 @llvm.hexagon.S2.extractup.rp(i64, i64)
+define i64 @S2_extractup_rp(i64 %a, i64 %b) {
+ %z = call i64 @llvm.hexagon.S2.extractup.rp(i64 %a, i64 %b)
+ ret i64 %z
+}
+; CHECK: r1:0 = extractu(r1:0, r3:2)
+
+declare i64 @llvm.hexagon.S4.extractp.rp(i64, i64)
+define i64 @S4_extractp_rp(i64 %a, i64 %b) {
+ %z = call i64 @llvm.hexagon.S4.extractp.rp(i64 %a, i64 %b)
+ ret i64 %z
+}
+; CHECK: r1:0 = extract(r1:0, r3:2)
+
+declare i32 @llvm.hexagon.S2.extractu.rp(i32, i64)
+define i32 @S2_extractu_rp(i32 %a, i64 %b) {
+ %z = call i32 @llvm.hexagon.S2.extractu.rp(i32 %a, i64 %b)
+ ret i32 %z
+}
+; CHECK: r0 = extractu(r0, r3:2)
+
+declare i32 @llvm.hexagon.S4.extract.rp(i32, i64)
+define i32 @S4_extract_rp(i32 %a, i64 %b) {
+ %z = call i32 @llvm.hexagon.S4.extract.rp(i32 %a, i64 %b)
+ ret i32 %z
+}
+; CHECK: r0 = extract(r0, r3:2)
+
+; Insert bitfield
+declare i64 @llvm.hexagon.S2.insertp(i64, i64, i32, i32)
+define i64 @S2_insertp(i64 %a, i64 %b) {
+ %z = call i64 @llvm.hexagon.S2.insertp(i64 %a, i64 %b, i32 0, i32 0)
+ ret i64 %z
+}
+; CHECK: r1:0 = insert(r3:2, #0, #0)
+
+declare i32 @llvm.hexagon.S2.insert(i32, i32, i32, i32)
+define i32 @S2_insert(i32 %a, i32 %b) {
+ %z = call i32 @llvm.hexagon.S2.insert(i32 %a, i32 %b, i32 0, i32 0)
+ ret i32 %z
+}
+; CHECK: r0 = insert(r1, #0, #0)
+
+declare i32 @llvm.hexagon.S2.insert.rp(i32, i32, i64)
+define i32 @S2_insert_rp(i32 %a, i32 %b, i64 %c) {
+ %z = call i32 @llvm.hexagon.S2.insert.rp(i32 %a, i32 %b, i64 %c)
+ ret i32 %z
+}
+; CHECK: r0 = insert(r1, r3:2)
+
+declare i64 @llvm.hexagon.S2.insertp.rp(i64, i64, i64)
+define i64 @S2_insertp_rp(i64 %a, i64 %b, i64 %c) {
+ %z = call i64 @llvm.hexagon.S2.insertp.rp(i64 %a, i64 %b, i64 %c)
+ ret i64 %z
+}
+; CHECK: r1:0 = insert(r3:2, r5:4)
+
+; Interleave/deinterleave
+declare i64 @llvm.hexagon.S2.deinterleave(i64)
+define i64 @S2_deinterleave(i64 %a) {
+ %z = call i64 @llvm.hexagon.S2.deinterleave(i64 %a)
+ ret i64 %z
+}
+; CHECK: r1:0 = deinterleave(r1:0)
+
+declare i64 @llvm.hexagon.S2.interleave(i64)
+define i64 @S2_interleave(i64 %a) {
+ %z = call i64 @llvm.hexagon.S2.interleave(i64 %a)
+ ret i64 %z
+}
+; CHECK: r1:0 = interleave(r1:0)
+
+; Linear feedback-shift operation
+declare i64 @llvm.hexagon.S2.lfsp(i64, i64)
+define i64 @S2_lfsp(i64 %a, i64 %b) {
+ %z = call i64 @llvm.hexagon.S2.lfsp(i64 %a, i64 %b)
+ ret i64 %z
+}
+; CHECK: r1:0 = lfs(r1:0, r3:2)
+
+; Masked parity
+declare i32 @llvm.hexagon.S2.parityp(i64, i64)
+define i32 @S2_parityp(i64 %a, i64 %b) {
+ %z = call i32 @llvm.hexagon.S2.parityp(i64 %a, i64 %b)
+ ret i32 %z
+}
+; CHECK: r0 = parity(r1:0, r3:2)
+
+declare i32 @llvm.hexagon.S4.parity(i32, i32)
+define i32 @S4_parity(i32 %a, i32 %b) {
+ %z = call i32 @llvm.hexagon.S4.parity(i32 %a, i32 %b)
+ ret i32 %z
+}
+; CHECK: r0 = parity(r0, r1)
+
+; Bit reverse
+declare i64 @llvm.hexagon.S2.brevp(i64)
+define i64 @S2_brevp(i64 %a) {
+ %z = call i64 @llvm.hexagon.S2.brevp(i64 %a)
+ ret i64 %z
+}
+; CHECK: r1:0 = brev(r1:0)
+
+declare i32 @llvm.hexagon.S2.brev(i32)
+define i32 @S2_brev(i32 %a) {
+ %z = call i32 @llvm.hexagon.S2.brev(i32 %a)
+ ret i32 %z
+}
+; CHECK: r0 = brev(r0)
+
+; Set/clear/toggle bit
+declare i32 @llvm.hexagon.S2.setbit.i(i32, i32)
+define i32 @S2_setbit_i(i32 %a) {
+ %z = call i32 @llvm.hexagon.S2.setbit.i(i32 %a, i32 0)
+ ret i32 %z
+}
+; CHECK: r0 = setbit(r0, #0)
+
+declare i32 @llvm.hexagon.S2.clrbit.i(i32, i32)
+define i32 @S2_clrbit_i(i32 %a) {
+ %z = call i32 @llvm.hexagon.S2.clrbit.i(i32 %a, i32 0)
+ ret i32 %z
+}
+; CHECK: r0 = clrbit(r0, #0)
+
+declare i32 @llvm.hexagon.S2.togglebit.i(i32, i32)
+define i32 @S2_togglebit_i(i32 %a) {
+ %z = call i32 @llvm.hexagon.S2.togglebit.i(i32 %a, i32 0)
+ ret i32 %z
+}
+; CHECK: r0 = togglebit(r0, #0)
+
+declare i32 @llvm.hexagon.S2.setbit.r(i32, i32)
+define i32 @S2_setbit_r(i32 %a, i32 %b) {
+ %z = call i32 @llvm.hexagon.S2.setbit.r(i32 %a, i32 %b)
+ ret i32 %z
+}
+; CHECK: r0 = setbit(r0, r1)
+
+declare i32 @llvm.hexagon.S2.clrbit.r(i32, i32)
+define i32 @S2_clrbit_r(i32 %a, i32 %b) {
+ %z = call i32 @llvm.hexagon.S2.clrbit.r(i32 %a, i32 %b)
+ ret i32 %z
+}
+; CHECK: r0 = clrbit(r0, r1)
+
+declare i32 @llvm.hexagon.S2.togglebit.r(i32, i32)
+define i32 @S2_togglebit_r(i32 %a, i32 %b) {
+ %z = call i32 @llvm.hexagon.S2.togglebit.r(i32 %a, i32 %b)
+ ret i32 %z
+}
+; CHECK: r0 = togglebit(r0, r1)
+
+; Split bitfield
+declare i64 @llvm.hexagon.A4.bitspliti(i32, i32)
+define i64 @A4_bitspliti(i32 %a) {
+ %z = call i64 @llvm.hexagon.A4.bitspliti(i32 %a, i32 0)
+ ret i64 %z
+}
+; CHECK: = bitsplit(r0, #0)
+
+declare i64 @llvm.hexagon.A4.bitsplit(i32, i32)
+define i64 @A4_bitsplit(i32 %a, i32 %b) {
+ %z = call i64 @llvm.hexagon.A4.bitsplit(i32 %a, i32 %b)
+ ret i64 %z
+}
+; CHECK: r1:0 = bitsplit(r0, r1)
+
+; Table index
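+; Note: the *_goodsyntax intrinsics take the shift argument as written in the
+; manual, and the raw instruction encodes that value minus log2 of the element
+; size, which is why the CHECK lines below show #0, #-1, #-2 and #-3 for an
+; input of #0.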
+declare i32 @llvm.hexagon.S2.tableidxb.goodsyntax(i32, i32, i32, i32)
+define i32 @S2_tableidxb_goodsyntax(i32 %a, i32 %b) {
+ %z = call i32 @llvm.hexagon.S2.tableidxb.goodsyntax(i32 %a, i32 %b, i32 0, i32 0)
+ ret i32 %z
+}
+; CHECK: r0 = tableidxb(r1, #0, #0)
+
+declare i32 @llvm.hexagon.S2.tableidxh.goodsyntax(i32, i32, i32, i32)
+define i32 @S2_tableidxh_goodsyntax(i32 %a, i32 %b) {
+ %z = call i32 @llvm.hexagon.S2.tableidxh.goodsyntax(i32 %a, i32 %b, i32 0, i32 0)
+ ret i32 %z
+}
+; CHECK: r0 = tableidxh(r1, #0, #-1)
+
+declare i32 @llvm.hexagon.S2.tableidxw.goodsyntax(i32, i32, i32, i32)
+define i32 @S2_tableidxw_goodsyntax(i32 %a, i32 %b) {
+ %z = call i32 @llvm.hexagon.S2.tableidxw.goodsyntax(i32 %a, i32 %b, i32 0, i32 0)
+ ret i32 %z
+}
+; CHECK: r0 = tableidxw(r1, #0, #-2)
+
+declare i32 @llvm.hexagon.S2.tableidxd.goodsyntax(i32, i32, i32, i32)
+define i32 @S2_tableidxd_goodsyntax(i32 %a, i32 %b) {
+ %z = call i32 @llvm.hexagon.S2.tableidxd.goodsyntax(i32 %a, i32 %b, i32 0, i32 0)
+ ret i32 %z
+}
+; CHECK: r0 = tableidxd(r1, #0, #-3)
diff --git a/test/CodeGen/Hexagon/intrinsics/xtype_complex.ll b/test/CodeGen/Hexagon/intrinsics/xtype_complex.ll
new file mode 100644
index 000000000000..57b0c5b6db56
--- /dev/null
+++ b/test/CodeGen/Hexagon/intrinsics/xtype_complex.ll
@@ -0,0 +1,349 @@
+; RUN: llc -march=hexagon -O0 < %s | FileCheck %s
+; Hexagon Programmer's Reference Manual 11.10.3 XTYPE/COMPLEX
+
+; Complex add/sub halfwords
+declare i64 @llvm.hexagon.S4.vxaddsubh(i64, i64)
+define i64 @S4_vxaddsubh(i64 %a, i64 %b) {
+ %z = call i64 @llvm.hexagon.S4.vxaddsubh(i64 %a, i64 %b)
+ ret i64 %z
+}
+; CHECK: r1:0 = vxaddsubh(r1:0, r3:2):sat
+
+declare i64 @llvm.hexagon.S4.vxsubaddh(i64, i64)
+define i64 @S4_vxsubaddh(i64 %a, i64 %b) {
+ %z = call i64 @llvm.hexagon.S4.vxsubaddh(i64 %a, i64 %b)
+ ret i64 %z
+}
+; CHECK: r1:0 = vxsubaddh(r1:0, r3:2):sat
+
+declare i64 @llvm.hexagon.S4.vxaddsubhr(i64, i64)
+define i64 @S4_vxaddsubhr(i64 %a, i64 %b) {
+ %z = call i64 @llvm.hexagon.S4.vxaddsubhr(i64 %a, i64 %b)
+ ret i64 %z
+}
+; CHECK: r1:0 = vxaddsubh(r1:0, r3:2):rnd:>>1:sat
+
+declare i64 @llvm.hexagon.S4.vxsubaddhr(i64, i64)
+define i64 @S4_vxsubaddhr(i64 %a, i64 %b) {
+ %z = call i64 @llvm.hexagon.S4.vxsubaddhr(i64 %a, i64 %b)
+ ret i64 %z
+}
+; CHECK: r1:0 = vxsubaddh(r1:0, r3:2):rnd:>>1:sat
+
+; Complex add/sub words
+declare i64 @llvm.hexagon.S4.vxaddsubw(i64, i64)
+define i64 @S4_vxaddsubw(i64 %a, i64 %b) {
+ %z = call i64 @llvm.hexagon.S4.vxaddsubw(i64 %a, i64 %b)
+ ret i64 %z
+}
+; CHECK: r1:0 = vxaddsubw(r1:0, r3:2):sat
+
+declare i64 @llvm.hexagon.S4.vxsubaddw(i64, i64)
+define i64 @S4_vxsubaddw(i64 %a, i64 %b) {
+ %z = call i64 @llvm.hexagon.S4.vxsubaddw(i64 %a, i64 %b)
+ ret i64 %z
+}
+; CHECK: r1:0 = vxsubaddw(r1:0, r3:2):sat
+
+; Complex multiply
+declare i64 @llvm.hexagon.M2.cmpys.s0(i32, i32)
+define i64 @M2_cmpys_s0(i32 %a, i32 %b) {
+ %z = call i64 @llvm.hexagon.M2.cmpys.s0(i32 %a, i32 %b)
+ ret i64 %z
+}
+; CHECK: r1:0 = cmpy(r0, r1):sat
+
+declare i64 @llvm.hexagon.M2.cmpys.s1(i32, i32)
+define i64 @M2_cmpys_s1(i32 %a, i32 %b) {
+ %z = call i64 @llvm.hexagon.M2.cmpys.s1(i32 %a, i32 %b)
+ ret i64 %z
+}
+; CHECK: r1:0 = cmpy(r0, r1):<<1:sat
+
+declare i64 @llvm.hexagon.M2.cmpysc.s0(i32, i32)
+define i64 @M2_cmpysc_s0(i32 %a, i32 %b) {
+ %z = call i64 @llvm.hexagon.M2.cmpysc.s0(i32 %a, i32 %b)
+ ret i64 %z
+}
+; CHECK: r1:0 = cmpy(r0, r1*):sat
+
+declare i64 @llvm.hexagon.M2.cmpysc.s1(i32, i32)
+define i64 @M2_cmpysc_s1(i32 %a, i32 %b) {
+ %z = call i64 @llvm.hexagon.M2.cmpysc.s1(i32 %a, i32 %b)
+ ret i64 %z
+}
+; CHECK: r1:0 = cmpy(r0, r1*):<<1:sat
+
+declare i64 @llvm.hexagon.M2.cmacs.s0(i64, i32, i32)
+define i64 @M2_cmacs_s0(i64 %a, i32 %b, i32 %c) {
+ %z = call i64 @llvm.hexagon.M2.cmacs.s0(i64 %a, i32 %b, i32 %c)
+ ret i64 %z
+}
+; CHECK: r1:0 += cmpy(r2, r3):sat
+
+declare i64 @llvm.hexagon.M2.cmacs.s1(i64, i32, i32)
+define i64 @M2_cmacs_s1(i64 %a, i32 %b, i32 %c) {
+ %z = call i64 @llvm.hexagon.M2.cmacs.s1(i64 %a, i32 %b, i32 %c)
+ ret i64 %z
+}
+; CHECK: r1:0 += cmpy(r2, r3):<<1:sat
+
+declare i64 @llvm.hexagon.M2.cnacs.s0(i64, i32, i32)
+define i64 @M2_cnacs_s0(i64 %a, i32 %b, i32 %c) {
+ %z = call i64 @llvm.hexagon.M2.cnacs.s0(i64 %a, i32 %b, i32 %c)
+ ret i64 %z
+}
+; CHECK: r1:0 -= cmpy(r2, r3):sat
+
+declare i64 @llvm.hexagon.M2.cnacs.s1(i64, i32, i32)
+define i64 @M2_cnacs_s1(i64 %a, i32 %b, i32 %c) {
+ %z = call i64 @llvm.hexagon.M2.cnacs.s1(i64 %a, i32 %b, i32 %c)
+ ret i64 %z
+}
+; CHECK: r1:0 -= cmpy(r2, r3):<<1:sat
+
+declare i64 @llvm.hexagon.M2.cmacsc.s0(i64, i32, i32)
+define i64 @M2_cmacsc_s0(i64 %a, i32 %b, i32 %c) {
+ %z = call i64 @llvm.hexagon.M2.cmacsc.s0(i64 %a, i32 %b, i32 %c)
+ ret i64 %z
+}
+; CHECK: r1:0 += cmpy(r2, r3*):sat
+
+declare i64 @llvm.hexagon.M2.cmacsc.s1(i64, i32, i32)
+define i64 @M2_cmacsc_s1(i64 %a, i32 %b, i32 %c) {
+ %z = call i64 @llvm.hexagon.M2.cmacsc.s1(i64 %a, i32 %b, i32 %c)
+ ret i64 %z
+}
+; CHECK: r1:0 += cmpy(r2, r3*):<<1:sat
+
+declare i64 @llvm.hexagon.M2.cnacsc.s0(i64, i32, i32)
+define i64 @M2_cnacsc_s0(i64 %a, i32 %b, i32 %c) {
+ %z = call i64 @llvm.hexagon.M2.cnacsc.s0(i64 %a, i32 %b, i32 %c)
+ ret i64 %z
+}
+; CHECK: r1:0 -= cmpy(r2, r3*):sat
+
+declare i64 @llvm.hexagon.M2.cnacsc.s1(i64, i32, i32)
+define i64 @M2_cnacsc_s1(i64 %a, i32 %b, i32 %c) {
+ %z = call i64 @llvm.hexagon.M2.cnacsc.s1(i64 %a, i32 %b, i32 %c)
+ ret i64 %z
+}
+; CHECK: r1:0 -= cmpy(r2, r3*):<<1:sat
+
+; Complex multiply real or imaginary
+declare i64 @llvm.hexagon.M2.cmpyi.s0(i32, i32)
+define i64 @M2_cmpyi_s0(i32 %a, i32 %b) {
+ %z = call i64 @llvm.hexagon.M2.cmpyi.s0(i32 %a, i32 %b)
+ ret i64 %z
+}
+; CHECK: r1:0 = cmpyi(r0, r1)
+
+declare i64 @llvm.hexagon.M2.cmpyr.s0(i32, i32)
+define i64 @M2_cmpyr_s0(i32 %a, i32 %b) {
+ %z = call i64 @llvm.hexagon.M2.cmpyr.s0(i32 %a, i32 %b)
+ ret i64 %z
+}
+; CHECK: r1:0 = cmpyr(r0, r1)
+
+declare i64 @llvm.hexagon.M2.cmaci.s0(i64, i32, i32)
+define i64 @M2_cmaci_s0(i64 %a, i32 %b, i32 %c) {
+ %z = call i64 @llvm.hexagon.M2.cmaci.s0(i64 %a, i32 %b, i32 %c)
+ ret i64 %z
+}
+; CHECK: r1:0 += cmpyi(r2, r3)
+
+declare i64 @llvm.hexagon.M2.cmacr.s0(i64, i32, i32)
+define i64 @M2_cmacr_s0(i64 %a, i32 %b, i32 %c) {
+ %z = call i64 @llvm.hexagon.M2.cmacr.s0(i64 %a, i32 %b, i32 %c)
+ ret i64 %z
+}
+; CHECK: r1:0 += cmpyr(r2, r3)
+
+; Complex multiply with round and pack
+declare i32 @llvm.hexagon.M2.cmpyrs.s0(i32, i32)
+define i32 @M2_cmpyrs_s0(i32 %a, i32 %b) {
+ %z = call i32 @llvm.hexagon.M2.cmpyrs.s0(i32 %a, i32 %b)
+ ret i32 %z
+}
+; CHECK: r0 = cmpy(r0, r1):rnd:sat
+
+declare i32 @llvm.hexagon.M2.cmpyrs.s1(i32, i32)
+define i32 @M2_cmpyrs_s1(i32 %a, i32 %b) {
+ %z = call i32 @llvm.hexagon.M2.cmpyrs.s1(i32 %a, i32 %b)
+ ret i32 %z
+}
+; CHECK: r0 = cmpy(r0, r1):<<1:rnd:sat
+
+declare i32 @llvm.hexagon.M2.cmpyrsc.s0(i32, i32)
+define i32 @M2_cmpyrsc_s0(i32 %a, i32 %b) {
+ %z = call i32 @llvm.hexagon.M2.cmpyrsc.s0(i32 %a, i32 %b)
+ ret i32 %z
+}
+; CHECK: r0 = cmpy(r0, r1*):rnd:sat
+
+declare i32 @llvm.hexagon.M2.cmpyrsc.s1(i32, i32)
+define i32 @M2_cmpyrsc_s1(i32 %a, i32 %b) {
+ %z = call i32 @llvm.hexagon.M2.cmpyrsc.s1(i32 %a, i32 %b)
+ ret i32 %z
+}
+; CHECK: r0 = cmpy(r0, r1*):<<1:rnd:sat
+
+; Complex multiply 32x16
+declare i32 @llvm.hexagon.M4.cmpyi.wh(i64, i32)
+define i32 @M4_cmpyi_wh(i64 %a, i32 %b) {
+ %z = call i32 @llvm.hexagon.M4.cmpyi.wh(i64 %a, i32 %b)
+ ret i32 %z
+}
+; CHECK: r0 = cmpyiwh(r1:0, r2):<<1:rnd:sat
+
+declare i32 @llvm.hexagon.M4.cmpyi.whc(i64, i32)
+define i32 @M4_cmpyi_whc(i64 %a, i32 %b) {
+ %z = call i32 @llvm.hexagon.M4.cmpyi.whc(i64 %a, i32 %b)
+ ret i32 %z
+}
+; CHECK: r0 = cmpyiwh(r1:0, r2*):<<1:rnd:sat
+
+declare i32 @llvm.hexagon.M4.cmpyr.wh(i64, i32)
+define i32 @M4_cmpyr_wh(i64 %a, i32 %b) {
+ %z = call i32 @llvm.hexagon.M4.cmpyr.wh(i64 %a, i32 %b)
+ ret i32 %z
+}
+; CHECK: r0 = cmpyrwh(r1:0, r2):<<1:rnd:sat
+
+declare i32 @llvm.hexagon.M4.cmpyr.whc(i64, i32)
+define i32 @M4_cmpyr_whc(i64 %a, i32 %b) {
+ %z = call i32 @llvm.hexagon.M4.cmpyr.whc(i64 %a, i32 %b)
+ ret i32 %z
+}
+; CHECK: r0 = cmpyrwh(r1:0, r2*):<<1:rnd:sat
+
+; Vector complex multiply real or imaginary
+declare i64 @llvm.hexagon.M2.vcmpy.s0.sat.r(i64, i64)
+define i64 @M2_vcmpy_s0_sat_r(i64 %a, i64 %b) {
+ %z = call i64 @llvm.hexagon.M2.vcmpy.s0.sat.r(i64 %a, i64 %b)
+ ret i64 %z
+}
+; CHECK: r1:0 = vcmpyr(r1:0, r3:2):sat
+
+declare i64 @llvm.hexagon.M2.vcmpy.s1.sat.r(i64, i64)
+define i64 @M2_vcmpy_s1_sat_r(i64 %a, i64 %b) {
+ %z = call i64 @llvm.hexagon.M2.vcmpy.s1.sat.r(i64 %a, i64 %b)
+ ret i64 %z
+}
+; CHECK: r1:0 = vcmpyr(r1:0, r3:2):<<1:sat
+
+declare i64 @llvm.hexagon.M2.vcmpy.s0.sat.i(i64, i64)
+define i64 @M2_vcmpy_s0_sat_i(i64 %a, i64 %b) {
+ %z = call i64 @llvm.hexagon.M2.vcmpy.s0.sat.i(i64 %a, i64 %b)
+ ret i64 %z
+}
+; CHECK: r1:0 = vcmpyi(r1:0, r3:2):sat
+
+declare i64 @llvm.hexagon.M2.vcmpy.s1.sat.i(i64, i64)
+define i64 @M2_vcmpy_s1_sat_i(i64 %a, i64 %b) {
+ %z = call i64 @llvm.hexagon.M2.vcmpy.s1.sat.i(i64 %a, i64 %b)
+ ret i64 %z
+}
+; CHECK: r1:0 = vcmpyi(r1:0, r3:2):<<1:sat
+
+declare i64 @llvm.hexagon.M2.vcmac.s0.sat.r(i64, i64, i64)
+define i64 @M2_vcmac_s0_sat_r(i64 %a, i64 %b, i64 %c) {
+ %z = call i64 @llvm.hexagon.M2.vcmac.s0.sat.r(i64 %a, i64 %b, i64 %c)
+ ret i64 %z
+}
+; CHECK: r1:0 += vcmpyr(r3:2, r5:4):sat
+
+declare i64 @llvm.hexagon.M2.vcmac.s0.sat.i(i64, i64, i64)
+define i64 @M2_vcmac_s0_sat_i(i64 %a, i64 %b, i64 %c) {
+ %z = call i64 @llvm.hexagon.M2.vcmac.s0.sat.i(i64 %a, i64 %b, i64 %c)
+ ret i64 %z
+}
+; CHECK: r1:0 += vcmpyi(r3:2, r5:4):sat
+
+; Vector complex conjugate
+declare i64 @llvm.hexagon.A2.vconj(i64)
+define i64 @A2_vconj(i64 %a) {
+ %z = call i64 @llvm.hexagon.A2.vconj(i64 %a)
+ ret i64 %z
+}
+; CHECK: r1:0 = vconj(r1:0):sat
+
+; Vector complex rotate
+declare i64 @llvm.hexagon.S2.vcrotate(i64, i32)
+define i64 @S2_vcrotate(i64 %a, i32 %b) {
+ %z = call i64 @llvm.hexagon.S2.vcrotate(i64 %a, i32 %b)
+ ret i64 %z
+}
+; CHECK: r1:0 = vcrotate(r1:0, r2)
+
+; Vector reduce complex multiply real or imaginary
+declare i64 @llvm.hexagon.M2.vrcmpyi.s0(i64, i64)
+define i64 @M2_vrcmpyi_s0(i64 %a, i64 %b) {
+ %z = call i64 @llvm.hexagon.M2.vrcmpyi.s0(i64 %a, i64 %b)
+ ret i64 %z
+}
+; CHECK: r1:0 = vrcmpyi(r1:0, r3:2)
+
+declare i64 @llvm.hexagon.M2.vrcmpyr.s0(i64, i64)
+define i64 @M2_vrcmpyr_s0(i64 %a, i64 %b) {
+ %z = call i64 @llvm.hexagon.M2.vrcmpyr.s0(i64 %a, i64 %b)
+ ret i64 %z
+}
+; CHECK: r1:0 = vrcmpyr(r1:0, r3:2)
+
+declare i64 @llvm.hexagon.M2.vrcmpyi.s0c(i64, i64)
+define i64 @M2_vrcmpyi_s0c(i64 %a, i64 %b) {
+ %z = call i64 @llvm.hexagon.M2.vrcmpyi.s0c(i64 %a, i64 %b)
+ ret i64 %z
+}
+; CHECK: r1:0 = vrcmpyi(r1:0, r3:2*)
+
+declare i64 @llvm.hexagon.M2.vrcmpyr.s0c(i64, i64)
+define i64 @M2_vrcmpyr_s0c(i64 %a, i64 %b) {
+ %z = call i64 @llvm.hexagon.M2.vrcmpyr.s0c(i64 %a, i64 %b)
+ ret i64 %z
+}
+; CHECK: r1:0 = vrcmpyr(r1:0, r3:2*)
+
+declare i64 @llvm.hexagon.M2.vrcmaci.s0(i64, i64, i64)
+define i64 @M2_vrcmaci_s0(i64 %a, i64 %b, i64 %c) {
+ %z = call i64 @llvm.hexagon.M2.vrcmaci.s0(i64 %a, i64 %b, i64 %c)
+ ret i64 %z
+}
+; CHECK: r1:0 += vrcmpyi(r3:2, r5:4)
+
+declare i64 @llvm.hexagon.M2.vrcmacr.s0(i64, i64, i64)
+define i64 @M2_vrcmacr_s0(i64 %a, i64 %b, i64 %c) {
+ %z = call i64 @llvm.hexagon.M2.vrcmacr.s0(i64 %a, i64 %b, i64 %c)
+ ret i64 %z
+}
+; CHECK: r1:0 += vrcmpyr(r3:2, r5:4)
+
+declare i64 @llvm.hexagon.M2.vrcmaci.s0c(i64, i64, i64)
+define i64 @M2_vrcmaci_s0c(i64 %a, i64 %b, i64 %c) {
+ %z = call i64 @llvm.hexagon.M2.vrcmaci.s0c(i64 %a, i64 %b, i64 %c)
+ ret i64 %z
+}
+; CHECK: r1:0 += vrcmpyi(r3:2, r5:4*)
+
+declare i64 @llvm.hexagon.M2.vrcmacr.s0c(i64, i64, i64)
+define i64 @M2_vrcmacr_s0c(i64 %a, i64 %b, i64 %c) {
+ %z = call i64 @llvm.hexagon.M2.vrcmacr.s0c(i64 %a, i64 %b, i64 %c)
+ ret i64 %z
+}
+; CHECK: r1:0 += vrcmpyr(r3:2, r5:4*)
+
+; Vector reduce complex rotate
+declare i64 @llvm.hexagon.S4.vrcrotate(i64, i32, i32)
+define i64 @S4_vrcrotate(i64 %a, i32 %b) {
+ %z = call i64 @llvm.hexagon.S4.vrcrotate(i64 %a, i32 %b, i32 0)
+ ret i64 %z
+}
+; CHECK: r1:0 = vrcrotate(r1:0, r2, #0)
+
+declare i64 @llvm.hexagon.S4.vrcrotate.acc(i64, i64, i32, i32)
+define i64 @S4_vrcrotate_acc(i64 %a, i64 %b, i32 %c) {
+ %z = call i64 @llvm.hexagon.S4.vrcrotate.acc(i64 %a, i64 %b, i32 %c, i32 0)
+ ret i64 %z
+}
+; CHECK: r1:0 += vrcrotate(r3:2, r4, #0)
diff --git a/test/CodeGen/Hexagon/intrinsics/xtype_fp.ll b/test/CodeGen/Hexagon/intrinsics/xtype_fp.ll
new file mode 100644
index 000000000000..aef8127d668c
--- /dev/null
+++ b/test/CodeGen/Hexagon/intrinsics/xtype_fp.ll
@@ -0,0 +1,388 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv5 -O0 < %s | FileCheck %s
+; Hexagon Programmer's Reference Manual 11.10.4 XTYPE/FP
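+; Hardware floating point is available starting with the V5 architecture,
+; hence -mcpu=hexagonv5 in the RUN line above.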
+
+; Floating point addition
+declare float @llvm.hexagon.F2.sfadd(float, float)
+define float @F2_sfadd(float %a, float %b) {
+ %z = call float @llvm.hexagon.F2.sfadd(float %a, float %b)
+ ret float %z
+}
+; CHECK: r0 = sfadd(r0, r1)
+
+; Classify floating-point value
+declare i32 @llvm.hexagon.F2.sfclass(float, i32)
+define i32 @F2_sfclass(float %a) {
+ %z = call i32 @llvm.hexagon.F2.sfclass(float %a, i32 0)
+ ret i32 %z
+}
+; CHECK: p0 = sfclass(r0, #0)
+
+declare i32 @llvm.hexagon.F2.dfclass(double, i32)
+define i32 @F2_dfclass(double %a) {
+ %z = call i32 @llvm.hexagon.F2.dfclass(double %a, i32 0)
+ ret i32 %z
+}
+; CHECK: p0 = dfclass(r1:0, #0)
+
+; Compare floating-point value
+declare i32 @llvm.hexagon.F2.sfcmpge(float, float)
+define i32 @F2_sfcmpge(float %a, float %b) {
+ %z = call i32 @llvm.hexagon.F2.sfcmpge(float %a, float %b)
+ ret i32 %z
+}
+; CHECK: p0 = sfcmp.ge(r0, r1)
+
+declare i32 @llvm.hexagon.F2.sfcmpuo(float, float)
+define i32 @F2_sfcmpuo(float %a, float %b) {
+ %z = call i32 @llvm.hexagon.F2.sfcmpuo(float %a, float %b)
+ ret i32 %z
+}
+; CHECK: p0 = sfcmp.uo(r0, r1)
+
+declare i32 @llvm.hexagon.F2.sfcmpeq(float, float)
+define i32 @F2_sfcmpeq(float %a, float %b) {
+ %z = call i32 @llvm.hexagon.F2.sfcmpeq(float %a, float %b)
+ ret i32 %z
+}
+; CHECK: p0 = sfcmp.eq(r0, r1)
+
+declare i32 @llvm.hexagon.F2.sfcmpgt(float, float)
+define i32 @F2_sfcmpgt(float %a, float %b) {
+ %z = call i32 @llvm.hexagon.F2.sfcmpgt(float %a, float %b)
+ ret i32 %z
+}
+; CHECK: p0 = sfcmp.gt(r0, r1)
+
+declare i32 @llvm.hexagon.F2.dfcmpge(double, double)
+define i32 @F2_dfcmpge(double %a, double %b) {
+ %z = call i32 @llvm.hexagon.F2.dfcmpge(double %a, double %b)
+ ret i32 %z
+}
+; CHECK: p0 = dfcmp.ge(r1:0, r3:2)
+
+declare i32 @llvm.hexagon.F2.dfcmpuo(double, double)
+define i32 @F2_dfcmpuo(double %a, double %b) {
+ %z = call i32 @llvm.hexagon.F2.dfcmpuo(double %a, double %b)
+ ret i32 %z
+}
+; CHECK: p0 = dfcmp.uo(r1:0, r3:2)
+
+declare i32 @llvm.hexagon.F2.dfcmpeq(double, double)
+define i32 @F2_dfcmpeq(double %a, double %b) {
+ %z = call i32 @llvm.hexagon.F2.dfcmpeq(double %a, double %b)
+ ret i32 %z
+}
+; CHECK: p0 = dfcmp.eq(r1:0, r3:2)
+
+declare i32 @llvm.hexagon.F2.dfcmpgt(double, double)
+define i32 @F2_dfcmpgt(double %a, double %b) {
+ %z = call i32 @llvm.hexagon.F2.dfcmpgt(double %a, double %b)
+ ret i32 %z
+}
+; CHECK: p0 = dfcmp.gt(r1:0, r3:2)
+
+; Convert floating-point value to other format
+declare double @llvm.hexagon.F2.conv.sf2df(float)
+define double @F2_conv_sf2df(float %a) {
+ %z = call double @llvm.hexagon.F2.conv.sf2df(float %a)
+ ret double %z
+}
+; CHECK: = convert_sf2df(r0)
+
+declare float @llvm.hexagon.F2.conv.df2sf(double)
+define float @F2_conv_df2sf(double %a) {
+ %z = call float @llvm.hexagon.F2.conv.df2sf(double %a)
+ ret float %z
+}
+; CHECK: r0 = convert_df2sf(r1:0)
+
+; Convert integer to floating-point value
+declare double @llvm.hexagon.F2.conv.ud2df(i64)
+define double @F2_conv_ud2df(i64 %a) {
+ %z = call double @llvm.hexagon.F2.conv.ud2df(i64 %a)
+ ret double %z
+}
+; CHECK: r1:0 = convert_ud2df(r1:0)
+
+declare double @llvm.hexagon.F2.conv.d2df(i64)
+define double @F2_conv_d2df(i64 %a) {
+ %z = call double @llvm.hexagon.F2.conv.d2df(i64 %a)
+ ret double %z
+}
+; CHECK: r1:0 = convert_d2df(r1:0)
+
+declare double @llvm.hexagon.F2.conv.uw2df(i32)
+define double @F2_conv_uw2df(i32 %a) {
+ %z = call double @llvm.hexagon.F2.conv.uw2df(i32 %a)
+ ret double %z
+}
+; CHECK: = convert_uw2df(r0)
+
+declare double @llvm.hexagon.F2.conv.w2df(i32)
+define double @F2_conv_w2df(i32 %a) {
+ %z = call double @llvm.hexagon.F2.conv.w2df(i32 %a)
+ ret double %z
+}
+; CHECK: = convert_w2df(r0)
+
+declare float @llvm.hexagon.F2.conv.ud2sf(i64)
+define float @F2_conv_ud2sf(i64 %a) {
+ %z = call float @llvm.hexagon.F2.conv.ud2sf(i64 %a)
+ ret float %z
+}
+; CHECK: r0 = convert_ud2sf(r1:0)
+
+declare float @llvm.hexagon.F2.conv.d2sf(i64)
+define float @F2_conv_d2sf(i64 %a) {
+ %z = call float @llvm.hexagon.F2.conv.d2sf(i64 %a)
+ ret float %z
+}
+; CHECK: r0 = convert_d2sf(r1:0)
+
+declare float @llvm.hexagon.F2.conv.uw2sf(i32)
+define float @F2_conv_uw2sf(i32 %a) {
+ %z = call float @llvm.hexagon.F2.conv.uw2sf(i32 %a)
+ ret float %z
+}
+; CHECK: r0 = convert_uw2sf(r0)
+
+declare float @llvm.hexagon.F2.conv.w2sf(i32)
+define float @F2_conv_w2sf(i32 %a) {
+ %z = call float @llvm.hexagon.F2.conv.w2sf(i32 %a)
+ ret float %z
+}
+; CHECK: r0 = convert_w2sf(r0)
+
+; Convert floating-point value to integer
+declare i64 @llvm.hexagon.F2.conv.df2d(double)
+define i64 @F2_conv_df2d(double %a) {
+ %z = call i64 @llvm.hexagon.F2.conv.df2d(double %a)
+ ret i64 %z
+}
+; CHECK: r1:0 = convert_df2d(r1:0)
+
+declare i64 @llvm.hexagon.F2.conv.df2ud(double)
+define i64 @F2_conv_df2ud(double %a) {
+ %z = call i64 @llvm.hexagon.F2.conv.df2ud(double %a)
+ ret i64 %z
+}
+; CHECK: r1:0 = convert_df2ud(r1:0)
+
+declare i64 @llvm.hexagon.F2.conv.df2d.chop(double)
+define i64 @F2_conv_df2d_chop(double %a) {
+ %z = call i64 @llvm.hexagon.F2.conv.df2d.chop(double %a)
+ ret i64 %z
+}
+; CHECK: r1:0 = convert_df2d(r1:0):chop
+
+declare i64 @llvm.hexagon.F2.conv.df2ud.chop(double)
+define i64 @F2_conv_df2ud_chop(double %a) {
+ %z = call i64 @llvm.hexagon.F2.conv.df2ud.chop(double %a)
+ ret i64 %z
+}
+; CHECK: r1:0 = convert_df2ud(r1:0):chop
+
+declare i64 @llvm.hexagon.F2.conv.sf2ud(float)
+define i64 @F2_conv_sf2ud(float %a) {
+ %z = call i64 @llvm.hexagon.F2.conv.sf2ud(float %a)
+ ret i64 %z
+}
+; CHECK: = convert_sf2ud(r0)
+
+declare i64 @llvm.hexagon.F2.conv.sf2d(float)
+define i64 @F2_conv_sf2d(float %a) {
+ %z = call i64 @llvm.hexagon.F2.conv.sf2d(float %a)
+ ret i64 %z
+}
+; CHECK: = convert_sf2d(r0)
+
+declare i64 @llvm.hexagon.F2.conv.sf2d.chop(float)
+define i64 @F2_conv_sf2d_chop(float %a) {
+ %z = call i64 @llvm.hexagon.F2.conv.sf2d.chop(float %a)
+ ret i64 %z
+}
+; CHECK: = convert_sf2d(r0):chop
+
+declare i64 @llvm.hexagon.F2.conv.sf2ud.chop(float)
+define i64 @F2_conv_sf2ud_chop(float %a) {
+ %z = call i64 @llvm.hexagon.F2.conv.sf2ud.chop(float %a)
+ ret i64 %z
+}
+; CHECK: = convert_sf2ud(r0):chop
+
+declare i32 @llvm.hexagon.F2.conv.df2uw(double)
+define i32 @F2_conv_df2uw(double %a) {
+ %z = call i32 @llvm.hexagon.F2.conv.df2uw(double %a)
+ ret i32 %z
+}
+; CHECK: r0 = convert_df2uw(r1:0)
+
+declare i32 @llvm.hexagon.F2.conv.df2w(double)
+define i32 @F2_conv_df2w(double %a) {
+ %z = call i32 @llvm.hexagon.F2.conv.df2w(double %a)
+ ret i32 %z
+}
+; CHECK: r0 = convert_df2w(r1:0)
+
+declare i32 @llvm.hexagon.F2.conv.df2w.chop(double)
+define i32 @F2_conv_df2w_chop(double %a) {
+ %z = call i32 @llvm.hexagon.F2.conv.df2w.chop(double %a)
+ ret i32 %z
+}
+; CHECK: r0 = convert_df2w(r1:0):chop
+
+declare i32 @llvm.hexagon.F2.conv.df2uw.chop(double)
+define i32 @F2_conv_df2uw_chop(double %a) {
+ %z = call i32 @llvm.hexagon.F2.conv.df2uw.chop(double %a)
+ ret i32 %z
+}
+; CHECK: r0 = convert_df2uw(r1:0):chop
+
+declare i32 @llvm.hexagon.F2.conv.sf2uw(float)
+define i32 @F2_conv_sf2uw(float %a) {
+ %z = call i32 @llvm.hexagon.F2.conv.sf2uw(float %a)
+ ret i32 %z
+}
+; CHECK: r0 = convert_sf2uw(r0)
+
+declare i32 @llvm.hexagon.F2.conv.sf2uw.chop(float)
+define i32 @F2_conv_sf2uw_chop(float %a) {
+ %z = call i32 @llvm.hexagon.F2.conv.sf2uw.chop(float %a)
+ ret i32 %z
+}
+; CHECK: r0 = convert_sf2uw(r0):chop
+
+declare i32 @llvm.hexagon.F2.conv.sf2w(float)
+define i32 @F2_conv_sf2w(float %a) {
+ %z = call i32 @llvm.hexagon.F2.conv.sf2w(float %a)
+ ret i32 %z
+}
+; CHECK: r0 = convert_sf2w(r0)
+
+declare i32 @llvm.hexagon.F2.conv.sf2w.chop(float)
+define i32 @F2_conv_sf2w_chop(float %a) {
+ %z = call i32 @llvm.hexagon.F2.conv.sf2w.chop(float %a)
+ ret i32 %z
+}
+; CHECK: r0 = convert_sf2w(r0):chop
+
+; Floating point extreme value assistance
+declare float @llvm.hexagon.F2.sffixupr(float)
+define float @F2_sffixupr(float %a) {
+ %z = call float @llvm.hexagon.F2.sffixupr(float %a)
+ ret float %z
+}
+; CHECK: r0 = sffixupr(r0)
+
+declare float @llvm.hexagon.F2.sffixupn(float, float)
+define float @F2_sffixupn(float %a, float %b) {
+ %z = call float @llvm.hexagon.F2.sffixupn(float %a, float %b)
+ ret float %z
+}
+; CHECK: r0 = sffixupn(r0, r1)
+
+declare float @llvm.hexagon.F2.sffixupd(float, float)
+define float @F2_sffixupd(float %a, float %b) {
+ %z = call float @llvm.hexagon.F2.sffixupd(float %a, float %b)
+ ret float %z
+}
+; CHECK: r0 = sffixupd(r0, r1)
+
+; Floating point fused multiply-add
+declare float @llvm.hexagon.F2.sffma(float, float, float)
+define float @F2_sffma(float %a, float %b, float %c) {
+ %z = call float @llvm.hexagon.F2.sffma(float %a, float %b, float %c)
+ ret float %z
+}
+; CHECK: r0 += sfmpy(r1, r2)
+
+declare float @llvm.hexagon.F2.sffms(float, float, float)
+define float @F2_sffms(float %a, float %b, float %c) {
+ %z = call float @llvm.hexagon.F2.sffms(float %a, float %b, float %c)
+ ret float %z
+}
+; CHECK: r0 -= sfmpy(r1, r2)
+
+; Floating point fused multiply-add with scaling
+declare float @llvm.hexagon.F2.sffma.sc(float, float, float, i32)
+define float @F2_sffma_sc(float %a, float %b, float %c, i32 %d) {
+ %z = call float @llvm.hexagon.F2.sffma.sc(float %a, float %b, float %c, i32 %d)
+ ret float %z
+}
+; CHECK: r0 += sfmpy(r1, r2, p0):scale
+
+; Floating point fused multiply-add for library routines
+declare float @llvm.hexagon.F2.sffma.lib(float, float, float)
+define float @F2_sffma_lib(float %a, float %b, float %c) {
+ %z = call float @llvm.hexagon.F2.sffma.lib(float %a, float %b, float %c)
+ ret float %z
+}
+; CHECK: r0 += sfmpy(r1, r2):lib
+
+declare float @llvm.hexagon.F2.sffms.lib(float, float, float)
+define float @F2_sffms_lib(float %a, float %b, float %c) {
+ %z = call float @llvm.hexagon.F2.sffms.lib(float %a, float %b, float %c)
+ ret float %z
+}
+; CHECK: r0 -= sfmpy(r1, r2):lib
+
+; Create floating-point constant
+declare float @llvm.hexagon.F2.sfimm.p(i32)
+define float @F2_sfimm_p() {
+ %z = call float @llvm.hexagon.F2.sfimm.p(i32 0)
+ ret float %z
+}
+; CHECK: r0 = sfmake(#0):pos
+
+declare float @llvm.hexagon.F2.sfimm.n(i32)
+define float @F2_sfimm_n() {
+ %z = call float @llvm.hexagon.F2.sfimm.n(i32 0)
+ ret float %z
+}
+; CHECK: r0 = sfmake(#0):neg
+
+declare double @llvm.hexagon.F2.dfimm.p(i32)
+define double @F2_dfimm_p() {
+ %z = call double @llvm.hexagon.F2.dfimm.p(i32 0)
+ ret double %z
+}
+; CHECK: r1:0 = dfmake(#0):pos
+
+declare double @llvm.hexagon.F2.dfimm.n(i32)
+define double @F2_dfimm_n() {
+ %z = call double @llvm.hexagon.F2.dfimm.n(i32 0)
+ ret double %z
+}
+; CHECK: r1:0 = dfmake(#0):neg
+
+; Floating point maximum
+declare float @llvm.hexagon.F2.sfmax(float, float)
+define float @F2_sfmax(float %a, float %b) {
+ %z = call float @llvm.hexagon.F2.sfmax(float %a, float %b)
+ ret float %z
+}
+; CHECK: r0 = sfmax(r0, r1)
+
+; Floating point minimum
+declare float @llvm.hexagon.F2.sfmin(float, float)
+define float @F2_sfmin(float %a, float %b) {
+ %z = call float @llvm.hexagon.F2.sfmin(float %a, float %b)
+ ret float %z
+}
+; CHECK: r0 = sfmin(r0, r1)
+
+; Floating point multiply
+declare float @llvm.hexagon.F2.sfmpy(float, float)
+define float @F2_sfmpy(float %a, float %b) {
+ %z = call float @llvm.hexagon.F2.sfmpy(float %a, float %b)
+ ret float %z
+}
+; CHECK: r0 = sfmpy(r0, r1)
+
+; Floating point subtraction
+declare float @llvm.hexagon.F2.sfsub(float, float)
+define float @F2_sfsub(float %a, float %b) {
+ %z = call float @llvm.hexagon.F2.sfsub(float %a, float %b)
+ ret float %z
+}
+; CHECK: r0 = sfsub(r0, r1)
diff --git a/test/CodeGen/Hexagon/intrinsics/xtype_mpy.ll b/test/CodeGen/Hexagon/intrinsics/xtype_mpy.ll
new file mode 100644
index 000000000000..6409e4e10ca2
--- /dev/null
+++ b/test/CodeGen/Hexagon/intrinsics/xtype_mpy.ll
@@ -0,0 +1,1525 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv5 -O0 < %s | FileCheck %s
+; Hexagon Programmer's Reference Manual 11.10.5 XTYPE/MPY
+
+; Multiply and use lower result
+declare i32 @llvm.hexagon.M4.mpyrr.addi(i32, i32, i32)
+define i32 @M4_mpyrr_addi(i32 %a, i32 %b) {
+ %z = call i32 @llvm.hexagon.M4.mpyrr.addi(i32 0, i32 %a, i32 %b)
+ ret i32 %z
+}
+; CHECK: r0 = add(#0, mpyi(r0, r1))
+
+declare i32 @llvm.hexagon.M4.mpyri.addi(i32, i32, i32)
+define i32 @M4_mpyri_addi(i32 %a) {
+ %z = call i32 @llvm.hexagon.M4.mpyri.addi(i32 0, i32 %a, i32 0)
+ ret i32 %z
+}
+; CHECK: r0 = add(#0, mpyi(r0, #0))
+
+declare i32 @llvm.hexagon.M4.mpyri.addr.u2(i32, i32, i32)
+define i32 @M4_mpyri_addr_u2(i32 %a, i32 %b) {
+ %z = call i32 @llvm.hexagon.M4.mpyri.addr.u2(i32 %a, i32 0, i32 %b)
+ ret i32 %z
+}
+; CHECK: r0 = add(r0, mpyi(#0, r1))
+
+declare i32 @llvm.hexagon.M4.mpyri.addr(i32, i32, i32)
+define i32 @M4_mpyri_addr(i32 %a, i32 %b) {
+ %z = call i32 @llvm.hexagon.M4.mpyri.addr(i32 %a, i32 %b, i32 0)
+ ret i32 %z
+}
+; CHECK: r0 = add(r0, mpyi(r1, #0))
+
+declare i32 @llvm.hexagon.M4.mpyrr.addr(i32, i32, i32)
+define i32 @M4_mpyrr_addr(i32 %a, i32 %b, i32 %c) {
+ %z = call i32 @llvm.hexagon.M4.mpyrr.addr(i32 %a, i32 %b, i32 %c)
+ ret i32 %z
+}
+; CHECK: r1 = add(r0, mpyi(r1, r2))
+
+; Vector multiply word by signed half (32x16)
+declare i64 @llvm.hexagon.M2.mmpyl.s0(i64, i64)
+define i64 @M2_mmpyl_s0(i64 %a, i64 %b) {
+ %z = call i64 @llvm.hexagon.M2.mmpyl.s0(i64 %a, i64 %b)
+ ret i64 %z
+}
+; CHECK: r1:0 = vmpyweh(r1:0, r3:2):sat
+
+declare i64 @llvm.hexagon.M2.mmpyl.s1(i64, i64)
+define i64 @M2_mmpyl_s1(i64 %a, i64 %b) {
+ %z = call i64 @llvm.hexagon.M2.mmpyl.s1(i64 %a, i64 %b)
+ ret i64 %z
+}
+; CHECK: r1:0 = vmpyweh(r1:0, r3:2):<<1:sat
+
+declare i64 @llvm.hexagon.M2.mmpyh.s0(i64, i64)
+define i64 @M2_mmpyh_s0(i64 %a, i64 %b) {
+ %z = call i64 @llvm.hexagon.M2.mmpyh.s0(i64 %a, i64 %b)
+ ret i64 %z
+}
+; CHECK: r1:0 = vmpywoh(r1:0, r3:2):sat
+
+declare i64 @llvm.hexagon.M2.mmpyh.s1(i64, i64)
+define i64 @M2_mmpyh_s1(i64 %a, i64 %b) {
+ %z = call i64 @llvm.hexagon.M2.mmpyh.s1(i64 %a, i64 %b)
+ ret i64 %z
+}
+; CHECK: r1:0 = vmpywoh(r1:0, r3:2):<<1:sat
+
+declare i64 @llvm.hexagon.M2.mmpyl.rs0(i64, i64)
+define i64 @M2_mmpyl_rs0(i64 %a, i64 %b) {
+ %z = call i64 @llvm.hexagon.M2.mmpyl.rs0(i64 %a, i64 %b)
+ ret i64 %z
+}
+; CHECK: r1:0 = vmpyweh(r1:0, r3:2):rnd:sat
+
+declare i64 @llvm.hexagon.M2.mmpyl.rs1(i64, i64)
+define i64 @M2_mmpyl_rs1(i64 %a, i64 %b) {
+ %z = call i64 @llvm.hexagon.M2.mmpyl.rs1(i64 %a, i64 %b)
+ ret i64 %z
+}
+; CHECK: r1:0 = vmpyweh(r1:0, r3:2):<<1:rnd:sat
+
+declare i64 @llvm.hexagon.M2.mmpyh.rs0(i64, i64)
+define i64 @M2_mmpyh_rs0(i64 %a, i64 %b) {
+ %z = call i64 @llvm.hexagon.M2.mmpyh.rs0(i64 %a, i64 %b)
+ ret i64 %z
+}
+; CHECK: r1:0 = vmpywoh(r1:0, r3:2):rnd:sat
+
+declare i64 @llvm.hexagon.M2.mmpyh.rs1(i64, i64)
+define i64 @M2_mmpyh_rs1(i64 %a, i64 %b) {
+ %z = call i64 @llvm.hexagon.M2.mmpyh.rs1(i64 %a, i64 %b)
+ ret i64 %z
+}
+; CHECK: r1:0 = vmpywoh(r1:0, r3:2):<<1:rnd:sat
+
+; Vector multiply word by unsigned half (32x16)
+declare i64 @llvm.hexagon.M2.mmpyul.s0(i64, i64)
+define i64 @M2_mmpyul_s0(i64 %a, i64 %b) {
+ %z = call i64 @llvm.hexagon.M2.mmpyul.s0(i64 %a, i64 %b)
+ ret i64 %z
+}
+; CHECK: r1:0 = vmpyweuh(r1:0, r3:2):sat
+
+declare i64 @llvm.hexagon.M2.mmpyul.s1(i64, i64)
+define i64 @M2_mmpyul_s1(i64 %a, i64 %b) {
+ %z = call i64 @llvm.hexagon.M2.mmpyul.s1(i64 %a, i64 %b)
+ ret i64 %z
+}
+; CHECK: r1:0 = vmpyweuh(r1:0, r3:2):<<1:sat
+
+declare i64 @llvm.hexagon.M2.mmpyuh.s0(i64, i64)
+define i64 @M2_mmpyuh_s0(i64 %a, i64 %b) {
+ %z = call i64 @llvm.hexagon.M2.mmpyuh.s0(i64 %a, i64 %b)
+ ret i64 %z
+}
+; CHECK: r1:0 = vmpywouh(r1:0, r3:2):sat
+
+declare i64 @llvm.hexagon.M2.mmpyuh.s1(i64, i64)
+define i64 @M2_mmpyuh_s1(i64 %a, i64 %b) {
+ %z = call i64 @llvm.hexagon.M2.mmpyuh.s1(i64 %a, i64 %b)
+ ret i64 %z
+}
+; CHECK: r1:0 = vmpywouh(r1:0, r3:2):<<1:sat
+
+declare i64 @llvm.hexagon.M2.mmpyul.rs0(i64, i64)
+define i64 @M2_mmpyul_rs0(i64 %a, i64 %b) {
+ %z = call i64 @llvm.hexagon.M2.mmpyul.rs0(i64 %a, i64 %b)
+ ret i64 %z
+}
+; CHECK: r1:0 = vmpyweuh(r1:0, r3:2):rnd:sat
+
+declare i64 @llvm.hexagon.M2.mmpyul.rs1(i64, i64)
+define i64 @M2_mmpyul_rs1(i64 %a, i64 %b) {
+ %z = call i64 @llvm.hexagon.M2.mmpyul.rs1(i64 %a, i64 %b)
+ ret i64 %z
+}
+; CHECK: r1:0 = vmpyweuh(r1:0, r3:2):<<1:rnd:sat
+
+declare i64 @llvm.hexagon.M2.mmpyuh.rs0(i64, i64)
+define i64 @M2_mmpyuh_rs0(i64 %a, i64 %b) {
+ %z = call i64 @llvm.hexagon.M2.mmpyuh.rs0(i64 %a, i64 %b)
+ ret i64 %z
+}
+; CHECK: r1:0 = vmpywouh(r1:0, r3:2):rnd:sat
+
+declare i64 @llvm.hexagon.M2.mmpyuh.rs1(i64, i64)
+define i64 @M2_mmpyuh_rs1(i64 %a, i64 %b) {
+ %z = call i64 @llvm.hexagon.M2.mmpyuh.rs1(i64 %a, i64 %b)
+ ret i64 %z
+}
+; CHECK: r1:0 = vmpywouh(r1:0, r3:2):<<1:rnd:sat
+
+; Multiply signed halfwords
+declare i64 @llvm.hexagon.M2.mpyd.ll.s0(i32, i32)
+define i64 @M2_mpyd_ll_s0(i32 %a, i32 %b) {
+ %z = call i64 @llvm.hexagon.M2.mpyd.ll.s0(i32 %a, i32 %b)
+ ret i64 %z
+}
+; CHECK: r1:0 = mpy(r0.l, r1.l)
+
+declare i64 @llvm.hexagon.M2.mpyd.ll.s1(i32, i32)
+define i64 @M2_mpyd_ll_s1(i32 %a, i32 %b) {
+ %z = call i64 @llvm.hexagon.M2.mpyd.ll.s1(i32 %a, i32 %b)
+ ret i64 %z
+}
+; CHECK: r1:0 = mpy(r0.l, r1.l):<<1
+
+declare i64 @llvm.hexagon.M2.mpyd.lh.s0(i32, i32)
+define i64 @M2_mpyd_lh_s0(i32 %a, i32 %b) {
+ %z = call i64 @llvm.hexagon.M2.mpyd.lh.s0(i32 %a, i32 %b)
+ ret i64 %z
+}
+; CHECK: r1:0 = mpy(r0.l, r1.h)
+
+declare i64 @llvm.hexagon.M2.mpyd.lh.s1(i32, i32)
+define i64 @M2_mpyd_lh_s1(i32 %a, i32 %b) {
+ %z = call i64 @llvm.hexagon.M2.mpyd.lh.s1(i32 %a, i32 %b)
+ ret i64 %z
+}
+; CHECK: r1:0 = mpy(r0.l, r1.h):<<1
+
+declare i64 @llvm.hexagon.M2.mpyd.hl.s0(i32, i32)
+define i64 @M2_mpyd_hl_s0(i32 %a, i32 %b) {
+ %z = call i64 @llvm.hexagon.M2.mpyd.hl.s0(i32 %a, i32 %b)
+ ret i64 %z
+}
+; CHECK: r1:0 = mpy(r0.h, r1.l)
+
+declare i64 @llvm.hexagon.M2.mpyd.hl.s1(i32, i32)
+define i64 @M2_mpyd_hl_s1(i32 %a, i32 %b) {
+ %z = call i64 @llvm.hexagon.M2.mpyd.hl.s1(i32 %a, i32 %b)
+ ret i64 %z
+}
+; CHECK: r1:0 = mpy(r0.h, r1.l):<<1
+
+declare i64 @llvm.hexagon.M2.mpyd.hh.s0(i32, i32)
+define i64 @M2_mpyd_hh_s0(i32 %a, i32 %b) {
+ %z = call i64 @llvm.hexagon.M2.mpyd.hh.s0(i32 %a, i32 %b)
+ ret i64 %z
+}
+; CHECK: r1:0 = mpy(r0.h, r1.h)
+
+declare i64 @llvm.hexagon.M2.mpyd.hh.s1(i32, i32)
+define i64 @M2_mpyd_hh_s1(i32 %a, i32 %b) {
+ %z = call i64 @llvm.hexagon.M2.mpyd.hh.s1(i32 %a, i32 %b)
+ ret i64 %z
+}
+; CHECK: r1:0 = mpy(r0.h, r1.h):<<1
+
+declare i64 @llvm.hexagon.M2.mpyd.rnd.ll.s0(i32, i32)
+define i64 @M2_mpyd_rnd_ll_s0(i32 %a, i32 %b) {
+ %z = call i64 @llvm.hexagon.M2.mpyd.rnd.ll.s0(i32 %a, i32 %b)
+ ret i64 %z
+}
+; CHECK: r1:0 = mpy(r0.l, r1.l):rnd
+
+declare i64 @llvm.hexagon.M2.mpyd.rnd.ll.s1(i32, i32)
+define i64 @M2_mpyd_rnd_ll_s1(i32 %a, i32 %b) {
+ %z = call i64 @llvm.hexagon.M2.mpyd.rnd.ll.s1(i32 %a, i32 %b)
+ ret i64 %z
+}
+; CHECK: r1:0 = mpy(r0.l, r1.l):<<1:rnd
+
+declare i64 @llvm.hexagon.M2.mpyd.rnd.lh.s0(i32, i32)
+define i64 @M2_mpyd_rnd_lh_s0(i32 %a, i32 %b) {
+ %z = call i64 @llvm.hexagon.M2.mpyd.rnd.lh.s0(i32 %a, i32 %b)
+ ret i64 %z
+}
+; CHECK: r1:0 = mpy(r0.l, r1.h):rnd
+
+declare i64 @llvm.hexagon.M2.mpyd.rnd.lh.s1(i32, i32)
+define i64 @M2_mpyd_rnd_lh_s1(i32 %a, i32 %b) {
+ %z = call i64 @llvm.hexagon.M2.mpyd.rnd.lh.s1(i32 %a, i32 %b)
+ ret i64 %z
+}
+; CHECK: r1:0 = mpy(r0.l, r1.h):<<1:rnd
+
+declare i64 @llvm.hexagon.M2.mpyd.rnd.hl.s0(i32, i32)
+define i64 @M2_mpyd_rnd_hl_s0(i32 %a, i32 %b) {
+ %z = call i64 @llvm.hexagon.M2.mpyd.rnd.hl.s0(i32 %a, i32 %b)
+ ret i64 %z
+}
+; CHECK: r1:0 = mpy(r0.h, r1.l):rnd
+
+declare i64 @llvm.hexagon.M2.mpyd.rnd.hl.s1(i32, i32)
+define i64 @M2_mpyd_rnd_hl_s1(i32 %a, i32 %b) {
+ %z = call i64 @llvm.hexagon.M2.mpyd.rnd.hl.s1(i32 %a, i32 %b)
+ ret i64 %z
+}
+; CHECK: r1:0 = mpy(r0.h, r1.l):<<1:rnd
+
+declare i64 @llvm.hexagon.M2.mpyd.rnd.hh.s0(i32, i32)
+define i64 @M2_mpyd_rnd_hh_s0(i32 %a, i32 %b) {
+ %z = call i64 @llvm.hexagon.M2.mpyd.rnd.hh.s0(i32 %a, i32 %b)
+ ret i64 %z
+}
+; CHECK: r1:0 = mpy(r0.h, r1.h):rnd
+
+declare i64 @llvm.hexagon.M2.mpyd.rnd.hh.s1(i32, i32)
+define i64 @M2_mpyd_rnd_hh_s1(i32 %a, i32 %b) {
+ %z = call i64 @llvm.hexagon.M2.mpyd.rnd.hh.s1(i32 %a, i32 %b)
+ ret i64 %z
+}
+; CHECK: r1:0 = mpy(r0.h, r1.h):<<1:rnd
+
+declare i64 @llvm.hexagon.M2.mpyd.acc.ll.s0(i64, i32, i32)
+define i64 @M2_mpyd_acc_ll_s0(i64 %a, i32 %b, i32 %c) {
+ %z = call i64 @llvm.hexagon.M2.mpyd.acc.ll.s0(i64 %a, i32 %b, i32 %c)
+ ret i64 %z
+}
+; CHECK: r1:0 += mpy(r2.l, r3.l)
+
+declare i64 @llvm.hexagon.M2.mpyd.acc.ll.s1(i64, i32, i32)
+define i64 @M2_mpyd_acc_ll_s1(i64 %a, i32 %b, i32 %c) {
+ %z = call i64 @llvm.hexagon.M2.mpyd.acc.ll.s1(i64 %a, i32 %b, i32 %c)
+ ret i64 %z
+}
+; CHECK: r1:0 += mpy(r2.l, r3.l):<<1
+
+declare i64 @llvm.hexagon.M2.mpyd.acc.lh.s0(i64, i32, i32)
+define i64 @M2_mpyd_acc_lh_s0(i64 %a, i32 %b, i32 %c) {
+ %z = call i64 @llvm.hexagon.M2.mpyd.acc.lh.s0(i64 %a, i32 %b, i32 %c)
+ ret i64 %z
+}
+; CHECK: r1:0 += mpy(r2.l, r3.h)
+
+declare i64 @llvm.hexagon.M2.mpyd.acc.lh.s1(i64, i32, i32)
+define i64 @M2_mpyd_acc_lh_s1(i64 %a, i32 %b, i32 %c) {
+ %z = call i64 @llvm.hexagon.M2.mpyd.acc.lh.s1(i64 %a, i32 %b, i32 %c)
+ ret i64 %z
+}
+; CHECK: r1:0 += mpy(r2.l, r3.h):<<1
+
+declare i64 @llvm.hexagon.M2.mpyd.acc.hl.s0(i64, i32, i32)
+define i64 @M2_mpyd_acc_hl_s0(i64 %a, i32 %b, i32 %c) {
+ %z = call i64 @llvm.hexagon.M2.mpyd.acc.hl.s0(i64 %a, i32 %b, i32 %c)
+ ret i64 %z
+}
+; CHECK: r1:0 += mpy(r2.h, r3.l)
+
+declare i64 @llvm.hexagon.M2.mpyd.acc.hl.s1(i64, i32, i32)
+define i64 @M2_mpyd_acc_hl_s1(i64 %a, i32 %b, i32 %c) {
+ %z = call i64 @llvm.hexagon.M2.mpyd.acc.hl.s1(i64 %a, i32 %b, i32 %c)
+ ret i64 %z
+}
+; CHECK: r1:0 += mpy(r2.h, r3.l):<<1
+
+declare i64 @llvm.hexagon.M2.mpyd.acc.hh.s0(i64, i32, i32)
+define i64 @M2_mpyd_acc_hh_s0(i64 %a, i32 %b, i32 %c) {
+ %z = call i64 @llvm.hexagon.M2.mpyd.acc.hh.s0(i64 %a, i32 %b, i32 %c)
+ ret i64 %z
+}
+; CHECK: r1:0 += mpy(r2.h, r3.h)
+
+declare i64 @llvm.hexagon.M2.mpyd.acc.hh.s1(i64, i32, i32)
+define i64 @M2_mpyd_acc_hh_s1(i64 %a, i32 %b, i32 %c) {
+ %z = call i64 @llvm.hexagon.M2.mpyd.acc.hh.s1(i64 %a, i32 %b, i32 %c)
+ ret i64 %z
+}
+; CHECK: r1:0 += mpy(r2.h, r3.h):<<1
+
+declare i64 @llvm.hexagon.M2.mpyd.nac.ll.s0(i64, i32, i32)
+define i64 @M2_mpyd_nac_ll_s0(i64 %a, i32 %b, i32 %c) {
+ %z = call i64 @llvm.hexagon.M2.mpyd.nac.ll.s0(i64 %a, i32 %b, i32 %c)
+ ret i64 %z
+}
+; CHECK: r1:0 -= mpy(r2.l, r3.l)
+
+declare i64 @llvm.hexagon.M2.mpyd.nac.ll.s1(i64, i32, i32)
+define i64 @M2_mpyd_nac_ll_s1(i64 %a, i32 %b, i32 %c) {
+ %z = call i64 @llvm.hexagon.M2.mpyd.nac.ll.s1(i64 %a, i32 %b, i32 %c)
+ ret i64 %z
+}
+; CHECK: r1:0 -= mpy(r2.l, r3.l):<<1
+
+declare i64 @llvm.hexagon.M2.mpyd.nac.lh.s0(i64, i32, i32)
+define i64 @M2_mpyd_nac_lh_s0(i64 %a, i32 %b, i32 %c) {
+ %z = call i64 @llvm.hexagon.M2.mpyd.nac.lh.s0(i64 %a, i32 %b, i32 %c)
+ ret i64 %z
+}
+; CHECK: r1:0 -= mpy(r2.l, r3.h)
+
+declare i64 @llvm.hexagon.M2.mpyd.nac.lh.s1(i64, i32, i32)
+define i64 @M2_mpyd_nac_lh_s1(i64 %a, i32 %b, i32 %c) {
+ %z = call i64 @llvm.hexagon.M2.mpyd.nac.lh.s1(i64 %a, i32 %b, i32 %c)
+ ret i64 %z
+}
+; CHECK: r1:0 -= mpy(r2.l, r3.h):<<1
+
+declare i64 @llvm.hexagon.M2.mpyd.nac.hl.s0(i64, i32, i32)
+define i64 @M2_mpyd_nac_hl_s0(i64 %a, i32 %b, i32 %c) {
+ %z = call i64 @llvm.hexagon.M2.mpyd.nac.hl.s0(i64 %a, i32 %b, i32 %c)
+ ret i64 %z
+}
+; CHECK: r1:0 -= mpy(r2.h, r3.l)
+
+declare i64 @llvm.hexagon.M2.mpyd.nac.hl.s1(i64, i32, i32)
+define i64 @M2_mpyd_nac_hl_s1(i64 %a, i32 %b, i32 %c) {
+ %z = call i64 @llvm.hexagon.M2.mpyd.nac.hl.s1(i64 %a, i32 %b, i32 %c)
+ ret i64 %z
+}
+; CHECK: r1:0 -= mpy(r2.h, r3.l):<<1
+
+declare i64 @llvm.hexagon.M2.mpyd.nac.hh.s0(i64, i32, i32)
+define i64 @M2_mpyd_nac_hh_s0(i64 %a, i32 %b, i32 %c) {
+ %z = call i64 @llvm.hexagon.M2.mpyd.nac.hh.s0(i64 %a, i32 %b, i32 %c)
+ ret i64 %z
+}
+; CHECK: r1:0 -= mpy(r2.h, r3.h)
+
+declare i64 @llvm.hexagon.M2.mpyd.nac.hh.s1(i64, i32, i32)
+define i64 @M2_mpyd_nac_hh_s1(i64 %a, i32 %b, i32 %c) {
+ %z = call i64 @llvm.hexagon.M2.mpyd.nac.hh.s1(i64 %a, i32 %b, i32 %c)
+ ret i64 %z
+}
+; CHECK: r1:0 -= mpy(r2.h, r3.h):<<1
+
+declare i32 @llvm.hexagon.M2.mpy.ll.s0(i32, i32)
+define i32 @M2_mpy_ll_s0(i32 %a, i32 %b) {
+ %z = call i32 @llvm.hexagon.M2.mpy.ll.s0(i32 %a, i32 %b)
+ ret i32 %z
+}
+; CHECK: r0 = mpy(r0.l, r1.l)
+
+declare i32 @llvm.hexagon.M2.mpy.ll.s1(i32, i32)
+define i32 @M2_mpy_ll_s1(i32 %a, i32 %b) {
+ %z = call i32 @llvm.hexagon.M2.mpy.ll.s1(i32 %a, i32 %b)
+ ret i32 %z
+}
+; CHECK: r0 = mpy(r0.l, r1.l):<<1
+
+declare i32 @llvm.hexagon.M2.mpy.lh.s0(i32, i32)
+define i32 @M2_mpy_lh_s0(i32 %a, i32 %b) {
+ %z = call i32 @llvm.hexagon.M2.mpy.lh.s0(i32 %a, i32 %b)
+ ret i32 %z
+}
+; CHECK: r0 = mpy(r0.l, r1.h)
+
+declare i32 @llvm.hexagon.M2.mpy.lh.s1(i32, i32)
+define i32 @M2_mpy_lh_s1(i32 %a, i32 %b) {
+ %z = call i32 @llvm.hexagon.M2.mpy.lh.s1(i32 %a, i32 %b)
+ ret i32 %z
+}
+; CHECK: r0 = mpy(r0.l, r1.h):<<1
+
+declare i32 @llvm.hexagon.M2.mpy.hl.s0(i32, i32)
+define i32 @M2_mpy_hl_s0(i32 %a, i32 %b) {
+ %z = call i32 @llvm.hexagon.M2.mpy.hl.s0(i32 %a, i32 %b)
+ ret i32 %z
+}
+; CHECK: r0 = mpy(r0.h, r1.l)
+
+declare i32 @llvm.hexagon.M2.mpy.hl.s1(i32, i32)
+define i32 @M2_mpy_hl_s1(i32 %a, i32 %b) {
+ %z = call i32 @llvm.hexagon.M2.mpy.hl.s1(i32 %a, i32 %b)
+ ret i32 %z
+}
+; CHECK: r0 = mpy(r0.h, r1.l):<<1
+
+declare i32 @llvm.hexagon.M2.mpy.hh.s0(i32, i32)
+define i32 @M2_mpy_hh_s0(i32 %a, i32 %b) {
+ %z = call i32 @llvm.hexagon.M2.mpy.hh.s0(i32 %a, i32 %b)
+ ret i32 %z
+}
+; CHECK: r0 = mpy(r0.h, r1.h)
+
+declare i32 @llvm.hexagon.M2.mpy.hh.s1(i32, i32)
+define i32 @M2_mpy_hh_s1(i32 %a, i32 %b) {
+ %z = call i32 @llvm.hexagon.M2.mpy.hh.s1(i32 %a, i32 %b)
+ ret i32 %z
+}
+; CHECK: r0 = mpy(r0.h, r1.h):<<1
+
+declare i32 @llvm.hexagon.M2.mpy.sat.ll.s0(i32, i32)
+define i32 @M2_mpy_sat_ll_s0(i32 %a, i32 %b) {
+ %z = call i32 @llvm.hexagon.M2.mpy.sat.ll.s0(i32 %a, i32 %b)
+ ret i32 %z
+}
+; CHECK: r0 = mpy(r0.l, r1.l):sat
+
+declare i32 @llvm.hexagon.M2.mpy.sat.ll.s1(i32, i32)
+define i32 @M2_mpy_sat_ll_s1(i32 %a, i32 %b) {
+ %z = call i32 @llvm.hexagon.M2.mpy.sat.ll.s1(i32 %a, i32 %b)
+ ret i32 %z
+}
+; CHECK: r0 = mpy(r0.l, r1.l):<<1:sat
+
+declare i32 @llvm.hexagon.M2.mpy.sat.lh.s0(i32, i32)
+define i32 @M2_mpy_sat_lh_s0(i32 %a, i32 %b) {
+ %z = call i32 @llvm.hexagon.M2.mpy.sat.lh.s0(i32 %a, i32 %b)
+ ret i32 %z
+}
+; CHECK: r0 = mpy(r0.l, r1.h):sat
+
+declare i32 @llvm.hexagon.M2.mpy.sat.lh.s1(i32, i32)
+define i32 @M2_mpy_sat_lh_s1(i32 %a, i32 %b) {
+ %z = call i32 @llvm.hexagon.M2.mpy.sat.lh.s1(i32 %a, i32 %b)
+ ret i32 %z
+}
+; CHECK: r0 = mpy(r0.l, r1.h):<<1:sat
+
+declare i32 @llvm.hexagon.M2.mpy.sat.hl.s0(i32, i32)
+define i32 @M2_mpy_sat_hl_s0(i32 %a, i32 %b) {
+ %z = call i32 @llvm.hexagon.M2.mpy.sat.hl.s0(i32 %a, i32 %b)
+ ret i32 %z
+}
+; CHECK: r0 = mpy(r0.h, r1.l):sat
+
+declare i32 @llvm.hexagon.M2.mpy.sat.hl.s1(i32, i32)
+define i32 @M2_mpy_sat_hl_s1(i32 %a, i32 %b) {
+ %z = call i32 @llvm.hexagon.M2.mpy.sat.hl.s1(i32 %a, i32 %b)
+ ret i32 %z
+}
+; CHECK: r0 = mpy(r0.h, r1.l):<<1:sat
+
+declare i32 @llvm.hexagon.M2.mpy.sat.hh.s0(i32, i32)
+define i32 @M2_mpy_sat_hh_s0(i32 %a, i32 %b) {
+ %z = call i32 @llvm.hexagon.M2.mpy.sat.hh.s0(i32 %a, i32 %b)
+ ret i32 %z
+}
+; CHECK: r0 = mpy(r0.h, r1.h):sat
+
+declare i32 @llvm.hexagon.M2.mpy.sat.hh.s1(i32, i32)
+define i32 @M2_mpy_sat_hh_s1(i32 %a, i32 %b) {
+ %z = call i32 @llvm.hexagon.M2.mpy.sat.hh.s1(i32 %a, i32 %b)
+ ret i32 %z
+}
+; CHECK: r0 = mpy(r0.h, r1.h):<<1:sat
+
+declare i32 @llvm.hexagon.M2.mpy.sat.rnd.ll.s0(i32, i32)
+define i32 @M2_mpy_sat_rnd_ll_s0(i32 %a, i32 %b) {
+ %z = call i32 @llvm.hexagon.M2.mpy.sat.rnd.ll.s0(i32 %a, i32 %b)
+ ret i32 %z
+}
+; CHECK: r0 = mpy(r0.l, r1.l):rnd:sat
+
+declare i32 @llvm.hexagon.M2.mpy.sat.rnd.ll.s1(i32, i32)
+define i32 @M2_mpy_sat_rnd_ll_s1(i32 %a, i32 %b) {
+ %z = call i32 @llvm.hexagon.M2.mpy.sat.rnd.ll.s1(i32 %a, i32 %b)
+ ret i32 %z
+}
+; CHECK: r0 = mpy(r0.l, r1.l):<<1:rnd:sat
+
+declare i32 @llvm.hexagon.M2.mpy.sat.rnd.lh.s0(i32, i32)
+define i32 @M2_mpy_sat_rnd_lh_s0(i32 %a, i32 %b) {
+ %z = call i32 @llvm.hexagon.M2.mpy.sat.rnd.lh.s0(i32 %a, i32 %b)
+ ret i32 %z
+}
+; CHECK: r0 = mpy(r0.l, r1.h):rnd:sat
+
+declare i32 @llvm.hexagon.M2.mpy.sat.rnd.lh.s1(i32, i32)
+define i32 @M2_mpy_sat_rnd_lh_s1(i32 %a, i32 %b) {
+ %z = call i32 @llvm.hexagon.M2.mpy.sat.rnd.lh.s1(i32 %a, i32 %b)
+ ret i32 %z
+}
+; CHECK: r0 = mpy(r0.l, r1.h):<<1:rnd:sat
+
+declare i32 @llvm.hexagon.M2.mpy.sat.rnd.hl.s0(i32, i32)
+define i32 @M2_mpy_sat_rnd_hl_s0(i32 %a, i32 %b) {
+ %z = call i32 @llvm.hexagon.M2.mpy.sat.rnd.hl.s0(i32 %a, i32 %b)
+ ret i32 %z
+}
+; CHECK: r0 = mpy(r0.h, r1.l):rnd:sat
+
+declare i32 @llvm.hexagon.M2.mpy.sat.rnd.hl.s1(i32, i32)
+define i32 @M2_mpy_sat_rnd_hl_s1(i32 %a, i32 %b) {
+ %z = call i32 @llvm.hexagon.M2.mpy.sat.rnd.hl.s1(i32 %a, i32 %b)
+ ret i32 %z
+}
+; CHECK: r0 = mpy(r0.h, r1.l):<<1:rnd:sat
+
+declare i32 @llvm.hexagon.M2.mpy.sat.rnd.hh.s0(i32, i32)
+define i32 @M2_mpy_sat_rnd_hh_s0(i32 %a, i32 %b) {
+ %z = call i32 @llvm.hexagon.M2.mpy.sat.rnd.hh.s0(i32 %a, i32 %b)
+ ret i32 %z
+}
+; CHECK: r0 = mpy(r0.h, r1.h):rnd:sat
+
+declare i32 @llvm.hexagon.M2.mpy.sat.rnd.hh.s1(i32, i32)
+define i32 @M2_mpy_sat_rnd_hh_s1(i32 %a, i32 %b) {
+ %z = call i32 @llvm.hexagon.M2.mpy.sat.rnd.hh.s1(i32 %a, i32 %b)
+ ret i32 %z
+}
+; CHECK: r0 = mpy(r0.h, r1.h):<<1:rnd:sat
+
+declare i32 @llvm.hexagon.M2.mpy.acc.ll.s0(i32, i32, i32)
+define i32 @M2_mpy_acc_ll_s0(i32 %a, i32 %b, i32 %c) {
+ %z = call i32 @llvm.hexagon.M2.mpy.acc.ll.s0(i32 %a, i32 %b, i32 %c)
+ ret i32 %z
+}
+; CHECK: r0 += mpy(r1.l, r2.l)
+
+declare i32 @llvm.hexagon.M2.mpy.acc.ll.s1(i32, i32, i32)
+define i32 @M2_mpy_acc_ll_s1(i32 %a, i32 %b, i32 %c) {
+ %z = call i32 @llvm.hexagon.M2.mpy.acc.ll.s1(i32 %a, i32 %b, i32 %c)
+ ret i32 %z
+}
+; CHECK: r0 += mpy(r1.l, r2.l):<<1
+
+declare i32 @llvm.hexagon.M2.mpy.acc.lh.s0(i32, i32, i32)
+define i32 @M2_mpy_acc_lh_s0(i32 %a, i32 %b, i32 %c) {
+ %z = call i32 @llvm.hexagon.M2.mpy.acc.lh.s0(i32 %a, i32 %b, i32 %c)
+ ret i32 %z
+}
+; CHECK: r0 += mpy(r1.l, r2.h)
+
+declare i32 @llvm.hexagon.M2.mpy.acc.lh.s1(i32, i32, i32)
+define i32 @M2_mpy_acc_lh_s1(i32 %a, i32 %b, i32 %c) {
+ %z = call i32 @llvm.hexagon.M2.mpy.acc.lh.s1(i32 %a, i32 %b, i32 %c)
+ ret i32 %z
+}
+; CHECK: r0 += mpy(r1.l, r2.h):<<1
+
+declare i32 @llvm.hexagon.M2.mpy.acc.hl.s0(i32, i32, i32)
+define i32 @M2_mpy_acc_hl_s0(i32 %a, i32 %b, i32 %c) {
+ %z = call i32 @llvm.hexagon.M2.mpy.acc.hl.s0(i32 %a, i32 %b, i32 %c)
+ ret i32 %z
+}
+; CHECK: r0 += mpy(r1.h, r2.l)
+
+declare i32 @llvm.hexagon.M2.mpy.acc.hl.s1(i32, i32, i32)
+define i32 @M2_mpy_acc_hl_s1(i32 %a, i32 %b, i32 %c) {
+ %z = call i32 @llvm.hexagon.M2.mpy.acc.hl.s1(i32 %a, i32 %b, i32 %c)
+ ret i32 %z
+}
+; CHECK: r0 += mpy(r1.h, r2.l):<<1
+
+declare i32 @llvm.hexagon.M2.mpy.acc.hh.s0(i32, i32, i32)
+define i32 @M2_mpy_acc_hh_s0(i32 %a, i32 %b, i32 %c) {
+ %z = call i32 @llvm.hexagon.M2.mpy.acc.hh.s0(i32 %a, i32 %b, i32 %c)
+ ret i32 %z
+}
+; CHECK: r0 += mpy(r1.h, r2.h)
+
+declare i32 @llvm.hexagon.M2.mpy.acc.hh.s1(i32, i32, i32)
+define i32 @M2_mpy_acc_hh_s1(i32 %a, i32 %b, i32 %c) {
+ %z = call i32 @llvm.hexagon.M2.mpy.acc.hh.s1(i32 %a, i32 %b, i32 %c)
+ ret i32 %z
+}
+; CHECK: r0 += mpy(r1.h, r2.h):<<1
+
+declare i32 @llvm.hexagon.M2.mpy.acc.sat.ll.s0(i32, i32, i32)
+define i32 @M2_mpy_acc_sat_ll_s0(i32 %a, i32 %b, i32 %c) {
+ %z = call i32 @llvm.hexagon.M2.mpy.acc.sat.ll.s0(i32 %a, i32 %b, i32 %c)
+ ret i32 %z
+}
+; CHECK: r0 += mpy(r1.l, r2.l):sat
+
+declare i32 @llvm.hexagon.M2.mpy.acc.sat.ll.s1(i32, i32, i32)
+define i32 @M2_mpy_acc_sat_ll_s1(i32 %a, i32 %b, i32 %c) {
+ %z = call i32 @llvm.hexagon.M2.mpy.acc.sat.ll.s1(i32 %a, i32 %b, i32 %c)
+ ret i32 %z
+}
+; CHECK: r0 += mpy(r1.l, r2.l):<<1:sat
+
+declare i32 @llvm.hexagon.M2.mpy.acc.sat.lh.s0(i32, i32, i32)
+define i32 @M2_mpy_acc_sat_lh_s0(i32 %a, i32 %b, i32 %c) {
+ %z = call i32 @llvm.hexagon.M2.mpy.acc.sat.lh.s0(i32 %a, i32 %b, i32 %c)
+ ret i32 %z
+}
+; CHECK: r0 += mpy(r1.l, r2.h):sat
+
+declare i32 @llvm.hexagon.M2.mpy.acc.sat.lh.s1(i32, i32, i32)
+define i32 @M2_mpy_acc_sat_lh_s1(i32 %a, i32 %b, i32 %c) {
+ %z = call i32 @llvm.hexagon.M2.mpy.acc.sat.lh.s1(i32 %a, i32 %b, i32 %c)
+ ret i32 %z
+}
+; CHECK: r0 += mpy(r1.l, r2.h):<<1:sat
+
+declare i32 @llvm.hexagon.M2.mpy.acc.sat.hl.s0(i32, i32, i32)
+define i32 @M2_mpy_acc_sat_hl_s0(i32 %a, i32 %b, i32 %c) {
+ %z = call i32 @llvm.hexagon.M2.mpy.acc.sat.hl.s0(i32 %a, i32 %b, i32 %c)
+ ret i32 %z
+}
+; CHECK: r0 += mpy(r1.h, r2.l):sat
+
+declare i32 @llvm.hexagon.M2.mpy.acc.sat.hl.s1(i32, i32, i32)
+define i32 @M2_mpy_acc_sat_hl_s1(i32 %a, i32 %b, i32 %c) {
+ %z = call i32 @llvm.hexagon.M2.mpy.acc.sat.hl.s1(i32 %a, i32 %b, i32 %c)
+ ret i32 %z
+}
+; CHECK: r0 += mpy(r1.h, r2.l):<<1:sat
+
+declare i32 @llvm.hexagon.M2.mpy.acc.sat.hh.s0(i32, i32, i32)
+define i32 @M2_mpy_acc_sat_hh_s0(i32 %a, i32 %b, i32 %c) {
+ %z = call i32 @llvm.hexagon.M2.mpy.acc.sat.hh.s0(i32 %a, i32 %b, i32 %c)
+ ret i32 %z
+}
+; CHECK: r0 += mpy(r1.h, r2.h):sat
+
+declare i32 @llvm.hexagon.M2.mpy.acc.sat.hh.s1(i32, i32, i32)
+define i32 @M2_mpy_acc_sat_hh_s1(i32 %a, i32 %b, i32 %c) {
+ %z = call i32 @llvm.hexagon.M2.mpy.acc.sat.hh.s1(i32 %a, i32 %b, i32 %c)
+ ret i32 %z
+}
+; CHECK: r0 += mpy(r1.h, r2.h):<<1:sat
+
+declare i32 @llvm.hexagon.M2.mpy.nac.ll.s0(i32, i32, i32)
+define i32 @M2_mpy_nac_ll_s0(i32 %a, i32 %b, i32 %c) {
+ %z = call i32 @llvm.hexagon.M2.mpy.nac.ll.s0(i32 %a, i32 %b, i32 %c)
+ ret i32 %z
+}
+; CHECK: r0 -= mpy(r1.l, r2.l)
+
+declare i32 @llvm.hexagon.M2.mpy.nac.ll.s1(i32, i32, i32)
+define i32 @M2_mpy_nac_ll_s1(i32 %a, i32 %b, i32 %c) {
+ %z = call i32 @llvm.hexagon.M2.mpy.nac.ll.s1(i32 %a, i32 %b, i32 %c)
+ ret i32 %z
+}
+; CHECK: r0 -= mpy(r1.l, r2.l):<<1
+
+declare i32 @llvm.hexagon.M2.mpy.nac.lh.s0(i32, i32, i32)
+define i32 @M2_mpy_nac_lh_s0(i32 %a, i32 %b, i32 %c) {
+ %z = call i32 @llvm.hexagon.M2.mpy.nac.lh.s0(i32 %a, i32 %b, i32 %c)
+ ret i32 %z
+}
+; CHECK: r0 -= mpy(r1.l, r2.h)
+
+declare i32 @llvm.hexagon.M2.mpy.nac.lh.s1(i32, i32, i32)
+define i32 @M2_mpy_nac_lh_s1(i32 %a, i32 %b, i32 %c) {
+ %z = call i32 @llvm.hexagon.M2.mpy.nac.lh.s1(i32 %a, i32 %b, i32 %c)
+ ret i32 %z
+}
+; CHECK: r0 -= mpy(r1.l, r2.h):<<1
+
+declare i32 @llvm.hexagon.M2.mpy.nac.hl.s0(i32, i32, i32)
+define i32 @M2_mpy_nac_hl_s0(i32 %a, i32 %b, i32 %c) {
+ %z = call i32 @llvm.hexagon.M2.mpy.nac.hl.s0(i32 %a, i32 %b, i32 %c)
+ ret i32 %z
+}
+; CHECK: r0 -= mpy(r1.h, r2.l)
+
+declare i32 @llvm.hexagon.M2.mpy.nac.hl.s1(i32, i32, i32)
+define i32 @M2_mpy_nac_hl_s1(i32 %a, i32 %b, i32 %c) {
+ %z = call i32 @llvm.hexagon.M2.mpy.nac.hl.s1(i32 %a, i32 %b, i32 %c)
+ ret i32 %z
+}
+; CHECK: r0 -= mpy(r1.h, r2.l):<<1
+
+declare i32 @llvm.hexagon.M2.mpy.nac.hh.s0(i32, i32, i32)
+define i32 @M2_mpy_nac_hh_s0(i32 %a, i32 %b, i32 %c) {
+ %z = call i32 @llvm.hexagon.M2.mpy.nac.hh.s0(i32 %a, i32 %b, i32 %c)
+ ret i32 %z
+}
+; CHECK: r0 -= mpy(r1.h, r2.h)
+
+declare i32 @llvm.hexagon.M2.mpy.nac.hh.s1(i32, i32, i32)
+define i32 @M2_mpy_nac_hh_s1(i32 %a, i32 %b, i32 %c) {
+ %z = call i32 @llvm.hexagon.M2.mpy.nac.hh.s1(i32 %a, i32 %b, i32 %c)
+ ret i32 %z
+}
+; CHECK: r0 -= mpy(r1.h, r2.h):<<1
+
+declare i32 @llvm.hexagon.M2.mpy.nac.sat.ll.s0(i32, i32, i32)
+define i32 @M2_mpy_nac_sat_ll_s0(i32 %a, i32 %b, i32 %c) {
+ %z = call i32 @llvm.hexagon.M2.mpy.nac.sat.ll.s0(i32 %a, i32 %b, i32 %c)
+ ret i32 %z
+}
+; CHECK: r0 -= mpy(r1.l, r2.l):sat
+
+declare i32 @llvm.hexagon.M2.mpy.nac.sat.ll.s1(i32, i32, i32)
+define i32 @M2_mpy_nac_sat_ll_s1(i32 %a, i32 %b, i32 %c) {
+ %z = call i32 @llvm.hexagon.M2.mpy.nac.sat.ll.s1(i32 %a, i32 %b, i32 %c)
+ ret i32 %z
+}
+; CHECK: r0 -= mpy(r1.l, r2.l):<<1:sat
+
+declare i32 @llvm.hexagon.M2.mpy.nac.sat.lh.s0(i32, i32, i32)
+define i32 @M2_mpy_nac_sat_lh_s0(i32 %a, i32 %b, i32 %c) {
+ %z = call i32 @llvm.hexagon.M2.mpy.nac.sat.lh.s0(i32 %a, i32 %b, i32 %c)
+ ret i32 %z
+}
+; CHECK: r0 -= mpy(r1.l, r2.h):sat
+
+declare i32 @llvm.hexagon.M2.mpy.nac.sat.lh.s1(i32, i32, i32)
+define i32 @M2_mpy_nac_sat_lh_s1(i32 %a, i32 %b, i32 %c) {
+ %z = call i32 @llvm.hexagon.M2.mpy.nac.sat.lh.s1(i32 %a, i32 %b, i32 %c)
+ ret i32 %z
+}
+; CHECK: r0 -= mpy(r1.l, r2.h):<<1:sat
+
+declare i32 @llvm.hexagon.M2.mpy.nac.sat.hl.s0(i32, i32, i32)
+define i32 @M2_mpy_nac_sat_hl_s0(i32 %a, i32 %b, i32 %c) {
+ %z = call i32 @llvm.hexagon.M2.mpy.nac.sat.hl.s0(i32 %a, i32 %b, i32 %c)
+ ret i32 %z
+}
+; CHECK: r0 -= mpy(r1.h, r2.l):sat
+
+declare i32 @llvm.hexagon.M2.mpy.nac.sat.hl.s1(i32, i32, i32)
+define i32 @M2_mpy_nac_sat_hl_s1(i32 %a, i32 %b, i32 %c) {
+ %z = call i32 @llvm.hexagon.M2.mpy.nac.sat.hl.s1(i32 %a, i32 %b, i32 %c)
+ ret i32 %z
+}
+; CHECK: r0 -= mpy(r1.h, r2.l):<<1:sat
+
+declare i32 @llvm.hexagon.M2.mpy.nac.sat.hh.s0(i32, i32, i32)
+define i32 @M2_mpy_nac_sat_hh_s0(i32 %a, i32 %b, i32 %c) {
+ %z = call i32 @llvm.hexagon.M2.mpy.nac.sat.hh.s0(i32 %a, i32 %b, i32 %c)
+ ret i32 %z
+}
+; CHECK: r0 -= mpy(r1.h, r2.h):sat
+
+declare i32 @llvm.hexagon.M2.mpy.nac.sat.hh.s1(i32, i32, i32)
+define i32 @M2_mpy_nac_sat_hh_s1(i32 %a, i32 %b, i32 %c) {
+ %z = call i32 @llvm.hexagon.M2.mpy.nac.sat.hh.s1(i32 %a, i32 %b, i32 %c)
+ ret i32 %z
+}
+; CHECK: r0 -= mpy(r1.h, r2.h):<<1:sat
+
+; Multiply unsigned halfwords
+declare i64 @llvm.hexagon.M2.mpyud.ll.s0(i32, i32)
+define i64 @M2_mpyud_ll_s0(i32 %a, i32 %b) {
+ %z = call i64 @llvm.hexagon.M2.mpyud.ll.s0(i32 %a, i32 %b)
+ ret i64 %z
+}
+; CHECK: r1:0 = mpyu(r0.l, r1.l)
+
+declare i64 @llvm.hexagon.M2.mpyud.ll.s1(i32, i32)
+define i64 @M2_mpyud_ll_s1(i32 %a, i32 %b) {
+ %z = call i64 @llvm.hexagon.M2.mpyud.ll.s1(i32 %a, i32 %b)
+ ret i64 %z
+}
+; CHECK: r1:0 = mpyu(r0.l, r1.l):<<1
+
+declare i64 @llvm.hexagon.M2.mpyud.lh.s0(i32, i32)
+define i64 @M2_mpyud_lh_s0(i32 %a, i32 %b) {
+ %z = call i64 @llvm.hexagon.M2.mpyud.lh.s0(i32 %a, i32 %b)
+ ret i64 %z
+}
+; CHECK: r1:0 = mpyu(r0.l, r1.h)
+
+declare i64 @llvm.hexagon.M2.mpyud.lh.s1(i32, i32)
+define i64 @M2_mpyud_lh_s1(i32 %a, i32 %b) {
+ %z = call i64 @llvm.hexagon.M2.mpyud.lh.s1(i32 %a, i32 %b)
+ ret i64 %z
+}
+; CHECK: r1:0 = mpyu(r0.l, r1.h):<<1
+
+declare i64 @llvm.hexagon.M2.mpyud.hl.s0(i32, i32)
+define i64 @M2_mpyud_hl_s0(i32 %a, i32 %b) {
+ %z = call i64 @llvm.hexagon.M2.mpyud.hl.s0(i32 %a, i32 %b)
+ ret i64 %z
+}
+; CHECK: r1:0 = mpyu(r0.h, r1.l)
+
+declare i64 @llvm.hexagon.M2.mpyud.hl.s1(i32, i32)
+define i64 @M2_mpyud_hl_s1(i32 %a, i32 %b) {
+ %z = call i64 @llvm.hexagon.M2.mpyud.hl.s1(i32 %a, i32 %b)
+ ret i64 %z
+}
+; CHECK: r1:0 = mpyu(r0.h, r1.l):<<1
+
+declare i64 @llvm.hexagon.M2.mpyud.hh.s0(i32, i32)
+define i64 @M2_mpyud_hh_s0(i32 %a, i32 %b) {
+ %z = call i64 @llvm.hexagon.M2.mpyud.hh.s0(i32 %a, i32 %b)
+ ret i64 %z
+}
+; CHECK: r1:0 = mpyu(r0.h, r1.h)
+
+declare i64 @llvm.hexagon.M2.mpyud.hh.s1(i32, i32)
+define i64 @M2_mpyud_hh_s1(i32 %a, i32 %b) {
+ %z = call i64 @llvm.hexagon.M2.mpyud.hh.s1(i32 %a, i32 %b)
+ ret i64 %z
+}
+; CHECK: r1:0 = mpyu(r0.h, r1.h):<<1
+
+declare i64 @llvm.hexagon.M2.mpyud.acc.ll.s0(i64, i32, i32)
+define i64 @M2_mpyud_acc_ll_s0(i64 %a, i32 %b, i32 %c) {
+ %z = call i64 @llvm.hexagon.M2.mpyud.acc.ll.s0(i64 %a, i32 %b, i32 %c)
+ ret i64 %z
+}
+; CHECK: r1:0 += mpyu(r2.l, r3.l)
+
+declare i64 @llvm.hexagon.M2.mpyud.acc.ll.s1(i64, i32, i32)
+define i64 @M2_mpyud_acc_ll_s1(i64 %a, i32 %b, i32 %c) {
+ %z = call i64 @llvm.hexagon.M2.mpyud.acc.ll.s1(i64 %a, i32 %b, i32 %c)
+ ret i64 %z
+}
+; CHECK: r1:0 += mpyu(r2.l, r3.l):<<1
+
+declare i64 @llvm.hexagon.M2.mpyud.acc.lh.s0(i64, i32, i32)
+define i64 @M2_mpyud_acc_lh_s0(i64 %a, i32 %b, i32 %c) {
+ %z = call i64 @llvm.hexagon.M2.mpyud.acc.lh.s0(i64 %a, i32 %b, i32 %c)
+ ret i64 %z
+}
+; CHECK: r1:0 += mpyu(r2.l, r3.h)
+
+declare i64 @llvm.hexagon.M2.mpyud.acc.lh.s1(i64, i32, i32)
+define i64 @M2_mpyud_acc_lh_s1(i64 %a, i32 %b, i32 %c) {
+ %z = call i64 @llvm.hexagon.M2.mpyud.acc.lh.s1(i64 %a, i32 %b, i32 %c)
+ ret i64 %z
+}
+; CHECK: r1:0 += mpyu(r2.l, r3.h):<<1
+
+declare i64 @llvm.hexagon.M2.mpyud.acc.hl.s0(i64, i32, i32)
+define i64 @M2_mpyud_acc_hl_s0(i64 %a, i32 %b, i32 %c) {
+ %z = call i64 @llvm.hexagon.M2.mpyud.acc.hl.s0(i64 %a, i32 %b, i32 %c)
+ ret i64 %z
+}
+; CHECK: r1:0 += mpyu(r2.h, r3.l)
+
+declare i64 @llvm.hexagon.M2.mpyud.acc.hl.s1(i64, i32, i32)
+define i64 @M2_mpyud_acc_hl_s1(i64 %a, i32 %b, i32 %c) {
+ %z = call i64 @llvm.hexagon.M2.mpyud.acc.hl.s1(i64 %a, i32 %b, i32 %c)
+ ret i64 %z
+}
+; CHECK: r1:0 += mpyu(r2.h, r3.l):<<1
+
+declare i64 @llvm.hexagon.M2.mpyud.acc.hh.s0(i64, i32, i32)
+define i64 @M2_mpyud_acc_hh_s0(i64 %a, i32 %b, i32 %c) {
+ %z = call i64 @llvm.hexagon.M2.mpyud.acc.hh.s0(i64 %a, i32 %b, i32 %c)
+ ret i64 %z
+}
+; CHECK: r1:0 += mpyu(r2.h, r3.h)
+
+declare i64 @llvm.hexagon.M2.mpyud.acc.hh.s1(i64, i32, i32)
+define i64 @M2_mpyud_acc_hh_s1(i64 %a, i32 %b, i32 %c) {
+ %z = call i64 @llvm.hexagon.M2.mpyud.acc.hh.s1(i64 %a, i32 %b, i32 %c)
+ ret i64 %z
+}
+; CHECK: r1:0 += mpyu(r2.h, r3.h):<<1
+
+declare i64 @llvm.hexagon.M2.mpyud.nac.ll.s0(i64, i32, i32)
+define i64 @M2_mpyud_nac_ll_s0(i64 %a, i32 %b, i32 %c) {
+ %z = call i64 @llvm.hexagon.M2.mpyud.nac.ll.s0(i64 %a, i32 %b, i32 %c)
+ ret i64 %z
+}
+; CHECK: r1:0 -= mpyu(r2.l, r3.l)
+
+declare i64 @llvm.hexagon.M2.mpyud.nac.ll.s1(i64, i32, i32)
+define i64 @M2_mpyud_nac_ll_s1(i64 %a, i32 %b, i32 %c) {
+ %z = call i64 @llvm.hexagon.M2.mpyud.nac.ll.s1(i64 %a, i32 %b, i32 %c)
+ ret i64 %z
+}
+; CHECK: r1:0 -= mpyu(r2.l, r3.l):<<1
+
+declare i64 @llvm.hexagon.M2.mpyud.nac.lh.s0(i64, i32, i32)
+define i64 @M2_mpyud_nac_lh_s0(i64 %a, i32 %b, i32 %c) {
+ %z = call i64 @llvm.hexagon.M2.mpyud.nac.lh.s0(i64 %a, i32 %b, i32 %c)
+ ret i64 %z
+}
+; CHECK: r1:0 -= mpyu(r2.l, r3.h)
+
+declare i64 @llvm.hexagon.M2.mpyud.nac.lh.s1(i64, i32, i32)
+define i64 @M2_mpyud_nac_lh_s1(i64 %a, i32 %b, i32 %c) {
+ %z = call i64 @llvm.hexagon.M2.mpyud.nac.lh.s1(i64 %a, i32 %b, i32 %c)
+ ret i64 %z
+}
+; CHECK: r1:0 -= mpyu(r2.l, r3.h):<<1
+
+declare i64 @llvm.hexagon.M2.mpyud.nac.hl.s0(i64, i32, i32)
+define i64 @M2_mpyud_nac_hl_s0(i64 %a, i32 %b, i32 %c) {
+ %z = call i64 @llvm.hexagon.M2.mpyud.nac.hl.s0(i64 %a, i32 %b, i32 %c)
+ ret i64 %z
+}
+; CHECK: r1:0 -= mpyu(r2.h, r3.l)
+
+declare i64 @llvm.hexagon.M2.mpyud.nac.hl.s1(i64, i32, i32)
+define i64 @M2_mpyud_nac_hl_s1(i64 %a, i32 %b, i32 %c) {
+ %z = call i64 @llvm.hexagon.M2.mpyud.nac.hl.s1(i64 %a, i32 %b, i32 %c)
+ ret i64 %z
+}
+; CHECK: r1:0 -= mpyu(r2.h, r3.l):<<1
+
+declare i64 @llvm.hexagon.M2.mpyud.nac.hh.s0(i64, i32, i32)
+define i64 @M2_mpyud_nac_hh_s0(i64 %a, i32 %b, i32 %c) {
+ %z = call i64 @llvm.hexagon.M2.mpyud.nac.hh.s0(i64 %a, i32 %b, i32 %c)
+ ret i64 %z
+}
+; CHECK: r1:0 -= mpyu(r2.h, r3.h)
+
+declare i64 @llvm.hexagon.M2.mpyud.nac.hh.s1(i64, i32, i32)
+define i64 @M2_mpyud_nac_hh_s1(i64 %a, i32 %b, i32 %c) {
+ %z = call i64 @llvm.hexagon.M2.mpyud.nac.hh.s1(i64 %a, i32 %b, i32 %c)
+ ret i64 %z
+}
+; CHECK: r1:0 -= mpyu(r2.h, r3.h):<<1
+
+declare i32 @llvm.hexagon.M2.mpyu.ll.s0(i32, i32)
+define i32 @M2_mpyu_ll_s0(i32 %a, i32 %b) {
+ %z = call i32 @llvm.hexagon.M2.mpyu.ll.s0(i32 %a, i32 %b)
+ ret i32 %z
+}
+; CHECK: r0 = mpyu(r0.l, r1.l)
+
+declare i32 @llvm.hexagon.M2.mpyu.ll.s1(i32, i32)
+define i32 @M2_mpyu_ll_s1(i32 %a, i32 %b) {
+ %z = call i32 @llvm.hexagon.M2.mpyu.ll.s1(i32 %a, i32 %b)
+ ret i32 %z
+}
+; CHECK: r0 = mpyu(r0.l, r1.l):<<1
+
+declare i32 @llvm.hexagon.M2.mpyu.lh.s0(i32, i32)
+define i32 @M2_mpyu_lh_s0(i32 %a, i32 %b) {
+ %z = call i32 @llvm.hexagon.M2.mpyu.lh.s0(i32 %a, i32 %b)
+ ret i32 %z
+}
+; CHECK: r0 = mpyu(r0.l, r1.h)
+
+declare i32 @llvm.hexagon.M2.mpyu.lh.s1(i32, i32)
+define i32 @M2_mpyu_lh_s1(i32 %a, i32 %b) {
+ %z = call i32 @llvm.hexagon.M2.mpyu.lh.s1(i32 %a, i32 %b)
+ ret i32 %z
+}
+; CHECK: r0 = mpyu(r0.l, r1.h):<<1
+
+declare i32 @llvm.hexagon.M2.mpyu.hl.s0(i32, i32)
+define i32 @M2_mpyu_hl_s0(i32 %a, i32 %b) {
+ %z = call i32 @llvm.hexagon.M2.mpyu.hl.s0(i32 %a, i32 %b)
+ ret i32 %z
+}
+; CHECK: r0 = mpyu(r0.h, r1.l)
+
+declare i32 @llvm.hexagon.M2.mpyu.hl.s1(i32, i32)
+define i32 @M2_mpyu_hl_s1(i32 %a, i32 %b) {
+ %z = call i32 @llvm.hexagon.M2.mpyu.hl.s1(i32 %a, i32 %b)
+ ret i32 %z
+}
+; CHECK: r0 = mpyu(r0.h, r1.l):<<1
+
+declare i32 @llvm.hexagon.M2.mpyu.hh.s0(i32, i32)
+define i32 @M2_mpyu_hh_s0(i32 %a, i32 %b) {
+ %z = call i32 @llvm.hexagon.M2.mpyu.hh.s0(i32 %a, i32 %b)
+ ret i32 %z
+}
+; CHECK: r0 = mpyu(r0.h, r1.h)
+
+declare i32 @llvm.hexagon.M2.mpyu.hh.s1(i32, i32)
+define i32 @M2_mpyu_hh_s1(i32 %a, i32 %b) {
+ %z = call i32 @llvm.hexagon.M2.mpyu.hh.s1(i32 %a, i32 %b)
+ ret i32 %z
+}
+; CHECK: r0 = mpyu(r0.h, r1.h):<<1
+
+declare i32 @llvm.hexagon.M2.mpyu.acc.ll.s0(i32, i32, i32)
+define i32 @M2_mpyu_acc_ll_s0(i32 %a, i32 %b, i32 %c) {
+ %z = call i32 @llvm.hexagon.M2.mpyu.acc.ll.s0(i32 %a, i32 %b, i32 %c)
+ ret i32 %z
+}
+; CHECK: r0 += mpyu(r1.l, r2.l)
+
+declare i32 @llvm.hexagon.M2.mpyu.acc.ll.s1(i32, i32, i32)
+define i32 @M2_mpyu_acc_ll_s1(i32 %a, i32 %b, i32 %c) {
+ %z = call i32 @llvm.hexagon.M2.mpyu.acc.ll.s1(i32 %a, i32 %b, i32 %c)
+ ret i32 %z
+}
+; CHECK: r0 += mpyu(r1.l, r2.l):<<1
+
+declare i32 @llvm.hexagon.M2.mpyu.acc.lh.s0(i32, i32, i32)
+define i32 @M2_mpyu_acc_lh_s0(i32 %a, i32 %b, i32 %c) {
+ %z = call i32 @llvm.hexagon.M2.mpyu.acc.lh.s0(i32 %a, i32 %b, i32 %c)
+ ret i32 %z
+}
+; CHECK: r0 += mpyu(r1.l, r2.h)
+
+declare i32 @llvm.hexagon.M2.mpyu.acc.lh.s1(i32, i32, i32)
+define i32 @M2_mpyu_acc_lh_s1(i32 %a, i32 %b, i32 %c) {
+ %z = call i32 @llvm.hexagon.M2.mpyu.acc.lh.s1(i32 %a, i32 %b, i32 %c)
+ ret i32 %z
+}
+; CHECK: r0 += mpyu(r1.l, r2.h):<<1
+
+declare i32 @llvm.hexagon.M2.mpyu.acc.hl.s0(i32, i32, i32)
+define i32 @M2_mpyu_acc_hl_s0(i32 %a, i32 %b, i32 %c) {
+ %z = call i32 @llvm.hexagon.M2.mpyu.acc.hl.s0(i32 %a, i32 %b, i32 %c)
+ ret i32 %z
+}
+; CHECK: r0 += mpyu(r1.h, r2.l)
+
+declare i32 @llvm.hexagon.M2.mpyu.acc.hl.s1(i32, i32, i32)
+define i32 @M2_mpyu_acc_hl_s1(i32 %a, i32 %b, i32 %c) {
+ %z = call i32 @llvm.hexagon.M2.mpyu.acc.hl.s1(i32 %a, i32 %b, i32 %c)
+ ret i32 %z
+}
+; CHECK: r0 += mpyu(r1.h, r2.l):<<1
+
+declare i32 @llvm.hexagon.M2.mpyu.acc.hh.s0(i32, i32, i32)
+define i32 @M2_mpyu_acc_hh_s0(i32 %a, i32 %b, i32 %c) {
+ %z = call i32 @llvm.hexagon.M2.mpyu.acc.hh.s0(i32 %a, i32 %b, i32 %c)
+ ret i32 %z
+}
+; CHECK: r0 += mpyu(r1.h, r2.h)
+
+declare i32 @llvm.hexagon.M2.mpyu.acc.hh.s1(i32, i32, i32)
+define i32 @M2_mpyu_acc_hh_s1(i32 %a, i32 %b, i32 %c) {
+ %z = call i32 @llvm.hexagon.M2.mpyu.acc.hh.s1(i32 %a, i32 %b, i32 %c)
+ ret i32 %z
+}
+; CHECK: r0 += mpyu(r1.h, r2.h):<<1
+
+declare i32 @llvm.hexagon.M2.mpyu.nac.ll.s0(i32, i32, i32)
+define i32 @M2_mpyu_nac_ll_s0(i32 %a, i32 %b, i32 %c) {
+ %z = call i32 @llvm.hexagon.M2.mpyu.nac.ll.s0(i32 %a, i32 %b, i32 %c)
+ ret i32 %z
+}
+; CHECK: r0 -= mpyu(r1.l, r2.l)
+
+declare i32 @llvm.hexagon.M2.mpyu.nac.ll.s1(i32, i32, i32)
+define i32 @M2_mpyu_nac_ll_s1(i32 %a, i32 %b, i32 %c) {
+ %z = call i32 @llvm.hexagon.M2.mpyu.nac.ll.s1(i32 %a, i32 %b, i32 %c)
+ ret i32 %z
+}
+; CHECK: r0 -= mpyu(r1.l, r2.l):<<1
+
+declare i32 @llvm.hexagon.M2.mpyu.nac.lh.s0(i32, i32, i32)
+define i32 @M2_mpyu_nac_lh_s0(i32 %a, i32 %b, i32 %c) {
+ %z = call i32 @llvm.hexagon.M2.mpyu.nac.lh.s0(i32 %a, i32 %b, i32 %c)
+ ret i32 %z
+}
+; CHECK: r0 -= mpyu(r1.l, r2.h)
+
+declare i32 @llvm.hexagon.M2.mpyu.nac.lh.s1(i32, i32, i32)
+define i32 @M2_mpyu_nac_lh_s1(i32 %a, i32 %b, i32 %c) {
+ %z = call i32 @llvm.hexagon.M2.mpyu.nac.lh.s1(i32 %a, i32 %b, i32 %c)
+ ret i32 %z
+}
+; CHECK: r0 -= mpyu(r1.l, r2.h):<<1
+
+declare i32 @llvm.hexagon.M2.mpyu.nac.hl.s0(i32, i32, i32)
+define i32 @M2_mpyu_nac_hl_s0(i32 %a, i32 %b, i32 %c) {
+ %z = call i32 @llvm.hexagon.M2.mpyu.nac.hl.s0(i32 %a, i32 %b, i32 %c)
+ ret i32 %z
+}
+; CHECK: r0 -= mpyu(r1.h, r2.l)
+
+declare i32 @llvm.hexagon.M2.mpyu.nac.hl.s1(i32, i32, i32)
+define i32 @M2_mpyu_nac_hl_s1(i32 %a, i32 %b, i32 %c) {
+ %z = call i32 @llvm.hexagon.M2.mpyu.nac.hl.s1(i32 %a, i32 %b, i32 %c)
+ ret i32 %z
+}
+; CHECK: r0 -= mpyu(r1.h, r2.l):<<1
+
+declare i32 @llvm.hexagon.M2.mpyu.nac.hh.s0(i32, i32, i32)
+define i32 @M2_mpyu_nac_hh_s0(i32 %a, i32 %b, i32 %c) {
+ %z = call i32 @llvm.hexagon.M2.mpyu.nac.hh.s0(i32 %a, i32 %b, i32 %c)
+ ret i32 %z
+}
+; CHECK: r0 -= mpyu(r1.h, r2.h)
+
+declare i32 @llvm.hexagon.M2.mpyu.nac.hh.s1(i32, i32, i32)
+define i32 @M2_mpyu_nac_hh_s1(i32 %a, i32 %b, i32 %c) {
+ %z = call i32 @llvm.hexagon.M2.mpyu.nac.hh.s1(i32 %a, i32 %b, i32 %c)
+ ret i32 %z
+}
+; CHECK: r0 -= mpyu(r1.h, r2.h):<<1
+
+; Polynomial multiply words
+declare i64 @llvm.hexagon.M4.pmpyw(i32, i32)
+define i64 @M4_pmpyw(i32 %a, i32 %b) {
+ %z = call i64 @llvm.hexagon.M4.pmpyw(i32 %a, i32 %b)
+ ret i64 %z
+}
+; CHECK: r1:0 = pmpyw(r0, r1)
+
+declare i64 @llvm.hexagon.M4.pmpyw.acc(i64, i32, i32)
+define i64 @M4_pmpyw_acc(i64 %a, i32 %b, i32 %c) {
+ %z = call i64 @llvm.hexagon.M4.pmpyw.acc(i64 %a, i32 %b, i32 %c)
+ ret i64 %z
+}
+; CHECK: r1:0 ^= pmpyw(r2, r3)
+
+; Vector reduce multiply word by signed half
+declare i64 @llvm.hexagon.M4.vrmpyoh.s0(i64, i64)
+define i64 @M4_vrmpyoh_s0(i64 %a, i64 %b) {
+ %z = call i64 @llvm.hexagon.M4.vrmpyoh.s0(i64 %a, i64 %b)
+ ret i64 %z
+}
+; CHECK: r1:0 = vrmpywoh(r1:0, r3:2)
+
+declare i64 @llvm.hexagon.M4.vrmpyoh.s1(i64, i64)
+define i64 @M4_vrmpyoh_s1(i64 %a, i64 %b) {
+ %z = call i64 @llvm.hexagon.M4.vrmpyoh.s1(i64 %a, i64 %b)
+ ret i64 %z
+}
+; CHECK: r1:0 = vrmpywoh(r1:0, r3:2):<<1
+
+declare i64 @llvm.hexagon.M4.vrmpyeh.s0(i64, i64)
+define i64 @M4_vrmpyeh_s0(i64 %a, i64 %b) {
+ %z = call i64 @llvm.hexagon.M4.vrmpyeh.s0(i64 %a, i64 %b)
+ ret i64 %z
+}
+; CHECK: r1:0 = vrmpyweh(r1:0, r3:2)
+
+declare i64 @llvm.hexagon.M4.vrmpyeh.s1(i64, i64)
+define i64 @M4_vrmpyeh_s1(i64 %a, i64 %b) {
+ %z = call i64 @llvm.hexagon.M4.vrmpyeh.s1(i64 %a, i64 %b)
+ ret i64 %z
+}
+; CHECK: r1:0 = vrmpyweh(r1:0, r3:2):<<1
+
+declare i64 @llvm.hexagon.M4.vrmpyoh.acc.s0(i64, i64, i64)
+define i64 @M4_vrmpyoh_acc_s0(i64 %a, i64 %b, i64 %c) {
+ %z = call i64 @llvm.hexagon.M4.vrmpyoh.acc.s0(i64 %a, i64 %b, i64 %c)
+ ret i64 %z
+}
+; CHECK: r1:0 += vrmpywoh(r3:2, r5:4)
+
+declare i64 @llvm.hexagon.M4.vrmpyoh.acc.s1(i64, i64, i64)
+define i64 @M4_vrmpyoh_acc_s1(i64 %a, i64 %b, i64 %c) {
+ %z = call i64 @llvm.hexagon.M4.vrmpyoh.acc.s1(i64 %a, i64 %b, i64 %c)
+ ret i64 %z
+}
+; CHECK: r1:0 += vrmpywoh(r3:2, r5:4):<<1
+
+declare i64 @llvm.hexagon.M4.vrmpyeh.acc.s0(i64, i64, i64)
+define i64 @M4_vrmpyeh_acc_s0(i64 %a, i64 %b, i64 %c) {
+ %z = call i64 @llvm.hexagon.M4.vrmpyeh.acc.s0(i64 %a, i64 %b, i64 %c)
+ ret i64 %z
+}
+; CHECK: r1:0 += vrmpyweh(r3:2, r5:4)
+
+declare i64 @llvm.hexagon.M4.vrmpyeh.acc.s1(i64, i64, i64)
+define i64 @M4_vrmpyeh_acc_s1(i64 %a, i64 %b, i64 %c) {
+ %z = call i64 @llvm.hexagon.M4.vrmpyeh.acc.s1(i64 %a, i64 %b, i64 %c)
+ ret i64 %z
+}
+; CHECK: r1:0 += vrmpyweh(r3:2, r5:4):<<1
+
+; Multiply and use upper result
+declare i32 @llvm.hexagon.M2.dpmpyss.rnd.s0(i32, i32)
+define i32 @M2_dpmpyss_rnd_s0(i32 %a, i32 %b) {
+ %z = call i32 @llvm.hexagon.M2.dpmpyss.rnd.s0(i32 %a, i32 %b)
+ ret i32 %z
+}
+; CHECK: r0 = mpy(r0, r1):rnd
+
+declare i32 @llvm.hexagon.M2.mpyu.up(i32, i32)
+define i32 @M2_mpyu_up(i32 %a, i32 %b) {
+ %z = call i32 @llvm.hexagon.M2.mpyu.up(i32 %a, i32 %b)
+ ret i32 %z
+}
+; CHECK: r0 = mpyu(r0, r1)
+
+declare i32 @llvm.hexagon.M2.mpysu.up(i32, i32)
+define i32 @M2_mpysu_up(i32 %a, i32 %b) {
+ %z = call i32 @llvm.hexagon.M2.mpysu.up(i32 %a, i32 %b)
+ ret i32 %z
+}
+; CHECK: r0 = mpysu(r0, r1)
+
+declare i32 @llvm.hexagon.M2.hmmpyh.s1(i32, i32)
+define i32 @M2_hmmpyh_s1(i32 %a, i32 %b) {
+ %z = call i32 @llvm.hexagon.M2.hmmpyh.s1(i32 %a, i32 %b)
+ ret i32 %z
+}
+; CHECK: r0 = mpy(r0, r1.h):<<1:sat
+
+declare i32 @llvm.hexagon.M2.hmmpyl.s1(i32, i32)
+define i32 @M2_hmmpyl_s1(i32 %a, i32 %b) {
+ %z = call i32 @llvm.hexagon.M2.hmmpyl.s1(i32 %a, i32 %b)
+ ret i32 %z
+}
+; CHECK: r0 = mpy(r0, r1.l):<<1:sat
+
+declare i32 @llvm.hexagon.M2.hmmpyh.rs1(i32, i32)
+define i32 @M2_hmmpyh_rs1(i32 %a, i32 %b) {
+ %z = call i32 @llvm.hexagon.M2.hmmpyh.rs1(i32 %a, i32 %b)
+ ret i32 %z
+}
+; CHECK: r0 = mpy(r0, r1.h):<<1:rnd:sat
+
+declare i32 @llvm.hexagon.M2.mpy.up.s1.sat(i32, i32)
+define i32 @M2_mpy_up_s1_sat(i32 %a, i32 %b) {
+ %z = call i32 @llvm.hexagon.M2.mpy.up.s1.sat(i32 %a, i32 %b)
+ ret i32 %z
+}
+; CHECK: r0 = mpy(r0, r1):<<1:sat
+
+declare i32 @llvm.hexagon.M2.hmmpyl.rs1(i32, i32)
+define i32 @M2_hmmpyl_rs1(i32 %a, i32 %b) {
+ %z = call i32 @llvm.hexagon.M2.hmmpyl.rs1(i32 %a, i32 %b)
+ ret i32 %z
+}
+; CHECK: r0 = mpy(r0, r1.l):<<1:rnd:sat
+
+declare i32 @llvm.hexagon.M2.mpy.up(i32, i32)
+define i32 @M2_mpy_up(i32 %a, i32 %b) {
+ %z = call i32 @llvm.hexagon.M2.mpy.up(i32 %a, i32 %b)
+ ret i32 %z
+}
+; CHECK: r0 = mpy(r0, r1)
+
+declare i32 @llvm.hexagon.M2.mpy.up.s1(i32, i32)
+define i32 @M2_mpy_up_s1(i32 %a, i32 %b) {
+ %z = call i32 @llvm.hexagon.M2.mpy.up.s1(i32 %a, i32 %b)
+ ret i32 %z
+}
+; CHECK: r0 = mpy(r0, r1):<<1
+
+declare i32 @llvm.hexagon.M4.mac.up.s1.sat(i32, i32, i32)
+define i32 @M4_mac_up_s1_sat(i32 %a, i32 %b, i32 %c) {
+ %z = call i32 @llvm.hexagon.M4.mac.up.s1.sat(i32 %a, i32 %b, i32 %c)
+ ret i32 %z
+}
+; CHECK: r0 += mpy(r1, r2):<<1:sat
+
+declare i32 @llvm.hexagon.M4.nac.up.s1.sat(i32, i32, i32)
+define i32 @M4_nac_up_s1_sat(i32 %a, i32 %b, i32 %c) {
+ %z = call i32 @llvm.hexagon.M4.nac.up.s1.sat(i32 %a, i32 %b, i32 %c)
+ ret i32 %z
+}
+; CHECK: r0 -= mpy(r1, r2):<<1:sat
+
+; Multiply and use full result
+declare i64 @llvm.hexagon.M2.dpmpyss.s0(i32, i32)
+define i64 @M2_dpmpyss_s0(i32 %a, i32 %b) {
+ %z = call i64 @llvm.hexagon.M2.dpmpyss.s0(i32 %a, i32 %b)
+ ret i64 %z
+}
+; CHECK: r1:0 = mpy(r0, r1)
+
+declare i64 @llvm.hexagon.M2.dpmpyuu.s0(i32, i32)
+define i64 @M2_dpmpyuu_s0(i32 %a, i32 %b) {
+ %z = call i64 @llvm.hexagon.M2.dpmpyuu.s0(i32 %a, i32 %b)
+ ret i64 %z
+}
+; CHECK: r1:0 = mpyu(r0, r1)
+
+declare i64 @llvm.hexagon.M2.dpmpyss.acc.s0(i64, i32, i32)
+define i64 @M2_dpmpyss_acc_s0(i64 %a, i32 %b, i32 %c) {
+ %z = call i64 @llvm.hexagon.M2.dpmpyss.acc.s0(i64 %a, i32 %b, i32 %c)
+ ret i64 %z
+}
+; CHECK: r1:0 += mpy(r2, r3)
+
+declare i64 @llvm.hexagon.M2.dpmpyss.nac.s0(i64, i32, i32)
+define i64 @M2_dpmpyss_nac_s0(i64 %a, i32 %b, i32 %c) {
+ %z = call i64 @llvm.hexagon.M2.dpmpyss.nac.s0(i64 %a, i32 %b, i32 %c)
+ ret i64 %z
+}
+; CHECK: r1:0 -= mpy(r2, r3)
+
+declare i64 @llvm.hexagon.M2.dpmpyuu.acc.s0(i64, i32, i32)
+define i64 @M2_dpmpyuu_acc_s0(i64 %a, i32 %b, i32 %c) {
+ %z = call i64 @llvm.hexagon.M2.dpmpyuu.acc.s0(i64 %a, i32 %b, i32 %c)
+ ret i64 %z
+}
+; CHECK: r1:0 += mpyu(r2, r3)
+
+declare i64 @llvm.hexagon.M2.dpmpyuu.nac.s0(i64, i32, i32)
+define i64 @M2_dpmpyuu_nac_s0(i64 %a, i32 %b, i32 %c) {
+ %z = call i64 @llvm.hexagon.M2.dpmpyuu.nac.s0(i64 %a, i32 %b, i32 %c)
+ ret i64 %z
+}
+; CHECK: r1:0 -= mpyu(r2, r3)
+
+; Vector dual multiply
+declare i64 @llvm.hexagon.M2.vdmpys.s0(i64, i64)
+define i64 @M2_vdmpys_s0(i64 %a, i64 %b) {
+ %z = call i64 @llvm.hexagon.M2.vdmpys.s0(i64 %a, i64 %b)
+ ret i64 %z
+}
+; CHECK: r1:0 = vdmpy(r1:0, r3:2):sat
+
+declare i64 @llvm.hexagon.M2.vdmpys.s1(i64, i64)
+define i64 @M2_vdmpys_s1(i64 %a, i64 %b) {
+ %z = call i64 @llvm.hexagon.M2.vdmpys.s1(i64 %a, i64 %b)
+ ret i64 %z
+}
+; CHECK: r1:0 = vdmpy(r1:0, r3:2):<<1:sat
+
+; Vector reduce multiply bytes
+declare i64 @llvm.hexagon.M5.vrmpybuu(i64, i64)
+define i64 @M5_vrmpybuu(i64 %a, i64 %b) {
+ %z = call i64 @llvm.hexagon.M5.vrmpybuu(i64 %a, i64 %b)
+ ret i64 %z
+}
+; CHECK: r1:0 = vrmpybu(r1:0, r3:2)
+
+declare i64 @llvm.hexagon.M5.vrmpybsu(i64, i64)
+define i64 @M5_vrmpybsu(i64 %a, i64 %b) {
+ %z = call i64 @llvm.hexagon.M5.vrmpybsu(i64 %a, i64 %b)
+ ret i64 %z
+}
+; CHECK: r1:0 = vrmpybsu(r1:0, r3:2)
+
+declare i64 @llvm.hexagon.M5.vrmacbuu(i64, i64, i64)
+define i64 @M5_vrmacbuu(i64 %a, i64 %b, i64 %c) {
+ %z = call i64 @llvm.hexagon.M5.vrmacbuu(i64 %a, i64 %b, i64 %c)
+ ret i64 %z
+}
+; CHECK: r1:0 += vrmpybu(r3:2, r5:4)
+
+declare i64 @llvm.hexagon.M5.vrmacbsu(i64, i64, i64)
+define i64 @M5_vrmacbsu(i64 %a, i64 %b, i64 %c) {
+ %z = call i64 @llvm.hexagon.M5.vrmacbsu(i64 %a, i64 %b, i64 %c)
+ ret i64 %z
+}
+; CHECK: r1:0 += vrmpybsu(r3:2, r5:4)
+
+; Vector dual multiply signed by unsigned bytes
+declare i64 @llvm.hexagon.M5.vdmpybsu(i64, i64)
+define i64 @M5_vdmpybsu(i64 %a, i64 %b) {
+ %z = call i64 @llvm.hexagon.M5.vdmpybsu(i64 %a, i64 %b)
+ ret i64 %z
+}
+; CHECK: r1:0 = vdmpybsu(r1:0, r3:2):sat
+
+declare i64 @llvm.hexagon.M5.vdmacbsu(i64, i64, i64)
+define i64 @M5_vdmacbsu(i64 %a, i64 %b, i64 %c) {
+ %z = call i64 @llvm.hexagon.M5.vdmacbsu(i64 %a, i64 %b, i64 %c)
+ ret i64 %z
+}
+; CHECK: r1:0 += vdmpybsu(r3:2, r5:4):sat
+
+; Vector multiply even halfwords
+declare i64 @llvm.hexagon.M2.vmpy2es.s0(i64, i64)
+define i64 @M2_vmpy2es_s0(i64 %a, i64 %b) {
+ %z = call i64 @llvm.hexagon.M2.vmpy2es.s0(i64 %a, i64 %b)
+ ret i64 %z
+}
+; CHECK: r1:0 = vmpyeh(r1:0, r3:2):sat
+
+declare i64 @llvm.hexagon.M2.vmpy2es.s1(i64, i64)
+define i64 @M2_vmpy2es_s1(i64 %a, i64 %b) {
+ %z = call i64 @llvm.hexagon.M2.vmpy2es.s1(i64 %a, i64 %b)
+ ret i64 %z
+}
+; CHECK: r1:0 = vmpyeh(r1:0, r3:2):<<1:sat
+
+declare i64 @llvm.hexagon.M2.vmac2es(i64, i64, i64)
+define i64 @M2_vmac2es(i64 %a, i64 %b, i64 %c) {
+ %z = call i64 @llvm.hexagon.M2.vmac2es(i64 %a, i64 %b, i64 %c)
+ ret i64 %z
+}
+; CHECK: r1:0 += vmpyeh(r3:2, r5:4)
+
+declare i64 @llvm.hexagon.M2.vmac2es.s0(i64, i64, i64)
+define i64 @M2_vmac2es_s0(i64 %a, i64 %b, i64 %c) {
+ %z = call i64 @llvm.hexagon.M2.vmac2es.s0(i64 %a, i64 %b, i64 %c)
+ ret i64 %z
+}
+; CHECK: r1:0 += vmpyeh(r3:2, r5:4):sat
+
+declare i64 @llvm.hexagon.M2.vmac2es.s1(i64, i64, i64)
+define i64 @M2_vmac2es_s1(i64 %a, i64 %b, i64 %c) {
+ %z = call i64 @llvm.hexagon.M2.vmac2es.s1(i64 %a, i64 %b, i64 %c)
+ ret i64 %z
+}
+; CHECK: r1:0 += vmpyeh(r3:2, r5:4):<<1:sat
+
+; Vector multiply halfwords
+declare i64 @llvm.hexagon.M2.vmpy2s.s0(i32, i32)
+define i64 @M2_vmpy2s_s0(i32 %a, i32 %b) {
+ %z = call i64 @llvm.hexagon.M2.vmpy2s.s0(i32 %a, i32 %b)
+ ret i64 %z
+}
+; CHECK: r1:0 = vmpyh(r0, r1):sat
+
+declare i64 @llvm.hexagon.M2.vmpy2s.s1(i32, i32)
+define i64 @M2_vmpy2s_s1(i32 %a, i32 %b) {
+ %z = call i64 @llvm.hexagon.M2.vmpy2s.s1(i32 %a, i32 %b)
+ ret i64 %z
+}
+; CHECK: r1:0 = vmpyh(r0, r1):<<1:sat
+
+declare i64 @llvm.hexagon.M2.vmac2(i64, i32, i32)
+define i64 @M2_vmac2(i64 %a, i32 %b, i32 %c) {
+ %z = call i64 @llvm.hexagon.M2.vmac2(i64 %a, i32 %b, i32 %c)
+ ret i64 %z
+}
+; CHECK: r1:0 += vmpyh(r2, r3)
+
+declare i64 @llvm.hexagon.M2.vmac2s.s0(i64, i32, i32)
+define i64 @M2_vmac2s_s0(i64 %a, i32 %b, i32 %c) {
+ %z = call i64 @llvm.hexagon.M2.vmac2s.s0(i64 %a, i32 %b, i32 %c)
+ ret i64 %z
+}
+; CHECK: r1:0 += vmpyh(r2, r3):sat
+
+declare i64 @llvm.hexagon.M2.vmac2s.s1(i64, i32, i32)
+define i64 @M2_vmac2s_s1(i64 %a, i32 %b, i32 %c) {
+ %z = call i64 @llvm.hexagon.M2.vmac2s.s1(i64 %a, i32 %b, i32 %c)
+ ret i64 %z
+}
+; CHECK: r1:0 += vmpyh(r2, r3):<<1:sat
+
+; Vector multiply halfwords signed by unsigned
+declare i64 @llvm.hexagon.M2.vmpy2su.s0(i32, i32)
+define i64 @M2_vmpy2su_s0(i32 %a, i32 %b) {
+ %z = call i64 @llvm.hexagon.M2.vmpy2su.s0(i32 %a, i32 %b)
+ ret i64 %z
+}
+; CHECK: r1:0 = vmpyhsu(r0, r1):sat
+
+declare i64 @llvm.hexagon.M2.vmpy2su.s1(i32, i32)
+define i64 @M2_vmpy2su_s1(i32 %a, i32 %b) {
+ %z = call i64 @llvm.hexagon.M2.vmpy2su.s1(i32 %a, i32 %b)
+ ret i64 %z
+}
+; CHECK: r1:0 = vmpyhsu(r0, r1):<<1:sat
+
+declare i64 @llvm.hexagon.M2.vmac2su.s0(i64, i32, i32)
+define i64 @M2_vmac2su_s0(i64 %a, i32 %b, i32 %c) {
+ %z = call i64 @llvm.hexagon.M2.vmac2su.s0(i64 %a, i32 %b, i32 %c)
+ ret i64 %z
+}
+; CHECK: r1:0 += vmpyhsu(r2, r3):sat
+
+declare i64 @llvm.hexagon.M2.vmac2su.s1(i64, i32, i32)
+define i64 @M2_vmac2su_s1(i64 %a, i32 %b, i32 %c) {
+ %z = call i64 @llvm.hexagon.M2.vmac2su.s1(i64 %a, i32 %b, i32 %c)
+ ret i64 %z
+}
+; CHECK: r1:0 += vmpyhsu(r2, r3):<<1:sat
+
+; Vector reduce multiply halfwords
+declare i64 @llvm.hexagon.M2.vrmpy.s0(i64, i64)
+define i64 @M2_vrmpy_s0(i64 %a, i64 %b) {
+ %z = call i64 @llvm.hexagon.M2.vrmpy.s0(i64 %a, i64 %b)
+ ret i64 %z
+}
+; CHECK: r1:0 = vrmpyh(r1:0, r3:2)
+
+declare i64 @llvm.hexagon.M2.vrmac.s0(i64, i64, i64)
+define i64 @M2_vrmac_s0(i64 %a, i64 %b, i64 %c) {
+ %z = call i64 @llvm.hexagon.M2.vrmac.s0(i64 %a, i64 %b, i64 %c)
+ ret i64 %z
+}
+; CHECK: r1:0 += vrmpyh(r3:2, r5:4)
+
+; Vector multiply bytes
+declare i64 @llvm.hexagon.M5.vmpybsu(i32, i32)
+define i64 @M5_vmpybsu(i32 %a, i32 %b) {
+ %z = call i64 @llvm.hexagon.M5.vmpybsu(i32 %a, i32 %b)
+ ret i64 %z
+}
+; CHECK: r1:0 = vmpybsu(r0, r1)
+
+declare i64 @llvm.hexagon.M5.vmpybuu(i32, i32)
+define i64 @M5_vmpybuu(i32 %a, i32 %b) {
+ %z = call i64 @llvm.hexagon.M5.vmpybuu(i32 %a, i32 %b)
+ ret i64 %z
+}
+; CHECK: r1:0 = vmpybu(r0, r1)
+
+declare i64 @llvm.hexagon.M5.vmacbuu(i64, i32, i32)
+define i64 @M5_vmacbuu(i64 %a, i32 %b, i32 %c) {
+ %z = call i64 @llvm.hexagon.M5.vmacbuu(i64 %a, i32 %b, i32 %c)
+ ret i64 %z
+}
+; CHECK: r1:0 += vmpybu(r2, r3)
+
+declare i64 @llvm.hexagon.M5.vmacbsu(i64, i32, i32)
+define i64 @M5_vmacbsu(i64 %a, i32 %b, i32 %c) {
+ %z = call i64 @llvm.hexagon.M5.vmacbsu(i64 %a, i32 %b, i32 %c)
+ ret i64 %z
+}
+; CHECK: r1:0 += vmpybsu(r2, r3)
+
+; Vector polynomial multiply halfwords
+declare i64 @llvm.hexagon.M4.vpmpyh(i32, i32)
+define i64 @M4_vpmpyh(i32 %a, i32 %b) {
+ %z = call i64 @llvm.hexagon.M4.vpmpyh(i32 %a, i32 %b)
+ ret i64 %z
+}
+; CHECK: r1:0 = vpmpyh(r0, r1)
+
+declare i64 @llvm.hexagon.M4.vpmpyh.acc(i64, i32, i32)
+define i64 @M4_vpmpyh_acc(i64 %a, i32 %b, i32 %c) {
+ %z = call i64 @llvm.hexagon.M4.vpmpyh.acc(i64 %a, i32 %b, i32 %c)
+ ret i64 %z
+}
+; CHECK: r1:0 ^= vpmpyh(r2, r3)
diff --git a/test/CodeGen/Hexagon/intrinsics/xtype_perm.ll b/test/CodeGen/Hexagon/intrinsics/xtype_perm.ll
new file mode 100644
index 000000000000..0b761323e31e
--- /dev/null
+++ b/test/CodeGen/Hexagon/intrinsics/xtype_perm.ll
@@ -0,0 +1,252 @@
+; RUN: llc -march=hexagon -O0 < %s | FileCheck %s
+; Hexagon Programmer's Reference Manual 11.10.6 XTYPE/PERM
+
+; Saturate
+declare i32 @llvm.hexagon.A2.sat(i64)
+define i32 @A2_sat(i64 %a) {
+ %z = call i32 @llvm.hexagon.A2.sat(i64 %a)
+ ret i32 %z
+}
+; CHECK: r0 = sat(r1:0)
+
+declare i32 @llvm.hexagon.A2.sath(i32)
+define i32 @A2_sath(i32 %a) {
+ %z = call i32 @llvm.hexagon.A2.sath(i32 %a)
+ ret i32 %z
+}
+; CHECK: r0 = sath(r0)
+
+declare i32 @llvm.hexagon.A2.satuh(i32)
+define i32 @A2_satuh(i32 %a) {
+ %z = call i32 @llvm.hexagon.A2.satuh(i32 %a)
+ ret i32 %z
+}
+; CHECK: r0 = satuh(r0)
+
+declare i32 @llvm.hexagon.A2.satub(i32)
+define i32 @A2_satub(i32 %a) {
+ %z = call i32 @llvm.hexagon.A2.satub(i32 %a)
+ ret i32 %z
+}
+; CHECK: r0 = satub(r0)
+
+declare i32 @llvm.hexagon.A2.satb(i32)
+define i32 @A2_satb(i32 %a) {
+ %z = call i32 @llvm.hexagon.A2.satb(i32 %a)
+ ret i32 %z
+}
+; CHECK: r0 = satb(r0)
+
+; Swizzle bytes
+declare i32 @llvm.hexagon.A2.swiz(i32)
+define i32 @A2_swiz(i32 %a) {
+ %z = call i32 @llvm.hexagon.A2.swiz(i32 %a)
+ ret i32 %z
+}
+; CHECK: r0 = swiz(r0)
+
+; Vector round and pack
+declare i32 @llvm.hexagon.S2.vrndpackwh(i64)
+define i32 @S2_vrndpackwh(i64 %a) {
+ %z = call i32 @llvm.hexagon.S2.vrndpackwh(i64 %a)
+ ret i32 %z
+}
+; CHECK: r0 = vrndwh(r1:0)
+
+declare i32 @llvm.hexagon.S2.vrndpackwhs(i64)
+define i32 @S2_vrndpackwhs(i64 %a) {
+ %z = call i32 @llvm.hexagon.S2.vrndpackwhs(i64 %a)
+ ret i32 %z
+}
+; CHECK: r0 = vrndwh(r1:0):sat
+
+; Vector saturate and pack
+declare i32 @llvm.hexagon.S2.vsathub(i64)
+define i32 @S2_vsathub(i64 %a) {
+ %z = call i32 @llvm.hexagon.S2.vsathub(i64 %a)
+ ret i32 %z
+}
+; CHECK: r0 = vsathub(r1:0)
+
+declare i32 @llvm.hexagon.S2.vsatwh(i64)
+define i32 @S2_vsatwh(i64 %a) {
+ %z = call i32 @llvm.hexagon.S2.vsatwh(i64 %a)
+ ret i32 %z
+}
+; CHECK: r0 = vsatwh(r1:0)
+
+declare i32 @llvm.hexagon.S2.vsatwuh(i64)
+define i32 @S2_vsatwuh(i64 %a) {
+ %z = call i32 @llvm.hexagon.S2.vsatwuh(i64 %a)
+ ret i32 %z
+}
+; CHECK: r0 = vsatwuh(r1:0)
+
+declare i32 @llvm.hexagon.S2.vsathb(i64)
+define i32 @S2_vsathb(i64 %a) {
+ %z = call i32 @llvm.hexagon.S2.vsathb(i64 %a)
+ ret i32 %z
+}
+; CHECK: r0 = vsathb(r1:0)
+
+declare i32 @llvm.hexagon.S2.svsathb(i32)
+define i32 @S2_svsathb(i32 %a) {
+ %z = call i32 @llvm.hexagon.S2.svsathb(i32 %a)
+ ret i32 %z
+}
+; CHECK: r0 = vsathb(r0)
+
+declare i32 @llvm.hexagon.S2.svsathub(i32)
+define i32 @S2_svsathub(i32 %a) {
+ %z = call i32 @llvm.hexagon.S2.svsathub(i32 %a)
+ ret i32 %z
+}
+; CHECK: r0 = vsathub(r0)
+
+; Vector saturate without pack
+declare i64 @llvm.hexagon.S2.vsathub.nopack(i64)
+define i64 @S2_vsathub_nopack(i64 %a) {
+ %z = call i64 @llvm.hexagon.S2.vsathub.nopack(i64 %a)
+ ret i64 %z
+}
+; CHECK: r1:0 = vsathub(r1:0)
+
+declare i64 @llvm.hexagon.S2.vsatwuh.nopack(i64)
+define i64 @S2_vsatwuh_nopack(i64 %a) {
+ %z = call i64 @llvm.hexagon.S2.vsatwuh.nopack(i64 %a)
+ ret i64 %z
+}
+; CHECK: r1:0 = vsatwuh(r1:0)
+
+declare i64 @llvm.hexagon.S2.vsatwh.nopack(i64)
+define i64 @S2_vsatwh_nopack(i64 %a) {
+ %z = call i64 @llvm.hexagon.S2.vsatwh.nopack(i64 %a)
+ ret i64 %z
+}
+; CHECK: r1:0 = vsatwh(r1:0)
+
+declare i64 @llvm.hexagon.S2.vsathb.nopack(i64)
+define i64 @S2_vsathb_nopack(i64 %a) {
+ %z = call i64 @llvm.hexagon.S2.vsathb.nopack(i64 %a)
+ ret i64 %z
+}
+; CHECK: r1:0 = vsathb(r1:0)
+
+; Vector shuffle
+declare i64 @llvm.hexagon.S2.shuffeb(i64, i64)
+define i64 @S2_shuffeb(i64 %a, i64 %b) {
+ %z = call i64 @llvm.hexagon.S2.shuffeb(i64 %a, i64 %b)
+ ret i64 %z
+}
+; CHECK: r1:0 = shuffeb(r1:0, r3:2)
+
+declare i64 @llvm.hexagon.S2.shuffob(i64, i64)
+define i64 @S2_shuffob(i64 %a, i64 %b) {
+ %z = call i64 @llvm.hexagon.S2.shuffob(i64 %a, i64 %b)
+ ret i64 %z
+}
+; CHECK: r1:0 = shuffob(r1:0, r3:2)
+
+declare i64 @llvm.hexagon.S2.shuffeh(i64, i64)
+define i64 @S2_shuffeh(i64 %a, i64 %b) {
+ %z = call i64 @llvm.hexagon.S2.shuffeh(i64 %a, i64 %b)
+ ret i64 %z
+}
+; CHECK: r1:0 = shuffeh(r1:0, r3:2)
+
+declare i64 @llvm.hexagon.S2.shuffoh(i64, i64)
+define i64 @S2_shuffoh(i64 %a, i64 %b) {
+ %z = call i64 @llvm.hexagon.S2.shuffoh(i64 %a, i64 %b)
+ ret i64 %z
+}
+; CHECK: r1:0 = shuffoh(r1:0, r3:2)
+
+; Vector splat bytes
+declare i32 @llvm.hexagon.S2.vsplatrb(i32)
+define i32 @S2_vsplatrb(i32 %a) {
+ %z = call i32 @llvm.hexagon.S2.vsplatrb(i32 %a)
+ ret i32 %z
+}
+; CHECK: r0 = vsplatb(r0)
+
+; Vector splat halfwords
+declare i64 @llvm.hexagon.S2.vsplatrh(i32)
+define i64 @S2_vsplatrh(i32 %a) {
+ %z = call i64 @llvm.hexagon.S2.vsplatrh(i32 %a)
+ ret i64 %z
+}
+; CHECK: = vsplath(r0)
+
+; Vector splice
+declare i64 @llvm.hexagon.S2.vspliceib(i64, i64, i32)
+define i64 @S2_vspliceib(i64 %a, i64 %b) {
+ %z = call i64 @llvm.hexagon.S2.vspliceib(i64 %a, i64 %b, i32 0)
+ ret i64 %z
+}
+; CHECK: r1:0 = vspliceb(r1:0, r3:2, #0)
+
+declare i64 @llvm.hexagon.S2.vsplicerb(i64, i64, i32)
+define i64 @S2_vsplicerb(i64 %a, i64 %b, i32 %c) {
+ %z = call i64 @llvm.hexagon.S2.vsplicerb(i64 %a, i64 %b, i32 %c)
+ ret i64 %z
+}
+; CHECK: r1:0 = vspliceb(r1:0, r3:2, p0)
+
+; Vector sign extend
+declare i64 @llvm.hexagon.S2.vsxtbh(i32)
+define i64 @S2_vsxtbh(i32 %a) {
+ %z = call i64 @llvm.hexagon.S2.vsxtbh(i32 %a)
+ ret i64 %z
+}
+; CHECK: = vsxtbh(r0)
+
+declare i64 @llvm.hexagon.S2.vsxthw(i32)
+define i64 @S2_vsxthw(i32 %a) {
+ %z = call i64 @llvm.hexagon.S2.vsxthw(i32 %a)
+ ret i64 %z
+}
+; CHECK: = vsxthw(r0)
+
+; Vector truncate
+declare i32 @llvm.hexagon.S2.vtrunohb(i64)
+define i32 @S2_vtrunohb(i64 %a) {
+ %z = call i32 @llvm.hexagon.S2.vtrunohb(i64 %a)
+ ret i32 %z
+}
+; CHECK: r0 = vtrunohb(r1:0)
+
+declare i32 @llvm.hexagon.S2.vtrunehb(i64)
+define i32 @S2_vtrunehb(i64 %a) {
+ %z = call i32 @llvm.hexagon.S2.vtrunehb(i64 %a)
+ ret i32 %z
+}
+; CHECK: r0 = vtrunehb(r1:0)
+
+declare i64 @llvm.hexagon.S2.vtrunowh(i64, i64)
+define i64 @S2_vtrunowh(i64 %a, i64 %b) {
+ %z = call i64 @llvm.hexagon.S2.vtrunowh(i64 %a, i64 %b)
+ ret i64 %z
+}
+; CHECK: r1:0 = vtrunowh(r1:0, r3:2)
+
+declare i64 @llvm.hexagon.S2.vtrunewh(i64, i64)
+define i64 @S2_vtrunewh(i64 %a, i64 %b) {
+ %z = call i64 @llvm.hexagon.S2.vtrunewh(i64 %a, i64 %b)
+ ret i64 %z
+}
+; CHECK: r1:0 = vtrunewh(r1:0, r3:2)
+
+; Vector zero extend
+declare i64 @llvm.hexagon.S2.vzxtbh(i32)
+define i64 @S2_vzxtbh(i32 %a) {
+ %z = call i64 @llvm.hexagon.S2.vzxtbh(i32 %a)
+ ret i64 %z
+}
+; CHECK: = vzxtbh(r0)
+
+declare i64 @llvm.hexagon.S2.vzxthw(i32)
+define i64 @S2_vzxthw(i32 %a) {
+ %z = call i64 @llvm.hexagon.S2.vzxthw(i32 %a)
+ ret i64 %z
+}
+; CHECK: = vzxthw(r0)
diff --git a/test/CodeGen/Hexagon/intrinsics/xtype_pred.ll b/test/CodeGen/Hexagon/intrinsics/xtype_pred.ll
new file mode 100644
index 000000000000..96e63d8d7790
--- /dev/null
+++ b/test/CodeGen/Hexagon/intrinsics/xtype_pred.ll
@@ -0,0 +1,351 @@
+; RUN: llc -march=hexagon -O0 < %s | FileCheck %s
+; Hexagon Programmer's Reference Manual 11.10.7 XTYPE/PRED
+
+; Compare byte
+declare i32 @llvm.hexagon.A4.cmpbgt(i32, i32)
+define i32 @A4_cmpbgt(i32 %a, i32 %b) {
+ %z = call i32 @llvm.hexagon.A4.cmpbgt(i32 %a, i32 %b)
+ ret i32 %z
+}
+; CHECK: p0 = cmpb.gt(r0, r1)
+
+declare i32 @llvm.hexagon.A4.cmpbeq(i32, i32)
+define i32 @A4_cmpbeq(i32 %a, i32 %b) {
+ %z = call i32 @llvm.hexagon.A4.cmpbeq(i32 %a, i32 %b)
+ ret i32 %z
+}
+; CHECK: p0 = cmpb.eq(r0, r1)
+
+declare i32 @llvm.hexagon.A4.cmpbgtu(i32, i32)
+define i32 @A4_cmpbgtu(i32 %a, i32 %b) {
+ %z = call i32 @llvm.hexagon.A4.cmpbgtu(i32 %a, i32 %b)
+ ret i32 %z
+}
+; CHECK: p0 = cmpb.gtu(r0, r1)
+
+declare i32 @llvm.hexagon.A4.cmpbgti(i32, i32)
+define i32 @A4_cmpbgti(i32 %a) {
+ %z = call i32 @llvm.hexagon.A4.cmpbgti(i32 %a, i32 0)
+ ret i32 %z
+}
+; CHECK: p0 = cmpb.gt(r0, #0)
+
+declare i32 @llvm.hexagon.A4.cmpbeqi(i32, i32)
+define i32 @A4_cmpbeqi(i32 %a) {
+ %z = call i32 @llvm.hexagon.A4.cmpbeqi(i32 %a, i32 0)
+ ret i32 %z
+}
+; CHECK: p0 = cmpb.eq(r0, #0)
+
+declare i32 @llvm.hexagon.A4.cmpbgtui(i32, i32)
+define i32 @A4_cmpbgtui(i32 %a) {
+ %z = call i32 @llvm.hexagon.A4.cmpbgtui(i32 %a, i32 0)
+ ret i32 %z
+}
+; CHECK: p0 = cmpb.gtu(r0, #0)
+
+; Compare half
+declare i32 @llvm.hexagon.A4.cmphgt(i32, i32)
+define i32 @A4_cmphgt(i32 %a, i32 %b) {
+ %z = call i32 @llvm.hexagon.A4.cmphgt(i32 %a, i32 %b)
+ ret i32 %z
+}
+; CHECK: p0 = cmph.gt(r0, r1)
+
+declare i32 @llvm.hexagon.A4.cmpheq(i32, i32)
+define i32 @A4_cmpheq(i32 %a, i32 %b) {
+ %z = call i32 @llvm.hexagon.A4.cmpheq(i32 %a, i32 %b)
+ ret i32 %z
+}
+; CHECK: p0 = cmph.eq(r0, r1)
+
+declare i32 @llvm.hexagon.A4.cmphgtu(i32, i32)
+define i32 @A4_cmphgtu(i32 %a, i32 %b) {
+ %z = call i32 @llvm.hexagon.A4.cmphgtu(i32 %a, i32 %b)
+ ret i32 %z
+}
+; CHECK: p0 = cmph.gtu(r0, r1)
+
+declare i32 @llvm.hexagon.A4.cmphgti(i32, i32)
+define i32 @A4_cmphgti(i32 %a) {
+ %z = call i32 @llvm.hexagon.A4.cmphgti(i32 %a, i32 0)
+ ret i32 %z
+}
+; CHECK: p0 = cmph.gt(r0, #0)
+
+declare i32 @llvm.hexagon.A4.cmpheqi(i32, i32)
+define i32 @A4_cmpheqi(i32 %a) {
+ %z = call i32 @llvm.hexagon.A4.cmpheqi(i32 %a, i32 0)
+ ret i32 %z
+}
+; CHECK: p0 = cmph.eq(r0, #0)
+
+declare i32 @llvm.hexagon.A4.cmphgtui(i32, i32)
+define i32 @A4_cmphgtui(i32 %a) {
+ %z = call i32 @llvm.hexagon.A4.cmphgtui(i32 %a, i32 0)
+ ret i32 %z
+}
+; CHECK: p0 = cmph.gtu(r0, #0)
+
+; Compare doublewords
+declare i32 @llvm.hexagon.C2.cmpgtp(i64, i64)
+define i32 @C2_cmpgtp(i64 %a, i64 %b) {
+ %z = call i32 @llvm.hexagon.C2.cmpgtp(i64 %a, i64 %b)
+ ret i32 %z
+}
+; CHECK: p0 = cmp.gt(r1:0, r3:2)
+
+declare i32 @llvm.hexagon.C2.cmpeqp(i64, i64)
+define i32 @C2_cmpeqp(i64 %a, i64 %b) {
+ %z = call i32 @llvm.hexagon.C2.cmpeqp(i64 %a, i64 %b)
+ ret i32 %z
+}
+; CHECK: p0 = cmp.eq(r1:0, r3:2)
+
+declare i32 @llvm.hexagon.C2.cmpgtup(i64, i64)
+define i32 @C2_cmpgtup(i64 %a, i64 %b) {
+ %z = call i32 @llvm.hexagon.C2.cmpgtup(i64 %a, i64 %b)
+ ret i32 %z
+}
+; CHECK: p0 = cmp.gtu(r1:0, r3:2)
+
+; Compare bitmask
+declare i32 @llvm.hexagon.C2.bitsclri(i32, i32)
+define i32 @C2_bitsclri(i32 %a) {
+ %z = call i32 @llvm.hexagon.C2.bitsclri(i32 %a, i32 0)
+ ret i32 %z
+}
+; CHECK: p0 = bitsclr(r0, #0)
+
+declare i32 @llvm.hexagon.C4.nbitsclri(i32, i32)
+define i32 @C4_nbitsclri(i32 %a) {
+ %z = call i32 @llvm.hexagon.C4.nbitsclri(i32 %a, i32 0)
+ ret i32 %z
+}
+; CHECK: p0 = !bitsclr(r0, #0)
+
+declare i32 @llvm.hexagon.C2.bitsset(i32, i32)
+define i32 @C2_bitsset(i32 %a, i32 %b) {
+ %z = call i32 @llvm.hexagon.C2.bitsset(i32 %a, i32 %b)
+ ret i32 %z
+}
+; CHECK: p0 = bitsset(r0, r1)
+
+declare i32 @llvm.hexagon.C4.nbitsset(i32, i32)
+define i32 @C4_nbitsset(i32 %a, i32 %b) {
+ %z = call i32 @llvm.hexagon.C4.nbitsset(i32 %a, i32 %b)
+ ret i32 %z
+}
+; CHECK: p0 = !bitsset(r0, r1)
+
+declare i32 @llvm.hexagon.C2.bitsclr(i32, i32)
+define i32 @C2_bitsclr(i32 %a, i32 %b) {
+ %z = call i32 @llvm.hexagon.C2.bitsclr(i32 %a, i32 %b)
+ ret i32 %z
+}
+; CHECK: p0 = bitsclr(r0, r1)
+
+declare i32 @llvm.hexagon.C4.nbitsclr(i32, i32)
+define i32 @C4_nbitsclr(i32 %a, i32 %b) {
+ %z = call i32 @llvm.hexagon.C4.nbitsclr(i32 %a, i32 %b)
+ ret i32 %z
+}
+; CHECK: p0 = !bitsclr(r0, r1)
+
+; Mask generate from predicate
+declare i64 @llvm.hexagon.C2.mask(i32)
+define i64 @C2_mask(i32 %a) {
+ %z = call i64 @llvm.hexagon.C2.mask(i32 %a)
+ ret i64 %z
+}
+; CHECK: = mask(p0)
+
+; Check for TLB match
+declare i32 @llvm.hexagon.A4.tlbmatch(i64, i32)
+define i32 @A4_tlbmatch(i64 %a, i32 %b) {
+ %z = call i32 @llvm.hexagon.A4.tlbmatch(i64 %a, i32 %b)
+ ret i32 %z
+}
+; CHECK: p0 = tlbmatch(r1:0, r2)
+
+; Test bit
+declare i32 @llvm.hexagon.S2.tstbit.i(i32, i32)
+define i32 @S2_tstbit_i(i32 %a) {
+ %z = call i32 @llvm.hexagon.S2.tstbit.i(i32 %a, i32 0)
+ ret i32 %z
+}
+; CHECK: p0 = tstbit(r0, #0)
+
+declare i32 @llvm.hexagon.S4.ntstbit.i(i32, i32)
+define i32 @S4_ntstbit_i(i32 %a) {
+ %z = call i32 @llvm.hexagon.S4.ntstbit.i(i32 %a, i32 0)
+ ret i32 %z
+}
+; CHECK: p0 = !tstbit(r0, #0)
+
+declare i32 @llvm.hexagon.S2.tstbit.r(i32, i32)
+define i32 @S2_tstbit_r(i32 %a, i32 %b) {
+ %z = call i32 @llvm.hexagon.S2.tstbit.r(i32 %a, i32 %b)
+ ret i32 %z
+}
+; CHECK: p0 = tstbit(r0, r1)
+
+declare i32 @llvm.hexagon.S4.ntstbit.r(i32, i32)
+define i32 @S4_ntstbit_r(i32 %a, i32 %b) {
+ %z = call i32 @llvm.hexagon.S4.ntstbit.r(i32 %a, i32 %b)
+ ret i32 %z
+}
+; CHECK: p0 = !tstbit(r0, r1)
+
+; Vector compare halfwords
+declare i32 @llvm.hexagon.A2.vcmpheq(i64, i64)
+define i32 @A2_vcmpheq(i64 %a, i64 %b) {
+ %z = call i32 @llvm.hexagon.A2.vcmpheq(i64 %a, i64 %b)
+ ret i32 %z
+}
+; CHECK: p0 = vcmph.eq(r1:0, r3:2)
+
+declare i32 @llvm.hexagon.A2.vcmphgt(i64, i64)
+define i32 @A2_vcmphgt(i64 %a, i64 %b) {
+ %z = call i32 @llvm.hexagon.A2.vcmphgt(i64 %a, i64 %b)
+ ret i32 %z
+}
+; CHECK: p0 = vcmph.gt(r1:0, r3:2)
+
+declare i32 @llvm.hexagon.A2.vcmphgtu(i64, i64)
+define i32 @A2_vcmphgtu(i64 %a, i64 %b) {
+ %z = call i32 @llvm.hexagon.A2.vcmphgtu(i64 %a, i64 %b)
+ ret i32 %z
+}
+; CHECK: p0 = vcmph.gtu(r1:0, r3:2)
+
+declare i32 @llvm.hexagon.A4.vcmpheqi(i64, i32)
+define i32 @A4_vcmpheqi(i64 %a) {
+ %z = call i32 @llvm.hexagon.A4.vcmpheqi(i64 %a, i32 0)
+ ret i32 %z
+}
+; CHECK: p0 = vcmph.eq(r1:0, #0)
+
+declare i32 @llvm.hexagon.A4.vcmphgti(i64, i32)
+define i32 @A4_vcmphgti(i64 %a) {
+ %z = call i32 @llvm.hexagon.A4.vcmphgti(i64 %a, i32 0)
+ ret i32 %z
+}
+; CHECK: p0 = vcmph.gt(r1:0, #0)
+
+declare i32 @llvm.hexagon.A4.vcmphgtui(i64, i32)
+define i32 @A4_vcmphgtui(i64 %a) {
+ %z = call i32 @llvm.hexagon.A4.vcmphgtui(i64 %a, i32 0)
+ ret i32 %z
+}
+; CHECK: p0 = vcmph.gtu(r1:0, #0)
+
+; Vector compare bytes for any match
+declare i32 @llvm.hexagon.A4.vcmpbeq.any(i64, i64)
+define i32 @A4_vcmpbeq_any(i64 %a, i64 %b) {
+ %z = call i32 @llvm.hexagon.A4.vcmpbeq.any(i64 %a, i64 %b)
+ ret i32 %z
+}
+; CHECK: p0 = any8(vcmpb.eq(r1:0, r3:2))
+
+; Vector compare bytes
+declare i32 @llvm.hexagon.A2.vcmpbeq(i64, i64)
+define i32 @A2_vcmpbeq(i64 %a, i64 %b) {
+ %z = call i32 @llvm.hexagon.A2.vcmpbeq(i64 %a, i64 %b)
+ ret i32 %z
+}
+; CHECK: p0 = vcmpb.eq(r1:0, r3:2)
+
+declare i32 @llvm.hexagon.A2.vcmpbgtu(i64, i64)
+define i32 @A2_vcmpbgtu(i64 %a, i64 %b) {
+ %z = call i32 @llvm.hexagon.A2.vcmpbgtu(i64 %a, i64 %b)
+ ret i32 %z
+}
+; CHECK: p0 = vcmpb.gtu(r1:0, r3:2)
+
+declare i32 @llvm.hexagon.A4.vcmpbgt(i64, i64)
+define i32 @A4_vcmpbgt(i64 %a, i64 %b) {
+ %z = call i32 @llvm.hexagon.A4.vcmpbgt(i64 %a, i64 %b)
+ ret i32 %z
+}
+; CHECK: p0 = vcmpb.gt(r1:0, r3:2)
+
+declare i32 @llvm.hexagon.A4.vcmpbeqi(i64, i32)
+define i32 @A4_vcmpbeqi(i64 %a) {
+ %z = call i32 @llvm.hexagon.A4.vcmpbeqi(i64 %a, i32 0)
+ ret i32 %z
+}
+; CHECK: p0 = vcmpb.eq(r1:0, #0)
+
+declare i32 @llvm.hexagon.A4.vcmpbgti(i64, i32)
+define i32 @A4_vcmpbgti(i64 %a) {
+ %z = call i32 @llvm.hexagon.A4.vcmpbgti(i64 %a, i32 0)
+ ret i32 %z
+}
+; CHECK: p0 = vcmpb.gt(r1:0, #0)
+
+declare i32 @llvm.hexagon.A4.vcmpbgtui(i64, i32)
+define i32 @A4_vcmpbgtui(i64 %a) {
+ %z = call i32 @llvm.hexagon.A4.vcmpbgtui(i64 %a, i32 0)
+ ret i32 %z
+}
+; CHECK: p0 = vcmpb.gtu(r1:0, #0)
+
+; Vector compare words
+declare i32 @llvm.hexagon.A2.vcmpweq(i64, i64)
+define i32 @A2_vcmpweq(i64 %a, i64 %b) {
+ %z = call i32 @llvm.hexagon.A2.vcmpweq(i64 %a, i64 %b)
+ ret i32 %z
+}
+; CHECK: p0 = vcmpw.eq(r1:0, r3:2)
+
+declare i32 @llvm.hexagon.A2.vcmpwgt(i64, i64)
+define i32 @A2_vcmpwgt(i64 %a, i64 %b) {
+ %z = call i32 @llvm.hexagon.A2.vcmpwgt(i64 %a, i64 %b)
+ ret i32 %z
+}
+; CHECK: p0 = vcmpw.gt(r1:0, r3:2)
+
+declare i32 @llvm.hexagon.A2.vcmpwgtu(i64, i64)
+define i32 @A2_vcmpwgtu(i64 %a, i64 %b) {
+ %z = call i32 @llvm.hexagon.A2.vcmpwgtu(i64 %a, i64 %b)
+ ret i32 %z
+}
+; CHECK: p0 = vcmpw.gtu(r1:0, r3:2)
+
+declare i32 @llvm.hexagon.A4.vcmpweqi(i64, i32)
+define i32 @A4_vcmpweqi(i64 %a) {
+ %z = call i32 @llvm.hexagon.A4.vcmpweqi(i64 %a, i32 0)
+ ret i32 %z
+}
+; CHECK: p0 = vcmpw.eq(r1:0, #0)
+
+declare i32 @llvm.hexagon.A4.vcmpwgti(i64, i32)
+define i32 @A4_vcmpwgti(i64 %a) {
+ %z = call i32 @llvm.hexagon.A4.vcmpwgti(i64 %a, i32 0)
+ ret i32 %z
+}
+; CHECK: p0 = vcmpw.gt(r1:0, #0)
+
+declare i32 @llvm.hexagon.A4.vcmpwgtui(i64, i32)
+define i32 @A4_vcmpwgtui(i64 %a) {
+ %z = call i32 @llvm.hexagon.A4.vcmpwgtui(i64 %a, i32 0)
+ ret i32 %z
+}
+; CHECK: p0 = vcmpw.gtu(r1:0, #0)
+
+; Viterbi pack even and odd predicate bits
+declare i32 @llvm.hexagon.C2.vitpack(i32, i32)
+define i32 @C2_vitpack(i32 %a, i32 %b) {
+ %z = call i32 @llvm.hexagon.C2.vitpack(i32 %a, i32 %b)
+ ret i32 %z
+}
+; CHECK: r0 = vitpack(p1, p0)
+
+; Vector mux
+declare i64 @llvm.hexagon.C2.vmux(i32, i64, i64)
+define i64 @C2_vmux(i32 %a, i64 %b, i64 %c) {
+ %z = call i64 @llvm.hexagon.C2.vmux(i32 %a, i64 %b, i64 %c)
+ ret i64 %z
+}
+; CHECK: = vmux(p0, r3:2, r5:4)
diff --git a/test/CodeGen/Hexagon/intrinsics/xtype_shift.ll b/test/CodeGen/Hexagon/intrinsics/xtype_shift.ll
new file mode 100644
index 000000000000..c84999bf94fd
--- /dev/null
+++ b/test/CodeGen/Hexagon/intrinsics/xtype_shift.ll
@@ -0,0 +1,723 @@
+; RUN: llc -march=hexagon -O0 < %s | FileCheck %s
+; Hexagon Programmer's Reference Manual 11.10.8 XTYPE/SHIFT
+
+; Shift by immediate
+declare i64 @llvm.hexagon.S2.asr.i.p(i64, i32)
+define i64 @S2_asr_i_p(i64 %a) {
+ %z = call i64 @llvm.hexagon.S2.asr.i.p(i64 %a, i32 0)
+ ret i64 %z
+}
+; CHECK: r1:0 = asr(r1:0, #0)
+
+declare i64 @llvm.hexagon.S2.lsr.i.p(i64, i32)
+define i64 @S2_lsr_i_p(i64 %a) {
+ %z = call i64 @llvm.hexagon.S2.lsr.i.p(i64 %a, i32 0)
+ ret i64 %z
+}
+; CHECK: r1:0 = lsr(r1:0, #0)
+
+declare i64 @llvm.hexagon.S2.asl.i.p(i64, i32)
+define i64 @S2_asl_i_p(i64 %a) {
+ %z = call i64 @llvm.hexagon.S2.asl.i.p(i64 %a, i32 0)
+ ret i64 %z
+}
+; CHECK: r1:0 = asl(r1:0, #0)
+
+declare i32 @llvm.hexagon.S2.asr.i.r(i32, i32)
+define i32 @S2_asr_i_r(i32 %a) {
+ %z = call i32 @llvm.hexagon.S2.asr.i.r(i32 %a, i32 0)
+ ret i32 %z
+}
+; CHECK: r0 = asr(r0, #0)
+
+declare i32 @llvm.hexagon.S2.lsr.i.r(i32, i32)
+define i32 @S2_lsr_i_r(i32 %a) {
+ %z = call i32 @llvm.hexagon.S2.lsr.i.r(i32 %a, i32 0)
+ ret i32 %z
+}
+; CHECK: r0 = lsr(r0, #0)
+
+declare i32 @llvm.hexagon.S2.asl.i.r(i32, i32)
+define i32 @S2_asl_i_r(i32 %a) {
+ %z = call i32 @llvm.hexagon.S2.asl.i.r(i32 %a, i32 0)
+ ret i32 %z
+}
+; CHECK: r0 = asl(r0, #0)
+
+; Shift by immediate and accumulate
+declare i64 @llvm.hexagon.S2.asr.i.p.nac(i64, i64, i32)
+define i64 @S2_asr_i_p_nac(i64 %a, i64 %b) {
+ %z = call i64 @llvm.hexagon.S2.asr.i.p.nac(i64 %a, i64 %b, i32 0)
+ ret i64 %z
+}
+; CHECK: r1:0 -= asr(r3:2, #0)
+
+declare i64 @llvm.hexagon.S2.lsr.i.p.nac(i64, i64, i32)
+define i64 @S2_lsr_i_p_nac(i64 %a, i64 %b) {
+ %z = call i64 @llvm.hexagon.S2.lsr.i.p.nac(i64 %a, i64 %b, i32 0)
+ ret i64 %z
+}
+; CHECK: r1:0 -= lsr(r3:2, #0)
+
+declare i64 @llvm.hexagon.S2.asl.i.p.nac(i64, i64, i32)
+define i64 @S2_asl_i_p_nac(i64 %a, i64 %b) {
+ %z = call i64 @llvm.hexagon.S2.asl.i.p.nac(i64 %a, i64 %b, i32 0)
+ ret i64 %z
+}
+; CHECK: r1:0 -= asl(r3:2, #0)
+
+declare i64 @llvm.hexagon.S2.asr.i.p.acc(i64, i64, i32)
+define i64 @S2_asr_i_p_acc(i64 %a, i64 %b) {
+ %z = call i64 @llvm.hexagon.S2.asr.i.p.acc(i64 %a, i64 %b, i32 0)
+ ret i64 %z
+}
+; CHECK: r1:0 += asr(r3:2, #0)
+
+declare i64 @llvm.hexagon.S2.lsr.i.p.acc(i64, i64, i32)
+define i64 @S2_lsr_i_p_acc(i64 %a, i64 %b) {
+ %z = call i64 @llvm.hexagon.S2.lsr.i.p.acc(i64 %a, i64 %b, i32 0)
+ ret i64 %z
+}
+; CHECK: r1:0 += lsr(r3:2, #0)
+
+declare i64 @llvm.hexagon.S2.asl.i.p.acc(i64, i64, i32)
+define i64 @S2_asl_i_p_acc(i64 %a, i64 %b) {
+ %z = call i64 @llvm.hexagon.S2.asl.i.p.acc(i64 %a, i64 %b, i32 0)
+ ret i64 %z
+}
+; CHECK: r1:0 += asl(r3:2, #0)
+
+declare i32 @llvm.hexagon.S2.asr.i.r.nac(i32, i32, i32)
+define i32 @S2_asr_i_r_nac(i32 %a, i32 %b) {
+ %z = call i32 @llvm.hexagon.S2.asr.i.r.nac(i32 %a, i32 %b, i32 0)
+ ret i32 %z
+}
+; CHECK: r0 -= asr(r1, #0)
+
+declare i32 @llvm.hexagon.S2.lsr.i.r.nac(i32, i32, i32)
+define i32 @S2_lsr_i_r_nac(i32 %a, i32 %b) {
+ %z = call i32 @llvm.hexagon.S2.lsr.i.r.nac(i32 %a, i32 %b, i32 0)
+ ret i32 %z
+}
+; CHECK: r0 -= lsr(r1, #0)
+
+declare i32 @llvm.hexagon.S2.asl.i.r.nac(i32, i32, i32)
+define i32 @S2_asl_i_r_nac(i32 %a, i32 %b) {
+ %z = call i32 @llvm.hexagon.S2.asl.i.r.nac(i32 %a, i32 %b, i32 0)
+ ret i32 %z
+}
+; CHECK: r0 -= asl(r1, #0)
+
+declare i32 @llvm.hexagon.S2.asr.i.r.acc(i32, i32, i32)
+define i32 @S2_asr_i_r_acc(i32 %a, i32 %b) {
+ %z = call i32 @llvm.hexagon.S2.asr.i.r.acc(i32 %a, i32 %b, i32 0)
+ ret i32 %z
+}
+; CHECK: r0 += asr(r1, #0)
+
+declare i32 @llvm.hexagon.S2.lsr.i.r.acc(i32, i32, i32)
+define i32 @S2_lsr_i_r_acc(i32 %a, i32 %b) {
+ %z = call i32 @llvm.hexagon.S2.lsr.i.r.acc(i32 %a, i32 %b, i32 0)
+ ret i32 %z
+}
+; CHECK: r0 += lsr(r1, #0)
+
+declare i32 @llvm.hexagon.S2.asl.i.r.acc(i32, i32, i32)
+define i32 @S2_asl_i_r_acc(i32 %a, i32 %b) {
+ %z = call i32 @llvm.hexagon.S2.asl.i.r.acc(i32 %a, i32 %b, i32 0)
+ ret i32 %z
+}
+; CHECK: r0 += asl(r1, #0)
+
+; Shift by immediate and add
+declare i32 @llvm.hexagon.S4.addi.asl.ri(i32, i32, i32)
+define i32 @S4_addi_asl_ri(i32 %a) {
+ %z = call i32 @llvm.hexagon.S4.addi.asl.ri(i32 0, i32 %a, i32 0)
+ ret i32 %z
+}
+; CHECK: r0 = add(#0, asl(r0, #0))
+
+declare i32 @llvm.hexagon.S4.subi.asl.ri(i32, i32, i32)
+define i32 @S4_subi_asl_ri(i32 %a) {
+ %z = call i32 @llvm.hexagon.S4.subi.asl.ri(i32 0, i32 %a, i32 0)
+ ret i32 %z
+}
+; CHECK: r0 = sub(#0, asl(r0, #0))
+
+declare i32 @llvm.hexagon.S4.addi.lsr.ri(i32, i32, i32)
+define i32 @S4_addi_lsr_ri(i32 %a) {
+ %z = call i32 @llvm.hexagon.S4.addi.lsr.ri(i32 0, i32 %a, i32 0)
+ ret i32 %z
+}
+; CHECK: r0 = add(#0, lsr(r0, #0))
+
+declare i32 @llvm.hexagon.S4.subi.lsr.ri(i32, i32, i32)
+define i32 @S4_subi_lsr_ri(i32 %a) {
+ %z = call i32 @llvm.hexagon.S4.subi.lsr.ri(i32 0, i32 %a, i32 0)
+ ret i32 %z
+}
+; CHECK: r0 = sub(#0, lsr(r0, #0))
+
+declare i32 @llvm.hexagon.S2.addasl.rrri(i32, i32, i32)
+define i32 @S2_addasl_rrri(i32 %a, i32 %b) {
+ %z = call i32 @llvm.hexagon.S2.addasl.rrri(i32 %a, i32 %b, i32 0)
+ ret i32 %z
+}
+; CHECK: r0 = addasl(r0, r1, #0)
+
+; Shift by immediate and logical
+declare i64 @llvm.hexagon.S2.asr.i.p.and(i64, i64, i32)
+define i64 @S2_asr_i_p_and(i64 %a, i64 %b) {
+ %z = call i64 @llvm.hexagon.S2.asr.i.p.and(i64 %a, i64 %b, i32 0)
+ ret i64 %z
+}
+; CHECK: r1:0 &= asr(r3:2, #0)
+
+declare i64 @llvm.hexagon.S2.lsr.i.p.and(i64, i64, i32)
+define i64 @S2_lsr_i_p_and(i64 %a, i64 %b) {
+ %z = call i64 @llvm.hexagon.S2.lsr.i.p.and(i64 %a, i64 %b, i32 0)
+ ret i64 %z
+}
+; CHECK: r1:0 &= lsr(r3:2, #0)
+
+declare i64 @llvm.hexagon.S2.asl.i.p.and(i64, i64, i32)
+define i64 @S2_asl_i_p_and(i64 %a, i64 %b) {
+ %z = call i64 @llvm.hexagon.S2.asl.i.p.and(i64 %a, i64 %b, i32 0)
+ ret i64 %z
+}
+; CHECK: r1:0 &= asl(r3:2, #0)
+
+declare i64 @llvm.hexagon.S2.asr.i.p.or(i64, i64, i32)
+define i64 @S2_asr_i_p_or(i64 %a, i64 %b) {
+ %z = call i64 @llvm.hexagon.S2.asr.i.p.or(i64 %a, i64 %b, i32 0)
+ ret i64 %z
+}
+; CHECK: r1:0 |= asr(r3:2, #0)
+
+declare i64 @llvm.hexagon.S2.lsr.i.p.or(i64, i64, i32)
+define i64 @S2_lsr_i_p_or(i64 %a, i64 %b) {
+ %z = call i64 @llvm.hexagon.S2.lsr.i.p.or(i64 %a, i64 %b, i32 0)
+ ret i64 %z
+}
+; CHECK: r1:0 |= lsr(r3:2, #0)
+
+declare i64 @llvm.hexagon.S2.asl.i.p.or(i64, i64, i32)
+define i64 @S2_asl_i_p_or(i64 %a, i64 %b) {
+ %z = call i64 @llvm.hexagon.S2.asl.i.p.or(i64 %a, i64 %b, i32 0)
+ ret i64 %z
+}
+; CHECK: r1:0 |= asl(r3:2, #0)
+
+declare i64 @llvm.hexagon.S2.lsr.i.p.xacc(i64, i64, i32)
+define i64 @S2_lsr_i_p_xacc(i64 %a, i64 %b) {
+ %z = call i64 @llvm.hexagon.S2.lsr.i.p.xacc(i64 %a, i64 %b, i32 0)
+ ret i64 %z
+}
+; CHECK: r1:0 ^= lsr(r3:2, #0)
+
+declare i64 @llvm.hexagon.S2.asl.i.p.xacc(i64, i64, i32)
+define i64 @S2_asl_i_p_xacc(i64 %a, i64 %b) {
+ %z = call i64 @llvm.hexagon.S2.asl.i.p.xacc(i64 %a, i64 %b, i32 0)
+ ret i64 %z
+}
+; CHECK: r1:0 ^= asl(r3:2, #0)
+
+declare i32 @llvm.hexagon.S2.asr.i.r.and(i32, i32, i32)
+define i32 @S2_asr_i_r_and(i32 %a, i32 %b) {
+ %z = call i32 @llvm.hexagon.S2.asr.i.r.and(i32 %a, i32 %b, i32 0)
+ ret i32 %z
+}
+; CHECK: r0 &= asr(r1, #0)
+
+declare i32 @llvm.hexagon.S2.lsr.i.r.and(i32, i32, i32)
+define i32 @S2_lsr_i_r_and(i32 %a, i32 %b) {
+ %z = call i32 @llvm.hexagon.S2.lsr.i.r.and(i32 %a, i32 %b, i32 0)
+ ret i32 %z
+}
+; CHECK: r0 &= lsr(r1, #0)
+
+declare i32 @llvm.hexagon.S2.asl.i.r.and(i32, i32, i32)
+define i32 @S2_asl_i_r_and(i32 %a, i32 %b) {
+ %z = call i32 @llvm.hexagon.S2.asl.i.r.and(i32 %a, i32 %b, i32 0)
+ ret i32 %z
+}
+; CHECK: r0 &= asl(r1, #0)
+
+declare i32 @llvm.hexagon.S2.asr.i.r.or(i32, i32, i32)
+define i32 @S2_asr_i_r_or(i32 %a, i32 %b) {
+ %z = call i32 @llvm.hexagon.S2.asr.i.r.or(i32 %a, i32 %b, i32 0)
+ ret i32 %z
+}
+; CHECK: r0 |= asr(r1, #0)
+
+declare i32 @llvm.hexagon.S2.lsr.i.r.or(i32, i32, i32)
+define i32 @S2_lsr_i_r_or(i32 %a, i32 %b) {
+ %z = call i32 @llvm.hexagon.S2.lsr.i.r.or(i32 %a, i32 %b, i32 0)
+ ret i32 %z
+}
+; CHECK: r0 |= lsr(r1, #0)
+
+declare i32 @llvm.hexagon.S2.asl.i.r.or(i32, i32, i32)
+define i32 @S2_asl_i_r_or(i32 %a, i32 %b) {
+ %z = call i32 @llvm.hexagon.S2.asl.i.r.or(i32 %a, i32 %b, i32 0)
+ ret i32 %z
+}
+; CHECK: r0 |= asl(r1, #0)
+
+declare i32 @llvm.hexagon.S2.lsr.i.r.xacc(i32, i32, i32)
+define i32 @S2_lsr_i_r_xacc(i32 %a, i32 %b) {
+ %z = call i32 @llvm.hexagon.S2.lsr.i.r.xacc(i32 %a, i32 %b, i32 0)
+ ret i32 %z
+}
+; CHECK: r0 ^= lsr(r1, #0)
+
+declare i32 @llvm.hexagon.S2.asl.i.r.xacc(i32, i32, i32)
+define i32 @S2_asl_i_r_xacc(i32 %a, i32 %b) {
+ %z = call i32 @llvm.hexagon.S2.asl.i.r.xacc(i32 %a, i32 %b, i32 0)
+ ret i32 %z
+}
+; CHECK: r0 ^= asl(r1, #0)
+
+declare i32 @llvm.hexagon.S4.andi.asl.ri(i32, i32, i32)
+define i32 @S4_andi_asl_ri(i32 %a) {
+ %z = call i32 @llvm.hexagon.S4.andi.asl.ri(i32 0, i32 %a, i32 0)
+ ret i32 %z
+}
+; CHECK: r0 = and(#0, asl(r0, #0))
+
+declare i32 @llvm.hexagon.S4.ori.asl.ri(i32, i32, i32)
+define i32 @S4_ori_asl_ri(i32 %a) {
+ %z = call i32 @llvm.hexagon.S4.ori.asl.ri(i32 0, i32 %a, i32 0)
+ ret i32 %z
+}
+; CHECK: r0 = or(#0, asl(r0, #0))
+
+declare i32 @llvm.hexagon.S4.andi.lsr.ri(i32, i32, i32)
+define i32 @S4_andi_lsr_ri(i32 %a) {
+ %z = call i32 @llvm.hexagon.S4.andi.lsr.ri(i32 0, i32 %a, i32 0)
+ ret i32 %z
+}
+; CHECK: r0 = and(#0, lsr(r0, #0))
+
+declare i32 @llvm.hexagon.S4.ori.lsr.ri(i32, i32, i32)
+define i32 @S4_ori_lsr_ri(i32 %a) {
+ %z = call i32 @llvm.hexagon.S4.ori.lsr.ri(i32 0, i32 %a, i32 0)
+ ret i32 %z
+}
+; CHECK: r0 = or(#0, lsr(r0, #0))
+
+; Shift right by immediate with rounding
+declare i64 @llvm.hexagon.S2.asr.i.p.rnd(i64, i32)
+define i64 @S2_asr_i_p_rnd(i64 %a) {
+ %z = call i64 @llvm.hexagon.S2.asr.i.p.rnd(i64 %a, i32 0)
+ ret i64 %z
+}
+; CHECK: r1:0 = asr(r1:0, #0):rnd
+
+declare i32 @llvm.hexagon.S2.asr.i.r.rnd(i32, i32)
+define i32 @S2_asr_i_r_rnd(i32 %a) {
+ %z = call i32 @llvm.hexagon.S2.asr.i.r.rnd(i32 %a, i32 0)
+ ret i32 %z
+}
+; CHECK: r0 = asr(r0, #0):rnd
+
+; Shift left by immediate with saturation
+declare i32 @llvm.hexagon.S2.asl.i.r.sat(i32, i32)
+define i32 @S2_asl_i_r_sat(i32 %a) {
+ %z = call i32 @llvm.hexagon.S2.asl.i.r.sat(i32 %a, i32 0)
+ ret i32 %z
+}
+; CHECK: r0 = asl(r0, #0):sat
+
+; Shift by register
+declare i64 @llvm.hexagon.S2.asr.r.p(i64, i32)
+define i64 @S2_asr_r_p(i64 %a, i32 %b) {
+ %z = call i64 @llvm.hexagon.S2.asr.r.p(i64 %a, i32 %b)
+ ret i64 %z
+}
+; CHECK: r1:0 = asr(r1:0, r2)
+
+declare i64 @llvm.hexagon.S2.lsr.r.p(i64, i32)
+define i64 @S2_lsr_r_p(i64 %a, i32 %b) {
+ %z = call i64 @llvm.hexagon.S2.lsr.r.p(i64 %a, i32 %b)
+ ret i64 %z
+}
+; CHECK: r1:0 = lsr(r1:0, r2)
+
+declare i64 @llvm.hexagon.S2.asl.r.p(i64, i32)
+define i64 @S2_asl_r_p(i64 %a, i32 %b) {
+ %z = call i64 @llvm.hexagon.S2.asl.r.p(i64 %a, i32 %b)
+ ret i64 %z
+}
+; CHECK: r1:0 = asl(r1:0, r2)
+
+declare i64 @llvm.hexagon.S2.lsl.r.p(i64, i32)
+define i64 @S2_lsl_r_p(i64 %a, i32 %b) {
+ %z = call i64 @llvm.hexagon.S2.lsl.r.p(i64 %a, i32 %b)
+ ret i64 %z
+}
+; CHECK: r1:0 = lsl(r1:0, r2)
+
+declare i32 @llvm.hexagon.S2.asr.r.r(i32, i32)
+define i32 @S2_asr_r_r(i32 %a, i32 %b) {
+ %z = call i32 @llvm.hexagon.S2.asr.r.r(i32 %a, i32 %b)
+ ret i32 %z
+}
+; CHECK: r0 = asr(r0, r1)
+
+declare i32 @llvm.hexagon.S2.lsr.r.r(i32, i32)
+define i32 @S2_lsr_r_r(i32 %a, i32 %b) {
+ %z = call i32 @llvm.hexagon.S2.lsr.r.r(i32 %a, i32 %b)
+ ret i32 %z
+}
+; CHECK: r0 = lsr(r0, r1)
+
+declare i32 @llvm.hexagon.S2.asl.r.r(i32, i32)
+define i32 @S2_asl_r_r(i32 %a, i32 %b) {
+ %z = call i32 @llvm.hexagon.S2.asl.r.r(i32 %a, i32 %b)
+ ret i32 %z
+}
+; CHECK: r0 = asl(r0, r1)
+
+declare i32 @llvm.hexagon.S2.lsl.r.r(i32, i32)
+define i32 @S2_lsl_r_r(i32 %a, i32 %b) {
+ %z = call i32 @llvm.hexagon.S2.lsl.r.r(i32 %a, i32 %b)
+ ret i32 %z
+}
+; CHECK: r0 = lsl(r0, r1)
+
+declare i32 @llvm.hexagon.S4.lsli(i32, i32)
+define i32 @S4_lsli(i32 %a) {
+ %z = call i32 @llvm.hexagon.S4.lsli(i32 0, i32 %a)
+ ret i32 %z
+}
+; CHECK: r0 = lsl(#0, r0)
+
+; Shift by register and accumulate
+declare i64 @llvm.hexagon.S2.asr.r.p.nac(i64, i64, i32)
+define i64 @S2_asr_r_p_nac(i64 %a, i64 %b, i32 %c) {
+ %z = call i64 @llvm.hexagon.S2.asr.r.p.nac(i64 %a, i64 %b, i32 %c)
+ ret i64 %z
+}
+; CHECK: r1:0 -= asr(r3:2, r4)
+
+declare i64 @llvm.hexagon.S2.lsr.r.p.nac(i64, i64, i32)
+define i64 @S2_lsr_r_p_nac(i64 %a, i64 %b, i32 %c) {
+ %z = call i64 @llvm.hexagon.S2.lsr.r.p.nac(i64 %a, i64 %b, i32 %c)
+ ret i64 %z
+}
+; CHECK: r1:0 -= lsr(r3:2, r4)
+
+declare i64 @llvm.hexagon.S2.asl.r.p.nac(i64, i64, i32)
+define i64 @S2_asl_r_p_nac(i64 %a, i64 %b, i32 %c) {
+ %z = call i64 @llvm.hexagon.S2.asl.r.p.nac(i64 %a, i64 %b, i32 %c)
+ ret i64 %z
+}
+; CHECK: r1:0 -= asl(r3:2, r4)
+
+declare i64 @llvm.hexagon.S2.lsl.r.p.nac(i64, i64, i32)
+define i64 @S2_lsl_r_p_nac(i64 %a, i64 %b, i32 %c) {
+ %z = call i64 @llvm.hexagon.S2.lsl.r.p.nac(i64 %a, i64 %b, i32 %c)
+ ret i64 %z
+}
+; CHECK: r1:0 -= lsl(r3:2, r4)
+
+declare i64 @llvm.hexagon.S2.asr.r.p.acc(i64, i64, i32)
+define i64 @S2_asr_r_p_acc(i64 %a, i64 %b, i32 %c) {
+ %z = call i64 @llvm.hexagon.S2.asr.r.p.acc(i64 %a, i64 %b, i32 %c)
+ ret i64 %z
+}
+; CHECK: r1:0 += asr(r3:2, r4)
+
+declare i64 @llvm.hexagon.S2.lsr.r.p.acc(i64, i64, i32)
+define i64 @S2_lsr_r_p_acc(i64 %a, i64 %b, i32 %c) {
+ %z = call i64 @llvm.hexagon.S2.lsr.r.p.acc(i64 %a, i64 %b, i32 %c)
+ ret i64 %z
+}
+; CHECK: r1:0 += lsr(r3:2, r4)
+
+declare i64 @llvm.hexagon.S2.asl.r.p.acc(i64, i64, i32)
+define i64 @S2_asl_r_p_acc(i64 %a, i64 %b, i32 %c) {
+ %z = call i64 @llvm.hexagon.S2.asl.r.p.acc(i64 %a, i64 %b, i32 %c)
+ ret i64 %z
+}
+; CHECK: r1:0 += asl(r3:2, r4)
+
+declare i64 @llvm.hexagon.S2.lsl.r.p.acc(i64, i64, i32)
+define i64 @S2_lsl_r_p_acc(i64 %a, i64 %b, i32 %c) {
+ %z = call i64 @llvm.hexagon.S2.lsl.r.p.acc(i64 %a, i64 %b, i32 %c)
+ ret i64 %z
+}
+; CHECK: r1:0 += lsl(r3:2, r4)
+
+declare i32 @llvm.hexagon.S2.asr.r.r.nac(i32, i32, i32)
+define i32 @S2_asr_r_r_nac(i32 %a, i32 %b, i32 %c) {
+ %z = call i32 @llvm.hexagon.S2.asr.r.r.nac(i32 %a, i32 %b, i32 %c)
+ ret i32 %z
+}
+; CHECK: r0 -= asr(r1, r2)
+
+declare i32 @llvm.hexagon.S2.lsr.r.r.nac(i32, i32, i32)
+define i32 @S2_lsr_r_r_nac(i32 %a, i32 %b, i32 %c) {
+ %z = call i32 @llvm.hexagon.S2.lsr.r.r.nac(i32 %a, i32 %b, i32 %c)
+ ret i32 %z
+}
+; CHECK: r0 -= lsr(r1, r2)
+
+declare i32 @llvm.hexagon.S2.asl.r.r.nac(i32, i32, i32)
+define i32 @S2_asl_r_r_nac(i32 %a, i32 %b, i32 %c) {
+ %z = call i32 @llvm.hexagon.S2.asl.r.r.nac(i32 %a, i32 %b, i32 %c)
+ ret i32 %z
+}
+; CHECK: r0 -= asl(r1, r2)
+
+declare i32 @llvm.hexagon.S2.lsl.r.r.nac(i32, i32, i32)
+define i32 @S2_lsl_r_r_nac(i32 %a, i32 %b, i32 %c) {
+ %z = call i32 @llvm.hexagon.S2.lsl.r.r.nac(i32 %a, i32 %b, i32 %c)
+ ret i32 %z
+}
+; CHECK: r0 -= lsl(r1, r2)
+
+declare i32 @llvm.hexagon.S2.asr.r.r.acc(i32, i32, i32)
+define i32 @S2_asr_r_r_acc(i32 %a, i32 %b, i32 %c) {
+ %z = call i32 @llvm.hexagon.S2.asr.r.r.acc(i32 %a, i32 %b, i32 %c)
+ ret i32 %z
+}
+; CHECK: r0 += asr(r1, r2)
+
+declare i32 @llvm.hexagon.S2.lsr.r.r.acc(i32, i32, i32)
+define i32 @S2_lsr_r_r_acc(i32 %a, i32 %b, i32 %c) {
+ %z = call i32 @llvm.hexagon.S2.lsr.r.r.acc(i32 %a, i32 %b, i32 %c)
+ ret i32 %z
+}
+; CHECK: r0 += lsr(r1, r2)
+
+declare i32 @llvm.hexagon.S2.asl.r.r.acc(i32, i32, i32)
+define i32 @S2_asl_r_r_acc(i32 %a, i32 %b, i32 %c) {
+ %z = call i32 @llvm.hexagon.S2.asl.r.r.acc(i32 %a, i32 %b, i32 %c)
+ ret i32 %z
+}
+; CHECK: r0 += asl(r1, r2)
+
+declare i32 @llvm.hexagon.S2.lsl.r.r.acc(i32, i32, i32)
+define i32 @S2_lsl_r_r_acc(i32 %a, i32 %b, i32 %c) {
+ %z = call i32 @llvm.hexagon.S2.lsl.r.r.acc(i32 %a, i32 %b, i32 %c)
+ ret i32 %z
+}
+; CHECK: r0 += lsl(r1, r2)
+
+; Shift by register and logical
+declare i64 @llvm.hexagon.S2.asr.r.p.or(i64, i64, i32)
+define i64 @S2_asr_r_p_or(i64 %a, i64 %b, i32 %c) {
+ %z = call i64 @llvm.hexagon.S2.asr.r.p.or(i64 %a, i64 %b, i32 %c)
+ ret i64 %z
+}
+; CHECK: r1:0 |= asr(r3:2, r4)
+
+declare i64 @llvm.hexagon.S2.lsr.r.p.or(i64, i64, i32)
+define i64 @S2_lsr_r_p_or(i64 %a, i64 %b, i32 %c) {
+ %z = call i64 @llvm.hexagon.S2.lsr.r.p.or(i64 %a, i64 %b, i32 %c)
+ ret i64 %z
+}
+; CHECK: r1:0 |= lsr(r3:2, r4)
+
+declare i64 @llvm.hexagon.S2.asl.r.p.or(i64, i64, i32)
+define i64 @S2_asl_r_p_or(i64 %a, i64 %b, i32 %c) {
+ %z = call i64 @llvm.hexagon.S2.asl.r.p.or(i64 %a, i64 %b, i32 %c)
+ ret i64 %z
+}
+; CHECK: r1:0 |= asl(r3:2, r4)
+
+declare i64 @llvm.hexagon.S2.lsl.r.p.or(i64, i64, i32)
+define i64 @S2_lsl_r_p_or(i64 %a, i64 %b, i32 %c) {
+ %z = call i64 @llvm.hexagon.S2.lsl.r.p.or(i64 %a, i64 %b, i32 %c)
+ ret i64 %z
+}
+; CHECK: r1:0 |= lsl(r3:2, r4)
+
+declare i64 @llvm.hexagon.S2.asr.r.p.and(i64, i64, i32)
+define i64 @S2_asr_r_p_and(i64 %a, i64 %b, i32 %c) {
+ %z = call i64 @llvm.hexagon.S2.asr.r.p.and(i64 %a, i64 %b, i32 %c)
+ ret i64 %z
+}
+; CHECK: r1:0 &= asr(r3:2, r4)
+
+declare i64 @llvm.hexagon.S2.lsr.r.p.and(i64, i64, i32)
+define i64 @S2_lsr_r_p_and(i64 %a, i64 %b, i32 %c) {
+ %z = call i64 @llvm.hexagon.S2.lsr.r.p.and(i64 %a, i64 %b, i32 %c)
+ ret i64 %z
+}
+; CHECK: r1:0 &= lsr(r3:2, r4)
+
+declare i64 @llvm.hexagon.S2.asl.r.p.and(i64, i64, i32)
+define i64 @S2_asl_r_p_and(i64 %a, i64 %b, i32 %c) {
+ %z = call i64 @llvm.hexagon.S2.asl.r.p.and(i64 %a, i64 %b, i32 %c)
+ ret i64 %z
+}
+; CHECK: r1:0 &= asl(r3:2, r4)
+
+declare i64 @llvm.hexagon.S2.lsl.r.p.and(i64, i64, i32)
+define i64 @S2_lsl_r_p_and(i64 %a, i64 %b, i32 %c) {
+ %z = call i64 @llvm.hexagon.S2.lsl.r.p.and(i64 %a, i64 %b, i32 %c)
+ ret i64 %z
+}
+; CHECK: r1:0 &= lsl(r3:2, r4)
+
+declare i32 @llvm.hexagon.S2.asr.r.r.or(i32, i32, i32)
+define i32 @S2_asr_r_r_or(i32 %a, i32 %b, i32 %c) {
+ %z = call i32 @llvm.hexagon.S2.asr.r.r.or(i32 %a, i32 %b, i32 %c)
+ ret i32 %z
+}
+; CHECK: r0 |= asr(r1, r2)
+
+declare i32 @llvm.hexagon.S2.lsr.r.r.or(i32, i32, i32)
+define i32 @S2_lsr_r_r_or(i32 %a, i32 %b, i32 %c) {
+ %z = call i32 @llvm.hexagon.S2.lsr.r.r.or(i32 %a, i32 %b, i32 %c)
+ ret i32 %z
+}
+; CHECK: r0 |= lsr(r1, r2)
+
+declare i32 @llvm.hexagon.S2.asl.r.r.or(i32, i32, i32)
+define i32 @S2_asl_r_r_or(i32 %a, i32 %b, i32 %c) {
+ %z = call i32 @llvm.hexagon.S2.asl.r.r.or(i32 %a, i32 %b, i32 %c)
+ ret i32 %z
+}
+; CHECK: r0 |= asl(r1, r2)
+
+declare i32 @llvm.hexagon.S2.lsl.r.r.or(i32, i32, i32)
+define i32 @S2_lsl_r_r_or(i32 %a, i32 %b, i32 %c) {
+ %z = call i32 @llvm.hexagon.S2.lsl.r.r.or(i32 %a, i32 %b, i32 %c)
+ ret i32 %z
+}
+; CHECK: r0 |= lsl(r1, r2)
+
+declare i32 @llvm.hexagon.S2.asr.r.r.and(i32, i32, i32)
+define i32 @S2_asr_r_r_and(i32 %a, i32 %b, i32 %c) {
+ %z = call i32 @llvm.hexagon.S2.asr.r.r.and(i32 %a, i32 %b, i32 %c)
+ ret i32 %z
+}
+; CHECK: r0 &= asr(r1, r2)
+
+declare i32 @llvm.hexagon.S2.lsr.r.r.and(i32, i32, i32)
+define i32 @S2_lsr_r_r_and(i32 %a, i32 %b, i32 %c) {
+ %z = call i32 @llvm.hexagon.S2.lsr.r.r.and(i32 %a, i32 %b, i32 %c)
+ ret i32 %z
+}
+; CHECK: r0 &= lsr(r1, r2)
+
+declare i32 @llvm.hexagon.S2.asl.r.r.and(i32, i32, i32)
+define i32 @S2_asl_r_r_and(i32 %a, i32 %b, i32 %c) {
+ %z = call i32 @llvm.hexagon.S2.asl.r.r.and(i32 %a, i32 %b, i32 %c)
+ ret i32 %z
+}
+; CHECK: r0 &= asl(r1, r2)
+
+declare i32 @llvm.hexagon.S2.lsl.r.r.and(i32, i32, i32)
+define i32 @S2_lsl_r_r_and(i32 %a, i32 %b, i32 %c) {
+ %z = call i32 @llvm.hexagon.S2.lsl.r.r.and(i32 %a, i32 %b, i32 %c)
+ ret i32 %z
+}
+; CHECK: r0 &= lsl(r1, r2)
+
+; Shift by register with saturation
+declare i32 @llvm.hexagon.S2.asr.r.r.sat(i32, i32)
+define i32 @S2_asr_r_r_sat(i32 %a, i32 %b) {
+ %z = call i32 @llvm.hexagon.S2.asr.r.r.sat(i32 %a, i32 %b)
+ ret i32 %z
+}
+; CHECK: r0 = asr(r0, r1):sat
+
+declare i32 @llvm.hexagon.S2.asl.r.r.sat(i32, i32)
+define i32 @S2_asl_r_r_sat(i32 %a, i32 %b) {
+ %z = call i32 @llvm.hexagon.S2.asl.r.r.sat(i32 %a, i32 %b)
+ ret i32 %z
+}
+; CHECK: r0 = asl(r0, r1):sat
+
+; Vector shift halfwords by immediate
+declare i64 @llvm.hexagon.S2.asr.i.vh(i64, i32)
+define i64 @S2_asr_i_vh(i64 %a) {
+ %z = call i64 @llvm.hexagon.S2.asr.i.vh(i64 %a, i32 0)
+ ret i64 %z
+}
+; CHECK: r1:0 = vasrh(r1:0, #0)
+
+declare i64 @llvm.hexagon.S2.lsr.i.vh(i64, i32)
+define i64 @S2_lsr_i_vh(i64 %a) {
+ %z = call i64 @llvm.hexagon.S2.lsr.i.vh(i64 %a, i32 0)
+ ret i64 %z
+}
+; CHECK: r1:0 = vlsrh(r1:0, #0)
+
+declare i64 @llvm.hexagon.S2.asl.i.vh(i64, i32)
+define i64 @S2_asl_i_vh(i64 %a) {
+ %z = call i64 @llvm.hexagon.S2.asl.i.vh(i64 %a, i32 0)
+ ret i64 %z
+}
+; CHECK: r1:0 = vaslh(r1:0, #0)
+
+; Vector shift halfwords by register
+declare i64 @llvm.hexagon.S2.asr.r.vh(i64, i32)
+define i64 @S2_asr_r_vh(i64 %a, i32 %b) {
+ %z = call i64 @llvm.hexagon.S2.asr.r.vh(i64 %a, i32 %b)
+ ret i64 %z
+}
+; CHECK: r1:0 = vasrh(r1:0, r2)
+
+declare i64 @llvm.hexagon.S2.lsr.r.vh(i64, i32)
+define i64 @S2_lsr_r_vh(i64 %a, i32 %b) {
+ %z = call i64 @llvm.hexagon.S2.lsr.r.vh(i64 %a, i32 %b)
+ ret i64 %z
+}
+; CHECK: r1:0 = vlsrh(r1:0, r2)
+
+declare i64 @llvm.hexagon.S2.asl.r.vh(i64, i32)
+define i64 @S2_asl_r_vh(i64 %a, i32 %b) {
+ %z = call i64 @llvm.hexagon.S2.asl.r.vh(i64 %a, i32 %b)
+ ret i64 %z
+}
+; CHECK: r1:0 = vaslh(r1:0, r2)
+
+declare i64 @llvm.hexagon.S2.lsl.r.vh(i64, i32)
+define i64 @S2_lsl_r_vh(i64 %a, i32 %b) {
+ %z = call i64 @llvm.hexagon.S2.lsl.r.vh(i64 %a, i32 %b)
+ ret i64 %z
+}
+; CHECK: r1:0 = vlslh(r1:0, r2)
+
+; Vector shift words by immediate
+declare i64 @llvm.hexagon.S2.asr.i.vw(i64, i32)
+define i64 @S2_asr_i_vw(i64 %a) {
+ %z = call i64 @llvm.hexagon.S2.asr.i.vw(i64 %a, i32 0)
+ ret i64 %z
+}
+; CHECK: r1:0 = vasrw(r1:0, #0)
+
+declare i64 @llvm.hexagon.S2.lsr.i.vw(i64, i32)
+define i64 @S2_lsr_i_vw(i64 %a) {
+ %z = call i64 @llvm.hexagon.S2.lsr.i.vw(i64 %a, i32 0)
+ ret i64 %z
+}
+; CHECK: r1:0 = vlsrw(r1:0, #0)
+
+declare i64 @llvm.hexagon.S2.asl.i.vw(i64, i32)
+define i64 @S2_asl_i_vw(i64 %a) {
+ %z = call i64 @llvm.hexagon.S2.asl.i.vw(i64 %a, i32 0)
+ ret i64 %z
+}
+; CHECK: r1:0 = vaslw(r1:0, #0)
+
+; Vector shift words with truncate and pack
+declare i32 @llvm.hexagon.S2.asr.i.svw.trun(i64, i32)
+define i32 @S2_asr_i_svw_trun(i64 %a) {
+ %z = call i32 @llvm.hexagon.S2.asr.i.svw.trun(i64 %a, i32 0)
+ ret i32 %z
+}
+; CHECK: r0 = vasrw(r1:0, #0)
+
+declare i32 @llvm.hexagon.S2.asr.r.svw.trun(i64, i32)
+define i32 @S2_asr_r_svw_trun(i64 %a, i32 %b) {
+ %z = call i32 @llvm.hexagon.S2.asr.r.svw.trun(i64 %a, i32 %b)
+ ret i32 %z
+}
+; CHECK: r0 = vasrw(r1:0, r2)
diff --git a/test/CodeGen/Hexagon/macint.ll b/test/CodeGen/Hexagon/macint.ll
index b3b9d0ee7a01..514ba5b91308 100644
--- a/test/CodeGen/Hexagon/macint.ll
+++ b/test/CodeGen/Hexagon/macint.ll
@@ -1,11 +1,11 @@
; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
; Check that we generate integer multiply accumulate.
-; CHECK: r{{[0-9]+}} += mpyi(r{{[0-9]+}}, r{{[0-9]+}})
+; CHECK: r{{[0-9]+}} {{\+|\-}}= mpyi(r{{[0-9]+}},
define i32 @main(i32* %a, i32* %b) nounwind {
entry:
- %0 = load i32* %a, align 4
+ %0 = load i32, i32* %a, align 4
%div = udiv i32 %0, 10000
%rem = urem i32 %div, 10
store i32 %rem, i32* %b, align 4
diff --git a/test/CodeGen/Hexagon/mem-fi-add.ll b/test/CodeGen/Hexagon/mem-fi-add.ll
new file mode 100644
index 000000000000..a46029fdb5ec
--- /dev/null
+++ b/test/CodeGen/Hexagon/mem-fi-add.ll
@@ -0,0 +1,29 @@
+; RUN: llc -O2 < %s | FileCheck %s
+; Look for four stores directly via r29.
+; CHECK: memd(r29
+; CHECK: memd(r29
+; CHECK: memd(r29
+; CHECK: memd(r29
+
+target datalayout = "e-m:e-p:32:32-i1:32-i64:64-a:0-v32:32-n16:32"
+target triple = "hexagon"
+
+; Function Attrs: nounwind
+define void @foo() #0 {
+entry:
+ %t = alloca [4 x [2 x i32]], align 8
+ %0 = bitcast [4 x [2 x i32]]* %t to i8*
+ call void @llvm.memset.p0i8.i32(i8* %0, i8 0, i32 32, i32 8, i1 false)
+ %arraydecay = getelementptr inbounds [4 x [2 x i32]], [4 x [2 x i32]]* %t, i32 0, i32 0
+ call void @bar([2 x i32]* %arraydecay) #1
+ ret void
+}
+
+; Function Attrs: nounwind
+declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) #1
+
+declare void @bar([2 x i32]*) #2
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind }
+attributes #2 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
diff --git a/test/CodeGen/Hexagon/memops.ll b/test/CodeGen/Hexagon/memops.ll
index fca1a73811a9..e4a8bf7c95e9 100644
--- a/test/CodeGen/Hexagon/memops.ll
+++ b/test/CodeGen/Hexagon/memops.ll
@@ -4,7 +4,7 @@
define void @memop_unsigned_char_add5(i8* nocapture %p) nounwind {
entry:
; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}#5
- %0 = load i8* %p, align 1
+ %0 = load i8, i8* %p, align 1
%conv = zext i8 %0 to i32
%add = add nsw i32 %conv, 5
%conv1 = trunc i32 %add to i8
@@ -16,7 +16,7 @@ define void @memop_unsigned_char_add(i8* nocapture %p, i8 zeroext %x) nounwind {
entry:
; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}r{{[0-9]+}}
%conv = zext i8 %x to i32
- %0 = load i8* %p, align 1
+ %0 = load i8, i8* %p, align 1
%conv1 = zext i8 %0 to i32
%add = add nsw i32 %conv1, %conv
%conv2 = trunc i32 %add to i8
@@ -28,7 +28,7 @@ define void @memop_unsigned_char_sub(i8* nocapture %p, i8 zeroext %x) nounwind {
entry:
; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}-={{ *}}r{{[0-9]+}}
%conv = zext i8 %x to i32
- %0 = load i8* %p, align 1
+ %0 = load i8, i8* %p, align 1
%conv1 = zext i8 %0 to i32
%sub = sub nsw i32 %conv1, %conv
%conv2 = trunc i32 %sub to i8
@@ -39,7 +39,7 @@ entry:
define void @memop_unsigned_char_or(i8* nocapture %p, i8 zeroext %x) nounwind {
entry:
; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}|={{ *}}r{{[0-9]+}}
- %0 = load i8* %p, align 1
+ %0 = load i8, i8* %p, align 1
%or3 = or i8 %0, %x
store i8 %or3, i8* %p, align 1
ret void
@@ -48,7 +48,7 @@ entry:
define void @memop_unsigned_char_and(i8* nocapture %p, i8 zeroext %x) nounwind {
entry:
; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}&={{ *}}r{{[0-9]+}}
- %0 = load i8* %p, align 1
+ %0 = load i8, i8* %p, align 1
%and3 = and i8 %0, %x
store i8 %and3, i8* %p, align 1
ret void
@@ -57,7 +57,7 @@ entry:
define void @memop_unsigned_char_clrbit(i8* nocapture %p) nounwind {
entry:
; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}clrbit({{ *}}#5{{ *}})
- %0 = load i8* %p, align 1
+ %0 = load i8, i8* %p, align 1
%conv = zext i8 %0 to i32
%and = and i32 %conv, 223
%conv1 = trunc i32 %and to i8
@@ -68,7 +68,7 @@ entry:
define void @memop_unsigned_char_setbit(i8* nocapture %p) nounwind {
entry:
; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}setbit({{ *}}#7{{ *}})
- %0 = load i8* %p, align 1
+ %0 = load i8, i8* %p, align 1
%conv = zext i8 %0 to i32
%or = or i32 %conv, 128
%conv1 = trunc i32 %or to i8
@@ -79,8 +79,8 @@ entry:
define void @memop_unsigned_char_add5_index(i8* nocapture %p, i32 %i) nounwind {
entry:
; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}#5
- %add.ptr = getelementptr inbounds i8* %p, i32 %i
- %0 = load i8* %add.ptr, align 1
+ %add.ptr = getelementptr inbounds i8, i8* %p, i32 %i
+ %0 = load i8, i8* %add.ptr, align 1
%conv = zext i8 %0 to i32
%add = add nsw i32 %conv, 5
%conv1 = trunc i32 %add to i8
@@ -92,8 +92,8 @@ define void @memop_unsigned_char_add_index(i8* nocapture %p, i32 %i, i8 zeroext
entry:
; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}r{{[0-9]+}}
%conv = zext i8 %x to i32
- %add.ptr = getelementptr inbounds i8* %p, i32 %i
- %0 = load i8* %add.ptr, align 1
+ %add.ptr = getelementptr inbounds i8, i8* %p, i32 %i
+ %0 = load i8, i8* %add.ptr, align 1
%conv1 = zext i8 %0 to i32
%add = add nsw i32 %conv1, %conv
%conv2 = trunc i32 %add to i8
@@ -105,8 +105,8 @@ define void @memop_unsigned_char_sub_index(i8* nocapture %p, i32 %i, i8 zeroext
entry:
; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}-={{ *}}r{{[0-9]+}}
%conv = zext i8 %x to i32
- %add.ptr = getelementptr inbounds i8* %p, i32 %i
- %0 = load i8* %add.ptr, align 1
+ %add.ptr = getelementptr inbounds i8, i8* %p, i32 %i
+ %0 = load i8, i8* %add.ptr, align 1
%conv1 = zext i8 %0 to i32
%sub = sub nsw i32 %conv1, %conv
%conv2 = trunc i32 %sub to i8
@@ -117,8 +117,8 @@ entry:
define void @memop_unsigned_char_or_index(i8* nocapture %p, i32 %i, i8 zeroext %x) nounwind {
entry:
; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}|={{ *}}r{{[0-9]+}}
- %add.ptr = getelementptr inbounds i8* %p, i32 %i
- %0 = load i8* %add.ptr, align 1
+ %add.ptr = getelementptr inbounds i8, i8* %p, i32 %i
+ %0 = load i8, i8* %add.ptr, align 1
%or3 = or i8 %0, %x
store i8 %or3, i8* %add.ptr, align 1
ret void
@@ -127,8 +127,8 @@ entry:
define void @memop_unsigned_char_and_index(i8* nocapture %p, i32 %i, i8 zeroext %x) nounwind {
entry:
; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}&={{ *}}r{{[0-9]+}}
- %add.ptr = getelementptr inbounds i8* %p, i32 %i
- %0 = load i8* %add.ptr, align 1
+ %add.ptr = getelementptr inbounds i8, i8* %p, i32 %i
+ %0 = load i8, i8* %add.ptr, align 1
%and3 = and i8 %0, %x
store i8 %and3, i8* %add.ptr, align 1
ret void
@@ -137,8 +137,8 @@ entry:
define void @memop_unsigned_char_clrbit_index(i8* nocapture %p, i32 %i) nounwind {
entry:
; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}clrbit({{ *}}#5{{ *}})
- %add.ptr = getelementptr inbounds i8* %p, i32 %i
- %0 = load i8* %add.ptr, align 1
+ %add.ptr = getelementptr inbounds i8, i8* %p, i32 %i
+ %0 = load i8, i8* %add.ptr, align 1
%conv = zext i8 %0 to i32
%and = and i32 %conv, 223
%conv1 = trunc i32 %and to i8
@@ -149,8 +149,8 @@ entry:
define void @memop_unsigned_char_setbit_index(i8* nocapture %p, i32 %i) nounwind {
entry:
; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}setbit({{ *}}#7{{ *}})
- %add.ptr = getelementptr inbounds i8* %p, i32 %i
- %0 = load i8* %add.ptr, align 1
+ %add.ptr = getelementptr inbounds i8, i8* %p, i32 %i
+ %0 = load i8, i8* %add.ptr, align 1
%conv = zext i8 %0 to i32
%or = or i32 %conv, 128
%conv1 = trunc i32 %or to i8
@@ -161,8 +161,8 @@ entry:
define void @memop_unsigned_char_add5_index5(i8* nocapture %p) nounwind {
entry:
; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#5){{ *}}+={{ *}}#5
- %add.ptr = getelementptr inbounds i8* %p, i32 5
- %0 = load i8* %add.ptr, align 1
+ %add.ptr = getelementptr inbounds i8, i8* %p, i32 5
+ %0 = load i8, i8* %add.ptr, align 1
%conv = zext i8 %0 to i32
%add = add nsw i32 %conv, 5
%conv1 = trunc i32 %add to i8
@@ -174,8 +174,8 @@ define void @memop_unsigned_char_add_index5(i8* nocapture %p, i8 zeroext %x) nou
entry:
; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#5){{ *}}+={{ *}}r{{[0-9]+}}
%conv = zext i8 %x to i32
- %add.ptr = getelementptr inbounds i8* %p, i32 5
- %0 = load i8* %add.ptr, align 1
+ %add.ptr = getelementptr inbounds i8, i8* %p, i32 5
+ %0 = load i8, i8* %add.ptr, align 1
%conv1 = zext i8 %0 to i32
%add = add nsw i32 %conv1, %conv
%conv2 = trunc i32 %add to i8
@@ -187,8 +187,8 @@ define void @memop_unsigned_char_sub_index5(i8* nocapture %p, i8 zeroext %x) nou
entry:
; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#5){{ *}}-={{ *}}r{{[0-9]+}}
%conv = zext i8 %x to i32
- %add.ptr = getelementptr inbounds i8* %p, i32 5
- %0 = load i8* %add.ptr, align 1
+ %add.ptr = getelementptr inbounds i8, i8* %p, i32 5
+ %0 = load i8, i8* %add.ptr, align 1
%conv1 = zext i8 %0 to i32
%sub = sub nsw i32 %conv1, %conv
%conv2 = trunc i32 %sub to i8
@@ -199,8 +199,8 @@ entry:
define void @memop_unsigned_char_or_index5(i8* nocapture %p, i8 zeroext %x) nounwind {
entry:
; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#5){{ *}}|={{ *}}r{{[0-9]+}}
- %add.ptr = getelementptr inbounds i8* %p, i32 5
- %0 = load i8* %add.ptr, align 1
+ %add.ptr = getelementptr inbounds i8, i8* %p, i32 5
+ %0 = load i8, i8* %add.ptr, align 1
%or3 = or i8 %0, %x
store i8 %or3, i8* %add.ptr, align 1
ret void
@@ -209,8 +209,8 @@ entry:
define void @memop_unsigned_char_and_index5(i8* nocapture %p, i8 zeroext %x) nounwind {
entry:
; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#5){{ *}}&={{ *}}r{{[0-9]+}}
- %add.ptr = getelementptr inbounds i8* %p, i32 5
- %0 = load i8* %add.ptr, align 1
+ %add.ptr = getelementptr inbounds i8, i8* %p, i32 5
+ %0 = load i8, i8* %add.ptr, align 1
%and3 = and i8 %0, %x
store i8 %and3, i8* %add.ptr, align 1
ret void
@@ -219,8 +219,8 @@ entry:
define void @memop_unsigned_char_clrbit_index5(i8* nocapture %p) nounwind {
entry:
; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#5){{ *}}={{ *}}clrbit({{ *}}#5{{ *}})
- %add.ptr = getelementptr inbounds i8* %p, i32 5
- %0 = load i8* %add.ptr, align 1
+ %add.ptr = getelementptr inbounds i8, i8* %p, i32 5
+ %0 = load i8, i8* %add.ptr, align 1
%conv = zext i8 %0 to i32
%and = and i32 %conv, 223
%conv1 = trunc i32 %and to i8
@@ -231,8 +231,8 @@ entry:
define void @memop_unsigned_char_setbit_index5(i8* nocapture %p) nounwind {
entry:
; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#5){{ *}}={{ *}}setbit({{ *}}#7{{ *}})
- %add.ptr = getelementptr inbounds i8* %p, i32 5
- %0 = load i8* %add.ptr, align 1
+ %add.ptr = getelementptr inbounds i8, i8* %p, i32 5
+ %0 = load i8, i8* %add.ptr, align 1
%conv = zext i8 %0 to i32
%or = or i32 %conv, 128
%conv1 = trunc i32 %or to i8
@@ -243,7 +243,7 @@ entry:
define void @memop_signed_char_add5(i8* nocapture %p) nounwind {
entry:
; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}#5
- %0 = load i8* %p, align 1
+ %0 = load i8, i8* %p, align 1
%conv2 = zext i8 %0 to i32
%add = add nsw i32 %conv2, 5
%conv1 = trunc i32 %add to i8
@@ -255,7 +255,7 @@ define void @memop_signed_char_add(i8* nocapture %p, i8 signext %x) nounwind {
entry:
; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}r{{[0-9]+}}
%conv4 = zext i8 %x to i32
- %0 = load i8* %p, align 1
+ %0 = load i8, i8* %p, align 1
%conv13 = zext i8 %0 to i32
%add = add nsw i32 %conv13, %conv4
%conv2 = trunc i32 %add to i8
@@ -267,7 +267,7 @@ define void @memop_signed_char_sub(i8* nocapture %p, i8 signext %x) nounwind {
entry:
; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}-={{ *}}r{{[0-9]+}}
%conv4 = zext i8 %x to i32
- %0 = load i8* %p, align 1
+ %0 = load i8, i8* %p, align 1
%conv13 = zext i8 %0 to i32
%sub = sub nsw i32 %conv13, %conv4
%conv2 = trunc i32 %sub to i8
@@ -278,7 +278,7 @@ entry:
define void @memop_signed_char_or(i8* nocapture %p, i8 signext %x) nounwind {
entry:
; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}|={{ *}}r{{[0-9]+}}
- %0 = load i8* %p, align 1
+ %0 = load i8, i8* %p, align 1
%or3 = or i8 %0, %x
store i8 %or3, i8* %p, align 1
ret void
@@ -287,7 +287,7 @@ entry:
define void @memop_signed_char_and(i8* nocapture %p, i8 signext %x) nounwind {
entry:
; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}&={{ *}}r{{[0-9]+}}
- %0 = load i8* %p, align 1
+ %0 = load i8, i8* %p, align 1
%and3 = and i8 %0, %x
store i8 %and3, i8* %p, align 1
ret void
@@ -296,7 +296,7 @@ entry:
define void @memop_signed_char_clrbit(i8* nocapture %p) nounwind {
entry:
; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}clrbit({{ *}}#5{{ *}})
- %0 = load i8* %p, align 1
+ %0 = load i8, i8* %p, align 1
%conv2 = zext i8 %0 to i32
%and = and i32 %conv2, 223
%conv1 = trunc i32 %and to i8
@@ -307,7 +307,7 @@ entry:
define void @memop_signed_char_setbit(i8* nocapture %p) nounwind {
entry:
; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}setbit({{ *}}#7{{ *}})
- %0 = load i8* %p, align 1
+ %0 = load i8, i8* %p, align 1
%conv2 = zext i8 %0 to i32
%or = or i32 %conv2, 128
%conv1 = trunc i32 %or to i8
@@ -318,8 +318,8 @@ entry:
define void @memop_signed_char_add5_index(i8* nocapture %p, i32 %i) nounwind {
entry:
; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}#5
- %add.ptr = getelementptr inbounds i8* %p, i32 %i
- %0 = load i8* %add.ptr, align 1
+ %add.ptr = getelementptr inbounds i8, i8* %p, i32 %i
+ %0 = load i8, i8* %add.ptr, align 1
%conv2 = zext i8 %0 to i32
%add = add nsw i32 %conv2, 5
%conv1 = trunc i32 %add to i8
@@ -331,8 +331,8 @@ define void @memop_signed_char_add_index(i8* nocapture %p, i32 %i, i8 signext %x
entry:
; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}r{{[0-9]+}}
%conv4 = zext i8 %x to i32
- %add.ptr = getelementptr inbounds i8* %p, i32 %i
- %0 = load i8* %add.ptr, align 1
+ %add.ptr = getelementptr inbounds i8, i8* %p, i32 %i
+ %0 = load i8, i8* %add.ptr, align 1
%conv13 = zext i8 %0 to i32
%add = add nsw i32 %conv13, %conv4
%conv2 = trunc i32 %add to i8
@@ -344,8 +344,8 @@ define void @memop_signed_char_sub_index(i8* nocapture %p, i32 %i, i8 signext %x
entry:
; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}-={{ *}}r{{[0-9]+}}
%conv4 = zext i8 %x to i32
- %add.ptr = getelementptr inbounds i8* %p, i32 %i
- %0 = load i8* %add.ptr, align 1
+ %add.ptr = getelementptr inbounds i8, i8* %p, i32 %i
+ %0 = load i8, i8* %add.ptr, align 1
%conv13 = zext i8 %0 to i32
%sub = sub nsw i32 %conv13, %conv4
%conv2 = trunc i32 %sub to i8
@@ -356,8 +356,8 @@ entry:
define void @memop_signed_char_or_index(i8* nocapture %p, i32 %i, i8 signext %x) nounwind {
entry:
; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}|={{ *}}r{{[0-9]+}}
- %add.ptr = getelementptr inbounds i8* %p, i32 %i
- %0 = load i8* %add.ptr, align 1
+ %add.ptr = getelementptr inbounds i8, i8* %p, i32 %i
+ %0 = load i8, i8* %add.ptr, align 1
%or3 = or i8 %0, %x
store i8 %or3, i8* %add.ptr, align 1
ret void
@@ -366,8 +366,8 @@ entry:
define void @memop_signed_char_and_index(i8* nocapture %p, i32 %i, i8 signext %x) nounwind {
entry:
; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}&={{ *}}r{{[0-9]+}}
- %add.ptr = getelementptr inbounds i8* %p, i32 %i
- %0 = load i8* %add.ptr, align 1
+ %add.ptr = getelementptr inbounds i8, i8* %p, i32 %i
+ %0 = load i8, i8* %add.ptr, align 1
%and3 = and i8 %0, %x
store i8 %and3, i8* %add.ptr, align 1
ret void
@@ -376,8 +376,8 @@ entry:
define void @memop_signed_char_clrbit_index(i8* nocapture %p, i32 %i) nounwind {
entry:
; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}clrbit({{ *}}#5{{ *}})
- %add.ptr = getelementptr inbounds i8* %p, i32 %i
- %0 = load i8* %add.ptr, align 1
+ %add.ptr = getelementptr inbounds i8, i8* %p, i32 %i
+ %0 = load i8, i8* %add.ptr, align 1
%conv2 = zext i8 %0 to i32
%and = and i32 %conv2, 223
%conv1 = trunc i32 %and to i8
@@ -388,8 +388,8 @@ entry:
define void @memop_signed_char_setbit_index(i8* nocapture %p, i32 %i) nounwind {
entry:
; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}setbit({{ *}}#7{{ *}})
- %add.ptr = getelementptr inbounds i8* %p, i32 %i
- %0 = load i8* %add.ptr, align 1
+ %add.ptr = getelementptr inbounds i8, i8* %p, i32 %i
+ %0 = load i8, i8* %add.ptr, align 1
%conv2 = zext i8 %0 to i32
%or = or i32 %conv2, 128
%conv1 = trunc i32 %or to i8
@@ -400,8 +400,8 @@ entry:
define void @memop_signed_char_add5_index5(i8* nocapture %p) nounwind {
entry:
; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#5){{ *}}+={{ *}}#5
- %add.ptr = getelementptr inbounds i8* %p, i32 5
- %0 = load i8* %add.ptr, align 1
+ %add.ptr = getelementptr inbounds i8, i8* %p, i32 5
+ %0 = load i8, i8* %add.ptr, align 1
%conv2 = zext i8 %0 to i32
%add = add nsw i32 %conv2, 5
%conv1 = trunc i32 %add to i8
@@ -413,8 +413,8 @@ define void @memop_signed_char_add_index5(i8* nocapture %p, i8 signext %x) nounw
entry:
; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#5){{ *}}+={{ *}}r{{[0-9]+}}
%conv4 = zext i8 %x to i32
- %add.ptr = getelementptr inbounds i8* %p, i32 5
- %0 = load i8* %add.ptr, align 1
+ %add.ptr = getelementptr inbounds i8, i8* %p, i32 5
+ %0 = load i8, i8* %add.ptr, align 1
%conv13 = zext i8 %0 to i32
%add = add nsw i32 %conv13, %conv4
%conv2 = trunc i32 %add to i8
@@ -426,8 +426,8 @@ define void @memop_signed_char_sub_index5(i8* nocapture %p, i8 signext %x) nounw
entry:
; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#5){{ *}}-={{ *}}r{{[0-9]+}}
%conv4 = zext i8 %x to i32
- %add.ptr = getelementptr inbounds i8* %p, i32 5
- %0 = load i8* %add.ptr, align 1
+ %add.ptr = getelementptr inbounds i8, i8* %p, i32 5
+ %0 = load i8, i8* %add.ptr, align 1
%conv13 = zext i8 %0 to i32
%sub = sub nsw i32 %conv13, %conv4
%conv2 = trunc i32 %sub to i8
@@ -438,8 +438,8 @@ entry:
define void @memop_signed_char_or_index5(i8* nocapture %p, i8 signext %x) nounwind {
entry:
; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#5){{ *}}|={{ *}}r{{[0-9]+}}
- %add.ptr = getelementptr inbounds i8* %p, i32 5
- %0 = load i8* %add.ptr, align 1
+ %add.ptr = getelementptr inbounds i8, i8* %p, i32 5
+ %0 = load i8, i8* %add.ptr, align 1
%or3 = or i8 %0, %x
store i8 %or3, i8* %add.ptr, align 1
ret void
@@ -448,8 +448,8 @@ entry:
define void @memop_signed_char_and_index5(i8* nocapture %p, i8 signext %x) nounwind {
entry:
; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#5){{ *}}&={{ *}}r{{[0-9]+}}
- %add.ptr = getelementptr inbounds i8* %p, i32 5
- %0 = load i8* %add.ptr, align 1
+ %add.ptr = getelementptr inbounds i8, i8* %p, i32 5
+ %0 = load i8, i8* %add.ptr, align 1
%and3 = and i8 %0, %x
store i8 %and3, i8* %add.ptr, align 1
ret void
@@ -458,8 +458,8 @@ entry:
define void @memop_signed_char_clrbit_index5(i8* nocapture %p) nounwind {
entry:
; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#5){{ *}}={{ *}}clrbit({{ *}}#5{{ *}})
- %add.ptr = getelementptr inbounds i8* %p, i32 5
- %0 = load i8* %add.ptr, align 1
+ %add.ptr = getelementptr inbounds i8, i8* %p, i32 5
+ %0 = load i8, i8* %add.ptr, align 1
%conv2 = zext i8 %0 to i32
%and = and i32 %conv2, 223
%conv1 = trunc i32 %and to i8
@@ -470,8 +470,8 @@ entry:
define void @memop_signed_char_setbit_index5(i8* nocapture %p) nounwind {
entry:
; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#5){{ *}}={{ *}}setbit({{ *}}#7{{ *}})
- %add.ptr = getelementptr inbounds i8* %p, i32 5
- %0 = load i8* %add.ptr, align 1
+ %add.ptr = getelementptr inbounds i8, i8* %p, i32 5
+ %0 = load i8, i8* %add.ptr, align 1
%conv2 = zext i8 %0 to i32
%or = or i32 %conv2, 128
%conv1 = trunc i32 %or to i8
@@ -482,7 +482,7 @@ entry:
define void @memop_unsigned_short_add5(i16* nocapture %p) nounwind {
entry:
; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}#5
- %0 = load i16* %p, align 2
+ %0 = load i16, i16* %p, align 2
%conv = zext i16 %0 to i32
%add = add nsw i32 %conv, 5
%conv1 = trunc i32 %add to i16
@@ -494,7 +494,7 @@ define void @memop_unsigned_short_add(i16* nocapture %p, i16 zeroext %x) nounwin
entry:
; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}r{{[0-9]+}}
%conv = zext i16 %x to i32
- %0 = load i16* %p, align 2
+ %0 = load i16, i16* %p, align 2
%conv1 = zext i16 %0 to i32
%add = add nsw i32 %conv1, %conv
%conv2 = trunc i32 %add to i16
@@ -506,7 +506,7 @@ define void @memop_unsigned_short_sub(i16* nocapture %p, i16 zeroext %x) nounwin
entry:
; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}-={{ *}}r{{[0-9]+}}
%conv = zext i16 %x to i32
- %0 = load i16* %p, align 2
+ %0 = load i16, i16* %p, align 2
%conv1 = zext i16 %0 to i32
%sub = sub nsw i32 %conv1, %conv
%conv2 = trunc i32 %sub to i16
@@ -517,7 +517,7 @@ entry:
define void @memop_unsigned_short_or(i16* nocapture %p, i16 zeroext %x) nounwind {
entry:
; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}|={{ *}}r{{[0-9]+}}
- %0 = load i16* %p, align 2
+ %0 = load i16, i16* %p, align 2
%or3 = or i16 %0, %x
store i16 %or3, i16* %p, align 2
ret void
@@ -526,7 +526,7 @@ entry:
define void @memop_unsigned_short_and(i16* nocapture %p, i16 zeroext %x) nounwind {
entry:
; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}&={{ *}}r{{[0-9]+}}
- %0 = load i16* %p, align 2
+ %0 = load i16, i16* %p, align 2
%and3 = and i16 %0, %x
store i16 %and3, i16* %p, align 2
ret void
@@ -535,7 +535,7 @@ entry:
define void @memop_unsigned_short_clrbit(i16* nocapture %p) nounwind {
entry:
; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}clrbit({{ *}}#5{{ *}})
- %0 = load i16* %p, align 2
+ %0 = load i16, i16* %p, align 2
%conv = zext i16 %0 to i32
%and = and i32 %conv, 65503
%conv1 = trunc i32 %and to i16
@@ -546,7 +546,7 @@ entry:
define void @memop_unsigned_short_setbit(i16* nocapture %p) nounwind {
entry:
; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}setbit({{ *}}#7{{ *}})
- %0 = load i16* %p, align 2
+ %0 = load i16, i16* %p, align 2
%conv = zext i16 %0 to i32
%or = or i32 %conv, 128
%conv1 = trunc i32 %or to i16
@@ -557,8 +557,8 @@ entry:
define void @memop_unsigned_short_add5_index(i16* nocapture %p, i32 %i) nounwind {
entry:
; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}#5
- %add.ptr = getelementptr inbounds i16* %p, i32 %i
- %0 = load i16* %add.ptr, align 2
+ %add.ptr = getelementptr inbounds i16, i16* %p, i32 %i
+ %0 = load i16, i16* %add.ptr, align 2
%conv = zext i16 %0 to i32
%add = add nsw i32 %conv, 5
%conv1 = trunc i32 %add to i16
@@ -570,8 +570,8 @@ define void @memop_unsigned_short_add_index(i16* nocapture %p, i32 %i, i16 zeroe
entry:
; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}r{{[0-9]+}}
%conv = zext i16 %x to i32
- %add.ptr = getelementptr inbounds i16* %p, i32 %i
- %0 = load i16* %add.ptr, align 2
+ %add.ptr = getelementptr inbounds i16, i16* %p, i32 %i
+ %0 = load i16, i16* %add.ptr, align 2
%conv1 = zext i16 %0 to i32
%add = add nsw i32 %conv1, %conv
%conv2 = trunc i32 %add to i16
@@ -583,8 +583,8 @@ define void @memop_unsigned_short_sub_index(i16* nocapture %p, i32 %i, i16 zeroe
entry:
; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}-={{ *}}r{{[0-9]+}}
%conv = zext i16 %x to i32
- %add.ptr = getelementptr inbounds i16* %p, i32 %i
- %0 = load i16* %add.ptr, align 2
+ %add.ptr = getelementptr inbounds i16, i16* %p, i32 %i
+ %0 = load i16, i16* %add.ptr, align 2
%conv1 = zext i16 %0 to i32
%sub = sub nsw i32 %conv1, %conv
%conv2 = trunc i32 %sub to i16
@@ -595,8 +595,8 @@ entry:
define void @memop_unsigned_short_or_index(i16* nocapture %p, i32 %i, i16 zeroext %x) nounwind {
entry:
; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}|={{ *}}r{{[0-9]+}}
- %add.ptr = getelementptr inbounds i16* %p, i32 %i
- %0 = load i16* %add.ptr, align 2
+ %add.ptr = getelementptr inbounds i16, i16* %p, i32 %i
+ %0 = load i16, i16* %add.ptr, align 2
%or3 = or i16 %0, %x
store i16 %or3, i16* %add.ptr, align 2
ret void
@@ -605,8 +605,8 @@ entry:
define void @memop_unsigned_short_and_index(i16* nocapture %p, i32 %i, i16 zeroext %x) nounwind {
entry:
; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}&={{ *}}r{{[0-9]+}}
- %add.ptr = getelementptr inbounds i16* %p, i32 %i
- %0 = load i16* %add.ptr, align 2
+ %add.ptr = getelementptr inbounds i16, i16* %p, i32 %i
+ %0 = load i16, i16* %add.ptr, align 2
%and3 = and i16 %0, %x
store i16 %and3, i16* %add.ptr, align 2
ret void
@@ -615,8 +615,8 @@ entry:
define void @memop_unsigned_short_clrbit_index(i16* nocapture %p, i32 %i) nounwind {
entry:
; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}clrbit({{ *}}#5{{ *}})
- %add.ptr = getelementptr inbounds i16* %p, i32 %i
- %0 = load i16* %add.ptr, align 2
+ %add.ptr = getelementptr inbounds i16, i16* %p, i32 %i
+ %0 = load i16, i16* %add.ptr, align 2
%conv = zext i16 %0 to i32
%and = and i32 %conv, 65503
%conv1 = trunc i32 %and to i16
@@ -627,8 +627,8 @@ entry:
define void @memop_unsigned_short_setbit_index(i16* nocapture %p, i32 %i) nounwind {
entry:
; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}setbit({{ *}}#7{{ *}})
- %add.ptr = getelementptr inbounds i16* %p, i32 %i
- %0 = load i16* %add.ptr, align 2
+ %add.ptr = getelementptr inbounds i16, i16* %p, i32 %i
+ %0 = load i16, i16* %add.ptr, align 2
%conv = zext i16 %0 to i32
%or = or i32 %conv, 128
%conv1 = trunc i32 %or to i16
@@ -639,8 +639,8 @@ entry:
define void @memop_unsigned_short_add5_index5(i16* nocapture %p) nounwind {
entry:
; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#10){{ *}}+={{ *}}#5
- %add.ptr = getelementptr inbounds i16* %p, i32 5
- %0 = load i16* %add.ptr, align 2
+ %add.ptr = getelementptr inbounds i16, i16* %p, i32 5
+ %0 = load i16, i16* %add.ptr, align 2
%conv = zext i16 %0 to i32
%add = add nsw i32 %conv, 5
%conv1 = trunc i32 %add to i16
@@ -652,8 +652,8 @@ define void @memop_unsigned_short_add_index5(i16* nocapture %p, i16 zeroext %x)
entry:
; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#10){{ *}}+={{ *}}r{{[0-9]+}}
%conv = zext i16 %x to i32
- %add.ptr = getelementptr inbounds i16* %p, i32 5
- %0 = load i16* %add.ptr, align 2
+ %add.ptr = getelementptr inbounds i16, i16* %p, i32 5
+ %0 = load i16, i16* %add.ptr, align 2
%conv1 = zext i16 %0 to i32
%add = add nsw i32 %conv1, %conv
%conv2 = trunc i32 %add to i16
@@ -665,8 +665,8 @@ define void @memop_unsigned_short_sub_index5(i16* nocapture %p, i16 zeroext %x)
entry:
; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#10){{ *}}-={{ *}}r{{[0-9]+}}
%conv = zext i16 %x to i32
- %add.ptr = getelementptr inbounds i16* %p, i32 5
- %0 = load i16* %add.ptr, align 2
+ %add.ptr = getelementptr inbounds i16, i16* %p, i32 5
+ %0 = load i16, i16* %add.ptr, align 2
%conv1 = zext i16 %0 to i32
%sub = sub nsw i32 %conv1, %conv
%conv2 = trunc i32 %sub to i16
@@ -677,8 +677,8 @@ entry:
define void @memop_unsigned_short_or_index5(i16* nocapture %p, i16 zeroext %x) nounwind {
entry:
; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#10){{ *}}|={{ *}}r{{[0-9]+}}
- %add.ptr = getelementptr inbounds i16* %p, i32 5
- %0 = load i16* %add.ptr, align 2
+ %add.ptr = getelementptr inbounds i16, i16* %p, i32 5
+ %0 = load i16, i16* %add.ptr, align 2
%or3 = or i16 %0, %x
store i16 %or3, i16* %add.ptr, align 2
ret void
@@ -687,8 +687,8 @@ entry:
define void @memop_unsigned_short_and_index5(i16* nocapture %p, i16 zeroext %x) nounwind {
entry:
; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#10){{ *}}&={{ *}}r{{[0-9]+}}
- %add.ptr = getelementptr inbounds i16* %p, i32 5
- %0 = load i16* %add.ptr, align 2
+ %add.ptr = getelementptr inbounds i16, i16* %p, i32 5
+ %0 = load i16, i16* %add.ptr, align 2
%and3 = and i16 %0, %x
store i16 %and3, i16* %add.ptr, align 2
ret void
@@ -697,8 +697,8 @@ entry:
define void @memop_unsigned_short_clrbit_index5(i16* nocapture %p) nounwind {
entry:
; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#10){{ *}}={{ *}}clrbit({{ *}}#5{{ *}})
- %add.ptr = getelementptr inbounds i16* %p, i32 5
- %0 = load i16* %add.ptr, align 2
+ %add.ptr = getelementptr inbounds i16, i16* %p, i32 5
+ %0 = load i16, i16* %add.ptr, align 2
%conv = zext i16 %0 to i32
%and = and i32 %conv, 65503
%conv1 = trunc i32 %and to i16
@@ -709,8 +709,8 @@ entry:
define void @memop_unsigned_short_setbit_index5(i16* nocapture %p) nounwind {
entry:
; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#10){{ *}}={{ *}}setbit({{ *}}#7{{ *}})
- %add.ptr = getelementptr inbounds i16* %p, i32 5
- %0 = load i16* %add.ptr, align 2
+ %add.ptr = getelementptr inbounds i16, i16* %p, i32 5
+ %0 = load i16, i16* %add.ptr, align 2
%conv = zext i16 %0 to i32
%or = or i32 %conv, 128
%conv1 = trunc i32 %or to i16
@@ -721,7 +721,7 @@ entry:
define void @memop_signed_short_add5(i16* nocapture %p) nounwind {
entry:
; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}#5
- %0 = load i16* %p, align 2
+ %0 = load i16, i16* %p, align 2
%conv2 = zext i16 %0 to i32
%add = add nsw i32 %conv2, 5
%conv1 = trunc i32 %add to i16
@@ -733,7 +733,7 @@ define void @memop_signed_short_add(i16* nocapture %p, i16 signext %x) nounwind
entry:
; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}r{{[0-9]+}}
%conv4 = zext i16 %x to i32
- %0 = load i16* %p, align 2
+ %0 = load i16, i16* %p, align 2
%conv13 = zext i16 %0 to i32
%add = add nsw i32 %conv13, %conv4
%conv2 = trunc i32 %add to i16
@@ -745,7 +745,7 @@ define void @memop_signed_short_sub(i16* nocapture %p, i16 signext %x) nounwind
entry:
; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}-={{ *}}r{{[0-9]+}}
%conv4 = zext i16 %x to i32
- %0 = load i16* %p, align 2
+ %0 = load i16, i16* %p, align 2
%conv13 = zext i16 %0 to i32
%sub = sub nsw i32 %conv13, %conv4
%conv2 = trunc i32 %sub to i16
@@ -756,7 +756,7 @@ entry:
define void @memop_signed_short_or(i16* nocapture %p, i16 signext %x) nounwind {
entry:
; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}|={{ *}}r{{[0-9]+}}
- %0 = load i16* %p, align 2
+ %0 = load i16, i16* %p, align 2
%or3 = or i16 %0, %x
store i16 %or3, i16* %p, align 2
ret void
@@ -765,7 +765,7 @@ entry:
define void @memop_signed_short_and(i16* nocapture %p, i16 signext %x) nounwind {
entry:
; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}&={{ *}}r{{[0-9]+}}
- %0 = load i16* %p, align 2
+ %0 = load i16, i16* %p, align 2
%and3 = and i16 %0, %x
store i16 %and3, i16* %p, align 2
ret void
@@ -774,7 +774,7 @@ entry:
define void @memop_signed_short_clrbit(i16* nocapture %p) nounwind {
entry:
; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}clrbit({{ *}}#5{{ *}})
- %0 = load i16* %p, align 2
+ %0 = load i16, i16* %p, align 2
%conv2 = zext i16 %0 to i32
%and = and i32 %conv2, 65503
%conv1 = trunc i32 %and to i16
@@ -785,7 +785,7 @@ entry:
define void @memop_signed_short_setbit(i16* nocapture %p) nounwind {
entry:
; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}setbit({{ *}}#7{{ *}})
- %0 = load i16* %p, align 2
+ %0 = load i16, i16* %p, align 2
%conv2 = zext i16 %0 to i32
%or = or i32 %conv2, 128
%conv1 = trunc i32 %or to i16
@@ -796,8 +796,8 @@ entry:
define void @memop_signed_short_add5_index(i16* nocapture %p, i32 %i) nounwind {
entry:
; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}#5
- %add.ptr = getelementptr inbounds i16* %p, i32 %i
- %0 = load i16* %add.ptr, align 2
+ %add.ptr = getelementptr inbounds i16, i16* %p, i32 %i
+ %0 = load i16, i16* %add.ptr, align 2
%conv2 = zext i16 %0 to i32
%add = add nsw i32 %conv2, 5
%conv1 = trunc i32 %add to i16
@@ -809,8 +809,8 @@ define void @memop_signed_short_add_index(i16* nocapture %p, i32 %i, i16 signext
entry:
; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}r{{[0-9]+}}
%conv4 = zext i16 %x to i32
- %add.ptr = getelementptr inbounds i16* %p, i32 %i
- %0 = load i16* %add.ptr, align 2
+ %add.ptr = getelementptr inbounds i16, i16* %p, i32 %i
+ %0 = load i16, i16* %add.ptr, align 2
%conv13 = zext i16 %0 to i32
%add = add nsw i32 %conv13, %conv4
%conv2 = trunc i32 %add to i16
@@ -822,8 +822,8 @@ define void @memop_signed_short_sub_index(i16* nocapture %p, i32 %i, i16 signext
entry:
; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}-={{ *}}r{{[0-9]+}}
%conv4 = zext i16 %x to i32
- %add.ptr = getelementptr inbounds i16* %p, i32 %i
- %0 = load i16* %add.ptr, align 2
+ %add.ptr = getelementptr inbounds i16, i16* %p, i32 %i
+ %0 = load i16, i16* %add.ptr, align 2
%conv13 = zext i16 %0 to i32
%sub = sub nsw i32 %conv13, %conv4
%conv2 = trunc i32 %sub to i16
@@ -834,8 +834,8 @@ entry:
define void @memop_signed_short_or_index(i16* nocapture %p, i32 %i, i16 signext %x) nounwind {
entry:
; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}|={{ *}}r{{[0-9]+}}
- %add.ptr = getelementptr inbounds i16* %p, i32 %i
- %0 = load i16* %add.ptr, align 2
+ %add.ptr = getelementptr inbounds i16, i16* %p, i32 %i
+ %0 = load i16, i16* %add.ptr, align 2
%or3 = or i16 %0, %x
store i16 %or3, i16* %add.ptr, align 2
ret void
@@ -844,8 +844,8 @@ entry:
define void @memop_signed_short_and_index(i16* nocapture %p, i32 %i, i16 signext %x) nounwind {
entry:
; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}&={{ *}}r{{[0-9]+}}
- %add.ptr = getelementptr inbounds i16* %p, i32 %i
- %0 = load i16* %add.ptr, align 2
+ %add.ptr = getelementptr inbounds i16, i16* %p, i32 %i
+ %0 = load i16, i16* %add.ptr, align 2
%and3 = and i16 %0, %x
store i16 %and3, i16* %add.ptr, align 2
ret void
@@ -854,8 +854,8 @@ entry:
define void @memop_signed_short_clrbit_index(i16* nocapture %p, i32 %i) nounwind {
entry:
; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}clrbit({{ *}}#5{{ *}})
- %add.ptr = getelementptr inbounds i16* %p, i32 %i
- %0 = load i16* %add.ptr, align 2
+ %add.ptr = getelementptr inbounds i16, i16* %p, i32 %i
+ %0 = load i16, i16* %add.ptr, align 2
%conv2 = zext i16 %0 to i32
%and = and i32 %conv2, 65503
%conv1 = trunc i32 %and to i16
@@ -866,8 +866,8 @@ entry:
define void @memop_signed_short_setbit_index(i16* nocapture %p, i32 %i) nounwind {
entry:
; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}setbit({{ *}}#7{{ *}})
- %add.ptr = getelementptr inbounds i16* %p, i32 %i
- %0 = load i16* %add.ptr, align 2
+ %add.ptr = getelementptr inbounds i16, i16* %p, i32 %i
+ %0 = load i16, i16* %add.ptr, align 2
%conv2 = zext i16 %0 to i32
%or = or i32 %conv2, 128
%conv1 = trunc i32 %or to i16
@@ -878,8 +878,8 @@ entry:
define void @memop_signed_short_add5_index5(i16* nocapture %p) nounwind {
entry:
; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#10){{ *}}+={{ *}}#5
- %add.ptr = getelementptr inbounds i16* %p, i32 5
- %0 = load i16* %add.ptr, align 2
+ %add.ptr = getelementptr inbounds i16, i16* %p, i32 5
+ %0 = load i16, i16* %add.ptr, align 2
%conv2 = zext i16 %0 to i32
%add = add nsw i32 %conv2, 5
%conv1 = trunc i32 %add to i16
@@ -891,8 +891,8 @@ define void @memop_signed_short_add_index5(i16* nocapture %p, i16 signext %x) no
entry:
; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#10){{ *}}+={{ *}}r{{[0-9]+}}
%conv4 = zext i16 %x to i32
- %add.ptr = getelementptr inbounds i16* %p, i32 5
- %0 = load i16* %add.ptr, align 2
+ %add.ptr = getelementptr inbounds i16, i16* %p, i32 5
+ %0 = load i16, i16* %add.ptr, align 2
%conv13 = zext i16 %0 to i32
%add = add nsw i32 %conv13, %conv4
%conv2 = trunc i32 %add to i16
@@ -904,8 +904,8 @@ define void @memop_signed_short_sub_index5(i16* nocapture %p, i16 signext %x) no
entry:
; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#10){{ *}}-={{ *}}r{{[0-9]+}}
%conv4 = zext i16 %x to i32
- %add.ptr = getelementptr inbounds i16* %p, i32 5
- %0 = load i16* %add.ptr, align 2
+ %add.ptr = getelementptr inbounds i16, i16* %p, i32 5
+ %0 = load i16, i16* %add.ptr, align 2
%conv13 = zext i16 %0 to i32
%sub = sub nsw i32 %conv13, %conv4
%conv2 = trunc i32 %sub to i16
@@ -916,8 +916,8 @@ entry:
define void @memop_signed_short_or_index5(i16* nocapture %p, i16 signext %x) nounwind {
entry:
; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#10){{ *}}|={{ *}}r{{[0-9]+}}
- %add.ptr = getelementptr inbounds i16* %p, i32 5
- %0 = load i16* %add.ptr, align 2
+ %add.ptr = getelementptr inbounds i16, i16* %p, i32 5
+ %0 = load i16, i16* %add.ptr, align 2
%or3 = or i16 %0, %x
store i16 %or3, i16* %add.ptr, align 2
ret void
@@ -926,8 +926,8 @@ entry:
define void @memop_signed_short_and_index5(i16* nocapture %p, i16 signext %x) nounwind {
entry:
; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#10){{ *}}&={{ *}}r{{[0-9]+}}
- %add.ptr = getelementptr inbounds i16* %p, i32 5
- %0 = load i16* %add.ptr, align 2
+ %add.ptr = getelementptr inbounds i16, i16* %p, i32 5
+ %0 = load i16, i16* %add.ptr, align 2
%and3 = and i16 %0, %x
store i16 %and3, i16* %add.ptr, align 2
ret void
@@ -936,8 +936,8 @@ entry:
define void @memop_signed_short_clrbit_index5(i16* nocapture %p) nounwind {
entry:
; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#10){{ *}}={{ *}}clrbit({{ *}}#5{{ *}})
- %add.ptr = getelementptr inbounds i16* %p, i32 5
- %0 = load i16* %add.ptr, align 2
+ %add.ptr = getelementptr inbounds i16, i16* %p, i32 5
+ %0 = load i16, i16* %add.ptr, align 2
%conv2 = zext i16 %0 to i32
%and = and i32 %conv2, 65503
%conv1 = trunc i32 %and to i16
@@ -948,8 +948,8 @@ entry:
define void @memop_signed_short_setbit_index5(i16* nocapture %p) nounwind {
entry:
; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#10){{ *}}={{ *}}setbit({{ *}}#7{{ *}})
- %add.ptr = getelementptr inbounds i16* %p, i32 5
- %0 = load i16* %add.ptr, align 2
+ %add.ptr = getelementptr inbounds i16, i16* %p, i32 5
+ %0 = load i16, i16* %add.ptr, align 2
%conv2 = zext i16 %0 to i32
%or = or i32 %conv2, 128
%conv1 = trunc i32 %or to i16
@@ -960,7 +960,7 @@ entry:
define void @memop_signed_int_add5(i32* nocapture %p) nounwind {
entry:
; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}#5
- %0 = load i32* %p, align 4
+ %0 = load i32, i32* %p, align 4
%add = add i32 %0, 5
store i32 %add, i32* %p, align 4
ret void
@@ -969,7 +969,7 @@ entry:
define void @memop_signed_int_add(i32* nocapture %p, i32 %x) nounwind {
entry:
; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}r{{[0-9]+}}
- %0 = load i32* %p, align 4
+ %0 = load i32, i32* %p, align 4
%add = add i32 %0, %x
store i32 %add, i32* %p, align 4
ret void
@@ -978,7 +978,7 @@ entry:
define void @memop_signed_int_sub(i32* nocapture %p, i32 %x) nounwind {
entry:
; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}-={{ *}}r{{[0-9]+}}
- %0 = load i32* %p, align 4
+ %0 = load i32, i32* %p, align 4
%sub = sub i32 %0, %x
store i32 %sub, i32* %p, align 4
ret void
@@ -987,7 +987,7 @@ entry:
define void @memop_signed_int_or(i32* nocapture %p, i32 %x) nounwind {
entry:
; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}|={{ *}}r{{[0-9]+}}
- %0 = load i32* %p, align 4
+ %0 = load i32, i32* %p, align 4
%or = or i32 %0, %x
store i32 %or, i32* %p, align 4
ret void
@@ -996,7 +996,7 @@ entry:
define void @memop_signed_int_and(i32* nocapture %p, i32 %x) nounwind {
entry:
; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}&={{ *}}r{{[0-9]+}}
- %0 = load i32* %p, align 4
+ %0 = load i32, i32* %p, align 4
%and = and i32 %0, %x
store i32 %and, i32* %p, align 4
ret void
@@ -1005,7 +1005,7 @@ entry:
define void @memop_signed_int_clrbit(i32* nocapture %p) nounwind {
entry:
; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}clrbit({{ *}}#5{{ *}})
- %0 = load i32* %p, align 4
+ %0 = load i32, i32* %p, align 4
%and = and i32 %0, -33
store i32 %and, i32* %p, align 4
ret void
@@ -1014,7 +1014,7 @@ entry:
define void @memop_signed_int_setbit(i32* nocapture %p) nounwind {
entry:
; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}setbit({{ *}}#7{{ *}})
- %0 = load i32* %p, align 4
+ %0 = load i32, i32* %p, align 4
%or = or i32 %0, 128
store i32 %or, i32* %p, align 4
ret void
@@ -1023,8 +1023,8 @@ entry:
define void @memop_signed_int_add5_index(i32* nocapture %p, i32 %i) nounwind {
entry:
; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}#5
- %add.ptr = getelementptr inbounds i32* %p, i32 %i
- %0 = load i32* %add.ptr, align 4
+ %add.ptr = getelementptr inbounds i32, i32* %p, i32 %i
+ %0 = load i32, i32* %add.ptr, align 4
%add = add i32 %0, 5
store i32 %add, i32* %add.ptr, align 4
ret void
@@ -1033,8 +1033,8 @@ entry:
define void @memop_signed_int_add_index(i32* nocapture %p, i32 %i, i32 %x) nounwind {
entry:
; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}r{{[0-9]+}}
- %add.ptr = getelementptr inbounds i32* %p, i32 %i
- %0 = load i32* %add.ptr, align 4
+ %add.ptr = getelementptr inbounds i32, i32* %p, i32 %i
+ %0 = load i32, i32* %add.ptr, align 4
%add = add i32 %0, %x
store i32 %add, i32* %add.ptr, align 4
ret void
@@ -1043,8 +1043,8 @@ entry:
define void @memop_signed_int_sub_index(i32* nocapture %p, i32 %i, i32 %x) nounwind {
entry:
; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}-={{ *}}r{{[0-9]+}}
- %add.ptr = getelementptr inbounds i32* %p, i32 %i
- %0 = load i32* %add.ptr, align 4
+ %add.ptr = getelementptr inbounds i32, i32* %p, i32 %i
+ %0 = load i32, i32* %add.ptr, align 4
%sub = sub i32 %0, %x
store i32 %sub, i32* %add.ptr, align 4
ret void
@@ -1053,8 +1053,8 @@ entry:
define void @memop_signed_int_or_index(i32* nocapture %p, i32 %i, i32 %x) nounwind {
entry:
; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}|={{ *}}r{{[0-9]+}}
- %add.ptr = getelementptr inbounds i32* %p, i32 %i
- %0 = load i32* %add.ptr, align 4
+ %add.ptr = getelementptr inbounds i32, i32* %p, i32 %i
+ %0 = load i32, i32* %add.ptr, align 4
%or = or i32 %0, %x
store i32 %or, i32* %add.ptr, align 4
ret void
@@ -1063,8 +1063,8 @@ entry:
define void @memop_signed_int_and_index(i32* nocapture %p, i32 %i, i32 %x) nounwind {
entry:
; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}&={{ *}}r{{[0-9]+}}
- %add.ptr = getelementptr inbounds i32* %p, i32 %i
- %0 = load i32* %add.ptr, align 4
+ %add.ptr = getelementptr inbounds i32, i32* %p, i32 %i
+ %0 = load i32, i32* %add.ptr, align 4
%and = and i32 %0, %x
store i32 %and, i32* %add.ptr, align 4
ret void
@@ -1073,8 +1073,8 @@ entry:
define void @memop_signed_int_clrbit_index(i32* nocapture %p, i32 %i) nounwind {
entry:
; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}clrbit({{ *}}#5{{ *}})
- %add.ptr = getelementptr inbounds i32* %p, i32 %i
- %0 = load i32* %add.ptr, align 4
+ %add.ptr = getelementptr inbounds i32, i32* %p, i32 %i
+ %0 = load i32, i32* %add.ptr, align 4
%and = and i32 %0, -33
store i32 %and, i32* %add.ptr, align 4
ret void
@@ -1083,8 +1083,8 @@ entry:
define void @memop_signed_int_setbit_index(i32* nocapture %p, i32 %i) nounwind {
entry:
; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}setbit({{ *}}#7{{ *}})
- %add.ptr = getelementptr inbounds i32* %p, i32 %i
- %0 = load i32* %add.ptr, align 4
+ %add.ptr = getelementptr inbounds i32, i32* %p, i32 %i
+ %0 = load i32, i32* %add.ptr, align 4
%or = or i32 %0, 128
store i32 %or, i32* %add.ptr, align 4
ret void
@@ -1093,8 +1093,8 @@ entry:
define void @memop_signed_int_add5_index5(i32* nocapture %p) nounwind {
entry:
; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#20){{ *}}+={{ *}}#5
- %add.ptr = getelementptr inbounds i32* %p, i32 5
- %0 = load i32* %add.ptr, align 4
+ %add.ptr = getelementptr inbounds i32, i32* %p, i32 5
+ %0 = load i32, i32* %add.ptr, align 4
%add = add i32 %0, 5
store i32 %add, i32* %add.ptr, align 4
ret void
@@ -1103,8 +1103,8 @@ entry:
define void @memop_signed_int_add_index5(i32* nocapture %p, i32 %x) nounwind {
entry:
; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#20){{ *}}+={{ *}}r{{[0-9]+}}
- %add.ptr = getelementptr inbounds i32* %p, i32 5
- %0 = load i32* %add.ptr, align 4
+ %add.ptr = getelementptr inbounds i32, i32* %p, i32 5
+ %0 = load i32, i32* %add.ptr, align 4
%add = add i32 %0, %x
store i32 %add, i32* %add.ptr, align 4
ret void
@@ -1113,8 +1113,8 @@ entry:
define void @memop_signed_int_sub_index5(i32* nocapture %p, i32 %x) nounwind {
entry:
; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#20){{ *}}-={{ *}}r{{[0-9]+}}
- %add.ptr = getelementptr inbounds i32* %p, i32 5
- %0 = load i32* %add.ptr, align 4
+ %add.ptr = getelementptr inbounds i32, i32* %p, i32 5
+ %0 = load i32, i32* %add.ptr, align 4
%sub = sub i32 %0, %x
store i32 %sub, i32* %add.ptr, align 4
ret void
@@ -1123,8 +1123,8 @@ entry:
define void @memop_signed_int_or_index5(i32* nocapture %p, i32 %x) nounwind {
entry:
; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#20){{ *}}|={{ *}}r{{[0-9]+}}
- %add.ptr = getelementptr inbounds i32* %p, i32 5
- %0 = load i32* %add.ptr, align 4
+ %add.ptr = getelementptr inbounds i32, i32* %p, i32 5
+ %0 = load i32, i32* %add.ptr, align 4
%or = or i32 %0, %x
store i32 %or, i32* %add.ptr, align 4
ret void
@@ -1133,8 +1133,8 @@ entry:
define void @memop_signed_int_and_index5(i32* nocapture %p, i32 %x) nounwind {
entry:
; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#20){{ *}}&={{ *}}r{{[0-9]+}}
- %add.ptr = getelementptr inbounds i32* %p, i32 5
- %0 = load i32* %add.ptr, align 4
+ %add.ptr = getelementptr inbounds i32, i32* %p, i32 5
+ %0 = load i32, i32* %add.ptr, align 4
%and = and i32 %0, %x
store i32 %and, i32* %add.ptr, align 4
ret void
@@ -1143,8 +1143,8 @@ entry:
define void @memop_signed_int_clrbit_index5(i32* nocapture %p) nounwind {
entry:
; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#20){{ *}}={{ *}}clrbit({{ *}}#5{{ *}})
- %add.ptr = getelementptr inbounds i32* %p, i32 5
- %0 = load i32* %add.ptr, align 4
+ %add.ptr = getelementptr inbounds i32, i32* %p, i32 5
+ %0 = load i32, i32* %add.ptr, align 4
%and = and i32 %0, -33
store i32 %and, i32* %add.ptr, align 4
ret void
@@ -1153,8 +1153,8 @@ entry:
define void @memop_signed_int_setbit_index5(i32* nocapture %p) nounwind {
entry:
; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#20){{ *}}={{ *}}setbit({{ *}}#7{{ *}})
- %add.ptr = getelementptr inbounds i32* %p, i32 5
- %0 = load i32* %add.ptr, align 4
+ %add.ptr = getelementptr inbounds i32, i32* %p, i32 5
+ %0 = load i32, i32* %add.ptr, align 4
%or = or i32 %0, 128
store i32 %or, i32* %add.ptr, align 4
ret void
@@ -1163,7 +1163,7 @@ entry:
define void @memop_unsigned_int_add5(i32* nocapture %p) nounwind {
entry:
; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}#5
- %0 = load i32* %p, align 4
+ %0 = load i32, i32* %p, align 4
%add = add nsw i32 %0, 5
store i32 %add, i32* %p, align 4
ret void
@@ -1172,7 +1172,7 @@ entry:
define void @memop_unsigned_int_add(i32* nocapture %p, i32 %x) nounwind {
entry:
; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}r{{[0-9]+}}
- %0 = load i32* %p, align 4
+ %0 = load i32, i32* %p, align 4
%add = add nsw i32 %0, %x
store i32 %add, i32* %p, align 4
ret void
@@ -1181,7 +1181,7 @@ entry:
define void @memop_unsigned_int_sub(i32* nocapture %p, i32 %x) nounwind {
entry:
; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}-={{ *}}r{{[0-9]+}}
- %0 = load i32* %p, align 4
+ %0 = load i32, i32* %p, align 4
%sub = sub nsw i32 %0, %x
store i32 %sub, i32* %p, align 4
ret void
@@ -1190,7 +1190,7 @@ entry:
define void @memop_unsigned_int_or(i32* nocapture %p, i32 %x) nounwind {
entry:
; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}|={{ *}}r{{[0-9]+}}
- %0 = load i32* %p, align 4
+ %0 = load i32, i32* %p, align 4
%or = or i32 %0, %x
store i32 %or, i32* %p, align 4
ret void
@@ -1199,7 +1199,7 @@ entry:
define void @memop_unsigned_int_and(i32* nocapture %p, i32 %x) nounwind {
entry:
; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}&={{ *}}r{{[0-9]+}}
- %0 = load i32* %p, align 4
+ %0 = load i32, i32* %p, align 4
%and = and i32 %0, %x
store i32 %and, i32* %p, align 4
ret void
@@ -1208,7 +1208,7 @@ entry:
define void @memop_unsigned_int_clrbit(i32* nocapture %p) nounwind {
entry:
; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}clrbit({{ *}}#5{{ *}})
- %0 = load i32* %p, align 4
+ %0 = load i32, i32* %p, align 4
%and = and i32 %0, -33
store i32 %and, i32* %p, align 4
ret void
@@ -1217,7 +1217,7 @@ entry:
define void @memop_unsigned_int_setbit(i32* nocapture %p) nounwind {
entry:
; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}setbit({{ *}}#7{{ *}})
- %0 = load i32* %p, align 4
+ %0 = load i32, i32* %p, align 4
%or = or i32 %0, 128
store i32 %or, i32* %p, align 4
ret void
@@ -1226,8 +1226,8 @@ entry:
define void @memop_unsigned_int_add5_index(i32* nocapture %p, i32 %i) nounwind {
entry:
; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}#5
- %add.ptr = getelementptr inbounds i32* %p, i32 %i
- %0 = load i32* %add.ptr, align 4
+ %add.ptr = getelementptr inbounds i32, i32* %p, i32 %i
+ %0 = load i32, i32* %add.ptr, align 4
%add = add nsw i32 %0, 5
store i32 %add, i32* %add.ptr, align 4
ret void
@@ -1236,8 +1236,8 @@ entry:
define void @memop_unsigned_int_add_index(i32* nocapture %p, i32 %i, i32 %x) nounwind {
entry:
; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}r{{[0-9]+}}
- %add.ptr = getelementptr inbounds i32* %p, i32 %i
- %0 = load i32* %add.ptr, align 4
+ %add.ptr = getelementptr inbounds i32, i32* %p, i32 %i
+ %0 = load i32, i32* %add.ptr, align 4
%add = add nsw i32 %0, %x
store i32 %add, i32* %add.ptr, align 4
ret void
@@ -1246,8 +1246,8 @@ entry:
define void @memop_unsigned_int_sub_index(i32* nocapture %p, i32 %i, i32 %x) nounwind {
entry:
; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}-={{ *}}r{{[0-9]+}}
- %add.ptr = getelementptr inbounds i32* %p, i32 %i
- %0 = load i32* %add.ptr, align 4
+ %add.ptr = getelementptr inbounds i32, i32* %p, i32 %i
+ %0 = load i32, i32* %add.ptr, align 4
%sub = sub nsw i32 %0, %x
store i32 %sub, i32* %add.ptr, align 4
ret void
@@ -1256,8 +1256,8 @@ entry:
define void @memop_unsigned_int_or_index(i32* nocapture %p, i32 %i, i32 %x) nounwind {
entry:
; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}|={{ *}}r{{[0-9]+}}
- %add.ptr = getelementptr inbounds i32* %p, i32 %i
- %0 = load i32* %add.ptr, align 4
+ %add.ptr = getelementptr inbounds i32, i32* %p, i32 %i
+ %0 = load i32, i32* %add.ptr, align 4
%or = or i32 %0, %x
store i32 %or, i32* %add.ptr, align 4
ret void
@@ -1266,8 +1266,8 @@ entry:
define void @memop_unsigned_int_and_index(i32* nocapture %p, i32 %i, i32 %x) nounwind {
entry:
; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}&={{ *}}r{{[0-9]+}}
- %add.ptr = getelementptr inbounds i32* %p, i32 %i
- %0 = load i32* %add.ptr, align 4
+ %add.ptr = getelementptr inbounds i32, i32* %p, i32 %i
+ %0 = load i32, i32* %add.ptr, align 4
%and = and i32 %0, %x
store i32 %and, i32* %add.ptr, align 4
ret void
@@ -1276,8 +1276,8 @@ entry:
define void @memop_unsigned_int_clrbit_index(i32* nocapture %p, i32 %i) nounwind {
entry:
; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}clrbit({{ *}}#5{{ *}})
- %add.ptr = getelementptr inbounds i32* %p, i32 %i
- %0 = load i32* %add.ptr, align 4
+ %add.ptr = getelementptr inbounds i32, i32* %p, i32 %i
+ %0 = load i32, i32* %add.ptr, align 4
%and = and i32 %0, -33
store i32 %and, i32* %add.ptr, align 4
ret void
@@ -1286,8 +1286,8 @@ entry:
define void @memop_unsigned_int_setbit_index(i32* nocapture %p, i32 %i) nounwind {
entry:
; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}setbit({{ *}}#7{{ *}})
- %add.ptr = getelementptr inbounds i32* %p, i32 %i
- %0 = load i32* %add.ptr, align 4
+ %add.ptr = getelementptr inbounds i32, i32* %p, i32 %i
+ %0 = load i32, i32* %add.ptr, align 4
%or = or i32 %0, 128
store i32 %or, i32* %add.ptr, align 4
ret void
@@ -1296,8 +1296,8 @@ entry:
define void @memop_unsigned_int_add5_index5(i32* nocapture %p) nounwind {
entry:
; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#20){{ *}}+={{ *}}#5
- %add.ptr = getelementptr inbounds i32* %p, i32 5
- %0 = load i32* %add.ptr, align 4
+ %add.ptr = getelementptr inbounds i32, i32* %p, i32 5
+ %0 = load i32, i32* %add.ptr, align 4
%add = add nsw i32 %0, 5
store i32 %add, i32* %add.ptr, align 4
ret void
@@ -1306,8 +1306,8 @@ entry:
define void @memop_unsigned_int_add_index5(i32* nocapture %p, i32 %x) nounwind {
entry:
; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#20){{ *}}+={{ *}}r{{[0-9]+}}
- %add.ptr = getelementptr inbounds i32* %p, i32 5
- %0 = load i32* %add.ptr, align 4
+ %add.ptr = getelementptr inbounds i32, i32* %p, i32 5
+ %0 = load i32, i32* %add.ptr, align 4
%add = add nsw i32 %0, %x
store i32 %add, i32* %add.ptr, align 4
ret void
@@ -1316,8 +1316,8 @@ entry:
define void @memop_unsigned_int_sub_index5(i32* nocapture %p, i32 %x) nounwind {
entry:
; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#20){{ *}}-={{ *}}r{{[0-9]+}}
- %add.ptr = getelementptr inbounds i32* %p, i32 5
- %0 = load i32* %add.ptr, align 4
+ %add.ptr = getelementptr inbounds i32, i32* %p, i32 5
+ %0 = load i32, i32* %add.ptr, align 4
%sub = sub nsw i32 %0, %x
store i32 %sub, i32* %add.ptr, align 4
ret void
@@ -1326,8 +1326,8 @@ entry:
define void @memop_unsigned_int_or_index5(i32* nocapture %p, i32 %x) nounwind {
entry:
; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#20){{ *}}|={{ *}}r{{[0-9]+}}
- %add.ptr = getelementptr inbounds i32* %p, i32 5
- %0 = load i32* %add.ptr, align 4
+ %add.ptr = getelementptr inbounds i32, i32* %p, i32 5
+ %0 = load i32, i32* %add.ptr, align 4
%or = or i32 %0, %x
store i32 %or, i32* %add.ptr, align 4
ret void
@@ -1336,8 +1336,8 @@ entry:
define void @memop_unsigned_int_and_index5(i32* nocapture %p, i32 %x) nounwind {
entry:
; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#20){{ *}}&={{ *}}r{{[0-9]+}}
- %add.ptr = getelementptr inbounds i32* %p, i32 5
- %0 = load i32* %add.ptr, align 4
+ %add.ptr = getelementptr inbounds i32, i32* %p, i32 5
+ %0 = load i32, i32* %add.ptr, align 4
%and = and i32 %0, %x
store i32 %and, i32* %add.ptr, align 4
ret void
@@ -1346,8 +1346,8 @@ entry:
define void @memop_unsigned_int_clrbit_index5(i32* nocapture %p) nounwind {
entry:
; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#20){{ *}}={{ *}}clrbit({{ *}}#5{{ *}})
- %add.ptr = getelementptr inbounds i32* %p, i32 5
- %0 = load i32* %add.ptr, align 4
+ %add.ptr = getelementptr inbounds i32, i32* %p, i32 5
+ %0 = load i32, i32* %add.ptr, align 4
%and = and i32 %0, -33
store i32 %and, i32* %add.ptr, align 4
ret void
@@ -1356,8 +1356,8 @@ entry:
define void @memop_unsigned_int_setbit_index5(i32* nocapture %p) nounwind {
entry:
; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#20){{ *}}={{ *}}setbit({{ *}}#7{{ *}})
- %add.ptr = getelementptr inbounds i32* %p, i32 5
- %0 = load i32* %add.ptr, align 4
+ %add.ptr = getelementptr inbounds i32, i32* %p, i32 5
+ %0 = load i32, i32* %add.ptr, align 4
%or = or i32 %0, 128
store i32 %or, i32* %add.ptr, align 4
ret void
diff --git a/test/CodeGen/Hexagon/memops1.ll b/test/CodeGen/Hexagon/memops1.ll
index 2babdc848ddc..37e885b6e0cb 100644
--- a/test/CodeGen/Hexagon/memops1.ll
+++ b/test/CodeGen/Hexagon/memops1.ll
@@ -7,9 +7,9 @@ entry:
; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#40){{ *}}-={{ *}}#1
%p.addr = alloca i32*, align 4
store i32* %p, i32** %p.addr, align 4
- %0 = load i32** %p.addr, align 4
- %add.ptr = getelementptr inbounds i32* %0, i32 10
- %1 = load i32* %add.ptr, align 4
+ %0 = load i32*, i32** %p.addr, align 4
+ %add.ptr = getelementptr inbounds i32, i32* %0, i32 10
+ %1 = load i32, i32* %add.ptr, align 4
%sub = sub nsw i32 %1, 1
store i32 %sub, i32* %add.ptr, align 4
ret void
@@ -22,11 +22,11 @@ entry:
%i.addr = alloca i32, align 4
store i32* %p, i32** %p.addr, align 4
store i32 %i, i32* %i.addr, align 4
- %0 = load i32** %p.addr, align 4
- %1 = load i32* %i.addr, align 4
- %add.ptr = getelementptr inbounds i32* %0, i32 %1
- %add.ptr1 = getelementptr inbounds i32* %add.ptr, i32 10
- %2 = load i32* %add.ptr1, align 4
+ %0 = load i32*, i32** %p.addr, align 4
+ %1 = load i32, i32* %i.addr, align 4
+ %add.ptr = getelementptr inbounds i32, i32* %0, i32 %1
+ %add.ptr1 = getelementptr inbounds i32, i32* %add.ptr, i32 10
+ %2 = load i32, i32* %add.ptr1, align 4
%sub = sub nsw i32 %2, 1
store i32 %sub, i32* %add.ptr1, align 4
ret void
diff --git a/test/CodeGen/Hexagon/memops2.ll b/test/CodeGen/Hexagon/memops2.ll
index d6d1a50bcefa..f9f8a2478119 100644
--- a/test/CodeGen/Hexagon/memops2.ll
+++ b/test/CodeGen/Hexagon/memops2.ll
@@ -5,8 +5,8 @@
define void @f(i16* nocapture %p) nounwind {
entry:
; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#20){{ *}}-={{ *}}#1
- %add.ptr = getelementptr inbounds i16* %p, i32 10
- %0 = load i16* %add.ptr, align 2
+ %add.ptr = getelementptr inbounds i16, i16* %p, i32 10
+ %0 = load i16, i16* %add.ptr, align 2
%conv2 = zext i16 %0 to i32
%sub = add nsw i32 %conv2, 65535
%conv1 = trunc i32 %sub to i16
@@ -18,8 +18,8 @@ define void @g(i16* nocapture %p, i32 %i) nounwind {
entry:
; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#20){{ *}}-={{ *}}#1
%add.ptr.sum = add i32 %i, 10
- %add.ptr1 = getelementptr inbounds i16* %p, i32 %add.ptr.sum
- %0 = load i16* %add.ptr1, align 2
+ %add.ptr1 = getelementptr inbounds i16, i16* %p, i32 %add.ptr.sum
+ %0 = load i16, i16* %add.ptr1, align 2
%conv3 = zext i16 %0 to i32
%sub = add nsw i32 %conv3, 65535
%conv2 = trunc i32 %sub to i16
diff --git a/test/CodeGen/Hexagon/memops3.ll b/test/CodeGen/Hexagon/memops3.ll
index d9e4e8f53709..6cd7fdc48617 100644
--- a/test/CodeGen/Hexagon/memops3.ll
+++ b/test/CodeGen/Hexagon/memops3.ll
@@ -5,8 +5,8 @@
define void @f(i8* nocapture %p) nounwind {
entry:
; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#10){{ *}}-={{ *}}#1
- %add.ptr = getelementptr inbounds i8* %p, i32 10
- %0 = load i8* %add.ptr, align 1
+ %add.ptr = getelementptr inbounds i8, i8* %p, i32 10
+ %0 = load i8, i8* %add.ptr, align 1
%conv = zext i8 %0 to i32
%sub = add nsw i32 %conv, 255
%conv1 = trunc i32 %sub to i8
@@ -18,8 +18,8 @@ define void @g(i8* nocapture %p, i32 %i) nounwind {
entry:
; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#10){{ *}}-={{ *}}#1
%add.ptr.sum = add i32 %i, 10
- %add.ptr1 = getelementptr inbounds i8* %p, i32 %add.ptr.sum
- %0 = load i8* %add.ptr1, align 1
+ %add.ptr1 = getelementptr inbounds i8, i8* %p, i32 %add.ptr.sum
+ %0 = load i8, i8* %add.ptr1, align 1
%conv = zext i8 %0 to i32
%sub = add nsw i32 %conv, 255
%conv2 = trunc i32 %sub to i8
diff --git a/test/CodeGen/Hexagon/misaligned-access.ll b/test/CodeGen/Hexagon/misaligned-access.ll
index 4dafb44cc3ef..f4b0cb9cb1e3 100644
--- a/test/CodeGen/Hexagon/misaligned-access.ll
+++ b/test/CodeGen/Hexagon/misaligned-access.ll
@@ -7,10 +7,10 @@ declare i32 @_hi(i64) #1
define i32 @CSDRSEARCH_executeSearchManager() #0 {
entry:
%temp = alloca i32, align 4
- %0 = load i32* @temp1, align 4
+ %0 = load i32, i32* @temp1, align 4
store i32 %0, i32* %temp, align 4
%1 = bitcast i32* %temp to i64*
- %2 = load i64* %1, align 8
+ %2 = load i64, i64* %1, align 8
%call = call i32 @_hi(i64 %2)
ret i32 %call
}
diff --git a/test/CodeGen/Hexagon/mpy.ll b/test/CodeGen/Hexagon/mpy.ll
index d5c5ae345352..3ecf7d46ccb0 100644
--- a/test/CodeGen/Hexagon/mpy.ll
+++ b/test/CodeGen/Hexagon/mpy.ll
@@ -9,10 +9,10 @@ entry:
store i32 %acc, i32* %acc.addr, align 4
store i32 %num, i32* %num.addr, align 4
store i32 %num2, i32* %num2.addr, align 4
- %0 = load i32* %num.addr, align 4
- %1 = load i32* %acc.addr, align 4
+ %0 = load i32, i32* %num.addr, align 4
+ %1 = load i32, i32* %acc.addr, align 4
%mul = mul nsw i32 %0, %1
- %2 = load i32* %num2.addr, align 4
+ %2 = load i32, i32* %num2.addr, align 4
%add = add nsw i32 %mul, %2
store i32 %add, i32* %num.addr, align 4
ret void
diff --git a/test/CodeGen/Hexagon/newvaluejump.ll b/test/CodeGen/Hexagon/newvaluejump.ll
index 9c7ca55cb8f6..3e1ee179573a 100644
--- a/test/CodeGen/Hexagon/newvaluejump.ll
+++ b/test/CodeGen/Hexagon/newvaluejump.ll
@@ -9,10 +9,10 @@ entry:
; CHECK: if (cmp.eq(r{{[0-9]+}}.new, #0)) jump{{.}}
%addr1 = alloca i32, align 4
%addr2 = alloca i32, align 4
- %0 = load i32* @i, align 4
+ %0 = load i32, i32* @i, align 4
store i32 %0, i32* %addr1, align 4
call void @bar(i32 1, i32 2)
- %1 = load i32* @j, align 4
+ %1 = load i32, i32* @j, align 4
%tobool = icmp ne i32 %1, 0
br i1 %tobool, label %if.then, label %if.else
diff --git a/test/CodeGen/Hexagon/newvaluejump2.ll b/test/CodeGen/Hexagon/newvaluejump2.ll
index 3d50ea5422c7..a812a7d96659 100644
--- a/test/CodeGen/Hexagon/newvaluejump2.ll
+++ b/test/CodeGen/Hexagon/newvaluejump2.ll
@@ -1,17 +1,16 @@
-; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+; RUN: llc -march=hexagon -mcpu=hexagonv5 -disable-hexagon-misched < %s \
+; RUN: | FileCheck %s
; Check that we generate new value jump, both registers, with one
; of the registers as new.
-@Reg = common global i8 0, align 1
+@Reg = common global i32 0, align 4
define i32 @main() nounwind {
entry:
-; CHECK: if (cmp.gt(r{{[0-9]+}}.new, r{{[0-9]+}})) jump:{{[t|nt]}} .LBB{{[0-9]+}}_{{[0-9]+}}
- %Reg2 = alloca i8, align 1
- %0 = load i8* %Reg2, align 1
- %conv0 = zext i8 %0 to i32
- %1 = load i8* @Reg, align 1
- %conv1 = zext i8 %1 to i32
- %tobool = icmp sle i32 %conv0, %conv1
+; CHECK: if (cmp.gt(r{{[0-9]+}}, r{{[0-9]+}}.new)) jump:{{[t|nt]}} .LBB{{[0-9]+}}_{{[0-9]+}}
+ %Reg2 = alloca i32, align 4
+ %0 = load i32, i32* %Reg2, align 4
+ %1 = load i32, i32* @Reg, align 4
+ %tobool = icmp sle i32 %0, %1
br i1 %tobool, label %if.then, label %if.else
if.then:
diff --git a/test/CodeGen/Hexagon/newvaluestore.ll b/test/CodeGen/Hexagon/newvaluestore.ll
index 93cf3479ab5e..13cbba2d08e1 100644
--- a/test/CodeGen/Hexagon/newvaluestore.ll
+++ b/test/CodeGen/Hexagon/newvaluestore.ll
@@ -11,11 +11,11 @@ entry:
%number1 = alloca i32, align 4
%number2 = alloca i32, align 4
%number3 = alloca i32, align 4
- %0 = load i32 * @i, align 4
+ %0 = load i32 , i32 * @i, align 4
store i32 %0, i32* %number1, align 4
- %1 = load i32 * @j, align 4
+ %1 = load i32 , i32 * @j, align 4
store i32 %1, i32* %number2, align 4
- %2 = load i32 * @k, align 4
+ %2 = load i32 , i32 * @k, align 4
store i32 %2, i32* %number3, align 4
ret i32 %0
}
diff --git a/test/CodeGen/Hexagon/opt-fabs.ll b/test/CodeGen/Hexagon/opt-fabs.ll
index 31b56fd6e982..da657e4b1b8f 100644
--- a/test/CodeGen/Hexagon/opt-fabs.ll
+++ b/test/CodeGen/Hexagon/opt-fabs.ll
@@ -7,7 +7,7 @@ define float @my_fabsf(float %x) nounwind {
entry:
%x.addr = alloca float, align 4
store float %x, float* %x.addr, align 4
- %0 = load float* %x.addr, align 4
+ %0 = load float, float* %x.addr, align 4
%call = call float @fabsf(float %0) readnone
ret float %call
}
diff --git a/test/CodeGen/Hexagon/opt-fneg.ll b/test/CodeGen/Hexagon/opt-fneg.ll
index 479b4b64069a..978957865863 100644
--- a/test/CodeGen/Hexagon/opt-fneg.ll
+++ b/test/CodeGen/Hexagon/opt-fneg.ll
@@ -6,7 +6,7 @@ entry:
; CHECK: r{{[0-9]+}} = togglebit(r{{[0-9]+}}, #31)
%x.addr = alloca float, align 4
store float %x, float* %x.addr, align 4
- %0 = load float* %x.addr, align 4
+ %0 = load float, float* %x.addr, align 4
%sub = fsub float -0.000000e+00, %0
ret float %sub
}
diff --git a/test/CodeGen/Hexagon/postinc-load.ll b/test/CodeGen/Hexagon/postinc-load.ll
index 855a347d74f5..a9d987981d65 100644
--- a/test/CodeGen/Hexagon/postinc-load.ll
+++ b/test/CodeGen/Hexagon/postinc-load.ll
@@ -12,13 +12,13 @@ for.body:
%arrayidx.phi = phi i32* [ %a, %entry ], [ %arrayidx.inc, %for.body ]
%arrayidx1.phi = phi i16* [ %b, %entry ], [ %arrayidx1.inc, %for.body ]
%sum.03 = phi i32 [ 0, %entry ], [ %add2, %for.body ]
- %0 = load i32* %arrayidx.phi, align 4
- %1 = load i16* %arrayidx1.phi, align 2
+ %0 = load i32, i32* %arrayidx.phi, align 4
+ %1 = load i16, i16* %arrayidx1.phi, align 2
%conv = sext i16 %1 to i32
%add = add i32 %0, %sum.03
%add2 = add i32 %add, %conv
- %arrayidx.inc = getelementptr i32* %arrayidx.phi, i32 1
- %arrayidx1.inc = getelementptr i16* %arrayidx1.phi, i32 1
+ %arrayidx.inc = getelementptr i32, i32* %arrayidx.phi, i32 1
+ %arrayidx1.inc = getelementptr i16, i16* %arrayidx1.phi, i32 1
%lsr.iv.next = add i32 %lsr.iv, -1
%exitcond = icmp eq i32 %lsr.iv.next, 0
br i1 %exitcond, label %for.end, label %for.body
diff --git a/test/CodeGen/Hexagon/postinc-store.ll b/test/CodeGen/Hexagon/postinc-store.ll
index 99a3a58ad39c..6315ca14a952 100644
--- a/test/CodeGen/Hexagon/postinc-store.ll
+++ b/test/CodeGen/Hexagon/postinc-store.ll
@@ -11,15 +11,15 @@ for.body: ; preds = %for.body, %entry
%lsr.iv = phi i32 [ %lsr.iv.next, %for.body ], [ 10, %entry ]
%arrayidx.phi = phi i32* [ %a, %entry ], [ %arrayidx.inc, %for.body ]
%arrayidx1.phi = phi i16* [ %b, %entry ], [ %arrayidx1.inc, %for.body ]
- %0 = load i32* %arrayidx.phi, align 4
- %1 = load i16* %arrayidx1.phi, align 2
+ %0 = load i32, i32* %arrayidx.phi, align 4
+ %1 = load i16, i16* %arrayidx1.phi, align 2
%conv = sext i16 %1 to i32
%factor = mul i32 %0, 2
%add3 = add i32 %factor, %conv
store i32 %add3, i32* %arrayidx.phi, align 4
- %arrayidx.inc = getelementptr i32* %arrayidx.phi, i32 1
- %arrayidx1.inc = getelementptr i16* %arrayidx1.phi, i32 1
+ %arrayidx.inc = getelementptr i32, i32* %arrayidx.phi, i32 1
+ %arrayidx1.inc = getelementptr i16, i16* %arrayidx1.phi, i32 1
%lsr.iv.next = add i32 %lsr.iv, -1
%exitcond = icmp eq i32 %lsr.iv.next, 0
br i1 %exitcond, label %for.end, label %for.body
diff --git a/test/CodeGen/Hexagon/pred-absolute-store.ll b/test/CodeGen/Hexagon/pred-absolute-store.ll
index 64635b176daf..3e5e98270d53 100644
--- a/test/CodeGen/Hexagon/pred-absolute-store.ll
+++ b/test/CodeGen/Hexagon/pred-absolute-store.ll
@@ -1,8 +1,7 @@
-; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+; RUN: llc -march=hexagon < %s | FileCheck %s
; Check that we are able to predicate instructions with absolute
; addressing mode.
-
-; CHECK: if{{ *}}(p{{[0-3]+}}.new){{ *}}memw(##gvar){{ *}}={{ *}}r{{[0-9]+}}
+; CHECK: if ({{!*}}p{{[0-2]}}.new) memw(##gvar) = r{{[0-9]+}}
@gvar = external global i32
define i32 @test2(i32 %a, i32 %b) nounwind {
diff --git a/test/CodeGen/Hexagon/pred-gp.ll b/test/CodeGen/Hexagon/pred-gp.ll
index 299bd8679dad..3868e098007f 100644
--- a/test/CodeGen/Hexagon/pred-gp.ll
+++ b/test/CodeGen/Hexagon/pred-gp.ll
@@ -14,11 +14,11 @@ entry:
br i1 %cmp, label %if.then, label %entry.if.end_crit_edge
entry.if.end_crit_edge:
- %.pre = load i32* @c, align 4
+ %.pre = load i32, i32* @c, align 4
br label %if.end
if.then:
- %0 = load i32* @d, align 4
+ %0 = load i32, i32* @d, align 4
store i32 %0, i32* @c, align 4
br label %if.end
diff --git a/test/CodeGen/Hexagon/pred-instrs.ll b/test/CodeGen/Hexagon/pred-instrs.ll
index 800073e49b03..e0a75f13dfa8 100644
--- a/test/CodeGen/Hexagon/pred-instrs.ll
+++ b/test/CodeGen/Hexagon/pred-instrs.ll
@@ -25,6 +25,6 @@ if.else: ; preds = %entry
if.end: ; preds = %if.else, %if.then
%storemerge = phi i32 [ %and, %if.else ], [ %shl, %if.then ]
store i32 %storemerge, i32* @a, align 4
- %0 = load i32* @d, align 4
+ %0 = load i32, i32* @d, align 4
ret i32 %0
}
diff --git a/test/CodeGen/Hexagon/remove-endloop.ll b/test/CodeGen/Hexagon/remove-endloop.ll
new file mode 100644
index 000000000000..73e1ad02cd80
--- /dev/null
+++ b/test/CodeGen/Hexagon/remove-endloop.ll
@@ -0,0 +1,56 @@
+; RUN: llc -march=hexagon -O2 < %s | FileCheck %s
+
+define void @foo(i32 %n, i32* nocapture %A, i32* nocapture %B) nounwind optsize {
+entry:
+ %cmp = icmp sgt i32 %n, 100
+ br i1 %cmp, label %for.body.preheader, label %for.cond4.preheader
+
+; CHECK: endloop0
+; CHECK: endloop0
+; CHECK-NOT: endloop0
+
+for.body.preheader:
+ br label %for.body
+
+for.cond4.preheader:
+ %cmp113 = icmp sgt i32 %n, 0
+ br i1 %cmp113, label %for.body7.preheader, label %if.end
+
+for.body7.preheader:
+ br label %for.body7
+
+for.body:
+ %arrayidx.phi = phi i32* [ %arrayidx.inc, %for.body ], [ %B, %for.body.preheader ]
+ %arrayidx3.phi = phi i32* [ %arrayidx3.inc, %for.body ], [ %A, %for.body.preheader ]
+ %i.014 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ]
+ %0 = load i32, i32* %arrayidx.phi, align 4
+ %sub = add nsw i32 %0, -1
+ store i32 %sub, i32* %arrayidx3.phi, align 4
+ %inc = add nsw i32 %i.014, 1
+ %exitcond = icmp eq i32 %inc, %n
+ %arrayidx.inc = getelementptr i32, i32* %arrayidx.phi, i32 1
+ %arrayidx3.inc = getelementptr i32, i32* %arrayidx3.phi, i32 1
+ br i1 %exitcond, label %if.end.loopexit, label %for.body
+
+for.body7:
+ %arrayidx8.phi = phi i32* [ %arrayidx8.inc, %for.body7 ], [ %B, %for.body7.preheader ]
+ %arrayidx9.phi = phi i32* [ %arrayidx9.inc, %for.body7 ], [ %A, %for.body7.preheader ]
+ %i.117 = phi i32 [ %inc11, %for.body7 ], [ 0, %for.body7.preheader ]
+ %1 = load i32, i32* %arrayidx8.phi, align 4
+ %add = add nsw i32 %1, 1
+ store i32 %add, i32* %arrayidx9.phi, align 4
+ %inc11 = add nsw i32 %i.117, 1
+ %exitcond18 = icmp eq i32 %inc11, %n
+ %arrayidx8.inc = getelementptr i32, i32* %arrayidx8.phi, i32 1
+ %arrayidx9.inc = getelementptr i32, i32* %arrayidx9.phi, i32 1
+ br i1 %exitcond18, label %if.end.loopexit21, label %for.body7
+
+if.end.loopexit:
+ br label %if.end
+
+if.end.loopexit21:
+ br label %if.end
+
+if.end:
+ ret void
+}
diff --git a/test/CodeGen/Hexagon/remove_lsr.ll b/test/CodeGen/Hexagon/remove_lsr.ll
index 3128dbb8b21b..3b85c486348d 100644
--- a/test/CodeGen/Hexagon/remove_lsr.ll
+++ b/test/CodeGen/Hexagon/remove_lsr.ll
@@ -21,11 +21,11 @@ define void @foo(%union.vect64* nocapture %sss_extracted_bit_rx_data_ptr,
i8* nocapture %scr_s_even_code_ptr, i8* nocapture %scr_s_odd_code_ptr)
nounwind {
entry:
- %scevgep = getelementptr %union.vect64* %sss_extracted_bit_rx_data_ptr, i32 1
- %scevgep28 = getelementptr %union.vect32* %s_odd, i32 1
- %scevgep32 = getelementptr %union.vect32* %s_even, i32 1
- %scevgep36 = getelementptr i8* %scr_s_odd_code_ptr, i32 1
- %scevgep39 = getelementptr i8* %scr_s_even_code_ptr, i32 1
+ %scevgep = getelementptr %union.vect64, %union.vect64* %sss_extracted_bit_rx_data_ptr, i32 1
+ %scevgep28 = getelementptr %union.vect32, %union.vect32* %s_odd, i32 1
+ %scevgep32 = getelementptr %union.vect32, %union.vect32* %s_even, i32 1
+ %scevgep36 = getelementptr i8, i8* %scr_s_odd_code_ptr, i32 1
+ %scevgep39 = getelementptr i8, i8* %scr_s_even_code_ptr, i32 1
br label %for.body
for.body: ; preds = %for.body, %entry
@@ -54,16 +54,16 @@ for.body: ; preds = %for.body, %entry
%7 = trunc i64 %6 to i32
%8 = tail call i32 @llvm.hexagon.C2.mux(i32 %conv8, i32 %5, i32 %7)
store i32 %8, i32* %lsr.iv2931, align 4
- %srcval = load i64* %lsr.iv27, align 8
- %9 = load i8* %lsr.iv40, align 1
- %10 = load i8* %lsr.iv37, align 1
+ %srcval = load i64, i64* %lsr.iv27, align 8
+ %9 = load i8, i8* %lsr.iv40, align 1
+ %10 = load i8, i8* %lsr.iv37, align 1
%lftr.wideiv = trunc i32 %lsr.iv42 to i8
%exitcond = icmp eq i8 %lftr.wideiv, 32
- %scevgep26 = getelementptr %union.vect64* %lsr.iv, i32 1
- %scevgep30 = getelementptr %union.vect32* %lsr.iv29, i32 1
- %scevgep34 = getelementptr %union.vect32* %lsr.iv33, i32 1
- %scevgep38 = getelementptr i8* %lsr.iv37, i32 1
- %scevgep41 = getelementptr i8* %lsr.iv40, i32 1
+ %scevgep26 = getelementptr %union.vect64, %union.vect64* %lsr.iv, i32 1
+ %scevgep30 = getelementptr %union.vect32, %union.vect32* %lsr.iv29, i32 1
+ %scevgep34 = getelementptr %union.vect32, %union.vect32* %lsr.iv33, i32 1
+ %scevgep38 = getelementptr i8, i8* %lsr.iv37, i32 1
+ %scevgep41 = getelementptr i8, i8* %lsr.iv40, i32 1
%lsr.iv.next = add i32 %lsr.iv42, 1
br i1 %exitcond, label %for.end, label %for.body
diff --git a/test/CodeGen/Hexagon/shrink-frame-basic.ll b/test/CodeGen/Hexagon/shrink-frame-basic.ll
new file mode 100644
index 000000000000..50b37885eda4
--- /dev/null
+++ b/test/CodeGen/Hexagon/shrink-frame-basic.ll
@@ -0,0 +1,36 @@
+; RUN: llc < %s | FileCheck %s
+; Check for allocframe in a non-entry block LBB0_n.
+; CHECK: LBB0_{{[0-9]+}}:
+; CHECK: allocframe
+; Deallocframe may be in a different block, but must follow.
+; CHECK: deallocframe
+
+target datalayout = "e-m:e-p:32:32-i1:32-i64:64-a:0-v32:32-n16:32"
+target triple = "hexagon"
+
+; Function Attrs: nounwind
+define i32 @foo(i32 %n, i32* %p) #0 {
+entry:
+ %cmp = icmp eq i32* %p, null
+ br i1 %cmp, label %if.end, label %if.then
+
+if.then: ; preds = %entry
+ %0 = load i32, i32* %p, align 4
+ %inc = add nsw i32 %0, 1
+ store i32 %inc, i32* %p, align 4
+ br label %return
+
+if.end: ; preds = %entry
+ %call = tail call i32 bitcast (i32 (...)* @bar to i32 (i32)*)(i32 %n) #0
+ %add = add nsw i32 %call, 1
+ br label %return
+
+return: ; preds = %if.end, %if.then
+ %retval.0 = phi i32 [ %0, %if.then ], [ %add, %if.end ]
+ ret i32 %retval.0
+}
+
+declare i32 @bar(...) #0
+
+attributes #0 = { nounwind }
+
diff --git a/test/CodeGen/Hexagon/stack-align1.ll b/test/CodeGen/Hexagon/stack-align1.ll
new file mode 100644
index 000000000000..4efa70f59854
--- /dev/null
+++ b/test/CodeGen/Hexagon/stack-align1.ll
@@ -0,0 +1,21 @@
+; RUN: llc -O0 -march=hexagon < %s | FileCheck %s
+; CHECK: and(r29, #-32)
+; CHECK-DAG: add(r29, #0)
+; CHECK-DAG: add(r29, #28)
+
+target triple = "hexagon-unknown-unknown"
+
+; Function Attrs: nounwind uwtable
+define void @foo() #0 {
+entry:
+ %x = alloca i32, align 4
+ %y = alloca i32, align 32
+ %0 = bitcast i32* %x to i8*
+ %1 = bitcast i32* %y to i8*
+ call void @bar(i8* %0, i8* %1)
+ ret void
+}
+
+declare void @bar(i8*, i8*) #0
+
+attributes #0 = { nounwind }
diff --git a/test/CodeGen/Hexagon/stack-align2.ll b/test/CodeGen/Hexagon/stack-align2.ll
new file mode 100644
index 000000000000..1bbd57820325
--- /dev/null
+++ b/test/CodeGen/Hexagon/stack-align2.ll
@@ -0,0 +1,27 @@
+; RUN: llc -O0 -march=hexagon < %s | FileCheck %s
+; CHECK: and(r29, #-128)
+; CHECK-DAG: add(r29, #0)
+; CHECK-DAG: add(r29, #64)
+; CHECK-DAG: add(r29, #96)
+; CHECK-DAG: add(r29, #124)
+
+target triple = "hexagon-unknown-unknown"
+
+; Function Attrs: nounwind uwtable
+define void @foo() #0 {
+entry:
+ %x = alloca i32, align 4
+ %y = alloca i32, align 32
+ %z = alloca i32, align 64
+ %w = alloca i32, align 128
+ %0 = bitcast i32* %x to i8*
+ %1 = bitcast i32* %y to i8*
+ %2 = bitcast i32* %z to i8*
+ %3 = bitcast i32* %w to i8*
+ call void @bar(i8* %0, i8* %1, i8* %2, i8* %3)
+ ret void
+}
+
+declare void @bar(i8*, i8*, i8*, i8*) #0
+
+attributes #0 = { nounwind }
diff --git a/test/CodeGen/Hexagon/stack-alloca1.ll b/test/CodeGen/Hexagon/stack-alloca1.ll
new file mode 100644
index 000000000000..00e9e051aebb
--- /dev/null
+++ b/test/CodeGen/Hexagon/stack-alloca1.ll
@@ -0,0 +1,18 @@
+; RUN: llc -O0 -march=hexagon < %s | FileCheck %s
+; CHECK: sub(r29, r[[REG:[0-9]+]])
+; CHECK: r29 = r[[REG]]
+
+target triple = "hexagon-unknown-unknown"
+
+; Function Attrs: nounwind uwtable
+define void @foo(i32 %n) #0 {
+entry:
+ %x = alloca i32, i32 %n
+ %0 = bitcast i32* %x to i8*
+ call void @bar(i8* %0)
+ ret void
+}
+
+declare void @bar(i8*) #0
+
+attributes #0 = { nounwind }
diff --git a/test/CodeGen/Hexagon/stack-alloca2.ll b/test/CodeGen/Hexagon/stack-alloca2.ll
new file mode 100644
index 000000000000..ad5e13166aa2
--- /dev/null
+++ b/test/CodeGen/Hexagon/stack-alloca2.ll
@@ -0,0 +1,23 @@
+; RUN: llc -O0 -march=hexagon < %s | FileCheck %s
+; CHECK-DAG: r[[AP:[0-9]+]] = and(r30, #-32)
+; CHECK-DAG: r1 = add(r[[AP]], #-32)
+
+; CHECK-DAG: sub(r29, r[[SP:[0-9]+]])
+; CHECK-DAG: r29 = r[[SP]]
+
+target triple = "hexagon-unknown-unknown"
+
+; Function Attrs: nounwind uwtable
+define void @foo(i32 %n) #0 {
+entry:
+ %x = alloca i32, i32 %n
+ %y = alloca i32, align 32
+ %0 = bitcast i32* %x to i8*
+ %1 = bitcast i32* %y to i8*
+ call void @bar(i8* %0, i8* %1)
+ ret void
+}
+
+declare void @bar(i8*, i8* %y) #0
+
+attributes #0 = { nounwind }
diff --git a/test/CodeGen/Hexagon/static.ll b/test/CodeGen/Hexagon/static.ll
index 683a4c21bcb8..760b8b559725 100644
--- a/test/CodeGen/Hexagon/static.ll
+++ b/test/CodeGen/Hexagon/static.ll
@@ -10,10 +10,10 @@
define void @foo() nounwind {
entry:
- %0 = load i32* @num, align 4
- %1 = load i32* @acc, align 4
+ %0 = load i32, i32* @num, align 4
+ %1 = load i32, i32* @acc, align 4
%mul = mul nsw i32 %0, %1
- %2 = load i32* @val, align 4
+ %2 = load i32, i32* @val, align 4
%add = add nsw i32 %mul, %2
store i32 %add, i32* @num, align 4
ret void
diff --git a/test/CodeGen/Hexagon/struct_args.ll b/test/CodeGen/Hexagon/struct_args.ll
index f91300b5067e..95b76c7999d4 100644
--- a/test/CodeGen/Hexagon/struct_args.ll
+++ b/test/CodeGen/Hexagon/struct_args.ll
@@ -8,7 +8,7 @@
define void @foo() nounwind {
entry:
- %0 = load i64* bitcast (%struct.small* @s1 to i64*), align 1
+ %0 = load i64, i64* bitcast (%struct.small* @s1 to i64*), align 1
call void @bar(i64 %0)
ret void
}
diff --git a/test/CodeGen/Hexagon/struct_args_large.ll b/test/CodeGen/Hexagon/struct_args_large.ll
index db87d9e81db1..1438d73eacf7 100644
--- a/test/CodeGen/Hexagon/struct_args_large.ll
+++ b/test/CodeGen/Hexagon/struct_args_large.ll
@@ -1,4 +1,5 @@
-; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+; XFAIL:
+; RUN: llc -march=hexagon < %s | FileCheck %s
; CHECK: r[[T0:[0-9]+]] = CONST32(#s2)
; CHECK: memw(r29+#0) = r{{.}}
; CHECK: memw(r29+#8) = r{{.}}
diff --git a/test/CodeGen/Hexagon/sube.ll b/test/CodeGen/Hexagon/sube.ll
index 735ac9eb82e4..873f52b2d5df 100644
--- a/test/CodeGen/Hexagon/sube.ll
+++ b/test/CodeGen/Hexagon/sube.ll
@@ -1,7 +1,7 @@
-; RUN: llc -march=hexagon < %s | FileCheck %s
+; RUN: llc -march=hexagon -hexagon-expand-condsets=0 < %s | FileCheck %s
-; CHECK: r{{[0-9]+:[0-9]+}} = #0
; CHECK: r{{[0-9]+:[0-9]+}} = #1
+; CHECK: r{{[0-9]+:[0-9]+}} = #0
; CHECK: p{{[0-9]+}} = cmp.gtu(r{{[0-9]+:[0-9]+}}, r{{[0-9]+:[0-9]+}})
; CHECK: r{{[0-9]+}} = mux(p{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}})
; CHECK: r{{[0-9]+}} = mux(p{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}})
diff --git a/test/CodeGen/Hexagon/tail-call-mem-intrinsics.ll b/test/CodeGen/Hexagon/tail-call-mem-intrinsics.ll
new file mode 100644
index 000000000000..90fb75e5be06
--- /dev/null
+++ b/test/CodeGen/Hexagon/tail-call-mem-intrinsics.ll
@@ -0,0 +1,31 @@
+; RUN: llc -march=hexagon < %s | FileCheck %s
+
+; CHECK-LABEL: tail_memcpy:
+; CHECK: jump memcpy
+define void @tail_memcpy(i8* nocapture %p, i8* nocapture readonly %q, i32 %n) #0 {
+entry:
+ tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %p, i8* %q, i32 %n, i32 1, i1 false)
+ ret void
+}
+
+; CHECK-LABEL: tail_memmove:
+; CHECK: jump memmove
+define void @tail_memmove(i8* nocapture %p, i8* nocapture readonly %q, i32 %n) #0 {
+entry:
+ tail call void @llvm.memmove.p0i8.p0i8.i32(i8* %p, i8* %q, i32 %n, i32 1, i1 false)
+ ret void
+}
+
+; CHECK-LABEL: tail_memset:
+; CHECK: jump memset
+define void @tail_memset(i8* nocapture %p, i8 %c, i32 %n) #0 {
+entry:
+ tail call void @llvm.memset.p0i8.i32(i8* %p, i8 %c, i32 %n, i32 1, i1 false)
+ ret void
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i32, i1) #0
+declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i32, i1) #0
+declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) #0
+
+attributes #0 = { nounwind }
diff --git a/test/CodeGen/Hexagon/tfr-to-combine.ll b/test/CodeGen/Hexagon/tfr-to-combine.ll
index e3057cd1611d..a257acfeb49b 100644
--- a/test/CodeGen/Hexagon/tfr-to-combine.ll
+++ b/test/CodeGen/Hexagon/tfr-to-combine.ll
@@ -20,14 +20,14 @@ define i64 @test2() #0 {
; CHECK: combine(#0, r{{[0-9]+}})
entry:
store i16 0, i16* @a, align 2
- %0 = load i16* @c, align 2
+ %0 = load i16, i16* @c, align 2
%conv2 = zext i16 %0 to i64
ret i64 %conv2
}
; Function Attrs: nounwind
define i64 @test4() #0 {
-; CHECK: combine(#0, ##100)
+; CHECK: combine(#0, #100)
entry:
store i16 100, i16* @b, align 2
store i16 0, i16* @a, align 2
diff --git a/test/CodeGen/Hexagon/union-1.ll b/test/CodeGen/Hexagon/union-1.ll
index fe79f9510fe8..1d93797db858 100644
--- a/test/CodeGen/Hexagon/union-1.ll
+++ b/test/CodeGen/Hexagon/union-1.ll
@@ -2,13 +2,15 @@
; CHECK: word
; CHECK-NOT: combine(#0
; CHECK: jump bar
+; XFAIL: *
+; Disable this test temporarily.
define void @word(i32* nocapture %a) nounwind {
entry:
- %0 = load i32* %a, align 4
+ %0 = load i32, i32* %a, align 4
%1 = zext i32 %0 to i64
- %add.ptr = getelementptr inbounds i32* %a, i32 1
- %2 = load i32* %add.ptr, align 4
+ %add.ptr = getelementptr inbounds i32, i32* %a, i32 1
+ %2 = load i32, i32* %add.ptr, align 4
%3 = zext i32 %2 to i64
%4 = shl nuw i64 %3, 32
%ins = or i64 %4, %1
diff --git a/test/CodeGen/Hexagon/vaddh.ll b/test/CodeGen/Hexagon/vaddh.ll
index 01d20410978e..88194b750ad5 100644
--- a/test/CodeGen/Hexagon/vaddh.ll
+++ b/test/CodeGen/Hexagon/vaddh.ll
@@ -6,8 +6,8 @@
define void @foo() nounwind {
entry:
- %0 = load i32* @j, align 4
- %1 = load i32* @k, align 4
+ %0 = load i32, i32* @j, align 4
+ %1 = load i32, i32* @k, align 4
%2 = call i32 @llvm.hexagon.A2.svaddh(i32 %0, i32 %1)
store i32 %2, i32* @k, align 4
ret void
diff --git a/test/CodeGen/Hexagon/validate-offset.ll b/test/CodeGen/Hexagon/validate-offset.ll
index 9e7d0aa07832..8de006c80b11 100644
--- a/test/CodeGen/Hexagon/validate-offset.ll
+++ b/test/CodeGen/Hexagon/validate-offset.ll
@@ -11,26 +11,26 @@ entry:
%b.addr = alloca i32, align 4
store i32 %a, i32* %a.addr, align 4
store i32 %b, i32* %b.addr, align 4
- %0 = load i32* %a.addr, align 4
- %1 = load i32* %b.addr, align 4
+ %0 = load i32, i32* %a.addr, align 4
+ %1 = load i32, i32* %b.addr, align 4
%cmp = icmp sgt i32 %0, %1
br i1 %cmp, label %if.then, label %if.else
if.then:
- %2 = load i32* %a.addr, align 4
- %3 = load i32* %b.addr, align 4
+ %2 = load i32, i32* %a.addr, align 4
+ %3 = load i32, i32* %b.addr, align 4
%add = add nsw i32 %2, %3
store i32 %add, i32* %retval
br label %return
if.else:
- %4 = load i32* %a.addr, align 4
- %5 = load i32* %b.addr, align 4
+ %4 = load i32, i32* %a.addr, align 4
+ %5 = load i32, i32* %b.addr, align 4
%sub = sub nsw i32 %4, %5
store i32 %sub, i32* %retval
br label %return
return:
- %6 = load i32* %retval
+ %6 = load i32, i32* %retval
ret i32 %6
}
diff --git a/test/CodeGen/Hexagon/vect/vect-anyextend.ll b/test/CodeGen/Hexagon/vect/vect-anyextend.ll
new file mode 100644
index 000000000000..fe5fe84fc37d
--- /dev/null
+++ b/test/CodeGen/Hexagon/vect/vect-anyextend.ll
@@ -0,0 +1,15 @@
+; RUN: llc -march=hexagon < %s
+; Used to fail with "Cannot select: 0x17300f0: v2i32 = any_extend"
+
+; ModuleID = 'bugpoint-reduced-simplified.bc'
+target datalayout =
+"e-p:32:32:32-i64:64:64-i32:32:32-i16:16:16-i1:32:32-f64:64:64-f32:32:32-a0:0-n32"
+target triple = "hexagon-unknown-linux-gnu"
+
+define void @foo() nounwind {
+entry:
+ %_p_vec_full48 = load <4 x i8>, <4 x i8>* undef, align 8
+ %0 = zext <4 x i8> %_p_vec_full48 to <4 x i32>
+ store <4 x i32> %0, <4 x i32>* undef, align 8
+ unreachable
+}
diff --git a/test/CodeGen/Hexagon/vect/vect-apint-truncate.ll b/test/CodeGen/Hexagon/vect/vect-apint-truncate.ll
new file mode 100644
index 000000000000..eb94ddfe2961
--- /dev/null
+++ b/test/CodeGen/Hexagon/vect/vect-apint-truncate.ll
@@ -0,0 +1,27 @@
+; RUN: llc -march=hexagon < %s
+; Used to fail with "Invalid APInt Truncate request".
+; Used to fail with "Cannot select: 0x596010: v2i32 = sign_extend_inreg".
+
+; ModuleID = 'bugpoint-reduced-simplified.bc'
+target datalayout = "e-p:32:32:32-i64:64:64-i32:32:32-i16:16:16-i1:32:32-f64:64:64-f32:32:32-a0:0-n32"
+target triple = "hexagon-unknown-linux-gnu"
+
+define void @foo() nounwind {
+entry:
+ br label %polly.loop_header
+
+polly.loop_after: ; preds = %polly.loop_header
+ unreachable
+
+polly.loop_header: ; preds = %polly.loop_body, %entry
+ %0 = icmp sle i32 undef, 63
+ br i1 %0, label %polly.loop_body, label %polly.loop_after
+
+polly.loop_body: ; preds = %polly.loop_header
+ %_p_vec_full = load <4 x i8>, <4 x i8>* undef, align 8
+ %1 = sext <4 x i8> %_p_vec_full to <4 x i32>
+ %p_vec = mul <4 x i32> %1, <i32 3, i32 3, i32 3, i32 3>
+ %mulp_vec = add <4 x i32> %p_vec, <i32 21, i32 21, i32 21, i32 21>
+ store <4 x i32> %mulp_vec, <4 x i32>* undef, align 8
+ br label %polly.loop_header
+}
diff --git a/test/CodeGen/Hexagon/vect/vect-bad-bitcast.ll b/test/CodeGen/Hexagon/vect/vect-bad-bitcast.ll
new file mode 100644
index 000000000000..1672a789a26d
--- /dev/null
+++ b/test/CodeGen/Hexagon/vect/vect-bad-bitcast.ll
@@ -0,0 +1,61 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv5 < %s
+; REQUIRES: asserts
+; Check for successful compilation.
+
+target datalayout = "e-m:e-p:32:32-i1:32-i64:64-a:0-v32:32-n16:32"
+target triple = "hexagon"
+
+@input_buf = internal unnamed_addr constant [256 x i16] [i16 0, i16 0, i16 0, i16 1280, i16 2560, i16 4864, i16 7168, i16 9472, i16 11776, i16 12672, i16 13568, i16 14080, i16 15360, i16 15360, i16 15360, i16 15360, i16 15360, i16 15104, i16 14848, i16 14592, i16 14336, i16 14080, i16 14080, i16 13952, i16 13824, i16 13696, i16 13568, i16 13440, i16 13312, i16 13184, i16 13056, i16 12928, i16 12800, i16 12800, i16 12800, i16 12800, i16 12800, i16 12672, i16 12544, i16 12544, i16 12544, i16 12544, i16 12672, i16 12800, i16 12800, i16 12928, i16 13056, i16 13184, i16 13312, i16 13440, i16 13568, i16 13696, i16 13824, i16 14208, i16 14592, i16 14976, i16 15104, i16 15360, i16 15616, i16 15872, i16 16128, i16 16512, i16 16896, i16 17152, i16 17408, i16 17536, i16 17664, i16 17792, i16 17920, i16 18304, i16 18688, i16 19072, i16 19456, i16 19712, i16 19968, i16 20224, i16 20480, i16 20608, i16 20864, i16 20992, i16 21248, i16 21248, i16 21248, i16 21248, i16 21248, i16 21248, i16 21376, i16 21504, i16 21760, i16 21760, i16 21632, i16 21504, i16 21504, i16 21632, i16 21632, i16 21504, i16 21504, i16 21376, i16 21248, i16 21120, i16 20992, i16 20992, i16 20864, i16 20736, i16 20736, i16 20736, i16 20480, i16 20352, i16 20224, i16 20224, i16 20224, i16 20224, i16 20352, i16 20352, i16 20480, i16 20352, i16 20352, i16 20352, i16 20352, i16 20224, i16 20224, i16 20224, i16 20096, i16 20096, i16 19968, i16 19840, i16 19712, i16 19584, i16 19456, i16 19584, i16 19584, i16 19456, i16 19456, i16 19328, i16 19328, i16 19456, i16 19456, i16 19328, i16 19328, i16 19200, i16 19200, i16 19200, i16 19072, i16 19072, i16 18944, i16 18816, i16 18688, i16 18560, i16 18432, i16 18304, i16 18304, i16 18176, i16 18176, i16 18176, i16 18304, i16 18304, i16 18432, i16 18560, i16 18432, i16 18176, i16 17920, i16 17920, i16 17792, i16 17792, i16 17664, i16 17664, i16 17536, i16 17536, i16 17408, i16 17408, i16 17280, i16 17280, i16 17280, i16 17152, i16 17152, i16 17152, i16 17152, i16 17024, i16 17024, i16 16896, i16 16896, i16 16896, i16 16768, i16 16768, i16 16640, i16 16640, i16 16512, i16 16512, i16 16384, i16 16256, i16 16128, i16 16000, i16 15872, i16 15744, i16 15616, i16 15488, i16 15360, i16 15488, i16 15360, i16 15232, i16 15360, i16 15232, i16 15104, i16 14976, i16 14336, i16 14336, i16 14592, i16 14464, i16 13824, i16 13824, i16 13568, i16 13568, i16 13440, i16 13312, i16 13184, i16 13056, i16 13056, i16 13056, i16 12928, i16 12800, i16 12672, i16 12672, i16 12544, i16 12416, i16 12288, i16 12160, i16 11904, i16 11776, i16 11571, i16 11520, i16 11392, i16 11136, i16 10905, i16 10752, i16 10624, i16 10444, i16 10240, i16 9984, i16 9728, i16 9472, i16 9216, i16 8960, i16 8704, i16 8448, i16 8192, i16 7936, i16 7680, i16 7424, i16 7168, i16 6400, i16 5632, i16 4864, i16 3584, i16 1536, i16 0, i16 0], align 8
+
+; Function Attrs: nounwind
+define i32 @t_run_test() #0 {
+entry:
+ %WaterLeveldB_out = alloca i16, align 2
+ br label %polly.stmt.for.body
+
+for.body8: ; preds = %for.body8, %polly.loop_exit.loopexit
+ %i.120 = phi i32 [ 0, %polly.loop_exit.loopexit ], [ %inc11.24, %for.body8 ]
+ %call = call i32 bitcast (i32 (...)* @fxpBitAllocation to i32 (i32, i32, i32, i32, i16*, i32, i32, i32)*)(i32 0, i32 0, i32 256, i32 %conv9, i16* %WaterLeveldB_out, i32 0, i32 1920, i32 %i.120) #2
+ %inc11.24 = add i32 %i.120, 25
+ %exitcond.24 = icmp eq i32 %inc11.24, 500
+ br i1 %exitcond.24, label %for.end12, label %for.body8
+
+for.end12: ; preds = %for.body8
+ ret i32 0
+
+polly.loop_exit.loopexit: ; preds = %polly.stmt.for.body
+ %WaterLeveldB.1p_vsel.lcssa = phi <4 x i16> [ %WaterLeveldB.1p_vsel, %polly.stmt.for.body ]
+ %_low_half = shufflevector <4 x i16> %WaterLeveldB.1p_vsel.lcssa, <4 x i16> undef, <2 x i32> <i32 0, i32 1>
+ %_high_half = shufflevector <4 x i16> %WaterLeveldB.1p_vsel.lcssa, <4 x i16> undef, <2 x i32> <i32 2, i32 3>
+ %0 = icmp sgt <2 x i16> %_low_half, %_high_half
+ %1 = select <2 x i1> %0, <2 x i16> %_low_half, <2 x i16> %_high_half
+ %2 = extractelement <2 x i16> %1, i32 0
+ %3 = extractelement <2 x i16> %1, i32 1
+ %4 = icmp sgt i16 %2, %3
+ %5 = select i1 %4, i16 %2, i16 %3
+ %conv9 = sext i16 %5 to i32
+ br label %for.body8
+
+polly.stmt.for.body: ; preds = %entry, %polly.stmt.for.body
+ %WaterLeveldB.1p_vsel35 = phi <4 x i16> [ <i16 -32768, i16 -32768, i16 -32768, i16 -32768>, %entry ], [ %WaterLeveldB.1p_vsel, %polly.stmt.for.body ]
+ %scevgep.phi = phi i16* [ getelementptr inbounds ([256 x i16], [256 x i16]* @input_buf, i32 0, i32 0), %entry ], [ %scevgep.inc, %polly.stmt.for.body ]
+ %polly.indvar = phi i32 [ 0, %entry ], [ %polly.indvar_next, %polly.stmt.for.body ]
+ %vector_ptr = bitcast i16* %scevgep.phi to <4 x i16>*
+ %_p_vec_full = load <4 x i16>, <4 x i16>* %vector_ptr, align 8
+ %cmp2p_vicmp = icmp sgt <4 x i16> %_p_vec_full, %WaterLeveldB.1p_vsel35
+ %WaterLeveldB.1p_vsel = select <4 x i1> %cmp2p_vicmp, <4 x i16> %_p_vec_full, <4 x i16> %WaterLeveldB.1p_vsel35
+ %polly.indvar_next = add nsw i32 %polly.indvar, 4
+ %polly.loop_cond = icmp slt i32 %polly.indvar, 252
+ %scevgep.inc = getelementptr i16, i16* %scevgep.phi, i32 4
+ br i1 %polly.loop_cond, label %polly.stmt.for.body, label %polly.loop_exit.loopexit
+}
+
+declare i32 @fxpBitAllocation(...) #1
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { nounwind }
+
+!llvm.ident = !{!0}
+
+!0 = !{!"QuIC LLVM Hexagon Clang version 3.1"}
diff --git a/test/CodeGen/Hexagon/vect/vect-bitcast-1.ll b/test/CodeGen/Hexagon/vect/vect-bitcast-1.ll
new file mode 100644
index 000000000000..b834744d9b12
--- /dev/null
+++ b/test/CodeGen/Hexagon/vect/vect-bitcast-1.ll
@@ -0,0 +1,68 @@
+; RUN: llc -march=hexagon < %s
+; REQUIRES: asserts
+; Used to fail with: Assertion `VT.getSizeInBits() == Operand.getValueType().getSizeInBits() && "Cannot BITCAST between types of different sizes!"' failed.
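+; The <1 x i16> splat that is widened to <2 x i16> and then sign-extended in
+; %if.then155 below is presumably the construct that provoked the
+; size-mismatched bitcast during type legalization.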
+
+target datalayout = "e-p:32:32:32-i64:64:64-i32:32:32-i16:16:16-i1:32:32-f64:64:64-f32:32:32-a0:0-n32"
+target triple = "hexagon-unknown-linux-gnu"
+
+define void @foo() nounwind {
+entry:
+ br label %while.body
+
+while.body: ; preds = %if.then155, %if.then12, %entry
+ %cmp.i = icmp eq i8* undef, null
+ br i1 %cmp.i, label %lab_ci.exit, label %if.end.i
+
+if.end.i: ; preds = %while.body
+ unreachable
+
+lab_ci.exit: ; preds = %while.body
+ br i1 false, label %if.then, label %if.else
+
+if.then: ; preds = %lab_ci.exit
+ unreachable
+
+if.else: ; preds = %lab_ci.exit
+ br i1 undef, label %if.then12, label %if.else17
+
+if.then12: ; preds = %if.else
+ br label %while.body
+
+if.else17: ; preds = %if.else
+ br i1 false, label %if.then22, label %if.else35
+
+if.then22: ; preds = %if.else17
+ unreachable
+
+if.else35: ; preds = %if.else17
+ br i1 false, label %if.then40, label %if.else83
+
+if.then40: ; preds = %if.else35
+ unreachable
+
+if.else83: ; preds = %if.else35
+ br i1 false, label %if.then88, label %if.else150
+
+if.then88: ; preds = %if.else83
+ unreachable
+
+if.else150: ; preds = %if.else83
+ %cmp154 = icmp eq i32 undef, 0
+ br i1 %cmp154, label %if.then155, label %if.else208
+
+if.then155: ; preds = %if.else150
+ %call191 = call i32 @strtol() nounwind
+ %conv192 = trunc i32 %call191 to i16
+ %_p_splat_one = insertelement <1 x i16> undef, i16 %conv192, i32 0
+ %_p_splat = shufflevector <1 x i16> %_p_splat_one, <1 x i16> undef, <2 x i32> zeroinitializer
+ %0 = sext <2 x i16> %_p_splat to <2 x i32>
+ %mul198p_vec = shl <2 x i32> %0, <i32 2, i32 2>
+ %1 = extractelement <2 x i32> %mul198p_vec, i32 0
+ store i32 %1, i32* null, align 4
+ br label %while.body
+
+if.else208: ; preds = %if.else150
+ unreachable
+}
+
+declare i32 @strtol() nounwind
diff --git a/test/CodeGen/Hexagon/vect/vect-bitcast.ll b/test/CodeGen/Hexagon/vect/vect-bitcast.ll
new file mode 100644
index 000000000000..2d6b0b827397
--- /dev/null
+++ b/test/CodeGen/Hexagon/vect/vect-bitcast.ll
@@ -0,0 +1,56 @@
+; RUN: llc -march=hexagon < %s
+; REQUIRES: asserts
+; Used to fail with "Cannot BITCAST between types of different sizes!"
+
+target datalayout = "e-p:32:32:32-i64:64:64-i32:32:32-i16:16:16-i1:32:32-f64:64:64-f32:32:32-v64:64:64-v32:32:32-a0:0-n16:32"
+target triple = "hexagon"
+
+define void @foo() nounwind {
+entry:
+ br label %while.body
+
+while.body: ; preds = %if.then155, %if.then12, %if.then, %entry
+ br i1 undef, label %if.then, label %if.else
+
+if.then: ; preds = %while.body
+ br label %while.body
+
+if.else: ; preds = %while.body
+ br i1 undef, label %if.then12, label %if.else17
+
+if.then12: ; preds = %if.else
+ br label %while.body
+
+if.else17: ; preds = %if.else
+ br i1 false, label %if.then22, label %if.else35
+
+if.then22: ; preds = %if.else17
+ unreachable
+
+if.else35: ; preds = %if.else17
+ br i1 false, label %if.then40, label %if.else83
+
+if.then40: ; preds = %if.else35
+ unreachable
+
+if.else83: ; preds = %if.else35
+ br i1 false, label %if.then88, label %if.else150
+
+if.then88: ; preds = %if.else83
+ unreachable
+
+if.else150: ; preds = %if.else83
+ %cmp154 = icmp eq i32 undef, 0
+ br i1 %cmp154, label %if.then155, label %if.else208
+
+if.then155: ; preds = %if.else150
+ %_p_splat.1 = shufflevector <1 x i16> zeroinitializer, <1 x i16> undef, <2 x i32> zeroinitializer
+ %0 = sext <2 x i16> %_p_splat.1 to <2 x i32>
+ %mul198p_vec.1 = mul <2 x i32> %0, <i32 4, i32 4>
+ %1 = extractelement <2 x i32> %mul198p_vec.1, i32 0
+ store i32 %1, i32* undef, align 4
+ br label %while.body
+
+if.else208: ; preds = %if.else150
+ unreachable
+}
diff --git a/test/CodeGen/Hexagon/vect/vect-cst-v4i32.ll b/test/CodeGen/Hexagon/vect/vect-cst-v4i32.ll
new file mode 100644
index 000000000000..f5ee5d001510
--- /dev/null
+++ b/test/CodeGen/Hexagon/vect/vect-cst-v4i32.ll
@@ -0,0 +1,29 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv5 < %s | FileCheck %s
+; This one should generate a combine with two immediates.
+; CHECK: combine(#7, #7)
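+; The <4 x i32> splat of 7 is expected to be materialized as register pairs
+; built with combine(#7, #7) rather than loaded from a constant pool.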
+@B = common global [400 x i32] zeroinitializer, align 8
+@A = common global [400 x i32] zeroinitializer, align 8
+@C = common global [400 x i32] zeroinitializer, align 8
+
+define void @run() nounwind {
+entry:
+ br label %polly.loop_body
+
+polly.loop_after: ; preds = %polly.loop_body
+ ret void
+
+polly.loop_body: ; preds = %entry, %polly.loop_body
+ %polly.loopiv23 = phi i32 [ 0, %entry ], [ %polly.next_loopiv, %polly.loop_body ]
+ %polly.next_loopiv = add nsw i32 %polly.loopiv23, 4
+ %p_arrayidx1 = getelementptr [400 x i32], [400 x i32]* @A, i32 0, i32 %polly.loopiv23
+ %p_arrayidx = getelementptr [400 x i32], [400 x i32]* @B, i32 0, i32 %polly.loopiv23
+ %vector_ptr = bitcast i32* %p_arrayidx to <4 x i32>*
+ %_p_vec_full = load <4 x i32>, <4 x i32>* %vector_ptr, align 8
+ %mulp_vec = mul <4 x i32> %_p_vec_full, <i32 7, i32 7, i32 7, i32 7>
+ %vector_ptr12 = bitcast i32* %p_arrayidx1 to <4 x i32>*
+ %_p_vec_full13 = load <4 x i32>, <4 x i32>* %vector_ptr12, align 8
+ %addp_vec = add <4 x i32> %_p_vec_full13, %mulp_vec
+ store <4 x i32> %addp_vec, <4 x i32>* %vector_ptr12, align 8
+ %0 = icmp slt i32 %polly.next_loopiv, 400
+ br i1 %0, label %polly.loop_body, label %polly.loop_after
+}
diff --git a/test/CodeGen/Hexagon/vect/vect-cst-v4i8.ll b/test/CodeGen/Hexagon/vect/vect-cst-v4i8.ll
new file mode 100644
index 000000000000..de3e14e2e91c
--- /dev/null
+++ b/test/CodeGen/Hexagon/vect/vect-cst-v4i8.ll
@@ -0,0 +1,30 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv5 < %s | FileCheck %s
+; Make sure we can build the constant vector <1, 2, 3, 4>
+; CHECK-DAG: ##B
+; CHECK-DAG: ##A
+@B = common global [400 x i8] zeroinitializer, align 8
+@A = common global [400 x i8] zeroinitializer, align 8
+@C = common global [400 x i8] zeroinitializer, align 8
+
+define void @run() nounwind {
+entry:
+ br label %polly.loop_body
+
+polly.loop_after: ; preds = %polly.loop_body
+ ret void
+
+polly.loop_body: ; preds = %entry, %polly.loop_body
+ %polly.loopiv25 = phi i32 [ 0, %entry ], [ %polly.next_loopiv, %polly.loop_body ]
+ %polly.next_loopiv = add i32 %polly.loopiv25, 4
+ %p_arrayidx1 = getelementptr [400 x i8], [400 x i8]* @A, i32 0, i32 %polly.loopiv25
+ %p_arrayidx = getelementptr [400 x i8], [400 x i8]* @B, i32 0, i32 %polly.loopiv25
+ %vector_ptr = bitcast i8* %p_arrayidx to <4 x i8>*
+ %_p_vec_full = load <4 x i8>, <4 x i8>* %vector_ptr, align 8
+ %mulp_vec = mul <4 x i8> %_p_vec_full, <i8 1, i8 2, i8 3, i8 4>
+ %vector_ptr14 = bitcast i8* %p_arrayidx1 to <4 x i8>*
+ %_p_vec_full15 = load <4 x i8>, <4 x i8>* %vector_ptr14, align 8
+ %addp_vec = add <4 x i8> %_p_vec_full15, %mulp_vec
+ store <4 x i8> %addp_vec, <4 x i8>* %vector_ptr14, align 8
+ %0 = icmp slt i32 %polly.next_loopiv, 400
+ br i1 %0, label %polly.loop_body, label %polly.loop_after
+}
diff --git a/test/CodeGen/Hexagon/vect/vect-cst.ll b/test/CodeGen/Hexagon/vect/vect-cst.ll
new file mode 100644
index 000000000000..370fa5c7539e
--- /dev/null
+++ b/test/CodeGen/Hexagon/vect/vect-cst.ll
@@ -0,0 +1,29 @@
+; RUN: llc -march=hexagon < %s | FileCheck %s
+; Make sure we can build the constant vector <7, 7, 7, 7>
+; CHECK: vaddub
+@B = common global [400 x i8] zeroinitializer, align 8
+@A = common global [400 x i8] zeroinitializer, align 8
+@C = common global [400 x i8] zeroinitializer, align 8
+
+define void @run() nounwind {
+entry:
+ br label %polly.loop_body
+
+polly.loop_after: ; preds = %polly.loop_body
+ ret void
+
+polly.loop_body: ; preds = %entry, %polly.loop_body
+ %polly.loopiv25 = phi i32 [ 0, %entry ], [ %polly.next_loopiv, %polly.loop_body ]
+ %polly.next_loopiv = add i32 %polly.loopiv25, 4
+ %p_arrayidx1 = getelementptr [400 x i8], [400 x i8]* @A, i32 0, i32 %polly.loopiv25
+ %p_arrayidx = getelementptr [400 x i8], [400 x i8]* @B, i32 0, i32 %polly.loopiv25
+ %vector_ptr = bitcast i8* %p_arrayidx to <4 x i8>*
+ %_p_vec_full = load <4 x i8>, <4 x i8>* %vector_ptr, align 8
+ %mulp_vec = mul <4 x i8> %_p_vec_full, <i8 7, i8 7, i8 7, i8 7>
+ %vector_ptr14 = bitcast i8* %p_arrayidx1 to <4 x i8>*
+ %_p_vec_full15 = load <4 x i8>, <4 x i8>* %vector_ptr14, align 8
+ %addp_vec = add <4 x i8> %_p_vec_full15, %mulp_vec
+ store <4 x i8> %addp_vec, <4 x i8>* %vector_ptr14, align 8
+ %0 = icmp slt i32 %polly.next_loopiv, 400
+ br i1 %0, label %polly.loop_body, label %polly.loop_after
+}
diff --git a/test/CodeGen/Hexagon/vect/vect-extract.ll b/test/CodeGen/Hexagon/vect/vect-extract.ll
new file mode 100644
index 000000000000..75dc6850f181
--- /dev/null
+++ b/test/CodeGen/Hexagon/vect/vect-extract.ll
@@ -0,0 +1,96 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv5 < %s | FileCheck %s
+
+; Check that we do not generate extract.
+; CHECK-NOT: extractu
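+; Lane 0 of the A2.combinew(%1, %1) pair is simply %1 itself, so extracting it
+; should not require an extractu instruction.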
+target datalayout = "e-p:32:32:32-i64:64:64-i32:32:32-i16:16:16-i1:32:32-f64:64:64-f32:32:32-v64:64:64-v32:32:32-a0:0-n16:32"
+target triple = "hexagon"
+
+define void @foo(i32 %N, i32* nocapture %C, i16* nocapture %A, i16 signext %val) #0 {
+entry:
+ %cmp14 = icmp eq i32 %N, 0
+ br i1 %cmp14, label %for.end11, label %for.cond1.preheader.single_entry.preheader
+
+for.cond1.preheader.single_entry.preheader: ; preds = %entry
+ %0 = add i32 %N, -1
+ %leftover_lb = and i32 %0, -2
+ %p_conv4 = sext i16 %val to i32
+ br label %for.cond1.preheader.single_entry
+
+for.cond1.preheader.single_entry: ; preds = %for.inc9, %for.cond1.preheader.single_entry.preheader
+ %indvar = phi i32 [ %indvar.next, %for.inc9 ], [ 0, %for.cond1.preheader.single_entry.preheader ]
+ %1 = mul i32 %indvar, %N
+ %.not = icmp slt i32 %N, 2
+ %.not41 = icmp slt i32 %leftover_lb, 1
+ %brmerge = or i1 %.not, %.not41
+ %.mux = select i1 %.not, i32 0, i32 %leftover_lb
+ br i1 %brmerge, label %polly.loop_header26.preheader, label %polly.loop_body.lr.ph
+
+for.inc9.loopexit: ; preds = %polly.stmt.for.body331
+ br label %for.inc9
+
+for.inc9: ; preds = %for.inc9.loopexit, %polly.loop_header26.preheader
+ %indvar.next = add i32 %indvar, 1
+ %exitcond40 = icmp eq i32 %indvar.next, %N
+ br i1 %exitcond40, label %for.end11.loopexit, label %for.cond1.preheader.single_entry
+
+for.end11.loopexit: ; preds = %for.inc9
+ br label %for.end11
+
+for.end11: ; preds = %for.end11.loopexit, %entry
+ ret void
+
+polly.loop_body.lr.ph: ; preds = %for.cond1.preheader.single_entry
+ %2 = call i64 @llvm.hexagon.A2.combinew(i32 %1, i32 %1)
+ %3 = bitcast i64 %2 to <2 x i32>
+ %4 = extractelement <2 x i32> %3, i32 0
+ %5 = call i64 @llvm.hexagon.A2.combinew(i32 %p_conv4, i32 %p_conv4)
+ %6 = bitcast i64 %5 to <2 x i32>
+ %p_arrayidx8.gep = getelementptr i32, i32* %C, i32 %4
+ %p_arrayidx.gep = getelementptr i16, i16* %A, i32 %4
+ br label %polly.loop_body
+
+polly.loop_body: ; preds = %polly.loop_body.lr.ph, %polly.loop_body
+ %p_arrayidx8.phi = phi i32* [ %p_arrayidx8.gep, %polly.loop_body.lr.ph ], [ %p_arrayidx8.inc, %polly.loop_body ]
+ %p_arrayidx.phi = phi i16* [ %p_arrayidx.gep, %polly.loop_body.lr.ph ], [ %p_arrayidx.inc, %polly.loop_body ]
+ %polly.loopiv38 = phi i32 [ 0, %polly.loop_body.lr.ph ], [ %polly.next_loopiv, %polly.loop_body ]
+ %polly.next_loopiv = add nsw i32 %polly.loopiv38, 2
+ %vector_ptr = bitcast i16* %p_arrayidx.phi to <2 x i16>*
+ %_p_vec_full = load <2 x i16>, <2 x i16>* %vector_ptr, align 2
+ %7 = sext <2 x i16> %_p_vec_full to <2 x i32>
+ %mul5p_vec = mul <2 x i32> %7, %6
+ %vector_ptr21 = bitcast i32* %p_arrayidx8.phi to <2 x i32>*
+ store <2 x i32> %mul5p_vec, <2 x i32>* %vector_ptr21, align 4
+ %8 = icmp slt i32 %polly.next_loopiv, %leftover_lb
+ %p_arrayidx8.inc = getelementptr i32, i32* %p_arrayidx8.phi, i32 2
+ %p_arrayidx.inc = getelementptr i16, i16* %p_arrayidx.phi, i32 2
+ br i1 %8, label %polly.loop_body, label %polly.loop_header26.preheader.loopexit
+
+polly.loop_header26.preheader.loopexit: ; preds = %polly.loop_body
+ br label %polly.loop_header26.preheader
+
+polly.loop_header26.preheader: ; preds = %polly.loop_header26.preheader.loopexit, %for.cond1.preheader.single_entry
+ %polly.loopiv29.ph = phi i32 [ %.mux, %for.cond1.preheader.single_entry ], [ %leftover_lb, %polly.loop_header26.preheader.loopexit ]
+ %9 = icmp slt i32 %polly.loopiv29.ph, %N
+ br i1 %9, label %polly.stmt.for.body331.preheader, label %for.inc9
+
+polly.stmt.for.body331.preheader: ; preds = %polly.loop_header26.preheader
+ br label %polly.stmt.for.body331
+
+polly.stmt.for.body331: ; preds = %polly.stmt.for.body331.preheader, %polly.stmt.for.body331
+ %polly.loopiv2939 = phi i32 [ %polly.next_loopiv30, %polly.stmt.for.body331 ], [ %polly.loopiv29.ph, %polly.stmt.for.body331.preheader ]
+ %polly.next_loopiv30 = add nsw i32 %polly.loopiv2939, 1
+ %p_32 = add i32 %polly.loopiv2939, %1
+ %p_arrayidx833 = getelementptr i32, i32* %C, i32 %p_32
+ %p_arrayidx34 = getelementptr i16, i16* %A, i32 %p_32
+ %_p_scalar_ = load i16, i16* %p_arrayidx34, align 2
+ %p_conv = sext i16 %_p_scalar_ to i32
+ %p_mul5 = mul nsw i32 %p_conv, %p_conv4
+ store i32 %p_mul5, i32* %p_arrayidx833, align 4
+ %exitcond = icmp eq i32 %polly.next_loopiv30, %N
+ br i1 %exitcond, label %for.inc9.loopexit, label %polly.stmt.for.body331
+}
+
+declare i64 @llvm.hexagon.A2.combinew(i32, i32) #1
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone }
diff --git a/test/CodeGen/Hexagon/vect/vect-fma.ll b/test/CodeGen/Hexagon/vect/vect-fma.ll
new file mode 100644
index 000000000000..c35e0159df70
--- /dev/null
+++ b/test/CodeGen/Hexagon/vect/vect-fma.ll
@@ -0,0 +1,26 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv5 < %s
+; REQUIRES: asserts
+; Used to fail with "SplitVectorResult #0: 0x16cbe60: v4f64 = fma"
+
+; ModuleID = 'bugpoint-reduced-simplified.bc'
+target datalayout = "e-p:32:32:32-i64:64:64-i32:32:32-i16:16:16-i1:32:32-f64:64:64-f32:32:32-a0:0-n32"
+target triple = "hexagon-unknown-linux-gnu"
+
+define void @run() nounwind {
+entry:
+ br label %polly.loop_header
+
+polly.loop_after: ; preds = %polly.loop_header
+ ret void
+
+polly.loop_header: ; preds = %polly.loop_body, %entry
+ %0 = icmp sle i32 undef, 399
+ br i1 %0, label %polly.loop_body, label %polly.loop_after
+
+polly.loop_body: ; preds = %polly.loop_header
+ %_p_vec_full = load <4 x double>, <4 x double>* undef, align 8
+ %mulp_vec = fmul <4 x double> %_p_vec_full, <double 7.000000e+00, double 7.000000e+00, double 7.000000e+00, double 7.000000e+00>
+ %addp_vec = fadd <4 x double> undef, %mulp_vec
+ store <4 x double> %addp_vec, <4 x double>* undef, align 8
+ br label %polly.loop_header
+}
diff --git a/test/CodeGen/Hexagon/vect/vect-illegal-type.ll b/test/CodeGen/Hexagon/vect/vect-illegal-type.ll
new file mode 100644
index 000000000000..3d3bf88b64d3
--- /dev/null
+++ b/test/CodeGen/Hexagon/vect/vect-illegal-type.ll
@@ -0,0 +1,50 @@
+; RUN: llc -march=hexagon < %s
+; REQUIRES: asserts
+; Used to fail with "Unexpected illegal type!"
+; Used to fail with "Cannot select: ch = store x,x,x,<ST4[undef](align=8), trunc to v4i8>"
+
+; ModuleID = 'bugpoint-reduced-simplified.bc'
+target datalayout = "e-p:32:32:32-i64:64:64-i32:32:32-i16:16:16-i1:32:32-f64:64:64-f32:32:32-a0:0-n32"
+target triple = "hexagon-unknown-linux-gnu"
+
+define void @foo() nounwind {
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ br i1 undef, label %for.end, label %for.body
+
+for.end: ; preds = %for.body
+ br label %for.body71
+
+for.body71: ; preds = %for.body71, %for.end
+ br i1 undef, label %for.end96, label %for.body71
+
+for.end96: ; preds = %for.body71
+ switch i32 undef, label %sw.epilog [
+ i32 1, label %for.cond375.preheader
+ i32 8, label %for.cond591
+ ]
+
+for.cond375.preheader: ; preds = %for.end96
+ br label %polly.loop_header228
+
+for.cond591: ; preds = %for.end96
+ br label %for.body664
+
+for.body664: ; preds = %for.body664, %for.cond591
+ br i1 undef, label %for.end670, label %for.body664
+
+for.end670: ; preds = %for.body664
+ br label %sw.epilog
+
+sw.epilog: ; preds = %for.end670, %for.end96
+ ret void
+
+polly.loop_header228: ; preds = %polly.loop_header228, %for.cond375.preheader
+ %_p_splat_one = load <1 x i16>, <1 x i16>* undef, align 8
+ %_p_splat = shufflevector <1 x i16> %_p_splat_one, <1 x i16> %_p_splat_one, <4 x i32> zeroinitializer
+ %0 = trunc <4 x i16> %_p_splat to <4 x i8>
+ store <4 x i8> %0, <4 x i8>* undef, align 8
+ br label %polly.loop_header228
+}
diff --git a/test/CodeGen/Hexagon/vect/vect-insert-extract-elt.ll b/test/CodeGen/Hexagon/vect/vect-insert-extract-elt.ll
new file mode 100644
index 000000000000..baf0cd748f7f
--- /dev/null
+++ b/test/CodeGen/Hexagon/vect/vect-insert-extract-elt.ll
@@ -0,0 +1,71 @@
+; RUN: llc -march=hexagon < %s
+; Used to fail with an infinite recursion in the insn selection.
+target datalayout = "e-p:32:32:32-i64:64:64-i32:32:32-i16:16:16-i1:32:32-f64:64:64-f32:32:32-v64:64:64-v32:32:32-a0:0-n16:32"
+target triple = "hexagon-unknown-linux-gnu"
+
+%struct.elt = type { [2 x [4 x %struct.block]] }
+%struct.block = type { [2 x i16] }
+
+define void @foo(%struct.elt* noalias nocapture %p0, %struct.elt* noalias nocapture %p1) nounwind {
+entry:
+ %arrayidx1 = getelementptr inbounds %struct.elt, %struct.elt* %p1, i32 0, i32 0, i32 0, i32 3
+ %arrayidx4 = getelementptr inbounds %struct.elt, %struct.elt* %p1, i32 0, i32 0, i32 0, i32 2
+ %arrayidx7 = getelementptr inbounds %struct.elt, %struct.elt* %p0, i32 0, i32 0, i32 0, i32 3
+ %0 = bitcast %struct.block* %arrayidx7 to i32*
+ %1 = bitcast %struct.block* %arrayidx4 to i32*
+ %2 = load i32, i32* %0, align 4
+ store i32 %2, i32* %1, align 4
+ %3 = bitcast %struct.block* %arrayidx1 to i32*
+ store i32 %2, i32* %3, align 4
+ %arrayidx10 = getelementptr inbounds %struct.elt, %struct.elt* %p1, i32 0, i32 0, i32 0, i32 1
+ %arrayidx16 = getelementptr inbounds %struct.elt, %struct.elt* %p0, i32 0, i32 0, i32 0, i32 2
+ %4 = bitcast %struct.block* %arrayidx16 to i32*
+ %5 = bitcast %struct.elt* %p1 to i32*
+ %6 = load i32, i32* %4, align 4
+ store i32 %6, i32* %5, align 4
+ %7 = bitcast %struct.block* %arrayidx10 to i32*
+ store i32 %6, i32* %7, align 4
+ %p_arrayidx26 = getelementptr %struct.elt, %struct.elt* %p0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1
+ %p_arrayidx2632 = getelementptr %struct.elt, %struct.elt* %p0, i32 0, i32 0, i32 0, i32 1, i32 0, i32 1
+ %p_arrayidx2633 = getelementptr %struct.elt, %struct.elt* %p0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 1
+ %p_arrayidx2634 = getelementptr %struct.elt, %struct.elt* %p0, i32 0, i32 0, i32 0, i32 3, i32 0, i32 1
+ %p_arrayidx20 = getelementptr %struct.elt, %struct.elt* %p1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1
+ %p_arrayidx2035 = getelementptr %struct.elt, %struct.elt* %p1, i32 0, i32 0, i32 0, i32 1, i32 0, i32 1
+ %p_arrayidx2036 = getelementptr %struct.elt, %struct.elt* %p1, i32 0, i32 0, i32 0, i32 2, i32 0, i32 1
+ %p_arrayidx2037 = getelementptr %struct.elt, %struct.elt* %p1, i32 0, i32 0, i32 0, i32 3, i32 0, i32 1
+ %8 = lshr i32 %6, 16
+ %9 = trunc i32 %8 to i16
+ %_p_vec_ = insertelement <4 x i16> undef, i16 %9, i32 0
+ %_p_vec_39 = insertelement <4 x i16> %_p_vec_, i16 %9, i32 1
+ %10 = lshr i32 %2, 16
+ %11 = trunc i32 %10 to i16
+ %_p_vec_41 = insertelement <4 x i16> %_p_vec_39, i16 %11, i32 2
+ %_p_vec_43 = insertelement <4 x i16> %_p_vec_41, i16 %11, i32 3
+ %shlp_vec = shl <4 x i16> %_p_vec_43, <i16 1, i16 1, i16 1, i16 1>
+ %12 = extractelement <4 x i16> %shlp_vec, i32 0
+ store i16 %12, i16* %p_arrayidx20, align 2
+ %13 = extractelement <4 x i16> %shlp_vec, i32 1
+ store i16 %13, i16* %p_arrayidx2035, align 2
+ %14 = extractelement <4 x i16> %shlp_vec, i32 2
+ store i16 %14, i16* %p_arrayidx2036, align 2
+ %15 = extractelement <4 x i16> %shlp_vec, i32 3
+ store i16 %15, i16* %p_arrayidx2037, align 2
+ %_p_scalar_44 = load i16, i16* %p_arrayidx26, align 2
+ %_p_vec_45 = insertelement <4 x i16> undef, i16 %_p_scalar_44, i32 0
+ %_p_scalar_46 = load i16, i16* %p_arrayidx2632, align 2
+ %_p_vec_47 = insertelement <4 x i16> %_p_vec_45, i16 %_p_scalar_46, i32 1
+ %_p_scalar_48 = load i16, i16* %p_arrayidx2633, align 2
+ %_p_vec_49 = insertelement <4 x i16> %_p_vec_47, i16 %_p_scalar_48, i32 2
+ %_p_scalar_50 = load i16, i16* %p_arrayidx2634, align 2
+ %_p_vec_51 = insertelement <4 x i16> %_p_vec_49, i16 %_p_scalar_50, i32 3
+ %shl28p_vec = shl <4 x i16> %_p_vec_51, <i16 1, i16 1, i16 1, i16 1>
+ %16 = extractelement <4 x i16> %shl28p_vec, i32 0
+ store i16 %16, i16* %p_arrayidx26, align 2
+ %17 = extractelement <4 x i16> %shl28p_vec, i32 1
+ store i16 %17, i16* %p_arrayidx2632, align 2
+ %18 = extractelement <4 x i16> %shl28p_vec, i32 2
+ store i16 %18, i16* %p_arrayidx2633, align 2
+ %19 = extractelement <4 x i16> %shl28p_vec, i32 3
+ store i16 %19, i16* %p_arrayidx2634, align 2
+ ret void
+}
diff --git a/test/CodeGen/Hexagon/vect/vect-load-1.ll b/test/CodeGen/Hexagon/vect/vect-load-1.ll
new file mode 100644
index 000000000000..fbaf61d545da
--- /dev/null
+++ b/test/CodeGen/Hexagon/vect/vect-load-1.ll
@@ -0,0 +1,26 @@
+; RUN: llc -march=hexagon < %s
+; Used to fail with "Cannot select: v2i32,ch = load 0x16c5890, 0x16f76e0, 0x16f76e0<LD2[undef](align=8), sext from v2i8>", 0x16c5890, 0x16f76e0, 0x16f76e0<LD2[undef](align=8), sext from v2i8>"
+
+; ModuleID = 'bugpoint-reduced-simplified.bc'
+target datalayout = "e-p:32:32:32-i64:64:64-i32:32:32-i16:16:16-i1:32:32-f64:64:64-f32:32:32-a0:0-n32"
+target triple = "hexagon-unknown-linux-gnu"
+
+define void @foo() nounwind {
+entry:
+ br label %polly.loop_header
+
+polly.loop_after: ; preds = %polly.loop_header
+ unreachable
+
+polly.loop_header: ; preds = %polly.loop_body, %entry
+ %0 = icmp sle i32 undef, 63
+ br i1 %0, label %polly.loop_body, label %polly.loop_after
+
+polly.loop_body: ; preds = %polly.loop_header
+ %_p_vec_full = load <2 x i8>, <2 x i8>* undef, align 8
+ %1 = sext <2 x i8> %_p_vec_full to <2 x i32>
+ %p_vec = mul <2 x i32> %1, <i32 3, i32 3>
+ %mulp_vec = add <2 x i32> %p_vec, <i32 21, i32 21>
+ store <2 x i32> %mulp_vec, <2 x i32>* undef, align 8
+ br label %polly.loop_header
+}
diff --git a/test/CodeGen/Hexagon/vect/vect-load.ll b/test/CodeGen/Hexagon/vect/vect-load.ll
new file mode 100644
index 000000000000..6bdcc6d3de61
--- /dev/null
+++ b/test/CodeGen/Hexagon/vect/vect-load.ll
@@ -0,0 +1,76 @@
+; RUN: llc -march=hexagon < %s
+; Used to fail with "Cannot select: 0x16cf370: v2i16,ch = load"
+
+; ModuleID = 'bugpoint-reduced-simplified.bc'
+target datalayout = "e-p:32:32:32-i64:64:64-i32:32:32-i16:16:16-i1:32:32-f64:64:64-f32:32:32-a0:0-n32"
+target triple = "hexagon-unknown-linux-gnu"
+
+%struct.ext_hdrs.10.65.142.274.307.318.329.681.692.703.714.725.736.758.791.802.846.857.868.879.890.901.945.956.958 = type { i8, i8, i8, i8, i8, i8, i16, i32, [8 x %struct.hcdc_ext_vec.9.64.141.273.306.317.328.680.691.702.713.724.735.757.790.801.845.856.867.878.889.900.944.955.957] }
+%struct.hcdc_ext_vec.9.64.141.273.306.317.328.680.691.702.713.724.735.757.790.801.845.856.867.878.889.900.944.955.957 = type { i8, i8, i16 }
+
+define void @foo(%struct.ext_hdrs.10.65.142.274.307.318.329.681.692.703.714.725.736.758.791.802.846.857.868.879.890.901.945.956.958* %hc_ext_info) nounwind {
+entry:
+ br i1 undef, label %if.end, label %if.then
+
+if.then: ; preds = %entry
+ unreachable
+
+if.end: ; preds = %entry
+ br i1 undef, label %if.end5, label %if.then3
+
+if.then3: ; preds = %if.end
+ br label %if.end5
+
+if.end5: ; preds = %if.then3, %if.end
+ %add.ptr = getelementptr inbounds %struct.ext_hdrs.10.65.142.274.307.318.329.681.692.703.714.725.736.758.791.802.846.857.868.879.890.901.945.956.958, %struct.ext_hdrs.10.65.142.274.307.318.329.681.692.703.714.725.736.758.791.802.846.857.868.879.890.901.945.956.958* %hc_ext_info, i32 0, i32 8, i32 0
+ %add.ptr22 = getelementptr inbounds %struct.ext_hdrs.10.65.142.274.307.318.329.681.692.703.714.725.736.758.791.802.846.857.868.879.890.901.945.956.958, %struct.ext_hdrs.10.65.142.274.307.318.329.681.692.703.714.725.736.758.791.802.846.857.868.879.890.901.945.956.958* null, i32 0, i32 8, i32 undef
+ br label %while.cond
+
+while.cond: ; preds = %if.end419, %if.end5
+ %gre_chksum.0 = phi <2 x i8> [ undef, %if.end5 ], [ %gre_chksum.2, %if.end419 ]
+ %cmp23 = icmp ult %struct.hcdc_ext_vec.9.64.141.273.306.317.328.680.691.702.713.724.735.757.790.801.845.856.867.878.889.900.944.955.957* null, %add.ptr
+ %cmp25 = icmp ult %struct.hcdc_ext_vec.9.64.141.273.306.317.328.680.691.702.713.724.735.757.790.801.845.856.867.878.889.900.944.955.957* null, %add.ptr22
+ %sel1 = and i1 %cmp23, %cmp25
+ br i1 %sel1, label %while.body, label %while.end422
+
+while.body: ; preds = %while.cond
+ switch i8 undef, label %if.end419 [
+ i8 5, label %if.then70
+ i8 3, label %if.then70
+ i8 2, label %if.then70
+ i8 1, label %if.then70
+ i8 0, label %if.then70
+ i8 4, label %if.then93
+ i8 6, label %if.then195
+ ]
+
+if.then70: ; preds = %while.body, %while.body, %while.body, %while.body, %while.body
+ unreachable
+
+if.then93: ; preds = %while.body
+ unreachable
+
+if.then195: ; preds = %while.body
+ br i1 undef, label %if.end274, label %if.then202
+
+if.then202: ; preds = %if.then195
+ br label %while.body222
+
+while.body222: ; preds = %while.body222, %if.then202
+ br i1 undef, label %if.end240, label %while.body222
+
+if.end240: ; preds = %while.body222
+ %_p_vec_full100 = load <2 x i8>, <2 x i8>* undef, align 8
+ br label %if.end274
+
+if.end274: ; preds = %if.end240, %if.then195
+ %gre_chksum.1 = phi <2 x i8> [ %gre_chksum.0, %if.then195 ], [ %_p_vec_full100, %if.end240 ]
+ br label %if.end419
+
+if.end419: ; preds = %if.end274, %while.body
+ %gre_chksum.2 = phi <2 x i8> [ %gre_chksum.0, %while.body ], [ %gre_chksum.1, %if.end274 ]
+ br label %while.cond
+
+while.end422: ; preds = %while.cond
+ ret void
+}
diff --git a/test/CodeGen/Hexagon/vect/vect-loadv4i16.ll b/test/CodeGen/Hexagon/vect/vect-loadv4i16.ll
new file mode 100644
index 000000000000..16591ef68536
--- /dev/null
+++ b/test/CodeGen/Hexagon/vect/vect-loadv4i16.ll
@@ -0,0 +1,73 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv5 < %s | FileCheck %s
+
+; Check that the halfword load, the register-pair combine, and the vector add
+; are generated.
+; CHECK: memuh(r{{[0-9]+}} + {{ *}}#6{{ *}})
+; CHECK: combine(r{{[0-9]+}}{{ *}},{{ *}}r{{[0-9]+}}{{ *}})
+; CHECK: vaddh
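+; The insertelement chain that splats %val into a <4 x i16> should become a
+; register-pair combine, and the vector add in the Polly loop should select vaddh.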
+
+target datalayout = "e-p:32:32:32-i64:64:64-i32:32:32-i16:16:16-i1:32:32-f64:64:64-f32:32:32-v64:64:64-v32:32:32-a0:0-n16:32"
+target triple = "hexagon"
+
+define void @matrix_add_const(i32 %N, i16* nocapture %A, i16 signext %val) #0 {
+entry:
+ %cmp5 = icmp eq i32 %N, 0
+ br i1 %cmp5, label %for.end, label %polly.cond
+
+for.end.loopexit: ; preds = %polly.stmt.for.body29
+ br label %for.end
+
+for.end: ; preds = %for.end.loopexit, %polly.loop_header24.preheader, %entry
+ ret void
+
+polly.cond: ; preds = %entry
+ %0 = icmp sgt i32 %N, 3
+ br i1 %0, label %polly.then, label %polly.loop_header24.preheader
+
+polly.then: ; preds = %polly.cond
+ %1 = add i32 %N, -1
+ %leftover_lb = and i32 %1, -4
+ %2 = icmp sgt i32 %leftover_lb, 0
+ br i1 %2, label %polly.loop_body.lr.ph, label %polly.loop_header24.preheader
+
+polly.loop_body.lr.ph: ; preds = %polly.then
+ %3 = insertelement <4 x i16> undef, i16 %val, i32 0
+ %4 = insertelement <4 x i16> %3, i16 %val, i32 1
+ %5 = insertelement <4 x i16> %4, i16 %val, i32 2
+ %6 = insertelement <4 x i16> %5, i16 %val, i32 3
+ br label %polly.loop_body
+
+polly.loop_header24.preheader.loopexit: ; preds = %polly.loop_body
+ br label %polly.loop_header24.preheader
+
+polly.loop_header24.preheader: ; preds = %polly.loop_header24.preheader.loopexit, %polly.then, %polly.cond
+ %polly.loopiv27.ph = phi i32 [ 0, %polly.cond ], [ %leftover_lb, %polly.then ], [ %leftover_lb, %polly.loop_header24.preheader.loopexit ]
+ %7 = icmp slt i32 %polly.loopiv27.ph, %N
+ br i1 %7, label %polly.stmt.for.body29.preheader, label %for.end
+
+polly.stmt.for.body29.preheader: ; preds = %polly.loop_header24.preheader
+ br label %polly.stmt.for.body29
+
+polly.loop_body: ; preds = %polly.loop_body.lr.ph, %polly.loop_body
+ %p_arrayidx.phi = phi i16* [ %A, %polly.loop_body.lr.ph ], [ %p_arrayidx.inc, %polly.loop_body ]
+ %polly.loopiv34 = phi i32 [ 0, %polly.loop_body.lr.ph ], [ %polly.next_loopiv, %polly.loop_body ]
+ %polly.next_loopiv = add nsw i32 %polly.loopiv34, 4
+ %vector_ptr = bitcast i16* %p_arrayidx.phi to <4 x i16>*
+ %_p_vec_full = load <4 x i16>, <4 x i16>* %vector_ptr, align 2
+ %addp_vec = add <4 x i16> %_p_vec_full, %6
+ store <4 x i16> %addp_vec, <4 x i16>* %vector_ptr, align 2
+ %8 = icmp slt i32 %polly.next_loopiv, %leftover_lb
+ %p_arrayidx.inc = getelementptr i16, i16* %p_arrayidx.phi, i32 4
+ br i1 %8, label %polly.loop_body, label %polly.loop_header24.preheader.loopexit
+
+polly.stmt.for.body29: ; preds = %polly.stmt.for.body29.preheader, %polly.stmt.for.body29
+ %polly.loopiv2733 = phi i32 [ %polly.next_loopiv28, %polly.stmt.for.body29 ], [ %polly.loopiv27.ph, %polly.stmt.for.body29.preheader ]
+ %polly.next_loopiv28 = add nsw i32 %polly.loopiv2733, 1
+ %p_arrayidx30 = getelementptr i16, i16* %A, i32 %polly.loopiv2733
+ %_p_scalar_ = load i16, i16* %p_arrayidx30, align 2
+ %p_add = add i16 %_p_scalar_, %val
+ store i16 %p_add, i16* %p_arrayidx30, align 2
+ %exitcond = icmp eq i32 %polly.next_loopiv28, %N
+ br i1 %exitcond, label %for.end.loopexit, label %polly.stmt.for.body29
+}
+
+attributes #0 = { nounwind "fp-contract-model"="standard" "no-frame-pointer-elim-non-leaf" "realign-stack" "relocation-model"="static" "ssp-buffers-size"="8" }
diff --git a/test/CodeGen/Hexagon/vect/vect-mul-v2i16.ll b/test/CodeGen/Hexagon/vect/vect-mul-v2i16.ll
new file mode 100644
index 000000000000..f1a80115cb61
--- /dev/null
+++ b/test/CodeGen/Hexagon/vect/vect-mul-v2i16.ll
@@ -0,0 +1,9 @@
+; RUN: llc -march=hexagon < %s | FileCheck %s
+; CHECK: vmpyh
+; CHECK: vtrunewh
+
+define <2 x i16> @t_i2x16(<2 x i16> %a, <2 x i16> %b) nounwind {
+entry:
+ %0 = mul <2 x i16> %a, %b
+ ret <2 x i16> %0
+}
diff --git a/test/CodeGen/Hexagon/vect/vect-mul-v2i32.ll b/test/CodeGen/Hexagon/vect/vect-mul-v2i32.ll
new file mode 100644
index 000000000000..1d439dd37e14
--- /dev/null
+++ b/test/CodeGen/Hexagon/vect/vect-mul-v2i32.ll
@@ -0,0 +1,9 @@
+; RUN: llc -march=hexagon < %s | FileCheck %s
+; CHECK: mpyi
+; CHECK: mpyi
+
+define <2 x i32> @t_i2x32(<2 x i32> %a, <2 x i32> %b) nounwind {
+entry:
+ %0 = mul <2 x i32> %a, %b
+ ret <2 x i32> %0
+}
diff --git a/test/CodeGen/Hexagon/vect/vect-mul-v4i16.ll b/test/CodeGen/Hexagon/vect/vect-mul-v4i16.ll
new file mode 100644
index 000000000000..a50d7f8adc17
--- /dev/null
+++ b/test/CodeGen/Hexagon/vect/vect-mul-v4i16.ll
@@ -0,0 +1,10 @@
+; RUN: llc -march=hexagon < %s | FileCheck %s
+; CHECK: vmpyh
+; CHECK: vmpyh
+; CHECK: vtrunewh
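+; A <4 x i16> multiply is expected to widen to 32-bit products (two vmpyh)
+; and then be narrowed back to halfwords with vtrunewh.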
+
+define <4 x i16> @t_i4x16(<4 x i16> %a, <4 x i16> %b) nounwind {
+entry:
+ %0 = mul <4 x i16> %a, %b
+ ret <4 x i16> %0
+}
diff --git a/test/CodeGen/Hexagon/vect/vect-mul-v4i8.ll b/test/CodeGen/Hexagon/vect/vect-mul-v4i8.ll
new file mode 100644
index 000000000000..d60d01460785
--- /dev/null
+++ b/test/CodeGen/Hexagon/vect/vect-mul-v4i8.ll
@@ -0,0 +1,9 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv5 < %s | FileCheck %s
+; CHECK: vmpybsu
+; CHECK: vtrunehb
+
+define <4 x i8> @t_i4x8(<4 x i8> %a, <4 x i8> %b) nounwind {
+entry:
+ %0 = mul <4 x i8> %a, %b
+ ret <4 x i8> %0
+}
diff --git a/test/CodeGen/Hexagon/vect/vect-mul-v8i8.ll b/test/CodeGen/Hexagon/vect/vect-mul-v8i8.ll
new file mode 100644
index 000000000000..a84cd00234ea
--- /dev/null
+++ b/test/CodeGen/Hexagon/vect/vect-mul-v8i8.ll
@@ -0,0 +1,9 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv5 < %s | FileCheck %s
+; CHECK: vmpybsu
+; CHECK: vmpybsu
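+; Eight byte lanes are expected to need two vmpybsu operations, one per
+; <4 x i8> half of the input register pair.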
+
+define <8 x i8> @t_i8x8(<8 x i8> %a, <8 x i8> %b) nounwind {
+entry:
+ %0 = mul <8 x i8> %a, %b
+ ret <8 x i8> %0
+}
diff --git a/test/CodeGen/Hexagon/vect/vect-no-tfrs-1.ll b/test/CodeGen/Hexagon/vect/vect-no-tfrs-1.ll
new file mode 100644
index 000000000000..550b0f81d33a
--- /dev/null
+++ b/test/CodeGen/Hexagon/vect/vect-no-tfrs-1.ll
@@ -0,0 +1,8 @@
+; RUN: llc -march=hexagon < %s | FileCheck %s
+; CHECK-NOT: r1:0 = r1:0
+
+define <4 x i16> @t_i4x16(<4 x i16> %a, <4 x i16> %b) nounwind {
+entry:
+ %0 = mul <4 x i16> %a, %b
+ ret <4 x i16> %0
+}
diff --git a/test/CodeGen/Hexagon/vect/vect-no-tfrs.ll b/test/CodeGen/Hexagon/vect/vect-no-tfrs.ll
new file mode 100644
index 000000000000..9081f18b3c27
--- /dev/null
+++ b/test/CodeGen/Hexagon/vect/vect-no-tfrs.ll
@@ -0,0 +1,8 @@
+; RUN: llc -march=hexagon < %s | FileCheck %s
+; CHECK-NOT: r1:0 = combine(r1, r0)
+
+define <4 x i8> @t_i4x8(<4 x i8> %a, <4 x i8> %b) nounwind {
+entry:
+ %0 = mul <4 x i8> %a, %b
+ ret <4 x i8> %0
+}
diff --git a/test/CodeGen/Hexagon/vect/vect-packhl.ll b/test/CodeGen/Hexagon/vect/vect-packhl.ll
new file mode 100644
index 000000000000..dfdb019b677c
--- /dev/null
+++ b/test/CodeGen/Hexagon/vect/vect-packhl.ll
@@ -0,0 +1,10 @@
+; Extracted from test/CodeGen/Generic/vector-casts.ll: used to loop indefinitely.
+; RUN: llc -march=hexagon < %s | FileCheck %s
+; CHECK: packhl
+
+define void @a(<2 x double>* %p, <2 x i8>* %q) {
+ %t = load <2 x double>, <2 x double>* %p
+ %r = fptosi <2 x double> %t to <2 x i8>
+ store <2 x i8> %r, <2 x i8>* %q
+ ret void
+}
diff --git a/test/CodeGen/Hexagon/vect/vect-shift-imm.ll b/test/CodeGen/Hexagon/vect/vect-shift-imm.ll
new file mode 100644
index 000000000000..4861181d4125
--- /dev/null
+++ b/test/CodeGen/Hexagon/vect/vect-shift-imm.ll
@@ -0,0 +1,41 @@
+; RUN: llc -march=hexagon < %s | FileCheck %s --check-prefix=CHECK-ASLW
+; RUN: llc -march=hexagon < %s | FileCheck %s --check-prefix=CHECK-ASRW
+; RUN: llc -march=hexagon < %s | FileCheck %s --check-prefix=CHECK-LSRW
+; RUN: llc -march=hexagon < %s | FileCheck %s --check-prefix=CHECK-ASLH
+; RUN: llc -march=hexagon < %s | FileCheck %s --check-prefix=CHECK-ASRH
+; RUN: llc -march=hexagon < %s | FileCheck %s --check-prefix=CHECK-LSRH
+;
+; Make sure that the instructions with immediate operands are generated.
+; CHECK-ASLW: vaslw({{.*}}, #9)
+; CHECK-ASRW: vasrw({{.*}}, #8)
+; CHECK-LSRW: vlsrw({{.*}}, #7)
+; CHECK-ASLH: vaslh({{.*}}, #6)
+; CHECK-ASRH: vasrh({{.*}}, #5)
+; CHECK-LSRH: vlsrh({{.*}}, #4)
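+; Each intrinsic below uses a distinct shift amount so that every FileCheck
+; prefix can only match its own instruction.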
+
+target datalayout = "e-p:32:32:32-i64:64:64-i32:32:32-i16:16:16-i1:32:32-f64:64:64-f32:32:32-v64:64:64-v32:32:32-a0:0-n16:32"
+target triple = "hexagon"
+
+define i64 @foo(i64 %x) nounwind readnone {
+entry:
+ %0 = tail call i64 @llvm.hexagon.S2.asl.i.vw(i64 %x, i32 9)
+ %1 = tail call i64 @llvm.hexagon.S2.asr.i.vw(i64 %x, i32 8)
+ %2 = tail call i64 @llvm.hexagon.S2.lsr.i.vw(i64 %x, i32 7)
+ %3 = tail call i64 @llvm.hexagon.S2.asl.i.vh(i64 %x, i32 6)
+ %4 = tail call i64 @llvm.hexagon.S2.asr.i.vh(i64 %x, i32 5)
+ %5 = tail call i64 @llvm.hexagon.S2.lsr.i.vh(i64 %x, i32 4)
+ %add = add i64 %1, %0
+ %add1 = add i64 %add, %2
+ %add2 = add i64 %add1, %3
+ %add3 = add i64 %add2, %4
+ %add4 = add i64 %add3, %5
+ ret i64 %add4
+}
+
+declare i64 @llvm.hexagon.S2.asl.i.vw(i64, i32) nounwind readnone
+declare i64 @llvm.hexagon.S2.asr.i.vw(i64, i32) nounwind readnone
+declare i64 @llvm.hexagon.S2.lsr.i.vw(i64, i32) nounwind readnone
+declare i64 @llvm.hexagon.S2.asl.i.vh(i64, i32) nounwind readnone
+declare i64 @llvm.hexagon.S2.asr.i.vh(i64, i32) nounwind readnone
+declare i64 @llvm.hexagon.S2.lsr.i.vh(i64, i32) nounwind readnone
+
diff --git a/test/CodeGen/Hexagon/vect/vect-shuffle.ll b/test/CodeGen/Hexagon/vect/vect-shuffle.ll
new file mode 100644
index 000000000000..9d80df2e0887
--- /dev/null
+++ b/test/CodeGen/Hexagon/vect/vect-shuffle.ll
@@ -0,0 +1,47 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv5 < %s | FileCheck %s
+
+; Check that the vector shuffles do not generate extract/insert instructions.
+; CHECK-NOT: extractu
+; CHECK-NOT: insert
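+; The <2 x i16> halves of a 64-bit vector live in the subregisters of a
+; register pair, so splitting and recombining them should not need
+; extractu/insert instructions.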
+target datalayout = "e-p:32:32:32-i64:64:64-i32:32:32-i16:16:16-i1:32:32-f64:64:64-f32:32:32-v64:64:64-v32:32:32-a0:0-n16:32"
+target triple = "hexagon"
+
+define i32 @foo(i16* noalias nocapture %src, i16* noalias nocapture %dstImg, i32 %width, i32 %idx, i32 %flush) #0 {
+entry:
+ %0 = tail call i64 @llvm.hexagon.A2.combinew(i32 %flush, i32 %flush)
+ %1 = bitcast i64 %0 to <2 x i32>
+ br label %polly.loop_body
+
+polly.loop_after: ; preds = %polly.loop_body
+ ret i32 0
+
+polly.loop_body: ; preds = %entry, %polly.loop_body
+ %p_arrayidx35.phi = phi i16* [ %dstImg, %entry ], [ %p_arrayidx35.inc, %polly.loop_body ]
+ %p_arrayidx.phi = phi i16* [ %src, %entry ], [ %p_arrayidx.inc, %polly.loop_body ]
+ %polly.loopiv56 = phi i32 [ 0, %entry ], [ %polly.next_loopiv, %polly.loop_body ]
+ %polly.next_loopiv = add nsw i32 %polly.loopiv56, 4
+ %vector_ptr = bitcast i16* %p_arrayidx.phi to <4 x i16>*
+ %_p_vec_full = load <4 x i16>, <4 x i16>* %vector_ptr, align 2
+ %_high_half = shufflevector <4 x i16> %_p_vec_full, <4 x i16> undef, <2 x i32> <i32 2, i32 3>
+ %_low_half = shufflevector <4 x i16> %_p_vec_full, <4 x i16> undef, <2 x i32> <i32 0, i32 1>
+ %2 = zext <2 x i16> %_low_half to <2 x i32>
+ %3 = zext <2 x i16> %_high_half to <2 x i32>
+ %add33p_vec = add <2 x i32> %2, %1
+ %add33p_vec48 = add <2 x i32> %3, %1
+ %4 = trunc <2 x i32> %add33p_vec to <2 x i16>
+ %5 = trunc <2 x i32> %add33p_vec48 to <2 x i16>
+ %_combined_vec = shufflevector <2 x i16> %4, <2 x i16> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %vector_ptr49 = bitcast i16* %p_arrayidx35.phi to <4 x i16>*
+ store <4 x i16> %_combined_vec, <4 x i16>* %vector_ptr49, align 2
+ %6 = icmp slt i32 %polly.next_loopiv, 1024
+ %p_arrayidx35.inc = getelementptr i16, i16* %p_arrayidx35.phi, i32 4
+ %p_arrayidx.inc = getelementptr i16, i16* %p_arrayidx.phi, i32 4
+ br i1 %6, label %polly.loop_body, label %polly.loop_after
+}
+
+declare i64 @llvm.hexagon.A2.combinew(i32, i32) #1
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone }
+
+
diff --git a/test/CodeGen/Hexagon/vect/vect-splat.ll b/test/CodeGen/Hexagon/vect/vect-splat.ll
new file mode 100644
index 000000000000..3613dbf6fdd1
--- /dev/null
+++ b/test/CodeGen/Hexagon/vect/vect-splat.ll
@@ -0,0 +1,16 @@
+; Extracted from test/CodeGen/Generic/vector.ll: used to loop indefinitely.
+; RUN: llc -march=hexagon -mcpu=hexagonv5 < %s | FileCheck %s
+; CHECK: combine
+
+%i4 = type <4 x i32>
+
+define void @splat_i4(%i4* %P, %i4* %Q, i32 %X) {
+ %tmp = insertelement %i4 undef, i32 %X, i32 0 ; <%i4> [#uses=1]
+ %tmp2 = insertelement %i4 %tmp, i32 %X, i32 1 ; <%i4> [#uses=1]
+ %tmp4 = insertelement %i4 %tmp2, i32 %X, i32 2 ; <%i4> [#uses=1]
+ %tmp6 = insertelement %i4 %tmp4, i32 %X, i32 3 ; <%i4> [#uses=1]
+ %q = load %i4, %i4* %Q ; <%i4> [#uses=1]
+ %R = add %i4 %q, %tmp6 ; <%i4> [#uses=1]
+ store %i4 %R, %i4* %P
+ ret void
+}
diff --git a/test/CodeGen/Hexagon/vect/vect-store-v2i16.ll b/test/CodeGen/Hexagon/vect/vect-store-v2i16.ll
new file mode 100644
index 000000000000..1de3058e68a6
--- /dev/null
+++ b/test/CodeGen/Hexagon/vect/vect-store-v2i16.ll
@@ -0,0 +1,51 @@
+; RUN: llc -march=hexagon < %s
+; Used to fail with: "Cannot select: 0x3bab680: ch = store <ST4[%lsr.iv522525], trunc to v2i16>
+; ModuleID = 'bugpoint-reduced-simplified.bc'
+target datalayout = "e-p:32:32:32-i64:64:64-i32:32:32-i16:16:16-i1:32:32-f64:64:64-f32:32:32-a0:0-n32"
+target triple = "hexagon-unknown-linux-gnu"
+
+define void @foobar() nounwind {
+entry:
+ br label %for.cond7.preheader.single_entry.i
+
+for.cond7.preheader.single_entry.i: ; preds = %for.cond7.preheader.single_entry.i, %entry
+ %exitcond72.i = icmp eq i32 undef, 64
+ br i1 %exitcond72.i, label %foo_32.exit, label %for.cond7.preheader.single_entry.i
+
+foo_32.exit: ; preds = %for.cond7.preheader.single_entry.i
+ br label %for.body.i428
+
+for.body.i428: ; preds = %for.body.i428, %foo_32.exit
+ br i1 undef, label %foo_12.exit, label %for.body.i428
+
+foo_12.exit: ; preds = %for.body.i428
+ br label %for.body.i.i
+
+for.body.i.i: ; preds = %for.body.i.i, %foo_12.exit
+ br i1 undef, label %foo_14.exit, label %for.body.i.i
+
+foo_14.exit: ; preds = %for.body.i.i
+ br label %for.body
+
+for.body: ; preds = %for.body, %foo_14.exit
+ br i1 undef, label %for.end, label %for.body
+
+for.end: ; preds = %for.body
+ %storemerge294 = select i1 undef, i32 32767, i32 undef
+ %_p_splat_one386 = insertelement <1 x i32> undef, i32 %storemerge294, i32 0
+ %_p_splat387 = shufflevector <1 x i32> %_p_splat_one386, <1 x i32> undef, <2 x i32> zeroinitializer
+ br label %polly.loop_body377
+
+polly.loop_after378: ; preds = %polly.loop_body377
+ unreachable
+
+polly.loop_body377: ; preds = %polly.loop_body377, %for.end
+ %_p_vec_full384 = load <2 x i16>, <2 x i16>* undef, align 4
+ %0 = sext <2 x i16> %_p_vec_full384 to <2 x i32>
+ %mulp_vec = mul <2 x i32> %0, %_p_splat387
+ %shr100293p_vec = lshr <2 x i32> %mulp_vec, <i32 15, i32 15>
+ %1 = trunc <2 x i32> %shr100293p_vec to <2 x i16>
+ store <2 x i16> %1, <2 x i16>* undef, align 4
+ br i1 undef, label %polly.loop_body377, label %polly.loop_after378
+}
+
diff --git a/test/CodeGen/Hexagon/vect/vect-truncate.ll b/test/CodeGen/Hexagon/vect/vect-truncate.ll
new file mode 100644
index 000000000000..fd75bbd58e36
--- /dev/null
+++ b/test/CodeGen/Hexagon/vect/vect-truncate.ll
@@ -0,0 +1,42 @@
+; RUN: llc -march=hexagon < %s
+; Used to fail with "Cannot select: 0x16cb7f0: v2i16 = truncate"
+
+; ModuleID = 'bugpoint-reduced-simplified.bc'
+target datalayout = "e-p:32:32:32-i64:64:64-i32:32:32-i16:16:16-i1:32:32-f64:64:64-f32:32:32-a0:0-n32"
+target triple = "hexagon-unknown-linux-gnu"
+
+define void @Autocorr() nounwind {
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ br i1 undef, label %polly.loop_header43, label %for.body
+
+do.cond: ; preds = %polly.loop_header
+ unreachable
+
+do.end: ; preds = %polly.loop_after45
+ ret void
+
+polly.loop_header: ; preds = %polly.loop_after45, %polly.loop_body
+ %0 = icmp sle i32 undef, 239
+ br i1 %0, label %polly.loop_body, label %do.cond
+
+polly.loop_body: ; preds = %polly.loop_header
+ %p_25 = call i32 @llvm.hexagon.SI.to.SXTHI.asrh(i32 undef)
+ %1 = insertelement <4 x i32> undef, i32 %p_25, i32 3
+ %2 = trunc <4 x i32> %1 to <4 x i16>
+ store <4 x i16> %2, <4 x i16>* undef, align 8
+ br label %polly.loop_header
+
+polly.loop_after45: ; preds = %polly.loop_header43
+ br i1 undef, label %polly.loop_header, label %do.end
+
+polly.loop_header43: ; preds = %polly.loop_body44, %for.body
+ br i1 undef, label %polly.loop_body44, label %polly.loop_after45
+
+polly.loop_body44: ; preds = %polly.loop_header43
+ br label %polly.loop_header43
+}
+
+declare i32 @llvm.hexagon.SI.to.SXTHI.asrh(i32) nounwind readnone
diff --git a/test/CodeGen/Hexagon/vect/vect-vaddb-1.ll b/test/CodeGen/Hexagon/vect/vect-vaddb-1.ll
new file mode 100644
index 000000000000..e646f8efdd5e
--- /dev/null
+++ b/test/CodeGen/Hexagon/vect/vect-vaddb-1.ll
@@ -0,0 +1,8 @@
+; RUN: llc -march=hexagon < %s | FileCheck %s
+; CHECK: vaddub
+
+define <4 x i8> @t_i4x8(<4 x i8> %a, <4 x i8> %b) nounwind {
+entry:
+ %0 = add <4 x i8> %a, %b
+ ret <4 x i8> %0
+}
diff --git a/test/CodeGen/Hexagon/vect/vect-vaddb.ll b/test/CodeGen/Hexagon/vect/vect-vaddb.ll
new file mode 100644
index 000000000000..459546991903
--- /dev/null
+++ b/test/CodeGen/Hexagon/vect/vect-vaddb.ll
@@ -0,0 +1,8 @@
+; RUN: llc -march=hexagon < %s | FileCheck %s
+; CHECK: vaddub
+
+define <8 x i8> @t_i8x8(<8 x i8> %a, <8 x i8> %b) nounwind {
+entry:
+ %0 = add <8 x i8> %a, %b
+ ret <8 x i8> %0
+}
diff --git a/test/CodeGen/Hexagon/vect/vect-vaddh-1.ll b/test/CodeGen/Hexagon/vect/vect-vaddh-1.ll
new file mode 100644
index 000000000000..1b43d4fb6cc8
--- /dev/null
+++ b/test/CodeGen/Hexagon/vect/vect-vaddh-1.ll
@@ -0,0 +1,8 @@
+; RUN: llc -march=hexagon < %s | FileCheck %s
+; CHECK: vaddh
+
+define <4 x i16> @t_i4x16(<4 x i16> %a, <4 x i16> %b) nounwind {
+entry:
+ %0 = add <4 x i16> %a, %b
+ ret <4 x i16> %0
+}
diff --git a/test/CodeGen/Hexagon/vect/vect-vaddh.ll b/test/CodeGen/Hexagon/vect/vect-vaddh.ll
new file mode 100644
index 000000000000..32bf3cadacdc
--- /dev/null
+++ b/test/CodeGen/Hexagon/vect/vect-vaddh.ll
@@ -0,0 +1,8 @@
+; RUN: llc -march=hexagon < %s | FileCheck %s
+; CHECK: vaddh
+
+define <2 x i16> @t_i2x16(<2 x i16> %a, <2 x i16> %b) nounwind {
+entry:
+ %0 = add <2 x i16> %a, %b
+ ret <2 x i16> %0
+}
diff --git a/test/CodeGen/Hexagon/vect/vect-vaddw.ll b/test/CodeGen/Hexagon/vect/vect-vaddw.ll
new file mode 100644
index 000000000000..a8401345ab26
--- /dev/null
+++ b/test/CodeGen/Hexagon/vect/vect-vaddw.ll
@@ -0,0 +1,8 @@
+; RUN: llc -march=hexagon < %s | FileCheck %s
+; CHECK: vaddw
+
+define <2 x i32> @t_i2x32(<2 x i32> %a, <2 x i32> %b) nounwind {
+entry:
+ %0 = add <2 x i32> %a, %b
+ ret <2 x i32> %0
+}
diff --git a/test/CodeGen/Hexagon/vect/vect-vaslw.ll b/test/CodeGen/Hexagon/vect/vect-vaslw.ll
new file mode 100644
index 000000000000..c662b0bd3de2
--- /dev/null
+++ b/test/CodeGen/Hexagon/vect/vect-vaslw.ll
@@ -0,0 +1,33 @@
+; RUN: llc -march=hexagon < %s | FileCheck %s
+; CHECK: vaslw
+
+target datalayout = "e-p:32:32:32-i64:64:64-i32:32:32-i16:16:16-i1:32:32-f64:64:64-f32:32:32-v64:64:64-v32:32:32-a0:0-n16:32"
+target triple = "hexagon-unknown-linux-gnu"
+
+define void @foo(i16* nocapture %v) nounwind {
+entry:
+ %p_arrayidx = getelementptr i16, i16* %v, i32 4
+ %vector_ptr = bitcast i16* %p_arrayidx to <4 x i16>*
+ %_p_vec_full = load <4 x i16>, <4 x i16>* %vector_ptr, align 2
+ %_high_half = shufflevector <4 x i16> %_p_vec_full, <4 x i16> undef, <2 x i32> <i32 2, i32 3>
+ %_low_half = shufflevector <4 x i16> %_p_vec_full, <4 x i16> undef, <2 x i32> <i32 0, i32 1>
+ %0 = sext <2 x i16> %_low_half to <2 x i32>
+ %1 = sext <2 x i16> %_high_half to <2 x i32>
+ %shr6p_vec = shl <2 x i32> %0, <i32 2, i32 2>
+ %shr6p_vec19 = shl <2 x i32> %1, <i32 2, i32 2>
+ %addp_vec = add <2 x i32> %shr6p_vec, <i32 34, i32 34>
+ %addp_vec20 = add <2 x i32> %shr6p_vec19, <i32 34, i32 34>
+ %vector_ptr21 = bitcast i16* %v to <4 x i16>*
+ %_p_vec_full22 = load <4 x i16>, <4 x i16>* %vector_ptr21, align 2
+ %_high_half23 = shufflevector <4 x i16> %_p_vec_full22, <4 x i16> undef, <2 x i32> <i32 2, i32 3>
+ %_low_half24 = shufflevector <4 x i16> %_p_vec_full22, <4 x i16> undef, <2 x i32> <i32 0, i32 1>
+ %2 = zext <2 x i16> %_low_half24 to <2 x i32>
+ %3 = zext <2 x i16> %_high_half23 to <2 x i32>
+ %add3p_vec = add <2 x i32> %addp_vec, %2
+ %add3p_vec25 = add <2 x i32> %addp_vec20, %3
+ %4 = trunc <2 x i32> %add3p_vec to <2 x i16>
+ %5 = trunc <2 x i32> %add3p_vec25 to <2 x i16>
+ %_combined_vec = shufflevector <2 x i16> %4, <2 x i16> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ store <4 x i16> %_combined_vec, <4 x i16>* %vector_ptr21, align 2
+ ret void
+}
diff --git a/test/CodeGen/Hexagon/vect/vect-vshifts.ll b/test/CodeGen/Hexagon/vect/vect-vshifts.ll
new file mode 100644
index 000000000000..49ff812601ae
--- /dev/null
+++ b/test/CodeGen/Hexagon/vect/vect-vshifts.ll
@@ -0,0 +1,279 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv5 < %s | FileCheck %s
+
+; Check that vector shifts by a register shift amount are generated.
+; CHECK: r{{[0-9]+:[0-9]+}} = vasrw(r{{[0-9]+:[0-9]+}}, r{{[0-9]+}})
+; CHECK: r{{[0-9]+:[0-9]+}} = vaslw(r{{[0-9]+:[0-9]+}}, r{{[0-9]+}})
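+; Both the generic ashr by a splatted register amount and the S2.asl.r.vw
+; intrinsic should select the register-operand forms vasrw/vaslw.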
+target datalayout = "e-p:32:32:32-i64:64:64-i32:32:32-i16:16:16-i1:32:32-f64:64:64-f32:32:32-v64:64:64-v32:32:32-a0:0-n16:32"
+target triple = "hexagon"
+
+define void @foo(i32* nocapture %buf, i32* nocapture %dest, i32 %offset, i32 %oddBlock, i32 %gb) #0 {
+entry:
+ %0 = load i32, i32* %buf, align 4, !tbaa !0
+ %shr = ashr i32 %0, %gb
+ store i32 %shr, i32* %buf, align 4, !tbaa !0
+ %not.tobool = icmp eq i32 %oddBlock, 0
+ %1 = sub i32 %offset, %oddBlock
+ %2 = zext i1 %not.tobool to i32
+ %3 = and i32 %1, 7
+ %4 = add i32 %2, %3
+ %5 = add i32 %4, 8
+ %p_sub8 = sub nsw i32 31, %gb
+ %6 = insertelement <2 x i32> undef, i32 %p_sub8, i32 0
+ %7 = insertelement <2 x i32> %6, i32 %p_sub8, i32 1
+ %8 = bitcast <2 x i32> %7 to i64
+ %9 = tail call i64 @llvm.hexagon.S2.asl.i.vw(i64 %8, i32 1)
+ %10 = bitcast i64 %9 to <2 x i32>
+ %11 = tail call i64 @llvm.hexagon.A2.combinew(i32 -1, i32 -1)
+ %12 = bitcast i64 %11 to <2 x i32>
+ %sub12p_vec = add <2 x i32> %10, %12
+ %p_22 = add i32 %4, 64
+ %p_d.018 = getelementptr i32, i32* %dest, i32 %4
+ %p_d.01823 = getelementptr i32, i32* %dest, i32 %p_22
+ %p_25 = add i32 %4, 72
+ %p_arrayidx14 = getelementptr i32, i32* %dest, i32 %5
+ %p_arrayidx1426 = getelementptr i32, i32* %dest, i32 %p_25
+ %_p_scalar_ = load i32, i32* %p_d.018, align 4
+ %_p_vec_ = insertelement <2 x i32> undef, i32 %_p_scalar_, i32 0
+ %_p_scalar_27 = load i32, i32* %p_d.01823, align 4
+ %_p_vec_28 = insertelement <2 x i32> %_p_vec_, i32 %_p_scalar_27, i32 1
+ %13 = bitcast <2 x i32> %_p_vec_28 to i64
+ %14 = tail call i64 @llvm.hexagon.S2.asr.i.vw(i64 %13, i32 31)
+ %15 = bitcast i64 %14 to <2 x i32>
+ %shr9p_vec = ashr <2 x i32> %_p_vec_28, %7
+ %xorp_vec = xor <2 x i32> %15, %sub12p_vec
+ %16 = bitcast <2 x i32> %shr9p_vec to i64
+ %17 = tail call i32 @llvm.hexagon.A2.vcmpweq(i64 %14, i64 %16)
+ %18 = bitcast <2 x i32> %xorp_vec to i64
+ %19 = tail call i64 @llvm.hexagon.C2.vmux(i32 %17, i64 %13, i64 %18)
+ %20 = tail call i64 @llvm.hexagon.S2.asl.r.vw(i64 %19, i32 %gb)
+ %21 = bitcast i64 %20 to <2 x i32>
+ %22 = extractelement <2 x i32> %21, i32 0
+ store i32 %22, i32* %p_arrayidx14, align 4
+ %23 = extractelement <2 x i32> %21, i32 1
+ store i32 %23, i32* %p_arrayidx1426, align 4
+ store i32 %22, i32* %p_d.018, align 4
+ store i32 %23, i32* %p_d.01823, align 4
+ %p_21.1 = add i32 %4, 128
+ %p_22.1 = add i32 %4, 192
+ %p_d.018.1 = getelementptr i32, i32* %dest, i32 %p_21.1
+ %p_d.01823.1 = getelementptr i32, i32* %dest, i32 %p_22.1
+ %p_24.1 = add i32 %4, 136
+ %p_25.1 = add i32 %4, 200
+ %p_arrayidx14.1 = getelementptr i32, i32* %dest, i32 %p_24.1
+ %p_arrayidx1426.1 = getelementptr i32, i32* %dest, i32 %p_25.1
+ %_p_scalar_.1 = load i32, i32* %p_d.018.1, align 4
+ %_p_vec_.1 = insertelement <2 x i32> undef, i32 %_p_scalar_.1, i32 0
+ %_p_scalar_27.1 = load i32, i32* %p_d.01823.1, align 4
+ %_p_vec_28.1 = insertelement <2 x i32> %_p_vec_.1, i32 %_p_scalar_27.1, i32 1
+ %24 = bitcast <2 x i32> %_p_vec_28.1 to i64
+ %25 = tail call i64 @llvm.hexagon.S2.asr.i.vw(i64 %24, i32 31)
+ %26 = bitcast i64 %25 to <2 x i32>
+ %shr9p_vec.1 = ashr <2 x i32> %_p_vec_28.1, %7
+ %xorp_vec.1 = xor <2 x i32> %26, %sub12p_vec
+ %27 = bitcast <2 x i32> %shr9p_vec.1 to i64
+ %28 = tail call i32 @llvm.hexagon.A2.vcmpweq(i64 %25, i64 %27)
+ %29 = bitcast <2 x i32> %xorp_vec.1 to i64
+ %30 = tail call i64 @llvm.hexagon.C2.vmux(i32 %28, i64 %24, i64 %29)
+ %31 = tail call i64 @llvm.hexagon.S2.asl.r.vw(i64 %30, i32 %gb)
+ %32 = bitcast i64 %31 to <2 x i32>
+ %33 = extractelement <2 x i32> %32, i32 0
+ store i32 %33, i32* %p_arrayidx14.1, align 4
+ %34 = extractelement <2 x i32> %32, i32 1
+ store i32 %34, i32* %p_arrayidx1426.1, align 4
+ store i32 %33, i32* %p_d.018.1, align 4
+ store i32 %34, i32* %p_d.01823.1, align 4
+ %p_21.2 = add i32 %4, 256
+ %p_22.2 = add i32 %4, 320
+ %p_d.018.2 = getelementptr i32, i32* %dest, i32 %p_21.2
+ %p_d.01823.2 = getelementptr i32, i32* %dest, i32 %p_22.2
+ %p_24.2 = add i32 %4, 264
+ %p_25.2 = add i32 %4, 328
+ %p_arrayidx14.2 = getelementptr i32, i32* %dest, i32 %p_24.2
+ %p_arrayidx1426.2 = getelementptr i32, i32* %dest, i32 %p_25.2
+ %_p_scalar_.2 = load i32, i32* %p_d.018.2, align 4
+ %_p_vec_.2 = insertelement <2 x i32> undef, i32 %_p_scalar_.2, i32 0
+ %_p_scalar_27.2 = load i32, i32* %p_d.01823.2, align 4
+ %_p_vec_28.2 = insertelement <2 x i32> %_p_vec_.2, i32 %_p_scalar_27.2, i32 1
+ %35 = bitcast <2 x i32> %_p_vec_28.2 to i64
+ %36 = tail call i64 @llvm.hexagon.S2.asr.i.vw(i64 %35, i32 31)
+ %37 = bitcast i64 %36 to <2 x i32>
+ %shr9p_vec.2 = ashr <2 x i32> %_p_vec_28.2, %7
+ %xorp_vec.2 = xor <2 x i32> %37, %sub12p_vec
+ %38 = bitcast <2 x i32> %shr9p_vec.2 to i64
+ %39 = tail call i32 @llvm.hexagon.A2.vcmpweq(i64 %36, i64 %38)
+ %40 = bitcast <2 x i32> %xorp_vec.2 to i64
+ %41 = tail call i64 @llvm.hexagon.C2.vmux(i32 %39, i64 %35, i64 %40)
+ %42 = tail call i64 @llvm.hexagon.S2.asl.r.vw(i64 %41, i32 %gb)
+ %43 = bitcast i64 %42 to <2 x i32>
+ %44 = extractelement <2 x i32> %43, i32 0
+ store i32 %44, i32* %p_arrayidx14.2, align 4
+ %45 = extractelement <2 x i32> %43, i32 1
+ store i32 %45, i32* %p_arrayidx1426.2, align 4
+ store i32 %44, i32* %p_d.018.2, align 4
+ store i32 %45, i32* %p_d.01823.2, align 4
+ %p_21.3 = add i32 %4, 384
+ %p_22.3 = add i32 %4, 448
+ %p_d.018.3 = getelementptr i32, i32* %dest, i32 %p_21.3
+ %p_d.01823.3 = getelementptr i32, i32* %dest, i32 %p_22.3
+ %p_24.3 = add i32 %4, 392
+ %p_25.3 = add i32 %4, 456
+ %p_arrayidx14.3 = getelementptr i32, i32* %dest, i32 %p_24.3
+ %p_arrayidx1426.3 = getelementptr i32, i32* %dest, i32 %p_25.3
+ %_p_scalar_.3 = load i32, i32* %p_d.018.3, align 4
+ %_p_vec_.3 = insertelement <2 x i32> undef, i32 %_p_scalar_.3, i32 0
+ %_p_scalar_27.3 = load i32, i32* %p_d.01823.3, align 4
+ %_p_vec_28.3 = insertelement <2 x i32> %_p_vec_.3, i32 %_p_scalar_27.3, i32 1
+ %46 = bitcast <2 x i32> %_p_vec_28.3 to i64
+ %47 = tail call i64 @llvm.hexagon.S2.asr.i.vw(i64 %46, i32 31)
+ %48 = bitcast i64 %47 to <2 x i32>
+ %shr9p_vec.3 = ashr <2 x i32> %_p_vec_28.3, %7
+ %xorp_vec.3 = xor <2 x i32> %48, %sub12p_vec
+ %49 = bitcast <2 x i32> %shr9p_vec.3 to i64
+ %50 = tail call i32 @llvm.hexagon.A2.vcmpweq(i64 %47, i64 %49)
+ %51 = bitcast <2 x i32> %xorp_vec.3 to i64
+ %52 = tail call i64 @llvm.hexagon.C2.vmux(i32 %50, i64 %46, i64 %51)
+ %53 = tail call i64 @llvm.hexagon.S2.asl.r.vw(i64 %52, i32 %gb)
+ %54 = bitcast i64 %53 to <2 x i32>
+ %55 = extractelement <2 x i32> %54, i32 0
+ store i32 %55, i32* %p_arrayidx14.3, align 4
+ %56 = extractelement <2 x i32> %54, i32 1
+ store i32 %56, i32* %p_arrayidx1426.3, align 4
+ store i32 %55, i32* %p_d.018.3, align 4
+ store i32 %56, i32* %p_d.01823.3, align 4
+ %p_21.4 = add i32 %4, 512
+ %p_22.4 = add i32 %4, 576
+ %p_d.018.4 = getelementptr i32, i32* %dest, i32 %p_21.4
+ %p_d.01823.4 = getelementptr i32, i32* %dest, i32 %p_22.4
+ %p_24.4 = add i32 %4, 520
+ %p_25.4 = add i32 %4, 584
+ %p_arrayidx14.4 = getelementptr i32, i32* %dest, i32 %p_24.4
+ %p_arrayidx1426.4 = getelementptr i32, i32* %dest, i32 %p_25.4
+ %_p_scalar_.4 = load i32, i32* %p_d.018.4, align 4
+ %_p_vec_.4 = insertelement <2 x i32> undef, i32 %_p_scalar_.4, i32 0
+ %_p_scalar_27.4 = load i32, i32* %p_d.01823.4, align 4
+ %_p_vec_28.4 = insertelement <2 x i32> %_p_vec_.4, i32 %_p_scalar_27.4, i32 1
+ %57 = bitcast <2 x i32> %_p_vec_28.4 to i64
+ %58 = tail call i64 @llvm.hexagon.S2.asr.i.vw(i64 %57, i32 31)
+ %59 = bitcast i64 %58 to <2 x i32>
+ %shr9p_vec.4 = ashr <2 x i32> %_p_vec_28.4, %7
+ %xorp_vec.4 = xor <2 x i32> %59, %sub12p_vec
+ %60 = bitcast <2 x i32> %shr9p_vec.4 to i64
+ %61 = tail call i32 @llvm.hexagon.A2.vcmpweq(i64 %58, i64 %60)
+ %62 = bitcast <2 x i32> %xorp_vec.4 to i64
+ %63 = tail call i64 @llvm.hexagon.C2.vmux(i32 %61, i64 %57, i64 %62)
+ %64 = tail call i64 @llvm.hexagon.S2.asl.r.vw(i64 %63, i32 %gb)
+ %65 = bitcast i64 %64 to <2 x i32>
+ %66 = extractelement <2 x i32> %65, i32 0
+ store i32 %66, i32* %p_arrayidx14.4, align 4
+ %67 = extractelement <2 x i32> %65, i32 1
+ store i32 %67, i32* %p_arrayidx1426.4, align 4
+ store i32 %66, i32* %p_d.018.4, align 4
+ store i32 %67, i32* %p_d.01823.4, align 4
+ %p_21.5 = add i32 %4, 640
+ %p_22.5 = add i32 %4, 704
+ %p_d.018.5 = getelementptr i32, i32* %dest, i32 %p_21.5
+ %p_d.01823.5 = getelementptr i32, i32* %dest, i32 %p_22.5
+ %p_24.5 = add i32 %4, 648
+ %p_25.5 = add i32 %4, 712
+ %p_arrayidx14.5 = getelementptr i32, i32* %dest, i32 %p_24.5
+ %p_arrayidx1426.5 = getelementptr i32, i32* %dest, i32 %p_25.5
+ %_p_scalar_.5 = load i32, i32* %p_d.018.5, align 4
+ %_p_vec_.5 = insertelement <2 x i32> undef, i32 %_p_scalar_.5, i32 0
+ %_p_scalar_27.5 = load i32, i32* %p_d.01823.5, align 4
+ %_p_vec_28.5 = insertelement <2 x i32> %_p_vec_.5, i32 %_p_scalar_27.5, i32 1
+ %68 = bitcast <2 x i32> %_p_vec_28.5 to i64
+ %69 = tail call i64 @llvm.hexagon.S2.asr.i.vw(i64 %68, i32 31)
+ %70 = bitcast i64 %69 to <2 x i32>
+ %shr9p_vec.5 = ashr <2 x i32> %_p_vec_28.5, %7
+ %xorp_vec.5 = xor <2 x i32> %70, %sub12p_vec
+ %71 = bitcast <2 x i32> %shr9p_vec.5 to i64
+ %72 = tail call i32 @llvm.hexagon.A2.vcmpweq(i64 %69, i64 %71)
+ %73 = bitcast <2 x i32> %xorp_vec.5 to i64
+ %74 = tail call i64 @llvm.hexagon.C2.vmux(i32 %72, i64 %68, i64 %73)
+ %75 = tail call i64 @llvm.hexagon.S2.asl.r.vw(i64 %74, i32 %gb)
+ %76 = bitcast i64 %75 to <2 x i32>
+ %77 = extractelement <2 x i32> %76, i32 0
+ store i32 %77, i32* %p_arrayidx14.5, align 4
+ %78 = extractelement <2 x i32> %76, i32 1
+ store i32 %78, i32* %p_arrayidx1426.5, align 4
+ store i32 %77, i32* %p_d.018.5, align 4
+ store i32 %78, i32* %p_d.01823.5, align 4
+ %p_21.6 = add i32 %4, 768
+ %p_22.6 = add i32 %4, 832
+ %p_d.018.6 = getelementptr i32, i32* %dest, i32 %p_21.6
+ %p_d.01823.6 = getelementptr i32, i32* %dest, i32 %p_22.6
+ %p_24.6 = add i32 %4, 776
+ %p_25.6 = add i32 %4, 840
+ %p_arrayidx14.6 = getelementptr i32, i32* %dest, i32 %p_24.6
+ %p_arrayidx1426.6 = getelementptr i32, i32* %dest, i32 %p_25.6
+ %_p_scalar_.6 = load i32, i32* %p_d.018.6, align 4
+ %_p_vec_.6 = insertelement <2 x i32> undef, i32 %_p_scalar_.6, i32 0
+ %_p_scalar_27.6 = load i32, i32* %p_d.01823.6, align 4
+ %_p_vec_28.6 = insertelement <2 x i32> %_p_vec_.6, i32 %_p_scalar_27.6, i32 1
+ %79 = bitcast <2 x i32> %_p_vec_28.6 to i64
+ %80 = tail call i64 @llvm.hexagon.S2.asr.i.vw(i64 %79, i32 31)
+ %81 = bitcast i64 %80 to <2 x i32>
+ %shr9p_vec.6 = ashr <2 x i32> %_p_vec_28.6, %7
+ %xorp_vec.6 = xor <2 x i32> %81, %sub12p_vec
+ %82 = bitcast <2 x i32> %shr9p_vec.6 to i64
+ %83 = tail call i32 @llvm.hexagon.A2.vcmpweq(i64 %80, i64 %82)
+ %84 = bitcast <2 x i32> %xorp_vec.6 to i64
+ %85 = tail call i64 @llvm.hexagon.C2.vmux(i32 %83, i64 %79, i64 %84)
+ %86 = tail call i64 @llvm.hexagon.S2.asl.r.vw(i64 %85, i32 %gb)
+ %87 = bitcast i64 %86 to <2 x i32>
+ %88 = extractelement <2 x i32> %87, i32 0
+ store i32 %88, i32* %p_arrayidx14.6, align 4
+ %89 = extractelement <2 x i32> %87, i32 1
+ store i32 %89, i32* %p_arrayidx1426.6, align 4
+ store i32 %88, i32* %p_d.018.6, align 4
+ store i32 %89, i32* %p_d.01823.6, align 4
+ %p_21.7 = add i32 %4, 896
+ %p_22.7 = add i32 %4, 960
+ %p_d.018.7 = getelementptr i32, i32* %dest, i32 %p_21.7
+ %p_d.01823.7 = getelementptr i32, i32* %dest, i32 %p_22.7
+ %p_24.7 = add i32 %4, 904
+ %p_25.7 = add i32 %4, 968
+ %p_arrayidx14.7 = getelementptr i32, i32* %dest, i32 %p_24.7
+ %p_arrayidx1426.7 = getelementptr i32, i32* %dest, i32 %p_25.7
+ %_p_scalar_.7 = load i32, i32* %p_d.018.7, align 4
+ %_p_vec_.7 = insertelement <2 x i32> undef, i32 %_p_scalar_.7, i32 0
+ %_p_scalar_27.7 = load i32, i32* %p_d.01823.7, align 4
+ %_p_vec_28.7 = insertelement <2 x i32> %_p_vec_.7, i32 %_p_scalar_27.7, i32 1
+ %90 = bitcast <2 x i32> %_p_vec_28.7 to i64
+ %91 = tail call i64 @llvm.hexagon.S2.asr.i.vw(i64 %90, i32 31)
+ %92 = bitcast i64 %91 to <2 x i32>
+ %shr9p_vec.7 = ashr <2 x i32> %_p_vec_28.7, %7
+ %xorp_vec.7 = xor <2 x i32> %92, %sub12p_vec
+ %93 = bitcast <2 x i32> %shr9p_vec.7 to i64
+ %94 = tail call i32 @llvm.hexagon.A2.vcmpweq(i64 %91, i64 %93)
+ %95 = bitcast <2 x i32> %xorp_vec.7 to i64
+ %96 = tail call i64 @llvm.hexagon.C2.vmux(i32 %94, i64 %90, i64 %95)
+ %97 = tail call i64 @llvm.hexagon.S2.asl.r.vw(i64 %96, i32 %gb)
+ %98 = bitcast i64 %97 to <2 x i32>
+ %99 = extractelement <2 x i32> %98, i32 0
+ store i32 %99, i32* %p_arrayidx14.7, align 4
+ %100 = extractelement <2 x i32> %98, i32 1
+ store i32 %100, i32* %p_arrayidx1426.7, align 4
+ store i32 %99, i32* %p_d.018.7, align 4
+ store i32 %100, i32* %p_d.01823.7, align 4
+ ret void
+}
+
+declare i64 @llvm.hexagon.S2.asr.i.vw(i64, i32) #1
+
+declare i64 @llvm.hexagon.S2.asl.i.vw(i64, i32) #1
+
+declare i64 @llvm.hexagon.A2.combinew(i32, i32) #1
+
+declare i32 @llvm.hexagon.A2.vcmpweq(i64, i64) #1
+
+declare i64 @llvm.hexagon.C2.vmux(i32, i64, i64) #1
+
+declare i64 @llvm.hexagon.S2.asl.r.vw(i64, i32) #1
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone }
+
+!0 = !{!"int", !1}
+!1 = !{!"omnipotent char", !2}
+!2 = !{!"Simple C/C++ TBAA"}
diff --git a/test/CodeGen/Hexagon/vect/vect-vsplatb.ll b/test/CodeGen/Hexagon/vect/vect-vsplatb.ll
new file mode 100644
index 000000000000..6996dd144eba
--- /dev/null
+++ b/test/CodeGen/Hexagon/vect/vect-vsplatb.ll
@@ -0,0 +1,29 @@
+; RUN: llc -march=hexagon < %s | FileCheck %s
+; Make sure we build the constant vector <7, 7, 7, 7> with a vsplatb.
+; CHECK: vsplatb
+@B = common global [400 x i8] zeroinitializer, align 8
+@A = common global [400 x i8] zeroinitializer, align 8
+@C = common global [400 x i8] zeroinitializer, align 8
+
+define void @run() nounwind {
+entry:
+ br label %polly.loop_body
+
+polly.loop_after: ; preds = %polly.loop_body
+ ret void
+
+polly.loop_body: ; preds = %entry, %polly.loop_body
+ %polly.loopiv25 = phi i32 [ 0, %entry ], [ %polly.next_loopiv, %polly.loop_body ]
+ %polly.next_loopiv = add i32 %polly.loopiv25, 4
+ %p_arrayidx1 = getelementptr [400 x i8], [400 x i8]* @A, i32 0, i32 %polly.loopiv25
+ %p_arrayidx = getelementptr [400 x i8], [400 x i8]* @B, i32 0, i32 %polly.loopiv25
+ %vector_ptr = bitcast i8* %p_arrayidx to <4 x i8>*
+ %_p_vec_full = load <4 x i8>, <4 x i8>* %vector_ptr, align 8
+ %mulp_vec = mul <4 x i8> %_p_vec_full, <i8 7, i8 7, i8 7, i8 7>
+ %vector_ptr14 = bitcast i8* %p_arrayidx1 to <4 x i8>*
+ %_p_vec_full15 = load <4 x i8>, <4 x i8>* %vector_ptr14, align 8
+ %addp_vec = add <4 x i8> %_p_vec_full15, %mulp_vec
+ store <4 x i8> %addp_vec, <4 x i8>* %vector_ptr14, align 8
+ %0 = icmp slt i32 %polly.next_loopiv, 400
+ br i1 %0, label %polly.loop_body, label %polly.loop_after
+}
diff --git a/test/CodeGen/Hexagon/vect/vect-vsplath.ll b/test/CodeGen/Hexagon/vect/vect-vsplath.ll
new file mode 100644
index 000000000000..f5207109773e
--- /dev/null
+++ b/test/CodeGen/Hexagon/vect/vect-vsplath.ll
@@ -0,0 +1,29 @@
+; RUN: llc -march=hexagon < %s | FileCheck %s
+; Make sure we build the constant vector <7, 7, 7, 7> with a vsplath.
+; CHECK: vsplath
+@B = common global [400 x i16] zeroinitializer, align 8
+@A = common global [400 x i16] zeroinitializer, align 8
+@C = common global [400 x i16] zeroinitializer, align 8
+
+define void @run() nounwind {
+entry:
+ br label %polly.loop_body
+
+polly.loop_after: ; preds = %polly.loop_body
+ ret void
+
+polly.loop_body: ; preds = %entry, %polly.loop_body
+ %polly.loopiv26 = phi i32 [ 0, %entry ], [ %polly.next_loopiv, %polly.loop_body ]
+ %polly.next_loopiv = add nsw i32 %polly.loopiv26, 4
+ %p_arrayidx1 = getelementptr [400 x i16], [400 x i16]* @A, i32 0, i32 %polly.loopiv26
+ %p_arrayidx = getelementptr [400 x i16], [400 x i16]* @B, i32 0, i32 %polly.loopiv26
+ %vector_ptr = bitcast i16* %p_arrayidx to <4 x i16>*
+ %_p_vec_full = load <4 x i16>, <4 x i16>* %vector_ptr, align 8
+ %mulp_vec = mul <4 x i16> %_p_vec_full, <i16 7, i16 7, i16 7, i16 7>
+ %vector_ptr15 = bitcast i16* %p_arrayidx1 to <4 x i16>*
+ %_p_vec_full16 = load <4 x i16>, <4 x i16>* %vector_ptr15, align 8
+ %addp_vec = add <4 x i16> %_p_vec_full16, %mulp_vec
+ store <4 x i16> %addp_vec, <4 x i16>* %vector_ptr15, align 8
+ %0 = icmp slt i32 %polly.next_loopiv, 400
+ br i1 %0, label %polly.loop_body, label %polly.loop_after
+}
diff --git a/test/CodeGen/Hexagon/vect/vect-vsubb-1.ll b/test/CodeGen/Hexagon/vect/vect-vsubb-1.ll
new file mode 100644
index 000000000000..8ac76a0bf13c
--- /dev/null
+++ b/test/CodeGen/Hexagon/vect/vect-vsubb-1.ll
@@ -0,0 +1,8 @@
+; RUN: llc -march=hexagon < %s | FileCheck %s
+; CHECK: vsubub
+
+define <4 x i8> @t_i4x8(<4 x i8> %a, <4 x i8> %b) nounwind {
+entry:
+ %0 = sub <4 x i8> %a, %b
+ ret <4 x i8> %0
+}
diff --git a/test/CodeGen/Hexagon/vect/vect-vsubb.ll b/test/CodeGen/Hexagon/vect/vect-vsubb.ll
new file mode 100644
index 000000000000..73cfc74074ad
--- /dev/null
+++ b/test/CodeGen/Hexagon/vect/vect-vsubb.ll
@@ -0,0 +1,8 @@
+; RUN: llc -march=hexagon < %s | FileCheck %s
+; CHECK: vsubub
+
+define <8 x i8> @t_i8x8(<8 x i8> %a, <8 x i8> %b) nounwind {
+entry:
+ %0 = sub <8 x i8> %a, %b
+ ret <8 x i8> %0
+}
diff --git a/test/CodeGen/Hexagon/vect/vect-vsubh-1.ll b/test/CodeGen/Hexagon/vect/vect-vsubh-1.ll
new file mode 100644
index 000000000000..c1f87bf090d6
--- /dev/null
+++ b/test/CodeGen/Hexagon/vect/vect-vsubh-1.ll
@@ -0,0 +1,8 @@
+; RUN: llc -march=hexagon < %s | FileCheck %s
+; CHECK: vsubh
+
+define <4 x i16> @t_i4x16(<4 x i16> %a, <4 x i16> %b) nounwind {
+entry:
+ %0 = sub <4 x i16> %a, %b
+ ret <4 x i16> %0
+}
diff --git a/test/CodeGen/Hexagon/vect/vect-vsubh.ll b/test/CodeGen/Hexagon/vect/vect-vsubh.ll
new file mode 100644
index 000000000000..cc7e595644d2
--- /dev/null
+++ b/test/CodeGen/Hexagon/vect/vect-vsubh.ll
@@ -0,0 +1,8 @@
+; RUN: llc -march=hexagon < %s | FileCheck %s
+; CHECK: vsubh
+
+define <2 x i16> @t_i2x16(<2 x i16> %a, <2 x i16> %b) nounwind {
+entry:
+ %0 = sub <2 x i16> %a, %b
+ ret <2 x i16> %0
+}
diff --git a/test/CodeGen/Hexagon/vect/vect-vsubw.ll b/test/CodeGen/Hexagon/vect/vect-vsubw.ll
new file mode 100644
index 000000000000..ba326a33109b
--- /dev/null
+++ b/test/CodeGen/Hexagon/vect/vect-vsubw.ll
@@ -0,0 +1,8 @@
+; RUN: llc -march=hexagon < %s | FileCheck %s
+; CHECK: vsubw
+
+define <2 x i32> @t_i2x32(<2 x i32> %a, <2 x i32> %b) nounwind {
+entry:
+ %0 = sub <2 x i32> %a, %b
+ ret <2 x i32> %0
+}
diff --git a/test/CodeGen/Hexagon/vect/vect-xor.ll b/test/CodeGen/Hexagon/vect/vect-xor.ll
new file mode 100644
index 000000000000..961185581128
--- /dev/null
+++ b/test/CodeGen/Hexagon/vect/vect-xor.ll
@@ -0,0 +1,38 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv5 < %s | FileCheck %s
+
+; Check that the parsing succeeded.
+; CHECK: r{{[0-9]+:[0-9]+}} = xor(r{{[0-9]+:[0-9]+}}, r{{[0-9]+:[0-9]+}})
+target datalayout = "e-p:32:32:32-i64:64:64-i32:32:32-i16:16:16-i1:32:32-f64:64:64-f32:32:32-v64:64:64-v32:32:32-a0:0-n16:32"
+target triple = "hexagon"
+
+@window_size = global i32 65536, align 4
+@prev = external global [0 x i16], align 8
+@block_start = common global i32 0, align 4
+@prev_length = common global i32 0, align 4
+@strstart = common global i32 0, align 4
+@match_start = common global i32 0, align 4
+@max_chain_length = common global i32 0, align 4
+@good_match = common global i32 0, align 4
+
+define void @fill_window() #0 {
+entry:
+ br label %polly.loop_body
+
+polly.loop_after: ; preds = %polly.loop_body
+ ret void
+
+polly.loop_body: ; preds = %entry, %polly.loop_body
+ %polly.loopiv36 = phi i32 [ 0, %entry ], [ %polly.next_loopiv, %polly.loop_body ]
+ %polly.next_loopiv = add nsw i32 %polly.loopiv36, 4
+ %p_arrayidx4 = getelementptr [0 x i16], [0 x i16]* @prev, i32 0, i32 %polly.loopiv36
+ %vector_ptr = bitcast i16* %p_arrayidx4 to <4 x i16>*
+ %_p_vec_full = load <4 x i16>, <4 x i16>* %vector_ptr, align 2
+ %cmp1p_vicmp = icmp slt <4 x i16> %_p_vec_full, zeroinitializer
+ %subp_vec = xor <4 x i16> %_p_vec_full, <i16 -32768, i16 -32768, i16 -32768, i16 -32768>
+ %sel1p_vsel = select <4 x i1> %cmp1p_vicmp, <4 x i16> %subp_vec, <4 x i16> zeroinitializer
+ store <4 x i16> %sel1p_vsel, <4 x i16>* %vector_ptr, align 2
+ %0 = icmp slt i32 %polly.next_loopiv, 32768
+ br i1 %0, label %polly.loop_body, label %polly.loop_after
+}
+
+attributes #0 = { nounwind "fp-contract-model"="standard" "no-frame-pointer-elim-non-leaf" "realign-stack" "relocation-model"="static" "ssp-buffers-size"="8" }
diff --git a/test/CodeGen/Hexagon/vect/vect-zeroextend.ll b/test/CodeGen/Hexagon/vect/vect-zeroextend.ll
new file mode 100644
index 000000000000..3d0b7946f77a
--- /dev/null
+++ b/test/CodeGen/Hexagon/vect/vect-zeroextend.ll
@@ -0,0 +1,23 @@
+; RUN: llc -march=hexagon < %s
+; Used to fail with "Cannot select: 0x16cb2d0: v4i16 = zero_extend"
+
+; ModuleID = 'bugpoint-reduced-simplified.bc'
+target datalayout = "e-p:32:32:32-i64:64:64-i32:32:32-i16:16:16-i1:32:32-f64:64:64-f32:32:32-a0:0-n32"
+target triple = "hexagon-unknown-linux-gnu"
+
+define void @foo() nounwind {
+entry:
+ br i1 undef, label %for.cond30.preheader.lr.ph, label %for.end425
+
+for.cond30.preheader.lr.ph: ; preds = %entry
+ br label %for.cond37.preheader
+
+for.cond37.preheader: ; preds = %for.cond37.preheader, %for.cond30.preheader.lr.ph
+ %_p_vec_full = load <3 x i8>, <3 x i8>* undef, align 8
+ %0 = zext <3 x i8> %_p_vec_full to <3 x i16>
+ store <3 x i16> %0, <3 x i16>* undef, align 8
+ br label %for.cond37.preheader
+
+for.end425: ; preds = %entry
+ ret void
+}
diff --git a/test/CodeGen/Hexagon/zextloadi1.ll b/test/CodeGen/Hexagon/zextloadi1.ll
index b58d9332695d..9ce7bea9fce6 100644
--- a/test/CodeGen/Hexagon/zextloadi1.ll
+++ b/test/CodeGen/Hexagon/zextloadi1.ll
@@ -13,13 +13,13 @@
@i129_s = external global i129
define void @i129_ls() nounwind {
- %tmp = load i129* @i129_l
+ %tmp = load i129, i129* @i129_l
store i129 %tmp, i129* @i129_s
ret void
}
define void @i65_ls() nounwind {
- %tmp = load i65* @i65_l
+ %tmp = load i65, i65* @i65_l
store i65 %tmp, i65* @i65_s
ret void
}
diff --git a/test/CodeGen/Inputs/DbgValueOtherTargets.ll b/test/CodeGen/Inputs/DbgValueOtherTargets.ll
index d21a4eebe247..efa1a0849a8e 100644
--- a/test/CodeGen/Inputs/DbgValueOtherTargets.ll
+++ b/test/CodeGen/Inputs/DbgValueOtherTargets.ll
@@ -3,7 +3,7 @@
define i32 @main() nounwind ssp {
entry:
; CHECK: DEBUG_VALUE
- call void @llvm.dbg.value(metadata i32 0, i64 0, metadata !7, metadata !{!"0x102"}), !dbg !9
+ call void @llvm.dbg.value(metadata i32 0, i64 0, metadata !7, metadata !DIExpression()), !dbg !9
ret i32 0, !dbg !10
}
@@ -14,17 +14,17 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnon
!llvm.dbg.cu = !{!2}
!llvm.module.flags = !{!13}
-!0 = !{!"0x2e\00main\00main\00\002\000\001\000\006\000\000\000", !12, !1, !3, null, i32 ()* @main, null, null, null} ; [ DW_TAG_subprogram ]
-!1 = !{!"0x29", !12} ; [ DW_TAG_file_type ]
-!2 = !{!"0x11\0012\00clang version 2.9 (trunk 120996)\000\00\000\00\000", !12, !6, !6, !11, null, null} ; [ DW_TAG_compile_unit ]
-!3 = !{!"0x15\00\000\000\000\000\000\000", !12, !1, null, !4, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!0 = !DISubprogram(name: "main", line: 2, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, file: !12, scope: !1, type: !3, function: i32 ()* @main)
+!1 = !DIFile(filename: "/tmp/x.c", directory: "/Users/manav")
+!2 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 2.9 (trunk 120996)", isOptimized: false, emissionKind: 0, file: !12, enums: !6, retainedTypes: !6, subprograms: !11)
+!3 = !DISubroutineType(types: !4)
!4 = !{!5}
-!5 = !{!"0x24\00int\000\0032\0032\000\000\005", !12, !2} ; [ DW_TAG_base_type ]
-!6 = !{i32 0}
-!7 = !{!"0x100\00i\003\000", !8, !1, !5} ; [ DW_TAG_auto_variable ]
-!8 = !{!"0xb\002\0012\000", !12, !0} ; [ DW_TAG_lexical_block ]
-!9 = !MDLocation(line: 3, column: 11, scope: !8)
-!10 = !MDLocation(line: 4, column: 2, scope: !8)
+!5 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!6 = !{}
+!7 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "i", line: 3, scope: !8, file: !1, type: !5)
+!8 = distinct !DILexicalBlock(line: 2, column: 12, file: !12, scope: !0)
+!9 = !DILocation(line: 3, column: 11, scope: !8)
+!10 = !DILocation(line: 4, column: 2, scope: !8)
!11 = !{!0}
-!12 = !{!"/tmp/x.c", !"/Users/manav"}
-!13 = !{i32 1, !"Debug Info Version", i32 2}
+!12 = !DIFile(filename: "/tmp/x.c", directory: "/Users/manav")
+!13 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/CodeGen/MSP430/2009-05-10-CyclicDAG.ll b/test/CodeGen/MSP430/2009-05-10-CyclicDAG.ll
index 4c7d2d092564..38e9832f526d 100644
--- a/test/CodeGen/MSP430/2009-05-10-CyclicDAG.ll
+++ b/test/CodeGen/MSP430/2009-05-10-CyclicDAG.ll
@@ -7,7 +7,7 @@ target triple = "msp430-unknown-linux-gnu"
define void @uip_arp_arpin() nounwind {
entry:
- %tmp = load volatile i16* @uip_len ; <i16> [#uses=1]
+ %tmp = load volatile i16, i16* @uip_len ; <i16> [#uses=1]
%cmp = icmp ult i16 %tmp, 42 ; <i1> [#uses=1]
store volatile i16 0, i16* @uip_len
br i1 %cmp, label %if.then, label %if.end
diff --git a/test/CodeGen/MSP430/2009-05-17-Rot.ll b/test/CodeGen/MSP430/2009-05-17-Rot.ll
index d622aa71164b..30b373990a75 100644
--- a/test/CodeGen/MSP430/2009-05-17-Rot.ll
+++ b/test/CodeGen/MSP430/2009-05-17-Rot.ll
@@ -4,14 +4,14 @@ define i16 @rol1u16(i16 %x.arg) nounwind {
%retval = alloca i16
%x = alloca i16
store i16 %x.arg, i16* %x
- %1 = load i16* %x
+ %1 = load i16, i16* %x
%2 = shl i16 %1, 1
- %3 = load i16* %x
+ %3 = load i16, i16* %x
%4 = lshr i16 %3, 15
%5 = or i16 %2, %4
store i16 %5, i16* %retval
br label %return
return:
- %6 = load i16* %retval
+ %6 = load i16, i16* %retval
ret i16 %6
}
diff --git a/test/CodeGen/MSP430/2009-05-17-Shift.ll b/test/CodeGen/MSP430/2009-05-17-Shift.ll
index e23df7851666..2e3dd5593ff0 100644
--- a/test/CodeGen/MSP430/2009-05-17-Shift.ll
+++ b/test/CodeGen/MSP430/2009-05-17-Shift.ll
@@ -4,12 +4,12 @@ define i16 @lsr2u16(i16 %x.arg) nounwind {
%retval = alloca i16
%x = alloca i16
store i16 %x.arg, i16* %x
- %1 = load i16* %x
+ %1 = load i16, i16* %x
%2 = lshr i16 %1, 2
store i16 %2, i16* %retval
br label %return
return:
- %3 = load i16* %retval
+ %3 = load i16, i16* %retval
ret i16 %3
}
diff --git a/test/CodeGen/MSP430/2009-08-25-DynamicStackAlloc.ll b/test/CodeGen/MSP430/2009-08-25-DynamicStackAlloc.ll
index e8c0d14afd21..ca54ff0c3b48 100644
--- a/test/CodeGen/MSP430/2009-08-25-DynamicStackAlloc.ll
+++ b/test/CodeGen/MSP430/2009-08-25-DynamicStackAlloc.ll
@@ -7,7 +7,7 @@ define i16 @foo() nounwind readnone {
entry:
%result = alloca i16, align 1 ; <i16*> [#uses=2]
store volatile i16 0, i16* %result
- %tmp = load volatile i16* %result ; <i16> [#uses=1]
+ %tmp = load volatile i16, i16* %result ; <i16> [#uses=1]
ret i16 %tmp
}
@@ -23,7 +23,7 @@ while.cond: ; preds = %while.cond, %entry
while.end: ; preds = %while.cond
%result.i = alloca i16, align 1 ; <i16*> [#uses=2]
store volatile i16 0, i16* %result.i
- %tmp.i = load volatile i16* %result.i ; <i16> [#uses=0]
+ %tmp.i = load volatile i16, i16* %result.i ; <i16> [#uses=0]
ret i16 0
}
diff --git a/test/CodeGen/MSP430/2009-09-18-AbsoluteAddr.ll b/test/CodeGen/MSP430/2009-09-18-AbsoluteAddr.ll
index 9fab4826e085..72ba335b54e1 100644
--- a/test/CodeGen/MSP430/2009-09-18-AbsoluteAddr.ll
+++ b/test/CodeGen/MSP430/2009-09-18-AbsoluteAddr.ll
@@ -11,12 +11,12 @@ entry:
%x.addr = alloca i8 ; <i8*> [#uses=2]
%tmp = alloca i8, align 1 ; <i8*> [#uses=2]
store i8 %x, i8* %x.addr
- %tmp1 = load volatile i8* @"\010x0021" ; <i8> [#uses=1]
+ %tmp1 = load volatile i8, i8* @"\010x0021" ; <i8> [#uses=1]
store i8 %tmp1, i8* %tmp
- %tmp2 = load i8* %x.addr ; <i8> [#uses=1]
+ %tmp2 = load i8, i8* %x.addr ; <i8> [#uses=1]
store volatile i8 %tmp2, i8* @"\010x0021"
- %tmp3 = load i8* %tmp ; <i8> [#uses=1]
+ %tmp3 = load i8, i8* %tmp ; <i8> [#uses=1]
store i8 %tmp3, i8* %retval
- %0 = load i8* %retval ; <i8> [#uses=1]
+ %0 = load i8, i8* %retval ; <i8> [#uses=1]
ret i8 %0
}
diff --git a/test/CodeGen/MSP430/2009-10-10-OrImpDef.ll b/test/CodeGen/MSP430/2009-10-10-OrImpDef.ll
index c1a186a637cf..6dfbbfc03e90 100644
--- a/test/CodeGen/MSP430/2009-10-10-OrImpDef.ll
+++ b/test/CodeGen/MSP430/2009-10-10-OrImpDef.ll
@@ -4,7 +4,7 @@ define void @foo() nounwind {
entry:
%r = alloca i8 ; <i8*> [#uses=2]
%"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
- load volatile i8* %r, align 1 ; <i8>:0 [#uses=1]
+ load volatile i8, i8* %r, align 1 ; <i8>:0 [#uses=1]
or i8 %0, 1 ; <i8>:1 [#uses=1]
store volatile i8 %1, i8* %r, align 1
br label %return
diff --git a/test/CodeGen/MSP430/2009-11-08-InvalidResNo.ll b/test/CodeGen/MSP430/2009-11-08-InvalidResNo.ll
index d232aeae5b51..04b087e95363 100644
--- a/test/CodeGen/MSP430/2009-11-08-InvalidResNo.ll
+++ b/test/CodeGen/MSP430/2009-11-08-InvalidResNo.ll
@@ -46,9 +46,9 @@ while.cond36.i: ; preds = %while.body41.i, %wh
br i1 undef, label %do.body, label %while.body41.i
while.body41.i: ; preds = %while.cond36.i
- %tmp43.i = load i8** @foo ; <i8*> [#uses=2]
- %tmp44.i = load i8* %tmp43.i ; <i8> [#uses=1]
- %ptrincdec50.i = getelementptr inbounds i8* %tmp43.i, i16 1 ; <i8*> [#uses=1]
+ %tmp43.i = load i8*, i8** @foo ; <i8*> [#uses=2]
+ %tmp44.i = load i8, i8* %tmp43.i ; <i8> [#uses=1]
+ %ptrincdec50.i = getelementptr inbounds i8, i8* %tmp43.i, i16 1 ; <i8*> [#uses=1]
store i8* %ptrincdec50.i, i8** @foo
%cmp55.i = icmp eq i8 %tmp44.i, %c ; <i1> [#uses=1]
br i1 %cmp55.i, label %do.end41, label %while.cond36.i
diff --git a/test/CodeGen/MSP430/2009-12-22-InlineAsm.ll b/test/CodeGen/MSP430/2009-12-22-InlineAsm.ll
index a9df1a3e9743..fa9d0c8e46cb 100644
--- a/test/CodeGen/MSP430/2009-12-22-InlineAsm.ll
+++ b/test/CodeGen/MSP430/2009-12-22-InlineAsm.ll
@@ -8,8 +8,8 @@ target triple = "msp430-unknown-unknown"
define i16 @main() noreturn nounwind {
entry:
- %0 = tail call i8* asm "", "=r,0"(i8* getelementptr inbounds ([10 x i8]* @buf, i16 0, i16 0)) nounwind ; <i8*> [#uses=1]
- %sub.ptr = getelementptr inbounds i8* %0, i16 1 ; <i8*> [#uses=1]
+ %0 = tail call i8* asm "", "=r,0"(i8* getelementptr inbounds ([10 x i8], [10 x i8]* @buf, i16 0, i16 0)) nounwind ; <i8*> [#uses=1]
+ %sub.ptr = getelementptr inbounds i8, i8* %0, i16 1 ; <i8*> [#uses=1]
%sub.ptr.lhs.cast = ptrtoint i8* %sub.ptr to i16 ; <i16> [#uses=1]
%sub.ptr.sub = sub i16 %sub.ptr.lhs.cast, ptrtoint ([10 x i8]* @buf to i16) ; <i16> [#uses=1]
%cmp = icmp eq i16 %sub.ptr.sub, 1 ; <i1> [#uses=1]
diff --git a/test/CodeGen/MSP430/2010-05-01-CombinerAnd.ll b/test/CodeGen/MSP430/2010-05-01-CombinerAnd.ll
index 99100377034b..907d6abe9921 100644
--- a/test/CodeGen/MSP430/2010-05-01-CombinerAnd.ll
+++ b/test/CodeGen/MSP430/2010-05-01-CombinerAnd.ll
@@ -19,7 +19,7 @@ land.end: ; preds = %land.rhs, %while.co
br i1 %0, label %while.body, label %while.end
while.body: ; preds = %land.end
- %tmp4 = load i16* undef ; <i16> [#uses=0]
+ %tmp4 = load i16, i16* undef ; <i16> [#uses=0]
br label %while.cond
while.end: ; preds = %land.end
diff --git a/test/CodeGen/MSP430/AddrMode-bis-rx.ll b/test/CodeGen/MSP430/AddrMode-bis-rx.ll
index 44c92ebc82cc..941ee2dc2ce9 100644
--- a/test/CodeGen/MSP430/AddrMode-bis-rx.ll
+++ b/test/CodeGen/MSP430/AddrMode-bis-rx.ll
@@ -3,7 +3,7 @@ target datalayout = "e-p:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:16:16"
target triple = "msp430-generic-generic"
define i16 @am1(i16 %x, i16* %a) nounwind {
- %1 = load i16* %a
+ %1 = load i16, i16* %a
%2 = or i16 %1,%x
ret i16 %2
}
@@ -13,7 +13,7 @@ define i16 @am1(i16 %x, i16* %a) nounwind {
@foo = external global i16
define i16 @am2(i16 %x) nounwind {
- %1 = load i16* @foo
+ %1 = load i16, i16* @foo
%2 = or i16 %1,%x
ret i16 %2
}
@@ -23,8 +23,8 @@ define i16 @am2(i16 %x) nounwind {
@bar = internal constant [2 x i8] [ i8 32, i8 64 ]
define i8 @am3(i8 %x, i16 %n) nounwind {
- %1 = getelementptr [2 x i8]* @bar, i16 0, i16 %n
- %2 = load i8* %1
+ %1 = getelementptr [2 x i8], [2 x i8]* @bar, i16 0, i16 %n
+ %2 = load i8, i8* %1
%3 = or i8 %2,%x
ret i8 %3
}
@@ -32,7 +32,7 @@ define i8 @am3(i8 %x, i16 %n) nounwind {
; CHECK: bis.b bar(r14), r15
define i16 @am4(i16 %x) nounwind {
- %1 = load volatile i16* inttoptr(i16 32 to i16*)
+ %1 = load volatile i16, i16* inttoptr(i16 32 to i16*)
%2 = or i16 %1,%x
ret i16 %2
}
@@ -40,8 +40,8 @@ define i16 @am4(i16 %x) nounwind {
; CHECK: bis.w &32, r15
define i16 @am5(i16 %x, i16* %a) nounwind {
- %1 = getelementptr i16* %a, i16 2
- %2 = load i16* %1
+ %1 = getelementptr i16, i16* %a, i16 2
+ %2 = load i16, i16* %1
%3 = or i16 %2,%x
ret i16 %3
}
@@ -52,7 +52,7 @@ define i16 @am5(i16 %x, i16* %a) nounwind {
@baz = common global %S zeroinitializer, align 1
define i16 @am6(i16 %x) nounwind {
- %1 = load i16* getelementptr (%S* @baz, i32 0, i32 1)
+ %1 = load i16, i16* getelementptr (%S, %S* @baz, i32 0, i32 1)
%2 = or i16 %1,%x
ret i16 %2
}
@@ -63,9 +63,9 @@ define i16 @am6(i16 %x) nounwind {
@duh = internal constant %T { i16 16, [2 x i8][i8 32, i8 64 ] }
define i8 @am7(i8 %x, i16 %n) nounwind {
- %1 = getelementptr %T* @duh, i32 0, i32 1
- %2 = getelementptr [2 x i8]* %1, i16 0, i16 %n
- %3= load i8* %2
+ %1 = getelementptr %T, %T* @duh, i32 0, i32 1
+ %2 = getelementptr [2 x i8], [2 x i8]* %1, i16 0, i16 %n
+ %3= load i8, i8* %2
%4 = or i8 %3,%x
ret i8 %4
}
diff --git a/test/CodeGen/MSP430/AddrMode-bis-xr.ll b/test/CodeGen/MSP430/AddrMode-bis-xr.ll
index 06a3d32d8aa4..4b8f367a8880 100644
--- a/test/CodeGen/MSP430/AddrMode-bis-xr.ll
+++ b/test/CodeGen/MSP430/AddrMode-bis-xr.ll
@@ -3,7 +3,7 @@ target datalayout = "e-p:16:16:16-i8:8:8-i16:16:16-i32:16:16"
target triple = "msp430-generic-generic"
define void @am1(i16* %a, i16 %x) nounwind {
- %1 = load i16* %a
+ %1 = load i16, i16* %a
%2 = or i16 %x, %1
store i16 %2, i16* %a
ret void
@@ -14,7 +14,7 @@ define void @am1(i16* %a, i16 %x) nounwind {
@foo = external global i16
define void @am2(i16 %x) nounwind {
- %1 = load i16* @foo
+ %1 = load i16, i16* @foo
%2 = or i16 %x, %1
store i16 %2, i16* @foo
ret void
@@ -25,8 +25,8 @@ define void @am2(i16 %x) nounwind {
@bar = external global [2 x i8]
define void @am3(i16 %i, i8 %x) nounwind {
- %1 = getelementptr [2 x i8]* @bar, i16 0, i16 %i
- %2 = load i8* %1
+ %1 = getelementptr [2 x i8], [2 x i8]* @bar, i16 0, i16 %i
+ %2 = load i8, i8* %1
%3 = or i8 %x, %2
store i8 %3, i8* %1
ret void
@@ -35,7 +35,7 @@ define void @am3(i16 %i, i8 %x) nounwind {
; CHECK: bis.b r14, bar(r15)
define void @am4(i16 %x) nounwind {
- %1 = load volatile i16* inttoptr(i16 32 to i16*)
+ %1 = load volatile i16, i16* inttoptr(i16 32 to i16*)
%2 = or i16 %x, %1
store volatile i16 %2, i16* inttoptr(i16 32 to i16*)
ret void
@@ -44,8 +44,8 @@ define void @am4(i16 %x) nounwind {
; CHECK: bis.w r15, &32
define void @am5(i16* %a, i16 %x) readonly {
- %1 = getelementptr inbounds i16* %a, i16 2
- %2 = load i16* %1
+ %1 = getelementptr inbounds i16, i16* %a, i16 2
+ %2 = load i16, i16* %1
%3 = or i16 %x, %2
store i16 %3, i16* %1
ret void
@@ -57,9 +57,9 @@ define void @am5(i16* %a, i16 %x) readonly {
@baz = common global %S zeroinitializer
define void @am6(i16 %x) nounwind {
- %1 = load i16* getelementptr (%S* @baz, i32 0, i32 1)
+ %1 = load i16, i16* getelementptr (%S, %S* @baz, i32 0, i32 1)
%2 = or i16 %x, %1
- store i16 %2, i16* getelementptr (%S* @baz, i32 0, i32 1)
+ store i16 %2, i16* getelementptr (%S, %S* @baz, i32 0, i32 1)
ret void
}
; CHECK-LABEL: am6:
@@ -69,9 +69,9 @@ define void @am6(i16 %x) nounwind {
@duh = external global %T
define void @am7(i16 %n, i8 %x) nounwind {
- %1 = getelementptr %T* @duh, i32 0, i32 1
- %2 = getelementptr [2 x i8]* %1, i16 0, i16 %n
- %3 = load i8* %2
+ %1 = getelementptr %T, %T* @duh, i32 0, i32 1
+ %2 = getelementptr [2 x i8], [2 x i8]* %1, i16 0, i16 %n
+ %3 = load i8, i8* %2
%4 = or i8 %x, %3
store i8 %4, i8* %2
ret void
diff --git a/test/CodeGen/MSP430/AddrMode-mov-rx.ll b/test/CodeGen/MSP430/AddrMode-mov-rx.ll
index 378b7ae58ff6..cdee931bf96d 100644
--- a/test/CodeGen/MSP430/AddrMode-mov-rx.ll
+++ b/test/CodeGen/MSP430/AddrMode-mov-rx.ll
@@ -3,7 +3,7 @@ target datalayout = "e-p:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:16:16"
target triple = "msp430-generic-generic"
define i16 @am1(i16* %a) nounwind {
- %1 = load i16* %a
+ %1 = load i16, i16* %a
ret i16 %1
}
; CHECK-LABEL: am1:
@@ -12,7 +12,7 @@ define i16 @am1(i16* %a) nounwind {
@foo = external global i16
define i16 @am2() nounwind {
- %1 = load i16* @foo
+ %1 = load i16, i16* @foo
ret i16 %1
}
; CHECK-LABEL: am2:
@@ -21,23 +21,23 @@ define i16 @am2() nounwind {
@bar = internal constant [2 x i8] [ i8 32, i8 64 ]
define i8 @am3(i16 %n) nounwind {
- %1 = getelementptr [2 x i8]* @bar, i16 0, i16 %n
- %2 = load i8* %1
+ %1 = getelementptr [2 x i8], [2 x i8]* @bar, i16 0, i16 %n
+ %2 = load i8, i8* %1
ret i8 %2
}
; CHECK-LABEL: am3:
; CHECK: mov.b bar(r15), r15
define i16 @am4() nounwind {
- %1 = load volatile i16* inttoptr(i16 32 to i16*)
+ %1 = load volatile i16, i16* inttoptr(i16 32 to i16*)
ret i16 %1
}
; CHECK-LABEL: am4:
; CHECK: mov.w &32, r15
define i16 @am5(i16* %a) nounwind {
- %1 = getelementptr i16* %a, i16 2
- %2 = load i16* %1
+ %1 = getelementptr i16, i16* %a, i16 2
+ %2 = load i16, i16* %1
ret i16 %2
}
; CHECK-LABEL: am5:
@@ -47,7 +47,7 @@ define i16 @am5(i16* %a) nounwind {
@baz = common global %S zeroinitializer, align 1
define i16 @am6() nounwind {
- %1 = load i16* getelementptr (%S* @baz, i32 0, i32 1)
+ %1 = load i16, i16* getelementptr (%S, %S* @baz, i32 0, i32 1)
ret i16 %1
}
; CHECK-LABEL: am6:
@@ -57,9 +57,9 @@ define i16 @am6() nounwind {
@duh = internal constant %T { i16 16, [2 x i8][i8 32, i8 64 ] }
define i8 @am7(i16 %n) nounwind {
- %1 = getelementptr %T* @duh, i32 0, i32 1
- %2 = getelementptr [2 x i8]* %1, i16 0, i16 %n
- %3= load i8* %2
+ %1 = getelementptr %T, %T* @duh, i32 0, i32 1
+ %2 = getelementptr [2 x i8], [2 x i8]* %1, i16 0, i16 %n
+ %3= load i8, i8* %2
ret i8 %3
}
; CHECK-LABEL: am7:
diff --git a/test/CodeGen/MSP430/AddrMode-mov-xr.ll b/test/CodeGen/MSP430/AddrMode-mov-xr.ll
index f55fd542645c..ccb42886e9b4 100644
--- a/test/CodeGen/MSP430/AddrMode-mov-xr.ll
+++ b/test/CodeGen/MSP430/AddrMode-mov-xr.ll
@@ -21,7 +21,7 @@ define void @am2(i16 %a) nounwind {
@bar = external global [2 x i8]
define void @am3(i16 %i, i8 %a) nounwind {
- %1 = getelementptr [2 x i8]* @bar, i16 0, i16 %i
+ %1 = getelementptr [2 x i8], [2 x i8]* @bar, i16 0, i16 %i
store i8 %a, i8* %1
ret void
}
@@ -36,7 +36,7 @@ define void @am4(i16 %a) nounwind {
; CHECK: mov.w r15, &32
define void @am5(i16* nocapture %p, i16 %a) nounwind readonly {
- %1 = getelementptr inbounds i16* %p, i16 2
+ %1 = getelementptr inbounds i16, i16* %p, i16 2
store i16 %a, i16* %1
ret void
}
@@ -47,7 +47,7 @@ define void @am5(i16* nocapture %p, i16 %a) nounwind readonly {
@baz = common global %S zeroinitializer, align 1
define void @am6(i16 %a) nounwind {
- store i16 %a, i16* getelementptr (%S* @baz, i32 0, i32 1)
+ store i16 %a, i16* getelementptr (%S, %S* @baz, i32 0, i32 1)
ret void
}
; CHECK-LABEL: am6:
@@ -57,8 +57,8 @@ define void @am6(i16 %a) nounwind {
@duh = external global %T
define void @am7(i16 %n, i8 %a) nounwind {
- %1 = getelementptr %T* @duh, i32 0, i32 1
- %2 = getelementptr [2 x i8]* %1, i16 0, i16 %n
+ %1 = getelementptr %T, %T* @duh, i32 0, i32 1
+ %2 = getelementptr [2 x i8], [2 x i8]* %1, i16 0, i16 %n
store i8 %a, i8* %2
ret void
}
diff --git a/test/CodeGen/MSP430/Inst16mi.ll b/test/CodeGen/MSP430/Inst16mi.ll
index e9ab75cc80bc..38c16f2ba235 100644
--- a/test/CodeGen/MSP430/Inst16mi.ll
+++ b/test/CodeGen/MSP430/Inst16mi.ll
@@ -14,7 +14,7 @@ define void @mov() nounwind {
define void @add() nounwind {
; CHECK-LABEL: add:
; CHECK: add.w #2, &foo
- %1 = load i16* @foo
+ %1 = load i16, i16* @foo
%2 = add i16 %1, 2
store i16 %2, i16 * @foo
ret void
@@ -23,7 +23,7 @@ define void @add() nounwind {
define void @and() nounwind {
; CHECK-LABEL: and:
; CHECK: and.w #2, &foo
- %1 = load i16* @foo
+ %1 = load i16, i16* @foo
%2 = and i16 %1, 2
store i16 %2, i16 * @foo
ret void
@@ -32,7 +32,7 @@ define void @and() nounwind {
define void @bis() nounwind {
; CHECK-LABEL: bis:
; CHECK: bis.w #2, &foo
- %1 = load i16* @foo
+ %1 = load i16, i16* @foo
%2 = or i16 %1, 2
store i16 %2, i16 * @foo
ret void
@@ -41,7 +41,7 @@ define void @bis() nounwind {
define void @xor() nounwind {
; CHECK-LABEL: xor:
; CHECK: xor.w #2, &foo
- %1 = load i16* @foo
+ %1 = load i16, i16* @foo
%2 = xor i16 %1, 2
store i16 %2, i16 * @foo
ret void
diff --git a/test/CodeGen/MSP430/Inst16mm.ll b/test/CodeGen/MSP430/Inst16mm.ll
index 5c93e37bfa96..c75e1beb2356 100644
--- a/test/CodeGen/MSP430/Inst16mm.ll
+++ b/test/CodeGen/MSP430/Inst16mm.ll
@@ -7,7 +7,7 @@ target triple = "msp430-generic-generic"
define void @mov() nounwind {
; CHECK-LABEL: mov:
; CHECK: mov.w &bar, &foo
- %1 = load i16* @bar
+ %1 = load i16, i16* @bar
store i16 %1, i16* @foo
ret void
}
@@ -15,8 +15,8 @@ define void @mov() nounwind {
define void @add() nounwind {
; CHECK-LABEL: add:
; CHECK: add.w &bar, &foo
- %1 = load i16* @bar
- %2 = load i16* @foo
+ %1 = load i16, i16* @bar
+ %2 = load i16, i16* @foo
%3 = add i16 %2, %1
store i16 %3, i16* @foo
ret void
@@ -25,8 +25,8 @@ define void @add() nounwind {
define void @and() nounwind {
; CHECK-LABEL: and:
; CHECK: and.w &bar, &foo
- %1 = load i16* @bar
- %2 = load i16* @foo
+ %1 = load i16, i16* @bar
+ %2 = load i16, i16* @foo
%3 = and i16 %2, %1
store i16 %3, i16* @foo
ret void
@@ -35,8 +35,8 @@ define void @and() nounwind {
define void @bis() nounwind {
; CHECK-LABEL: bis:
; CHECK: bis.w &bar, &foo
- %1 = load i16* @bar
- %2 = load i16* @foo
+ %1 = load i16, i16* @bar
+ %2 = load i16, i16* @foo
%3 = or i16 %2, %1
store i16 %3, i16* @foo
ret void
@@ -45,8 +45,8 @@ define void @bis() nounwind {
define void @xor() nounwind {
; CHECK-LABEL: xor:
; CHECK: xor.w &bar, &foo
- %1 = load i16* @bar
- %2 = load i16* @foo
+ %1 = load i16, i16* @bar
+ %2 = load i16, i16* @foo
%3 = xor i16 %2, %1
store i16 %3, i16* @foo
ret void
@@ -58,10 +58,10 @@ entry:
%x = alloca i32, align 2 ; <i32*> [#uses=1]
%y = alloca i32, align 2 ; <i32*> [#uses=1]
store i16 0, i16* %retval
- %tmp = load i32* %y ; <i32> [#uses=1]
+ %tmp = load i32, i32* %y ; <i32> [#uses=1]
store i32 %tmp, i32* %x
store i16 0, i16* %retval
- %0 = load i16* %retval ; <i16> [#uses=1]
+ %0 = load i16, i16* %retval ; <i16> [#uses=1]
ret i16 %0
; CHECK-LABEL: mov2:
; CHECK: mov.w 2(r1), 6(r1)
diff --git a/test/CodeGen/MSP430/Inst16mr.ll b/test/CodeGen/MSP430/Inst16mr.ll
index 201004893684..50dc4c0b6731 100644
--- a/test/CodeGen/MSP430/Inst16mr.ll
+++ b/test/CodeGen/MSP430/Inst16mr.ll
@@ -13,7 +13,7 @@ define void @mov(i16 %a) nounwind {
define void @add(i16 %a) nounwind {
; CHECK-LABEL: add:
; CHECK: add.w r15, &foo
- %1 = load i16* @foo
+ %1 = load i16, i16* @foo
%2 = add i16 %a, %1
store i16 %2, i16* @foo
ret void
@@ -22,7 +22,7 @@ define void @add(i16 %a) nounwind {
define void @and(i16 %a) nounwind {
; CHECK-LABEL: and:
; CHECK: and.w r15, &foo
- %1 = load i16* @foo
+ %1 = load i16, i16* @foo
%2 = and i16 %a, %1
store i16 %2, i16* @foo
ret void
@@ -31,7 +31,7 @@ define void @and(i16 %a) nounwind {
define void @bis(i16 %a) nounwind {
; CHECK-LABEL: bis:
; CHECK: bis.w r15, &foo
- %1 = load i16* @foo
+ %1 = load i16, i16* @foo
%2 = or i16 %a, %1
store i16 %2, i16* @foo
ret void
@@ -41,7 +41,7 @@ define void @bic(i16 zeroext %m) nounwind {
; CHECK-LABEL: bic:
; CHECK: bic.w r15, &foo
%1 = xor i16 %m, -1
- %2 = load i16* @foo
+ %2 = load i16, i16* @foo
%3 = and i16 %2, %1
store i16 %3, i16* @foo
ret void
@@ -50,7 +50,7 @@ define void @bic(i16 zeroext %m) nounwind {
define void @xor(i16 %a) nounwind {
; CHECK-LABEL: xor:
; CHECK: xor.w r15, &foo
- %1 = load i16* @foo
+ %1 = load i16, i16* @foo
%2 = xor i16 %a, %1
store i16 %2, i16* @foo
ret void
diff --git a/test/CodeGen/MSP430/Inst16rm.ll b/test/CodeGen/MSP430/Inst16rm.ll
index e6c52616c8f6..4f6998ee68df 100644
--- a/test/CodeGen/MSP430/Inst16rm.ll
+++ b/test/CodeGen/MSP430/Inst16rm.ll
@@ -6,7 +6,7 @@ target triple = "msp430-generic-generic"
define i16 @add(i16 %a) nounwind {
; CHECK-LABEL: add:
; CHECK: add.w &foo, r15
- %1 = load i16* @foo
+ %1 = load i16, i16* @foo
%2 = add i16 %a, %1
ret i16 %2
}
@@ -14,7 +14,7 @@ define i16 @add(i16 %a) nounwind {
define i16 @and(i16 %a) nounwind {
; CHECK-LABEL: and:
; CHECK: and.w &foo, r15
- %1 = load i16* @foo
+ %1 = load i16, i16* @foo
%2 = and i16 %a, %1
ret i16 %2
}
@@ -22,7 +22,7 @@ define i16 @and(i16 %a) nounwind {
define i16 @bis(i16 %a) nounwind {
; CHECK-LABEL: bis:
; CHECK: bis.w &foo, r15
- %1 = load i16* @foo
+ %1 = load i16, i16* @foo
%2 = or i16 %a, %1
ret i16 %2
}
@@ -30,7 +30,7 @@ define i16 @bis(i16 %a) nounwind {
define i16 @bic(i16 %a) nounwind {
; CHECK-LABEL: bic:
; CHECK: bic.w &foo, r15
- %1 = load i16* @foo
+ %1 = load i16, i16* @foo
%2 = xor i16 %1, -1
%3 = and i16 %a, %2
ret i16 %3
@@ -39,7 +39,7 @@ define i16 @bic(i16 %a) nounwind {
define i16 @xor(i16 %a) nounwind {
; CHECK-LABEL: xor:
; CHECK: xor.w &foo, r15
- %1 = load i16* @foo
+ %1 = load i16, i16* @foo
%2 = xor i16 %a, %1
ret i16 %2
}
diff --git a/test/CodeGen/MSP430/Inst8mi.ll b/test/CodeGen/MSP430/Inst8mi.ll
index a2c7b71d66dc..ff22d7e1eb3d 100644
--- a/test/CodeGen/MSP430/Inst8mi.ll
+++ b/test/CodeGen/MSP430/Inst8mi.ll
@@ -13,7 +13,7 @@ define void @mov() nounwind {
define void @add() nounwind {
; CHECK-LABEL: add:
; CHECK: add.b #2, &foo
- %1 = load i8* @foo
+ %1 = load i8, i8* @foo
%2 = add i8 %1, 2
store i8 %2, i8 * @foo
ret void
@@ -22,7 +22,7 @@ define void @add() nounwind {
define void @and() nounwind {
; CHECK-LABEL: and:
; CHECK: and.b #2, &foo
- %1 = load i8* @foo
+ %1 = load i8, i8* @foo
%2 = and i8 %1, 2
store i8 %2, i8 * @foo
ret void
@@ -31,7 +31,7 @@ define void @and() nounwind {
define void @bis() nounwind {
; CHECK-LABEL: bis:
; CHECK: bis.b #2, &foo
- %1 = load i8* @foo
+ %1 = load i8, i8* @foo
%2 = or i8 %1, 2
store i8 %2, i8 * @foo
ret void
@@ -40,7 +40,7 @@ define void @bis() nounwind {
define void @xor() nounwind {
; CHECK-LABEL: xor:
; CHECK: xor.b #2, &foo
- %1 = load i8* @foo
+ %1 = load i8, i8* @foo
%2 = xor i8 %1, 2
store i8 %2, i8 * @foo
ret void
diff --git a/test/CodeGen/MSP430/Inst8mm.ll b/test/CodeGen/MSP430/Inst8mm.ll
index d1ce8bc66b93..b9848dc12303 100644
--- a/test/CodeGen/MSP430/Inst8mm.ll
+++ b/test/CodeGen/MSP430/Inst8mm.ll
@@ -8,7 +8,7 @@ target triple = "msp430-generic-generic"
define void @mov() nounwind {
; CHECK-LABEL: mov:
; CHECK: mov.b &bar, &foo
- %1 = load i8* @bar
+ %1 = load i8, i8* @bar
store i8 %1, i8* @foo
ret void
}
@@ -16,8 +16,8 @@ define void @mov() nounwind {
define void @add() nounwind {
; CHECK-LABEL: add:
; CHECK: add.b &bar, &foo
- %1 = load i8* @bar
- %2 = load i8* @foo
+ %1 = load i8, i8* @bar
+ %2 = load i8, i8* @foo
%3 = add i8 %2, %1
store i8 %3, i8* @foo
ret void
@@ -26,8 +26,8 @@ define void @add() nounwind {
define void @and() nounwind {
; CHECK-LABEL: and:
; CHECK: and.b &bar, &foo
- %1 = load i8* @bar
- %2 = load i8* @foo
+ %1 = load i8, i8* @bar
+ %2 = load i8, i8* @foo
%3 = and i8 %2, %1
store i8 %3, i8* @foo
ret void
@@ -36,8 +36,8 @@ define void @and() nounwind {
define void @bis() nounwind {
; CHECK-LABEL: bis:
; CHECK: bis.b &bar, &foo
- %1 = load i8* @bar
- %2 = load i8* @foo
+ %1 = load i8, i8* @bar
+ %2 = load i8, i8* @foo
%3 = or i8 %2, %1
store i8 %3, i8* @foo
ret void
@@ -46,8 +46,8 @@ define void @bis() nounwind {
define void @xor() nounwind {
; CHECK-LABEL: xor:
; CHECK: xor.b &bar, &foo
- %1 = load i8* @bar
- %2 = load i8* @foo
+ %1 = load i8, i8* @bar
+ %2 = load i8, i8* @foo
%3 = xor i8 %2, %1
store i8 %3, i8* @foo
ret void
diff --git a/test/CodeGen/MSP430/Inst8mr.ll b/test/CodeGen/MSP430/Inst8mr.ll
index 0b3566770cf0..f03c7e1a659b 100644
--- a/test/CodeGen/MSP430/Inst8mr.ll
+++ b/test/CodeGen/MSP430/Inst8mr.ll
@@ -13,7 +13,7 @@ define void @mov(i8 %a) nounwind {
define void @and(i8 %a) nounwind {
; CHECK-LABEL: and:
; CHECK: and.b r15, &foo
- %1 = load i8* @foo
+ %1 = load i8, i8* @foo
%2 = and i8 %a, %1
store i8 %2, i8* @foo
ret void
@@ -22,7 +22,7 @@ define void @and(i8 %a) nounwind {
define void @add(i8 %a) nounwind {
; CHECK-LABEL: add:
; CHECK: add.b r15, &foo
- %1 = load i8* @foo
+ %1 = load i8, i8* @foo
%2 = add i8 %a, %1
store i8 %2, i8* @foo
ret void
@@ -31,7 +31,7 @@ define void @add(i8 %a) nounwind {
define void @bis(i8 %a) nounwind {
; CHECK-LABEL: bis:
; CHECK: bis.b r15, &foo
- %1 = load i8* @foo
+ %1 = load i8, i8* @foo
%2 = or i8 %a, %1
store i8 %2, i8* @foo
ret void
@@ -41,7 +41,7 @@ define void @bic(i8 zeroext %m) nounwind {
; CHECK-LABEL: bic:
; CHECK: bic.b r15, &foo
%1 = xor i8 %m, -1
- %2 = load i8* @foo
+ %2 = load i8, i8* @foo
%3 = and i8 %2, %1
store i8 %3, i8* @foo
ret void
@@ -50,7 +50,7 @@ define void @bic(i8 zeroext %m) nounwind {
define void @xor(i8 %a) nounwind {
; CHECK-LABEL: xor:
; CHECK: xor.b r15, &foo
- %1 = load i8* @foo
+ %1 = load i8, i8* @foo
%2 = xor i8 %a, %1
store i8 %2, i8* @foo
ret void
diff --git a/test/CodeGen/MSP430/Inst8rm.ll b/test/CodeGen/MSP430/Inst8rm.ll
index 308163ed7307..e1a970395578 100644
--- a/test/CodeGen/MSP430/Inst8rm.ll
+++ b/test/CodeGen/MSP430/Inst8rm.ll
@@ -6,7 +6,7 @@ target triple = "msp430-generic-generic"
define i8 @add(i8 %a) nounwind {
; CHECK-LABEL: add:
; CHECK: add.b &foo, r15
- %1 = load i8* @foo
+ %1 = load i8, i8* @foo
%2 = add i8 %a, %1
ret i8 %2
}
@@ -14,7 +14,7 @@ define i8 @add(i8 %a) nounwind {
define i8 @and(i8 %a) nounwind {
; CHECK-LABEL: and:
; CHECK: and.b &foo, r15
- %1 = load i8* @foo
+ %1 = load i8, i8* @foo
%2 = and i8 %a, %1
ret i8 %2
}
@@ -22,7 +22,7 @@ define i8 @and(i8 %a) nounwind {
define i8 @bis(i8 %a) nounwind {
; CHECK-LABEL: bis:
; CHECK: bis.b &foo, r15
- %1 = load i8* @foo
+ %1 = load i8, i8* @foo
%2 = or i8 %a, %1
ret i8 %2
}
@@ -30,7 +30,7 @@ define i8 @bis(i8 %a) nounwind {
define i8 @bic(i8 %a) nounwind {
; CHECK-LABEL: bic:
; CHECK: bic.b &foo, r15
- %1 = load i8* @foo
+ %1 = load i8, i8* @foo
%2 = xor i8 %1, -1
%3 = and i8 %a, %2
ret i8 %3
@@ -39,7 +39,7 @@ define i8 @bic(i8 %a) nounwind {
define i8 @xor(i8 %a) nounwind {
; CHECK-LABEL: xor:
; CHECK: xor.b &foo, r15
- %1 = load i8* @foo
+ %1 = load i8, i8* @foo
%2 = xor i8 %a, %1
ret i8 %2
}
diff --git a/test/CodeGen/MSP430/bit.ll b/test/CodeGen/MSP430/bit.ll
index 2ffc191695f0..45964f97f1bf 100644
--- a/test/CodeGen/MSP430/bit.ll
+++ b/test/CodeGen/MSP430/bit.ll
@@ -33,7 +33,7 @@ define i8 @bitbir(i8 %a) nounwind {
; CHECK: bit.b #15, r15
define i8 @bitbmi() nounwind {
- %t1 = load i8* @foo8
+ %t1 = load i8, i8* @foo8
%t2 = and i8 %t1, 15
%t3 = icmp ne i8 %t2, 0
%t4 = zext i1 %t3 to i8
@@ -43,7 +43,7 @@ define i8 @bitbmi() nounwind {
; CHECK: bit.b #15, &foo8
define i8 @bitbim() nounwind {
- %t1 = load i8* @foo8
+ %t1 = load i8, i8* @foo8
%t2 = and i8 15, %t1
%t3 = icmp ne i8 %t2, 0
%t4 = zext i1 %t3 to i8
@@ -53,7 +53,7 @@ define i8 @bitbim() nounwind {
; CHECK: bit.b #15, &foo8
define i8 @bitbrm(i8 %a) nounwind {
- %t1 = load i8* @foo8
+ %t1 = load i8, i8* @foo8
%t2 = and i8 %a, %t1
%t3 = icmp ne i8 %t2, 0
%t4 = zext i1 %t3 to i8
@@ -63,7 +63,7 @@ define i8 @bitbrm(i8 %a) nounwind {
; CHECK: bit.b &foo8, r15
define i8 @bitbmr(i8 %a) nounwind {
- %t1 = load i8* @foo8
+ %t1 = load i8, i8* @foo8
%t2 = and i8 %t1, %a
%t3 = icmp ne i8 %t2, 0
%t4 = zext i1 %t3 to i8
@@ -73,8 +73,8 @@ define i8 @bitbmr(i8 %a) nounwind {
; CHECK: bit.b r15, &foo8
define i8 @bitbmm() nounwind {
- %t1 = load i8* @foo8
- %t2 = load i8* @bar8
+ %t1 = load i8, i8* @foo8
+ %t2 = load i8, i8* @bar8
%t3 = and i8 %t1, %t2
%t4 = icmp ne i8 %t3, 0
%t5 = zext i1 %t4 to i8
@@ -114,7 +114,7 @@ define i16 @bitwir(i16 %a) nounwind {
; CHECK: bit.w #4080, r15
define i16 @bitwmi() nounwind {
- %t1 = load i16* @foo16
+ %t1 = load i16, i16* @foo16
%t2 = and i16 %t1, 4080
%t3 = icmp ne i16 %t2, 0
%t4 = zext i1 %t3 to i16
@@ -124,7 +124,7 @@ define i16 @bitwmi() nounwind {
; CHECK: bit.w #4080, &foo16
define i16 @bitwim() nounwind {
- %t1 = load i16* @foo16
+ %t1 = load i16, i16* @foo16
%t2 = and i16 4080, %t1
%t3 = icmp ne i16 %t2, 0
%t4 = zext i1 %t3 to i16
@@ -134,7 +134,7 @@ define i16 @bitwim() nounwind {
; CHECK: bit.w #4080, &foo16
define i16 @bitwrm(i16 %a) nounwind {
- %t1 = load i16* @foo16
+ %t1 = load i16, i16* @foo16
%t2 = and i16 %a, %t1
%t3 = icmp ne i16 %t2, 0
%t4 = zext i1 %t3 to i16
@@ -144,7 +144,7 @@ define i16 @bitwrm(i16 %a) nounwind {
; CHECK: bit.w &foo16, r15
define i16 @bitwmr(i16 %a) nounwind {
- %t1 = load i16* @foo16
+ %t1 = load i16, i16* @foo16
%t2 = and i16 %t1, %a
%t3 = icmp ne i16 %t2, 0
%t4 = zext i1 %t3 to i16
@@ -154,8 +154,8 @@ define i16 @bitwmr(i16 %a) nounwind {
; CHECK: bit.w r15, &foo16
define i16 @bitwmm() nounwind {
- %t1 = load i16* @foo16
- %t2 = load i16* @bar16
+ %t1 = load i16, i16* @foo16
+ %t2 = load i16, i16* @bar16
%t3 = and i16 %t1, %t2
%t4 = icmp ne i16 %t3, 0
%t5 = zext i1 %t4 to i16
diff --git a/test/CodeGen/MSP430/byval.ll b/test/CodeGen/MSP430/byval.ll
index bd38e95554df..410a6b047b6e 100644
--- a/test/CodeGen/MSP430/byval.ll
+++ b/test/CodeGen/MSP430/byval.ll
@@ -10,8 +10,8 @@ define i16 @callee(%struct.Foo* byval %f) nounwind {
entry:
; CHECK-LABEL: callee:
; CHECK: mov.w 2(r1), r15
- %0 = getelementptr inbounds %struct.Foo* %f, i32 0, i32 0
- %1 = load i16* %0, align 2
+ %0 = getelementptr inbounds %struct.Foo, %struct.Foo* %f, i32 0, i32 0
+ %1 = load i16, i16* %0, align 2
ret i16 %1
}
diff --git a/test/CodeGen/MSP430/indirectbr.ll b/test/CodeGen/MSP430/indirectbr.ll
index 2a62c9135c6a..af1a466b3c78 100644
--- a/test/CodeGen/MSP430/indirectbr.ll
+++ b/test/CodeGen/MSP430/indirectbr.ll
@@ -5,7 +5,7 @@
define internal i16 @foo(i16 %i) nounwind {
entry:
- %0 = load i8** @nextaddr, align 4 ; <i8*> [#uses=2]
+ %0 = load i8*, i8** @nextaddr, align 4 ; <i8*> [#uses=2]
%1 = icmp eq i8* %0, null ; <i1> [#uses=1]
br i1 %1, label %bb3, label %bb2
@@ -14,8 +14,8 @@ bb2: ; preds = %bb3, %entry
indirectbr i8* %gotovar.4.0, [label %L5, label %L4, label %L3, label %L2, label %L1]
bb3: ; preds = %entry
- %2 = getelementptr inbounds [5 x i8*]* @C.0.2070, i16 0, i16 %i ; <i8**> [#uses=1]
- %gotovar.4.0.pre = load i8** %2, align 4 ; <i8*> [#uses=1]
+ %2 = getelementptr inbounds [5 x i8*], [5 x i8*]* @C.0.2070, i16 0, i16 %i ; <i8**> [#uses=1]
+ %gotovar.4.0.pre = load i8*, i8** %2, align 4 ; <i8*> [#uses=1]
br label %bb2
L5: ; preds = %bb2
diff --git a/test/CodeGen/MSP430/indirectbr2.ll b/test/CodeGen/MSP430/indirectbr2.ll
index 93788b696553..b0b4f1cbfd24 100644
--- a/test/CodeGen/MSP430/indirectbr2.ll
+++ b/test/CodeGen/MSP430/indirectbr2.ll
@@ -3,8 +3,8 @@
define internal i16 @foo(i16 %i) nounwind {
entry:
- %tmp1 = getelementptr inbounds [5 x i8*]* @C.0.2070, i16 0, i16 %i ; <i8**> [#uses=1]
- %gotovar.4.0 = load i8** %tmp1, align 4 ; <i8*> [#uses=1]
+ %tmp1 = getelementptr inbounds [5 x i8*], [5 x i8*]* @C.0.2070, i16 0, i16 %i ; <i8**> [#uses=1]
+ %gotovar.4.0 = load i8*, i8** %tmp1, align 4 ; <i8*> [#uses=1]
; CHECK: br .LC.0.2070(r12)
indirectbr i8* %gotovar.4.0, [label %L5, label %L4, label %L3, label %L2, label %L1]
diff --git a/test/CodeGen/MSP430/inline-asm.ll b/test/CodeGen/MSP430/inline-asm.ll
index 0e7886a4721b..a2f13235b1d3 100644
--- a/test/CodeGen/MSP430/inline-asm.ll
+++ b/test/CodeGen/MSP430/inline-asm.ll
@@ -15,12 +15,12 @@ define void @reg(i16 %a) nounwind {
@foo = global i16 0, align 2
define void @immmem() nounwind {
- call void asm sideeffect "bic\09$0,r2", "i"(i16* getelementptr(i16* @foo, i32 1)) nounwind
+ call void asm sideeffect "bic\09$0,r2", "i"(i16* getelementptr(i16, i16* @foo, i32 1)) nounwind
ret void
}
define void @mem() nounwind {
- %fooval = load i16* @foo
+ %fooval = load i16, i16* @foo
call void asm sideeffect "bic\09$0,r2", "m"(i16 %fooval) nounwind
ret void
}
diff --git a/test/CodeGen/MSP430/jumptable.ll b/test/CodeGen/MSP430/jumptable.ll
index 239d79ed9cba..4ba930b04e39 100644
--- a/test/CodeGen/MSP430/jumptable.ll
+++ b/test/CodeGen/MSP430/jumptable.ll
@@ -10,7 +10,7 @@ entry:
%retval = alloca i16, align 2
%i.addr = alloca i16, align 2
store i16 %i, i16* %i.addr, align 2
- %0 = load i16* %i.addr, align 2
+ %0 = load i16, i16* %i.addr, align 2
; CHECK: mov.w #2, r14
; CHECK: call #__mulhi3hw_noint
; CHECK: br .LJTI0_0(r15)
@@ -42,7 +42,7 @@ sw.default: ; preds = %entry
br label %return
return: ; preds = %sw.default, %sw.bb3, %sw.bb2, %sw.bb1, %sw.bb
- %1 = load i16* %retval
+ %1 = load i16, i16* %retval
ret i16 %1
; CHECK: ret
}
diff --git a/test/CodeGen/MSP430/memset.ll b/test/CodeGen/MSP430/memset.ll
index bf105446f52d..76cfb29586d7 100644
--- a/test/CodeGen/MSP430/memset.ll
+++ b/test/CodeGen/MSP430/memset.ll
@@ -8,7 +8,7 @@ target triple = "msp430---elf"
define void @test() nounwind {
entry:
; CHECK-LABEL: test:
- %0 = load i8** @buf, align 2
+ %0 = load i8*, i8** @buf, align 2
; CHECK: mov.w &buf, r15
; CHECK-NEXT: mov.w #5, r14
; CHECK-NEXT: mov.w #128, r13
diff --git a/test/CodeGen/MSP430/misched-msp430.ll b/test/CodeGen/MSP430/misched-msp430.ll
index c8541eff5836..3d18fa005a6b 100644
--- a/test/CodeGen/MSP430/misched-msp430.ll
+++ b/test/CodeGen/MSP430/misched-msp430.ll
@@ -14,7 +14,7 @@ target datalayout = "e-p:16:16:16-i8:8:8-i16:16:16-i32:16:32-n8:16"
; CHECK: ret
define void @f() {
entry:
- %0 = load i16* @y, align 2
+ %0 = load i16, i16* @y, align 2
store i16 %0, i16* @x, align 2
ret void
}
diff --git a/test/CodeGen/MSP430/mult-alt-generic-msp430.ll b/test/CodeGen/MSP430/mult-alt-generic-msp430.ll
index 342afed66053..8cf83879b090 100644
--- a/test/CodeGen/MSP430/mult-alt-generic-msp430.ll
+++ b/test/CodeGen/MSP430/mult-alt-generic-msp430.ll
@@ -33,10 +33,10 @@ entry:
%in1 = alloca i16, align 2
store i16 0, i16* %out0, align 2
store i16 1, i16* %in1, align 2
- %tmp = load i16* %in1, align 2
+ %tmp = load i16, i16* %in1, align 2
%0 = call i16 asm "foo $1,$0", "=r,<r"(i16 %tmp) nounwind
store i16 %0, i16* %out0, align 2
- %tmp1 = load i16* %in1, align 2
+ %tmp1 = load i16, i16* %in1, align 2
%1 = call i16 asm "foo $1,$0", "=r,r<"(i16 %tmp1) nounwind
store i16 %1, i16* %out0, align 2
ret void
@@ -48,10 +48,10 @@ entry:
%in1 = alloca i16, align 2
store i16 0, i16* %out0, align 2
store i16 1, i16* %in1, align 2
- %tmp = load i16* %in1, align 2
+ %tmp = load i16, i16* %in1, align 2
%0 = call i16 asm "foo $1,$0", "=r,>r"(i16 %tmp) nounwind
store i16 %0, i16* %out0, align 2
- %tmp1 = load i16* %in1, align 2
+ %tmp1 = load i16, i16* %in1, align 2
%1 = call i16 asm "foo $1,$0", "=r,r>"(i16 %tmp1) nounwind
store i16 %1, i16* %out0, align 2
ret void
@@ -63,7 +63,7 @@ entry:
%in1 = alloca i16, align 2
store i16 0, i16* %out0, align 2
store i16 1, i16* %in1, align 2
- %tmp = load i16* %in1, align 2
+ %tmp = load i16, i16* %in1, align 2
%0 = call i16 asm "foo $1,$0", "=r,r"(i16 %tmp) nounwind
store i16 %0, i16* %out0, align 2
ret void
@@ -120,10 +120,10 @@ entry:
%in1 = alloca i16, align 2
store i16 0, i16* %out0, align 2
store i16 1, i16* %in1, align 2
- %tmp = load i16* %in1, align 2
+ %tmp = load i16, i16* %in1, align 2
%0 = call i16 asm "foo $1,$0", "=r,imr"(i16 %tmp) nounwind
store i16 %0, i16* %out0, align 2
- %tmp1 = load i16* @min1, align 2
+ %tmp1 = load i16, i16* @min1, align 2
%1 = call i16 asm "foo $1,$0", "=r,imr"(i16 %tmp1) nounwind
store i16 %1, i16* %out0, align 2
%2 = call i16 asm "foo $1,$0", "=r,imr"(i16 1) nounwind
@@ -137,15 +137,15 @@ entry:
%in1 = alloca i16, align 2
store i16 0, i16* %out0, align 2
store i16 1, i16* %in1, align 2
- %tmp = load i16* %in1, align 2
+ %tmp = load i16, i16* %in1, align 2
%0 = call i16 asm "foo $1,$0", "=r,X"(i16 %tmp) nounwind
store i16 %0, i16* %out0, align 2
- %tmp1 = load i16* @min1, align 2
+ %tmp1 = load i16, i16* @min1, align 2
%1 = call i16 asm "foo $1,$0", "=r,X"(i16 %tmp1) nounwind
store i16 %1, i16* %out0, align 2
%2 = call i16 asm "foo $1,$0", "=r,X"(i16 1) nounwind
store i16 %2, i16* %out0, align 2
- %3 = call i16 asm "foo $1,$0", "=r,X"(i16* getelementptr inbounds ([2 x i16]* @marray, i32 0, i32 0)) nounwind
+ %3 = call i16 asm "foo $1,$0", "=r,X"(i16* getelementptr inbounds ([2 x i16], [2 x i16]* @marray, i32 0, i32 0)) nounwind
store i16 %3, i16* %out0, align 2
; No lowering support.
; %4 = call i16 asm "foo $1,$0", "=r,X"(double 1.000000e+001) nounwind
@@ -159,14 +159,14 @@ define void @single_p() nounwind {
entry:
%out0 = alloca i16, align 2
store i16 0, i16* %out0, align 2
- %0 = call i16 asm "foo $1,$0", "=r,r"(i16* getelementptr inbounds ([2 x i16]* @marray, i32 0, i32 0)) nounwind
+ %0 = call i16 asm "foo $1,$0", "=r,r"(i16* getelementptr inbounds ([2 x i16], [2 x i16]* @marray, i32 0, i32 0)) nounwind
store i16 %0, i16* %out0, align 2
ret void
}
define void @multi_m() nounwind {
entry:
- %tmp = load i16* @min1, align 2
+ %tmp = load i16, i16* @min1, align 2
call void asm "foo $1,$0", "=*m|r,m|r"(i16* @mout0, i16 %tmp) nounwind
ret void
}
@@ -191,10 +191,10 @@ entry:
%in1 = alloca i16, align 2
store i16 0, i16* %out0, align 2
store i16 1, i16* %in1, align 2
- %tmp = load i16* %in1, align 2
+ %tmp = load i16, i16* %in1, align 2
%0 = call i16 asm "foo $1,$0", "=r|r,r|<r"(i16 %tmp) nounwind
store i16 %0, i16* %out0, align 2
- %tmp1 = load i16* %in1, align 2
+ %tmp1 = load i16, i16* %in1, align 2
%1 = call i16 asm "foo $1,$0", "=r|r,r|r<"(i16 %tmp1) nounwind
store i16 %1, i16* %out0, align 2
ret void
@@ -206,10 +206,10 @@ entry:
%in1 = alloca i16, align 2
store i16 0, i16* %out0, align 2
store i16 1, i16* %in1, align 2
- %tmp = load i16* %in1, align 2
+ %tmp = load i16, i16* %in1, align 2
%0 = call i16 asm "foo $1,$0", "=r|r,r|>r"(i16 %tmp) nounwind
store i16 %0, i16* %out0, align 2
- %tmp1 = load i16* %in1, align 2
+ %tmp1 = load i16, i16* %in1, align 2
%1 = call i16 asm "foo $1,$0", "=r|r,r|r>"(i16 %tmp1) nounwind
store i16 %1, i16* %out0, align 2
ret void
@@ -221,7 +221,7 @@ entry:
%in1 = alloca i16, align 2
store i16 0, i16* %out0, align 2
store i16 1, i16* %in1, align 2
- %tmp = load i16* %in1, align 2
+ %tmp = load i16, i16* %in1, align 2
%0 = call i16 asm "foo $1,$0", "=r|r,r|m"(i16 %tmp) nounwind
store i16 %0, i16* %out0, align 2
ret void
@@ -278,10 +278,10 @@ entry:
%in1 = alloca i16, align 2
store i16 0, i16* %out0, align 2
store i16 1, i16* %in1, align 2
- %tmp = load i16* %in1, align 2
+ %tmp = load i16, i16* %in1, align 2
%0 = call i16 asm "foo $1,$0", "=r|r,r|imr"(i16 %tmp) nounwind
store i16 %0, i16* %out0, align 2
- %tmp1 = load i16* @min1, align 2
+ %tmp1 = load i16, i16* @min1, align 2
%1 = call i16 asm "foo $1,$0", "=r|r,r|imr"(i16 %tmp1) nounwind
store i16 %1, i16* %out0, align 2
%2 = call i16 asm "foo $1,$0", "=r|r,r|imr"(i16 1) nounwind
@@ -295,15 +295,15 @@ entry:
%in1 = alloca i16, align 2
store i16 0, i16* %out0, align 2
store i16 1, i16* %in1, align 2
- %tmp = load i16* %in1, align 2
+ %tmp = load i16, i16* %in1, align 2
%0 = call i16 asm "foo $1,$0", "=r|r,r|X"(i16 %tmp) nounwind
store i16 %0, i16* %out0, align 2
- %tmp1 = load i16* @min1, align 2
+ %tmp1 = load i16, i16* @min1, align 2
%1 = call i16 asm "foo $1,$0", "=r|r,r|X"(i16 %tmp1) nounwind
store i16 %1, i16* %out0, align 2
%2 = call i16 asm "foo $1,$0", "=r|r,r|X"(i16 1) nounwind
store i16 %2, i16* %out0, align 2
- %3 = call i16 asm "foo $1,$0", "=r|r,r|X"(i16* getelementptr inbounds ([2 x i16]* @marray, i32 0, i32 0)) nounwind
+ %3 = call i16 asm "foo $1,$0", "=r|r,r|X"(i16* getelementptr inbounds ([2 x i16], [2 x i16]* @marray, i32 0, i32 0)) nounwind
store i16 %3, i16* %out0, align 2
; No lowering support.
; %4 = call i16 asm "foo $1,$0", "=r|r,r|X"(double 1.000000e+001) nounwind
@@ -317,7 +317,7 @@ define void @multi_p() nounwind {
entry:
%out0 = alloca i16, align 2
store i16 0, i16* %out0, align 2
- %0 = call i16 asm "foo $1,$0", "=r|r,r|r"(i16* getelementptr inbounds ([2 x i16]* @marray, i32 0, i32 0)) nounwind
+ %0 = call i16 asm "foo $1,$0", "=r|r,r|r"(i16* getelementptr inbounds ([2 x i16], [2 x i16]* @marray, i32 0, i32 0)) nounwind
store i16 %0, i16* %out0, align 2
ret void
}
diff --git a/test/CodeGen/MSP430/postinc.ll b/test/CodeGen/MSP430/postinc.ll
index 8d55fd3f8031..75a927f33fce 100644
--- a/test/CodeGen/MSP430/postinc.ll
+++ b/test/CodeGen/MSP430/postinc.ll
@@ -10,10 +10,10 @@ entry:
for.body: ; preds = %for.body, %entry
%i.010 = phi i16 [ 0, %entry ], [ %inc, %for.body ] ; <i16> [#uses=2]
%sum.09 = phi i16 [ 0, %entry ], [ %add, %for.body ] ; <i16> [#uses=1]
- %arrayidx = getelementptr i16* %a, i16 %i.010 ; <i16*> [#uses=1]
+ %arrayidx = getelementptr i16, i16* %a, i16 %i.010 ; <i16*> [#uses=1]
; CHECK-LABEL: add:
; CHECK: add.w @r{{[0-9]+}}+, r{{[0-9]+}}
- %tmp4 = load i16* %arrayidx ; <i16> [#uses=1]
+ %tmp4 = load i16, i16* %arrayidx ; <i16> [#uses=1]
%add = add i16 %tmp4, %sum.09 ; <i16> [#uses=2]
%inc = add i16 %i.010, 1 ; <i16> [#uses=2]
%exitcond = icmp eq i16 %inc, %n ; <i1> [#uses=1]
@@ -32,10 +32,10 @@ entry:
for.body: ; preds = %for.body, %entry
%i.010 = phi i16 [ 0, %entry ], [ %inc, %for.body ] ; <i16> [#uses=2]
%sum.09 = phi i16 [ 0, %entry ], [ %add, %for.body ] ; <i16> [#uses=1]
- %arrayidx = getelementptr i16* %a, i16 %i.010 ; <i16*> [#uses=1]
+ %arrayidx = getelementptr i16, i16* %a, i16 %i.010 ; <i16*> [#uses=1]
; CHECK-LABEL: sub:
; CHECK: sub.w @r{{[0-9]+}}+, r{{[0-9]+}}
- %tmp4 = load i16* %arrayidx ; <i16> [#uses=1]
+ %tmp4 = load i16, i16* %arrayidx ; <i16> [#uses=1]
%add = sub i16 %tmp4, %sum.09 ; <i16> [#uses=2]
%inc = add i16 %i.010, 1 ; <i16> [#uses=2]
%exitcond = icmp eq i16 %inc, %n ; <i1> [#uses=1]
@@ -54,10 +54,10 @@ entry:
for.body: ; preds = %for.body, %entry
%i.010 = phi i16 [ 0, %entry ], [ %inc, %for.body ] ; <i16> [#uses=2]
%sum.09 = phi i16 [ 0, %entry ], [ %add, %for.body ] ; <i16> [#uses=1]
- %arrayidx = getelementptr i16* %a, i16 %i.010 ; <i16*> [#uses=1]
+ %arrayidx = getelementptr i16, i16* %a, i16 %i.010 ; <i16*> [#uses=1]
; CHECK-LABEL: or:
; CHECK: bis.w @r{{[0-9]+}}+, r{{[0-9]+}}
- %tmp4 = load i16* %arrayidx ; <i16> [#uses=1]
+ %tmp4 = load i16, i16* %arrayidx ; <i16> [#uses=1]
%add = or i16 %tmp4, %sum.09 ; <i16> [#uses=2]
%inc = add i16 %i.010, 1 ; <i16> [#uses=2]
%exitcond = icmp eq i16 %inc, %n ; <i1> [#uses=1]
@@ -76,10 +76,10 @@ entry:
for.body: ; preds = %for.body, %entry
%i.010 = phi i16 [ 0, %entry ], [ %inc, %for.body ] ; <i16> [#uses=2]
%sum.09 = phi i16 [ 0, %entry ], [ %add, %for.body ] ; <i16> [#uses=1]
- %arrayidx = getelementptr i16* %a, i16 %i.010 ; <i16*> [#uses=1]
+ %arrayidx = getelementptr i16, i16* %a, i16 %i.010 ; <i16*> [#uses=1]
; CHECK-LABEL: xor:
; CHECK: xor.w @r{{[0-9]+}}+, r{{[0-9]+}}
- %tmp4 = load i16* %arrayidx ; <i16> [#uses=1]
+ %tmp4 = load i16, i16* %arrayidx ; <i16> [#uses=1]
%add = xor i16 %tmp4, %sum.09 ; <i16> [#uses=2]
%inc = add i16 %i.010, 1 ; <i16> [#uses=2]
%exitcond = icmp eq i16 %inc, %n ; <i1> [#uses=1]
@@ -98,10 +98,10 @@ entry:
for.body: ; preds = %for.body, %entry
%i.010 = phi i16 [ 0, %entry ], [ %inc, %for.body ] ; <i16> [#uses=2]
%sum.09 = phi i16 [ 0, %entry ], [ %add, %for.body ] ; <i16> [#uses=1]
- %arrayidx = getelementptr i16* %a, i16 %i.010 ; <i16*> [#uses=1]
+ %arrayidx = getelementptr i16, i16* %a, i16 %i.010 ; <i16*> [#uses=1]
; CHECK-LABEL: and:
; CHECK: and.w @r{{[0-9]+}}+, r{{[0-9]+}}
- %tmp4 = load i16* %arrayidx ; <i16> [#uses=1]
+ %tmp4 = load i16, i16* %arrayidx ; <i16> [#uses=1]
%add = and i16 %tmp4, %sum.09 ; <i16> [#uses=2]
%inc = add i16 %i.010, 1 ; <i16> [#uses=2]
%exitcond = icmp eq i16 %inc, %n ; <i1> [#uses=1]
diff --git a/test/CodeGen/Mips/2008-07-03-SRet.ll b/test/CodeGen/Mips/2008-07-03-SRet.ll
index afec7f65d607..6313ec4af356 100644
--- a/test/CodeGen/Mips/2008-07-03-SRet.ll
+++ b/test/CodeGen/Mips/2008-07-03-SRet.ll
@@ -7,11 +7,11 @@ entry:
; CHECK: sw ${{[0-9]+}}, {{[0-9]+}}($4)
; CHECK: sw ${{[0-9]+}}, {{[0-9]+}}($4)
; CHECK: sw ${{[0-9]+}}, {{[0-9]+}}($4)
- getelementptr %struct.sret0* %agg.result, i32 0, i32 0 ; <i32*>:0 [#uses=1]
+ getelementptr %struct.sret0, %struct.sret0* %agg.result, i32 0, i32 0 ; <i32*>:0 [#uses=1]
store i32 %dummy, i32* %0, align 4
- getelementptr %struct.sret0* %agg.result, i32 0, i32 1 ; <i32*>:1 [#uses=1]
+ getelementptr %struct.sret0, %struct.sret0* %agg.result, i32 0, i32 1 ; <i32*>:1 [#uses=1]
store i32 %dummy, i32* %1, align 4
- getelementptr %struct.sret0* %agg.result, i32 0, i32 2 ; <i32*>:2 [#uses=1]
+ getelementptr %struct.sret0, %struct.sret0* %agg.result, i32 0, i32 2 ; <i32*>:2 [#uses=1]
store i32 %dummy, i32* %2, align 4
ret void
}
diff --git a/test/CodeGen/Mips/2008-07-15-InternalConstant.ll b/test/CodeGen/Mips/2008-07-15-InternalConstant.ll
index 29a7b5c3761a..d7e8f5c2d03f 100644
--- a/test/CodeGen/Mips/2008-07-15-InternalConstant.ll
+++ b/test/CodeGen/Mips/2008-07-15-InternalConstant.ll
@@ -8,7 +8,7 @@ entry:
; CHECK: foo
; CHECK: %hi(.str)
; CHECK: %lo(.str)
- ret i8* getelementptr ([10 x i8]* @.str, i32 0, i32 0)
+ ret i8* getelementptr ([10 x i8], [10 x i8]* @.str, i32 0, i32 0)
}
define i32* @bar() nounwind {
@@ -16,7 +16,7 @@ entry:
; CHECK: bar
; CHECK: %hi(i0)
; CHECK: %lo(i0)
- ret i32* getelementptr ([5 x i32]* @i0, i32 0, i32 0)
+ ret i32* getelementptr ([5 x i32], [5 x i32]* @i0, i32 0, i32 0)
}
; CHECK: rodata.str1.4,"aMS",@progbits
diff --git a/test/CodeGen/Mips/2008-07-15-SmallSection.ll b/test/CodeGen/Mips/2008-07-15-SmallSection.ll
index cbc3ecf5edc8..08d99d899103 100644
--- a/test/CodeGen/Mips/2008-07-15-SmallSection.ll
+++ b/test/CodeGen/Mips/2008-07-15-SmallSection.ll
@@ -22,13 +22,13 @@ target triple = "mipsallegrexel-unknown-psp-elf"
define i8* @A0() nounwind {
entry:
- ret i8* getelementptr ([8 x i8]* @s0, i32 0, i32 0)
+ ret i8* getelementptr ([8 x i8], [8 x i8]* @s0, i32 0, i32 0)
}
define i32 @A1() nounwind {
entry:
- load i32* getelementptr (%struct.anon* @foo, i32 0, i32 0), align 8
- load i32* getelementptr (%struct.anon* @foo, i32 0, i32 1), align 4
+ load i32, i32* getelementptr (%struct.anon, %struct.anon* @foo, i32 0, i32 0), align 8
+ load i32, i32* getelementptr (%struct.anon, %struct.anon* @foo, i32 0, i32 1), align 4
add i32 %1, %0
ret i32 %2
}
diff --git a/test/CodeGen/Mips/2008-08-01-AsmInline.ll b/test/CodeGen/Mips/2008-08-01-AsmInline.ll
index 3c1bb39b4340..5edba029502a 100644
--- a/test/CodeGen/Mips/2008-08-01-AsmInline.ll
+++ b/test/CodeGen/Mips/2008-08-01-AsmInline.ll
@@ -1,5 +1,5 @@
; RUN: llc -march=mips -mcpu=mips32 < %s | FileCheck %s
-; RUN: llc -march=mips64el -mcpu=mips64r2 -mattr=n64 < %s | FileCheck %s
+; RUN: llc -march=mips64el -mcpu=mips64r2 -target-abi=n64 < %s | FileCheck %s
%struct.DWstruct = type { i32, i32 }
@@ -26,8 +26,8 @@ entry:
define void @foo0() nounwind {
entry:
; CHECK: addu
- %0 = load i32* @gi1, align 4
- %1 = load i32* @gi0, align 4
+ %0 = load i32, i32* @gi1, align 4
+ %1 = load i32, i32* @gi0, align 4
%2 = tail call i32 asm "addu $0, $1, $2", "=r,r,r"(i32 %0, i32 %1) nounwind
store i32 %2, i32* @gi2, align 4
ret void
@@ -36,7 +36,7 @@ entry:
define void @foo2() nounwind {
entry:
; CHECK: neg.s
- %0 = load float* @gf1, align 4
+ %0 = load float, float* @gf1, align 4
%1 = tail call float asm "neg.s $0, $1", "=f,f"(float %0) nounwind
store float %1, float* @gf0, align 4
ret void
@@ -45,7 +45,7 @@ entry:
define void @foo3() nounwind {
entry:
; CHECK: neg.d
- %0 = load double* @gd1, align 8
+ %0 = load double, double* @gd1, align 8
%1 = tail call double asm "neg.d $0, $1", "=f,f"(double %0) nounwind
store double %1, double* @gd0, align 8
ret void
@@ -64,7 +64,7 @@ define void @foo4() {
entry:
%0 = tail call i32 asm sideeffect "ulh $0,16($$sp)\0A\09", "=r,~{$2}"()
store i32 %0, i32* @gi2, align 4
- %1 = load float* @gf0, align 4
+ %1 = load float, float* @gf0, align 4
%2 = tail call double asm sideeffect "cvt.d.s $0, $1\0A\09", "=f,f,~{$f0}"(float %1)
store double %2, double* @gd0, align 8
ret void
diff --git a/test/CodeGen/Mips/2008-08-03-ReturnDouble.ll b/test/CodeGen/Mips/2008-08-03-ReturnDouble.ll
index c41d5213c178..592e574a3622 100644
--- a/test/CodeGen/Mips/2008-08-03-ReturnDouble.ll
+++ b/test/CodeGen/Mips/2008-08-03-ReturnDouble.ll
@@ -7,12 +7,12 @@ entry:
%retval = alloca double ; <double*> [#uses=3]
store double 0.000000e+00, double* %retval
%r = alloca double ; <double*> [#uses=1]
- load double* %r ; <double>:0 [#uses=1]
+ load double, double* %r ; <double>:0 [#uses=1]
store double %0, double* %retval
br label %return
return: ; preds = %entry
- load double* %retval ; <double>:1 [#uses=1]
+ load double, double* %retval ; <double>:1 [#uses=1]
ret double %1
}
diff --git a/test/CodeGen/Mips/2008-10-13-LegalizerBug.ll b/test/CodeGen/Mips/2008-10-13-LegalizerBug.ll
index 18f5b3d7ff7b..eaf6ddc911e3 100644
--- a/test/CodeGen/Mips/2008-10-13-LegalizerBug.ll
+++ b/test/CodeGen/Mips/2008-10-13-LegalizerBug.ll
@@ -7,8 +7,8 @@ entry:
continue.outer: ; preds = %case4, %entry
%p.0.ph.rec = phi i32 [ 0, %entry ], [ %indvar.next, %case4 ] ; <i32> [#uses=2]
- %p.0.ph = getelementptr i8* %0, i32 %p.0.ph.rec ; <i8*> [#uses=1]
- %1 = load i8* %p.0.ph ; <i8> [#uses=1]
+ %p.0.ph = getelementptr i8, i8* %0, i32 %p.0.ph.rec ; <i8*> [#uses=1]
+ %1 = load i8, i8* %p.0.ph ; <i8> [#uses=1]
switch i8 %1, label %infloop [
i8 0, label %return.split
i8 76, label %case4
diff --git a/test/CodeGen/Mips/2008-11-10-xint_to_fp.ll b/test/CodeGen/Mips/2008-11-10-xint_to_fp.ll
index 9c4838a87e51..789f7ee3d4b4 100644
--- a/test/CodeGen/Mips/2008-11-10-xint_to_fp.ll
+++ b/test/CodeGen/Mips/2008-11-10-xint_to_fp.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=mips -soft-float < %s
+; RUN: llc -march=mips -mattr=+soft-float < %s
; PR2667
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
%struct._Bigint = type { %struct._Bigint*, i32, i32, i32, i32, [1 x i32] }
@@ -13,16 +13,16 @@ target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f
define double @_erand48_r(%struct._reent* %r, i16* %xseed) nounwind {
entry:
tail call void @__dorand48( %struct._reent* %r, i16* %xseed ) nounwind
- load i16* %xseed, align 2 ; <i16>:0 [#uses=1]
+ load i16, i16* %xseed, align 2 ; <i16>:0 [#uses=1]
uitofp i16 %0 to double ; <double>:1 [#uses=1]
tail call double @ldexp( double %1, i32 -48 ) nounwind ; <double>:2 [#uses=1]
- getelementptr i16* %xseed, i32 1 ; <i16*>:3 [#uses=1]
- load i16* %3, align 2 ; <i16>:4 [#uses=1]
+ getelementptr i16, i16* %xseed, i32 1 ; <i16*>:3 [#uses=1]
+ load i16, i16* %3, align 2 ; <i16>:4 [#uses=1]
uitofp i16 %4 to double ; <double>:5 [#uses=1]
tail call double @ldexp( double %5, i32 -32 ) nounwind ; <double>:6 [#uses=1]
fadd double %2, %6 ; <double>:7 [#uses=1]
- getelementptr i16* %xseed, i32 2 ; <i16*>:8 [#uses=1]
- load i16* %8, align 2 ; <i16>:9 [#uses=1]
+ getelementptr i16, i16* %xseed, i32 2 ; <i16*>:8 [#uses=1]
+ load i16, i16* %8, align 2 ; <i16>:9 [#uses=1]
uitofp i16 %9 to double ; <double>:10 [#uses=1]
tail call double @ldexp( double %10, i32 -16 ) nounwind ; <double>:11 [#uses=1]
fadd double %7, %11 ; <double>:12 [#uses=1]
@@ -35,18 +35,18 @@ declare double @ldexp(double, i32)
define double @erand48(i16* %xseed) nounwind {
entry:
- load %struct._reent** @_impure_ptr, align 4 ; <%struct._reent*>:0 [#uses=1]
+ load %struct._reent*, %struct._reent** @_impure_ptr, align 4 ; <%struct._reent*>:0 [#uses=1]
tail call void @__dorand48( %struct._reent* %0, i16* %xseed ) nounwind
- load i16* %xseed, align 2 ; <i16>:1 [#uses=1]
+ load i16, i16* %xseed, align 2 ; <i16>:1 [#uses=1]
uitofp i16 %1 to double ; <double>:2 [#uses=1]
tail call double @ldexp( double %2, i32 -48 ) nounwind ; <double>:3 [#uses=1]
- getelementptr i16* %xseed, i32 1 ; <i16*>:4 [#uses=1]
- load i16* %4, align 2 ; <i16>:5 [#uses=1]
+ getelementptr i16, i16* %xseed, i32 1 ; <i16*>:4 [#uses=1]
+ load i16, i16* %4, align 2 ; <i16>:5 [#uses=1]
uitofp i16 %5 to double ; <double>:6 [#uses=1]
tail call double @ldexp( double %6, i32 -32 ) nounwind ; <double>:7 [#uses=1]
fadd double %3, %7 ; <double>:8 [#uses=1]
- getelementptr i16* %xseed, i32 2 ; <i16*>:9 [#uses=1]
- load i16* %9, align 2 ; <i16>:10 [#uses=1]
+ getelementptr i16, i16* %xseed, i32 2 ; <i16*>:9 [#uses=1]
+ load i16, i16* %9, align 2 ; <i16>:10 [#uses=1]
uitofp i16 %10 to double ; <double>:11 [#uses=1]
tail call double @ldexp( double %11, i32 -16 ) nounwind ; <double>:12 [#uses=1]
fadd double %8, %12 ; <double>:13 [#uses=1]
diff --git a/test/CodeGen/Mips/2009-11-16-CstPoolLoad.ll b/test/CodeGen/Mips/2009-11-16-CstPoolLoad.ll
index c3791dfc7ce6..f736ddd0def6 100644
--- a/test/CodeGen/Mips/2009-11-16-CstPoolLoad.ll
+++ b/test/CodeGen/Mips/2009-11-16-CstPoolLoad.ll
@@ -1,9 +1,9 @@
; RUN: llc -march=mipsel -relocation-model=pic < %s | FileCheck %s -check-prefix=PIC-O32
; RUN: llc -march=mipsel -relocation-model=static < %s | FileCheck %s -check-prefix=STATIC-O32
-; RUN: llc -march=mips64el -mcpu=mips64r2 -mattr=-n64,n32 -relocation-model=pic < %s | FileCheck %s -check-prefix=PIC-N32
-; RUN: llc -march=mips64el -mcpu=mips64r2 -mattr=-n64,n32 -relocation-model=static < %s | FileCheck %s -check-prefix=STATIC-N32
-; RUN: llc -march=mips64el -mcpu=mips64r2 -mattr=-n64,n64 -relocation-model=pic < %s | FileCheck %s -check-prefix=PIC-N64
-; RUN: llc -march=mips64el -mcpu=mips64r2 -mattr=-n64,n64 -relocation-model=static < %s | FileCheck %s -check-prefix=STATIC-N64
+; RUN: llc -march=mips64el -mcpu=mips64r2 -target-abi n32 -relocation-model=pic < %s | FileCheck %s -check-prefix=PIC-N32
+; RUN: llc -march=mips64el -mcpu=mips64r2 -target-abi n32 -relocation-model=static < %s | FileCheck %s -check-prefix=STATIC-N32
+; RUN: llc -march=mips64el -mcpu=mips64r2 -target-abi n64 -relocation-model=pic < %s | FileCheck %s -check-prefix=PIC-N64
+; RUN: llc -march=mips64el -mcpu=mips64r2 -target-abi n64 -relocation-model=static < %s | FileCheck %s -check-prefix=STATIC-N64
define float @h() nounwind readnone {
entry:
diff --git a/test/CodeGen/Mips/2010-07-20-Switch.ll b/test/CodeGen/Mips/2010-07-20-Switch.ll
index 5c840775cf9e..fd0254e9f5ec 100644
--- a/test/CodeGen/Mips/2010-07-20-Switch.ll
+++ b/test/CodeGen/Mips/2010-07-20-Switch.ll
@@ -15,7 +15,7 @@ define i32 @main() nounwind readnone {
entry:
%x = alloca i32, align 4 ; <i32*> [#uses=2]
store volatile i32 2, i32* %x, align 4
- %0 = load volatile i32* %x, align 4 ; <i32> [#uses=1]
+ %0 = load volatile i32, i32* %x, align 4 ; <i32> [#uses=1]
; STATIC-O32: sll $[[R0:[0-9]+]], ${{[0-9]+}}, 2
; STATIC-O32: lui $[[R1:[0-9]+]], %hi($JTI0_0)
; STATIC-O32: addu $[[R2:[0-9]+]], $[[R0]], $[[R1]]
diff --git a/test/CodeGen/Mips/2012-12-12-ExpandMemcpy.ll b/test/CodeGen/Mips/2012-12-12-ExpandMemcpy.ll
index 9d4daee696db..24bcfaee8bad 100644
--- a/test/CodeGen/Mips/2012-12-12-ExpandMemcpy.ll
+++ b/test/CodeGen/Mips/2012-12-12-ExpandMemcpy.ll
@@ -4,7 +4,7 @@
define void @t(i8* %ptr) {
entry:
- tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %ptr, i8* getelementptr inbounds ([7 x i8]* @.str, i64 0, i64 0), i64 7, i32 1, i1 false)
+ tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %ptr, i8* getelementptr inbounds ([7 x i8], [7 x i8]* @.str, i64 0, i64 0), i64 7, i32 1, i1 false)
ret void
}
diff --git a/test/CodeGen/Mips/Fast-ISel/br1.ll b/test/CodeGen/Mips/Fast-ISel/br1.ll
index 579a77f88fef..11842ddc4188 100644
--- a/test/CodeGen/Mips/Fast-ISel/br1.ll
+++ b/test/CodeGen/Mips/Fast-ISel/br1.ll
@@ -1,6 +1,6 @@
-; RUN: llc -march=mipsel -relocation-model=pic -O0 -mips-fast-isel -fast-isel-abort -mcpu=mips32r2 \
+; RUN: llc -march=mipsel -relocation-model=pic -O0 -mips-fast-isel -fast-isel-abort=1 -mcpu=mips32r2 \
; RUN: < %s | FileCheck %s
-; RUN: llc -march=mipsel -relocation-model=pic -O0 -mips-fast-isel -fast-isel-abort -mcpu=mips32 \
+; RUN: llc -march=mipsel -relocation-model=pic -O0 -mips-fast-isel -fast-isel-abort=1 -mcpu=mips32 \
; RUN: < %s | FileCheck %s
@b = global i32 1, align 4
@@ -10,7 +10,7 @@
; Function Attrs: nounwind
define void @br() #0 {
entry:
- %0 = load i32* @b, align 4
+ %0 = load i32, i32* @b, align 4
%tobool = icmp eq i32 %0, 0
br i1 %tobool, label %if.end, label %if.then
diff --git a/test/CodeGen/Mips/Fast-ISel/callabi.ll b/test/CodeGen/Mips/Fast-ISel/callabi.ll
index e76d7a74bd0e..8f5d68b41f66 100644
--- a/test/CodeGen/Mips/Fast-ISel/callabi.ll
+++ b/test/CodeGen/Mips/Fast-ISel/callabi.ll
@@ -1,477 +1,538 @@
-; RUN: llc -march=mipsel -relocation-model=pic -O0 -mips-fast-isel -fast-isel-abort -mcpu=mips32r2 \
-; RUN: < %s | FileCheck %s
-; RUN: llc -march=mipsel -relocation-model=pic -O0 -mips-fast-isel -fast-isel-abort -mcpu=mips32 \
-; RUN: < %s | FileCheck %s
-; RUN: llc -march=mipsel -relocation-model=pic -O0 -mips-fast-isel -fast-isel-abort -mcpu=mips32r2 \
-; RUN: < %s | FileCheck %s -check-prefix=mips32r2
-; RUN: llc -march=mipsel -relocation-model=pic -O0 -mips-fast-isel -fast-isel-abort -mcpu=mips32 \
-; RUN: < %s | FileCheck %s -check-prefix=mips32
-; RUN: llc -march=mipsel -relocation-model=pic -O0 -mips-fast-isel -fast-isel-abort -mcpu=mips32r2 \
-; RUN: < %s | FileCheck %s -check-prefix=CHECK2
-; RUN: llc -march=mipsel -relocation-model=pic -O0 -mips-fast-isel -fast-isel-abort -mcpu=mips32 \
-; RUN: < %s | FileCheck %s -check-prefix=CHECK2
+; RUN: llc -march=mipsel -mcpu=mips32 -O0 \
+; RUN: -mips-fast-isel -relocation-model=pic -fast-isel-abort=1 < %s | \
+; RUN: FileCheck %s -check-prefix=ALL -check-prefix=32R1
+; RUN: llc -march=mipsel -mcpu=mips32r2 -O0 \
+; RUN: -mips-fast-isel -relocation-model=pic -fast-isel-abort=1 < %s | \
+; RUN: FileCheck %s -check-prefix=ALL -check-prefix=32R2
+declare void @xb(i8)
-@c1 = global i8 -45, align 1
-@uc1 = global i8 27, align 1
-@s1 = global i16 -1789, align 2
-@us1 = global i16 1256, align 2
+define void @cxb() {
+ ; ALL-LABEL: cxb:
-; Function Attrs: nounwind
-define void @cxi() #0 {
-entry:
-; CHECK-LABEL: cxi
- call void @xi(i32 10)
-; CHECK-DAG: addiu $4, $zero, 10
-; CHECK-DAG: lw $25, %got(xi)(${{[0-9]+}})
-; CHECK: jalr $25
+ ; ALL: addiu $[[T0:[0-9]+]], $zero, 10
+ ; 32R1: sll $[[T1:[0-9]+]], $[[T0]], 24
+ ; 32R1: sra $4, $[[T1]], 24
+
+ ; 32R2: seb $4, $[[T0]]
+ call void @xb(i8 10)
ret void
}
-declare void @xi(i32) #1
+declare void @xh(i16)
-; Function Attrs: nounwind
-define void @cxii() #0 {
-entry:
-; CHECK-LABEL: cxii
- call void @xii(i32 746, i32 892)
-; CHECK-DAG: addiu $4, $zero, 746
-; CHECK-DAG: addiu $5, $zero, 892
-; CHECK-DAG: lw $25, %got(xii)(${{[0-9]+}})
-; CHECK: jalr $25
+define void @cxh() {
+ ; ALL-LABEL: cxh:
+
+ ; ALL: addiu $[[T0:[0-9]+]], $zero, 10
+ ; 32R1: sll $[[T1:[0-9]+]], $[[T0]], 16
+ ; 32R1: sra $4, $[[T1]], 16
+
+ ; 32R2: seh $4, $[[T0]]
+ call void @xh(i16 10)
ret void
}
-declare void @xii(i32, i32) #1
+declare void @xi(i32)
-; Function Attrs: nounwind
-define void @cxiii() #0 {
-entry:
-; CHECK-LABEL: cxiii
- call void @xiii(i32 88, i32 44, i32 11)
-; CHECK-DAG: addiu $4, $zero, 88
-; CHECK-DAG: addiu $5, $zero, 44
-; CHECK-DAG: addiu $6, $zero, 11
-; CHECK-DAG: lw $25, %got(xiii)(${{[0-9]+}})
-; CHECK: jalr $25
+define void @cxi() {
+ ; ALL-LABEL: cxi:
+
+ ; ALL-DAG: addiu $4, $zero, 10
+ ; ALL-DAG: lw $25, %got(xi)(${{[0-9]+}})
+ ; ALL: jalr $25
+ call void @xi(i32 10)
ret void
}
-declare void @xiii(i32, i32, i32) #1
+declare void @xbb(i8, i8)
-; Function Attrs: nounwind
-define void @cxiiii() #0 {
-entry:
-; CHECK-LABEL: cxiiii
- call void @xiiii(i32 167, i32 320, i32 97, i32 14)
-; CHECK-DAG: addiu $4, $zero, 167
-; CHECK-DAG: addiu $5, $zero, 320
-; CHECK-DAG: addiu $6, $zero, 97
-; CHECK-DAG: addiu $7, $zero, 14
-; CHECK-DAG: lw $25, %got(xiiii)(${{[0-9]+}})
-; CHECK: jalr $25
+define void @cxbb() {
+ ; ALL-LABEL: cxbb:
+
+ ; ALL-DAG: addiu $[[T0:[0-9]+]], $zero, 76
+ ; ALL-DAG: addiu $[[T1:[0-9]+]], $zero, 101
+
+ ; 32R1-DAG: sll $[[T2:[0-9]+]], $[[T0]], 24
+ ; 32R1-DAG: sra $[[T3:[0-9]+]], $[[T2]], 24
+ ; 32R1-DAG: sll $[[T4:[0-9]+]], $[[T1]], 24
+ ; 32R1-DAG: sra $[[T5:[0-9]+]], $[[T4]], 24
+ ; 32R2-DAG: seb $4, $[[T0]]
+ ; 32R2-DAG: seb $5, $[[T1]]
+ call void @xbb(i8 76, i8 101)
ret void
}
-declare void @xiiii(i32, i32, i32, i32) #1
-
-; Function Attrs: nounwind
-define void @cxiiiiconv() #0 {
-entry:
-; CHECK-LABEL: cxiiiiconv
-; mips32r2-LABEL: cxiiiiconv
-; mips32-LABEL: cxiiiiconv
- %0 = load i8* @c1, align 1
- %conv = sext i8 %0 to i32
- %1 = load i8* @uc1, align 1
- %conv1 = zext i8 %1 to i32
- %2 = load i16* @s1, align 2
- %conv2 = sext i16 %2 to i32
- %3 = load i16* @us1, align 2
- %conv3 = zext i16 %3 to i32
- call void @xiiii(i32 %conv, i32 %conv1, i32 %conv2, i32 %conv3)
-; CHECK: addu $[[REG_GP:[0-9]+]], ${{[0-9]+}}, ${{[0-9+]}}
-; mips32r2: addu $[[REG_GP:[0-9]+]], ${{[0-9]+}}, ${{[0-9+]}}
-; mips32: addu $[[REG_GP:[0-9]+]], ${{[0-9]+}}, ${{[0-9+]}}
-; mips32r2-DAG: lw $[[REG_C1_ADDR:[0-9]+]], %got(c1)($[[REG_GP]])
-; mips32r2-DAG: lbu $[[REG_C1:[0-9]+]], 0($[[REG_C1_ADDR]])
-; mips32r2-DAG seb $3, $[[REG_C1]]
-; mips32-DAG: lw $[[REG_C1_ADDR:[0-9]+]], %got(c1)($[[REG_GP]])
-; mips32-DAG: lbu $[[REG_C1:[0-9]+]], 0($[[REG_C1_ADDR]])
-; mips32-DAG: sll $[[REG_C1_1:[0-9]+]], $[[REG_C1]], 24
-; mips32-DAG: sra $4, $[[REG_C1_1]], 24
-; CHECK-DAG: lw $[[REG_UC1_ADDR:[0-9]+]], %got(uc1)($[[REG_GP]])
-; CHECK-DAG: lbu $[[REG_UC1:[0-9]+]], 0($[[REG_UC1_ADDR]])
-; FIXME andi is superfulous
-; CHECK-DAG: andi $5, $[[REG_UC1]], 255
-; mips32r2-DAG: lw $[[REG_S1_ADDR:[0-9]+]], %got(s1)($[[REG_GP]])
-; mips32r2-DAG: lhu $[[REG_S1:[0-9]+]], 0($[[REG_S1_ADDR]])
-; mips32r2-DAG: seh $6, $[[REG_S1]]
-; mips32-DAG: lw $[[REG_S1_ADDR:[0-9]+]], %got(s1)($[[REG_GP]])
-; mips32-DAG: lhu $[[REG_S1:[0-9]+]], 0($[[REG_S1_ADDR]])
-; mips32-DAG: sll $[[REG_S1_1:[0-9]+]], $[[REG_S1]], 16
-; mips32-DAG: sra $6, $[[REG_S1_1]], 16
-; CHECK-DAG: lw $[[REG_US1_ADDR:[0-9]+]], %got(us1)($[[REG_GP]])
-; CHECK-DAG: lhu $[[REG_US1:[0-9]+]], 0($[[REG_US1_ADDR]])
-; FIXME andi is superfulous
-; CHECK-DAG: andi $7, $[[REG_US1]], 65535
-; mips32r2: jalr $25
-; mips32r2: jalr $25
-; CHECK: jalr $25
+declare void @xhh(i16, i16)
+
+define void @cxhh() {
+ ; ALL-LABEL: cxhh:
+
+ ; ALL-DAG: addiu $[[T0:[0-9]+]], $zero, 76
+ ; ALL-DAG: addiu $[[T1:[0-9]+]], $zero, 101
+
+ ; 32R1-DAG: sll $[[T2:[0-9]+]], $[[T0]], 16
+ ; 32R1-DAG: sra $[[T3:[0-9]+]], $[[T2]], 16
+ ; 32R1-DAG: sll $[[T4:[0-9]+]], $[[T1]], 16
+ ; 32R1-DAG: sra $[[T5:[0-9]+]], $[[T4]], 16
+
+ ; 32R2-DAG: seh $4, $[[T0]]
+ ; 32R2-DAG: seh $5, $[[T1]]
+ call void @xhh(i16 76, i16 101)
ret void
}
-; Function Attrs: nounwind
-define void @cxf() #0 {
-entry:
-; CHECK-LABEL: cxf
- call void @xf(float 0x40BBC85560000000)
-; CHECK: addu $[[REG_GP:[0-9]+]], ${{[0-9]+}}, ${{[0-9+]}}
-; CHECK: lui $[[REG_FPCONST_1:[0-9]+]], 17886
-; CHECK: ori $[[REG_FPCONST:[0-9]+]], $[[REG_FPCONST_1]], 17067
-; CHECK: mtc1 $[[REG_FPCONST]], $f12
-; CHECK: lw $25, %got(xf)($[[REG_GP]])
-; CHECK: jalr $25
+declare void @xii(i32, i32)
+
+define void @cxii() {
+ ; ALL-LABEL: cxii:
+
+ ; ALL-DAG: addiu $4, $zero, 746
+ ; ALL-DAG: addiu $5, $zero, 892
+ ; ALL-DAG: lw $25, %got(xii)(${{[0-9]+}})
+ ; ALL: jalr $25
+ call void @xii(i32 746, i32 892)
ret void
}
-declare void @xf(float) #1
+declare void @xccc(i8, i8, i8)
-; Function Attrs: nounwind
-define void @cxff() #0 {
-entry:
-; CHECK-LABEL: cxff
- call void @xff(float 0x3FF74A6CA0000000, float 0x401A2C0840000000)
-; CHECK: addu $[[REG_GP:[0-9]+]], ${{[0-9]+}}, ${{[0-9+]}}
-; CHECK-DAG: lui $[[REG_FPCONST_1:[0-9]+]], 16314
-; CHECK-DAG: ori $[[REG_FPCONST:[0-9]+]], $[[REG_FPCONST_1]], 21349
-; CHECK-DAG: mtc1 $[[REG_FPCONST]], $f12
-; CHECK-DAG: lui $[[REG_FPCONST_2:[0-9]+]], 16593
-; CHECK-DAG: ori $[[REG_FPCONST_3:[0-9]+]], $[[REG_FPCONST_2]], 24642
-; CHECK-DAG: mtc1 $[[REG_FPCONST_3]], $f14
-; CHECK: lw $25, %got(xff)($[[REG_GP]])
-; CHECK: jalr $25
+define void @cxccc() {
+ ; ALL-LABEL: cxccc:
+
+ ; ALL-DAG: addiu $[[T0:[0-9]+]], $zero, 88
+ ; ALL-DAG: addiu $[[T1:[0-9]+]], $zero, 44
+ ; ALL-DAG: addiu $[[T2:[0-9]+]], $zero, 11
+
+ ; 32R1-DAG: sll $[[T3:[0-9]+]], $[[T0]], 24
+ ; 32R1-DAG: sra $4, $[[T3]], 24
+ ; 32R1-DAG: sll $[[T4:[0-9]+]], $[[T1]], 24
+ ; 32R1-DAG: sra $5, $[[T4]], 24
+ ; 32R1-DAG: sll $[[T5:[0-9]+]], $[[T2]], 24
+ ; 32R1-DAG: sra $6, $[[T5]], 24
+
+ ; 32R2-DAG: seb $4, $[[T0]]
+ ; 32R2-DAG: seb $5, $[[T1]]
+ ; 32R2-DAG: seb $6, $[[T2]]
+ call void @xccc(i8 88, i8 44, i8 11)
ret void
}
-declare void @xff(float, float) #1
+declare void @xhhh(i16, i16, i16)
-; Function Attrs: nounwind
-define void @cxfi() #0 {
-entry:
-; CHECK-LABEL: cxfi
- call void @xfi(float 0x4013906240000000, i32 102)
-; CHECK: addu $[[REG_GP:[0-9]+]], ${{[0-9]+}}, ${{[0-9+]}}
-; CHECK-DAG: lui $[[REG_FPCONST_1:[0-9]+]], 16540
-; CHECK-DAG: ori $[[REG_FPCONST:[0-9]+]], $[[REG_FPCONST_1]], 33554
-; CHECK-DAG: mtc1 $[[REG_FPCONST]], $f12
-; CHECK-DAG: addiu $5, $zero, 102
-; CHECK: lw $25, %got(xfi)($[[REG_GP]])
-; CHECK: jalr $25
+define void @cxhhh() {
+ ; ALL-LABEL: cxhhh:
+
+ ; ALL-DAG: addiu $[[T0:[0-9]+]], $zero, 88
+ ; ALL-DAG: addiu $[[T1:[0-9]+]], $zero, 44
+ ; ALL-DAG: addiu $[[T2:[0-9]+]], $zero, 11
+ ; 32R1-DAG: sll $[[T3:[0-9]+]], $[[T0]], 16
+ ; 32R1-DAG: sra $4, $[[T3]], 16
+ ; 32R1-DAG: sll $[[T4:[0-9]+]], $[[T1]], 16
+ ; 32R1-DAG: sra $5, $[[T4]], 16
+ ; 32R1-DAG: sll $[[T5:[0-9]+]], $[[T2]], 16
+ ; 32R1-DAG: sra $6, $[[T5]], 16
+
+ ; 32R2-DAG: seh $4, $[[T0]]
+ ; 32R2-DAG: seh $5, $[[T1]]
+ ; 32R2-DAG: seh $6, $[[T2]]
+ call void @xhhh(i16 88, i16 44, i16 11)
ret void
}
-declare void @xfi(float, i32) #1
+declare void @xiii(i32, i32, i32)
-; Function Attrs: nounwind
-define void @cxfii() #0 {
-entry:
-; CHECK-LABEL: cxfii
- call void @xfii(float 0x405EC7EE00000000, i32 9993, i32 10922)
-; CHECK: addu $[[REG_GP:[0-9]+]], ${{[0-9]+}}, ${{[0-9+]}}
-; CHECK-DAG: lui $[[REG_FPCONST_1:[0-9]+]], 17142
-; CHECK-DAG: ori $[[REG_FPCONST:[0-9]+]], $[[REG_FPCONST_1]], 16240
-; CHECK-DAG: mtc1 $[[REG_FPCONST]], $f12
-; CHECK-DAG: addiu $5, $zero, 9993
-; CHECK-DAG: addiu $6, $zero, 10922
-; CHECK: lw $25, %got(xfii)($[[REG_GP]])
-; CHECK: jalr $25
+define void @cxiii() {
+ ; ALL-LABEL: cxiii:
+
+ ; ALL-DAG: addiu $4, $zero, 88
+ ; ALL-DAG: addiu $5, $zero, 44
+ ; ALL-DAG: addiu $6, $zero, 11
+ ; ALL-DAG: lw $25, %got(xiii)(${{[0-9]+}})
+ ; ALL: jalr $25
+ call void @xiii(i32 88, i32 44, i32 11)
ret void
}
-declare void @xfii(float, i32, i32) #1
+declare void @xcccc(i8, i8, i8, i8)
+
+define void @cxcccc() {
+ ; ALL-LABEL: cxcccc:
+
+ ; ALL-DAG: addiu $[[T0:[0-9]+]], $zero, 88
+ ; ALL-DAG: addiu $[[T1:[0-9]+]], $zero, 44
+ ; ALL-DAG: addiu $[[T2:[0-9]+]], $zero, 11
+ ; ALL-DAG: addiu $[[T3:[0-9]+]], $zero, 33
+
+ ; FIXME: We should avoid the unnecessary spill/reload here.
+
+ ; 32R1-DAG: sll $[[T4:[0-9]+]], $[[T0]], 24
+ ; 32R1-DAG: sra $[[T5:[0-9]+]], $[[T4]], 24
+ ; 32R1-DAG: sw $4, 16($sp)
+ ; 32R1-DAG: move $4, $[[T5]]
+ ; 32R1-DAG: sll $[[T6:[0-9]+]], $[[T1]], 24
+ ; 32R1-DAG: sra $5, $[[T6]], 24
+ ; 32R1-DAG: sll $[[T7:[0-9]+]], $[[T2]], 24
+ ; 32R1-DAG: sra $6, $[[T7]], 24
+ ; 32R1: lw $[[T8:[0-9]+]], 16($sp)
+ ; 32R1: sll $[[T9:[0-9]+]], $[[T8]], 24
+ ; 32R1: sra $7, $[[T9]], 24
+
+ ; 32R2-DAG: seb $[[T4:[0-9]+]], $[[T0]]
+ ; 32R2-DAG: sw $4, 16($sp)
+ ; 32R2-DAG: move $4, $[[T4]]
+ ; 32R2-DAG: seb $5, $[[T1]]
+ ; 32R2-DAG: seb $6, $[[T2]]
+ ; 32R2-DAG: lw $[[T5:[0-9]+]], 16($sp)
+ ; 32R2: seb $7, $[[T5]]
+ call void @xcccc(i8 88, i8 44, i8 11, i8 33)
+ ret void
+}
-; Function Attrs: nounwind
-define void @cxfiii() #0 {
-entry:
-; CHECK-LABEL: cxfiii
- call void @xfiii(float 0x405C072B20000000, i32 3948, i32 89011, i32 111222)
-; CHECK: addu $[[REG_GP:[0-9]+]], ${{[0-9]+}}, ${{[0-9+]}}
-; CHECK-DAG: lui $[[REG_FPCONST_1:[0-9]+]], 17120
-; CHECK-DAG: ori $[[REG_FPCONST:[0-9]+]], $[[REG_FPCONST_1]], 14681
-; CHECK-DAG: mtc1 $[[REG_FPCONST]], $f12
-; CHECK-DAG: addiu $5, $zero, 3948
-; CHECK-DAG: lui $[[REG_I_1:[0-9]+]], 1
-; CHECK-DAG: ori $6, $[[REG_I_1]], 23475
-; CHECK-DAG: lui $[[REG_I_2:[0-9]+]], 1
-; CHECK-DAG: ori $7, $[[REG_I_2]], 45686
-; CHECK: lw $25, %got(xfiii)($[[REG_GP]])
-; CHECK: jalr $25
+declare void @xhhhh(i16, i16, i16, i16)
+
+define void @cxhhhh() {
+ ; ALL-LABEL: cxhhhh:
+
+ ; ALL-DAG: addiu $[[T0:[0-9]+]], $zero, 88
+ ; ALL-DAG: addiu $[[T1:[0-9]+]], $zero, 44
+ ; ALL-DAG: addiu $[[T2:[0-9]+]], $zero, 11
+ ; ALL-DAG: addiu $[[T3:[0-9]+]], $zero, 33
+
+ ; FIXME: We should avoid the unnecessary spill/reload here.
+
+ ; 32R1-DAG: sll $[[T4:[0-9]+]], $[[T0]], 16
+ ; 32R1-DAG: sra $[[T5:[0-9]+]], $[[T4]], 16
+ ; 32R1-DAG: sw $4, 16($sp)
+ ; 32R1-DAG: move $4, $[[T5]]
+ ; 32R1-DAG: sll $[[T6:[0-9]+]], $[[T1]], 16
+ ; 32R1-DAG: sra $5, $[[T6]], 16
+ ; 32R1-DAG: sll $[[T7:[0-9]+]], $[[T2]], 16
+ ; 32R1-DAG: sra $6, $[[T7]], 16
+ ; 32R1: lw $[[T8:[0-9]+]], 16($sp)
+ ; 32R1: sll $[[T9:[0-9]+]], $[[T8]], 16
+ ; 32R1: sra $7, $[[T9]], 16
+
+ ; 32R2-DAG: seh $[[T4:[0-9]+]], $[[T0]]
+ ; 32R2-DAG: sw $4, 16($sp)
+ ; 32R2-DAG: move $4, $[[T4]]
+ ; 32R2-DAG: seh $5, $[[T1]]
+ ; 32R2-DAG: seh $6, $[[T2]]
+ ; 32R2-DAG: lw $[[T5:[0-9]+]], 16($sp)
+ ; 32R2: seh $7, $[[T5]]
+ call void @xhhhh(i16 88, i16 44, i16 11, i16 33)
ret void
}
-declare void @xfiii(float, i32, i32, i32) #1
+declare void @xiiii(i32, i32, i32, i32)
-; Function Attrs: nounwind
-define void @cxd() #0 {
-entry:
-; mips32r2-LABEL: cxd:
-; mips32-LABEL: cxd:
- call void @xd(double 5.994560e+02)
-; mips32: addu $[[REG_GP:[0-9]+]], ${{[0-9]+}}, ${{[0-9+]}}
-; mips32-DAG: lui $[[REG_FPCONST_1:[0-9]+]], 16514
-; mips32-DAG: ori $[[REG_FPCONST_2:[0-9]+]], $[[REG_FPCONST_1]], 48037
-; mips32-DAG: lui $[[REG_FPCONST_3:[0-9]+]], 58195
-; mips32-DAG: ori $[[REG_FPCONST_4:[0-9]+]], $[[REG_FPCONST_3]], 63439
-; mips32-DAG: mtc1 $[[REG_FPCONST_4]], $f12
-; mips32-DAG: mtc1 $[[REG_FPCONST_2]], $f13
-; mips32-DAG: lw $25, %got(xd)($[[REG_GP]])
-; mips32: jalr $25
-; mips32r2: addu $[[REG_GP:[0-9]+]], ${{[0-9]+}}, ${{[0-9+]}}
-; mips32r2-DAG: lui $[[REG_FPCONST_1:[0-9]+]], 16514
-; mips32r2-DAG: ori $[[REG_FPCONST_2:[0-9]+]], $[[REG_FPCONST_1]], 48037
-; mips32r2-DAG: lui $[[REG_FPCONST_3:[0-9]+]], 58195
-; mips32r2-DAG: ori $[[REG_FPCONST_4:[0-9]+]], $[[REG_FPCONST_3]], 63439
-; mips32r2-DAG: mtc1 $[[REG_FPCONST_4]], $f12
-; mips32r2-DAG: mthc1 $[[REG_FPCONST_2]], $f12
-; mips32r2-DAG: lw $25, %got(xd)($[[REG_GP]])
-; mips32r2 : jalr $25
+define void @cxiiii() {
+ ; ALL-LABEL: cxiiii:
+
+ ; ALL-DAG: addiu $4, $zero, 167
+ ; ALL-DAG: addiu $5, $zero, 320
+ ; ALL-DAG: addiu $6, $zero, 97
+ ; ALL-DAG: addiu $7, $zero, 14
+ ; ALL-DAG: lw $25, %got(xiiii)(${{[0-9]+}})
+ ; ALL: jalr $25
+ call void @xiiii(i32 167, i32 320, i32 97, i32 14)
ret void
}
-declare void @xd(double) #1
+@c1 = global i8 -45, align 1
+@uc1 = global i8 27, align 1
+@s1 = global i16 -1789, align 2
+@us1 = global i16 1256, align 2
-; Function Attrs: nounwind
-define void @cxdd() #0 {
-; mips32r2-LABEL: cxdd:
-; mips32-LABEL: cxdd:
-entry:
- call void @xdd(double 1.234980e+03, double 0x40F5B331F7CED917)
-; mips32: addu $[[REG_GP:[0-9]+]], ${{[0-9]+}}, ${{[0-9+]}}
-; mips32-DAG: lui $[[REG_FPCONST_1:[0-9]+]], 16531
-; mips32-DAG: ori $[[REG_FPCONST_2:[0-9]+]], $[[REG_FPCONST_1]], 19435
-; mips32-DAG: lui $[[REG_FPCONST_3:[0-9]+]], 34078
-; mips32-DAG: ori $[[REG_FPCONST_4:[0-9]+]], $[[REG_FPCONST_3]], 47186
-; mips32-DAG: mtc1 $[[REG_FPCONST_4]], $f12
-; mips32-DAG: mtc1 $[[REG_FPCONST_2]], $f13
-; mips32-DAG: lui $[[REG_FPCONST_1:[0-9]+]], 16629
-; mips32-DAG: ori $[[REG_FPCONST_2:[0-9]+]], $[[REG_FPCONST_1]], 45873
-; mips32-DAG: lui $[[REG_FPCONST_3:[0-9]+]], 63438
-; mips32-DAG: ori $[[REG_FPCONST_4:[0-9]+]], $[[REG_FPCONST_3]], 55575
-; mips32-DAG: mtc1 $[[REG_FPCONST_4]], $f14
-; mips32-DAG: mtc1 $[[REG_FPCONST_2]], $f15
-; mips32-DAG: lw $25, %got(xdd)($[[REG_GP]])
-; mips32: jalr $25
-; mips32r2: addu $[[REG_GP:[0-9]+]], ${{[0-9]+}}, ${{[0-9+]}}
-; mips32r2-DAG: lui $[[REG_FPCONST_1:[0-9]+]], 16531
-; mips32r2-DAG: ori $[[REG_FPCONST_2:[0-9]+]], $[[REG_FPCONST_1]], 19435
-; mips32r2-DAG: lui $[[REG_FPCONST_3:[0-9]+]], 34078
-; mips32r2-DAG: ori $[[REG_FPCONST_4:[0-9]+]], $[[REG_FPCONST_3]], 47186
-; mips32r2-DAG: mtc1 $[[REG_FPCONST_4]], $f12
-; mips32r2-DAG: mthc1 $[[REG_FPCONST_2]], $f12
-; mips32r2-DAG: lui $[[REG_FPCONST_1:[0-9]+]], 16629
-; mips32r2-DAG: ori $[[REG_FPCONST_2:[0-9]+]], $[[REG_FPCONST_1]], 45873
-; mips32r2-DAG: lui $[[REG_FPCONST_3:[0-9]+]], 63438
-; mips32r2-DAG: ori $[[REG_FPCONST_4:[0-9]+]], $[[REG_FPCONST_3]], 55575
-; mips32r2-DAG: mtc1 $[[REG_FPCONST_4]], $f14
-; mips32r2-DAG: mthc1 $[[REG_FPCONST_2]], $f14
-; mips32r2-DAG: lw $25, %got(xdd)($[[REG_GP]])
-; mips32r2 : jalr $25
+define void @cxiiiiconv() {
+ ; ALL-LABEL: cxiiiiconv:
+
+ ; ALL: addu $[[REG_GP:[0-9]+]], ${{[0-9]+}}, ${{[0-9+]}}
+ ; ALL-DAG: lw $[[REG_C1_ADDR:[0-9]+]], %got(c1)($[[REG_GP]])
+ ; ALL-DAG: lbu $[[REG_C1:[0-9]+]], 0($[[REG_C1_ADDR]])
+ ; 32R1-DAG: sll $[[REG_C1_1:[0-9]+]], $[[REG_C1]], 24
+ ; 32R1-DAG: sra $4, $[[REG_C1_1]], 24
+ ; 32R2-DAG: seb $4, $[[REG_C1]]
+  ; FIXME: andi is superfluous
+ ; ALL-DAG: lw $[[REG_UC1_ADDR:[0-9]+]], %got(uc1)($[[REG_GP]])
+ ; ALL-DAG: lbu $[[REG_UC1:[0-9]+]], 0($[[REG_UC1_ADDR]])
+ ; ALL-DAG: andi $5, $[[REG_UC1]], 255
+ ; ALL-DAG: lw $[[REG_S1_ADDR:[0-9]+]], %got(s1)($[[REG_GP]])
+ ; ALL-DAG: lhu $[[REG_S1:[0-9]+]], 0($[[REG_S1_ADDR]])
+ ; 32R1-DAG: sll $[[REG_S1_1:[0-9]+]], $[[REG_S1]], 16
+ ; 32R1-DAG: sra $6, $[[REG_S1_1]], 16
+ ; 32R2-DAG: seh $6, $[[REG_S1]]
+  ; FIXME: andi is superfluous
+ ; ALL-DAG: lw $[[REG_US1_ADDR:[0-9]+]], %got(us1)($[[REG_GP]])
+ ; ALL-DAG: lhu $[[REG_US1:[0-9]+]], 0($[[REG_US1_ADDR]])
+ ; ALL-DAG: andi $7, $[[REG_US1]], 65535
+ ; ALL: jalr $25
+ %1 = load i8, i8* @c1, align 1
+ %conv = sext i8 %1 to i32
+ %2 = load i8, i8* @uc1, align 1
+ %conv1 = zext i8 %2 to i32
+ %3 = load i16, i16* @s1, align 2
+ %conv2 = sext i16 %3 to i32
+ %4 = load i16, i16* @us1, align 2
+ %conv3 = zext i16 %4 to i32
+ call void @xiiii(i32 %conv, i32 %conv1, i32 %conv2, i32 %conv3)
ret void
}
-declare void @xdd(double, double) #1
+declare void @xf(float)
-; Function Attrs: nounwind
-define void @cxif() #0 {
-entry:
-; CHECK-LABEL: cxif:
- call void @xif(i32 345, float 0x407BCE5A20000000)
-; CHECK-DAG: addu $[[REG_GP:[0-9]+]], ${{[0-9]+}}, ${{[0-9+]}}
-; CHECK-DAG: addiu $4, $zero, 345
-; CHECK-DAG: lui $[[REGF_1:[0-9]+]], 17374
-; CHECK-DAG: ori $[[REGF_2:[0-9]+]], $[[REGF_1]], 29393
-; CHECK-DAG: mtc1 $[[REGF_2]], $f[[REGF_3:[0-9]+]]
-; CHECK-DAG: mfc1 $5, $f[[REGF_3]]
-; CHECK-DAG: lw $25, %got(xif)($[[REG_GP]])
-; CHECK: jalr $25
+define void @cxf() {
+ ; ALL-LABEL: cxf:
+ ; ALL: addu $[[REG_GP:[0-9]+]], ${{[0-9]+}}, ${{[0-9+]}}
+ ; ALL: lui $[[REG_FPCONST_1:[0-9]+]], 17886
+ ; ALL: ori $[[REG_FPCONST:[0-9]+]], $[[REG_FPCONST_1]], 17067
+ ; ALL: mtc1 $[[REG_FPCONST]], $f12
+ ; ALL: lw $25, %got(xf)($[[REG_GP]])
+ ; ALL: jalr $25
+ call void @xf(float 0x40BBC85560000000)
ret void
}
-declare void @xif(i32, float) #1
+declare void @xff(float, float)
-; Function Attrs: nounwind
-define void @cxiff() #0 {
-entry:
-; CHECK-LABEL: cxiff:
-; CHECK2-LABEL: cxiff:
- call void @xiff(i32 12239, float 0x408EDB3340000000, float 0x4013FFE5C0000000)
-; We need to do the two floating point parameters in a separate
-; check because we can't control the ordering of parts of the sequence
-;;
-; CHECK: addu $[[REG_GP:[0-9]+]], ${{[0-9]+}}, ${{[0-9+]}}
-; CHECK: addiu $4, $zero, 12239
-; CHECK2: addu $[[REG_GP:[0-9]+]], ${{[0-9]+}}, ${{[0-9+]}}
-; CHECK2: addiu $4, $zero, 12239
-; CHECK: lui $[[REGF_1:[0-9]+]], 17526
-; CHECK: ori $[[REGF_2:[0-9]+]], $[[REGF_1]], 55706
-; CHECK: mtc1 $[[REGF_2]], $f[[REGF_3:[0-9]+]]
-; CHECK: mfc1 $5, $f[[REGF_3]]
-; CHECK2: lui $[[REGF2_1:[0-9]+]], 16543
-; CHECK2: ori $[[REGF2_2:[0-9]+]], $[[REGF2_1]], 65326
-; CHECK2: mtc1 $[[REGF2_2]], $f[[REGF2_3:[0-9]+]]
-; CHECK2: mfc1 $6, $f[[REGF2_3]]
-; CHECK: lw $25, %got(xiff)($[[REG_GP]])
-; CHECK2: lw $25, %got(xiff)($[[REG_GP]])
-; CHECK: jalr $25
-; CHECK2: jalr $25
+define void @cxff() {
+ ; ALL-LABEL: cxff:
+
+ ; ALL: addu $[[REG_GP:[0-9]+]], ${{[0-9]+}}, ${{[0-9+]}}
+ ; ALL-DAG: lui $[[REG_FPCONST_1:[0-9]+]], 16314
+ ; ALL-DAG: ori $[[REG_FPCONST:[0-9]+]], $[[REG_FPCONST_1]], 21349
+ ; ALL-DAG: mtc1 $[[REG_FPCONST]], $f12
+ ; ALL-DAG: lui $[[REG_FPCONST_2:[0-9]+]], 16593
+ ; ALL-DAG: ori $[[REG_FPCONST_3:[0-9]+]], $[[REG_FPCONST_2]], 24642
+ ; ALL-DAG: mtc1 $[[REG_FPCONST_3]], $f14
+ ; ALL-DAG: lw $25, %got(xff)($[[REG_GP]])
+ ; ALL: jalr $25
+ call void @xff(float 0x3FF74A6CA0000000, float 0x401A2C0840000000)
ret void
}
-declare void @xiff(i32, float, float) #1
+declare void @xfi(float, i32)
-; Function Attrs: nounwind
-define void @cxifi() #0 {
-entry:
-; CHECK: cxifi:
- call void @xifi(i32 887, float 0x402277CEE0000000, i32 888)
-; CHECK-DAG: addu $[[REG_GP:[0-9]+]], ${{[0-9]+}}, ${{[0-9+]}}
-; CHECK-DAG: addiu $4, $zero, 887
-; CHECK-DAG: lui $[[REGF_1:[0-9]+]], 16659
-; CHECK-DAG: ori $[[REGF_2:[0-9]+]], $[[REGF_1]], 48759
-; CHECK-DAG: mtc1 $[[REGF_2]], $f[[REGF_3:[0-9]+]]
-; CHECK-DAG: mfc1 $5, $f[[REGF_3]]
-; CHECk-DAG: addiu $6, $zero, 888
-; CHECK-DAG: lw $25, %got(xifi)($[[REG_GP]])
-; CHECK: jalr $25
+define void @cxfi() {
+ ; ALL-LABEL: cxfi:
+ ; ALL: addu $[[REG_GP:[0-9]+]], ${{[0-9]+}}, ${{[0-9+]}}
+ ; ALL-DAG: lui $[[REG_FPCONST_1:[0-9]+]], 16540
+ ; ALL-DAG: ori $[[REG_FPCONST:[0-9]+]], $[[REG_FPCONST_1]], 33554
+ ; ALL-DAG: mtc1 $[[REG_FPCONST]], $f12
+ ; ALL-DAG: addiu $5, $zero, 102
+ ; ALL-DAG: lw $25, %got(xfi)($[[REG_GP]])
+ ; ALL: jalr $25
+ call void @xfi(float 0x4013906240000000, i32 102)
ret void
}
-declare void @xifi(i32, float, i32) #1
-
-; Function Attrs: nounwind
-define void @cxifif() #0 {
-entry:
-; CHECK: cxifif:
-; CHECK2: cxifif:
- call void @xifif(i32 67774, float 0x408EE0FBE0000000, i32 9991, float 0x40B15C8CC0000000)
-; CHECK-DAG: addu $[[REG_GP:[0-9]+]], ${{[0-9]+}}, ${{[0-9+]}}
-; CHECK-DAG: addu $[[REG_GP:[0-9]+]], ${{[0-9]+}}, ${{[0-9+]}}
-; CHECK-DAG: lui $[[REGI:[0-9]+]], 1
-; CHECK-DAG: ori $4, $[[REGI]], 2238
-; CHECK-DAG: lui $[[REGF_1:[0-9]+]], 17527
-; CHECK-DAG: ori $[[REGF_2:[0-9]+]], $[[REGF_1]], 2015
-; CHECK-DAG: mtc1 $[[REGF_2]], $f[[REGF_3:[0-9]+]]
-; CHECK-DAG: mfc1 $5, $f[[REGF_3]]
-; CHECk-DAG: addiu $6, $zero, 888
-; CHECK2: lui $[[REGF2_1:[0-9]+]], 17802
-; CHECK2: ori $[[REGF2_2:[0-9]+]], $[[REGF2_1]], 58470
-; CHECK2: mtc1 $[[REGF2_2]], $f[[REGF2_3:[0-9]+]]
-; CHECK2: mfc1 $7, $f[[REGF2_3]]
-; CHECK: lw $25, %got(xifif)($[[REG_GP]])
-; CHECK2: lw $25, %got(xifif)($[[REG_GP]])
-; CHECK2: jalr $25
-; CHECK: jalr $25
+declare void @xfii(float, i32, i32)
+define void @cxfii() {
+ ; ALL-LABEL: cxfii:
+
+ ; ALL: addu $[[REG_GP:[0-9]+]], ${{[0-9]+}}, ${{[0-9+]}}
+ ; ALL-DAG: lui $[[REG_FPCONST_1:[0-9]+]], 17142
+ ; ALL-DAG: ori $[[REG_FPCONST:[0-9]+]], $[[REG_FPCONST_1]], 16240
+ ; ALL-DAG: mtc1 $[[REG_FPCONST]], $f12
+ ; ALL-DAG: addiu $5, $zero, 9993
+ ; ALL-DAG: addiu $6, $zero, 10922
+ ; ALL-DAG: lw $25, %got(xfii)($[[REG_GP]])
+ ; ALL: jalr $25
+ call void @xfii(float 0x405EC7EE00000000, i32 9993, i32 10922)
ret void
}
-declare void @xifif(i32, float, i32, float) #1
-
-; Function Attrs: nounwind
-define void @cxiffi() #0 {
-entry:
-; CHECK-label: cxiffi:
-; CHECK2-label: cxiffi:
- call void @xiffi(i32 45, float 0x3FF6666660000000, float 0x408F333340000000, i32 234)
-; CHECK-DAG: addu $[[REG_GP:[0-9]+]], ${{[0-9]+}}, ${{[0-9+]}}
-; CHECK-DAG: addiu $4, $zero, 45
-; CHECK2-DAG: addu $[[REG_GP:[0-9]+]], ${{[0-9]+}}, ${{[0-9+]}}
-; CHECK2-DAG: addiu $4, $zero, 45
-; CHECK-DAG: lui $[[REGF_1:[0-9]+]], 16307
-; CHECK-DAG: ori $[[REGF_2:[0-9]+]], $[[REGF_1]], 13107
-; CHECK-DAG: mtc1 $[[REGF_2]], $f[[REGF_3:[0-9]+]]
-; CHECK-DAG: mfc1 $5, $f[[REGF_3]]
-; CHECK2: lui $[[REGF2_1:[0-9]+]], 17529
-; CHECK2: ori $[[REGF2_2:[0-9]+]], $[[REGF2_1]], 39322
-; CHECK2: mtc1 $[[REGF2_2]], $f[[REGF2_3:[0-9]+]]
-; CHECK2: mfc1 $6, $f[[REGF2_3]]
-; CHECK-DAG: lw $25, %got(xiffi)($[[REG_GP]])
-; CHECK-DAG: addiu $7, $zero, 234
-; CHECK2-DAG: lw $25, %got(xiffi)($[[REG_GP]])
-; CHECK: jalr $25
-; CHECK2: jalr $25
+declare void @xfiii(float, i32, i32, i32)
+
+define void @cxfiii() {
+ ; ALL-LABEL: cxfiii:
+
+ ; ALL: addu $[[REG_GP:[0-9]+]], ${{[0-9]+}}, ${{[0-9+]}}
+ ; ALL-DAG: lui $[[REG_FPCONST_1:[0-9]+]], 17120
+ ; ALL-DAG: ori $[[REG_FPCONST:[0-9]+]], $[[REG_FPCONST_1]], 14681
+ ; ALL-DAG: mtc1 $[[REG_FPCONST]], $f12
+ ; ALL-DAG: addiu $5, $zero, 3948
+ ; ALL-DAG: lui $[[REG_I_1:[0-9]+]], 1
+ ; ALL-DAG: ori $6, $[[REG_I_1]], 23475
+ ; ALL-DAG: lui $[[REG_I_2:[0-9]+]], 1
+ ; ALL-DAG: ori $7, $[[REG_I_2]], 45686
+ ; ALL-DAG: lw $25, %got(xfiii)($[[REG_GP]])
+ ; ALL: jalr $25
+ call void @xfiii(float 0x405C072B20000000, i32 3948, i32 89011, i32 111222)
+ ret void
+}
+declare void @xd(double)
+
+define void @cxd() {
+ ; ALL-LABEL: cxd:
+
+ ; ALL: addu $[[REG_GP:[0-9]+]], ${{[0-9]+}}, ${{[0-9+]}}
+ ; ALL-DAG: lui $[[REG_FPCONST_1:[0-9]+]], 16514
+ ; ALL-DAG: ori $[[REG_FPCONST_2:[0-9]+]], $[[REG_FPCONST_1]], 48037
+ ; ALL-DAG: lui $[[REG_FPCONST_3:[0-9]+]], 58195
+ ; ALL-DAG: ori $[[REG_FPCONST_4:[0-9]+]], $[[REG_FPCONST_3]], 63439
+ ; ALL-DAG: mtc1 $[[REG_FPCONST_4]], $f12
+ ; 32R1-DAG: mtc1 $[[REG_FPCONST_2]], $f13
+ ; 32R2-DAG: mthc1 $[[REG_FPCONST_2]], $f12
+ ; ALL-DAG: lw $25, %got(xd)($[[REG_GP]])
+ ; ALL: jalr $25
+ call void @xd(double 5.994560e+02)
ret void
}
-declare void @xiffi(i32, float, float, i32) #1
+declare void @xdd(double, double)
+
+define void @cxdd() {
+ ; ALL-LABEL: cxdd:
+
+ ; ALL: addu $[[REG_GP:[0-9]+]], ${{[0-9]+}}, ${{[0-9+]}}
+ ; ALL-DAG: lui $[[REG_FPCONST_1:[0-9]+]], 16531
+ ; ALL-DAG: ori $[[REG_FPCONST_2:[0-9]+]], $[[REG_FPCONST_1]], 19435
+ ; ALL-DAG: lui $[[REG_FPCONST_3:[0-9]+]], 34078
+ ; ALL-DAG: ori $[[REG_FPCONST_4:[0-9]+]], $[[REG_FPCONST_3]], 47186
+ ; ALL-DAG: mtc1 $[[REG_FPCONST_4]], $f12
+ ; 32R1-DAG: mtc1 $[[REG_FPCONST_2]], $f13
+ ; 32R2-DAG: mthc1 $[[REG_FPCONST_2]], $f12
+ ; ALL-DAG: lui $[[REG_FPCONST_1:[0-9]+]], 16629
+ ; ALL-DAG: ori $[[REG_FPCONST_2:[0-9]+]], $[[REG_FPCONST_1]], 45873
+ ; ALL-DAG: lui $[[REG_FPCONST_3:[0-9]+]], 63438
+ ; ALL-DAG: ori $[[REG_FPCONST_4:[0-9]+]], $[[REG_FPCONST_3]], 55575
+ ; ALL-DAG: mtc1 $[[REG_FPCONST_4]], $f14
+ ; 32R1-DAG: mtc1 $[[REG_FPCONST_2]], $f15
+ ; 32R2-DAG: mthc1 $[[REG_FPCONST_2]], $f14
+ ; ALL-DAG: lw $25, %got(xdd)($[[REG_GP]])
+ ; ALL: jalr $25
+ call void @xdd(double 1.234980e+03, double 0x40F5B331F7CED917)
+ ret void
+}
-; Function Attrs: nounwind
-define void @cxifii() #0 {
-entry:
-; CHECK-DAG: cxifii:
- call void @xifii(i32 12239, float 0x408EDB3340000000, i32 998877, i32 1234)
-; CHECK-DAG: addu $[[REG_GP:[0-9]+]], ${{[0-9]+}}, ${{[0-9+]}}
-; CHECK-DAG: addiu $4, $zero, 12239
-; CHECK-DAG: lui $[[REGF_1:[0-9]+]], 17526
-; CHECK-DAG: ori $[[REGF_2:[0-9]+]], $[[REGF_1]], 55706
-; CHECK-DAG: mtc1 $[[REGF_2]], $f[[REGF_3:[0-9]+]]
-; CHECK-DAG: mfc1 $5, $f[[REGF_3]]
-; CHECK-DAG: lui $[[REGI2:[0-9]+]], 15
-; CHECK-DAG: ori $6, $[[REGI2]], 15837
-; CHECk-DAG: addiu $7, $zero, 1234
-; CHECK-DAG: lw $25, %got(xifii)($[[REG_GP]])
-; CHECK: jalr $25
+declare void @xif(i32, float)
+
+define void @cxif() {
+ ; ALL-LABEL: cxif:
+
+ ; ALL: addu $[[REG_GP:[0-9]+]], ${{[0-9]+}}, ${{[0-9+]}}
+ ; ALL-DAG: addiu $4, $zero, 345
+ ; ALL-DAG: lui $[[REGF_1:[0-9]+]], 17374
+ ; ALL-DAG: ori $[[REGF_2:[0-9]+]], $[[REGF_1]], 29393
+ ; ALL-DAG: mtc1 $[[REGF_2]], $f[[REGF_3:[0-9]+]]
+ ; ALL-DAG: mfc1 $5, $f[[REGF_3]]
+ ; ALL-DAG: lw $25, %got(xif)($[[REG_GP]])
+ ; ALL: jalr $25
+ call void @xif(i32 345, float 0x407BCE5A20000000)
ret void
}
-declare void @xifii(i32, float, i32, i32) #1
-
-; FIXME: this function will not pass yet.
-; Function Attrs: nounwind
-; define void @cxfid() #0 {
-;entry:
-; call void @xfid(float 0x4013B851E0000000, i32 811123, double 0x40934BFF487FCB92)
-; ret void
-;}
-
-declare void @xfid(float, i32, double) #1
-
-; Function Attrs: nounwind
-define void @g() #0 {
-entry:
- call void @cxi()
- call void @cxii()
- call void @cxiii()
- call void @cxiiii()
- call void @cxiiiiconv()
- call void @cxf()
- call void @cxff()
- call void @cxd()
- call void @cxfi()
- call void @cxfii()
- call void @cxfiii()
- call void @cxdd()
- call void @cxif()
- call void @cxiff()
- call void @cxifi()
- call void @cxifii()
- call void @cxifif()
- call void @cxiffi()
+declare void @xiff(i32, float, float)
+
+define void @cxiff() {
+ ; ALL-LABEL: cxiff:
+
+ ; ALL: addu $[[REG_GP:[0-9]+]], ${{[0-9]+}}, ${{[0-9+]}}
+ ; ALL-DAG: addiu $4, $zero, 12239
+ ; ALL-DAG: lui $[[REGF0_1:[0-9]+]], 17526
+ ; ALL-DAG: ori $[[REGF0_2:[0-9]+]], $[[REGF0_1]], 55706
+ ; ALL-DAG: mtc1 $[[REGF0_2]], $f[[REGF0_3:[0-9]+]]
+ ; ALL-DAG: lui $[[REGF1_1:[0-9]+]], 16543
+ ; ALL-DAG: ori $[[REGF1_2:[0-9]+]], $[[REGF1_1]], 65326
+ ; ALL: mtc1 $[[REGF1_2]], $f[[REGF1_3:[0-9]+]]
+ ; ALL-DAG: mfc1 $5, $f[[REGF0_3]]
+ ; ALL-DAG: mfc1 $6, $f[[REGF1_3]]
+ ; ALL-DAG: lw $25, %got(xiff)($[[REG_GP]])
+ ; ALL: jalr $25
+ call void @xiff(i32 12239, float 0x408EDB3340000000, float 0x4013FFE5C0000000)
ret void
}
+declare void @xifi(i32, float, i32)
-attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+define void @cxifi() {
+ ; ALL-LABEL: cxifi:
-!llvm.ident = !{!0}
+ ; ALL: addu $[[REG_GP:[0-9]+]], ${{[0-9]+}}, ${{[0-9+]}}
+ ; ALL-DAG: addiu $4, $zero, 887
+ ; ALL-DAG: lui $[[REGF_1:[0-9]+]], 16659
+ ; ALL-DAG: ori $[[REGF_2:[0-9]+]], $[[REGF_1]], 48759
+ ; ALL-DAG: mtc1 $[[REGF_2]], $f[[REGF_3:[0-9]+]]
+ ; ALL-DAG: mfc1 $5, $f[[REGF_3]]
+ ; ALL-DAG: addiu $6, $zero, 888
+ ; ALL-DAG: lw $25, %got(xifi)($[[REG_GP]])
+ ; ALL: jalr $25
+ call void @xifi(i32 887, float 0x402277CEE0000000, i32 888)
+ ret void
+}
-!0 = !{!"clang version 3.6.0 (gitosis@dmz-portal.mips.com:clang 43992fe7b17de5553ac06d323cb80cc6723a9ae3) (gitosis@dmz-portal.mips.com:llvm.git 0834e6839eb170197c81bb02e916258d1527e312)"}
+declare void @xifif(i32, float, i32, float)
+
+define void @cxifif() {
+ ; ALL-LABEL: cxifif:
+
+ ; ALL: addu $[[REG_GP:[0-9]+]], ${{[0-9]+}}, ${{[0-9+]}}
+ ; ALL-DAG: lui $[[REGI:[0-9]+]], 1
+ ; ALL-DAG: ori $4, $[[REGI]], 2238
+ ; ALL-DAG: lui $[[REGF0_1:[0-9]+]], 17527
+ ; ALL-DAG: ori $[[REGF0_2:[0-9]+]], $[[REGF0_1]], 2015
+ ; ALL-DAG: mtc1 $[[REGF0_2]], $f[[REGF0_3:[0-9]+]]
+ ; ALL-DAG: addiu $6, $zero, 9991
+ ; ALL-DAG: lui $[[REGF1_1:[0-9]+]], 17802
+ ; ALL-DAG: ori $[[REGF1_2:[0-9]+]], $[[REGF1_1]], 58470
+ ; ALL: mtc1 $[[REGF1_2]], $f[[REGF1_3:[0-9]+]]
+ ; ALL-DAG: mfc1 $5, $f[[REGF0_3]]
+ ; ALL-DAG: mfc1 $7, $f[[REGF1_3]]
+ ; ALL-DAG: lw $25, %got(xifif)($[[REG_GP]])
+ ; ALL: jalr $25
+ call void @xifif(i32 67774, float 0x408EE0FBE0000000,
+ i32 9991, float 0x40B15C8CC0000000)
+ ret void
+}
+
+declare void @xiffi(i32, float, float, i32)
+
+define void @cxiffi() {
+ ; ALL-LABEL: cxiffi:
+
+ ; ALL: addu $[[REG_GP:[0-9]+]], ${{[0-9]+}}, ${{[0-9+]}}
+ ; ALL-DAG: addiu $4, $zero, 45
+ ; ALL-DAG: lui $[[REGF0_1:[0-9]+]], 16307
+ ; ALL-DAG: ori $[[REGF0_2:[0-9]+]], $[[REGF0_1]], 13107
+ ; ALL-DAG: mtc1 $[[REGF0_2]], $f[[REGF0_3:[0-9]+]]
+ ; ALL-DAG: lui $[[REGF1_1:[0-9]+]], 17529
+ ; ALL-DAG: ori $[[REGF1_2:[0-9]+]], $[[REGF1_1]], 39322
+ ; ALL: mtc1 $[[REGF1_2]], $f[[REGF1_3:[0-9]+]]
+ ; ALL-DAG: addiu $7, $zero, 234
+ ; ALL-DAG: mfc1 $5, $f[[REGF0_3]]
+ ; ALL-DAG: mfc1 $6, $f[[REGF1_3]]
+ ; ALL-DAG: lw $25, %got(xiffi)($[[REG_GP]])
+ ; ALL: jalr $25
+ call void @xiffi(i32 45, float 0x3FF6666660000000,
+ float 0x408F333340000000, i32 234)
+ ret void
+}
+
+declare void @xifii(i32, float, i32, i32)
+
+define void @cxifii() {
+ ; ALL-LABEL: cxifii:
+
+ ; ALL-DAG: addu $[[REG_GP:[0-9]+]], ${{[0-9]+}}, ${{[0-9+]}}
+ ; ALL-DAG: addiu $4, $zero, 12239
+ ; ALL-DAG: lui $[[REGF_1:[0-9]+]], 17526
+ ; ALL-DAG: ori $[[REGF_2:[0-9]+]], $[[REGF_1]], 55706
+ ; ALL-DAG: mtc1 $[[REGF_2]], $f[[REGF_3:[0-9]+]]
+ ; ALL-DAG: mfc1 $5, $f[[REGF_3]]
+ ; ALL-DAG: lui $[[REGI2:[0-9]+]], 15
+ ; ALL-DAG: ori $6, $[[REGI2]], 15837
+ ; ALL-DAG: addiu $7, $zero, 1234
+ ; ALL-DAG: lw $25, %got(xifii)($[[REG_GP]])
+ ; ALL: jalr $25
+ call void @xifii(i32 12239, float 0x408EDB3340000000, i32 998877, i32 1234)
+ ret void
+}
diff --git a/test/CodeGen/Mips/Fast-ISel/constexpr-address.ll b/test/CodeGen/Mips/Fast-ISel/constexpr-address.ll
new file mode 100644
index 000000000000..df60d8071836
--- /dev/null
+++ b/test/CodeGen/Mips/Fast-ISel/constexpr-address.ll
@@ -0,0 +1,18 @@
+; RUN: llc -march=mipsel -mcpu=mips32 -relocation-model=pic \
+; RUN: -fast-isel=true -mips-fast-isel -fast-isel-abort=1 < %s | FileCheck %s
+; RUN: llc -march=mipsel -mcpu=mips32r2 -relocation-model=pic \
+; RUN: -fast-isel=true -mips-fast-isel -fast-isel-abort=1 < %s | FileCheck %s
+
+@ARR = external global [10 x i32], align 4
+
+define void @foo() {
+; CHECK-LABEL: foo
+
+; CHECK-DAG: lw $[[ARR:[0-9]+]], %got(ARR)({{.*}})
+; CHECK-DAG: addiu $[[T0:[0-9]+]], $zero, 12345
+; CHECK: sw $[[T0]], 8($[[ARR]])
+
+entry:
+ store i32 12345, i32* getelementptr inbounds ([10 x i32], [10 x i32]* @ARR, i32 0, i32 2), align 4
+ ret void
+}
diff --git a/test/CodeGen/Mips/Fast-ISel/fastalloca.ll b/test/CodeGen/Mips/Fast-ISel/fastalloca.ll
new file mode 100644
index 000000000000..b4a9f1ce7ab0
--- /dev/null
+++ b/test/CodeGen/Mips/Fast-ISel/fastalloca.ll
@@ -0,0 +1,32 @@
+; RUN: llc -march=mipsel -relocation-model=pic -O0 -mips-fast-isel -fast-isel-abort=1 -mcpu=mips32r2 \
+; RUN: < %s | FileCheck %s
+
+%struct.x = type { i32 }
+
+@i = common global i32 0, align 4
+
+define i32 @foobar(i32 signext %x) {
+entry:
+; CHECK-LABEL: foobar:
+ %retval = alloca i32, align 4
+ %x.addr = alloca i32, align 4
+ %a = alloca %struct.x, align 4
+ %c = alloca %struct.x*, align 4
+ store i32 %x, i32* %x.addr, align 4
+ %x1 = getelementptr inbounds %struct.x, %struct.x* %a, i32 0, i32 0
+ %0 = load i32, i32* %x.addr, align 4
+ store i32 %0, i32* %x1, align 4
+ store %struct.x* %a, %struct.x** %c, align 4
+ %1 = load %struct.x*, %struct.x** %c, align 4
+ %x2 = getelementptr inbounds %struct.x, %struct.x* %1, i32 0, i32 0
+ %2 = load i32, i32* %x2, align 4
+ store i32 %2, i32* @i, align 4
+ %3 = load i32, i32* %retval
+; CHECK-DAG: lw $[[I_ADDR:[0-9]+]], %got(i)($[[REG_GP:[0-9]+]])
+; CHECK-DAG: addiu $[[A_ADDR:[0-9]+]], $sp, 8
+; CHECK-DAG: sw $[[A_ADDR]], [[A_ADDR_FI:[0-9]+]]($sp)
+; CHECK-DAG: lw $[[A_ADDR2:[0-9]+]], [[A_ADDR_FI]]($sp)
+; CHECK-DAG: lw $[[A_X:[0-9]+]], 0($[[A_ADDR2]])
+; CHECK-DAG: sw $[[A_X]], 0($[[I_ADDR]])
+ ret i32 %3
+}
diff --git a/test/CodeGen/Mips/Fast-ISel/fpcmpa.ll b/test/CodeGen/Mips/Fast-ISel/fpcmpa.ll
index c72b1e70c718..72de888b26e0 100644
--- a/test/CodeGen/Mips/Fast-ISel/fpcmpa.ll
+++ b/test/CodeGen/Mips/Fast-ISel/fpcmpa.ll
@@ -1,6 +1,6 @@
-; RUN: llc -march=mipsel -relocation-model=pic -O0 -mips-fast-isel -fast-isel-abort -mcpu=mips32r2 \
+; RUN: llc -march=mipsel -relocation-model=pic -O0 -mips-fast-isel -fast-isel-abort=1 -mcpu=mips32r2 \
; RUN: < %s | FileCheck %s
-; RUN: llc -march=mipsel -relocation-model=pic -O0 -mips-fast-isel -fast-isel-abort -mcpu=mips32 \
+; RUN: llc -march=mipsel -relocation-model=pic -O0 -mips-fast-isel -fast-isel-abort=1 -mcpu=mips32 \
; RUN: < %s | FileCheck %s
@f1 = common global float 0.000000e+00, align 4
@@ -12,8 +12,8 @@
; Function Attrs: nounwind
define void @feq1() {
entry:
- %0 = load float* @f1, align 4
- %1 = load float* @f2, align 4
+ %0 = load float, float* @f1, align 4
+ %1 = load float, float* @f2, align 4
%cmp = fcmp oeq float %0, %1
; CHECK-LABEL: feq1:
; CHECK-DAG: lw $[[REG_F2_GOT:[0-9]+]], %got(f2)(${{[0-9]+}})
@@ -33,8 +33,8 @@ entry:
; Function Attrs: nounwind
define void @fne1() {
entry:
- %0 = load float* @f1, align 4
- %1 = load float* @f2, align 4
+ %0 = load float, float* @f1, align 4
+ %1 = load float, float* @f2, align 4
%cmp = fcmp une float %0, %1
; CHECK-LABEL: fne1:
; CHECK-DAG: lw $[[REG_F2_GOT:[0-9]+]], %got(f2)(${{[0-9]+}})
@@ -53,8 +53,8 @@ entry:
; Function Attrs: nounwind
define void @flt1() {
entry:
- %0 = load float* @f1, align 4
- %1 = load float* @f2, align 4
+ %0 = load float, float* @f1, align 4
+ %1 = load float, float* @f2, align 4
%cmp = fcmp olt float %0, %1
; CHECK-LABEL: flt1:
; CHECK-DAG: lw $[[REG_F2_GOT:[0-9]+]], %got(f2)(${{[0-9]+}})
@@ -74,8 +74,8 @@ entry:
; Function Attrs: nounwind
define void @fgt1() {
entry:
- %0 = load float* @f1, align 4
- %1 = load float* @f2, align 4
+ %0 = load float, float* @f1, align 4
+ %1 = load float, float* @f2, align 4
%cmp = fcmp ogt float %0, %1
; CHECK-LABEL: fgt1:
; CHECK-DAG: lw $[[REG_F2_GOT:[0-9]+]], %got(f2)(${{[0-9]+}})
@@ -94,8 +94,8 @@ entry:
; Function Attrs: nounwind
define void @fle1() {
entry:
- %0 = load float* @f1, align 4
- %1 = load float* @f2, align 4
+ %0 = load float, float* @f1, align 4
+ %1 = load float, float* @f2, align 4
%cmp = fcmp ole float %0, %1
; CHECK-LABEL: fle1:
; CHECK-DAG: lw $[[REG_F2_GOT:[0-9]+]], %got(f2)(${{[0-9]+}})
@@ -114,8 +114,8 @@ entry:
; Function Attrs: nounwind
define void @fge1() {
entry:
- %0 = load float* @f1, align 4
- %1 = load float* @f2, align 4
+ %0 = load float, float* @f1, align 4
+ %1 = load float, float* @f2, align 4
%cmp = fcmp oge float %0, %1
; CHECK-LABEL: fge1:
; CHECK-DAG: lw $[[REG_F2_GOT:[0-9]+]], %got(f2)(${{[0-9]+}})
@@ -134,8 +134,8 @@ entry:
; Function Attrs: nounwind
define void @deq1() {
entry:
- %0 = load double* @d1, align 8
- %1 = load double* @d2, align 8
+ %0 = load double, double* @d1, align 8
+ %1 = load double, double* @d2, align 8
%cmp = fcmp oeq double %0, %1
; CHECK-LABEL: deq1:
; CHECK-DAG: lw $[[REG_D2_GOT:[0-9]+]], %got(d2)(${{[0-9]+}})
@@ -154,8 +154,8 @@ entry:
; Function Attrs: nounwind
define void @dne1() {
entry:
- %0 = load double* @d1, align 8
- %1 = load double* @d2, align 8
+ %0 = load double, double* @d1, align 8
+ %1 = load double, double* @d2, align 8
%cmp = fcmp une double %0, %1
; CHECK-LABEL: dne1:
; CHECK-DAG: lw $[[REG_D2_GOT:[0-9]+]], %got(d2)(${{[0-9]+}})
@@ -174,8 +174,8 @@ entry:
; Function Attrs: nounwind
define void @dlt1() {
entry:
- %0 = load double* @d1, align 8
- %1 = load double* @d2, align 8
+ %0 = load double, double* @d1, align 8
+ %1 = load double, double* @d2, align 8
%cmp = fcmp olt double %0, %1
; CHECK-LABEL: dlt1:
; CHECK-DAG: lw $[[REG_D2_GOT:[0-9]+]], %got(d2)(${{[0-9]+}})
@@ -194,8 +194,8 @@ entry:
; Function Attrs: nounwind
define void @dgt1() {
entry:
- %0 = load double* @d1, align 8
- %1 = load double* @d2, align 8
+ %0 = load double, double* @d1, align 8
+ %1 = load double, double* @d2, align 8
%cmp = fcmp ogt double %0, %1
; CHECK-LABEL: dgt1:
; CHECK-DAG: lw $[[REG_D2_GOT:[0-9]+]], %got(d2)(${{[0-9]+}})
@@ -214,8 +214,8 @@ entry:
; Function Attrs: nounwind
define void @dle1() {
entry:
- %0 = load double* @d1, align 8
- %1 = load double* @d2, align 8
+ %0 = load double, double* @d1, align 8
+ %1 = load double, double* @d2, align 8
%cmp = fcmp ole double %0, %1
; CHECK-LABEL: dle1:
; CHECK-DAG: lw $[[REG_D2_GOT:[0-9]+]], %got(d2)(${{[0-9]+}})
@@ -234,8 +234,8 @@ entry:
; Function Attrs: nounwind
define void @dge1() {
entry:
- %0 = load double* @d1, align 8
- %1 = load double* @d2, align 8
+ %0 = load double, double* @d1, align 8
+ %1 = load double, double* @d2, align 8
%cmp = fcmp oge double %0, %1
; CHECK-LABEL: dge1:
; CHECK-DAG: lw $[[REG_D2_GOT:[0-9]+]], %got(d2)(${{[0-9]+}})
diff --git a/test/CodeGen/Mips/Fast-ISel/fpext.ll b/test/CodeGen/Mips/Fast-ISel/fpext.ll
index 98aca756c58f..5ac22490ff02 100644
--- a/test/CodeGen/Mips/Fast-ISel/fpext.ll
+++ b/test/CodeGen/Mips/Fast-ISel/fpext.ll
@@ -1,6 +1,6 @@
-; RUN: llc -march=mipsel -relocation-model=pic -O0 -mips-fast-isel -fast-isel-abort -mcpu=mips32r2 \
+; RUN: llc -march=mipsel -relocation-model=pic -O0 -mips-fast-isel -fast-isel-abort=1 -mcpu=mips32r2 \
; RUN: < %s | FileCheck %s
-; RUN: llc -march=mipsel -relocation-model=pic -O0 -mips-fast-isel -fast-isel-abort -mcpu=mips32 \
+; RUN: llc -march=mipsel -relocation-model=pic -O0 -mips-fast-isel -fast-isel-abort=1 -mcpu=mips32 \
; RUN: < %s | FileCheck %s
@f = global float 0x40147E6B80000000, align 4
@@ -10,7 +10,7 @@
; Function Attrs: nounwind
define void @dv() #0 {
entry:
- %0 = load float* @f, align 4
+ %0 = load float, float* @f, align 4
%conv = fpext float %0 to double
; CHECK: cvt.d.s $f{{[0-9]+}}, $f{{[0-9]+}}
store double %conv, double* @d_f, align 8
diff --git a/test/CodeGen/Mips/Fast-ISel/fpintconv.ll b/test/CodeGen/Mips/Fast-ISel/fpintconv.ll
index 846726a868b3..a94ef5081539 100644
--- a/test/CodeGen/Mips/Fast-ISel/fpintconv.ll
+++ b/test/CodeGen/Mips/Fast-ISel/fpintconv.ll
@@ -1,6 +1,6 @@
-; RUN: llc -march=mipsel -relocation-model=pic -O0 -mips-fast-isel -fast-isel-abort -mcpu=mips32r2 \
+; RUN: llc -march=mipsel -relocation-model=pic -O0 -mips-fast-isel -fast-isel-abort=1 -mcpu=mips32r2 \
; RUN: < %s | FileCheck %s
-; RUN: llc -march=mipsel -relocation-model=pic -O0 -mips-fast-isel -fast-isel-abort -mcpu=mips32 \
+; RUN: llc -march=mipsel -relocation-model=pic -O0 -mips-fast-isel -fast-isel-abort=1 -mcpu=mips32 \
; RUN: < %s | FileCheck %s
@@ -14,7 +14,7 @@
define void @ifv() {
entry:
; CHECK-LABEL: .ent ifv
- %0 = load float* @f, align 4
+ %0 = load float, float* @f, align 4
%conv = fptosi float %0 to i32
; CHECK: trunc.w.s $f[[REG:[0-9]+]], $f{{[0-9]+}}
; CHECK: mfc1 ${{[0-9]+}}, $f[[REG]]
@@ -26,7 +26,7 @@ entry:
define void @idv() {
entry:
; CHECK-LABEL: .ent idv
- %0 = load double* @d, align 8
+ %0 = load double, double* @d, align 8
%conv = fptosi double %0 to i32
; CHECK: trunc.w.d $f[[REG:[0-9]+]], $f{{[0-9]+}}
; CHECK: mfc1 ${{[0-9]+}}, $f[[REG]]
diff --git a/test/CodeGen/Mips/Fast-ISel/fptrunc.ll b/test/CodeGen/Mips/Fast-ISel/fptrunc.ll
index d843dee5a8c9..2eec4c3ef547 100644
--- a/test/CodeGen/Mips/Fast-ISel/fptrunc.ll
+++ b/test/CodeGen/Mips/Fast-ISel/fptrunc.ll
@@ -1,6 +1,6 @@
-; RUN: llc -march=mipsel -relocation-model=pic -O0 -mips-fast-isel -fast-isel-abort -mcpu=mips32r2 \
+; RUN: llc -march=mipsel -relocation-model=pic -O0 -mips-fast-isel -fast-isel-abort=1 -mcpu=mips32r2 \
; RUN: < %s | FileCheck %s
-; RUN: llc -march=mipsel -relocation-model=pic -O0 -mips-fast-isel -fast-isel-abort -mcpu=mips32 \
+; RUN: llc -march=mipsel -relocation-model=pic -O0 -mips-fast-isel -fast-isel-abort=1 -mcpu=mips32 \
; RUN: < %s | FileCheck %s
@d = global double 0x40147E6B74DF0446, align 8
@@ -10,7 +10,7 @@
; Function Attrs: nounwind
define void @fv() #0 {
entry:
- %0 = load double* @d, align 8
+ %0 = load double, double* @d, align 8
%conv = fptrunc double %0 to float
; CHECK: cvt.s.d $f{{[0-9]+}}, $f{{[0-9]+}}
store float %conv, float* @f, align 4
diff --git a/test/CodeGen/Mips/Fast-ISel/icmpa.ll b/test/CodeGen/Mips/Fast-ISel/icmpa.ll
index bd41a2911dc4..670a8d5cfb4e 100644
--- a/test/CodeGen/Mips/Fast-ISel/icmpa.ll
+++ b/test/CodeGen/Mips/Fast-ISel/icmpa.ll
@@ -1,6 +1,6 @@
-; RUN: llc -march=mipsel -relocation-model=pic -O0 -mips-fast-isel -fast-isel-abort -mcpu=mips32r2 \
+; RUN: llc -march=mipsel -relocation-model=pic -O0 -mips-fast-isel -fast-isel-abort=1 -mcpu=mips32r2 \
; RUN: < %s | FileCheck %s
-; RUN: llc -march=mipsel -relocation-model=pic -O0 -mips-fast-isel -fast-isel-abort -mcpu=mips32 \
+; RUN: llc -march=mipsel -relocation-model=pic -O0 -mips-fast-isel -fast-isel-abort=1 -mcpu=mips32 \
; RUN: < %s | FileCheck %s
@c = global i32 4, align 4
@@ -14,8 +14,8 @@ define void @eq() {
entry:
; CHECK-LABEL: .ent eq
- %0 = load i32* @c, align 4
- %1 = load i32* @d, align 4
+ %0 = load i32, i32* @c, align 4
+ %1 = load i32, i32* @d, align 4
%cmp = icmp eq i32 %0, %1
%conv = zext i1 %cmp to i32
; CHECK-DAG: lw $[[REG_D_GOT:[0-9+]]], %got(d)(${{[0-9]+}})
@@ -35,8 +35,8 @@ entry:
define void @ne() {
entry:
; CHECK-LABEL: .ent ne
- %0 = load i32* @c, align 4
- %1 = load i32* @d, align 4
+ %0 = load i32, i32* @c, align 4
+ %1 = load i32, i32* @d, align 4
%cmp = icmp ne i32 %0, %1
%conv = zext i1 %cmp to i32
; CHECK-DAG: lw $[[REG_D_GOT:[0-9]+]], %got(d)(${{[0-9]+}})
@@ -56,8 +56,8 @@ entry:
define void @ugt() {
entry:
; CHECK-LABEL: .ent ugt
- %0 = load i32* @uc, align 4
- %1 = load i32* @ud, align 4
+ %0 = load i32, i32* @uc, align 4
+ %1 = load i32, i32* @ud, align 4
%cmp = icmp ugt i32 %0, %1
%conv = zext i1 %cmp to i32
; CHECK-DAG: lw $[[REG_UD_GOT:[0-9]+]], %got(ud)(${{[0-9]+}})
@@ -76,8 +76,8 @@ entry:
define void @ult() {
entry:
; CHECK-LABEL: .ent ult
- %0 = load i32* @uc, align 4
- %1 = load i32* @ud, align 4
+ %0 = load i32, i32* @uc, align 4
+ %1 = load i32, i32* @ud, align 4
%cmp = icmp ult i32 %0, %1
%conv = zext i1 %cmp to i32
; CHECK-DAG: lw $[[REG_UD_GOT:[0-9]+]], %got(ud)(${{[0-9]+}})
@@ -95,8 +95,8 @@ entry:
define void @uge() {
entry:
; CHECK-LABEL: .ent uge
- %0 = load i32* @uc, align 4
- %1 = load i32* @ud, align 4
+ %0 = load i32, i32* @uc, align 4
+ %1 = load i32, i32* @ud, align 4
%cmp = icmp uge i32 %0, %1
%conv = zext i1 %cmp to i32
; CHECK-DAG: lw $[[REG_UD_GOT:[0-9]+]], %got(ud)(${{[0-9]+}})
@@ -115,8 +115,8 @@ entry:
define void @ule() {
entry:
; CHECK-LABEL: .ent ule
- %0 = load i32* @uc, align 4
- %1 = load i32* @ud, align 4
+ %0 = load i32, i32* @uc, align 4
+ %1 = load i32, i32* @ud, align 4
%cmp = icmp ule i32 %0, %1
%conv = zext i1 %cmp to i32
; CHECK-DAG: lw $[[REG_UD_GOT:[0-9]+]], %got(ud)(${{[0-9]+}})
@@ -135,8 +135,8 @@ entry:
define void @sgt() {
entry:
; CHECK-LABEL: .ent sgt
- %0 = load i32* @c, align 4
- %1 = load i32* @d, align 4
+ %0 = load i32, i32* @c, align 4
+ %1 = load i32, i32* @d, align 4
%cmp = icmp sgt i32 %0, %1
%conv = zext i1 %cmp to i32
; CHECK-DAG: lw $[[REG_D_GOT:[0-9]+]], %got(d)(${{[0-9]+}})
@@ -154,8 +154,8 @@ entry:
define void @slt() {
entry:
; CHECK-LABEL: .ent slt
- %0 = load i32* @c, align 4
- %1 = load i32* @d, align 4
+ %0 = load i32, i32* @c, align 4
+ %1 = load i32, i32* @d, align 4
%cmp = icmp slt i32 %0, %1
%conv = zext i1 %cmp to i32
; CHECK-DAG: lw $[[REG_D_GOT:[0-9]+]], %got(d)(${{[0-9]+}})
@@ -173,8 +173,8 @@ entry:
define void @sge() {
entry:
; CHECK-LABEL: .ent sge
- %0 = load i32* @c, align 4
- %1 = load i32* @d, align 4
+ %0 = load i32, i32* @c, align 4
+ %1 = load i32, i32* @d, align 4
%cmp = icmp sge i32 %0, %1
%conv = zext i1 %cmp to i32
store i32 %conv, i32* @b1, align 4
@@ -193,8 +193,8 @@ entry:
define void @sle() {
entry:
; CHECK-LABEL: .ent sle
- %0 = load i32* @c, align 4
- %1 = load i32* @d, align 4
+ %0 = load i32, i32* @c, align 4
+ %1 = load i32, i32* @d, align 4
%cmp = icmp sle i32 %0, %1
%conv = zext i1 %cmp to i32
; CHECK-DAG: lw $[[REG_D_GOT:[0-9]+]], %got(d)(${{[0-9]+}})
diff --git a/test/CodeGen/Mips/Fast-ISel/loadstore2.ll b/test/CodeGen/Mips/Fast-ISel/loadstore2.ll
index d84478b9c5a9..3daf03d681cb 100644
--- a/test/CodeGen/Mips/Fast-ISel/loadstore2.ll
+++ b/test/CodeGen/Mips/Fast-ISel/loadstore2.ll
@@ -4,9 +4,9 @@ target triple = "mips--linux-gnu"
@c2 = common global i8 0, align 1
@c1 = common global i8 0, align 1
-; RUN: llc -march=mipsel -relocation-model=pic -O0 -mips-fast-isel -fast-isel-abort -mcpu=mips32r2 \
+; RUN: llc -march=mipsel -relocation-model=pic -O0 -mips-fast-isel -fast-isel-abort=1 -mcpu=mips32r2 \
; RUN: < %s | FileCheck %s
-; RUN: llc -march=mipsel -relocation-model=pic -O0 -mips-fast-isel -fast-isel-abort -mcpu=mips32 \
+; RUN: llc -march=mipsel -relocation-model=pic -O0 -mips-fast-isel -fast-isel-abort=1 -mcpu=mips32 \
; RUN: < %s | FileCheck %s
@s2 = common global i16 0, align 2
@@ -21,7 +21,7 @@ target triple = "mips--linux-gnu"
; Function Attrs: nounwind
define void @cfoo() #0 {
entry:
- %0 = load i8* @c2, align 1
+ %0 = load i8, i8* @c2, align 1
store i8 %0, i8* @c1, align 1
; CHECK-LABEL: cfoo:
; CHECK: lbu $[[REGc:[0-9]+]], 0(${{[0-9]+}})
@@ -34,7 +34,7 @@ entry:
; Function Attrs: nounwind
define void @sfoo() #0 {
entry:
- %0 = load i16* @s2, align 2
+ %0 = load i16, i16* @s2, align 2
store i16 %0, i16* @s1, align 2
; CHECK-LABEL: sfoo:
; CHECK: lhu $[[REGs:[0-9]+]], 0(${{[0-9]+}})
@@ -46,7 +46,7 @@ entry:
; Function Attrs: nounwind
define void @ifoo() #0 {
entry:
- %0 = load i32* @i2, align 4
+ %0 = load i32, i32* @i2, align 4
store i32 %0, i32* @i1, align 4
; CHECK-LABEL: ifoo:
; CHECK: lw $[[REGi:[0-9]+]], 0(${{[0-9]+}})
@@ -58,7 +58,7 @@ entry:
; Function Attrs: nounwind
define void @ffoo() #0 {
entry:
- %0 = load float* @f2, align 4
+ %0 = load float, float* @f2, align 4
store float %0, float* @f1, align 4
; CHECK-LABEL: ffoo:
; CHECK: lwc1 $f[[REGf:[0-9]+]], 0(${{[0-9]+}})
@@ -71,7 +71,7 @@ entry:
; Function Attrs: nounwind
define void @dfoo() #0 {
entry:
- %0 = load double* @d2, align 8
+ %0 = load double, double* @d2, align 8
store double %0, double* @d1, align 8
; CHECK-LABEL: dfoo:
; CHECK: ldc1 $f[[REGd:[0-9]+]], 0(${{[0-9]+}})
diff --git a/test/CodeGen/Mips/Fast-ISel/loadstoreconv.ll b/test/CodeGen/Mips/Fast-ISel/loadstoreconv.ll
index f7f2c6481b3c..acba132b28e1 100644
--- a/test/CodeGen/Mips/Fast-ISel/loadstoreconv.ll
+++ b/test/CodeGen/Mips/Fast-ISel/loadstoreconv.ll
@@ -1,10 +1,10 @@
-; RUN: llc -march=mipsel -relocation-model=pic -O0 -mips-fast-isel -fast-isel-abort -mcpu=mips32r2 \
+; RUN: llc -march=mipsel -relocation-model=pic -O0 -mips-fast-isel -fast-isel-abort=1 -mcpu=mips32r2 \
; RUN: < %s | FileCheck %s
-; RUN: llc -march=mipsel -relocation-model=pic -O0 -mips-fast-isel -fast-isel-abort -mcpu=mips32 \
+; RUN: llc -march=mipsel -relocation-model=pic -O0 -mips-fast-isel -fast-isel-abort=1 -mcpu=mips32 \
; RUN: < %s | FileCheck %s
-; RUN: llc -march=mipsel -relocation-model=pic -O0 -mips-fast-isel -fast-isel-abort -mcpu=mips32r2 \
+; RUN: llc -march=mipsel -relocation-model=pic -O0 -mips-fast-isel -fast-isel-abort=1 -mcpu=mips32r2 \
; RUN: < %s | FileCheck %s -check-prefix=mips32r2
-; RUN: llc -march=mipsel -relocation-model=pic -O0 -mips-fast-isel -fast-isel-abort -mcpu=mips32 \
+; RUN: llc -march=mipsel -relocation-model=pic -O0 -mips-fast-isel -fast-isel-abort=1 -mcpu=mips32 \
; RUN: < %s | FileCheck %s -check-prefix=mips32
@b2 = global i8 0, align 1
@@ -28,11 +28,11 @@
define void @_Z3b_iv() {
entry:
; CHECK-LABEL: .ent _Z3b_iv
- %0 = load i8* @b1, align 1
+ %0 = load i8, i8* @b1, align 1
%tobool = trunc i8 %0 to i1
%frombool = zext i1 %tobool to i8
store i8 %frombool, i8* @b2, align 1
- %1 = load i8* @b2, align 1
+ %1 = load i8, i8* @b2, align 1
%tobool1 = trunc i8 %1 to i1
%conv = zext i1 %tobool1 to i32
store i32 %conv, i32* @i, align 4
@@ -51,10 +51,10 @@ define void @_Z4uc_iv() {
entry:
; CHECK-LABEL: .ent _Z4uc_iv
- %0 = load i8* @uc1, align 1
+ %0 = load i8, i8* @uc1, align 1
%conv = zext i8 %0 to i32
store i32 %conv, i32* @i, align 4
- %1 = load i8* @uc2, align 1
+ %1 = load i8, i8* @uc2, align 1
%conv1 = zext i8 %1 to i32
; CHECK: lbu $[[REG1:[0-9]+]], 0(${{[0-9]+}})
; CHECK: andi ${{[0-9]+}}, $[[REG1]], 255
@@ -71,10 +71,10 @@ entry:
; mips32r2-LABEL: .ent _Z4sc_iv
; mips32-LABEL: .ent _Z4sc_iv
- %0 = load i8* @sc1, align 1
+ %0 = load i8, i8* @sc1, align 1
%conv = sext i8 %0 to i32
store i32 %conv, i32* @i, align 4
- %1 = load i8* @sc2, align 1
+ %1 = load i8, i8* @sc2, align 1
%conv1 = sext i8 %1 to i32
store i32 %conv1, i32* @j, align 4
; mips32r2: lbu $[[REG1:[0-9]+]], 0(${{[0-9]+}})
@@ -91,10 +91,10 @@ entry:
define void @_Z4us_iv() {
entry:
; CHECK-LABEL: .ent _Z4us_iv
- %0 = load i16* @us1, align 2
+ %0 = load i16, i16* @us1, align 2
%conv = zext i16 %0 to i32
store i32 %conv, i32* @i, align 4
- %1 = load i16* @us2, align 2
+ %1 = load i16, i16* @us2, align 2
%conv1 = zext i16 %1 to i32
store i32 %conv1, i32* @j, align 4
ret void
@@ -109,10 +109,10 @@ entry:
; mips32r2-LABEL: .ent _Z4ss_iv
; mips32-LABEL: .ent _Z4ss_iv
- %0 = load i16* @ss1, align 2
+ %0 = load i16, i16* @ss1, align 2
%conv = sext i16 %0 to i32
store i32 %conv, i32* @i, align 4
- %1 = load i16* @ss2, align 2
+ %1 = load i16, i16* @ss2, align 2
%conv1 = sext i16 %1 to i32
store i32 %conv1, i32* @j, align 4
; mips32r2: lhu $[[REG1:[0-9]+]], 0(${{[0-9]+}})
@@ -129,7 +129,7 @@ entry:
define void @_Z4b_ssv() {
entry:
; CHECK-LABEL: .ent _Z4b_ssv
- %0 = load i8* @b2, align 1
+ %0 = load i8, i8* @b2, align 1
%tobool = trunc i8 %0 to i1
%conv = zext i1 %tobool to i16
store i16 %conv, i16* @ssi, align 2
@@ -143,10 +143,10 @@ entry:
define void @_Z5uc_ssv() {
entry:
; CHECK-LABEL: .ent _Z5uc_ssv
- %0 = load i8* @uc1, align 1
+ %0 = load i8, i8* @uc1, align 1
%conv = zext i8 %0 to i16
store i16 %conv, i16* @ssi, align 2
- %1 = load i8* @uc2, align 1
+ %1 = load i8, i8* @uc2, align 1
%conv1 = zext i8 %1 to i16
; CHECK: lbu $[[REG1:[0-9]+]], 0(${{[0-9]+}})
; CHECK: andi ${{[0-9]+}}, $[[REG1]], 255
@@ -161,10 +161,10 @@ define void @_Z5sc_ssv() {
entry:
; mips32r2-LABEL: .ent _Z5sc_ssv
; mips32-LABEL: .ent _Z5sc_ssv
- %0 = load i8* @sc1, align 1
+ %0 = load i8, i8* @sc1, align 1
%conv = sext i8 %0 to i16
store i16 %conv, i16* @ssi, align 2
- %1 = load i8* @sc2, align 1
+ %1 = load i8, i8* @sc2, align 1
%conv1 = sext i8 %1 to i16
store i16 %conv1, i16* @ssj, align 2
; mips32r2: lbu $[[REG1:[0-9]+]], 0(${{[0-9]+}})
diff --git a/test/CodeGen/Mips/Fast-ISel/loadstrconst.ll b/test/CodeGen/Mips/Fast-ISel/loadstrconst.ll
index 93cf4c15a2f5..9f644ecd1875 100644
--- a/test/CodeGen/Mips/Fast-ISel/loadstrconst.ll
+++ b/test/CodeGen/Mips/Fast-ISel/loadstrconst.ll
@@ -1,6 +1,6 @@
-; RUN: llc -march=mipsel -relocation-model=pic -O0 -mips-fast-isel -fast-isel-abort -mcpu=mips32r2 \
+; RUN: llc -march=mipsel -relocation-model=pic -O0 -mips-fast-isel -fast-isel-abort=1 -mcpu=mips32r2 \
; RUN: < %s | FileCheck %s
-; RUN: llc -march=mipsel -relocation-model=pic -O0 -mips-fast-isel -fast-isel-abort -mcpu=mips32 \
+; RUN: llc -march=mipsel -relocation-model=pic -O0 -mips-fast-isel -fast-isel-abort=1 -mcpu=mips32 \
; RUN: < %s | FileCheck %s
@.str = private unnamed_addr constant [6 x i8] c"hello\00", align 1
@@ -9,7 +9,7 @@
; Function Attrs: nounwind
define void @foo() #0 {
entry:
- store i8* getelementptr inbounds ([6 x i8]* @.str, i32 0, i32 0), i8** @s, align 4
+ store i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str, i32 0, i32 0), i8** @s, align 4
ret void
; CHECK: .ent foo
; CHECK: lw $[[REG1:[0-9]+]], %got($.str)(${{[0-9]+}})
diff --git a/test/CodeGen/Mips/Fast-ISel/logopm.ll b/test/CodeGen/Mips/Fast-ISel/logopm.ll
new file mode 100644
index 000000000000..0f0c3bf9e1dc
--- /dev/null
+++ b/test/CodeGen/Mips/Fast-ISel/logopm.ll
@@ -0,0 +1,606 @@
+; RUN: llc -march=mipsel -relocation-model=pic -O0 -fast-isel -mips-fast-isel -fast-isel-abort=1 -mcpu=mips32r2 < %s | FileCheck %s
+; RUN: llc -march=mipsel -relocation-model=pic -O0 -fast-isel -mips-fast-isel -fast-isel-abort=1 -mcpu=mips32 < %s | FileCheck %s
+
+@ub1 = common global i8 0, align 1
+@ub2 = common global i8 0, align 1
+@ub3 = common global i8 0, align 1
+@uc1 = common global i8 0, align 1
+@uc2 = common global i8 0, align 1
+@uc3 = common global i8 0, align 1
+@us1 = common global i16 0, align 2
+@us2 = common global i16 0, align 2
+@us3 = common global i16 0, align 2
+@ub = common global i8 0, align 1
+@uc = common global i8 0, align 1
+@us = common global i16 0, align 2
+@.str = private unnamed_addr constant [4 x i8] c"%i\0A\00", align 1
+@ui = common global i32 0, align 4
+@ui1 = common global i32 0, align 4
+@ui2 = common global i32 0, align 4
+@ui3 = common global i32 0, align 4
+
+; Function Attrs: noinline nounwind
+define void @andUb() #0 {
+entry:
+ %0 = load i8, i8* @ub1, align 1
+ %1 = load i8, i8* @ub2, align 1
+ %conv0 = trunc i8 %0 to i1
+ %conv1 = trunc i8 %1 to i1
+ %and0 = and i1 %conv1, %conv0
+ %conv3 = zext i1 %and0 to i8
+ store i8 %conv3, i8* @ub, align 1, !tbaa !2
+; CHECK-LABEL: .ent andUb
+; CHECK: lui $[[REG_GPa:[0-9]+]], %hi(_gp_disp)
+; CHECK: addiu $[[REG_GPb:[0-9]+]], $[[REG_GPa]], %lo(_gp_disp)
+; CHECK: addu $[[REG_GP:[0-9]+]], $[[REG_GPb]], $25
+; CHECK-DAG: lw $[[UB_ADDR:[0-9]+]], %got(ub)($[[REG_GP]])
+; CHECK-DAG: lw $[[UB2_ADDR:[0-9]+]], %got(ub2)($[[REG_GP]])
+; CHECK-DAG: lw $[[UB1_ADDR:[0-9]+]], %got(ub1)($[[REG_GP]])
+; CHECK-DAG: lbu $[[UB1:[0-9]+]], 0($[[UB1_ADDR]])
+; CHECK-DAG: lbu $[[UB2:[0-9]+]], 0($[[UB2_ADDR]])
+; CHECK-DAG: and $[[RES1:[0-9]+]], $[[UB2]], $[[UB1]]
+; CHECK: andi $[[RES:[0-9]+]], $[[RES1]], 1
+; CHECK: sb $[[RES]], 0($[[UB_ADDR]])
+ ret void
+}
+
+; Function Attrs: noinline nounwind
+define void @andUb0() #0 {
+entry:
+ %0 = load i8, i8* @ub1, align 1, !tbaa !2
+ %conv = trunc i8 %0 to i1
+ %and = and i1 %conv, 0
+ %conv1 = zext i1 %and to i8
+ store i8 %conv1, i8* @ub, align 1, !tbaa !2
+; CHECK-LABEL: .ent andUb0
+; CHECK: lui $[[REG_GPa:[0-9]+]], %hi(_gp_disp)
+; CHECK: addiu $[[REG_GPb:[0-9]+]], $[[REG_GPa]], %lo(_gp_disp)
+; CHECK: addu $[[REG_GP:[0-9]+]], $[[REG_GPb]], $25
+; CHECK-DAG: lw $[[UB_ADDR:[0-9]+]], %got(ub)($[[REG_GP]])
+; CHECK-DAG: lw $[[UB1_ADDR:[0-9]+]], %got(ub1)($[[REG_GP]])
+; CHECK-DAG: lbu $[[UB1:[0-9]+]], 0($[[UB1_ADDR]])
+; CHECK-DAG: and $[[RES1:[0-9]+]], $[[UB1]], $zero
+; CHECK: andi $[[RES:[0-9]+]], $[[RES1]], 1
+; CHECK: sb $[[RES]], 0($[[UB_ADDR]])
+; CHECK: .end andUb0
+ ret void
+}
+
+; Function Attrs: noinline nounwind
+define void @andUb1() #0 {
+; clang uses i8 constants for booleans, so we test with an i8 1.
+entry:
+ %x = load i8, i8* @ub1, align 1, !tbaa !2
+ %and = and i8 %x, 1
+ %conv = trunc i8 %and to i1
+ %conv1 = zext i1 %conv to i8
+ store i8 %conv1, i8* @ub, align 1, !tbaa !2
+; CHECK-LABEL: .ent andUb1
+; CHECK: lui $[[REG_GPa:[0-9]+]], %hi(_gp_disp)
+; CHECK: addiu $[[REG_GPb:[0-9]+]], $[[REG_GPa]], %lo(_gp_disp)
+; CHECK: addu $[[REG_GP:[0-9]+]], $[[REG_GPb]], $25
+; CHECK-DAG: lw $[[UB_ADDR:[0-9]+]], %got(ub)($[[REG_GP]])
+; CHECK-DAG: addiu $[[CONST:[0-9]+]], $zero, 1
+; CHECK-DAG: lw $[[UB1_ADDR:[0-9]+]], %got(ub1)($[[REG_GP]])
+; CHECK-DAG: lbu $[[UB1:[0-9]+]], 0($[[UB1_ADDR]])
+; CHECK-DAG: and $[[RES1:[0-9]+]], $[[UB1]], $[[CONST]]
+; CHECK: andi $[[RES:[0-9]+]], $[[RES1]], 1
+; CHECK: sb $[[RES]], 0($[[UB_ADDR]])
+; CHECK: .end andUb1
+ ret void
+}
+
+; Function Attrs: noinline nounwind
+define void @orUb() #0 {
+entry:
+ %0 = load i8, i8* @ub1, align 1
+ %1 = load i8, i8* @ub2, align 1
+ %conv0 = trunc i8 %0 to i1
+ %conv1 = trunc i8 %1 to i1
+ %or0 = or i1 %conv1, %conv0
+ %conv3 = zext i1 %or0 to i8
+ store i8 %conv3, i8* @ub, align 1, !tbaa !2
+; CHECK-LABEL: .ent orUb
+; CHECK: lui $[[REG_GPa:[0-9]+]], %hi(_gp_disp)
+; CHECK: addiu $[[REG_GPb:[0-9]+]], $[[REG_GPa]], %lo(_gp_disp)
+; CHECK: addu $[[REG_GP:[0-9]+]], $[[REG_GPb]], $25
+; CHECK-DAG: lw $[[UB_ADDR:[0-9]+]], %got(ub)($[[REG_GP]])
+; CHECK-DAG: lw $[[UB2_ADDR:[0-9]+]], %got(ub2)($[[REG_GP]])
+; CHECK-DAG: lw $[[UB1_ADDR:[0-9]+]], %got(ub1)($[[REG_GP]])
+; CHECK-DAG: lbu $[[UB1:[0-9]+]], 0($[[UB1_ADDR]])
+; CHECK-DAG: lbu $[[UB2:[0-9]+]], 0($[[UB2_ADDR]])
+; CHECK-DAG: or $[[RES1:[0-9]+]], $[[UB2]], $[[UB1]]
+; CHECK: andi $[[RES:[0-9]+]], $[[RES1]], 1
+; CHECK: sb $[[RES]], 0($[[UB_ADDR]])
+ ret void
+}
+
+; Function Attrs: noinline nounwind
+define void @orUb0() #0 {
+entry:
+ %0 = load i8, i8* @ub1, align 1, !tbaa !2
+ %conv = trunc i8 %0 to i1
+ %or = or i1 %conv, 0
+ %conv1 = zext i1 %or to i8
+ store i8 %conv1, i8* @ub, align 1, !tbaa !2
+; CHECK-LABEL: .ent orUb0
+; CHECK: lui $[[REG_GPa:[0-9]+]], %hi(_gp_disp)
+; CHECK: addiu $[[REG_GPb:[0-9]+]], $[[REG_GPa]], %lo(_gp_disp)
+; CHECK: addu $[[REG_GP:[0-9]+]], $[[REG_GPb]], $25
+; CHECK-DAG: lw $[[UB_ADDR:[0-9]+]], %got(ub)($[[REG_GP]])
+; CHECK-DAG: lw $[[UB1_ADDR:[0-9]+]], %got(ub1)($[[REG_GP]])
+; CHECK-DAG: lbu $[[UB1:[0-9]+]], 0($[[UB1_ADDR]])
+; CHECK: andi $[[RES:[0-9]+]], $[[UB1]], 1
+; CHECK: sb $[[RES]], 0($[[UB_ADDR]])
+; CHECK: .end orUb0
+ ret void
+}
+
+; Function Attrs: noinline nounwind
+define void @orUb1() #0 {
+entry:
+ %x = load i8, i8* @ub1, align 1, !tbaa !2
+ %or = or i8 %x, 1
+ %conv = trunc i8 %or to i1
+ %conv1 = zext i1 %conv to i8
+ store i8 %conv1, i8* @ub, align 1, !tbaa !2
+; CHECK-LABEL: .ent orUb1
+; CHECK: lui $[[REG_GPa:[0-9]+]], %hi(_gp_disp)
+; CHECK: addiu $[[REG_GPb:[0-9]+]], $[[REG_GPa]], %lo(_gp_disp)
+; CHECK: addu $[[REG_GP:[0-9]+]], $[[REG_GPb]], $25
+; CHECK-DAG: lw $[[UB_ADDR:[0-9]+]], %got(ub)($[[REG_GP]])
+; CHECK-DAG: addiu $[[CONST:[0-9]+]], $zero, 1
+; CHECK-DAG: lw $[[UB1_ADDR:[0-9]+]], %got(ub1)($[[REG_GP]])
+; CHECK-DAG: lbu $[[UB1:[0-9]+]], 0($[[UB1_ADDR]])
+; CHECK-DAG: or $[[RES1:[0-9]+]], $[[UB1]], $[[CONST]]
+; CHECK: andi $[[RES:[0-9]+]], $[[RES1]], 1
+; CHECK: sb $[[RES]], 0($[[UB_ADDR]])
+; CHECK: .end orUb1
+ ret void
+}
+
+; Function Attrs: noinline nounwind
+define void @xorUb() #0 {
+entry:
+ %0 = load i8, i8* @ub1, align 1
+ %1 = load i8, i8* @ub2, align 1
+ %conv0 = trunc i8 %0 to i1
+ %conv1 = trunc i8 %1 to i1
+ %xor0 = xor i1 %conv1, %conv0
+ %conv3 = zext i1 %xor0 to i8
+ store i8 %conv3, i8* @ub, align 1, !tbaa !2
+; CHECK-LABEL: .ent xorUb
+; CHECK: lui $[[REG_GPa:[0-9]+]], %hi(_gp_disp)
+; CHECK: addiu $[[REG_GPb:[0-9]+]], $[[REG_GPa]], %lo(_gp_disp)
+; CHECK: addu $[[REG_GP:[0-9]+]], $[[REG_GPb]], $25
+; CHECK-DAG: lw $[[UB_ADDR:[0-9]+]], %got(ub)($[[REG_GP]])
+; CHECK-DAG: lw $[[UB2_ADDR:[0-9]+]], %got(ub2)($[[REG_GP]])
+; CHECK-DAG: lw $[[UB1_ADDR:[0-9]+]], %got(ub1)($[[REG_GP]])
+; CHECK-DAG: lbu $[[UB1:[0-9]+]], 0($[[UB1_ADDR]])
+; CHECK-DAG: lbu $[[UB2:[0-9]+]], 0($[[UB2_ADDR]])
+; CHECK-DAG: xor $[[RES1:[0-9]+]], $[[UB2]], $[[UB1]]
+; CHECK: andi $[[RES:[0-9]+]], $[[RES1]], 1
+; CHECK: sb $[[RES]], 0($[[UB_ADDR]])
+ ret void
+}
+
+; Function Attrs: noinline nounwind
+define void @xorUb0() #0 {
+entry:
+ %0 = load i8, i8* @ub1, align 1, !tbaa !2
+ %conv = trunc i8 %0 to i1
+ %xor = xor i1 %conv, 0
+ %conv1 = zext i1 %xor to i8
+ store i8 %conv1, i8* @ub, align 1, !tbaa !2
+; CHECK-LABEL: .ent xorUb0
+; CHECK: lui $[[REG_GPa:[0-9]+]], %hi(_gp_disp)
+; CHECK: addiu $[[REG_GPb:[0-9]+]], $[[REG_GPa]], %lo(_gp_disp)
+; CHECK: addu $[[REG_GP:[0-9]+]], $[[REG_GPb]], $25
+; CHECK-DAG: lw $[[UB_ADDR:[0-9]+]], %got(ub)($[[REG_GP]])
+; CHECK-DAG: lw $[[UB1_ADDR:[0-9]+]], %got(ub1)($[[REG_GP]])
+; CHECK-DAG: lbu $[[UB1:[0-9]+]], 0($[[UB1_ADDR]])
+; CHECK-DAG: xor $[[RES1:[0-9]+]], $[[UB1]], $zero
+; CHECK: andi $[[RES:[0-9]+]], $[[RES1]], 1
+; CHECK: sb $[[RES]], 0($[[UB_ADDR]])
+; CHECK: .end xorUb0
+ ret void
+}
+
+; Function Attrs: noinline nounwind
+define void @xorUb1() #0 {
+entry:
+ %x = load i8, i8* @ub1, align 1, !tbaa !2
+ %xor = xor i8 1, %x
+ %conv = trunc i8 %xor to i1
+ %conv1 = zext i1 %conv to i8
+ store i8 %conv1, i8* @ub, align 1, !tbaa !2
+; CHECK-LABEL: .ent xorUb1
+; CHECK: lui $[[REG_GPa:[0-9]+]], %hi(_gp_disp)
+; CHECK: addiu $[[REG_GPb:[0-9]+]], $[[REG_GPa]], %lo(_gp_disp)
+; CHECK: addu $[[REG_GP:[0-9]+]], $[[REG_GPb]], $25
+; CHECK-DAG: lw $[[UB_ADDR:[0-9]+]], %got(ub)($[[REG_GP]])
+; CHECK-DAG: addiu $[[CONST:[0-9]+]], $zero, 1
+; CHECK-DAG: lw $[[UB1_ADDR:[0-9]+]], %got(ub1)($[[REG_GP]])
+; CHECK-DAG: lbu $[[UB1:[0-9]+]], 0($[[UB1_ADDR]])
+; CHECK-DAG: xor $[[RES1:[0-9]+]], $[[UB1]], $[[CONST]]
+; CHECK: andi $[[RES:[0-9]+]], $[[RES1]], 1
+; CHECK: sb $[[RES]], 0($[[UB_ADDR]])
+; CHECK: .end xorUb1
+ ret void
+}
+
+; Function Attrs: noinline nounwind
+define void @andUc() #0 {
+entry:
+ %0 = load i8, i8* @uc1, align 1, !tbaa !2
+ %1 = load i8, i8* @uc2, align 1, !tbaa !2
+ %and3 = and i8 %1, %0
+ store i8 %and3, i8* @uc, align 1, !tbaa !2
+; CHECK-LABEL: .ent andUc
+; CHECK: lui $[[REG_GPa:[0-9]+]], %hi(_gp_disp)
+; CHECK: addiu $[[REG_GPb:[0-9]+]], $[[REG_GPa]], %lo(_gp_disp)
+; CHECK: addu $[[REG_GP:[0-9]+]], $[[REG_GPb]], $25
+; CHECK-DAG: lw $[[UC_ADDR:[0-9]+]], %got(uc)($[[REG_GP]])
+; CHECK-DAG: lw $[[UC2_ADDR:[0-9]+]], %got(uc2)($[[REG_GP]])
+; CHECK-DAG: lw $[[UC1_ADDR:[0-9]+]], %got(uc1)($[[REG_GP]])
+; CHECK-DAG: lbu $[[UC1:[0-9]+]], 0($[[UC1_ADDR]])
+; CHECK-DAG: lbu $[[UC2:[0-9]+]], 0($[[UC2_ADDR]])
+; CHECK-DAG: and $[[RES:[0-9]+]], $[[UC2]], $[[UC1]]
+; CHECK: sb $[[RES]], 0($[[UC_ADDR]])
+ ret void
+}
+
+; Function Attrs: noinline nounwind
+define void @andUc0() #0 {
+entry:
+ %0 = load i8, i8* @uc1, align 1, !tbaa !2
+ %and = and i8 %0, 67
+ store i8 %and, i8* @uc, align 1, !tbaa !2
+; CHECK-LABEL: .ent andUc0
+; CHECK: lui $[[REG_GPa:[0-9]+]], %hi(_gp_disp)
+; CHECK: addiu $[[REG_GPb:[0-9]+]], $[[REG_GPa]], %lo(_gp_disp)
+; CHECK: addu $[[REG_GP:[0-9]+]], $[[REG_GPb]], $25
+; CHECK-DAG: lw $[[UC_ADDR:[0-9]+]], %got(uc)($[[REG_GP]])
+; CHECK-DAG: lw $[[UC1_ADDR:[0-9]+]], %got(uc1)($[[REG_GP]])
+; CHECK-DAG: lbu $[[UC1:[0-9]+]], 0($[[UC1_ADDR]])
+; CHECK-DAG: addiu $[[CONST_67:[0-9]+]], $zero, 67
+; CHECK-DAG: and $[[RES:[0-9]+]], $[[UC1]], $[[CONST_67]]
+; CHECK: sb $[[RES]], 0($[[UC_ADDR]])
+; CHECK: .end andUc0
+ ret void
+}
+
+; Function Attrs: noinline nounwind
+define void @andUc1() #0 {
+entry:
+ %0 = load i8, i8* @uc1, align 1, !tbaa !2
+ %and = and i8 %0, 167
+ store i8 %and, i8* @uc, align 1, !tbaa !2
+; CHECK-LABEL: .ent andUc1
+; CHECK: lui $[[REG_GPa:[0-9]+]], %hi(_gp_disp)
+; CHECK: addiu $[[REG_GPb:[0-9]+]], $[[REG_GPa]], %lo(_gp_disp)
+; CHECK: addu $[[REG_GP:[0-9]+]], $[[REG_GPb]], $25
+; CHECK-DAG: lw $[[UC_ADDR:[0-9]+]], %got(uc)($[[REG_GP]])
+; CHECK-DAG: lw $[[UC1_ADDR:[0-9]+]], %got(uc1)($[[REG_GP]])
+; CHECK-DAG: lbu $[[UC1:[0-9]+]], 0($[[UC1_ADDR]])
+; CHECK-DAG: addiu $[[CONST_Neg89:[0-9]+]], $zero, -89
+; CHECK-DAG: and $[[RES:[0-9]+]], $[[UC1]], $[[CONST_Neg89]]
+; CHECK: sb $[[RES]], 0($[[UC_ADDR]])
+; CHECK: .end andUc1
+ ret void
+}
+
+; Function Attrs: noinline nounwind
+define void @orUc() #0 {
+entry:
+ %0 = load i8, i8* @uc1, align 1, !tbaa !2
+ %1 = load i8, i8* @uc2, align 1, !tbaa !2
+ %or3 = or i8 %1, %0
+ store i8 %or3, i8* @uc, align 1, !tbaa !2
+; CHECK-LABEL: .ent orUc
+; CHECK: lui $[[REG_GPa:[0-9]+]], %hi(_gp_disp)
+; CHECK: addiu $[[REG_GPb:[0-9]+]], $[[REG_GPa]], %lo(_gp_disp)
+; CHECK: addu $[[REG_GP:[0-9]+]], $[[REG_GPb]], $25
+; CHECK-DAG: lw $[[UC_ADDR:[0-9]+]], %got(uc)($[[REG_GP]])
+; CHECK-DAG: lw $[[UC2_ADDR:[0-9]+]], %got(uc2)($[[REG_GP]])
+; CHECK-DAG: lw $[[UC1_ADDR:[0-9]+]], %got(uc1)($[[REG_GP]])
+; CHECK-DAG: lbu $[[UC1:[0-9]+]], 0($[[UC1_ADDR]])
+; CHECK-DAG: lbu $[[UC2:[0-9]+]], 0($[[UC2_ADDR]])
+; CHECK-DAG: or $[[RES:[0-9]+]], $[[UC2]], $[[UC1]]
+; CHECK: sb $[[RES]], 0($[[UC_ADDR]])
+; CHECK: .end orUc
+ ret void
+}
+
+; Function Attrs: noinline nounwind
+define void @orUc0() #0 {
+entry:
+ %0 = load i8, i8* @uc1, align 1, !tbaa !2
+ %or = or i8 %0, 69
+ store i8 %or, i8* @uc, align 1, !tbaa !2
+; CHECK-LABEL: .ent orUc0
+; CHECK: lui $[[REG_GPa:[0-9]+]], %hi(_gp_disp)
+; CHECK: addiu $[[REG_GPb:[0-9]+]], $[[REG_GPa]], %lo(_gp_disp)
+; CHECK: addu $[[REG_GP:[0-9]+]], $[[REG_GPb]], $25
+; CHECK-DAG: lw $[[UC_ADDR:[0-9]+]], %got(uc)($[[REG_GP]])
+; CHECK-DAG: lw $[[UC1_ADDR:[0-9]+]], %got(uc1)($[[REG_GP]])
+; CHECK-DAG: lbu $[[UC1:[0-9]+]], 0($[[UC1_ADDR]])
+; CHECK-DAG: addiu $[[CONST_69:[0-9]+]], $zero, 69
+; CHECK-DAG: or $[[RES:[0-9]+]], $[[UC1]], $[[CONST_69]]
+; CHECK: sb $[[RES]], 0($[[UC_ADDR]])
+; CHECK: .end orUc0
+ ret void
+}
+
+; Function Attrs: noinline nounwind
+define void @orUc1() #0 {
+entry:
+ %0 = load i8, i8* @uc1, align 1, !tbaa !2
+ %or = or i8 %0, 238
+ store i8 %or, i8* @uc, align 1, !tbaa !2
+; CHECK-LABEL: .ent orUc1
+; CHECK: lui $[[REG_GPa:[0-9]+]], %hi(_gp_disp)
+; CHECK: addiu $[[REG_GPb:[0-9]+]], $[[REG_GPa]], %lo(_gp_disp)
+; CHECK: addu $[[REG_GP:[0-9]+]], $[[REG_GPb]], $25
+; CHECK-DAG: lw $[[UC_ADDR:[0-9]+]], %got(uc)($[[REG_GP]])
+; CHECK-DAG: lw $[[UC1_ADDR:[0-9]+]], %got(uc1)($[[REG_GP]])
+; CHECK-DAG: lbu $[[UC1:[0-9]+]], 0($[[UC1_ADDR]])
+; CHECK-DAG: addiu $[[CONST_neg18:[0-9]+]], $zero, -18
+; CHECK-DAG: or $[[RES:[0-9]+]], $[[UC1]], $[[CONST_neg18]]
+; CHECK: sb $[[RES]], 0($[[UC_ADDR]])
+; CHECK: .end orUc1
+ ret void
+}
+
+; Function Attrs: noinline nounwind
+define void @xorUc() #0 {
+entry:
+ %0 = load i8, i8* @uc1, align 1, !tbaa !2
+ %1 = load i8, i8* @uc2, align 1, !tbaa !2
+ %xor3 = xor i8 %1, %0
+ store i8 %xor3, i8* @uc, align 1, !tbaa !2
+; CHECK-LABEL: .ent xorUc
+; CHECK: lui $[[REG_GPa:[0-9]+]], %hi(_gp_disp)
+; CHECK: addiu $[[REG_GPb:[0-9]+]], $[[REG_GPa]], %lo(_gp_disp)
+; CHECK: addu $[[REG_GP:[0-9]+]], $[[REG_GPb]], $25
+; CHECK-DAG: lw $[[UC_ADDR:[0-9]+]], %got(uc)($[[REG_GP]])
+; CHECK-DAG: lw $[[UC2_ADDR:[0-9]+]], %got(uc2)($[[REG_GP]])
+; CHECK-DAG: lw $[[UC1_ADDR:[0-9]+]], %got(uc1)($[[REG_GP]])
+; CHECK-DAG: lbu $[[UC1:[0-9]+]], 0($[[UC1_ADDR]])
+; CHECK-DAG: lbu $[[UC2:[0-9]+]], 0($[[UC2_ADDR]])
+; CHECK-DAG: xor $[[RES:[0-9]+]], $[[UC2]], $[[UC1]]
+; CHECK: sb $[[RES]], 0($[[UC_ADDR]])
+; CHECK: .end xorUc
+ ret void
+}
+
+; Function Attrs: noinline nounwind
+define void @xorUc0() #0 {
+entry:
+ %0 = load i8, i8* @uc1, align 1, !tbaa !2
+ %xor = xor i8 %0, 23
+ store i8 %xor, i8* @uc, align 1, !tbaa !2
+; CHECK-LABEL: .ent xorUc0
+; CHECK: lui $[[REG_GPa:[0-9]+]], %hi(_gp_disp)
+; CHECK: addiu $[[REG_GPb:[0-9]+]], $[[REG_GPa]], %lo(_gp_disp)
+; CHECK: addu $[[REG_GP:[0-9]+]], $[[REG_GPb]], $25
+; CHECK-DAG: lw $[[UC_ADDR:[0-9]+]], %got(uc)($[[REG_GP]])
+; CHECK-DAG: lw $[[UC1_ADDR:[0-9]+]], %got(uc1)($[[REG_GP]])
+; CHECK-DAG: lbu $[[UC1:[0-9]+]], 0($[[UC1_ADDR]])
+; CHECK-DAG: addiu $[[CONST_23:[0-9]+]], $zero, 23
+; CHECK-DAG: xor $[[RES:[0-9]+]], $[[UC1]], $[[CONST_23]]
+; CHECK: sb $[[RES]], 0($[[UC_ADDR]])
+; CHECK: .end xorUc0
+ ret void
+}
+
+; Function Attrs: noinline nounwind
+define void @xorUc1() #0 {
+entry:
+ %0 = load i8, i8* @uc1, align 1, !tbaa !2
+ %xor = xor i8 %0, 120
+ store i8 %xor, i8* @uc, align 1, !tbaa !2
+; CHECK-LABEL: .ent xorUc1
+; CHECK: lui $[[REG_GPa:[0-9]+]], %hi(_gp_disp)
+; CHECK: addiu $[[REG_GPb:[0-9]+]], $[[REG_GPa]], %lo(_gp_disp)
+; CHECK: addu $[[REG_GP:[0-9]+]], $[[REG_GPb]], $25
+; CHECK-DAG: lw $[[UC_ADDR:[0-9]+]], %got(uc)($[[REG_GP]])
+; CHECK-DAG: lw $[[UC1_ADDR:[0-9]+]], %got(uc1)($[[REG_GP]])
+; CHECK-DAG: lbu $[[UC1:[0-9]+]], 0($[[UC1_ADDR]])
+; CHECK-DAG: addiu $[[CONST_120:[0-9]+]], $zero, 120
+; CHECK-DAG: xor $[[RES:[0-9]+]], $[[UC1]], $[[CONST_120]]
+; CHECK: sb $[[RES]], 0($[[UC_ADDR]])
+; CHECK: .end xorUc1
+ ret void
+}
+
+; Function Attrs: noinline nounwind
+define void @andUs() #0 {
+entry:
+ %0 = load i16, i16* @us1, align 2, !tbaa !5
+ %1 = load i16, i16* @us2, align 2, !tbaa !5
+ %and3 = and i16 %1, %0
+ store i16 %and3, i16* @us, align 2, !tbaa !5
+; CHECK-LABEL: .ent andUs
+; CHECK: lui $[[REG_GPa:[0-9]+]], %hi(_gp_disp)
+; CHECK: addiu $[[REG_GPb:[0-9]+]], $[[REG_GPa]], %lo(_gp_disp)
+; CHECK: addu $[[REG_GP:[0-9]+]], $[[REG_GPb]], $25
+; CHECK-DAG: lw $[[US_ADDR:[0-9]+]], %got(us)($[[REG_GP]])
+; CHECK-DAG: lw $[[US2_ADDR:[0-9]+]], %got(us2)($[[REG_GP]])
+; CHECK-DAG: lw $[[US1_ADDR:[0-9]+]], %got(us1)($[[REG_GP]])
+; CHECK-DAG: lhu $[[US1:[0-9]+]], 0($[[US1_ADDR]])
+; CHECK-DAG: lhu $[[US2:[0-9]+]], 0($[[US2_ADDR]])
+; CHECK-DAG: and $[[RES:[0-9]+]], $[[US2]], $[[US1]]
+; CHECK: sh $[[RES]], 0($[[US_ADDR]])
+; CHECK: .end andUs
+ ret void
+}
+
+; Function Attrs: noinline nounwind
+define void @andUs0() #0 {
+entry:
+ %0 = load i16, i16* @us1, align 2, !tbaa !5
+ %and = and i16 %0, 4660
+ store i16 %and, i16* @us, align 2, !tbaa !5
+; CHECK-LABEL: .ent andUs0
+; CHECK: lui $[[REG_GPa:[0-9]+]], %hi(_gp_disp)
+; CHECK: addiu $[[REG_GPb:[0-9]+]], $[[REG_GPa]], %lo(_gp_disp)
+; CHECK: addu $[[REG_GP:[0-9]+]], $[[REG_GPb]], $25
+; CHECK-DAG: lw $[[US_ADDR:[0-9]+]], %got(us)($[[REG_GP]])
+; CHECK-DAG: lw $[[US1_ADDR:[0-9]+]], %got(us1)($[[REG_GP]])
+; CHECK-DAG: lhu $[[US1:[0-9]+]], 0($[[US1_ADDR]])
+; CHECK-DAG: addiu $[[CONST_4660:[0-9]+]], $zero, 4660
+; CHECK-DAG: and $[[RES:[0-9]+]], $[[US1]], $[[CONST_4660]]
+; CHECK: sh $[[RES]], 0($[[US_ADDR]])
+; CHECK: .end andUs0
+ ret void
+}
+
+; Function Attrs: noinline nounwind
+define void @andUs1() #0 {
+entry:
+ %0 = load i16, i16* @us1, align 2, !tbaa !5
+ %and = and i16 %0, 61351
+ store i16 %and, i16* @us, align 2, !tbaa !5
+; CHECK-LABEL: .ent andUs1
+; CHECK: lui $[[REG_GPa:[0-9]+]], %hi(_gp_disp)
+; CHECK: addiu $[[REG_GPb:[0-9]+]], $[[REG_GPa]], %lo(_gp_disp)
+; CHECK: addu $[[REG_GP:[0-9]+]], $[[REG_GPb]], $25
+; CHECK-DAG: lw $[[US_ADDR:[0-9]+]], %got(us)($[[REG_GP]])
+; CHECK-DAG: lw $[[US1_ADDR:[0-9]+]], %got(us1)($[[REG_GP]])
+; CHECK-DAG: lhu $[[US1:[0-9]+]], 0($[[US1_ADDR]])
+; CHECK-DAG: addiu $[[CONST_Neg4185:[0-9]+]], $zero, -4185
+; CHECK-DAG: and $[[RES:[0-9]+]], $[[US1]], $[[CONST_Neg4185]]
+; CHECK: sh $[[RES]], 0($[[US_ADDR]])
+; CHECK: .end andUs1
+ ret void
+}
+
+; Function Attrs: noinline nounwind
+define void @orUs() #0 {
+entry:
+ %0 = load i16, i16* @us1, align 2, !tbaa !5
+ %1 = load i16, i16* @us2, align 2, !tbaa !5
+ %or3 = or i16 %1, %0
+ store i16 %or3, i16* @us, align 2, !tbaa !5
+; CHECK-LABEL: .ent orUs
+; CHECK: lui $[[REG_GPa:[0-9]+]], %hi(_gp_disp)
+; CHECK: addiu $[[REG_GPb:[0-9]+]], $[[REG_GPa]], %lo(_gp_disp)
+; CHECK: addu $[[REG_GP:[0-9]+]], $[[REG_GPb]], $25
+; CHECK-DAG: lw $[[US_ADDR:[0-9]+]], %got(us)($[[REG_GP]])
+; CHECK-DAG: lw $[[US2_ADDR:[0-9]+]], %got(us2)($[[REG_GP]])
+; CHECK-DAG: lw $[[US1_ADDR:[0-9]+]], %got(us1)($[[REG_GP]])
+; CHECK-DAG: lhu $[[US1:[0-9]+]], 0($[[US1_ADDR]])
+; CHECK-DAG: lhu $[[US2:[0-9]+]], 0($[[US2_ADDR]])
+; CHECK-DAG: or $[[RES:[0-9]+]], $[[US2]], $[[US1]]
+; CHECK: sh $[[RES]], 0($[[US_ADDR]])
+; CHECK: .end orUs
+ ret void
+}
+
+; Function Attrs: noinline nounwind
+define void @orUs0() #0 {
+entry:
+ %0 = load i16, i16* @us1, align 2, !tbaa !5
+ %or = or i16 %0, 17666
+ store i16 %or, i16* @us, align 2, !tbaa !5
+ ret void
+}
+
+; Function Attrs: noinline nounwind
+define void @orUs1() #0 {
+entry:
+ %0 = load i16, i16* @us1, align 2, !tbaa !5
+ %or = or i16 %0, 60945
+ store i16 %or, i16* @us, align 2, !tbaa !5
+; CHECK-LABEL: .ent orUs1
+; CHECK: lui $[[REG_GPa:[0-9]+]], %hi(_gp_disp)
+; CHECK: addiu $[[REG_GPb:[0-9]+]], $[[REG_GPa]], %lo(_gp_disp)
+; CHECK: addu $[[REG_GP:[0-9]+]], $[[REG_GPb]], $25
+; CHECK-DAG: lw $[[US_ADDR:[0-9]+]], %got(us)($[[REG_GP]])
+; CHECK-DAG: lw $[[US1_ADDR:[0-9]+]], %got(us1)($[[REG_GP]])
+; CHECK-DAG: lhu $[[US1:[0-9]+]], 0($[[US1_ADDR]])
+; CHECK-DAG: addiu $[[CONST_neg4591:[0-9]+]], $zero, -4591
+; CHECK-DAG: or $[[RES:[0-9]+]], $[[US1]], $[[CONST_neg4591]]
+; CHECK: sh $[[RES]], 0($[[US_ADDR]])
+; CHECK: .end orUs1
+ ret void
+}
+
+; Function Attrs: noinline nounwind
+define void @xorUs() #0 {
+entry:
+ %0 = load i16, i16* @us1, align 2, !tbaa !5
+ %1 = load i16, i16* @us2, align 2, !tbaa !5
+ %xor3 = xor i16 %1, %0
+ store i16 %xor3, i16* @us, align 2, !tbaa !5
+; CHECK-LABEL: .ent xorUs
+; CHECK: lui $[[REG_GPa:[0-9]+]], %hi(_gp_disp)
+; CHECK: addiu $[[REG_GPb:[0-9]+]], $[[REG_GPa]], %lo(_gp_disp)
+; CHECK: addu $[[REG_GP:[0-9]+]], $[[REG_GPb]], $25
+; CHECK-DAG: lw $[[US_ADDR:[0-9]+]], %got(us)($[[REG_GP]])
+; CHECK-DAG: lw $[[US2_ADDR:[0-9]+]], %got(us2)($[[REG_GP]])
+; CHECK-DAG: lw $[[US1_ADDR:[0-9]+]], %got(us1)($[[REG_GP]])
+; CHECK-DAG: lhu $[[US1:[0-9]+]], 0($[[US1_ADDR]])
+; CHECK-DAG: lhu $[[US2:[0-9]+]], 0($[[US2_ADDR]])
+; CHECK-DAG: xor $[[RES:[0-9]+]], $[[US2]], $[[US1]]
+; CHECK: sh $[[RES]], 0($[[US_ADDR]])
+; CHECK: .end xorUs
+ ret void
+}
+
+; Function Attrs: noinline nounwind
+define void @xorUs0() #0 {
+entry:
+ %0 = load i16, i16* @us1, align 2, !tbaa !5
+ %xor = xor i16 %0, 6062
+ store i16 %xor, i16* @us, align 2, !tbaa !5
+; CHECK-LABEL: .ent xorUs0
+; CHECK: lui $[[REG_GPa:[0-9]+]], %hi(_gp_disp)
+; CHECK: addiu $[[REG_GPb:[0-9]+]], $[[REG_GPa]], %lo(_gp_disp)
+; CHECK: addu $[[REG_GP:[0-9]+]], $[[REG_GPb]], $25
+; CHECK-DAG: lw $[[US_ADDR:[0-9]+]], %got(us)($[[REG_GP]])
+; CHECK-DAG: lw $[[US1_ADDR:[0-9]+]], %got(us1)($[[REG_GP]])
+; CHECK-DAG: lhu $[[US1:[0-9]+]], 0($[[US1_ADDR]])
+; CHECK-DAG: addiu $[[CONST_6062:[0-9]+]], $zero, 6062
+; CHECK-DAG: xor $[[RES:[0-9]+]], $[[US1]], $[[CONST_6062]]
+; CHECK: sh $[[RES]], 0($[[US_ADDR]])
+; CHECK: .end xorUs0
+
+ ret void
+}
+
+; Function Attrs: noinline nounwind
+define void @xorUs1() #0 {
+entry:
+ %0 = load i16, i16* @us1, align 2, !tbaa !5
+ %xor = xor i16 %0, 60024
+ store i16 %xor, i16* @us, align 2, !tbaa !5
+; CHECK-LABEL: .ent xorUs1
+; CHECK: lui $[[REG_GPa:[0-9]+]], %hi(_gp_disp)
+; CHECK: addiu $[[REG_GPb:[0-9]+]], $[[REG_GPa]], %lo(_gp_disp)
+; CHECK: addu $[[REG_GP:[0-9]+]], $[[REG_GPb]], $25
+; CHECK-DAG: lw $[[US_ADDR:[0-9]+]], %got(us)($[[REG_GP]])
+; CHECK-DAG: lw $[[US1_ADDR:[0-9]+]], %got(us1)($[[REG_GP]])
+; CHECK-DAG: lhu $[[US1:[0-9]+]], 0($[[US1_ADDR]])
+; CHECK-DAG: addiu $[[CONST_Neg5512:[0-9]+]], $zero, -5512
+; CHECK-DAG: xor $[[RES:[0-9]+]], $[[US1]], $[[CONST_Neg5512]]
+; CHECK: sh $[[RES]], 0($[[US_ADDR]])
+; CHECK: .end xorUs1
+ ret void
+}
+
+attributes #0 = { noinline nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { nounwind }
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{i32 1, !"PIC Level", i32 2}
+!1 = !{!"clang version 3.7.0 (trunk)"}
+!2 = !{!3, !3, i64 0}
+!3 = !{!"omnipotent char", !4, i64 0}
+!4 = !{!"Simple C/C++ TBAA"}
+!5 = !{!6, !6, i64 0}
+!6 = !{!"short", !3, i64 0}
diff --git a/test/CodeGen/Mips/Fast-ISel/nullvoid.ll b/test/CodeGen/Mips/Fast-ISel/nullvoid.ll
index c847561d0278..5fa3f13ace4c 100644
--- a/test/CodeGen/Mips/Fast-ISel/nullvoid.ll
+++ b/test/CodeGen/Mips/Fast-ISel/nullvoid.ll
@@ -1,6 +1,6 @@
-; RUN: llc -march=mipsel -relocation-model=pic -O0 -mips-fast-isel -fast-isel-abort -mcpu=mips32r2 \
+; RUN: llc -march=mipsel -relocation-model=pic -O0 -mips-fast-isel -fast-isel-abort=1 -mcpu=mips32r2 \
; RUN: < %s | FileCheck %s
-; RUN: llc -march=mipsel -relocation-model=pic -O0 -mips-fast-isel -fast-isel-abort -mcpu=mips32 \
+; RUN: llc -march=mipsel -relocation-model=pic -O0 -mips-fast-isel -fast-isel-abort=1 -mcpu=mips32 \
; RUN: < %s | FileCheck %s
; Function Attrs: nounwind
diff --git a/test/CodeGen/Mips/Fast-ISel/overflt.ll b/test/CodeGen/Mips/Fast-ISel/overflt.ll
new file mode 100644
index 000000000000..57f991e23d95
--- /dev/null
+++ b/test/CodeGen/Mips/Fast-ISel/overflt.ll
@@ -0,0 +1,64 @@
+; RUN: llc -march=mipsel -relocation-model=pic -O0 -mips-fast-isel -fast-isel-abort=1 -mcpu=mips32r2 \
+; RUN: < %s | FileCheck %s
+; RUN: llc -march=mipsel -relocation-model=pic -O0 -mips-fast-isel -fast-isel-abort=1 -mcpu=mips32 \
+; RUN: < %s | FileCheck %s
+
+@x = common global [128000 x float] zeroinitializer, align 4
+@y = global float* getelementptr inbounds ([128000 x float], [128000 x float]* @x, i32 0, i32 0), align 4
+@result = common global float 0.000000e+00, align 4
+@.str = private unnamed_addr constant [5 x i8] c"%f \0A\00", align 1
+
+; Function Attrs: nounwind
+define void @foo() {
+entry:
+; CHECK-LABEL: .ent foo
+ %0 = load float*, float** @y, align 4
+ %arrayidx = getelementptr inbounds float, float* %0, i32 64000
+ store float 5.500000e+00, float* %arrayidx, align 4
+; CHECK: lui $[[REG_FPCONST_INT:[0-9]+]], 16560
+; CHECK: mtc1 $[[REG_FPCONST_INT]], $f[[REG_FPCONST:[0-9]+]]
+; CHECK: lw $[[REG_Y_GOT:[0-9]+]], %got(y)(${{[0-9]+}})
+; CHECK: lw $[[REG_Y:[0-9]+]], 0($[[REG_Y_GOT]])
+; CHECK: lui $[[REG_IDX_UPPER:[0-9]+]], 3
+; CHECK: ori $[[REG_IDX:[0-9]+]], $[[REG_IDX_UPPER]], 59392
+; CHECK: addu $[[REG_Y_IDX:[0-9]+]], $[[REG_IDX]], $[[REG_Y]]
+; CHECK: swc1 $f[[REG_FPCONST]], 0($[[REG_Y_IDX]])
+ ret void
+; CHECK-LABEL: .end foo
+}
+
+; Function Attrs: nounwind
+define void @goo() {
+entry:
+; CHECK-LABEL: .ent goo
+ %0 = load float*, float** @y, align 4
+ %arrayidx = getelementptr inbounds float, float* %0, i32 64000
+ %1 = load float, float* %arrayidx, align 4
+ store float %1, float* @result, align 4
+; CHECK-DAG: lw $[[REG_RESULT:[0-9]+]], %got(result)(${{[0-9]+}})
+; CHECK-DAG: lw $[[REG_Y_GOT:[0-9]+]], %got(y)(${{[0-9]+}})
+; CHECK-DAG: lw $[[REG_Y:[0-9]+]], 0($[[REG_Y_GOT]])
+; CHECK-DAG: lui $[[REG_IDX_UPPER:[0-9]+]], 3
+; CHECK-DAG: ori $[[REG_IDX:[0-9]+]], $[[REG_IDX_UPPER]], 59392
+; CHECK-DAG: addu $[[REG_Y_IDX:[0-9]+]], $[[REG_IDX]], $[[REG_Y]]
+; CHECK-DAG: lwc1 $f[[Y_IDX:[0-9]+]], 0($[[REG_Y_IDX]])
+; CHECK-DAG: swc1 $f[[Y_IDX]], 0($[[REG_RESULT]])
+; CHECK-LABEL: .end goo
+ ret void
+}
+
+;
+; Original C code for test.
+;
+;float x[128000];
+;float *y = x;
+;float result;
+
+
+;void foo() {
+; y[64000] = 5.5;
+;}
+
+;void goo() {
+; result = y[64000];
+;}
diff --git a/test/CodeGen/Mips/Fast-ISel/retabi.ll b/test/CodeGen/Mips/Fast-ISel/retabi.ll
new file mode 100644
index 000000000000..03119b827eb6
--- /dev/null
+++ b/test/CodeGen/Mips/Fast-ISel/retabi.ll
@@ -0,0 +1,108 @@
+; RUN: llc -march=mipsel -relocation-model=pic -O0 -mips-fast-isel -fast-isel-abort=1 -mcpu=mips32r2 \
+; RUN: < %s | FileCheck %s
+
+@i = global i32 75, align 4
+@s = global i16 -345, align 2
+@c = global i8 118, align 1
+@f = global float 0x40BE623360000000, align 4
+@d = global double 1.298330e+03, align 8
+
+; Function Attrs: nounwind
+define i32 @reti() {
+entry:
+; CHECK-LABEL: reti:
+ %0 = load i32, i32* @i, align 4
+ ret i32 %0
+; CHECK: lui $[[REG_GPa:[0-9]+]], %hi(_gp_disp)
+; CHECK: addiu $[[REG_GPb:[0-9]+]], $[[REG_GPa]], %lo(_gp_disp)
+; CHECK: addu $[[REG_GP:[0-9]+]], $[[REG_GPb]], $25
+; CHECK: lw $[[REG_I_ADDR:[0-9]+]], %got(i)($[[REG_GP]])
+; CHECK: lw $2, 0($[[REG_I_ADDR]])
+; CHECK: jr $ra
+}
+
+; Function Attrs: nounwind
+define i16 @retus() {
+entry:
+; CHECK-LABEL: retus:
+ %0 = load i16, i16* @s, align 2
+ ret i16 %0
+; CHECK: lui $[[REG_GPa:[0-9]+]], %hi(_gp_disp)
+; CHECK: addiu $[[REG_GPb:[0-9]+]], $[[REG_GPa]], %lo(_gp_disp)
+; CHECK: addu $[[REG_GP:[0-9]+]], $[[REG_GPb]], $25
+; CHECK: lw $[[REG_S_ADDR:[0-9]+]], %got(s)($[[REG_GP]])
+; CHECK: lhu $2, 0($[[REG_S_ADDR]])
+; CHECK: jr $ra
+}
+
+; Function Attrs: nounwind
+define signext i16 @rets() {
+entry:
+; CHECK-LABEL: rets:
+ %0 = load i16, i16* @s, align 2
+ ret i16 %0
+; CHECK: lui $[[REG_GPa:[0-9]+]], %hi(_gp_disp)
+; CHECK: addiu $[[REG_GPb:[0-9]+]], $[[REG_GPa]], %lo(_gp_disp)
+; CHECK: addu $[[REG_GP:[0-9]+]], $[[REG_GPb]], $25
+; CHECK: lw $[[REG_S_ADDR:[0-9]+]], %got(s)($[[REG_GP]])
+; CHECK: lhu $[[REG_S:[0-9]+]], 0($[[REG_S_ADDR]])
+; CHECK: seh $2, $[[REG_S]]
+; CHECK: jr $ra
+}
+
+; Function Attrs: nounwind
+define i8 @retuc() {
+entry:
+; CHECK-LABEL: retuc:
+ %0 = load i8, i8* @c, align 1
+ ret i8 %0
+; CHECK: lui $[[REG_GPa:[0-9]+]], %hi(_gp_disp)
+; CHECK: addiu $[[REG_GPb:[0-9]+]], $[[REG_GPa]], %lo(_gp_disp)
+; CHECK: addu $[[REG_GP:[0-9]+]], $[[REG_GPb]], $25
+; CHECK: lw $[[REG_C_ADDR:[0-9]+]], %got(c)($[[REG_GP]])
+; CHECK: lbu $2, 0($[[REG_C_ADDR]])
+; CHECK: jr $ra
+}
+
+; Function Attrs: nounwind
+define signext i8 @retc() {
+entry:
+; CHECK-LABEL: retc:
+ %0 = load i8, i8* @c, align 1
+ ret i8 %0
+; CHECK: lui $[[REG_GPa:[0-9]+]], %hi(_gp_disp)
+; CHECK: addiu $[[REG_GPb:[0-9]+]], $[[REG_GPa]], %lo(_gp_disp)
+; CHECK: addu $[[REG_GP:[0-9]+]], $[[REG_GPb]], $25
+; CHECK: lw $[[REG_C_ADDR:[0-9]+]], %got(c)($[[REG_GP]])
+; CHECK: lbu $[[REG_C:[0-9]+]], 0($[[REG_C_ADDR]])
+; CHECK: seb $2, $[[REG_C]]
+; CHECK: jr $ra
+}
+
+; Function Attrs: nounwind
+define float @retf() {
+entry:
+; CHECK-LABEL: retf:
+ %0 = load float, float* @f, align 4
+ ret float %0
+; CHECK: lui $[[REG_GPa:[0-9]+]], %hi(_gp_disp)
+; CHECK: addiu $[[REG_GPb:[0-9]+]], $[[REG_GPa]], %lo(_gp_disp)
+; CHECK: addu $[[REG_GP:[0-9]+]], $[[REG_GPb]], $25
+; CHECK: lw $[[REG_F_ADDR:[0-9]+]], %got(f)($[[REG_GP]])
+; CHECK: lwc1 $f0, 0($[[REG_F_ADDR]])
+; CHECK: jr $ra
+}
+
+; Function Attrs: nounwind
+define double @retd() {
+entry:
+; CHECK-LABEL: retd:
+ %0 = load double, double* @d, align 8
+ ret double %0
+; CHECK: lui $[[REG_GPa:[0-9]+]], %hi(_gp_disp)
+; CHECK: addiu $[[REG_GPb:[0-9]+]], $[[REG_GPa]], %lo(_gp_disp)
+; CHECK: addu $[[REG_GP:[0-9]+]], $[[REG_GPb]], $25
+; CHECK: lw $[[REG_D_ADDR:[0-9]+]], %got(d)($[[REG_GP]])
+; CHECK: ldc1 $f0, 0($[[REG_D_ADDR]])
+; CHECK: jr $ra
+}
diff --git a/test/CodeGen/Mips/Fast-ISel/shftopm.ll b/test/CodeGen/Mips/Fast-ISel/shftopm.ll
new file mode 100644
index 000000000000..90ddd190be13
--- /dev/null
+++ b/test/CodeGen/Mips/Fast-ISel/shftopm.ll
@@ -0,0 +1,122 @@
+; RUN: llc -march=mipsel -relocation-model=pic -O0 -mips-fast-isel \
+; RUN: -fast-isel-abort=1 -mcpu=mips32r2 < %s | FileCheck %s
+; RUN: llc -march=mipsel -relocation-model=pic -O0 -mips-fast-isel \
+; RUN: -fast-isel-abort=1 -mcpu=mips32 < %s | FileCheck %s
+
+@s1 = global i16 -89, align 2
+@s2 = global i16 4, align 2
+@us1 = global i16 -503, align 2
+@us2 = global i16 5, align 2
+@s3 = common global i16 0, align 2
+@us3 = common global i16 0, align 2
+
+define void @sll() {
+entry:
+ %0 = load i16, i16* @s1, align 2
+ %1 = load i16, i16* @s2, align 2
+ %shl = shl i16 %0, %1
+ store i16 %shl, i16* @s3, align 2
+; CHECK-LABEL: sll:
+; CHECK: lui $[[REG_GPa:[0-9]+]], %hi(_gp_disp)
+; CHECK-DAG: addiu $[[REG_GPb:[0-9]+]], $[[REG_GPa]], %lo(_gp_disp)
+; CHECK-DAG: addu $[[REG_GP:[0-9]+]], $[[REG_GPb]], $25
+; CHECK-DAG: lw $[[S3_ADDR:[0-9]+]], %got(s3)($[[REG_GP]])
+; CHECK-DAG: lw $[[S2_ADDR:[0-9]+]], %got(s2)($[[REG_GP]])
+; CHECK-DAG: lw $[[S1_ADDR:[0-9]+]], %got(s1)($[[REG_GP]])
+; CHECK-DAG: lhu $[[S1:[0-9]+]], 0($[[S1_ADDR]])
+; CHECK-DAG: lhu $[[S2:[0-9]+]], 0($[[S2_ADDR]])
+; CHECK: sllv $[[RES:[0-9]+]], $[[S1]], $[[S2]]
+; CHECK: sh $[[RES]], 0($[[S3_ADDR]])
+ ret void
+}
+
+define void @slli() {
+entry:
+ %0 = load i16, i16* @s1, align 2
+ %shl = shl i16 %0, 5
+ store i16 %shl, i16* @s3, align 2
+; CHECK-LABEL: slli:
+; CHECK: lui $[[REG_GPa:[0-9]+]], %hi(_gp_disp)
+; CHECK-DAG: addiu $[[REG_GPb:[0-9]+]], $[[REG_GPa]], %lo(_gp_disp)
+; CHECK-DAG: addu $[[REG_GP:[0-9]+]], $[[REG_GPb]], $25
+; CHECK-DAG: lw $[[S3_ADDR:[0-9]+]], %got(s3)($[[REG_GP]])
+; CHECK-DAG: lw $[[S1_ADDR:[0-9]+]], %got(s1)($[[REG_GP]])
+; CHECK-DAG: lhu $[[S1:[0-9]+]], 0($[[S1_ADDR]])
+; CHECK: sll $[[RES:[0-9]+]], $[[S1]], 5
+; CHECK: sh $[[RES]], 0($[[S3_ADDR]])
+ ret void
+}
+
+define void @srl() {
+entry:
+ %0 = load i16, i16* @us1, align 2
+ %1 = load i16, i16* @us2, align 2
+ %shr = lshr i16 %0, %1
+ store i16 %shr, i16* @us3, align 2
+ ret void
+; CHECK-LABEL: srl:
+; CHECK: lui $[[REG_GPa:[0-9]+]], %hi(_gp_disp)
+; CHECK-DAG: addiu $[[REG_GPb:[0-9]+]], $[[REG_GPa]], %lo(_gp_disp)
+; CHECK-DAG: addu $[[REG_GP:[0-9]+]], $[[REG_GPb]], $25
+; CHECK-DAG: lw $[[US3_ADDR:[0-9]+]], %got(us3)($[[REG_GP]])
+; CHECK-DAG: lw $[[US2_ADDR:[0-9]+]], %got(us2)($[[REG_GP]])
+; CHECK-DAG: lw $[[US1_ADDR:[0-9]+]], %got(us1)($[[REG_GP]])
+; CHECK-DAG: lhu $[[US1:[0-9]+]], 0($[[US1_ADDR]])
+; CHECK-DAG: lhu $[[US2:[0-9]+]], 0($[[US2_ADDR]])
+; CHECK: srlv $[[RES:[0-9]+]], $[[US1]], $[[US2]]
+; CHECK: sh $[[RES]], 0($[[US3_ADDR]])
+}
+
+define void @srli() {
+entry:
+ %0 = load i16, i16* @us1, align 2
+ %shr = lshr i16 %0, 4
+ store i16 %shr, i16* @us3, align 2
+; CHECK-LABEL: srli:
+; CHECK: lui $[[REG_GPa:[0-9]+]], %hi(_gp_disp)
+; CHECK-DAG: addiu $[[REG_GPb:[0-9]+]], $[[REG_GPa]], %lo(_gp_disp)
+; CHECK-DAG: addu $[[REG_GP:[0-9]+]], $[[REG_GPb]], $25
+; CHECK-DAG: lw $[[US3_ADDR:[0-9]+]], %got(us3)($[[REG_GP]])
+; CHECK-DAG: lw $[[US1_ADDR:[0-9]+]], %got(us1)($[[REG_GP]])
+; CHECK-DAG: lhu $[[US1:[0-9]+]], 0($[[US1_ADDR]])
+; CHECK: srl $[[RES:[0-9]+]], $[[US1]], 4
+; CHECK: sh $[[RES]], 0($[[US3_ADDR]])
+ ret void
+}
+
+define void @sra() {
+entry:
+ %0 = load i16, i16* @s1, align 2
+ %1 = load i16, i16* @s2, align 2
+ %shr = ashr i16 %0, %1
+ store i16 %shr, i16* @s3, align 2
+; CHECK-LABEL: sra:
+; CHECK: lui $[[REG_GPa:[0-9]+]], %hi(_gp_disp)
+; CHECK-DAG: addiu $[[REG_GPb:[0-9]+]], $[[REG_GPa]], %lo(_gp_disp)
+; CHECK-DAG: addu $[[REG_GP:[0-9]+]], $[[REG_GPb]], $25
+; CHECK-DAG: lw $[[S3_ADDR:[0-9]+]], %got(s3)($[[REG_GP]])
+; CHECK-DAG: lw $[[S2_ADDR:[0-9]+]], %got(s2)($[[REG_GP]])
+; CHECK-DAG: lw $[[S1_ADDR:[0-9]+]], %got(s1)($[[REG_GP]])
+; CHECK-DAG: lhu $[[S1:[0-9]+]], 0($[[S1_ADDR]])
+; CHECK-DAG: lhu $[[S2:[0-9]+]], 0($[[S2_ADDR]])
+; CHECK: srav $[[RES:[0-9]+]], $[[S1]], $[[S2]]
+; CHECK: sh $[[RES]], 0($[[S3_ADDR]])
+ ret void
+}
+
+define void @srai() {
+entry:
+ %0 = load i16, i16* @s1, align 2
+ %shr = ashr i16 %0, 2
+ store i16 %shr, i16* @s3, align 2
+; CHECK-LABEL: srai:
+; CHECK: lui $[[REG_GPa:[0-9]+]], %hi(_gp_disp)
+; CHECK-DAG: addiu $[[REG_GPb:[0-9]+]], $[[REG_GPa]], %lo(_gp_disp)
+; CHECK-DAG: addu $[[REG_GP:[0-9]+]], $[[REG_GPb]], $25
+; CHECK-DAG: lw $[[S3_ADDR:[0-9]+]], %got(s3)($[[REG_GP]])
+; CHECK-DAG: lw $[[S1_ADDR:[0-9]+]], %got(s1)($[[REG_GP]])
+; CHECK-DAG: lhu $[[S1:[0-9]+]], 0($[[S1_ADDR]])
+; CHECK: sra $[[RES:[0-9]+]], $[[S1]], 2
+; CHECK: sh $[[RES]], 0($[[S3_ADDR]])
+ ret void
+}
diff --git a/test/CodeGen/Mips/Fast-ISel/shift.ll b/test/CodeGen/Mips/Fast-ISel/shift.ll
index 18fd5ac32d22..df1c82700d59 100644
--- a/test/CodeGen/Mips/Fast-ISel/shift.ll
+++ b/test/CodeGen/Mips/Fast-ISel/shift.ll
@@ -9,7 +9,7 @@ define i32 @main() nounwind uwtable {
entry:
%foo = alloca %struct.s, align 4
%0 = bitcast %struct.s* %foo to i32*
- %bf.load = load i32* %0, align 4
+ %bf.load = load i32, i32* %0, align 4
%bf.lshr = lshr i32 %bf.load, 2
%cmp = icmp ne i32 %bf.lshr, 2
br i1 %cmp, label %if.then, label %if.end
diff --git a/test/CodeGen/Mips/Fast-ISel/simplestore.ll b/test/CodeGen/Mips/Fast-ISel/simplestore.ll
index 83e3f3f24274..bcb198b1a823 100644
--- a/test/CodeGen/Mips/Fast-ISel/simplestore.ll
+++ b/test/CodeGen/Mips/Fast-ISel/simplestore.ll
@@ -1,6 +1,6 @@
-; RUN: llc -march=mipsel -relocation-model=pic -O0 -mips-fast-isel -fast-isel-abort -mcpu=mips32r2 \
+; RUN: llc -march=mipsel -relocation-model=pic -O0 -mips-fast-isel -fast-isel-abort=1 -mcpu=mips32r2 \
; RUN: < %s | FileCheck %s
-; RUN: llc -march=mipsel -relocation-model=pic -O0 -mips-fast-isel -fast-isel-abort -mcpu=mips32 \
+; RUN: llc -march=mipsel -relocation-model=pic -O0 -mips-fast-isel -fast-isel-abort=1 -mcpu=mips32 \
; RUN: < %s | FileCheck %s
@abcd = external global i32
diff --git a/test/CodeGen/Mips/Fast-ISel/simplestorefp1.ll b/test/CodeGen/Mips/Fast-ISel/simplestorefp1.ll
index 74723ae1beeb..f4b91d850255 100644
--- a/test/CodeGen/Mips/Fast-ISel/simplestorefp1.ll
+++ b/test/CodeGen/Mips/Fast-ISel/simplestorefp1.ll
@@ -1,10 +1,10 @@
-; RUN: llc -march=mipsel -relocation-model=pic -O0 -mips-fast-isel -fast-isel-abort -mcpu=mips32r2 \
+; RUN: llc -march=mipsel -relocation-model=pic -O0 -mips-fast-isel -fast-isel-abort=1 -mcpu=mips32r2 \
; RUN: < %s | FileCheck %s
-; RUN: llc -march=mipsel -relocation-model=pic -O0 -mips-fast-isel -fast-isel-abort -mcpu=mips32 \
+; RUN: llc -march=mipsel -relocation-model=pic -O0 -mips-fast-isel -fast-isel-abort=1 -mcpu=mips32 \
; RUN: < %s | FileCheck %s
-; RUN: llc -march=mipsel -relocation-model=pic -O0 -mips-fast-isel -fast-isel-abort -mcpu=mips32r2 \
+; RUN: llc -march=mipsel -relocation-model=pic -O0 -mips-fast-isel -fast-isel-abort=1 -mcpu=mips32r2 \
; RUN: < %s | FileCheck %s -check-prefix=mips32r2
-; RUN: llc -march=mipsel -relocation-model=pic -O0 -mips-fast-isel -fast-isel-abort -mcpu=mips32 \
+; RUN: llc -march=mipsel -relocation-model=pic -O0 -mips-fast-isel -fast-isel-abort=1 -mcpu=mips32 \
; RUN: < %s | FileCheck %s -check-prefix=mips32
@f = common global float 0.000000e+00, align 4
diff --git a/test/CodeGen/Mips/Fast-ISel/simplestorei.ll b/test/CodeGen/Mips/Fast-ISel/simplestorei.ll
index 128e1de9cad0..83ccae0b1de5 100644
--- a/test/CodeGen/Mips/Fast-ISel/simplestorei.ll
+++ b/test/CodeGen/Mips/Fast-ISel/simplestorei.ll
@@ -1,6 +1,6 @@
-; RUN: llc -march=mipsel -relocation-model=pic -O0 -mips-fast-isel -fast-isel-abort -mcpu=mips32r2 \
+; RUN: llc -march=mipsel -relocation-model=pic -O0 -mips-fast-isel -fast-isel-abort=1 -mcpu=mips32r2 \
; RUN: < %s | FileCheck %s
-; RUN: llc -march=mipsel -relocation-model=pic -O0 -mips-fast-isel -fast-isel-abort -mcpu=mips32 \
+; RUN: llc -march=mipsel -relocation-model=pic -O0 -mips-fast-isel -fast-isel-abort=1 -mcpu=mips32 \
; RUN: < %s | FileCheck %s
@ijk = external global i32
diff --git a/test/CodeGen/Mips/abiflags32.ll b/test/CodeGen/Mips/abiflags32.ll
index e32d4a586ee3..39e2a90151e3 100644
--- a/test/CodeGen/Mips/abiflags32.ll
+++ b/test/CodeGen/Mips/abiflags32.ll
@@ -1,6 +1,6 @@
; RUN: llc -filetype=asm -mtriple mipsel-unknown-linux -mcpu=mips32 %s -o - | FileCheck %s
; RUN: llc -filetype=asm -mtriple mipsel-unknown-linux -mcpu=mips32 -mattr=fp64 %s -o - | FileCheck -check-prefix=CHECK-64 %s
-; RUN: llc -filetype=asm -mtriple mipsel-unknown-linux -mcpu=mips64 -mattr=-n64,n32 %s -o - | FileCheck -check-prefix=CHECK-64n %s
+; RUN: llc -filetype=asm -mtriple mipsel-unknown-linux -mcpu=mips64 -target-abi n32 %s -o - | FileCheck -check-prefix=CHECK-64n %s
; CHECK: .nan legacy
; We don't emit '.module fp=32' for compatibility with binutils 2.24 which
diff --git a/test/CodeGen/Mips/addi.ll b/test/CodeGen/Mips/addi.ll
index 01d409e521d7..b6af2ee45687 100644
--- a/test/CodeGen/Mips/addi.ll
+++ b/test/CodeGen/Mips/addi.ll
@@ -8,16 +8,16 @@
define void @foo() nounwind {
entry:
- %0 = load i32* @i, align 4
+ %0 = load i32, i32* @i, align 4
%add = add nsw i32 %0, 5
store i32 %add, i32* @i, align 4
- %1 = load i32* @j, align 4
+ %1 = load i32, i32* @j, align 4
%sub = sub nsw i32 %1, 5
store i32 %sub, i32* @j, align 4
- %2 = load i32* @k, align 4
+ %2 = load i32, i32* @k, align 4
%add1 = add nsw i32 %2, 10000
store i32 %add1, i32* @k, align 4
- %3 = load i32* @l, align 4
+ %3 = load i32, i32* @l, align 4
%sub2 = sub nsw i32 %3, 10000
store i32 %sub2, i32* @l, align 4
; 16: addiu ${{[0-9]+}}, 5 # 16 bit inst
diff --git a/test/CodeGen/Mips/addressing-mode.ll b/test/CodeGen/Mips/addressing-mode.ll
index ea76dde82dc3..81e062062ecf 100644
--- a/test/CodeGen/Mips/addressing-mode.ll
+++ b/test/CodeGen/Mips/addressing-mode.ll
@@ -20,10 +20,10 @@ for.cond1.preheader:
for.body3:
%s.120 = phi i32 [ %s.022, %for.cond1.preheader ], [ %add7, %for.body3 ]
%j.019 = phi i32 [ 0, %for.cond1.preheader ], [ %add8, %for.body3 ]
- %arrayidx4 = getelementptr inbounds [256 x i32]* %a, i32 %i.021, i32 %j.019
- %0 = load i32* %arrayidx4, align 4
- %arrayidx6 = getelementptr inbounds [256 x i32]* %b, i32 %i.021, i32 %j.019
- %1 = load i32* %arrayidx6, align 4
+ %arrayidx4 = getelementptr inbounds [256 x i32], [256 x i32]* %a, i32 %i.021, i32 %j.019
+ %0 = load i32, i32* %arrayidx4, align 4
+ %arrayidx6 = getelementptr inbounds [256 x i32], [256 x i32]* %b, i32 %i.021, i32 %j.019
+ %1 = load i32, i32* %arrayidx6, align 4
%add = add i32 %0, %s.120
%add7 = add i32 %add, %1
%add8 = add nsw i32 %j.019, %m
diff --git a/test/CodeGen/Mips/align16.ll b/test/CodeGen/Mips/align16.ll
index 689ae8307f57..f385adfaa04c 100644
--- a/test/CodeGen/Mips/align16.ll
+++ b/test/CodeGen/Mips/align16.ll
@@ -15,13 +15,13 @@ entry:
%x = alloca i32, align 8
%zz = alloca i32, align 4
%z = alloca i32, align 4
- %0 = load i32* @i, align 4
- %arrayidx = getelementptr inbounds [512 x i32]* %y, i32 0, i32 10
+ %0 = load i32, i32* @i, align 4
+ %arrayidx = getelementptr inbounds [512 x i32], [512 x i32]* %y, i32 0, i32 10
store i32 %0, i32* %arrayidx, align 4
- %1 = load i32* @i, align 4
+ %1 = load i32, i32* @i, align 4
store i32 %1, i32* %x, align 8
call void @p(i32* %x)
- %arrayidx1 = getelementptr inbounds [512 x i32]* %y, i32 0, i32 10
+ %arrayidx1 = getelementptr inbounds [512 x i32], [512 x i32]* %y, i32 0, i32 10
call void @p(i32* %arrayidx1)
ret void
}
diff --git a/test/CodeGen/Mips/alloca.ll b/test/CodeGen/Mips/alloca.ll
index fc7ef862a328..747a1362161d 100644
--- a/test/CodeGen/Mips/alloca.ll
+++ b/test/CodeGen/Mips/alloca.ll
@@ -9,7 +9,7 @@ entry:
; CHECK: move $4, $[[T0]]
; CHECK: move $4, $[[T2]]
%tmp1 = alloca i8, i32 %size, align 4
- %add.ptr = getelementptr inbounds i8* %tmp1, i32 5
+ %add.ptr = getelementptr inbounds i8, i8* %tmp1, i32 5
store i8 97, i8* %add.ptr, align 1
%tmp4 = alloca i8, i32 %size, align 4
call void @foo2(double 1.000000e+00, double 2.000000e+00, i32 3) nounwind
@@ -39,17 +39,17 @@ entry:
if.then: ; preds = %entry
; CHECK: addiu $4, $[[T0]], 40
- %add.ptr = getelementptr inbounds i8* %tmp1, i32 40
+ %add.ptr = getelementptr inbounds i8, i8* %tmp1, i32 40
%1 = bitcast i8* %add.ptr to i32*
call void @foo3(i32* %1) nounwind
- %arrayidx15.pre = getelementptr inbounds i8* %tmp1, i32 12
+ %arrayidx15.pre = getelementptr inbounds i8, i8* %tmp1, i32 12
%.pre = bitcast i8* %arrayidx15.pre to i32*
br label %if.end
if.else: ; preds = %entry
; CHECK: addiu $4, $[[T0]], 12
- %add.ptr5 = getelementptr inbounds i8* %tmp1, i32 12
+ %add.ptr5 = getelementptr inbounds i8, i8* %tmp1, i32 12
%2 = bitcast i8* %add.ptr5 to i32*
call void @foo3(i32* %2) nounwind
br label %if.end
@@ -59,24 +59,24 @@ if.end: ; preds = %if.else, %if.then
; CHECK: lw $25, %call16(printf)
%.pre-phi = phi i32* [ %2, %if.else ], [ %.pre, %if.then ]
- %tmp7 = load i32* %0, align 4
- %arrayidx9 = getelementptr inbounds i8* %tmp1, i32 4
+ %tmp7 = load i32, i32* %0, align 4
+ %arrayidx9 = getelementptr inbounds i8, i8* %tmp1, i32 4
%3 = bitcast i8* %arrayidx9 to i32*
- %tmp10 = load i32* %3, align 4
- %arrayidx12 = getelementptr inbounds i8* %tmp1, i32 8
+ %tmp10 = load i32, i32* %3, align 4
+ %arrayidx12 = getelementptr inbounds i8, i8* %tmp1, i32 8
%4 = bitcast i8* %arrayidx12 to i32*
- %tmp13 = load i32* %4, align 4
- %tmp16 = load i32* %.pre-phi, align 4
- %arrayidx18 = getelementptr inbounds i8* %tmp1, i32 16
+ %tmp13 = load i32, i32* %4, align 4
+ %tmp16 = load i32, i32* %.pre-phi, align 4
+ %arrayidx18 = getelementptr inbounds i8, i8* %tmp1, i32 16
%5 = bitcast i8* %arrayidx18 to i32*
- %tmp19 = load i32* %5, align 4
- %arrayidx21 = getelementptr inbounds i8* %tmp1, i32 20
+ %tmp19 = load i32, i32* %5, align 4
+ %arrayidx21 = getelementptr inbounds i8, i8* %tmp1, i32 20
%6 = bitcast i8* %arrayidx21 to i32*
- %tmp22 = load i32* %6, align 4
- %arrayidx24 = getelementptr inbounds i8* %tmp1, i32 24
+ %tmp22 = load i32, i32* %6, align 4
+ %arrayidx24 = getelementptr inbounds i8, i8* %tmp1, i32 24
%7 = bitcast i8* %arrayidx24 to i32*
- %tmp25 = load i32* %7, align 4
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([22 x i8]* @.str, i32 0, i32 0), i32 %tmp7, i32 %tmp10, i32 %tmp13, i32 %tmp16, i32 %tmp19, i32 %tmp22, i32 %tmp25) nounwind
+ %tmp25 = load i32, i32* %7, align 4
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([22 x i8], [22 x i8]* @.str, i32 0, i32 0), i32 %tmp7, i32 %tmp10, i32 %tmp13, i32 %tmp16, i32 %tmp19, i32 %tmp22, i32 %tmp25) nounwind
ret i32 0
}
diff --git a/test/CodeGen/Mips/alloca16.ll b/test/CodeGen/Mips/alloca16.ll
index 4f6059878c3b..be8cc740310b 100644
--- a/test/CodeGen/Mips/alloca16.ll
+++ b/test/CodeGen/Mips/alloca16.ll
@@ -12,7 +12,7 @@ define void @temp(i32 %foo) nounwind {
entry:
%foo.addr = alloca i32, align 4
store i32 %foo, i32* %foo.addr, align 4
- %0 = load i32* %foo.addr, align 4
+ %0 = load i32, i32* %foo.addr, align 4
store i32 %0, i32* @t, align 4
ret void
}
@@ -28,46 +28,46 @@ entry:
%sssi = alloca i32, align 4
%ip = alloca i32*, align 4
%sssj = alloca i32, align 4
- %0 = load i32* @iiii, align 4
+ %0 = load i32, i32* @iiii, align 4
store i32 %0, i32* %sssi, align 4
- %1 = load i32* @kkkk, align 4
+ %1 = load i32, i32* @kkkk, align 4
%mul = mul nsw i32 %1, 100
%2 = alloca i8, i32 %mul
%3 = bitcast i8* %2 to i32*
store i32* %3, i32** %ip, align 4
- %4 = load i32* @jjjj, align 4
+ %4 = load i32, i32* @jjjj, align 4
store i32 %4, i32* %sssj, align 4
- %5 = load i32* @jjjj, align 4
- %6 = load i32* @iiii, align 4
- %7 = load i32** %ip, align 4
- %arrayidx = getelementptr inbounds i32* %7, i32 %6
+ %5 = load i32, i32* @jjjj, align 4
+ %6 = load i32, i32* @iiii, align 4
+ %7 = load i32*, i32** %ip, align 4
+ %arrayidx = getelementptr inbounds i32, i32* %7, i32 %6
store i32 %5, i32* %arrayidx, align 4
- %8 = load i32* @kkkk, align 4
- %9 = load i32* @jjjj, align 4
- %10 = load i32** %ip, align 4
- %arrayidx1 = getelementptr inbounds i32* %10, i32 %9
+ %8 = load i32, i32* @kkkk, align 4
+ %9 = load i32, i32* @jjjj, align 4
+ %10 = load i32*, i32** %ip, align 4
+ %arrayidx1 = getelementptr inbounds i32, i32* %10, i32 %9
store i32 %8, i32* %arrayidx1, align 4
- %11 = load i32* @iiii, align 4
- %12 = load i32* @kkkk, align 4
- %13 = load i32** %ip, align 4
- %arrayidx2 = getelementptr inbounds i32* %13, i32 %12
+ %11 = load i32, i32* @iiii, align 4
+ %12 = load i32, i32* @kkkk, align 4
+ %13 = load i32*, i32** %ip, align 4
+ %arrayidx2 = getelementptr inbounds i32, i32* %13, i32 %12
store i32 %11, i32* %arrayidx2, align 4
- %14 = load i32** %ip, align 4
- %arrayidx3 = getelementptr inbounds i32* %14, i32 25
- %15 = load i32* %arrayidx3, align 4
+ %14 = load i32*, i32** %ip, align 4
+ %arrayidx3 = getelementptr inbounds i32, i32* %14, i32 25
+ %15 = load i32, i32* %arrayidx3, align 4
store i32 %15, i32* @riii, align 4
- %16 = load i32** %ip, align 4
- %arrayidx4 = getelementptr inbounds i32* %16, i32 35
- %17 = load i32* %arrayidx4, align 4
+ %16 = load i32*, i32** %ip, align 4
+ %arrayidx4 = getelementptr inbounds i32, i32* %16, i32 35
+ %17 = load i32, i32* %arrayidx4, align 4
store i32 %17, i32* @rjjj, align 4
- %18 = load i32** %ip, align 4
- %arrayidx5 = getelementptr inbounds i32* %18, i32 100
- %19 = load i32* %arrayidx5, align 4
+ %18 = load i32*, i32** %ip, align 4
+ %arrayidx5 = getelementptr inbounds i32, i32* %18, i32 100
+ %19 = load i32, i32* %arrayidx5, align 4
store i32 %19, i32* @rkkk, align 4
- %20 = load i32* @t, align 4
- %21 = load i32** %ip, align 4
- %arrayidx6 = getelementptr inbounds i32* %21, i32 %20
- %22 = load i32* %arrayidx6, align 4
+ %20 = load i32, i32* @t, align 4
+ %21 = load i32*, i32** %ip, align 4
+ %arrayidx6 = getelementptr inbounds i32, i32* %21, i32 %20
+ %22 = load i32, i32* %arrayidx6, align 4
; 16: addiu $sp, -16
call void @temp(i32 %22)
; 16: addiu $sp, 16
diff --git a/test/CodeGen/Mips/analyzebranch.ll b/test/CodeGen/Mips/analyzebranch.ll
index 4b5d09778d79..d5ecaaeddc33 100644
--- a/test/CodeGen/Mips/analyzebranch.ll
+++ b/test/CodeGen/Mips/analyzebranch.ll
@@ -60,7 +60,7 @@ if.then: ; preds = %entry
unreachable
if.end: ; preds = %entry
- tail call void (...)* @f2() nounwind
+ tail call void (...) @f2() nounwind
ret void
}
diff --git a/test/CodeGen/Mips/and1.ll b/test/CodeGen/Mips/and1.ll
index 4ff1204fe7ae..57076a4d4fcf 100644
--- a/test/CodeGen/Mips/and1.ll
+++ b/test/CodeGen/Mips/and1.ll
@@ -6,11 +6,11 @@
define i32 @main() nounwind {
entry:
- %0 = load i32* @x, align 4
- %1 = load i32* @y, align 4
+ %0 = load i32, i32* @x, align 4
+ %1 = load i32, i32* @y, align 4
%and = and i32 %0, %1
; 16: and ${{[0-9]+}}, ${{[0-9]+}}
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([7 x i8]* @.str, i32 0, i32 0), i32 %and)
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([7 x i8], [7 x i8]* @.str, i32 0, i32 0), i32 %and)
ret i32 0
}
diff --git a/test/CodeGen/Mips/atomic.ll b/test/CodeGen/Mips/atomic.ll
index ccfeb00967e3..031cce0b6074 100644
--- a/test/CodeGen/Mips/atomic.ll
+++ b/test/CodeGen/Mips/atomic.ll
@@ -54,7 +54,7 @@ define i32 @AtomicSwap32(i32 signext %newval) nounwind {
entry:
%newval.addr = alloca i32, align 4
store i32 %newval, i32* %newval.addr, align 4
- %tmp = load i32* %newval.addr, align 4
+ %tmp = load i32, i32* %newval.addr, align 4
%0 = atomicrmw xchg i32* @x, i32 %tmp monotonic
ret i32 %0
@@ -74,7 +74,7 @@ define i32 @AtomicCmpSwap32(i32 signext %oldval, i32 signext %newval) nounwind {
entry:
%newval.addr = alloca i32, align 4
store i32 %newval, i32* %newval.addr, align 4
- %tmp = load i32* %newval.addr, align 4
+ %tmp = load i32, i32* %newval.addr, align 4
%0 = cmpxchg i32* @x, i32 %oldval, i32 %tmp monotonic monotonic
%1 = extractvalue { i32, i1 } %0, 0
ret i32 %1
@@ -429,7 +429,7 @@ entry:
; FIXME: At the moment, we don't seem to do addr+offset for any atomic load/store.
define i32 @AtomicLoadAdd32_OffGt9Bit(i32 signext %incr) nounwind {
entry:
- %0 = atomicrmw add i32* getelementptr(i32* @x, i32 256), i32 %incr monotonic
+ %0 = atomicrmw add i32* getelementptr(i32, i32* @x, i32 256), i32 %incr monotonic
ret i32 %0
; ALL-LABEL: AtomicLoadAdd32_OffGt9Bit:
diff --git a/test/CodeGen/Mips/atomicops.ll b/test/CodeGen/Mips/atomicops.ll
index c26415233d0b..0ff9f5c22a84 100644
--- a/test/CodeGen/Mips/atomicops.ll
+++ b/test/CodeGen/Mips/atomicops.ll
@@ -18,15 +18,15 @@ entry:
store volatile i32 0, i32* %x, align 4
%0 = atomicrmw add i32* %x, i32 1 seq_cst
%add.i = add nsw i32 %0, 2
- %1 = load volatile i32* %x, align 4
- %call1 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([8 x i8]* @.str, i32 0, i32 0), i32 %add.i, i32 %1) nounwind
+ %1 = load volatile i32, i32* %x, align 4
+ %call1 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([8 x i8], [8 x i8]* @.str, i32 0, i32 0), i32 %add.i, i32 %1) nounwind
%pair = cmpxchg i32* %x, i32 1, i32 2 seq_cst seq_cst
%2 = extractvalue { i32, i1 } %pair, 0
- %3 = load volatile i32* %x, align 4
- %call2 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([8 x i8]* @.str, i32 0, i32 0), i32 %2, i32 %3) nounwind
+ %3 = load volatile i32, i32* %x, align 4
+ %call2 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([8 x i8], [8 x i8]* @.str, i32 0, i32 0), i32 %2, i32 %3) nounwind
%4 = atomicrmw xchg i32* %x, i32 1 seq_cst
- %5 = load volatile i32* %x, align 4
- %call3 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([8 x i8]* @.str, i32 0, i32 0), i32 %4, i32 %5) nounwind
+ %5 = load volatile i32, i32* %x, align 4
+ %call3 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([8 x i8], [8 x i8]* @.str, i32 0, i32 0), i32 %4, i32 %5) nounwind
; 16-LABEL: main:
; 16: lw ${{[0-9]+}}, %call16(__sync_synchronize)(${{[0-9]+}})
; 16: lw ${{[0-9]+}}, %call16(__sync_fetch_and_add_4)(${{[0-9]+}})
diff --git a/test/CodeGen/Mips/beqzc.ll b/test/CodeGen/Mips/beqzc.ll
index 4a294c2d817e..37bece884212 100644
--- a/test/CodeGen/Mips/beqzc.ll
+++ b/test/CodeGen/Mips/beqzc.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -soft-float -mips16-hard-float -relocation-model=pic -mips16-constant-islands < %s | FileCheck %s -check-prefix=cond-b-short
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -mattr=+soft-float -mips16-hard-float -relocation-model=pic -mips16-constant-islands < %s | FileCheck %s -check-prefix=cond-b-short
@i = global i32 0, align 4
@j = common global i32 0, align 4
@@ -6,7 +6,7 @@
; Function Attrs: nounwind optsize
define i32 @main() #0 {
entry:
- %0 = load i32* @i, align 4
+ %0 = load i32, i32* @i, align 4
%cmp = icmp eq i32 %0, 0
%. = select i1 %cmp, i32 10, i32 55
store i32 %., i32* @j, align 4
diff --git a/test/CodeGen/Mips/beqzc1.ll b/test/CodeGen/Mips/beqzc1.ll
index 8f929a8e3541..1f5575f099fa 100644
--- a/test/CodeGen/Mips/beqzc1.ll
+++ b/test/CodeGen/Mips/beqzc1.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -soft-float -mips16-hard-float -relocation-model=pic -mips16-constant-islands < %s | FileCheck %s -check-prefix=cond-b-short
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -mattr=+soft-float -mips16-hard-float -relocation-model=pic -mips16-constant-islands < %s | FileCheck %s -check-prefix=cond-b-short
@i = global i32 0, align 4
@j = common global i32 0, align 4
@@ -6,7 +6,7 @@
; Function Attrs: nounwind optsize
define i32 @main() #0 {
entry:
- %0 = load i32* @i, align 4
+ %0 = load i32, i32* @i, align 4
%cmp = icmp eq i32 %0, 0
br i1 %cmp, label %if.then, label %if.end
diff --git a/test/CodeGen/Mips/biggot.ll b/test/CodeGen/Mips/biggot.ll
index da287eea6fd1..b56ce6ba87b1 100644
--- a/test/CodeGen/Mips/biggot.ll
+++ b/test/CodeGen/Mips/biggot.ll
@@ -20,7 +20,7 @@ entry:
; N64: daddu $[[R3:[0-9]+]], $[[R2]], ${{[a-z0-9]+}}
; N64: ld ${{[0-9]+}}, %call_lo(foo0)($[[R3]])
- %0 = load i32* @v0, align 4
+ %0 = load i32, i32* @v0, align 4
tail call void @foo0(i32 %0) nounwind
ret void
}
diff --git a/test/CodeGen/Mips/blockaddr.ll b/test/CodeGen/Mips/blockaddr.ll
index d6dc7e7b24b0..f74363702af5 100644
--- a/test/CodeGen/Mips/blockaddr.ll
+++ b/test/CodeGen/Mips/blockaddr.ll
@@ -1,9 +1,9 @@
; RUN: llc -march=mipsel -relocation-model=pic < %s | FileCheck %s -check-prefix=PIC-O32
; RUN: llc -march=mipsel -relocation-model=static < %s | FileCheck %s -check-prefix=STATIC-O32
-; RUN: llc -march=mips64el -mcpu=mips64r2 -mattr=-n64,n32 -relocation-model=pic < %s | FileCheck %s -check-prefix=PIC-N32
-; RUN: llc -march=mips64el -mcpu=mips64r2 -mattr=-n64,n32 -relocation-model=static < %s | FileCheck %s -check-prefix=STATIC-N32
-; RUN: llc -march=mips64el -mcpu=mips64r2 -mattr=-n64,n64 -relocation-model=pic < %s | FileCheck %s -check-prefix=PIC-N64
-; RUN: llc -march=mips64el -mcpu=mips64r2 -mattr=-n64,n64 -relocation-model=static < %s | FileCheck %s -check-prefix=STATIC-N64
+; RUN: llc -march=mips64el -mcpu=mips64r2 -target-abi n32 -relocation-model=pic < %s | FileCheck %s -check-prefix=PIC-N32
+; RUN: llc -march=mips64el -mcpu=mips64r2 -target-abi n32 -relocation-model=static < %s | FileCheck %s -check-prefix=STATIC-N32
+; RUN: llc -march=mips64el -mcpu=mips64r2 -target-abi n64 -relocation-model=pic < %s | FileCheck %s -check-prefix=PIC-N64
+; RUN: llc -march=mips64el -mcpu=mips64r2 -target-abi n64 -relocation-model=static < %s | FileCheck %s -check-prefix=STATIC-N64
; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips32 -mattr=+mips16 -relocation-model=static < %s | FileCheck %s -check-prefix=STATIC-MIPS16-1
; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips32 -mattr=+mips16 -relocation-model=static < %s | FileCheck %s -check-prefix=STATIC-MIPS16-2
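Two RUN-line conventions also change throughout these Mips tests: the bare -soft-float option becomes the subtarget attribute -mattr=+soft-float, and the ABI is now selected with -target-abi (o32, n32 or n64) rather than by toggling feature bits such as -mattr=-n64,+n32. A hypothetical minimal test showing only the updated flag spelling (the function and check line are illustrative, not from this patch):

; RUN: llc -march=mips64el -mcpu=mips64r2 -target-abi n32 -mattr=+soft-float \
; RUN:     -relocation-model=static < %s | FileCheck %s

define i32 @ret_zero() {
entry:
  ret i32 0
}
; CHECK-LABEL: ret_zero: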
diff --git a/test/CodeGen/Mips/brconeq.ll b/test/CodeGen/Mips/brconeq.ll
index 613391557efd..f555528bbb64 100644
--- a/test/CodeGen/Mips/brconeq.ll
+++ b/test/CodeGen/Mips/brconeq.ll
@@ -6,8 +6,8 @@
define void @test() nounwind {
entry:
- %0 = load i32* @i, align 4
- %1 = load i32* @j, align 4
+ %0 = load i32, i32* @i, align 4
+ %1 = load i32, i32* @j, align 4
%cmp = icmp eq i32 %0, %1
; 16: cmp ${{[0-9]+}}, ${{[0-9]+}}
; 16: bteqz $[[LABEL:[0-9A-Ba-b_]+]]
diff --git a/test/CodeGen/Mips/brconeqk.ll b/test/CodeGen/Mips/brconeqk.ll
index 2c0e72dabd29..59edae82e5ad 100644
--- a/test/CodeGen/Mips/brconeqk.ll
+++ b/test/CodeGen/Mips/brconeqk.ll
@@ -5,7 +5,7 @@
define void @test() nounwind {
entry:
- %0 = load i32* @i, align 4
+ %0 = load i32, i32* @i, align 4
%cmp = icmp eq i32 %0, 10
br i1 %cmp, label %if.end, label %if.then
; 16: cmpi ${{[0-9]+}}, {{[0-9]+}}
diff --git a/test/CodeGen/Mips/brconeqz.ll b/test/CodeGen/Mips/brconeqz.ll
index 5586e7b976da..22c566407528 100644
--- a/test/CodeGen/Mips/brconeqz.ll
+++ b/test/CodeGen/Mips/brconeqz.ll
@@ -5,7 +5,7 @@
define void @test() nounwind {
entry:
- %0 = load i32* @i, align 4
+ %0 = load i32, i32* @i, align 4
%cmp = icmp eq i32 %0, 0
br i1 %cmp, label %if.end, label %if.then
; 16: beqz ${{[0-9]+}}, $[[LABEL:[0-9A-Ba-b_]+]]
diff --git a/test/CodeGen/Mips/brconge.ll b/test/CodeGen/Mips/brconge.ll
index 02f0a633b313..46d19847d9bc 100644
--- a/test/CodeGen/Mips/brconge.ll
+++ b/test/CodeGen/Mips/brconge.ll
@@ -8,8 +8,8 @@
define void @test() nounwind {
entry:
- %0 = load i32* @i, align 4
- %1 = load i32* @j, align 4
+ %0 = load i32, i32* @i, align 4
+ %1 = load i32, i32* @j, align 4
%cmp = icmp slt i32 %0, %1
br i1 %cmp, label %if.then, label %if.end
@@ -22,7 +22,7 @@ if.then: ; preds = %entry
br label %if.end
if.end: ; preds = %if.then, %entry
- %2 = load i32* @k, align 4
+ %2 = load i32, i32* @k, align 4
%cmp1 = icmp slt i32 %0, %2
br i1 %cmp1, label %if.then2, label %if.end3
diff --git a/test/CodeGen/Mips/brcongt.ll b/test/CodeGen/Mips/brcongt.ll
index 767b51b21b91..cefacb8318b0 100644
--- a/test/CodeGen/Mips/brcongt.ll
+++ b/test/CodeGen/Mips/brcongt.ll
@@ -7,8 +7,8 @@
define void @test() nounwind {
entry:
- %0 = load i32* @i, align 4
- %1 = load i32* @j, align 4
+ %0 = load i32, i32* @i, align 4
+ %1 = load i32, i32* @j, align 4
%cmp = icmp sgt i32 %0, %1
br i1 %cmp, label %if.end, label %if.then
; 16: slt ${{[0-9]+}}, ${{[0-9]+}}
diff --git a/test/CodeGen/Mips/brconle.ll b/test/CodeGen/Mips/brconle.ll
index 854b2481c6e6..e1f15ecb6b92 100644
--- a/test/CodeGen/Mips/brconle.ll
+++ b/test/CodeGen/Mips/brconle.ll
@@ -8,8 +8,8 @@
define void @test() nounwind {
entry:
- %0 = load i32* @j, align 4
- %1 = load i32* @i, align 4
+ %0 = load i32, i32* @j, align 4
+ %1 = load i32, i32* @i, align 4
%cmp = icmp sgt i32 %0, %1
br i1 %cmp, label %if.then, label %if.end
@@ -22,7 +22,7 @@ if.then: ; preds = %entry
br label %if.end
if.end: ; preds = %if.then, %entry
- %2 = load i32* @k, align 4
+ %2 = load i32, i32* @k, align 4
%cmp1 = icmp sgt i32 %1, %2
br i1 %cmp1, label %if.then2, label %if.end3
diff --git a/test/CodeGen/Mips/brconlt.ll b/test/CodeGen/Mips/brconlt.ll
index 931a3e8c7ba4..049f35c393fe 100644
--- a/test/CodeGen/Mips/brconlt.ll
+++ b/test/CodeGen/Mips/brconlt.ll
@@ -7,8 +7,8 @@
define void @test() nounwind {
entry:
- %0 = load i32* @j, align 4
- %1 = load i32* @i, align 4
+ %0 = load i32, i32* @j, align 4
+ %1 = load i32, i32* @i, align 4
%cmp = icmp slt i32 %0, %1
br i1 %cmp, label %if.end, label %if.then
diff --git a/test/CodeGen/Mips/brconne.ll b/test/CodeGen/Mips/brconne.ll
index 5d5bde3fcf91..b260320b94e1 100644
--- a/test/CodeGen/Mips/brconne.ll
+++ b/test/CodeGen/Mips/brconne.ll
@@ -6,8 +6,8 @@
define void @test() nounwind {
entry:
- %0 = load i32* @j, align 4
- %1 = load i32* @i, align 4
+ %0 = load i32, i32* @j, align 4
+ %1 = load i32, i32* @i, align 4
%cmp = icmp eq i32 %0, %1
br i1 %cmp, label %if.then, label %if.end
; 16: cmp ${{[0-9]+}}, ${{[0-9]+}}
diff --git a/test/CodeGen/Mips/brconnek.ll b/test/CodeGen/Mips/brconnek.ll
index 6208d7c5a04b..778a5cce72b3 100644
--- a/test/CodeGen/Mips/brconnek.ll
+++ b/test/CodeGen/Mips/brconnek.ll
@@ -5,7 +5,7 @@
define void @test() nounwind {
entry:
- %0 = load i32* @j, align 4
+ %0 = load i32, i32* @j, align 4
%cmp = icmp eq i32 %0, 5
br i1 %cmp, label %if.then, label %if.end
diff --git a/test/CodeGen/Mips/brconnez.ll b/test/CodeGen/Mips/brconnez.ll
index 47db7901b517..754714b21daf 100644
--- a/test/CodeGen/Mips/brconnez.ll
+++ b/test/CodeGen/Mips/brconnez.ll
@@ -5,7 +5,7 @@
define void @test() nounwind {
entry:
- %0 = load i32* @j, align 4
+ %0 = load i32, i32* @j, align 4
%cmp = icmp eq i32 %0, 0
br i1 %cmp, label %if.then, label %if.end
diff --git a/test/CodeGen/Mips/brdelayslot.ll b/test/CodeGen/Mips/brdelayslot.ll
index 68341c1ba25b..0f46619b8272 100644
--- a/test/CodeGen/Mips/brdelayslot.ll
+++ b/test/CodeGen/Mips/brdelayslot.ll
@@ -54,18 +54,18 @@ declare void @foo4(double)
define void @foo5(i32 %a) nounwind {
entry:
- %0 = load i32* @g2, align 4
+ %0 = load i32, i32* @g2, align 4
%tobool = icmp eq i32 %a, 0
br i1 %tobool, label %if.else, label %if.then
if.then:
- %1 = load i32* @g1, align 4
+ %1 = load i32, i32* @g1, align 4
%add = add nsw i32 %1, %0
store i32 %add, i32* @g1, align 4
br label %if.end
if.else:
- %2 = load i32* @g3, align 4
+ %2 = load i32, i32* @g3, align 4
%sub = sub nsw i32 %2, %0
store i32 %sub, i32* @g3, align 4
br label %if.end
@@ -99,9 +99,9 @@ declare void @foo7(double, float)
define i32 @foo8(i32 %a) nounwind {
entry:
store i32 %a, i32* @g1, align 4
- %0 = load void ()** @foo9, align 4
+ %0 = load void ()*, void ()** @foo9, align 4
tail call void %0() nounwind
- %1 = load i32* @g1, align 4
+ %1 = load i32, i32* @g1, align 4
%add = add nsw i32 %1, %a
ret i32 %add
}
@@ -144,8 +144,8 @@ entry:
for.body: ; preds = %entry, %for.body
%s.06 = phi i32 [ %add, %for.body ], [ 0, %entry ]
%i.05 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
- %arrayidx = getelementptr inbounds i32* %a, i32 %i.05
- %0 = load i32* %arrayidx, align 4
+ %arrayidx = getelementptr inbounds i32, i32* %a, i32 %i.05
+ %0 = load i32, i32* %arrayidx, align 4
%add = add nsw i32 %0, %s.06
%inc = add nsw i32 %i.05, 1
%exitcond = icmp eq i32 %inc, %n
diff --git a/test/CodeGen/Mips/brind.ll b/test/CodeGen/Mips/brind.ll
index 4c591fa1bba1..a3e9b8011a2b 100644
--- a/test/CodeGen/Mips/brind.ll
+++ b/test/CodeGen/Mips/brind.ll
@@ -9,29 +9,29 @@
define i32 @main() nounwind {
entry:
- %puts = tail call i32 @puts(i8* getelementptr inbounds ([2 x i8]* @str, i32 0, i32 0))
+ %puts = tail call i32 @puts(i8* getelementptr inbounds ([2 x i8], [2 x i8]* @str, i32 0, i32 0))
br label %L1
L1: ; preds = %entry, %L3
%i.0 = phi i32 [ 0, %entry ], [ %inc, %L3 ]
- %puts5 = tail call i32 @puts(i8* getelementptr inbounds ([2 x i8]* @str5, i32 0, i32 0))
+ %puts5 = tail call i32 @puts(i8* getelementptr inbounds ([2 x i8], [2 x i8]* @str5, i32 0, i32 0))
br label %L2
L2: ; preds = %L1, %L3
%i.1 = phi i32 [ %i.0, %L1 ], [ %inc, %L3 ]
- %puts6 = tail call i32 @puts(i8* getelementptr inbounds ([2 x i8]* @str6, i32 0, i32 0))
+ %puts6 = tail call i32 @puts(i8* getelementptr inbounds ([2 x i8], [2 x i8]* @str6, i32 0, i32 0))
br label %L3
L3: ; preds = %L2, %L3
%i.2 = phi i32 [ %i.1, %L2 ], [ %inc, %L3 ]
- %puts7 = tail call i32 @puts(i8* getelementptr inbounds ([2 x i8]* @str7, i32 0, i32 0))
+ %puts7 = tail call i32 @puts(i8* getelementptr inbounds ([2 x i8], [2 x i8]* @str7, i32 0, i32 0))
%inc = add i32 %i.2, 1
- %arrayidx = getelementptr inbounds [5 x i8*]* @main.L, i32 0, i32 %i.2
- %0 = load i8** %arrayidx, align 4
+ %arrayidx = getelementptr inbounds [5 x i8*], [5 x i8*]* @main.L, i32 0, i32 %i.2
+ %0 = load i8*, i8** %arrayidx, align 4
indirectbr i8* %0, [label %L1, label %L2, label %L3, label %L4]
; 16: jrc ${{[0-9]+}}
L4: ; preds = %L3
- %puts8 = tail call i32 @puts(i8* getelementptr inbounds ([2 x i8]* @str8, i32 0, i32 0))
+ %puts8 = tail call i32 @puts(i8* getelementptr inbounds ([2 x i8], [2 x i8]* @str8, i32 0, i32 0))
ret i32 0
}
diff --git a/test/CodeGen/Mips/brsize3.ll b/test/CodeGen/Mips/brsize3.ll
index 3620868bb2f6..dad0d841d4c6 100644
--- a/test/CodeGen/Mips/brsize3.ll
+++ b/test/CodeGen/Mips/brsize3.ll
@@ -1,6 +1,6 @@
-; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -soft-float -mips16-hard-float -relocation-model=pic -mips16-constant-islands < %s | FileCheck %s -check-prefix=b-no-short
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -mattr=+soft-float -mips16-hard-float -relocation-model=pic -mips16-constant-islands < %s | FileCheck %s -check-prefix=b-no-short
-; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -soft-float -mips16-hard-float -relocation-model=pic -mips16-constant-islands < %s | FileCheck %s -check-prefix=b-long
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -mattr=+soft-float -mips16-hard-float -relocation-model=pic -mips16-constant-islands < %s | FileCheck %s -check-prefix=b-long
; ModuleID = 'brsize3.c'
target datalayout = "E-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-n32-S64"
diff --git a/test/CodeGen/Mips/brsize3a.ll b/test/CodeGen/Mips/brsize3a.ll
index f05e21191925..e1cd5893ceda 100644
--- a/test/CodeGen/Mips/brsize3a.ll
+++ b/test/CodeGen/Mips/brsize3a.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -soft-float -mips16-hard-float -relocation-model=pic -mips16-constant-islands < %s | FileCheck %s -check-prefix=b-short
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -mattr=+soft-float -mips16-hard-float -relocation-model=pic -mips16-constant-islands < %s | FileCheck %s -check-prefix=b-short
; ModuleID = 'brsize3.c'
target datalayout = "E-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-n32-S64"
diff --git a/test/CodeGen/Mips/cache-intrinsic.ll b/test/CodeGen/Mips/cache-intrinsic.ll
index 2fa411589596..987032eaeb89 100644
--- a/test/CodeGen/Mips/cache-intrinsic.ll
+++ b/test/CodeGen/Mips/cache-intrinsic.ll
@@ -10,10 +10,10 @@ define i32 @main() {
entry:
%retval = alloca i32, align 4
store i32 0, i32* %retval
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i8* getelementptr inbounds ([32 x i8]* @buffer, i32 0, i32 0))
- %call1 = call i8* @strcpy(i8* getelementptr inbounds ([32 x i8]* @buffer, i32 0, i32 0), i8* getelementptr inbounds ([25 x i8]* @.str1, i32 0, i32 0)) #3
- call void @llvm.clear_cache(i8* getelementptr inbounds ([32 x i8]* @buffer, i32 0, i32 0), i8* getelementptr inbounds (i8* getelementptr inbounds ([32 x i8]* @buffer, i32 0, i32 0), i32 32)) #3
- %call2 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i8* getelementptr inbounds ([32 x i8]* @buffer, i32 0, i32 0))
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i8* getelementptr inbounds ([32 x i8], [32 x i8]* @buffer, i32 0, i32 0))
+ %call1 = call i8* @strcpy(i8* getelementptr inbounds ([32 x i8], [32 x i8]* @buffer, i32 0, i32 0), i8* getelementptr inbounds ([25 x i8], [25 x i8]* @.str1, i32 0, i32 0)) #3
+ call void @llvm.clear_cache(i8* getelementptr inbounds ([32 x i8], [32 x i8]* @buffer, i32 0, i32 0), i8* getelementptr inbounds (i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @buffer, i32 0, i32 0), i32 32)) #3
+ %call2 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i8* getelementptr inbounds ([32 x i8], [32 x i8]* @buffer, i32 0, i32 0))
ret i32 0
}
diff --git a/test/CodeGen/Mips/cconv/arguments-float.ll b/test/CodeGen/Mips/cconv/arguments-float.ll
index 14a3baa7f539..c81c7215e164 100644
--- a/test/CodeGen/Mips/cconv/arguments-float.ll
+++ b/test/CodeGen/Mips/cconv/arguments-float.ll
@@ -1,14 +1,14 @@
-; RUN: llc -march=mips -relocation-model=static -soft-float < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=O32 --check-prefix=O32BE %s
-; RUN: llc -march=mipsel -relocation-model=static -soft-float < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=O32 --check-prefix=O32LE %s
+; RUN: llc -march=mips -relocation-model=static -mattr=+soft-float < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=O32 --check-prefix=O32BE %s
+; RUN: llc -march=mipsel -relocation-model=static -mattr=+soft-float < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=O32 --check-prefix=O32LE %s
-; RUN-TODO: llc -march=mips64 -relocation-model=static -soft-float -mattr=-n64,+o32 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=O32 %s
-; RUN-TODO: llc -march=mips64el -relocation-model=static -soft-float -mattr=-n64,+o32 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=O32 %s
+; RUN-TODO: llc -march=mips64 -relocation-model=static -mattr=+soft-float -target-abi o32 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=O32 %s
+; RUN-TODO: llc -march=mips64el -relocation-model=static -mattr=+soft-float -target-abi o32 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=O32 %s
-; RUN: llc -march=mips64 -relocation-model=static -soft-float -mattr=-n64,+n32 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=NEW %s
-; RUN: llc -march=mips64el -relocation-model=static -soft-float -mattr=-n64,+n32 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=NEW %s
+; RUN: llc -march=mips64 -relocation-model=static -mattr=+soft-float -target-abi n32 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=NEW %s
+; RUN: llc -march=mips64el -relocation-model=static -mattr=+soft-float -target-abi n32 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=NEW %s
-; RUN: llc -march=mips64 -relocation-model=static -soft-float -mattr=-n64,+n64 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM64 --check-prefix=NEW %s
-; RUN: llc -march=mips64el -relocation-model=static -soft-float -mattr=-n64,+n64 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM64 --check-prefix=NEW %s
+; RUN: llc -march=mips64 -relocation-model=static -mattr=+soft-float -target-abi n64 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM64 --check-prefix=NEW %s
+; RUN: llc -march=mips64el -relocation-model=static -mattr=+soft-float -target-abi n64 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM64 --check-prefix=NEW %s
; Test the floating point arguments for all ABI's and byte orders as specified
; by section 5 of MD00305 (MIPS ABIs Described).
@@ -24,23 +24,23 @@
define void @double_args(double %a, double %b, double %c, double %d, double %e,
double %f, double %g, double %h, double %i) nounwind {
entry:
- %0 = getelementptr [11 x double]* @doubles, i32 0, i32 1
+ %0 = getelementptr [11 x double], [11 x double]* @doubles, i32 0, i32 1
store volatile double %a, double* %0
- %1 = getelementptr [11 x double]* @doubles, i32 0, i32 2
+ %1 = getelementptr [11 x double], [11 x double]* @doubles, i32 0, i32 2
store volatile double %b, double* %1
- %2 = getelementptr [11 x double]* @doubles, i32 0, i32 3
+ %2 = getelementptr [11 x double], [11 x double]* @doubles, i32 0, i32 3
store volatile double %c, double* %2
- %3 = getelementptr [11 x double]* @doubles, i32 0, i32 4
+ %3 = getelementptr [11 x double], [11 x double]* @doubles, i32 0, i32 4
store volatile double %d, double* %3
- %4 = getelementptr [11 x double]* @doubles, i32 0, i32 5
+ %4 = getelementptr [11 x double], [11 x double]* @doubles, i32 0, i32 5
store volatile double %e, double* %4
- %5 = getelementptr [11 x double]* @doubles, i32 0, i32 6
+ %5 = getelementptr [11 x double], [11 x double]* @doubles, i32 0, i32 6
store volatile double %f, double* %5
- %6 = getelementptr [11 x double]* @doubles, i32 0, i32 7
+ %6 = getelementptr [11 x double], [11 x double]* @doubles, i32 0, i32 7
store volatile double %g, double* %6
- %7 = getelementptr [11 x double]* @doubles, i32 0, i32 8
+ %7 = getelementptr [11 x double], [11 x double]* @doubles, i32 0, i32 8
store volatile double %h, double* %7
- %8 = getelementptr [11 x double]* @doubles, i32 0, i32 9
+ %8 = getelementptr [11 x double], [11 x double]* @doubles, i32 0, i32 9
store volatile double %i, double* %8
ret void
}
@@ -105,25 +105,25 @@ define void @float_args(float %a, float %b, float %c, float %d, float %e,
float %f, float %g, float %h, float %i, float %j)
nounwind {
entry:
- %0 = getelementptr [11 x float]* @floats, i32 0, i32 1
+ %0 = getelementptr [11 x float], [11 x float]* @floats, i32 0, i32 1
store volatile float %a, float* %0
- %1 = getelementptr [11 x float]* @floats, i32 0, i32 2
+ %1 = getelementptr [11 x float], [11 x float]* @floats, i32 0, i32 2
store volatile float %b, float* %1
- %2 = getelementptr [11 x float]* @floats, i32 0, i32 3
+ %2 = getelementptr [11 x float], [11 x float]* @floats, i32 0, i32 3
store volatile float %c, float* %2
- %3 = getelementptr [11 x float]* @floats, i32 0, i32 4
+ %3 = getelementptr [11 x float], [11 x float]* @floats, i32 0, i32 4
store volatile float %d, float* %3
- %4 = getelementptr [11 x float]* @floats, i32 0, i32 5
+ %4 = getelementptr [11 x float], [11 x float]* @floats, i32 0, i32 5
store volatile float %e, float* %4
- %5 = getelementptr [11 x float]* @floats, i32 0, i32 6
+ %5 = getelementptr [11 x float], [11 x float]* @floats, i32 0, i32 6
store volatile float %f, float* %5
- %6 = getelementptr [11 x float]* @floats, i32 0, i32 7
+ %6 = getelementptr [11 x float], [11 x float]* @floats, i32 0, i32 7
store volatile float %g, float* %6
- %7 = getelementptr [11 x float]* @floats, i32 0, i32 8
+ %7 = getelementptr [11 x float], [11 x float]* @floats, i32 0, i32 8
store volatile float %h, float* %7
- %8 = getelementptr [11 x float]* @floats, i32 0, i32 9
+ %8 = getelementptr [11 x float], [11 x float]* @floats, i32 0, i32 9
store volatile float %i, float* %8
- %9 = getelementptr [11 x float]* @floats, i32 0, i32 10
+ %9 = getelementptr [11 x float], [11 x float]* @floats, i32 0, i32 10
store volatile float %j, float* %9
ret void
}
@@ -170,9 +170,9 @@ entry:
define void @double_arg2(i8 %a, double %b) nounwind {
entry:
- %0 = getelementptr [11 x i8]* @bytes, i32 0, i32 1
+ %0 = getelementptr [11 x i8], [11 x i8]* @bytes, i32 0, i32 1
store volatile i8 %a, i8* %0
- %1 = getelementptr [11 x double]* @doubles, i32 0, i32 1
+ %1 = getelementptr [11 x double], [11 x double]* @doubles, i32 0, i32 1
store volatile double %b, double* %1
ret void
}
@@ -197,9 +197,9 @@ entry:
define void @float_arg2(i8 signext %a, float %b) nounwind {
entry:
- %0 = getelementptr [11 x i8]* @bytes, i32 0, i32 1
+ %0 = getelementptr [11 x i8], [11 x i8]* @bytes, i32 0, i32 1
store volatile i8 %a, i8* %0
- %1 = getelementptr [11 x float]* @floats, i32 0, i32 1
+ %1 = getelementptr [11 x float], [11 x float]* @floats, i32 0, i32 1
store volatile float %b, float* %1
ret void
}
diff --git a/test/CodeGen/Mips/cconv/arguments-fp128.ll b/test/CodeGen/Mips/cconv/arguments-fp128.ll
index c8cd8fd11e50..6c62609396c5 100644
--- a/test/CodeGen/Mips/cconv/arguments-fp128.ll
+++ b/test/CodeGen/Mips/cconv/arguments-fp128.ll
@@ -1,8 +1,8 @@
-; RUN: llc -march=mips64 -relocation-model=static -soft-float -mattr=-n64,+n32 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 %s
-; RUN: llc -march=mips64el -relocation-model=static -soft-float -mattr=-n64,+n32 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 %s
+; RUN: llc -march=mips64 -relocation-model=static -mattr=+soft-float -target-abi n32 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 %s
+; RUN: llc -march=mips64el -relocation-model=static -mattr=+soft-float -target-abi n32 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 %s
-; RUN: llc -march=mips64 -relocation-model=static -soft-float -mattr=-n64,+n64 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM64 %s
-; RUN: llc -march=mips64el -relocation-model=static -soft-float -mattr=-n64,+n64 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM64 %s
+; RUN: llc -march=mips64 -relocation-model=static -mattr=+soft-float -target-abi n64 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM64 %s
+; RUN: llc -march=mips64el -relocation-model=static -mattr=+soft-float -target-abi n64 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM64 %s
; Test the fp128 arguments for all ABI's and byte orders as specified
; by section 2 of the MIPSpro N32 Handbook.
@@ -13,15 +13,15 @@
define void @ldouble_args(fp128 %a, fp128 %b, fp128 %c, fp128 %d, fp128 %e) nounwind {
entry:
- %0 = getelementptr [11 x fp128]* @ldoubles, i32 0, i32 1
+ %0 = getelementptr [11 x fp128], [11 x fp128]* @ldoubles, i32 0, i32 1
store volatile fp128 %a, fp128* %0
- %1 = getelementptr [11 x fp128]* @ldoubles, i32 0, i32 2
+ %1 = getelementptr [11 x fp128], [11 x fp128]* @ldoubles, i32 0, i32 2
store volatile fp128 %b, fp128* %1
- %2 = getelementptr [11 x fp128]* @ldoubles, i32 0, i32 3
+ %2 = getelementptr [11 x fp128], [11 x fp128]* @ldoubles, i32 0, i32 3
store volatile fp128 %c, fp128* %2
- %3 = getelementptr [11 x fp128]* @ldoubles, i32 0, i32 4
+ %3 = getelementptr [11 x fp128], [11 x fp128]* @ldoubles, i32 0, i32 4
store volatile fp128 %d, fp128* %3
- %4 = getelementptr [11 x fp128]* @ldoubles, i32 0, i32 5
+ %4 = getelementptr [11 x fp128], [11 x fp128]* @ldoubles, i32 0, i32 5
store volatile fp128 %e, fp128* %4
ret void
}
diff --git a/test/CodeGen/Mips/cconv/arguments-hard-float-varargs.ll b/test/CodeGen/Mips/cconv/arguments-hard-float-varargs.ll
index 70ccf14c5450..9f1fe91ec172 100644
--- a/test/CodeGen/Mips/cconv/arguments-hard-float-varargs.ll
+++ b/test/CodeGen/Mips/cconv/arguments-hard-float-varargs.ll
@@ -1,14 +1,14 @@
; RUN: llc -march=mips -relocation-model=static < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=O32 --check-prefix=O32BE %s
; RUN: llc -march=mipsel -relocation-model=static < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=O32 --check-prefix=O32LE %s
-; RUN-TODO: llc -march=mips64 -relocation-model=static -mattr=-n64,+o32 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=O32 %s
-; RUN-TODO: llc -march=mips64el -relocation-model=static -mattr=-n64,+o32 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=O32 %s
+; RUN-TODO: llc -march=mips64 -relocation-model=static -target-abi o32 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=O32 %s
+; RUN-TODO: llc -march=mips64el -relocation-model=static -target-abi o32 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=O32 %s
-; RUN: llc -march=mips64 -relocation-model=static -mattr=-n64,+n32 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=N32 --check-prefix=NEW --check-prefix=NEWBE %s
-; RUN: llc -march=mips64el -relocation-model=static -mattr=-n64,+n32 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=N32 --check-prefix=NEW --check-prefix=NEWLE %s
+; RUN: llc -march=mips64 -relocation-model=static -target-abi n32 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=N32 --check-prefix=NEW --check-prefix=NEWBE %s
+; RUN: llc -march=mips64el -relocation-model=static -target-abi n32 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=N32 --check-prefix=NEW --check-prefix=NEWLE %s
-; RUN: llc -march=mips64 -relocation-model=static -mattr=-n64,+n64 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM64 --check-prefix=N64 --check-prefix=NEW --check-prefix=NEWBE %s
-; RUN: llc -march=mips64el -relocation-model=static -mattr=-n64,+n64 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM64 --check-prefix=N64 --check-prefix=NEW --check-prefix=NEWLE %s
+; RUN: llc -march=mips64 -relocation-model=static -target-abi n64 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM64 --check-prefix=N64 --check-prefix=NEW --check-prefix=NEWBE %s
+; RUN: llc -march=mips64el -relocation-model=static -target-abi n64 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM64 --check-prefix=N64 --check-prefix=NEW --check-prefix=NEWLE %s
; Test the effect of varargs on floating point types in the non-variable part
; of the argument list as specified by section 2 of the MIPSpro N32 Handbook.
@@ -25,14 +25,14 @@
define void @double_args(double %a, ...)
nounwind {
entry:
- %0 = getelementptr [11 x double]* @doubles, i32 0, i32 1
+ %0 = getelementptr [11 x double], [11 x double]* @doubles, i32 0, i32 1
store volatile double %a, double* %0
%ap = alloca i8*
%ap2 = bitcast i8** %ap to i8*
call void @llvm.va_start(i8* %ap2)
%b = va_arg i8** %ap, double
- %1 = getelementptr [11 x double]* @doubles, i32 0, i32 2
+ %1 = getelementptr [11 x double], [11 x double]* @doubles, i32 0, i32 2
store volatile double %b, double* %1
call void @llvm.va_end(i8* %ap2)
ret void
@@ -90,14 +90,14 @@ entry:
define void @float_args(float %a, ...) nounwind {
entry:
- %0 = getelementptr [11 x float]* @floats, i32 0, i32 1
+ %0 = getelementptr [11 x float], [11 x float]* @floats, i32 0, i32 1
store volatile float %a, float* %0
%ap = alloca i8*
%ap2 = bitcast i8** %ap to i8*
call void @llvm.va_start(i8* %ap2)
%b = va_arg i8** %ap, float
- %1 = getelementptr [11 x float]* @floats, i32 0, i32 2
+ %1 = getelementptr [11 x float], [11 x float]* @floats, i32 0, i32 2
store volatile float %b, float* %1
call void @llvm.va_end(i8* %ap2)
ret void
diff --git a/test/CodeGen/Mips/cconv/arguments-hard-float.ll b/test/CodeGen/Mips/cconv/arguments-hard-float.ll
index 9837f7ee5586..24148ed176db 100644
--- a/test/CodeGen/Mips/cconv/arguments-hard-float.ll
+++ b/test/CodeGen/Mips/cconv/arguments-hard-float.ll
@@ -1,14 +1,14 @@
; RUN: llc -march=mips -relocation-model=static < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=O32 --check-prefix=O32BE %s
; RUN: llc -march=mipsel -relocation-model=static < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=O32 --check-prefix=O32LE %s
-; RUN-TODO: llc -march=mips64 -relocation-model=static -mattr=-n64,+o32 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=O32 %s
-; RUN-TODO: llc -march=mips64el -relocation-model=static -mattr=-n64,+o32 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=O32 %s
+; RUN-TODO: llc -march=mips64 -relocation-model=static -target-abi o32 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=O32 %s
+; RUN-TODO: llc -march=mips64el -relocation-model=static -target-abi o32 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=O32 %s
-; RUN: llc -march=mips64 -relocation-model=static -mattr=-n64,+n32 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=NEW %s
-; RUN: llc -march=mips64el -relocation-model=static -mattr=-n64,+n32 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=NEW %s
+; RUN: llc -march=mips64 -relocation-model=static -target-abi n32 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=NEW %s
+; RUN: llc -march=mips64el -relocation-model=static -target-abi n32 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=NEW %s
-; RUN: llc -march=mips64 -relocation-model=static -mattr=-n64,+n64 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM64 --check-prefix=NEW %s
-; RUN: llc -march=mips64el -relocation-model=static -mattr=-n64,+n64 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM64 --check-prefix=NEW %s
+; RUN: llc -march=mips64 -relocation-model=static -target-abi n64 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM64 --check-prefix=NEW %s
+; RUN: llc -march=mips64el -relocation-model=static -target-abi n64 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM64 --check-prefix=NEW %s
; Test the floating point arguments for all ABI's and byte orders as specified
; by section 5 of MD00305 (MIPS ABIs Described).
@@ -24,23 +24,23 @@
define void @double_args(double %a, double %b, double %c, double %d, double %e,
double %f, double %g, double %h, double %i) nounwind {
entry:
- %0 = getelementptr [11 x double]* @doubles, i32 0, i32 1
+ %0 = getelementptr [11 x double], [11 x double]* @doubles, i32 0, i32 1
store volatile double %a, double* %0
- %1 = getelementptr [11 x double]* @doubles, i32 0, i32 2
+ %1 = getelementptr [11 x double], [11 x double]* @doubles, i32 0, i32 2
store volatile double %b, double* %1
- %2 = getelementptr [11 x double]* @doubles, i32 0, i32 3
+ %2 = getelementptr [11 x double], [11 x double]* @doubles, i32 0, i32 3
store volatile double %c, double* %2
- %3 = getelementptr [11 x double]* @doubles, i32 0, i32 4
+ %3 = getelementptr [11 x double], [11 x double]* @doubles, i32 0, i32 4
store volatile double %d, double* %3
- %4 = getelementptr [11 x double]* @doubles, i32 0, i32 5
+ %4 = getelementptr [11 x double], [11 x double]* @doubles, i32 0, i32 5
store volatile double %e, double* %4
- %5 = getelementptr [11 x double]* @doubles, i32 0, i32 6
+ %5 = getelementptr [11 x double], [11 x double]* @doubles, i32 0, i32 6
store volatile double %f, double* %5
- %6 = getelementptr [11 x double]* @doubles, i32 0, i32 7
+ %6 = getelementptr [11 x double], [11 x double]* @doubles, i32 0, i32 7
store volatile double %g, double* %6
- %7 = getelementptr [11 x double]* @doubles, i32 0, i32 8
+ %7 = getelementptr [11 x double], [11 x double]* @doubles, i32 0, i32 8
store volatile double %h, double* %7
- %8 = getelementptr [11 x double]* @doubles, i32 0, i32 9
+ %8 = getelementptr [11 x double], [11 x double]* @doubles, i32 0, i32 9
store volatile double %i, double* %8
ret void
}
@@ -87,23 +87,23 @@ entry:
define void @float_args(float %a, float %b, float %c, float %d, float %e,
float %f, float %g, float %h, float %i) nounwind {
entry:
- %0 = getelementptr [11 x float]* @floats, i32 0, i32 1
+ %0 = getelementptr [11 x float], [11 x float]* @floats, i32 0, i32 1
store volatile float %a, float* %0
- %1 = getelementptr [11 x float]* @floats, i32 0, i32 2
+ %1 = getelementptr [11 x float], [11 x float]* @floats, i32 0, i32 2
store volatile float %b, float* %1
- %2 = getelementptr [11 x float]* @floats, i32 0, i32 3
+ %2 = getelementptr [11 x float], [11 x float]* @floats, i32 0, i32 3
store volatile float %c, float* %2
- %3 = getelementptr [11 x float]* @floats, i32 0, i32 4
+ %3 = getelementptr [11 x float], [11 x float]* @floats, i32 0, i32 4
store volatile float %d, float* %3
- %4 = getelementptr [11 x float]* @floats, i32 0, i32 5
+ %4 = getelementptr [11 x float], [11 x float]* @floats, i32 0, i32 5
store volatile float %e, float* %4
- %5 = getelementptr [11 x float]* @floats, i32 0, i32 6
+ %5 = getelementptr [11 x float], [11 x float]* @floats, i32 0, i32 6
store volatile float %f, float* %5
- %6 = getelementptr [11 x float]* @floats, i32 0, i32 7
+ %6 = getelementptr [11 x float], [11 x float]* @floats, i32 0, i32 7
store volatile float %g, float* %6
- %7 = getelementptr [11 x float]* @floats, i32 0, i32 8
+ %7 = getelementptr [11 x float], [11 x float]* @floats, i32 0, i32 8
store volatile float %h, float* %7
- %8 = getelementptr [11 x float]* @floats, i32 0, i32 9
+ %8 = getelementptr [11 x float], [11 x float]* @floats, i32 0, i32 9
store volatile float %i, float* %8
ret void
}
@@ -153,9 +153,9 @@ entry:
define void @double_arg2(i8 %a, double %b) nounwind {
entry:
- %0 = getelementptr [11 x i8]* @bytes, i32 0, i32 1
+ %0 = getelementptr [11 x i8], [11 x i8]* @bytes, i32 0, i32 1
store volatile i8 %a, i8* %0
- %1 = getelementptr [11 x double]* @doubles, i32 0, i32 1
+ %1 = getelementptr [11 x double], [11 x double]* @doubles, i32 0, i32 1
store volatile double %b, double* %1
ret void
}
@@ -184,9 +184,9 @@ entry:
define void @float_arg2(i8 %a, float %b) nounwind {
entry:
- %0 = getelementptr [11 x i8]* @bytes, i32 0, i32 1
+ %0 = getelementptr [11 x i8], [11 x i8]* @bytes, i32 0, i32 1
store volatile i8 %a, i8* %0
- %1 = getelementptr [11 x float]* @floats, i32 0, i32 1
+ %1 = getelementptr [11 x float], [11 x float]* @floats, i32 0, i32 1
store volatile float %b, float* %1
ret void
}
diff --git a/test/CodeGen/Mips/cconv/arguments-hard-fp128.ll b/test/CodeGen/Mips/cconv/arguments-hard-fp128.ll
index 5e3f403495f5..26eb569f865d 100644
--- a/test/CodeGen/Mips/cconv/arguments-hard-fp128.ll
+++ b/test/CodeGen/Mips/cconv/arguments-hard-fp128.ll
@@ -1,8 +1,8 @@
-; RUN: llc -march=mips64 -relocation-model=static -mattr=-n64,+n32 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 %s
-; RUN: llc -march=mips64el -relocation-model=static -mattr=-n64,+n32 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 %s
+; RUN: llc -march=mips64 -relocation-model=static -target-abi n32 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 %s
+; RUN: llc -march=mips64el -relocation-model=static -target-abi n32 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 %s
-; RUN: llc -march=mips64 -relocation-model=static -mattr=-n64,+n64 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM64 %s
-; RUN: llc -march=mips64el -relocation-model=static -mattr=-n64,+n64 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM64 %s
+; RUN: llc -march=mips64 -relocation-model=static -target-abi n64 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM64 %s
+; RUN: llc -march=mips64el -relocation-model=static -target-abi n64 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM64 %s
; Test the fp128 arguments for all ABI's and byte orders as specified
; by section 2 of the MIPSpro N32 Handbook.
@@ -13,15 +13,15 @@
define void @ldouble_args(fp128 %a, fp128 %b, fp128 %c, fp128 %d, fp128 %e) nounwind {
entry:
- %0 = getelementptr [11 x fp128]* @ldoubles, i32 0, i32 1
+ %0 = getelementptr [11 x fp128], [11 x fp128]* @ldoubles, i32 0, i32 1
store volatile fp128 %a, fp128* %0
- %1 = getelementptr [11 x fp128]* @ldoubles, i32 0, i32 2
+ %1 = getelementptr [11 x fp128], [11 x fp128]* @ldoubles, i32 0, i32 2
store volatile fp128 %b, fp128* %1
- %2 = getelementptr [11 x fp128]* @ldoubles, i32 0, i32 3
+ %2 = getelementptr [11 x fp128], [11 x fp128]* @ldoubles, i32 0, i32 3
store volatile fp128 %c, fp128* %2
- %3 = getelementptr [11 x fp128]* @ldoubles, i32 0, i32 4
+ %3 = getelementptr [11 x fp128], [11 x fp128]* @ldoubles, i32 0, i32 4
store volatile fp128 %d, fp128* %3
- %4 = getelementptr [11 x fp128]* @ldoubles, i32 0, i32 5
+ %4 = getelementptr [11 x fp128], [11 x fp128]* @ldoubles, i32 0, i32 5
store volatile fp128 %e, fp128* %4
ret void
}
diff --git a/test/CodeGen/Mips/cconv/arguments-small-structures-bigger-than-32bits.ll b/test/CodeGen/Mips/cconv/arguments-small-structures-bigger-than-32bits.ll
index d17290e552e0..087a0515f379 100644
--- a/test/CodeGen/Mips/cconv/arguments-small-structures-bigger-than-32bits.ll
+++ b/test/CodeGen/Mips/cconv/arguments-small-structures-bigger-than-32bits.ll
@@ -52,8 +52,8 @@ entry:
%0 = bitcast { i48 }* %s1_1.coerce to i8*
%1 = bitcast %struct.S1* %s1_1 to i8*
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 6, i32 0, i1 false)
- %2 = getelementptr { i48 }* %s1_1.coerce, i32 0, i32 0
- %3 = load i48* %2, align 1
+ %2 = getelementptr { i48 }, { i48 }* %s1_1.coerce, i32 0, i32 0
+ %3 = load i48, i48* %2, align 1
call void @fS1(i48 inreg %3)
ret void
; ALL-LABEL: f1:
@@ -69,8 +69,8 @@ entry:
%0 = bitcast { i40 }* %s2_1.coerce to i8*
%1 = bitcast %struct.S2* %s2_1 to i8*
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 5, i32 0, i1 false)
- %2 = getelementptr { i40 }* %s2_1.coerce, i32 0, i32 0
- %3 = load i40* %2, align 1
+ %2 = getelementptr { i40 }, { i40 }* %s2_1.coerce, i32 0, i32 0
+ %3 = load i40, i40* %2, align 1
call void @fS2(i40 inreg %3)
ret void
; ALL-LABEL: f2:
diff --git a/test/CodeGen/Mips/cconv/arguments-struct.ll b/test/CodeGen/Mips/cconv/arguments-struct.ll
index c1bc84ee7a04..ee6bfaeb9537 100644
--- a/test/CodeGen/Mips/cconv/arguments-struct.ll
+++ b/test/CodeGen/Mips/cconv/arguments-struct.ll
@@ -1,14 +1,14 @@
; RUN: llc -mtriple=mips-unknown-linux-gnu -relocation-model=static < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=O32-BE %s
; RUN: llc -mtriple=mipsel-unknown-linux-gnu -relocation-model=static < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=O32-LE %s
-; RUN-TODO: llc -mtriple=mips64-unknown-linux-gnu -relocation-model=static -mattr=-n64,+o32 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=O32-BE %s
-; RUN-TODO: llc -mtriple=mips64el-unknown-linux-gnu -relocation-model=static -mattr=-n64,+o32 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=O32-LE %s
+; RUN-TODO: llc -mtriple=mips64-unknown-linux-gnu -relocation-model=static -target-abi n32 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=O32-BE %s
+; RUN-TODO: llc -mtriple=mips64el-unknown-linux-gnu -relocation-model=static -target-abi n32 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=O32-LE %s
-; RUN: llc -mtriple=mips64-unknown-linux-gnu -relocation-model=static -mattr=-n64,+n32 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=NEW-BE %s
-; RUN: llc -mtriple=mips64el-unknown-linux-gnu -relocation-model=static -mattr=-n64,+n32 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=NEW-LE %s
+; RUN: llc -mtriple=mips64-unknown-linux-gnu -relocation-model=static -target-abi n32 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=NEW-BE %s
+; RUN: llc -mtriple=mips64el-unknown-linux-gnu -relocation-model=static -target-abi n32 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=NEW-LE %s
-; RUN: llc -mtriple=mips64-unknown-linux-gnu -relocation-model=static -mattr=-n64,+n64 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM64 --check-prefix=NEW-BE %s
-; RUN: llc -mtriple=mips64el-unknown-linux-gnu -relocation-model=static -mattr=-n64,+n64 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM64 --check-prefix=NEW-LE %s
+; RUN: llc -mtriple=mips64-unknown-linux-gnu -relocation-model=static -target-abi n64 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM64 --check-prefix=NEW-BE %s
+; RUN: llc -mtriple=mips64el-unknown-linux-gnu -relocation-model=static -target-abi n64 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM64 --check-prefix=NEW-LE %s
; Test small structures for all ABI's and byte orders.
;
@@ -19,7 +19,7 @@
define void @s_i8(i8 inreg %a) nounwind {
entry:
- store i8 %a, i8* getelementptr inbounds ([2 x i8]* @bytes, i32 0, i32 1)
+ store i8 %a, i8* getelementptr inbounds ([2 x i8], [2 x i8]* @bytes, i32 0, i32 1)
ret void
}
diff --git a/test/CodeGen/Mips/cconv/arguments-varargs-small-structs-byte.ll b/test/CodeGen/Mips/cconv/arguments-varargs-small-structs-byte.ll
index 458b124c9927..ba3aeb598f50 100644
--- a/test/CodeGen/Mips/cconv/arguments-varargs-small-structs-byte.ll
+++ b/test/CodeGen/Mips/cconv/arguments-varargs-small-structs-byte.ll
@@ -140,11 +140,11 @@ define void @smallStruct_1b(%struct.SmallStruct_1b* %ss) #0 {
entry:
%ss.addr = alloca %struct.SmallStruct_1b*, align 8
store %struct.SmallStruct_1b* %ss, %struct.SmallStruct_1b** %ss.addr, align 8
- %0 = load %struct.SmallStruct_1b** %ss.addr, align 8
+ %0 = load %struct.SmallStruct_1b*, %struct.SmallStruct_1b** %ss.addr, align 8
%1 = bitcast %struct.SmallStruct_1b* %0 to { i8 }*
- %2 = getelementptr { i8 }* %1, i32 0, i32 0
- %3 = load i8* %2, align 1
- call void (i8*, ...)* @varArgF_SmallStruct(i8* getelementptr inbounds ([3 x i8]* @.str, i32 0, i32 0), i8 inreg %3)
+ %2 = getelementptr { i8 }, { i8 }* %1, i32 0, i32 0
+ %3 = load i8, i8* %2, align 1
+ call void (i8*, ...) @varArgF_SmallStruct(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str, i32 0, i32 0), i8 inreg %3)
ret void
; CHECK-LABEL: smallStruct_1b:
; CHECK: dsll $[[R1:[0-9]+]], $[[R2:[0-9]+]], 56
@@ -154,11 +154,11 @@ define void @smallStruct_2b(%struct.SmallStruct_2b* %ss) #0 {
entry:
%ss.addr = alloca %struct.SmallStruct_2b*, align 8
store %struct.SmallStruct_2b* %ss, %struct.SmallStruct_2b** %ss.addr, align 8
- %0 = load %struct.SmallStruct_2b** %ss.addr, align 8
+ %0 = load %struct.SmallStruct_2b*, %struct.SmallStruct_2b** %ss.addr, align 8
%1 = bitcast %struct.SmallStruct_2b* %0 to { i16 }*
- %2 = getelementptr { i16 }* %1, i32 0, i32 0
- %3 = load i16* %2, align 1
- call void (i8*, ...)* @varArgF_SmallStruct(i8* getelementptr inbounds ([3 x i8]* @.str, i32 0, i32 0), i16 inreg %3)
+ %2 = getelementptr { i16 }, { i16 }* %1, i32 0, i32 0
+ %3 = load i16, i16* %2, align 1
+ call void (i8*, ...) @varArgF_SmallStruct(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str, i32 0, i32 0), i16 inreg %3)
ret void
; CHECK-LABEL: smallStruct_2b:
; CHECK: dsll $[[R1:[0-9]+]], $[[R2:[0-9]+]], 48
@@ -169,13 +169,13 @@ entry:
%ss.addr = alloca %struct.SmallStruct_3b*, align 8
%.coerce = alloca { i24 }
store %struct.SmallStruct_3b* %ss, %struct.SmallStruct_3b** %ss.addr, align 8
- %0 = load %struct.SmallStruct_3b** %ss.addr, align 8
+ %0 = load %struct.SmallStruct_3b*, %struct.SmallStruct_3b** %ss.addr, align 8
%1 = bitcast { i24 }* %.coerce to i8*
%2 = bitcast %struct.SmallStruct_3b* %0 to i8*
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* %2, i64 3, i32 0, i1 false)
- %3 = getelementptr { i24 }* %.coerce, i32 0, i32 0
- %4 = load i24* %3, align 1
- call void (i8*, ...)* @varArgF_SmallStruct(i8* getelementptr inbounds ([3 x i8]* @.str, i32 0, i32 0), i24 inreg %4)
+ %3 = getelementptr { i24 }, { i24 }* %.coerce, i32 0, i32 0
+ %4 = load i24, i24* %3, align 1
+ call void (i8*, ...) @varArgF_SmallStruct(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str, i32 0, i32 0), i24 inreg %4)
ret void
; CHECK-LABEL: smallStruct_3b:
; CHECK: dsll $[[R1:[0-9]+]], $[[R2:[0-9]+]], 40
@@ -187,11 +187,11 @@ define void @smallStruct_4b(%struct.SmallStruct_4b* %ss) #0 {
entry:
%ss.addr = alloca %struct.SmallStruct_4b*, align 8
store %struct.SmallStruct_4b* %ss, %struct.SmallStruct_4b** %ss.addr, align 8
- %0 = load %struct.SmallStruct_4b** %ss.addr, align 8
+ %0 = load %struct.SmallStruct_4b*, %struct.SmallStruct_4b** %ss.addr, align 8
%1 = bitcast %struct.SmallStruct_4b* %0 to { i32 }*
- %2 = getelementptr { i32 }* %1, i32 0, i32 0
- %3 = load i32* %2, align 1
- call void (i8*, ...)* @varArgF_SmallStruct(i8* getelementptr inbounds ([3 x i8]* @.str, i32 0, i32 0), i32 inreg %3)
+ %2 = getelementptr { i32 }, { i32 }* %1, i32 0, i32 0
+ %3 = load i32, i32* %2, align 1
+ call void (i8*, ...) @varArgF_SmallStruct(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str, i32 0, i32 0), i32 inreg %3)
ret void
; CHECK-LABEL: smallStruct_4b:
; CHECK: dsll $[[R1:[0-9]+]], $[[R2:[0-9]+]], 32
@@ -202,13 +202,13 @@ entry:
%ss.addr = alloca %struct.SmallStruct_5b*, align 8
%.coerce = alloca { i40 }
store %struct.SmallStruct_5b* %ss, %struct.SmallStruct_5b** %ss.addr, align 8
- %0 = load %struct.SmallStruct_5b** %ss.addr, align 8
+ %0 = load %struct.SmallStruct_5b*, %struct.SmallStruct_5b** %ss.addr, align 8
%1 = bitcast { i40 }* %.coerce to i8*
%2 = bitcast %struct.SmallStruct_5b* %0 to i8*
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* %2, i64 5, i32 0, i1 false)
- %3 = getelementptr { i40 }* %.coerce, i32 0, i32 0
- %4 = load i40* %3, align 1
- call void (i8*, ...)* @varArgF_SmallStruct(i8* getelementptr inbounds ([3 x i8]* @.str, i32 0, i32 0), i40 inreg %4)
+ %3 = getelementptr { i40 }, { i40 }* %.coerce, i32 0, i32 0
+ %4 = load i40, i40* %3, align 1
+ call void (i8*, ...) @varArgF_SmallStruct(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str, i32 0, i32 0), i40 inreg %4)
ret void
; CHECK-LABEL: smallStruct_5b:
; CHECK: dsll $[[R1:[0-9]+]], $[[R2:[0-9]+]], 24
@@ -219,13 +219,13 @@ entry:
%ss.addr = alloca %struct.SmallStruct_6b*, align 8
%.coerce = alloca { i48 }
store %struct.SmallStruct_6b* %ss, %struct.SmallStruct_6b** %ss.addr, align 8
- %0 = load %struct.SmallStruct_6b** %ss.addr, align 8
+ %0 = load %struct.SmallStruct_6b*, %struct.SmallStruct_6b** %ss.addr, align 8
%1 = bitcast { i48 }* %.coerce to i8*
%2 = bitcast %struct.SmallStruct_6b* %0 to i8*
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* %2, i64 6, i32 0, i1 false)
- %3 = getelementptr { i48 }* %.coerce, i32 0, i32 0
- %4 = load i48* %3, align 1
- call void (i8*, ...)* @varArgF_SmallStruct(i8* getelementptr inbounds ([3 x i8]* @.str, i32 0, i32 0), i48 inreg %4)
+ %3 = getelementptr { i48 }, { i48 }* %.coerce, i32 0, i32 0
+ %4 = load i48, i48* %3, align 1
+ call void (i8*, ...) @varArgF_SmallStruct(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str, i32 0, i32 0), i48 inreg %4)
ret void
; CHECK-LABEL: smallStruct_6b:
; CHECK: dsll $[[R1:[0-9]+]], $[[R2:[0-9]+]], 16
@@ -236,13 +236,13 @@ entry:
%ss.addr = alloca %struct.SmallStruct_7b*, align 8
%.coerce = alloca { i56 }
store %struct.SmallStruct_7b* %ss, %struct.SmallStruct_7b** %ss.addr, align 8
- %0 = load %struct.SmallStruct_7b** %ss.addr, align 8
+ %0 = load %struct.SmallStruct_7b*, %struct.SmallStruct_7b** %ss.addr, align 8
%1 = bitcast { i56 }* %.coerce to i8*
%2 = bitcast %struct.SmallStruct_7b* %0 to i8*
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* %2, i64 7, i32 0, i1 false)
- %3 = getelementptr { i56 }* %.coerce, i32 0, i32 0
- %4 = load i56* %3, align 1
- call void (i8*, ...)* @varArgF_SmallStruct(i8* getelementptr inbounds ([3 x i8]* @.str, i32 0, i32 0), i56 inreg %4)
+ %3 = getelementptr { i56 }, { i56 }* %.coerce, i32 0, i32 0
+ %4 = load i56, i56* %3, align 1
+ call void (i8*, ...) @varArgF_SmallStruct(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str, i32 0, i32 0), i56 inreg %4)
ret void
; CHECK-LABEL: smallStruct_7b:
; CHECK: dsll $[[R1:[0-9]+]], $[[R2:[0-9]+]], 8
@@ -252,11 +252,11 @@ define void @smallStruct_8b(%struct.SmallStruct_8b* %ss) #0 {
entry:
%ss.addr = alloca %struct.SmallStruct_8b*, align 8
store %struct.SmallStruct_8b* %ss, %struct.SmallStruct_8b** %ss.addr, align 8
- %0 = load %struct.SmallStruct_8b** %ss.addr, align 8
+ %0 = load %struct.SmallStruct_8b*, %struct.SmallStruct_8b** %ss.addr, align 8
%1 = bitcast %struct.SmallStruct_8b* %0 to { i64 }*
- %2 = getelementptr { i64 }* %1, i32 0, i32 0
- %3 = load i64* %2, align 1
- call void (i8*, ...)* @varArgF_SmallStruct(i8* getelementptr inbounds ([3 x i8]* @.str, i32 0, i32 0), i64 inreg %3)
+ %2 = getelementptr { i64 }, { i64 }* %1, i32 0, i32 0
+ %3 = load i64, i64* %2, align 1
+ call void (i8*, ...) @varArgF_SmallStruct(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str, i32 0, i32 0), i64 inreg %3)
ret void
; CHECK-LABEL: smallStruct_8b:
; CHECK-NOT: dsll
@@ -267,15 +267,15 @@ entry:
%ss.addr = alloca %struct.SmallStruct_9b*, align 8
%.coerce = alloca { i64, i8 }
store %struct.SmallStruct_9b* %ss, %struct.SmallStruct_9b** %ss.addr, align 8
- %0 = load %struct.SmallStruct_9b** %ss.addr, align 8
+ %0 = load %struct.SmallStruct_9b*, %struct.SmallStruct_9b** %ss.addr, align 8
%1 = bitcast { i64, i8 }* %.coerce to i8*
%2 = bitcast %struct.SmallStruct_9b* %0 to i8*
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* %2, i64 9, i32 0, i1 false)
- %3 = getelementptr { i64, i8 }* %.coerce, i32 0, i32 0
- %4 = load i64* %3, align 1
- %5 = getelementptr { i64, i8 }* %.coerce, i32 0, i32 1
- %6 = load i8* %5, align 1
- call void (i8*, ...)* @varArgF_SmallStruct(i8* getelementptr inbounds ([3 x i8]* @.str, i32 0, i32 0), i64 inreg %4, i8 inreg %6)
+ %3 = getelementptr { i64, i8 }, { i64, i8 }* %.coerce, i32 0, i32 0
+ %4 = load i64, i64* %3, align 1
+ %5 = getelementptr { i64, i8 }, { i64, i8 }* %.coerce, i32 0, i32 1
+ %6 = load i8, i8* %5, align 1
+ call void (i8*, ...) @varArgF_SmallStruct(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str, i32 0, i32 0), i64 inreg %4, i8 inreg %6)
ret void
; CHECK-LABEL: smallStruct_9b:
; CHECK: dsll $[[R1:[0-9]+]], $[[R2:[0-9]+]], 56
diff --git a/test/CodeGen/Mips/cconv/arguments-varargs-small-structs-combinations.ll b/test/CodeGen/Mips/cconv/arguments-varargs-small-structs-combinations.ll
index 899a3e8ff0a1..74d3d859ed75 100644
--- a/test/CodeGen/Mips/cconv/arguments-varargs-small-structs-combinations.ll
+++ b/test/CodeGen/Mips/cconv/arguments-varargs-small-structs-combinations.ll
@@ -74,11 +74,11 @@ define void @smallStruct_1b1s(%struct.SmallStruct_1b1s* %ss) #0 {
entry:
%ss.addr = alloca %struct.SmallStruct_1b1s*, align 8
store %struct.SmallStruct_1b1s* %ss, %struct.SmallStruct_1b1s** %ss.addr, align 8
- %0 = load %struct.SmallStruct_1b1s** %ss.addr, align 8
+ %0 = load %struct.SmallStruct_1b1s*, %struct.SmallStruct_1b1s** %ss.addr, align 8
%1 = bitcast %struct.SmallStruct_1b1s* %0 to { i32 }*
- %2 = getelementptr { i32 }* %1, i32 0, i32 0
- %3 = load i32* %2, align 1
- call void (i8*, ...)* @varArgF_SmallStruct(i8* getelementptr inbounds ([3 x i8]* @.str, i32 0, i32 0), i32 inreg %3)
+ %2 = getelementptr { i32 }, { i32 }* %1, i32 0, i32 0
+ %3 = load i32, i32* %2, align 1
+ call void (i8*, ...) @varArgF_SmallStruct(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str, i32 0, i32 0), i32 inreg %3)
ret void
; CHECK-LABEL: smallStruct_1b1s:
; CHECK: dsll $[[R1:[0-9]+]], $[[R2:[0-9]+]], 32
@@ -88,11 +88,11 @@ define void @smallStruct_1b1i(%struct.SmallStruct_1b1i* %ss) #0 {
entry:
%ss.addr = alloca %struct.SmallStruct_1b1i*, align 8
store %struct.SmallStruct_1b1i* %ss, %struct.SmallStruct_1b1i** %ss.addr, align 8
- %0 = load %struct.SmallStruct_1b1i** %ss.addr, align 8
+ %0 = load %struct.SmallStruct_1b1i*, %struct.SmallStruct_1b1i** %ss.addr, align 8
%1 = bitcast %struct.SmallStruct_1b1i* %0 to { i64 }*
- %2 = getelementptr { i64 }* %1, i32 0, i32 0
- %3 = load i64* %2, align 1
- call void (i8*, ...)* @varArgF_SmallStruct(i8* getelementptr inbounds ([3 x i8]* @.str, i32 0, i32 0), i64 inreg %3)
+ %2 = getelementptr { i64 }, { i64 }* %1, i32 0, i32 0
+ %3 = load i64, i64* %2, align 1
+ call void (i8*, ...) @varArgF_SmallStruct(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str, i32 0, i32 0), i64 inreg %3)
ret void
; CHECK-LABEL: smallStruct_1b1i:
; CHECK-NOT: dsll
@@ -103,13 +103,13 @@ entry:
%ss.addr = alloca %struct.SmallStruct_1b1s1b*, align 8
%.coerce = alloca { i48 }
store %struct.SmallStruct_1b1s1b* %ss, %struct.SmallStruct_1b1s1b** %ss.addr, align 8
- %0 = load %struct.SmallStruct_1b1s1b** %ss.addr, align 8
+ %0 = load %struct.SmallStruct_1b1s1b*, %struct.SmallStruct_1b1s1b** %ss.addr, align 8
%1 = bitcast { i48 }* %.coerce to i8*
%2 = bitcast %struct.SmallStruct_1b1s1b* %0 to i8*
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* %2, i64 6, i32 0, i1 false)
- %3 = getelementptr { i48 }* %.coerce, i32 0, i32 0
- %4 = load i48* %3, align 1
- call void (i8*, ...)* @varArgF_SmallStruct(i8* getelementptr inbounds ([3 x i8]* @.str, i32 0, i32 0), i48 inreg %4)
+ %3 = getelementptr { i48 }, { i48 }* %.coerce, i32 0, i32 0
+ %4 = load i48, i48* %3, align 1
+ call void (i8*, ...) @varArgF_SmallStruct(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str, i32 0, i32 0), i48 inreg %4)
ret void
; CHECK-LABEL: smallStruct_1b1s1b:
; CHECK: dsll $[[R1:[0-9]+]], $[[R2:[0-9]+]], 16
@@ -121,11 +121,11 @@ define void @smallStruct_1s1i(%struct.SmallStruct_1s1i* %ss) #0 {
entry:
%ss.addr = alloca %struct.SmallStruct_1s1i*, align 8
store %struct.SmallStruct_1s1i* %ss, %struct.SmallStruct_1s1i** %ss.addr, align 8
- %0 = load %struct.SmallStruct_1s1i** %ss.addr, align 8
+ %0 = load %struct.SmallStruct_1s1i*, %struct.SmallStruct_1s1i** %ss.addr, align 8
%1 = bitcast %struct.SmallStruct_1s1i* %0 to { i64 }*
- %2 = getelementptr { i64 }* %1, i32 0, i32 0
- %3 = load i64* %2, align 1
- call void (i8*, ...)* @varArgF_SmallStruct(i8* getelementptr inbounds ([3 x i8]* @.str, i32 0, i32 0), i64 inreg %3)
+ %2 = getelementptr { i64 }, { i64 }* %1, i32 0, i32 0
+ %3 = load i64, i64* %2, align 1
+ call void (i8*, ...) @varArgF_SmallStruct(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str, i32 0, i32 0), i64 inreg %3)
ret void
; CHECK-LABEL: smallStruct_1s1i:
; CHECK-NOT: dsll
@@ -136,13 +136,13 @@ entry:
%ss.addr = alloca %struct.SmallStruct_3b1s*, align 8
%.coerce = alloca { i48 }
store %struct.SmallStruct_3b1s* %ss, %struct.SmallStruct_3b1s** %ss.addr, align 8
- %0 = load %struct.SmallStruct_3b1s** %ss.addr, align 8
+ %0 = load %struct.SmallStruct_3b1s*, %struct.SmallStruct_3b1s** %ss.addr, align 8
%1 = bitcast { i48 }* %.coerce to i8*
%2 = bitcast %struct.SmallStruct_3b1s* %0 to i8*
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* %2, i64 6, i32 0, i1 false)
- %3 = getelementptr { i48 }* %.coerce, i32 0, i32 0
- %4 = load i48* %3, align 1
- call void (i8*, ...)* @varArgF_SmallStruct(i8* getelementptr inbounds ([3 x i8]* @.str, i32 0, i32 0), i48 inreg %4)
+ %3 = getelementptr { i48 }, { i48 }* %.coerce, i32 0, i32 0
+ %4 = load i48, i48* %3, align 1
+ call void (i8*, ...) @varArgF_SmallStruct(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str, i32 0, i32 0), i48 inreg %4)
ret void
; CHECK-LABEL: smallStruct_3b1s:
; CHECK: dsll $[[R1:[0-9]+]], $[[R2:[0-9]+]], 16
diff --git a/test/CodeGen/Mips/cconv/arguments-varargs-small-structs-multiple-args.ll b/test/CodeGen/Mips/cconv/arguments-varargs-small-structs-multiple-args.ll
index 1f7362523346..a4ac5e7bd8a6 100644
--- a/test/CodeGen/Mips/cconv/arguments-varargs-small-structs-multiple-args.ll
+++ b/test/CodeGen/Mips/cconv/arguments-varargs-small-structs-multiple-args.ll
@@ -110,43 +110,43 @@ entry:
store %struct.SmallStruct_1b* %ss7, %struct.SmallStruct_1b** %ss7.addr, align 8
store %struct.SmallStruct_1b* %ss8, %struct.SmallStruct_1b** %ss8.addr, align 8
store %struct.SmallStruct_1b* %ss9, %struct.SmallStruct_1b** %ss9.addr, align 8
- %0 = load %struct.SmallStruct_1b** %ss1.addr, align 8
- %1 = load %struct.SmallStruct_1b** %ss2.addr, align 8
- %2 = load %struct.SmallStruct_1b** %ss3.addr, align 8
- %3 = load %struct.SmallStruct_1b** %ss4.addr, align 8
- %4 = load %struct.SmallStruct_1b** %ss5.addr, align 8
- %5 = load %struct.SmallStruct_1b** %ss6.addr, align 8
- %6 = load %struct.SmallStruct_1b** %ss7.addr, align 8
- %7 = load %struct.SmallStruct_1b** %ss8.addr, align 8
- %8 = load %struct.SmallStruct_1b** %ss9.addr, align 8
+ %0 = load %struct.SmallStruct_1b*, %struct.SmallStruct_1b** %ss1.addr, align 8
+ %1 = load %struct.SmallStruct_1b*, %struct.SmallStruct_1b** %ss2.addr, align 8
+ %2 = load %struct.SmallStruct_1b*, %struct.SmallStruct_1b** %ss3.addr, align 8
+ %3 = load %struct.SmallStruct_1b*, %struct.SmallStruct_1b** %ss4.addr, align 8
+ %4 = load %struct.SmallStruct_1b*, %struct.SmallStruct_1b** %ss5.addr, align 8
+ %5 = load %struct.SmallStruct_1b*, %struct.SmallStruct_1b** %ss6.addr, align 8
+ %6 = load %struct.SmallStruct_1b*, %struct.SmallStruct_1b** %ss7.addr, align 8
+ %7 = load %struct.SmallStruct_1b*, %struct.SmallStruct_1b** %ss8.addr, align 8
+ %8 = load %struct.SmallStruct_1b*, %struct.SmallStruct_1b** %ss9.addr, align 8
%9 = bitcast %struct.SmallStruct_1b* %0 to { i8 }*
- %10 = getelementptr { i8 }* %9, i32 0, i32 0
- %11 = load i8* %10, align 1
+ %10 = getelementptr { i8 }, { i8 }* %9, i32 0, i32 0
+ %11 = load i8, i8* %10, align 1
%12 = bitcast %struct.SmallStruct_1b* %1 to { i8 }*
- %13 = getelementptr { i8 }* %12, i32 0, i32 0
- %14 = load i8* %13, align 1
+ %13 = getelementptr { i8 }, { i8 }* %12, i32 0, i32 0
+ %14 = load i8, i8* %13, align 1
%15 = bitcast %struct.SmallStruct_1b* %2 to { i8 }*
- %16 = getelementptr { i8 }* %15, i32 0, i32 0
- %17 = load i8* %16, align 1
+ %16 = getelementptr { i8 }, { i8 }* %15, i32 0, i32 0
+ %17 = load i8, i8* %16, align 1
%18 = bitcast %struct.SmallStruct_1b* %3 to { i8 }*
- %19 = getelementptr { i8 }* %18, i32 0, i32 0
- %20 = load i8* %19, align 1
+ %19 = getelementptr { i8 }, { i8 }* %18, i32 0, i32 0
+ %20 = load i8, i8* %19, align 1
%21 = bitcast %struct.SmallStruct_1b* %4 to { i8 }*
- %22 = getelementptr { i8 }* %21, i32 0, i32 0
- %23 = load i8* %22, align 1
+ %22 = getelementptr { i8 }, { i8 }* %21, i32 0, i32 0
+ %23 = load i8, i8* %22, align 1
%24 = bitcast %struct.SmallStruct_1b* %5 to { i8 }*
- %25 = getelementptr { i8 }* %24, i32 0, i32 0
- %26 = load i8* %25, align 1
+ %25 = getelementptr { i8 }, { i8 }* %24, i32 0, i32 0
+ %26 = load i8, i8* %25, align 1
%27 = bitcast %struct.SmallStruct_1b* %6 to { i8 }*
- %28 = getelementptr { i8 }* %27, i32 0, i32 0
- %29 = load i8* %28, align 1
+ %28 = getelementptr { i8 }, { i8 }* %27, i32 0, i32 0
+ %29 = load i8, i8* %28, align 1
%30 = bitcast %struct.SmallStruct_1b* %7 to { i8 }*
- %31 = getelementptr { i8 }* %30, i32 0, i32 0
- %32 = load i8* %31, align 1
+ %31 = getelementptr { i8 }, { i8 }* %30, i32 0, i32 0
+ %32 = load i8, i8* %31, align 1
%33 = bitcast %struct.SmallStruct_1b* %8 to { i8 }*
- %34 = getelementptr { i8 }* %33, i32 0, i32 0
- %35 = load i8* %34, align 1
- call void (i8*, ...)* @varArgF_SmallStruct(i8* getelementptr inbounds ([3 x i8]* @.str, i32 0, i32 0), i8 inreg %11, i8 inreg %14, i8 inreg %17, i8 inreg %20, i8 inreg %23, i8 inreg %26, i8 inreg %29, i8 inreg %32, i8 inreg %35)
+ %34 = getelementptr { i8 }, { i8 }* %33, i32 0, i32 0
+ %35 = load i8, i8* %34, align 1
+ call void (i8*, ...) @varArgF_SmallStruct(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str, i32 0, i32 0), i8 inreg %11, i8 inreg %14, i8 inreg %17, i8 inreg %20, i8 inreg %23, i8 inreg %26, i8 inreg %29, i8 inreg %32, i8 inreg %35)
ret void
; CHECK-LABEL: smallStruct_1b_x9:
; CHECK: dsll $[[R1:[0-9]+]], $[[R2:[0-9]+]], 56
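The recurring change in these small-struct hunks is the migration to LLVM's explicit-type memory-operand syntax: load and getelementptr now spell out the pointee type, and varargs calls drop the pointer-to-function type. A minimal, hedged sketch of the before/after forms (the %p, %agg, %fmt and @f names are placeholders, not taken from these tests):
  ; old: %v = load i8* %p
  ; new: %v = load i8, i8* %p
  ; old: %e = getelementptr { i8 }* %agg, i32 0, i32 0
  ; new: %e = getelementptr { i8 }, { i8 }* %agg, i32 0, i32 0
  ; old: call void (i8*, ...)* @f(i8* %fmt)
  ; new: call void (i8*, ...) @f(i8* %fmt)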
diff --git a/test/CodeGen/Mips/cconv/arguments-varargs.ll b/test/CodeGen/Mips/cconv/arguments-varargs.ll
index adacda5bc420..af217c92dab8 100644
--- a/test/CodeGen/Mips/cconv/arguments-varargs.ll
+++ b/test/CodeGen/Mips/cconv/arguments-varargs.ll
@@ -1,14 +1,14 @@
; RUN: llc -mtriple=mips-linux -relocation-model=static < %s | FileCheck --check-prefix=ALL --check-prefix=O32 --check-prefix=O32-BE %s
; RUN: llc -mtriple=mipsel-linux -relocation-model=static < %s | FileCheck --check-prefix=ALL --check-prefix=O32 --check-prefix=O32-LE %s
-; RUN-TODO: llc -march=mips64 -relocation-model=static -mattr=-n64,+o32 < %s | FileCheck --check-prefix=ALL --check-prefix=O32 %s
-; RUN-TODO: llc -march=mips64el -relocation-model=static -mattr=-n64,+o32 < %s | FileCheck --check-prefix=ALL --check-prefix=O32 %s
+; RUN-TODO: llc -march=mips64 -relocation-model=static -target-abi o32 < %s | FileCheck --check-prefix=ALL --check-prefix=O32 %s
+; RUN-TODO: llc -march=mips64el -relocation-model=static -target-abi o32 < %s | FileCheck --check-prefix=ALL --check-prefix=O32 %s
-; RUN: llc -mtriple=mips64-linux -relocation-model=static -mattr=-n64,+n32 < %s | FileCheck --check-prefix=ALL --check-prefix=NEW --check-prefix=N32 --check-prefix=NEW-BE %s
-; RUN: llc -mtriple=mips64el-linux -relocation-model=static -mattr=-n64,+n32 < %s | FileCheck --check-prefix=ALL --check-prefix=NEW --check-prefix=N32 --check-prefix=NEW-LE %s
+; RUN: llc -mtriple=mips64-linux -relocation-model=static -target-abi n32 < %s | FileCheck --check-prefix=ALL --check-prefix=NEW --check-prefix=N32 --check-prefix=NEW-BE %s
+; RUN: llc -mtriple=mips64el-linux -relocation-model=static -target-abi n32 < %s | FileCheck --check-prefix=ALL --check-prefix=NEW --check-prefix=N32 --check-prefix=NEW-LE %s
-; RUN: llc -march=mips64 -relocation-model=static -mattr=-n64,+n64 < %s | FileCheck --check-prefix=ALL --check-prefix=NEW --check-prefix=N64 --check-prefix=NEW-BE %s
-; RUN: llc -march=mips64el -relocation-model=static -mattr=-n64,+n64 < %s | FileCheck --check-prefix=ALL --check-prefix=NEW --check-prefix=N64 --check-prefix=NEW-LE %s
+; RUN: llc -march=mips64 -relocation-model=static -target-abi n64 < %s | FileCheck --check-prefix=ALL --check-prefix=NEW --check-prefix=N64 --check-prefix=NEW-BE %s
+; RUN: llc -march=mips64el -relocation-model=static -target-abi n64 < %s | FileCheck --check-prefix=ALL --check-prefix=NEW --check-prefix=N64 --check-prefix=NEW-LE %s
@hwords = global [3 x i16] zeroinitializer, align 1
@words = global [3 x i32] zeroinitializer, align 1
@@ -119,12 +119,12 @@ entry:
call void asm sideeffect "# ANCHOR1", ""()
%arg1 = va_arg i8** %ap, i16
- %e1 = getelementptr [3 x i16]* @hwords, i32 0, i32 1
+ %e1 = getelementptr [3 x i16], [3 x i16]* @hwords, i32 0, i32 1
store volatile i16 %arg1, i16* %e1, align 2
call void asm sideeffect "# ANCHOR2", ""()
%arg2 = va_arg i8** %ap, i16
- %e2 = getelementptr [3 x i16]* @hwords, i32 0, i32 2
+ %e2 = getelementptr [3 x i16], [3 x i16]* @hwords, i32 0, i32 2
store volatile i16 %arg2, i16* %e2, align 2
call void @llvm.va_end(i8* %ap2)
@@ -237,12 +237,12 @@ entry:
call void asm sideeffect "# ANCHOR1", ""()
%arg1 = va_arg i8** %ap, i32
- %e1 = getelementptr [3 x i32]* @words, i32 0, i32 1
+ %e1 = getelementptr [3 x i32], [3 x i32]* @words, i32 0, i32 1
store volatile i32 %arg1, i32* %e1, align 4
call void asm sideeffect "# ANCHOR2", ""()
%arg2 = va_arg i8** %ap, i32
- %e2 = getelementptr [3 x i32]* @words, i32 0, i32 2
+ %e2 = getelementptr [3 x i32], [3 x i32]* @words, i32 0, i32 2
store volatile i32 %arg2, i32* %e2, align 4
call void @llvm.va_end(i8* %ap2)
@@ -364,12 +364,12 @@ entry:
call void asm sideeffect "# ANCHOR1", ""()
%arg1 = va_arg i8** %ap, i64
- %e1 = getelementptr [3 x i64]* @dwords, i32 0, i32 1
+ %e1 = getelementptr [3 x i64], [3 x i64]* @dwords, i32 0, i32 1
store volatile i64 %arg1, i64* %e1, align 8
call void asm sideeffect "# ANCHOR2", ""()
%arg2 = va_arg i8** %ap, i64
- %e2 = getelementptr [3 x i64]* @dwords, i32 0, i32 2
+ %e2 = getelementptr [3 x i64], [3 x i64]* @dwords, i32 0, i32 2
store volatile i64 %arg2, i64* %e2, align 8
call void @llvm.va_end(i8* %ap2)
@@ -482,12 +482,12 @@ entry:
call void asm sideeffect "# ANCHOR1", ""()
%arg1 = va_arg i8** %ap, i16
- %e1 = getelementptr [3 x i16]* @hwords, i32 0, i32 1
+ %e1 = getelementptr [3 x i16], [3 x i16]* @hwords, i32 0, i32 1
store volatile i16 %arg1, i16* %e1, align 2
call void asm sideeffect "# ANCHOR2", ""()
%arg2 = va_arg i8** %ap, i16
- %e2 = getelementptr [3 x i16]* @hwords, i32 0, i32 2
+ %e2 = getelementptr [3 x i16], [3 x i16]* @hwords, i32 0, i32 2
store volatile i16 %arg2, i16* %e2, align 2
call void @llvm.va_end(i8* %ap2)
@@ -600,12 +600,12 @@ entry:
call void asm sideeffect "# ANCHOR1", ""()
%arg1 = va_arg i8** %ap, i32
- %e1 = getelementptr [3 x i32]* @words, i32 0, i32 1
+ %e1 = getelementptr [3 x i32], [3 x i32]* @words, i32 0, i32 1
store volatile i32 %arg1, i32* %e1, align 4
call void asm sideeffect "# ANCHOR2", ""()
%arg2 = va_arg i8** %ap, i32
- %e2 = getelementptr [3 x i32]* @words, i32 0, i32 2
+ %e2 = getelementptr [3 x i32], [3 x i32]* @words, i32 0, i32 2
store volatile i32 %arg2, i32* %e2, align 4
call void @llvm.va_end(i8* %ap2)
@@ -727,12 +727,12 @@ entry:
call void asm sideeffect "# ANCHOR1", ""()
%arg1 = va_arg i8** %ap, i64
- %e1 = getelementptr [3 x i64]* @dwords, i32 0, i32 1
+ %e1 = getelementptr [3 x i64], [3 x i64]* @dwords, i32 0, i32 1
store volatile i64 %arg1, i64* %e1, align 8
call void asm sideeffect "# ANCHOR2", ""()
%arg2 = va_arg i8** %ap, i64
- %e2 = getelementptr [3 x i64]* @dwords, i32 0, i32 2
+ %e2 = getelementptr [3 x i64], [3 x i64]* @dwords, i32 0, i32 2
store volatile i64 %arg2, i64* %e2, align 8
call void @llvm.va_end(i8* %ap2)
@@ -844,12 +844,12 @@ entry:
call void asm sideeffect "# ANCHOR1", ""()
%arg1 = va_arg i8** %ap, i16
- %e1 = getelementptr [3 x i16]* @hwords, i32 0, i32 1
+ %e1 = getelementptr [3 x i16], [3 x i16]* @hwords, i32 0, i32 1
store volatile i16 %arg1, i16* %e1, align 2
call void asm sideeffect "# ANCHOR2", ""()
%arg2 = va_arg i8** %ap, i16
- %e2 = getelementptr [3 x i16]* @hwords, i32 0, i32 2
+ %e2 = getelementptr [3 x i16], [3 x i16]* @hwords, i32 0, i32 2
store volatile i16 %arg2, i16* %e2, align 2
call void @llvm.va_end(i8* %ap2)
@@ -961,12 +961,12 @@ entry:
call void asm sideeffect "# ANCHOR1", ""()
%arg1 = va_arg i8** %ap, i32
- %e1 = getelementptr [3 x i32]* @words, i32 0, i32 1
+ %e1 = getelementptr [3 x i32], [3 x i32]* @words, i32 0, i32 1
store volatile i32 %arg1, i32* %e1, align 4
call void asm sideeffect "# ANCHOR2", ""()
%arg2 = va_arg i8** %ap, i32
- %e2 = getelementptr [3 x i32]* @words, i32 0, i32 2
+ %e2 = getelementptr [3 x i32], [3 x i32]* @words, i32 0, i32 2
store volatile i32 %arg2, i32* %e2, align 4
call void @llvm.va_end(i8* %ap2)
@@ -1087,12 +1087,12 @@ entry:
call void asm sideeffect "# ANCHOR1", ""()
%arg1 = va_arg i8** %ap, i64
- %e1 = getelementptr [3 x i64]* @dwords, i32 0, i32 1
+ %e1 = getelementptr [3 x i64], [3 x i64]* @dwords, i32 0, i32 1
store volatile i64 %arg1, i64* %e1, align 8
call void asm sideeffect "# ANCHOR2", ""()
%arg2 = va_arg i8** %ap, i64
- %e2 = getelementptr [3 x i64]* @dwords, i32 0, i32 2
+ %e2 = getelementptr [3 x i64], [3 x i64]* @dwords, i32 0, i32 2
store volatile i64 %arg2, i64* %e2, align 8
call void @llvm.va_end(i8* %ap2)
diff --git a/test/CodeGen/Mips/cconv/arguments.ll b/test/CodeGen/Mips/cconv/arguments.ll
index 43da6044408b..430705f8d418 100644
--- a/test/CodeGen/Mips/cconv/arguments.ll
+++ b/test/CodeGen/Mips/cconv/arguments.ll
@@ -1,14 +1,14 @@
; RUN: llc -march=mips -relocation-model=static < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=O32 %s
; RUN: llc -march=mipsel -relocation-model=static < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=O32 %s
-; RUN-TODO: llc -march=mips64 -relocation-model=static -mattr=-n64,+o32 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=O32 %s
-; RUN-TODO: llc -march=mips64el -relocation-model=static -mattr=-n64,+o32 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=O32 %s
+; RUN-TODO: llc -march=mips64 -relocation-model=static -target-abi n32 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=O32 %s
+; RUN-TODO: llc -march=mips64el -relocation-model=static -target-abi n32 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=O32 %s
-; RUN: llc -march=mips64 -relocation-model=static -mattr=-n64,+n32 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=NEW %s
-; RUN: llc -march=mips64el -relocation-model=static -mattr=-n64,+n32 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=NEW %s
+; RUN: llc -march=mips64 -relocation-model=static -target-abi n32 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=NEW %s
+; RUN: llc -march=mips64el -relocation-model=static -target-abi n32 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM32 --check-prefix=NEW %s
-; RUN: llc -march=mips64 -relocation-model=static -mattr=-n64,+n64 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM64 --check-prefix=NEW %s
-; RUN: llc -march=mips64el -relocation-model=static -mattr=-n64,+n64 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM64 --check-prefix=NEW %s
+; RUN: llc -march=mips64 -relocation-model=static -target-abi n64 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM64 --check-prefix=NEW %s
+; RUN: llc -march=mips64el -relocation-model=static -target-abi n64 < %s | FileCheck --check-prefix=ALL --check-prefix=SYM64 --check-prefix=NEW %s
; Test the integer arguments for all ABI's and byte orders as specified by
; section 5 of MD00305 (MIPS ABIs Described).
@@ -28,25 +28,25 @@ define void @align_to_arg_slots(i8 signext %a, i8 signext %b, i8 signext %c,
i8 signext %g, i8 signext %h, i8 signext %i,
i8 signext %j) nounwind {
entry:
- %0 = getelementptr [11 x i8]* @bytes, i32 0, i32 1
+ %0 = getelementptr [11 x i8], [11 x i8]* @bytes, i32 0, i32 1
store volatile i8 %a, i8* %0
- %1 = getelementptr [11 x i8]* @bytes, i32 0, i32 2
+ %1 = getelementptr [11 x i8], [11 x i8]* @bytes, i32 0, i32 2
store volatile i8 %b, i8* %1
- %2 = getelementptr [11 x i8]* @bytes, i32 0, i32 3
+ %2 = getelementptr [11 x i8], [11 x i8]* @bytes, i32 0, i32 3
store volatile i8 %c, i8* %2
- %3 = getelementptr [11 x i8]* @bytes, i32 0, i32 4
+ %3 = getelementptr [11 x i8], [11 x i8]* @bytes, i32 0, i32 4
store volatile i8 %d, i8* %3
- %4 = getelementptr [11 x i8]* @bytes, i32 0, i32 5
+ %4 = getelementptr [11 x i8], [11 x i8]* @bytes, i32 0, i32 5
store volatile i8 %e, i8* %4
- %5 = getelementptr [11 x i8]* @bytes, i32 0, i32 6
+ %5 = getelementptr [11 x i8], [11 x i8]* @bytes, i32 0, i32 6
store volatile i8 %f, i8* %5
- %6 = getelementptr [11 x i8]* @bytes, i32 0, i32 7
+ %6 = getelementptr [11 x i8], [11 x i8]* @bytes, i32 0, i32 7
store volatile i8 %g, i8* %6
- %7 = getelementptr [11 x i8]* @bytes, i32 0, i32 8
+ %7 = getelementptr [11 x i8], [11 x i8]* @bytes, i32 0, i32 8
store volatile i8 %h, i8* %7
- %8 = getelementptr [11 x i8]* @bytes, i32 0, i32 9
+ %8 = getelementptr [11 x i8], [11 x i8]* @bytes, i32 0, i32 9
store volatile i8 %i, i8* %8
- %9 = getelementptr [11 x i8]* @bytes, i32 0, i32 10
+ %9 = getelementptr [11 x i8], [11 x i8]* @bytes, i32 0, i32 10
store volatile i8 %j, i8* %9
ret void
}
@@ -95,23 +95,23 @@ define void @slot_skipping(i8 signext %a, i64 signext %b, i8 signext %c,
i8 signext %d, i8 signext %e, i8 signext %f,
i8 signext %g, i64 signext %i, i8 signext %j) nounwind {
entry:
- %0 = getelementptr [11 x i8]* @bytes, i32 0, i32 1
+ %0 = getelementptr [11 x i8], [11 x i8]* @bytes, i32 0, i32 1
store volatile i8 %a, i8* %0
- %1 = getelementptr [11 x i64]* @dwords, i32 0, i32 1
+ %1 = getelementptr [11 x i64], [11 x i64]* @dwords, i32 0, i32 1
store volatile i64 %b, i64* %1
- %2 = getelementptr [11 x i8]* @bytes, i32 0, i32 2
+ %2 = getelementptr [11 x i8], [11 x i8]* @bytes, i32 0, i32 2
store volatile i8 %c, i8* %2
- %3 = getelementptr [11 x i8]* @bytes, i32 0, i32 3
+ %3 = getelementptr [11 x i8], [11 x i8]* @bytes, i32 0, i32 3
store volatile i8 %d, i8* %3
- %4 = getelementptr [11 x i8]* @bytes, i32 0, i32 4
+ %4 = getelementptr [11 x i8], [11 x i8]* @bytes, i32 0, i32 4
store volatile i8 %e, i8* %4
- %5 = getelementptr [11 x i8]* @bytes, i32 0, i32 5
+ %5 = getelementptr [11 x i8], [11 x i8]* @bytes, i32 0, i32 5
store volatile i8 %f, i8* %5
- %6 = getelementptr [11 x i8]* @bytes, i32 0, i32 6
+ %6 = getelementptr [11 x i8], [11 x i8]* @bytes, i32 0, i32 6
store volatile i8 %g, i8* %6
- %7 = getelementptr [11 x i64]* @dwords, i32 0, i32 2
+ %7 = getelementptr [11 x i64], [11 x i64]* @dwords, i32 0, i32 2
store volatile i64 %i, i64* %7
- %8 = getelementptr [11 x i8]* @bytes, i32 0, i32 7
+ %8 = getelementptr [11 x i8], [11 x i8]* @bytes, i32 0, i32 7
store volatile i8 %j, i8* %8
ret void
}
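The RUN-line updates in these cconv tests replace the retired -mattr=-n64,+o32/+n32/+n64 feature-bit ABI selection with llc's -target-abi option. A hedged sketch of the new invocation form (check prefixes are illustrative):
  ; RUN: llc -march=mips64 -relocation-model=static -target-abi n32 < %s | FileCheck --check-prefix=N32 %s
  ; RUN: llc -march=mips64 -relocation-model=static -target-abi n64 < %s | FileCheck --check-prefix=N64 %s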
diff --git a/test/CodeGen/Mips/cconv/callee-saved-float.ll b/test/CodeGen/Mips/cconv/callee-saved-float.ll
index de4d9171aec4..c84f0f439c26 100644
--- a/test/CodeGen/Mips/cconv/callee-saved-float.ll
+++ b/test/CodeGen/Mips/cconv/callee-saved-float.ll
@@ -3,20 +3,20 @@
; RUN: llc -march=mips < %s | FileCheck --check-prefix=ALL --check-prefix=O32-INV %s
; RUN: llc -march=mipsel < %s | FileCheck --check-prefix=ALL --check-prefix=O32-INV %s
-; RUN-TODO: llc -march=mips64 -mattr=-n64,+o32 < %s | FileCheck --check-prefix=ALL --check-prefix=O32 %s
-; RUN-TODO: llc -march=mips64el -mattr=-n64,+o32 < %s | FileCheck --check-prefix=ALL --check-prefix=O32 %s
-; RUN-TODO: llc -march=mips64 -mattr=-n64,+o32 < %s | FileCheck --check-prefix=ALL --check-prefix=ALL-INV --check-prefix=O32-INV %s
-; RUN-TODO: llc -march=mips64el -mattr=-n64,+o32 < %s | FileCheck --check-prefix=ALL --check-prefix=ALL-INV --check-prefix=O32-INV %s
+; RUN-TODO: llc -march=mips64 -target-abi o32 < %s | FileCheck --check-prefix=ALL --check-prefix=O32 %s
+; RUN-TODO: llc -march=mips64el -target-abi o32 < %s | FileCheck --check-prefix=ALL --check-prefix=O32 %s
+; RUN-TODO: llc -march=mips64 -target-abi o32 < %s | FileCheck --check-prefix=ALL --check-prefix=ALL-INV --check-prefix=O32-INV %s
+; RUN-TODO: llc -march=mips64el -target-abi o32 < %s | FileCheck --check-prefix=ALL --check-prefix=ALL-INV --check-prefix=O32-INV %s
-; RUN: llc -march=mips64 -mattr=-n64,+n32 < %s | FileCheck --check-prefix=ALL --check-prefix=N32 %s
-; RUN: llc -march=mips64el -mattr=-n64,+n32 < %s | FileCheck --check-prefix=ALL --check-prefix=N32 %s
-; RUN: llc -march=mips64 -mattr=-n64,+n32 < %s | FileCheck --check-prefix=ALL --check-prefix=ALL-INV --check-prefix=N32-INV %s
-; RUN: llc -march=mips64el -mattr=-n64,+n32 < %s | FileCheck --check-prefix=ALL --check-prefix=ALL-INV --check-prefix=N32-INV %s
+; RUN: llc -march=mips64 -target-abi n32 < %s | FileCheck --check-prefix=ALL --check-prefix=N32 %s
+; RUN: llc -march=mips64el -target-abi n32 < %s | FileCheck --check-prefix=ALL --check-prefix=N32 %s
+; RUN: llc -march=mips64 -target-abi n32 < %s | FileCheck --check-prefix=ALL --check-prefix=ALL-INV --check-prefix=N32-INV %s
+; RUN: llc -march=mips64el -target-abi n32 < %s | FileCheck --check-prefix=ALL --check-prefix=ALL-INV --check-prefix=N32-INV %s
-; RUN: llc -march=mips64 -mattr=-n64,+n64 < %s | FileCheck --check-prefix=ALL --check-prefix=N64 %s
-; RUN: llc -march=mips64el -mattr=-n64,+n64 < %s | FileCheck --check-prefix=ALL --check-prefix=N64 %s
-; RUN: llc -march=mips64 -mattr=-n64,+n64 < %s | FileCheck --check-prefix=ALL --check-prefix=ALL-INV --check-prefix=N64-INV %s
-; RUN: llc -march=mips64el -mattr=-n64,+n64 < %s | FileCheck --check-prefix=ALL --check-prefix=ALL-INV --check-prefix=N64-INV %s
+; RUN: llc -march=mips64 -target-abi n64 < %s | FileCheck --check-prefix=ALL --check-prefix=N64 %s
+; RUN: llc -march=mips64el -target-abi n64 < %s | FileCheck --check-prefix=ALL --check-prefix=N64 %s
+; RUN: llc -march=mips64 -target-abi n64 < %s | FileCheck --check-prefix=ALL --check-prefix=ALL-INV --check-prefix=N64-INV %s
+; RUN: llc -march=mips64el -target-abi n64 < %s | FileCheck --check-prefix=ALL --check-prefix=ALL-INV --check-prefix=N64-INV %s
; Test that the callee-saved registers are callee-saved as specified by section
; 2 of the MIPSpro N32 Handbook and section 3 of the SYSV ABI spec.
diff --git a/test/CodeGen/Mips/cconv/callee-saved.ll b/test/CodeGen/Mips/cconv/callee-saved.ll
index 293e99f0c8e6..d0b1e64cdeea 100644
--- a/test/CodeGen/Mips/cconv/callee-saved.ll
+++ b/test/CodeGen/Mips/cconv/callee-saved.ll
@@ -3,20 +3,20 @@
; RUN: llc -march=mips < %s | FileCheck --check-prefix=ALL --check-prefix=O32-INV %s
; RUN: llc -march=mipsel < %s | FileCheck --check-prefix=ALL --check-prefix=O32-INV %s
-; RUN-TODO: llc -march=mips64 -mattr=-n64,+o32 < %s | FileCheck --check-prefix=ALL --check-prefix=O32 %s
-; RUN-TODO: llc -march=mips64el -mattr=-n64,+o32 < %s | FileCheck --check-prefix=ALL --check-prefix=O32 %s
-; RUN-TODO: llc -march=mips64 -mattr=-n64,+o32 < %s | FileCheck --check-prefix=ALL --check-prefix=O32-INV %s
-; RUN-TODO: llc -march=mips64el -mattr=-n64,+o32 < %s | FileCheck --check-prefix=ALL --check-prefix=O32-INV %s
+; RUN-TODO: llc -march=mips64 -target-abi o32 < %s | FileCheck --check-prefix=ALL --check-prefix=O32 %s
+; RUN-TODO: llc -march=mips64el -target-abi o32 < %s | FileCheck --check-prefix=ALL --check-prefix=O32 %s
+; RUN-TODO: llc -march=mips64 -target-abi o32 < %s | FileCheck --check-prefix=ALL --check-prefix=O32-INV %s
+; RUN-TODO: llc -march=mips64el -target-abi o32 < %s | FileCheck --check-prefix=ALL --check-prefix=O32-INV %s
-; RUN: llc -march=mips64 -mattr=-n64,+n32 < %s | FileCheck --check-prefix=ALL --check-prefix=N32 %s
-; RUN: llc -march=mips64el -mattr=-n64,+n32 < %s | FileCheck --check-prefix=ALL --check-prefix=N32 %s
-; RUN: llc -march=mips64 -mattr=-n64,+n32 < %s | FileCheck --check-prefix=ALL --check-prefix=N32-INV %s
-; RUN: llc -march=mips64el -mattr=-n64,+n32 < %s | FileCheck --check-prefix=ALL --check-prefix=N32-INV %s
+; RUN: llc -march=mips64 -target-abi n32 < %s | FileCheck --check-prefix=ALL --check-prefix=N32 %s
+; RUN: llc -march=mips64el -target-abi n32 < %s | FileCheck --check-prefix=ALL --check-prefix=N32 %s
+; RUN: llc -march=mips64 -target-abi n32 < %s | FileCheck --check-prefix=ALL --check-prefix=N32-INV %s
+; RUN: llc -march=mips64el -target-abi n32 < %s | FileCheck --check-prefix=ALL --check-prefix=N32-INV %s
-; RUN: llc -march=mips64 -mattr=-n64,+n64 < %s | FileCheck --check-prefix=ALL --check-prefix=N64 %s
-; RUN: llc -march=mips64el -mattr=-n64,+n64 < %s | FileCheck --check-prefix=ALL --check-prefix=N64 %s
-; RUN: llc -march=mips64 -mattr=-n64,+n64 < %s | FileCheck --check-prefix=ALL --check-prefix=N64-INV %s
-; RUN: llc -march=mips64el -mattr=-n64,+n64 < %s | FileCheck --check-prefix=ALL --check-prefix=N64-INV %s
+; RUN: llc -march=mips64 -target-abi n64 < %s | FileCheck --check-prefix=ALL --check-prefix=N64 %s
+; RUN: llc -march=mips64el -target-abi n64 < %s | FileCheck --check-prefix=ALL --check-prefix=N64 %s
+; RUN: llc -march=mips64 -target-abi n64 < %s | FileCheck --check-prefix=ALL --check-prefix=N64-INV %s
+; RUN: llc -march=mips64el -target-abi n64 < %s | FileCheck --check-prefix=ALL --check-prefix=N64-INV %s
; Test that the callee-saved registers are callee-saved as specified by section
; 2 of the MIPSpro N32 Handbook and section 3 of the SYSV ABI spec.
diff --git a/test/CodeGen/Mips/cconv/memory-layout.ll b/test/CodeGen/Mips/cconv/memory-layout.ll
index 0c3cc9ecedfe..33a68da157f6 100644
--- a/test/CodeGen/Mips/cconv/memory-layout.ll
+++ b/test/CodeGen/Mips/cconv/memory-layout.ll
@@ -1,14 +1,14 @@
; RUN: llc -march=mips < %s | FileCheck --check-prefix=ALL --check-prefix=O32 %s
; RUN: llc -march=mipsel < %s | FileCheck --check-prefix=ALL --check-prefix=O32 %s
-; RUN-TODO: llc -march=mips64 -mattr=-n64,+o32 < %s | FileCheck --check-prefix=ALL --check-prefix=O32 %s
-; RUN-TODO: llc -march=mips64el -mattr=-n64,+o32 < %s | FileCheck --check-prefix=ALL --check-prefix=O32 %s
+; RUN-TODO: llc -march=mips64 -target-abi o32 < %s | FileCheck --check-prefix=ALL --check-prefix=O32 %s
+; RUN-TODO: llc -march=mips64el -target-abi o32 < %s | FileCheck --check-prefix=ALL --check-prefix=O32 %s
-; RUN: llc -march=mips64 -mattr=-n64,+n32 < %s | FileCheck --check-prefix=ALL --check-prefix=N32 %s
-; RUN: llc -march=mips64el -mattr=-n64,+n32 < %s | FileCheck --check-prefix=ALL --check-prefix=N32 %s
+; RUN: llc -march=mips64 -target-abi n32 < %s | FileCheck --check-prefix=ALL --check-prefix=N32 %s
+; RUN: llc -march=mips64el -target-abi n32 < %s | FileCheck --check-prefix=ALL --check-prefix=N32 %s
-; RUN: llc -march=mips64 -mattr=-n64,+n64 < %s | FileCheck --check-prefix=ALL --check-prefix=N64 %s
-; RUN: llc -march=mips64el -mattr=-n64,+n64 < %s | FileCheck --check-prefix=ALL --check-prefix=N64 %s
+; RUN: llc -march=mips64 -target-abi n64 < %s | FileCheck --check-prefix=ALL --check-prefix=N64 %s
+; RUN: llc -march=mips64el -target-abi n64 < %s | FileCheck --check-prefix=ALL --check-prefix=N64 %s
; Test the memory layout for all ABI's and byte orders as specified by section
; 4 of MD00305 (MIPS ABIs Described).
diff --git a/test/CodeGen/Mips/cconv/reserved-space.ll b/test/CodeGen/Mips/cconv/reserved-space.ll
index b36f89ecc115..23190c2790cc 100644
--- a/test/CodeGen/Mips/cconv/reserved-space.ll
+++ b/test/CodeGen/Mips/cconv/reserved-space.ll
@@ -1,14 +1,14 @@
; RUN: llc -march=mips < %s | FileCheck --check-prefix=ALL --check-prefix=O32 %s
; RUN: llc -march=mipsel < %s | FileCheck --check-prefix=ALL --check-prefix=O32 %s
-; RUN-TODO: llc -march=mips64 -mattr=-n64,+o32 < %s | FileCheck --check-prefix=ALL --check-prefix=O32 %s
-; RUN-TODO: llc -march=mips64el -mattr=-n64,+o32 < %s | FileCheck --check-prefix=ALL --check-prefix=O32 %s
+; RUN-TODO: llc -march=mips64 -target-abi o32 < %s | FileCheck --check-prefix=ALL --check-prefix=O32 %s
+; RUN-TODO: llc -march=mips64el -target-abi o32 < %s | FileCheck --check-prefix=ALL --check-prefix=O32 %s
-; RUN: llc -march=mips64 -mattr=-n64,+n32 < %s | FileCheck --check-prefix=ALL --check-prefix=N32 %s
-; RUN: llc -march=mips64el -mattr=-n64,+n32 < %s | FileCheck --check-prefix=ALL --check-prefix=N32 %s
+; RUN: llc -march=mips64 -target-abi n32 < %s | FileCheck --check-prefix=ALL --check-prefix=N32 %s
+; RUN: llc -march=mips64el -target-abi n32 < %s | FileCheck --check-prefix=ALL --check-prefix=N32 %s
-; RUN: llc -march=mips64 -mattr=-n64,+n64 < %s | FileCheck --check-prefix=ALL --check-prefix=N64 %s
-; RUN: llc -march=mips64el -mattr=-n64,+n64 < %s | FileCheck --check-prefix=ALL --check-prefix=N64 %s
+; RUN: llc -march=mips64 -target-abi n64 < %s | FileCheck --check-prefix=ALL --check-prefix=N64 %s
+; RUN: llc -march=mips64el -target-abi n64 < %s | FileCheck --check-prefix=ALL --check-prefix=N64 %s
; Test that O32 correctly reserves space for the four arguments, even when
; there aren't any as per section 5 of MD00305 (MIPS ABIs Described).
diff --git a/test/CodeGen/Mips/cconv/return-float.ll b/test/CodeGen/Mips/cconv/return-float.ll
index d1a5e4f2fa9d..8eb8c411e081 100644
--- a/test/CodeGen/Mips/cconv/return-float.ll
+++ b/test/CodeGen/Mips/cconv/return-float.ll
@@ -1,14 +1,14 @@
-; RUN: llc -mtriple=mips-linux-gnu -soft-float -relocation-model=static < %s | FileCheck --check-prefix=ALL --check-prefix=O32 %s
-; RUN: llc -mtriple=mipsel-linux-gnu -soft-float -relocation-model=static < %s | FileCheck --check-prefix=ALL --check-prefix=O32 %s
+; RUN: llc -mtriple=mips-linux-gnu -mattr=+soft-float -relocation-model=static < %s | FileCheck --check-prefix=ALL --check-prefix=O32 %s
+; RUN: llc -mtriple=mipsel-linux-gnu -mattr=+soft-float -relocation-model=static < %s | FileCheck --check-prefix=ALL --check-prefix=O32 %s
-; RUN-TODO: llc -mtriple=mips64-linux-gnu -soft-float -relocation-model=static -mattr=-n64,+o32 < %s | FileCheck --check-prefix=ALL --check-prefix=O32 %s
-; RUN-TODO: llc -mtriple=mips64el-linux-gnu -soft-float -relocation-model=static -mattr=-n64,+o32 < %s | FileCheck --check-prefix=ALL --check-prefix=O32 %s
+; RUN-TODO: llc -mtriple=mips64-linux-gnu -mattr=+soft-float -relocation-model=static -target-abi o32 < %s | FileCheck --check-prefix=ALL --check-prefix=O32 %s
+; RUN-TODO: llc -mtriple=mips64el-linux-gnu -mattr=+soft-float -relocation-model=static -target-abi o32 < %s | FileCheck --check-prefix=ALL --check-prefix=O32 %s
-; RUN: llc -mtriple=mips64-linux-gnu -soft-float -relocation-model=static -mattr=-n64,+n32 < %s | FileCheck --check-prefix=ALL --check-prefix=N32 %s
-; RUN: llc -mtriple=mips64el-linux-gnu -soft-float -relocation-model=static -mattr=-n64,+n32 < %s | FileCheck --check-prefix=ALL --check-prefix=N32 %s
+; RUN: llc -mtriple=mips64-linux-gnu -mattr=+soft-float -relocation-model=static -target-abi n32 < %s | FileCheck --check-prefix=ALL --check-prefix=N32 %s
+; RUN: llc -mtriple=mips64el-linux-gnu -mattr=+soft-float -relocation-model=static -target-abi n32 < %s | FileCheck --check-prefix=ALL --check-prefix=N32 %s
-; RUN: llc -mtriple=mips64-linux-gnu -soft-float -relocation-model=static -mattr=-n64,+n64 < %s | FileCheck --check-prefix=ALL --check-prefix=N64 %s
-; RUN: llc -mtriple=mips64el-linux-gnu -soft-float -relocation-model=static -mattr=-n64,+n64 < %s | FileCheck --check-prefix=ALL --check-prefix=N64 %s
+; RUN: llc -mtriple=mips64-linux-gnu -mattr=+soft-float -relocation-model=static -target-abi n64 < %s | FileCheck --check-prefix=ALL --check-prefix=N64 %s
+; RUN: llc -mtriple=mips64el-linux-gnu -mattr=+soft-float -relocation-model=static -target-abi n64 < %s | FileCheck --check-prefix=ALL --check-prefix=N64 %s
; Test the float returns for all ABI's and byte orders as specified by
; section 5 of MD00305 (MIPS ABIs Described).
@@ -21,7 +21,7 @@
define float @retfloat() nounwind {
entry:
- %0 = load volatile float* @float
+ %0 = load volatile float, float* @float
ret float %0
}
@@ -35,7 +35,7 @@ entry:
define double @retdouble() nounwind {
entry:
- %0 = load volatile double* @double
+ %0 = load volatile double, double* @double
ret double %0
}
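Similarly, soft-float lowering is now requested through the subtarget feature string rather than the old standalone -soft-float flag; a hedged sketch of the updated RUN line (check prefix illustrative):
  ; RUN: llc -mtriple=mips-linux-gnu -mattr=+soft-float -relocation-model=static < %s | FileCheck --check-prefix=O32 %s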
diff --git a/test/CodeGen/Mips/cconv/return-hard-float.ll b/test/CodeGen/Mips/cconv/return-hard-float.ll
index 123b499185a9..14853c8ca6f7 100644
--- a/test/CodeGen/Mips/cconv/return-hard-float.ll
+++ b/test/CodeGen/Mips/cconv/return-hard-float.ll
@@ -1,14 +1,14 @@
; RUN: llc -mtriple=mips-linux-gnu -relocation-model=static < %s | FileCheck --check-prefix=ALL --check-prefix=O32 %s
; RUN: llc -mtriple=mipsel-linux-gnu -relocation-model=static < %s | FileCheck --check-prefix=ALL --check-prefix=O32 %s
-; RUN-TODO: llc -mtriple=mips64-linux-gnu -relocation-model=static -mattr=-n64,+o32 < %s | FileCheck --check-prefix=ALL --check-prefix=O32 %s
-; RUN-TODO: llc -mtriple=mips64el-linux-gnu -relocation-model=static -mattr=-n64,+o32 < %s | FileCheck --check-prefix=ALL --check-prefix=O32 %s
+; RUN-TODO: llc -mtriple=mips64-linux-gnu -relocation-model=static -target-abi o32 < %s | FileCheck --check-prefix=ALL --check-prefix=O32 %s
+; RUN-TODO: llc -mtriple=mips64el-linux-gnu -relocation-model=static -target-abi o32 < %s | FileCheck --check-prefix=ALL --check-prefix=O32 %s
-; RUN: llc -mtriple=mips64-linux-gnu -relocation-model=static -mattr=-n64,+n32 < %s | FileCheck --check-prefix=ALL --check-prefix=N32 %s
-; RUN: llc -mtriple=mips64el-linux-gnu -relocation-model=static -mattr=-n64,+n32 < %s | FileCheck --check-prefix=ALL --check-prefix=N32 %s
+; RUN: llc -mtriple=mips64-linux-gnu -relocation-model=static -target-abi n32 < %s | FileCheck --check-prefix=ALL --check-prefix=N32 %s
+; RUN: llc -mtriple=mips64el-linux-gnu -relocation-model=static -target-abi n32 < %s | FileCheck --check-prefix=ALL --check-prefix=N32 %s
-; RUN: llc -mtriple=mips64-linux-gnu -relocation-model=static -mattr=-n64,+n64 < %s | FileCheck --check-prefix=ALL --check-prefix=N64 %s
-; RUN: llc -mtriple=mips64el-linux-gnu -relocation-model=static -mattr=-n64,+n64 < %s | FileCheck --check-prefix=ALL --check-prefix=N64 %s
+; RUN: llc -mtriple=mips64-linux-gnu -relocation-model=static -target-abi n64 < %s | FileCheck --check-prefix=ALL --check-prefix=N64 %s
+; RUN: llc -mtriple=mips64el-linux-gnu -relocation-model=static -target-abi n64 < %s | FileCheck --check-prefix=ALL --check-prefix=N64 %s
; RUN: llc -mtriple=mips-linux-gnu -relocation-model=static -mattr=+o32,+fp64 < %s | FileCheck --check-prefix=ALL --check-prefix=032FP64 %s
; RUN: llc -mtriple=mipsel-linux-gnu -relocation-model=static -mattr=+o32,+fp64 < %s | FileCheck --check-prefix=ALL --check-prefix=032FP64 %s
@@ -24,7 +24,7 @@
define float @retfloat() nounwind {
entry:
- %0 = load volatile float* @float
+ %0 = load volatile float, float* @float
ret float %0
}
@@ -38,7 +38,7 @@ entry:
define double @retdouble() nounwind {
entry:
- %0 = load volatile double* @double
+ %0 = load volatile double, double* @double
ret double %0
}
@@ -50,7 +50,7 @@ entry:
define { double, double } @retComplexDouble() #0 {
%retval = alloca { double, double }, align 8
- %1 = load { double, double }* %retval
+ %1 = load { double, double }, { double, double }* %retval
ret { double, double } %1
}
diff --git a/test/CodeGen/Mips/cconv/return-hard-fp128.ll b/test/CodeGen/Mips/cconv/return-hard-fp128.ll
index 0da59efddd6c..34e9647acddd 100644
--- a/test/CodeGen/Mips/cconv/return-hard-fp128.ll
+++ b/test/CodeGen/Mips/cconv/return-hard-fp128.ll
@@ -1,8 +1,8 @@
-; RUN: llc -march=mips64 -relocation-model=static -mattr=-n64,+n32 < %s | FileCheck --check-prefix=ALL --check-prefix=N32 %s
-; RUN: llc -march=mips64el -relocation-model=static -mattr=-n64,+n32 < %s | FileCheck --check-prefix=ALL --check-prefix=N32 %s
+; RUN: llc -march=mips64 -relocation-model=static -target-abi n32 < %s | FileCheck --check-prefix=ALL --check-prefix=N32 %s
+; RUN: llc -march=mips64el -relocation-model=static -target-abi n32 < %s | FileCheck --check-prefix=ALL --check-prefix=N32 %s
-; RUN: llc -march=mips64 -relocation-model=static -mattr=-n64,+n64 < %s | FileCheck --check-prefix=ALL --check-prefix=N64 %s
-; RUN: llc -march=mips64el -relocation-model=static -mattr=-n64,+n64 < %s | FileCheck --check-prefix=ALL --check-prefix=N64 %s
+; RUN: llc -march=mips64 -relocation-model=static -target-abi n64 < %s | FileCheck --check-prefix=ALL --check-prefix=N64 %s
+; RUN: llc -march=mips64el -relocation-model=static -target-abi n64 < %s | FileCheck --check-prefix=ALL --check-prefix=N64 %s
; Test the fp128 returns for N32/N64 and all byte orders as specified by
; section 5 of MD00305 (MIPS ABIs Described).
@@ -13,7 +13,7 @@
define fp128 @retldouble() nounwind {
entry:
- %0 = load volatile fp128* @fp128
+ %0 = load volatile fp128, fp128* @fp128
ret fp128 %0
}
diff --git a/test/CodeGen/Mips/cconv/return-hard-struct-f128.ll b/test/CodeGen/Mips/cconv/return-hard-struct-f128.ll
index 2e8447710281..c4c8f10ca3b4 100644
--- a/test/CodeGen/Mips/cconv/return-hard-struct-f128.ll
+++ b/test/CodeGen/Mips/cconv/return-hard-struct-f128.ll
@@ -1,8 +1,8 @@
-; RUN: llc -mtriple=mips64-linux-gnu -relocation-model=static -mattr=-n64,+n32 < %s | FileCheck --check-prefix=ALL --check-prefix=N32 %s
-; RUN: llc -mtriple=mips64el-linux-gnu -relocation-model=static -mattr=-n64,+n32 < %s | FileCheck --check-prefix=ALL --check-prefix=N32 %s
+; RUN: llc -mtriple=mips64-linux-gnu -relocation-model=static -target-abi n32 < %s | FileCheck --check-prefix=ALL --check-prefix=N32 %s
+; RUN: llc -mtriple=mips64el-linux-gnu -relocation-model=static -target-abi n32 < %s | FileCheck --check-prefix=ALL --check-prefix=N32 %s
-; RUN: llc -mtriple=mips64-linux-gnu -relocation-model=static -mattr=-n64,+n64 < %s | FileCheck --check-prefix=ALL --check-prefix=N64 %s
-; RUN: llc -mtriple=mips64el-linux-gnu -relocation-model=static -mattr=-n64,+n64 < %s | FileCheck --check-prefix=ALL --check-prefix=N64 %s
+; RUN: llc -mtriple=mips64-linux-gnu -relocation-model=static -target-abi n64 < %s | FileCheck --check-prefix=ALL --check-prefix=N64 %s
+; RUN: llc -mtriple=mips64el-linux-gnu -relocation-model=static -target-abi n64 < %s | FileCheck --check-prefix=ALL --check-prefix=N64 %s
; Test return of {fp128} agrees with de-facto N32/N64 ABI.
@@ -10,7 +10,7 @@
define inreg {fp128} @ret_struct_fp128() nounwind {
entry:
- %0 = load volatile {fp128}* @struct_fp128
+ %0 = load volatile {fp128}, {fp128}* @struct_fp128
ret {fp128} %0
}
diff --git a/test/CodeGen/Mips/cconv/return-struct.ll b/test/CodeGen/Mips/cconv/return-struct.ll
index 11a8cf032148..8decd04f089b 100644
--- a/test/CodeGen/Mips/cconv/return-struct.ll
+++ b/test/CodeGen/Mips/cconv/return-struct.ll
@@ -1,14 +1,14 @@
; RUN: llc -mtriple=mips-linux-gnu -relocation-model=static < %s | FileCheck --check-prefix=ALL --check-prefix=O32 --check-prefix=O32-BE %s
; RUN: llc -mtriple=mipsel-linux-gnu -relocation-model=static < %s | FileCheck --check-prefix=ALL --check-prefix=O32 --check-prefix=O32-LE %s
-; RUN-TODO: llc -mtriple=mips64-linux-gnu -relocation-model=static -mattr=-n64,+o32 < %s | FileCheck --check-prefix=ALL --check-prefix=O32 %s
-; RUN-TODO: llc -mtriple=mips64el-linux-gnu -relocation-model=static -mattr=-n64,+o32 < %s | FileCheck --check-prefix=ALL --check-prefix=O32 %s
+; RUN-TODO: llc -mtriple=mips64-linux-gnu -relocation-model=static -target-abi o32 < %s | FileCheck --check-prefix=ALL --check-prefix=O32 %s
+; RUN-TODO: llc -mtriple=mips64el-linux-gnu -relocation-model=static -target-abi o32 < %s | FileCheck --check-prefix=ALL --check-prefix=O32 %s
-; RUN: llc -mtriple=mips64-linux-gnu -relocation-model=static -mattr=-n64,+n32 < %s | FileCheck --check-prefix=ALL --check-prefix=N32 --check-prefix=N32-BE %s
-; RUN: llc -mtriple=mips64el-linux-gnu -relocation-model=static -mattr=-n64,+n32 < %s | FileCheck --check-prefix=ALL --check-prefix=N32 --check-prefix=N32-LE %s
+; RUN: llc -mtriple=mips64-linux-gnu -relocation-model=static -target-abi n32 < %s | FileCheck --check-prefix=ALL --check-prefix=N32 --check-prefix=N32-BE %s
+; RUN: llc -mtriple=mips64el-linux-gnu -relocation-model=static -target-abi n32 < %s | FileCheck --check-prefix=ALL --check-prefix=N32 --check-prefix=N32-LE %s
-; RUN: llc -mtriple=mips64-linux-gnu -relocation-model=static -mattr=-n64,+n64 < %s | FileCheck --check-prefix=ALL --check-prefix=N64 --check-prefix=N64-BE %s
-; RUN: llc -mtriple=mips64el-linux-gnu -relocation-model=static -mattr=-n64,+n64 < %s | FileCheck --check-prefix=ALL --check-prefix=N64 --check-prefix=N64-LE %s
+; RUN: llc -mtriple=mips64-linux-gnu -relocation-model=static -target-abi n64 < %s | FileCheck --check-prefix=ALL --check-prefix=N64 --check-prefix=N64-BE %s
+; RUN: llc -mtriple=mips64el-linux-gnu -relocation-model=static -target-abi n64 < %s | FileCheck --check-prefix=ALL --check-prefix=N64 --check-prefix=N64-LE %s
; Test struct returns for all ABI's and byte orders.
@@ -22,7 +22,7 @@ declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i
define inreg {i8} @ret_struct_i8() nounwind {
entry:
- %0 = load volatile {i8}* @struct_byte
+ %0 = load volatile {i8}, {i8}* @struct_byte
ret {i8} %0
}
@@ -52,9 +52,9 @@ define inreg {i16} @ret_struct_i16() nounwind {
entry:
%retval = alloca {i8,i8}, align 1
%0 = bitcast {i8,i8}* %retval to i8*
- call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* getelementptr inbounds ({i8,i8}* @struct_2byte, i32 0, i32 0), i64 2, i32 1, i1 false)
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* getelementptr inbounds ({i8,i8}, {i8,i8}* @struct_2byte, i32 0, i32 0), i64 2, i32 1, i1 false)
%1 = bitcast {i8,i8}* %retval to {i16}*
- %2 = load volatile {i16}* %1
+ %2 = load volatile {i16}, {i16}* %1
ret {i16} %2
}
@@ -91,7 +91,7 @@ entry:
; missed by the CCPromoteToType and the shift didn't happen.
define inreg {i48} @ret_struct_3xi16() nounwind {
entry:
- %0 = load volatile i48* bitcast ({[3 x i16]}* @struct_3xi16 to i48*), align 2
+ %0 = load volatile i48, i48* bitcast ({[3 x i16]}* @struct_3xi16 to i48*), align 2
%1 = insertvalue {i48} undef, i48 %0, 0
ret {i48} %1
}
@@ -174,7 +174,7 @@ entry:
; This time we let the backend lower the sret argument.
define {[6 x i32]} @ret_struct_6xi32() {
entry:
- %0 = load volatile {[6 x i32]}* @struct_6xi32, align 2
+ %0 = load volatile {[6 x i32]}, {[6 x i32]}* @struct_6xi32, align 2
ret {[6 x i32]} %0
}
diff --git a/test/CodeGen/Mips/cconv/return.ll b/test/CodeGen/Mips/cconv/return.ll
index 63f9b5f45a18..a53767275434 100644
--- a/test/CodeGen/Mips/cconv/return.ll
+++ b/test/CodeGen/Mips/cconv/return.ll
@@ -1,14 +1,14 @@
; RUN: llc -mtriple=mips-linux-gnu -relocation-model=static < %s | FileCheck --check-prefix=ALL --check-prefix=O32 %s
; RUN: llc -mtriple=mipsel-linux-gnu -relocation-model=static < %s | FileCheck --check-prefix=ALL --check-prefix=O32 %s
-; RUN-TODO: llc -mtriple=mips64-linux-gnu -relocation-model=static -mattr=-n64,+o32 < %s | FileCheck --check-prefix=ALL --check-prefix=O32 %s
-; RUN-TODO: llc -mtriple=mips64el-linux-gnu -relocation-model=static -mattr=-n64,+o32 < %s | FileCheck --check-prefix=ALL --check-prefix=O32 %s
+; RUN-TODO: llc -mtriple=mips64-linux-gnu -relocation-model=static -target-abi o32 < %s | FileCheck --check-prefix=ALL --check-prefix=O32 %s
+; RUN-TODO: llc -mtriple=mips64el-linux-gnu -relocation-model=static -target-abi o32 < %s | FileCheck --check-prefix=ALL --check-prefix=O32 %s
-; RUN: llc -mtriple=mips64-linux-gnu -relocation-model=static -mattr=-n64,+n32 < %s | FileCheck --check-prefix=ALL --check-prefix=N32 %s
-; RUN: llc -mtriple=mips64el-linux-gnu -relocation-model=static -mattr=-n64,+n32 < %s | FileCheck --check-prefix=ALL --check-prefix=N32 %s
+; RUN: llc -mtriple=mips64-linux-gnu -relocation-model=static -target-abi n32 < %s | FileCheck --check-prefix=ALL --check-prefix=N32 %s
+; RUN: llc -mtriple=mips64el-linux-gnu -relocation-model=static -target-abi n32 < %s | FileCheck --check-prefix=ALL --check-prefix=N32 %s
-; RUN: llc -mtriple=mips64-linux-gnu -relocation-model=static -mattr=-n64,+n64 < %s | FileCheck --check-prefix=ALL --check-prefix=N64 %s
-; RUN: llc -mtriple=mips64el-linux-gnu -relocation-model=static -mattr=-n64,+n64 < %s | FileCheck --check-prefix=ALL --check-prefix=N64 %s
+; RUN: llc -mtriple=mips64-linux-gnu -relocation-model=static -target-abi n64 < %s | FileCheck --check-prefix=ALL --check-prefix=N64 %s
+; RUN: llc -mtriple=mips64el-linux-gnu -relocation-model=static -target-abi n64 < %s | FileCheck --check-prefix=ALL --check-prefix=N64 %s
; Test the integer returns for all ABI's and byte orders as specified by
; section 5 of MD00305 (MIPS ABIs Described).
@@ -24,7 +24,7 @@
define i8 @reti8() nounwind {
entry:
- %0 = load volatile i8* @byte
+ %0 = load volatile i8, i8* @byte
ret i8 %0
}
@@ -38,7 +38,7 @@ entry:
define i32 @reti32() nounwind {
entry:
- %0 = load volatile i32* @word
+ %0 = load volatile i32, i32* @word
ret i32 %0
}
@@ -52,7 +52,7 @@ entry:
define i64 @reti64() nounwind {
entry:
- %0 = load volatile i64* @dword
+ %0 = load volatile i64, i64* @dword
ret i64 %0
}
diff --git a/test/CodeGen/Mips/cconv/stack-alignment.ll b/test/CodeGen/Mips/cconv/stack-alignment.ll
index 834033bc8da5..f21bc3066f72 100644
--- a/test/CodeGen/Mips/cconv/stack-alignment.ll
+++ b/test/CodeGen/Mips/cconv/stack-alignment.ll
@@ -1,14 +1,14 @@
; RUN: llc -march=mips < %s | FileCheck --check-prefix=ALL --check-prefix=O32 %s
; RUN: llc -march=mipsel < %s | FileCheck --check-prefix=ALL --check-prefix=O32 %s
-; RUN-TODO: llc -march=mips64 -mattr=-n64,+o32 < %s | FileCheck --check-prefix=ALL --check-prefix=O32 %s
-; RUN-TODO: llc -march=mips64el -mattr=-n64,+o32 < %s | FileCheck --check-prefix=ALL --check-prefix=O32 %s
+; RUN-TODO: llc -march=mips64 -target-abi o32 < %s | FileCheck --check-prefix=ALL --check-prefix=O32 %s
+; RUN-TODO: llc -march=mips64el -target-abi o32 < %s | FileCheck --check-prefix=ALL --check-prefix=O32 %s
-; RUN: llc -march=mips64 -mattr=-n64,+n32 < %s | FileCheck --check-prefix=ALL --check-prefix=N32 %s
-; RUN: llc -march=mips64el -mattr=-n64,+n32 < %s | FileCheck --check-prefix=ALL --check-prefix=N32 %s
+; RUN: llc -march=mips64 -target-abi n32 < %s | FileCheck --check-prefix=ALL --check-prefix=N32 %s
+; RUN: llc -march=mips64el -target-abi n32 < %s | FileCheck --check-prefix=ALL --check-prefix=N32 %s
-; RUN: llc -march=mips64 -mattr=-n64,+n64 < %s | FileCheck --check-prefix=ALL --check-prefix=N64 %s
-; RUN: llc -march=mips64el -mattr=-n64,+n64 < %s | FileCheck --check-prefix=ALL --check-prefix=N64 %s
+; RUN: llc -march=mips64 -target-abi n64 < %s | FileCheck --check-prefix=ALL --check-prefix=N64 %s
+; RUN: llc -march=mips64el -target-abi n64 < %s | FileCheck --check-prefix=ALL --check-prefix=N64 %s
; Test the stack alignment for all ABI's and byte orders as specified by
; section 5 of MD00305 (MIPS ABIs Described).
diff --git a/test/CodeGen/Mips/cfi_offset.ll b/test/CodeGen/Mips/cfi_offset.ll
index e23855bd65d2..97233328fd55 100644
--- a/test/CodeGen/Mips/cfi_offset.ll
+++ b/test/CodeGen/Mips/cfi_offset.ll
@@ -32,9 +32,9 @@ define void @bar() {
; CHECK: .cfi_offset 31, -20
; CHECK: .cfi_offset 16, -24
- %val1 = load volatile double* @var
- %val2 = load volatile double* @var
- call void (...)* @foo() nounwind
+ %val1 = load volatile double, double* @var
+ %val2 = load volatile double, double* @var
+ call void (...) @foo() nounwind
store volatile double %val1, double* @var
store volatile double %val2, double* @var
ret void
diff --git a/test/CodeGen/Mips/check-adde-redundant-moves.ll b/test/CodeGen/Mips/check-adde-redundant-moves.ll
index 527c21770263..7bc63a494ac7 100644
--- a/test/CodeGen/Mips/check-adde-redundant-moves.ll
+++ b/test/CodeGen/Mips/check-adde-redundant-moves.ll
@@ -4,12 +4,18 @@
; RUN: -check-prefix=ALL -check-prefix=GP32
; RUN: llc < %s -march=mips -mcpu=mips32r2 | FileCheck %s \
; RUN: -check-prefix=ALL -check-prefix=GP32
+; RUN: llc < %s -march=mips -mcpu=mips32r3 | FileCheck %s \
+; RUN: -check-prefix=ALL -check-prefix=GP32
+; RUN: llc < %s -march=mips -mcpu=mips32r5 | FileCheck %s \
+; RUN: -check-prefix=ALL -check-prefix=GP32
; RUN: llc < %s -march=mips -mcpu=mips32r6 | FileCheck %s \
; RUN: -check-prefix=ALL -check-prefix=GP32
; RUN: llc < %s -march=mips64 -mcpu=mips3 | FileCheck %s -check-prefix=ALL
; RUN: llc < %s -march=mips64 -mcpu=mips4 | FileCheck %s -check-prefix=ALL
; RUN: llc < %s -march=mips64 -mcpu=mips64 | FileCheck %s -check-prefix=ALL
; RUN: llc < %s -march=mips64 -mcpu=mips64r2 | FileCheck %s -check-prefix=ALL
+; RUN: llc < %s -march=mips64 -mcpu=mips64r3 | FileCheck %s -check-prefix=ALL
+; RUN: llc < %s -march=mips64 -mcpu=mips64r5 | FileCheck %s -check-prefix=ALL
; RUN: llc < %s -march=mips64 -mcpu=mips64r6 | FileCheck %s -check-prefix=ALL
define i64 @add_i64(i64 %a) {
diff --git a/test/CodeGen/Mips/ci2.ll b/test/CodeGen/Mips/ci2.ll
index e2068fdf14e1..4687748879ac 100644
--- a/test/CodeGen/Mips/ci2.ll
+++ b/test/CodeGen/Mips/ci2.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -soft-float -mips16-hard-float -relocation-model=static -mips16-constant-islands < %s | FileCheck %s -check-prefix=constisle
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -mattr=+soft-float -mips16-hard-float -relocation-model=static -mips16-constant-islands < %s | FileCheck %s -check-prefix=constisle
@i = common global i32 0, align 4
@b = common global i32 0, align 4
@@ -8,7 +8,7 @@
define void @foo() #0 {
entry:
store i32 305419896, i32* @i, align 4
- %0 = load i32* @b, align 4
+ %0 = load i32, i32* @b, align 4
%tobool = icmp ne i32 %0, 0
br i1 %tobool, label %if.then, label %if.else
diff --git a/test/CodeGen/Mips/cmov.ll b/test/CodeGen/Mips/cmov.ll
index b12c2df97c19..a8008a2cb29f 100755
--- a/test/CodeGen/Mips/cmov.ll
+++ b/test/CodeGen/Mips/cmov.ll
@@ -41,8 +41,8 @@
define i32* @cmov1(i32 signext %s) nounwind readonly {
entry:
%tobool = icmp ne i32 %s, 0
- %tmp1 = load i32** @i3, align 4
- %cond = select i1 %tobool, i32* getelementptr inbounds ([3 x i32]* @i1, i32 0, i32 0), i32* %tmp1
+ %tmp1 = load i32*, i32** @i3, align 4
+ %cond = select i1 %tobool, i32* getelementptr inbounds ([3 x i32], [3 x i32]* @i1, i32 0, i32 0), i32* %tmp1
ret i32* %cond
}
@@ -81,8 +81,8 @@ entry:
define i32 @cmov2(i32 signext %s) nounwind readonly {
entry:
%tobool = icmp ne i32 %s, 0
- %tmp1 = load i32* @c, align 4
- %tmp2 = load i32* @d, align 4
+ %tmp1 = load i32, i32* @c, align 4
+ %tmp2 = load i32, i32* @d, align 4
%cond = select i1 %tobool, i32 %tmp1, i32 %tmp2
ret i32 %cond
}
diff --git a/test/CodeGen/Mips/cmplarge.ll b/test/CodeGen/Mips/cmplarge.ll
index 2a3d30a95492..79019065a905 100644
--- a/test/CodeGen/Mips/cmplarge.ll
+++ b/test/CodeGen/Mips/cmplarge.ll
@@ -9,8 +9,8 @@ target triple = "mipsel--linux-gnu"
define void @getSubImagesLuma(%struct.StorablePicture* nocapture %s) #0 {
entry:
- %size_y = getelementptr inbounds %struct.StorablePicture* %s, i32 0, i32 1
- %0 = load i32* %size_y, align 4
+ %size_y = getelementptr inbounds %struct.StorablePicture, %struct.StorablePicture* %s, i32 0, i32 1
+ %0 = load i32, i32* %size_y, align 4
%sub = add nsw i32 %0, -1
%add5 = add nsw i32 %0, 20
%cmp6 = icmp sgt i32 %add5, -20
@@ -20,7 +20,7 @@ for.body: ; preds = %entry, %for.body
%j.07 = phi i32 [ %inc, %for.body ], [ -20, %entry ]
%call = tail call i32 bitcast (i32 (...)* @iClip3 to i32 (i32, i32, i32)*)(i32 0, i32 %sub, i32 %j.07) #2
%inc = add nsw i32 %j.07, 1
- %1 = load i32* %size_y, align 4
+ %1 = load i32, i32* %size_y, align 4
%add = add nsw i32 %1, 20
%cmp = icmp slt i32 %inc, %add
br i1 %cmp, label %for.body, label %for.end
diff --git a/test/CodeGen/Mips/const1.ll b/test/CodeGen/Mips/const1.ll
index f32ce244cf40..1a5d58bd3f95 100644
--- a/test/CodeGen/Mips/const1.ll
+++ b/test/CodeGen/Mips/const1.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -soft-float -mips16-hard-float -relocation-model=static -mips16-constant-islands < %s | FileCheck %s
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -mattr=+soft-float -mips16-hard-float -relocation-model=static -mips16-constant-islands < %s | FileCheck %s
; ModuleID = 'const1.c'
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-n32-S64"
diff --git a/test/CodeGen/Mips/const4a.ll b/test/CodeGen/Mips/const4a.ll
index ac6795b2c833..c31e54a01036 100644
--- a/test/CodeGen/Mips/const4a.ll
+++ b/test/CodeGen/Mips/const4a.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -soft-float -mips16-hard-float -relocation-model=pic -mips16-constant-islands -mips-constant-islands-no-load-relaxation < %s | FileCheck %s -check-prefix=no-load-relax
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -mattr=+soft-float -mips16-hard-float -relocation-model=pic -mips16-constant-islands -mips-constant-islands-no-load-relaxation < %s | FileCheck %s -check-prefix=no-load-relax
; ModuleID = 'const4.c'
target datalayout = "E-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-n32-S64"
@@ -14,7 +14,7 @@ target triple = "mips--linux-gnu"
define void @t() #0 {
entry:
store i32 -559023410, i32* @i, align 4
- %0 = load i32* @b, align 4
+ %0 = load i32, i32* @b, align 4
; no-load-relax: lw ${{[0-9]+}}, $CPI0_1 # 16 bit inst
%tobool = icmp ne i32 %0, 0
br i1 %tobool, label %if.then, label %if.else
diff --git a/test/CodeGen/Mips/const6.ll b/test/CodeGen/Mips/const6.ll
index c26e02f2ebba..49e98ea78703 100644
--- a/test/CodeGen/Mips/const6.ll
+++ b/test/CodeGen/Mips/const6.ll
@@ -1,6 +1,6 @@
-; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -soft-float -mips16-hard-float -relocation-model=pic -mips16-constant-islands < %s | FileCheck %s -check-prefix=load-relax
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -mattr=+soft-float -mips16-hard-float -relocation-model=pic -mips16-constant-islands < %s | FileCheck %s -check-prefix=load-relax
-; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -soft-float -mips16-hard-float -relocation-model=pic -mips16-constant-islands -mips-constant-islands-no-load-relaxation < %s | FileCheck %s -check-prefix=no-load-relax
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -mattr=+soft-float -mips16-hard-float -relocation-model=pic -mips16-constant-islands -mips-constant-islands-no-load-relaxation < %s | FileCheck %s -check-prefix=no-load-relax
; ModuleID = 'const6.c'
target datalayout = "E-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-n32-S64"
diff --git a/test/CodeGen/Mips/const6a.ll b/test/CodeGen/Mips/const6a.ll
index aff1357c3a8f..54a3f2234dc2 100644
--- a/test/CodeGen/Mips/const6a.ll
+++ b/test/CodeGen/Mips/const6a.ll
@@ -1,6 +1,6 @@
-; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -soft-float -mips16-hard-float -relocation-model=pic -mips16-constant-islands < %s | FileCheck %s -check-prefix=load-relax1
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -mattr=+soft-float -mips16-hard-float -relocation-model=pic -mips16-constant-islands < %s | FileCheck %s -check-prefix=load-relax1
-; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -soft-float -mips16-hard-float -relocation-model=pic -mips16-constant-islands < %s | FileCheck %s -check-prefix=load-relax
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -mattr=+soft-float -mips16-hard-float -relocation-model=pic -mips16-constant-islands < %s | FileCheck %s -check-prefix=load-relax
; ModuleID = 'const6a.c'
target datalayout = "E-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-n32-S64"
diff --git a/test/CodeGen/Mips/ctlz.ll b/test/CodeGen/Mips/ctlz.ll
index 1f871664a6cf..96af1973b196 100644
--- a/test/CodeGen/Mips/ctlz.ll
+++ b/test/CodeGen/Mips/ctlz.ll
@@ -9,7 +9,7 @@ define i32 @main() #0 {
entry:
%retval = alloca i32, align 4
store i32 0, i32* %retval
- %0 = load i32* @x, align 4
+ %0 = load i32, i32* @x, align 4
%1 = call i32 @llvm.ctlz.i32(i32 %0, i1 true)
store i32 %1, i32* @y, align 4
ret i32 0
diff --git a/test/CodeGen/Mips/dagcombine_crash.ll b/test/CodeGen/Mips/dagcombine_crash.ll
new file mode 100644
index 000000000000..6fcf2b4e68b4
--- /dev/null
+++ b/test/CodeGen/Mips/dagcombine_crash.ll
@@ -0,0 +1,25 @@
+; RUN: llc -o - %s | FileCheck %s
+; The SelectionDAG select(select()) normalisation crashed when the condition
+; inputs had different types.
+target datalayout = "E-m:m-p:32:32-i8:8:32-i16:16:32-i64:64-n32-S64"
+target triple = "mips--"
+
+; CHECK-LABEL: foobar
+; CHECK: sltiu ${{[0-9]*}}, ${{[0-9]*}}, 42
+; CHECK: sltiu ${{[0-9]*}}, ${{[0-9]*}}, 23
+; CHECK: and ${{[0-9]*}}, ${{[0-9]*}}, ${{[0-9]*}}
+; CHECK: sltu ${{[0-9]*}}, ${{[0-9]*}}, ${{[0-9]*}}
+; CHECK: addiu ${{[0-9]*}}, ${{[0-9]*}}, -1
+; CHECK: movn ${{[0-9]*}}, ${{[0-9]*}}, ${{[0-9]*}}
+; CHECK: jr $ra
+; CHECK: move ${{[0-9]*}}, ${{[0-9]*}}
+define i64 @foobar(i32 %arg) #0 {
+entry:
+ %cmp0 = icmp ult i32 %arg, 23
+ %cmp1 = icmp ult i32 %arg, 42
+ %and = and i1 %cmp0, %cmp1
+ %cmp2 = icmp ugt i32 %arg, 0
+ %sext = sext i1 %cmp1 to i64
+ %retval.0 = select i1 %and, i64 %sext, i64 0
+ ret i64 %retval.0
+}
diff --git a/test/CodeGen/Mips/delay-slot-fill-forward.ll b/test/CodeGen/Mips/delay-slot-fill-forward.ll
new file mode 100644
index 000000000000..7fc011da92db
--- /dev/null
+++ b/test/CodeGen/Mips/delay-slot-fill-forward.ll
@@ -0,0 +1,183 @@
+; RUN: llc < %s -march=mips -mcpu=mips32r2 -O2 \
+; RUN: -disable-mips-df-forward-search=false \
+; RUN: -disable-mips-df-succbb-search=false \
+; RUN: -relocation-model=static | FileCheck %s
+
+; This test was generated with bugpoint from
+; MultiSource/Applications/JM/lencod/me_fullsearch.c
+
+%struct.SubImageContainer = type { i16****, [2 x i16****] }
+%struct.storable_picture = type { i32, i32, i32, i32, i32, i32,
+ [6 x [33 x i64]], [6 x [33 x i64]], [6 x [33 x i64]], [6 x [33 x i64]],
+ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32,
+ i32, i32, i32, i32, i32, i16**, i16****, i16****, i16*****, i16***,
+ i8*, i8***, i64***, i64***, i16****, i8**, i8**, %struct.storable_picture*,
+ %struct.storable_picture*, %struct.storable_picture*,
+ i32, i32, i32, i32, i32, i32, i32 }
+
+@img_height = external global i16, align 2
+@width_pad = external global i32, align 4
+@height_pad = external global i32, align 4
+@mvbits = external global i32*, align 4
+@ref_pic1_sub = external global %struct.SubImageContainer, align 4
+@ref_pic2_sub = external global %struct.SubImageContainer, align 4
+@wbp_weight = external global i32****, align 4
+@weight1 = external global i16, align 2
+@weight2 = external global i16, align 2
+@offsetBi = external global i16, align 2
+@computeBiPred2 = external global [3 x i32 (i16*, i32, i32, i32, i32, i32, i32, i32)*], align 4
+@computeBiPred = external global i32 (i16*, i32, i32, i32, i32, i32, i32, i32)*, align 4
+@bipred2_access_method = external global i32, align 4
+@start_me_refinement_hp = external global i32, align 4
+
+declare i32 @foobar(i16*, i32 signext , i32 signext , i32 signext ,
+ i32 signext , i32 signext , i32 signext , i32 signext ) #1
+
+define void @SubPelBlockSearchBiPred(i16* %orig_pic, i16 signext %ref,
+ i32 signext %pic_pix_x, i32 signext %pic_pix_y, i16 signext %pred_mv_y,
+ i16* nocapture %mv_x, i16* nocapture %mv_y, i16* nocapture readonly %s_mv_x,
+ i32 signext %search_pos2, i32 signext %min_mcost) #0 {
+; CHECK-LABEL: SubPelBlockSearchBiPred:
+entry:
+ %add40 = shl i32 %pic_pix_x, 2
+ %shl = add i32 %add40, 80
+ %add41 = shl i32 %pic_pix_y, 2
+ %0 = load i32, i32* @start_me_refinement_hp, align 4, !tbaa !1
+ %cond47 = select i1 undef, i32 1, i32 %search_pos2
+ %1 = load i16, i16* %s_mv_x, align 2, !tbaa !5
+ %conv48 = sext i16 %1 to i32
+ %add49 = add nsw i32 %conv48, %shl
+ %idxprom52 = sext i16 %ref to i32
+ %2 = load i32, i32* null, align 4, !tbaa !1
+ store i32 undef, i32* bitcast (%struct.SubImageContainer* @ref_pic1_sub to i32*), align 4, !tbaa !7
+ %3 = load i32, i32* undef, align 4, !tbaa !10
+ store i32 %3, i32* bitcast (%struct.SubImageContainer* @ref_pic2_sub to i32*), align 4, !tbaa !7
+ store i16 0, i16* @img_height, align 2, !tbaa !5
+ %size_x_pad = getelementptr inbounds %struct.storable_picture, %struct.storable_picture* null, i32 0, i32 22
+ %4 = load i32, i32* %size_x_pad, align 4, !tbaa !12
+ store i32 %4, i32* @width_pad, align 4, !tbaa !1
+ %5 = load i32, i32* undef, align 4, !tbaa !13
+ store i32 %5, i32* @height_pad, align 4, !tbaa !1
+ %6 = load i32****, i32***** @wbp_weight, align 4, !tbaa !14
+ %arrayidx75 = getelementptr inbounds i32***, i32**** %6, i32 undef
+ %7 = load i32***, i32**** %arrayidx75, align 4, !tbaa !14
+ %arrayidx76 = getelementptr inbounds i32**, i32*** %7, i32 %idxprom52
+ %8 = load i32**, i32*** %arrayidx76, align 4, !tbaa !14
+ %cond87.in671 = load i32*, i32** %8, align 4
+ %cond87672 = load i32, i32* %cond87.in671, align 4
+ %conv88673 = trunc i32 %cond87672 to i16
+ store i16 %conv88673, i16* @weight1, align 2, !tbaa !5
+ %cond105 = load i32, i32* undef, align 4
+ %conv106 = trunc i32 %cond105 to i16
+ store i16 %conv106, i16* @weight2, align 2, !tbaa !5
+ store i16 0, i16* @offsetBi, align 2, !tbaa !5
+ %storemerge655 = load i32, i32* bitcast (i32 (i16*, i32, i32, i32, i32, i32, i32, i32)** getelementptr inbounds ([3 x i32 (i16*, i32, i32, i32, i32, i32, i32, i32)*], [3 x i32 (i16*, i32, i32, i32, i32, i32, i32, i32)*]* @computeBiPred2, i32 0, i32 1) to i32*), align 4
+ store i32 %storemerge655, i32* bitcast (i32 (i16*, i32, i32, i32, i32, i32, i32, i32)** @computeBiPred to i32*), align 4, !tbaa !14
+ %9 = load i16, i16* %mv_x, align 2, !tbaa !5
+ %cmp270 = icmp sgt i32 undef, 1
+ %or.cond = and i1 %cmp270, false
+ br i1 %or.cond, label %land.lhs.true277, label %if.else289
+
+land.lhs.true277: ; preds = %entry
+ %10 = load i16, i16* %mv_y, align 2, !tbaa !5
+ %conv278 = sext i16 %10 to i32
+ %add279 = add nsw i32 %conv278, 0
+ %cmp280 = icmp sgt i32 %add279, 1
+ %or.cond660 = and i1 %cmp280, undef
+ br i1 %or.cond660, label %if.end290, label %if.else289
+
+if.else289: ; preds = %land.lhs.true277, %entry
+ br label %if.end290
+
+if.end290: ; preds = %if.else289, %land.lhs.true277
+ %storemerge = phi i32 [ 1, %if.else289 ], [ 0, %land.lhs.true277 ]
+ store i32 %storemerge, i32* @bipred2_access_method, align 4, !tbaa !1
+ %cmp315698 = icmp slt i32 %0, %cond47
+ br i1 %cmp315698, label %for.body.lr.ph, label %if.end358
+
+for.body.lr.ph: ; preds = %if.end290
+ %conv328 = sext i16 %pred_mv_y to i32
+ br label %for.body
+
+for.body: ; preds = %for.inc, %for.body.lr.ph
+ %11 = phi i16 [ %9, %for.body.lr.ph ], [ %.pre, %for.inc ]
+ %min_mcost.addr.0701 = phi i32 [ %min_mcost, %for.body.lr.ph ], [ undef, %for.inc ]
+ %pos.0700 = phi i32 [ %0, %for.body.lr.ph ], [ undef, %for.inc ]
+ %best_pos.0699 = phi i32 [ 0, %for.body.lr.ph ], [ %best_pos.1, %for.inc ]
+ %conv317 = sext i16 %11 to i32
+ %add320 = add nsw i32 0, %conv317
+ %12 = load i16, i16* %mv_y, align 2, !tbaa !5
+ %conv321 = sext i16 %12 to i32
+ %add324 = add nsw i32 0, %conv321
+ %13 = load i32*, i32** @mvbits, align 4, !tbaa !14
+ %14 = load i32, i32* undef, align 4, !tbaa !1
+ %sub329 = sub nsw i32 %add324, %conv328
+ %arrayidx330 = getelementptr inbounds i32, i32* %13, i32 %sub329
+ %15 = load i32, i32* %arrayidx330, align 4, !tbaa !1
+ %add331 = add nsw i32 %15, %14
+ %mul = mul nsw i32 %add331, %2
+ %shr332 = ashr i32 %mul, 16
+ %cmp333 = icmp sgt i32 %min_mcost.addr.0701, %shr332
+ br i1 %cmp333, label %if.end336, label %for.inc
+
+if.end336: ; preds = %for.body
+ ; CHECK: jalr $25
+ ; CHECK-NOT: move $ra, {{.*}}
+ ; CHECK: j $BB{{.*}}
+ %add337 = add nsw i32 %add320, %shl
+ %add338 = add nsw i32 %add324, 0
+ %call340 = tail call i32 undef(i16* %orig_pic, i32 signext undef, i32 signext
+ undef, i32 signext 0, i32 signext %add49,
+ i32 signext undef, i32 signext %add337,
+ i32 signext %add338) #1
+ %cmp342 = icmp slt i32 0, %min_mcost.addr.0701
+ %pos.0.best_pos.0 = select i1 %cmp342, i32 %pos.0700, i32 %best_pos.0699
+ br label %for.inc
+
+for.inc: ; preds = %if.end336, %for.body
+ %best_pos.1 = phi i32 [ %best_pos.0699, %for.body ], [ %pos.0.best_pos.0, %if.end336 ]
+ %.pre = load i16, i16* %mv_x, align 2, !tbaa !5
+ br label %for.body
+
+if.end358: ; preds = %if.end290
+ %.min_mcost.addr.0 = select i1 false, i32 2147483647, i32 %min_mcost
+ br i1 undef, label %for.body415.lr.ph, label %if.end461
+
+for.body415.lr.ph: ; preds = %if.end358
+ %16 = load i16, i16* %mv_y, align 2, !tbaa !5
+ %conv420 = sext i16 %16 to i32
+ %add423 = add nsw i32 0, %conv420
+ %cmp433 = icmp sgt i32 %.min_mcost.addr.0, 0
+ br i1 %cmp433, label %if.end436, label %if.end461
+
+if.end436: ; preds = %for.body415.lr.ph
+ %add438 = add nsw i32 %add423, 0
+ %call440 = tail call i32 @foobar(i16* %orig_pic, i32 signext undef, i32 signext undef,
+ i32 signext 0, i32 signext %add49, i32 signext undef,
+ i32 signext undef, i32 signext %add438) #1
+ br label %if.end461
+
+if.end461: ; preds = %if.end436, %for.body415.lr.ph, %if.end358
+ ret void
+}
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="mips32r2" "target-features"="+mips32r2,+nooddspreg,+fpxx" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind }
+
+!llvm.ident = !{!0}
+
+!0 = !{!"clang version 3.7.0 (trunk 236218) (llvm/trunk 236237)"}
+!1 = !{!2, !2, i64 0}
+!2 = !{!"int", !3, i64 0}
+!3 = !{!"omnipotent char", !4, i64 0}
+!4 = !{!"Simple C/C++ TBAA"}
+!5 = !{!6, !6, i64 0}
+!6 = !{!"short", !3, i64 0}
+!7 = !{!8, !9, i64 0}
+!8 = !{!"", !9, i64 0, !3, i64 4}
+!9 = !{!"any pointer", !3, i64 0}
+!10 = !{!11, !9, i64 6440}
+!11 = !{!"storable_picture", !3, i64 0, !2, i64 4, !2, i64 8, !2, i64 12, !2, i64 16, !2, i64 20, !3, i64 24, !3, i64 1608, !3, i64 3192, !3, i64 4776, !2, i64 6360, !2, i64 6364, !2, i64 6368, !2, i64 6372, !2, i64 6376, !2, i64 6380, !2, i64 6384, !2, i64 6388, !2, i64 6392, !2, i64 6396, !2, i64 6400, !2, i64 6404, !2, i64 6408, !2, i64 6412, !2, i64 6416, !2, i64 6420, !2, i64 6424, !2, i64 6428, !2, i64 6432, !9, i64 6436, !9, i64 6440, !9, i64 6444, !9, i64 6448, !9, i64 6452, !9, i64 6456, !9, i64 6460, !9, i64 6464, !9, i64 6468, !9, i64 6472, !9, i64 6476, !9, i64 6480, !9, i64 6484, !9, i64 6488, !9, i64 6492, !2, i64 6496, !2, i64 6500, !2, i64 6504, !2, i64 6508, !2, i64 6512, !2, i64 6516, !2, i64 6520}
+!12 = !{!11, !2, i64 6408}
+!13 = !{!11, !2, i64 6412}
+!14 = !{!9, !9, i64 0}
diff --git a/test/CodeGen/Mips/delay-slot-kill.ll b/test/CodeGen/Mips/delay-slot-kill.ll
new file mode 100644
index 000000000000..57b630303c26
--- /dev/null
+++ b/test/CodeGen/Mips/delay-slot-kill.ll
@@ -0,0 +1,14 @@
+; RUN: llc < %s -march=mips64 -mcpu=mips3 | FileCheck %s
+
+; Currently, the following IR generates a KILL instruction between the
+; bitwise-and instruction and the return instruction. Verify that the delay
+; slot filler ignores such KILL instructions and still fills the return
+; instruction's delay slot properly.
+define signext i32 @f1(i32 signext %a, i32 signext %b) {
+entry:
+ ; CHECK: jr $ra
+ ; CHECK-NEXT: and $2, $4, $5
+
+ %r = and i32 %a, %b
+ ret i32 %r
+}
diff --git a/test/CodeGen/Mips/disable-tail-merge.ll b/test/CodeGen/Mips/disable-tail-merge.ll
index b4c093aa8528..9396db7be7f6 100644
--- a/test/CodeGen/Mips/disable-tail-merge.ll
+++ b/test/CodeGen/Mips/disable-tail-merge.ll
@@ -9,20 +9,20 @@
define i32 @test1(i32 %a) {
entry:
%tobool = icmp eq i32 %a, 0
- %0 = load i32* @g0, align 4
+ %0 = load i32, i32* @g0, align 4
br i1 %tobool, label %if.else, label %if.then
if.then:
%add = add nsw i32 %0, 1
store i32 %add, i32* @g0, align 4
- %1 = load i32* @g1, align 4
+ %1 = load i32, i32* @g1, align 4
%add1 = add nsw i32 %1, 23
br label %if.end
if.else:
%add2 = add nsw i32 %0, 11
store i32 %add2, i32* @g0, align 4
- %2 = load i32* @g1, align 4
+ %2 = load i32, i32* @g1, align 4
%add3 = add nsw i32 %2, 23
br label %if.end
diff --git a/test/CodeGen/Mips/div.ll b/test/CodeGen/Mips/div.ll
index 00e2c1927459..731841c554fa 100644
--- a/test/CodeGen/Mips/div.ll
+++ b/test/CodeGen/Mips/div.ll
@@ -6,8 +6,8 @@
define void @test() nounwind {
entry:
- %0 = load i32* @iiii, align 4
- %1 = load i32* @jjjj, align 4
+ %0 = load i32, i32* @iiii, align 4
+ %1 = load i32, i32* @jjjj, align 4
%div = sdiv i32 %0, %1
; 16: div $zero, ${{[0-9]+}}, ${{[0-9]+}}
; 16: mflo ${{[0-9]+}}
diff --git a/test/CodeGen/Mips/div_rem.ll b/test/CodeGen/Mips/div_rem.ll
index 950192eee169..e64529cee841 100644
--- a/test/CodeGen/Mips/div_rem.ll
+++ b/test/CodeGen/Mips/div_rem.ll
@@ -7,8 +7,8 @@
define void @test() nounwind {
entry:
- %0 = load i32* @iiii, align 4
- %1 = load i32* @jjjj, align 4
+ %0 = load i32, i32* @iiii, align 4
+ %1 = load i32, i32* @jjjj, align 4
%div = sdiv i32 %0, %1
store i32 %div, i32* @kkkk, align 4
%rem = srem i32 %0, %1
diff --git a/test/CodeGen/Mips/divrem.ll b/test/CodeGen/Mips/divrem.ll
index a9cfe0fa1523..918db053f5b6 100644
--- a/test/CodeGen/Mips/divrem.ll
+++ b/test/CodeGen/Mips/divrem.ll
@@ -220,8 +220,8 @@ entry:
; FIXME: It's not clear what this is supposed to test.
define i32 @killFlags() {
entry:
- %0 = load i32* @g0, align 4
- %1 = load i32* @g1, align 4
+ %0 = load i32, i32* @g0, align 4
+ %1 = load i32, i32* @g1, align 4
%div = sdiv i32 %0, %1
ret i32 %div
}
diff --git a/test/CodeGen/Mips/divu.ll b/test/CodeGen/Mips/divu.ll
index b96a439390ca..5bc765a71eb9 100644
--- a/test/CodeGen/Mips/divu.ll
+++ b/test/CodeGen/Mips/divu.ll
@@ -6,8 +6,8 @@
define void @test() nounwind {
entry:
- %0 = load i32* @iiii, align 4
- %1 = load i32* @jjjj, align 4
+ %0 = load i32, i32* @iiii, align 4
+ %1 = load i32, i32* @jjjj, align 4
%div = udiv i32 %0, %1
; 16: divu $zero, ${{[0-9]+}}, ${{[0-9]+}}
; 16: mflo ${{[0-9]+}}
diff --git a/test/CodeGen/Mips/divu_remu.ll b/test/CodeGen/Mips/divu_remu.ll
index a6c1563ac195..a079440b913f 100644
--- a/test/CodeGen/Mips/divu_remu.ll
+++ b/test/CodeGen/Mips/divu_remu.ll
@@ -8,8 +8,8 @@
define void @test() nounwind {
entry:
- %0 = load i32* @iiii, align 4
- %1 = load i32* @jjjj, align 4
+ %0 = load i32, i32* @iiii, align 4
+ %1 = load i32, i32* @jjjj, align 4
%div = udiv i32 %0, %1
store i32 %div, i32* @kkkk, align 4
%rem = urem i32 %0, %1
diff --git a/test/CodeGen/Mips/dsp-patterns.ll b/test/CodeGen/Mips/dsp-patterns.ll
index f5bb3abed90e..837c0d8bfc52 100644
--- a/test/CodeGen/Mips/dsp-patterns.ll
+++ b/test/CodeGen/Mips/dsp-patterns.ll
@@ -6,8 +6,8 @@
define zeroext i8 @test_lbux(i8* nocapture %b, i32 %i) {
entry:
- %add.ptr = getelementptr inbounds i8* %b, i32 %i
- %0 = load i8* %add.ptr, align 1
+ %add.ptr = getelementptr inbounds i8, i8* %b, i32 %i
+ %0 = load i8, i8* %add.ptr, align 1
ret i8 %0
}
@@ -16,8 +16,8 @@ entry:
define signext i16 @test_lhx(i16* nocapture %b, i32 %i) {
entry:
- %add.ptr = getelementptr inbounds i16* %b, i32 %i
- %0 = load i16* %add.ptr, align 2
+ %add.ptr = getelementptr inbounds i16, i16* %b, i32 %i
+ %0 = load i16, i16* %add.ptr, align 2
ret i16 %0
}
@@ -26,8 +26,8 @@ entry:
define i32 @test_lwx(i32* nocapture %b, i32 %i) {
entry:
- %add.ptr = getelementptr inbounds i32* %b, i32 %i
- %0 = load i32* %add.ptr, align 4
+ %add.ptr = getelementptr inbounds i32, i32* %b, i32 %i
+ %0 = load i32, i32* %add.ptr, align 4
ret i32 %0
}
diff --git a/test/CodeGen/Mips/dsp-vec-load-store.ll b/test/CodeGen/Mips/dsp-vec-load-store.ll
index 7e4a8fedaa8c..f9251807d000 100644
--- a/test/CodeGen/Mips/dsp-vec-load-store.ll
+++ b/test/CodeGen/Mips/dsp-vec-load-store.ll
@@ -5,7 +5,7 @@
define void @extend_load_trunc_store_v2i8() {
entry:
- %0 = load <2 x i8>* @g1, align 2
+ %0 = load <2 x i8>, <2 x i8>* @g1, align 2
store <2 x i8> %0, <2 x i8>* @g0, align 2
ret void
}
diff --git a/test/CodeGen/Mips/eh-return32.ll b/test/CodeGen/Mips/eh-return32.ll
index 748050c4d34b..542c5bf4462e 100644
--- a/test/CodeGen/Mips/eh-return32.ll
+++ b/test/CodeGen/Mips/eh-return32.ll
@@ -7,7 +7,7 @@ declare void @foo(...)
define i8* @f1(i32 %offset, i8* %handler) {
entry:
- call void (...)* @foo()
+ call void (...) @foo()
call void @llvm.eh.return.i32(i32 %offset, i8* %handler)
unreachable
diff --git a/test/CodeGen/Mips/eh-return64.ll b/test/CodeGen/Mips/eh-return64.ll
index 74a43231598c..2f8203d77c84 100644
--- a/test/CodeGen/Mips/eh-return64.ll
+++ b/test/CodeGen/Mips/eh-return64.ll
@@ -8,7 +8,7 @@ declare void @foo(...)
define void @f1(i64 %offset, i8* %handler) {
entry:
- call void (...)* @foo()
+ call void (...) @foo()
call void @llvm.eh.return.i64(i64 %offset, i8* %handler)
unreachable
diff --git a/test/CodeGen/Mips/eh.ll b/test/CodeGen/Mips/eh.ll
index fc9e2ef21a8b..fcbd99ef737b 100644
--- a/test/CodeGen/Mips/eh.ll
+++ b/test/CodeGen/Mips/eh.ll
@@ -27,6 +27,7 @@ lpad: ; preds = %entry
; CHECK-EL: bne $5
%exn.val = landingpad { i8*, i32 } personality i32 (...)* @__gxx_personality_v0
+ cleanup
catch i8* bitcast (i8** @_ZTId to i8*)
%exn = extractvalue { i8*, i32 } %exn.val, 0
%sel = extractvalue { i8*, i32 } %exn.val, 1
@@ -37,7 +38,7 @@ lpad: ; preds = %entry
catch: ; preds = %lpad
%3 = tail call i8* @__cxa_begin_catch(i8* %exn) nounwind
%4 = bitcast i8* %3 to double*
- %exn.scalar = load double* %4, align 8
+ %exn.scalar = load double, double* %4, align 8
%add = fadd double %exn.scalar, %i2
store double %add, double* @g1, align 8
tail call void @__cxa_end_catch() nounwind
diff --git a/test/CodeGen/Mips/ehframe-indirect.ll b/test/CodeGen/Mips/ehframe-indirect.ll
index b4efb40b6422..f124881a472f 100644
--- a/test/CodeGen/Mips/ehframe-indirect.ll
+++ b/test/CodeGen/Mips/ehframe-indirect.ll
@@ -1,16 +1,18 @@
-; RUN: llc -mtriple=mipsel-linux-gnu < %s | FileCheck -check-prefix=CHECK32 %s
-; RUN: llc -mtriple=mipsel-linux-android < %s | FileCheck -check-prefix=CHECK32 %s
-; RUN: llc -mtriple=mips64el-linux-gnu < %s | FileCheck -check-prefix=CHECK64 %s
-; RUN: llc -mtriple=mips64el-linux-android < %s | FileCheck -check-prefix=CHECK64 %s
+; RUN: llc -mtriple=mipsel-linux-gnu < %s | FileCheck -check-prefix=ALL -check-prefix=O32 %s
+; RUN: llc -mtriple=mipsel-linux-android < %s | FileCheck -check-prefix=ALL -check-prefix=O32 %s
+; RUN: llc -mtriple=mips64el-linux-gnu -target-abi=n32 < %s | FileCheck -check-prefix=ALL -check-prefix=N32 %s
+; RUN: llc -mtriple=mips64el-linux-android -target-abi=n32 < %s | FileCheck -check-prefix=ALL -check-prefix=N32 %s
+; RUN: llc -mtriple=mips64el-linux-gnu < %s | FileCheck -check-prefix=ALL -check-prefix=N64 %s
+; RUN: llc -mtriple=mips64el-linux-android < %s | FileCheck -check-prefix=ALL -check-prefix=N64 %s
define i32 @main() {
-; CHECK: .cfi_startproc
-; CHECK: .cfi_personality 128, DW.ref.__gxx_personality_v0
+; ALL: .cfi_startproc
+; ALL: .cfi_personality 128, DW.ref.__gxx_personality_v0
entry:
invoke void @foo() to label %cont unwind label %lpad
-; CHECK: foo
-; CHECK: jalr
+; ALL: foo
+; ALL: jalr
lpad:
%0 = landingpad { i8*, i32 } personality i8*
@@ -20,20 +22,23 @@ lpad:
cont:
ret i32 0
}
-; CHECK: .cfi_endproc
+; ALL: .cfi_endproc
declare i32 @__gxx_personality_v0(...)
declare void @foo()
-; CHECK: .hidden DW.ref.__gxx_personality_v0
-; CHECK: .weak DW.ref.__gxx_personality_v0
-; CHECK: .section .data.DW.ref.__gxx_personality_v0,"aGw",@progbits,DW.ref.__gxx_personality_v0,comdat
-; CHECK32: .align 2
-; CHECK64: .align 3
-; CHECK: .type DW.ref.__gxx_personality_v0,@object
-; CHECK32: .size DW.ref.__gxx_personality_v0, 4
-; CHECK64: .size DW.ref.__gxx_personality_v0, 8
-; CHECK: DW.ref.__gxx_personality_v0:
-; CHECK32: .4byte __gxx_personality_v0
-; CHECK64: .8byte __gxx_personality_v0
+; ALL: .hidden DW.ref.__gxx_personality_v0
+; ALL: .weak DW.ref.__gxx_personality_v0
+; ALL: .section .data.DW.ref.__gxx_personality_v0,"aGw",@progbits,DW.ref.__gxx_personality_v0,comdat
+; O32: .align 2
+; N32: .align 2
+; N64: .align 3
+; ALL: .type DW.ref.__gxx_personality_v0,@object
+; O32: .size DW.ref.__gxx_personality_v0, 4
+; N32: .size DW.ref.__gxx_personality_v0, 4
+; N64: .size DW.ref.__gxx_personality_v0, 8
+; ALL: DW.ref.__gxx_personality_v0:
+; O32: .4byte __gxx_personality_v0
+; N32: .4byte __gxx_personality_v0
+; N64: .8byte __gxx_personality_v0
diff --git a/test/CodeGen/Mips/emergency-spill-slot-near-fp.ll b/test/CodeGen/Mips/emergency-spill-slot-near-fp.ll
new file mode 100644
index 000000000000..3dc1cde77095
--- /dev/null
+++ b/test/CodeGen/Mips/emergency-spill-slot-near-fp.ll
@@ -0,0 +1,32 @@
+; Check that the register scavenging spill slot is close to $fp.
+; RUN: llc -march=mipsel -O0 < %s | FileCheck %s
+
+; CHECK: sw ${{.*}}, 4($fp)
+; CHECK: lw ${{.*}}, 4($fp)
+
+define i32 @main(i32 signext %argc, i8** %argv) "no-frame-pointer-elim"="true" {
+entry:
+ %retval = alloca i32, align 4
+ %argc.addr = alloca i32, align 4
+ %argv.addr = alloca i8**, align 4
+ %v0 = alloca <16 x i8>, align 16
+ %.compoundliteral = alloca <16 x i8>, align 16
+ %v1 = alloca <16 x i8>, align 16
+ %.compoundliteral1 = alloca <16 x i8>, align 16
+ %unused_variable = alloca [16384 x i32], align 4
+ %result = alloca <16 x i8>, align 16
+ store i32 0, i32* %retval
+ store i32 %argc, i32* %argc.addr, align 4
+ store i8** %argv, i8*** %argv.addr, align 4
+ store <16 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16>, <16 x i8>* %.compoundliteral
+ %0 = load <16 x i8>, <16 x i8>* %.compoundliteral
+ store <16 x i8> %0, <16 x i8>* %v0, align 16
+ store <16 x i8> zeroinitializer, <16 x i8>* %.compoundliteral1
+ %1 = load <16 x i8>, <16 x i8>* %.compoundliteral1
+ store <16 x i8> %1, <16 x i8>* %v1, align 16
+ %2 = load <16 x i8>, <16 x i8>* %v0, align 16
+ %3 = load <16 x i8>, <16 x i8>* %v1, align 16
+ %mul = mul <16 x i8> %2, %3
+ store <16 x i8> %mul, <16 x i8>* %result, align 16
+ ret i32 0
+}
diff --git a/test/CodeGen/Mips/emit-big-cst.ll b/test/CodeGen/Mips/emit-big-cst.ll
index a168743859a3..9bc96c89307d 100644
--- a/test/CodeGen/Mips/emit-big-cst.ll
+++ b/test/CodeGen/Mips/emit-big-cst.ll
@@ -10,7 +10,7 @@
define void @accessBig(i64* %storage) {
%addr = bitcast i64* %storage to i82*
- %bigLoadedCst = load volatile i82* @bigCst
+ %bigLoadedCst = load volatile i82, i82* @bigCst
%tmp = add i82 %bigLoadedCst, 1
store i82 %tmp, i82* %addr
ret void
diff --git a/test/CodeGen/Mips/ex2.ll b/test/CodeGen/Mips/ex2.ll
index 6d024c209c26..7547fdf81e35 100644
--- a/test/CodeGen/Mips/ex2.ll
+++ b/test/CodeGen/Mips/ex2.ll
@@ -17,12 +17,12 @@ entry:
store i32 0, i32* %retval
%exception = call i8* @__cxa_allocate_exception(i32 4) nounwind
%0 = bitcast i8* %exception to i8**
- store i8* getelementptr inbounds ([6 x i8]* @.str, i32 0, i32 0), i8** %0
+ store i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str, i32 0, i32 0), i8** %0
call void @__cxa_throw(i8* %exception, i8* bitcast (i8** @_ZTIPKc to i8*), i8* null) noreturn
unreachable
return: ; No predecessors!
- %1 = load i32* %retval
+ %1 = load i32, i32* %retval
ret i32 %1
}
diff --git a/test/CodeGen/Mips/extins.ll b/test/CodeGen/Mips/extins.ll
index efaeeea96a5e..6604f89b1843 100644
--- a/test/CodeGen/Mips/extins.ll
+++ b/test/CodeGen/Mips/extins.ll
@@ -16,7 +16,7 @@ entry:
; 16-NOT: ins ${{[0-9]+}}
%and = shl i32 %s, 5
%shl = and i32 %and, 16352
- %tmp3 = load i32* %d, align 4
+ %tmp3 = load i32, i32* %d, align 4
%and5 = and i32 %tmp3, -16353
%or = or i32 %and5, %shl
store i32 %or, i32* %d, align 4
diff --git a/test/CodeGen/Mips/f16abs.ll b/test/CodeGen/Mips/f16abs.ll
index 0fba9c4fd08a..838983274e9b 100644
--- a/test/CodeGen/Mips/f16abs.ll
+++ b/test/CodeGen/Mips/f16abs.ll
@@ -11,12 +11,12 @@
; Function Attrs: nounwind optsize
define i32 @main() #0 {
entry:
- %0 = load double* @y, align 8
+ %0 = load double, double* @y, align 8
%call = tail call double @fabs(double %0) #2
store double %call, double* @x, align 8
; static-NOT: .ent __call_stub_fp_fabs
; static-NOT: jal fabs
- %1 = load float* @y1, align 4
+ %1 = load float, float* @y1, align 4
%call2 = tail call float @fabsf(float %1) #2
store float %call2, float* @x1, align 4
; static-NOT: .ent __call_stub_fp_fabsf
diff --git a/test/CodeGen/Mips/fastcc.ll b/test/CodeGen/Mips/fastcc.ll
index 6b022c5e36d9..299e0d696cbb 100644
--- a/test/CodeGen/Mips/fastcc.ll
+++ b/test/CodeGen/Mips/fastcc.ll
@@ -108,23 +108,23 @@ entry:
; CHECK-NACL-NOT: lw $15
; CHECK-NACL-NOT: lw $24
- %0 = load i32* @gi0, align 4
- %1 = load i32* @gi1, align 4
- %2 = load i32* @gi2, align 4
- %3 = load i32* @gi3, align 4
- %4 = load i32* @gi4, align 4
- %5 = load i32* @gi5, align 4
- %6 = load i32* @gi6, align 4
- %7 = load i32* @gi7, align 4
- %8 = load i32* @gi8, align 4
- %9 = load i32* @gi9, align 4
- %10 = load i32* @gi10, align 4
- %11 = load i32* @gi11, align 4
- %12 = load i32* @gi12, align 4
- %13 = load i32* @gi13, align 4
- %14 = load i32* @gi14, align 4
- %15 = load i32* @gi15, align 4
- %16 = load i32* @gi16, align 4
+ %0 = load i32, i32* @gi0, align 4
+ %1 = load i32, i32* @gi1, align 4
+ %2 = load i32, i32* @gi2, align 4
+ %3 = load i32, i32* @gi3, align 4
+ %4 = load i32, i32* @gi4, align 4
+ %5 = load i32, i32* @gi5, align 4
+ %6 = load i32, i32* @gi6, align 4
+ %7 = load i32, i32* @gi7, align 4
+ %8 = load i32, i32* @gi8, align 4
+ %9 = load i32, i32* @gi9, align 4
+ %10 = load i32, i32* @gi10, align 4
+ %11 = load i32, i32* @gi11, align 4
+ %12 = load i32, i32* @gi12, align 4
+ %13 = load i32, i32* @gi13, align 4
+ %14 = load i32, i32* @gi14, align 4
+ %15 = load i32, i32* @gi15, align 4
+ %16 = load i32, i32* @gi16, align 4
tail call fastcc void @callee0(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7, i32 %8, i32 %9, i32 %10, i32 %11, i32 %12, i32 %13, i32 %14, i32 %15, i32 %16)
ret void
}
@@ -196,27 +196,27 @@ entry:
; CHECK: lwc1 $f1
; CHECK: lwc1 $f0
- %0 = load float* @gfa0, align 4
- %1 = load float* @gfa1, align 4
- %2 = load float* @gfa2, align 4
- %3 = load float* @gfa3, align 4
- %4 = load float* @gfa4, align 4
- %5 = load float* @gfa5, align 4
- %6 = load float* @gfa6, align 4
- %7 = load float* @gfa7, align 4
- %8 = load float* @gfa8, align 4
- %9 = load float* @gfa9, align 4
- %10 = load float* @gfa10, align 4
- %11 = load float* @gfa11, align 4
- %12 = load float* @gfa12, align 4
- %13 = load float* @gfa13, align 4
- %14 = load float* @gfa14, align 4
- %15 = load float* @gfa15, align 4
- %16 = load float* @gfa16, align 4
- %17 = load float* @gfa17, align 4
- %18 = load float* @gfa18, align 4
- %19 = load float* @gfa19, align 4
- %20 = load float* @gfa20, align 4
+ %0 = load float, float* @gfa0, align 4
+ %1 = load float, float* @gfa1, align 4
+ %2 = load float, float* @gfa2, align 4
+ %3 = load float, float* @gfa3, align 4
+ %4 = load float, float* @gfa4, align 4
+ %5 = load float, float* @gfa5, align 4
+ %6 = load float, float* @gfa6, align 4
+ %7 = load float, float* @gfa7, align 4
+ %8 = load float, float* @gfa8, align 4
+ %9 = load float, float* @gfa9, align 4
+ %10 = load float, float* @gfa10, align 4
+ %11 = load float, float* @gfa11, align 4
+ %12 = load float, float* @gfa12, align 4
+ %13 = load float, float* @gfa13, align 4
+ %14 = load float, float* @gfa14, align 4
+ %15 = load float, float* @gfa15, align 4
+ %16 = load float, float* @gfa16, align 4
+ %17 = load float, float* @gfa17, align 4
+ %18 = load float, float* @gfa18, align 4
+ %19 = load float, float* @gfa19, align 4
+ %20 = load float, float* @gfa20, align 4
tail call fastcc void @callee1(float %0, float %1, float %2, float %3, float %4, float %5, float %6, float %7, float %8, float %9, float %10, float %11, float %12, float %13, float %14, float %15, float %16, float %17, float %18, float %19, float %20)
ret void
}
@@ -292,17 +292,17 @@ entry:
; NOODDSPREG-DAG: lwc1 $[[F0:f[0-9]*[02468]]], 40($[[R0]])
; NOODDSPREG-DAG: swc1 $[[F0]], 0($sp)
- %0 = load float* getelementptr ([11 x float]* @fa, i32 0, i32 0), align 4
- %1 = load float* getelementptr ([11 x float]* @fa, i32 0, i32 1), align 4
- %2 = load float* getelementptr ([11 x float]* @fa, i32 0, i32 2), align 4
- %3 = load float* getelementptr ([11 x float]* @fa, i32 0, i32 3), align 4
- %4 = load float* getelementptr ([11 x float]* @fa, i32 0, i32 4), align 4
- %5 = load float* getelementptr ([11 x float]* @fa, i32 0, i32 5), align 4
- %6 = load float* getelementptr ([11 x float]* @fa, i32 0, i32 6), align 4
- %7 = load float* getelementptr ([11 x float]* @fa, i32 0, i32 7), align 4
- %8 = load float* getelementptr ([11 x float]* @fa, i32 0, i32 8), align 4
- %9 = load float* getelementptr ([11 x float]* @fa, i32 0, i32 9), align 4
- %10 = load float* getelementptr ([11 x float]* @fa, i32 0, i32 10), align 4
+ %0 = load float, float* getelementptr ([11 x float], [11 x float]* @fa, i32 0, i32 0), align 4
+ %1 = load float, float* getelementptr ([11 x float], [11 x float]* @fa, i32 0, i32 1), align 4
+ %2 = load float, float* getelementptr ([11 x float], [11 x float]* @fa, i32 0, i32 2), align 4
+ %3 = load float, float* getelementptr ([11 x float], [11 x float]* @fa, i32 0, i32 3), align 4
+ %4 = load float, float* getelementptr ([11 x float], [11 x float]* @fa, i32 0, i32 4), align 4
+ %5 = load float, float* getelementptr ([11 x float], [11 x float]* @fa, i32 0, i32 5), align 4
+ %6 = load float, float* getelementptr ([11 x float], [11 x float]* @fa, i32 0, i32 6), align 4
+ %7 = load float, float* getelementptr ([11 x float], [11 x float]* @fa, i32 0, i32 7), align 4
+ %8 = load float, float* getelementptr ([11 x float], [11 x float]* @fa, i32 0, i32 8), align 4
+ %9 = load float, float* getelementptr ([11 x float], [11 x float]* @fa, i32 0, i32 9), align 4
+ %10 = load float, float* getelementptr ([11 x float], [11 x float]* @fa, i32 0, i32 10), align 4
tail call fastcc void @callee2(float %0, float %1, float %2, float %3,
float %4, float %5, float %6, float %7,
float %8, float %9, float %10)
@@ -336,17 +336,17 @@ entry:
; NOODDSPREG-DAG: lwc1 $[[F0:f[0-9]*[02468]]], [[OFFSET]]($sp)
; NOODDSPREG-DAG: swc1 $[[F0]], 40($[[R0]])
- store float %a0, float* getelementptr ([11 x float]* @fa, i32 0, i32 0), align 4
- store float %a1, float* getelementptr ([11 x float]* @fa, i32 0, i32 1), align 4
- store float %a2, float* getelementptr ([11 x float]* @fa, i32 0, i32 2), align 4
- store float %a3, float* getelementptr ([11 x float]* @fa, i32 0, i32 3), align 4
- store float %a4, float* getelementptr ([11 x float]* @fa, i32 0, i32 4), align 4
- store float %a5, float* getelementptr ([11 x float]* @fa, i32 0, i32 5), align 4
- store float %a6, float* getelementptr ([11 x float]* @fa, i32 0, i32 6), align 4
- store float %a7, float* getelementptr ([11 x float]* @fa, i32 0, i32 7), align 4
- store float %a8, float* getelementptr ([11 x float]* @fa, i32 0, i32 8), align 4
- store float %a9, float* getelementptr ([11 x float]* @fa, i32 0, i32 9), align 4
- store float %a10, float* getelementptr ([11 x float]* @fa, i32 0, i32 10), align 4
+ store float %a0, float* getelementptr ([11 x float], [11 x float]* @fa, i32 0, i32 0), align 4
+ store float %a1, float* getelementptr ([11 x float], [11 x float]* @fa, i32 0, i32 1), align 4
+ store float %a2, float* getelementptr ([11 x float], [11 x float]* @fa, i32 0, i32 2), align 4
+ store float %a3, float* getelementptr ([11 x float], [11 x float]* @fa, i32 0, i32 3), align 4
+ store float %a4, float* getelementptr ([11 x float], [11 x float]* @fa, i32 0, i32 4), align 4
+ store float %a5, float* getelementptr ([11 x float], [11 x float]* @fa, i32 0, i32 5), align 4
+ store float %a6, float* getelementptr ([11 x float], [11 x float]* @fa, i32 0, i32 6), align 4
+ store float %a7, float* getelementptr ([11 x float], [11 x float]* @fa, i32 0, i32 7), align 4
+ store float %a8, float* getelementptr ([11 x float], [11 x float]* @fa, i32 0, i32 8), align 4
+ store float %a9, float* getelementptr ([11 x float], [11 x float]* @fa, i32 0, i32 9), align 4
+ store float %a10, float* getelementptr ([11 x float], [11 x float]* @fa, i32 0, i32 10), align 4
ret void
}
@@ -373,17 +373,17 @@ entry:
; FP64-NOODDSPREG-DAG: ldc1 $[[F0:f[0-9]*[02468]]], 80($[[R0]])
; FP64-NOODDSPREG-DAG: sdc1 $[[F0]], 0($sp)
- %0 = load double* getelementptr ([11 x double]* @da, i32 0, i32 0), align 8
- %1 = load double* getelementptr ([11 x double]* @da, i32 0, i32 1), align 8
- %2 = load double* getelementptr ([11 x double]* @da, i32 0, i32 2), align 8
- %3 = load double* getelementptr ([11 x double]* @da, i32 0, i32 3), align 8
- %4 = load double* getelementptr ([11 x double]* @da, i32 0, i32 4), align 8
- %5 = load double* getelementptr ([11 x double]* @da, i32 0, i32 5), align 8
- %6 = load double* getelementptr ([11 x double]* @da, i32 0, i32 6), align 8
- %7 = load double* getelementptr ([11 x double]* @da, i32 0, i32 7), align 8
- %8 = load double* getelementptr ([11 x double]* @da, i32 0, i32 8), align 8
- %9 = load double* getelementptr ([11 x double]* @da, i32 0, i32 9), align 8
- %10 = load double* getelementptr ([11 x double]* @da, i32 0, i32 10), align 8
+ %0 = load double, double* getelementptr ([11 x double], [11 x double]* @da, i32 0, i32 0), align 8
+ %1 = load double, double* getelementptr ([11 x double], [11 x double]* @da, i32 0, i32 1), align 8
+ %2 = load double, double* getelementptr ([11 x double], [11 x double]* @da, i32 0, i32 2), align 8
+ %3 = load double, double* getelementptr ([11 x double], [11 x double]* @da, i32 0, i32 3), align 8
+ %4 = load double, double* getelementptr ([11 x double], [11 x double]* @da, i32 0, i32 4), align 8
+ %5 = load double, double* getelementptr ([11 x double], [11 x double]* @da, i32 0, i32 5), align 8
+ %6 = load double, double* getelementptr ([11 x double], [11 x double]* @da, i32 0, i32 6), align 8
+ %7 = load double, double* getelementptr ([11 x double], [11 x double]* @da, i32 0, i32 7), align 8
+ %8 = load double, double* getelementptr ([11 x double], [11 x double]* @da, i32 0, i32 8), align 8
+ %9 = load double, double* getelementptr ([11 x double], [11 x double]* @da, i32 0, i32 9), align 8
+ %10 = load double, double* getelementptr ([11 x double], [11 x double]* @da, i32 0, i32 10), align 8
tail call fastcc void @callee3(double %0, double %1, double %2, double %3,
double %4, double %5, double %6, double %7,
double %8, double %9, double %10)
@@ -417,16 +417,16 @@ entry:
; FP64-NOODDSPREG-DAG: ldc1 $[[F0:f[0-9]*[02468]]], [[OFFSET]]($sp)
; FP64-NOODDSPREG-DAG: sdc1 $[[F0]], 80($[[R0]])
- store double %a0, double* getelementptr ([11 x double]* @da, i32 0, i32 0), align 8
- store double %a1, double* getelementptr ([11 x double]* @da, i32 0, i32 1), align 8
- store double %a2, double* getelementptr ([11 x double]* @da, i32 0, i32 2), align 8
- store double %a3, double* getelementptr ([11 x double]* @da, i32 0, i32 3), align 8
- store double %a4, double* getelementptr ([11 x double]* @da, i32 0, i32 4), align 8
- store double %a5, double* getelementptr ([11 x double]* @da, i32 0, i32 5), align 8
- store double %a6, double* getelementptr ([11 x double]* @da, i32 0, i32 6), align 8
- store double %a7, double* getelementptr ([11 x double]* @da, i32 0, i32 7), align 8
- store double %a8, double* getelementptr ([11 x double]* @da, i32 0, i32 8), align 8
- store double %a9, double* getelementptr ([11 x double]* @da, i32 0, i32 9), align 8
- store double %a10, double* getelementptr ([11 x double]* @da, i32 0, i32 10), align 8
+ store double %a0, double* getelementptr ([11 x double], [11 x double]* @da, i32 0, i32 0), align 8
+ store double %a1, double* getelementptr ([11 x double], [11 x double]* @da, i32 0, i32 1), align 8
+ store double %a2, double* getelementptr ([11 x double], [11 x double]* @da, i32 0, i32 2), align 8
+ store double %a3, double* getelementptr ([11 x double], [11 x double]* @da, i32 0, i32 3), align 8
+ store double %a4, double* getelementptr ([11 x double], [11 x double]* @da, i32 0, i32 4), align 8
+ store double %a5, double* getelementptr ([11 x double], [11 x double]* @da, i32 0, i32 5), align 8
+ store double %a6, double* getelementptr ([11 x double], [11 x double]* @da, i32 0, i32 6), align 8
+ store double %a7, double* getelementptr ([11 x double], [11 x double]* @da, i32 0, i32 7), align 8
+ store double %a8, double* getelementptr ([11 x double], [11 x double]* @da, i32 0, i32 8), align 8
+ store double %a9, double* getelementptr ([11 x double], [11 x double]* @da, i32 0, i32 9), align 8
+ store double %a10, double* getelementptr ([11 x double], [11 x double]* @da, i32 0, i32 10), align 8
ret void
}
diff --git a/test/CodeGen/Mips/fcopysign-f32-f64.ll b/test/CodeGen/Mips/fcopysign-f32-f64.ll
index 148a780fb930..860bc79956fc 100644
--- a/test/CodeGen/Mips/fcopysign-f32-f64.ll
+++ b/test/CodeGen/Mips/fcopysign-f32-f64.ll
@@ -1,6 +1,6 @@
-; RUN: llc < %s -march=mips64el -mcpu=mips4 -mattr=n64 | FileCheck %s -check-prefix=64
-; RUN: llc < %s -march=mips64el -mcpu=mips64 -mattr=n64 | FileCheck %s -check-prefix=64
-; RUN: llc < %s -march=mips64el -mcpu=mips64r2 -mattr=n64 | FileCheck %s -check-prefix=64R2
+; RUN: llc < %s -march=mips64el -mcpu=mips4 -target-abi=n64 | FileCheck %s -check-prefix=64
+; RUN: llc < %s -march=mips64el -mcpu=mips64 -target-abi=n64 | FileCheck %s -check-prefix=64
+; RUN: llc < %s -march=mips64el -mcpu=mips64r2 -target-abi=n64 | FileCheck %s -check-prefix=64R2
declare double @copysign(double, double) nounwind readnone
diff --git a/test/CodeGen/Mips/fcopysign.ll b/test/CodeGen/Mips/fcopysign.ll
index 3a9d9c73b279..6928f2fe507f 100644
--- a/test/CodeGen/Mips/fcopysign.ll
+++ b/test/CodeGen/Mips/fcopysign.ll
@@ -1,8 +1,8 @@
; RUN: llc < %s -march=mipsel -mcpu=mips32 | FileCheck %s -check-prefix=32
; RUN: llc < %s -march=mipsel -mcpu=mips32r2 | FileCheck %s -check-prefix=32R2
-; RUN: llc < %s -march=mips64el -mcpu=mips4 -mattr=n64 | FileCheck %s -check-prefix=64
-; RUN: llc < %s -march=mips64el -mcpu=mips64 -mattr=n64 | FileCheck %s -check-prefix=64
-; RUN: llc < %s -march=mips64el -mcpu=mips64r2 -mattr=n64 | FileCheck %s -check-prefix=64R2
+; RUN: llc < %s -march=mips64el -mcpu=mips4 -target-abi=n64 | FileCheck %s -check-prefix=64
+; RUN: llc < %s -march=mips64el -mcpu=mips64 -target-abi=n64 | FileCheck %s -check-prefix=64
+; RUN: llc < %s -march=mips64el -mcpu=mips64r2 -target-abi=n64 | FileCheck %s -check-prefix=64R2
define double @func0(double %d0, double %d1) nounwind readnone {
entry:
diff --git a/test/CodeGen/Mips/fixdfsf.ll b/test/CodeGen/Mips/fixdfsf.ll
index 4271ac222edb..869579922d51 100644
--- a/test/CodeGen/Mips/fixdfsf.ll
+++ b/test/CodeGen/Mips/fixdfsf.ll
@@ -7,7 +7,7 @@
; Function Attrs: nounwind optsize
define void @foo() {
entry:
- %0 = load double* @x, align 8
+ %0 = load double, double* @x, align 8
%conv = fptoui double %0 to i32
store i32 %conv, i32* @y, align 4
; pic1: lw ${{[0-9]+}}, %call16(__fixunsdfsi)(${{[0-9]+}})
diff --git a/test/CodeGen/Mips/fmadd1.ll b/test/CodeGen/Mips/fmadd1.ll
index f0667eec3b33..99d99fada1cf 100644
--- a/test/CodeGen/Mips/fmadd1.ll
+++ b/test/CodeGen/Mips/fmadd1.ll
@@ -8,15 +8,15 @@
; RUN: llc < %s -march=mipsel -mcpu=mips32 -enable-no-nans-fp-math | FileCheck %s -check-prefix=ALL -check-prefix=32 -check-prefix=32-NONAN
; RUN: llc < %s -march=mipsel -mcpu=mips32r2 -enable-no-nans-fp-math | FileCheck %s -check-prefix=ALL -check-prefix=32R2 -check-prefix=32R2-NONAN
; RUN: llc < %s -march=mipsel -mcpu=mips32r6 -enable-no-nans-fp-math | FileCheck %s -check-prefix=ALL -check-prefix=32R6 -check-prefix=32R6-NONAN
-; RUN: llc < %s -march=mips64el -mcpu=mips64 -mattr=n64 -enable-no-nans-fp-math | FileCheck %s -check-prefix=ALL -check-prefix=64 -check-prefix=64-NONAN
-; RUN: llc < %s -march=mips64el -mcpu=mips64r2 -mattr=n64 -enable-no-nans-fp-math | FileCheck %s -check-prefix=ALL -check-prefix=64R2 -check-prefix=64R2-NONAN
-; RUN: llc < %s -march=mips64el -mcpu=mips64r6 -mattr=n64 -enable-no-nans-fp-math | FileCheck %s -check-prefix=ALL -check-prefix=64R6 -check-prefix=64R6-NONAN
+; RUN: llc < %s -march=mips64el -mcpu=mips64 -target-abi=n64 -enable-no-nans-fp-math | FileCheck %s -check-prefix=ALL -check-prefix=64 -check-prefix=64-NONAN
+; RUN: llc < %s -march=mips64el -mcpu=mips64r2 -target-abi=n64 -enable-no-nans-fp-math | FileCheck %s -check-prefix=ALL -check-prefix=64R2 -check-prefix=64R2-NONAN
+; RUN: llc < %s -march=mips64el -mcpu=mips64r6 -target-abi=n64 -enable-no-nans-fp-math | FileCheck %s -check-prefix=ALL -check-prefix=64R6 -check-prefix=64R6-NONAN
; RUN: llc < %s -march=mipsel -mcpu=mips32 | FileCheck %s -check-prefix=ALL -check-prefix=32 -check-prefix=32-NAN
; RUN: llc < %s -march=mipsel -mcpu=mips32r2 | FileCheck %s -check-prefix=ALL -check-prefix=32R2 -check-prefix=32R2-NAN
; RUN: llc < %s -march=mipsel -mcpu=mips32r6 | FileCheck %s -check-prefix=ALL -check-prefix=32R6 -check-prefix=32R6-NAN
-; RUN: llc < %s -march=mips64el -mcpu=mips64 -mattr=n64 | FileCheck %s -check-prefix=ALL -check-prefix=64 -check-prefix=64-NAN
-; RUN: llc < %s -march=mips64el -mcpu=mips64r2 -mattr=n64 | FileCheck %s -check-prefix=ALL -check-prefix=64R2 -check-prefix=64R2-NAN
-; RUN: llc < %s -march=mips64el -mcpu=mips64r6 -mattr=n64 | FileCheck %s -check-prefix=ALL -check-prefix=64R6 -check-prefix=64R6-NAN
+; RUN: llc < %s -march=mips64el -mcpu=mips64 -target-abi=n64 | FileCheck %s -check-prefix=ALL -check-prefix=64 -check-prefix=64-NAN
+; RUN: llc < %s -march=mips64el -mcpu=mips64r2 -target-abi=n64 | FileCheck %s -check-prefix=ALL -check-prefix=64R2 -check-prefix=64R2-NAN
+; RUN: llc < %s -march=mips64el -mcpu=mips64r6 -target-abi=n64 | FileCheck %s -check-prefix=ALL -check-prefix=64R6 -check-prefix=64R6-NAN
define float @FOO0float(float %a, float %b, float %c) nounwind readnone {
entry:
diff --git a/test/CodeGen/Mips/fp-indexed-ls.ll b/test/CodeGen/Mips/fp-indexed-ls.ll
index 787e131f6ec5..219ca99d3f94 100644
--- a/test/CodeGen/Mips/fp-indexed-ls.ll
+++ b/test/CodeGen/Mips/fp-indexed-ls.ll
@@ -1,10 +1,10 @@
; RUN: llc -march=mipsel -mcpu=mips32 < %s | FileCheck %s -check-prefix=ALL -check-prefix=MIPS32R1
; RUN: llc -march=mipsel -mcpu=mips32r2 < %s | FileCheck %s -check-prefix=ALL -check-prefix=MIPS32R2
; RUN: llc -march=mipsel -mcpu=mips32r6 < %s | FileCheck %s -check-prefix=ALL -check-prefix=MIPS32R6
-; RUN: llc -march=mips64el -mcpu=mips4 -mattr=n64 < %s | FileCheck %s -check-prefix=ALL -check-prefix=MIPS4
-; RUN: llc -march=mips64el -mcpu=mips64 -mattr=n64 < %s | FileCheck %s -check-prefix=ALL -check-prefix=MIPS4
-; RUN: llc -march=mips64el -mcpu=mips64r2 -mattr=n64 < %s | FileCheck %s -check-prefix=ALL -check-prefix=MIPS4
-; RUN: llc -march=mips64el -mcpu=mips64r6 -mattr=n64 < %s | FileCheck %s -check-prefix=ALL -check-prefix=MIPS64R6
+; RUN: llc -march=mips64el -mcpu=mips4 -target-abi=n64 < %s | FileCheck %s -check-prefix=ALL -check-prefix=MIPS4
+; RUN: llc -march=mips64el -mcpu=mips64 -target-abi=n64 < %s | FileCheck %s -check-prefix=ALL -check-prefix=MIPS4
+; RUN: llc -march=mips64el -mcpu=mips64r2 -target-abi=n64 < %s | FileCheck %s -check-prefix=ALL -check-prefix=MIPS4
+; RUN: llc -march=mips64el -mcpu=mips64r6 -target-abi=n64 < %s | FileCheck %s -check-prefix=ALL -check-prefix=MIPS64R6
; Check that [ls][dwu]xc1 are not emitted for nacl.
; RUN: llc -mtriple=mipsel-none-nacl-gnu -mcpu=mips32r2 < %s | FileCheck %s -check-prefix=CHECK-NACL
@@ -45,8 +45,8 @@ entry:
; CHECK-NACL-NOT: lwxc1
- %arrayidx = getelementptr inbounds float* %b, i32 %o
- %0 = load float* %arrayidx, align 4
+ %arrayidx = getelementptr inbounds float, float* %b, i32 %o
+ %0 = load float, float* %arrayidx, align 4
ret float %0
}
@@ -76,8 +76,8 @@ entry:
; CHECK-NACL-NOT: ldxc1
- %arrayidx = getelementptr inbounds double* %b, i32 %o
- %0 = load double* %arrayidx, align 8
+ %arrayidx = getelementptr inbounds double, double* %b, i32 %o
+ %0 = load double, double* %arrayidx, align 8
ret double %0
}
@@ -100,8 +100,8 @@ entry:
; luxc1 was removed in MIPS64r6
; MIPS64R6-NOT: luxc1
- %arrayidx1 = getelementptr inbounds [4 x %struct.S]* @s, i32 0, i32 %b, i32 0, i32 %c
- %0 = load float* %arrayidx1, align 1
+ %arrayidx1 = getelementptr inbounds [4 x %struct.S], [4 x %struct.S]* @s, i32 0, i32 %b, i32 0, i32 %c
+ %0 = load float, float* %arrayidx1, align 1
ret float %0
}
@@ -129,8 +129,8 @@ entry:
; CHECK-NACL-NOT: swxc1
- %0 = load float* @gf, align 4
- %arrayidx = getelementptr inbounds float* %b, i32 %o
+ %0 = load float, float* @gf, align 4
+ %arrayidx = getelementptr inbounds float, float* %b, i32 %o
store float %0, float* %arrayidx, align 4
ret void
}
@@ -159,8 +159,8 @@ entry:
; CHECK-NACL-NOT: sdxc1
- %0 = load double* @gd, align 8
- %arrayidx = getelementptr inbounds double* %b, i32 %o
+ %0 = load double, double* @gd, align 8
+ %arrayidx = getelementptr inbounds double, double* %b, i32 %o
store double %0, double* %arrayidx, align 8
ret void
}
@@ -179,8 +179,8 @@ entry:
; MIPS64R6-NOT: suxc1
- %0 = load float* @gf, align 4
- %arrayidx1 = getelementptr inbounds [4 x %struct.S]* @s, i32 0, i32 %b, i32 0, i32 %c
+ %0 = load float, float* @gf, align 4
+ %arrayidx1 = getelementptr inbounds [4 x %struct.S], [4 x %struct.S]* @s, i32 0, i32 %b, i32 0, i32 %c
store float %0, float* %arrayidx1, align 1
ret void
}
@@ -199,8 +199,8 @@ entry:
; MIPS64R6-NOT: luxc1
- %arrayidx1 = getelementptr inbounds [4 x %struct.S2]* @s2, i32 0, i32 %b, i32 0, i32 %c
- %0 = load double* %arrayidx1, align 1
+ %arrayidx1 = getelementptr inbounds [4 x %struct.S2], [4 x %struct.S2]* @s2, i32 0, i32 %b, i32 0, i32 %c
+ %0 = load double, double* %arrayidx1, align 1
ret double %0
}
@@ -218,8 +218,8 @@ entry:
; MIPS64R6-NOT: suxc1
- %0 = load double* @gd, align 8
- %arrayidx1 = getelementptr inbounds [4 x %struct.S2]* @s2, i32 0, i32 %b, i32 0, i32 %c
+ %0 = load double, double* @gd, align 8
+ %arrayidx1 = getelementptr inbounds [4 x %struct.S2], [4 x %struct.S2]* @s2, i32 0, i32 %b, i32 0, i32 %c
store double %0, double* %arrayidx1, align 1
ret void
}
@@ -238,7 +238,7 @@ entry:
; MIPS64R6-NOT: luxc1
- %0 = load float* getelementptr inbounds (%struct.S3* @s3, i32 0, i32 1), align 1
+ %0 = load float, float* getelementptr inbounds (%struct.S3, %struct.S3* @s3, i32 0, i32 1), align 1
ret float %0
}
@@ -256,7 +256,7 @@ entry:
; MIPS64R6-NOT: suxc1
- store float %f, float* getelementptr inbounds (%struct.S3* @s3, i32 0, i32 1), align 1
+ store float %f, float* getelementptr inbounds (%struct.S3, %struct.S3* @s3, i32 0, i32 1), align 1
ret void
}
diff --git a/test/CodeGen/Mips/fp-spill-reload.ll b/test/CodeGen/Mips/fp-spill-reload.ll
index f9887a55827f..4a53ad8c8e13 100644
--- a/test/CodeGen/Mips/fp-spill-reload.ll
+++ b/test/CodeGen/Mips/fp-spill-reload.ll
@@ -5,27 +5,27 @@ define void @foo0(i32* nocapture %b) nounwind {
entry:
; CHECK: sw $fp
; CHECK: lw $fp
- %0 = load i32* %b, align 4
- %arrayidx.1 = getelementptr inbounds i32* %b, i32 1
- %1 = load i32* %arrayidx.1, align 4
+ %0 = load i32, i32* %b, align 4
+ %arrayidx.1 = getelementptr inbounds i32, i32* %b, i32 1
+ %1 = load i32, i32* %arrayidx.1, align 4
%add.1 = add nsw i32 %1, 1
- %arrayidx.2 = getelementptr inbounds i32* %b, i32 2
- %2 = load i32* %arrayidx.2, align 4
+ %arrayidx.2 = getelementptr inbounds i32, i32* %b, i32 2
+ %2 = load i32, i32* %arrayidx.2, align 4
%add.2 = add nsw i32 %2, 2
- %arrayidx.3 = getelementptr inbounds i32* %b, i32 3
- %3 = load i32* %arrayidx.3, align 4
+ %arrayidx.3 = getelementptr inbounds i32, i32* %b, i32 3
+ %3 = load i32, i32* %arrayidx.3, align 4
%add.3 = add nsw i32 %3, 3
- %arrayidx.4 = getelementptr inbounds i32* %b, i32 4
- %4 = load i32* %arrayidx.4, align 4
+ %arrayidx.4 = getelementptr inbounds i32, i32* %b, i32 4
+ %4 = load i32, i32* %arrayidx.4, align 4
%add.4 = add nsw i32 %4, 4
- %arrayidx.5 = getelementptr inbounds i32* %b, i32 5
- %5 = load i32* %arrayidx.5, align 4
+ %arrayidx.5 = getelementptr inbounds i32, i32* %b, i32 5
+ %5 = load i32, i32* %arrayidx.5, align 4
%add.5 = add nsw i32 %5, 5
- %arrayidx.6 = getelementptr inbounds i32* %b, i32 6
- %6 = load i32* %arrayidx.6, align 4
+ %arrayidx.6 = getelementptr inbounds i32, i32* %b, i32 6
+ %6 = load i32, i32* %arrayidx.6, align 4
%add.6 = add nsw i32 %6, 6
- %arrayidx.7 = getelementptr inbounds i32* %b, i32 7
- %7 = load i32* %arrayidx.7, align 4
+ %arrayidx.7 = getelementptr inbounds i32, i32* %b, i32 7
+ %7 = load i32, i32* %arrayidx.7, align 4
%add.7 = add nsw i32 %7, 7
call void @foo2(i32 %0, i32 %add.1, i32 %add.2, i32 %add.3, i32 %add.4, i32 %add.5, i32 %add.6, i32 %add.7) nounwind
call void bitcast (void (...)* @foo1 to void ()*)() nounwind
diff --git a/test/CodeGen/Mips/fp16-promote.ll b/test/CodeGen/Mips/fp16-promote.ll
new file mode 100644
index 000000000000..2ac46e028072
--- /dev/null
+++ b/test/CodeGen/Mips/fp16-promote.ll
@@ -0,0 +1,98 @@
+; RUN: llc -asm-verbose=false -mtriple=mipsel-linux-gnueabi < %s | FileCheck %s -check-prefix=CHECK-LIBCALL
+
+; CHECK-LIBCALL-LABEL: test_fadd:
+; CHECK-LIBCALL: %call16(__gnu_h2f_ieee)
+; CHECK-LIBCALL: %call16(__gnu_h2f_ieee)
+; CHECK-LIBCALL-DAG: add.s
+; CHECK-LIBCALL-DAG: %call16(__gnu_f2h_ieee)
+define void @test_fadd(half* %p, half* %q) #0 {
+ %a = load half, half* %p, align 2
+ %b = load half, half* %q, align 2
+ %r = fadd half %a, %b
+ store half %r, half* %p
+ ret void
+}
+
+; CHECK-LIBCALL-LABEL: test_fpext_float:
+; CHECK-LIBCALL: %call16(__gnu_h2f_ieee)
+define float @test_fpext_float(half* %p) {
+ %a = load half, half* %p, align 2
+ %r = fpext half %a to float
+ ret float %r
+}
+
+; CHECK-LIBCALL-LABEL: test_fpext_double:
+; CHECK-LIBCALL: %call16(__gnu_h2f_ieee)
+; CHECK-LIBCALL: cvt.d.s
+define double @test_fpext_double(half* %p) {
+ %a = load half, half* %p, align 2
+ %r = fpext half %a to double
+ ret double %r
+}
+
+; CHECK-LIBCALL-LABEL: test_fptrunc_float:
+; CHECK-LIBCALL: %call16(__gnu_f2h_ieee)
+define void @test_fptrunc_float(float %f, half* %p) #0 {
+ %a = fptrunc float %f to half
+ store half %a, half* %p
+ ret void
+}
+
+; CHECK-LIBCALL-LABEL: test_fptrunc_double:
+; CHECK-LIBCALL: %call16(__truncdfhf2)
+define void @test_fptrunc_double(double %d, half* %p) #0 {
+ %a = fptrunc double %d to half
+ store half %a, half* %p
+ ret void
+}
+
+; CHECK-LIBCALL-LABEL: test_vec_fpext_float:
+; CHECK-LIBCALL: %call16(__gnu_h2f_ieee)
+; CHECK-LIBCALL: %call16(__gnu_h2f_ieee)
+; CHECK-LIBCALL: %call16(__gnu_h2f_ieee)
+; CHECK-LIBCALL: %call16(__gnu_h2f_ieee)
+define <4 x float> @test_vec_fpext_float(<4 x half>* %p) #0 {
+ %a = load <4 x half>, <4 x half>* %p, align 8
+ %b = fpext <4 x half> %a to <4 x float>
+ ret <4 x float> %b
+}
+
+; This test is not robust against variations in instruction scheduling.
+; See the discussion in http://reviews.llvm.org/D8804
+; CHECK-LIBCALL-LABEL: test_vec_fpext_double:
+; CHECK-LIBCALL: %call16(__gnu_h2f_ieee)
+; CHECK-LIBCALL: %call16(__gnu_h2f_ieee)
+; CHECK-LIBCALL: %call16(__gnu_h2f_ieee)
+; CHECK-LIBCALL: cvt.d.s
+; CHECK-LIBCALL: cvt.d.s
+; CHECK-LIBCALL: cvt.d.s
+; CHECK-LIBCALL: %call16(__gnu_h2f_ieee)
+; CHECK-LIBCALL: cvt.d.s
+define <4 x double> @test_vec_fpext_double(<4 x half>* %p) #0 {
+ %a = load <4 x half>, <4 x half>* %p, align 8
+ %b = fpext <4 x half> %a to <4 x double>
+ ret <4 x double> %b
+}
+
+; CHECK-LIBCALL-LABEL: test_vec_fptrunc_float:
+; CHECK-LIBCALL: %call16(__gnu_f2h_ieee)
+; CHECK-LIBCALL: %call16(__gnu_f2h_ieee)
+; CHECK-LIBCALL: %call16(__gnu_f2h_ieee)
+; CHECK-LIBCALL: %call16(__gnu_f2h_ieee)
+define void @test_vec_fptrunc_float(<4 x float> %a, <4 x half>* %p) #0 {
+ %b = fptrunc <4 x float> %a to <4 x half>
+ store <4 x half> %b, <4 x half>* %p, align 8
+ ret void
+}
+
+; CHECK-LIBCALL-LABEL: test_vec_fptrunc_double:
+; CHECK-LIBCALL: %call16(__truncdfhf2)
+; CHECK-LIBCALL: %call16(__truncdfhf2)
+; CHECK-LIBCALL: %call16(__truncdfhf2)
+; CHECK-LIBCALL: %call16(__truncdfhf2)
+define void @test_vec_fptrunc_double(<4 x double> %a, <4 x half>* %p) #0 {
+ %b = fptrunc <4 x double> %a to <4 x half>
+ store <4 x half> %b, <4 x half>* %p, align 8
+ ret void
+}
+
diff --git a/test/CodeGen/Mips/fp16instrinsmc.ll b/test/CodeGen/Mips/fp16instrinsmc.ll
index 84d3814ee8b8..797be2668d40 100644
--- a/test/CodeGen/Mips/fp16instrinsmc.ll
+++ b/test/CodeGen/Mips/fp16instrinsmc.ll
@@ -23,8 +23,8 @@ define void @foo1() #0 {
; fmask: .set reorder
; fmask: .end foo1
entry:
- %0 = load float* @x, align 4
- %1 = load float* @one, align 4
+ %0 = load float, float* @x, align 4
+ %1 = load float, float* @one, align 4
%call = call float @copysignf(float %0, float %1) #2
store float %call, float* @y, align 4
ret void
@@ -39,8 +39,8 @@ define void @foo2() #0 {
; fmask: save {{.*}}
; fmask: .end foo2
entry:
- %0 = load float* @x, align 4
- %1 = load float* @negone, align 4
+ %0 = load float, float* @x, align 4
+ %1 = load float, float* @negone, align 4
%call = call float @copysignf(float %0, float %1) #2
store float %call, float* @y, align 4
ret void
@@ -57,8 +57,8 @@ entry:
; fmask: .set macro
; fmask: .set reorder
; fmask: .end foo3
- %0 = load double* @xd, align 8
- %1 = load float* @oned, align 4
+ %0 = load double, double* @xd, align 8
+ %1 = load float, float* @oned, align 4
%conv = fpext float %1 to double
%call = call double @copysign(double %0, double %conv) #2
store double %call, double* @yd, align 8
@@ -74,8 +74,8 @@ entry:
; fmask: .ent foo4
; fmask: save {{.*}}
; fmask: .end foo4
- %0 = load double* @xd, align 8
- %1 = load double* @negoned, align 8
+ %0 = load double, double* @xd, align 8
+ %1 = load double, double* @negoned, align 8
%call = call double @copysign(double %0, double %1) #2
store double %call, double* @yd, align 8
ret void
@@ -84,7 +84,7 @@ entry:
; Function Attrs: nounwind
define void @foo5() #0 {
entry:
- %0 = load float* @xn, align 4
+ %0 = load float, float* @xn, align 4
%call = call float @fabsf(float %0) #2
store float %call, float* @y, align 4
ret void
@@ -96,7 +96,7 @@ declare float @fabsf(float) #1
; Function Attrs: nounwind
define void @foo6() #0 {
entry:
- %0 = load double* @xdn, align 8
+ %0 = load double, double* @xdn, align 8
%call = call double @fabs(double %0) #2
store double %call, double* @yd, align 8
ret void
@@ -108,7 +108,7 @@ declare double @fabs(double) #1
; Function Attrs: nounwind
define void @foo7() #0 {
entry:
- %0 = load float* @x, align 4
+ %0 = load float, float* @x, align 4
%call = call float @sinf(float %0) #3
;pic: lw ${{[0-9]+}}, %call16(sinf)(${{[0-9]+}})
;pic: lw ${{[0-9]+}}, %got(__mips16_call_stub_sf_1)(${{[0-9]+}})
@@ -122,7 +122,7 @@ declare float @sinf(float) #0
; Function Attrs: nounwind
define void @foo8() #0 {
entry:
- %0 = load double* @xd, align 8
+ %0 = load double, double* @xd, align 8
%call = call double @sin(double %0) #3
;pic: lw ${{[0-9]+}}, %call16(sin)(${{[0-9]+}})
;pic: lw ${{[0-9]+}}, %got(__mips16_call_stub_df_2)(${{[0-9]+}})
@@ -136,7 +136,7 @@ declare double @sin(double) #0
; Function Attrs: nounwind
define void @foo9() #0 {
entry:
- %0 = load float* @x, align 4
+ %0 = load float, float* @x, align 4
%call = call float @cosf(float %0) #3
;pic: lw ${{[0-9]+}}, %call16(cosf)(${{[0-9]+}})
;pic: lw ${{[0-9]+}}, %got(__mips16_call_stub_sf_1)(${{[0-9]+}})
@@ -150,7 +150,7 @@ declare float @cosf(float) #0
; Function Attrs: nounwind
define void @foo10() #0 {
entry:
- %0 = load double* @xd, align 8
+ %0 = load double, double* @xd, align 8
%call = call double @cos(double %0) #3
;pic: lw ${{[0-9]+}}, %call16(cos)(${{[0-9]+}})
;pic: lw ${{[0-9]+}}, %got(__mips16_call_stub_df_2)(${{[0-9]+}})
@@ -164,7 +164,7 @@ declare double @cos(double) #0
; Function Attrs: nounwind
define void @foo11() #0 {
entry:
- %0 = load float* @x, align 4
+ %0 = load float, float* @x, align 4
%call = call float @sqrtf(float %0) #3
;pic: lw ${{[0-9]+}}, %call16(sqrtf)(${{[0-9]+}})
;pic: lw ${{[0-9]+}}, %got(__mips16_call_stub_sf_1)(${{[0-9]+}})
@@ -178,7 +178,7 @@ declare float @sqrtf(float) #0
; Function Attrs: nounwind
define void @foo12() #0 {
entry:
- %0 = load double* @xd, align 8
+ %0 = load double, double* @xd, align 8
%call = call double @sqrt(double %0) #3
;pic: lw ${{[0-9]+}}, %call16(sqrt)(${{[0-9]+}})
;pic: lw ${{[0-9]+}}, %got(__mips16_call_stub_df_2)(${{[0-9]+}})
@@ -192,7 +192,7 @@ declare double @sqrt(double) #0
; Function Attrs: nounwind
define void @foo13() #0 {
entry:
- %0 = load float* @x, align 4
+ %0 = load float, float* @x, align 4
%call = call float @floorf(float %0) #2
;pic: lw ${{[0-9]+}}, %call16(floorf)(${{[0-9]+}})
;pic: lw ${{[0-9]+}}, %got(__mips16_call_stub_sf_1)(${{[0-9]+}})
@@ -206,7 +206,7 @@ declare float @floorf(float) #1
; Function Attrs: nounwind
define void @foo14() #0 {
entry:
- %0 = load double* @xd, align 8
+ %0 = load double, double* @xd, align 8
%call = call double @floor(double %0) #2
;pic: lw ${{[0-9]+}}, %call16(floor)(${{[0-9]+}})
;pic: lw ${{[0-9]+}}, %got(__mips16_call_stub_df_2)(${{[0-9]+}})
@@ -220,7 +220,7 @@ declare double @floor(double) #1
; Function Attrs: nounwind
define void @foo15() #0 {
entry:
- %0 = load float* @x, align 4
+ %0 = load float, float* @x, align 4
%call = call float @nearbyintf(float %0) #2
;pic: lw ${{[0-9]+}}, %call16(nearbyintf)(${{[0-9]+}})
;pic: lw ${{[0-9]+}}, %got(__mips16_call_stub_sf_1)(${{[0-9]+}})
@@ -234,7 +234,7 @@ declare float @nearbyintf(float) #1
; Function Attrs: nounwind
define void @foo16() #0 {
entry:
- %0 = load double* @xd, align 8
+ %0 = load double, double* @xd, align 8
%call = call double @nearbyint(double %0) #2
;pic: lw ${{[0-9]+}}, %call16(nearbyint)(${{[0-9]+}})
;pic: lw ${{[0-9]+}}, %got(__mips16_call_stub_df_2)(${{[0-9]+}})
@@ -248,7 +248,7 @@ declare double @nearbyint(double) #1
; Function Attrs: nounwind
define void @foo17() #0 {
entry:
- %0 = load float* @x, align 4
+ %0 = load float, float* @x, align 4
%call = call float @ceilf(float %0) #2
;pic: lw ${{[0-9]+}}, %call16(ceilf)(${{[0-9]+}})
;pic: lw ${{[0-9]+}}, %got(__mips16_call_stub_sf_1)(${{[0-9]+}})
@@ -262,7 +262,7 @@ declare float @ceilf(float) #1
; Function Attrs: nounwind
define void @foo18() #0 {
entry:
- %0 = load double* @xd, align 8
+ %0 = load double, double* @xd, align 8
%call = call double @ceil(double %0) #2
;pic: lw ${{[0-9]+}}, %call16(ceil)(${{[0-9]+}})
;pic: lw ${{[0-9]+}}, %got(__mips16_call_stub_df_2)(${{[0-9]+}})
@@ -276,7 +276,7 @@ declare double @ceil(double) #1
; Function Attrs: nounwind
define void @foo19() #0 {
entry:
- %0 = load float* @x, align 4
+ %0 = load float, float* @x, align 4
%call = call float @rintf(float %0) #2
;pic: lw ${{[0-9]+}}, %call16(rintf)(${{[0-9]+}})
;pic: lw ${{[0-9]+}}, %got(__mips16_call_stub_sf_1)(${{[0-9]+}})
@@ -290,7 +290,7 @@ declare float @rintf(float) #1
; Function Attrs: nounwind
define void @foo20() #0 {
entry:
- %0 = load double* @xd, align 8
+ %0 = load double, double* @xd, align 8
%call = call double @rint(double %0) #2
;pic: lw ${{[0-9]+}}, %call16(rint)(${{[0-9]+}})
;pic: lw ${{[0-9]+}}, %got(__mips16_call_stub_df_2)(${{[0-9]+}})
@@ -304,7 +304,7 @@ declare double @rint(double) #1
; Function Attrs: nounwind
define void @foo21() #0 {
entry:
- %0 = load float* @x, align 4
+ %0 = load float, float* @x, align 4
%call = call float @truncf(float %0) #2
;pic: lw ${{[0-9]+}}, %call16(truncf)(${{[0-9]+}})
;pic: lw ${{[0-9]+}}, %got(__mips16_call_stub_sf_1)(${{[0-9]+}})
@@ -318,7 +318,7 @@ declare float @truncf(float) #1
; Function Attrs: nounwind
define void @foo22() #0 {
entry:
- %0 = load double* @xd, align 8
+ %0 = load double, double* @xd, align 8
%call = call double @trunc(double %0) #2
;pic: lw ${{[0-9]+}}, %call16(trunc)(${{[0-9]+}})
;pic: lw ${{[0-9]+}}, %got(__mips16_call_stub_df_2)(${{[0-9]+}})
@@ -332,7 +332,7 @@ declare double @trunc(double) #1
; Function Attrs: nounwind
define void @foo23() #0 {
entry:
- %0 = load float* @x, align 4
+ %0 = load float, float* @x, align 4
%call = call float @log2f(float %0) #3
;pic: lw ${{[0-9]+}}, %call16(log2f)(${{[0-9]+}})
;pic: lw ${{[0-9]+}}, %got(__mips16_call_stub_sf_1)(${{[0-9]+}})
@@ -346,7 +346,7 @@ declare float @log2f(float) #0
; Function Attrs: nounwind
define void @foo24() #0 {
entry:
- %0 = load double* @xd, align 8
+ %0 = load double, double* @xd, align 8
%call = call double @log2(double %0) #3
;pic: lw ${{[0-9]+}}, %call16(log2)(${{[0-9]+}})
;pic: lw ${{[0-9]+}}, %got(__mips16_call_stub_df_2)(${{[0-9]+}})
@@ -360,7 +360,7 @@ declare double @log2(double) #0
; Function Attrs: nounwind
define void @foo25() #0 {
entry:
- %0 = load float* @x, align 4
+ %0 = load float, float* @x, align 4
%call = call float @exp2f(float %0) #3
;pic: lw ${{[0-9]+}}, %call16(exp2f)(${{[0-9]+}})
;pic: lw ${{[0-9]+}}, %got(__mips16_call_stub_sf_1)(${{[0-9]+}})
@@ -374,7 +374,7 @@ declare float @exp2f(float) #0
; Function Attrs: nounwind
define void @foo26() #0 {
entry:
- %0 = load double* @xd, align 8
+ %0 = load double, double* @xd, align 8
%call = call double @exp2(double %0) #3
;pic: lw ${{[0-9]+}}, %call16(exp2)(${{[0-9]+}})
;pic: lw ${{[0-9]+}}, %got(__mips16_call_stub_df_2)(${{[0-9]+}})
diff --git a/test/CodeGen/Mips/fp16static.ll b/test/CodeGen/Mips/fp16static.ll
index beb063db15ca..4e5059ed39e9 100644
--- a/test/CodeGen/Mips/fp16static.ll
+++ b/test/CodeGen/Mips/fp16static.ll
@@ -4,8 +4,8 @@
define void @foo() nounwind {
entry:
- %0 = load float* @x, align 4
- %1 = load float* @x, align 4
+ %0 = load float, float* @x, align 4
+ %1 = load float, float* @x, align 4
%mul = fmul float %0, %1
store float %mul, float* @x, align 4
; CHECK-STATIC16: jal __mips16_mulsf3
diff --git a/test/CodeGen/Mips/fpbr.ll b/test/CodeGen/Mips/fpbr.ll
index 311b83015a56..27d7094376e6 100644
--- a/test/CodeGen/Mips/fpbr.ll
+++ b/test/CodeGen/Mips/fpbr.ll
@@ -24,11 +24,11 @@ entry:
br i1 %cmp, label %if.then, label %if.else
if.then: ; preds = %entry
- tail call void (...)* @g0() nounwind
+ tail call void (...) @g0() nounwind
br label %if.end
if.else: ; preds = %entry
- tail call void (...)* @g1() nounwind
+ tail call void (...) @g1() nounwind
br label %if.end
if.end: ; preds = %if.else, %if.then
@@ -57,11 +57,11 @@ entry:
br i1 %cmp, label %if.then, label %if.else
if.then: ; preds = %entry
- tail call void (...)* @g0() nounwind
+ tail call void (...) @g0() nounwind
br label %if.end
if.else: ; preds = %entry
- tail call void (...)* @g1() nounwind
+ tail call void (...) @g1() nounwind
br label %if.end
if.end: ; preds = %if.else, %if.then
@@ -86,11 +86,11 @@ entry:
br i1 %cmp, label %if.else, label %if.then
if.then: ; preds = %entry
- tail call void (...)* @g0() nounwind
+ tail call void (...) @g0() nounwind
br label %if.end
if.else: ; preds = %entry
- tail call void (...)* @g1() nounwind
+ tail call void (...) @g1() nounwind
br label %if.end
if.end: ; preds = %if.else, %if.then
@@ -116,11 +116,11 @@ entry:
br i1 %cmp, label %if.then, label %if.else
if.then: ; preds = %entry
- tail call void (...)* @g0() nounwind
+ tail call void (...) @g0() nounwind
br label %if.end
if.else: ; preds = %entry
- tail call void (...)* @g1() nounwind
+ tail call void (...) @g1() nounwind
br label %if.end
if.end: ; preds = %if.else, %if.then
@@ -145,11 +145,11 @@ entry:
br i1 %cmp, label %if.then, label %if.else
if.then: ; preds = %entry
- tail call void (...)* @g0() nounwind
+ tail call void (...) @g0() nounwind
br label %if.end
if.else: ; preds = %entry
- tail call void (...)* @g1() nounwind
+ tail call void (...) @g1() nounwind
br label %if.end
if.end: ; preds = %if.else, %if.then
@@ -174,11 +174,11 @@ entry:
br i1 %cmp, label %if.else, label %if.then
if.then: ; preds = %entry
- tail call void (...)* @g0() nounwind
+ tail call void (...) @g0() nounwind
br label %if.end
if.else: ; preds = %entry
- tail call void (...)* @g1() nounwind
+ tail call void (...) @g1() nounwind
br label %if.end
if.end: ; preds = %if.else, %if.then
diff --git a/test/CodeGen/Mips/fpneeded.ll b/test/CodeGen/Mips/fpneeded.ll
index fdd8e8f707ef..a89e2a593a4d 100644
--- a/test/CodeGen/Mips/fpneeded.ll
+++ b/test/CodeGen/Mips/fpneeded.ll
@@ -76,8 +76,8 @@ entry:
define void @foo1() #0 {
entry:
store float 1.000000e+00, float* @zz, align 4
- %0 = load float* @y, align 4
- %1 = load float* @x, align 4
+ %0 = load float, float* @y, align 4
+ %1 = load float, float* @x, align 4
%add = fadd float %0, %1
store float %add, float* @z, align 4
ret void
@@ -96,7 +96,7 @@ entry:
define void @foo2() #0 {
entry:
- %0 = load float* @x, align 4
+ %0 = load float, float* @x, align 4
call void @vf(float %0)
ret void
}
diff --git a/test/CodeGen/Mips/fpnotneeded.ll b/test/CodeGen/Mips/fpnotneeded.ll
index e12d7baacdbb..02b8e8a345db 100644
--- a/test/CodeGen/Mips/fpnotneeded.ll
+++ b/test/CodeGen/Mips/fpnotneeded.ll
@@ -19,7 +19,7 @@ entry:
define i32 @iv() #0 {
entry:
- %0 = load i32* @i, align 4
+ %0 = load i32, i32* @i, align 4
ret i32 %0
}
diff --git a/test/CodeGen/Mips/fpxx.ll b/test/CodeGen/Mips/fpxx.ll
index 7e2ed22e2d80..5b42ecec53e8 100644
--- a/test/CodeGen/Mips/fpxx.ll
+++ b/test/CodeGen/Mips/fpxx.ll
@@ -10,11 +10,11 @@
; RUN: llc -march=mips64 -mcpu=mips64 < %s | FileCheck %s -check-prefix=ALL -check-prefix=64-NOFPXX
; RUN: not llc -march=mips64 -mcpu=mips64 -mattr=fpxx < %s 2>&1 | FileCheck %s -check-prefix=64-FPXX
-; RUN-TODO: llc -march=mips64 -mcpu=mips4 -mattr=-n64,+o32 < %s | FileCheck %s -check-prefix=ALL -check-prefix=4-O32-NOFPXX
-; RUN-TODO: llc -march=mips64 -mcpu=mips4 -mattr=-n64,+o32 -mattr=fpxx < %s | FileCheck %s -check-prefix=ALL -check-prefix=4-O32-FPXX
+; RUN-TODO: llc -march=mips64 -mcpu=mips4 -target-abi o32 < %s | FileCheck %s -check-prefix=ALL -check-prefix=4-O32-NOFPXX
+; RUN-TODO: llc -march=mips64 -mcpu=mips4 -target-abi o32 -mattr=fpxx < %s | FileCheck %s -check-prefix=ALL -check-prefix=4-O32-FPXX
-; RUN-TODO: llc -march=mips64 -mcpu=mips64 -mattr=-n64,+o32 < %s | FileCheck %s -check-prefix=ALL -check-prefix=64-O32-NOFPXX
-; RUN-TODO: llc -march=mips64 -mcpu=mips64 -mattr=-n64,+o32 -mattr=fpxx < %s | FileCheck %s -check-prefix=ALL -check-prefix=64-O32-FPXX
+; RUN-TODO: llc -march=mips64 -mcpu=mips64 -target-abi o32 < %s | FileCheck %s -check-prefix=ALL -check-prefix=64-O32-NOFPXX
+; RUN-TODO: llc -march=mips64 -mcpu=mips64 -target-abi o32 -mattr=fpxx < %s | FileCheck %s -check-prefix=ALL -check-prefix=64-O32-FPXX
declare double @dbl();
diff --git a/test/CodeGen/Mips/global-address.ll b/test/CodeGen/Mips/global-address.ll
index 0785cfcc0515..ecf5e563a577 100644
--- a/test/CodeGen/Mips/global-address.ll
+++ b/test/CodeGen/Mips/global-address.ll
@@ -1,9 +1,9 @@
; RUN: llc -march=mipsel -relocation-model=pic < %s | FileCheck %s -check-prefix=PIC-O32
; RUN: llc -march=mipsel -relocation-model=static -mtriple=mipsel-linux-gnu < %s | FileCheck %s -check-prefix=STATIC-O32
-; RUN: llc -march=mips64el -mcpu=mips64r2 -mattr=-n64,n32 -relocation-model=pic < %s | FileCheck %s -check-prefix=PIC-N32
-; RUN: llc -march=mips64el -mcpu=mips64r2 -mattr=-n64,n32 -relocation-model=static -mtriple=mipsel-linux-gnu < %s | FileCheck %s -check-prefix=STATIC-N32
-; RUN: llc -march=mips64el -mcpu=mips64r2 -mattr=-n64,n64 -relocation-model=pic < %s | FileCheck %s -check-prefix=PIC-N64
-; RUN: llc -march=mips64el -mcpu=mips64r2 -mattr=-n64,n64 -relocation-model=static < %s | FileCheck %s -check-prefix=STATIC-N64
+; RUN: llc -march=mips64el -mcpu=mips64r2 -target-abi n32 -relocation-model=pic < %s | FileCheck %s -check-prefix=PIC-N32
+; RUN: llc -march=mips64el -mcpu=mips64r2 -target-abi n32 -relocation-model=static -mtriple=mipsel-linux-gnu < %s | FileCheck %s -check-prefix=STATIC-N32
+; RUN: llc -march=mips64el -mcpu=mips64r2 -target-abi n64 -relocation-model=pic < %s | FileCheck %s -check-prefix=PIC-N64
+; RUN: llc -march=mips64el -mcpu=mips64r2 -target-abi n64 -relocation-model=static < %s | FileCheck %s -check-prefix=STATIC-N64
@s1 = internal unnamed_addr global i32 8, align 4
@g1 = external global i32
@@ -33,9 +33,9 @@ entry:
; STATIC-N64: lw ${{[0-9]+}}, %got_ofst(s1)($[[R1]])
; STATIC-N64: ld ${{[0-9]+}}, %got_disp(g1)
- %0 = load i32* @s1, align 4
+ %0 = load i32, i32* @s1, align 4
tail call void @foo1(i32 %0) nounwind
- %1 = load i32* @g1, align 4
+ %1 = load i32, i32* @g1, align 4
store i32 %1, i32* @s1, align 4
%add = add nsw i32 %1, 2
store i32 %add, i32* @g1, align 4
diff --git a/test/CodeGen/Mips/gpreg-lazy-binding.ll b/test/CodeGen/Mips/gpreg-lazy-binding.ll
index 3a636d82533f..800a74f5358f 100644
--- a/test/CodeGen/Mips/gpreg-lazy-binding.ll
+++ b/test/CodeGen/Mips/gpreg-lazy-binding.ll
@@ -19,7 +19,7 @@ declare void @externalFunc()
define internal fastcc void @internalFunc() nounwind noinline {
entry:
- %0 = load i32* @g, align 4
+ %0 = load i32, i32* @g, align 4
%inc = add nsw i32 %0, 1
store i32 %inc, i32* @g, align 4
ret void
diff --git a/test/CodeGen/Mips/gprestore.ll b/test/CodeGen/Mips/gprestore.ll
index cbcf0c93491c..b1c2ad1d2756 100644
--- a/test/CodeGen/Mips/gprestore.ll
+++ b/test/CodeGen/Mips/gprestore.ll
@@ -17,11 +17,11 @@ entry:
; CHECK: jalr
; CHECK-NOT: got({{.*}})($gp)
; CHECK: lw $gp
- tail call void (...)* @f1() nounwind
- %tmp = load i32* @p, align 4
+ tail call void (...) @f1() nounwind
+ %tmp = load i32, i32* @p, align 4
tail call void @f2(i32 %tmp) nounwind
- %tmp1 = load i32* @q, align 4
- %tmp2 = load i32* @r, align 4
+ %tmp1 = load i32, i32* @q, align 4
+ %tmp2 = load i32, i32* @r, align 4
tail call void @f3(i32 %tmp1, i32 %tmp2) nounwind
ret void
}
diff --git a/test/CodeGen/Mips/helloworld.ll b/test/CodeGen/Mips/helloworld.ll
index 36f4ad6b55c0..a0dbdf3afd47 100644
--- a/test/CodeGen/Mips/helloworld.ll
+++ b/test/CodeGen/Mips/helloworld.ll
@@ -12,7 +12,7 @@
define i32 @main() nounwind {
entry:
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([13 x i8]* @.str, i32 0, i32 0))
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([13 x i8], [13 x i8]* @.str, i32 0, i32 0))
ret i32 0
; SR: .set mips16
diff --git a/test/CodeGen/Mips/hf16_1.ll b/test/CodeGen/Mips/hf16_1.ll
index 9879cd523af3..103fd2d7fd63 100644
--- a/test/CodeGen/Mips/hf16_1.ll
+++ b/test/CodeGen/Mips/hf16_1.ll
@@ -11,96 +11,96 @@
define void @foo() nounwind {
entry:
- %0 = load float* @x, align 4
+ %0 = load float, float* @x, align 4
call void @v_sf(float %0)
- %1 = load double* @xd, align 8
+ %1 = load double, double* @xd, align 8
call void @v_df(double %1)
- %2 = load float* @x, align 4
- %3 = load float* @y, align 4
+ %2 = load float, float* @x, align 4
+ %3 = load float, float* @y, align 4
call void @v_sf_sf(float %2, float %3)
- %4 = load double* @xd, align 8
- %5 = load float* @x, align 4
+ %4 = load double, double* @xd, align 8
+ %5 = load float, float* @x, align 4
call void @v_df_sf(double %4, float %5)
- %6 = load double* @xd, align 8
- %7 = load double* @yd, align 8
+ %6 = load double, double* @xd, align 8
+ %7 = load double, double* @yd, align 8
call void @v_df_df(double %6, double %7)
%call = call float @sf_v()
- %8 = load float* @x, align 4
+ %8 = load float, float* @x, align 4
%call1 = call float @sf_sf(float %8)
- %9 = load double* @xd, align 8
+ %9 = load double, double* @xd, align 8
%call2 = call float @sf_df(double %9)
- %10 = load float* @x, align 4
- %11 = load float* @y, align 4
+ %10 = load float, float* @x, align 4
+ %11 = load float, float* @y, align 4
%call3 = call float @sf_sf_sf(float %10, float %11)
- %12 = load double* @xd, align 8
- %13 = load float* @x, align 4
+ %12 = load double, double* @xd, align 8
+ %13 = load float, float* @x, align 4
%call4 = call float @sf_df_sf(double %12, float %13)
- %14 = load double* @xd, align 8
- %15 = load double* @yd, align 8
+ %14 = load double, double* @xd, align 8
+ %15 = load double, double* @yd, align 8
%call5 = call float @sf_df_df(double %14, double %15)
%call6 = call double @df_v()
- %16 = load float* @x, align 4
+ %16 = load float, float* @x, align 4
%call7 = call double @df_sf(float %16)
- %17 = load double* @xd, align 8
+ %17 = load double, double* @xd, align 8
%call8 = call double @df_df(double %17)
- %18 = load float* @x, align 4
- %19 = load float* @y, align 4
+ %18 = load float, float* @x, align 4
+ %19 = load float, float* @y, align 4
%call9 = call double @df_sf_sf(float %18, float %19)
- %20 = load double* @xd, align 8
- %21 = load float* @x, align 4
+ %20 = load double, double* @xd, align 8
+ %21 = load float, float* @x, align 4
%call10 = call double @df_df_sf(double %20, float %21)
- %22 = load double* @xd, align 8
- %23 = load double* @yd, align 8
+ %22 = load double, double* @xd, align 8
+ %23 = load double, double* @yd, align 8
%call11 = call double @df_df_df(double %22, double %23)
%call12 = call { float, float } @sc_v()
%24 = extractvalue { float, float } %call12, 0
%25 = extractvalue { float, float } %call12, 1
- %26 = load float* @x, align 4
+ %26 = load float, float* @x, align 4
%call13 = call { float, float } @sc_sf(float %26)
%27 = extractvalue { float, float } %call13, 0
%28 = extractvalue { float, float } %call13, 1
- %29 = load double* @xd, align 8
+ %29 = load double, double* @xd, align 8
%call14 = call { float, float } @sc_df(double %29)
%30 = extractvalue { float, float } %call14, 0
%31 = extractvalue { float, float } %call14, 1
- %32 = load float* @x, align 4
- %33 = load float* @y, align 4
+ %32 = load float, float* @x, align 4
+ %33 = load float, float* @y, align 4
%call15 = call { float, float } @sc_sf_sf(float %32, float %33)
%34 = extractvalue { float, float } %call15, 0
%35 = extractvalue { float, float } %call15, 1
- %36 = load double* @xd, align 8
- %37 = load float* @x, align 4
+ %36 = load double, double* @xd, align 8
+ %37 = load float, float* @x, align 4
%call16 = call { float, float } @sc_df_sf(double %36, float %37)
%38 = extractvalue { float, float } %call16, 0
%39 = extractvalue { float, float } %call16, 1
- %40 = load double* @xd, align 8
- %41 = load double* @yd, align 8
+ %40 = load double, double* @xd, align 8
+ %41 = load double, double* @yd, align 8
%call17 = call { float, float } @sc_df_df(double %40, double %41)
%42 = extractvalue { float, float } %call17, 0
%43 = extractvalue { float, float } %call17, 1
%call18 = call { double, double } @dc_v()
%44 = extractvalue { double, double } %call18, 0
%45 = extractvalue { double, double } %call18, 1
- %46 = load float* @x, align 4
+ %46 = load float, float* @x, align 4
%call19 = call { double, double } @dc_sf(float %46)
%47 = extractvalue { double, double } %call19, 0
%48 = extractvalue { double, double } %call19, 1
- %49 = load double* @xd, align 8
+ %49 = load double, double* @xd, align 8
%call20 = call { double, double } @dc_df(double %49)
%50 = extractvalue { double, double } %call20, 0
%51 = extractvalue { double, double } %call20, 1
- %52 = load float* @x, align 4
- %53 = load float* @y, align 4
+ %52 = load float, float* @x, align 4
+ %53 = load float, float* @y, align 4
%call21 = call { double, double } @dc_sf_sf(float %52, float %53)
%54 = extractvalue { double, double } %call21, 0
%55 = extractvalue { double, double } %call21, 1
- %56 = load double* @xd, align 8
- %57 = load float* @x, align 4
+ %56 = load double, double* @xd, align 8
+ %57 = load float, float* @x, align 4
%call22 = call { double, double } @dc_df_sf(double %56, float %57)
%58 = extractvalue { double, double } %call22, 0
%59 = extractvalue { double, double } %call22, 1
- %60 = load double* @xd, align 8
- %61 = load double* @yd, align 8
+ %60 = load double, double* @xd, align 8
+ %61 = load double, double* @yd, align 8
%call23 = call { double, double } @dc_df_df(double %60, double %61)
%62 = extractvalue { double, double } %call23, 0
%63 = extractvalue { double, double } %call23, 1
diff --git a/test/CodeGen/Mips/hf16call32.ll b/test/CodeGen/Mips/hf16call32.ll
index aec9c71c485b..3b3f8f799111 100644
--- a/test/CodeGen/Mips/hf16call32.ll
+++ b/test/CodeGen/Mips/hf16call32.ll
@@ -33,30 +33,30 @@ entry:
store float 1.000000e+00, float* @y, align 4
store double 1.000000e+00, double* @xd, align 8
store double 1.000000e+00, double* @yd, align 8
- store float 1.000000e+00, float* getelementptr inbounds ({ float, float }* @xy, i32 0, i32 0)
- store float 0.000000e+00, float* getelementptr inbounds ({ float, float }* @xy, i32 0, i32 1)
- store double 1.000000e+00, double* getelementptr inbounds ({ double, double }* @xyd, i32 0, i32 0)
- store double 0.000000e+00, double* getelementptr inbounds ({ double, double }* @xyd, i32 0, i32 1)
+ store float 1.000000e+00, float* getelementptr inbounds ({ float, float }, { float, float }* @xy, i32 0, i32 0)
+ store float 0.000000e+00, float* getelementptr inbounds ({ float, float }, { float, float }* @xy, i32 0, i32 1)
+ store double 1.000000e+00, double* getelementptr inbounds ({ double, double }, { double, double }* @xyd, i32 0, i32 0)
+ store double 0.000000e+00, double* getelementptr inbounds ({ double, double }, { double, double }* @xyd, i32 0, i32 1)
store float 1.000000e+00, float* @ret_sf, align 4
store double 1.000000e+00, double* @ret_df, align 8
- store float 1.000000e+00, float* getelementptr inbounds ({ float, float }* @ret_sc, i32 0, i32 0)
- store float 0.000000e+00, float* getelementptr inbounds ({ float, float }* @ret_sc, i32 0, i32 1)
- store double 1.000000e+00, double* getelementptr inbounds ({ double, double }* @ret_dc, i32 0, i32 0)
- store double 0.000000e+00, double* getelementptr inbounds ({ double, double }* @ret_dc, i32 0, i32 1)
+ store float 1.000000e+00, float* getelementptr inbounds ({ float, float }, { float, float }* @ret_sc, i32 0, i32 0)
+ store float 0.000000e+00, float* getelementptr inbounds ({ float, float }, { float, float }* @ret_sc, i32 0, i32 1)
+ store double 1.000000e+00, double* getelementptr inbounds ({ double, double }, { double, double }* @ret_dc, i32 0, i32 0)
+ store double 0.000000e+00, double* getelementptr inbounds ({ double, double }, { double, double }* @ret_dc, i32 0, i32 1)
store float 0.000000e+00, float* @lx, align 4
store float 0.000000e+00, float* @ly, align 4
store double 0.000000e+00, double* @lxd, align 8
store double 0.000000e+00, double* @lyd, align 8
- store float 0.000000e+00, float* getelementptr inbounds ({ float, float }* @lxy, i32 0, i32 0)
- store float 0.000000e+00, float* getelementptr inbounds ({ float, float }* @lxy, i32 0, i32 1)
- store double 0.000000e+00, double* getelementptr inbounds ({ double, double }* @lxyd, i32 0, i32 0)
- store double 0.000000e+00, double* getelementptr inbounds ({ double, double }* @lxyd, i32 0, i32 1)
+ store float 0.000000e+00, float* getelementptr inbounds ({ float, float }, { float, float }* @lxy, i32 0, i32 0)
+ store float 0.000000e+00, float* getelementptr inbounds ({ float, float }, { float, float }* @lxy, i32 0, i32 1)
+ store double 0.000000e+00, double* getelementptr inbounds ({ double, double }, { double, double }* @lxyd, i32 0, i32 0)
+ store double 0.000000e+00, double* getelementptr inbounds ({ double, double }, { double, double }* @lxyd, i32 0, i32 1)
store float 0.000000e+00, float* @lret_sf, align 4
store double 0.000000e+00, double* @lret_df, align 8
- store float 0.000000e+00, float* getelementptr inbounds ({ float, float }* @lret_sc, i32 0, i32 0)
- store float 0.000000e+00, float* getelementptr inbounds ({ float, float }* @lret_sc, i32 0, i32 1)
- store double 0.000000e+00, double* getelementptr inbounds ({ double, double }* @lret_dc, i32 0, i32 0)
- store double 0.000000e+00, double* getelementptr inbounds ({ double, double }* @lret_dc, i32 0, i32 1)
+ store float 0.000000e+00, float* getelementptr inbounds ({ float, float }, { float, float }* @lret_sc, i32 0, i32 0)
+ store float 0.000000e+00, float* getelementptr inbounds ({ float, float }, { float, float }* @lret_sc, i32 0, i32 1)
+ store double 0.000000e+00, double* getelementptr inbounds ({ double, double }, { double, double }* @lret_dc, i32 0, i32 0)
+ store double 0.000000e+00, double* getelementptr inbounds ({ double, double }, { double, double }* @lret_dc, i32 0, i32 1)
ret void
}
@@ -67,686 +67,686 @@ entry:
store i32 0, i32* %retval
call void @clear()
store float 1.500000e+00, float* @lx, align 4
- %0 = load float* @lx, align 4
+ %0 = load float, float* @lx, align 4
call void @v_sf(float %0)
- %1 = load float* @x, align 4
+ %1 = load float, float* @x, align 4
%conv = fpext float %1 to double
- %2 = load float* @lx, align 4
+ %2 = load float, float* @lx, align 4
%conv1 = fpext float %2 to double
- %3 = load float* @x, align 4
- %4 = load float* @lx, align 4
+ %3 = load float, float* @x, align 4
+ %4 = load float, float* @lx, align 4
%cmp = fcmp oeq float %3, %4
%conv2 = zext i1 %cmp to i32
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([10 x i8]* @.str, i32 0, i32 0), double %conv, double %conv1, i32 %conv2)
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([10 x i8], [10 x i8]* @.str, i32 0, i32 0), double %conv, double %conv1, i32 %conv2)
call void @clear()
store double 0x41678C29C0000000, double* @lxd, align 8
- %5 = load double* @lxd, align 8
+ %5 = load double, double* @lxd, align 8
call void @v_df(double %5)
- %6 = load double* @xd, align 8
- %7 = load double* @lxd, align 8
- %8 = load double* @xd, align 8
- %9 = load double* @lxd, align 8
+ %6 = load double, double* @xd, align 8
+ %7 = load double, double* @lxd, align 8
+ %8 = load double, double* @xd, align 8
+ %9 = load double, double* @lxd, align 8
%cmp3 = fcmp oeq double %8, %9
%conv4 = zext i1 %cmp3 to i32
- %call5 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([10 x i8]* @.str, i32 0, i32 0), double %6, double %7, i32 %conv4)
+ %call5 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([10 x i8], [10 x i8]* @.str, i32 0, i32 0), double %6, double %7, i32 %conv4)
call void @clear()
store float 9.000000e+00, float* @lx, align 4
store float 1.000000e+01, float* @ly, align 4
- %10 = load float* @lx, align 4
- %11 = load float* @ly, align 4
+ %10 = load float, float* @lx, align 4
+ %11 = load float, float* @ly, align 4
call void @v_sf_sf(float %10, float %11)
- %12 = load float* @x, align 4
+ %12 = load float, float* @x, align 4
%conv6 = fpext float %12 to double
- %13 = load float* @lx, align 4
+ %13 = load float, float* @lx, align 4
%conv7 = fpext float %13 to double
- %14 = load float* @y, align 4
+ %14 = load float, float* @y, align 4
%conv8 = fpext float %14 to double
- %15 = load float* @ly, align 4
+ %15 = load float, float* @ly, align 4
%conv9 = fpext float %15 to double
- %16 = load float* @x, align 4
- %17 = load float* @lx, align 4
+ %16 = load float, float* @x, align 4
+ %17 = load float, float* @lx, align 4
%cmp10 = fcmp oeq float %16, %17
br i1 %cmp10, label %land.rhs, label %land.end
land.rhs: ; preds = %entry
- %18 = load float* @y, align 4
- %19 = load float* @ly, align 4
+ %18 = load float, float* @y, align 4
+ %19 = load float, float* @ly, align 4
%cmp12 = fcmp oeq float %18, %19
br label %land.end
land.end: ; preds = %land.rhs, %entry
%20 = phi i1 [ false, %entry ], [ %cmp12, %land.rhs ]
%land.ext = zext i1 %20 to i32
- %call14 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([16 x i8]* @.str1, i32 0, i32 0), double %conv6, double %conv7, double %conv8, double %conv9, i32 %land.ext)
+ %call14 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([16 x i8], [16 x i8]* @.str1, i32 0, i32 0), double %conv6, double %conv7, double %conv8, double %conv9, i32 %land.ext)
call void @clear()
store float 0x3FFE666660000000, float* @lx, align 4
store double 0x4007E613249FF279, double* @lyd, align 8
- %21 = load float* @lx, align 4
- %22 = load double* @lyd, align 8
+ %21 = load float, float* @lx, align 4
+ %22 = load double, double* @lyd, align 8
call void @v_sf_df(float %21, double %22)
- %23 = load float* @x, align 4
+ %23 = load float, float* @x, align 4
%conv15 = fpext float %23 to double
- %24 = load float* @lx, align 4
+ %24 = load float, float* @lx, align 4
%conv16 = fpext float %24 to double
- %25 = load double* @yd, align 8
- %26 = load double* @lyd, align 8
- %27 = load float* @x, align 4
- %28 = load float* @lx, align 4
+ %25 = load double, double* @yd, align 8
+ %26 = load double, double* @lyd, align 8
+ %27 = load float, float* @x, align 4
+ %28 = load float, float* @lx, align 4
%cmp17 = fcmp oeq float %27, %28
%conv18 = zext i1 %cmp17 to i32
- %29 = load double* @yd, align 8
- %30 = load double* @lyd, align 8
+ %29 = load double, double* @yd, align 8
+ %30 = load double, double* @lyd, align 8
%cmp19 = fcmp oeq double %29, %30
%conv20 = zext i1 %cmp19 to i32
%and = and i32 %conv18, %conv20
- %call21 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([16 x i8]* @.str1, i32 0, i32 0), double %conv15, double %conv16, double %25, double %26, i32 %and)
+ %call21 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([16 x i8], [16 x i8]* @.str1, i32 0, i32 0), double %conv15, double %conv16, double %25, double %26, i32 %and)
call void @clear()
store double 0x4194E54F94000000, double* @lxd, align 8
store float 7.600000e+01, float* @ly, align 4
- %31 = load double* @lxd, align 8
- %32 = load float* @ly, align 4
+ %31 = load double, double* @lxd, align 8
+ %32 = load float, float* @ly, align 4
call void @v_df_sf(double %31, float %32)
- %33 = load double* @xd, align 8
- %34 = load double* @lxd, align 8
- %35 = load float* @y, align 4
+ %33 = load double, double* @xd, align 8
+ %34 = load double, double* @lxd, align 8
+ %35 = load float, float* @y, align 4
%conv22 = fpext float %35 to double
- %36 = load float* @ly, align 4
+ %36 = load float, float* @ly, align 4
%conv23 = fpext float %36 to double
- %37 = load double* @xd, align 8
- %38 = load double* @lxd, align 8
+ %37 = load double, double* @xd, align 8
+ %38 = load double, double* @lxd, align 8
%cmp24 = fcmp oeq double %37, %38
%conv25 = zext i1 %cmp24 to i32
- %39 = load float* @y, align 4
- %40 = load float* @ly, align 4
+ %39 = load float, float* @y, align 4
+ %40 = load float, float* @ly, align 4
%cmp26 = fcmp oeq float %39, %40
%conv27 = zext i1 %cmp26 to i32
%and28 = and i32 %conv25, %conv27
- %call29 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([16 x i8]* @.str1, i32 0, i32 0), double %33, double %34, double %conv22, double %conv23, i32 %and28)
+ %call29 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([16 x i8], [16 x i8]* @.str1, i32 0, i32 0), double %33, double %34, double %conv22, double %conv23, i32 %and28)
call void @clear()
store double 7.365198e+07, double* @lxd, align 8
store double 0x416536CD80000000, double* @lyd, align 8
- %41 = load double* @lxd, align 8
- %42 = load double* @lyd, align 8
+ %41 = load double, double* @lxd, align 8
+ %42 = load double, double* @lyd, align 8
call void @v_df_df(double %41, double %42)
- %43 = load double* @xd, align 8
- %44 = load double* @lxd, align 8
- %45 = load double* @yd, align 8
- %46 = load double* @lyd, align 8
- %47 = load double* @xd, align 8
- %48 = load double* @lxd, align 8
+ %43 = load double, double* @xd, align 8
+ %44 = load double, double* @lxd, align 8
+ %45 = load double, double* @yd, align 8
+ %46 = load double, double* @lyd, align 8
+ %47 = load double, double* @xd, align 8
+ %48 = load double, double* @lxd, align 8
%cmp30 = fcmp oeq double %47, %48
%conv31 = zext i1 %cmp30 to i32
- %49 = load double* @yd, align 8
- %50 = load double* @lyd, align 8
+ %49 = load double, double* @yd, align 8
+ %50 = load double, double* @lyd, align 8
%cmp32 = fcmp oeq double %49, %50
%conv33 = zext i1 %cmp32 to i32
%and34 = and i32 %conv31, %conv33
- %call35 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([16 x i8]* @.str1, i32 0, i32 0), double %43, double %44, double %45, double %46, i32 %and34)
+ %call35 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([16 x i8], [16 x i8]* @.str1, i32 0, i32 0), double %43, double %44, double %45, double %46, i32 %and34)
call void @clear()
store float 0x4016666660000000, float* @ret_sf, align 4
%call36 = call float @sf_v()
store float %call36, float* @lret_sf, align 4
- %51 = load float* @ret_sf, align 4
+ %51 = load float, float* @ret_sf, align 4
%conv37 = fpext float %51 to double
- %52 = load float* @lret_sf, align 4
+ %52 = load float, float* @lret_sf, align 4
%conv38 = fpext float %52 to double
- %53 = load float* @ret_sf, align 4
- %54 = load float* @lret_sf, align 4
+ %53 = load float, float* @ret_sf, align 4
+ %54 = load float, float* @lret_sf, align 4
%cmp39 = fcmp oeq float %53, %54
%conv40 = zext i1 %cmp39 to i32
- %call41 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([10 x i8]* @.str, i32 0, i32 0), double %conv37, double %conv38, i32 %conv40)
+ %call41 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([10 x i8], [10 x i8]* @.str, i32 0, i32 0), double %conv37, double %conv38, i32 %conv40)
call void @clear()
store float 4.587300e+06, float* @ret_sf, align 4
store float 3.420000e+02, float* @lx, align 4
- %55 = load float* @lx, align 4
+ %55 = load float, float* @lx, align 4
%call42 = call float @sf_sf(float %55)
store float %call42, float* @lret_sf, align 4
- %56 = load float* @ret_sf, align 4
+ %56 = load float, float* @ret_sf, align 4
%conv43 = fpext float %56 to double
- %57 = load float* @lret_sf, align 4
+ %57 = load float, float* @lret_sf, align 4
%conv44 = fpext float %57 to double
- %58 = load float* @x, align 4
+ %58 = load float, float* @x, align 4
%conv45 = fpext float %58 to double
- %59 = load float* @lx, align 4
+ %59 = load float, float* @lx, align 4
%conv46 = fpext float %59 to double
- %60 = load float* @ret_sf, align 4
- %61 = load float* @lret_sf, align 4
+ %60 = load float, float* @ret_sf, align 4
+ %61 = load float, float* @lret_sf, align 4
%cmp47 = fcmp oeq float %60, %61
%conv48 = zext i1 %cmp47 to i32
- %62 = load float* @x, align 4
- %63 = load float* @lx, align 4
+ %62 = load float, float* @x, align 4
+ %63 = load float, float* @lx, align 4
%cmp49 = fcmp oeq float %62, %63
%conv50 = zext i1 %cmp49 to i32
%and51 = and i32 %conv48, %conv50
- %call52 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([16 x i8]* @.str1, i32 0, i32 0), double %conv43, double %conv44, double %conv45, double %conv46, i32 %and51)
+ %call52 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([16 x i8], [16 x i8]* @.str1, i32 0, i32 0), double %conv43, double %conv44, double %conv45, double %conv46, i32 %and51)
call void @clear()
store float 4.445910e+06, float* @ret_sf, align 4
store double 0x419A7DB294000000, double* @lxd, align 8
- %64 = load double* @lxd, align 8
+ %64 = load double, double* @lxd, align 8
%call53 = call float @sf_df(double %64)
store float %call53, float* @lret_sf, align 4
- %65 = load float* @ret_sf, align 4
+ %65 = load float, float* @ret_sf, align 4
%conv54 = fpext float %65 to double
- %66 = load float* @lret_sf, align 4
+ %66 = load float, float* @lret_sf, align 4
%conv55 = fpext float %66 to double
- %67 = load double* @xd, align 8
- %68 = load double* @lxd, align 8
- %69 = load float* @ret_sf, align 4
- %70 = load float* @lret_sf, align 4
+ %67 = load double, double* @xd, align 8
+ %68 = load double, double* @lxd, align 8
+ %69 = load float, float* @ret_sf, align 4
+ %70 = load float, float* @lret_sf, align 4
%cmp56 = fcmp oeq float %69, %70
%conv57 = zext i1 %cmp56 to i32
- %71 = load double* @xd, align 8
- %72 = load double* @lxd, align 8
+ %71 = load double, double* @xd, align 8
+ %72 = load double, double* @lxd, align 8
%cmp58 = fcmp oeq double %71, %72
%conv59 = zext i1 %cmp58 to i32
%and60 = and i32 %conv57, %conv59
- %call61 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([16 x i8]* @.str1, i32 0, i32 0), double %conv54, double %conv55, double %67, double %68, i32 %and60)
+ %call61 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([16 x i8], [16 x i8]* @.str1, i32 0, i32 0), double %conv54, double %conv55, double %67, double %68, i32 %and60)
call void @clear()
store float 0x3FFF4BC6A0000000, float* @ret_sf, align 4
store float 4.445500e+03, float* @lx, align 4
store float 0x4068ACCCC0000000, float* @ly, align 4
- %73 = load float* @lx, align 4
- %74 = load float* @ly, align 4
+ %73 = load float, float* @lx, align 4
+ %74 = load float, float* @ly, align 4
%call62 = call float @sf_sf_sf(float %73, float %74)
store float %call62, float* @lret_sf, align 4
- %75 = load float* @ret_sf, align 4
+ %75 = load float, float* @ret_sf, align 4
%conv63 = fpext float %75 to double
- %76 = load float* @lret_sf, align 4
+ %76 = load float, float* @lret_sf, align 4
%conv64 = fpext float %76 to double
- %77 = load float* @x, align 4
+ %77 = load float, float* @x, align 4
%conv65 = fpext float %77 to double
- %78 = load float* @lx, align 4
+ %78 = load float, float* @lx, align 4
%conv66 = fpext float %78 to double
- %79 = load float* @y, align 4
+ %79 = load float, float* @y, align 4
%conv67 = fpext float %79 to double
- %80 = load float* @ly, align 4
+ %80 = load float, float* @ly, align 4
%conv68 = fpext float %80 to double
- %81 = load float* @ret_sf, align 4
- %82 = load float* @lret_sf, align 4
+ %81 = load float, float* @ret_sf, align 4
+ %82 = load float, float* @lret_sf, align 4
%cmp69 = fcmp oeq float %81, %82
br i1 %cmp69, label %land.lhs.true, label %land.end76
land.lhs.true: ; preds = %land.end
- %83 = load float* @x, align 4
- %84 = load float* @lx, align 4
+ %83 = load float, float* @x, align 4
+ %84 = load float, float* @lx, align 4
%cmp71 = fcmp oeq float %83, %84
br i1 %cmp71, label %land.rhs73, label %land.end76
land.rhs73: ; preds = %land.lhs.true
- %85 = load float* @y, align 4
- %86 = load float* @ly, align 4
+ %85 = load float, float* @y, align 4
+ %86 = load float, float* @ly, align 4
%cmp74 = fcmp oeq float %85, %86
br label %land.end76
land.end76: ; preds = %land.rhs73, %land.lhs.true, %land.end
%87 = phi i1 [ false, %land.lhs.true ], [ false, %land.end ], [ %cmp74, %land.rhs73 ]
%land.ext77 = zext i1 %87 to i32
- %call78 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([22 x i8]* @.str2, i32 0, i32 0), double %conv63, double %conv64, double %conv65, double %conv66, double %conv67, double %conv68, i32 %land.ext77)
+ %call78 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([22 x i8], [22 x i8]* @.str2, i32 0, i32 0), double %conv63, double %conv64, double %conv65, double %conv66, double %conv67, double %conv68, i32 %land.ext77)
call void @clear()
store float 9.991300e+04, float* @ret_sf, align 4
store float 1.114500e+04, float* @lx, align 4
store double 9.994445e+07, double* @lyd, align 8
- %88 = load float* @lx, align 4
- %89 = load double* @lyd, align 8
+ %88 = load float, float* @lx, align 4
+ %89 = load double, double* @lyd, align 8
%call79 = call float @sf_sf_df(float %88, double %89)
store float %call79, float* @lret_sf, align 4
- %90 = load float* @ret_sf, align 4
+ %90 = load float, float* @ret_sf, align 4
%conv80 = fpext float %90 to double
- %91 = load float* @lret_sf, align 4
+ %91 = load float, float* @lret_sf, align 4
%conv81 = fpext float %91 to double
- %92 = load float* @x, align 4
+ %92 = load float, float* @x, align 4
%conv82 = fpext float %92 to double
- %93 = load float* @lx, align 4
+ %93 = load float, float* @lx, align 4
%conv83 = fpext float %93 to double
- %94 = load double* @yd, align 8
- %95 = load double* @lyd, align 8
- %96 = load float* @ret_sf, align 4
- %97 = load float* @lret_sf, align 4
+ %94 = load double, double* @yd, align 8
+ %95 = load double, double* @lyd, align 8
+ %96 = load float, float* @ret_sf, align 4
+ %97 = load float, float* @lret_sf, align 4
%cmp84 = fcmp oeq float %96, %97
br i1 %cmp84, label %land.lhs.true86, label %land.end92
land.lhs.true86: ; preds = %land.end76
- %98 = load float* @x, align 4
- %99 = load float* @lx, align 4
+ %98 = load float, float* @x, align 4
+ %99 = load float, float* @lx, align 4
%cmp87 = fcmp oeq float %98, %99
br i1 %cmp87, label %land.rhs89, label %land.end92
land.rhs89: ; preds = %land.lhs.true86
- %100 = load double* @yd, align 8
- %101 = load double* @lyd, align 8
+ %100 = load double, double* @yd, align 8
+ %101 = load double, double* @lyd, align 8
%cmp90 = fcmp oeq double %100, %101
br label %land.end92
land.end92: ; preds = %land.rhs89, %land.lhs.true86, %land.end76
%102 = phi i1 [ false, %land.lhs.true86 ], [ false, %land.end76 ], [ %cmp90, %land.rhs89 ]
%land.ext93 = zext i1 %102 to i32
- %call94 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([22 x i8]* @.str2, i32 0, i32 0), double %conv80, double %conv81, double %conv82, double %conv83, double %94, double %95, i32 %land.ext93)
+ %call94 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([22 x i8], [22 x i8]* @.str2, i32 0, i32 0), double %conv80, double %conv81, double %conv82, double %conv83, double %94, double %95, i32 %land.ext93)
call void @clear()
store float 0x417CCC7A00000000, float* @ret_sf, align 4
store double 0x4172034530000000, double* @lxd, align 8
store float 4.456200e+04, float* @ly, align 4
- %103 = load double* @lxd, align 8
- %104 = load float* @ly, align 4
+ %103 = load double, double* @lxd, align 8
+ %104 = load float, float* @ly, align 4
%call95 = call float @sf_df_sf(double %103, float %104)
store float %call95, float* @lret_sf, align 4
- %105 = load float* @ret_sf, align 4
+ %105 = load float, float* @ret_sf, align 4
%conv96 = fpext float %105 to double
- %106 = load float* @lret_sf, align 4
+ %106 = load float, float* @lret_sf, align 4
%conv97 = fpext float %106 to double
- %107 = load double* @xd, align 8
- %108 = load double* @lxd, align 8
- %109 = load float* @y, align 4
+ %107 = load double, double* @xd, align 8
+ %108 = load double, double* @lxd, align 8
+ %109 = load float, float* @y, align 4
%conv98 = fpext float %109 to double
- %110 = load float* @ly, align 4
+ %110 = load float, float* @ly, align 4
%conv99 = fpext float %110 to double
- %111 = load float* @ret_sf, align 4
- %112 = load float* @lret_sf, align 4
+ %111 = load float, float* @ret_sf, align 4
+ %112 = load float, float* @lret_sf, align 4
%cmp100 = fcmp oeq float %111, %112
br i1 %cmp100, label %land.lhs.true102, label %land.end108
land.lhs.true102: ; preds = %land.end92
- %113 = load double* @xd, align 8
- %114 = load double* @lxd, align 8
+ %113 = load double, double* @xd, align 8
+ %114 = load double, double* @lxd, align 8
%cmp103 = fcmp oeq double %113, %114
br i1 %cmp103, label %land.rhs105, label %land.end108
land.rhs105: ; preds = %land.lhs.true102
- %115 = load float* @y, align 4
- %116 = load float* @ly, align 4
+ %115 = load float, float* @y, align 4
+ %116 = load float, float* @ly, align 4
%cmp106 = fcmp oeq float %115, %116
br label %land.end108
land.end108: ; preds = %land.rhs105, %land.lhs.true102, %land.end92
%117 = phi i1 [ false, %land.lhs.true102 ], [ false, %land.end92 ], [ %cmp106, %land.rhs105 ]
%land.ext109 = zext i1 %117 to i32
- %call110 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([22 x i8]* @.str2, i32 0, i32 0), double %conv96, double %conv97, double %107, double %108, double %conv98, double %conv99, i32 %land.ext109)
+ %call110 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([22 x i8], [22 x i8]* @.str2, i32 0, i32 0), double %conv96, double %conv97, double %107, double %108, double %conv98, double %conv99, i32 %land.ext109)
call void @clear()
store float 3.987721e+06, float* @ret_sf, align 4
store double 0x3FF1F49F6DDDC2D8, double* @lxd, align 8
store double 0x409129F306A2B170, double* @lyd, align 8
- %118 = load double* @lxd, align 8
- %119 = load double* @lyd, align 8
+ %118 = load double, double* @lxd, align 8
+ %119 = load double, double* @lyd, align 8
%call111 = call float @sf_df_df(double %118, double %119)
store float %call111, float* @lret_sf, align 4
- %120 = load float* @ret_sf, align 4
+ %120 = load float, float* @ret_sf, align 4
%conv112 = fpext float %120 to double
- %121 = load float* @lret_sf, align 4
+ %121 = load float, float* @lret_sf, align 4
%conv113 = fpext float %121 to double
- %122 = load double* @xd, align 8
- %123 = load double* @lxd, align 8
- %124 = load double* @yd, align 8
- %125 = load double* @lyd, align 8
- %126 = load float* @ret_sf, align 4
- %127 = load float* @lret_sf, align 4
+ %122 = load double, double* @xd, align 8
+ %123 = load double, double* @lxd, align 8
+ %124 = load double, double* @yd, align 8
+ %125 = load double, double* @lyd, align 8
+ %126 = load float, float* @ret_sf, align 4
+ %127 = load float, float* @lret_sf, align 4
%cmp114 = fcmp oeq float %126, %127
br i1 %cmp114, label %land.lhs.true116, label %land.end122
land.lhs.true116: ; preds = %land.end108
- %128 = load double* @xd, align 8
- %129 = load double* @lxd, align 8
+ %128 = load double, double* @xd, align 8
+ %129 = load double, double* @lxd, align 8
%cmp117 = fcmp oeq double %128, %129
br i1 %cmp117, label %land.rhs119, label %land.end122
land.rhs119: ; preds = %land.lhs.true116
- %130 = load double* @yd, align 8
- %131 = load double* @lyd, align 8
+ %130 = load double, double* @yd, align 8
+ %131 = load double, double* @lyd, align 8
%cmp120 = fcmp oeq double %130, %131
br label %land.end122
land.end122: ; preds = %land.rhs119, %land.lhs.true116, %land.end108
%132 = phi i1 [ false, %land.lhs.true116 ], [ false, %land.end108 ], [ %cmp120, %land.rhs119 ]
%land.ext123 = zext i1 %132 to i32
- %call124 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([22 x i8]* @.str2, i32 0, i32 0), double %conv112, double %conv113, double %122, double %123, double %124, double %125, i32 %land.ext123)
+ %call124 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([22 x i8], [22 x i8]* @.str2, i32 0, i32 0), double %conv112, double %conv113, double %122, double %123, double %124, double %125, i32 %land.ext123)
call void @clear()
store double 1.561234e+01, double* @ret_df, align 8
%call125 = call double @df_v()
store double %call125, double* @lret_df, align 8
- %133 = load double* @ret_df, align 8
- %134 = load double* @lret_df, align 8
- %135 = load double* @ret_df, align 8
- %136 = load double* @lret_df, align 8
+ %133 = load double, double* @ret_df, align 8
+ %134 = load double, double* @lret_df, align 8
+ %135 = load double, double* @ret_df, align 8
+ %136 = load double, double* @lret_df, align 8
%cmp126 = fcmp oeq double %135, %136
%conv127 = zext i1 %cmp126 to i32
- %call128 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([10 x i8]* @.str, i32 0, i32 0), double %133, double %134, i32 %conv127)
+ %call128 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([10 x i8], [10 x i8]* @.str, i32 0, i32 0), double %133, double %134, i32 %conv127)
call void @clear()
store double 1.345873e+01, double* @ret_df, align 8
store float 3.434520e+05, float* @lx, align 4
- %137 = load float* @lx, align 4
+ %137 = load float, float* @lx, align 4
%call129 = call double @df_sf(float %137)
store double %call129, double* @lret_df, align 8
- %138 = load double* @ret_df, align 8
- %139 = load double* @lret_df, align 8
- %140 = load float* @x, align 4
+ %138 = load double, double* @ret_df, align 8
+ %139 = load double, double* @lret_df, align 8
+ %140 = load float, float* @x, align 4
%conv130 = fpext float %140 to double
- %141 = load float* @lx, align 4
+ %141 = load float, float* @lx, align 4
%conv131 = fpext float %141 to double
- %142 = load double* @ret_df, align 8
- %143 = load double* @lret_df, align 8
+ %142 = load double, double* @ret_df, align 8
+ %143 = load double, double* @lret_df, align 8
%cmp132 = fcmp oeq double %142, %143
%conv133 = zext i1 %cmp132 to i32
- %144 = load float* @x, align 4
- %145 = load float* @lx, align 4
+ %144 = load float, float* @x, align 4
+ %145 = load float, float* @lx, align 4
%cmp134 = fcmp oeq float %144, %145
%conv135 = zext i1 %cmp134 to i32
%and136 = and i32 %conv133, %conv135
- %call137 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([16 x i8]* @.str1, i32 0, i32 0), double %138, double %139, double %conv130, double %conv131, i32 %and136)
+ %call137 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([16 x i8], [16 x i8]* @.str1, i32 0, i32 0), double %138, double %139, double %conv130, double %conv131, i32 %and136)
call void @clear()
store double 0x4084F3AB7AA25D8D, double* @ret_df, align 8
store double 0x4114F671D2F1A9FC, double* @lxd, align 8
- %146 = load double* @lxd, align 8
+ %146 = load double, double* @lxd, align 8
%call138 = call double @df_df(double %146)
store double %call138, double* @lret_df, align 8
- %147 = load double* @ret_df, align 8
- %148 = load double* @lret_df, align 8
- %149 = load double* @xd, align 8
- %150 = load double* @lxd, align 8
- %151 = load double* @ret_df, align 8
- %152 = load double* @lret_df, align 8
+ %147 = load double, double* @ret_df, align 8
+ %148 = load double, double* @lret_df, align 8
+ %149 = load double, double* @xd, align 8
+ %150 = load double, double* @lxd, align 8
+ %151 = load double, double* @ret_df, align 8
+ %152 = load double, double* @lret_df, align 8
%cmp139 = fcmp oeq double %151, %152
%conv140 = zext i1 %cmp139 to i32
- %153 = load double* @xd, align 8
- %154 = load double* @lxd, align 8
+ %153 = load double, double* @xd, align 8
+ %154 = load double, double* @lxd, align 8
%cmp141 = fcmp oeq double %153, %154
%conv142 = zext i1 %cmp141 to i32
%and143 = and i32 %conv140, %conv142
- %call144 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([16 x i8]* @.str1, i32 0, i32 0), double %147, double %148, double %149, double %150, i32 %and143)
+ %call144 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([16 x i8], [16 x i8]* @.str1, i32 0, i32 0), double %147, double %148, double %149, double %150, i32 %and143)
call void @clear()
store double 6.781956e+03, double* @ret_df, align 8
store float 4.445500e+03, float* @lx, align 4
store float 0x4068ACCCC0000000, float* @ly, align 4
- %155 = load float* @lx, align 4
- %156 = load float* @ly, align 4
+ %155 = load float, float* @lx, align 4
+ %156 = load float, float* @ly, align 4
%call145 = call double @df_sf_sf(float %155, float %156)
store double %call145, double* @lret_df, align 8
- %157 = load double* @ret_df, align 8
- %158 = load double* @lret_df, align 8
- %159 = load float* @x, align 4
+ %157 = load double, double* @ret_df, align 8
+ %158 = load double, double* @lret_df, align 8
+ %159 = load float, float* @x, align 4
%conv146 = fpext float %159 to double
- %160 = load float* @lx, align 4
+ %160 = load float, float* @lx, align 4
%conv147 = fpext float %160 to double
- %161 = load float* @y, align 4
+ %161 = load float, float* @y, align 4
%conv148 = fpext float %161 to double
- %162 = load float* @ly, align 4
+ %162 = load float, float* @ly, align 4
%conv149 = fpext float %162 to double
- %163 = load double* @ret_df, align 8
- %164 = load double* @lret_df, align 8
+ %163 = load double, double* @ret_df, align 8
+ %164 = load double, double* @lret_df, align 8
%cmp150 = fcmp oeq double %163, %164
br i1 %cmp150, label %land.lhs.true152, label %land.end158
land.lhs.true152: ; preds = %land.end122
- %165 = load float* @x, align 4
- %166 = load float* @lx, align 4
+ %165 = load float, float* @x, align 4
+ %166 = load float, float* @lx, align 4
%cmp153 = fcmp oeq float %165, %166
br i1 %cmp153, label %land.rhs155, label %land.end158
land.rhs155: ; preds = %land.lhs.true152
- %167 = load float* @y, align 4
- %168 = load float* @ly, align 4
+ %167 = load float, float* @y, align 4
+ %168 = load float, float* @ly, align 4
%cmp156 = fcmp oeq float %167, %168
br label %land.end158
land.end158: ; preds = %land.rhs155, %land.lhs.true152, %land.end122
%169 = phi i1 [ false, %land.lhs.true152 ], [ false, %land.end122 ], [ %cmp156, %land.rhs155 ]
%land.ext159 = zext i1 %169 to i32
- %call160 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([22 x i8]* @.str2, i32 0, i32 0), double %157, double %158, double %conv146, double %conv147, double %conv148, double %conv149, i32 %land.ext159)
+ %call160 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([22 x i8], [22 x i8]* @.str2, i32 0, i32 0), double %157, double %158, double %conv146, double %conv147, double %conv148, double %conv149, i32 %land.ext159)
call void @clear()
store double 1.889130e+05, double* @ret_df, align 8
store float 9.111450e+05, float* @lx, align 4
store double 0x4185320A58000000, double* @lyd, align 8
- %170 = load float* @lx, align 4
- %171 = load double* @lyd, align 8
+ %170 = load float, float* @lx, align 4
+ %171 = load double, double* @lyd, align 8
%call161 = call double @df_sf_df(float %170, double %171)
store double %call161, double* @lret_df, align 8
- %172 = load double* @ret_df, align 8
- %173 = load double* @lret_df, align 8
- %174 = load float* @x, align 4
+ %172 = load double, double* @ret_df, align 8
+ %173 = load double, double* @lret_df, align 8
+ %174 = load float, float* @x, align 4
%conv162 = fpext float %174 to double
- %175 = load float* @lx, align 4
+ %175 = load float, float* @lx, align 4
%conv163 = fpext float %175 to double
- %176 = load double* @yd, align 8
- %177 = load double* @lyd, align 8
- %178 = load double* @ret_df, align 8
- %179 = load double* @lret_df, align 8
+ %176 = load double, double* @yd, align 8
+ %177 = load double, double* @lyd, align 8
+ %178 = load double, double* @ret_df, align 8
+ %179 = load double, double* @lret_df, align 8
%cmp164 = fcmp oeq double %178, %179
br i1 %cmp164, label %land.lhs.true166, label %land.end172
land.lhs.true166: ; preds = %land.end158
- %180 = load float* @x, align 4
- %181 = load float* @lx, align 4
+ %180 = load float, float* @x, align 4
+ %181 = load float, float* @lx, align 4
%cmp167 = fcmp oeq float %180, %181
br i1 %cmp167, label %land.rhs169, label %land.end172
land.rhs169: ; preds = %land.lhs.true166
- %182 = load double* @yd, align 8
- %183 = load double* @lyd, align 8
+ %182 = load double, double* @yd, align 8
+ %183 = load double, double* @lyd, align 8
%cmp170 = fcmp oeq double %182, %183
br label %land.end172
land.end172: ; preds = %land.rhs169, %land.lhs.true166, %land.end158
%184 = phi i1 [ false, %land.lhs.true166 ], [ false, %land.end158 ], [ %cmp170, %land.rhs169 ]
%land.ext173 = zext i1 %184 to i32
- %call174 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([22 x i8]* @.str2, i32 0, i32 0), double %172, double %173, double %conv162, double %conv163, double %176, double %177, i32 %land.ext173)
+ %call174 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([22 x i8], [22 x i8]* @.str2, i32 0, i32 0), double %172, double %173, double %conv162, double %conv163, double %176, double %177, i32 %land.ext173)
call void @clear()
store double 0x418B2DB900000000, double* @ret_df, align 8
store double 0x41B1EF2ED3000000, double* @lxd, align 8
store float 1.244562e+06, float* @ly, align 4
- %185 = load double* @lxd, align 8
- %186 = load float* @ly, align 4
+ %185 = load double, double* @lxd, align 8
+ %186 = load float, float* @ly, align 4
%call175 = call double @df_df_sf(double %185, float %186)
store double %call175, double* @lret_df, align 8
- %187 = load double* @ret_df, align 8
- %188 = load double* @lret_df, align 8
- %189 = load double* @xd, align 8
- %190 = load double* @lxd, align 8
- %191 = load float* @y, align 4
+ %187 = load double, double* @ret_df, align 8
+ %188 = load double, double* @lret_df, align 8
+ %189 = load double, double* @xd, align 8
+ %190 = load double, double* @lxd, align 8
+ %191 = load float, float* @y, align 4
%conv176 = fpext float %191 to double
- %192 = load float* @ly, align 4
+ %192 = load float, float* @ly, align 4
%conv177 = fpext float %192 to double
- %193 = load double* @ret_df, align 8
- %194 = load double* @lret_df, align 8
+ %193 = load double, double* @ret_df, align 8
+ %194 = load double, double* @lret_df, align 8
%cmp178 = fcmp oeq double %193, %194
br i1 %cmp178, label %land.lhs.true180, label %land.end186
land.lhs.true180: ; preds = %land.end172
- %195 = load double* @xd, align 8
- %196 = load double* @lxd, align 8
+ %195 = load double, double* @xd, align 8
+ %196 = load double, double* @lxd, align 8
%cmp181 = fcmp oeq double %195, %196
br i1 %cmp181, label %land.rhs183, label %land.end186
land.rhs183: ; preds = %land.lhs.true180
- %197 = load float* @y, align 4
- %198 = load float* @ly, align 4
+ %197 = load float, float* @y, align 4
+ %198 = load float, float* @ly, align 4
%cmp184 = fcmp oeq float %197, %198
br label %land.end186
land.end186: ; preds = %land.rhs183, %land.lhs.true180, %land.end172
%199 = phi i1 [ false, %land.lhs.true180 ], [ false, %land.end172 ], [ %cmp184, %land.rhs183 ]
%land.ext187 = zext i1 %199 to i32
- %call188 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([22 x i8]* @.str2, i32 0, i32 0), double %187, double %188, double %189, double %190, double %conv176, double %conv177, i32 %land.ext187)
+ %call188 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([22 x i8], [22 x i8]* @.str2, i32 0, i32 0), double %187, double %188, double %189, double %190, double %conv176, double %conv177, i32 %land.ext187)
call void @clear()
store double 3.987721e+06, double* @ret_df, align 8
store double 5.223560e+00, double* @lxd, align 8
store double 0x40B7D37CC1A8AC5C, double* @lyd, align 8
- %200 = load double* @lxd, align 8
- %201 = load double* @lyd, align 8
+ %200 = load double, double* @lxd, align 8
+ %201 = load double, double* @lyd, align 8
%call189 = call double @df_df_df(double %200, double %201)
store double %call189, double* @lret_df, align 8
- %202 = load double* @ret_df, align 8
- %203 = load double* @lret_df, align 8
- %204 = load double* @xd, align 8
- %205 = load double* @lxd, align 8
- %206 = load double* @yd, align 8
- %207 = load double* @lyd, align 8
- %208 = load double* @ret_df, align 8
- %209 = load double* @lret_df, align 8
+ %202 = load double, double* @ret_df, align 8
+ %203 = load double, double* @lret_df, align 8
+ %204 = load double, double* @xd, align 8
+ %205 = load double, double* @lxd, align 8
+ %206 = load double, double* @yd, align 8
+ %207 = load double, double* @lyd, align 8
+ %208 = load double, double* @ret_df, align 8
+ %209 = load double, double* @lret_df, align 8
%cmp190 = fcmp oeq double %208, %209
br i1 %cmp190, label %land.lhs.true192, label %land.end198
land.lhs.true192: ; preds = %land.end186
- %210 = load double* @xd, align 8
- %211 = load double* @lxd, align 8
+ %210 = load double, double* @xd, align 8
+ %211 = load double, double* @lxd, align 8
%cmp193 = fcmp oeq double %210, %211
br i1 %cmp193, label %land.rhs195, label %land.end198
land.rhs195: ; preds = %land.lhs.true192
- %212 = load double* @yd, align 8
- %213 = load double* @lyd, align 8
+ %212 = load double, double* @yd, align 8
+ %213 = load double, double* @lyd, align 8
%cmp196 = fcmp oeq double %212, %213
br label %land.end198
land.end198: ; preds = %land.rhs195, %land.lhs.true192, %land.end186
%214 = phi i1 [ false, %land.lhs.true192 ], [ false, %land.end186 ], [ %cmp196, %land.rhs195 ]
%land.ext199 = zext i1 %214 to i32
- %call200 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([22 x i8]* @.str2, i32 0, i32 0), double %202, double %203, double %204, double %205, double %206, double %207, i32 %land.ext199)
+ %call200 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([22 x i8], [22 x i8]* @.str2, i32 0, i32 0), double %202, double %203, double %204, double %205, double %206, double %207, i32 %land.ext199)
call void @clear()
- store float 4.500000e+00, float* getelementptr inbounds ({ float, float }* @ret_sc, i32 0, i32 0)
- store float 7.000000e+00, float* getelementptr inbounds ({ float, float }* @ret_sc, i32 0, i32 1)
+ store float 4.500000e+00, float* getelementptr inbounds ({ float, float }, { float, float }* @ret_sc, i32 0, i32 0)
+ store float 7.000000e+00, float* getelementptr inbounds ({ float, float }, { float, float }* @ret_sc, i32 0, i32 1)
%call201 = call { float, float } @sc_v()
%215 = extractvalue { float, float } %call201, 0
%216 = extractvalue { float, float } %call201, 1
- store float %215, float* getelementptr inbounds ({ float, float }* @lret_sc, i32 0, i32 0)
- store float %216, float* getelementptr inbounds ({ float, float }* @lret_sc, i32 0, i32 1)
- %ret_sc.real = load float* getelementptr inbounds ({ float, float }* @ret_sc, i32 0, i32 0)
- %ret_sc.imag = load float* getelementptr inbounds ({ float, float }* @ret_sc, i32 0, i32 1)
+ store float %215, float* getelementptr inbounds ({ float, float }, { float, float }* @lret_sc, i32 0, i32 0)
+ store float %216, float* getelementptr inbounds ({ float, float }, { float, float }* @lret_sc, i32 0, i32 1)
+ %ret_sc.real = load float, float* getelementptr inbounds ({ float, float }, { float, float }* @ret_sc, i32 0, i32 0)
+ %ret_sc.imag = load float, float* getelementptr inbounds ({ float, float }, { float, float }* @ret_sc, i32 0, i32 1)
%conv202 = fpext float %ret_sc.real to double
%conv203 = fpext float %ret_sc.imag to double
- %ret_sc.real204 = load float* getelementptr inbounds ({ float, float }* @ret_sc, i32 0, i32 0)
- %ret_sc.imag205 = load float* getelementptr inbounds ({ float, float }* @ret_sc, i32 0, i32 1)
+ %ret_sc.real204 = load float, float* getelementptr inbounds ({ float, float }, { float, float }* @ret_sc, i32 0, i32 0)
+ %ret_sc.imag205 = load float, float* getelementptr inbounds ({ float, float }, { float, float }* @ret_sc, i32 0, i32 1)
%conv206 = fpext float %ret_sc.real204 to double
%conv207 = fpext float %ret_sc.imag205 to double
- %lret_sc.real = load float* getelementptr inbounds ({ float, float }* @lret_sc, i32 0, i32 0)
- %lret_sc.imag = load float* getelementptr inbounds ({ float, float }* @lret_sc, i32 0, i32 1)
+ %lret_sc.real = load float, float* getelementptr inbounds ({ float, float }, { float, float }* @lret_sc, i32 0, i32 0)
+ %lret_sc.imag = load float, float* getelementptr inbounds ({ float, float }, { float, float }* @lret_sc, i32 0, i32 1)
%conv208 = fpext float %lret_sc.real to double
%conv209 = fpext float %lret_sc.imag to double
- %lret_sc.real210 = load float* getelementptr inbounds ({ float, float }* @lret_sc, i32 0, i32 0)
- %lret_sc.imag211 = load float* getelementptr inbounds ({ float, float }* @lret_sc, i32 0, i32 1)
+ %lret_sc.real210 = load float, float* getelementptr inbounds ({ float, float }, { float, float }* @lret_sc, i32 0, i32 0)
+ %lret_sc.imag211 = load float, float* getelementptr inbounds ({ float, float }, { float, float }* @lret_sc, i32 0, i32 1)
%conv212 = fpext float %lret_sc.real210 to double
%conv213 = fpext float %lret_sc.imag211 to double
- %ret_sc.real214 = load float* getelementptr inbounds ({ float, float }* @ret_sc, i32 0, i32 0)
- %ret_sc.imag215 = load float* getelementptr inbounds ({ float, float }* @ret_sc, i32 0, i32 1)
- %lret_sc.real216 = load float* getelementptr inbounds ({ float, float }* @lret_sc, i32 0, i32 0)
- %lret_sc.imag217 = load float* getelementptr inbounds ({ float, float }* @lret_sc, i32 0, i32 1)
+ %ret_sc.real214 = load float, float* getelementptr inbounds ({ float, float }, { float, float }* @ret_sc, i32 0, i32 0)
+ %ret_sc.imag215 = load float, float* getelementptr inbounds ({ float, float }, { float, float }* @ret_sc, i32 0, i32 1)
+ %lret_sc.real216 = load float, float* getelementptr inbounds ({ float, float }, { float, float }* @lret_sc, i32 0, i32 0)
+ %lret_sc.imag217 = load float, float* getelementptr inbounds ({ float, float }, { float, float }* @lret_sc, i32 0, i32 1)
%cmp.r = fcmp oeq float %ret_sc.real214, %lret_sc.real216
%cmp.i = fcmp oeq float %ret_sc.imag215, %lret_sc.imag217
%and.ri = and i1 %cmp.r, %cmp.i
%conv218 = zext i1 %and.ri to i32
- %call219 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([18 x i8]* @.str3, i32 0, i32 0), double %conv202, double %conv207, double %conv208, double %conv213, i32 %conv218)
+ %call219 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([18 x i8], [18 x i8]* @.str3, i32 0, i32 0), double %conv202, double %conv207, double %conv208, double %conv213, i32 %conv218)
call void @clear()
store float 0x3FF7A99300000000, float* @lx, align 4
- store float 4.500000e+00, float* getelementptr inbounds ({ float, float }* @ret_sc, i32 0, i32 0)
- store float 7.000000e+00, float* getelementptr inbounds ({ float, float }* @ret_sc, i32 0, i32 1)
- %217 = load float* @lx, align 4
+ store float 4.500000e+00, float* getelementptr inbounds ({ float, float }, { float, float }* @ret_sc, i32 0, i32 0)
+ store float 7.000000e+00, float* getelementptr inbounds ({ float, float }, { float, float }* @ret_sc, i32 0, i32 1)
+ %217 = load float, float* @lx, align 4
%call220 = call { float, float } @sc_sf(float %217)
%218 = extractvalue { float, float } %call220, 0
%219 = extractvalue { float, float } %call220, 1
- store float %218, float* getelementptr inbounds ({ float, float }* @lret_sc, i32 0, i32 0)
- store float %219, float* getelementptr inbounds ({ float, float }* @lret_sc, i32 0, i32 1)
- %ret_sc.real221 = load float* getelementptr inbounds ({ float, float }* @ret_sc, i32 0, i32 0)
- %ret_sc.imag222 = load float* getelementptr inbounds ({ float, float }* @ret_sc, i32 0, i32 1)
+ store float %218, float* getelementptr inbounds ({ float, float }, { float, float }* @lret_sc, i32 0, i32 0)
+ store float %219, float* getelementptr inbounds ({ float, float }, { float, float }* @lret_sc, i32 0, i32 1)
+ %ret_sc.real221 = load float, float* getelementptr inbounds ({ float, float }, { float, float }* @ret_sc, i32 0, i32 0)
+ %ret_sc.imag222 = load float, float* getelementptr inbounds ({ float, float }, { float, float }* @ret_sc, i32 0, i32 1)
%conv223 = fpext float %ret_sc.real221 to double
%conv224 = fpext float %ret_sc.imag222 to double
- %ret_sc.real225 = load float* getelementptr inbounds ({ float, float }* @ret_sc, i32 0, i32 0)
- %ret_sc.imag226 = load float* getelementptr inbounds ({ float, float }* @ret_sc, i32 0, i32 1)
+ %ret_sc.real225 = load float, float* getelementptr inbounds ({ float, float }, { float, float }* @ret_sc, i32 0, i32 0)
+ %ret_sc.imag226 = load float, float* getelementptr inbounds ({ float, float }, { float, float }* @ret_sc, i32 0, i32 1)
%conv227 = fpext float %ret_sc.real225 to double
%conv228 = fpext float %ret_sc.imag226 to double
- %lret_sc.real229 = load float* getelementptr inbounds ({ float, float }* @lret_sc, i32 0, i32 0)
- %lret_sc.imag230 = load float* getelementptr inbounds ({ float, float }* @lret_sc, i32 0, i32 1)
+ %lret_sc.real229 = load float, float* getelementptr inbounds ({ float, float }, { float, float }* @lret_sc, i32 0, i32 0)
+ %lret_sc.imag230 = load float, float* getelementptr inbounds ({ float, float }, { float, float }* @lret_sc, i32 0, i32 1)
%conv231 = fpext float %lret_sc.real229 to double
%conv232 = fpext float %lret_sc.imag230 to double
- %lret_sc.real233 = load float* getelementptr inbounds ({ float, float }* @lret_sc, i32 0, i32 0)
- %lret_sc.imag234 = load float* getelementptr inbounds ({ float, float }* @lret_sc, i32 0, i32 1)
+ %lret_sc.real233 = load float, float* getelementptr inbounds ({ float, float }, { float, float }* @lret_sc, i32 0, i32 0)
+ %lret_sc.imag234 = load float, float* getelementptr inbounds ({ float, float }, { float, float }* @lret_sc, i32 0, i32 1)
%conv235 = fpext float %lret_sc.real233 to double
%conv236 = fpext float %lret_sc.imag234 to double
- %220 = load float* @x, align 4
+ %220 = load float, float* @x, align 4
%conv237 = fpext float %220 to double
- %221 = load float* @lx, align 4
+ %221 = load float, float* @lx, align 4
%conv238 = fpext float %221 to double
- %ret_sc.real239 = load float* getelementptr inbounds ({ float, float }* @ret_sc, i32 0, i32 0)
- %ret_sc.imag240 = load float* getelementptr inbounds ({ float, float }* @ret_sc, i32 0, i32 1)
- %lret_sc.real241 = load float* getelementptr inbounds ({ float, float }* @lret_sc, i32 0, i32 0)
- %lret_sc.imag242 = load float* getelementptr inbounds ({ float, float }* @lret_sc, i32 0, i32 1)
+ %ret_sc.real239 = load float, float* getelementptr inbounds ({ float, float }, { float, float }* @ret_sc, i32 0, i32 0)
+ %ret_sc.imag240 = load float, float* getelementptr inbounds ({ float, float }, { float, float }* @ret_sc, i32 0, i32 1)
+ %lret_sc.real241 = load float, float* getelementptr inbounds ({ float, float }, { float, float }* @lret_sc, i32 0, i32 0)
+ %lret_sc.imag242 = load float, float* getelementptr inbounds ({ float, float }, { float, float }* @lret_sc, i32 0, i32 1)
%cmp.r243 = fcmp oeq float %ret_sc.real239, %lret_sc.real241
%cmp.i244 = fcmp oeq float %ret_sc.imag240, %lret_sc.imag242
%and.ri245 = and i1 %cmp.r243, %cmp.i244
br i1 %and.ri245, label %land.rhs247, label %land.end250
land.rhs247: ; preds = %land.end198
- %222 = load float* @x, align 4
- %223 = load float* @lx, align 4
+ %222 = load float, float* @x, align 4
+ %223 = load float, float* @lx, align 4
%cmp248 = fcmp oeq float %222, %223
br label %land.end250
land.end250: ; preds = %land.rhs247, %land.end198
%224 = phi i1 [ false, %land.end198 ], [ %cmp248, %land.rhs247 ]
%land.ext251 = zext i1 %224 to i32
- %call252 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([24 x i8]* @.str4, i32 0, i32 0), double %conv223, double %conv228, double %conv231, double %conv236, double %conv237, double %conv238, i32 %land.ext251)
+ %call252 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([24 x i8], [24 x i8]* @.str4, i32 0, i32 0), double %conv223, double %conv228, double %conv231, double %conv236, double %conv237, double %conv238, i32 %land.ext251)
call void @clear()
- store double 1.234500e+03, double* getelementptr inbounds ({ double, double }* @ret_dc, i32 0, i32 0)
- store double 7.677000e+03, double* getelementptr inbounds ({ double, double }* @ret_dc, i32 0, i32 1)
+ store double 1.234500e+03, double* getelementptr inbounds ({ double, double }, { double, double }* @ret_dc, i32 0, i32 0)
+ store double 7.677000e+03, double* getelementptr inbounds ({ double, double }, { double, double }* @ret_dc, i32 0, i32 1)
%call253 = call { double, double } @dc_v()
%225 = extractvalue { double, double } %call253, 0
%226 = extractvalue { double, double } %call253, 1
- store double %225, double* getelementptr inbounds ({ double, double }* @lret_dc, i32 0, i32 0)
- store double %226, double* getelementptr inbounds ({ double, double }* @lret_dc, i32 0, i32 1)
- %ret_dc.real = load double* getelementptr inbounds ({ double, double }* @ret_dc, i32 0, i32 0)
- %ret_dc.imag = load double* getelementptr inbounds ({ double, double }* @ret_dc, i32 0, i32 1)
- %ret_dc.real254 = load double* getelementptr inbounds ({ double, double }* @ret_dc, i32 0, i32 0)
- %ret_dc.imag255 = load double* getelementptr inbounds ({ double, double }* @ret_dc, i32 0, i32 1)
- %lret_dc.real = load double* getelementptr inbounds ({ double, double }* @lret_dc, i32 0, i32 0)
- %lret_dc.imag = load double* getelementptr inbounds ({ double, double }* @lret_dc, i32 0, i32 1)
- %lret_dc.real256 = load double* getelementptr inbounds ({ double, double }* @lret_dc, i32 0, i32 0)
- %lret_dc.imag257 = load double* getelementptr inbounds ({ double, double }* @lret_dc, i32 0, i32 1)
- %ret_dc.real258 = load double* getelementptr inbounds ({ double, double }* @ret_dc, i32 0, i32 0)
- %ret_dc.imag259 = load double* getelementptr inbounds ({ double, double }* @ret_dc, i32 0, i32 1)
- %lret_dc.real260 = load double* getelementptr inbounds ({ double, double }* @lret_dc, i32 0, i32 0)
- %lret_dc.imag261 = load double* getelementptr inbounds ({ double, double }* @lret_dc, i32 0, i32 1)
+ store double %225, double* getelementptr inbounds ({ double, double }, { double, double }* @lret_dc, i32 0, i32 0)
+ store double %226, double* getelementptr inbounds ({ double, double }, { double, double }* @lret_dc, i32 0, i32 1)
+ %ret_dc.real = load double, double* getelementptr inbounds ({ double, double }, { double, double }* @ret_dc, i32 0, i32 0)
+ %ret_dc.imag = load double, double* getelementptr inbounds ({ double, double }, { double, double }* @ret_dc, i32 0, i32 1)
+ %ret_dc.real254 = load double, double* getelementptr inbounds ({ double, double }, { double, double }* @ret_dc, i32 0, i32 0)
+ %ret_dc.imag255 = load double, double* getelementptr inbounds ({ double, double }, { double, double }* @ret_dc, i32 0, i32 1)
+ %lret_dc.real = load double, double* getelementptr inbounds ({ double, double }, { double, double }* @lret_dc, i32 0, i32 0)
+ %lret_dc.imag = load double, double* getelementptr inbounds ({ double, double }, { double, double }* @lret_dc, i32 0, i32 1)
+ %lret_dc.real256 = load double, double* getelementptr inbounds ({ double, double }, { double, double }* @lret_dc, i32 0, i32 0)
+ %lret_dc.imag257 = load double, double* getelementptr inbounds ({ double, double }, { double, double }* @lret_dc, i32 0, i32 1)
+ %ret_dc.real258 = load double, double* getelementptr inbounds ({ double, double }, { double, double }* @ret_dc, i32 0, i32 0)
+ %ret_dc.imag259 = load double, double* getelementptr inbounds ({ double, double }, { double, double }* @ret_dc, i32 0, i32 1)
+ %lret_dc.real260 = load double, double* getelementptr inbounds ({ double, double }, { double, double }* @lret_dc, i32 0, i32 0)
+ %lret_dc.imag261 = load double, double* getelementptr inbounds ({ double, double }, { double, double }* @lret_dc, i32 0, i32 1)
%cmp.r262 = fcmp oeq double %ret_dc.real258, %lret_dc.real260
%cmp.i263 = fcmp oeq double %ret_dc.imag259, %lret_dc.imag261
%and.ri264 = and i1 %cmp.r262, %cmp.i263
%conv265 = zext i1 %and.ri264 to i32
- %call266 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([18 x i8]* @.str3, i32 0, i32 0), double %ret_dc.real, double %ret_dc.imag255, double %lret_dc.real, double %lret_dc.imag257, i32 %conv265)
+ %call266 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([18 x i8], [18 x i8]* @.str3, i32 0, i32 0), double %ret_dc.real, double %ret_dc.imag255, double %lret_dc.real, double %lret_dc.imag257, i32 %conv265)
call void @clear()
store double 0x40AAF6F532617C1C, double* @lxd, align 8
- store double 4.444500e+03, double* getelementptr inbounds ({ double, double }* @ret_dc, i32 0, i32 0)
- store double 7.888000e+03, double* getelementptr inbounds ({ double, double }* @ret_dc, i32 0, i32 1)
- %227 = load float* @lx, align 4
+ store double 4.444500e+03, double* getelementptr inbounds ({ double, double }, { double, double }* @ret_dc, i32 0, i32 0)
+ store double 7.888000e+03, double* getelementptr inbounds ({ double, double }, { double, double }* @ret_dc, i32 0, i32 1)
+ %227 = load float, float* @lx, align 4
%call267 = call { double, double } @dc_sf(float %227)
%228 = extractvalue { double, double } %call267, 0
%229 = extractvalue { double, double } %call267, 1
- store double %228, double* getelementptr inbounds ({ double, double }* @lret_dc, i32 0, i32 0)
- store double %229, double* getelementptr inbounds ({ double, double }* @lret_dc, i32 0, i32 1)
- %ret_dc.real268 = load double* getelementptr inbounds ({ double, double }* @ret_dc, i32 0, i32 0)
- %ret_dc.imag269 = load double* getelementptr inbounds ({ double, double }* @ret_dc, i32 0, i32 1)
- %ret_dc.real270 = load double* getelementptr inbounds ({ double, double }* @ret_dc, i32 0, i32 0)
- %ret_dc.imag271 = load double* getelementptr inbounds ({ double, double }* @ret_dc, i32 0, i32 1)
- %lret_dc.real272 = load double* getelementptr inbounds ({ double, double }* @lret_dc, i32 0, i32 0)
- %lret_dc.imag273 = load double* getelementptr inbounds ({ double, double }* @lret_dc, i32 0, i32 1)
- %lret_dc.real274 = load double* getelementptr inbounds ({ double, double }* @lret_dc, i32 0, i32 0)
- %lret_dc.imag275 = load double* getelementptr inbounds ({ double, double }* @lret_dc, i32 0, i32 1)
- %230 = load float* @x, align 4
+ store double %228, double* getelementptr inbounds ({ double, double }, { double, double }* @lret_dc, i32 0, i32 0)
+ store double %229, double* getelementptr inbounds ({ double, double }, { double, double }* @lret_dc, i32 0, i32 1)
+ %ret_dc.real268 = load double, double* getelementptr inbounds ({ double, double }, { double, double }* @ret_dc, i32 0, i32 0)
+ %ret_dc.imag269 = load double, double* getelementptr inbounds ({ double, double }, { double, double }* @ret_dc, i32 0, i32 1)
+ %ret_dc.real270 = load double, double* getelementptr inbounds ({ double, double }, { double, double }* @ret_dc, i32 0, i32 0)
+ %ret_dc.imag271 = load double, double* getelementptr inbounds ({ double, double }, { double, double }* @ret_dc, i32 0, i32 1)
+ %lret_dc.real272 = load double, double* getelementptr inbounds ({ double, double }, { double, double }* @lret_dc, i32 0, i32 0)
+ %lret_dc.imag273 = load double, double* getelementptr inbounds ({ double, double }, { double, double }* @lret_dc, i32 0, i32 1)
+ %lret_dc.real274 = load double, double* getelementptr inbounds ({ double, double }, { double, double }* @lret_dc, i32 0, i32 0)
+ %lret_dc.imag275 = load double, double* getelementptr inbounds ({ double, double }, { double, double }* @lret_dc, i32 0, i32 1)
+ %230 = load float, float* @x, align 4
%conv276 = fpext float %230 to double
- %231 = load float* @lx, align 4
+ %231 = load float, float* @lx, align 4
%conv277 = fpext float %231 to double
- %ret_dc.real278 = load double* getelementptr inbounds ({ double, double }* @ret_dc, i32 0, i32 0)
- %ret_dc.imag279 = load double* getelementptr inbounds ({ double, double }* @ret_dc, i32 0, i32 1)
- %lret_dc.real280 = load double* getelementptr inbounds ({ double, double }* @lret_dc, i32 0, i32 0)
- %lret_dc.imag281 = load double* getelementptr inbounds ({ double, double }* @lret_dc, i32 0, i32 1)
+ %ret_dc.real278 = load double, double* getelementptr inbounds ({ double, double }, { double, double }* @ret_dc, i32 0, i32 0)
+ %ret_dc.imag279 = load double, double* getelementptr inbounds ({ double, double }, { double, double }* @ret_dc, i32 0, i32 1)
+ %lret_dc.real280 = load double, double* getelementptr inbounds ({ double, double }, { double, double }* @lret_dc, i32 0, i32 0)
+ %lret_dc.imag281 = load double, double* getelementptr inbounds ({ double, double }, { double, double }* @lret_dc, i32 0, i32 1)
%cmp.r282 = fcmp oeq double %ret_dc.real278, %lret_dc.real280
%cmp.i283 = fcmp oeq double %ret_dc.imag279, %lret_dc.imag281
%and.ri284 = and i1 %cmp.r282, %cmp.i283
br i1 %and.ri284, label %land.rhs286, label %land.end289
land.rhs286: ; preds = %land.end250
- %232 = load float* @x, align 4
- %233 = load float* @lx, align 4
+ %232 = load float, float* @x, align 4
+ %233 = load float, float* @lx, align 4
%cmp287 = fcmp oeq float %232, %233
br label %land.end289
land.end289: ; preds = %land.rhs286, %land.end250
%234 = phi i1 [ false, %land.end250 ], [ %cmp287, %land.rhs286 ]
%land.ext290 = zext i1 %234 to i32
- %call291 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([24 x i8]* @.str4, i32 0, i32 0), double %ret_dc.real268, double %ret_dc.imag271, double %lret_dc.real272, double %lret_dc.imag275, double %conv276, double %conv277, i32 %land.ext290)
- %235 = load i32* %retval
+ %call291 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([24 x i8], [24 x i8]* @.str4, i32 0, i32 0), double %ret_dc.real268, double %ret_dc.imag271, double %lret_dc.real272, double %lret_dc.imag275, double %conv276, double %conv277, i32 %land.ext290)
+ %235 = load i32, i32* %retval
ret i32 %235
}
diff --git a/test/CodeGen/Mips/hf16call32_body.ll b/test/CodeGen/Mips/hf16call32_body.ll
index adac31460c44..d06256cc564f 100644
--- a/test/CodeGen/Mips/hf16call32_body.ll
+++ b/test/CodeGen/Mips/hf16call32_body.ll
@@ -14,7 +14,7 @@ define void @v_sf(float %p) #0 {
entry:
%p.addr = alloca float, align 4
store float %p, float* %p.addr, align 4
- %0 = load float* %p.addr, align 4
+ %0 = load float, float* %p.addr, align 4
store float %0, float* @x, align 4
ret void
}
@@ -33,7 +33,7 @@ define void @v_df(double %p) #0 {
entry:
%p.addr = alloca double, align 8
store double %p, double* %p.addr, align 8
- %0 = load double* %p.addr, align 8
+ %0 = load double, double* %p.addr, align 8
store double %0, double* @xd, align 8
ret void
}
@@ -54,9 +54,9 @@ entry:
%p2.addr = alloca float, align 4
store float %p1, float* %p1.addr, align 4
store float %p2, float* %p2.addr, align 4
- %0 = load float* %p1.addr, align 4
+ %0 = load float, float* %p1.addr, align 4
store float %0, float* @x, align 4
- %1 = load float* %p2.addr, align 4
+ %1 = load float, float* %p2.addr, align 4
store float %1, float* @y, align 4
ret void
}
@@ -77,9 +77,9 @@ entry:
%p2.addr = alloca double, align 8
store float %p1, float* %p1.addr, align 4
store double %p2, double* %p2.addr, align 8
- %0 = load float* %p1.addr, align 4
+ %0 = load float, float* %p1.addr, align 4
store float %0, float* @x, align 4
- %1 = load double* %p2.addr, align 8
+ %1 = load double, double* %p2.addr, align 8
store double %1, double* @yd, align 8
ret void
}
@@ -101,9 +101,9 @@ entry:
%p2.addr = alloca float, align 4
store double %p1, double* %p1.addr, align 8
store float %p2, float* %p2.addr, align 4
- %0 = load double* %p1.addr, align 8
+ %0 = load double, double* %p1.addr, align 8
store double %0, double* @xd, align 8
- %1 = load float* %p2.addr, align 4
+ %1 = load float, float* %p2.addr, align 4
store float %1, float* @y, align 4
ret void
}
@@ -125,9 +125,9 @@ entry:
%p2.addr = alloca double, align 8
store double %p1, double* %p1.addr, align 8
store double %p2, double* %p2.addr, align 8
- %0 = load double* %p1.addr, align 8
+ %0 = load double, double* %p1.addr, align 8
store double %0, double* @xd, align 8
- %1 = load double* %p2.addr, align 8
+ %1 = load double, double* %p2.addr, align 8
store double %1, double* @yd, align 8
ret void
}
@@ -146,7 +146,7 @@ entry:
; Function Attrs: nounwind
define float @sf_v() #0 {
entry:
- %0 = load float* @ret_sf, align 4
+ %0 = load float, float* @ret_sf, align 4
ret float %0
}
@@ -155,9 +155,9 @@ define float @sf_sf(float %p) #0 {
entry:
%p.addr = alloca float, align 4
store float %p, float* %p.addr, align 4
- %0 = load float* %p.addr, align 4
+ %0 = load float, float* %p.addr, align 4
store float %0, float* @x, align 4
- %1 = load float* @ret_sf, align 4
+ %1 = load float, float* @ret_sf, align 4
ret float %1
}
@@ -176,9 +176,9 @@ define float @sf_df(double %p) #0 {
entry:
%p.addr = alloca double, align 8
store double %p, double* %p.addr, align 8
- %0 = load double* %p.addr, align 8
+ %0 = load double, double* %p.addr, align 8
store double %0, double* @xd, align 8
- %1 = load float* @ret_sf, align 4
+ %1 = load float, float* @ret_sf, align 4
ret float %1
}
@@ -198,11 +198,11 @@ entry:
%p2.addr = alloca float, align 4
store float %p1, float* %p1.addr, align 4
store float %p2, float* %p2.addr, align 4
- %0 = load float* %p1.addr, align 4
+ %0 = load float, float* %p1.addr, align 4
store float %0, float* @x, align 4
- %1 = load float* %p2.addr, align 4
+ %1 = load float, float* %p2.addr, align 4
store float %1, float* @y, align 4
- %2 = load float* @ret_sf, align 4
+ %2 = load float, float* @ret_sf, align 4
ret float %2
}
@@ -222,11 +222,11 @@ entry:
%p2.addr = alloca double, align 8
store float %p1, float* %p1.addr, align 4
store double %p2, double* %p2.addr, align 8
- %0 = load float* %p1.addr, align 4
+ %0 = load float, float* %p1.addr, align 4
store float %0, float* @x, align 4
- %1 = load double* %p2.addr, align 8
+ %1 = load double, double* %p2.addr, align 8
store double %1, double* @yd, align 8
- %2 = load float* @ret_sf, align 4
+ %2 = load float, float* @ret_sf, align 4
ret float %2
}
@@ -247,11 +247,11 @@ entry:
%p2.addr = alloca float, align 4
store double %p1, double* %p1.addr, align 8
store float %p2, float* %p2.addr, align 4
- %0 = load double* %p1.addr, align 8
+ %0 = load double, double* %p1.addr, align 8
store double %0, double* @xd, align 8
- %1 = load float* %p2.addr, align 4
+ %1 = load float, float* %p2.addr, align 4
store float %1, float* @y, align 4
- %2 = load float* @ret_sf, align 4
+ %2 = load float, float* @ret_sf, align 4
ret float %2
}
@@ -272,11 +272,11 @@ entry:
%p2.addr = alloca double, align 8
store double %p1, double* %p1.addr, align 8
store double %p2, double* %p2.addr, align 8
- %0 = load double* %p1.addr, align 8
+ %0 = load double, double* %p1.addr, align 8
store double %0, double* @xd, align 8
- %1 = load double* %p2.addr, align 8
+ %1 = load double, double* %p2.addr, align 8
store double %1, double* @yd, align 8
- %2 = load float* @ret_sf, align 4
+ %2 = load float, float* @ret_sf, align 4
ret float %2
}
diff --git a/test/CodeGen/Mips/hf1_body.ll b/test/CodeGen/Mips/hf1_body.ll
index 5acfe86373d9..71a1b960c5b3 100644
--- a/test/CodeGen/Mips/hf1_body.ll
+++ b/test/CodeGen/Mips/hf1_body.ll
@@ -7,7 +7,7 @@ define void @v_sf(float %p) #0 {
entry:
%p.addr = alloca float, align 4
store float %p, float* %p.addr, align 4
- %0 = load float* %p.addr, align 4
+ %0 = load float, float* %p.addr, align 4
store float %0, float* @x, align 4
ret void
}
diff --git a/test/CodeGen/Mips/hfptrcall.ll b/test/CodeGen/Mips/hfptrcall.ll
index 683952d0e4ec..c9f1fe973796 100644
--- a/test/CodeGen/Mips/hfptrcall.ll
+++ b/test/CodeGen/Mips/hfptrcall.ll
@@ -34,11 +34,11 @@ entry:
define { float, float } @scv() #0 {
entry:
%retval = alloca { float, float }, align 4
- %real = getelementptr inbounds { float, float }* %retval, i32 0, i32 0
- %imag = getelementptr inbounds { float, float }* %retval, i32 0, i32 1
+ %real = getelementptr inbounds { float, float }, { float, float }* %retval, i32 0, i32 0
+ %imag = getelementptr inbounds { float, float }, { float, float }* %retval, i32 0, i32 1
store float 5.000000e+00, float* %real
store float 9.900000e+01, float* %imag
- %0 = load { float, float }* %retval
+ %0 = load { float, float }, { float, float }* %retval
ret { float, float } %0
}
@@ -50,11 +50,11 @@ entry:
define { double, double } @dcv() #0 {
entry:
%retval = alloca { double, double }, align 8
- %real = getelementptr inbounds { double, double }* %retval, i32 0, i32 0
- %imag = getelementptr inbounds { double, double }* %retval, i32 0, i32 1
+ %real = getelementptr inbounds { double, double }, { double, double }* %retval, i32 0, i32 0
+ %imag = getelementptr inbounds { double, double }, { double, double }* %retval, i32 0, i32 1
store double 0x416BC8B0A0000000, double* %real
store double 0x41CDCCB763800000, double* %imag
- %0 = load { double, double }* %retval
+ %0 = load { double, double }, { double, double }* %retval
ret { double, double } %0
}
@@ -65,43 +65,43 @@ entry:
; Function Attrs: nounwind
define i32 @main() #0 {
entry:
- %0 = load float ()** @ptrsv, align 4
+ %0 = load float ()*, float ()** @ptrsv, align 4
%call = call float %0()
store float %call, float* @x, align 4
- %1 = load float* @x, align 4
+ %1 = load float, float* @x, align 4
%conv = fpext float %1 to double
- %call1 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), double %conv)
- %2 = load double ()** @ptrdv, align 4
+ %call1 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), double %conv)
+ %2 = load double ()*, double ()** @ptrdv, align 4
%call2 = call double %2()
store double %call2, double* @xd, align 8
- %3 = load double* @xd, align 8
- %call3 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), double %3)
- %4 = load { float, float } ()** @ptrscv, align 4
+ %3 = load double, double* @xd, align 8
+ %call3 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), double %3)
+ %4 = load { float, float } ()*, { float, float } ()** @ptrscv, align 4
%call4 = call { float, float } %4()
%5 = extractvalue { float, float } %call4, 0
%6 = extractvalue { float, float } %call4, 1
- store float %5, float* getelementptr inbounds ({ float, float }* @xy, i32 0, i32 0)
- store float %6, float* getelementptr inbounds ({ float, float }* @xy, i32 0, i32 1)
- %xy.real = load float* getelementptr inbounds ({ float, float }* @xy, i32 0, i32 0)
- %xy.imag = load float* getelementptr inbounds ({ float, float }* @xy, i32 0, i32 1)
+ store float %5, float* getelementptr inbounds ({ float, float }, { float, float }* @xy, i32 0, i32 0)
+ store float %6, float* getelementptr inbounds ({ float, float }, { float, float }* @xy, i32 0, i32 1)
+ %xy.real = load float, float* getelementptr inbounds ({ float, float }, { float, float }* @xy, i32 0, i32 0)
+ %xy.imag = load float, float* getelementptr inbounds ({ float, float }, { float, float }* @xy, i32 0, i32 1)
%conv5 = fpext float %xy.real to double
%conv6 = fpext float %xy.imag to double
- %xy.real7 = load float* getelementptr inbounds ({ float, float }* @xy, i32 0, i32 0)
- %xy.imag8 = load float* getelementptr inbounds ({ float, float }* @xy, i32 0, i32 1)
+ %xy.real7 = load float, float* getelementptr inbounds ({ float, float }, { float, float }* @xy, i32 0, i32 0)
+ %xy.imag8 = load float, float* getelementptr inbounds ({ float, float }, { float, float }* @xy, i32 0, i32 1)
%conv9 = fpext float %xy.real7 to double
%conv10 = fpext float %xy.imag8 to double
- %call11 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([10 x i8]* @.str1, i32 0, i32 0), double %conv5, double %conv10)
- %7 = load { double, double } ()** @ptrdcv, align 4
+ %call11 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([10 x i8], [10 x i8]* @.str1, i32 0, i32 0), double %conv5, double %conv10)
+ %7 = load { double, double } ()*, { double, double } ()** @ptrdcv, align 4
%call12 = call { double, double } %7()
%8 = extractvalue { double, double } %call12, 0
%9 = extractvalue { double, double } %call12, 1
- store double %8, double* getelementptr inbounds ({ double, double }* @xyd, i32 0, i32 0)
- store double %9, double* getelementptr inbounds ({ double, double }* @xyd, i32 0, i32 1)
- %xyd.real = load double* getelementptr inbounds ({ double, double }* @xyd, i32 0, i32 0)
- %xyd.imag = load double* getelementptr inbounds ({ double, double }* @xyd, i32 0, i32 1)
- %xyd.real13 = load double* getelementptr inbounds ({ double, double }* @xyd, i32 0, i32 0)
- %xyd.imag14 = load double* getelementptr inbounds ({ double, double }* @xyd, i32 0, i32 1)
- %call15 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([10 x i8]* @.str1, i32 0, i32 0), double %xyd.real, double %xyd.imag14)
+ store double %8, double* getelementptr inbounds ({ double, double }, { double, double }* @xyd, i32 0, i32 0)
+ store double %9, double* getelementptr inbounds ({ double, double }, { double, double }* @xyd, i32 0, i32 1)
+ %xyd.real = load double, double* getelementptr inbounds ({ double, double }, { double, double }* @xyd, i32 0, i32 0)
+ %xyd.imag = load double, double* getelementptr inbounds ({ double, double }, { double, double }* @xyd, i32 0, i32 1)
+ %xyd.real13 = load double, double* getelementptr inbounds ({ double, double }, { double, double }* @xyd, i32 0, i32 0)
+ %xyd.imag14 = load double, double* getelementptr inbounds ({ double, double }, { double, double }* @xyd, i32 0, i32 1)
+ %call15 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([10 x i8], [10 x i8]* @.str1, i32 0, i32 0), double %xyd.real, double %xyd.imag14)
ret i32 0
}
diff --git a/test/CodeGen/Mips/i32k.ll b/test/CodeGen/Mips/i32k.ll
index 73f1302beec0..ba9cf7342308 100644
--- a/test/CodeGen/Mips/i32k.ll
+++ b/test/CodeGen/Mips/i32k.ll
@@ -4,14 +4,14 @@
define i32 @main() nounwind {
entry:
- %call = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i32 1075344593) nounwind
+ %call = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i32 1075344593) nounwind
; 16: lw ${{[0-9]+}}, 1f
; 16: b 2f
; 16: .align 2
; 16: 1: .word 1075344593
; 16: 2:
- %call1 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i32 -1075344593) nounwind
+ %call1 = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i32 -1075344593) nounwind
; 16: lw ${{[0-9]+}}, 1f
; 16: b 2f
diff --git a/test/CodeGen/Mips/inlineasm-assembler-directives.ll b/test/CodeGen/Mips/inlineasm-assembler-directives.ll
index e4a6d1e26c69..88ceed4114c2 100644
--- a/test/CodeGen/Mips/inlineasm-assembler-directives.ll
+++ b/test/CodeGen/Mips/inlineasm-assembler-directives.ll
@@ -16,7 +16,7 @@ entry:
%a = alloca i32, align 4
%b = alloca i32, align 4
store i32 20, i32* %a, align 4
- %0 = load i32* %a, align 4
+ %0 = load i32, i32* %a, align 4
%1 = call i32 asm sideeffect "addi $$9, $1, 8\0A\09subi $0, $$9, 6", "=r,r,~{$1}"(i32 %0)
store i32 %1, i32* %b, align 4
ret void
diff --git a/test/CodeGen/Mips/inlineasm-cnstrnt-reg64.ll b/test/CodeGen/Mips/inlineasm-cnstrnt-reg64.ll
index a7ba762b1064..acce63203812 100644
--- a/test/CodeGen/Mips/inlineasm-cnstrnt-reg64.ll
+++ b/test/CodeGen/Mips/inlineasm-cnstrnt-reg64.ll
@@ -3,7 +3,7 @@
; The target is 64 bit.
;
;
-; RUN: llc -march=mips64el -mcpu=mips64r2 -mattr=n64 < %s | FileCheck %s
+; RUN: llc -march=mips64el -mcpu=mips64r2 -target-abi=n64 < %s | FileCheck %s
define i32 @main() nounwind {
diff --git a/test/CodeGen/Mips/inlineasm-operand-code.ll b/test/CodeGen/Mips/inlineasm-operand-code.ll
index 3d9dec76fb37..b9415ee90cdb 100644
--- a/test/CodeGen/Mips/inlineasm-operand-code.ll
+++ b/test/CodeGen/Mips/inlineasm-operand-code.ll
@@ -125,7 +125,7 @@ entry:
;CHECK_BIG_32: #APP
;CHECK_BIG_32: or ${{[0-9]+}},$[[SECOND]],${{[0-9]+}}
;CHECK_BIG_32: #NO_APP
- %bosco = load i64* getelementptr inbounds (%union.u_tag* @uval, i32 0, i32 0), align 8
+ %bosco = load i64, i64* getelementptr inbounds (%union.u_tag, %union.u_tag* @uval, i32 0, i32 0), align 8
%trunc1 = trunc i64 %bosco to i32
tail call i32 asm sideeffect "or $0,${1:D},$2", "=r,r,r"(i64 %bosco, i32 %trunc1) nounwind
ret i32 0
@@ -149,7 +149,7 @@ entry:
;CHECK_BIG_32: #APP
;CHECK_BIG_32: or ${{[0-9]+}},$[[SECOND]],${{[0-9]+}}
;CHECK_BIG_32: #NO_APP
- %bosco = load i64* getelementptr inbounds (%union.u_tag* @uval, i32 0, i32 0), align 8
+ %bosco = load i64, i64* getelementptr inbounds (%union.u_tag, %union.u_tag* @uval, i32 0, i32 0), align 8
%trunc1 = trunc i64 %bosco to i32
tail call i32 asm sideeffect "or $0,${1:L},$2", "=r,r,r"(i64 %bosco, i32 %trunc1) nounwind
ret i32 0
@@ -173,7 +173,7 @@ entry:
;CHECK_BIG_32: #APP
;CHECK_BIG_32: or ${{[0-9]+}},$[[FIRST]],${{[0-9]+}}
;CHECK_BIG_32: #NO_APP
- %bosco = load i64* getelementptr inbounds (%union.u_tag* @uval, i32 0, i32 0), align 8
+ %bosco = load i64, i64* getelementptr inbounds (%union.u_tag, %union.u_tag* @uval, i32 0, i32 0), align 8
%trunc1 = trunc i64 %bosco to i32
tail call i32 asm sideeffect "or $0,${1:M},$2", "=r,r,r"(i64 %bosco, i32 %trunc1) nounwind
ret i32 0
diff --git a/test/CodeGen/Mips/inlineasm64.ll b/test/CodeGen/Mips/inlineasm64.ll
index dbce3c394e96..82abdf82a3ed 100644
--- a/test/CodeGen/Mips/inlineasm64.ll
+++ b/test/CodeGen/Mips/inlineasm64.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=mips64el -mcpu=mips64r2 -mattr=n64 < %s | FileCheck %s
+; RUN: llc -march=mips64el -mcpu=mips64r2 -target-abi=n64 < %s | FileCheck %s
@gl2 = external global i64
@gl1 = external global i64
@@ -8,8 +8,8 @@ define void @foo1() nounwind {
entry:
; CHECK: foo1
; CHECK: daddu
- %0 = load i64* @gl1, align 8
- %1 = load i64* @gl0, align 8
+ %0 = load i64, i64* @gl1, align 8
+ %1 = load i64, i64* @gl0, align 8
%2 = tail call i64 asm "daddu $0, $1, $2", "=r,r,r"(i64 %0, i64 %1) nounwind
store i64 %2, i64* @gl2, align 8
ret void
diff --git a/test/CodeGen/Mips/inlineasm_constraint.ll b/test/CodeGen/Mips/inlineasm_constraint.ll
index 76b73dc276ae..868433e0941f 100644
--- a/test/CodeGen/Mips/inlineasm_constraint.ll
+++ b/test/CodeGen/Mips/inlineasm_constraint.ll
@@ -51,14 +51,5 @@ entry:
; CHECK: #NO_APP
tail call i32 asm sideeffect "addiu $0,$1,$2", "=r,r,P"(i32 7, i32 65535) nounwind
-; Now R Which takes the address of c
- %c = alloca i32, align 4
- store i32 -4469539, i32* %c, align 4
- %8 = call i32 asm sideeffect "lw $0, 1 + $1\0A\09lw $0, 2 + $1\0A\09", "=r,*R"(i32* %c) #1
-; CHECK: #APP
-; CHECK: lw ${{[0-9]+}}, 1 + 0(${{[0-9]+}})
-; CHECK: lw ${{[0-9]+}}, 2 + 0(${{[0-9]+}})
-; CHECK: #NO_APP
-
ret i32 0
}
diff --git a/test/CodeGen/Mips/inlineasm_constraint_R.ll b/test/CodeGen/Mips/inlineasm_constraint_R.ll
new file mode 100644
index 000000000000..c4105ae6b22c
--- /dev/null
+++ b/test/CodeGen/Mips/inlineasm_constraint_R.ll
@@ -0,0 +1,60 @@
+; RUN: llc -march=mipsel < %s | FileCheck %s
+
+@data = global [8193 x i32] zeroinitializer
+
+define void @R(i32 *%p) nounwind {
+entry:
+ ; CHECK-LABEL: R:
+
+ call void asm sideeffect "lw $$1, $0", "*R,~{$1}"(i32* getelementptr inbounds ([8193 x i32], [8193 x i32]* @data, i32 0, i32 0))
+
+ ; CHECK: lw $[[BASEPTR:[0-9]+]], %got(data)(
+ ; CHECK: #APP
+ ; CHECK: lw $1, 0($[[BASEPTR]])
+ ; CHECK: #NO_APP
+
+ ret void
+}
+
+define void @R_offset_4(i32 *%p) nounwind {
+entry:
+ ; CHECK-LABEL: R_offset_4:
+
+ call void asm sideeffect "lw $$1, $0", "*R,~{$1}"(i32* getelementptr inbounds ([8193 x i32], [8193 x i32]* @data, i32 0, i32 1))
+
+ ; CHECK: lw $[[BASEPTR:[0-9]+]], %got(data)(
+ ; CHECK: #APP
+ ; CHECK: lw $1, 4($[[BASEPTR]])
+ ; CHECK: #NO_APP
+
+ ret void
+}
+
+define void @R_offset_254(i32 *%p) nounwind {
+entry:
+ ; CHECK-LABEL: R_offset_254:
+
+ call void asm sideeffect "lw $$1, $0", "*R,~{$1}"(i32* getelementptr inbounds ([8193 x i32], [8193 x i32]* @data, i32 0, i32 63))
+
+ ; CHECK-DAG: lw $[[BASEPTR:[0-9]+]], %got(data)(
+ ; CHECK: #APP
+ ; CHECK: lw $1, 252($[[BASEPTR]])
+ ; CHECK: #NO_APP
+
+ ret void
+}
+
+define void @R_offset_256(i32 *%p) nounwind {
+entry:
+ ; CHECK-LABEL: R_offset_256:
+
+ call void asm sideeffect "lw $$1, $0", "*R,~{$1}"(i32* getelementptr inbounds ([8193 x i32], [8193 x i32]* @data, i32 0, i32 64))
+
+ ; CHECK-DAG: lw $[[BASEPTR:[0-9]+]], %got(data)(
+ ; CHECK: addiu $[[BASEPTR2:[0-9]+]], $[[BASEPTR]], 256
+ ; CHECK: #APP
+ ; CHECK: lw $1, 0($[[BASEPTR2]])
+ ; CHECK: #NO_APP
+
+ ret void
+}
diff --git a/test/CodeGen/Mips/inlineasm_constraint_ZC.ll b/test/CodeGen/Mips/inlineasm_constraint_ZC.ll
new file mode 100644
index 000000000000..c1746a67564f
--- /dev/null
+++ b/test/CodeGen/Mips/inlineasm_constraint_ZC.ll
@@ -0,0 +1,167 @@
+; RUN: llc -march=mipsel -mcpu=mips32r6 < %s | FileCheck %s -check-prefix=ALL -check-prefix=09BIT
+; RUN: llc -march=mipsel -mattr=+micromips < %s | FileCheck %s -check-prefix=ALL -check-prefix=12BIT
+; RUN: llc -march=mipsel < %s | FileCheck %s -check-prefix=ALL -check-prefix=16BIT
+
+@data = global [8193 x i32] zeroinitializer
+
+define void @ZC(i32 *%p) nounwind {
+entry:
+ ; ALL-LABEL: ZC:
+
+ call void asm sideeffect "lw $$1, $0", "*^ZC,~{$1}"(i32* getelementptr inbounds ([8193 x i32], [8193 x i32]* @data, i32 0, i32 0))
+
+ ; ALL: lw $[[BASEPTR:[0-9]+]], %got(data)(
+ ; ALL: #APP
+ ; ALL: lw $1, 0($[[BASEPTR]])
+ ; ALL: #NO_APP
+
+ ret void
+}
+
+define void @ZC_offset_n4(i32 *%p) nounwind {
+entry:
+ ; ALL-LABEL: ZC_offset_n4:
+
+ call void asm sideeffect "lw $$1, $0", "*^ZC,~{$1}"(i32* getelementptr inbounds ([8193 x i32], [8193 x i32]* @data, i32 0, i32 -1))
+
+ ; ALL: lw $[[BASEPTR:[0-9]+]], %got(data)(
+ ; ALL: #APP
+ ; ALL: lw $1, -4($[[BASEPTR]])
+ ; ALL: #NO_APP
+
+ ret void
+}
+
+define void @ZC_offset_4(i32 *%p) nounwind {
+entry:
+ ; ALL-LABEL: ZC_offset_4:
+
+ call void asm sideeffect "lw $$1, $0", "*^ZC,~{$1}"(i32* getelementptr inbounds ([8193 x i32], [8193 x i32]* @data, i32 0, i32 1))
+
+ ; ALL: lw $[[BASEPTR:[0-9]+]], %got(data)(
+ ; ALL: #APP
+ ; ALL: lw $1, 4($[[BASEPTR]])
+ ; ALL: #NO_APP
+
+ ret void
+}
+
+define void @ZC_offset_252(i32 *%p) nounwind {
+entry:
+ ; ALL-LABEL: ZC_offset_252:
+
+ call void asm sideeffect "lw $$1, $0", "*^ZC,~{$1}"(i32* getelementptr inbounds ([8193 x i32], [8193 x i32]* @data, i32 0, i32 63))
+
+ ; ALL: lw $[[BASEPTR:[0-9]+]], %got(data)(
+ ; ALL: #APP
+ ; ALL: lw $1, 252($[[BASEPTR]])
+ ; ALL: #NO_APP
+
+ ret void
+}
+
+define void @ZC_offset_256(i32 *%p) nounwind {
+entry:
+ ; ALL-LABEL: ZC_offset_256:
+
+ call void asm sideeffect "lw $$1, $0", "*^ZC,~{$1}"(i32* getelementptr inbounds ([8193 x i32], [8193 x i32]* @data, i32 0, i32 64))
+
+ ; ALL: lw $[[BASEPTR:[0-9]+]], %got(data)(
+
+ ; 09BIT: addiu $[[BASEPTR2:[0-9]+]], $[[BASEPTR]], 256
+
+ ; ALL: #APP
+
+ ; 09BIT: lw $1, 0($[[BASEPTR2]])
+ ; 12BIT: lw $1, 256($[[BASEPTR]])
+ ; 16BIT: lw $1, 256($[[BASEPTR]])
+
+ ; ALL: #NO_APP
+
+ ret void
+}
+
+define void @ZC_offset_2044(i32 *%p) nounwind {
+entry:
+ ; ALL-LABEL: ZC_offset_2044:
+
+ call void asm sideeffect "lw $$1, $0", "*^ZC,~{$1}"(i32* getelementptr inbounds ([8193 x i32], [8193 x i32]* @data, i32 0, i32 511))
+
+ ; ALL: lw $[[BASEPTR:[0-9]+]], %got(data)(
+
+ ; 09BIT: addiu $[[BASEPTR2:[0-9]+]], $[[BASEPTR]], 2044
+
+ ; ALL: #APP
+
+ ; 09BIT: lw $1, 0($[[BASEPTR2]])
+ ; 12BIT: lw $1, 2044($[[BASEPTR]])
+ ; 16BIT: lw $1, 2044($[[BASEPTR]])
+
+ ; ALL: #NO_APP
+
+ ret void
+}
+
+define void @ZC_offset_2048(i32 *%p) nounwind {
+entry:
+ ; ALL-LABEL: ZC_offset_2048:
+
+ call void asm sideeffect "lw $$1, $0", "*^ZC,~{$1}"(i32* getelementptr inbounds ([8193 x i32], [8193 x i32]* @data, i32 0, i32 512))
+
+ ; ALL: lw $[[BASEPTR:[0-9]+]], %got(data)(
+
+ ; 09BIT: addiu $[[BASEPTR2:[0-9]+]], $[[BASEPTR]], 2048
+ ; 12BIT: addiu $[[BASEPTR2:[0-9]+]], $[[BASEPTR]], 2048
+
+ ; ALL: #APP
+
+ ; 09BIT: lw $1, 0($[[BASEPTR2]])
+ ; 12BIT: lw $1, 0($[[BASEPTR2]])
+ ; 16BIT: lw $1, 2048($[[BASEPTR]])
+
+ ; ALL: #NO_APP
+
+ ret void
+}
+
+define void @ZC_offset_32764(i32 *%p) nounwind {
+entry:
+ ; ALL-LABEL: ZC_offset_32764:
+
+ call void asm sideeffect "lw $$1, $0", "*^ZC,~{$1}"(i32* getelementptr inbounds ([8193 x i32], [8193 x i32]* @data, i32 0, i32 8191))
+
+ ; ALL-DAG: lw $[[BASEPTR:[0-9]+]], %got(data)(
+
+ ; 09BIT: addiu $[[BASEPTR2:[0-9]+]], $[[BASEPTR]], 32764
+ ; 12BIT: addiu $[[BASEPTR2:[0-9]+]], $[[BASEPTR]], 32764
+
+ ; ALL: #APP
+
+ ; 09BIT: lw $1, 0($[[BASEPTR2]])
+ ; 12BIT: lw $1, 0($[[BASEPTR2]])
+ ; 16BIT: lw $1, 32764($[[BASEPTR]])
+
+ ; ALL: #NO_APP
+
+ ret void
+}
+
+define void @ZC_offset_32768(i32 *%p) nounwind {
+entry:
+ ; ALL-LABEL: ZC_offset_32768:
+
+ call void asm sideeffect "lw $$1, $0", "*^ZC,~{$1}"(i32* getelementptr inbounds ([8193 x i32], [8193 x i32]* @data, i32 0, i32 8192))
+
+ ; ALL-DAG: lw $[[BASEPTR:[0-9]+]], %got(data)(
+ ; ALL-DAG: ori $[[T0:[0-9]+]], $zero, 32768
+
+ ; 09BIT: addu $[[BASEPTR2:[0-9]+]], $[[BASEPTR]], $[[T0]]
+ ; 12BIT: addu16 $[[BASEPTR2:[0-9]+]], $[[BASEPTR]], $[[T0]]
+ ; 16BIT: addu $[[BASEPTR2:[0-9]+]], $[[BASEPTR]], $[[T0]]
+
+ ; ALL: #APP
+ ; ALL: lw $1, 0($[[BASEPTR2]])
+ ; ALL: #NO_APP
+
+ ret void
+}
diff --git a/test/CodeGen/Mips/inlineasm_constraint_m.ll b/test/CodeGen/Mips/inlineasm_constraint_m.ll
new file mode 100644
index 000000000000..00053ad3c105
--- /dev/null
+++ b/test/CodeGen/Mips/inlineasm_constraint_m.ll
@@ -0,0 +1,61 @@
+; RUN: llc -march=mipsel < %s | FileCheck %s
+
+@data = global [8193 x i32] zeroinitializer
+
+define void @m(i32 *%p) nounwind {
+entry:
+ ; CHECK-LABEL: m:
+
+ call void asm sideeffect "lw $$1, $0", "*m,~{$1}"(i32* getelementptr inbounds ([8193 x i32], [8193 x i32]* @data, i32 0, i32 0))
+
+ ; CHECK: lw $[[BASEPTR:[0-9]+]], %got(data)(
+ ; CHECK: #APP
+ ; CHECK: lw $1, 0($[[BASEPTR]])
+ ; CHECK: #NO_APP
+
+ ret void
+}
+
+define void @m_offset_4(i32 *%p) nounwind {
+entry:
+ ; CHECK-LABEL: m_offset_4:
+
+ call void asm sideeffect "lw $$1, $0", "*m,~{$1}"(i32* getelementptr inbounds ([8193 x i32], [8193 x i32]* @data, i32 0, i32 1))
+
+ ; CHECK: lw $[[BASEPTR:[0-9]+]], %got(data)(
+ ; CHECK: #APP
+ ; CHECK: lw $1, 4($[[BASEPTR]])
+ ; CHECK: #NO_APP
+
+ ret void
+}
+
+define void @m_offset_32764(i32 *%p) nounwind {
+entry:
+ ; CHECK-LABEL: m_offset_32764:
+
+ call void asm sideeffect "lw $$1, $0", "*m,~{$1}"(i32* getelementptr inbounds ([8193 x i32], [8193 x i32]* @data, i32 0, i32 8191))
+
+ ; CHECK-DAG: lw $[[BASEPTR:[0-9]+]], %got(data)(
+ ; CHECK: #APP
+ ; CHECK: lw $1, 32764($[[BASEPTR]])
+ ; CHECK: #NO_APP
+
+ ret void
+}
+
+define void @m_offset_32768(i32 *%p) nounwind {
+entry:
+ ; CHECK-LABEL: m_offset_32768:
+
+ call void asm sideeffect "lw $$1, $0", "*m,~{$1}"(i32* getelementptr inbounds ([8193 x i32], [8193 x i32]* @data, i32 0, i32 8192))
+
+ ; CHECK-DAG: lw $[[BASEPTR:[0-9]+]], %got(data)(
+ ; CHECK-DAG: ori $[[T0:[0-9]+]], $zero, 32768
+ ; CHECK: addu $[[BASEPTR2:[0-9]+]], $[[BASEPTR]], $[[T0]]
+ ; CHECK: #APP
+ ; CHECK: lw $1, 0($[[BASEPTR2]])
+ ; CHECK: #NO_APP
+
+ ret void
+}
diff --git a/test/CodeGen/Mips/inlineasmmemop.ll b/test/CodeGen/Mips/inlineasmmemop.ll
index 5518520c5491..9e9b6cd089ea 100644
--- a/test/CodeGen/Mips/inlineasmmemop.ll
+++ b/test/CodeGen/Mips/inlineasmmemop.ll
@@ -6,14 +6,13 @@
define i32 @f1(i32 %x) nounwind {
entry:
; CHECK-LABEL: f1:
-; CHECK: addiu $[[T0:[0-9]+]], $sp
; CHECK: #APP
-; CHECK: sw $4, 0($[[T0]])
+; CHECK: sw $4, [[OFFSET:[0-9]+]]($sp)
; CHECK: #NO_APP
+; CHECK: lw $[[T1:[0-9]+]], %got(g1)
; CHECK: #APP
-; CHECK: lw $[[T3:[0-9]+]], 0($[[T0]])
+; CHECK: lw $[[T3:[0-9]+]], [[OFFSET]]($sp)
; CHECK: #NO_APP
-; CHECK: lw $[[T1:[0-9]+]], %got(g1)
; CHECK: sw $[[T3]], 0($[[T1]])
%l1 = alloca i32, align 4
@@ -27,13 +26,13 @@ entry:
; "D": Second word of a double word. This works for any memory element
; double or single.
; CHECK: #APP
-; CHECK: lw ${{[0-9]+}},4(${{[0-9]+}});
+; CHECK: lw ${{[0-9]+}}, 16(${{[0-9]+}});
; CHECK: #NO_APP
; No "D": First word of a double word. This works for any memory element
; double or single.
; CHECK: #APP
-; CHECK: lw ${{[0-9]+}},0(${{[0-9]+}});
+; CHECK: lw ${{[0-9]+}}, 12(${{[0-9]+}});
; CHECK: #NO_APP
@b = common global [20 x i32] zeroinitializer, align 4
@@ -41,8 +40,8 @@ entry:
define void @main() {
entry:
; Second word:
- tail call void asm sideeffect " lw $0,${1:D};", "r,*m,~{$11}"(i32 undef, i32* getelementptr inbounds ([20 x i32]* @b, i32 0, i32 3))
+ tail call void asm sideeffect " lw $0, ${1:D};", "r,*m,~{$11}"(i32 undef, i32* getelementptr inbounds ([20 x i32], [20 x i32]* @b, i32 0, i32 3))
; First word. Notice, no 'D':
- tail call void asm sideeffect " lw $0,${1};", "r,*m,~{$11}"(i32 undef, i32* getelementptr inbounds ([20 x i32]* @b, i32 0, i32 3))
+ tail call void asm sideeffect " lw $0, ${1};", "r,*m,~{$11}"(i32 undef, i32* getelementptr inbounds ([20 x i32], [20 x i32]* @b, i32 0, i32 3))
ret void
}
diff --git a/test/CodeGen/Mips/insn-zero-size-bb.ll b/test/CodeGen/Mips/insn-zero-size-bb.ll
new file mode 100644
index 000000000000..9739c6f17fab
--- /dev/null
+++ b/test/CodeGen/Mips/insn-zero-size-bb.ll
@@ -0,0 +1,27 @@
+; RUN: llc < %s -march=mips -mcpu=mips32 | FileCheck %s
+; RUN: llc < %s -march=mips -mcpu=mips32r3 -mattr=+micromips | FileCheck %s
+; RUN: llc < %s -march=mips -mcpu=mips16 | FileCheck %s
+
+; Verify that we emit the .insn directive for zero-sized (empty) basic blocks.
+; This only really matters for microMIPS and MIPS16.
+
+declare i32 @foo(...)
+declare void @bar()
+
+define void @main() {
+entry:
+ invoke void @bar() #0
+ to label %unreachable unwind label %return
+
+unreachable:
+; CHECK: ${{.*}}: # %unreachable
+; CHECK-NEXT: .insn
+ unreachable
+
+return:
+ %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @foo to i8*)
+ catch i8* null
+ ret void
+}
+
+attributes #0 = { noreturn }
diff --git a/test/CodeGen/Mips/internalfunc.ll b/test/CodeGen/Mips/internalfunc.ll
index 863375ad4d4a..2b4a0397f45f 100644
--- a/test/CodeGen/Mips/internalfunc.ll
+++ b/test/CodeGen/Mips/internalfunc.ll
@@ -20,8 +20,8 @@ entry:
br i1 %tobool, label %if.end, label %if.then
if.then: ; preds = %entry
- %tmp1 = load void (...)** @caller.sf1, align 4
- tail call void (...)* %tmp1() nounwind
+ %tmp1 = load void (...)*, void (...)** @caller.sf1, align 4
+ tail call void (...) %tmp1() nounwind
br label %if.end
if.end: ; preds = %entry, %if.then
@@ -30,7 +30,7 @@ if.end: ; preds = %entry, %if.then
; CHECK: lw $[[R3:[0-9]+]], %got(caller.sf1)
; CHECK: sw ${{[0-9]+}}, %lo(caller.sf1)($[[R3]])
%tobool3 = icmp ne i32 %a0, 0
- %tmp4 = load void (...)** @gf1, align 4
+ %tmp4 = load void (...)*, void (...)** @gf1, align 4
%cond = select i1 %tobool3, void (...)* %tmp4, void (...)* bitcast (void ()* @sf2 to void (...)*)
store void (...)* %cond, void (...)** @caller.sf1, align 4
ret void
@@ -38,7 +38,7 @@ if.end: ; preds = %entry, %if.then
define internal void @sf2() nounwind {
entry:
- %call = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([3 x i8]* @.str, i32 0, i32 0)) nounwind
+ %call = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str, i32 0, i32 0)) nounwind
ret void
}
@@ -46,7 +46,7 @@ declare i32 @printf(i8* nocapture, ...) nounwind
define internal fastcc void @f2() nounwind noinline {
entry:
- %call = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([3 x i8]* @.str, i32 0, i32 0)) nounwind
+ %call = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str, i32 0, i32 0)) nounwind
ret void
}
diff --git a/test/CodeGen/Mips/jtstat.ll b/test/CodeGen/Mips/jtstat.ll
index 01afc080c2ed..35f71cf2dc85 100644
--- a/test/CodeGen/Mips/jtstat.ll
+++ b/test/CodeGen/Mips/jtstat.ll
@@ -8,7 +8,7 @@ define void @test(i32 %i) nounwind {
entry:
%i.addr = alloca i32, align 4
store i32 %i, i32* %i.addr, align 4
- %0 = load i32* %i.addr, align 4
+ %0 = load i32, i32* %i.addr, align 4
switch i32 %0, label %sw.epilog [
i32 115, label %sw.bb
i32 105, label %sw.bb1
diff --git a/test/CodeGen/Mips/l3mc.ll b/test/CodeGen/Mips/l3mc.ll
index 3bfb389ba05d..c1bff11595c9 100644
--- a/test/CodeGen/Mips/l3mc.ll
+++ b/test/CodeGen/Mips/l3mc.ll
@@ -1,22 +1,22 @@
-; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -soft-float -mips16-hard-float -relocation-model=static < %s | FileCheck %s -check-prefix=__call_stub_fp___fixunsdfsi
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -mattr=+soft-float -mips16-hard-float -relocation-model=static < %s | FileCheck %s -check-prefix=__call_stub_fp___fixunsdfsi
-; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -soft-float -mips16-hard-float -relocation-model=static < %s | FileCheck %s -check-prefix=__call_stub_fp___floatdidf
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -mattr=+soft-float -mips16-hard-float -relocation-model=static < %s | FileCheck %s -check-prefix=__call_stub_fp___floatdidf
-; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -soft-float -mips16-hard-float -relocation-model=static < %s | FileCheck %s -check-prefix=__call_stub_fp___floatdisf
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -mattr=+soft-float -mips16-hard-float -relocation-model=static < %s | FileCheck %s -check-prefix=__call_stub_fp___floatdisf
-; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -soft-float -mips16-hard-float -relocation-model=static < %s | FileCheck %s -check-prefix=__call_stub_fp___floatundidf
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -mattr=+soft-float -mips16-hard-float -relocation-model=static < %s | FileCheck %s -check-prefix=__call_stub_fp___floatundidf
-; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -soft-float -mips16-hard-float -relocation-model=static < %s | FileCheck %s -check-prefix=__call_stub_fp___fixsfdi
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -mattr=+soft-float -mips16-hard-float -relocation-model=static < %s | FileCheck %s -check-prefix=__call_stub_fp___fixsfdi
-; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -soft-float -mips16-hard-float -relocation-model=static < %s | FileCheck %s -check-prefix=__call_stub_fp___fixunsdfdi
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -mattr=+soft-float -mips16-hard-float -relocation-model=static < %s | FileCheck %s -check-prefix=__call_stub_fp___fixunsdfdi
-; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -soft-float -mips16-hard-float -relocation-model=static < %s | FileCheck %s -check-prefix=__call_stub_fp___fixdfdi
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -mattr=+soft-float -mips16-hard-float -relocation-model=static < %s | FileCheck %s -check-prefix=__call_stub_fp___fixdfdi
-; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -soft-float -mips16-hard-float -relocation-model=static < %s | FileCheck %s -check-prefix=__call_stub_fp___fixunssfsi
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -mattr=+soft-float -mips16-hard-float -relocation-model=static < %s | FileCheck %s -check-prefix=__call_stub_fp___fixunssfsi
-; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -soft-float -mips16-hard-float -relocation-model=static < %s | FileCheck %s -check-prefix=__call_stub_fp___fixunssfdi
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -mattr=+soft-float -mips16-hard-float -relocation-model=static < %s | FileCheck %s -check-prefix=__call_stub_fp___fixunssfdi
-; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -soft-float -mips16-hard-float -relocation-model=static < %s | FileCheck %s -check-prefix=__call_stub_fp___floatundisf
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -mattr=+soft-float -mips16-hard-float -relocation-model=static < %s | FileCheck %s -check-prefix=__call_stub_fp___floatundisf
@ll1 = global i64 0, align 8
@ll2 = global i64 0, align 8
@@ -42,28 +42,28 @@
; Function Attrs: nounwind
define void @_Z3foov() #0 {
entry:
- %0 = load double* @d1, align 8
+ %0 = load double, double* @d1, align 8
%conv = fptosi double %0 to i64
store i64 %conv, i64* @ll1, align 8
- %1 = load double* @d2, align 8
+ %1 = load double, double* @d2, align 8
%conv1 = fptoui double %1 to i64
store i64 %conv1, i64* @ull1, align 8
- %2 = load float* @f1, align 4
+ %2 = load float, float* @f1, align 4
%conv2 = fptosi float %2 to i64
store i64 %conv2, i64* @ll2, align 8
- %3 = load float* @f2, align 4
+ %3 = load float, float* @f2, align 4
%conv3 = fptoui float %3 to i64
store i64 %conv3, i64* @ull2, align 8
- %4 = load double* @d3, align 8
+ %4 = load double, double* @d3, align 8
%conv4 = fptosi double %4 to i32
store i32 %conv4, i32* @l1, align 4
- %5 = load double* @d4, align 8
+ %5 = load double, double* @d4, align 8
%conv5 = fptoui double %5 to i32
store i32 %conv5, i32* @ul1, align 4
- %6 = load float* @f3, align 4
+ %6 = load float, float* @f3, align 4
%conv6 = fptosi float %6 to i32
store i32 %conv6, i32* @l2, align 4
- %7 = load float* @f4, align 4
+ %7 = load float, float* @f4, align 4
%conv7 = fptoui float %7 to i32
store i32 %conv7, i32* @ul2, align 4
ret void
@@ -72,28 +72,28 @@ entry:
; Function Attrs: nounwind
define void @_Z3goov() #0 {
entry:
- %0 = load i64* @ll1, align 8
+ %0 = load i64, i64* @ll1, align 8
%conv = sitofp i64 %0 to double
store double %conv, double* @d1, align 8
- %1 = load i64* @ull1, align 8
+ %1 = load i64, i64* @ull1, align 8
%conv1 = uitofp i64 %1 to double
store double %conv1, double* @d2, align 8
- %2 = load i64* @ll2, align 8
+ %2 = load i64, i64* @ll2, align 8
%conv2 = sitofp i64 %2 to float
store float %conv2, float* @f1, align 4
- %3 = load i64* @ull2, align 8
+ %3 = load i64, i64* @ull2, align 8
%conv3 = uitofp i64 %3 to float
store float %conv3, float* @f2, align 4
- %4 = load i32* @l1, align 4
+ %4 = load i32, i32* @l1, align 4
%conv4 = sitofp i32 %4 to double
store double %conv4, double* @d3, align 8
- %5 = load i32* @ul1, align 4
+ %5 = load i32, i32* @ul1, align 4
%conv5 = uitofp i32 %5 to double
store double %conv5, double* @d4, align 8
- %6 = load i32* @l2, align 4
+ %6 = load i32, i32* @l2, align 4
%conv6 = sitofp i32 %6 to float
store float %conv6, float* @f3, align 4
- %7 = load i32* @ul2, align 4
+ %7 = load i32, i32* @ul2, align 4
%conv7 = uitofp i32 %7 to float
store float %conv7, float* @f4, align 4
ret void
diff --git a/test/CodeGen/Mips/largeimm1.ll b/test/CodeGen/Mips/largeimm1.ll
index 1c0f69c59011..06c4d6bd9603 100644
--- a/test/CodeGen/Mips/largeimm1.ll
+++ b/test/CodeGen/Mips/largeimm1.ll
@@ -5,7 +5,7 @@
define void @f() nounwind {
entry:
%a1 = alloca [1073741824 x i8], align 1
- %arrayidx = getelementptr inbounds [1073741824 x i8]* %a1, i32 0, i32 1048676
+ %arrayidx = getelementptr inbounds [1073741824 x i8], [1073741824 x i8]* %a1, i32 0, i32 1048676
call void @f2(i8* %arrayidx) nounwind
ret void
}
diff --git a/test/CodeGen/Mips/largeimmprinting.ll b/test/CodeGen/Mips/largeimmprinting.ll
index 0e9c91fb46df..a53a953a7883 100644
--- a/test/CodeGen/Mips/largeimmprinting.ll
+++ b/test/CodeGen/Mips/largeimmprinting.ll
@@ -1,7 +1,7 @@
; RUN: llc -march=mipsel < %s | FileCheck %s -check-prefix=32
-; RUN: llc -march=mips64el -mcpu=mips4 -mattr=n64 < %s | \
+; RUN: llc -march=mips64el -mcpu=mips4 -target-abi=n64 < %s | \
; RUN: FileCheck %s -check-prefix=64
-; RUN: llc -march=mips64el -mcpu=mips64 -mattr=n64 < %s | \
+; RUN: llc -march=mips64el -mcpu=mips64 -target-abi=n64 < %s | \
; RUN: FileCheck %s -check-prefix=64
%struct.S1 = type { [65536 x i8] }
@@ -27,8 +27,8 @@ entry:
; 64: sd $ra, 24($[[R1]])
%agg.tmp = alloca %struct.S1, align 1
- %tmp = getelementptr inbounds %struct.S1* %agg.tmp, i32 0, i32 0, i32 0
- call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp, i8* getelementptr inbounds (%struct.S1* @s1, i32 0, i32 0, i32 0), i32 65536, i32 1, i1 false)
+ %tmp = getelementptr inbounds %struct.S1, %struct.S1* %agg.tmp, i32 0, i32 0, i32 0
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp, i8* getelementptr inbounds (%struct.S1, %struct.S1* @s1, i32 0, i32 0, i32 0), i32 65536, i32 1, i1 false)
call void @f2(%struct.S1* byval %agg.tmp) nounwind
ret void
}
diff --git a/test/CodeGen/Mips/lb1.ll b/test/CodeGen/Mips/lb1.ll
index aac2767a4e40..21648d7572a5 100644
--- a/test/CodeGen/Mips/lb1.ll
+++ b/test/CodeGen/Mips/lb1.ll
@@ -6,12 +6,12 @@
define i32 @main() nounwind {
entry:
%i = alloca i32, align 4
- %0 = load i8* @c, align 1
+ %0 = load i8, i8* @c, align 1
; 16: lb ${{[0-9]+}}, 0(${{[0-9]+}})
%conv = sext i8 %0 to i32
store i32 %conv, i32* %i, align 4
- %1 = load i32* %i, align 4
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([5 x i8]* @.str, i32 0, i32 0), i32 %1)
+ %1 = load i32, i32* %i, align 4
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i32 0, i32 0), i32 %1)
ret i32 0
}
diff --git a/test/CodeGen/Mips/lbu1.ll b/test/CodeGen/Mips/lbu1.ll
index 63e0cca1684d..28ca27132467 100644
--- a/test/CodeGen/Mips/lbu1.ll
+++ b/test/CodeGen/Mips/lbu1.ll
@@ -6,13 +6,13 @@
define i32 @main() nounwind {
entry:
%i = alloca i32, align 4
- %0 = load i8* @c, align 1
+ %0 = load i8, i8* @c, align 1
%conv = zext i8 %0 to i32
; 16: lbu ${{[0-9]+}}, 0(${{[0-9]+}})
store i32 %conv, i32* %i, align 4
- %1 = load i8* @c, align 1
+ %1 = load i8, i8* @c, align 1
%conv1 = zext i8 %1 to i32
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([5 x i8]* @.str, i32 0, i32 0), i32 %conv1)
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i32 0, i32 0), i32 %conv1)
ret i32 0
}
diff --git a/test/CodeGen/Mips/lcb2.ll b/test/CodeGen/Mips/lcb2.ll
index 59b96e64e95e..a6f4968e6d23 100644
--- a/test/CodeGen/Mips/lcb2.ll
+++ b/test/CodeGen/Mips/lcb2.ll
@@ -1,6 +1,6 @@
-; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -soft-float -mips16-hard-float -relocation-model=static -mips16-constant-islands=true < %s | FileCheck %s -check-prefix=lcb
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -mattr=+soft-float -mips16-hard-float -relocation-model=static -mips16-constant-islands=true < %s | FileCheck %s -check-prefix=lcb
-; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -soft-float -mips16-hard-float -relocation-model=static -mips16-constant-islands=true < %s | FileCheck %s -check-prefix=lcbn
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -mattr=+soft-float -mips16-hard-float -relocation-model=static -mips16-constant-islands=true < %s | FileCheck %s -check-prefix=lcbn
@i = global i32 0, align 4
@j = common global i32 0, align 4
@@ -9,7 +9,7 @@
; Function Attrs: nounwind optsize
define i32 @bnez() #0 {
entry:
- %0 = load i32* @i, align 4, !tbaa !1
+ %0 = load i32, i32* @i, align 4, !tbaa !1
%cmp = icmp eq i32 %0, 0
br i1 %cmp, label %if.then, label %if.end
@@ -31,7 +31,7 @@ if.end: ; preds = %if.then, %entry
; Function Attrs: nounwind optsize
define i32 @beqz() #0 {
entry:
- %0 = load i32* @i, align 4, !tbaa !1
+ %0 = load i32, i32* @i, align 4, !tbaa !1
%cmp = icmp eq i32 %0, 0
br i1 %cmp, label %if.then, label %if.else
@@ -60,8 +60,8 @@ if.end: ; preds = %if.else, %if.then
; Function Attrs: nounwind optsize
define void @bteqz() #0 {
entry:
- %0 = load i32* @i, align 4, !tbaa !1
- %1 = load i32* @j, align 4, !tbaa !1
+ %0 = load i32, i32* @i, align 4, !tbaa !1
+ %1 = load i32, i32* @j, align 4, !tbaa !1
%cmp = icmp eq i32 %0, %1
br i1 %cmp, label %if.then, label %if.else
@@ -90,15 +90,15 @@ if.end: ; preds = %if.else, %if.then
; Function Attrs: nounwind optsize
define void @btz() #0 {
entry:
- %0 = load i32* @i, align 4, !tbaa !1
- %1 = load i32* @j, align 4, !tbaa !1
+ %0 = load i32, i32* @i, align 4, !tbaa !1
+ %1 = load i32, i32* @j, align 4, !tbaa !1
%cmp1 = icmp sgt i32 %0, %1
br i1 %cmp1, label %if.then, label %if.end
if.then: ; preds = %entry, %if.then
tail call void asm sideeffect ".space 60000", ""() #1, !srcloc !10
- %2 = load i32* @i, align 4, !tbaa !1
- %3 = load i32* @j, align 4, !tbaa !1
+ %2 = load i32, i32* @i, align 4, !tbaa !1
+ %3 = load i32, i32* @j, align 4, !tbaa !1
%cmp = icmp sgt i32 %2, %3
br i1 %cmp, label %if.then, label %if.end
diff --git a/test/CodeGen/Mips/lcb3c.ll b/test/CodeGen/Mips/lcb3c.ll
index eb8329145421..4c6f2c036a0b 100644
--- a/test/CodeGen/Mips/lcb3c.ll
+++ b/test/CodeGen/Mips/lcb3c.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -soft-float -mips16-hard-float -relocation-model=static -O0 < %s | FileCheck %s -check-prefix=lcb
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -mattr=+soft-float -mips16-hard-float -relocation-model=static -O0 < %s | FileCheck %s -check-prefix=lcb
@i = global i32 0, align 4
@j = common global i32 0, align 4
@@ -7,7 +7,7 @@
; Function Attrs: nounwind
define i32 @s() #0 {
entry:
- %0 = load i32* @i, align 4
+ %0 = load i32, i32* @i, align 4
%cmp = icmp eq i32 %0, 0
br i1 %cmp, label %if.then, label %if.else
@@ -30,7 +30,7 @@ if.end: ; preds = %if.else, %if.then
; Function Attrs: nounwind
define i32 @b() #0 {
entry:
- %0 = load i32* @i, align 4
+ %0 = load i32, i32* @i, align 4
%cmp = icmp eq i32 %0, 0
br i1 %cmp, label %if.then, label %if.else
diff --git a/test/CodeGen/Mips/lcb4a.ll b/test/CodeGen/Mips/lcb4a.ll
index fbcadd2552f8..9e97b5bf1433 100644
--- a/test/CodeGen/Mips/lcb4a.ll
+++ b/test/CodeGen/Mips/lcb4a.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -soft-float -mips16-hard-float -relocation-model=static < %s | FileCheck %s -check-prefix=ci
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -mattr=+soft-float -mips16-hard-float -relocation-model=static < %s | FileCheck %s -check-prefix=ci
@i = global i32 0, align 4
@j = common global i32 0, align 4
@@ -7,7 +7,7 @@
; Function Attrs: nounwind optsize
define i32 @foo() #0 {
entry:
- %0 = load i32* @i, align 4, !tbaa !1
+ %0 = load i32, i32* @i, align 4, !tbaa !1
%cmp = icmp eq i32 %0, 0
br i1 %cmp, label %if.then, label %if.else
@@ -32,7 +32,7 @@ if.end: ; preds = %if.else, %if.then
; Function Attrs: nounwind optsize
define i32 @goo() #0 {
entry:
- %0 = load i32* @i, align 4, !tbaa !1
+ %0 = load i32, i32* @i, align 4, !tbaa !1
%cmp = icmp eq i32 %0, 0
br i1 %cmp, label %if.then, label %if.else
diff --git a/test/CodeGen/Mips/lcb5.ll b/test/CodeGen/Mips/lcb5.ll
index b2a8d1d33ef6..41878d5f8817 100644
--- a/test/CodeGen/Mips/lcb5.ll
+++ b/test/CodeGen/Mips/lcb5.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -soft-float -mips16-hard-float -relocation-model=static < %s | FileCheck %s -check-prefix=ci
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -mattr=+soft-float -mips16-hard-float -relocation-model=static < %s | FileCheck %s -check-prefix=ci
@i = global i32 0, align 4
@j = common global i32 0, align 4
@@ -7,7 +7,7 @@
; Function Attrs: nounwind optsize
define i32 @x0() #0 {
entry:
- %0 = load i32* @i, align 4, !tbaa !1
+ %0 = load i32, i32* @i, align 4, !tbaa !1
%cmp = icmp eq i32 %0, 0
br i1 %cmp, label %if.then, label %if.else
@@ -33,7 +33,7 @@ if.end: ; preds = %if.else, %if.then
; Function Attrs: nounwind optsize
define i32 @x1() #0 {
entry:
- %0 = load i32* @i, align 4, !tbaa !1
+ %0 = load i32, i32* @i, align 4, !tbaa !1
%cmp = icmp eq i32 %0, 0
br i1 %cmp, label %if.then, label %if.else
@@ -61,7 +61,7 @@ if.end: ; preds = %if.else, %if.then
; Function Attrs: nounwind optsize
define i32 @y0() #0 {
entry:
- %0 = load i32* @i, align 4, !tbaa !1
+ %0 = load i32, i32* @i, align 4, !tbaa !1
%cmp = icmp eq i32 %0, 0
br i1 %cmp, label %if.then, label %if.else
@@ -86,7 +86,7 @@ if.end: ; preds = %if.else, %if.then
; Function Attrs: nounwind optsize
define i32 @y1() #0 {
entry:
- %0 = load i32* @i, align 4, !tbaa !1
+ %0 = load i32, i32* @i, align 4, !tbaa !1
%cmp = icmp eq i32 %0, 0
br i1 %cmp, label %if.then, label %if.else
@@ -114,8 +114,8 @@ if.end: ; preds = %if.else, %if.then
; Function Attrs: nounwind optsize
define void @z0() #0 {
entry:
- %0 = load i32* @i, align 4, !tbaa !1
- %1 = load i32* @j, align 4, !tbaa !1
+ %0 = load i32, i32* @i, align 4, !tbaa !1
+ %1 = load i32, i32* @j, align 4, !tbaa !1
%cmp = icmp eq i32 %0, %1
br i1 %cmp, label %if.then, label %if.else
@@ -140,8 +140,8 @@ if.end: ; preds = %if.else, %if.then
; Function Attrs: nounwind optsize
define void @z1() #0 {
entry:
- %0 = load i32* @i, align 4, !tbaa !1
- %1 = load i32* @j, align 4, !tbaa !1
+ %0 = load i32, i32* @i, align 4, !tbaa !1
+ %1 = load i32, i32* @j, align 4, !tbaa !1
%cmp = icmp eq i32 %0, %1
br i1 %cmp, label %if.then, label %if.else
@@ -169,15 +169,15 @@ if.end: ; preds = %if.else, %if.then
; Function Attrs: nounwind optsize
define void @z3() #0 {
entry:
- %0 = load i32* @i, align 4, !tbaa !1
- %1 = load i32* @j, align 4, !tbaa !1
+ %0 = load i32, i32* @i, align 4, !tbaa !1
+ %1 = load i32, i32* @j, align 4, !tbaa !1
%cmp1 = icmp sgt i32 %0, %1
br i1 %cmp1, label %if.then, label %if.end
if.then: ; preds = %entry, %if.then
tail call void asm sideeffect ".space 10000", ""() #1, !srcloc !17
- %2 = load i32* @i, align 4, !tbaa !1
- %3 = load i32* @j, align 4, !tbaa !1
+ %2 = load i32, i32* @i, align 4, !tbaa !1
+ %3 = load i32, i32* @j, align 4, !tbaa !1
%cmp = icmp sgt i32 %2, %3
br i1 %cmp, label %if.then, label %if.end
@@ -192,15 +192,15 @@ if.end: ; preds = %if.then, %entry
; Function Attrs: nounwind optsize
define void @z4() #0 {
entry:
- %0 = load i32* @i, align 4, !tbaa !1
- %1 = load i32* @j, align 4, !tbaa !1
+ %0 = load i32, i32* @i, align 4, !tbaa !1
+ %1 = load i32, i32* @j, align 4, !tbaa !1
%cmp1 = icmp sgt i32 %0, %1
br i1 %cmp1, label %if.then, label %if.end
if.then: ; preds = %entry, %if.then
tail call void asm sideeffect ".space 10000000", ""() #1, !srcloc !18
- %2 = load i32* @i, align 4, !tbaa !1
- %3 = load i32* @j, align 4, !tbaa !1
+ %2 = load i32, i32* @i, align 4, !tbaa !1
+ %3 = load i32, i32* @j, align 4, !tbaa !1
%cmp = icmp sgt i32 %2, %3
br i1 %cmp, label %if.then, label %if.end
diff --git a/test/CodeGen/Mips/lh1.ll b/test/CodeGen/Mips/lh1.ll
index 1f95b0903466..31967e5a5379 100644
--- a/test/CodeGen/Mips/lh1.ll
+++ b/test/CodeGen/Mips/lh1.ll
@@ -6,12 +6,12 @@
define i32 @main() nounwind {
entry:
%i = alloca i32, align 4
- %0 = load i16* @s, align 2
+ %0 = load i16, i16* @s, align 2
%conv = sext i16 %0 to i32
; 16: lh ${{[0-9]+}}, 0(${{[0-9]+}})
store i32 %conv, i32* %i, align 4
- %1 = load i32* %i, align 4
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([5 x i8]* @.str, i32 0, i32 0), i32 %1)
+ %1 = load i32, i32* %i, align 4
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i32 0, i32 0), i32 %1)
ret i32 0
}
diff --git a/test/CodeGen/Mips/lhu1.ll b/test/CodeGen/Mips/lhu1.ll
index 0cfcede669e0..413da46d4a31 100644
--- a/test/CodeGen/Mips/lhu1.ll
+++ b/test/CodeGen/Mips/lhu1.ll
@@ -7,12 +7,12 @@
define i32 @main() nounwind {
entry:
%i = alloca i32, align 4
- %0 = load i16* @s, align 2
+ %0 = load i16, i16* @s, align 2
%conv = zext i16 %0 to i32
; 16: lhu ${{[0-9]+}}, 0(${{[0-9]+}})
store i32 %conv, i32* %i, align 4
- %1 = load i32* %i, align 4
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([5 x i8]* @.str, i32 0, i32 0), i32 %1)
+ %1 = load i32, i32* %i, align 4
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i32 0, i32 0), i32 %1)
ret i32 0
}
diff --git a/test/CodeGen/Mips/llcarry.ll b/test/CodeGen/Mips/llcarry.ll
index 7763daec3b32..f4120ecec175 100644
--- a/test/CodeGen/Mips/llcarry.ll
+++ b/test/CodeGen/Mips/llcarry.ll
@@ -9,8 +9,8 @@
define void @test1() nounwind {
entry:
- %0 = load i64* @i, align 8
- %1 = load i64* @j, align 8
+ %0 = load i64, i64* @i, align 8
+ %1 = load i64, i64* @j, align 8
%add = add nsw i64 %1, %0
store i64 %add, i64* @k, align 8
; 16: addu ${{[0-9]+}}, ${{[0-9]+}}, ${{[0-9]+}}
@@ -23,8 +23,8 @@ entry:
define void @test2() nounwind {
entry:
- %0 = load i64* @i, align 8
- %1 = load i64* @j, align 8
+ %0 = load i64, i64* @i, align 8
+ %1 = load i64, i64* @j, align 8
%sub = sub nsw i64 %0, %1
; 16: subu ${{[0-9]+}}, ${{[0-9]+}}, ${{[0-9]+}}
; 16: sltu ${{[0-9]+}}, ${{[0-9]+}}
@@ -37,7 +37,7 @@ entry:
define void @test3() nounwind {
entry:
- %0 = load i64* @ii, align 8
+ %0 = load i64, i64* @ii, align 8
%add = add nsw i64 %0, 15
; 16: addiu ${{[0-9]+}}, 15
; 16: sltu ${{[0-9]+}}, ${{[0-9]+}}
diff --git a/test/CodeGen/Mips/llvm-ir/add.ll b/test/CodeGen/Mips/llvm-ir/add.ll
index 83774eda634f..6cccc7df19f9 100644
--- a/test/CodeGen/Mips/llvm-ir/add.ll
+++ b/test/CodeGen/Mips/llvm-ir/add.ll
@@ -4,6 +4,10 @@
; RUN: -check-prefix=ALL -check-prefix=NOT-R2-R6 -check-prefix=GP32
; RUN: llc < %s -march=mips -mcpu=mips32r2 | FileCheck %s \
; RUN: -check-prefix=ALL -check-prefix=R2-R6 -check-prefix=GP32
+; RUN: llc < %s -march=mips -mcpu=mips32r3 | FileCheck %s \
+; RUN: -check-prefix=ALL -check-prefix=R2-R6 -check-prefix=GP32
+; RUN: llc < %s -march=mips -mcpu=mips32r5 | FileCheck %s \
+; RUN: -check-prefix=ALL -check-prefix=R2-R6 -check-prefix=GP32
; RUN: llc < %s -march=mips -mcpu=mips32r6 | FileCheck %s \
; RUN: -check-prefix=ALL -check-prefix=R2-R6 -check-prefix=GP32
; RUN: llc < %s -march=mips64 -mcpu=mips3 | FileCheck %s \
@@ -14,6 +18,10 @@
; RUN: -check-prefix=ALL -check-prefix=NOT-R2-R6 -check-prefix=GP64
; RUN: llc < %s -march=mips64 -mcpu=mips64r2 | FileCheck %s \
; RUN: -check-prefix=ALL -check-prefix=R2-R6 -check-prefix=GP64
+; RUN: llc < %s -march=mips64 -mcpu=mips64r3 | FileCheck %s \
+; RUN: -check-prefix=ALL -check-prefix=R2-R6 -check-prefix=GP64
+; RUN: llc < %s -march=mips64 -mcpu=mips64r5 | FileCheck %s \
+; RUN: -check-prefix=ALL -check-prefix=R2-R6 -check-prefix=GP64
; RUN: llc < %s -march=mips64 -mcpu=mips64r6 | FileCheck %s \
; RUN: -check-prefix=ALL -check-prefix=R2-R6 -check-prefix=GP64
diff --git a/test/CodeGen/Mips/llvm-ir/and.ll b/test/CodeGen/Mips/llvm-ir/and.ll
index 09d0ef9238af..8ebcfe4a3f64 100644
--- a/test/CodeGen/Mips/llvm-ir/and.ll
+++ b/test/CodeGen/Mips/llvm-ir/and.ll
@@ -4,6 +4,10 @@
; RUN: -check-prefix=ALL -check-prefix=GP32
; RUN: llc < %s -march=mips -mcpu=mips32r2 | FileCheck %s \
; RUN: -check-prefix=ALL -check-prefix=GP32
+; RUN: llc < %s -march=mips -mcpu=mips32r3 | FileCheck %s \
+; RUN: -check-prefix=ALL -check-prefix=GP32
+; RUN: llc < %s -march=mips -mcpu=mips32r5 | FileCheck %s \
+; RUN: -check-prefix=ALL -check-prefix=GP32
; RUN: llc < %s -march=mips -mcpu=mips32r6 | FileCheck %s \
; RUN: -check-prefix=ALL -check-prefix=GP32
; RUN: llc < %s -march=mips64 -mcpu=mips3 | FileCheck %s \
@@ -14,6 +18,10 @@
; RUN: -check-prefix=ALL -check-prefix=GP64
; RUN: llc < %s -march=mips64 -mcpu=mips64r2 | FileCheck %s \
; RUN: -check-prefix=ALL -check-prefix=GP64
+; RUN: llc < %s -march=mips64 -mcpu=mips64r3 | FileCheck %s \
+; RUN: -check-prefix=ALL -check-prefix=GP64
+; RUN: llc < %s -march=mips64 -mcpu=mips64r5 | FileCheck %s \
+; RUN: -check-prefix=ALL -check-prefix=GP64
; RUN: llc < %s -march=mips64 -mcpu=mips64r6 | FileCheck %s \
; RUN: -check-prefix=ALL -check-prefix=GP64
@@ -51,10 +59,7 @@ define signext i32 @and_i32(i32 signext %a, i32 signext %b) {
entry:
; ALL-LABEL: and_i32:
- ; GP32: and $2, $4, $5
-
- ; GP64: and $[[T0:[0-9]+]], $4, $5
- ; GP64: sll $2, $[[T0]], 0
+ ; ALL: and $2, $4, $5
%r = and i32 %a, %b
ret i32 %r
diff --git a/test/CodeGen/Mips/llvm-ir/ashr.ll b/test/CodeGen/Mips/llvm-ir/ashr.ll
index 415998929aa0..cad4a39d7743 100644
--- a/test/CodeGen/Mips/llvm-ir/ashr.ll
+++ b/test/CodeGen/Mips/llvm-ir/ashr.ll
@@ -1,30 +1,42 @@
; RUN: llc < %s -march=mips -mcpu=mips2 | FileCheck %s \
; RUN: -check-prefix=ALL -check-prefix=GP32 \
-; RUN: -check-prefix=M2 -check-prefix=NOT-R2-R6
+; RUN: -check-prefix=M2
; RUN: llc < %s -march=mips -mcpu=mips32 | FileCheck %s \
-; RUN: -check-prefix=ALL -check-prefix=GP32 -check-prefix=NOT-R2-R6 \
-; RUN: -check-prefix=32R1-R2
+; RUN: -check-prefix=ALL -check-prefix=GP32 \
+; RUN: -check-prefix=32R1-R5
; RUN: llc < %s -march=mips -mcpu=mips32r2 | FileCheck %s \
; RUN: -check-prefix=ALL -check-prefix=GP32 \
-; RUN: -check-prefix=32R1-R2 -check-prefix=R2-R6
+; RUN: -check-prefix=32R1-R5
+; RUN: llc < %s -march=mips -mcpu=mips32r3 | FileCheck %s \
+; RUN: -check-prefix=ALL -check-prefix=GP32 \
+; RUN: -check-prefix=32R1-R5
+; RUN: llc < %s -march=mips -mcpu=mips32r5 | FileCheck %s \
+; RUN: -check-prefix=ALL -check-prefix=GP32 \
+; RUN: -check-prefix=32R1-R5
; RUN: llc < %s -march=mips -mcpu=mips32r6 | FileCheck %s \
; RUN: -check-prefix=ALL -check-prefix=GP32 \
-; RUN: -check-prefix=32R6 -check-prefix=R2-R6
+; RUN: -check-prefix=32R6
; RUN: llc < %s -march=mips64 -mcpu=mips3 | FileCheck %s \
; RUN: -check-prefix=ALL -check-prefix=GP64 \
-; RUN: -check-prefix=M3 -check-prefix=NOT-R2-R6
+; RUN: -check-prefix=M3
; RUN: llc < %s -march=mips64 -mcpu=mips4 | FileCheck %s \
; RUN: -check-prefix=ALL -check-prefix=GP64 \
-; RUN: -check-prefix=GP64-NOT-R6 -check-prefix=NOT-R2-R6
+; RUN: -check-prefix=GP64-NOT-R6
; RUN: llc < %s -march=mips64 -mcpu=mips64 | FileCheck %s \
; RUN: -check-prefix=ALL -check-prefix=GP64 \
-; RUN: -check-prefix=GP64-NOT-R6 -check-prefix=NOT-R2-R6
+; RUN: -check-prefix=GP64-NOT-R6
; RUN: llc < %s -march=mips64 -mcpu=mips64r2 | FileCheck %s \
; RUN: -check-prefix=ALL -check-prefix=GP64 \
-; RUN: -check-prefix=GP64-NOT-R6 -check-prefix R2-R6
+; RUN: -check-prefix=GP64-NOT-R6
+; RUN: llc < %s -march=mips64 -mcpu=mips64r3 | FileCheck %s \
+; RUN: -check-prefix=ALL -check-prefix=GP64 \
+; RUN: -check-prefix=GP64-NOT-R6
+; RUN: llc < %s -march=mips64 -mcpu=mips64r5 | FileCheck %s \
+; RUN: -check-prefix=ALL -check-prefix=GP64 \
+; RUN: -check-prefix=GP64-NOT-R6
; RUN: llc < %s -march=mips64 -mcpu=mips64r6 | FileCheck %s \
; RUN: -check-prefix=ALL -check-prefix=GP64 \
-; RUN: -check-prefix=64R6 -check-prefix=R2-R6
+; RUN: -check-prefix=64R6
define signext i1 @ashr_i1(i1 signext %a, i1 signext %b) {
entry:
@@ -91,17 +103,17 @@ entry:
; M2: jr $ra
; M2: nop
- ; 32R1-R2: srlv $[[T0:[0-9]+]], $5, $7
- ; 32R1-R2: not $[[T1:[0-9]+]], $7
- ; 32R1-R2: sll $[[T2:[0-9]+]], $4, 1
- ; 32R1-R2: sllv $[[T3:[0-9]+]], $[[T2]], $[[T1]]
- ; 32R1-R2: or $3, $[[T3]], $[[T0]]
- ; 32R1-R2: srav $[[T4:[0-9]+]], $4, $7
- ; 32R1-R2: andi $[[T5:[0-9]+]], $7, 32
- ; 32R1-R2: movn $3, $[[T4]], $[[T5]]
- ; 32R1-R2: sra $4, $4, 31
- ; 32R1-R2: jr $ra
- ; 32R1-R2: movn $2, $4, $[[T5]]
+ ; 32R1-R5: srlv $[[T0:[0-9]+]], $5, $7
+ ; 32R1-R5: not $[[T1:[0-9]+]], $7
+ ; 32R1-R5: sll $[[T2:[0-9]+]], $4, 1
+ ; 32R1-R5: sllv $[[T3:[0-9]+]], $[[T2]], $[[T1]]
+ ; 32R1-R5: or $3, $[[T3]], $[[T0]]
+ ; 32R1-R5: srav $[[T4:[0-9]+]], $4, $7
+ ; 32R1-R5: andi $[[T5:[0-9]+]], $7, 32
+ ; 32R1-R5: movn $3, $[[T4]], $[[T5]]
+ ; 32R1-R5: sra $4, $4, 31
+ ; 32R1-R5: jr $ra
+ ; 32R1-R5: movn $2, $4, $[[T5]]
; 32R6: srav $[[T0:[0-9]+]], $4, $7
; 32R6: andi $[[T1:[0-9]+]], $7, 32
@@ -119,9 +131,7 @@ entry:
; 32R6: jr $ra
; 32R6: or $3, $[[T0]], $[[T11]]
- ; FIXME: The sll instruction below is redundant.
- ; GP64: sll $[[T0:[0-9]+]], $5, 0
- ; GP64: dsrav $2, $4, $[[T0]]
+ ; GP64: dsrav $2, $4, $5
%r = ashr i64 %a, %b
ret i64 %r
@@ -134,11 +144,11 @@ entry:
; GP32: lw $25, %call16(__ashrti3)($gp)
; M3: sll $[[T0:[0-9]+]], $7, 0
- ; M3: dsrav $[[T1:[0-9]+]], $4, $[[T0]]
+ ; M3: dsrav $[[T1:[0-9]+]], $4, $7
; M3: andi $[[T2:[0-9]+]], $[[T0]], 64
; M3: bnez $[[T3:[0-9]+]], $[[BB0:BB[0-9_]+]]
; M3: move $3, $[[T1]]
- ; M3: dsrlv $[[T4:[0-9]+]], $5, $[[T0]]
+ ; M3: dsrlv $[[T4:[0-9]+]], $5, $7
; M3: dsll $[[T5:[0-9]+]], $4, 1
; M3: not $[[T6:[0-9]+]], $[[T0]]
; M3: dsllv $[[T7:[0-9]+]], $[[T5]], $[[T6]]
@@ -151,35 +161,34 @@ entry:
; M3: jr $ra
; M3: nop
- ; GP64-NOT-R6: sll $[[T0:[0-9]+]], $7, 0
- ; GP64-NOT-R6: dsrlv $[[T1:[0-9]+]], $5, $[[T0]]
- ; GP64-NOT-R6: dsll $[[T2:[0-9]+]], $4, 1
- ; GP64-NOT-R6: not $[[T3:[0-9]+]], $[[T0]]
- ; GP64-NOT-R6: dsllv $[[T4:[0-9]+]], $[[T2]], $[[T3]]
- ; GP64-NOT-R6: or $3, $[[T4]], $[[T1]]
- ; GP64-NOT-R6: dsrav $2, $4, $[[T0]]
- ; GP64-NOT-R6: andi $[[T5:[0-9]+]], $[[T0]], 64
-
+ ; GP64-NOT-R6: dsrlv $[[T0:[0-9]+]], $5, $7
+ ; GP64-NOT-R6: dsll $[[T1:[0-9]+]], $4, 1
+ ; GP64-NOT-R6: sll $[[T2:[0-9]+]], $7, 0
+ ; GP64-NOT-R6: not $[[T3:[0-9]+]], $[[T2]]
+ ; GP64-NOT-R6: dsllv $[[T4:[0-9]+]], $[[T1]], $[[T3]]
+ ; GP64-NOT-R6: or $3, $[[T4]], $[[T0]]
+ ; GP64-NOT-R6: dsrav $2, $4, $7
+ ; GP64-NOT-R6: andi $[[T5:[0-9]+]], $[[T2]], 64
; GP64-NOT-R6: movn $3, $2, $[[T5]]
; GP64-NOT-R6: dsra $[[T6:[0-9]+]], $4, 63
; GP64-NOT-R6: jr $ra
; GP64-NOT-R6: movn $2, $[[T6]], $[[T5]]
- ; 64R6: sll $[[T0:[0-9]+]], $7, 0
- ; 64R6: dsrav $[[T1:[0-9]+]], $4, $[[T0]]
- ; 64R6: andi $[[T2:[0-9]+]], $[[T0]], 64
+ ; 64R6: dsrav $[[T0:[0-9]+]], $4, $7
+ ; 64R6: sll $[[T1:[0-9]+]], $7, 0
+ ; 64R6: andi $[[T2:[0-9]+]], $[[T1]], 64
; 64R6: sll $[[T3:[0-9]+]], $[[T2]], 0
- ; 64R6: seleqz $[[T4:[0-9]+]], $[[T1]], $[[T3]]
+ ; 64R6: seleqz $[[T4:[0-9]+]], $[[T0]], $[[T3]]
; 64R6: dsra $[[T5:[0-9]+]], $4, 63
; 64R6: selnez $[[T6:[0-9]+]], $[[T5]], $[[T3]]
; 64R6: or $2, $[[T6]], $[[T4]]
- ; 64R6: dsrlv $[[T7:[0-9]+]], $5, $[[T0]]
+ ; 64R6: dsrlv $[[T7:[0-9]+]], $5, $7
; 64R6: dsll $[[T8:[0-9]+]], $4, 1
- ; 64R6: not $[[T9:[0-9]+]], $[[T0]]
+ ; 64R6: not $[[T9:[0-9]+]], $[[T1]]
; 64R6: dsllv $[[T10:[0-9]+]], $[[T8]], $[[T9]]
; 64R6: or $[[T11:[0-9]+]], $[[T10]], $[[T7]]
; 64R6: seleqz $[[T12:[0-9]+]], $[[T11]], $[[T3]]
- ; 64R6: selnez $[[T13:[0-9]+]], $[[T1]], $[[T3]]
+ ; 64R6: selnez $[[T13:[0-9]+]], $[[T0]], $[[T3]]
; 64R6: jr $ra
; 64R6: or $3, $[[T13]], $[[T12]]
diff --git a/test/CodeGen/Mips/llvm-ir/call.ll b/test/CodeGen/Mips/llvm-ir/call.ll
index 4cbf43cae28e..112ab8ee8c7f 100644
--- a/test/CodeGen/Mips/llvm-ir/call.ll
+++ b/test/CodeGen/Mips/llvm-ir/call.ll
@@ -3,10 +3,14 @@
; FIXME: We should remove the need for -enable-mips-tail-calls
; RUN: llc -march=mips -mcpu=mips32 -enable-mips-tail-calls < %s | FileCheck %s -check-prefix=ALL -check-prefix=O32
; RUN: llc -march=mips -mcpu=mips32r2 -enable-mips-tail-calls < %s | FileCheck %s -check-prefix=ALL -check-prefix=O32
+; RUN: llc -march=mips -mcpu=mips32r3 -enable-mips-tail-calls < %s | FileCheck %s -check-prefix=ALL -check-prefix=O32
+; RUN: llc -march=mips -mcpu=mips32r5 -enable-mips-tail-calls < %s | FileCheck %s -check-prefix=ALL -check-prefix=O32
; RUN: llc -march=mips -mcpu=mips32r6 -enable-mips-tail-calls < %s | FileCheck %s -check-prefix=ALL -check-prefix=O32
; RUN: llc -march=mips64 -mcpu=mips4 -enable-mips-tail-calls < %s | FileCheck %s -check-prefix=ALL -check-prefix=N64
; RUN: llc -march=mips64 -mcpu=mips64 -enable-mips-tail-calls < %s | FileCheck %s -check-prefix=ALL -check-prefix=N64
; RUN: llc -march=mips64 -mcpu=mips64r2 -enable-mips-tail-calls < %s | FileCheck %s -check-prefix=ALL -check-prefix=N64
+; RUN: llc -march=mips64 -mcpu=mips64r3 -enable-mips-tail-calls < %s | FileCheck %s -check-prefix=ALL -check-prefix=N64
+; RUN: llc -march=mips64 -mcpu=mips64r5 -enable-mips-tail-calls < %s | FileCheck %s -check-prefix=ALL -check-prefix=N64
; RUN: llc -march=mips64 -mcpu=mips64r6 -enable-mips-tail-calls < %s | FileCheck %s -check-prefix=ALL -check-prefix=N64
declare void @extern_void_void()
diff --git a/test/CodeGen/Mips/llvm-ir/indirectbr.ll b/test/CodeGen/Mips/llvm-ir/indirectbr.ll
index d8fd78774553..debfeb35b213 100644
--- a/test/CodeGen/Mips/llvm-ir/indirectbr.ll
+++ b/test/CodeGen/Mips/llvm-ir/indirectbr.ll
@@ -2,10 +2,14 @@
; RUN: llc -march=mips -mcpu=mips32 -asm-show-inst < %s | FileCheck %s -check-prefix=ALL -check-prefix=NOT-R6
; RUN: llc -march=mips -mcpu=mips32r2 -asm-show-inst < %s | FileCheck %s -check-prefix=ALL -check-prefix=NOT-R6
+; RUN: llc -march=mips -mcpu=mips32r3 -asm-show-inst < %s | FileCheck %s -check-prefix=ALL -check-prefix=NOT-R6
+; RUN: llc -march=mips -mcpu=mips32r5 -asm-show-inst < %s | FileCheck %s -check-prefix=ALL -check-prefix=NOT-R6
; RUN: llc -march=mips -mcpu=mips32r6 -asm-show-inst < %s | FileCheck %s -check-prefix=ALL -check-prefix=R6
; RUN: llc -march=mips64 -mcpu=mips4 -asm-show-inst < %s | FileCheck %s -check-prefix=ALL -check-prefix=NOT-R6
; RUN: llc -march=mips64 -mcpu=mips64 -asm-show-inst < %s | FileCheck %s -check-prefix=ALL -check-prefix=NOT-R6
; RUN: llc -march=mips64 -mcpu=mips64r2 -asm-show-inst < %s | FileCheck %s -check-prefix=ALL -check-prefix=NOT-R6
+; RUN: llc -march=mips64 -mcpu=mips64r3 -asm-show-inst < %s | FileCheck %s -check-prefix=ALL -check-prefix=NOT-R6
+; RUN: llc -march=mips64 -mcpu=mips64r5 -asm-show-inst < %s | FileCheck %s -check-prefix=ALL -check-prefix=NOT-R6
; RUN: llc -march=mips64 -mcpu=mips64r6 -asm-show-inst < %s | FileCheck %s -check-prefix=ALL -check-prefix=R6
define i32 @br(i8 *%addr) {
diff --git a/test/CodeGen/Mips/llvm-ir/lshr.ll b/test/CodeGen/Mips/llvm-ir/lshr.ll
index 59f4330dde6c..3a7029fa5b7a 100644
--- a/test/CodeGen/Mips/llvm-ir/lshr.ll
+++ b/test/CodeGen/Mips/llvm-ir/lshr.ll
@@ -1,30 +1,42 @@
; RUN: llc < %s -march=mips -mcpu=mips2 | FileCheck %s \
; RUN: -check-prefix=ALL -check-prefix=GP32 \
-; RUN: -check-prefix=M2 -check-prefix=NOT-R2-R6
+; RUN: -check-prefix=M2
; RUN: llc < %s -march=mips -mcpu=mips32 | FileCheck %s \
-; RUN: -check-prefix=ALL -check-prefix=GP32 -check-prefix=NOT-R2-R6 \
-; RUN: -check-prefix=32R1-R2
+; RUN: -check-prefix=ALL -check-prefix=GP32 \
+; RUN: -check-prefix=32R1-R5
; RUN: llc < %s -march=mips -mcpu=mips32r2 | FileCheck %s \
; RUN: -check-prefix=ALL -check-prefix=GP32 \
-; RUN: -check-prefix=32R1-R2 -check-prefix=R2-R6
+; RUN: -check-prefix=32R1-R5
+; RUN: llc < %s -march=mips -mcpu=mips32r3 | FileCheck %s \
+; RUN: -check-prefix=ALL -check-prefix=GP32 \
+; RUN: -check-prefix=32R1-R5
+; RUN: llc < %s -march=mips -mcpu=mips32r5 | FileCheck %s \
+; RUN: -check-prefix=ALL -check-prefix=GP32 \
+; RUN: -check-prefix=32R1-R5
; RUN: llc < %s -march=mips -mcpu=mips32r6 | FileCheck %s \
; RUN: -check-prefix=ALL -check-prefix=GP32 \
-; RUN: -check-prefix=32R6 -check-prefix=R2-R6
+; RUN: -check-prefix=32R6
; RUN: llc < %s -march=mips64 -mcpu=mips3 | FileCheck %s \
; RUN: -check-prefix=ALL -check-prefix=GP64 \
-; RUN: -check-prefix=M3 -check-prefix=NOT-R2-R6
+; RUN: -check-prefix=M3
; RUN: llc < %s -march=mips64 -mcpu=mips4 | FileCheck %s \
; RUN: -check-prefix=ALL -check-prefix=GP64 \
-; RUN: -check-prefix=GP64-NOT-R6 -check-prefix=NOT-R2-R6
+; RUN: -check-prefix=GP64-NOT-R6
; RUN: llc < %s -march=mips64 -mcpu=mips64 | FileCheck %s \
; RUN: -check-prefix=ALL -check-prefix=GP64 \
-; RUN: -check-prefix=GP64-NOT-R6 -check-prefix=NOT-R2-R6
+; RUN: -check-prefix=GP64-NOT-R6
; RUN: llc < %s -march=mips64 -mcpu=mips64r2 | FileCheck %s \
; RUN: -check-prefix=ALL -check-prefix=GP64 \
-; RUN: -check-prefix=GP64-NOT-R6 -check-prefix R2-R6
+; RUN: -check-prefix=GP64-NOT-R6
+; RUN: llc < %s -march=mips64 -mcpu=mips64r3 | FileCheck %s \
+; RUN: -check-prefix=ALL -check-prefix=GP64 \
+; RUN: -check-prefix=GP64-NOT-R6
+; RUN: llc < %s -march=mips64 -mcpu=mips64r5 | FileCheck %s \
+; RUN: -check-prefix=ALL -check-prefix=GP64 \
+; RUN: -check-prefix=GP64-NOT-R6
; RUN: llc < %s -march=mips64 -mcpu=mips64r6 | FileCheck %s \
; RUN: -check-prefix=ALL -check-prefix=GP64 \
-; RUN: -check-prefix=64R6 -check-prefix=R2-R6
+; RUN: -check-prefix=64R6
define signext i1 @lshr_i1(i1 signext %a, i1 signext %b) {
entry:
@@ -89,16 +101,16 @@ entry:
; M2: jr $ra
; M2: nop
- ; 32R1-R2: srlv $[[T0:[0-9]+]], $5, $7
- ; 32R1-R2: not $[[T1:[0-9]+]], $7
- ; 32R1-R2: sll $[[T2:[0-9]+]], $4, 1
- ; 32R1-R2: sllv $[[T3:[0-9]+]], $[[T2]], $[[T1]]
- ; 32R1-R2: or $3, $[[T3]], $[[T0]]
- ; 32R1-R2: srlv $[[T4:[0-9]+]], $4, $7
- ; 32R1-R2: andi $[[T5:[0-9]+]], $7, 32
- ; 32R1-R2: movn $3, $[[T4]], $[[T5]]
- ; 32R1-R2: jr $ra
- ; 32R1-R2: movn $2, $zero, $[[T5]]
+ ; 32R1-R5: srlv $[[T0:[0-9]+]], $5, $7
+ ; 32R1-R5: not $[[T1:[0-9]+]], $7
+ ; 32R1-R5: sll $[[T2:[0-9]+]], $4, 1
+ ; 32R1-R5: sllv $[[T3:[0-9]+]], $[[T2]], $[[T1]]
+ ; 32R1-R5: or $3, $[[T3]], $[[T0]]
+ ; 32R1-R5: srlv $[[T4:[0-9]+]], $4, $7
+ ; 32R1-R5: andi $[[T5:[0-9]+]], $7, 32
+ ; 32R1-R5: movn $3, $[[T4]], $[[T5]]
+ ; 32R1-R5: jr $ra
+ ; 32R1-R5: movn $2, $zero, $[[T5]]
; 32R6: srlv $[[T0:[0-9]+]], $5, $7
; 32R6: not $[[T1:[0-9]+]], $7
@@ -113,8 +125,7 @@ entry:
; 32R6: jr $ra
; 32R6: seleqz $2, $[[T7]], $[[T5]]
- ; GP64: sll $[[T0:[0-9]+]], $5, 0
- ; GP64: dsrlv $2, $4, $[[T0]]
+ ; GP64: dsrlv $2, $4, $5
%r = lshr i64 %a, %b
ret i64 %r
@@ -127,11 +138,11 @@ entry:
; GP32: lw $25, %call16(__lshrti3)($gp)
; M3: sll $[[T0:[0-9]+]], $7, 0
- ; M3: dsrlv $[[T1:[0-9]+]], $4, $[[T0]]
+ ; M3: dsrlv $[[T1:[0-9]+]], $4, $7
; M3: andi $[[T2:[0-9]+]], $[[T0]], 64
; M3: bnez $[[T3:[0-9]+]], $[[BB0:BB[0-9_]+]]
; M3: move $3, $[[T1]]
- ; M3: dsrlv $[[T4:[0-9]+]], $5, $[[T0]]
+ ; M3: dsrlv $[[T4:[0-9]+]], $5, $7
; M3: dsll $[[T5:[0-9]+]], $4, 1
; M3: not $[[T6:[0-9]+]], $[[T0]]
; M3: dsllv $[[T7:[0-9]+]], $[[T5]], $[[T6]]
@@ -144,32 +155,32 @@ entry:
; M3: jr $ra
; M3: nop
- ; GP64-NOT-R6: sll $[[T0:[0-9]+]], $7, 0
- ; GP64-NOT-R6: dsrlv $[[T1:[0-9]+]], $5, $[[T0]]
- ; GP64-NOT-R6: dsll $[[T2:[0-9]+]], $4, 1
- ; GP64-NOT-R6: not $[[T3:[0-9]+]], $[[T0]]
- ; GP64-NOT-R6: dsllv $[[T4:[0-9]+]], $[[T2]], $[[T3]]
- ; GP64-NOT-R6: or $3, $[[T4]], $[[T1]]
- ; GP64-NOT-R6: dsrlv $2, $4, $[[T0]]
- ; GP64-NOT-R6: andi $[[T5:[0-9]+]], $[[T0]], 64
+ ; GP64-NOT-R6: dsrlv $[[T0:[0-9]+]], $5, $7
+ ; GP64-NOT-R6: dsll $[[T1:[0-9]+]], $4, 1
+ ; GP64-NOT-R6: sll $[[T2:[0-9]+]], $7, 0
+ ; GP64-NOT-R6: not $[[T3:[0-9]+]], $[[T2]]
+ ; GP64-NOT-R6: dsllv $[[T4:[0-9]+]], $[[T1]], $[[T3]]
+ ; GP64-NOT-R6: or $3, $[[T4]], $[[T0]]
+ ; GP64-NOT-R6: dsrlv $2, $4, $7
+ ; GP64-NOT-R6: andi $[[T5:[0-9]+]], $[[T2]], 64
; GP64-NOT-R6: movn $3, $2, $[[T5]]
; GP64-NOT-R6: jr $ra
; GP64-NOT-R6: movn $2, $zero, $1
- ; 64R6: sll $[[T0:[0-9]+]], $7, 0
- ; 64R6: dsrlv $[[T1:[0-9]+]], $5, $[[T0]]
- ; 64R6: dsll $[[T2:[0-9]+]], $4, 1
- ; 64R6: not $[[T3:[0-9]+]], $[[T0]]
- ; 64R6: dsllv $[[T4:[0-9]+]], $[[T2]], $[[T3]]
- ; 64R6: or $[[T5:[0-9]+]], $[[T4]], $[[T1]]
- ; 64R6: andi $[[T6:[0-9]+]], $[[T0]], 64
+ ; 64R6: dsrlv $[[T0:[0-9]+]], $5, $7
+ ; 64R6: dsll $[[T1:[0-9]+]], $4, 1
+ ; 64R6: sll $[[T2:[0-9]+]], $7, 0
+ ; 64R6: not $[[T3:[0-9]+]], $[[T2]]
+ ; 64R6: dsllv $[[T4:[0-9]+]], $[[T1]], $[[T3]]
+ ; 64R6: or $[[T5:[0-9]+]], $[[T4]], $[[T0]]
+ ; 64R6: andi $[[T6:[0-9]+]], $[[T2]], 64
; 64R6: sll $[[T7:[0-9]+]], $[[T6]], 0
; 64R6: seleqz $[[T8:[0-9]+]], $[[T5]], $[[T7]]
- ; 64R6: dsrlv $[[T9:[0-9]+]], $4, $[[T0]]
+ ; 64R6: dsrlv $[[T9:[0-9]+]], $4, $7
; 64R6: selnez $[[T10:[0-9]+]], $[[T9]], $[[T7]]
; 64R6: or $3, $[[T10]], $[[T8]]
; 64R6: jr $ra
- ; 64R6: seleqz $2, $[[T0]], $[[T7]]
+ ; 64R6: seleqz $2, $[[T9]], $[[T7]]
%r = lshr i128 %a, %b
ret i128 %r
diff --git a/test/CodeGen/Mips/llvm-ir/mul.ll b/test/CodeGen/Mips/llvm-ir/mul.ll
index 5f7f338c7789..a7582805dd74 100644
--- a/test/CodeGen/Mips/llvm-ir/mul.ll
+++ b/test/CodeGen/Mips/llvm-ir/mul.ll
@@ -1,17 +1,25 @@
; RUN: llc < %s -march=mips -mcpu=mips2 | FileCheck %s -check-prefix=ALL \
; RUN: -check-prefix=M2 -check-prefix=GP32
; RUN: llc < %s -march=mips -mcpu=mips32 | FileCheck %s -check-prefix=ALL \
-; RUN: -check-prefix=32R1-R2 -check-prefix=GP32
+; RUN: -check-prefix=32R1-R5 -check-prefix=GP32
; RUN: llc < %s -march=mips -mcpu=mips32r2 | FileCheck %s -check-prefix=ALL \
-; RUN: -check-prefix=32R1-R2 -check-prefix=32R2 -check-prefix=GP32
+; RUN: -check-prefix=32R1-R5 -check-prefix=32R2-R5 -check-prefix=GP32
+; RUN: llc < %s -march=mips -mcpu=mips32r3 | FileCheck %s -check-prefix=ALL \
+; RUN: -check-prefix=32R1-R5 -check-prefix=32R2-R5 -check-prefix=GP32
+; RUN: llc < %s -march=mips -mcpu=mips32r5 | FileCheck %s -check-prefix=ALL \
+; RUN: -check-prefix=32R1-R5 -check-prefix=32R2-R5 -check-prefix=GP32
; RUN: llc < %s -march=mips -mcpu=mips32r6 | FileCheck %s -check-prefix=ALL \
; RUN: -check-prefix=32R6 -check-prefix=GP32
; RUN: llc < %s -march=mips64 -mcpu=mips4 | FileCheck %s -check-prefix=ALL \
; RUN: -check-prefix=M4 -check-prefix=GP64-NOT-R6
; RUN: llc < %s -march=mips64 -mcpu=mips64 | FileCheck %s -check-prefix=ALL \
-; RUN: -check-prefix=64R1-R2 -check-prefix=GP64-NOT-R6
+; RUN: -check-prefix=64R1-R5 -check-prefix=GP64-NOT-R6
; RUN: llc < %s -march=mips64 -mcpu=mips64r2 | FileCheck %s -check-prefix=ALL \
-; RUN: -check-prefix=64R1-R2 -check-prefix=GP64 -check-prefix=GP64-NOT-R6
+; RUN: -check-prefix=64R1-R5 -check-prefix=GP64 -check-prefix=GP64-NOT-R6
+; RUN: llc < %s -march=mips64 -mcpu=mips64r3 | FileCheck %s -check-prefix=ALL \
+; RUN: -check-prefix=64R1-R5 -check-prefix=GP64 -check-prefix=GP64-NOT-R6
+; RUN: llc < %s -march=mips64 -mcpu=mips64r5 | FileCheck %s -check-prefix=ALL \
+; RUN: -check-prefix=64R1-R5 -check-prefix=GP64 -check-prefix=GP64-NOT-R6
; RUN: llc < %s -march=mips64 -mcpu=mips64r6 | FileCheck %s -check-prefix=ALL \
; RUN: -check-prefix=64R6
@@ -24,9 +32,9 @@ entry:
; M2: sll $[[T0]], $[[T0]], 31
; M2: sra $2, $[[T0]], 31
- ; 32R1-R2: mul $[[T0:[0-9]+]], $4, $5
- ; 32R1-R2: sll $[[T0]], $[[T0]], 31
- ; 32R1-R2: sra $2, $[[T0]], 31
+ ; 32R1-R5: mul $[[T0:[0-9]+]], $4, $5
+ ; 32R1-R5: sll $[[T0]], $[[T0]], 31
+ ; 32R1-R5: sra $2, $[[T0]], 31
; 32R6: mul $[[T0:[0-9]+]], $4, $5
; 32R6: sll $[[T0]], $[[T0]], 31
@@ -37,9 +45,9 @@ entry:
; M4: sll $[[T0]], $[[T0]], 31
; M4: sra $2, $[[T0]], 31
- ; 64R1-R2: mul $[[T0:[0-9]+]], $4, $5
- ; 64R1-R2: sll $[[T0]], $[[T0]], 31
- ; 64R1-R2: sra $2, $[[T0]], 31
+ ; 64R1-R5: mul $[[T0:[0-9]+]], $4, $5
+ ; 64R1-R5: sll $[[T0]], $[[T0]], 31
+ ; 64R1-R5: sra $2, $[[T0]], 31
; 64R6: mul $[[T0:[0-9]+]], $4, $5
; 64R6: sll $[[T0]], $[[T0]], 31
@@ -62,8 +70,8 @@ entry:
; 32R1: sll $[[T0]], $[[T0]], 24
; 32R1: sra $2, $[[T0]], 24
- ; 32R2: mul $[[T0:[0-9]+]], $4, $5
- ; 32R2: seb $2, $[[T0]]
+ ; 32R2-R5: mul $[[T0:[0-9]+]], $4, $5
+ ; 32R2-R5: seb $2, $[[T0]]
; 32R6: mul $[[T0:[0-9]+]], $4, $5
; 32R6: seb $2, $[[T0]]
@@ -99,8 +107,8 @@ entry:
; 32R1: sll $[[T0]], $[[T0]], 16
; 32R1: sra $2, $[[T0]], 16
- ; 32R2: mul $[[T0:[0-9]+]], $4, $5
- ; 32R2: seh $2, $[[T0]]
+ ; 32R2-R5: mul $[[T0:[0-9]+]], $4, $5
+ ; 32R2-R5: seh $2, $[[T0]]
; 32R6: mul $[[T0:[0-9]+]], $4, $5
; 32R6: seh $2, $[[T0]]
@@ -130,10 +138,10 @@ entry:
; M2: mult $4, $5
; M2: mflo $2
- ; 32R1-R2: mul $2, $4, $5
+ ; 32R1-R5: mul $2, $4, $5
; 32R6: mul $2, $4, $5
- ; 64R1-R2: mul $2, $4, $5
+ ; 64R1-R5: mul $2, $4, $5
; 64R6: mul $2, $4, $5
%r = mul i32 %a, %b
ret i32 %r
@@ -153,13 +161,13 @@ entry:
; M2: addu $[[T2:[0-9]+]], $4, $[[T1]]
; M2: addu $2, $[[T2]], $[[T0]]
- ; 32R1-R2: multu $5, $7
- ; 32R1-R2: mflo $3
- ; 32R1-R2: mfhi $[[T0:[0-9]+]]
- ; 32R1-R2: mul $[[T1:[0-9]+]], $4, $7
- ; 32R1-R2: mul $[[T2:[0-9]+]], $5, $6
- ; 32R1-R2: addu $[[T0]], $[[T0]], $[[T2:[0-9]+]]
- ; 32R1-R2: addu $2, $[[T0]], $[[T1]]
+ ; 32R1-R5: multu $5, $7
+ ; 32R1-R5: mflo $3
+ ; 32R1-R5: mfhi $[[T0:[0-9]+]]
+ ; 32R1-R5: mul $[[T1:[0-9]+]], $4, $7
+ ; 32R1-R5: mul $[[T2:[0-9]+]], $5, $6
+ ; 32R1-R5: addu $[[T0]], $[[T0]], $[[T2:[0-9]+]]
+ ; 32R1-R5: addu $2, $[[T0]], $[[T1]]
; 32R6: mul $[[T0:[0-9]+]], $5, $6
; 32R6: muhu $[[T1:[0-9]+]], $5, $7
@@ -171,8 +179,8 @@ entry:
; M4: dmult $4, $5
; M4: mflo $2
- ; 64R1-R2: dmult $4, $5
- ; 64R1-R2: mflo $2
+ ; 64R1-R5: dmult $4, $5
+ ; 64R1-R5: mflo $2
; 64R6: dmul $2, $4, $5
diff --git a/test/CodeGen/Mips/llvm-ir/or.ll b/test/CodeGen/Mips/llvm-ir/or.ll
index 21d1d4fca2a3..6215e4036325 100644
--- a/test/CodeGen/Mips/llvm-ir/or.ll
+++ b/test/CodeGen/Mips/llvm-ir/or.ll
@@ -4,6 +4,10 @@
; RUN: -check-prefix=ALL -check-prefix=GP32
; RUN: llc < %s -march=mips -mcpu=mips32r2 | FileCheck %s \
; RUN: -check-prefix=ALL -check-prefix=GP32
+; RUN: llc < %s -march=mips -mcpu=mips32r3 | FileCheck %s \
+; RUN: -check-prefix=ALL -check-prefix=GP32
+; RUN: llc < %s -march=mips -mcpu=mips32r5 | FileCheck %s \
+; RUN: -check-prefix=ALL -check-prefix=GP32
; RUN: llc < %s -march=mips -mcpu=mips32r6 | FileCheck %s \
; RUN: -check-prefix=ALL -check-prefix=GP32
; RUN: llc < %s -march=mips64 -mcpu=mips3 | FileCheck %s \
@@ -14,6 +18,10 @@
; RUN: -check-prefix=ALL -check-prefix=GP64
; RUN: llc < %s -march=mips64 -mcpu=mips64r2 | FileCheck %s \
; RUN: -check-prefix=ALL -check-prefix=GP64
+; RUN: llc < %s -march=mips64 -mcpu=mips64r3 | FileCheck %s \
+; RUN: -check-prefix=ALL -check-prefix=GP64
+; RUN: llc < %s -march=mips64 -mcpu=mips64r5 | FileCheck %s \
+; RUN: -check-prefix=ALL -check-prefix=GP64
; RUN: llc < %s -march=mips64 -mcpu=mips64r6 | FileCheck %s \
; RUN: -check-prefix=ALL -check-prefix=GP64
@@ -51,11 +59,7 @@ define signext i32 @or_i32(i32 signext %a, i32 signext %b) {
entry:
; ALL-LABEL: or_i32:
- ; GP32: or $2, $4, $5
-
- ; GP64: or $[[T0:[0-9]+]], $4, $5
- ; FIXME: The sll instruction below is redundant.
- ; GP64: sll $2, $[[T0]], 0
+ ; ALL: or $2, $4, $5
%r = or i32 %a, %b
ret i32 %r
diff --git a/test/CodeGen/Mips/llvm-ir/ret.ll b/test/CodeGen/Mips/llvm-ir/ret.ll
index 8f5b1159760c..0561c24219ce 100644
--- a/test/CodeGen/Mips/llvm-ir/ret.ll
+++ b/test/CodeGen/Mips/llvm-ir/ret.ll
@@ -9,10 +9,14 @@
; RUN: llc -march=mips -mcpu=mips32 -asm-show-inst < %s | FileCheck %s -check-prefix=ALL -check-prefix=GPR32 -check-prefix=NO-MTHC1 -check-prefix=NOT-R6
; RUN: llc -march=mips -mcpu=mips32r2 -asm-show-inst < %s | FileCheck %s -check-prefix=ALL -check-prefix=GPR32 -check-prefix=MTHC1 -check-prefix=NOT-R6
+; RUN: llc -march=mips -mcpu=mips32r3 -asm-show-inst < %s | FileCheck %s -check-prefix=ALL -check-prefix=GPR32 -check-prefix=MTHC1 -check-prefix=NOT-R6
+; RUN: llc -march=mips -mcpu=mips32r5 -asm-show-inst < %s | FileCheck %s -check-prefix=ALL -check-prefix=GPR32 -check-prefix=MTHC1 -check-prefix=NOT-R6
; RUN: llc -march=mips -mcpu=mips32r6 -asm-show-inst < %s | FileCheck %s -check-prefix=ALL -check-prefix=GPR32 -check-prefix=MTHC1 -check-prefix=R6
; RUN: llc -march=mips64 -mcpu=mips4 -asm-show-inst < %s | FileCheck %s -check-prefix=ALL -check-prefix=GPR64 -check-prefix=DMTC1 -check-prefix=NOT-R6
; RUN: llc -march=mips64 -mcpu=mips64 -asm-show-inst < %s | FileCheck %s -check-prefix=ALL -check-prefix=GPR64 -check-prefix=DMTC1 -check-prefix=NOT-R6
; RUN: llc -march=mips64 -mcpu=mips64r2 -asm-show-inst < %s | FileCheck %s -check-prefix=ALL -check-prefix=GPR64 -check-prefix=DMTC1 -check-prefix=NOT-R6
+; RUN: llc -march=mips64 -mcpu=mips64r3 -asm-show-inst < %s | FileCheck %s -check-prefix=ALL -check-prefix=GPR64 -check-prefix=DMTC1 -check-prefix=NOT-R6
+; RUN: llc -march=mips64 -mcpu=mips64r5 -asm-show-inst < %s | FileCheck %s -check-prefix=ALL -check-prefix=GPR64 -check-prefix=DMTC1 -check-prefix=NOT-R6
; RUN: llc -march=mips64 -mcpu=mips64r6 -asm-show-inst < %s | FileCheck %s -check-prefix=ALL -check-prefix=GPR64 -check-prefix=DMTC1 -check-prefix=R6
define void @ret_void() {
diff --git a/test/CodeGen/Mips/llvm-ir/sdiv.ll b/test/CodeGen/Mips/llvm-ir/sdiv.ll
index 54b7f70b1dac..929ee88bb7f7 100644
--- a/test/CodeGen/Mips/llvm-ir/sdiv.ll
+++ b/test/CodeGen/Mips/llvm-ir/sdiv.ll
@@ -3,7 +3,11 @@
; RUN: llc < %s -march=mips -mcpu=mips32 | FileCheck %s \
; RUN: -check-prefix=NOT-R6 -check-prefix=NOT-R2-R6 -check-prefix=GP32
; RUN: llc < %s -march=mips -mcpu=mips32r2 | FileCheck %s \
-; RUN: -check-prefix=NOT-R6 -check-prefix=R2 -check-prefix=GP32
+; RUN: -check-prefix=NOT-R6 -check-prefix=R2-R5 -check-prefix=GP32
+; RUN: llc < %s -march=mips -mcpu=mips32r3 | FileCheck %s \
+; RUN: -check-prefix=NOT-R6 -check-prefix=R2-R5 -check-prefix=GP32
+; RUN: llc < %s -march=mips -mcpu=mips32r5 | FileCheck %s \
+; RUN: -check-prefix=NOT-R6 -check-prefix=R2-R5 -check-prefix=GP32
; RUN: llc < %s -march=mips -mcpu=mips32r6 | FileCheck %s \
; RUN: -check-prefix=R6 -check-prefix=GP32
; RUN: llc < %s -march=mips64 -mcpu=mips3 | FileCheck %s \
@@ -13,7 +17,11 @@
; RUN: llc < %s -march=mips64 -mcpu=mips64 | FileCheck %s \
; RUN: -check-prefix=NOT-R6 -check-prefix=NOT-R2-R6 -check-prefix=GP64-NOT-R6
; RUN: llc < %s -march=mips64 -mcpu=mips64r2 | FileCheck %s \
-; RUN: -check-prefix=NOT-R6 -check-prefix=R2 -check-prefix=GP64-NOT-R6
+; RUN: -check-prefix=NOT-R6 -check-prefix=R2-R5 -check-prefix=GP64-NOT-R6
+; RUN: llc < %s -march=mips64 -mcpu=mips64r3 | FileCheck %s \
+; RUN: -check-prefix=NOT-R6 -check-prefix=R2-R5 -check-prefix=GP64-NOT-R6
+; RUN: llc < %s -march=mips64 -mcpu=mips64r5 | FileCheck %s \
+; RUN: -check-prefix=NOT-R6 -check-prefix=R2-R5 -check-prefix=GP64-NOT-R6
; RUN: llc < %s -march=mips64 -mcpu=mips64r6 | FileCheck %s \
; RUN: -check-prefix=R6 -check-prefix=64R6
@@ -49,11 +57,11 @@ entry:
; NOT-R2-R6: sll $[[T1:[0-9]+]], $[[T0]], 24
; NOT-R2-R6: sra $2, $[[T1]], 24
- ; R2: div $zero, $4, $5
- ; R2: teq $5, $zero, 7
- ; R2: mflo $[[T0:[0-9]+]]
+ ; R2-R5: div $zero, $4, $5
+ ; R2-R5: teq $5, $zero, 7
+ ; R2-R5: mflo $[[T0:[0-9]+]]
; FIXME: This instruction is redundant.
- ; R2: seb $2, $[[T0]]
+ ; R2-R5: seb $2, $[[T0]]
; R6: div $[[T0:[0-9]+]], $4, $5
; R6: teq $5, $zero, 7
@@ -75,11 +83,11 @@ entry:
; NOT-R2-R6: sll $[[T1:[0-9]+]], $[[T0]], 16
; NOT-R2-R6: sra $2, $[[T1]], 16
- ; R2: div $zero, $4, $5
- ; R2: teq $5, $zero, 7
- ; R2: mflo $[[T0:[0-9]+]]
+ ; R2-R5: div $zero, $4, $5
+ ; R2-R5: teq $5, $zero, 7
+ ; R2-R5: mflo $[[T0:[0-9]+]]
  ; FIXME: This instruction is redundant since div is signed.
- ; R2: seh $2, $[[T0]]
+ ; R2-R5: seh $2, $[[T0]]
; R6: div $[[T0:[0-9]+]], $4, $5
; R6: teq $5, $zero, 7
diff --git a/test/CodeGen/Mips/llvm-ir/select.ll b/test/CodeGen/Mips/llvm-ir/select.ll
index 736bc579088d..f17670adca33 100644
--- a/test/CodeGen/Mips/llvm-ir/select.ll
+++ b/test/CodeGen/Mips/llvm-ir/select.ll
@@ -5,7 +5,13 @@
; RUN: -check-prefix=CMOV-32 -check-prefix=CMOV-32R1
; RUN: llc < %s -march=mips -mcpu=mips32r2 | FileCheck %s \
; RUN: -check-prefix=ALL -check-prefix=CMOV \
-; RUN: -check-prefix=CMOV-32 -check-prefix=CMOV-32R2
+; RUN: -check-prefix=CMOV-32 -check-prefix=CMOV-32R2-R5
+; RUN: llc < %s -march=mips -mcpu=mips32r3 | FileCheck %s \
+; RUN: -check-prefix=ALL -check-prefix=CMOV \
+; RUN: -check-prefix=CMOV-32 -check-prefix=CMOV-32R2-R5
+; RUN: llc < %s -march=mips -mcpu=mips32r5 | FileCheck %s \
+; RUN: -check-prefix=ALL -check-prefix=CMOV \
+; RUN: -check-prefix=CMOV-32 -check-prefix=CMOV-32R2-R5
; RUN: llc < %s -march=mips -mcpu=mips32r6 | FileCheck %s \
; RUN: -check-prefix=ALL -check-prefix=SEL -check-prefix=SEL-32
; RUN: llc < %s -march=mips64 -mcpu=mips3 | FileCheck %s \
@@ -16,6 +22,10 @@
; RUN: -check-prefix=ALL -check-prefix=CMOV -check-prefix=CMOV-64
; RUN: llc < %s -march=mips64 -mcpu=mips64r2 | FileCheck %s \
; RUN: -check-prefix=ALL -check-prefix=CMOV -check-prefix=CMOV-64
+; RUN: llc < %s -march=mips64 -mcpu=mips64r3 | FileCheck %s \
+; RUN: -check-prefix=ALL -check-prefix=CMOV -check-prefix=CMOV-64
+; RUN: llc < %s -march=mips64 -mcpu=mips64r5 | FileCheck %s \
+; RUN: -check-prefix=ALL -check-prefix=CMOV -check-prefix=CMOV-64
; RUN: llc < %s -march=mips64 -mcpu=mips64r6 | FileCheck %s \
; RUN: -check-prefix=ALL -check-prefix=SEL -check-prefix=SEL-64
@@ -232,12 +242,12 @@ entry:
; M2: jr $ra
; M2: mtc1 $6, $f1
- ; CMOV-32: mtc1 $7, $[[F0:f[0-9]+]]
- ; CMOV-32R1: mtc1 $6, $f{{[0-9]+}}
- ; CMOV-32R2 mthc1 $6, $[[F0]]
- ; CMOV-32: andi $[[T0:[0-9]+]], $4, 1
- ; CMOV-32: ldc1 $f0, 16($sp)
- ; CMOV-32: movn.d $f0, $[[F0]], $[[T0]]
+ ; CMOV-32: mtc1 $7, $[[F0:f[0-9]+]]
+ ; CMOV-32R1: mtc1 $6, $f{{[0-9]+}}
+ ; CMOV-32R2-R5: mthc1 $6, $[[F0]]
+ ; CMOV-32: andi $[[T0:[0-9]+]], $4, 1
+ ; CMOV-32: ldc1 $f0, 16($sp)
+ ; CMOV-32: movn.d $f0, $[[F0]], $[[T0]]
; SEL-32: mtc1 $7, $[[F0:f[0-9]+]]
; SEL-32: mthc1 $6, $[[F0]]
diff --git a/test/CodeGen/Mips/llvm-ir/shl.ll b/test/CodeGen/Mips/llvm-ir/shl.ll
index fc5243cc97f2..bba34c47ea82 100644
--- a/test/CodeGen/Mips/llvm-ir/shl.ll
+++ b/test/CodeGen/Mips/llvm-ir/shl.ll
@@ -3,10 +3,16 @@
; RUN: -check-prefix=M2 -check-prefix=NOT-R2-R6
; RUN: llc < %s -march=mips -mcpu=mips32 | FileCheck %s \
; RUN: -check-prefix=ALL -check-prefix=GP32 -check-prefix=NOT-R2-R6 \
-; RUN: -check-prefix=32R1-R2
+; RUN: -check-prefix=32R1-R5
; RUN: llc < %s -march=mips -mcpu=mips32r2 | FileCheck %s \
; RUN: -check-prefix=ALL -check-prefix=GP32 \
-; RUN: -check-prefix=32R1-R2 -check-prefix=R2-R6
+; RUN: -check-prefix=32R1-R5 -check-prefix=R2-R6
+; RUN: llc < %s -march=mips -mcpu=mips32r3 | FileCheck %s \
+; RUN: -check-prefix=ALL -check-prefix=GP32 \
+; RUN: -check-prefix=32R1-R5 -check-prefix=R2-R6
+; RUN: llc < %s -march=mips -mcpu=mips32r5 | FileCheck %s \
+; RUN: -check-prefix=ALL -check-prefix=GP32 \
+; RUN: -check-prefix=32R1-R5 -check-prefix=R2-R6
; RUN: llc < %s -march=mips -mcpu=mips32r6 | FileCheck %s \
; RUN: -check-prefix=ALL -check-prefix=GP32 \
; RUN: -check-prefix=32R6 -check-prefix=R2-R6
@@ -22,6 +28,12 @@
; RUN: llc < %s -march=mips64 -mcpu=mips64r2 | FileCheck %s \
; RUN: -check-prefix=ALL -check-prefix=GP64 \
; RUN: -check-prefix=GP64-NOT-R6 -check-prefix R2-R6
+; RUN: llc < %s -march=mips64 -mcpu=mips64r3 | FileCheck %s \
+; RUN: -check-prefix=ALL -check-prefix=GP64 \
+; RUN: -check-prefix=GP64-NOT-R6 -check-prefix R2-R6
+; RUN: llc < %s -march=mips64 -mcpu=mips64r5 | FileCheck %s \
+; RUN: -check-prefix=ALL -check-prefix=GP64 \
+; RUN: -check-prefix=GP64-NOT-R6 -check-prefix R2-R6
; RUN: llc < %s -march=mips64 -mcpu=mips64r6 | FileCheck %s \
; RUN: -check-prefix=ALL -check-prefix=GP64 \
; RUN: -check-prefix=64R6 -check-prefix=R2-R6
@@ -101,16 +113,16 @@ entry:
; M2: jr $ra
; M2: nop
- ; 32R1-R2: sllv $[[T0:[0-9]+]], $4, $7
- ; 32R1-R2: not $[[T1:[0-9]+]], $7
- ; 32R1-R2: srl $[[T2:[0-9]+]], $5, 1
- ; 32R1-R2: srlv $[[T3:[0-9]+]], $[[T2]], $[[T1]]
- ; 32R1-R2: or $2, $[[T0]], $[[T3]]
- ; 32R1-R2: sllv $[[T4:[0-9]+]], $5, $7
- ; 32R1-R2: andi $[[T5:[0-9]+]], $7, 32
- ; 32R1-R2: movn $2, $[[T4]], $[[T5]]
- ; 32R1-R2: jr $ra
- ; 32R1-R2: movn $3, $zero, $[[T5]]
+ ; 32R1-R5: sllv $[[T0:[0-9]+]], $4, $7
+ ; 32R1-R5: not $[[T1:[0-9]+]], $7
+ ; 32R1-R5: srl $[[T2:[0-9]+]], $5, 1
+ ; 32R1-R5: srlv $[[T3:[0-9]+]], $[[T2]], $[[T1]]
+ ; 32R1-R5: or $2, $[[T0]], $[[T3]]
+ ; 32R1-R5: sllv $[[T4:[0-9]+]], $5, $7
+ ; 32R1-R5: andi $[[T5:[0-9]+]], $7, 32
+ ; 32R1-R5: movn $2, $[[T4]], $[[T5]]
+ ; 32R1-R5: jr $ra
+ ; 32R1-R5: movn $3, $zero, $[[T5]]
; 32R6: sllv $[[T0:[0-9]+]], $4, $7
; 32R6: not $[[T1:[0-9]+]], $7
@@ -125,8 +137,7 @@ entry:
; 32R6: jr $ra
; 32R6: seleqz $3, $[[T7]], $[[T5]]
- ; GP64: sll $[[T0:[0-9]+]], $5, 0
- ; GP64: dsllv $2, $4, $1
+ ; GP64: dsllv $2, $4, $5
%r = shl i64 %a, %b
ret i64 %r
@@ -139,11 +150,11 @@ entry:
; GP32: lw $25, %call16(__ashlti3)($gp)
; M3: sll $[[T0:[0-9]+]], $7, 0
- ; M3: dsllv $[[T1:[0-9]+]], $5, $[[T0]]
+ ; M3: dsllv $[[T1:[0-9]+]], $5, $7
; M3: andi $[[T2:[0-9]+]], $[[T0]], 64
; M3: bnez $[[T3:[0-9]+]], $[[BB0:BB[0-9_]+]]
; M3: move $2, $[[T1]]
- ; M3: dsllv $[[T4:[0-9]+]], $4, $[[T0]]
+ ; M3: dsllv $[[T4:[0-9]+]], $4, $7
; M3: dsrl $[[T5:[0-9]+]], $5, 1
; M3: not $[[T6:[0-9]+]], $[[T0]]
; M3: dsrlv $[[T7:[0-9]+]], $[[T5]], $[[T6]]
@@ -156,32 +167,32 @@ entry:
; M3: jr $ra
; M3: nop
- ; GP64-NOT-R6: sll $[[T0:[0-9]+]], $7, 0
- ; GP64-NOT-R6: dsllv $[[T1:[0-9]+]], $4, $[[T0]]
- ; GP64-NOT-R6: dsrl $[[T2:[0-9]+]], $5, 1
- ; GP64-NOT-R6: not $[[T3:[0-9]+]], $[[T0]]
- ; GP64-NOT-R6: dsrlv $[[T4:[0-9]+]], $[[T2]], $[[T3]]
- ; GP64-NOT-R6: or $2, $[[T1]], $[[T4]]
- ; GP64-NOT-R6: dsllv $3, $5, $[[T0]]
- ; GP64-NOT-R6: andi $[[T5:[0-9]+]], $[[T0]], 64
+ ; GP64-NOT-R6: dsllv $[[T0:[0-9]+]], $4, $7
+ ; GP64-NOT-R6: dsrl $[[T1:[0-9]+]], $5, 1
+ ; GP64-NOT-R6: sll $[[T2:[0-9]+]], $7, 0
+ ; GP64-NOT-R6: not $[[T3:[0-9]+]], $[[T2]]
+ ; GP64-NOT-R6: dsrlv $[[T4:[0-9]+]], $[[T1]], $[[T3]]
+ ; GP64-NOT-R6: or $2, $[[T0]], $[[T4]]
+ ; GP64-NOT-R6: dsllv $3, $5, $7
+ ; GP64-NOT-R6: andi $[[T5:[0-9]+]], $[[T2]], 64
; GP64-NOT-R6: movn $2, $3, $[[T5]]
; GP64-NOT-R6: jr $ra
; GP64-NOT-R6: movn $3, $zero, $1
- ; 64R6: sll $[[T0:[0-9]+]], $7, 0
- ; 64R6: dsllv $[[T1:[0-9]+]], $4, $[[T0]]
- ; 64R6: dsrl $[[T2:[0-9]+]], $5, 1
- ; 64R6: not $[[T3:[0-9]+]], $[[T0]]
- ; 64R6: dsrlv $[[T4:[0-9]+]], $[[T2]], $[[T3]]
- ; 64R6: or $[[T5:[0-9]+]], $[[T1]], $[[T4]]
- ; 64R6: andi $[[T6:[0-9]+]], $[[T0]], 64
+ ; 64R6: dsllv $[[T0:[0-9]+]], $4, $7
+ ; 64R6: dsrl $[[T1:[0-9]+]], $5, 1
+ ; 64R6: sll $[[T2:[0-9]+]], $7, 0
+ ; 64R6: not $[[T3:[0-9]+]], $[[T2]]
+ ; 64R6: dsrlv $[[T4:[0-9]+]], $[[T1]], $[[T3]]
+ ; 64R6: or $[[T5:[0-9]+]], $[[T0]], $[[T4]]
+ ; 64R6: andi $[[T6:[0-9]+]], $[[T2]], 64
; 64R6: sll $[[T7:[0-9]+]], $[[T6]], 0
; 64R6: seleqz $[[T8:[0-9]+]], $[[T5]], $[[T7]]
- ; 64R6: dsllv $[[T9:[0-9]+]], $5, $[[T0]]
+ ; 64R6: dsllv $[[T9:[0-9]+]], $5, $7
; 64R6: selnez $[[T10:[0-9]+]], $[[T9]], $[[T7]]
; 64R6: or $2, $[[T10]], $[[T8]]
; 64R6: jr $ra
- ; 64R6: seleqz $3, $[[T0]], $[[T7]]
+ ; 64R6: seleqz $3, $[[T9]], $[[T7]]
%r = shl i128 %a, %b
ret i128 %r
diff --git a/test/CodeGen/Mips/llvm-ir/srem.ll b/test/CodeGen/Mips/llvm-ir/srem.ll
index 1e949d24678b..ceb53ee7033a 100644
--- a/test/CodeGen/Mips/llvm-ir/srem.ll
+++ b/test/CodeGen/Mips/llvm-ir/srem.ll
@@ -3,7 +3,11 @@
; RUN: llc < %s -march=mips -mcpu=mips32 | FileCheck %s \
; RUN: -check-prefix=GP32 -check-prefix=NOT-R6 -check-prefix=NOT-R2-R6
; RUN: llc < %s -march=mips -mcpu=mips32r2 | FileCheck %s -check-prefix=GP32 \
-; RUN: -check-prefix=R2 -check-prefix=R2-R6 -check-prefix=NOT-R6
+; RUN: -check-prefix=R2-R5 -check-prefix=R2-R6 -check-prefix=NOT-R6
+; RUN: llc < %s -march=mips -mcpu=mips32r3 | FileCheck %s -check-prefix=GP32 \
+; RUN: -check-prefix=R2-R5 -check-prefix=R2-R6 -check-prefix=NOT-R6
+; RUN: llc < %s -march=mips -mcpu=mips32r5 | FileCheck %s -check-prefix=GP32 \
+; RUN: -check-prefix=R2-R5 -check-prefix=R2-R6 -check-prefix=NOT-R6
; RUN: llc < %s -march=mips -mcpu=mips32r6 | FileCheck %s \
; RUN: -check-prefix=GP32 -check-prefix=R6 -check-prefix=R2-R6
; RUN: llc < %s -march=mips64 -mcpu=mips3 | FileCheck %s \
@@ -13,7 +17,13 @@
; RUN: llc < %s -march=mips64 -mcpu=mips64 | FileCheck %s \
; RUN: -check-prefix=GP64-NOT-R6 -check-prefix=NOT-R6 -check-prefix=NOT-R2-R6
; RUN: llc < %s -march=mips64 -mcpu=mips64r2 | FileCheck %s \
-; RUN: -check-prefix=R2 -check-prefix=R2-R6 \
+; RUN: -check-prefix=R2-R5 -check-prefix=R2-R6 \
+; RUN: -check-prefix=GP64-NOT-R6 -check-prefix=NOT-R6
+; RUN: llc < %s -march=mips64 -mcpu=mips64r3 | FileCheck %s \
+; RUN: -check-prefix=R2-R5 -check-prefix=R2-R6 \
+; RUN: -check-prefix=GP64-NOT-R6 -check-prefix=NOT-R6
+; RUN: llc < %s -march=mips64 -mcpu=mips64r5 | FileCheck %s \
+; RUN: -check-prefix=R2-R5 -check-prefix=R2-R6 \
; RUN: -check-prefix=GP64-NOT-R6 -check-prefix=NOT-R6
; RUN: llc < %s -march=mips64 -mcpu=mips64r6 | FileCheck %s \
; RUN: -check-prefix=64R6 -check-prefix=R6 -check-prefix=R2-R6
@@ -47,10 +57,10 @@ entry:
; NOT-R2-R6: sll $[[T1:[0-9]+]], $[[T0]], 24
; NOT-R2-R6: sra $2, $[[T1]], 24
- ; R2: div $zero, $4, $5
- ; R2: teq $5, $zero, 7
- ; R2: mfhi $[[T0:[0-9]+]]
- ; R2: seb $2, $[[T0]]
+ ; R2-R5: div $zero, $4, $5
+ ; R2-R5: teq $5, $zero, 7
+ ; R2-R5: mfhi $[[T0:[0-9]+]]
+ ; R2-R5: seb $2, $[[T0]]
; R6: mod $[[T0:[0-9]+]], $4, $5
; R6: teq $5, $zero, 7
@@ -70,10 +80,10 @@ entry:
; NOT-R2-R6: sll $[[T1:[0-9]+]], $[[T0]], 16
; NOT-R2-R6: sra $2, $[[T1]], 16
- ; R2: div $zero, $4, $5
- ; R2: teq $5, $zero, 7
- ; R2: mfhi $[[T0:[0-9]+]]
- ; R2: seh $2, $[[T1]]
+ ; R2-R5: div $zero, $4, $5
+ ; R2-R5: teq $5, $zero, 7
+ ; R2-R5: mfhi $[[T0:[0-9]+]]
+ ; R2-R5: seh $2, $[[T1]]
; R6: mod $[[T0:[0-9]+]], $4, $5
; R6: teq $5, $zero, 7
diff --git a/test/CodeGen/Mips/llvm-ir/sub.ll b/test/CodeGen/Mips/llvm-ir/sub.ll
index 6d592be38211..164975844d73 100644
--- a/test/CodeGen/Mips/llvm-ir/sub.ll
+++ b/test/CodeGen/Mips/llvm-ir/sub.ll
@@ -4,6 +4,10 @@
; RUN: -check-prefix=ALL -check-prefix=NOT-R2-R6 -check-prefix=GP32
; RUN: llc < %s -march=mips -mcpu=mips32r2 | FileCheck %s \
; RUN: -check-prefix=ALL -check-prefix=R2-R6 -check-prefix=GP32
+; RUN: llc < %s -march=mips -mcpu=mips32r3 | FileCheck %s \
+; RUN: -check-prefix=ALL -check-prefix=R2-R6 -check-prefix=GP32
+; RUN: llc < %s -march=mips -mcpu=mips32r5 | FileCheck %s \
+; RUN: -check-prefix=ALL -check-prefix=R2-R6 -check-prefix=GP32
; RUN: llc < %s -march=mips -mcpu=mips32r6 | FileCheck %s \
; RUN: -check-prefix=ALL -check-prefix=R2-R6 -check-prefix=GP32
; RUN: llc < %s -march=mips64 -mcpu=mips3 | FileCheck %s \
@@ -14,6 +18,10 @@
; RUN: -check-prefix=ALL -check-prefix=NOT-R2-R6 -check-prefix=GP64
; RUN: llc < %s -march=mips64 -mcpu=mips64r2 | FileCheck %s \
; RUN: -check-prefix=ALL -check-prefix=R2-R6 -check-prefix=GP64
+; RUN: llc < %s -march=mips64 -mcpu=mips64r3 | FileCheck %s \
+; RUN: -check-prefix=ALL -check-prefix=R2-R6 -check-prefix=GP64
+; RUN: llc < %s -march=mips64 -mcpu=mips64r5 | FileCheck %s \
+; RUN: -check-prefix=ALL -check-prefix=R2-R6 -check-prefix=GP64
; RUN: llc < %s -march=mips64 -mcpu=mips64r6 | FileCheck %s \
; RUN: -check-prefix=ALL -check-prefix=R2-R6 -check-prefix=GP64
diff --git a/test/CodeGen/Mips/llvm-ir/udiv.ll b/test/CodeGen/Mips/llvm-ir/udiv.ll
index 1f7aa0d5f4ce..a7cafe52d1ac 100644
--- a/test/CodeGen/Mips/llvm-ir/udiv.ll
+++ b/test/CodeGen/Mips/llvm-ir/udiv.ll
@@ -4,6 +4,10 @@
; RUN: -check-prefix=NOT-R6 -check-prefix=GP32
; RUN: llc < %s -march=mips -mcpu=mips32r2 | FileCheck %s \
; RUN: -check-prefix=NOT-R6 -check-prefix=GP32
+; RUN: llc < %s -march=mips -mcpu=mips32r3 | FileCheck %s \
+; RUN: -check-prefix=NOT-R6 -check-prefix=GP32
+; RUN: llc < %s -march=mips -mcpu=mips32r5 | FileCheck %s \
+; RUN: -check-prefix=NOT-R6 -check-prefix=GP32
; RUN: llc < %s -march=mips -mcpu=mips32r6 | FileCheck %s \
; RUN: -check-prefix=R6 -check-prefix=GP32
; RUN: llc < %s -march=mips64 -mcpu=mips3 | FileCheck %s \
@@ -14,6 +18,10 @@
; RUN: -check-prefix=NOT-R6 -check-prefix=GP64-NOT-R6
; RUN: llc < %s -march=mips64 -mcpu=mips64r2 | FileCheck %s \
; RUN: -check-prefix=NOT-R6 -check-prefix=GP64-NOT-R6
+; RUN: llc < %s -march=mips64 -mcpu=mips64r3 | FileCheck %s \
+; RUN: -check-prefix=NOT-R6 -check-prefix=GP64-NOT-R6
+; RUN: llc < %s -march=mips64 -mcpu=mips64r5 | FileCheck %s \
+; RUN: -check-prefix=NOT-R6 -check-prefix=GP64-NOT-R6
; RUN: llc < %s -march=mips64 -mcpu=mips64r6 | FileCheck %s \
; RUN: -check-prefix=R6 -check-prefix=64R6
diff --git a/test/CodeGen/Mips/llvm-ir/urem.ll b/test/CodeGen/Mips/llvm-ir/urem.ll
index 73235341a42f..d5a231c8dfca 100644
--- a/test/CodeGen/Mips/llvm-ir/urem.ll
+++ b/test/CodeGen/Mips/llvm-ir/urem.ll
@@ -3,7 +3,11 @@
; RUN: llc < %s -march=mips -mcpu=mips32 | FileCheck %s \
; RUN: -check-prefix=GP32 -check-prefix=NOT-R6 -check-prefix=NOT-R2-R6
; RUN: llc < %s -march=mips -mcpu=mips32r2 | FileCheck %s -check-prefix=GP32 \
-; RUN: -check-prefix=R2 -check-prefix=R2-R6 -check-prefix=NOT-R6
+; RUN: -check-prefix=R2-R5 -check-prefix=R2-R6 -check-prefix=NOT-R6
+; RUN: llc < %s -march=mips -mcpu=mips32r3 | FileCheck %s -check-prefix=GP32 \
+; RUN: -check-prefix=R2-R5 -check-prefix=R2-R6 -check-prefix=NOT-R6
+; RUN: llc < %s -march=mips -mcpu=mips32r5 | FileCheck %s -check-prefix=GP32 \
+; RUN: -check-prefix=R2-R5 -check-prefix=R2-R6 -check-prefix=NOT-R6
; RUN: llc < %s -march=mips -mcpu=mips32r6 | FileCheck %s \
; RUN: -check-prefix=GP32 -check-prefix=R6 -check-prefix=R2-R6
; RUN: llc < %s -march=mips64 -mcpu=mips3 | FileCheck %s \
@@ -13,7 +17,13 @@
; RUN: llc < %s -march=mips64 -mcpu=mips64 | FileCheck %s \
; RUN: -check-prefix=GP64-NOT-R6 -check-prefix=NOT-R6 -check-prefix=NOT-R2-R6
; RUN: llc < %s -march=mips64 -mcpu=mips64r2 | FileCheck %s \
-; RUN: -check-prefix=R2 -check-prefix=R2-R6 \
+; RUN: -check-prefix=R2-R5 -check-prefix=R2-R6 \
+; RUN: -check-prefix=GP64-NOT-R6 -check-prefix=NOT-R6
+; RUN: llc < %s -march=mips64 -mcpu=mips64r3 | FileCheck %s \
+; RUN: -check-prefix=R2-R5 -check-prefix=R2-R6 \
+; RUN: -check-prefix=GP64-NOT-R6 -check-prefix=NOT-R6
+; RUN: llc < %s -march=mips64 -mcpu=mips64r5 | FileCheck %s \
+; RUN: -check-prefix=R2-R5 -check-prefix=R2-R6 \
; RUN: -check-prefix=GP64-NOT-R6 -check-prefix=NOT-R6
; RUN: llc < %s -march=mips64 -mcpu=mips64r6 | FileCheck %s \
; RUN: -check-prefix=64R6 -check-prefix=R6 -check-prefix=R2-R6
@@ -53,12 +63,12 @@ entry:
; NOT-R2-R6: sll $[[T3:[0-9]+]], $[[T2]], 24
; NOT-R2-R6: sra $2, $[[T3]], 24
- ; R2: andi $[[T0:[0-9]+]], $5, 255
- ; R2: andi $[[T1:[0-9]+]], $4, 255
- ; R2: divu $zero, $[[T1]], $[[T0]]
- ; R2: teq $[[T0]], $zero, 7
- ; R2: mfhi $[[T2:[0-9]+]]
- ; R2: seb $2, $[[T2]]
+ ; R2-R5: andi $[[T0:[0-9]+]], $5, 255
+ ; R2-R5: andi $[[T1:[0-9]+]], $4, 255
+ ; R2-R5: divu $zero, $[[T1]], $[[T0]]
+ ; R2-R5: teq $[[T0]], $zero, 7
+ ; R2-R5: mfhi $[[T2:[0-9]+]]
+ ; R2-R5: seb $2, $[[T2]]
; R6: andi $[[T0:[0-9]+]], $5, 255
; R6: andi $[[T1:[0-9]+]], $4, 255
@@ -82,12 +92,12 @@ entry:
; NOT-R2-R6: sll $[[T3:[0-9]+]], $[[T2]], 16
; NOT-R2-R6: sra $2, $[[T3]], 16
- ; R2: andi $[[T0:[0-9]+]], $5, 65535
- ; R2: andi $[[T1:[0-9]+]], $4, 65535
- ; R2: divu $zero, $[[T1]], $[[T0]]
- ; R2: teq $[[T0]], $zero, 7
- ; R2: mfhi $[[T3:[0-9]+]]
- ; R2: seh $2, $[[T2]]
+ ; R2-R5: andi $[[T0:[0-9]+]], $5, 65535
+ ; R2-R5: andi $[[T1:[0-9]+]], $4, 65535
+ ; R2-R5: divu $zero, $[[T1]], $[[T0]]
+ ; R2-R5: teq $[[T0]], $zero, 7
+ ; R2-R5: mfhi $[[T3:[0-9]+]]
+ ; R2-R5: seh $2, $[[T2]]
; R6: andi $[[T0:[0-9]+]], $5, 65535
; R6: andi $[[T1:[0-9]+]], $4, 65535
diff --git a/test/CodeGen/Mips/llvm-ir/xor.ll b/test/CodeGen/Mips/llvm-ir/xor.ll
index 94dead1eff41..89af99981a3c 100644
--- a/test/CodeGen/Mips/llvm-ir/xor.ll
+++ b/test/CodeGen/Mips/llvm-ir/xor.ll
@@ -4,6 +4,10 @@
; RUN: -check-prefix=ALL -check-prefix=GP32
; RUN: llc < %s -march=mips -mcpu=mips32r2 | FileCheck %s \
; RUN: -check-prefix=ALL -check-prefix=GP32
+; RUN: llc < %s -march=mips -mcpu=mips32r3 | FileCheck %s \
+; RUN: -check-prefix=ALL -check-prefix=GP32
+; RUN: llc < %s -march=mips -mcpu=mips32r5 | FileCheck %s \
+; RUN: -check-prefix=ALL -check-prefix=GP32
; RUN: llc < %s -march=mips -mcpu=mips32r6 | FileCheck %s \
; RUN: -check-prefix=ALL -check-prefix=GP32
; RUN: llc < %s -march=mips64 -mcpu=mips3 | FileCheck %s \
@@ -14,6 +18,10 @@
; RUN: -check-prefix=ALL -check-prefix=GP64
; RUN: llc < %s -march=mips64 -mcpu=mips64r2 | FileCheck %s \
; RUN: -check-prefix=ALL -check-prefix=GP64
+; RUN: llc < %s -march=mips64 -mcpu=mips64r3 | FileCheck %s \
+; RUN: -check-prefix=ALL -check-prefix=GP64
+; RUN: llc < %s -march=mips64 -mcpu=mips64r5 | FileCheck %s \
+; RUN: -check-prefix=ALL -check-prefix=GP64
; RUN: llc < %s -march=mips64 -mcpu=mips64r6 | FileCheck %s \
; RUN: -check-prefix=ALL -check-prefix=GP64
@@ -51,10 +59,7 @@ define signext i32 @xor_i32(i32 signext %a, i32 signext %b) {
entry:
; ALL-LABEL: xor_i32:
- ; GP32: xor $2, $4, $5
-
- ; GP64: xor $[[T0:[0-9]+]], $4, $5
- ; GP64: sll $2, $[[T0]], 0
+ ; ALL: xor $2, $4, $5
%r = xor i32 %a, %b
ret i32 %r
diff --git a/test/CodeGen/Mips/load-store-left-right.ll b/test/CodeGen/Mips/load-store-left-right.ll
index f6d0e8debb36..a01d246ae460 100644
--- a/test/CodeGen/Mips/load-store-left-right.ll
+++ b/test/CodeGen/Mips/load-store-left-right.ll
@@ -4,14 +4,14 @@
; RUN: llc -march=mips -mcpu=mips32r2 < %s | FileCheck -check-prefix=ALL -check-prefix=MIPS32 -check-prefix=MIPS32-EB %s
; RUN: llc -march=mipsel -mcpu=mips32r6 < %s | FileCheck -check-prefix=ALL -check-prefix=MIPS32R6 -check-prefix=MIPS32R6-EL %s
; RUN: llc -march=mips -mcpu=mips32r6 < %s | FileCheck -check-prefix=ALL -check-prefix=MIPS32R6 -check-prefix=MIPS32R6-EB %s
-; RUN: llc -march=mips64el -mcpu=mips4 -mattr=n64 < %s | FileCheck -check-prefix=ALL -check-prefix=MIPS64 -check-prefix=MIPS64-EL %s
-; RUN: llc -march=mips64 -mcpu=mips4 -mattr=n64 < %s | FileCheck -check-prefix=ALL -check-prefix=MIPS64 -check-prefix=MIPS64-EB %s
-; RUN: llc -march=mips64el -mcpu=mips64 -mattr=n64 < %s | FileCheck -check-prefix=ALL -check-prefix=MIPS64 -check-prefix=MIPS64-EL %s
-; RUN: llc -march=mips64 -mcpu=mips64 -mattr=n64 < %s | FileCheck -check-prefix=ALL -check-prefix=MIPS64 -check-prefix=MIPS64-EB %s
-; RUN: llc -march=mips64el -mcpu=mips64r2 -mattr=n64 < %s | FileCheck -check-prefix=ALL -check-prefix=MIPS64 -check-prefix=MIPS64-EL %s
-; RUN: llc -march=mips64 -mcpu=mips64r2 -mattr=n64 < %s | FileCheck -check-prefix=ALL -check-prefix=MIPS64 -check-prefix=MIPS64-EB %s
-; RUN: llc -march=mips64el -mcpu=mips64r6 -mattr=n64 < %s | FileCheck -check-prefix=ALL -check-prefix=MIPS64R6 -check-prefix=MIPS64R6-EL %s
-; RUN: llc -march=mips64 -mcpu=mips64r6 -mattr=n64 < %s | FileCheck -check-prefix=ALL -check-prefix=MIPS64R6 -check-prefix=MIPS64R6-EB %s
+; RUN: llc -march=mips64el -mcpu=mips4 -target-abi=n64 < %s | FileCheck -check-prefix=ALL -check-prefix=MIPS64 -check-prefix=MIPS64-EL %s
+; RUN: llc -march=mips64 -mcpu=mips4 -target-abi=n64 < %s | FileCheck -check-prefix=ALL -check-prefix=MIPS64 -check-prefix=MIPS64-EB %s
+; RUN: llc -march=mips64el -mcpu=mips64 -target-abi=n64 < %s | FileCheck -check-prefix=ALL -check-prefix=MIPS64 -check-prefix=MIPS64-EL %s
+; RUN: llc -march=mips64 -mcpu=mips64 -target-abi=n64 < %s | FileCheck -check-prefix=ALL -check-prefix=MIPS64 -check-prefix=MIPS64-EB %s
+; RUN: llc -march=mips64el -mcpu=mips64r2 -target-abi=n64 < %s | FileCheck -check-prefix=ALL -check-prefix=MIPS64 -check-prefix=MIPS64-EL %s
+; RUN: llc -march=mips64 -mcpu=mips64r2 -target-abi=n64 < %s | FileCheck -check-prefix=ALL -check-prefix=MIPS64 -check-prefix=MIPS64-EB %s
+; RUN: llc -march=mips64el -mcpu=mips64r6 -target-abi=n64 < %s | FileCheck -check-prefix=ALL -check-prefix=MIPS64R6 -check-prefix=MIPS64R6-EL %s
+; RUN: llc -march=mips64 -mcpu=mips64r6 -target-abi=n64 < %s | FileCheck -check-prefix=ALL -check-prefix=MIPS64R6 -check-prefix=MIPS64R6-EB %s
%struct.SLL = type { i64 }
%struct.SI = type { i32 }
@@ -43,7 +43,7 @@ entry:
; MIPS64R6: ld $[[PTR:[0-9]+]], %got_disp(si)(
; MIPS64R6: lw $2, 0($[[PTR]])
- %0 = load i32* getelementptr inbounds (%struct.SI* @si, i32 0, i32 0), align 1
+ %0 = load i32, i32* getelementptr inbounds (%struct.SI, %struct.SI* @si, i32 0, i32 0), align 1
ret i32 %0
}
@@ -69,7 +69,7 @@ entry:
; MIPS64R6: ld $[[PTR:[0-9]+]], %got_disp(si)(
; MIPS64R6: sw $4, 0($[[PTR]])
- store i32 %a, i32* getelementptr inbounds (%struct.SI* @si, i32 0, i32 0), align 1
+ store i32 %a, i32* getelementptr inbounds (%struct.SI, %struct.SI* @si, i32 0, i32 0), align 1
ret void
}
@@ -100,7 +100,7 @@ entry:
; MIPS64R6: ld $[[PTR:[0-9]+]], %got_disp(sll)(
; MIPS64R6: ld $2, 0($[[PTR]])
- %0 = load i64* getelementptr inbounds (%struct.SLL* @sll, i64 0, i32 0), align 1
+ %0 = load i64, i64* getelementptr inbounds (%struct.SLL, %struct.SLL* @sll, i64 0, i32 0), align 1
ret i64 %0
}
@@ -129,7 +129,7 @@ entry:
; MIPS64R6: ld $[[PTR:[0-9]+]], %got_disp(si)(
; MIPS64R6: lw $2, 0($[[PTR]])
- %0 = load i32* getelementptr inbounds (%struct.SI* @si, i64 0, i32 0), align 1
+ %0 = load i32, i32* getelementptr inbounds (%struct.SI, %struct.SI* @si, i64 0, i32 0), align 1
%conv = sext i32 %0 to i64
ret i64 %conv
}
@@ -165,7 +165,7 @@ entry:
; MIPS64R6: ld $[[PTR:[0-9]+]], %got_disp(sui)(
; MIPS64R6: lwu $2, 0($[[PTR]])
- %0 = load i32* getelementptr inbounds (%struct.SUI* @sui, i64 0, i32 0), align 1
+ %0 = load i32, i32* getelementptr inbounds (%struct.SUI, %struct.SUI* @sui, i64 0, i32 0), align 1
%conv = zext i32 %0 to i64
ret i64 %conv
}
@@ -197,7 +197,7 @@ entry:
; MIPS64R6: ld $[[PTR:[0-9]+]], %got_disp(sll)(
; MIPS64R6: sd $4, 0($[[PTR]])
- store i64 %a, i64* getelementptr inbounds (%struct.SLL* @sll, i64 0, i32 0), align 1
+ store i64 %a, i64* getelementptr inbounds (%struct.SLL, %struct.SLL* @sll, i64 0, i32 0), align 1
ret void
}
@@ -223,7 +223,7 @@ entry:
; MIPS64R6: ld $[[PTR:[0-9]+]], %got_disp(si)(
; MIPS64R6: sw $4, 0($[[PTR]])
- store i32 %a, i32* getelementptr inbounds (%struct.SI* @si, i64 0, i32 0), align 1
+ store i32 %a, i32* getelementptr inbounds (%struct.SI, %struct.SI* @si, i64 0, i32 0), align 1
ret void
}
@@ -257,8 +257,8 @@ entry:
; ALL-DAG: lbu $[[R1:[0-9]+]], 1($[[PTR]])
; ALL-DAG: sb $[[R1]], 3($[[PTR]])
- %0 = load %struct.S0* getelementptr inbounds (%struct.S0* @struct_s0, i32 0), align 1
- store %struct.S0 %0, %struct.S0* getelementptr inbounds (%struct.S0* @struct_s0, i32 1), align 1
+ %0 = load %struct.S0, %struct.S0* getelementptr inbounds (%struct.S0, %struct.S0* @struct_s0, i32 0), align 1
+ store %struct.S0 %0, %struct.S0* getelementptr inbounds (%struct.S0, %struct.S0* @struct_s0, i32 1), align 1
ret void
}
@@ -300,8 +300,8 @@ entry:
; MIPS64R6-DAG: lhu $[[R1:[0-9]+]], 2($[[PTR]])
; MIPS64R6-DAG: sh $[[R1]], 6($[[PTR]])
- %0 = load %struct.S1* getelementptr inbounds (%struct.S1* @struct_s1, i32 0), align 1
- store %struct.S1 %0, %struct.S1* getelementptr inbounds (%struct.S1* @struct_s1, i32 1), align 1
+ %0 = load %struct.S1, %struct.S1* getelementptr inbounds (%struct.S1, %struct.S1* @struct_s1, i32 0), align 1
+ store %struct.S1 %0, %struct.S1* getelementptr inbounds (%struct.S1, %struct.S1* @struct_s1, i32 1), align 1
ret void
}
@@ -361,8 +361,8 @@ entry:
; MIPS64R6-DAG: lw $[[R1:[0-9]+]], 4($[[PTR]])
; MIPS64R6-DAG: sw $[[R1]], 12($[[PTR]])
- %0 = load %struct.S2* getelementptr inbounds (%struct.S2* @struct_s2, i32 0), align 1
- store %struct.S2 %0, %struct.S2* getelementptr inbounds (%struct.S2* @struct_s2, i32 1), align 1
+ %0 = load %struct.S2, %struct.S2* getelementptr inbounds (%struct.S2, %struct.S2* @struct_s2, i32 0), align 1
+ store %struct.S2 %0, %struct.S2* getelementptr inbounds (%struct.S2, %struct.S2* @struct_s2, i32 1), align 1
ret void
}
diff --git a/test/CodeGen/Mips/longbranch.ll b/test/CodeGen/Mips/longbranch.ll
index ad0235eb0af4..9f5b7417b859 100644
--- a/test/CodeGen/Mips/longbranch.ll
+++ b/test/CodeGen/Mips/longbranch.ll
@@ -1,9 +1,9 @@
; RUN: llc -march=mipsel < %s | FileCheck %s
; RUN: llc -march=mipsel -force-mips-long-branch -O3 < %s \
; RUN: | FileCheck %s -check-prefix=O32
-; RUN: llc -march=mips64el -mcpu=mips4 -mattr=n64 -force-mips-long-branch -O3 \
+; RUN: llc -march=mips64el -mcpu=mips4 -target-abi=n64 -force-mips-long-branch -O3 \
; RUN: < %s | FileCheck %s -check-prefix=N64
-; RUN: llc -march=mips64el -mcpu=mips64 -mattr=n64 -force-mips-long-branch -O3 \
+; RUN: llc -march=mips64el -mcpu=mips64 -target-abi=n64 -force-mips-long-branch -O3 \
; RUN: < %s | FileCheck %s -check-prefix=N64
; RUN: llc -march=mipsel -mcpu=mips32r2 -mattr=micromips \
; RUN: -force-mips-long-branch -O3 < %s | FileCheck %s -check-prefix=MICROMIPS
@@ -124,10 +124,9 @@ end:
; MICROMIPS: $[[BB0]]:
; MICROMIPS: lw $[[R1:[0-9]+]], %got(x)($[[GP]])
; MICROMIPS: li16 $[[R2:[0-9]+]], 1
-; MICROMIPS: sw $[[R2]], 0($[[R1]])
+; MICROMIPS: sw16 $[[R2]], 0($[[R1]])
; MICROMIPS: $[[BB2]]:
-; MICROMIPS: jr $ra
-; MICROMIPS: nop
+; MICROMIPS: jrc $ra
; Check the NaCl version. Check that sp change is not in the branch delay slot
diff --git a/test/CodeGen/Mips/machineverifier.ll b/test/CodeGen/Mips/machineverifier.ll
index c673fe557e6b..d496b833a6c6 100644
--- a/test/CodeGen/Mips/machineverifier.ll
+++ b/test/CodeGen/Mips/machineverifier.ll
@@ -6,7 +6,7 @@
define void @foo() nounwind {
entry:
- %0 = load i32* @g, align 4
+ %0 = load i32, i32* @g, align 4
%tobool = icmp eq i32 %0, 0
br i1 %tobool, label %if.end, label %if.then
diff --git a/test/CodeGen/Mips/mbrsize4a.ll b/test/CodeGen/Mips/mbrsize4a.ll
index 15e1f47ce29e..264d2284afc9 100644
--- a/test/CodeGen/Mips/mbrsize4a.ll
+++ b/test/CodeGen/Mips/mbrsize4a.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -soft-float -mips16-hard-float -relocation-model=static -mips16-constant-islands < %s | FileCheck %s -check-prefix=jal16
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -mattr=+soft-float -mips16-hard-float -relocation-model=static -mips16-constant-islands < %s | FileCheck %s -check-prefix=jal16
@j = global i32 10, align 4
@.str = private unnamed_addr constant [11 x i8] c"at bottom\0A\00", align 1
@@ -17,11 +17,11 @@ z: ; preds = %y, %entry
br label %y
y: ; preds = %z
- %call1 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([11 x i8]* @.str, i32 0, i32 0))
+ %call1 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i32 0, i32 0))
br label %z
return: ; No predecessors!
- %0 = load i32* %retval
+ %0 = load i32, i32* %retval
ret i32 %0
; jal16: jal $BB{{[0-9]+}}_{{[0-9]+}}
}
diff --git a/test/CodeGen/Mips/memcpy.ll b/test/CodeGen/Mips/memcpy.ll
index 39764a936381..5c4ebb27dde4 100644
--- a/test/CodeGen/Mips/memcpy.ll
+++ b/test/CodeGen/Mips/memcpy.ll
@@ -8,9 +8,9 @@ define void @foo1(%struct.S1* %s1, i8 signext %n) nounwind {
entry:
; CHECK-NOT: call16(memcpy
- %arraydecay = getelementptr inbounds %struct.S1* %s1, i32 0, i32 1, i32 0
- tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %arraydecay, i8* getelementptr inbounds ([31 x i8]* @.str, i32 0, i32 0), i32 31, i32 1, i1 false)
- %arrayidx = getelementptr inbounds %struct.S1* %s1, i32 0, i32 1, i32 40
+ %arraydecay = getelementptr inbounds %struct.S1, %struct.S1* %s1, i32 0, i32 1, i32 0
+ tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %arraydecay, i8* getelementptr inbounds ([31 x i8], [31 x i8]* @.str, i32 0, i32 0), i32 31, i32 1, i1 false)
+ %arrayidx = getelementptr inbounds %struct.S1, %struct.S1* %s1, i32 0, i32 1, i32 40
store i8 %n, i8* %arrayidx, align 1
ret void
}
diff --git a/test/CodeGen/Mips/micromips-addiu.ll b/test/CodeGen/Mips/micromips-addiu.ll
index c5bee34028c8..e0743c9c088b 100644
--- a/test/CodeGen/Mips/micromips-addiu.ll
+++ b/test/CodeGen/Mips/micromips-addiu.ll
@@ -8,20 +8,20 @@
define i32 @main() nounwind {
entry:
- %0 = load i32* @x, align 4
+ %0 = load i32, i32* @x, align 4
%addiu1 = add i32 %0, -7
- %call1 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds
- ([7 x i8]* @.str, i32 0, i32 0), i32 %addiu1)
+ %call1 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds
+ ([7 x i8], [7 x i8]* @.str, i32 0, i32 0), i32 %addiu1)
- %1 = load i32* @y, align 4
+ %1 = load i32, i32* @y, align 4
%addiu2 = add i32 %1, 55
- %call2 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds
- ([7 x i8]* @.str, i32 0, i32 0), i32 %addiu2)
+ %call2 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds
+ ([7 x i8], [7 x i8]* @.str, i32 0, i32 0), i32 %addiu2)
- %2 = load i32* @z, align 4
+ %2 = load i32, i32* @z, align 4
%addiu3 = add i32 %2, 24
- %call3 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds
- ([7 x i8]* @.str, i32 0, i32 0), i32 %addiu3)
+ %call3 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds
+ ([7 x i8], [7 x i8]* @.str, i32 0, i32 0), i32 %addiu3)
ret i32 0
}
diff --git a/test/CodeGen/Mips/micromips-addu16.ll b/test/CodeGen/Mips/micromips-addu16.ll
new file mode 100644
index 000000000000..3ecdf2488d2c
--- /dev/null
+++ b/test/CodeGen/Mips/micromips-addu16.ll
@@ -0,0 +1,18 @@
+; RUN: llc -march=mipsel -mcpu=mips32r2 -mattr=+micromips \
+; RUN: -relocation-model=pic -O3 < %s | FileCheck %s
+
+define i32 @main() {
+entry:
+ %retval = alloca i32, align 4
+ %a = alloca i32, align 4
+ %b = alloca i32, align 4
+ %c = alloca i32, align 4
+ store i32 0, i32* %retval
+ %0 = load i32, i32* %b, align 4
+ %1 = load i32, i32* %c, align 4
+ %add = add nsw i32 %0, %1
+ store i32 %add, i32* %a, align 4
+ ret i32 0
+}
+
+; CHECK: addu16
diff --git a/test/CodeGen/Mips/micromips-and16.ll b/test/CodeGen/Mips/micromips-and16.ll
new file mode 100644
index 000000000000..d0a16ac28a09
--- /dev/null
+++ b/test/CodeGen/Mips/micromips-and16.ll
@@ -0,0 +1,18 @@
+; RUN: llc -march=mipsel -mcpu=mips32r2 -mattr=+micromips \
+; RUN: -relocation-model=pic -O3 < %s | FileCheck %s
+
+define i32 @main() {
+entry:
+ %retval = alloca i32, align 4
+ %a = alloca i32, align 4
+ %b = alloca i32, align 4
+ %c = alloca i32, align 4
+ store i32 0, i32* %retval
+ %0 = load i32, i32* %b, align 4
+ %1 = load i32, i32* %c, align 4
+ %and = and i32 %0, %1
+ store i32 %and, i32* %a, align 4
+ ret i32 0
+}
+
+; CHECK: and16
diff --git a/test/CodeGen/Mips/micromips-andi.ll b/test/CodeGen/Mips/micromips-andi.ll
index b82d2b09eae4..cd7a794cd1be 100644
--- a/test/CodeGen/Mips/micromips-andi.ll
+++ b/test/CodeGen/Mips/micromips-andi.ll
@@ -7,15 +7,15 @@
define i32 @main() nounwind {
entry:
- %0 = load i32* @x, align 4
+ %0 = load i32, i32* @x, align 4
%and1 = and i32 %0, 4
- %call1 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds
- ([7 x i8]* @.str, i32 0, i32 0), i32 %and1)
+ %call1 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds
+ ([7 x i8], [7 x i8]* @.str, i32 0, i32 0), i32 %and1)
- %1 = load i32* @y, align 4
+ %1 = load i32, i32* @y, align 4
%and2 = and i32 %1, 5
- %call2 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds
- ([7 x i8]* @.str, i32 0, i32 0), i32 %and2)
+ %call2 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds
+ ([7 x i8], [7 x i8]* @.str, i32 0, i32 0), i32 %and2)
ret i32 0
}
diff --git a/test/CodeGen/Mips/micromips-compact-branches.ll b/test/CodeGen/Mips/micromips-compact-branches.ll
index 670f9a05064f..c689944d386b 100644
--- a/test/CodeGen/Mips/micromips-compact-branches.ll
+++ b/test/CodeGen/Mips/micromips-compact-branches.ll
@@ -4,7 +4,7 @@
define void @main() nounwind uwtable {
entry:
%x = alloca i32, align 4
- %0 = load i32* %x, align 4
+ %0 = load i32, i32* %x, align 4
%cmp = icmp eq i32 %0, 0
br i1 %cmp, label %if.then, label %if.end
diff --git a/test/CodeGen/Mips/micromips-compact-jump.ll b/test/CodeGen/Mips/micromips-compact-jump.ll
new file mode 100644
index 000000000000..70cff84e967f
--- /dev/null
+++ b/test/CodeGen/Mips/micromips-compact-jump.ll
@@ -0,0 +1,11 @@
+; RUN: llc -march=mipsel -mcpu=mips32r2 -mattr=+micromips \
+; RUN: -disable-mips-delay-filler -O3 < %s | FileCheck %s
+
+define i32 @foo(i32 signext %a) #0 {
+entry:
+ ret i32 0
+}
+
+declare i32 @bar(i32 signext) #1
+
+; CHECK: jrc
diff --git a/test/CodeGen/Mips/micromips-delay-slot-jr.ll b/test/CodeGen/Mips/micromips-delay-slot-jr.ll
index df593b35e2a6..c6636ff5b4c7 100644
--- a/test/CodeGen/Mips/micromips-delay-slot-jr.ll
+++ b/test/CodeGen/Mips/micromips-delay-slot-jr.ll
@@ -11,14 +11,14 @@ entry:
L1: ; preds = %entry, %L1
%i.0 = phi i32 [ 0, %entry ], [ %inc, %L1 ]
- %puts = tail call i32 @puts(i8* getelementptr inbounds ([2 x i8]* @str, i32 0, i32 0))
+ %puts = tail call i32 @puts(i8* getelementptr inbounds ([2 x i8], [2 x i8]* @str, i32 0, i32 0))
%inc = add i32 %i.0, 1
- %arrayidx = getelementptr inbounds [3 x i8*]* @main.L, i32 0, i32 %i.0
- %0 = load i8** %arrayidx, align 4, !tbaa !1
+ %arrayidx = getelementptr inbounds [3 x i8*], [3 x i8*]* @main.L, i32 0, i32 %i.0
+ %0 = load i8*, i8** %arrayidx, align 4, !tbaa !1
indirectbr i8* %0, [label %L1, label %L2]
L2: ; preds = %L1
- %puts2 = tail call i32 @puts(i8* getelementptr inbounds ([2 x i8]* @str2, i32 0, i32 0))
+ %puts2 = tail call i32 @puts(i8* getelementptr inbounds ([2 x i8], [2 x i8]* @str2, i32 0, i32 0))
ret i32 0
}
@@ -29,20 +29,18 @@ declare i32 @puts(i8* nocapture readonly) #1
!3 = !{!"omnipotent char", !4, i64 0}
!4 = !{!"Simple C/C++ TBAA"}
-; CHECK: jr
-; CHECK-NEXT: nop
+; CHECK: jrc
%struct.foostruct = type { [3 x float] }
%struct.barstruct = type { %struct.foostruct, float }
@bar_ary = common global [4 x %struct.barstruct] zeroinitializer, align 4
define float* @spooky(i32 signext %i) #0 {
- %safe = getelementptr inbounds [4 x %struct.barstruct]* @bar_ary, i32 0, i32 %i, i32 1
+ %safe = getelementptr inbounds [4 x %struct.barstruct], [4 x %struct.barstruct]* @bar_ary, i32 0, i32 %i, i32 1
store float 1.420000e+02, float* %safe, align 4, !tbaa !1
ret float* %safe
}
; CHECK: spooky:
-; CHECK: jr $ra
-; CHECK-NEXT: nop
+; CHECK: jrc $ra
diff --git a/test/CodeGen/Mips/micromips-delay-slot.ll b/test/CodeGen/Mips/micromips-delay-slot.ll
index b5f6c56235bc..ef6546232835 100644
--- a/test/CodeGen/Mips/micromips-delay-slot.ll
+++ b/test/CodeGen/Mips/micromips-delay-slot.ll
@@ -6,7 +6,7 @@ define i32 @foo(i32 signext %a) #0 {
entry:
%a.addr = alloca i32, align 4
store i32 %a, i32* %a.addr, align 4
- %0 = load i32* %a.addr, align 4
+ %0 = load i32, i32* %a.addr, align 4
%shl = shl i32 %0, 2
%call = call i32 @bar(i32 signext %shl)
ret i32 %call
diff --git a/test/CodeGen/Mips/micromips-gp-rc.ll b/test/CodeGen/Mips/micromips-gp-rc.ll
new file mode 100644
index 000000000000..f139f7a8486d
--- /dev/null
+++ b/test/CodeGen/Mips/micromips-gp-rc.ll
@@ -0,0 +1,18 @@
+; RUN: llc -march=mipsel -mcpu=mips32r2 -mattr=+micromips \
+; RUN: -relocation-model=pic -O3 < %s | FileCheck %s
+
+@g = external global i32
+
+; Function Attrs: noreturn nounwind
+define void @foo() #0 {
+entry:
+ %0 = load i32, i32* @g, align 4
+ tail call void @exit(i32 signext %0)
+ unreachable
+}
+
+; Function Attrs: noreturn
+declare void @exit(i32 signext)
+
+; CHECK: move $gp, ${{[0-9]+}}
+
diff --git a/test/CodeGen/Mips/micromips-jal.ll b/test/CodeGen/Mips/micromips-jal.ll
index fccc22919728..51832fe333dc 100644
--- a/test/CodeGen/Mips/micromips-jal.ll
+++ b/test/CodeGen/Mips/micromips-jal.ll
@@ -7,8 +7,8 @@ entry:
%b.addr = alloca i32, align 4
store i32 %a, i32* %a.addr, align 4
store i32 %b, i32* %b.addr, align 4
- %0 = load i32* %a.addr, align 4
- %1 = load i32* %b.addr, align 4
+ %0 = load i32, i32* %a.addr, align 4
+ %1 = load i32, i32* %b.addr, align 4
%add = add nsw i32 %0, %1
ret i32 %add
}
@@ -20,11 +20,11 @@ entry:
%y = alloca i32, align 4
%z = alloca i32, align 4
store i32 0, i32* %retval
- %0 = load i32* %y, align 4
- %1 = load i32* %z, align 4
+ %0 = load i32, i32* %y, align 4
+ %1 = load i32, i32* %z, align 4
%call = call i32 @sum(i32 %0, i32 %1)
store i32 %call, i32* %x, align 4
- %2 = load i32* %x, align 4
+ %2 = load i32, i32* %x, align 4
ret i32 %2
}
diff --git a/test/CodeGen/Mips/micromips-load-effective-address.ll b/test/CodeGen/Mips/micromips-load-effective-address.ll
index afba760f0e62..470458098212 100644
--- a/test/CodeGen/Mips/micromips-load-effective-address.ll
+++ b/test/CodeGen/Mips/micromips-load-effective-address.ll
@@ -7,10 +7,10 @@ entry:
%y.addr = alloca i32*, align 8
store i32* %x, i32** %x.addr, align 8
store i32* %y, i32** %y.addr, align 8
- %0 = load i32** %x.addr, align 8
- %1 = load i32* %0, align 4
- %2 = load i32** %y.addr, align 8
- %3 = load i32* %2, align 4
+ %0 = load i32*, i32** %x.addr, align 8
+ %1 = load i32, i32* %0, align 4
+ %2 = load i32*, i32** %y.addr, align 8
+ %3 = load i32, i32* %2, align 4
%add = add nsw i32 %1, %3
ret i32 %add
}
diff --git a/test/CodeGen/Mips/micromips-not16.ll b/test/CodeGen/Mips/micromips-not16.ll
new file mode 100644
index 000000000000..d31aefae6f0d
--- /dev/null
+++ b/test/CodeGen/Mips/micromips-not16.ll
@@ -0,0 +1,26 @@
+; RUN: llc -march=mipsel -mcpu=mips32r2 -mattr=+micromips \
+; RUN: -relocation-model=pic -O3 < %s | FileCheck %s
+
+define i32 @main() {
+entry:
+ %retval = alloca i32, align 4
+ %x = alloca i64, align 8
+ store i32 0, i32* %retval
+ %0 = load i64, i64* %x, align 8
+ %cmp = icmp ne i64 %0, 9223372036854775807
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then:
+ store i32 1, i32* %retval
+ br label %return
+
+if.end:
+ store i32 0, i32* %retval
+ br label %return
+
+return:
+ %1 = load i32, i32* %retval
+ ret i32 %1
+}
+
+; CHECK: not16
diff --git a/test/CodeGen/Mips/micromips-or16.ll b/test/CodeGen/Mips/micromips-or16.ll
new file mode 100644
index 000000000000..82ea9c687df4
--- /dev/null
+++ b/test/CodeGen/Mips/micromips-or16.ll
@@ -0,0 +1,18 @@
+; RUN: llc -march=mipsel -mcpu=mips32r2 -mattr=+micromips \
+; RUN: -relocation-model=pic -O3 < %s | FileCheck %s
+
+define i32 @main() {
+entry:
+ %retval = alloca i32, align 4
+ %a = alloca i32, align 4
+ %b = alloca i32, align 4
+ %c = alloca i32, align 4
+ store i32 0, i32* %retval
+ %0 = load i32, i32* %b, align 4
+ %1 = load i32, i32* %c, align 4
+ %or = or i32 %0, %1
+ store i32 %or, i32* %a, align 4
+ ret i32 0
+}
+
+; CHECK: or16
diff --git a/test/CodeGen/Mips/micromips-rdhwr-directives.ll b/test/CodeGen/Mips/micromips-rdhwr-directives.ll
index af40a8796824..ebe4dddd0124 100644
--- a/test/CodeGen/Mips/micromips-rdhwr-directives.ll
+++ b/test/CodeGen/Mips/micromips-rdhwr-directives.ll
@@ -10,6 +10,6 @@ entry:
; CHECK: rdhwr
; CHECK: .set pop
- %0 = load i32* @a, align 4
+ %0 = load i32, i32* @a, align 4
ret i32 %0
}
diff --git a/test/CodeGen/Mips/micromips-shift.ll b/test/CodeGen/Mips/micromips-shift.ll
index 8215010bfc78..ed1bcbbf0831 100644
--- a/test/CodeGen/Mips/micromips-shift.ll
+++ b/test/CodeGen/Mips/micromips-shift.ll
@@ -8,11 +8,11 @@
define i32 @shift_left() nounwind {
entry:
- %0 = load i32* @a, align 4
+ %0 = load i32, i32* @a, align 4
%shl = shl i32 %0, 4
store i32 %shl, i32* @b, align 4
- %1 = load i32* @c, align 4
+ %1 = load i32, i32* @c, align 4
%shl1 = shl i32 %1, 10
store i32 %shl1, i32* @d, align 4
@@ -29,11 +29,11 @@ entry:
define i32 @shift_right() nounwind {
entry:
- %0 = load i32* @i, align 4
+ %0 = load i32, i32* @i, align 4
%shr = lshr i32 %0, 4
store i32 %shr, i32* @j, align 4
- %1 = load i32* @m, align 4
+ %1 = load i32, i32* @m, align 4
%shr1 = lshr i32 %1, 10
store i32 %shr1, i32* @n, align 4
diff --git a/test/CodeGen/Mips/micromips-subu16.ll b/test/CodeGen/Mips/micromips-subu16.ll
new file mode 100644
index 000000000000..d415574f443a
--- /dev/null
+++ b/test/CodeGen/Mips/micromips-subu16.ll
@@ -0,0 +1,18 @@
+; RUN: llc -march=mipsel -mcpu=mips32r2 -mattr=+micromips \
+; RUN: -relocation-model=pic -O3 < %s | FileCheck %s
+
+define i32 @main() {
+entry:
+ %retval = alloca i32, align 4
+ %a = alloca i32, align 4
+ %b = alloca i32, align 4
+ %c = alloca i32, align 4
+ store i32 0, i32* %retval
+ %0 = load i32, i32* %b, align 4
+ %1 = load i32, i32* %c, align 4
+ %sub = sub nsw i32 %0, %1
+ store i32 %sub, i32* %a, align 4
+ ret i32 0
+}
+
+; CHECK: subu16
diff --git a/test/CodeGen/Mips/micromips-sw-lw-16.ll b/test/CodeGen/Mips/micromips-sw-lw-16.ll
new file mode 100644
index 000000000000..358372649b5d
--- /dev/null
+++ b/test/CodeGen/Mips/micromips-sw-lw-16.ll
@@ -0,0 +1,27 @@
+; RUN: llc %s -march=mipsel -mattr=micromips -filetype=asm \
+; RUN: -relocation-model=pic -O3 -o - | FileCheck %s
+
+; Function Attrs: noinline nounwind
+define void @bar(i32* %p) #0 {
+entry:
+ %p.addr = alloca i32*, align 4
+ store i32* %p, i32** %p.addr, align 4
+ %0 = load i32*, i32** %p.addr, align 4
+ %1 = load i32, i32* %0, align 4
+ %add = add nsw i32 7, %1
+ %2 = load i32*, i32** %p.addr, align 4
+ store i32 %add, i32* %2, align 4
+ %3 = load i32*, i32** %p.addr, align 4
+ %add.ptr = getelementptr inbounds i32, i32* %3, i32 1
+ %4 = load i32, i32* %add.ptr, align 4
+ %add1 = add nsw i32 7, %4
+ %5 = load i32*, i32** %p.addr, align 4
+ %add.ptr2 = getelementptr inbounds i32, i32* %5, i32 1
+ store i32 %add1, i32* %add.ptr2, align 4
+ ret void
+}
+
+; CHECK: lw16 ${{[0-9]+}}, 0($4)
+; CHECK: sw16 ${{[0-9]+}}, 0($4)
+; CHECK: lw16 ${{[0-9]+}}, 4(${{[0-9]+}})
+; CHECK: sw16 ${{[0-9]+}}, 4(${{[0-9]+}})
diff --git a/test/CodeGen/Mips/micromips-xor16.ll b/test/CodeGen/Mips/micromips-xor16.ll
new file mode 100644
index 000000000000..53c75acd4d3b
--- /dev/null
+++ b/test/CodeGen/Mips/micromips-xor16.ll
@@ -0,0 +1,18 @@
+; RUN: llc -march=mipsel -mcpu=mips32r2 -mattr=+micromips \
+; RUN: -relocation-model=pic -O3 < %s | FileCheck %s
+
+define i32 @main() {
+entry:
+ %retval = alloca i32, align 4
+ %a = alloca i32, align 4
+ %b = alloca i32, align 4
+ %c = alloca i32, align 4
+ store i32 0, i32* %retval
+ %0 = load i32, i32* %b, align 4
+ %1 = load i32, i32* %c, align 4
+ %xor = xor i32 %0, %1
+ store i32 %xor, i32* %a, align 4
+ ret i32 0
+}
+
+; CHECK: xor16
diff --git a/test/CodeGen/Mips/mips16_32_8.ll b/test/CodeGen/Mips/mips16_32_8.ll
index 2f5bc219cf35..5e03928a11f1 100644
--- a/test/CodeGen/Mips/mips16_32_8.ll
+++ b/test/CodeGen/Mips/mips16_32_8.ll
@@ -22,13 +22,13 @@ entry:
define void @nofoo() #1 {
entry:
store i32 20, i32* @i, align 4
- %0 = load float* @x, align 4
- %1 = load float* @y, align 4
+ %0 = load float, float* @x, align 4
+ %1 = load float, float* @y, align 4
%add = fadd float %0, %1
store float %add, float* @f, align 4
- %2 = load float* @f, align 4
+ %2 = load float, float* @f, align 4
%conv = fpext float %2 to double
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([8 x i8]* @.str, i32 0, i32 0), double %conv)
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([8 x i8], [8 x i8]* @.str, i32 0, i32 0), double %conv)
ret void
}
@@ -48,11 +48,11 @@ declare i32 @printf(i8*, ...) #2
define i32 @main() #3 {
entry:
call void @foo()
- %0 = load i32* @i, align 4
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([11 x i8]* @.str1, i32 0, i32 0), i32 %0)
+ %0 = load i32, i32* @i, align 4
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str1, i32 0, i32 0), i32 %0)
call void @nofoo()
- %1 = load i32* @i, align 4
- %call1 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([13 x i8]* @.str2, i32 0, i32 0), i32 %1)
+ %1 = load i32, i32* @i, align 4
+ %call1 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([13 x i8], [13 x i8]* @.str2, i32 0, i32 0), i32 %1)
ret i32 0
}
diff --git a/test/CodeGen/Mips/mips16_fpret.ll b/test/CodeGen/Mips/mips16_fpret.ll
index fe87604d6107..0f09c4105a17 100644
--- a/test/CodeGen/Mips/mips16_fpret.ll
+++ b/test/CodeGen/Mips/mips16_fpret.ll
@@ -11,7 +11,7 @@
define float @foox() {
entry:
- %0 = load float* @x, align 4
+ %0 = load float, float* @x, align 4
ret float %0
; 1: .ent foox
; 1: lw $2, %lo(x)(${{[0-9]+}})
@@ -20,7 +20,7 @@ entry:
define double @foodx() {
entry:
- %0 = load double* @dx, align 8
+ %0 = load double, double* @dx, align 8
ret double %0
; 1: .ent foodx
; 1: lw $2, %lo(dx)(${{[0-9]+}})
@@ -34,13 +34,13 @@ entry:
define { float, float } @foocx() {
entry:
%retval = alloca { float, float }, align 4
- %cx.real = load float* getelementptr inbounds ({ float, float }* @cx, i32 0, i32 0)
- %cx.imag = load float* getelementptr inbounds ({ float, float }* @cx, i32 0, i32 1)
- %real = getelementptr inbounds { float, float }* %retval, i32 0, i32 0
- %imag = getelementptr inbounds { float, float }* %retval, i32 0, i32 1
+ %cx.real = load float, float* getelementptr inbounds ({ float, float }, { float, float }* @cx, i32 0, i32 0)
+ %cx.imag = load float, float* getelementptr inbounds ({ float, float }, { float, float }* @cx, i32 0, i32 1)
+ %real = getelementptr inbounds { float, float }, { float, float }* %retval, i32 0, i32 0
+ %imag = getelementptr inbounds { float, float }, { float, float }* %retval, i32 0, i32 1
store float %cx.real, float* %real
store float %cx.imag, float* %imag
- %0 = load { float, float }* %retval
+ %0 = load { float, float }, { float, float }* %retval
ret { float, float } %0
; 1: .ent foocx
; 1: lw $2, %lo(cx)(${{[0-9]+}})
@@ -53,13 +53,13 @@ entry:
define { double, double } @foodcx() {
entry:
%retval = alloca { double, double }, align 8
- %dcx.real = load double* getelementptr inbounds ({ double, double }* @dcx, i32 0, i32 0)
- %dcx.imag = load double* getelementptr inbounds ({ double, double }* @dcx, i32 0, i32 1)
- %real = getelementptr inbounds { double, double }* %retval, i32 0, i32 0
- %imag = getelementptr inbounds { double, double }* %retval, i32 0, i32 1
+ %dcx.real = load double, double* getelementptr inbounds ({ double, double }, { double, double }* @dcx, i32 0, i32 0)
+ %dcx.imag = load double, double* getelementptr inbounds ({ double, double }, { double, double }* @dcx, i32 0, i32 1)
+ %real = getelementptr inbounds { double, double }, { double, double }* %retval, i32 0, i32 0
+ %imag = getelementptr inbounds { double, double }, { double, double }* %retval, i32 0, i32 1
store double %dcx.real, double* %real
store double %dcx.imag, double* %imag
- %0 = load { double, double }* %retval
+ %0 = load { double, double }, { double, double }* %retval
ret { double, double } %0
; 1: .ent foodcx
; 1: lw ${{[0-9]}}, %lo(dcx)(${{[0-9]+}})
diff --git a/test/CodeGen/Mips/mips16ex.ll b/test/CodeGen/Mips/mips16ex.ll
index a1a99191595d..68b584604b27 100644
--- a/test/CodeGen/Mips/mips16ex.ll
+++ b/test/CodeGen/Mips/mips16ex.ll
@@ -1,8 +1,10 @@
; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
-;16: .cfi_personality
+;16: main:
;16-NEXT: [[TMP:.*]]:
-;16-NEXT: $eh_func_begin0 = ([[TMP]])
+;16-NEXT: $func_begin0 = ([[TMP]])
+;16-NEXT: .cfi_startproc
+;16-NEXT: .cfi_personality
@.str = private unnamed_addr constant [7 x i8] c"hello\0A\00", align 1
@_ZTIi = external constant i8*
@.str1 = private unnamed_addr constant [15 x i8] c"exception %i \0A\00", align 1
@@ -14,7 +16,7 @@ entry:
%ehselector.slot = alloca i32
%e = alloca i32, align 4
store i32 0, i32* %retval
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([7 x i8]* @.str, i32 0, i32 0))
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([7 x i8], [7 x i8]* @.str, i32 0, i32 0))
%exception = call i8* @__cxa_allocate_exception(i32 4) nounwind
%0 = bitcast i8* %exception to i32*
store i32 20, i32* %0
@@ -31,19 +33,19 @@ lpad: ; preds = %entry
br label %catch.dispatch
catch.dispatch: ; preds = %lpad
- %sel = load i32* %ehselector.slot
+ %sel = load i32, i32* %ehselector.slot
%4 = call i32 @llvm.eh.typeid.for(i8* bitcast (i8** @_ZTIi to i8*)) nounwind
%matches = icmp eq i32 %sel, %4
br i1 %matches, label %catch, label %eh.resume
catch: ; preds = %catch.dispatch
- %exn = load i8** %exn.slot
+ %exn = load i8*, i8** %exn.slot
%5 = call i8* @__cxa_begin_catch(i8* %exn) nounwind
%6 = bitcast i8* %5 to i32*
- %exn.scalar = load i32* %6
+ %exn.scalar = load i32, i32* %6
store i32 %exn.scalar, i32* %e, align 4
- %7 = load i32* %e, align 4
- %call2 = invoke i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([15 x i8]* @.str1, i32 0, i32 0), i32 %7)
+ %7 = load i32, i32* %e, align 4
+ %call2 = invoke i32 (i8*, ...) @printf(i8* getelementptr inbounds ([15 x i8], [15 x i8]* @.str1, i32 0, i32 0), i32 %7)
to label %invoke.cont unwind label %lpad1
invoke.cont: ; preds = %catch
@@ -64,8 +66,8 @@ lpad1: ; preds = %catch
br label %eh.resume
eh.resume: ; preds = %lpad1, %catch.dispatch
- %exn3 = load i8** %exn.slot
- %sel4 = load i32* %ehselector.slot
+ %exn3 = load i8*, i8** %exn.slot
+ %sel4 = load i32, i32* %ehselector.slot
%lpad.val = insertvalue { i8*, i32 } undef, i8* %exn3, 0
%lpad.val5 = insertvalue { i8*, i32 } %lpad.val, i32 %sel4, 1
resume { i8*, i32 } %lpad.val5
diff --git a/test/CodeGen/Mips/mips16fpe.ll b/test/CodeGen/Mips/mips16fpe.ll
index 987980e080ff..f8b916da3a49 100644
--- a/test/CodeGen/Mips/mips16fpe.ll
+++ b/test/CodeGen/Mips/mips16fpe.ll
@@ -42,8 +42,8 @@
define void @test_addsf3() nounwind {
entry:
;16hf-LABEL: test_addsf3:
- %0 = load float* @x, align 4
- %1 = load float* @y, align 4
+ %0 = load float, float* @x, align 4
+ %1 = load float, float* @y, align 4
%add = fadd float %0, %1
store float %add, float* @addsf3_result, align 4
;16hf: lw ${{[0-9]+}}, %call16(__mips16_addsf3)(${{[0-9]+}})
@@ -53,8 +53,8 @@ entry:
define void @test_adddf3() nounwind {
entry:
;16hf-LABEL: test_adddf3:
- %0 = load double* @xd, align 8
- %1 = load double* @yd, align 8
+ %0 = load double, double* @xd, align 8
+ %1 = load double, double* @yd, align 8
%add = fadd double %0, %1
store double %add, double* @adddf3_result, align 8
;16hf: lw ${{[0-9]+}}, %call16(__mips16_adddf3)(${{[0-9]+}})
@@ -64,8 +64,8 @@ entry:
define void @test_subsf3() nounwind {
entry:
;16hf-LABEL: test_subsf3:
- %0 = load float* @x, align 4
- %1 = load float* @y, align 4
+ %0 = load float, float* @x, align 4
+ %1 = load float, float* @y, align 4
%sub = fsub float %0, %1
store float %sub, float* @subsf3_result, align 4
;16hf: lw ${{[0-9]+}}, %call16(__mips16_subsf3)(${{[0-9]+}})
@@ -75,8 +75,8 @@ entry:
define void @test_subdf3() nounwind {
entry:
;16hf-LABEL: test_subdf3:
- %0 = load double* @xd, align 8
- %1 = load double* @yd, align 8
+ %0 = load double, double* @xd, align 8
+ %1 = load double, double* @yd, align 8
%sub = fsub double %0, %1
store double %sub, double* @subdf3_result, align 8
;16hf: lw ${{[0-9]+}}, %call16(__mips16_subdf3)(${{[0-9]+}})
@@ -86,8 +86,8 @@ entry:
define void @test_mulsf3() nounwind {
entry:
;16hf-LABEL: test_mulsf3:
- %0 = load float* @x, align 4
- %1 = load float* @y, align 4
+ %0 = load float, float* @x, align 4
+ %1 = load float, float* @y, align 4
%mul = fmul float %0, %1
store float %mul, float* @mulsf3_result, align 4
;16hf: lw ${{[0-9]+}}, %call16(__mips16_mulsf3)(${{[0-9]+}})
@@ -97,8 +97,8 @@ entry:
define void @test_muldf3() nounwind {
entry:
;16hf-LABEL: test_muldf3:
- %0 = load double* @xd, align 8
- %1 = load double* @yd, align 8
+ %0 = load double, double* @xd, align 8
+ %1 = load double, double* @yd, align 8
%mul = fmul double %0, %1
store double %mul, double* @muldf3_result, align 8
;16hf: lw ${{[0-9]+}}, %call16(__mips16_muldf3)(${{[0-9]+}})
@@ -108,8 +108,8 @@ entry:
define void @test_divsf3() nounwind {
entry:
;16hf-LABEL: test_divsf3:
- %0 = load float* @y, align 4
- %1 = load float* @x, align 4
+ %0 = load float, float* @y, align 4
+ %1 = load float, float* @x, align 4
%div = fdiv float %0, %1
store float %div, float* @divsf3_result, align 4
;16hf: lw ${{[0-9]+}}, %call16(__mips16_divsf3)(${{[0-9]+}})
@@ -119,9 +119,9 @@ entry:
define void @test_divdf3() nounwind {
entry:
;16hf-LABEL: test_divdf3:
- %0 = load double* @yd, align 8
+ %0 = load double, double* @yd, align 8
%mul = fmul double %0, 2.000000e+00
- %1 = load double* @xd, align 8
+ %1 = load double, double* @xd, align 8
%div = fdiv double %mul, %1
store double %div, double* @divdf3_result, align 8
;16hf: lw ${{[0-9]+}}, %call16(__mips16_divdf3)(${{[0-9]+}})
@@ -131,7 +131,7 @@ entry:
define void @test_extendsfdf2() nounwind {
entry:
;16hf-LABEL: test_extendsfdf2:
- %0 = load float* @x, align 4
+ %0 = load float, float* @x, align 4
%conv = fpext float %0 to double
store double %conv, double* @extendsfdf2_result, align 8
;16hf: lw ${{[0-9]+}}, %call16(__mips16_extendsfdf2)(${{[0-9]+}})
@@ -141,7 +141,7 @@ entry:
define void @test_truncdfsf2() nounwind {
entry:
;16hf-LABEL: test_truncdfsf2:
- %0 = load double* @xd2, align 8
+ %0 = load double, double* @xd2, align 8
%conv = fptrunc double %0 to float
store float %conv, float* @truncdfsf2_result, align 4
;16hf: lw ${{[0-9]+}}, %call16(__mips16_truncdfsf2)(${{[0-9]+}})
@@ -151,7 +151,7 @@ entry:
define void @test_fix_truncsfsi() nounwind {
entry:
;16hf-LABEL: test_fix_truncsfsi:
- %0 = load float* @x, align 4
+ %0 = load float, float* @x, align 4
%conv = fptosi float %0 to i32
store i32 %conv, i32* @fix_truncsfsi_result, align 4
;16hf: lw ${{[0-9]+}}, %call16(__mips16_fix_truncsfsi)(${{[0-9]+}})
@@ -161,7 +161,7 @@ entry:
define void @test_fix_truncdfsi() nounwind {
entry:
;16hf-LABEL: test_fix_truncdfsi:
- %0 = load double* @xd, align 8
+ %0 = load double, double* @xd, align 8
%conv = fptosi double %0 to i32
store i32 %conv, i32* @fix_truncdfsi_result, align 4
;16hf: lw ${{[0-9]+}}, %call16(__mips16_fix_truncdfsi)(${{[0-9]+}})
@@ -171,7 +171,7 @@ entry:
define void @test_floatsisf() nounwind {
entry:
;16hf-LABEL: test_floatsisf:
- %0 = load i32* @si, align 4
+ %0 = load i32, i32* @si, align 4
%conv = sitofp i32 %0 to float
store float %conv, float* @floatsisf_result, align 4
;16hf: lw ${{[0-9]+}}, %call16(__mips16_floatsisf)(${{[0-9]+}})
@@ -181,7 +181,7 @@ entry:
define void @test_floatsidf() nounwind {
entry:
;16hf-LABEL: test_floatsidf:
- %0 = load i32* @si, align 4
+ %0 = load i32, i32* @si, align 4
%conv = sitofp i32 %0 to double
store double %conv, double* @floatsidf_result, align 8
;16hf: lw ${{[0-9]+}}, %call16(__mips16_floatsidf)(${{[0-9]+}})
@@ -191,7 +191,7 @@ entry:
define void @test_floatunsisf() nounwind {
entry:
;16hf-LABEL: test_floatunsisf:
- %0 = load i32* @ui, align 4
+ %0 = load i32, i32* @ui, align 4
%conv = uitofp i32 %0 to float
store float %conv, float* @floatunsisf_result, align 4
;16hf: lw ${{[0-9]+}}, %call16(__mips16_floatunsisf)(${{[0-9]+}})
@@ -201,7 +201,7 @@ entry:
define void @test_floatunsidf() nounwind {
entry:
;16hf-LABEL: test_floatunsidf:
- %0 = load i32* @ui, align 4
+ %0 = load i32, i32* @ui, align 4
%conv = uitofp i32 %0 to double
store double %conv, double* @floatunsidf_result, align 8
;16hf: lw ${{[0-9]+}}, %call16(__mips16_floatunsidf)(${{[0-9]+}})
@@ -211,8 +211,8 @@ entry:
define void @test_eqsf2() nounwind {
entry:
;16hf-LABEL: test_eqsf2:
- %0 = load float* @x, align 4
- %1 = load float* @xx, align 4
+ %0 = load float, float* @x, align 4
+ %1 = load float, float* @xx, align 4
%cmp = fcmp oeq float %0, %1
%conv = zext i1 %cmp to i32
store i32 %conv, i32* @eqsf2_result, align 4
@@ -223,8 +223,8 @@ entry:
define void @test_eqdf2() nounwind {
entry:
;16hf-LABEL: test_eqdf2:
- %0 = load double* @xd, align 8
- %1 = load double* @xxd, align 8
+ %0 = load double, double* @xd, align 8
+ %1 = load double, double* @xxd, align 8
%cmp = fcmp oeq double %0, %1
%conv = zext i1 %cmp to i32
store i32 %conv, i32* @eqdf2_result, align 4
@@ -235,8 +235,8 @@ entry:
define void @test_nesf2() nounwind {
entry:
;16hf-LABEL: test_nesf2:
- %0 = load float* @x, align 4
- %1 = load float* @y, align 4
+ %0 = load float, float* @x, align 4
+ %1 = load float, float* @y, align 4
%cmp = fcmp une float %0, %1
%conv = zext i1 %cmp to i32
store i32 %conv, i32* @nesf2_result, align 4
@@ -247,8 +247,8 @@ entry:
define void @test_nedf2() nounwind {
entry:
;16hf-LABEL: test_nedf2:
- %0 = load double* @xd, align 8
- %1 = load double* @yd, align 8
+ %0 = load double, double* @xd, align 8
+ %1 = load double, double* @yd, align 8
%cmp = fcmp une double %0, %1
%conv = zext i1 %cmp to i32
store i32 %conv, i32* @nedf2_result, align 4
@@ -259,10 +259,10 @@ entry:
define void @test_gesf2() nounwind {
entry:
;16hf-LABEL: test_gesf2:
- %0 = load float* @x, align 4
- %1 = load float* @xx, align 4
+ %0 = load float, float* @x, align 4
+ %1 = load float, float* @xx, align 4
%cmp = fcmp oge float %0, %1
- %2 = load float* @y, align 4
+ %2 = load float, float* @y, align 4
%cmp1 = fcmp oge float %2, %0
%and3 = and i1 %cmp, %cmp1
%and = zext i1 %and3 to i32
@@ -274,10 +274,10 @@ entry:
define void @test_gedf2() nounwind {
entry:
;16hf-LABEL: test_gedf2:
- %0 = load double* @xd, align 8
- %1 = load double* @xxd, align 8
+ %0 = load double, double* @xd, align 8
+ %1 = load double, double* @xxd, align 8
%cmp = fcmp oge double %0, %1
- %2 = load double* @yd, align 8
+ %2 = load double, double* @yd, align 8
%cmp1 = fcmp oge double %2, %0
%and3 = and i1 %cmp, %cmp1
%and = zext i1 %and3 to i32
@@ -289,10 +289,10 @@ entry:
define void @test_ltsf2() nounwind {
entry:
;16hf-LABEL: test_ltsf2:
- %0 = load float* @x, align 4
- %1 = load float* @xx, align 4
+ %0 = load float, float* @x, align 4
+ %1 = load float, float* @xx, align 4
%lnot = fcmp uge float %0, %1
- %2 = load float* @y, align 4
+ %2 = load float, float* @y, align 4
%cmp1 = fcmp olt float %0, %2
%and2 = and i1 %lnot, %cmp1
%and = zext i1 %and2 to i32
@@ -305,10 +305,10 @@ entry:
define void @test_ltdf2() nounwind {
entry:
;16hf-LABEL: test_ltdf2:
- %0 = load double* @xd, align 8
- %1 = load double* @xxd, align 8
+ %0 = load double, double* @xd, align 8
+ %1 = load double, double* @xxd, align 8
%lnot = fcmp uge double %0, %1
- %2 = load double* @yd, align 8
+ %2 = load double, double* @yd, align 8
%cmp1 = fcmp olt double %0, %2
%and2 = and i1 %lnot, %cmp1
%and = zext i1 %and2 to i32
@@ -321,10 +321,10 @@ entry:
define void @test_lesf2() nounwind {
entry:
;16hf-LABEL: test_lesf2:
- %0 = load float* @x, align 4
- %1 = load float* @xx, align 4
+ %0 = load float, float* @x, align 4
+ %1 = load float, float* @xx, align 4
%cmp = fcmp ole float %0, %1
- %2 = load float* @y, align 4
+ %2 = load float, float* @y, align 4
%cmp1 = fcmp ole float %0, %2
%and3 = and i1 %cmp, %cmp1
%and = zext i1 %and3 to i32
@@ -336,10 +336,10 @@ entry:
define void @test_ledf2() nounwind {
entry:
;16hf-LABEL: test_ledf2:
- %0 = load double* @xd, align 8
- %1 = load double* @xxd, align 8
+ %0 = load double, double* @xd, align 8
+ %1 = load double, double* @xxd, align 8
%cmp = fcmp ole double %0, %1
- %2 = load double* @yd, align 8
+ %2 = load double, double* @yd, align 8
%cmp1 = fcmp ole double %0, %2
%and3 = and i1 %cmp, %cmp1
%and = zext i1 %and3 to i32
@@ -351,10 +351,10 @@ entry:
define void @test_gtsf2() nounwind {
entry:
;16hf-LABEL: test_gtsf2:
- %0 = load float* @x, align 4
- %1 = load float* @xx, align 4
+ %0 = load float, float* @x, align 4
+ %1 = load float, float* @xx, align 4
%lnot = fcmp ule float %0, %1
- %2 = load float* @y, align 4
+ %2 = load float, float* @y, align 4
%cmp1 = fcmp ogt float %2, %0
%and2 = and i1 %lnot, %cmp1
%and = zext i1 %and2 to i32
@@ -366,10 +366,10 @@ entry:
define void @test_gtdf2() nounwind {
entry:
;16hf-LABEL: test_gtdf2:
- %0 = load double* @xd, align 8
- %1 = load double* @xxd, align 8
+ %0 = load double, double* @xd, align 8
+ %1 = load double, double* @xxd, align 8
%lnot = fcmp ule double %0, %1
- %2 = load double* @yd, align 8
+ %2 = load double, double* @yd, align 8
%cmp1 = fcmp ogt double %2, %0
%and2 = and i1 %lnot, %cmp1
%and = zext i1 %and2 to i32
diff --git a/test/CodeGen/Mips/mips64-f128-call.ll b/test/CodeGen/Mips/mips64-f128-call.ll
index 455e540e5df1..9a093e6f9825 100644
--- a/test/CodeGen/Mips/mips64-f128-call.ll
+++ b/test/CodeGen/Mips/mips64-f128-call.ll
@@ -19,7 +19,7 @@ entry:
define void @foo1() {
entry:
- %0 = load fp128* @gld0, align 16
+ %0 = load fp128, fp128* @gld0, align 16
tail call void @foo2(fp128 %0)
ret void
}
@@ -38,7 +38,7 @@ define fp128 @foo3() {
entry:
%call = tail call fp128 @foo4()
store fp128 %call, fp128* @gld0, align 16
- %0 = load fp128* @gld1, align 16
+ %0 = load fp128, fp128* @gld1, align 16
ret fp128 %0
}
diff --git a/test/CodeGen/Mips/mips64-f128.ll b/test/CodeGen/Mips/mips64-f128.ll
index f0cbbd08d79a..d9c93810438f 100644
--- a/test/CodeGen/Mips/mips64-f128.ll
+++ b/test/CodeGen/Mips/mips64-f128.ll
@@ -1,10 +1,10 @@
-; RUN: llc -mtriple=mips64el-unknown-unknown -mcpu=mips4 -soft-float -O1 \
+; RUN: llc -mtriple=mips64el-unknown-unknown -mcpu=mips4 -mattr=+soft-float -O1 \
; RUN: -disable-mips-delay-filler < %s | FileCheck %s -check-prefix=ALL -check-prefix=C_CC_FMT
-; RUN: llc -mtriple=mips64el-unknown-unknown -mcpu=mips64 -soft-float -O1 \
+; RUN: llc -mtriple=mips64el-unknown-unknown -mcpu=mips64 -mattr=+soft-float -O1 \
; RUN: -disable-mips-delay-filler < %s | FileCheck %s -check-prefix=ALL -check-prefix=C_CC_FMT
-; RUN: llc -mtriple=mips64el-unknown-unknown -mcpu=mips64r2 -soft-float -O1 \
+; RUN: llc -mtriple=mips64el-unknown-unknown -mcpu=mips64r2 -mattr=+soft-float -O1 \
; RUN: -disable-mips-delay-filler < %s | FileCheck %s -check-prefix=ALL -check-prefix=C_CC_FMT
-; RUN: llc -mtriple=mips64el-unknown-unknown -mcpu=mips64r6 -soft-float -O1 \
+; RUN: llc -mtriple=mips64el-unknown-unknown -mcpu=mips64r6 -mattr=+soft-float -O1 \
; RUN: -disable-mips-delay-filler < %s | FileCheck %s -check-prefix=ALL -check-prefix=CMP_CC_FMT
@gld0 = external global fp128
@@ -18,8 +18,8 @@
define fp128 @addLD() {
entry:
- %0 = load fp128* @gld0, align 16
- %1 = load fp128* @gld1, align 16
+ %0 = load fp128, fp128* @gld0, align 16
+ %1 = load fp128, fp128* @gld1, align 16
%add = fadd fp128 %0, %1
ret fp128 %add
}
@@ -29,8 +29,8 @@ entry:
define fp128 @subLD() {
entry:
- %0 = load fp128* @gld0, align 16
- %1 = load fp128* @gld1, align 16
+ %0 = load fp128, fp128* @gld0, align 16
+ %1 = load fp128, fp128* @gld1, align 16
%sub = fsub fp128 %0, %1
ret fp128 %sub
}
@@ -40,8 +40,8 @@ entry:
define fp128 @mulLD() {
entry:
- %0 = load fp128* @gld0, align 16
- %1 = load fp128* @gld1, align 16
+ %0 = load fp128, fp128* @gld0, align 16
+ %1 = load fp128, fp128* @gld1, align 16
%mul = fmul fp128 %0, %1
ret fp128 %mul
}
@@ -51,8 +51,8 @@ entry:
define fp128 @divLD() {
entry:
- %0 = load fp128* @gld0, align 16
- %1 = load fp128* @gld1, align 16
+ %0 = load fp128, fp128* @gld0, align 16
+ %1 = load fp128, fp128* @gld1, align 16
%div = fdiv fp128 %0, %1
ret fp128 %div
}
@@ -247,7 +247,7 @@ entry:
define fp128 @libcall1_fabsl() {
entry:
- %0 = load fp128* @gld0, align 16
+ %0 = load fp128, fp128* @gld0, align 16
%call = tail call fp128 @fabsl(fp128 %0) nounwind readnone
ret fp128 %call
}
@@ -259,7 +259,7 @@ declare fp128 @fabsl(fp128) #1
define fp128 @libcall1_ceill() {
entry:
- %0 = load fp128* @gld0, align 16
+ %0 = load fp128, fp128* @gld0, align 16
%call = tail call fp128 @ceill(fp128 %0) nounwind readnone
ret fp128 %call
}
@@ -271,7 +271,7 @@ declare fp128 @ceill(fp128) #1
define fp128 @libcall1_sinl() {
entry:
- %0 = load fp128* @gld0, align 16
+ %0 = load fp128, fp128* @gld0, align 16
%call = tail call fp128 @sinl(fp128 %0) nounwind
ret fp128 %call
}
@@ -283,7 +283,7 @@ declare fp128 @sinl(fp128) #2
define fp128 @libcall1_cosl() {
entry:
- %0 = load fp128* @gld0, align 16
+ %0 = load fp128, fp128* @gld0, align 16
%call = tail call fp128 @cosl(fp128 %0) nounwind
ret fp128 %call
}
@@ -295,7 +295,7 @@ declare fp128 @cosl(fp128) #2
define fp128 @libcall1_expl() {
entry:
- %0 = load fp128* @gld0, align 16
+ %0 = load fp128, fp128* @gld0, align 16
%call = tail call fp128 @expl(fp128 %0) nounwind
ret fp128 %call
}
@@ -307,7 +307,7 @@ declare fp128 @expl(fp128) #2
define fp128 @libcall1_exp2l() {
entry:
- %0 = load fp128* @gld0, align 16
+ %0 = load fp128, fp128* @gld0, align 16
%call = tail call fp128 @exp2l(fp128 %0) nounwind
ret fp128 %call
}
@@ -319,7 +319,7 @@ declare fp128 @exp2l(fp128) #2
define fp128 @libcall1_logl() {
entry:
- %0 = load fp128* @gld0, align 16
+ %0 = load fp128, fp128* @gld0, align 16
%call = tail call fp128 @logl(fp128 %0) nounwind
ret fp128 %call
}
@@ -331,7 +331,7 @@ declare fp128 @logl(fp128) #2
define fp128 @libcall1_log2l() {
entry:
- %0 = load fp128* @gld0, align 16
+ %0 = load fp128, fp128* @gld0, align 16
%call = tail call fp128 @log2l(fp128 %0) nounwind
ret fp128 %call
}
@@ -343,7 +343,7 @@ declare fp128 @log2l(fp128) #2
define fp128 @libcall1_log10l() {
entry:
- %0 = load fp128* @gld0, align 16
+ %0 = load fp128, fp128* @gld0, align 16
%call = tail call fp128 @log10l(fp128 %0) nounwind
ret fp128 %call
}
@@ -355,7 +355,7 @@ declare fp128 @log10l(fp128) #2
define fp128 @libcall1_nearbyintl() {
entry:
- %0 = load fp128* @gld0, align 16
+ %0 = load fp128, fp128* @gld0, align 16
%call = tail call fp128 @nearbyintl(fp128 %0) nounwind readnone
ret fp128 %call
}
@@ -367,7 +367,7 @@ declare fp128 @nearbyintl(fp128) #1
define fp128 @libcall1_floorl() {
entry:
- %0 = load fp128* @gld0, align 16
+ %0 = load fp128, fp128* @gld0, align 16
%call = tail call fp128 @floorl(fp128 %0) nounwind readnone
ret fp128 %call
}
@@ -379,7 +379,7 @@ declare fp128 @floorl(fp128) #1
define fp128 @libcall1_sqrtl() {
entry:
- %0 = load fp128* @gld0, align 16
+ %0 = load fp128, fp128* @gld0, align 16
%call = tail call fp128 @sqrtl(fp128 %0) nounwind
ret fp128 %call
}
@@ -391,7 +391,7 @@ declare fp128 @sqrtl(fp128) #2
define fp128 @libcall1_rintl() {
entry:
- %0 = load fp128* @gld0, align 16
+ %0 = load fp128, fp128* @gld0, align 16
%call = tail call fp128 @rintl(fp128 %0) nounwind readnone
ret fp128 %call
}
@@ -424,8 +424,8 @@ declare fp128 @llvm.powi.f128(fp128, i32) #3
define fp128 @libcall2_copysignl() {
entry:
- %0 = load fp128* @gld0, align 16
- %1 = load fp128* @gld1, align 16
+ %0 = load fp128, fp128* @gld0, align 16
+ %1 = load fp128, fp128* @gld1, align 16
%call = tail call fp128 @copysignl(fp128 %0, fp128 %1) nounwind readnone
ret fp128 %call
}
@@ -437,8 +437,8 @@ declare fp128 @copysignl(fp128, fp128) #1
define fp128 @libcall2_powl() {
entry:
- %0 = load fp128* @gld0, align 16
- %1 = load fp128* @gld1, align 16
+ %0 = load fp128, fp128* @gld0, align 16
+ %1 = load fp128, fp128* @gld1, align 16
%call = tail call fp128 @powl(fp128 %0, fp128 %1) nounwind
ret fp128 %call
}
@@ -450,8 +450,8 @@ declare fp128 @powl(fp128, fp128) #2
define fp128 @libcall2_fmodl() {
entry:
- %0 = load fp128* @gld0, align 16
- %1 = load fp128* @gld1, align 16
+ %0 = load fp128, fp128* @gld0, align 16
+ %1 = load fp128, fp128* @gld1, align 16
%call = tail call fp128 @fmodl(fp128 %0, fp128 %1) nounwind
ret fp128 %call
}
@@ -463,9 +463,9 @@ declare fp128 @fmodl(fp128, fp128) #2
define fp128 @libcall3_fmal() {
entry:
- %0 = load fp128* @gld0, align 16
- %1 = load fp128* @gld2, align 16
- %2 = load fp128* @gld1, align 16
+ %0 = load fp128, fp128* @gld0, align 16
+ %1 = load fp128, fp128* @gld2, align 16
+ %2 = load fp128, fp128* @gld1, align 16
%3 = tail call fp128 @llvm.fma.f128(fp128 %0, fp128 %2, fp128 %1)
ret fp128 %3
}
@@ -539,7 +539,7 @@ entry:
define fp128 @load_LD_LD() {
entry:
- %0 = load fp128* @gld1, align 16
+ %0 = load fp128, fp128* @gld1, align 16
ret fp128 %0
}
@@ -551,7 +551,7 @@ entry:
define fp128 @load_LD_float() {
entry:
- %0 = load float* @gf1, align 4
+ %0 = load float, float* @gf1, align 4
%conv = fpext float %0 to fp128
ret fp128 %conv
}
@@ -564,7 +564,7 @@ entry:
define fp128 @load_LD_double() {
entry:
- %0 = load double* @gd1, align 8
+ %0 = load double, double* @gd1, align 8
%conv = fpext double %0 to fp128
ret fp128 %conv
}
@@ -579,7 +579,7 @@ entry:
define void @store_LD_LD() {
entry:
- %0 = load fp128* @gld1, align 16
+ %0 = load fp128, fp128* @gld1, align 16
store fp128 %0, fp128* @gld0, align 16
ret void
}
@@ -595,7 +595,7 @@ entry:
define void @store_LD_float() {
entry:
- %0 = load fp128* @gld1, align 16
+ %0 = load fp128, fp128* @gld1, align 16
%conv = fptrunc fp128 %0 to float
store float %conv, float* @gf1, align 4
ret void
@@ -612,7 +612,7 @@ entry:
define void @store_LD_double() {
entry:
- %0 = load fp128* @gld1, align 16
+ %0 = load fp128, fp128* @gld1, align 16
%conv = fptrunc fp128 %0 to double
store double %conv, double* @gd1, align 8
ret void
diff --git a/test/CodeGen/Mips/mips64-libcall.ll b/test/CodeGen/Mips/mips64-libcall.ll
index 290baafd18b9..8512e9fcb72e 100644
--- a/test/CodeGen/Mips/mips64-libcall.ll
+++ b/test/CodeGen/Mips/mips64-libcall.ll
@@ -1,6 +1,6 @@
; RUN: llc -march=mips64el -mcpu=mips64r2 -O3 < %s |\
; RUN: FileCheck %s -check-prefix=HARD
-; RUN: llc -march=mips64el -mcpu=mips64r2 -soft-float < %s |\
+; RUN: llc -march=mips64el -mcpu=mips64r2 -mattr=+soft-float < %s |\
; RUN: FileCheck %s -check-prefix=SOFT
; Check that %add is not passed in an integer register.
diff --git a/test/CodeGen/Mips/mips64-sret.ll b/test/CodeGen/Mips/mips64-sret.ll
index ed494e965b7d..0559747f62cc 100644
--- a/test/CodeGen/Mips/mips64-sret.ll
+++ b/test/CodeGen/Mips/mips64-sret.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=mips64el -mcpu=mips64r2 -mattr=n64 < %s | FileCheck %s
+; RUN: llc -march=mips64el -mcpu=mips64r2 -target-abi=n64 < %s | FileCheck %s
define void @foo(i32* noalias sret %agg.result) nounwind {
entry:
diff --git a/test/CodeGen/Mips/mips64directive.ll b/test/CodeGen/Mips/mips64directive.ll
index 3d95f519bc64..b1052f77f5a8 100644
--- a/test/CodeGen/Mips/mips64directive.ll
+++ b/test/CodeGen/Mips/mips64directive.ll
@@ -1,12 +1,12 @@
-; RUN: llc < %s -march=mips64el -mcpu=mips4 -mattr=n64 | FileCheck %s
-; RUN: llc < %s -march=mips64el -mcpu=mips64 -mattr=n64 | FileCheck %s
+; RUN: llc < %s -march=mips64el -mcpu=mips4 -target-abi=n64 | FileCheck %s
+; RUN: llc < %s -march=mips64el -mcpu=mips64 -target-abi=n64 | FileCheck %s
@gl = global i64 1250999896321, align 8
; CHECK: 8byte
define i64 @foo1() nounwind readonly {
entry:
- %0 = load i64* @gl, align 8
+ %0 = load i64, i64* @gl, align 8
ret i64 %0
}
diff --git a/test/CodeGen/Mips/mips64ext.ll b/test/CodeGen/Mips/mips64ext.ll
index 22ea0eb7769c..9c1243b8f18c 100644
--- a/test/CodeGen/Mips/mips64ext.ll
+++ b/test/CodeGen/Mips/mips64ext.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -march=mips64el -mcpu=mips4 -mattr=n64 | FileCheck %s
-; RUN: llc < %s -march=mips64el -mcpu=mips64 -mattr=n64 | FileCheck %s
+; RUN: llc < %s -march=mips64el -mcpu=mips4 -target-abi=n64 | FileCheck %s
+; RUN: llc < %s -march=mips64el -mcpu=mips64 -target-abi=n64 | FileCheck %s
define i64 @zext64_32(i32 %a) nounwind readnone {
entry:
diff --git a/test/CodeGen/Mips/mips64extins.ll b/test/CodeGen/Mips/mips64extins.ll
index 14f92ca86947..211cd5f8e7fd 100644
--- a/test/CodeGen/Mips/mips64extins.ll
+++ b/test/CodeGen/Mips/mips64extins.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=mips64el -mcpu=mips64r2 -mattr=n64 | FileCheck %s
+; RUN: llc < %s -march=mips64el -mcpu=mips64r2 -target-abi=n64 | FileCheck %s
define i64 @dext(i64 %i) nounwind readnone {
entry:
diff --git a/test/CodeGen/Mips/mips64fpimm0.ll b/test/CodeGen/Mips/mips64fpimm0.ll
index 19e076d1ecda..0296cb523f98 100644
--- a/test/CodeGen/Mips/mips64fpimm0.ll
+++ b/test/CodeGen/Mips/mips64fpimm0.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -march=mips64el -mcpu=mips4 -mattr=n64 | FileCheck %s
-; RUN: llc < %s -march=mips64el -mcpu=mips64 -mattr=n64 | FileCheck %s
+; RUN: llc < %s -march=mips64el -mcpu=mips4 -target-abi=n64 | FileCheck %s
+; RUN: llc < %s -march=mips64el -mcpu=mips64 -target-abi=n64 | FileCheck %s
define double @foo1() nounwind readnone {
entry:
diff --git a/test/CodeGen/Mips/mips64fpldst.ll b/test/CodeGen/Mips/mips64fpldst.ll
index 2f42270b645d..55d5c775cbba 100644
--- a/test/CodeGen/Mips/mips64fpldst.ll
+++ b/test/CodeGen/Mips/mips64fpldst.ll
@@ -1,7 +1,7 @@
-; RUN: llc < %s -march=mips64el -mcpu=mips4 -mattr=-n64,n64 | FileCheck %s -check-prefix=CHECK-N64
-; RUN: llc < %s -march=mips64el -mcpu=mips4 -mattr=-n64,n32 | FileCheck %s -check-prefix=CHECK-N32
-; RUN: llc < %s -march=mips64el -mcpu=mips64 -mattr=-n64,n64 | FileCheck %s -check-prefix=CHECK-N64
-; RUN: llc < %s -march=mips64el -mcpu=mips64 -mattr=-n64,n32 | FileCheck %s -check-prefix=CHECK-N32
+; RUN: llc < %s -march=mips64el -mcpu=mips4 -target-abi n64 | FileCheck %s -check-prefix=CHECK-N64
+; RUN: llc < %s -march=mips64el -mcpu=mips4 -target-abi n32 | FileCheck %s -check-prefix=CHECK-N32
+; RUN: llc < %s -march=mips64el -mcpu=mips64 -target-abi n64 | FileCheck %s -check-prefix=CHECK-N64
+; RUN: llc < %s -march=mips64el -mcpu=mips64 -target-abi n32 | FileCheck %s -check-prefix=CHECK-N32
@f0 = common global float 0.000000e+00, align 4
@d0 = common global double 0.000000e+00, align 8
@@ -16,7 +16,7 @@ entry:
; CHECK-N32: funcfl1
; CHECK-N32: lw $[[R0:[0-9]+]], %got_disp(f0)
; CHECK-N32: lwc1 $f{{[0-9]+}}, 0($[[R0]])
- %0 = load float* @f0, align 4
+ %0 = load float, float* @f0, align 4
ret float %0
}
@@ -28,7 +28,7 @@ entry:
; CHECK-N32: funcfl2
; CHECK-N32: lw $[[R0:[0-9]+]], %got_disp(d0)
; CHECK-N32: ldc1 $f{{[0-9]+}}, 0($[[R0]])
- %0 = load double* @d0, align 8
+ %0 = load double, double* @d0, align 8
ret double %0
}
@@ -40,7 +40,7 @@ entry:
; CHECK-N32: funcfs1
; CHECK-N32: lw $[[R0:[0-9]+]], %got_disp(f0)
; CHECK-N32: swc1 $f{{[0-9]+}}, 0($[[R0]])
- %0 = load float* @f1, align 4
+ %0 = load float, float* @f1, align 4
store float %0, float* @f0, align 4
ret void
}
@@ -53,7 +53,7 @@ entry:
; CHECK-N32: funcfs2
; CHECK-N32: lw $[[R0:[0-9]+]], %got_disp(d0)
; CHECK-N32: sdc1 $f{{[0-9]+}}, 0($[[R0]])
- %0 = load double* @d1, align 8
+ %0 = load double, double* @d1, align 8
store double %0, double* @d0, align 8
ret void
}
diff --git a/test/CodeGen/Mips/mips64instrs.ll b/test/CodeGen/Mips/mips64instrs.ll
index ed617be6532e..d64cdceb6b81 100644
--- a/test/CodeGen/Mips/mips64instrs.ll
+++ b/test/CodeGen/Mips/mips64instrs.ll
@@ -123,8 +123,8 @@ entry:
; GPRMULDIV: ddiv $2, $[[T0]], $[[T1]]
; GPRMULDIV: teq $[[T1]], $zero, 7
- %0 = load i64* @gll0, align 8
- %1 = load i64* @gll1, align 8
+ %0 = load i64, i64* @gll0, align 8
+ %1 = load i64, i64* @gll1, align 8
%div = sdiv i64 %0, %1
ret i64 %div
}
@@ -144,8 +144,8 @@ entry:
; GPRMULDIV: ddivu $2, $[[T0]], $[[T1]]
; GPRMULDIV: teq $[[T1]], $zero, 7
- %0 = load i64* @gll0, align 8
- %1 = load i64* @gll1, align 8
+ %0 = load i64, i64* @gll0, align 8
+ %1 = load i64, i64* @gll1, align 8
%div = udiv i64 %0, %1
ret i64 %div
}
diff --git a/test/CodeGen/Mips/mips64intldst.ll b/test/CodeGen/Mips/mips64intldst.ll
index c3607baeefeb..658ab88481c4 100644
--- a/test/CodeGen/Mips/mips64intldst.ll
+++ b/test/CodeGen/Mips/mips64intldst.ll
@@ -1,7 +1,7 @@
-; RUN: llc < %s -march=mips64el -mcpu=mips4 -mattr=-n64,n64 | FileCheck %s -check-prefix=CHECK-N64
-; RUN: llc < %s -march=mips64el -mcpu=mips4 -mattr=-n64,n32 | FileCheck %s -check-prefix=CHECK-N32
-; RUN: llc < %s -march=mips64el -mcpu=mips64 -mattr=-n64,n64 | FileCheck %s -check-prefix=CHECK-N64
-; RUN: llc < %s -march=mips64el -mcpu=mips64 -mattr=-n64,n32 | FileCheck %s -check-prefix=CHECK-N32
+; RUN: llc < %s -march=mips64el -mcpu=mips4 -target-abi n64 | FileCheck %s -check-prefix=CHECK-N64
+; RUN: llc < %s -march=mips64el -mcpu=mips4 -target-abi n32 | FileCheck %s -check-prefix=CHECK-N32
+; RUN: llc < %s -march=mips64el -mcpu=mips64 -target-abi n64 | FileCheck %s -check-prefix=CHECK-N64
+; RUN: llc < %s -march=mips64el -mcpu=mips64 -target-abi n32 | FileCheck %s -check-prefix=CHECK-N32
@c = common global i8 0, align 4
@s = common global i16 0, align 4
@@ -20,7 +20,7 @@ entry:
; CHECK-N32: func1
; CHECK-N32: lw $[[R0:[0-9]+]], %got_disp(c)
; CHECK-N32: lb ${{[0-9]+}}, 0($[[R0]])
- %0 = load i8* @c, align 4
+ %0 = load i8, i8* @c, align 4
%conv = sext i8 %0 to i64
ret i64 %conv
}
@@ -33,7 +33,7 @@ entry:
; CHECK-N32: func2
; CHECK-N32: lw $[[R0:[0-9]+]], %got_disp(s)
; CHECK-N32: lh ${{[0-9]+}}, 0($[[R0]])
- %0 = load i16* @s, align 4
+ %0 = load i16, i16* @s, align 4
%conv = sext i16 %0 to i64
ret i64 %conv
}
@@ -46,7 +46,7 @@ entry:
; CHECK-N32: func3
; CHECK-N32: lw $[[R0:[0-9]+]], %got_disp(i)
; CHECK-N32: lw ${{[0-9]+}}, 0($[[R0]])
- %0 = load i32* @i, align 4
+ %0 = load i32, i32* @i, align 4
%conv = sext i32 %0 to i64
ret i64 %conv
}
@@ -59,7 +59,7 @@ entry:
; CHECK-N32: func4
; CHECK-N32: lw $[[R0:[0-9]+]], %got_disp(l)
; CHECK-N32: ld ${{[0-9]+}}, 0($[[R0]])
- %0 = load i64* @l, align 8
+ %0 = load i64, i64* @l, align 8
ret i64 %0
}
@@ -71,7 +71,7 @@ entry:
; CHECK-N32: ufunc1
; CHECK-N32: lw $[[R0:[0-9]+]], %got_disp(uc)
; CHECK-N32: lbu ${{[0-9]+}}, 0($[[R0]])
- %0 = load i8* @uc, align 4
+ %0 = load i8, i8* @uc, align 4
%conv = zext i8 %0 to i64
ret i64 %conv
}
@@ -84,7 +84,7 @@ entry:
; CHECK-N32: ufunc2
; CHECK-N32: lw $[[R0:[0-9]+]], %got_disp(us)
; CHECK-N32: lhu ${{[0-9]+}}, 0($[[R0]])
- %0 = load i16* @us, align 4
+ %0 = load i16, i16* @us, align 4
%conv = zext i16 %0 to i64
ret i64 %conv
}
@@ -97,7 +97,7 @@ entry:
; CHECK-N32: ufunc3
; CHECK-N32: lw $[[R0:[0-9]+]], %got_disp(ui)
; CHECK-N32: lwu ${{[0-9]+}}, 0($[[R0]])
- %0 = load i32* @ui, align 4
+ %0 = load i32, i32* @ui, align 4
%conv = zext i32 %0 to i64
ret i64 %conv
}
@@ -110,7 +110,7 @@ entry:
; CHECK-N32: sfunc1
; CHECK-N32: lw $[[R0:[0-9]+]], %got_disp(c)
; CHECK-N32: sb ${{[0-9]+}}, 0($[[R0]])
- %0 = load i64* @l1, align 8
+ %0 = load i64, i64* @l1, align 8
%conv = trunc i64 %0 to i8
store i8 %conv, i8* @c, align 4
ret void
@@ -124,7 +124,7 @@ entry:
; CHECK-N32: sfunc2
; CHECK-N32: lw $[[R0:[0-9]+]], %got_disp(s)
; CHECK-N32: sh ${{[0-9]+}}, 0($[[R0]])
- %0 = load i64* @l1, align 8
+ %0 = load i64, i64* @l1, align 8
%conv = trunc i64 %0 to i16
store i16 %conv, i16* @s, align 4
ret void
@@ -138,7 +138,7 @@ entry:
; CHECK-N32: sfunc3
; CHECK-N32: lw $[[R0:[0-9]+]], %got_disp(i)
; CHECK-N32: sw ${{[0-9]+}}, 0($[[R0]])
- %0 = load i64* @l1, align 8
+ %0 = load i64, i64* @l1, align 8
%conv = trunc i64 %0 to i32
store i32 %conv, i32* @i, align 4
ret void
@@ -152,7 +152,7 @@ entry:
; CHECK-N32: sfunc4
; CHECK-N32: lw $[[R0:[0-9]+]], %got_disp(l)
; CHECK-N32: sd ${{[0-9]+}}, 0($[[R0]])
- %0 = load i64* @l1, align 8
+ %0 = load i64, i64* @l1, align 8
store i64 %0, i64* @l, align 8
ret void
}
diff --git a/test/CodeGen/Mips/mips64shift.ll b/test/CodeGen/Mips/mips64shift.ll
index 45d1c9532276..52c6f9066392 100644
--- a/test/CodeGen/Mips/mips64shift.ll
+++ b/test/CodeGen/Mips/mips64shift.ll
@@ -65,7 +65,8 @@ entry:
define i64 @f9(i64 %a0, i64 %a1) nounwind readnone {
entry:
-; CHECK: drotrv
+; CHECK-NOT: sll
+; CHECK: drotrv
%shr = lshr i64 %a0, %a1
%sub = sub i64 64, %a1
%shl = shl i64 %a0, %sub
@@ -75,7 +76,8 @@ entry:
define i64 @f10(i64 %a0, i64 %a1) nounwind readnone {
entry:
-; CHECK: drotrv
+; CHECK-NOT: sll
+; CHECK: drotrv
%shl = shl i64 %a0, %a1
%sub = sub i64 64, %a1
%shr = lshr i64 %a0, %sub
diff --git a/test/CodeGen/Mips/mips64signextendsesf.ll b/test/CodeGen/Mips/mips64signextendsesf.ll
index dec83b80afea..d0ce1b86bf56 100644
--- a/test/CodeGen/Mips/mips64signextendsesf.ll
+++ b/test/CodeGen/Mips/mips64signextendsesf.ll
@@ -1,11 +1,11 @@
-; RUN: llc -march=mips64 -mcpu=mips64r2 -soft-float -O2 < %s | FileCheck %s
+; RUN: llc -march=mips64 -mcpu=mips64r2 -mattr=+soft-float -O2 < %s | FileCheck %s
define void @foosf() #0 {
entry:
%in = alloca float, align 4
%out = alloca float, align 4
store volatile float 0xBFD59E1380000000, float* %in, align 4
- %in.0.in.0. = load volatile float* %in, align 4
+ %in.0.in.0. = load volatile float, float* %in, align 4
%rintf = tail call float @rintf(float %in.0.in.0.) #1
store volatile float %rintf, float* %out, align 4
ret void
@@ -20,7 +20,7 @@ declare float @rintf(float)
define float @foosf1(float* nocapture readonly %a) #0 {
entry:
- %0 = load float* %a, align 4
+ %0 = load float, float* %a, align 4
%call = tail call float @roundf(float %0) #2
ret float %call
@@ -34,7 +34,7 @@ declare float @roundf(float) #1
define float @foosf2(float* nocapture readonly %a) #0 {
entry:
- %0 = load float* %a, align 4
+ %0 = load float, float* %a, align 4
%call = tail call float @truncf(float %0) #2
ret float %call
@@ -48,7 +48,7 @@ declare float @truncf(float) #1
define float @foosf3(float* nocapture readonly %a) #0 {
entry:
- %0 = load float* %a, align 4
+ %0 = load float, float* %a, align 4
%call = tail call float @floorf(float %0) #2
ret float %call
@@ -62,7 +62,7 @@ declare float @floorf(float) #1
define float @foosf4(float* nocapture readonly %a) #0 {
entry:
- %0 = load float* %a, align 4
+ %0 = load float, float* %a, align 4
%call = tail call float @nearbyintf(float %0) #2
ret float %call
@@ -76,7 +76,7 @@ declare float @nearbyintf(float) #1
define float @foosf5(float* nocapture readonly %a) #0 {
entry:
- %0 = load float* %a, align 4
+ %0 = load float, float* %a, align 4
%mul = fmul float %0, undef
ret float %mul
@@ -88,7 +88,7 @@ entry:
define float @foosf6(float* nocapture readonly %a) #0 {
entry:
- %0 = load float* %a, align 4
+ %0 = load float, float* %a, align 4
%sub = fsub float %0, undef
ret float %sub
@@ -100,7 +100,7 @@ entry:
define float @foosf7(float* nocapture readonly %a) #0 {
entry:
- %0 = load float* %a, align 4
+ %0 = load float, float* %a, align 4
%add = fadd float %0, undef
ret float %add
@@ -113,8 +113,8 @@ entry:
define float @foosf8(float* nocapture readonly %a) #0 {
entry:
%b = alloca float, align 4
- %b.0.b.0. = load volatile float* %b, align 4
- %0 = load float* %a, align 4
+ %b.0.b.0. = load volatile float, float* %b, align 4
+ %0 = load float, float* %a, align 4
%div = fdiv float %b.0.b.0., %0
ret float %div
@@ -127,9 +127,9 @@ entry:
define float @foosf9() #0 {
entry:
%b = alloca float, align 4
- %b.0.b.0. = load volatile float* %b, align 4
+ %b.0.b.0. = load volatile float, float* %b, align 4
%conv = fpext float %b.0.b.0. to double
- %b.0.b.0.3 = load volatile float* %b, align 4
+ %b.0.b.0.3 = load volatile float, float* %b, align 4
%conv1 = fpext float %b.0.b.0.3 to double
%call = tail call double @pow(double %conv, double %conv1) #1
%conv2 = fptrunc double %call to float
@@ -146,7 +146,7 @@ declare double @pow(double, double) #0
define float @foosf10() #0 {
entry:
%a = alloca float, align 4
- %a.0.a.0. = load volatile float* %a, align 4
+ %a.0.a.0. = load volatile float, float* %a, align 4
%conv = fpext float %a.0.a.0. to double
%call = tail call double @sin(double %conv) #1
%conv1 = fptrunc double %call to float
@@ -163,7 +163,7 @@ declare double @sin(double) #0
define float @foosf11() #0 {
entry:
%b = alloca float, align 4
- %b.0.b.0. = load volatile float* %b, align 4
+ %b.0.b.0. = load volatile float, float* %b, align 4
%call = tail call float @ceilf(float %b.0.b.0.) #2
ret float %call
@@ -179,8 +179,8 @@ define float @foosf12() #0 {
entry:
%b = alloca float, align 4
%a = alloca float, align 4
- %b.0.b.0. = load volatile float* %b, align 4
- %a.0.a.0. = load volatile float* %a, align 4
+ %b.0.b.0. = load volatile float, float* %b, align 4
+ %a.0.a.0. = load volatile float, float* %a, align 4
%call = tail call float @fmaxf(float %b.0.b.0., float %a.0.a.0.) #2
ret float %call
@@ -196,8 +196,8 @@ define float @foosf13() #0 {
entry:
%b = alloca float, align 4
%a = alloca float, align 4
- %b.0.b.0. = load volatile float* %b, align 4
- %a.0.a.0. = load volatile float* %a, align 4
+ %b.0.b.0. = load volatile float, float* %b, align 4
+ %a.0.a.0. = load volatile float, float* %a, align 4
%call = tail call float @fminf(float %b.0.b.0., float %a.0.a.0.) #2
ret float %call
@@ -211,4 +211,4 @@ declare float @fminf(float, float) #1
attributes #0 = { nounwind "use-soft-float"="true" }
-attributes #1 = { nounwind readnone "use-soft-float"="true" }
+attributes #1 = { nounwind readnone "use-soft-float"="true" }
\ No newline at end of file
diff --git a/test/CodeGen/Mips/mips64sinttofpsf.ll b/test/CodeGen/Mips/mips64sinttofpsf.ll
index d3d46036f7da..87f803ecf552 100644
--- a/test/CodeGen/Mips/mips64sinttofpsf.ll
+++ b/test/CodeGen/Mips/mips64sinttofpsf.ll
@@ -1,11 +1,11 @@
-; RUN: llc -march=mips64 -mcpu=mips64r2 -soft-float -O0 < %s | FileCheck %s
+; RUN: llc -march=mips64 -mcpu=mips64r2 -mattr=+soft-float -O0 < %s | FileCheck %s
define double @foo() #0 {
entry:
%x = alloca i32, align 4
store volatile i32 -32, i32* %x, align 4
- %0 = load volatile i32* %x, align 4
+ %0 = load volatile i32, i32* %x, align 4
%conv = sitofp i32 %0 to double
ret double %conv
diff --git a/test/CodeGen/Mips/mipslopat.ll b/test/CodeGen/Mips/mipslopat.ll
index 1f433b9870ce..63b68c1762b2 100644
--- a/test/CodeGen/Mips/mipslopat.ll
+++ b/test/CodeGen/Mips/mipslopat.ll
@@ -6,10 +6,10 @@
define void @simple_vol_file() nounwind {
entry:
- %tmp = load volatile i32** @stat_vol_ptr_int, align 4
+ %tmp = load volatile i32*, i32** @stat_vol_ptr_int, align 4
%0 = bitcast i32* %tmp to i8*
call void @llvm.prefetch(i8* %0, i32 0, i32 0, i32 1)
- %tmp1 = load i32** @stat_ptr_vol_int, align 4
+ %tmp1 = load i32*, i32** @stat_ptr_vol_int, align 4
%1 = bitcast i32* %tmp1 to i8*
call void @llvm.prefetch(i8* %1, i32 0, i32 0, i32 1)
ret void
diff --git a/test/CodeGen/Mips/misha.ll b/test/CodeGen/Mips/misha.ll
index 65d3b7b5d874..23ad7f6057af 100644
--- a/test/CodeGen/Mips/misha.ll
+++ b/test/CodeGen/Mips/misha.ll
@@ -8,15 +8,15 @@ entry:
br i1 %cmp8, label %for.end, label %for.body.lr.ph
for.body.lr.ph: ; preds = %entry
- %.pre = load i8* %to, align 1
+ %.pre = load i8, i8* %to, align 1
br label %for.body
for.body: ; preds = %for.body.lr.ph, %for.body
%1 = phi i8 [ %.pre, %for.body.lr.ph ], [ %conv4, %for.body ]
%i.010 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.body ]
%from.addr.09 = phi i8* [ %from, %for.body.lr.ph ], [ %incdec.ptr, %for.body ]
- %incdec.ptr = getelementptr inbounds i8* %from.addr.09, i32 1
- %2 = load i8* %from.addr.09, align 1
+ %incdec.ptr = getelementptr inbounds i8, i8* %from.addr.09, i32 1
+ %2 = load i8, i8* %from.addr.09, align 1
%conv27 = zext i8 %2 to i32
%conv36 = zext i8 %1 to i32
%add = add nsw i32 %conv36, %conv27
@@ -44,15 +44,15 @@ entry:
br i1 %cmp8, label %for.end, label %for.body.lr.ph
for.body.lr.ph: ; preds = %entry
- %.pre = load i16* %to, align 2
+ %.pre = load i16, i16* %to, align 2
br label %for.body
for.body: ; preds = %for.body.lr.ph, %for.body
%1 = phi i16 [ %.pre, %for.body.lr.ph ], [ %conv4, %for.body ]
%i.010 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.body ]
%from.addr.09 = phi i16* [ %from, %for.body.lr.ph ], [ %incdec.ptr, %for.body ]
- %incdec.ptr = getelementptr inbounds i16* %from.addr.09, i32 1
- %2 = load i16* %from.addr.09, align 2
+ %incdec.ptr = getelementptr inbounds i16, i16* %from.addr.09, i32 1
+ %2 = load i16, i16* %from.addr.09, align 2
%conv27 = zext i16 %2 to i32
%conv36 = zext i16 %1 to i32
%add = add nsw i32 %conv36, %conv27
diff --git a/test/CodeGen/Mips/mno-ldc1-sdc1.ll b/test/CodeGen/Mips/mno-ldc1-sdc1.ll
index db653eadf2f7..c7eda3320bc6 100644
--- a/test/CodeGen/Mips/mno-ldc1-sdc1.ll
+++ b/test/CodeGen/Mips/mno-ldc1-sdc1.ll
@@ -111,7 +111,7 @@
define double @test_ldc1() {
entry:
- %0 = load double* @g0, align 8
+ %0 = load double, double* @g0, align 8
ret double %0
}
@@ -212,8 +212,8 @@ entry:
define double @test_ldxc1(double* nocapture readonly %a, i32 %i) {
entry:
- %arrayidx = getelementptr inbounds double* %a, i32 %i
- %0 = load double* %arrayidx, align 8
+ %arrayidx = getelementptr inbounds double, double* %a, i32 %i
+ %0 = load double, double* %arrayidx, align 8
ret double %0
}
@@ -243,7 +243,7 @@ entry:
define void @test_sdxc1(double %b, double* nocapture %a, i32 %i) {
entry:
- %arrayidx = getelementptr inbounds double* %a, i32 %i
+ %arrayidx = getelementptr inbounds double, double* %a, i32 %i
store double %b, double* %arrayidx, align 8
ret void
}
diff --git a/test/CodeGen/Mips/msa/2r.ll b/test/CodeGen/Mips/msa/2r.ll
index da35ad82cad1..501936c76e73 100644
--- a/test/CodeGen/Mips/msa/2r.ll
+++ b/test/CodeGen/Mips/msa/2r.ll
@@ -8,7 +8,7 @@
define void @llvm_mips_nloc_b_test() nounwind {
entry:
- %0 = load <16 x i8>* @llvm_mips_nloc_b_ARG1
+ %0 = load <16 x i8>, <16 x i8>* @llvm_mips_nloc_b_ARG1
%1 = tail call <16 x i8> @llvm.mips.nloc.b(<16 x i8> %0)
store <16 x i8> %1, <16 x i8>* @llvm_mips_nloc_b_RES
ret void
@@ -29,7 +29,7 @@ declare <16 x i8> @llvm.mips.nloc.b(<16 x i8>) nounwind
define void @llvm_mips_nloc_h_test() nounwind {
entry:
- %0 = load <8 x i16>* @llvm_mips_nloc_h_ARG1
+ %0 = load <8 x i16>, <8 x i16>* @llvm_mips_nloc_h_ARG1
%1 = tail call <8 x i16> @llvm.mips.nloc.h(<8 x i16> %0)
store <8 x i16> %1, <8 x i16>* @llvm_mips_nloc_h_RES
ret void
@@ -50,7 +50,7 @@ declare <8 x i16> @llvm.mips.nloc.h(<8 x i16>) nounwind
define void @llvm_mips_nloc_w_test() nounwind {
entry:
- %0 = load <4 x i32>* @llvm_mips_nloc_w_ARG1
+ %0 = load <4 x i32>, <4 x i32>* @llvm_mips_nloc_w_ARG1
%1 = tail call <4 x i32> @llvm.mips.nloc.w(<4 x i32> %0)
store <4 x i32> %1, <4 x i32>* @llvm_mips_nloc_w_RES
ret void
@@ -71,7 +71,7 @@ declare <4 x i32> @llvm.mips.nloc.w(<4 x i32>) nounwind
define void @llvm_mips_nloc_d_test() nounwind {
entry:
- %0 = load <2 x i64>* @llvm_mips_nloc_d_ARG1
+ %0 = load <2 x i64>, <2 x i64>* @llvm_mips_nloc_d_ARG1
%1 = tail call <2 x i64> @llvm.mips.nloc.d(<2 x i64> %0)
store <2 x i64> %1, <2 x i64>* @llvm_mips_nloc_d_RES
ret void
@@ -92,7 +92,7 @@ declare <2 x i64> @llvm.mips.nloc.d(<2 x i64>) nounwind
define void @llvm_mips_nlzc_b_test() nounwind {
entry:
- %0 = load <16 x i8>* @llvm_mips_nlzc_b_ARG1
+ %0 = load <16 x i8>, <16 x i8>* @llvm_mips_nlzc_b_ARG1
%1 = tail call <16 x i8> @llvm.mips.nlzc.b(<16 x i8> %0)
store <16 x i8> %1, <16 x i8>* @llvm_mips_nlzc_b_RES
ret void
@@ -113,7 +113,7 @@ declare <16 x i8> @llvm.mips.nlzc.b(<16 x i8>) nounwind
define void @llvm_mips_nlzc_h_test() nounwind {
entry:
- %0 = load <8 x i16>* @llvm_mips_nlzc_h_ARG1
+ %0 = load <8 x i16>, <8 x i16>* @llvm_mips_nlzc_h_ARG1
%1 = tail call <8 x i16> @llvm.mips.nlzc.h(<8 x i16> %0)
store <8 x i16> %1, <8 x i16>* @llvm_mips_nlzc_h_RES
ret void
@@ -134,7 +134,7 @@ declare <8 x i16> @llvm.mips.nlzc.h(<8 x i16>) nounwind
define void @llvm_mips_nlzc_w_test() nounwind {
entry:
- %0 = load <4 x i32>* @llvm_mips_nlzc_w_ARG1
+ %0 = load <4 x i32>, <4 x i32>* @llvm_mips_nlzc_w_ARG1
%1 = tail call <4 x i32> @llvm.mips.nlzc.w(<4 x i32> %0)
store <4 x i32> %1, <4 x i32>* @llvm_mips_nlzc_w_RES
ret void
@@ -155,7 +155,7 @@ declare <4 x i32> @llvm.mips.nlzc.w(<4 x i32>) nounwind
define void @llvm_mips_nlzc_d_test() nounwind {
entry:
- %0 = load <2 x i64>* @llvm_mips_nlzc_d_ARG1
+ %0 = load <2 x i64>, <2 x i64>* @llvm_mips_nlzc_d_ARG1
%1 = tail call <2 x i64> @llvm.mips.nlzc.d(<2 x i64> %0)
store <2 x i64> %1, <2 x i64>* @llvm_mips_nlzc_d_RES
ret void
@@ -176,7 +176,7 @@ declare <2 x i64> @llvm.mips.nlzc.d(<2 x i64>) nounwind
define void @llvm_mips_pcnt_b_test() nounwind {
entry:
- %0 = load <16 x i8>* @llvm_mips_pcnt_b_ARG1
+ %0 = load <16 x i8>, <16 x i8>* @llvm_mips_pcnt_b_ARG1
%1 = tail call <16 x i8> @llvm.mips.pcnt.b(<16 x i8> %0)
store <16 x i8> %1, <16 x i8>* @llvm_mips_pcnt_b_RES
ret void
@@ -197,7 +197,7 @@ declare <16 x i8> @llvm.mips.pcnt.b(<16 x i8>) nounwind
define void @llvm_mips_pcnt_h_test() nounwind {
entry:
- %0 = load <8 x i16>* @llvm_mips_pcnt_h_ARG1
+ %0 = load <8 x i16>, <8 x i16>* @llvm_mips_pcnt_h_ARG1
%1 = tail call <8 x i16> @llvm.mips.pcnt.h(<8 x i16> %0)
store <8 x i16> %1, <8 x i16>* @llvm_mips_pcnt_h_RES
ret void
@@ -218,7 +218,7 @@ declare <8 x i16> @llvm.mips.pcnt.h(<8 x i16>) nounwind
define void @llvm_mips_pcnt_w_test() nounwind {
entry:
- %0 = load <4 x i32>* @llvm_mips_pcnt_w_ARG1
+ %0 = load <4 x i32>, <4 x i32>* @llvm_mips_pcnt_w_ARG1
%1 = tail call <4 x i32> @llvm.mips.pcnt.w(<4 x i32> %0)
store <4 x i32> %1, <4 x i32>* @llvm_mips_pcnt_w_RES
ret void
@@ -239,7 +239,7 @@ declare <4 x i32> @llvm.mips.pcnt.w(<4 x i32>) nounwind
define void @llvm_mips_pcnt_d_test() nounwind {
entry:
- %0 = load <2 x i64>* @llvm_mips_pcnt_d_ARG1
+ %0 = load <2 x i64>, <2 x i64>* @llvm_mips_pcnt_d_ARG1
%1 = tail call <2 x i64> @llvm.mips.pcnt.d(<2 x i64> %0)
store <2 x i64> %1, <2 x i64>* @llvm_mips_pcnt_d_RES
ret void
diff --git a/test/CodeGen/Mips/msa/2r_vector_scalar.ll b/test/CodeGen/Mips/msa/2r_vector_scalar.ll
index 64e459e4d9a9..ddcd3cf757d9 100644
--- a/test/CodeGen/Mips/msa/2r_vector_scalar.ll
+++ b/test/CodeGen/Mips/msa/2r_vector_scalar.ll
@@ -15,7 +15,7 @@
define void @llvm_mips_fill_b_test() nounwind {
entry:
- %0 = load i32* @llvm_mips_fill_b_ARG1
+ %0 = load i32, i32* @llvm_mips_fill_b_ARG1
%1 = tail call <16 x i8> @llvm.mips.fill.b(i32 %0)
store <16 x i8> %1, <16 x i8>* @llvm_mips_fill_b_RES
ret void
@@ -35,7 +35,7 @@ declare <16 x i8> @llvm.mips.fill.b(i32) nounwind
define void @llvm_mips_fill_h_test() nounwind {
entry:
- %0 = load i32* @llvm_mips_fill_h_ARG1
+ %0 = load i32, i32* @llvm_mips_fill_h_ARG1
%1 = tail call <8 x i16> @llvm.mips.fill.h(i32 %0)
store <8 x i16> %1, <8 x i16>* @llvm_mips_fill_h_RES
ret void
@@ -55,7 +55,7 @@ declare <8 x i16> @llvm.mips.fill.h(i32) nounwind
define void @llvm_mips_fill_w_test() nounwind {
entry:
- %0 = load i32* @llvm_mips_fill_w_ARG1
+ %0 = load i32, i32* @llvm_mips_fill_w_ARG1
%1 = tail call <4 x i32> @llvm.mips.fill.w(i32 %0)
store <4 x i32> %1, <4 x i32>* @llvm_mips_fill_w_RES
ret void
@@ -75,7 +75,7 @@ declare <4 x i32> @llvm.mips.fill.w(i32) nounwind
define void @llvm_mips_fill_d_test() nounwind {
entry:
- %0 = load i64* @llvm_mips_fill_d_ARG1
+ %0 = load i64, i64* @llvm_mips_fill_d_ARG1
%1 = tail call <2 x i64> @llvm.mips.fill.d(i64 %0)
store <2 x i64> %1, <2 x i64>* @llvm_mips_fill_d_RES
ret void
diff --git a/test/CodeGen/Mips/msa/2rf.ll b/test/CodeGen/Mips/msa/2rf.ll
index b361ef5eae21..1dbfbda1b612 100644
--- a/test/CodeGen/Mips/msa/2rf.ll
+++ b/test/CodeGen/Mips/msa/2rf.ll
@@ -8,7 +8,7 @@
define void @llvm_mips_flog2_w_test() nounwind {
entry:
- %0 = load <4 x float>* @llvm_mips_flog2_w_ARG1
+ %0 = load <4 x float>, <4 x float>* @llvm_mips_flog2_w_ARG1
%1 = tail call <4 x float> @llvm.mips.flog2.w(<4 x float> %0)
store <4 x float> %1, <4 x float>* @llvm_mips_flog2_w_RES
ret void
@@ -29,7 +29,7 @@ declare <4 x float> @llvm.mips.flog2.w(<4 x float>) nounwind
define void @llvm_mips_flog2_d_test() nounwind {
entry:
- %0 = load <2 x double>* @llvm_mips_flog2_d_ARG1
+ %0 = load <2 x double>, <2 x double>* @llvm_mips_flog2_d_ARG1
%1 = tail call <2 x double> @llvm.mips.flog2.d(<2 x double> %0)
store <2 x double> %1, <2 x double>* @llvm_mips_flog2_d_RES
ret void
@@ -47,7 +47,7 @@ declare <2 x double> @llvm.mips.flog2.d(<2 x double>) nounwind
define void @flog2_w_test() nounwind {
entry:
- %0 = load <4 x float>* @llvm_mips_flog2_w_ARG1
+ %0 = load <4 x float>, <4 x float>* @llvm_mips_flog2_w_ARG1
%1 = tail call <4 x float> @llvm.log2.v4f32(<4 x float> %0)
store <4 x float> %1, <4 x float>* @llvm_mips_flog2_w_RES
ret void
@@ -65,7 +65,7 @@ declare <4 x float> @llvm.log2.v4f32(<4 x float> %val)
define void @flog2_d_test() nounwind {
entry:
- %0 = load <2 x double>* @llvm_mips_flog2_d_ARG1
+ %0 = load <2 x double>, <2 x double>* @llvm_mips_flog2_d_ARG1
%1 = tail call <2 x double> @llvm.log2.v2f64(<2 x double> %0)
store <2 x double> %1, <2 x double>* @llvm_mips_flog2_d_RES
ret void
@@ -86,7 +86,7 @@ declare <2 x double> @llvm.log2.v2f64(<2 x double> %val)
define void @llvm_mips_frint_w_test() nounwind {
entry:
- %0 = load <4 x float>* @llvm_mips_frint_w_ARG1
+ %0 = load <4 x float>, <4 x float>* @llvm_mips_frint_w_ARG1
%1 = tail call <4 x float> @llvm.mips.frint.w(<4 x float> %0)
store <4 x float> %1, <4 x float>* @llvm_mips_frint_w_RES
ret void
@@ -107,7 +107,7 @@ declare <4 x float> @llvm.mips.frint.w(<4 x float>) nounwind
define void @llvm_mips_frint_d_test() nounwind {
entry:
- %0 = load <2 x double>* @llvm_mips_frint_d_ARG1
+ %0 = load <2 x double>, <2 x double>* @llvm_mips_frint_d_ARG1
%1 = tail call <2 x double> @llvm.mips.frint.d(<2 x double> %0)
store <2 x double> %1, <2 x double>* @llvm_mips_frint_d_RES
ret void
@@ -125,7 +125,7 @@ declare <2 x double> @llvm.mips.frint.d(<2 x double>) nounwind
define void @frint_w_test() nounwind {
entry:
- %0 = load <4 x float>* @llvm_mips_frint_w_ARG1
+ %0 = load <4 x float>, <4 x float>* @llvm_mips_frint_w_ARG1
%1 = tail call <4 x float> @llvm.rint.v4f32(<4 x float> %0)
store <4 x float> %1, <4 x float>* @llvm_mips_frint_w_RES
ret void
@@ -143,7 +143,7 @@ declare <4 x float> @llvm.rint.v4f32(<4 x float>) nounwind
define void @frint_d_test() nounwind {
entry:
- %0 = load <2 x double>* @llvm_mips_frint_d_ARG1
+ %0 = load <2 x double>, <2 x double>* @llvm_mips_frint_d_ARG1
%1 = tail call <2 x double> @llvm.rint.v2f64(<2 x double> %0)
store <2 x double> %1, <2 x double>* @llvm_mips_frint_d_RES
ret void
@@ -164,7 +164,7 @@ declare <2 x double> @llvm.rint.v2f64(<2 x double>) nounwind
define void @llvm_mips_frcp_w_test() nounwind {
entry:
- %0 = load <4 x float>* @llvm_mips_frcp_w_ARG1
+ %0 = load <4 x float>, <4 x float>* @llvm_mips_frcp_w_ARG1
%1 = tail call <4 x float> @llvm.mips.frcp.w(<4 x float> %0)
store <4 x float> %1, <4 x float>* @llvm_mips_frcp_w_RES
ret void
@@ -185,7 +185,7 @@ declare <4 x float> @llvm.mips.frcp.w(<4 x float>) nounwind
define void @llvm_mips_frcp_d_test() nounwind {
entry:
- %0 = load <2 x double>* @llvm_mips_frcp_d_ARG1
+ %0 = load <2 x double>, <2 x double>* @llvm_mips_frcp_d_ARG1
%1 = tail call <2 x double> @llvm.mips.frcp.d(<2 x double> %0)
store <2 x double> %1, <2 x double>* @llvm_mips_frcp_d_RES
ret void
@@ -206,7 +206,7 @@ declare <2 x double> @llvm.mips.frcp.d(<2 x double>) nounwind
define void @llvm_mips_frsqrt_w_test() nounwind {
entry:
- %0 = load <4 x float>* @llvm_mips_frsqrt_w_ARG1
+ %0 = load <4 x float>, <4 x float>* @llvm_mips_frsqrt_w_ARG1
%1 = tail call <4 x float> @llvm.mips.frsqrt.w(<4 x float> %0)
store <4 x float> %1, <4 x float>* @llvm_mips_frsqrt_w_RES
ret void
@@ -227,7 +227,7 @@ declare <4 x float> @llvm.mips.frsqrt.w(<4 x float>) nounwind
define void @llvm_mips_frsqrt_d_test() nounwind {
entry:
- %0 = load <2 x double>* @llvm_mips_frsqrt_d_ARG1
+ %0 = load <2 x double>, <2 x double>* @llvm_mips_frsqrt_d_ARG1
%1 = tail call <2 x double> @llvm.mips.frsqrt.d(<2 x double> %0)
store <2 x double> %1, <2 x double>* @llvm_mips_frsqrt_d_RES
ret void
@@ -248,7 +248,7 @@ declare <2 x double> @llvm.mips.frsqrt.d(<2 x double>) nounwind
define void @llvm_mips_fsqrt_w_test() nounwind {
entry:
- %0 = load <4 x float>* @llvm_mips_fsqrt_w_ARG1
+ %0 = load <4 x float>, <4 x float>* @llvm_mips_fsqrt_w_ARG1
%1 = tail call <4 x float> @llvm.mips.fsqrt.w(<4 x float> %0)
store <4 x float> %1, <4 x float>* @llvm_mips_fsqrt_w_RES
ret void
@@ -269,7 +269,7 @@ declare <4 x float> @llvm.mips.fsqrt.w(<4 x float>) nounwind
define void @llvm_mips_fsqrt_d_test() nounwind {
entry:
- %0 = load <2 x double>* @llvm_mips_fsqrt_d_ARG1
+ %0 = load <2 x double>, <2 x double>* @llvm_mips_fsqrt_d_ARG1
%1 = tail call <2 x double> @llvm.mips.fsqrt.d(<2 x double> %0)
store <2 x double> %1, <2 x double>* @llvm_mips_fsqrt_d_RES
ret void
@@ -287,7 +287,7 @@ declare <2 x double> @llvm.mips.fsqrt.d(<2 x double>) nounwind
define void @fsqrt_w_test() nounwind {
entry:
- %0 = load <4 x float>* @llvm_mips_fsqrt_w_ARG1
+ %0 = load <4 x float>, <4 x float>* @llvm_mips_fsqrt_w_ARG1
%1 = tail call <4 x float> @llvm.sqrt.v4f32(<4 x float> %0)
store <4 x float> %1, <4 x float>* @llvm_mips_fsqrt_w_RES
ret void
@@ -305,7 +305,7 @@ declare <4 x float> @llvm.sqrt.v4f32(<4 x float>) nounwind
define void @fsqrt_d_test() nounwind {
entry:
- %0 = load <2 x double>* @llvm_mips_fsqrt_d_ARG1
+ %0 = load <2 x double>, <2 x double>* @llvm_mips_fsqrt_d_ARG1
%1 = tail call <2 x double> @llvm.sqrt.v2f64(<2 x double> %0)
store <2 x double> %1, <2 x double>* @llvm_mips_fsqrt_d_RES
ret void
diff --git a/test/CodeGen/Mips/msa/2rf_exup.ll b/test/CodeGen/Mips/msa/2rf_exup.ll
index 8d7cc367040a..fd81ff6d112b 100644
--- a/test/CodeGen/Mips/msa/2rf_exup.ll
+++ b/test/CodeGen/Mips/msa/2rf_exup.ll
@@ -9,7 +9,7 @@
define void @llvm_mips_fexupl_w_test() nounwind {
entry:
- %0 = load <8 x half>* @llvm_mips_fexupl_w_ARG1
+ %0 = load <8 x half>, <8 x half>* @llvm_mips_fexupl_w_ARG1
%1 = tail call <4 x float> @llvm.mips.fexupl.w(<8 x half> %0)
store <4 x float> %1, <4 x float>* @llvm_mips_fexupl_w_RES
ret void
@@ -28,7 +28,7 @@ declare <4 x float> @llvm.mips.fexupl.w(<8 x half>) nounwind
define void @llvm_mips_fexupl_d_test() nounwind {
entry:
- %0 = load <4 x float>* @llvm_mips_fexupl_d_ARG1
+ %0 = load <4 x float>, <4 x float>* @llvm_mips_fexupl_d_ARG1
%1 = tail call <2 x double> @llvm.mips.fexupl.d(<4 x float> %0)
store <2 x double> %1, <2 x double>* @llvm_mips_fexupl_d_RES
ret void
@@ -47,7 +47,7 @@ declare <2 x double> @llvm.mips.fexupl.d(<4 x float>) nounwind
define void @llvm_mips_fexupr_w_test() nounwind {
entry:
- %0 = load <8 x half>* @llvm_mips_fexupr_w_ARG1
+ %0 = load <8 x half>, <8 x half>* @llvm_mips_fexupr_w_ARG1
%1 = tail call <4 x float> @llvm.mips.fexupr.w(<8 x half> %0)
store <4 x float> %1, <4 x float>* @llvm_mips_fexupr_w_RES
ret void
@@ -66,7 +66,7 @@ declare <4 x float> @llvm.mips.fexupr.w(<8 x half>) nounwind
define void @llvm_mips_fexupr_d_test() nounwind {
entry:
- %0 = load <4 x float>* @llvm_mips_fexupr_d_ARG1
+ %0 = load <4 x float>, <4 x float>* @llvm_mips_fexupr_d_ARG1
%1 = tail call <2 x double> @llvm.mips.fexupr.d(<4 x float> %0)
store <2 x double> %1, <2 x double>* @llvm_mips_fexupr_d_RES
ret void
diff --git a/test/CodeGen/Mips/msa/2rf_float_int.ll b/test/CodeGen/Mips/msa/2rf_float_int.ll
index 3b5dfda2d1e6..369015814b0e 100644
--- a/test/CodeGen/Mips/msa/2rf_float_int.ll
+++ b/test/CodeGen/Mips/msa/2rf_float_int.ll
@@ -9,7 +9,7 @@
define void @llvm_mips_ffint_s_w_test() nounwind {
entry:
- %0 = load <4 x i32>* @llvm_mips_ffint_s_w_ARG1
+ %0 = load <4 x i32>, <4 x i32>* @llvm_mips_ffint_s_w_ARG1
%1 = tail call <4 x float> @llvm.mips.ffint.s.w(<4 x i32> %0)
store <4 x float> %1, <4 x float>* @llvm_mips_ffint_s_w_RES
ret void
@@ -30,7 +30,7 @@ declare <4 x float> @llvm.mips.ffint.s.w(<4 x i32>) nounwind
define void @llvm_mips_ffint_s_d_test() nounwind {
entry:
- %0 = load <2 x i64>* @llvm_mips_ffint_s_d_ARG1
+ %0 = load <2 x i64>, <2 x i64>* @llvm_mips_ffint_s_d_ARG1
%1 = tail call <2 x double> @llvm.mips.ffint.s.d(<2 x i64> %0)
store <2 x double> %1, <2 x double>* @llvm_mips_ffint_s_d_RES
ret void
@@ -51,7 +51,7 @@ declare <2 x double> @llvm.mips.ffint.s.d(<2 x i64>) nounwind
define void @llvm_mips_ffint_u_w_test() nounwind {
entry:
- %0 = load <4 x i32>* @llvm_mips_ffint_u_w_ARG1
+ %0 = load <4 x i32>, <4 x i32>* @llvm_mips_ffint_u_w_ARG1
%1 = tail call <4 x float> @llvm.mips.ffint.u.w(<4 x i32> %0)
store <4 x float> %1, <4 x float>* @llvm_mips_ffint_u_w_RES
ret void
@@ -72,7 +72,7 @@ declare <4 x float> @llvm.mips.ffint.u.w(<4 x i32>) nounwind
define void @llvm_mips_ffint_u_d_test() nounwind {
entry:
- %0 = load <2 x i64>* @llvm_mips_ffint_u_d_ARG1
+ %0 = load <2 x i64>, <2 x i64>* @llvm_mips_ffint_u_d_ARG1
%1 = tail call <2 x double> @llvm.mips.ffint.u.d(<2 x i64> %0)
store <2 x double> %1, <2 x double>* @llvm_mips_ffint_u_d_RES
ret void
diff --git a/test/CodeGen/Mips/msa/2rf_fq.ll b/test/CodeGen/Mips/msa/2rf_fq.ll
index 021dd937fad3..05c649ee918a 100644
--- a/test/CodeGen/Mips/msa/2rf_fq.ll
+++ b/test/CodeGen/Mips/msa/2rf_fq.ll
@@ -9,7 +9,7 @@
define void @llvm_mips_ffql_w_test() nounwind {
entry:
- %0 = load <8 x i16>* @llvm_mips_ffql_w_ARG1
+ %0 = load <8 x i16>, <8 x i16>* @llvm_mips_ffql_w_ARG1
%1 = tail call <4 x float> @llvm.mips.ffql.w(<8 x i16> %0)
store <4 x float> %1, <4 x float>* @llvm_mips_ffql_w_RES
ret void
@@ -28,7 +28,7 @@ declare <4 x float> @llvm.mips.ffql.w(<8 x i16>) nounwind
define void @llvm_mips_ffql_d_test() nounwind {
entry:
- %0 = load <4 x i32>* @llvm_mips_ffql_d_ARG1
+ %0 = load <4 x i32>, <4 x i32>* @llvm_mips_ffql_d_ARG1
%1 = tail call <2 x double> @llvm.mips.ffql.d(<4 x i32> %0)
store <2 x double> %1, <2 x double>* @llvm_mips_ffql_d_RES
ret void
@@ -47,7 +47,7 @@ declare <2 x double> @llvm.mips.ffql.d(<4 x i32>) nounwind
define void @llvm_mips_ffqr_w_test() nounwind {
entry:
- %0 = load <8 x i16>* @llvm_mips_ffqr_w_ARG1
+ %0 = load <8 x i16>, <8 x i16>* @llvm_mips_ffqr_w_ARG1
%1 = tail call <4 x float> @llvm.mips.ffqr.w(<8 x i16> %0)
store <4 x float> %1, <4 x float>* @llvm_mips_ffqr_w_RES
ret void
@@ -66,7 +66,7 @@ declare <4 x float> @llvm.mips.ffqr.w(<8 x i16>) nounwind
define void @llvm_mips_ffqr_d_test() nounwind {
entry:
- %0 = load <4 x i32>* @llvm_mips_ffqr_d_ARG1
+ %0 = load <4 x i32>, <4 x i32>* @llvm_mips_ffqr_d_ARG1
%1 = tail call <2 x double> @llvm.mips.ffqr.d(<4 x i32> %0)
store <2 x double> %1, <2 x double>* @llvm_mips_ffqr_d_RES
ret void
diff --git a/test/CodeGen/Mips/msa/2rf_int_float.ll b/test/CodeGen/Mips/msa/2rf_int_float.ll
index 4665ae066a4f..77d1404f9cfa 100644
--- a/test/CodeGen/Mips/msa/2rf_int_float.ll
+++ b/test/CodeGen/Mips/msa/2rf_int_float.ll
@@ -10,7 +10,7 @@
define void @llvm_mips_fclass_w_test() nounwind {
entry:
- %0 = load <4 x float>* @llvm_mips_fclass_w_ARG1
+ %0 = load <4 x float>, <4 x float>* @llvm_mips_fclass_w_ARG1
%1 = tail call <4 x i32> @llvm.mips.fclass.w(<4 x float> %0)
store <4 x i32> %1, <4 x i32>* @llvm_mips_fclass_w_RES
ret void
@@ -31,7 +31,7 @@ declare <4 x i32> @llvm.mips.fclass.w(<4 x float>) nounwind
define void @llvm_mips_fclass_d_test() nounwind {
entry:
- %0 = load <2 x double>* @llvm_mips_fclass_d_ARG1
+ %0 = load <2 x double>, <2 x double>* @llvm_mips_fclass_d_ARG1
%1 = tail call <2 x i64> @llvm.mips.fclass.d(<2 x double> %0)
store <2 x i64> %1, <2 x i64>* @llvm_mips_fclass_d_RES
ret void
@@ -52,7 +52,7 @@ declare <2 x i64> @llvm.mips.fclass.d(<2 x double>) nounwind
define void @llvm_mips_ftrunc_s_w_test() nounwind {
entry:
- %0 = load <4 x float>* @llvm_mips_ftrunc_s_w_ARG1
+ %0 = load <4 x float>, <4 x float>* @llvm_mips_ftrunc_s_w_ARG1
%1 = tail call <4 x i32> @llvm.mips.ftrunc.s.w(<4 x float> %0)
store <4 x i32> %1, <4 x i32>* @llvm_mips_ftrunc_s_w_RES
ret void
@@ -73,7 +73,7 @@ declare <4 x i32> @llvm.mips.ftrunc.s.w(<4 x float>) nounwind
define void @llvm_mips_ftrunc_s_d_test() nounwind {
entry:
- %0 = load <2 x double>* @llvm_mips_ftrunc_s_d_ARG1
+ %0 = load <2 x double>, <2 x double>* @llvm_mips_ftrunc_s_d_ARG1
%1 = tail call <2 x i64> @llvm.mips.ftrunc.s.d(<2 x double> %0)
store <2 x i64> %1, <2 x i64>* @llvm_mips_ftrunc_s_d_RES
ret void
@@ -94,7 +94,7 @@ declare <2 x i64> @llvm.mips.ftrunc.s.d(<2 x double>) nounwind
define void @llvm_mips_ftrunc_u_w_test() nounwind {
entry:
- %0 = load <4 x float>* @llvm_mips_ftrunc_u_w_ARG1
+ %0 = load <4 x float>, <4 x float>* @llvm_mips_ftrunc_u_w_ARG1
%1 = tail call <4 x i32> @llvm.mips.ftrunc.u.w(<4 x float> %0)
store <4 x i32> %1, <4 x i32>* @llvm_mips_ftrunc_u_w_RES
ret void
@@ -115,7 +115,7 @@ declare <4 x i32> @llvm.mips.ftrunc.u.w(<4 x float>) nounwind
define void @llvm_mips_ftrunc_u_d_test() nounwind {
entry:
- %0 = load <2 x double>* @llvm_mips_ftrunc_u_d_ARG1
+ %0 = load <2 x double>, <2 x double>* @llvm_mips_ftrunc_u_d_ARG1
%1 = tail call <2 x i64> @llvm.mips.ftrunc.u.d(<2 x double> %0)
store <2 x i64> %1, <2 x i64>* @llvm_mips_ftrunc_u_d_RES
ret void
@@ -136,7 +136,7 @@ declare <2 x i64> @llvm.mips.ftrunc.u.d(<2 x double>) nounwind
define void @llvm_mips_ftint_s_w_test() nounwind {
entry:
- %0 = load <4 x float>* @llvm_mips_ftint_s_w_ARG1
+ %0 = load <4 x float>, <4 x float>* @llvm_mips_ftint_s_w_ARG1
%1 = tail call <4 x i32> @llvm.mips.ftint.s.w(<4 x float> %0)
store <4 x i32> %1, <4 x i32>* @llvm_mips_ftint_s_w_RES
ret void
@@ -157,7 +157,7 @@ declare <4 x i32> @llvm.mips.ftint.s.w(<4 x float>) nounwind
define void @llvm_mips_ftint_s_d_test() nounwind {
entry:
- %0 = load <2 x double>* @llvm_mips_ftint_s_d_ARG1
+ %0 = load <2 x double>, <2 x double>* @llvm_mips_ftint_s_d_ARG1
%1 = tail call <2 x i64> @llvm.mips.ftint.s.d(<2 x double> %0)
store <2 x i64> %1, <2 x i64>* @llvm_mips_ftint_s_d_RES
ret void
@@ -178,7 +178,7 @@ declare <2 x i64> @llvm.mips.ftint.s.d(<2 x double>) nounwind
define void @llvm_mips_ftint_u_w_test() nounwind {
entry:
- %0 = load <4 x float>* @llvm_mips_ftint_u_w_ARG1
+ %0 = load <4 x float>, <4 x float>* @llvm_mips_ftint_u_w_ARG1
%1 = tail call <4 x i32> @llvm.mips.ftint.u.w(<4 x float> %0)
store <4 x i32> %1, <4 x i32>* @llvm_mips_ftint_u_w_RES
ret void
@@ -199,7 +199,7 @@ declare <4 x i32> @llvm.mips.ftint.u.w(<4 x float>) nounwind
define void @llvm_mips_ftint_u_d_test() nounwind {
entry:
- %0 = load <2 x double>* @llvm_mips_ftint_u_d_ARG1
+ %0 = load <2 x double>, <2 x double>* @llvm_mips_ftint_u_d_ARG1
%1 = tail call <2 x i64> @llvm.mips.ftint.u.d(<2 x double> %0)
store <2 x i64> %1, <2 x i64>* @llvm_mips_ftint_u_d_RES
ret void
diff --git a/test/CodeGen/Mips/msa/2rf_tq.ll b/test/CodeGen/Mips/msa/2rf_tq.ll
index 6f3c508f5b8c..9b7f02a5ef33 100644
--- a/test/CodeGen/Mips/msa/2rf_tq.ll
+++ b/test/CodeGen/Mips/msa/2rf_tq.ll
@@ -10,8 +10,8 @@
define void @llvm_mips_ftq_h_test() nounwind {
entry:
- %0 = load <4 x float>* @llvm_mips_ftq_h_ARG1
- %1 = load <4 x float>* @llvm_mips_ftq_h_ARG2
+ %0 = load <4 x float>, <4 x float>* @llvm_mips_ftq_h_ARG1
+ %1 = load <4 x float>, <4 x float>* @llvm_mips_ftq_h_ARG2
%2 = tail call <8 x i16> @llvm.mips.ftq.h(<4 x float> %0, <4 x float> %1)
store <8 x i16> %2, <8 x i16>* @llvm_mips_ftq_h_RES
ret void
@@ -32,8 +32,8 @@ declare <8 x i16> @llvm.mips.ftq.h(<4 x float>, <4 x float>) nounwind
define void @llvm_mips_ftq_w_test() nounwind {
entry:
- %0 = load <2 x double>* @llvm_mips_ftq_w_ARG1
- %1 = load <2 x double>* @llvm_mips_ftq_w_ARG2
+ %0 = load <2 x double>, <2 x double>* @llvm_mips_ftq_w_ARG1
+ %1 = load <2 x double>, <2 x double>* @llvm_mips_ftq_w_ARG2
%2 = tail call <4 x i32> @llvm.mips.ftq.w(<2 x double> %0, <2 x double> %1)
store <4 x i32> %2, <4 x i32>* @llvm_mips_ftq_w_RES
ret void
diff --git a/test/CodeGen/Mips/msa/3r-a.ll b/test/CodeGen/Mips/msa/3r-a.ll
index dab15b66b7ce..db772f918614 100644
--- a/test/CodeGen/Mips/msa/3r-a.ll
+++ b/test/CodeGen/Mips/msa/3r-a.ll
@@ -15,8 +15,8 @@
define void @llvm_mips_add_a_b_test() nounwind {
entry:
- %0 = load <16 x i8>* @llvm_mips_add_a_b_ARG1
- %1 = load <16 x i8>* @llvm_mips_add_a_b_ARG2
+ %0 = load <16 x i8>, <16 x i8>* @llvm_mips_add_a_b_ARG1
+ %1 = load <16 x i8>, <16 x i8>* @llvm_mips_add_a_b_ARG2
%2 = tail call <16 x i8> @llvm.mips.add.a.b(<16 x i8> %0, <16 x i8> %1)
store <16 x i8> %2, <16 x i8>* @llvm_mips_add_a_b_RES
ret void
@@ -40,8 +40,8 @@ declare <16 x i8> @llvm.mips.add.a.b(<16 x i8>, <16 x i8>) nounwind
define void @llvm_mips_add_a_h_test() nounwind {
entry:
- %0 = load <8 x i16>* @llvm_mips_add_a_h_ARG1
- %1 = load <8 x i16>* @llvm_mips_add_a_h_ARG2
+ %0 = load <8 x i16>, <8 x i16>* @llvm_mips_add_a_h_ARG1
+ %1 = load <8 x i16>, <8 x i16>* @llvm_mips_add_a_h_ARG2
%2 = tail call <8 x i16> @llvm.mips.add.a.h(<8 x i16> %0, <8 x i16> %1)
store <8 x i16> %2, <8 x i16>* @llvm_mips_add_a_h_RES
ret void
@@ -65,8 +65,8 @@ declare <8 x i16> @llvm.mips.add.a.h(<8 x i16>, <8 x i16>) nounwind
define void @llvm_mips_add_a_w_test() nounwind {
entry:
- %0 = load <4 x i32>* @llvm_mips_add_a_w_ARG1
- %1 = load <4 x i32>* @llvm_mips_add_a_w_ARG2
+ %0 = load <4 x i32>, <4 x i32>* @llvm_mips_add_a_w_ARG1
+ %1 = load <4 x i32>, <4 x i32>* @llvm_mips_add_a_w_ARG2
%2 = tail call <4 x i32> @llvm.mips.add.a.w(<4 x i32> %0, <4 x i32> %1)
store <4 x i32> %2, <4 x i32>* @llvm_mips_add_a_w_RES
ret void
@@ -90,8 +90,8 @@ declare <4 x i32> @llvm.mips.add.a.w(<4 x i32>, <4 x i32>) nounwind
define void @llvm_mips_add_a_d_test() nounwind {
entry:
- %0 = load <2 x i64>* @llvm_mips_add_a_d_ARG1
- %1 = load <2 x i64>* @llvm_mips_add_a_d_ARG2
+ %0 = load <2 x i64>, <2 x i64>* @llvm_mips_add_a_d_ARG1
+ %1 = load <2 x i64>, <2 x i64>* @llvm_mips_add_a_d_ARG2
%2 = tail call <2 x i64> @llvm.mips.add.a.d(<2 x i64> %0, <2 x i64> %1)
store <2 x i64> %2, <2 x i64>* @llvm_mips_add_a_d_RES
ret void
@@ -115,8 +115,8 @@ declare <2 x i64> @llvm.mips.add.a.d(<2 x i64>, <2 x i64>) nounwind
define void @llvm_mips_adds_a_b_test() nounwind {
entry:
- %0 = load <16 x i8>* @llvm_mips_adds_a_b_ARG1
- %1 = load <16 x i8>* @llvm_mips_adds_a_b_ARG2
+ %0 = load <16 x i8>, <16 x i8>* @llvm_mips_adds_a_b_ARG1
+ %1 = load <16 x i8>, <16 x i8>* @llvm_mips_adds_a_b_ARG2
%2 = tail call <16 x i8> @llvm.mips.adds.a.b(<16 x i8> %0, <16 x i8> %1)
store <16 x i8> %2, <16 x i8>* @llvm_mips_adds_a_b_RES
ret void
@@ -140,8 +140,8 @@ declare <16 x i8> @llvm.mips.adds.a.b(<16 x i8>, <16 x i8>) nounwind
define void @llvm_mips_adds_a_h_test() nounwind {
entry:
- %0 = load <8 x i16>* @llvm_mips_adds_a_h_ARG1
- %1 = load <8 x i16>* @llvm_mips_adds_a_h_ARG2
+ %0 = load <8 x i16>, <8 x i16>* @llvm_mips_adds_a_h_ARG1
+ %1 = load <8 x i16>, <8 x i16>* @llvm_mips_adds_a_h_ARG2
%2 = tail call <8 x i16> @llvm.mips.adds.a.h(<8 x i16> %0, <8 x i16> %1)
store <8 x i16> %2, <8 x i16>* @llvm_mips_adds_a_h_RES
ret void
@@ -165,8 +165,8 @@ declare <8 x i16> @llvm.mips.adds.a.h(<8 x i16>, <8 x i16>) nounwind
define void @llvm_mips_adds_a_w_test() nounwind {
entry:
- %0 = load <4 x i32>* @llvm_mips_adds_a_w_ARG1
- %1 = load <4 x i32>* @llvm_mips_adds_a_w_ARG2
+ %0 = load <4 x i32>, <4 x i32>* @llvm_mips_adds_a_w_ARG1
+ %1 = load <4 x i32>, <4 x i32>* @llvm_mips_adds_a_w_ARG2
%2 = tail call <4 x i32> @llvm.mips.adds.a.w(<4 x i32> %0, <4 x i32> %1)
store <4 x i32> %2, <4 x i32>* @llvm_mips_adds_a_w_RES
ret void
@@ -190,8 +190,8 @@ declare <4 x i32> @llvm.mips.adds.a.w(<4 x i32>, <4 x i32>) nounwind
define void @llvm_mips_adds_a_d_test() nounwind {
entry:
- %0 = load <2 x i64>* @llvm_mips_adds_a_d_ARG1
- %1 = load <2 x i64>* @llvm_mips_adds_a_d_ARG2
+ %0 = load <2 x i64>, <2 x i64>* @llvm_mips_adds_a_d_ARG1
+ %1 = load <2 x i64>, <2 x i64>* @llvm_mips_adds_a_d_ARG2
%2 = tail call <2 x i64> @llvm.mips.adds.a.d(<2 x i64> %0, <2 x i64> %1)
store <2 x i64> %2, <2 x i64>* @llvm_mips_adds_a_d_RES
ret void
@@ -215,8 +215,8 @@ declare <2 x i64> @llvm.mips.adds.a.d(<2 x i64>, <2 x i64>) nounwind
define void @llvm_mips_adds_s_b_test() nounwind {
entry:
- %0 = load <16 x i8>* @llvm_mips_adds_s_b_ARG1
- %1 = load <16 x i8>* @llvm_mips_adds_s_b_ARG2
+ %0 = load <16 x i8>, <16 x i8>* @llvm_mips_adds_s_b_ARG1
+ %1 = load <16 x i8>, <16 x i8>* @llvm_mips_adds_s_b_ARG2
%2 = tail call <16 x i8> @llvm.mips.adds.s.b(<16 x i8> %0, <16 x i8> %1)
store <16 x i8> %2, <16 x i8>* @llvm_mips_adds_s_b_RES
ret void
@@ -240,8 +240,8 @@ declare <16 x i8> @llvm.mips.adds.s.b(<16 x i8>, <16 x i8>) nounwind
define void @llvm_mips_adds_s_h_test() nounwind {
entry:
- %0 = load <8 x i16>* @llvm_mips_adds_s_h_ARG1
- %1 = load <8 x i16>* @llvm_mips_adds_s_h_ARG2
+ %0 = load <8 x i16>, <8 x i16>* @llvm_mips_adds_s_h_ARG1
+ %1 = load <8 x i16>, <8 x i16>* @llvm_mips_adds_s_h_ARG2
%2 = tail call <8 x i16> @llvm.mips.adds.s.h(<8 x i16> %0, <8 x i16> %1)
store <8 x i16> %2, <8 x i16>* @llvm_mips_adds_s_h_RES
ret void
@@ -265,8 +265,8 @@ declare <8 x i16> @llvm.mips.adds.s.h(<8 x i16>, <8 x i16>) nounwind
define void @llvm_mips_adds_s_w_test() nounwind {
entry:
- %0 = load <4 x i32>* @llvm_mips_adds_s_w_ARG1
- %1 = load <4 x i32>* @llvm_mips_adds_s_w_ARG2
+ %0 = load <4 x i32>, <4 x i32>* @llvm_mips_adds_s_w_ARG1
+ %1 = load <4 x i32>, <4 x i32>* @llvm_mips_adds_s_w_ARG2
%2 = tail call <4 x i32> @llvm.mips.adds.s.w(<4 x i32> %0, <4 x i32> %1)
store <4 x i32> %2, <4 x i32>* @llvm_mips_adds_s_w_RES
ret void
@@ -290,8 +290,8 @@ declare <4 x i32> @llvm.mips.adds.s.w(<4 x i32>, <4 x i32>) nounwind
define void @llvm_mips_adds_s_d_test() nounwind {
entry:
- %0 = load <2 x i64>* @llvm_mips_adds_s_d_ARG1
- %1 = load <2 x i64>* @llvm_mips_adds_s_d_ARG2
+ %0 = load <2 x i64>, <2 x i64>* @llvm_mips_adds_s_d_ARG1
+ %1 = load <2 x i64>, <2 x i64>* @llvm_mips_adds_s_d_ARG2
%2 = tail call <2 x i64> @llvm.mips.adds.s.d(<2 x i64> %0, <2 x i64> %1)
store <2 x i64> %2, <2 x i64>* @llvm_mips_adds_s_d_RES
ret void
@@ -315,8 +315,8 @@ declare <2 x i64> @llvm.mips.adds.s.d(<2 x i64>, <2 x i64>) nounwind
define void @llvm_mips_adds_u_b_test() nounwind {
entry:
- %0 = load <16 x i8>* @llvm_mips_adds_u_b_ARG1
- %1 = load <16 x i8>* @llvm_mips_adds_u_b_ARG2
+ %0 = load <16 x i8>, <16 x i8>* @llvm_mips_adds_u_b_ARG1
+ %1 = load <16 x i8>, <16 x i8>* @llvm_mips_adds_u_b_ARG2
%2 = tail call <16 x i8> @llvm.mips.adds.u.b(<16 x i8> %0, <16 x i8> %1)
store <16 x i8> %2, <16 x i8>* @llvm_mips_adds_u_b_RES
ret void
@@ -340,8 +340,8 @@ declare <16 x i8> @llvm.mips.adds.u.b(<16 x i8>, <16 x i8>) nounwind
define void @llvm_mips_adds_u_h_test() nounwind {
entry:
- %0 = load <8 x i16>* @llvm_mips_adds_u_h_ARG1
- %1 = load <8 x i16>* @llvm_mips_adds_u_h_ARG2
+ %0 = load <8 x i16>, <8 x i16>* @llvm_mips_adds_u_h_ARG1
+ %1 = load <8 x i16>, <8 x i16>* @llvm_mips_adds_u_h_ARG2
%2 = tail call <8 x i16> @llvm.mips.adds.u.h(<8 x i16> %0, <8 x i16> %1)
store <8 x i16> %2, <8 x i16>* @llvm_mips_adds_u_h_RES
ret void
@@ -365,8 +365,8 @@ declare <8 x i16> @llvm.mips.adds.u.h(<8 x i16>, <8 x i16>) nounwind
define void @llvm_mips_adds_u_w_test() nounwind {
entry:
- %0 = load <4 x i32>* @llvm_mips_adds_u_w_ARG1
- %1 = load <4 x i32>* @llvm_mips_adds_u_w_ARG2
+ %0 = load <4 x i32>, <4 x i32>* @llvm_mips_adds_u_w_ARG1
+ %1 = load <4 x i32>, <4 x i32>* @llvm_mips_adds_u_w_ARG2
%2 = tail call <4 x i32> @llvm.mips.adds.u.w(<4 x i32> %0, <4 x i32> %1)
store <4 x i32> %2, <4 x i32>* @llvm_mips_adds_u_w_RES
ret void
@@ -390,8 +390,8 @@ declare <4 x i32> @llvm.mips.adds.u.w(<4 x i32>, <4 x i32>) nounwind
define void @llvm_mips_adds_u_d_test() nounwind {
entry:
- %0 = load <2 x i64>* @llvm_mips_adds_u_d_ARG1
- %1 = load <2 x i64>* @llvm_mips_adds_u_d_ARG2
+ %0 = load <2 x i64>, <2 x i64>* @llvm_mips_adds_u_d_ARG1
+ %1 = load <2 x i64>, <2 x i64>* @llvm_mips_adds_u_d_ARG2
%2 = tail call <2 x i64> @llvm.mips.adds.u.d(<2 x i64> %0, <2 x i64> %1)
store <2 x i64> %2, <2 x i64>* @llvm_mips_adds_u_d_RES
ret void
@@ -415,8 +415,8 @@ declare <2 x i64> @llvm.mips.adds.u.d(<2 x i64>, <2 x i64>) nounwind
define void @llvm_mips_addv_b_test() nounwind {
entry:
- %0 = load <16 x i8>* @llvm_mips_addv_b_ARG1
- %1 = load <16 x i8>* @llvm_mips_addv_b_ARG2
+ %0 = load <16 x i8>, <16 x i8>* @llvm_mips_addv_b_ARG1
+ %1 = load <16 x i8>, <16 x i8>* @llvm_mips_addv_b_ARG2
%2 = tail call <16 x i8> @llvm.mips.addv.b(<16 x i8> %0, <16 x i8> %1)
store <16 x i8> %2, <16 x i8>* @llvm_mips_addv_b_RES
ret void
@@ -440,8 +440,8 @@ declare <16 x i8> @llvm.mips.addv.b(<16 x i8>, <16 x i8>) nounwind
define void @llvm_mips_addv_h_test() nounwind {
entry:
- %0 = load <8 x i16>* @llvm_mips_addv_h_ARG1
- %1 = load <8 x i16>* @llvm_mips_addv_h_ARG2
+ %0 = load <8 x i16>, <8 x i16>* @llvm_mips_addv_h_ARG1
+ %1 = load <8 x i16>, <8 x i16>* @llvm_mips_addv_h_ARG2
%2 = tail call <8 x i16> @llvm.mips.addv.h(<8 x i16> %0, <8 x i16> %1)
store <8 x i16> %2, <8 x i16>* @llvm_mips_addv_h_RES
ret void
@@ -465,8 +465,8 @@ declare <8 x i16> @llvm.mips.addv.h(<8 x i16>, <8 x i16>) nounwind
define void @llvm_mips_addv_w_test() nounwind {
entry:
- %0 = load <4 x i32>* @llvm_mips_addv_w_ARG1
- %1 = load <4 x i32>* @llvm_mips_addv_w_ARG2
+ %0 = load <4 x i32>, <4 x i32>* @llvm_mips_addv_w_ARG1
+ %1 = load <4 x i32>, <4 x i32>* @llvm_mips_addv_w_ARG2
%2 = tail call <4 x i32> @llvm.mips.addv.w(<4 x i32> %0, <4 x i32> %1)
store <4 x i32> %2, <4 x i32>* @llvm_mips_addv_w_RES
ret void
@@ -490,8 +490,8 @@ declare <4 x i32> @llvm.mips.addv.w(<4 x i32>, <4 x i32>) nounwind
define void @llvm_mips_addv_d_test() nounwind {
entry:
- %0 = load <2 x i64>* @llvm_mips_addv_d_ARG1
- %1 = load <2 x i64>* @llvm_mips_addv_d_ARG2
+ %0 = load <2 x i64>, <2 x i64>* @llvm_mips_addv_d_ARG1
+ %1 = load <2 x i64>, <2 x i64>* @llvm_mips_addv_d_ARG2
%2 = tail call <2 x i64> @llvm.mips.addv.d(<2 x i64> %0, <2 x i64> %1)
store <2 x i64> %2, <2 x i64>* @llvm_mips_addv_d_RES
ret void
@@ -512,8 +512,8 @@ declare <2 x i64> @llvm.mips.addv.d(<2 x i64>, <2 x i64>) nounwind
define void @addv_b_test() nounwind {
entry:
- %0 = load <16 x i8>* @llvm_mips_addv_b_ARG1
- %1 = load <16 x i8>* @llvm_mips_addv_b_ARG2
+ %0 = load <16 x i8>, <16 x i8>* @llvm_mips_addv_b_ARG1
+ %1 = load <16 x i8>, <16 x i8>* @llvm_mips_addv_b_ARG2
%2 = add <16 x i8> %0, %1
store <16 x i8> %2, <16 x i8>* @llvm_mips_addv_b_RES
ret void
@@ -532,8 +532,8 @@ entry:
define void @addv_h_test() nounwind {
entry:
- %0 = load <8 x i16>* @llvm_mips_addv_h_ARG1
- %1 = load <8 x i16>* @llvm_mips_addv_h_ARG2
+ %0 = load <8 x i16>, <8 x i16>* @llvm_mips_addv_h_ARG1
+ %1 = load <8 x i16>, <8 x i16>* @llvm_mips_addv_h_ARG2
%2 = add <8 x i16> %0, %1
store <8 x i16> %2, <8 x i16>* @llvm_mips_addv_h_RES
ret void
@@ -552,8 +552,8 @@ entry:
define void @addv_w_test() nounwind {
entry:
- %0 = load <4 x i32>* @llvm_mips_addv_w_ARG1
- %1 = load <4 x i32>* @llvm_mips_addv_w_ARG2
+ %0 = load <4 x i32>, <4 x i32>* @llvm_mips_addv_w_ARG1
+ %1 = load <4 x i32>, <4 x i32>* @llvm_mips_addv_w_ARG2
%2 = add <4 x i32> %0, %1
store <4 x i32> %2, <4 x i32>* @llvm_mips_addv_w_RES
ret void
@@ -572,8 +572,8 @@ entry:
define void @addv_d_test() nounwind {
entry:
- %0 = load <2 x i64>* @llvm_mips_addv_d_ARG1
- %1 = load <2 x i64>* @llvm_mips_addv_d_ARG2
+ %0 = load <2 x i64>, <2 x i64>* @llvm_mips_addv_d_ARG1
+ %1 = load <2 x i64>, <2 x i64>* @llvm_mips_addv_d_ARG2
%2 = add <2 x i64> %0, %1
store <2 x i64> %2, <2 x i64>* @llvm_mips_addv_d_RES
ret void
@@ -595,8 +595,8 @@ entry:
define void @llvm_mips_asub_s_b_test() nounwind {
entry:
- %0 = load <16 x i8>* @llvm_mips_asub_s_b_ARG1
- %1 = load <16 x i8>* @llvm_mips_asub_s_b_ARG2
+ %0 = load <16 x i8>, <16 x i8>* @llvm_mips_asub_s_b_ARG1
+ %1 = load <16 x i8>, <16 x i8>* @llvm_mips_asub_s_b_ARG2
%2 = tail call <16 x i8> @llvm.mips.asub.s.b(<16 x i8> %0, <16 x i8> %1)
store <16 x i8> %2, <16 x i8>* @llvm_mips_asub_s_b_RES
ret void
@@ -620,8 +620,8 @@ declare <16 x i8> @llvm.mips.asub.s.b(<16 x i8>, <16 x i8>) nounwind
define void @llvm_mips_asub_s_h_test() nounwind {
entry:
- %0 = load <8 x i16>* @llvm_mips_asub_s_h_ARG1
- %1 = load <8 x i16>* @llvm_mips_asub_s_h_ARG2
+ %0 = load <8 x i16>, <8 x i16>* @llvm_mips_asub_s_h_ARG1
+ %1 = load <8 x i16>, <8 x i16>* @llvm_mips_asub_s_h_ARG2
%2 = tail call <8 x i16> @llvm.mips.asub.s.h(<8 x i16> %0, <8 x i16> %1)
store <8 x i16> %2, <8 x i16>* @llvm_mips_asub_s_h_RES
ret void
@@ -645,8 +645,8 @@ declare <8 x i16> @llvm.mips.asub.s.h(<8 x i16>, <8 x i16>) nounwind
define void @llvm_mips_asub_s_w_test() nounwind {
entry:
- %0 = load <4 x i32>* @llvm_mips_asub_s_w_ARG1
- %1 = load <4 x i32>* @llvm_mips_asub_s_w_ARG2
+ %0 = load <4 x i32>, <4 x i32>* @llvm_mips_asub_s_w_ARG1
+ %1 = load <4 x i32>, <4 x i32>* @llvm_mips_asub_s_w_ARG2
%2 = tail call <4 x i32> @llvm.mips.asub.s.w(<4 x i32> %0, <4 x i32> %1)
store <4 x i32> %2, <4 x i32>* @llvm_mips_asub_s_w_RES
ret void
@@ -670,8 +670,8 @@ declare <4 x i32> @llvm.mips.asub.s.w(<4 x i32>, <4 x i32>) nounwind
define void @llvm_mips_asub_s_d_test() nounwind {
entry:
- %0 = load <2 x i64>* @llvm_mips_asub_s_d_ARG1
- %1 = load <2 x i64>* @llvm_mips_asub_s_d_ARG2
+ %0 = load <2 x i64>, <2 x i64>* @llvm_mips_asub_s_d_ARG1
+ %1 = load <2 x i64>, <2 x i64>* @llvm_mips_asub_s_d_ARG2
%2 = tail call <2 x i64> @llvm.mips.asub.s.d(<2 x i64> %0, <2 x i64> %1)
store <2 x i64> %2, <2 x i64>* @llvm_mips_asub_s_d_RES
ret void
@@ -695,8 +695,8 @@ declare <2 x i64> @llvm.mips.asub.s.d(<2 x i64>, <2 x i64>) nounwind
define void @llvm_mips_asub_u_b_test() nounwind {
entry:
- %0 = load <16 x i8>* @llvm_mips_asub_u_b_ARG1
- %1 = load <16 x i8>* @llvm_mips_asub_u_b_ARG2
+ %0 = load <16 x i8>, <16 x i8>* @llvm_mips_asub_u_b_ARG1
+ %1 = load <16 x i8>, <16 x i8>* @llvm_mips_asub_u_b_ARG2
%2 = tail call <16 x i8> @llvm.mips.asub.u.b(<16 x i8> %0, <16 x i8> %1)
store <16 x i8> %2, <16 x i8>* @llvm_mips_asub_u_b_RES
ret void
@@ -720,8 +720,8 @@ declare <16 x i8> @llvm.mips.asub.u.b(<16 x i8>, <16 x i8>) nounwind
define void @llvm_mips_asub_u_h_test() nounwind {
entry:
- %0 = load <8 x i16>* @llvm_mips_asub_u_h_ARG1
- %1 = load <8 x i16>* @llvm_mips_asub_u_h_ARG2
+ %0 = load <8 x i16>, <8 x i16>* @llvm_mips_asub_u_h_ARG1
+ %1 = load <8 x i16>, <8 x i16>* @llvm_mips_asub_u_h_ARG2
%2 = tail call <8 x i16> @llvm.mips.asub.u.h(<8 x i16> %0, <8 x i16> %1)
store <8 x i16> %2, <8 x i16>* @llvm_mips_asub_u_h_RES
ret void
@@ -745,8 +745,8 @@ declare <8 x i16> @llvm.mips.asub.u.h(<8 x i16>, <8 x i16>) nounwind
define void @llvm_mips_asub_u_w_test() nounwind {
entry:
- %0 = load <4 x i32>* @llvm_mips_asub_u_w_ARG1
- %1 = load <4 x i32>* @llvm_mips_asub_u_w_ARG2
+ %0 = load <4 x i32>, <4 x i32>* @llvm_mips_asub_u_w_ARG1
+ %1 = load <4 x i32>, <4 x i32>* @llvm_mips_asub_u_w_ARG2
%2 = tail call <4 x i32> @llvm.mips.asub.u.w(<4 x i32> %0, <4 x i32> %1)
store <4 x i32> %2, <4 x i32>* @llvm_mips_asub_u_w_RES
ret void
@@ -770,8 +770,8 @@ declare <4 x i32> @llvm.mips.asub.u.w(<4 x i32>, <4 x i32>) nounwind
define void @llvm_mips_asub_u_d_test() nounwind {
entry:
- %0 = load <2 x i64>* @llvm_mips_asub_u_d_ARG1
- %1 = load <2 x i64>* @llvm_mips_asub_u_d_ARG2
+ %0 = load <2 x i64>, <2 x i64>* @llvm_mips_asub_u_d_ARG1
+ %1 = load <2 x i64>, <2 x i64>* @llvm_mips_asub_u_d_ARG2
%2 = tail call <2 x i64> @llvm.mips.asub.u.d(<2 x i64> %0, <2 x i64> %1)
store <2 x i64> %2, <2 x i64>* @llvm_mips_asub_u_d_RES
ret void
@@ -795,8 +795,8 @@ declare <2 x i64> @llvm.mips.asub.u.d(<2 x i64>, <2 x i64>) nounwind
define void @llvm_mips_ave_s_b_test() nounwind {
entry:
- %0 = load <16 x i8>* @llvm_mips_ave_s_b_ARG1
- %1 = load <16 x i8>* @llvm_mips_ave_s_b_ARG2
+ %0 = load <16 x i8>, <16 x i8>* @llvm_mips_ave_s_b_ARG1
+ %1 = load <16 x i8>, <16 x i8>* @llvm_mips_ave_s_b_ARG2
%2 = tail call <16 x i8> @llvm.mips.ave.s.b(<16 x i8> %0, <16 x i8> %1)
store <16 x i8> %2, <16 x i8>* @llvm_mips_ave_s_b_RES
ret void
@@ -820,8 +820,8 @@ declare <16 x i8> @llvm.mips.ave.s.b(<16 x i8>, <16 x i8>) nounwind
define void @llvm_mips_ave_s_h_test() nounwind {
entry:
- %0 = load <8 x i16>* @llvm_mips_ave_s_h_ARG1
- %1 = load <8 x i16>* @llvm_mips_ave_s_h_ARG2
+ %0 = load <8 x i16>, <8 x i16>* @llvm_mips_ave_s_h_ARG1
+ %1 = load <8 x i16>, <8 x i16>* @llvm_mips_ave_s_h_ARG2
%2 = tail call <8 x i16> @llvm.mips.ave.s.h(<8 x i16> %0, <8 x i16> %1)
store <8 x i16> %2, <8 x i16>* @llvm_mips_ave_s_h_RES
ret void
@@ -845,8 +845,8 @@ declare <8 x i16> @llvm.mips.ave.s.h(<8 x i16>, <8 x i16>) nounwind
define void @llvm_mips_ave_s_w_test() nounwind {
entry:
- %0 = load <4 x i32>* @llvm_mips_ave_s_w_ARG1
- %1 = load <4 x i32>* @llvm_mips_ave_s_w_ARG2
+ %0 = load <4 x i32>, <4 x i32>* @llvm_mips_ave_s_w_ARG1
+ %1 = load <4 x i32>, <4 x i32>* @llvm_mips_ave_s_w_ARG2
%2 = tail call <4 x i32> @llvm.mips.ave.s.w(<4 x i32> %0, <4 x i32> %1)
store <4 x i32> %2, <4 x i32>* @llvm_mips_ave_s_w_RES
ret void
@@ -870,8 +870,8 @@ declare <4 x i32> @llvm.mips.ave.s.w(<4 x i32>, <4 x i32>) nounwind
define void @llvm_mips_ave_s_d_test() nounwind {
entry:
- %0 = load <2 x i64>* @llvm_mips_ave_s_d_ARG1
- %1 = load <2 x i64>* @llvm_mips_ave_s_d_ARG2
+ %0 = load <2 x i64>, <2 x i64>* @llvm_mips_ave_s_d_ARG1
+ %1 = load <2 x i64>, <2 x i64>* @llvm_mips_ave_s_d_ARG2
%2 = tail call <2 x i64> @llvm.mips.ave.s.d(<2 x i64> %0, <2 x i64> %1)
store <2 x i64> %2, <2 x i64>* @llvm_mips_ave_s_d_RES
ret void
@@ -895,8 +895,8 @@ declare <2 x i64> @llvm.mips.ave.s.d(<2 x i64>, <2 x i64>) nounwind
define void @llvm_mips_ave_u_b_test() nounwind {
entry:
- %0 = load <16 x i8>* @llvm_mips_ave_u_b_ARG1
- %1 = load <16 x i8>* @llvm_mips_ave_u_b_ARG2
+ %0 = load <16 x i8>, <16 x i8>* @llvm_mips_ave_u_b_ARG1
+ %1 = load <16 x i8>, <16 x i8>* @llvm_mips_ave_u_b_ARG2
%2 = tail call <16 x i8> @llvm.mips.ave.u.b(<16 x i8> %0, <16 x i8> %1)
store <16 x i8> %2, <16 x i8>* @llvm_mips_ave_u_b_RES
ret void
@@ -920,8 +920,8 @@ declare <16 x i8> @llvm.mips.ave.u.b(<16 x i8>, <16 x i8>) nounwind
define void @llvm_mips_ave_u_h_test() nounwind {
entry:
- %0 = load <8 x i16>* @llvm_mips_ave_u_h_ARG1
- %1 = load <8 x i16>* @llvm_mips_ave_u_h_ARG2
+ %0 = load <8 x i16>, <8 x i16>* @llvm_mips_ave_u_h_ARG1
+ %1 = load <8 x i16>, <8 x i16>* @llvm_mips_ave_u_h_ARG2
%2 = tail call <8 x i16> @llvm.mips.ave.u.h(<8 x i16> %0, <8 x i16> %1)
store <8 x i16> %2, <8 x i16>* @llvm_mips_ave_u_h_RES
ret void
@@ -945,8 +945,8 @@ declare <8 x i16> @llvm.mips.ave.u.h(<8 x i16>, <8 x i16>) nounwind
define void @llvm_mips_ave_u_w_test() nounwind {
entry:
- %0 = load <4 x i32>* @llvm_mips_ave_u_w_ARG1
- %1 = load <4 x i32>* @llvm_mips_ave_u_w_ARG2
+ %0 = load <4 x i32>, <4 x i32>* @llvm_mips_ave_u_w_ARG1
+ %1 = load <4 x i32>, <4 x i32>* @llvm_mips_ave_u_w_ARG2
%2 = tail call <4 x i32> @llvm.mips.ave.u.w(<4 x i32> %0, <4 x i32> %1)
store <4 x i32> %2, <4 x i32>* @llvm_mips_ave_u_w_RES
ret void
@@ -970,8 +970,8 @@ declare <4 x i32> @llvm.mips.ave.u.w(<4 x i32>, <4 x i32>) nounwind
define void @llvm_mips_ave_u_d_test() nounwind {
entry:
- %0 = load <2 x i64>* @llvm_mips_ave_u_d_ARG1
- %1 = load <2 x i64>* @llvm_mips_ave_u_d_ARG2
+ %0 = load <2 x i64>, <2 x i64>* @llvm_mips_ave_u_d_ARG1
+ %1 = load <2 x i64>, <2 x i64>* @llvm_mips_ave_u_d_ARG2
%2 = tail call <2 x i64> @llvm.mips.ave.u.d(<2 x i64> %0, <2 x i64> %1)
store <2 x i64> %2, <2 x i64>* @llvm_mips_ave_u_d_RES
ret void
@@ -995,8 +995,8 @@ declare <2 x i64> @llvm.mips.ave.u.d(<2 x i64>, <2 x i64>) nounwind
define void @llvm_mips_aver_s_b_test() nounwind {
entry:
- %0 = load <16 x i8>* @llvm_mips_aver_s_b_ARG1
- %1 = load <16 x i8>* @llvm_mips_aver_s_b_ARG2
+ %0 = load <16 x i8>, <16 x i8>* @llvm_mips_aver_s_b_ARG1
+ %1 = load <16 x i8>, <16 x i8>* @llvm_mips_aver_s_b_ARG2
%2 = tail call <16 x i8> @llvm.mips.aver.s.b(<16 x i8> %0, <16 x i8> %1)
store <16 x i8> %2, <16 x i8>* @llvm_mips_aver_s_b_RES
ret void
@@ -1020,8 +1020,8 @@ declare <16 x i8> @llvm.mips.aver.s.b(<16 x i8>, <16 x i8>) nounwind
define void @llvm_mips_aver_s_h_test() nounwind {
entry:
- %0 = load <8 x i16>* @llvm_mips_aver_s_h_ARG1
- %1 = load <8 x i16>* @llvm_mips_aver_s_h_ARG2
+ %0 = load <8 x i16>, <8 x i16>* @llvm_mips_aver_s_h_ARG1
+ %1 = load <8 x i16>, <8 x i16>* @llvm_mips_aver_s_h_ARG2
%2 = tail call <8 x i16> @llvm.mips.aver.s.h(<8 x i16> %0, <8 x i16> %1)
store <8 x i16> %2, <8 x i16>* @llvm_mips_aver_s_h_RES
ret void
@@ -1045,8 +1045,8 @@ declare <8 x i16> @llvm.mips.aver.s.h(<8 x i16>, <8 x i16>) nounwind
define void @llvm_mips_aver_s_w_test() nounwind {
entry:
- %0 = load <4 x i32>* @llvm_mips_aver_s_w_ARG1
- %1 = load <4 x i32>* @llvm_mips_aver_s_w_ARG2
+ %0 = load <4 x i32>, <4 x i32>* @llvm_mips_aver_s_w_ARG1
+ %1 = load <4 x i32>, <4 x i32>* @llvm_mips_aver_s_w_ARG2
%2 = tail call <4 x i32> @llvm.mips.aver.s.w(<4 x i32> %0, <4 x i32> %1)
store <4 x i32> %2, <4 x i32>* @llvm_mips_aver_s_w_RES
ret void
@@ -1070,8 +1070,8 @@ declare <4 x i32> @llvm.mips.aver.s.w(<4 x i32>, <4 x i32>) nounwind
define void @llvm_mips_aver_s_d_test() nounwind {
entry:
- %0 = load <2 x i64>* @llvm_mips_aver_s_d_ARG1
- %1 = load <2 x i64>* @llvm_mips_aver_s_d_ARG2
+ %0 = load <2 x i64>, <2 x i64>* @llvm_mips_aver_s_d_ARG1
+ %1 = load <2 x i64>, <2 x i64>* @llvm_mips_aver_s_d_ARG2
%2 = tail call <2 x i64> @llvm.mips.aver.s.d(<2 x i64> %0, <2 x i64> %1)
store <2 x i64> %2, <2 x i64>* @llvm_mips_aver_s_d_RES
ret void
@@ -1095,8 +1095,8 @@ declare <2 x i64> @llvm.mips.aver.s.d(<2 x i64>, <2 x i64>) nounwind
define void @llvm_mips_aver_u_b_test() nounwind {
entry:
- %0 = load <16 x i8>* @llvm_mips_aver_u_b_ARG1
- %1 = load <16 x i8>* @llvm_mips_aver_u_b_ARG2
+ %0 = load <16 x i8>, <16 x i8>* @llvm_mips_aver_u_b_ARG1
+ %1 = load <16 x i8>, <16 x i8>* @llvm_mips_aver_u_b_ARG2
%2 = tail call <16 x i8> @llvm.mips.aver.u.b(<16 x i8> %0, <16 x i8> %1)
store <16 x i8> %2, <16 x i8>* @llvm_mips_aver_u_b_RES
ret void
@@ -1120,8 +1120,8 @@ declare <16 x i8> @llvm.mips.aver.u.b(<16 x i8>, <16 x i8>) nounwind
define void @llvm_mips_aver_u_h_test() nounwind {
entry:
- %0 = load <8 x i16>* @llvm_mips_aver_u_h_ARG1
- %1 = load <8 x i16>* @llvm_mips_aver_u_h_ARG2
+ %0 = load <8 x i16>, <8 x i16>* @llvm_mips_aver_u_h_ARG1
+ %1 = load <8 x i16>, <8 x i16>* @llvm_mips_aver_u_h_ARG2
%2 = tail call <8 x i16> @llvm.mips.aver.u.h(<8 x i16> %0, <8 x i16> %1)
store <8 x i16> %2, <8 x i16>* @llvm_mips_aver_u_h_RES
ret void
@@ -1145,8 +1145,8 @@ declare <8 x i16> @llvm.mips.aver.u.h(<8 x i16>, <8 x i16>) nounwind
define void @llvm_mips_aver_u_w_test() nounwind {
entry:
- %0 = load <4 x i32>* @llvm_mips_aver_u_w_ARG1
- %1 = load <4 x i32>* @llvm_mips_aver_u_w_ARG2
+ %0 = load <4 x i32>, <4 x i32>* @llvm_mips_aver_u_w_ARG1
+ %1 = load <4 x i32>, <4 x i32>* @llvm_mips_aver_u_w_ARG2
%2 = tail call <4 x i32> @llvm.mips.aver.u.w(<4 x i32> %0, <4 x i32> %1)
store <4 x i32> %2, <4 x i32>* @llvm_mips_aver_u_w_RES
ret void
@@ -1170,8 +1170,8 @@ declare <4 x i32> @llvm.mips.aver.u.w(<4 x i32>, <4 x i32>) nounwind
define void @llvm_mips_aver_u_d_test() nounwind {
entry:
- %0 = load <2 x i64>* @llvm_mips_aver_u_d_ARG1
- %1 = load <2 x i64>* @llvm_mips_aver_u_d_ARG2
+ %0 = load <2 x i64>, <2 x i64>* @llvm_mips_aver_u_d_ARG1
+ %1 = load <2 x i64>, <2 x i64>* @llvm_mips_aver_u_d_ARG2
%2 = tail call <2 x i64> @llvm.mips.aver.u.d(<2 x i64> %0, <2 x i64> %1)
store <2 x i64> %2, <2 x i64>* @llvm_mips_aver_u_d_RES
ret void
diff --git a/test/CodeGen/Mips/msa/3r-b.ll b/test/CodeGen/Mips/msa/3r-b.ll
index a05d19b4d490..2ecdc4290067 100644
--- a/test/CodeGen/Mips/msa/3r-b.ll
+++ b/test/CodeGen/Mips/msa/3r-b.ll
@@ -10,8 +10,8 @@
define void @llvm_mips_bclr_b_test() nounwind {
entry:
- %0 = load <16 x i8>* @llvm_mips_bclr_b_ARG1
- %1 = load <16 x i8>* @llvm_mips_bclr_b_ARG2
+ %0 = load <16 x i8>, <16 x i8>* @llvm_mips_bclr_b_ARG1
+ %1 = load <16 x i8>, <16 x i8>* @llvm_mips_bclr_b_ARG2
%2 = tail call <16 x i8> @llvm.mips.bclr.b(<16 x i8> %0, <16 x i8> %1)
store <16 x i8> %2, <16 x i8>* @llvm_mips_bclr_b_RES
ret void
@@ -32,8 +32,8 @@ declare <16 x i8> @llvm.mips.bclr.b(<16 x i8>, <16 x i8>) nounwind
define void @llvm_mips_bclr_h_test() nounwind {
entry:
- %0 = load <8 x i16>* @llvm_mips_bclr_h_ARG1
- %1 = load <8 x i16>* @llvm_mips_bclr_h_ARG2
+ %0 = load <8 x i16>, <8 x i16>* @llvm_mips_bclr_h_ARG1
+ %1 = load <8 x i16>, <8 x i16>* @llvm_mips_bclr_h_ARG2
%2 = tail call <8 x i16> @llvm.mips.bclr.h(<8 x i16> %0, <8 x i16> %1)
store <8 x i16> %2, <8 x i16>* @llvm_mips_bclr_h_RES
ret void
@@ -54,8 +54,8 @@ declare <8 x i16> @llvm.mips.bclr.h(<8 x i16>, <8 x i16>) nounwind
define void @llvm_mips_bclr_w_test() nounwind {
entry:
- %0 = load <4 x i32>* @llvm_mips_bclr_w_ARG1
- %1 = load <4 x i32>* @llvm_mips_bclr_w_ARG2
+ %0 = load <4 x i32>, <4 x i32>* @llvm_mips_bclr_w_ARG1
+ %1 = load <4 x i32>, <4 x i32>* @llvm_mips_bclr_w_ARG2
%2 = tail call <4 x i32> @llvm.mips.bclr.w(<4 x i32> %0, <4 x i32> %1)
store <4 x i32> %2, <4 x i32>* @llvm_mips_bclr_w_RES
ret void
@@ -76,8 +76,8 @@ declare <4 x i32> @llvm.mips.bclr.w(<4 x i32>, <4 x i32>) nounwind
define void @llvm_mips_bclr_d_test() nounwind {
entry:
- %0 = load <2 x i64>* @llvm_mips_bclr_d_ARG1
- %1 = load <2 x i64>* @llvm_mips_bclr_d_ARG2
+ %0 = load <2 x i64>, <2 x i64>* @llvm_mips_bclr_d_ARG1
+ %1 = load <2 x i64>, <2 x i64>* @llvm_mips_bclr_d_ARG2
%2 = tail call <2 x i64> @llvm.mips.bclr.d(<2 x i64> %0, <2 x i64> %1)
store <2 x i64> %2, <2 x i64>* @llvm_mips_bclr_d_RES
ret void
@@ -99,9 +99,9 @@ declare <2 x i64> @llvm.mips.bclr.d(<2 x i64>, <2 x i64>) nounwind
define void @llvm_mips_binsl_b_test() nounwind {
entry:
- %0 = load <16 x i8>* @llvm_mips_binsl_b_ARG1
- %1 = load <16 x i8>* @llvm_mips_binsl_b_ARG2
- %2 = load <16 x i8>* @llvm_mips_binsl_b_ARG3
+ %0 = load <16 x i8>, <16 x i8>* @llvm_mips_binsl_b_ARG1
+ %1 = load <16 x i8>, <16 x i8>* @llvm_mips_binsl_b_ARG2
+ %2 = load <16 x i8>, <16 x i8>* @llvm_mips_binsl_b_ARG3
%3 = tail call <16 x i8> @llvm.mips.binsl.b(<16 x i8> %0, <16 x i8> %1, <16 x i8> %2)
store <16 x i8> %3, <16 x i8>* @llvm_mips_binsl_b_RES
ret void
@@ -127,9 +127,9 @@ declare <16 x i8> @llvm.mips.binsl.b(<16 x i8>, <16 x i8>, <16 x i8>) nounwind
define void @llvm_mips_binsl_h_test() nounwind {
entry:
- %0 = load <8 x i16>* @llvm_mips_binsl_h_ARG1
- %1 = load <8 x i16>* @llvm_mips_binsl_h_ARG2
- %2 = load <8 x i16>* @llvm_mips_binsl_h_ARG3
+ %0 = load <8 x i16>, <8 x i16>* @llvm_mips_binsl_h_ARG1
+ %1 = load <8 x i16>, <8 x i16>* @llvm_mips_binsl_h_ARG2
+ %2 = load <8 x i16>, <8 x i16>* @llvm_mips_binsl_h_ARG3
%3 = tail call <8 x i16> @llvm.mips.binsl.h(<8 x i16> %0, <8 x i16> %1, <8 x i16> %2)
store <8 x i16> %3, <8 x i16>* @llvm_mips_binsl_h_RES
ret void
@@ -155,9 +155,9 @@ declare <8 x i16> @llvm.mips.binsl.h(<8 x i16>, <8 x i16>, <8 x i16>) nounwind
define void @llvm_mips_binsl_w_test() nounwind {
entry:
- %0 = load <4 x i32>* @llvm_mips_binsl_w_ARG1
- %1 = load <4 x i32>* @llvm_mips_binsl_w_ARG2
- %2 = load <4 x i32>* @llvm_mips_binsl_w_ARG3
+ %0 = load <4 x i32>, <4 x i32>* @llvm_mips_binsl_w_ARG1
+ %1 = load <4 x i32>, <4 x i32>* @llvm_mips_binsl_w_ARG2
+ %2 = load <4 x i32>, <4 x i32>* @llvm_mips_binsl_w_ARG3
%3 = tail call <4 x i32> @llvm.mips.binsl.w(<4 x i32> %0, <4 x i32> %1, <4 x i32> %2)
store <4 x i32> %3, <4 x i32>* @llvm_mips_binsl_w_RES
ret void
@@ -183,9 +183,9 @@ declare <4 x i32> @llvm.mips.binsl.w(<4 x i32>, <4 x i32>, <4 x i32>) nounwind
define void @llvm_mips_binsl_d_test() nounwind {
entry:
- %0 = load <2 x i64>* @llvm_mips_binsl_d_ARG1
- %1 = load <2 x i64>* @llvm_mips_binsl_d_ARG2
- %2 = load <2 x i64>* @llvm_mips_binsl_d_ARG3
+ %0 = load <2 x i64>, <2 x i64>* @llvm_mips_binsl_d_ARG1
+ %1 = load <2 x i64>, <2 x i64>* @llvm_mips_binsl_d_ARG2
+ %2 = load <2 x i64>, <2 x i64>* @llvm_mips_binsl_d_ARG3
%3 = tail call <2 x i64> @llvm.mips.binsl.d(<2 x i64> %0, <2 x i64> %1, <2 x i64> %2)
store <2 x i64> %3, <2 x i64>* @llvm_mips_binsl_d_RES
ret void
@@ -211,9 +211,9 @@ declare <2 x i64> @llvm.mips.binsl.d(<2 x i64>, <2 x i64>, <2 x i64>) nounwind
define void @llvm_mips_binsr_b_test() nounwind {
entry:
- %0 = load <16 x i8>* @llvm_mips_binsr_b_ARG1
- %1 = load <16 x i8>* @llvm_mips_binsr_b_ARG2
- %2 = load <16 x i8>* @llvm_mips_binsr_b_ARG3
+ %0 = load <16 x i8>, <16 x i8>* @llvm_mips_binsr_b_ARG1
+ %1 = load <16 x i8>, <16 x i8>* @llvm_mips_binsr_b_ARG2
+ %2 = load <16 x i8>, <16 x i8>* @llvm_mips_binsr_b_ARG3
%3 = tail call <16 x i8> @llvm.mips.binsr.b(<16 x i8> %0, <16 x i8> %1, <16 x i8> %2)
store <16 x i8> %3, <16 x i8>* @llvm_mips_binsr_b_RES
ret void
@@ -239,9 +239,9 @@ declare <16 x i8> @llvm.mips.binsr.b(<16 x i8>, <16 x i8>, <16 x i8>) nounwind
define void @llvm_mips_binsr_h_test() nounwind {
entry:
- %0 = load <8 x i16>* @llvm_mips_binsr_h_ARG1
- %1 = load <8 x i16>* @llvm_mips_binsr_h_ARG2
- %2 = load <8 x i16>* @llvm_mips_binsr_h_ARG3
+ %0 = load <8 x i16>, <8 x i16>* @llvm_mips_binsr_h_ARG1
+ %1 = load <8 x i16>, <8 x i16>* @llvm_mips_binsr_h_ARG2
+ %2 = load <8 x i16>, <8 x i16>* @llvm_mips_binsr_h_ARG3
%3 = tail call <8 x i16> @llvm.mips.binsr.h(<8 x i16> %0, <8 x i16> %1, <8 x i16> %2)
store <8 x i16> %3, <8 x i16>* @llvm_mips_binsr_h_RES
ret void
@@ -267,9 +267,9 @@ declare <8 x i16> @llvm.mips.binsr.h(<8 x i16>, <8 x i16>, <8 x i16>) nounwind
define void @llvm_mips_binsr_w_test() nounwind {
entry:
- %0 = load <4 x i32>* @llvm_mips_binsr_w_ARG1
- %1 = load <4 x i32>* @llvm_mips_binsr_w_ARG2
- %2 = load <4 x i32>* @llvm_mips_binsr_w_ARG3
+ %0 = load <4 x i32>, <4 x i32>* @llvm_mips_binsr_w_ARG1
+ %1 = load <4 x i32>, <4 x i32>* @llvm_mips_binsr_w_ARG2
+ %2 = load <4 x i32>, <4 x i32>* @llvm_mips_binsr_w_ARG3
%3 = tail call <4 x i32> @llvm.mips.binsr.w(<4 x i32> %0, <4 x i32> %1, <4 x i32> %2)
store <4 x i32> %3, <4 x i32>* @llvm_mips_binsr_w_RES
ret void
@@ -295,9 +295,9 @@ declare <4 x i32> @llvm.mips.binsr.w(<4 x i32>, <4 x i32>, <4 x i32>) nounwind
define void @llvm_mips_binsr_d_test() nounwind {
entry:
- %0 = load <2 x i64>* @llvm_mips_binsr_d_ARG1
- %1 = load <2 x i64>* @llvm_mips_binsr_d_ARG2
- %2 = load <2 x i64>* @llvm_mips_binsr_d_ARG3
+ %0 = load <2 x i64>, <2 x i64>* @llvm_mips_binsr_d_ARG1
+ %1 = load <2 x i64>, <2 x i64>* @llvm_mips_binsr_d_ARG2
+ %2 = load <2 x i64>, <2 x i64>* @llvm_mips_binsr_d_ARG3
%3 = tail call <2 x i64> @llvm.mips.binsr.d(<2 x i64> %0, <2 x i64> %1, <2 x i64> %2)
store <2 x i64> %3, <2 x i64>* @llvm_mips_binsr_d_RES
ret void
@@ -322,8 +322,8 @@ declare <2 x i64> @llvm.mips.binsr.d(<2 x i64>, <2 x i64>, <2 x i64>) nounwind
define void @llvm_mips_bneg_b_test() nounwind {
entry:
- %0 = load <16 x i8>* @llvm_mips_bneg_b_ARG1
- %1 = load <16 x i8>* @llvm_mips_bneg_b_ARG2
+ %0 = load <16 x i8>, <16 x i8>* @llvm_mips_bneg_b_ARG1
+ %1 = load <16 x i8>, <16 x i8>* @llvm_mips_bneg_b_ARG2
%2 = tail call <16 x i8> @llvm.mips.bneg.b(<16 x i8> %0, <16 x i8> %1)
store <16 x i8> %2, <16 x i8>* @llvm_mips_bneg_b_RES
ret void
@@ -344,8 +344,8 @@ declare <16 x i8> @llvm.mips.bneg.b(<16 x i8>, <16 x i8>) nounwind
define void @llvm_mips_bneg_h_test() nounwind {
entry:
- %0 = load <8 x i16>* @llvm_mips_bneg_h_ARG1
- %1 = load <8 x i16>* @llvm_mips_bneg_h_ARG2
+ %0 = load <8 x i16>, <8 x i16>* @llvm_mips_bneg_h_ARG1
+ %1 = load <8 x i16>, <8 x i16>* @llvm_mips_bneg_h_ARG2
%2 = tail call <8 x i16> @llvm.mips.bneg.h(<8 x i16> %0, <8 x i16> %1)
store <8 x i16> %2, <8 x i16>* @llvm_mips_bneg_h_RES
ret void
@@ -366,8 +366,8 @@ declare <8 x i16> @llvm.mips.bneg.h(<8 x i16>, <8 x i16>) nounwind
define void @llvm_mips_bneg_w_test() nounwind {
entry:
- %0 = load <4 x i32>* @llvm_mips_bneg_w_ARG1
- %1 = load <4 x i32>* @llvm_mips_bneg_w_ARG2
+ %0 = load <4 x i32>, <4 x i32>* @llvm_mips_bneg_w_ARG1
+ %1 = load <4 x i32>, <4 x i32>* @llvm_mips_bneg_w_ARG2
%2 = tail call <4 x i32> @llvm.mips.bneg.w(<4 x i32> %0, <4 x i32> %1)
store <4 x i32> %2, <4 x i32>* @llvm_mips_bneg_w_RES
ret void
@@ -388,8 +388,8 @@ declare <4 x i32> @llvm.mips.bneg.w(<4 x i32>, <4 x i32>) nounwind
define void @llvm_mips_bneg_d_test() nounwind {
entry:
- %0 = load <2 x i64>* @llvm_mips_bneg_d_ARG1
- %1 = load <2 x i64>* @llvm_mips_bneg_d_ARG2
+ %0 = load <2 x i64>, <2 x i64>* @llvm_mips_bneg_d_ARG1
+ %1 = load <2 x i64>, <2 x i64>* @llvm_mips_bneg_d_ARG2
%2 = tail call <2 x i64> @llvm.mips.bneg.d(<2 x i64> %0, <2 x i64> %1)
store <2 x i64> %2, <2 x i64>* @llvm_mips_bneg_d_RES
ret void
@@ -410,8 +410,8 @@ declare <2 x i64> @llvm.mips.bneg.d(<2 x i64>, <2 x i64>) nounwind
define void @llvm_mips_bset_b_test() nounwind {
entry:
- %0 = load <16 x i8>* @llvm_mips_bset_b_ARG1
- %1 = load <16 x i8>* @llvm_mips_bset_b_ARG2
+ %0 = load <16 x i8>, <16 x i8>* @llvm_mips_bset_b_ARG1
+ %1 = load <16 x i8>, <16 x i8>* @llvm_mips_bset_b_ARG2
%2 = tail call <16 x i8> @llvm.mips.bset.b(<16 x i8> %0, <16 x i8> %1)
store <16 x i8> %2, <16 x i8>* @llvm_mips_bset_b_RES
ret void
@@ -432,8 +432,8 @@ declare <16 x i8> @llvm.mips.bset.b(<16 x i8>, <16 x i8>) nounwind
define void @llvm_mips_bset_h_test() nounwind {
entry:
- %0 = load <8 x i16>* @llvm_mips_bset_h_ARG1
- %1 = load <8 x i16>* @llvm_mips_bset_h_ARG2
+ %0 = load <8 x i16>, <8 x i16>* @llvm_mips_bset_h_ARG1
+ %1 = load <8 x i16>, <8 x i16>* @llvm_mips_bset_h_ARG2
%2 = tail call <8 x i16> @llvm.mips.bset.h(<8 x i16> %0, <8 x i16> %1)
store <8 x i16> %2, <8 x i16>* @llvm_mips_bset_h_RES
ret void
@@ -454,8 +454,8 @@ declare <8 x i16> @llvm.mips.bset.h(<8 x i16>, <8 x i16>) nounwind
define void @llvm_mips_bset_w_test() nounwind {
entry:
- %0 = load <4 x i32>* @llvm_mips_bset_w_ARG1
- %1 = load <4 x i32>* @llvm_mips_bset_w_ARG2
+ %0 = load <4 x i32>, <4 x i32>* @llvm_mips_bset_w_ARG1
+ %1 = load <4 x i32>, <4 x i32>* @llvm_mips_bset_w_ARG2
%2 = tail call <4 x i32> @llvm.mips.bset.w(<4 x i32> %0, <4 x i32> %1)
store <4 x i32> %2, <4 x i32>* @llvm_mips_bset_w_RES
ret void
@@ -476,8 +476,8 @@ declare <4 x i32> @llvm.mips.bset.w(<4 x i32>, <4 x i32>) nounwind
define void @llvm_mips_bset_d_test() nounwind {
entry:
- %0 = load <2 x i64>* @llvm_mips_bset_d_ARG1
- %1 = load <2 x i64>* @llvm_mips_bset_d_ARG2
+ %0 = load <2 x i64>, <2 x i64>* @llvm_mips_bset_d_ARG1
+ %1 = load <2 x i64>, <2 x i64>* @llvm_mips_bset_d_ARG2
%2 = tail call <2 x i64> @llvm.mips.bset.d(<2 x i64> %0, <2 x i64> %1)
store <2 x i64> %2, <2 x i64>* @llvm_mips_bset_d_RES
ret void
diff --git a/test/CodeGen/Mips/msa/3r-c.ll b/test/CodeGen/Mips/msa/3r-c.ll
index 6ec92c284fec..a3913e0a27fd 100644
--- a/test/CodeGen/Mips/msa/3r-c.ll
+++ b/test/CodeGen/Mips/msa/3r-c.ll
@@ -10,8 +10,8 @@
define void @llvm_mips_ceq_b_test() nounwind {
entry:
- %0 = load <16 x i8>* @llvm_mips_ceq_b_ARG1
- %1 = load <16 x i8>* @llvm_mips_ceq_b_ARG2
+ %0 = load <16 x i8>, <16 x i8>* @llvm_mips_ceq_b_ARG1
+ %1 = load <16 x i8>, <16 x i8>* @llvm_mips_ceq_b_ARG2
%2 = tail call <16 x i8> @llvm.mips.ceq.b(<16 x i8> %0, <16 x i8> %1)
store <16 x i8> %2, <16 x i8>* @llvm_mips_ceq_b_RES
ret void
@@ -32,8 +32,8 @@ declare <16 x i8> @llvm.mips.ceq.b(<16 x i8>, <16 x i8>) nounwind
define void @llvm_mips_ceq_h_test() nounwind {
entry:
- %0 = load <8 x i16>* @llvm_mips_ceq_h_ARG1
- %1 = load <8 x i16>* @llvm_mips_ceq_h_ARG2
+ %0 = load <8 x i16>, <8 x i16>* @llvm_mips_ceq_h_ARG1
+ %1 = load <8 x i16>, <8 x i16>* @llvm_mips_ceq_h_ARG2
%2 = tail call <8 x i16> @llvm.mips.ceq.h(<8 x i16> %0, <8 x i16> %1)
store <8 x i16> %2, <8 x i16>* @llvm_mips_ceq_h_RES
ret void
@@ -54,8 +54,8 @@ declare <8 x i16> @llvm.mips.ceq.h(<8 x i16>, <8 x i16>) nounwind
define void @llvm_mips_ceq_w_test() nounwind {
entry:
- %0 = load <4 x i32>* @llvm_mips_ceq_w_ARG1
- %1 = load <4 x i32>* @llvm_mips_ceq_w_ARG2
+ %0 = load <4 x i32>, <4 x i32>* @llvm_mips_ceq_w_ARG1
+ %1 = load <4 x i32>, <4 x i32>* @llvm_mips_ceq_w_ARG2
%2 = tail call <4 x i32> @llvm.mips.ceq.w(<4 x i32> %0, <4 x i32> %1)
store <4 x i32> %2, <4 x i32>* @llvm_mips_ceq_w_RES
ret void
@@ -76,8 +76,8 @@ declare <4 x i32> @llvm.mips.ceq.w(<4 x i32>, <4 x i32>) nounwind
define void @llvm_mips_ceq_d_test() nounwind {
entry:
- %0 = load <2 x i64>* @llvm_mips_ceq_d_ARG1
- %1 = load <2 x i64>* @llvm_mips_ceq_d_ARG2
+ %0 = load <2 x i64>, <2 x i64>* @llvm_mips_ceq_d_ARG1
+ %1 = load <2 x i64>, <2 x i64>* @llvm_mips_ceq_d_ARG2
%2 = tail call <2 x i64> @llvm.mips.ceq.d(<2 x i64> %0, <2 x i64> %1)
store <2 x i64> %2, <2 x i64>* @llvm_mips_ceq_d_RES
ret void
@@ -98,8 +98,8 @@ declare <2 x i64> @llvm.mips.ceq.d(<2 x i64>, <2 x i64>) nounwind
define void @llvm_mips_cle_s_b_test() nounwind {
entry:
- %0 = load <16 x i8>* @llvm_mips_cle_s_b_ARG1
- %1 = load <16 x i8>* @llvm_mips_cle_s_b_ARG2
+ %0 = load <16 x i8>, <16 x i8>* @llvm_mips_cle_s_b_ARG1
+ %1 = load <16 x i8>, <16 x i8>* @llvm_mips_cle_s_b_ARG2
%2 = tail call <16 x i8> @llvm.mips.cle.s.b(<16 x i8> %0, <16 x i8> %1)
store <16 x i8> %2, <16 x i8>* @llvm_mips_cle_s_b_RES
ret void
@@ -120,8 +120,8 @@ declare <16 x i8> @llvm.mips.cle.s.b(<16 x i8>, <16 x i8>) nounwind
define void @llvm_mips_cle_s_h_test() nounwind {
entry:
- %0 = load <8 x i16>* @llvm_mips_cle_s_h_ARG1
- %1 = load <8 x i16>* @llvm_mips_cle_s_h_ARG2
+ %0 = load <8 x i16>, <8 x i16>* @llvm_mips_cle_s_h_ARG1
+ %1 = load <8 x i16>, <8 x i16>* @llvm_mips_cle_s_h_ARG2
%2 = tail call <8 x i16> @llvm.mips.cle.s.h(<8 x i16> %0, <8 x i16> %1)
store <8 x i16> %2, <8 x i16>* @llvm_mips_cle_s_h_RES
ret void
@@ -142,8 +142,8 @@ declare <8 x i16> @llvm.mips.cle.s.h(<8 x i16>, <8 x i16>) nounwind
define void @llvm_mips_cle_s_w_test() nounwind {
entry:
- %0 = load <4 x i32>* @llvm_mips_cle_s_w_ARG1
- %1 = load <4 x i32>* @llvm_mips_cle_s_w_ARG2
+ %0 = load <4 x i32>, <4 x i32>* @llvm_mips_cle_s_w_ARG1
+ %1 = load <4 x i32>, <4 x i32>* @llvm_mips_cle_s_w_ARG2
%2 = tail call <4 x i32> @llvm.mips.cle.s.w(<4 x i32> %0, <4 x i32> %1)
store <4 x i32> %2, <4 x i32>* @llvm_mips_cle_s_w_RES
ret void
@@ -164,8 +164,8 @@ declare <4 x i32> @llvm.mips.cle.s.w(<4 x i32>, <4 x i32>) nounwind
define void @llvm_mips_cle_s_d_test() nounwind {
entry:
- %0 = load <2 x i64>* @llvm_mips_cle_s_d_ARG1
- %1 = load <2 x i64>* @llvm_mips_cle_s_d_ARG2
+ %0 = load <2 x i64>, <2 x i64>* @llvm_mips_cle_s_d_ARG1
+ %1 = load <2 x i64>, <2 x i64>* @llvm_mips_cle_s_d_ARG2
%2 = tail call <2 x i64> @llvm.mips.cle.s.d(<2 x i64> %0, <2 x i64> %1)
store <2 x i64> %2, <2 x i64>* @llvm_mips_cle_s_d_RES
ret void
@@ -186,8 +186,8 @@ declare <2 x i64> @llvm.mips.cle.s.d(<2 x i64>, <2 x i64>) nounwind
define void @llvm_mips_cle_u_b_test() nounwind {
entry:
- %0 = load <16 x i8>* @llvm_mips_cle_u_b_ARG1
- %1 = load <16 x i8>* @llvm_mips_cle_u_b_ARG2
+ %0 = load <16 x i8>, <16 x i8>* @llvm_mips_cle_u_b_ARG1
+ %1 = load <16 x i8>, <16 x i8>* @llvm_mips_cle_u_b_ARG2
%2 = tail call <16 x i8> @llvm.mips.cle.u.b(<16 x i8> %0, <16 x i8> %1)
store <16 x i8> %2, <16 x i8>* @llvm_mips_cle_u_b_RES
ret void
@@ -208,8 +208,8 @@ declare <16 x i8> @llvm.mips.cle.u.b(<16 x i8>, <16 x i8>) nounwind
define void @llvm_mips_cle_u_h_test() nounwind {
entry:
- %0 = load <8 x i16>* @llvm_mips_cle_u_h_ARG1
- %1 = load <8 x i16>* @llvm_mips_cle_u_h_ARG2
+ %0 = load <8 x i16>, <8 x i16>* @llvm_mips_cle_u_h_ARG1
+ %1 = load <8 x i16>, <8 x i16>* @llvm_mips_cle_u_h_ARG2
%2 = tail call <8 x i16> @llvm.mips.cle.u.h(<8 x i16> %0, <8 x i16> %1)
store <8 x i16> %2, <8 x i16>* @llvm_mips_cle_u_h_RES
ret void
@@ -230,8 +230,8 @@ declare <8 x i16> @llvm.mips.cle.u.h(<8 x i16>, <8 x i16>) nounwind
define void @llvm_mips_cle_u_w_test() nounwind {
entry:
- %0 = load <4 x i32>* @llvm_mips_cle_u_w_ARG1
- %1 = load <4 x i32>* @llvm_mips_cle_u_w_ARG2
+ %0 = load <4 x i32>, <4 x i32>* @llvm_mips_cle_u_w_ARG1
+ %1 = load <4 x i32>, <4 x i32>* @llvm_mips_cle_u_w_ARG2
%2 = tail call <4 x i32> @llvm.mips.cle.u.w(<4 x i32> %0, <4 x i32> %1)
store <4 x i32> %2, <4 x i32>* @llvm_mips_cle_u_w_RES
ret void
@@ -252,8 +252,8 @@ declare <4 x i32> @llvm.mips.cle.u.w(<4 x i32>, <4 x i32>) nounwind
define void @llvm_mips_cle_u_d_test() nounwind {
entry:
- %0 = load <2 x i64>* @llvm_mips_cle_u_d_ARG1
- %1 = load <2 x i64>* @llvm_mips_cle_u_d_ARG2
+ %0 = load <2 x i64>, <2 x i64>* @llvm_mips_cle_u_d_ARG1
+ %1 = load <2 x i64>, <2 x i64>* @llvm_mips_cle_u_d_ARG2
%2 = tail call <2 x i64> @llvm.mips.cle.u.d(<2 x i64> %0, <2 x i64> %1)
store <2 x i64> %2, <2 x i64>* @llvm_mips_cle_u_d_RES
ret void
@@ -274,8 +274,8 @@ declare <2 x i64> @llvm.mips.cle.u.d(<2 x i64>, <2 x i64>) nounwind
define void @llvm_mips_clt_s_b_test() nounwind {
entry:
- %0 = load <16 x i8>* @llvm_mips_clt_s_b_ARG1
- %1 = load <16 x i8>* @llvm_mips_clt_s_b_ARG2
+ %0 = load <16 x i8>, <16 x i8>* @llvm_mips_clt_s_b_ARG1
+ %1 = load <16 x i8>, <16 x i8>* @llvm_mips_clt_s_b_ARG2
%2 = tail call <16 x i8> @llvm.mips.clt.s.b(<16 x i8> %0, <16 x i8> %1)
store <16 x i8> %2, <16 x i8>* @llvm_mips_clt_s_b_RES
ret void
@@ -296,8 +296,8 @@ declare <16 x i8> @llvm.mips.clt.s.b(<16 x i8>, <16 x i8>) nounwind
define void @llvm_mips_clt_s_h_test() nounwind {
entry:
- %0 = load <8 x i16>* @llvm_mips_clt_s_h_ARG1
- %1 = load <8 x i16>* @llvm_mips_clt_s_h_ARG2
+ %0 = load <8 x i16>, <8 x i16>* @llvm_mips_clt_s_h_ARG1
+ %1 = load <8 x i16>, <8 x i16>* @llvm_mips_clt_s_h_ARG2
%2 = tail call <8 x i16> @llvm.mips.clt.s.h(<8 x i16> %0, <8 x i16> %1)
store <8 x i16> %2, <8 x i16>* @llvm_mips_clt_s_h_RES
ret void
@@ -318,8 +318,8 @@ declare <8 x i16> @llvm.mips.clt.s.h(<8 x i16>, <8 x i16>) nounwind
define void @llvm_mips_clt_s_w_test() nounwind {
entry:
- %0 = load <4 x i32>* @llvm_mips_clt_s_w_ARG1
- %1 = load <4 x i32>* @llvm_mips_clt_s_w_ARG2
+ %0 = load <4 x i32>, <4 x i32>* @llvm_mips_clt_s_w_ARG1
+ %1 = load <4 x i32>, <4 x i32>* @llvm_mips_clt_s_w_ARG2
%2 = tail call <4 x i32> @llvm.mips.clt.s.w(<4 x i32> %0, <4 x i32> %1)
store <4 x i32> %2, <4 x i32>* @llvm_mips_clt_s_w_RES
ret void
@@ -340,8 +340,8 @@ declare <4 x i32> @llvm.mips.clt.s.w(<4 x i32>, <4 x i32>) nounwind
define void @llvm_mips_clt_s_d_test() nounwind {
entry:
- %0 = load <2 x i64>* @llvm_mips_clt_s_d_ARG1
- %1 = load <2 x i64>* @llvm_mips_clt_s_d_ARG2
+ %0 = load <2 x i64>, <2 x i64>* @llvm_mips_clt_s_d_ARG1
+ %1 = load <2 x i64>, <2 x i64>* @llvm_mips_clt_s_d_ARG2
%2 = tail call <2 x i64> @llvm.mips.clt.s.d(<2 x i64> %0, <2 x i64> %1)
store <2 x i64> %2, <2 x i64>* @llvm_mips_clt_s_d_RES
ret void
@@ -362,8 +362,8 @@ declare <2 x i64> @llvm.mips.clt.s.d(<2 x i64>, <2 x i64>) nounwind
define void @llvm_mips_clt_u_b_test() nounwind {
entry:
- %0 = load <16 x i8>* @llvm_mips_clt_u_b_ARG1
- %1 = load <16 x i8>* @llvm_mips_clt_u_b_ARG2
+ %0 = load <16 x i8>, <16 x i8>* @llvm_mips_clt_u_b_ARG1
+ %1 = load <16 x i8>, <16 x i8>* @llvm_mips_clt_u_b_ARG2
%2 = tail call <16 x i8> @llvm.mips.clt.u.b(<16 x i8> %0, <16 x i8> %1)
store <16 x i8> %2, <16 x i8>* @llvm_mips_clt_u_b_RES
ret void
@@ -384,8 +384,8 @@ declare <16 x i8> @llvm.mips.clt.u.b(<16 x i8>, <16 x i8>) nounwind
define void @llvm_mips_clt_u_h_test() nounwind {
entry:
- %0 = load <8 x i16>* @llvm_mips_clt_u_h_ARG1
- %1 = load <8 x i16>* @llvm_mips_clt_u_h_ARG2
+ %0 = load <8 x i16>, <8 x i16>* @llvm_mips_clt_u_h_ARG1
+ %1 = load <8 x i16>, <8 x i16>* @llvm_mips_clt_u_h_ARG2
%2 = tail call <8 x i16> @llvm.mips.clt.u.h(<8 x i16> %0, <8 x i16> %1)
store <8 x i16> %2, <8 x i16>* @llvm_mips_clt_u_h_RES
ret void
@@ -406,8 +406,8 @@ declare <8 x i16> @llvm.mips.clt.u.h(<8 x i16>, <8 x i16>) nounwind
define void @llvm_mips_clt_u_w_test() nounwind {
entry:
- %0 = load <4 x i32>* @llvm_mips_clt_u_w_ARG1
- %1 = load <4 x i32>* @llvm_mips_clt_u_w_ARG2
+ %0 = load <4 x i32>, <4 x i32>* @llvm_mips_clt_u_w_ARG1
+ %1 = load <4 x i32>, <4 x i32>* @llvm_mips_clt_u_w_ARG2
%2 = tail call <4 x i32> @llvm.mips.clt.u.w(<4 x i32> %0, <4 x i32> %1)
store <4 x i32> %2, <4 x i32>* @llvm_mips_clt_u_w_RES
ret void
@@ -428,8 +428,8 @@ declare <4 x i32> @llvm.mips.clt.u.w(<4 x i32>, <4 x i32>) nounwind
define void @llvm_mips_clt_u_d_test() nounwind {
entry:
- %0 = load <2 x i64>* @llvm_mips_clt_u_d_ARG1
- %1 = load <2 x i64>* @llvm_mips_clt_u_d_ARG2
+ %0 = load <2 x i64>, <2 x i64>* @llvm_mips_clt_u_d_ARG1
+ %1 = load <2 x i64>, <2 x i64>* @llvm_mips_clt_u_d_ARG2
%2 = tail call <2 x i64> @llvm.mips.clt.u.d(<2 x i64> %0, <2 x i64> %1)
store <2 x i64> %2, <2 x i64>* @llvm_mips_clt_u_d_RES
ret void
diff --git a/test/CodeGen/Mips/msa/3r-d.ll b/test/CodeGen/Mips/msa/3r-d.ll
index 0099554a8eea..4fc32b76a7b2 100644
--- a/test/CodeGen/Mips/msa/3r-d.ll
+++ b/test/CodeGen/Mips/msa/3r-d.ll
@@ -10,8 +10,8 @@
define void @llvm_mips_div_s_b_test() nounwind {
entry:
- %0 = load <16 x i8>* @llvm_mips_div_s_b_ARG1
- %1 = load <16 x i8>* @llvm_mips_div_s_b_ARG2
+ %0 = load <16 x i8>, <16 x i8>* @llvm_mips_div_s_b_ARG1
+ %1 = load <16 x i8>, <16 x i8>* @llvm_mips_div_s_b_ARG2
%2 = tail call <16 x i8> @llvm.mips.div.s.b(<16 x i8> %0, <16 x i8> %1)
store <16 x i8> %2, <16 x i8>* @llvm_mips_div_s_b_RES
ret void
@@ -32,8 +32,8 @@ declare <16 x i8> @llvm.mips.div.s.b(<16 x i8>, <16 x i8>) nounwind
define void @llvm_mips_div_s_h_test() nounwind {
entry:
- %0 = load <8 x i16>* @llvm_mips_div_s_h_ARG1
- %1 = load <8 x i16>* @llvm_mips_div_s_h_ARG2
+ %0 = load <8 x i16>, <8 x i16>* @llvm_mips_div_s_h_ARG1
+ %1 = load <8 x i16>, <8 x i16>* @llvm_mips_div_s_h_ARG2
%2 = tail call <8 x i16> @llvm.mips.div.s.h(<8 x i16> %0, <8 x i16> %1)
store <8 x i16> %2, <8 x i16>* @llvm_mips_div_s_h_RES
ret void
@@ -54,8 +54,8 @@ declare <8 x i16> @llvm.mips.div.s.h(<8 x i16>, <8 x i16>) nounwind
define void @llvm_mips_div_s_w_test() nounwind {
entry:
- %0 = load <4 x i32>* @llvm_mips_div_s_w_ARG1
- %1 = load <4 x i32>* @llvm_mips_div_s_w_ARG2
+ %0 = load <4 x i32>, <4 x i32>* @llvm_mips_div_s_w_ARG1
+ %1 = load <4 x i32>, <4 x i32>* @llvm_mips_div_s_w_ARG2
%2 = tail call <4 x i32> @llvm.mips.div.s.w(<4 x i32> %0, <4 x i32> %1)
store <4 x i32> %2, <4 x i32>* @llvm_mips_div_s_w_RES
ret void
@@ -76,8 +76,8 @@ declare <4 x i32> @llvm.mips.div.s.w(<4 x i32>, <4 x i32>) nounwind
define void @llvm_mips_div_s_d_test() nounwind {
entry:
- %0 = load <2 x i64>* @llvm_mips_div_s_d_ARG1
- %1 = load <2 x i64>* @llvm_mips_div_s_d_ARG2
+ %0 = load <2 x i64>, <2 x i64>* @llvm_mips_div_s_d_ARG1
+ %1 = load <2 x i64>, <2 x i64>* @llvm_mips_div_s_d_ARG2
%2 = tail call <2 x i64> @llvm.mips.div.s.d(<2 x i64> %0, <2 x i64> %1)
store <2 x i64> %2, <2 x i64>* @llvm_mips_div_s_d_RES
ret void
@@ -95,8 +95,8 @@ declare <2 x i64> @llvm.mips.div.s.d(<2 x i64>, <2 x i64>) nounwind
define void @div_s_b_test() nounwind {
entry:
- %0 = load <16 x i8>* @llvm_mips_div_s_b_ARG1
- %1 = load <16 x i8>* @llvm_mips_div_s_b_ARG2
+ %0 = load <16 x i8>, <16 x i8>* @llvm_mips_div_s_b_ARG1
+ %1 = load <16 x i8>, <16 x i8>* @llvm_mips_div_s_b_ARG2
%2 = sdiv <16 x i8> %0, %1
store <16 x i8> %2, <16 x i8>* @llvm_mips_div_s_b_RES
ret void
@@ -111,8 +111,8 @@ entry:
define void @div_s_h_test() nounwind {
entry:
- %0 = load <8 x i16>* @llvm_mips_div_s_h_ARG1
- %1 = load <8 x i16>* @llvm_mips_div_s_h_ARG2
+ %0 = load <8 x i16>, <8 x i16>* @llvm_mips_div_s_h_ARG1
+ %1 = load <8 x i16>, <8 x i16>* @llvm_mips_div_s_h_ARG2
%2 = sdiv <8 x i16> %0, %1
store <8 x i16> %2, <8 x i16>* @llvm_mips_div_s_h_RES
ret void
@@ -127,8 +127,8 @@ entry:
define void @div_s_w_test() nounwind {
entry:
- %0 = load <4 x i32>* @llvm_mips_div_s_w_ARG1
- %1 = load <4 x i32>* @llvm_mips_div_s_w_ARG2
+ %0 = load <4 x i32>, <4 x i32>* @llvm_mips_div_s_w_ARG1
+ %1 = load <4 x i32>, <4 x i32>* @llvm_mips_div_s_w_ARG2
%2 = sdiv <4 x i32> %0, %1
store <4 x i32> %2, <4 x i32>* @llvm_mips_div_s_w_RES
ret void
@@ -143,8 +143,8 @@ entry:
define void @div_s_d_test() nounwind {
entry:
- %0 = load <2 x i64>* @llvm_mips_div_s_d_ARG1
- %1 = load <2 x i64>* @llvm_mips_div_s_d_ARG2
+ %0 = load <2 x i64>, <2 x i64>* @llvm_mips_div_s_d_ARG1
+ %1 = load <2 x i64>, <2 x i64>* @llvm_mips_div_s_d_ARG2
%2 = sdiv <2 x i64> %0, %1
store <2 x i64> %2, <2 x i64>* @llvm_mips_div_s_d_RES
ret void
@@ -163,8 +163,8 @@ entry:
define void @llvm_mips_div_u_b_test() nounwind {
entry:
- %0 = load <16 x i8>* @llvm_mips_div_u_b_ARG1
- %1 = load <16 x i8>* @llvm_mips_div_u_b_ARG2
+ %0 = load <16 x i8>, <16 x i8>* @llvm_mips_div_u_b_ARG1
+ %1 = load <16 x i8>, <16 x i8>* @llvm_mips_div_u_b_ARG2
%2 = tail call <16 x i8> @llvm.mips.div.u.b(<16 x i8> %0, <16 x i8> %1)
store <16 x i8> %2, <16 x i8>* @llvm_mips_div_u_b_RES
ret void
@@ -185,8 +185,8 @@ declare <16 x i8> @llvm.mips.div.u.b(<16 x i8>, <16 x i8>) nounwind
define void @llvm_mips_div_u_h_test() nounwind {
entry:
- %0 = load <8 x i16>* @llvm_mips_div_u_h_ARG1
- %1 = load <8 x i16>* @llvm_mips_div_u_h_ARG2
+ %0 = load <8 x i16>, <8 x i16>* @llvm_mips_div_u_h_ARG1
+ %1 = load <8 x i16>, <8 x i16>* @llvm_mips_div_u_h_ARG2
%2 = tail call <8 x i16> @llvm.mips.div.u.h(<8 x i16> %0, <8 x i16> %1)
store <8 x i16> %2, <8 x i16>* @llvm_mips_div_u_h_RES
ret void
@@ -207,8 +207,8 @@ declare <8 x i16> @llvm.mips.div.u.h(<8 x i16>, <8 x i16>) nounwind
define void @llvm_mips_div_u_w_test() nounwind {
entry:
- %0 = load <4 x i32>* @llvm_mips_div_u_w_ARG1
- %1 = load <4 x i32>* @llvm_mips_div_u_w_ARG2
+ %0 = load <4 x i32>, <4 x i32>* @llvm_mips_div_u_w_ARG1
+ %1 = load <4 x i32>, <4 x i32>* @llvm_mips_div_u_w_ARG2
%2 = tail call <4 x i32> @llvm.mips.div.u.w(<4 x i32> %0, <4 x i32> %1)
store <4 x i32> %2, <4 x i32>* @llvm_mips_div_u_w_RES
ret void
@@ -229,8 +229,8 @@ declare <4 x i32> @llvm.mips.div.u.w(<4 x i32>, <4 x i32>) nounwind
define void @llvm_mips_div_u_d_test() nounwind {
entry:
- %0 = load <2 x i64>* @llvm_mips_div_u_d_ARG1
- %1 = load <2 x i64>* @llvm_mips_div_u_d_ARG2
+ %0 = load <2 x i64>, <2 x i64>* @llvm_mips_div_u_d_ARG1
+ %1 = load <2 x i64>, <2 x i64>* @llvm_mips_div_u_d_ARG2
%2 = tail call <2 x i64> @llvm.mips.div.u.d(<2 x i64> %0, <2 x i64> %1)
store <2 x i64> %2, <2 x i64>* @llvm_mips_div_u_d_RES
ret void
@@ -248,8 +248,8 @@ declare <2 x i64> @llvm.mips.div.u.d(<2 x i64>, <2 x i64>) nounwind
define void @div_u_b_test() nounwind {
entry:
- %0 = load <16 x i8>* @llvm_mips_div_u_b_ARG1
- %1 = load <16 x i8>* @llvm_mips_div_u_b_ARG2
+ %0 = load <16 x i8>, <16 x i8>* @llvm_mips_div_u_b_ARG1
+ %1 = load <16 x i8>, <16 x i8>* @llvm_mips_div_u_b_ARG2
%2 = udiv <16 x i8> %0, %1
store <16 x i8> %2, <16 x i8>* @llvm_mips_div_u_b_RES
ret void
@@ -264,8 +264,8 @@ entry:
define void @div_u_h_test() nounwind {
entry:
- %0 = load <8 x i16>* @llvm_mips_div_u_h_ARG1
- %1 = load <8 x i16>* @llvm_mips_div_u_h_ARG2
+ %0 = load <8 x i16>, <8 x i16>* @llvm_mips_div_u_h_ARG1
+ %1 = load <8 x i16>, <8 x i16>* @llvm_mips_div_u_h_ARG2
%2 = udiv <8 x i16> %0, %1
store <8 x i16> %2, <8 x i16>* @llvm_mips_div_u_h_RES
ret void
@@ -280,8 +280,8 @@ entry:
define void @div_u_w_test() nounwind {
entry:
- %0 = load <4 x i32>* @llvm_mips_div_u_w_ARG1
- %1 = load <4 x i32>* @llvm_mips_div_u_w_ARG2
+ %0 = load <4 x i32>, <4 x i32>* @llvm_mips_div_u_w_ARG1
+ %1 = load <4 x i32>, <4 x i32>* @llvm_mips_div_u_w_ARG2
%2 = udiv <4 x i32> %0, %1
store <4 x i32> %2, <4 x i32>* @llvm_mips_div_u_w_RES
ret void
@@ -296,8 +296,8 @@ entry:
define void @div_u_d_test() nounwind {
entry:
- %0 = load <2 x i64>* @llvm_mips_div_u_d_ARG1
- %1 = load <2 x i64>* @llvm_mips_div_u_d_ARG2
+ %0 = load <2 x i64>, <2 x i64>* @llvm_mips_div_u_d_ARG1
+ %1 = load <2 x i64>, <2 x i64>* @llvm_mips_div_u_d_ARG2
%2 = udiv <2 x i64> %0, %1
store <2 x i64> %2, <2 x i64>* @llvm_mips_div_u_d_RES
ret void
@@ -326,8 +326,8 @@ entry:
define void @llvm_mips_dotp_s_h_test() nounwind {
entry:
- %0 = load <16 x i8>* @llvm_mips_dotp_s_h_ARG1
- %1 = load <16 x i8>* @llvm_mips_dotp_s_h_ARG2
+ %0 = load <16 x i8>, <16 x i8>* @llvm_mips_dotp_s_h_ARG1
+ %1 = load <16 x i8>, <16 x i8>* @llvm_mips_dotp_s_h_ARG2
%2 = tail call <8 x i16> @llvm.mips.dotp.s.h(<16 x i8> %0, <16 x i8> %1)
store <8 x i16> %2, <8 x i16>* @llvm_mips_dotp_s_h_RES
ret void
@@ -353,8 +353,8 @@ declare <8 x i16> @llvm.mips.dotp.s.h(<16 x i8>, <16 x i8>) nounwind
define void @llvm_mips_dotp_s_w_test() nounwind {
entry:
- %0 = load <8 x i16>* @llvm_mips_dotp_s_w_ARG1
- %1 = load <8 x i16>* @llvm_mips_dotp_s_w_ARG2
+ %0 = load <8 x i16>, <8 x i16>* @llvm_mips_dotp_s_w_ARG1
+ %1 = load <8 x i16>, <8 x i16>* @llvm_mips_dotp_s_w_ARG2
%2 = tail call <4 x i32> @llvm.mips.dotp.s.w(<8 x i16> %0, <8 x i16> %1)
store <4 x i32> %2, <4 x i32>* @llvm_mips_dotp_s_w_RES
ret void
@@ -377,8 +377,8 @@ declare <4 x i32> @llvm.mips.dotp.s.w(<8 x i16>, <8 x i16>) nounwind
define void @llvm_mips_dotp_s_d_test() nounwind {
entry:
- %0 = load <4 x i32>* @llvm_mips_dotp_s_d_ARG1
- %1 = load <4 x i32>* @llvm_mips_dotp_s_d_ARG2
+ %0 = load <4 x i32>, <4 x i32>* @llvm_mips_dotp_s_d_ARG1
+ %1 = load <4 x i32>, <4 x i32>* @llvm_mips_dotp_s_d_ARG2
%2 = tail call <2 x i64> @llvm.mips.dotp.s.d(<4 x i32> %0, <4 x i32> %1)
store <2 x i64> %2, <2 x i64>* @llvm_mips_dotp_s_d_RES
ret void
@@ -409,8 +409,8 @@ declare <2 x i64> @llvm.mips.dotp.s.d(<4 x i32>, <4 x i32>) nounwind
define void @llvm_mips_dotp_u_h_test() nounwind {
entry:
- %0 = load <16 x i8>* @llvm_mips_dotp_u_h_ARG1
- %1 = load <16 x i8>* @llvm_mips_dotp_u_h_ARG2
+ %0 = load <16 x i8>, <16 x i8>* @llvm_mips_dotp_u_h_ARG1
+ %1 = load <16 x i8>, <16 x i8>* @llvm_mips_dotp_u_h_ARG2
%2 = tail call <8 x i16> @llvm.mips.dotp.u.h(<16 x i8> %0, <16 x i8> %1)
store <8 x i16> %2, <8 x i16>* @llvm_mips_dotp_u_h_RES
ret void
@@ -436,8 +436,8 @@ declare <8 x i16> @llvm.mips.dotp.u.h(<16 x i8>, <16 x i8>) nounwind
define void @llvm_mips_dotp_u_w_test() nounwind {
entry:
- %0 = load <8 x i16>* @llvm_mips_dotp_u_w_ARG1
- %1 = load <8 x i16>* @llvm_mips_dotp_u_w_ARG2
+ %0 = load <8 x i16>, <8 x i16>* @llvm_mips_dotp_u_w_ARG1
+ %1 = load <8 x i16>, <8 x i16>* @llvm_mips_dotp_u_w_ARG2
%2 = tail call <4 x i32> @llvm.mips.dotp.u.w(<8 x i16> %0, <8 x i16> %1)
store <4 x i32> %2, <4 x i32>* @llvm_mips_dotp_u_w_RES
ret void
@@ -460,8 +460,8 @@ declare <4 x i32> @llvm.mips.dotp.u.w(<8 x i16>, <8 x i16>) nounwind
define void @llvm_mips_dotp_u_d_test() nounwind {
entry:
- %0 = load <4 x i32>* @llvm_mips_dotp_u_d_ARG1
- %1 = load <4 x i32>* @llvm_mips_dotp_u_d_ARG2
+ %0 = load <4 x i32>, <4 x i32>* @llvm_mips_dotp_u_d_ARG1
+ %1 = load <4 x i32>, <4 x i32>* @llvm_mips_dotp_u_d_ARG2
%2 = tail call <2 x i64> @llvm.mips.dotp.u.d(<4 x i32> %0, <4 x i32> %1)
store <2 x i64> %2, <2 x i64>* @llvm_mips_dotp_u_d_RES
ret void
diff --git a/test/CodeGen/Mips/msa/3r-i.ll b/test/CodeGen/Mips/msa/3r-i.ll
index 2ef30471b026..7147b756b15f 100644
--- a/test/CodeGen/Mips/msa/3r-i.ll
+++ b/test/CodeGen/Mips/msa/3r-i.ll
@@ -10,8 +10,8 @@
define void @llvm_mips_ilvev_b_test() nounwind {
entry:
- %0 = load <16 x i8>* @llvm_mips_ilvev_b_ARG1
- %1 = load <16 x i8>* @llvm_mips_ilvev_b_ARG2
+ %0 = load <16 x i8>, <16 x i8>* @llvm_mips_ilvev_b_ARG1
+ %1 = load <16 x i8>, <16 x i8>* @llvm_mips_ilvev_b_ARG2
%2 = tail call <16 x i8> @llvm.mips.ilvev.b(<16 x i8> %0, <16 x i8> %1)
store <16 x i8> %2, <16 x i8>* @llvm_mips_ilvev_b_RES
ret void
@@ -32,8 +32,8 @@ declare <16 x i8> @llvm.mips.ilvev.b(<16 x i8>, <16 x i8>) nounwind
define void @llvm_mips_ilvev_h_test() nounwind {
entry:
- %0 = load <8 x i16>* @llvm_mips_ilvev_h_ARG1
- %1 = load <8 x i16>* @llvm_mips_ilvev_h_ARG2
+ %0 = load <8 x i16>, <8 x i16>* @llvm_mips_ilvev_h_ARG1
+ %1 = load <8 x i16>, <8 x i16>* @llvm_mips_ilvev_h_ARG2
%2 = tail call <8 x i16> @llvm.mips.ilvev.h(<8 x i16> %0, <8 x i16> %1)
store <8 x i16> %2, <8 x i16>* @llvm_mips_ilvev_h_RES
ret void
@@ -54,8 +54,8 @@ declare <8 x i16> @llvm.mips.ilvev.h(<8 x i16>, <8 x i16>) nounwind
define void @llvm_mips_ilvev_w_test() nounwind {
entry:
- %0 = load <4 x i32>* @llvm_mips_ilvev_w_ARG1
- %1 = load <4 x i32>* @llvm_mips_ilvev_w_ARG2
+ %0 = load <4 x i32>, <4 x i32>* @llvm_mips_ilvev_w_ARG1
+ %1 = load <4 x i32>, <4 x i32>* @llvm_mips_ilvev_w_ARG2
%2 = tail call <4 x i32> @llvm.mips.ilvev.w(<4 x i32> %0, <4 x i32> %1)
store <4 x i32> %2, <4 x i32>* @llvm_mips_ilvev_w_RES
ret void
@@ -76,8 +76,8 @@ declare <4 x i32> @llvm.mips.ilvev.w(<4 x i32>, <4 x i32>) nounwind
define void @llvm_mips_ilvev_d_test() nounwind {
entry:
- %0 = load <2 x i64>* @llvm_mips_ilvev_d_ARG1
- %1 = load <2 x i64>* @llvm_mips_ilvev_d_ARG2
+ %0 = load <2 x i64>, <2 x i64>* @llvm_mips_ilvev_d_ARG1
+ %1 = load <2 x i64>, <2 x i64>* @llvm_mips_ilvev_d_ARG2
%2 = tail call <2 x i64> @llvm.mips.ilvev.d(<2 x i64> %0, <2 x i64> %1)
store <2 x i64> %2, <2 x i64>* @llvm_mips_ilvev_d_RES
ret void
@@ -98,8 +98,8 @@ declare <2 x i64> @llvm.mips.ilvev.d(<2 x i64>, <2 x i64>) nounwind
define void @llvm_mips_ilvl_b_test() nounwind {
entry:
- %0 = load <16 x i8>* @llvm_mips_ilvl_b_ARG1
- %1 = load <16 x i8>* @llvm_mips_ilvl_b_ARG2
+ %0 = load <16 x i8>, <16 x i8>* @llvm_mips_ilvl_b_ARG1
+ %1 = load <16 x i8>, <16 x i8>* @llvm_mips_ilvl_b_ARG2
%2 = tail call <16 x i8> @llvm.mips.ilvl.b(<16 x i8> %0, <16 x i8> %1)
store <16 x i8> %2, <16 x i8>* @llvm_mips_ilvl_b_RES
ret void
@@ -120,8 +120,8 @@ declare <16 x i8> @llvm.mips.ilvl.b(<16 x i8>, <16 x i8>) nounwind
define void @llvm_mips_ilvl_h_test() nounwind {
entry:
- %0 = load <8 x i16>* @llvm_mips_ilvl_h_ARG1
- %1 = load <8 x i16>* @llvm_mips_ilvl_h_ARG2
+ %0 = load <8 x i16>, <8 x i16>* @llvm_mips_ilvl_h_ARG1
+ %1 = load <8 x i16>, <8 x i16>* @llvm_mips_ilvl_h_ARG2
%2 = tail call <8 x i16> @llvm.mips.ilvl.h(<8 x i16> %0, <8 x i16> %1)
store <8 x i16> %2, <8 x i16>* @llvm_mips_ilvl_h_RES
ret void
@@ -142,8 +142,8 @@ declare <8 x i16> @llvm.mips.ilvl.h(<8 x i16>, <8 x i16>) nounwind
define void @llvm_mips_ilvl_w_test() nounwind {
entry:
- %0 = load <4 x i32>* @llvm_mips_ilvl_w_ARG1
- %1 = load <4 x i32>* @llvm_mips_ilvl_w_ARG2
+ %0 = load <4 x i32>, <4 x i32>* @llvm_mips_ilvl_w_ARG1
+ %1 = load <4 x i32>, <4 x i32>* @llvm_mips_ilvl_w_ARG2
%2 = tail call <4 x i32> @llvm.mips.ilvl.w(<4 x i32> %0, <4 x i32> %1)
store <4 x i32> %2, <4 x i32>* @llvm_mips_ilvl_w_RES
ret void
@@ -164,8 +164,8 @@ declare <4 x i32> @llvm.mips.ilvl.w(<4 x i32>, <4 x i32>) nounwind
define void @llvm_mips_ilvl_d_test() nounwind {
entry:
- %0 = load <2 x i64>* @llvm_mips_ilvl_d_ARG1
- %1 = load <2 x i64>* @llvm_mips_ilvl_d_ARG2
+ %0 = load <2 x i64>, <2 x i64>* @llvm_mips_ilvl_d_ARG1
+ %1 = load <2 x i64>, <2 x i64>* @llvm_mips_ilvl_d_ARG2
%2 = tail call <2 x i64> @llvm.mips.ilvl.d(<2 x i64> %0, <2 x i64> %1)
store <2 x i64> %2, <2 x i64>* @llvm_mips_ilvl_d_RES
ret void
@@ -186,8 +186,8 @@ declare <2 x i64> @llvm.mips.ilvl.d(<2 x i64>, <2 x i64>) nounwind
define void @llvm_mips_ilvod_b_test() nounwind {
entry:
- %0 = load <16 x i8>* @llvm_mips_ilvod_b_ARG1
- %1 = load <16 x i8>* @llvm_mips_ilvod_b_ARG2
+ %0 = load <16 x i8>, <16 x i8>* @llvm_mips_ilvod_b_ARG1
+ %1 = load <16 x i8>, <16 x i8>* @llvm_mips_ilvod_b_ARG2
%2 = tail call <16 x i8> @llvm.mips.ilvod.b(<16 x i8> %0, <16 x i8> %1)
store <16 x i8> %2, <16 x i8>* @llvm_mips_ilvod_b_RES
ret void
@@ -208,8 +208,8 @@ declare <16 x i8> @llvm.mips.ilvod.b(<16 x i8>, <16 x i8>) nounwind
define void @llvm_mips_ilvod_h_test() nounwind {
entry:
- %0 = load <8 x i16>* @llvm_mips_ilvod_h_ARG1
- %1 = load <8 x i16>* @llvm_mips_ilvod_h_ARG2
+ %0 = load <8 x i16>, <8 x i16>* @llvm_mips_ilvod_h_ARG1
+ %1 = load <8 x i16>, <8 x i16>* @llvm_mips_ilvod_h_ARG2
%2 = tail call <8 x i16> @llvm.mips.ilvod.h(<8 x i16> %0, <8 x i16> %1)
store <8 x i16> %2, <8 x i16>* @llvm_mips_ilvod_h_RES
ret void
@@ -230,8 +230,8 @@ declare <8 x i16> @llvm.mips.ilvod.h(<8 x i16>, <8 x i16>) nounwind
define void @llvm_mips_ilvod_w_test() nounwind {
entry:
- %0 = load <4 x i32>* @llvm_mips_ilvod_w_ARG1
- %1 = load <4 x i32>* @llvm_mips_ilvod_w_ARG2
+ %0 = load <4 x i32>, <4 x i32>* @llvm_mips_ilvod_w_ARG1
+ %1 = load <4 x i32>, <4 x i32>* @llvm_mips_ilvod_w_ARG2
%2 = tail call <4 x i32> @llvm.mips.ilvod.w(<4 x i32> %0, <4 x i32> %1)
store <4 x i32> %2, <4 x i32>* @llvm_mips_ilvod_w_RES
ret void
@@ -252,8 +252,8 @@ declare <4 x i32> @llvm.mips.ilvod.w(<4 x i32>, <4 x i32>) nounwind
define void @llvm_mips_ilvod_d_test() nounwind {
entry:
- %0 = load <2 x i64>* @llvm_mips_ilvod_d_ARG1
- %1 = load <2 x i64>* @llvm_mips_ilvod_d_ARG2
+ %0 = load <2 x i64>, <2 x i64>* @llvm_mips_ilvod_d_ARG1
+ %1 = load <2 x i64>, <2 x i64>* @llvm_mips_ilvod_d_ARG2
%2 = tail call <2 x i64> @llvm.mips.ilvod.d(<2 x i64> %0, <2 x i64> %1)
store <2 x i64> %2, <2 x i64>* @llvm_mips_ilvod_d_RES
ret void
@@ -274,8 +274,8 @@ declare <2 x i64> @llvm.mips.ilvod.d(<2 x i64>, <2 x i64>) nounwind
define void @llvm_mips_ilvr_b_test() nounwind {
entry:
- %0 = load <16 x i8>* @llvm_mips_ilvr_b_ARG1
- %1 = load <16 x i8>* @llvm_mips_ilvr_b_ARG2
+ %0 = load <16 x i8>, <16 x i8>* @llvm_mips_ilvr_b_ARG1
+ %1 = load <16 x i8>, <16 x i8>* @llvm_mips_ilvr_b_ARG2
%2 = tail call <16 x i8> @llvm.mips.ilvr.b(<16 x i8> %0, <16 x i8> %1)
store <16 x i8> %2, <16 x i8>* @llvm_mips_ilvr_b_RES
ret void
@@ -296,8 +296,8 @@ declare <16 x i8> @llvm.mips.ilvr.b(<16 x i8>, <16 x i8>) nounwind
define void @llvm_mips_ilvr_h_test() nounwind {
entry:
- %0 = load <8 x i16>* @llvm_mips_ilvr_h_ARG1
- %1 = load <8 x i16>* @llvm_mips_ilvr_h_ARG2
+ %0 = load <8 x i16>, <8 x i16>* @llvm_mips_ilvr_h_ARG1
+ %1 = load <8 x i16>, <8 x i16>* @llvm_mips_ilvr_h_ARG2
%2 = tail call <8 x i16> @llvm.mips.ilvr.h(<8 x i16> %0, <8 x i16> %1)
store <8 x i16> %2, <8 x i16>* @llvm_mips_ilvr_h_RES
ret void
@@ -318,8 +318,8 @@ declare <8 x i16> @llvm.mips.ilvr.h(<8 x i16>, <8 x i16>) nounwind
define void @llvm_mips_ilvr_w_test() nounwind {
entry:
- %0 = load <4 x i32>* @llvm_mips_ilvr_w_ARG1
- %1 = load <4 x i32>* @llvm_mips_ilvr_w_ARG2
+ %0 = load <4 x i32>, <4 x i32>* @llvm_mips_ilvr_w_ARG1
+ %1 = load <4 x i32>, <4 x i32>* @llvm_mips_ilvr_w_ARG2
%2 = tail call <4 x i32> @llvm.mips.ilvr.w(<4 x i32> %0, <4 x i32> %1)
store <4 x i32> %2, <4 x i32>* @llvm_mips_ilvr_w_RES
ret void
@@ -340,8 +340,8 @@ declare <4 x i32> @llvm.mips.ilvr.w(<4 x i32>, <4 x i32>) nounwind
define void @llvm_mips_ilvr_d_test() nounwind {
entry:
- %0 = load <2 x i64>* @llvm_mips_ilvr_d_ARG1
- %1 = load <2 x i64>* @llvm_mips_ilvr_d_ARG2
+ %0 = load <2 x i64>, <2 x i64>* @llvm_mips_ilvr_d_ARG1
+ %1 = load <2 x i64>, <2 x i64>* @llvm_mips_ilvr_d_ARG2
%2 = tail call <2 x i64> @llvm.mips.ilvr.d(<2 x i64> %0, <2 x i64> %1)
store <2 x i64> %2, <2 x i64>* @llvm_mips_ilvr_d_RES
ret void
diff --git a/test/CodeGen/Mips/msa/3r-m.ll b/test/CodeGen/Mips/msa/3r-m.ll
index ddfd720a2f84..39b4f7db3a48 100644
--- a/test/CodeGen/Mips/msa/3r-m.ll
+++ b/test/CodeGen/Mips/msa/3r-m.ll
@@ -10,8 +10,8 @@
define void @llvm_mips_max_a_b_test() nounwind {
entry:
- %0 = load <16 x i8>* @llvm_mips_max_a_b_ARG1
- %1 = load <16 x i8>* @llvm_mips_max_a_b_ARG2
+ %0 = load <16 x i8>, <16 x i8>* @llvm_mips_max_a_b_ARG1
+ %1 = load <16 x i8>, <16 x i8>* @llvm_mips_max_a_b_ARG2
%2 = tail call <16 x i8> @llvm.mips.max.a.b(<16 x i8> %0, <16 x i8> %1)
store <16 x i8> %2, <16 x i8>* @llvm_mips_max_a_b_RES
ret void
@@ -32,8 +32,8 @@ declare <16 x i8> @llvm.mips.max.a.b(<16 x i8>, <16 x i8>) nounwind
define void @llvm_mips_max_a_h_test() nounwind {
entry:
- %0 = load <8 x i16>* @llvm_mips_max_a_h_ARG1
- %1 = load <8 x i16>* @llvm_mips_max_a_h_ARG2
+ %0 = load <8 x i16>, <8 x i16>* @llvm_mips_max_a_h_ARG1
+ %1 = load <8 x i16>, <8 x i16>* @llvm_mips_max_a_h_ARG2
%2 = tail call <8 x i16> @llvm.mips.max.a.h(<8 x i16> %0, <8 x i16> %1)
store <8 x i16> %2, <8 x i16>* @llvm_mips_max_a_h_RES
ret void
@@ -54,8 +54,8 @@ declare <8 x i16> @llvm.mips.max.a.h(<8 x i16>, <8 x i16>) nounwind
define void @llvm_mips_max_a_w_test() nounwind {
entry:
- %0 = load <4 x i32>* @llvm_mips_max_a_w_ARG1
- %1 = load <4 x i32>* @llvm_mips_max_a_w_ARG2
+ %0 = load <4 x i32>, <4 x i32>* @llvm_mips_max_a_w_ARG1
+ %1 = load <4 x i32>, <4 x i32>* @llvm_mips_max_a_w_ARG2
%2 = tail call <4 x i32> @llvm.mips.max.a.w(<4 x i32> %0, <4 x i32> %1)
store <4 x i32> %2, <4 x i32>* @llvm_mips_max_a_w_RES
ret void
@@ -76,8 +76,8 @@ declare <4 x i32> @llvm.mips.max.a.w(<4 x i32>, <4 x i32>) nounwind
define void @llvm_mips_max_a_d_test() nounwind {
entry:
- %0 = load <2 x i64>* @llvm_mips_max_a_d_ARG1
- %1 = load <2 x i64>* @llvm_mips_max_a_d_ARG2
+ %0 = load <2 x i64>, <2 x i64>* @llvm_mips_max_a_d_ARG1
+ %1 = load <2 x i64>, <2 x i64>* @llvm_mips_max_a_d_ARG2
%2 = tail call <2 x i64> @llvm.mips.max.a.d(<2 x i64> %0, <2 x i64> %1)
store <2 x i64> %2, <2 x i64>* @llvm_mips_max_a_d_RES
ret void
@@ -98,8 +98,8 @@ declare <2 x i64> @llvm.mips.max.a.d(<2 x i64>, <2 x i64>) nounwind
define void @llvm_mips_max_s_b_test() nounwind {
entry:
- %0 = load <16 x i8>* @llvm_mips_max_s_b_ARG1
- %1 = load <16 x i8>* @llvm_mips_max_s_b_ARG2
+ %0 = load <16 x i8>, <16 x i8>* @llvm_mips_max_s_b_ARG1
+ %1 = load <16 x i8>, <16 x i8>* @llvm_mips_max_s_b_ARG2
%2 = tail call <16 x i8> @llvm.mips.max.s.b(<16 x i8> %0, <16 x i8> %1)
store <16 x i8> %2, <16 x i8>* @llvm_mips_max_s_b_RES
ret void
@@ -120,8 +120,8 @@ declare <16 x i8> @llvm.mips.max.s.b(<16 x i8>, <16 x i8>) nounwind
define void @llvm_mips_max_s_h_test() nounwind {
entry:
- %0 = load <8 x i16>* @llvm_mips_max_s_h_ARG1
- %1 = load <8 x i16>* @llvm_mips_max_s_h_ARG2
+ %0 = load <8 x i16>, <8 x i16>* @llvm_mips_max_s_h_ARG1
+ %1 = load <8 x i16>, <8 x i16>* @llvm_mips_max_s_h_ARG2
%2 = tail call <8 x i16> @llvm.mips.max.s.h(<8 x i16> %0, <8 x i16> %1)
store <8 x i16> %2, <8 x i16>* @llvm_mips_max_s_h_RES
ret void
@@ -142,8 +142,8 @@ declare <8 x i16> @llvm.mips.max.s.h(<8 x i16>, <8 x i16>) nounwind
define void @llvm_mips_max_s_w_test() nounwind {
entry:
- %0 = load <4 x i32>* @llvm_mips_max_s_w_ARG1
- %1 = load <4 x i32>* @llvm_mips_max_s_w_ARG2
+ %0 = load <4 x i32>, <4 x i32>* @llvm_mips_max_s_w_ARG1
+ %1 = load <4 x i32>, <4 x i32>* @llvm_mips_max_s_w_ARG2
%2 = tail call <4 x i32> @llvm.mips.max.s.w(<4 x i32> %0, <4 x i32> %1)
store <4 x i32> %2, <4 x i32>* @llvm_mips_max_s_w_RES
ret void
@@ -164,8 +164,8 @@ declare <4 x i32> @llvm.mips.max.s.w(<4 x i32>, <4 x i32>) nounwind
define void @llvm_mips_max_s_d_test() nounwind {
entry:
- %0 = load <2 x i64>* @llvm_mips_max_s_d_ARG1
- %1 = load <2 x i64>* @llvm_mips_max_s_d_ARG2
+ %0 = load <2 x i64>, <2 x i64>* @llvm_mips_max_s_d_ARG1
+ %1 = load <2 x i64>, <2 x i64>* @llvm_mips_max_s_d_ARG2
%2 = tail call <2 x i64> @llvm.mips.max.s.d(<2 x i64> %0, <2 x i64> %1)
store <2 x i64> %2, <2 x i64>* @llvm_mips_max_s_d_RES
ret void
@@ -186,8 +186,8 @@ declare <2 x i64> @llvm.mips.max.s.d(<2 x i64>, <2 x i64>) nounwind
define void @llvm_mips_max_u_b_test() nounwind {
entry:
- %0 = load <16 x i8>* @llvm_mips_max_u_b_ARG1
- %1 = load <16 x i8>* @llvm_mips_max_u_b_ARG2
+ %0 = load <16 x i8>, <16 x i8>* @llvm_mips_max_u_b_ARG1
+ %1 = load <16 x i8>, <16 x i8>* @llvm_mips_max_u_b_ARG2
%2 = tail call <16 x i8> @llvm.mips.max.u.b(<16 x i8> %0, <16 x i8> %1)
store <16 x i8> %2, <16 x i8>* @llvm_mips_max_u_b_RES
ret void
@@ -208,8 +208,8 @@ declare <16 x i8> @llvm.mips.max.u.b(<16 x i8>, <16 x i8>) nounwind
define void @llvm_mips_max_u_h_test() nounwind {
entry:
- %0 = load <8 x i16>* @llvm_mips_max_u_h_ARG1
- %1 = load <8 x i16>* @llvm_mips_max_u_h_ARG2
+ %0 = load <8 x i16>, <8 x i16>* @llvm_mips_max_u_h_ARG1
+ %1 = load <8 x i16>, <8 x i16>* @llvm_mips_max_u_h_ARG2
%2 = tail call <8 x i16> @llvm.mips.max.u.h(<8 x i16> %0, <8 x i16> %1)
store <8 x i16> %2, <8 x i16>* @llvm_mips_max_u_h_RES
ret void
@@ -230,8 +230,8 @@ declare <8 x i16> @llvm.mips.max.u.h(<8 x i16>, <8 x i16>) nounwind
define void @llvm_mips_max_u_w_test() nounwind {
entry:
- %0 = load <4 x i32>* @llvm_mips_max_u_w_ARG1
- %1 = load <4 x i32>* @llvm_mips_max_u_w_ARG2
+ %0 = load <4 x i32>, <4 x i32>* @llvm_mips_max_u_w_ARG1
+ %1 = load <4 x i32>, <4 x i32>* @llvm_mips_max_u_w_ARG2
%2 = tail call <4 x i32> @llvm.mips.max.u.w(<4 x i32> %0, <4 x i32> %1)
store <4 x i32> %2, <4 x i32>* @llvm_mips_max_u_w_RES
ret void
@@ -252,8 +252,8 @@ declare <4 x i32> @llvm.mips.max.u.w(<4 x i32>, <4 x i32>) nounwind
define void @llvm_mips_max_u_d_test() nounwind {
entry:
- %0 = load <2 x i64>* @llvm_mips_max_u_d_ARG1
- %1 = load <2 x i64>* @llvm_mips_max_u_d_ARG2
+ %0 = load <2 x i64>, <2 x i64>* @llvm_mips_max_u_d_ARG1
+ %1 = load <2 x i64>, <2 x i64>* @llvm_mips_max_u_d_ARG2
%2 = tail call <2 x i64> @llvm.mips.max.u.d(<2 x i64> %0, <2 x i64> %1)
store <2 x i64> %2, <2 x i64>* @llvm_mips_max_u_d_RES
ret void
@@ -274,8 +274,8 @@ declare <2 x i64> @llvm.mips.max.u.d(<2 x i64>, <2 x i64>) nounwind
define void @llvm_mips_min_a_b_test() nounwind {
entry:
- %0 = load <16 x i8>* @llvm_mips_min_a_b_ARG1
- %1 = load <16 x i8>* @llvm_mips_min_a_b_ARG2
+ %0 = load <16 x i8>, <16 x i8>* @llvm_mips_min_a_b_ARG1
+ %1 = load <16 x i8>, <16 x i8>* @llvm_mips_min_a_b_ARG2
%2 = tail call <16 x i8> @llvm.mips.min.a.b(<16 x i8> %0, <16 x i8> %1)
store <16 x i8> %2, <16 x i8>* @llvm_mips_min_a_b_RES
ret void
@@ -296,8 +296,8 @@ declare <16 x i8> @llvm.mips.min.a.b(<16 x i8>, <16 x i8>) nounwind
define void @llvm_mips_min_a_h_test() nounwind {
entry:
- %0 = load <8 x i16>* @llvm_mips_min_a_h_ARG1
- %1 = load <8 x i16>* @llvm_mips_min_a_h_ARG2
+ %0 = load <8 x i16>, <8 x i16>* @llvm_mips_min_a_h_ARG1
+ %1 = load <8 x i16>, <8 x i16>* @llvm_mips_min_a_h_ARG2
%2 = tail call <8 x i16> @llvm.mips.min.a.h(<8 x i16> %0, <8 x i16> %1)
store <8 x i16> %2, <8 x i16>* @llvm_mips_min_a_h_RES
ret void
@@ -318,8 +318,8 @@ declare <8 x i16> @llvm.mips.min.a.h(<8 x i16>, <8 x i16>) nounwind
define void @llvm_mips_min_a_w_test() nounwind {
entry:
- %0 = load <4 x i32>* @llvm_mips_min_a_w_ARG1
- %1 = load <4 x i32>* @llvm_mips_min_a_w_ARG2
+ %0 = load <4 x i32>, <4 x i32>* @llvm_mips_min_a_w_ARG1
+ %1 = load <4 x i32>, <4 x i32>* @llvm_mips_min_a_w_ARG2
%2 = tail call <4 x i32> @llvm.mips.min.a.w(<4 x i32> %0, <4 x i32> %1)
store <4 x i32> %2, <4 x i32>* @llvm_mips_min_a_w_RES
ret void
@@ -340,8 +340,8 @@ declare <4 x i32> @llvm.mips.min.a.w(<4 x i32>, <4 x i32>) nounwind
define void @llvm_mips_min_a_d_test() nounwind {
entry:
- %0 = load <2 x i64>* @llvm_mips_min_a_d_ARG1
- %1 = load <2 x i64>* @llvm_mips_min_a_d_ARG2
+ %0 = load <2 x i64>, <2 x i64>* @llvm_mips_min_a_d_ARG1
+ %1 = load <2 x i64>, <2 x i64>* @llvm_mips_min_a_d_ARG2
%2 = tail call <2 x i64> @llvm.mips.min.a.d(<2 x i64> %0, <2 x i64> %1)
store <2 x i64> %2, <2 x i64>* @llvm_mips_min_a_d_RES
ret void
@@ -362,8 +362,8 @@ declare <2 x i64> @llvm.mips.min.a.d(<2 x i64>, <2 x i64>) nounwind
define void @llvm_mips_min_s_b_test() nounwind {
entry:
- %0 = load <16 x i8>* @llvm_mips_min_s_b_ARG1
- %1 = load <16 x i8>* @llvm_mips_min_s_b_ARG2
+ %0 = load <16 x i8>, <16 x i8>* @llvm_mips_min_s_b_ARG1
+ %1 = load <16 x i8>, <16 x i8>* @llvm_mips_min_s_b_ARG2
%2 = tail call <16 x i8> @llvm.mips.min.s.b(<16 x i8> %0, <16 x i8> %1)
store <16 x i8> %2, <16 x i8>* @llvm_mips_min_s_b_RES
ret void
@@ -384,8 +384,8 @@ declare <16 x i8> @llvm.mips.min.s.b(<16 x i8>, <16 x i8>) nounwind
define void @llvm_mips_min_s_h_test() nounwind {
entry:
- %0 = load <8 x i16>* @llvm_mips_min_s_h_ARG1
- %1 = load <8 x i16>* @llvm_mips_min_s_h_ARG2
+ %0 = load <8 x i16>, <8 x i16>* @llvm_mips_min_s_h_ARG1
+ %1 = load <8 x i16>, <8 x i16>* @llvm_mips_min_s_h_ARG2
%2 = tail call <8 x i16> @llvm.mips.min.s.h(<8 x i16> %0, <8 x i16> %1)
store <8 x i16> %2, <8 x i16>* @llvm_mips_min_s_h_RES
ret void
@@ -406,8 +406,8 @@ declare <8 x i16> @llvm.mips.min.s.h(<8 x i16>, <8 x i16>) nounwind
define void @llvm_mips_min_s_w_test() nounwind {
entry:
- %0 = load <4 x i32>* @llvm_mips_min_s_w_ARG1
- %1 = load <4 x i32>* @llvm_mips_min_s_w_ARG2
+ %0 = load <4 x i32>, <4 x i32>* @llvm_mips_min_s_w_ARG1
+ %1 = load <4 x i32>, <4 x i32>* @llvm_mips_min_s_w_ARG2
%2 = tail call <4 x i32> @llvm.mips.min.s.w(<4 x i32> %0, <4 x i32> %1)
store <4 x i32> %2, <4 x i32>* @llvm_mips_min_s_w_RES
ret void
@@ -428,8 +428,8 @@ declare <4 x i32> @llvm.mips.min.s.w(<4 x i32>, <4 x i32>) nounwind
define void @llvm_mips_min_s_d_test() nounwind {
entry:
- %0 = load <2 x i64>* @llvm_mips_min_s_d_ARG1
- %1 = load <2 x i64>* @llvm_mips_min_s_d_ARG2
+ %0 = load <2 x i64>, <2 x i64>* @llvm_mips_min_s_d_ARG1
+ %1 = load <2 x i64>, <2 x i64>* @llvm_mips_min_s_d_ARG2
%2 = tail call <2 x i64> @llvm.mips.min.s.d(<2 x i64> %0, <2 x i64> %1)
store <2 x i64> %2, <2 x i64>* @llvm_mips_min_s_d_RES
ret void
@@ -450,8 +450,8 @@ declare <2 x i64> @llvm.mips.min.s.d(<2 x i64>, <2 x i64>) nounwind
define void @llvm_mips_min_u_b_test() nounwind {
entry:
- %0 = load <16 x i8>* @llvm_mips_min_u_b_ARG1
- %1 = load <16 x i8>* @llvm_mips_min_u_b_ARG2
+ %0 = load <16 x i8>, <16 x i8>* @llvm_mips_min_u_b_ARG1
+ %1 = load <16 x i8>, <16 x i8>* @llvm_mips_min_u_b_ARG2
%2 = tail call <16 x i8> @llvm.mips.min.u.b(<16 x i8> %0, <16 x i8> %1)
store <16 x i8> %2, <16 x i8>* @llvm_mips_min_u_b_RES
ret void
@@ -472,8 +472,8 @@ declare <16 x i8> @llvm.mips.min.u.b(<16 x i8>, <16 x i8>) nounwind
define void @llvm_mips_min_u_h_test() nounwind {
entry:
- %0 = load <8 x i16>* @llvm_mips_min_u_h_ARG1
- %1 = load <8 x i16>* @llvm_mips_min_u_h_ARG2
+ %0 = load <8 x i16>, <8 x i16>* @llvm_mips_min_u_h_ARG1
+ %1 = load <8 x i16>, <8 x i16>* @llvm_mips_min_u_h_ARG2
%2 = tail call <8 x i16> @llvm.mips.min.u.h(<8 x i16> %0, <8 x i16> %1)
store <8 x i16> %2, <8 x i16>* @llvm_mips_min_u_h_RES
ret void
@@ -494,8 +494,8 @@ declare <8 x i16> @llvm.mips.min.u.h(<8 x i16>, <8 x i16>) nounwind
define void @llvm_mips_min_u_w_test() nounwind {
entry:
- %0 = load <4 x i32>* @llvm_mips_min_u_w_ARG1
- %1 = load <4 x i32>* @llvm_mips_min_u_w_ARG2
+ %0 = load <4 x i32>, <4 x i32>* @llvm_mips_min_u_w_ARG1
+ %1 = load <4 x i32>, <4 x i32>* @llvm_mips_min_u_w_ARG2
%2 = tail call <4 x i32> @llvm.mips.min.u.w(<4 x i32> %0, <4 x i32> %1)
store <4 x i32> %2, <4 x i32>* @llvm_mips_min_u_w_RES
ret void
@@ -516,8 +516,8 @@ declare <4 x i32> @llvm.mips.min.u.w(<4 x i32>, <4 x i32>) nounwind
define void @llvm_mips_min_u_d_test() nounwind {
entry:
- %0 = load <2 x i64>* @llvm_mips_min_u_d_ARG1
- %1 = load <2 x i64>* @llvm_mips_min_u_d_ARG2
+ %0 = load <2 x i64>, <2 x i64>* @llvm_mips_min_u_d_ARG1
+ %1 = load <2 x i64>, <2 x i64>* @llvm_mips_min_u_d_ARG2
%2 = tail call <2 x i64> @llvm.mips.min.u.d(<2 x i64> %0, <2 x i64> %1)
store <2 x i64> %2, <2 x i64>* @llvm_mips_min_u_d_RES
ret void
@@ -538,8 +538,8 @@ declare <2 x i64> @llvm.mips.min.u.d(<2 x i64>, <2 x i64>) nounwind
define void @llvm_mips_mod_s_b_test() nounwind {
entry:
- %0 = load <16 x i8>* @llvm_mips_mod_s_b_ARG1
- %1 = load <16 x i8>* @llvm_mips_mod_s_b_ARG2
+ %0 = load <16 x i8>, <16 x i8>* @llvm_mips_mod_s_b_ARG1
+ %1 = load <16 x i8>, <16 x i8>* @llvm_mips_mod_s_b_ARG2
%2 = tail call <16 x i8> @llvm.mips.mod.s.b(<16 x i8> %0, <16 x i8> %1)
store <16 x i8> %2, <16 x i8>* @llvm_mips_mod_s_b_RES
ret void
@@ -560,8 +560,8 @@ declare <16 x i8> @llvm.mips.mod.s.b(<16 x i8>, <16 x i8>) nounwind
define void @llvm_mips_mod_s_h_test() nounwind {
entry:
- %0 = load <8 x i16>* @llvm_mips_mod_s_h_ARG1
- %1 = load <8 x i16>* @llvm_mips_mod_s_h_ARG2
+ %0 = load <8 x i16>, <8 x i16>* @llvm_mips_mod_s_h_ARG1
+ %1 = load <8 x i16>, <8 x i16>* @llvm_mips_mod_s_h_ARG2
%2 = tail call <8 x i16> @llvm.mips.mod.s.h(<8 x i16> %0, <8 x i16> %1)
store <8 x i16> %2, <8 x i16>* @llvm_mips_mod_s_h_RES
ret void
@@ -582,8 +582,8 @@ declare <8 x i16> @llvm.mips.mod.s.h(<8 x i16>, <8 x i16>) nounwind
define void @llvm_mips_mod_s_w_test() nounwind {
entry:
- %0 = load <4 x i32>* @llvm_mips_mod_s_w_ARG1
- %1 = load <4 x i32>* @llvm_mips_mod_s_w_ARG2
+ %0 = load <4 x i32>, <4 x i32>* @llvm_mips_mod_s_w_ARG1
+ %1 = load <4 x i32>, <4 x i32>* @llvm_mips_mod_s_w_ARG2
%2 = tail call <4 x i32> @llvm.mips.mod.s.w(<4 x i32> %0, <4 x i32> %1)
store <4 x i32> %2, <4 x i32>* @llvm_mips_mod_s_w_RES
ret void
@@ -604,8 +604,8 @@ declare <4 x i32> @llvm.mips.mod.s.w(<4 x i32>, <4 x i32>) nounwind
define void @llvm_mips_mod_s_d_test() nounwind {
entry:
- %0 = load <2 x i64>* @llvm_mips_mod_s_d_ARG1
- %1 = load <2 x i64>* @llvm_mips_mod_s_d_ARG2
+ %0 = load <2 x i64>, <2 x i64>* @llvm_mips_mod_s_d_ARG1
+ %1 = load <2 x i64>, <2 x i64>* @llvm_mips_mod_s_d_ARG2
%2 = tail call <2 x i64> @llvm.mips.mod.s.d(<2 x i64> %0, <2 x i64> %1)
store <2 x i64> %2, <2 x i64>* @llvm_mips_mod_s_d_RES
ret void
@@ -626,8 +626,8 @@ declare <2 x i64> @llvm.mips.mod.s.d(<2 x i64>, <2 x i64>) nounwind
define void @llvm_mips_mod_u_b_test() nounwind {
entry:
- %0 = load <16 x i8>* @llvm_mips_mod_u_b_ARG1
- %1 = load <16 x i8>* @llvm_mips_mod_u_b_ARG2
+ %0 = load <16 x i8>, <16 x i8>* @llvm_mips_mod_u_b_ARG1
+ %1 = load <16 x i8>, <16 x i8>* @llvm_mips_mod_u_b_ARG2
%2 = tail call <16 x i8> @llvm.mips.mod.u.b(<16 x i8> %0, <16 x i8> %1)
store <16 x i8> %2, <16 x i8>* @llvm_mips_mod_u_b_RES
ret void
@@ -648,8 +648,8 @@ declare <16 x i8> @llvm.mips.mod.u.b(<16 x i8>, <16 x i8>) nounwind
define void @llvm_mips_mod_u_h_test() nounwind {
entry:
- %0 = load <8 x i16>* @llvm_mips_mod_u_h_ARG1
- %1 = load <8 x i16>* @llvm_mips_mod_u_h_ARG2
+ %0 = load <8 x i16>, <8 x i16>* @llvm_mips_mod_u_h_ARG1
+ %1 = load <8 x i16>, <8 x i16>* @llvm_mips_mod_u_h_ARG2
%2 = tail call <8 x i16> @llvm.mips.mod.u.h(<8 x i16> %0, <8 x i16> %1)
store <8 x i16> %2, <8 x i16>* @llvm_mips_mod_u_h_RES
ret void
@@ -670,8 +670,8 @@ declare <8 x i16> @llvm.mips.mod.u.h(<8 x i16>, <8 x i16>) nounwind
define void @llvm_mips_mod_u_w_test() nounwind {
entry:
- %0 = load <4 x i32>* @llvm_mips_mod_u_w_ARG1
- %1 = load <4 x i32>* @llvm_mips_mod_u_w_ARG2
+ %0 = load <4 x i32>, <4 x i32>* @llvm_mips_mod_u_w_ARG1
+ %1 = load <4 x i32>, <4 x i32>* @llvm_mips_mod_u_w_ARG2
%2 = tail call <4 x i32> @llvm.mips.mod.u.w(<4 x i32> %0, <4 x i32> %1)
store <4 x i32> %2, <4 x i32>* @llvm_mips_mod_u_w_RES
ret void
@@ -692,8 +692,8 @@ declare <4 x i32> @llvm.mips.mod.u.w(<4 x i32>, <4 x i32>) nounwind
define void @llvm_mips_mod_u_d_test() nounwind {
entry:
- %0 = load <2 x i64>* @llvm_mips_mod_u_d_ARG1
- %1 = load <2 x i64>* @llvm_mips_mod_u_d_ARG2
+ %0 = load <2 x i64>, <2 x i64>* @llvm_mips_mod_u_d_ARG1
+ %1 = load <2 x i64>, <2 x i64>* @llvm_mips_mod_u_d_ARG2
%2 = tail call <2 x i64> @llvm.mips.mod.u.d(<2 x i64> %0, <2 x i64> %1)
store <2 x i64> %2, <2 x i64>* @llvm_mips_mod_u_d_RES
ret void
@@ -714,8 +714,8 @@ declare <2 x i64> @llvm.mips.mod.u.d(<2 x i64>, <2 x i64>) nounwind
define void @llvm_mips_mulv_b_test() nounwind {
entry:
- %0 = load <16 x i8>* @llvm_mips_mulv_b_ARG1
- %1 = load <16 x i8>* @llvm_mips_mulv_b_ARG2
+ %0 = load <16 x i8>, <16 x i8>* @llvm_mips_mulv_b_ARG1
+ %1 = load <16 x i8>, <16 x i8>* @llvm_mips_mulv_b_ARG2
%2 = tail call <16 x i8> @llvm.mips.mulv.b(<16 x i8> %0, <16 x i8> %1)
store <16 x i8> %2, <16 x i8>* @llvm_mips_mulv_b_RES
ret void
@@ -736,8 +736,8 @@ declare <16 x i8> @llvm.mips.mulv.b(<16 x i8>, <16 x i8>) nounwind
define void @llvm_mips_mulv_h_test() nounwind {
entry:
- %0 = load <8 x i16>* @llvm_mips_mulv_h_ARG1
- %1 = load <8 x i16>* @llvm_mips_mulv_h_ARG2
+ %0 = load <8 x i16>, <8 x i16>* @llvm_mips_mulv_h_ARG1
+ %1 = load <8 x i16>, <8 x i16>* @llvm_mips_mulv_h_ARG2
%2 = tail call <8 x i16> @llvm.mips.mulv.h(<8 x i16> %0, <8 x i16> %1)
store <8 x i16> %2, <8 x i16>* @llvm_mips_mulv_h_RES
ret void
@@ -758,8 +758,8 @@ declare <8 x i16> @llvm.mips.mulv.h(<8 x i16>, <8 x i16>) nounwind
define void @llvm_mips_mulv_w_test() nounwind {
entry:
- %0 = load <4 x i32>* @llvm_mips_mulv_w_ARG1
- %1 = load <4 x i32>* @llvm_mips_mulv_w_ARG2
+ %0 = load <4 x i32>, <4 x i32>* @llvm_mips_mulv_w_ARG1
+ %1 = load <4 x i32>, <4 x i32>* @llvm_mips_mulv_w_ARG2
%2 = tail call <4 x i32> @llvm.mips.mulv.w(<4 x i32> %0, <4 x i32> %1)
store <4 x i32> %2, <4 x i32>* @llvm_mips_mulv_w_RES
ret void
@@ -780,8 +780,8 @@ declare <4 x i32> @llvm.mips.mulv.w(<4 x i32>, <4 x i32>) nounwind
define void @llvm_mips_mulv_d_test() nounwind {
entry:
- %0 = load <2 x i64>* @llvm_mips_mulv_d_ARG1
- %1 = load <2 x i64>* @llvm_mips_mulv_d_ARG2
+ %0 = load <2 x i64>, <2 x i64>* @llvm_mips_mulv_d_ARG1
+ %1 = load <2 x i64>, <2 x i64>* @llvm_mips_mulv_d_ARG2
%2 = tail call <2 x i64> @llvm.mips.mulv.d(<2 x i64> %0, <2 x i64> %1)
store <2 x i64> %2, <2 x i64>* @llvm_mips_mulv_d_RES
ret void
@@ -798,8 +798,8 @@ declare <2 x i64> @llvm.mips.mulv.d(<2 x i64>, <2 x i64>) nounwind
define void @mulv_b_test() nounwind {
entry:
- %0 = load <16 x i8>* @llvm_mips_mulv_b_ARG1
- %1 = load <16 x i8>* @llvm_mips_mulv_b_ARG2
+ %0 = load <16 x i8>, <16 x i8>* @llvm_mips_mulv_b_ARG1
+ %1 = load <16 x i8>, <16 x i8>* @llvm_mips_mulv_b_ARG2
%2 = mul <16 x i8> %0, %1
store <16 x i8> %2, <16 x i8>* @llvm_mips_mulv_b_RES
ret void
@@ -814,8 +814,8 @@ entry:
define void @mulv_h_test() nounwind {
entry:
- %0 = load <8 x i16>* @llvm_mips_mulv_h_ARG1
- %1 = load <8 x i16>* @llvm_mips_mulv_h_ARG2
+ %0 = load <8 x i16>, <8 x i16>* @llvm_mips_mulv_h_ARG1
+ %1 = load <8 x i16>, <8 x i16>* @llvm_mips_mulv_h_ARG2
%2 = mul <8 x i16> %0, %1
store <8 x i16> %2, <8 x i16>* @llvm_mips_mulv_h_RES
ret void
@@ -830,8 +830,8 @@ entry:
define void @mulv_w_test() nounwind {
entry:
- %0 = load <4 x i32>* @llvm_mips_mulv_w_ARG1
- %1 = load <4 x i32>* @llvm_mips_mulv_w_ARG2
+ %0 = load <4 x i32>, <4 x i32>* @llvm_mips_mulv_w_ARG1
+ %1 = load <4 x i32>, <4 x i32>* @llvm_mips_mulv_w_ARG2
%2 = mul <4 x i32> %0, %1
store <4 x i32> %2, <4 x i32>* @llvm_mips_mulv_w_RES
ret void
@@ -846,8 +846,8 @@ entry:
define void @mulv_d_test() nounwind {
entry:
- %0 = load <2 x i64>* @llvm_mips_mulv_d_ARG1
- %1 = load <2 x i64>* @llvm_mips_mulv_d_ARG2
+ %0 = load <2 x i64>, <2 x i64>* @llvm_mips_mulv_d_ARG1
+ %1 = load <2 x i64>, <2 x i64>* @llvm_mips_mulv_d_ARG2
%2 = mul <2 x i64> %0, %1
store <2 x i64> %2, <2 x i64>* @llvm_mips_mulv_d_RES
ret void
diff --git a/test/CodeGen/Mips/msa/3r-p.ll b/test/CodeGen/Mips/msa/3r-p.ll
index 852023b0824a..70b98aa8f9a1 100644
--- a/test/CodeGen/Mips/msa/3r-p.ll
+++ b/test/CodeGen/Mips/msa/3r-p.ll
@@ -10,8 +10,8 @@
define void @llvm_mips_pckev_b_test() nounwind {
entry:
- %0 = load <16 x i8>* @llvm_mips_pckev_b_ARG1
- %1 = load <16 x i8>* @llvm_mips_pckev_b_ARG2
+ %0 = load <16 x i8>, <16 x i8>* @llvm_mips_pckev_b_ARG1
+ %1 = load <16 x i8>, <16 x i8>* @llvm_mips_pckev_b_ARG2
%2 = tail call <16 x i8> @llvm.mips.pckev.b(<16 x i8> %0, <16 x i8> %1)
store <16 x i8> %2, <16 x i8>* @llvm_mips_pckev_b_RES
ret void
@@ -32,8 +32,8 @@ declare <16 x i8> @llvm.mips.pckev.b(<16 x i8>, <16 x i8>) nounwind
define void @llvm_mips_pckev_h_test() nounwind {
entry:
- %0 = load <8 x i16>* @llvm_mips_pckev_h_ARG1
- %1 = load <8 x i16>* @llvm_mips_pckev_h_ARG2
+ %0 = load <8 x i16>, <8 x i16>* @llvm_mips_pckev_h_ARG1
+ %1 = load <8 x i16>, <8 x i16>* @llvm_mips_pckev_h_ARG2
%2 = tail call <8 x i16> @llvm.mips.pckev.h(<8 x i16> %0, <8 x i16> %1)
store <8 x i16> %2, <8 x i16>* @llvm_mips_pckev_h_RES
ret void
@@ -54,8 +54,8 @@ declare <8 x i16> @llvm.mips.pckev.h(<8 x i16>, <8 x i16>) nounwind
define void @llvm_mips_pckev_w_test() nounwind {
entry:
- %0 = load <4 x i32>* @llvm_mips_pckev_w_ARG1
- %1 = load <4 x i32>* @llvm_mips_pckev_w_ARG2
+ %0 = load <4 x i32>, <4 x i32>* @llvm_mips_pckev_w_ARG1
+ %1 = load <4 x i32>, <4 x i32>* @llvm_mips_pckev_w_ARG2
%2 = tail call <4 x i32> @llvm.mips.pckev.w(<4 x i32> %0, <4 x i32> %1)
store <4 x i32> %2, <4 x i32>* @llvm_mips_pckev_w_RES
ret void
@@ -76,8 +76,8 @@ declare <4 x i32> @llvm.mips.pckev.w(<4 x i32>, <4 x i32>) nounwind
define void @llvm_mips_pckev_d_test() nounwind {
entry:
- %0 = load <2 x i64>* @llvm_mips_pckev_d_ARG1
- %1 = load <2 x i64>* @llvm_mips_pckev_d_ARG2
+ %0 = load <2 x i64>, <2 x i64>* @llvm_mips_pckev_d_ARG1
+ %1 = load <2 x i64>, <2 x i64>* @llvm_mips_pckev_d_ARG2
%2 = tail call <2 x i64> @llvm.mips.pckev.d(<2 x i64> %0, <2 x i64> %1)
store <2 x i64> %2, <2 x i64>* @llvm_mips_pckev_d_RES
ret void
@@ -98,8 +98,8 @@ declare <2 x i64> @llvm.mips.pckev.d(<2 x i64>, <2 x i64>) nounwind
define void @llvm_mips_pckod_b_test() nounwind {
entry:
- %0 = load <16 x i8>* @llvm_mips_pckod_b_ARG1
- %1 = load <16 x i8>* @llvm_mips_pckod_b_ARG2
+ %0 = load <16 x i8>, <16 x i8>* @llvm_mips_pckod_b_ARG1
+ %1 = load <16 x i8>, <16 x i8>* @llvm_mips_pckod_b_ARG2
%2 = tail call <16 x i8> @llvm.mips.pckod.b(<16 x i8> %0, <16 x i8> %1)
store <16 x i8> %2, <16 x i8>* @llvm_mips_pckod_b_RES
ret void
@@ -120,8 +120,8 @@ declare <16 x i8> @llvm.mips.pckod.b(<16 x i8>, <16 x i8>) nounwind
define void @llvm_mips_pckod_h_test() nounwind {
entry:
- %0 = load <8 x i16>* @llvm_mips_pckod_h_ARG1
- %1 = load <8 x i16>* @llvm_mips_pckod_h_ARG2
+ %0 = load <8 x i16>, <8 x i16>* @llvm_mips_pckod_h_ARG1
+ %1 = load <8 x i16>, <8 x i16>* @llvm_mips_pckod_h_ARG2
%2 = tail call <8 x i16> @llvm.mips.pckod.h(<8 x i16> %0, <8 x i16> %1)
store <8 x i16> %2, <8 x i16>* @llvm_mips_pckod_h_RES
ret void
@@ -142,8 +142,8 @@ declare <8 x i16> @llvm.mips.pckod.h(<8 x i16>, <8 x i16>) nounwind
define void @llvm_mips_pckod_w_test() nounwind {
entry:
- %0 = load <4 x i32>* @llvm_mips_pckod_w_ARG1
- %1 = load <4 x i32>* @llvm_mips_pckod_w_ARG2
+ %0 = load <4 x i32>, <4 x i32>* @llvm_mips_pckod_w_ARG1
+ %1 = load <4 x i32>, <4 x i32>* @llvm_mips_pckod_w_ARG2
%2 = tail call <4 x i32> @llvm.mips.pckod.w(<4 x i32> %0, <4 x i32> %1)
store <4 x i32> %2, <4 x i32>* @llvm_mips_pckod_w_RES
ret void
@@ -164,8 +164,8 @@ declare <4 x i32> @llvm.mips.pckod.w(<4 x i32>, <4 x i32>) nounwind
define void @llvm_mips_pckod_d_test() nounwind {
entry:
- %0 = load <2 x i64>* @llvm_mips_pckod_d_ARG1
- %1 = load <2 x i64>* @llvm_mips_pckod_d_ARG2
+ %0 = load <2 x i64>, <2 x i64>* @llvm_mips_pckod_d_ARG1
+ %1 = load <2 x i64>, <2 x i64>* @llvm_mips_pckod_d_ARG2
%2 = tail call <2 x i64> @llvm.mips.pckod.d(<2 x i64> %0, <2 x i64> %1)
store <2 x i64> %2, <2 x i64>* @llvm_mips_pckod_d_RES
ret void
diff --git a/test/CodeGen/Mips/msa/3r-s.ll b/test/CodeGen/Mips/msa/3r-s.ll
index 581c3bfd78af..d04c5ff165f2 100644
--- a/test/CodeGen/Mips/msa/3r-s.ll
+++ b/test/CodeGen/Mips/msa/3r-s.ll
@@ -11,9 +11,9 @@
define void @llvm_mips_sld_b_test() nounwind {
entry:
- %0 = load <16 x i8>* @llvm_mips_sld_b_ARG1
- %1 = load <16 x i8>* @llvm_mips_sld_b_ARG2
- %2 = load i32* @llvm_mips_sld_b_ARG3
+ %0 = load <16 x i8>, <16 x i8>* @llvm_mips_sld_b_ARG1
+ %1 = load <16 x i8>, <16 x i8>* @llvm_mips_sld_b_ARG2
+ %2 = load i32, i32* @llvm_mips_sld_b_ARG3
%3 = tail call <16 x i8> @llvm.mips.sld.b(<16 x i8> %0, <16 x i8> %1, i32 %2)
store <16 x i8> %3, <16 x i8>* @llvm_mips_sld_b_RES
ret void
@@ -39,9 +39,9 @@ declare <16 x i8> @llvm.mips.sld.b(<16 x i8>, <16 x i8>, i32) nounwind
define void @llvm_mips_sld_h_test() nounwind {
entry:
- %0 = load <8 x i16>* @llvm_mips_sld_h_ARG1
- %1 = load <8 x i16>* @llvm_mips_sld_h_ARG2
- %2 = load i32* @llvm_mips_sld_h_ARG3
+ %0 = load <8 x i16>, <8 x i16>* @llvm_mips_sld_h_ARG1
+ %1 = load <8 x i16>, <8 x i16>* @llvm_mips_sld_h_ARG2
+ %2 = load i32, i32* @llvm_mips_sld_h_ARG3
%3 = tail call <8 x i16> @llvm.mips.sld.h(<8 x i16> %0, <8 x i16> %1, i32 %2)
store <8 x i16> %3, <8 x i16>* @llvm_mips_sld_h_RES
ret void
@@ -67,9 +67,9 @@ declare <8 x i16> @llvm.mips.sld.h(<8 x i16>, <8 x i16>, i32) nounwind
define void @llvm_mips_sld_w_test() nounwind {
entry:
- %0 = load <4 x i32>* @llvm_mips_sld_w_ARG1
- %1 = load <4 x i32>* @llvm_mips_sld_w_ARG2
- %2 = load i32* @llvm_mips_sld_w_ARG3
+ %0 = load <4 x i32>, <4 x i32>* @llvm_mips_sld_w_ARG1
+ %1 = load <4 x i32>, <4 x i32>* @llvm_mips_sld_w_ARG2
+ %2 = load i32, i32* @llvm_mips_sld_w_ARG3
%3 = tail call <4 x i32> @llvm.mips.sld.w(<4 x i32> %0, <4 x i32> %1, i32 %2)
store <4 x i32> %3, <4 x i32>* @llvm_mips_sld_w_RES
ret void
@@ -95,9 +95,9 @@ declare <4 x i32> @llvm.mips.sld.w(<4 x i32>, <4 x i32>, i32) nounwind
define void @llvm_mips_sld_d_test() nounwind {
entry:
- %0 = load <2 x i64>* @llvm_mips_sld_d_ARG1
- %1 = load <2 x i64>* @llvm_mips_sld_d_ARG2
- %2 = load i32* @llvm_mips_sld_d_ARG3
+ %0 = load <2 x i64>, <2 x i64>* @llvm_mips_sld_d_ARG1
+ %1 = load <2 x i64>, <2 x i64>* @llvm_mips_sld_d_ARG2
+ %2 = load i32, i32* @llvm_mips_sld_d_ARG3
%3 = tail call <2 x i64> @llvm.mips.sld.d(<2 x i64> %0, <2 x i64> %1, i32 %2)
store <2 x i64> %3, <2 x i64>* @llvm_mips_sld_d_RES
ret void
@@ -122,8 +122,8 @@ declare <2 x i64> @llvm.mips.sld.d(<2 x i64>, <2 x i64>, i32) nounwind
define void @llvm_mips_sll_b_test() nounwind {
entry:
- %0 = load <16 x i8>* @llvm_mips_sll_b_ARG1
- %1 = load <16 x i8>* @llvm_mips_sll_b_ARG2
+ %0 = load <16 x i8>, <16 x i8>* @llvm_mips_sll_b_ARG1
+ %1 = load <16 x i8>, <16 x i8>* @llvm_mips_sll_b_ARG2
%2 = tail call <16 x i8> @llvm.mips.sll.b(<16 x i8> %0, <16 x i8> %1)
store <16 x i8> %2, <16 x i8>* @llvm_mips_sll_b_RES
ret void
@@ -146,8 +146,8 @@ declare <16 x i8> @llvm.mips.sll.b(<16 x i8>, <16 x i8>) nounwind
define void @llvm_mips_sll_h_test() nounwind {
entry:
- %0 = load <8 x i16>* @llvm_mips_sll_h_ARG1
- %1 = load <8 x i16>* @llvm_mips_sll_h_ARG2
+ %0 = load <8 x i16>, <8 x i16>* @llvm_mips_sll_h_ARG1
+ %1 = load <8 x i16>, <8 x i16>* @llvm_mips_sll_h_ARG2
%2 = tail call <8 x i16> @llvm.mips.sll.h(<8 x i16> %0, <8 x i16> %1)
store <8 x i16> %2, <8 x i16>* @llvm_mips_sll_h_RES
ret void
@@ -170,8 +170,8 @@ declare <8 x i16> @llvm.mips.sll.h(<8 x i16>, <8 x i16>) nounwind
define void @llvm_mips_sll_w_test() nounwind {
entry:
- %0 = load <4 x i32>* @llvm_mips_sll_w_ARG1
- %1 = load <4 x i32>* @llvm_mips_sll_w_ARG2
+ %0 = load <4 x i32>, <4 x i32>* @llvm_mips_sll_w_ARG1
+ %1 = load <4 x i32>, <4 x i32>* @llvm_mips_sll_w_ARG2
%2 = tail call <4 x i32> @llvm.mips.sll.w(<4 x i32> %0, <4 x i32> %1)
store <4 x i32> %2, <4 x i32>* @llvm_mips_sll_w_RES
ret void
@@ -194,8 +194,8 @@ declare <4 x i32> @llvm.mips.sll.w(<4 x i32>, <4 x i32>) nounwind
define void @llvm_mips_sll_d_test() nounwind {
entry:
- %0 = load <2 x i64>* @llvm_mips_sll_d_ARG1
- %1 = load <2 x i64>* @llvm_mips_sll_d_ARG2
+ %0 = load <2 x i64>, <2 x i64>* @llvm_mips_sll_d_ARG1
+ %1 = load <2 x i64>, <2 x i64>* @llvm_mips_sll_d_ARG2
%2 = tail call <2 x i64> @llvm.mips.sll.d(<2 x i64> %0, <2 x i64> %1)
store <2 x i64> %2, <2 x i64>* @llvm_mips_sll_d_RES
ret void
@@ -214,8 +214,8 @@ declare <2 x i64> @llvm.mips.sll.d(<2 x i64>, <2 x i64>) nounwind
define void @sll_b_test() nounwind {
entry:
- %0 = load <16 x i8>* @llvm_mips_sll_b_ARG1
- %1 = load <16 x i8>* @llvm_mips_sll_b_ARG2
+ %0 = load <16 x i8>, <16 x i8>* @llvm_mips_sll_b_ARG1
+ %1 = load <16 x i8>, <16 x i8>* @llvm_mips_sll_b_ARG2
%2 = shl <16 x i8> %0, %1
store <16 x i8> %2, <16 x i8>* @llvm_mips_sll_b_RES
ret void
@@ -232,8 +232,8 @@ entry:
define void @sll_h_test() nounwind {
entry:
- %0 = load <8 x i16>* @llvm_mips_sll_h_ARG1
- %1 = load <8 x i16>* @llvm_mips_sll_h_ARG2
+ %0 = load <8 x i16>, <8 x i16>* @llvm_mips_sll_h_ARG1
+ %1 = load <8 x i16>, <8 x i16>* @llvm_mips_sll_h_ARG2
%2 = shl <8 x i16> %0, %1
store <8 x i16> %2, <8 x i16>* @llvm_mips_sll_h_RES
ret void
@@ -250,8 +250,8 @@ entry:
define void @sll_w_test() nounwind {
entry:
- %0 = load <4 x i32>* @llvm_mips_sll_w_ARG1
- %1 = load <4 x i32>* @llvm_mips_sll_w_ARG2
+ %0 = load <4 x i32>, <4 x i32>* @llvm_mips_sll_w_ARG1
+ %1 = load <4 x i32>, <4 x i32>* @llvm_mips_sll_w_ARG2
%2 = shl <4 x i32> %0, %1
store <4 x i32> %2, <4 x i32>* @llvm_mips_sll_w_RES
ret void
@@ -268,8 +268,8 @@ entry:
define void @sll_d_test() nounwind {
entry:
- %0 = load <2 x i64>* @llvm_mips_sll_d_ARG1
- %1 = load <2 x i64>* @llvm_mips_sll_d_ARG2
+ %0 = load <2 x i64>, <2 x i64>* @llvm_mips_sll_d_ARG1
+ %1 = load <2 x i64>, <2 x i64>* @llvm_mips_sll_d_ARG2
%2 = shl <2 x i64> %0, %1
store <2 x i64> %2, <2 x i64>* @llvm_mips_sll_d_RES
ret void
@@ -290,8 +290,8 @@ entry:
define void @llvm_mips_sra_b_test() nounwind {
entry:
- %0 = load <16 x i8>* @llvm_mips_sra_b_ARG1
- %1 = load <16 x i8>* @llvm_mips_sra_b_ARG2
+ %0 = load <16 x i8>, <16 x i8>* @llvm_mips_sra_b_ARG1
+ %1 = load <16 x i8>, <16 x i8>* @llvm_mips_sra_b_ARG2
%2 = tail call <16 x i8> @llvm.mips.sra.b(<16 x i8> %0, <16 x i8> %1)
store <16 x i8> %2, <16 x i8>* @llvm_mips_sra_b_RES
ret void
@@ -314,8 +314,8 @@ declare <16 x i8> @llvm.mips.sra.b(<16 x i8>, <16 x i8>) nounwind
define void @llvm_mips_sra_h_test() nounwind {
entry:
- %0 = load <8 x i16>* @llvm_mips_sra_h_ARG1
- %1 = load <8 x i16>* @llvm_mips_sra_h_ARG2
+ %0 = load <8 x i16>, <8 x i16>* @llvm_mips_sra_h_ARG1
+ %1 = load <8 x i16>, <8 x i16>* @llvm_mips_sra_h_ARG2
%2 = tail call <8 x i16> @llvm.mips.sra.h(<8 x i16> %0, <8 x i16> %1)
store <8 x i16> %2, <8 x i16>* @llvm_mips_sra_h_RES
ret void
@@ -338,8 +338,8 @@ declare <8 x i16> @llvm.mips.sra.h(<8 x i16>, <8 x i16>) nounwind
define void @llvm_mips_sra_w_test() nounwind {
entry:
- %0 = load <4 x i32>* @llvm_mips_sra_w_ARG1
- %1 = load <4 x i32>* @llvm_mips_sra_w_ARG2
+ %0 = load <4 x i32>, <4 x i32>* @llvm_mips_sra_w_ARG1
+ %1 = load <4 x i32>, <4 x i32>* @llvm_mips_sra_w_ARG2
%2 = tail call <4 x i32> @llvm.mips.sra.w(<4 x i32> %0, <4 x i32> %1)
store <4 x i32> %2, <4 x i32>* @llvm_mips_sra_w_RES
ret void
@@ -362,8 +362,8 @@ declare <4 x i32> @llvm.mips.sra.w(<4 x i32>, <4 x i32>) nounwind
define void @llvm_mips_sra_d_test() nounwind {
entry:
- %0 = load <2 x i64>* @llvm_mips_sra_d_ARG1
- %1 = load <2 x i64>* @llvm_mips_sra_d_ARG2
+ %0 = load <2 x i64>, <2 x i64>* @llvm_mips_sra_d_ARG1
+ %1 = load <2 x i64>, <2 x i64>* @llvm_mips_sra_d_ARG2
%2 = tail call <2 x i64> @llvm.mips.sra.d(<2 x i64> %0, <2 x i64> %1)
store <2 x i64> %2, <2 x i64>* @llvm_mips_sra_d_RES
ret void
@@ -383,8 +383,8 @@ declare <2 x i64> @llvm.mips.sra.d(<2 x i64>, <2 x i64>) nounwind
define void @sra_b_test() nounwind {
entry:
- %0 = load <16 x i8>* @llvm_mips_sra_b_ARG1
- %1 = load <16 x i8>* @llvm_mips_sra_b_ARG2
+ %0 = load <16 x i8>, <16 x i8>* @llvm_mips_sra_b_ARG1
+ %1 = load <16 x i8>, <16 x i8>* @llvm_mips_sra_b_ARG2
%2 = ashr <16 x i8> %0, %1
store <16 x i8> %2, <16 x i8>* @llvm_mips_sra_b_RES
ret void
@@ -401,8 +401,8 @@ entry:
define void @sra_h_test() nounwind {
entry:
- %0 = load <8 x i16>* @llvm_mips_sra_h_ARG1
- %1 = load <8 x i16>* @llvm_mips_sra_h_ARG2
+ %0 = load <8 x i16>, <8 x i16>* @llvm_mips_sra_h_ARG1
+ %1 = load <8 x i16>, <8 x i16>* @llvm_mips_sra_h_ARG2
%2 = ashr <8 x i16> %0, %1
store <8 x i16> %2, <8 x i16>* @llvm_mips_sra_h_RES
ret void
@@ -419,8 +419,8 @@ entry:
define void @sra_w_test() nounwind {
entry:
- %0 = load <4 x i32>* @llvm_mips_sra_w_ARG1
- %1 = load <4 x i32>* @llvm_mips_sra_w_ARG2
+ %0 = load <4 x i32>, <4 x i32>* @llvm_mips_sra_w_ARG1
+ %1 = load <4 x i32>, <4 x i32>* @llvm_mips_sra_w_ARG2
%2 = ashr <4 x i32> %0, %1
store <4 x i32> %2, <4 x i32>* @llvm_mips_sra_w_RES
ret void
@@ -437,8 +437,8 @@ entry:
define void @sra_d_test() nounwind {
entry:
- %0 = load <2 x i64>* @llvm_mips_sra_d_ARG1
- %1 = load <2 x i64>* @llvm_mips_sra_d_ARG2
+ %0 = load <2 x i64>, <2 x i64>* @llvm_mips_sra_d_ARG1
+ %1 = load <2 x i64>, <2 x i64>* @llvm_mips_sra_d_ARG2
%2 = ashr <2 x i64> %0, %1
store <2 x i64> %2, <2 x i64>* @llvm_mips_sra_d_RES
ret void
@@ -459,8 +459,8 @@ entry:
define void @llvm_mips_srar_b_test() nounwind {
entry:
- %0 = load <16 x i8>* @llvm_mips_srar_b_ARG1
- %1 = load <16 x i8>* @llvm_mips_srar_b_ARG2
+ %0 = load <16 x i8>, <16 x i8>* @llvm_mips_srar_b_ARG1
+ %1 = load <16 x i8>, <16 x i8>* @llvm_mips_srar_b_ARG2
%2 = tail call <16 x i8> @llvm.mips.srar.b(<16 x i8> %0, <16 x i8> %1)
store <16 x i8> %2, <16 x i8>* @llvm_mips_srar_b_RES
ret void
@@ -483,8 +483,8 @@ declare <16 x i8> @llvm.mips.srar.b(<16 x i8>, <16 x i8>) nounwind
define void @llvm_mips_srar_h_test() nounwind {
entry:
- %0 = load <8 x i16>* @llvm_mips_srar_h_ARG1
- %1 = load <8 x i16>* @llvm_mips_srar_h_ARG2
+ %0 = load <8 x i16>, <8 x i16>* @llvm_mips_srar_h_ARG1
+ %1 = load <8 x i16>, <8 x i16>* @llvm_mips_srar_h_ARG2
%2 = tail call <8 x i16> @llvm.mips.srar.h(<8 x i16> %0, <8 x i16> %1)
store <8 x i16> %2, <8 x i16>* @llvm_mips_srar_h_RES
ret void
@@ -507,8 +507,8 @@ declare <8 x i16> @llvm.mips.srar.h(<8 x i16>, <8 x i16>) nounwind
define void @llvm_mips_srar_w_test() nounwind {
entry:
- %0 = load <4 x i32>* @llvm_mips_srar_w_ARG1
- %1 = load <4 x i32>* @llvm_mips_srar_w_ARG2
+ %0 = load <4 x i32>, <4 x i32>* @llvm_mips_srar_w_ARG1
+ %1 = load <4 x i32>, <4 x i32>* @llvm_mips_srar_w_ARG2
%2 = tail call <4 x i32> @llvm.mips.srar.w(<4 x i32> %0, <4 x i32> %1)
store <4 x i32> %2, <4 x i32>* @llvm_mips_srar_w_RES
ret void
@@ -531,8 +531,8 @@ declare <4 x i32> @llvm.mips.srar.w(<4 x i32>, <4 x i32>) nounwind
define void @llvm_mips_srar_d_test() nounwind {
entry:
- %0 = load <2 x i64>* @llvm_mips_srar_d_ARG1
- %1 = load <2 x i64>* @llvm_mips_srar_d_ARG2
+ %0 = load <2 x i64>, <2 x i64>* @llvm_mips_srar_d_ARG1
+ %1 = load <2 x i64>, <2 x i64>* @llvm_mips_srar_d_ARG2
%2 = tail call <2 x i64> @llvm.mips.srar.d(<2 x i64> %0, <2 x i64> %1)
store <2 x i64> %2, <2 x i64>* @llvm_mips_srar_d_RES
ret void
@@ -555,8 +555,8 @@ declare <2 x i64> @llvm.mips.srar.d(<2 x i64>, <2 x i64>) nounwind
define void @llvm_mips_srl_b_test() nounwind {
entry:
- %0 = load <16 x i8>* @llvm_mips_srl_b_ARG1
- %1 = load <16 x i8>* @llvm_mips_srl_b_ARG2
+ %0 = load <16 x i8>, <16 x i8>* @llvm_mips_srl_b_ARG1
+ %1 = load <16 x i8>, <16 x i8>* @llvm_mips_srl_b_ARG2
%2 = tail call <16 x i8> @llvm.mips.srl.b(<16 x i8> %0, <16 x i8> %1)
store <16 x i8> %2, <16 x i8>* @llvm_mips_srl_b_RES
ret void
@@ -579,8 +579,8 @@ declare <16 x i8> @llvm.mips.srl.b(<16 x i8>, <16 x i8>) nounwind
define void @llvm_mips_srl_h_test() nounwind {
entry:
- %0 = load <8 x i16>* @llvm_mips_srl_h_ARG1
- %1 = load <8 x i16>* @llvm_mips_srl_h_ARG2
+ %0 = load <8 x i16>, <8 x i16>* @llvm_mips_srl_h_ARG1
+ %1 = load <8 x i16>, <8 x i16>* @llvm_mips_srl_h_ARG2
%2 = tail call <8 x i16> @llvm.mips.srl.h(<8 x i16> %0, <8 x i16> %1)
store <8 x i16> %2, <8 x i16>* @llvm_mips_srl_h_RES
ret void
@@ -603,8 +603,8 @@ declare <8 x i16> @llvm.mips.srl.h(<8 x i16>, <8 x i16>) nounwind
define void @llvm_mips_srl_w_test() nounwind {
entry:
- %0 = load <4 x i32>* @llvm_mips_srl_w_ARG1
- %1 = load <4 x i32>* @llvm_mips_srl_w_ARG2
+ %0 = load <4 x i32>, <4 x i32>* @llvm_mips_srl_w_ARG1
+ %1 = load <4 x i32>, <4 x i32>* @llvm_mips_srl_w_ARG2
%2 = tail call <4 x i32> @llvm.mips.srl.w(<4 x i32> %0, <4 x i32> %1)
store <4 x i32> %2, <4 x i32>* @llvm_mips_srl_w_RES
ret void
@@ -627,8 +627,8 @@ declare <4 x i32> @llvm.mips.srl.w(<4 x i32>, <4 x i32>) nounwind
define void @llvm_mips_srl_d_test() nounwind {
entry:
- %0 = load <2 x i64>* @llvm_mips_srl_d_ARG1
- %1 = load <2 x i64>* @llvm_mips_srl_d_ARG2
+ %0 = load <2 x i64>, <2 x i64>* @llvm_mips_srl_d_ARG1
+ %1 = load <2 x i64>, <2 x i64>* @llvm_mips_srl_d_ARG2
%2 = tail call <2 x i64> @llvm.mips.srl.d(<2 x i64> %0, <2 x i64> %1)
store <2 x i64> %2, <2 x i64>* @llvm_mips_srl_d_RES
ret void
@@ -651,8 +651,8 @@ declare <2 x i64> @llvm.mips.srl.d(<2 x i64>, <2 x i64>) nounwind
define void @llvm_mips_srlr_b_test() nounwind {
entry:
- %0 = load <16 x i8>* @llvm_mips_srlr_b_ARG1
- %1 = load <16 x i8>* @llvm_mips_srlr_b_ARG2
+ %0 = load <16 x i8>, <16 x i8>* @llvm_mips_srlr_b_ARG1
+ %1 = load <16 x i8>, <16 x i8>* @llvm_mips_srlr_b_ARG2
%2 = tail call <16 x i8> @llvm.mips.srlr.b(<16 x i8> %0, <16 x i8> %1)
store <16 x i8> %2, <16 x i8>* @llvm_mips_srlr_b_RES
ret void
@@ -675,8 +675,8 @@ declare <16 x i8> @llvm.mips.srlr.b(<16 x i8>, <16 x i8>) nounwind
define void @llvm_mips_srlr_h_test() nounwind {
entry:
- %0 = load <8 x i16>* @llvm_mips_srlr_h_ARG1
- %1 = load <8 x i16>* @llvm_mips_srlr_h_ARG2
+ %0 = load <8 x i16>, <8 x i16>* @llvm_mips_srlr_h_ARG1
+ %1 = load <8 x i16>, <8 x i16>* @llvm_mips_srlr_h_ARG2
%2 = tail call <8 x i16> @llvm.mips.srlr.h(<8 x i16> %0, <8 x i16> %1)
store <8 x i16> %2, <8 x i16>* @llvm_mips_srlr_h_RES
ret void
@@ -699,8 +699,8 @@ declare <8 x i16> @llvm.mips.srlr.h(<8 x i16>, <8 x i16>) nounwind
define void @llvm_mips_srlr_w_test() nounwind {
entry:
- %0 = load <4 x i32>* @llvm_mips_srlr_w_ARG1
- %1 = load <4 x i32>* @llvm_mips_srlr_w_ARG2
+ %0 = load <4 x i32>, <4 x i32>* @llvm_mips_srlr_w_ARG1
+ %1 = load <4 x i32>, <4 x i32>* @llvm_mips_srlr_w_ARG2
%2 = tail call <4 x i32> @llvm.mips.srlr.w(<4 x i32> %0, <4 x i32> %1)
store <4 x i32> %2, <4 x i32>* @llvm_mips_srlr_w_RES
ret void
@@ -723,8 +723,8 @@ declare <4 x i32> @llvm.mips.srlr.w(<4 x i32>, <4 x i32>) nounwind
define void @llvm_mips_srlr_d_test() nounwind {
entry:
- %0 = load <2 x i64>* @llvm_mips_srlr_d_ARG1
- %1 = load <2 x i64>* @llvm_mips_srlr_d_ARG2
+ %0 = load <2 x i64>, <2 x i64>* @llvm_mips_srlr_d_ARG1
+ %1 = load <2 x i64>, <2 x i64>* @llvm_mips_srlr_d_ARG2
%2 = tail call <2 x i64> @llvm.mips.srlr.d(<2 x i64> %0, <2 x i64> %1)
store <2 x i64> %2, <2 x i64>* @llvm_mips_srlr_d_RES
ret void
@@ -744,8 +744,8 @@ declare <2 x i64> @llvm.mips.srlr.d(<2 x i64>, <2 x i64>) nounwind
define void @srl_b_test() nounwind {
entry:
- %0 = load <16 x i8>* @llvm_mips_srl_b_ARG1
- %1 = load <16 x i8>* @llvm_mips_srl_b_ARG2
+ %0 = load <16 x i8>, <16 x i8>* @llvm_mips_srl_b_ARG1
+ %1 = load <16 x i8>, <16 x i8>* @llvm_mips_srl_b_ARG2
%2 = lshr <16 x i8> %0, %1
store <16 x i8> %2, <16 x i8>* @llvm_mips_srl_b_RES
ret void
@@ -762,8 +762,8 @@ entry:
define void @srl_h_test() nounwind {
entry:
- %0 = load <8 x i16>* @llvm_mips_srl_h_ARG1
- %1 = load <8 x i16>* @llvm_mips_srl_h_ARG2
+ %0 = load <8 x i16>, <8 x i16>* @llvm_mips_srl_h_ARG1
+ %1 = load <8 x i16>, <8 x i16>* @llvm_mips_srl_h_ARG2
%2 = lshr <8 x i16> %0, %1
store <8 x i16> %2, <8 x i16>* @llvm_mips_srl_h_RES
ret void
@@ -780,8 +780,8 @@ entry:
define void @srl_w_test() nounwind {
entry:
- %0 = load <4 x i32>* @llvm_mips_srl_w_ARG1
- %1 = load <4 x i32>* @llvm_mips_srl_w_ARG2
+ %0 = load <4 x i32>, <4 x i32>* @llvm_mips_srl_w_ARG1
+ %1 = load <4 x i32>, <4 x i32>* @llvm_mips_srl_w_ARG2
%2 = lshr <4 x i32> %0, %1
store <4 x i32> %2, <4 x i32>* @llvm_mips_srl_w_RES
ret void
@@ -798,8 +798,8 @@ entry:
define void @srl_d_test() nounwind {
entry:
- %0 = load <2 x i64>* @llvm_mips_srl_d_ARG1
- %1 = load <2 x i64>* @llvm_mips_srl_d_ARG2
+ %0 = load <2 x i64>, <2 x i64>* @llvm_mips_srl_d_ARG1
+ %1 = load <2 x i64>, <2 x i64>* @llvm_mips_srl_d_ARG2
%2 = lshr <2 x i64> %0, %1
store <2 x i64> %2, <2 x i64>* @llvm_mips_srl_d_RES
ret void
@@ -820,8 +820,8 @@ entry:
define void @llvm_mips_subs_s_b_test() nounwind {
entry:
- %0 = load <16 x i8>* @llvm_mips_subs_s_b_ARG1
- %1 = load <16 x i8>* @llvm_mips_subs_s_b_ARG2
+ %0 = load <16 x i8>, <16 x i8>* @llvm_mips_subs_s_b_ARG1
+ %1 = load <16 x i8>, <16 x i8>* @llvm_mips_subs_s_b_ARG2
%2 = tail call <16 x i8> @llvm.mips.subs.s.b(<16 x i8> %0, <16 x i8> %1)
store <16 x i8> %2, <16 x i8>* @llvm_mips_subs_s_b_RES
ret void
@@ -844,8 +844,8 @@ declare <16 x i8> @llvm.mips.subs.s.b(<16 x i8>, <16 x i8>) nounwind
define void @llvm_mips_subs_s_h_test() nounwind {
entry:
- %0 = load <8 x i16>* @llvm_mips_subs_s_h_ARG1
- %1 = load <8 x i16>* @llvm_mips_subs_s_h_ARG2
+ %0 = load <8 x i16>, <8 x i16>* @llvm_mips_subs_s_h_ARG1
+ %1 = load <8 x i16>, <8 x i16>* @llvm_mips_subs_s_h_ARG2
%2 = tail call <8 x i16> @llvm.mips.subs.s.h(<8 x i16> %0, <8 x i16> %1)
store <8 x i16> %2, <8 x i16>* @llvm_mips_subs_s_h_RES
ret void
@@ -868,8 +868,8 @@ declare <8 x i16> @llvm.mips.subs.s.h(<8 x i16>, <8 x i16>) nounwind
define void @llvm_mips_subs_s_w_test() nounwind {
entry:
- %0 = load <4 x i32>* @llvm_mips_subs_s_w_ARG1
- %1 = load <4 x i32>* @llvm_mips_subs_s_w_ARG2
+ %0 = load <4 x i32>, <4 x i32>* @llvm_mips_subs_s_w_ARG1
+ %1 = load <4 x i32>, <4 x i32>* @llvm_mips_subs_s_w_ARG2
%2 = tail call <4 x i32> @llvm.mips.subs.s.w(<4 x i32> %0, <4 x i32> %1)
store <4 x i32> %2, <4 x i32>* @llvm_mips_subs_s_w_RES
ret void
@@ -892,8 +892,8 @@ declare <4 x i32> @llvm.mips.subs.s.w(<4 x i32>, <4 x i32>) nounwind
define void @llvm_mips_subs_s_d_test() nounwind {
entry:
- %0 = load <2 x i64>* @llvm_mips_subs_s_d_ARG1
- %1 = load <2 x i64>* @llvm_mips_subs_s_d_ARG2
+ %0 = load <2 x i64>, <2 x i64>* @llvm_mips_subs_s_d_ARG1
+ %1 = load <2 x i64>, <2 x i64>* @llvm_mips_subs_s_d_ARG2
%2 = tail call <2 x i64> @llvm.mips.subs.s.d(<2 x i64> %0, <2 x i64> %1)
store <2 x i64> %2, <2 x i64>* @llvm_mips_subs_s_d_RES
ret void
@@ -916,8 +916,8 @@ declare <2 x i64> @llvm.mips.subs.s.d(<2 x i64>, <2 x i64>) nounwind
define void @llvm_mips_subs_u_b_test() nounwind {
entry:
- %0 = load <16 x i8>* @llvm_mips_subs_u_b_ARG1
- %1 = load <16 x i8>* @llvm_mips_subs_u_b_ARG2
+ %0 = load <16 x i8>, <16 x i8>* @llvm_mips_subs_u_b_ARG1
+ %1 = load <16 x i8>, <16 x i8>* @llvm_mips_subs_u_b_ARG2
%2 = tail call <16 x i8> @llvm.mips.subs.u.b(<16 x i8> %0, <16 x i8> %1)
store <16 x i8> %2, <16 x i8>* @llvm_mips_subs_u_b_RES
ret void
@@ -940,8 +940,8 @@ declare <16 x i8> @llvm.mips.subs.u.b(<16 x i8>, <16 x i8>) nounwind
define void @llvm_mips_subs_u_h_test() nounwind {
entry:
- %0 = load <8 x i16>* @llvm_mips_subs_u_h_ARG1
- %1 = load <8 x i16>* @llvm_mips_subs_u_h_ARG2
+ %0 = load <8 x i16>, <8 x i16>* @llvm_mips_subs_u_h_ARG1
+ %1 = load <8 x i16>, <8 x i16>* @llvm_mips_subs_u_h_ARG2
%2 = tail call <8 x i16> @llvm.mips.subs.u.h(<8 x i16> %0, <8 x i16> %1)
store <8 x i16> %2, <8 x i16>* @llvm_mips_subs_u_h_RES
ret void
@@ -964,8 +964,8 @@ declare <8 x i16> @llvm.mips.subs.u.h(<8 x i16>, <8 x i16>) nounwind
define void @llvm_mips_subs_u_w_test() nounwind {
entry:
- %0 = load <4 x i32>* @llvm_mips_subs_u_w_ARG1
- %1 = load <4 x i32>* @llvm_mips_subs_u_w_ARG2
+ %0 = load <4 x i32>, <4 x i32>* @llvm_mips_subs_u_w_ARG1
+ %1 = load <4 x i32>, <4 x i32>* @llvm_mips_subs_u_w_ARG2
%2 = tail call <4 x i32> @llvm.mips.subs.u.w(<4 x i32> %0, <4 x i32> %1)
store <4 x i32> %2, <4 x i32>* @llvm_mips_subs_u_w_RES
ret void
@@ -988,8 +988,8 @@ declare <4 x i32> @llvm.mips.subs.u.w(<4 x i32>, <4 x i32>) nounwind
define void @llvm_mips_subs_u_d_test() nounwind {
entry:
- %0 = load <2 x i64>* @llvm_mips_subs_u_d_ARG1
- %1 = load <2 x i64>* @llvm_mips_subs_u_d_ARG2
+ %0 = load <2 x i64>, <2 x i64>* @llvm_mips_subs_u_d_ARG1
+ %1 = load <2 x i64>, <2 x i64>* @llvm_mips_subs_u_d_ARG2
%2 = tail call <2 x i64> @llvm.mips.subs.u.d(<2 x i64> %0, <2 x i64> %1)
store <2 x i64> %2, <2 x i64>* @llvm_mips_subs_u_d_RES
ret void
@@ -1012,8 +1012,8 @@ declare <2 x i64> @llvm.mips.subs.u.d(<2 x i64>, <2 x i64>) nounwind
define void @llvm_mips_subsus_u_b_test() nounwind {
entry:
- %0 = load <16 x i8>* @llvm_mips_subsus_u_b_ARG1
- %1 = load <16 x i8>* @llvm_mips_subsus_u_b_ARG2
+ %0 = load <16 x i8>, <16 x i8>* @llvm_mips_subsus_u_b_ARG1
+ %1 = load <16 x i8>, <16 x i8>* @llvm_mips_subsus_u_b_ARG2
%2 = tail call <16 x i8> @llvm.mips.subsus.u.b(<16 x i8> %0, <16 x i8> %1)
store <16 x i8> %2, <16 x i8>* @llvm_mips_subsus_u_b_RES
ret void
@@ -1036,8 +1036,8 @@ declare <16 x i8> @llvm.mips.subsus.u.b(<16 x i8>, <16 x i8>) nounwind
define void @llvm_mips_subsus_u_h_test() nounwind {
entry:
- %0 = load <8 x i16>* @llvm_mips_subsus_u_h_ARG1
- %1 = load <8 x i16>* @llvm_mips_subsus_u_h_ARG2
+ %0 = load <8 x i16>, <8 x i16>* @llvm_mips_subsus_u_h_ARG1
+ %1 = load <8 x i16>, <8 x i16>* @llvm_mips_subsus_u_h_ARG2
%2 = tail call <8 x i16> @llvm.mips.subsus.u.h(<8 x i16> %0, <8 x i16> %1)
store <8 x i16> %2, <8 x i16>* @llvm_mips_subsus_u_h_RES
ret void
@@ -1060,8 +1060,8 @@ declare <8 x i16> @llvm.mips.subsus.u.h(<8 x i16>, <8 x i16>) nounwind
define void @llvm_mips_subsus_u_w_test() nounwind {
entry:
- %0 = load <4 x i32>* @llvm_mips_subsus_u_w_ARG1
- %1 = load <4 x i32>* @llvm_mips_subsus_u_w_ARG2
+ %0 = load <4 x i32>, <4 x i32>* @llvm_mips_subsus_u_w_ARG1
+ %1 = load <4 x i32>, <4 x i32>* @llvm_mips_subsus_u_w_ARG2
%2 = tail call <4 x i32> @llvm.mips.subsus.u.w(<4 x i32> %0, <4 x i32> %1)
store <4 x i32> %2, <4 x i32>* @llvm_mips_subsus_u_w_RES
ret void
@@ -1084,8 +1084,8 @@ declare <4 x i32> @llvm.mips.subsus.u.w(<4 x i32>, <4 x i32>) nounwind
define void @llvm_mips_subsus_u_d_test() nounwind {
entry:
- %0 = load <2 x i64>* @llvm_mips_subsus_u_d_ARG1
- %1 = load <2 x i64>* @llvm_mips_subsus_u_d_ARG2
+ %0 = load <2 x i64>, <2 x i64>* @llvm_mips_subsus_u_d_ARG1
+ %1 = load <2 x i64>, <2 x i64>* @llvm_mips_subsus_u_d_ARG2
%2 = tail call <2 x i64> @llvm.mips.subsus.u.d(<2 x i64> %0, <2 x i64> %1)
store <2 x i64> %2, <2 x i64>* @llvm_mips_subsus_u_d_RES
ret void
@@ -1108,8 +1108,8 @@ declare <2 x i64> @llvm.mips.subsus.u.d(<2 x i64>, <2 x i64>) nounwind
define void @llvm_mips_subsuu_s_b_test() nounwind {
entry:
- %0 = load <16 x i8>* @llvm_mips_subsuu_s_b_ARG1
- %1 = load <16 x i8>* @llvm_mips_subsuu_s_b_ARG2
+ %0 = load <16 x i8>, <16 x i8>* @llvm_mips_subsuu_s_b_ARG1
+ %1 = load <16 x i8>, <16 x i8>* @llvm_mips_subsuu_s_b_ARG2
%2 = tail call <16 x i8> @llvm.mips.subsuu.s.b(<16 x i8> %0, <16 x i8> %1)
store <16 x i8> %2, <16 x i8>* @llvm_mips_subsuu_s_b_RES
ret void
@@ -1132,8 +1132,8 @@ declare <16 x i8> @llvm.mips.subsuu.s.b(<16 x i8>, <16 x i8>) nounwind
define void @llvm_mips_subsuu_s_h_test() nounwind {
entry:
- %0 = load <8 x i16>* @llvm_mips_subsuu_s_h_ARG1
- %1 = load <8 x i16>* @llvm_mips_subsuu_s_h_ARG2
+ %0 = load <8 x i16>, <8 x i16>* @llvm_mips_subsuu_s_h_ARG1
+ %1 = load <8 x i16>, <8 x i16>* @llvm_mips_subsuu_s_h_ARG2
%2 = tail call <8 x i16> @llvm.mips.subsuu.s.h(<8 x i16> %0, <8 x i16> %1)
store <8 x i16> %2, <8 x i16>* @llvm_mips_subsuu_s_h_RES
ret void
@@ -1156,8 +1156,8 @@ declare <8 x i16> @llvm.mips.subsuu.s.h(<8 x i16>, <8 x i16>) nounwind
define void @llvm_mips_subsuu_s_w_test() nounwind {
entry:
- %0 = load <4 x i32>* @llvm_mips_subsuu_s_w_ARG1
- %1 = load <4 x i32>* @llvm_mips_subsuu_s_w_ARG2
+ %0 = load <4 x i32>, <4 x i32>* @llvm_mips_subsuu_s_w_ARG1
+ %1 = load <4 x i32>, <4 x i32>* @llvm_mips_subsuu_s_w_ARG2
%2 = tail call <4 x i32> @llvm.mips.subsuu.s.w(<4 x i32> %0, <4 x i32> %1)
store <4 x i32> %2, <4 x i32>* @llvm_mips_subsuu_s_w_RES
ret void
@@ -1180,8 +1180,8 @@ declare <4 x i32> @llvm.mips.subsuu.s.w(<4 x i32>, <4 x i32>) nounwind
define void @llvm_mips_subsuu_s_d_test() nounwind {
entry:
- %0 = load <2 x i64>* @llvm_mips_subsuu_s_d_ARG1
- %1 = load <2 x i64>* @llvm_mips_subsuu_s_d_ARG2
+ %0 = load <2 x i64>, <2 x i64>* @llvm_mips_subsuu_s_d_ARG1
+ %1 = load <2 x i64>, <2 x i64>* @llvm_mips_subsuu_s_d_ARG2
%2 = tail call <2 x i64> @llvm.mips.subsuu.s.d(<2 x i64> %0, <2 x i64> %1)
store <2 x i64> %2, <2 x i64>* @llvm_mips_subsuu_s_d_RES
ret void
@@ -1204,8 +1204,8 @@ declare <2 x i64> @llvm.mips.subsuu.s.d(<2 x i64>, <2 x i64>) nounwind
define void @llvm_mips_subv_b_test() nounwind {
entry:
- %0 = load <16 x i8>* @llvm_mips_subv_b_ARG1
- %1 = load <16 x i8>* @llvm_mips_subv_b_ARG2
+ %0 = load <16 x i8>, <16 x i8>* @llvm_mips_subv_b_ARG1
+ %1 = load <16 x i8>, <16 x i8>* @llvm_mips_subv_b_ARG2
%2 = tail call <16 x i8> @llvm.mips.subv.b(<16 x i8> %0, <16 x i8> %1)
store <16 x i8> %2, <16 x i8>* @llvm_mips_subv_b_RES
ret void
@@ -1228,8 +1228,8 @@ declare <16 x i8> @llvm.mips.subv.b(<16 x i8>, <16 x i8>) nounwind
define void @llvm_mips_subv_h_test() nounwind {
entry:
- %0 = load <8 x i16>* @llvm_mips_subv_h_ARG1
- %1 = load <8 x i16>* @llvm_mips_subv_h_ARG2
+ %0 = load <8 x i16>, <8 x i16>* @llvm_mips_subv_h_ARG1
+ %1 = load <8 x i16>, <8 x i16>* @llvm_mips_subv_h_ARG2
%2 = tail call <8 x i16> @llvm.mips.subv.h(<8 x i16> %0, <8 x i16> %1)
store <8 x i16> %2, <8 x i16>* @llvm_mips_subv_h_RES
ret void
@@ -1252,8 +1252,8 @@ declare <8 x i16> @llvm.mips.subv.h(<8 x i16>, <8 x i16>) nounwind
define void @llvm_mips_subv_w_test() nounwind {
entry:
- %0 = load <4 x i32>* @llvm_mips_subv_w_ARG1
- %1 = load <4 x i32>* @llvm_mips_subv_w_ARG2
+ %0 = load <4 x i32>, <4 x i32>* @llvm_mips_subv_w_ARG1
+ %1 = load <4 x i32>, <4 x i32>* @llvm_mips_subv_w_ARG2
%2 = tail call <4 x i32> @llvm.mips.subv.w(<4 x i32> %0, <4 x i32> %1)
store <4 x i32> %2, <4 x i32>* @llvm_mips_subv_w_RES
ret void
@@ -1276,8 +1276,8 @@ declare <4 x i32> @llvm.mips.subv.w(<4 x i32>, <4 x i32>) nounwind
define void @llvm_mips_subv_d_test() nounwind {
entry:
- %0 = load <2 x i64>* @llvm_mips_subv_d_ARG1
- %1 = load <2 x i64>* @llvm_mips_subv_d_ARG2
+ %0 = load <2 x i64>, <2 x i64>* @llvm_mips_subv_d_ARG1
+ %1 = load <2 x i64>, <2 x i64>* @llvm_mips_subv_d_ARG2
%2 = tail call <2 x i64> @llvm.mips.subv.d(<2 x i64> %0, <2 x i64> %1)
store <2 x i64> %2, <2 x i64>* @llvm_mips_subv_d_RES
ret void
@@ -1297,8 +1297,8 @@ declare <2 x i64> @llvm.mips.subv.d(<2 x i64>, <2 x i64>) nounwind
define void @subv_b_test() nounwind {
entry:
- %0 = load <16 x i8>* @llvm_mips_subv_b_ARG1
- %1 = load <16 x i8>* @llvm_mips_subv_b_ARG2
+ %0 = load <16 x i8>, <16 x i8>* @llvm_mips_subv_b_ARG1
+ %1 = load <16 x i8>, <16 x i8>* @llvm_mips_subv_b_ARG2
%2 = sub <16 x i8> %0, %1
store <16 x i8> %2, <16 x i8>* @llvm_mips_subv_b_RES
ret void
@@ -1315,8 +1315,8 @@ entry:
define void @subv_h_test() nounwind {
entry:
- %0 = load <8 x i16>* @llvm_mips_subv_h_ARG1
- %1 = load <8 x i16>* @llvm_mips_subv_h_ARG2
+ %0 = load <8 x i16>, <8 x i16>* @llvm_mips_subv_h_ARG1
+ %1 = load <8 x i16>, <8 x i16>* @llvm_mips_subv_h_ARG2
%2 = sub <8 x i16> %0, %1
store <8 x i16> %2, <8 x i16>* @llvm_mips_subv_h_RES
ret void
@@ -1333,8 +1333,8 @@ entry:
define void @subv_w_test() nounwind {
entry:
- %0 = load <4 x i32>* @llvm_mips_subv_w_ARG1
- %1 = load <4 x i32>* @llvm_mips_subv_w_ARG2
+ %0 = load <4 x i32>, <4 x i32>* @llvm_mips_subv_w_ARG1
+ %1 = load <4 x i32>, <4 x i32>* @llvm_mips_subv_w_ARG2
%2 = sub <4 x i32> %0, %1
store <4 x i32> %2, <4 x i32>* @llvm_mips_subv_w_RES
ret void
@@ -1351,8 +1351,8 @@ entry:
define void @subv_d_test() nounwind {
entry:
- %0 = load <2 x i64>* @llvm_mips_subv_d_ARG1
- %1 = load <2 x i64>* @llvm_mips_subv_d_ARG2
+ %0 = load <2 x i64>, <2 x i64>* @llvm_mips_subv_d_ARG1
+ %1 = load <2 x i64>, <2 x i64>* @llvm_mips_subv_d_ARG2
%2 = sub <2 x i64> %0, %1
store <2 x i64> %2, <2 x i64>* @llvm_mips_subv_d_RES
ret void
diff --git a/test/CodeGen/Mips/msa/3r-v.ll b/test/CodeGen/Mips/msa/3r-v.ll
index c9693f90d556..2d36da40d2b1 100644
--- a/test/CodeGen/Mips/msa/3r-v.ll
+++ b/test/CodeGen/Mips/msa/3r-v.ll
@@ -11,9 +11,9 @@
define void @llvm_mips_vshf_b_test() nounwind {
entry:
- %0 = load <16 x i8>* @llvm_mips_vshf_b_ARG1
- %1 = load <16 x i8>* @llvm_mips_vshf_b_ARG2
- %2 = load <16 x i8>* @llvm_mips_vshf_b_ARG3
+ %0 = load <16 x i8>, <16 x i8>* @llvm_mips_vshf_b_ARG1
+ %1 = load <16 x i8>, <16 x i8>* @llvm_mips_vshf_b_ARG2
+ %2 = load <16 x i8>, <16 x i8>* @llvm_mips_vshf_b_ARG3
%3 = tail call <16 x i8> @llvm.mips.vshf.b(<16 x i8> %0, <16 x i8> %1, <16 x i8> %2)
store <16 x i8> %3, <16 x i8>* @llvm_mips_vshf_b_RES
ret void
@@ -36,9 +36,9 @@ declare <16 x i8> @llvm.mips.vshf.b(<16 x i8>, <16 x i8>, <16 x i8>) nounwind
define void @llvm_mips_vshf_h_test() nounwind {
entry:
- %0 = load <8 x i16>* @llvm_mips_vshf_h_ARG1
- %1 = load <8 x i16>* @llvm_mips_vshf_h_ARG2
- %2 = load <8 x i16>* @llvm_mips_vshf_h_ARG3
+ %0 = load <8 x i16>, <8 x i16>* @llvm_mips_vshf_h_ARG1
+ %1 = load <8 x i16>, <8 x i16>* @llvm_mips_vshf_h_ARG2
+ %2 = load <8 x i16>, <8 x i16>* @llvm_mips_vshf_h_ARG3
%3 = tail call <8 x i16> @llvm.mips.vshf.h(<8 x i16> %0, <8 x i16> %1, <8 x i16> %2)
store <8 x i16> %3, <8 x i16>* @llvm_mips_vshf_h_RES
ret void
@@ -61,9 +61,9 @@ declare <8 x i16> @llvm.mips.vshf.h(<8 x i16>, <8 x i16>, <8 x i16>) nounwind
define void @llvm_mips_vshf_w_test() nounwind {
entry:
- %0 = load <4 x i32>* @llvm_mips_vshf_w_ARG1
- %1 = load <4 x i32>* @llvm_mips_vshf_w_ARG2
- %2 = load <4 x i32>* @llvm_mips_vshf_w_ARG3
+ %0 = load <4 x i32>, <4 x i32>* @llvm_mips_vshf_w_ARG1
+ %1 = load <4 x i32>, <4 x i32>* @llvm_mips_vshf_w_ARG2
+ %2 = load <4 x i32>, <4 x i32>* @llvm_mips_vshf_w_ARG3
%3 = tail call <4 x i32> @llvm.mips.vshf.w(<4 x i32> %0, <4 x i32> %1, <4 x i32> %2)
store <4 x i32> %3, <4 x i32>* @llvm_mips_vshf_w_RES
ret void
@@ -86,9 +86,9 @@ declare <4 x i32> @llvm.mips.vshf.w(<4 x i32>, <4 x i32>, <4 x i32>) nounwind
define void @llvm_mips_vshf_d_test() nounwind {
entry:
- %0 = load <2 x i64>* @llvm_mips_vshf_d_ARG1
- %1 = load <2 x i64>* @llvm_mips_vshf_d_ARG2
- %2 = load <2 x i64>* @llvm_mips_vshf_d_ARG3
+ %0 = load <2 x i64>, <2 x i64>* @llvm_mips_vshf_d_ARG1
+ %1 = load <2 x i64>, <2 x i64>* @llvm_mips_vshf_d_ARG2
+ %2 = load <2 x i64>, <2 x i64>* @llvm_mips_vshf_d_ARG3
%3 = tail call <2 x i64> @llvm.mips.vshf.d(<2 x i64> %0, <2 x i64> %1, <2 x i64> %2)
store <2 x i64> %3, <2 x i64>* @llvm_mips_vshf_d_RES
ret void
diff --git a/test/CodeGen/Mips/msa/3r_4r.ll b/test/CodeGen/Mips/msa/3r_4r.ll
index b7fd7283788c..73d104c68c53 100644
--- a/test/CodeGen/Mips/msa/3r_4r.ll
+++ b/test/CodeGen/Mips/msa/3r_4r.ll
@@ -11,9 +11,9 @@
define void @llvm_mips_maddv_b_test() nounwind {
entry:
- %0 = load <16 x i8>* @llvm_mips_maddv_b_ARG1
- %1 = load <16 x i8>* @llvm_mips_maddv_b_ARG2
- %2 = load <16 x i8>* @llvm_mips_maddv_b_ARG3
+ %0 = load <16 x i8>, <16 x i8>* @llvm_mips_maddv_b_ARG1
+ %1 = load <16 x i8>, <16 x i8>* @llvm_mips_maddv_b_ARG2
+ %2 = load <16 x i8>, <16 x i8>* @llvm_mips_maddv_b_ARG3
%3 = tail call <16 x i8> @llvm.mips.maddv.b(<16 x i8> %0, <16 x i8> %1, <16 x i8> %2)
store <16 x i8> %3, <16 x i8>* @llvm_mips_maddv_b_RES
ret void
@@ -36,9 +36,9 @@ declare <16 x i8> @llvm.mips.maddv.b(<16 x i8>, <16 x i8>, <16 x i8>) nounwind
define void @llvm_mips_maddv_h_test() nounwind {
entry:
- %0 = load <8 x i16>* @llvm_mips_maddv_h_ARG1
- %1 = load <8 x i16>* @llvm_mips_maddv_h_ARG2
- %2 = load <8 x i16>* @llvm_mips_maddv_h_ARG3
+ %0 = load <8 x i16>, <8 x i16>* @llvm_mips_maddv_h_ARG1
+ %1 = load <8 x i16>, <8 x i16>* @llvm_mips_maddv_h_ARG2
+ %2 = load <8 x i16>, <8 x i16>* @llvm_mips_maddv_h_ARG3
%3 = tail call <8 x i16> @llvm.mips.maddv.h(<8 x i16> %0, <8 x i16> %1, <8 x i16> %2)
store <8 x i16> %3, <8 x i16>* @llvm_mips_maddv_h_RES
ret void
@@ -61,9 +61,9 @@ declare <8 x i16> @llvm.mips.maddv.h(<8 x i16>, <8 x i16>, <8 x i16>) nounwind
define void @llvm_mips_maddv_w_test() nounwind {
entry:
- %0 = load <4 x i32>* @llvm_mips_maddv_w_ARG1
- %1 = load <4 x i32>* @llvm_mips_maddv_w_ARG2
- %2 = load <4 x i32>* @llvm_mips_maddv_w_ARG3
+ %0 = load <4 x i32>, <4 x i32>* @llvm_mips_maddv_w_ARG1
+ %1 = load <4 x i32>, <4 x i32>* @llvm_mips_maddv_w_ARG2
+ %2 = load <4 x i32>, <4 x i32>* @llvm_mips_maddv_w_ARG3
%3 = tail call <4 x i32> @llvm.mips.maddv.w(<4 x i32> %0, <4 x i32> %1, <4 x i32> %2)
store <4 x i32> %3, <4 x i32>* @llvm_mips_maddv_w_RES
ret void
@@ -86,9 +86,9 @@ declare <4 x i32> @llvm.mips.maddv.w(<4 x i32>, <4 x i32>, <4 x i32>) nounwind
define void @llvm_mips_maddv_d_test() nounwind {
entry:
- %0 = load <2 x i64>* @llvm_mips_maddv_d_ARG1
- %1 = load <2 x i64>* @llvm_mips_maddv_d_ARG2
- %2 = load <2 x i64>* @llvm_mips_maddv_d_ARG3
+ %0 = load <2 x i64>, <2 x i64>* @llvm_mips_maddv_d_ARG1
+ %1 = load <2 x i64>, <2 x i64>* @llvm_mips_maddv_d_ARG2
+ %2 = load <2 x i64>, <2 x i64>* @llvm_mips_maddv_d_ARG3
%3 = tail call <2 x i64> @llvm.mips.maddv.d(<2 x i64> %0, <2 x i64> %1, <2 x i64> %2)
store <2 x i64> %3, <2 x i64>* @llvm_mips_maddv_d_RES
ret void
@@ -111,9 +111,9 @@ declare <2 x i64> @llvm.mips.maddv.d(<2 x i64>, <2 x i64>, <2 x i64>) nounwind
define void @llvm_mips_msubv_b_test() nounwind {
entry:
- %0 = load <16 x i8>* @llvm_mips_msubv_b_ARG1
- %1 = load <16 x i8>* @llvm_mips_msubv_b_ARG2
- %2 = load <16 x i8>* @llvm_mips_msubv_b_ARG3
+ %0 = load <16 x i8>, <16 x i8>* @llvm_mips_msubv_b_ARG1
+ %1 = load <16 x i8>, <16 x i8>* @llvm_mips_msubv_b_ARG2
+ %2 = load <16 x i8>, <16 x i8>* @llvm_mips_msubv_b_ARG3
%3 = tail call <16 x i8> @llvm.mips.msubv.b(<16 x i8> %0, <16 x i8> %1, <16 x i8> %2)
store <16 x i8> %3, <16 x i8>* @llvm_mips_msubv_b_RES
ret void
@@ -136,9 +136,9 @@ declare <16 x i8> @llvm.mips.msubv.b(<16 x i8>, <16 x i8>, <16 x i8>) nounwind
define void @llvm_mips_msubv_h_test() nounwind {
entry:
- %0 = load <8 x i16>* @llvm_mips_msubv_h_ARG1
- %1 = load <8 x i16>* @llvm_mips_msubv_h_ARG2
- %2 = load <8 x i16>* @llvm_mips_msubv_h_ARG3
+ %0 = load <8 x i16>, <8 x i16>* @llvm_mips_msubv_h_ARG1
+ %1 = load <8 x i16>, <8 x i16>* @llvm_mips_msubv_h_ARG2
+ %2 = load <8 x i16>, <8 x i16>* @llvm_mips_msubv_h_ARG3
%3 = tail call <8 x i16> @llvm.mips.msubv.h(<8 x i16> %0, <8 x i16> %1, <8 x i16> %2)
store <8 x i16> %3, <8 x i16>* @llvm_mips_msubv_h_RES
ret void
@@ -161,9 +161,9 @@ declare <8 x i16> @llvm.mips.msubv.h(<8 x i16>, <8 x i16>, <8 x i16>) nounwind
define void @llvm_mips_msubv_w_test() nounwind {
entry:
- %0 = load <4 x i32>* @llvm_mips_msubv_w_ARG1
- %1 = load <4 x i32>* @llvm_mips_msubv_w_ARG2
- %2 = load <4 x i32>* @llvm_mips_msubv_w_ARG3
+ %0 = load <4 x i32>, <4 x i32>* @llvm_mips_msubv_w_ARG1
+ %1 = load <4 x i32>, <4 x i32>* @llvm_mips_msubv_w_ARG2
+ %2 = load <4 x i32>, <4 x i32>* @llvm_mips_msubv_w_ARG3
%3 = tail call <4 x i32> @llvm.mips.msubv.w(<4 x i32> %0, <4 x i32> %1, <4 x i32> %2)
store <4 x i32> %3, <4 x i32>* @llvm_mips_msubv_w_RES
ret void
@@ -186,9 +186,9 @@ declare <4 x i32> @llvm.mips.msubv.w(<4 x i32>, <4 x i32>, <4 x i32>) nounwind
define void @llvm_mips_msubv_d_test() nounwind {
entry:
- %0 = load <2 x i64>* @llvm_mips_msubv_d_ARG1
- %1 = load <2 x i64>* @llvm_mips_msubv_d_ARG2
- %2 = load <2 x i64>* @llvm_mips_msubv_d_ARG3
+ %0 = load <2 x i64>, <2 x i64>* @llvm_mips_msubv_d_ARG1
+ %1 = load <2 x i64>, <2 x i64>* @llvm_mips_msubv_d_ARG2
+ %2 = load <2 x i64>, <2 x i64>* @llvm_mips_msubv_d_ARG3
%3 = tail call <2 x i64> @llvm.mips.msubv.d(<2 x i64> %0, <2 x i64> %1, <2 x i64> %2)
store <2 x i64> %3, <2 x i64>* @llvm_mips_msubv_d_RES
ret void
diff --git a/test/CodeGen/Mips/msa/3r_4r_widen.ll b/test/CodeGen/Mips/msa/3r_4r_widen.ll
index 7063e4566a78..fe248eeb566b 100644
--- a/test/CodeGen/Mips/msa/3r_4r_widen.ll
+++ b/test/CodeGen/Mips/msa/3r_4r_widen.ll
@@ -12,9 +12,9 @@
define void @llvm_mips_dpadd_s_h_test() nounwind {
entry:
- %0 = load <8 x i16>* @llvm_mips_dpadd_s_h_ARG1
- %1 = load <16 x i8>* @llvm_mips_dpadd_s_h_ARG2
- %2 = load <16 x i8>* @llvm_mips_dpadd_s_h_ARG3
+ %0 = load <8 x i16>, <8 x i16>* @llvm_mips_dpadd_s_h_ARG1
+ %1 = load <16 x i8>, <16 x i8>* @llvm_mips_dpadd_s_h_ARG2
+ %2 = load <16 x i8>, <16 x i8>* @llvm_mips_dpadd_s_h_ARG3
%3 = tail call <8 x i16> @llvm.mips.dpadd.s.h(<8 x i16> %0, <16 x i8> %1, <16 x i8> %2)
store <8 x i16> %3, <8 x i16>* @llvm_mips_dpadd_s_h_RES
ret void
@@ -37,9 +37,9 @@ declare <8 x i16> @llvm.mips.dpadd.s.h(<8 x i16>, <16 x i8>, <16 x i8>) nounwind
define void @llvm_mips_dpadd_s_w_test() nounwind {
entry:
- %0 = load <4 x i32>* @llvm_mips_dpadd_s_w_ARG1
- %1 = load <8 x i16>* @llvm_mips_dpadd_s_w_ARG2
- %2 = load <8 x i16>* @llvm_mips_dpadd_s_w_ARG3
+ %0 = load <4 x i32>, <4 x i32>* @llvm_mips_dpadd_s_w_ARG1
+ %1 = load <8 x i16>, <8 x i16>* @llvm_mips_dpadd_s_w_ARG2
+ %2 = load <8 x i16>, <8 x i16>* @llvm_mips_dpadd_s_w_ARG3
%3 = tail call <4 x i32> @llvm.mips.dpadd.s.w(<4 x i32> %0, <8 x i16> %1, <8 x i16> %2)
store <4 x i32> %3, <4 x i32>* @llvm_mips_dpadd_s_w_RES
ret void
@@ -62,9 +62,9 @@ declare <4 x i32> @llvm.mips.dpadd.s.w(<4 x i32>, <8 x i16>, <8 x i16>) nounwind
define void @llvm_mips_dpadd_s_d_test() nounwind {
entry:
- %0 = load <2 x i64>* @llvm_mips_dpadd_s_d_ARG1
- %1 = load <4 x i32>* @llvm_mips_dpadd_s_d_ARG2
- %2 = load <4 x i32>* @llvm_mips_dpadd_s_d_ARG3
+ %0 = load <2 x i64>, <2 x i64>* @llvm_mips_dpadd_s_d_ARG1
+ %1 = load <4 x i32>, <4 x i32>* @llvm_mips_dpadd_s_d_ARG2
+ %2 = load <4 x i32>, <4 x i32>* @llvm_mips_dpadd_s_d_ARG3
%3 = tail call <2 x i64> @llvm.mips.dpadd.s.d(<2 x i64> %0, <4 x i32> %1, <4 x i32> %2)
store <2 x i64> %3, <2 x i64>* @llvm_mips_dpadd_s_d_RES
ret void
@@ -87,9 +87,9 @@ declare <2 x i64> @llvm.mips.dpadd.s.d(<2 x i64>, <4 x i32>, <4 x i32>) nounwind
define void @llvm_mips_dpadd_u_h_test() nounwind {
entry:
- %0 = load <8 x i16>* @llvm_mips_dpadd_u_h_ARG1
- %1 = load <16 x i8>* @llvm_mips_dpadd_u_h_ARG2
- %2 = load <16 x i8>* @llvm_mips_dpadd_u_h_ARG3
+ %0 = load <8 x i16>, <8 x i16>* @llvm_mips_dpadd_u_h_ARG1
+ %1 = load <16 x i8>, <16 x i8>* @llvm_mips_dpadd_u_h_ARG2
+ %2 = load <16 x i8>, <16 x i8>* @llvm_mips_dpadd_u_h_ARG3
%3 = tail call <8 x i16> @llvm.mips.dpadd.u.h(<8 x i16> %0, <16 x i8> %1, <16 x i8> %2)
store <8 x i16> %3, <8 x i16>* @llvm_mips_dpadd_u_h_RES
ret void
@@ -112,9 +112,9 @@ declare <8 x i16> @llvm.mips.dpadd.u.h(<8 x i16>, <16 x i8>, <16 x i8>) nounwind
define void @llvm_mips_dpadd_u_w_test() nounwind {
entry:
- %0 = load <4 x i32>* @llvm_mips_dpadd_u_w_ARG1
- %1 = load <8 x i16>* @llvm_mips_dpadd_u_w_ARG2
- %2 = load <8 x i16>* @llvm_mips_dpadd_u_w_ARG3
+ %0 = load <4 x i32>, <4 x i32>* @llvm_mips_dpadd_u_w_ARG1
+ %1 = load <8 x i16>, <8 x i16>* @llvm_mips_dpadd_u_w_ARG2
+ %2 = load <8 x i16>, <8 x i16>* @llvm_mips_dpadd_u_w_ARG3
%3 = tail call <4 x i32> @llvm.mips.dpadd.u.w(<4 x i32> %0, <8 x i16> %1, <8 x i16> %2)
store <4 x i32> %3, <4 x i32>* @llvm_mips_dpadd_u_w_RES
ret void
@@ -137,9 +137,9 @@ declare <4 x i32> @llvm.mips.dpadd.u.w(<4 x i32>, <8 x i16>, <8 x i16>) nounwind
define void @llvm_mips_dpadd_u_d_test() nounwind {
entry:
- %0 = load <2 x i64>* @llvm_mips_dpadd_u_d_ARG1
- %1 = load <4 x i32>* @llvm_mips_dpadd_u_d_ARG2
- %2 = load <4 x i32>* @llvm_mips_dpadd_u_d_ARG3
+ %0 = load <2 x i64>, <2 x i64>* @llvm_mips_dpadd_u_d_ARG1
+ %1 = load <4 x i32>, <4 x i32>* @llvm_mips_dpadd_u_d_ARG2
+ %2 = load <4 x i32>, <4 x i32>* @llvm_mips_dpadd_u_d_ARG3
%3 = tail call <2 x i64> @llvm.mips.dpadd.u.d(<2 x i64> %0, <4 x i32> %1, <4 x i32> %2)
store <2 x i64> %3, <2 x i64>* @llvm_mips_dpadd_u_d_RES
ret void
@@ -162,9 +162,9 @@ declare <2 x i64> @llvm.mips.dpadd.u.d(<2 x i64>, <4 x i32>, <4 x i32>) nounwind
define void @llvm_mips_dpsub_s_h_test() nounwind {
entry:
- %0 = load <8 x i16>* @llvm_mips_dpsub_s_h_ARG1
- %1 = load <16 x i8>* @llvm_mips_dpsub_s_h_ARG2
- %2 = load <16 x i8>* @llvm_mips_dpsub_s_h_ARG3
+ %0 = load <8 x i16>, <8 x i16>* @llvm_mips_dpsub_s_h_ARG1
+ %1 = load <16 x i8>, <16 x i8>* @llvm_mips_dpsub_s_h_ARG2
+ %2 = load <16 x i8>, <16 x i8>* @llvm_mips_dpsub_s_h_ARG3
%3 = tail call <8 x i16> @llvm.mips.dpsub.s.h(<8 x i16> %0, <16 x i8> %1, <16 x i8> %2)
store <8 x i16> %3, <8 x i16>* @llvm_mips_dpsub_s_h_RES
ret void
@@ -187,9 +187,9 @@ declare <8 x i16> @llvm.mips.dpsub.s.h(<8 x i16>, <16 x i8>, <16 x i8>) nounwind
define void @llvm_mips_dpsub_s_w_test() nounwind {
entry:
- %0 = load <4 x i32>* @llvm_mips_dpsub_s_w_ARG1
- %1 = load <8 x i16>* @llvm_mips_dpsub_s_w_ARG2
- %2 = load <8 x i16>* @llvm_mips_dpsub_s_w_ARG3
+ %0 = load <4 x i32>, <4 x i32>* @llvm_mips_dpsub_s_w_ARG1
+ %1 = load <8 x i16>, <8 x i16>* @llvm_mips_dpsub_s_w_ARG2
+ %2 = load <8 x i16>, <8 x i16>* @llvm_mips_dpsub_s_w_ARG3
%3 = tail call <4 x i32> @llvm.mips.dpsub.s.w(<4 x i32> %0, <8 x i16> %1, <8 x i16> %2)
store <4 x i32> %3, <4 x i32>* @llvm_mips_dpsub_s_w_RES
ret void
@@ -212,9 +212,9 @@ declare <4 x i32> @llvm.mips.dpsub.s.w(<4 x i32>, <8 x i16>, <8 x i16>) nounwind
define void @llvm_mips_dpsub_s_d_test() nounwind {
entry:
- %0 = load <2 x i64>* @llvm_mips_dpsub_s_d_ARG1
- %1 = load <4 x i32>* @llvm_mips_dpsub_s_d_ARG2
- %2 = load <4 x i32>* @llvm_mips_dpsub_s_d_ARG3
+ %0 = load <2 x i64>, <2 x i64>* @llvm_mips_dpsub_s_d_ARG1
+ %1 = load <4 x i32>, <4 x i32>* @llvm_mips_dpsub_s_d_ARG2
+ %2 = load <4 x i32>, <4 x i32>* @llvm_mips_dpsub_s_d_ARG3
%3 = tail call <2 x i64> @llvm.mips.dpsub.s.d(<2 x i64> %0, <4 x i32> %1, <4 x i32> %2)
store <2 x i64> %3, <2 x i64>* @llvm_mips_dpsub_s_d_RES
ret void
@@ -237,9 +237,9 @@ declare <2 x i64> @llvm.mips.dpsub.s.d(<2 x i64>, <4 x i32>, <4 x i32>) nounwind
define void @llvm_mips_dpsub_u_h_test() nounwind {
entry:
- %0 = load <8 x i16>* @llvm_mips_dpsub_u_h_ARG1
- %1 = load <16 x i8>* @llvm_mips_dpsub_u_h_ARG2
- %2 = load <16 x i8>* @llvm_mips_dpsub_u_h_ARG3
+ %0 = load <8 x i16>, <8 x i16>* @llvm_mips_dpsub_u_h_ARG1
+ %1 = load <16 x i8>, <16 x i8>* @llvm_mips_dpsub_u_h_ARG2
+ %2 = load <16 x i8>, <16 x i8>* @llvm_mips_dpsub_u_h_ARG3
%3 = tail call <8 x i16> @llvm.mips.dpsub.u.h(<8 x i16> %0, <16 x i8> %1, <16 x i8> %2)
store <8 x i16> %3, <8 x i16>* @llvm_mips_dpsub_u_h_RES
ret void
@@ -262,9 +262,9 @@ declare <8 x i16> @llvm.mips.dpsub.u.h(<8 x i16>, <16 x i8>, <16 x i8>) nounwind
define void @llvm_mips_dpsub_u_w_test() nounwind {
entry:
- %0 = load <4 x i32>* @llvm_mips_dpsub_u_w_ARG1
- %1 = load <8 x i16>* @llvm_mips_dpsub_u_w_ARG2
- %2 = load <8 x i16>* @llvm_mips_dpsub_u_w_ARG3
+ %0 = load <4 x i32>, <4 x i32>* @llvm_mips_dpsub_u_w_ARG1
+ %1 = load <8 x i16>, <8 x i16>* @llvm_mips_dpsub_u_w_ARG2
+ %2 = load <8 x i16>, <8 x i16>* @llvm_mips_dpsub_u_w_ARG3
%3 = tail call <4 x i32> @llvm.mips.dpsub.u.w(<4 x i32> %0, <8 x i16> %1, <8 x i16> %2)
store <4 x i32> %3, <4 x i32>* @llvm_mips_dpsub_u_w_RES
ret void
@@ -287,9 +287,9 @@ declare <4 x i32> @llvm.mips.dpsub.u.w(<4 x i32>, <8 x i16>, <8 x i16>) nounwind
define void @llvm_mips_dpsub_u_d_test() nounwind {
entry:
- %0 = load <2 x i64>* @llvm_mips_dpsub_u_d_ARG1
- %1 = load <4 x i32>* @llvm_mips_dpsub_u_d_ARG2
- %2 = load <4 x i32>* @llvm_mips_dpsub_u_d_ARG3
+ %0 = load <2 x i64>, <2 x i64>* @llvm_mips_dpsub_u_d_ARG1
+ %1 = load <4 x i32>, <4 x i32>* @llvm_mips_dpsub_u_d_ARG2
+ %2 = load <4 x i32>, <4 x i32>* @llvm_mips_dpsub_u_d_ARG3
%3 = tail call <2 x i64> @llvm.mips.dpsub.u.d(<2 x i64> %0, <4 x i32> %1, <4 x i32> %2)
store <2 x i64> %3, <2 x i64>* @llvm_mips_dpsub_u_d_RES
ret void
diff --git a/test/CodeGen/Mips/msa/3r_splat.ll b/test/CodeGen/Mips/msa/3r_splat.ll
index 6b0cb26f8c81..56d26b030de9 100644
--- a/test/CodeGen/Mips/msa/3r_splat.ll
+++ b/test/CodeGen/Mips/msa/3r_splat.ll
@@ -11,7 +11,7 @@
define void @llvm_mips_splat_b_test(i32 %a) nounwind {
entry:
- %0 = load <16 x i8>* @llvm_mips_splat_b_ARG1
+ %0 = load <16 x i8>, <16 x i8>* @llvm_mips_splat_b_ARG1
%1 = tail call <16 x i8> @llvm.mips.splat.b(<16 x i8> %0, i32 %a)
store <16 x i8> %1, <16 x i8>* @llvm_mips_splat_b_RES
ret void
@@ -32,7 +32,7 @@ declare <16 x i8> @llvm.mips.splat.b(<16 x i8>, i32) nounwind
define void @llvm_mips_splat_h_test(i32 %a) nounwind {
entry:
- %0 = load <8 x i16>* @llvm_mips_splat_h_ARG1
+ %0 = load <8 x i16>, <8 x i16>* @llvm_mips_splat_h_ARG1
%1 = tail call <8 x i16> @llvm.mips.splat.h(<8 x i16> %0, i32 %a)
store <8 x i16> %1, <8 x i16>* @llvm_mips_splat_h_RES
ret void
@@ -53,7 +53,7 @@ declare <8 x i16> @llvm.mips.splat.h(<8 x i16>, i32) nounwind
define void @llvm_mips_splat_w_test(i32 %a) nounwind {
entry:
- %0 = load <4 x i32>* @llvm_mips_splat_w_ARG1
+ %0 = load <4 x i32>, <4 x i32>* @llvm_mips_splat_w_ARG1
%1 = tail call <4 x i32> @llvm.mips.splat.w(<4 x i32> %0, i32 %a)
store <4 x i32> %1, <4 x i32>* @llvm_mips_splat_w_RES
ret void
@@ -74,7 +74,7 @@ declare <4 x i32> @llvm.mips.splat.w(<4 x i32>, i32) nounwind
define void @llvm_mips_splat_d_test(i32 %a) nounwind {
entry:
- %0 = load <2 x i64>* @llvm_mips_splat_d_ARG1
+ %0 = load <2 x i64>, <2 x i64>* @llvm_mips_splat_d_ARG1
%1 = tail call <2 x i64> @llvm.mips.splat.d(<2 x i64> %0, i32 %a)
store <2 x i64> %1, <2 x i64>* @llvm_mips_splat_d_RES
ret void
diff --git a/test/CodeGen/Mips/msa/3rf.ll b/test/CodeGen/Mips/msa/3rf.ll
index ae665afcc950..dce0c275e8da 100644
--- a/test/CodeGen/Mips/msa/3rf.ll
+++ b/test/CodeGen/Mips/msa/3rf.ll
@@ -9,8 +9,8 @@
define void @llvm_mips_fadd_w_test() nounwind {
entry:
- %0 = load <4 x float>* @llvm_mips_fadd_w_ARG1
- %1 = load <4 x float>* @llvm_mips_fadd_w_ARG2
+ %0 = load <4 x float>, <4 x float>* @llvm_mips_fadd_w_ARG1
+ %1 = load <4 x float>, <4 x float>* @llvm_mips_fadd_w_ARG2
%2 = tail call <4 x float> @llvm.mips.fadd.w(<4 x float> %0, <4 x float> %1)
store <4 x float> %2, <4 x float>* @llvm_mips_fadd_w_RES
ret void
@@ -31,8 +31,8 @@ declare <4 x float> @llvm.mips.fadd.w(<4 x float>, <4 x float>) nounwind
define void @llvm_mips_fadd_d_test() nounwind {
entry:
- %0 = load <2 x double>* @llvm_mips_fadd_d_ARG1
- %1 = load <2 x double>* @llvm_mips_fadd_d_ARG2
+ %0 = load <2 x double>, <2 x double>* @llvm_mips_fadd_d_ARG1
+ %1 = load <2 x double>, <2 x double>* @llvm_mips_fadd_d_ARG2
%2 = tail call <2 x double> @llvm.mips.fadd.d(<2 x double> %0, <2 x double> %1)
store <2 x double> %2, <2 x double>* @llvm_mips_fadd_d_RES
ret void
@@ -49,8 +49,8 @@ declare <2 x double> @llvm.mips.fadd.d(<2 x double>, <2 x double>) nounwind
define void @fadd_w_test() nounwind {
entry:
- %0 = load <4 x float>* @llvm_mips_fadd_w_ARG1
- %1 = load <4 x float>* @llvm_mips_fadd_w_ARG2
+ %0 = load <4 x float>, <4 x float>* @llvm_mips_fadd_w_ARG1
+ %1 = load <4 x float>, <4 x float>* @llvm_mips_fadd_w_ARG2
%2 = fadd <4 x float> %0, %1
store <4 x float> %2, <4 x float>* @llvm_mips_fadd_w_RES
ret void
@@ -65,8 +65,8 @@ entry:
define void @fadd_d_test() nounwind {
entry:
- %0 = load <2 x double>* @llvm_mips_fadd_d_ARG1
- %1 = load <2 x double>* @llvm_mips_fadd_d_ARG2
+ %0 = load <2 x double>, <2 x double>* @llvm_mips_fadd_d_ARG1
+ %1 = load <2 x double>, <2 x double>* @llvm_mips_fadd_d_ARG2
%2 = fadd <2 x double> %0, %1
store <2 x double> %2, <2 x double>* @llvm_mips_fadd_d_RES
ret void
@@ -85,8 +85,8 @@ entry:
define void @llvm_mips_fdiv_w_test() nounwind {
entry:
- %0 = load <4 x float>* @llvm_mips_fdiv_w_ARG1
- %1 = load <4 x float>* @llvm_mips_fdiv_w_ARG2
+ %0 = load <4 x float>, <4 x float>* @llvm_mips_fdiv_w_ARG1
+ %1 = load <4 x float>, <4 x float>* @llvm_mips_fdiv_w_ARG2
%2 = tail call <4 x float> @llvm.mips.fdiv.w(<4 x float> %0, <4 x float> %1)
store <4 x float> %2, <4 x float>* @llvm_mips_fdiv_w_RES
ret void
@@ -107,8 +107,8 @@ declare <4 x float> @llvm.mips.fdiv.w(<4 x float>, <4 x float>) nounwind
define void @llvm_mips_fdiv_d_test() nounwind {
entry:
- %0 = load <2 x double>* @llvm_mips_fdiv_d_ARG1
- %1 = load <2 x double>* @llvm_mips_fdiv_d_ARG2
+ %0 = load <2 x double>, <2 x double>* @llvm_mips_fdiv_d_ARG1
+ %1 = load <2 x double>, <2 x double>* @llvm_mips_fdiv_d_ARG2
%2 = tail call <2 x double> @llvm.mips.fdiv.d(<2 x double> %0, <2 x double> %1)
store <2 x double> %2, <2 x double>* @llvm_mips_fdiv_d_RES
ret void
@@ -125,8 +125,8 @@ declare <2 x double> @llvm.mips.fdiv.d(<2 x double>, <2 x double>) nounwind
define void @fdiv_w_test() nounwind {
entry:
- %0 = load <4 x float>* @llvm_mips_fdiv_w_ARG1
- %1 = load <4 x float>* @llvm_mips_fdiv_w_ARG2
+ %0 = load <4 x float>, <4 x float>* @llvm_mips_fdiv_w_ARG1
+ %1 = load <4 x float>, <4 x float>* @llvm_mips_fdiv_w_ARG2
%2 = fdiv <4 x float> %0, %1
store <4 x float> %2, <4 x float>* @llvm_mips_fdiv_w_RES
ret void
@@ -141,8 +141,8 @@ entry:
define void @fdiv_d_test() nounwind {
entry:
- %0 = load <2 x double>* @llvm_mips_fdiv_d_ARG1
- %1 = load <2 x double>* @llvm_mips_fdiv_d_ARG2
+ %0 = load <2 x double>, <2 x double>* @llvm_mips_fdiv_d_ARG1
+ %1 = load <2 x double>, <2 x double>* @llvm_mips_fdiv_d_ARG2
%2 = fdiv <2 x double> %0, %1
store <2 x double> %2, <2 x double>* @llvm_mips_fdiv_d_RES
ret void
@@ -161,8 +161,8 @@ entry:
define void @llvm_mips_fmin_w_test() nounwind {
entry:
- %0 = load <4 x float>* @llvm_mips_fmin_w_ARG1
- %1 = load <4 x float>* @llvm_mips_fmin_w_ARG2
+ %0 = load <4 x float>, <4 x float>* @llvm_mips_fmin_w_ARG1
+ %1 = load <4 x float>, <4 x float>* @llvm_mips_fmin_w_ARG2
%2 = tail call <4 x float> @llvm.mips.fmin.w(<4 x float> %0, <4 x float> %1)
store <4 x float> %2, <4 x float>* @llvm_mips_fmin_w_RES
ret void
@@ -183,8 +183,8 @@ declare <4 x float> @llvm.mips.fmin.w(<4 x float>, <4 x float>) nounwind
define void @llvm_mips_fmin_d_test() nounwind {
entry:
- %0 = load <2 x double>* @llvm_mips_fmin_d_ARG1
- %1 = load <2 x double>* @llvm_mips_fmin_d_ARG2
+ %0 = load <2 x double>, <2 x double>* @llvm_mips_fmin_d_ARG1
+ %1 = load <2 x double>, <2 x double>* @llvm_mips_fmin_d_ARG2
%2 = tail call <2 x double> @llvm.mips.fmin.d(<2 x double> %0, <2 x double> %1)
store <2 x double> %2, <2 x double>* @llvm_mips_fmin_d_RES
ret void
@@ -205,8 +205,8 @@ declare <2 x double> @llvm.mips.fmin.d(<2 x double>, <2 x double>) nounwind
define void @llvm_mips_fmin_a_w_test() nounwind {
entry:
- %0 = load <4 x float>* @llvm_mips_fmin_a_w_ARG1
- %1 = load <4 x float>* @llvm_mips_fmin_a_w_ARG2
+ %0 = load <4 x float>, <4 x float>* @llvm_mips_fmin_a_w_ARG1
+ %1 = load <4 x float>, <4 x float>* @llvm_mips_fmin_a_w_ARG2
%2 = tail call <4 x float> @llvm.mips.fmin.a.w(<4 x float> %0, <4 x float> %1)
store <4 x float> %2, <4 x float>* @llvm_mips_fmin_a_w_RES
ret void
@@ -227,8 +227,8 @@ declare <4 x float> @llvm.mips.fmin.a.w(<4 x float>, <4 x float>) nounwind
define void @llvm_mips_fmin_a_d_test() nounwind {
entry:
- %0 = load <2 x double>* @llvm_mips_fmin_a_d_ARG1
- %1 = load <2 x double>* @llvm_mips_fmin_a_d_ARG2
+ %0 = load <2 x double>, <2 x double>* @llvm_mips_fmin_a_d_ARG1
+ %1 = load <2 x double>, <2 x double>* @llvm_mips_fmin_a_d_ARG2
%2 = tail call <2 x double> @llvm.mips.fmin.a.d(<2 x double> %0, <2 x double> %1)
store <2 x double> %2, <2 x double>* @llvm_mips_fmin_a_d_RES
ret void
@@ -249,8 +249,8 @@ declare <2 x double> @llvm.mips.fmin.a.d(<2 x double>, <2 x double>) nounwind
define void @llvm_mips_fmax_w_test() nounwind {
entry:
- %0 = load <4 x float>* @llvm_mips_fmax_w_ARG1
- %1 = load <4 x float>* @llvm_mips_fmax_w_ARG2
+ %0 = load <4 x float>, <4 x float>* @llvm_mips_fmax_w_ARG1
+ %1 = load <4 x float>, <4 x float>* @llvm_mips_fmax_w_ARG2
%2 = tail call <4 x float> @llvm.mips.fmax.w(<4 x float> %0, <4 x float> %1)
store <4 x float> %2, <4 x float>* @llvm_mips_fmax_w_RES
ret void
@@ -271,8 +271,8 @@ declare <4 x float> @llvm.mips.fmax.w(<4 x float>, <4 x float>) nounwind
define void @llvm_mips_fmax_d_test() nounwind {
entry:
- %0 = load <2 x double>* @llvm_mips_fmax_d_ARG1
- %1 = load <2 x double>* @llvm_mips_fmax_d_ARG2
+ %0 = load <2 x double>, <2 x double>* @llvm_mips_fmax_d_ARG1
+ %1 = load <2 x double>, <2 x double>* @llvm_mips_fmax_d_ARG2
%2 = tail call <2 x double> @llvm.mips.fmax.d(<2 x double> %0, <2 x double> %1)
store <2 x double> %2, <2 x double>* @llvm_mips_fmax_d_RES
ret void
@@ -293,8 +293,8 @@ declare <2 x double> @llvm.mips.fmax.d(<2 x double>, <2 x double>) nounwind
define void @llvm_mips_fmax_a_w_test() nounwind {
entry:
- %0 = load <4 x float>* @llvm_mips_fmax_a_w_ARG1
- %1 = load <4 x float>* @llvm_mips_fmax_a_w_ARG2
+ %0 = load <4 x float>, <4 x float>* @llvm_mips_fmax_a_w_ARG1
+ %1 = load <4 x float>, <4 x float>* @llvm_mips_fmax_a_w_ARG2
%2 = tail call <4 x float> @llvm.mips.fmax.a.w(<4 x float> %0, <4 x float> %1)
store <4 x float> %2, <4 x float>* @llvm_mips_fmax_a_w_RES
ret void
@@ -315,8 +315,8 @@ declare <4 x float> @llvm.mips.fmax.a.w(<4 x float>, <4 x float>) nounwind
define void @llvm_mips_fmax_a_d_test() nounwind {
entry:
- %0 = load <2 x double>* @llvm_mips_fmax_a_d_ARG1
- %1 = load <2 x double>* @llvm_mips_fmax_a_d_ARG2
+ %0 = load <2 x double>, <2 x double>* @llvm_mips_fmax_a_d_ARG1
+ %1 = load <2 x double>, <2 x double>* @llvm_mips_fmax_a_d_ARG2
%2 = tail call <2 x double> @llvm.mips.fmax.a.d(<2 x double> %0, <2 x double> %1)
store <2 x double> %2, <2 x double>* @llvm_mips_fmax_a_d_RES
ret void
@@ -337,8 +337,8 @@ declare <2 x double> @llvm.mips.fmax.a.d(<2 x double>, <2 x double>) nounwind
define void @llvm_mips_fmul_w_test() nounwind {
entry:
- %0 = load <4 x float>* @llvm_mips_fmul_w_ARG1
- %1 = load <4 x float>* @llvm_mips_fmul_w_ARG2
+ %0 = load <4 x float>, <4 x float>* @llvm_mips_fmul_w_ARG1
+ %1 = load <4 x float>, <4 x float>* @llvm_mips_fmul_w_ARG2
%2 = tail call <4 x float> @llvm.mips.fmul.w(<4 x float> %0, <4 x float> %1)
store <4 x float> %2, <4 x float>* @llvm_mips_fmul_w_RES
ret void
@@ -359,8 +359,8 @@ declare <4 x float> @llvm.mips.fmul.w(<4 x float>, <4 x float>) nounwind
define void @llvm_mips_fmul_d_test() nounwind {
entry:
- %0 = load <2 x double>* @llvm_mips_fmul_d_ARG1
- %1 = load <2 x double>* @llvm_mips_fmul_d_ARG2
+ %0 = load <2 x double>, <2 x double>* @llvm_mips_fmul_d_ARG1
+ %1 = load <2 x double>, <2 x double>* @llvm_mips_fmul_d_ARG2
%2 = tail call <2 x double> @llvm.mips.fmul.d(<2 x double> %0, <2 x double> %1)
store <2 x double> %2, <2 x double>* @llvm_mips_fmul_d_RES
ret void
@@ -377,8 +377,8 @@ declare <2 x double> @llvm.mips.fmul.d(<2 x double>, <2 x double>) nounwind
define void @fmul_w_test() nounwind {
entry:
- %0 = load <4 x float>* @llvm_mips_fmul_w_ARG1
- %1 = load <4 x float>* @llvm_mips_fmul_w_ARG2
+ %0 = load <4 x float>, <4 x float>* @llvm_mips_fmul_w_ARG1
+ %1 = load <4 x float>, <4 x float>* @llvm_mips_fmul_w_ARG2
%2 = fmul <4 x float> %0, %1
store <4 x float> %2, <4 x float>* @llvm_mips_fmul_w_RES
ret void
@@ -393,8 +393,8 @@ entry:
define void @fmul_d_test() nounwind {
entry:
- %0 = load <2 x double>* @llvm_mips_fmul_d_ARG1
- %1 = load <2 x double>* @llvm_mips_fmul_d_ARG2
+ %0 = load <2 x double>, <2 x double>* @llvm_mips_fmul_d_ARG1
+ %1 = load <2 x double>, <2 x double>* @llvm_mips_fmul_d_ARG2
%2 = fmul <2 x double> %0, %1
store <2 x double> %2, <2 x double>* @llvm_mips_fmul_d_RES
ret void
@@ -413,8 +413,8 @@ entry:
define void @llvm_mips_fsub_w_test() nounwind {
entry:
- %0 = load <4 x float>* @llvm_mips_fsub_w_ARG1
- %1 = load <4 x float>* @llvm_mips_fsub_w_ARG2
+ %0 = load <4 x float>, <4 x float>* @llvm_mips_fsub_w_ARG1
+ %1 = load <4 x float>, <4 x float>* @llvm_mips_fsub_w_ARG2
%2 = tail call <4 x float> @llvm.mips.fsub.w(<4 x float> %0, <4 x float> %1)
store <4 x float> %2, <4 x float>* @llvm_mips_fsub_w_RES
ret void
@@ -435,8 +435,8 @@ declare <4 x float> @llvm.mips.fsub.w(<4 x float>, <4 x float>) nounwind
define void @llvm_mips_fsub_d_test() nounwind {
entry:
- %0 = load <2 x double>* @llvm_mips_fsub_d_ARG1
- %1 = load <2 x double>* @llvm_mips_fsub_d_ARG2
+ %0 = load <2 x double>, <2 x double>* @llvm_mips_fsub_d_ARG1
+ %1 = load <2 x double>, <2 x double>* @llvm_mips_fsub_d_ARG2
%2 = tail call <2 x double> @llvm.mips.fsub.d(<2 x double> %0, <2 x double> %1)
store <2 x double> %2, <2 x double>* @llvm_mips_fsub_d_RES
ret void
@@ -454,8 +454,8 @@ declare <2 x double> @llvm.mips.fsub.d(<2 x double>, <2 x double>) nounwind
define void @fsub_w_test() nounwind {
entry:
- %0 = load <4 x float>* @llvm_mips_fsub_w_ARG1
- %1 = load <4 x float>* @llvm_mips_fsub_w_ARG2
+ %0 = load <4 x float>, <4 x float>* @llvm_mips_fsub_w_ARG1
+ %1 = load <4 x float>, <4 x float>* @llvm_mips_fsub_w_ARG2
%2 = fsub <4 x float> %0, %1
store <4 x float> %2, <4 x float>* @llvm_mips_fsub_w_RES
ret void
@@ -470,8 +470,8 @@ entry:
define void @fsub_d_test() nounwind {
entry:
- %0 = load <2 x double>* @llvm_mips_fsub_d_ARG1
- %1 = load <2 x double>* @llvm_mips_fsub_d_ARG2
+ %0 = load <2 x double>, <2 x double>* @llvm_mips_fsub_d_ARG1
+ %1 = load <2 x double>, <2 x double>* @llvm_mips_fsub_d_ARG2
%2 = fsub <2 x double> %0, %1
store <2 x double> %2, <2 x double>* @llvm_mips_fsub_d_RES
ret void
diff --git a/test/CodeGen/Mips/msa/3rf_4rf.ll b/test/CodeGen/Mips/msa/3rf_4rf.ll
index 67ef7fd2bae1..f1a3002e8179 100644
--- a/test/CodeGen/Mips/msa/3rf_4rf.ll
+++ b/test/CodeGen/Mips/msa/3rf_4rf.ll
@@ -11,9 +11,9 @@
define void @llvm_mips_fmadd_w_test() nounwind {
entry:
- %0 = load <4 x float>* @llvm_mips_fmadd_w_ARG1
- %1 = load <4 x float>* @llvm_mips_fmadd_w_ARG2
- %2 = load <4 x float>* @llvm_mips_fmadd_w_ARG3
+ %0 = load <4 x float>, <4 x float>* @llvm_mips_fmadd_w_ARG1
+ %1 = load <4 x float>, <4 x float>* @llvm_mips_fmadd_w_ARG2
+ %2 = load <4 x float>, <4 x float>* @llvm_mips_fmadd_w_ARG3
%3 = tail call <4 x float> @llvm.mips.fmadd.w(<4 x float> %0, <4 x float> %1, <4 x float> %2)
store <4 x float> %3, <4 x float>* @llvm_mips_fmadd_w_RES
ret void
@@ -36,9 +36,9 @@ declare <4 x float> @llvm.mips.fmadd.w(<4 x float>, <4 x float>, <4 x float>) no
define void @llvm_mips_fmadd_d_test() nounwind {
entry:
- %0 = load <2 x double>* @llvm_mips_fmadd_d_ARG1
- %1 = load <2 x double>* @llvm_mips_fmadd_d_ARG2
- %2 = load <2 x double>* @llvm_mips_fmadd_d_ARG3
+ %0 = load <2 x double>, <2 x double>* @llvm_mips_fmadd_d_ARG1
+ %1 = load <2 x double>, <2 x double>* @llvm_mips_fmadd_d_ARG2
+ %2 = load <2 x double>, <2 x double>* @llvm_mips_fmadd_d_ARG3
%3 = tail call <2 x double> @llvm.mips.fmadd.d(<2 x double> %0, <2 x double> %1, <2 x double> %2)
store <2 x double> %3, <2 x double>* @llvm_mips_fmadd_d_RES
ret void
@@ -61,9 +61,9 @@ declare <2 x double> @llvm.mips.fmadd.d(<2 x double>, <2 x double>, <2 x double>
define void @llvm_mips_fmsub_w_test() nounwind {
entry:
- %0 = load <4 x float>* @llvm_mips_fmsub_w_ARG1
- %1 = load <4 x float>* @llvm_mips_fmsub_w_ARG2
- %2 = load <4 x float>* @llvm_mips_fmsub_w_ARG3
+ %0 = load <4 x float>, <4 x float>* @llvm_mips_fmsub_w_ARG1
+ %1 = load <4 x float>, <4 x float>* @llvm_mips_fmsub_w_ARG2
+ %2 = load <4 x float>, <4 x float>* @llvm_mips_fmsub_w_ARG3
%3 = tail call <4 x float> @llvm.mips.fmsub.w(<4 x float> %0, <4 x float> %1, <4 x float> %2)
store <4 x float> %3, <4 x float>* @llvm_mips_fmsub_w_RES
ret void
@@ -86,9 +86,9 @@ declare <4 x float> @llvm.mips.fmsub.w(<4 x float>, <4 x float>, <4 x float>) no
define void @llvm_mips_fmsub_d_test() nounwind {
entry:
- %0 = load <2 x double>* @llvm_mips_fmsub_d_ARG1
- %1 = load <2 x double>* @llvm_mips_fmsub_d_ARG2
- %2 = load <2 x double>* @llvm_mips_fmsub_d_ARG3
+ %0 = load <2 x double>, <2 x double>* @llvm_mips_fmsub_d_ARG1
+ %1 = load <2 x double>, <2 x double>* @llvm_mips_fmsub_d_ARG2
+ %2 = load <2 x double>, <2 x double>* @llvm_mips_fmsub_d_ARG3
%3 = tail call <2 x double> @llvm.mips.fmsub.d(<2 x double> %0, <2 x double> %1, <2 x double> %2)
store <2 x double> %3, <2 x double>* @llvm_mips_fmsub_d_RES
ret void
diff --git a/test/CodeGen/Mips/msa/3rf_4rf_q.ll b/test/CodeGen/Mips/msa/3rf_4rf_q.ll
index de28be0b1c22..704c4b7e7cbc 100644
--- a/test/CodeGen/Mips/msa/3rf_4rf_q.ll
+++ b/test/CodeGen/Mips/msa/3rf_4rf_q.ll
@@ -11,9 +11,9 @@
define void @llvm_mips_madd_q_h_test() nounwind {
entry:
- %0 = load <8 x i16>* @llvm_mips_madd_q_h_ARG1
- %1 = load <8 x i16>* @llvm_mips_madd_q_h_ARG2
- %2 = load <8 x i16>* @llvm_mips_madd_q_h_ARG3
+ %0 = load <8 x i16>, <8 x i16>* @llvm_mips_madd_q_h_ARG1
+ %1 = load <8 x i16>, <8 x i16>* @llvm_mips_madd_q_h_ARG2
+ %2 = load <8 x i16>, <8 x i16>* @llvm_mips_madd_q_h_ARG3
%3 = tail call <8 x i16> @llvm.mips.madd.q.h(<8 x i16> %0, <8 x i16> %1, <8 x i16> %2)
store <8 x i16> %3, <8 x i16>* @llvm_mips_madd_q_h_RES
ret void
@@ -36,9 +36,9 @@ declare <8 x i16> @llvm.mips.madd.q.h(<8 x i16>, <8 x i16>, <8 x i16>) nounwind
define void @llvm_mips_madd_q_w_test() nounwind {
entry:
- %0 = load <4 x i32>* @llvm_mips_madd_q_w_ARG1
- %1 = load <4 x i32>* @llvm_mips_madd_q_w_ARG2
- %2 = load <4 x i32>* @llvm_mips_madd_q_w_ARG3
+ %0 = load <4 x i32>, <4 x i32>* @llvm_mips_madd_q_w_ARG1
+ %1 = load <4 x i32>, <4 x i32>* @llvm_mips_madd_q_w_ARG2
+ %2 = load <4 x i32>, <4 x i32>* @llvm_mips_madd_q_w_ARG3
%3 = tail call <4 x i32> @llvm.mips.madd.q.w(<4 x i32> %0, <4 x i32> %1, <4 x i32> %2)
store <4 x i32> %3, <4 x i32>* @llvm_mips_madd_q_w_RES
ret void
@@ -61,9 +61,9 @@ declare <4 x i32> @llvm.mips.madd.q.w(<4 x i32>, <4 x i32>, <4 x i32>) nounwind
define void @llvm_mips_maddr_q_h_test() nounwind {
entry:
- %0 = load <8 x i16>* @llvm_mips_maddr_q_h_ARG1
- %1 = load <8 x i16>* @llvm_mips_maddr_q_h_ARG2
- %2 = load <8 x i16>* @llvm_mips_maddr_q_h_ARG3
+ %0 = load <8 x i16>, <8 x i16>* @llvm_mips_maddr_q_h_ARG1
+ %1 = load <8 x i16>, <8 x i16>* @llvm_mips_maddr_q_h_ARG2
+ %2 = load <8 x i16>, <8 x i16>* @llvm_mips_maddr_q_h_ARG3
%3 = tail call <8 x i16> @llvm.mips.maddr.q.h(<8 x i16> %0, <8 x i16> %1, <8 x i16> %2)
store <8 x i16> %3, <8 x i16>* @llvm_mips_maddr_q_h_RES
ret void
@@ -86,9 +86,9 @@ declare <8 x i16> @llvm.mips.maddr.q.h(<8 x i16>, <8 x i16>, <8 x i16>) nounwind
define void @llvm_mips_maddr_q_w_test() nounwind {
entry:
- %0 = load <4 x i32>* @llvm_mips_maddr_q_w_ARG1
- %1 = load <4 x i32>* @llvm_mips_maddr_q_w_ARG2
- %2 = load <4 x i32>* @llvm_mips_maddr_q_w_ARG3
+ %0 = load <4 x i32>, <4 x i32>* @llvm_mips_maddr_q_w_ARG1
+ %1 = load <4 x i32>, <4 x i32>* @llvm_mips_maddr_q_w_ARG2
+ %2 = load <4 x i32>, <4 x i32>* @llvm_mips_maddr_q_w_ARG3
%3 = tail call <4 x i32> @llvm.mips.maddr.q.w(<4 x i32> %0, <4 x i32> %1, <4 x i32> %2)
store <4 x i32> %3, <4 x i32>* @llvm_mips_maddr_q_w_RES
ret void
@@ -111,9 +111,9 @@ declare <4 x i32> @llvm.mips.maddr.q.w(<4 x i32>, <4 x i32>, <4 x i32>) nounwind
define void @llvm_mips_msub_q_h_test() nounwind {
entry:
- %0 = load <8 x i16>* @llvm_mips_msub_q_h_ARG1
- %1 = load <8 x i16>* @llvm_mips_msub_q_h_ARG2
- %2 = load <8 x i16>* @llvm_mips_msub_q_h_ARG3
+ %0 = load <8 x i16>, <8 x i16>* @llvm_mips_msub_q_h_ARG1
+ %1 = load <8 x i16>, <8 x i16>* @llvm_mips_msub_q_h_ARG2
+ %2 = load <8 x i16>, <8 x i16>* @llvm_mips_msub_q_h_ARG3
%3 = tail call <8 x i16> @llvm.mips.msub.q.h(<8 x i16> %0, <8 x i16> %1, <8 x i16> %2)
store <8 x i16> %3, <8 x i16>* @llvm_mips_msub_q_h_RES
ret void
@@ -136,9 +136,9 @@ declare <8 x i16> @llvm.mips.msub.q.h(<8 x i16>, <8 x i16>, <8 x i16>) nounwind
define void @llvm_mips_msub_q_w_test() nounwind {
entry:
- %0 = load <4 x i32>* @llvm_mips_msub_q_w_ARG1
- %1 = load <4 x i32>* @llvm_mips_msub_q_w_ARG2
- %2 = load <4 x i32>* @llvm_mips_msub_q_w_ARG3
+ %0 = load <4 x i32>, <4 x i32>* @llvm_mips_msub_q_w_ARG1
+ %1 = load <4 x i32>, <4 x i32>* @llvm_mips_msub_q_w_ARG2
+ %2 = load <4 x i32>, <4 x i32>* @llvm_mips_msub_q_w_ARG3
%3 = tail call <4 x i32> @llvm.mips.msub.q.w(<4 x i32> %0, <4 x i32> %1, <4 x i32> %2)
store <4 x i32> %3, <4 x i32>* @llvm_mips_msub_q_w_RES
ret void
@@ -161,9 +161,9 @@ declare <4 x i32> @llvm.mips.msub.q.w(<4 x i32>, <4 x i32>, <4 x i32>) nounwind
define void @llvm_mips_msubr_q_h_test() nounwind {
entry:
- %0 = load <8 x i16>* @llvm_mips_msubr_q_h_ARG1
- %1 = load <8 x i16>* @llvm_mips_msubr_q_h_ARG2
- %2 = load <8 x i16>* @llvm_mips_msubr_q_h_ARG3
+ %0 = load <8 x i16>, <8 x i16>* @llvm_mips_msubr_q_h_ARG1
+ %1 = load <8 x i16>, <8 x i16>* @llvm_mips_msubr_q_h_ARG2
+ %2 = load <8 x i16>, <8 x i16>* @llvm_mips_msubr_q_h_ARG3
%3 = tail call <8 x i16> @llvm.mips.msubr.q.h(<8 x i16> %0, <8 x i16> %1, <8 x i16> %2)
store <8 x i16> %3, <8 x i16>* @llvm_mips_msubr_q_h_RES
ret void
@@ -186,9 +186,9 @@ declare <8 x i16> @llvm.mips.msubr.q.h(<8 x i16>, <8 x i16>, <8 x i16>) nounwind
define void @llvm_mips_msubr_q_w_test() nounwind {
entry:
- %0 = load <4 x i32>* @llvm_mips_msubr_q_w_ARG1
- %1 = load <4 x i32>* @llvm_mips_msubr_q_w_ARG2
- %2 = load <4 x i32>* @llvm_mips_msubr_q_w_ARG3
+ %0 = load <4 x i32>, <4 x i32>* @llvm_mips_msubr_q_w_ARG1
+ %1 = load <4 x i32>, <4 x i32>* @llvm_mips_msubr_q_w_ARG2
+ %2 = load <4 x i32>, <4 x i32>* @llvm_mips_msubr_q_w_ARG3
%3 = tail call <4 x i32> @llvm.mips.msubr.q.w(<4 x i32> %0, <4 x i32> %1, <4 x i32> %2)
store <4 x i32> %3, <4 x i32>* @llvm_mips_msubr_q_w_RES
ret void
diff --git a/test/CodeGen/Mips/msa/3rf_exdo.ll b/test/CodeGen/Mips/msa/3rf_exdo.ll
index 8a7f268a5069..1b1b2e9243ec 100644
--- a/test/CodeGen/Mips/msa/3rf_exdo.ll
+++ b/test/CodeGen/Mips/msa/3rf_exdo.ll
@@ -10,8 +10,8 @@
define void @llvm_mips_fexdo_h_test() nounwind {
entry:
- %0 = load <4 x float>* @llvm_mips_fexdo_h_ARG1
- %1 = load <4 x float>* @llvm_mips_fexdo_h_ARG2
+ %0 = load <4 x float>, <4 x float>* @llvm_mips_fexdo_h_ARG1
+ %1 = load <4 x float>, <4 x float>* @llvm_mips_fexdo_h_ARG2
%2 = tail call <8 x half> @llvm.mips.fexdo.h(<4 x float> %0, <4 x float> %1)
store <8 x half> %2, <8 x half>* @llvm_mips_fexdo_h_RES
ret void
@@ -32,8 +32,8 @@ declare <8 x half> @llvm.mips.fexdo.h(<4 x float>, <4 x float>) nounwind
define void @llvm_mips_fexdo_w_test() nounwind {
entry:
- %0 = load <2 x double>* @llvm_mips_fexdo_w_ARG1
- %1 = load <2 x double>* @llvm_mips_fexdo_w_ARG2
+ %0 = load <2 x double>, <2 x double>* @llvm_mips_fexdo_w_ARG1
+ %1 = load <2 x double>, <2 x double>* @llvm_mips_fexdo_w_ARG2
%2 = tail call <4 x float> @llvm.mips.fexdo.w(<2 x double> %0, <2 x double> %1)
store <4 x float> %2, <4 x float>* @llvm_mips_fexdo_w_RES
ret void
diff --git a/test/CodeGen/Mips/msa/3rf_float_int.ll b/test/CodeGen/Mips/msa/3rf_float_int.ll
index 7b01e1721db9..2bd056d3cc8d 100644
--- a/test/CodeGen/Mips/msa/3rf_float_int.ll
+++ b/test/CodeGen/Mips/msa/3rf_float_int.ll
@@ -10,8 +10,8 @@
define void @llvm_mips_fexp2_w_test() nounwind {
entry:
- %0 = load <4 x float>* @llvm_mips_fexp2_w_ARG1
- %1 = load <4 x i32>* @llvm_mips_fexp2_w_ARG2
+ %0 = load <4 x float>, <4 x float>* @llvm_mips_fexp2_w_ARG1
+ %1 = load <4 x i32>, <4 x i32>* @llvm_mips_fexp2_w_ARG2
%2 = tail call <4 x float> @llvm.mips.fexp2.w(<4 x float> %0, <4 x i32> %1)
store <4 x float> %2, <4 x float>* @llvm_mips_fexp2_w_RES
ret void
@@ -32,8 +32,8 @@ declare <4 x float> @llvm.mips.fexp2.w(<4 x float>, <4 x i32>) nounwind
define void @llvm_mips_fexp2_d_test() nounwind {
entry:
- %0 = load <2 x double>* @llvm_mips_fexp2_d_ARG1
- %1 = load <2 x i64>* @llvm_mips_fexp2_d_ARG2
+ %0 = load <2 x double>, <2 x double>* @llvm_mips_fexp2_d_ARG1
+ %1 = load <2 x i64>, <2 x i64>* @llvm_mips_fexp2_d_ARG2
%2 = tail call <2 x double> @llvm.mips.fexp2.d(<2 x double> %0, <2 x i64> %1)
store <2 x double> %2, <2 x double>* @llvm_mips_fexp2_d_RES
ret void
diff --git a/test/CodeGen/Mips/msa/3rf_int_float.ll b/test/CodeGen/Mips/msa/3rf_int_float.ll
index 5624771b8357..545e5435d643 100644
--- a/test/CodeGen/Mips/msa/3rf_int_float.ll
+++ b/test/CodeGen/Mips/msa/3rf_int_float.ll
@@ -10,8 +10,8 @@
define void @llvm_mips_fcaf_w_test() nounwind {
entry:
- %0 = load <4 x float>* @llvm_mips_fcaf_w_ARG1
- %1 = load <4 x float>* @llvm_mips_fcaf_w_ARG2
+ %0 = load <4 x float>, <4 x float>* @llvm_mips_fcaf_w_ARG1
+ %1 = load <4 x float>, <4 x float>* @llvm_mips_fcaf_w_ARG2
%2 = tail call <4 x i32> @llvm.mips.fcaf.w(<4 x float> %0, <4 x float> %1)
store <4 x i32> %2, <4 x i32>* @llvm_mips_fcaf_w_RES
ret void
@@ -32,8 +32,8 @@ declare <4 x i32> @llvm.mips.fcaf.w(<4 x float>, <4 x float>) nounwind
define void @llvm_mips_fcaf_d_test() nounwind {
entry:
- %0 = load <2 x double>* @llvm_mips_fcaf_d_ARG1
- %1 = load <2 x double>* @llvm_mips_fcaf_d_ARG2
+ %0 = load <2 x double>, <2 x double>* @llvm_mips_fcaf_d_ARG1
+ %1 = load <2 x double>, <2 x double>* @llvm_mips_fcaf_d_ARG2
%2 = tail call <2 x i64> @llvm.mips.fcaf.d(<2 x double> %0, <2 x double> %1)
store <2 x i64> %2, <2 x i64>* @llvm_mips_fcaf_d_RES
ret void
@@ -54,8 +54,8 @@ declare <2 x i64> @llvm.mips.fcaf.d(<2 x double>, <2 x double>) nounwind
define void @llvm_mips_fceq_w_test() nounwind {
entry:
- %0 = load <4 x float>* @llvm_mips_fceq_w_ARG1
- %1 = load <4 x float>* @llvm_mips_fceq_w_ARG2
+ %0 = load <4 x float>, <4 x float>* @llvm_mips_fceq_w_ARG1
+ %1 = load <4 x float>, <4 x float>* @llvm_mips_fceq_w_ARG2
%2 = tail call <4 x i32> @llvm.mips.fceq.w(<4 x float> %0, <4 x float> %1)
store <4 x i32> %2, <4 x i32>* @llvm_mips_fceq_w_RES
ret void
@@ -76,8 +76,8 @@ declare <4 x i32> @llvm.mips.fceq.w(<4 x float>, <4 x float>) nounwind
define void @llvm_mips_fceq_d_test() nounwind {
entry:
- %0 = load <2 x double>* @llvm_mips_fceq_d_ARG1
- %1 = load <2 x double>* @llvm_mips_fceq_d_ARG2
+ %0 = load <2 x double>, <2 x double>* @llvm_mips_fceq_d_ARG1
+ %1 = load <2 x double>, <2 x double>* @llvm_mips_fceq_d_ARG2
%2 = tail call <2 x i64> @llvm.mips.fceq.d(<2 x double> %0, <2 x double> %1)
store <2 x i64> %2, <2 x i64>* @llvm_mips_fceq_d_RES
ret void
@@ -98,8 +98,8 @@ declare <2 x i64> @llvm.mips.fceq.d(<2 x double>, <2 x double>) nounwind
define void @llvm_mips_fcle_w_test() nounwind {
entry:
- %0 = load <4 x float>* @llvm_mips_fcle_w_ARG1
- %1 = load <4 x float>* @llvm_mips_fcle_w_ARG2
+ %0 = load <4 x float>, <4 x float>* @llvm_mips_fcle_w_ARG1
+ %1 = load <4 x float>, <4 x float>* @llvm_mips_fcle_w_ARG2
%2 = tail call <4 x i32> @llvm.mips.fcle.w(<4 x float> %0, <4 x float> %1)
store <4 x i32> %2, <4 x i32>* @llvm_mips_fcle_w_RES
ret void
@@ -120,8 +120,8 @@ declare <4 x i32> @llvm.mips.fcle.w(<4 x float>, <4 x float>) nounwind
define void @llvm_mips_fcle_d_test() nounwind {
entry:
- %0 = load <2 x double>* @llvm_mips_fcle_d_ARG1
- %1 = load <2 x double>* @llvm_mips_fcle_d_ARG2
+ %0 = load <2 x double>, <2 x double>* @llvm_mips_fcle_d_ARG1
+ %1 = load <2 x double>, <2 x double>* @llvm_mips_fcle_d_ARG2
%2 = tail call <2 x i64> @llvm.mips.fcle.d(<2 x double> %0, <2 x double> %1)
store <2 x i64> %2, <2 x i64>* @llvm_mips_fcle_d_RES
ret void
@@ -142,8 +142,8 @@ declare <2 x i64> @llvm.mips.fcle.d(<2 x double>, <2 x double>) nounwind
define void @llvm_mips_fclt_w_test() nounwind {
entry:
- %0 = load <4 x float>* @llvm_mips_fclt_w_ARG1
- %1 = load <4 x float>* @llvm_mips_fclt_w_ARG2
+ %0 = load <4 x float>, <4 x float>* @llvm_mips_fclt_w_ARG1
+ %1 = load <4 x float>, <4 x float>* @llvm_mips_fclt_w_ARG2
%2 = tail call <4 x i32> @llvm.mips.fclt.w(<4 x float> %0, <4 x float> %1)
store <4 x i32> %2, <4 x i32>* @llvm_mips_fclt_w_RES
ret void
@@ -164,8 +164,8 @@ declare <4 x i32> @llvm.mips.fclt.w(<4 x float>, <4 x float>) nounwind
define void @llvm_mips_fclt_d_test() nounwind {
entry:
- %0 = load <2 x double>* @llvm_mips_fclt_d_ARG1
- %1 = load <2 x double>* @llvm_mips_fclt_d_ARG2
+ %0 = load <2 x double>, <2 x double>* @llvm_mips_fclt_d_ARG1
+ %1 = load <2 x double>, <2 x double>* @llvm_mips_fclt_d_ARG2
%2 = tail call <2 x i64> @llvm.mips.fclt.d(<2 x double> %0, <2 x double> %1)
store <2 x i64> %2, <2 x i64>* @llvm_mips_fclt_d_RES
ret void
@@ -186,8 +186,8 @@ declare <2 x i64> @llvm.mips.fclt.d(<2 x double>, <2 x double>) nounwind
define void @llvm_mips_fcor_w_test() nounwind {
entry:
- %0 = load <4 x float>* @llvm_mips_fcor_w_ARG1
- %1 = load <4 x float>* @llvm_mips_fcor_w_ARG2
+ %0 = load <4 x float>, <4 x float>* @llvm_mips_fcor_w_ARG1
+ %1 = load <4 x float>, <4 x float>* @llvm_mips_fcor_w_ARG2
%2 = tail call <4 x i32> @llvm.mips.fcor.w(<4 x float> %0, <4 x float> %1)
store <4 x i32> %2, <4 x i32>* @llvm_mips_fcor_w_RES
ret void
@@ -208,8 +208,8 @@ declare <4 x i32> @llvm.mips.fcor.w(<4 x float>, <4 x float>) nounwind
define void @llvm_mips_fcor_d_test() nounwind {
entry:
- %0 = load <2 x double>* @llvm_mips_fcor_d_ARG1
- %1 = load <2 x double>* @llvm_mips_fcor_d_ARG2
+ %0 = load <2 x double>, <2 x double>* @llvm_mips_fcor_d_ARG1
+ %1 = load <2 x double>, <2 x double>* @llvm_mips_fcor_d_ARG2
%2 = tail call <2 x i64> @llvm.mips.fcor.d(<2 x double> %0, <2 x double> %1)
store <2 x i64> %2, <2 x i64>* @llvm_mips_fcor_d_RES
ret void
@@ -230,8 +230,8 @@ declare <2 x i64> @llvm.mips.fcor.d(<2 x double>, <2 x double>) nounwind
define void @llvm_mips_fcne_w_test() nounwind {
entry:
- %0 = load <4 x float>* @llvm_mips_fcne_w_ARG1
- %1 = load <4 x float>* @llvm_mips_fcne_w_ARG2
+ %0 = load <4 x float>, <4 x float>* @llvm_mips_fcne_w_ARG1
+ %1 = load <4 x float>, <4 x float>* @llvm_mips_fcne_w_ARG2
%2 = tail call <4 x i32> @llvm.mips.fcne.w(<4 x float> %0, <4 x float> %1)
store <4 x i32> %2, <4 x i32>* @llvm_mips_fcne_w_RES
ret void
@@ -252,8 +252,8 @@ declare <4 x i32> @llvm.mips.fcne.w(<4 x float>, <4 x float>) nounwind
define void @llvm_mips_fcne_d_test() nounwind {
entry:
- %0 = load <2 x double>* @llvm_mips_fcne_d_ARG1
- %1 = load <2 x double>* @llvm_mips_fcne_d_ARG2
+ %0 = load <2 x double>, <2 x double>* @llvm_mips_fcne_d_ARG1
+ %1 = load <2 x double>, <2 x double>* @llvm_mips_fcne_d_ARG2
%2 = tail call <2 x i64> @llvm.mips.fcne.d(<2 x double> %0, <2 x double> %1)
store <2 x i64> %2, <2 x i64>* @llvm_mips_fcne_d_RES
ret void
@@ -274,8 +274,8 @@ declare <2 x i64> @llvm.mips.fcne.d(<2 x double>, <2 x double>) nounwind
define void @llvm_mips_fcueq_w_test() nounwind {
entry:
- %0 = load <4 x float>* @llvm_mips_fcueq_w_ARG1
- %1 = load <4 x float>* @llvm_mips_fcueq_w_ARG2
+ %0 = load <4 x float>, <4 x float>* @llvm_mips_fcueq_w_ARG1
+ %1 = load <4 x float>, <4 x float>* @llvm_mips_fcueq_w_ARG2
%2 = tail call <4 x i32> @llvm.mips.fcueq.w(<4 x float> %0, <4 x float> %1)
store <4 x i32> %2, <4 x i32>* @llvm_mips_fcueq_w_RES
ret void
@@ -296,8 +296,8 @@ declare <4 x i32> @llvm.mips.fcueq.w(<4 x float>, <4 x float>) nounwind
define void @llvm_mips_fcueq_d_test() nounwind {
entry:
- %0 = load <2 x double>* @llvm_mips_fcueq_d_ARG1
- %1 = load <2 x double>* @llvm_mips_fcueq_d_ARG2
+ %0 = load <2 x double>, <2 x double>* @llvm_mips_fcueq_d_ARG1
+ %1 = load <2 x double>, <2 x double>* @llvm_mips_fcueq_d_ARG2
%2 = tail call <2 x i64> @llvm.mips.fcueq.d(<2 x double> %0, <2 x double> %1)
store <2 x i64> %2, <2 x i64>* @llvm_mips_fcueq_d_RES
ret void
@@ -318,8 +318,8 @@ declare <2 x i64> @llvm.mips.fcueq.d(<2 x double>, <2 x double>) nounwind
define void @llvm_mips_fcult_w_test() nounwind {
entry:
- %0 = load <4 x float>* @llvm_mips_fcult_w_ARG1
- %1 = load <4 x float>* @llvm_mips_fcult_w_ARG2
+ %0 = load <4 x float>, <4 x float>* @llvm_mips_fcult_w_ARG1
+ %1 = load <4 x float>, <4 x float>* @llvm_mips_fcult_w_ARG2
%2 = tail call <4 x i32> @llvm.mips.fcult.w(<4 x float> %0, <4 x float> %1)
store <4 x i32> %2, <4 x i32>* @llvm_mips_fcult_w_RES
ret void
@@ -340,8 +340,8 @@ declare <4 x i32> @llvm.mips.fcult.w(<4 x float>, <4 x float>) nounwind
define void @llvm_mips_fcult_d_test() nounwind {
entry:
- %0 = load <2 x double>* @llvm_mips_fcult_d_ARG1
- %1 = load <2 x double>* @llvm_mips_fcult_d_ARG2
+ %0 = load <2 x double>, <2 x double>* @llvm_mips_fcult_d_ARG1
+ %1 = load <2 x double>, <2 x double>* @llvm_mips_fcult_d_ARG2
%2 = tail call <2 x i64> @llvm.mips.fcult.d(<2 x double> %0, <2 x double> %1)
store <2 x i64> %2, <2 x i64>* @llvm_mips_fcult_d_RES
ret void
@@ -362,8 +362,8 @@ declare <2 x i64> @llvm.mips.fcult.d(<2 x double>, <2 x double>) nounwind
define void @llvm_mips_fcule_w_test() nounwind {
entry:
- %0 = load <4 x float>* @llvm_mips_fcule_w_ARG1
- %1 = load <4 x float>* @llvm_mips_fcule_w_ARG2
+ %0 = load <4 x float>, <4 x float>* @llvm_mips_fcule_w_ARG1
+ %1 = load <4 x float>, <4 x float>* @llvm_mips_fcule_w_ARG2
%2 = tail call <4 x i32> @llvm.mips.fcule.w(<4 x float> %0, <4 x float> %1)
store <4 x i32> %2, <4 x i32>* @llvm_mips_fcule_w_RES
ret void
@@ -384,8 +384,8 @@ declare <4 x i32> @llvm.mips.fcule.w(<4 x float>, <4 x float>) nounwind
define void @llvm_mips_fcule_d_test() nounwind {
entry:
- %0 = load <2 x double>* @llvm_mips_fcule_d_ARG1
- %1 = load <2 x double>* @llvm_mips_fcule_d_ARG2
+ %0 = load <2 x double>, <2 x double>* @llvm_mips_fcule_d_ARG1
+ %1 = load <2 x double>, <2 x double>* @llvm_mips_fcule_d_ARG2
%2 = tail call <2 x i64> @llvm.mips.fcule.d(<2 x double> %0, <2 x double> %1)
store <2 x i64> %2, <2 x i64>* @llvm_mips_fcule_d_RES
ret void
@@ -406,8 +406,8 @@ declare <2 x i64> @llvm.mips.fcule.d(<2 x double>, <2 x double>) nounwind
define void @llvm_mips_fcun_w_test() nounwind {
entry:
- %0 = load <4 x float>* @llvm_mips_fcun_w_ARG1
- %1 = load <4 x float>* @llvm_mips_fcun_w_ARG2
+ %0 = load <4 x float>, <4 x float>* @llvm_mips_fcun_w_ARG1
+ %1 = load <4 x float>, <4 x float>* @llvm_mips_fcun_w_ARG2
%2 = tail call <4 x i32> @llvm.mips.fcun.w(<4 x float> %0, <4 x float> %1)
store <4 x i32> %2, <4 x i32>* @llvm_mips_fcun_w_RES
ret void
@@ -428,8 +428,8 @@ declare <4 x i32> @llvm.mips.fcun.w(<4 x float>, <4 x float>) nounwind
define void @llvm_mips_fcun_d_test() nounwind {
entry:
- %0 = load <2 x double>* @llvm_mips_fcun_d_ARG1
- %1 = load <2 x double>* @llvm_mips_fcun_d_ARG2
+ %0 = load <2 x double>, <2 x double>* @llvm_mips_fcun_d_ARG1
+ %1 = load <2 x double>, <2 x double>* @llvm_mips_fcun_d_ARG2
%2 = tail call <2 x i64> @llvm.mips.fcun.d(<2 x double> %0, <2 x double> %1)
store <2 x i64> %2, <2 x i64>* @llvm_mips_fcun_d_RES
ret void
@@ -450,8 +450,8 @@ declare <2 x i64> @llvm.mips.fcun.d(<2 x double>, <2 x double>) nounwind
define void @llvm_mips_fcune_w_test() nounwind {
entry:
- %0 = load <4 x float>* @llvm_mips_fcune_w_ARG1
- %1 = load <4 x float>* @llvm_mips_fcune_w_ARG2
+ %0 = load <4 x float>, <4 x float>* @llvm_mips_fcune_w_ARG1
+ %1 = load <4 x float>, <4 x float>* @llvm_mips_fcune_w_ARG2
%2 = tail call <4 x i32> @llvm.mips.fcune.w(<4 x float> %0, <4 x float> %1)
store <4 x i32> %2, <4 x i32>* @llvm_mips_fcune_w_RES
ret void
@@ -472,8 +472,8 @@ declare <4 x i32> @llvm.mips.fcune.w(<4 x float>, <4 x float>) nounwind
define void @llvm_mips_fcune_d_test() nounwind {
entry:
- %0 = load <2 x double>* @llvm_mips_fcune_d_ARG1
- %1 = load <2 x double>* @llvm_mips_fcune_d_ARG2
+ %0 = load <2 x double>, <2 x double>* @llvm_mips_fcune_d_ARG1
+ %1 = load <2 x double>, <2 x double>* @llvm_mips_fcune_d_ARG2
%2 = tail call <2 x i64> @llvm.mips.fcune.d(<2 x double> %0, <2 x double> %1)
store <2 x i64> %2, <2 x i64>* @llvm_mips_fcune_d_RES
ret void
@@ -494,8 +494,8 @@ declare <2 x i64> @llvm.mips.fcune.d(<2 x double>, <2 x double>) nounwind
define void @llvm_mips_fsaf_w_test() nounwind {
entry:
- %0 = load <4 x float>* @llvm_mips_fsaf_w_ARG1
- %1 = load <4 x float>* @llvm_mips_fsaf_w_ARG2
+ %0 = load <4 x float>, <4 x float>* @llvm_mips_fsaf_w_ARG1
+ %1 = load <4 x float>, <4 x float>* @llvm_mips_fsaf_w_ARG2
%2 = tail call <4 x i32> @llvm.mips.fsaf.w(<4 x float> %0, <4 x float> %1)
store <4 x i32> %2, <4 x i32>* @llvm_mips_fsaf_w_RES
ret void
@@ -516,8 +516,8 @@ declare <4 x i32> @llvm.mips.fsaf.w(<4 x float>, <4 x float>) nounwind
define void @llvm_mips_fsaf_d_test() nounwind {
entry:
- %0 = load <2 x double>* @llvm_mips_fsaf_d_ARG1
- %1 = load <2 x double>* @llvm_mips_fsaf_d_ARG2
+ %0 = load <2 x double>, <2 x double>* @llvm_mips_fsaf_d_ARG1
+ %1 = load <2 x double>, <2 x double>* @llvm_mips_fsaf_d_ARG2
%2 = tail call <2 x i64> @llvm.mips.fsaf.d(<2 x double> %0, <2 x double> %1)
store <2 x i64> %2, <2 x i64>* @llvm_mips_fsaf_d_RES
ret void
@@ -538,8 +538,8 @@ declare <2 x i64> @llvm.mips.fsaf.d(<2 x double>, <2 x double>) nounwind
define void @llvm_mips_fseq_w_test() nounwind {
entry:
- %0 = load <4 x float>* @llvm_mips_fseq_w_ARG1
- %1 = load <4 x float>* @llvm_mips_fseq_w_ARG2
+ %0 = load <4 x float>, <4 x float>* @llvm_mips_fseq_w_ARG1
+ %1 = load <4 x float>, <4 x float>* @llvm_mips_fseq_w_ARG2
%2 = tail call <4 x i32> @llvm.mips.fseq.w(<4 x float> %0, <4 x float> %1)
store <4 x i32> %2, <4 x i32>* @llvm_mips_fseq_w_RES
ret void
@@ -560,8 +560,8 @@ declare <4 x i32> @llvm.mips.fseq.w(<4 x float>, <4 x float>) nounwind
define void @llvm_mips_fseq_d_test() nounwind {
entry:
- %0 = load <2 x double>* @llvm_mips_fseq_d_ARG1
- %1 = load <2 x double>* @llvm_mips_fseq_d_ARG2
+ %0 = load <2 x double>, <2 x double>* @llvm_mips_fseq_d_ARG1
+ %1 = load <2 x double>, <2 x double>* @llvm_mips_fseq_d_ARG2
%2 = tail call <2 x i64> @llvm.mips.fseq.d(<2 x double> %0, <2 x double> %1)
store <2 x i64> %2, <2 x i64>* @llvm_mips_fseq_d_RES
ret void
@@ -582,8 +582,8 @@ declare <2 x i64> @llvm.mips.fseq.d(<2 x double>, <2 x double>) nounwind
define void @llvm_mips_fsle_w_test() nounwind {
entry:
- %0 = load <4 x float>* @llvm_mips_fsle_w_ARG1
- %1 = load <4 x float>* @llvm_mips_fsle_w_ARG2
+ %0 = load <4 x float>, <4 x float>* @llvm_mips_fsle_w_ARG1
+ %1 = load <4 x float>, <4 x float>* @llvm_mips_fsle_w_ARG2
%2 = tail call <4 x i32> @llvm.mips.fsle.w(<4 x float> %0, <4 x float> %1)
store <4 x i32> %2, <4 x i32>* @llvm_mips_fsle_w_RES
ret void
@@ -604,8 +604,8 @@ declare <4 x i32> @llvm.mips.fsle.w(<4 x float>, <4 x float>) nounwind
define void @llvm_mips_fsle_d_test() nounwind {
entry:
- %0 = load <2 x double>* @llvm_mips_fsle_d_ARG1
- %1 = load <2 x double>* @llvm_mips_fsle_d_ARG2
+ %0 = load <2 x double>, <2 x double>* @llvm_mips_fsle_d_ARG1
+ %1 = load <2 x double>, <2 x double>* @llvm_mips_fsle_d_ARG2
%2 = tail call <2 x i64> @llvm.mips.fsle.d(<2 x double> %0, <2 x double> %1)
store <2 x i64> %2, <2 x i64>* @llvm_mips_fsle_d_RES
ret void
@@ -626,8 +626,8 @@ declare <2 x i64> @llvm.mips.fsle.d(<2 x double>, <2 x double>) nounwind
define void @llvm_mips_fslt_w_test() nounwind {
entry:
- %0 = load <4 x float>* @llvm_mips_fslt_w_ARG1
- %1 = load <4 x float>* @llvm_mips_fslt_w_ARG2
+ %0 = load <4 x float>, <4 x float>* @llvm_mips_fslt_w_ARG1
+ %1 = load <4 x float>, <4 x float>* @llvm_mips_fslt_w_ARG2
%2 = tail call <4 x i32> @llvm.mips.fslt.w(<4 x float> %0, <4 x float> %1)
store <4 x i32> %2, <4 x i32>* @llvm_mips_fslt_w_RES
ret void
@@ -648,8 +648,8 @@ declare <4 x i32> @llvm.mips.fslt.w(<4 x float>, <4 x float>) nounwind
define void @llvm_mips_fslt_d_test() nounwind {
entry:
- %0 = load <2 x double>* @llvm_mips_fslt_d_ARG1
- %1 = load <2 x double>* @llvm_mips_fslt_d_ARG2
+ %0 = load <2 x double>, <2 x double>* @llvm_mips_fslt_d_ARG1
+ %1 = load <2 x double>, <2 x double>* @llvm_mips_fslt_d_ARG2
%2 = tail call <2 x i64> @llvm.mips.fslt.d(<2 x double> %0, <2 x double> %1)
store <2 x i64> %2, <2 x i64>* @llvm_mips_fslt_d_RES
ret void
@@ -670,8 +670,8 @@ declare <2 x i64> @llvm.mips.fslt.d(<2 x double>, <2 x double>) nounwind
define void @llvm_mips_fsor_w_test() nounwind {
entry:
- %0 = load <4 x float>* @llvm_mips_fsor_w_ARG1
- %1 = load <4 x float>* @llvm_mips_fsor_w_ARG2
+ %0 = load <4 x float>, <4 x float>* @llvm_mips_fsor_w_ARG1
+ %1 = load <4 x float>, <4 x float>* @llvm_mips_fsor_w_ARG2
%2 = tail call <4 x i32> @llvm.mips.fsor.w(<4 x float> %0, <4 x float> %1)
store <4 x i32> %2, <4 x i32>* @llvm_mips_fsor_w_RES
ret void
@@ -692,8 +692,8 @@ declare <4 x i32> @llvm.mips.fsor.w(<4 x float>, <4 x float>) nounwind
define void @llvm_mips_fsor_d_test() nounwind {
entry:
- %0 = load <2 x double>* @llvm_mips_fsor_d_ARG1
- %1 = load <2 x double>* @llvm_mips_fsor_d_ARG2
+ %0 = load <2 x double>, <2 x double>* @llvm_mips_fsor_d_ARG1
+ %1 = load <2 x double>, <2 x double>* @llvm_mips_fsor_d_ARG2
%2 = tail call <2 x i64> @llvm.mips.fsor.d(<2 x double> %0, <2 x double> %1)
store <2 x i64> %2, <2 x i64>* @llvm_mips_fsor_d_RES
ret void
@@ -714,8 +714,8 @@ declare <2 x i64> @llvm.mips.fsor.d(<2 x double>, <2 x double>) nounwind
define void @llvm_mips_fsne_w_test() nounwind {
entry:
- %0 = load <4 x float>* @llvm_mips_fsne_w_ARG1
- %1 = load <4 x float>* @llvm_mips_fsne_w_ARG2
+ %0 = load <4 x float>, <4 x float>* @llvm_mips_fsne_w_ARG1
+ %1 = load <4 x float>, <4 x float>* @llvm_mips_fsne_w_ARG2
%2 = tail call <4 x i32> @llvm.mips.fsne.w(<4 x float> %0, <4 x float> %1)
store <4 x i32> %2, <4 x i32>* @llvm_mips_fsne_w_RES
ret void
@@ -736,8 +736,8 @@ declare <4 x i32> @llvm.mips.fsne.w(<4 x float>, <4 x float>) nounwind
define void @llvm_mips_fsne_d_test() nounwind {
entry:
- %0 = load <2 x double>* @llvm_mips_fsne_d_ARG1
- %1 = load <2 x double>* @llvm_mips_fsne_d_ARG2
+ %0 = load <2 x double>, <2 x double>* @llvm_mips_fsne_d_ARG1
+ %1 = load <2 x double>, <2 x double>* @llvm_mips_fsne_d_ARG2
%2 = tail call <2 x i64> @llvm.mips.fsne.d(<2 x double> %0, <2 x double> %1)
store <2 x i64> %2, <2 x i64>* @llvm_mips_fsne_d_RES
ret void
@@ -758,8 +758,8 @@ declare <2 x i64> @llvm.mips.fsne.d(<2 x double>, <2 x double>) nounwind
define void @llvm_mips_fsueq_w_test() nounwind {
entry:
- %0 = load <4 x float>* @llvm_mips_fsueq_w_ARG1
- %1 = load <4 x float>* @llvm_mips_fsueq_w_ARG2
+ %0 = load <4 x float>, <4 x float>* @llvm_mips_fsueq_w_ARG1
+ %1 = load <4 x float>, <4 x float>* @llvm_mips_fsueq_w_ARG2
%2 = tail call <4 x i32> @llvm.mips.fsueq.w(<4 x float> %0, <4 x float> %1)
store <4 x i32> %2, <4 x i32>* @llvm_mips_fsueq_w_RES
ret void
@@ -780,8 +780,8 @@ declare <4 x i32> @llvm.mips.fsueq.w(<4 x float>, <4 x float>) nounwind
define void @llvm_mips_fsueq_d_test() nounwind {
entry:
- %0 = load <2 x double>* @llvm_mips_fsueq_d_ARG1
- %1 = load <2 x double>* @llvm_mips_fsueq_d_ARG2
+ %0 = load <2 x double>, <2 x double>* @llvm_mips_fsueq_d_ARG1
+ %1 = load <2 x double>, <2 x double>* @llvm_mips_fsueq_d_ARG2
%2 = tail call <2 x i64> @llvm.mips.fsueq.d(<2 x double> %0, <2 x double> %1)
store <2 x i64> %2, <2 x i64>* @llvm_mips_fsueq_d_RES
ret void
@@ -802,8 +802,8 @@ declare <2 x i64> @llvm.mips.fsueq.d(<2 x double>, <2 x double>) nounwind
define void @llvm_mips_fsult_w_test() nounwind {
entry:
- %0 = load <4 x float>* @llvm_mips_fsult_w_ARG1
- %1 = load <4 x float>* @llvm_mips_fsult_w_ARG2
+ %0 = load <4 x float>, <4 x float>* @llvm_mips_fsult_w_ARG1
+ %1 = load <4 x float>, <4 x float>* @llvm_mips_fsult_w_ARG2
%2 = tail call <4 x i32> @llvm.mips.fsult.w(<4 x float> %0, <4 x float> %1)
store <4 x i32> %2, <4 x i32>* @llvm_mips_fsult_w_RES
ret void
@@ -824,8 +824,8 @@ declare <4 x i32> @llvm.mips.fsult.w(<4 x float>, <4 x float>) nounwind
define void @llvm_mips_fsult_d_test() nounwind {
entry:
- %0 = load <2 x double>* @llvm_mips_fsult_d_ARG1
- %1 = load <2 x double>* @llvm_mips_fsult_d_ARG2
+ %0 = load <2 x double>, <2 x double>* @llvm_mips_fsult_d_ARG1
+ %1 = load <2 x double>, <2 x double>* @llvm_mips_fsult_d_ARG2
%2 = tail call <2 x i64> @llvm.mips.fsult.d(<2 x double> %0, <2 x double> %1)
store <2 x i64> %2, <2 x i64>* @llvm_mips_fsult_d_RES
ret void
@@ -846,8 +846,8 @@ declare <2 x i64> @llvm.mips.fsult.d(<2 x double>, <2 x double>) nounwind
define void @llvm_mips_fsule_w_test() nounwind {
entry:
- %0 = load <4 x float>* @llvm_mips_fsule_w_ARG1
- %1 = load <4 x float>* @llvm_mips_fsule_w_ARG2
+ %0 = load <4 x float>, <4 x float>* @llvm_mips_fsule_w_ARG1
+ %1 = load <4 x float>, <4 x float>* @llvm_mips_fsule_w_ARG2
%2 = tail call <4 x i32> @llvm.mips.fsule.w(<4 x float> %0, <4 x float> %1)
store <4 x i32> %2, <4 x i32>* @llvm_mips_fsule_w_RES
ret void
@@ -868,8 +868,8 @@ declare <4 x i32> @llvm.mips.fsule.w(<4 x float>, <4 x float>) nounwind
define void @llvm_mips_fsule_d_test() nounwind {
entry:
- %0 = load <2 x double>* @llvm_mips_fsule_d_ARG1
- %1 = load <2 x double>* @llvm_mips_fsule_d_ARG2
+ %0 = load <2 x double>, <2 x double>* @llvm_mips_fsule_d_ARG1
+ %1 = load <2 x double>, <2 x double>* @llvm_mips_fsule_d_ARG2
%2 = tail call <2 x i64> @llvm.mips.fsule.d(<2 x double> %0, <2 x double> %1)
store <2 x i64> %2, <2 x i64>* @llvm_mips_fsule_d_RES
ret void
@@ -890,8 +890,8 @@ declare <2 x i64> @llvm.mips.fsule.d(<2 x double>, <2 x double>) nounwind
define void @llvm_mips_fsun_w_test() nounwind {
entry:
- %0 = load <4 x float>* @llvm_mips_fsun_w_ARG1
- %1 = load <4 x float>* @llvm_mips_fsun_w_ARG2
+ %0 = load <4 x float>, <4 x float>* @llvm_mips_fsun_w_ARG1
+ %1 = load <4 x float>, <4 x float>* @llvm_mips_fsun_w_ARG2
%2 = tail call <4 x i32> @llvm.mips.fsun.w(<4 x float> %0, <4 x float> %1)
store <4 x i32> %2, <4 x i32>* @llvm_mips_fsun_w_RES
ret void
@@ -912,8 +912,8 @@ declare <4 x i32> @llvm.mips.fsun.w(<4 x float>, <4 x float>) nounwind
define void @llvm_mips_fsun_d_test() nounwind {
entry:
- %0 = load <2 x double>* @llvm_mips_fsun_d_ARG1
- %1 = load <2 x double>* @llvm_mips_fsun_d_ARG2
+ %0 = load <2 x double>, <2 x double>* @llvm_mips_fsun_d_ARG1
+ %1 = load <2 x double>, <2 x double>* @llvm_mips_fsun_d_ARG2
%2 = tail call <2 x i64> @llvm.mips.fsun.d(<2 x double> %0, <2 x double> %1)
store <2 x i64> %2, <2 x i64>* @llvm_mips_fsun_d_RES
ret void
@@ -934,8 +934,8 @@ declare <2 x i64> @llvm.mips.fsun.d(<2 x double>, <2 x double>) nounwind
define void @llvm_mips_fsune_w_test() nounwind {
entry:
- %0 = load <4 x float>* @llvm_mips_fsune_w_ARG1
- %1 = load <4 x float>* @llvm_mips_fsune_w_ARG2
+ %0 = load <4 x float>, <4 x float>* @llvm_mips_fsune_w_ARG1
+ %1 = load <4 x float>, <4 x float>* @llvm_mips_fsune_w_ARG2
%2 = tail call <4 x i32> @llvm.mips.fsune.w(<4 x float> %0, <4 x float> %1)
store <4 x i32> %2, <4 x i32>* @llvm_mips_fsune_w_RES
ret void
@@ -956,8 +956,8 @@ declare <4 x i32> @llvm.mips.fsune.w(<4 x float>, <4 x float>) nounwind
define void @llvm_mips_fsune_d_test() nounwind {
entry:
- %0 = load <2 x double>* @llvm_mips_fsune_d_ARG1
- %1 = load <2 x double>* @llvm_mips_fsune_d_ARG2
+ %0 = load <2 x double>, <2 x double>* @llvm_mips_fsune_d_ARG1
+ %1 = load <2 x double>, <2 x double>* @llvm_mips_fsune_d_ARG2
%2 = tail call <2 x i64> @llvm.mips.fsune.d(<2 x double> %0, <2 x double> %1)
store <2 x i64> %2, <2 x i64>* @llvm_mips_fsune_d_RES
ret void
diff --git a/test/CodeGen/Mips/msa/3rf_q.ll b/test/CodeGen/Mips/msa/3rf_q.ll
index f7000ee913a6..c8b0a5000021 100644
--- a/test/CodeGen/Mips/msa/3rf_q.ll
+++ b/test/CodeGen/Mips/msa/3rf_q.ll
@@ -10,8 +10,8 @@
define void @llvm_mips_mul_q_h_test() nounwind {
entry:
- %0 = load <8 x i16>* @llvm_mips_mul_q_h_ARG1
- %1 = load <8 x i16>* @llvm_mips_mul_q_h_ARG2
+ %0 = load <8 x i16>, <8 x i16>* @llvm_mips_mul_q_h_ARG1
+ %1 = load <8 x i16>, <8 x i16>* @llvm_mips_mul_q_h_ARG2
%2 = tail call <8 x i16> @llvm.mips.mul.q.h(<8 x i16> %0, <8 x i16> %1)
store <8 x i16> %2, <8 x i16>* @llvm_mips_mul_q_h_RES
ret void
@@ -32,8 +32,8 @@ declare <8 x i16> @llvm.mips.mul.q.h(<8 x i16>, <8 x i16>) nounwind
define void @llvm_mips_mul_q_w_test() nounwind {
entry:
- %0 = load <4 x i32>* @llvm_mips_mul_q_w_ARG1
- %1 = load <4 x i32>* @llvm_mips_mul_q_w_ARG2
+ %0 = load <4 x i32>, <4 x i32>* @llvm_mips_mul_q_w_ARG1
+ %1 = load <4 x i32>, <4 x i32>* @llvm_mips_mul_q_w_ARG2
%2 = tail call <4 x i32> @llvm.mips.mul.q.w(<4 x i32> %0, <4 x i32> %1)
store <4 x i32> %2, <4 x i32>* @llvm_mips_mul_q_w_RES
ret void
@@ -54,8 +54,8 @@ declare <4 x i32> @llvm.mips.mul.q.w(<4 x i32>, <4 x i32>) nounwind
define void @llvm_mips_mulr_q_h_test() nounwind {
entry:
- %0 = load <8 x i16>* @llvm_mips_mulr_q_h_ARG1
- %1 = load <8 x i16>* @llvm_mips_mulr_q_h_ARG2
+ %0 = load <8 x i16>, <8 x i16>* @llvm_mips_mulr_q_h_ARG1
+ %1 = load <8 x i16>, <8 x i16>* @llvm_mips_mulr_q_h_ARG2
%2 = tail call <8 x i16> @llvm.mips.mulr.q.h(<8 x i16> %0, <8 x i16> %1)
store <8 x i16> %2, <8 x i16>* @llvm_mips_mulr_q_h_RES
ret void
@@ -76,8 +76,8 @@ declare <8 x i16> @llvm.mips.mulr.q.h(<8 x i16>, <8 x i16>) nounwind
define void @llvm_mips_mulr_q_w_test() nounwind {
entry:
- %0 = load <4 x i32>* @llvm_mips_mulr_q_w_ARG1
- %1 = load <4 x i32>* @llvm_mips_mulr_q_w_ARG2
+ %0 = load <4 x i32>, <4 x i32>* @llvm_mips_mulr_q_w_ARG1
+ %1 = load <4 x i32>, <4 x i32>* @llvm_mips_mulr_q_w_ARG2
%2 = tail call <4 x i32> @llvm.mips.mulr.q.w(<4 x i32> %0, <4 x i32> %1)
store <4 x i32> %2, <4 x i32>* @llvm_mips_mulr_q_w_RES
ret void
diff --git a/test/CodeGen/Mips/msa/arithmetic.ll b/test/CodeGen/Mips/msa/arithmetic.ll
index 09ee5023c7b1..3ecd0e43589f 100644
--- a/test/CodeGen/Mips/msa/arithmetic.ll
+++ b/test/CodeGen/Mips/msa/arithmetic.ll
@@ -4,9 +4,9 @@
define void @add_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
; CHECK: add_v16i8:
- %1 = load <16 x i8>* %a
+ %1 = load <16 x i8>, <16 x i8>* %a
; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
- %2 = load <16 x i8>* %b
+ %2 = load <16 x i8>, <16 x i8>* %b
; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
%3 = add <16 x i8> %1, %2
; CHECK-DAG: addv.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
@@ -20,9 +20,9 @@ define void @add_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
define void @add_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
; CHECK: add_v8i16:
- %1 = load <8 x i16>* %a
+ %1 = load <8 x i16>, <8 x i16>* %a
; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
- %2 = load <8 x i16>* %b
+ %2 = load <8 x i16>, <8 x i16>* %b
; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
%3 = add <8 x i16> %1, %2
; CHECK-DAG: addv.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
@@ -36,9 +36,9 @@ define void @add_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
define void @add_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
; CHECK: add_v4i32:
- %1 = load <4 x i32>* %a
+ %1 = load <4 x i32>, <4 x i32>* %a
; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
- %2 = load <4 x i32>* %b
+ %2 = load <4 x i32>, <4 x i32>* %b
; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
%3 = add <4 x i32> %1, %2
; CHECK-DAG: addv.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
@@ -52,9 +52,9 @@ define void @add_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
define void @add_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
; CHECK: add_v2i64:
- %1 = load <2 x i64>* %a
+ %1 = load <2 x i64>, <2 x i64>* %a
; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
- %2 = load <2 x i64>* %b
+ %2 = load <2 x i64>, <2 x i64>* %b
; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
%3 = add <2 x i64> %1, %2
; CHECK-DAG: addv.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
@@ -68,7 +68,7 @@ define void @add_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
define void @add_v16i8_i(<16 x i8>* %c, <16 x i8>* %a) nounwind {
; CHECK: add_v16i8_i:
- %1 = load <16 x i8>* %a
+ %1 = load <16 x i8>, <16 x i8>* %a
; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
%2 = add <16 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1,
i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
@@ -83,7 +83,7 @@ define void @add_v16i8_i(<16 x i8>* %c, <16 x i8>* %a) nounwind {
define void @add_v8i16_i(<8 x i16>* %c, <8 x i16>* %a) nounwind {
; CHECK: add_v8i16_i:
- %1 = load <8 x i16>* %a
+ %1 = load <8 x i16>, <8 x i16>* %a
; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
%2 = add <8 x i16> %1, <i16 1, i16 1, i16 1, i16 1,
i16 1, i16 1, i16 1, i16 1>
@@ -98,7 +98,7 @@ define void @add_v8i16_i(<8 x i16>* %c, <8 x i16>* %a) nounwind {
define void @add_v4i32_i(<4 x i32>* %c, <4 x i32>* %a) nounwind {
; CHECK: add_v4i32_i:
- %1 = load <4 x i32>* %a
+ %1 = load <4 x i32>, <4 x i32>* %a
; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
%2 = add <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1>
; CHECK-DAG: addvi.w [[R3:\$w[0-9]+]], [[R1]], 1
@@ -112,7 +112,7 @@ define void @add_v4i32_i(<4 x i32>* %c, <4 x i32>* %a) nounwind {
define void @add_v2i64_i(<2 x i64>* %c, <2 x i64>* %a) nounwind {
; CHECK: add_v2i64_i:
- %1 = load <2 x i64>* %a
+ %1 = load <2 x i64>, <2 x i64>* %a
; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
%2 = add <2 x i64> %1, <i64 1, i64 1>
; CHECK-DAG: addvi.d [[R3:\$w[0-9]+]], [[R1]], 1
@@ -126,9 +126,9 @@ define void @add_v2i64_i(<2 x i64>* %c, <2 x i64>* %a) nounwind {
define void @sub_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
; CHECK: sub_v16i8:
- %1 = load <16 x i8>* %a
+ %1 = load <16 x i8>, <16 x i8>* %a
; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
- %2 = load <16 x i8>* %b
+ %2 = load <16 x i8>, <16 x i8>* %b
; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
%3 = sub <16 x i8> %1, %2
; CHECK-DAG: subv.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
@@ -142,9 +142,9 @@ define void @sub_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
define void @sub_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
; CHECK: sub_v8i16:
- %1 = load <8 x i16>* %a
+ %1 = load <8 x i16>, <8 x i16>* %a
; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
- %2 = load <8 x i16>* %b
+ %2 = load <8 x i16>, <8 x i16>* %b
; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
%3 = sub <8 x i16> %1, %2
; CHECK-DAG: subv.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
@@ -158,9 +158,9 @@ define void @sub_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
define void @sub_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
; CHECK: sub_v4i32:
- %1 = load <4 x i32>* %a
+ %1 = load <4 x i32>, <4 x i32>* %a
; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
- %2 = load <4 x i32>* %b
+ %2 = load <4 x i32>, <4 x i32>* %b
; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
%3 = sub <4 x i32> %1, %2
; CHECK-DAG: subv.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
@@ -174,9 +174,9 @@ define void @sub_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
define void @sub_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
; CHECK: sub_v2i64:
- %1 = load <2 x i64>* %a
+ %1 = load <2 x i64>, <2 x i64>* %a
; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
- %2 = load <2 x i64>* %b
+ %2 = load <2 x i64>, <2 x i64>* %b
; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
%3 = sub <2 x i64> %1, %2
; CHECK-DAG: subv.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
@@ -190,7 +190,7 @@ define void @sub_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
define void @sub_v16i8_i(<16 x i8>* %c, <16 x i8>* %a) nounwind {
; CHECK: sub_v16i8_i:
- %1 = load <16 x i8>* %a
+ %1 = load <16 x i8>, <16 x i8>* %a
; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
%2 = sub <16 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1,
i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
@@ -205,7 +205,7 @@ define void @sub_v16i8_i(<16 x i8>* %c, <16 x i8>* %a) nounwind {
define void @sub_v8i16_i(<8 x i16>* %c, <8 x i16>* %a) nounwind {
; CHECK: sub_v8i16_i:
- %1 = load <8 x i16>* %a
+ %1 = load <8 x i16>, <8 x i16>* %a
; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
%2 = sub <8 x i16> %1, <i16 1, i16 1, i16 1, i16 1,
i16 1, i16 1, i16 1, i16 1>
@@ -220,7 +220,7 @@ define void @sub_v8i16_i(<8 x i16>* %c, <8 x i16>* %a) nounwind {
define void @sub_v4i32_i(<4 x i32>* %c, <4 x i32>* %a) nounwind {
; CHECK: sub_v4i32_i:
- %1 = load <4 x i32>* %a
+ %1 = load <4 x i32>, <4 x i32>* %a
; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
%2 = sub <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1>
; CHECK-DAG: subvi.w [[R3:\$w[0-9]+]], [[R1]], 1
@@ -234,7 +234,7 @@ define void @sub_v4i32_i(<4 x i32>* %c, <4 x i32>* %a) nounwind {
define void @sub_v2i64_i(<2 x i64>* %c, <2 x i64>* %a) nounwind {
; CHECK: sub_v2i64_i:
- %1 = load <2 x i64>* %a
+ %1 = load <2 x i64>, <2 x i64>* %a
; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
%2 = sub <2 x i64> %1, <i64 1, i64 1>
; CHECK-DAG: subvi.d [[R3:\$w[0-9]+]], [[R1]], 1
@@ -248,9 +248,9 @@ define void @sub_v2i64_i(<2 x i64>* %c, <2 x i64>* %a) nounwind {
define void @mul_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
; CHECK: mul_v16i8:
- %1 = load <16 x i8>* %a
+ %1 = load <16 x i8>, <16 x i8>* %a
; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
- %2 = load <16 x i8>* %b
+ %2 = load <16 x i8>, <16 x i8>* %b
; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
%3 = mul <16 x i8> %1, %2
; CHECK-DAG: mulv.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
@@ -264,9 +264,9 @@ define void @mul_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
define void @mul_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
; CHECK: mul_v8i16:
- %1 = load <8 x i16>* %a
+ %1 = load <8 x i16>, <8 x i16>* %a
; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
- %2 = load <8 x i16>* %b
+ %2 = load <8 x i16>, <8 x i16>* %b
; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
%3 = mul <8 x i16> %1, %2
; CHECK-DAG: mulv.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
@@ -280,9 +280,9 @@ define void @mul_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
define void @mul_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
; CHECK: mul_v4i32:
- %1 = load <4 x i32>* %a
+ %1 = load <4 x i32>, <4 x i32>* %a
; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
- %2 = load <4 x i32>* %b
+ %2 = load <4 x i32>, <4 x i32>* %b
; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
%3 = mul <4 x i32> %1, %2
; CHECK-DAG: mulv.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
@@ -296,9 +296,9 @@ define void @mul_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
define void @mul_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
; CHECK: mul_v2i64:
- %1 = load <2 x i64>* %a
+ %1 = load <2 x i64>, <2 x i64>* %a
; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
- %2 = load <2 x i64>* %b
+ %2 = load <2 x i64>, <2 x i64>* %b
; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
%3 = mul <2 x i64> %1, %2
; CHECK-DAG: mulv.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
@@ -313,11 +313,11 @@ define void @maddv_v16i8(<16 x i8>* %d, <16 x i8>* %a, <16 x i8>* %b,
<16 x i8>* %c) nounwind {
; CHECK: maddv_v16i8:
- %1 = load <16 x i8>* %a
+ %1 = load <16 x i8>, <16 x i8>* %a
; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
- %2 = load <16 x i8>* %b
+ %2 = load <16 x i8>, <16 x i8>* %b
; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
- %3 = load <16 x i8>* %c
+ %3 = load <16 x i8>, <16 x i8>* %c
; CHECK-DAG: ld.b [[R3:\$w[0-9]+]], 0($7)
%4 = mul <16 x i8> %2, %3
%5 = add <16 x i8> %4, %1
@@ -333,11 +333,11 @@ define void @maddv_v8i16(<8 x i16>* %d, <8 x i16>* %a, <8 x i16>* %b,
<8 x i16>* %c) nounwind {
; CHECK: maddv_v8i16:
- %1 = load <8 x i16>* %a
+ %1 = load <8 x i16>, <8 x i16>* %a
; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
- %2 = load <8 x i16>* %b
+ %2 = load <8 x i16>, <8 x i16>* %b
; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
- %3 = load <8 x i16>* %c
+ %3 = load <8 x i16>, <8 x i16>* %c
; CHECK-DAG: ld.h [[R3:\$w[0-9]+]], 0($7)
%4 = mul <8 x i16> %2, %3
%5 = add <8 x i16> %4, %1
@@ -353,11 +353,11 @@ define void @maddv_v4i32(<4 x i32>* %d, <4 x i32>* %a, <4 x i32>* %b,
<4 x i32>* %c) nounwind {
; CHECK: maddv_v4i32:
- %1 = load <4 x i32>* %a
+ %1 = load <4 x i32>, <4 x i32>* %a
; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
- %2 = load <4 x i32>* %b
+ %2 = load <4 x i32>, <4 x i32>* %b
; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
- %3 = load <4 x i32>* %c
+ %3 = load <4 x i32>, <4 x i32>* %c
; CHECK-DAG: ld.w [[R3:\$w[0-9]+]], 0($7)
%4 = mul <4 x i32> %2, %3
%5 = add <4 x i32> %4, %1
@@ -373,11 +373,11 @@ define void @maddv_v2i64(<2 x i64>* %d, <2 x i64>* %a, <2 x i64>* %b,
<2 x i64>* %c) nounwind {
; CHECK: maddv_v2i64:
- %1 = load <2 x i64>* %a
+ %1 = load <2 x i64>, <2 x i64>* %a
; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
- %2 = load <2 x i64>* %b
+ %2 = load <2 x i64>, <2 x i64>* %b
; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
- %3 = load <2 x i64>* %c
+ %3 = load <2 x i64>, <2 x i64>* %c
; CHECK-DAG: ld.d [[R3:\$w[0-9]+]], 0($7)
%4 = mul <2 x i64> %2, %3
%5 = add <2 x i64> %4, %1
@@ -393,11 +393,11 @@ define void @msubv_v16i8(<16 x i8>* %d, <16 x i8>* %a, <16 x i8>* %b,
<16 x i8>* %c) nounwind {
; CHECK: msubv_v16i8:
- %1 = load <16 x i8>* %a
+ %1 = load <16 x i8>, <16 x i8>* %a
; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
- %2 = load <16 x i8>* %b
+ %2 = load <16 x i8>, <16 x i8>* %b
; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
- %3 = load <16 x i8>* %c
+ %3 = load <16 x i8>, <16 x i8>* %c
; CHECK-DAG: ld.b [[R3:\$w[0-9]+]], 0($7)
%4 = mul <16 x i8> %2, %3
%5 = sub <16 x i8> %1, %4
@@ -413,11 +413,11 @@ define void @msubv_v8i16(<8 x i16>* %d, <8 x i16>* %a, <8 x i16>* %b,
<8 x i16>* %c) nounwind {
; CHECK: msubv_v8i16:
- %1 = load <8 x i16>* %a
+ %1 = load <8 x i16>, <8 x i16>* %a
; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
- %2 = load <8 x i16>* %b
+ %2 = load <8 x i16>, <8 x i16>* %b
; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
- %3 = load <8 x i16>* %c
+ %3 = load <8 x i16>, <8 x i16>* %c
; CHECK-DAG: ld.h [[R3:\$w[0-9]+]], 0($7)
%4 = mul <8 x i16> %2, %3
%5 = sub <8 x i16> %1, %4
@@ -433,11 +433,11 @@ define void @msubv_v4i32(<4 x i32>* %d, <4 x i32>* %a, <4 x i32>* %b,
<4 x i32>* %c) nounwind {
; CHECK: msubv_v4i32:
- %1 = load <4 x i32>* %a
+ %1 = load <4 x i32>, <4 x i32>* %a
; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
- %2 = load <4 x i32>* %b
+ %2 = load <4 x i32>, <4 x i32>* %b
; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
- %3 = load <4 x i32>* %c
+ %3 = load <4 x i32>, <4 x i32>* %c
; CHECK-DAG: ld.w [[R3:\$w[0-9]+]], 0($7)
%4 = mul <4 x i32> %2, %3
%5 = sub <4 x i32> %1, %4
@@ -453,11 +453,11 @@ define void @msubv_v2i64(<2 x i64>* %d, <2 x i64>* %a, <2 x i64>* %b,
<2 x i64>* %c) nounwind {
; CHECK: msubv_v2i64:
- %1 = load <2 x i64>* %a
+ %1 = load <2 x i64>, <2 x i64>* %a
; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
- %2 = load <2 x i64>* %b
+ %2 = load <2 x i64>, <2 x i64>* %b
; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
- %3 = load <2 x i64>* %c
+ %3 = load <2 x i64>, <2 x i64>* %c
; CHECK-DAG: ld.d [[R3:\$w[0-9]+]], 0($7)
%4 = mul <2 x i64> %2, %3
%5 = sub <2 x i64> %1, %4
@@ -472,9 +472,9 @@ define void @msubv_v2i64(<2 x i64>* %d, <2 x i64>* %a, <2 x i64>* %b,
define void @div_s_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
; CHECK: div_s_v16i8:
- %1 = load <16 x i8>* %a
+ %1 = load <16 x i8>, <16 x i8>* %a
; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
- %2 = load <16 x i8>* %b
+ %2 = load <16 x i8>, <16 x i8>* %b
; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
%3 = sdiv <16 x i8> %1, %2
; CHECK-DAG: div_s.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
@@ -488,9 +488,9 @@ define void @div_s_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
define void @div_s_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
; CHECK: div_s_v8i16:
- %1 = load <8 x i16>* %a
+ %1 = load <8 x i16>, <8 x i16>* %a
; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
- %2 = load <8 x i16>* %b
+ %2 = load <8 x i16>, <8 x i16>* %b
; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
%3 = sdiv <8 x i16> %1, %2
; CHECK-DAG: div_s.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
@@ -504,9 +504,9 @@ define void @div_s_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
define void @div_s_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
; CHECK: div_s_v4i32:
- %1 = load <4 x i32>* %a
+ %1 = load <4 x i32>, <4 x i32>* %a
; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
- %2 = load <4 x i32>* %b
+ %2 = load <4 x i32>, <4 x i32>* %b
; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
%3 = sdiv <4 x i32> %1, %2
; CHECK-DAG: div_s.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
@@ -520,9 +520,9 @@ define void @div_s_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
define void @div_s_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
; CHECK: div_s_v2i64:
- %1 = load <2 x i64>* %a
+ %1 = load <2 x i64>, <2 x i64>* %a
; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
- %2 = load <2 x i64>* %b
+ %2 = load <2 x i64>, <2 x i64>* %b
; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
%3 = sdiv <2 x i64> %1, %2
; CHECK-DAG: div_s.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
@@ -536,9 +536,9 @@ define void @div_s_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
define void @div_u_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
; CHECK: div_u_v16i8:
- %1 = load <16 x i8>* %a
+ %1 = load <16 x i8>, <16 x i8>* %a
; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
- %2 = load <16 x i8>* %b
+ %2 = load <16 x i8>, <16 x i8>* %b
; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
%3 = udiv <16 x i8> %1, %2
; CHECK-DAG: div_u.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
@@ -552,9 +552,9 @@ define void @div_u_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
define void @div_u_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
; CHECK: div_u_v8i16:
- %1 = load <8 x i16>* %a
+ %1 = load <8 x i16>, <8 x i16>* %a
; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
- %2 = load <8 x i16>* %b
+ %2 = load <8 x i16>, <8 x i16>* %b
; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
%3 = udiv <8 x i16> %1, %2
; CHECK-DAG: div_u.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
@@ -568,9 +568,9 @@ define void @div_u_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
define void @div_u_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
; CHECK: div_u_v4i32:
- %1 = load <4 x i32>* %a
+ %1 = load <4 x i32>, <4 x i32>* %a
; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
- %2 = load <4 x i32>* %b
+ %2 = load <4 x i32>, <4 x i32>* %b
; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
%3 = udiv <4 x i32> %1, %2
; CHECK-DAG: div_u.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
@@ -584,9 +584,9 @@ define void @div_u_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
define void @div_u_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
; CHECK: div_u_v2i64:
- %1 = load <2 x i64>* %a
+ %1 = load <2 x i64>, <2 x i64>* %a
; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
- %2 = load <2 x i64>* %b
+ %2 = load <2 x i64>, <2 x i64>* %b
; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
%3 = udiv <2 x i64> %1, %2
; CHECK-DAG: div_u.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
@@ -600,9 +600,9 @@ define void @div_u_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
define void @mod_s_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
; CHECK: mod_s_v16i8:
- %1 = load <16 x i8>* %a
+ %1 = load <16 x i8>, <16 x i8>* %a
; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
- %2 = load <16 x i8>* %b
+ %2 = load <16 x i8>, <16 x i8>* %b
; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
%3 = srem <16 x i8> %1, %2
; CHECK-DAG: mod_s.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
@@ -616,9 +616,9 @@ define void @mod_s_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
define void @mod_s_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
; CHECK: mod_s_v8i16:
- %1 = load <8 x i16>* %a
+ %1 = load <8 x i16>, <8 x i16>* %a
; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
- %2 = load <8 x i16>* %b
+ %2 = load <8 x i16>, <8 x i16>* %b
; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
%3 = srem <8 x i16> %1, %2
; CHECK-DAG: mod_s.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
@@ -632,9 +632,9 @@ define void @mod_s_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
define void @mod_s_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
; CHECK: mod_s_v4i32:
- %1 = load <4 x i32>* %a
+ %1 = load <4 x i32>, <4 x i32>* %a
; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
- %2 = load <4 x i32>* %b
+ %2 = load <4 x i32>, <4 x i32>* %b
; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
%3 = srem <4 x i32> %1, %2
; CHECK-DAG: mod_s.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
@@ -648,9 +648,9 @@ define void @mod_s_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
define void @mod_s_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
; CHECK: mod_s_v2i64:
- %1 = load <2 x i64>* %a
+ %1 = load <2 x i64>, <2 x i64>* %a
; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
- %2 = load <2 x i64>* %b
+ %2 = load <2 x i64>, <2 x i64>* %b
; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
%3 = srem <2 x i64> %1, %2
; CHECK-DAG: mod_s.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
@@ -664,9 +664,9 @@ define void @mod_s_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
define void @mod_u_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
; CHECK: mod_u_v16i8:
- %1 = load <16 x i8>* %a
+ %1 = load <16 x i8>, <16 x i8>* %a
; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
- %2 = load <16 x i8>* %b
+ %2 = load <16 x i8>, <16 x i8>* %b
; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
%3 = urem <16 x i8> %1, %2
; CHECK-DAG: mod_u.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
@@ -680,9 +680,9 @@ define void @mod_u_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
define void @mod_u_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
; CHECK: mod_u_v8i16:
- %1 = load <8 x i16>* %a
+ %1 = load <8 x i16>, <8 x i16>* %a
; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
- %2 = load <8 x i16>* %b
+ %2 = load <8 x i16>, <8 x i16>* %b
; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
%3 = urem <8 x i16> %1, %2
; CHECK-DAG: mod_u.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
@@ -696,9 +696,9 @@ define void @mod_u_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
define void @mod_u_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
; CHECK: mod_u_v4i32:
- %1 = load <4 x i32>* %a
+ %1 = load <4 x i32>, <4 x i32>* %a
; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
- %2 = load <4 x i32>* %b
+ %2 = load <4 x i32>, <4 x i32>* %b
; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
%3 = urem <4 x i32> %1, %2
; CHECK-DAG: mod_u.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
@@ -712,9 +712,9 @@ define void @mod_u_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
define void @mod_u_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
; CHECK: mod_u_v2i64:
- %1 = load <2 x i64>* %a
+ %1 = load <2 x i64>, <2 x i64>* %a
; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
- %2 = load <2 x i64>* %b
+ %2 = load <2 x i64>, <2 x i64>* %b
; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
%3 = urem <2 x i64> %1, %2
; CHECK-DAG: mod_u.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
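Every hunk above applies the same mechanical rewrite: the load instruction gains an explicit result type ahead of the pointer operand. A minimal illustrative sketch of the before/after form (the vector type is only an example taken from the tests above, not a new test):

    ; old syntax: result type implied by the pointer operand
    %v = load <2 x i64>* %a
    ; new syntax: result type spelled out, followed by a comma and the pointer operand
    %v = load <2 x i64>, <2 x i64>* %a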
diff --git a/test/CodeGen/Mips/msa/arithmetic_float.ll b/test/CodeGen/Mips/msa/arithmetic_float.ll
index 9aae284fe535..d2ead536804a 100644
--- a/test/CodeGen/Mips/msa/arithmetic_float.ll
+++ b/test/CodeGen/Mips/msa/arithmetic_float.ll
@@ -4,9 +4,9 @@
define void @add_v4f32(<4 x float>* %c, <4 x float>* %a, <4 x float>* %b) nounwind {
; CHECK: add_v4f32:
- %1 = load <4 x float>* %a
+ %1 = load <4 x float>, <4 x float>* %a
; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
- %2 = load <4 x float>* %b
+ %2 = load <4 x float>, <4 x float>* %b
; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
%3 = fadd <4 x float> %1, %2
; CHECK-DAG: fadd.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
@@ -20,9 +20,9 @@ define void @add_v4f32(<4 x float>* %c, <4 x float>* %a, <4 x float>* %b) nounwi
define void @add_v2f64(<2 x double>* %c, <2 x double>* %a, <2 x double>* %b) nounwind {
; CHECK: add_v2f64:
- %1 = load <2 x double>* %a
+ %1 = load <2 x double>, <2 x double>* %a
; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
- %2 = load <2 x double>* %b
+ %2 = load <2 x double>, <2 x double>* %b
; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
%3 = fadd <2 x double> %1, %2
; CHECK-DAG: fadd.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
@@ -36,9 +36,9 @@ define void @add_v2f64(<2 x double>* %c, <2 x double>* %a, <2 x double>* %b) nou
define void @sub_v4f32(<4 x float>* %c, <4 x float>* %a, <4 x float>* %b) nounwind {
; CHECK: sub_v4f32:
- %1 = load <4 x float>* %a
+ %1 = load <4 x float>, <4 x float>* %a
; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
- %2 = load <4 x float>* %b
+ %2 = load <4 x float>, <4 x float>* %b
; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
%3 = fsub <4 x float> %1, %2
; CHECK-DAG: fsub.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
@@ -52,9 +52,9 @@ define void @sub_v4f32(<4 x float>* %c, <4 x float>* %a, <4 x float>* %b) nounwi
define void @sub_v2f64(<2 x double>* %c, <2 x double>* %a, <2 x double>* %b) nounwind {
; CHECK: sub_v2f64:
- %1 = load <2 x double>* %a
+ %1 = load <2 x double>, <2 x double>* %a
; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
- %2 = load <2 x double>* %b
+ %2 = load <2 x double>, <2 x double>* %b
; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
%3 = fsub <2 x double> %1, %2
; CHECK-DAG: fsub.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
@@ -68,9 +68,9 @@ define void @sub_v2f64(<2 x double>* %c, <2 x double>* %a, <2 x double>* %b) nou
define void @mul_v4f32(<4 x float>* %c, <4 x float>* %a, <4 x float>* %b) nounwind {
; CHECK: mul_v4f32:
- %1 = load <4 x float>* %a
+ %1 = load <4 x float>, <4 x float>* %a
; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
- %2 = load <4 x float>* %b
+ %2 = load <4 x float>, <4 x float>* %b
; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
%3 = fmul <4 x float> %1, %2
; CHECK-DAG: fmul.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
@@ -84,9 +84,9 @@ define void @mul_v4f32(<4 x float>* %c, <4 x float>* %a, <4 x float>* %b) nounwi
define void @mul_v2f64(<2 x double>* %c, <2 x double>* %a, <2 x double>* %b) nounwind {
; CHECK: mul_v2f64:
- %1 = load <2 x double>* %a
+ %1 = load <2 x double>, <2 x double>* %a
; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
- %2 = load <2 x double>* %b
+ %2 = load <2 x double>, <2 x double>* %b
; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
%3 = fmul <2 x double> %1, %2
; CHECK-DAG: fmul.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
@@ -101,11 +101,11 @@ define void @fma_v4f32(<4 x float>* %d, <4 x float>* %a, <4 x float>* %b,
<4 x float>* %c) nounwind {
; CHECK: fma_v4f32:
- %1 = load <4 x float>* %a
+ %1 = load <4 x float>, <4 x float>* %a
; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
- %2 = load <4 x float>* %b
+ %2 = load <4 x float>, <4 x float>* %b
; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
- %3 = load <4 x float>* %c
+ %3 = load <4 x float>, <4 x float>* %c
; CHECK-DAG: ld.w [[R3:\$w[0-9]+]], 0($7)
%4 = tail call <4 x float> @llvm.fma.v4f32 (<4 x float> %1, <4 x float> %2,
<4 x float> %3)
@@ -121,11 +121,11 @@ define void @fma_v2f64(<2 x double>* %d, <2 x double>* %a, <2 x double>* %b,
<2 x double>* %c) nounwind {
; CHECK: fma_v2f64:
- %1 = load <2 x double>* %a
+ %1 = load <2 x double>, <2 x double>* %a
; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
- %2 = load <2 x double>* %b
+ %2 = load <2 x double>, <2 x double>* %b
; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
- %3 = load <2 x double>* %c
+ %3 = load <2 x double>, <2 x double>* %c
; CHECK-DAG: ld.d [[R3:\$w[0-9]+]], 0($7)
%4 = tail call <2 x double> @llvm.fma.v2f64 (<2 x double> %1, <2 x double> %2,
<2 x double> %3)
@@ -141,11 +141,11 @@ define void @fmsub_v4f32(<4 x float>* %d, <4 x float>* %a, <4 x float>* %b,
<4 x float>* %c) nounwind {
; CHECK: fmsub_v4f32:
- %1 = load <4 x float>* %a
+ %1 = load <4 x float>, <4 x float>* %a
; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
- %2 = load <4 x float>* %b
+ %2 = load <4 x float>, <4 x float>* %b
; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
- %3 = load <4 x float>* %c
+ %3 = load <4 x float>, <4 x float>* %c
; CHECK-DAG: ld.w [[R3:\$w[0-9]+]], 0($7)
%4 = fmul <4 x float> %2, %3
%5 = fsub <4 x float> %1, %4
@@ -161,11 +161,11 @@ define void @fmsub_v2f64(<2 x double>* %d, <2 x double>* %a, <2 x double>* %b,
<2 x double>* %c) nounwind {
; CHECK: fmsub_v2f64:
- %1 = load <2 x double>* %a
+ %1 = load <2 x double>, <2 x double>* %a
; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
- %2 = load <2 x double>* %b
+ %2 = load <2 x double>, <2 x double>* %b
; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
- %3 = load <2 x double>* %c
+ %3 = load <2 x double>, <2 x double>* %c
; CHECK-DAG: ld.d [[R3:\$w[0-9]+]], 0($7)
%4 = fmul <2 x double> %2, %3
%5 = fsub <2 x double> %1, %4
@@ -180,9 +180,9 @@ define void @fmsub_v2f64(<2 x double>* %d, <2 x double>* %a, <2 x double>* %b,
define void @fdiv_v4f32(<4 x float>* %c, <4 x float>* %a, <4 x float>* %b) nounwind {
; CHECK: fdiv_v4f32:
- %1 = load <4 x float>* %a
+ %1 = load <4 x float>, <4 x float>* %a
; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
- %2 = load <4 x float>* %b
+ %2 = load <4 x float>, <4 x float>* %b
; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
%3 = fdiv <4 x float> %1, %2
; CHECK-DAG: fdiv.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
@@ -196,9 +196,9 @@ define void @fdiv_v4f32(<4 x float>* %c, <4 x float>* %a, <4 x float>* %b) nounw
define void @fdiv_v2f64(<2 x double>* %c, <2 x double>* %a, <2 x double>* %b) nounwind {
; CHECK: fdiv_v2f64:
- %1 = load <2 x double>* %a
+ %1 = load <2 x double>, <2 x double>* %a
; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
- %2 = load <2 x double>* %b
+ %2 = load <2 x double>, <2 x double>* %b
; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
%3 = fdiv <2 x double> %1, %2
; CHECK-DAG: fdiv.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
@@ -212,7 +212,7 @@ define void @fdiv_v2f64(<2 x double>* %c, <2 x double>* %a, <2 x double>* %b) no
define void @fabs_v4f32(<4 x float>* %c, <4 x float>* %a) nounwind {
; CHECK: fabs_v4f32:
- %1 = load <4 x float>* %a
+ %1 = load <4 x float>, <4 x float>* %a
; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
%2 = tail call <4 x float> @llvm.fabs.v4f32 (<4 x float> %1)
; CHECK-DAG: fmax_a.w [[R3:\$w[0-9]+]], [[R1]], [[R1]]
@@ -226,7 +226,7 @@ define void @fabs_v4f32(<4 x float>* %c, <4 x float>* %a) nounwind {
define void @fabs_v2f64(<2 x double>* %c, <2 x double>* %a) nounwind {
; CHECK: fabs_v2f64:
- %1 = load <2 x double>* %a
+ %1 = load <2 x double>, <2 x double>* %a
; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
%2 = tail call <2 x double> @llvm.fabs.v2f64 (<2 x double> %1)
; CHECK-DAG: fmax_a.d [[R3:\$w[0-9]+]], [[R1]], [[R1]]
@@ -240,7 +240,7 @@ define void @fabs_v2f64(<2 x double>* %c, <2 x double>* %a) nounwind {
define void @fexp2_v4f32(<4 x float>* %c, <4 x float>* %a) nounwind {
; CHECK: fexp2_v4f32:
- %1 = load <4 x float>* %a
+ %1 = load <4 x float>, <4 x float>* %a
; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
%2 = tail call <4 x float> @llvm.exp2.v4f32 (<4 x float> %1)
; CHECK-DAG: ldi.w [[R3:\$w[0-9]+]], 1
@@ -256,7 +256,7 @@ define void @fexp2_v4f32(<4 x float>* %c, <4 x float>* %a) nounwind {
define void @fexp2_v2f64(<2 x double>* %c, <2 x double>* %a) nounwind {
; CHECK: fexp2_v2f64:
- %1 = load <2 x double>* %a
+ %1 = load <2 x double>, <2 x double>* %a
; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
%2 = tail call <2 x double> @llvm.exp2.v2f64 (<2 x double> %1)
; CHECK-DAG: ldi.d [[R3:\$w[0-9]+]], 1
@@ -272,7 +272,7 @@ define void @fexp2_v2f64(<2 x double>* %c, <2 x double>* %a) nounwind {
define void @fexp2_v4f32_2(<4 x float>* %c, <4 x float>* %a) nounwind {
; CHECK: fexp2_v4f32_2:
- %1 = load <4 x float>* %a
+ %1 = load <4 x float>, <4 x float>* %a
; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
%2 = tail call <4 x float> @llvm.exp2.v4f32 (<4 x float> %1)
%3 = fmul <4 x float> <float 2.0, float 2.0, float 2.0, float 2.0>, %2
@@ -289,7 +289,7 @@ define void @fexp2_v4f32_2(<4 x float>* %c, <4 x float>* %a) nounwind {
define void @fexp2_v2f64_2(<2 x double>* %c, <2 x double>* %a) nounwind {
; CHECK: fexp2_v2f64_2:
- %1 = load <2 x double>* %a
+ %1 = load <2 x double>, <2 x double>* %a
; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
%2 = tail call <2 x double> @llvm.exp2.v2f64 (<2 x double> %1)
%3 = fmul <2 x double> <double 2.0, double 2.0>, %2
@@ -306,7 +306,7 @@ define void @fexp2_v2f64_2(<2 x double>* %c, <2 x double>* %a) nounwind {
define void @fsqrt_v4f32(<4 x float>* %c, <4 x float>* %a) nounwind {
; CHECK: fsqrt_v4f32:
- %1 = load <4 x float>* %a
+ %1 = load <4 x float>, <4 x float>* %a
; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
%2 = tail call <4 x float> @llvm.sqrt.v4f32 (<4 x float> %1)
; CHECK-DAG: fsqrt.w [[R3:\$w[0-9]+]], [[R1]]
@@ -320,7 +320,7 @@ define void @fsqrt_v4f32(<4 x float>* %c, <4 x float>* %a) nounwind {
define void @fsqrt_v2f64(<2 x double>* %c, <2 x double>* %a) nounwind {
; CHECK: fsqrt_v2f64:
- %1 = load <2 x double>* %a
+ %1 = load <2 x double>, <2 x double>* %a
; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
%2 = tail call <2 x double> @llvm.sqrt.v2f64 (<2 x double> %1)
; CHECK-DAG: fsqrt.d [[R3:\$w[0-9]+]], [[R1]]
@@ -334,7 +334,7 @@ define void @fsqrt_v2f64(<2 x double>* %c, <2 x double>* %a) nounwind {
define void @ffint_u_v4f32(<4 x float>* %c, <4 x i32>* %a) nounwind {
; CHECK: ffint_u_v4f32:
- %1 = load <4 x i32>* %a
+ %1 = load <4 x i32>, <4 x i32>* %a
; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
%2 = uitofp <4 x i32> %1 to <4 x float>
; CHECK-DAG: ffint_u.w [[R3:\$w[0-9]+]], [[R1]]
@@ -348,7 +348,7 @@ define void @ffint_u_v4f32(<4 x float>* %c, <4 x i32>* %a) nounwind {
define void @ffint_u_v2f64(<2 x double>* %c, <2 x i64>* %a) nounwind {
; CHECK: ffint_u_v2f64:
- %1 = load <2 x i64>* %a
+ %1 = load <2 x i64>, <2 x i64>* %a
; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
%2 = uitofp <2 x i64> %1 to <2 x double>
; CHECK-DAG: ffint_u.d [[R3:\$w[0-9]+]], [[R1]]
@@ -362,7 +362,7 @@ define void @ffint_u_v2f64(<2 x double>* %c, <2 x i64>* %a) nounwind {
define void @ffint_s_v4f32(<4 x float>* %c, <4 x i32>* %a) nounwind {
; CHECK: ffint_s_v4f32:
- %1 = load <4 x i32>* %a
+ %1 = load <4 x i32>, <4 x i32>* %a
; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
%2 = sitofp <4 x i32> %1 to <4 x float>
; CHECK-DAG: ffint_s.w [[R3:\$w[0-9]+]], [[R1]]
@@ -376,7 +376,7 @@ define void @ffint_s_v4f32(<4 x float>* %c, <4 x i32>* %a) nounwind {
define void @ffint_s_v2f64(<2 x double>* %c, <2 x i64>* %a) nounwind {
; CHECK: ffint_s_v2f64:
- %1 = load <2 x i64>* %a
+ %1 = load <2 x i64>, <2 x i64>* %a
; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
%2 = sitofp <2 x i64> %1 to <2 x double>
; CHECK-DAG: ffint_s.d [[R3:\$w[0-9]+]], [[R1]]
@@ -390,7 +390,7 @@ define void @ffint_s_v2f64(<2 x double>* %c, <2 x i64>* %a) nounwind {
define void @ftrunc_u_v4f32(<4 x i32>* %c, <4 x float>* %a) nounwind {
; CHECK: ftrunc_u_v4f32:
- %1 = load <4 x float>* %a
+ %1 = load <4 x float>, <4 x float>* %a
; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
%2 = fptoui <4 x float> %1 to <4 x i32>
; CHECK-DAG: ftrunc_u.w [[R3:\$w[0-9]+]], [[R1]]
@@ -404,7 +404,7 @@ define void @ftrunc_u_v4f32(<4 x i32>* %c, <4 x float>* %a) nounwind {
define void @ftrunc_u_v2f64(<2 x i64>* %c, <2 x double>* %a) nounwind {
; CHECK: ftrunc_u_v2f64:
- %1 = load <2 x double>* %a
+ %1 = load <2 x double>, <2 x double>* %a
; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
%2 = fptoui <2 x double> %1 to <2 x i64>
; CHECK-DAG: ftrunc_u.d [[R3:\$w[0-9]+]], [[R1]]
@@ -418,7 +418,7 @@ define void @ftrunc_u_v2f64(<2 x i64>* %c, <2 x double>* %a) nounwind {
define void @ftrunc_s_v4f32(<4 x i32>* %c, <4 x float>* %a) nounwind {
; CHECK: ftrunc_s_v4f32:
- %1 = load <4 x float>* %a
+ %1 = load <4 x float>, <4 x float>* %a
; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
%2 = fptosi <4 x float> %1 to <4 x i32>
; CHECK-DAG: ftrunc_s.w [[R3:\$w[0-9]+]], [[R1]]
@@ -432,7 +432,7 @@ define void @ftrunc_s_v4f32(<4 x i32>* %c, <4 x float>* %a) nounwind {
define void @ftrunc_s_v2f64(<2 x i64>* %c, <2 x double>* %a) nounwind {
; CHECK: ftrunc_s_v2f64:
- %1 = load <2 x double>* %a
+ %1 = load <2 x double>, <2 x double>* %a
; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
%2 = fptosi <2 x double> %1 to <2 x i64>
; CHECK-DAG: ftrunc_s.d [[R3:\$w[0-9]+]], [[R1]]
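The basic_operations.ll changes below replace the MIPS32-only check prefixes with a layered set (ALL plus ABI- and endianness-specific prefixes) so one body of assertions can cover the O32, N32 and N64 runs. A hedged sketch of how FileCheck combines several -check-prefix flags on a single RUN line (the mnemonics are placeholders drawn from the hunks below, not new assertions for this test):

    ; RUN: llc -march=mips64 -mattr=+msa,+fp64 < %s | FileCheck -check-prefix=ALL -check-prefix=N64 %s
    ; ALL: ldi.b      ; matched by every run that enables the ALL prefix
    ; N64: daddiu     ; matched only by runs that also enable the N64 prefix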
diff --git a/test/CodeGen/Mips/msa/basic_operations.ll b/test/CodeGen/Mips/msa/basic_operations.ll
index dbdf42be49ca..2efec2911935 100644
--- a/test/CodeGen/Mips/msa/basic_operations.ll
+++ b/test/CodeGen/Mips/msa/basic_operations.ll
@@ -1,5 +1,9 @@
-; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck -check-prefix=MIPS32-AE -check-prefix=MIPS32-BE %s
-; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck -check-prefix=MIPS32-AE -check-prefix=MIPS32-LE %s
+; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck -check-prefix=ALL -check-prefix=O32 -check-prefix=MIPS32 -check-prefix=ALL-BE %s
+; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck -check-prefix=ALL -check-prefix=O32 -check-prefix=MIPS32 -check-prefix=ALL-LE %s
+; RUN: llc -march=mips64 -target-abi n32 -mattr=+msa,+fp64 < %s | FileCheck -check-prefix=ALL -check-prefix=N32 -check-prefix=MIPS64 -check-prefix=ALL-BE %s
+; RUN: llc -march=mips64el -target-abi n32 -mattr=+msa,+fp64 < %s | FileCheck -check-prefix=ALL -check-prefix=N32 -check-prefix=MIPS64 -check-prefix=ALL-LE %s
+; RUN: llc -march=mips64 -mattr=+msa,+fp64 < %s | FileCheck -check-prefix=ALL -check-prefix=N64 -check-prefix=MIPS64 -check-prefix=ALL-BE %s
+; RUN: llc -march=mips64el -mattr=+msa,+fp64 < %s | FileCheck -check-prefix=ALL -check-prefix=N64 -check-prefix=MIPS64 -check-prefix=ALL-LE %s
@v4i8 = global <4 x i8> <i8 0, i8 0, i8 0, i8 0>
@v16i8 = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>
@@ -10,135 +14,153 @@
@i64 = global i64 0
define void @const_v16i8() nounwind {
- ; MIPS32-AE-LABEL: const_v16i8:
+ ; ALL-LABEL: const_v16i8:
store volatile <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, <16 x i8>*@v16i8
- ; MIPS32-AE: ldi.b [[R1:\$w[0-9]+]], 0
+ ; ALL: ldi.b [[R1:\$w[0-9]+]], 0
store volatile <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>, <16 x i8>*@v16i8
- ; MIPS32-AE: ldi.b [[R1:\$w[0-9]+]], 1
+ ; ALL: ldi.b [[R1:\$w[0-9]+]], 1
store volatile <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 31>, <16 x i8>*@v16i8
- ; MIPS32-AE: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %lo($
- ; MIPS32-AE: ld.b [[R1:\$w[0-9]+]], 0([[G_PTR]])
+ ; O32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %lo($
+ ; N32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst($
+ ; N64: daddiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst($
+ ; ALL: ld.b [[R1:\$w[0-9]+]], 0([[G_PTR]])
store volatile <16 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6>, <16 x i8>*@v16i8
- ; MIPS32-AE: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %lo($
- ; MIPS32-AE: ld.b [[R1:\$w[0-9]+]], 0([[G_PTR]])
+ ; O32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %lo($
+ ; N32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst($
+ ; N64: daddiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst($
+ ; ALL: ld.b [[R1:\$w[0-9]+]], 0([[G_PTR]])
store volatile <16 x i8> <i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0>, <16 x i8>*@v16i8
- ; MIPS32-BE: ldi.h [[R1:\$w[0-9]+]], 256
- ; MIPS32-LE: ldi.h [[R1:\$w[0-9]+]], 1
+ ; ALL-BE: ldi.h [[R1:\$w[0-9]+]], 256
+ ; ALL-LE: ldi.h [[R1:\$w[0-9]+]], 1
store volatile <16 x i8> <i8 1, i8 2, i8 3, i8 4, i8 1, i8 2, i8 3, i8 4, i8 1, i8 2, i8 3, i8 4, i8 1, i8 2, i8 3, i8 4>, <16 x i8>*@v16i8
- ; MIPS32-BE-DAG: lui [[R2:\$[0-9]+]], 258
- ; MIPS32-LE-DAG: lui [[R2:\$[0-9]+]], 1027
- ; MIPS32-BE-DAG: ori [[R2]], [[R2]], 772
- ; MIPS32-LE-DAG: ori [[R2]], [[R2]], 513
- ; MIPS32-AE-DAG: fill.w [[R1:\$w[0-9]+]], [[R2]]
+ ; ALL-BE-DAG: lui [[R2:\$[0-9]+]], 258
+ ; ALL-LE-DAG: lui [[R2:\$[0-9]+]], 1027
+ ; ALL-BE-DAG: ori [[R2]], [[R2]], 772
+ ; ALL-LE-DAG: ori [[R2]], [[R2]], 513
+ ; ALL-DAG: fill.w [[R1:\$w[0-9]+]], [[R2]]
store volatile <16 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8>, <16 x i8>*@v16i8
- ; MIPS32-AE: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %lo($
- ; MIPS32-AE: ld.b [[R1:\$w[0-9]+]], 0([[G_PTR]])
+ ; O32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %lo($
+ ; N32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst($
+ ; N64: daddiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst($
+ ; ALL: ld.b [[R1:\$w[0-9]+]], 0([[G_PTR]])
ret void
- ; MIPS32-AE: .size const_v16i8
}
define void @const_v8i16() nounwind {
- ; MIPS32-AE-LABEL: const_v8i16:
+ ; ALL-LABEL: const_v8i16:
store volatile <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, <8 x i16>*@v8i16
- ; MIPS32-AE: ldi.b [[R1:\$w[0-9]+]], 0
+ ; ALL: ldi.b [[R1:\$w[0-9]+]], 0
store volatile <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, <8 x i16>*@v8i16
- ; MIPS32-AE: ldi.h [[R1:\$w[0-9]+]], 1
+ ; ALL: ldi.h [[R1:\$w[0-9]+]], 1
store volatile <8 x i16> <i16 1, i16 1, i16 1, i16 2, i16 1, i16 1, i16 1, i16 31>, <8 x i16>*@v8i16
- ; MIPS32-AE: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %lo($
- ; MIPS32-AE: ld.h [[R1:\$w[0-9]+]], 0([[G_PTR]])
+ ; O32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %lo($
+ ; N32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst($
+ ; N64: daddiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst($
+ ; ALL: ld.h [[R1:\$w[0-9]+]], 0([[G_PTR]])
store volatile <8 x i16> <i16 1028, i16 1028, i16 1028, i16 1028, i16 1028, i16 1028, i16 1028, i16 1028>, <8 x i16>*@v8i16
- ; MIPS32-AE: ldi.b [[R1:\$w[0-9]+]], 4
+ ; ALL: ldi.b [[R1:\$w[0-9]+]], 4
store volatile <8 x i16> <i16 1, i16 2, i16 1, i16 2, i16 1, i16 2, i16 1, i16 2>, <8 x i16>*@v8i16
- ; MIPS32-BE-DAG: lui [[R2:\$[0-9]+]], 1
- ; MIPS32-LE-DAG: lui [[R2:\$[0-9]+]], 2
- ; MIPS32-BE-DAG: ori [[R2]], [[R2]], 2
- ; MIPS32-LE-DAG: ori [[R2]], [[R2]], 1
- ; MIPS32-AE-DAG: fill.w [[R1:\$w[0-9]+]], [[R2]]
+ ; ALL-BE-DAG: lui [[R2:\$[0-9]+]], 1
+ ; ALL-LE-DAG: lui [[R2:\$[0-9]+]], 2
+ ; ALL-BE-DAG: ori [[R2]], [[R2]], 2
+ ; ALL-LE-DAG: ori [[R2]], [[R2]], 1
+ ; ALL-DAG: fill.w [[R1:\$w[0-9]+]], [[R2]]
store volatile <8 x i16> <i16 1, i16 2, i16 3, i16 4, i16 1, i16 2, i16 3, i16 4>, <8 x i16>*@v8i16
- ; MIPS32-AE: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %lo($
- ; MIPS32-AE: ld.h [[R1:\$w[0-9]+]], 0([[G_PTR]])
+ ; O32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %lo($
+ ; N32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst($
+ ; N64: daddiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst($
+ ; ALL: ld.h [[R1:\$w[0-9]+]], 0([[G_PTR]])
ret void
- ; MIPS32-AE: .size const_v8i16
}
define void @const_v4i32() nounwind {
- ; MIPS32-AE-LABEL: const_v4i32:
+ ; ALL-LABEL: const_v4i32:
store volatile <4 x i32> <i32 0, i32 0, i32 0, i32 0>, <4 x i32>*@v4i32
- ; MIPS32-AE: ldi.b [[R1:\$w[0-9]+]], 0
+ ; ALL: ldi.b [[R1:\$w[0-9]+]], 0
store volatile <4 x i32> <i32 1, i32 1, i32 1, i32 1>, <4 x i32>*@v4i32
- ; MIPS32-AE: ldi.w [[R1:\$w[0-9]+]], 1
+ ; ALL: ldi.w [[R1:\$w[0-9]+]], 1
store volatile <4 x i32> <i32 1, i32 1, i32 1, i32 31>, <4 x i32>*@v4i32
- ; MIPS32-AE: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %lo($
- ; MIPS32-AE: ld.w [[R1:\$w[0-9]+]], 0([[G_PTR]])
+ ; O32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %lo($
+ ; N32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst($
+ ; N64: daddiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst($
+ ; ALL: ld.w [[R1:\$w[0-9]+]], 0([[G_PTR]])
store volatile <4 x i32> <i32 16843009, i32 16843009, i32 16843009, i32 16843009>, <4 x i32>*@v4i32
- ; MIPS32-AE: ldi.b [[R1:\$w[0-9]+]], 1
+ ; ALL: ldi.b [[R1:\$w[0-9]+]], 1
store volatile <4 x i32> <i32 65537, i32 65537, i32 65537, i32 65537>, <4 x i32>*@v4i32
- ; MIPS32-AE: ldi.h [[R1:\$w[0-9]+]], 1
+ ; ALL: ldi.h [[R1:\$w[0-9]+]], 1
store volatile <4 x i32> <i32 1, i32 2, i32 1, i32 2>, <4 x i32>*@v4i32
- ; MIPS32-AE: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %lo($
- ; MIPS32-AE: ld.w [[R1:\$w[0-9]+]], 0([[G_PTR]])
+ ; O32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %lo($
+ ; N32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst($
+ ; N64: daddiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst($
+ ; ALL: ld.w [[R1:\$w[0-9]+]], 0([[G_PTR]])
store volatile <4 x i32> <i32 3, i32 4, i32 5, i32 6>, <4 x i32>*@v4i32
- ; MIPS32-AE: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %lo($
- ; MIPS32-AE: ld.w [[R1:\$w[0-9]+]], 0([[G_PTR]])
+ ; O32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %lo($
+ ; N32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst($
+ ; N64: daddiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst($
+ ; ALL: ld.w [[R1:\$w[0-9]+]], 0([[G_PTR]])
ret void
- ; MIPS32-AE: .size const_v4i32
}
define void @const_v2i64() nounwind {
- ; MIPS32-AE-LABEL: const_v2i64:
+ ; ALL-LABEL: const_v2i64:
store volatile <2 x i64> <i64 0, i64 0>, <2 x i64>*@v2i64
- ; MIPS32-AE: ldi.b [[R1:\$w[0-9]+]], 0
+ ; ALL: ldi.b [[R1:\$w[0-9]+]], 0
store volatile <2 x i64> <i64 72340172838076673, i64 72340172838076673>, <2 x i64>*@v2i64
- ; MIPS32-AE: ldi.b [[R1:\$w[0-9]+]], 1
+ ; ALL: ldi.b [[R1:\$w[0-9]+]], 1
store volatile <2 x i64> <i64 281479271743489, i64 281479271743489>, <2 x i64>*@v2i64
- ; MIPS32-AE: ldi.h [[R1:\$w[0-9]+]], 1
+ ; ALL: ldi.h [[R1:\$w[0-9]+]], 1
store volatile <2 x i64> <i64 4294967297, i64 4294967297>, <2 x i64>*@v2i64
- ; MIPS32-AE: ldi.w [[R1:\$w[0-9]+]], 1
+ ; ALL: ldi.w [[R1:\$w[0-9]+]], 1
store volatile <2 x i64> <i64 1, i64 1>, <2 x i64>*@v2i64
- ; MIPS32-AE: ldi.d [[R1:\$w[0-9]+]], 1
+ ; ALL: ldi.d [[R1:\$w[0-9]+]], 1
store volatile <2 x i64> <i64 1, i64 31>, <2 x i64>*@v2i64
- ; MIPS32-AE: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %lo($
- ; MIPS32-AE: ld.w [[R1:\$w[0-9]+]], 0([[G_PTR]])
+ ; O32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %lo($
+ ; N32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst($
+ ; N64: daddiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst($
+ ; MIPS32: ld.w [[R1:\$w[0-9]+]], 0([[G_PTR]])
+ ; MIPS64: ld.d [[R1:\$w[0-9]+]], 0([[G_PTR]])
store volatile <2 x i64> <i64 3, i64 4>, <2 x i64>*@v2i64
- ; MIPS32-AE: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %lo($
- ; MIPS32-AE: ld.w [[R1:\$w[0-9]+]], 0([[G_PTR]])
+ ; O32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %lo($
+ ; N32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst($
+ ; N64: daddiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst($
+ ; MIPS32: ld.w [[R1:\$w[0-9]+]], 0([[G_PTR]])
+ ; MIPS64: ld.d [[R1:\$w[0-9]+]], 0([[G_PTR]])
ret void
- ; MIPS32-AE: .size const_v2i64
}
-define void @nonconst_v16i8(i8 %a, i8 %b, i8 %c, i8 %d, i8 %e, i8 %f, i8 %g, i8 %h) nounwind {
- ; MIPS32-AE-LABEL: nonconst_v16i8:
+define void @nonconst_v16i8(i8 signext %a, i8 signext %b, i8 signext %c, i8 signext %d, i8 signext %e, i8 signext %f, i8 signext %g, i8 signext %h) nounwind {
+ ; ALL-LABEL: nonconst_v16i8:
%1 = insertelement <16 x i8> undef, i8 %a, i32 0
%2 = insertelement <16 x i8> %1, i8 %b, i32 1
@@ -156,39 +178,38 @@ define void @nonconst_v16i8(i8 %a, i8 %b, i8 %c, i8 %d, i8 %e, i8 %f, i8 %g, i8
%14 = insertelement <16 x i8> %13, i8 %h, i32 13
%15 = insertelement <16 x i8> %14, i8 %h, i32 14
%16 = insertelement <16 x i8> %15, i8 %h, i32 15
- ; MIPS32-AE-DAG: insert.b [[R1:\$w[0-9]+]][0], $4
- ; MIPS32-AE-DAG: insert.b [[R1]][1], $5
- ; MIPS32-AE-DAG: insert.b [[R1]][2], $6
- ; MIPS32-AE-DAG: insert.b [[R1]][3], $7
- ; MIPS32-BE-DAG: lbu [[R2:\$[0-9]+]], 19($sp)
- ; MIPS32-LE-DAG: lbu [[R2:\$[0-9]+]], 16($sp)
- ; MIPS32-AE-DAG: insert.b [[R1]][4], [[R2]]
- ; MIPS32-BE-DAG: lbu [[R3:\$[0-9]+]], 23($sp)
- ; MIPS32-LE-DAG: lbu [[R3:\$[0-9]+]], 20($sp)
- ; MIPS32-AE-DAG: insert.b [[R1]][5], [[R3]]
- ; MIPS32-BE-DAG: lbu [[R4:\$[0-9]+]], 27($sp)
- ; MIPS32-LE-DAG: lbu [[R4:\$[0-9]+]], 24($sp)
- ; MIPS32-AE-DAG: insert.b [[R1]][6], [[R4]]
- ; MIPS32-BE-DAG: lbu [[R5:\$[0-9]+]], 31($sp)
- ; MIPS32-LE-DAG: lbu [[R5:\$[0-9]+]], 28($sp)
- ; MIPS32-AE-DAG: insert.b [[R1]][7], [[R5]]
- ; MIPS32-AE-DAG: insert.b [[R1]][8], [[R5]]
- ; MIPS32-AE-DAG: insert.b [[R1]][9], [[R5]]
- ; MIPS32-AE-DAG: insert.b [[R1]][10], [[R5]]
- ; MIPS32-AE-DAG: insert.b [[R1]][11], [[R5]]
- ; MIPS32-AE-DAG: insert.b [[R1]][12], [[R5]]
- ; MIPS32-AE-DAG: insert.b [[R1]][13], [[R5]]
- ; MIPS32-AE-DAG: insert.b [[R1]][14], [[R5]]
- ; MIPS32-AE-DAG: insert.b [[R1]][15], [[R5]]
+ ; ALL-DAG: insert.b [[R1:\$w[0-9]+]][0], $4
+ ; ALL-DAG: insert.b [[R1]][1], $5
+ ; ALL-DAG: insert.b [[R1]][2], $6
+ ; ALL-DAG: insert.b [[R1]][3], $7
+ ; MIPS32-DAG: lw [[R2:\$[0-9]+]], 16($sp)
+ ; MIPS32-DAG: insert.b [[R1]][4], [[R2]]
+ ; MIPS64-DAG: insert.b [[R1]][4], $8
+ ; MIPS32-DAG: lw [[R3:\$[0-9]+]], 20($sp)
+ ; MIPS32-DAG: insert.b [[R1]][5], [[R3]]
+ ; MIPS64-DAG: insert.b [[R1]][5], $9
+ ; MIPS32-DAG: lw [[R4:\$[0-9]+]], 24($sp)
+ ; MIPS32-DAG: insert.b [[R1]][6], [[R4]]
+ ; MIPS64-DAG: insert.b [[R1]][6], $10
+ ; MIPS32-DAG: lw [[R5:\$[0-9]+]], 28($sp)
+ ; MIPS32-DAG: insert.b [[R1]][7], [[R5]]
+ ; MIPS64-DAG: insert.b [[R1]][7], [[R5:\$11]]
+ ; ALL-DAG: insert.b [[R1]][8], [[R5]]
+ ; ALL-DAG: insert.b [[R1]][9], [[R5]]
+ ; ALL-DAG: insert.b [[R1]][10], [[R5]]
+ ; ALL-DAG: insert.b [[R1]][11], [[R5]]
+ ; ALL-DAG: insert.b [[R1]][12], [[R5]]
+ ; ALL-DAG: insert.b [[R1]][13], [[R5]]
+ ; ALL-DAG: insert.b [[R1]][14], [[R5]]
+ ; ALL-DAG: insert.b [[R1]][15], [[R5]]
store volatile <16 x i8> %16, <16 x i8>*@v16i8
ret void
- ; MIPS32-AE: .size nonconst_v16i8
}
-define void @nonconst_v8i16(i16 %a, i16 %b, i16 %c, i16 %d, i16 %e, i16 %f, i16 %g, i16 %h) nounwind {
- ; MIPS32-AE-LABEL: nonconst_v8i16:
+define void @nonconst_v8i16(i16 signext %a, i16 signext %b, i16 signext %c, i16 signext %d, i16 signext %e, i16 signext %f, i16 signext %g, i16 signext %h) nounwind {
+ ; ALL-LABEL: nonconst_v8i16:
%1 = insertelement <8 x i16> undef, i16 %a, i32 0
%2 = insertelement <8 x i16> %1, i16 %b, i32 1
@@ -198,610 +219,648 @@ define void @nonconst_v8i16(i16 %a, i16 %b, i16 %c, i16 %d, i16 %e, i16 %f, i16
%6 = insertelement <8 x i16> %5, i16 %f, i32 5
%7 = insertelement <8 x i16> %6, i16 %g, i32 6
%8 = insertelement <8 x i16> %7, i16 %h, i32 7
- ; MIPS32-AE-DAG: insert.h [[R1:\$w[0-9]+]][0], $4
- ; MIPS32-AE-DAG: insert.h [[R1]][1], $5
- ; MIPS32-AE-DAG: insert.h [[R1]][2], $6
- ; MIPS32-AE-DAG: insert.h [[R1]][3], $7
- ; MIPS32-BE-DAG: lhu [[R2:\$[0-9]+]], 18($sp)
- ; MIPS32-LE-DAG: lhu [[R2:\$[0-9]+]], 16($sp)
- ; MIPS32-AE-DAG: insert.h [[R1]][4], [[R2]]
- ; MIPS32-BE-DAG: lhu [[R2:\$[0-9]+]], 22($sp)
- ; MIPS32-LE-DAG: lhu [[R2:\$[0-9]+]], 20($sp)
- ; MIPS32-AE-DAG: insert.h [[R1]][5], [[R2]]
- ; MIPS32-BE-DAG: lhu [[R2:\$[0-9]+]], 26($sp)
- ; MIPS32-LE-DAG: lhu [[R2:\$[0-9]+]], 24($sp)
- ; MIPS32-AE-DAG: insert.h [[R1]][6], [[R2]]
- ; MIPS32-BE-DAG: lhu [[R2:\$[0-9]+]], 30($sp)
- ; MIPS32-LE-DAG: lhu [[R2:\$[0-9]+]], 28($sp)
- ; MIPS32-AE-DAG: insert.h [[R1]][7], [[R2]]
+ ; ALL-DAG: insert.h [[R1:\$w[0-9]+]][0], $4
+ ; ALL-DAG: insert.h [[R1]][1], $5
+ ; ALL-DAG: insert.h [[R1]][2], $6
+ ; ALL-DAG: insert.h [[R1]][3], $7
+ ; MIPS32-DAG: lw [[R2:\$[0-9]+]], 16($sp)
+ ; MIPS32-DAG: insert.h [[R1]][4], [[R2]]
+ ; MIPS64-DAG: insert.h [[R1]][4], $8
+ ; MIPS32-DAG: lw [[R2:\$[0-9]+]], 20($sp)
+ ; MIPS32-DAG: insert.h [[R1]][5], [[R2]]
+ ; MIPS64-DAG: insert.h [[R1]][5], $9
+ ; MIPS32-DAG: lw [[R2:\$[0-9]+]], 24($sp)
+ ; MIPS32-DAG: insert.h [[R1]][6], [[R2]]
+ ; MIPS64-DAG: insert.h [[R1]][6], $10
+ ; MIPS32-DAG: lw [[R2:\$[0-9]+]], 28($sp)
+ ; MIPS32-DAG: insert.h [[R1]][7], [[R2]]
+ ; MIPS64-DAG: insert.h [[R1]][7], $11
store volatile <8 x i16> %8, <8 x i16>*@v8i16
ret void
- ; MIPS32-AE: .size nonconst_v8i16
}
-define void @nonconst_v4i32(i32 %a, i32 %b, i32 %c, i32 %d) nounwind {
- ; MIPS32-AE-LABEL: nonconst_v4i32:
+define void @nonconst_v4i32(i32 signext %a, i32 signext %b, i32 signext %c, i32 signext %d) nounwind {
+ ; ALL-LABEL: nonconst_v4i32:
%1 = insertelement <4 x i32> undef, i32 %a, i32 0
%2 = insertelement <4 x i32> %1, i32 %b, i32 1
%3 = insertelement <4 x i32> %2, i32 %c, i32 2
%4 = insertelement <4 x i32> %3, i32 %d, i32 3
- ; MIPS32-AE: insert.w [[R1:\$w[0-9]+]][0], $4
- ; MIPS32-AE: insert.w [[R1]][1], $5
- ; MIPS32-AE: insert.w [[R1]][2], $6
- ; MIPS32-AE: insert.w [[R1]][3], $7
+ ; ALL: insert.w [[R1:\$w[0-9]+]][0], $4
+ ; ALL: insert.w [[R1]][1], $5
+ ; ALL: insert.w [[R1]][2], $6
+ ; ALL: insert.w [[R1]][3], $7
store volatile <4 x i32> %4, <4 x i32>*@v4i32
ret void
- ; MIPS32-AE: .size nonconst_v4i32
}
-define void @nonconst_v2i64(i64 %a, i64 %b) nounwind {
- ; MIPS32-AE-LABEL: nonconst_v2i64:
+define void @nonconst_v2i64(i64 signext %a, i64 signext %b) nounwind {
+ ; ALL-LABEL: nonconst_v2i64:
%1 = insertelement <2 x i64> undef, i64 %a, i32 0
%2 = insertelement <2 x i64> %1, i64 %b, i32 1
- ; MIPS32-AE: insert.w [[R1:\$w[0-9]+]][0], $4
- ; MIPS32-AE: insert.w [[R1]][1], $5
- ; MIPS32-AE: insert.w [[R1]][2], $6
- ; MIPS32-AE: insert.w [[R1]][3], $7
+ ; MIPS32: insert.w [[R1:\$w[0-9]+]][0], $4
+ ; MIPS32: insert.w [[R1]][1], $5
+ ; MIPS32: insert.w [[R1]][2], $6
+ ; MIPS32: insert.w [[R1]][3], $7
+ ; MIPS64: insert.d [[R1:\$w[0-9]+]][0], $4
+ ; MIPS64: insert.d [[R1]][1], $5
store volatile <2 x i64> %2, <2 x i64>*@v2i64
ret void
- ; MIPS32-AE: .size nonconst_v2i64
}
define i32 @extract_sext_v16i8() nounwind {
- ; MIPS32-AE-LABEL: extract_sext_v16i8:
+ ; ALL-LABEL: extract_sext_v16i8:
- %1 = load <16 x i8>* @v16i8
- ; MIPS32-AE-DAG: ld.b [[R1:\$w[0-9]+]],
+ %1 = load <16 x i8>, <16 x i8>* @v16i8
+ ; ALL-DAG: ld.b [[R1:\$w[0-9]+]],
%2 = add <16 x i8> %1, %1
- ; MIPS32-AE-DAG: addv.b [[R2:\$w[0-9]+]], [[R1]], [[R1]]
+ ; ALL-DAG: addv.b [[R2:\$w[0-9]+]], [[R1]], [[R1]]
%3 = extractelement <16 x i8> %2, i32 1
%4 = sext i8 %3 to i32
- ; MIPS32-AE-DAG: copy_s.b [[R3:\$[0-9]+]], [[R1]][1]
- ; MIPS32-AE-NOT: sll
- ; MIPS32-AE-NOT: sra
+ ; ALL-DAG: copy_s.b [[R3:\$[0-9]+]], [[R1]][1]
+ ; ALL-NOT: sll
+ ; ALL-NOT: sra
ret i32 %4
- ; MIPS32-AE: .size extract_sext_v16i8
}
define i32 @extract_sext_v8i16() nounwind {
- ; MIPS32-AE-LABEL: extract_sext_v8i16:
+ ; ALL-LABEL: extract_sext_v8i16:
- %1 = load <8 x i16>* @v8i16
- ; MIPS32-AE-DAG: ld.h [[R1:\$w[0-9]+]],
+ %1 = load <8 x i16>, <8 x i16>* @v8i16
+ ; ALL-DAG: ld.h [[R1:\$w[0-9]+]],
%2 = add <8 x i16> %1, %1
- ; MIPS32-AE-DAG: addv.h [[R2:\$w[0-9]+]], [[R1]], [[R1]]
+ ; ALL-DAG: addv.h [[R2:\$w[0-9]+]], [[R1]], [[R1]]
%3 = extractelement <8 x i16> %2, i32 1
%4 = sext i16 %3 to i32
- ; MIPS32-AE-DAG: copy_s.h [[R3:\$[0-9]+]], [[R1]][1]
- ; MIPS32-AE-NOT: sll
- ; MIPS32-AE-NOT: sra
+ ; ALL-DAG: copy_s.h [[R3:\$[0-9]+]], [[R1]][1]
+ ; ALL-NOT: sll
+ ; ALL-NOT: sra
ret i32 %4
- ; MIPS32-AE: .size extract_sext_v8i16
}
define i32 @extract_sext_v4i32() nounwind {
- ; MIPS32-AE-LABEL: extract_sext_v4i32:
+ ; ALL-LABEL: extract_sext_v4i32:
- %1 = load <4 x i32>* @v4i32
- ; MIPS32-AE-DAG: ld.w [[R1:\$w[0-9]+]],
+ %1 = load <4 x i32>, <4 x i32>* @v4i32
+ ; ALL-DAG: ld.w [[R1:\$w[0-9]+]],
%2 = add <4 x i32> %1, %1
- ; MIPS32-AE-DAG: addv.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
+ ; ALL-DAG: addv.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
%3 = extractelement <4 x i32> %2, i32 1
- ; MIPS32-AE-DAG: copy_s.w [[R3:\$[0-9]+]], [[R1]][1]
+ ; ALL-DAG: copy_s.w [[R3:\$[0-9]+]], [[R1]][1]
ret i32 %3
- ; MIPS32-AE: .size extract_sext_v4i32
}
define i64 @extract_sext_v2i64() nounwind {
- ; MIPS32-AE-LABEL: extract_sext_v2i64:
+ ; ALL-LABEL: extract_sext_v2i64:
- %1 = load <2 x i64>* @v2i64
- ; MIPS32-AE-DAG: ld.d [[R1:\$w[0-9]+]],
+ %1 = load <2 x i64>, <2 x i64>* @v2i64
+ ; ALL-DAG: ld.d [[R1:\$w[0-9]+]],
%2 = add <2 x i64> %1, %1
- ; MIPS32-AE-DAG: addv.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
+ ; ALL-DAG: addv.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
%3 = extractelement <2 x i64> %2, i32 1
- ; MIPS32-AE-DAG: copy_s.w [[R3:\$[0-9]+]], [[R1]][2]
- ; MIPS32-AE-DAG: copy_s.w [[R4:\$[0-9]+]], [[R1]][3]
- ; MIPS32-AE-NOT: sll
- ; MIPS32-AE-NOT: sra
+ ; MIPS32-DAG: copy_s.w [[R3:\$[0-9]+]], [[R1]][2]
+ ; MIPS32-DAG: copy_s.w [[R4:\$[0-9]+]], [[R1]][3]
+ ; MIPS64-DAG: copy_s.d [[R3:\$[0-9]+]], [[R1]][1]
+ ; ALL-NOT: sll
+ ; ALL-NOT: sra
ret i64 %3
- ; MIPS32-AE: .size extract_sext_v2i64
}
define i32 @extract_zext_v16i8() nounwind {
- ; MIPS32-AE-LABEL: extract_zext_v16i8:
+ ; ALL-LABEL: extract_zext_v16i8:
- %1 = load <16 x i8>* @v16i8
- ; MIPS32-AE-DAG: ld.b [[R1:\$w[0-9]+]],
+ %1 = load <16 x i8>, <16 x i8>* @v16i8
+ ; ALL-DAG: ld.b [[R1:\$w[0-9]+]],
%2 = add <16 x i8> %1, %1
- ; MIPS32-AE-DAG: addv.b [[R2:\$w[0-9]+]], [[R1]], [[R1]]
+ ; ALL-DAG: addv.b [[R2:\$w[0-9]+]], [[R1]], [[R1]]
%3 = extractelement <16 x i8> %2, i32 1
%4 = zext i8 %3 to i32
- ; MIPS32-AE-DAG: copy_u.b [[R3:\$[0-9]+]], [[R1]][1]
- ; MIPS32-AE-NOT: andi
+ ; ALL-DAG: copy_u.b [[R3:\$[0-9]+]], [[R1]][1]
+ ; ALL-NOT: andi
ret i32 %4
- ; MIPS32-AE: .size extract_zext_v16i8
}
define i32 @extract_zext_v8i16() nounwind {
- ; MIPS32-AE-LABEL: extract_zext_v8i16:
+ ; ALL-LABEL: extract_zext_v8i16:
- %1 = load <8 x i16>* @v8i16
- ; MIPS32-AE-DAG: ld.h [[R1:\$w[0-9]+]],
+ %1 = load <8 x i16>, <8 x i16>* @v8i16
+ ; ALL-DAG: ld.h [[R1:\$w[0-9]+]],
%2 = add <8 x i16> %1, %1
- ; MIPS32-AE-DAG: addv.h [[R2:\$w[0-9]+]], [[R1]], [[R1]]
+ ; ALL-DAG: addv.h [[R2:\$w[0-9]+]], [[R1]], [[R1]]
%3 = extractelement <8 x i16> %2, i32 1
%4 = zext i16 %3 to i32
- ; MIPS32-AE-DAG: copy_u.h [[R3:\$[0-9]+]], [[R1]][1]
- ; MIPS32-AE-NOT: andi
+ ; ALL-DAG: copy_u.h [[R3:\$[0-9]+]], [[R1]][1]
+ ; ALL-NOT: andi
ret i32 %4
- ; MIPS32-AE: .size extract_zext_v8i16
}
define i32 @extract_zext_v4i32() nounwind {
- ; MIPS32-AE-LABEL: extract_zext_v4i32:
+ ; ALL-LABEL: extract_zext_v4i32:
- %1 = load <4 x i32>* @v4i32
- ; MIPS32-AE-DAG: ld.w [[R1:\$w[0-9]+]],
+ %1 = load <4 x i32>, <4 x i32>* @v4i32
+ ; ALL-DAG: ld.w [[R1:\$w[0-9]+]],
%2 = add <4 x i32> %1, %1
- ; MIPS32-AE-DAG: addv.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
+ ; ALL-DAG: addv.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
%3 = extractelement <4 x i32> %2, i32 1
- ; MIPS32-AE-DAG: copy_{{[su]}}.w [[R3:\$[0-9]+]], [[R1]][1]
+ ; ALL-DAG: copy_{{[su]}}.w [[R3:\$[0-9]+]], [[R1]][1]
ret i32 %3
- ; MIPS32-AE: .size extract_zext_v4i32
}
define i64 @extract_zext_v2i64() nounwind {
- ; MIPS32-AE-LABEL: extract_zext_v2i64:
+ ; ALL-LABEL: extract_zext_v2i64:
- %1 = load <2 x i64>* @v2i64
- ; MIPS32-AE-DAG: ld.d [[R1:\$w[0-9]+]],
+ %1 = load <2 x i64>, <2 x i64>* @v2i64
+ ; ALL-DAG: ld.d [[R1:\$w[0-9]+]],
%2 = add <2 x i64> %1, %1
- ; MIPS32-AE-DAG: addv.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
+ ; ALL-DAG: addv.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
%3 = extractelement <2 x i64> %2, i32 1
- ; MIPS32-AE-DAG: copy_{{[su]}}.w [[R3:\$[0-9]+]], [[R1]][2]
- ; MIPS32-AE-DAG: copy_{{[su]}}.w [[R4:\$[0-9]+]], [[R1]][3]
- ; MIPS32-AE-NOT: andi
+ ; MIPS32-DAG: copy_{{[su]}}.w [[R3:\$[0-9]+]], [[R1]][2]
+ ; MIPS32-DAG: copy_{{[su]}}.w [[R4:\$[0-9]+]], [[R1]][3]
+ ; MIPS64-DAG: copy_{{[su]}}.d [[R3:\$[0-9]+]], [[R1]][1]
+ ; ALL-NOT: andi
ret i64 %3
- ; MIPS32-AE: .size extract_zext_v2i64
}
define i32 @extract_sext_v16i8_vidx() nounwind {
- ; MIPS32-AE-LABEL: extract_sext_v16i8_vidx:
+ ; ALL-LABEL: extract_sext_v16i8_vidx:
- %1 = load <16 x i8>* @v16i8
- ; MIPS32-AE-DAG: lw [[PTR_V:\$[0-9]+]], %got(v16i8)(
- ; MIPS32-AE-DAG: ld.b [[R1:\$w[0-9]+]], 0([[PTR_V]])
+ %1 = load <16 x i8>, <16 x i8>* @v16i8
+ ; O32-DAG: lw [[PTR_V:\$[0-9]+]], %got(v16i8)(
+ ; N32-DAG: lw [[PTR_V:\$[0-9]+]], %got_disp(v16i8)(
+ ; N64-DAG: ld [[PTR_V:\$[0-9]+]], %got_disp(v16i8)(
+ ; ALL-DAG: ld.b [[R1:\$w[0-9]+]], 0([[PTR_V]])
%2 = add <16 x i8> %1, %1
- ; MIPS32-AE-DAG: addv.b [[R2:\$w[0-9]+]], [[R1]], [[R1]]
+ ; ALL-DAG: addv.b [[R2:\$w[0-9]+]], [[R1]], [[R1]]
- %3 = load i32* @i32
- ; MIPS32-AE-DAG: lw [[PTR_I:\$[0-9]+]], %got(i32)(
- ; MIPS32-AE-DAG: lw [[IDX:\$[0-9]+]], 0([[PTR_I]])
+ %3 = load i32, i32* @i32
+ ; O32-DAG: lw [[PTR_I:\$[0-9]+]], %got(i32)(
+ ; N32-DAG: lw [[PTR_I:\$[0-9]+]], %got_disp(i32)(
+ ; N64-DAG: ld [[PTR_I:\$[0-9]+]], %got_disp(i32)(
+ ; ALL-DAG: lw [[IDX:\$[0-9]+]], 0([[PTR_I]])
%4 = extractelement <16 x i8> %2, i32 %3
%5 = sext i8 %4 to i32
- ; MIPS32-AE-DAG: splat.b $w[[R3:[0-9]+]], [[R1]]{{\[}}[[IDX]]]
- ; MIPS32-AE-DAG: mfc1 [[R5:\$[0-9]+]], $f[[R3]]
- ; MIPS32-AE-DAG: sra [[R6:\$[0-9]+]], [[R5]], 24
+ ; ALL-DAG: splat.b $w[[R3:[0-9]+]], [[R1]]{{\[}}[[IDX]]]
+ ; ALL-DAG: mfc1 [[R5:\$[0-9]+]], $f[[R3]]
+ ; ALL-DAG: sra [[R6:\$[0-9]+]], [[R5]], 24
ret i32 %5
- ; MIPS32-AE: .size extract_sext_v16i8_vidx
}
define i32 @extract_sext_v8i16_vidx() nounwind {
- ; MIPS32-AE-LABEL: extract_sext_v8i16_vidx:
+ ; ALL-LABEL: extract_sext_v8i16_vidx:
- %1 = load <8 x i16>* @v8i16
- ; MIPS32-AE-DAG: lw [[PTR_V:\$[0-9]+]], %got(v8i16)(
- ; MIPS32-AE-DAG: ld.h [[R1:\$w[0-9]+]], 0([[PTR_V]])
+ %1 = load <8 x i16>, <8 x i16>* @v8i16
+ ; O32-DAG: lw [[PTR_V:\$[0-9]+]], %got(v8i16)(
+ ; N32-DAG: lw [[PTR_V:\$[0-9]+]], %got_disp(v8i16)(
+ ; N64-DAG: ld [[PTR_V:\$[0-9]+]], %got_disp(v8i16)(
+ ; ALL-DAG: ld.h [[R1:\$w[0-9]+]], 0([[PTR_V]])
%2 = add <8 x i16> %1, %1
- ; MIPS32-AE-DAG: addv.h [[R2:\$w[0-9]+]], [[R1]], [[R1]]
+ ; ALL-DAG: addv.h [[R2:\$w[0-9]+]], [[R1]], [[R1]]
- %3 = load i32* @i32
- ; MIPS32-AE-DAG: lw [[PTR_I:\$[0-9]+]], %got(i32)(
- ; MIPS32-AE-DAG: lw [[IDX:\$[0-9]+]], 0([[PTR_I]])
+ %3 = load i32, i32* @i32
+ ; O32-DAG: lw [[PTR_I:\$[0-9]+]], %got(i32)(
+ ; N32-DAG: lw [[PTR_I:\$[0-9]+]], %got_disp(i32)(
+ ; N64-DAG: ld [[PTR_I:\$[0-9]+]], %got_disp(i32)(
+ ; ALL-DAG: lw [[IDX:\$[0-9]+]], 0([[PTR_I]])
%4 = extractelement <8 x i16> %2, i32 %3
%5 = sext i16 %4 to i32
- ; MIPS32-AE-DAG: splat.h $w[[R3:[0-9]+]], [[R1]]{{\[}}[[IDX]]]
- ; MIPS32-AE-DAG: mfc1 [[R5:\$[0-9]+]], $f[[R3]]
- ; MIPS32-AE-DAG: sra [[R6:\$[0-9]+]], [[R5]], 16
+ ; ALL-DAG: splat.h $w[[R3:[0-9]+]], [[R1]]{{\[}}[[IDX]]]
+ ; ALL-DAG: mfc1 [[R5:\$[0-9]+]], $f[[R3]]
+ ; ALL-DAG: sra [[R6:\$[0-9]+]], [[R5]], 16
ret i32 %5
- ; MIPS32-AE: .size extract_sext_v8i16_vidx
}
define i32 @extract_sext_v4i32_vidx() nounwind {
- ; MIPS32-AE-LABEL: extract_sext_v4i32_vidx:
+ ; ALL-LABEL: extract_sext_v4i32_vidx:
- %1 = load <4 x i32>* @v4i32
- ; MIPS32-AE-DAG: lw [[PTR_V:\$[0-9]+]], %got(v4i32)(
- ; MIPS32-AE-DAG: ld.w [[R1:\$w[0-9]+]], 0([[PTR_V]])
+ %1 = load <4 x i32>, <4 x i32>* @v4i32
+ ; O32-DAG: lw [[PTR_V:\$[0-9]+]], %got(v4i32)(
+ ; N32-DAG: lw [[PTR_V:\$[0-9]+]], %got_disp(v4i32)(
+ ; N64-DAG: ld [[PTR_V:\$[0-9]+]], %got_disp(v4i32)(
+ ; ALL-DAG: ld.w [[R1:\$w[0-9]+]], 0([[PTR_V]])
%2 = add <4 x i32> %1, %1
- ; MIPS32-AE-DAG: addv.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
+ ; ALL-DAG: addv.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
- %3 = load i32* @i32
- ; MIPS32-AE-DAG: lw [[PTR_I:\$[0-9]+]], %got(i32)(
- ; MIPS32-AE-DAG: lw [[IDX:\$[0-9]+]], 0([[PTR_I]])
+ %3 = load i32, i32* @i32
+ ; O32-DAG: lw [[PTR_I:\$[0-9]+]], %got(i32)(
+ ; N32-DAG: lw [[PTR_I:\$[0-9]+]], %got_disp(i32)(
+ ; N64-DAG: ld [[PTR_I:\$[0-9]+]], %got_disp(i32)(
+ ; ALL-DAG: lw [[IDX:\$[0-9]+]], 0([[PTR_I]])
%4 = extractelement <4 x i32> %2, i32 %3
- ; MIPS32-AE-DAG: splat.w $w[[R3:[0-9]+]], [[R1]]{{\[}}[[IDX]]]
- ; MIPS32-AE-DAG: mfc1 [[R5:\$[0-9]+]], $f[[R3]]
- ; MIPS32-AE-NOT: sra
+ ; ALL-DAG: splat.w $w[[R3:[0-9]+]], [[R1]]{{\[}}[[IDX]]]
+ ; ALL-DAG: mfc1 [[R5:\$[0-9]+]], $f[[R3]]
+ ; ALL-NOT: sra
ret i32 %4
- ; MIPS32-AE: .size extract_sext_v4i32_vidx
}
define i64 @extract_sext_v2i64_vidx() nounwind {
- ; MIPS32-AE-LABEL: extract_sext_v2i64_vidx:
+ ; ALL-LABEL: extract_sext_v2i64_vidx:
- %1 = load <2 x i64>* @v2i64
- ; MIPS32-AE-DAG: lw [[PTR_V:\$[0-9]+]], %got(v2i64)(
- ; MIPS32-AE-DAG: ld.d [[R1:\$w[0-9]+]], 0([[PTR_V]])
+ %1 = load <2 x i64>, <2 x i64>* @v2i64
+ ; O32-DAG: lw [[PTR_V:\$[0-9]+]], %got(v2i64)(
+ ; N32-DAG: lw [[PTR_V:\$[0-9]+]], %got_disp(v2i64)(
+ ; N64-DAG: ld [[PTR_V:\$[0-9]+]], %got_disp(v2i64)(
+ ; ALL-DAG: ld.d [[R1:\$w[0-9]+]], 0([[PTR_V]])
%2 = add <2 x i64> %1, %1
- ; MIPS32-AE-DAG: addv.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
+ ; ALL-DAG: addv.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
- %3 = load i32* @i32
- ; MIPS32-AE-DAG: lw [[PTR_I:\$[0-9]+]], %got(i32)(
- ; MIPS32-AE-DAG: lw [[IDX:\$[0-9]+]], 0([[PTR_I]])
+ %3 = load i32, i32* @i32
+ ; O32-DAG: lw [[PTR_I:\$[0-9]+]], %got(i32)(
+ ; N32-DAG: lw [[PTR_I:\$[0-9]+]], %got_disp(i32)(
+ ; N64-DAG: ld [[PTR_I:\$[0-9]+]], %got_disp(i32)(
+ ; ALL-DAG: lw [[IDX:\$[0-9]+]], 0([[PTR_I]])
%4 = extractelement <2 x i64> %2, i32 %3
- ; MIPS32-AE-DAG: splat.w $w[[R3:[0-9]+]], [[R1]]{{\[}}[[IDX]]]
- ; MIPS32-AE-DAG: mfc1 [[R5:\$[0-9]+]], $f[[R3]]
- ; MIPS32-AE-DAG: splat.w $w[[R4:[0-9]+]], [[R1]]{{\[}}[[IDX]]]
- ; MIPS32-AE-DAG: mfc1 [[R6:\$[0-9]+]], $f[[R4]]
- ; MIPS32-AE-NOT: sra
+ ; MIPS32-DAG: splat.w $w[[R3:[0-9]+]], [[R1]]{{\[}}[[IDX]]]
+ ; MIPS32-DAG: mfc1 [[R5:\$[0-9]+]], $f[[R3]]
+ ; MIPS32-DAG: splat.w $w[[R4:[0-9]+]], [[R1]]{{\[}}[[IDX]]]
+ ; MIPS32-DAG: mfc1 [[R6:\$[0-9]+]], $f[[R4]]
+ ; MIPS64-DAG: splat.d $w[[R3:[0-9]+]], [[R1]]{{\[}}[[IDX]]]
+ ; MIPS64-DAG: dmfc1 [[R5:\$[0-9]+]], $f[[R3]]
+ ; ALL-NOT: sra
ret i64 %4
- ; MIPS32-AE: .size extract_sext_v2i64_vidx
}
define i32 @extract_zext_v16i8_vidx() nounwind {
- ; MIPS32-AE-LABEL: extract_zext_v16i8_vidx:
+ ; ALL-LABEL: extract_zext_v16i8_vidx:
- %1 = load <16 x i8>* @v16i8
- ; MIPS32-AE-DAG: lw [[PTR_V:\$[0-9]+]], %got(v16i8)(
- ; MIPS32-AE-DAG: ld.b [[R1:\$w[0-9]+]], 0([[PTR_V]])
+ %1 = load <16 x i8>, <16 x i8>* @v16i8
+ ; O32-DAG: lw [[PTR_V:\$[0-9]+]], %got(v16i8)(
+ ; N32-DAG: lw [[PTR_V:\$[0-9]+]], %got_disp(v16i8)(
+ ; N64-DAG: ld [[PTR_V:\$[0-9]+]], %got_disp(v16i8)(
+ ; ALL-DAG: ld.b [[R1:\$w[0-9]+]], 0([[PTR_V]])
%2 = add <16 x i8> %1, %1
- ; MIPS32-AE-DAG: addv.b [[R2:\$w[0-9]+]], [[R1]], [[R1]]
+ ; ALL-DAG: addv.b [[R2:\$w[0-9]+]], [[R1]], [[R1]]
- %3 = load i32* @i32
- ; MIPS32-AE-DAG: lw [[PTR_I:\$[0-9]+]], %got(i32)(
- ; MIPS32-AE-DAG: lw [[IDX:\$[0-9]+]], 0([[PTR_I]])
+ %3 = load i32, i32* @i32
+ ; O32-DAG: lw [[PTR_I:\$[0-9]+]], %got(i32)(
+ ; N32-DAG: lw [[PTR_I:\$[0-9]+]], %got_disp(i32)(
+ ; N64-DAG: ld [[PTR_I:\$[0-9]+]], %got_disp(i32)(
+ ; ALL-DAG: lw [[IDX:\$[0-9]+]], 0([[PTR_I]])
%4 = extractelement <16 x i8> %2, i32 %3
%5 = zext i8 %4 to i32
- ; MIPS32-AE-DAG: splat.b $w[[R3:[0-9]+]], [[R1]]{{\[}}[[IDX]]]
- ; MIPS32-AE-DAG: mfc1 [[R5:\$[0-9]+]], $f[[R3]]
- ; MIPS32-AE-DAG: srl [[R6:\$[0-9]+]], [[R5]], 24
+ ; ALL-DAG: splat.b $w[[R3:[0-9]+]], [[R1]]{{\[}}[[IDX]]]
+ ; ALL-DAG: mfc1 [[R5:\$[0-9]+]], $f[[R3]]
+ ; ALL-DAG: srl [[R6:\$[0-9]+]], [[R5]], 24
ret i32 %5
- ; MIPS32-AE: .size extract_zext_v16i8_vidx
}
define i32 @extract_zext_v8i16_vidx() nounwind {
- ; MIPS32-AE-LABEL: extract_zext_v8i16_vidx:
+ ; ALL-LABEL: extract_zext_v8i16_vidx:
- %1 = load <8 x i16>* @v8i16
- ; MIPS32-AE-DAG: lw [[PTR_V:\$[0-9]+]], %got(v8i16)(
- ; MIPS32-AE-DAG: ld.h [[R1:\$w[0-9]+]], 0([[PTR_V]])
+ %1 = load <8 x i16>, <8 x i16>* @v8i16
+ ; O32-DAG: lw [[PTR_V:\$[0-9]+]], %got(v8i16)(
+ ; N32-DAG: lw [[PTR_V:\$[0-9]+]], %got_disp(v8i16)(
+ ; N64-DAG: ld [[PTR_V:\$[0-9]+]], %got_disp(v8i16)(
+ ; ALL-DAG: ld.h [[R1:\$w[0-9]+]], 0([[PTR_V]])
%2 = add <8 x i16> %1, %1
- ; MIPS32-AE-DAG: addv.h [[R2:\$w[0-9]+]], [[R1]], [[R1]]
+ ; ALL-DAG: addv.h [[R2:\$w[0-9]+]], [[R1]], [[R1]]
- %3 = load i32* @i32
- ; MIPS32-AE-DAG: lw [[PTR_I:\$[0-9]+]], %got(i32)(
- ; MIPS32-AE-DAG: lw [[IDX:\$[0-9]+]], 0([[PTR_I]])
+ %3 = load i32, i32* @i32
+ ; O32-DAG: lw [[PTR_I:\$[0-9]+]], %got(i32)(
+ ; N32-DAG: lw [[PTR_I:\$[0-9]+]], %got_disp(i32)(
+ ; N64-DAG: ld [[PTR_I:\$[0-9]+]], %got_disp(i32)(
+ ; ALL-DAG: lw [[IDX:\$[0-9]+]], 0([[PTR_I]])
%4 = extractelement <8 x i16> %2, i32 %3
%5 = zext i16 %4 to i32
- ; MIPS32-AE-DAG: splat.h $w[[R3:[0-9]+]], [[R1]]{{\[}}[[IDX]]]
- ; MIPS32-AE-DAG: mfc1 [[R5:\$[0-9]+]], $f[[R3]]
- ; MIPS32-AE-DAG: srl [[R6:\$[0-9]+]], [[R5]], 16
+ ; ALL-DAG: splat.h $w[[R3:[0-9]+]], [[R1]]{{\[}}[[IDX]]]
+ ; ALL-DAG: mfc1 [[R5:\$[0-9]+]], $f[[R3]]
+ ; ALL-DAG: srl [[R6:\$[0-9]+]], [[R5]], 16
ret i32 %5
- ; MIPS32-AE: .size extract_zext_v8i16_vidx
}
define i32 @extract_zext_v4i32_vidx() nounwind {
- ; MIPS32-AE-LABEL: extract_zext_v4i32_vidx:
+ ; ALL-LABEL: extract_zext_v4i32_vidx:
- %1 = load <4 x i32>* @v4i32
- ; MIPS32-AE-DAG: lw [[PTR_V:\$[0-9]+]], %got(v4i32)(
- ; MIPS32-AE-DAG: ld.w [[R1:\$w[0-9]+]], 0([[PTR_V]])
+ %1 = load <4 x i32>, <4 x i32>* @v4i32
+ ; O32-DAG: lw [[PTR_V:\$[0-9]+]], %got(v4i32)(
+ ; N32-DAG: lw [[PTR_V:\$[0-9]+]], %got_disp(v4i32)(
+ ; N64-DAG: ld [[PTR_V:\$[0-9]+]], %got_disp(v4i32)(
+ ; ALL-DAG: ld.w [[R1:\$w[0-9]+]], 0([[PTR_V]])
%2 = add <4 x i32> %1, %1
- ; MIPS32-AE-DAG: addv.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
+ ; ALL-DAG: addv.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
- %3 = load i32* @i32
- ; MIPS32-AE-DAG: lw [[PTR_I:\$[0-9]+]], %got(i32)(
- ; MIPS32-AE-DAG: lw [[IDX:\$[0-9]+]], 0([[PTR_I]])
+ %3 = load i32, i32* @i32
+ ; O32-DAG: lw [[PTR_I:\$[0-9]+]], %got(i32)(
+ ; N32-DAG: lw [[PTR_I:\$[0-9]+]], %got_disp(i32)(
+ ; N64-DAG: ld [[PTR_I:\$[0-9]+]], %got_disp(i32)(
+ ; ALL-DAG: lw [[IDX:\$[0-9]+]], 0([[PTR_I]])
%4 = extractelement <4 x i32> %2, i32 %3
- ; MIPS32-AE-DAG: splat.w $w[[R3:[0-9]+]], [[R1]]{{\[}}[[IDX]]]
- ; MIPS32-AE-DAG: mfc1 [[R5:\$[0-9]+]], $f[[R3]]
- ; MIPS32-AE-NOT: srl
+ ; ALL-DAG: splat.w $w[[R3:[0-9]+]], [[R1]]{{\[}}[[IDX]]]
+ ; ALL-DAG: mfc1 [[R5:\$[0-9]+]], $f[[R3]]
+ ; ALL-NOT: srl
ret i32 %4
- ; MIPS32-AE: .size extract_zext_v4i32_vidx
}
define i64 @extract_zext_v2i64_vidx() nounwind {
- ; MIPS32-AE-LABEL: extract_zext_v2i64_vidx:
+ ; ALL-LABEL: extract_zext_v2i64_vidx:
- %1 = load <2 x i64>* @v2i64
- ; MIPS32-AE-DAG: lw [[PTR_V:\$[0-9]+]], %got(v2i64)(
- ; MIPS32-AE-DAG: ld.d [[R1:\$w[0-9]+]], 0([[PTR_V]])
+ %1 = load <2 x i64>, <2 x i64>* @v2i64
+ ; O32-DAG: lw [[PTR_V:\$[0-9]+]], %got(v2i64)(
+ ; N32-DAG: lw [[PTR_V:\$[0-9]+]], %got_disp(v2i64)(
+ ; N64-DAG: ld [[PTR_V:\$[0-9]+]], %got_disp(v2i64)(
+ ; ALL-DAG: ld.d [[R1:\$w[0-9]+]], 0([[PTR_V]])
%2 = add <2 x i64> %1, %1
- ; MIPS32-AE-DAG: addv.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
+ ; ALL-DAG: addv.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
- %3 = load i32* @i32
- ; MIPS32-AE-DAG: lw [[PTR_I:\$[0-9]+]], %got(i32)(
- ; MIPS32-AE-DAG: lw [[IDX:\$[0-9]+]], 0([[PTR_I]])
+ %3 = load i32, i32* @i32
+ ; O32-DAG: lw [[PTR_I:\$[0-9]+]], %got(i32)(
+ ; N32-DAG: lw [[PTR_I:\$[0-9]+]], %got_disp(i32)(
+ ; N64-DAG: ld [[PTR_I:\$[0-9]+]], %got_disp(i32)(
+ ; ALL-DAG: lw [[IDX:\$[0-9]+]], 0([[PTR_I]])
%4 = extractelement <2 x i64> %2, i32 %3
- ; MIPS32-AE-DAG: splat.w $w[[R3:[0-9]+]], [[R1]]{{\[}}[[IDX]]]
- ; MIPS32-AE-DAG: mfc1 [[R5:\$[0-9]+]], $f[[R3]]
- ; MIPS32-AE-DAG: splat.w $w[[R4:[0-9]+]], [[R1]]{{\[}}[[IDX]]]
- ; MIPS32-AE-DAG: mfc1 [[R6:\$[0-9]+]], $f[[R4]]
- ; MIPS32-AE-NOT: srl
+ ; MIPS32-DAG: splat.w $w[[R3:[0-9]+]], [[R1]]{{\[}}[[IDX]]]
+ ; MIPS32-DAG: mfc1 [[R5:\$[0-9]+]], $f[[R3]]
+ ; MIPS32-DAG: splat.w $w[[R4:[0-9]+]], [[R1]]{{\[}}[[IDX]]]
+ ; MIPS32-DAG: mfc1 [[R6:\$[0-9]+]], $f[[R4]]
+ ; MIPS64-DAG: splat.d $w[[R3:[0-9]+]], [[R1]]{{\[}}[[IDX]]]
+ ; MIPS64-DAG: dmfc1 [[R5:\$[0-9]+]], $f[[R3]]
+ ; ALL-NOT: srl
ret i64 %4
- ; MIPS32-AE: .size extract_zext_v2i64_vidx
}
-define void @insert_v16i8(i32 %a) nounwind {
- ; MIPS32-AE-LABEL: insert_v16i8:
+define void @insert_v16i8(i32 signext %a) nounwind {
+ ; ALL-LABEL: insert_v16i8:
- %1 = load <16 x i8>* @v16i8
- ; MIPS32-AE-DAG: ld.b [[R1:\$w[0-9]+]],
+ %1 = load <16 x i8>, <16 x i8>* @v16i8
+ ; ALL-DAG: ld.b [[R1:\$w[0-9]+]],
%a2 = trunc i32 %a to i8
%a3 = sext i8 %a2 to i32
%a4 = trunc i32 %a3 to i8
- ; MIPS32-AE-NOT: andi
- ; MIPS32-AE-NOT: sra
+ ; ALL-NOT: andi
+ ; ALL-NOT: sra
%2 = insertelement <16 x i8> %1, i8 %a4, i32 1
- ; MIPS32-AE-DAG: insert.b [[R1]][1], $4
+ ; ALL-DAG: insert.b [[R1]][1], $4
store <16 x i8> %2, <16 x i8>* @v16i8
- ; MIPS32-AE-DAG: st.b [[R1]]
+ ; ALL-DAG: st.b [[R1]]
ret void
- ; MIPS32-AE: .size insert_v16i8
}
-define void @insert_v8i16(i32 %a) nounwind {
- ; MIPS32-AE-LABEL: insert_v8i16:
+define void @insert_v8i16(i32 signext %a) nounwind {
+ ; ALL-LABEL: insert_v8i16:
- %1 = load <8 x i16>* @v8i16
- ; MIPS32-AE-DAG: ld.h [[R1:\$w[0-9]+]],
+ %1 = load <8 x i16>, <8 x i16>* @v8i16
+ ; ALL-DAG: ld.h [[R1:\$w[0-9]+]],
%a2 = trunc i32 %a to i16
%a3 = sext i16 %a2 to i32
%a4 = trunc i32 %a3 to i16
- ; MIPS32-AE-NOT: andi
- ; MIPS32-AE-NOT: sra
+ ; ALL-NOT: andi
+ ; ALL-NOT: sra
%2 = insertelement <8 x i16> %1, i16 %a4, i32 1
- ; MIPS32-AE-DAG: insert.h [[R1]][1], $4
+ ; ALL-DAG: insert.h [[R1]][1], $4
store <8 x i16> %2, <8 x i16>* @v8i16
- ; MIPS32-AE-DAG: st.h [[R1]]
+ ; ALL-DAG: st.h [[R1]]
ret void
- ; MIPS32-AE: .size insert_v8i16
}
-define void @insert_v4i32(i32 %a) nounwind {
- ; MIPS32-AE-LABEL: insert_v4i32:
+define void @insert_v4i32(i32 signext %a) nounwind {
+ ; ALL-LABEL: insert_v4i32:
- %1 = load <4 x i32>* @v4i32
- ; MIPS32-AE-DAG: ld.w [[R1:\$w[0-9]+]],
+ %1 = load <4 x i32>, <4 x i32>* @v4i32
+ ; ALL-DAG: ld.w [[R1:\$w[0-9]+]],
- ; MIPS32-AE-NOT: andi
- ; MIPS32-AE-NOT: sra
+ ; ALL-NOT: andi
+ ; ALL-NOT: sra
%2 = insertelement <4 x i32> %1, i32 %a, i32 1
- ; MIPS32-AE-DAG: insert.w [[R1]][1], $4
+ ; ALL-DAG: insert.w [[R1]][1], $4
store <4 x i32> %2, <4 x i32>* @v4i32
- ; MIPS32-AE-DAG: st.w [[R1]]
+ ; ALL-DAG: st.w [[R1]]
ret void
- ; MIPS32-AE: .size insert_v4i32
}
-define void @insert_v2i64(i64 %a) nounwind {
- ; MIPS32-AE-LABEL: insert_v2i64:
+define void @insert_v2i64(i64 signext %a) nounwind {
+ ; ALL-LABEL: insert_v2i64:
- %1 = load <2 x i64>* @v2i64
- ; MIPS32-AE-DAG: ld.w [[R1:\$w[0-9]+]],
+ %1 = load <2 x i64>, <2 x i64>* @v2i64
+ ; MIPS32-DAG: ld.w [[R1:\$w[0-9]+]],
+ ; MIPS64-DAG: ld.d [[R1:\$w[0-9]+]],
- ; MIPS32-AE-NOT: andi
- ; MIPS32-AE-NOT: sra
+ ; ALL-NOT: andi
+ ; ALL-NOT: sra
%2 = insertelement <2 x i64> %1, i64 %a, i32 1
- ; MIPS32-AE-DAG: insert.w [[R1]][2], $4
- ; MIPS32-AE-DAG: insert.w [[R1]][3], $5
+ ; MIPS32-DAG: insert.w [[R1]][2], $4
+ ; MIPS32-DAG: insert.w [[R1]][3], $5
+ ; MIPS64-DAG: insert.d [[R1]][1], $4
store <2 x i64> %2, <2 x i64>* @v2i64
- ; MIPS32-AE-DAG: st.w [[R1]]
+ ; MIPS32-DAG: st.w [[R1]]
+ ; MIPS64-DAG: st.d [[R1]]
ret void
- ; MIPS32-AE: .size insert_v2i64
}
-define void @insert_v16i8_vidx(i32 %a) nounwind {
- ; MIPS32-AE: insert_v16i8_vidx:
+define void @insert_v16i8_vidx(i32 signext %a) nounwind {
+ ; ALL-LABEL: insert_v16i8_vidx:
- %1 = load <16 x i8>* @v16i8
- ; MIPS32-AE-DAG: ld.b [[R1:\$w[0-9]+]],
+ %1 = load <16 x i8>, <16 x i8>* @v16i8
+ ; ALL-DAG: ld.b [[R1:\$w[0-9]+]],
- %2 = load i32* @i32
- ; MIPS32-AE-DAG: lw [[PTR_I:\$[0-9]+]], %got(i32)(
- ; MIPS32-AE-DAG: lw [[IDX:\$[0-9]+]], 0([[PTR_I]])
+ %2 = load i32, i32* @i32
+ ; O32-DAG: lw [[PTR_I:\$[0-9]+]], %got(i32)(
+ ; N32-DAG: lw [[PTR_I:\$[0-9]+]], %got_disp(i32)(
+ ; N64-DAG: ld [[PTR_I:\$[0-9]+]], %got_disp(i32)(
+ ; ALL-DAG: lw [[IDX:\$[0-9]+]], 0([[PTR_I]])
%a2 = trunc i32 %a to i8
%a3 = sext i8 %a2 to i32
%a4 = trunc i32 %a3 to i8
- ; MIPS32-AE-NOT: andi
- ; MIPS32-AE-NOT: sra
+ ; ALL-NOT: andi
+ ; ALL-NOT: sra
%3 = insertelement <16 x i8> %1, i8 %a4, i32 %2
- ; MIPS32-AE-DAG: sld.b [[R1]], [[R1]]{{\[}}[[IDX]]]
- ; MIPS32-AE-DAG: insert.b [[R1]][0], $4
- ; MIPS32-AE-DAG: neg [[NIDX:\$[0-9]+]], [[IDX]]
- ; MIPS32-AE-DAG: sld.b [[R1]], [[R1]]{{\[}}[[NIDX]]]
+ ; ALL-DAG: sld.b [[R1]], [[R1]]{{\[}}[[IDX]]]
+ ; ALL-DAG: insert.b [[R1]][0], $4
+ ; O32-DAG: neg [[NIDX:\$[0-9]+]], [[IDX]]
+ ; N32-DAG: neg [[NIDX:\$[0-9]+]], [[IDX]]
+ ; N64-DAG: dneg [[NIDX:\$[0-9]+]], [[IDX]]
+ ; ALL-DAG: sld.b [[R1]], [[R1]]{{\[}}[[NIDX]]]
store <16 x i8> %3, <16 x i8>* @v16i8
- ; MIPS32-AE-DAG: st.b [[R1]]
+ ; ALL-DAG: st.b [[R1]]
ret void
- ; MIPS32-AE: .size insert_v16i8_vidx
}
-define void @insert_v8i16_vidx(i32 %a) nounwind {
- ; MIPS32-AE: insert_v8i16_vidx:
+define void @insert_v8i16_vidx(i32 signext %a) nounwind {
+ ; ALL-LABEL: insert_v8i16_vidx:
- %1 = load <8 x i16>* @v8i16
- ; MIPS32-AE-DAG: ld.h [[R1:\$w[0-9]+]],
+ %1 = load <8 x i16>, <8 x i16>* @v8i16
+ ; ALL-DAG: ld.h [[R1:\$w[0-9]+]],
- %2 = load i32* @i32
- ; MIPS32-AE-DAG: lw [[PTR_I:\$[0-9]+]], %got(i32)(
- ; MIPS32-AE-DAG: lw [[IDX:\$[0-9]+]], 0([[PTR_I]])
+ %2 = load i32, i32* @i32
+ ; O32-DAG: lw [[PTR_I:\$[0-9]+]], %got(i32)(
+ ; N32-DAG: lw [[PTR_I:\$[0-9]+]], %got_disp(i32)(
+ ; N64-DAG: ld [[PTR_I:\$[0-9]+]], %got_disp(i32)(
+ ; ALL-DAG: lw [[IDX:\$[0-9]+]], 0([[PTR_I]])
%a2 = trunc i32 %a to i16
%a3 = sext i16 %a2 to i32
%a4 = trunc i32 %a3 to i16
- ; MIPS32-AE-NOT: andi
- ; MIPS32-AE-NOT: sra
+ ; ALL-NOT: andi
+ ; ALL-NOT: sra
%3 = insertelement <8 x i16> %1, i16 %a4, i32 %2
- ; MIPS32-AE-DAG: sll [[BIDX:\$[0-9]+]], [[IDX]], 1
- ; MIPS32-AE-DAG: sld.b [[R1]], [[R1]]{{\[}}[[BIDX]]]
- ; MIPS32-AE-DAG: insert.h [[R1]][0], $4
- ; MIPS32-AE-DAG: neg [[NIDX:\$[0-9]+]], [[BIDX]]
- ; MIPS32-AE-DAG: sld.b [[R1]], [[R1]]{{\[}}[[NIDX]]]
+ ; ALL-DAG: sll [[BIDX:\$[0-9]+]], [[IDX]], 1
+ ; ALL-DAG: sld.b [[R1]], [[R1]]{{\[}}[[BIDX]]]
+ ; ALL-DAG: insert.h [[R1]][0], $4
+ ; O32-DAG: neg [[NIDX:\$[0-9]+]], [[BIDX]]
+ ; N32-DAG: neg [[NIDX:\$[0-9]+]], [[BIDX]]
+ ; N64-DAG: dneg [[NIDX:\$[0-9]+]], [[BIDX]]
+ ; ALL-DAG: sld.b [[R1]], [[R1]]{{\[}}[[NIDX]]]
store <8 x i16> %3, <8 x i16>* @v8i16
- ; MIPS32-AE-DAG: st.h [[R1]]
+ ; ALL-DAG: st.h [[R1]]
ret void
- ; MIPS32-AE: .size insert_v8i16_vidx
}
-define void @insert_v4i32_vidx(i32 %a) nounwind {
- ; MIPS32-AE: insert_v4i32_vidx:
+define void @insert_v4i32_vidx(i32 signext %a) nounwind {
+ ; ALL-LABEL: insert_v4i32_vidx:
- %1 = load <4 x i32>* @v4i32
- ; MIPS32-AE-DAG: ld.w [[R1:\$w[0-9]+]],
+ %1 = load <4 x i32>, <4 x i32>* @v4i32
+ ; ALL-DAG: ld.w [[R1:\$w[0-9]+]],
- %2 = load i32* @i32
- ; MIPS32-AE-DAG: lw [[PTR_I:\$[0-9]+]], %got(i32)(
- ; MIPS32-AE-DAG: lw [[IDX:\$[0-9]+]], 0([[PTR_I]])
+ %2 = load i32, i32* @i32
+ ; O32-DAG: lw [[PTR_I:\$[0-9]+]], %got(i32)(
+ ; N32-DAG: lw [[PTR_I:\$[0-9]+]], %got_disp(i32)(
+ ; N64-DAG: ld [[PTR_I:\$[0-9]+]], %got_disp(i32)(
+ ; ALL-DAG: lw [[IDX:\$[0-9]+]], 0([[PTR_I]])
- ; MIPS32-AE-NOT: andi
- ; MIPS32-AE-NOT: sra
+ ; ALL-NOT: andi
+ ; ALL-NOT: sra
%3 = insertelement <4 x i32> %1, i32 %a, i32 %2
- ; MIPS32-AE-DAG: sll [[BIDX:\$[0-9]+]], [[IDX]], 2
- ; MIPS32-AE-DAG: sld.b [[R1]], [[R1]]{{\[}}[[BIDX]]]
- ; MIPS32-AE-DAG: insert.w [[R1]][0], $4
- ; MIPS32-AE-DAG: neg [[NIDX:\$[0-9]+]], [[BIDX]]
- ; MIPS32-AE-DAG: sld.b [[R1]], [[R1]]{{\[}}[[NIDX]]]
+ ; ALL-DAG: sll [[BIDX:\$[0-9]+]], [[IDX]], 2
+ ; ALL-DAG: sld.b [[R1]], [[R1]]{{\[}}[[BIDX]]]
+ ; ALL-DAG: insert.w [[R1]][0], $4
+ ; O32-DAG: neg [[NIDX:\$[0-9]+]], [[BIDX]]
+ ; N32-DAG: neg [[NIDX:\$[0-9]+]], [[BIDX]]
+ ; N64-DAG: dneg [[NIDX:\$[0-9]+]], [[BIDX]]
+ ; ALL-DAG: sld.b [[R1]], [[R1]]{{\[}}[[NIDX]]]
store <4 x i32> %3, <4 x i32>* @v4i32
- ; MIPS32-AE-DAG: st.w [[R1]]
+ ; ALL-DAG: st.w [[R1]]
ret void
- ; MIPS32-AE: .size insert_v4i32_vidx
}
-define void @insert_v2i64_vidx(i64 %a) nounwind {
- ; MIPS32-AE: insert_v2i64_vidx:
+define void @insert_v2i64_vidx(i64 signext %a) nounwind {
+ ; ALL-LABEL: insert_v2i64_vidx:
- %1 = load <2 x i64>* @v2i64
- ; MIPS32-AE-DAG: ld.w [[R1:\$w[0-9]+]],
+ %1 = load <2 x i64>, <2 x i64>* @v2i64
+ ; MIPS32-DAG: ld.w [[R1:\$w[0-9]+]],
+ ; MIPS64-DAG: ld.d [[R1:\$w[0-9]+]],
- %2 = load i32* @i32
- ; MIPS32-AE-DAG: lw [[PTR_I:\$[0-9]+]], %got(i32)(
- ; MIPS32-AE-DAG: lw [[IDX:\$[0-9]+]], 0([[PTR_I]])
+ %2 = load i32, i32* @i32
+ ; O32-DAG: lw [[PTR_I:\$[0-9]+]], %got(i32)(
+ ; N32-DAG: lw [[PTR_I:\$[0-9]+]], %got_disp(i32)(
+ ; N64-DAG: ld [[PTR_I:\$[0-9]+]], %got_disp(i32)(
+ ; ALL-DAG: lw [[IDX:\$[0-9]+]], 0([[PTR_I]])
- ; MIPS32-AE-NOT: andi
- ; MIPS32-AE-NOT: sra
+ ; ALL-NOT: andi
+ ; ALL-NOT: sra
%3 = insertelement <2 x i64> %1, i64 %a, i32 %2
; TODO: This code could be a lot better but it works. The legalizer splits
; 64-bit inserts into two 32-bit inserts because there is no i64 type on
; MIPS32. The obvious optimisation is to perform both insert.w's at once while
; the vector is rotated.
- ; MIPS32-AE-DAG: sll [[BIDX:\$[0-9]+]], [[IDX]], 2
- ; MIPS32-AE-DAG: sld.b [[R1]], [[R1]]{{\[}}[[BIDX]]]
- ; MIPS32-AE-DAG: insert.w [[R1]][0], $4
- ; MIPS32-AE-DAG: neg [[NIDX:\$[0-9]+]], [[BIDX]]
- ; MIPS32-AE-DAG: sld.b [[R1]], [[R1]]{{\[}}[[NIDX]]]
- ; MIPS32-AE-DAG: addiu [[IDX2:\$[0-9]+]], [[IDX]], 1
- ; MIPS32-AE-DAG: sll [[BIDX:\$[0-9]+]], [[IDX2]], 2
- ; MIPS32-AE-DAG: sld.b [[R1]], [[R1]]{{\[}}[[BIDX]]]
- ; MIPS32-AE-DAG: insert.w [[R1]][0], $5
- ; MIPS32-AE-DAG: neg [[NIDX:\$[0-9]+]], [[BIDX]]
- ; MIPS32-AE-DAG: sld.b [[R1]], [[R1]]{{\[}}[[NIDX]]]
+ ; MIPS32-DAG: sll [[BIDX:\$[0-9]+]], [[IDX]], 2
+ ; MIPS32-DAG: sld.b [[R1]], [[R1]]{{\[}}[[BIDX]]]
+ ; MIPS32-DAG: insert.w [[R1]][0], $4
+ ; MIPS32-DAG: neg [[NIDX:\$[0-9]+]], [[BIDX]]
+ ; MIPS32-DAG: sld.b [[R1]], [[R1]]{{\[}}[[NIDX]]]
+ ; MIPS32-DAG: addiu [[IDX2:\$[0-9]+]], [[IDX]], 1
+ ; MIPS32-DAG: sll [[BIDX:\$[0-9]+]], [[IDX2]], 2
+ ; MIPS32-DAG: sld.b [[R1]], [[R1]]{{\[}}[[BIDX]]]
+ ; MIPS32-DAG: insert.w [[R1]][0], $5
+ ; MIPS32-DAG: neg [[NIDX:\$[0-9]+]], [[BIDX]]
+ ; MIPS32-DAG: sld.b [[R1]], [[R1]]{{\[}}[[NIDX]]]
+
+ ; MIPS64-DAG: sll [[BIDX:\$[0-9]+]], [[IDX]], 3
+ ; MIPS64-DAG: sld.b [[R1]], [[R1]]{{\[}}[[BIDX]]]
+ ; MIPS64-DAG: insert.d [[R1]][0], $4
+ ; N32-DAG: neg [[NIDX:\$[0-9]+]], [[BIDX]]
+ ; N64-DAG: dneg [[NIDX:\$[0-9]+]], [[BIDX]]
+ ; MIPS64-DAG: sld.b [[R1]], [[R1]]{{\[}}[[NIDX]]]
store <2 x i64> %3, <2 x i64>* @v2i64
- ; MIPS32-AE-DAG: st.w [[R1]]
+ ; MIPS32-DAG: st.w [[R1]]
+ ; MIPS64-DAG: st.d [[R1]]
ret void
- ; MIPS32-AE: .size insert_v2i64_vidx
}
define void @truncstore() nounwind {
- ; MIPS32-AE-LABEL: truncstore:
+ ; ALL-LABEL: truncstore:
store volatile <4 x i8> <i8 -1, i8 -1, i8 -1, i8 -1>, <4 x i8>*@v4i8
; TODO: What code should be emitted?
ret void
- ; MIPS32-AE: .size truncstore
}
diff --git a/test/CodeGen/Mips/msa/basic_operations_float.ll b/test/CodeGen/Mips/msa/basic_operations_float.ll
index a0c9d29e231a..f19cb9b7c2e5 100644
--- a/test/CodeGen/Mips/msa/basic_operations_float.ll
+++ b/test/CodeGen/Mips/msa/basic_operations_float.ll
@@ -1,5 +1,9 @@
-; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck -check-prefix=MIPS32 %s
-; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck -check-prefix=MIPS32 %s
+; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck -check-prefix=ALL -check-prefix=O32 %s
+; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck -check-prefix=ALL -check-prefix=O32 %s
+; RUN: llc -march=mips64 -target-abi=n32 -mattr=+msa,+fp64 < %s | FileCheck -check-prefix=ALL -check-prefix=N32 %s
+; RUN: llc -march=mips64el -target-abi=n32 -mattr=+msa,+fp64 < %s | FileCheck -check-prefix=ALL -check-prefix=N32 %s
+; RUN: llc -march=mips64 -mattr=+msa,+fp64 < %s | FileCheck -check-prefix=ALL -check-prefix=N64 %s
+; RUN: llc -march=mips64el -mattr=+msa,+fp64 < %s | FileCheck -check-prefix=ALL -check-prefix=N64 %s
@v4f32 = global <4 x float> <float 0.0, float 0.0, float 0.0, float 0.0>
@v2f64 = global <2 x double> <double 0.0, double 0.0>
@@ -8,322 +12,341 @@
@f64 = global double 0.0
define void @const_v4f32() nounwind {
- ; MIPS32-LABEL: const_v4f32:
+ ; ALL-LABEL: const_v4f32:
store volatile <4 x float> <float 0.0, float 0.0, float 0.0, float 0.0>, <4 x float>*@v4f32
- ; MIPS32: ldi.b [[R1:\$w[0-9]+]], 0
+ ; ALL: ldi.b [[R1:\$w[0-9]+]], 0
store volatile <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, <4 x float>*@v4f32
- ; MIPS32: lui [[R1:\$[0-9]+]], 16256
- ; MIPS32: fill.w [[R2:\$w[0-9]+]], [[R1]]
+ ; ALL: lui [[R1:\$[0-9]+]], 16256
+ ; ALL: fill.w [[R2:\$w[0-9]+]], [[R1]]
store volatile <4 x float> <float 1.0, float 1.0, float 1.0, float 31.0>, <4 x float>*@v4f32
- ; MIPS32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %lo($
- ; MIPS32: ld.w [[R1:\$w[0-9]+]], 0([[G_PTR]])
+ ; O32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %lo($
+ ; N32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst($
+ ; N64: daddiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst($
+ ; ALL: ld.w [[R1:\$w[0-9]+]], 0([[G_PTR]])
store volatile <4 x float> <float 65537.0, float 65537.0, float 65537.0, float 65537.0>, <4 x float>*@v4f32
- ; MIPS32: lui [[R1:\$[0-9]+]], 18304
- ; MIPS32: ori [[R2:\$[0-9]+]], [[R1]], 128
- ; MIPS32: fill.w [[R3:\$w[0-9]+]], [[R2]]
+ ; ALL: lui [[R1:\$[0-9]+]], 18304
+ ; ALL: ori [[R2:\$[0-9]+]], [[R1]], 128
+ ; ALL: fill.w [[R3:\$w[0-9]+]], [[R2]]
store volatile <4 x float> <float 1.0, float 2.0, float 1.0, float 2.0>, <4 x float>*@v4f32
- ; MIPS32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %lo($
- ; MIPS32: ld.w [[R1:\$w[0-9]+]], 0([[G_PTR]])
+ ; O32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %lo($
+ ; N32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst($
+ ; N64: daddiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst($
+ ; ALL: ld.w [[R1:\$w[0-9]+]], 0([[G_PTR]])
store volatile <4 x float> <float 3.0, float 4.0, float 5.0, float 6.0>, <4 x float>*@v4f32
- ; MIPS32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %lo($
- ; MIPS32: ld.w [[R1:\$w[0-9]+]], 0([[G_PTR]])
+ ; O32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %lo($
+ ; N32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst($
+ ; N64: daddiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst($
+ ; ALL: ld.w [[R1:\$w[0-9]+]], 0([[G_PTR]])
ret void
- ; MIPS32: .size const_v4f32
}
define void @const_v2f64() nounwind {
- ; MIPS32-LABEL: const_v2f64:
+ ; ALL-LABEL: const_v2f64:
store volatile <2 x double> <double 0.0, double 0.0>, <2 x double>*@v2f64
- ; MIPS32: ldi.b [[R1:\$w[0-9]+]], 0
+ ; ALL: ldi.b [[R1:\$w[0-9]+]], 0
store volatile <2 x double> <double 72340172838076673.0, double 72340172838076673.0>, <2 x double>*@v2f64
- ; MIPS32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %lo($
- ; MIPS32: ld.d [[R1:\$w[0-9]+]], 0([[G_PTR]])
+ ; O32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %lo($
+ ; N32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst($
+ ; N64: daddiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst($
+ ; ALL: ld.d [[R1:\$w[0-9]+]], 0([[G_PTR]])
store volatile <2 x double> <double 281479271743489.0, double 281479271743489.0>, <2 x double>*@v2f64
- ; MIPS32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %lo($
- ; MIPS32: ld.d [[R1:\$w[0-9]+]], 0([[G_PTR]])
+ ; O32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %lo($
+ ; N32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst($
+ ; N64: daddiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst($
+ ; ALL: ld.d [[R1:\$w[0-9]+]], 0([[G_PTR]])
store volatile <2 x double> <double 4294967297.0, double 4294967297.0>, <2 x double>*@v2f64
- ; MIPS32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %lo($
- ; MIPS32: ld.d [[R1:\$w[0-9]+]], 0([[G_PTR]])
+ ; O32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %lo($
+ ; N32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst($
+ ; N64: daddiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst($
+ ; ALL: ld.d [[R1:\$w[0-9]+]], 0([[G_PTR]])
store volatile <2 x double> <double 1.0, double 1.0>, <2 x double>*@v2f64
- ; MIPS32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %lo($
- ; MIPS32: ld.d [[R1:\$w[0-9]+]], 0([[G_PTR]])
+ ; O32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %lo($
+ ; N32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst($
+ ; N64: daddiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst($
+ ; ALL: ld.d [[R1:\$w[0-9]+]], 0([[G_PTR]])
store volatile <2 x double> <double 1.0, double 31.0>, <2 x double>*@v2f64
- ; MIPS32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %lo($
- ; MIPS32: ld.d [[R1:\$w[0-9]+]], 0([[G_PTR]])
+ ; O32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %lo($
+ ; N32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst($
+ ; N64: daddiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst($
+ ; ALL: ld.d [[R1:\$w[0-9]+]], 0([[G_PTR]])
store volatile <2 x double> <double 3.0, double 4.0>, <2 x double>*@v2f64
- ; MIPS32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %lo($
- ; MIPS32: ld.d [[R1:\$w[0-9]+]], 0([[G_PTR]])
+ ; O32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %lo($
+ ; N32: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst($
+ ; N64: daddiu [[G_PTR:\$[0-9]+]], {{.*}}, %got_ofst($
+ ; ALL: ld.d [[R1:\$w[0-9]+]], 0([[G_PTR]])
ret void
- ; MIPS32: .size const_v2f64
}
define void @nonconst_v4f32() nounwind {
- ; MIPS32-LABEL: nonconst_v4f32:
+ ; ALL-LABEL: nonconst_v4f32:
- %1 = load float *@f32
+ %1 = load float , float *@f32
%2 = insertelement <4 x float> undef, float %1, i32 0
%3 = insertelement <4 x float> %2, float %1, i32 1
%4 = insertelement <4 x float> %3, float %1, i32 2
%5 = insertelement <4 x float> %4, float %1, i32 3
store volatile <4 x float> %5, <4 x float>*@v4f32
- ; MIPS32: lwc1 $f[[R1:[0-9]+]], 0(
- ; MIPS32: splati.w [[R2:\$w[0-9]+]], $w[[R1]]
+ ; ALL: lwc1 $f[[R1:[0-9]+]], 0(
+ ; ALL: splati.w [[R2:\$w[0-9]+]], $w[[R1]]
ret void
- ; MIPS32: .size nonconst_v4f32
}
define void @nonconst_v2f64() nounwind {
- ; MIPS32-LABEL: nonconst_v2f64:
+ ; ALL-LABEL: nonconst_v2f64:
- %1 = load double *@f64
+ %1 = load double , double *@f64
%2 = insertelement <2 x double> undef, double %1, i32 0
%3 = insertelement <2 x double> %2, double %1, i32 1
store volatile <2 x double> %3, <2 x double>*@v2f64
- ; MIPS32: ldc1 $f[[R1:[0-9]+]], 0(
- ; MIPS32: splati.d [[R2:\$w[0-9]+]], $w[[R1]]
+ ; ALL: ldc1 $f[[R1:[0-9]+]], 0(
+ ; ALL: splati.d [[R2:\$w[0-9]+]], $w[[R1]]
ret void
- ; MIPS32: .size nonconst_v2f64
}
define float @extract_v4f32() nounwind {
- ; MIPS32-LABEL: extract_v4f32:
+ ; ALL-LABEL: extract_v4f32:
- %1 = load <4 x float>* @v4f32
- ; MIPS32-DAG: ld.w [[R1:\$w[0-9]+]],
+ %1 = load <4 x float>, <4 x float>* @v4f32
+ ; ALL-DAG: ld.w [[R1:\$w[0-9]+]],
%2 = fadd <4 x float> %1, %1
- ; MIPS32-DAG: fadd.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
+ ; ALL-DAG: fadd.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
%3 = extractelement <4 x float> %2, i32 1
; Element 1 can be obtained by splatting it across the vector and extracting
; $w0:sub_lo
- ; MIPS32-DAG: splati.w $w0, [[R1]][1]
+ ; ALL-DAG: splati.w $w0, [[R1]][1]
ret float %3
- ; MIPS32: .size extract_v4f32
}
define float @extract_v4f32_elt0() nounwind {
- ; MIPS32-LABEL: extract_v4f32_elt0:
+ ; ALL-LABEL: extract_v4f32_elt0:
- %1 = load <4 x float>* @v4f32
- ; MIPS32-DAG: ld.w [[R1:\$w[0-9]+]],
+ %1 = load <4 x float>, <4 x float>* @v4f32
+ ; ALL-DAG: ld.w [[R1:\$w[0-9]+]],
%2 = fadd <4 x float> %1, %1
- ; MIPS32-DAG: fadd.w $w0, [[R1]], [[R1]]
+ ; ALL-DAG: fadd.w $w0, [[R1]], [[R1]]
%3 = extractelement <4 x float> %2, i32 0
; Element 0 can be obtained by extracting $w0:sub_lo ($f0)
- ; MIPS32-NOT: copy_u.w
- ; MIPS32-NOT: mtc1
+ ; ALL-NOT: copy_u.w
+ ; ALL-NOT: mtc1
ret float %3
- ; MIPS32: .size extract_v4f32_elt0
}
define float @extract_v4f32_elt2() nounwind {
- ; MIPS32-LABEL: extract_v4f32_elt2:
+ ; ALL-LABEL: extract_v4f32_elt2:
- %1 = load <4 x float>* @v4f32
- ; MIPS32-DAG: ld.w [[R1:\$w[0-9]+]],
+ %1 = load <4 x float>, <4 x float>* @v4f32
+ ; ALL-DAG: ld.w [[R1:\$w[0-9]+]],
%2 = fadd <4 x float> %1, %1
- ; MIPS32-DAG: fadd.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
+ ; ALL-DAG: fadd.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
%3 = extractelement <4 x float> %2, i32 2
; Element 2 can be obtained by splatting it across the vector and extracting
; $w0:sub_lo
- ; MIPS32-DAG: splati.w $w0, [[R1]][2]
+ ; ALL-DAG: splati.w $w0, [[R1]][2]
ret float %3
- ; MIPS32: .size extract_v4f32_elt2
}
define float @extract_v4f32_vidx() nounwind {
- ; MIPS32-LABEL: extract_v4f32_vidx:
+ ; ALL-LABEL: extract_v4f32_vidx:
- %1 = load <4 x float>* @v4f32
- ; MIPS32-DAG: lw [[PTR_V:\$[0-9]+]], %got(v4f32)(
- ; MIPS32-DAG: ld.w [[R1:\$w[0-9]+]], 0([[PTR_V]])
+ %1 = load <4 x float>, <4 x float>* @v4f32
+ ; O32-DAG: lw [[PTR_V:\$[0-9]+]], %got(v4f32)(
+ ; N32-DAG: lw [[PTR_V:\$[0-9]+]], %got_disp(v4f32)(
+ ; N64-DAG: ld [[PTR_V:\$[0-9]+]], %got_disp(v4f32)(
+ ; ALL-DAG: ld.w [[R1:\$w[0-9]+]], 0([[PTR_V]])
%2 = fadd <4 x float> %1, %1
- ; MIPS32-DAG: fadd.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
+ ; ALL-DAG: fadd.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
- %3 = load i32* @i32
- ; MIPS32-DAG: lw [[PTR_I:\$[0-9]+]], %got(i32)(
- ; MIPS32-DAG: lw [[IDX:\$[0-9]+]], 0([[PTR_I]])
+ %3 = load i32, i32* @i32
+ ; O32-DAG: lw [[PTR_I:\$[0-9]+]], %got(i32)(
+ ; N32-DAG: lw [[PTR_I:\$[0-9]+]], %got_disp(i32)(
+ ; N64-DAG: ld [[PTR_I:\$[0-9]+]], %got_disp(i32)(
+ ; ALL-DAG: lw [[IDX:\$[0-9]+]], 0([[PTR_I]])
%4 = extractelement <4 x float> %2, i32 %3
- ; MIPS32-DAG: splat.w $w0, [[R1]]{{\[}}[[IDX]]]
+ ; ALL-DAG: splat.w $w0, [[R1]]{{\[}}[[IDX]]]
ret float %4
- ; MIPS32: .size extract_v4f32_vidx
}
define double @extract_v2f64() nounwind {
- ; MIPS32-LABEL: extract_v2f64:
+ ; ALL-LABEL: extract_v2f64:
- %1 = load <2 x double>* @v2f64
- ; MIPS32-DAG: ld.d [[R1:\$w[0-9]+]],
+ %1 = load <2 x double>, <2 x double>* @v2f64
+ ; ALL-DAG: ld.d [[R1:\$w[0-9]+]],
%2 = fadd <2 x double> %1, %1
- ; MIPS32-DAG: fadd.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
+ ; ALL-DAG: fadd.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
%3 = extractelement <2 x double> %2, i32 1
; Element 1 can be obtained by splatting it across the vector and extracting
; $w0:sub_64
- ; MIPS32-DAG: splati.d $w0, [[R1]][1]
- ; MIPS32-NOT: copy_u.w
- ; MIPS32-NOT: mtc1
- ; MIPS32-NOT: mthc1
- ; MIPS32-NOT: sll
- ; MIPS32-NOT: sra
+ ; ALL-DAG: splati.d $w0, [[R1]][1]
+ ; ALL-NOT: copy_u.w
+ ; ALL-NOT: mtc1
+ ; ALL-NOT: mthc1
+ ; ALL-NOT: sll
+ ; ALL-NOT: sra
ret double %3
- ; MIPS32: .size extract_v2f64
}
define double @extract_v2f64_elt0() nounwind {
- ; MIPS32-LABEL: extract_v2f64_elt0:
+ ; ALL-LABEL: extract_v2f64_elt0:
- %1 = load <2 x double>* @v2f64
- ; MIPS32-DAG: ld.d [[R1:\$w[0-9]+]],
+ %1 = load <2 x double>, <2 x double>* @v2f64
+ ; ALL-DAG: ld.d [[R1:\$w[0-9]+]],
%2 = fadd <2 x double> %1, %1
- ; MIPS32-DAG: fadd.d $w0, [[R1]], [[R1]]
+ ; ALL-DAG: fadd.d $w0, [[R1]], [[R1]]
%3 = extractelement <2 x double> %2, i32 0
; Element 0 can be obtained by extracting $w0:sub_64 ($f0)
- ; MIPS32-NOT: copy_u.w
- ; MIPS32-NOT: mtc1
- ; MIPS32-NOT: mthc1
- ; MIPS32-NOT: sll
- ; MIPS32-NOT: sra
+ ; ALL-NOT: copy_u.w
+ ; ALL-NOT: mtc1
+ ; ALL-NOT: mthc1
+ ; ALL-NOT: sll
+ ; ALL-NOT: sra
ret double %3
- ; MIPS32: .size extract_v2f64_elt0
}
define double @extract_v2f64_vidx() nounwind {
- ; MIPS32-LABEL: extract_v2f64_vidx:
+ ; ALL-LABEL: extract_v2f64_vidx:
- %1 = load <2 x double>* @v2f64
- ; MIPS32-DAG: lw [[PTR_V:\$[0-9]+]], %got(v2f64)(
- ; MIPS32-DAG: ld.d [[R1:\$w[0-9]+]], 0([[PTR_V]])
+ %1 = load <2 x double>, <2 x double>* @v2f64
+ ; O32-DAG: lw [[PTR_V:\$[0-9]+]], %got(v2f64)(
+ ; N32-DAG: lw [[PTR_V:\$[0-9]+]], %got_disp(v2f64)(
+ ; N64-DAG: ld [[PTR_V:\$[0-9]+]], %got_disp(v2f64)(
+ ; ALL-DAG: ld.d [[R1:\$w[0-9]+]], 0([[PTR_V]])
%2 = fadd <2 x double> %1, %1
- ; MIPS32-DAG: fadd.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
+ ; ALL-DAG: fadd.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
- %3 = load i32* @i32
- ; MIPS32-DAG: lw [[PTR_I:\$[0-9]+]], %got(i32)(
- ; MIPS32-DAG: lw [[IDX:\$[0-9]+]], 0([[PTR_I]])
+ %3 = load i32, i32* @i32
+ ; O32-DAG: lw [[PTR_I:\$[0-9]+]], %got(i32)(
+ ; N32-DAG: lw [[PTR_I:\$[0-9]+]], %got_disp(i32)(
+ ; N64-DAG: ld [[PTR_I:\$[0-9]+]], %got_disp(i32)(
+ ; ALL-DAG: lw [[IDX:\$[0-9]+]], 0([[PTR_I]])
%4 = extractelement <2 x double> %2, i32 %3
- ; MIPS32-DAG: splat.d $w0, [[R1]]{{\[}}[[IDX]]]
+ ; ALL-DAG: splat.d $w0, [[R1]]{{\[}}[[IDX]]]
ret double %4
- ; MIPS32: .size extract_v2f64_vidx
}
define void @insert_v4f32(float %a) nounwind {
- ; MIPS32-LABEL: insert_v4f32:
+ ; ALL-LABEL: insert_v4f32:
- %1 = load <4 x float>* @v4f32
- ; MIPS32-DAG: ld.w [[R1:\$w[0-9]+]],
+ %1 = load <4 x float>, <4 x float>* @v4f32
+ ; ALL-DAG: ld.w [[R1:\$w[0-9]+]],
%2 = insertelement <4 x float> %1, float %a, i32 1
; float argument passed in $f12
- ; MIPS32-DAG: insve.w [[R1]][1], $w12[0]
+ ; ALL-DAG: insve.w [[R1]][1], $w12[0]
store <4 x float> %2, <4 x float>* @v4f32
- ; MIPS32-DAG: st.w [[R1]]
+ ; ALL-DAG: st.w [[R1]]
ret void
- ; MIPS32: .size insert_v4f32
}
define void @insert_v2f64(double %a) nounwind {
- ; MIPS32-LABEL: insert_v2f64:
+ ; ALL-LABEL: insert_v2f64:
- %1 = load <2 x double>* @v2f64
- ; MIPS32-DAG: ld.d [[R1:\$w[0-9]+]],
+ %1 = load <2 x double>, <2 x double>* @v2f64
+ ; ALL-DAG: ld.d [[R1:\$w[0-9]+]],
%2 = insertelement <2 x double> %1, double %a, i32 1
; double argument passed in $f12
- ; MIPS32-DAG: insve.d [[R1]][1], $w12[0]
+ ; ALL-DAG: insve.d [[R1]][1], $w12[0]
store <2 x double> %2, <2 x double>* @v2f64
- ; MIPS32-DAG: st.d [[R1]]
+ ; ALL-DAG: st.d [[R1]]
ret void
- ; MIPS32: .size insert_v2f64
}
define void @insert_v4f32_vidx(float %a) nounwind {
- ; MIPS32-LABEL: insert_v4f32_vidx:
+ ; ALL-LABEL: insert_v4f32_vidx:
- %1 = load <4 x float>* @v4f32
- ; MIPS32-DAG: lw [[PTR_V:\$[0-9]+]], %got(v4f32)(
- ; MIPS32-DAG: ld.w [[R1:\$w[0-9]+]], 0([[PTR_V]])
+ %1 = load <4 x float>, <4 x float>* @v4f32
+ ; O32-DAG: lw [[PTR_V:\$[0-9]+]], %got(v4f32)(
+ ; N32-DAG: lw [[PTR_V:\$[0-9]+]], %got_disp(v4f32)(
+ ; N64-DAG: ld [[PTR_V:\$[0-9]+]], %got_disp(v4f32)(
+ ; ALL-DAG: ld.w [[R1:\$w[0-9]+]], 0([[PTR_V]])
- %2 = load i32* @i32
- ; MIPS32-DAG: lw [[PTR_I:\$[0-9]+]], %got(i32)(
- ; MIPS32-DAG: lw [[IDX:\$[0-9]+]], 0([[PTR_I]])
+ %2 = load i32, i32* @i32
+ ; O32-DAG: lw [[PTR_I:\$[0-9]+]], %got(i32)(
+ ; N32-DAG: lw [[PTR_I:\$[0-9]+]], %got_disp(i32)(
+ ; N64-DAG: ld [[PTR_I:\$[0-9]+]], %got_disp(i32)(
+ ; ALL-DAG: lw [[IDX:\$[0-9]+]], 0([[PTR_I]])
%3 = insertelement <4 x float> %1, float %a, i32 %2
; float argument passed in $f12
- ; MIPS32-DAG: sll [[BIDX:\$[0-9]+]], [[IDX]], 2
- ; MIPS32-DAG: sld.b [[R1]], [[R1]]{{\[}}[[BIDX]]]
- ; MIPS32-DAG: insve.w [[R1]][0], $w12[0]
- ; MIPS32-DAG: neg [[NIDX:\$[0-9]+]], [[BIDX]]
- ; MIPS32-DAG: sld.b [[R1]], [[R1]]{{\[}}[[NIDX]]]
+ ; ALL-DAG: sll [[BIDX:\$[0-9]+]], [[IDX]], 2
+ ; ALL-DAG: sld.b [[R1]], [[R1]]{{\[}}[[BIDX]]]
+ ; ALL-DAG: insve.w [[R1]][0], $w12[0]
+ ; ALL-DAG: neg [[NIDX:\$[0-9]+]], [[BIDX]]
+ ; ALL-DAG: sld.b [[R1]], [[R1]]{{\[}}[[NIDX]]]
store <4 x float> %3, <4 x float>* @v4f32
- ; MIPS32-DAG: st.w [[R1]]
+ ; ALL-DAG: st.w [[R1]]
ret void
- ; MIPS32: .size insert_v4f32_vidx
}
define void @insert_v2f64_vidx(double %a) nounwind {
- ; MIPS32-LABEL: insert_v2f64_vidx:
+ ; ALL-LABEL: insert_v2f64_vidx:
- %1 = load <2 x double>* @v2f64
- ; MIPS32-DAG: lw [[PTR_V:\$[0-9]+]], %got(v2f64)(
- ; MIPS32-DAG: ld.d [[R1:\$w[0-9]+]], 0([[PTR_V]])
+ %1 = load <2 x double>, <2 x double>* @v2f64
+ ; O32-DAG: lw [[PTR_V:\$[0-9]+]], %got(v2f64)(
+ ; N32-DAG: lw [[PTR_V:\$[0-9]+]], %got_disp(v2f64)(
+ ; N64-DAG: ld [[PTR_V:\$[0-9]+]], %got_disp(v2f64)(
+ ; ALL-DAG: ld.d [[R1:\$w[0-9]+]], 0([[PTR_V]])
- %2 = load i32* @i32
- ; MIPS32-DAG: lw [[PTR_I:\$[0-9]+]], %got(i32)(
- ; MIPS32-DAG: lw [[IDX:\$[0-9]+]], 0([[PTR_I]])
+ %2 = load i32, i32* @i32
+ ; O32-DAG: lw [[PTR_I:\$[0-9]+]], %got(i32)(
+ ; N32-DAG: lw [[PTR_I:\$[0-9]+]], %got_disp(i32)(
+ ; N64-DAG: ld [[PTR_I:\$[0-9]+]], %got_disp(i32)(
+ ; ALL-DAG: lw [[IDX:\$[0-9]+]], 0([[PTR_I]])
%3 = insertelement <2 x double> %1, double %a, i32 %2
; double argument passed in $f12
- ; MIPS32-DAG: sll [[BIDX:\$[0-9]+]], [[IDX]], 3
- ; MIPS32-DAG: sld.b [[R1]], [[R1]]{{\[}}[[BIDX]]]
- ; MIPS32-DAG: insve.d [[R1]][0], $w12[0]
- ; MIPS32-DAG: neg [[NIDX:\$[0-9]+]], [[BIDX]]
- ; MIPS32-DAG: sld.b [[R1]], [[R1]]{{\[}}[[NIDX]]]
+ ; ALL-DAG: sll [[BIDX:\$[0-9]+]], [[IDX]], 3
+ ; ALL-DAG: sld.b [[R1]], [[R1]]{{\[}}[[BIDX]]]
+ ; ALL-DAG: insve.d [[R1]][0], $w12[0]
+ ; ALL-DAG: neg [[NIDX:\$[0-9]+]], [[BIDX]]
+ ; ALL-DAG: sld.b [[R1]], [[R1]]{{\[}}[[NIDX]]]
store <2 x double> %3, <2 x double>* @v2f64
- ; MIPS32-DAG: st.d [[R1]]
+ ; ALL-DAG: st.d [[R1]]
ret void
- ; MIPS32: .size insert_v2f64_vidx
}
diff --git a/test/CodeGen/Mips/msa/bit.ll b/test/CodeGen/Mips/msa/bit.ll
index 59ddbe17a33f..f0057307bbf7 100644
--- a/test/CodeGen/Mips/msa/bit.ll
+++ b/test/CodeGen/Mips/msa/bit.ll
@@ -8,7 +8,7 @@
define void @llvm_mips_sat_s_b_test() nounwind {
entry:
- %0 = load <16 x i8>* @llvm_mips_sat_s_b_ARG1
+ %0 = load <16 x i8>, <16 x i8>* @llvm_mips_sat_s_b_ARG1
%1 = tail call <16 x i8> @llvm.mips.sat.s.b(<16 x i8> %0, i32 7)
store <16 x i8> %1, <16 x i8>* @llvm_mips_sat_s_b_RES
ret void
@@ -27,7 +27,7 @@ declare <16 x i8> @llvm.mips.sat.s.b(<16 x i8>, i32) nounwind
define void @llvm_mips_sat_s_h_test() nounwind {
entry:
- %0 = load <8 x i16>* @llvm_mips_sat_s_h_ARG1
+ %0 = load <8 x i16>, <8 x i16>* @llvm_mips_sat_s_h_ARG1
%1 = tail call <8 x i16> @llvm.mips.sat.s.h(<8 x i16> %0, i32 7)
store <8 x i16> %1, <8 x i16>* @llvm_mips_sat_s_h_RES
ret void
@@ -46,7 +46,7 @@ declare <8 x i16> @llvm.mips.sat.s.h(<8 x i16>, i32) nounwind
define void @llvm_mips_sat_s_w_test() nounwind {
entry:
- %0 = load <4 x i32>* @llvm_mips_sat_s_w_ARG1
+ %0 = load <4 x i32>, <4 x i32>* @llvm_mips_sat_s_w_ARG1
%1 = tail call <4 x i32> @llvm.mips.sat.s.w(<4 x i32> %0, i32 7)
store <4 x i32> %1, <4 x i32>* @llvm_mips_sat_s_w_RES
ret void
@@ -65,7 +65,7 @@ declare <4 x i32> @llvm.mips.sat.s.w(<4 x i32>, i32) nounwind
define void @llvm_mips_sat_s_d_test() nounwind {
entry:
- %0 = load <2 x i64>* @llvm_mips_sat_s_d_ARG1
+ %0 = load <2 x i64>, <2 x i64>* @llvm_mips_sat_s_d_ARG1
%1 = tail call <2 x i64> @llvm.mips.sat.s.d(<2 x i64> %0, i32 7)
store <2 x i64> %1, <2 x i64>* @llvm_mips_sat_s_d_RES
ret void
@@ -84,7 +84,7 @@ declare <2 x i64> @llvm.mips.sat.s.d(<2 x i64>, i32) nounwind
define void @llvm_mips_sat_u_b_test() nounwind {
entry:
- %0 = load <16 x i8>* @llvm_mips_sat_u_b_ARG1
+ %0 = load <16 x i8>, <16 x i8>* @llvm_mips_sat_u_b_ARG1
%1 = tail call <16 x i8> @llvm.mips.sat.u.b(<16 x i8> %0, i32 7)
store <16 x i8> %1, <16 x i8>* @llvm_mips_sat_u_b_RES
ret void
@@ -103,7 +103,7 @@ declare <16 x i8> @llvm.mips.sat.u.b(<16 x i8>, i32) nounwind
define void @llvm_mips_sat_u_h_test() nounwind {
entry:
- %0 = load <8 x i16>* @llvm_mips_sat_u_h_ARG1
+ %0 = load <8 x i16>, <8 x i16>* @llvm_mips_sat_u_h_ARG1
%1 = tail call <8 x i16> @llvm.mips.sat.u.h(<8 x i16> %0, i32 7)
store <8 x i16> %1, <8 x i16>* @llvm_mips_sat_u_h_RES
ret void
@@ -122,7 +122,7 @@ declare <8 x i16> @llvm.mips.sat.u.h(<8 x i16>, i32) nounwind
define void @llvm_mips_sat_u_w_test() nounwind {
entry:
- %0 = load <4 x i32>* @llvm_mips_sat_u_w_ARG1
+ %0 = load <4 x i32>, <4 x i32>* @llvm_mips_sat_u_w_ARG1
%1 = tail call <4 x i32> @llvm.mips.sat.u.w(<4 x i32> %0, i32 7)
store <4 x i32> %1, <4 x i32>* @llvm_mips_sat_u_w_RES
ret void
@@ -141,7 +141,7 @@ declare <4 x i32> @llvm.mips.sat.u.w(<4 x i32>, i32) nounwind
define void @llvm_mips_sat_u_d_test() nounwind {
entry:
- %0 = load <2 x i64>* @llvm_mips_sat_u_d_ARG1
+ %0 = load <2 x i64>, <2 x i64>* @llvm_mips_sat_u_d_ARG1
%1 = tail call <2 x i64> @llvm.mips.sat.u.d(<2 x i64> %0, i32 7)
store <2 x i64> %1, <2 x i64>* @llvm_mips_sat_u_d_RES
ret void
@@ -160,7 +160,7 @@ declare <2 x i64> @llvm.mips.sat.u.d(<2 x i64>, i32) nounwind
define void @llvm_mips_slli_b_test() nounwind {
entry:
- %0 = load <16 x i8>* @llvm_mips_slli_b_ARG1
+ %0 = load <16 x i8>, <16 x i8>* @llvm_mips_slli_b_ARG1
%1 = tail call <16 x i8> @llvm.mips.slli.b(<16 x i8> %0, i32 7)
store <16 x i8> %1, <16 x i8>* @llvm_mips_slli_b_RES
ret void
@@ -179,7 +179,7 @@ declare <16 x i8> @llvm.mips.slli.b(<16 x i8>, i32) nounwind
define void @llvm_mips_slli_h_test() nounwind {
entry:
- %0 = load <8 x i16>* @llvm_mips_slli_h_ARG1
+ %0 = load <8 x i16>, <8 x i16>* @llvm_mips_slli_h_ARG1
%1 = tail call <8 x i16> @llvm.mips.slli.h(<8 x i16> %0, i32 7)
store <8 x i16> %1, <8 x i16>* @llvm_mips_slli_h_RES
ret void
@@ -198,7 +198,7 @@ declare <8 x i16> @llvm.mips.slli.h(<8 x i16>, i32) nounwind
define void @llvm_mips_slli_w_test() nounwind {
entry:
- %0 = load <4 x i32>* @llvm_mips_slli_w_ARG1
+ %0 = load <4 x i32>, <4 x i32>* @llvm_mips_slli_w_ARG1
%1 = tail call <4 x i32> @llvm.mips.slli.w(<4 x i32> %0, i32 7)
store <4 x i32> %1, <4 x i32>* @llvm_mips_slli_w_RES
ret void
@@ -217,7 +217,7 @@ declare <4 x i32> @llvm.mips.slli.w(<4 x i32>, i32) nounwind
define void @llvm_mips_slli_d_test() nounwind {
entry:
- %0 = load <2 x i64>* @llvm_mips_slli_d_ARG1
+ %0 = load <2 x i64>, <2 x i64>* @llvm_mips_slli_d_ARG1
%1 = tail call <2 x i64> @llvm.mips.slli.d(<2 x i64> %0, i32 7)
store <2 x i64> %1, <2 x i64>* @llvm_mips_slli_d_RES
ret void
@@ -236,7 +236,7 @@ declare <2 x i64> @llvm.mips.slli.d(<2 x i64>, i32) nounwind
define void @llvm_mips_srai_b_test() nounwind {
entry:
- %0 = load <16 x i8>* @llvm_mips_srai_b_ARG1
+ %0 = load <16 x i8>, <16 x i8>* @llvm_mips_srai_b_ARG1
%1 = tail call <16 x i8> @llvm.mips.srai.b(<16 x i8> %0, i32 7)
store <16 x i8> %1, <16 x i8>* @llvm_mips_srai_b_RES
ret void
@@ -255,7 +255,7 @@ declare <16 x i8> @llvm.mips.srai.b(<16 x i8>, i32) nounwind
define void @llvm_mips_srai_h_test() nounwind {
entry:
- %0 = load <8 x i16>* @llvm_mips_srai_h_ARG1
+ %0 = load <8 x i16>, <8 x i16>* @llvm_mips_srai_h_ARG1
%1 = tail call <8 x i16> @llvm.mips.srai.h(<8 x i16> %0, i32 7)
store <8 x i16> %1, <8 x i16>* @llvm_mips_srai_h_RES
ret void
@@ -274,7 +274,7 @@ declare <8 x i16> @llvm.mips.srai.h(<8 x i16>, i32) nounwind
define void @llvm_mips_srai_w_test() nounwind {
entry:
- %0 = load <4 x i32>* @llvm_mips_srai_w_ARG1
+ %0 = load <4 x i32>, <4 x i32>* @llvm_mips_srai_w_ARG1
%1 = tail call <4 x i32> @llvm.mips.srai.w(<4 x i32> %0, i32 7)
store <4 x i32> %1, <4 x i32>* @llvm_mips_srai_w_RES
ret void
@@ -293,7 +293,7 @@ declare <4 x i32> @llvm.mips.srai.w(<4 x i32>, i32) nounwind
define void @llvm_mips_srai_d_test() nounwind {
entry:
- %0 = load <2 x i64>* @llvm_mips_srai_d_ARG1
+ %0 = load <2 x i64>, <2 x i64>* @llvm_mips_srai_d_ARG1
%1 = tail call <2 x i64> @llvm.mips.srai.d(<2 x i64> %0, i32 7)
store <2 x i64> %1, <2 x i64>* @llvm_mips_srai_d_RES
ret void
@@ -312,7 +312,7 @@ declare <2 x i64> @llvm.mips.srai.d(<2 x i64>, i32) nounwind
define void @llvm_mips_srari_b_test() nounwind {
entry:
- %0 = load <16 x i8>* @llvm_mips_srari_b_ARG1
+ %0 = load <16 x i8>, <16 x i8>* @llvm_mips_srari_b_ARG1
%1 = tail call <16 x i8> @llvm.mips.srari.b(<16 x i8> %0, i32 7)
store <16 x i8> %1, <16 x i8>* @llvm_mips_srari_b_RES
ret void
@@ -331,7 +331,7 @@ declare <16 x i8> @llvm.mips.srari.b(<16 x i8>, i32) nounwind
define void @llvm_mips_srari_h_test() nounwind {
entry:
- %0 = load <8 x i16>* @llvm_mips_srari_h_ARG1
+ %0 = load <8 x i16>, <8 x i16>* @llvm_mips_srari_h_ARG1
%1 = tail call <8 x i16> @llvm.mips.srari.h(<8 x i16> %0, i32 7)
store <8 x i16> %1, <8 x i16>* @llvm_mips_srari_h_RES
ret void
@@ -350,7 +350,7 @@ declare <8 x i16> @llvm.mips.srari.h(<8 x i16>, i32) nounwind
define void @llvm_mips_srari_w_test() nounwind {
entry:
- %0 = load <4 x i32>* @llvm_mips_srari_w_ARG1
+ %0 = load <4 x i32>, <4 x i32>* @llvm_mips_srari_w_ARG1
%1 = tail call <4 x i32> @llvm.mips.srari.w(<4 x i32> %0, i32 7)
store <4 x i32> %1, <4 x i32>* @llvm_mips_srari_w_RES
ret void
@@ -369,7 +369,7 @@ declare <4 x i32> @llvm.mips.srari.w(<4 x i32>, i32) nounwind
define void @llvm_mips_srari_d_test() nounwind {
entry:
- %0 = load <2 x i64>* @llvm_mips_srari_d_ARG1
+ %0 = load <2 x i64>, <2 x i64>* @llvm_mips_srari_d_ARG1
%1 = tail call <2 x i64> @llvm.mips.srari.d(<2 x i64> %0, i32 7)
store <2 x i64> %1, <2 x i64>* @llvm_mips_srari_d_RES
ret void
@@ -388,7 +388,7 @@ declare <2 x i64> @llvm.mips.srari.d(<2 x i64>, i32) nounwind
define void @llvm_mips_srli_b_test() nounwind {
entry:
- %0 = load <16 x i8>* @llvm_mips_srli_b_ARG1
+ %0 = load <16 x i8>, <16 x i8>* @llvm_mips_srli_b_ARG1
%1 = tail call <16 x i8> @llvm.mips.srli.b(<16 x i8> %0, i32 7)
store <16 x i8> %1, <16 x i8>* @llvm_mips_srli_b_RES
ret void
@@ -407,7 +407,7 @@ declare <16 x i8> @llvm.mips.srli.b(<16 x i8>, i32) nounwind
define void @llvm_mips_srli_h_test() nounwind {
entry:
- %0 = load <8 x i16>* @llvm_mips_srli_h_ARG1
+ %0 = load <8 x i16>, <8 x i16>* @llvm_mips_srli_h_ARG1
%1 = tail call <8 x i16> @llvm.mips.srli.h(<8 x i16> %0, i32 7)
store <8 x i16> %1, <8 x i16>* @llvm_mips_srli_h_RES
ret void
@@ -426,7 +426,7 @@ declare <8 x i16> @llvm.mips.srli.h(<8 x i16>, i32) nounwind
define void @llvm_mips_srli_w_test() nounwind {
entry:
- %0 = load <4 x i32>* @llvm_mips_srli_w_ARG1
+ %0 = load <4 x i32>, <4 x i32>* @llvm_mips_srli_w_ARG1
%1 = tail call <4 x i32> @llvm.mips.srli.w(<4 x i32> %0, i32 7)
store <4 x i32> %1, <4 x i32>* @llvm_mips_srli_w_RES
ret void
@@ -445,7 +445,7 @@ declare <4 x i32> @llvm.mips.srli.w(<4 x i32>, i32) nounwind
define void @llvm_mips_srli_d_test() nounwind {
entry:
- %0 = load <2 x i64>* @llvm_mips_srli_d_ARG1
+ %0 = load <2 x i64>, <2 x i64>* @llvm_mips_srli_d_ARG1
%1 = tail call <2 x i64> @llvm.mips.srli.d(<2 x i64> %0, i32 7)
store <2 x i64> %1, <2 x i64>* @llvm_mips_srli_d_RES
ret void
@@ -464,7 +464,7 @@ declare <2 x i64> @llvm.mips.srli.d(<2 x i64>, i32) nounwind
define void @llvm_mips_srlri_b_test() nounwind {
entry:
- %0 = load <16 x i8>* @llvm_mips_srlri_b_ARG1
+ %0 = load <16 x i8>, <16 x i8>* @llvm_mips_srlri_b_ARG1
%1 = tail call <16 x i8> @llvm.mips.srlri.b(<16 x i8> %0, i32 7)
store <16 x i8> %1, <16 x i8>* @llvm_mips_srlri_b_RES
ret void
@@ -483,7 +483,7 @@ declare <16 x i8> @llvm.mips.srlri.b(<16 x i8>, i32) nounwind
define void @llvm_mips_srlri_h_test() nounwind {
entry:
- %0 = load <8 x i16>* @llvm_mips_srlri_h_ARG1
+ %0 = load <8 x i16>, <8 x i16>* @llvm_mips_srlri_h_ARG1
%1 = tail call <8 x i16> @llvm.mips.srlri.h(<8 x i16> %0, i32 7)
store <8 x i16> %1, <8 x i16>* @llvm_mips_srlri_h_RES
ret void
@@ -502,7 +502,7 @@ declare <8 x i16> @llvm.mips.srlri.h(<8 x i16>, i32) nounwind
define void @llvm_mips_srlri_w_test() nounwind {
entry:
- %0 = load <4 x i32>* @llvm_mips_srlri_w_ARG1
+ %0 = load <4 x i32>, <4 x i32>* @llvm_mips_srlri_w_ARG1
%1 = tail call <4 x i32> @llvm.mips.srlri.w(<4 x i32> %0, i32 7)
store <4 x i32> %1, <4 x i32>* @llvm_mips_srlri_w_RES
ret void
@@ -521,7 +521,7 @@ declare <4 x i32> @llvm.mips.srlri.w(<4 x i32>, i32) nounwind
define void @llvm_mips_srlri_d_test() nounwind {
entry:
- %0 = load <2 x i64>* @llvm_mips_srlri_d_ARG1
+ %0 = load <2 x i64>, <2 x i64>* @llvm_mips_srlri_d_ARG1
%1 = tail call <2 x i64> @llvm.mips.srlri.d(<2 x i64> %0, i32 7)
store <2 x i64> %1, <2 x i64>* @llvm_mips_srlri_d_RES
ret void
diff --git a/test/CodeGen/Mips/msa/bitcast.ll b/test/CodeGen/Mips/msa/bitcast.ll
index 8e880ecd9afb..837cc28aa82d 100644
--- a/test/CodeGen/Mips/msa/bitcast.ll
+++ b/test/CodeGen/Mips/msa/bitcast.ll
@@ -5,7 +5,7 @@
define void @v16i8_to_v16i8(<16 x i8>* %src, <16 x i8>* %dst) nounwind {
entry:
- %0 = load volatile <16 x i8>* %src
+ %0 = load volatile <16 x i8>, <16 x i8>* %src
%1 = tail call <16 x i8> @llvm.mips.addv.b(<16 x i8> %0, <16 x i8> %0)
%2 = bitcast <16 x i8> %1 to <16 x i8>
%3 = tail call <16 x i8> @llvm.mips.addv.b(<16 x i8> %2, <16 x i8> %2)
@@ -29,7 +29,7 @@ entry:
define void @v16i8_to_v8i16(<16 x i8>* %src, <8 x i16>* %dst) nounwind {
entry:
- %0 = load volatile <16 x i8>* %src
+ %0 = load volatile <16 x i8>, <16 x i8>* %src
%1 = tail call <16 x i8> @llvm.mips.addv.b(<16 x i8> %0, <16 x i8> %0)
%2 = bitcast <16 x i8> %1 to <8 x i16>
%3 = tail call <8 x i16> @llvm.mips.addv.h(<8 x i16> %2, <8 x i16> %2)
@@ -56,7 +56,7 @@ entry:
; are no operations for v8f16 to put in the way.
define void @v16i8_to_v8f16(<16 x i8>* %src, <8 x half>* %dst) nounwind {
entry:
- %0 = load volatile <16 x i8>* %src
+ %0 = load volatile <16 x i8>, <16 x i8>* %src
%1 = tail call <16 x i8> @llvm.mips.addv.b(<16 x i8> %0, <16 x i8> %0)
%2 = bitcast <16 x i8> %1 to <8 x half>
store <8 x half> %2, <8 x half>* %dst
@@ -77,7 +77,7 @@ entry:
define void @v16i8_to_v4i32(<16 x i8>* %src, <4 x i32>* %dst) nounwind {
entry:
- %0 = load volatile <16 x i8>* %src
+ %0 = load volatile <16 x i8>, <16 x i8>* %src
%1 = tail call <16 x i8> @llvm.mips.addv.b(<16 x i8> %0, <16 x i8> %0)
%2 = bitcast <16 x i8> %1 to <4 x i32>
%3 = tail call <4 x i32> @llvm.mips.addv.w(<4 x i32> %2, <4 x i32> %2)
@@ -102,7 +102,7 @@ entry:
define void @v16i8_to_v4f32(<16 x i8>* %src, <4 x float>* %dst) nounwind {
entry:
- %0 = load volatile <16 x i8>* %src
+ %0 = load volatile <16 x i8>, <16 x i8>* %src
%1 = tail call <16 x i8> @llvm.mips.addv.b(<16 x i8> %0, <16 x i8> %0)
%2 = bitcast <16 x i8> %1 to <4 x float>
%3 = tail call <4 x float> @llvm.mips.fadd.w(<4 x float> %2, <4 x float> %2)
@@ -127,7 +127,7 @@ entry:
define void @v16i8_to_v2i64(<16 x i8>* %src, <2 x i64>* %dst) nounwind {
entry:
- %0 = load volatile <16 x i8>* %src
+ %0 = load volatile <16 x i8>, <16 x i8>* %src
%1 = tail call <16 x i8> @llvm.mips.addv.b(<16 x i8> %0, <16 x i8> %0)
%2 = bitcast <16 x i8> %1 to <2 x i64>
%3 = tail call <2 x i64> @llvm.mips.addv.d(<2 x i64> %2, <2 x i64> %2)
@@ -153,7 +153,7 @@ entry:
define void @v16i8_to_v2f64(<16 x i8>* %src, <2 x double>* %dst) nounwind {
entry:
- %0 = load volatile <16 x i8>* %src
+ %0 = load volatile <16 x i8>, <16 x i8>* %src
%1 = tail call <16 x i8> @llvm.mips.addv.b(<16 x i8> %0, <16 x i8> %0)
%2 = bitcast <16 x i8> %1 to <2 x double>
%3 = tail call <2 x double> @llvm.mips.fadd.d(<2 x double> %2, <2 x double> %2)
@@ -179,7 +179,7 @@ entry:
define void @v8i16_to_v16i8(<8 x i16>* %src, <16 x i8>* %dst) nounwind {
entry:
- %0 = load volatile <8 x i16>* %src
+ %0 = load volatile <8 x i16>, <8 x i16>* %src
%1 = tail call <8 x i16> @llvm.mips.addv.h(<8 x i16> %0, <8 x i16> %0)
%2 = bitcast <8 x i16> %1 to <16 x i8>
%3 = tail call <16 x i8> @llvm.mips.addv.b(<16 x i8> %2, <16 x i8> %2)
@@ -204,7 +204,7 @@ entry:
define void @v8i16_to_v8i16(<8 x i16>* %src, <8 x i16>* %dst) nounwind {
entry:
- %0 = load volatile <8 x i16>* %src
+ %0 = load volatile <8 x i16>, <8 x i16>* %src
%1 = tail call <8 x i16> @llvm.mips.addv.h(<8 x i16> %0, <8 x i16> %0)
%2 = bitcast <8 x i16> %1 to <8 x i16>
%3 = tail call <8 x i16> @llvm.mips.addv.h(<8 x i16> %2, <8 x i16> %2)
@@ -230,7 +230,7 @@ entry:
; are no operations for v8f16 to put in the way.
define void @v8i16_to_v8f16(<8 x i16>* %src, <8 x half>* %dst) nounwind {
entry:
- %0 = load volatile <8 x i16>* %src
+ %0 = load volatile <8 x i16>, <8 x i16>* %src
%1 = tail call <8 x i16> @llvm.mips.addv.h(<8 x i16> %0, <8 x i16> %0)
%2 = bitcast <8 x i16> %1 to <8 x half>
store <8 x half> %2, <8 x half>* %dst
@@ -251,7 +251,7 @@ entry:
define void @v8i16_to_v4i32(<8 x i16>* %src, <4 x i32>* %dst) nounwind {
entry:
- %0 = load volatile <8 x i16>* %src
+ %0 = load volatile <8 x i16>, <8 x i16>* %src
%1 = tail call <8 x i16> @llvm.mips.addv.h(<8 x i16> %0, <8 x i16> %0)
%2 = bitcast <8 x i16> %1 to <4 x i32>
%3 = tail call <4 x i32> @llvm.mips.addv.w(<4 x i32> %2, <4 x i32> %2)
@@ -276,7 +276,7 @@ entry:
define void @v8i16_to_v4f32(<8 x i16>* %src, <4 x float>* %dst) nounwind {
entry:
- %0 = load volatile <8 x i16>* %src
+ %0 = load volatile <8 x i16>, <8 x i16>* %src
%1 = tail call <8 x i16> @llvm.mips.addv.h(<8 x i16> %0, <8 x i16> %0)
%2 = bitcast <8 x i16> %1 to <4 x float>
%3 = tail call <4 x float> @llvm.mips.fadd.w(<4 x float> %2, <4 x float> %2)
@@ -301,7 +301,7 @@ entry:
define void @v8i16_to_v2i64(<8 x i16>* %src, <2 x i64>* %dst) nounwind {
entry:
- %0 = load volatile <8 x i16>* %src
+ %0 = load volatile <8 x i16>, <8 x i16>* %src
%1 = tail call <8 x i16> @llvm.mips.addv.h(<8 x i16> %0, <8 x i16> %0)
%2 = bitcast <8 x i16> %1 to <2 x i64>
%3 = tail call <2 x i64> @llvm.mips.addv.d(<2 x i64> %2, <2 x i64> %2)
@@ -326,7 +326,7 @@ entry:
define void @v8i16_to_v2f64(<8 x i16>* %src, <2 x double>* %dst) nounwind {
entry:
- %0 = load volatile <8 x i16>* %src
+ %0 = load volatile <8 x i16>, <8 x i16>* %src
%1 = tail call <8 x i16> @llvm.mips.addv.h(<8 x i16> %0, <8 x i16> %0)
%2 = bitcast <8 x i16> %1 to <2 x double>
%3 = tail call <2 x double> @llvm.mips.fadd.d(<2 x double> %2, <2 x double> %2)
@@ -354,7 +354,7 @@ entry:
; are no operations for v8f16 to put in the way.
define void @v8f16_to_v16i8(<8 x half>* %src, <16 x i8>* %dst) nounwind {
entry:
- %0 = load volatile <8 x half>* %src
+ %0 = load volatile <8 x half>, <8 x half>* %src
%1 = bitcast <8 x half> %0 to <16 x i8>
%2 = tail call <16 x i8> @llvm.mips.addv.b(<16 x i8> %1, <16 x i8> %1)
store <16 x i8> %2, <16 x i8>* %dst
@@ -378,7 +378,7 @@ entry:
; are no operations for v8f16 to put in the way.
define void @v8f16_to_v8i16(<8 x half>* %src, <8 x i16>* %dst) nounwind {
entry:
- %0 = load volatile <8 x half>* %src
+ %0 = load volatile <8 x half>, <8 x half>* %src
%1 = bitcast <8 x half> %0 to <8 x i16>
%2 = tail call <8 x i16> @llvm.mips.addv.h(<8 x i16> %1, <8 x i16> %1)
store <8 x i16> %2, <8 x i16>* %dst
@@ -403,7 +403,7 @@ entry:
; are no operations for v8f16 to put in the way.
define void @v8f16_to_v8f16(<8 x half>* %src, <8 x half>* %dst) nounwind {
entry:
- %0 = load volatile <8 x half>* %src
+ %0 = load volatile <8 x half>, <8 x half>* %src
%1 = bitcast <8 x half> %0 to <8 x half>
store <8 x half> %1, <8 x half>* %dst
ret void
@@ -423,7 +423,7 @@ entry:
; are no operations for v8f16 to put in the way.
define void @v8f16_to_v4i32(<8 x half>* %src, <4 x i32>* %dst) nounwind {
entry:
- %0 = load volatile <8 x half>* %src
+ %0 = load volatile <8 x half>, <8 x half>* %src
%1 = bitcast <8 x half> %0 to <4 x i32>
%2 = tail call <4 x i32> @llvm.mips.addv.w(<4 x i32> %1, <4 x i32> %1)
store <4 x i32> %2, <4 x i32>* %dst
@@ -447,7 +447,7 @@ entry:
; are no operations for v8f16 to put in the way.
define void @v8f16_to_v4f32(<8 x half>* %src, <4 x float>* %dst) nounwind {
entry:
- %0 = load volatile <8 x half>* %src
+ %0 = load volatile <8 x half>, <8 x half>* %src
%1 = bitcast <8 x half> %0 to <4 x float>
%2 = tail call <4 x float> @llvm.mips.fadd.w(<4 x float> %1, <4 x float> %1)
store <4 x float> %2, <4 x float>* %dst
@@ -471,7 +471,7 @@ entry:
; are no operations for v8f16 to put in the way.
define void @v8f16_to_v2i64(<8 x half>* %src, <2 x i64>* %dst) nounwind {
entry:
- %0 = load volatile <8 x half>* %src
+ %0 = load volatile <8 x half>, <8 x half>* %src
%1 = bitcast <8 x half> %0 to <2 x i64>
%2 = tail call <2 x i64> @llvm.mips.addv.d(<2 x i64> %1, <2 x i64> %1)
store <2 x i64> %2, <2 x i64>* %dst
@@ -495,7 +495,7 @@ entry:
; are no operations for v8f16 to put in the way.
define void @v8f16_to_v2f64(<8 x half>* %src, <2 x double>* %dst) nounwind {
entry:
- %0 = load volatile <8 x half>* %src
+ %0 = load volatile <8 x half>, <8 x half>* %src
%1 = bitcast <8 x half> %0 to <2 x double>
%2 = tail call <2 x double> @llvm.mips.fadd.d(<2 x double> %1, <2 x double> %1)
store <2 x double> %2, <2 x double>* %dst
@@ -518,7 +518,7 @@ entry:
define void @v4i32_to_v16i8(<4 x i32>* %src, <16 x i8>* %dst) nounwind {
entry:
- %0 = load volatile <4 x i32>* %src
+ %0 = load volatile <4 x i32>, <4 x i32>* %src
%1 = tail call <4 x i32> @llvm.mips.addv.w(<4 x i32> %0, <4 x i32> %0)
%2 = bitcast <4 x i32> %1 to <16 x i8>
%3 = tail call <16 x i8> @llvm.mips.addv.b(<16 x i8> %2, <16 x i8> %2)
@@ -543,7 +543,7 @@ entry:
define void @v4i32_to_v8i16(<4 x i32>* %src, <8 x i16>* %dst) nounwind {
entry:
- %0 = load volatile <4 x i32>* %src
+ %0 = load volatile <4 x i32>, <4 x i32>* %src
%1 = tail call <4 x i32> @llvm.mips.addv.w(<4 x i32> %0, <4 x i32> %0)
%2 = bitcast <4 x i32> %1 to <8 x i16>
%3 = tail call <8 x i16> @llvm.mips.addv.h(<8 x i16> %2, <8 x i16> %2)
@@ -570,7 +570,7 @@ entry:
; are no operations for v8f16 to put in the way.
define void @v4i32_to_v8f16(<4 x i32>* %src, <8 x half>* %dst) nounwind {
entry:
- %0 = load volatile <4 x i32>* %src
+ %0 = load volatile <4 x i32>, <4 x i32>* %src
%1 = tail call <4 x i32> @llvm.mips.addv.w(<4 x i32> %0, <4 x i32> %0)
%2 = bitcast <4 x i32> %1 to <8 x half>
store <8 x half> %2, <8 x half>* %dst
@@ -591,7 +591,7 @@ entry:
define void @v4i32_to_v4i32(<4 x i32>* %src, <4 x i32>* %dst) nounwind {
entry:
- %0 = load volatile <4 x i32>* %src
+ %0 = load volatile <4 x i32>, <4 x i32>* %src
%1 = tail call <4 x i32> @llvm.mips.addv.w(<4 x i32> %0, <4 x i32> %0)
%2 = bitcast <4 x i32> %1 to <4 x i32>
%3 = tail call <4 x i32> @llvm.mips.addv.w(<4 x i32> %2, <4 x i32> %2)
@@ -615,7 +615,7 @@ entry:
define void @v4i32_to_v4f32(<4 x i32>* %src, <4 x float>* %dst) nounwind {
entry:
- %0 = load volatile <4 x i32>* %src
+ %0 = load volatile <4 x i32>, <4 x i32>* %src
%1 = tail call <4 x i32> @llvm.mips.addv.w(<4 x i32> %0, <4 x i32> %0)
%2 = bitcast <4 x i32> %1 to <4 x float>
%3 = tail call <4 x float> @llvm.mips.fadd.w(<4 x float> %2, <4 x float> %2)
@@ -639,7 +639,7 @@ entry:
define void @v4i32_to_v2i64(<4 x i32>* %src, <2 x i64>* %dst) nounwind {
entry:
- %0 = load volatile <4 x i32>* %src
+ %0 = load volatile <4 x i32>, <4 x i32>* %src
%1 = tail call <4 x i32> @llvm.mips.addv.w(<4 x i32> %0, <4 x i32> %0)
%2 = bitcast <4 x i32> %1 to <2 x i64>
%3 = tail call <2 x i64> @llvm.mips.addv.d(<2 x i64> %2, <2 x i64> %2)
@@ -664,7 +664,7 @@ entry:
define void @v4i32_to_v2f64(<4 x i32>* %src, <2 x double>* %dst) nounwind {
entry:
- %0 = load volatile <4 x i32>* %src
+ %0 = load volatile <4 x i32>, <4 x i32>* %src
%1 = tail call <4 x i32> @llvm.mips.addv.w(<4 x i32> %0, <4 x i32> %0)
%2 = bitcast <4 x i32> %1 to <2 x double>
%3 = tail call <2 x double> @llvm.mips.fadd.d(<2 x double> %2, <2 x double> %2)
@@ -689,7 +689,7 @@ entry:
define void @v4f32_to_v16i8(<4 x float>* %src, <16 x i8>* %dst) nounwind {
entry:
- %0 = load volatile <4 x float>* %src
+ %0 = load volatile <4 x float>, <4 x float>* %src
%1 = tail call <4 x float> @llvm.mips.fadd.w(<4 x float> %0, <4 x float> %0)
%2 = bitcast <4 x float> %1 to <16 x i8>
%3 = tail call <16 x i8> @llvm.mips.addv.b(<16 x i8> %2, <16 x i8> %2)
@@ -714,7 +714,7 @@ entry:
define void @v4f32_to_v8i16(<4 x float>* %src, <8 x i16>* %dst) nounwind {
entry:
- %0 = load volatile <4 x float>* %src
+ %0 = load volatile <4 x float>, <4 x float>* %src
%1 = tail call <4 x float> @llvm.mips.fadd.w(<4 x float> %0, <4 x float> %0)
%2 = bitcast <4 x float> %1 to <8 x i16>
%3 = tail call <8 x i16> @llvm.mips.addv.h(<8 x i16> %2, <8 x i16> %2)
@@ -741,7 +741,7 @@ entry:
; are no operations for v8f16 to put in the way.
define void @v4f32_to_v8f16(<4 x float>* %src, <8 x half>* %dst) nounwind {
entry:
- %0 = load volatile <4 x float>* %src
+ %0 = load volatile <4 x float>, <4 x float>* %src
%1 = tail call <4 x float> @llvm.mips.fadd.w(<4 x float> %0, <4 x float> %0)
%2 = bitcast <4 x float> %1 to <8 x half>
store <8 x half> %2, <8 x half>* %dst
@@ -762,7 +762,7 @@ entry:
define void @v4f32_to_v4i32(<4 x float>* %src, <4 x i32>* %dst) nounwind {
entry:
- %0 = load volatile <4 x float>* %src
+ %0 = load volatile <4 x float>, <4 x float>* %src
%1 = tail call <4 x float> @llvm.mips.fadd.w(<4 x float> %0, <4 x float> %0)
%2 = bitcast <4 x float> %1 to <4 x i32>
%3 = tail call <4 x i32> @llvm.mips.addv.w(<4 x i32> %2, <4 x i32> %2)
@@ -786,7 +786,7 @@ entry:
define void @v4f32_to_v4f32(<4 x float>* %src, <4 x float>* %dst) nounwind {
entry:
- %0 = load volatile <4 x float>* %src
+ %0 = load volatile <4 x float>, <4 x float>* %src
%1 = tail call <4 x float> @llvm.mips.fadd.w(<4 x float> %0, <4 x float> %0)
%2 = bitcast <4 x float> %1 to <4 x float>
%3 = tail call <4 x float> @llvm.mips.fadd.w(<4 x float> %2, <4 x float> %2)
@@ -810,7 +810,7 @@ entry:
define void @v4f32_to_v2i64(<4 x float>* %src, <2 x i64>* %dst) nounwind {
entry:
- %0 = load volatile <4 x float>* %src
+ %0 = load volatile <4 x float>, <4 x float>* %src
%1 = tail call <4 x float> @llvm.mips.fadd.w(<4 x float> %0, <4 x float> %0)
%2 = bitcast <4 x float> %1 to <2 x i64>
%3 = tail call <2 x i64> @llvm.mips.addv.d(<2 x i64> %2, <2 x i64> %2)
@@ -835,7 +835,7 @@ entry:
define void @v4f32_to_v2f64(<4 x float>* %src, <2 x double>* %dst) nounwind {
entry:
- %0 = load volatile <4 x float>* %src
+ %0 = load volatile <4 x float>, <4 x float>* %src
%1 = tail call <4 x float> @llvm.mips.fadd.w(<4 x float> %0, <4 x float> %0)
%2 = bitcast <4 x float> %1 to <2 x double>
%3 = tail call <2 x double> @llvm.mips.fadd.d(<2 x double> %2, <2 x double> %2)
@@ -860,7 +860,7 @@ entry:
define void @v2i64_to_v16i8(<2 x i64>* %src, <16 x i8>* %dst) nounwind {
entry:
- %0 = load volatile <2 x i64>* %src
+ %0 = load volatile <2 x i64>, <2 x i64>* %src
%1 = tail call <2 x i64> @llvm.mips.addv.d(<2 x i64> %0, <2 x i64> %0)
%2 = bitcast <2 x i64> %1 to <16 x i8>
%3 = tail call <16 x i8> @llvm.mips.addv.b(<16 x i8> %2, <16 x i8> %2)
@@ -886,7 +886,7 @@ entry:
define void @v2i64_to_v8i16(<2 x i64>* %src, <8 x i16>* %dst) nounwind {
entry:
- %0 = load volatile <2 x i64>* %src
+ %0 = load volatile <2 x i64>, <2 x i64>* %src
%1 = tail call <2 x i64> @llvm.mips.addv.d(<2 x i64> %0, <2 x i64> %0)
%2 = bitcast <2 x i64> %1 to <8 x i16>
%3 = tail call <8 x i16> @llvm.mips.addv.h(<8 x i16> %2, <8 x i16> %2)
@@ -913,7 +913,7 @@ entry:
; are no operations for v8f16 to put in the way.
define void @v2i64_to_v8f16(<2 x i64>* %src, <8 x half>* %dst) nounwind {
entry:
- %0 = load volatile <2 x i64>* %src
+ %0 = load volatile <2 x i64>, <2 x i64>* %src
%1 = tail call <2 x i64> @llvm.mips.addv.d(<2 x i64> %0, <2 x i64> %0)
%2 = bitcast <2 x i64> %1 to <8 x half>
store <8 x half> %2, <8 x half>* %dst
@@ -934,7 +934,7 @@ entry:
define void @v2i64_to_v4i32(<2 x i64>* %src, <4 x i32>* %dst) nounwind {
entry:
- %0 = load volatile <2 x i64>* %src
+ %0 = load volatile <2 x i64>, <2 x i64>* %src
%1 = tail call <2 x i64> @llvm.mips.addv.d(<2 x i64> %0, <2 x i64> %0)
%2 = bitcast <2 x i64> %1 to <4 x i32>
%3 = tail call <4 x i32> @llvm.mips.addv.w(<4 x i32> %2, <4 x i32> %2)
@@ -959,7 +959,7 @@ entry:
define void @v2i64_to_v4f32(<2 x i64>* %src, <4 x float>* %dst) nounwind {
entry:
- %0 = load volatile <2 x i64>* %src
+ %0 = load volatile <2 x i64>, <2 x i64>* %src
%1 = tail call <2 x i64> @llvm.mips.addv.d(<2 x i64> %0, <2 x i64> %0)
%2 = bitcast <2 x i64> %1 to <4 x float>
%3 = tail call <4 x float> @llvm.mips.fadd.w(<4 x float> %2, <4 x float> %2)
@@ -984,7 +984,7 @@ entry:
define void @v2i64_to_v2i64(<2 x i64>* %src, <2 x i64>* %dst) nounwind {
entry:
- %0 = load volatile <2 x i64>* %src
+ %0 = load volatile <2 x i64>, <2 x i64>* %src
%1 = tail call <2 x i64> @llvm.mips.addv.d(<2 x i64> %0, <2 x i64> %0)
%2 = bitcast <2 x i64> %1 to <2 x i64>
%3 = tail call <2 x i64> @llvm.mips.addv.d(<2 x i64> %2, <2 x i64> %2)
@@ -1008,7 +1008,7 @@ entry:
define void @v2i64_to_v2f64(<2 x i64>* %src, <2 x double>* %dst) nounwind {
entry:
- %0 = load volatile <2 x i64>* %src
+ %0 = load volatile <2 x i64>, <2 x i64>* %src
%1 = tail call <2 x i64> @llvm.mips.addv.d(<2 x i64> %0, <2 x i64> %0)
%2 = bitcast <2 x i64> %1 to <2 x double>
%3 = tail call <2 x double> @llvm.mips.fadd.d(<2 x double> %2, <2 x double> %2)
@@ -1032,7 +1032,7 @@ entry:
define void @v2f64_to_v16i8(<2 x double>* %src, <16 x i8>* %dst) nounwind {
entry:
- %0 = load volatile <2 x double>* %src
+ %0 = load volatile <2 x double>, <2 x double>* %src
%1 = tail call <2 x double> @llvm.mips.fadd.d(<2 x double> %0, <2 x double> %0)
%2 = bitcast <2 x double> %1 to <16 x i8>
%3 = tail call <16 x i8> @llvm.mips.addv.b(<16 x i8> %2, <16 x i8> %2)
@@ -1058,7 +1058,7 @@ entry:
define void @v2f64_to_v8i16(<2 x double>* %src, <8 x i16>* %dst) nounwind {
entry:
- %0 = load volatile <2 x double>* %src
+ %0 = load volatile <2 x double>, <2 x double>* %src
%1 = tail call <2 x double> @llvm.mips.fadd.d(<2 x double> %0, <2 x double> %0)
%2 = bitcast <2 x double> %1 to <8 x i16>
%3 = tail call <8 x i16> @llvm.mips.addv.h(<8 x i16> %2, <8 x i16> %2)
@@ -1085,7 +1085,7 @@ entry:
; are no operations for v8f16 to put in the way.
define void @v2f64_to_v8f16(<2 x double>* %src, <8 x half>* %dst) nounwind {
entry:
- %0 = load volatile <2 x double>* %src
+ %0 = load volatile <2 x double>, <2 x double>* %src
%1 = tail call <2 x double> @llvm.mips.fadd.d(<2 x double> %0, <2 x double> %0)
%2 = bitcast <2 x double> %1 to <8 x half>
store <8 x half> %2, <8 x half>* %dst
@@ -1106,7 +1106,7 @@ entry:
define void @v2f64_to_v4i32(<2 x double>* %src, <4 x i32>* %dst) nounwind {
entry:
- %0 = load volatile <2 x double>* %src
+ %0 = load volatile <2 x double>, <2 x double>* %src
%1 = tail call <2 x double> @llvm.mips.fadd.d(<2 x double> %0, <2 x double> %0)
%2 = bitcast <2 x double> %1 to <4 x i32>
%3 = tail call <4 x i32> @llvm.mips.addv.w(<4 x i32> %2, <4 x i32> %2)
@@ -1131,7 +1131,7 @@ entry:
define void @v2f64_to_v4f32(<2 x double>* %src, <4 x float>* %dst) nounwind {
entry:
- %0 = load volatile <2 x double>* %src
+ %0 = load volatile <2 x double>, <2 x double>* %src
%1 = tail call <2 x double> @llvm.mips.fadd.d(<2 x double> %0, <2 x double> %0)
%2 = bitcast <2 x double> %1 to <4 x float>
%3 = tail call <4 x float> @llvm.mips.fadd.w(<4 x float> %2, <4 x float> %2)
@@ -1156,7 +1156,7 @@ entry:
define void @v2f64_to_v2i64(<2 x double>* %src, <2 x i64>* %dst) nounwind {
entry:
- %0 = load volatile <2 x double>* %src
+ %0 = load volatile <2 x double>, <2 x double>* %src
%1 = tail call <2 x double> @llvm.mips.fadd.d(<2 x double> %0, <2 x double> %0)
%2 = bitcast <2 x double> %1 to <2 x i64>
%3 = tail call <2 x i64> @llvm.mips.addv.d(<2 x i64> %2, <2 x i64> %2)
@@ -1180,7 +1180,7 @@ entry:
define void @v2f64_to_v2f64(<2 x double>* %src, <2 x double>* %dst) nounwind {
entry:
- %0 = load volatile <2 x double>* %src
+ %0 = load volatile <2 x double>, <2 x double>* %src
%1 = tail call <2 x double> @llvm.mips.fadd.d(<2 x double> %0, <2 x double> %0)
%2 = bitcast <2 x double> %1 to <2 x double>
%3 = tail call <2 x double> @llvm.mips.fadd.d(<2 x double> %2, <2 x double> %2)
diff --git a/test/CodeGen/Mips/msa/bitwise.ll b/test/CodeGen/Mips/msa/bitwise.ll
index 5d57198a9355..2a260b2c5733 100644
--- a/test/CodeGen/Mips/msa/bitwise.ll
+++ b/test/CodeGen/Mips/msa/bitwise.ll
@@ -4,9 +4,9 @@
define void @and_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
; CHECK: and_v16i8:
- %1 = load <16 x i8>* %a
+ %1 = load <16 x i8>, <16 x i8>* %a
; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
- %2 = load <16 x i8>* %b
+ %2 = load <16 x i8>, <16 x i8>* %b
; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
%3 = and <16 x i8> %1, %2
; CHECK-DAG: and.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
@@ -20,9 +20,9 @@ define void @and_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
define void @and_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
; CHECK: and_v8i16:
- %1 = load <8 x i16>* %a
+ %1 = load <8 x i16>, <8 x i16>* %a
; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
- %2 = load <8 x i16>* %b
+ %2 = load <8 x i16>, <8 x i16>* %b
; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
%3 = and <8 x i16> %1, %2
; CHECK-DAG: and.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
@@ -36,9 +36,9 @@ define void @and_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
define void @and_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
; CHECK: and_v4i32:
- %1 = load <4 x i32>* %a
+ %1 = load <4 x i32>, <4 x i32>* %a
; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
- %2 = load <4 x i32>* %b
+ %2 = load <4 x i32>, <4 x i32>* %b
; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
%3 = and <4 x i32> %1, %2
; CHECK-DAG: and.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
@@ -52,9 +52,9 @@ define void @and_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
define void @and_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
; CHECK: and_v2i64:
- %1 = load <2 x i64>* %a
+ %1 = load <2 x i64>, <2 x i64>* %a
; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
- %2 = load <2 x i64>* %b
+ %2 = load <2 x i64>, <2 x i64>* %b
; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
%3 = and <2 x i64> %1, %2
; CHECK-DAG: and.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
@@ -68,7 +68,7 @@ define void @and_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
define void @and_v16i8_i(<16 x i8>* %c, <16 x i8>* %a) nounwind {
; CHECK: and_v16i8_i:
- %1 = load <16 x i8>* %a
+ %1 = load <16 x i8>, <16 x i8>* %a
; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
%2 = and <16 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
; CHECK-DAG: andi.b [[R4:\$w[0-9]+]], [[R1]], 1
@@ -82,7 +82,7 @@ define void @and_v16i8_i(<16 x i8>* %c, <16 x i8>* %a) nounwind {
define void @and_v8i16_i(<8 x i16>* %c, <8 x i16>* %a) nounwind {
; CHECK: and_v8i16_i:
- %1 = load <8 x i16>* %a
+ %1 = load <8 x i16>, <8 x i16>* %a
; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
%2 = and <8 x i16> %1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
; CHECK-DAG: ldi.h [[R3:\$w[0-9]+]], 1
@@ -97,7 +97,7 @@ define void @and_v8i16_i(<8 x i16>* %c, <8 x i16>* %a) nounwind {
define void @and_v4i32_i(<4 x i32>* %c, <4 x i32>* %a) nounwind {
; CHECK: and_v4i32_i:
- %1 = load <4 x i32>* %a
+ %1 = load <4 x i32>, <4 x i32>* %a
; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
%2 = and <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1>
; CHECK-DAG: ldi.w [[R3:\$w[0-9]+]], 1
@@ -112,7 +112,7 @@ define void @and_v4i32_i(<4 x i32>* %c, <4 x i32>* %a) nounwind {
define void @and_v2i64_i(<2 x i64>* %c, <2 x i64>* %a) nounwind {
; CHECK: and_v2i64_i:
- %1 = load <2 x i64>* %a
+ %1 = load <2 x i64>, <2 x i64>* %a
; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
%2 = and <2 x i64> %1, <i64 1, i64 1>
; CHECK-DAG: ldi.d [[R3:\$w[0-9]+]], 1
@@ -127,9 +127,9 @@ define void @and_v2i64_i(<2 x i64>* %c, <2 x i64>* %a) nounwind {
define void @or_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
; CHECK: or_v16i8:
- %1 = load <16 x i8>* %a
+ %1 = load <16 x i8>, <16 x i8>* %a
; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
- %2 = load <16 x i8>* %b
+ %2 = load <16 x i8>, <16 x i8>* %b
; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
%3 = or <16 x i8> %1, %2
; CHECK-DAG: or.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
@@ -143,9 +143,9 @@ define void @or_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
define void @or_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
; CHECK: or_v8i16:
- %1 = load <8 x i16>* %a
+ %1 = load <8 x i16>, <8 x i16>* %a
; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
- %2 = load <8 x i16>* %b
+ %2 = load <8 x i16>, <8 x i16>* %b
; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
%3 = or <8 x i16> %1, %2
; CHECK-DAG: or.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
@@ -159,9 +159,9 @@ define void @or_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
define void @or_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
; CHECK: or_v4i32:
- %1 = load <4 x i32>* %a
+ %1 = load <4 x i32>, <4 x i32>* %a
; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
- %2 = load <4 x i32>* %b
+ %2 = load <4 x i32>, <4 x i32>* %b
; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
%3 = or <4 x i32> %1, %2
; CHECK-DAG: or.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
@@ -175,9 +175,9 @@ define void @or_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
define void @or_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
; CHECK: or_v2i64:
- %1 = load <2 x i64>* %a
+ %1 = load <2 x i64>, <2 x i64>* %a
; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
- %2 = load <2 x i64>* %b
+ %2 = load <2 x i64>, <2 x i64>* %b
; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
%3 = or <2 x i64> %1, %2
; CHECK-DAG: or.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
@@ -191,7 +191,7 @@ define void @or_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
define void @or_v16i8_i(<16 x i8>* %c, <16 x i8>* %a) nounwind {
; CHECK: or_v16i8_i:
- %1 = load <16 x i8>* %a
+ %1 = load <16 x i8>, <16 x i8>* %a
; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
%2 = or <16 x i8> %1, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
; CHECK-DAG: ori.b [[R4:\$w[0-9]+]], [[R1]], 3
@@ -205,7 +205,7 @@ define void @or_v16i8_i(<16 x i8>* %c, <16 x i8>* %a) nounwind {
define void @or_v8i16_i(<8 x i16>* %c, <8 x i16>* %a) nounwind {
; CHECK: or_v8i16_i:
- %1 = load <8 x i16>* %a
+ %1 = load <8 x i16>, <8 x i16>* %a
; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
%2 = or <8 x i16> %1, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
; CHECK-DAG: ldi.h [[R3:\$w[0-9]+]], 3
@@ -220,7 +220,7 @@ define void @or_v8i16_i(<8 x i16>* %c, <8 x i16>* %a) nounwind {
define void @or_v4i32_i(<4 x i32>* %c, <4 x i32>* %a) nounwind {
; CHECK: or_v4i32_i:
- %1 = load <4 x i32>* %a
+ %1 = load <4 x i32>, <4 x i32>* %a
; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
%2 = or <4 x i32> %1, <i32 3, i32 3, i32 3, i32 3>
; CHECK-DAG: ldi.w [[R3:\$w[0-9]+]], 3
@@ -235,7 +235,7 @@ define void @or_v4i32_i(<4 x i32>* %c, <4 x i32>* %a) nounwind {
define void @or_v2i64_i(<2 x i64>* %c, <2 x i64>* %a) nounwind {
; CHECK: or_v2i64_i:
- %1 = load <2 x i64>* %a
+ %1 = load <2 x i64>, <2 x i64>* %a
; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
%2 = or <2 x i64> %1, <i64 3, i64 3>
; CHECK-DAG: ldi.d [[R3:\$w[0-9]+]], 3
@@ -250,9 +250,9 @@ define void @or_v2i64_i(<2 x i64>* %c, <2 x i64>* %a) nounwind {
define void @nor_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
; CHECK: nor_v16i8:
- %1 = load <16 x i8>* %a
+ %1 = load <16 x i8>, <16 x i8>* %a
; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
- %2 = load <16 x i8>* %b
+ %2 = load <16 x i8>, <16 x i8>* %b
; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
%3 = or <16 x i8> %1, %2
%4 = xor <16 x i8> %3, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
@@ -267,9 +267,9 @@ define void @nor_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
define void @nor_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
; CHECK: nor_v8i16:
- %1 = load <8 x i16>* %a
+ %1 = load <8 x i16>, <8 x i16>* %a
; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
- %2 = load <8 x i16>* %b
+ %2 = load <8 x i16>, <8 x i16>* %b
; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
%3 = or <8 x i16> %1, %2
%4 = xor <8 x i16> %3, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
@@ -284,9 +284,9 @@ define void @nor_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
define void @nor_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
; CHECK: nor_v4i32:
- %1 = load <4 x i32>* %a
+ %1 = load <4 x i32>, <4 x i32>* %a
; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
- %2 = load <4 x i32>* %b
+ %2 = load <4 x i32>, <4 x i32>* %b
; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
%3 = or <4 x i32> %1, %2
%4 = xor <4 x i32> %3, <i32 -1, i32 -1, i32 -1, i32 -1>
@@ -301,9 +301,9 @@ define void @nor_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
define void @nor_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
; CHECK: nor_v2i64:
- %1 = load <2 x i64>* %a
+ %1 = load <2 x i64>, <2 x i64>* %a
; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
- %2 = load <2 x i64>* %b
+ %2 = load <2 x i64>, <2 x i64>* %b
; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
%3 = or <2 x i64> %1, %2
%4 = xor <2 x i64> %3, <i64 -1, i64 -1>
@@ -318,7 +318,7 @@ define void @nor_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
define void @nor_v16i8_i(<16 x i8>* %c, <16 x i8>* %a) nounwind {
; CHECK: nor_v16i8_i:
- %1 = load <16 x i8>* %a
+ %1 = load <16 x i8>, <16 x i8>* %a
; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
%2 = or <16 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
%3 = xor <16 x i8> %2, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
@@ -333,7 +333,7 @@ define void @nor_v16i8_i(<16 x i8>* %c, <16 x i8>* %a) nounwind {
define void @nor_v8i16_i(<8 x i16>* %c, <8 x i16>* %a) nounwind {
; CHECK: nor_v8i16_i:
- %1 = load <8 x i16>* %a
+ %1 = load <8 x i16>, <8 x i16>* %a
; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
%2 = or <8 x i16> %1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
%3 = xor <8 x i16> %2, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
@@ -349,7 +349,7 @@ define void @nor_v8i16_i(<8 x i16>* %c, <8 x i16>* %a) nounwind {
define void @nor_v4i32_i(<4 x i32>* %c, <4 x i32>* %a) nounwind {
; CHECK: nor_v4i32_i:
- %1 = load <4 x i32>* %a
+ %1 = load <4 x i32>, <4 x i32>* %a
; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
%2 = or <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1>
%3 = xor <4 x i32> %2, <i32 -1, i32 -1, i32 -1, i32 -1>
@@ -365,7 +365,7 @@ define void @nor_v4i32_i(<4 x i32>* %c, <4 x i32>* %a) nounwind {
define void @nor_v2i64_i(<2 x i64>* %c, <2 x i64>* %a) nounwind {
; CHECK: nor_v2i64_i:
- %1 = load <2 x i64>* %a
+ %1 = load <2 x i64>, <2 x i64>* %a
; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
%2 = or <2 x i64> %1, <i64 1, i64 1>
%3 = xor <2 x i64> %2, <i64 -1, i64 -1>
@@ -381,9 +381,9 @@ define void @nor_v2i64_i(<2 x i64>* %c, <2 x i64>* %a) nounwind {
define void @xor_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
; CHECK: xor_v16i8:
- %1 = load <16 x i8>* %a
+ %1 = load <16 x i8>, <16 x i8>* %a
; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
- %2 = load <16 x i8>* %b
+ %2 = load <16 x i8>, <16 x i8>* %b
; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
%3 = xor <16 x i8> %1, %2
; CHECK-DAG: xor.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
@@ -397,9 +397,9 @@ define void @xor_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
define void @xor_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
; CHECK: xor_v8i16:
- %1 = load <8 x i16>* %a
+ %1 = load <8 x i16>, <8 x i16>* %a
; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
- %2 = load <8 x i16>* %b
+ %2 = load <8 x i16>, <8 x i16>* %b
; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
%3 = xor <8 x i16> %1, %2
; CHECK-DAG: xor.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
@@ -413,9 +413,9 @@ define void @xor_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
define void @xor_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
; CHECK: xor_v4i32:
- %1 = load <4 x i32>* %a
+ %1 = load <4 x i32>, <4 x i32>* %a
; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
- %2 = load <4 x i32>* %b
+ %2 = load <4 x i32>, <4 x i32>* %b
; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
%3 = xor <4 x i32> %1, %2
; CHECK-DAG: xor.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
@@ -429,9 +429,9 @@ define void @xor_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
define void @xor_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
; CHECK: xor_v2i64:
- %1 = load <2 x i64>* %a
+ %1 = load <2 x i64>, <2 x i64>* %a
; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
- %2 = load <2 x i64>* %b
+ %2 = load <2 x i64>, <2 x i64>* %b
; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
%3 = xor <2 x i64> %1, %2
; CHECK-DAG: xor.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
@@ -445,7 +445,7 @@ define void @xor_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
define void @xor_v16i8_i(<16 x i8>* %c, <16 x i8>* %a) nounwind {
; CHECK: xor_v16i8_i:
- %1 = load <16 x i8>* %a
+ %1 = load <16 x i8>, <16 x i8>* %a
; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
%2 = xor <16 x i8> %1, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
; CHECK-DAG: xori.b [[R4:\$w[0-9]+]], [[R1]], 3
@@ -459,7 +459,7 @@ define void @xor_v16i8_i(<16 x i8>* %c, <16 x i8>* %a) nounwind {
define void @xor_v8i16_i(<8 x i16>* %c, <8 x i16>* %a) nounwind {
; CHECK: xor_v8i16_i:
- %1 = load <8 x i16>* %a
+ %1 = load <8 x i16>, <8 x i16>* %a
; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
%2 = xor <8 x i16> %1, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
; CHECK-DAG: ldi.h [[R3:\$w[0-9]+]], 3
@@ -474,7 +474,7 @@ define void @xor_v8i16_i(<8 x i16>* %c, <8 x i16>* %a) nounwind {
define void @xor_v4i32_i(<4 x i32>* %c, <4 x i32>* %a) nounwind {
; CHECK: xor_v4i32_i:
- %1 = load <4 x i32>* %a
+ %1 = load <4 x i32>, <4 x i32>* %a
; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
%2 = xor <4 x i32> %1, <i32 3, i32 3, i32 3, i32 3>
; CHECK-DAG: ldi.w [[R3:\$w[0-9]+]], 3
@@ -489,7 +489,7 @@ define void @xor_v4i32_i(<4 x i32>* %c, <4 x i32>* %a) nounwind {
define void @xor_v2i64_i(<2 x i64>* %c, <2 x i64>* %a) nounwind {
; CHECK: xor_v2i64_i:
- %1 = load <2 x i64>* %a
+ %1 = load <2 x i64>, <2 x i64>* %a
; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
%2 = xor <2 x i64> %1, <i64 3, i64 3>
; CHECK-DAG: ldi.d [[R3:\$w[0-9]+]], 3
@@ -504,9 +504,9 @@ define void @xor_v2i64_i(<2 x i64>* %c, <2 x i64>* %a) nounwind {
define void @sll_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
; CHECK: sll_v16i8:
- %1 = load <16 x i8>* %a
+ %1 = load <16 x i8>, <16 x i8>* %a
; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
- %2 = load <16 x i8>* %b
+ %2 = load <16 x i8>, <16 x i8>* %b
; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
%3 = shl <16 x i8> %1, %2
; CHECK-DAG: sll.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
@@ -520,9 +520,9 @@ define void @sll_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
define void @sll_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
; CHECK: sll_v8i16:
- %1 = load <8 x i16>* %a
+ %1 = load <8 x i16>, <8 x i16>* %a
; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
- %2 = load <8 x i16>* %b
+ %2 = load <8 x i16>, <8 x i16>* %b
; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
%3 = shl <8 x i16> %1, %2
; CHECK-DAG: sll.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
@@ -536,9 +536,9 @@ define void @sll_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
define void @sll_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
; CHECK: sll_v4i32:
- %1 = load <4 x i32>* %a
+ %1 = load <4 x i32>, <4 x i32>* %a
; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
- %2 = load <4 x i32>* %b
+ %2 = load <4 x i32>, <4 x i32>* %b
; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
%3 = shl <4 x i32> %1, %2
; CHECK-DAG: sll.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
@@ -552,9 +552,9 @@ define void @sll_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
define void @sll_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
; CHECK: sll_v2i64:
- %1 = load <2 x i64>* %a
+ %1 = load <2 x i64>, <2 x i64>* %a
; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
- %2 = load <2 x i64>* %b
+ %2 = load <2 x i64>, <2 x i64>* %b
; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
%3 = shl <2 x i64> %1, %2
; CHECK-DAG: sll.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
@@ -568,7 +568,7 @@ define void @sll_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
define void @sll_v16i8_i(<16 x i8>* %c, <16 x i8>* %a) nounwind {
; CHECK: sll_v16i8_i:
- %1 = load <16 x i8>* %a
+ %1 = load <16 x i8>, <16 x i8>* %a
; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
%2 = shl <16 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
; CHECK-DAG: slli.b [[R4:\$w[0-9]+]], [[R1]], 1
@@ -582,7 +582,7 @@ define void @sll_v16i8_i(<16 x i8>* %c, <16 x i8>* %a) nounwind {
define void @sll_v8i16_i(<8 x i16>* %c, <8 x i16>* %a) nounwind {
; CHECK: sll_v8i16_i:
- %1 = load <8 x i16>* %a
+ %1 = load <8 x i16>, <8 x i16>* %a
; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
%2 = shl <8 x i16> %1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
; CHECK-DAG: slli.h [[R4:\$w[0-9]+]], [[R1]], 1
@@ -596,7 +596,7 @@ define void @sll_v8i16_i(<8 x i16>* %c, <8 x i16>* %a) nounwind {
define void @sll_v4i32_i(<4 x i32>* %c, <4 x i32>* %a) nounwind {
; CHECK: sll_v4i32_i:
- %1 = load <4 x i32>* %a
+ %1 = load <4 x i32>, <4 x i32>* %a
; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
%2 = shl <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1>
; CHECK-DAG: slli.w [[R4:\$w[0-9]+]], [[R1]], 1
@@ -610,7 +610,7 @@ define void @sll_v4i32_i(<4 x i32>* %c, <4 x i32>* %a) nounwind {
define void @sll_v2i64_i(<2 x i64>* %c, <2 x i64>* %a) nounwind {
; CHECK: sll_v2i64_i:
- %1 = load <2 x i64>* %a
+ %1 = load <2 x i64>, <2 x i64>* %a
; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
%2 = shl <2 x i64> %1, <i64 1, i64 1>
; CHECK-DAG: slli.d [[R4:\$w[0-9]+]], [[R1]], 1
@@ -624,9 +624,9 @@ define void @sll_v2i64_i(<2 x i64>* %c, <2 x i64>* %a) nounwind {
define void @sra_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
; CHECK: sra_v16i8:
- %1 = load <16 x i8>* %a
+ %1 = load <16 x i8>, <16 x i8>* %a
; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
- %2 = load <16 x i8>* %b
+ %2 = load <16 x i8>, <16 x i8>* %b
; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
%3 = ashr <16 x i8> %1, %2
; CHECK-DAG: sra.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
@@ -640,9 +640,9 @@ define void @sra_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
define void @sra_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
; CHECK: sra_v8i16:
- %1 = load <8 x i16>* %a
+ %1 = load <8 x i16>, <8 x i16>* %a
; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
- %2 = load <8 x i16>* %b
+ %2 = load <8 x i16>, <8 x i16>* %b
; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
%3 = ashr <8 x i16> %1, %2
; CHECK-DAG: sra.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
@@ -656,9 +656,9 @@ define void @sra_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
define void @sra_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
; CHECK: sra_v4i32:
- %1 = load <4 x i32>* %a
+ %1 = load <4 x i32>, <4 x i32>* %a
; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
- %2 = load <4 x i32>* %b
+ %2 = load <4 x i32>, <4 x i32>* %b
; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
%3 = ashr <4 x i32> %1, %2
; CHECK-DAG: sra.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
@@ -672,9 +672,9 @@ define void @sra_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
define void @sra_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
; CHECK: sra_v2i64:
- %1 = load <2 x i64>* %a
+ %1 = load <2 x i64>, <2 x i64>* %a
; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
- %2 = load <2 x i64>* %b
+ %2 = load <2 x i64>, <2 x i64>* %b
; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
%3 = ashr <2 x i64> %1, %2
; CHECK-DAG: sra.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
@@ -688,7 +688,7 @@ define void @sra_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
define void @sra_v16i8_i(<16 x i8>* %c, <16 x i8>* %a) nounwind {
; CHECK: sra_v16i8_i:
- %1 = load <16 x i8>* %a
+ %1 = load <16 x i8>, <16 x i8>* %a
; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
%2 = ashr <16 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
; CHECK-DAG: srai.b [[R4:\$w[0-9]+]], [[R1]], 1
@@ -702,7 +702,7 @@ define void @sra_v16i8_i(<16 x i8>* %c, <16 x i8>* %a) nounwind {
define void @sra_v8i16_i(<8 x i16>* %c, <8 x i16>* %a) nounwind {
; CHECK: sra_v8i16_i:
- %1 = load <8 x i16>* %a
+ %1 = load <8 x i16>, <8 x i16>* %a
; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
%2 = ashr <8 x i16> %1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
; CHECK-DAG: srai.h [[R4:\$w[0-9]+]], [[R1]], 1
@@ -716,7 +716,7 @@ define void @sra_v8i16_i(<8 x i16>* %c, <8 x i16>* %a) nounwind {
define void @sra_v4i32_i(<4 x i32>* %c, <4 x i32>* %a) nounwind {
; CHECK: sra_v4i32_i:
- %1 = load <4 x i32>* %a
+ %1 = load <4 x i32>, <4 x i32>* %a
; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
%2 = ashr <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1>
; CHECK-DAG: srai.w [[R4:\$w[0-9]+]], [[R1]], 1
@@ -730,7 +730,7 @@ define void @sra_v4i32_i(<4 x i32>* %c, <4 x i32>* %a) nounwind {
define void @sra_v2i64_i(<2 x i64>* %c, <2 x i64>* %a) nounwind {
; CHECK: sra_v2i64_i:
- %1 = load <2 x i64>* %a
+ %1 = load <2 x i64>, <2 x i64>* %a
; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
%2 = ashr <2 x i64> %1, <i64 1, i64 1>
; CHECK-DAG: srai.d [[R4:\$w[0-9]+]], [[R1]], 1
@@ -744,9 +744,9 @@ define void @sra_v2i64_i(<2 x i64>* %c, <2 x i64>* %a) nounwind {
define void @srl_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
; CHECK: srl_v16i8:
- %1 = load <16 x i8>* %a
+ %1 = load <16 x i8>, <16 x i8>* %a
; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
- %2 = load <16 x i8>* %b
+ %2 = load <16 x i8>, <16 x i8>* %b
; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
%3 = lshr <16 x i8> %1, %2
; CHECK-DAG: srl.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
@@ -760,9 +760,9 @@ define void @srl_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
define void @srl_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
; CHECK: srl_v8i16:
- %1 = load <8 x i16>* %a
+ %1 = load <8 x i16>, <8 x i16>* %a
; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
- %2 = load <8 x i16>* %b
+ %2 = load <8 x i16>, <8 x i16>* %b
; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
%3 = lshr <8 x i16> %1, %2
; CHECK-DAG: srl.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
@@ -776,9 +776,9 @@ define void @srl_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
define void @srl_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
; CHECK: srl_v4i32:
- %1 = load <4 x i32>* %a
+ %1 = load <4 x i32>, <4 x i32>* %a
; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
- %2 = load <4 x i32>* %b
+ %2 = load <4 x i32>, <4 x i32>* %b
; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
%3 = lshr <4 x i32> %1, %2
; CHECK-DAG: srl.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
@@ -792,9 +792,9 @@ define void @srl_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
define void @srl_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
; CHECK: srl_v2i64:
- %1 = load <2 x i64>* %a
+ %1 = load <2 x i64>, <2 x i64>* %a
; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
- %2 = load <2 x i64>* %b
+ %2 = load <2 x i64>, <2 x i64>* %b
; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
%3 = lshr <2 x i64> %1, %2
; CHECK-DAG: srl.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
@@ -808,7 +808,7 @@ define void @srl_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
define void @srl_v16i8_i(<16 x i8>* %c, <16 x i8>* %a) nounwind {
; CHECK: srl_v16i8_i:
- %1 = load <16 x i8>* %a
+ %1 = load <16 x i8>, <16 x i8>* %a
; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
%2 = lshr <16 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
; CHECK-DAG: srli.b [[R4:\$w[0-9]+]], [[R1]], 1
@@ -822,7 +822,7 @@ define void @srl_v16i8_i(<16 x i8>* %c, <16 x i8>* %a) nounwind {
define void @srl_v8i16_i(<8 x i16>* %c, <8 x i16>* %a) nounwind {
; CHECK: srl_v8i16_i:
- %1 = load <8 x i16>* %a
+ %1 = load <8 x i16>, <8 x i16>* %a
; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
%2 = lshr <8 x i16> %1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
; CHECK-DAG: srli.h [[R4:\$w[0-9]+]], [[R1]], 1
@@ -836,7 +836,7 @@ define void @srl_v8i16_i(<8 x i16>* %c, <8 x i16>* %a) nounwind {
define void @srl_v4i32_i(<4 x i32>* %c, <4 x i32>* %a) nounwind {
; CHECK: srl_v4i32_i:
- %1 = load <4 x i32>* %a
+ %1 = load <4 x i32>, <4 x i32>* %a
; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
%2 = lshr <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1>
; CHECK-DAG: srli.w [[R4:\$w[0-9]+]], [[R1]], 1
@@ -850,7 +850,7 @@ define void @srl_v4i32_i(<4 x i32>* %c, <4 x i32>* %a) nounwind {
define void @srl_v2i64_i(<2 x i64>* %c, <2 x i64>* %a) nounwind {
; CHECK: srl_v2i64_i:
- %1 = load <2 x i64>* %a
+ %1 = load <2 x i64>, <2 x i64>* %a
; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
%2 = lshr <2 x i64> %1, <i64 1, i64 1>
; CHECK-DAG: srli.d [[R4:\$w[0-9]+]], [[R1]], 1
@@ -864,7 +864,7 @@ define void @srl_v2i64_i(<2 x i64>* %c, <2 x i64>* %a) nounwind {
define void @ctpop_v16i8(<16 x i8>* %c, <16 x i8>* %a) nounwind {
; CHECK: ctpop_v16i8:
- %1 = load <16 x i8>* %a
+ %1 = load <16 x i8>, <16 x i8>* %a
; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
%2 = tail call <16 x i8> @llvm.ctpop.v16i8 (<16 x i8> %1)
; CHECK-DAG: pcnt.b [[R3:\$w[0-9]+]], [[R1]]
@@ -878,7 +878,7 @@ define void @ctpop_v16i8(<16 x i8>* %c, <16 x i8>* %a) nounwind {
define void @ctpop_v8i16(<8 x i16>* %c, <8 x i16>* %a) nounwind {
; CHECK: ctpop_v8i16:
- %1 = load <8 x i16>* %a
+ %1 = load <8 x i16>, <8 x i16>* %a
; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
%2 = tail call <8 x i16> @llvm.ctpop.v8i16 (<8 x i16> %1)
; CHECK-DAG: pcnt.h [[R3:\$w[0-9]+]], [[R1]]
@@ -892,7 +892,7 @@ define void @ctpop_v8i16(<8 x i16>* %c, <8 x i16>* %a) nounwind {
define void @ctpop_v4i32(<4 x i32>* %c, <4 x i32>* %a) nounwind {
; CHECK: ctpop_v4i32:
- %1 = load <4 x i32>* %a
+ %1 = load <4 x i32>, <4 x i32>* %a
; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
%2 = tail call <4 x i32> @llvm.ctpop.v4i32 (<4 x i32> %1)
; CHECK-DAG: pcnt.w [[R3:\$w[0-9]+]], [[R1]]
@@ -906,7 +906,7 @@ define void @ctpop_v4i32(<4 x i32>* %c, <4 x i32>* %a) nounwind {
define void @ctpop_v2i64(<2 x i64>* %c, <2 x i64>* %a) nounwind {
; CHECK: ctpop_v2i64:
- %1 = load <2 x i64>* %a
+ %1 = load <2 x i64>, <2 x i64>* %a
; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
%2 = tail call <2 x i64> @llvm.ctpop.v2i64 (<2 x i64> %1)
; CHECK-DAG: pcnt.d [[R3:\$w[0-9]+]], [[R1]]
@@ -920,7 +920,7 @@ define void @ctpop_v2i64(<2 x i64>* %c, <2 x i64>* %a) nounwind {
define void @ctlz_v16i8(<16 x i8>* %c, <16 x i8>* %a) nounwind {
; CHECK: ctlz_v16i8:
- %1 = load <16 x i8>* %a
+ %1 = load <16 x i8>, <16 x i8>* %a
; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
%2 = tail call <16 x i8> @llvm.ctlz.v16i8 (<16 x i8> %1)
; CHECK-DAG: nlzc.b [[R3:\$w[0-9]+]], [[R1]]
@@ -934,7 +934,7 @@ define void @ctlz_v16i8(<16 x i8>* %c, <16 x i8>* %a) nounwind {
define void @ctlz_v8i16(<8 x i16>* %c, <8 x i16>* %a) nounwind {
; CHECK: ctlz_v8i16:
- %1 = load <8 x i16>* %a
+ %1 = load <8 x i16>, <8 x i16>* %a
; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
%2 = tail call <8 x i16> @llvm.ctlz.v8i16 (<8 x i16> %1)
; CHECK-DAG: nlzc.h [[R3:\$w[0-9]+]], [[R1]]
@@ -948,7 +948,7 @@ define void @ctlz_v8i16(<8 x i16>* %c, <8 x i16>* %a) nounwind {
define void @ctlz_v4i32(<4 x i32>* %c, <4 x i32>* %a) nounwind {
; CHECK: ctlz_v4i32:
- %1 = load <4 x i32>* %a
+ %1 = load <4 x i32>, <4 x i32>* %a
; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
%2 = tail call <4 x i32> @llvm.ctlz.v4i32 (<4 x i32> %1)
; CHECK-DAG: nlzc.w [[R3:\$w[0-9]+]], [[R1]]
@@ -962,7 +962,7 @@ define void @ctlz_v4i32(<4 x i32>* %c, <4 x i32>* %a) nounwind {
define void @ctlz_v2i64(<2 x i64>* %c, <2 x i64>* %a) nounwind {
; CHECK: ctlz_v2i64:
- %1 = load <2 x i64>* %a
+ %1 = load <2 x i64>, <2 x i64>* %a
; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
%2 = tail call <2 x i64> @llvm.ctlz.v2i64 (<2 x i64> %1)
; CHECK-DAG: nlzc.d [[R3:\$w[0-9]+]], [[R1]]
@@ -976,11 +976,11 @@ define void @ctlz_v2i64(<2 x i64>* %c, <2 x i64>* %a) nounwind {
define void @bsel_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b, <16 x i8>* %m) nounwind {
; CHECK: bsel_v16i8:
- %1 = load <16 x i8>* %a
+ %1 = load <16 x i8>, <16 x i8>* %a
; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
- %2 = load <16 x i8>* %b
+ %2 = load <16 x i8>, <16 x i8>* %b
; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
- %3 = load <16 x i8>* %m
+ %3 = load <16 x i8>, <16 x i8>* %m
; CHECK-DAG: ld.b [[R3:\$w[0-9]+]], 0($7)
%4 = xor <16 x i8> %3, <i8 -1, i8 -1, i8 -1, i8 -1,
i8 -1, i8 -1, i8 -1, i8 -1,
@@ -1002,9 +1002,9 @@ define void @bsel_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b, <16 x i8>*
define void @bsel_v16i8_i(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %m) nounwind {
; CHECK: bsel_v16i8_i:
- %1 = load <16 x i8>* %a
+ %1 = load <16 x i8>, <16 x i8>* %a
; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
- %2 = load <16 x i8>* %m
+ %2 = load <16 x i8>, <16 x i8>* %m
; CHECK-DAG: ld.b [[R3:\$w[0-9]+]], 0($6)
%3 = xor <16 x i8> %2, <i8 -1, i8 -1, i8 -1, i8 -1,
i8 -1, i8 -1, i8 -1, i8 -1,
@@ -1027,9 +1027,9 @@ define void @bsel_v16i8_i(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %m) nounwind
define void @bsel_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
; CHECK: bsel_v8i16:
- %1 = load <8 x i16>* %a
+ %1 = load <8 x i16>, <8 x i16>* %a
; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
- %2 = load <8 x i16>* %b
+ %2 = load <8 x i16>, <8 x i16>* %b
; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
%3 = and <8 x i16> %1, <i16 6, i16 6, i16 6, i16 6,
i16 6, i16 6, i16 6, i16 6>
@@ -1048,9 +1048,9 @@ define void @bsel_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
define void @bsel_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
; CHECK: bsel_v4i32:
- %1 = load <4 x i32>* %a
+ %1 = load <4 x i32>, <4 x i32>* %a
; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
- %2 = load <4 x i32>* %b
+ %2 = load <4 x i32>, <4 x i32>* %b
; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
%3 = and <4 x i32> %1, <i32 6, i32 6, i32 6, i32 6>
%4 = and <4 x i32> %2, <i32 4294967289, i32 4294967289, i32 4294967289, i32 4294967289>
@@ -1067,9 +1067,9 @@ define void @bsel_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
define void @bsel_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
; CHECK: bsel_v2i64:
- %1 = load <2 x i64>* %a
+ %1 = load <2 x i64>, <2 x i64>* %a
; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
- %2 = load <2 x i64>* %b
+ %2 = load <2 x i64>, <2 x i64>* %b
; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
%3 = and <2 x i64> %1, <i64 6, i64 6>
%4 = and <2 x i64> %2, <i64 18446744073709551609, i64 18446744073709551609>
@@ -1086,9 +1086,9 @@ define void @bsel_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
define void @binsl_v16i8_i(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
; CHECK: binsl_v16i8_i:
- %1 = load <16 x i8>* %a
+ %1 = load <16 x i8>, <16 x i8>* %a
; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
- %2 = load <16 x i8>* %b
+ %2 = load <16 x i8>, <16 x i8>* %b
; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
%3 = and <16 x i8> %1, <i8 192, i8 192, i8 192, i8 192,
i8 192, i8 192, i8 192, i8 192,
@@ -1110,9 +1110,9 @@ define void @binsl_v16i8_i(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind
define void @binsl_v8i16_i(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
; CHECK: binsl_v8i16_i:
- %1 = load <8 x i16>* %a
+ %1 = load <8 x i16>, <8 x i16>* %a
; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
- %2 = load <8 x i16>* %b
+ %2 = load <8 x i16>, <8 x i16>* %b
; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
%3 = and <8 x i16> %1, <i16 49152, i16 49152, i16 49152, i16 49152,
i16 49152, i16 49152, i16 49152, i16 49152>
@@ -1130,9 +1130,9 @@ define void @binsl_v8i16_i(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind
define void @binsl_v4i32_i(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
; CHECK: binsl_v4i32_i:
- %1 = load <4 x i32>* %a
+ %1 = load <4 x i32>, <4 x i32>* %a
; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
- %2 = load <4 x i32>* %b
+ %2 = load <4 x i32>, <4 x i32>* %b
; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
%3 = and <4 x i32> %1, <i32 3221225472, i32 3221225472, i32 3221225472, i32 3221225472>
%4 = and <4 x i32> %2, <i32 1073741823, i32 1073741823, i32 1073741823, i32 1073741823>
@@ -1148,9 +1148,9 @@ define void @binsl_v4i32_i(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind
define void @binsl_v2i64_i(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
; CHECK: binsl_v2i64_i:
- %1 = load <2 x i64>* %a
+ %1 = load <2 x i64>, <2 x i64>* %a
; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
- %2 = load <2 x i64>* %b
+ %2 = load <2 x i64>, <2 x i64>* %b
; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
%3 = and <2 x i64> %1, <i64 18446744073709551608, i64 18446744073709551608>
%4 = and <2 x i64> %2, <i64 7, i64 7>
@@ -1170,9 +1170,9 @@ define void @binsl_v2i64_i(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind
define void @binsr_v16i8_i(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
; CHECK: binsr_v16i8_i:
- %1 = load <16 x i8>* %a
+ %1 = load <16 x i8>, <16 x i8>* %a
; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
- %2 = load <16 x i8>* %b
+ %2 = load <16 x i8>, <16 x i8>* %b
; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
%3 = and <16 x i8> %1, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3,
i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
@@ -1192,9 +1192,9 @@ define void @binsr_v16i8_i(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind
define void @binsr_v8i16_i(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
; CHECK: binsr_v8i16_i:
- %1 = load <8 x i16>* %a
+ %1 = load <8 x i16>, <8 x i16>* %a
; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
- %2 = load <8 x i16>* %b
+ %2 = load <8 x i16>, <8 x i16>* %b
; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
%3 = and <8 x i16> %1, <i16 3, i16 3, i16 3, i16 3,
i16 3, i16 3, i16 3, i16 3>
@@ -1212,9 +1212,9 @@ define void @binsr_v8i16_i(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind
define void @binsr_v4i32_i(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
; CHECK: binsr_v4i32_i:
- %1 = load <4 x i32>* %a
+ %1 = load <4 x i32>, <4 x i32>* %a
; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
- %2 = load <4 x i32>* %b
+ %2 = load <4 x i32>, <4 x i32>* %b
; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
%3 = and <4 x i32> %1, <i32 3, i32 3, i32 3, i32 3>
%4 = and <4 x i32> %2, <i32 4294967292, i32 4294967292, i32 4294967292, i32 4294967292>
@@ -1230,9 +1230,9 @@ define void @binsr_v4i32_i(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind
define void @binsr_v2i64_i(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
; CHECK: binsr_v2i64_i:
- %1 = load <2 x i64>* %a
+ %1 = load <2 x i64>, <2 x i64>* %a
; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
- %2 = load <2 x i64>* %b
+ %2 = load <2 x i64>, <2 x i64>* %b
; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
%3 = and <2 x i64> %1, <i64 3, i64 3>
%4 = and <2 x i64> %2, <i64 18446744073709551612, i64 18446744073709551612>
@@ -1248,9 +1248,9 @@ define void @binsr_v2i64_i(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind
define void @bclr_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
; CHECK: bclr_v16i8:
- %1 = load <16 x i8>* %a
+ %1 = load <16 x i8>, <16 x i8>* %a
; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
- %2 = load <16 x i8>* %b
+ %2 = load <16 x i8>, <16 x i8>* %b
; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
%3 = shl <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>, %2
%4 = xor <16 x i8> %3, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
@@ -1266,9 +1266,9 @@ define void @bclr_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
define void @bclr_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
; CHECK: bclr_v8i16:
- %1 = load <8 x i16>* %a
+ %1 = load <8 x i16>, <8 x i16>* %a
; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
- %2 = load <8 x i16>* %b
+ %2 = load <8 x i16>, <8 x i16>* %b
; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
%3 = shl <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, %2
%4 = xor <8 x i16> %3, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
@@ -1284,9 +1284,9 @@ define void @bclr_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
define void @bclr_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
; CHECK: bclr_v4i32:
- %1 = load <4 x i32>* %a
+ %1 = load <4 x i32>, <4 x i32>* %a
; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
- %2 = load <4 x i32>* %b
+ %2 = load <4 x i32>, <4 x i32>* %b
; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
%3 = shl <4 x i32> <i32 1, i32 1, i32 1, i32 1>, %2
%4 = xor <4 x i32> %3, <i32 -1, i32 -1, i32 -1, i32 -1>
@@ -1302,9 +1302,9 @@ define void @bclr_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
define void @bclr_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
; CHECK: bclr_v2i64:
- %1 = load <2 x i64>* %a
+ %1 = load <2 x i64>, <2 x i64>* %a
; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
- %2 = load <2 x i64>* %b
+ %2 = load <2 x i64>, <2 x i64>* %b
; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
%3 = shl <2 x i64> <i64 1, i64 1>, %2
%4 = xor <2 x i64> %3, <i64 -1, i64 -1>
@@ -1320,9 +1320,9 @@ define void @bclr_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
define void @bset_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
; CHECK: bset_v16i8:
- %1 = load <16 x i8>* %a
+ %1 = load <16 x i8>, <16 x i8>* %a
; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
- %2 = load <16 x i8>* %b
+ %2 = load <16 x i8>, <16 x i8>* %b
; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
%3 = shl <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>, %2
%4 = or <16 x i8> %1, %3
@@ -1337,9 +1337,9 @@ define void @bset_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
define void @bset_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
; CHECK: bset_v8i16:
- %1 = load <8 x i16>* %a
+ %1 = load <8 x i16>, <8 x i16>* %a
; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
- %2 = load <8 x i16>* %b
+ %2 = load <8 x i16>, <8 x i16>* %b
; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
%3 = shl <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, %2
%4 = or <8 x i16> %1, %3
@@ -1354,9 +1354,9 @@ define void @bset_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
define void @bset_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
; CHECK: bset_v4i32:
- %1 = load <4 x i32>* %a
+ %1 = load <4 x i32>, <4 x i32>* %a
; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
- %2 = load <4 x i32>* %b
+ %2 = load <4 x i32>, <4 x i32>* %b
; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
%3 = shl <4 x i32> <i32 1, i32 1, i32 1, i32 1>, %2
%4 = or <4 x i32> %1, %3
@@ -1371,9 +1371,9 @@ define void @bset_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
define void @bset_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
; CHECK: bset_v2i64:
- %1 = load <2 x i64>* %a
+ %1 = load <2 x i64>, <2 x i64>* %a
; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
- %2 = load <2 x i64>* %b
+ %2 = load <2 x i64>, <2 x i64>* %b
; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
%3 = shl <2 x i64> <i64 1, i64 1>, %2
%4 = or <2 x i64> %1, %3
@@ -1388,9 +1388,9 @@ define void @bset_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
define void @bneg_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
; CHECK: bneg_v16i8:
- %1 = load <16 x i8>* %a
+ %1 = load <16 x i8>, <16 x i8>* %a
; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
- %2 = load <16 x i8>* %b
+ %2 = load <16 x i8>, <16 x i8>* %b
; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
%3 = shl <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>, %2
%4 = xor <16 x i8> %1, %3
@@ -1405,9 +1405,9 @@ define void @bneg_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
define void @bneg_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
; CHECK: bneg_v8i16:
- %1 = load <8 x i16>* %a
+ %1 = load <8 x i16>, <8 x i16>* %a
; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
- %2 = load <8 x i16>* %b
+ %2 = load <8 x i16>, <8 x i16>* %b
; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
%3 = shl <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, %2
%4 = xor <8 x i16> %1, %3
@@ -1422,9 +1422,9 @@ define void @bneg_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
define void @bneg_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
; CHECK: bneg_v4i32:
- %1 = load <4 x i32>* %a
+ %1 = load <4 x i32>, <4 x i32>* %a
; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
- %2 = load <4 x i32>* %b
+ %2 = load <4 x i32>, <4 x i32>* %b
; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
%3 = shl <4 x i32> <i32 1, i32 1, i32 1, i32 1>, %2
%4 = xor <4 x i32> %1, %3
@@ -1439,9 +1439,9 @@ define void @bneg_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
define void @bneg_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
; CHECK: bneg_v2i64:
- %1 = load <2 x i64>* %a
+ %1 = load <2 x i64>, <2 x i64>* %a
; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
- %2 = load <2 x i64>* %b
+ %2 = load <2 x i64>, <2 x i64>* %b
; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
%3 = shl <2 x i64> <i64 1, i64 1>, %2
%4 = xor <2 x i64> %1, %3
@@ -1456,7 +1456,7 @@ define void @bneg_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
define void @bclri_v16i8(<16 x i8>* %c, <16 x i8>* %a) nounwind {
; CHECK: bclri_v16i8:
- %1 = load <16 x i8>* %a
+ %1 = load <16 x i8>, <16 x i8>* %a
; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
%2 = xor <16 x i8> <i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8>,
<i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
@@ -1473,7 +1473,7 @@ define void @bclri_v16i8(<16 x i8>* %c, <16 x i8>* %a) nounwind {
define void @bclri_v8i16(<8 x i16>* %c, <8 x i16>* %a) nounwind {
; CHECK: bclri_v8i16:
- %1 = load <8 x i16>* %a
+ %1 = load <8 x i16>, <8 x i16>* %a
; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
%2 = xor <8 x i16> <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>,
<i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
@@ -1489,7 +1489,7 @@ define void @bclri_v8i16(<8 x i16>* %c, <8 x i16>* %a) nounwind {
define void @bclri_v4i32(<4 x i32>* %c, <4 x i32>* %a) nounwind {
; CHECK: bclri_v4i32:
- %1 = load <4 x i32>* %a
+ %1 = load <4 x i32>, <4 x i32>* %a
; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
%2 = xor <4 x i32> <i32 8, i32 8, i32 8, i32 8>,
<i32 -1, i32 -1, i32 -1, i32 -1>
@@ -1505,7 +1505,7 @@ define void @bclri_v4i32(<4 x i32>* %c, <4 x i32>* %a) nounwind {
define void @bclri_v2i64(<2 x i64>* %c, <2 x i64>* %a) nounwind {
; CHECK: bclri_v2i64:
- %1 = load <2 x i64>* %a
+ %1 = load <2 x i64>, <2 x i64>* %a
; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
%2 = xor <2 x i64> <i64 8, i64 8>,
<i64 -1, i64 -1>
@@ -1521,7 +1521,7 @@ define void @bclri_v2i64(<2 x i64>* %c, <2 x i64>* %a) nounwind {
define void @bseti_v16i8(<16 x i8>* %c, <16 x i8>* %a) nounwind {
; CHECK: bseti_v16i8:
- %1 = load <16 x i8>* %a
+ %1 = load <16 x i8>, <16 x i8>* %a
; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
%2 = or <16 x i8> %1, <i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8>
; CHECK-DAG: bseti.b [[R3:\$w[0-9]+]], [[R1]], 3
@@ -1535,7 +1535,7 @@ define void @bseti_v16i8(<16 x i8>* %c, <16 x i8>* %a) nounwind {
define void @bseti_v8i16(<8 x i16>* %c, <8 x i16>* %a) nounwind {
; CHECK: bseti_v8i16:
- %1 = load <8 x i16>* %a
+ %1 = load <8 x i16>, <8 x i16>* %a
; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
%2 = or <8 x i16> %1, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
; CHECK-DAG: bseti.h [[R3:\$w[0-9]+]], [[R1]], 3
@@ -1549,7 +1549,7 @@ define void @bseti_v8i16(<8 x i16>* %c, <8 x i16>* %a) nounwind {
define void @bseti_v4i32(<4 x i32>* %c, <4 x i32>* %a) nounwind {
; CHECK: bseti_v4i32:
- %1 = load <4 x i32>* %a
+ %1 = load <4 x i32>, <4 x i32>* %a
; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
%2 = or <4 x i32> %1, <i32 8, i32 8, i32 8, i32 8>
; CHECK-DAG: bseti.w [[R3:\$w[0-9]+]], [[R1]], 3
@@ -1563,7 +1563,7 @@ define void @bseti_v4i32(<4 x i32>* %c, <4 x i32>* %a) nounwind {
define void @bseti_v2i64(<2 x i64>* %c, <2 x i64>* %a) nounwind {
; CHECK: bseti_v2i64:
- %1 = load <2 x i64>* %a
+ %1 = load <2 x i64>, <2 x i64>* %a
; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
%2 = or <2 x i64> %1, <i64 8, i64 8>
; CHECK-DAG: bseti.d [[R3:\$w[0-9]+]], [[R1]], 3
@@ -1577,7 +1577,7 @@ define void @bseti_v2i64(<2 x i64>* %c, <2 x i64>* %a) nounwind {
define void @bnegi_v16i8(<16 x i8>* %c, <16 x i8>* %a) nounwind {
; CHECK: bnegi_v16i8:
- %1 = load <16 x i8>* %a
+ %1 = load <16 x i8>, <16 x i8>* %a
; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
%2 = xor <16 x i8> %1, <i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8>
; CHECK-DAG: bnegi.b [[R3:\$w[0-9]+]], [[R1]], 3
@@ -1591,7 +1591,7 @@ define void @bnegi_v16i8(<16 x i8>* %c, <16 x i8>* %a) nounwind {
define void @bnegi_v8i16(<8 x i16>* %c, <8 x i16>* %a) nounwind {
; CHECK: bnegi_v8i16:
- %1 = load <8 x i16>* %a
+ %1 = load <8 x i16>, <8 x i16>* %a
; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
%2 = xor <8 x i16> %1, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
; CHECK-DAG: bnegi.h [[R3:\$w[0-9]+]], [[R1]], 3
@@ -1605,7 +1605,7 @@ define void @bnegi_v8i16(<8 x i16>* %c, <8 x i16>* %a) nounwind {
define void @bnegi_v4i32(<4 x i32>* %c, <4 x i32>* %a) nounwind {
; CHECK: bnegi_v4i32:
- %1 = load <4 x i32>* %a
+ %1 = load <4 x i32>, <4 x i32>* %a
; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
%2 = xor <4 x i32> %1, <i32 8, i32 8, i32 8, i32 8>
; CHECK-DAG: bnegi.w [[R3:\$w[0-9]+]], [[R1]], 3
@@ -1619,7 +1619,7 @@ define void @bnegi_v4i32(<4 x i32>* %c, <4 x i32>* %a) nounwind {
define void @bnegi_v2i64(<2 x i64>* %c, <2 x i64>* %a) nounwind {
; CHECK: bnegi_v2i64:
- %1 = load <2 x i64>* %a
+ %1 = load <2 x i64>, <2 x i64>* %a
; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
%2 = xor <2 x i64> %1, <i64 8, i64 8>
; CHECK-DAG: bnegi.d [[R3:\$w[0-9]+]], [[R1]], 3
diff --git a/test/CodeGen/Mips/msa/compare.ll b/test/CodeGen/Mips/msa/compare.ll
index 87ca1482da81..bc4f6e7e3946 100644
--- a/test/CodeGen/Mips/msa/compare.ll
+++ b/test/CodeGen/Mips/msa/compare.ll
@@ -4,9 +4,9 @@
define void @ceq_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
; CHECK: ceq_v16i8:
- %1 = load <16 x i8>* %a
+ %1 = load <16 x i8>, <16 x i8>* %a
; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
- %2 = load <16 x i8>* %b
+ %2 = load <16 x i8>, <16 x i8>* %b
; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
%3 = icmp eq <16 x i8> %1, %2
%4 = sext <16 x i1> %3 to <16 x i8>
@@ -21,9 +21,9 @@ define void @ceq_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
define void @ceq_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
; CHECK: ceq_v8i16:
- %1 = load <8 x i16>* %a
+ %1 = load <8 x i16>, <8 x i16>* %a
; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
- %2 = load <8 x i16>* %b
+ %2 = load <8 x i16>, <8 x i16>* %b
; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
%3 = icmp eq <8 x i16> %1, %2
%4 = sext <8 x i1> %3 to <8 x i16>
@@ -38,9 +38,9 @@ define void @ceq_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
define void @ceq_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
; CHECK: ceq_v4i32:
- %1 = load <4 x i32>* %a
+ %1 = load <4 x i32>, <4 x i32>* %a
; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
- %2 = load <4 x i32>* %b
+ %2 = load <4 x i32>, <4 x i32>* %b
; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
%3 = icmp eq <4 x i32> %1, %2
%4 = sext <4 x i1> %3 to <4 x i32>
@@ -55,9 +55,9 @@ define void @ceq_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
define void @ceq_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
; CHECK: ceq_v2i64:
- %1 = load <2 x i64>* %a
+ %1 = load <2 x i64>, <2 x i64>* %a
; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
- %2 = load <2 x i64>* %b
+ %2 = load <2 x i64>, <2 x i64>* %b
; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
%3 = icmp eq <2 x i64> %1, %2
%4 = sext <2 x i1> %3 to <2 x i64>
@@ -72,9 +72,9 @@ define void @ceq_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
define void @cle_s_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
; CHECK: cle_s_v16i8:
- %1 = load <16 x i8>* %a
+ %1 = load <16 x i8>, <16 x i8>* %a
; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
- %2 = load <16 x i8>* %b
+ %2 = load <16 x i8>, <16 x i8>* %b
; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
%3 = icmp sle <16 x i8> %1, %2
%4 = sext <16 x i1> %3 to <16 x i8>
@@ -89,9 +89,9 @@ define void @cle_s_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
define void @cle_s_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
; CHECK: cle_s_v8i16:
- %1 = load <8 x i16>* %a
+ %1 = load <8 x i16>, <8 x i16>* %a
; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
- %2 = load <8 x i16>* %b
+ %2 = load <8 x i16>, <8 x i16>* %b
; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
%3 = icmp sle <8 x i16> %1, %2
%4 = sext <8 x i1> %3 to <8 x i16>
@@ -106,9 +106,9 @@ define void @cle_s_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
define void @cle_s_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
; CHECK: cle_s_v4i32:
- %1 = load <4 x i32>* %a
+ %1 = load <4 x i32>, <4 x i32>* %a
; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
- %2 = load <4 x i32>* %b
+ %2 = load <4 x i32>, <4 x i32>* %b
; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
%3 = icmp sle <4 x i32> %1, %2
%4 = sext <4 x i1> %3 to <4 x i32>
@@ -123,9 +123,9 @@ define void @cle_s_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
define void @cle_s_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
; CHECK: cle_s_v2i64:
- %1 = load <2 x i64>* %a
+ %1 = load <2 x i64>, <2 x i64>* %a
; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
- %2 = load <2 x i64>* %b
+ %2 = load <2 x i64>, <2 x i64>* %b
; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
%3 = icmp sle <2 x i64> %1, %2
%4 = sext <2 x i1> %3 to <2 x i64>
@@ -140,9 +140,9 @@ define void @cle_s_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
define void @cle_u_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
; CHECK: cle_u_v16i8:
- %1 = load <16 x i8>* %a
+ %1 = load <16 x i8>, <16 x i8>* %a
; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
- %2 = load <16 x i8>* %b
+ %2 = load <16 x i8>, <16 x i8>* %b
; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
%3 = icmp ule <16 x i8> %1, %2
%4 = sext <16 x i1> %3 to <16 x i8>
@@ -157,9 +157,9 @@ define void @cle_u_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
define void @cle_u_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
; CHECK: cle_u_v8i16:
- %1 = load <8 x i16>* %a
+ %1 = load <8 x i16>, <8 x i16>* %a
; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
- %2 = load <8 x i16>* %b
+ %2 = load <8 x i16>, <8 x i16>* %b
; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
%3 = icmp ule <8 x i16> %1, %2
%4 = sext <8 x i1> %3 to <8 x i16>
@@ -174,9 +174,9 @@ define void @cle_u_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
define void @cle_u_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
; CHECK: cle_u_v4i32:
- %1 = load <4 x i32>* %a
+ %1 = load <4 x i32>, <4 x i32>* %a
; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
- %2 = load <4 x i32>* %b
+ %2 = load <4 x i32>, <4 x i32>* %b
; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
%3 = icmp ule <4 x i32> %1, %2
%4 = sext <4 x i1> %3 to <4 x i32>
@@ -191,9 +191,9 @@ define void @cle_u_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
define void @cle_u_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
; CHECK: cle_u_v2i64:
- %1 = load <2 x i64>* %a
+ %1 = load <2 x i64>, <2 x i64>* %a
; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
- %2 = load <2 x i64>* %b
+ %2 = load <2 x i64>, <2 x i64>* %b
; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
%3 = icmp ule <2 x i64> %1, %2
%4 = sext <2 x i1> %3 to <2 x i64>
@@ -208,9 +208,9 @@ define void @cle_u_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
define void @clt_s_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
; CHECK: clt_s_v16i8:
- %1 = load <16 x i8>* %a
+ %1 = load <16 x i8>, <16 x i8>* %a
; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
- %2 = load <16 x i8>* %b
+ %2 = load <16 x i8>, <16 x i8>* %b
; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
%3 = icmp slt <16 x i8> %1, %2
%4 = sext <16 x i1> %3 to <16 x i8>
@@ -225,9 +225,9 @@ define void @clt_s_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
define void @clt_s_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
; CHECK: clt_s_v8i16:
- %1 = load <8 x i16>* %a
+ %1 = load <8 x i16>, <8 x i16>* %a
; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
- %2 = load <8 x i16>* %b
+ %2 = load <8 x i16>, <8 x i16>* %b
; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
%3 = icmp slt <8 x i16> %1, %2
%4 = sext <8 x i1> %3 to <8 x i16>
@@ -242,9 +242,9 @@ define void @clt_s_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
define void @clt_s_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
; CHECK: clt_s_v4i32:
- %1 = load <4 x i32>* %a
+ %1 = load <4 x i32>, <4 x i32>* %a
; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
- %2 = load <4 x i32>* %b
+ %2 = load <4 x i32>, <4 x i32>* %b
; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
%3 = icmp slt <4 x i32> %1, %2
%4 = sext <4 x i1> %3 to <4 x i32>
@@ -259,9 +259,9 @@ define void @clt_s_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
define void @clt_s_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
; CHECK: clt_s_v2i64:
- %1 = load <2 x i64>* %a
+ %1 = load <2 x i64>, <2 x i64>* %a
; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
- %2 = load <2 x i64>* %b
+ %2 = load <2 x i64>, <2 x i64>* %b
; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
%3 = icmp slt <2 x i64> %1, %2
%4 = sext <2 x i1> %3 to <2 x i64>
@@ -276,9 +276,9 @@ define void @clt_s_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
define void @clt_u_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
; CHECK: clt_u_v16i8:
- %1 = load <16 x i8>* %a
+ %1 = load <16 x i8>, <16 x i8>* %a
; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
- %2 = load <16 x i8>* %b
+ %2 = load <16 x i8>, <16 x i8>* %b
; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
%3 = icmp ult <16 x i8> %1, %2
%4 = sext <16 x i1> %3 to <16 x i8>
@@ -293,9 +293,9 @@ define void @clt_u_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
define void @clt_u_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
; CHECK: clt_u_v8i16:
- %1 = load <8 x i16>* %a
+ %1 = load <8 x i16>, <8 x i16>* %a
; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
- %2 = load <8 x i16>* %b
+ %2 = load <8 x i16>, <8 x i16>* %b
; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
%3 = icmp ult <8 x i16> %1, %2
%4 = sext <8 x i1> %3 to <8 x i16>
@@ -310,9 +310,9 @@ define void @clt_u_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
define void @clt_u_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
; CHECK: clt_u_v4i32:
- %1 = load <4 x i32>* %a
+ %1 = load <4 x i32>, <4 x i32>* %a
; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
- %2 = load <4 x i32>* %b
+ %2 = load <4 x i32>, <4 x i32>* %b
; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
%3 = icmp ult <4 x i32> %1, %2
%4 = sext <4 x i1> %3 to <4 x i32>
@@ -327,9 +327,9 @@ define void @clt_u_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
define void @clt_u_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
; CHECK: clt_u_v2i64:
- %1 = load <2 x i64>* %a
+ %1 = load <2 x i64>, <2 x i64>* %a
; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
- %2 = load <2 x i64>* %b
+ %2 = load <2 x i64>, <2 x i64>* %b
; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
%3 = icmp ult <2 x i64> %1, %2
%4 = sext <2 x i1> %3 to <2 x i64>
@@ -345,9 +345,9 @@ define void @clt_u_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
; issues in this area.
define void @cne_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
; CHECK: cne_v16i8:
- %1 = load <16 x i8>* %a
+ %1 = load <16 x i8>, <16 x i8>* %a
; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
- %2 = load <16 x i8>* %b
+ %2 = load <16 x i8>, <16 x i8>* %b
; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
%3 = icmp ne <16 x i8> %1, %2
%4 = sext <16 x i1> %3 to <16 x i8>
@@ -365,9 +365,9 @@ define void @cne_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
define void @cne_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
; CHECK: cne_v8i16:
- %1 = load <8 x i16>* %a
+ %1 = load <8 x i16>, <8 x i16>* %a
; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
- %2 = load <8 x i16>* %b
+ %2 = load <8 x i16>, <8 x i16>* %b
; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
%3 = icmp ne <8 x i16> %1, %2
%4 = sext <8 x i1> %3 to <8 x i16>
@@ -387,9 +387,9 @@ define void @cne_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
define void @cne_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
; CHECK: cne_v4i32:
- %1 = load <4 x i32>* %a
+ %1 = load <4 x i32>, <4 x i32>* %a
; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
- %2 = load <4 x i32>* %b
+ %2 = load <4 x i32>, <4 x i32>* %b
; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
%3 = icmp ne <4 x i32> %1, %2
%4 = sext <4 x i1> %3 to <4 x i32>
@@ -409,9 +409,9 @@ define void @cne_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
define void @cne_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
; CHECK: cne_v2i64:
- %1 = load <2 x i64>* %a
+ %1 = load <2 x i64>, <2 x i64>* %a
; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
- %2 = load <2 x i64>* %b
+ %2 = load <2 x i64>, <2 x i64>* %b
; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
%3 = icmp ne <2 x i64> %1, %2
%4 = sext <2 x i1> %3 to <2 x i64>
@@ -429,7 +429,7 @@ define void @cne_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
define void @ceqi_v16i8(<16 x i8>* %c, <16 x i8>* %a) nounwind {
; CHECK: ceqi_v16i8:
- %1 = load <16 x i8>* %a
+ %1 = load <16 x i8>, <16 x i8>* %a
; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
%2 = icmp eq <16 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
%3 = sext <16 x i1> %2 to <16 x i8>
@@ -444,7 +444,7 @@ define void @ceqi_v16i8(<16 x i8>* %c, <16 x i8>* %a) nounwind {
define void @ceqi_v8i16(<8 x i16>* %c, <8 x i16>* %a) nounwind {
; CHECK: ceqi_v8i16:
- %1 = load <8 x i16>* %a
+ %1 = load <8 x i16>, <8 x i16>* %a
; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
%2 = icmp eq <8 x i16> %1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
%3 = sext <8 x i1> %2 to <8 x i16>
@@ -459,7 +459,7 @@ define void @ceqi_v8i16(<8 x i16>* %c, <8 x i16>* %a) nounwind {
define void @ceqi_v4i32(<4 x i32>* %c, <4 x i32>* %a) nounwind {
; CHECK: ceqi_v4i32:
- %1 = load <4 x i32>* %a
+ %1 = load <4 x i32>, <4 x i32>* %a
; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
%2 = icmp eq <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1>
%3 = sext <4 x i1> %2 to <4 x i32>
@@ -474,7 +474,7 @@ define void @ceqi_v4i32(<4 x i32>* %c, <4 x i32>* %a) nounwind {
define void @ceqi_v2i64(<2 x i64>* %c, <2 x i64>* %a) nounwind {
; CHECK: ceqi_v2i64:
- %1 = load <2 x i64>* %a
+ %1 = load <2 x i64>, <2 x i64>* %a
; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
%2 = icmp eq <2 x i64> %1, <i64 1, i64 1>
%3 = sext <2 x i1> %2 to <2 x i64>
@@ -489,7 +489,7 @@ define void @ceqi_v2i64(<2 x i64>* %c, <2 x i64>* %a) nounwind {
define void @clei_s_v16i8(<16 x i8>* %c, <16 x i8>* %a) nounwind {
; CHECK: clei_s_v16i8:
- %1 = load <16 x i8>* %a
+ %1 = load <16 x i8>, <16 x i8>* %a
; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
%2 = icmp sle <16 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
%3 = sext <16 x i1> %2 to <16 x i8>
@@ -504,7 +504,7 @@ define void @clei_s_v16i8(<16 x i8>* %c, <16 x i8>* %a) nounwind {
define void @clei_s_v8i16(<8 x i16>* %c, <8 x i16>* %a) nounwind {
; CHECK: clei_s_v8i16:
- %1 = load <8 x i16>* %a
+ %1 = load <8 x i16>, <8 x i16>* %a
; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
%2 = icmp sle <8 x i16> %1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
%3 = sext <8 x i1> %2 to <8 x i16>
@@ -519,7 +519,7 @@ define void @clei_s_v8i16(<8 x i16>* %c, <8 x i16>* %a) nounwind {
define void @clei_s_v4i32(<4 x i32>* %c, <4 x i32>* %a) nounwind {
; CHECK: clei_s_v4i32:
- %1 = load <4 x i32>* %a
+ %1 = load <4 x i32>, <4 x i32>* %a
; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
%2 = icmp sle <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1>
%3 = sext <4 x i1> %2 to <4 x i32>
@@ -534,7 +534,7 @@ define void @clei_s_v4i32(<4 x i32>* %c, <4 x i32>* %a) nounwind {
define void @clei_s_v2i64(<2 x i64>* %c, <2 x i64>* %a) nounwind {
; CHECK: clei_s_v2i64:
- %1 = load <2 x i64>* %a
+ %1 = load <2 x i64>, <2 x i64>* %a
; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
%2 = icmp sle <2 x i64> %1, <i64 1, i64 1>
%3 = sext <2 x i1> %2 to <2 x i64>
@@ -549,7 +549,7 @@ define void @clei_s_v2i64(<2 x i64>* %c, <2 x i64>* %a) nounwind {
define void @clei_u_v16i8(<16 x i8>* %c, <16 x i8>* %a) nounwind {
; CHECK: clei_u_v16i8:
- %1 = load <16 x i8>* %a
+ %1 = load <16 x i8>, <16 x i8>* %a
; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
%2 = icmp ule <16 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
%3 = sext <16 x i1> %2 to <16 x i8>
@@ -564,7 +564,7 @@ define void @clei_u_v16i8(<16 x i8>* %c, <16 x i8>* %a) nounwind {
define void @clei_u_v8i16(<8 x i16>* %c, <8 x i16>* %a) nounwind {
; CHECK: clei_u_v8i16:
- %1 = load <8 x i16>* %a
+ %1 = load <8 x i16>, <8 x i16>* %a
; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
%2 = icmp ule <8 x i16> %1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
%3 = sext <8 x i1> %2 to <8 x i16>
@@ -579,7 +579,7 @@ define void @clei_u_v8i16(<8 x i16>* %c, <8 x i16>* %a) nounwind {
define void @clei_u_v4i32(<4 x i32>* %c, <4 x i32>* %a) nounwind {
; CHECK: clei_u_v4i32:
- %1 = load <4 x i32>* %a
+ %1 = load <4 x i32>, <4 x i32>* %a
; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
%2 = icmp ule <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1>
%3 = sext <4 x i1> %2 to <4 x i32>
@@ -594,7 +594,7 @@ define void @clei_u_v4i32(<4 x i32>* %c, <4 x i32>* %a) nounwind {
define void @clei_u_v2i64(<2 x i64>* %c, <2 x i64>* %a) nounwind {
; CHECK: clei_u_v2i64:
- %1 = load <2 x i64>* %a
+ %1 = load <2 x i64>, <2 x i64>* %a
; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
%2 = icmp ule <2 x i64> %1, <i64 1, i64 1>
%3 = sext <2 x i1> %2 to <2 x i64>
@@ -609,7 +609,7 @@ define void @clei_u_v2i64(<2 x i64>* %c, <2 x i64>* %a) nounwind {
define void @clti_s_v16i8(<16 x i8>* %c, <16 x i8>* %a) nounwind {
; CHECK: clti_s_v16i8:
- %1 = load <16 x i8>* %a
+ %1 = load <16 x i8>, <16 x i8>* %a
; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
%2 = icmp slt <16 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
%3 = sext <16 x i1> %2 to <16 x i8>
@@ -624,7 +624,7 @@ define void @clti_s_v16i8(<16 x i8>* %c, <16 x i8>* %a) nounwind {
define void @clti_s_v8i16(<8 x i16>* %c, <8 x i16>* %a) nounwind {
; CHECK: clti_s_v8i16:
- %1 = load <8 x i16>* %a
+ %1 = load <8 x i16>, <8 x i16>* %a
; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
%2 = icmp slt <8 x i16> %1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
%3 = sext <8 x i1> %2 to <8 x i16>
@@ -639,7 +639,7 @@ define void @clti_s_v8i16(<8 x i16>* %c, <8 x i16>* %a) nounwind {
define void @clti_s_v4i32(<4 x i32>* %c, <4 x i32>* %a) nounwind {
; CHECK: clti_s_v4i32:
- %1 = load <4 x i32>* %a
+ %1 = load <4 x i32>, <4 x i32>* %a
; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
%2 = icmp slt <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1>
%3 = sext <4 x i1> %2 to <4 x i32>
@@ -654,7 +654,7 @@ define void @clti_s_v4i32(<4 x i32>* %c, <4 x i32>* %a) nounwind {
define void @clti_s_v2i64(<2 x i64>* %c, <2 x i64>* %a) nounwind {
; CHECK: clti_s_v2i64:
- %1 = load <2 x i64>* %a
+ %1 = load <2 x i64>, <2 x i64>* %a
; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
%2 = icmp slt <2 x i64> %1, <i64 1, i64 1>
%3 = sext <2 x i1> %2 to <2 x i64>
@@ -669,7 +669,7 @@ define void @clti_s_v2i64(<2 x i64>* %c, <2 x i64>* %a) nounwind {
define void @clti_u_v16i8(<16 x i8>* %c, <16 x i8>* %a) nounwind {
; CHECK: clti_u_v16i8:
- %1 = load <16 x i8>* %a
+ %1 = load <16 x i8>, <16 x i8>* %a
; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
%2 = icmp ult <16 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
%3 = sext <16 x i1> %2 to <16 x i8>
@@ -684,7 +684,7 @@ define void @clti_u_v16i8(<16 x i8>* %c, <16 x i8>* %a) nounwind {
define void @clti_u_v8i16(<8 x i16>* %c, <8 x i16>* %a) nounwind {
; CHECK: clti_u_v8i16:
- %1 = load <8 x i16>* %a
+ %1 = load <8 x i16>, <8 x i16>* %a
; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
%2 = icmp ult <8 x i16> %1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
%3 = sext <8 x i1> %2 to <8 x i16>
@@ -699,7 +699,7 @@ define void @clti_u_v8i16(<8 x i16>* %c, <8 x i16>* %a) nounwind {
define void @clti_u_v4i32(<4 x i32>* %c, <4 x i32>* %a) nounwind {
; CHECK: clti_u_v4i32:
- %1 = load <4 x i32>* %a
+ %1 = load <4 x i32>, <4 x i32>* %a
; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
%2 = icmp ult <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1>
%3 = sext <4 x i1> %2 to <4 x i32>
@@ -714,7 +714,7 @@ define void @clti_u_v4i32(<4 x i32>* %c, <4 x i32>* %a) nounwind {
define void @clti_u_v2i64(<2 x i64>* %c, <2 x i64>* %a) nounwind {
; CHECK: clti_u_v2i64:
- %1 = load <2 x i64>* %a
+ %1 = load <2 x i64>, <2 x i64>* %a
; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
%2 = icmp ult <2 x i64> %1, <i64 1, i64 1>
%3 = sext <2 x i1> %2 to <2 x i64>
@@ -730,11 +730,11 @@ define void @bsel_s_v16i8(<16 x i8>* %d, <16 x i8>* %a, <16 x i8>* %b,
<16 x i8>* %c) nounwind {
; CHECK: bsel_s_v16i8:
- %1 = load <16 x i8>* %a
+ %1 = load <16 x i8>, <16 x i8>* %a
; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
- %2 = load <16 x i8>* %b
+ %2 = load <16 x i8>, <16 x i8>* %b
; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
- %3 = load <16 x i8>* %c
+ %3 = load <16 x i8>, <16 x i8>* %c
; CHECK-DAG: ld.b [[R3:\$w[0-9]+]], 0($7)
%4 = icmp sgt <16 x i8> %1, %2
; CHECK-DAG: clt_s.b [[R4:\$w[0-9]+]], [[R2]], [[R1]]
@@ -752,11 +752,11 @@ define void @bsel_s_v8i16(<8 x i16>* %d, <8 x i16>* %a, <8 x i16>* %b,
<8 x i16>* %c) nounwind {
; CHECK: bsel_s_v8i16:
- %1 = load <8 x i16>* %a
+ %1 = load <8 x i16>, <8 x i16>* %a
; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
- %2 = load <8 x i16>* %b
+ %2 = load <8 x i16>, <8 x i16>* %b
; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
- %3 = load <8 x i16>* %c
+ %3 = load <8 x i16>, <8 x i16>* %c
; CHECK-DAG: ld.h [[R3:\$w[0-9]+]], 0($7)
%4 = icmp sgt <8 x i16> %1, %2
; CHECK-DAG: clt_s.h [[R4:\$w[0-9]+]], [[R2]], [[R1]]
@@ -774,11 +774,11 @@ define void @bsel_s_v4i32(<4 x i32>* %d, <4 x i32>* %a, <4 x i32>* %b,
<4 x i32>* %c) nounwind {
; CHECK: bsel_s_v4i32:
- %1 = load <4 x i32>* %a
+ %1 = load <4 x i32>, <4 x i32>* %a
; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
- %2 = load <4 x i32>* %b
+ %2 = load <4 x i32>, <4 x i32>* %b
; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
- %3 = load <4 x i32>* %c
+ %3 = load <4 x i32>, <4 x i32>* %c
; CHECK-DAG: ld.w [[R3:\$w[0-9]+]], 0($7)
%4 = icmp sgt <4 x i32> %1, %2
; CHECK-DAG: clt_s.w [[R4:\$w[0-9]+]], [[R2]], [[R1]]
@@ -796,11 +796,11 @@ define void @bsel_s_v2i64(<2 x i64>* %d, <2 x i64>* %a, <2 x i64>* %b,
<2 x i64>* %c) nounwind {
; CHECK: bsel_s_v2i64:
- %1 = load <2 x i64>* %a
+ %1 = load <2 x i64>, <2 x i64>* %a
; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
- %2 = load <2 x i64>* %b
+ %2 = load <2 x i64>, <2 x i64>* %b
; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
- %3 = load <2 x i64>* %c
+ %3 = load <2 x i64>, <2 x i64>* %c
; CHECK-DAG: ld.d [[R3:\$w[0-9]+]], 0($7)
%4 = icmp sgt <2 x i64> %1, %2
; CHECK-DAG: clt_s.d [[R4:\$w[0-9]+]], [[R2]], [[R1]]
@@ -818,11 +818,11 @@ define void @bsel_u_v16i8(<16 x i8>* %d, <16 x i8>* %a, <16 x i8>* %b,
<16 x i8>* %c) nounwind {
; CHECK: bsel_u_v16i8:
- %1 = load <16 x i8>* %a
+ %1 = load <16 x i8>, <16 x i8>* %a
; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
- %2 = load <16 x i8>* %b
+ %2 = load <16 x i8>, <16 x i8>* %b
; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
- %3 = load <16 x i8>* %c
+ %3 = load <16 x i8>, <16 x i8>* %c
; CHECK-DAG: ld.b [[R3:\$w[0-9]+]], 0($7)
%4 = icmp ugt <16 x i8> %1, %2
; CHECK-DAG: clt_u.b [[R4:\$w[0-9]+]], [[R2]], [[R1]]
@@ -840,11 +840,11 @@ define void @bsel_u_v8i16(<8 x i16>* %d, <8 x i16>* %a, <8 x i16>* %b,
<8 x i16>* %c) nounwind {
; CHECK: bsel_u_v8i16:
- %1 = load <8 x i16>* %a
+ %1 = load <8 x i16>, <8 x i16>* %a
; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
- %2 = load <8 x i16>* %b
+ %2 = load <8 x i16>, <8 x i16>* %b
; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
- %3 = load <8 x i16>* %c
+ %3 = load <8 x i16>, <8 x i16>* %c
; CHECK-DAG: ld.h [[R3:\$w[0-9]+]], 0($7)
%4 = icmp ugt <8 x i16> %1, %2
; CHECK-DAG: clt_u.h [[R4:\$w[0-9]+]], [[R2]], [[R1]]
@@ -862,11 +862,11 @@ define void @bsel_u_v4i32(<4 x i32>* %d, <4 x i32>* %a, <4 x i32>* %b,
<4 x i32>* %c) nounwind {
; CHECK: bsel_u_v4i32:
- %1 = load <4 x i32>* %a
+ %1 = load <4 x i32>, <4 x i32>* %a
; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
- %2 = load <4 x i32>* %b
+ %2 = load <4 x i32>, <4 x i32>* %b
; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
- %3 = load <4 x i32>* %c
+ %3 = load <4 x i32>, <4 x i32>* %c
; CHECK-DAG: ld.w [[R3:\$w[0-9]+]], 0($7)
%4 = icmp ugt <4 x i32> %1, %2
; CHECK-DAG: clt_u.w [[R4:\$w[0-9]+]], [[R2]], [[R1]]
@@ -884,11 +884,11 @@ define void @bsel_u_v2i64(<2 x i64>* %d, <2 x i64>* %a, <2 x i64>* %b,
<2 x i64>* %c) nounwind {
; CHECK: bsel_u_v2i64:
- %1 = load <2 x i64>* %a
+ %1 = load <2 x i64>, <2 x i64>* %a
; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
- %2 = load <2 x i64>* %b
+ %2 = load <2 x i64>, <2 x i64>* %b
; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
- %3 = load <2 x i64>* %c
+ %3 = load <2 x i64>, <2 x i64>* %c
; CHECK-DAG: ld.d [[R3:\$w[0-9]+]], 0($7)
%4 = icmp ugt <2 x i64> %1, %2
; CHECK-DAG: clt_u.d [[R4:\$w[0-9]+]], [[R2]], [[R1]]
@@ -906,9 +906,9 @@ define void @bseli_s_v16i8(<16 x i8>* %d, <16 x i8>* %a, <16 x i8>* %b,
<16 x i8>* %c) nounwind {
; CHECK: bseli_s_v16i8:
- %1 = load <16 x i8>* %a
+ %1 = load <16 x i8>, <16 x i8>* %a
; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
- %2 = load <16 x i8>* %b
+ %2 = load <16 x i8>, <16 x i8>* %b
; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
%3 = icmp sgt <16 x i8> %1, %2
; CHECK-DAG: clt_s.b [[R4:\$w[0-9]+]], [[R2]], [[R1]]
@@ -925,9 +925,9 @@ define void @bseli_s_v8i16(<8 x i16>* %d, <8 x i16>* %a, <8 x i16>* %b,
<8 x i16>* %c) nounwind {
; CHECK: bseli_s_v8i16:
- %1 = load <8 x i16>* %a
+ %1 = load <8 x i16>, <8 x i16>* %a
; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
- %2 = load <8 x i16>* %b
+ %2 = load <8 x i16>, <8 x i16>* %b
; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
%3 = icmp sgt <8 x i16> %1, %2
; CHECK-DAG: clt_s.h [[R4:\$w[0-9]+]], [[R2]], [[R1]]
@@ -945,9 +945,9 @@ define void @bseli_s_v4i32(<4 x i32>* %d, <4 x i32>* %a, <4 x i32>* %b,
<4 x i32>* %c) nounwind {
; CHECK: bseli_s_v4i32:
- %1 = load <4 x i32>* %a
+ %1 = load <4 x i32>, <4 x i32>* %a
; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
- %2 = load <4 x i32>* %b
+ %2 = load <4 x i32>, <4 x i32>* %b
; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
%3 = icmp sgt <4 x i32> %1, %2
; CHECK-DAG: clt_s.w [[R4:\$w[0-9]+]], [[R2]], [[R1]]
@@ -965,9 +965,9 @@ define void @bseli_s_v2i64(<2 x i64>* %d, <2 x i64>* %a, <2 x i64>* %b,
<2 x i64>* %c) nounwind {
; CHECK: bseli_s_v2i64:
- %1 = load <2 x i64>* %a
+ %1 = load <2 x i64>, <2 x i64>* %a
; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
- %2 = load <2 x i64>* %b
+ %2 = load <2 x i64>, <2 x i64>* %b
; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
%3 = icmp sgt <2 x i64> %1, %2
; CHECK-DAG: clt_s.d [[R4:\$w[0-9]+]], [[R2]], [[R1]]
@@ -985,9 +985,9 @@ define void @bseli_u_v16i8(<16 x i8>* %d, <16 x i8>* %a, <16 x i8>* %b,
<16 x i8>* %c) nounwind {
; CHECK: bseli_u_v16i8:
- %1 = load <16 x i8>* %a
+ %1 = load <16 x i8>, <16 x i8>* %a
; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
- %2 = load <16 x i8>* %b
+ %2 = load <16 x i8>, <16 x i8>* %b
; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
%3 = icmp ugt <16 x i8> %1, %2
; CHECK-DAG: clt_u.b [[R4:\$w[0-9]+]], [[R2]], [[R1]]
@@ -1004,9 +1004,9 @@ define void @bseli_u_v8i16(<8 x i16>* %d, <8 x i16>* %a, <8 x i16>* %b,
<8 x i16>* %c) nounwind {
; CHECK: bseli_u_v8i16:
- %1 = load <8 x i16>* %a
+ %1 = load <8 x i16>, <8 x i16>* %a
; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
- %2 = load <8 x i16>* %b
+ %2 = load <8 x i16>, <8 x i16>* %b
; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
%3 = icmp ugt <8 x i16> %1, %2
; CHECK-DAG: clt_u.h [[R4:\$w[0-9]+]], [[R2]], [[R1]]
@@ -1024,9 +1024,9 @@ define void @bseli_u_v4i32(<4 x i32>* %d, <4 x i32>* %a, <4 x i32>* %b,
<4 x i32>* %c) nounwind {
; CHECK: bseli_u_v4i32:
- %1 = load <4 x i32>* %a
+ %1 = load <4 x i32>, <4 x i32>* %a
; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
- %2 = load <4 x i32>* %b
+ %2 = load <4 x i32>, <4 x i32>* %b
; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
%3 = icmp ugt <4 x i32> %1, %2
; CHECK-DAG: clt_u.w [[R4:\$w[0-9]+]], [[R2]], [[R1]]
@@ -1044,9 +1044,9 @@ define void @bseli_u_v2i64(<2 x i64>* %d, <2 x i64>* %a, <2 x i64>* %b,
<2 x i64>* %c) nounwind {
; CHECK: bseli_u_v2i64:
- %1 = load <2 x i64>* %a
+ %1 = load <2 x i64>, <2 x i64>* %a
; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
- %2 = load <2 x i64>* %b
+ %2 = load <2 x i64>, <2 x i64>* %b
; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
%3 = icmp ugt <2 x i64> %1, %2
; CHECK-DAG: clt_u.d [[R4:\$w[0-9]+]], [[R2]], [[R1]]
@@ -1063,9 +1063,9 @@ define void @bseli_u_v2i64(<2 x i64>* %d, <2 x i64>* %a, <2 x i64>* %b,
define void @max_s_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
; CHECK: max_s_v16i8:
- %1 = load <16 x i8>* %a
+ %1 = load <16 x i8>, <16 x i8>* %a
; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
- %2 = load <16 x i8>* %b
+ %2 = load <16 x i8>, <16 x i8>* %b
; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
%3 = icmp sgt <16 x i8> %1, %2
%4 = select <16 x i1> %3, <16 x i8> %1, <16 x i8> %2
@@ -1080,9 +1080,9 @@ define void @max_s_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
define void @max_s_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
; CHECK: max_s_v8i16:
- %1 = load <8 x i16>* %a
+ %1 = load <8 x i16>, <8 x i16>* %a
; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
- %2 = load <8 x i16>* %b
+ %2 = load <8 x i16>, <8 x i16>* %b
; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
%3 = icmp sgt <8 x i16> %1, %2
%4 = select <8 x i1> %3, <8 x i16> %1, <8 x i16> %2
@@ -1097,9 +1097,9 @@ define void @max_s_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
define void @max_s_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
; CHECK: max_s_v4i32:
- %1 = load <4 x i32>* %a
+ %1 = load <4 x i32>, <4 x i32>* %a
; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
- %2 = load <4 x i32>* %b
+ %2 = load <4 x i32>, <4 x i32>* %b
; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
%3 = icmp sgt <4 x i32> %1, %2
%4 = select <4 x i1> %3, <4 x i32> %1, <4 x i32> %2
@@ -1114,9 +1114,9 @@ define void @max_s_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
define void @max_s_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
; CHECK: max_s_v2i64:
- %1 = load <2 x i64>* %a
+ %1 = load <2 x i64>, <2 x i64>* %a
; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
- %2 = load <2 x i64>* %b
+ %2 = load <2 x i64>, <2 x i64>* %b
; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
%3 = icmp sgt <2 x i64> %1, %2
%4 = select <2 x i1> %3, <2 x i64> %1, <2 x i64> %2
@@ -1131,9 +1131,9 @@ define void @max_s_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
define void @max_u_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
; CHECK: max_u_v16i8:
- %1 = load <16 x i8>* %a
+ %1 = load <16 x i8>, <16 x i8>* %a
; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
- %2 = load <16 x i8>* %b
+ %2 = load <16 x i8>, <16 x i8>* %b
; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
%3 = icmp ugt <16 x i8> %1, %2
%4 = select <16 x i1> %3, <16 x i8> %1, <16 x i8> %2
@@ -1148,9 +1148,9 @@ define void @max_u_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
define void @max_u_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
; CHECK: max_u_v8i16:
- %1 = load <8 x i16>* %a
+ %1 = load <8 x i16>, <8 x i16>* %a
; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
- %2 = load <8 x i16>* %b
+ %2 = load <8 x i16>, <8 x i16>* %b
; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
%3 = icmp ugt <8 x i16> %1, %2
%4 = select <8 x i1> %3, <8 x i16> %1, <8 x i16> %2
@@ -1165,9 +1165,9 @@ define void @max_u_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
define void @max_u_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
; CHECK: max_u_v4i32:
- %1 = load <4 x i32>* %a
+ %1 = load <4 x i32>, <4 x i32>* %a
; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
- %2 = load <4 x i32>* %b
+ %2 = load <4 x i32>, <4 x i32>* %b
; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
%3 = icmp ugt <4 x i32> %1, %2
%4 = select <4 x i1> %3, <4 x i32> %1, <4 x i32> %2
@@ -1182,9 +1182,9 @@ define void @max_u_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
define void @max_u_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
; CHECK: max_u_v2i64:
- %1 = load <2 x i64>* %a
+ %1 = load <2 x i64>, <2 x i64>* %a
; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
- %2 = load <2 x i64>* %b
+ %2 = load <2 x i64>, <2 x i64>* %b
; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
%3 = icmp ugt <2 x i64> %1, %2
%4 = select <2 x i1> %3, <2 x i64> %1, <2 x i64> %2
@@ -1199,9 +1199,9 @@ define void @max_u_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
define void @max_s_eq_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
; CHECK: max_s_eq_v16i8:
- %1 = load <16 x i8>* %a
+ %1 = load <16 x i8>, <16 x i8>* %a
; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
- %2 = load <16 x i8>* %b
+ %2 = load <16 x i8>, <16 x i8>* %b
; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
%3 = icmp sge <16 x i8> %1, %2
%4 = select <16 x i1> %3, <16 x i8> %1, <16 x i8> %2
@@ -1216,9 +1216,9 @@ define void @max_s_eq_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwin
define void @max_s_eq_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
; CHECK: max_s_eq_v8i16:
- %1 = load <8 x i16>* %a
+ %1 = load <8 x i16>, <8 x i16>* %a
; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
- %2 = load <8 x i16>* %b
+ %2 = load <8 x i16>, <8 x i16>* %b
; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
%3 = icmp sge <8 x i16> %1, %2
%4 = select <8 x i1> %3, <8 x i16> %1, <8 x i16> %2
@@ -1233,9 +1233,9 @@ define void @max_s_eq_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwin
define void @max_s_eq_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
; CHECK: max_s_eq_v4i32:
- %1 = load <4 x i32>* %a
+ %1 = load <4 x i32>, <4 x i32>* %a
; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
- %2 = load <4 x i32>* %b
+ %2 = load <4 x i32>, <4 x i32>* %b
; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
%3 = icmp sge <4 x i32> %1, %2
%4 = select <4 x i1> %3, <4 x i32> %1, <4 x i32> %2
@@ -1250,9 +1250,9 @@ define void @max_s_eq_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwin
define void @max_s_eq_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
; CHECK: max_s_eq_v2i64:
- %1 = load <2 x i64>* %a
+ %1 = load <2 x i64>, <2 x i64>* %a
; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
- %2 = load <2 x i64>* %b
+ %2 = load <2 x i64>, <2 x i64>* %b
; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
%3 = icmp sge <2 x i64> %1, %2
%4 = select <2 x i1> %3, <2 x i64> %1, <2 x i64> %2
@@ -1267,9 +1267,9 @@ define void @max_s_eq_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwin
define void @max_u_eq_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
; CHECK: max_u_eq_v16i8:
- %1 = load <16 x i8>* %a
+ %1 = load <16 x i8>, <16 x i8>* %a
; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
- %2 = load <16 x i8>* %b
+ %2 = load <16 x i8>, <16 x i8>* %b
; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
%3 = icmp uge <16 x i8> %1, %2
%4 = select <16 x i1> %3, <16 x i8> %1, <16 x i8> %2
@@ -1284,9 +1284,9 @@ define void @max_u_eq_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwin
define void @max_u_eq_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
; CHECK: max_u_eq_v8i16:
- %1 = load <8 x i16>* %a
+ %1 = load <8 x i16>, <8 x i16>* %a
; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
- %2 = load <8 x i16>* %b
+ %2 = load <8 x i16>, <8 x i16>* %b
; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
%3 = icmp uge <8 x i16> %1, %2
%4 = select <8 x i1> %3, <8 x i16> %1, <8 x i16> %2
@@ -1301,9 +1301,9 @@ define void @max_u_eq_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwin
define void @max_u_eq_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
; CHECK: max_u_eq_v4i32:
- %1 = load <4 x i32>* %a
+ %1 = load <4 x i32>, <4 x i32>* %a
; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
- %2 = load <4 x i32>* %b
+ %2 = load <4 x i32>, <4 x i32>* %b
; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
%3 = icmp uge <4 x i32> %1, %2
%4 = select <4 x i1> %3, <4 x i32> %1, <4 x i32> %2
@@ -1318,9 +1318,9 @@ define void @max_u_eq_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwin
define void @max_u_eq_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
; CHECK: max_u_eq_v2i64:
- %1 = load <2 x i64>* %a
+ %1 = load <2 x i64>, <2 x i64>* %a
; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
- %2 = load <2 x i64>* %b
+ %2 = load <2 x i64>, <2 x i64>* %b
; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
%3 = icmp uge <2 x i64> %1, %2
%4 = select <2 x i1> %3, <2 x i64> %1, <2 x i64> %2
@@ -1335,7 +1335,7 @@ define void @max_u_eq_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwin
define void @maxi_s_v16i8(<16 x i8>* %c, <16 x i8>* %a) nounwind {
; CHECK: maxi_s_v16i8:
- %1 = load <16 x i8>* %a
+ %1 = load <16 x i8>, <16 x i8>* %a
; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
%2 = icmp sgt <16 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
%3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
@@ -1350,7 +1350,7 @@ define void @maxi_s_v16i8(<16 x i8>* %c, <16 x i8>* %a) nounwind {
define void @maxi_s_v8i16(<8 x i16>* %c, <8 x i16>* %a) nounwind {
; CHECK: maxi_s_v8i16:
- %1 = load <8 x i16>* %a
+ %1 = load <8 x i16>, <8 x i16>* %a
; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
%2 = icmp sgt <8 x i16> %1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
%3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
@@ -1365,7 +1365,7 @@ define void @maxi_s_v8i16(<8 x i16>* %c, <8 x i16>* %a) nounwind {
define void @maxi_s_v4i32(<4 x i32>* %c, <4 x i32>* %a) nounwind {
; CHECK: maxi_s_v4i32:
- %1 = load <4 x i32>* %a
+ %1 = load <4 x i32>, <4 x i32>* %a
; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
%2 = icmp sgt <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1>
%3 = select <4 x i1> %2, <4 x i32> %1, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
@@ -1380,7 +1380,7 @@ define void @maxi_s_v4i32(<4 x i32>* %c, <4 x i32>* %a) nounwind {
define void @maxi_s_v2i64(<2 x i64>* %c, <2 x i64>* %a) nounwind {
; CHECK: maxi_s_v2i64:
- %1 = load <2 x i64>* %a
+ %1 = load <2 x i64>, <2 x i64>* %a
; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
%2 = icmp sgt <2 x i64> %1, <i64 1, i64 1>
%3 = select <2 x i1> %2, <2 x i64> %1, <2 x i64> <i64 1, i64 1>
@@ -1395,7 +1395,7 @@ define void @maxi_s_v2i64(<2 x i64>* %c, <2 x i64>* %a) nounwind {
define void @maxi_u_v16i8(<16 x i8>* %c, <16 x i8>* %a) nounwind {
; CHECK: maxi_u_v16i8:
- %1 = load <16 x i8>* %a
+ %1 = load <16 x i8>, <16 x i8>* %a
; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
%2 = icmp ugt <16 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
%3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
@@ -1410,7 +1410,7 @@ define void @maxi_u_v16i8(<16 x i8>* %c, <16 x i8>* %a) nounwind {
define void @maxi_u_v8i16(<8 x i16>* %c, <8 x i16>* %a) nounwind {
; CHECK: maxi_u_v8i16:
- %1 = load <8 x i16>* %a
+ %1 = load <8 x i16>, <8 x i16>* %a
; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
%2 = icmp ugt <8 x i16> %1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
%3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
@@ -1425,7 +1425,7 @@ define void @maxi_u_v8i16(<8 x i16>* %c, <8 x i16>* %a) nounwind {
define void @maxi_u_v4i32(<4 x i32>* %c, <4 x i32>* %a) nounwind {
; CHECK: maxi_u_v4i32:
- %1 = load <4 x i32>* %a
+ %1 = load <4 x i32>, <4 x i32>* %a
; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
%2 = icmp ugt <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1>
%3 = select <4 x i1> %2, <4 x i32> %1, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
@@ -1440,7 +1440,7 @@ define void @maxi_u_v4i32(<4 x i32>* %c, <4 x i32>* %a) nounwind {
define void @maxi_u_v2i64(<2 x i64>* %c, <2 x i64>* %a) nounwind {
; CHECK: maxi_u_v2i64:
- %1 = load <2 x i64>* %a
+ %1 = load <2 x i64>, <2 x i64>* %a
; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
%2 = icmp ugt <2 x i64> %1, <i64 1, i64 1>
%3 = select <2 x i1> %2, <2 x i64> %1, <2 x i64> <i64 1, i64 1>
@@ -1455,7 +1455,7 @@ define void @maxi_u_v2i64(<2 x i64>* %c, <2 x i64>* %a) nounwind {
define void @maxi_s_eq_v16i8(<16 x i8>* %c, <16 x i8>* %a) nounwind {
; CHECK: maxi_s_eq_v16i8:
- %1 = load <16 x i8>* %a
+ %1 = load <16 x i8>, <16 x i8>* %a
; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
%2 = icmp sge <16 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
%3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
@@ -1470,7 +1470,7 @@ define void @maxi_s_eq_v16i8(<16 x i8>* %c, <16 x i8>* %a) nounwind {
define void @maxi_s_eq_v8i16(<8 x i16>* %c, <8 x i16>* %a) nounwind {
; CHECK: maxi_s_eq_v8i16:
- %1 = load <8 x i16>* %a
+ %1 = load <8 x i16>, <8 x i16>* %a
; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
%2 = icmp sge <8 x i16> %1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
%3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
@@ -1485,7 +1485,7 @@ define void @maxi_s_eq_v8i16(<8 x i16>* %c, <8 x i16>* %a) nounwind {
define void @maxi_s_eq_v4i32(<4 x i32>* %c, <4 x i32>* %a) nounwind {
; CHECK: maxi_s_eq_v4i32:
- %1 = load <4 x i32>* %a
+ %1 = load <4 x i32>, <4 x i32>* %a
; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
%2 = icmp sge <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1>
%3 = select <4 x i1> %2, <4 x i32> %1, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
@@ -1500,7 +1500,7 @@ define void @maxi_s_eq_v4i32(<4 x i32>* %c, <4 x i32>* %a) nounwind {
define void @maxi_s_eq_v2i64(<2 x i64>* %c, <2 x i64>* %a) nounwind {
; CHECK: maxi_s_eq_v2i64:
- %1 = load <2 x i64>* %a
+ %1 = load <2 x i64>, <2 x i64>* %a
; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
%2 = icmp sge <2 x i64> %1, <i64 1, i64 1>
%3 = select <2 x i1> %2, <2 x i64> %1, <2 x i64> <i64 1, i64 1>
@@ -1515,7 +1515,7 @@ define void @maxi_s_eq_v2i64(<2 x i64>* %c, <2 x i64>* %a) nounwind {
define void @maxi_u_eq_v16i8(<16 x i8>* %c, <16 x i8>* %a) nounwind {
; CHECK: maxi_u_eq_v16i8:
- %1 = load <16 x i8>* %a
+ %1 = load <16 x i8>, <16 x i8>* %a
; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
%2 = icmp uge <16 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
%3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
@@ -1530,7 +1530,7 @@ define void @maxi_u_eq_v16i8(<16 x i8>* %c, <16 x i8>* %a) nounwind {
define void @maxi_u_eq_v8i16(<8 x i16>* %c, <8 x i16>* %a) nounwind {
; CHECK: maxi_u_eq_v8i16:
- %1 = load <8 x i16>* %a
+ %1 = load <8 x i16>, <8 x i16>* %a
; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
%2 = icmp uge <8 x i16> %1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
%3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
@@ -1545,7 +1545,7 @@ define void @maxi_u_eq_v8i16(<8 x i16>* %c, <8 x i16>* %a) nounwind {
define void @maxi_u_eq_v4i32(<4 x i32>* %c, <4 x i32>* %a) nounwind {
; CHECK: maxi_u_eq_v4i32:
- %1 = load <4 x i32>* %a
+ %1 = load <4 x i32>, <4 x i32>* %a
; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
%2 = icmp uge <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1>
%3 = select <4 x i1> %2, <4 x i32> %1, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
@@ -1560,7 +1560,7 @@ define void @maxi_u_eq_v4i32(<4 x i32>* %c, <4 x i32>* %a) nounwind {
define void @maxi_u_eq_v2i64(<2 x i64>* %c, <2 x i64>* %a) nounwind {
; CHECK: maxi_u_eq_v2i64:
- %1 = load <2 x i64>* %a
+ %1 = load <2 x i64>, <2 x i64>* %a
; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
%2 = icmp uge <2 x i64> %1, <i64 1, i64 1>
%3 = select <2 x i1> %2, <2 x i64> %1, <2 x i64> <i64 1, i64 1>
@@ -1575,9 +1575,9 @@ define void @maxi_u_eq_v2i64(<2 x i64>* %c, <2 x i64>* %a) nounwind {
define void @min_s_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
; CHECK: min_s_v16i8:
- %1 = load <16 x i8>* %a
+ %1 = load <16 x i8>, <16 x i8>* %a
; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
- %2 = load <16 x i8>* %b
+ %2 = load <16 x i8>, <16 x i8>* %b
; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
%3 = icmp sle <16 x i8> %1, %2
%4 = select <16 x i1> %3, <16 x i8> %1, <16 x i8> %2
@@ -1592,9 +1592,9 @@ define void @min_s_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
define void @min_s_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
; CHECK: min_s_v8i16:
- %1 = load <8 x i16>* %a
+ %1 = load <8 x i16>, <8 x i16>* %a
; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
- %2 = load <8 x i16>* %b
+ %2 = load <8 x i16>, <8 x i16>* %b
; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
%3 = icmp slt <8 x i16> %1, %2
%4 = select <8 x i1> %3, <8 x i16> %1, <8 x i16> %2
@@ -1609,9 +1609,9 @@ define void @min_s_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
define void @min_s_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
; CHECK: min_s_v4i32:
- %1 = load <4 x i32>* %a
+ %1 = load <4 x i32>, <4 x i32>* %a
; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
- %2 = load <4 x i32>* %b
+ %2 = load <4 x i32>, <4 x i32>* %b
; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
%3 = icmp slt <4 x i32> %1, %2
%4 = select <4 x i1> %3, <4 x i32> %1, <4 x i32> %2
@@ -1626,9 +1626,9 @@ define void @min_s_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
define void @min_s_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
; CHECK: min_s_v2i64:
- %1 = load <2 x i64>* %a
+ %1 = load <2 x i64>, <2 x i64>* %a
; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
- %2 = load <2 x i64>* %b
+ %2 = load <2 x i64>, <2 x i64>* %b
; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
%3 = icmp slt <2 x i64> %1, %2
%4 = select <2 x i1> %3, <2 x i64> %1, <2 x i64> %2
@@ -1643,9 +1643,9 @@ define void @min_s_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
define void @min_u_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
; CHECK: min_u_v16i8:
- %1 = load <16 x i8>* %a
+ %1 = load <16 x i8>, <16 x i8>* %a
; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
- %2 = load <16 x i8>* %b
+ %2 = load <16 x i8>, <16 x i8>* %b
; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
%3 = icmp ult <16 x i8> %1, %2
%4 = select <16 x i1> %3, <16 x i8> %1, <16 x i8> %2
@@ -1660,9 +1660,9 @@ define void @min_u_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
define void @min_u_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
; CHECK: min_u_v8i16:
- %1 = load <8 x i16>* %a
+ %1 = load <8 x i16>, <8 x i16>* %a
; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
- %2 = load <8 x i16>* %b
+ %2 = load <8 x i16>, <8 x i16>* %b
; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
%3 = icmp ult <8 x i16> %1, %2
%4 = select <8 x i1> %3, <8 x i16> %1, <8 x i16> %2
@@ -1677,9 +1677,9 @@ define void @min_u_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
define void @min_u_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
; CHECK: min_u_v4i32:
- %1 = load <4 x i32>* %a
+ %1 = load <4 x i32>, <4 x i32>* %a
; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
- %2 = load <4 x i32>* %b
+ %2 = load <4 x i32>, <4 x i32>* %b
; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
%3 = icmp ult <4 x i32> %1, %2
%4 = select <4 x i1> %3, <4 x i32> %1, <4 x i32> %2
@@ -1694,9 +1694,9 @@ define void @min_u_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
define void @min_u_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
; CHECK: min_u_v2i64:
- %1 = load <2 x i64>* %a
+ %1 = load <2 x i64>, <2 x i64>* %a
; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
- %2 = load <2 x i64>* %b
+ %2 = load <2 x i64>, <2 x i64>* %b
; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
%3 = icmp ult <2 x i64> %1, %2
%4 = select <2 x i1> %3, <2 x i64> %1, <2 x i64> %2
@@ -1711,9 +1711,9 @@ define void @min_u_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
define void @min_s_eq_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
; CHECK: min_s_eq_v16i8:
- %1 = load <16 x i8>* %a
+ %1 = load <16 x i8>, <16 x i8>* %a
; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
- %2 = load <16 x i8>* %b
+ %2 = load <16 x i8>, <16 x i8>* %b
; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
%3 = icmp sle <16 x i8> %1, %2
%4 = select <16 x i1> %3, <16 x i8> %1, <16 x i8> %2
@@ -1728,9 +1728,9 @@ define void @min_s_eq_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwin
define void @min_s_eq_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
; CHECK: min_s_eq_v8i16:
- %1 = load <8 x i16>* %a
+ %1 = load <8 x i16>, <8 x i16>* %a
; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
- %2 = load <8 x i16>* %b
+ %2 = load <8 x i16>, <8 x i16>* %b
; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
%3 = icmp sle <8 x i16> %1, %2
%4 = select <8 x i1> %3, <8 x i16> %1, <8 x i16> %2
@@ -1745,9 +1745,9 @@ define void @min_s_eq_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwin
define void @min_s_eq_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
; CHECK: min_s_eq_v4i32:
- %1 = load <4 x i32>* %a
+ %1 = load <4 x i32>, <4 x i32>* %a
; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
- %2 = load <4 x i32>* %b
+ %2 = load <4 x i32>, <4 x i32>* %b
; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
%3 = icmp sle <4 x i32> %1, %2
%4 = select <4 x i1> %3, <4 x i32> %1, <4 x i32> %2
@@ -1762,9 +1762,9 @@ define void @min_s_eq_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwin
define void @min_s_eq_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
; CHECK: min_s_eq_v2i64:
- %1 = load <2 x i64>* %a
+ %1 = load <2 x i64>, <2 x i64>* %a
; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
- %2 = load <2 x i64>* %b
+ %2 = load <2 x i64>, <2 x i64>* %b
; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
%3 = icmp sle <2 x i64> %1, %2
%4 = select <2 x i1> %3, <2 x i64> %1, <2 x i64> %2
@@ -1779,9 +1779,9 @@ define void @min_s_eq_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwin
define void @min_u_eq_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
; CHECK: min_u_eq_v16i8:
- %1 = load <16 x i8>* %a
+ %1 = load <16 x i8>, <16 x i8>* %a
; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
- %2 = load <16 x i8>* %b
+ %2 = load <16 x i8>, <16 x i8>* %b
; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
%3 = icmp ule <16 x i8> %1, %2
%4 = select <16 x i1> %3, <16 x i8> %1, <16 x i8> %2
@@ -1796,9 +1796,9 @@ define void @min_u_eq_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwin
define void @min_u_eq_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
; CHECK: min_u_eq_v8i16:
- %1 = load <8 x i16>* %a
+ %1 = load <8 x i16>, <8 x i16>* %a
; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
- %2 = load <8 x i16>* %b
+ %2 = load <8 x i16>, <8 x i16>* %b
; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
%3 = icmp ule <8 x i16> %1, %2
%4 = select <8 x i1> %3, <8 x i16> %1, <8 x i16> %2
@@ -1813,9 +1813,9 @@ define void @min_u_eq_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwin
define void @min_u_eq_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
; CHECK: min_u_eq_v4i32:
- %1 = load <4 x i32>* %a
+ %1 = load <4 x i32>, <4 x i32>* %a
; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
- %2 = load <4 x i32>* %b
+ %2 = load <4 x i32>, <4 x i32>* %b
; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
%3 = icmp ule <4 x i32> %1, %2
%4 = select <4 x i1> %3, <4 x i32> %1, <4 x i32> %2
@@ -1830,9 +1830,9 @@ define void @min_u_eq_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwin
define void @min_u_eq_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
; CHECK: min_u_eq_v2i64:
- %1 = load <2 x i64>* %a
+ %1 = load <2 x i64>, <2 x i64>* %a
; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
- %2 = load <2 x i64>* %b
+ %2 = load <2 x i64>, <2 x i64>* %b
; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
%3 = icmp ule <2 x i64> %1, %2
%4 = select <2 x i1> %3, <2 x i64> %1, <2 x i64> %2
@@ -1847,7 +1847,7 @@ define void @min_u_eq_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwin
define void @mini_s_v16i8(<16 x i8>* %c, <16 x i8>* %a) nounwind {
; CHECK: mini_s_v16i8:
- %1 = load <16 x i8>* %a
+ %1 = load <16 x i8>, <16 x i8>* %a
; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
%2 = icmp slt <16 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
%3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
@@ -1862,7 +1862,7 @@ define void @mini_s_v16i8(<16 x i8>* %c, <16 x i8>* %a) nounwind {
define void @mini_s_v8i16(<8 x i16>* %c, <8 x i16>* %a) nounwind {
; CHECK: mini_s_v8i16:
- %1 = load <8 x i16>* %a
+ %1 = load <8 x i16>, <8 x i16>* %a
; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
%2 = icmp slt <8 x i16> %1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
%3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
@@ -1877,7 +1877,7 @@ define void @mini_s_v8i16(<8 x i16>* %c, <8 x i16>* %a) nounwind {
define void @mini_s_v4i32(<4 x i32>* %c, <4 x i32>* %a) nounwind {
; CHECK: mini_s_v4i32:
- %1 = load <4 x i32>* %a
+ %1 = load <4 x i32>, <4 x i32>* %a
; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
%2 = icmp slt <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1>
%3 = select <4 x i1> %2, <4 x i32> %1, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
@@ -1892,7 +1892,7 @@ define void @mini_s_v4i32(<4 x i32>* %c, <4 x i32>* %a) nounwind {
define void @mini_s_v2i64(<2 x i64>* %c, <2 x i64>* %a) nounwind {
; CHECK: mini_s_v2i64:
- %1 = load <2 x i64>* %a
+ %1 = load <2 x i64>, <2 x i64>* %a
; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
%2 = icmp slt <2 x i64> %1, <i64 1, i64 1>
%3 = select <2 x i1> %2, <2 x i64> %1, <2 x i64> <i64 1, i64 1>
@@ -1907,7 +1907,7 @@ define void @mini_s_v2i64(<2 x i64>* %c, <2 x i64>* %a) nounwind {
define void @mini_u_v16i8(<16 x i8>* %c, <16 x i8>* %a) nounwind {
; CHECK: mini_u_v16i8:
- %1 = load <16 x i8>* %a
+ %1 = load <16 x i8>, <16 x i8>* %a
; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
%2 = icmp ult <16 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
%3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
@@ -1922,7 +1922,7 @@ define void @mini_u_v16i8(<16 x i8>* %c, <16 x i8>* %a) nounwind {
define void @mini_u_v8i16(<8 x i16>* %c, <8 x i16>* %a) nounwind {
; CHECK: mini_u_v8i16:
- %1 = load <8 x i16>* %a
+ %1 = load <8 x i16>, <8 x i16>* %a
; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
%2 = icmp ult <8 x i16> %1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
%3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
@@ -1937,7 +1937,7 @@ define void @mini_u_v8i16(<8 x i16>* %c, <8 x i16>* %a) nounwind {
define void @mini_u_v4i32(<4 x i32>* %c, <4 x i32>* %a) nounwind {
; CHECK: mini_u_v4i32:
- %1 = load <4 x i32>* %a
+ %1 = load <4 x i32>, <4 x i32>* %a
; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
%2 = icmp ult <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1>
%3 = select <4 x i1> %2, <4 x i32> %1, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
@@ -1952,7 +1952,7 @@ define void @mini_u_v4i32(<4 x i32>* %c, <4 x i32>* %a) nounwind {
define void @mini_u_v2i64(<2 x i64>* %c, <2 x i64>* %a) nounwind {
; CHECK: mini_u_v2i64:
- %1 = load <2 x i64>* %a
+ %1 = load <2 x i64>, <2 x i64>* %a
; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
%2 = icmp ult <2 x i64> %1, <i64 1, i64 1>
%3 = select <2 x i1> %2, <2 x i64> %1, <2 x i64> <i64 1, i64 1>
@@ -1967,7 +1967,7 @@ define void @mini_u_v2i64(<2 x i64>* %c, <2 x i64>* %a) nounwind {
define void @mini_s_eq_v16i8(<16 x i8>* %c, <16 x i8>* %a) nounwind {
; CHECK: mini_s_eq_v16i8:
- %1 = load <16 x i8>* %a
+ %1 = load <16 x i8>, <16 x i8>* %a
; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
%2 = icmp sle <16 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
%3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
@@ -1982,7 +1982,7 @@ define void @mini_s_eq_v16i8(<16 x i8>* %c, <16 x i8>* %a) nounwind {
define void @mini_s_eq_v8i16(<8 x i16>* %c, <8 x i16>* %a) nounwind {
; CHECK: mini_s_eq_v8i16:
- %1 = load <8 x i16>* %a
+ %1 = load <8 x i16>, <8 x i16>* %a
; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
%2 = icmp sle <8 x i16> %1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
%3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
@@ -1997,7 +1997,7 @@ define void @mini_s_eq_v8i16(<8 x i16>* %c, <8 x i16>* %a) nounwind {
define void @mini_s_eq_v4i32(<4 x i32>* %c, <4 x i32>* %a) nounwind {
; CHECK: mini_s_eq_v4i32:
- %1 = load <4 x i32>* %a
+ %1 = load <4 x i32>, <4 x i32>* %a
; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
%2 = icmp sle <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1>
%3 = select <4 x i1> %2, <4 x i32> %1, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
@@ -2012,7 +2012,7 @@ define void @mini_s_eq_v4i32(<4 x i32>* %c, <4 x i32>* %a) nounwind {
define void @mini_s_eq_v2i64(<2 x i64>* %c, <2 x i64>* %a) nounwind {
; CHECK: mini_s_eq_v2i64:
- %1 = load <2 x i64>* %a
+ %1 = load <2 x i64>, <2 x i64>* %a
; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
%2 = icmp sle <2 x i64> %1, <i64 1, i64 1>
%3 = select <2 x i1> %2, <2 x i64> %1, <2 x i64> <i64 1, i64 1>
@@ -2027,7 +2027,7 @@ define void @mini_s_eq_v2i64(<2 x i64>* %c, <2 x i64>* %a) nounwind {
define void @mini_u_eq_v16i8(<16 x i8>* %c, <16 x i8>* %a) nounwind {
; CHECK: mini_u_eq_v16i8:
- %1 = load <16 x i8>* %a
+ %1 = load <16 x i8>, <16 x i8>* %a
; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
%2 = icmp ule <16 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
%3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
@@ -2042,7 +2042,7 @@ define void @mini_u_eq_v16i8(<16 x i8>* %c, <16 x i8>* %a) nounwind {
define void @mini_u_eq_v8i16(<8 x i16>* %c, <8 x i16>* %a) nounwind {
; CHECK: mini_u_eq_v8i16:
- %1 = load <8 x i16>* %a
+ %1 = load <8 x i16>, <8 x i16>* %a
; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
%2 = icmp ule <8 x i16> %1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
%3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
@@ -2057,7 +2057,7 @@ define void @mini_u_eq_v8i16(<8 x i16>* %c, <8 x i16>* %a) nounwind {
define void @mini_u_eq_v4i32(<4 x i32>* %c, <4 x i32>* %a) nounwind {
; CHECK: mini_u_eq_v4i32:
- %1 = load <4 x i32>* %a
+ %1 = load <4 x i32>, <4 x i32>* %a
; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
%2 = icmp ule <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1>
%3 = select <4 x i1> %2, <4 x i32> %1, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
@@ -2072,7 +2072,7 @@ define void @mini_u_eq_v4i32(<4 x i32>* %c, <4 x i32>* %a) nounwind {
define void @mini_u_eq_v2i64(<2 x i64>* %c, <2 x i64>* %a) nounwind {
; CHECK: mini_u_eq_v2i64:
- %1 = load <2 x i64>* %a
+ %1 = load <2 x i64>, <2 x i64>* %a
; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
%2 = icmp ule <2 x i64> %1, <i64 1, i64 1>
%3 = select <2 x i1> %2, <2 x i64> %1, <2 x i64> <i64 1, i64 1>
diff --git a/test/CodeGen/Mips/msa/compare_float.ll b/test/CodeGen/Mips/msa/compare_float.ll
index e93221b93612..3229d027d95d 100644
--- a/test/CodeGen/Mips/msa/compare_float.ll
+++ b/test/CodeGen/Mips/msa/compare_float.ll
@@ -9,8 +9,8 @@ declare <2 x double> @llvm.mips.fmin.d(<2 x double>, <2 x double>) nounwind
define void @false_v4f32(<4 x i32>* %c, <4 x float>* %a, <4 x float>* %b) nounwind {
; CHECK: false_v4f32:
- %1 = load <4 x float>* %a
- %2 = load <4 x float>* %b
+ %1 = load <4 x float>, <4 x float>* %a
+ %2 = load <4 x float>, <4 x float>* %b
%3 = fcmp false <4 x float> %1, %2
%4 = sext <4 x i1> %3 to <4 x i32>
store <4 x i32> %4, <4 x i32>* %c
@@ -25,8 +25,8 @@ define void @false_v4f32(<4 x i32>* %c, <4 x float>* %a, <4 x float>* %b) nounwi
define void @false_v2f64(<2 x i64>* %c, <2 x double>* %a, <2 x double>* %b) nounwind {
; CHECK: false_v2f64:
- %1 = load <2 x double>* %a
- %2 = load <2 x double>* %b
+ %1 = load <2 x double>, <2 x double>* %a
+ %2 = load <2 x double>, <2 x double>* %b
%3 = fcmp false <2 x double> %1, %2
%4 = sext <2 x i1> %3 to <2 x i64>
store <2 x i64> %4, <2 x i64>* %c
@@ -41,9 +41,9 @@ define void @false_v2f64(<2 x i64>* %c, <2 x double>* %a, <2 x double>* %b) noun
define void @oeq_v4f32(<4 x i32>* %c, <4 x float>* %a, <4 x float>* %b) nounwind {
; CHECK: oeq_v4f32:
- %1 = load <4 x float>* %a
+ %1 = load <4 x float>, <4 x float>* %a
; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
- %2 = load <4 x float>* %b
+ %2 = load <4 x float>, <4 x float>* %b
; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
%3 = fcmp oeq <4 x float> %1, %2
%4 = sext <4 x i1> %3 to <4 x i32>
@@ -58,9 +58,9 @@ define void @oeq_v4f32(<4 x i32>* %c, <4 x float>* %a, <4 x float>* %b) nounwind
define void @oeq_v2f64(<2 x i64>* %c, <2 x double>* %a, <2 x double>* %b) nounwind {
; CHECK: oeq_v2f64:
- %1 = load <2 x double>* %a
+ %1 = load <2 x double>, <2 x double>* %a
; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
- %2 = load <2 x double>* %b
+ %2 = load <2 x double>, <2 x double>* %b
; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
%3 = fcmp oeq <2 x double> %1, %2
%4 = sext <2 x i1> %3 to <2 x i64>
@@ -75,9 +75,9 @@ define void @oeq_v2f64(<2 x i64>* %c, <2 x double>* %a, <2 x double>* %b) nounwi
define void @oge_v4f32(<4 x i32>* %c, <4 x float>* %a, <4 x float>* %b) nounwind {
; CHECK: oge_v4f32:
- %1 = load <4 x float>* %a
+ %1 = load <4 x float>, <4 x float>* %a
; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
- %2 = load <4 x float>* %b
+ %2 = load <4 x float>, <4 x float>* %b
; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
%3 = fcmp oge <4 x float> %1, %2
%4 = sext <4 x i1> %3 to <4 x i32>
@@ -92,9 +92,9 @@ define void @oge_v4f32(<4 x i32>* %c, <4 x float>* %a, <4 x float>* %b) nounwind
define void @oge_v2f64(<2 x i64>* %c, <2 x double>* %a, <2 x double>* %b) nounwind {
; CHECK: oge_v2f64:
- %1 = load <2 x double>* %a
+ %1 = load <2 x double>, <2 x double>* %a
; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
- %2 = load <2 x double>* %b
+ %2 = load <2 x double>, <2 x double>* %b
; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
%3 = fcmp oge <2 x double> %1, %2
%4 = sext <2 x i1> %3 to <2 x i64>
@@ -109,9 +109,9 @@ define void @oge_v2f64(<2 x i64>* %c, <2 x double>* %a, <2 x double>* %b) nounwi
define void @ogt_v4f32(<4 x i32>* %c, <4 x float>* %a, <4 x float>* %b) nounwind {
; CHECK: ogt_v4f32:
- %1 = load <4 x float>* %a
+ %1 = load <4 x float>, <4 x float>* %a
; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
- %2 = load <4 x float>* %b
+ %2 = load <4 x float>, <4 x float>* %b
; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
%3 = fcmp ogt <4 x float> %1, %2
%4 = sext <4 x i1> %3 to <4 x i32>
@@ -126,9 +126,9 @@ define void @ogt_v4f32(<4 x i32>* %c, <4 x float>* %a, <4 x float>* %b) nounwind
define void @ogt_v2f64(<2 x i64>* %c, <2 x double>* %a, <2 x double>* %b) nounwind {
; CHECK: ogt_v2f64:
- %1 = load <2 x double>* %a
+ %1 = load <2 x double>, <2 x double>* %a
; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
- %2 = load <2 x double>* %b
+ %2 = load <2 x double>, <2 x double>* %b
; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
%3 = fcmp ogt <2 x double> %1, %2
%4 = sext <2 x i1> %3 to <2 x i64>
@@ -143,9 +143,9 @@ define void @ogt_v2f64(<2 x i64>* %c, <2 x double>* %a, <2 x double>* %b) nounwi
define void @ole_v4f32(<4 x i32>* %c, <4 x float>* %a, <4 x float>* %b) nounwind {
; CHECK: ole_v4f32:
- %1 = load <4 x float>* %a
+ %1 = load <4 x float>, <4 x float>* %a
; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
- %2 = load <4 x float>* %b
+ %2 = load <4 x float>, <4 x float>* %b
; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
%3 = fcmp ole <4 x float> %1, %2
%4 = sext <4 x i1> %3 to <4 x i32>
@@ -160,9 +160,9 @@ define void @ole_v4f32(<4 x i32>* %c, <4 x float>* %a, <4 x float>* %b) nounwind
define void @ole_v2f64(<2 x i64>* %c, <2 x double>* %a, <2 x double>* %b) nounwind {
; CHECK: ole_v2f64:
- %1 = load <2 x double>* %a
+ %1 = load <2 x double>, <2 x double>* %a
; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
- %2 = load <2 x double>* %b
+ %2 = load <2 x double>, <2 x double>* %b
; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
%3 = fcmp ole <2 x double> %1, %2
%4 = sext <2 x i1> %3 to <2 x i64>
@@ -177,9 +177,9 @@ define void @ole_v2f64(<2 x i64>* %c, <2 x double>* %a, <2 x double>* %b) nounwi
define void @olt_v4f32(<4 x i32>* %c, <4 x float>* %a, <4 x float>* %b) nounwind {
; CHECK: olt_v4f32:
- %1 = load <4 x float>* %a
+ %1 = load <4 x float>, <4 x float>* %a
; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
- %2 = load <4 x float>* %b
+ %2 = load <4 x float>, <4 x float>* %b
; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
%3 = fcmp olt <4 x float> %1, %2
%4 = sext <4 x i1> %3 to <4 x i32>
@@ -194,9 +194,9 @@ define void @olt_v4f32(<4 x i32>* %c, <4 x float>* %a, <4 x float>* %b) nounwind
define void @olt_v2f64(<2 x i64>* %c, <2 x double>* %a, <2 x double>* %b) nounwind {
; CHECK: olt_v2f64:
- %1 = load <2 x double>* %a
+ %1 = load <2 x double>, <2 x double>* %a
; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
- %2 = load <2 x double>* %b
+ %2 = load <2 x double>, <2 x double>* %b
; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
%3 = fcmp olt <2 x double> %1, %2
%4 = sext <2 x i1> %3 to <2 x i64>
@@ -211,9 +211,9 @@ define void @olt_v2f64(<2 x i64>* %c, <2 x double>* %a, <2 x double>* %b) nounwi
define void @one_v4f32(<4 x i32>* %c, <4 x float>* %a, <4 x float>* %b) nounwind {
; CHECK: one_v4f32:
- %1 = load <4 x float>* %a
+ %1 = load <4 x float>, <4 x float>* %a
; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
- %2 = load <4 x float>* %b
+ %2 = load <4 x float>, <4 x float>* %b
; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
%3 = fcmp one <4 x float> %1, %2
%4 = sext <4 x i1> %3 to <4 x i32>
@@ -228,9 +228,9 @@ define void @one_v4f32(<4 x i32>* %c, <4 x float>* %a, <4 x float>* %b) nounwind
define void @one_v2f64(<2 x i64>* %c, <2 x double>* %a, <2 x double>* %b) nounwind {
; CHECK: one_v2f64:
- %1 = load <2 x double>* %a
+ %1 = load <2 x double>, <2 x double>* %a
; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
- %2 = load <2 x double>* %b
+ %2 = load <2 x double>, <2 x double>* %b
; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
%3 = fcmp one <2 x double> %1, %2
%4 = sext <2 x i1> %3 to <2 x i64>
@@ -245,9 +245,9 @@ define void @one_v2f64(<2 x i64>* %c, <2 x double>* %a, <2 x double>* %b) nounwi
define void @ord_v4f32(<4 x i32>* %c, <4 x float>* %a, <4 x float>* %b) nounwind {
; CHECK: ord_v4f32:
- %1 = load <4 x float>* %a
+ %1 = load <4 x float>, <4 x float>* %a
; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
- %2 = load <4 x float>* %b
+ %2 = load <4 x float>, <4 x float>* %b
; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
%3 = fcmp ord <4 x float> %1, %2
%4 = sext <4 x i1> %3 to <4 x i32>
@@ -262,9 +262,9 @@ define void @ord_v4f32(<4 x i32>* %c, <4 x float>* %a, <4 x float>* %b) nounwind
define void @ord_v2f64(<2 x i64>* %c, <2 x double>* %a, <2 x double>* %b) nounwind {
; CHECK: ord_v2f64:
- %1 = load <2 x double>* %a
+ %1 = load <2 x double>, <2 x double>* %a
; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
- %2 = load <2 x double>* %b
+ %2 = load <2 x double>, <2 x double>* %b
; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
%3 = fcmp ord <2 x double> %1, %2
%4 = sext <2 x i1> %3 to <2 x i64>
@@ -279,9 +279,9 @@ define void @ord_v2f64(<2 x i64>* %c, <2 x double>* %a, <2 x double>* %b) nounwi
define void @ueq_v4f32(<4 x i32>* %c, <4 x float>* %a, <4 x float>* %b) nounwind {
; CHECK: ueq_v4f32:
- %1 = load <4 x float>* %a
+ %1 = load <4 x float>, <4 x float>* %a
; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
- %2 = load <4 x float>* %b
+ %2 = load <4 x float>, <4 x float>* %b
; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
%3 = fcmp ueq <4 x float> %1, %2
%4 = sext <4 x i1> %3 to <4 x i32>
@@ -296,9 +296,9 @@ define void @ueq_v4f32(<4 x i32>* %c, <4 x float>* %a, <4 x float>* %b) nounwind
define void @ueq_v2f64(<2 x i64>* %c, <2 x double>* %a, <2 x double>* %b) nounwind {
; CHECK: ueq_v2f64:
- %1 = load <2 x double>* %a
+ %1 = load <2 x double>, <2 x double>* %a
; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
- %2 = load <2 x double>* %b
+ %2 = load <2 x double>, <2 x double>* %b
; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
%3 = fcmp ueq <2 x double> %1, %2
%4 = sext <2 x i1> %3 to <2 x i64>
@@ -313,9 +313,9 @@ define void @ueq_v2f64(<2 x i64>* %c, <2 x double>* %a, <2 x double>* %b) nounwi
define void @uge_v4f32(<4 x i32>* %c, <4 x float>* %a, <4 x float>* %b) nounwind {
; CHECK: uge_v4f32:
- %1 = load <4 x float>* %a
+ %1 = load <4 x float>, <4 x float>* %a
; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
- %2 = load <4 x float>* %b
+ %2 = load <4 x float>, <4 x float>* %b
; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
%3 = fcmp uge <4 x float> %1, %2
%4 = sext <4 x i1> %3 to <4 x i32>
@@ -330,9 +330,9 @@ define void @uge_v4f32(<4 x i32>* %c, <4 x float>* %a, <4 x float>* %b) nounwind
define void @uge_v2f64(<2 x i64>* %c, <2 x double>* %a, <2 x double>* %b) nounwind {
; CHECK: uge_v2f64:
- %1 = load <2 x double>* %a
+ %1 = load <2 x double>, <2 x double>* %a
; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
- %2 = load <2 x double>* %b
+ %2 = load <2 x double>, <2 x double>* %b
; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
%3 = fcmp uge <2 x double> %1, %2
%4 = sext <2 x i1> %3 to <2 x i64>
@@ -347,9 +347,9 @@ define void @uge_v2f64(<2 x i64>* %c, <2 x double>* %a, <2 x double>* %b) nounwi
define void @ugt_v4f32(<4 x i32>* %c, <4 x float>* %a, <4 x float>* %b) nounwind {
; CHECK: ugt_v4f32:
- %1 = load <4 x float>* %a
+ %1 = load <4 x float>, <4 x float>* %a
; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
- %2 = load <4 x float>* %b
+ %2 = load <4 x float>, <4 x float>* %b
; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
%3 = fcmp ugt <4 x float> %1, %2
%4 = sext <4 x i1> %3 to <4 x i32>
@@ -364,9 +364,9 @@ define void @ugt_v4f32(<4 x i32>* %c, <4 x float>* %a, <4 x float>* %b) nounwind
define void @ugt_v2f64(<2 x i64>* %c, <2 x double>* %a, <2 x double>* %b) nounwind {
; CHECK: ugt_v2f64:
- %1 = load <2 x double>* %a
+ %1 = load <2 x double>, <2 x double>* %a
; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
- %2 = load <2 x double>* %b
+ %2 = load <2 x double>, <2 x double>* %b
; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
%3 = fcmp ugt <2 x double> %1, %2
%4 = sext <2 x i1> %3 to <2 x i64>
@@ -381,9 +381,9 @@ define void @ugt_v2f64(<2 x i64>* %c, <2 x double>* %a, <2 x double>* %b) nounwi
define void @ule_v4f32(<4 x i32>* %c, <4 x float>* %a, <4 x float>* %b) nounwind {
; CHECK: ule_v4f32:
- %1 = load <4 x float>* %a
+ %1 = load <4 x float>, <4 x float>* %a
; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
- %2 = load <4 x float>* %b
+ %2 = load <4 x float>, <4 x float>* %b
; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
%3 = fcmp ule <4 x float> %1, %2
%4 = sext <4 x i1> %3 to <4 x i32>
@@ -398,9 +398,9 @@ define void @ule_v4f32(<4 x i32>* %c, <4 x float>* %a, <4 x float>* %b) nounwind
define void @ule_v2f64(<2 x i64>* %c, <2 x double>* %a, <2 x double>* %b) nounwind {
; CHECK: ule_v2f64:
- %1 = load <2 x double>* %a
+ %1 = load <2 x double>, <2 x double>* %a
; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
- %2 = load <2 x double>* %b
+ %2 = load <2 x double>, <2 x double>* %b
; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
%3 = fcmp ule <2 x double> %1, %2
%4 = sext <2 x i1> %3 to <2 x i64>
@@ -415,9 +415,9 @@ define void @ule_v2f64(<2 x i64>* %c, <2 x double>* %a, <2 x double>* %b) nounwi
define void @ult_v4f32(<4 x i32>* %c, <4 x float>* %a, <4 x float>* %b) nounwind {
; CHECK: ult_v4f32:
- %1 = load <4 x float>* %a
+ %1 = load <4 x float>, <4 x float>* %a
; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
- %2 = load <4 x float>* %b
+ %2 = load <4 x float>, <4 x float>* %b
; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
%3 = fcmp ult <4 x float> %1, %2
%4 = sext <4 x i1> %3 to <4 x i32>
@@ -432,9 +432,9 @@ define void @ult_v4f32(<4 x i32>* %c, <4 x float>* %a, <4 x float>* %b) nounwind
define void @ult_v2f64(<2 x i64>* %c, <2 x double>* %a, <2 x double>* %b) nounwind {
; CHECK: ult_v2f64:
- %1 = load <2 x double>* %a
+ %1 = load <2 x double>, <2 x double>* %a
; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
- %2 = load <2 x double>* %b
+ %2 = load <2 x double>, <2 x double>* %b
; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
%3 = fcmp ult <2 x double> %1, %2
%4 = sext <2 x i1> %3 to <2 x i64>
@@ -449,9 +449,9 @@ define void @ult_v2f64(<2 x i64>* %c, <2 x double>* %a, <2 x double>* %b) nounwi
define void @uno_v4f32(<4 x i32>* %c, <4 x float>* %a, <4 x float>* %b) nounwind {
; CHECK: uno_v4f32:
- %1 = load <4 x float>* %a
+ %1 = load <4 x float>, <4 x float>* %a
; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
- %2 = load <4 x float>* %b
+ %2 = load <4 x float>, <4 x float>* %b
; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
%3 = fcmp uno <4 x float> %1, %2
%4 = sext <4 x i1> %3 to <4 x i32>
@@ -466,9 +466,9 @@ define void @uno_v4f32(<4 x i32>* %c, <4 x float>* %a, <4 x float>* %b) nounwind
define void @uno_v2f64(<2 x i64>* %c, <2 x double>* %a, <2 x double>* %b) nounwind {
; CHECK: uno_v2f64:
- %1 = load <2 x double>* %a
+ %1 = load <2 x double>, <2 x double>* %a
; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
- %2 = load <2 x double>* %b
+ %2 = load <2 x double>, <2 x double>* %b
; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
%3 = fcmp uno <2 x double> %1, %2
%4 = sext <2 x i1> %3 to <2 x i64>
@@ -483,8 +483,8 @@ define void @uno_v2f64(<2 x i64>* %c, <2 x double>* %a, <2 x double>* %b) nounwi
define void @true_v4f32(<4 x i32>* %c, <4 x float>* %a, <4 x float>* %b) nounwind {
; CHECK: true_v4f32:
- %1 = load <4 x float>* %a
- %2 = load <4 x float>* %b
+ %1 = load <4 x float>, <4 x float>* %a
+ %2 = load <4 x float>, <4 x float>* %b
%3 = fcmp true <4 x float> %1, %2
%4 = sext <4 x i1> %3 to <4 x i32>
store <4 x i32> %4, <4 x i32>* %c
@@ -499,8 +499,8 @@ define void @true_v4f32(<4 x i32>* %c, <4 x float>* %a, <4 x float>* %b) nounwin
define void @true_v2f64(<2 x i64>* %c, <2 x double>* %a, <2 x double>* %b) nounwind {
; CHECK: true_v2f64:
- %1 = load <2 x double>* %a
- %2 = load <2 x double>* %b
+ %1 = load <2 x double>, <2 x double>* %a
+ %2 = load <2 x double>, <2 x double>* %b
%3 = fcmp true <2 x double> %1, %2
%4 = sext <2 x i1> %3 to <2 x i64>
store <2 x i64> %4, <2 x i64>* %c
@@ -516,11 +516,11 @@ define void @bsel_v4f32(<4 x float>* %d, <4 x float>* %a, <4 x float>* %b,
<4 x float>* %c) nounwind {
; CHECK: bsel_v4f32:
- %1 = load <4 x float>* %a
+ %1 = load <4 x float>, <4 x float>* %a
; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
- %2 = load <4 x float>* %b
+ %2 = load <4 x float>, <4 x float>* %b
; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
- %3 = load <4 x float>* %c
+ %3 = load <4 x float>, <4 x float>* %c
; CHECK-DAG: ld.w [[R3:\$w[0-9]+]], 0($7)
%4 = fcmp ogt <4 x float> %1, %2
; CHECK-DAG: fclt.w [[R4:\$w[0-9]+]], [[R2]], [[R1]]
@@ -538,11 +538,11 @@ define void @bsel_v2f64(<2 x double>* %d, <2 x double>* %a, <2 x double>* %b,
<2 x double>* %c) nounwind {
; CHECK: bsel_v2f64:
- %1 = load <2 x double>* %a
+ %1 = load <2 x double>, <2 x double>* %a
; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
- %2 = load <2 x double>* %b
+ %2 = load <2 x double>, <2 x double>* %b
; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
- %3 = load <2 x double>* %c
+ %3 = load <2 x double>, <2 x double>* %c
; CHECK-DAG: ld.d [[R3:\$w[0-9]+]], 0($7)
%4 = fcmp ogt <2 x double> %1, %2
; CHECK-DAG: fclt.d [[R4:\$w[0-9]+]], [[R2]], [[R1]]
@@ -560,9 +560,9 @@ define void @bseli_v4f32(<4 x float>* %d, <4 x float>* %a, <4 x float>* %b,
<4 x float>* %c) nounwind {
; CHECK: bseli_v4f32:
- %1 = load <4 x float>* %a
+ %1 = load <4 x float>, <4 x float>* %a
; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
- %2 = load <4 x float>* %b
+ %2 = load <4 x float>, <4 x float>* %b
; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
%3 = fcmp ogt <4 x float> %1, %2
; CHECK-DAG: fclt.w [[R4:\$w[0-9]+]], [[R2]], [[R1]]
@@ -580,9 +580,9 @@ define void @bseli_v2f64(<2 x double>* %d, <2 x double>* %a, <2 x double>* %b,
<2 x double>* %c) nounwind {
; CHECK: bseli_v2f64:
- %1 = load <2 x double>* %a
+ %1 = load <2 x double>, <2 x double>* %a
; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
- %2 = load <2 x double>* %b
+ %2 = load <2 x double>, <2 x double>* %b
; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
%3 = fcmp ogt <2 x double> %1, %2
; CHECK-DAG: fclt.d [[R4:\$w[0-9]+]], [[R2]], [[R1]]
@@ -599,9 +599,9 @@ define void @bseli_v2f64(<2 x double>* %d, <2 x double>* %a, <2 x double>* %b,
define void @max_v4f32(<4 x float>* %c, <4 x float>* %a, <4 x float>* %b) nounwind {
; CHECK: max_v4f32:
- %1 = load <4 x float>* %a
+ %1 = load <4 x float>, <4 x float>* %a
; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
- %2 = load <4 x float>* %b
+ %2 = load <4 x float>, <4 x float>* %b
; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
%3 = tail call <4 x float> @llvm.mips.fmax.w(<4 x float> %1, <4 x float> %2)
; CHECK-DAG: fmax.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
@@ -615,9 +615,9 @@ define void @max_v4f32(<4 x float>* %c, <4 x float>* %a, <4 x float>* %b) nounwi
define void @max_v2f64(<2 x double>* %c, <2 x double>* %a, <2 x double>* %b) nounwind {
; CHECK: max_v2f64:
- %1 = load <2 x double>* %a
+ %1 = load <2 x double>, <2 x double>* %a
; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
- %2 = load <2 x double>* %b
+ %2 = load <2 x double>, <2 x double>* %b
; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
%3 = tail call <2 x double> @llvm.mips.fmax.d(<2 x double> %1, <2 x double> %2)
; CHECK-DAG: fmax.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
@@ -631,9 +631,9 @@ define void @max_v2f64(<2 x double>* %c, <2 x double>* %a, <2 x double>* %b) nou
define void @min_v4f32(<4 x float>* %c, <4 x float>* %a, <4 x float>* %b) nounwind {
; CHECK: min_v4f32:
- %1 = load <4 x float>* %a
+ %1 = load <4 x float>, <4 x float>* %a
; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
- %2 = load <4 x float>* %b
+ %2 = load <4 x float>, <4 x float>* %b
; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
%3 = tail call <4 x float> @llvm.mips.fmin.w(<4 x float> %1, <4 x float> %2)
; CHECK-DAG: fmin.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
@@ -647,9 +647,9 @@ define void @min_v4f32(<4 x float>* %c, <4 x float>* %a, <4 x float>* %b) nounwi
define void @min_v2f64(<2 x double>* %c, <2 x double>* %a, <2 x double>* %b) nounwind {
; CHECK: min_v2f64:
- %1 = load <2 x double>* %a
+ %1 = load <2 x double>, <2 x double>* %a
; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
- %2 = load <2 x double>* %b
+ %2 = load <2 x double>, <2 x double>* %b
; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
%3 = tail call <2 x double> @llvm.mips.fmin.d(<2 x double> %1, <2 x double> %2)
; CHECK-DAG: fmin.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
diff --git a/test/CodeGen/Mips/msa/elm_copy.ll b/test/CodeGen/Mips/msa/elm_copy.ll
index 0dd75fa3db12..2a0d74f44524 100644
--- a/test/CodeGen/Mips/msa/elm_copy.ll
+++ b/test/CodeGen/Mips/msa/elm_copy.ll
@@ -15,7 +15,7 @@
define void @llvm_mips_copy_s_b_test() nounwind {
entry:
- %0 = load <16 x i8>* @llvm_mips_copy_s_b_ARG1
+ %0 = load <16 x i8>, <16 x i8>* @llvm_mips_copy_s_b_ARG1
%1 = tail call i32 @llvm.mips.copy.s.b(<16 x i8> %0, i32 1)
store i32 %1, i32* @llvm_mips_copy_s_b_RES
ret void
@@ -38,7 +38,7 @@ declare i32 @llvm.mips.copy.s.b(<16 x i8>, i32) nounwind
define void @llvm_mips_copy_s_h_test() nounwind {
entry:
- %0 = load <8 x i16>* @llvm_mips_copy_s_h_ARG1
+ %0 = load <8 x i16>, <8 x i16>* @llvm_mips_copy_s_h_ARG1
%1 = tail call i32 @llvm.mips.copy.s.h(<8 x i16> %0, i32 1)
store i32 %1, i32* @llvm_mips_copy_s_h_RES
ret void
@@ -61,7 +61,7 @@ declare i32 @llvm.mips.copy.s.h(<8 x i16>, i32) nounwind
define void @llvm_mips_copy_s_w_test() nounwind {
entry:
- %0 = load <4 x i32>* @llvm_mips_copy_s_w_ARG1
+ %0 = load <4 x i32>, <4 x i32>* @llvm_mips_copy_s_w_ARG1
%1 = tail call i32 @llvm.mips.copy.s.w(<4 x i32> %0, i32 1)
store i32 %1, i32* @llvm_mips_copy_s_w_RES
ret void
@@ -84,7 +84,7 @@ declare i32 @llvm.mips.copy.s.w(<4 x i32>, i32) nounwind
define void @llvm_mips_copy_s_d_test() nounwind {
entry:
- %0 = load <2 x i64>* @llvm_mips_copy_s_d_ARG1
+ %0 = load <2 x i64>, <2 x i64>* @llvm_mips_copy_s_d_ARG1
%1 = tail call i64 @llvm.mips.copy.s.d(<2 x i64> %0, i32 1)
store i64 %1, i64* @llvm_mips_copy_s_d_RES
ret void
@@ -112,7 +112,7 @@ declare i64 @llvm.mips.copy.s.d(<2 x i64>, i32) nounwind
define void @llvm_mips_copy_u_b_test() nounwind {
entry:
- %0 = load <16 x i8>* @llvm_mips_copy_u_b_ARG1
+ %0 = load <16 x i8>, <16 x i8>* @llvm_mips_copy_u_b_ARG1
%1 = tail call i32 @llvm.mips.copy.u.b(<16 x i8> %0, i32 1)
store i32 %1, i32* @llvm_mips_copy_u_b_RES
ret void
@@ -135,7 +135,7 @@ declare i32 @llvm.mips.copy.u.b(<16 x i8>, i32) nounwind
define void @llvm_mips_copy_u_h_test() nounwind {
entry:
- %0 = load <8 x i16>* @llvm_mips_copy_u_h_ARG1
+ %0 = load <8 x i16>, <8 x i16>* @llvm_mips_copy_u_h_ARG1
%1 = tail call i32 @llvm.mips.copy.u.h(<8 x i16> %0, i32 1)
store i32 %1, i32* @llvm_mips_copy_u_h_RES
ret void
@@ -158,7 +158,7 @@ declare i32 @llvm.mips.copy.u.h(<8 x i16>, i32) nounwind
define void @llvm_mips_copy_u_w_test() nounwind {
entry:
- %0 = load <4 x i32>* @llvm_mips_copy_u_w_ARG1
+ %0 = load <4 x i32>, <4 x i32>* @llvm_mips_copy_u_w_ARG1
%1 = tail call i32 @llvm.mips.copy.u.w(<4 x i32> %0, i32 1)
store i32 %1, i32* @llvm_mips_copy_u_w_RES
ret void
@@ -181,7 +181,7 @@ declare i32 @llvm.mips.copy.u.w(<4 x i32>, i32) nounwind
define void @llvm_mips_copy_u_d_test() nounwind {
entry:
- %0 = load <2 x i64>* @llvm_mips_copy_u_d_ARG1
+ %0 = load <2 x i64>, <2 x i64>* @llvm_mips_copy_u_d_ARG1
%1 = tail call i64 @llvm.mips.copy.u.d(<2 x i64> %0, i32 1)
store i64 %1, i64* @llvm_mips_copy_u_d_RES
ret void
diff --git a/test/CodeGen/Mips/msa/elm_insv.ll b/test/CodeGen/Mips/msa/elm_insv.ll
index c746e523def6..46e6289189df 100644
--- a/test/CodeGen/Mips/msa/elm_insv.ll
+++ b/test/CodeGen/Mips/msa/elm_insv.ll
@@ -16,8 +16,8 @@
define void @llvm_mips_insert_b_test() nounwind {
entry:
- %0 = load <16 x i8>* @llvm_mips_insert_b_ARG1
- %1 = load i32* @llvm_mips_insert_b_ARG3
+ %0 = load <16 x i8>, <16 x i8>* @llvm_mips_insert_b_ARG1
+ %1 = load i32, i32* @llvm_mips_insert_b_ARG3
%2 = tail call <16 x i8> @llvm.mips.insert.b(<16 x i8> %0, i32 1, i32 %1)
store <16 x i8> %2, <16 x i8>* @llvm_mips_insert_b_RES
ret void
@@ -38,8 +38,8 @@ declare <16 x i8> @llvm.mips.insert.b(<16 x i8>, i32, i32) nounwind
define void @llvm_mips_insert_h_test() nounwind {
entry:
- %0 = load <8 x i16>* @llvm_mips_insert_h_ARG1
- %1 = load i32* @llvm_mips_insert_h_ARG3
+ %0 = load <8 x i16>, <8 x i16>* @llvm_mips_insert_h_ARG1
+ %1 = load i32, i32* @llvm_mips_insert_h_ARG3
%2 = tail call <8 x i16> @llvm.mips.insert.h(<8 x i16> %0, i32 1, i32 %1)
store <8 x i16> %2, <8 x i16>* @llvm_mips_insert_h_RES
ret void
@@ -60,8 +60,8 @@ declare <8 x i16> @llvm.mips.insert.h(<8 x i16>, i32, i32) nounwind
define void @llvm_mips_insert_w_test() nounwind {
entry:
- %0 = load <4 x i32>* @llvm_mips_insert_w_ARG1
- %1 = load i32* @llvm_mips_insert_w_ARG3
+ %0 = load <4 x i32>, <4 x i32>* @llvm_mips_insert_w_ARG1
+ %1 = load i32, i32* @llvm_mips_insert_w_ARG3
%2 = tail call <4 x i32> @llvm.mips.insert.w(<4 x i32> %0, i32 1, i32 %1)
store <4 x i32> %2, <4 x i32>* @llvm_mips_insert_w_RES
ret void
@@ -82,8 +82,8 @@ declare <4 x i32> @llvm.mips.insert.w(<4 x i32>, i32, i32) nounwind
define void @llvm_mips_insert_d_test() nounwind {
entry:
- %0 = load <2 x i64>* @llvm_mips_insert_d_ARG1
- %1 = load i64* @llvm_mips_insert_d_ARG3
+ %0 = load <2 x i64>, <2 x i64>* @llvm_mips_insert_d_ARG1
+ %1 = load i64, i64* @llvm_mips_insert_d_ARG3
%2 = tail call <2 x i64> @llvm.mips.insert.d(<2 x i64> %0, i32 1, i64 %1)
store <2 x i64> %2, <2 x i64>* @llvm_mips_insert_d_RES
ret void
@@ -110,8 +110,8 @@ declare <2 x i64> @llvm.mips.insert.d(<2 x i64>, i32, i64) nounwind
define void @llvm_mips_insve_b_test() nounwind {
entry:
- %0 = load <16 x i8>* @llvm_mips_insve_b_ARG1
- %1 = load <16 x i8>* @llvm_mips_insve_b_ARG3
+ %0 = load <16 x i8>, <16 x i8>* @llvm_mips_insve_b_ARG1
+ %1 = load <16 x i8>, <16 x i8>* @llvm_mips_insve_b_ARG3
%2 = tail call <16 x i8> @llvm.mips.insve.b(<16 x i8> %0, i32 1, <16 x i8> %1)
store <16 x i8> %2, <16 x i8>* @llvm_mips_insve_b_RES
ret void
@@ -136,8 +136,8 @@ declare <16 x i8> @llvm.mips.insve.b(<16 x i8>, i32, <16 x i8>) nounwind
define void @llvm_mips_insve_h_test() nounwind {
entry:
- %0 = load <8 x i16>* @llvm_mips_insve_h_ARG1
- %1 = load <8 x i16>* @llvm_mips_insve_h_ARG3
+ %0 = load <8 x i16>, <8 x i16>* @llvm_mips_insve_h_ARG1
+ %1 = load <8 x i16>, <8 x i16>* @llvm_mips_insve_h_ARG3
%2 = tail call <8 x i16> @llvm.mips.insve.h(<8 x i16> %0, i32 1, <8 x i16> %1)
store <8 x i16> %2, <8 x i16>* @llvm_mips_insve_h_RES
ret void
@@ -162,8 +162,8 @@ declare <8 x i16> @llvm.mips.insve.h(<8 x i16>, i32, <8 x i16>) nounwind
define void @llvm_mips_insve_w_test() nounwind {
entry:
- %0 = load <4 x i32>* @llvm_mips_insve_w_ARG1
- %1 = load <4 x i32>* @llvm_mips_insve_w_ARG3
+ %0 = load <4 x i32>, <4 x i32>* @llvm_mips_insve_w_ARG1
+ %1 = load <4 x i32>, <4 x i32>* @llvm_mips_insve_w_ARG3
%2 = tail call <4 x i32> @llvm.mips.insve.w(<4 x i32> %0, i32 1, <4 x i32> %1)
store <4 x i32> %2, <4 x i32>* @llvm_mips_insve_w_RES
ret void
@@ -188,8 +188,8 @@ declare <4 x i32> @llvm.mips.insve.w(<4 x i32>, i32, <4 x i32>) nounwind
define void @llvm_mips_insve_d_test() nounwind {
entry:
- %0 = load <2 x i64>* @llvm_mips_insve_d_ARG1
- %1 = load <2 x i64>* @llvm_mips_insve_d_ARG3
+ %0 = load <2 x i64>, <2 x i64>* @llvm_mips_insve_d_ARG1
+ %1 = load <2 x i64>, <2 x i64>* @llvm_mips_insve_d_ARG3
%2 = tail call <2 x i64> @llvm.mips.insve.d(<2 x i64> %0, i32 1, <2 x i64> %1)
store <2 x i64> %2, <2 x i64>* @llvm_mips_insve_d_RES
ret void
diff --git a/test/CodeGen/Mips/msa/elm_move.ll b/test/CodeGen/Mips/msa/elm_move.ll
index 98c06c732c36..9665b6d688f3 100644
--- a/test/CodeGen/Mips/msa/elm_move.ll
+++ b/test/CodeGen/Mips/msa/elm_move.ll
@@ -9,7 +9,7 @@
define void @llvm_mips_move_vb_test() nounwind {
entry:
- %0 = load <16 x i8>* @llvm_mips_move_vb_ARG1
+ %0 = load <16 x i8>, <16 x i8>* @llvm_mips_move_vb_ARG1
%1 = tail call <16 x i8> @llvm.mips.move.v(<16 x i8> %0)
store <16 x i8> %1, <16 x i8>* @llvm_mips_move_vb_RES
ret void
diff --git a/test/CodeGen/Mips/msa/elm_shift_slide.ll b/test/CodeGen/Mips/msa/elm_shift_slide.ll
index 00a6544b1207..87f15f1a8c92 100644
--- a/test/CodeGen/Mips/msa/elm_shift_slide.ll
+++ b/test/CodeGen/Mips/msa/elm_shift_slide.ll
@@ -10,8 +10,8 @@
define void @llvm_mips_sldi_b_test() nounwind {
entry:
- %0 = load <16 x i8>* @llvm_mips_sldi_b_ARG1
- %1 = load <16 x i8>* @llvm_mips_sldi_b_ARG2
+ %0 = load <16 x i8>, <16 x i8>* @llvm_mips_sldi_b_ARG1
+ %1 = load <16 x i8>, <16 x i8>* @llvm_mips_sldi_b_ARG2
%2 = tail call <16 x i8> @llvm.mips.sldi.b(<16 x i8> %0, <16 x i8> %1, i32 1)
store <16 x i8> %2, <16 x i8>* @llvm_mips_sldi_b_RES
ret void
@@ -31,8 +31,8 @@ declare <16 x i8> @llvm.mips.sldi.b(<16 x i8>, <16 x i8>, i32) nounwind
define void @llvm_mips_sldi_h_test() nounwind {
entry:
- %0 = load <8 x i16>* @llvm_mips_sldi_h_ARG1
- %1 = load <8 x i16>* @llvm_mips_sldi_h_ARG2
+ %0 = load <8 x i16>, <8 x i16>* @llvm_mips_sldi_h_ARG1
+ %1 = load <8 x i16>, <8 x i16>* @llvm_mips_sldi_h_ARG2
%2 = tail call <8 x i16> @llvm.mips.sldi.h(<8 x i16> %0, <8 x i16> %1, i32 1)
store <8 x i16> %2, <8 x i16>* @llvm_mips_sldi_h_RES
ret void
@@ -52,8 +52,8 @@ declare <8 x i16> @llvm.mips.sldi.h(<8 x i16>, <8 x i16>, i32) nounwind
define void @llvm_mips_sldi_w_test() nounwind {
entry:
- %0 = load <4 x i32>* @llvm_mips_sldi_w_ARG1
- %1 = load <4 x i32>* @llvm_mips_sldi_w_ARG2
+ %0 = load <4 x i32>, <4 x i32>* @llvm_mips_sldi_w_ARG1
+ %1 = load <4 x i32>, <4 x i32>* @llvm_mips_sldi_w_ARG2
%2 = tail call <4 x i32> @llvm.mips.sldi.w(<4 x i32> %0, <4 x i32> %1, i32 1)
store <4 x i32> %2, <4 x i32>* @llvm_mips_sldi_w_RES
ret void
@@ -73,8 +73,8 @@ declare <4 x i32> @llvm.mips.sldi.w(<4 x i32>, <4 x i32>, i32) nounwind
define void @llvm_mips_sldi_d_test() nounwind {
entry:
- %0 = load <2 x i64>* @llvm_mips_sldi_d_ARG1
- %1 = load <2 x i64>* @llvm_mips_sldi_d_ARG2
+ %0 = load <2 x i64>, <2 x i64>* @llvm_mips_sldi_d_ARG1
+ %1 = load <2 x i64>, <2 x i64>* @llvm_mips_sldi_d_ARG2
%2 = tail call <2 x i64> @llvm.mips.sldi.d(<2 x i64> %0, <2 x i64> %1, i32 1)
store <2 x i64> %2, <2 x i64>* @llvm_mips_sldi_d_RES
ret void
@@ -93,7 +93,7 @@ declare <2 x i64> @llvm.mips.sldi.d(<2 x i64>, <2 x i64>, i32) nounwind
define void @llvm_mips_splati_b_test() nounwind {
entry:
- %0 = load <16 x i8>* @llvm_mips_splati_b_ARG1
+ %0 = load <16 x i8>, <16 x i8>* @llvm_mips_splati_b_ARG1
%1 = tail call <16 x i8> @llvm.mips.splati.b(<16 x i8> %0, i32 1)
store <16 x i8> %1, <16 x i8>* @llvm_mips_splati_b_RES
ret void
@@ -112,7 +112,7 @@ declare <16 x i8> @llvm.mips.splati.b(<16 x i8>, i32) nounwind
define void @llvm_mips_splati_h_test() nounwind {
entry:
- %0 = load <8 x i16>* @llvm_mips_splati_h_ARG1
+ %0 = load <8 x i16>, <8 x i16>* @llvm_mips_splati_h_ARG1
%1 = tail call <8 x i16> @llvm.mips.splati.h(<8 x i16> %0, i32 1)
store <8 x i16> %1, <8 x i16>* @llvm_mips_splati_h_RES
ret void
@@ -131,7 +131,7 @@ declare <8 x i16> @llvm.mips.splati.h(<8 x i16>, i32) nounwind
define void @llvm_mips_splati_w_test() nounwind {
entry:
- %0 = load <4 x i32>* @llvm_mips_splati_w_ARG1
+ %0 = load <4 x i32>, <4 x i32>* @llvm_mips_splati_w_ARG1
%1 = tail call <4 x i32> @llvm.mips.splati.w(<4 x i32> %0, i32 1)
store <4 x i32> %1, <4 x i32>* @llvm_mips_splati_w_RES
ret void
@@ -150,7 +150,7 @@ declare <4 x i32> @llvm.mips.splati.w(<4 x i32>, i32) nounwind
define void @llvm_mips_splati_d_test() nounwind {
entry:
- %0 = load <2 x i64>* @llvm_mips_splati_d_ARG1
+ %0 = load <2 x i64>, <2 x i64>* @llvm_mips_splati_d_ARG1
%1 = tail call <2 x i64> @llvm.mips.splati.d(<2 x i64> %0, i32 1)
store <2 x i64> %1, <2 x i64>* @llvm_mips_splati_d_RES
ret void
diff --git a/test/CodeGen/Mips/msa/frameindex.ll b/test/CodeGen/Mips/msa/frameindex.ll
index ebec465a3e33..afd28ae184dd 100644
--- a/test/CodeGen/Mips/msa/frameindex.ll
+++ b/test/CodeGen/Mips/msa/frameindex.ll
@@ -5,7 +5,7 @@ define void @loadstore_v16i8_near() nounwind {
; MIPS32-AE: loadstore_v16i8_near:
%1 = alloca <16 x i8>
- %2 = load volatile <16 x i8>* %1
+ %2 = load volatile <16 x i8>, <16 x i8>* %1
; MIPS32-AE: ld.b [[R1:\$w[0-9]+]], 0($sp)
store volatile <16 x i8> %2, <16 x i8>* %1
; MIPS32-AE: st.b [[R1]], 0($sp)
@@ -20,7 +20,7 @@ define void @loadstore_v16i8_just_under_simm10() nounwind {
%1 = alloca <16 x i8>
%2 = alloca [496 x i8] ; Push the frame right up to 512 bytes
- %3 = load volatile <16 x i8>* %1
+ %3 = load volatile <16 x i8>, <16 x i8>* %1
; MIPS32-AE: ld.b [[R1:\$w[0-9]+]], 496($sp)
store volatile <16 x i8> %3, <16 x i8>* %1
; MIPS32-AE: st.b [[R1]], 496($sp)
@@ -35,7 +35,7 @@ define void @loadstore_v16i8_just_over_simm10() nounwind {
%1 = alloca <16 x i8>
%2 = alloca [497 x i8] ; Push the frame just over 512 bytes
- %3 = load volatile <16 x i8>* %1
+ %3 = load volatile <16 x i8>, <16 x i8>* %1
; MIPS32-AE: addiu [[BASE:\$([0-9]+|gp)]], $sp, 512
; MIPS32-AE: ld.b [[R1:\$w[0-9]+]], 0([[BASE]])
store volatile <16 x i8> %3, <16 x i8>* %1
@@ -52,7 +52,7 @@ define void @loadstore_v16i8_just_under_simm16() nounwind {
%1 = alloca <16 x i8>
%2 = alloca [32752 x i8] ; Push the frame right up to 32768 bytes
- %3 = load volatile <16 x i8>* %1
+ %3 = load volatile <16 x i8>, <16 x i8>* %1
; MIPS32-AE: ori [[R2:\$([0-9]+|gp)]], $zero, 32768
; MIPS32-AE: addu [[BASE:\$([0-9]+|gp)]], $sp, [[R2]]
; MIPS32-AE: ld.b [[R1:\$w[0-9]+]], 0([[BASE]])
@@ -71,7 +71,7 @@ define void @loadstore_v16i8_just_over_simm16() nounwind {
%1 = alloca <16 x i8>
%2 = alloca [32753 x i8] ; Push the frame just over 32768 bytes
- %3 = load volatile <16 x i8>* %1
+ %3 = load volatile <16 x i8>, <16 x i8>* %1
; MIPS32-AE: ori [[R2:\$([0-9]+|gp)]], $zero, 32768
; MIPS32-AE: addu [[BASE:\$([0-9]+|gp)]], $sp, [[R2]]
; MIPS32-AE: ld.b [[R1:\$w[0-9]+]], 0([[BASE]])
@@ -88,7 +88,7 @@ define void @loadstore_v8i16_near() nounwind {
; MIPS32-AE: loadstore_v8i16_near:
%1 = alloca <8 x i16>
- %2 = load volatile <8 x i16>* %1
+ %2 = load volatile <8 x i16>, <8 x i16>* %1
; MIPS32-AE: ld.h [[R1:\$w[0-9]+]], 0($sp)
store volatile <8 x i16> %2, <8 x i16>* %1
; MIPS32-AE: st.h [[R1]], 0($sp)
@@ -102,11 +102,11 @@ define void @loadstore_v8i16_unaligned() nounwind {
%1 = alloca [2 x <8 x i16>]
%2 = bitcast [2 x <8 x i16>]* %1 to i8*
- %3 = getelementptr i8* %2, i32 1
+ %3 = getelementptr i8, i8* %2, i32 1
%4 = bitcast i8* %3 to [2 x <8 x i16>]*
- %5 = getelementptr [2 x <8 x i16>]* %4, i32 0, i32 0
- %6 = load volatile <8 x i16>* %5
+ %5 = getelementptr [2 x <8 x i16>], [2 x <8 x i16>]* %4, i32 0, i32 0
+ %6 = load volatile <8 x i16>, <8 x i16>* %5
; MIPS32-AE: addiu [[BASE:\$([0-9]+|gp)]], $sp, 1
; MIPS32-AE: ld.h [[R1:\$w[0-9]+]], 0([[BASE]])
store volatile <8 x i16> %6, <8 x i16>* %5
@@ -123,7 +123,7 @@ define void @loadstore_v8i16_just_under_simm10() nounwind {
%1 = alloca <8 x i16>
%2 = alloca [1008 x i8] ; Push the frame right up to 1024 bytes
- %3 = load volatile <8 x i16>* %1
+ %3 = load volatile <8 x i16>, <8 x i16>* %1
; MIPS32-AE: ld.h [[R1:\$w[0-9]+]], 1008($sp)
store volatile <8 x i16> %3, <8 x i16>* %1
; MIPS32-AE: st.h [[R1]], 1008($sp)
@@ -138,7 +138,7 @@ define void @loadstore_v8i16_just_over_simm10() nounwind {
%1 = alloca <8 x i16>
%2 = alloca [1009 x i8] ; Push the frame just over 1024 bytes
- %3 = load volatile <8 x i16>* %1
+ %3 = load volatile <8 x i16>, <8 x i16>* %1
; MIPS32-AE: addiu [[BASE:\$([0-9]+|gp)]], $sp, 1024
; MIPS32-AE: ld.h [[R1:\$w[0-9]+]], 0([[BASE]])
store volatile <8 x i16> %3, <8 x i16>* %1
@@ -155,7 +155,7 @@ define void @loadstore_v8i16_just_under_simm16() nounwind {
%1 = alloca <8 x i16>
%2 = alloca [32752 x i8] ; Push the frame right up to 32768 bytes
- %3 = load volatile <8 x i16>* %1
+ %3 = load volatile <8 x i16>, <8 x i16>* %1
; MIPS32-AE: ori [[R2:\$([0-9]+|gp)]], $zero, 32768
; MIPS32-AE: addu [[BASE:\$([0-9]+|gp)]], $sp, [[R2]]
; MIPS32-AE: ld.h [[R1:\$w[0-9]+]], 0([[BASE]])
@@ -174,7 +174,7 @@ define void @loadstore_v8i16_just_over_simm16() nounwind {
%1 = alloca <8 x i16>
%2 = alloca [32753 x i8] ; Push the frame just over 32768 bytes
- %3 = load volatile <8 x i16>* %1
+ %3 = load volatile <8 x i16>, <8 x i16>* %1
; MIPS32-AE: ori [[R2:\$([0-9]+|gp)]], $zero, 32768
; MIPS32-AE: addu [[BASE:\$([0-9]+|gp)]], $sp, [[R2]]
; MIPS32-AE: ld.h [[R1:\$w[0-9]+]], 0([[BASE]])
@@ -191,7 +191,7 @@ define void @loadstore_v4i32_near() nounwind {
; MIPS32-AE: loadstore_v4i32_near:
%1 = alloca <4 x i32>
- %2 = load volatile <4 x i32>* %1
+ %2 = load volatile <4 x i32>, <4 x i32>* %1
; MIPS32-AE: ld.w [[R1:\$w[0-9]+]], 0($sp)
store volatile <4 x i32> %2, <4 x i32>* %1
; MIPS32-AE: st.w [[R1]], 0($sp)
@@ -205,11 +205,11 @@ define void @loadstore_v4i32_unaligned() nounwind {
%1 = alloca [2 x <4 x i32>]
%2 = bitcast [2 x <4 x i32>]* %1 to i8*
- %3 = getelementptr i8* %2, i32 1
+ %3 = getelementptr i8, i8* %2, i32 1
%4 = bitcast i8* %3 to [2 x <4 x i32>]*
- %5 = getelementptr [2 x <4 x i32>]* %4, i32 0, i32 0
- %6 = load volatile <4 x i32>* %5
+ %5 = getelementptr [2 x <4 x i32>], [2 x <4 x i32>]* %4, i32 0, i32 0
+ %6 = load volatile <4 x i32>, <4 x i32>* %5
; MIPS32-AE: addiu [[BASE:\$([0-9]+|gp)]], $sp, 1
; MIPS32-AE: ld.w [[R1:\$w[0-9]+]], 0([[BASE]])
store volatile <4 x i32> %6, <4 x i32>* %5
@@ -226,7 +226,7 @@ define void @loadstore_v4i32_just_under_simm10() nounwind {
%1 = alloca <4 x i32>
%2 = alloca [2032 x i8] ; Push the frame right up to 2048 bytes
- %3 = load volatile <4 x i32>* %1
+ %3 = load volatile <4 x i32>, <4 x i32>* %1
; MIPS32-AE: ld.w [[R1:\$w[0-9]+]], 2032($sp)
store volatile <4 x i32> %3, <4 x i32>* %1
; MIPS32-AE: st.w [[R1]], 2032($sp)
@@ -241,7 +241,7 @@ define void @loadstore_v4i32_just_over_simm10() nounwind {
%1 = alloca <4 x i32>
%2 = alloca [2033 x i8] ; Push the frame just over 2048 bytes
- %3 = load volatile <4 x i32>* %1
+ %3 = load volatile <4 x i32>, <4 x i32>* %1
; MIPS32-AE: addiu [[BASE:\$([0-9]+|gp)]], $sp, 2048
; MIPS32-AE: ld.w [[R1:\$w[0-9]+]], 0([[BASE]])
store volatile <4 x i32> %3, <4 x i32>* %1
@@ -258,7 +258,7 @@ define void @loadstore_v4i32_just_under_simm16() nounwind {
%1 = alloca <4 x i32>
%2 = alloca [32752 x i8] ; Push the frame right up to 32768 bytes
- %3 = load volatile <4 x i32>* %1
+ %3 = load volatile <4 x i32>, <4 x i32>* %1
; MIPS32-AE: ori [[R2:\$([0-9]+|gp)]], $zero, 32768
; MIPS32-AE: addu [[BASE:\$([0-9]+|gp)]], $sp, [[R2]]
; MIPS32-AE: ld.w [[R1:\$w[0-9]+]], 0([[BASE]])
@@ -277,7 +277,7 @@ define void @loadstore_v4i32_just_over_simm16() nounwind {
%1 = alloca <4 x i32>
%2 = alloca [32753 x i8] ; Push the frame just over 32768 bytes
- %3 = load volatile <4 x i32>* %1
+ %3 = load volatile <4 x i32>, <4 x i32>* %1
; MIPS32-AE: ori [[R2:\$([0-9]+|gp)]], $zero, 32768
; MIPS32-AE: addu [[BASE:\$([0-9]+|gp)]], $sp, [[R2]]
; MIPS32-AE: ld.w [[R1:\$w[0-9]+]], 0([[BASE]])
@@ -294,7 +294,7 @@ define void @loadstore_v2i64_near() nounwind {
; MIPS32-AE: loadstore_v2i64_near:
%1 = alloca <2 x i64>
- %2 = load volatile <2 x i64>* %1
+ %2 = load volatile <2 x i64>, <2 x i64>* %1
; MIPS32-AE: ld.d [[R1:\$w[0-9]+]], 0($sp)
store volatile <2 x i64> %2, <2 x i64>* %1
; MIPS32-AE: st.d [[R1]], 0($sp)
@@ -308,11 +308,11 @@ define void @loadstore_v2i64_unaligned() nounwind {
%1 = alloca [2 x <2 x i64>]
%2 = bitcast [2 x <2 x i64>]* %1 to i8*
- %3 = getelementptr i8* %2, i32 1
+ %3 = getelementptr i8, i8* %2, i32 1
%4 = bitcast i8* %3 to [2 x <2 x i64>]*
- %5 = getelementptr [2 x <2 x i64>]* %4, i32 0, i32 0
- %6 = load volatile <2 x i64>* %5
+ %5 = getelementptr [2 x <2 x i64>], [2 x <2 x i64>]* %4, i32 0, i32 0
+ %6 = load volatile <2 x i64>, <2 x i64>* %5
; MIPS32-AE: addiu [[BASE:\$([0-9]+|gp)]], $sp, 1
; MIPS32-AE: ld.d [[R1:\$w[0-9]+]], 0([[BASE]])
store volatile <2 x i64> %6, <2 x i64>* %5
@@ -329,7 +329,7 @@ define void @loadstore_v2i64_just_under_simm10() nounwind {
%1 = alloca <2 x i64>
%2 = alloca [4080 x i8] ; Push the frame right up to 4096 bytes
- %3 = load volatile <2 x i64>* %1
+ %3 = load volatile <2 x i64>, <2 x i64>* %1
; MIPS32-AE: ld.d [[R1:\$w[0-9]+]], 4080($sp)
store volatile <2 x i64> %3, <2 x i64>* %1
; MIPS32-AE: st.d [[R1]], 4080($sp)
@@ -344,7 +344,7 @@ define void @loadstore_v2i64_just_over_simm10() nounwind {
%1 = alloca <2 x i64>
%2 = alloca [4081 x i8] ; Push the frame just over 4096 bytes
- %3 = load volatile <2 x i64>* %1
+ %3 = load volatile <2 x i64>, <2 x i64>* %1
; MIPS32-AE: addiu [[BASE:\$([0-9]+|gp)]], $sp, 4096
; MIPS32-AE: ld.d [[R1:\$w[0-9]+]], 0([[BASE]])
store volatile <2 x i64> %3, <2 x i64>* %1
@@ -361,7 +361,7 @@ define void @loadstore_v2i64_just_under_simm16() nounwind {
%1 = alloca <2 x i64>
%2 = alloca [32752 x i8] ; Push the frame right up to 32768 bytes
- %3 = load volatile <2 x i64>* %1
+ %3 = load volatile <2 x i64>, <2 x i64>* %1
; MIPS32-AE: ori [[R2:\$([0-9]+|gp)]], $zero, 32768
; MIPS32-AE: addu [[BASE:\$([0-9]+|gp)]], $sp, [[R2]]
; MIPS32-AE: ld.d [[R1:\$w[0-9]+]], 0([[BASE]])
@@ -380,7 +380,7 @@ define void @loadstore_v2i64_just_over_simm16() nounwind {
%1 = alloca <2 x i64>
%2 = alloca [32753 x i8] ; Push the frame just over 32768 bytes
- %3 = load volatile <2 x i64>* %1
+ %3 = load volatile <2 x i64>, <2 x i64>* %1
; MIPS32-AE: ori [[R2:\$([0-9]+|gp)]], $zero, 32768
; MIPS32-AE: addu [[BASE:\$([0-9]+|gp)]], $sp, [[R2]]
; MIPS32-AE: ld.d [[R1:\$w[0-9]+]], 0([[BASE]])
diff --git a/test/CodeGen/Mips/msa/i10.ll b/test/CodeGen/Mips/msa/i10.ll
index c5a96174a734..204884bbf025 100644
--- a/test/CodeGen/Mips/msa/i10.ll
+++ b/test/CodeGen/Mips/msa/i10.ll
@@ -7,7 +7,7 @@
define i32 @llvm_mips_bnz_b_test() nounwind {
entry:
- %0 = load <16 x i8>* @llvm_mips_bnz_b_ARG1
+ %0 = load <16 x i8>, <16 x i8>* @llvm_mips_bnz_b_ARG1
%1 = tail call i32 @llvm.mips.bnz.b(<16 x i8> %0)
%2 = icmp eq i32 %1, 0
br i1 %2, label %true, label %false
@@ -28,7 +28,7 @@ declare i32 @llvm.mips.bnz.b(<16 x i8>) nounwind
define i32 @llvm_mips_bnz_h_test() nounwind {
entry:
- %0 = load <8 x i16>* @llvm_mips_bnz_h_ARG1
+ %0 = load <8 x i16>, <8 x i16>* @llvm_mips_bnz_h_ARG1
%1 = tail call i32 @llvm.mips.bnz.h(<8 x i16> %0)
%2 = icmp eq i32 %1, 0
br i1 %2, label %true, label %false
@@ -49,7 +49,7 @@ declare i32 @llvm.mips.bnz.h(<8 x i16>) nounwind
define i32 @llvm_mips_bnz_w_test() nounwind {
entry:
- %0 = load <4 x i32>* @llvm_mips_bnz_w_ARG1
+ %0 = load <4 x i32>, <4 x i32>* @llvm_mips_bnz_w_ARG1
%1 = tail call i32 @llvm.mips.bnz.w(<4 x i32> %0)
%2 = icmp eq i32 %1, 0
br i1 %2, label %true, label %false
@@ -70,7 +70,7 @@ declare i32 @llvm.mips.bnz.w(<4 x i32>) nounwind
define i32 @llvm_mips_bnz_d_test() nounwind {
entry:
- %0 = load <2 x i64>* @llvm_mips_bnz_d_ARG1
+ %0 = load <2 x i64>, <2 x i64>* @llvm_mips_bnz_d_ARG1
%1 = tail call i32 @llvm.mips.bnz.d(<2 x i64> %0)
%2 = icmp eq i32 %1, 0
br i1 %2, label %true, label %false
diff --git a/test/CodeGen/Mips/msa/i5-a.ll b/test/CodeGen/Mips/msa/i5-a.ll
index 0b507208f429..f9486b17e0ad 100644
--- a/test/CodeGen/Mips/msa/i5-a.ll
+++ b/test/CodeGen/Mips/msa/i5-a.ll
@@ -9,7 +9,7 @@
define void @llvm_mips_addvi_b_test() nounwind {
entry:
- %0 = load <16 x i8>* @llvm_mips_addvi_b_ARG1
+ %0 = load <16 x i8>, <16 x i8>* @llvm_mips_addvi_b_ARG1
%1 = tail call <16 x i8> @llvm.mips.addvi.b(<16 x i8> %0, i32 14)
store <16 x i8> %1, <16 x i8>* @llvm_mips_addvi_b_RES
ret void
@@ -28,7 +28,7 @@ declare <16 x i8> @llvm.mips.addvi.b(<16 x i8>, i32) nounwind
define void @llvm_mips_addvi_h_test() nounwind {
entry:
- %0 = load <8 x i16>* @llvm_mips_addvi_h_ARG1
+ %0 = load <8 x i16>, <8 x i16>* @llvm_mips_addvi_h_ARG1
%1 = tail call <8 x i16> @llvm.mips.addvi.h(<8 x i16> %0, i32 14)
store <8 x i16> %1, <8 x i16>* @llvm_mips_addvi_h_RES
ret void
@@ -47,7 +47,7 @@ declare <8 x i16> @llvm.mips.addvi.h(<8 x i16>, i32) nounwind
define void @llvm_mips_addvi_w_test() nounwind {
entry:
- %0 = load <4 x i32>* @llvm_mips_addvi_w_ARG1
+ %0 = load <4 x i32>, <4 x i32>* @llvm_mips_addvi_w_ARG1
%1 = tail call <4 x i32> @llvm.mips.addvi.w(<4 x i32> %0, i32 14)
store <4 x i32> %1, <4 x i32>* @llvm_mips_addvi_w_RES
ret void
@@ -66,7 +66,7 @@ declare <4 x i32> @llvm.mips.addvi.w(<4 x i32>, i32) nounwind
define void @llvm_mips_addvi_d_test() nounwind {
entry:
- %0 = load <2 x i64>* @llvm_mips_addvi_d_ARG1
+ %0 = load <2 x i64>, <2 x i64>* @llvm_mips_addvi_d_ARG1
%1 = tail call <2 x i64> @llvm.mips.addvi.d(<2 x i64> %0, i32 14)
store <2 x i64> %1, <2 x i64>* @llvm_mips_addvi_d_RES
ret void
diff --git a/test/CodeGen/Mips/msa/i5-b.ll b/test/CodeGen/Mips/msa/i5-b.ll
index da6be669f0dd..40ab095f6809 100644
--- a/test/CodeGen/Mips/msa/i5-b.ll
+++ b/test/CodeGen/Mips/msa/i5-b.ll
@@ -9,7 +9,7 @@
define void @llvm_mips_bclri_b_test() nounwind {
entry:
- %0 = load <16 x i8>* @llvm_mips_bclri_b_ARG1
+ %0 = load <16 x i8>, <16 x i8>* @llvm_mips_bclri_b_ARG1
%1 = tail call <16 x i8> @llvm.mips.bclri.b(<16 x i8> %0, i32 7)
store <16 x i8> %1, <16 x i8>* @llvm_mips_bclri_b_RES
ret void
@@ -29,7 +29,7 @@ declare <16 x i8> @llvm.mips.bclri.b(<16 x i8>, i32) nounwind
define void @llvm_mips_bclri_h_test() nounwind {
entry:
- %0 = load <8 x i16>* @llvm_mips_bclri_h_ARG1
+ %0 = load <8 x i16>, <8 x i16>* @llvm_mips_bclri_h_ARG1
%1 = tail call <8 x i16> @llvm.mips.bclri.h(<8 x i16> %0, i32 7)
store <8 x i16> %1, <8 x i16>* @llvm_mips_bclri_h_RES
ret void
@@ -48,7 +48,7 @@ declare <8 x i16> @llvm.mips.bclri.h(<8 x i16>, i32) nounwind
define void @llvm_mips_bclri_w_test() nounwind {
entry:
- %0 = load <4 x i32>* @llvm_mips_bclri_w_ARG1
+ %0 = load <4 x i32>, <4 x i32>* @llvm_mips_bclri_w_ARG1
%1 = tail call <4 x i32> @llvm.mips.bclri.w(<4 x i32> %0, i32 7)
store <4 x i32> %1, <4 x i32>* @llvm_mips_bclri_w_RES
ret void
@@ -67,7 +67,7 @@ declare <4 x i32> @llvm.mips.bclri.w(<4 x i32>, i32) nounwind
define void @llvm_mips_bclri_d_test() nounwind {
entry:
- %0 = load <2 x i64>* @llvm_mips_bclri_d_ARG1
+ %0 = load <2 x i64>, <2 x i64>* @llvm_mips_bclri_d_ARG1
%1 = tail call <2 x i64> @llvm.mips.bclri.d(<2 x i64> %0, i32 7)
store <2 x i64> %1, <2 x i64>* @llvm_mips_bclri_d_RES
ret void
@@ -87,8 +87,8 @@ declare <2 x i64> @llvm.mips.bclri.d(<2 x i64>, i32) nounwind
define void @llvm_mips_binsli_b_test() nounwind {
entry:
- %0 = load <16 x i8>* @llvm_mips_binsli_b_ARG1
- %1 = load <16 x i8>* @llvm_mips_binsli_b_ARG2
+ %0 = load <16 x i8>, <16 x i8>* @llvm_mips_binsli_b_ARG1
+ %1 = load <16 x i8>, <16 x i8>* @llvm_mips_binsli_b_ARG2
%2 = tail call <16 x i8> @llvm.mips.binsli.b(<16 x i8> %0, <16 x i8> %1, i32 7)
store <16 x i8> %2, <16 x i8>* @llvm_mips_binsli_b_RES
ret void
@@ -112,8 +112,8 @@ declare <16 x i8> @llvm.mips.binsli.b(<16 x i8>, <16 x i8>, i32) nounwind
define void @llvm_mips_binsli_h_test() nounwind {
entry:
- %0 = load <8 x i16>* @llvm_mips_binsli_h_ARG1
- %1 = load <8 x i16>* @llvm_mips_binsli_h_ARG2
+ %0 = load <8 x i16>, <8 x i16>* @llvm_mips_binsli_h_ARG1
+ %1 = load <8 x i16>, <8 x i16>* @llvm_mips_binsli_h_ARG2
%2 = tail call <8 x i16> @llvm.mips.binsli.h(<8 x i16> %0, <8 x i16> %1, i32 7)
store <8 x i16> %2, <8 x i16>* @llvm_mips_binsli_h_RES
ret void
@@ -137,8 +137,8 @@ declare <8 x i16> @llvm.mips.binsli.h(<8 x i16>, <8 x i16>, i32) nounwind
define void @llvm_mips_binsli_w_test() nounwind {
entry:
- %0 = load <4 x i32>* @llvm_mips_binsli_w_ARG1
- %1 = load <4 x i32>* @llvm_mips_binsli_w_ARG2
+ %0 = load <4 x i32>, <4 x i32>* @llvm_mips_binsli_w_ARG1
+ %1 = load <4 x i32>, <4 x i32>* @llvm_mips_binsli_w_ARG2
%2 = tail call <4 x i32> @llvm.mips.binsli.w(<4 x i32> %0, <4 x i32> %1, i32 7)
store <4 x i32> %2, <4 x i32>* @llvm_mips_binsli_w_RES
ret void
@@ -162,8 +162,8 @@ declare <4 x i32> @llvm.mips.binsli.w(<4 x i32>, <4 x i32>, i32) nounwind
define void @llvm_mips_binsli_d_test() nounwind {
entry:
- %0 = load <2 x i64>* @llvm_mips_binsli_d_ARG1
- %1 = load <2 x i64>* @llvm_mips_binsli_d_ARG2
+ %0 = load <2 x i64>, <2 x i64>* @llvm_mips_binsli_d_ARG1
+ %1 = load <2 x i64>, <2 x i64>* @llvm_mips_binsli_d_ARG2
; TODO: We use a particularly wide mask here to work around a legalization
; issue. If the mask doesn't fit within a 10-bit immediate, it gets
; legalized into a constant pool. We should add a test to cover the
@@ -191,8 +191,8 @@ declare <2 x i64> @llvm.mips.binsli.d(<2 x i64>, <2 x i64>, i32) nounwind
define void @llvm_mips_binsri_b_test() nounwind {
entry:
- %0 = load <16 x i8>* @llvm_mips_binsri_b_ARG1
- %1 = load <16 x i8>* @llvm_mips_binsri_b_ARG2
+ %0 = load <16 x i8>, <16 x i8>* @llvm_mips_binsri_b_ARG1
+ %1 = load <16 x i8>, <16 x i8>* @llvm_mips_binsri_b_ARG2
%2 = tail call <16 x i8> @llvm.mips.binsri.b(<16 x i8> %0, <16 x i8> %1, i32 7)
store <16 x i8> %2, <16 x i8>* @llvm_mips_binsri_b_RES
ret void
@@ -216,8 +216,8 @@ declare <16 x i8> @llvm.mips.binsri.b(<16 x i8>, <16 x i8>, i32) nounwind
define void @llvm_mips_binsri_h_test() nounwind {
entry:
- %0 = load <8 x i16>* @llvm_mips_binsri_h_ARG1
- %1 = load <8 x i16>* @llvm_mips_binsri_h_ARG2
+ %0 = load <8 x i16>, <8 x i16>* @llvm_mips_binsri_h_ARG1
+ %1 = load <8 x i16>, <8 x i16>* @llvm_mips_binsri_h_ARG2
%2 = tail call <8 x i16> @llvm.mips.binsri.h(<8 x i16> %0, <8 x i16> %1, i32 7)
store <8 x i16> %2, <8 x i16>* @llvm_mips_binsri_h_RES
ret void
@@ -241,8 +241,8 @@ declare <8 x i16> @llvm.mips.binsri.h(<8 x i16>, <8 x i16>, i32) nounwind
define void @llvm_mips_binsri_w_test() nounwind {
entry:
- %0 = load <4 x i32>* @llvm_mips_binsri_w_ARG1
- %1 = load <4 x i32>* @llvm_mips_binsri_w_ARG2
+ %0 = load <4 x i32>, <4 x i32>* @llvm_mips_binsri_w_ARG1
+ %1 = load <4 x i32>, <4 x i32>* @llvm_mips_binsri_w_ARG2
%2 = tail call <4 x i32> @llvm.mips.binsri.w(<4 x i32> %0, <4 x i32> %1, i32 7)
store <4 x i32> %2, <4 x i32>* @llvm_mips_binsri_w_RES
ret void
@@ -266,8 +266,8 @@ declare <4 x i32> @llvm.mips.binsri.w(<4 x i32>, <4 x i32>, i32) nounwind
define void @llvm_mips_binsri_d_test() nounwind {
entry:
- %0 = load <2 x i64>* @llvm_mips_binsri_d_ARG1
- %1 = load <2 x i64>* @llvm_mips_binsri_d_ARG2
+ %0 = load <2 x i64>, <2 x i64>* @llvm_mips_binsri_d_ARG1
+ %1 = load <2 x i64>, <2 x i64>* @llvm_mips_binsri_d_ARG2
%2 = tail call <2 x i64> @llvm.mips.binsri.d(<2 x i64> %0, <2 x i64> %1, i32 7)
store <2 x i64> %2, <2 x i64>* @llvm_mips_binsri_d_RES
ret void
@@ -290,7 +290,7 @@ declare <2 x i64> @llvm.mips.binsri.d(<2 x i64>, <2 x i64>, i32) nounwind
define void @llvm_mips_bnegi_b_test() nounwind {
entry:
- %0 = load <16 x i8>* @llvm_mips_bnegi_b_ARG1
+ %0 = load <16 x i8>, <16 x i8>* @llvm_mips_bnegi_b_ARG1
%1 = tail call <16 x i8> @llvm.mips.bnegi.b(<16 x i8> %0, i32 7)
store <16 x i8> %1, <16 x i8>* @llvm_mips_bnegi_b_RES
ret void
@@ -309,7 +309,7 @@ declare <16 x i8> @llvm.mips.bnegi.b(<16 x i8>, i32) nounwind
define void @llvm_mips_bnegi_h_test() nounwind {
entry:
- %0 = load <8 x i16>* @llvm_mips_bnegi_h_ARG1
+ %0 = load <8 x i16>, <8 x i16>* @llvm_mips_bnegi_h_ARG1
%1 = tail call <8 x i16> @llvm.mips.bnegi.h(<8 x i16> %0, i32 7)
store <8 x i16> %1, <8 x i16>* @llvm_mips_bnegi_h_RES
ret void
@@ -328,7 +328,7 @@ declare <8 x i16> @llvm.mips.bnegi.h(<8 x i16>, i32) nounwind
define void @llvm_mips_bnegi_w_test() nounwind {
entry:
- %0 = load <4 x i32>* @llvm_mips_bnegi_w_ARG1
+ %0 = load <4 x i32>, <4 x i32>* @llvm_mips_bnegi_w_ARG1
%1 = tail call <4 x i32> @llvm.mips.bnegi.w(<4 x i32> %0, i32 7)
store <4 x i32> %1, <4 x i32>* @llvm_mips_bnegi_w_RES
ret void
@@ -347,7 +347,7 @@ declare <4 x i32> @llvm.mips.bnegi.w(<4 x i32>, i32) nounwind
define void @llvm_mips_bnegi_d_test() nounwind {
entry:
- %0 = load <2 x i64>* @llvm_mips_bnegi_d_ARG1
+ %0 = load <2 x i64>, <2 x i64>* @llvm_mips_bnegi_d_ARG1
%1 = tail call <2 x i64> @llvm.mips.bnegi.d(<2 x i64> %0, i32 7)
store <2 x i64> %1, <2 x i64>* @llvm_mips_bnegi_d_RES
ret void
@@ -366,7 +366,7 @@ declare <2 x i64> @llvm.mips.bnegi.d(<2 x i64>, i32) nounwind
define void @llvm_mips_bseti_b_test() nounwind {
entry:
- %0 = load <16 x i8>* @llvm_mips_bseti_b_ARG1
+ %0 = load <16 x i8>, <16 x i8>* @llvm_mips_bseti_b_ARG1
%1 = tail call <16 x i8> @llvm.mips.bseti.b(<16 x i8> %0, i32 7)
store <16 x i8> %1, <16 x i8>* @llvm_mips_bseti_b_RES
ret void
@@ -385,7 +385,7 @@ declare <16 x i8> @llvm.mips.bseti.b(<16 x i8>, i32) nounwind
define void @llvm_mips_bseti_h_test() nounwind {
entry:
- %0 = load <8 x i16>* @llvm_mips_bseti_h_ARG1
+ %0 = load <8 x i16>, <8 x i16>* @llvm_mips_bseti_h_ARG1
%1 = tail call <8 x i16> @llvm.mips.bseti.h(<8 x i16> %0, i32 7)
store <8 x i16> %1, <8 x i16>* @llvm_mips_bseti_h_RES
ret void
@@ -404,7 +404,7 @@ declare <8 x i16> @llvm.mips.bseti.h(<8 x i16>, i32) nounwind
define void @llvm_mips_bseti_w_test() nounwind {
entry:
- %0 = load <4 x i32>* @llvm_mips_bseti_w_ARG1
+ %0 = load <4 x i32>, <4 x i32>* @llvm_mips_bseti_w_ARG1
%1 = tail call <4 x i32> @llvm.mips.bseti.w(<4 x i32> %0, i32 7)
store <4 x i32> %1, <4 x i32>* @llvm_mips_bseti_w_RES
ret void
@@ -423,7 +423,7 @@ declare <4 x i32> @llvm.mips.bseti.w(<4 x i32>, i32) nounwind
define void @llvm_mips_bseti_d_test() nounwind {
entry:
- %0 = load <2 x i64>* @llvm_mips_bseti_d_ARG1
+ %0 = load <2 x i64>, <2 x i64>* @llvm_mips_bseti_d_ARG1
%1 = tail call <2 x i64> @llvm.mips.bseti.d(<2 x i64> %0, i32 7)
store <2 x i64> %1, <2 x i64>* @llvm_mips_bseti_d_RES
ret void
diff --git a/test/CodeGen/Mips/msa/i5-c.ll b/test/CodeGen/Mips/msa/i5-c.ll
index bf1578f30f32..815825013ea1 100644
--- a/test/CodeGen/Mips/msa/i5-c.ll
+++ b/test/CodeGen/Mips/msa/i5-c.ll
@@ -9,7 +9,7 @@
define void @llvm_mips_ceqi_b_test() nounwind {
entry:
- %0 = load <16 x i8>* @llvm_mips_ceqi_b_ARG1
+ %0 = load <16 x i8>, <16 x i8>* @llvm_mips_ceqi_b_ARG1
%1 = tail call <16 x i8> @llvm.mips.ceqi.b(<16 x i8> %0, i32 14)
store <16 x i8> %1, <16 x i8>* @llvm_mips_ceqi_b_RES
ret void
@@ -28,7 +28,7 @@ declare <16 x i8> @llvm.mips.ceqi.b(<16 x i8>, i32) nounwind
define void @llvm_mips_ceqi_h_test() nounwind {
entry:
- %0 = load <8 x i16>* @llvm_mips_ceqi_h_ARG1
+ %0 = load <8 x i16>, <8 x i16>* @llvm_mips_ceqi_h_ARG1
%1 = tail call <8 x i16> @llvm.mips.ceqi.h(<8 x i16> %0, i32 14)
store <8 x i16> %1, <8 x i16>* @llvm_mips_ceqi_h_RES
ret void
@@ -47,7 +47,7 @@ declare <8 x i16> @llvm.mips.ceqi.h(<8 x i16>, i32) nounwind
define void @llvm_mips_ceqi_w_test() nounwind {
entry:
- %0 = load <4 x i32>* @llvm_mips_ceqi_w_ARG1
+ %0 = load <4 x i32>, <4 x i32>* @llvm_mips_ceqi_w_ARG1
%1 = tail call <4 x i32> @llvm.mips.ceqi.w(<4 x i32> %0, i32 14)
store <4 x i32> %1, <4 x i32>* @llvm_mips_ceqi_w_RES
ret void
@@ -66,7 +66,7 @@ declare <4 x i32> @llvm.mips.ceqi.w(<4 x i32>, i32) nounwind
define void @llvm_mips_ceqi_d_test() nounwind {
entry:
- %0 = load <2 x i64>* @llvm_mips_ceqi_d_ARG1
+ %0 = load <2 x i64>, <2 x i64>* @llvm_mips_ceqi_d_ARG1
%1 = tail call <2 x i64> @llvm.mips.ceqi.d(<2 x i64> %0, i32 14)
store <2 x i64> %1, <2 x i64>* @llvm_mips_ceqi_d_RES
ret void
@@ -85,7 +85,7 @@ declare <2 x i64> @llvm.mips.ceqi.d(<2 x i64>, i32) nounwind
define void @llvm_mips_clei_s_b_test() nounwind {
entry:
- %0 = load <16 x i8>* @llvm_mips_clei_s_b_ARG1
+ %0 = load <16 x i8>, <16 x i8>* @llvm_mips_clei_s_b_ARG1
%1 = tail call <16 x i8> @llvm.mips.clei.s.b(<16 x i8> %0, i32 14)
store <16 x i8> %1, <16 x i8>* @llvm_mips_clei_s_b_RES
ret void
@@ -104,7 +104,7 @@ declare <16 x i8> @llvm.mips.clei.s.b(<16 x i8>, i32) nounwind
define void @llvm_mips_clei_s_h_test() nounwind {
entry:
- %0 = load <8 x i16>* @llvm_mips_clei_s_h_ARG1
+ %0 = load <8 x i16>, <8 x i16>* @llvm_mips_clei_s_h_ARG1
%1 = tail call <8 x i16> @llvm.mips.clei.s.h(<8 x i16> %0, i32 14)
store <8 x i16> %1, <8 x i16>* @llvm_mips_clei_s_h_RES
ret void
@@ -123,7 +123,7 @@ declare <8 x i16> @llvm.mips.clei.s.h(<8 x i16>, i32) nounwind
define void @llvm_mips_clei_s_w_test() nounwind {
entry:
- %0 = load <4 x i32>* @llvm_mips_clei_s_w_ARG1
+ %0 = load <4 x i32>, <4 x i32>* @llvm_mips_clei_s_w_ARG1
%1 = tail call <4 x i32> @llvm.mips.clei.s.w(<4 x i32> %0, i32 14)
store <4 x i32> %1, <4 x i32>* @llvm_mips_clei_s_w_RES
ret void
@@ -142,7 +142,7 @@ declare <4 x i32> @llvm.mips.clei.s.w(<4 x i32>, i32) nounwind
define void @llvm_mips_clei_s_d_test() nounwind {
entry:
- %0 = load <2 x i64>* @llvm_mips_clei_s_d_ARG1
+ %0 = load <2 x i64>, <2 x i64>* @llvm_mips_clei_s_d_ARG1
%1 = tail call <2 x i64> @llvm.mips.clei.s.d(<2 x i64> %0, i32 14)
store <2 x i64> %1, <2 x i64>* @llvm_mips_clei_s_d_RES
ret void
@@ -161,7 +161,7 @@ declare <2 x i64> @llvm.mips.clei.s.d(<2 x i64>, i32) nounwind
define void @llvm_mips_clei_u_b_test() nounwind {
entry:
- %0 = load <16 x i8>* @llvm_mips_clei_u_b_ARG1
+ %0 = load <16 x i8>, <16 x i8>* @llvm_mips_clei_u_b_ARG1
%1 = tail call <16 x i8> @llvm.mips.clei.u.b(<16 x i8> %0, i32 14)
store <16 x i8> %1, <16 x i8>* @llvm_mips_clei_u_b_RES
ret void
@@ -180,7 +180,7 @@ declare <16 x i8> @llvm.mips.clei.u.b(<16 x i8>, i32) nounwind
define void @llvm_mips_clei_u_h_test() nounwind {
entry:
- %0 = load <8 x i16>* @llvm_mips_clei_u_h_ARG1
+ %0 = load <8 x i16>, <8 x i16>* @llvm_mips_clei_u_h_ARG1
%1 = tail call <8 x i16> @llvm.mips.clei.u.h(<8 x i16> %0, i32 14)
store <8 x i16> %1, <8 x i16>* @llvm_mips_clei_u_h_RES
ret void
@@ -199,7 +199,7 @@ declare <8 x i16> @llvm.mips.clei.u.h(<8 x i16>, i32) nounwind
define void @llvm_mips_clei_u_w_test() nounwind {
entry:
- %0 = load <4 x i32>* @llvm_mips_clei_u_w_ARG1
+ %0 = load <4 x i32>, <4 x i32>* @llvm_mips_clei_u_w_ARG1
%1 = tail call <4 x i32> @llvm.mips.clei.u.w(<4 x i32> %0, i32 14)
store <4 x i32> %1, <4 x i32>* @llvm_mips_clei_u_w_RES
ret void
@@ -218,7 +218,7 @@ declare <4 x i32> @llvm.mips.clei.u.w(<4 x i32>, i32) nounwind
define void @llvm_mips_clei_u_d_test() nounwind {
entry:
- %0 = load <2 x i64>* @llvm_mips_clei_u_d_ARG1
+ %0 = load <2 x i64>, <2 x i64>* @llvm_mips_clei_u_d_ARG1
%1 = tail call <2 x i64> @llvm.mips.clei.u.d(<2 x i64> %0, i32 14)
store <2 x i64> %1, <2 x i64>* @llvm_mips_clei_u_d_RES
ret void
@@ -237,7 +237,7 @@ declare <2 x i64> @llvm.mips.clei.u.d(<2 x i64>, i32) nounwind
define void @llvm_mips_clti_s_b_test() nounwind {
entry:
- %0 = load <16 x i8>* @llvm_mips_clti_s_b_ARG1
+ %0 = load <16 x i8>, <16 x i8>* @llvm_mips_clti_s_b_ARG1
%1 = tail call <16 x i8> @llvm.mips.clti.s.b(<16 x i8> %0, i32 14)
store <16 x i8> %1, <16 x i8>* @llvm_mips_clti_s_b_RES
ret void
@@ -256,7 +256,7 @@ declare <16 x i8> @llvm.mips.clti.s.b(<16 x i8>, i32) nounwind
define void @llvm_mips_clti_s_h_test() nounwind {
entry:
- %0 = load <8 x i16>* @llvm_mips_clti_s_h_ARG1
+ %0 = load <8 x i16>, <8 x i16>* @llvm_mips_clti_s_h_ARG1
%1 = tail call <8 x i16> @llvm.mips.clti.s.h(<8 x i16> %0, i32 14)
store <8 x i16> %1, <8 x i16>* @llvm_mips_clti_s_h_RES
ret void
@@ -275,7 +275,7 @@ declare <8 x i16> @llvm.mips.clti.s.h(<8 x i16>, i32) nounwind
define void @llvm_mips_clti_s_w_test() nounwind {
entry:
- %0 = load <4 x i32>* @llvm_mips_clti_s_w_ARG1
+ %0 = load <4 x i32>, <4 x i32>* @llvm_mips_clti_s_w_ARG1
%1 = tail call <4 x i32> @llvm.mips.clti.s.w(<4 x i32> %0, i32 14)
store <4 x i32> %1, <4 x i32>* @llvm_mips_clti_s_w_RES
ret void
@@ -294,7 +294,7 @@ declare <4 x i32> @llvm.mips.clti.s.w(<4 x i32>, i32) nounwind
define void @llvm_mips_clti_s_d_test() nounwind {
entry:
- %0 = load <2 x i64>* @llvm_mips_clti_s_d_ARG1
+ %0 = load <2 x i64>, <2 x i64>* @llvm_mips_clti_s_d_ARG1
%1 = tail call <2 x i64> @llvm.mips.clti.s.d(<2 x i64> %0, i32 14)
store <2 x i64> %1, <2 x i64>* @llvm_mips_clti_s_d_RES
ret void
@@ -313,7 +313,7 @@ declare <2 x i64> @llvm.mips.clti.s.d(<2 x i64>, i32) nounwind
define void @llvm_mips_clti_u_b_test() nounwind {
entry:
- %0 = load <16 x i8>* @llvm_mips_clti_u_b_ARG1
+ %0 = load <16 x i8>, <16 x i8>* @llvm_mips_clti_u_b_ARG1
%1 = tail call <16 x i8> @llvm.mips.clti.u.b(<16 x i8> %0, i32 14)
store <16 x i8> %1, <16 x i8>* @llvm_mips_clti_u_b_RES
ret void
@@ -332,7 +332,7 @@ declare <16 x i8> @llvm.mips.clti.u.b(<16 x i8>, i32) nounwind
define void @llvm_mips_clti_u_h_test() nounwind {
entry:
- %0 = load <8 x i16>* @llvm_mips_clti_u_h_ARG1
+ %0 = load <8 x i16>, <8 x i16>* @llvm_mips_clti_u_h_ARG1
%1 = tail call <8 x i16> @llvm.mips.clti.u.h(<8 x i16> %0, i32 14)
store <8 x i16> %1, <8 x i16>* @llvm_mips_clti_u_h_RES
ret void
@@ -351,7 +351,7 @@ declare <8 x i16> @llvm.mips.clti.u.h(<8 x i16>, i32) nounwind
define void @llvm_mips_clti_u_w_test() nounwind {
entry:
- %0 = load <4 x i32>* @llvm_mips_clti_u_w_ARG1
+ %0 = load <4 x i32>, <4 x i32>* @llvm_mips_clti_u_w_ARG1
%1 = tail call <4 x i32> @llvm.mips.clti.u.w(<4 x i32> %0, i32 14)
store <4 x i32> %1, <4 x i32>* @llvm_mips_clti_u_w_RES
ret void
@@ -370,7 +370,7 @@ declare <4 x i32> @llvm.mips.clti.u.w(<4 x i32>, i32) nounwind
define void @llvm_mips_clti_u_d_test() nounwind {
entry:
- %0 = load <2 x i64>* @llvm_mips_clti_u_d_ARG1
+ %0 = load <2 x i64>, <2 x i64>* @llvm_mips_clti_u_d_ARG1
%1 = tail call <2 x i64> @llvm.mips.clti.u.d(<2 x i64> %0, i32 14)
store <2 x i64> %1, <2 x i64>* @llvm_mips_clti_u_d_RES
ret void
diff --git a/test/CodeGen/Mips/msa/i5-m.ll b/test/CodeGen/Mips/msa/i5-m.ll
index 27663494324d..ba6e9d2384a7 100644
--- a/test/CodeGen/Mips/msa/i5-m.ll
+++ b/test/CodeGen/Mips/msa/i5-m.ll
@@ -9,7 +9,7 @@
define void @llvm_mips_maxi_s_b_test() nounwind {
entry:
- %0 = load <16 x i8>* @llvm_mips_maxi_s_b_ARG1
+ %0 = load <16 x i8>, <16 x i8>* @llvm_mips_maxi_s_b_ARG1
%1 = tail call <16 x i8> @llvm.mips.maxi.s.b(<16 x i8> %0, i32 14)
store <16 x i8> %1, <16 x i8>* @llvm_mips_maxi_s_b_RES
ret void
@@ -28,7 +28,7 @@ declare <16 x i8> @llvm.mips.maxi.s.b(<16 x i8>, i32) nounwind
define void @llvm_mips_maxi_s_h_test() nounwind {
entry:
- %0 = load <8 x i16>* @llvm_mips_maxi_s_h_ARG1
+ %0 = load <8 x i16>, <8 x i16>* @llvm_mips_maxi_s_h_ARG1
%1 = tail call <8 x i16> @llvm.mips.maxi.s.h(<8 x i16> %0, i32 14)
store <8 x i16> %1, <8 x i16>* @llvm_mips_maxi_s_h_RES
ret void
@@ -47,7 +47,7 @@ declare <8 x i16> @llvm.mips.maxi.s.h(<8 x i16>, i32) nounwind
define void @llvm_mips_maxi_s_w_test() nounwind {
entry:
- %0 = load <4 x i32>* @llvm_mips_maxi_s_w_ARG1
+ %0 = load <4 x i32>, <4 x i32>* @llvm_mips_maxi_s_w_ARG1
%1 = tail call <4 x i32> @llvm.mips.maxi.s.w(<4 x i32> %0, i32 14)
store <4 x i32> %1, <4 x i32>* @llvm_mips_maxi_s_w_RES
ret void
@@ -66,7 +66,7 @@ declare <4 x i32> @llvm.mips.maxi.s.w(<4 x i32>, i32) nounwind
define void @llvm_mips_maxi_s_d_test() nounwind {
entry:
- %0 = load <2 x i64>* @llvm_mips_maxi_s_d_ARG1
+ %0 = load <2 x i64>, <2 x i64>* @llvm_mips_maxi_s_d_ARG1
%1 = tail call <2 x i64> @llvm.mips.maxi.s.d(<2 x i64> %0, i32 14)
store <2 x i64> %1, <2 x i64>* @llvm_mips_maxi_s_d_RES
ret void
@@ -85,7 +85,7 @@ declare <2 x i64> @llvm.mips.maxi.s.d(<2 x i64>, i32) nounwind
define void @llvm_mips_maxi_u_b_test() nounwind {
entry:
- %0 = load <16 x i8>* @llvm_mips_maxi_u_b_ARG1
+ %0 = load <16 x i8>, <16 x i8>* @llvm_mips_maxi_u_b_ARG1
%1 = tail call <16 x i8> @llvm.mips.maxi.u.b(<16 x i8> %0, i32 14)
store <16 x i8> %1, <16 x i8>* @llvm_mips_maxi_u_b_RES
ret void
@@ -104,7 +104,7 @@ declare <16 x i8> @llvm.mips.maxi.u.b(<16 x i8>, i32) nounwind
define void @llvm_mips_maxi_u_h_test() nounwind {
entry:
- %0 = load <8 x i16>* @llvm_mips_maxi_u_h_ARG1
+ %0 = load <8 x i16>, <8 x i16>* @llvm_mips_maxi_u_h_ARG1
%1 = tail call <8 x i16> @llvm.mips.maxi.u.h(<8 x i16> %0, i32 14)
store <8 x i16> %1, <8 x i16>* @llvm_mips_maxi_u_h_RES
ret void
@@ -123,7 +123,7 @@ declare <8 x i16> @llvm.mips.maxi.u.h(<8 x i16>, i32) nounwind
define void @llvm_mips_maxi_u_w_test() nounwind {
entry:
- %0 = load <4 x i32>* @llvm_mips_maxi_u_w_ARG1
+ %0 = load <4 x i32>, <4 x i32>* @llvm_mips_maxi_u_w_ARG1
%1 = tail call <4 x i32> @llvm.mips.maxi.u.w(<4 x i32> %0, i32 14)
store <4 x i32> %1, <4 x i32>* @llvm_mips_maxi_u_w_RES
ret void
@@ -142,7 +142,7 @@ declare <4 x i32> @llvm.mips.maxi.u.w(<4 x i32>, i32) nounwind
define void @llvm_mips_maxi_u_d_test() nounwind {
entry:
- %0 = load <2 x i64>* @llvm_mips_maxi_u_d_ARG1
+ %0 = load <2 x i64>, <2 x i64>* @llvm_mips_maxi_u_d_ARG1
%1 = tail call <2 x i64> @llvm.mips.maxi.u.d(<2 x i64> %0, i32 14)
store <2 x i64> %1, <2 x i64>* @llvm_mips_maxi_u_d_RES
ret void
@@ -161,7 +161,7 @@ declare <2 x i64> @llvm.mips.maxi.u.d(<2 x i64>, i32) nounwind
define void @llvm_mips_mini_s_b_test() nounwind {
entry:
- %0 = load <16 x i8>* @llvm_mips_mini_s_b_ARG1
+ %0 = load <16 x i8>, <16 x i8>* @llvm_mips_mini_s_b_ARG1
%1 = tail call <16 x i8> @llvm.mips.mini.s.b(<16 x i8> %0, i32 14)
store <16 x i8> %1, <16 x i8>* @llvm_mips_mini_s_b_RES
ret void
@@ -180,7 +180,7 @@ declare <16 x i8> @llvm.mips.mini.s.b(<16 x i8>, i32) nounwind
define void @llvm_mips_mini_s_h_test() nounwind {
entry:
- %0 = load <8 x i16>* @llvm_mips_mini_s_h_ARG1
+ %0 = load <8 x i16>, <8 x i16>* @llvm_mips_mini_s_h_ARG1
%1 = tail call <8 x i16> @llvm.mips.mini.s.h(<8 x i16> %0, i32 14)
store <8 x i16> %1, <8 x i16>* @llvm_mips_mini_s_h_RES
ret void
@@ -199,7 +199,7 @@ declare <8 x i16> @llvm.mips.mini.s.h(<8 x i16>, i32) nounwind
define void @llvm_mips_mini_s_w_test() nounwind {
entry:
- %0 = load <4 x i32>* @llvm_mips_mini_s_w_ARG1
+ %0 = load <4 x i32>, <4 x i32>* @llvm_mips_mini_s_w_ARG1
%1 = tail call <4 x i32> @llvm.mips.mini.s.w(<4 x i32> %0, i32 14)
store <4 x i32> %1, <4 x i32>* @llvm_mips_mini_s_w_RES
ret void
@@ -218,7 +218,7 @@ declare <4 x i32> @llvm.mips.mini.s.w(<4 x i32>, i32) nounwind
define void @llvm_mips_mini_s_d_test() nounwind {
entry:
- %0 = load <2 x i64>* @llvm_mips_mini_s_d_ARG1
+ %0 = load <2 x i64>, <2 x i64>* @llvm_mips_mini_s_d_ARG1
%1 = tail call <2 x i64> @llvm.mips.mini.s.d(<2 x i64> %0, i32 14)
store <2 x i64> %1, <2 x i64>* @llvm_mips_mini_s_d_RES
ret void
@@ -237,7 +237,7 @@ declare <2 x i64> @llvm.mips.mini.s.d(<2 x i64>, i32) nounwind
define void @llvm_mips_mini_u_b_test() nounwind {
entry:
- %0 = load <16 x i8>* @llvm_mips_mini_u_b_ARG1
+ %0 = load <16 x i8>, <16 x i8>* @llvm_mips_mini_u_b_ARG1
%1 = tail call <16 x i8> @llvm.mips.mini.u.b(<16 x i8> %0, i32 14)
store <16 x i8> %1, <16 x i8>* @llvm_mips_mini_u_b_RES
ret void
@@ -256,7 +256,7 @@ declare <16 x i8> @llvm.mips.mini.u.b(<16 x i8>, i32) nounwind
define void @llvm_mips_mini_u_h_test() nounwind {
entry:
- %0 = load <8 x i16>* @llvm_mips_mini_u_h_ARG1
+ %0 = load <8 x i16>, <8 x i16>* @llvm_mips_mini_u_h_ARG1
%1 = tail call <8 x i16> @llvm.mips.mini.u.h(<8 x i16> %0, i32 14)
store <8 x i16> %1, <8 x i16>* @llvm_mips_mini_u_h_RES
ret void
@@ -275,7 +275,7 @@ declare <8 x i16> @llvm.mips.mini.u.h(<8 x i16>, i32) nounwind
define void @llvm_mips_mini_u_w_test() nounwind {
entry:
- %0 = load <4 x i32>* @llvm_mips_mini_u_w_ARG1
+ %0 = load <4 x i32>, <4 x i32>* @llvm_mips_mini_u_w_ARG1
%1 = tail call <4 x i32> @llvm.mips.mini.u.w(<4 x i32> %0, i32 14)
store <4 x i32> %1, <4 x i32>* @llvm_mips_mini_u_w_RES
ret void
@@ -294,7 +294,7 @@ declare <4 x i32> @llvm.mips.mini.u.w(<4 x i32>, i32) nounwind
define void @llvm_mips_mini_u_d_test() nounwind {
entry:
- %0 = load <2 x i64>* @llvm_mips_mini_u_d_ARG1
+ %0 = load <2 x i64>, <2 x i64>* @llvm_mips_mini_u_d_ARG1
%1 = tail call <2 x i64> @llvm.mips.mini.u.d(<2 x i64> %0, i32 14)
store <2 x i64> %1, <2 x i64>* @llvm_mips_mini_u_d_RES
ret void
diff --git a/test/CodeGen/Mips/msa/i5-s.ll b/test/CodeGen/Mips/msa/i5-s.ll
index 184172f63b85..db331b1476ce 100644
--- a/test/CodeGen/Mips/msa/i5-s.ll
+++ b/test/CodeGen/Mips/msa/i5-s.ll
@@ -9,7 +9,7 @@
define void @llvm_mips_subvi_b_test() nounwind {
entry:
- %0 = load <16 x i8>* @llvm_mips_subvi_b_ARG1
+ %0 = load <16 x i8>, <16 x i8>* @llvm_mips_subvi_b_ARG1
%1 = tail call <16 x i8> @llvm.mips.subvi.b(<16 x i8> %0, i32 14)
store <16 x i8> %1, <16 x i8>* @llvm_mips_subvi_b_RES
ret void
@@ -28,7 +28,7 @@ declare <16 x i8> @llvm.mips.subvi.b(<16 x i8>, i32) nounwind
define void @llvm_mips_subvi_h_test() nounwind {
entry:
- %0 = load <8 x i16>* @llvm_mips_subvi_h_ARG1
+ %0 = load <8 x i16>, <8 x i16>* @llvm_mips_subvi_h_ARG1
%1 = tail call <8 x i16> @llvm.mips.subvi.h(<8 x i16> %0, i32 14)
store <8 x i16> %1, <8 x i16>* @llvm_mips_subvi_h_RES
ret void
@@ -47,7 +47,7 @@ declare <8 x i16> @llvm.mips.subvi.h(<8 x i16>, i32) nounwind
define void @llvm_mips_subvi_w_test() nounwind {
entry:
- %0 = load <4 x i32>* @llvm_mips_subvi_w_ARG1
+ %0 = load <4 x i32>, <4 x i32>* @llvm_mips_subvi_w_ARG1
%1 = tail call <4 x i32> @llvm.mips.subvi.w(<4 x i32> %0, i32 14)
store <4 x i32> %1, <4 x i32>* @llvm_mips_subvi_w_RES
ret void
@@ -66,7 +66,7 @@ declare <4 x i32> @llvm.mips.subvi.w(<4 x i32>, i32) nounwind
define void @llvm_mips_subvi_d_test() nounwind {
entry:
- %0 = load <2 x i64>* @llvm_mips_subvi_d_ARG1
+ %0 = load <2 x i64>, <2 x i64>* @llvm_mips_subvi_d_ARG1
%1 = tail call <2 x i64> @llvm.mips.subvi.d(<2 x i64> %0, i32 14)
store <2 x i64> %1, <2 x i64>* @llvm_mips_subvi_d_RES
ret void
diff --git a/test/CodeGen/Mips/msa/i5_ld_st.ll b/test/CodeGen/Mips/msa/i5_ld_st.ll
index 7cc55f2904be..991bb8436b33 100644
--- a/test/CodeGen/Mips/msa/i5_ld_st.ll
+++ b/test/CodeGen/Mips/msa/i5_ld_st.ll
@@ -81,7 +81,7 @@ declare <2 x i64> @llvm.mips.ld.d(i8*, i32) nounwind
define void @llvm_mips_st_b_test() nounwind {
entry:
- %0 = load <16 x i8>* @llvm_mips_st_b_ARG
+ %0 = load <16 x i8>, <16 x i8>* @llvm_mips_st_b_ARG
%1 = bitcast <16 x i8>* @llvm_mips_st_b_RES to i8*
tail call void @llvm.mips.st.b(<16 x i8> %0, i8* %1, i32 16)
ret void
@@ -99,7 +99,7 @@ declare void @llvm.mips.st.b(<16 x i8>, i8*, i32) nounwind
define void @llvm_mips_st_h_test() nounwind {
entry:
- %0 = load <8 x i16>* @llvm_mips_st_h_ARG
+ %0 = load <8 x i16>, <8 x i16>* @llvm_mips_st_h_ARG
%1 = bitcast <8 x i16>* @llvm_mips_st_h_RES to i8*
tail call void @llvm.mips.st.h(<8 x i16> %0, i8* %1, i32 16)
ret void
@@ -117,7 +117,7 @@ declare void @llvm.mips.st.h(<8 x i16>, i8*, i32) nounwind
define void @llvm_mips_st_w_test() nounwind {
entry:
- %0 = load <4 x i32>* @llvm_mips_st_w_ARG
+ %0 = load <4 x i32>, <4 x i32>* @llvm_mips_st_w_ARG
%1 = bitcast <4 x i32>* @llvm_mips_st_w_RES to i8*
tail call void @llvm.mips.st.w(<4 x i32> %0, i8* %1, i32 16)
ret void
@@ -135,7 +135,7 @@ declare void @llvm.mips.st.w(<4 x i32>, i8*, i32) nounwind
define void @llvm_mips_st_d_test() nounwind {
entry:
- %0 = load <2 x i64>* @llvm_mips_st_d_ARG
+ %0 = load <2 x i64>, <2 x i64>* @llvm_mips_st_d_ARG
%1 = bitcast <2 x i64>* @llvm_mips_st_d_RES to i8*
tail call void @llvm.mips.st.d(<2 x i64> %0, i8* %1, i32 16)
ret void
diff --git a/test/CodeGen/Mips/msa/i8.ll b/test/CodeGen/Mips/msa/i8.ll
index d2931a72feaa..4af9c588fdef 100644
--- a/test/CodeGen/Mips/msa/i8.ll
+++ b/test/CodeGen/Mips/msa/i8.ll
@@ -8,7 +8,7 @@
define void @llvm_mips_andi_b_test() nounwind {
entry:
- %0 = load <16 x i8>* @llvm_mips_andi_b_ARG1
+ %0 = load <16 x i8>, <16 x i8>* @llvm_mips_andi_b_ARG1
%1 = tail call <16 x i8> @llvm.mips.andi.b(<16 x i8> %0, i32 25)
store <16 x i8> %1, <16 x i8>* @llvm_mips_andi_b_RES
ret void
@@ -28,8 +28,8 @@ declare <16 x i8> @llvm.mips.andi.b(<16 x i8>, i32) nounwind
define void @llvm_mips_bmnzi_b_test() nounwind {
entry:
- %0 = load <16 x i8>* @llvm_mips_bmnzi_b_ARG1
- %1 = load <16 x i8>* @llvm_mips_bmnzi_b_ARG2
+ %0 = load <16 x i8>, <16 x i8>* @llvm_mips_bmnzi_b_ARG1
+ %1 = load <16 x i8>, <16 x i8>* @llvm_mips_bmnzi_b_ARG2
%2 = tail call <16 x i8> @llvm.mips.bmnzi.b(<16 x i8> %0, <16 x i8> %1, i32 25)
store <16 x i8> %2, <16 x i8>* @llvm_mips_bmnzi_b_RES
ret void
@@ -52,8 +52,8 @@ declare <16 x i8> @llvm.mips.bmnzi.b(<16 x i8>, <16 x i8>, i32) nounwind
define void @llvm_mips_bmzi_b_test() nounwind {
entry:
- %0 = load <16 x i8>* @llvm_mips_bmzi_b_ARG1
- %1 = load <16 x i8>* @llvm_mips_bmzi_b_ARG2
+ %0 = load <16 x i8>, <16 x i8>* @llvm_mips_bmzi_b_ARG1
+ %1 = load <16 x i8>, <16 x i8>* @llvm_mips_bmzi_b_ARG2
%2 = tail call <16 x i8> @llvm.mips.bmzi.b(<16 x i8> %0, <16 x i8> %1, i32 25)
store <16 x i8> %2, <16 x i8>* @llvm_mips_bmzi_b_RES
ret void
@@ -77,8 +77,8 @@ declare <16 x i8> @llvm.mips.bmzi.b(<16 x i8>, <16 x i8>, i32) nounwind
define void @llvm_mips_bseli_b_test() nounwind {
entry:
- %0 = load <16 x i8>* @llvm_mips_bseli_b_ARG1
- %1 = load <16 x i8>* @llvm_mips_bseli_b_ARG2
+ %0 = load <16 x i8>, <16 x i8>* @llvm_mips_bseli_b_ARG1
+ %1 = load <16 x i8>, <16 x i8>* @llvm_mips_bseli_b_ARG2
%2 = tail call <16 x i8> @llvm.mips.bseli.b(<16 x i8> %0, <16 x i8> %1, i32 25)
store <16 x i8> %2, <16 x i8>* @llvm_mips_bseli_b_RES
ret void
@@ -100,7 +100,7 @@ declare <16 x i8> @llvm.mips.bseli.b(<16 x i8>, <16 x i8>, i32) nounwind
define void @llvm_mips_nori_b_test() nounwind {
entry:
- %0 = load <16 x i8>* @llvm_mips_nori_b_ARG1
+ %0 = load <16 x i8>, <16 x i8>* @llvm_mips_nori_b_ARG1
%1 = tail call <16 x i8> @llvm.mips.nori.b(<16 x i8> %0, i32 25)
store <16 x i8> %1, <16 x i8>* @llvm_mips_nori_b_RES
ret void
@@ -119,7 +119,7 @@ declare <16 x i8> @llvm.mips.nori.b(<16 x i8>, i32) nounwind
define void @llvm_mips_ori_b_test() nounwind {
entry:
- %0 = load <16 x i8>* @llvm_mips_ori_b_ARG1
+ %0 = load <16 x i8>, <16 x i8>* @llvm_mips_ori_b_ARG1
%1 = tail call <16 x i8> @llvm.mips.ori.b(<16 x i8> %0, i32 25)
store <16 x i8> %1, <16 x i8>* @llvm_mips_ori_b_RES
ret void
@@ -138,7 +138,7 @@ declare <16 x i8> @llvm.mips.ori.b(<16 x i8>, i32) nounwind
define void @llvm_mips_shf_b_test() nounwind {
entry:
- %0 = load <16 x i8>* @llvm_mips_shf_b_ARG1
+ %0 = load <16 x i8>, <16 x i8>* @llvm_mips_shf_b_ARG1
%1 = tail call <16 x i8> @llvm.mips.shf.b(<16 x i8> %0, i32 25)
store <16 x i8> %1, <16 x i8>* @llvm_mips_shf_b_RES
ret void
@@ -157,7 +157,7 @@ declare <16 x i8> @llvm.mips.shf.b(<16 x i8>, i32) nounwind
define void @llvm_mips_shf_h_test() nounwind {
entry:
- %0 = load <8 x i16>* @llvm_mips_shf_h_ARG1
+ %0 = load <8 x i16>, <8 x i16>* @llvm_mips_shf_h_ARG1
%1 = tail call <8 x i16> @llvm.mips.shf.h(<8 x i16> %0, i32 25)
store <8 x i16> %1, <8 x i16>* @llvm_mips_shf_h_RES
ret void
@@ -176,7 +176,7 @@ declare <8 x i16> @llvm.mips.shf.h(<8 x i16>, i32) nounwind
define void @llvm_mips_shf_w_test() nounwind {
entry:
- %0 = load <4 x i32>* @llvm_mips_shf_w_ARG1
+ %0 = load <4 x i32>, <4 x i32>* @llvm_mips_shf_w_ARG1
%1 = tail call <4 x i32> @llvm.mips.shf.w(<4 x i32> %0, i32 25)
store <4 x i32> %1, <4 x i32>* @llvm_mips_shf_w_RES
ret void
@@ -195,7 +195,7 @@ declare <4 x i32> @llvm.mips.shf.w(<4 x i32>, i32) nounwind
define void @llvm_mips_xori_b_test() nounwind {
entry:
- %0 = load <16 x i8>* @llvm_mips_xori_b_ARG1
+ %0 = load <16 x i8>, <16 x i8>* @llvm_mips_xori_b_ARG1
%1 = tail call <16 x i8> @llvm.mips.xori.b(<16 x i8> %0, i32 25)
store <16 x i8> %1, <16 x i8>* @llvm_mips_xori_b_RES
ret void
diff --git a/test/CodeGen/Mips/msa/inline-asm.ll b/test/CodeGen/Mips/msa/inline-asm.ll
index 4a34273f3c00..85da87b6f8a7 100644
--- a/test/CodeGen/Mips/msa/inline-asm.ll
+++ b/test/CodeGen/Mips/msa/inline-asm.ll
@@ -16,7 +16,7 @@ entry:
define void @test2() nounwind {
entry:
; CHECK-LABEL: test2:
- %0 = load <4 x i32>* @v4i32_r
+ %0 = load <4 x i32>, <4 x i32>* @v4i32_r
%1 = call <4 x i32> asm "addvi.w ${0:w}, ${1:w}, 1", "=f,f"(<4 x i32> %0)
; CHECK: addvi.w $w{{[1-3]?[0-9]}}, $w{{[1-3]?[0-9]}}, 1
store <4 x i32> %1, <4 x i32>* @v4i32_r
@@ -26,7 +26,7 @@ entry:
define void @test3() nounwind {
entry:
; CHECK-LABEL: test3:
- %0 = load <4 x i32>* @v4i32_r
+ %0 = load <4 x i32>, <4 x i32>* @v4i32_r
%1 = call <4 x i32> asm sideeffect "addvi.w ${0:w}, ${1:w}, 1", "=f,f,~{$w0}"(<4 x i32> %0)
; CHECK: addvi.w $w{{([1-9]|[1-3][0-9])}}, $w{{([1-9]|[1-3][0-9])}}, 1
store <4 x i32> %1, <4 x i32>* @v4i32_r
diff --git a/test/CodeGen/Mips/msa/llvm-stress-s1704963983.ll b/test/CodeGen/Mips/msa/llvm-stress-s1704963983.ll
index 4beaaa9c1841..beb361bc9f3d 100644
--- a/test/CodeGen/Mips/msa/llvm-stress-s1704963983.ll
+++ b/test/CodeGen/Mips/msa/llvm-stress-s1704963983.ll
@@ -14,7 +14,7 @@ BB:
%A2 = alloca <1 x double>
%A1 = alloca double
%A = alloca i32
- %L = load i8* %0
+ %L = load i8, i8* %0
store i8 77, i8* %0
%E = extractelement <8 x i64> zeroinitializer, i32 2
%Shuff = shufflevector <8 x i64> zeroinitializer, <8 x i64> zeroinitializer, <8 x i32> <i32 5, i32 7, i32 undef, i32 undef, i32 13, i32 15, i32 1, i32 3>
@@ -24,7 +24,7 @@ BB:
br label %CF
CF: ; preds = %CF, %CF78, %BB
- %L5 = load i8* %Sl
+ %L5 = load i8, i8* %Sl
store i8 %L, i8* %Sl
%E6 = extractelement <8 x i32> zeroinitializer, i32 2
%Shuff7 = shufflevector <8 x i64> zeroinitializer, <8 x i64> %Shuff, <8 x i32> <i32 13, i32 15, i32 1, i32 3, i32 5, i32 7, i32 9, i32 undef>
@@ -33,7 +33,7 @@ CF: ; preds = %CF, %CF78, %BB
%FC = sitofp <8 x i64> zeroinitializer to <8 x float>
%Sl9 = select i1 %Cmp, i8 77, i8 77
%Cmp10 = icmp uge <8 x i64> %Shuff, zeroinitializer
- %L11 = load i8* %0
+ %L11 = load i8, i8* %0
store i8 %Sl9, i8* %0
%E12 = extractelement <1 x i16> zeroinitializer, i32 0
%Shuff13 = shufflevector <8 x i64> zeroinitializer, <8 x i64> %Shuff, <8 x i32> <i32 9, i32 11, i32 13, i32 15, i32 undef, i32 3, i32 5, i32 7>
@@ -42,7 +42,7 @@ CF: ; preds = %CF, %CF78, %BB
%Tr = trunc <8 x i64> %Shuff to <8 x i32>
%Sl16 = select i1 %Cmp, i8 77, i8 %5
%Cmp17 = icmp ult <8 x i1> %Cmp10, %Cmp10
- %L18 = load i8* %Sl
+ %L18 = load i8, i8* %Sl
store i8 -1, i8* %Sl
%E19 = extractelement <8 x i32> zeroinitializer, i32 3
%Shuff20 = shufflevector <8 x float> %FC, <8 x float> %FC, <8 x i32> <i32 6, i32 8, i32 undef, i32 12, i32 14, i32 0, i32 2, i32 undef>
@@ -54,7 +54,7 @@ CF: ; preds = %CF, %CF78, %BB
br i1 %Cmp25, label %CF, label %CF78
CF78: ; preds = %CF
- %L26 = load i8* %Sl
+ %L26 = load i8, i8* %Sl
store i32 50347, i32* %A
%E27 = extractelement <8 x i1> %Cmp10, i32 2
br i1 %E27, label %CF, label %CF77
@@ -65,7 +65,7 @@ CF77: ; preds = %CF77, %CF81, %CF78
%B30 = urem <8 x i32> %Tr, zeroinitializer
%Tr31 = trunc i32 0 to i16
%Sl32 = select i1 %Cmp, <2 x i1> zeroinitializer, <2 x i1> zeroinitializer
- %L33 = load i8* %Sl
+ %L33 = load i8, i8* %Sl
store i8 %L26, i8* %Sl
%E34 = extractelement <4 x i32> zeroinitializer, i32 0
%Shuff35 = shufflevector <1 x i16> zeroinitializer, <1 x i16> %B, <1 x i32> undef
@@ -73,7 +73,7 @@ CF77: ; preds = %CF77, %CF81, %CF78
%B37 = srem <1 x i16> %I29, zeroinitializer
%FC38 = sitofp <8 x i32> %B30 to <8 x double>
%Sl39 = select i1 %Cmp, double 0.000000e+00, double %Sl24
- %L40 = load i8* %Sl
+ %L40 = load i8, i8* %Sl
store i8 %Sl16, i8* %Sl
%E41 = extractelement <1 x i16> zeroinitializer, i32 0
%Shuff42 = shufflevector <8 x i1> %Cmp17, <8 x i1> %Cmp10, <8 x i32> <i32 14, i32 undef, i32 2, i32 4, i32 undef, i32 8, i32 10, i32 12>
@@ -85,7 +85,7 @@ CF77: ; preds = %CF77, %CF81, %CF78
br i1 %Cmp46, label %CF77, label %CF80
CF80: ; preds = %CF80, %CF77
- %L47 = load i64* %PC
+ %L47 = load i64, i64* %PC
store i8 77, i8* %Sl
%E48 = extractelement <8 x i64> zeroinitializer, i32 2
%Shuff49 = shufflevector <8 x i64> zeroinitializer, <8 x i64> %Shuff7, <8 x i32> <i32 5, i32 7, i32 9, i32 undef, i32 undef, i32 undef, i32 undef, i32 3>
@@ -97,7 +97,7 @@ CF80: ; preds = %CF80, %CF77
br i1 %Cmp54, label %CF80, label %CF81
CF81: ; preds = %CF80
- %L55 = load i8* %Sl
+ %L55 = load i8, i8* %Sl
store i8 %Sl16, i8* %Sl
%E56 = extractelement <1 x i16> %B, i32 0
%Shuff57 = shufflevector <1 x i16> zeroinitializer, <1 x i16> zeroinitializer, <1 x i32> <i32 1>
@@ -105,7 +105,7 @@ CF81: ; preds = %CF80
%B59 = srem i32 %E19, %E19
%Sl60 = select i1 %Cmp, i8 77, i8 77
%Cmp61 = icmp ult <1 x i16> zeroinitializer, %B
- %L62 = load i8* %Sl
+ %L62 = load i8, i8* %Sl
store i64 %L47, i64* %PC52
%E63 = extractelement <4 x i32> %I43, i32 2
%Shuff64 = shufflevector <4 x i1> zeroinitializer, <4 x i1> zeroinitializer, <4 x i32> <i32 undef, i32 undef, i32 1, i32 3>
@@ -117,7 +117,7 @@ CF81: ; preds = %CF80
br i1 %Cmp69, label %CF77, label %CF79
CF79: ; preds = %CF81
- %L70 = load i32* %A
+ %L70 = load i32, i32* %A
store i64 %4, i64* %PC
%E71 = extractelement <4 x i32> zeroinitializer, i32 0
%Shuff72 = shufflevector <8 x i32> zeroinitializer, <8 x i32> %B44, <8 x i32> <i32 11, i32 undef, i32 15, i32 1, i32 3, i32 undef, i32 7, i32 9>
diff --git a/test/CodeGen/Mips/msa/llvm-stress-s1935737938.ll b/test/CodeGen/Mips/msa/llvm-stress-s1935737938.ll
index f9cab037e7cc..bdf6eafdf4ef 100644
--- a/test/CodeGen/Mips/msa/llvm-stress-s1935737938.ll
+++ b/test/CodeGen/Mips/msa/llvm-stress-s1935737938.ll
@@ -14,7 +14,7 @@ BB:
%A2 = alloca i64
%A1 = alloca i32
%A = alloca <2 x i64>
- %L = load i8* %0
+ %L = load i8, i8* %0
store i8 -1, i8* %0
%E = extractelement <2 x i32> zeroinitializer, i32 0
%Shuff = shufflevector <2 x i32> zeroinitializer, <2 x i32> zeroinitializer, <2 x i32> <i32 1, i32 3>
@@ -22,7 +22,7 @@ BB:
%B = lshr i8 %L, -69
%ZE = fpext float 0xBF2AA5FE80000000 to double
%Sl = select i1 true, <1 x i64> <i64 -1>, <1 x i64> <i64 -1>
- %L5 = load i8* %0
+ %L5 = load i8, i8* %0
store i8 -69, i8* %0
%E6 = extractelement <16 x i64> <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1>, i32 14
%Shuff7 = shufflevector <2 x i32> zeroinitializer, <2 x i32> zeroinitializer, <2 x i32> <i32 1, i32 3>
@@ -31,7 +31,7 @@ BB:
%FC = uitofp i32 %3 to double
%Sl10 = select i1 true, <1 x i1> zeroinitializer, <1 x i1> zeroinitializer
%Cmp = icmp ne <1 x i64> %I, <i64 -1>
- %L11 = load i8* %0
+ %L11 = load i8, i8* %0
store i8 %L11, i8* %0
%E12 = extractelement <1 x i64> <i64 -1>, i32 0
%Shuff13 = shufflevector <1 x i64> %Sl, <1 x i64> <i64 -1>, <1 x i32> <i32 1>
@@ -42,7 +42,7 @@ BB:
br label %CF74
CF74: ; preds = %CF74, %CF80, %CF76, %BB
- %L18 = load i8* %0
+ %L18 = load i8, i8* %0
store i8 -69, i8* %0
%E19 = extractelement <1 x i64> %Sl, i32 0
%Shuff20 = shufflevector <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, <8 x i32> <i32 12, i32 14, i32 0, i32 2, i32 4, i32 6, i32 8, i32 10>
@@ -50,7 +50,7 @@ CF74: ; preds = %CF74, %CF80, %CF76,
%B22 = urem i32 135673, %3
%FC23 = sitofp i8 %L to float
%Sl24 = select i1 true, i8 %B, i8 %L18
- %L25 = load i8* %0
+ %L25 = load i8, i8* %0
store i8 %L, i8* %0
%E26 = extractelement <2 x i32> %Shuff, i32 1
%Shuff27 = shufflevector <2 x i32> zeroinitializer, <2 x i32> zeroinitializer, <2 x i32> <i32 2, i32 0>
@@ -62,7 +62,7 @@ CF74: ; preds = %CF74, %CF80, %CF76,
br i1 %Cmp31, label %CF74, label %CF80
CF80: ; preds = %CF74
- %L32 = load i8* %0
+ %L32 = load i8, i8* %0
store i8 -1, i8* %0
%E33 = extractelement <2 x i32> zeroinitializer, i32 1
%Shuff34 = shufflevector <1 x i64> %Shuff13, <1 x i64> <i64 -1>, <1 x i32> zeroinitializer
@@ -70,7 +70,7 @@ CF80: ; preds = %CF74
%FC36 = sitofp <1 x i1> %Cmp to <1 x float>
%Sl37 = select i1 true, <8 x i8> %Shuff20, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
%Cmp38 = icmp sgt <2 x i32> %I21, %Shuff27
- %L39 = load i8* %0
+ %L39 = load i8, i8* %0
store i8 %Sl24, i8* %0
%E40 = extractelement <8 x i64> zeroinitializer, i32 1
%Shuff41 = shufflevector <2 x i1> zeroinitializer, <2 x i1> %Cmp38, <2 x i32> <i32 0, i32 2>
@@ -81,7 +81,7 @@ CF80: ; preds = %CF74
br i1 %Cmp45, label %CF74, label %CF76
CF76: ; preds = %CF80
- %L46 = load i8* %0
+ %L46 = load i8, i8* %0
store i8 %L39, i8* %0
%E47 = extractelement <2 x i32> %Shuff27, i32 0
%Shuff48 = shufflevector <1 x i1> %Sl10, <1 x i1> %Sl10, <1 x i32> <i32 1>
@@ -92,7 +92,7 @@ CF76: ; preds = %CF80
br i1 %Cmp52, label %CF74, label %CF75
CF75: ; preds = %CF75, %CF76
- %L53 = load i8* %0
+ %L53 = load i8, i8* %0
store i8 %L18, i8* %0
%E54 = extractelement <8 x i8> %Shuff20, i32 5
%Shuff55 = shufflevector <2 x i32> %Shuff, <2 x i32> zeroinitializer, <2 x i32> <i32 0, i32 2>
@@ -103,7 +103,7 @@ CF75: ; preds = %CF75, %CF76
br i1 %Cmp59, label %CF75, label %CF78
CF78: ; preds = %CF75
- %L60 = load i8* %0
+ %L60 = load i8, i8* %0
store i8 -69, i8* %0
%E61 = extractelement <2 x i32> zeroinitializer, i32 0
%Shuff62 = shufflevector <2 x i32> %Shuff7, <2 x i32> %I21, <2 x i32> <i32 1, i32 3>
@@ -115,7 +115,7 @@ CF78: ; preds = %CF75
br label %CF
CF: ; preds = %CF, %CF78
- %L68 = load i8* %0
+ %L68 = load i8, i8* %0
store i64 %B57, i64* %2
%E69 = extractelement <2 x i1> %Shuff41, i32 1
br i1 %E69, label %CF, label %CF77
diff --git a/test/CodeGen/Mips/msa/llvm-stress-s2704903805.ll b/test/CodeGen/Mips/msa/llvm-stress-s2704903805.ll
index e14f405320cb..8f23a8ca5177 100644
--- a/test/CodeGen/Mips/msa/llvm-stress-s2704903805.ll
+++ b/test/CodeGen/Mips/msa/llvm-stress-s2704903805.ll
@@ -13,7 +13,7 @@ BB:
%A2 = alloca i8
%A1 = alloca i32
%A = alloca i8
- %L = load i8* %0
+ %L = load i8, i8* %0
store i8 %5, i8* %0
%E = extractelement <2 x i16> zeroinitializer, i32 0
%Shuff = shufflevector <1 x i8> <i8 -1>, <1 x i8> <i8 -1>, <1 x i32> undef
@@ -25,7 +25,7 @@ BB:
br label %CF83
CF83: ; preds = %BB
- %L5 = load i8* %0
+ %L5 = load i8, i8* %0
store i8 85, i8* %0
%E6 = extractelement <1 x i8> <i8 -1>, i32 0
%Shuff7 = shufflevector <2 x i16> zeroinitializer, <2 x i16> zeroinitializer, <2 x i32> <i32 1, i32 3>
@@ -37,7 +37,7 @@ CF83: ; preds = %BB
br label %CF
CF: ; preds = %CF, %CF81, %CF83
- %L13 = load i8* %0
+ %L13 = load i8, i8* %0
store i8 0, i8* %0
%E14 = extractelement <2 x i64> zeroinitializer, i32 0
%Shuff15 = shufflevector <4 x i64> <i64 -1, i64 -1, i64 -1, i64 -1>, <4 x i64> <i64 -1, i64 -1, i64 -1, i64 -1>, <4 x i32> <i32 3, i32 5, i32 7, i32 undef>
@@ -52,7 +52,7 @@ CF80: ; preds = %CF80, %CF
br i1 %Cmp19, label %CF80, label %CF81
CF81: ; preds = %CF80
- %L20 = load i8* %0
+ %L20 = load i8, i8* %0
store i8 85, i8* %0
%E21 = extractelement <1 x i8> <i8 -1>, i32 0
%Shuff22 = shufflevector <1 x i8> <i8 -1>, <1 x i8> %Shuff, <1 x i32> zeroinitializer
@@ -60,7 +60,7 @@ CF81: ; preds = %CF80
%FC24 = fptoui <4 x float> %FC to <4 x i16>
%Sl25 = select i1 %Cmp, <2 x i32> zeroinitializer, <2 x i32> <i32 -1, i32 -1>
%Cmp26 = icmp ult <4 x i64> %I16, %Shuff15
- %L27 = load i8* %0
+ %L27 = load i8, i8* %0
store i8 %L, i8* %0
%E28 = extractelement <1 x i8> <i8 -1>, i32 0
%Shuff29 = shufflevector <8 x i16> zeroinitializer, <8 x i16> zeroinitializer, <8 x i32> <i32 11, i32 undef, i32 15, i32 1, i32 3, i32 5, i32 undef, i32 9>
@@ -68,7 +68,7 @@ CF81: ; preds = %CF80
%B31 = mul i8 %E28, 85
%PC = bitcast i32* %A3 to i32*
%Sl32 = select i1 %Cmp12, float %FC10, float 0x4712BFE680000000
- %L33 = load i32* %PC
+ %L33 = load i32, i32* %PC
store i32 %L33, i32* %PC
%E34 = extractelement <2 x i16> zeroinitializer, i32 1
%Shuff35 = shufflevector <1 x i8> %Shuff, <1 x i8> <i8 -1>, <1 x i32> zeroinitializer
@@ -79,7 +79,7 @@ CF81: ; preds = %CF80
br i1 %Cmp39, label %CF, label %CF77
CF77: ; preds = %CF77, %CF81
- %L40 = load i32* %PC
+ %L40 = load i32, i32* %PC
store i32 %3, i32* %PC
%E41 = extractelement <2 x i32> zeroinitializer, i32 0
%Shuff42 = shufflevector <2 x i32> <i32 -1, i32 -1>, <2 x i32> zeroinitializer, <2 x i32> <i32 1, i32 3>
@@ -88,7 +88,7 @@ CF77: ; preds = %CF77, %CF81
%Se = sext i32 %3 to i64
%Sl45 = select i1 true, <1 x i8> %Shuff, <1 x i8> %I43
%Cmp46 = icmp sge <1 x i8> %I36, %Shuff
- %L47 = load i32* %PC
+ %L47 = load i32, i32* %PC
store i32 %L33, i32* %PC
%E48 = extractelement <2 x i16> zeroinitializer, i32 0
%Shuff49 = shufflevector <1 x i8> <i8 -1>, <1 x i8> <i8 -1>, <1 x i32> <i32 1>
@@ -100,7 +100,7 @@ CF77: ; preds = %CF77, %CF81
br i1 %Cmp54, label %CF77, label %CF78
CF78: ; preds = %CF78, %CF77
- %L55 = load i32* %PC
+ %L55 = load i32, i32* %PC
store i32 %L33, i32* %PC
%E56 = extractelement <8 x i16> %Shuff29, i32 4
%Shuff57 = shufflevector <1 x i8> <i8 -1>, <1 x i8> <i8 -1>, <1 x i32> <i32 1>
@@ -111,7 +111,7 @@ CF78: ; preds = %CF78, %CF77
br i1 %Cmp60, label %CF78, label %CF79
CF79: ; preds = %CF79, %CF78
- %L61 = load i32* %PC
+ %L61 = load i32, i32* %PC
store i32 %L33, i32* %A3
%E62 = extractelement <4 x i64> %Shuff15, i32 1
%Shuff63 = shufflevector <8 x i16> %Shuff29, <8 x i16> %Shuff29, <8 x i32> <i32 undef, i32 10, i32 12, i32 undef, i32 undef, i32 undef, i32 4, i32 6>
@@ -123,7 +123,7 @@ CF79: ; preds = %CF79, %CF78
br i1 %Cmp68, label %CF79, label %CF82
CF82: ; preds = %CF79
- %L69 = load i32* %PC
+ %L69 = load i32, i32* %PC
store i32 %L33, i32* %PC
%E70 = extractelement <8 x i16> zeroinitializer, i32 3
%Shuff71 = shufflevector <4 x i64> %Shuff15, <4 x i64> <i64 -1, i64 -1, i64 -1, i64 -1>, <4 x i32> <i32 6, i32 undef, i32 2, i32 4>
diff --git a/test/CodeGen/Mips/msa/llvm-stress-s3861334421.ll b/test/CodeGen/Mips/msa/llvm-stress-s3861334421.ll
index 1a03e55d9d54..e3cf7964497f 100644
--- a/test/CodeGen/Mips/msa/llvm-stress-s3861334421.ll
+++ b/test/CodeGen/Mips/msa/llvm-stress-s3861334421.ll
@@ -14,7 +14,7 @@ BB:
%A2 = alloca i64
%A1 = alloca i64
%A = alloca double
- %L = load i8* %0
+ %L = load i8, i8* %0
store i8 -101, i8* %0
%E = extractelement <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, i32 0
%Shuff = shufflevector <8 x i64> zeroinitializer, <8 x i64> zeroinitializer, <8 x i32> <i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 undef, i32 1>
@@ -22,7 +22,7 @@ BB:
%B = and i64 116376, 57247
%FC = uitofp i8 7 to double
%Sl = select i1 false, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
- %L5 = load i8* %0
+ %L5 = load i8, i8* %0
store i8 %L, i8* %0
%E6 = extractelement <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, i32 3
%Shuff7 = shufflevector <4 x i64> zeroinitializer, <4 x i64> zeroinitializer, <4 x i32> <i32 2, i32 4, i32 6, i32 0>
@@ -33,7 +33,7 @@ BB:
br label %CF
CF: ; preds = %CF, %BB
- %L11 = load i8* %0
+ %L11 = load i8, i8* %0
store i8 -87, i8* %0
%E12 = extractelement <4 x i64> zeroinitializer, i32 0
%Shuff13 = shufflevector <8 x i64> zeroinitializer, <8 x i64> zeroinitializer, <8 x i32> <i32 7, i32 9, i32 11, i32 13, i32 undef, i32 1, i32 3, i32 5>
@@ -45,7 +45,7 @@ CF: ; preds = %CF, %BB
br i1 %Cmp18, label %CF, label %CF80
CF80: ; preds = %CF80, %CF88, %CF
- %L19 = load i8* %0
+ %L19 = load i8, i8* %0
store i8 -101, i8* %0
%E20 = extractelement <4 x i64> zeroinitializer, i32 0
%Shuff21 = shufflevector <4 x i64> zeroinitializer, <4 x i64> %Shuff7, <4 x i32> <i32 7, i32 1, i32 3, i32 5>
@@ -56,7 +56,7 @@ CF80: ; preds = %CF80, %CF88, %CF
br i1 %Cmp25, label %CF80, label %CF83
CF83: ; preds = %CF83, %CF80
- %L26 = load i8* %0
+ %L26 = load i8, i8* %0
store i8 -87, i8* %0
%E27 = extractelement <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, i32 0
%Shuff28 = shufflevector <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> <i32 7, i32 1, i32 3, i32 5>
@@ -68,7 +68,7 @@ CF83: ; preds = %CF83, %CF80
br i1 %Cmp33, label %CF83, label %CF88
CF88: ; preds = %CF83
- %L34 = load i8* %0
+ %L34 = load i8, i8* %0
store i8 -87, i8* %0
%E35 = extractelement <8 x i64> %Shuff, i32 7
%Shuff36 = shufflevector <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> %Shuff28, <4 x i32> <i32 2, i32 undef, i32 undef, i32 0>
@@ -80,7 +80,7 @@ CF88: ; preds = %CF83
br i1 %Cmp40, label %CF80, label %CF81
CF81: ; preds = %CF81, %CF85, %CF87, %CF88
- %L41 = load i8* %0
+ %L41 = load i8, i8* %0
store i8 %L34, i8* %0
%E42 = extractelement <8 x i64> %Shuff13, i32 6
%Shuff43 = shufflevector <4 x i64> zeroinitializer, <4 x i64> zeroinitializer, <4 x i32> <i32 undef, i32 undef, i32 undef, i32 7>
@@ -92,7 +92,7 @@ CF81: ; preds = %CF81, %CF85, %CF87,
br i1 %Cmp47, label %CF81, label %CF85
CF85: ; preds = %CF81
- %L48 = load i8* %0
+ %L48 = load i8, i8* %0
store i8 -101, i8* %0
%E49 = extractelement <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, i32 2
%Shuff50 = shufflevector <4 x i64> zeroinitializer, <4 x i64> zeroinitializer, <4 x i32> <i32 5, i32 7, i32 1, i32 3>
@@ -101,7 +101,7 @@ CF85: ; preds = %CF81
%FC53 = uitofp i8 %L48 to double
%Sl54 = select i1 %Cmp47, i32 %3, i32 %Sl24
%Cmp55 = icmp ne <8 x i64> %Shuff13, zeroinitializer
- %L56 = load i8* %0
+ %L56 = load i8, i8* %0
store i8 %L11, i8* %0
%E57 = extractelement <4 x i64> %Shuff21, i32 1
%Shuff58 = shufflevector <8 x i64> %Shuff, <8 x i64> zeroinitializer, <8 x i32> <i32 4, i32 6, i32 undef, i32 10, i32 12, i32 undef, i32 0, i32 2>
@@ -113,7 +113,7 @@ CF85: ; preds = %CF81
CF84: ; preds = %CF84, %CF85
%Sl62 = select i1 false, i8 %L, i8 %L48
%Cmp63 = icmp ne <8 x i64> %I, zeroinitializer
- %L64 = load i8* %0
+ %L64 = load i8, i8* %0
store i8 %5, i8* %0
%E65 = extractelement <8 x i1> %Cmp55, i32 0
br i1 %E65, label %CF84, label %CF87
@@ -125,7 +125,7 @@ CF87: ; preds = %CF84
%ZE69 = zext <8 x i8> %Sl32 to <8 x i64>
%Sl70 = select i1 %Tr61, i64 %E20, i64 %E12
%Cmp71 = icmp slt <8 x i64> %I, %Shuff
- %L72 = load i8* %0
+ %L72 = load i8, i8* %0
store i8 %L72, i8* %0
%E73 = extractelement <8 x i1> %Cmp55, i32 6
br i1 %E73, label %CF81, label %CF82
diff --git a/test/CodeGen/Mips/msa/llvm-stress-s3926023935.ll b/test/CodeGen/Mips/msa/llvm-stress-s3926023935.ll
index 96547d90cb40..6f338107825e 100644
--- a/test/CodeGen/Mips/msa/llvm-stress-s3926023935.ll
+++ b/test/CodeGen/Mips/msa/llvm-stress-s3926023935.ll
@@ -14,7 +14,7 @@ BB:
%A2 = alloca double
%A1 = alloca float
%A = alloca double
- %L = load i8* %0
+ %L = load i8, i8* %0
store i8 -123, i8* %0
%E = extractelement <4 x i64> zeroinitializer, i32 1
%Shuff = shufflevector <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
@@ -22,7 +22,7 @@ BB:
%BC = bitcast i64 181325 to double
%Sl = select i1 false, <2 x i32> zeroinitializer, <2 x i32> zeroinitializer
%Cmp = icmp ne <4 x i64> zeroinitializer, zeroinitializer
- %L5 = load i8* %0
+ %L5 = load i8, i8* %0
store i8 %L, i8* %0
%E6 = extractelement <4 x i64> zeroinitializer, i32 3
%Shuff7 = shufflevector <2 x i16> zeroinitializer, <2 x i16> zeroinitializer, <2 x i32> <i32 2, i32 0>
@@ -33,7 +33,7 @@ BB:
br label %CF80
CF80: ; preds = %BB
- %L11 = load i8* %0
+ %L11 = load i8, i8* %0
store i8 -123, i8* %0
%E12 = extractelement <2 x i16> zeroinitializer, i32 1
%Shuff13 = shufflevector <4 x i64> zeroinitializer, <4 x i64> zeroinitializer, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
@@ -42,7 +42,7 @@ CF80: ; preds = %BB
%PC = bitcast i1* %A4 to i64*
%Sl16 = select i1 %Cmp10, <4 x i32> zeroinitializer, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>
%Cmp17 = icmp ule <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, %Sl16
- %L18 = load double* %A2
+ %L18 = load double, double* %A2
store i64 498254, i64* %PC
%E19 = extractelement <4 x i64> zeroinitializer, i32 0
%Shuff20 = shufflevector <2 x i1> zeroinitializer, <2 x i1> %I, <2 x i32> <i32 3, i32 1>
@@ -51,7 +51,7 @@ CF80: ; preds = %BB
%ZE = zext <2 x i1> %Shuff20 to <2 x i32>
%Sl23 = select i1 %Cmp10, <2 x i1> %Shuff20, <2 x i1> zeroinitializer
%Cmp24 = icmp ult <2 x i32> zeroinitializer, zeroinitializer
- %L25 = load i8* %0
+ %L25 = load i8, i8* %0
store i8 %L25, i8* %0
%E26 = extractelement <4 x i8> <i8 -1, i8 -1, i8 -1, i8 -1>, i32 3
%Shuff27 = shufflevector <4 x i32> %Shuff, <4 x i32> %I14, <4 x i32> <i32 6, i32 0, i32 undef, i32 4>
@@ -63,7 +63,7 @@ CF80: ; preds = %BB
CF79: ; preds = %CF80
%Sl30 = select i1 false, i8 %B29, i8 -123
%Cmp31 = icmp sge <2 x i1> %I, %I
- %L32 = load i64* %PC
+ %L32 = load i64, i64* %PC
store i8 -123, i8* %0
%E33 = extractelement <8 x i64> <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1>, i32 2
%Shuff34 = shufflevector <4 x i64> zeroinitializer, <4 x i64> %Shuff13, <4 x i32> <i32 5, i32 7, i32 1, i32 3>
@@ -75,7 +75,7 @@ CF79: ; preds = %CF80
br label %CF
CF: ; preds = %CF, %CF79
- %L40 = load double* %A
+ %L40 = load double, double* %A
store i1 %Cmp39, i1* %PC37
%E41 = extractelement <4 x i64> zeroinitializer, i32 3
%Shuff42 = shufflevector <2 x i32> zeroinitializer, <2 x i32> %ZE, <2 x i32> <i32 2, i32 undef>
@@ -90,7 +90,7 @@ CF77: ; preds = %CF77, %CF
br i1 %Cmp46, label %CF77, label %CF78
CF78: ; preds = %CF78, %CF83, %CF82, %CF77
- %L47 = load i64* %PC
+ %L47 = load i64, i64* %PC
store i8 -123, i8* %0
%E48 = extractelement <4 x i64> zeroinitializer, i32 3
%Shuff49 = shufflevector <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> zeroinitializer, <4 x i32> <i32 2, i32 4, i32 6, i32 undef>
@@ -105,7 +105,7 @@ CF83: ; preds = %CF78
br i1 %Cmp54, label %CF78, label %CF82
CF82: ; preds = %CF83
- %L55 = load i64* %PC
+ %L55 = load i64, i64* %PC
store i64 %L32, i64* %PC
%E56 = extractelement <2 x i16> %Shuff7, i32 1
%Shuff57 = shufflevector <4 x i64> zeroinitializer, <4 x i64> zeroinitializer, <4 x i32> <i32 2, i32 4, i32 6, i32 0>
@@ -114,7 +114,7 @@ CF82: ; preds = %CF83
%FC = sitofp i64 498254 to double
%Sl60 = select i1 false, i64 %E6, i64 -1
%Cmp61 = icmp sgt <4 x i32> %Shuff27, %I43
- %L62 = load i64* %PC
+ %L62 = load i64, i64* %PC
store i64 %Sl9, i64* %PC
%E63 = extractelement <2 x i32> %ZE, i32 0
%Shuff64 = shufflevector <4 x i64> zeroinitializer, <4 x i64> %Shuff13, <4 x i32> <i32 1, i32 3, i32 undef, i32 7>
@@ -126,7 +126,7 @@ CF82: ; preds = %CF83
CF81: ; preds = %CF82
%Cmp69 = icmp ne <8 x i64> <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1>, %B36
- %L70 = load i8* %0
+ %L70 = load i8, i8* %0
store i64 %L55, i64* %PC
%E71 = extractelement <4 x i32> %Shuff49, i32 1
%Shuff72 = shufflevector <4 x i64> zeroinitializer, <4 x i64> %Shuff34, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
diff --git a/test/CodeGen/Mips/msa/llvm-stress-s3997499501.ll b/test/CodeGen/Mips/msa/llvm-stress-s3997499501.ll
index bef75f3645c8..181f72abd378 100644
--- a/test/CodeGen/Mips/msa/llvm-stress-s3997499501.ll
+++ b/test/CodeGen/Mips/msa/llvm-stress-s3997499501.ll
@@ -14,7 +14,7 @@ BB:
%A2 = alloca float
%A1 = alloca double
%A = alloca double
- %L = load i8* %0
+ %L = load i8, i8* %0
store i8 97, i8* %0
%E = extractelement <16 x i64> <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1>, i32 14
%Shuff = shufflevector <2 x i1> zeroinitializer, <2 x i1> zeroinitializer, <2 x i32> <i32 1, i32 3>
@@ -22,7 +22,7 @@ BB:
%Tr = trunc <1 x i64> zeroinitializer to <1 x i8>
%Sl = select i1 false, double* %A1, double* %A
%Cmp = icmp ne <2 x i64> zeroinitializer, zeroinitializer
- %L5 = load double* %Sl
+ %L5 = load double, double* %Sl
store float -4.374162e+06, float* %A2
%E6 = extractelement <4 x i64> zeroinitializer, i32 3
%Shuff7 = shufflevector <4 x i64> zeroinitializer, <4 x i64> %I, <4 x i32> <i32 2, i32 4, i32 6, i32 undef>
@@ -34,7 +34,7 @@ BB:
br label %CF72
CF72: ; preds = %CF72, %CF80, %CF78, %BB
- %L11 = load double* %Sl
+ %L11 = load double, double* %Sl
store double 0.000000e+00, double* %Sl
%E12 = extractelement <2 x i1> zeroinitializer, i32 0
br i1 %E12, label %CF72, label %CF80
@@ -49,7 +49,7 @@ CF80: ; preds = %CF72
br i1 %Cmp17, label %CF72, label %CF77
CF77: ; preds = %CF77, %CF80
- %L18 = load double* %Sl
+ %L18 = load double, double* %Sl
store double 0.000000e+00, double* %Sl
%E19 = extractelement <2 x i1> zeroinitializer, i32 0
br i1 %E19, label %CF77, label %CF78
@@ -60,7 +60,7 @@ CF78: ; preds = %CF77
%B22 = sdiv <4 x i64> %Shuff7, zeroinitializer
%FC = uitofp i8 97 to double
%Sl23 = select i1 %Cmp10, <2 x i1> zeroinitializer, <2 x i1> zeroinitializer
- %L24 = load double* %Sl
+ %L24 = load double, double* %Sl
store float %Sl16, float* %PC
%E25 = extractelement <2 x i1> %Shuff, i32 1
br i1 %E25, label %CF72, label %CF76
@@ -71,7 +71,7 @@ CF76: ; preds = %CF78
%B28 = mul <4 x i64> %I27, zeroinitializer
%ZE = zext <8 x i1> zeroinitializer to <8 x i64>
%Sl29 = select i1 %Cmp17, float -4.374162e+06, float -4.374162e+06
- %L30 = load i8* %0
+ %L30 = load i8, i8* %0
store double %L5, double* %Sl
%E31 = extractelement <8 x i1> zeroinitializer, i32 5
br label %CF
@@ -85,7 +85,7 @@ CF: ; preds = %CF, %CF81, %CF76
br i1 %Cmp36, label %CF, label %CF74
CF74: ; preds = %CF74, %CF
- %L37 = load float* %PC
+ %L37 = load float, float* %PC
store double 0.000000e+00, double* %Sl
%E38 = extractelement <2 x i1> %Sl23, i32 1
br i1 %E38, label %CF74, label %CF75
@@ -95,7 +95,7 @@ CF75: ; preds = %CF75, %CF82, %CF74
%I40 = insertelement <4 x i64> zeroinitializer, i64 %4, i32 2
%Sl41 = select i1 %Cmp10, i32 0, i32 %3
%Cmp42 = icmp ne <1 x i64> zeroinitializer, zeroinitializer
- %L43 = load double* %Sl
+ %L43 = load double, double* %Sl
store i64 %4, i64* %2
%E44 = extractelement <2 x i1> %Shuff20, i32 1
br i1 %E44, label %CF75, label %CF82
@@ -109,7 +109,7 @@ CF82: ; preds = %CF75
br i1 %Cmp49, label %CF75, label %CF81
CF81: ; preds = %CF82
- %L50 = load i8* %0
+ %L50 = load i8, i8* %0
store double %L43, double* %Sl
%E51 = extractelement <4 x i64> %Shuff7, i32 3
%Shuff52 = shufflevector <4 x float> %BC34, <4 x float> %BC34, <4 x i32> <i32 2, i32 4, i32 6, i32 0>
@@ -117,7 +117,7 @@ CF81: ; preds = %CF82
%B54 = fdiv double %L24, %L43
%BC55 = bitcast <4 x i64> zeroinitializer to <4 x double>
%Sl56 = select i1 false, i8 %5, i8 97
- %L57 = load i8* %0
+ %L57 = load i8, i8* %0
store i8 %L50, i8* %0
%E58 = extractelement <2 x i1> %Shuff20, i32 1
br i1 %E58, label %CF, label %CF73
@@ -129,7 +129,7 @@ CF73: ; preds = %CF73, %CF81
%PC62 = bitcast double* %A3 to float*
%Sl63 = select i1 %Cmp10, <1 x i64> zeroinitializer, <1 x i64> zeroinitializer
%Cmp64 = icmp ne <2 x i1> %Cmp, %Shuff
- %L65 = load double* %A1
+ %L65 = load double, double* %A1
store float -4.374162e+06, float* %PC62
%E66 = extractelement <8 x i1> %I21, i32 3
br i1 %E66, label %CF73, label %CF79
diff --git a/test/CodeGen/Mips/msa/llvm-stress-s525530439.ll b/test/CodeGen/Mips/msa/llvm-stress-s525530439.ll
index 697871df797d..c0bc90563484 100644
--- a/test/CodeGen/Mips/msa/llvm-stress-s525530439.ll
+++ b/test/CodeGen/Mips/msa/llvm-stress-s525530439.ll
@@ -14,7 +14,7 @@ BB:
%A2 = alloca <1 x double>
%A1 = alloca <8 x double>
%A = alloca i64
- %L = load i8* %0
+ %L = load i8, i8* %0
store i64 33695, i64* %A
%E = extractelement <4 x i32> zeroinitializer, i32 3
%Shuff = shufflevector <2 x i32> <i32 -1, i32 -1>, <2 x i32> <i32 -1, i32 -1>, <2 x i32> <i32 2, i32 0>
@@ -22,7 +22,7 @@ BB:
%B = lshr <8 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
%ZE = fpext float 0x3B64A2B880000000 to double
%Sl = select i1 true, i16 -1, i16 -11642
- %L5 = load i8* %0
+ %L5 = load i8, i8* %0
store i8 0, i8* %0
%E6 = extractelement <4 x i32> zeroinitializer, i32 2
%Shuff7 = shufflevector <8 x i1> zeroinitializer, <8 x i1> zeroinitializer, <8 x i32> <i32 undef, i32 7, i32 9, i32 11, i32 13, i32 15, i32 1, i32 undef>
@@ -31,7 +31,7 @@ BB:
%BC = bitcast <2 x i32> <i32 -1, i32 -1> to <2 x float>
%Sl10 = select i1 true, i32* %1, i32* %1
%Cmp = icmp sge <8 x i64> zeroinitializer, zeroinitializer
- %L11 = load i32* %Sl10
+ %L11 = load i32, i32* %Sl10
store <1 x double> zeroinitializer, <1 x double>* %A2
%E12 = extractelement <4 x i16> zeroinitializer, i32 0
%Shuff13 = shufflevector <1 x i64> zeroinitializer, <1 x i64> zeroinitializer, <1 x i32> undef
@@ -43,7 +43,7 @@ BB:
br label %CF75
CF75: ; preds = %CF75, %BB
- %L19 = load i32* %Sl10
+ %L19 = load i32, i32* %Sl10
store i32 %L11, i32* %Sl10
%E20 = extractelement <4 x i32> zeroinitializer, i32 1
%Shuff21 = shufflevector <4 x i32> zeroinitializer, <4 x i32> %I8, <4 x i32> <i32 undef, i32 2, i32 4, i32 6>
@@ -55,7 +55,7 @@ CF75: ; preds = %CF75, %BB
br i1 %Cmp26, label %CF75, label %CF76
CF76: ; preds = %CF75
- %L27 = load i32* %Sl10
+ %L27 = load i32, i32* %Sl10
store i32 439732, i32* %Sl10
%E28 = extractelement <4 x i32> %Shuff21, i32 3
%Shuff29 = shufflevector <8 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>, <8 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>, <8 x i32> <i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 0>
@@ -65,7 +65,7 @@ CF76: ; preds = %CF75
br label %CF74
CF74: ; preds = %CF74, %CF80, %CF78, %CF76
- %L33 = load i64* %2
+ %L33 = load i64, i64* %2
store i32 71140, i32* %Sl10
%E34 = extractelement <4 x i32> zeroinitializer, i32 1
%Shuff35 = shufflevector <1 x i16> zeroinitializer, <1 x i16> zeroinitializer, <1 x i32> undef
@@ -76,7 +76,7 @@ CF74: ; preds = %CF74, %CF80, %CF78,
br i1 %Cmp39, label %CF74, label %CF80
CF80: ; preds = %CF74
- %L40 = load i8* %0
+ %L40 = load i8, i8* %0
store i32 0, i32* %Sl10
%E41 = extractelement <8 x i64> zeroinitializer, i32 1
%Shuff42 = shufflevector <1 x i16> %I14, <1 x i16> %I14, <1 x i32> undef
@@ -86,7 +86,7 @@ CF80: ; preds = %CF74
br i1 %Sl44, label %CF74, label %CF78
CF78: ; preds = %CF80
- %L45 = load i32* %Sl10
+ %L45 = load i32, i32* %Sl10
store i8 %L5, i8* %0
%E46 = extractelement <8 x i1> %Shuff7, i32 2
br i1 %E46, label %CF74, label %CF77
@@ -101,7 +101,7 @@ CF77: ; preds = %CF77, %CF78
br i1 %Cmp52, label %CF77, label %CF79
CF79: ; preds = %CF77
- %L53 = load i32* %Sl10
+ %L53 = load i32, i32* %Sl10
store i8 %L40, i8* %0
%E54 = extractelement <4 x i32> zeroinitializer, i32 1
%Shuff55 = shufflevector <4 x i32> %Shuff21, <4 x i32> %I8, <4 x i32> <i32 4, i32 6, i32 undef, i32 2>
@@ -109,7 +109,7 @@ CF79: ; preds = %CF77
%Tr = trunc <1 x i64> %Shuff13 to <1 x i16>
%Sl57 = select i1 %Cmp18, <2 x i32> <i32 -1, i32 -1>, <2 x i32> <i32 -1, i32 -1>
%Cmp58 = icmp uge <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, %I56
- %L59 = load i8* %0
+ %L59 = load i8, i8* %0
store <1 x double> zeroinitializer, <1 x double>* %A2
%E60 = extractelement <4 x i32> zeroinitializer, i32 0
%Shuff61 = shufflevector <4 x i32> %I8, <4 x i32> %I8, <4 x i32> <i32 undef, i32 1, i32 undef, i32 undef>
@@ -121,7 +121,7 @@ CF79: ; preds = %CF77
br label %CF
CF: ; preds = %CF79
- %L66 = load i32* %Sl10
+ %L66 = load i32, i32* %Sl10
store i32 %E6, i32* %PC
%E67 = extractelement <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, i32 2
%Shuff68 = shufflevector <4 x i32> %Sl64, <4 x i32> %I8, <4 x i32> <i32 5, i32 undef, i32 1, i32 undef>
diff --git a/test/CodeGen/Mips/msa/llvm-stress-s997348632.ll b/test/CodeGen/Mips/msa/llvm-stress-s997348632.ll
index dc4200ad4285..a3150e9a67d5 100644
--- a/test/CodeGen/Mips/msa/llvm-stress-s997348632.ll
+++ b/test/CodeGen/Mips/msa/llvm-stress-s997348632.ll
@@ -14,14 +14,14 @@ BB:
%A2 = alloca <4 x i1>
%A1 = alloca <4 x i16>
%A = alloca <2 x i32>
- %L = load i8* %0
+ %L = load i8, i8* %0
store i8 %L, i8* %0
%E = extractelement <4 x i32> zeroinitializer, i32 0
%Shuff = shufflevector <4 x i64> zeroinitializer, <4 x i64> zeroinitializer, <4 x i32> <i32 undef, i32 1, i32 3, i32 5>
%I = insertelement <2 x i1> zeroinitializer, i1 false, i32 1
%FC = sitofp <4 x i32> zeroinitializer to <4 x double>
%Sl = select i1 false, <4 x i64> %Shuff, <4 x i64> %Shuff
- %L5 = load i8* %0
+ %L5 = load i8, i8* %0
store i8 %5, i8* %0
%E6 = extractelement <1 x i16> zeroinitializer, i32 0
%Shuff7 = shufflevector <2 x i1> %I, <2 x i1> %I, <2 x i32> <i32 1, i32 undef>
@@ -30,7 +30,7 @@ BB:
%FC9 = fptoui float 0x406DB70180000000 to i64
%Sl10 = select i1 false, <8 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>, <8 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
%Cmp = icmp ult <4 x i64> zeroinitializer, zeroinitializer
- %L11 = load i8* %0
+ %L11 = load i8, i8* %0
store i8 %L, i8* %0
%E12 = extractelement <4 x i64> zeroinitializer, i32 2
%Shuff13 = shufflevector <4 x i32> zeroinitializer, <4 x i32> zeroinitializer, <4 x i32> <i32 5, i32 7, i32 undef, i32 3>
@@ -42,7 +42,7 @@ BB:
br label %CF
CF: ; preds = %CF, %CF79, %CF84, %BB
- %L18 = load i8* %0
+ %L18 = load i8, i8* %0
store i8 %L, i8* %0
%E19 = extractelement <4 x i64> %Sl, i32 3
%Shuff20 = shufflevector <2 x i1> %Shuff7, <2 x i1> %I, <2 x i32> <i32 2, i32 0>
@@ -54,7 +54,7 @@ CF: ; preds = %CF, %CF79, %CF84, %
br i1 %Cmp25, label %CF, label %CF79
CF79: ; preds = %CF
- %L26 = load i8* %0
+ %L26 = load i8, i8* %0
store i8 %L26, i8* %0
%E27 = extractelement <1 x i16> zeroinitializer, i32 0
%Shuff28 = shufflevector <16 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>, <16 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>, <16 x i32> <i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31, i32 1, i32 3, i32 5, i32 7, i32 9, i32 11>
@@ -65,7 +65,7 @@ CF79: ; preds = %CF
br i1 %Cmp32, label %CF, label %CF78
CF78: ; preds = %CF78, %CF79
- %L33 = load i8* %0
+ %L33 = load i8, i8* %0
store i8 %L, i8* %0
%E34 = extractelement <16 x i32> %Shuff28, i32 1
%Shuff35 = shufflevector <4 x i64> zeroinitializer, <4 x i64> %I21, <4 x i32> <i32 undef, i32 6, i32 0, i32 2>
@@ -76,7 +76,7 @@ CF78: ; preds = %CF78, %CF79
br i1 %Cmp38, label %CF78, label %CF80
CF80: ; preds = %CF80, %CF82, %CF78
- %L39 = load i8* %0
+ %L39 = load i8, i8* %0
store i8 %L, i8* %0
%E40 = extractelement <2 x i1> %Shuff20, i32 1
br i1 %E40, label %CF80, label %CF82
@@ -87,7 +87,7 @@ CF82: ; preds = %CF80
%B43 = sub i32 %E, 0
%Sl44 = select i1 %Cmp32, <16 x i32> %Shuff28, <16 x i32> %Shuff28
%Cmp45 = icmp sgt <4 x i64> zeroinitializer, %I21
- %L46 = load i8* %0
+ %L46 = load i8, i8* %0
store i8 %L11, i8* %0
%E47 = extractelement <8 x i32> %Sl16, i32 4
%Shuff48 = shufflevector <2 x i1> zeroinitializer, <2 x i1> %Shuff7, <2 x i32> <i32 undef, i32 1>
@@ -99,7 +99,7 @@ CF82: ; preds = %CF80
CF81: ; preds = %CF81, %CF82
%Sl52 = select i1 false, float -6.749110e+06, float 0x406DB70180000000
%Cmp53 = icmp uge <2 x i32> <i32 -1, i32 -1>, <i32 -1, i32 -1>
- %L54 = load i8* %0
+ %L54 = load i8, i8* %0
store i8 %L5, i8* %0
%E55 = extractelement <8 x i32> zeroinitializer, i32 7
%Shuff56 = shufflevector <4 x i64> zeroinitializer, <4 x i64> zeroinitializer, <4 x i32> <i32 undef, i32 4, i32 6, i32 0>
@@ -108,7 +108,7 @@ CF81: ; preds = %CF81, %CF82
%FC59 = fptoui <4 x double> %I36 to <4 x i16>
%Sl60 = select i1 %Cmp17, <2 x i1> %I, <2 x i1> %I57
%Cmp61 = icmp ule <8 x i32> %B50, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
- %L62 = load i8* %0
+ %L62 = load i8, i8* %0
store i8 %L33, i8* %0
%E63 = extractelement <4 x i64> %Shuff, i32 2
%Shuff64 = shufflevector <4 x i64> %Shuff56, <4 x i64> %Shuff56, <4 x i32> <i32 5, i32 7, i32 1, i32 undef>
@@ -126,7 +126,7 @@ CF84: ; preds = %CF83
br i1 %Cmp69, label %CF, label %CF77
CF77: ; preds = %CF84
- %L70 = load i8* %0
+ %L70 = load i8, i8* %0
store i8 %L, i8* %0
%E71 = extractelement <4 x i64> %Shuff, i32 0
%Shuff72 = shufflevector <2 x i1> zeroinitializer, <2 x i1> %I, <2 x i32> <i32 3, i32 1>
diff --git a/test/CodeGen/Mips/msa/shuffle.ll b/test/CodeGen/Mips/msa/shuffle.ll
index faeec5d58dd4..aadff7d5a5c8 100644
--- a/test/CodeGen/Mips/msa/shuffle.ll
+++ b/test/CodeGen/Mips/msa/shuffle.ll
@@ -2,9 +2,9 @@
; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck %s
define void @vshf_v16i8_0(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
- ; CHECK: vshf_v16i8_0:
+ ; CHECK-LABEL: vshf_v16i8_0:
- %1 = load <16 x i8>* %a
+ %1 = load <16 x i8>, <16 x i8>* %a
; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
%2 = shufflevector <16 x i8> %1, <16 x i8> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; CHECK-DAG: addiu [[PTR_A:\$[0-9]+]], {{.*}}, %lo($
@@ -14,13 +14,12 @@ define void @vshf_v16i8_0(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind
; CHECK-DAG: st.b [[R3]], 0($4)
ret void
- ; CHECK: .size vshf_v16i8_0
}
define void @vshf_v16i8_1(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
- ; CHECK: vshf_v16i8_1:
+ ; CHECK-LABEL: vshf_v16i8_1:
- %1 = load <16 x i8>* %a
+ %1 = load <16 x i8>, <16 x i8>* %a
; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
%2 = shufflevector <16 x i8> %1, <16 x i8> undef, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
; CHECK-DAG: splati.b [[R3:\$w[0-9]+]], [[R1]][1]
@@ -28,14 +27,13 @@ define void @vshf_v16i8_1(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind
; CHECK-DAG: st.b [[R3]], 0($4)
ret void
- ; CHECK: .size vshf_v16i8_1
}
define void @vshf_v16i8_2(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
- ; CHECK: vshf_v16i8_2:
+ ; CHECK-LABEL: vshf_v16i8_2:
- %1 = load <16 x i8>* %a
- %2 = load <16 x i8>* %b
+ %1 = load <16 x i8>, <16 x i8>* %a
+ %2 = load <16 x i8>, <16 x i8>* %b
; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
%3 = shufflevector <16 x i8> %1, <16 x i8> %2, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 16>
; CHECK-DAG: addiu [[PTR_A:\$[0-9]+]], {{.*}}, %lo($
@@ -45,15 +43,14 @@ define void @vshf_v16i8_2(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind
; CHECK-DAG: st.b [[R3]], 0($4)
ret void
- ; CHECK: .size vshf_v16i8_2
}
define void @vshf_v16i8_3(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
- ; CHECK: vshf_v16i8_3:
+ ; CHECK-LABEL: vshf_v16i8_3:
- %1 = load <16 x i8>* %a
+ %1 = load <16 x i8>, <16 x i8>* %a
; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
- %2 = load <16 x i8>* %b
+ %2 = load <16 x i8>, <16 x i8>* %b
; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
%3 = shufflevector <16 x i8> %1, <16 x i8> %2, <16 x i32> <i32 17, i32 24, i32 25, i32 18, i32 19, i32 20, i32 28, i32 19, i32 1, i32 8, i32 9, i32 2, i32 3, i32 4, i32 12, i32 3>
; CHECK-DAG: addiu [[PTR_A:\$[0-9]+]], {{.*}}, %lo($
@@ -65,13 +62,12 @@ define void @vshf_v16i8_3(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind
; CHECK-DAG: st.b [[R3]], 0($4)
ret void
- ; CHECK: .size vshf_v16i8_3
}
define void @vshf_v16i8_4(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
- ; CHECK: vshf_v16i8_4:
+ ; CHECK-LABEL: vshf_v16i8_4:
- %1 = load <16 x i8>* %a
+ %1 = load <16 x i8>, <16 x i8>* %a
; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
%2 = shufflevector <16 x i8> %1, <16 x i8> %1, <16 x i32> <i32 1, i32 17, i32 1, i32 17, i32 1, i32 17, i32 1, i32 17, i32 1, i32 17, i32 1, i32 17, i32 1, i32 17, i32 1, i32 17>
; CHECK-DAG: splati.b [[R3:\$w[0-9]+]], [[R1]][1]
@@ -79,13 +75,12 @@ define void @vshf_v16i8_4(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind
; CHECK-DAG: st.b [[R3]], 0($4)
ret void
- ; CHECK: .size vshf_v16i8_4
}
define void @vshf_v8i16_0(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
- ; CHECK: vshf_v8i16_0:
+ ; CHECK-LABEL: vshf_v8i16_0:
- %1 = load <8 x i16>* %a
+ %1 = load <8 x i16>, <8 x i16>* %a
; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
%2 = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; CHECK-DAG: addiu [[PTR_A:\$[0-9]+]], {{.*}}, %lo($
@@ -95,13 +90,12 @@ define void @vshf_v8i16_0(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind
; CHECK-DAG: st.h [[R3]], 0($4)
ret void
- ; CHECK: .size vshf_v8i16_0
}
define void @vshf_v8i16_1(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
- ; CHECK: vshf_v8i16_1:
+ ; CHECK-LABEL: vshf_v8i16_1:
- %1 = load <8 x i16>* %a
+ %1 = load <8 x i16>, <8 x i16>* %a
; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
%2 = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
; CHECK-DAG: splati.h [[R3:\$w[0-9]+]], [[R1]][1]
@@ -109,14 +103,13 @@ define void @vshf_v8i16_1(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind
; CHECK-DAG: st.h [[R3]], 0($4)
ret void
- ; CHECK: .size vshf_v8i16_1
}
define void @vshf_v8i16_2(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
- ; CHECK: vshf_v8i16_2:
+ ; CHECK-LABEL: vshf_v8i16_2:
- %1 = load <8 x i16>* %a
- %2 = load <8 x i16>* %b
+ %1 = load <8 x i16>, <8 x i16>* %a
+ %2 = load <8 x i16>, <8 x i16>* %b
; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
%3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 8>
; CHECK-DAG: addiu [[PTR_A:\$[0-9]+]], {{.*}}, %lo($
@@ -126,15 +119,14 @@ define void @vshf_v8i16_2(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind
; CHECK-DAG: st.h [[R3]], 0($4)
ret void
- ; CHECK: .size vshf_v8i16_2
}
define void @vshf_v8i16_3(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
- ; CHECK: vshf_v8i16_3:
+ ; CHECK-LABEL: vshf_v8i16_3:
- %1 = load <8 x i16>* %a
+ %1 = load <8 x i16>, <8 x i16>* %a
; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
- %2 = load <8 x i16>* %b
+ %2 = load <8 x i16>, <8 x i16>* %b
; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
%3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 1, i32 8, i32 9, i32 2, i32 3, i32 4, i32 12, i32 3>
; CHECK-DAG: addiu [[PTR_A:\$[0-9]+]], {{.*}}, %lo($
@@ -146,13 +138,12 @@ define void @vshf_v8i16_3(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind
; CHECK-DAG: st.h [[R3]], 0($4)
ret void
- ; CHECK: .size vshf_v8i16_3
}
define void @vshf_v8i16_4(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
- ; CHECK: vshf_v8i16_4:
+ ; CHECK-LABEL: vshf_v8i16_4:
- %1 = load <8 x i16>* %a
+ %1 = load <8 x i16>, <8 x i16>* %a
; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
%2 = shufflevector <8 x i16> %1, <8 x i16> %1, <8 x i32> <i32 1, i32 9, i32 1, i32 9, i32 1, i32 9, i32 1, i32 9>
; CHECK-DAG: splati.h [[R3:\$w[0-9]+]], [[R1]][1]
@@ -160,16 +151,15 @@ define void @vshf_v8i16_4(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind
; CHECK-DAG: st.h [[R3]], 0($4)
ret void
- ; CHECK: .size vshf_v8i16_4
}
; Note: v4i32 only has one 4-element set so it's impossible to get a vshf.w
; instruction when using a single vector.
define void @vshf_v4i32_0(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
- ; CHECK: vshf_v4i32_0:
+ ; CHECK-LABEL: vshf_v4i32_0:
- %1 = load <4 x i32>* %a
+ %1 = load <4 x i32>, <4 x i32>* %a
; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
%2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; CHECK-DAG: shf.w [[R3:\$w[0-9]+]], [[R1]], 27
@@ -177,28 +167,26 @@ define void @vshf_v4i32_0(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind
; CHECK-DAG: st.w [[R3]], 0($4)
ret void
- ; CHECK: .size vshf_v4i32_0
}
define void @vshf_v4i32_1(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
- ; CHECK: vshf_v4i32_1:
+ ; CHECK-LABEL: vshf_v4i32_1:
- %1 = load <4 x i32>* %a
+ %1 = load <4 x i32>, <4 x i32>* %a
; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
%2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
- ; CHECK-DAG: shf.w [[R3:\$w[0-9]+]], [[R1]], 85
+ ; CHECK-DAG: splati.w [[R3:\$w[0-9]+]], [[R1]][1]
store <4 x i32> %2, <4 x i32>* %c
; CHECK-DAG: st.w [[R3]], 0($4)
ret void
- ; CHECK: .size vshf_v4i32_1
}
define void @vshf_v4i32_2(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
- ; CHECK: vshf_v4i32_2:
+ ; CHECK-LABEL: vshf_v4i32_2:
- %1 = load <4 x i32>* %a
- %2 = load <4 x i32>* %b
+ %1 = load <4 x i32>, <4 x i32>* %a
+ %2 = load <4 x i32>, <4 x i32>* %b
; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
%3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 4, i32 5, i32 6, i32 4>
; CHECK-DAG: shf.w [[R3:\$w[0-9]+]], [[R2]], 36
@@ -206,15 +194,14 @@ define void @vshf_v4i32_2(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind
; CHECK-DAG: st.w [[R3]], 0($4)
ret void
- ; CHECK: .size vshf_v4i32_2
}
define void @vshf_v4i32_3(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
- ; CHECK: vshf_v4i32_3:
+ ; CHECK-LABEL: vshf_v4i32_3:
- %1 = load <4 x i32>* %a
+ %1 = load <4 x i32>, <4 x i32>* %a
; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
- %2 = load <4 x i32>* %b
+ %2 = load <4 x i32>, <4 x i32>* %b
; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
%3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 1, i32 5, i32 6, i32 4>
; CHECK-DAG: addiu [[PTR_A:\$[0-9]+]], {{.*}}, %lo($
@@ -226,27 +213,26 @@ define void @vshf_v4i32_3(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind
; CHECK-DAG: st.w [[R3]], 0($4)
ret void
- ; CHECK: .size vshf_v4i32_3
}
define void @vshf_v4i32_4(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
- ; CHECK: vshf_v4i32_4:
+ ; CHECK-LABEL: vshf_v4i32_4:
- %1 = load <4 x i32>* %a
+ %1 = load <4 x i32>, <4 x i32>* %a
; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
%2 = shufflevector <4 x i32> %1, <4 x i32> %1, <4 x i32> <i32 1, i32 5, i32 5, i32 1>
- ; CHECK-DAG: shf.w [[R3:\$w[0-9]+]], [[R1]], 85
+ ; The two operand vectors are the same, so elements 1 and 5 are equivalent.
+ ; CHECK-DAG: splati.w [[R3:\$w[0-9]+]], [[R1]][1]
store <4 x i32> %2, <4 x i32>* %c
; CHECK-DAG: st.w [[R3]], 0($4)
ret void
- ; CHECK: .size vshf_v4i32_4
}
define void @vshf_v2i64_0(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
- ; CHECK: vshf_v2i64_0:
+ ; CHECK-LABEL: vshf_v2i64_0:
- %1 = load <2 x i64>* %a
+ %1 = load <2 x i64>, <2 x i64>* %a
; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
%2 = shufflevector <2 x i64> %1, <2 x i64> undef, <2 x i32> <i32 1, i32 0>
; CHECK-DAG: addiu [[PTR_A:\$[0-9]+]], {{.*}}, %lo($
@@ -256,13 +242,12 @@ define void @vshf_v2i64_0(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind
; CHECK-DAG: st.d [[R3]], 0($4)
ret void
- ; CHECK: .size vshf_v2i64_0
}
define void @vshf_v2i64_1(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
- ; CHECK: vshf_v2i64_1:
+ ; CHECK-LABEL: vshf_v2i64_1:
- %1 = load <2 x i64>* %a
+ %1 = load <2 x i64>, <2 x i64>* %a
; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
%2 = shufflevector <2 x i64> %1, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
; CHECK-DAG: splati.d [[R3:\$w[0-9]+]], [[R1]][1]
@@ -270,14 +255,13 @@ define void @vshf_v2i64_1(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind
; CHECK-DAG: st.d [[R3]], 0($4)
ret void
- ; CHECK: .size vshf_v2i64_1
}
define void @vshf_v2i64_2(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
- ; CHECK: vshf_v2i64_2:
+ ; CHECK-LABEL: vshf_v2i64_2:
- %1 = load <2 x i64>* %a
- %2 = load <2 x i64>* %b
+ %1 = load <2 x i64>, <2 x i64>* %a
+ %2 = load <2 x i64>, <2 x i64>* %b
; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
%3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 3, i32 2>
; CHECK-DAG: addiu [[PTR_A:\$[0-9]+]], {{.*}}, %lo($
@@ -287,15 +271,14 @@ define void @vshf_v2i64_2(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind
; CHECK-DAG: st.d [[R3]], 0($4)
ret void
- ; CHECK: .size vshf_v2i64_2
}
define void @vshf_v2i64_3(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
- ; CHECK: vshf_v2i64_3:
+ ; CHECK-LABEL: vshf_v2i64_3:
- %1 = load <2 x i64>* %a
+ %1 = load <2 x i64>, <2 x i64>* %a
; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
- %2 = load <2 x i64>* %b
+ %2 = load <2 x i64>, <2 x i64>* %b
; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
%3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 1, i32 2>
; CHECK-DAG: addiu [[PTR_A:\$[0-9]+]], {{.*}}, %lo($
@@ -307,13 +290,12 @@ define void @vshf_v2i64_3(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind
; CHECK-DAG: st.d [[R3]], 0($4)
ret void
- ; CHECK: .size vshf_v2i64_3
}
define void @vshf_v2i64_4(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
- ; CHECK: vshf_v2i64_4:
+ ; CHECK-LABEL: vshf_v2i64_4:
- %1 = load <2 x i64>* %a
+ %1 = load <2 x i64>, <2 x i64>* %a
; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
%2 = shufflevector <2 x i64> %1, <2 x i64> %1, <2 x i32> <i32 1, i32 3>
; CHECK-DAG: splati.d [[R3:\$w[0-9]+]], [[R1]][1]
@@ -321,13 +303,12 @@ define void @vshf_v2i64_4(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind
; CHECK-DAG: st.d [[R3]], 0($4)
ret void
- ; CHECK: .size vshf_v2i64_4
}
define void @shf_v16i8_0(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
- ; CHECK: shf_v16i8_0:
+ ; CHECK-LABEL: shf_v16i8_0:
- %1 = load <16 x i8>* %a
+ %1 = load <16 x i8>, <16 x i8>* %a
; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
%2 = shufflevector <16 x i8> %1, <16 x i8> undef, <16 x i32> <i32 1, i32 3, i32 2, i32 0, i32 5, i32 7, i32 6, i32 4, i32 9, i32 11, i32 10, i32 8, i32 13, i32 15, i32 14, i32 12>
; CHECK-DAG: shf.b [[R3:\$w[0-9]+]], [[R1]], 45
@@ -335,13 +316,12 @@ define void @shf_v16i8_0(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
; CHECK-DAG: st.b [[R3]], 0($4)
ret void
- ; CHECK: .size shf_v16i8_0
}
define void @shf_v8i16_0(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
- ; CHECK: shf_v8i16_0:
+ ; CHECK-LABEL: shf_v8i16_0:
- %1 = load <8 x i16>* %a
+ %1 = load <8 x i16>, <8 x i16>* %a
; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
%2 = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
; CHECK-DAG: shf.h [[R3:\$w[0-9]+]], [[R1]], 27
@@ -349,13 +329,12 @@ define void @shf_v8i16_0(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
; CHECK-DAG: st.h [[R3]], 0($4)
ret void
- ; CHECK: .size shf_v8i16_0
}
define void @shf_v4i32_0(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
- ; CHECK: shf_v4i32_0:
+ ; CHECK-LABEL: shf_v4i32_0:
- %1 = load <4 x i32>* %a
+ %1 = load <4 x i32>, <4 x i32>* %a
; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
%2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; CHECK-DAG: shf.w [[R3:\$w[0-9]+]], [[R1]], 27
@@ -363,82 +342,194 @@ define void @shf_v4i32_0(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
; CHECK-DAG: st.w [[R3]], 0($4)
ret void
- ; CHECK: .size shf_v4i32_0
}
; shf.d does not exist
define void @ilvev_v16i8_0(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
- ; CHECK: ilvev_v16i8_0:
+ ; CHECK-LABEL: ilvev_v16i8_0:
- %1 = load <16 x i8>* %a
+ %1 = load <16 x i8>, <16 x i8>* %a
; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
- %2 = load <16 x i8>* %b
+ %2 = load <16 x i8>, <16 x i8>* %b
; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
%3 = shufflevector <16 x i8> %1, <16 x i8> %2,
<16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
- ; CHECK-DAG: ilvev.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+ ; CHECK-DAG: ilvev.b [[R3:\$w[0-9]+]], [[R2]], [[R1]]
store <16 x i8> %3, <16 x i8>* %c
; CHECK-DAG: st.b [[R3]], 0($4)
ret void
- ; CHECK: .size ilvev_v16i8_0
}
define void @ilvev_v8i16_0(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
- ; CHECK: ilvev_v8i16_0:
+ ; CHECK-LABEL: ilvev_v8i16_0:
- %1 = load <8 x i16>* %a
+ %1 = load <8 x i16>, <8 x i16>* %a
; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
- %2 = load <8 x i16>* %b
+ %2 = load <8 x i16>, <8 x i16>* %b
; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
%3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
- ; CHECK-DAG: ilvev.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+ ; CHECK-DAG: ilvev.h [[R3:\$w[0-9]+]], [[R2]], [[R1]]
store <8 x i16> %3, <8 x i16>* %c
; CHECK-DAG: st.h [[R3]], 0($4)
ret void
- ; CHECK: .size ilvev_v8i16_0
}
define void @ilvev_v4i32_0(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
- ; CHECK: ilvev_v4i32_0:
+ ; CHECK-LABEL: ilvev_v4i32_0:
- %1 = load <4 x i32>* %a
+ %1 = load <4 x i32>, <4 x i32>* %a
; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
- %2 = load <4 x i32>* %b
+ %2 = load <4 x i32>, <4 x i32>* %b
; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
%3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
- ; CHECK-DAG: ilvev.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+ ; CHECK-DAG: ilvev.w [[R3:\$w[0-9]+]], [[R2]], [[R1]]
store <4 x i32> %3, <4 x i32>* %c
; CHECK-DAG: st.w [[R3]], 0($4)
ret void
- ; CHECK: .size ilvev_v4i32_0
}
define void @ilvev_v2i64_0(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
- ; CHECK: ilvev_v2i64_0:
+ ; CHECK-LABEL: ilvev_v2i64_0:
- %1 = load <2 x i64>* %a
+ %1 = load <2 x i64>, <2 x i64>* %a
; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
- %2 = load <2 x i64>* %b
+ %2 = load <2 x i64>, <2 x i64>* %b
; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
%3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 0, i32 2>
- ; CHECK-DAG: ilvev.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+ ; CHECK-DAG: ilvev.d [[R3:\$w[0-9]+]], [[R2]], [[R1]]
+ store <2 x i64> %3, <2 x i64>* %c
+ ; CHECK-DAG: st.d [[R3]], 0($4)
+
+ ret void
+}
+
+; Interleaving one operand with itself.
+define void @ilvev_v16i8_1(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
+ ; CHECK-LABEL: ilvev_v16i8_1:
+
+ %1 = load <16 x i8>, <16 x i8>* %a
+ ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
+ %2 = load <16 x i8>, <16 x i8>* %b
+ %3 = shufflevector <16 x i8> %1, <16 x i8> %2,
+ <16 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6, i32 8, i32 8, i32 10, i32 10, i32 12, i32 12, i32 14, i32 14>
+ ; CHECK-DAG: ilvev.b [[R3:\$w[0-9]+]], [[R1]], [[R1]]
+ store <16 x i8> %3, <16 x i8>* %c
+ ; CHECK-DAG: st.b [[R3]], 0($4)
+
+ ret void
+}
+
+define void @ilvev_v8i16_1(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
+ ; CHECK-LABEL: ilvev_v8i16_1:
+
+ %1 = load <8 x i16>, <8 x i16>* %a
+ ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
+ %2 = load <8 x i16>, <8 x i16>* %b
+ %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
+ ; CHECK-DAG: ilvev.h [[R3:\$w[0-9]+]], [[R1]], [[R1]]
+ store <8 x i16> %3, <8 x i16>* %c
+ ; CHECK-DAG: st.h [[R3]], 0($4)
+
+ ret void
+}
+
+define void @ilvev_v4i32_1(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
+ ; CHECK-LABEL: ilvev_v4i32_1:
+
+ %1 = load <4 x i32>, <4 x i32>* %a
+ ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
+ %2 = load <4 x i32>, <4 x i32>* %b
+ %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
+ ; CHECK-DAG: ilvev.w [[R3:\$w[0-9]+]], [[R1]], [[R1]]
+ store <4 x i32> %3, <4 x i32>* %c
+ ; CHECK-DAG: st.w [[R3]], 0($4)
+
+ ret void
+}
+
+define void @ilvev_v2i64_1(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
+ ; CHECK-LABEL: ilvev_v2i64_1:
+
+ %1 = load <2 x i64>, <2 x i64>* %a
+ ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
+ %2 = load <2 x i64>, <2 x i64>* %b
+ %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 0, i32 0>
+ ; ilvev.d with two identical operands is equivalent to splati.d
+ ; CHECK-DAG: splati.d [[R3:\$w[0-9]+]], [[R1]][0]
+ store <2 x i64> %3, <2 x i64>* %c
+ ; CHECK-DAG: st.d [[R3]], 0($4)
+
+ ret void
+}
+
+define void @ilvev_v16i8_2(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
+ ; CHECK-LABEL: ilvev_v16i8_2:
+
+ %1 = load <16 x i8>, <16 x i8>* %a
+ %2 = load <16 x i8>, <16 x i8>* %b
+ ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
+ %3 = shufflevector <16 x i8> %1, <16 x i8> %2,
+ <16 x i32> <i32 16, i32 16, i32 18, i32 18, i32 20, i32 20, i32 22, i32 22, i32 24, i32 24, i32 26, i32 26, i32 28, i32 28, i32 30, i32 30>
+ ; CHECK-DAG: ilvev.b [[R3:\$w[0-9]+]], [[R2]], [[R2]]
+ store <16 x i8> %3, <16 x i8>* %c
+ ; CHECK-DAG: st.b [[R3]], 0($4)
+
+ ret void
+}
+
+define void @ilvev_v8i16_2(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
+ ; CHECK-LABEL: ilvev_v8i16_2:
+
+ %1 = load <8 x i16>, <8 x i16>* %a
+ %2 = load <8 x i16>, <8 x i16>* %b
+ ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
+ %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 8, i32 8, i32 10, i32 10, i32 12, i32 12, i32 14, i32 14>
+ ; CHECK-DAG: ilvev.h [[R3:\$w[0-9]+]], [[R2]], [[R2]]
+ store <8 x i16> %3, <8 x i16>* %c
+ ; CHECK-DAG: st.h [[R3]], 0($4)
+
+ ret void
+}
+
+define void @ilvev_v4i32_2(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
+ ; CHECK-LABEL: ilvev_v4i32_2:
+
+ %1 = load <4 x i32>, <4 x i32>* %a
+ %2 = load <4 x i32>, <4 x i32>* %b
+ ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
+ %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 4, i32 4, i32 6, i32 6>
+ ; CHECK-DAG: ilvev.w [[R3:\$w[0-9]+]], [[R2]], [[R2]]
+ store <4 x i32> %3, <4 x i32>* %c
+ ; CHECK-DAG: st.w [[R3]], 0($4)
+
+ ret void
+}
+
+define void @ilvev_v2i64_2(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
+ ; CHECK-LABEL: ilvev_v2i64_2:
+
+ %1 = load <2 x i64>, <2 x i64>* %a
+ %2 = load <2 x i64>, <2 x i64>* %b
+ ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
+ %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 2, i32 2>
+ ; ilvev.d with two identical operands is equivalent to splati.d
+ ; CHECK-DAG: splati.d [[R3:\$w[0-9]+]], [[R2]][0]
store <2 x i64> %3, <2 x i64>* %c
; CHECK-DAG: st.d [[R3]], 0($4)
ret void
- ; CHECK: .size ilvev_v2i64_0
}
define void @ilvod_v16i8_0(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
- ; CHECK: ilvod_v16i8_0:
+ ; CHECK-LABEL: ilvod_v16i8_0:
- %1 = load <16 x i8>* %a
+ %1 = load <16 x i8>, <16 x i8>* %a
; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
- %2 = load <16 x i8>* %b
+ %2 = load <16 x i8>, <16 x i8>* %b
; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
%3 = shufflevector <16 x i8> %1, <16 x i8> %2,
<16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
@@ -447,15 +538,14 @@ define void @ilvod_v16i8_0(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind
; CHECK-DAG: st.b [[R3]], 0($4)
ret void
- ; CHECK: .size ilvod_v16i8_0
}
define void @ilvod_v8i16_0(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
- ; CHECK: ilvod_v8i16_0:
+ ; CHECK-LABEL: ilvod_v8i16_0:
- %1 = load <8 x i16>* %a
+ %1 = load <8 x i16>, <8 x i16>* %a
; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
- %2 = load <8 x i16>* %b
+ %2 = load <8 x i16>, <8 x i16>* %b
; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
%3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
; CHECK-DAG: ilvod.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
@@ -463,15 +553,14 @@ define void @ilvod_v8i16_0(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind
; CHECK-DAG: st.h [[R3]], 0($4)
ret void
- ; CHECK: .size ilvod_v8i16_0
}
define void @ilvod_v4i32_0(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
- ; CHECK: ilvod_v4i32_0:
+ ; CHECK-LABEL: ilvod_v4i32_0:
- %1 = load <4 x i32>* %a
+ %1 = load <4 x i32>, <4 x i32>* %a
; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
- %2 = load <4 x i32>* %b
+ %2 = load <4 x i32>, <4 x i32>* %b
; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
%3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
; CHECK-DAG: ilvod.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
@@ -479,15 +568,14 @@ define void @ilvod_v4i32_0(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind
; CHECK-DAG: st.w [[R3]], 0($4)
ret void
- ; CHECK: .size ilvod_v4i32_0
}
define void @ilvod_v2i64_0(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
- ; CHECK: ilvod_v2i64_0:
+ ; CHECK-LABEL: ilvod_v2i64_0:
- %1 = load <2 x i64>* %a
+ %1 = load <2 x i64>, <2 x i64>* %a
; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
- %2 = load <2 x i64>* %b
+ %2 = load <2 x i64>, <2 x i64>* %b
; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
%3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 1, i32 3>
; CHECK-DAG: ilvod.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
@@ -495,262 +583,710 @@ define void @ilvod_v2i64_0(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind
; CHECK-DAG: st.d [[R3]], 0($4)
ret void
- ; CHECK: .size ilvod_v2i64_0
}
-define void @ilvl_v16i8_0(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
- ; CHECK: ilvl_v16i8_0:
+define void @ilvod_v16i8_1(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
+ ; CHECK-LABEL: ilvod_v16i8_1:
+
+ %1 = load <16 x i8>, <16 x i8>* %a
+ ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
+ %2 = load <16 x i8>, <16 x i8>* %b
+ %3 = shufflevector <16 x i8> %1, <16 x i8> %2,
+ <16 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7, i32 9, i32 9, i32 11, i32 11, i32 13, i32 13, i32 15, i32 15>
+ ; CHECK-DAG: ilvod.b [[R3:\$w[0-9]+]], [[R1]], [[R1]]
+ store <16 x i8> %3, <16 x i8>* %c
+ ; CHECK-DAG: st.b [[R3]], 0($4)
+
+ ret void
+}
+
+define void @ilvod_v8i16_1(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
+ ; CHECK-LABEL: ilvod_v8i16_1:
+
+ %1 = load <8 x i16>, <8 x i16>* %a
+ ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
+ %2 = load <8 x i16>, <8 x i16>* %b
+ %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7>
+ ; CHECK-DAG: ilvod.h [[R3:\$w[0-9]+]], [[R1]], [[R1]]
+ store <8 x i16> %3, <8 x i16>* %c
+ ; CHECK-DAG: st.h [[R3]], 0($4)
+
+ ret void
+}
+
+define void @ilvod_v4i32_1(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
+ ; CHECK-LABEL: ilvod_v4i32_1:
+
+ %1 = load <4 x i32>, <4 x i32>* %a
+ ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
+ %2 = load <4 x i32>, <4 x i32>* %b
+ %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
+ ; CHECK-DAG: ilvod.w [[R3:\$w[0-9]+]], [[R1]], [[R1]]
+ store <4 x i32> %3, <4 x i32>* %c
+ ; CHECK-DAG: st.w [[R3]], 0($4)
+
+ ret void
+}
+
+define void @ilvod_v2i64_1(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
+ ; CHECK-LABEL: ilvod_v2i64_1:
+
+ %1 = load <2 x i64>, <2 x i64>* %a
+ ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
+ %2 = load <2 x i64>, <2 x i64>* %b
+ %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 1, i32 1>
+ ; ilvod.d with two identical operands is equivalent to splati.d
+ ; CHECK-DAG: splati.d [[R3:\$w[0-9]+]], [[R1]][1]
+ store <2 x i64> %3, <2 x i64>* %c
+ ; CHECK-DAG: st.d [[R3]], 0($4)
+
+ ret void
+}
+
+define void @ilvod_v16i8_2(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
+ ; CHECK-LABEL: ilvod_v16i8_2:
+
+ %1 = load <16 x i8>, <16 x i8>* %a
+ %2 = load <16 x i8>, <16 x i8>* %b
+ ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
+ %3 = shufflevector <16 x i8> %1, <16 x i8> %2,
+ <16 x i32> <i32 17, i32 17, i32 19, i32 19, i32 21, i32 21, i32 23, i32 23, i32 25, i32 25, i32 27, i32 27, i32 29, i32 29, i32 31, i32 31>
+ ; CHECK-DAG: ilvod.b [[R3:\$w[0-9]+]], [[R2]], [[R2]]
+ store <16 x i8> %3, <16 x i8>* %c
+ ; CHECK-DAG: st.b [[R3]], 0($4)
+
+ ret void
+}
+
+define void @ilvod_v8i16_2(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
+ ; CHECK-LABEL: ilvod_v8i16_2:
+
+ %1 = load <8 x i16>, <8 x i16>* %a
+ %2 = load <8 x i16>, <8 x i16>* %b
+ ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
+ %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 9, i32 9, i32 11, i32 11, i32 13, i32 13, i32 15, i32 15>
+ ; CHECK-DAG: ilvod.h [[R3:\$w[0-9]+]], [[R2]], [[R2]]
+ store <8 x i16> %3, <8 x i16>* %c
+ ; CHECK-DAG: st.h [[R3]], 0($4)
+
+ ret void
+}
+
+define void @ilvod_v4i32_2(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
+ ; CHECK-LABEL: ilvod_v4i32_2:
+
+ %1 = load <4 x i32>, <4 x i32>* %a
+ %2 = load <4 x i32>, <4 x i32>* %b
+ ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
+ %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 5, i32 5, i32 7, i32 7>
+ ; CHECK-DAG: ilvod.w [[R3:\$w[0-9]+]], [[R2]], [[R2]]
+ store <4 x i32> %3, <4 x i32>* %c
+ ; CHECK-DAG: st.w [[R3]], 0($4)
+
+ ret void
+}
+
+define void @ilvod_v2i64_2(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
+ ; CHECK-LABEL: ilvod_v2i64_2:
+
+ %1 = load <2 x i64>, <2 x i64>* %a
+ %2 = load <2 x i64>, <2 x i64>* %b
+ ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
+ %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 3, i32 3>
+ ; ilvod.d with two identical operands is equivalent to splati.d
+ ; CHECK-DAG: splati.d [[R3:\$w[0-9]+]], [[R2]][1]
+ store <2 x i64> %3, <2 x i64>* %c
+ ; CHECK-DAG: st.d [[R3]], 0($4)
- %1 = load <16 x i8>* %a
+ ret void
+}
+
+define void @ilvr_v16i8_0(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
+ ; CHECK-LABEL: ilvr_v16i8_0:
+
+ %1 = load <16 x i8>, <16 x i8>* %a
; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
- %2 = load <16 x i8>* %b
+ %2 = load <16 x i8>, <16 x i8>* %b
; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
%3 = shufflevector <16 x i8> %1, <16 x i8> %2,
<16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
- ; CHECK-DAG: ilvl.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+ ; CHECK-DAG: ilvr.b [[R3:\$w[0-9]+]], [[R2]], [[R1]]
store <16 x i8> %3, <16 x i8>* %c
; CHECK-DAG: st.b [[R3]], 0($4)
ret void
- ; CHECK: .size ilvl_v16i8_0
}
-define void @ilvl_v8i16_0(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
- ; CHECK: ilvl_v8i16_0:
+define void @ilvr_v8i16_0(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
+ ; CHECK-LABEL: ilvr_v8i16_0:
- %1 = load <8 x i16>* %a
+ %1 = load <8 x i16>, <8 x i16>* %a
; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
- %2 = load <8 x i16>* %b
+ %2 = load <8 x i16>, <8 x i16>* %b
; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
%3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
- ; CHECK-DAG: ilvl.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+ ; CHECK-DAG: ilvr.h [[R3:\$w[0-9]+]], [[R2]], [[R1]]
store <8 x i16> %3, <8 x i16>* %c
; CHECK-DAG: st.h [[R3]], 0($4)
ret void
- ; CHECK: .size ilvl_v8i16_0
}
-define void @ilvl_v4i32_0(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
- ; CHECK: ilvl_v4i32_0:
+define void @ilvr_v4i32_0(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
+ ; CHECK-LABEL: ilvr_v4i32_0:
- %1 = load <4 x i32>* %a
+ %1 = load <4 x i32>, <4 x i32>* %a
; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
- %2 = load <4 x i32>* %b
+ %2 = load <4 x i32>, <4 x i32>* %b
; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
%3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
- ; CHECK-DAG: ilvl.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+ ; CHECK-DAG: ilvr.w [[R3:\$w[0-9]+]], [[R2]], [[R1]]
store <4 x i32> %3, <4 x i32>* %c
; CHECK-DAG: st.w [[R3]], 0($4)
ret void
- ; CHECK: .size ilvl_v4i32_0
}
-define void @ilvl_v2i64_0(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
- ; CHECK: ilvl_v2i64_0:
+define void @ilvr_v2i64_0(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
+ ; CHECK-LABEL: ilvr_v2i64_0:
- %1 = load <2 x i64>* %a
+ %1 = load <2 x i64>, <2 x i64>* %a
; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
- %2 = load <2 x i64>* %b
+ %2 = load <2 x i64>, <2 x i64>* %b
; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
%3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 0, i32 2>
- ; ilvl.d and ilvev.d are equivalent for v2i64
- ; CHECK-DAG: ilvev.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+ ; ilvr.d and ilvev.d are equivalent for v2i64
+ ; CHECK-DAG: ilvev.d [[R3:\$w[0-9]+]], [[R2]], [[R1]]
store <2 x i64> %3, <2 x i64>* %c
; CHECK-DAG: st.d [[R3]], 0($4)
ret void
- ; CHECK: .size ilvl_v2i64_0
}
-define void @ilvr_v16i8_0(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
- ; CHECK: ilvr_v16i8_0:
+define void @ilvr_v16i8_1(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
+ ; CHECK-LABEL: ilvr_v16i8_1:
+
+ %1 = load <16 x i8>, <16 x i8>* %a
+ %2 = load <16 x i8>, <16 x i8>* %b
+ ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
+ %3 = shufflevector <16 x i8> %1, <16 x i8> %2,
+ <16 x i32> <i32 16, i32 16, i32 17, i32 17, i32 18, i32 18, i32 19, i32 19, i32 20, i32 20, i32 21, i32 21, i32 22, i32 22, i32 23, i32 23>
+ ; CHECK-DAG: ilvr.b [[R3:\$w[0-9]+]], [[R2]], [[R2]]
+ store <16 x i8> %3, <16 x i8>* %c
+ ; CHECK-DAG: st.b [[R3]], 0($4)
+
+ ret void
+}
+
+define void @ilvr_v8i16_1(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
+ ; CHECK-LABEL: ilvr_v8i16_1:
+
+ %1 = load <8 x i16>, <8 x i16>* %a
+ %2 = load <8 x i16>, <8 x i16>* %b
+ ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
+ %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 8, i32 8, i32 9, i32 9, i32 10, i32 10, i32 11, i32 11>
+ ; CHECK-DAG: ilvr.h [[R3:\$w[0-9]+]], [[R2]], [[R2]]
+ store <8 x i16> %3, <8 x i16>* %c
+ ; CHECK-DAG: st.h [[R3]], 0($4)
+
+ ret void
+}
+
+define void @ilvr_v4i32_1(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
+ ; CHECK-LABEL: ilvr_v4i32_1:
+
+ %1 = load <4 x i32>, <4 x i32>* %a
+ %2 = load <4 x i32>, <4 x i32>* %b
+ ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
+ %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 4, i32 4, i32 5, i32 5>
+ ; CHECK-DAG: ilvr.w [[R3:\$w[0-9]+]], [[R2]], [[R2]]
+ store <4 x i32> %3, <4 x i32>* %c
+ ; CHECK-DAG: st.w [[R3]], 0($4)
+
+ ret void
+}
+
+define void @ilvr_v2i64_1(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
+ ; CHECK-LABEL: ilvr_v2i64_1:
+
+ %1 = load <2 x i64>, <2 x i64>* %a
+ %2 = load <2 x i64>, <2 x i64>* %b
+ ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
+ %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 2, i32 2>
+ ; ilvr.d and splati.d are equivalent for v2i64
+ ; CHECK-DAG: splati.d [[R3:\$w[0-9]+]], [[R2]][0]
+ store <2 x i64> %3, <2 x i64>* %c
+ ; CHECK-DAG: st.d [[R3]], 0($4)
- %1 = load <16 x i8>* %a
+ ret void
+}
+
+define void @ilvr_v16i8_2(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
+ ; CHECK-LABEL: ilvr_v16i8_2:
+
+ %1 = load <16 x i8>, <16 x i8>* %a
; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
- %2 = load <16 x i8>* %b
+ %2 = load <16 x i8>, <16 x i8>* %b
+ %3 = shufflevector <16 x i8> %1, <16 x i8> %2,
+ <16 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7>
+ ; CHECK-DAG: ilvr.b [[R3:\$w[0-9]+]], [[R1]], [[R1]]
+ store <16 x i8> %3, <16 x i8>* %c
+ ; CHECK-DAG: st.b [[R3]], 0($4)
+
+ ret void
+}
+
+define void @ilvr_v8i16_2(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
+ ; CHECK-LABEL: ilvr_v8i16_2:
+
+ %1 = load <8 x i16>, <8 x i16>* %a
+ ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
+ %2 = load <8 x i16>, <8 x i16>* %b
+ %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3>
+ ; CHECK-DAG: ilvr.h [[R3:\$w[0-9]+]], [[R1]], [[R1]]
+ store <8 x i16> %3, <8 x i16>* %c
+ ; CHECK-DAG: st.h [[R3]], 0($4)
+
+ ret void
+}
+
+define void @ilvr_v4i32_2(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
+ ; CHECK-LABEL: ilvr_v4i32_2:
+
+ %1 = load <4 x i32>, <4 x i32>* %a
+ ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
+ %2 = load <4 x i32>, <4 x i32>* %b
+ %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 0, i32 0, i32 1, i32 1>
+ ; CHECK-DAG: ilvr.w [[R3:\$w[0-9]+]], [[R1]], [[R1]]
+ store <4 x i32> %3, <4 x i32>* %c
+ ; CHECK-DAG: st.w [[R3]], 0($4)
+
+ ret void
+}
+
+define void @ilvr_v2i64_2(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
+ ; CHECK-LABEL: ilvr_v2i64_2:
+
+ %1 = load <2 x i64>, <2 x i64>* %a
+ ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
+ %2 = load <2 x i64>, <2 x i64>* %b
+ %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 0, i32 0>
+ ; ilvr.d and splati.d are equivalent for v2i64
+ ; CHECK-DAG: splati.d [[R3:\$w[0-9]+]], [[R1]][0]
+ store <2 x i64> %3, <2 x i64>* %c
+ ; CHECK-DAG: st.d [[R3]], 0($4)
+
+ ret void
+}
+
+define void @ilvl_v16i8_0(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
+ ; CHECK-LABEL: ilvl_v16i8_0:
+
+ %1 = load <16 x i8>, <16 x i8>* %a
+ ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
+ %2 = load <16 x i8>, <16 x i8>* %b
; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
%3 = shufflevector <16 x i8> %1, <16 x i8> %2,
<16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
- ; CHECK-DAG: ilvr.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+ ; CHECK-DAG: ilvl.b [[R3:\$w[0-9]+]], [[R2]], [[R1]]
store <16 x i8> %3, <16 x i8>* %c
; CHECK-DAG: st.b [[R3]], 0($4)
ret void
- ; CHECK: .size ilvr_v16i8_0
}
-define void @ilvr_v8i16_0(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
- ; CHECK: ilvr_v8i16_0:
+define void @ilvl_v8i16_0(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
+ ; CHECK-LABEL: ilvl_v8i16_0:
- %1 = load <8 x i16>* %a
+ %1 = load <8 x i16>, <8 x i16>* %a
; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
- %2 = load <8 x i16>* %b
+ %2 = load <8 x i16>, <8 x i16>* %b
; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
%3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
- ; CHECK-DAG: ilvr.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+ ; CHECK-DAG: ilvl.h [[R3:\$w[0-9]+]], [[R2]], [[R1]]
store <8 x i16> %3, <8 x i16>* %c
; CHECK-DAG: st.h [[R3]], 0($4)
ret void
- ; CHECK: .size ilvr_v8i16_0
}
-define void @ilvr_v4i32_0(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
- ; CHECK: ilvr_v4i32_0:
+define void @ilvl_v4i32_0(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
+ ; CHECK-LABEL: ilvl_v4i32_0:
- %1 = load <4 x i32>* %a
+ %1 = load <4 x i32>, <4 x i32>* %a
; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
- %2 = load <4 x i32>* %b
+ %2 = load <4 x i32>, <4 x i32>* %b
; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
%3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
- ; CHECK-DAG: ilvr.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+ ; CHECK-DAG: ilvl.w [[R3:\$w[0-9]+]], [[R2]], [[R1]]
store <4 x i32> %3, <4 x i32>* %c
; CHECK-DAG: st.w [[R3]], 0($4)
ret void
- ; CHECK: .size ilvr_v4i32_0
}
-define void @ilvr_v2i64_0(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
- ; CHECK: ilvr_v2i64_0:
+define void @ilvl_v2i64_0(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
+ ; CHECK-LABEL: ilvl_v2i64_0:
- %1 = load <2 x i64>* %a
+ %1 = load <2 x i64>, <2 x i64>* %a
; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
- %2 = load <2 x i64>* %b
+ %2 = load <2 x i64>, <2 x i64>* %b
; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
%3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 1, i32 3>
- ; ilvr.d and ilvod.d are equivalent for v2i64
+ ; ilvl.d and ilvod.d are equivalent for v2i64
; CHECK-DAG: ilvod.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
store <2 x i64> %3, <2 x i64>* %c
; CHECK-DAG: st.d [[R3]], 0($4)
ret void
- ; CHECK: .size ilvr_v2i64_0
+}
+
+define void @ilvl_v16i8_1(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
+ ; CHECK-LABEL: ilvl_v16i8_1:
+
+ %1 = load <16 x i8>, <16 x i8>* %a
+ %2 = load <16 x i8>, <16 x i8>* %b
+ ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
+ %3 = shufflevector <16 x i8> %1, <16 x i8> %2,
+ <16 x i32> <i32 24, i32 24, i32 25, i32 25, i32 26, i32 26, i32 27, i32 27, i32 28, i32 28, i32 29, i32 29, i32 30, i32 30, i32 31, i32 31>
+ ; CHECK-DAG: ilvl.b [[R3:\$w[0-9]+]], [[R2]], [[R2]]
+ store <16 x i8> %3, <16 x i8>* %c
+ ; CHECK-DAG: st.b [[R3]], 0($4)
+
+ ret void
+}
+
+define void @ilvl_v8i16_1(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
+ ; CHECK-LABEL: ilvl_v8i16_1:
+
+ %1 = load <8 x i16>, <8 x i16>* %a
+ %2 = load <8 x i16>, <8 x i16>* %b
+ ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
+ %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 12, i32 12, i32 13, i32 13, i32 14, i32 14, i32 15, i32 15>
+ ; CHECK-DAG: ilvl.h [[R3:\$w[0-9]+]], [[R2]], [[R2]]
+ store <8 x i16> %3, <8 x i16>* %c
+ ; CHECK-DAG: st.h [[R3]], 0($4)
+
+ ret void
+}
+
+define void @ilvl_v4i32_1(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
+ ; CHECK-LABEL: ilvl_v4i32_1:
+
+ %1 = load <4 x i32>, <4 x i32>* %a
+ %2 = load <4 x i32>, <4 x i32>* %b
+ ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
+ %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 6, i32 6, i32 7, i32 7>
+ ; CHECK-DAG: ilvl.w [[R3:\$w[0-9]+]], [[R2]], [[R2]]
+ store <4 x i32> %3, <4 x i32>* %c
+ ; CHECK-DAG: st.w [[R3]], 0($4)
+
+ ret void
+}
+
+define void @ilvl_v2i64_1(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
+ ; CHECK-LABEL: ilvl_v2i64_1:
+
+ %1 = load <2 x i64>, <2 x i64>* %a
+ %2 = load <2 x i64>, <2 x i64>* %b
+ ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
+ %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 3, i32 3>
+ ; ilvl.d and splati.d are equivalent for v2i64
+ ; CHECK-DAG: splati.d [[R3:\$w[0-9]+]], [[R2]][1]
+ store <2 x i64> %3, <2 x i64>* %c
+ ; CHECK-DAG: st.d [[R3]], 0($4)
+
+ ret void
+}
+
+define void @ilvl_v16i8_2(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
+ ; CHECK-LABEL: ilvl_v16i8_2:
+
+ %1 = load <16 x i8>, <16 x i8>* %a
+ ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
+ %2 = load <16 x i8>, <16 x i8>* %b
+ %3 = shufflevector <16 x i8> %1, <16 x i8> %2,
+ <16 x i32> <i32 8, i32 8, i32 9, i32 9, i32 10, i32 10, i32 11, i32 11, i32 12, i32 12, i32 13, i32 13, i32 14, i32 14, i32 15, i32 15>
+ ; CHECK-DAG: ilvl.b [[R3:\$w[0-9]+]], [[R1]], [[R1]]
+ store <16 x i8> %3, <16 x i8>* %c
+ ; CHECK-DAG: st.b [[R3]], 0($4)
+
+ ret void
+}
+
+define void @ilvl_v8i16_2(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
+ ; CHECK-LABEL: ilvl_v8i16_2:
+
+ %1 = load <8 x i16>, <8 x i16>* %a
+ ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
+ %2 = load <8 x i16>, <8 x i16>* %b
+ %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7>
+ ; CHECK-DAG: ilvl.h [[R3:\$w[0-9]+]], [[R1]], [[R1]]
+ store <8 x i16> %3, <8 x i16>* %c
+ ; CHECK-DAG: st.h [[R3]], 0($4)
+
+ ret void
+}
+
+define void @ilvl_v4i32_2(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
+ ; CHECK-LABEL: ilvl_v4i32_2:
+
+ %1 = load <4 x i32>, <4 x i32>* %a
+ ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
+ %2 = load <4 x i32>, <4 x i32>* %b
+ %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 2, i32 2, i32 3, i32 3>
+ ; CHECK-DAG: ilvl.w [[R3:\$w[0-9]+]], [[R1]], [[R1]]
+ store <4 x i32> %3, <4 x i32>* %c
+ ; CHECK-DAG: st.w [[R3]], 0($4)
+
+ ret void
+}
+
+define void @ilvl_v2i64_2(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
+ ; CHECK-LABEL: ilvl_v2i64_2:
+
+ %1 = load <2 x i64>, <2 x i64>* %a
+ ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
+ %2 = load <2 x i64>, <2 x i64>* %b
+ %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 1, i32 1>
+ ; ilvl.d and splati.d are equivalent for v2i64
+ ; CHECK-DAG: splati.d [[R3:\$w[0-9]+]], [[R1]][1]
+ store <2 x i64> %3, <2 x i64>* %c
+ ; CHECK-DAG: st.d [[R3]], 0($4)
+
+ ret void
}
define void @pckev_v16i8_0(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
- ; CHECK: pckev_v16i8_0:
+ ; CHECK-LABEL: pckev_v16i8_0:
- %1 = load <16 x i8>* %a
+ %1 = load <16 x i8>, <16 x i8>* %a
; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
- %2 = load <16 x i8>* %b
+ %2 = load <16 x i8>, <16 x i8>* %b
; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
%3 = shufflevector <16 x i8> %1, <16 x i8> %2,
<16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
- ; CHECK-DAG: pckev.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+ ; CHECK-DAG: pckev.b [[R3:\$w[0-9]+]], [[R2]], [[R1]]
store <16 x i8> %3, <16 x i8>* %c
; CHECK-DAG: st.b [[R3]], 0($4)
ret void
- ; CHECK: .size pckev_v16i8_0
}
define void @pckev_v8i16_0(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
- ; CHECK: pckev_v8i16_0:
+ ; CHECK-LABEL: pckev_v8i16_0:
- %1 = load <8 x i16>* %a
+ %1 = load <8 x i16>, <8 x i16>* %a
; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
- %2 = load <8 x i16>* %b
+ %2 = load <8 x i16>, <8 x i16>* %b
; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
%3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
- ; CHECK-DAG: pckev.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+ ; CHECK-DAG: pckev.h [[R3:\$w[0-9]+]], [[R2]], [[R1]]
store <8 x i16> %3, <8 x i16>* %c
; CHECK-DAG: st.h [[R3]], 0($4)
ret void
- ; CHECK: .size pckev_v8i16_0
}
define void @pckev_v4i32_0(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
- ; CHECK: pckev_v4i32_0:
+ ; CHECK-LABEL: pckev_v4i32_0:
- %1 = load <4 x i32>* %a
+ %1 = load <4 x i32>, <4 x i32>* %a
; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
- %2 = load <4 x i32>* %b
+ %2 = load <4 x i32>, <4 x i32>* %b
; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
%3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
- ; CHECK-DAG: pckev.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+ ; CHECK-DAG: pckev.w [[R3:\$w[0-9]+]], [[R2]], [[R1]]
store <4 x i32> %3, <4 x i32>* %c
; CHECK-DAG: st.w [[R3]], 0($4)
ret void
- ; CHECK: .size pckev_v4i32_0
}
define void @pckev_v2i64_0(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
- ; CHECK: pckev_v2i64_0:
+ ; CHECK-LABEL: pckev_v2i64_0:
- %1 = load <2 x i64>* %a
+ %1 = load <2 x i64>, <2 x i64>* %a
; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
- %2 = load <2 x i64>* %b
+ %2 = load <2 x i64>, <2 x i64>* %b
; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
%3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 0, i32 2>
; pckev.d and ilvev.d are equivalent for v2i64
- ; CHECK-DAG: ilvev.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+ ; CHECK-DAG: ilvev.d [[R3:\$w[0-9]+]], [[R2]], [[R1]]
+ store <2 x i64> %3, <2 x i64>* %c
+ ; CHECK-DAG: st.d [[R3]], 0($4)
+
+ ret void
+}
+
+define void @pckev_v16i8_1(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
+ ; CHECK-LABEL: pckev_v16i8_1:
+
+ %1 = load <16 x i8>, <16 x i8>* %a
+ %2 = load <16 x i8>, <16 x i8>* %b
+ ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
+ %3 = shufflevector <16 x i8> %1, <16 x i8> %2,
+ <16 x i32> <i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
+ ; CHECK-DAG: pckev.b [[R3:\$w[0-9]+]], [[R2]], [[R2]]
+ store <16 x i8> %3, <16 x i8>* %c
+ ; CHECK-DAG: st.b [[R3]], 0($4)
+
+ ret void
+}
+
+define void @pckev_v8i16_1(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
+ ; CHECK-LABEL: pckev_v8i16_1:
+
+ %1 = load <8 x i16>, <8 x i16>* %a
+ %2 = load <8 x i16>, <8 x i16>* %b
+ ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
+ %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 8, i32 10, i32 12, i32 14, i32 8, i32 10, i32 12, i32 14>
+ ; CHECK-DAG: pckev.h [[R3:\$w[0-9]+]], [[R2]], [[R2]]
+ store <8 x i16> %3, <8 x i16>* %c
+ ; CHECK-DAG: st.h [[R3]], 0($4)
+
+ ret void
+}
+
+define void @pckev_v4i32_1(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
+ ; CHECK-LABEL: pckev_v4i32_1:
+
+ %1 = load <4 x i32>, <4 x i32>* %a
+ %2 = load <4 x i32>, <4 x i32>* %b
+ ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
+ %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 4, i32 6, i32 4, i32 6>
+ ; CHECK-DAG: pckev.w [[R3:\$w[0-9]+]], [[R2]], [[R2]]
+ store <4 x i32> %3, <4 x i32>* %c
+ ; CHECK-DAG: st.w [[R3]], 0($4)
+
+ ret void
+}
+
+define void @pckev_v2i64_1(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
+ ; CHECK-LABEL: pckev_v2i64_1:
+
+ %1 = load <2 x i64>, <2 x i64>* %a
+ %2 = load <2 x i64>, <2 x i64>* %b
+ ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
+ %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 2, i32 2>
+ ; pckev.d and splati.d are equivalent for v2i64
+ ; CHECK-DAG: splati.d [[R3:\$w[0-9]+]], [[R2]][0]
+ store <2 x i64> %3, <2 x i64>* %c
+ ; CHECK-DAG: st.d [[R3]], 0($4)
+
+ ret void
+}
+
+define void @pckev_v16i8_2(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
+ ; CHECK-LABEL: pckev_v16i8_2:
+
+ %1 = load <16 x i8>, <16 x i8>* %a
+ ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
+ %2 = load <16 x i8>, <16 x i8>* %b
+ %3 = shufflevector <16 x i8> %1, <16 x i8> %2,
+ <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+ ; CHECK-DAG: pckev.b [[R3:\$w[0-9]+]], [[R1]], [[R1]]
+ store <16 x i8> %3, <16 x i8>* %c
+ ; CHECK-DAG: st.b [[R3]], 0($4)
+
+ ret void
+}
+
+define void @pckev_v8i16_2(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
+ ; CHECK-LABEL: pckev_v8i16_2:
+
+ %1 = load <8 x i16>, <8 x i16>* %a
+ ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
+ %2 = load <8 x i16>, <8 x i16>* %b
+ %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 0, i32 2, i32 4, i32 6>
+ ; CHECK-DAG: pckev.h [[R3:\$w[0-9]+]], [[R1]], [[R1]]
+ store <8 x i16> %3, <8 x i16>* %c
+ ; CHECK-DAG: st.h [[R3]], 0($4)
+
+ ret void
+}
+
+define void @pckev_v4i32_2(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
+ ; CHECK-LABEL: pckev_v4i32_2:
+
+ %1 = load <4 x i32>, <4 x i32>* %a
+ ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
+ %2 = load <4 x i32>, <4 x i32>* %b
+ %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 0, i32 2, i32 0, i32 2>
+ ; CHECK-DAG: pckev.w [[R3:\$w[0-9]+]], [[R1]], [[R1]]
+ store <4 x i32> %3, <4 x i32>* %c
+ ; CHECK-DAG: st.w [[R3]], 0($4)
+
+ ret void
+}
+
+define void @pckev_v2i64_2(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
+ ; CHECK-LABEL: pckev_v2i64_2:
+
+ %1 = load <2 x i64>, <2 x i64>* %a
+ ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
+ %2 = load <2 x i64>, <2 x i64>* %b
+ %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 0, i32 0>
+ ; pckev.d and splati.d are equivalent for v2i64
+ ; CHECK-DAG: splati.d [[R3:\$w[0-9]+]], [[R1]][0]
store <2 x i64> %3, <2 x i64>* %c
; CHECK-DAG: st.d [[R3]], 0($4)
ret void
- ; CHECK: .size pckev_v2i64_0
}
define void @pckod_v16i8_0(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
- ; CHECK: pckod_v16i8_0:
+ ; CHECK-LABEL: pckod_v16i8_0:
- %1 = load <16 x i8>* %a
+ %1 = load <16 x i8>, <16 x i8>* %a
; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
- %2 = load <16 x i8>* %b
+ %2 = load <16 x i8>, <16 x i8>* %b
; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
%3 = shufflevector <16 x i8> %1, <16 x i8> %2,
<16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
- ; CHECK-DAG: pckod.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+ ; CHECK-DAG: pckod.b [[R3:\$w[0-9]+]], [[R2]], [[R1]]
store <16 x i8> %3, <16 x i8>* %c
; CHECK-DAG: st.b [[R3]], 0($4)
ret void
- ; CHECK: .size pckod_v16i8_0
}
define void @pckod_v8i16_0(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
- ; CHECK: pckod_v8i16_0:
+ ; CHECK-LABEL: pckod_v8i16_0:
- %1 = load <8 x i16>* %a
+ %1 = load <8 x i16>, <8 x i16>* %a
; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
- %2 = load <8 x i16>* %b
+ %2 = load <8 x i16>, <8 x i16>* %b
; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
%3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
- ; CHECK-DAG: pckod.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+ ; CHECK-DAG: pckod.h [[R3:\$w[0-9]+]], [[R2]], [[R1]]
store <8 x i16> %3, <8 x i16>* %c
; CHECK-DAG: st.h [[R3]], 0($4)
ret void
- ; CHECK: .size pckod_v8i16_0
}
define void @pckod_v4i32_0(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
- ; CHECK: pckod_v4i32_0:
+ ; CHECK-LABEL: pckod_v4i32_0:
- %1 = load <4 x i32>* %a
+ %1 = load <4 x i32>, <4 x i32>* %a
; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
- %2 = load <4 x i32>* %b
+ %2 = load <4 x i32>, <4 x i32>* %b
; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
%3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
- ; CHECK-DAG: pckod.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+ ; CHECK-DAG: pckod.w [[R3:\$w[0-9]+]], [[R2]], [[R1]]
store <4 x i32> %3, <4 x i32>* %c
; CHECK-DAG: st.w [[R3]], 0($4)
ret void
- ; CHECK: .size pckod_v4i32_0
}
define void @pckod_v2i64_0(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
- ; CHECK: pckod_v2i64_0:
+ ; CHECK-LABEL: pckod_v2i64_0:
- %1 = load <2 x i64>* %a
+ %1 = load <2 x i64>, <2 x i64>* %a
; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
- %2 = load <2 x i64>* %b
+ %2 = load <2 x i64>, <2 x i64>* %b
; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
%3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 1, i32 3>
; pckod.d and ilvod.d are equivalent for v2i64
@@ -759,13 +1295,128 @@ define void @pckod_v2i64_0(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind
; CHECK-DAG: st.d [[R3]], 0($4)
ret void
- ; CHECK: .size pckod_v2i64_0
+}
+
+define void @pckod_v16i8_1(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
+ ; CHECK-LABEL: pckod_v16i8_1:
+
+ %1 = load <16 x i8>, <16 x i8>* %a
+ %2 = load <16 x i8>, <16 x i8>* %b
+ ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
+ %3 = shufflevector <16 x i8> %1, <16 x i8> %2,
+ <16 x i32> <i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
+ ; CHECK-DAG: pckod.b [[R3:\$w[0-9]+]], [[R2]], [[R2]]
+ store <16 x i8> %3, <16 x i8>* %c
+ ; CHECK-DAG: st.b [[R3]], 0($4)
+
+ ret void
+}
+
+define void @pckod_v8i16_1(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
+ ; CHECK-LABEL: pckod_v8i16_1:
+
+ %1 = load <8 x i16>, <8 x i16>* %a
+ %2 = load <8 x i16>, <8 x i16>* %b
+ ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
+ %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 9, i32 11, i32 13, i32 15, i32 9, i32 11, i32 13, i32 15>
+ ; CHECK-DAG: pckod.h [[R3:\$w[0-9]+]], [[R2]], [[R2]]
+ store <8 x i16> %3, <8 x i16>* %c
+ ; CHECK-DAG: st.h [[R3]], 0($4)
+
+ ret void
+}
+
+define void @pckod_v4i32_1(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
+ ; CHECK-LABEL: pckod_v4i32_1:
+
+ %1 = load <4 x i32>, <4 x i32>* %a
+ %2 = load <4 x i32>, <4 x i32>* %b
+ ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
+ %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 5, i32 7, i32 5, i32 7>
+ ; CHECK-DAG: pckod.w [[R3:\$w[0-9]+]], [[R2]], [[R2]]
+ store <4 x i32> %3, <4 x i32>* %c
+ ; CHECK-DAG: st.w [[R3]], 0($4)
+
+ ret void
+}
+
+define void @pckod_v2i64_1(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
+ ; CHECK-LABEL: pckod_v2i64_1:
+
+ %1 = load <2 x i64>, <2 x i64>* %a
+ %2 = load <2 x i64>, <2 x i64>* %b
+ ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
+ %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 3, i32 3>
+ ; pckod.d and splati.d are equivalent for v2i64
+ ; CHECK-DAG: splati.d [[R3:\$w[0-9]+]], [[R2]][1]
+ store <2 x i64> %3, <2 x i64>* %c
+ ; CHECK-DAG: st.d [[R3]], 0($4)
+
+ ret void
+}
+
+define void @pckod_v16i8_2(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
+ ; CHECK-LABEL: pckod_v16i8_2:
+
+ %1 = load <16 x i8>, <16 x i8>* %a
+ ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
+ %2 = load <16 x i8>, <16 x i8>* %b
+ %3 = shufflevector <16 x i8> %1, <16 x i8> %2,
+ <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+ ; CHECK-DAG: pckod.b [[R3:\$w[0-9]+]], [[R1]], [[R1]]
+ store <16 x i8> %3, <16 x i8>* %c
+ ; CHECK-DAG: st.b [[R3]], 0($4)
+
+ ret void
+}
+
+define void @pckod_v8i16_2(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
+ ; CHECK-LABEL: pckod_v8i16_2:
+
+ %1 = load <8 x i16>, <8 x i16>* %a
+ ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
+ %2 = load <8 x i16>, <8 x i16>* %b
+ %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 1, i32 3, i32 5, i32 7>
+ ; CHECK-DAG: pckod.h [[R3:\$w[0-9]+]], [[R1]], [[R1]]
+ store <8 x i16> %3, <8 x i16>* %c
+ ; CHECK-DAG: st.h [[R3]], 0($4)
+
+ ret void
+}
+
+define void @pckod_v4i32_2(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
+ ; CHECK-LABEL: pckod_v4i32_2:
+
+ %1 = load <4 x i32>, <4 x i32>* %a
+ ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
+ %2 = load <4 x i32>, <4 x i32>* %b
+ %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 1, i32 3, i32 1, i32 3>
+ ; CHECK-DAG: pckod.w [[R3:\$w[0-9]+]], [[R1]], [[R1]]
+ store <4 x i32> %3, <4 x i32>* %c
+ ; CHECK-DAG: st.w [[R3]], 0($4)
+
+ ret void
+}
+
+define void @pckod_v2i64_2(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
+ ; CHECK-LABEL: pckod_v2i64_2:
+
+ %1 = load <2 x i64>, <2 x i64>* %a
+ ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
+ %2 = load <2 x i64>, <2 x i64>* %b
+ %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 1, i32 1>
+ ; pckod.d and splati.d are equivalent for v2i64
+ ; CHECK-DAG: splati.d [[R3:\$w[0-9]+]], [[R1]][1]
+ store <2 x i64> %3, <2 x i64>* %c
+ ; CHECK-DAG: st.d [[R3]], 0($4)
+
+ ret void
}
define void @splati_v16i8_0(<16 x i8>* %c, <16 x i8>* %a) nounwind {
- ; CHECK: splati_v16i8_0:
+ ; CHECK-LABEL: splati_v16i8_0:
- %1 = load <16 x i8>* %a
+ %1 = load <16 x i8>, <16 x i8>* %a
; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
%2 = shufflevector <16 x i8> %1, <16 x i8> undef,
<16 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
@@ -774,13 +1425,12 @@ define void @splati_v16i8_0(<16 x i8>* %c, <16 x i8>* %a) nounwind {
; CHECK-DAG: st.b [[R3]], 0($4)
ret void
- ; CHECK: .size splati_v16i8_0
}
define void @splati_v8i16_0(<8 x i16>* %c, <8 x i16>* %a) nounwind {
- ; CHECK: splati_v8i16_0:
+ ; CHECK-LABEL: splati_v8i16_0:
- %1 = load <8 x i16>* %a
+ %1 = load <8 x i16>, <8 x i16>* %a
; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
%2 = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
; CHECK-DAG: splati.h [[R3:\$w[0-9]+]], [[R1]][4]
@@ -788,28 +1438,25 @@ define void @splati_v8i16_0(<8 x i16>* %c, <8 x i16>* %a) nounwind {
; CHECK-DAG: st.h [[R3]], 0($4)
ret void
- ; CHECK: .size splati_v8i16_0
}
define void @splati_v4i32_0(<4 x i32>* %c, <4 x i32>* %a) nounwind {
- ; CHECK: splati_v4i32_0:
+ ; CHECK-LABEL: splati_v4i32_0:
- %1 = load <4 x i32>* %a
+ %1 = load <4 x i32>, <4 x i32>* %a
; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
%2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
- ; shf.w and splati.w are equivalent
- ; CHECK-DAG: shf.w [[R3:\$w[0-9]+]], [[R1]], 255
+ ; CHECK-DAG: splati.w [[R3:\$w[0-9]+]], [[R1]][3]
store <4 x i32> %2, <4 x i32>* %c
; CHECK-DAG: st.w [[R3]], 0($4)
ret void
- ; CHECK: .size splati_v4i32_0
}
define void @splati_v2i64_0(<2 x i64>* %c, <2 x i64>* %a) nounwind {
- ; CHECK: splati_v2i64_0:
+ ; CHECK-LABEL: splati_v2i64_0:
- %1 = load <2 x i64>* %a
+ %1 = load <2 x i64>, <2 x i64>* %a
; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
%2 = shufflevector <2 x i64> %1, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
; CHECK-DAG: splati.d [[R3:\$w[0-9]+]], [[R1]][1]
@@ -817,5 +1464,4 @@ define void @splati_v2i64_0(<2 x i64>* %c, <2 x i64>* %a) nounwind {
; CHECK-DAG: st.d [[R3]], 0($4)
ret void
- ; CHECK: .size splati_v2i64_0
}
diff --git a/test/CodeGen/Mips/msa/spill.ll b/test/CodeGen/Mips/msa/spill.ll
index 66f896ac4684..8c9a79912351 100644
--- a/test/CodeGen/Mips/msa/spill.ll
+++ b/test/CodeGen/Mips/msa/spill.ll
@@ -6,73 +6,73 @@
define i32 @test_i8(<16 x i8>* %p0, <16 x i8>* %q1) nounwind {
entry:
- %p1 = getelementptr <16 x i8>* %p0, i32 1
- %p2 = getelementptr <16 x i8>* %p0, i32 2
- %p3 = getelementptr <16 x i8>* %p0, i32 3
- %p4 = getelementptr <16 x i8>* %p0, i32 4
- %p5 = getelementptr <16 x i8>* %p0, i32 5
- %p6 = getelementptr <16 x i8>* %p0, i32 6
- %p7 = getelementptr <16 x i8>* %p0, i32 7
- %p8 = getelementptr <16 x i8>* %p0, i32 8
- %p9 = getelementptr <16 x i8>* %p0, i32 9
- %p10 = getelementptr <16 x i8>* %p0, i32 10
- %p11 = getelementptr <16 x i8>* %p0, i32 11
- %p12 = getelementptr <16 x i8>* %p0, i32 12
- %p13 = getelementptr <16 x i8>* %p0, i32 13
- %p14 = getelementptr <16 x i8>* %p0, i32 14
- %p15 = getelementptr <16 x i8>* %p0, i32 15
- %p16 = getelementptr <16 x i8>* %p0, i32 16
- %p17 = getelementptr <16 x i8>* %p0, i32 17
- %p18 = getelementptr <16 x i8>* %p0, i32 18
- %p19 = getelementptr <16 x i8>* %p0, i32 19
- %p20 = getelementptr <16 x i8>* %p0, i32 20
- %p21 = getelementptr <16 x i8>* %p0, i32 21
- %p22 = getelementptr <16 x i8>* %p0, i32 22
- %p23 = getelementptr <16 x i8>* %p0, i32 23
- %p24 = getelementptr <16 x i8>* %p0, i32 24
- %p25 = getelementptr <16 x i8>* %p0, i32 25
- %p26 = getelementptr <16 x i8>* %p0, i32 26
- %p27 = getelementptr <16 x i8>* %p0, i32 27
- %p28 = getelementptr <16 x i8>* %p0, i32 28
- %p29 = getelementptr <16 x i8>* %p0, i32 29
- %p30 = getelementptr <16 x i8>* %p0, i32 30
- %p31 = getelementptr <16 x i8>* %p0, i32 31
- %p32 = getelementptr <16 x i8>* %p0, i32 32
- %p33 = getelementptr <16 x i8>* %p0, i32 33
- %0 = load <16 x i8>* %p0, align 16
- %1 = load <16 x i8>* %p1, align 16
- %2 = load <16 x i8>* %p2, align 16
- %3 = load <16 x i8>* %p3, align 16
- %4 = load <16 x i8>* %p4, align 16
- %5 = load <16 x i8>* %p5, align 16
- %6 = load <16 x i8>* %p6, align 16
- %7 = load <16 x i8>* %p7, align 16
- %8 = load <16 x i8>* %p8, align 16
- %9 = load <16 x i8>* %p9, align 16
- %10 = load <16 x i8>* %p10, align 16
- %11 = load <16 x i8>* %p11, align 16
- %12 = load <16 x i8>* %p12, align 16
- %13 = load <16 x i8>* %p13, align 16
- %14 = load <16 x i8>* %p14, align 16
- %15 = load <16 x i8>* %p15, align 16
- %16 = load <16 x i8>* %p16, align 16
- %17 = load <16 x i8>* %p17, align 16
- %18 = load <16 x i8>* %p18, align 16
- %19 = load <16 x i8>* %p19, align 16
- %20 = load <16 x i8>* %p20, align 16
- %21 = load <16 x i8>* %p21, align 16
- %22 = load <16 x i8>* %p22, align 16
- %23 = load <16 x i8>* %p23, align 16
- %24 = load <16 x i8>* %p24, align 16
- %25 = load <16 x i8>* %p25, align 16
- %26 = load <16 x i8>* %p26, align 16
- %27 = load <16 x i8>* %p27, align 16
- %28 = load <16 x i8>* %p28, align 16
- %29 = load <16 x i8>* %p29, align 16
- %30 = load <16 x i8>* %p30, align 16
- %31 = load <16 x i8>* %p31, align 16
- %32 = load <16 x i8>* %p32, align 16
- %33 = load <16 x i8>* %p33, align 16
+ %p1 = getelementptr <16 x i8>, <16 x i8>* %p0, i32 1
+ %p2 = getelementptr <16 x i8>, <16 x i8>* %p0, i32 2
+ %p3 = getelementptr <16 x i8>, <16 x i8>* %p0, i32 3
+ %p4 = getelementptr <16 x i8>, <16 x i8>* %p0, i32 4
+ %p5 = getelementptr <16 x i8>, <16 x i8>* %p0, i32 5
+ %p6 = getelementptr <16 x i8>, <16 x i8>* %p0, i32 6
+ %p7 = getelementptr <16 x i8>, <16 x i8>* %p0, i32 7
+ %p8 = getelementptr <16 x i8>, <16 x i8>* %p0, i32 8
+ %p9 = getelementptr <16 x i8>, <16 x i8>* %p0, i32 9
+ %p10 = getelementptr <16 x i8>, <16 x i8>* %p0, i32 10
+ %p11 = getelementptr <16 x i8>, <16 x i8>* %p0, i32 11
+ %p12 = getelementptr <16 x i8>, <16 x i8>* %p0, i32 12
+ %p13 = getelementptr <16 x i8>, <16 x i8>* %p0, i32 13
+ %p14 = getelementptr <16 x i8>, <16 x i8>* %p0, i32 14
+ %p15 = getelementptr <16 x i8>, <16 x i8>* %p0, i32 15
+ %p16 = getelementptr <16 x i8>, <16 x i8>* %p0, i32 16
+ %p17 = getelementptr <16 x i8>, <16 x i8>* %p0, i32 17
+ %p18 = getelementptr <16 x i8>, <16 x i8>* %p0, i32 18
+ %p19 = getelementptr <16 x i8>, <16 x i8>* %p0, i32 19
+ %p20 = getelementptr <16 x i8>, <16 x i8>* %p0, i32 20
+ %p21 = getelementptr <16 x i8>, <16 x i8>* %p0, i32 21
+ %p22 = getelementptr <16 x i8>, <16 x i8>* %p0, i32 22
+ %p23 = getelementptr <16 x i8>, <16 x i8>* %p0, i32 23
+ %p24 = getelementptr <16 x i8>, <16 x i8>* %p0, i32 24
+ %p25 = getelementptr <16 x i8>, <16 x i8>* %p0, i32 25
+ %p26 = getelementptr <16 x i8>, <16 x i8>* %p0, i32 26
+ %p27 = getelementptr <16 x i8>, <16 x i8>* %p0, i32 27
+ %p28 = getelementptr <16 x i8>, <16 x i8>* %p0, i32 28
+ %p29 = getelementptr <16 x i8>, <16 x i8>* %p0, i32 29
+ %p30 = getelementptr <16 x i8>, <16 x i8>* %p0, i32 30
+ %p31 = getelementptr <16 x i8>, <16 x i8>* %p0, i32 31
+ %p32 = getelementptr <16 x i8>, <16 x i8>* %p0, i32 32
+ %p33 = getelementptr <16 x i8>, <16 x i8>* %p0, i32 33
+ %0 = load <16 x i8>, <16 x i8>* %p0, align 16
+ %1 = load <16 x i8>, <16 x i8>* %p1, align 16
+ %2 = load <16 x i8>, <16 x i8>* %p2, align 16
+ %3 = load <16 x i8>, <16 x i8>* %p3, align 16
+ %4 = load <16 x i8>, <16 x i8>* %p4, align 16
+ %5 = load <16 x i8>, <16 x i8>* %p5, align 16
+ %6 = load <16 x i8>, <16 x i8>* %p6, align 16
+ %7 = load <16 x i8>, <16 x i8>* %p7, align 16
+ %8 = load <16 x i8>, <16 x i8>* %p8, align 16
+ %9 = load <16 x i8>, <16 x i8>* %p9, align 16
+ %10 = load <16 x i8>, <16 x i8>* %p10, align 16
+ %11 = load <16 x i8>, <16 x i8>* %p11, align 16
+ %12 = load <16 x i8>, <16 x i8>* %p12, align 16
+ %13 = load <16 x i8>, <16 x i8>* %p13, align 16
+ %14 = load <16 x i8>, <16 x i8>* %p14, align 16
+ %15 = load <16 x i8>, <16 x i8>* %p15, align 16
+ %16 = load <16 x i8>, <16 x i8>* %p16, align 16
+ %17 = load <16 x i8>, <16 x i8>* %p17, align 16
+ %18 = load <16 x i8>, <16 x i8>* %p18, align 16
+ %19 = load <16 x i8>, <16 x i8>* %p19, align 16
+ %20 = load <16 x i8>, <16 x i8>* %p20, align 16
+ %21 = load <16 x i8>, <16 x i8>* %p21, align 16
+ %22 = load <16 x i8>, <16 x i8>* %p22, align 16
+ %23 = load <16 x i8>, <16 x i8>* %p23, align 16
+ %24 = load <16 x i8>, <16 x i8>* %p24, align 16
+ %25 = load <16 x i8>, <16 x i8>* %p25, align 16
+ %26 = load <16 x i8>, <16 x i8>* %p26, align 16
+ %27 = load <16 x i8>, <16 x i8>* %p27, align 16
+ %28 = load <16 x i8>, <16 x i8>* %p28, align 16
+ %29 = load <16 x i8>, <16 x i8>* %p29, align 16
+ %30 = load <16 x i8>, <16 x i8>* %p30, align 16
+ %31 = load <16 x i8>, <16 x i8>* %p31, align 16
+ %32 = load <16 x i8>, <16 x i8>* %p32, align 16
+ %33 = load <16 x i8>, <16 x i8>* %p33, align 16
%r1 = call <16 x i8> @llvm.mips.addv.b(<16 x i8> %0, <16 x i8> %1)
%r2 = call <16 x i8> @llvm.mips.addv.b(<16 x i8> %r1, <16 x i8> %2)
%r3 = call <16 x i8> @llvm.mips.addv.b(<16 x i8> %r2, <16 x i8> %3)
@@ -155,73 +155,73 @@ declare i32 @llvm.mips.copy.s.b(<16 x i8>, i32) nounwind
define i32 @test_i16(<8 x i16>* %p0, <8 x i16>* %q1) nounwind {
entry:
- %p1 = getelementptr <8 x i16>* %p0, i32 1
- %p2 = getelementptr <8 x i16>* %p0, i32 2
- %p3 = getelementptr <8 x i16>* %p0, i32 3
- %p4 = getelementptr <8 x i16>* %p0, i32 4
- %p5 = getelementptr <8 x i16>* %p0, i32 5
- %p6 = getelementptr <8 x i16>* %p0, i32 6
- %p7 = getelementptr <8 x i16>* %p0, i32 7
- %p8 = getelementptr <8 x i16>* %p0, i32 8
- %p9 = getelementptr <8 x i16>* %p0, i32 9
- %p10 = getelementptr <8 x i16>* %p0, i32 10
- %p11 = getelementptr <8 x i16>* %p0, i32 11
- %p12 = getelementptr <8 x i16>* %p0, i32 12
- %p13 = getelementptr <8 x i16>* %p0, i32 13
- %p14 = getelementptr <8 x i16>* %p0, i32 14
- %p15 = getelementptr <8 x i16>* %p0, i32 15
- %p16 = getelementptr <8 x i16>* %p0, i32 16
- %p17 = getelementptr <8 x i16>* %p0, i32 17
- %p18 = getelementptr <8 x i16>* %p0, i32 18
- %p19 = getelementptr <8 x i16>* %p0, i32 19
- %p20 = getelementptr <8 x i16>* %p0, i32 20
- %p21 = getelementptr <8 x i16>* %p0, i32 21
- %p22 = getelementptr <8 x i16>* %p0, i32 22
- %p23 = getelementptr <8 x i16>* %p0, i32 23
- %p24 = getelementptr <8 x i16>* %p0, i32 24
- %p25 = getelementptr <8 x i16>* %p0, i32 25
- %p26 = getelementptr <8 x i16>* %p0, i32 26
- %p27 = getelementptr <8 x i16>* %p0, i32 27
- %p28 = getelementptr <8 x i16>* %p0, i32 28
- %p29 = getelementptr <8 x i16>* %p0, i32 29
- %p30 = getelementptr <8 x i16>* %p0, i32 30
- %p31 = getelementptr <8 x i16>* %p0, i32 31
- %p32 = getelementptr <8 x i16>* %p0, i32 32
- %p33 = getelementptr <8 x i16>* %p0, i32 33
- %0 = load <8 x i16>* %p0, align 16
- %1 = load <8 x i16>* %p1, align 16
- %2 = load <8 x i16>* %p2, align 16
- %3 = load <8 x i16>* %p3, align 16
- %4 = load <8 x i16>* %p4, align 16
- %5 = load <8 x i16>* %p5, align 16
- %6 = load <8 x i16>* %p6, align 16
- %7 = load <8 x i16>* %p7, align 16
- %8 = load <8 x i16>* %p8, align 16
- %9 = load <8 x i16>* %p9, align 16
- %10 = load <8 x i16>* %p10, align 16
- %11 = load <8 x i16>* %p11, align 16
- %12 = load <8 x i16>* %p12, align 16
- %13 = load <8 x i16>* %p13, align 16
- %14 = load <8 x i16>* %p14, align 16
- %15 = load <8 x i16>* %p15, align 16
- %16 = load <8 x i16>* %p16, align 16
- %17 = load <8 x i16>* %p17, align 16
- %18 = load <8 x i16>* %p18, align 16
- %19 = load <8 x i16>* %p19, align 16
- %20 = load <8 x i16>* %p20, align 16
- %21 = load <8 x i16>* %p21, align 16
- %22 = load <8 x i16>* %p22, align 16
- %23 = load <8 x i16>* %p23, align 16
- %24 = load <8 x i16>* %p24, align 16
- %25 = load <8 x i16>* %p25, align 16
- %26 = load <8 x i16>* %p26, align 16
- %27 = load <8 x i16>* %p27, align 16
- %28 = load <8 x i16>* %p28, align 16
- %29 = load <8 x i16>* %p29, align 16
- %30 = load <8 x i16>* %p30, align 16
- %31 = load <8 x i16>* %p31, align 16
- %32 = load <8 x i16>* %p32, align 16
- %33 = load <8 x i16>* %p33, align 16
+ %p1 = getelementptr <8 x i16>, <8 x i16>* %p0, i32 1
+ %p2 = getelementptr <8 x i16>, <8 x i16>* %p0, i32 2
+ %p3 = getelementptr <8 x i16>, <8 x i16>* %p0, i32 3
+ %p4 = getelementptr <8 x i16>, <8 x i16>* %p0, i32 4
+ %p5 = getelementptr <8 x i16>, <8 x i16>* %p0, i32 5
+ %p6 = getelementptr <8 x i16>, <8 x i16>* %p0, i32 6
+ %p7 = getelementptr <8 x i16>, <8 x i16>* %p0, i32 7
+ %p8 = getelementptr <8 x i16>, <8 x i16>* %p0, i32 8
+ %p9 = getelementptr <8 x i16>, <8 x i16>* %p0, i32 9
+ %p10 = getelementptr <8 x i16>, <8 x i16>* %p0, i32 10
+ %p11 = getelementptr <8 x i16>, <8 x i16>* %p0, i32 11
+ %p12 = getelementptr <8 x i16>, <8 x i16>* %p0, i32 12
+ %p13 = getelementptr <8 x i16>, <8 x i16>* %p0, i32 13
+ %p14 = getelementptr <8 x i16>, <8 x i16>* %p0, i32 14
+ %p15 = getelementptr <8 x i16>, <8 x i16>* %p0, i32 15
+ %p16 = getelementptr <8 x i16>, <8 x i16>* %p0, i32 16
+ %p17 = getelementptr <8 x i16>, <8 x i16>* %p0, i32 17
+ %p18 = getelementptr <8 x i16>, <8 x i16>* %p0, i32 18
+ %p19 = getelementptr <8 x i16>, <8 x i16>* %p0, i32 19
+ %p20 = getelementptr <8 x i16>, <8 x i16>* %p0, i32 20
+ %p21 = getelementptr <8 x i16>, <8 x i16>* %p0, i32 21
+ %p22 = getelementptr <8 x i16>, <8 x i16>* %p0, i32 22
+ %p23 = getelementptr <8 x i16>, <8 x i16>* %p0, i32 23
+ %p24 = getelementptr <8 x i16>, <8 x i16>* %p0, i32 24
+ %p25 = getelementptr <8 x i16>, <8 x i16>* %p0, i32 25
+ %p26 = getelementptr <8 x i16>, <8 x i16>* %p0, i32 26
+ %p27 = getelementptr <8 x i16>, <8 x i16>* %p0, i32 27
+ %p28 = getelementptr <8 x i16>, <8 x i16>* %p0, i32 28
+ %p29 = getelementptr <8 x i16>, <8 x i16>* %p0, i32 29
+ %p30 = getelementptr <8 x i16>, <8 x i16>* %p0, i32 30
+ %p31 = getelementptr <8 x i16>, <8 x i16>* %p0, i32 31
+ %p32 = getelementptr <8 x i16>, <8 x i16>* %p0, i32 32
+ %p33 = getelementptr <8 x i16>, <8 x i16>* %p0, i32 33
+ %0 = load <8 x i16>, <8 x i16>* %p0, align 16
+ %1 = load <8 x i16>, <8 x i16>* %p1, align 16
+ %2 = load <8 x i16>, <8 x i16>* %p2, align 16
+ %3 = load <8 x i16>, <8 x i16>* %p3, align 16
+ %4 = load <8 x i16>, <8 x i16>* %p4, align 16
+ %5 = load <8 x i16>, <8 x i16>* %p5, align 16
+ %6 = load <8 x i16>, <8 x i16>* %p6, align 16
+ %7 = load <8 x i16>, <8 x i16>* %p7, align 16
+ %8 = load <8 x i16>, <8 x i16>* %p8, align 16
+ %9 = load <8 x i16>, <8 x i16>* %p9, align 16
+ %10 = load <8 x i16>, <8 x i16>* %p10, align 16
+ %11 = load <8 x i16>, <8 x i16>* %p11, align 16
+ %12 = load <8 x i16>, <8 x i16>* %p12, align 16
+ %13 = load <8 x i16>, <8 x i16>* %p13, align 16
+ %14 = load <8 x i16>, <8 x i16>* %p14, align 16
+ %15 = load <8 x i16>, <8 x i16>* %p15, align 16
+ %16 = load <8 x i16>, <8 x i16>* %p16, align 16
+ %17 = load <8 x i16>, <8 x i16>* %p17, align 16
+ %18 = load <8 x i16>, <8 x i16>* %p18, align 16
+ %19 = load <8 x i16>, <8 x i16>* %p19, align 16
+ %20 = load <8 x i16>, <8 x i16>* %p20, align 16
+ %21 = load <8 x i16>, <8 x i16>* %p21, align 16
+ %22 = load <8 x i16>, <8 x i16>* %p22, align 16
+ %23 = load <8 x i16>, <8 x i16>* %p23, align 16
+ %24 = load <8 x i16>, <8 x i16>* %p24, align 16
+ %25 = load <8 x i16>, <8 x i16>* %p25, align 16
+ %26 = load <8 x i16>, <8 x i16>* %p26, align 16
+ %27 = load <8 x i16>, <8 x i16>* %p27, align 16
+ %28 = load <8 x i16>, <8 x i16>* %p28, align 16
+ %29 = load <8 x i16>, <8 x i16>* %p29, align 16
+ %30 = load <8 x i16>, <8 x i16>* %p30, align 16
+ %31 = load <8 x i16>, <8 x i16>* %p31, align 16
+ %32 = load <8 x i16>, <8 x i16>* %p32, align 16
+ %33 = load <8 x i16>, <8 x i16>* %p33, align 16
%r1 = call <8 x i16> @llvm.mips.addv.h(<8 x i16> %0, <8 x i16> %1)
%r2 = call <8 x i16> @llvm.mips.addv.h(<8 x i16> %r1, <8 x i16> %2)
%r3 = call <8 x i16> @llvm.mips.addv.h(<8 x i16> %r2, <8 x i16> %3)
@@ -304,73 +304,73 @@ declare i32 @llvm.mips.copy.s.h(<8 x i16>, i32) nounwind
define i32 @test_i32(<4 x i32>* %p0, <4 x i32>* %q1) nounwind {
entry:
- %p1 = getelementptr <4 x i32>* %p0, i32 1
- %p2 = getelementptr <4 x i32>* %p0, i32 2
- %p3 = getelementptr <4 x i32>* %p0, i32 3
- %p4 = getelementptr <4 x i32>* %p0, i32 4
- %p5 = getelementptr <4 x i32>* %p0, i32 5
- %p6 = getelementptr <4 x i32>* %p0, i32 6
- %p7 = getelementptr <4 x i32>* %p0, i32 7
- %p8 = getelementptr <4 x i32>* %p0, i32 8
- %p9 = getelementptr <4 x i32>* %p0, i32 9
- %p10 = getelementptr <4 x i32>* %p0, i32 10
- %p11 = getelementptr <4 x i32>* %p0, i32 11
- %p12 = getelementptr <4 x i32>* %p0, i32 12
- %p13 = getelementptr <4 x i32>* %p0, i32 13
- %p14 = getelementptr <4 x i32>* %p0, i32 14
- %p15 = getelementptr <4 x i32>* %p0, i32 15
- %p16 = getelementptr <4 x i32>* %p0, i32 16
- %p17 = getelementptr <4 x i32>* %p0, i32 17
- %p18 = getelementptr <4 x i32>* %p0, i32 18
- %p19 = getelementptr <4 x i32>* %p0, i32 19
- %p20 = getelementptr <4 x i32>* %p0, i32 20
- %p21 = getelementptr <4 x i32>* %p0, i32 21
- %p22 = getelementptr <4 x i32>* %p0, i32 22
- %p23 = getelementptr <4 x i32>* %p0, i32 23
- %p24 = getelementptr <4 x i32>* %p0, i32 24
- %p25 = getelementptr <4 x i32>* %p0, i32 25
- %p26 = getelementptr <4 x i32>* %p0, i32 26
- %p27 = getelementptr <4 x i32>* %p0, i32 27
- %p28 = getelementptr <4 x i32>* %p0, i32 28
- %p29 = getelementptr <4 x i32>* %p0, i32 29
- %p30 = getelementptr <4 x i32>* %p0, i32 30
- %p31 = getelementptr <4 x i32>* %p0, i32 31
- %p32 = getelementptr <4 x i32>* %p0, i32 32
- %p33 = getelementptr <4 x i32>* %p0, i32 33
- %0 = load <4 x i32>* %p0, align 16
- %1 = load <4 x i32>* %p1, align 16
- %2 = load <4 x i32>* %p2, align 16
- %3 = load <4 x i32>* %p3, align 16
- %4 = load <4 x i32>* %p4, align 16
- %5 = load <4 x i32>* %p5, align 16
- %6 = load <4 x i32>* %p6, align 16
- %7 = load <4 x i32>* %p7, align 16
- %8 = load <4 x i32>* %p8, align 16
- %9 = load <4 x i32>* %p9, align 16
- %10 = load <4 x i32>* %p10, align 16
- %11 = load <4 x i32>* %p11, align 16
- %12 = load <4 x i32>* %p12, align 16
- %13 = load <4 x i32>* %p13, align 16
- %14 = load <4 x i32>* %p14, align 16
- %15 = load <4 x i32>* %p15, align 16
- %16 = load <4 x i32>* %p16, align 16
- %17 = load <4 x i32>* %p17, align 16
- %18 = load <4 x i32>* %p18, align 16
- %19 = load <4 x i32>* %p19, align 16
- %20 = load <4 x i32>* %p20, align 16
- %21 = load <4 x i32>* %p21, align 16
- %22 = load <4 x i32>* %p22, align 16
- %23 = load <4 x i32>* %p23, align 16
- %24 = load <4 x i32>* %p24, align 16
- %25 = load <4 x i32>* %p25, align 16
- %26 = load <4 x i32>* %p26, align 16
- %27 = load <4 x i32>* %p27, align 16
- %28 = load <4 x i32>* %p28, align 16
- %29 = load <4 x i32>* %p29, align 16
- %30 = load <4 x i32>* %p30, align 16
- %31 = load <4 x i32>* %p31, align 16
- %32 = load <4 x i32>* %p32, align 16
- %33 = load <4 x i32>* %p33, align 16
+ %p1 = getelementptr <4 x i32>, <4 x i32>* %p0, i32 1
+ %p2 = getelementptr <4 x i32>, <4 x i32>* %p0, i32 2
+ %p3 = getelementptr <4 x i32>, <4 x i32>* %p0, i32 3
+ %p4 = getelementptr <4 x i32>, <4 x i32>* %p0, i32 4
+ %p5 = getelementptr <4 x i32>, <4 x i32>* %p0, i32 5
+ %p6 = getelementptr <4 x i32>, <4 x i32>* %p0, i32 6
+ %p7 = getelementptr <4 x i32>, <4 x i32>* %p0, i32 7
+ %p8 = getelementptr <4 x i32>, <4 x i32>* %p0, i32 8
+ %p9 = getelementptr <4 x i32>, <4 x i32>* %p0, i32 9
+ %p10 = getelementptr <4 x i32>, <4 x i32>* %p0, i32 10
+ %p11 = getelementptr <4 x i32>, <4 x i32>* %p0, i32 11
+ %p12 = getelementptr <4 x i32>, <4 x i32>* %p0, i32 12
+ %p13 = getelementptr <4 x i32>, <4 x i32>* %p0, i32 13
+ %p14 = getelementptr <4 x i32>, <4 x i32>* %p0, i32 14
+ %p15 = getelementptr <4 x i32>, <4 x i32>* %p0, i32 15
+ %p16 = getelementptr <4 x i32>, <4 x i32>* %p0, i32 16
+ %p17 = getelementptr <4 x i32>, <4 x i32>* %p0, i32 17
+ %p18 = getelementptr <4 x i32>, <4 x i32>* %p0, i32 18
+ %p19 = getelementptr <4 x i32>, <4 x i32>* %p0, i32 19
+ %p20 = getelementptr <4 x i32>, <4 x i32>* %p0, i32 20
+ %p21 = getelementptr <4 x i32>, <4 x i32>* %p0, i32 21
+ %p22 = getelementptr <4 x i32>, <4 x i32>* %p0, i32 22
+ %p23 = getelementptr <4 x i32>, <4 x i32>* %p0, i32 23
+ %p24 = getelementptr <4 x i32>, <4 x i32>* %p0, i32 24
+ %p25 = getelementptr <4 x i32>, <4 x i32>* %p0, i32 25
+ %p26 = getelementptr <4 x i32>, <4 x i32>* %p0, i32 26
+ %p27 = getelementptr <4 x i32>, <4 x i32>* %p0, i32 27
+ %p28 = getelementptr <4 x i32>, <4 x i32>* %p0, i32 28
+ %p29 = getelementptr <4 x i32>, <4 x i32>* %p0, i32 29
+ %p30 = getelementptr <4 x i32>, <4 x i32>* %p0, i32 30
+ %p31 = getelementptr <4 x i32>, <4 x i32>* %p0, i32 31
+ %p32 = getelementptr <4 x i32>, <4 x i32>* %p0, i32 32
+ %p33 = getelementptr <4 x i32>, <4 x i32>* %p0, i32 33
+ %0 = load <4 x i32>, <4 x i32>* %p0, align 16
+ %1 = load <4 x i32>, <4 x i32>* %p1, align 16
+ %2 = load <4 x i32>, <4 x i32>* %p2, align 16
+ %3 = load <4 x i32>, <4 x i32>* %p3, align 16
+ %4 = load <4 x i32>, <4 x i32>* %p4, align 16
+ %5 = load <4 x i32>, <4 x i32>* %p5, align 16
+ %6 = load <4 x i32>, <4 x i32>* %p6, align 16
+ %7 = load <4 x i32>, <4 x i32>* %p7, align 16
+ %8 = load <4 x i32>, <4 x i32>* %p8, align 16
+ %9 = load <4 x i32>, <4 x i32>* %p9, align 16
+ %10 = load <4 x i32>, <4 x i32>* %p10, align 16
+ %11 = load <4 x i32>, <4 x i32>* %p11, align 16
+ %12 = load <4 x i32>, <4 x i32>* %p12, align 16
+ %13 = load <4 x i32>, <4 x i32>* %p13, align 16
+ %14 = load <4 x i32>, <4 x i32>* %p14, align 16
+ %15 = load <4 x i32>, <4 x i32>* %p15, align 16
+ %16 = load <4 x i32>, <4 x i32>* %p16, align 16
+ %17 = load <4 x i32>, <4 x i32>* %p17, align 16
+ %18 = load <4 x i32>, <4 x i32>* %p18, align 16
+ %19 = load <4 x i32>, <4 x i32>* %p19, align 16
+ %20 = load <4 x i32>, <4 x i32>* %p20, align 16
+ %21 = load <4 x i32>, <4 x i32>* %p21, align 16
+ %22 = load <4 x i32>, <4 x i32>* %p22, align 16
+ %23 = load <4 x i32>, <4 x i32>* %p23, align 16
+ %24 = load <4 x i32>, <4 x i32>* %p24, align 16
+ %25 = load <4 x i32>, <4 x i32>* %p25, align 16
+ %26 = load <4 x i32>, <4 x i32>* %p26, align 16
+ %27 = load <4 x i32>, <4 x i32>* %p27, align 16
+ %28 = load <4 x i32>, <4 x i32>* %p28, align 16
+ %29 = load <4 x i32>, <4 x i32>* %p29, align 16
+ %30 = load <4 x i32>, <4 x i32>* %p30, align 16
+ %31 = load <4 x i32>, <4 x i32>* %p31, align 16
+ %32 = load <4 x i32>, <4 x i32>* %p32, align 16
+ %33 = load <4 x i32>, <4 x i32>* %p33, align 16
%r1 = call <4 x i32> @llvm.mips.addv.w(<4 x i32> %0, <4 x i32> %1)
%r2 = call <4 x i32> @llvm.mips.addv.w(<4 x i32> %r1, <4 x i32> %2)
%r3 = call <4 x i32> @llvm.mips.addv.w(<4 x i32> %r2, <4 x i32> %3)
@@ -453,73 +453,73 @@ declare i32 @llvm.mips.copy.s.w(<4 x i32>, i32) nounwind
define i32 @test_i64(<2 x i64>* %p0, <2 x i64>* %q1) nounwind {
entry:
- %p1 = getelementptr <2 x i64>* %p0, i32 1
- %p2 = getelementptr <2 x i64>* %p0, i32 2
- %p3 = getelementptr <2 x i64>* %p0, i32 3
- %p4 = getelementptr <2 x i64>* %p0, i32 4
- %p5 = getelementptr <2 x i64>* %p0, i32 5
- %p6 = getelementptr <2 x i64>* %p0, i32 6
- %p7 = getelementptr <2 x i64>* %p0, i32 7
- %p8 = getelementptr <2 x i64>* %p0, i32 8
- %p9 = getelementptr <2 x i64>* %p0, i32 9
- %p10 = getelementptr <2 x i64>* %p0, i32 10
- %p11 = getelementptr <2 x i64>* %p0, i32 11
- %p12 = getelementptr <2 x i64>* %p0, i32 12
- %p13 = getelementptr <2 x i64>* %p0, i32 13
- %p14 = getelementptr <2 x i64>* %p0, i32 14
- %p15 = getelementptr <2 x i64>* %p0, i32 15
- %p16 = getelementptr <2 x i64>* %p0, i32 16
- %p17 = getelementptr <2 x i64>* %p0, i32 17
- %p18 = getelementptr <2 x i64>* %p0, i32 18
- %p19 = getelementptr <2 x i64>* %p0, i32 19
- %p20 = getelementptr <2 x i64>* %p0, i32 20
- %p21 = getelementptr <2 x i64>* %p0, i32 21
- %p22 = getelementptr <2 x i64>* %p0, i32 22
- %p23 = getelementptr <2 x i64>* %p0, i32 23
- %p24 = getelementptr <2 x i64>* %p0, i32 24
- %p25 = getelementptr <2 x i64>* %p0, i32 25
- %p26 = getelementptr <2 x i64>* %p0, i32 26
- %p27 = getelementptr <2 x i64>* %p0, i32 27
- %p28 = getelementptr <2 x i64>* %p0, i32 28
- %p29 = getelementptr <2 x i64>* %p0, i32 29
- %p30 = getelementptr <2 x i64>* %p0, i32 30
- %p31 = getelementptr <2 x i64>* %p0, i32 31
- %p32 = getelementptr <2 x i64>* %p0, i32 32
- %p33 = getelementptr <2 x i64>* %p0, i32 33
- %0 = load <2 x i64>* %p0, align 16
- %1 = load <2 x i64>* %p1, align 16
- %2 = load <2 x i64>* %p2, align 16
- %3 = load <2 x i64>* %p3, align 16
- %4 = load <2 x i64>* %p4, align 16
- %5 = load <2 x i64>* %p5, align 16
- %6 = load <2 x i64>* %p6, align 16
- %7 = load <2 x i64>* %p7, align 16
- %8 = load <2 x i64>* %p8, align 16
- %9 = load <2 x i64>* %p9, align 16
- %10 = load <2 x i64>* %p10, align 16
- %11 = load <2 x i64>* %p11, align 16
- %12 = load <2 x i64>* %p12, align 16
- %13 = load <2 x i64>* %p13, align 16
- %14 = load <2 x i64>* %p14, align 16
- %15 = load <2 x i64>* %p15, align 16
- %16 = load <2 x i64>* %p16, align 16
- %17 = load <2 x i64>* %p17, align 16
- %18 = load <2 x i64>* %p18, align 16
- %19 = load <2 x i64>* %p19, align 16
- %20 = load <2 x i64>* %p20, align 16
- %21 = load <2 x i64>* %p21, align 16
- %22 = load <2 x i64>* %p22, align 16
- %23 = load <2 x i64>* %p23, align 16
- %24 = load <2 x i64>* %p24, align 16
- %25 = load <2 x i64>* %p25, align 16
- %26 = load <2 x i64>* %p26, align 16
- %27 = load <2 x i64>* %p27, align 16
- %28 = load <2 x i64>* %p28, align 16
- %29 = load <2 x i64>* %p29, align 16
- %30 = load <2 x i64>* %p30, align 16
- %31 = load <2 x i64>* %p31, align 16
- %32 = load <2 x i64>* %p32, align 16
- %33 = load <2 x i64>* %p33, align 16
+ %p1 = getelementptr <2 x i64>, <2 x i64>* %p0, i32 1
+ %p2 = getelementptr <2 x i64>, <2 x i64>* %p0, i32 2
+ %p3 = getelementptr <2 x i64>, <2 x i64>* %p0, i32 3
+ %p4 = getelementptr <2 x i64>, <2 x i64>* %p0, i32 4
+ %p5 = getelementptr <2 x i64>, <2 x i64>* %p0, i32 5
+ %p6 = getelementptr <2 x i64>, <2 x i64>* %p0, i32 6
+ %p7 = getelementptr <2 x i64>, <2 x i64>* %p0, i32 7
+ %p8 = getelementptr <2 x i64>, <2 x i64>* %p0, i32 8
+ %p9 = getelementptr <2 x i64>, <2 x i64>* %p0, i32 9
+ %p10 = getelementptr <2 x i64>, <2 x i64>* %p0, i32 10
+ %p11 = getelementptr <2 x i64>, <2 x i64>* %p0, i32 11
+ %p12 = getelementptr <2 x i64>, <2 x i64>* %p0, i32 12
+ %p13 = getelementptr <2 x i64>, <2 x i64>* %p0, i32 13
+ %p14 = getelementptr <2 x i64>, <2 x i64>* %p0, i32 14
+ %p15 = getelementptr <2 x i64>, <2 x i64>* %p0, i32 15
+ %p16 = getelementptr <2 x i64>, <2 x i64>* %p0, i32 16
+ %p17 = getelementptr <2 x i64>, <2 x i64>* %p0, i32 17
+ %p18 = getelementptr <2 x i64>, <2 x i64>* %p0, i32 18
+ %p19 = getelementptr <2 x i64>, <2 x i64>* %p0, i32 19
+ %p20 = getelementptr <2 x i64>, <2 x i64>* %p0, i32 20
+ %p21 = getelementptr <2 x i64>, <2 x i64>* %p0, i32 21
+ %p22 = getelementptr <2 x i64>, <2 x i64>* %p0, i32 22
+ %p23 = getelementptr <2 x i64>, <2 x i64>* %p0, i32 23
+ %p24 = getelementptr <2 x i64>, <2 x i64>* %p0, i32 24
+ %p25 = getelementptr <2 x i64>, <2 x i64>* %p0, i32 25
+ %p26 = getelementptr <2 x i64>, <2 x i64>* %p0, i32 26
+ %p27 = getelementptr <2 x i64>, <2 x i64>* %p0, i32 27
+ %p28 = getelementptr <2 x i64>, <2 x i64>* %p0, i32 28
+ %p29 = getelementptr <2 x i64>, <2 x i64>* %p0, i32 29
+ %p30 = getelementptr <2 x i64>, <2 x i64>* %p0, i32 30
+ %p31 = getelementptr <2 x i64>, <2 x i64>* %p0, i32 31
+ %p32 = getelementptr <2 x i64>, <2 x i64>* %p0, i32 32
+ %p33 = getelementptr <2 x i64>, <2 x i64>* %p0, i32 33
+ %0 = load <2 x i64>, <2 x i64>* %p0, align 16
+ %1 = load <2 x i64>, <2 x i64>* %p1, align 16
+ %2 = load <2 x i64>, <2 x i64>* %p2, align 16
+ %3 = load <2 x i64>, <2 x i64>* %p3, align 16
+ %4 = load <2 x i64>, <2 x i64>* %p4, align 16
+ %5 = load <2 x i64>, <2 x i64>* %p5, align 16
+ %6 = load <2 x i64>, <2 x i64>* %p6, align 16
+ %7 = load <2 x i64>, <2 x i64>* %p7, align 16
+ %8 = load <2 x i64>, <2 x i64>* %p8, align 16
+ %9 = load <2 x i64>, <2 x i64>* %p9, align 16
+ %10 = load <2 x i64>, <2 x i64>* %p10, align 16
+ %11 = load <2 x i64>, <2 x i64>* %p11, align 16
+ %12 = load <2 x i64>, <2 x i64>* %p12, align 16
+ %13 = load <2 x i64>, <2 x i64>* %p13, align 16
+ %14 = load <2 x i64>, <2 x i64>* %p14, align 16
+ %15 = load <2 x i64>, <2 x i64>* %p15, align 16
+ %16 = load <2 x i64>, <2 x i64>* %p16, align 16
+ %17 = load <2 x i64>, <2 x i64>* %p17, align 16
+ %18 = load <2 x i64>, <2 x i64>* %p18, align 16
+ %19 = load <2 x i64>, <2 x i64>* %p19, align 16
+ %20 = load <2 x i64>, <2 x i64>* %p20, align 16
+ %21 = load <2 x i64>, <2 x i64>* %p21, align 16
+ %22 = load <2 x i64>, <2 x i64>* %p22, align 16
+ %23 = load <2 x i64>, <2 x i64>* %p23, align 16
+ %24 = load <2 x i64>, <2 x i64>* %p24, align 16
+ %25 = load <2 x i64>, <2 x i64>* %p25, align 16
+ %26 = load <2 x i64>, <2 x i64>* %p26, align 16
+ %27 = load <2 x i64>, <2 x i64>* %p27, align 16
+ %28 = load <2 x i64>, <2 x i64>* %p28, align 16
+ %29 = load <2 x i64>, <2 x i64>* %p29, align 16
+ %30 = load <2 x i64>, <2 x i64>* %p30, align 16
+ %31 = load <2 x i64>, <2 x i64>* %p31, align 16
+ %32 = load <2 x i64>, <2 x i64>* %p32, align 16
+ %33 = load <2 x i64>, <2 x i64>* %p33, align 16
%r1 = call <2 x i64> @llvm.mips.addv.d(<2 x i64> %0, <2 x i64> %1)
%r2 = call <2 x i64> @llvm.mips.addv.d(<2 x i64> %r1, <2 x i64> %2)
%r3 = call <2 x i64> @llvm.mips.addv.d(<2 x i64> %r2, <2 x i64> %3)
diff --git a/test/CodeGen/Mips/msa/vec.ll b/test/CodeGen/Mips/msa/vec.ll
index d5b97f52fb83..8790923ce727 100644
--- a/test/CodeGen/Mips/msa/vec.ll
+++ b/test/CodeGen/Mips/msa/vec.ll
@@ -9,8 +9,8 @@
define void @llvm_mips_and_v_b_test() nounwind {
entry:
- %0 = load <16 x i8>* @llvm_mips_and_v_b_ARG1
- %1 = load <16 x i8>* @llvm_mips_and_v_b_ARG2
+ %0 = load <16 x i8>, <16 x i8>* @llvm_mips_and_v_b_ARG1
+ %1 = load <16 x i8>, <16 x i8>* @llvm_mips_and_v_b_ARG2
%2 = bitcast <16 x i8> %0 to <16 x i8>
%3 = bitcast <16 x i8> %1 to <16 x i8>
%4 = tail call <16 x i8> @llvm.mips.and.v(<16 x i8> %2, <16 x i8> %3)
@@ -32,8 +32,8 @@ entry:
define void @llvm_mips_and_v_h_test() nounwind {
entry:
- %0 = load <8 x i16>* @llvm_mips_and_v_h_ARG1
- %1 = load <8 x i16>* @llvm_mips_and_v_h_ARG2
+ %0 = load <8 x i16>, <8 x i16>* @llvm_mips_and_v_h_ARG1
+ %1 = load <8 x i16>, <8 x i16>* @llvm_mips_and_v_h_ARG2
%2 = bitcast <8 x i16> %0 to <16 x i8>
%3 = bitcast <8 x i16> %1 to <16 x i8>
%4 = tail call <16 x i8> @llvm.mips.and.v(<16 x i8> %2, <16 x i8> %3)
@@ -55,8 +55,8 @@ entry:
define void @llvm_mips_and_v_w_test() nounwind {
entry:
- %0 = load <4 x i32>* @llvm_mips_and_v_w_ARG1
- %1 = load <4 x i32>* @llvm_mips_and_v_w_ARG2
+ %0 = load <4 x i32>, <4 x i32>* @llvm_mips_and_v_w_ARG1
+ %1 = load <4 x i32>, <4 x i32>* @llvm_mips_and_v_w_ARG2
%2 = bitcast <4 x i32> %0 to <16 x i8>
%3 = bitcast <4 x i32> %1 to <16 x i8>
%4 = tail call <16 x i8> @llvm.mips.and.v(<16 x i8> %2, <16 x i8> %3)
@@ -78,8 +78,8 @@ entry:
define void @llvm_mips_and_v_d_test() nounwind {
entry:
- %0 = load <2 x i64>* @llvm_mips_and_v_d_ARG1
- %1 = load <2 x i64>* @llvm_mips_and_v_d_ARG2
+ %0 = load <2 x i64>, <2 x i64>* @llvm_mips_and_v_d_ARG1
+ %1 = load <2 x i64>, <2 x i64>* @llvm_mips_and_v_d_ARG2
%2 = bitcast <2 x i64> %0 to <16 x i8>
%3 = bitcast <2 x i64> %1 to <16 x i8>
%4 = tail call <16 x i8> @llvm.mips.and.v(<16 x i8> %2, <16 x i8> %3)
@@ -97,8 +97,8 @@ entry:
;
define void @and_v_b_test() nounwind {
entry:
- %0 = load <16 x i8>* @llvm_mips_and_v_b_ARG1
- %1 = load <16 x i8>* @llvm_mips_and_v_b_ARG2
+ %0 = load <16 x i8>, <16 x i8>* @llvm_mips_and_v_b_ARG1
+ %1 = load <16 x i8>, <16 x i8>* @llvm_mips_and_v_b_ARG2
%2 = and <16 x i8> %0, %1
store <16 x i8> %2, <16 x i8>* @llvm_mips_and_v_b_RES
ret void
@@ -113,8 +113,8 @@ entry:
;
define void @and_v_h_test() nounwind {
entry:
- %0 = load <8 x i16>* @llvm_mips_and_v_h_ARG1
- %1 = load <8 x i16>* @llvm_mips_and_v_h_ARG2
+ %0 = load <8 x i16>, <8 x i16>* @llvm_mips_and_v_h_ARG1
+ %1 = load <8 x i16>, <8 x i16>* @llvm_mips_and_v_h_ARG2
%2 = and <8 x i16> %0, %1
store <8 x i16> %2, <8 x i16>* @llvm_mips_and_v_h_RES
ret void
@@ -130,8 +130,8 @@ entry:
define void @and_v_w_test() nounwind {
entry:
- %0 = load <4 x i32>* @llvm_mips_and_v_w_ARG1
- %1 = load <4 x i32>* @llvm_mips_and_v_w_ARG2
+ %0 = load <4 x i32>, <4 x i32>* @llvm_mips_and_v_w_ARG1
+ %1 = load <4 x i32>, <4 x i32>* @llvm_mips_and_v_w_ARG2
%2 = and <4 x i32> %0, %1
store <4 x i32> %2, <4 x i32>* @llvm_mips_and_v_w_RES
ret void
@@ -147,8 +147,8 @@ entry:
define void @and_v_d_test() nounwind {
entry:
- %0 = load <2 x i64>* @llvm_mips_and_v_d_ARG1
- %1 = load <2 x i64>* @llvm_mips_and_v_d_ARG2
+ %0 = load <2 x i64>, <2 x i64>* @llvm_mips_and_v_d_ARG1
+ %1 = load <2 x i64>, <2 x i64>* @llvm_mips_and_v_d_ARG2
%2 = and <2 x i64> %0, %1
store <2 x i64> %2, <2 x i64>* @llvm_mips_and_v_d_RES
ret void
@@ -168,9 +168,9 @@ entry:
define void @llvm_mips_bmnz_v_b_test() nounwind {
entry:
- %0 = load <16 x i8>* @llvm_mips_bmnz_v_b_ARG1
- %1 = load <16 x i8>* @llvm_mips_bmnz_v_b_ARG2
- %2 = load <16 x i8>* @llvm_mips_bmnz_v_b_ARG3
+ %0 = load <16 x i8>, <16 x i8>* @llvm_mips_bmnz_v_b_ARG1
+ %1 = load <16 x i8>, <16 x i8>* @llvm_mips_bmnz_v_b_ARG2
+ %2 = load <16 x i8>, <16 x i8>* @llvm_mips_bmnz_v_b_ARG3
%3 = bitcast <16 x i8> %0 to <16 x i8>
%4 = bitcast <16 x i8> %1 to <16 x i8>
%5 = bitcast <16 x i8> %2 to <16 x i8>
@@ -198,9 +198,9 @@ entry:
define void @llvm_mips_bmnz_v_h_test() nounwind {
entry:
- %0 = load <8 x i16>* @llvm_mips_bmnz_v_h_ARG1
- %1 = load <8 x i16>* @llvm_mips_bmnz_v_h_ARG2
- %2 = load <8 x i16>* @llvm_mips_bmnz_v_h_ARG3
+ %0 = load <8 x i16>, <8 x i16>* @llvm_mips_bmnz_v_h_ARG1
+ %1 = load <8 x i16>, <8 x i16>* @llvm_mips_bmnz_v_h_ARG2
+ %2 = load <8 x i16>, <8 x i16>* @llvm_mips_bmnz_v_h_ARG3
%3 = bitcast <8 x i16> %0 to <16 x i8>
%4 = bitcast <8 x i16> %1 to <16 x i8>
%5 = bitcast <8 x i16> %2 to <16 x i8>
@@ -228,9 +228,9 @@ entry:
define void @llvm_mips_bmnz_v_w_test() nounwind {
entry:
- %0 = load <4 x i32>* @llvm_mips_bmnz_v_w_ARG1
- %1 = load <4 x i32>* @llvm_mips_bmnz_v_w_ARG2
- %2 = load <4 x i32>* @llvm_mips_bmnz_v_w_ARG3
+ %0 = load <4 x i32>, <4 x i32>* @llvm_mips_bmnz_v_w_ARG1
+ %1 = load <4 x i32>, <4 x i32>* @llvm_mips_bmnz_v_w_ARG2
+ %2 = load <4 x i32>, <4 x i32>* @llvm_mips_bmnz_v_w_ARG3
%3 = bitcast <4 x i32> %0 to <16 x i8>
%4 = bitcast <4 x i32> %1 to <16 x i8>
%5 = bitcast <4 x i32> %2 to <16 x i8>
@@ -258,9 +258,9 @@ entry:
define void @llvm_mips_bmnz_v_d_test() nounwind {
entry:
- %0 = load <2 x i64>* @llvm_mips_bmnz_v_d_ARG1
- %1 = load <2 x i64>* @llvm_mips_bmnz_v_d_ARG2
- %2 = load <2 x i64>* @llvm_mips_bmnz_v_d_ARG3
+ %0 = load <2 x i64>, <2 x i64>* @llvm_mips_bmnz_v_d_ARG1
+ %1 = load <2 x i64>, <2 x i64>* @llvm_mips_bmnz_v_d_ARG2
+ %2 = load <2 x i64>, <2 x i64>* @llvm_mips_bmnz_v_d_ARG3
%3 = bitcast <2 x i64> %0 to <16 x i8>
%4 = bitcast <2 x i64> %1 to <16 x i8>
%5 = bitcast <2 x i64> %2 to <16 x i8>
@@ -288,9 +288,9 @@ entry:
define void @llvm_mips_bmz_v_b_test() nounwind {
entry:
- %0 = load <16 x i8>* @llvm_mips_bmz_v_b_ARG1
- %1 = load <16 x i8>* @llvm_mips_bmz_v_b_ARG2
- %2 = load <16 x i8>* @llvm_mips_bmz_v_b_ARG3
+ %0 = load <16 x i8>, <16 x i8>* @llvm_mips_bmz_v_b_ARG1
+ %1 = load <16 x i8>, <16 x i8>* @llvm_mips_bmz_v_b_ARG2
+ %2 = load <16 x i8>, <16 x i8>* @llvm_mips_bmz_v_b_ARG3
%3 = bitcast <16 x i8> %0 to <16 x i8>
%4 = bitcast <16 x i8> %1 to <16 x i8>
%5 = bitcast <16 x i8> %2 to <16 x i8>
@@ -319,9 +319,9 @@ entry:
define void @llvm_mips_bmz_v_h_test() nounwind {
entry:
- %0 = load <8 x i16>* @llvm_mips_bmz_v_h_ARG1
- %1 = load <8 x i16>* @llvm_mips_bmz_v_h_ARG2
- %2 = load <8 x i16>* @llvm_mips_bmz_v_h_ARG3
+ %0 = load <8 x i16>, <8 x i16>* @llvm_mips_bmz_v_h_ARG1
+ %1 = load <8 x i16>, <8 x i16>* @llvm_mips_bmz_v_h_ARG2
+ %2 = load <8 x i16>, <8 x i16>* @llvm_mips_bmz_v_h_ARG3
%3 = bitcast <8 x i16> %0 to <16 x i8>
%4 = bitcast <8 x i16> %1 to <16 x i8>
%5 = bitcast <8 x i16> %2 to <16 x i8>
@@ -350,9 +350,9 @@ entry:
define void @llvm_mips_bmz_v_w_test() nounwind {
entry:
- %0 = load <4 x i32>* @llvm_mips_bmz_v_w_ARG1
- %1 = load <4 x i32>* @llvm_mips_bmz_v_w_ARG2
- %2 = load <4 x i32>* @llvm_mips_bmz_v_w_ARG3
+ %0 = load <4 x i32>, <4 x i32>* @llvm_mips_bmz_v_w_ARG1
+ %1 = load <4 x i32>, <4 x i32>* @llvm_mips_bmz_v_w_ARG2
+ %2 = load <4 x i32>, <4 x i32>* @llvm_mips_bmz_v_w_ARG3
%3 = bitcast <4 x i32> %0 to <16 x i8>
%4 = bitcast <4 x i32> %1 to <16 x i8>
%5 = bitcast <4 x i32> %2 to <16 x i8>
@@ -381,9 +381,9 @@ entry:
define void @llvm_mips_bmz_v_d_test() nounwind {
entry:
- %0 = load <2 x i64>* @llvm_mips_bmz_v_d_ARG1
- %1 = load <2 x i64>* @llvm_mips_bmz_v_d_ARG2
- %2 = load <2 x i64>* @llvm_mips_bmz_v_d_ARG3
+ %0 = load <2 x i64>, <2 x i64>* @llvm_mips_bmz_v_d_ARG1
+ %1 = load <2 x i64>, <2 x i64>* @llvm_mips_bmz_v_d_ARG2
+ %2 = load <2 x i64>, <2 x i64>* @llvm_mips_bmz_v_d_ARG3
%3 = bitcast <2 x i64> %0 to <16 x i8>
%4 = bitcast <2 x i64> %1 to <16 x i8>
%5 = bitcast <2 x i64> %2 to <16 x i8>
@@ -412,9 +412,9 @@ entry:
define void @llvm_mips_bsel_v_b_test() nounwind {
entry:
- %0 = load <16 x i8>* @llvm_mips_bsel_v_b_ARG1
- %1 = load <16 x i8>* @llvm_mips_bsel_v_b_ARG2
- %2 = load <16 x i8>* @llvm_mips_bsel_v_b_ARG3
+ %0 = load <16 x i8>, <16 x i8>* @llvm_mips_bsel_v_b_ARG1
+ %1 = load <16 x i8>, <16 x i8>* @llvm_mips_bsel_v_b_ARG2
+ %2 = load <16 x i8>, <16 x i8>* @llvm_mips_bsel_v_b_ARG3
%3 = bitcast <16 x i8> %0 to <16 x i8>
%4 = bitcast <16 x i8> %1 to <16 x i8>
%5 = bitcast <16 x i8> %2 to <16 x i8>
@@ -443,9 +443,9 @@ entry:
define void @llvm_mips_bsel_v_h_test() nounwind {
entry:
- %0 = load <8 x i16>* @llvm_mips_bsel_v_h_ARG1
- %1 = load <8 x i16>* @llvm_mips_bsel_v_h_ARG2
- %2 = load <8 x i16>* @llvm_mips_bsel_v_h_ARG3
+ %0 = load <8 x i16>, <8 x i16>* @llvm_mips_bsel_v_h_ARG1
+ %1 = load <8 x i16>, <8 x i16>* @llvm_mips_bsel_v_h_ARG2
+ %2 = load <8 x i16>, <8 x i16>* @llvm_mips_bsel_v_h_ARG3
%3 = bitcast <8 x i16> %0 to <16 x i8>
%4 = bitcast <8 x i16> %1 to <16 x i8>
%5 = bitcast <8 x i16> %2 to <16 x i8>
@@ -474,9 +474,9 @@ entry:
define void @llvm_mips_bsel_v_w_test() nounwind {
entry:
- %0 = load <4 x i32>* @llvm_mips_bsel_v_w_ARG1
- %1 = load <4 x i32>* @llvm_mips_bsel_v_w_ARG2
- %2 = load <4 x i32>* @llvm_mips_bsel_v_w_ARG3
+ %0 = load <4 x i32>, <4 x i32>* @llvm_mips_bsel_v_w_ARG1
+ %1 = load <4 x i32>, <4 x i32>* @llvm_mips_bsel_v_w_ARG2
+ %2 = load <4 x i32>, <4 x i32>* @llvm_mips_bsel_v_w_ARG3
%3 = bitcast <4 x i32> %0 to <16 x i8>
%4 = bitcast <4 x i32> %1 to <16 x i8>
%5 = bitcast <4 x i32> %2 to <16 x i8>
@@ -505,9 +505,9 @@ entry:
define void @llvm_mips_bsel_v_d_test() nounwind {
entry:
- %0 = load <2 x i64>* @llvm_mips_bsel_v_d_ARG1
- %1 = load <2 x i64>* @llvm_mips_bsel_v_d_ARG2
- %2 = load <2 x i64>* @llvm_mips_bsel_v_d_ARG3
+ %0 = load <2 x i64>, <2 x i64>* @llvm_mips_bsel_v_d_ARG1
+ %1 = load <2 x i64>, <2 x i64>* @llvm_mips_bsel_v_d_ARG2
+ %2 = load <2 x i64>, <2 x i64>* @llvm_mips_bsel_v_d_ARG3
%3 = bitcast <2 x i64> %0 to <16 x i8>
%4 = bitcast <2 x i64> %1 to <16 x i8>
%5 = bitcast <2 x i64> %2 to <16 x i8>
@@ -535,8 +535,8 @@ entry:
define void @llvm_mips_nor_v_b_test() nounwind {
entry:
- %0 = load <16 x i8>* @llvm_mips_nor_v_b_ARG1
- %1 = load <16 x i8>* @llvm_mips_nor_v_b_ARG2
+ %0 = load <16 x i8>, <16 x i8>* @llvm_mips_nor_v_b_ARG1
+ %1 = load <16 x i8>, <16 x i8>* @llvm_mips_nor_v_b_ARG2
%2 = bitcast <16 x i8> %0 to <16 x i8>
%3 = bitcast <16 x i8> %1 to <16 x i8>
%4 = tail call <16 x i8> @llvm.mips.nor.v(<16 x i8> %2, <16 x i8> %3)
@@ -558,8 +558,8 @@ entry:
define void @llvm_mips_nor_v_h_test() nounwind {
entry:
- %0 = load <8 x i16>* @llvm_mips_nor_v_h_ARG1
- %1 = load <8 x i16>* @llvm_mips_nor_v_h_ARG2
+ %0 = load <8 x i16>, <8 x i16>* @llvm_mips_nor_v_h_ARG1
+ %1 = load <8 x i16>, <8 x i16>* @llvm_mips_nor_v_h_ARG2
%2 = bitcast <8 x i16> %0 to <16 x i8>
%3 = bitcast <8 x i16> %1 to <16 x i8>
%4 = tail call <16 x i8> @llvm.mips.nor.v(<16 x i8> %2, <16 x i8> %3)
@@ -581,8 +581,8 @@ entry:
define void @llvm_mips_nor_v_w_test() nounwind {
entry:
- %0 = load <4 x i32>* @llvm_mips_nor_v_w_ARG1
- %1 = load <4 x i32>* @llvm_mips_nor_v_w_ARG2
+ %0 = load <4 x i32>, <4 x i32>* @llvm_mips_nor_v_w_ARG1
+ %1 = load <4 x i32>, <4 x i32>* @llvm_mips_nor_v_w_ARG2
%2 = bitcast <4 x i32> %0 to <16 x i8>
%3 = bitcast <4 x i32> %1 to <16 x i8>
%4 = tail call <16 x i8> @llvm.mips.nor.v(<16 x i8> %2, <16 x i8> %3)
@@ -604,8 +604,8 @@ entry:
define void @llvm_mips_nor_v_d_test() nounwind {
entry:
- %0 = load <2 x i64>* @llvm_mips_nor_v_d_ARG1
- %1 = load <2 x i64>* @llvm_mips_nor_v_d_ARG2
+ %0 = load <2 x i64>, <2 x i64>* @llvm_mips_nor_v_d_ARG1
+ %1 = load <2 x i64>, <2 x i64>* @llvm_mips_nor_v_d_ARG2
%2 = bitcast <2 x i64> %0 to <16 x i8>
%3 = bitcast <2 x i64> %1 to <16 x i8>
%4 = tail call <16 x i8> @llvm.mips.nor.v(<16 x i8> %2, <16 x i8> %3)
@@ -627,8 +627,8 @@ entry:
define void @llvm_mips_or_v_b_test() nounwind {
entry:
- %0 = load <16 x i8>* @llvm_mips_or_v_b_ARG1
- %1 = load <16 x i8>* @llvm_mips_or_v_b_ARG2
+ %0 = load <16 x i8>, <16 x i8>* @llvm_mips_or_v_b_ARG1
+ %1 = load <16 x i8>, <16 x i8>* @llvm_mips_or_v_b_ARG2
%2 = bitcast <16 x i8> %0 to <16 x i8>
%3 = bitcast <16 x i8> %1 to <16 x i8>
%4 = tail call <16 x i8> @llvm.mips.or.v(<16 x i8> %2, <16 x i8> %3)
@@ -650,8 +650,8 @@ entry:
define void @llvm_mips_or_v_h_test() nounwind {
entry:
- %0 = load <8 x i16>* @llvm_mips_or_v_h_ARG1
- %1 = load <8 x i16>* @llvm_mips_or_v_h_ARG2
+ %0 = load <8 x i16>, <8 x i16>* @llvm_mips_or_v_h_ARG1
+ %1 = load <8 x i16>, <8 x i16>* @llvm_mips_or_v_h_ARG2
%2 = bitcast <8 x i16> %0 to <16 x i8>
%3 = bitcast <8 x i16> %1 to <16 x i8>
%4 = tail call <16 x i8> @llvm.mips.or.v(<16 x i8> %2, <16 x i8> %3)
@@ -673,8 +673,8 @@ entry:
define void @llvm_mips_or_v_w_test() nounwind {
entry:
- %0 = load <4 x i32>* @llvm_mips_or_v_w_ARG1
- %1 = load <4 x i32>* @llvm_mips_or_v_w_ARG2
+ %0 = load <4 x i32>, <4 x i32>* @llvm_mips_or_v_w_ARG1
+ %1 = load <4 x i32>, <4 x i32>* @llvm_mips_or_v_w_ARG2
%2 = bitcast <4 x i32> %0 to <16 x i8>
%3 = bitcast <4 x i32> %1 to <16 x i8>
%4 = tail call <16 x i8> @llvm.mips.or.v(<16 x i8> %2, <16 x i8> %3)
@@ -696,8 +696,8 @@ entry:
define void @llvm_mips_or_v_d_test() nounwind {
entry:
- %0 = load <2 x i64>* @llvm_mips_or_v_d_ARG1
- %1 = load <2 x i64>* @llvm_mips_or_v_d_ARG2
+ %0 = load <2 x i64>, <2 x i64>* @llvm_mips_or_v_d_ARG1
+ %1 = load <2 x i64>, <2 x i64>* @llvm_mips_or_v_d_ARG2
%2 = bitcast <2 x i64> %0 to <16 x i8>
%3 = bitcast <2 x i64> %1 to <16 x i8>
%4 = tail call <16 x i8> @llvm.mips.or.v(<16 x i8> %2, <16 x i8> %3)
@@ -715,8 +715,8 @@ entry:
;
define void @or_v_b_test() nounwind {
entry:
- %0 = load <16 x i8>* @llvm_mips_or_v_b_ARG1
- %1 = load <16 x i8>* @llvm_mips_or_v_b_ARG2
+ %0 = load <16 x i8>, <16 x i8>* @llvm_mips_or_v_b_ARG1
+ %1 = load <16 x i8>, <16 x i8>* @llvm_mips_or_v_b_ARG2
%2 = or <16 x i8> %0, %1
store <16 x i8> %2, <16 x i8>* @llvm_mips_or_v_b_RES
ret void
@@ -731,8 +731,8 @@ entry:
;
define void @or_v_h_test() nounwind {
entry:
- %0 = load <8 x i16>* @llvm_mips_or_v_h_ARG1
- %1 = load <8 x i16>* @llvm_mips_or_v_h_ARG2
+ %0 = load <8 x i16>, <8 x i16>* @llvm_mips_or_v_h_ARG1
+ %1 = load <8 x i16>, <8 x i16>* @llvm_mips_or_v_h_ARG2
%2 = or <8 x i16> %0, %1
store <8 x i16> %2, <8 x i16>* @llvm_mips_or_v_h_RES
ret void
@@ -748,8 +748,8 @@ entry:
define void @or_v_w_test() nounwind {
entry:
- %0 = load <4 x i32>* @llvm_mips_or_v_w_ARG1
- %1 = load <4 x i32>* @llvm_mips_or_v_w_ARG2
+ %0 = load <4 x i32>, <4 x i32>* @llvm_mips_or_v_w_ARG1
+ %1 = load <4 x i32>, <4 x i32>* @llvm_mips_or_v_w_ARG2
%2 = or <4 x i32> %0, %1
store <4 x i32> %2, <4 x i32>* @llvm_mips_or_v_w_RES
ret void
@@ -765,8 +765,8 @@ entry:
define void @or_v_d_test() nounwind {
entry:
- %0 = load <2 x i64>* @llvm_mips_or_v_d_ARG1
- %1 = load <2 x i64>* @llvm_mips_or_v_d_ARG2
+ %0 = load <2 x i64>, <2 x i64>* @llvm_mips_or_v_d_ARG1
+ %1 = load <2 x i64>, <2 x i64>* @llvm_mips_or_v_d_ARG2
%2 = or <2 x i64> %0, %1
store <2 x i64> %2, <2 x i64>* @llvm_mips_or_v_d_RES
ret void
@@ -785,8 +785,8 @@ entry:
define void @llvm_mips_xor_v_b_test() nounwind {
entry:
- %0 = load <16 x i8>* @llvm_mips_xor_v_b_ARG1
- %1 = load <16 x i8>* @llvm_mips_xor_v_b_ARG2
+ %0 = load <16 x i8>, <16 x i8>* @llvm_mips_xor_v_b_ARG1
+ %1 = load <16 x i8>, <16 x i8>* @llvm_mips_xor_v_b_ARG2
%2 = bitcast <16 x i8> %0 to <16 x i8>
%3 = bitcast <16 x i8> %1 to <16 x i8>
%4 = tail call <16 x i8> @llvm.mips.xor.v(<16 x i8> %2, <16 x i8> %3)
@@ -808,8 +808,8 @@ entry:
define void @llvm_mips_xor_v_h_test() nounwind {
entry:
- %0 = load <8 x i16>* @llvm_mips_xor_v_h_ARG1
- %1 = load <8 x i16>* @llvm_mips_xor_v_h_ARG2
+ %0 = load <8 x i16>, <8 x i16>* @llvm_mips_xor_v_h_ARG1
+ %1 = load <8 x i16>, <8 x i16>* @llvm_mips_xor_v_h_ARG2
%2 = bitcast <8 x i16> %0 to <16 x i8>
%3 = bitcast <8 x i16> %1 to <16 x i8>
%4 = tail call <16 x i8> @llvm.mips.xor.v(<16 x i8> %2, <16 x i8> %3)
@@ -831,8 +831,8 @@ entry:
define void @llvm_mips_xor_v_w_test() nounwind {
entry:
- %0 = load <4 x i32>* @llvm_mips_xor_v_w_ARG1
- %1 = load <4 x i32>* @llvm_mips_xor_v_w_ARG2
+ %0 = load <4 x i32>, <4 x i32>* @llvm_mips_xor_v_w_ARG1
+ %1 = load <4 x i32>, <4 x i32>* @llvm_mips_xor_v_w_ARG2
%2 = bitcast <4 x i32> %0 to <16 x i8>
%3 = bitcast <4 x i32> %1 to <16 x i8>
%4 = tail call <16 x i8> @llvm.mips.xor.v(<16 x i8> %2, <16 x i8> %3)
@@ -854,8 +854,8 @@ entry:
define void @llvm_mips_xor_v_d_test() nounwind {
entry:
- %0 = load <2 x i64>* @llvm_mips_xor_v_d_ARG1
- %1 = load <2 x i64>* @llvm_mips_xor_v_d_ARG2
+ %0 = load <2 x i64>, <2 x i64>* @llvm_mips_xor_v_d_ARG1
+ %1 = load <2 x i64>, <2 x i64>* @llvm_mips_xor_v_d_ARG2
%2 = bitcast <2 x i64> %0 to <16 x i8>
%3 = bitcast <2 x i64> %1 to <16 x i8>
%4 = tail call <16 x i8> @llvm.mips.xor.v(<16 x i8> %2, <16 x i8> %3)
@@ -873,8 +873,8 @@ entry:
;
define void @xor_v_b_test() nounwind {
entry:
- %0 = load <16 x i8>* @llvm_mips_xor_v_b_ARG1
- %1 = load <16 x i8>* @llvm_mips_xor_v_b_ARG2
+ %0 = load <16 x i8>, <16 x i8>* @llvm_mips_xor_v_b_ARG1
+ %1 = load <16 x i8>, <16 x i8>* @llvm_mips_xor_v_b_ARG2
%2 = xor <16 x i8> %0, %1
store <16 x i8> %2, <16 x i8>* @llvm_mips_xor_v_b_RES
ret void
@@ -889,8 +889,8 @@ entry:
;
define void @xor_v_h_test() nounwind {
entry:
- %0 = load <8 x i16>* @llvm_mips_xor_v_h_ARG1
- %1 = load <8 x i16>* @llvm_mips_xor_v_h_ARG2
+ %0 = load <8 x i16>, <8 x i16>* @llvm_mips_xor_v_h_ARG1
+ %1 = load <8 x i16>, <8 x i16>* @llvm_mips_xor_v_h_ARG2
%2 = xor <8 x i16> %0, %1
store <8 x i16> %2, <8 x i16>* @llvm_mips_xor_v_h_RES
ret void
@@ -906,8 +906,8 @@ entry:
define void @xor_v_w_test() nounwind {
entry:
- %0 = load <4 x i32>* @llvm_mips_xor_v_w_ARG1
- %1 = load <4 x i32>* @llvm_mips_xor_v_w_ARG2
+ %0 = load <4 x i32>, <4 x i32>* @llvm_mips_xor_v_w_ARG1
+ %1 = load <4 x i32>, <4 x i32>* @llvm_mips_xor_v_w_ARG2
%2 = xor <4 x i32> %0, %1
store <4 x i32> %2, <4 x i32>* @llvm_mips_xor_v_w_RES
ret void
@@ -923,8 +923,8 @@ entry:
define void @xor_v_d_test() nounwind {
entry:
- %0 = load <2 x i64>* @llvm_mips_xor_v_d_ARG1
- %1 = load <2 x i64>* @llvm_mips_xor_v_d_ARG2
+ %0 = load <2 x i64>, <2 x i64>* @llvm_mips_xor_v_d_ARG1
+ %1 = load <2 x i64>, <2 x i64>* @llvm_mips_xor_v_d_ARG2
%2 = xor <2 x i64> %0, %1
store <2 x i64> %2, <2 x i64>* @llvm_mips_xor_v_d_RES
ret void
diff --git a/test/CodeGen/Mips/msa/vecs10.ll b/test/CodeGen/Mips/msa/vecs10.ll
index e22e0755ef00..f442f7727447 100644
--- a/test/CodeGen/Mips/msa/vecs10.ll
+++ b/test/CodeGen/Mips/msa/vecs10.ll
@@ -7,7 +7,7 @@
define i32 @llvm_mips_bnz_v_test() nounwind {
entry:
- %0 = load <16 x i8>* @llvm_mips_bnz_v_ARG1
+ %0 = load <16 x i8>, <16 x i8>* @llvm_mips_bnz_v_ARG1
%1 = tail call i32 @llvm.mips.bnz.v(<16 x i8> %0)
%2 = icmp eq i32 %1, 0
br i1 %2, label %true, label %false
@@ -28,7 +28,7 @@ declare i32 @llvm.mips.bnz.v(<16 x i8>) nounwind
define i32 @llvm_mips_bz_v_test() nounwind {
entry:
- %0 = load <16 x i8>* @llvm_mips_bz_v_ARG1
+ %0 = load <16 x i8>, <16 x i8>* @llvm_mips_bz_v_ARG1
%1 = tail call i32 @llvm.mips.bz.v(<16 x i8> %0)
%2 = icmp eq i32 %1, 0
br i1 %2, label %true, label %false
diff --git a/test/CodeGen/Mips/mul.ll b/test/CodeGen/Mips/mul.ll
index 4ce801b1c9f4..3231f9cac38e 100644
--- a/test/CodeGen/Mips/mul.ll
+++ b/test/CodeGen/Mips/mul.ll
@@ -6,8 +6,8 @@
define void @test() nounwind {
entry:
- %0 = load i32* @iiii, align 4
- %1 = load i32* @jjjj, align 4
+ %0 = load i32, i32* @iiii, align 4
+ %1 = load i32, i32* @jjjj, align 4
%mul = mul nsw i32 %1, %0
; 16: mult ${{[0-9]+}}, ${{[0-9]+}}
; 16: mflo ${{[0-9]+}}
diff --git a/test/CodeGen/Mips/mulll.ll b/test/CodeGen/Mips/mulll.ll
index e37b9197df82..6e5ba647b8bf 100644
--- a/test/CodeGen/Mips/mulll.ll
+++ b/test/CodeGen/Mips/mulll.ll
@@ -6,8 +6,8 @@
define void @test() nounwind {
entry:
- %0 = load i64* @iiii, align 8
- %1 = load i64* @jjjj, align 8
+ %0 = load i64, i64* @iiii, align 8
+ %1 = load i64, i64* @jjjj, align 8
%mul = mul nsw i64 %1, %0
store i64 %mul, i64* @kkkk, align 8
; 16: multu ${{[0-9]+}}, ${{[0-9]+}}
diff --git a/test/CodeGen/Mips/mulull.ll b/test/CodeGen/Mips/mulull.ll
index 4d23c693184b..c1334484fb66 100644
--- a/test/CodeGen/Mips/mulull.ll
+++ b/test/CodeGen/Mips/mulull.ll
@@ -7,8 +7,8 @@
define void @test() nounwind {
entry:
- %0 = load i64* @iiii, align 8
- %1 = load i64* @jjjj, align 8
+ %0 = load i64, i64* @iiii, align 8
+ %1 = load i64, i64* @jjjj, align 8
%mul = mul nsw i64 %1, %0
store i64 %mul, i64* @kkkk, align 8
; 16: multu ${{[0-9]+}}, ${{[0-9]+}}
diff --git a/test/CodeGen/Mips/nacl-align.ll b/test/CodeGen/Mips/nacl-align.ll
index e61b8347760e..ec8f3f06afdf 100644
--- a/test/CodeGen/Mips/nacl-align.ll
+++ b/test/CodeGen/Mips/nacl-align.ll
@@ -67,8 +67,8 @@ default:
define i32 @test2(i32 %i) {
entry:
- %elementptr = getelementptr inbounds [2 x i8*]* @bb_array, i32 0, i32 %i
- %0 = load i8** %elementptr, align 4
+ %elementptr = getelementptr inbounds [2 x i8*], [2 x i8*]* @bb_array, i32 0, i32 %i
+ %0 = load i8*, i8** %elementptr, align 4
indirectbr i8* %0, [label %bb1, label %bb2]
bb1:
diff --git a/test/CodeGen/Mips/nacl-branch-delay.ll b/test/CodeGen/Mips/nacl-branch-delay.ll
index d251eee07526..2927f39a416c 100644
--- a/test/CodeGen/Mips/nacl-branch-delay.ll
+++ b/test/CodeGen/Mips/nacl-branch-delay.ll
@@ -10,7 +10,7 @@ declare void @f2()
define void @test1() {
- %1 = load i32* @x, align 4
+ %1 = load i32, i32* @x, align 4
call void @f1(i32 %1)
ret void
diff --git a/test/CodeGen/Mips/nacl-reserved-regs.ll b/test/CodeGen/Mips/nacl-reserved-regs.ll
index ae21283b1fb7..efe2a663a3c5 100644
--- a/test/CodeGen/Mips/nacl-reserved-regs.ll
+++ b/test/CodeGen/Mips/nacl-reserved-regs.ll
@@ -5,22 +5,22 @@
@var = external global i32
define void @f() {
- %val1 = load volatile i32* @var
- %val2 = load volatile i32* @var
- %val3 = load volatile i32* @var
- %val4 = load volatile i32* @var
- %val5 = load volatile i32* @var
- %val6 = load volatile i32* @var
- %val7 = load volatile i32* @var
- %val8 = load volatile i32* @var
- %val9 = load volatile i32* @var
- %val10 = load volatile i32* @var
- %val11 = load volatile i32* @var
- %val12 = load volatile i32* @var
- %val13 = load volatile i32* @var
- %val14 = load volatile i32* @var
- %val15 = load volatile i32* @var
- %val16 = load volatile i32* @var
+ %val1 = load volatile i32, i32* @var
+ %val2 = load volatile i32, i32* @var
+ %val3 = load volatile i32, i32* @var
+ %val4 = load volatile i32, i32* @var
+ %val5 = load volatile i32, i32* @var
+ %val6 = load volatile i32, i32* @var
+ %val7 = load volatile i32, i32* @var
+ %val8 = load volatile i32, i32* @var
+ %val9 = load volatile i32, i32* @var
+ %val10 = load volatile i32, i32* @var
+ %val11 = load volatile i32, i32* @var
+ %val12 = load volatile i32, i32* @var
+ %val13 = load volatile i32, i32* @var
+ %val14 = load volatile i32, i32* @var
+ %val15 = load volatile i32, i32* @var
+ %val16 = load volatile i32, i32* @var
store volatile i32 %val1, i32* @var
store volatile i32 %val2, i32* @var
store volatile i32 %val3, i32* @var
diff --git a/test/CodeGen/Mips/named-register-n32.ll b/test/CodeGen/Mips/named-register-n32.ll
index 1e5f53ac5c9c..b15e9283fc43 100644
--- a/test/CodeGen/Mips/named-register-n32.ll
+++ b/test/CodeGen/Mips/named-register-n32.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=mips64 -relocation-model=static -mattr=+noabicalls,-n64,+n32 < %s | FileCheck %s
+; RUN: llc -march=mips64 -relocation-model=static -mattr=+noabicalls -target-abi n32 < %s | FileCheck %s
define i32* @get_gp() {
entry:
diff --git a/test/CodeGen/Mips/neg1.ll b/test/CodeGen/Mips/neg1.ll
index 281e62621565..36275a2991f6 100644
--- a/test/CodeGen/Mips/neg1.ll
+++ b/test/CodeGen/Mips/neg1.ll
@@ -5,10 +5,10 @@
define i32 @main() nounwind {
entry:
- %0 = load i32* @i, align 4
+ %0 = load i32, i32* @i, align 4
%sub = sub nsw i32 0, %0
; 16: neg ${{[0-9]+}}, ${{[0-9]+}}
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([5 x i8]* @.str, i32 0, i32 0), i32 %sub)
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i32 0, i32 0), i32 %sub)
ret i32 0
}
diff --git a/test/CodeGen/Mips/no-odd-spreg-msa.ll b/test/CodeGen/Mips/no-odd-spreg-msa.ll
index 30dd1ff82d73..cf79557cc97f 100644
--- a/test/CodeGen/Mips/no-odd-spreg-msa.ll
+++ b/test/CodeGen/Mips/no-odd-spreg-msa.ll
@@ -8,7 +8,7 @@ entry:
; Force the float into an odd-numbered register using named registers and
; load the vector.
%b = call float asm sideeffect "mov.s $0, $1", "={$f13},{$f12}" (float %a)
- %0 = load volatile <4 x float>* @v4f32
+ %0 = load volatile <4 x float>, <4 x float>* @v4f32
; Clobber all except $f12/$w12 and $f13
;
@@ -42,7 +42,7 @@ entry:
; Force the float into an odd-numbered register using named registers and
; load the vector.
%b = call float asm sideeffect "mov.s $0, $1", "={$f13},{$f12}" (float %a)
- %0 = load volatile <4 x float>* @v4f32
+ %0 = load volatile <4 x float>, <4 x float>* @v4f32
; Clobber all except $f12/$w12 and $f13
;
@@ -73,7 +73,7 @@ entry:
define float @msa_extract_0() {
entry:
- %0 = load volatile <4 x float>* @v4f32
+ %0 = load volatile <4 x float>, <4 x float>* @v4f32
%1 = call <4 x float> asm sideeffect "move.v $0, $1", "={$w13},{$w12}" (<4 x float> %0)
; Clobber all except $f12, and $f13
@@ -101,7 +101,7 @@ entry:
define float @msa_extract_1() {
entry:
- %0 = load volatile <4 x float>* @v4f32
+ %0 = load volatile <4 x float>, <4 x float>* @v4f32
%1 = call <4 x float> asm sideeffect "move.v $0, $1", "={$w13},{$w12}" (<4 x float> %0)
; Clobber all except $f13
diff --git a/test/CodeGen/Mips/nomips16.ll b/test/CodeGen/Mips/nomips16.ll
index 5f7d74e41979..418d8ead2c39 100644
--- a/test/CodeGen/Mips/nomips16.ll
+++ b/test/CodeGen/Mips/nomips16.ll
@@ -6,7 +6,7 @@
; Function Attrs: nounwind
define void @foo() #0 {
entry:
- %0 = load float* @x, align 4
+ %0 = load float, float* @x, align 4
%conv = fpext float %0 to double
%add = fadd double %conv, 1.500000e+00
%conv1 = fptrunc double %add to float
@@ -20,7 +20,7 @@ entry:
; Function Attrs: nounwind
define void @nofoo() #1 {
entry:
- %0 = load float* @x, align 4
+ %0 = load float, float* @x, align 4
%conv = fpext float %0 to double
%add = fadd double %conv, 3.900000e+00
%conv1 = fptrunc double %add to float
diff --git a/test/CodeGen/Mips/not1.ll b/test/CodeGen/Mips/not1.ll
index 2163b236c56f..f5ec5b60e421 100644
--- a/test/CodeGen/Mips/not1.ll
+++ b/test/CodeGen/Mips/not1.ll
@@ -6,10 +6,10 @@
define i32 @main() nounwind {
entry:
- %0 = load i32* @x, align 4
+ %0 = load i32, i32* @x, align 4
%neg = xor i32 %0, -1
; 16: not ${{[0-9]+}}, ${{[0-9]+}}
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([7 x i8]* @.str, i32 0, i32 0), i32 %neg)
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([7 x i8], [7 x i8]* @.str, i32 0, i32 0), i32 %neg)
ret i32 0
}
diff --git a/test/CodeGen/Mips/o32_cc_byval.ll b/test/CodeGen/Mips/o32_cc_byval.ll
index 5db47acc5a85..108c663ab1cd 100644
--- a/test/CodeGen/Mips/o32_cc_byval.ll
+++ b/test/CodeGen/Mips/o32_cc_byval.ll
@@ -30,7 +30,7 @@ entry:
%agg.tmp10 = alloca %struct.S3, align 4
call void @callee1(float 2.000000e+01, %struct.S1* byval bitcast (%0* @f1.s1 to %struct.S1*)) nounwind
call void @callee2(%struct.S2* byval @f1.s2) nounwind
- %tmp11 = getelementptr inbounds %struct.S3* %agg.tmp10, i32 0, i32 0
+ %tmp11 = getelementptr inbounds %struct.S3, %struct.S3* %agg.tmp10, i32 0, i32 0
store i8 11, i8* %tmp11, align 4
call void @callee3(float 2.100000e+01, %struct.S3* byval %agg.tmp10, %struct.S1* byval bitcast (%0* @f1.s1 to %struct.S1*)) nounwind
ret void
@@ -61,18 +61,18 @@ entry:
; CHECK: sw $[[R3]], 16($sp)
; CHECK: mfc1 $6, $f[[F0]]
- %i2 = getelementptr inbounds %struct.S1* %s1, i32 0, i32 5
- %tmp = load i32* %i2, align 4
- %d = getelementptr inbounds %struct.S1* %s1, i32 0, i32 4
- %tmp1 = load double* %d, align 8
- %ll = getelementptr inbounds %struct.S1* %s1, i32 0, i32 3
- %tmp2 = load i64* %ll, align 8
- %i = getelementptr inbounds %struct.S1* %s1, i32 0, i32 2
- %tmp3 = load i32* %i, align 4
- %s = getelementptr inbounds %struct.S1* %s1, i32 0, i32 1
- %tmp4 = load i16* %s, align 2
- %c = getelementptr inbounds %struct.S1* %s1, i32 0, i32 0
- %tmp5 = load i8* %c, align 1
+ %i2 = getelementptr inbounds %struct.S1, %struct.S1* %s1, i32 0, i32 5
+ %tmp = load i32, i32* %i2, align 4
+ %d = getelementptr inbounds %struct.S1, %struct.S1* %s1, i32 0, i32 4
+ %tmp1 = load double, double* %d, align 8
+ %ll = getelementptr inbounds %struct.S1, %struct.S1* %s1, i32 0, i32 3
+ %tmp2 = load i64, i64* %ll, align 8
+ %i = getelementptr inbounds %struct.S1, %struct.S1* %s1, i32 0, i32 2
+ %tmp3 = load i32, i32* %i, align 4
+ %s = getelementptr inbounds %struct.S1, %struct.S1* %s1, i32 0, i32 1
+ %tmp4 = load i16, i16* %s, align 2
+ %c = getelementptr inbounds %struct.S1, %struct.S1* %s1, i32 0, i32 0
+ %tmp5 = load i8, i8* %c, align 1
tail call void @callee4(i32 %tmp, double %tmp1, i64 %tmp2, i32 %tmp3, i16 signext %tmp4, i8 signext %tmp5, float %f) nounwind
ret void
}
@@ -90,10 +90,10 @@ entry:
; CHECK: lw $[[R0:[0-9]+]], 60($sp)
; CHECK: sw $[[R0]], 24($sp)
- %arrayidx = getelementptr inbounds %struct.S2* %s2, i32 0, i32 0, i32 0
- %tmp = load i32* %arrayidx, align 4
- %arrayidx2 = getelementptr inbounds %struct.S2* %s2, i32 0, i32 0, i32 3
- %tmp3 = load i32* %arrayidx2, align 4
+ %arrayidx = getelementptr inbounds %struct.S2, %struct.S2* %s2, i32 0, i32 0, i32 0
+ %tmp = load i32, i32* %arrayidx, align 4
+ %arrayidx2 = getelementptr inbounds %struct.S2, %struct.S2* %s2, i32 0, i32 0, i32 3
+ %tmp3 = load i32, i32* %arrayidx2, align 4
tail call void @callee4(i32 %tmp, double 2.000000e+00, i64 3, i32 %tmp3, i16 signext 4, i8 signext 5, float 6.000000e+00) nounwind
ret void
}
@@ -110,12 +110,12 @@ entry:
; CHECK: sw $[[R0]], 32($sp)
; CHECK: sw $[[R1]], 24($sp)
- %i = getelementptr inbounds %struct.S1* %s1, i32 0, i32 2
- %tmp = load i32* %i, align 4
- %i2 = getelementptr inbounds %struct.S1* %s1, i32 0, i32 5
- %tmp1 = load i32* %i2, align 4
- %c = getelementptr inbounds %struct.S3* %s3, i32 0, i32 0
- %tmp2 = load i8* %c, align 1
+ %i = getelementptr inbounds %struct.S1, %struct.S1* %s1, i32 0, i32 2
+ %tmp = load i32, i32* %i, align 4
+ %i2 = getelementptr inbounds %struct.S1, %struct.S1* %s1, i32 0, i32 5
+ %tmp1 = load i32, i32* %i2, align 4
+ %c = getelementptr inbounds %struct.S3, %struct.S3* %s3, i32 0, i32 0
+ %tmp2 = load i8, i8* %c, align 1
tail call void @callee4(i32 %tmp, double 2.000000e+00, i64 3, i32 %tmp1, i16 signext 4, i8 signext %tmp2, float 6.000000e+00) nounwind
ret void
}
diff --git a/test/CodeGen/Mips/o32_cc_vararg.ll b/test/CodeGen/Mips/o32_cc_vararg.ll
index 10972e884ac3..b4597a3214e2 100644
--- a/test/CodeGen/Mips/o32_cc_vararg.ll
+++ b/test/CodeGen/Mips/o32_cc_vararg.ll
@@ -24,7 +24,7 @@ entry:
store i32 %0, i32* %b, align 4
%ap2 = bitcast i8** %ap to i8*
call void @llvm.va_end(i8* %ap2)
- %tmp = load i32* %b, align 4
+ %tmp = load i32, i32* %b, align 4
ret i32 %tmp
; CHECK-LABEL: va1:
@@ -50,7 +50,7 @@ entry:
store double %0, double* %b, align 8
%ap2 = bitcast i8** %ap to i8*
call void @llvm.va_end(i8* %ap2)
- %tmp = load double* %b, align 8
+ %tmp = load double, double* %b, align 8
ret double %tmp
; CHECK-LABEL: va2:
@@ -78,7 +78,7 @@ entry:
store i32 %0, i32* %b, align 4
%ap2 = bitcast i8** %ap to i8*
call void @llvm.va_end(i8* %ap2)
- %tmp = load i32* %b, align 4
+ %tmp = load i32, i32* %b, align 4
ret i32 %tmp
; CHECK-LABEL: va3:
@@ -101,7 +101,7 @@ entry:
store double %0, double* %b, align 8
%ap2 = bitcast i8** %ap to i8*
call void @llvm.va_end(i8* %ap2)
- %tmp = load double* %b, align 8
+ %tmp = load double, double* %b, align 8
ret double %tmp
; CHECK-LABEL: va4:
@@ -129,7 +129,7 @@ entry:
store i32 %0, i32* %d, align 4
%ap2 = bitcast i8** %ap to i8*
call void @llvm.va_end(i8* %ap2)
- %tmp = load i32* %d, align 4
+ %tmp = load i32, i32* %d, align 4
ret i32 %tmp
; CHECK-LABEL: va5:
@@ -155,7 +155,7 @@ entry:
store double %0, double* %d, align 8
%ap2 = bitcast i8** %ap to i8*
call void @llvm.va_end(i8* %ap2)
- %tmp = load double* %d, align 8
+ %tmp = load double, double* %d, align 8
ret double %tmp
; CHECK-LABEL: va6:
@@ -183,7 +183,7 @@ entry:
store i32 %0, i32* %c, align 4
%ap2 = bitcast i8** %ap to i8*
call void @llvm.va_end(i8* %ap2)
- %tmp = load i32* %c, align 4
+ %tmp = load i32, i32* %c, align 4
ret i32 %tmp
; CHECK-LABEL: va7:
@@ -206,7 +206,7 @@ entry:
store double %0, double* %c, align 8
%ap2 = bitcast i8** %ap to i8*
call void @llvm.va_end(i8* %ap2)
- %tmp = load double* %c, align 8
+ %tmp = load double, double* %c, align 8
ret double %tmp
; CHECK-LABEL: va8:
@@ -232,7 +232,7 @@ entry:
store i32 %0, i32* %d, align 4
%ap2 = bitcast i8** %ap to i8*
call void @llvm.va_end(i8* %ap2)
- %tmp = load i32* %d, align 4
+ %tmp = load i32, i32* %d, align 4
ret i32 %tmp
; CHECK-LABEL: va9:
@@ -257,7 +257,7 @@ entry:
store double %0, double* %d, align 8
%ap2 = bitcast i8** %ap to i8*
call void @llvm.va_end(i8* %ap2)
- %tmp = load double* %d, align 8
+ %tmp = load double, double* %d, align 8
ret double %tmp
; CHECK-LABEL: va10:
diff --git a/test/CodeGen/Mips/octeon.ll b/test/CodeGen/Mips/octeon.ll
index 9d82b74f5b7e..499ce3c1ddbf 100644
--- a/test/CodeGen/Mips/octeon.ll
+++ b/test/CodeGen/Mips/octeon.ll
@@ -1,15 +1,14 @@
-; RUN: llc -O1 < %s -march=mips64 -mcpu=octeon | FileCheck %s -check-prefix=OCTEON
-; RUN: llc -O1 < %s -march=mips64 -mcpu=mips64 | FileCheck %s -check-prefix=MIPS64
+; RUN: llc -O1 < %s -march=mips64 -mcpu=octeon | FileCheck %s -check-prefix=ALL -check-prefix=OCTEON
+; RUN: llc -O1 < %s -march=mips64 -mcpu=mips64 | FileCheck %s -check-prefix=ALL -check-prefix=MIPS64
define i64 @addi64(i64 %a, i64 %b) nounwind {
entry:
-; OCTEON-LABEL: addi64:
+; ALL-LABEL: addi64:
; OCTEON: jr $ra
; OCTEON: baddu $2, $4, $5
-; MIPS64-LABEL: addi64:
-; MIPS64: daddu
-; MIPS64: jr
-; MIPS64: andi
+; MIPS64: daddu $[[T0:[0-9]+]], $4, $5
+; MIPS64: jr $ra
+; MIPS64: andi $2, $[[T0]], 255
%add = add i64 %a, %b
%and = and i64 %add, 255
ret i64 %and
@@ -17,28 +16,26 @@ entry:
define i64 @mul(i64 %a, i64 %b) nounwind {
entry:
-; OCTEON-LABEL: mul:
+; ALL-LABEL: mul:
; OCTEON: jr $ra
; OCTEON: dmul $2, $4, $5
-; MIPS64-LABEL: mul:
-; MIPS64: dmult
-; MIPS64: jr
-; MIPS64: mflo
+; MIPS64: dmult $4, $5
+; MIPS64: jr $ra
+; MIPS64: mflo $2
%res = mul i64 %a, %b
ret i64 %res
}
define i64 @cmpeq(i64 %a, i64 %b) nounwind {
entry:
-; OCTEON-LABEL: cmpeq:
+; ALL-LABEL: cmpeq:
; OCTEON: jr $ra
; OCTEON: seq $2, $4, $5
-; MIPS64-LABEL: cmpeq:
-; MIPS64: xor $1, $4, $5
-; MIPS64: sltiu $1, $1, 1
-; MIPS64: dsll $1, $1, 32
+; MIPS64: xor $[[T0:[0-9]+]], $4, $5
+; MIPS64: sltiu $[[T1:[0-9]+]], $[[T0]], 1
+; MIPS64: dsll $[[T2:[0-9]+]], $[[T1]], 32
; MIPS64: jr $ra
-; MIPS64: dsrl $2, $1, 32
+; MIPS64: dsrl $2, $[[T2]], 32
%res = icmp eq i64 %a, %b
%res2 = zext i1 %res to i64
ret i64 %res2
@@ -46,16 +43,15 @@ entry:
define i64 @cmpeqi(i64 %a) nounwind {
entry:
-; OCTEON-LABEL: cmpeqi:
+; ALL-LABEL: cmpeqi:
; OCTEON: jr $ra
; OCTEON: seqi $2, $4, 42
-; MIPS64-LABEL: cmpeqi:
-; MIPS64: daddiu $1, $zero, 42
-; MIPS64: xor $1, $4, $1
-; MIPS64: sltiu $1, $1, 1
-; MIPS64: dsll $1, $1, 32
+; MIPS64: daddiu $[[T0:[0-9]+]], $zero, 42
+; MIPS64: xor $[[T1:[0-9]+]], $4, $[[T0]]
+; MIPS64: sltiu $[[T2:[0-9]+]], $[[T1]], 1
+; MIPS64: dsll $[[T3:[0-9]+]], $[[T2]], 32
; MIPS64: jr $ra
-; MIPS64: dsrl $2, $1, 32
+; MIPS64: dsrl $2, $[[T3]], 32
%res = icmp eq i64 %a, 42
%res2 = zext i1 %res to i64
ret i64 %res2
@@ -63,15 +59,14 @@ entry:
define i64 @cmpne(i64 %a, i64 %b) nounwind {
entry:
-; OCTEON-LABEL: cmpne:
+; ALL-LABEL: cmpne:
; OCTEON: jr $ra
; OCTEON: sne $2, $4, $5
-; MIPS64-LABEL: cmpne:
-; MIPS64: xor $1, $4, $5
-; MIPS64: sltu $1, $zero, $1
-; MIPS64: dsll $1, $1, 32
+; MIPS64: xor $[[T0:[0-9]+]], $4, $5
+; MIPS64: sltu $[[T1:[0-9]+]], $zero, $[[T0]]
+; MIPS64: dsll $[[T2:[0-9]+]], $[[T1]], 32
; MIPS64: jr $ra
-; MIPS64: dsrl $2, $1, 32
+; MIPS64: dsrl $2, $[[T2]], 32
%res = icmp ne i64 %a, %b
%res2 = zext i1 %res to i64
ret i64 %res2
@@ -79,17 +74,84 @@ entry:
define i64 @cmpnei(i64 %a) nounwind {
entry:
-; OCTEON-LABEL: cmpnei:
+; ALL-LABEL: cmpnei:
; OCTEON: jr $ra
; OCTEON: snei $2, $4, 42
-; MIPS64-LABEL: cmpnei:
-; MIPS64: daddiu $1, $zero, 42
-; MIPS64: xor $1, $4, $1
-; MIPS64: sltu $1, $zero, $1
-; MIPS64: dsll $1, $1, 32
+; MIPS64: daddiu $[[T0:[0-9]+]], $zero, 42
+; MIPS64: xor $[[T1:[0-9]+]], $4, $[[T0]]
+; MIPS64: sltu $[[T2:[0-9]+]], $zero, $[[T1]]
+; MIPS64: dsll $[[T3:[0-9]+]], $[[T2]], 32
; MIPS64: jr $ra
-; MIPS64: dsrl $2, $1, 32
+; MIPS64: dsrl $2, $[[T3]], 32
%res = icmp ne i64 %a, 42
%res2 = zext i1 %res to i64
ret i64 %res2
}
+
+define i64 @bbit0(i64 %a) nounwind {
+entry:
+; ALL-LABEL: bbit0:
+; OCTEON: bbit0 $4, 3, $[[BB0:BB[0-9_]+]]
+; MIPS64: andi $[[T0:[0-9]+]], $4, 8
+; MIPS64: bnez $[[T0]], $[[BB0:BB[0-9_]+]]
+ %bit = and i64 %a, 8
+ %res = icmp eq i64 %bit, 0
+ br i1 %res, label %endif, label %if
+if:
+ ret i64 48
+
+endif:
+ ret i64 12
+}
+
+define i64 @bbit032(i64 %a) nounwind {
+entry:
+; ALL-LABEL: bbit032:
+; OCTEON: bbit032 $4, 3, $[[BB0:BB[0-9_]+]]
+; MIPS64: daddiu $[[T0:[0-9]+]], $zero, 1
+; MIPS64: dsll $[[T1:[0-9]+]], $[[T0]], 35
+; MIPS64: and $[[T2:[0-9]+]], $4, $[[T1]]
+; MIPS64: bnez $[[T2]], $[[BB0:BB[0-9_]+]]
+ %bit = and i64 %a, 34359738368
+ %res = icmp eq i64 %bit, 0
+ br i1 %res, label %endif, label %if
+if:
+ ret i64 48
+
+endif:
+ ret i64 12
+}
+
+define i64 @bbit1(i64 %a) nounwind {
+entry:
+; ALL-LABEL: bbit1:
+; OCTEON: bbit1 $4, 3, $[[BB0:BB[0-9_]+]]
+; MIPS64: andi $[[T0:[0-9]+]], $4, 8
+; MIPS64: beqz $[[T0]], $[[BB0:BB[0-9_]+]]
+ %bit = and i64 %a, 8
+ %res = icmp ne i64 %bit, 0
+ br i1 %res, label %endif, label %if
+if:
+ ret i64 48
+
+endif:
+ ret i64 12
+}
+
+define i64 @bbit132(i64 %a) nounwind {
+entry:
+; ALL-LABEL: bbit132:
+; OCTEON: bbit132 $4, 3, $[[BB0:BB[0-9_]+]]
+; MIPS64: daddiu $[[T0:[0-9]+]], $zero, 1
+; MIPS64: dsll $[[T1:[0-9]+]], $[[T0]], 35
+; MIPS64: and $[[T2:[0-9]+]], $4, $[[T1]]
+; MIPS64: beqz $[[T2]], $[[BB0:BB[0-9_]+]]
+ %bit = and i64 %a, 34359738368
+ %res = icmp ne i64 %bit, 0
+ br i1 %res, label %endif, label %if
+if:
+ ret i64 48
+
+endif:
+ ret i64 12
+}
diff --git a/test/CodeGen/Mips/optimize-pic-o0.ll b/test/CodeGen/Mips/optimize-pic-o0.ll
index 554d49e728c7..454bc851484d 100644
--- a/test/CodeGen/Mips/optimize-pic-o0.ll
+++ b/test/CodeGen/Mips/optimize-pic-o0.ll
@@ -10,7 +10,7 @@ entry:
br label %for.cond
for.cond: ; preds = %for.inc, %entry
- %0 = load i32* %i, align 4
+ %0 = load i32, i32* %i, align 4
%cmp = icmp slt i32 %0, 10
br i1 %cmp, label %for.body, label %for.end
@@ -20,13 +20,13 @@ for.body: ; preds = %for.cond
br label %for.inc
for.inc: ; preds = %for.body
- %1 = load i32* %i, align 4
+ %1 = load i32, i32* %i, align 4
%inc = add nsw i32 %1, 1
store i32 %inc, i32* %i, align 4
br label %for.cond
for.end: ; preds = %for.cond
- %2 = load i32* %retval
+ %2 = load i32, i32* %retval
ret i32 %2
}
diff --git a/test/CodeGen/Mips/or1.ll b/test/CodeGen/Mips/or1.ll
index b1c36961f92b..51b6ebfe8e3b 100644
--- a/test/CodeGen/Mips/or1.ll
+++ b/test/CodeGen/Mips/or1.ll
@@ -6,11 +6,11 @@
define i32 @main() nounwind {
entry:
- %0 = load i32* @x, align 4
- %1 = load i32* @y, align 4
+ %0 = load i32, i32* @x, align 4
+ %1 = load i32, i32* @y, align 4
%or = or i32 %0, %1
; 16: or ${{[0-9]+}}, ${{[0-9]+}}
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([7 x i8]* @.str, i32 0, i32 0), i32 %or)
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([7 x i8], [7 x i8]* @.str, i32 0, i32 0), i32 %or)
ret i32 0
}
diff --git a/test/CodeGen/Mips/prevent-hoisting.ll b/test/CodeGen/Mips/prevent-hoisting.ll
index 210fe3b0f6d8..81b14d7441b3 100644
--- a/test/CodeGen/Mips/prevent-hoisting.ll
+++ b/test/CodeGen/Mips/prevent-hoisting.ll
@@ -46,7 +46,7 @@
define void @readLumaCoeff8x8_CABAC(%struct.img_par* %img, i32 %b8) {
- %1 = load i32* undef, align 4
+ %1 = load i32, i32* undef, align 4
br i1 false, label %2, label %3
; <label>:2 ; preds = %0
@@ -65,7 +65,7 @@ switch.lookup: ; preds = %3
br label %6
; <label>:6 ; preds = %5, %4
- %7 = phi [2 x i8]* [ getelementptr inbounds ([64 x [2 x i8]]* @FIELD_SCAN8x8, i32 0, i32 0), %4 ], [ null, %5 ]
+ %7 = phi [2 x i8]* [ getelementptr inbounds ([64 x [2 x i8]], [64 x [2 x i8]]* @FIELD_SCAN8x8, i32 0, i32 0), %4 ], [ null, %5 ]
br i1 undef, label %switch.lookup6, label %8
switch.lookup6: ; preds = %6
@@ -77,7 +77,7 @@ switch.lookup6: ; preds = %6
; <label>:9 ; preds = %8
%10 = and i32 %b8, 1
%11 = shl nuw nsw i32 %10, 3
- %12 = getelementptr inbounds %struct.Slice* null, i32 0, i32 9
+ %12 = getelementptr inbounds %struct.Slice, %struct.Slice* null, i32 0, i32 9
br i1 undef, label %.preheader, label %.preheader11
.preheader11: ; preds = %21, %9
@@ -92,19 +92,19 @@ switch.lookup6: ; preds = %6
br label %15
; <label>:15 ; preds = %14, %13
- %16 = getelementptr inbounds [0 x [20 x i32]]* @assignSE2partition, i32 0, i32 %1, i32 undef
- %17 = load i32* %16, align 4
- %18 = getelementptr inbounds %struct.datapartition* null, i32 %17, i32 2
- %19 = load i32 (%struct.syntaxelement*, %struct.img_par*, %struct.datapartition*)** %18, align 4
+ %16 = getelementptr inbounds [0 x [20 x i32]], [0 x [20 x i32]]* @assignSE2partition, i32 0, i32 %1, i32 undef
+ %17 = load i32, i32* %16, align 4
+ %18 = getelementptr inbounds %struct.datapartition, %struct.datapartition* null, i32 %17, i32 2
+ %19 = load i32 (%struct.syntaxelement*, %struct.img_par*, %struct.datapartition*)*, i32 (%struct.syntaxelement*, %struct.img_par*, %struct.datapartition*)** %18, align 4
%20 = call i32 %19(%struct.syntaxelement* undef, %struct.img_par* %img, %struct.datapartition* undef)
br i1 false, label %.loopexit, label %21
; <label>:21 ; preds = %15
%22 = add i32 %coef_ctr.013, 1
%23 = add i32 %22, 0
- %24 = getelementptr inbounds [2 x i8]* %7, i32 %23, i32 0
+ %24 = getelementptr inbounds [2 x i8], [2 x i8]* %7, i32 %23, i32 0
%25 = add nsw i32 0, %11
- %26 = getelementptr inbounds %struct.img_par* %img, i32 0, i32 27, i32 undef, i32 %25
+ %26 = getelementptr inbounds %struct.img_par, %struct.img_par* %img, i32 0, i32 27, i32 undef, i32 %25
store i32 0, i32* %26, align 4
%27 = add nsw i32 %k.014, 1
%28 = icmp slt i32 %27, 65
@@ -122,21 +122,21 @@ switch.lookup6: ; preds = %6
br label %31
; <label>:31 ; preds = %30, %29
- %32 = getelementptr inbounds [0 x [20 x i32]]* @assignSE2partition, i32 0, i32 %1, i32 undef
- %33 = load i32* %32, align 4
- %34 = getelementptr inbounds %struct.datapartition* null, i32 %33
+ %32 = getelementptr inbounds [0 x [20 x i32]], [0 x [20 x i32]]* @assignSE2partition, i32 0, i32 %1, i32 undef
+ %33 = load i32, i32* %32, align 4
+ %34 = getelementptr inbounds %struct.datapartition, %struct.datapartition* null, i32 %33
%35 = call i32 undef(%struct.syntaxelement* undef, %struct.img_par* %img, %struct.datapartition* %34)
br i1 false, label %.loopexit, label %36
; <label>:36 ; preds = %31
- %37 = load i32* undef, align 4
+ %37 = load i32, i32* undef, align 4
%38 = add i32 %coef_ctr.29, 1
%39 = add i32 %38, %37
- %40 = getelementptr inbounds [2 x i8]* %7, i32 %39, i32 0
- %41 = load i8* %40, align 1
+ %40 = getelementptr inbounds [2 x i8], [2 x i8]* %7, i32 %39, i32 0
+ %41 = load i8, i8* %40, align 1
%42 = zext i8 %41 to i32
%43 = add nsw i32 %42, %11
- %44 = getelementptr inbounds %struct.img_par* %img, i32 0, i32 27, i32 undef, i32 %43
+ %44 = getelementptr inbounds %struct.img_par, %struct.img_par* %img, i32 0, i32 27, i32 undef, i32 %43
store i32 0, i32* %44, align 4
%45 = add nsw i32 %k.110, 1
%46 = icmp slt i32 %45, 65
diff --git a/test/CodeGen/Mips/private.ll b/test/CodeGen/Mips/private.ll
index 058db0bb977a..5907dbd644ae 100644
--- a/test/CodeGen/Mips/private.ll
+++ b/test/CodeGen/Mips/private.ll
@@ -15,6 +15,6 @@ define i32 @bar() {
; CHECK: lw $[[R0:[0-9]+]], %got($baz)($
; CHECK: lw ${{[0-9]+}}, %lo($baz)($[[R0]])
call void @foo()
- %1 = load i32* @baz, align 4
+ %1 = load i32, i32* @baz, align 4
ret i32 %1
}
diff --git a/test/CodeGen/Mips/ra-allocatable.ll b/test/CodeGen/Mips/ra-allocatable.ll
index afc5cb0c2556..048d4325a411 100644
--- a/test/CodeGen/Mips/ra-allocatable.ll
+++ b/test/CodeGen/Mips/ra-allocatable.ll
@@ -98,186 +98,186 @@ entry:
; CHECK: lw $ra, {{[0-9]+}}($sp) # 4-byte Folded Reload
; CHECK: jr $ra
- %0 = load i32* @a0, align 4
- %1 = load i32** @b0, align 4
+ %0 = load i32, i32* @a0, align 4
+ %1 = load i32*, i32** @b0, align 4
store i32 %0, i32* %1, align 4
- %2 = load i32* @a1, align 4
- %3 = load i32** @b1, align 4
+ %2 = load i32, i32* @a1, align 4
+ %3 = load i32*, i32** @b1, align 4
store i32 %2, i32* %3, align 4
- %4 = load i32* @a2, align 4
- %5 = load i32** @b2, align 4
+ %4 = load i32, i32* @a2, align 4
+ %5 = load i32*, i32** @b2, align 4
store i32 %4, i32* %5, align 4
- %6 = load i32* @a3, align 4
- %7 = load i32** @b3, align 4
+ %6 = load i32, i32* @a3, align 4
+ %7 = load i32*, i32** @b3, align 4
store i32 %6, i32* %7, align 4
- %8 = load i32* @a4, align 4
- %9 = load i32** @b4, align 4
+ %8 = load i32, i32* @a4, align 4
+ %9 = load i32*, i32** @b4, align 4
store i32 %8, i32* %9, align 4
- %10 = load i32* @a5, align 4
- %11 = load i32** @b5, align 4
+ %10 = load i32, i32* @a5, align 4
+ %11 = load i32*, i32** @b5, align 4
store i32 %10, i32* %11, align 4
- %12 = load i32* @a6, align 4
- %13 = load i32** @b6, align 4
+ %12 = load i32, i32* @a6, align 4
+ %13 = load i32*, i32** @b6, align 4
store i32 %12, i32* %13, align 4
- %14 = load i32* @a7, align 4
- %15 = load i32** @b7, align 4
+ %14 = load i32, i32* @a7, align 4
+ %15 = load i32*, i32** @b7, align 4
store i32 %14, i32* %15, align 4
- %16 = load i32* @a8, align 4
- %17 = load i32** @b8, align 4
+ %16 = load i32, i32* @a8, align 4
+ %17 = load i32*, i32** @b8, align 4
store i32 %16, i32* %17, align 4
- %18 = load i32* @a9, align 4
- %19 = load i32** @b9, align 4
+ %18 = load i32, i32* @a9, align 4
+ %19 = load i32*, i32** @b9, align 4
store i32 %18, i32* %19, align 4
- %20 = load i32* @a10, align 4
- %21 = load i32** @b10, align 4
+ %20 = load i32, i32* @a10, align 4
+ %21 = load i32*, i32** @b10, align 4
store i32 %20, i32* %21, align 4
- %22 = load i32* @a11, align 4
- %23 = load i32** @b11, align 4
+ %22 = load i32, i32* @a11, align 4
+ %23 = load i32*, i32** @b11, align 4
store i32 %22, i32* %23, align 4
- %24 = load i32* @a12, align 4
- %25 = load i32** @b12, align 4
+ %24 = load i32, i32* @a12, align 4
+ %25 = load i32*, i32** @b12, align 4
store i32 %24, i32* %25, align 4
- %26 = load i32* @a13, align 4
- %27 = load i32** @b13, align 4
+ %26 = load i32, i32* @a13, align 4
+ %27 = load i32*, i32** @b13, align 4
store i32 %26, i32* %27, align 4
- %28 = load i32* @a14, align 4
- %29 = load i32** @b14, align 4
+ %28 = load i32, i32* @a14, align 4
+ %29 = load i32*, i32** @b14, align 4
store i32 %28, i32* %29, align 4
- %30 = load i32* @a15, align 4
- %31 = load i32** @b15, align 4
+ %30 = load i32, i32* @a15, align 4
+ %31 = load i32*, i32** @b15, align 4
store i32 %30, i32* %31, align 4
- %32 = load i32* @a16, align 4
- %33 = load i32** @b16, align 4
+ %32 = load i32, i32* @a16, align 4
+ %33 = load i32*, i32** @b16, align 4
store i32 %32, i32* %33, align 4
- %34 = load i32* @a17, align 4
- %35 = load i32** @b17, align 4
+ %34 = load i32, i32* @a17, align 4
+ %35 = load i32*, i32** @b17, align 4
store i32 %34, i32* %35, align 4
- %36 = load i32* @a18, align 4
- %37 = load i32** @b18, align 4
+ %36 = load i32, i32* @a18, align 4
+ %37 = load i32*, i32** @b18, align 4
store i32 %36, i32* %37, align 4
- %38 = load i32* @a19, align 4
- %39 = load i32** @b19, align 4
+ %38 = load i32, i32* @a19, align 4
+ %39 = load i32*, i32** @b19, align 4
store i32 %38, i32* %39, align 4
- %40 = load i32* @a20, align 4
- %41 = load i32** @b20, align 4
+ %40 = load i32, i32* @a20, align 4
+ %41 = load i32*, i32** @b20, align 4
store i32 %40, i32* %41, align 4
- %42 = load i32* @a21, align 4
- %43 = load i32** @b21, align 4
+ %42 = load i32, i32* @a21, align 4
+ %43 = load i32*, i32** @b21, align 4
store i32 %42, i32* %43, align 4
- %44 = load i32* @a22, align 4
- %45 = load i32** @b22, align 4
+ %44 = load i32, i32* @a22, align 4
+ %45 = load i32*, i32** @b22, align 4
store i32 %44, i32* %45, align 4
- %46 = load i32* @a23, align 4
- %47 = load i32** @b23, align 4
+ %46 = load i32, i32* @a23, align 4
+ %47 = load i32*, i32** @b23, align 4
store i32 %46, i32* %47, align 4
- %48 = load i32* @a24, align 4
- %49 = load i32** @b24, align 4
+ %48 = load i32, i32* @a24, align 4
+ %49 = load i32*, i32** @b24, align 4
store i32 %48, i32* %49, align 4
- %50 = load i32* @a25, align 4
- %51 = load i32** @b25, align 4
+ %50 = load i32, i32* @a25, align 4
+ %51 = load i32*, i32** @b25, align 4
store i32 %50, i32* %51, align 4
- %52 = load i32* @a26, align 4
- %53 = load i32** @b26, align 4
+ %52 = load i32, i32* @a26, align 4
+ %53 = load i32*, i32** @b26, align 4
store i32 %52, i32* %53, align 4
- %54 = load i32* @a27, align 4
- %55 = load i32** @b27, align 4
+ %54 = load i32, i32* @a27, align 4
+ %55 = load i32*, i32** @b27, align 4
store i32 %54, i32* %55, align 4
- %56 = load i32* @a28, align 4
- %57 = load i32** @b28, align 4
+ %56 = load i32, i32* @a28, align 4
+ %57 = load i32*, i32** @b28, align 4
store i32 %56, i32* %57, align 4
- %58 = load i32* @a29, align 4
- %59 = load i32** @b29, align 4
+ %58 = load i32, i32* @a29, align 4
+ %59 = load i32*, i32** @b29, align 4
store i32 %58, i32* %59, align 4
- %60 = load i32* @a0, align 4
- %61 = load i32** @c0, align 4
+ %60 = load i32, i32* @a0, align 4
+ %61 = load i32*, i32** @c0, align 4
store i32 %60, i32* %61, align 4
- %62 = load i32* @a1, align 4
- %63 = load i32** @c1, align 4
+ %62 = load i32, i32* @a1, align 4
+ %63 = load i32*, i32** @c1, align 4
store i32 %62, i32* %63, align 4
- %64 = load i32* @a2, align 4
- %65 = load i32** @c2, align 4
+ %64 = load i32, i32* @a2, align 4
+ %65 = load i32*, i32** @c2, align 4
store i32 %64, i32* %65, align 4
- %66 = load i32* @a3, align 4
- %67 = load i32** @c3, align 4
+ %66 = load i32, i32* @a3, align 4
+ %67 = load i32*, i32** @c3, align 4
store i32 %66, i32* %67, align 4
- %68 = load i32* @a4, align 4
- %69 = load i32** @c4, align 4
+ %68 = load i32, i32* @a4, align 4
+ %69 = load i32*, i32** @c4, align 4
store i32 %68, i32* %69, align 4
- %70 = load i32* @a5, align 4
- %71 = load i32** @c5, align 4
+ %70 = load i32, i32* @a5, align 4
+ %71 = load i32*, i32** @c5, align 4
store i32 %70, i32* %71, align 4
- %72 = load i32* @a6, align 4
- %73 = load i32** @c6, align 4
+ %72 = load i32, i32* @a6, align 4
+ %73 = load i32*, i32** @c6, align 4
store i32 %72, i32* %73, align 4
- %74 = load i32* @a7, align 4
- %75 = load i32** @c7, align 4
+ %74 = load i32, i32* @a7, align 4
+ %75 = load i32*, i32** @c7, align 4
store i32 %74, i32* %75, align 4
- %76 = load i32* @a8, align 4
- %77 = load i32** @c8, align 4
+ %76 = load i32, i32* @a8, align 4
+ %77 = load i32*, i32** @c8, align 4
store i32 %76, i32* %77, align 4
- %78 = load i32* @a9, align 4
- %79 = load i32** @c9, align 4
+ %78 = load i32, i32* @a9, align 4
+ %79 = load i32*, i32** @c9, align 4
store i32 %78, i32* %79, align 4
- %80 = load i32* @a10, align 4
- %81 = load i32** @c10, align 4
+ %80 = load i32, i32* @a10, align 4
+ %81 = load i32*, i32** @c10, align 4
store i32 %80, i32* %81, align 4
- %82 = load i32* @a11, align 4
- %83 = load i32** @c11, align 4
+ %82 = load i32, i32* @a11, align 4
+ %83 = load i32*, i32** @c11, align 4
store i32 %82, i32* %83, align 4
- %84 = load i32* @a12, align 4
- %85 = load i32** @c12, align 4
+ %84 = load i32, i32* @a12, align 4
+ %85 = load i32*, i32** @c12, align 4
store i32 %84, i32* %85, align 4
- %86 = load i32* @a13, align 4
- %87 = load i32** @c13, align 4
+ %86 = load i32, i32* @a13, align 4
+ %87 = load i32*, i32** @c13, align 4
store i32 %86, i32* %87, align 4
- %88 = load i32* @a14, align 4
- %89 = load i32** @c14, align 4
+ %88 = load i32, i32* @a14, align 4
+ %89 = load i32*, i32** @c14, align 4
store i32 %88, i32* %89, align 4
- %90 = load i32* @a15, align 4
- %91 = load i32** @c15, align 4
+ %90 = load i32, i32* @a15, align 4
+ %91 = load i32*, i32** @c15, align 4
store i32 %90, i32* %91, align 4
- %92 = load i32* @a16, align 4
- %93 = load i32** @c16, align 4
+ %92 = load i32, i32* @a16, align 4
+ %93 = load i32*, i32** @c16, align 4
store i32 %92, i32* %93, align 4
- %94 = load i32* @a17, align 4
- %95 = load i32** @c17, align 4
+ %94 = load i32, i32* @a17, align 4
+ %95 = load i32*, i32** @c17, align 4
store i32 %94, i32* %95, align 4
- %96 = load i32* @a18, align 4
- %97 = load i32** @c18, align 4
+ %96 = load i32, i32* @a18, align 4
+ %97 = load i32*, i32** @c18, align 4
store i32 %96, i32* %97, align 4
- %98 = load i32* @a19, align 4
- %99 = load i32** @c19, align 4
+ %98 = load i32, i32* @a19, align 4
+ %99 = load i32*, i32** @c19, align 4
store i32 %98, i32* %99, align 4
- %100 = load i32* @a20, align 4
- %101 = load i32** @c20, align 4
+ %100 = load i32, i32* @a20, align 4
+ %101 = load i32*, i32** @c20, align 4
store i32 %100, i32* %101, align 4
- %102 = load i32* @a21, align 4
- %103 = load i32** @c21, align 4
+ %102 = load i32, i32* @a21, align 4
+ %103 = load i32*, i32** @c21, align 4
store i32 %102, i32* %103, align 4
- %104 = load i32* @a22, align 4
- %105 = load i32** @c22, align 4
+ %104 = load i32, i32* @a22, align 4
+ %105 = load i32*, i32** @c22, align 4
store i32 %104, i32* %105, align 4
- %106 = load i32* @a23, align 4
- %107 = load i32** @c23, align 4
+ %106 = load i32, i32* @a23, align 4
+ %107 = load i32*, i32** @c23, align 4
store i32 %106, i32* %107, align 4
- %108 = load i32* @a24, align 4
- %109 = load i32** @c24, align 4
+ %108 = load i32, i32* @a24, align 4
+ %109 = load i32*, i32** @c24, align 4
store i32 %108, i32* %109, align 4
- %110 = load i32* @a25, align 4
- %111 = load i32** @c25, align 4
+ %110 = load i32, i32* @a25, align 4
+ %111 = load i32*, i32** @c25, align 4
store i32 %110, i32* %111, align 4
- %112 = load i32* @a26, align 4
- %113 = load i32** @c26, align 4
+ %112 = load i32, i32* @a26, align 4
+ %113 = load i32*, i32** @c26, align 4
store i32 %112, i32* %113, align 4
- %114 = load i32* @a27, align 4
- %115 = load i32** @c27, align 4
+ %114 = load i32, i32* @a27, align 4
+ %115 = load i32*, i32** @c27, align 4
store i32 %114, i32* %115, align 4
- %116 = load i32* @a28, align 4
- %117 = load i32** @c28, align 4
+ %116 = load i32, i32* @a28, align 4
+ %117 = load i32*, i32** @c28, align 4
store i32 %116, i32* %117, align 4
- %118 = load i32* @a29, align 4
- %119 = load i32** @c29, align 4
+ %118 = load i32, i32* @a29, align 4
+ %119 = load i32*, i32** @c29, align 4
store i32 %118, i32* %119, align 4
- %120 = load i32* @a0, align 4
+ %120 = load i32, i32* @a0, align 4
ret i32 %120
}
diff --git a/test/CodeGen/Mips/rdhwr-directives.ll b/test/CodeGen/Mips/rdhwr-directives.ll
index 27010d4d3699..ebc91ea4459e 100644
--- a/test/CodeGen/Mips/rdhwr-directives.ll
+++ b/test/CodeGen/Mips/rdhwr-directives.ll
@@ -9,7 +9,7 @@ entry:
; CHECK: rdhwr
; CHECK: .set pop
- %0 = load i32* @a, align 4
+ %0 = load i32, i32* @a, align 4
ret i32 %0
}
diff --git a/test/CodeGen/Mips/rem.ll b/test/CodeGen/Mips/rem.ll
index b18f85dcbecf..70f957ce15f6 100644
--- a/test/CodeGen/Mips/rem.ll
+++ b/test/CodeGen/Mips/rem.ll
@@ -7,8 +7,8 @@
define void @test() nounwind {
entry:
- %0 = load i32* @iiii, align 4
- %1 = load i32* @jjjj, align 4
+ %0 = load i32, i32* @iiii, align 4
+ %1 = load i32, i32* @jjjj, align 4
%rem = srem i32 %0, %1
; 16: div $zero, ${{[0-9]+}}, ${{[0-9]+}}
; 16: mfhi ${{[0-9]+}}
diff --git a/test/CodeGen/Mips/remat-immed-load.ll b/test/CodeGen/Mips/remat-immed-load.ll
index b53b156e9eec..3d37b43bbc63 100644
--- a/test/CodeGen/Mips/remat-immed-load.ll
+++ b/test/CodeGen/Mips/remat-immed-load.ll
@@ -1,6 +1,6 @@
; RUN: llc -march=mipsel < %s | FileCheck %s -check-prefix=32
-; RUN: llc -march=mips64el -mcpu=mips4 -mattr=n64 < %s | FileCheck %s -check-prefix=64
-; RUN: llc -march=mips64el -mcpu=mips64 -mattr=n64 < %s | FileCheck %s -check-prefix=64
+; RUN: llc -march=mips64el -mcpu=mips4 -target-abi=n64 < %s | FileCheck %s -check-prefix=64
+; RUN: llc -march=mips64el -mcpu=mips64 -target-abi=n64 < %s | FileCheck %s -check-prefix=64
define void @f0() nounwind {
entry:
diff --git a/test/CodeGen/Mips/remu.ll b/test/CodeGen/Mips/remu.ll
index 472503c38403..12679727952f 100644
--- a/test/CodeGen/Mips/remu.ll
+++ b/test/CodeGen/Mips/remu.ll
@@ -7,8 +7,8 @@
define void @test() nounwind {
entry:
- %0 = load i32* @iiii, align 4
- %1 = load i32* @jjjj, align 4
+ %0 = load i32, i32* @iiii, align 4
+ %1 = load i32, i32* @jjjj, align 4
%rem = urem i32 %0, %1
; 16: divu $zero, ${{[0-9]+}}, ${{[0-9]+}}
; 16: mfhi ${{[0-9]+}}
diff --git a/test/CodeGen/Mips/return-vector.ll b/test/CodeGen/Mips/return-vector.ll
index 0e0d51587541..3870fe092156 100644
--- a/test/CodeGen/Mips/return-vector.ll
+++ b/test/CodeGen/Mips/return-vector.ll
@@ -12,7 +12,7 @@ declare <4 x double> @d4(...)
define i32 @call_i8() {
entry:
- %call = call <8 x i32> (...)* @i8()
+ %call = call <8 x i32> (...) @i8()
%v0 = extractelement <8 x i32> %call, i32 0
%v1 = extractelement <8 x i32> %call, i32 1
%v2 = extractelement <8 x i32> %call, i32 2
@@ -46,7 +46,7 @@ entry:
define float @call_f4() {
entry:
- %call = call <4 x float> (...)* @f4()
+ %call = call <4 x float> (...) @f4()
%v0 = extractelement <4 x float> %call, i32 0
%v1 = extractelement <4 x float> %call, i32 1
%v2 = extractelement <4 x float> %call, i32 2
@@ -68,7 +68,7 @@ entry:
define double @call_d4() {
entry:
- %call = call <4 x double> (...)* @d4()
+ %call = call <4 x double> (...) @d4()
%v0 = extractelement <4 x double> %call, i32 0
%v1 = extractelement <4 x double> %call, i32 1
%v2 = extractelement <4 x double> %call, i32 2
@@ -99,7 +99,7 @@ declare <2 x double> @d2(...)
define i32 @call_i4() {
entry:
- %call = call <4 x i32> (...)* @i4()
+ %call = call <4 x i32> (...) @i4()
%v0 = extractelement <4 x i32> %call, i32 0
%v1 = extractelement <4 x i32> %call, i32 1
%v2 = extractelement <4 x i32> %call, i32 2
@@ -120,7 +120,7 @@ entry:
define float @call_f2() {
entry:
- %call = call <2 x float> (...)* @f2()
+ %call = call <2 x float> (...) @f2()
%v0 = extractelement <2 x float> %call, i32 0
%v1 = extractelement <2 x float> %call, i32 1
%add1 = fadd float %v0, %v1
@@ -135,7 +135,7 @@ entry:
define double @call_d2() {
entry:
- %call = call <2 x double> (...)* @d2()
+ %call = call <2 x double> (...) @d2()
%v0 = extractelement <2 x double> %call, i32 0
%v1 = extractelement <2 x double> %call, i32 1
%add1 = fadd double %v0, %v1
diff --git a/test/CodeGen/Mips/s2rem.ll b/test/CodeGen/Mips/s2rem.ll
index 9edb5be2771e..65e48fe57c92 100644
--- a/test/CodeGen/Mips/s2rem.ll
+++ b/test/CodeGen/Mips/s2rem.ll
@@ -56,7 +56,7 @@ declare double @d() #1
; Function Attrs: nounwind
define void @fft() #0 {
entry:
- %0 = load float* @x, align 4
+ %0 = load float, float* @x, align 4
%call = call float @ff(float %0)
store float %call, float* @x, align 4
ret void
@@ -71,7 +71,7 @@ declare float @ff(float) #1
; Function Attrs: nounwind
define void @vft() #0 {
entry:
- %0 = load float* @x, align 4
+ %0 = load float, float* @x, align 4
call void @vf(float %0)
ret void
; PIC: .ent vft
diff --git a/test/CodeGen/Mips/sb1.ll b/test/CodeGen/Mips/sb1.ll
index e1a28d459548..d2e8510024e5 100644
--- a/test/CodeGen/Mips/sb1.ll
+++ b/test/CodeGen/Mips/sb1.ll
@@ -6,14 +6,14 @@
define i32 @main() nounwind {
entry:
- %0 = load i32* @i, align 4
+ %0 = load i32, i32* @i, align 4
%conv = trunc i32 %0 to i8
store i8 %conv, i8* @c, align 1
- %1 = load i32* @i, align 4
- %2 = load i8* @c, align 1
+ %1 = load i32, i32* @i, align 4
+ %2 = load i8, i8* @c, align 1
%conv1 = sext i8 %2 to i32
; 16: sb ${{[0-9]+}}, 0(${{[0-9]+}})
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([8 x i8]* @.str, i32 0, i32 0), i32 %1, i32 %conv1)
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([8 x i8], [8 x i8]* @.str, i32 0, i32 0), i32 %1, i32 %conv1)
ret i32 0
}
diff --git a/test/CodeGen/Mips/sel1c.ll b/test/CodeGen/Mips/sel1c.ll
index edd2e3e43b79..6753af106e0f 100644
--- a/test/CodeGen/Mips/sel1c.ll
+++ b/test/CodeGen/Mips/sel1c.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -soft-float -mips16-hard-float -relocation-model=pic -mips16-constant-islands < %s | FileCheck %s -check-prefix=cond-b-short
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -mattr=+soft-float -mips16-hard-float -relocation-model=pic -mips16-constant-islands < %s | FileCheck %s -check-prefix=cond-b-short
@i = global i32 1, align 4
@j = global i32 2, align 4
@@ -7,8 +7,8 @@
; Function Attrs: nounwind optsize
define void @t() #0 {
entry:
- %0 = load i32* @i, align 4
- %1 = load i32* @j, align 4
+ %0 = load i32, i32* @i, align 4
+ %1 = load i32, i32* @j, align 4
%cmp = icmp eq i32 %0, %1
%cond = select i1 %cmp, i32 1, i32 3
store i32 %cond, i32* @k, align 4
diff --git a/test/CodeGen/Mips/sel2c.ll b/test/CodeGen/Mips/sel2c.ll
index 4b211245f46e..987cccad5bf4 100644
--- a/test/CodeGen/Mips/sel2c.ll
+++ b/test/CodeGen/Mips/sel2c.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -soft-float -mips16-hard-float -relocation-model=pic -mips16-constant-islands < %s | FileCheck %s -check-prefix=cond-b-short
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -mattr=+soft-float -mips16-hard-float -relocation-model=pic -mips16-constant-islands < %s | FileCheck %s -check-prefix=cond-b-short
@i = global i32 1, align 4
@j = global i32 2, align 4
@@ -7,8 +7,8 @@
; Function Attrs: nounwind optsize
define void @t() #0 {
entry:
- %0 = load i32* @i, align 4
- %1 = load i32* @j, align 4
+ %0 = load i32, i32* @i, align 4
+ %1 = load i32, i32* @j, align 4
%cmp = icmp ne i32 %0, %1
%cond = select i1 %cmp, i32 1, i32 3
store i32 %cond, i32* @k, align 4
diff --git a/test/CodeGen/Mips/selTBteqzCmpi.ll b/test/CodeGen/Mips/selTBteqzCmpi.ll
index 9cb8227f9d2b..5a72ea01073c 100644
--- a/test/CodeGen/Mips/selTBteqzCmpi.ll
+++ b/test/CodeGen/Mips/selTBteqzCmpi.ll
@@ -8,10 +8,10 @@
define void @t() nounwind "target-cpu"="mips16" "target-features"="+mips16,+o32" {
entry:
- %0 = load i32* @a, align 4
+ %0 = load i32, i32* @a, align 4
%cmp = icmp eq i32 %0, 10
- %1 = load i32* @i, align 4
- %2 = load i32* @j, align 4
+ %1 = load i32, i32* @i, align 4
+ %2 = load i32, i32* @j, align 4
%cond = select i1 %cmp, i32 %1, i32 %2
store i32 %cond, i32* @i, align 4
ret void
diff --git a/test/CodeGen/Mips/selTBtnezCmpi.ll b/test/CodeGen/Mips/selTBtnezCmpi.ll
index bd334f59d33b..b6407e67f27a 100644
--- a/test/CodeGen/Mips/selTBtnezCmpi.ll
+++ b/test/CodeGen/Mips/selTBtnezCmpi.ll
@@ -8,10 +8,10 @@
define void @t() nounwind "target-cpu"="mips16" "target-features"="+mips16,+o32" {
entry:
- %0 = load i32* @a, align 4
+ %0 = load i32, i32* @a, align 4
%cmp = icmp ne i32 %0, 10
- %1 = load i32* @i, align 4
- %2 = load i32* @j, align 4
+ %1 = load i32, i32* @i, align 4
+ %2 = load i32, i32* @j, align 4
%cond = select i1 %cmp, i32 %1, i32 %2
store i32 %cond, i32* @i, align 4
ret void
diff --git a/test/CodeGen/Mips/selTBtnezSlti.ll b/test/CodeGen/Mips/selTBtnezSlti.ll
index 593f6f274eb3..2f1cdb866294 100644
--- a/test/CodeGen/Mips/selTBtnezSlti.ll
+++ b/test/CodeGen/Mips/selTBtnezSlti.ll
@@ -8,10 +8,10 @@
define void @t() nounwind "target-cpu"="mips16" "target-features"="+mips16,+o32" {
entry:
- %0 = load i32* @a, align 4
+ %0 = load i32, i32* @a, align 4
%cmp = icmp slt i32 %0, 10
- %1 = load i32* @j, align 4
- %2 = load i32* @i, align 4
+ %1 = load i32, i32* @j, align 4
+ %2 = load i32, i32* @i, align 4
%cond = select i1 %cmp, i32 %1, i32 %2
store i32 %cond, i32* @i, align 4
ret void
diff --git a/test/CodeGen/Mips/select.ll b/test/CodeGen/Mips/select.ll
index d6e1826c30c8..96bd3782c058 100644
--- a/test/CodeGen/Mips/select.ll
+++ b/test/CodeGen/Mips/select.ll
@@ -700,8 +700,8 @@ entry:
; 64R6: selnez $[[NE:[0-9]+]], $4, $[[CCGPR]]
; 64R6: or $2, $[[NE]], $[[EQ]]
- %tmp = load double* @d2, align 8
- %tmp1 = load double* @d3, align 8
+ %tmp = load double, double* @d2, align 8
+ %tmp1 = load double, double* @d3, align 8
%cmp = fcmp oeq double %tmp, %tmp1
%cond = select i1 %cmp, i32 %f0, i32 %f1
ret i32 %cond
@@ -777,8 +777,8 @@ entry:
; 64R6: selnez $[[NE:[0-9]+]], $4, $[[CCGPR]]
; 64R6: or $2, $[[NE]], $[[EQ]]
- %tmp = load double* @d2, align 8
- %tmp1 = load double* @d3, align 8
+ %tmp = load double, double* @d2, align 8
+ %tmp1 = load double, double* @d3, align 8
%cmp = fcmp olt double %tmp, %tmp1
%cond = select i1 %cmp, i32 %f0, i32 %f1
ret i32 %cond
@@ -854,8 +854,8 @@ entry:
; 64R6: selnez $[[NE:[0-9]+]], $4, $[[CCGPR]]
; 64R6: or $2, $[[NE]], $[[EQ]]
- %tmp = load double* @d2, align 8
- %tmp1 = load double* @d3, align 8
+ %tmp = load double, double* @d2, align 8
+ %tmp1 = load double, double* @d3, align 8
%cmp = fcmp ogt double %tmp, %tmp1
%cond = select i1 %cmp, i32 %f0, i32 %f1
ret i32 %cond
diff --git a/test/CodeGen/Mips/seleq.ll b/test/CodeGen/Mips/seleq.ll
index 9af422fa1bdb..bd25358fb9e6 100644
--- a/test/CodeGen/Mips/seleq.ll
+++ b/test/CodeGen/Mips/seleq.ll
@@ -12,65 +12,65 @@
define void @calc_seleq() nounwind {
entry:
- %0 = load i32* @a, align 4
- %1 = load i32* @b, align 4
+ %0 = load i32, i32* @a, align 4
+ %1 = load i32, i32* @b, align 4
%cmp = icmp eq i32 %0, %1
br i1 %cmp, label %cond.true, label %cond.false
cond.true: ; preds = %entry
- %2 = load i32* @f, align 4
+ %2 = load i32, i32* @f, align 4
br label %cond.end
cond.false: ; preds = %entry
- %3 = load i32* @t, align 4
+ %3 = load i32, i32* @t, align 4
br label %cond.end
cond.end: ; preds = %cond.false, %cond.true
%cond = phi i32 [ %2, %cond.true ], [ %3, %cond.false ]
store i32 %cond, i32* @z1, align 4
- %4 = load i32* @b, align 4
- %5 = load i32* @a, align 4
+ %4 = load i32, i32* @b, align 4
+ %5 = load i32, i32* @a, align 4
%cmp1 = icmp eq i32 %4, %5
br i1 %cmp1, label %cond.true2, label %cond.false3
cond.true2: ; preds = %cond.end
- %6 = load i32* @f, align 4
+ %6 = load i32, i32* @f, align 4
br label %cond.end4
cond.false3: ; preds = %cond.end
- %7 = load i32* @t, align 4
+ %7 = load i32, i32* @t, align 4
br label %cond.end4
cond.end4: ; preds = %cond.false3, %cond.true2
%cond5 = phi i32 [ %6, %cond.true2 ], [ %7, %cond.false3 ]
store i32 %cond5, i32* @z2, align 4
- %8 = load i32* @c, align 4
- %9 = load i32* @a, align 4
+ %8 = load i32, i32* @c, align 4
+ %9 = load i32, i32* @a, align 4
%cmp6 = icmp eq i32 %8, %9
br i1 %cmp6, label %cond.true7, label %cond.false8
cond.true7: ; preds = %cond.end4
- %10 = load i32* @t, align 4
+ %10 = load i32, i32* @t, align 4
br label %cond.end9
cond.false8: ; preds = %cond.end4
- %11 = load i32* @f, align 4
+ %11 = load i32, i32* @f, align 4
br label %cond.end9
cond.end9: ; preds = %cond.false8, %cond.true7
%cond10 = phi i32 [ %10, %cond.true7 ], [ %11, %cond.false8 ]
store i32 %cond10, i32* @z3, align 4
- %12 = load i32* @a, align 4
- %13 = load i32* @c, align 4
+ %12 = load i32, i32* @a, align 4
+ %13 = load i32, i32* @c, align 4
%cmp11 = icmp eq i32 %12, %13
br i1 %cmp11, label %cond.true12, label %cond.false13
cond.true12: ; preds = %cond.end9
- %14 = load i32* @t, align 4
+ %14 = load i32, i32* @t, align 4
br label %cond.end14
cond.false13: ; preds = %cond.end9
- %15 = load i32* @f, align 4
+ %15 = load i32, i32* @f, align 4
br label %cond.end14
cond.end14: ; preds = %cond.false13, %cond.true12
diff --git a/test/CodeGen/Mips/seleqk.ll b/test/CodeGen/Mips/seleqk.ll
index 3ca622d5d8fe..2eeaa9e33738 100644
--- a/test/CodeGen/Mips/seleqk.ll
+++ b/test/CodeGen/Mips/seleqk.ll
@@ -12,61 +12,61 @@
define void @calc_seleqk() nounwind "target-cpu"="mips16" "target-features"="+mips16,+o32" {
entry:
- %0 = load i32* @a, align 4
+ %0 = load i32, i32* @a, align 4
%cmp = icmp eq i32 %0, 1
br i1 %cmp, label %cond.true, label %cond.false
cond.true: ; preds = %entry
- %1 = load i32* @t, align 4
+ %1 = load i32, i32* @t, align 4
br label %cond.end
cond.false: ; preds = %entry
- %2 = load i32* @f, align 4
+ %2 = load i32, i32* @f, align 4
br label %cond.end
cond.end: ; preds = %cond.false, %cond.true
%cond = phi i32 [ %1, %cond.true ], [ %2, %cond.false ]
store i32 %cond, i32* @z1, align 4
- %3 = load i32* @a, align 4
+ %3 = load i32, i32* @a, align 4
%cmp1 = icmp eq i32 %3, 1000
br i1 %cmp1, label %cond.true2, label %cond.false3
cond.true2: ; preds = %cond.end
- %4 = load i32* @f, align 4
+ %4 = load i32, i32* @f, align 4
br label %cond.end4
cond.false3: ; preds = %cond.end
- %5 = load i32* @t, align 4
+ %5 = load i32, i32* @t, align 4
br label %cond.end4
cond.end4: ; preds = %cond.false3, %cond.true2
%cond5 = phi i32 [ %4, %cond.true2 ], [ %5, %cond.false3 ]
store i32 %cond5, i32* @z2, align 4
- %6 = load i32* @b, align 4
+ %6 = load i32, i32* @b, align 4
%cmp6 = icmp eq i32 %6, 3
br i1 %cmp6, label %cond.true7, label %cond.false8
cond.true7: ; preds = %cond.end4
- %7 = load i32* @f, align 4
+ %7 = load i32, i32* @f, align 4
br label %cond.end9
cond.false8: ; preds = %cond.end4
- %8 = load i32* @t, align 4
+ %8 = load i32, i32* @t, align 4
br label %cond.end9
cond.end9: ; preds = %cond.false8, %cond.true7
%cond10 = phi i32 [ %7, %cond.true7 ], [ %8, %cond.false8 ]
store i32 %cond10, i32* @z3, align 4
- %9 = load i32* @b, align 4
+ %9 = load i32, i32* @b, align 4
%cmp11 = icmp eq i32 %9, 1000
br i1 %cmp11, label %cond.true12, label %cond.false13
cond.true12: ; preds = %cond.end9
- %10 = load i32* @t, align 4
+ %10 = load i32, i32* @t, align 4
br label %cond.end14
cond.false13: ; preds = %cond.end9
- %11 = load i32* @f, align 4
+ %11 = load i32, i32* @f, align 4
br label %cond.end14
cond.end14: ; preds = %cond.false13, %cond.true12
diff --git a/test/CodeGen/Mips/selgek.ll b/test/CodeGen/Mips/selgek.ll
index 8ab4046e92cb..38ad95ee01a9 100644
--- a/test/CodeGen/Mips/selgek.ll
+++ b/test/CodeGen/Mips/selgek.ll
@@ -13,61 +13,61 @@
define void @calc_z() nounwind "target-cpu"="mips16" "target-features"="+mips16,+o32" {
entry:
- %0 = load i32* @a, align 4
+ %0 = load i32, i32* @a, align 4
%cmp = icmp sge i32 %0, 1000
br i1 %cmp, label %cond.true, label %cond.false
cond.true: ; preds = %entry
- %1 = load i32* @f, align 4
+ %1 = load i32, i32* @f, align 4
br label %cond.end
cond.false: ; preds = %entry
- %2 = load i32* @t, align 4
+ %2 = load i32, i32* @t, align 4
br label %cond.end
cond.end: ; preds = %cond.false, %cond.true
%cond = phi i32 [ %1, %cond.true ], [ %2, %cond.false ]
store i32 %cond, i32* @z1, align 4
- %3 = load i32* @b, align 4
+ %3 = load i32, i32* @b, align 4
%cmp1 = icmp sge i32 %3, 1
br i1 %cmp1, label %cond.true2, label %cond.false3
cond.true2: ; preds = %cond.end
- %4 = load i32* @t, align 4
+ %4 = load i32, i32* @t, align 4
br label %cond.end4
cond.false3: ; preds = %cond.end
- %5 = load i32* @f, align 4
+ %5 = load i32, i32* @f, align 4
br label %cond.end4
cond.end4: ; preds = %cond.false3, %cond.true2
%cond5 = phi i32 [ %4, %cond.true2 ], [ %5, %cond.false3 ]
store i32 %cond5, i32* @z2, align 4
- %6 = load i32* @c, align 4
+ %6 = load i32, i32* @c, align 4
%cmp6 = icmp sge i32 %6, 2
br i1 %cmp6, label %cond.true7, label %cond.false8
cond.true7: ; preds = %cond.end4
- %7 = load i32* @t, align 4
+ %7 = load i32, i32* @t, align 4
br label %cond.end9
cond.false8: ; preds = %cond.end4
- %8 = load i32* @f, align 4
+ %8 = load i32, i32* @f, align 4
br label %cond.end9
cond.end9: ; preds = %cond.false8, %cond.true7
%cond10 = phi i32 [ %7, %cond.true7 ], [ %8, %cond.false8 ]
store i32 %cond10, i32* @z3, align 4
- %9 = load i32* @a, align 4
+ %9 = load i32, i32* @a, align 4
%cmp11 = icmp sge i32 %9, 2
br i1 %cmp11, label %cond.true12, label %cond.false13
cond.true12: ; preds = %cond.end9
- %10 = load i32* @t, align 4
+ %10 = load i32, i32* @t, align 4
br label %cond.end14
cond.false13: ; preds = %cond.end9
- %11 = load i32* @f, align 4
+ %11 = load i32, i32* @f, align 4
br label %cond.end14
cond.end14: ; preds = %cond.false13, %cond.true12
diff --git a/test/CodeGen/Mips/selgt.ll b/test/CodeGen/Mips/selgt.ll
index 67b9b498709b..a2e1e39e742f 100644
--- a/test/CodeGen/Mips/selgt.ll
+++ b/test/CodeGen/Mips/selgt.ll
@@ -14,71 +14,71 @@
define i32 @calc_z() nounwind "target-cpu"="mips16" "target-features"="+mips16,+o32" {
entry:
%retval = alloca i32, align 4
- %0 = load i32* @a, align 4
- %1 = load i32* @b, align 4
+ %0 = load i32, i32* @a, align 4
+ %1 = load i32, i32* @b, align 4
%cmp = icmp sgt i32 %0, %1
br i1 %cmp, label %cond.true, label %cond.false
cond.true: ; preds = %entry
- %2 = load i32* @f, align 4
+ %2 = load i32, i32* @f, align 4
br label %cond.end
cond.false: ; preds = %entry
- %3 = load i32* @t, align 4
+ %3 = load i32, i32* @t, align 4
br label %cond.end
cond.end: ; preds = %cond.false, %cond.true
%cond = phi i32 [ %2, %cond.true ], [ %3, %cond.false ]
store i32 %cond, i32* @z1, align 4
- %4 = load i32* @b, align 4
- %5 = load i32* @a, align 4
+ %4 = load i32, i32* @b, align 4
+ %5 = load i32, i32* @a, align 4
%cmp1 = icmp sgt i32 %4, %5
br i1 %cmp1, label %cond.true2, label %cond.false3
cond.true2: ; preds = %cond.end
- %6 = load i32* @t, align 4
+ %6 = load i32, i32* @t, align 4
br label %cond.end4
cond.false3: ; preds = %cond.end
- %7 = load i32* @f, align 4
+ %7 = load i32, i32* @f, align 4
br label %cond.end4
cond.end4: ; preds = %cond.false3, %cond.true2
%cond5 = phi i32 [ %6, %cond.true2 ], [ %7, %cond.false3 ]
store i32 %cond5, i32* @z2, align 4
- %8 = load i32* @c, align 4
- %9 = load i32* @a, align 4
+ %8 = load i32, i32* @c, align 4
+ %9 = load i32, i32* @a, align 4
%cmp6 = icmp sgt i32 %8, %9
br i1 %cmp6, label %cond.true7, label %cond.false8
cond.true7: ; preds = %cond.end4
- %10 = load i32* @f, align 4
+ %10 = load i32, i32* @f, align 4
br label %cond.end9
cond.false8: ; preds = %cond.end4
- %11 = load i32* @t, align 4
+ %11 = load i32, i32* @t, align 4
br label %cond.end9
cond.end9: ; preds = %cond.false8, %cond.true7
%cond10 = phi i32 [ %10, %cond.true7 ], [ %11, %cond.false8 ]
store i32 %cond10, i32* @z3, align 4
- %12 = load i32* @a, align 4
- %13 = load i32* @c, align 4
+ %12 = load i32, i32* @a, align 4
+ %13 = load i32, i32* @c, align 4
%cmp11 = icmp sgt i32 %12, %13
br i1 %cmp11, label %cond.true12, label %cond.false13
cond.true12: ; preds = %cond.end9
- %14 = load i32* @f, align 4
+ %14 = load i32, i32* @f, align 4
br label %cond.end14
cond.false13: ; preds = %cond.end9
- %15 = load i32* @t, align 4
+ %15 = load i32, i32* @t, align 4
br label %cond.end14
cond.end14: ; preds = %cond.false13, %cond.true12
%cond15 = phi i32 [ %14, %cond.true12 ], [ %15, %cond.false13 ]
store i32 %cond15, i32* @z4, align 4
- %16 = load i32* %retval
+ %16 = load i32, i32* %retval
ret i32 %16
}
diff --git a/test/CodeGen/Mips/selle.ll b/test/CodeGen/Mips/selle.ll
index b27df45e6739..1adefb7846e4 100644
--- a/test/CodeGen/Mips/selle.ll
+++ b/test/CodeGen/Mips/selle.ll
@@ -13,65 +13,65 @@
define void @calc_z() nounwind "target-cpu"="mips16" "target-features"="+mips16,+o32" {
entry:
- %0 = load i32* @a, align 4
- %1 = load i32* @b, align 4
+ %0 = load i32, i32* @a, align 4
+ %1 = load i32, i32* @b, align 4
%cmp = icmp sle i32 %0, %1
br i1 %cmp, label %cond.true, label %cond.false
cond.true: ; preds = %entry
- %2 = load i32* @t, align 4
+ %2 = load i32, i32* @t, align 4
br label %cond.end
cond.false: ; preds = %entry
- %3 = load i32* @f, align 4
+ %3 = load i32, i32* @f, align 4
br label %cond.end
cond.end: ; preds = %cond.false, %cond.true
%cond = phi i32 [ %2, %cond.true ], [ %3, %cond.false ]
store i32 %cond, i32* @z1, align 4
- %4 = load i32* @b, align 4
- %5 = load i32* @a, align 4
+ %4 = load i32, i32* @b, align 4
+ %5 = load i32, i32* @a, align 4
%cmp1 = icmp sle i32 %4, %5
br i1 %cmp1, label %cond.true2, label %cond.false3
cond.true2: ; preds = %cond.end
- %6 = load i32* @f, align 4
+ %6 = load i32, i32* @f, align 4
br label %cond.end4
cond.false3: ; preds = %cond.end
- %7 = load i32* @t, align 4
+ %7 = load i32, i32* @t, align 4
br label %cond.end4
cond.end4: ; preds = %cond.false3, %cond.true2
%cond5 = phi i32 [ %6, %cond.true2 ], [ %7, %cond.false3 ]
store i32 %cond5, i32* @z2, align 4
- %8 = load i32* @c, align 4
- %9 = load i32* @a, align 4
+ %8 = load i32, i32* @c, align 4
+ %9 = load i32, i32* @a, align 4
%cmp6 = icmp sle i32 %8, %9
br i1 %cmp6, label %cond.true7, label %cond.false8
cond.true7: ; preds = %cond.end4
- %10 = load i32* @t, align 4
+ %10 = load i32, i32* @t, align 4
br label %cond.end9
cond.false8: ; preds = %cond.end4
- %11 = load i32* @f, align 4
+ %11 = load i32, i32* @f, align 4
br label %cond.end9
cond.end9: ; preds = %cond.false8, %cond.true7
%cond10 = phi i32 [ %10, %cond.true7 ], [ %11, %cond.false8 ]
store i32 %cond10, i32* @z3, align 4
- %12 = load i32* @a, align 4
- %13 = load i32* @c, align 4
+ %12 = load i32, i32* @a, align 4
+ %13 = load i32, i32* @c, align 4
%cmp11 = icmp sle i32 %12, %13
br i1 %cmp11, label %cond.true12, label %cond.false13
cond.true12: ; preds = %cond.end9
- %14 = load i32* @t, align 4
+ %14 = load i32, i32* @t, align 4
br label %cond.end14
cond.false13: ; preds = %cond.end9
- %15 = load i32* @f, align 4
+ %15 = load i32, i32* @f, align 4
br label %cond.end14
cond.end14: ; preds = %cond.false13, %cond.true12
diff --git a/test/CodeGen/Mips/selltk.ll b/test/CodeGen/Mips/selltk.ll
index 1471b892c92a..db9f8c171b78 100644
--- a/test/CodeGen/Mips/selltk.ll
+++ b/test/CodeGen/Mips/selltk.ll
@@ -13,61 +13,61 @@
define void @calc_selltk() nounwind "target-cpu"="mips16" "target-features"="+mips16,+o32" {
entry:
- %0 = load i32* @a, align 4
+ %0 = load i32, i32* @a, align 4
%cmp = icmp slt i32 %0, 1000
br i1 %cmp, label %cond.true, label %cond.false
cond.true: ; preds = %entry
- %1 = load i32* @t, align 4
+ %1 = load i32, i32* @t, align 4
br label %cond.end
cond.false: ; preds = %entry
- %2 = load i32* @f, align 4
+ %2 = load i32, i32* @f, align 4
br label %cond.end
cond.end: ; preds = %cond.false, %cond.true
%cond = phi i32 [ %1, %cond.true ], [ %2, %cond.false ]
store i32 %cond, i32* @z1, align 4
- %3 = load i32* @b, align 4
+ %3 = load i32, i32* @b, align 4
%cmp1 = icmp slt i32 %3, 2
br i1 %cmp1, label %cond.true2, label %cond.false3
cond.true2: ; preds = %cond.end
- %4 = load i32* @f, align 4
+ %4 = load i32, i32* @f, align 4
br label %cond.end4
cond.false3: ; preds = %cond.end
- %5 = load i32* @t, align 4
+ %5 = load i32, i32* @t, align 4
br label %cond.end4
cond.end4: ; preds = %cond.false3, %cond.true2
%cond5 = phi i32 [ %4, %cond.true2 ], [ %5, %cond.false3 ]
store i32 %cond5, i32* @z2, align 4
- %6 = load i32* @c, align 4
+ %6 = load i32, i32* @c, align 4
%cmp6 = icmp sgt i32 %6, 2
br i1 %cmp6, label %cond.true7, label %cond.false8
cond.true7: ; preds = %cond.end4
- %7 = load i32* @f, align 4
+ %7 = load i32, i32* @f, align 4
br label %cond.end9
cond.false8: ; preds = %cond.end4
- %8 = load i32* @t, align 4
+ %8 = load i32, i32* @t, align 4
br label %cond.end9
cond.end9: ; preds = %cond.false8, %cond.true7
%cond10 = phi i32 [ %7, %cond.true7 ], [ %8, %cond.false8 ]
store i32 %cond10, i32* @z3, align 4
- %9 = load i32* @a, align 4
+ %9 = load i32, i32* @a, align 4
%cmp11 = icmp sgt i32 %9, 2
br i1 %cmp11, label %cond.true12, label %cond.false13
cond.true12: ; preds = %cond.end9
- %10 = load i32* @f, align 4
+ %10 = load i32, i32* @f, align 4
br label %cond.end14
cond.false13: ; preds = %cond.end9
- %11 = load i32* @t, align 4
+ %11 = load i32, i32* @t, align 4
br label %cond.end14
cond.end14: ; preds = %cond.false13, %cond.true12
diff --git a/test/CodeGen/Mips/selne.ll b/test/CodeGen/Mips/selne.ll
index e3d82b8cf5d0..9be99d669475 100644
--- a/test/CodeGen/Mips/selne.ll
+++ b/test/CodeGen/Mips/selne.ll
@@ -13,65 +13,65 @@
define void @calc_seleq() nounwind "target-cpu"="mips16" "target-features"="+mips16,+o32" {
entry:
- %0 = load i32* @a, align 4
- %1 = load i32* @b, align 4
+ %0 = load i32, i32* @a, align 4
+ %1 = load i32, i32* @b, align 4
%cmp = icmp ne i32 %0, %1
br i1 %cmp, label %cond.true, label %cond.false
cond.true: ; preds = %entry
- %2 = load i32* @f, align 4
+ %2 = load i32, i32* @f, align 4
br label %cond.end
cond.false: ; preds = %entry
- %3 = load i32* @t, align 4
+ %3 = load i32, i32* @t, align 4
br label %cond.end
cond.end: ; preds = %cond.false, %cond.true
%cond = phi i32 [ %2, %cond.true ], [ %3, %cond.false ]
store i32 %cond, i32* @z1, align 4
- %4 = load i32* @b, align 4
- %5 = load i32* @a, align 4
+ %4 = load i32, i32* @b, align 4
+ %5 = load i32, i32* @a, align 4
%cmp1 = icmp ne i32 %4, %5
br i1 %cmp1, label %cond.true2, label %cond.false3
cond.true2: ; preds = %cond.end
- %6 = load i32* @f, align 4
+ %6 = load i32, i32* @f, align 4
br label %cond.end4
cond.false3: ; preds = %cond.end
- %7 = load i32* @t, align 4
+ %7 = load i32, i32* @t, align 4
br label %cond.end4
cond.end4: ; preds = %cond.false3, %cond.true2
%cond5 = phi i32 [ %6, %cond.true2 ], [ %7, %cond.false3 ]
store i32 %cond5, i32* @z2, align 4
- %8 = load i32* @c, align 4
- %9 = load i32* @a, align 4
+ %8 = load i32, i32* @c, align 4
+ %9 = load i32, i32* @a, align 4
%cmp6 = icmp ne i32 %8, %9
br i1 %cmp6, label %cond.true7, label %cond.false8
cond.true7: ; preds = %cond.end4
- %10 = load i32* @t, align 4
+ %10 = load i32, i32* @t, align 4
br label %cond.end9
cond.false8: ; preds = %cond.end4
- %11 = load i32* @f, align 4
+ %11 = load i32, i32* @f, align 4
br label %cond.end9
cond.end9: ; preds = %cond.false8, %cond.true7
%cond10 = phi i32 [ %10, %cond.true7 ], [ %11, %cond.false8 ]
store i32 %cond10, i32* @z3, align 4
- %12 = load i32* @a, align 4
- %13 = load i32* @c, align 4
+ %12 = load i32, i32* @a, align 4
+ %13 = load i32, i32* @c, align 4
%cmp11 = icmp ne i32 %12, %13
br i1 %cmp11, label %cond.true12, label %cond.false13
cond.true12: ; preds = %cond.end9
- %14 = load i32* @t, align 4
+ %14 = load i32, i32* @t, align 4
br label %cond.end14
cond.false13: ; preds = %cond.end9
- %15 = load i32* @f, align 4
+ %15 = load i32, i32* @f, align 4
br label %cond.end14
cond.end14: ; preds = %cond.false13, %cond.true12
diff --git a/test/CodeGen/Mips/selnek.ll b/test/CodeGen/Mips/selnek.ll
index 64834b256fe5..5b6aa2afa1af 100644
--- a/test/CodeGen/Mips/selnek.ll
+++ b/test/CodeGen/Mips/selnek.ll
@@ -12,61 +12,61 @@
define void @calc_z() nounwind "target-cpu"="mips16" "target-features"="+mips16,+o32" {
entry:
- %0 = load i32* @a, align 4
+ %0 = load i32, i32* @a, align 4
%cmp = icmp ne i32 %0, 1
br i1 %cmp, label %cond.true, label %cond.false
cond.true: ; preds = %entry
- %1 = load i32* @f, align 4
+ %1 = load i32, i32* @f, align 4
br label %cond.end
cond.false: ; preds = %entry
- %2 = load i32* @t, align 4
+ %2 = load i32, i32* @t, align 4
br label %cond.end
cond.end: ; preds = %cond.false, %cond.true
%cond = phi i32 [ %1, %cond.true ], [ %2, %cond.false ]
store i32 %cond, i32* @z1, align 4
- %3 = load i32* @a, align 4
+ %3 = load i32, i32* @a, align 4
%cmp1 = icmp ne i32 %3, 1000
br i1 %cmp1, label %cond.true2, label %cond.false3
cond.true2: ; preds = %cond.end
- %4 = load i32* @t, align 4
+ %4 = load i32, i32* @t, align 4
br label %cond.end4
cond.false3: ; preds = %cond.end
- %5 = load i32* @f, align 4
+ %5 = load i32, i32* @f, align 4
br label %cond.end4
cond.end4: ; preds = %cond.false3, %cond.true2
%cond5 = phi i32 [ %4, %cond.true2 ], [ %5, %cond.false3 ]
store i32 %cond5, i32* @z2, align 4
- %6 = load i32* @b, align 4
+ %6 = load i32, i32* @b, align 4
%cmp6 = icmp ne i32 %6, 3
br i1 %cmp6, label %cond.true7, label %cond.false8
cond.true7: ; preds = %cond.end4
- %7 = load i32* @t, align 4
+ %7 = load i32, i32* @t, align 4
br label %cond.end9
cond.false8: ; preds = %cond.end4
- %8 = load i32* @f, align 4
+ %8 = load i32, i32* @f, align 4
br label %cond.end9
cond.end9: ; preds = %cond.false8, %cond.true7
%cond10 = phi i32 [ %7, %cond.true7 ], [ %8, %cond.false8 ]
store i32 %cond10, i32* @z3, align 4
- %9 = load i32* @b, align 4
+ %9 = load i32, i32* @b, align 4
%cmp11 = icmp ne i32 %9, 1000
br i1 %cmp11, label %cond.true12, label %cond.false13
cond.true12: ; preds = %cond.end9
- %10 = load i32* @f, align 4
+ %10 = load i32, i32* @f, align 4
br label %cond.end14
cond.false13: ; preds = %cond.end9
- %11 = load i32* @t, align 4
+ %11 = load i32, i32* @t, align 4
br label %cond.end14
cond.end14: ; preds = %cond.false13, %cond.true12
@@ -78,14 +78,14 @@ cond.end14: ; preds = %cond.false13, %cond
define i32 @main() nounwind "target-cpu"="mips16" "target-features"="+mips16,+o32" {
entry:
call void @calc_z() "target-cpu"="mips16" "target-features"="+mips16,+o32"
- %0 = load i32* @z1, align 4
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([5 x i8]* @.str, i32 0, i32 0), i32 %0) "target-cpu"="mips16" "target-features"="+mips16,+o32"
- %1 = load i32* @z2, align 4
- %call1 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([5 x i8]* @.str, i32 0, i32 0), i32 %1) "target-cpu"="mips16" "target-features"="+mips16,+o32"
- %2 = load i32* @z3, align 4
- %call2 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([5 x i8]* @.str, i32 0, i32 0), i32 %2) "target-cpu"="mips16" "target-features"="+mips16,+o32"
- %3 = load i32* @z4, align 4
- %call3 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([5 x i8]* @.str, i32 0, i32 0), i32 %3) "target-cpu"="mips16" "target-features"="+mips16,+o32"
+ %0 = load i32, i32* @z1, align 4
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i32 0, i32 0), i32 %0) "target-cpu"="mips16" "target-features"="+mips16,+o32"
+ %1 = load i32, i32* @z2, align 4
+ %call1 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i32 0, i32 0), i32 %1) "target-cpu"="mips16" "target-features"="+mips16,+o32"
+ %2 = load i32, i32* @z3, align 4
+ %call2 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i32 0, i32 0), i32 %2) "target-cpu"="mips16" "target-features"="+mips16,+o32"
+ %3 = load i32, i32* @z4, align 4
+ %call3 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i32 0, i32 0), i32 %3) "target-cpu"="mips16" "target-features"="+mips16,+o32"
ret i32 0
}
diff --git a/test/CodeGen/Mips/selpat.ll b/test/CodeGen/Mips/selpat.ll
index 8eda8de45e08..c682d8182a46 100644
--- a/test/CodeGen/Mips/selpat.ll
+++ b/test/CodeGen/Mips/selpat.ll
@@ -12,18 +12,18 @@
define void @calc_seleq() nounwind {
entry:
- %0 = load i32* @a, align 4
- %1 = load i32* @b, align 4
+ %0 = load i32, i32* @a, align 4
+ %1 = load i32, i32* @b, align 4
%cmp = icmp eq i32 %0, %1
- %2 = load i32* @f, align 4
- %3 = load i32* @t, align 4
+ %2 = load i32, i32* @f, align 4
+ %3 = load i32, i32* @t, align 4
%cond = select i1 %cmp, i32 %2, i32 %3
store i32 %cond, i32* @z1, align 4
; 16: cmp ${{[0-9]+}}, ${{[0-9]+}}
; 16: bteqz $BB{{[0-9]+}}_{{[0-9]}}
; 16: move ${{[0-9]+}}, ${{[0-9]+}}
store i32 %cond, i32* @z2, align 4
- %4 = load i32* @c, align 4
+ %4 = load i32, i32* @c, align 4
%cmp6 = icmp eq i32 %4, %0
%cond10 = select i1 %cmp6, i32 %3, i32 %2
store i32 %cond10, i32* @z3, align 4
@@ -34,10 +34,10 @@ entry:
define void @calc_seleqk() nounwind {
entry:
- %0 = load i32* @a, align 4
+ %0 = load i32, i32* @a, align 4
%cmp = icmp eq i32 %0, 1
- %1 = load i32* @t, align 4
- %2 = load i32* @f, align 4
+ %1 = load i32, i32* @t, align 4
+ %2 = load i32, i32* @f, align 4
%cond = select i1 %cmp, i32 %1, i32 %2
store i32 %cond, i32* @z1, align 4
; 16: cmpi ${{[0-9]+}}, 1
@@ -46,7 +46,7 @@ entry:
%cmp1 = icmp eq i32 %0, 10
%cond5 = select i1 %cmp1, i32 %2, i32 %1
store i32 %cond5, i32* @z2, align 4
- %3 = load i32* @b, align 4
+ %3 = load i32, i32* @b, align 4
%cmp6 = icmp eq i32 %3, 3
%cond10 = select i1 %cmp6, i32 %2, i32 %1
store i32 %cond10, i32* @z3, align 4
@@ -61,19 +61,19 @@ entry:
define void @calc_seleqz() nounwind {
entry:
- %0 = load i32* @a, align 4
+ %0 = load i32, i32* @a, align 4
%cmp = icmp eq i32 %0, 0
- %1 = load i32* @t, align 4
- %2 = load i32* @f, align 4
+ %1 = load i32, i32* @t, align 4
+ %2 = load i32, i32* @f, align 4
%cond = select i1 %cmp, i32 %1, i32 %2
store i32 %cond, i32* @z1, align 4
; 16: beqz ${{[0-9]+}}, $BB{{[0-9]+}}_{{[0-9]}}
; 16: move ${{[0-9]+}}, ${{[0-9]+}}
- %3 = load i32* @b, align 4
+ %3 = load i32, i32* @b, align 4
%cmp1 = icmp eq i32 %3, 0
%cond5 = select i1 %cmp1, i32 %2, i32 %1
store i32 %cond5, i32* @z2, align 4
- %4 = load i32* @c, align 4
+ %4 = load i32, i32* @c, align 4
%cmp6 = icmp eq i32 %4, 0
%cond10 = select i1 %cmp6, i32 %1, i32 %2
store i32 %cond10, i32* @z3, align 4
@@ -83,11 +83,11 @@ entry:
define void @calc_selge() nounwind {
entry:
- %0 = load i32* @a, align 4
- %1 = load i32* @b, align 4
+ %0 = load i32, i32* @a, align 4
+ %1 = load i32, i32* @b, align 4
%cmp = icmp sge i32 %0, %1
- %2 = load i32* @f, align 4
- %3 = load i32* @t, align 4
+ %2 = load i32, i32* @f, align 4
+ %3 = load i32, i32* @t, align 4
%cond = select i1 %cmp, i32 %2, i32 %3
store i32 %cond, i32* @z1, align 4
; 16: slt ${{[0-9]+}}, ${{[0-9]+}}
@@ -96,7 +96,7 @@ entry:
%cmp1 = icmp sge i32 %1, %0
%cond5 = select i1 %cmp1, i32 %3, i32 %2
store i32 %cond5, i32* @z2, align 4
- %4 = load i32* @c, align 4
+ %4 = load i32, i32* @c, align 4
%cmp6 = icmp sge i32 %4, %0
%cond10 = select i1 %cmp6, i32 %3, i32 %2
store i32 %cond10, i32* @z3, align 4
@@ -108,20 +108,20 @@ entry:
define i32 @calc_selgt() nounwind {
entry:
- %0 = load i32* @a, align 4
- %1 = load i32* @b, align 4
+ %0 = load i32, i32* @a, align 4
+ %1 = load i32, i32* @b, align 4
%cmp = icmp sgt i32 %0, %1
; 16: slt ${{[0-9]+}}, ${{[0-9]+}}
; 16: btnez $BB{{[0-9]+}}_{{[0-9]}}
; 16: move ${{[0-9]+}}, ${{[0-9]+}}
- %2 = load i32* @f, align 4
- %3 = load i32* @t, align 4
+ %2 = load i32, i32* @f, align 4
+ %3 = load i32, i32* @t, align 4
%cond = select i1 %cmp, i32 %2, i32 %3
store i32 %cond, i32* @z1, align 4
%cmp1 = icmp sgt i32 %1, %0
%cond5 = select i1 %cmp1, i32 %3, i32 %2
store i32 %cond5, i32* @z2, align 4
- %4 = load i32* @c, align 4
+ %4 = load i32, i32* @c, align 4
%cmp6 = icmp sgt i32 %4, %0
%cond10 = select i1 %cmp6, i32 %2, i32 %3
store i32 %cond10, i32* @z3, align 4
@@ -133,11 +133,11 @@ entry:
define void @calc_selle() nounwind {
entry:
- %0 = load i32* @a, align 4
- %1 = load i32* @b, align 4
+ %0 = load i32, i32* @a, align 4
+ %1 = load i32, i32* @b, align 4
%cmp = icmp sle i32 %0, %1
- %2 = load i32* @t, align 4
- %3 = load i32* @f, align 4
+ %2 = load i32, i32* @t, align 4
+ %3 = load i32, i32* @f, align 4
%cond = select i1 %cmp, i32 %2, i32 %3
store i32 %cond, i32* @z1, align 4
; 16: slt ${{[0-9]+}}, ${{[0-9]+}}
@@ -146,7 +146,7 @@ entry:
%cmp1 = icmp sle i32 %1, %0
%cond5 = select i1 %cmp1, i32 %3, i32 %2
store i32 %cond5, i32* @z2, align 4
- %4 = load i32* @c, align 4
+ %4 = load i32, i32* @c, align 4
%cmp6 = icmp sle i32 %4, %0
%cond10 = select i1 %cmp6, i32 %2, i32 %3
store i32 %cond10, i32* @z3, align 4
@@ -158,20 +158,20 @@ entry:
define void @calc_selltk() nounwind {
entry:
- %0 = load i32* @a, align 4
+ %0 = load i32, i32* @a, align 4
%cmp = icmp slt i32 %0, 10
- %1 = load i32* @t, align 4
- %2 = load i32* @f, align 4
+ %1 = load i32, i32* @t, align 4
+ %2 = load i32, i32* @f, align 4
%cond = select i1 %cmp, i32 %1, i32 %2
store i32 %cond, i32* @z1, align 4
; 16: slti ${{[0-9]+}}, {{[0-9]+}}
; 16: btnez $BB{{[0-9]+}}_{{[0-9]}}
; 16: move ${{[0-9]+}}, ${{[0-9]+}}
- %3 = load i32* @b, align 4
+ %3 = load i32, i32* @b, align 4
%cmp1 = icmp slt i32 %3, 2
%cond5 = select i1 %cmp1, i32 %2, i32 %1
store i32 %cond5, i32* @z2, align 4
- %4 = load i32* @c, align 4
+ %4 = load i32, i32* @c, align 4
%cmp6 = icmp sgt i32 %4, 2
%cond10 = select i1 %cmp6, i32 %2, i32 %1
store i32 %cond10, i32* @z3, align 4
@@ -184,18 +184,18 @@ entry:
define void @calc_selne() nounwind {
entry:
- %0 = load i32* @a, align 4
- %1 = load i32* @b, align 4
+ %0 = load i32, i32* @a, align 4
+ %1 = load i32, i32* @b, align 4
%cmp = icmp ne i32 %0, %1
- %2 = load i32* @t, align 4
- %3 = load i32* @f, align 4
+ %2 = load i32, i32* @t, align 4
+ %3 = load i32, i32* @f, align 4
%cond = select i1 %cmp, i32 %2, i32 %3
store i32 %cond, i32* @z1, align 4
; 16: cmp ${{[0-9]+}}, ${{[0-9]+}}
; 16: btnez $BB{{[0-9]+}}_{{[0-9]}}
; 16: move ${{[0-9]+}}, ${{[0-9]+}}
store i32 %cond, i32* @z2, align 4
- %4 = load i32* @c, align 4
+ %4 = load i32, i32* @c, align 4
%cmp6 = icmp ne i32 %4, %0
%cond10 = select i1 %cmp6, i32 %3, i32 %2
store i32 %cond10, i32* @z3, align 4
@@ -205,10 +205,10 @@ entry:
define void @calc_selnek() nounwind {
entry:
- %0 = load i32* @a, align 4
+ %0 = load i32, i32* @a, align 4
%cmp = icmp ne i32 %0, 1
- %1 = load i32* @f, align 4
- %2 = load i32* @t, align 4
+ %1 = load i32, i32* @f, align 4
+ %2 = load i32, i32* @t, align 4
%cond = select i1 %cmp, i32 %1, i32 %2
store i32 %cond, i32* @z1, align 4
; 16: cmpi ${{[0-9]+}}, 1
@@ -217,7 +217,7 @@ entry:
%cmp1 = icmp ne i32 %0, 10
%cond5 = select i1 %cmp1, i32 %2, i32 %1
store i32 %cond5, i32* @z2, align 4
- %3 = load i32* @b, align 4
+ %3 = load i32, i32* @b, align 4
%cmp6 = icmp ne i32 %3, 3
%cond10 = select i1 %cmp6, i32 %2, i32 %1
store i32 %cond10, i32* @z3, align 4
@@ -232,19 +232,19 @@ entry:
define void @calc_selnez() nounwind {
entry:
- %0 = load i32* @a, align 4
+ %0 = load i32, i32* @a, align 4
%cmp = icmp ne i32 %0, 0
- %1 = load i32* @f, align 4
- %2 = load i32* @t, align 4
+ %1 = load i32, i32* @f, align 4
+ %2 = load i32, i32* @t, align 4
%cond = select i1 %cmp, i32 %1, i32 %2
store i32 %cond, i32* @z1, align 4
; 16: bnez ${{[0-9]+}}, $BB{{[0-9]+}}_{{[0-9]}}
; 16: move ${{[0-9]+}}, ${{[0-9]+}}
- %3 = load i32* @b, align 4
+ %3 = load i32, i32* @b, align 4
%cmp1 = icmp ne i32 %3, 0
%cond5 = select i1 %cmp1, i32 %2, i32 %1
store i32 %cond5, i32* @z2, align 4
- %4 = load i32* @c, align 4
+ %4 = load i32, i32* @c, align 4
%cmp6 = icmp ne i32 %4, 0
%cond10 = select i1 %cmp6, i32 %1, i32 %2
store i32 %cond10, i32* @z3, align 4
@@ -254,19 +254,19 @@ entry:
define void @calc_selnez2() nounwind {
entry:
- %0 = load i32* @a, align 4
+ %0 = load i32, i32* @a, align 4
%tobool = icmp ne i32 %0, 0
- %1 = load i32* @f, align 4
- %2 = load i32* @t, align 4
+ %1 = load i32, i32* @f, align 4
+ %2 = load i32, i32* @t, align 4
%cond = select i1 %tobool, i32 %1, i32 %2
store i32 %cond, i32* @z1, align 4
; 16: bnez ${{[0-9]+}}, $BB{{[0-9]+}}_{{[0-9]}}
; 16: move ${{[0-9]+}}, ${{[0-9]+}}
- %3 = load i32* @b, align 4
+ %3 = load i32, i32* @b, align 4
%tobool1 = icmp ne i32 %3, 0
%cond5 = select i1 %tobool1, i32 %2, i32 %1
store i32 %cond5, i32* @z2, align 4
- %4 = load i32* @c, align 4
+ %4 = load i32, i32* @c, align 4
%tobool6 = icmp ne i32 %4, 0
%cond10 = select i1 %tobool6, i32 %1, i32 %2
store i32 %cond10, i32* @z3, align 4
@@ -276,11 +276,11 @@ entry:
define void @calc_seluge() nounwind {
entry:
- %0 = load i32* @a, align 4
- %1 = load i32* @b, align 4
+ %0 = load i32, i32* @a, align 4
+ %1 = load i32, i32* @b, align 4
%cmp = icmp uge i32 %0, %1
- %2 = load i32* @f, align 4
- %3 = load i32* @t, align 4
+ %2 = load i32, i32* @f, align 4
+ %3 = load i32, i32* @t, align 4
%cond = select i1 %cmp, i32 %2, i32 %3
store i32 %cond, i32* @z1, align 4
; 16: sltu ${{[0-9]+}}, ${{[0-9]+}}
@@ -289,7 +289,7 @@ entry:
%cmp1 = icmp uge i32 %1, %0
%cond5 = select i1 %cmp1, i32 %3, i32 %2
store i32 %cond5, i32* @z2, align 4
- %4 = load i32* @c, align 4
+ %4 = load i32, i32* @c, align 4
%cmp6 = icmp uge i32 %4, %0
%cond10 = select i1 %cmp6, i32 %3, i32 %2
store i32 %cond10, i32* @z3, align 4
@@ -301,11 +301,11 @@ entry:
define void @calc_selugt() nounwind {
entry:
- %0 = load i32* @a, align 4
- %1 = load i32* @b, align 4
+ %0 = load i32, i32* @a, align 4
+ %1 = load i32, i32* @b, align 4
%cmp = icmp ugt i32 %0, %1
- %2 = load i32* @f, align 4
- %3 = load i32* @t, align 4
+ %2 = load i32, i32* @f, align 4
+ %3 = load i32, i32* @t, align 4
%cond = select i1 %cmp, i32 %2, i32 %3
store i32 %cond, i32* @z1, align 4
; 16: sltu ${{[0-9]+}}, ${{[0-9]+}}
@@ -314,7 +314,7 @@ entry:
%cmp1 = icmp ugt i32 %1, %0
%cond5 = select i1 %cmp1, i32 %3, i32 %2
store i32 %cond5, i32* @z2, align 4
- %4 = load i32* @c, align 4
+ %4 = load i32, i32* @c, align 4
%cmp6 = icmp ugt i32 %4, %0
%cond10 = select i1 %cmp6, i32 %2, i32 %3
store i32 %cond10, i32* @z3, align 4
@@ -326,11 +326,11 @@ entry:
define void @calc_selule() nounwind {
entry:
- %0 = load i32* @a, align 4
- %1 = load i32* @b, align 4
+ %0 = load i32, i32* @a, align 4
+ %1 = load i32, i32* @b, align 4
%cmp = icmp ule i32 %0, %1
- %2 = load i32* @t, align 4
- %3 = load i32* @f, align 4
+ %2 = load i32, i32* @t, align 4
+ %3 = load i32, i32* @f, align 4
%cond = select i1 %cmp, i32 %2, i32 %3
store i32 %cond, i32* @z1, align 4
; 16: sltu ${{[0-9]+}}, ${{[0-9]+}}
@@ -339,7 +339,7 @@ entry:
%cmp1 = icmp ule i32 %1, %0
%cond5 = select i1 %cmp1, i32 %3, i32 %2
store i32 %cond5, i32* @z2, align 4
- %4 = load i32* @c, align 4
+ %4 = load i32, i32* @c, align 4
%cmp6 = icmp ule i32 %4, %0
%cond10 = select i1 %cmp6, i32 %2, i32 %3
store i32 %cond10, i32* @z3, align 4
diff --git a/test/CodeGen/Mips/seteq.ll b/test/CodeGen/Mips/seteq.ll
index 5fadf78d57a0..8fad6122bdbe 100644
--- a/test/CodeGen/Mips/seteq.ll
+++ b/test/CodeGen/Mips/seteq.ll
@@ -8,8 +8,8 @@
define void @test() nounwind {
entry:
- %0 = load i32* @i, align 4
- %1 = load i32* @k, align 4
+ %0 = load i32, i32* @i, align 4
+ %1 = load i32, i32* @k, align 4
%cmp = icmp eq i32 %0, %1
%conv = zext i1 %cmp to i32
store i32 %conv, i32* @r1, align 4
diff --git a/test/CodeGen/Mips/seteqz.ll b/test/CodeGen/Mips/seteqz.ll
index 80dc3120a6a1..8e9a4beac75b 100644
--- a/test/CodeGen/Mips/seteqz.ll
+++ b/test/CodeGen/Mips/seteqz.ll
@@ -7,13 +7,13 @@
define void @test() nounwind {
entry:
- %0 = load i32* @i, align 4
+ %0 = load i32, i32* @i, align 4
%cmp = icmp eq i32 %0, 0
%conv = zext i1 %cmp to i32
store i32 %conv, i32* @r1, align 4
; 16: sltiu ${{[0-9]+}}, 1
; 16: move ${{[0-9]+}}, $24
- %1 = load i32* @j, align 4
+ %1 = load i32, i32* @j, align 4
%cmp1 = icmp eq i32 %1, 99
%conv2 = zext i1 %cmp1 to i32
store i32 %conv2, i32* @r2, align 4
diff --git a/test/CodeGen/Mips/setge.ll b/test/CodeGen/Mips/setge.ll
index 8869eb8fc547..8fb729964cf5 100644
--- a/test/CodeGen/Mips/setge.ll
+++ b/test/CodeGen/Mips/setge.ll
@@ -11,15 +11,15 @@
define void @test() nounwind {
entry:
- %0 = load i32* @k, align 4
- %1 = load i32* @j, align 4
+ %0 = load i32, i32* @k, align 4
+ %1 = load i32, i32* @j, align 4
%cmp = icmp sge i32 %0, %1
%conv = zext i1 %cmp to i32
store i32 %conv, i32* @r1, align 4
; 16: slt ${{[0-9]+}}, ${{[0-9]+}}
; 16: move $[[REGISTER:[0-9]+]], $24
; 16: xor $[[REGISTER]], ${{[0-9]+}}
- %2 = load i32* @m, align 4
+ %2 = load i32, i32* @m, align 4
%cmp1 = icmp sge i32 %0, %2
%conv2 = zext i1 %cmp1 to i32
store i32 %conv2, i32* @r2, align 4
diff --git a/test/CodeGen/Mips/setgek.ll b/test/CodeGen/Mips/setgek.ll
index 18a0fcf62130..1148d1b67bda 100644
--- a/test/CodeGen/Mips/setgek.ll
+++ b/test/CodeGen/Mips/setgek.ll
@@ -7,7 +7,7 @@
define void @test() nounwind {
entry:
- %0 = load i32* @k, align 4
+ %0 = load i32, i32* @k, align 4
%cmp = icmp sgt i32 %0, -32769
%conv = zext i1 %cmp to i32
store i32 %conv, i32* @r1, align 4
diff --git a/test/CodeGen/Mips/setle.ll b/test/CodeGen/Mips/setle.ll
index 2df6774c1fad..fe4a2c37eb54 100644
--- a/test/CodeGen/Mips/setle.ll
+++ b/test/CodeGen/Mips/setle.ll
@@ -10,15 +10,15 @@
define void @test() nounwind {
entry:
- %0 = load i32* @j, align 4
- %1 = load i32* @k, align 4
+ %0 = load i32, i32* @j, align 4
+ %1 = load i32, i32* @k, align 4
%cmp = icmp sle i32 %0, %1
%conv = zext i1 %cmp to i32
store i32 %conv, i32* @r1, align 4
; 16: slt ${{[0-9]+}}, ${{[0-9]+}}
; 16: move $[[REGISTER:[0-9]+]], $24
; 16: xor $[[REGISTER]], ${{[0-9]+}}
- %2 = load i32* @m, align 4
+ %2 = load i32, i32* @m, align 4
%cmp1 = icmp sle i32 %2, %1
%conv2 = zext i1 %cmp1 to i32
store i32 %conv2, i32* @r2, align 4
diff --git a/test/CodeGen/Mips/setlt.ll b/test/CodeGen/Mips/setlt.ll
index 3dac74bf2e01..c4211e6dd696 100644
--- a/test/CodeGen/Mips/setlt.ll
+++ b/test/CodeGen/Mips/setlt.ll
@@ -10,8 +10,8 @@
define void @test() nounwind {
entry:
- %0 = load i32* @j, align 4
- %1 = load i32* @k, align 4
+ %0 = load i32, i32* @j, align 4
+ %1 = load i32, i32* @k, align 4
%cmp = icmp slt i32 %0, %1
%conv = zext i1 %cmp to i32
store i32 %conv, i32* @r1, align 4
diff --git a/test/CodeGen/Mips/setltk.ll b/test/CodeGen/Mips/setltk.ll
index ecebc7e578e1..8c0041111270 100644
--- a/test/CodeGen/Mips/setltk.ll
+++ b/test/CodeGen/Mips/setltk.ll
@@ -10,7 +10,7 @@
define void @test() nounwind {
entry:
- %0 = load i32* @j, align 4
+ %0 = load i32, i32* @j, align 4
%cmp = icmp slt i32 %0, 10
%conv = zext i1 %cmp to i32
store i32 %conv, i32* @r1, align 4
diff --git a/test/CodeGen/Mips/setne.ll b/test/CodeGen/Mips/setne.ll
index 9e66901e32b5..484674e5da32 100644
--- a/test/CodeGen/Mips/setne.ll
+++ b/test/CodeGen/Mips/setne.ll
@@ -8,8 +8,8 @@
define void @test() nounwind {
entry:
- %0 = load i32* @i, align 4
- %1 = load i32* @k, align 4
+ %0 = load i32, i32* @i, align 4
+ %1 = load i32, i32* @k, align 4
%cmp = icmp ne i32 %0, %1
%conv = zext i1 %cmp to i32
store i32 %conv, i32* @r1, align 4
diff --git a/test/CodeGen/Mips/setuge.ll b/test/CodeGen/Mips/setuge.ll
index 1c9b5bbe8114..025b4dcefd76 100644
--- a/test/CodeGen/Mips/setuge.ll
+++ b/test/CodeGen/Mips/setuge.ll
@@ -10,15 +10,15 @@
define void @test() nounwind {
entry:
- %0 = load i32* @k, align 4
- %1 = load i32* @j, align 4
+ %0 = load i32, i32* @k, align 4
+ %1 = load i32, i32* @j, align 4
%cmp = icmp uge i32 %0, %1
%conv = zext i1 %cmp to i32
store i32 %conv, i32* @r1, align 4
; 16: sltu ${{[0-9]+}}, ${{[0-9]+}}
; 16: move $[[REGISTER:[0-9]+]], $24
; 16: xor $[[REGISTER]], ${{[0-9]+}}
- %2 = load i32* @m, align 4
+ %2 = load i32, i32* @m, align 4
%cmp1 = icmp uge i32 %0, %2
%conv2 = zext i1 %cmp1 to i32
store i32 %conv2, i32* @r2, align 4
diff --git a/test/CodeGen/Mips/setugt.ll b/test/CodeGen/Mips/setugt.ll
index f10b47ae7178..0ce317e0df9e 100644
--- a/test/CodeGen/Mips/setugt.ll
+++ b/test/CodeGen/Mips/setugt.ll
@@ -10,8 +10,8 @@
define void @test() nounwind {
entry:
- %0 = load i32* @k, align 4
- %1 = load i32* @j, align 4
+ %0 = load i32, i32* @k, align 4
+ %1 = load i32, i32* @j, align 4
%cmp = icmp ugt i32 %0, %1
%conv = zext i1 %cmp to i32
store i32 %conv, i32* @r1, align 4
diff --git a/test/CodeGen/Mips/setule.ll b/test/CodeGen/Mips/setule.ll
index a6d6bf064052..4255fd27c5cd 100644
--- a/test/CodeGen/Mips/setule.ll
+++ b/test/CodeGen/Mips/setule.ll
@@ -10,15 +10,15 @@
define void @test() nounwind {
entry:
- %0 = load i32* @j, align 4
- %1 = load i32* @k, align 4
+ %0 = load i32, i32* @j, align 4
+ %1 = load i32, i32* @k, align 4
%cmp = icmp ule i32 %0, %1
%conv = zext i1 %cmp to i32
store i32 %conv, i32* @r1, align 4
; 16: sltu ${{[0-9]+}}, ${{[0-9]+}}
; 16: move $[[REGISTER:[0-9]+]], $24
; 16: xor $[[REGISTER]], ${{[0-9]+}}
- %2 = load i32* @m, align 4
+ %2 = load i32, i32* @m, align 4
%cmp1 = icmp ule i32 %2, %1
%conv2 = zext i1 %cmp1 to i32
store i32 %conv2, i32* @r2, align 4
diff --git a/test/CodeGen/Mips/setult.ll b/test/CodeGen/Mips/setult.ll
index 00ee437a2ffe..d30107e54dd0 100644
--- a/test/CodeGen/Mips/setult.ll
+++ b/test/CodeGen/Mips/setult.ll
@@ -10,8 +10,8 @@
define void @test() nounwind {
entry:
- %0 = load i32* @j, align 4
- %1 = load i32* @k, align 4
+ %0 = load i32, i32* @j, align 4
+ %1 = load i32, i32* @k, align 4
%cmp = icmp ult i32 %0, %1
%conv = zext i1 %cmp to i32
store i32 %conv, i32* @r1, align 4
diff --git a/test/CodeGen/Mips/setultk.ll b/test/CodeGen/Mips/setultk.ll
index eb9edbaad7f8..1b79f103bed7 100644
--- a/test/CodeGen/Mips/setultk.ll
+++ b/test/CodeGen/Mips/setultk.ll
@@ -10,7 +10,7 @@
define void @test() nounwind {
entry:
- %0 = load i32* @j, align 4
+ %0 = load i32, i32* @j, align 4
%cmp = icmp ult i32 %0, 10
%conv = zext i1 %cmp to i32
store i32 %conv, i32* @r1, align 4
diff --git a/test/CodeGen/Mips/sh1.ll b/test/CodeGen/Mips/sh1.ll
index 1746ae284f2a..3f70b9bc6e68 100644
--- a/test/CodeGen/Mips/sh1.ll
+++ b/test/CodeGen/Mips/sh1.ll
@@ -6,14 +6,14 @@
define i32 @main() nounwind {
entry:
- %0 = load i32* @i, align 4
+ %0 = load i32, i32* @i, align 4
%conv = trunc i32 %0 to i16
store i16 %conv, i16* @s, align 2
- %1 = load i32* @i, align 4
- %2 = load i16* @s, align 2
+ %1 = load i32, i32* @i, align 4
+ %2 = load i16, i16* @s, align 2
%conv1 = sext i16 %2 to i32
; 16: sh ${{[0-9]+}}, 0(${{[0-9]+}})
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([9 x i8]* @.str, i32 0, i32 0), i32 %1, i32 %conv1)
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str, i32 0, i32 0), i32 %1, i32 %conv1)
ret i32 0
}
diff --git a/test/CodeGen/Mips/simplebr.ll b/test/CodeGen/Mips/simplebr.ll
index a1d63671b4ee..2aeacc903fbe 100644
--- a/test/CodeGen/Mips/simplebr.ll
+++ b/test/CodeGen/Mips/simplebr.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -mips16-hard-float -soft-float -relocation-model=static < %s | FileCheck %s -check-prefix=CHECK-STATIC16
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -mips16-hard-float -mattr=+soft-float -relocation-model=static < %s | FileCheck %s -check-prefix=CHECK-STATIC16
; ModuleID = 'simplebr.c'
target datalayout = "E-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-n32-S64"
@@ -9,7 +9,7 @@ target triple = "mips--linux-gnu"
; Function Attrs: nounwind
define void @foo() #0 {
entry:
- %0 = load i32* @i, align 4
+ %0 = load i32, i32* @i, align 4
%tobool = icmp ne i32 %0, 0
br i1 %tobool, label %if.then, label %if.else
diff --git a/test/CodeGen/Mips/sitofp-selectcc-opt.ll b/test/CodeGen/Mips/sitofp-selectcc-opt.ll
index 576cbd8e9637..c60fceb1a04c 100644
--- a/test/CodeGen/Mips/sitofp-selectcc-opt.ll
+++ b/test/CodeGen/Mips/sitofp-selectcc-opt.ll
@@ -14,7 +14,7 @@ entry:
%tobool1. = or i1 %tobool1, %not.tobool
%lor.ext = zext i1 %tobool1. to i32
%conv = sitofp i32 %lor.ext to double
- %1 = load double* @foo12.d4, align 8
+ %1 = load double, double* @foo12.d4, align 8
%add = fadd double %conv, %1
store double %add, double* @foo12.d4, align 8
ret double %add
diff --git a/test/CodeGen/Mips/sll1.ll b/test/CodeGen/Mips/sll1.ll
index fdcd38c84b3a..4d35b64e0b58 100644
--- a/test/CodeGen/Mips/sll1.ll
+++ b/test/CodeGen/Mips/sll1.ll
@@ -7,12 +7,12 @@
define i32 @main() nounwind {
entry:
; 16: sll ${{[0-9]+}}, ${{[0-9]+}}, {{[0-9]+}}
- %0 = load i32* @i, align 4
+ %0 = load i32, i32* @i, align 4
%shl = shl i32 %0, 4
; 16: sll ${{[0-9]+}}, ${{[0-9]+}}, {{[0-9]+}}
store i32 %shl, i32* @j, align 4
- %1 = load i32* @j, align 4
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([5 x i8]* @.str, i32 0, i32 0), i32 %1)
+ %1 = load i32, i32* @j, align 4
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i32 0, i32 0), i32 %1)
ret i32 0
}
diff --git a/test/CodeGen/Mips/sll2.ll b/test/CodeGen/Mips/sll2.ll
index c2af454cc853..dc2236b10ccf 100644
--- a/test/CodeGen/Mips/sll2.ll
+++ b/test/CodeGen/Mips/sll2.ll
@@ -6,13 +6,13 @@
define i32 @main() nounwind {
entry:
- %0 = load i32* @i, align 4
- %1 = load i32* @j, align 4
+ %0 = load i32, i32* @i, align 4
+ %1 = load i32, i32* @j, align 4
%shl = shl i32 %0, %1
; 16: sllv ${{[0-9]+}}, ${{[0-9]+}}
store i32 %shl, i32* @i, align 4
- %2 = load i32* @j, align 4
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([5 x i8]* @.str, i32 0, i32 0), i32 %2)
+ %2 = load i32, i32* @j, align 4
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i32 0, i32 0), i32 %2)
ret i32 0
}
diff --git a/test/CodeGen/Mips/small-section-reserve-gp.ll b/test/CodeGen/Mips/small-section-reserve-gp.ll
index cbf0681c78e5..c4e37665aaf6 100644
--- a/test/CodeGen/Mips/small-section-reserve-gp.ll
+++ b/test/CodeGen/Mips/small-section-reserve-gp.ll
@@ -6,7 +6,7 @@
define i32 @geti() nounwind readonly {
entry:
; CHECK: lw ${{[0-9]+}}, %gp_rel(i)($gp)
- %0 = load i32* @i, align 4
+ %0 = load i32, i32* @i, align 4
ret i32 %0
}
diff --git a/test/CodeGen/Mips/spill-copy-acreg.ll b/test/CodeGen/Mips/spill-copy-acreg.ll
index 6563a5cffd91..fd160b67cf24 100644
--- a/test/CodeGen/Mips/spill-copy-acreg.ll
+++ b/test/CodeGen/Mips/spill-copy-acreg.ll
@@ -6,7 +6,7 @@
define i64 @test_acreg_copy(i32 %a0, i32 %a1, i32 %a2, i32 %a3) {
entry:
- %0 = load i64* @g1, align 8
+ %0 = load i64, i64* @g1, align 8
%1 = tail call i64 @llvm.mips.maddu(i64 %0, i32 %a0, i32 %a1)
%2 = tail call i64 @llvm.mips.maddu(i64 %0, i32 %a2, i32 %a3)
store i64 %1, i64* @g1, align 8
@@ -32,8 +32,8 @@ entry:
%sext = sext <2 x i1> %cmp3 to <2 x i16>
store <2 x i16> %sext, <2 x i16>* @g4, align 4
tail call void @foo1()
- %2 = load <2 x i16>* @g5, align 4
- %3 = load <2 x i16>* @g6, align 4
+ %2 = load <2 x i16>, <2 x i16>* @g5, align 4
+ %3 = load <2 x i16>, <2 x i16>* @g6, align 4
%or = select <2 x i1> %cmp3, <2 x i16> %2, <2 x i16> %3
%4 = bitcast <2 x i16> %or to i32
%.fca.0.insert = insertvalue { i32 } undef, i32 %4, 0
diff --git a/test/CodeGen/Mips/sr1.ll b/test/CodeGen/Mips/sr1.ll
index 610693d58b3f..69655f7b842c 100644
--- a/test/CodeGen/Mips/sr1.ll
+++ b/test/CodeGen/Mips/sr1.ll
@@ -8,9 +8,9 @@
define void @foo1() #0 {
entry:
%c = alloca [10 x i8], align 1
- %arraydecay = getelementptr inbounds [10 x i8]* %c, i32 0, i32 0
+ %arraydecay = getelementptr inbounds [10 x i8], [10 x i8]* %c, i32 0, i32 0
call void @x(i8* %arraydecay)
- %arraydecay1 = getelementptr inbounds [10 x i8]* %c, i32 0, i32 0
+ %arraydecay1 = getelementptr inbounds [10 x i8], [10 x i8]* %c, i32 0, i32 0
call void @x(i8* %arraydecay1)
ret void
; CHECK: .ent foo1
@@ -25,9 +25,9 @@ declare void @x(i8*) #1
define void @foo2() #0 {
entry:
%c = alloca [150 x i8], align 1
- %arraydecay = getelementptr inbounds [150 x i8]* %c, i32 0, i32 0
+ %arraydecay = getelementptr inbounds [150 x i8], [150 x i8]* %c, i32 0, i32 0
call void @x(i8* %arraydecay)
- %arraydecay1 = getelementptr inbounds [150 x i8]* %c, i32 0, i32 0
+ %arraydecay1 = getelementptr inbounds [150 x i8], [150 x i8]* %c, i32 0, i32 0
call void @x(i8* %arraydecay1)
ret void
; CHECK: .ent foo2
diff --git a/test/CodeGen/Mips/sra1.ll b/test/CodeGen/Mips/sra1.ll
index 15bf8d644ea3..1c7d417cb13a 100644
--- a/test/CodeGen/Mips/sra1.ll
+++ b/test/CodeGen/Mips/sra1.ll
@@ -5,10 +5,10 @@
define i32 @main() nounwind {
entry:
- %0 = load i32* @i, align 4
+ %0 = load i32, i32* @i, align 4
%shr = ashr i32 %0, 3
; 16: sra ${{[0-9]+}}, ${{[0-9]+}}, {{[0-9]+}}
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([5 x i8]* @.str, i32 0, i32 0), i32 %shr)
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i32 0, i32 0), i32 %shr)
ret i32 0
}
diff --git a/test/CodeGen/Mips/sra2.ll b/test/CodeGen/Mips/sra2.ll
index 26bf19d44020..771d0f4a79e3 100644
--- a/test/CodeGen/Mips/sra2.ll
+++ b/test/CodeGen/Mips/sra2.ll
@@ -6,11 +6,11 @@
define i32 @main() nounwind {
entry:
- %0 = load i32* @i, align 4
- %1 = load i32* @j, align 4
+ %0 = load i32, i32* @i, align 4
+ %1 = load i32, i32* @j, align 4
%shr = ashr i32 %0, %1
; 16: srav ${{[0-9]+}}, ${{[0-9]+}}
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([5 x i8]* @.str, i32 0, i32 0), i32 %shr)
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i32 0, i32 0), i32 %shr)
ret i32 0
}
diff --git a/test/CodeGen/Mips/srl1.ll b/test/CodeGen/Mips/srl1.ll
index 3474283faef9..a748eabb066f 100644
--- a/test/CodeGen/Mips/srl1.ll
+++ b/test/CodeGen/Mips/srl1.ll
@@ -6,12 +6,12 @@
define i32 @main() nounwind {
entry:
- %0 = load i32* @i, align 4
+ %0 = load i32, i32* @i, align 4
%shr = lshr i32 %0, 4
; 16: srl ${{[0-9]+}}, ${{[0-9]+}}, {{[0-9]+}}
store i32 %shr, i32* @j, align 4
- %1 = load i32* @j, align 4
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([5 x i8]* @.str, i32 0, i32 0), i32 %1)
+ %1 = load i32, i32* @j, align 4
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i32 0, i32 0), i32 %1)
ret i32 0
}
diff --git a/test/CodeGen/Mips/srl2.ll b/test/CodeGen/Mips/srl2.ll
index 26ec0927a559..6e338b39350f 100644
--- a/test/CodeGen/Mips/srl2.ll
+++ b/test/CodeGen/Mips/srl2.ll
@@ -7,13 +7,13 @@
define i32 @main() nounwind {
entry:
- %0 = load i32* @i, align 4
- %1 = load i32* @k, align 4
+ %0 = load i32, i32* @i, align 4
+ %1 = load i32, i32* @k, align 4
%shr = lshr i32 %0, %1
; 16: srlv ${{[0-9]+}}, ${{[0-9]+}}
store i32 %shr, i32* @j, align 4
- %2 = load i32* @j, align 4
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([5 x i8]* @.str, i32 0, i32 0), i32 %2)
+ %2 = load i32, i32* @j, align 4
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i32 0, i32 0), i32 %2)
ret i32 0
}
diff --git a/test/CodeGen/Mips/stackcoloring.ll b/test/CodeGen/Mips/stackcoloring.ll
index 4987dad5338b..5516b5a3c023 100644
--- a/test/CodeGen/Mips/stackcoloring.ll
+++ b/test/CodeGen/Mips/stackcoloring.ll
@@ -12,15 +12,15 @@ entry:
%b = alloca [16 x i32], align 4
%0 = bitcast [16 x i32]* %b to i8*
call void @llvm.lifetime.start(i64 64, i8* %0)
- %arraydecay = getelementptr inbounds [16 x i32]* %b, i32 0, i32 0
+ %arraydecay = getelementptr inbounds [16 x i32], [16 x i32]* %b, i32 0, i32 0
br label %for.body
for.body: ; preds = %for.body, %entry
%i.05 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
%v.04 = phi i32 [ 0, %entry ], [ %add, %for.body ]
- %1 = load i32** @g1, align 4
- %arrayidx = getelementptr inbounds i32* %1, i32 %i.05
- %2 = load i32* %arrayidx, align 4
+ %1 = load i32*, i32** @g1, align 4
+ %arrayidx = getelementptr inbounds i32, i32* %1, i32 %i.05
+ %2 = load i32, i32* %arrayidx, align 4
%call = call i32 @foo2(i32 %2, i32* %arraydecay)
%add = add nsw i32 %call, %v.04
%inc = add nsw i32 %i.05, 1
diff --git a/test/CodeGen/Mips/start-asm-file.ll b/test/CodeGen/Mips/start-asm-file.ll
index 9dc501ce10b4..60c047a4e8cc 100644
--- a/test/CodeGen/Mips/start-asm-file.ll
+++ b/test/CodeGen/Mips/start-asm-file.ll
@@ -19,36 +19,36 @@
; ### N32 ABI ###
; RUN: llc -filetype=asm -mtriple mips64-unknown-linux -mcpu=mips64 \
-; RUN: -relocation-model=static -mattr=-n64,+n32 %s -o - | \
+; RUN: -relocation-model=static -target-abi n32 %s -o - | \
; RUN: FileCheck -check-prefix=CHECK-STATIC-N32 -check-prefix=CHECK-STATIC-N32-NLEGACY %s
; RUN: llc -filetype=asm -mtriple mips64-unknown-linux -mcpu=mips64 \
-; RUN: -relocation-model=pic -mattr=-n64,+n32 %s -o - | \
+; RUN: -relocation-model=pic -target-abi n32 %s -o - | \
; RUN: FileCheck -check-prefix=CHECK-PIC-N32 -check-prefix=CHECK-PIC-N32-NLEGACY %s
; RUN: llc -filetype=asm -mtriple mips64-unknown-linux -mcpu=mips64 \
-; RUN: -relocation-model=static -mattr=-n64,+n32,+nan2008 %s -o - | \
+; RUN: -relocation-model=static -target-abi n32 -mattr=+nan2008 %s -o - | \
; RUN: FileCheck -check-prefix=CHECK-STATIC-N32 -check-prefix=CHECK-STATIC-N32-N2008 %s
; RUN: llc -filetype=asm -mtriple mips64-unknown-linux -mcpu=mips64 \
-; RUN: -relocation-model=pic -mattr=-n64,+n32,+nan2008 %s -o - | \
+; RUN: -relocation-model=pic -target-abi n32 -mattr=+nan2008 %s -o - | \
; RUN: FileCheck -check-prefix=CHECK-PIC-N32 -check-prefix=CHECK-PIC-N32-N2008 %s
; ### N64 ABI ###
; RUN: llc -filetype=asm -mtriple mips64-unknown-linux -mcpu=mips64 \
-; RUN: -relocation-model=static -mattr=+n64 %s -o - | \
+; RUN: -relocation-model=static -target-abi n64 %s -o - | \
; RUN: FileCheck -check-prefix=CHECK-STATIC-N64 -check-prefix=CHECK-STATIC-N64-NLEGACY %s
; RUN: llc -filetype=asm -mtriple mips64-unknown-linux -mcpu=mips64 \
-; RUN: -relocation-model=pic -mattr=+n64 %s -o - | \
+; RUN: -relocation-model=pic -target-abi n64 %s -o - | \
; RUN: FileCheck -check-prefix=CHECK-PIC-N64 -check-prefix=CHECK-PIC-N64-NLEGACY %s
; RUN: llc -filetype=asm -mtriple mips64-unknown-linux -mcpu=mips64 \
-; RUN: -relocation-model=static -mattr=+n64,+nan2008 %s -o - | \
+; RUN: -relocation-model=static -target-abi n64 -mattr=+nan2008 %s -o - | \
; RUN: FileCheck -check-prefix=CHECK-STATIC-N64 -check-prefix=CHECK-STATIC-N64-N2008 %s
; RUN: llc -filetype=asm -mtriple mips64-unknown-linux -mcpu=mips64 \
-; RUN: -relocation-model=pic -mattr=+n64,+nan2008 %s -o - | \
+; RUN: -relocation-model=pic -target-abi n64 -mattr=+nan2008 %s -o - | \
; RUN: FileCheck -check-prefix=CHECK-PIC-N64 -check-prefix=CHECK-PIC-N64-N2008 %s
; CHECK-STATIC-O32: .abicalls
diff --git a/test/CodeGen/Mips/stchar.ll b/test/CodeGen/Mips/stchar.ll
index 12eae3487ff1..6bc4889931a7 100644
--- a/test/CodeGen/Mips/stchar.ll
+++ b/test/CodeGen/Mips/stchar.ll
@@ -9,7 +9,7 @@ define void @p1(i16 signext %s, i8 signext %c) nounwind {
entry:
%conv = sext i16 %s to i32
%conv1 = sext i8 %c to i32
- %call = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([9 x i8]* @.str, i32 0, i32 0), i32 %conv, i32 %conv1) nounwind
+ %call = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str, i32 0, i32 0), i32 %conv, i32 %conv1) nounwind
ret void
}
@@ -17,16 +17,16 @@ declare i32 @printf(i8* nocapture, ...) nounwind
define void @p2() nounwind {
entry:
- %0 = load i16** @sp, align 4
- %1 = load i16* %0, align 2
- %2 = load i8** @cp, align 4
- %3 = load i8* %2, align 1
+ %0 = load i16*, i16** @sp, align 4
+ %1 = load i16, i16* %0, align 2
+ %2 = load i8*, i8** @cp, align 4
+ %3 = load i8, i8* %2, align 1
%conv.i = sext i16 %1 to i32
%conv1.i = sext i8 %3 to i32
- %call.i = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([9 x i8]* @.str, i32 0, i32 0), i32 %conv.i, i32 %conv1.i) nounwind
- %4 = load i16** @sp, align 4
+ %call.i = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str, i32 0, i32 0), i32 %conv.i, i32 %conv1.i) nounwind
+ %4 = load i16*, i16** @sp, align 4
store i16 32, i16* %4, align 2
- %5 = load i8** @cp, align 4
+ %5 = load i8*, i8** @cp, align 4
store i8 97, i8* %5, align 1
ret void
}
@@ -39,16 +39,16 @@ entry:
store i8 99, i8* %c, align 4
store i16* %s, i16** @sp, align 4
store i8* %c, i8** @cp, align 4
- %call.i.i = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([9 x i8]* @.str, i32 0, i32 0), i32 16, i32 99) nounwind
- %0 = load i16** @sp, align 4
+ %call.i.i = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str, i32 0, i32 0), i32 16, i32 99) nounwind
+ %0 = load i16*, i16** @sp, align 4
store i16 32, i16* %0, align 2
- %1 = load i8** @cp, align 4
+ %1 = load i8*, i8** @cp, align 4
store i8 97, i8* %1, align 1
- %2 = load i16* %s, align 4
- %3 = load i8* %c, align 4
+ %2 = load i16, i16* %s, align 4
+ %3 = load i8, i8* %c, align 4
%conv.i = sext i16 %2 to i32
%conv1.i = sext i8 %3 to i32
- %call.i = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([9 x i8]* @.str, i32 0, i32 0), i32 %conv.i, i32 %conv1.i) nounwind
+ %call.i = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str, i32 0, i32 0), i32 %conv.i, i32 %conv1.i) nounwind
ret void
; 16_b-LABEL: test:
; 16_h-LABEL: test:
@@ -69,16 +69,16 @@ entry:
store i8 99, i8* %c.i, align 4
store i16* %s.i, i16** @sp, align 4
store i8* %c.i, i8** @cp, align 4
- %call.i.i.i = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([9 x i8]* @.str, i32 0, i32 0), i32 16, i32 99) nounwind
- %1 = load i16** @sp, align 4
+ %call.i.i.i = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str, i32 0, i32 0), i32 16, i32 99) nounwind
+ %1 = load i16*, i16** @sp, align 4
store i16 32, i16* %1, align 2
- %2 = load i8** @cp, align 4
+ %2 = load i8*, i8** @cp, align 4
store i8 97, i8* %2, align 1
- %3 = load i16* %s.i, align 4
- %4 = load i8* %c.i, align 4
+ %3 = load i16, i16* %s.i, align 4
+ %4 = load i8, i8* %c.i, align 4
%conv.i.i = sext i16 %3 to i32
%conv1.i.i = sext i8 %4 to i32
- %call.i.i = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([9 x i8]* @.str, i32 0, i32 0), i32 %conv.i.i, i32 %conv1.i.i) nounwind
+ %call.i.i = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str, i32 0, i32 0), i32 %conv.i.i, i32 %conv1.i.i) nounwind
call void @llvm.lifetime.end(i64 -1, i8* %0) nounwind
call void @llvm.lifetime.end(i64 -1, i8* %c.i) nounwind
ret i32 0
diff --git a/test/CodeGen/Mips/stldst.ll b/test/CodeGen/Mips/stldst.ll
index 4182b9e76d63..4eef5ece0589 100644
--- a/test/CodeGen/Mips/stldst.ll
+++ b/test/CodeGen/Mips/stldst.ll
@@ -12,25 +12,25 @@
define i32 @main() nounwind {
entry:
- %0 = load i32* @kkkk, align 4
- %1 = load i32* @llll, align 4
+ %0 = load i32, i32* @kkkk, align 4
+ %1 = load i32, i32* @llll, align 4
%add = add nsw i32 %0, 10
%add1 = add nsw i32 %1, 10
- %2 = load i32* @mmmm, align 4
+ %2 = load i32, i32* @mmmm, align 4
%sub = add nsw i32 %2, -3
- %3 = load i32* @nnnn, align 4
+ %3 = load i32, i32* @nnnn, align 4
%add2 = add nsw i32 %3, 10
- %4 = load i32* @oooo, align 4
+ %4 = load i32, i32* @oooo, align 4
%add3 = add nsw i32 %4, 4
- %5 = load i32* @pppp, align 4
+ %5 = load i32, i32* @pppp, align 4
%sub4 = add nsw i32 %5, -5
- %6 = load i32* @qqqq, align 4
+ %6 = load i32, i32* @qqqq, align 4
%sub5 = add nsw i32 %6, -10
- %7 = load i32* @rrrr, align 4
+ %7 = load i32, i32* @rrrr, align 4
%add6 = add nsw i32 %7, 6
- %call = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([32 x i8]* @.str, i32 0, i32 0), i32 %sub5, i32 %add6, i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7) nounwind
- %call7 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([32 x i8]* @.str, i32 0, i32 0), i32 %0, i32 %1, i32 %add, i32 %add1, i32 %sub, i32 %add2, i32 %add3, i32 %sub4, i32 %sub5, i32 %add6) nounwind
+ %call = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([32 x i8], [32 x i8]* @.str, i32 0, i32 0), i32 %sub5, i32 %add6, i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7) nounwind
+ %call7 = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([32 x i8], [32 x i8]* @.str, i32 0, i32 0), i32 %0, i32 %1, i32 %add, i32 %add1, i32 %sub, i32 %add2, i32 %add3, i32 %sub4, i32 %sub5, i32 %add6) nounwind
ret i32 0
}
; 16: sw ${{[0-9]+}}, {{[0-9]+}} ( $sp ); # 4-byte Folded Spill
diff --git a/test/CodeGen/Mips/sub1.ll b/test/CodeGen/Mips/sub1.ll
index 195750b805d6..636ab8f2c5f3 100644
--- a/test/CodeGen/Mips/sub1.ll
+++ b/test/CodeGen/Mips/sub1.ll
@@ -5,10 +5,10 @@
define i32 @main() nounwind {
entry:
- %0 = load i32* @i, align 4
+ %0 = load i32, i32* @i, align 4
%sub = sub nsw i32 %0, 5
; 16: addiu ${{[0-9]+}}, -{{[0-9]+}}
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i32 %sub)
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i32 %sub)
ret i32 0
}
diff --git a/test/CodeGen/Mips/sub2.ll b/test/CodeGen/Mips/sub2.ll
index 4f6bfccec401..a97f5e947ca9 100644
--- a/test/CodeGen/Mips/sub2.ll
+++ b/test/CodeGen/Mips/sub2.ll
@@ -6,11 +6,11 @@
define i32 @main() nounwind {
entry:
- %0 = load i32* @j, align 4
- %1 = load i32* @i, align 4
+ %0 = load i32, i32* @j, align 4
+ %1 = load i32, i32* @i, align 4
%sub = sub nsw i32 %0, %1
; 16: subu ${{[0-9]+}}, ${{[0-9]+}}, ${{[0-9]+}}
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i32 %sub)
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i32 %sub)
ret i32 0
}
diff --git a/test/CodeGen/Mips/swzero.ll b/test/CodeGen/Mips/swzero.ll
index 9f91a3902d7d..9aaee1509806 100644
--- a/test/CodeGen/Mips/swzero.ll
+++ b/test/CodeGen/Mips/swzero.ll
@@ -6,7 +6,7 @@ define void @zero_u(%struct.unaligned* nocapture %p) nounwind {
entry:
; CHECK: swl $zero
; CHECK: swr $zero
- %x = getelementptr inbounds %struct.unaligned* %p, i32 0, i32 0
+ %x = getelementptr inbounds %struct.unaligned, %struct.unaligned* %p, i32 0, i32 0
store i32 0, i32* %x, align 1
ret void
}
diff --git a/test/CodeGen/Mips/tail16.ll b/test/CodeGen/Mips/tail16.ll
index 4e62e557478a..13f27fcc513b 100644
--- a/test/CodeGen/Mips/tail16.ll
+++ b/test/CodeGen/Mips/tail16.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -soft-float -mips16-hard-float -relocation-model=pic < %s | FileCheck %s
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -mattr=+soft-float -mips16-hard-float -relocation-model=pic < %s | FileCheck %s
; Function Attrs: nounwind optsize
define float @h() {
diff --git a/test/CodeGen/Mips/tailcall.ll b/test/CodeGen/Mips/tailcall.ll
index 30f47abc06cb..6a0d64b7eed8 100644
--- a/test/CodeGen/Mips/tailcall.ll
+++ b/test/CodeGen/Mips/tailcall.ll
@@ -85,16 +85,16 @@ entry:
; PIC16: jalrc
; PIC16: .end caller5
- %0 = load i32* @g0, align 4
- %1 = load i32* @g1, align 4
- %2 = load i32* @g2, align 4
- %3 = load i32* @g3, align 4
- %4 = load i32* @g4, align 4
- %5 = load i32* @g5, align 4
- %6 = load i32* @g6, align 4
- %7 = load i32* @g7, align 4
- %8 = load i32* @g8, align 4
- %9 = load i32* @g9, align 4
+ %0 = load i32, i32* @g0, align 4
+ %1 = load i32, i32* @g1, align 4
+ %2 = load i32, i32* @g2, align 4
+ %3 = load i32, i32* @g3, align 4
+ %4 = load i32, i32* @g4, align 4
+ %5 = load i32, i32* @g5, align 4
+ %6 = load i32, i32* @g6, align 4
+ %7 = load i32, i32* @g7, align 4
+ %8 = load i32, i32* @g8, align 4
+ %9 = load i32, i32* @g9, align 4
%call = tail call fastcc i32 @callee5(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7, i32 %8, i32 %9)
ret i32 %call
}
@@ -136,7 +136,7 @@ entry:
; PIC16: jalrc
; PIC16: .end caller8_1
- %call = tail call i32 (i32, ...)* @callee8(i32 2, i32 1) nounwind
+ %call = tail call i32 (i32, ...) @callee8(i32 2, i32 1) nounwind
ret i32 %call
}
@@ -239,7 +239,7 @@ entry:
; PIC16: .ent caller13
; PIC16: jalrc
- %call = tail call i32 (i32, ...)* @callee13(i32 1, i32 2) nounwind
+ %call = tail call i32 (i32, ...) @callee13(i32 1, i32 2) nounwind
ret i32 %call
}
diff --git a/test/CodeGen/Mips/tls.ll b/test/CodeGen/Mips/tls.ll
index b14ad5ba452b..97e270fc59a6 100644
--- a/test/CodeGen/Mips/tls.ll
+++ b/test/CodeGen/Mips/tls.ll
@@ -10,7 +10,7 @@
define i32 @f1() nounwind {
entry:
- %tmp = load i32* @t1, align 4
+ %tmp = load i32, i32* @t1, align 4
ret i32 %tmp
; PIC-LABEL: f1:
@@ -33,7 +33,7 @@ entry:
define i32 @f2() nounwind {
entry:
- %tmp = load i32* @t2, align 4
+ %tmp = load i32, i32* @t2, align 4
ret i32 %tmp
; PIC-LABEL: f2:
@@ -69,7 +69,7 @@ entry:
; PIC: addu $[[R1:[0-9]+]], $[[R0]], $2
; PIC: lw ${{[0-9]+}}, %dtprel_lo(f3.i)($[[R1]])
- %0 = load i32* @f3.i, align 4
+ %0 = load i32, i32* @f3.i, align 4
%inc = add nsw i32 %0, 1
store i32 %inc, i32* @f3.i, align 4
ret i32 %inc
diff --git a/test/CodeGen/Mips/tls16.ll b/test/CodeGen/Mips/tls16.ll
index 861864bcfe0f..3d324d7ed1e8 100644
--- a/test/CodeGen/Mips/tls16.ll
+++ b/test/CodeGen/Mips/tls16.ll
@@ -4,7 +4,7 @@
define i32 @foo() nounwind readonly {
entry:
- %0 = load i32* @a, align 4
+ %0 = load i32, i32* @a, align 4
; PIC16: lw ${{[0-9]+}}, %call16(__tls_get_addr)(${{[0-9]+}})
; PIC16: addiu ${{[0-9]+}}, %tlsgd(a)
ret i32 %0
diff --git a/test/CodeGen/Mips/tls16_2.ll b/test/CodeGen/Mips/tls16_2.ll
index b33e3c3766b6..0a6a4123e116 100644
--- a/test/CodeGen/Mips/tls16_2.ll
+++ b/test/CodeGen/Mips/tls16_2.ll
@@ -4,7 +4,7 @@
define i8* @f(i8* nocapture %a) nounwind {
entry:
- %0 = load i32* @f.i, align 4
+ %0 = load i32, i32* @f.i, align 4
%inc = add nsw i32 %0, 1
store i32 %inc, i32* @f.i, align 4
%1 = inttoptr i32 %inc to i8*
diff --git a/test/CodeGen/Mips/uitofp.ll b/test/CodeGen/Mips/uitofp.ll
index aff70c24f07c..83c2069f9661 100644
--- a/test/CodeGen/Mips/uitofp.ll
+++ b/test/CodeGen/Mips/uitofp.ll
@@ -5,7 +5,7 @@ entry:
%b = alloca i32, align 4
%a = alloca float, align 4
store volatile i32 1, i32* %b, align 4
- %0 = load volatile i32* %b, align 4
+ %0 = load volatile i32, i32* %b, align 4
%conv = uitofp i32 %0 to float
store float %conv, float* %a, align 4
ret void
diff --git a/test/CodeGen/Mips/ul1.ll b/test/CodeGen/Mips/ul1.ll
index 7e64ff4d90fd..ad0992954631 100644
--- a/test/CodeGen/Mips/ul1.ll
+++ b/test/CodeGen/Mips/ul1.ll
@@ -5,7 +5,7 @@
define i32 @main() nounwind {
entry:
- store i32 10, i32* getelementptr inbounds (%struct.ua* @foo, i32 0, i32 1), align 1
+ store i32 10, i32* getelementptr inbounds (%struct.ua, %struct.ua* @foo, i32 0, i32 1), align 1
; 16: sb ${{[0-9]+}}, {{[0-9]+}}(${{[0-9]+}})
; 16: sb ${{[0-9]+}}, {{[0-9]+}}(${{[0-9]+}})
; 16: sb ${{[0-9]+}}, {{[0-9]+}}(${{[0-9]+}})
diff --git a/test/CodeGen/Mips/unalignedload.ll b/test/CodeGen/Mips/unalignedload.ll
index 2002b1c60abe..9e453a6e794b 100644
--- a/test/CodeGen/Mips/unalignedload.ll
+++ b/test/CodeGen/Mips/unalignedload.ll
@@ -30,7 +30,7 @@ entry:
; MIPS32R6-DAG: lhu $[[PART1:[0-9]+]], 2($[[R0]])
- tail call void @foo2(%struct.S1* byval getelementptr inbounds (%struct.S2* @s2, i32 0, i32 1)) nounwind
+ tail call void @foo2(%struct.S1* byval getelementptr inbounds (%struct.S2, %struct.S2* @s2, i32 0, i32 1)) nounwind
ret void
}
diff --git a/test/CodeGen/Mips/vector-load-store.ll b/test/CodeGen/Mips/vector-load-store.ll
index d88996309908..61cbc5a6dee1 100644
--- a/test/CodeGen/Mips/vector-load-store.ll
+++ b/test/CodeGen/Mips/vector-load-store.ll
@@ -10,7 +10,7 @@ entry:
; CHECK: lw
; CHECK: sw
- %0 = load <2 x i16>* @g1, align 4
+ %0 = load <2 x i16>, <2 x i16>* @g1, align 4
store <2 x i16> %0, <2 x i16>* @g0, align 4
ret void
}
@@ -20,7 +20,7 @@ entry:
; CHECK: lw
; CHECK: sw
- %0 = load <4 x i8>* @g3, align 4
+ %0 = load <4 x i8>, <4 x i8>* @g3, align 4
store <4 x i8> %0, <4 x i8>* @g2, align 4
ret void
}
diff --git a/test/CodeGen/Mips/vector-setcc.ll b/test/CodeGen/Mips/vector-setcc.ll
index aeff4918c8bb..64b84e40513e 100644
--- a/test/CodeGen/Mips/vector-setcc.ll
+++ b/test/CodeGen/Mips/vector-setcc.ll
@@ -6,8 +6,8 @@
define void @foo0() nounwind {
entry:
- %0 = load <4 x i32>* @a, align 16
- %1 = load <4 x i32>* @b, align 16
+ %0 = load <4 x i32>, <4 x i32>* @a, align 16
+ %1 = load <4 x i32>, <4 x i32>* @b, align 16
%cmp = icmp slt <4 x i32> %0, %1
%sext = sext <4 x i1> %cmp to <4 x i32>
store <4 x i32> %sext, <4 x i32>* @g0, align 16
diff --git a/test/CodeGen/Mips/xor1.ll b/test/CodeGen/Mips/xor1.ll
index f2c13169cf7a..dd51f143bb6c 100644
--- a/test/CodeGen/Mips/xor1.ll
+++ b/test/CodeGen/Mips/xor1.ll
@@ -6,11 +6,11 @@
define i32 @main() nounwind {
entry:
- %0 = load i32* @x, align 4
- %1 = load i32* @y, align 4
+ %0 = load i32, i32* @x, align 4
+ %1 = load i32, i32* @y, align 4
%xor = xor i32 %0, %1
; 16: xor ${{[0-9]+}}, ${{[0-9]+}}
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([7 x i8]* @.str, i32 0, i32 0), i32 %xor)
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([7 x i8], [7 x i8]* @.str, i32 0, i32 0), i32 %xor)
ret i32 0
}
diff --git a/test/CodeGen/Mips/zeroreg.ll b/test/CodeGen/Mips/zeroreg.ll
index c766d3b3cc2a..6baf9d4fbff2 100644
--- a/test/CodeGen/Mips/zeroreg.ll
+++ b/test/CodeGen/Mips/zeroreg.ll
@@ -25,7 +25,7 @@ entry:
; 64R6: seleqz $2, $[[R0]], $4
%tobool = icmp ne i32 %s, 0
- %0 = load i32* @g1, align 4
+ %0 = load i32, i32* @g1, align 4
%cond = select i1 %tobool, i32 0, i32 %0
ret i32 %cond
}
@@ -47,7 +47,7 @@ entry:
; 64R6: selnez $2, $[[R0]], $4
%tobool = icmp ne i32 %s, 0
- %0 = load i32* @g1, align 4
+ %0 = load i32, i32* @g1, align 4
%cond = select i1 %tobool, i32 %0, i32 0
ret i32 %cond
}
@@ -76,7 +76,7 @@ entry:
; 64R6: seleqz $2, $[[R0]], $4
%tobool = icmp ne i64 %s, 0
- %0 = load i64* @g2, align 4
+ %0 = load i64, i64* @g2, align 4
%cond = select i1 %tobool, i64 0, i64 %0
ret i64 %cond
}
@@ -103,7 +103,7 @@ entry:
; 64R6: selnez $2, $[[R0]], $4
%tobool = icmp ne i64 %s, 0
- %0 = load i64* @g2, align 4
+ %0 = load i64, i64* @g2, align 4
%cond = select i1 %tobool, i64 %0, i64 0
ret i64 %cond
}
diff --git a/test/CodeGen/NVPTX/access-non-generic.ll b/test/CodeGen/NVPTX/access-non-generic.ll
index c225abf0fd85..e709302918f5 100644
--- a/test/CodeGen/NVPTX/access-non-generic.ll
+++ b/test/CodeGen/NVPTX/access-non-generic.ll
@@ -18,7 +18,7 @@ define float @ld_st_shared_f32(i32 %i, float %v) {
; IR-NOT: addrspacecast
; PTX-LABEL: ld_st_shared_f32(
; load cast
- %1 = load float* addrspacecast (float addrspace(3)* @scalar to float*), align 4
+ %1 = load float, float* addrspacecast (float addrspace(3)* @scalar to float*), align 4
; PTX: ld.shared.f32 %f{{[0-9]+}}, [scalar];
; store cast
store float %v, float* addrspacecast (float addrspace(3)* @scalar to float*), align 4
@@ -29,7 +29,7 @@ define float @ld_st_shared_f32(i32 %i, float %v) {
; cast; load
%2 = addrspacecast float addrspace(3)* @scalar to float*
- %3 = load float* %2, align 4
+ %3 = load float, float* %2, align 4
; PTX: ld.shared.f32 %f{{[0-9]+}}, [scalar];
; cast; store
store float %v, float* %2, align 4
@@ -38,17 +38,17 @@ define float @ld_st_shared_f32(i32 %i, float %v) {
; PTX: bar.sync 0;
; load gep cast
- %4 = load float* getelementptr inbounds ([10 x float]* addrspacecast ([10 x float] addrspace(3)* @array to [10 x float]*), i32 0, i32 5), align 4
+ %4 = load float, float* getelementptr inbounds ([10 x float], [10 x float]* addrspacecast ([10 x float] addrspace(3)* @array to [10 x float]*), i32 0, i32 5), align 4
; PTX: ld.shared.f32 %f{{[0-9]+}}, [array+20];
; store gep cast
- store float %v, float* getelementptr inbounds ([10 x float]* addrspacecast ([10 x float] addrspace(3)* @array to [10 x float]*), i32 0, i32 5), align 4
+ store float %v, float* getelementptr inbounds ([10 x float], [10 x float]* addrspacecast ([10 x float] addrspace(3)* @array to [10 x float]*), i32 0, i32 5), align 4
; PTX: st.shared.f32 [array+20], %f{{[0-9]+}};
call void @llvm.cuda.syncthreads()
; PTX: bar.sync 0;
; gep cast; load
- %5 = getelementptr inbounds [10 x float]* addrspacecast ([10 x float] addrspace(3)* @array to [10 x float]*), i32 0, i32 5
- %6 = load float* %5, align 4
+ %5 = getelementptr inbounds [10 x float], [10 x float]* addrspacecast ([10 x float] addrspace(3)* @array to [10 x float]*), i32 0, i32 5
+ %6 = load float, float* %5, align 4
; PTX: ld.shared.f32 %f{{[0-9]+}}, [array+20];
; gep cast; store
store float %v, float* %5, align 4
@@ -58,8 +58,8 @@ define float @ld_st_shared_f32(i32 %i, float %v) {
; cast; gep; load
%7 = addrspacecast [10 x float] addrspace(3)* @array to [10 x float]*
- %8 = getelementptr inbounds [10 x float]* %7, i32 0, i32 %i
- %9 = load float* %8, align 4
+ %8 = getelementptr inbounds [10 x float], [10 x float]* %7, i32 0, i32 %i
+ %9 = load float, float* %8, align 4
; PTX: ld.shared.f32 %f{{[0-9]+}}, [%{{(r|rl|rd)[0-9]+}}];
; cast; gep; store
store float %v, float* %8, align 4
@@ -78,10 +78,10 @@ define float @ld_st_shared_f32(i32 %i, float %v) {
; addrspacecast with a bitcast.
define i32 @ld_int_from_float() {
; IR-LABEL: @ld_int_from_float
-; IR: load i32 addrspace(3)* bitcast (float addrspace(3)* @scalar to i32 addrspace(3)*)
+; IR: load i32, i32 addrspace(3)* bitcast (float addrspace(3)* @scalar to i32 addrspace(3)*)
; PTX-LABEL: ld_int_from_float(
; PTX: ld.shared.u{{(32|64)}}
- %1 = load i32* addrspacecast(float addrspace(3)* @scalar to i32*), align 4
+ %1 = load i32, i32* addrspacecast(float addrspace(3)* @scalar to i32*), align 4
ret i32 %1
}
diff --git a/test/CodeGen/NVPTX/addrspacecast-gvar.ll b/test/CodeGen/NVPTX/addrspacecast-gvar.ll
index 6afbdb8a429f..1e2fde4b858a 100644
--- a/test/CodeGen/NVPTX/addrspacecast-gvar.ll
+++ b/test/CodeGen/NVPTX/addrspacecast-gvar.ll
@@ -3,7 +3,11 @@
; CHECK: .visible .global .align 4 .u32 g = 42;
; CHECK: .visible .global .align 4 .u32 g2 = generic(g);
; CHECK: .visible .global .align 4 .u32 g3 = g;
+; CHECK: .visible .global .align 8 .u32 g4[2] = {0, generic(g)};
+; CHECK: .visible .global .align 8 .u32 g5[2] = {0, generic(g)+8};
@g = addrspace(1) global i32 42
@g2 = addrspace(1) global i32* addrspacecast (i32 addrspace(1)* @g to i32*)
@g3 = addrspace(1) global i32 addrspace(1)* @g
+@g4 = constant {i32*, i32*} {i32* null, i32* addrspacecast (i32 addrspace(1)* @g to i32*)}
+@g5 = constant {i32*, i32*} {i32* null, i32* addrspacecast (i32 addrspace(1)* getelementptr (i32, i32 addrspace(1)* @g, i32 2) to i32*)}
diff --git a/test/CodeGen/NVPTX/addrspacecast.ll b/test/CodeGen/NVPTX/addrspacecast.ll
index 03b9a9844752..42e67ca8ce9b 100644
--- a/test/CodeGen/NVPTX/addrspacecast.ll
+++ b/test/CodeGen/NVPTX/addrspacecast.ll
@@ -10,7 +10,7 @@ define i32 @conv1(i32 addrspace(1)* %ptr) {
; PTX64: cvta.global.u64
; PTX64: ld.u32
%genptr = addrspacecast i32 addrspace(1)* %ptr to i32*
- %val = load i32* %genptr
+ %val = load i32, i32* %genptr
ret i32 %val
}
@@ -22,7 +22,7 @@ define i32 @conv2(i32 addrspace(3)* %ptr) {
; PTX64: cvta.shared.u64
; PTX64: ld.u32
%genptr = addrspacecast i32 addrspace(3)* %ptr to i32*
- %val = load i32* %genptr
+ %val = load i32, i32* %genptr
ret i32 %val
}
@@ -34,7 +34,7 @@ define i32 @conv3(i32 addrspace(4)* %ptr) {
; PTX64: cvta.const.u64
; PTX64: ld.u32
%genptr = addrspacecast i32 addrspace(4)* %ptr to i32*
- %val = load i32* %genptr
+ %val = load i32, i32* %genptr
ret i32 %val
}
@@ -46,7 +46,7 @@ define i32 @conv4(i32 addrspace(5)* %ptr) {
; PTX64: cvta.local.u64
; PTX64: ld.u32
%genptr = addrspacecast i32 addrspace(5)* %ptr to i32*
- %val = load i32* %genptr
+ %val = load i32, i32* %genptr
ret i32 %val
}
@@ -58,7 +58,7 @@ define i32 @conv5(i32* %ptr) {
; PTX64: cvta.to.global.u64
; PTX64: ld.global.u32
%specptr = addrspacecast i32* %ptr to i32 addrspace(1)*
- %val = load i32 addrspace(1)* %specptr
+ %val = load i32, i32 addrspace(1)* %specptr
ret i32 %val
}
@@ -70,7 +70,7 @@ define i32 @conv6(i32* %ptr) {
; PTX64: cvta.to.shared.u64
; PTX64: ld.shared.u32
%specptr = addrspacecast i32* %ptr to i32 addrspace(3)*
- %val = load i32 addrspace(3)* %specptr
+ %val = load i32, i32 addrspace(3)* %specptr
ret i32 %val
}
@@ -82,7 +82,7 @@ define i32 @conv7(i32* %ptr) {
; PTX64: cvta.to.const.u64
; PTX64: ld.const.u32
%specptr = addrspacecast i32* %ptr to i32 addrspace(4)*
- %val = load i32 addrspace(4)* %specptr
+ %val = load i32, i32 addrspace(4)* %specptr
ret i32 %val
}
@@ -94,6 +94,6 @@ define i32 @conv8(i32* %ptr) {
; PTX64: cvta.to.local.u64
; PTX64: ld.local.u32
%specptr = addrspacecast i32* %ptr to i32 addrspace(5)*
- %val = load i32 addrspace(5)* %specptr
+ %val = load i32, i32 addrspace(5)* %specptr
ret i32 %val
}
diff --git a/test/CodeGen/NVPTX/bug21465.ll b/test/CodeGen/NVPTX/bug21465.ll
index cacffceac517..76af386c6516 100644
--- a/test/CodeGen/NVPTX/bug21465.ll
+++ b/test/CodeGen/NVPTX/bug21465.ll
@@ -11,8 +11,8 @@ entry:
; CHECK-LABEL @_Z22TakesStruct1SPi
; CHECK: bitcast %struct.S* %input to i8*
; CHECK: call i8 addrspace(101)* @llvm.nvvm.ptr.gen.to.param.p101i8.p0i8
- %b = getelementptr inbounds %struct.S* %input, i64 0, i32 1
- %0 = load i32* %b, align 4
+ %b = getelementptr inbounds %struct.S, %struct.S* %input, i64 0, i32 1
+ %0 = load i32, i32* %b, align 4
store i32 %0, i32* %output, align 4
ret void
}
diff --git a/test/CodeGen/NVPTX/bug22246.ll b/test/CodeGen/NVPTX/bug22246.ll
new file mode 100644
index 000000000000..70e7e12336e7
--- /dev/null
+++ b/test/CodeGen/NVPTX/bug22246.ll
@@ -0,0 +1,14 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
+
+target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
+target triple = "nvptx64-nvidia-cuda"
+
+; CHECK-LABEL: _Z3foobbbPb
+define void @_Z3foobbbPb(i1 zeroext %p1, i1 zeroext %p2, i1 zeroext %p3, i8* nocapture %output) {
+entry:
+; CHECK: selp.b32 %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %p{{[0-9]+}}
+ %.sink.v = select i1 %p1, i1 %p2, i1 %p3
+ %frombool5 = zext i1 %.sink.v to i8
+ store i8 %frombool5, i8* %output, align 1
+ ret void
+}
diff --git a/test/CodeGen/NVPTX/bug22322.ll b/test/CodeGen/NVPTX/bug22322.ll
new file mode 100644
index 000000000000..97863b9ea546
--- /dev/null
+++ b/test/CodeGen/NVPTX/bug22322.ll
@@ -0,0 +1,62 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
+
+target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
+target triple = "nvptx64-nvidia-cuda"
+
+%class.float3 = type { float, float, float }
+
+; Function Attrs: nounwind
+; CHECK-LABEL: some_kernel
+define void @some_kernel(%class.float3* nocapture %dst) #0 {
+_ZL11compute_vecRK6float3jb.exit:
+ %ret_vec.sroa.8.i = alloca float, align 4
+ %0 = tail call i32 @llvm.ptx.read.ctaid.x()
+ %1 = tail call i32 @llvm.ptx.read.ntid.x()
+ %2 = mul nsw i32 %1, %0
+ %3 = tail call i32 @llvm.ptx.read.tid.x()
+ %4 = add nsw i32 %2, %3
+ %5 = zext i32 %4 to i64
+ %6 = bitcast float* %ret_vec.sroa.8.i to i8*
+ call void @llvm.lifetime.start(i64 4, i8* %6)
+ %7 = and i32 %4, 15
+ %8 = icmp eq i32 %7, 0
+ %9 = select i1 %8, float 0.000000e+00, float -1.000000e+00
+ store float %9, float* %ret_vec.sroa.8.i, align 4
+; CHECK: setp.lt.f32 %p{{[0-9]+}}, %f{{[0-9]+}}, 0f00000000
+ %10 = fcmp olt float %9, 0.000000e+00
+ %ret_vec.sroa.8.i.val = load float, float* %ret_vec.sroa.8.i, align 4
+ %11 = select i1 %10, float 0.000000e+00, float %ret_vec.sroa.8.i.val
+ call void @llvm.lifetime.end(i64 4, i8* %6)
+ %12 = getelementptr inbounds %class.float3, %class.float3* %dst, i64 %5, i32 0
+ store float 0.000000e+00, float* %12, align 4
+ %13 = getelementptr inbounds %class.float3, %class.float3* %dst, i64 %5, i32 1
+ store float %11, float* %13, align 4
+ %14 = getelementptr inbounds %class.float3, %class.float3* %dst, i64 %5, i32 2
+ store float 0.000000e+00, float* %14, align 4
+ ret void
+}
+
+; Function Attrs: nounwind readnone
+declare i32 @llvm.ptx.read.ctaid.x() #1
+
+; Function Attrs: nounwind readnone
+declare i32 @llvm.ptx.read.ntid.x() #1
+
+; Function Attrs: nounwind readnone
+declare i32 @llvm.ptx.read.tid.x() #1
+
+; Function Attrs: nounwind
+declare void @llvm.lifetime.start(i64, i8* nocapture) #2
+
+; Function Attrs: nounwind
+declare void @llvm.lifetime.end(i64, i8* nocapture) #2
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="true" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone }
+attributes #2 = { nounwind }
+
+!nvvm.annotations = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{void (%class.float3*)* @some_kernel, !"kernel", i32 1}
+!1 = !{!"clang version 3.5.1 (tags/RELEASE_351/final)"}
diff --git a/test/CodeGen/NVPTX/call-with-alloca-buffer.ll b/test/CodeGen/NVPTX/call-with-alloca-buffer.ll
index 8483112381f1..58b191129917 100644
--- a/test/CodeGen/NVPTX/call-with-alloca-buffer.ll
+++ b/test/CodeGen/NVPTX/call-with-alloca-buffer.ll
@@ -27,22 +27,22 @@ entry:
; CHECK: ld.f32 %f[[A0_REG:[0-9]+]], [%rd[[A_REG]]]
; CHECK: st.f32 [%SP+0], %f[[A0_REG]]
- %0 = load float* %a, align 4
+ %0 = load float, float* %a, align 4
%1 = bitcast [16 x i8]* %buf to float*
store float %0, float* %1, align 4
- %arrayidx2 = getelementptr inbounds float* %a, i64 1
- %2 = load float* %arrayidx2, align 4
- %arrayidx3 = getelementptr inbounds [16 x i8]* %buf, i64 0, i64 1
+ %arrayidx2 = getelementptr inbounds float, float* %a, i64 1
+ %2 = load float, float* %arrayidx2, align 4
+ %arrayidx3 = getelementptr inbounds [16 x i8], [16 x i8]* %buf, i64 0, i64 1
%3 = bitcast i8* %arrayidx3 to float*
store float %2, float* %3, align 4
- %arrayidx4 = getelementptr inbounds float* %a, i64 2
- %4 = load float* %arrayidx4, align 4
- %arrayidx5 = getelementptr inbounds [16 x i8]* %buf, i64 0, i64 2
+ %arrayidx4 = getelementptr inbounds float, float* %a, i64 2
+ %4 = load float, float* %arrayidx4, align 4
+ %arrayidx5 = getelementptr inbounds [16 x i8], [16 x i8]* %buf, i64 0, i64 2
%5 = bitcast i8* %arrayidx5 to float*
store float %4, float* %5, align 4
- %arrayidx6 = getelementptr inbounds float* %a, i64 3
- %6 = load float* %arrayidx6, align 4
- %arrayidx7 = getelementptr inbounds [16 x i8]* %buf, i64 0, i64 3
+ %arrayidx6 = getelementptr inbounds float, float* %a, i64 3
+ %6 = load float, float* %arrayidx6, align 4
+ %arrayidx7 = getelementptr inbounds [16 x i8], [16 x i8]* %buf, i64 0, i64 3
%7 = bitcast i8* %arrayidx7 to float*
store float %6, float* %7, align 4
@@ -54,7 +54,7 @@ entry:
; CHECK-NEXT: call.uni
; CHECK-NEXT: callee,
- %arraydecay = getelementptr inbounds [16 x i8]* %buf, i64 0, i64 0
+ %arraydecay = getelementptr inbounds [16 x i8], [16 x i8]* %buf, i64 0, i64 0
call void @callee(float* %a, i8* %arraydecay) #2
ret void
}
diff --git a/test/CodeGen/NVPTX/fp16.ll b/test/CodeGen/NVPTX/fp16.ll
index 8770399f2ec9..b85eed0f6c7f 100644
--- a/test/CodeGen/NVPTX/fp16.ll
+++ b/test/CodeGen/NVPTX/fp16.ll
@@ -8,7 +8,7 @@ declare i16 @llvm.convert.to.fp16.f64(double) nounwind readnone
; CHECK-LABEL: @test_convert_fp16_to_fp32
; CHECK: cvt.f32.f16
define void @test_convert_fp16_to_fp32(float addrspace(1)* noalias %out, i16 addrspace(1)* noalias %in) nounwind {
- %val = load i16 addrspace(1)* %in, align 2
+ %val = load i16, i16 addrspace(1)* %in, align 2
%cvt = call float @llvm.convert.from.fp16.f32(i16 %val) nounwind readnone
store float %cvt, float addrspace(1)* %out, align 4
ret void
@@ -18,7 +18,7 @@ define void @test_convert_fp16_to_fp32(float addrspace(1)* noalias %out, i16 add
; CHECK-LABEL: @test_convert_fp16_to_fp64
; CHECK: cvt.f64.f16
define void @test_convert_fp16_to_fp64(double addrspace(1)* noalias %out, i16 addrspace(1)* noalias %in) nounwind {
- %val = load i16 addrspace(1)* %in, align 2
+ %val = load i16, i16 addrspace(1)* %in, align 2
%cvt = call double @llvm.convert.from.fp16.f64(i16 %val) nounwind readnone
store double %cvt, double addrspace(1)* %out, align 4
ret void
@@ -28,7 +28,7 @@ define void @test_convert_fp16_to_fp64(double addrspace(1)* noalias %out, i16 ad
; CHECK-LABEL: @test_convert_fp32_to_fp16
; CHECK: cvt.rn.f16.f32
define void @test_convert_fp32_to_fp16(i16 addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind {
- %val = load float addrspace(1)* %in, align 2
+ %val = load float, float addrspace(1)* %in, align 2
%cvt = call i16 @llvm.convert.to.fp16.f32(float %val) nounwind readnone
store i16 %cvt, i16 addrspace(1)* %out, align 4
ret void
@@ -38,7 +38,7 @@ define void @test_convert_fp32_to_fp16(i16 addrspace(1)* noalias %out, float add
; CHECK-LABEL: @test_convert_fp64_to_fp16
; CHECK: cvt.rn.f16.f64
define void @test_convert_fp64_to_fp16(i16 addrspace(1)* noalias %out, double addrspace(1)* noalias %in) nounwind {
- %val = load double addrspace(1)* %in, align 2
+ %val = load double, double addrspace(1)* %in, align 2
%cvt = call i16 @llvm.convert.to.fp16.f64(double %val) nounwind readnone
store i16 %cvt, i16 addrspace(1)* %out, align 4
ret void
diff --git a/test/CodeGen/NVPTX/function-align.ll b/test/CodeGen/NVPTX/function-align.ll
new file mode 100644
index 000000000000..e7abfb128f58
--- /dev/null
+++ b/test/CodeGen/NVPTX/function-align.ll
@@ -0,0 +1,7 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
+
+; CHECK-NOT: .align 2
+define ptx_device void @foo() align 2 {
+; CHECK-LABEL: .func foo
+ ret void
+}
diff --git a/test/CodeGen/NVPTX/generic-to-nvvm.ll b/test/CodeGen/NVPTX/generic-to-nvvm.ll
index fb63d6ed575f..66917d5cb182 100644
--- a/test/CodeGen/NVPTX/generic-to-nvvm.ll
+++ b/test/CodeGen/NVPTX/generic-to-nvvm.ll
@@ -13,9 +13,9 @@ target triple = "nvptx-nvidia-cuda"
define void @foo(i32* %a, i32* %b) {
; CHECK: cvta.global.u32
- %ld1 = load i32* @myglobal
+ %ld1 = load i32, i32* @myglobal
; CHECK: cvta.global.u32
- %ld2 = load i32* @myconst
+ %ld2 = load i32, i32* @myconst
store i32 %ld1, i32* %a
store i32 %ld2, i32* %b
ret void
diff --git a/test/CodeGen/NVPTX/half.ll b/test/CodeGen/NVPTX/half.ll
index aa08cc78e91a..b99524162e65 100644
--- a/test/CodeGen/NVPTX/half.ll
+++ b/test/CodeGen/NVPTX/half.ll
@@ -4,7 +4,7 @@ define void @test_load_store(half addrspace(1)* %in, half addrspace(1)* %out) {
; CHECK-LABEL: @test_load_store
; CHECK: ld.global.u16 [[TMP:%rs[0-9]+]], [{{%r[0-9]+}}]
; CHECK: st.global.u16 [{{%r[0-9]+}}], [[TMP]]
- %val = load half addrspace(1)* %in
+ %val = load half, half addrspace(1)* %in
store half %val, half addrspace(1) * %out
ret void
}
@@ -13,7 +13,7 @@ define void @test_bitcast_from_half(half addrspace(1)* %in, i16 addrspace(1)* %o
; CHECK-LABEL: @test_bitcast_from_half
; CHECK: ld.global.u16 [[TMP:%rs[0-9]+]], [{{%r[0-9]+}}]
; CHECK: st.global.u16 [{{%r[0-9]+}}], [[TMP]]
- %val = load half addrspace(1) * %in
+ %val = load half, half addrspace(1) * %in
%val_int = bitcast half %val to i16
store i16 %val_int, i16 addrspace(1)* %out
ret void
@@ -23,7 +23,7 @@ define void @test_bitcast_to_half(half addrspace(1)* %out, i16 addrspace(1)* %in
; CHECK-LABEL: @test_bitcast_to_half
; CHECK: ld.global.u16 [[TMP:%rs[0-9]+]], [{{%r[0-9]+}}]
; CHECK: st.global.u16 [{{%r[0-9]+}}], [[TMP]]
- %val = load i16 addrspace(1)* %in
+ %val = load i16, i16 addrspace(1)* %in
%val_fp = bitcast i16 %val to half
store half %val_fp, half addrspace(1)* %out
ret void
@@ -33,7 +33,7 @@ define void @test_extend32(half addrspace(1)* %in, float addrspace(1)* %out) {
; CHECK-LABEL: @test_extend32
; CHECK: cvt.f32.f16
- %val16 = load half addrspace(1)* %in
+ %val16 = load half, half addrspace(1)* %in
%val32 = fpext half %val16 to float
store float %val32, float addrspace(1)* %out
ret void
@@ -43,7 +43,7 @@ define void @test_extend64(half addrspace(1)* %in, double addrspace(1)* %out) {
; CHECK-LABEL: @test_extend64
; CHECK: cvt.f64.f16
- %val16 = load half addrspace(1)* %in
+ %val16 = load half, half addrspace(1)* %in
%val64 = fpext half %val16 to double
store double %val64, double addrspace(1)* %out
ret void
@@ -53,7 +53,7 @@ define void @test_trunc32(float addrspace(1)* %in, half addrspace(1)* %out) {
; CHECK-LABEL: test_trunc32
; CHECK: cvt.rn.f16.f32
- %val32 = load float addrspace(1)* %in
+ %val32 = load float, float addrspace(1)* %in
%val16 = fptrunc float %val32 to half
store half %val16, half addrspace(1)* %out
ret void
@@ -63,7 +63,7 @@ define void @test_trunc64(double addrspace(1)* %in, half addrspace(1)* %out) {
; CHECK-LABEL: @test_trunc64
; CHECK: cvt.rn.f16.f64
- %val32 = load double addrspace(1)* %in
+ %val32 = load double, double addrspace(1)* %in
%val16 = fptrunc double %val32 to half
store half %val16, half addrspace(1)* %out
ret void
diff --git a/test/CodeGen/NVPTX/i1-global.ll b/test/CodeGen/NVPTX/i1-global.ll
index e3fe08e5f874..35d77b4b44d2 100644
--- a/test/CodeGen/NVPTX/i1-global.ll
+++ b/test/CodeGen/NVPTX/i1-global.ll
@@ -8,7 +8,7 @@ target triple = "nvptx-nvidia-cuda"
define void @foo(i1 %p, i32* %out) {
- %ld = load i1 addrspace(1)* @mypred
+ %ld = load i1, i1 addrspace(1)* @mypred
%val = zext i1 %ld to i32
store i32 %val, i32* %out
ret void
diff --git a/test/CodeGen/NVPTX/i8-param.ll b/test/CodeGen/NVPTX/i8-param.ll
index 84daa9f66316..6a1e3a0e1a0d 100644
--- a/test/CodeGen/NVPTX/i8-param.ll
+++ b/test/CodeGen/NVPTX/i8-param.ll
@@ -13,7 +13,7 @@ define i8 @callee(i8 %a) {
; CHECK: .visible .func caller
define void @caller(i8* %a) {
; CHECK: ld.u8
- %val = load i8* %a
+ %val = load i8, i8* %a
%ret = tail call i8 @callee(i8 %val)
; CHECK: ld.param.b32
store i8 %ret, i8* %a
diff --git a/test/CodeGen/NVPTX/ld-addrspace.ll b/test/CodeGen/NVPTX/ld-addrspace.ll
index f33659c92e84..0018e6177be8 100644
--- a/test/CodeGen/NVPTX/ld-addrspace.ll
+++ b/test/CodeGen/NVPTX/ld-addrspace.ll
@@ -8,7 +8,7 @@ define i8 @ld_global_i8(i8 addrspace(1)* %ptr) {
; PTX32: ret
; PTX64: ld.global.u8 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
; PTX64: ret
- %a = load i8 addrspace(1)* %ptr
+ %a = load i8, i8 addrspace(1)* %ptr
ret i8 %a
}
@@ -17,7 +17,7 @@ define i8 @ld_shared_i8(i8 addrspace(3)* %ptr) {
; PTX32: ret
; PTX64: ld.shared.u8 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
; PTX64: ret
- %a = load i8 addrspace(3)* %ptr
+ %a = load i8, i8 addrspace(3)* %ptr
ret i8 %a
}
@@ -26,7 +26,7 @@ define i8 @ld_local_i8(i8 addrspace(5)* %ptr) {
; PTX32: ret
; PTX64: ld.local.u8 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
; PTX64: ret
- %a = load i8 addrspace(5)* %ptr
+ %a = load i8, i8 addrspace(5)* %ptr
ret i8 %a
}
@@ -36,7 +36,7 @@ define i16 @ld_global_i16(i16 addrspace(1)* %ptr) {
; PTX32: ret
; PTX64: ld.global.u16 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
; PTX64: ret
- %a = load i16 addrspace(1)* %ptr
+ %a = load i16, i16 addrspace(1)* %ptr
ret i16 %a
}
@@ -45,7 +45,7 @@ define i16 @ld_shared_i16(i16 addrspace(3)* %ptr) {
; PTX32: ret
; PTX64: ld.shared.u16 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
; PTX64: ret
- %a = load i16 addrspace(3)* %ptr
+ %a = load i16, i16 addrspace(3)* %ptr
ret i16 %a
}
@@ -54,7 +54,7 @@ define i16 @ld_local_i16(i16 addrspace(5)* %ptr) {
; PTX32: ret
; PTX64: ld.local.u16 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
; PTX64: ret
- %a = load i16 addrspace(5)* %ptr
+ %a = load i16, i16 addrspace(5)* %ptr
ret i16 %a
}
@@ -64,7 +64,7 @@ define i32 @ld_global_i32(i32 addrspace(1)* %ptr) {
; PTX32: ret
; PTX64: ld.global.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
; PTX64: ret
- %a = load i32 addrspace(1)* %ptr
+ %a = load i32, i32 addrspace(1)* %ptr
ret i32 %a
}
@@ -73,7 +73,7 @@ define i32 @ld_shared_i32(i32 addrspace(3)* %ptr) {
; PTX32: ret
; PTX64: ld.shared.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
; PTX64: ret
- %a = load i32 addrspace(3)* %ptr
+ %a = load i32, i32 addrspace(3)* %ptr
ret i32 %a
}
@@ -82,7 +82,7 @@ define i32 @ld_local_i32(i32 addrspace(5)* %ptr) {
; PTX32: ret
; PTX64: ld.local.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
; PTX64: ret
- %a = load i32 addrspace(5)* %ptr
+ %a = load i32, i32 addrspace(5)* %ptr
ret i32 %a
}
@@ -92,7 +92,7 @@ define i64 @ld_global_i64(i64 addrspace(1)* %ptr) {
; PTX32: ret
; PTX64: ld.global.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
; PTX64: ret
- %a = load i64 addrspace(1)* %ptr
+ %a = load i64, i64 addrspace(1)* %ptr
ret i64 %a
}
@@ -101,7 +101,7 @@ define i64 @ld_shared_i64(i64 addrspace(3)* %ptr) {
; PTX32: ret
; PTX64: ld.shared.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
; PTX64: ret
- %a = load i64 addrspace(3)* %ptr
+ %a = load i64, i64 addrspace(3)* %ptr
ret i64 %a
}
@@ -110,7 +110,7 @@ define i64 @ld_local_i64(i64 addrspace(5)* %ptr) {
; PTX32: ret
; PTX64: ld.local.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
; PTX64: ret
- %a = load i64 addrspace(5)* %ptr
+ %a = load i64, i64 addrspace(5)* %ptr
ret i64 %a
}
@@ -120,7 +120,7 @@ define float @ld_global_f32(float addrspace(1)* %ptr) {
; PTX32: ret
; PTX64: ld.global.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
; PTX64: ret
- %a = load float addrspace(1)* %ptr
+ %a = load float, float addrspace(1)* %ptr
ret float %a
}
@@ -129,7 +129,7 @@ define float @ld_shared_f32(float addrspace(3)* %ptr) {
; PTX32: ret
; PTX64: ld.shared.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
; PTX64: ret
- %a = load float addrspace(3)* %ptr
+ %a = load float, float addrspace(3)* %ptr
ret float %a
}
@@ -138,7 +138,7 @@ define float @ld_local_f32(float addrspace(5)* %ptr) {
; PTX32: ret
; PTX64: ld.local.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
; PTX64: ret
- %a = load float addrspace(5)* %ptr
+ %a = load float, float addrspace(5)* %ptr
ret float %a
}
@@ -148,7 +148,7 @@ define double @ld_global_f64(double addrspace(1)* %ptr) {
; PTX32: ret
; PTX64: ld.global.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
; PTX64: ret
- %a = load double addrspace(1)* %ptr
+ %a = load double, double addrspace(1)* %ptr
ret double %a
}
@@ -157,7 +157,7 @@ define double @ld_shared_f64(double addrspace(3)* %ptr) {
; PTX32: ret
; PTX64: ld.shared.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
; PTX64: ret
- %a = load double addrspace(3)* %ptr
+ %a = load double, double addrspace(3)* %ptr
ret double %a
}
@@ -166,6 +166,6 @@ define double @ld_local_f64(double addrspace(5)* %ptr) {
; PTX32: ret
; PTX64: ld.local.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
; PTX64: ret
- %a = load double addrspace(5)* %ptr
+ %a = load double, double addrspace(5)* %ptr
ret double %a
}
diff --git a/test/CodeGen/NVPTX/ld-generic.ll b/test/CodeGen/NVPTX/ld-generic.ll
index d629e0ecc647..44cfe6551b99 100644
--- a/test/CodeGen/NVPTX/ld-generic.ll
+++ b/test/CodeGen/NVPTX/ld-generic.ll
@@ -8,7 +8,7 @@ define i8 @ld_global_i8(i8 addrspace(0)* %ptr) {
; PTX32: ret
; PTX64: ld.u8 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
; PTX64: ret
- %a = load i8 addrspace(0)* %ptr
+ %a = load i8, i8 addrspace(0)* %ptr
ret i8 %a
}
@@ -18,7 +18,7 @@ define i16 @ld_global_i16(i16 addrspace(0)* %ptr) {
; PTX32: ret
; PTX64: ld.u16 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
; PTX64: ret
- %a = load i16 addrspace(0)* %ptr
+ %a = load i16, i16 addrspace(0)* %ptr
ret i16 %a
}
@@ -28,7 +28,7 @@ define i32 @ld_global_i32(i32 addrspace(0)* %ptr) {
; PTX32: ret
; PTX64: ld.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
; PTX64: ret
- %a = load i32 addrspace(0)* %ptr
+ %a = load i32, i32 addrspace(0)* %ptr
ret i32 %a
}
@@ -38,7 +38,7 @@ define i64 @ld_global_i64(i64 addrspace(0)* %ptr) {
; PTX32: ret
; PTX64: ld.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
; PTX64: ret
- %a = load i64 addrspace(0)* %ptr
+ %a = load i64, i64 addrspace(0)* %ptr
ret i64 %a
}
@@ -48,7 +48,7 @@ define float @ld_global_f32(float addrspace(0)* %ptr) {
; PTX32: ret
; PTX64: ld.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
; PTX64: ret
- %a = load float addrspace(0)* %ptr
+ %a = load float, float addrspace(0)* %ptr
ret float %a
}
@@ -58,6 +58,6 @@ define double @ld_global_f64(double addrspace(0)* %ptr) {
; PTX32: ret
; PTX64: ld.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
; PTX64: ret
- %a = load double addrspace(0)* %ptr
+ %a = load double, double addrspace(0)* %ptr
ret double %a
}
diff --git a/test/CodeGen/NVPTX/ldu-reg-plus-offset.ll b/test/CodeGen/NVPTX/ldu-reg-plus-offset.ll
index fd35a7503901..ec96a493021a 100644
--- a/test/CodeGen/NVPTX/ldu-reg-plus-offset.ll
+++ b/test/CodeGen/NVPTX/ldu-reg-plus-offset.ll
@@ -6,9 +6,9 @@ target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
define void @reg_plus_offset(i32* %a) {
; CHECK: ldu.global.u32 %r{{[0-9]+}}, [%r{{[0-9]+}}+32];
; CHECK: ldu.global.u32 %r{{[0-9]+}}, [%r{{[0-9]+}}+36];
- %p2 = getelementptr i32* %a, i32 8
+ %p2 = getelementptr i32, i32* %a, i32 8
%t1 = call i32 @llvm.nvvm.ldu.global.i.i32.p0i32(i32* %p2, i32 4)
- %p3 = getelementptr i32* %a, i32 9
+ %p3 = getelementptr i32, i32* %a, i32 9
%t2 = call i32 @llvm.nvvm.ldu.global.i.i32.p0i32(i32* %p3, i32 4)
%t3 = mul i32 %t1, %t2
store i32 %t3, i32* %a
diff --git a/test/CodeGen/NVPTX/load-sext-i1.ll b/test/CodeGen/NVPTX/load-sext-i1.ll
index d836740eed94..9fc98a45f59a 100644
--- a/test/CodeGen/NVPTX/load-sext-i1.ll
+++ b/test/CodeGen/NVPTX/load-sext-i1.ll
@@ -6,8 +6,8 @@ target triple = "nvptx-nvidia-cuda"
define void @main(i1* %a1, i32 %a2, i32* %arg3) {
; CHECK: ld.u8
; CHECK-NOT: ld.u1
- %t1 = getelementptr i1* %a1, i32 %a2
- %t2 = load i1* %t1
+ %t1 = getelementptr i1, i1* %a1, i32 %a2
+ %t2 = load i1, i1* %t1
%t3 = sext i1 %t2 to i32
store i32 %t3, i32* %arg3
ret void
diff --git a/test/CodeGen/NVPTX/machine-sink.ll b/test/CodeGen/NVPTX/machine-sink.ll
index 3614bea16534..65ba141c41d9 100644
--- a/test/CodeGen/NVPTX/machine-sink.ll
+++ b/test/CodeGen/NVPTX/machine-sink.ll
@@ -14,8 +14,8 @@ target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
define float @post_dominate(float %x, i1 %cond) {
; CHECK-LABEL: post_dominate(
entry:
- %0 = load float* addrspacecast (float addrspace(3)* @scalar1 to float*), align 4
- %1 = load float* addrspacecast (float addrspace(3)* @scalar2 to float*), align 4
+ %0 = load float, float* addrspacecast (float addrspace(3)* @scalar1 to float*), align 4
+ %1 = load float, float* addrspacecast (float addrspace(3)* @scalar2 to float*), align 4
; CHECK: ld.shared.f32
; CHECK: ld.shared.f32
%2 = fmul float %0, %0
diff --git a/test/CodeGen/NVPTX/misaligned-vector-ldst.ll b/test/CodeGen/NVPTX/misaligned-vector-ldst.ll
index 90c9c4306de7..2ad72b018851 100644
--- a/test/CodeGen/NVPTX/misaligned-vector-ldst.ll
+++ b/test/CodeGen/NVPTX/misaligned-vector-ldst.ll
@@ -10,7 +10,7 @@ define <4 x float> @t1(i8* %p1) {
; CHECK-NOT: ld.f32
; CHECK: ld.u8
%cast = bitcast i8* %p1 to <4 x float>*
- %r = load <4 x float>* %cast, align 1
+ %r = load <4 x float>, <4 x float>* %cast, align 1
ret <4 x float> %r
}
@@ -20,7 +20,7 @@ define <4 x float> @t2(i8* %p1) {
; CHECK-NOT: ld.v2
; CHECK: ld.f32
%cast = bitcast i8* %p1 to <4 x float>*
- %r = load <4 x float>* %cast, align 4
+ %r = load <4 x float>, <4 x float>* %cast, align 4
ret <4 x float> %r
}
@@ -29,7 +29,7 @@ define <4 x float> @t3(i8* %p1) {
; CHECK-NOT: ld.v4
; CHECK: ld.v2
%cast = bitcast i8* %p1 to <4 x float>*
- %r = load <4 x float>* %cast, align 8
+ %r = load <4 x float>, <4 x float>* %cast, align 8
ret <4 x float> %r
}
@@ -37,7 +37,7 @@ define <4 x float> @t3(i8* %p1) {
define <4 x float> @t4(i8* %p1) {
; CHECK: ld.v4
%cast = bitcast i8* %p1 to <4 x float>*
- %r = load <4 x float>* %cast, align 16
+ %r = load <4 x float>, <4 x float>* %cast, align 16
ret <4 x float> %r
}
diff --git a/test/CodeGen/NVPTX/noduplicate-syncthreads.ll b/test/CodeGen/NVPTX/noduplicate-syncthreads.ll
index 841bbc3a517c..2fec31b3791d 100644
--- a/test/CodeGen/NVPTX/noduplicate-syncthreads.ll
+++ b/test/CodeGen/NVPTX/noduplicate-syncthreads.ll
@@ -11,16 +11,16 @@ define void @foo(float* %output) #1 {
entry:
%output.addr = alloca float*, align 8
store float* %output, float** %output.addr, align 8
- %0 = load float** %output.addr, align 8
- %arrayidx = getelementptr inbounds float* %0, i64 0
- %1 = load float* %arrayidx, align 4
+ %0 = load float*, float** %output.addr, align 8
+ %arrayidx = getelementptr inbounds float, float* %0, i64 0
+ %1 = load float, float* %arrayidx, align 4
%conv = fpext float %1 to double
%cmp = fcmp olt double %conv, 1.000000e+01
br i1 %cmp, label %if.then, label %if.else
if.then: ; preds = %entry
- %2 = load float** %output.addr, align 8
- %3 = load float* %2, align 4
+ %2 = load float*, float** %output.addr, align 8
+ %3 = load float, float* %2, align 4
%conv1 = fpext float %3 to double
%add = fadd double %conv1, 1.000000e+00
%conv2 = fptrunc double %add to float
@@ -28,8 +28,8 @@ if.then: ; preds = %entry
br label %if.end
if.else: ; preds = %entry
- %4 = load float** %output.addr, align 8
- %5 = load float* %4, align 4
+ %4 = load float*, float** %output.addr, align 8
+ %5 = load float, float* %4, align 4
%conv3 = fpext float %5 to double
%add4 = fadd double %conv3, 2.000000e+00
%conv5 = fptrunc double %add4 to float
@@ -38,16 +38,16 @@ if.else: ; preds = %entry
if.end: ; preds = %if.else, %if.then
call void @llvm.cuda.syncthreads()
- %6 = load float** %output.addr, align 8
- %arrayidx6 = getelementptr inbounds float* %6, i64 0
- %7 = load float* %arrayidx6, align 4
+ %6 = load float*, float** %output.addr, align 8
+ %arrayidx6 = getelementptr inbounds float, float* %6, i64 0
+ %7 = load float, float* %arrayidx6, align 4
%conv7 = fpext float %7 to double
%cmp8 = fcmp olt double %conv7, 1.000000e+01
br i1 %cmp8, label %if.then9, label %if.else13
if.then9: ; preds = %if.end
- %8 = load float** %output.addr, align 8
- %9 = load float* %8, align 4
+ %8 = load float*, float** %output.addr, align 8
+ %9 = load float, float* %8, align 4
%conv10 = fpext float %9 to double
%add11 = fadd double %conv10, 3.000000e+00
%conv12 = fptrunc double %add11 to float
@@ -55,8 +55,8 @@ if.then9: ; preds = %if.end
br label %if.end17
if.else13: ; preds = %if.end
- %10 = load float** %output.addr, align 8
- %11 = load float* %10, align 4
+ %10 = load float*, float** %output.addr, align 8
+ %11 = load float, float* %10, align 4
%conv14 = fpext float %11 to double
%add15 = fadd double %conv14, 4.000000e+00
%conv16 = fptrunc double %add15 to float
diff --git a/test/CodeGen/NVPTX/nounroll.ll b/test/CodeGen/NVPTX/nounroll.ll
new file mode 100644
index 000000000000..e80a4a21f161
--- /dev/null
+++ b/test/CodeGen/NVPTX/nounroll.ll
@@ -0,0 +1,37 @@
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s
+
+target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
+target triple = "nvptx64-unknown-unknown"
+
+; Compiled from the following CUDA code:
+;
+; #pragma nounroll
+; for (int i = 0; i < 2; ++i)
+; output[i] = input[i];
+define void @nounroll(float* %input, float* %output) {
+; CHECK-LABEL: .visible .func nounroll(
+entry:
+ br label %for.body
+
+for.body:
+; CHECK: .pragma "nounroll"
+ %i.06 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+ %idxprom = sext i32 %i.06 to i64
+ %arrayidx = getelementptr inbounds float, float* %input, i64 %idxprom
+ %0 = load float, float* %arrayidx, align 4
+; CHECK: ld.f32
+ %arrayidx2 = getelementptr inbounds float, float* %output, i64 %idxprom
+ store float %0, float* %arrayidx2, align 4
+; CHECK: st.f32
+ %inc = add nuw nsw i32 %i.06, 1
+ %exitcond = icmp eq i32 %inc, 2
+ br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !0
+; CHECK-NOT: ld.f32
+; CHECK-NOT: st.f32
+
+for.end:
+ ret void
+}
+
+!0 = distinct !{!0, !1}
+!1 = !{!"llvm.loop.unroll.disable"}
diff --git a/test/CodeGen/NVPTX/nvvm-reflect.ll b/test/CodeGen/NVPTX/nvvm-reflect.ll
index 21e9c69e657a..8c75dfc30a56 100644
--- a/test/CodeGen/NVPTX/nvvm-reflect.ll
+++ b/test/CodeGen/NVPTX/nvvm-reflect.ll
@@ -11,7 +11,7 @@ define float @foo(float %a, float %b) {
; USE_MUL_0-NOT: call i32 @__nvvm_reflect
; USE_MUL_1: define float @foo
; USE_MUL_1-NOT: call i32 @__nvvm_reflect
- %ptr = tail call i8* @llvm.nvvm.ptr.constant.to.gen.p0i8.p4i8(i8 addrspace(4)* getelementptr inbounds ([8 x i8] addrspace(4)* @str, i32 0, i32 0))
+ %ptr = tail call i8* @llvm.nvvm.ptr.constant.to.gen.p0i8.p4i8(i8 addrspace(4)* getelementptr inbounds ([8 x i8], [8 x i8] addrspace(4)* @str, i32 0, i32 0))
%reflect = tail call i32 @__nvvm_reflect(i8* %ptr)
%cmp = icmp ugt i32 %reflect, 0
br i1 %cmp, label %use_mul, label %use_add
@@ -42,7 +42,38 @@ define i32 @intrinsic() {
; USE_MUL_0: ret i32 0
; USE_MUL_1-NOT: call i32 @llvm.nvvm.reflect
; USE_MUL_1: ret i32 1
- %ptr = tail call i8* @llvm.nvvm.ptr.constant.to.gen.p0i8.p4i8(i8 addrspace(4)* getelementptr inbounds ([8 x i8] addrspace(4)* @str, i32 0, i32 0))
+ %ptr = tail call i8* @llvm.nvvm.ptr.constant.to.gen.p0i8.p4i8(i8 addrspace(4)* getelementptr inbounds ([8 x i8], [8 x i8] addrspace(4)* @str, i32 0, i32 0))
%reflect = tail call i32 @llvm.nvvm.reflect.p0i8(i8* %ptr)
ret i32 %reflect
}
+
+; CUDA-7.0 passes the __nvvm_reflect argument slightly differently.
+; Verify that it works, too.
+
+@"$str" = private addrspace(1) constant [8 x i8] c"USE_MUL\00"
+
+define float @bar(float %a, float %b) {
+; USE_MUL_0: define float @bar
+; USE_MUL_0-NOT: call i32 @__nvvm_reflect
+; USE_MUL_1: define float @bar
+; USE_MUL_1-NOT: call i32 @__nvvm_reflect
+ %reflect = call i32 @__nvvm_reflect(i8* addrspacecast (i8 addrspace(1)* getelementptr inbounds ([8 x i8], [8 x i8] addrspace(1)* @"$str", i32 0, i32 0) to i8*))
+ %cmp = icmp ne i32 %reflect, 0
+ br i1 %cmp, label %use_mul, label %use_add
+
+use_mul:
+; USE_MUL_1: fmul float %a, %b
+; USE_MUL_0-NOT: fadd float %a, %b
+ %ret1 = fmul float %a, %b
+ br label %exit
+
+use_add:
+; USE_MUL_0: fadd float %a, %b
+; USE_MUL_1-NOT: fmul float %a, %b
+ %ret2 = fadd float %a, %b
+ br label %exit
+
+exit:
+ %ret = phi float [%ret1, %use_mul], [%ret2, %use_add]
+ ret float %ret
+}
diff --git a/test/CodeGen/NVPTX/pr13291-i1-store.ll b/test/CodeGen/NVPTX/pr13291-i1-store.ll
index cc67a6fff8e4..d4f7c3bd210a 100644
--- a/test/CodeGen/NVPTX/pr13291-i1-store.ll
+++ b/test/CodeGen/NVPTX/pr13291-i1-store.ll
@@ -19,7 +19,7 @@ define ptx_kernel void @t2(i1* %a, i8* %b) {
; PTX64: and.b16 %rs{{[0-9]+}}, %rs{{[0-9]+}}, 1;
; PTX64: setp.eq.b16 %p{{[0-9]+}}, %rs{{[0-9]+}}, 1;
- %t1 = load i1* %a
+ %t1 = load i1, i1* %a
%t2 = select i1 %t1, i8 1, i8 2
store i8 %t2, i8* %b
ret void
diff --git a/test/CodeGen/NVPTX/pr16278.ll b/test/CodeGen/NVPTX/pr16278.ll
index 5432a848442c..a836eaf2e51f 100644
--- a/test/CodeGen/NVPTX/pr16278.ll
+++ b/test/CodeGen/NVPTX/pr16278.ll
@@ -5,6 +5,6 @@
define float @foo() {
; CHECK: ld.const.f32
- %val = load float addrspace(4)* @one_f
+ %val = load float, float addrspace(4)* @one_f
ret float %val
}
diff --git a/test/CodeGen/NVPTX/pr17529.ll b/test/CodeGen/NVPTX/pr17529.ll
index a16214225674..a7519776f526 100644
--- a/test/CodeGen/NVPTX/pr17529.ll
+++ b/test/CodeGen/NVPTX/pr17529.ll
@@ -11,7 +11,7 @@ entry:
vector.body: ; preds = %vector.body, %entry
%index = phi i64 [ %index.next, %vector.body ], [ 0, %entry ]
- %scevgep9 = getelementptr i8* %dst, i64 %index
+ %scevgep9 = getelementptr i8, i8* %dst, i64 %index
%scevgep910 = bitcast i8* %scevgep9 to <4 x i8>*
store <4 x i8> undef, <4 x i8>* %scevgep910, align 1
%index.next = add i64 %index, 4
@@ -22,13 +22,13 @@ middle.block: ; preds = %vector.body
br i1 undef, label %for.end, label %for.body.preheader1
for.body.preheader1: ; preds = %middle.block
- %scevgep2 = getelementptr i8* %dst, i64 0
+ %scevgep2 = getelementptr i8, i8* %dst, i64 0
br label %for.body
for.body: ; preds = %for.body, %for.body.preheader1
%lsr.iv3 = phi i8* [ %scevgep2, %for.body.preheader1 ], [ %scevgep4, %for.body ]
store i8 undef, i8* %lsr.iv3, align 1
- %scevgep4 = getelementptr i8* %lsr.iv3, i64 1
+ %scevgep4 = getelementptr i8, i8* %lsr.iv3, i64 1
br label %for.body
for.end: ; preds = %middle.block, %entry
diff --git a/test/CodeGen/NVPTX/ptx-version-30.ll b/test/CodeGen/NVPTX/ptx-version-30.ll
deleted file mode 100644
index 0422b01f4ee3..000000000000
--- a/test/CodeGen/NVPTX/ptx-version-30.ll
+++ /dev/null
@@ -1,6 +0,0 @@
-; RUN: llc < %s -march=nvptx -mcpu=sm_20 -mattr=ptx30 | FileCheck %s
-; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -mattr=ptx30 | FileCheck %s
-
-
-; CHECK: .version 3.0
-
diff --git a/test/CodeGen/NVPTX/ptx-version-31.ll b/test/CodeGen/NVPTX/ptx-version-31.ll
deleted file mode 100644
index d6e57301a371..000000000000
--- a/test/CodeGen/NVPTX/ptx-version-31.ll
+++ /dev/null
@@ -1,6 +0,0 @@
-; RUN: llc < %s -march=nvptx -mcpu=sm_20 -mattr=ptx31 | FileCheck %s
-; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -mattr=ptx31 | FileCheck %s
-
-
-; CHECK: .version 3.1
-
diff --git a/test/CodeGen/NVPTX/refl1.ll b/test/CodeGen/NVPTX/refl1.ll
index e8782ea3aa27..0432b67535c0 100644
--- a/test/CodeGen/NVPTX/refl1.ll
+++ b/test/CodeGen/NVPTX/refl1.ll
@@ -5,7 +5,7 @@ target triple = "nvptx-nvidia-cuda"
; Function Attrs: nounwind
; CHECK: .entry foo
define void @foo(float* nocapture %a) #0 {
- %val = load float* %a
+ %val = load float, float* %a
%tan = tail call fastcc float @__nv_fast_tanf(float %val)
store float %tan, float* %a
ret void
diff --git a/test/CodeGen/NVPTX/sched1.ll b/test/CodeGen/NVPTX/sched1.ll
index 03ab635e73b9..fb01eb262adc 100644
--- a/test/CodeGen/NVPTX/sched1.ll
+++ b/test/CodeGen/NVPTX/sched1.ll
@@ -11,14 +11,14 @@ define void @foo(i32* %a) {
; CHECK-NEXT: add.s32
; CHECK-NEXT: add.s32
; CHECK-NEXT: add.s32
- %ptr0 = getelementptr i32* %a, i32 0
- %val0 = load i32* %ptr0
- %ptr1 = getelementptr i32* %a, i32 1
- %val1 = load i32* %ptr1
- %ptr2 = getelementptr i32* %a, i32 2
- %val2 = load i32* %ptr2
- %ptr3 = getelementptr i32* %a, i32 3
- %val3 = load i32* %ptr3
+ %ptr0 = getelementptr i32, i32* %a, i32 0
+ %val0 = load i32, i32* %ptr0
+ %ptr1 = getelementptr i32, i32* %a, i32 1
+ %val1 = load i32, i32* %ptr1
+ %ptr2 = getelementptr i32, i32* %a, i32 2
+ %val2 = load i32, i32* %ptr2
+ %ptr3 = getelementptr i32, i32* %a, i32 3
+ %val3 = load i32, i32* %ptr3
%t0 = add i32 %val0, %val1
%t1 = add i32 %t0, %val2
diff --git a/test/CodeGen/NVPTX/sched2.ll b/test/CodeGen/NVPTX/sched2.ll
index 71a9a4963faf..91ed77878f81 100644
--- a/test/CodeGen/NVPTX/sched2.ll
+++ b/test/CodeGen/NVPTX/sched2.ll
@@ -12,14 +12,14 @@ define void @foo(<2 x i32>* %a) {
; CHECK-NEXT: add.s32
; CHECK-NEXT: add.s32
; CHECK-NEXT: add.s32
- %ptr0 = getelementptr <2 x i32>* %a, i32 0
- %val0 = load <2 x i32>* %ptr0
- %ptr1 = getelementptr <2 x i32>* %a, i32 1
- %val1 = load <2 x i32>* %ptr1
- %ptr2 = getelementptr <2 x i32>* %a, i32 2
- %val2 = load <2 x i32>* %ptr2
- %ptr3 = getelementptr <2 x i32>* %a, i32 3
- %val3 = load <2 x i32>* %ptr3
+ %ptr0 = getelementptr <2 x i32>, <2 x i32>* %a, i32 0
+ %val0 = load <2 x i32>, <2 x i32>* %ptr0
+ %ptr1 = getelementptr <2 x i32>, <2 x i32>* %a, i32 1
+ %val1 = load <2 x i32>, <2 x i32>* %ptr1
+ %ptr2 = getelementptr <2 x i32>, <2 x i32>* %a, i32 2
+ %val2 = load <2 x i32>, <2 x i32>* %ptr2
+ %ptr3 = getelementptr <2 x i32>, <2 x i32>* %a, i32 3
+ %val3 = load <2 x i32>, <2 x i32>* %ptr3
%t0 = add <2 x i32> %val0, %val1
%t1 = add <2 x i32> %t0, %val2
diff --git a/test/CodeGen/NVPTX/shift-parts.ll b/test/CodeGen/NVPTX/shift-parts.ll
index 748297caf339..b4d408ff5972 100644
--- a/test/CodeGen/NVPTX/shift-parts.ll
+++ b/test/CodeGen/NVPTX/shift-parts.ll
@@ -12,8 +12,8 @@ define void @shift_parts_left_128(i128* %val, i128* %amtptr) {
; CHECK: setp.gt.s32
; CHECK: selp.b64
; CHECK: shl.b64
- %amt = load i128* %amtptr
- %a = load i128* %val
+ %amt = load i128, i128* %amtptr
+ %a = load i128, i128* %val
%val0 = shl i128 %a, %amt
store i128 %val0, i128* %val
ret void
@@ -30,8 +30,8 @@ define void @shift_parts_right_128(i128* %val, i128* %amtptr) {
; CHECK: setp.gt.s32
; CHECK: selp.b64
; CHECK: shr.s64
- %amt = load i128* %amtptr
- %a = load i128* %val
+ %amt = load i128, i128* %amtptr
+ %a = load i128, i128* %val
%val0 = ashr i128 %a, %amt
store i128 %val0, i128* %val
ret void
diff --git a/test/CodeGen/NVPTX/simple-call.ll b/test/CodeGen/NVPTX/simple-call.ll
index 1b41361cf7ed..da6568685fe6 100644
--- a/test/CodeGen/NVPTX/simple-call.ll
+++ b/test/CodeGen/NVPTX/simple-call.ll
@@ -11,7 +11,7 @@ define float @device_func(float %a) noinline {
; CHECK: .entry kernel_func
define void @kernel_func(float* %a) {
- %val = load float* %a
+ %val = load float, float* %a
; CHECK: call.uni (retval0),
; CHECK: device_func,
%mul = call float @device_func(float %val)
diff --git a/test/CodeGen/NVPTX/sm-version-30.ll b/test/CodeGen/NVPTX/sm-version-30.ll
index 692b49a0d6b3..4f35cf04c63b 100644
--- a/test/CodeGen/NVPTX/sm-version-30.ll
+++ b/test/CodeGen/NVPTX/sm-version-30.ll
@@ -2,5 +2,6 @@
; RUN: llc < %s -march=nvptx64 -mcpu=sm_30 | FileCheck %s
+; CHECK: .version 3.2
; CHECK: .target sm_30
diff --git a/test/CodeGen/NVPTX/sm-version-32.ll b/test/CodeGen/NVPTX/sm-version-32.ll
new file mode 100644
index 000000000000..d6a5082c5267
--- /dev/null
+++ b/test/CodeGen/NVPTX/sm-version-32.ll
@@ -0,0 +1,7 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_32 | FileCheck %s
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_32 | FileCheck %s
+
+
+; CHECK: .version 4.0
+; CHECK: .target sm_32
+
diff --git a/test/CodeGen/NVPTX/sm-version-35.ll b/test/CodeGen/NVPTX/sm-version-35.ll
index 25368a01335e..8456c666677d 100644
--- a/test/CodeGen/NVPTX/sm-version-35.ll
+++ b/test/CodeGen/NVPTX/sm-version-35.ll
@@ -2,5 +2,6 @@
; RUN: llc < %s -march=nvptx64 -mcpu=sm_35 | FileCheck %s
+; CHECK: .version 3.2
; CHECK: .target sm_35
diff --git a/test/CodeGen/NVPTX/sm-version-37.ll b/test/CodeGen/NVPTX/sm-version-37.ll
new file mode 100644
index 000000000000..fd51a9c7063f
--- /dev/null
+++ b/test/CodeGen/NVPTX/sm-version-37.ll
@@ -0,0 +1,7 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_37 | FileCheck %s
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_37 | FileCheck %s
+
+
+; CHECK: .version 4.1
+; CHECK: .target sm_37
+
diff --git a/test/CodeGen/NVPTX/sm-version-50.ll b/test/CodeGen/NVPTX/sm-version-50.ll
new file mode 100644
index 000000000000..374c6ea057ae
--- /dev/null
+++ b/test/CodeGen/NVPTX/sm-version-50.ll
@@ -0,0 +1,7 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_50 | FileCheck %s
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_50 | FileCheck %s
+
+
+; CHECK: .version 4.0
+; CHECK: .target sm_50
+
diff --git a/test/CodeGen/NVPTX/sm-version-52.ll b/test/CodeGen/NVPTX/sm-version-52.ll
new file mode 100644
index 000000000000..18881b2e98cc
--- /dev/null
+++ b/test/CodeGen/NVPTX/sm-version-52.ll
@@ -0,0 +1,7 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_52 | FileCheck %s
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_52 | FileCheck %s
+
+
+; CHECK: .version 4.1
+; CHECK: .target sm_52
+
diff --git a/test/CodeGen/NVPTX/sm-version-53.ll b/test/CodeGen/NVPTX/sm-version-53.ll
new file mode 100644
index 000000000000..50d2dec11bc5
--- /dev/null
+++ b/test/CodeGen/NVPTX/sm-version-53.ll
@@ -0,0 +1,7 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_53 | FileCheck %s
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_53 | FileCheck %s
+
+
+; CHECK: .version 4.2
+; CHECK: .target sm_53
+
diff --git a/test/CodeGen/NVPTX/symbol-naming.ll b/test/CodeGen/NVPTX/symbol-naming.ll
index bd1333f1c4e6..0f176934ca39 100644
--- a/test/CodeGen/NVPTX/symbol-naming.ll
+++ b/test/CodeGen/NVPTX/symbol-naming.ll
@@ -24,7 +24,7 @@ target triple = "nvptx64-unknown-unknown"
; Function Attrs: nounwind
define void @foo(i32 %a, float %b, i8 signext %c, i32 %e) {
entry:
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([13 x i8]* @.str, i32 0, i32 0))
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([13 x i8], [13 x i8]* @.str, i32 0, i32 0))
ret void
}
diff --git a/test/CodeGen/NVPTX/vector-compare.ll b/test/CodeGen/NVPTX/vector-compare.ll
index 218049995233..2992b0e62c56 100644
--- a/test/CodeGen/NVPTX/vector-compare.ll
+++ b/test/CodeGen/NVPTX/vector-compare.ll
@@ -6,8 +6,8 @@
; tried to promote <2 x i1> to <2 x i8> and instruction selection failed.
define void @foo(<2 x i32>* %a, <2 x i32>* %b, i32* %r1, i32* %r2) {
- %aval = load <2 x i32>* %a
- %bval = load <2 x i32>* %b
+ %aval = load <2 x i32>, <2 x i32>* %a
+ %bval = load <2 x i32>, <2 x i32>* %b
%res = icmp slt <2 x i32> %aval, %bval
%t1 = extractelement <2 x i1> %res, i32 0
%t2 = extractelement <2 x i1> %res, i32 1
diff --git a/test/CodeGen/NVPTX/vector-loads.ll b/test/CodeGen/NVPTX/vector-loads.ll
index 58882bf16668..d70348942200 100644
--- a/test/CodeGen/NVPTX/vector-loads.ll
+++ b/test/CodeGen/NVPTX/vector-loads.ll
@@ -10,7 +10,7 @@
define void @foo(<2 x float>* %a) {
; CHECK: .func foo
; CHECK: ld.v2.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}}
- %t1 = load <2 x float>* %a
+ %t1 = load <2 x float>, <2 x float>* %a
%t2 = fmul <2 x float> %t1, %t1
store <2 x float> %t2, <2 x float>* %a
ret void
@@ -19,7 +19,7 @@ define void @foo(<2 x float>* %a) {
define void @foo2(<4 x float>* %a) {
; CHECK: .func foo2
; CHECK: ld.v4.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}
- %t1 = load <4 x float>* %a
+ %t1 = load <4 x float>, <4 x float>* %a
%t2 = fmul <4 x float> %t1, %t1
store <4 x float> %t2, <4 x float>* %a
ret void
@@ -29,7 +29,7 @@ define void @foo3(<8 x float>* %a) {
; CHECK: .func foo3
; CHECK: ld.v4.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}
; CHECK-NEXT: ld.v4.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}
- %t1 = load <8 x float>* %a
+ %t1 = load <8 x float>, <8 x float>* %a
%t2 = fmul <8 x float> %t1, %t1
store <8 x float> %t2, <8 x float>* %a
ret void
@@ -40,7 +40,7 @@ define void @foo3(<8 x float>* %a) {
define void @foo4(<2 x i32>* %a) {
; CHECK: .func foo4
; CHECK: ld.v2.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}}
- %t1 = load <2 x i32>* %a
+ %t1 = load <2 x i32>, <2 x i32>* %a
%t2 = mul <2 x i32> %t1, %t1
store <2 x i32> %t2, <2 x i32>* %a
ret void
@@ -49,7 +49,7 @@ define void @foo4(<2 x i32>* %a) {
define void @foo5(<4 x i32>* %a) {
; CHECK: .func foo5
; CHECK: ld.v4.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}}
- %t1 = load <4 x i32>* %a
+ %t1 = load <4 x i32>, <4 x i32>* %a
%t2 = mul <4 x i32> %t1, %t1
store <4 x i32> %t2, <4 x i32>* %a
ret void
@@ -59,7 +59,7 @@ define void @foo6(<8 x i32>* %a) {
; CHECK: .func foo6
; CHECK: ld.v4.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}}
; CHECK-NEXT: ld.v4.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}}
- %t1 = load <8 x i32>* %a
+ %t1 = load <8 x i32>, <8 x i32>* %a
%t2 = mul <8 x i32> %t1, %t1
store <8 x i32> %t2, <8 x i32>* %a
ret void
diff --git a/test/CodeGen/NVPTX/vector-select.ll b/test/CodeGen/NVPTX/vector-select.ll
index 11893df10329..1e81031c685a 100644
--- a/test/CodeGen/NVPTX/vector-select.ll
+++ b/test/CodeGen/NVPTX/vector-select.ll
@@ -6,9 +6,9 @@
define void @foo(<2 x i32> addrspace(1)* %def_a, <2 x i32> addrspace(1)* %def_b, <2 x i32> addrspace(1)* %def_c) {
entry:
- %tmp4 = load <2 x i32> addrspace(1)* %def_a
- %tmp6 = load <2 x i32> addrspace(1)* %def_c
- %tmp8 = load <2 x i32> addrspace(1)* %def_b
+ %tmp4 = load <2 x i32>, <2 x i32> addrspace(1)* %def_a
+ %tmp6 = load <2 x i32>, <2 x i32> addrspace(1)* %def_c
+ %tmp8 = load <2 x i32>, <2 x i32> addrspace(1)* %def_b
%0 = icmp sge <2 x i32> %tmp4, zeroinitializer
%cond = select <2 x i1> %0, <2 x i32> %tmp6, <2 x i32> %tmp8
store <2 x i32> %cond, <2 x i32> addrspace(1)* %def_c
diff --git a/test/CodeGen/NVPTX/weak-global.ll b/test/CodeGen/NVPTX/weak-global.ll
index 2bef4c5228a9..a64f9f48b26f 100644
--- a/test/CodeGen/NVPTX/weak-global.ll
+++ b/test/CodeGen/NVPTX/weak-global.ll
@@ -4,6 +4,6 @@
@g = common addrspace(1) global i32 zeroinitializer
define i32 @func0() {
- %val = load i32 addrspace(1)* @g
+ %val = load i32, i32 addrspace(1)* @g
ret i32 %val
}
diff --git a/test/CodeGen/PowerPC/2005-11-30-vastart-crash.ll b/test/CodeGen/PowerPC/2005-11-30-vastart-crash.ll
index 047a12bedd81..0f56ac990bbe 100644
--- a/test/CodeGen/PowerPC/2005-11-30-vastart-crash.ll
+++ b/test/CodeGen/PowerPC/2005-11-30-vastart-crash.ll
@@ -7,7 +7,7 @@ define void @bar(i32 %G, i32 %E, i32 %F, i32 %A, i32 %B, i32 %C, i32 %D, i8* %fm
%ap = alloca i8* ; <i8**> [#uses=2]
%va.upgrd.1 = bitcast i8** %ap to i8* ; <i8*> [#uses=1]
call void @llvm.va_start( i8* %va.upgrd.1 )
- %tmp.1 = load i8** %ap ; <i8*> [#uses=1]
+ %tmp.1 = load i8*, i8** %ap ; <i8*> [#uses=1]
%tmp.0 = call double @foo( i8* %tmp.1 ) ; <double> [#uses=0]
ret void
}
diff --git a/test/CodeGen/PowerPC/2006-01-20-ShiftPartsCrash.ll b/test/CodeGen/PowerPC/2006-01-20-ShiftPartsCrash.ll
index fbf254082ee0..fde330321aa4 100644
--- a/test/CodeGen/PowerPC/2006-01-20-ShiftPartsCrash.ll
+++ b/test/CodeGen/PowerPC/2006-01-20-ShiftPartsCrash.ll
@@ -4,11 +4,11 @@ define void @iterative_hash_host_wide_int() {
%zero = alloca i32 ; <i32*> [#uses=2]
%b = alloca i32 ; <i32*> [#uses=1]
store i32 0, i32* %zero
- %tmp = load i32* %zero ; <i32> [#uses=1]
+ %tmp = load i32, i32* %zero ; <i32> [#uses=1]
%tmp5 = bitcast i32 %tmp to i32 ; <i32> [#uses=1]
%tmp6.u = add i32 %tmp5, 32 ; <i32> [#uses=1]
%tmp6 = bitcast i32 %tmp6.u to i32 ; <i32> [#uses=1]
- %tmp7 = load i64* null ; <i64> [#uses=1]
+ %tmp7 = load i64, i64* null ; <i64> [#uses=1]
%tmp6.upgrd.1 = trunc i32 %tmp6 to i8 ; <i8> [#uses=1]
%shift.upgrd.2 = zext i8 %tmp6.upgrd.1 to i64 ; <i64> [#uses=1]
%tmp8 = ashr i64 %tmp7, %shift.upgrd.2 ; <i64> [#uses=1]
diff --git a/test/CodeGen/PowerPC/2006-04-05-splat-ish.ll b/test/CodeGen/PowerPC/2006-04-05-splat-ish.ll
index 7e845382a8e8..80827dc1505d 100644
--- a/test/CodeGen/PowerPC/2006-04-05-splat-ish.ll
+++ b/test/CodeGen/PowerPC/2006-04-05-splat-ish.ll
@@ -2,7 +2,7 @@
; RUN: grep "vspltish v.*, 10"
define void @test(<8 x i16>* %P) {
- %tmp = load <8 x i16>* %P ; <<8 x i16>> [#uses=1]
+ %tmp = load <8 x i16>, <8 x i16>* %P ; <<8 x i16>> [#uses=1]
%tmp1 = add <8 x i16> %tmp, < i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10 > ; <<8 x i16>> [#uses=1]
store <8 x i16> %tmp1, <8 x i16>* %P
ret void
diff --git a/test/CodeGen/PowerPC/2006-05-12-rlwimi-crash.ll b/test/CodeGen/PowerPC/2006-05-12-rlwimi-crash.ll
index 0205d10a795c..50d64f46569a 100644
--- a/test/CodeGen/PowerPC/2006-05-12-rlwimi-crash.ll
+++ b/test/CodeGen/PowerPC/2006-05-12-rlwimi-crash.ll
@@ -20,32 +20,32 @@ bb30: ; preds = %entry
cond_true68: ; preds = %bb30
ret void
cond_next92: ; preds = %bb30
- %tmp173 = getelementptr %struct.attr_desc* null, i32 0, i32 4 ; <i32*> [#uses=2]
- %tmp174 = load i32* %tmp173 ; <i32> [#uses=1]
+ %tmp173 = getelementptr %struct.attr_desc, %struct.attr_desc* null, i32 0, i32 4 ; <i32*> [#uses=2]
+ %tmp174 = load i32, i32* %tmp173 ; <i32> [#uses=1]
%tmp177 = and i32 %tmp174, -9 ; <i32> [#uses=1]
store i32 %tmp177, i32* %tmp173
- %tmp180 = getelementptr %struct.attr_desc* null, i32 0, i32 4 ; <i32*> [#uses=1]
- %tmp181 = load i32* %tmp180 ; <i32> [#uses=1]
- %tmp185 = getelementptr %struct.attr_desc* null, i32 0, i32 4 ; <i32*> [#uses=2]
- %tmp186 = load i32* %tmp185 ; <i32> [#uses=1]
+ %tmp180 = getelementptr %struct.attr_desc, %struct.attr_desc* null, i32 0, i32 4 ; <i32*> [#uses=1]
+ %tmp181 = load i32, i32* %tmp180 ; <i32> [#uses=1]
+ %tmp185 = getelementptr %struct.attr_desc, %struct.attr_desc* null, i32 0, i32 4 ; <i32*> [#uses=2]
+ %tmp186 = load i32, i32* %tmp185 ; <i32> [#uses=1]
%tmp183187 = shl i32 %tmp181, 1 ; <i32> [#uses=1]
%tmp188 = and i32 %tmp183187, 16 ; <i32> [#uses=1]
%tmp190 = and i32 %tmp186, -17 ; <i32> [#uses=1]
%tmp191 = or i32 %tmp190, %tmp188 ; <i32> [#uses=1]
store i32 %tmp191, i32* %tmp185
- %tmp193 = getelementptr %struct.attr_desc* null, i32 0, i32 4 ; <i32*> [#uses=1]
- %tmp194 = load i32* %tmp193 ; <i32> [#uses=1]
- %tmp198 = getelementptr %struct.attr_desc* null, i32 0, i32 4 ; <i32*> [#uses=2]
- %tmp199 = load i32* %tmp198 ; <i32> [#uses=1]
+ %tmp193 = getelementptr %struct.attr_desc, %struct.attr_desc* null, i32 0, i32 4 ; <i32*> [#uses=1]
+ %tmp194 = load i32, i32* %tmp193 ; <i32> [#uses=1]
+ %tmp198 = getelementptr %struct.attr_desc, %struct.attr_desc* null, i32 0, i32 4 ; <i32*> [#uses=2]
+ %tmp199 = load i32, i32* %tmp198 ; <i32> [#uses=1]
%tmp196200 = shl i32 %tmp194, 2 ; <i32> [#uses=1]
%tmp201 = and i32 %tmp196200, 64 ; <i32> [#uses=1]
%tmp203 = and i32 %tmp199, -65 ; <i32> [#uses=1]
%tmp204 = or i32 %tmp203, %tmp201 ; <i32> [#uses=1]
store i32 %tmp204, i32* %tmp198
- %tmp206 = getelementptr %struct.attr_desc* null, i32 0, i32 4 ; <i32*> [#uses=1]
- %tmp207 = load i32* %tmp206 ; <i32> [#uses=1]
- %tmp211 = getelementptr %struct.attr_desc* null, i32 0, i32 4 ; <i32*> [#uses=2]
- %tmp212 = load i32* %tmp211 ; <i32> [#uses=1]
+ %tmp206 = getelementptr %struct.attr_desc, %struct.attr_desc* null, i32 0, i32 4 ; <i32*> [#uses=1]
+ %tmp207 = load i32, i32* %tmp206 ; <i32> [#uses=1]
+ %tmp211 = getelementptr %struct.attr_desc, %struct.attr_desc* null, i32 0, i32 4 ; <i32*> [#uses=2]
+ %tmp212 = load i32, i32* %tmp211 ; <i32> [#uses=1]
%tmp209213 = shl i32 %tmp207, 1 ; <i32> [#uses=1]
%tmp214 = and i32 %tmp209213, 128 ; <i32> [#uses=1]
%tmp216 = and i32 %tmp212, -129 ; <i32> [#uses=1]
diff --git a/test/CodeGen/PowerPC/2006-07-07-ComputeMaskedBits.ll b/test/CodeGen/PowerPC/2006-07-07-ComputeMaskedBits.ll
index 1b8b064ee914..792c271d0c08 100644
--- a/test/CodeGen/PowerPC/2006-07-07-ComputeMaskedBits.ll
+++ b/test/CodeGen/PowerPC/2006-07-07-ComputeMaskedBits.ll
@@ -4,14 +4,14 @@
@vals = external global i32* ; <i32**> [#uses=1]
define i32 @test(i32 %i) {
- %tmp = load i8** @lens ; <i8*> [#uses=1]
- %tmp1 = getelementptr i8* %tmp, i32 %i ; <i8*> [#uses=1]
- %tmp.upgrd.1 = load i8* %tmp1 ; <i8> [#uses=1]
+ %tmp = load i8*, i8** @lens ; <i8*> [#uses=1]
+ %tmp1 = getelementptr i8, i8* %tmp, i32 %i ; <i8*> [#uses=1]
+ %tmp.upgrd.1 = load i8, i8* %tmp1 ; <i8> [#uses=1]
%tmp2 = zext i8 %tmp.upgrd.1 to i32 ; <i32> [#uses=1]
- %tmp3 = load i32** @vals ; <i32*> [#uses=1]
+ %tmp3 = load i32*, i32** @vals ; <i32*> [#uses=1]
%tmp5 = sub i32 1, %tmp2 ; <i32> [#uses=1]
- %tmp6 = getelementptr i32* %tmp3, i32 %tmp5 ; <i32*> [#uses=1]
- %tmp7 = load i32* %tmp6 ; <i32> [#uses=1]
+ %tmp6 = getelementptr i32, i32* %tmp3, i32 %tmp5 ; <i32*> [#uses=1]
+ %tmp7 = load i32, i32* %tmp6 ; <i32> [#uses=1]
ret i32 %tmp7
}
diff --git a/test/CodeGen/PowerPC/2006-07-19-stwbrx-crash.ll b/test/CodeGen/PowerPC/2006-07-19-stwbrx-crash.ll
index 65dd568b1ee3..4b287641d55f 100644
--- a/test/CodeGen/PowerPC/2006-07-19-stwbrx-crash.ll
+++ b/test/CodeGen/PowerPC/2006-07-19-stwbrx-crash.ll
@@ -1,7 +1,7 @@
; RUN: llc < %s -march=ppc32
define void @img2buf(i32 %symbol_size_in_bytes, i16* %ui16) nounwind {
- %tmp93 = load i16* null ; <i16> [#uses=1]
+ %tmp93 = load i16, i16* null ; <i16> [#uses=1]
%tmp99 = call i16 @llvm.bswap.i16( i16 %tmp93 ) ; <i16> [#uses=1]
store i16 %tmp99, i16* %ui16
ret void
diff --git a/test/CodeGen/PowerPC/2006-08-15-SelectionCrash.ll b/test/CodeGen/PowerPC/2006-08-15-SelectionCrash.ll
index cb76b5c70cf0..c63fd9ae1700 100644
--- a/test/CodeGen/PowerPC/2006-08-15-SelectionCrash.ll
+++ b/test/CodeGen/PowerPC/2006-08-15-SelectionCrash.ll
@@ -5,7 +5,7 @@
define fastcc void @immed_double_const(i32 %i0, i32 %i1) {
entry:
- %tmp1 = load i32* null ; <i32> [#uses=1]
+ %tmp1 = load i32, i32* null ; <i32> [#uses=1]
switch i32 %tmp1, label %bb103 [
i32 1, label %bb
i32 3, label %bb
diff --git a/test/CodeGen/PowerPC/2006-10-13-Miscompile.ll b/test/CodeGen/PowerPC/2006-10-13-Miscompile.ll
index 002a0644183a..5992ad4481d3 100644
--- a/test/CodeGen/PowerPC/2006-10-13-Miscompile.ll
+++ b/test/CodeGen/PowerPC/2006-10-13-Miscompile.ll
@@ -6,7 +6,7 @@ entry:
%tmp = icmp sgt i64 %tmp1, 2 ; <i1> [#uses=1]
br i1 %tmp, label %UnifiedReturnBlock, label %cond_true
cond_true: ; preds = %entry
- %tmp.upgrd.1 = tail call i32 (...)* @bar( ) ; <i32> [#uses=0]
+ %tmp.upgrd.1 = tail call i32 (...) @bar( ) ; <i32> [#uses=0]
ret void
UnifiedReturnBlock: ; preds = %entry
ret void
diff --git a/test/CodeGen/PowerPC/2006-10-17-brcc-miscompile.ll b/test/CodeGen/PowerPC/2006-10-17-brcc-miscompile.ll
index 3d462b4d1461..ab5f37d4babe 100644
--- a/test/CodeGen/PowerPC/2006-10-17-brcc-miscompile.ll
+++ b/test/CodeGen/PowerPC/2006-10-17-brcc-miscompile.ll
@@ -10,7 +10,7 @@ entry:
%tmp = icmp eq i32 %tmp2, 0 ; <i1> [#uses=1]
br i1 %tmp, label %UnifiedReturnBlock, label %cond_true
cond_true: ; preds = %entry
- tail call i32 (...)* @bar( ) ; <i32>:0 [#uses=0]
+ tail call i32 (...) @bar( ) ; <i32>:0 [#uses=0]
ret void
UnifiedReturnBlock: ; preds = %entry
ret void
diff --git a/test/CodeGen/PowerPC/2006-12-07-LargeAlloca.ll b/test/CodeGen/PowerPC/2006-12-07-LargeAlloca.ll
index ba863047be99..0e7709857406 100644
--- a/test/CodeGen/PowerPC/2006-12-07-LargeAlloca.ll
+++ b/test/CodeGen/PowerPC/2006-12-07-LargeAlloca.ll
@@ -14,7 +14,7 @@ bb19: ; preds = %entry
bb12.i: ; preds = %bb12.i, %bb19
%i.0.i = phi i32 [ %tmp11.i, %bb12.i ], [ 0, %bb19 ] ; <i32> [#uses=2]
%gep.upgrd.1 = zext i32 %i.0.i to i64 ; <i64> [#uses=1]
- %tmp9.i = getelementptr [256 x i32]* %RMask.i, i32 0, i64 %gep.upgrd.1 ; <i32*> [#uses=1]
+ %tmp9.i = getelementptr [256 x i32], [256 x i32]* %RMask.i, i32 0, i64 %gep.upgrd.1 ; <i32*> [#uses=1]
store i32 0, i32* %tmp9.i
%tmp11.i = add i32 %i.0.i, 1 ; <i32> [#uses=1]
br label %bb12.i
diff --git a/test/CodeGen/PowerPC/2006-12-07-SelectCrash.ll b/test/CodeGen/PowerPC/2006-12-07-SelectCrash.ll
index 6d9a3fa7b106..9660d450cb4c 100644
--- a/test/CodeGen/PowerPC/2006-12-07-SelectCrash.ll
+++ b/test/CodeGen/PowerPC/2006-12-07-SelectCrash.ll
@@ -10,7 +10,7 @@ entry:
cond_true: ; preds = %entry
ret void
cond_next71: ; preds = %entry
- %tmp73.b = load i1* @qsz.b ; <i1> [#uses=1]
+ %tmp73.b = load i1, i1* @qsz.b ; <i1> [#uses=1]
%ii.4.ph = select i1 %tmp73.b, i64 4, i64 0 ; <i64> [#uses=1]
br label %bb139
bb82: ; preds = %bb139
diff --git a/test/CodeGen/PowerPC/2007-01-15-AsmDialect.ll b/test/CodeGen/PowerPC/2007-01-15-AsmDialect.ll
index c7792884bb89..ca134fa9be4c 100644
--- a/test/CodeGen/PowerPC/2007-01-15-AsmDialect.ll
+++ b/test/CodeGen/PowerPC/2007-01-15-AsmDialect.ll
@@ -1,5 +1,5 @@
; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 | \
-; RUN: grep cntlzw
+; RUN: grep cntlz
define i32 @foo() nounwind {
entry:
@@ -8,19 +8,19 @@ entry:
%ctz_x = alloca i32, align 4 ; <i32*> [#uses=3]
%ctz_c = alloca i32, align 4 ; <i32*> [#uses=2]
store i32 61440, i32* %ctz_x
- %tmp = load i32* %ctz_x ; <i32> [#uses=1]
+ %tmp = load i32, i32* %ctz_x ; <i32> [#uses=1]
%tmp1 = sub i32 0, %tmp ; <i32> [#uses=1]
- %tmp2 = load i32* %ctz_x ; <i32> [#uses=1]
+ %tmp2 = load i32, i32* %ctz_x ; <i32> [#uses=1]
%tmp3 = and i32 %tmp1, %tmp2 ; <i32> [#uses=1]
%tmp4 = call i32 asm "$(cntlz$|cntlzw$) $0,$1", "=r,r,~{dirflag},~{fpsr},~{flags}"( i32 %tmp3 ) ; <i32> [#uses=1]
store i32 %tmp4, i32* %ctz_c
- %tmp5 = load i32* %ctz_c ; <i32> [#uses=1]
+ %tmp5 = load i32, i32* %ctz_c ; <i32> [#uses=1]
store i32 %tmp5, i32* %temp
- %tmp6 = load i32* %temp ; <i32> [#uses=1]
+ %tmp6 = load i32, i32* %temp ; <i32> [#uses=1]
store i32 %tmp6, i32* %retval
br label %return
return: ; preds = %entry
- %retval2 = load i32* %retval ; <i32> [#uses=1]
+ %retval2 = load i32, i32* %retval ; <i32> [#uses=1]
ret i32 %retval2
}
diff --git a/test/CodeGen/PowerPC/2007-01-31-InlineAsmAddrMode.ll b/test/CodeGen/PowerPC/2007-01-31-InlineAsmAddrMode.ll
index fe5145d15230..6ce32da2f740 100644
--- a/test/CodeGen/PowerPC/2007-01-31-InlineAsmAddrMode.ll
+++ b/test/CodeGen/PowerPC/2007-01-31-InlineAsmAddrMode.ll
@@ -10,7 +10,7 @@
define void @test1() {
entry:
%Out = alloca %struct.A, align 4 ; <%struct.A*> [#uses=1]
- %tmp2 = getelementptr %struct.A* %Out, i32 0, i32 1
+ %tmp2 = getelementptr %struct.A, %struct.A* %Out, i32 0, i32 1
%tmp5 = call i32 asm "lwbrx $0, $1", "=r,m"(i32* %tmp2 )
ret void
}
@@ -18,7 +18,7 @@ entry:
define void @test2() {
entry:
%Out = alloca %struct.A, align 4 ; <%struct.A*> [#uses=1]
- %tmp2 = getelementptr %struct.A* %Out, i32 0, i32 0 ; <i32*> [#uses=1]
+ %tmp2 = getelementptr %struct.A, %struct.A* %Out, i32 0, i32 0 ; <i32*> [#uses=1]
%tmp5 = call i32 asm "lwbrx $0, $2, $1", "=r,r,bO,m"( i8* null, i32 0, i32* %tmp2 ) ; <i32> [#uses=0]
ret void
}
diff --git a/test/CodeGen/PowerPC/2007-02-23-lr-saved-twice.ll b/test/CodeGen/PowerPC/2007-02-23-lr-saved-twice.ll
index 0473857ae70f..5a6fbf01c1b8 100644
--- a/test/CodeGen/PowerPC/2007-02-23-lr-saved-twice.ll
+++ b/test/CodeGen/PowerPC/2007-02-23-lr-saved-twice.ll
@@ -7,7 +7,7 @@ target triple = "powerpc-apple-darwin8"
define i32 @main() {
entry:
- %tmp = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([18 x i8]* @str, i32 0, i32 0) ) ; <i32> [#uses=0]
+ %tmp = tail call i32 (i8*, ...) @printf( i8* getelementptr ([18 x i8], [18 x i8]* @str, i32 0, i32 0) ) ; <i32> [#uses=0]
ret i32 0
}
diff --git a/test/CodeGen/PowerPC/2007-03-24-cntlzd.ll b/test/CodeGen/PowerPC/2007-03-24-cntlzd.ll
index 3624b5109301..2db87fcb1c88 100644
--- a/test/CodeGen/PowerPC/2007-03-24-cntlzd.ll
+++ b/test/CodeGen/PowerPC/2007-03-24-cntlzd.ll
@@ -3,7 +3,7 @@ target datalayout = "E-m:e-i64:64-n32:64"
target triple = "powerpc64-unknown-linux-gnu"
define i32 @_ZNK4llvm5APInt17countLeadingZerosEv(i64 *%t) nounwind {
- %tmp19 = load i64* %t
+ %tmp19 = load i64, i64* %t
%tmp22 = tail call i64 @llvm.ctlz.i64( i64 %tmp19, i1 true ) ; <i64> [#uses=1]
%tmp23 = trunc i64 %tmp22 to i32
%tmp89 = add i32 %tmp23, -64 ; <i32> [#uses=1]
diff --git a/test/CodeGen/PowerPC/2007-03-30-SpillerCrash.ll b/test/CodeGen/PowerPC/2007-03-30-SpillerCrash.ll
index d43916d4f3c1..de445f4c034a 100644
--- a/test/CodeGen/PowerPC/2007-03-30-SpillerCrash.ll
+++ b/test/CodeGen/PowerPC/2007-03-30-SpillerCrash.ll
@@ -2,578 +2,578 @@
define void @test(<4 x float>*, { { i16, i16, i32 } }*) {
xOperationInitMasks.exit:
- %.sub7896 = getelementptr [4 x <4 x i32>]* null, i32 0, i32 0 ; <<4 x i32>*> [#uses=24]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 175, i32 3 ; <<4 x float>*>:2 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 174, i32 2 ; <<4 x float>*>:3 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 174, i32 3 ; <<4 x float>*>:4 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 173, i32 1 ; <<4 x float>*>:5 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 173, i32 2 ; <<4 x float>*>:6 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 173, i32 3 ; <<4 x float>*>:7 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 172, i32 1 ; <<4 x float>*>:8 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 172, i32 2 ; <<4 x float>*>:9 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 172, i32 3 ; <<4 x float>*>:10 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 171, i32 1 ; <<4 x float>*>:11 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 171, i32 2 ; <<4 x float>*>:12 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 171, i32 3 ; <<4 x float>*>:13 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 170, i32 1 ; <<4 x float>*>:14 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 170, i32 2 ; <<4 x float>*>:15 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 170, i32 3 ; <<4 x float>*>:16 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 169, i32 1 ; <<4 x float>*>:17 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 169, i32 2 ; <<4 x float>*>:18 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 169, i32 3 ; <<4 x float>*>:19 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 168, i32 1 ; <<4 x float>*>:20 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 168, i32 2 ; <<4 x float>*>:21 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 168, i32 3 ; <<4 x float>*>:22 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 167, i32 1 ; <<4 x float>*>:23 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 167, i32 2 ; <<4 x float>*>:24 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 167, i32 3 ; <<4 x float>*>:25 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 166, i32 1 ; <<4 x float>*>:26 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 166, i32 2 ; <<4 x float>*>:27 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 166, i32 3 ; <<4 x float>*>:28 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 165, i32 1 ; <<4 x float>*>:29 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 165, i32 2 ; <<4 x float>*>:30 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 165, i32 3 ; <<4 x float>*>:31 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 164, i32 1 ; <<4 x float>*>:32 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 164, i32 2 ; <<4 x float>*>:33 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 164, i32 3 ; <<4 x float>*>:34 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 163, i32 1 ; <<4 x float>*>:35 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 163, i32 2 ; <<4 x float>*>:36 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 163, i32 3 ; <<4 x float>*>:37 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 162, i32 1 ; <<4 x float>*>:38 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 162, i32 2 ; <<4 x float>*>:39 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 162, i32 3 ; <<4 x float>*>:40 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 161, i32 1 ; <<4 x float>*>:41 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 161, i32 2 ; <<4 x float>*>:42 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 161, i32 3 ; <<4 x float>*>:43 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 160, i32 1 ; <<4 x float>*>:44 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 160, i32 2 ; <<4 x float>*>:45 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 160, i32 3 ; <<4 x float>*>:46 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 159, i32 1 ; <<4 x float>*>:47 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 159, i32 2 ; <<4 x float>*>:48 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 159, i32 3 ; <<4 x float>*>:49 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 158, i32 1 ; <<4 x float>*>:50 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 158, i32 2 ; <<4 x float>*>:51 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 158, i32 3 ; <<4 x float>*>:52 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 157, i32 1 ; <<4 x float>*>:53 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 157, i32 2 ; <<4 x float>*>:54 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 157, i32 3 ; <<4 x float>*>:55 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 156, i32 1 ; <<4 x float>*>:56 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 156, i32 2 ; <<4 x float>*>:57 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 156, i32 3 ; <<4 x float>*>:58 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 155, i32 1 ; <<4 x float>*>:59 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 155, i32 2 ; <<4 x float>*>:60 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 155, i32 3 ; <<4 x float>*>:61 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 154, i32 1 ; <<4 x float>*>:62 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 154, i32 2 ; <<4 x float>*>:63 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 154, i32 3 ; <<4 x float>*>:64 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 153, i32 1 ; <<4 x float>*>:65 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 153, i32 2 ; <<4 x float>*>:66 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 153, i32 3 ; <<4 x float>*>:67 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 152, i32 1 ; <<4 x float>*>:68 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 152, i32 2 ; <<4 x float>*>:69 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 152, i32 3 ; <<4 x float>*>:70 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 151, i32 1 ; <<4 x float>*>:71 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 151, i32 2 ; <<4 x float>*>:72 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 151, i32 3 ; <<4 x float>*>:73 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 150, i32 1 ; <<4 x float>*>:74 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 150, i32 2 ; <<4 x float>*>:75 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 150, i32 3 ; <<4 x float>*>:76 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 149, i32 1 ; <<4 x float>*>:77 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 149, i32 2 ; <<4 x float>*>:78 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 149, i32 3 ; <<4 x float>*>:79 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 148, i32 1 ; <<4 x float>*>:80 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 148, i32 2 ; <<4 x float>*>:81 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 148, i32 3 ; <<4 x float>*>:82 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 147, i32 1 ; <<4 x float>*>:83 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 147, i32 2 ; <<4 x float>*>:84 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 147, i32 3 ; <<4 x float>*>:85 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 146, i32 1 ; <<4 x float>*>:86 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 146, i32 2 ; <<4 x float>*>:87 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 146, i32 3 ; <<4 x float>*>:88 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 145, i32 1 ; <<4 x float>*>:89 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 145, i32 2 ; <<4 x float>*>:90 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 145, i32 3 ; <<4 x float>*>:91 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 144, i32 1 ; <<4 x float>*>:92 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 144, i32 2 ; <<4 x float>*>:93 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 144, i32 3 ; <<4 x float>*>:94 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 143, i32 1 ; <<4 x float>*>:95 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 143, i32 2 ; <<4 x float>*>:96 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 143, i32 3 ; <<4 x float>*>:97 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 142, i32 1 ; <<4 x float>*>:98 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 142, i32 2 ; <<4 x float>*>:99 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 142, i32 3 ; <<4 x float>*>:100 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 141, i32 1 ; <<4 x float>*>:101 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 141, i32 2 ; <<4 x float>*>:102 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 141, i32 3 ; <<4 x float>*>:103 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 140, i32 1 ; <<4 x float>*>:104 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 140, i32 2 ; <<4 x float>*>:105 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 140, i32 3 ; <<4 x float>*>:106 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 139, i32 1 ; <<4 x float>*>:107 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 139, i32 2 ; <<4 x float>*>:108 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 139, i32 3 ; <<4 x float>*>:109 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 138, i32 1 ; <<4 x float>*>:110 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 138, i32 2 ; <<4 x float>*>:111 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 138, i32 3 ; <<4 x float>*>:112 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 137, i32 1 ; <<4 x float>*>:113 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 137, i32 2 ; <<4 x float>*>:114 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 137, i32 3 ; <<4 x float>*>:115 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 136, i32 1 ; <<4 x float>*>:116 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 136, i32 2 ; <<4 x float>*>:117 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 136, i32 3 ; <<4 x float>*>:118 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 135, i32 1 ; <<4 x float>*>:119 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 135, i32 2 ; <<4 x float>*>:120 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 135, i32 3 ; <<4 x float>*>:121 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 134, i32 1 ; <<4 x float>*>:122 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 134, i32 2 ; <<4 x float>*>:123 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 134, i32 3 ; <<4 x float>*>:124 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 133, i32 1 ; <<4 x float>*>:125 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 133, i32 2 ; <<4 x float>*>:126 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 133, i32 3 ; <<4 x float>*>:127 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 132, i32 1 ; <<4 x float>*>:128 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 132, i32 2 ; <<4 x float>*>:129 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 132, i32 3 ; <<4 x float>*>:130 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 131, i32 1 ; <<4 x float>*>:131 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 131, i32 2 ; <<4 x float>*>:132 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 131, i32 3 ; <<4 x float>*>:133 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 130, i32 1 ; <<4 x float>*>:134 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 130, i32 2 ; <<4 x float>*>:135 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 130, i32 3 ; <<4 x float>*>:136 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 129, i32 1 ; <<4 x float>*>:137 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 129, i32 2 ; <<4 x float>*>:138 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 129, i32 3 ; <<4 x float>*>:139 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 128, i32 1 ; <<4 x float>*>:140 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 128, i32 2 ; <<4 x float>*>:141 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 128, i32 3 ; <<4 x float>*>:142 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 127, i32 1 ; <<4 x float>*>:143 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 127, i32 2 ; <<4 x float>*>:144 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 127, i32 3 ; <<4 x float>*>:145 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 126, i32 1 ; <<4 x float>*>:146 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 126, i32 2 ; <<4 x float>*>:147 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 126, i32 3 ; <<4 x float>*>:148 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 125, i32 1 ; <<4 x float>*>:149 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 125, i32 2 ; <<4 x float>*>:150 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 125, i32 3 ; <<4 x float>*>:151 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 124, i32 1 ; <<4 x float>*>:152 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 124, i32 2 ; <<4 x float>*>:153 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 124, i32 3 ; <<4 x float>*>:154 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 123, i32 1 ; <<4 x float>*>:155 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 123, i32 2 ; <<4 x float>*>:156 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 123, i32 3 ; <<4 x float>*>:157 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 122, i32 1 ; <<4 x float>*>:158 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 122, i32 2 ; <<4 x float>*>:159 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 122, i32 3 ; <<4 x float>*>:160 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 121, i32 1 ; <<4 x float>*>:161 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 121, i32 2 ; <<4 x float>*>:162 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 121, i32 3 ; <<4 x float>*>:163 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 120, i32 1 ; <<4 x float>*>:164 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 120, i32 2 ; <<4 x float>*>:165 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 120, i32 3 ; <<4 x float>*>:166 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 119, i32 1 ; <<4 x float>*>:167 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 119, i32 2 ; <<4 x float>*>:168 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 119, i32 3 ; <<4 x float>*>:169 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 118, i32 1 ; <<4 x float>*>:170 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 118, i32 2 ; <<4 x float>*>:171 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 118, i32 3 ; <<4 x float>*>:172 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 117, i32 1 ; <<4 x float>*>:173 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 117, i32 2 ; <<4 x float>*>:174 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 117, i32 3 ; <<4 x float>*>:175 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 116, i32 1 ; <<4 x float>*>:176 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 116, i32 2 ; <<4 x float>*>:177 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 116, i32 3 ; <<4 x float>*>:178 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 115, i32 1 ; <<4 x float>*>:179 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 115, i32 2 ; <<4 x float>*>:180 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 115, i32 3 ; <<4 x float>*>:181 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 114, i32 1 ; <<4 x float>*>:182 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 114, i32 2 ; <<4 x float>*>:183 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 114, i32 3 ; <<4 x float>*>:184 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 113, i32 1 ; <<4 x float>*>:185 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 113, i32 2 ; <<4 x float>*>:186 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 113, i32 3 ; <<4 x float>*>:187 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 112, i32 1 ; <<4 x float>*>:188 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 112, i32 2 ; <<4 x float>*>:189 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 112, i32 3 ; <<4 x float>*>:190 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 111, i32 1 ; <<4 x float>*>:191 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 111, i32 2 ; <<4 x float>*>:192 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 111, i32 3 ; <<4 x float>*>:193 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 110, i32 1 ; <<4 x float>*>:194 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 110, i32 2 ; <<4 x float>*>:195 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 110, i32 3 ; <<4 x float>*>:196 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 109, i32 1 ; <<4 x float>*>:197 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 109, i32 2 ; <<4 x float>*>:198 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 109, i32 3 ; <<4 x float>*>:199 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 108, i32 1 ; <<4 x float>*>:200 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 108, i32 2 ; <<4 x float>*>:201 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 108, i32 3 ; <<4 x float>*>:202 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 107, i32 1 ; <<4 x float>*>:203 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 107, i32 2 ; <<4 x float>*>:204 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 107, i32 3 ; <<4 x float>*>:205 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 106, i32 1 ; <<4 x float>*>:206 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 106, i32 2 ; <<4 x float>*>:207 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 106, i32 3 ; <<4 x float>*>:208 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 105, i32 1 ; <<4 x float>*>:209 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 105, i32 2 ; <<4 x float>*>:210 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 105, i32 3 ; <<4 x float>*>:211 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 104, i32 1 ; <<4 x float>*>:212 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 104, i32 2 ; <<4 x float>*>:213 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 104, i32 3 ; <<4 x float>*>:214 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 103, i32 1 ; <<4 x float>*>:215 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 103, i32 2 ; <<4 x float>*>:216 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 103, i32 3 ; <<4 x float>*>:217 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 102, i32 1 ; <<4 x float>*>:218 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 102, i32 2 ; <<4 x float>*>:219 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 102, i32 3 ; <<4 x float>*>:220 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 101, i32 1 ; <<4 x float>*>:221 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 101, i32 2 ; <<4 x float>*>:222 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 101, i32 3 ; <<4 x float>*>:223 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 100, i32 1 ; <<4 x float>*>:224 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 100, i32 2 ; <<4 x float>*>:225 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 100, i32 3 ; <<4 x float>*>:226 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 99, i32 1 ; <<4 x float>*>:227 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 99, i32 2 ; <<4 x float>*>:228 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 99, i32 3 ; <<4 x float>*>:229 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 98, i32 1 ; <<4 x float>*>:230 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 98, i32 2 ; <<4 x float>*>:231 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 98, i32 3 ; <<4 x float>*>:232 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 97, i32 1 ; <<4 x float>*>:233 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 97, i32 2 ; <<4 x float>*>:234 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 97, i32 3 ; <<4 x float>*>:235 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 96, i32 1 ; <<4 x float>*>:236 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 96, i32 2 ; <<4 x float>*>:237 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 96, i32 3 ; <<4 x float>*>:238 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 95, i32 1 ; <<4 x float>*>:239 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 95, i32 2 ; <<4 x float>*>:240 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 95, i32 3 ; <<4 x float>*>:241 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 94, i32 1 ; <<4 x float>*>:242 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 94, i32 2 ; <<4 x float>*>:243 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 94, i32 3 ; <<4 x float>*>:244 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 93, i32 1 ; <<4 x float>*>:245 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 93, i32 2 ; <<4 x float>*>:246 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 93, i32 3 ; <<4 x float>*>:247 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 92, i32 1 ; <<4 x float>*>:248 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 92, i32 2 ; <<4 x float>*>:249 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 92, i32 3 ; <<4 x float>*>:250 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 91, i32 1 ; <<4 x float>*>:251 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 91, i32 2 ; <<4 x float>*>:252 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 91, i32 3 ; <<4 x float>*>:253 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 90, i32 1 ; <<4 x float>*>:254 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 90, i32 2 ; <<4 x float>*>:255 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 90, i32 3 ; <<4 x float>*>:256 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 89, i32 1 ; <<4 x float>*>:257 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 89, i32 2 ; <<4 x float>*>:258 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 89, i32 3 ; <<4 x float>*>:259 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 88, i32 1 ; <<4 x float>*>:260 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 88, i32 2 ; <<4 x float>*>:261 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 88, i32 3 ; <<4 x float>*>:262 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 87, i32 1 ; <<4 x float>*>:263 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 87, i32 2 ; <<4 x float>*>:264 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 87, i32 3 ; <<4 x float>*>:265 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 86, i32 1 ; <<4 x float>*>:266 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 86, i32 2 ; <<4 x float>*>:267 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 86, i32 3 ; <<4 x float>*>:268 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 85, i32 1 ; <<4 x float>*>:269 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 85, i32 2 ; <<4 x float>*>:270 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 85, i32 3 ; <<4 x float>*>:271 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 84, i32 1 ; <<4 x float>*>:272 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 84, i32 2 ; <<4 x float>*>:273 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 84, i32 3 ; <<4 x float>*>:274 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 83, i32 1 ; <<4 x float>*>:275 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 83, i32 2 ; <<4 x float>*>:276 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 83, i32 3 ; <<4 x float>*>:277 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 82, i32 1 ; <<4 x float>*>:278 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 82, i32 2 ; <<4 x float>*>:279 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 82, i32 3 ; <<4 x float>*>:280 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 81, i32 1 ; <<4 x float>*>:281 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 81, i32 2 ; <<4 x float>*>:282 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 81, i32 3 ; <<4 x float>*>:283 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 80, i32 1 ; <<4 x float>*>:284 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 80, i32 2 ; <<4 x float>*>:285 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 80, i32 3 ; <<4 x float>*>:286 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 79, i32 1 ; <<4 x float>*>:287 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 79, i32 2 ; <<4 x float>*>:288 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 79, i32 3 ; <<4 x float>*>:289 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 78, i32 1 ; <<4 x float>*>:290 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 78, i32 2 ; <<4 x float>*>:291 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 78, i32 3 ; <<4 x float>*>:292 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 77, i32 1 ; <<4 x float>*>:293 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 77, i32 2 ; <<4 x float>*>:294 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 77, i32 3 ; <<4 x float>*>:295 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 76, i32 1 ; <<4 x float>*>:296 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 76, i32 2 ; <<4 x float>*>:297 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 76, i32 3 ; <<4 x float>*>:298 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 75, i32 1 ; <<4 x float>*>:299 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 75, i32 2 ; <<4 x float>*>:300 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 75, i32 3 ; <<4 x float>*>:301 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 74, i32 1 ; <<4 x float>*>:302 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 74, i32 2 ; <<4 x float>*>:303 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 74, i32 3 ; <<4 x float>*>:304 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 73, i32 1 ; <<4 x float>*>:305 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 73, i32 2 ; <<4 x float>*>:306 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 73, i32 3 ; <<4 x float>*>:307 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 72, i32 1 ; <<4 x float>*>:308 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 72, i32 2 ; <<4 x float>*>:309 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 72, i32 3 ; <<4 x float>*>:310 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 71, i32 1 ; <<4 x float>*>:311 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 71, i32 2 ; <<4 x float>*>:312 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 71, i32 3 ; <<4 x float>*>:313 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 70, i32 1 ; <<4 x float>*>:314 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 70, i32 2 ; <<4 x float>*>:315 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 70, i32 3 ; <<4 x float>*>:316 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 69, i32 1 ; <<4 x float>*>:317 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 69, i32 2 ; <<4 x float>*>:318 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 69, i32 3 ; <<4 x float>*>:319 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 68, i32 1 ; <<4 x float>*>:320 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 68, i32 2 ; <<4 x float>*>:321 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 68, i32 3 ; <<4 x float>*>:322 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 67, i32 1 ; <<4 x float>*>:323 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 67, i32 2 ; <<4 x float>*>:324 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 67, i32 3 ; <<4 x float>*>:325 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 66, i32 1 ; <<4 x float>*>:326 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 66, i32 2 ; <<4 x float>*>:327 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 66, i32 3 ; <<4 x float>*>:328 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 65, i32 1 ; <<4 x float>*>:329 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 65, i32 2 ; <<4 x float>*>:330 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 65, i32 3 ; <<4 x float>*>:331 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 64, i32 1 ; <<4 x float>*>:332 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 64, i32 2 ; <<4 x float>*>:333 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 64, i32 3 ; <<4 x float>*>:334 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 63, i32 1 ; <<4 x float>*>:335 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 63, i32 2 ; <<4 x float>*>:336 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 63, i32 3 ; <<4 x float>*>:337 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 62, i32 1 ; <<4 x float>*>:338 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 62, i32 2 ; <<4 x float>*>:339 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 62, i32 3 ; <<4 x float>*>:340 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 61, i32 1 ; <<4 x float>*>:341 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 61, i32 2 ; <<4 x float>*>:342 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 61, i32 3 ; <<4 x float>*>:343 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 60, i32 1 ; <<4 x float>*>:344 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 60, i32 2 ; <<4 x float>*>:345 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 60, i32 3 ; <<4 x float>*>:346 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 59, i32 1 ; <<4 x float>*>:347 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 59, i32 2 ; <<4 x float>*>:348 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 59, i32 3 ; <<4 x float>*>:349 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 58, i32 1 ; <<4 x float>*>:350 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 58, i32 2 ; <<4 x float>*>:351 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 58, i32 3 ; <<4 x float>*>:352 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 57, i32 1 ; <<4 x float>*>:353 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 57, i32 2 ; <<4 x float>*>:354 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 57, i32 3 ; <<4 x float>*>:355 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 56, i32 1 ; <<4 x float>*>:356 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 56, i32 2 ; <<4 x float>*>:357 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 56, i32 3 ; <<4 x float>*>:358 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 55, i32 1 ; <<4 x float>*>:359 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 55, i32 2 ; <<4 x float>*>:360 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 55, i32 3 ; <<4 x float>*>:361 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 54, i32 1 ; <<4 x float>*>:362 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 54, i32 2 ; <<4 x float>*>:363 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 54, i32 3 ; <<4 x float>*>:364 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 53, i32 1 ; <<4 x float>*>:365 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 53, i32 2 ; <<4 x float>*>:366 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 53, i32 3 ; <<4 x float>*>:367 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 52, i32 1 ; <<4 x float>*>:368 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 52, i32 2 ; <<4 x float>*>:369 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 52, i32 3 ; <<4 x float>*>:370 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 51, i32 1 ; <<4 x float>*>:371 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 51, i32 2 ; <<4 x float>*>:372 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 51, i32 3 ; <<4 x float>*>:373 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 50, i32 1 ; <<4 x float>*>:374 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 50, i32 2 ; <<4 x float>*>:375 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 50, i32 3 ; <<4 x float>*>:376 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 49, i32 1 ; <<4 x float>*>:377 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 49, i32 2 ; <<4 x float>*>:378 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 49, i32 3 ; <<4 x float>*>:379 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 48, i32 1 ; <<4 x float>*>:380 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 48, i32 2 ; <<4 x float>*>:381 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 48, i32 3 ; <<4 x float>*>:382 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 47, i32 1 ; <<4 x float>*>:383 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 47, i32 2 ; <<4 x float>*>:384 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 47, i32 3 ; <<4 x float>*>:385 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 46, i32 1 ; <<4 x float>*>:386 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 46, i32 2 ; <<4 x float>*>:387 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 46, i32 3 ; <<4 x float>*>:388 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 45, i32 1 ; <<4 x float>*>:389 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 45, i32 2 ; <<4 x float>*>:390 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 45, i32 3 ; <<4 x float>*>:391 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 44, i32 1 ; <<4 x float>*>:392 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 44, i32 2 ; <<4 x float>*>:393 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 44, i32 3 ; <<4 x float>*>:394 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 43, i32 1 ; <<4 x float>*>:395 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 43, i32 2 ; <<4 x float>*>:396 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 43, i32 3 ; <<4 x float>*>:397 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 42, i32 1 ; <<4 x float>*>:398 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 42, i32 2 ; <<4 x float>*>:399 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 42, i32 3 ; <<4 x float>*>:400 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 41, i32 1 ; <<4 x float>*>:401 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 41, i32 2 ; <<4 x float>*>:402 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 41, i32 3 ; <<4 x float>*>:403 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 40, i32 1 ; <<4 x float>*>:404 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 40, i32 2 ; <<4 x float>*>:405 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 40, i32 3 ; <<4 x float>*>:406 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 39, i32 1 ; <<4 x float>*>:407 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 39, i32 2 ; <<4 x float>*>:408 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 39, i32 3 ; <<4 x float>*>:409 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 38, i32 1 ; <<4 x float>*>:410 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 38, i32 2 ; <<4 x float>*>:411 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 38, i32 3 ; <<4 x float>*>:412 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 37, i32 1 ; <<4 x float>*>:413 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 37, i32 2 ; <<4 x float>*>:414 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 37, i32 3 ; <<4 x float>*>:415 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 36, i32 1 ; <<4 x float>*>:416 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 36, i32 2 ; <<4 x float>*>:417 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 36, i32 3 ; <<4 x float>*>:418 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 35, i32 1 ; <<4 x float>*>:419 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 35, i32 2 ; <<4 x float>*>:420 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 35, i32 3 ; <<4 x float>*>:421 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 34, i32 1 ; <<4 x float>*>:422 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 34, i32 2 ; <<4 x float>*>:423 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 34, i32 3 ; <<4 x float>*>:424 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 33, i32 1 ; <<4 x float>*>:425 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 33, i32 2 ; <<4 x float>*>:426 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 33, i32 3 ; <<4 x float>*>:427 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 32, i32 1 ; <<4 x float>*>:428 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 32, i32 2 ; <<4 x float>*>:429 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 32, i32 3 ; <<4 x float>*>:430 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 31, i32 1 ; <<4 x float>*>:431 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 31, i32 2 ; <<4 x float>*>:432 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 31, i32 3 ; <<4 x float>*>:433 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 30, i32 1 ; <<4 x float>*>:434 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 30, i32 2 ; <<4 x float>*>:435 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 30, i32 3 ; <<4 x float>*>:436 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 29, i32 1 ; <<4 x float>*>:437 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 29, i32 2 ; <<4 x float>*>:438 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 29, i32 3 ; <<4 x float>*>:439 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 28, i32 1 ; <<4 x float>*>:440 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 28, i32 2 ; <<4 x float>*>:441 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 28, i32 3 ; <<4 x float>*>:442 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 27, i32 1 ; <<4 x float>*>:443 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 27, i32 2 ; <<4 x float>*>:444 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 27, i32 3 ; <<4 x float>*>:445 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 26, i32 1 ; <<4 x float>*>:446 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 26, i32 2 ; <<4 x float>*>:447 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 26, i32 3 ; <<4 x float>*>:448 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 25, i32 1 ; <<4 x float>*>:449 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 25, i32 2 ; <<4 x float>*>:450 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 25, i32 3 ; <<4 x float>*>:451 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 24, i32 1 ; <<4 x float>*>:452 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 24, i32 2 ; <<4 x float>*>:453 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 24, i32 3 ; <<4 x float>*>:454 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 23, i32 1 ; <<4 x float>*>:455 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 23, i32 2 ; <<4 x float>*>:456 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 23, i32 3 ; <<4 x float>*>:457 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 22, i32 1 ; <<4 x float>*>:458 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 22, i32 2 ; <<4 x float>*>:459 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 22, i32 3 ; <<4 x float>*>:460 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 21, i32 1 ; <<4 x float>*>:461 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 21, i32 2 ; <<4 x float>*>:462 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 21, i32 3 ; <<4 x float>*>:463 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 20, i32 1 ; <<4 x float>*>:464 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 20, i32 2 ; <<4 x float>*>:465 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 20, i32 3 ; <<4 x float>*>:466 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 19, i32 1 ; <<4 x float>*>:467 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 19, i32 2 ; <<4 x float>*>:468 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 19, i32 3 ; <<4 x float>*>:469 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 18, i32 1 ; <<4 x float>*>:470 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 18, i32 2 ; <<4 x float>*>:471 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 18, i32 3 ; <<4 x float>*>:472 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 17, i32 1 ; <<4 x float>*>:473 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 17, i32 2 ; <<4 x float>*>:474 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 17, i32 3 ; <<4 x float>*>:475 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 16, i32 1 ; <<4 x float>*>:476 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 16, i32 2 ; <<4 x float>*>:477 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 16, i32 3 ; <<4 x float>*>:478 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 15, i32 1 ; <<4 x float>*>:479 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 15, i32 2 ; <<4 x float>*>:480 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 15, i32 3 ; <<4 x float>*>:481 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 14, i32 1 ; <<4 x float>*>:482 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 14, i32 2 ; <<4 x float>*>:483 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 14, i32 3 ; <<4 x float>*>:484 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 4, i32 1 ; <<4 x float>*>:485 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 4, i32 2 ; <<4 x float>*>:486 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 4, i32 3 ; <<4 x float>*>:487 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 3, i32 1 ; <<4 x float>*>:488 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 3, i32 2 ; <<4 x float>*>:489 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 3, i32 3 ; <<4 x float>*>:490 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 180, i32 1 ; <<4 x float>*>:491 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 180, i32 2 ; <<4 x float>*>:492 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 180, i32 3 ; <<4 x float>*>:493 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 181, i32 1 ; <<4 x float>*>:494 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 181, i32 2 ; <<4 x float>*>:495 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 181, i32 3 ; <<4 x float>*>:496 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 182, i32 1 ; <<4 x float>*>:497 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 182, i32 2 ; <<4 x float>*>:498 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 182, i32 3 ; <<4 x float>*>:499 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 183, i32 1 ; <<4 x float>*>:500 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 183, i32 2 ; <<4 x float>*>:501 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 183, i32 3 ; <<4 x float>*>:502 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 184, i32 1 ; <<4 x float>*>:503 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 184, i32 2 ; <<4 x float>*>:504 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 184, i32 3 ; <<4 x float>*>:505 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 185, i32 1 ; <<4 x float>*>:506 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 185, i32 2 ; <<4 x float>*>:507 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 185, i32 3 ; <<4 x float>*>:508 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 190, i32 1 ; <<4 x float>*>:509 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 190, i32 2 ; <<4 x float>*>:510 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 190, i32 3 ; <<4 x float>*>:511 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 9, i32 1 ; <<4 x float>*>:512 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 9, i32 2 ; <<4 x float>*>:513 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 9, i32 3 ; <<4 x float>*>:514 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 10, i32 1 ; <<4 x float>*>:515 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 10, i32 2 ; <<4 x float>*>:516 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 10, i32 3 ; <<4 x float>*>:517 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 11, i32 1 ; <<4 x float>*>:518 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 11, i32 2 ; <<4 x float>*>:519 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 11, i32 3 ; <<4 x float>*>:520 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 12, i32 1 ; <<4 x float>*>:521 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 12, i32 2 ; <<4 x float>*>:522 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 12, i32 3 ; <<4 x float>*>:523 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 13, i32 1 ; <<4 x float>*>:524 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 13, i32 2 ; <<4 x float>*>:525 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 13, i32 3 ; <<4 x float>*>:526 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 5, i32 1 ; <<4 x float>*>:527 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 5, i32 2 ; <<4 x float>*>:528 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 5, i32 3 ; <<4 x float>*>:529 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 1, i32 1 ; <<4 x float>*>:530 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 1, i32 2 ; <<4 x float>*>:531 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 1, i32 3 ; <<4 x float>*>:532 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 2, i32 1 ; <<4 x float>*>:533 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 2, i32 2 ; <<4 x float>*>:534 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 2, i32 3 ; <<4 x float>*>:535 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 6, i32 1 ; <<4 x float>*>:536 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 6, i32 2 ; <<4 x float>*>:537 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 6, i32 3 ; <<4 x float>*>:538 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 7, i32 1 ; <<4 x float>*>:539 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 7, i32 2 ; <<4 x float>*>:540 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 7, i32 3 ; <<4 x float>*>:541 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 186, i32 1 ; <<4 x float>*>:542 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 186, i32 2 ; <<4 x float>*>:543 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 186, i32 3 ; <<4 x float>*>:544 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 187, i32 1 ; <<4 x float>*>:545 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 187, i32 2 ; <<4 x float>*>:546 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 187, i32 3 ; <<4 x float>*>:547 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 8, i32 1 ; <<4 x float>*>:548 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 8, i32 2 ; <<4 x float>*>:549 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 8, i32 3 ; <<4 x float>*>:550 [#uses=0]
- load <4 x float>* null ; <<4 x float>>:551 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 188, i32 1 ; <<4 x float>*>:552 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 188, i32 2 ; <<4 x float>*>:553 [#uses=1]
- load <4 x float>* %553 ; <<4 x float>>:554 [#uses=1]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 188, i32 3 ; <<4 x float>*>:555 [#uses=0]
+ %.sub7896 = getelementptr [4 x <4 x i32>], [4 x <4 x i32>]* null, i32 0, i32 0 ; <<4 x i32>*> [#uses=24]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 175, i32 3 ; <<4 x float>*>:2 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 174, i32 2 ; <<4 x float>*>:3 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 174, i32 3 ; <<4 x float>*>:4 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 173, i32 1 ; <<4 x float>*>:5 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 173, i32 2 ; <<4 x float>*>:6 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 173, i32 3 ; <<4 x float>*>:7 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 172, i32 1 ; <<4 x float>*>:8 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 172, i32 2 ; <<4 x float>*>:9 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 172, i32 3 ; <<4 x float>*>:10 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 171, i32 1 ; <<4 x float>*>:11 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 171, i32 2 ; <<4 x float>*>:12 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 171, i32 3 ; <<4 x float>*>:13 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 170, i32 1 ; <<4 x float>*>:14 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 170, i32 2 ; <<4 x float>*>:15 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 170, i32 3 ; <<4 x float>*>:16 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 169, i32 1 ; <<4 x float>*>:17 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 169, i32 2 ; <<4 x float>*>:18 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 169, i32 3 ; <<4 x float>*>:19 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 168, i32 1 ; <<4 x float>*>:20 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 168, i32 2 ; <<4 x float>*>:21 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 168, i32 3 ; <<4 x float>*>:22 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 167, i32 1 ; <<4 x float>*>:23 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 167, i32 2 ; <<4 x float>*>:24 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 167, i32 3 ; <<4 x float>*>:25 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 166, i32 1 ; <<4 x float>*>:26 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 166, i32 2 ; <<4 x float>*>:27 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 166, i32 3 ; <<4 x float>*>:28 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 165, i32 1 ; <<4 x float>*>:29 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 165, i32 2 ; <<4 x float>*>:30 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 165, i32 3 ; <<4 x float>*>:31 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 164, i32 1 ; <<4 x float>*>:32 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 164, i32 2 ; <<4 x float>*>:33 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 164, i32 3 ; <<4 x float>*>:34 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 163, i32 1 ; <<4 x float>*>:35 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 163, i32 2 ; <<4 x float>*>:36 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 163, i32 3 ; <<4 x float>*>:37 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 162, i32 1 ; <<4 x float>*>:38 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 162, i32 2 ; <<4 x float>*>:39 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 162, i32 3 ; <<4 x float>*>:40 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 161, i32 1 ; <<4 x float>*>:41 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 161, i32 2 ; <<4 x float>*>:42 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 161, i32 3 ; <<4 x float>*>:43 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 160, i32 1 ; <<4 x float>*>:44 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 160, i32 2 ; <<4 x float>*>:45 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 160, i32 3 ; <<4 x float>*>:46 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 159, i32 1 ; <<4 x float>*>:47 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 159, i32 2 ; <<4 x float>*>:48 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 159, i32 3 ; <<4 x float>*>:49 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 158, i32 1 ; <<4 x float>*>:50 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 158, i32 2 ; <<4 x float>*>:51 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 158, i32 3 ; <<4 x float>*>:52 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 157, i32 1 ; <<4 x float>*>:53 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 157, i32 2 ; <<4 x float>*>:54 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 157, i32 3 ; <<4 x float>*>:55 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 156, i32 1 ; <<4 x float>*>:56 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 156, i32 2 ; <<4 x float>*>:57 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 156, i32 3 ; <<4 x float>*>:58 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 155, i32 1 ; <<4 x float>*>:59 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 155, i32 2 ; <<4 x float>*>:60 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 155, i32 3 ; <<4 x float>*>:61 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 154, i32 1 ; <<4 x float>*>:62 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 154, i32 2 ; <<4 x float>*>:63 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 154, i32 3 ; <<4 x float>*>:64 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 153, i32 1 ; <<4 x float>*>:65 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 153, i32 2 ; <<4 x float>*>:66 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 153, i32 3 ; <<4 x float>*>:67 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 152, i32 1 ; <<4 x float>*>:68 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 152, i32 2 ; <<4 x float>*>:69 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 152, i32 3 ; <<4 x float>*>:70 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 151, i32 1 ; <<4 x float>*>:71 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 151, i32 2 ; <<4 x float>*>:72 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 151, i32 3 ; <<4 x float>*>:73 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 150, i32 1 ; <<4 x float>*>:74 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 150, i32 2 ; <<4 x float>*>:75 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 150, i32 3 ; <<4 x float>*>:76 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 149, i32 1 ; <<4 x float>*>:77 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 149, i32 2 ; <<4 x float>*>:78 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 149, i32 3 ; <<4 x float>*>:79 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 148, i32 1 ; <<4 x float>*>:80 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 148, i32 2 ; <<4 x float>*>:81 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 148, i32 3 ; <<4 x float>*>:82 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 147, i32 1 ; <<4 x float>*>:83 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 147, i32 2 ; <<4 x float>*>:84 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 147, i32 3 ; <<4 x float>*>:85 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 146, i32 1 ; <<4 x float>*>:86 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 146, i32 2 ; <<4 x float>*>:87 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 146, i32 3 ; <<4 x float>*>:88 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 145, i32 1 ; <<4 x float>*>:89 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 145, i32 2 ; <<4 x float>*>:90 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 145, i32 3 ; <<4 x float>*>:91 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 144, i32 1 ; <<4 x float>*>:92 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 144, i32 2 ; <<4 x float>*>:93 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 144, i32 3 ; <<4 x float>*>:94 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 143, i32 1 ; <<4 x float>*>:95 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 143, i32 2 ; <<4 x float>*>:96 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 143, i32 3 ; <<4 x float>*>:97 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 142, i32 1 ; <<4 x float>*>:98 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 142, i32 2 ; <<4 x float>*>:99 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 142, i32 3 ; <<4 x float>*>:100 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 141, i32 1 ; <<4 x float>*>:101 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 141, i32 2 ; <<4 x float>*>:102 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 141, i32 3 ; <<4 x float>*>:103 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 140, i32 1 ; <<4 x float>*>:104 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 140, i32 2 ; <<4 x float>*>:105 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 140, i32 3 ; <<4 x float>*>:106 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 139, i32 1 ; <<4 x float>*>:107 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 139, i32 2 ; <<4 x float>*>:108 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 139, i32 3 ; <<4 x float>*>:109 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 138, i32 1 ; <<4 x float>*>:110 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 138, i32 2 ; <<4 x float>*>:111 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 138, i32 3 ; <<4 x float>*>:112 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 137, i32 1 ; <<4 x float>*>:113 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 137, i32 2 ; <<4 x float>*>:114 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 137, i32 3 ; <<4 x float>*>:115 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 136, i32 1 ; <<4 x float>*>:116 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 136, i32 2 ; <<4 x float>*>:117 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 136, i32 3 ; <<4 x float>*>:118 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 135, i32 1 ; <<4 x float>*>:119 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 135, i32 2 ; <<4 x float>*>:120 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 135, i32 3 ; <<4 x float>*>:121 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 134, i32 1 ; <<4 x float>*>:122 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 134, i32 2 ; <<4 x float>*>:123 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 134, i32 3 ; <<4 x float>*>:124 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 133, i32 1 ; <<4 x float>*>:125 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 133, i32 2 ; <<4 x float>*>:126 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 133, i32 3 ; <<4 x float>*>:127 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 132, i32 1 ; <<4 x float>*>:128 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 132, i32 2 ; <<4 x float>*>:129 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 132, i32 3 ; <<4 x float>*>:130 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 131, i32 1 ; <<4 x float>*>:131 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 131, i32 2 ; <<4 x float>*>:132 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 131, i32 3 ; <<4 x float>*>:133 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 130, i32 1 ; <<4 x float>*>:134 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 130, i32 2 ; <<4 x float>*>:135 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 130, i32 3 ; <<4 x float>*>:136 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 129, i32 1 ; <<4 x float>*>:137 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 129, i32 2 ; <<4 x float>*>:138 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 129, i32 3 ; <<4 x float>*>:139 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 128, i32 1 ; <<4 x float>*>:140 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 128, i32 2 ; <<4 x float>*>:141 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 128, i32 3 ; <<4 x float>*>:142 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 127, i32 1 ; <<4 x float>*>:143 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 127, i32 2 ; <<4 x float>*>:144 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 127, i32 3 ; <<4 x float>*>:145 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 126, i32 1 ; <<4 x float>*>:146 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 126, i32 2 ; <<4 x float>*>:147 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 126, i32 3 ; <<4 x float>*>:148 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 125, i32 1 ; <<4 x float>*>:149 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 125, i32 2 ; <<4 x float>*>:150 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 125, i32 3 ; <<4 x float>*>:151 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 124, i32 1 ; <<4 x float>*>:152 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 124, i32 2 ; <<4 x float>*>:153 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 124, i32 3 ; <<4 x float>*>:154 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 123, i32 1 ; <<4 x float>*>:155 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 123, i32 2 ; <<4 x float>*>:156 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 123, i32 3 ; <<4 x float>*>:157 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 122, i32 1 ; <<4 x float>*>:158 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 122, i32 2 ; <<4 x float>*>:159 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 122, i32 3 ; <<4 x float>*>:160 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 121, i32 1 ; <<4 x float>*>:161 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 121, i32 2 ; <<4 x float>*>:162 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 121, i32 3 ; <<4 x float>*>:163 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 120, i32 1 ; <<4 x float>*>:164 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 120, i32 2 ; <<4 x float>*>:165 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 120, i32 3 ; <<4 x float>*>:166 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 119, i32 1 ; <<4 x float>*>:167 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 119, i32 2 ; <<4 x float>*>:168 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 119, i32 3 ; <<4 x float>*>:169 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 118, i32 1 ; <<4 x float>*>:170 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 118, i32 2 ; <<4 x float>*>:171 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 118, i32 3 ; <<4 x float>*>:172 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 117, i32 1 ; <<4 x float>*>:173 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 117, i32 2 ; <<4 x float>*>:174 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 117, i32 3 ; <<4 x float>*>:175 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 116, i32 1 ; <<4 x float>*>:176 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 116, i32 2 ; <<4 x float>*>:177 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 116, i32 3 ; <<4 x float>*>:178 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 115, i32 1 ; <<4 x float>*>:179 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 115, i32 2 ; <<4 x float>*>:180 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 115, i32 3 ; <<4 x float>*>:181 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 114, i32 1 ; <<4 x float>*>:182 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 114, i32 2 ; <<4 x float>*>:183 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 114, i32 3 ; <<4 x float>*>:184 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 113, i32 1 ; <<4 x float>*>:185 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 113, i32 2 ; <<4 x float>*>:186 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 113, i32 3 ; <<4 x float>*>:187 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 112, i32 1 ; <<4 x float>*>:188 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 112, i32 2 ; <<4 x float>*>:189 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 112, i32 3 ; <<4 x float>*>:190 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 111, i32 1 ; <<4 x float>*>:191 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 111, i32 2 ; <<4 x float>*>:192 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 111, i32 3 ; <<4 x float>*>:193 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 110, i32 1 ; <<4 x float>*>:194 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 110, i32 2 ; <<4 x float>*>:195 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 110, i32 3 ; <<4 x float>*>:196 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 109, i32 1 ; <<4 x float>*>:197 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 109, i32 2 ; <<4 x float>*>:198 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 109, i32 3 ; <<4 x float>*>:199 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 108, i32 1 ; <<4 x float>*>:200 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 108, i32 2 ; <<4 x float>*>:201 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 108, i32 3 ; <<4 x float>*>:202 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 107, i32 1 ; <<4 x float>*>:203 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 107, i32 2 ; <<4 x float>*>:204 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 107, i32 3 ; <<4 x float>*>:205 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 106, i32 1 ; <<4 x float>*>:206 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 106, i32 2 ; <<4 x float>*>:207 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 106, i32 3 ; <<4 x float>*>:208 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 105, i32 1 ; <<4 x float>*>:209 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 105, i32 2 ; <<4 x float>*>:210 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 105, i32 3 ; <<4 x float>*>:211 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 104, i32 1 ; <<4 x float>*>:212 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 104, i32 2 ; <<4 x float>*>:213 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 104, i32 3 ; <<4 x float>*>:214 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 103, i32 1 ; <<4 x float>*>:215 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 103, i32 2 ; <<4 x float>*>:216 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 103, i32 3 ; <<4 x float>*>:217 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 102, i32 1 ; <<4 x float>*>:218 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 102, i32 2 ; <<4 x float>*>:219 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 102, i32 3 ; <<4 x float>*>:220 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 101, i32 1 ; <<4 x float>*>:221 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 101, i32 2 ; <<4 x float>*>:222 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 101, i32 3 ; <<4 x float>*>:223 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 100, i32 1 ; <<4 x float>*>:224 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 100, i32 2 ; <<4 x float>*>:225 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 100, i32 3 ; <<4 x float>*>:226 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 99, i32 1 ; <<4 x float>*>:227 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 99, i32 2 ; <<4 x float>*>:228 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 99, i32 3 ; <<4 x float>*>:229 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 98, i32 1 ; <<4 x float>*>:230 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 98, i32 2 ; <<4 x float>*>:231 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 98, i32 3 ; <<4 x float>*>:232 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 97, i32 1 ; <<4 x float>*>:233 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 97, i32 2 ; <<4 x float>*>:234 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 97, i32 3 ; <<4 x float>*>:235 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 96, i32 1 ; <<4 x float>*>:236 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 96, i32 2 ; <<4 x float>*>:237 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 96, i32 3 ; <<4 x float>*>:238 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 95, i32 1 ; <<4 x float>*>:239 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 95, i32 2 ; <<4 x float>*>:240 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 95, i32 3 ; <<4 x float>*>:241 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 94, i32 1 ; <<4 x float>*>:242 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 94, i32 2 ; <<4 x float>*>:243 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 94, i32 3 ; <<4 x float>*>:244 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 93, i32 1 ; <<4 x float>*>:245 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 93, i32 2 ; <<4 x float>*>:246 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 93, i32 3 ; <<4 x float>*>:247 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 92, i32 1 ; <<4 x float>*>:248 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 92, i32 2 ; <<4 x float>*>:249 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 92, i32 3 ; <<4 x float>*>:250 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 91, i32 1 ; <<4 x float>*>:251 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 91, i32 2 ; <<4 x float>*>:252 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 91, i32 3 ; <<4 x float>*>:253 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 90, i32 1 ; <<4 x float>*>:254 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 90, i32 2 ; <<4 x float>*>:255 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 90, i32 3 ; <<4 x float>*>:256 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 89, i32 1 ; <<4 x float>*>:257 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 89, i32 2 ; <<4 x float>*>:258 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 89, i32 3 ; <<4 x float>*>:259 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 88, i32 1 ; <<4 x float>*>:260 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 88, i32 2 ; <<4 x float>*>:261 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 88, i32 3 ; <<4 x float>*>:262 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 87, i32 1 ; <<4 x float>*>:263 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 87, i32 2 ; <<4 x float>*>:264 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 87, i32 3 ; <<4 x float>*>:265 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 86, i32 1 ; <<4 x float>*>:266 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 86, i32 2 ; <<4 x float>*>:267 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 86, i32 3 ; <<4 x float>*>:268 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 85, i32 1 ; <<4 x float>*>:269 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 85, i32 2 ; <<4 x float>*>:270 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 85, i32 3 ; <<4 x float>*>:271 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 84, i32 1 ; <<4 x float>*>:272 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 84, i32 2 ; <<4 x float>*>:273 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 84, i32 3 ; <<4 x float>*>:274 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 83, i32 1 ; <<4 x float>*>:275 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 83, i32 2 ; <<4 x float>*>:276 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 83, i32 3 ; <<4 x float>*>:277 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 82, i32 1 ; <<4 x float>*>:278 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 82, i32 2 ; <<4 x float>*>:279 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 82, i32 3 ; <<4 x float>*>:280 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 81, i32 1 ; <<4 x float>*>:281 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 81, i32 2 ; <<4 x float>*>:282 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 81, i32 3 ; <<4 x float>*>:283 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 80, i32 1 ; <<4 x float>*>:284 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 80, i32 2 ; <<4 x float>*>:285 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 80, i32 3 ; <<4 x float>*>:286 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 79, i32 1 ; <<4 x float>*>:287 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 79, i32 2 ; <<4 x float>*>:288 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 79, i32 3 ; <<4 x float>*>:289 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 78, i32 1 ; <<4 x float>*>:290 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 78, i32 2 ; <<4 x float>*>:291 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 78, i32 3 ; <<4 x float>*>:292 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 77, i32 1 ; <<4 x float>*>:293 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 77, i32 2 ; <<4 x float>*>:294 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 77, i32 3 ; <<4 x float>*>:295 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 76, i32 1 ; <<4 x float>*>:296 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 76, i32 2 ; <<4 x float>*>:297 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 76, i32 3 ; <<4 x float>*>:298 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 75, i32 1 ; <<4 x float>*>:299 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 75, i32 2 ; <<4 x float>*>:300 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 75, i32 3 ; <<4 x float>*>:301 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 74, i32 1 ; <<4 x float>*>:302 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 74, i32 2 ; <<4 x float>*>:303 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 74, i32 3 ; <<4 x float>*>:304 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 73, i32 1 ; <<4 x float>*>:305 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 73, i32 2 ; <<4 x float>*>:306 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 73, i32 3 ; <<4 x float>*>:307 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 72, i32 1 ; <<4 x float>*>:308 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 72, i32 2 ; <<4 x float>*>:309 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 72, i32 3 ; <<4 x float>*>:310 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 71, i32 1 ; <<4 x float>*>:311 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 71, i32 2 ; <<4 x float>*>:312 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 71, i32 3 ; <<4 x float>*>:313 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 70, i32 1 ; <<4 x float>*>:314 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 70, i32 2 ; <<4 x float>*>:315 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 70, i32 3 ; <<4 x float>*>:316 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 69, i32 1 ; <<4 x float>*>:317 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 69, i32 2 ; <<4 x float>*>:318 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 69, i32 3 ; <<4 x float>*>:319 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 68, i32 1 ; <<4 x float>*>:320 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 68, i32 2 ; <<4 x float>*>:321 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 68, i32 3 ; <<4 x float>*>:322 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 67, i32 1 ; <<4 x float>*>:323 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 67, i32 2 ; <<4 x float>*>:324 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 67, i32 3 ; <<4 x float>*>:325 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 66, i32 1 ; <<4 x float>*>:326 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 66, i32 2 ; <<4 x float>*>:327 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 66, i32 3 ; <<4 x float>*>:328 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 65, i32 1 ; <<4 x float>*>:329 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 65, i32 2 ; <<4 x float>*>:330 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 65, i32 3 ; <<4 x float>*>:331 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 64, i32 1 ; <<4 x float>*>:332 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 64, i32 2 ; <<4 x float>*>:333 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 64, i32 3 ; <<4 x float>*>:334 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 63, i32 1 ; <<4 x float>*>:335 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 63, i32 2 ; <<4 x float>*>:336 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 63, i32 3 ; <<4 x float>*>:337 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 62, i32 1 ; <<4 x float>*>:338 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 62, i32 2 ; <<4 x float>*>:339 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 62, i32 3 ; <<4 x float>*>:340 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 61, i32 1 ; <<4 x float>*>:341 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 61, i32 2 ; <<4 x float>*>:342 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 61, i32 3 ; <<4 x float>*>:343 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 60, i32 1 ; <<4 x float>*>:344 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 60, i32 2 ; <<4 x float>*>:345 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 60, i32 3 ; <<4 x float>*>:346 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 59, i32 1 ; <<4 x float>*>:347 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 59, i32 2 ; <<4 x float>*>:348 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 59, i32 3 ; <<4 x float>*>:349 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 58, i32 1 ; <<4 x float>*>:350 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 58, i32 2 ; <<4 x float>*>:351 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 58, i32 3 ; <<4 x float>*>:352 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 57, i32 1 ; <<4 x float>*>:353 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 57, i32 2 ; <<4 x float>*>:354 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 57, i32 3 ; <<4 x float>*>:355 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 56, i32 1 ; <<4 x float>*>:356 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 56, i32 2 ; <<4 x float>*>:357 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 56, i32 3 ; <<4 x float>*>:358 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 55, i32 1 ; <<4 x float>*>:359 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 55, i32 2 ; <<4 x float>*>:360 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 55, i32 3 ; <<4 x float>*>:361 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 54, i32 1 ; <<4 x float>*>:362 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 54, i32 2 ; <<4 x float>*>:363 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 54, i32 3 ; <<4 x float>*>:364 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 53, i32 1 ; <<4 x float>*>:365 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 53, i32 2 ; <<4 x float>*>:366 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 53, i32 3 ; <<4 x float>*>:367 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 52, i32 1 ; <<4 x float>*>:368 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 52, i32 2 ; <<4 x float>*>:369 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 52, i32 3 ; <<4 x float>*>:370 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 51, i32 1 ; <<4 x float>*>:371 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 51, i32 2 ; <<4 x float>*>:372 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 51, i32 3 ; <<4 x float>*>:373 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 50, i32 1 ; <<4 x float>*>:374 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 50, i32 2 ; <<4 x float>*>:375 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 50, i32 3 ; <<4 x float>*>:376 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 49, i32 1 ; <<4 x float>*>:377 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 49, i32 2 ; <<4 x float>*>:378 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 49, i32 3 ; <<4 x float>*>:379 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 48, i32 1 ; <<4 x float>*>:380 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 48, i32 2 ; <<4 x float>*>:381 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 48, i32 3 ; <<4 x float>*>:382 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 47, i32 1 ; <<4 x float>*>:383 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 47, i32 2 ; <<4 x float>*>:384 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 47, i32 3 ; <<4 x float>*>:385 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 46, i32 1 ; <<4 x float>*>:386 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 46, i32 2 ; <<4 x float>*>:387 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 46, i32 3 ; <<4 x float>*>:388 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 45, i32 1 ; <<4 x float>*>:389 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 45, i32 2 ; <<4 x float>*>:390 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 45, i32 3 ; <<4 x float>*>:391 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 44, i32 1 ; <<4 x float>*>:392 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 44, i32 2 ; <<4 x float>*>:393 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 44, i32 3 ; <<4 x float>*>:394 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 43, i32 1 ; <<4 x float>*>:395 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 43, i32 2 ; <<4 x float>*>:396 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 43, i32 3 ; <<4 x float>*>:397 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 42, i32 1 ; <<4 x float>*>:398 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 42, i32 2 ; <<4 x float>*>:399 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 42, i32 3 ; <<4 x float>*>:400 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 41, i32 1 ; <<4 x float>*>:401 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 41, i32 2 ; <<4 x float>*>:402 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 41, i32 3 ; <<4 x float>*>:403 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 40, i32 1 ; <<4 x float>*>:404 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 40, i32 2 ; <<4 x float>*>:405 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 40, i32 3 ; <<4 x float>*>:406 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 39, i32 1 ; <<4 x float>*>:407 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 39, i32 2 ; <<4 x float>*>:408 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 39, i32 3 ; <<4 x float>*>:409 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 38, i32 1 ; <<4 x float>*>:410 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 38, i32 2 ; <<4 x float>*>:411 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 38, i32 3 ; <<4 x float>*>:412 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 37, i32 1 ; <<4 x float>*>:413 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 37, i32 2 ; <<4 x float>*>:414 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 37, i32 3 ; <<4 x float>*>:415 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 36, i32 1 ; <<4 x float>*>:416 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 36, i32 2 ; <<4 x float>*>:417 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 36, i32 3 ; <<4 x float>*>:418 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 35, i32 1 ; <<4 x float>*>:419 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 35, i32 2 ; <<4 x float>*>:420 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 35, i32 3 ; <<4 x float>*>:421 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 34, i32 1 ; <<4 x float>*>:422 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 34, i32 2 ; <<4 x float>*>:423 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 34, i32 3 ; <<4 x float>*>:424 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 33, i32 1 ; <<4 x float>*>:425 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 33, i32 2 ; <<4 x float>*>:426 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 33, i32 3 ; <<4 x float>*>:427 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 32, i32 1 ; <<4 x float>*>:428 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 32, i32 2 ; <<4 x float>*>:429 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 32, i32 3 ; <<4 x float>*>:430 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 31, i32 1 ; <<4 x float>*>:431 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 31, i32 2 ; <<4 x float>*>:432 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 31, i32 3 ; <<4 x float>*>:433 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 30, i32 1 ; <<4 x float>*>:434 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 30, i32 2 ; <<4 x float>*>:435 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 30, i32 3 ; <<4 x float>*>:436 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 29, i32 1 ; <<4 x float>*>:437 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 29, i32 2 ; <<4 x float>*>:438 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 29, i32 3 ; <<4 x float>*>:439 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 28, i32 1 ; <<4 x float>*>:440 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 28, i32 2 ; <<4 x float>*>:441 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 28, i32 3 ; <<4 x float>*>:442 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 27, i32 1 ; <<4 x float>*>:443 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 27, i32 2 ; <<4 x float>*>:444 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 27, i32 3 ; <<4 x float>*>:445 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 26, i32 1 ; <<4 x float>*>:446 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 26, i32 2 ; <<4 x float>*>:447 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 26, i32 3 ; <<4 x float>*>:448 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 25, i32 1 ; <<4 x float>*>:449 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 25, i32 2 ; <<4 x float>*>:450 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 25, i32 3 ; <<4 x float>*>:451 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 24, i32 1 ; <<4 x float>*>:452 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 24, i32 2 ; <<4 x float>*>:453 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 24, i32 3 ; <<4 x float>*>:454 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 23, i32 1 ; <<4 x float>*>:455 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 23, i32 2 ; <<4 x float>*>:456 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 23, i32 3 ; <<4 x float>*>:457 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 22, i32 1 ; <<4 x float>*>:458 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 22, i32 2 ; <<4 x float>*>:459 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 22, i32 3 ; <<4 x float>*>:460 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 21, i32 1 ; <<4 x float>*>:461 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 21, i32 2 ; <<4 x float>*>:462 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 21, i32 3 ; <<4 x float>*>:463 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 20, i32 1 ; <<4 x float>*>:464 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 20, i32 2 ; <<4 x float>*>:465 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 20, i32 3 ; <<4 x float>*>:466 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 19, i32 1 ; <<4 x float>*>:467 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 19, i32 2 ; <<4 x float>*>:468 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 19, i32 3 ; <<4 x float>*>:469 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 18, i32 1 ; <<4 x float>*>:470 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 18, i32 2 ; <<4 x float>*>:471 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 18, i32 3 ; <<4 x float>*>:472 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 17, i32 1 ; <<4 x float>*>:473 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 17, i32 2 ; <<4 x float>*>:474 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 17, i32 3 ; <<4 x float>*>:475 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 16, i32 1 ; <<4 x float>*>:476 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 16, i32 2 ; <<4 x float>*>:477 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 16, i32 3 ; <<4 x float>*>:478 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 15, i32 1 ; <<4 x float>*>:479 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 15, i32 2 ; <<4 x float>*>:480 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 15, i32 3 ; <<4 x float>*>:481 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 14, i32 1 ; <<4 x float>*>:482 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 14, i32 2 ; <<4 x float>*>:483 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 14, i32 3 ; <<4 x float>*>:484 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 4, i32 1 ; <<4 x float>*>:485 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 4, i32 2 ; <<4 x float>*>:486 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 4, i32 3 ; <<4 x float>*>:487 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 3, i32 1 ; <<4 x float>*>:488 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 3, i32 2 ; <<4 x float>*>:489 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 3, i32 3 ; <<4 x float>*>:490 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 180, i32 1 ; <<4 x float>*>:491 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 180, i32 2 ; <<4 x float>*>:492 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 180, i32 3 ; <<4 x float>*>:493 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 181, i32 1 ; <<4 x float>*>:494 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 181, i32 2 ; <<4 x float>*>:495 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 181, i32 3 ; <<4 x float>*>:496 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 182, i32 1 ; <<4 x float>*>:497 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 182, i32 2 ; <<4 x float>*>:498 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 182, i32 3 ; <<4 x float>*>:499 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 183, i32 1 ; <<4 x float>*>:500 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 183, i32 2 ; <<4 x float>*>:501 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 183, i32 3 ; <<4 x float>*>:502 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 184, i32 1 ; <<4 x float>*>:503 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 184, i32 2 ; <<4 x float>*>:504 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 184, i32 3 ; <<4 x float>*>:505 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 185, i32 1 ; <<4 x float>*>:506 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 185, i32 2 ; <<4 x float>*>:507 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 185, i32 3 ; <<4 x float>*>:508 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 190, i32 1 ; <<4 x float>*>:509 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 190, i32 2 ; <<4 x float>*>:510 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 190, i32 3 ; <<4 x float>*>:511 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 9, i32 1 ; <<4 x float>*>:512 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 9, i32 2 ; <<4 x float>*>:513 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 9, i32 3 ; <<4 x float>*>:514 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 10, i32 1 ; <<4 x float>*>:515 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 10, i32 2 ; <<4 x float>*>:516 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 10, i32 3 ; <<4 x float>*>:517 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 11, i32 1 ; <<4 x float>*>:518 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 11, i32 2 ; <<4 x float>*>:519 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 11, i32 3 ; <<4 x float>*>:520 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 12, i32 1 ; <<4 x float>*>:521 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 12, i32 2 ; <<4 x float>*>:522 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 12, i32 3 ; <<4 x float>*>:523 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 13, i32 1 ; <<4 x float>*>:524 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 13, i32 2 ; <<4 x float>*>:525 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 13, i32 3 ; <<4 x float>*>:526 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 5, i32 1 ; <<4 x float>*>:527 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 5, i32 2 ; <<4 x float>*>:528 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 5, i32 3 ; <<4 x float>*>:529 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 1, i32 1 ; <<4 x float>*>:530 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 1, i32 2 ; <<4 x float>*>:531 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 1, i32 3 ; <<4 x float>*>:532 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 2, i32 1 ; <<4 x float>*>:533 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 2, i32 2 ; <<4 x float>*>:534 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 2, i32 3 ; <<4 x float>*>:535 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 6, i32 1 ; <<4 x float>*>:536 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 6, i32 2 ; <<4 x float>*>:537 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 6, i32 3 ; <<4 x float>*>:538 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 7, i32 1 ; <<4 x float>*>:539 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 7, i32 2 ; <<4 x float>*>:540 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 7, i32 3 ; <<4 x float>*>:541 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 186, i32 1 ; <<4 x float>*>:542 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 186, i32 2 ; <<4 x float>*>:543 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 186, i32 3 ; <<4 x float>*>:544 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 187, i32 1 ; <<4 x float>*>:545 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 187, i32 2 ; <<4 x float>*>:546 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 187, i32 3 ; <<4 x float>*>:547 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 8, i32 1 ; <<4 x float>*>:548 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 8, i32 2 ; <<4 x float>*>:549 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 8, i32 3 ; <<4 x float>*>:550 [#uses=0]
+ load <4 x float>, <4 x float>* null ; <<4 x float>>:551 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 188, i32 1 ; <<4 x float>*>:552 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 188, i32 2 ; <<4 x float>*>:553 [#uses=1]
+ load <4 x float>, <4 x float>* %553 ; <<4 x float>>:554 [#uses=1]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 188, i32 3 ; <<4 x float>*>:555 [#uses=0]
shufflevector <4 x float> %554, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>>:556 [#uses=1]
call <4 x i32> @llvm.ppc.altivec.vcmpgtfp( <4 x float> zeroinitializer, <4 x float> %556 ) ; <<4 x i32>>:557 [#uses=0]
bitcast <4 x i32> zeroinitializer to <4 x float> ; <<4 x float>>:558 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 189, i32 0 ; <<4 x float>*>:559 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 189, i32 2 ; <<4 x float>*>:560 [#uses=1]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 189, i32 0 ; <<4 x float>*>:559 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 189, i32 2 ; <<4 x float>*>:560 [#uses=1]
store <4 x float> zeroinitializer, <4 x float>* %560
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 189, i32 3 ; <<4 x float>*>:561 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 189, i32 1 ; <<4 x float>*>:562 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 189, i32 2 ; <<4 x float>*>:563 [#uses=0]
- load <4 x i32>* %.sub7896 ; <<4 x i32>>:564 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 189, i32 3 ; <<4 x float>*>:561 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 189, i32 1 ; <<4 x float>*>:562 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 189, i32 2 ; <<4 x float>*>:563 [#uses=0]
+ load <4 x i32>, <4 x i32>* %.sub7896 ; <<4 x i32>>:564 [#uses=0]
shufflevector <4 x float> zeroinitializer, <4 x float> zeroinitializer, <4 x i32> < i32 0, i32 5, i32 6, i32 7 > ; <<4 x float>>:565 [#uses=1]
store <4 x float> %565, <4 x float>* null
icmp eq i32 0, 0 ; <i1>:566 [#uses=1]
br i1 %566, label %.critedge, label %xPIF.exit
.critedge: ; preds = %xOperationInitMasks.exit
- getelementptr [4 x <4 x i32>]* null, i32 0, i32 3 ; <<4 x i32>*>:567 [#uses=0]
+ getelementptr [4 x <4 x i32>], [4 x <4 x i32>]* null, i32 0, i32 3 ; <<4 x i32>*>:567 [#uses=0]
and <4 x i32> zeroinitializer, zeroinitializer ; <<4 x i32>>:568 [#uses=0]
or <4 x i32> zeroinitializer, zeroinitializer ; <<4 x i32>>:569 [#uses=0]
icmp eq i32 0, 0 ; <i1>:570 [#uses=1]
@@ -583,24 +583,24 @@ xOperationInitMasks.exit:
br label %xPIF.exit
xPIF.exit: ; preds = %.critedge7898, %xOperationInitMasks.exit
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 188, i32 1 ; <<4 x float>*>:571 [#uses=0]
- load <4 x float>* null ; <<4 x float>>:572 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 188, i32 1 ; <<4 x float>*>:571 [#uses=0]
+ load <4 x float>, <4 x float>* null ; <<4 x float>>:572 [#uses=0]
shufflevector <4 x float> zeroinitializer, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>>:573 [#uses=0]
icmp eq i32 0, 0 ; <i1>:574 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 3, i32 1 ; <<4 x float>*>:575 [#uses=0]
- load <4 x float>* %0 ; <<4 x float>>:576 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 3, i32 1 ; <<4 x float>*>:575 [#uses=0]
+ load <4 x float>, <4 x float>* %0 ; <<4 x float>>:576 [#uses=0]
call i32 @llvm.ppc.altivec.vcmpequw.p( i32 0, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer ) ; <i32>:577 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 2, i32 0 ; <<4 x float>*>:578 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 2, i32 1 ; <<4 x float>*>:579 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 2, i32 2 ; <<4 x float>*>:580 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 2, i32 3 ; <<4 x float>*>:581 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 1, i32 3 ; <<4 x float>*>:582 [#uses=0]
- load <4 x float>* null ; <<4 x float>>:583 [#uses=1]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 0, i32 1 ; <<4 x float>*>:584 [#uses=1]
- load <4 x float>* %584 ; <<4 x float>>:585 [#uses=1]
- load <4 x float>* null ; <<4 x float>>:586 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 0, i32 3 ; <<4 x float>*>:587 [#uses=1]
- load <4 x float>* %587 ; <<4 x float>>:588 [#uses=1]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 2, i32 0 ; <<4 x float>*>:578 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 2, i32 1 ; <<4 x float>*>:579 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 2, i32 2 ; <<4 x float>*>:580 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 2, i32 3 ; <<4 x float>*>:581 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 1, i32 3 ; <<4 x float>*>:582 [#uses=0]
+ load <4 x float>, <4 x float>* null ; <<4 x float>>:583 [#uses=1]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 0, i32 1 ; <<4 x float>*>:584 [#uses=1]
+ load <4 x float>, <4 x float>* %584 ; <<4 x float>>:585 [#uses=1]
+ load <4 x float>, <4 x float>* null ; <<4 x float>>:586 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 0, i32 3 ; <<4 x float>*>:587 [#uses=1]
+ load <4 x float>, <4 x float>* %587 ; <<4 x float>>:588 [#uses=1]
shufflevector <4 x float> %583, <4 x float> undef, <4 x i32> < i32 3, i32 3, i32 3, i32 3 > ; <<4 x float>>:589 [#uses=1]
shufflevector <4 x float> %585, <4 x float> undef, <4 x i32> < i32 3, i32 3, i32 3, i32 3 > ; <<4 x float>>:590 [#uses=1]
shufflevector <4 x float> %588, <4 x float> undef, <4 x i32> < i32 3, i32 3, i32 3, i32 3 > ; <<4 x float>>:591 [#uses=1]
@@ -608,32 +608,32 @@ xPIF.exit: ; preds = %.critedge7898, %xOperationInitMasks.exit
fmul <4 x float> zeroinitializer, %590 ; <<4 x float>>:593 [#uses=0]
fmul <4 x float> zeroinitializer, zeroinitializer ; <<4 x float>>:594 [#uses=1]
fmul <4 x float> zeroinitializer, %591 ; <<4 x float>>:595 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 4, i32 0 ; <<4 x float>*>:596 [#uses=2]
- load <4 x float>* %596 ; <<4 x float>>:597 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 4, i32 0 ; <<4 x float>*>:596 [#uses=2]
+ load <4 x float>, <4 x float>* %596 ; <<4 x float>>:597 [#uses=0]
store <4 x float> zeroinitializer, <4 x float>* %596
- load <4 x float>* null ; <<4 x float>>:598 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 4, i32 2 ; <<4 x float>*>:599 [#uses=0]
+ load <4 x float>, <4 x float>* null ; <<4 x float>>:598 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 4, i32 2 ; <<4 x float>*>:599 [#uses=0]
shufflevector <4 x float> %594, <4 x float> zeroinitializer, <4 x i32> < i32 0, i32 1, i32 2, i32 7 > ; <<4 x float>>:600 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 4, i32 3 ; <<4 x float>*>:601 [#uses=2]
- load <4 x float>* %601 ; <<4 x float>>:602 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 4, i32 3 ; <<4 x float>*>:601 [#uses=2]
+ load <4 x float>, <4 x float>* %601 ; <<4 x float>>:602 [#uses=0]
store <4 x float> zeroinitializer, <4 x float>* %601
- load <4 x float>* null ; <<4 x float>>:603 [#uses=0]
- load <4 x float>* null ; <<4 x float>>:604 [#uses=1]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 4, i32 2 ; <<4 x float>*>:605 [#uses=1]
- load <4 x float>* %605 ; <<4 x float>>:606 [#uses=1]
+ load <4 x float>, <4 x float>* null ; <<4 x float>>:603 [#uses=0]
+ load <4 x float>, <4 x float>* null ; <<4 x float>>:604 [#uses=1]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 4, i32 2 ; <<4 x float>*>:605 [#uses=1]
+ load <4 x float>, <4 x float>* %605 ; <<4 x float>>:606 [#uses=1]
fsub <4 x float> zeroinitializer, %604 ; <<4 x float>>:607 [#uses=2]
fsub <4 x float> zeroinitializer, %606 ; <<4 x float>>:608 [#uses=2]
call i32 @llvm.ppc.altivec.vcmpequw.p( i32 0, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer ) ; <i32>:609 [#uses=0]
br i1 false, label %617, label %610
; <label>:610 ; preds = %xPIF.exit
- load <4 x float>* null ; <<4 x float>>:611 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 0, i32 1 ; <<4 x float>*>:612 [#uses=2]
- load <4 x float>* %612 ; <<4 x float>>:613 [#uses=1]
+ load <4 x float>, <4 x float>* null ; <<4 x float>>:611 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 0, i32 1 ; <<4 x float>*>:612 [#uses=2]
+ load <4 x float>, <4 x float>* %612 ; <<4 x float>>:613 [#uses=1]
shufflevector <4 x float> %607, <4 x float> %613, <4 x i32> < i32 0, i32 1, i32 2, i32 7 > ; <<4 x float>>:614 [#uses=1]
store <4 x float> %614, <4 x float>* %612
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 0, i32 3 ; <<4 x float>*>:615 [#uses=2]
- load <4 x float>* %615 ; <<4 x float>>:616 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 0, i32 3 ; <<4 x float>*>:615 [#uses=2]
+ load <4 x float>, <4 x float>* %615 ; <<4 x float>>:616 [#uses=0]
store <4 x float> zeroinitializer, <4 x float>* %615
br label %xST.exit400
@@ -645,33 +645,33 @@ xPIF.exit: ; preds = %.critedge7898, %xOperationInitMasks.exit
br i1 %621, label %625, label %622
; <label>:622 ; preds = %617
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 0, i32 1 ; <<4 x float>*>:623 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 0, i32 1 ; <<4 x float>*>:623 [#uses=0]
shufflevector <4 x float> %607, <4 x float> zeroinitializer, <4 x i32> < i32 0, i32 1, i32 2, i32 7 > ; <<4 x float>>:624 [#uses=0]
br label %625
; <label>:625 ; preds = %622, %617
- load <4 x i32>* %.sub7896 ; <<4 x i32>>:626 [#uses=0]
+ load <4 x i32>, <4 x i32>* %.sub7896 ; <<4 x i32>>:626 [#uses=0]
call i32 @llvm.ppc.altivec.vcmpequw.p( i32 0, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer ) ; <i32>:627 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 0, i32 2 ; <<4 x float>*>:628 [#uses=1]
- load <4 x float>* %628 ; <<4 x float>>:629 [#uses=0]
- load <4 x i32>* %.sub7896 ; <<4 x i32>>:630 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 0, i32 2 ; <<4 x float>*>:628 [#uses=1]
+ load <4 x float>, <4 x float>* %628 ; <<4 x float>>:629 [#uses=0]
+ load <4 x i32>, <4 x i32>* %.sub7896 ; <<4 x i32>>:630 [#uses=0]
call i32 @llvm.ppc.altivec.vcmpequw.p( i32 0, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer ) ; <i32>:631 [#uses=1]
icmp eq i32 %631, 0 ; <i1>:632 [#uses=1]
br i1 %632, label %xST.exit400, label %633
; <label>:633 ; preds = %625
- load <4 x float>* null ; <<4 x float>>:634 [#uses=1]
+ load <4 x float>, <4 x float>* null ; <<4 x float>>:634 [#uses=1]
shufflevector <4 x float> zeroinitializer, <4 x float> %634, <4 x i32> < i32 0, i32 1, i32 2, i32 7 > ; <<4 x float>>:635 [#uses=1]
store <4 x float> %635, <4 x float>* null
br label %xST.exit400
xST.exit400: ; preds = %633, %625, %610
%.17218 = phi <4 x float> [ zeroinitializer, %610 ], [ %608, %633 ], [ %608, %625 ] ; <<4 x float>> [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 0, i32 0 ; <<4 x float>*>:636 [#uses=1]
- load <4 x float>* %636 ; <<4 x float>>:637 [#uses=0]
- load <4 x float>* null ; <<4 x float>>:638 [#uses=2]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 0, i32 2 ; <<4 x float>*>:639 [#uses=0]
- load <4 x float>* null ; <<4 x float>>:640 [#uses=2]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 0, i32 0 ; <<4 x float>*>:636 [#uses=1]
+ load <4 x float>, <4 x float>* %636 ; <<4 x float>>:637 [#uses=0]
+ load <4 x float>, <4 x float>* null ; <<4 x float>>:638 [#uses=2]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 0, i32 2 ; <<4 x float>*>:639 [#uses=0]
+ load <4 x float>, <4 x float>* null ; <<4 x float>>:640 [#uses=2]
fmul <4 x float> %638, %638 ; <<4 x float>>:641 [#uses=1]
fmul <4 x float> zeroinitializer, zeroinitializer ; <<4 x float>>:642 [#uses=0]
fmul <4 x float> %640, %640 ; <<4 x float>>:643 [#uses=2]
@@ -691,12 +691,12 @@ xST.exit400: ; preds = %633, %625, %610
br i1 %656, label %665, label %657
; <label>:657 ; preds = %xST.exit400
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 4, i32 0 ; <<4 x float>*>:658 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 4, i32 0 ; <<4 x float>*>:658 [#uses=0]
shufflevector <4 x float> %653, <4 x float> zeroinitializer, <4 x i32> < i32 0, i32 5, i32 6, i32 7 > ; <<4 x float>>:659 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 4, i32 1 ; <<4 x float>*>:660 [#uses=1]
- load <4 x float>* %660 ; <<4 x float>>:661 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 4, i32 2 ; <<4 x float>*>:662 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 4, i32 3 ; <<4 x float>*>:663 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 4, i32 1 ; <<4 x float>*>:660 [#uses=1]
+ load <4 x float>, <4 x float>* %660 ; <<4 x float>>:661 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 4, i32 2 ; <<4 x float>*>:662 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 4, i32 3 ; <<4 x float>*>:663 [#uses=0]
shufflevector <4 x float> zeroinitializer, <4 x float> zeroinitializer, <4 x i32> < i32 0, i32 5, i32 6, i32 7 > ; <<4 x float>>:664 [#uses=0]
br label %xST.exit402
@@ -705,7 +705,7 @@ xST.exit400: ; preds = %633, %625, %610
br i1 false, label %669, label %667
; <label>:667 ; preds = %665
- load <4 x float>* null ; <<4 x float>>:668 [#uses=0]
+ load <4 x float>, <4 x float>* null ; <<4 x float>>:668 [#uses=0]
br label %669
; <label>:669 ; preds = %667, %665
@@ -713,12 +713,12 @@ xST.exit400: ; preds = %633, %625, %610
br label %xST.exit402
xST.exit402: ; preds = %669, %657
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 1, i32 0 ; <<4 x float>*>:671 [#uses=0]
- load <4 x float>* null ; <<4 x float>>:672 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 1, i32 2 ; <<4 x float>*>:673 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 4, i32 1 ; <<4 x float>*>:674 [#uses=1]
- load <4 x float>* %674 ; <<4 x float>>:675 [#uses=1]
- load <4 x float>* null ; <<4 x float>>:676 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 1, i32 0 ; <<4 x float>*>:671 [#uses=0]
+ load <4 x float>, <4 x float>* null ; <<4 x float>>:672 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 1, i32 2 ; <<4 x float>*>:673 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 4, i32 1 ; <<4 x float>*>:674 [#uses=1]
+ load <4 x float>, <4 x float>* %674 ; <<4 x float>>:675 [#uses=1]
+ load <4 x float>, <4 x float>* null ; <<4 x float>>:676 [#uses=0]
shufflevector <4 x float> zeroinitializer, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>>:677 [#uses=1]
shufflevector <4 x float> %675, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>>:678 [#uses=1]
fmul <4 x float> zeroinitializer, %677 ; <<4 x float>>:679 [#uses=0]
@@ -728,68 +728,68 @@ xST.exit402: ; preds = %669, %657
br i1 %682, label %689, label %683
; <label>:683 ; preds = %xST.exit402
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 5, i32 1 ; <<4 x float>*>:684 [#uses=1]
- load <4 x float>* %684 ; <<4 x float>>:685 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 5, i32 2 ; <<4 x float>*>:686 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 5, i32 3 ; <<4 x float>*>:687 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 5, i32 1 ; <<4 x float>*>:684 [#uses=1]
+ load <4 x float>, <4 x float>* %684 ; <<4 x float>>:685 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 5, i32 2 ; <<4 x float>*>:686 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 5, i32 3 ; <<4 x float>*>:687 [#uses=0]
shufflevector <4 x float> %681, <4 x float> zeroinitializer, <4 x i32> < i32 0, i32 5, i32 6, i32 7 > ; <<4 x float>>:688 [#uses=0]
br label %xST.exit405
; <label>:689 ; preds = %xST.exit402
shufflevector <4 x i32> zeroinitializer, <4 x i32> undef, <4 x i32> zeroinitializer ; <<4 x i32>>:690 [#uses=0]
- load <4 x i32>* %.sub7896 ; <<4 x i32>>:691 [#uses=1]
+ load <4 x i32>, <4 x i32>* %.sub7896 ; <<4 x i32>>:691 [#uses=1]
shufflevector <4 x i32> %691, <4 x i32> undef, <4 x i32> < i32 3, i32 3, i32 3, i32 3 > ; <<4 x i32>>:692 [#uses=1]
call i32 @llvm.ppc.altivec.vcmpequw.p( i32 0, <4 x i32> %692, <4 x i32> zeroinitializer ) ; <i32>:693 [#uses=1]
icmp eq i32 %693, 0 ; <i1>:694 [#uses=0]
br label %xST.exit405
xST.exit405: ; preds = %689, %683
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 1, i32 3 ; <<4 x float>*>:695 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 1, i32 3 ; <<4 x float>*>:695 [#uses=0]
shufflevector <4 x float> zeroinitializer, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>>:696 [#uses=0]
shufflevector <4 x float> zeroinitializer, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>>:697 [#uses=0]
- load <4 x float>* null ; <<4 x float>>:698 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 5, i32 2 ; <<4 x float>*>:699 [#uses=0]
+ load <4 x float>, <4 x float>* null ; <<4 x float>>:698 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 5, i32 2 ; <<4 x float>*>:699 [#uses=0]
shufflevector <4 x float> zeroinitializer, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>>:700 [#uses=1]
fadd <4 x float> zeroinitializer, %700 ; <<4 x float>>:701 [#uses=0]
- load <4 x i32>* %.sub7896 ; <<4 x i32>>:702 [#uses=1]
+ load <4 x i32>, <4 x i32>* %.sub7896 ; <<4 x i32>>:702 [#uses=1]
call i32 @llvm.ppc.altivec.vcmpequw.p( i32 0, <4 x i32> %702, <4 x i32> zeroinitializer ) ; <i32>:703 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 1, i32 1 ; <<4 x float>*>:704 [#uses=2]
- load <4 x float>* %704 ; <<4 x float>>:705 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 1, i32 1 ; <<4 x float>*>:704 [#uses=2]
+ load <4 x float>, <4 x float>* %704 ; <<4 x float>>:705 [#uses=0]
store <4 x float> zeroinitializer, <4 x float>* %704
- load <4 x float>* null ; <<4 x float>>:706 [#uses=0]
+ load <4 x float>, <4 x float>* null ; <<4 x float>>:706 [#uses=0]
store <4 x float> zeroinitializer, <4 x float>* null
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 1, i32 3 ; <<4 x float>*>:707 [#uses=2]
- load <4 x float>* %707 ; <<4 x float>>:708 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 1, i32 3 ; <<4 x float>*>:707 [#uses=2]
+ load <4 x float>, <4 x float>* %707 ; <<4 x float>>:708 [#uses=0]
store <4 x float> zeroinitializer, <4 x float>* %707
- load <4 x float>* null ; <<4 x float>>:709 [#uses=0]
- load <4 x float>* null ; <<4 x float>>:710 [#uses=0]
- load <4 x float>* null ; <<4 x float>>:711 [#uses=1]
+ load <4 x float>, <4 x float>* null ; <<4 x float>>:709 [#uses=0]
+ load <4 x float>, <4 x float>* null ; <<4 x float>>:710 [#uses=0]
+ load <4 x float>, <4 x float>* null ; <<4 x float>>:711 [#uses=1]
shufflevector <4 x float> %711, <4 x float> undef, <4 x i32> < i32 2, i32 2, i32 2, i32 2 > ; <<4 x float>>:712 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 4, i32 1 ; <<4 x float>*>:713 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 4, i32 2 ; <<4 x float>*>:714 [#uses=1]
- load <4 x float>* %714 ; <<4 x float>>:715 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 4, i32 1 ; <<4 x float>*>:713 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 4, i32 2 ; <<4 x float>*>:714 [#uses=1]
+ load <4 x float>, <4 x float>* %714 ; <<4 x float>>:715 [#uses=0]
shufflevector <4 x float> zeroinitializer, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>>:716 [#uses=0]
fmul <4 x float> zeroinitializer, zeroinitializer ; <<4 x float>>:717 [#uses=1]
- load <4 x i32>* %.sub7896 ; <<4 x i32>>:718 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 5, i32 0 ; <<4 x float>*>:719 [#uses=1]
+ load <4 x i32>, <4 x i32>* %.sub7896 ; <<4 x i32>>:718 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 5, i32 0 ; <<4 x float>*>:719 [#uses=1]
store <4 x float> zeroinitializer, <4 x float>* %719
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 5, i32 1 ; <<4 x float>*>:720 [#uses=1]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 5, i32 1 ; <<4 x float>*>:720 [#uses=1]
shufflevector <4 x float> %717, <4 x float> zeroinitializer, <4 x i32> < i32 0, i32 5, i32 6, i32 7 > ; <<4 x float>>:721 [#uses=1]
store <4 x float> %721, <4 x float>* %720
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 5, i32 2 ; <<4 x float>*>:722 [#uses=1]
- load <4 x float>* %722 ; <<4 x float>>:723 [#uses=1]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 5, i32 2 ; <<4 x float>*>:722 [#uses=1]
+ load <4 x float>, <4 x float>* %722 ; <<4 x float>>:723 [#uses=1]
shufflevector <4 x float> zeroinitializer, <4 x float> %723, <4 x i32> < i32 0, i32 5, i32 6, i32 7 > ; <<4 x float>>:724 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 5, i32 3 ; <<4 x float>*>:725 [#uses=1]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 5, i32 3 ; <<4 x float>*>:725 [#uses=1]
store <4 x float> zeroinitializer, <4 x float>* %725
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 5, i32 2 ; <<4 x float>*>:726 [#uses=1]
- load <4 x float>* %726 ; <<4 x float>>:727 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 5, i32 3 ; <<4 x float>*>:728 [#uses=1]
- load <4 x float>* %728 ; <<4 x float>>:729 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 4, i32 0 ; <<4 x float>*>:730 [#uses=1]
- load <4 x float>* %730 ; <<4 x float>>:731 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 4, i32 1 ; <<4 x float>*>:732 [#uses=1]
- load <4 x float>* %732 ; <<4 x float>>:733 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 4, i32 3 ; <<4 x float>*>:734 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 5, i32 2 ; <<4 x float>*>:726 [#uses=1]
+ load <4 x float>, <4 x float>* %726 ; <<4 x float>>:727 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 5, i32 3 ; <<4 x float>*>:728 [#uses=1]
+ load <4 x float>, <4 x float>* %728 ; <<4 x float>>:729 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 4, i32 0 ; <<4 x float>*>:730 [#uses=1]
+ load <4 x float>, <4 x float>* %730 ; <<4 x float>>:731 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 4, i32 1 ; <<4 x float>*>:732 [#uses=1]
+ load <4 x float>, <4 x float>* %732 ; <<4 x float>>:733 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 4, i32 3 ; <<4 x float>*>:734 [#uses=0]
shufflevector <4 x float> zeroinitializer, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>>:735 [#uses=1]
fmul <4 x float> zeroinitializer, zeroinitializer ; <<4 x float>>:736 [#uses=1]
fmul <4 x float> zeroinitializer, zeroinitializer ; <<4 x float>>:737 [#uses=1]
@@ -797,28 +797,28 @@ xST.exit405: ; preds = %689, %683
fmul <4 x float> zeroinitializer, zeroinitializer ; <<4 x float>>:739 [#uses=1]
call i32 @llvm.ppc.altivec.vcmpequw.p( i32 0, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer ) ; <i32>:740 [#uses=1]
icmp eq i32 %740, 0 ; <i1>:741 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 4, i32 0 ; <<4 x float>*>:742 [#uses=2]
- load <4 x float>* %742 ; <<4 x float>>:743 [#uses=1]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 4, i32 0 ; <<4 x float>*>:742 [#uses=2]
+ load <4 x float>, <4 x float>* %742 ; <<4 x float>>:743 [#uses=1]
shufflevector <4 x float> %736, <4 x float> %743, <4 x i32> < i32 0, i32 5, i32 6, i32 7 > ; <<4 x float>>:744 [#uses=1]
store <4 x float> %744, <4 x float>* %742
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 4, i32 1 ; <<4 x float>*>:745 [#uses=1]
- load <4 x float>* %745 ; <<4 x float>>:746 [#uses=1]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 4, i32 1 ; <<4 x float>*>:745 [#uses=1]
+ load <4 x float>, <4 x float>* %745 ; <<4 x float>>:746 [#uses=1]
shufflevector <4 x float> %737, <4 x float> %746, <4 x i32> < i32 0, i32 5, i32 6, i32 7 > ; <<4 x float>>:747 [#uses=0]
shufflevector <4 x float> %738, <4 x float> zeroinitializer, <4 x i32> < i32 0, i32 5, i32 6, i32 7 > ; <<4 x float>>:748 [#uses=1]
store <4 x float> %748, <4 x float>* null
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 4, i32 3 ; <<4 x float>*>:749 [#uses=1]
- load <4 x float>* %749 ; <<4 x float>>:750 [#uses=1]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 4, i32 3 ; <<4 x float>*>:749 [#uses=1]
+ load <4 x float>, <4 x float>* %749 ; <<4 x float>>:750 [#uses=1]
shufflevector <4 x float> %739, <4 x float> %750, <4 x i32> < i32 0, i32 5, i32 6, i32 7 > ; <<4 x float>>:751 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 1, i32 0 ; <<4 x float>*>:752 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 1, i32 1 ; <<4 x float>*>:753 [#uses=1]
- load <4 x float>* %753 ; <<4 x float>>:754 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 1, i32 2 ; <<4 x float>*>:755 [#uses=0]
- load <4 x float>* null ; <<4 x float>>:756 [#uses=1]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 1, i32 0 ; <<4 x float>*>:752 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 1, i32 1 ; <<4 x float>*>:753 [#uses=1]
+ load <4 x float>, <4 x float>* %753 ; <<4 x float>>:754 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 1, i32 2 ; <<4 x float>*>:755 [#uses=0]
+ load <4 x float>, <4 x float>* null ; <<4 x float>>:756 [#uses=1]
shufflevector <4 x float> zeroinitializer, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>>:757 [#uses=1]
shufflevector <4 x float> %756, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>>:758 [#uses=1]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 4, i32 2 ; <<4 x float>*>:759 [#uses=1]
- load <4 x float>* %759 ; <<4 x float>>:760 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 4, i32 3 ; <<4 x float>*>:761 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 4, i32 2 ; <<4 x float>*>:759 [#uses=1]
+ load <4 x float>, <4 x float>* %759 ; <<4 x float>>:760 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 4, i32 3 ; <<4 x float>*>:761 [#uses=0]
shufflevector <4 x float> zeroinitializer, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>>:762 [#uses=0]
shufflevector <4 x float> zeroinitializer, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>>:763 [#uses=1]
fadd <4 x float> %757, zeroinitializer ; <<4 x float>>:764 [#uses=0]
@@ -827,12 +827,12 @@ xST.exit405: ; preds = %689, %683
br i1 false, label %773, label %767
; <label>:767 ; preds = %xST.exit405
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 0, i32 1 ; <<4 x float>*>:768 [#uses=0]
- load <4 x float>* null ; <<4 x float>>:769 [#uses=1]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 0, i32 1 ; <<4 x float>*>:768 [#uses=0]
+ load <4 x float>, <4 x float>* null ; <<4 x float>>:769 [#uses=1]
shufflevector <4 x float> zeroinitializer, <4 x float> %769, <4 x i32> < i32 0, i32 1, i32 2, i32 7 > ; <<4 x float>>:770 [#uses=1]
store <4 x float> %770, <4 x float>* null
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 0, i32 3 ; <<4 x float>*>:771 [#uses=1]
- load <4 x float>* %771 ; <<4 x float>>:772 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 0, i32 3 ; <<4 x float>*>:771 [#uses=1]
+ load <4 x float>, <4 x float>* %771 ; <<4 x float>>:772 [#uses=0]
br label %xST.exit422
; <label>:773 ; preds = %xST.exit405
@@ -840,30 +840,30 @@ xST.exit405: ; preds = %689, %683
xST.exit422: ; preds = %773, %767
%.07267 = phi <4 x float> [ %766, %767 ], [ undef, %773 ] ; <<4 x float>> [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 0, i32 3 ; <<4 x float>*>:774 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 0, i32 3 ; <<4 x float>*>:774 [#uses=0]
fmul <4 x float> zeroinitializer, zeroinitializer ; <<4 x float>>:775 [#uses=0]
icmp eq i32 0, 0 ; <i1>:776 [#uses=1]
br i1 %776, label %780, label %777
; <label>:777 ; preds = %xST.exit422
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 0, i32 2 ; <<4 x float>*>:778 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 0, i32 3 ; <<4 x float>*>:779 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 0, i32 2 ; <<4 x float>*>:778 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 0, i32 3 ; <<4 x float>*>:779 [#uses=0]
br label %xST.exit431
; <label>:780 ; preds = %xST.exit422
- load <4 x i32>* %.sub7896 ; <<4 x i32>>:781 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 0, i32 2 ; <<4 x float>*>:782 [#uses=2]
- load <4 x float>* %782 ; <<4 x float>>:783 [#uses=0]
+ load <4 x i32>, <4 x i32>* %.sub7896 ; <<4 x i32>>:781 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 0, i32 2 ; <<4 x float>*>:782 [#uses=2]
+ load <4 x float>, <4 x float>* %782 ; <<4 x float>>:783 [#uses=0]
store <4 x float> zeroinitializer, <4 x float>* %782
- load <4 x i32>* %.sub7896 ; <<4 x i32>>:784 [#uses=1]
+ load <4 x i32>, <4 x i32>* %.sub7896 ; <<4 x i32>>:784 [#uses=1]
shufflevector <4 x i32> %784, <4 x i32> undef, <4 x i32> < i32 3, i32 3, i32 3, i32 3 > ; <<4 x i32>>:785 [#uses=0]
icmp eq i32 0, 0 ; <i1>:786 [#uses=0]
br label %xST.exit431
xST.exit431: ; preds = %780, %777
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 0, i32 2 ; <<4 x float>*>:787 [#uses=0]
- load <4 x float>* null ; <<4 x float>>:788 [#uses=0]
- load <4 x i32>* %.sub7896 ; <<4 x i32>>:789 [#uses=2]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 0, i32 2 ; <<4 x float>*>:787 [#uses=0]
+ load <4 x float>, <4 x float>* null ; <<4 x float>>:788 [#uses=0]
+ load <4 x i32>, <4 x i32>* %.sub7896 ; <<4 x i32>>:789 [#uses=2]
call i32 @llvm.ppc.altivec.vcmpequw.p( i32 0, <4 x i32> %789, <4 x i32> zeroinitializer ) ; <i32>:790 [#uses=1]
icmp eq i32 %790, 0 ; <i1>:791 [#uses=0]
shufflevector <4 x i32> %789, <4 x i32> undef, <4 x i32> zeroinitializer ; <<4 x i32>>:792 [#uses=1]
@@ -872,7 +872,7 @@ xST.exit431: ; preds = %780, %777
br i1 %794, label %797, label %795
; <label>:795 ; preds = %xST.exit431
- load <4 x float>* null ; <<4 x float>>:796 [#uses=0]
+ load <4 x float>, <4 x float>* null ; <<4 x float>>:796 [#uses=0]
store <4 x float> zeroinitializer, <4 x float>* null
br label %797
@@ -882,38 +882,38 @@ xST.exit431: ; preds = %780, %777
br i1 false, label %xST.exit434, label %799
; <label>:799 ; preds = %797
- load <4 x float>* null ; <<4 x float>>:800 [#uses=0]
+ load <4 x float>, <4 x float>* null ; <<4 x float>>:800 [#uses=0]
store <4 x float> zeroinitializer, <4 x float>* null
br label %xST.exit434
xST.exit434: ; preds = %799, %797
- load <4 x i32>* %.sub7896 ; <<4 x i32>>:801 [#uses=1]
+ load <4 x i32>, <4 x i32>* %.sub7896 ; <<4 x i32>>:801 [#uses=1]
shufflevector <4 x i32> %801, <4 x i32> undef, <4 x i32> < i32 2, i32 2, i32 2, i32 2 > ; <<4 x i32>>:802 [#uses=0]
shufflevector <4 x i32> zeroinitializer, <4 x i32> undef, <4 x i32> < i32 3, i32 3, i32 3, i32 3 > ; <<4 x i32>>:803 [#uses=0]
icmp eq i32 0, 0 ; <i1>:804 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 0, i32 0 ; <<4 x float>*>:805 [#uses=1]
- load <4 x float>* %805 ; <<4 x float>>:806 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 0, i32 1 ; <<4 x float>*>:807 [#uses=1]
- load <4 x float>* %807 ; <<4 x float>>:808 [#uses=0]
- load <4 x float>* null ; <<4 x float>>:809 [#uses=0]
- load <4 x float>* null ; <<4 x float>>:810 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 1, i32 0 ; <<4 x float>*>:811 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 1, i32 2 ; <<4 x float>*>:812 [#uses=1]
- load <4 x float>* %812 ; <<4 x float>>:813 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 1, i32 3 ; <<4 x float>*>:814 [#uses=1]
- load <4 x float>* %814 ; <<4 x float>>:815 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 0, i32 0 ; <<4 x float>*>:805 [#uses=1]
+ load <4 x float>, <4 x float>* %805 ; <<4 x float>>:806 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 0, i32 1 ; <<4 x float>*>:807 [#uses=1]
+ load <4 x float>, <4 x float>* %807 ; <<4 x float>>:808 [#uses=0]
+ load <4 x float>, <4 x float>* null ; <<4 x float>>:809 [#uses=0]
+ load <4 x float>, <4 x float>* null ; <<4 x float>>:810 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 1, i32 0 ; <<4 x float>*>:811 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 1, i32 2 ; <<4 x float>*>:812 [#uses=1]
+ load <4 x float>, <4 x float>* %812 ; <<4 x float>>:813 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 1, i32 3 ; <<4 x float>*>:814 [#uses=1]
+ load <4 x float>, <4 x float>* %814 ; <<4 x float>>:815 [#uses=0]
shufflevector <4 x float> zeroinitializer, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>>:816 [#uses=0]
unreachable
xPBRK.exit: ; preds = %.critedge
store <4 x i32> < i32 -1, i32 -1, i32 -1, i32 -1 >, <4 x i32>* %.sub7896
store <4 x i32> zeroinitializer, <4 x i32>* null
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 186, i32 1 ; <<4 x float>*>:817 [#uses=1]
- load <4 x float>* %817 ; <<4 x float>>:818 [#uses=1]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 186, i32 2 ; <<4 x float>*>:819 [#uses=1]
- load <4 x float>* %819 ; <<4 x float>>:820 [#uses=1]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 186, i32 3 ; <<4 x float>*>:821 [#uses=1]
- load <4 x float>* %821 ; <<4 x float>>:822 [#uses=1]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 186, i32 1 ; <<4 x float>*>:817 [#uses=1]
+ load <4 x float>, <4 x float>* %817 ; <<4 x float>>:818 [#uses=1]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 186, i32 2 ; <<4 x float>*>:819 [#uses=1]
+ load <4 x float>, <4 x float>* %819 ; <<4 x float>>:820 [#uses=1]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 186, i32 3 ; <<4 x float>*>:821 [#uses=1]
+ load <4 x float>, <4 x float>* %821 ; <<4 x float>>:822 [#uses=1]
shufflevector <4 x float> zeroinitializer, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>>:823 [#uses=1]
shufflevector <4 x float> %818, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>>:824 [#uses=1]
shufflevector <4 x float> %820, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>>:825 [#uses=1]
@@ -921,10 +921,10 @@ xPBRK.exit: ; preds = %.critedge
shufflevector <4 x float> %823, <4 x float> zeroinitializer, <4 x i32> < i32 0, i32 5, i32 6, i32 7 > ; <<4 x float>>:827 [#uses=0]
shufflevector <4 x float> %824, <4 x float> zeroinitializer, <4 x i32> < i32 0, i32 5, i32 6, i32 7 > ; <<4 x float>>:828 [#uses=1]
store <4 x float> %828, <4 x float>* null
- load <4 x float>* null ; <<4 x float>>:829 [#uses=1]
+ load <4 x float>, <4 x float>* null ; <<4 x float>>:829 [#uses=1]
shufflevector <4 x float> %825, <4 x float> %829, <4 x i32> < i32 0, i32 5, i32 6, i32 7 > ; <<4 x float>>:830 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 186, i32 3 ; <<4 x float>*>:831 [#uses=2]
- load <4 x float>* %831 ; <<4 x float>>:832 [#uses=1]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 186, i32 3 ; <<4 x float>*>:831 [#uses=2]
+ load <4 x float>, <4 x float>* %831 ; <<4 x float>>:832 [#uses=1]
shufflevector <4 x float> %826, <4 x float> %832, <4 x i32> < i32 0, i32 5, i32 6, i32 7 > ; <<4 x float>>:833 [#uses=1]
store <4 x float> %833, <4 x float>* %831
br label %xLS.exit449
@@ -958,14 +958,14 @@ xLS.exit449: ; preds = %1215, %xPBRK.exit
%.17731 = phi <4 x float> [ undef, %xPBRK.exit ], [ %.07730, %1215 ] ; <<4 x float>> [#uses=2]
%.17735 = phi <4 x float> [ undef, %xPBRK.exit ], [ %.07734, %1215 ] ; <<4 x float>> [#uses=2]
%.17770 = phi <4 x float> [ undef, %xPBRK.exit ], [ %.07769, %1215 ] ; <<4 x float>> [#uses=2]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 186, i32 0 ; <<4 x float>*>:834 [#uses=0]
- load <4 x float>* null ; <<4 x float>>:835 [#uses=1]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 186, i32 2 ; <<4 x float>*>:836 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 186, i32 3 ; <<4 x float>*>:837 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 186, i32 0 ; <<4 x float>*>:834 [#uses=0]
+ load <4 x float>, <4 x float>* null ; <<4 x float>>:835 [#uses=1]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 186, i32 2 ; <<4 x float>*>:836 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 186, i32 3 ; <<4 x float>*>:837 [#uses=0]
shufflevector <4 x float> zeroinitializer, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>>:838 [#uses=0]
shufflevector <4 x float> %835, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>>:839 [#uses=1]
- getelementptr <4 x float>* null, i32 878 ; <<4 x float>*>:840 [#uses=1]
- load <4 x float>* %840 ; <<4 x float>>:841 [#uses=0]
+ getelementptr <4 x float>, <4 x float>* null, i32 878 ; <<4 x float>*>:840 [#uses=1]
+ load <4 x float>, <4 x float>* %840 ; <<4 x float>>:841 [#uses=0]
call <4 x float> @llvm.ppc.altivec.vcfsx( <4 x i32> zeroinitializer, i32 0 ) ; <<4 x float>>:842 [#uses=1]
shufflevector <4 x float> %842, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>>:843 [#uses=2]
call <4 x i32> @llvm.ppc.altivec.vcmpgtfp( <4 x float> %843, <4 x float> %839 ) ; <<4 x i32>>:844 [#uses=1]
@@ -977,7 +977,7 @@ xLS.exit449: ; preds = %1215, %xPBRK.exit
; <label>:849 ; preds = %xLS.exit449
shufflevector <4 x float> zeroinitializer, <4 x float> zeroinitializer, <4 x i32> < i32 0, i32 5, i32 6, i32 7 > ; <<4 x float>>:850 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 189, i32 1 ; <<4 x float>*>:851 [#uses=1]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 189, i32 1 ; <<4 x float>*>:851 [#uses=1]
store <4 x float> zeroinitializer, <4 x float>* %851
shufflevector <4 x float> zeroinitializer, <4 x float> zeroinitializer, <4 x i32> < i32 0, i32 5, i32 6, i32 7 > ; <<4 x float>>:852 [#uses=1]
store <4 x float> %852, <4 x float>* null
@@ -989,8 +989,8 @@ xLS.exit449: ; preds = %1215, %xPBRK.exit
br i1 false, label %859, label %856
; <label>:856 ; preds = %854
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 189, i32 0 ; <<4 x float>*>:857 [#uses=2]
- load <4 x float>* %857 ; <<4 x float>>:858 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 189, i32 0 ; <<4 x float>*>:857 [#uses=2]
+ load <4 x float>, <4 x float>* %857 ; <<4 x float>>:858 [#uses=0]
store <4 x float> zeroinitializer, <4 x float>* %857
br label %859
@@ -999,13 +999,13 @@ xLS.exit449: ; preds = %1215, %xPBRK.exit
br i1 false, label %864, label %861
; <label>:861 ; preds = %859
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 189, i32 1 ; <<4 x float>*>:862 [#uses=1]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 189, i32 1 ; <<4 x float>*>:862 [#uses=1]
shufflevector <4 x float> %845, <4 x float> zeroinitializer, <4 x i32> < i32 0, i32 5, i32 6, i32 7 > ; <<4 x float>>:863 [#uses=1]
store <4 x float> %863, <4 x float>* %862
br label %864
; <label>:864 ; preds = %861, %859
- load <4 x i32>* %.sub7896 ; <<4 x i32>>:865 [#uses=1]
+ load <4 x i32>, <4 x i32>* %.sub7896 ; <<4 x i32>>:865 [#uses=1]
shufflevector <4 x i32> %865, <4 x i32> undef, <4 x i32> < i32 2, i32 2, i32 2, i32 2 > ; <<4 x i32>>:866 [#uses=0]
br i1 false, label %868, label %867
@@ -1018,9 +1018,9 @@ xLS.exit449: ; preds = %1215, %xPBRK.exit
br label %xST.exit451
xST.exit451: ; preds = %868, %849
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 189, i32 0 ; <<4 x float>*>:870 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 189, i32 1 ; <<4 x float>*>:871 [#uses=0]
- load <4 x float>* null ; <<4 x float>>:872 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 189, i32 0 ; <<4 x float>*>:870 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 189, i32 1 ; <<4 x float>*>:871 [#uses=0]
+ load <4 x float>, <4 x float>* null ; <<4 x float>>:872 [#uses=0]
shufflevector <4 x float> zeroinitializer, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>>:873 [#uses=1]
bitcast <4 x float> zeroinitializer to <4 x i32> ; <<4 x i32>>:874 [#uses=1]
xor <4 x i32> %874, < i32 -1, i32 -1, i32 -1, i32 -1 > ; <<4 x i32>>:875 [#uses=0]
@@ -1029,16 +1029,16 @@ xST.exit451: ; preds = %868, %849
bitcast <4 x float> zeroinitializer to <4 x i32> ; <<4 x i32>>:878 [#uses=1]
xor <4 x i32> %878, < i32 -1, i32 -1, i32 -1, i32 -1 > ; <<4 x i32>>:879 [#uses=1]
bitcast <4 x i32> %879 to <4 x float> ; <<4 x float>>:880 [#uses=0]
- load <4 x i32>* %.sub7896 ; <<4 x i32>>:881 [#uses=1]
+ load <4 x i32>, <4 x i32>* %.sub7896 ; <<4 x i32>>:881 [#uses=1]
icmp eq i32 0, 0 ; <i1>:882 [#uses=1]
br i1 %882, label %888, label %883
; <label>:883 ; preds = %xST.exit451
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 189, i32 0 ; <<4 x float>*>:884 [#uses=1]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 189, i32 0 ; <<4 x float>*>:884 [#uses=1]
store <4 x float> zeroinitializer, <4 x float>* %884
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 189, i32 1 ; <<4 x float>*>:885 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 189, i32 1 ; <<4 x float>*>:885 [#uses=0]
shufflevector <4 x float> zeroinitializer, <4 x float> zeroinitializer, <4 x i32> < i32 0, i32 5, i32 6, i32 7 > ; <<4 x float>>:886 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 189, i32 3 ; <<4 x float>*>:887 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 189, i32 3 ; <<4 x float>*>:887 [#uses=0]
br label %xST.exit453
; <label>:888 ; preds = %xST.exit451
@@ -1047,7 +1047,7 @@ xST.exit451: ; preds = %868, %849
br i1 false, label %894, label %891
; <label>:891 ; preds = %888
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 189, i32 1 ; <<4 x float>*>:892 [#uses=1]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 189, i32 1 ; <<4 x float>*>:892 [#uses=1]
shufflevector <4 x float> zeroinitializer, <4 x float> zeroinitializer, <4 x i32> < i32 0, i32 5, i32 6, i32 7 > ; <<4 x float>>:893 [#uses=1]
store <4 x float> %893, <4 x float>* %892
br label %894
@@ -1061,34 +1061,34 @@ xST.exit451: ; preds = %868, %849
br label %898
; <label>:898 ; preds = %897, %894
- load <4 x i32>* %.sub7896 ; <<4 x i32>>:899 [#uses=0]
+ load <4 x i32>, <4 x i32>* %.sub7896 ; <<4 x i32>>:899 [#uses=0]
br i1 false, label %xST.exit453, label %900
; <label>:900 ; preds = %898
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 189, i32 3 ; <<4 x float>*>:901 [#uses=1]
- load <4 x float>* %901 ; <<4 x float>>:902 [#uses=1]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 189, i32 3 ; <<4 x float>*>:901 [#uses=1]
+ load <4 x float>, <4 x float>* %901 ; <<4 x float>>:902 [#uses=1]
shufflevector <4 x float> zeroinitializer, <4 x float> %902, <4 x i32> < i32 0, i32 5, i32 6, i32 7 > ; <<4 x float>>:903 [#uses=0]
br label %xST.exit453
xST.exit453: ; preds = %900, %898, %883
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 189, i32 1 ; <<4 x float>*>:904 [#uses=0]
- load <4 x float>* null ; <<4 x float>>:905 [#uses=1]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 189, i32 3 ; <<4 x float>*>:906 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 189, i32 1 ; <<4 x float>*>:904 [#uses=0]
+ load <4 x float>, <4 x float>* null ; <<4 x float>>:905 [#uses=1]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 189, i32 3 ; <<4 x float>*>:906 [#uses=0]
shufflevector <4 x float> zeroinitializer, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>>:907 [#uses=1]
shufflevector <4 x float> %905, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>>:908 [#uses=1]
bitcast <4 x float> zeroinitializer to <4 x i32> ; <<4 x i32>>:909 [#uses=0]
bitcast <4 x float> %908 to <4 x i32> ; <<4 x i32>>:910 [#uses=0]
bitcast <4 x float> %907 to <4 x i32> ; <<4 x i32>>:911 [#uses=0]
bitcast <4 x float> zeroinitializer to <4 x i32> ; <<4 x i32>>:912 [#uses=0]
- load <4 x i32>* %.sub7896 ; <<4 x i32>>:913 [#uses=0]
+ load <4 x i32>, <4 x i32>* %.sub7896 ; <<4 x i32>>:913 [#uses=0]
call i32 @llvm.ppc.altivec.vcmpequw.p( i32 2, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer ) ; <i32>:914 [#uses=0]
br i1 false, label %915, label %xPIF.exit455
; <label>:915 ; preds = %xST.exit453
- load <4 x i32>* %.sub7896 ; <<4 x i32>>:916 [#uses=0]
- getelementptr [4 x <4 x i32>]* null, i32 0, i32 3 ; <<4 x i32>*>:917 [#uses=1]
+ load <4 x i32>, <4 x i32>* %.sub7896 ; <<4 x i32>>:916 [#uses=0]
+ getelementptr [4 x <4 x i32>], [4 x <4 x i32>]* null, i32 0, i32 3 ; <<4 x i32>*>:917 [#uses=1]
store <4 x i32> zeroinitializer, <4 x i32>* %917
- load <4 x i32>* %.sub7896 ; <<4 x i32>>:918 [#uses=1]
+ load <4 x i32>, <4 x i32>* %.sub7896 ; <<4 x i32>>:918 [#uses=1]
and <4 x i32> %918, zeroinitializer ; <<4 x i32>>:919 [#uses=0]
br label %.critedge7899
@@ -1101,16 +1101,16 @@ xPBRK.exit456: ; preds = %.critedge7899
unreachable
xPIF.exit455: ; preds = %xST.exit453
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 186, i32 0 ; <<4 x float>*>:922 [#uses=1]
- load <4 x float>* %922 ; <<4 x float>>:923 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 186, i32 1 ; <<4 x float>*>:924 [#uses=1]
- load <4 x float>* %924 ; <<4 x float>>:925 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 186, i32 2 ; <<4 x float>*>:926 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 186, i32 3 ; <<4 x float>*>:927 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 186, i32 0 ; <<4 x float>*>:922 [#uses=1]
+ load <4 x float>, <4 x float>* %922 ; <<4 x float>>:923 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 186, i32 1 ; <<4 x float>*>:924 [#uses=1]
+ load <4 x float>, <4 x float>* %924 ; <<4 x float>>:925 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 186, i32 2 ; <<4 x float>*>:926 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 186, i32 3 ; <<4 x float>*>:927 [#uses=0]
shufflevector <4 x float> zeroinitializer, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>>:928 [#uses=0]
bitcast { { i16, i16, i32 } }* %1 to <4 x float>* ; <<4 x float>*>:929 [#uses=0]
bitcast <4 x float> zeroinitializer to <4 x i32> ; <<4 x i32>>:930 [#uses=0]
- load <4 x i32>* %.sub7896 ; <<4 x i32>>:931 [#uses=0]
+ load <4 x i32>, <4 x i32>* %.sub7896 ; <<4 x i32>>:931 [#uses=0]
icmp eq i32 0, 0 ; <i1>:932 [#uses=1]
br i1 %932, label %934, label %933
@@ -1129,13 +1129,13 @@ xPIF.exit455: ; preds = %xST.exit453
xST.exit459: ; preds = %937, %934
shufflevector <4 x i32> zeroinitializer, <4 x i32> undef, <4 x i32> < i32 2, i32 2, i32 2, i32 2 > ; <<4 x i32>>:938 [#uses=1]
call i32 @llvm.ppc.altivec.vcmpequw.p( i32 0, <4 x i32> %938, <4 x i32> zeroinitializer ) ; <i32>:939 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 188, i32 2 ; <<4 x float>*>:940 [#uses=1]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 188, i32 2 ; <<4 x float>*>:940 [#uses=1]
store <4 x float> zeroinitializer, <4 x float>* %940
- load <4 x float>* null ; <<4 x float>>:941 [#uses=1]
+ load <4 x float>, <4 x float>* null ; <<4 x float>>:941 [#uses=1]
shufflevector <4 x float> zeroinitializer, <4 x float> %941, <4 x i32> < i32 0, i32 5, i32 6, i32 7 > ; <<4 x float>>:942 [#uses=1]
store <4 x float> %942, <4 x float>* null
shufflevector <4 x float> zeroinitializer, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>>:943 [#uses=0]
- load <4 x i32>* %.sub7896 ; <<4 x i32>>:944 [#uses=0]
+ load <4 x i32>, <4 x i32>* %.sub7896 ; <<4 x i32>>:944 [#uses=0]
call i32 @llvm.ppc.altivec.vcmpequw.p( i32 0, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer ) ; <i32>:945 [#uses=0]
br i1 false, label %947, label %946
@@ -1156,7 +1156,7 @@ xST.exit459: ; preds = %937, %934
br i1 false, label %955, label %953
; <label>:953 ; preds = %952
- getelementptr [4 x <4 x i32>]* null, i32 0, i32 2 ; <<4 x i32>*>:954 [#uses=0]
+ getelementptr [4 x <4 x i32>], [4 x <4 x i32>]* null, i32 0, i32 2 ; <<4 x i32>*>:954 [#uses=0]
br label %955
; <label>:955 ; preds = %953, %952
@@ -1170,16 +1170,16 @@ xST.exit459: ; preds = %937, %934
br label %xStoreDestAddressWithMask.exit461
xStoreDestAddressWithMask.exit461: ; preds = %958, %955
- load <4 x float>* %0 ; <<4 x float>>:960 [#uses=0]
+ load <4 x float>, <4 x float>* %0 ; <<4 x float>>:960 [#uses=0]
call i32 @llvm.ppc.altivec.vcmpequw.p( i32 0, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer ) ; <i32>:961 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 3, i32 0 ; <<4 x float>*>:962 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 3, i32 0 ; <<4 x float>*>:962 [#uses=0]
br i1 false, label %968, label %xST.exit463
xST.exit463: ; preds = %xStoreDestAddressWithMask.exit461
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 3, i32 1 ; <<4 x float>*>:963 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 3, i32 2 ; <<4 x float>*>:964 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 3, i32 3 ; <<4 x float>*>:965 [#uses=0]
- load <4 x float>* %0 ; <<4 x float>>:966 [#uses=3]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 3, i32 1 ; <<4 x float>*>:963 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 3, i32 2 ; <<4 x float>*>:964 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 3, i32 3 ; <<4 x float>*>:965 [#uses=0]
+ load <4 x float>, <4 x float>* %0 ; <<4 x float>>:966 [#uses=3]
call i32 @llvm.ppc.altivec.vcmpequw.p( i32 0, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer ) ; <i32>:967 [#uses=0]
br i1 false, label %972, label %969
@@ -1187,8 +1187,8 @@ xST.exit463: ; preds = %xStoreDestAddressWithMask.exit461
unreachable
; <label>:969 ; preds = %xST.exit463
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 2, i32 1 ; <<4 x float>*>:970 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 2, i32 2 ; <<4 x float>*>:971 [#uses=1]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 2, i32 1 ; <<4 x float>*>:970 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 2, i32 2 ; <<4 x float>*>:971 [#uses=1]
store <4 x float> %966, <4 x float>* %971
store <4 x float> %966, <4 x float>* null
br label %xST.exit465
@@ -1197,39 +1197,39 @@ xST.exit463: ; preds = %xStoreDestAddressWithMask.exit461
call <4 x i32> @llvm.ppc.altivec.vsel( <4 x i32> zeroinitializer, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer ) ; <<4 x i32>>:973 [#uses=0]
store <4 x float> zeroinitializer, <4 x float>* null
store <4 x float> zeroinitializer, <4 x float>* null
- load <4 x float>* null ; <<4 x float>>:974 [#uses=0]
+ load <4 x float>, <4 x float>* null ; <<4 x float>>:974 [#uses=0]
bitcast <4 x float> %966 to <4 x i32> ; <<4 x i32>>:975 [#uses=1]
call <4 x i32> @llvm.ppc.altivec.vsel( <4 x i32> zeroinitializer, <4 x i32> %975, <4 x i32> zeroinitializer ) ; <<4 x i32>>:976 [#uses=1]
bitcast <4 x i32> %976 to <4 x float> ; <<4 x float>>:977 [#uses=1]
store <4 x float> %977, <4 x float>* null
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 2, i32 3 ; <<4 x float>*>:978 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 2, i32 3 ; <<4 x float>*>:978 [#uses=0]
bitcast <4 x float> zeroinitializer to <4 x i32> ; <<4 x i32>>:979 [#uses=1]
call <4 x i32> @llvm.ppc.altivec.vsel( <4 x i32> %979, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer ) ; <<4 x i32>>:980 [#uses=1]
bitcast <4 x i32> %980 to <4 x float> ; <<4 x float>>:981 [#uses=0]
br label %xST.exit465
xST.exit465: ; preds = %972, %969
- load <4 x float>* %0 ; <<4 x float>>:982 [#uses=3]
+ load <4 x float>, <4 x float>* %0 ; <<4 x float>>:982 [#uses=3]
icmp eq i32 0, 0 ; <i1>:983 [#uses=1]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 1, i32 0 ; <<4 x float>*>:984 [#uses=1]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 1, i32 0 ; <<4 x float>*>:984 [#uses=1]
br i1 %983, label %989, label %985
; <label>:985 ; preds = %xST.exit465
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 1, i32 1 ; <<4 x float>*>:986 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 1, i32 2 ; <<4 x float>*>:987 [#uses=1]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 1, i32 1 ; <<4 x float>*>:986 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 1, i32 2 ; <<4 x float>*>:987 [#uses=1]
store <4 x float> %982, <4 x float>* %987
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 1, i32 3 ; <<4 x float>*>:988 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 1, i32 3 ; <<4 x float>*>:988 [#uses=0]
br label %xST.exit467
; <label>:989 ; preds = %xST.exit465
bitcast <4 x float> %982 to <4 x i32> ; <<4 x i32>>:990 [#uses=0]
shufflevector <4 x i32> zeroinitializer, <4 x i32> undef, <4 x i32> zeroinitializer ; <<4 x i32>>:991 [#uses=0]
store <4 x float> zeroinitializer, <4 x float>* %984
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 1, i32 1 ; <<4 x float>*>:992 [#uses=0]
- load <4 x i32>* %.sub7896 ; <<4 x i32>>:993 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 1, i32 2 ; <<4 x float>*>:994 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 1, i32 1 ; <<4 x float>*>:992 [#uses=0]
+ load <4 x i32>, <4 x i32>* %.sub7896 ; <<4 x i32>>:993 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 1, i32 2 ; <<4 x float>*>:994 [#uses=0]
bitcast <4 x i32> zeroinitializer to <4 x float> ; <<4 x float>>:995 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 1, i32 3 ; <<4 x float>*>:996 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 1, i32 3 ; <<4 x float>*>:996 [#uses=0]
bitcast <4 x float> zeroinitializer to <4 x i32> ; <<4 x i32>>:997 [#uses=1]
bitcast <4 x float> %982 to <4 x i32> ; <<4 x i32>>:998 [#uses=1]
shufflevector <4 x i32> zeroinitializer, <4 x i32> undef, <4 x i32> < i32 3, i32 3, i32 3, i32 3 > ; <<4 x i32>>:999 [#uses=1]
@@ -1238,17 +1238,17 @@ xST.exit465: ; preds = %972, %969
br label %xST.exit467
xST.exit467: ; preds = %989, %985
- load <4 x float>* %0 ; <<4 x float>>:1002 [#uses=5]
- load <4 x i32>* %.sub7896 ; <<4 x i32>>:1003 [#uses=2]
+ load <4 x float>, <4 x float>* %0 ; <<4 x float>>:1002 [#uses=5]
+ load <4 x i32>, <4 x i32>* %.sub7896 ; <<4 x i32>>:1003 [#uses=2]
call i32 @llvm.ppc.altivec.vcmpequw.p( i32 0, <4 x i32> %1003, <4 x i32> zeroinitializer ) ; <i32>:1004 [#uses=0]
br i1 false, label %1011, label %1005
; <label>:1005 ; preds = %xST.exit467
- load <4 x float>* null ; <<4 x float>>:1006 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 0, i32 1 ; <<4 x float>*>:1007 [#uses=1]
- load <4 x float>* %1007 ; <<4 x float>>:1008 [#uses=0]
- load <4 x float>* null ; <<4 x float>>:1009 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 0, i32 3 ; <<4 x float>*>:1010 [#uses=0]
+ load <4 x float>, <4 x float>* null ; <<4 x float>>:1006 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 0, i32 1 ; <<4 x float>*>:1007 [#uses=1]
+ load <4 x float>, <4 x float>* %1007 ; <<4 x float>>:1008 [#uses=0]
+ load <4 x float>, <4 x float>* null ; <<4 x float>>:1009 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 0, i32 3 ; <<4 x float>*>:1010 [#uses=0]
br label %xST.exit469
; <label>:1011 ; preds = %xST.exit467
@@ -1266,7 +1266,7 @@ xST.exit467: ; preds = %989, %985
br i1 %1017, label %1021, label %1018
; <label>:1018 ; preds = %1015
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 0, i32 1 ; <<4 x float>*>:1019 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 0, i32 1 ; <<4 x float>*>:1019 [#uses=0]
shufflevector <4 x float> %1002, <4 x float> zeroinitializer, <4 x i32> < i32 0, i32 1, i32 2, i32 7 > ; <<4 x float>>:1020 [#uses=0]
br label %1021
@@ -1276,7 +1276,7 @@ xST.exit467: ; preds = %989, %985
br i1 %1022, label %1025, label %1023
; <label>:1023 ; preds = %1021
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 0, i32 2 ; <<4 x float>*>:1024 [#uses=1]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 0, i32 2 ; <<4 x float>*>:1024 [#uses=1]
store <4 x float> zeroinitializer, <4 x float>* %1024
br label %1025
@@ -1286,23 +1286,23 @@ xST.exit467: ; preds = %989, %985
br i1 %1026, label %xST.exit469, label %1027
; <label>:1027 ; preds = %1025
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 0, i32 3 ; <<4 x float>*>:1028 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 0, i32 3 ; <<4 x float>*>:1028 [#uses=0]
br label %xST.exit469
xST.exit469: ; preds = %1027, %1025, %1005
%.17463 = phi <4 x float> [ %.27464, %1005 ], [ %.07462, %1027 ], [ %.07462, %1025 ] ; <<4 x float>> [#uses=1]
%.17468 = phi <4 x float> [ %.27469, %1005 ], [ %.07467, %1027 ], [ %.07467, %1025 ] ; <<4 x float>> [#uses=1]
%.07489 = phi <4 x float> [ %1002, %1005 ], [ %.17490, %1027 ], [ %.17490, %1025 ] ; <<4 x float>> [#uses=1]
- load <4 x float>* null ; <<4 x float>>:1029 [#uses=0]
- load <4 x float>* null ; <<4 x float>>:1030 [#uses=0]
+ load <4 x float>, <4 x float>* null ; <<4 x float>>:1029 [#uses=0]
+ load <4 x float>, <4 x float>* null ; <<4 x float>>:1030 [#uses=0]
fsub <4 x float> zeroinitializer, zeroinitializer ; <<4 x float>>:1031 [#uses=1]
br i1 false, label %1037, label %1032
; <label>:1032 ; preds = %xST.exit469
- load <4 x float>* null ; <<4 x float>>:1033 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 4, i32 2 ; <<4 x float>*>:1034 [#uses=1]
- load <4 x float>* %1034 ; <<4 x float>>:1035 [#uses=0]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 4, i32 3 ; <<4 x float>*>:1036 [#uses=0]
+ load <4 x float>, <4 x float>* null ; <<4 x float>>:1033 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 4, i32 2 ; <<4 x float>*>:1034 [#uses=1]
+ load <4 x float>, <4 x float>* %1034 ; <<4 x float>>:1035 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 4, i32 3 ; <<4 x float>*>:1036 [#uses=0]
br label %xST.exit472
; <label>:1037 ; preds = %xST.exit469
@@ -1318,8 +1318,8 @@ xST.exit469: ; preds = %1027, %1025, %1005
br i1 %1041, label %1045, label %1042
; <label>:1042 ; preds = %1040
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 4, i32 1 ; <<4 x float>*>:1043 [#uses=1]
- load <4 x float>* %1043 ; <<4 x float>>:1044 [#uses=0]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 4, i32 1 ; <<4 x float>*>:1043 [#uses=1]
+ load <4 x float>, <4 x float>* %1043 ; <<4 x float>>:1044 [#uses=0]
br label %1045
; <label>:1045 ; preds = %1042, %1040
@@ -1367,7 +1367,7 @@ xST.exit472: ; preds = %1050, %1048, %1032
br label %xST.exit474
xST.exit474: ; preds = %1059, %1058, %1051
- load <4 x float>* null ; <<4 x float>>:1060 [#uses=1]
+ load <4 x float>, <4 x float>* null ; <<4 x float>>:1060 [#uses=1]
fmul <4 x float> zeroinitializer, zeroinitializer ; <<4 x float>>:1061 [#uses=1]
fmul <4 x float> %1060, zeroinitializer ; <<4 x float>>:1062 [#uses=2]
br i1 false, label %1065, label %1063
@@ -1555,7 +1555,7 @@ xST.exit489: ; preds = %1109, %1108, %1101
br label %xST.exit492
xST.exit492: ; preds = %1118, %1117, %1110
- load <4 x float>* null ; <<4 x float>>:1119 [#uses=1]
+ load <4 x float>, <4 x float>* null ; <<4 x float>>:1119 [#uses=1]
fmul <4 x float> %1119, zeroinitializer ; <<4 x float>>:1120 [#uses=1]
fmul <4 x float> zeroinitializer, zeroinitializer ; <<4 x float>>:1121 [#uses=1]
br i1 false, label %1123, label %1122
@@ -1590,7 +1590,7 @@ xST.exit492: ; preds = %1118, %1117, %1110
xST.exit495: ; preds = %1130, %1129, %1122
%.07582 = phi <4 x float> [ %1121, %1122 ], [ %.17583, %1130 ], [ %.17583, %1129 ] ; <<4 x float>> [#uses=1]
%.07590 = phi <4 x float> [ %1120, %1122 ], [ %.17591, %1130 ], [ %.17591, %1129 ] ; <<4 x float>> [#uses=1]
- load <4 x float>* null ; <<4 x float>>:1131 [#uses=1]
+ load <4 x float>, <4 x float>* null ; <<4 x float>>:1131 [#uses=1]
fadd <4 x float> %1131, zeroinitializer ; <<4 x float>>:1132 [#uses=1]
fadd <4 x float> zeroinitializer, zeroinitializer ; <<4 x float>>:1133 [#uses=1]
br i1 false, label %1135, label %1134
@@ -1625,11 +1625,11 @@ xST.exit495: ; preds = %1130, %1129, %1122
xST.exit498: ; preds = %1142, %1141, %1134
%.07617 = phi <4 x float> [ %1133, %1134 ], [ %.17618, %1142 ], [ %.17618, %1141 ] ; <<4 x float>> [#uses=1]
%.07621 = phi <4 x float> [ %1132, %1134 ], [ %.17622, %1142 ], [ %.17622, %1141 ] ; <<4 x float>> [#uses=1]
- load <4 x float>* null ; <<4 x float>>:1143 [#uses=1]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 0, i32 2 ; <<4 x float>*>:1144 [#uses=1]
- load <4 x float>* %1144 ; <<4 x float>>:1145 [#uses=1]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 0, i32 3 ; <<4 x float>*>:1146 [#uses=1]
- load <4 x float>* %1146 ; <<4 x float>>:1147 [#uses=1]
+ load <4 x float>, <4 x float>* null ; <<4 x float>>:1143 [#uses=1]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 0, i32 2 ; <<4 x float>*>:1144 [#uses=1]
+ load <4 x float>, <4 x float>* %1144 ; <<4 x float>>:1145 [#uses=1]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 0, i32 3 ; <<4 x float>*>:1146 [#uses=1]
+ load <4 x float>, <4 x float>* %1146 ; <<4 x float>>:1147 [#uses=1]
shufflevector <4 x float> %1143, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>>:1148 [#uses=1]
shufflevector <4 x float> %1145, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>>:1149 [#uses=1]
shufflevector <4 x float> %1147, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>>:1150 [#uses=1]
@@ -1671,11 +1671,11 @@ xST.exit501: ; preds = %1163, %1162, %1155
%.07656 = phi <4 x float> [ %1153, %1155 ], [ %.17657, %1163 ], [ %.17657, %1162 ] ; <<4 x float>> [#uses=1]
%.07660 = phi <4 x float> [ %1152, %1155 ], [ %.17661, %1163 ], [ %.17661, %1162 ] ; <<4 x float>> [#uses=1]
%.07664 = phi <4 x float> [ %1151, %1155 ], [ %.17665, %1163 ], [ %.17665, %1162 ] ; <<4 x float>> [#uses=1]
- load <4 x float>* null ; <<4 x float>>:1164 [#uses=1]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 0, i32 2 ; <<4 x float>*>:1165 [#uses=1]
- load <4 x float>* %1165 ; <<4 x float>>:1166 [#uses=1]
- getelementptr [193 x [4 x <4 x float>]]* null, i32 0, i32 0, i32 3 ; <<4 x float>*>:1167 [#uses=1]
- load <4 x float>* %1167 ; <<4 x float>>:1168 [#uses=1]
+ load <4 x float>, <4 x float>* null ; <<4 x float>>:1164 [#uses=1]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 0, i32 2 ; <<4 x float>*>:1165 [#uses=1]
+ load <4 x float>, <4 x float>* %1165 ; <<4 x float>>:1166 [#uses=1]
+ getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 0, i32 3 ; <<4 x float>*>:1167 [#uses=1]
+ load <4 x float>, <4 x float>* %1167 ; <<4 x float>>:1168 [#uses=1]
fadd <4 x float> zeroinitializer, zeroinitializer ; <<4 x float>>:1169 [#uses=1]
fadd <4 x float> zeroinitializer, %1164 ; <<4 x float>>:1170 [#uses=1]
fadd <4 x float> zeroinitializer, %1166 ; <<4 x float>>:1171 [#uses=1]
@@ -1734,21 +1734,21 @@ xST.exit504: ; preds = %1181, %1180, %1173
br label %1188
; <label>:1188 ; preds = %1187, %1186
- load <4 x i32>* %.sub7896 ; <<4 x i32>>:1189 [#uses=1]
+ load <4 x i32>, <4 x i32>* %.sub7896 ; <<4 x i32>>:1189 [#uses=1]
shufflevector <4 x i32> %1189, <4 x i32> undef, <4 x i32> < i32 2, i32 2, i32 2, i32 2 > ; <<4 x i32>>:1190 [#uses=1]
call i32 @llvm.ppc.altivec.vcmpequw.p( i32 0, <4 x i32> %1190, <4 x i32> zeroinitializer ) ; <i32>:1191 [#uses=1]
icmp eq i32 %1191, 0 ; <i1>:1192 [#uses=1]
br i1 %1192, label %1196, label %1193
; <label>:1193 ; preds = %1188
- load <4 x float>* null ; <<4 x float>>:1194 [#uses=1]
+ load <4 x float>, <4 x float>* null ; <<4 x float>>:1194 [#uses=1]
shufflevector <4 x float> zeroinitializer, <4 x float> %1194, <4 x i32> < i32 0, i32 1, i32 2, i32 7 > ; <<4 x float>>:1195 [#uses=1]
store <4 x float> %1195, <4 x float>* null
br label %1196
; <label>:1196 ; preds = %1193, %1188
%.07742 = phi <4 x float> [ zeroinitializer, %1193 ], [ zeroinitializer, %1188 ] ; <<4 x float>> [#uses=0]
- load <4 x i32>* %.sub7896 ; <<4 x i32>>:1197 [#uses=1]
+ load <4 x i32>, <4 x i32>* %.sub7896 ; <<4 x i32>>:1197 [#uses=1]
shufflevector <4 x i32> %1197, <4 x i32> undef, <4 x i32> < i32 3, i32 3, i32 3, i32 3 > ; <<4 x i32>>:1198 [#uses=1]
call i32 @llvm.ppc.altivec.vcmpequw.p( i32 0, <4 x i32> %1198, <4 x i32> zeroinitializer ) ; <i32>:1199 [#uses=1]
icmp eq i32 %1199, 0 ; <i1>:1200 [#uses=1]
@@ -1765,20 +1765,20 @@ xST.exit507: ; preds = %1201, %1196, %1183
br i1 %1203, label %1207, label %1204
; <label>:1204 ; preds = %xST.exit507
- load <4 x float>* null ; <<4 x float>>:1205 [#uses=1]
+ load <4 x float>, <4 x float>* null ; <<4 x float>>:1205 [#uses=1]
shufflevector <4 x float> zeroinitializer, <4 x float> %1205, <4 x i32> < i32 0, i32 5, i32 6, i32 7 > ; <<4 x float>>:1206 [#uses=1]
store <4 x float> %1206, <4 x float>* null
br label %1207
; <label>:1207 ; preds = %1204, %xST.exit507
- load <4 x i32>* %.sub7896 ; <<4 x i32>>:1208 [#uses=1]
+ load <4 x i32>, <4 x i32>* %.sub7896 ; <<4 x i32>>:1208 [#uses=1]
shufflevector <4 x i32> %1208, <4 x i32> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 > ; <<4 x i32>>:1209 [#uses=1]
call i32 @llvm.ppc.altivec.vcmpequw.p( i32 0, <4 x i32> %1209, <4 x i32> zeroinitializer ) ; <i32>:1210 [#uses=1]
icmp eq i32 %1210, 0 ; <i1>:1211 [#uses=1]
br i1 %1211, label %1215, label %1212
; <label>:1212 ; preds = %1207
- load <4 x float>* null ; <<4 x float>>:1213 [#uses=1]
+ load <4 x float>, <4 x float>* null ; <<4 x float>>:1213 [#uses=1]
shufflevector <4 x float> zeroinitializer, <4 x float> %1213, <4 x i32> < i32 0, i32 5, i32 6, i32 7 > ; <<4 x float>>:1214 [#uses=1]
store <4 x float> %1214, <4 x float>* null
br label %1215
diff --git a/test/CodeGen/PowerPC/2007-04-30-InlineAsmEarlyClobber.ll b/test/CodeGen/PowerPC/2007-04-30-InlineAsmEarlyClobber.ll
index 53231b4f435e..017775781f7d 100644
--- a/test/CodeGen/PowerPC/2007-04-30-InlineAsmEarlyClobber.ll
+++ b/test/CodeGen/PowerPC/2007-04-30-InlineAsmEarlyClobber.ll
@@ -22,7 +22,7 @@ define i64 @test(i32 %A, i32 %B, i32 %C) nounwind {
entry:
%Y = alloca i32, align 4 ; <i32*> [#uses=2]
%tmp4 = call i32 asm "subf${3:I}c $1,$4,$3\0A\09subfze $0,$2", "=r,=*&r,r,rI,r"( i32* %Y, i32 %A, i32 %B, i32 %C ) ; <i32> [#uses=1]
- %tmp5 = load i32* %Y ; <i32> [#uses=1]
+ %tmp5 = load i32, i32* %Y ; <i32> [#uses=1]
%tmp56 = zext i32 %tmp5 to i64 ; <i64> [#uses=1]
%tmp7 = shl i64 %tmp56, 32 ; <i64> [#uses=1]
%tmp89 = zext i32 %tmp4 to i64 ; <i64> [#uses=1]
diff --git a/test/CodeGen/PowerPC/2007-05-03-InlineAsm-S-Constraint.ll b/test/CodeGen/PowerPC/2007-05-03-InlineAsm-S-Constraint.ll
index 490aa0c1442c..c5721560d382 100644
--- a/test/CodeGen/PowerPC/2007-05-03-InlineAsm-S-Constraint.ll
+++ b/test/CodeGen/PowerPC/2007-05-03-InlineAsm-S-Constraint.ll
@@ -7,6 +7,6 @@ target triple = "powerpc-apple-darwin8.8.0"
define void @foo() {
entry:
- tail call void asm sideeffect "$0 $1", "s,i"( i8* bitcast (i32* getelementptr ([2 x i32]* @x, i32 0, i32 1) to i8*), i8* bitcast (i32* getelementptr ([2 x i32]* @x, i32 0, i32 1) to i8*) )
+ tail call void asm sideeffect "$0 $1", "s,i"( i8* bitcast (i32* getelementptr ([2 x i32], [2 x i32]* @x, i32 0, i32 1) to i8*), i8* bitcast (i32* getelementptr ([2 x i32], [2 x i32]* @x, i32 0, i32 1) to i8*) )
ret void
}
diff --git a/test/CodeGen/PowerPC/2007-05-14-InlineAsmSelectCrash.ll b/test/CodeGen/PowerPC/2007-05-14-InlineAsmSelectCrash.ll
index e4e931492ac4..1305c42e9320 100644
--- a/test/CodeGen/PowerPC/2007-05-14-InlineAsmSelectCrash.ll
+++ b/test/CodeGen/PowerPC/2007-05-14-InlineAsmSelectCrash.ll
@@ -12,7 +12,7 @@ entry:
bb: ; preds = %bb, %entry
%i.035.0 = phi i32 [ 0, %entry ], [ %indvar.next, %bb ] ; <i32> [#uses=2]
- %tmp8 = getelementptr float* %tmp56, i32 %i.035.0 ; <float*> [#uses=2]
+ %tmp8 = getelementptr float, float* %tmp56, i32 %i.035.0 ; <float*> [#uses=2]
%tmp101112 = bitcast float* %tmp8 to i8* ; <i8*> [#uses=1]
%tmp1617 = bitcast float* %tmp8 to i32* ; <i32*> [#uses=1]
%tmp21 = tail call i32 asm "lwbrx $0, $2, $1", "=r,r,bO,*m"( i8* %tmp101112, i32 0, i32* %tmp1617 ) ; <i32> [#uses=0]
diff --git a/test/CodeGen/PowerPC/2007-05-22-tailmerge-3.ll b/test/CodeGen/PowerPC/2007-05-22-tailmerge-3.ll
index 382ba1f6a82d..b3b73238420d 100644
--- a/test/CodeGen/PowerPC/2007-05-22-tailmerge-3.ll
+++ b/test/CodeGen/PowerPC/2007-05-22-tailmerge-3.ll
@@ -15,46 +15,46 @@ entry:
%retval = alloca i32, align 4 ; <i32*> [#uses=1]
store i32 %i, i32* %i_addr
store i32 %q, i32* %q_addr
- %tmp = load i32* %i_addr ; <i32> [#uses=1]
+ %tmp = load i32, i32* %i_addr ; <i32> [#uses=1]
%tmp1 = icmp ne i32 %tmp, 0 ; <i1> [#uses=1]
%tmp12 = zext i1 %tmp1 to i8 ; <i8> [#uses=1]
%toBool = icmp ne i8 %tmp12, 0 ; <i1> [#uses=1]
br i1 %toBool, label %cond_true, label %cond_false
cond_true: ; preds = %entry
- %tmp3 = call i32 (...)* @bar( ) ; <i32> [#uses=0]
- %tmp4 = call i32 (...)* @baz( i32 5, i32 6 ) ; <i32> [#uses=0]
- %tmp7 = load i32* %q_addr ; <i32> [#uses=1]
+ %tmp3 = call i32 (...) @bar( ) ; <i32> [#uses=0]
+ %tmp4 = call i32 (...) @baz( i32 5, i32 6 ) ; <i32> [#uses=0]
+ %tmp7 = load i32, i32* %q_addr ; <i32> [#uses=1]
%tmp8 = icmp ne i32 %tmp7, 0 ; <i1> [#uses=1]
%tmp89 = zext i1 %tmp8 to i8 ; <i8> [#uses=1]
%toBool10 = icmp ne i8 %tmp89, 0 ; <i1> [#uses=1]
br i1 %toBool10, label %cond_true11, label %cond_false15
cond_false: ; preds = %entry
- %tmp5 = call i32 (...)* @foo( ) ; <i32> [#uses=0]
- %tmp6 = call i32 (...)* @baz( i32 5, i32 6 ) ; <i32> [#uses=0]
- %tmp27 = load i32* %q_addr ; <i32> [#uses=1]
+ %tmp5 = call i32 (...) @foo( ) ; <i32> [#uses=0]
+ %tmp6 = call i32 (...) @baz( i32 5, i32 6 ) ; <i32> [#uses=0]
+ %tmp27 = load i32, i32* %q_addr ; <i32> [#uses=1]
%tmp28 = icmp ne i32 %tmp27, 0 ; <i1> [#uses=1]
%tmp289 = zext i1 %tmp28 to i8 ; <i8> [#uses=1]
%toBool210 = icmp ne i8 %tmp289, 0 ; <i1> [#uses=1]
br i1 %toBool210, label %cond_true11, label %cond_false15
cond_true11: ; preds = %cond_next
- %tmp13 = call i32 (...)* @foo( ) ; <i32> [#uses=0]
- %tmp14 = call i32 (...)* @quux( i32 3, i32 4 ) ; <i32> [#uses=0]
+ %tmp13 = call i32 (...) @foo( ) ; <i32> [#uses=0]
+ %tmp14 = call i32 (...) @quux( i32 3, i32 4 ) ; <i32> [#uses=0]
br label %cond_next18
cond_false15: ; preds = %cond_next
- %tmp16 = call i32 (...)* @bar( ) ; <i32> [#uses=0]
- %tmp17 = call i32 (...)* @quux( i32 3, i32 4 ) ; <i32> [#uses=0]
+ %tmp16 = call i32 (...) @bar( ) ; <i32> [#uses=0]
+ %tmp17 = call i32 (...) @quux( i32 3, i32 4 ) ; <i32> [#uses=0]
br label %cond_next18
cond_next18: ; preds = %cond_false15, %cond_true11
- %tmp19 = call i32 (...)* @bar( ) ; <i32> [#uses=0]
+ %tmp19 = call i32 (...) @bar( ) ; <i32> [#uses=0]
br label %return
return: ; preds = %cond_next18
- %retval20 = load i32* %retval ; <i32> [#uses=1]
+ %retval20 = load i32, i32* %retval ; <i32> [#uses=1]
ret i32 %retval20
}
diff --git a/test/CodeGen/PowerPC/2007-06-28-BCCISelBug.ll b/test/CodeGen/PowerPC/2007-06-28-BCCISelBug.ll
index 6de7a09128f0..7a8eb175a93a 100644
--- a/test/CodeGen/PowerPC/2007-06-28-BCCISelBug.ll
+++ b/test/CodeGen/PowerPC/2007-06-28-BCCISelBug.ll
@@ -70,7 +70,7 @@ declare i32 @llvm.ppc.altivec.vcmpequw.p(i32, <4 x i32>, <4 x i32>)
define void @test(%struct.XState* %gldst, <4 x float>* %prgrm, <4 x float>** %buffs, %struct._GVMConstants* %cnstn, %struct.PPSToken* %pstrm, %struct.GVMFPContext* %vmctx, %struct.GVMTs* %txtrs, %struct.GVMFPStack* %fpstk, %struct.GVMFGAttrib* %start, %struct.GVMFGAttrib* %deriv, i32 %fragx, i32 %fragy) {
bb58.i:
- %tmp3405.i = getelementptr %struct.XTRec* null, i32 0, i32 1 ; <float*> [#uses=1]
+ %tmp3405.i = getelementptr %struct.XTRec, %struct.XTRec* null, i32 0, i32 1 ; <float*> [#uses=1]
%tmp34053406.i = bitcast float* %tmp3405.i to i8* ; <i8*> [#uses=1]
%tmp3407.i = call <4 x i32> @llvm.ppc.altivec.lvewx( i8* %tmp34053406.i ) ; <<4 x i32>> [#uses=0]
%tmp4146.i = call i32 @llvm.ppc.altivec.vcmpequw.p( i32 3, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer ) ; <i32> [#uses=1]
diff --git a/test/CodeGen/PowerPC/2007-08-04-CoalescerAssert.ll b/test/CodeGen/PowerPC/2007-08-04-CoalescerAssert.ll
index 06f40d98c68c..6b88b81681cb 100644
--- a/test/CodeGen/PowerPC/2007-08-04-CoalescerAssert.ll
+++ b/test/CodeGen/PowerPC/2007-08-04-CoalescerAssert.ll
@@ -22,7 +22,7 @@ cond_true28: ; preds = %cond_false, %cond_true
cond_next30: ; preds = %cond_true28, %cond_false, %cond_true
%iftmp.0.043.1 = phi %struct._obstack_chunk* [ %iftmp.0.043.0, %cond_true28 ], [ null, %cond_true ], [ %tmp22, %cond_false ] ; <%struct._obstack_chunk*> [#uses=1]
- %tmp41 = getelementptr %struct._obstack_chunk* %iftmp.0.043.1, i32 0, i32 0 ; <i8**> [#uses=1]
+ %tmp41 = getelementptr %struct._obstack_chunk, %struct._obstack_chunk* %iftmp.0.043.1, i32 0, i32 0 ; <i8**> [#uses=1]
store i8* null, i8** %tmp41, align 8
ret i32 undef
}
diff --git a/test/CodeGen/PowerPC/2007-09-07-LoadStoreIdxForms.ll b/test/CodeGen/PowerPC/2007-09-07-LoadStoreIdxForms.ll
index 40f46fda468d..aae914ecc435 100644
--- a/test/CodeGen/PowerPC/2007-09-07-LoadStoreIdxForms.ll
+++ b/test/CodeGen/PowerPC/2007-09-07-LoadStoreIdxForms.ll
@@ -8,8 +8,8 @@ define void @foo() {
entry:
%ttype = alloca i32, align 4 ; <i32*> [#uses=1]
%regs = alloca [1024 x %struct.__db_region], align 16 ; <[1024 x %struct.__db_region]*> [#uses=0]
- %tmp = load i32* %ttype, align 4 ; <i32> [#uses=1]
- %tmp1 = call i32 (...)* @bork( i32 %tmp ) ; <i32> [#uses=0]
+ %tmp = load i32, i32* %ttype, align 4 ; <i32> [#uses=1]
+ %tmp1 = call i32 (...) @bork( i32 %tmp ) ; <i32> [#uses=0]
ret void
; CHECK: @foo
diff --git a/test/CodeGen/PowerPC/2007-09-08-unaligned.ll b/test/CodeGen/PowerPC/2007-09-08-unaligned.ll
index bdd91f345718..ccbadb4255a0 100644
--- a/test/CodeGen/PowerPC/2007-09-08-unaligned.ll
+++ b/test/CodeGen/PowerPC/2007-09-08-unaligned.ll
@@ -16,18 +16,18 @@ define i32 @foo() {
entry:
%retval = alloca i32, align 4 ; <i32*> [#uses=1]
%"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
- %tmp = getelementptr %struct.anon* @s, i32 0, i32 1 ; <float*> [#uses=1]
- %tmp1 = load float* %tmp, align 1 ; <float> [#uses=1]
- %tmp2 = getelementptr %struct.anon* @t, i32 0, i32 1 ; <float*> [#uses=1]
+ %tmp = getelementptr %struct.anon, %struct.anon* @s, i32 0, i32 1 ; <float*> [#uses=1]
+ %tmp1 = load float, float* %tmp, align 1 ; <float> [#uses=1]
+ %tmp2 = getelementptr %struct.anon, %struct.anon* @t, i32 0, i32 1 ; <float*> [#uses=1]
store float %tmp1, float* %tmp2, align 1
- %tmp3 = getelementptr <{ i8, double }>* @u, i32 0, i32 1 ; <double*> [#uses=1]
- %tmp4 = load double* %tmp3, align 1 ; <double> [#uses=1]
- %tmp5 = getelementptr <{ i8, double }>* @v, i32 0, i32 1 ; <double*> [#uses=1]
+ %tmp3 = getelementptr <{ i8, double }>, <{ i8, double }>* @u, i32 0, i32 1 ; <double*> [#uses=1]
+ %tmp4 = load double, double* %tmp3, align 1 ; <double> [#uses=1]
+ %tmp5 = getelementptr <{ i8, double }>, <{ i8, double }>* @v, i32 0, i32 1 ; <double*> [#uses=1]
store double %tmp4, double* %tmp5, align 1
br label %return
return: ; preds = %entry
- %retval6 = load i32* %retval ; <i32> [#uses=1]
+ %retval6 = load i32, i32* %retval ; <i32> [#uses=1]
ret i32 %retval6
}
@@ -36,17 +36,17 @@ entry:
%retval = alloca i32, align 4 ; <i32*> [#uses=1]
%"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
%tmp = call i32 @foo( ) ; <i32> [#uses=0]
- %tmp1 = getelementptr %struct.anon* @t, i32 0, i32 1 ; <float*> [#uses=1]
- %tmp2 = load float* %tmp1, align 1 ; <float> [#uses=1]
+ %tmp1 = getelementptr %struct.anon, %struct.anon* @t, i32 0, i32 1 ; <float*> [#uses=1]
+ %tmp2 = load float, float* %tmp1, align 1 ; <float> [#uses=1]
%tmp23 = fpext float %tmp2 to double ; <double> [#uses=1]
- %tmp4 = getelementptr <{ i8, double }>* @v, i32 0, i32 1 ; <double*> [#uses=1]
- %tmp5 = load double* %tmp4, align 1 ; <double> [#uses=1]
- %tmp6 = getelementptr [8 x i8]* @.str, i32 0, i32 0 ; <i8*> [#uses=1]
- %tmp7 = call i32 (i8*, ...)* @printf( i8* %tmp6, double %tmp23, double %tmp5 ) ; <i32> [#uses=0]
+ %tmp4 = getelementptr <{ i8, double }>, <{ i8, double }>* @v, i32 0, i32 1 ; <double*> [#uses=1]
+ %tmp5 = load double, double* %tmp4, align 1 ; <double> [#uses=1]
+ %tmp6 = getelementptr [8 x i8], [8 x i8]* @.str, i32 0, i32 0 ; <i8*> [#uses=1]
+ %tmp7 = call i32 (i8*, ...) @printf( i8* %tmp6, double %tmp23, double %tmp5 ) ; <i32> [#uses=0]
br label %return
return: ; preds = %entry
- %retval8 = load i32* %retval ; <i32> [#uses=1]
+ %retval8 = load i32, i32* %retval ; <i32> [#uses=1]
ret i32 %retval8
}
diff --git a/test/CodeGen/PowerPC/2007-10-18-PtrArithmetic.ll b/test/CodeGen/PowerPC/2007-10-18-PtrArithmetic.ll
index 84fadd1b0461..07b1f8d7698a 100644
--- a/test/CodeGen/PowerPC/2007-10-18-PtrArithmetic.ll
+++ b/test/CodeGen/PowerPC/2007-10-18-PtrArithmetic.ll
@@ -10,9 +10,9 @@ entry:
cond_true: ; preds = %entry
%tmp89 = bitcast float* %res to <4 x i32>* ; <<4 x i32>*> [#uses=1]
%tmp1011 = bitcast float* %argA to <4 x i32>* ; <<4 x i32>*> [#uses=1]
- %tmp14 = load <4 x i32>* %tmp1011, align 16 ; <<4 x i32>> [#uses=1]
+ %tmp14 = load <4 x i32>, <4 x i32>* %tmp1011, align 16 ; <<4 x i32>> [#uses=1]
%tmp1516 = bitcast float* %argB to <4 x i32>* ; <<4 x i32>*> [#uses=1]
- %tmp18 = load <4 x i32>* %tmp1516, align 16 ; <<4 x i32>> [#uses=1]
+ %tmp18 = load <4 x i32>, <4 x i32>* %tmp1516, align 16 ; <<4 x i32>> [#uses=1]
%tmp19 = sdiv <4 x i32> %tmp14, %tmp18 ; <<4 x i32>> [#uses=1]
store <4 x i32> %tmp19, <4 x i32>* %tmp89, align 16
ret void
diff --git a/test/CodeGen/PowerPC/2007-10-21-LocalRegAllocAssert.ll b/test/CodeGen/PowerPC/2007-10-21-LocalRegAllocAssert.ll
index a60d11c85c55..13b9be31b69b 100644
--- a/test/CodeGen/PowerPC/2007-10-21-LocalRegAllocAssert.ll
+++ b/test/CodeGen/PowerPC/2007-10-21-LocalRegAllocAssert.ll
@@ -17,11 +17,11 @@
define %struct.NSManagedObjectContext* @"+[ListGenerator(Private) managedObjectContextWithModelURL:storeURL:]"(%struct.objc_object* %self, %struct._message_ref_t* %_cmd, %struct.NSURL* %modelURL, %struct.NSURL* %storeURL) {
entry:
%storeCoordinator = alloca %struct.NSPersistentStoreCoordinator* ; <%struct.NSPersistentStoreCoordinator**> [#uses=0]
- %tmp29 = call %struct.objc_object* (%struct.objc_object*, %struct._message_ref_t*, ...)* null( %struct.objc_object* null, %struct._message_ref_t* @"\01L_OBJC_MESSAGE_REF_2" ) ; <%struct.objc_object*> [#uses=0]
- %tmp34 = load %struct.NSString** @NSXMLStoreType, align 8 ; <%struct.NSString*> [#uses=1]
- %tmp37 = load %struct.objc_object* (%struct.objc_object*, %struct._message_ref_t*, ...)** getelementptr (%struct._message_ref_t* @"\01L_OBJC_MESSAGE_REF_5", i32 0, i32 0), align 8 ; <%struct.objc_object* (%struct.objc_object*, %struct._message_ref_t*, ...)*> [#uses=1]
- %tmp42 = call %struct.objc_object* (%struct.objc_object*, %struct._message_ref_t*, ...)* null( %struct.objc_object* null, %struct._message_ref_t* @"\01L_OBJC_MESSAGE_REF_4", i32 1 ) ; <%struct.objc_object*> [#uses=1]
- %tmp45 = call %struct.objc_object* (%struct.objc_object*, %struct._message_ref_t*, ...)* %tmp37( %struct.objc_object* null, %struct._message_ref_t* @"\01L_OBJC_MESSAGE_REF_5", %struct.objc_object* %tmp42, %struct.NSString* null ) ; <%struct.objc_object*> [#uses=1]
- %tmp48 = call %struct.objc_object* (%struct.objc_object*, %struct._message_ref_t*, ...)* null( %struct.objc_object* null, %struct._message_ref_t* @"\01L_OBJC_MESSAGE_REF_6", %struct.NSString* %tmp34, i8* null, %struct.NSURL* null, %struct.objc_object* %tmp45, %struct.NSError** null ) ; <%struct.objc_object*> [#uses=0]
+ %tmp29 = call %struct.objc_object* (%struct.objc_object*, %struct._message_ref_t*, ...) null( %struct.objc_object* null, %struct._message_ref_t* @"\01L_OBJC_MESSAGE_REF_2" ) ; <%struct.objc_object*> [#uses=0]
+ %tmp34 = load %struct.NSString*, %struct.NSString** @NSXMLStoreType, align 8 ; <%struct.NSString*> [#uses=1]
+ %tmp37 = load %struct.objc_object* (%struct.objc_object*, %struct._message_ref_t*, ...)*, %struct.objc_object* (%struct.objc_object*, %struct._message_ref_t*, ...)** getelementptr (%struct._message_ref_t, %struct._message_ref_t* @"\01L_OBJC_MESSAGE_REF_5", i32 0, i32 0), align 8 ; <%struct.objc_object* (%struct.objc_object*, %struct._message_ref_t*, ...)*> [#uses=1]
+ %tmp42 = call %struct.objc_object* (%struct.objc_object*, %struct._message_ref_t*, ...) null( %struct.objc_object* null, %struct._message_ref_t* @"\01L_OBJC_MESSAGE_REF_4", i32 1 ) ; <%struct.objc_object*> [#uses=1]
+ %tmp45 = call %struct.objc_object* (%struct.objc_object*, %struct._message_ref_t*, ...) %tmp37( %struct.objc_object* null, %struct._message_ref_t* @"\01L_OBJC_MESSAGE_REF_5", %struct.objc_object* %tmp42, %struct.NSString* null ) ; <%struct.objc_object*> [#uses=1]
+ %tmp48 = call %struct.objc_object* (%struct.objc_object*, %struct._message_ref_t*, ...) null( %struct.objc_object* null, %struct._message_ref_t* @"\01L_OBJC_MESSAGE_REF_6", %struct.NSString* %tmp34, i8* null, %struct.NSURL* null, %struct.objc_object* %tmp45, %struct.NSError** null ) ; <%struct.objc_object*> [#uses=0]
unreachable
}
diff --git a/test/CodeGen/PowerPC/2007-10-21-LocalRegAllocAssert2.ll b/test/CodeGen/PowerPC/2007-10-21-LocalRegAllocAssert2.ll
index 3d1a328ec3c1..ff5f835fd531 100644
--- a/test/CodeGen/PowerPC/2007-10-21-LocalRegAllocAssert2.ll
+++ b/test/CodeGen/PowerPC/2007-10-21-LocalRegAllocAssert2.ll
@@ -14,12 +14,12 @@
define %struct.NSManagedObjectContext* @"+[ListGenerator(Private) managedObjectContextWithModelURL:storeURL:]"(%struct.objc_object* %self, %struct._message_ref_t* %_cmd, %struct.NSURL* %modelURL, %struct.NSURL* %storeURL) {
entry:
- %tmp27 = load %struct.objc_object* (%struct.objc_object*, %struct._message_ref_t*, ...)** getelementptr (%struct._message_ref_t* @"\01L_OBJC_MESSAGE_REF_2", i32 0, i32 0), align 8 ; <%struct.objc_object* (%struct.objc_object*, %struct._message_ref_t*, ...)*> [#uses=1]
- %tmp29 = call %struct.objc_object* (%struct.objc_object*, %struct._message_ref_t*, ...)* %tmp27( %struct.objc_object* null, %struct._message_ref_t* @"\01L_OBJC_MESSAGE_REF_2" ) ; <%struct.objc_object*> [#uses=0]
- %tmp33 = load %struct.objc_object* (%struct.objc_object*, %struct._message_ref_t*, ...)** getelementptr (%struct._message_ref_t* @"\01L_OBJC_MESSAGE_REF_6", i32 0, i32 0), align 8 ; <%struct.objc_object* (%struct.objc_object*, %struct._message_ref_t*, ...)*> [#uses=1]
- %tmp34 = load %struct.NSString** @NSXMLStoreType, align 8 ; <%struct.NSString*> [#uses=1]
- %tmp40 = load %struct.objc_object* (%struct.objc_object*, %struct._message_ref_t*, ...)** getelementptr (%struct._message_ref_t* @"\01L_OBJC_MESSAGE_REF_4", i32 0, i32 0), align 8 ; <%struct.objc_object* (%struct.objc_object*, %struct._message_ref_t*, ...)*> [#uses=1]
- %tmp42 = call %struct.objc_object* (%struct.objc_object*, %struct._message_ref_t*, ...)* %tmp40( %struct.objc_object* null, %struct._message_ref_t* @"\01L_OBJC_MESSAGE_REF_4", i32 1 ) ; <%struct.objc_object*> [#uses=0]
- %tmp48 = call %struct.objc_object* (%struct.objc_object*, %struct._message_ref_t*, ...)* %tmp33( %struct.objc_object* null, %struct._message_ref_t* @"\01L_OBJC_MESSAGE_REF_6", %struct.NSString* %tmp34, i8* null, %struct.NSURL* null, %struct.objc_object* null, %struct.NSError** null ) ; <%struct.objc_object*> [#uses=0]
+ %tmp27 = load %struct.objc_object* (%struct.objc_object*, %struct._message_ref_t*, ...)*, %struct.objc_object* (%struct.objc_object*, %struct._message_ref_t*, ...)** getelementptr (%struct._message_ref_t, %struct._message_ref_t* @"\01L_OBJC_MESSAGE_REF_2", i32 0, i32 0), align 8 ; <%struct.objc_object* (%struct.objc_object*, %struct._message_ref_t*, ...)*> [#uses=1]
+ %tmp29 = call %struct.objc_object* (%struct.objc_object*, %struct._message_ref_t*, ...) %tmp27( %struct.objc_object* null, %struct._message_ref_t* @"\01L_OBJC_MESSAGE_REF_2" ) ; <%struct.objc_object*> [#uses=0]
+ %tmp33 = load %struct.objc_object* (%struct.objc_object*, %struct._message_ref_t*, ...)*, %struct.objc_object* (%struct.objc_object*, %struct._message_ref_t*, ...)** getelementptr (%struct._message_ref_t, %struct._message_ref_t* @"\01L_OBJC_MESSAGE_REF_6", i32 0, i32 0), align 8 ; <%struct.objc_object* (%struct.objc_object*, %struct._message_ref_t*, ...)*> [#uses=1]
+ %tmp34 = load %struct.NSString*, %struct.NSString** @NSXMLStoreType, align 8 ; <%struct.NSString*> [#uses=1]
+ %tmp40 = load %struct.objc_object* (%struct.objc_object*, %struct._message_ref_t*, ...)*, %struct.objc_object* (%struct.objc_object*, %struct._message_ref_t*, ...)** getelementptr (%struct._message_ref_t, %struct._message_ref_t* @"\01L_OBJC_MESSAGE_REF_4", i32 0, i32 0), align 8 ; <%struct.objc_object* (%struct.objc_object*, %struct._message_ref_t*, ...)*> [#uses=1]
+ %tmp42 = call %struct.objc_object* (%struct.objc_object*, %struct._message_ref_t*, ...) %tmp40( %struct.objc_object* null, %struct._message_ref_t* @"\01L_OBJC_MESSAGE_REF_4", i32 1 ) ; <%struct.objc_object*> [#uses=0]
+ %tmp48 = call %struct.objc_object* (%struct.objc_object*, %struct._message_ref_t*, ...) %tmp33( %struct.objc_object* null, %struct._message_ref_t* @"\01L_OBJC_MESSAGE_REF_6", %struct.NSString* %tmp34, i8* null, %struct.NSURL* null, %struct.objc_object* null, %struct.NSError** null ) ; <%struct.objc_object*> [#uses=0]
unreachable
}
diff --git a/test/CodeGen/PowerPC/2007-11-16-landingpad-split.ll b/test/CodeGen/PowerPC/2007-11-16-landingpad-split.ll
index df83f8b191c6..34122912349b 100644
--- a/test/CodeGen/PowerPC/2007-11-16-landingpad-split.ll
+++ b/test/CodeGen/PowerPC/2007-11-16-landingpad-split.ll
@@ -29,7 +29,7 @@ entry:
to label %bb30.preheader unwind label %unwind
bb30.preheader: ; preds = %entry
- %tmp26 = getelementptr %struct.Range* %effectiveRange, i64 0, i32 1 ; <i64*> [#uses=1]
+ %tmp26 = getelementptr %struct.Range, %struct.Range* %effectiveRange, i64 0, i32 1 ; <i64*> [#uses=1]
br label %bb30
unwind: ; preds = %cond_true, %entry
@@ -39,7 +39,7 @@ unwind: ; preds = %cond_true, %entry
resume { i8*, i32 } %exn
invcont23: ; preds = %cond_true
- %tmp27 = load i64* %tmp26, align 8 ; <i64> [#uses=1]
+ %tmp27 = load i64, i64* %tmp26, align 8 ; <i64> [#uses=1]
%tmp28 = sub i64 %range_addr.1.0, %tmp27 ; <i64> [#uses=1]
br label %bb30
diff --git a/test/CodeGen/PowerPC/2007-11-19-VectorSplitting.ll b/test/CodeGen/PowerPC/2007-11-19-VectorSplitting.ll
index d1f028586160..4830ca60f9ff 100644
--- a/test/CodeGen/PowerPC/2007-11-19-VectorSplitting.ll
+++ b/test/CodeGen/PowerPC/2007-11-19-VectorSplitting.ll
@@ -6,7 +6,7 @@
define void @execute_shader(<4 x float>* %OUT, <4 x float>* %IN, <4 x float>*
%CONST) {
entry:
- %input2 = load <4 x float>* null, align 16 ; <<4 x float>>
+ %input2 = load <4 x float>, <4 x float>* null, align 16 ; <<4 x float>>
%shuffle7 = shufflevector <4 x float> %input2, <4 x float> < float 0.000000e+00, float 1.000000e+00, float 0.000000e+00, float 1.000000e+00 >, <4 x i32> < i32 2, i32 2, i32 2, i32 2 > ; <<4 x float>> [#uses=1]
%mul1 = fmul <4 x float> %shuffle7, zeroinitializer ; <<4 x
diff --git a/test/CodeGen/PowerPC/2008-02-05-LiveIntervalsAssert.ll b/test/CodeGen/PowerPC/2008-02-05-LiveIntervalsAssert.ll
index 791e9e610655..7ed7b9b36687 100644
--- a/test/CodeGen/PowerPC/2008-02-05-LiveIntervalsAssert.ll
+++ b/test/CodeGen/PowerPC/2008-02-05-LiveIntervalsAssert.ll
@@ -44,7 +44,7 @@ bb103.preheader: ; preds = %bb113
bb113: ; preds = %bb113, %bb93, %bb82, %bb52, %entry
%fingerprint_addr.0.reg2mem.9 = phi i64 [ 0, %entry ], [ 0, %bb52 ], [ 0, %bb82 ], [ 0, %bb93 ], [ %tmp118, %bb113 ] ; <i64> [#uses=1]
- tail call void @_Z28report_should_not_reach_herePKci( i8* getelementptr ([44 x i8]* @.str, i32 0, i32 0), i32 817 ) nounwind
+ tail call void @_Z28report_should_not_reach_herePKci( i8* getelementptr ([44 x i8], [44 x i8]* @.str, i32 0, i32 0), i32 817 ) nounwind
%tmp118 = lshr i64 %fingerprint_addr.0.reg2mem.9, 4 ; <i64> [#uses=2]
%tmp21158 = and i64 %tmp118, 15 ; <i64> [#uses=1]
switch i64 %tmp21158, label %bb113 [
diff --git a/test/CodeGen/PowerPC/2008-02-09-LocalRegAllocAssert.ll b/test/CodeGen/PowerPC/2008-02-09-LocalRegAllocAssert.ll
index e28a3e04cf1b..73a804bf02b2 100644
--- a/test/CodeGen/PowerPC/2008-02-09-LocalRegAllocAssert.ll
+++ b/test/CodeGen/PowerPC/2008-02-09-LocalRegAllocAssert.ll
@@ -2,7 +2,7 @@
define i32 @bork(i64 %foo, i64 %bar) {
entry:
- %tmp = load i64* null, align 8 ; <i64> [#uses=2]
+ %tmp = load i64, i64* null, align 8 ; <i64> [#uses=2]
%tmp2 = icmp ule i64 %tmp, 0 ; <i1> [#uses=1]
%min = select i1 %tmp2, i64 %tmp, i64 0 ; <i64> [#uses=1]
store i64 %min, i64* null, align 8
diff --git a/test/CodeGen/PowerPC/2008-03-05-RegScavengerAssert.ll b/test/CodeGen/PowerPC/2008-03-05-RegScavengerAssert.ll
index d10291e190b9..863b02528e1e 100644
--- a/test/CodeGen/PowerPC/2008-03-05-RegScavengerAssert.ll
+++ b/test/CodeGen/PowerPC/2008-03-05-RegScavengerAssert.ll
@@ -6,7 +6,7 @@ define void @foo(i8* %pp) nounwind {
entry:
%tmp2 = tail call i8* @bar( i32 14 ) nounwind ; <i8*> [#uses=0]
%tmp28 = bitcast i8* %pp to void ()** ; <void ()**> [#uses=1]
- %tmp38 = load void ()** %tmp28, align 4 ; <void ()*> [#uses=2]
+ %tmp38 = load void ()*, void ()** %tmp28, align 4 ; <void ()*> [#uses=2]
br i1 false, label %bb34, label %bb25
bb25: ; preds = %entry
%tmp30 = bitcast void ()* %tmp38 to void (i8*)* ; <void (i8*)*> [#uses=1]
diff --git a/test/CodeGen/PowerPC/2008-03-17-RegScavengerCrash.ll b/test/CodeGen/PowerPC/2008-03-17-RegScavengerCrash.ll
index fb8cdcea63aa..dc9734f2f734 100644
--- a/test/CodeGen/PowerPC/2008-03-17-RegScavengerCrash.ll
+++ b/test/CodeGen/PowerPC/2008-03-17-RegScavengerCrash.ll
@@ -7,7 +7,7 @@ declare fastcc void @emit_numeric_escape(i32, i32, %struct._cpp_strbuf*, i32) no
define i32 @cpp_interpret_string(i32 %pfile, %struct.cpp_string* %from, i32 %wide) nounwind {
entry:
- %tmp61 = load i32* null, align 4 ; <i32> [#uses=1]
+ %tmp61 = load i32, i32* null, align 4 ; <i32> [#uses=1]
%toBool = icmp eq i32 %wide, 0 ; <i1> [#uses=2]
%iftmp.87.0 = select i1 %toBool, i32 %tmp61, i32 0 ; <i32> [#uses=2]
%tmp69 = icmp ult i32 %iftmp.87.0, 33 ; <i1> [#uses=1]
@@ -23,7 +23,7 @@ bb94: ; preds = %bb79
bb103: ; preds = %bb79
ret i32 0
bb130.preheader: ; preds = %bb94
- %tmp134 = getelementptr %struct.cpp_string* %from, i32 0, i32 1 ; <i8**> [#uses=0]
+ %tmp134 = getelementptr %struct.cpp_string, %struct.cpp_string* %from, i32 0, i32 1 ; <i8**> [#uses=0]
ret i32 0
bb729: ; preds = %bb94
call fastcc void @emit_numeric_escape( i32 %pfile, i32 0, %struct._cpp_strbuf* null, i32 %wide ) nounwind
diff --git a/test/CodeGen/PowerPC/2008-03-24-AddressRegImm.ll b/test/CodeGen/PowerPC/2008-03-24-AddressRegImm.ll
index a8fef05b1ad8..1191748b87d4 100644
--- a/test/CodeGen/PowerPC/2008-03-24-AddressRegImm.ll
+++ b/test/CodeGen/PowerPC/2008-03-24-AddressRegImm.ll
@@ -5,8 +5,8 @@ entry:
%tmp2627 = ptrtoint i8* %rec to i64 ; <i64> [#uses=2]
%tmp28 = and i64 %tmp2627, -16384 ; <i64> [#uses=2]
%tmp2829 = inttoptr i64 %tmp28 to i8* ; <i8*> [#uses=1]
- %tmp37 = getelementptr i8* %tmp2829, i64 42 ; <i8*> [#uses=1]
- %tmp40 = load i8* %tmp37, align 1 ; <i8> [#uses=1]
+ %tmp37 = getelementptr i8, i8* %tmp2829, i64 42 ; <i8*> [#uses=1]
+ %tmp40 = load i8, i8* %tmp37, align 1 ; <i8> [#uses=1]
%tmp4041 = zext i8 %tmp40 to i64 ; <i64> [#uses=1]
%tmp42 = shl i64 %tmp4041, 8 ; <i64> [#uses=1]
%tmp47 = add i64 %tmp42, 0 ; <i64> [#uses=1]
diff --git a/test/CodeGen/PowerPC/2008-03-26-CoalescerBug.ll b/test/CodeGen/PowerPC/2008-03-26-CoalescerBug.ll
index 8e5bf567b126..908a2a803b2b 100644
--- a/test/CodeGen/PowerPC/2008-03-26-CoalescerBug.ll
+++ b/test/CodeGen/PowerPC/2008-03-26-CoalescerBug.ll
@@ -2,7 +2,7 @@
define i32 @t(i64 %byteStart, i32 %activeIndex) nounwind {
entry:
- %tmp50 = load i32* null, align 4 ; <i32> [#uses=1]
+ %tmp50 = load i32, i32* null, align 4 ; <i32> [#uses=1]
%tmp5051 = zext i32 %tmp50 to i64 ; <i64> [#uses=3]
%tmp53 = udiv i64 %byteStart, %tmp5051 ; <i64> [#uses=1]
%tmp5354 = trunc i64 %tmp53 to i32 ; <i32> [#uses=1]
diff --git a/test/CodeGen/PowerPC/2008-04-23-CoalescerCrash.ll b/test/CodeGen/PowerPC/2008-04-23-CoalescerCrash.ll
index e7a1cf69c693..45d43997fefb 100644
--- a/test/CodeGen/PowerPC/2008-04-23-CoalescerCrash.ll
+++ b/test/CodeGen/PowerPC/2008-04-23-CoalescerCrash.ll
@@ -24,7 +24,7 @@ bb: ; preds = %entry
bb31: ; preds = %_Z24unlock_then_erase_sectory.exit, %bb
%Pos.0.reg2mem.0 = phi i64 [ %tmp93, %_Z24unlock_then_erase_sectory.exit ], [ %Offset, %bb ] ; <i64> [#uses=3]
- %tmp35 = load i16* @_ZL10DeviceCode, align 2 ; <i16> [#uses=1]
+ %tmp35 = load i16, i16* @_ZL10DeviceCode, align 2 ; <i16> [#uses=1]
%tmp3536 = zext i16 %tmp35 to i32 ; <i32> [#uses=2]
%tmp37 = and i32 %tmp3536, 65520 ; <i32> [#uses=1]
%tmp38 = icmp eq i32 %tmp37, 35008 ; <i1> [#uses=1]
@@ -39,11 +39,11 @@ bb41: ; preds = %bb31
ret i32 0
bb68: ; preds = %bb31
- tail call void (i8*, ...)* @IOLog( i8* getelementptr ([68 x i8]* @.str34, i32 0, i32 0), i64 %tmp34, i64 0, i32 131072 ) nounwind
+ tail call void (i8*, ...) @IOLog( i8* getelementptr ([68 x i8], [68 x i8]* @.str34, i32 0, i32 0), i64 %tmp34, i64 0, i32 131072 ) nounwind
%tmp2021.i = trunc i64 %Pos.0.reg2mem.0 to i32 ; <i32> [#uses=1]
%tmp202122.i = inttoptr i32 %tmp2021.i to i8* ; <i8*> [#uses=1]
tail call void @IODelay( i32 500 ) nounwind
- %tmp53.i = load volatile i16* null, align 2 ; <i16> [#uses=2]
+ %tmp53.i = load volatile i16, i16* null, align 2 ; <i16> [#uses=2]
%tmp5455.i = zext i16 %tmp53.i to i32 ; <i32> [#uses=1]
br i1 false, label %bb.i, label %bb65.i
@@ -55,7 +55,7 @@ bb65.i: ; preds = %bb68
br i1 %tmp67.i, label %_Z24unlock_then_erase_sectory.exit, label %bb70.i
bb70.i: ; preds = %bb65.i
- tail call void (i8*, ...)* @IOLog( i8* getelementptr ([64 x i8]* @.str19, i32 0, i32 0), i32 %tmp5455.i ) nounwind
+ tail call void (i8*, ...) @IOLog( i8* getelementptr ([64 x i8], [64 x i8]* @.str19, i32 0, i32 0), i32 %tmp5455.i ) nounwind
ret i32 0
_Z24unlock_then_erase_sectory.exit: ; preds = %bb65.i
@@ -66,15 +66,15 @@ _Z24unlock_then_erase_sectory.exit: ; preds = %bb65.i
br i1 %tmp100, label %bb31, label %bb103
bb103: ; preds = %_Z24unlock_then_erase_sectory.exit, %bb
- tail call void (i8*, ...)* @IOLog( i8* getelementptr ([37 x i8]* @.str35, i32 0, i32 0) ) nounwind
+ tail call void (i8*, ...) @IOLog( i8* getelementptr ([37 x i8], [37 x i8]* @.str35, i32 0, i32 0) ) nounwind
ret i32 0
bb107: ; preds = %entry
- tail call void (i8*, ...)* @IOLog( i8* getelementptr ([48 x i8]* @.str36, i32 0, i32 0) ) nounwind
+ tail call void (i8*, ...) @IOLog( i8* getelementptr ([48 x i8], [48 x i8]* @.str36, i32 0, i32 0) ) nounwind
%tmp114115 = bitcast i8* %buffer to i16* ; <i16*> [#uses=1]
%tmp256 = lshr i64 %bufferSize, 1 ; <i64> [#uses=1]
%tmp256257 = trunc i64 %tmp256 to i32 ; <i32> [#uses=1]
- %tmp258 = getelementptr i16* %tmp114115, i32 %tmp256257 ; <i16*> [#uses=0]
+ %tmp258 = getelementptr i16, i16* %tmp114115, i32 %tmp256257 ; <i16*> [#uses=0]
ret i32 0
}
diff --git a/test/CodeGen/PowerPC/2008-06-21-F128LoadStore.ll b/test/CodeGen/PowerPC/2008-06-21-F128LoadStore.ll
index 862559b109cf..8ce1708acdbf 100644
--- a/test/CodeGen/PowerPC/2008-06-21-F128LoadStore.ll
+++ b/test/CodeGen/PowerPC/2008-06-21-F128LoadStore.ll
@@ -4,7 +4,7 @@
@h = external global ppc_fp128
define void @f() {
- %tmp = load ppc_fp128* @g
+ %tmp = load ppc_fp128, ppc_fp128* @g
store ppc_fp128 %tmp, ppc_fp128* @h
ret void
}
diff --git a/test/CodeGen/PowerPC/2008-06-23-LiveVariablesCrash.ll b/test/CodeGen/PowerPC/2008-06-23-LiveVariablesCrash.ll
index 83c5511878ca..db488ff36bfb 100644
--- a/test/CodeGen/PowerPC/2008-06-23-LiveVariablesCrash.ll
+++ b/test/CodeGen/PowerPC/2008-06-23-LiveVariablesCrash.ll
@@ -6,7 +6,7 @@ entry:
br i1 true, label %bb1, label %bb3
bb1:
- %tmp1 = load i8* null, align 1
+ %tmp1 = load i8, i8* null, align 1
%tmp2 = icmp eq i8 %tmp1, 0
br label %bb2
diff --git a/test/CodeGen/PowerPC/2008-07-15-Bswap.ll b/test/CodeGen/PowerPC/2008-07-15-Bswap.ll
index 4a834f93a205..b271048fd045 100644
--- a/test/CodeGen/PowerPC/2008-07-15-Bswap.ll
+++ b/test/CodeGen/PowerPC/2008-07-15-Bswap.ll
@@ -95,13 +95,13 @@ entry:
bb16: ; preds = %entry
bitcast %struct.PerMacroblockBoundaryStrengths* null to i32* ; <i32*>:1 [#uses=3]
- getelementptr i32* %1, i32 1 ; <i32*>:2 [#uses=0]
- getelementptr i32* %1, i32 2 ; <i32*>:3 [#uses=0]
- getelementptr i32* %1, i32 3 ; <i32*>:4 [#uses=0]
+ getelementptr i32, i32* %1, i32 1 ; <i32*>:2 [#uses=0]
+ getelementptr i32, i32* %1, i32 2 ; <i32*>:3 [#uses=0]
+ getelementptr i32, i32* %1, i32 3 ; <i32*>:4 [#uses=0]
bitcast [16 x i8]* null to i32* ; <i32*>:5 [#uses=3]
- getelementptr i32* %5, i32 1 ; <i32*>:6 [#uses=0]
- getelementptr i32* %5, i32 2 ; <i32*>:7 [#uses=0]
- getelementptr i32* %5, i32 3 ; <i32*>:8 [#uses=0]
+ getelementptr i32, i32* %5, i32 1 ; <i32*>:6 [#uses=0]
+ getelementptr i32, i32* %5, i32 2 ; <i32*>:7 [#uses=0]
+ getelementptr i32, i32* %5, i32 3 ; <i32*>:8 [#uses=0]
icmp eq i32 0, 0 ; <i1>:9 [#uses=0]
lshr i32 0, 30 ; <i32>:10 [#uses=0]
and i32 0, 268435455 ; <i32>:11 [#uses=0]
@@ -117,14 +117,14 @@ bb16: ; preds = %entry
%.not658 = icmp ne i32 0, 0 ; <i1> [#uses=1]
and i32 0, 268369920 ; <i32>:20 [#uses=1]
icmp eq i32 %20, 268369920 ; <i1>:21 [#uses=2]
- getelementptr %struct.PerMacroblockBoundaryStrengths* null, i32 0, i32 2 ; <[4 x i8]*>:22 [#uses=1]
- getelementptr %struct.PerMacroblockBoundaryStrengths* null, i32 0, i32 2, i32 0 ; <i8*>:23 [#uses=0]
+ getelementptr %struct.PerMacroblockBoundaryStrengths, %struct.PerMacroblockBoundaryStrengths* null, i32 0, i32 2 ; <[4 x i8]*>:22 [#uses=1]
+ getelementptr %struct.PerMacroblockBoundaryStrengths, %struct.PerMacroblockBoundaryStrengths* null, i32 0, i32 2, i32 0 ; <i8*>:23 [#uses=0]
and i32 0, -2 ; <i32>:24 [#uses=1]
add i32 %24, -1 ; <i32>:25 [#uses=0]
bitcast [4 x i8]* %22 to i32* ; <i32*>:26 [#uses=3]
- getelementptr i32* %26, i32 1 ; <i32*>:27 [#uses=0]
- getelementptr i32* %26, i32 2 ; <i32*>:28 [#uses=0]
- getelementptr i32* %26, i32 3 ; <i32*>:29 [#uses=0]
+ getelementptr i32, i32* %26, i32 1 ; <i32*>:27 [#uses=0]
+ getelementptr i32, i32* %26, i32 2 ; <i32*>:28 [#uses=0]
+ getelementptr i32, i32* %26, i32 3 ; <i32*>:29 [#uses=0]
br label %bb144
bb144: ; preds = %bb395, %bb16
@@ -136,8 +136,8 @@ bb144: ; preds = %bb395, %bb16
%boundaryStrengthsV.1771 = phi i8* [ null, %bb16 ], [ %158, %bb395 ] ; <i8*> [#uses=2]
%numEdgesToTest.1770 = phi i32 [ 4, %bb16 ], [ %numEdgesToTest.2, %bb395 ] ; <i32> [#uses=1]
icmp eq i32 %idxEachField11.0773, 0 ; <i1>:30 [#uses=0]
- getelementptr %struct.BiPartSrcDescriptor** null, i32 %mbIndexLeft.2772 ; <%struct.BiPartSrcDescriptor**>:31 [#uses=1]
- load %struct.BiPartSrcDescriptor** %31, align 4 ; <%struct.BiPartSrcDescriptor*>:32 [#uses=0]
+ getelementptr %struct.BiPartSrcDescriptor*, %struct.BiPartSrcDescriptor** null, i32 %mbIndexLeft.2772 ; <%struct.BiPartSrcDescriptor**>:31 [#uses=1]
+ load %struct.BiPartSrcDescriptor*, %struct.BiPartSrcDescriptor** %31, align 4 ; <%struct.BiPartSrcDescriptor*>:32 [#uses=0]
%fMacroblockHasNonZeroBS.4 = select i1 %21, i32 1, i32 0 ; <i32> [#uses=1]
%numEdgesToTest.2 = select i1 %21, i32 1, i32 %numEdgesToTest.1770 ; <i32> [#uses=2]
store i8 32, i8* %boundaryStrengthsV.1771, align 1
@@ -180,32 +180,32 @@ bb210.preheader: ; preds = %bb206
add i32 %52, %42 ; <i32>:53 [#uses=1]
mul i32 %51, 0 ; <i32>:54 [#uses=1]
add i32 %46, %54 ; <i32>:55 [#uses=1]
- getelementptr %struct.BiPartSrcDescriptor** null, i32 %53 ; <%struct.BiPartSrcDescriptor**>:56 [#uses=1]
- load %struct.BiPartSrcDescriptor** %56, align 4 ; <%struct.BiPartSrcDescriptor*>:57 [#uses=7]
- getelementptr %struct.BiPartSrcDescriptor** null, i32 %55 ; <%struct.BiPartSrcDescriptor**>:58 [#uses=1]
- load %struct.BiPartSrcDescriptor** %58, align 4 ; <%struct.BiPartSrcDescriptor*>:59 [#uses=5]
+ getelementptr %struct.BiPartSrcDescriptor*, %struct.BiPartSrcDescriptor** null, i32 %53 ; <%struct.BiPartSrcDescriptor**>:56 [#uses=1]
+ load %struct.BiPartSrcDescriptor*, %struct.BiPartSrcDescriptor** %56, align 4 ; <%struct.BiPartSrcDescriptor*>:57 [#uses=7]
+ getelementptr %struct.BiPartSrcDescriptor*, %struct.BiPartSrcDescriptor** null, i32 %55 ; <%struct.BiPartSrcDescriptor**>:58 [#uses=1]
+ load %struct.BiPartSrcDescriptor*, %struct.BiPartSrcDescriptor** %58, align 4 ; <%struct.BiPartSrcDescriptor*>:59 [#uses=5]
icmp slt i32 %159, 0 ; <i1>:60 [#uses=0]
icmp eq %struct.BiPartSrcDescriptor* %57, %59 ; <i1>:61 [#uses=0]
bitcast %struct.BiPartSrcDescriptor* %57 to i16* ; <i16*>:62 [#uses=5]
- load i16* %62, align 2 ; <i16>:63 [#uses=2]
- getelementptr i16* %62, i32 1 ; <i16*>:64 [#uses=1]
- load i16* %64, align 2 ; <i16>:65 [#uses=2]
- getelementptr i16* %62, i32 2 ; <i16*>:66 [#uses=1]
- load i16* %66, align 2 ; <i16>:67 [#uses=2]
- getelementptr i16* %62, i32 3 ; <i16*>:68 [#uses=1]
- load i16* %68, align 2 ; <i16>:69 [#uses=2]
- getelementptr i16* %62, i32 6 ; <i16*>:70 [#uses=1]
- load i16* %70, align 2 ; <i16>:71 [#uses=2]
+ load i16, i16* %62, align 2 ; <i16>:63 [#uses=2]
+ getelementptr i16, i16* %62, i32 1 ; <i16*>:64 [#uses=1]
+ load i16, i16* %64, align 2 ; <i16>:65 [#uses=2]
+ getelementptr i16, i16* %62, i32 2 ; <i16*>:66 [#uses=1]
+ load i16, i16* %66, align 2 ; <i16>:67 [#uses=2]
+ getelementptr i16, i16* %62, i32 3 ; <i16*>:68 [#uses=1]
+ load i16, i16* %68, align 2 ; <i16>:69 [#uses=2]
+ getelementptr i16, i16* %62, i32 6 ; <i16*>:70 [#uses=1]
+ load i16, i16* %70, align 2 ; <i16>:71 [#uses=2]
bitcast %struct.BiPartSrcDescriptor* %59 to i16* ; <i16*>:72 [#uses=5]
- load i16* %72, align 2 ; <i16>:73 [#uses=2]
- getelementptr i16* %72, i32 1 ; <i16*>:74 [#uses=1]
- load i16* %74, align 2 ; <i16>:75 [#uses=2]
- getelementptr i16* %72, i32 2 ; <i16*>:76 [#uses=1]
- load i16* %76, align 2 ; <i16>:77 [#uses=2]
- getelementptr i16* %72, i32 3 ; <i16*>:78 [#uses=1]
- load i16* %78, align 2 ; <i16>:79 [#uses=2]
- getelementptr i16* %72, i32 6 ; <i16*>:80 [#uses=1]
- load i16* %80, align 2 ; <i16>:81 [#uses=2]
+ load i16, i16* %72, align 2 ; <i16>:73 [#uses=2]
+ getelementptr i16, i16* %72, i32 1 ; <i16*>:74 [#uses=1]
+ load i16, i16* %74, align 2 ; <i16>:75 [#uses=2]
+ getelementptr i16, i16* %72, i32 2 ; <i16*>:76 [#uses=1]
+ load i16, i16* %76, align 2 ; <i16>:77 [#uses=2]
+ getelementptr i16, i16* %72, i32 3 ; <i16*>:78 [#uses=1]
+ load i16, i16* %78, align 2 ; <i16>:79 [#uses=2]
+ getelementptr i16, i16* %72, i32 6 ; <i16*>:80 [#uses=1]
+ load i16, i16* %80, align 2 ; <i16>:81 [#uses=2]
sub i16 %63, %73 ; <i16>:82 [#uses=3]
sub i16 %65, %75 ; <i16>:83 [#uses=3]
sub i16 %67, %77 ; <i16>:84 [#uses=3]
@@ -226,23 +226,23 @@ bb210.preheader: ; preds = %bb206
sub i16 0, %86 ; <i16>:95 [#uses=1]
icmp slt i16 %86, 0 ; <i1>:96 [#uses=1]
%.663 = select i1 %96, i16 %95, i16 %86 ; <i16> [#uses=1]
- getelementptr %struct.BiPartSrcDescriptor* %57, i32 0, i32 0, i32 0, i32 1, i32 0 ; <i8*>:97 [#uses=1]
- load i8* %97, align 1 ; <i8>:98 [#uses=1]
+ getelementptr %struct.BiPartSrcDescriptor, %struct.BiPartSrcDescriptor* %57, i32 0, i32 0, i32 0, i32 1, i32 0 ; <i8*>:97 [#uses=1]
+ load i8, i8* %97, align 1 ; <i8>:98 [#uses=1]
zext i8 %98 to i32 ; <i32>:99 [#uses=1]
- getelementptr %struct.BiPartSrcDescriptor* %57, i32 0, i32 0, i32 0, i32 1, i32 1 ; <i8*>:100 [#uses=1]
- load i8* %100, align 1 ; <i8>:101 [#uses=1]
+ getelementptr %struct.BiPartSrcDescriptor, %struct.BiPartSrcDescriptor* %57, i32 0, i32 0, i32 0, i32 1, i32 1 ; <i8*>:100 [#uses=1]
+ load i8, i8* %100, align 1 ; <i8>:101 [#uses=1]
zext i8 %101 to i32 ; <i32>:102 [#uses=1]
- getelementptr %struct.BiPartSrcDescriptor* %57, i32 0, i32 0, i32 0, i32 3, i32 0 ; <i8*>:103 [#uses=1]
- load i8* %103, align 1 ; <i8>:104 [#uses=2]
+ getelementptr %struct.BiPartSrcDescriptor, %struct.BiPartSrcDescriptor* %57, i32 0, i32 0, i32 0, i32 3, i32 0 ; <i8*>:103 [#uses=1]
+ load i8, i8* %103, align 1 ; <i8>:104 [#uses=2]
zext i8 %104 to i32 ; <i32>:105 [#uses=1]
- getelementptr %struct.BiPartSrcDescriptor* %59, i32 0, i32 0, i32 0, i32 3, i32 0 ; <i8*>:106 [#uses=1]
- load i8* %106, align 1 ; <i8>:107 [#uses=2]
+ getelementptr %struct.BiPartSrcDescriptor, %struct.BiPartSrcDescriptor* %59, i32 0, i32 0, i32 0, i32 3, i32 0 ; <i8*>:106 [#uses=1]
+ load i8, i8* %106, align 1 ; <i8>:107 [#uses=2]
zext i8 %107 to i32 ; <i32>:108 [#uses=1]
- getelementptr %struct.BiPartSrcDescriptor* %57, i32 0, i32 0, i32 0, i32 3, i32 1 ; <i8*>:109 [#uses=1]
- load i8* %109, align 1 ; <i8>:110 [#uses=1]
+ getelementptr %struct.BiPartSrcDescriptor, %struct.BiPartSrcDescriptor* %57, i32 0, i32 0, i32 0, i32 3, i32 1 ; <i8*>:109 [#uses=1]
+ load i8, i8* %109, align 1 ; <i8>:110 [#uses=1]
zext i8 %110 to i32 ; <i32>:111 [#uses=1]
- getelementptr %struct.BiPartSrcDescriptor* %59, i32 0, i32 0, i32 0, i32 3, i32 1 ; <i8*>:112 [#uses=1]
- load i8* %112, align 1 ; <i8>:113 [#uses=1]
+ getelementptr %struct.BiPartSrcDescriptor, %struct.BiPartSrcDescriptor* %59, i32 0, i32 0, i32 0, i32 3, i32 1 ; <i8*>:112 [#uses=1]
+ load i8, i8* %112, align 1 ; <i8>:113 [#uses=1]
zext i8 %113 to i32 ; <i32>:114 [#uses=1]
lshr i32 %99, 4 ; <i32>:115 [#uses=1]
and i32 %115, 2 ; <i32>:116 [#uses=1]
@@ -322,13 +322,13 @@ labelContinueEdgesLoopV: ; preds = %bb206, %bb205, %bb144
%bfNZ12.2 = phi i32 [ %159, %bb205 ], [ 0, %bb144 ], [ %159, %bb206 ] ; <i32> [#uses=1]
%boundaryStrengthsV.3 = phi i8* [ %158, %bb205 ], [ %boundaryStrengthsV.1771, %bb144 ], [ %158, %bb206 ] ; <i8*> [#uses=3]
or i32 %fMacroblockHasNonZeroBS.6, %fEdgeHasNonZeroBS.0 ; <i32>:152 [#uses=2]
- load i8* %boundaryStrengthsV.3, align 1 ; <i8>:153 [#uses=1]
+ load i8, i8* %boundaryStrengthsV.3, align 1 ; <i8>:153 [#uses=1]
trunc i32 %fEdgeHasNonZeroBS.0 to i8 ; <i8>:154 [#uses=1]
shl i8 %154, 5 ; <i8>:155 [#uses=1]
xor i8 %155, 32 ; <i8>:156 [#uses=1]
or i8 %153, %156 ; <i8>:157 [#uses=1]
store i8 %157, i8* %boundaryStrengthsV.3, align 1
- getelementptr i8* %boundaryStrengthsV.3, i32 4 ; <i8*>:158 [#uses=4]
+ getelementptr i8, i8* %boundaryStrengthsV.3, i32 4 ; <i8*>:158 [#uses=4]
shl i32 %bfNZ12.2, 4 ; <i32>:159 [#uses=4]
add i32 %ixEdge.1, 1 ; <i32>:160 [#uses=6]
icmp ult i32 %160, %numEdgesToTest.2 ; <i1>:161 [#uses=1]
diff --git a/test/CodeGen/PowerPC/2008-07-15-SignExtendInreg.ll b/test/CodeGen/PowerPC/2008-07-15-SignExtendInreg.ll
index 21b0c619e111..53639e7ceb04 100644
--- a/test/CodeGen/PowerPC/2008-07-15-SignExtendInreg.ll
+++ b/test/CodeGen/PowerPC/2008-07-15-SignExtendInreg.ll
@@ -4,7 +4,7 @@ target triple = "powerpc-apple-darwin9"
define signext i16 @t(i16* %dct) nounwind {
entry:
- load i16* null, align 2 ; <i16>:0 [#uses=2]
+ load i16, i16* null, align 2 ; <i16>:0 [#uses=2]
lshr i16 %0, 11 ; <i16>:1 [#uses=0]
trunc i16 %0 to i8 ; <i8>:2 [#uses=1]
sext i8 %2 to i16 ; <i16>:3 [#uses=1]
diff --git a/test/CodeGen/PowerPC/2008-07-24-PPC64-CCBug.ll b/test/CodeGen/PowerPC/2008-07-24-PPC64-CCBug.ll
index c9c05e1cc363..ee3d0f4ea46c 100644
--- a/test/CodeGen/PowerPC/2008-07-24-PPC64-CCBug.ll
+++ b/test/CodeGen/PowerPC/2008-07-24-PPC64-CCBug.ll
@@ -4,7 +4,7 @@
define void @llvm_static_func(i32 %a0, i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6, i32 %a7, i32 %a8, i32 %a9, i32 %a10, i32 %a11, i32 %a12, i32 %a13, i32 %a14, i32 %a15) nounwind {
entry:
- tail call i32 (i8*, ...)* @printf( i8* getelementptr ([4 x i8]* @"\01LC", i32 0, i64 0), i32 %a8 ) nounwind ; <i32>:0 [#uses=0]
+ tail call i32 (i8*, ...) @printf( i8* getelementptr ([4 x i8], [4 x i8]* @"\01LC", i32 0, i64 0), i32 %a8 ) nounwind ; <i32>:0 [#uses=0]
ret void
}
diff --git a/test/CodeGen/PowerPC/2008-09-12-CoalescerBug.ll b/test/CodeGen/PowerPC/2008-09-12-CoalescerBug.ll
index 97844dd7486a..b107600de135 100644
--- a/test/CodeGen/PowerPC/2008-09-12-CoalescerBug.ll
+++ b/test/CodeGen/PowerPC/2008-09-12-CoalescerBug.ll
@@ -10,25 +10,25 @@
define void @lb(%struct.CGLSI* %src, i32 %n, %struct.CGLDI* %dst) nounwind {
entry:
- %0 = load i32* null, align 4 ; <i32> [#uses=1]
+ %0 = load i32, i32* null, align 4 ; <i32> [#uses=1]
%1 = icmp sgt i32 %0, 0 ; <i1> [#uses=1]
br i1 %1, label %bb.nph4945, label %return
bb.nph4945: ; preds = %entry
%2 = bitcast [2 x %struct.vv_t]* null to i64* ; <i64*> [#uses=6]
- %3 = getelementptr [2 x i64]* null, i32 0, i32 1 ; <i64*> [#uses=6]
+ %3 = getelementptr [2 x i64], [2 x i64]* null, i32 0, i32 1 ; <i64*> [#uses=6]
%4 = bitcast %struct.vv_t* null to i64* ; <i64*> [#uses=5]
- %5 = getelementptr [2 x i64]* null, i32 0, i32 1 ; <i64*> [#uses=3]
+ %5 = getelementptr [2 x i64], [2 x i64]* null, i32 0, i32 1 ; <i64*> [#uses=3]
br label %bb2326
bb2217: ; preds = %bb2326
%6 = or i64 0, 0 ; <i64> [#uses=2]
%7 = fptosi float 0.000000e+00 to i32 ; <i32> [#uses=1]
%8 = fptosi float 0.000000e+00 to i32 ; <i32> [#uses=1]
- %9 = getelementptr float* null, i32 2 ; <float*> [#uses=1]
- %10 = load float* %9, align 4 ; <float> [#uses=1]
- %11 = getelementptr float* null, i32 3 ; <float*> [#uses=1]
- %12 = load float* %11, align 4 ; <float> [#uses=1]
+ %9 = getelementptr float, float* null, i32 2 ; <float*> [#uses=1]
+ %10 = load float, float* %9, align 4 ; <float> [#uses=1]
+ %11 = getelementptr float, float* null, i32 3 ; <float*> [#uses=1]
+ %12 = load float, float* %11, align 4 ; <float> [#uses=1]
%13 = fmul float %10, 6.553500e+04 ; <float> [#uses=1]
%14 = fadd float %13, 5.000000e-01 ; <float> [#uses=1]
%15 = fmul float %12, 6.553500e+04 ; <float> [#uses=1]
@@ -63,11 +63,11 @@ bb2265: ; preds = %bb2264, %bb2262, %bb2217
%34 = and i64 %33, 281470681743360 ; <i64> [#uses=1]
store i64 %6, i64* %2, align 16
store i64 %31, i64* %3, align 8
- %35 = getelementptr i8* null, i32 0 ; <i8*> [#uses=1]
+ %35 = getelementptr i8, i8* null, i32 0 ; <i8*> [#uses=1]
%36 = bitcast i8* %35 to float* ; <float*> [#uses=4]
- %37 = load float* %36, align 4 ; <float> [#uses=1]
- %38 = getelementptr float* %36, i32 1 ; <float*> [#uses=1]
- %39 = load float* %38, align 4 ; <float> [#uses=1]
+ %37 = load float, float* %36, align 4 ; <float> [#uses=1]
+ %38 = getelementptr float, float* %36, i32 1 ; <float*> [#uses=1]
+ %39 = load float, float* %38, align 4 ; <float> [#uses=1]
%40 = fmul float %37, 6.553500e+04 ; <float> [#uses=1]
%41 = fadd float %40, 5.000000e-01 ; <float> [#uses=1]
%42 = fmul float %39, 6.553500e+04 ; <float> [#uses=1]
@@ -84,10 +84,10 @@ bb2277: ; preds = %bb2274, %bb2265
%f1582.0 = phi float [ 0.000000e+00, %bb2265 ], [ %43, %bb2274 ] ; <float> [#uses=1]
%47 = fptosi float 0.000000e+00 to i32 ; <i32> [#uses=1]
%48 = fptosi float %f1582.0 to i32 ; <i32> [#uses=1]
- %49 = getelementptr float* %36, i32 2 ; <float*> [#uses=1]
- %50 = load float* %49, align 4 ; <float> [#uses=1]
- %51 = getelementptr float* %36, i32 3 ; <float*> [#uses=1]
- %52 = load float* %51, align 4 ; <float> [#uses=1]
+ %49 = getelementptr float, float* %36, i32 2 ; <float*> [#uses=1]
+ %50 = load float, float* %49, align 4 ; <float> [#uses=1]
+ %51 = getelementptr float, float* %36, i32 3 ; <float*> [#uses=1]
+ %52 = load float, float* %51, align 4 ; <float> [#uses=1]
%53 = fmul float %50, 6.553500e+04 ; <float> [#uses=1]
%54 = fadd float %53, 5.000000e-01 ; <float> [#uses=1]
%55 = fmul float %52, 6.553500e+04 ; <float> [#uses=1]
@@ -106,11 +106,11 @@ bb2277: ; preds = %bb2274, %bb2265
%68 = or i64 %64, %62 ; <i64> [#uses=1]
%69 = or i64 %68, %66 ; <i64> [#uses=1]
%70 = or i64 %69, %67 ; <i64> [#uses=2]
- %71 = getelementptr i8* null, i32 0 ; <i8*> [#uses=1]
+ %71 = getelementptr i8, i8* null, i32 0 ; <i8*> [#uses=1]
%72 = bitcast i8* %71 to float* ; <float*> [#uses=4]
- %73 = load float* %72, align 4 ; <float> [#uses=1]
- %74 = getelementptr float* %72, i32 1 ; <float*> [#uses=1]
- %75 = load float* %74, align 4 ; <float> [#uses=1]
+ %73 = load float, float* %72, align 4 ; <float> [#uses=1]
+ %74 = getelementptr float, float* %72, i32 1 ; <float*> [#uses=1]
+ %75 = load float, float* %74, align 4 ; <float> [#uses=1]
%76 = fmul float %73, 6.553500e+04 ; <float> [#uses=1]
%77 = fadd float %76, 5.000000e-01 ; <float> [#uses=3]
%78 = fmul float %75, 6.553500e+04 ; <float> [#uses=1]
@@ -130,10 +130,10 @@ bb2295: ; preds = %bb2294, %bb2292, %bb2277
%82 = fcmp olt float %79, 0.000000e+00 ; <i1> [#uses=0]
%83 = fptosi float %f0569.0 to i32 ; <i32> [#uses=1]
%84 = fptosi float 0.000000e+00 to i32 ; <i32> [#uses=1]
- %85 = getelementptr float* %72, i32 2 ; <float*> [#uses=1]
- %86 = load float* %85, align 4 ; <float> [#uses=1]
- %87 = getelementptr float* %72, i32 3 ; <float*> [#uses=1]
- %88 = load float* %87, align 4 ; <float> [#uses=1]
+ %85 = getelementptr float, float* %72, i32 2 ; <float*> [#uses=1]
+ %86 = load float, float* %85, align 4 ; <float> [#uses=1]
+ %87 = getelementptr float, float* %72, i32 3 ; <float*> [#uses=1]
+ %88 = load float, float* %87, align 4 ; <float> [#uses=1]
%89 = fmul float %86, 6.553500e+04 ; <float> [#uses=1]
%90 = fadd float %89, 5.000000e-01 ; <float> [#uses=1]
%91 = fmul float %88, 6.553500e+04 ; <float> [#uses=1]
@@ -168,62 +168,62 @@ bb2315: ; preds = %bb2295
br i1 %114, label %bb2318, label %bb2317
bb2317: ; preds = %bb2315
- %115 = load i64* %2, align 16 ; <i64> [#uses=1]
- %116 = call i32 (...)* @_u16a_cm( i64 %115, %struct.xx_t* %159, double 0.000000e+00, double 1.047551e+06 ) nounwind ; <i32> [#uses=1]
+ %115 = load i64, i64* %2, align 16 ; <i64> [#uses=1]
+ %116 = call i32 (...) @_u16a_cm( i64 %115, %struct.xx_t* %159, double 0.000000e+00, double 1.047551e+06 ) nounwind ; <i32> [#uses=1]
%117 = sext i32 %116 to i64 ; <i64> [#uses=1]
store i64 %117, i64* %2, align 16
- %118 = load i64* %3, align 8 ; <i64> [#uses=1]
- %119 = call i32 (...)* @_u16a_cm( i64 %118, %struct.xx_t* %159, double 0.000000e+00, double 1.047551e+06 ) nounwind ; <i32> [#uses=1]
+ %118 = load i64, i64* %3, align 8 ; <i64> [#uses=1]
+ %119 = call i32 (...) @_u16a_cm( i64 %118, %struct.xx_t* %159, double 0.000000e+00, double 1.047551e+06 ) nounwind ; <i32> [#uses=1]
%120 = sext i32 %119 to i64 ; <i64> [#uses=1]
store i64 %120, i64* %3, align 8
- %121 = load i64* %4, align 16 ; <i64> [#uses=1]
- %122 = call i32 (...)* @_u16a_cm( i64 %121, %struct.xx_t* %159, double 0.000000e+00, double 1.047551e+06 ) nounwind ; <i32> [#uses=1]
+ %121 = load i64, i64* %4, align 16 ; <i64> [#uses=1]
+ %122 = call i32 (...) @_u16a_cm( i64 %121, %struct.xx_t* %159, double 0.000000e+00, double 1.047551e+06 ) nounwind ; <i32> [#uses=1]
%123 = sext i32 %122 to i64 ; <i64> [#uses=1]
store i64 %123, i64* %4, align 16
- %124 = load i64* %5, align 8 ; <i64> [#uses=1]
- %125 = call i32 (...)* @_u16a_cm( i64 %124, %struct.xx_t* %159, double 0.000000e+00, double 1.047551e+06 ) nounwind ; <i32> [#uses=0]
+ %124 = load i64, i64* %5, align 8 ; <i64> [#uses=1]
+ %125 = call i32 (...) @_u16a_cm( i64 %124, %struct.xx_t* %159, double 0.000000e+00, double 1.047551e+06 ) nounwind ; <i32> [#uses=0]
unreachable
bb2318: ; preds = %bb2315
- %126 = getelementptr %struct.CGLSI* %src, i32 %indvar5021, i32 8 ; <%struct.vv_t*> [#uses=1]
+ %126 = getelementptr %struct.CGLSI, %struct.CGLSI* %src, i32 %indvar5021, i32 8 ; <%struct.vv_t*> [#uses=1]
%127 = bitcast %struct.vv_t* %126 to i64* ; <i64*> [#uses=1]
- %128 = load i64* %127, align 8 ; <i64> [#uses=1]
+ %128 = load i64, i64* %127, align 8 ; <i64> [#uses=1]
%129 = trunc i64 %128 to i32 ; <i32> [#uses=4]
- %130 = load i64* %2, align 16 ; <i64> [#uses=1]
- %131 = call i32 (...)* @_u16_ff( i64 %130, i32 %129 ) nounwind ; <i32> [#uses=1]
+ %130 = load i64, i64* %2, align 16 ; <i64> [#uses=1]
+ %131 = call i32 (...) @_u16_ff( i64 %130, i32 %129 ) nounwind ; <i32> [#uses=1]
%132 = sext i32 %131 to i64 ; <i64> [#uses=1]
store i64 %132, i64* %2, align 16
- %133 = load i64* %3, align 8 ; <i64> [#uses=1]
- %134 = call i32 (...)* @_u16_ff( i64 %133, i32 %129 ) nounwind ; <i32> [#uses=1]
+ %133 = load i64, i64* %3, align 8 ; <i64> [#uses=1]
+ %134 = call i32 (...) @_u16_ff( i64 %133, i32 %129 ) nounwind ; <i32> [#uses=1]
%135 = sext i32 %134 to i64 ; <i64> [#uses=1]
store i64 %135, i64* %3, align 8
- %136 = load i64* %4, align 16 ; <i64> [#uses=1]
- %137 = call i32 (...)* @_u16_ff( i64 %136, i32 %129 ) nounwind ; <i32> [#uses=1]
+ %136 = load i64, i64* %4, align 16 ; <i64> [#uses=1]
+ %137 = call i32 (...) @_u16_ff( i64 %136, i32 %129 ) nounwind ; <i32> [#uses=1]
%138 = sext i32 %137 to i64 ; <i64> [#uses=1]
store i64 %138, i64* %4, align 16
- %139 = load i64* %5, align 8 ; <i64> [#uses=1]
- %140 = call i32 (...)* @_u16_ff( i64 %139, i32 %129 ) nounwind ; <i32> [#uses=0]
+ %139 = load i64, i64* %5, align 8 ; <i64> [#uses=1]
+ %140 = call i32 (...) @_u16_ff( i64 %139, i32 %129 ) nounwind ; <i32> [#uses=0]
unreachable
bb2319: ; preds = %bb2326
- %141 = getelementptr %struct.CGLSI* %src, i32 %indvar5021, i32 2 ; <i8**> [#uses=1]
- %142 = load i8** %141, align 4 ; <i8*> [#uses=4]
- %143 = getelementptr i8* %142, i32 0 ; <i8*> [#uses=1]
- %144 = call i32 (...)* @_u16_sf32( double 0.000000e+00, double 6.553500e+04, double 5.000000e-01, i8* %143 ) nounwind ; <i32> [#uses=1]
+ %141 = getelementptr %struct.CGLSI, %struct.CGLSI* %src, i32 %indvar5021, i32 2 ; <i8**> [#uses=1]
+ %142 = load i8*, i8** %141, align 4 ; <i8*> [#uses=4]
+ %143 = getelementptr i8, i8* %142, i32 0 ; <i8*> [#uses=1]
+ %144 = call i32 (...) @_u16_sf32( double 0.000000e+00, double 6.553500e+04, double 5.000000e-01, i8* %143 ) nounwind ; <i32> [#uses=1]
%145 = sext i32 %144 to i64 ; <i64> [#uses=2]
- %146 = getelementptr i8* %142, i32 0 ; <i8*> [#uses=1]
- %147 = call i32 (...)* @_u16_sf32( double 0.000000e+00, double 6.553500e+04, double 5.000000e-01, i8* %146 ) nounwind ; <i32> [#uses=1]
+ %146 = getelementptr i8, i8* %142, i32 0 ; <i8*> [#uses=1]
+ %147 = call i32 (...) @_u16_sf32( double 0.000000e+00, double 6.553500e+04, double 5.000000e-01, i8* %146 ) nounwind ; <i32> [#uses=1]
%148 = sext i32 %147 to i64 ; <i64> [#uses=2]
%149 = shl i64 %145, 48 ; <i64> [#uses=0]
%150 = shl i64 %148, 32 ; <i64> [#uses=1]
%151 = and i64 %150, 281470681743360 ; <i64> [#uses=0]
store i64 %145, i64* %2, align 16
store i64 %148, i64* %3, align 8
- %152 = getelementptr i8* %142, i32 0 ; <i8*> [#uses=1]
- %153 = call i32 (...)* @_u16_sf32( double 0.000000e+00, double 6.553500e+04, double 5.000000e-01, i8* %152 ) nounwind ; <i32> [#uses=1]
+ %152 = getelementptr i8, i8* %142, i32 0 ; <i8*> [#uses=1]
+ %153 = call i32 (...) @_u16_sf32( double 0.000000e+00, double 6.553500e+04, double 5.000000e-01, i8* %152 ) nounwind ; <i32> [#uses=1]
%154 = sext i32 %153 to i64 ; <i64> [#uses=0]
- %155 = getelementptr i8* %142, i32 0 ; <i8*> [#uses=1]
- %156 = call i32 (...)* @_u16_sf32( double 0.000000e+00, double 6.553500e+04, double 5.000000e-01, i8* %155 ) nounwind ; <i32> [#uses=0]
+ %155 = getelementptr i8, i8* %142, i32 0 ; <i8*> [#uses=1]
+ %156 = call i32 (...) @_u16_sf32( double 0.000000e+00, double 6.553500e+04, double 5.000000e-01, i8* %155 ) nounwind ; <i32> [#uses=0]
unreachable
bb2325: ; preds = %bb2326, %bb2295
@@ -233,10 +233,10 @@ bb2325: ; preds = %bb2326, %bb2295
bb2326: ; preds = %bb2325, %bb.nph4945
%indvar5021 = phi i32 [ 0, %bb.nph4945 ], [ %indvar.next5145, %bb2325 ] ; <i32> [#uses=6]
%157 = icmp slt i32 %indvar5021, %n ; <i1> [#uses=0]
- %158 = getelementptr %struct.CGLSI* %src, i32 %indvar5021, i32 10 ; <%struct.xx_t**> [#uses=1]
- %159 = load %struct.xx_t** %158, align 4 ; <%struct.xx_t*> [#uses=5]
- %160 = getelementptr %struct.CGLSI* %src, i32 %indvar5021, i32 1 ; <i32*> [#uses=1]
- %161 = load i32* %160, align 4 ; <i32> [#uses=1]
+ %158 = getelementptr %struct.CGLSI, %struct.CGLSI* %src, i32 %indvar5021, i32 10 ; <%struct.xx_t**> [#uses=1]
+ %159 = load %struct.xx_t*, %struct.xx_t** %158, align 4 ; <%struct.xx_t*> [#uses=5]
+ %160 = getelementptr %struct.CGLSI, %struct.CGLSI* %src, i32 %indvar5021, i32 1 ; <i32*> [#uses=1]
+ %161 = load i32, i32* %160, align 4 ; <i32> [#uses=1]
%162 = and i32 %161, 255 ; <i32> [#uses=1]
switch i32 %162, label %bb2325 [
i32 59, label %bb2217
diff --git a/test/CodeGen/PowerPC/2008-10-28-UnprocessedNode.ll b/test/CodeGen/PowerPC/2008-10-28-UnprocessedNode.ll
index f474a6d7cc22..2372b2f0b9dd 100644
--- a/test/CodeGen/PowerPC/2008-10-28-UnprocessedNode.ll
+++ b/test/CodeGen/PowerPC/2008-10-28-UnprocessedNode.ll
@@ -2,7 +2,7 @@
define void @__divtc3({ ppc_fp128, ppc_fp128 }* noalias sret %agg.result, ppc_fp128 %a, ppc_fp128 %b, ppc_fp128 %c, ppc_fp128 %d) nounwind {
entry:
- %imag59 = load ppc_fp128* null, align 8 ; <ppc_fp128> [#uses=1]
+ %imag59 = load ppc_fp128, ppc_fp128* null, align 8 ; <ppc_fp128> [#uses=1]
%0 = fmul ppc_fp128 0xM00000000000000000000000000000000, %imag59 ; <ppc_fp128> [#uses=1]
%1 = fmul ppc_fp128 0xM00000000000000000000000000000000, 0xM00000000000000000000000000000000 ; <ppc_fp128> [#uses=1]
%2 = fadd ppc_fp128 %0, %1 ; <ppc_fp128> [#uses=1]
diff --git a/test/CodeGen/PowerPC/2008-10-31-PPCF128Libcalls.ll b/test/CodeGen/PowerPC/2008-10-31-PPCF128Libcalls.ll
index 8322a8430815..fbe1287776f1 100644
--- a/test/CodeGen/PowerPC/2008-10-31-PPCF128Libcalls.ll
+++ b/test/CodeGen/PowerPC/2008-10-31-PPCF128Libcalls.ll
@@ -9,17 +9,17 @@ target triple = "powerpc-apple-darwin10.0"
define void @foo() nounwind {
entry:
- %0 = load ppc_fp128* @a, align 16 ; <ppc_fp128> [#uses=1]
+ %0 = load ppc_fp128, ppc_fp128* @a, align 16 ; <ppc_fp128> [#uses=1]
%1 = call ppc_fp128 @llvm.sqrt.ppcf128(ppc_fp128 %0) ; <ppc_fp128> [#uses=1]
store ppc_fp128 %1, ppc_fp128* @a, align 16
- %2 = load ppc_fp128* @b, align 16 ; <ppc_fp128> [#uses=1]
+ %2 = load ppc_fp128, ppc_fp128* @b, align 16 ; <ppc_fp128> [#uses=1]
%3 = call ppc_fp128 @"\01_sinl$LDBL128"(ppc_fp128 %2) nounwind readonly ; <ppc_fp128> [#uses=1]
store ppc_fp128 %3, ppc_fp128* @b, align 16
- %4 = load ppc_fp128* @c, align 16 ; <ppc_fp128> [#uses=1]
+ %4 = load ppc_fp128, ppc_fp128* @c, align 16 ; <ppc_fp128> [#uses=1]
%5 = call ppc_fp128 @"\01_cosl$LDBL128"(ppc_fp128 %4) nounwind readonly ; <ppc_fp128> [#uses=1]
store ppc_fp128 %5, ppc_fp128* @c, align 16
- %6 = load ppc_fp128* @d, align 16 ; <ppc_fp128> [#uses=1]
- %7 = load ppc_fp128* @c, align 16 ; <ppc_fp128> [#uses=1]
+ %6 = load ppc_fp128, ppc_fp128* @d, align 16 ; <ppc_fp128> [#uses=1]
+ %7 = load ppc_fp128, ppc_fp128* @c, align 16 ; <ppc_fp128> [#uses=1]
%8 = call ppc_fp128 @llvm.pow.ppcf128(ppc_fp128 %6, ppc_fp128 %7) ; <ppc_fp128> [#uses=1]
store ppc_fp128 %8, ppc_fp128* @d, align 16
br label %return
diff --git a/test/CodeGen/PowerPC/2009-01-16-DeclareISelBug.ll b/test/CodeGen/PowerPC/2009-01-16-DeclareISelBug.ll
index ce8e72df2616..74356d38f8a2 100644
--- a/test/CodeGen/PowerPC/2009-01-16-DeclareISelBug.ll
+++ b/test/CodeGen/PowerPC/2009-01-16-DeclareISelBug.ll
@@ -5,7 +5,7 @@
define i32 @main() nounwind {
entry:
- %0 = call i8* @fopen(i8* getelementptr ([13 x i8]* @"\01LC", i32 0, i32 0), i8* null) nounwind ; <i8*> [#uses=0]
+ %0 = call i8* @fopen(i8* getelementptr ([13 x i8], [13 x i8]* @"\01LC", i32 0, i32 0), i8* null) nounwind ; <i8*> [#uses=0]
unreachable
}
diff --git a/test/CodeGen/PowerPC/2009-03-17-LSRBug.ll b/test/CodeGen/PowerPC/2009-03-17-LSRBug.ll
index 172531e5db49..f5a7bf8de4c3 100644
--- a/test/CodeGen/PowerPC/2009-03-17-LSRBug.ll
+++ b/test/CodeGen/PowerPC/2009-03-17-LSRBug.ll
@@ -9,7 +9,7 @@ bb20.loopexit: ; preds = %entry
ret void
bb21: ; preds = %entry
- %0 = getelementptr i8* %a, i32 0 ; <i8*> [#uses=2]
+ %0 = getelementptr i8, i8* %a, i32 0 ; <i8*> [#uses=2]
br label %bb35
bb29: ; preds = %bb35
@@ -17,7 +17,7 @@ bb29: ; preds = %bb35
bb7.i252: ; preds = %bb7.i252, %bb29
%pj.0.rec.i247 = phi i32 [ %indvar.next488, %bb7.i252 ], [ 0, %bb29 ] ; <i32> [#uses=2]
- %pi.0.i248 = getelementptr i8* %pa.1, i32 %pj.0.rec.i247 ; <i8*> [#uses=0]
+ %pi.0.i248 = getelementptr i8, i8* %pa.1, i32 %pj.0.rec.i247 ; <i8*> [#uses=0]
%indvar.next488 = add i32 %pj.0.rec.i247, 1 ; <i32> [#uses=1]
br i1 false, label %bb34, label %bb7.i252
@@ -45,7 +45,7 @@ bb7.i161: ; preds = %bb7.i161, %bb50
%pj.0.rec.i156 = phi i32 [ %indvar.next394, %bb7.i161 ], [ 0, %bb50 ] ; <i32> [#uses=2]
%.sum279 = sub i32 %pj.0.rec.i156, %min ; <i32> [#uses=1]
%pb.0.sum542 = add i32 %pb.0.rec, %.sum279 ; <i32> [#uses=1]
- %pj.0.i158 = getelementptr i8* %0, i32 %pb.0.sum542 ; <i8*> [#uses=0]
+ %pj.0.i158 = getelementptr i8, i8* %0, i32 %pb.0.sum542 ; <i8*> [#uses=0]
%indvar.next394 = add i32 %pj.0.rec.i156, 1 ; <i32> [#uses=1]
br label %bb7.i161
}
diff --git a/test/CodeGen/PowerPC/2009-08-17-inline-asm-addr-mode-breakage.ll b/test/CodeGen/PowerPC/2009-08-17-inline-asm-addr-mode-breakage.ll
index 91253daae396..289e09b2dae2 100644
--- a/test/CodeGen/PowerPC/2009-08-17-inline-asm-addr-mode-breakage.ll
+++ b/test/CodeGen/PowerPC/2009-08-17-inline-asm-addr-mode-breakage.ll
@@ -15,8 +15,8 @@ entry:
%y_addr = alloca i32 ; <i32*> [#uses=2]
%"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
store i32 %y, i32* %y_addr
- %0 = load i32* %y_addr, align 4 ; <i32> [#uses=1]
- %1 = getelementptr inbounds [0 x i32]* @x, i32 0, i32 %0 ; <i32*> [#uses=1]
+ %0 = load i32, i32* %y_addr, align 4 ; <i32> [#uses=1]
+ %1 = getelementptr inbounds [0 x i32], [0 x i32]* @x, i32 0, i32 %0 ; <i32*> [#uses=1]
call void asm sideeffect "isync\0A\09eieio\0A\09stw $1, $0", "=*o,r,~{memory}"(i32* %1, i32 0) nounwind
br label %return
diff --git a/test/CodeGen/PowerPC/2009-11-15-ProcImpDefsBug.ll b/test/CodeGen/PowerPC/2009-11-15-ProcImpDefsBug.ll
index 2d9d16ae6d83..61a9a4fbd4c0 100644
--- a/test/CodeGen/PowerPC/2009-11-15-ProcImpDefsBug.ll
+++ b/test/CodeGen/PowerPC/2009-11-15-ProcImpDefsBug.ll
@@ -100,6 +100,6 @@ bb48.3: ; preds = %bb49.2
br label %bb49.3
bb48.4: ; preds = %bb49.3
- %0 = getelementptr inbounds [5 x i64*]* undef, i32 0, i32 %c_ix.0.3 ; <i64**> [#uses=0]
+ %0 = getelementptr inbounds [5 x i64*], [5 x i64*]* undef, i32 0, i32 %c_ix.0.3 ; <i64**> [#uses=0]
br label %bb51
}
diff --git a/test/CodeGen/PowerPC/2010-02-12-saveCR.ll b/test/CodeGen/PowerPC/2010-02-12-saveCR.ll
index b0c37b80ed2f..5932b6d75c0a 100644
--- a/test/CodeGen/PowerPC/2010-02-12-saveCR.ll
+++ b/test/CodeGen/PowerPC/2010-02-12-saveCR.ll
@@ -9,13 +9,13 @@ entry:
; CHECK: mfcr [[T1:r[0-9]+]] ; cr2
; CHECK: lis [[T2:r[0-9]+]], 1
; CHECK: addi r3, r1, 72
-; CHECK: rlwinm [[T1]], [[T1]], 8, 0, 31
+; CHECK: rotlwi [[T1]], [[T1]], 8
; CHECK: ori [[T2]], [[T2]], 34540
; CHECK: stwx [[T1]], r1, [[T2]]
; CHECK: lis [[T3:r[0-9]+]], 1
; CHECK: mfcr [[T4:r[0-9]+]] ; cr3
; CHECK: ori [[T3]], [[T3]], 34536
-; CHECK: rlwinm [[T4]], [[T4]], 12, 0, 31
+; CHECK: rotlwi [[T4]], [[T4]], 12
; CHECK: stwx [[T4]], r1, [[T3]]
%x = alloca [100000 x i8] ; <[100000 x i8]*> [#uses=1]
%"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
@@ -28,12 +28,12 @@ return: ; preds = %entry
; CHECK: lis [[T1:r[0-9]+]], 1
; CHECK: ori [[T1]], [[T1]], 34536
; CHECK: lwzx [[T1]], r1, [[T1]]
-; CHECK: rlwinm [[T1]], [[T1]], 20, 0, 31
+; CHECK: rotlwi [[T1]], [[T1]], 20
; CHECK: mtcrf 16, [[T1]]
; CHECK: lis [[T1]], 1
; CHECK: ori [[T1]], [[T1]], 34540
; CHECK: lwzx [[T1]], r1, [[T1]]
-; CHECK: rlwinm [[T1]], [[T1]], 24, 0, 31
+; CHECK: rotlwi [[T1]], [[T1]], 24
; CHECK: mtcrf 32, [[T1]]
ret void
}
diff --git a/test/CodeGen/PowerPC/2010-03-09-indirect-call.ll b/test/CodeGen/PowerPC/2010-03-09-indirect-call.ll
index b95ac6880758..0599b74a69f5 100644
--- a/test/CodeGen/PowerPC/2010-03-09-indirect-call.ll
+++ b/test/CodeGen/PowerPC/2010-03-09-indirect-call.ll
@@ -11,8 +11,8 @@ define void @foo() nounwind ssp {
entry:
; CHECK: mtctr r12
; CHECK: bctrl
- %0 = load void (...)** @p, align 4 ; <void (...)*> [#uses=1]
- call void (...)* %0() nounwind
+ %0 = load void (...)*, void (...)** @p, align 4 ; <void (...)*> [#uses=1]
+ call void (...) %0() nounwind
br label %return
return: ; preds = %entry
diff --git a/test/CodeGen/PowerPC/2010-12-18-PPCStackRefs.ll b/test/CodeGen/PowerPC/2010-12-18-PPCStackRefs.ll
index a25ce07e83bf..1f320a84a4e6 100644
--- a/test/CodeGen/PowerPC/2010-12-18-PPCStackRefs.ll
+++ b/test/CodeGen/PowerPC/2010-12-18-PPCStackRefs.ll
@@ -12,11 +12,11 @@ entry:
%0 = alloca i32
%"alloca point" = bitcast i32 0 to i32
store i32 0, i32* %0, align 4
- %1 = load i32* %0, align 4
+ %1 = load i32, i32* %0, align 4
store i32 %1, i32* %retval, align 4
br label %return
return: ; preds = %entry
- %retval1 = load i32* %retval
+ %retval1 = load i32, i32* %retval
ret i32 %retval1
}
diff --git a/test/CodeGen/PowerPC/2011-12-05-NoSpillDupCR.ll b/test/CodeGen/PowerPC/2011-12-05-NoSpillDupCR.ll
index e7bc5bfa37ec..e5920911ee2f 100644
--- a/test/CodeGen/PowerPC/2011-12-05-NoSpillDupCR.ll
+++ b/test/CodeGen/PowerPC/2011-12-05-NoSpillDupCR.ll
@@ -28,7 +28,7 @@ declare void @check(i32 %name) nounwind
define i32 @s122(i32 %n1, i32 %n3) nounwind {
entry:
- %call = tail call i32 @init(i8* getelementptr inbounds ([6 x i8]* @.str11, i64 0, i64 0))
+ %call = tail call i32 @init(i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str11, i64 0, i64 0))
%call1 = tail call i64 @clock() nounwind
%sub = add nsw i32 %n1, -1
%cmp316 = icmp slt i32 %sub, 32000
@@ -46,10 +46,10 @@ for.body4.us: ; preds = %for.body4.lr.ph.us,
%sub5.us = sub i64 31999, %indvars.iv20
%sext = shl i64 %sub5.us, 32
%idxprom.us = ashr exact i64 %sext, 32
- %arrayidx.us = getelementptr inbounds [32000 x float]* @b, i64 0, i64 %idxprom.us
- %2 = load float* %arrayidx.us, align 4
- %arrayidx7.us = getelementptr inbounds [32000 x float]* @a, i64 0, i64 %indvars.iv
- %3 = load float* %arrayidx7.us, align 4
+ %arrayidx.us = getelementptr inbounds [32000 x float], [32000 x float]* @b, i64 0, i64 %idxprom.us
+ %2 = load float, float* %arrayidx.us, align 4
+ %arrayidx7.us = getelementptr inbounds [32000 x float], [32000 x float]* @a, i64 0, i64 %indvars.iv
+ %3 = load float, float* %arrayidx7.us, align 4
%add8.us = fadd float %3, %2
store float %add8.us, float* %arrayidx7.us, align 4
%indvars.iv.next = add i64 %indvars.iv, %1
@@ -66,12 +66,12 @@ for.end12: ; preds = %for.end.7, %for.end
%sub14 = sub nsw i64 %call13, %call1
%conv = sitofp i64 %sub14 to double
%div = fdiv double %conv, 1.000000e+06
- %call15 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([14 x i8]* @.str152, i64 0, i64 0), double %div) nounwind
+ %call15 = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([14 x i8], [14 x i8]* @.str152, i64 0, i64 0), double %div) nounwind
tail call void @check(i32 1)
ret i32 0
for.body4.lr.ph.us.1: ; preds = %for.body4.us
- %call10.us = tail call i32 @dummy(float* getelementptr inbounds ([32000 x float]* @a, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @b, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @c, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @d, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @e, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @aa, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @bb, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @cc, i64 0, i64 0), float 0.000000e+00) nounwind
+ %call10.us = tail call i32 @dummy(float* getelementptr inbounds ([32000 x float], [32000 x float]* @a, i64 0, i64 0), float* getelementptr inbounds ([32000 x float], [32000 x float]* @b, i64 0, i64 0), float* getelementptr inbounds ([32000 x float], [32000 x float]* @c, i64 0, i64 0), float* getelementptr inbounds ([32000 x float], [32000 x float]* @d, i64 0, i64 0), float* getelementptr inbounds ([32000 x float], [32000 x float]* @e, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]], [256 x [256 x float]]* @aa, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]], [256 x [256 x float]]* @bb, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]], [256 x [256 x float]]* @cc, i64 0, i64 0), float 0.000000e+00) nounwind
br label %for.body4.us.1
for.body4.us.1: ; preds = %for.body4.us.1, %for.body4.lr.ph.us.1
@@ -81,10 +81,10 @@ for.body4.us.1: ; preds = %for.body4.us.1, %fo
%sub5.us.1 = sub i64 31999, %indvars.iv20.1
%sext23 = shl i64 %sub5.us.1, 32
%idxprom.us.1 = ashr exact i64 %sext23, 32
- %arrayidx.us.1 = getelementptr inbounds [32000 x float]* @b, i64 0, i64 %idxprom.us.1
- %5 = load float* %arrayidx.us.1, align 4
- %arrayidx7.us.1 = getelementptr inbounds [32000 x float]* @a, i64 0, i64 %indvars.iv.1
- %6 = load float* %arrayidx7.us.1, align 4
+ %arrayidx.us.1 = getelementptr inbounds [32000 x float], [32000 x float]* @b, i64 0, i64 %idxprom.us.1
+ %5 = load float, float* %arrayidx.us.1, align 4
+ %arrayidx7.us.1 = getelementptr inbounds [32000 x float], [32000 x float]* @a, i64 0, i64 %indvars.iv.1
+ %6 = load float, float* %arrayidx7.us.1, align 4
%add8.us.1 = fadd float %6, %5
store float %add8.us.1, float* %arrayidx7.us.1, align 4
%indvars.iv.next.1 = add i64 %indvars.iv.1, %1
@@ -93,7 +93,7 @@ for.body4.us.1: ; preds = %for.body4.us.1, %fo
br i1 %cmp3.us.1, label %for.body4.us.1, label %for.body4.lr.ph.us.2
for.body4.lr.ph.us.2: ; preds = %for.body4.us.1
- %call10.us.1 = tail call i32 @dummy(float* getelementptr inbounds ([32000 x float]* @a, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @b, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @c, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @d, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @e, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @aa, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @bb, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @cc, i64 0, i64 0), float 0.000000e+00) nounwind
+ %call10.us.1 = tail call i32 @dummy(float* getelementptr inbounds ([32000 x float], [32000 x float]* @a, i64 0, i64 0), float* getelementptr inbounds ([32000 x float], [32000 x float]* @b, i64 0, i64 0), float* getelementptr inbounds ([32000 x float], [32000 x float]* @c, i64 0, i64 0), float* getelementptr inbounds ([32000 x float], [32000 x float]* @d, i64 0, i64 0), float* getelementptr inbounds ([32000 x float], [32000 x float]* @e, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]], [256 x [256 x float]]* @aa, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]], [256 x [256 x float]]* @bb, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]], [256 x [256 x float]]* @cc, i64 0, i64 0), float 0.000000e+00) nounwind
br label %for.body4.us.2
for.body4.us.2: ; preds = %for.body4.us.2, %for.body4.lr.ph.us.2
@@ -103,10 +103,10 @@ for.body4.us.2: ; preds = %for.body4.us.2, %fo
%sub5.us.2 = sub i64 31999, %indvars.iv20.2
%sext24 = shl i64 %sub5.us.2, 32
%idxprom.us.2 = ashr exact i64 %sext24, 32
- %arrayidx.us.2 = getelementptr inbounds [32000 x float]* @b, i64 0, i64 %idxprom.us.2
- %8 = load float* %arrayidx.us.2, align 4
- %arrayidx7.us.2 = getelementptr inbounds [32000 x float]* @a, i64 0, i64 %indvars.iv.2
- %9 = load float* %arrayidx7.us.2, align 4
+ %arrayidx.us.2 = getelementptr inbounds [32000 x float], [32000 x float]* @b, i64 0, i64 %idxprom.us.2
+ %8 = load float, float* %arrayidx.us.2, align 4
+ %arrayidx7.us.2 = getelementptr inbounds [32000 x float], [32000 x float]* @a, i64 0, i64 %indvars.iv.2
+ %9 = load float, float* %arrayidx7.us.2, align 4
%add8.us.2 = fadd float %9, %8
store float %add8.us.2, float* %arrayidx7.us.2, align 4
%indvars.iv.next.2 = add i64 %indvars.iv.2, %1
@@ -115,7 +115,7 @@ for.body4.us.2: ; preds = %for.body4.us.2, %fo
br i1 %cmp3.us.2, label %for.body4.us.2, label %for.body4.lr.ph.us.3
for.body4.lr.ph.us.3: ; preds = %for.body4.us.2
- %call10.us.2 = tail call i32 @dummy(float* getelementptr inbounds ([32000 x float]* @a, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @b, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @c, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @d, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @e, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @aa, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @bb, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @cc, i64 0, i64 0), float 0.000000e+00) nounwind
+ %call10.us.2 = tail call i32 @dummy(float* getelementptr inbounds ([32000 x float], [32000 x float]* @a, i64 0, i64 0), float* getelementptr inbounds ([32000 x float], [32000 x float]* @b, i64 0, i64 0), float* getelementptr inbounds ([32000 x float], [32000 x float]* @c, i64 0, i64 0), float* getelementptr inbounds ([32000 x float], [32000 x float]* @d, i64 0, i64 0), float* getelementptr inbounds ([32000 x float], [32000 x float]* @e, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]], [256 x [256 x float]]* @aa, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]], [256 x [256 x float]]* @bb, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]], [256 x [256 x float]]* @cc, i64 0, i64 0), float 0.000000e+00) nounwind
br label %for.body4.us.3
for.body4.us.3: ; preds = %for.body4.us.3, %for.body4.lr.ph.us.3
@@ -125,10 +125,10 @@ for.body4.us.3: ; preds = %for.body4.us.3, %fo
%sub5.us.3 = sub i64 31999, %indvars.iv20.3
%sext25 = shl i64 %sub5.us.3, 32
%idxprom.us.3 = ashr exact i64 %sext25, 32
- %arrayidx.us.3 = getelementptr inbounds [32000 x float]* @b, i64 0, i64 %idxprom.us.3
- %11 = load float* %arrayidx.us.3, align 4
- %arrayidx7.us.3 = getelementptr inbounds [32000 x float]* @a, i64 0, i64 %indvars.iv.3
- %12 = load float* %arrayidx7.us.3, align 4
+ %arrayidx.us.3 = getelementptr inbounds [32000 x float], [32000 x float]* @b, i64 0, i64 %idxprom.us.3
+ %11 = load float, float* %arrayidx.us.3, align 4
+ %arrayidx7.us.3 = getelementptr inbounds [32000 x float], [32000 x float]* @a, i64 0, i64 %indvars.iv.3
+ %12 = load float, float* %arrayidx7.us.3, align 4
%add8.us.3 = fadd float %12, %11
store float %add8.us.3, float* %arrayidx7.us.3, align 4
%indvars.iv.next.3 = add i64 %indvars.iv.3, %1
@@ -137,7 +137,7 @@ for.body4.us.3: ; preds = %for.body4.us.3, %fo
br i1 %cmp3.us.3, label %for.body4.us.3, label %for.body4.lr.ph.us.4
for.body4.lr.ph.us.4: ; preds = %for.body4.us.3
- %call10.us.3 = tail call i32 @dummy(float* getelementptr inbounds ([32000 x float]* @a, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @b, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @c, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @d, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @e, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @aa, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @bb, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @cc, i64 0, i64 0), float 0.000000e+00) nounwind
+ %call10.us.3 = tail call i32 @dummy(float* getelementptr inbounds ([32000 x float], [32000 x float]* @a, i64 0, i64 0), float* getelementptr inbounds ([32000 x float], [32000 x float]* @b, i64 0, i64 0), float* getelementptr inbounds ([32000 x float], [32000 x float]* @c, i64 0, i64 0), float* getelementptr inbounds ([32000 x float], [32000 x float]* @d, i64 0, i64 0), float* getelementptr inbounds ([32000 x float], [32000 x float]* @e, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]], [256 x [256 x float]]* @aa, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]], [256 x [256 x float]]* @bb, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]], [256 x [256 x float]]* @cc, i64 0, i64 0), float 0.000000e+00) nounwind
br label %for.body4.us.4
for.body4.us.4: ; preds = %for.body4.us.4, %for.body4.lr.ph.us.4
@@ -147,10 +147,10 @@ for.body4.us.4: ; preds = %for.body4.us.4, %fo
%sub5.us.4 = sub i64 31999, %indvars.iv20.4
%sext26 = shl i64 %sub5.us.4, 32
%idxprom.us.4 = ashr exact i64 %sext26, 32
- %arrayidx.us.4 = getelementptr inbounds [32000 x float]* @b, i64 0, i64 %idxprom.us.4
- %14 = load float* %arrayidx.us.4, align 4
- %arrayidx7.us.4 = getelementptr inbounds [32000 x float]* @a, i64 0, i64 %indvars.iv.4
- %15 = load float* %arrayidx7.us.4, align 4
+ %arrayidx.us.4 = getelementptr inbounds [32000 x float], [32000 x float]* @b, i64 0, i64 %idxprom.us.4
+ %14 = load float, float* %arrayidx.us.4, align 4
+ %arrayidx7.us.4 = getelementptr inbounds [32000 x float], [32000 x float]* @a, i64 0, i64 %indvars.iv.4
+ %15 = load float, float* %arrayidx7.us.4, align 4
%add8.us.4 = fadd float %15, %14
store float %add8.us.4, float* %arrayidx7.us.4, align 4
%indvars.iv.next.4 = add i64 %indvars.iv.4, %1
@@ -159,21 +159,21 @@ for.body4.us.4: ; preds = %for.body4.us.4, %fo
br i1 %cmp3.us.4, label %for.body4.us.4, label %for.end.us.4
for.end.us.4: ; preds = %for.body4.us.4
- %call10.us.4 = tail call i32 @dummy(float* getelementptr inbounds ([32000 x float]* @a, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @b, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @c, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @d, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @e, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @aa, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @bb, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @cc, i64 0, i64 0), float 0.000000e+00) nounwind
+ %call10.us.4 = tail call i32 @dummy(float* getelementptr inbounds ([32000 x float], [32000 x float]* @a, i64 0, i64 0), float* getelementptr inbounds ([32000 x float], [32000 x float]* @b, i64 0, i64 0), float* getelementptr inbounds ([32000 x float], [32000 x float]* @c, i64 0, i64 0), float* getelementptr inbounds ([32000 x float], [32000 x float]* @d, i64 0, i64 0), float* getelementptr inbounds ([32000 x float], [32000 x float]* @e, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]], [256 x [256 x float]]* @aa, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]], [256 x [256 x float]]* @bb, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]], [256 x [256 x float]]* @cc, i64 0, i64 0), float 0.000000e+00) nounwind
%inc.us.4 = add nsw i32 %nl.019.us, 5
%exitcond.4 = icmp eq i32 %inc.us.4, 200000
br i1 %exitcond.4, label %for.end12, label %for.body4.lr.ph.us
for.end.7: ; preds = %entry, %for.end.7
%nl.019 = phi i32 [ %inc.7, %for.end.7 ], [ 0, %entry ]
- %call10 = tail call i32 @dummy(float* getelementptr inbounds ([32000 x float]* @a, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @b, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @c, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @d, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @e, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @aa, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @bb, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @cc, i64 0, i64 0), float 0.000000e+00) nounwind
- %call10.1 = tail call i32 @dummy(float* getelementptr inbounds ([32000 x float]* @a, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @b, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @c, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @d, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @e, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @aa, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @bb, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @cc, i64 0, i64 0), float 0.000000e+00) nounwind
- %call10.2 = tail call i32 @dummy(float* getelementptr inbounds ([32000 x float]* @a, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @b, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @c, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @d, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @e, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @aa, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @bb, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @cc, i64 0, i64 0), float 0.000000e+00) nounwind
- %call10.3 = tail call i32 @dummy(float* getelementptr inbounds ([32000 x float]* @a, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @b, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @c, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @d, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @e, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @aa, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @bb, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @cc, i64 0, i64 0), float 0.000000e+00) nounwind
- %call10.4 = tail call i32 @dummy(float* getelementptr inbounds ([32000 x float]* @a, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @b, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @c, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @d, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @e, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @aa, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @bb, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @cc, i64 0, i64 0), float 0.000000e+00) nounwind
- %call10.5 = tail call i32 @dummy(float* getelementptr inbounds ([32000 x float]* @a, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @b, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @c, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @d, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @e, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @aa, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @bb, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @cc, i64 0, i64 0), float 0.000000e+00) nounwind
- %call10.6 = tail call i32 @dummy(float* getelementptr inbounds ([32000 x float]* @a, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @b, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @c, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @d, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @e, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @aa, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @bb, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @cc, i64 0, i64 0), float 0.000000e+00) nounwind
- %call10.7 = tail call i32 @dummy(float* getelementptr inbounds ([32000 x float]* @a, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @b, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @c, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @d, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @e, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @aa, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @bb, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @cc, i64 0, i64 0), float 0.000000e+00) nounwind
+ %call10 = tail call i32 @dummy(float* getelementptr inbounds ([32000 x float], [32000 x float]* @a, i64 0, i64 0), float* getelementptr inbounds ([32000 x float], [32000 x float]* @b, i64 0, i64 0), float* getelementptr inbounds ([32000 x float], [32000 x float]* @c, i64 0, i64 0), float* getelementptr inbounds ([32000 x float], [32000 x float]* @d, i64 0, i64 0), float* getelementptr inbounds ([32000 x float], [32000 x float]* @e, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]], [256 x [256 x float]]* @aa, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]], [256 x [256 x float]]* @bb, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]], [256 x [256 x float]]* @cc, i64 0, i64 0), float 0.000000e+00) nounwind
+ %call10.1 = tail call i32 @dummy(float* getelementptr inbounds ([32000 x float], [32000 x float]* @a, i64 0, i64 0), float* getelementptr inbounds ([32000 x float], [32000 x float]* @b, i64 0, i64 0), float* getelementptr inbounds ([32000 x float], [32000 x float]* @c, i64 0, i64 0), float* getelementptr inbounds ([32000 x float], [32000 x float]* @d, i64 0, i64 0), float* getelementptr inbounds ([32000 x float], [32000 x float]* @e, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]], [256 x [256 x float]]* @aa, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]], [256 x [256 x float]]* @bb, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]], [256 x [256 x float]]* @cc, i64 0, i64 0), float 0.000000e+00) nounwind
+ %call10.2 = tail call i32 @dummy(float* getelementptr inbounds ([32000 x float], [32000 x float]* @a, i64 0, i64 0), float* getelementptr inbounds ([32000 x float], [32000 x float]* @b, i64 0, i64 0), float* getelementptr inbounds ([32000 x float], [32000 x float]* @c, i64 0, i64 0), float* getelementptr inbounds ([32000 x float], [32000 x float]* @d, i64 0, i64 0), float* getelementptr inbounds ([32000 x float], [32000 x float]* @e, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]], [256 x [256 x float]]* @aa, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]], [256 x [256 x float]]* @bb, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]], [256 x [256 x float]]* @cc, i64 0, i64 0), float 0.000000e+00) nounwind
+ %call10.3 = tail call i32 @dummy(float* getelementptr inbounds ([32000 x float], [32000 x float]* @a, i64 0, i64 0), float* getelementptr inbounds ([32000 x float], [32000 x float]* @b, i64 0, i64 0), float* getelementptr inbounds ([32000 x float], [32000 x float]* @c, i64 0, i64 0), float* getelementptr inbounds ([32000 x float], [32000 x float]* @d, i64 0, i64 0), float* getelementptr inbounds ([32000 x float], [32000 x float]* @e, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]], [256 x [256 x float]]* @aa, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]], [256 x [256 x float]]* @bb, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]], [256 x [256 x float]]* @cc, i64 0, i64 0), float 0.000000e+00) nounwind
+ %call10.4 = tail call i32 @dummy(float* getelementptr inbounds ([32000 x float], [32000 x float]* @a, i64 0, i64 0), float* getelementptr inbounds ([32000 x float], [32000 x float]* @b, i64 0, i64 0), float* getelementptr inbounds ([32000 x float], [32000 x float]* @c, i64 0, i64 0), float* getelementptr inbounds ([32000 x float], [32000 x float]* @d, i64 0, i64 0), float* getelementptr inbounds ([32000 x float], [32000 x float]* @e, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]], [256 x [256 x float]]* @aa, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]], [256 x [256 x float]]* @bb, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]], [256 x [256 x float]]* @cc, i64 0, i64 0), float 0.000000e+00) nounwind
+ %call10.5 = tail call i32 @dummy(float* getelementptr inbounds ([32000 x float], [32000 x float]* @a, i64 0, i64 0), float* getelementptr inbounds ([32000 x float], [32000 x float]* @b, i64 0, i64 0), float* getelementptr inbounds ([32000 x float], [32000 x float]* @c, i64 0, i64 0), float* getelementptr inbounds ([32000 x float], [32000 x float]* @d, i64 0, i64 0), float* getelementptr inbounds ([32000 x float], [32000 x float]* @e, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]], [256 x [256 x float]]* @aa, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]], [256 x [256 x float]]* @bb, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]], [256 x [256 x float]]* @cc, i64 0, i64 0), float 0.000000e+00) nounwind
+ %call10.6 = tail call i32 @dummy(float* getelementptr inbounds ([32000 x float], [32000 x float]* @a, i64 0, i64 0), float* getelementptr inbounds ([32000 x float], [32000 x float]* @b, i64 0, i64 0), float* getelementptr inbounds ([32000 x float], [32000 x float]* @c, i64 0, i64 0), float* getelementptr inbounds ([32000 x float], [32000 x float]* @d, i64 0, i64 0), float* getelementptr inbounds ([32000 x float], [32000 x float]* @e, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]], [256 x [256 x float]]* @aa, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]], [256 x [256 x float]]* @bb, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]], [256 x [256 x float]]* @cc, i64 0, i64 0), float 0.000000e+00) nounwind
+ %call10.7 = tail call i32 @dummy(float* getelementptr inbounds ([32000 x float], [32000 x float]* @a, i64 0, i64 0), float* getelementptr inbounds ([32000 x float], [32000 x float]* @b, i64 0, i64 0), float* getelementptr inbounds ([32000 x float], [32000 x float]* @c, i64 0, i64 0), float* getelementptr inbounds ([32000 x float], [32000 x float]* @d, i64 0, i64 0), float* getelementptr inbounds ([32000 x float], [32000 x float]* @e, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]], [256 x [256 x float]]* @aa, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]], [256 x [256 x float]]* @bb, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]], [256 x [256 x float]]* @cc, i64 0, i64 0), float 0.000000e+00) nounwind
%inc.7 = add nsw i32 %nl.019, 8
%exitcond.7 = icmp eq i32 %inc.7, 200000
br i1 %exitcond.7, label %for.end12, label %for.end.7
diff --git a/test/CodeGen/PowerPC/2011-12-06-SpillAndRestoreCR.ll b/test/CodeGen/PowerPC/2011-12-06-SpillAndRestoreCR.ll
index a6223d41cc3f..93476827949f 100644
--- a/test/CodeGen/PowerPC/2011-12-06-SpillAndRestoreCR.ll
+++ b/test/CodeGen/PowerPC/2011-12-06-SpillAndRestoreCR.ll
@@ -29,13 +29,13 @@ declare void @check(i32 %name) nounwind
define i32 @s3110() nounwind {
entry:
- %call = tail call i32 @init(i8* getelementptr inbounds ([6 x i8]* @.str81, i64 0, i64 0))
+ %call = tail call i32 @init(i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str81, i64 0, i64 0))
%call1 = tail call i64 @clock() nounwind
br label %for.body
for.body: ; preds = %for.end17, %entry
%nl.041 = phi i32 [ 0, %entry ], [ %inc22, %for.end17 ]
- %0 = load float* getelementptr inbounds ([256 x [256 x float]]* @aa, i64 0, i64 0, i64 0), align 16
+ %0 = load float, float* getelementptr inbounds ([256 x [256 x float]], [256 x [256 x float]]* @aa, i64 0, i64 0, i64 0), align 16
br label %for.cond5.preheader
for.cond5.preheader: ; preds = %for.inc15, %for.body
@@ -50,8 +50,8 @@ for.body7: ; preds = %for.body7, %for.con
%max.235 = phi float [ %max.139, %for.cond5.preheader ], [ %max.3.15, %for.body7 ]
%xindex.234 = phi i32 [ %xindex.138, %for.cond5.preheader ], [ %xindex.3.15, %for.body7 ]
%yindex.233 = phi i32 [ %yindex.137, %for.cond5.preheader ], [ %yindex.3.15, %for.body7 ]
- %arrayidx9 = getelementptr inbounds [256 x [256 x float]]* @aa, i64 0, i64 %indvars.iv42, i64 %indvars.iv
- %1 = load float* %arrayidx9, align 16
+ %arrayidx9 = getelementptr inbounds [256 x [256 x float]], [256 x [256 x float]]* @aa, i64 0, i64 %indvars.iv42, i64 %indvars.iv
+ %1 = load float, float* %arrayidx9, align 16
%cmp10 = fcmp ogt float %1, %max.235
%2 = trunc i64 %indvars.iv to i32
%yindex.3 = select i1 %cmp10, i32 %2, i32 %yindex.233
@@ -59,120 +59,120 @@ for.body7: ; preds = %for.body7, %for.con
%xindex.3 = select i1 %cmp10, i32 %3, i32 %xindex.234
%max.3 = select i1 %cmp10, float %1, float %max.235
%indvars.iv.next45 = or i64 %indvars.iv, 1
- %arrayidx9.1 = getelementptr inbounds [256 x [256 x float]]* @aa, i64 0, i64 %indvars.iv42, i64 %indvars.iv.next45
- %4 = load float* %arrayidx9.1, align 4
+ %arrayidx9.1 = getelementptr inbounds [256 x [256 x float]], [256 x [256 x float]]* @aa, i64 0, i64 %indvars.iv42, i64 %indvars.iv.next45
+ %4 = load float, float* %arrayidx9.1, align 4
%cmp10.1 = fcmp ogt float %4, %max.3
%5 = trunc i64 %indvars.iv.next45 to i32
%yindex.3.1 = select i1 %cmp10.1, i32 %5, i32 %yindex.3
%xindex.3.1 = select i1 %cmp10.1, i32 %3, i32 %xindex.3
%max.3.1 = select i1 %cmp10.1, float %4, float %max.3
%indvars.iv.next.146 = or i64 %indvars.iv, 2
- %arrayidx9.2 = getelementptr inbounds [256 x [256 x float]]* @aa, i64 0, i64 %indvars.iv42, i64 %indvars.iv.next.146
- %6 = load float* %arrayidx9.2, align 8
+ %arrayidx9.2 = getelementptr inbounds [256 x [256 x float]], [256 x [256 x float]]* @aa, i64 0, i64 %indvars.iv42, i64 %indvars.iv.next.146
+ %6 = load float, float* %arrayidx9.2, align 8
%cmp10.2 = fcmp ogt float %6, %max.3.1
%7 = trunc i64 %indvars.iv.next.146 to i32
%yindex.3.2 = select i1 %cmp10.2, i32 %7, i32 %yindex.3.1
%xindex.3.2 = select i1 %cmp10.2, i32 %3, i32 %xindex.3.1
%max.3.2 = select i1 %cmp10.2, float %6, float %max.3.1
%indvars.iv.next.247 = or i64 %indvars.iv, 3
- %arrayidx9.3 = getelementptr inbounds [256 x [256 x float]]* @aa, i64 0, i64 %indvars.iv42, i64 %indvars.iv.next.247
- %8 = load float* %arrayidx9.3, align 4
+ %arrayidx9.3 = getelementptr inbounds [256 x [256 x float]], [256 x [256 x float]]* @aa, i64 0, i64 %indvars.iv42, i64 %indvars.iv.next.247
+ %8 = load float, float* %arrayidx9.3, align 4
%cmp10.3 = fcmp ogt float %8, %max.3.2
%9 = trunc i64 %indvars.iv.next.247 to i32
%yindex.3.3 = select i1 %cmp10.3, i32 %9, i32 %yindex.3.2
%xindex.3.3 = select i1 %cmp10.3, i32 %3, i32 %xindex.3.2
%max.3.3 = select i1 %cmp10.3, float %8, float %max.3.2
%indvars.iv.next.348 = or i64 %indvars.iv, 4
- %arrayidx9.4 = getelementptr inbounds [256 x [256 x float]]* @aa, i64 0, i64 %indvars.iv42, i64 %indvars.iv.next.348
- %10 = load float* %arrayidx9.4, align 16
+ %arrayidx9.4 = getelementptr inbounds [256 x [256 x float]], [256 x [256 x float]]* @aa, i64 0, i64 %indvars.iv42, i64 %indvars.iv.next.348
+ %10 = load float, float* %arrayidx9.4, align 16
%cmp10.4 = fcmp ogt float %10, %max.3.3
%11 = trunc i64 %indvars.iv.next.348 to i32
%yindex.3.4 = select i1 %cmp10.4, i32 %11, i32 %yindex.3.3
%xindex.3.4 = select i1 %cmp10.4, i32 %3, i32 %xindex.3.3
%max.3.4 = select i1 %cmp10.4, float %10, float %max.3.3
%indvars.iv.next.449 = or i64 %indvars.iv, 5
- %arrayidx9.5 = getelementptr inbounds [256 x [256 x float]]* @aa, i64 0, i64 %indvars.iv42, i64 %indvars.iv.next.449
- %12 = load float* %arrayidx9.5, align 4
+ %arrayidx9.5 = getelementptr inbounds [256 x [256 x float]], [256 x [256 x float]]* @aa, i64 0, i64 %indvars.iv42, i64 %indvars.iv.next.449
+ %12 = load float, float* %arrayidx9.5, align 4
%cmp10.5 = fcmp ogt float %12, %max.3.4
%13 = trunc i64 %indvars.iv.next.449 to i32
%yindex.3.5 = select i1 %cmp10.5, i32 %13, i32 %yindex.3.4
%xindex.3.5 = select i1 %cmp10.5, i32 %3, i32 %xindex.3.4
%max.3.5 = select i1 %cmp10.5, float %12, float %max.3.4
%indvars.iv.next.550 = or i64 %indvars.iv, 6
- %arrayidx9.6 = getelementptr inbounds [256 x [256 x float]]* @aa, i64 0, i64 %indvars.iv42, i64 %indvars.iv.next.550
- %14 = load float* %arrayidx9.6, align 8
+ %arrayidx9.6 = getelementptr inbounds [256 x [256 x float]], [256 x [256 x float]]* @aa, i64 0, i64 %indvars.iv42, i64 %indvars.iv.next.550
+ %14 = load float, float* %arrayidx9.6, align 8
%cmp10.6 = fcmp ogt float %14, %max.3.5
%15 = trunc i64 %indvars.iv.next.550 to i32
%yindex.3.6 = select i1 %cmp10.6, i32 %15, i32 %yindex.3.5
%xindex.3.6 = select i1 %cmp10.6, i32 %3, i32 %xindex.3.5
%max.3.6 = select i1 %cmp10.6, float %14, float %max.3.5
%indvars.iv.next.651 = or i64 %indvars.iv, 7
- %arrayidx9.7 = getelementptr inbounds [256 x [256 x float]]* @aa, i64 0, i64 %indvars.iv42, i64 %indvars.iv.next.651
- %16 = load float* %arrayidx9.7, align 4
+ %arrayidx9.7 = getelementptr inbounds [256 x [256 x float]], [256 x [256 x float]]* @aa, i64 0, i64 %indvars.iv42, i64 %indvars.iv.next.651
+ %16 = load float, float* %arrayidx9.7, align 4
%cmp10.7 = fcmp ogt float %16, %max.3.6
%17 = trunc i64 %indvars.iv.next.651 to i32
%yindex.3.7 = select i1 %cmp10.7, i32 %17, i32 %yindex.3.6
%xindex.3.7 = select i1 %cmp10.7, i32 %3, i32 %xindex.3.6
%max.3.7 = select i1 %cmp10.7, float %16, float %max.3.6
%indvars.iv.next.752 = or i64 %indvars.iv, 8
- %arrayidx9.8 = getelementptr inbounds [256 x [256 x float]]* @aa, i64 0, i64 %indvars.iv42, i64 %indvars.iv.next.752
- %18 = load float* %arrayidx9.8, align 16
+ %arrayidx9.8 = getelementptr inbounds [256 x [256 x float]], [256 x [256 x float]]* @aa, i64 0, i64 %indvars.iv42, i64 %indvars.iv.next.752
+ %18 = load float, float* %arrayidx9.8, align 16
%cmp10.8 = fcmp ogt float %18, %max.3.7
%19 = trunc i64 %indvars.iv.next.752 to i32
%yindex.3.8 = select i1 %cmp10.8, i32 %19, i32 %yindex.3.7
%xindex.3.8 = select i1 %cmp10.8, i32 %3, i32 %xindex.3.7
%max.3.8 = select i1 %cmp10.8, float %18, float %max.3.7
%indvars.iv.next.853 = or i64 %indvars.iv, 9
- %arrayidx9.9 = getelementptr inbounds [256 x [256 x float]]* @aa, i64 0, i64 %indvars.iv42, i64 %indvars.iv.next.853
- %20 = load float* %arrayidx9.9, align 4
+ %arrayidx9.9 = getelementptr inbounds [256 x [256 x float]], [256 x [256 x float]]* @aa, i64 0, i64 %indvars.iv42, i64 %indvars.iv.next.853
+ %20 = load float, float* %arrayidx9.9, align 4
%cmp10.9 = fcmp ogt float %20, %max.3.8
%21 = trunc i64 %indvars.iv.next.853 to i32
%yindex.3.9 = select i1 %cmp10.9, i32 %21, i32 %yindex.3.8
%xindex.3.9 = select i1 %cmp10.9, i32 %3, i32 %xindex.3.8
%max.3.9 = select i1 %cmp10.9, float %20, float %max.3.8
%indvars.iv.next.954 = or i64 %indvars.iv, 10
- %arrayidx9.10 = getelementptr inbounds [256 x [256 x float]]* @aa, i64 0, i64 %indvars.iv42, i64 %indvars.iv.next.954
- %22 = load float* %arrayidx9.10, align 8
+ %arrayidx9.10 = getelementptr inbounds [256 x [256 x float]], [256 x [256 x float]]* @aa, i64 0, i64 %indvars.iv42, i64 %indvars.iv.next.954
+ %22 = load float, float* %arrayidx9.10, align 8
%cmp10.10 = fcmp ogt float %22, %max.3.9
%23 = trunc i64 %indvars.iv.next.954 to i32
%yindex.3.10 = select i1 %cmp10.10, i32 %23, i32 %yindex.3.9
%xindex.3.10 = select i1 %cmp10.10, i32 %3, i32 %xindex.3.9
%max.3.10 = select i1 %cmp10.10, float %22, float %max.3.9
%indvars.iv.next.1055 = or i64 %indvars.iv, 11
- %arrayidx9.11 = getelementptr inbounds [256 x [256 x float]]* @aa, i64 0, i64 %indvars.iv42, i64 %indvars.iv.next.1055
- %24 = load float* %arrayidx9.11, align 4
+ %arrayidx9.11 = getelementptr inbounds [256 x [256 x float]], [256 x [256 x float]]* @aa, i64 0, i64 %indvars.iv42, i64 %indvars.iv.next.1055
+ %24 = load float, float* %arrayidx9.11, align 4
%cmp10.11 = fcmp ogt float %24, %max.3.10
%25 = trunc i64 %indvars.iv.next.1055 to i32
%yindex.3.11 = select i1 %cmp10.11, i32 %25, i32 %yindex.3.10
%xindex.3.11 = select i1 %cmp10.11, i32 %3, i32 %xindex.3.10
%max.3.11 = select i1 %cmp10.11, float %24, float %max.3.10
%indvars.iv.next.1156 = or i64 %indvars.iv, 12
- %arrayidx9.12 = getelementptr inbounds [256 x [256 x float]]* @aa, i64 0, i64 %indvars.iv42, i64 %indvars.iv.next.1156
- %26 = load float* %arrayidx9.12, align 16
+ %arrayidx9.12 = getelementptr inbounds [256 x [256 x float]], [256 x [256 x float]]* @aa, i64 0, i64 %indvars.iv42, i64 %indvars.iv.next.1156
+ %26 = load float, float* %arrayidx9.12, align 16
%cmp10.12 = fcmp ogt float %26, %max.3.11
%27 = trunc i64 %indvars.iv.next.1156 to i32
%yindex.3.12 = select i1 %cmp10.12, i32 %27, i32 %yindex.3.11
%xindex.3.12 = select i1 %cmp10.12, i32 %3, i32 %xindex.3.11
%max.3.12 = select i1 %cmp10.12, float %26, float %max.3.11
%indvars.iv.next.1257 = or i64 %indvars.iv, 13
- %arrayidx9.13 = getelementptr inbounds [256 x [256 x float]]* @aa, i64 0, i64 %indvars.iv42, i64 %indvars.iv.next.1257
- %28 = load float* %arrayidx9.13, align 4
+ %arrayidx9.13 = getelementptr inbounds [256 x [256 x float]], [256 x [256 x float]]* @aa, i64 0, i64 %indvars.iv42, i64 %indvars.iv.next.1257
+ %28 = load float, float* %arrayidx9.13, align 4
%cmp10.13 = fcmp ogt float %28, %max.3.12
%29 = trunc i64 %indvars.iv.next.1257 to i32
%yindex.3.13 = select i1 %cmp10.13, i32 %29, i32 %yindex.3.12
%xindex.3.13 = select i1 %cmp10.13, i32 %3, i32 %xindex.3.12
%max.3.13 = select i1 %cmp10.13, float %28, float %max.3.12
%indvars.iv.next.1358 = or i64 %indvars.iv, 14
- %arrayidx9.14 = getelementptr inbounds [256 x [256 x float]]* @aa, i64 0, i64 %indvars.iv42, i64 %indvars.iv.next.1358
- %30 = load float* %arrayidx9.14, align 8
+ %arrayidx9.14 = getelementptr inbounds [256 x [256 x float]], [256 x [256 x float]]* @aa, i64 0, i64 %indvars.iv42, i64 %indvars.iv.next.1358
+ %30 = load float, float* %arrayidx9.14, align 8
%cmp10.14 = fcmp ogt float %30, %max.3.13
%31 = trunc i64 %indvars.iv.next.1358 to i32
%yindex.3.14 = select i1 %cmp10.14, i32 %31, i32 %yindex.3.13
%xindex.3.14 = select i1 %cmp10.14, i32 %3, i32 %xindex.3.13
%max.3.14 = select i1 %cmp10.14, float %30, float %max.3.13
%indvars.iv.next.1459 = or i64 %indvars.iv, 15
- %arrayidx9.15 = getelementptr inbounds [256 x [256 x float]]* @aa, i64 0, i64 %indvars.iv42, i64 %indvars.iv.next.1459
- %32 = load float* %arrayidx9.15, align 4
+ %arrayidx9.15 = getelementptr inbounds [256 x [256 x float]], [256 x [256 x float]]* @aa, i64 0, i64 %indvars.iv42, i64 %indvars.iv.next.1459
+ %32 = load float, float* %arrayidx9.15, align 4
%cmp10.15 = fcmp ogt float %32, %max.3.14
%33 = trunc i64 %indvars.iv.next.1459 to i32
%yindex.3.15 = select i1 %cmp10.15, i32 %33, i32 %yindex.3.14
@@ -194,7 +194,7 @@ for.end17: ; preds = %for.inc15
%add = fadd float %max.3.15, %conv
%conv18 = sitofp i32 %yindex.3.15 to float
%add19 = fadd float %add, %conv18
- %call20 = tail call i32 @dummy(float* getelementptr inbounds ([32000 x float]* @a, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @b, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @c, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @d, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @e, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @aa, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @bb, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @cc, i64 0, i64 0), float %add19) nounwind
+ %call20 = tail call i32 @dummy(float* getelementptr inbounds ([32000 x float], [32000 x float]* @a, i64 0, i64 0), float* getelementptr inbounds ([32000 x float], [32000 x float]* @b, i64 0, i64 0), float* getelementptr inbounds ([32000 x float], [32000 x float]* @c, i64 0, i64 0), float* getelementptr inbounds ([32000 x float], [32000 x float]* @d, i64 0, i64 0), float* getelementptr inbounds ([32000 x float], [32000 x float]* @e, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]], [256 x [256 x float]]* @aa, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]], [256 x [256 x float]]* @bb, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]], [256 x [256 x float]]* @cc, i64 0, i64 0), float %add19) nounwind
%inc22 = add nsw i32 %nl.041, 1
%exitcond44 = icmp eq i32 %inc22, 78100
br i1 %exitcond44, label %for.end23, label %for.body
@@ -204,7 +204,7 @@ for.end23: ; preds = %for.end17
%sub = sub nsw i64 %call24, %call1
%conv25 = sitofp i64 %sub to double
%div = fdiv double %conv25, 1.000000e+06
- %call26 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([15 x i8]* @.str235, i64 0, i64 0), double %div) nounwind
+ %call26 = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([15 x i8], [15 x i8]* @.str235, i64 0, i64 0), double %div) nounwind
%add29 = fadd float %add, 1.000000e+00
%add31 = fadd float %add29, %conv18
%add32 = fadd float %add31, 1.000000e+00
diff --git a/test/CodeGen/PowerPC/2011-12-08-DemandedBitsMiscompile.ll b/test/CodeGen/PowerPC/2011-12-08-DemandedBitsMiscompile.ll
index b1cbb36fe041..05390cf8b92e 100644
--- a/test/CodeGen/PowerPC/2011-12-08-DemandedBitsMiscompile.ll
+++ b/test/CodeGen/PowerPC/2011-12-08-DemandedBitsMiscompile.ll
@@ -2,7 +2,7 @@
define void @test(i32* nocapture %x, i64* %xx, i32* %yp) nounwind uwtable ssp {
entry:
- %yy = load i32* %yp
+ %yy = load i32, i32* %yp
%y = add i32 %yy, 1
%z = zext i32 %y to i64
%z2 = shl i64 %z, 32
diff --git a/test/CodeGen/PowerPC/2012-09-16-TOC-entry-check.ll b/test/CodeGen/PowerPC/2012-09-16-TOC-entry-check.ll
index 5bff58f2bbf5..1d45c2e73455 100644
--- a/test/CodeGen/PowerPC/2012-09-16-TOC-entry-check.ll
+++ b/test/CodeGen/PowerPC/2012-09-16-TOC-entry-check.ll
@@ -19,9 +19,9 @@ define i32 @foo(double %X, double %Y) nounwind readnone {
}
; Check the creation of 2 .tc entries for both double constants. They
-; should be .LC1 and .LC3 to avoid name clash with global constants
-; .LC0 and .LC2
-; CHECK: .LC{{[13]}}:
+; avoid name clash with global constants .LC0 and .LC2
+; CHECK: .section .toc,"aw",@progbits
+; CHECK: .LC{{.*}}:
; CHECK-NEXT: .tc {{[\._a-zA-Z0-9]+}}[TC],{{[\._a-zA-Z0-9]+}}
-; CHECK: .LC{{[13]}}:
+; CHECK: .LC{{.*}}:
; CHECK-NEXT: .tc {{[\._a-zA-Z0-9]+}}[TC],{{[\._a-zA-Z0-9]+}}
diff --git a/test/CodeGen/PowerPC/2013-05-15-preinc-fold.ll b/test/CodeGen/PowerPC/2013-05-15-preinc-fold.ll
index 542a766300ef..9fe88a85d15f 100644
--- a/test/CodeGen/PowerPC/2013-05-15-preinc-fold.ll
+++ b/test/CodeGen/PowerPC/2013-05-15-preinc-fold.ll
@@ -5,9 +5,9 @@ target triple = "powerpc64-unknown-linux-gnu"
define i8* @test(i8* %base, i8 %val) {
entry:
- %arrayidx = getelementptr inbounds i8* %base, i32 -1
+ %arrayidx = getelementptr inbounds i8, i8* %base, i32 -1
store i8 %val, i8* %arrayidx, align 1
- %arrayidx2 = getelementptr inbounds i8* %base, i32 1
+ %arrayidx2 = getelementptr inbounds i8, i8* %base, i32 1
store i8 %val, i8* %arrayidx2, align 1
ret i8* %arrayidx
}
@@ -19,9 +19,9 @@ entry:
define i64* @test64(i64* %base, i64 %val) {
entry:
- %arrayidx = getelementptr inbounds i64* %base, i32 -1
+ %arrayidx = getelementptr inbounds i64, i64* %base, i32 -1
store i64 %val, i64* %arrayidx, align 8
- %arrayidx2 = getelementptr inbounds i64* %base, i32 1
+ %arrayidx2 = getelementptr inbounds i64, i64* %base, i32 1
store i64 %val, i64* %arrayidx2, align 8
ret i64* %arrayidx
}
diff --git a/test/CodeGen/PowerPC/2013-07-01-PHIElimBug.ll b/test/CodeGen/PowerPC/2013-07-01-PHIElimBug.ll
index 9bf25c8ffe49..3c6f3ff32454 100644
--- a/test/CodeGen/PowerPC/2013-07-01-PHIElimBug.ll
+++ b/test/CodeGen/PowerPC/2013-07-01-PHIElimBug.ll
@@ -10,17 +10,17 @@ target triple = "powerpc64-unknown-linux-gnu"
; Function Attrs: nounwind
define fastcc void @func_7() #0 {
entry:
- %arrayidx638 = getelementptr inbounds [3 x [1 x i32]]* undef, i64 0, i64 1, i64 0
+ %arrayidx638 = getelementptr inbounds [3 x [1 x i32]], [3 x [1 x i32]]* undef, i64 0, i64 1, i64 0
br i1 undef, label %for.cond940, label %if.end1018
for.cond940: ; preds = %for.cond940, %if.else876
- %l_655.1 = phi i32* [ getelementptr inbounds ([8 x i32]* @g_51, i64 0, i64 6), %entry ], [ %l_654.0, %for.cond940 ]
+ %l_655.1 = phi i32* [ getelementptr inbounds ([8 x i32], [8 x i32]* @g_51, i64 0, i64 6), %entry ], [ %l_654.0, %for.cond940 ]
%l_654.0 = phi i32* [ null, %entry ], [ %arrayidx638, %for.cond940 ]
%exitcond = icmp eq i32 undef, 20
br i1 %exitcond, label %if.end1018, label %for.cond940
if.end1018: ; preds = %for.end957, %for.end834
- %l_655.3.ph33 = phi i32* [ %l_655.1, %for.cond940 ], [ getelementptr inbounds ([8 x i32]* @g_51, i64 0, i64 6), %entry ]
+ %l_655.3.ph33 = phi i32* [ %l_655.1, %for.cond940 ], [ getelementptr inbounds ([8 x i32], [8 x i32]* @g_51, i64 0, i64 6), %entry ]
store i32 0, i32* %l_655.3.ph33, align 4
ret void
}
diff --git a/test/CodeGen/PowerPC/Atomics-64.ll b/test/CodeGen/PowerPC/Atomics-64.ll
index 122b54e080ac..77066de25e78 100644
--- a/test/CodeGen/PowerPC/Atomics-64.ll
+++ b/test/CodeGen/PowerPC/Atomics-64.ll
@@ -254,272 +254,272 @@ return: ; preds = %entry
define void @test_op_and_fetch() nounwind {
entry:
- %0 = load i8* @uc, align 1
+ %0 = load i8, i8* @uc, align 1
%1 = atomicrmw add i8* @sc, i8 %0 monotonic
%2 = add i8 %1, %0
store i8 %2, i8* @sc, align 1
- %3 = load i8* @uc, align 1
+ %3 = load i8, i8* @uc, align 1
%4 = atomicrmw add i8* @uc, i8 %3 monotonic
%5 = add i8 %4, %3
store i8 %5, i8* @uc, align 1
- %6 = load i8* @uc, align 1
+ %6 = load i8, i8* @uc, align 1
%7 = zext i8 %6 to i16
%8 = bitcast i8* bitcast (i16* @ss to i8*) to i16*
%9 = atomicrmw add i16* %8, i16 %7 monotonic
%10 = add i16 %9, %7
store i16 %10, i16* @ss, align 2
- %11 = load i8* @uc, align 1
+ %11 = load i8, i8* @uc, align 1
%12 = zext i8 %11 to i16
%13 = bitcast i8* bitcast (i16* @us to i8*) to i16*
%14 = atomicrmw add i16* %13, i16 %12 monotonic
%15 = add i16 %14, %12
store i16 %15, i16* @us, align 2
- %16 = load i8* @uc, align 1
+ %16 = load i8, i8* @uc, align 1
%17 = zext i8 %16 to i32
%18 = bitcast i8* bitcast (i32* @si to i8*) to i32*
%19 = atomicrmw add i32* %18, i32 %17 monotonic
%20 = add i32 %19, %17
store i32 %20, i32* @si, align 4
- %21 = load i8* @uc, align 1
+ %21 = load i8, i8* @uc, align 1
%22 = zext i8 %21 to i32
%23 = bitcast i8* bitcast (i32* @ui to i8*) to i32*
%24 = atomicrmw add i32* %23, i32 %22 monotonic
%25 = add i32 %24, %22
store i32 %25, i32* @ui, align 4
- %26 = load i8* @uc, align 1
+ %26 = load i8, i8* @uc, align 1
%27 = zext i8 %26 to i64
%28 = bitcast i8* bitcast (i64* @sl to i8*) to i64*
%29 = atomicrmw add i64* %28, i64 %27 monotonic
%30 = add i64 %29, %27
store i64 %30, i64* @sl, align 8
- %31 = load i8* @uc, align 1
+ %31 = load i8, i8* @uc, align 1
%32 = zext i8 %31 to i64
%33 = bitcast i8* bitcast (i64* @ul to i8*) to i64*
%34 = atomicrmw add i64* %33, i64 %32 monotonic
%35 = add i64 %34, %32
store i64 %35, i64* @ul, align 8
- %36 = load i8* @uc, align 1
+ %36 = load i8, i8* @uc, align 1
%37 = atomicrmw sub i8* @sc, i8 %36 monotonic
%38 = sub i8 %37, %36
store i8 %38, i8* @sc, align 1
- %39 = load i8* @uc, align 1
+ %39 = load i8, i8* @uc, align 1
%40 = atomicrmw sub i8* @uc, i8 %39 monotonic
%41 = sub i8 %40, %39
store i8 %41, i8* @uc, align 1
- %42 = load i8* @uc, align 1
+ %42 = load i8, i8* @uc, align 1
%43 = zext i8 %42 to i16
%44 = bitcast i8* bitcast (i16* @ss to i8*) to i16*
%45 = atomicrmw sub i16* %44, i16 %43 monotonic
%46 = sub i16 %45, %43
store i16 %46, i16* @ss, align 2
- %47 = load i8* @uc, align 1
+ %47 = load i8, i8* @uc, align 1
%48 = zext i8 %47 to i16
%49 = bitcast i8* bitcast (i16* @us to i8*) to i16*
%50 = atomicrmw sub i16* %49, i16 %48 monotonic
%51 = sub i16 %50, %48
store i16 %51, i16* @us, align 2
- %52 = load i8* @uc, align 1
+ %52 = load i8, i8* @uc, align 1
%53 = zext i8 %52 to i32
%54 = bitcast i8* bitcast (i32* @si to i8*) to i32*
%55 = atomicrmw sub i32* %54, i32 %53 monotonic
%56 = sub i32 %55, %53
store i32 %56, i32* @si, align 4
- %57 = load i8* @uc, align 1
+ %57 = load i8, i8* @uc, align 1
%58 = zext i8 %57 to i32
%59 = bitcast i8* bitcast (i32* @ui to i8*) to i32*
%60 = atomicrmw sub i32* %59, i32 %58 monotonic
%61 = sub i32 %60, %58
store i32 %61, i32* @ui, align 4
- %62 = load i8* @uc, align 1
+ %62 = load i8, i8* @uc, align 1
%63 = zext i8 %62 to i64
%64 = bitcast i8* bitcast (i64* @sl to i8*) to i64*
%65 = atomicrmw sub i64* %64, i64 %63 monotonic
%66 = sub i64 %65, %63
store i64 %66, i64* @sl, align 8
- %67 = load i8* @uc, align 1
+ %67 = load i8, i8* @uc, align 1
%68 = zext i8 %67 to i64
%69 = bitcast i8* bitcast (i64* @ul to i8*) to i64*
%70 = atomicrmw sub i64* %69, i64 %68 monotonic
%71 = sub i64 %70, %68
store i64 %71, i64* @ul, align 8
- %72 = load i8* @uc, align 1
+ %72 = load i8, i8* @uc, align 1
%73 = atomicrmw or i8* @sc, i8 %72 monotonic
%74 = or i8 %73, %72
store i8 %74, i8* @sc, align 1
- %75 = load i8* @uc, align 1
+ %75 = load i8, i8* @uc, align 1
%76 = atomicrmw or i8* @uc, i8 %75 monotonic
%77 = or i8 %76, %75
store i8 %77, i8* @uc, align 1
- %78 = load i8* @uc, align 1
+ %78 = load i8, i8* @uc, align 1
%79 = zext i8 %78 to i16
%80 = bitcast i8* bitcast (i16* @ss to i8*) to i16*
%81 = atomicrmw or i16* %80, i16 %79 monotonic
%82 = or i16 %81, %79
store i16 %82, i16* @ss, align 2
- %83 = load i8* @uc, align 1
+ %83 = load i8, i8* @uc, align 1
%84 = zext i8 %83 to i16
%85 = bitcast i8* bitcast (i16* @us to i8*) to i16*
%86 = atomicrmw or i16* %85, i16 %84 monotonic
%87 = or i16 %86, %84
store i16 %87, i16* @us, align 2
- %88 = load i8* @uc, align 1
+ %88 = load i8, i8* @uc, align 1
%89 = zext i8 %88 to i32
%90 = bitcast i8* bitcast (i32* @si to i8*) to i32*
%91 = atomicrmw or i32* %90, i32 %89 monotonic
%92 = or i32 %91, %89
store i32 %92, i32* @si, align 4
- %93 = load i8* @uc, align 1
+ %93 = load i8, i8* @uc, align 1
%94 = zext i8 %93 to i32
%95 = bitcast i8* bitcast (i32* @ui to i8*) to i32*
%96 = atomicrmw or i32* %95, i32 %94 monotonic
%97 = or i32 %96, %94
store i32 %97, i32* @ui, align 4
- %98 = load i8* @uc, align 1
+ %98 = load i8, i8* @uc, align 1
%99 = zext i8 %98 to i64
%100 = bitcast i8* bitcast (i64* @sl to i8*) to i64*
%101 = atomicrmw or i64* %100, i64 %99 monotonic
%102 = or i64 %101, %99
store i64 %102, i64* @sl, align 8
- %103 = load i8* @uc, align 1
+ %103 = load i8, i8* @uc, align 1
%104 = zext i8 %103 to i64
%105 = bitcast i8* bitcast (i64* @ul to i8*) to i64*
%106 = atomicrmw or i64* %105, i64 %104 monotonic
%107 = or i64 %106, %104
store i64 %107, i64* @ul, align 8
- %108 = load i8* @uc, align 1
+ %108 = load i8, i8* @uc, align 1
%109 = atomicrmw xor i8* @sc, i8 %108 monotonic
%110 = xor i8 %109, %108
store i8 %110, i8* @sc, align 1
- %111 = load i8* @uc, align 1
+ %111 = load i8, i8* @uc, align 1
%112 = atomicrmw xor i8* @uc, i8 %111 monotonic
%113 = xor i8 %112, %111
store i8 %113, i8* @uc, align 1
- %114 = load i8* @uc, align 1
+ %114 = load i8, i8* @uc, align 1
%115 = zext i8 %114 to i16
%116 = bitcast i8* bitcast (i16* @ss to i8*) to i16*
%117 = atomicrmw xor i16* %116, i16 %115 monotonic
%118 = xor i16 %117, %115
store i16 %118, i16* @ss, align 2
- %119 = load i8* @uc, align 1
+ %119 = load i8, i8* @uc, align 1
%120 = zext i8 %119 to i16
%121 = bitcast i8* bitcast (i16* @us to i8*) to i16*
%122 = atomicrmw xor i16* %121, i16 %120 monotonic
%123 = xor i16 %122, %120
store i16 %123, i16* @us, align 2
- %124 = load i8* @uc, align 1
+ %124 = load i8, i8* @uc, align 1
%125 = zext i8 %124 to i32
%126 = bitcast i8* bitcast (i32* @si to i8*) to i32*
%127 = atomicrmw xor i32* %126, i32 %125 monotonic
%128 = xor i32 %127, %125
store i32 %128, i32* @si, align 4
- %129 = load i8* @uc, align 1
+ %129 = load i8, i8* @uc, align 1
%130 = zext i8 %129 to i32
%131 = bitcast i8* bitcast (i32* @ui to i8*) to i32*
%132 = atomicrmw xor i32* %131, i32 %130 monotonic
%133 = xor i32 %132, %130
store i32 %133, i32* @ui, align 4
- %134 = load i8* @uc, align 1
+ %134 = load i8, i8* @uc, align 1
%135 = zext i8 %134 to i64
%136 = bitcast i8* bitcast (i64* @sl to i8*) to i64*
%137 = atomicrmw xor i64* %136, i64 %135 monotonic
%138 = xor i64 %137, %135
store i64 %138, i64* @sl, align 8
- %139 = load i8* @uc, align 1
+ %139 = load i8, i8* @uc, align 1
%140 = zext i8 %139 to i64
%141 = bitcast i8* bitcast (i64* @ul to i8*) to i64*
%142 = atomicrmw xor i64* %141, i64 %140 monotonic
%143 = xor i64 %142, %140
store i64 %143, i64* @ul, align 8
- %144 = load i8* @uc, align 1
+ %144 = load i8, i8* @uc, align 1
%145 = atomicrmw and i8* @sc, i8 %144 monotonic
%146 = and i8 %145, %144
store i8 %146, i8* @sc, align 1
- %147 = load i8* @uc, align 1
+ %147 = load i8, i8* @uc, align 1
%148 = atomicrmw and i8* @uc, i8 %147 monotonic
%149 = and i8 %148, %147
store i8 %149, i8* @uc, align 1
- %150 = load i8* @uc, align 1
+ %150 = load i8, i8* @uc, align 1
%151 = zext i8 %150 to i16
%152 = bitcast i8* bitcast (i16* @ss to i8*) to i16*
%153 = atomicrmw and i16* %152, i16 %151 monotonic
%154 = and i16 %153, %151
store i16 %154, i16* @ss, align 2
- %155 = load i8* @uc, align 1
+ %155 = load i8, i8* @uc, align 1
%156 = zext i8 %155 to i16
%157 = bitcast i8* bitcast (i16* @us to i8*) to i16*
%158 = atomicrmw and i16* %157, i16 %156 monotonic
%159 = and i16 %158, %156
store i16 %159, i16* @us, align 2
- %160 = load i8* @uc, align 1
+ %160 = load i8, i8* @uc, align 1
%161 = zext i8 %160 to i32
%162 = bitcast i8* bitcast (i32* @si to i8*) to i32*
%163 = atomicrmw and i32* %162, i32 %161 monotonic
%164 = and i32 %163, %161
store i32 %164, i32* @si, align 4
- %165 = load i8* @uc, align 1
+ %165 = load i8, i8* @uc, align 1
%166 = zext i8 %165 to i32
%167 = bitcast i8* bitcast (i32* @ui to i8*) to i32*
%168 = atomicrmw and i32* %167, i32 %166 monotonic
%169 = and i32 %168, %166
store i32 %169, i32* @ui, align 4
- %170 = load i8* @uc, align 1
+ %170 = load i8, i8* @uc, align 1
%171 = zext i8 %170 to i64
%172 = bitcast i8* bitcast (i64* @sl to i8*) to i64*
%173 = atomicrmw and i64* %172, i64 %171 monotonic
%174 = and i64 %173, %171
store i64 %174, i64* @sl, align 8
- %175 = load i8* @uc, align 1
+ %175 = load i8, i8* @uc, align 1
%176 = zext i8 %175 to i64
%177 = bitcast i8* bitcast (i64* @ul to i8*) to i64*
%178 = atomicrmw and i64* %177, i64 %176 monotonic
%179 = and i64 %178, %176
store i64 %179, i64* @ul, align 8
- %180 = load i8* @uc, align 1
+ %180 = load i8, i8* @uc, align 1
%181 = atomicrmw nand i8* @sc, i8 %180 monotonic
%182 = xor i8 %181, -1
%183 = and i8 %182, %180
store i8 %183, i8* @sc, align 1
- %184 = load i8* @uc, align 1
+ %184 = load i8, i8* @uc, align 1
%185 = atomicrmw nand i8* @uc, i8 %184 monotonic
%186 = xor i8 %185, -1
%187 = and i8 %186, %184
store i8 %187, i8* @uc, align 1
- %188 = load i8* @uc, align 1
+ %188 = load i8, i8* @uc, align 1
%189 = zext i8 %188 to i16
%190 = bitcast i8* bitcast (i16* @ss to i8*) to i16*
%191 = atomicrmw nand i16* %190, i16 %189 monotonic
%192 = xor i16 %191, -1
%193 = and i16 %192, %189
store i16 %193, i16* @ss, align 2
- %194 = load i8* @uc, align 1
+ %194 = load i8, i8* @uc, align 1
%195 = zext i8 %194 to i16
%196 = bitcast i8* bitcast (i16* @us to i8*) to i16*
%197 = atomicrmw nand i16* %196, i16 %195 monotonic
%198 = xor i16 %197, -1
%199 = and i16 %198, %195
store i16 %199, i16* @us, align 2
- %200 = load i8* @uc, align 1
+ %200 = load i8, i8* @uc, align 1
%201 = zext i8 %200 to i32
%202 = bitcast i8* bitcast (i32* @si to i8*) to i32*
%203 = atomicrmw nand i32* %202, i32 %201 monotonic
%204 = xor i32 %203, -1
%205 = and i32 %204, %201
store i32 %205, i32* @si, align 4
- %206 = load i8* @uc, align 1
+ %206 = load i8, i8* @uc, align 1
%207 = zext i8 %206 to i32
%208 = bitcast i8* bitcast (i32* @ui to i8*) to i32*
%209 = atomicrmw nand i32* %208, i32 %207 monotonic
%210 = xor i32 %209, -1
%211 = and i32 %210, %207
store i32 %211, i32* @ui, align 4
- %212 = load i8* @uc, align 1
+ %212 = load i8, i8* @uc, align 1
%213 = zext i8 %212 to i64
%214 = bitcast i8* bitcast (i64* @sl to i8*) to i64*
%215 = atomicrmw nand i64* %214, i64 %213 monotonic
%216 = xor i64 %215, -1
%217 = and i64 %216, %213
store i64 %217, i64* @sl, align 8
- %218 = load i8* @uc, align 1
+ %218 = load i8, i8* @uc, align 1
%219 = zext i8 %218 to i64
%220 = bitcast i8* bitcast (i64* @ul to i8*) to i64*
%221 = atomicrmw nand i64* %220, i64 %219 monotonic
@@ -534,73 +534,73 @@ return: ; preds = %entry
define void @test_compare_and_swap() nounwind {
entry:
- %0 = load i8* @uc, align 1
- %1 = load i8* @sc, align 1
+ %0 = load i8, i8* @uc, align 1
+ %1 = load i8, i8* @sc, align 1
%2 = cmpxchg i8* @sc, i8 %0, i8 %1 monotonic monotonic
store i8 %2, i8* @sc, align 1
- %3 = load i8* @uc, align 1
- %4 = load i8* @sc, align 1
+ %3 = load i8, i8* @uc, align 1
+ %4 = load i8, i8* @sc, align 1
%5 = cmpxchg i8* @uc, i8 %3, i8 %4 monotonic monotonic
store i8 %5, i8* @uc, align 1
- %6 = load i8* @uc, align 1
+ %6 = load i8, i8* @uc, align 1
%7 = zext i8 %6 to i16
- %8 = load i8* @sc, align 1
+ %8 = load i8, i8* @sc, align 1
%9 = sext i8 %8 to i16
%10 = bitcast i8* bitcast (i16* @ss to i8*) to i16*
%11 = cmpxchg i16* %10, i16 %7, i16 %9 monotonic monotonic
store i16 %11, i16* @ss, align 2
- %12 = load i8* @uc, align 1
+ %12 = load i8, i8* @uc, align 1
%13 = zext i8 %12 to i16
- %14 = load i8* @sc, align 1
+ %14 = load i8, i8* @sc, align 1
%15 = sext i8 %14 to i16
%16 = bitcast i8* bitcast (i16* @us to i8*) to i16*
%17 = cmpxchg i16* %16, i16 %13, i16 %15 monotonic monotonic
store i16 %17, i16* @us, align 2
- %18 = load i8* @uc, align 1
+ %18 = load i8, i8* @uc, align 1
%19 = zext i8 %18 to i32
- %20 = load i8* @sc, align 1
+ %20 = load i8, i8* @sc, align 1
%21 = sext i8 %20 to i32
%22 = bitcast i8* bitcast (i32* @si to i8*) to i32*
%23 = cmpxchg i32* %22, i32 %19, i32 %21 monotonic monotonic
store i32 %23, i32* @si, align 4
- %24 = load i8* @uc, align 1
+ %24 = load i8, i8* @uc, align 1
%25 = zext i8 %24 to i32
- %26 = load i8* @sc, align 1
+ %26 = load i8, i8* @sc, align 1
%27 = sext i8 %26 to i32
%28 = bitcast i8* bitcast (i32* @ui to i8*) to i32*
%29 = cmpxchg i32* %28, i32 %25, i32 %27 monotonic monotonic
store i32 %29, i32* @ui, align 4
- %30 = load i8* @uc, align 1
+ %30 = load i8, i8* @uc, align 1
%31 = zext i8 %30 to i64
- %32 = load i8* @sc, align 1
+ %32 = load i8, i8* @sc, align 1
%33 = sext i8 %32 to i64
%34 = bitcast i8* bitcast (i64* @sl to i8*) to i64*
%35 = cmpxchg i64* %34, i64 %31, i64 %33 monotonic monotonic
store i64 %35, i64* @sl, align 8
- %36 = load i8* @uc, align 1
+ %36 = load i8, i8* @uc, align 1
%37 = zext i8 %36 to i64
- %38 = load i8* @sc, align 1
+ %38 = load i8, i8* @sc, align 1
%39 = sext i8 %38 to i64
%40 = bitcast i8* bitcast (i64* @ul to i8*) to i64*
%41 = cmpxchg i64* %40, i64 %37, i64 %39 monotonic monotonic
store i64 %41, i64* @ul, align 8
- %42 = load i8* @uc, align 1
- %43 = load i8* @sc, align 1
+ %42 = load i8, i8* @uc, align 1
+ %43 = load i8, i8* @sc, align 1
%44 = cmpxchg i8* @sc, i8 %42, i8 %43 monotonic monotonic
%45 = icmp eq i8 %44, %42
%46 = zext i1 %45 to i8
%47 = zext i8 %46 to i32
store i32 %47, i32* @ui, align 4
- %48 = load i8* @uc, align 1
- %49 = load i8* @sc, align 1
+ %48 = load i8, i8* @uc, align 1
+ %49 = load i8, i8* @sc, align 1
%50 = cmpxchg i8* @uc, i8 %48, i8 %49 monotonic monotonic
%51 = icmp eq i8 %50, %48
%52 = zext i1 %51 to i8
%53 = zext i8 %52 to i32
store i32 %53, i32* @ui, align 4
- %54 = load i8* @uc, align 1
+ %54 = load i8, i8* @uc, align 1
%55 = zext i8 %54 to i16
- %56 = load i8* @sc, align 1
+ %56 = load i8, i8* @sc, align 1
%57 = sext i8 %56 to i16
%58 = bitcast i8* bitcast (i16* @ss to i8*) to i16*
%59 = cmpxchg i16* %58, i16 %55, i16 %57 monotonic monotonic
@@ -608,9 +608,9 @@ entry:
%61 = zext i1 %60 to i8
%62 = zext i8 %61 to i32
store i32 %62, i32* @ui, align 4
- %63 = load i8* @uc, align 1
+ %63 = load i8, i8* @uc, align 1
%64 = zext i8 %63 to i16
- %65 = load i8* @sc, align 1
+ %65 = load i8, i8* @sc, align 1
%66 = sext i8 %65 to i16
%67 = bitcast i8* bitcast (i16* @us to i8*) to i16*
%68 = cmpxchg i16* %67, i16 %64, i16 %66 monotonic monotonic
@@ -618,9 +618,9 @@ entry:
%70 = zext i1 %69 to i8
%71 = zext i8 %70 to i32
store i32 %71, i32* @ui, align 4
- %72 = load i8* @uc, align 1
+ %72 = load i8, i8* @uc, align 1
%73 = zext i8 %72 to i32
- %74 = load i8* @sc, align 1
+ %74 = load i8, i8* @sc, align 1
%75 = sext i8 %74 to i32
%76 = bitcast i8* bitcast (i32* @si to i8*) to i32*
%77 = cmpxchg i32* %76, i32 %73, i32 %75 monotonic monotonic
@@ -628,9 +628,9 @@ entry:
%79 = zext i1 %78 to i8
%80 = zext i8 %79 to i32
store i32 %80, i32* @ui, align 4
- %81 = load i8* @uc, align 1
+ %81 = load i8, i8* @uc, align 1
%82 = zext i8 %81 to i32
- %83 = load i8* @sc, align 1
+ %83 = load i8, i8* @sc, align 1
%84 = sext i8 %83 to i32
%85 = bitcast i8* bitcast (i32* @ui to i8*) to i32*
%86 = cmpxchg i32* %85, i32 %82, i32 %84 monotonic monotonic
@@ -638,9 +638,9 @@ entry:
%88 = zext i1 %87 to i8
%89 = zext i8 %88 to i32
store i32 %89, i32* @ui, align 4
- %90 = load i8* @uc, align 1
+ %90 = load i8, i8* @uc, align 1
%91 = zext i8 %90 to i64
- %92 = load i8* @sc, align 1
+ %92 = load i8, i8* @sc, align 1
%93 = sext i8 %92 to i64
%94 = bitcast i8* bitcast (i64* @sl to i8*) to i64*
%95 = cmpxchg i64* %94, i64 %91, i64 %93 monotonic monotonic
@@ -648,9 +648,9 @@ entry:
%97 = zext i1 %96 to i8
%98 = zext i8 %97 to i32
store i32 %98, i32* @ui, align 4
- %99 = load i8* @uc, align 1
+ %99 = load i8, i8* @uc, align 1
%100 = zext i8 %99 to i64
- %101 = load i8* @sc, align 1
+ %101 = load i8, i8* @sc, align 1
%102 = sext i8 %101 to i64
%103 = bitcast i8* bitcast (i64* @ul to i8*) to i64*
%104 = cmpxchg i64* %103, i64 %100, i64 %102 monotonic monotonic
diff --git a/test/CodeGen/PowerPC/MergeConsecutiveStores.ll b/test/CodeGen/PowerPC/MergeConsecutiveStores.ll
new file mode 100644
index 000000000000..977b3b701cce
--- /dev/null
+++ b/test/CodeGen/PowerPC/MergeConsecutiveStores.ll
@@ -0,0 +1,68 @@
+; RUN: llc -march=ppc32 -mtriple=powerpc-unknown-linux-gnu -mattr=+altivec < %s | FileCheck %s
+
+;; This test ensures that MergeConsecutiveStores does not attempt to
+;; merge stores or loads when doing so would result in unaligned
+;; memory operations (unless the target supports those, e.g. X86).
+
+;; This issue can happen in other situations for other targets, but PPC
+;; with AltiVec extensions was chosen for the test because it does not
+;; support unaligned access with AltiVec instructions. If the 4
+;; loads/stores get merged into a v4i32 vector type, severely bad code
+;; gets generated: it painstakingly copies the values to a temporary
+;; location on the stack, with vector ops, in order to then use
+;; integer ops to load from the temporary stack location and store to
+;; the final location. Yuck!
+
+%struct.X = type { i32, i32, i32, i32 }
+
+@fx = common global %struct.X zeroinitializer, align 4
+@fy = common global %struct.X zeroinitializer, align 4
+
+;; In this test case, lvx and stvx instructions should NOT be
+;; generated, as the alignment is not sufficient for it to be
+;; worthwhile.
+
+;; CHECK-LABEL: f:
+;; CHECK: lwzu
+;; CHECK-NEXT: lwz
+;; CHECK-NEXT: lwz
+;; CHECK-NEXT: lwz
+;; CHECK-NEXT: stwu
+;; CHECK-NEXT: stw
+;; CHECK-NEXT: stw
+;; CHECK-NEXT: stw
+;; CHECK-NEXT: blr
+define void @f() {
+entry:
+ %0 = load i32, i32* getelementptr inbounds (%struct.X, %struct.X* @fx, i32 0, i32 0), align 4
+ %1 = load i32, i32* getelementptr inbounds (%struct.X, %struct.X* @fx, i32 0, i32 1), align 4
+ %2 = load i32, i32* getelementptr inbounds (%struct.X, %struct.X* @fx, i32 0, i32 2), align 4
+ %3 = load i32, i32* getelementptr inbounds (%struct.X, %struct.X* @fx, i32 0, i32 3), align 4
+ store i32 %0, i32* getelementptr inbounds (%struct.X, %struct.X* @fy, i32 0, i32 0), align 4
+ store i32 %1, i32* getelementptr inbounds (%struct.X, %struct.X* @fy, i32 0, i32 1), align 4
+ store i32 %2, i32* getelementptr inbounds (%struct.X, %struct.X* @fy, i32 0, i32 2), align 4
+ store i32 %3, i32* getelementptr inbounds (%struct.X, %struct.X* @fy, i32 0, i32 3), align 4
+ ret void
+}
+
+@gx = common global %struct.X zeroinitializer, align 16
+@gy = common global %struct.X zeroinitializer, align 16
+
+;; In this test, lvx and stvx instructions SHOULD be generated, as
+;; the 16-byte alignment of the new load/store is acceptable.
+;; CHECK-LABEL: g:
+;; CHECK: lvx
+;; CHECK: stvx
+;; CHECK: blr
+define void @g() {
+entry:
+ %0 = load i32, i32* getelementptr inbounds (%struct.X, %struct.X* @fx, i32 0, i32 0), align 16
+ %1 = load i32, i32* getelementptr inbounds (%struct.X, %struct.X* @fx, i32 0, i32 1), align 4
+ %2 = load i32, i32* getelementptr inbounds (%struct.X, %struct.X* @fx, i32 0, i32 2), align 4
+ %3 = load i32, i32* getelementptr inbounds (%struct.X, %struct.X* @fx, i32 0, i32 3), align 4
+ store i32 %0, i32* getelementptr inbounds (%struct.X, %struct.X* @fy, i32 0, i32 0), align 16
+ store i32 %1, i32* getelementptr inbounds (%struct.X, %struct.X* @fy, i32 0, i32 1), align 4
+ store i32 %2, i32* getelementptr inbounds (%struct.X, %struct.X* @fy, i32 0, i32 2), align 4
+ store i32 %3, i32* getelementptr inbounds (%struct.X, %struct.X* @fy, i32 0, i32 3), align 4
+ ret void
+}
diff --git a/test/CodeGen/PowerPC/a2-fp-basic.ll b/test/CodeGen/PowerPC/a2-fp-basic.ll
index de3aa7c31766..0324e38e1691 100644
--- a/test/CodeGen/PowerPC/a2-fp-basic.ll
+++ b/test/CodeGen/PowerPC/a2-fp-basic.ll
@@ -4,28 +4,28 @@
define void @maybe_an_fma(%0* sret %agg.result, %0* byval %a, %0* byval %b, %0* byval %c) nounwind {
entry:
- %a.realp = getelementptr inbounds %0* %a, i32 0, i32 0
- %a.real = load double* %a.realp
- %a.imagp = getelementptr inbounds %0* %a, i32 0, i32 1
- %a.imag = load double* %a.imagp
- %b.realp = getelementptr inbounds %0* %b, i32 0, i32 0
- %b.real = load double* %b.realp
- %b.imagp = getelementptr inbounds %0* %b, i32 0, i32 1
- %b.imag = load double* %b.imagp
+ %a.realp = getelementptr inbounds %0, %0* %a, i32 0, i32 0
+ %a.real = load double, double* %a.realp
+ %a.imagp = getelementptr inbounds %0, %0* %a, i32 0, i32 1
+ %a.imag = load double, double* %a.imagp
+ %b.realp = getelementptr inbounds %0, %0* %b, i32 0, i32 0
+ %b.real = load double, double* %b.realp
+ %b.imagp = getelementptr inbounds %0, %0* %b, i32 0, i32 1
+ %b.imag = load double, double* %b.imagp
%mul.rl = fmul double %a.real, %b.real
%mul.rr = fmul double %a.imag, %b.imag
%mul.r = fsub double %mul.rl, %mul.rr
%mul.il = fmul double %a.imag, %b.real
%mul.ir = fmul double %a.real, %b.imag
%mul.i = fadd double %mul.il, %mul.ir
- %c.realp = getelementptr inbounds %0* %c, i32 0, i32 0
- %c.real = load double* %c.realp
- %c.imagp = getelementptr inbounds %0* %c, i32 0, i32 1
- %c.imag = load double* %c.imagp
+ %c.realp = getelementptr inbounds %0, %0* %c, i32 0, i32 0
+ %c.real = load double, double* %c.realp
+ %c.imagp = getelementptr inbounds %0, %0* %c, i32 0, i32 1
+ %c.imag = load double, double* %c.imagp
%add.r = fadd double %mul.r, %c.real
%add.i = fadd double %mul.i, %c.imag
- %real = getelementptr inbounds %0* %agg.result, i32 0, i32 0
- %imag = getelementptr inbounds %0* %agg.result, i32 0, i32 1
+ %real = getelementptr inbounds %0, %0* %agg.result, i32 0, i32 0
+ %imag = getelementptr inbounds %0, %0* %agg.result, i32 0, i32 1
store double %add.r, double* %real
store double %add.i, double* %imag
ret void
diff --git a/test/CodeGen/PowerPC/add-fi.ll b/test/CodeGen/PowerPC/add-fi.ll
index 18892c8cdf5e..010602e516a4 100644
--- a/test/CodeGen/PowerPC/add-fi.ll
+++ b/test/CodeGen/PowerPC/add-fi.ll
@@ -4,7 +4,7 @@ target triple = "powerpc64-unknown-linux-gnu"
define i32* @test1() {
%X = alloca { i32, i32 }
- %Y = getelementptr {i32,i32}* %X, i32 0, i32 1
+ %Y = getelementptr {i32,i32}, {i32,i32}* %X, i32 0, i32 1
ret i32* %Y
; CHECK-LABEL: @test1
@@ -14,7 +14,7 @@ define i32* @test1() {
define i32* @test2() {
%X = alloca { i32, i32, i32, i32 }
- %Y = getelementptr {i32,i32,i32,i32}* %X, i32 0, i32 3
+ %Y = getelementptr {i32,i32,i32,i32}, {i32,i32,i32,i32}* %X, i32 0, i32 3
ret i32* %Y
; CHECK-LABEL: @test2
diff --git a/test/CodeGen/PowerPC/addi-licm.ll b/test/CodeGen/PowerPC/addi-licm.ll
index b52cb678a969..b6cfeec0e6f7 100644
--- a/test/CodeGen/PowerPC/addi-licm.ll
+++ b/test/CodeGen/PowerPC/addi-licm.ll
@@ -1,4 +1,5 @@
-; RUN: llc -mcpu=pwr7 < %s | FileCheck %s
+; RUN: llc -mcpu=pwr7 -disable-ppc-preinc-prep < %s | FileCheck %s
+; RUN: llc -mcpu=pwr7 < %s | FileCheck %s -check-prefix=PIP
target datalayout = "E-m:e-i64:64-n32:64"
target triple = "powerpc64-unknown-linux-gnu"
@@ -21,14 +22,22 @@ entry:
; CHECK-DAG: lfsx {{[0-9]+}}, [[REG2]],
; CHECK: blr
+; PIP-LABEL: @foo
+; PIP: addi [[REG1:[0-9]+]], 1,
+; PIP: addi [[REG2:[0-9]+]], 1,
+; PIP: %for.body.i
+; PIP-DAG: lfsu {{[0-9]+}}, 4([[REG1]])
+; PIP-DAG: lfsu {{[0-9]+}}, 4([[REG2]])
+; PIP: blr
+
for.body.i: ; preds = %for.body.i.preheader, %for.body.i
%accumulator.09.i = phi double [ %add.i, %for.body.i ], [ 0.000000e+00, %entry ]
%i.08.i = phi i64 [ %inc.i, %for.body.i ], [ 0, %entry ]
- %arrayidx.i = getelementptr inbounds [2048 x float]* %x, i64 0, i64 %i.08.i
- %v14 = load float* %arrayidx.i, align 4
+ %arrayidx.i = getelementptr inbounds [2048 x float], [2048 x float]* %x, i64 0, i64 %i.08.i
+ %v14 = load float, float* %arrayidx.i, align 4
%conv.i = fpext float %v14 to double
- %arrayidx1.i = getelementptr inbounds [2048 x float]* %y, i64 0, i64 %i.08.i
- %v15 = load float* %arrayidx1.i, align 4
+ %arrayidx1.i = getelementptr inbounds [2048 x float], [2048 x float]* %y, i64 0, i64 %i.08.i
+ %v15 = load float, float* %arrayidx1.i, align 4
%conv2.i = fpext float %v15 to double
%mul.i = fmul double %conv.i, %conv2.i
%add.i = fadd double %accumulator.09.i, %mul.i
diff --git a/test/CodeGen/PowerPC/addi-reassoc.ll b/test/CodeGen/PowerPC/addi-reassoc.ll
index 2b71ce65f6bc..3624ce638c7d 100644
--- a/test/CodeGen/PowerPC/addi-reassoc.ll
+++ b/test/CodeGen/PowerPC/addi-reassoc.ll
@@ -4,15 +4,15 @@
define i32 @test1([4 x i32]* %P, i32 %i) {
%tmp.2 = add i32 %i, 2 ; <i32> [#uses=1]
- %tmp.4 = getelementptr [4 x i32]* %P, i32 %tmp.2, i32 1 ; <i32*> [#uses=1]
- %tmp.5 = load i32* %tmp.4 ; <i32> [#uses=1]
+ %tmp.4 = getelementptr [4 x i32], [4 x i32]* %P, i32 %tmp.2, i32 1 ; <i32*> [#uses=1]
+ %tmp.5 = load i32, i32* %tmp.4 ; <i32> [#uses=1]
ret i32 %tmp.5
}
define i32 @test2(%struct.X* %P, i32 %i) {
%tmp.2 = add i32 %i, 2 ; <i32> [#uses=1]
- %tmp.5 = getelementptr %struct.X* %P, i32 %tmp.2, i32 0, i32 1 ; <i8*> [#uses=1]
- %tmp.6 = load i8* %tmp.5 ; <i8> [#uses=1]
+ %tmp.5 = getelementptr %struct.X, %struct.X* %P, i32 %tmp.2, i32 0, i32 1 ; <i8*> [#uses=1]
+ %tmp.6 = load i8, i8* %tmp.5 ; <i8> [#uses=1]
%tmp.7 = sext i8 %tmp.6 to i32 ; <i32> [#uses=1]
ret i32 %tmp.7
}
diff --git a/test/CodeGen/PowerPC/alias.ll b/test/CodeGen/PowerPC/alias.ll
index 86e41148a0d7..524abd5da3ef 100644
--- a/test/CodeGen/PowerPC/alias.ll
+++ b/test/CodeGen/PowerPC/alias.ll
@@ -10,8 +10,8 @@
; CHECK-LABEL: bar:
define i32 @bar() {
; MEDIUM: addis 3, 2, fooa@toc@ha
-; LARGE: addis 3, 2, .LC1@toc@ha
- %a = load i32* @fooa
+; LARGE: addis 3, 2, .L[[L0:.*]]@toc@ha
+ %a = load i32, i32* @fooa
ret i32 %a
}
@@ -19,13 +19,13 @@ define i32 @bar() {
define i64 @bar2() {
; MEDIUM: addis 3, 2, foo2a@toc@ha
; MEDIUM: addi 3, 3, foo2a@toc@l
-; LARGE: addis 3, 2, .LC3@toc@ha
- %a = load i64* @foo2a
+; LARGE: addis 3, 2, .L[[L1:.*]]@toc@ha
+ %a = load i64, i64* @foo2a
ret i64 %a
}
-; LARGE: .LC1:
+; LARGE: .L[[L0]]:
; LARGE-NEXT: .tc fooa[TC],fooa
-; LARGE: .LC3:
+; LARGE: .L[[L1]]:
; LARGE-NEXT: .tc foo2a[TC],foo2a
diff --git a/test/CodeGen/PowerPC/and-branch.ll b/test/CodeGen/PowerPC/and-branch.ll
index 0484f882ec72..1543205f1a58 100644
--- a/test/CodeGen/PowerPC/and-branch.ll
+++ b/test/CodeGen/PowerPC/and-branch.ll
@@ -7,7 +7,7 @@ entry:
%tmp4 = and i1 %tmp3, %tmp ; <i1> [#uses=1]
br i1 %tmp4, label %cond_true, label %UnifiedReturnBlock
cond_true: ; preds = %entry
- %tmp5 = tail call i32 (...)* @bar( ) ; <i32> [#uses=0]
+ %tmp5 = tail call i32 (...) @bar( ) ; <i32> [#uses=0]
ret void
UnifiedReturnBlock: ; preds = %entry
ret void
diff --git a/test/CodeGen/PowerPC/and-elim.ll b/test/CodeGen/PowerPC/and-elim.ll
index a1ec29b16f14..f1738b2c1517 100644
--- a/test/CodeGen/PowerPC/and-elim.ll
+++ b/test/CodeGen/PowerPC/and-elim.ll
@@ -1,7 +1,7 @@
; RUN: llc < %s -march=ppc32 | not grep rlwin
define void @test(i8* %P) {
- %W = load i8* %P
+ %W = load i8, i8* %P
%X = shl i8 %W, 1
%Y = add i8 %X, 2
%Z = and i8 %Y, 254 ; dead and
diff --git a/test/CodeGen/PowerPC/anon_aggr.ll b/test/CodeGen/PowerPC/anon_aggr.ll
index 6c4f140de127..9f9eed019c15 100644
--- a/test/CodeGen/PowerPC/anon_aggr.ll
+++ b/test/CodeGen/PowerPC/anon_aggr.ll
@@ -21,7 +21,7 @@ unequal:
}
; CHECK-LABEL: func1:
-; CHECK: cmpld {{[0-9]+}}, 4, 5
+; CHECK: cmpld {{([0-9]+,)?}}4, 5
; CHECK-DAG: std 4, -[[OFFSET1:[0-9]+]]
; CHECK-DAG: std 5, -[[OFFSET2:[0-9]+]]
; CHECK: ld 3, -[[OFFSET1]](1)
@@ -31,7 +31,7 @@ unequal:
; DARWIN32: mr
; DARWIN32: mr r[[REG1:[0-9]+]], r[[REGA:[0-9]+]]
; DARWIN32: mr r[[REG2:[0-9]+]], r[[REGB:[0-9]+]]
-; DARWIN32: cmplw cr{{[0-9]+}}, r[[REGA]], r[[REGB]]
+; DARWIN32: cmplw {{(cr[0-9]+,)?}}r[[REGA]], r[[REGB]]
; DARWIN32: stw r[[REG1]], -[[OFFSET1:[0-9]+]]
; DARWIN32: stw r[[REG2]], -[[OFFSET2:[0-9]+]]
; DARWIN32: lwz r3, -[[OFFSET1]]
@@ -41,7 +41,7 @@ unequal:
; DARWIN64: mr
; DARWIN64: mr r[[REG1:[0-9]+]], r[[REGA:[0-9]+]]
; DARWIN64: mr r[[REG2:[0-9]+]], r[[REGB:[0-9]+]]
-; DARWIN64: cmpld cr{{[0-9]+}}, r[[REGA]], r[[REGB]]
+; DARWIN64: cmpld {{(cr[0-9]+,)?}}r[[REGA]], r[[REGB]]
; DARWIN64: std r[[REG1]], -[[OFFSET1:[0-9]+]]
; DARWIN64: std r[[REG2]], -[[OFFSET2:[0-9]+]]
; DARWIN64: ld r3, -[[OFFSET1]]
@@ -51,8 +51,8 @@ unequal:
define i8* @func2({ i64, i8* } %array1, %tarray* byval %array2) {
entry:
%array1_ptr = extractvalue {i64, i8* } %array1, 1
- %tmp = getelementptr inbounds %tarray* %array2, i32 0, i32 1
- %array2_ptr = load i8** %tmp
+ %tmp = getelementptr inbounds %tarray, %tarray* %array2, i32 0, i32 1
+ %array2_ptr = load i8*, i8** %tmp
%cond = icmp eq i8* %array1_ptr, %array2_ptr
br i1 %cond, label %equal, label %unequal
equal:
@@ -63,7 +63,7 @@ unequal:
; CHECK-LABEL: func2:
; CHECK: ld [[REG2:[0-9]+]], 72(1)
-; CHECK: cmpld {{[0-9]+}}, 4, [[REG2]]
+; CHECK: cmpld {{([0-9]+,)?}}4, [[REG2]]
; CHECK-DAG: std [[REG2]], -[[OFFSET1:[0-9]+]]
; CHECK-DAG: std 4, -[[OFFSET2:[0-9]+]]
; CHECK: ld 3, -[[OFFSET2]](1)
@@ -74,7 +74,7 @@ unequal:
; DARWIN32: lwz r[[REG2:[0-9]+]], 44(r[[REGSP]])
; DARWIN32: mr
; DARWIN32: mr r[[REG3:[0-9]+]], r[[REGA:[0-9]+]]
-; DARWIN32: cmplw cr{{[0-9]+}}, r[[REGA]], r[[REG2]]
+; DARWIN32: cmplw {{(cr[0-9]+,)?}}r[[REGA]], r[[REG2]]
; DARWIN32: stw r[[REG3]], -[[OFFSET1:[0-9]+]]
; DARWIN32: stw r[[REG2]], -[[OFFSET2:[0-9]+]]
; DARWIN32: lwz r3, -[[OFFSET1]]
@@ -84,7 +84,7 @@ unequal:
; DARWIN64: ld r[[REG2:[0-9]+]], 72(r1)
; DARWIN64: mr
; DARWIN64: mr r[[REG3:[0-9]+]], r[[REGA:[0-9]+]]
-; DARWIN64: cmpld cr{{[0-9]+}}, r[[REGA]], r[[REG2]]
+; DARWIN64: cmpld {{(cr[0-9]+,)?}}r[[REGA]], r[[REG2]]
; DARWIN64: std r[[REG3]], -[[OFFSET1:[0-9]+]]
; DARWIN64: std r[[REG2]], -[[OFFSET2:[0-9]+]]
; DARWIN64: ld r3, -[[OFFSET1]]
@@ -93,10 +93,10 @@ unequal:
define i8* @func3({ i64, i8* }* byval %array1, %tarray* byval %array2) {
entry:
- %tmp1 = getelementptr inbounds { i64, i8* }* %array1, i32 0, i32 1
- %array1_ptr = load i8** %tmp1
- %tmp2 = getelementptr inbounds %tarray* %array2, i32 0, i32 1
- %array2_ptr = load i8** %tmp2
+ %tmp1 = getelementptr inbounds { i64, i8* }, { i64, i8* }* %array1, i32 0, i32 1
+ %array1_ptr = load i8*, i8** %tmp1
+ %tmp2 = getelementptr inbounds %tarray, %tarray* %array2, i32 0, i32 1
+ %array2_ptr = load i8*, i8** %tmp2
%cond = icmp eq i8* %array1_ptr, %array2_ptr
br i1 %cond, label %equal, label %unequal
equal:
@@ -108,7 +108,7 @@ unequal:
; CHECK-LABEL: func3:
; CHECK: ld [[REG3:[0-9]+]], 72(1)
; CHECK: ld [[REG4:[0-9]+]], 56(1)
-; CHECK: cmpld {{[0-9]+}}, [[REG4]], [[REG3]]
+; CHECK: cmpld {{([0-9]+,)?}}[[REG4]], [[REG3]]
; CHECK: std [[REG3]], -[[OFFSET1:[0-9]+]](1)
; CHECK: std [[REG4]], -[[OFFSET2:[0-9]+]](1)
; CHECK: ld 3, -[[OFFSET2]](1)
@@ -119,7 +119,7 @@ unequal:
; DARWIN32: addi r[[REG2:[0-9]+]], r[[REGSP]], 24
; DARWIN32: lwz r[[REG3:[0-9]+]], 44(r[[REGSP]])
; DARWIN32: lwz r[[REG4:[0-9]+]], 32(r[[REGSP]])
-; DARWIN32: cmplw cr{{[0-9]+}}, r[[REG4]], r[[REG3]]
+; DARWIN32: cmplw {{(cr[0-9]+,)?}}r[[REG4]], r[[REG3]]
; DARWIN32: stw r[[REG3]], -[[OFFSET1:[0-9]+]]
; DARWIN32: stw r[[REG4]], -[[OFFSET2:[0-9]+]]
; DARWIN32: lwz r3, -[[OFFSET2]]
@@ -128,7 +128,7 @@ unequal:
; DARWIN64: _func3:
; DARWIN64: ld r[[REG3:[0-9]+]], 72(r1)
; DARWIN64: ld r[[REG4:[0-9]+]], 56(r1)
-; DARWIN64: cmpld cr{{[0-9]+}}, r[[REG4]], r[[REG3]]
+; DARWIN64: cmpld {{(cr[0-9]+,)?}}r[[REG4]], r[[REG3]]
; DARWIN64: std r[[REG3]], -[[OFFSET1:[0-9]+]]
; DARWIN64: std r[[REG4]], -[[OFFSET2:[0-9]+]]
; DARWIN64: ld r3, -[[OFFSET2]]
@@ -140,8 +140,8 @@ define i8* @func4(i64 %p1, i64 %p2, i64 %p3, i64 %p4,
{ i64, i8* } %array1, %tarray* byval %array2) {
entry:
%array1_ptr = extractvalue {i64, i8* } %array1, 1
- %tmp = getelementptr inbounds %tarray* %array2, i32 0, i32 1
- %array2_ptr = load i8** %tmp
+ %tmp = getelementptr inbounds %tarray, %tarray* %array2, i32 0, i32 1
+ %array2_ptr = load i8*, i8** %tmp
%cond = icmp eq i8* %array1_ptr, %array2_ptr
br i1 %cond, label %equal, label %unequal
equal:
@@ -153,7 +153,7 @@ unequal:
; CHECK-LABEL: func4:
; CHECK: ld [[REG3:[0-9]+]], 136(1)
; CHECK: ld [[REG2:[0-9]+]], 120(1)
-; CHECK: cmpld {{[0-9]+}}, [[REG2]], [[REG3]]
+; CHECK: cmpld {{([0-9]+,)?}}[[REG2]], [[REG3]]
; CHECK: std [[REG3]], -[[OFFSET2:[0-9]+]](1)
; CHECK: std [[REG2]], -[[OFFSET1:[0-9]+]](1)
; CHECK: ld 3, -[[OFFSET1]](1)
@@ -164,8 +164,8 @@ unequal:
; DARWIN32: addi r[[REG1:[0-9]+]], r1, 100
; DARWIN32: lwz r[[REG3:[0-9]+]], 108(r1)
; DARWIN32: mr r[[REG2:[0-9]+]], r[[REG4]]
-; DARWIN32: cmplw cr{{[0-9]+}}, r[[REG4]], r[[REG3]]
-; DARWIN32: stw r[[REG4]], -[[OFFSET1:[0-9]+]]
+; DARWIN32: cmplw {{(cr[0-9]+,)?}}r[[REG4]], r[[REG3]]
+; DARWIN32: stw r[[REG2]], -[[OFFSET1:[0-9]+]]
; DARWIN32: stw r[[REG3]], -[[OFFSET2:[0-9]+]]
; DARWIN32: lwz r[[REG1]], -[[OFFSET1]]
; DARWIN32: lwz r[[REG1]], -[[OFFSET2]]
@@ -174,7 +174,7 @@ unequal:
; DARWIN64: ld r[[REG2:[0-9]+]], 120(r1)
; DARWIN64: ld r[[REG3:[0-9]+]], 136(r1)
; DARWIN64: mr r[[REG4:[0-9]+]], r[[REG2]]
-; DARWIN64: cmpld cr{{[0-9]+}}, r[[REG2]], r[[REG3]]
+; DARWIN64: cmpld {{(cr[0-9]+,)?}}r[[REG2]], r[[REG3]]
; DARWIN64: std r[[REG4]], -[[OFFSET1:[0-9]+]]
; DARWIN64: std r[[REG3]], -[[OFFSET2:[0-9]+]]
; DARWIN64: ld r3, -[[OFFSET1]]
diff --git a/test/CodeGen/PowerPC/asm-constraints.ll b/test/CodeGen/PowerPC/asm-constraints.ll
index 9bf8b75e0ace..2d9b0eb591d3 100644
--- a/test/CodeGen/PowerPC/asm-constraints.ll
+++ b/test/CodeGen/PowerPC/asm-constraints.ll
@@ -23,7 +23,7 @@ entry:
%addr.addr = alloca i8*, align 8
store i32 %result, i32* %result.addr, align 4
store i8* %addr, i8** %addr.addr, align 8
- %0 = load i8** %addr.addr, align 8
+ %0 = load i8*, i8** %addr.addr, align 8
%1 = call i32 asm sideeffect "ld${1:U}${1:X} $0,$1\0Acmpw $0,$0\0Abne- 1f\0A1: isync\0A", "=r,*m,~{memory},~{cr0}"(i8* %0) #1, !srcloc !1
store i32 %1, i32* %result.addr, align 4
ret void
@@ -31,7 +31,7 @@ entry:
; CHECK-LABEL: @foo
; CHECK: ld [[REG:[0-9]+]], 0(4)
-; CHECK: cmpw 0, [[REG]], [[REG]]
+; CHECK: cmpw [[REG]], [[REG]]
; CHECK: bne- 0, .Ltmp[[TMP:[0-9]+]]
; CHECK: .Ltmp[[TMP]]:
; CHECK: isync
diff --git a/test/CodeGen/PowerPC/atomic-2.ll b/test/CodeGen/PowerPC/atomic-2.ll
index 9cb0fa5be5c9..1857d5d697e6 100644
--- a/test/CodeGen/PowerPC/atomic-2.ll
+++ b/test/CodeGen/PowerPC/atomic-2.ll
@@ -1,4 +1,6 @@
; RUN: llc < %s -march=ppc64 | FileCheck %s
+; RUN: llc < %s -march=ppc64 -mcpu=pwr7 | FileCheck %s
+; RUN: llc < %s -march=ppc64 -mcpu=pwr8 | FileCheck %s -check-prefix=CHECK-P8U
define i64 @exchange_and_add(i64* %mem, i64 %val) nounwind {
; CHECK-LABEL: exchange_and_add:
@@ -8,6 +10,22 @@ define i64 @exchange_and_add(i64* %mem, i64 %val) nounwind {
ret i64 %tmp
}
+define i8 @exchange_and_add8(i8* %mem, i8 %val) nounwind {
+; CHECK-LABEL: exchange_and_add8:
+; CHECK-P8U: lbarx
+ %tmp = atomicrmw add i8* %mem, i8 %val monotonic
+; CHECK-P8U: stbcx.
+ ret i8 %tmp
+}
+
+define i16 @exchange_and_add16(i16* %mem, i16 %val) nounwind {
+; CHECK-LABEL: exchange_and_add16:
+; CHECK-P8U: lharx
+ %tmp = atomicrmw add i16* %mem, i16 %val monotonic
+; CHECK-P8U: sthcx.
+ ret i16 %tmp
+}
+
define i64 @exchange_and_cmp(i64* %mem) nounwind {
; CHECK-LABEL: exchange_and_cmp:
; CHECK: ldarx
@@ -18,6 +36,26 @@ define i64 @exchange_and_cmp(i64* %mem) nounwind {
ret i64 %tmp
}
+define i8 @exchange_and_cmp8(i8* %mem) nounwind {
+; CHECK-LABEL: exchange_and_cmp8:
+; CHECK-P8U: lbarx
+ %tmppair = cmpxchg i8* %mem, i8 0, i8 1 monotonic monotonic
+ %tmp = extractvalue { i8, i1 } %tmppair, 0
+; CHECK-P8U: stbcx.
+; CHECK-P8U: stbcx.
+ ret i8 %tmp
+}
+
+define i16 @exchange_and_cmp16(i16* %mem) nounwind {
+; CHECK-LABEL: exchange_and_cmp16:
+; CHECK-P8U: lharx
+ %tmppair = cmpxchg i16* %mem, i16 0, i16 1 monotonic monotonic
+ %tmp = extractvalue { i16, i1 } %tmppair, 0
+; CHECK-P8U: sthcx.
+; CHECK-P8U: sthcx.
+ ret i16 %tmp
+}
+
define i64 @exchange(i64* %mem, i64 %val) nounwind {
; CHECK-LABEL: exchange:
; CHECK: ldarx
@@ -26,11 +64,27 @@ define i64 @exchange(i64* %mem, i64 %val) nounwind {
ret i64 %tmp
}
+define i8 @exchange8(i8* %mem, i8 %val) nounwind {
+; CHECK-LABEL: exchange8:
+; CHECK-P8U: lbarx
+ %tmp = atomicrmw xchg i8* %mem, i8 1 monotonic
+; CHECK-P8U: stbcx.
+ ret i8 %tmp
+}
+
+define i16 @exchange16(i16* %mem, i16 %val) nounwind {
+; CHECK-LABEL: exchange16:
+; CHECK-P8U: lharx
+ %tmp = atomicrmw xchg i16* %mem, i16 1 monotonic
+; CHECK-P8U: sthcx.
+ ret i16 %tmp
+}
+
define void @atomic_store(i64* %mem, i64 %val) nounwind {
entry:
; CHECK: @atomic_store
store atomic i64 %val, i64* %mem release, align 64
-; CHECK: sync 1
+; CHECK: lwsync
; CHECK-NOT: stdcx
; CHECK: std
ret void
@@ -39,10 +93,10 @@ entry:
define i64 @atomic_load(i64* %mem) nounwind {
entry:
; CHECK: @atomic_load
- %tmp = load atomic i64* %mem acquire, align 64
+ %tmp = load atomic i64, i64* %mem acquire, align 64
; CHECK-NOT: ldarx
; CHECK: ld
-; CHECK: sync 1
+; CHECK: lwsync
ret i64 %tmp
}
diff --git a/test/CodeGen/PowerPC/atomics-fences.ll b/test/CodeGen/PowerPC/atomics-fences.ll
index 862bd173fdaf..c015fa6eefb0 100644
--- a/test/CodeGen/PowerPC/atomics-fences.ll
+++ b/test/CodeGen/PowerPC/atomics-fences.ll
@@ -5,24 +5,23 @@
; Fences
define void @fence_acquire() {
; CHECK-LABEL: fence_acquire
-; CHECK: sync 1
-; PPC440-NOT: sync 1
+; CHECK: lwsync
+; PPC440-NOT: lwsync
; PPC440: msync
fence acquire
ret void
}
define void @fence_release() {
; CHECK-LABEL: fence_release
-; CHECK: sync 1
-; PPC440-NOT: sync 1
+; CHECK: lwsync
+; PPC440-NOT: lwsync
; PPC440: msync
fence release
ret void
}
define void @fence_seq_cst() {
; CHECK-LABEL: fence_seq_cst
-; CHECK: sync 0
-; PPC440-NOT: sync 0
+; CHECK: sync
; PPC440: msync
fence seq_cst
ret void
diff --git a/test/CodeGen/PowerPC/atomics-indexed.ll b/test/CodeGen/PowerPC/atomics-indexed.ll
index bb9ca0401966..7a0dde034d68 100644
--- a/test/CodeGen/PowerPC/atomics-indexed.ll
+++ b/test/CodeGen/PowerPC/atomics-indexed.ll
@@ -9,27 +9,27 @@
; Indexed version of loads
define i8 @load_x_i8_seq_cst([100000 x i8]* %mem) {
; CHECK-LABEL: load_x_i8_seq_cst
-; CHECK: sync 0
+; CHECK: sync
; CHECK: lbzx
-; CHECK: sync 1
- %ptr = getelementptr inbounds [100000 x i8]* %mem, i64 0, i64 90000
- %val = load atomic i8* %ptr seq_cst, align 1
+; CHECK: lwsync
+ %ptr = getelementptr inbounds [100000 x i8], [100000 x i8]* %mem, i64 0, i64 90000
+ %val = load atomic i8, i8* %ptr seq_cst, align 1
ret i8 %val
}
define i16 @load_x_i16_acquire([100000 x i16]* %mem) {
; CHECK-LABEL: load_x_i16_acquire
; CHECK: lhzx
-; CHECK: sync 1
- %ptr = getelementptr inbounds [100000 x i16]* %mem, i64 0, i64 90000
- %val = load atomic i16* %ptr acquire, align 2
+; CHECK: lwsync
+ %ptr = getelementptr inbounds [100000 x i16], [100000 x i16]* %mem, i64 0, i64 90000
+ %val = load atomic i16, i16* %ptr acquire, align 2
ret i16 %val
}
define i32 @load_x_i32_monotonic([100000 x i32]* %mem) {
; CHECK-LABEL: load_x_i32_monotonic
; CHECK: lwzx
; CHECK-NOT: sync
- %ptr = getelementptr inbounds [100000 x i32]* %mem, i64 0, i64 90000
- %val = load atomic i32* %ptr monotonic, align 4
+ %ptr = getelementptr inbounds [100000 x i32], [100000 x i32]* %mem, i64 0, i64 90000
+ %val = load atomic i32, i32* %ptr monotonic, align 4
ret i32 %val
}
define i64 @load_x_i64_unordered([100000 x i64]* %mem) {
@@ -38,25 +38,25 @@ define i64 @load_x_i64_unordered([100000 x i64]* %mem) {
; PPC64-NOT: __sync_
; PPC64: ldx
; CHECK-NOT: sync
- %ptr = getelementptr inbounds [100000 x i64]* %mem, i64 0, i64 90000
- %val = load atomic i64* %ptr unordered, align 8
+ %ptr = getelementptr inbounds [100000 x i64], [100000 x i64]* %mem, i64 0, i64 90000
+ %val = load atomic i64, i64* %ptr unordered, align 8
ret i64 %val
}
; Indexed version of stores
define void @store_x_i8_seq_cst([100000 x i8]* %mem) {
; CHECK-LABEL: store_x_i8_seq_cst
-; CHECK: sync 0
+; CHECK: sync
; CHECK: stbx
- %ptr = getelementptr inbounds [100000 x i8]* %mem, i64 0, i64 90000
+ %ptr = getelementptr inbounds [100000 x i8], [100000 x i8]* %mem, i64 0, i64 90000
store atomic i8 42, i8* %ptr seq_cst, align 1
ret void
}
define void @store_x_i16_release([100000 x i16]* %mem) {
; CHECK-LABEL: store_x_i16_release
-; CHECK: sync 1
+; CHECK: lwsync
; CHECK: sthx
- %ptr = getelementptr inbounds [100000 x i16]* %mem, i64 0, i64 90000
+ %ptr = getelementptr inbounds [100000 x i16], [100000 x i16]* %mem, i64 0, i64 90000
store atomic i16 42, i16* %ptr release, align 2
ret void
}
@@ -64,18 +64,17 @@ define void @store_x_i32_monotonic([100000 x i32]* %mem) {
; CHECK-LABEL: store_x_i32_monotonic
; CHECK-NOT: sync
; CHECK: stwx
- %ptr = getelementptr inbounds [100000 x i32]* %mem, i64 0, i64 90000
+ %ptr = getelementptr inbounds [100000 x i32], [100000 x i32]* %mem, i64 0, i64 90000
store atomic i32 42, i32* %ptr monotonic, align 4
ret void
}
define void @store_x_i64_unordered([100000 x i64]* %mem) {
; CHECK-LABEL: store_x_i64_unordered
-; CHECK-NOT: sync 0
-; CHECK-NOT: sync 1
+; CHECK-NOT: sync
; PPC32: __sync_
; PPC64-NOT: __sync_
; PPC64: stdx
- %ptr = getelementptr inbounds [100000 x i64]* %mem, i64 0, i64 90000
+ %ptr = getelementptr inbounds [100000 x i64], [100000 x i64]* %mem, i64 0, i64 90000
store atomic i64 42, i64* %ptr unordered, align 8
ret void
}
diff --git a/test/CodeGen/PowerPC/atomics.ll b/test/CodeGen/PowerPC/atomics.ll
index 5f6a6a4dcdf1..2e1eff0f634d 100644
--- a/test/CodeGen/PowerPC/atomics.ll
+++ b/test/CodeGen/PowerPC/atomics.ll
@@ -13,31 +13,31 @@ define i8 @load_i8_unordered(i8* %mem) {
; CHECK-LABEL: load_i8_unordered
; CHECK: lbz
; CHECK-NOT: sync
- %val = load atomic i8* %mem unordered, align 1
+ %val = load atomic i8, i8* %mem unordered, align 1
ret i8 %val
}
define i16 @load_i16_monotonic(i16* %mem) {
; CHECK-LABEL: load_i16_monotonic
; CHECK: lhz
; CHECK-NOT: sync
- %val = load atomic i16* %mem monotonic, align 2
+ %val = load atomic i16, i16* %mem monotonic, align 2
ret i16 %val
}
define i32 @load_i32_acquire(i32* %mem) {
; CHECK-LABEL: load_i32_acquire
; CHECK: lwz
- %val = load atomic i32* %mem acquire, align 4
-; CHECK: sync 1
+ %val = load atomic i32, i32* %mem acquire, align 4
+; CHECK: lwsync
ret i32 %val
}
define i64 @load_i64_seq_cst(i64* %mem) {
; CHECK-LABEL: load_i64_seq_cst
-; CHECK: sync 0
+; CHECK: sync
; PPC32: __sync_
; PPC64-NOT: __sync_
; PPC64: ld
- %val = load atomic i64* %mem seq_cst, align 8
-; CHECK: sync 1
+ %val = load atomic i64, i64* %mem seq_cst, align 8
+; CHECK: lwsync
ret i64 %val
}
@@ -58,14 +58,14 @@ define void @store_i16_monotonic(i16* %mem) {
}
define void @store_i32_release(i32* %mem) {
; CHECK-LABEL: store_i32_release
-; CHECK: sync 1
+; CHECK: lwsync
; CHECK: stw
store atomic i32 42, i32* %mem release, align 4
ret void
}
define void @store_i64_seq_cst(i64* %mem) {
; CHECK-LABEL: store_i64_seq_cst
-; CHECK: sync 0
+; CHECK: sync
; PPC32: __sync_
; PPC64-NOT: __sync_
; PPC64: std
@@ -76,9 +76,9 @@ define void @store_i64_seq_cst(i64* %mem) {
; Atomic CmpXchg
define i8 @cas_strong_i8_sc_sc(i8* %mem) {
; CHECK-LABEL: cas_strong_i8_sc_sc
-; CHECK: sync 0
+; CHECK: sync
%val = cmpxchg i8* %mem, i8 0, i8 1 seq_cst seq_cst
-; CHECK: sync 1
+; CHECK: lwsync
%loaded = extractvalue { i8, i1} %val, 0
ret i8 %loaded
}
@@ -86,21 +86,21 @@ define i16 @cas_weak_i16_acquire_acquire(i16* %mem) {
; CHECK-LABEL: cas_weak_i16_acquire_acquire
;CHECK-NOT: sync
%val = cmpxchg weak i16* %mem, i16 0, i16 1 acquire acquire
-; CHECK: sync 1
+; CHECK: lwsync
%loaded = extractvalue { i16, i1} %val, 0
ret i16 %loaded
}
define i32 @cas_strong_i32_acqrel_acquire(i32* %mem) {
; CHECK-LABEL: cas_strong_i32_acqrel_acquire
-; CHECK: sync 1
+; CHECK: lwsync
%val = cmpxchg i32* %mem, i32 0, i32 1 acq_rel acquire
-; CHECK: sync 1
+; CHECK: lwsync
%loaded = extractvalue { i32, i1} %val, 0
ret i32 %loaded
}
define i64 @cas_weak_i64_release_monotonic(i64* %mem) {
; CHECK-LABEL: cas_weak_i64_release_monotonic
-; CHECK: sync 1
+; CHECK: lwsync
%val = cmpxchg weak i64* %mem, i64 0, i64 1 release monotonic
; CHECK-NOT: [sync ]
%loaded = extractvalue { i64, i1} %val, 0
@@ -116,21 +116,21 @@ define i8 @add_i8_monotonic(i8* %mem, i8 %operand) {
}
define i16 @xor_i16_seq_cst(i16* %mem, i16 %operand) {
; CHECK-LABEL: xor_i16_seq_cst
-; CHECK: sync 0
+; CHECK: sync
%val = atomicrmw xor i16* %mem, i16 %operand seq_cst
-; CHECK: sync 1
+; CHECK: lwsync
ret i16 %val
}
define i32 @xchg_i32_acq_rel(i32* %mem, i32 %operand) {
; CHECK-LABEL: xchg_i32_acq_rel
-; CHECK: sync 1
+; CHECK: lwsync
%val = atomicrmw xchg i32* %mem, i32 %operand acq_rel
-; CHECK: sync 1
+; CHECK: lwsync
ret i32 %val
}
define i64 @and_i64_release(i64* %mem, i64 %operand) {
; CHECK-LABEL: and_i64_release
-; CHECK: sync 1
+; CHECK: lwsync
%val = atomicrmw and i64* %mem, i64 %operand release
; CHECK-NOT: [sync ]
ret i64 %val
diff --git a/test/CodeGen/PowerPC/bdzlr.ll b/test/CodeGen/PowerPC/bdzlr.ll
index 29b74c6c8c66..d6506044868f 100644
--- a/test/CodeGen/PowerPC/bdzlr.ll
+++ b/test/CodeGen/PowerPC/bdzlr.ll
@@ -35,8 +35,8 @@ for.body.lr.ph: ; preds = %if.end
for.body: ; preds = %for.body.for.body_crit_edge, %for.body.lr.ph
%0 = phi %struct.lua_TValue.17.692* [ undef, %for.body.lr.ph ], [ %.pre, %for.body.for.body_crit_edge ]
%indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body.for.body_crit_edge ]
- %tt = getelementptr inbounds %struct.lua_TValue.17.692* %0, i64 %indvars.iv, i32 1
- %1 = load i32* %tt, align 4
+ %tt = getelementptr inbounds %struct.lua_TValue.17.692, %struct.lua_TValue.17.692* %0, i64 %indvars.iv, i32 1
+ %1 = load i32, i32* %tt, align 4
store i32 %1, i32* undef, align 4
%indvars.iv.next = add i64 %indvars.iv, 1
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
@@ -44,7 +44,7 @@ for.body: ; preds = %for.body.for.body_c
br i1 %exitcond, label %for.end, label %for.body.for.body_crit_edge
for.body.for.body_crit_edge: ; preds = %for.body
- %.pre = load %struct.lua_TValue.17.692** undef, align 8
+ %.pre = load %struct.lua_TValue.17.692*, %struct.lua_TValue.17.692** undef, align 8
br label %for.body
for.end: ; preds = %for.body, %if.end, %entry
diff --git a/test/CodeGen/PowerPC/bperm.ll b/test/CodeGen/PowerPC/bperm.ll
index c489c1f90a8f..c5a728859a05 100644
--- a/test/CodeGen/PowerPC/bperm.ll
+++ b/test/CodeGen/PowerPC/bperm.ll
@@ -22,15 +22,15 @@ entry:
ret i64 %0
; CHECK-LABEL: @bs8
-; CHECK-DAG: rldicl [[REG1:[0-9]+]], 3, 16, 0
-; CHECK-DAG: rldicl [[REG2:[0-9]+]], 3, 8, 0
-; CHECK-DAG: rldicl [[REG3:[0-9]+]], 3, 24, 0
+; CHECK-DAG: rotldi [[REG1:[0-9]+]], 3, 16
+; CHECK-DAG: rotldi [[REG2:[0-9]+]], 3, 8
+; CHECK-DAG: rotldi [[REG3:[0-9]+]], 3, 24
; CHECK-DAG: rldimi [[REG2]], [[REG1]], 8, 48
-; CHECK-DAG: rldicl [[REG4:[0-9]+]], 3, 32, 0
+; CHECK-DAG: rotldi [[REG4:[0-9]+]], 3, 32
; CHECK-DAG: rldimi [[REG2]], [[REG3]], 16, 40
-; CHECK-DAG: rldicl [[REG5:[0-9]+]], 3, 48, 0
+; CHECK-DAG: rotldi [[REG5:[0-9]+]], 3, 48
; CHECK-DAG: rldimi [[REG2]], [[REG4]], 24, 32
-; CHECK-DAG: rldicl [[REG6:[0-9]+]], 3, 56, 0
+; CHECK-DAG: rotldi [[REG6:[0-9]+]], 3, 56
; CHECK-DAG: rldimi [[REG2]], [[REG5]], 40, 16
; CHECK-DAG: rldimi [[REG2]], [[REG6]], 48, 8
; CHECK-DAG: rldimi [[REG2]], 3, 56, 0
@@ -46,7 +46,7 @@ entry:
; CHECK-LABEL: @test1
; CHECK-DAG: li [[REG1:[0-9]+]], 11375
-; CHECK-DAG: rldicl [[REG3:[0-9]+]], 4, 56, 0
+; CHECK-DAG: rotldi [[REG3:[0-9]+]], 4, 56
; CHECK-DAG: sldi [[REG2:[0-9]+]], [[REG1]], 19
; CHECK: and 3, [[REG3]], [[REG2]]
; CHECK: blr
@@ -60,7 +60,7 @@ entry:
; CHECK-LABEL: @test2
; CHECK-DAG: lis [[REG1:[0-9]+]], 474
-; CHECK-DAG: rldicl [[REG5:[0-9]+]], 4, 58, 0
+; CHECK-DAG: rotldi [[REG5:[0-9]+]], 4, 58
; CHECK-DAG: ori [[REG2:[0-9]+]], [[REG1]], 3648
; CHECK-DAG: sldi [[REG3:[0-9]+]], [[REG2]], 32
; CHECK-DAG: oris [[REG4:[0-9]+]], [[REG3]], 25464
@@ -76,7 +76,7 @@ entry:
; CHECK-LABEL: @test3
; CHECK-DAG: lis [[REG1:[0-9]+]], 170
-; CHECK-DAG: rldicl [[REG4:[0-9]+]], 3, 34, 0
+; CHECK-DAG: rotldi [[REG4:[0-9]+]], 3, 34
; CHECK-DAG: ori [[REG2:[0-9]+]], [[REG1]], 22861
; CHECK-DAG: sldi [[REG3:[0-9]+]], [[REG2]], 34
; CHECK: and 3, [[REG4]], [[REG3]]
@@ -90,7 +90,7 @@ entry:
ret i64 %and
; CHECK-LABEL: @test4
-; CHECK: rldicl [[REG1:[0-9]+]], 4, 49, 0
+; CHECK: rotldi [[REG1:[0-9]+]], 4, 49
; CHECK: andis. 3, [[REG1]], 888
; CHECK: blr
}
@@ -103,7 +103,7 @@ entry:
; CHECK-LABEL: @test5
; CHECK-DAG: lis [[REG1:[0-9]+]], 3703
-; CHECK-DAG: rldicl [[REG4:[0-9]+]], 4, 12, 0
+; CHECK-DAG: rotldi [[REG4:[0-9]+]], 4, 12
; CHECK-DAG: ori [[REG2:[0-9]+]], [[REG1]], 35951
; CHECK-DAG: sldi [[REG3:[0-9]+]], [[REG2]], 19
; CHECK: and 3, [[REG4]], [[REG3]]
@@ -148,7 +148,7 @@ entry:
; CHECK-LABEL: @test8
; CHECK-DAG: lis [[REG1:[0-9]+]], 4
-; CHECK-DAG: rldicl [[REG4:[0-9]+]], 3, 63, 0
+; CHECK-DAG: rotldi [[REG4:[0-9]+]], 3, 63
; CHECK-DAG: ori [[REG2:[0-9]+]], [[REG1]], 60527
; CHECK-DAG: sldi [[REG3:[0-9]+]], [[REG2]], 19
; CHECK: and 3, [[REG4]], [[REG3]]
@@ -166,8 +166,8 @@ entry:
; CHECK-LABEL: @test9
; CHECK-DAG: lis [[REG1:[0-9]+]], 1440
-; CHECK-DAG: rldicl [[REG5:[0-9]+]], 4, 62, 0
-; CHECK-DAG: rldicl [[REG6:[0-9]+]], 4, 50, 0
+; CHECK-DAG: rotldi [[REG5:[0-9]+]], 4, 62
+; CHECK-DAG: rotldi [[REG6:[0-9]+]], 4, 50
; CHECK-DAG: ori [[REG2:[0-9]+]], [[REG1]], 4
; CHECK-DAG: rldimi [[REG6]], [[REG5]], 53, 0
; CHECK-DAG: sldi [[REG3:[0-9]+]], [[REG2]], 32
@@ -187,8 +187,8 @@ entry:
; CHECK-LABEL: @test10
; CHECK-DAG: lis [[REG1:[0-9]+]], 1
-; CHECK-DAG: rldicl [[REG6:[0-9]+]], 3, 25, 0
-; CHECK-DAG: rldicl [[REG7:[0-9]+]], 3, 37, 0
+; CHECK-DAG: rotldi [[REG6:[0-9]+]], 3, 25
+; CHECK-DAG: rotldi [[REG7:[0-9]+]], 3, 37
; CHECK-DAG: ori [[REG2:[0-9]+]], [[REG1]], 8183
; CHECK-DAG: ori [[REG3:[0-9]+]], [[REG1]], 50017
; CHECK-DAG: sldi [[REG4:[0-9]+]], [[REG2]], 25
diff --git a/test/CodeGen/PowerPC/branch-opt.ll b/test/CodeGen/PowerPC/branch-opt.ll
index dda1538f1cdf..d6928dde2a7d 100644
--- a/test/CodeGen/PowerPC/branch-opt.ll
+++ b/test/CodeGen/PowerPC/branch-opt.ll
@@ -11,7 +11,7 @@ entry:
br i1 %tmp1.upgrd.1, label %cond_false, label %bb5
bb: ; preds = %bb5, %bb
%indvar77 = phi i32 [ %indvar.next78, %bb ], [ 0, %bb5 ] ; <i32> [#uses=1]
- %tmp2 = tail call i32 (...)* @bar( ) ; <i32> [#uses=0]
+ %tmp2 = tail call i32 (...) @bar( ) ; <i32> [#uses=0]
%indvar.next78 = add i32 %indvar77, 1 ; <i32> [#uses=2]
%exitcond79 = icmp eq i32 %indvar.next78, %X ; <i1> [#uses=1]
br i1 %exitcond79, label %cond_next48, label %bb
@@ -24,7 +24,7 @@ cond_false: ; preds = %entry
br i1 %tmp10.upgrd.2, label %cond_false20, label %bb16
bb12: ; preds = %bb16, %bb12
%indvar72 = phi i32 [ %indvar.next73, %bb12 ], [ 0, %bb16 ] ; <i32> [#uses=1]
- %tmp13 = tail call i32 (...)* @bar( ) ; <i32> [#uses=0]
+ %tmp13 = tail call i32 (...) @bar( ) ; <i32> [#uses=0]
%indvar.next73 = add i32 %indvar72, 1 ; <i32> [#uses=2]
%exitcond74 = icmp eq i32 %indvar.next73, %Y ; <i1> [#uses=1]
br i1 %exitcond74, label %cond_next48, label %bb12
@@ -37,7 +37,7 @@ cond_false20: ; preds = %cond_false
br i1 %tmp23.upgrd.3, label %cond_false33, label %bb29
bb25: ; preds = %bb29, %bb25
%indvar67 = phi i32 [ %indvar.next68, %bb25 ], [ 0, %bb29 ] ; <i32> [#uses=1]
- %tmp26 = tail call i32 (...)* @bar( ) ; <i32> [#uses=0]
+ %tmp26 = tail call i32 (...) @bar( ) ; <i32> [#uses=0]
%indvar.next68 = add i32 %indvar67, 1 ; <i32> [#uses=2]
%exitcond69 = icmp eq i32 %indvar.next68, %Z ; <i1> [#uses=1]
br i1 %exitcond69, label %cond_next48, label %bb25
@@ -49,7 +49,7 @@ cond_false33: ; preds = %cond_false20
%tmp36.upgrd.4 = icmp eq i32 %tmp36, 0 ; <i1> [#uses=1]
br i1 %tmp36.upgrd.4, label %cond_next48, label %bb42
bb38: ; preds = %bb42
- %tmp39 = tail call i32 (...)* @bar( ) ; <i32> [#uses=0]
+ %tmp39 = tail call i32 (...) @bar( ) ; <i32> [#uses=0]
%indvar.next = add i32 %indvar, 1 ; <i32> [#uses=1]
br label %bb42
bb42: ; preds = %bb38, %cond_false33
@@ -62,7 +62,7 @@ cond_next48: ; preds = %bb42, %cond_false33, %bb29, %bb25, %bb16, %bb12, %bb5,
%tmp50 = icmp eq i32 %W_addr.1, 0 ; <i1> [#uses=1]
br i1 %tmp50, label %UnifiedReturnBlock, label %cond_true51
cond_true51: ; preds = %cond_next48
- %tmp52 = tail call i32 (...)* @bar( ) ; <i32> [#uses=0]
+ %tmp52 = tail call i32 (...) @bar( ) ; <i32> [#uses=0]
ret void
UnifiedReturnBlock: ; preds = %cond_next48
ret void
diff --git a/test/CodeGen/PowerPC/bswap-load-store.ll b/test/CodeGen/PowerPC/bswap-load-store.ll
index 53bbc52167c4..cee1f0cdaa99 100644
--- a/test/CodeGen/PowerPC/bswap-load-store.ll
+++ b/test/CodeGen/PowerPC/bswap-load-store.ll
@@ -5,7 +5,7 @@
define void @STWBRX(i32 %i, i8* %ptr, i32 %off) {
- %tmp1 = getelementptr i8* %ptr, i32 %off ; <i8*> [#uses=1]
+ %tmp1 = getelementptr i8, i8* %ptr, i32 %off ; <i8*> [#uses=1]
%tmp1.upgrd.1 = bitcast i8* %tmp1 to i32* ; <i32*> [#uses=1]
%tmp13 = tail call i32 @llvm.bswap.i32( i32 %i ) ; <i32> [#uses=1]
store i32 %tmp13, i32* %tmp1.upgrd.1
@@ -13,15 +13,15 @@ define void @STWBRX(i32 %i, i8* %ptr, i32 %off) {
}
define i32 @LWBRX(i8* %ptr, i32 %off) {
- %tmp1 = getelementptr i8* %ptr, i32 %off ; <i8*> [#uses=1]
+ %tmp1 = getelementptr i8, i8* %ptr, i32 %off ; <i8*> [#uses=1]
%tmp1.upgrd.2 = bitcast i8* %tmp1 to i32* ; <i32*> [#uses=1]
- %tmp = load i32* %tmp1.upgrd.2 ; <i32> [#uses=1]
+ %tmp = load i32, i32* %tmp1.upgrd.2 ; <i32> [#uses=1]
%tmp14 = tail call i32 @llvm.bswap.i32( i32 %tmp ) ; <i32> [#uses=1]
ret i32 %tmp14
}
define void @STHBRX(i16 %s, i8* %ptr, i32 %off) {
- %tmp1 = getelementptr i8* %ptr, i32 %off ; <i8*> [#uses=1]
+ %tmp1 = getelementptr i8, i8* %ptr, i32 %off ; <i8*> [#uses=1]
%tmp1.upgrd.3 = bitcast i8* %tmp1 to i16* ; <i16*> [#uses=1]
%tmp5 = call i16 @llvm.bswap.i16( i16 %s ) ; <i16> [#uses=1]
store i16 %tmp5, i16* %tmp1.upgrd.3
@@ -29,15 +29,15 @@ define void @STHBRX(i16 %s, i8* %ptr, i32 %off) {
}
define i16 @LHBRX(i8* %ptr, i32 %off) {
- %tmp1 = getelementptr i8* %ptr, i32 %off ; <i8*> [#uses=1]
+ %tmp1 = getelementptr i8, i8* %ptr, i32 %off ; <i8*> [#uses=1]
%tmp1.upgrd.4 = bitcast i8* %tmp1 to i16* ; <i16*> [#uses=1]
- %tmp = load i16* %tmp1.upgrd.4 ; <i16> [#uses=1]
+ %tmp = load i16, i16* %tmp1.upgrd.4 ; <i16> [#uses=1]
%tmp6 = call i16 @llvm.bswap.i16( i16 %tmp ) ; <i16> [#uses=1]
ret i16 %tmp6
}
define void @STDBRX(i64 %i, i8* %ptr, i64 %off) {
- %tmp1 = getelementptr i8* %ptr, i64 %off ; <i8*> [#uses=1]
+ %tmp1 = getelementptr i8, i8* %ptr, i64 %off ; <i8*> [#uses=1]
%tmp1.upgrd.1 = bitcast i8* %tmp1 to i64* ; <i64*> [#uses=1]
%tmp13 = tail call i64 @llvm.bswap.i64( i64 %i ) ; <i64> [#uses=1]
store i64 %tmp13, i64* %tmp1.upgrd.1
@@ -45,9 +45,9 @@ define void @STDBRX(i64 %i, i8* %ptr, i64 %off) {
}
define i64 @LDBRX(i8* %ptr, i64 %off) {
- %tmp1 = getelementptr i8* %ptr, i64 %off ; <i8*> [#uses=1]
+ %tmp1 = getelementptr i8, i8* %ptr, i64 %off ; <i8*> [#uses=1]
%tmp1.upgrd.2 = bitcast i8* %tmp1 to i64* ; <i64*> [#uses=1]
- %tmp = load i64* %tmp1.upgrd.2 ; <i64> [#uses=1]
+ %tmp = load i64, i64* %tmp1.upgrd.2 ; <i64> [#uses=1]
%tmp14 = tail call i64 @llvm.bswap.i64( i64 %tmp ) ; <i64> [#uses=1]
ret i64 %tmp14
}
diff --git a/test/CodeGen/PowerPC/buildvec_canonicalize.ll b/test/CodeGen/PowerPC/buildvec_canonicalize.ll
index b70671bfd5cb..6c591912d9b2 100644
--- a/test/CodeGen/PowerPC/buildvec_canonicalize.ll
+++ b/test/CodeGen/PowerPC/buildvec_canonicalize.ll
@@ -1,8 +1,8 @@
; RUN: llc < %s -mattr=-vsx -march=ppc32 -mattr=+altivec --enable-unsafe-fp-math | FileCheck %s
define void @VXOR(<4 x float>* %P1, <4 x i32>* %P2, <4 x float>* %P3) {
- %tmp = load <4 x float>* %P3 ; <<4 x float>> [#uses=1]
- %tmp3 = load <4 x float>* %P1 ; <<4 x float>> [#uses=1]
+ %tmp = load <4 x float>, <4 x float>* %P3 ; <<4 x float>> [#uses=1]
+ %tmp3 = load <4 x float>, <4 x float>* %P1 ; <<4 x float>> [#uses=1]
%tmp4 = fmul <4 x float> %tmp, %tmp3 ; <<4 x float>> [#uses=1]
store <4 x float> %tmp4, <4 x float>* %P3
store <4 x float> zeroinitializer, <4 x float>* %P1
diff --git a/test/CodeGen/PowerPC/byval-aliased.ll b/test/CodeGen/PowerPC/byval-aliased.ll
index 9ef2f02f036a..8668e64d4d51 100644
--- a/test/CodeGen/PowerPC/byval-aliased.ll
+++ b/test/CodeGen/PowerPC/byval-aliased.ll
@@ -7,8 +7,8 @@ target triple = "powerpc-apple-macosx10.5.0"
; Function Attrs: nounwind ssp
define void @foo(%struct.sm* byval %s) #0 {
entry:
- %a = getelementptr inbounds %struct.sm* %s, i32 0, i32 0
- %0 = load i8* %a, align 1
+ %a = getelementptr inbounds %struct.sm, %struct.sm* %s, i32 0, i32 0
+ %0 = load i8, i8* %a, align 1
%conv2 = zext i8 %0 to i32
%add = add nuw nsw i32 %conv2, 3
%conv1 = trunc i32 %add to i8
diff --git a/test/CodeGen/PowerPC/cmpb-ppc32.ll b/test/CodeGen/PowerPC/cmpb-ppc32.ll
index 639ed887b978..b5cb0935d886 100644
--- a/test/CodeGen/PowerPC/cmpb-ppc32.ll
+++ b/test/CodeGen/PowerPC/cmpb-ppc32.ll
@@ -17,7 +17,7 @@ entry:
; CHECK-LABEL: @test16
; CHECK: cmpb [[REG1:[0-9]+]], 4, 3
-; CHECK: rlwinm 3, [[REG1]], 0, 16, 31
+; CHECK: clrlwi 3, [[REG1]], 16
; CHECK: blr
}
diff --git a/test/CodeGen/PowerPC/cmpb.ll b/test/CodeGen/PowerPC/cmpb.ll
index 7d0c0ab3316b..d1c951df962e 100644
--- a/test/CodeGen/PowerPC/cmpb.ll
+++ b/test/CodeGen/PowerPC/cmpb.ll
@@ -17,7 +17,7 @@ entry:
; CHECK-LABEL: @test16
; CHECK: cmpb [[REG1:[0-9]+]], 4, 3
-; CHECK: rldicl 3, [[REG1]], 0, 48
+; CHECK: clrldi 3, [[REG1]], 48
; CHECK: blr
}
@@ -73,7 +73,7 @@ entry:
; CHECK-LABEL: @test16p3
; CHECK: cmpb [[REG1:[0-9]+]], 4, 3
-; CHECK: rldicl [[REG2:[0-9]+]], [[REG1]], 0, 55
+; CHECK: clrldi [[REG2:[0-9]+]], [[REG1]], 55
; CHECK: xori 3, [[REG2]], 1280
; CHECK: blr
}
@@ -99,7 +99,7 @@ entry:
; CHECK-LABEL: @test32
; CHECK: cmpb [[REG1:[0-9]+]], 4, 3
-; CHECK: rldicl 3, [[REG1]], 0, 32
+; CHECK: clrldi 3, [[REG1]], 32
; CHECK: blr
}
diff --git a/test/CodeGen/PowerPC/code-align.ll b/test/CodeGen/PowerPC/code-align.ll
index 306230be5005..19d1b236ce0d 100644
--- a/test/CodeGen/PowerPC/code-align.ll
+++ b/test/CodeGen/PowerPC/code-align.ll
@@ -44,6 +44,9 @@ entry:
; GENERIC-NOT: .align
; BASIC: .align 4
; PWR: .align 4
+; GENERIC: lwzu
+; BASIC: lwzu
+; PWR: lwzu
; GENERIC: bdnz
; BASIC: bdnz
; PWR: bdnz
@@ -51,17 +54,19 @@ entry:
vector.body: ; preds = %vector.body, %entry
%index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
%induction45 = or i64 %index, 1
- %0 = getelementptr inbounds i32* %a, i64 %index
- %1 = getelementptr inbounds i32* %a, i64 %induction45
- %2 = load i32* %0, align 4
- %3 = load i32* %1, align 4
+ %0 = getelementptr inbounds i32, i32* %a, i64 %index
+ %1 = getelementptr inbounds i32, i32* %a, i64 %induction45
+ %2 = load i32, i32* %0, align 4
+ %3 = load i32, i32* %1, align 4
%4 = add nsw i32 %2, 4
%5 = add nsw i32 %3, 4
- store i32 %4, i32* %0, align 4
- store i32 %5, i32* %1, align 4
+ %6 = mul nsw i32 %4, 3
+ %7 = mul nsw i32 %5, 3
+ store i32 %6, i32* %0, align 4
+ store i32 %7, i32* %1, align 4
%index.next = add i64 %index, 2
- %6 = icmp eq i64 %index.next, 2048
- br i1 %6, label %for.end, label %vector.body
+ %8 = icmp eq i64 %index.next, 2048
+ br i1 %8, label %for.end, label %vector.body
for.end: ; preds = %vector.body
ret void
@@ -87,10 +92,11 @@ entry:
for.body: ; preds = %for.body, %entry
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
- %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv
- %0 = load i32* %arrayidx, align 4
+ %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
+ %0 = load i32, i32* %arrayidx, align 4
%add = add nsw i32 %0, 4
- store i32 %add, i32* %arrayidx, align 4
+ %mul = mul nsw i32 %add, 3
+ store i32 %mul, i32* %arrayidx, align 4
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%exitcond = icmp eq i64 %indvars.iv.next, 2048
br i1 %exitcond, label %for.end, label %for.body
diff --git a/test/CodeGen/PowerPC/compare-simm.ll b/test/CodeGen/PowerPC/compare-simm.ll
index 94c5c0290f58..12eff7bb1813 100644
--- a/test/CodeGen/PowerPC/compare-simm.ll
+++ b/test/CodeGen/PowerPC/compare-simm.ll
@@ -1,7 +1,9 @@
-; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 | \
-; RUN: grep "cmpwi cr0, r3, -1"
+; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 | FileCheck %s
define i32 @test(i32 %x) nounwind {
+; CHECK-LABEL: @test
+; CHECK: cmpwi r3, -1
+
%c = icmp eq i32 %x, -1
br i1 %c, label %T, label %F
T:
diff --git a/test/CodeGen/PowerPC/complex-return.ll b/test/CodeGen/PowerPC/complex-return.ll
index 9d25e619d2e5..e419f0799522 100644
--- a/test/CodeGen/PowerPC/complex-return.ll
+++ b/test/CodeGen/PowerPC/complex-return.ll
@@ -7,19 +7,19 @@ define { ppc_fp128, ppc_fp128 } @foo() nounwind {
entry:
%retval = alloca { ppc_fp128, ppc_fp128 }, align 16
%x = alloca { ppc_fp128, ppc_fp128 }, align 16
- %real = getelementptr inbounds { ppc_fp128, ppc_fp128 }* %x, i32 0, i32 0
- %imag = getelementptr inbounds { ppc_fp128, ppc_fp128 }* %x, i32 0, i32 1
+ %real = getelementptr inbounds { ppc_fp128, ppc_fp128 }, { ppc_fp128, ppc_fp128 }* %x, i32 0, i32 0
+ %imag = getelementptr inbounds { ppc_fp128, ppc_fp128 }, { ppc_fp128, ppc_fp128 }* %x, i32 0, i32 1
store ppc_fp128 0xM400C0000000000000000000000000000, ppc_fp128* %real
store ppc_fp128 0xMC00547AE147AE1483CA47AE147AE147A, ppc_fp128* %imag
- %x.realp = getelementptr inbounds { ppc_fp128, ppc_fp128 }* %x, i32 0, i32 0
- %x.real = load ppc_fp128* %x.realp
- %x.imagp = getelementptr inbounds { ppc_fp128, ppc_fp128 }* %x, i32 0, i32 1
- %x.imag = load ppc_fp128* %x.imagp
- %real1 = getelementptr inbounds { ppc_fp128, ppc_fp128 }* %retval, i32 0, i32 0
- %imag2 = getelementptr inbounds { ppc_fp128, ppc_fp128 }* %retval, i32 0, i32 1
+ %x.realp = getelementptr inbounds { ppc_fp128, ppc_fp128 }, { ppc_fp128, ppc_fp128 }* %x, i32 0, i32 0
+ %x.real = load ppc_fp128, ppc_fp128* %x.realp
+ %x.imagp = getelementptr inbounds { ppc_fp128, ppc_fp128 }, { ppc_fp128, ppc_fp128 }* %x, i32 0, i32 1
+ %x.imag = load ppc_fp128, ppc_fp128* %x.imagp
+ %real1 = getelementptr inbounds { ppc_fp128, ppc_fp128 }, { ppc_fp128, ppc_fp128 }* %retval, i32 0, i32 0
+ %imag2 = getelementptr inbounds { ppc_fp128, ppc_fp128 }, { ppc_fp128, ppc_fp128 }* %retval, i32 0, i32 1
store ppc_fp128 %x.real, ppc_fp128* %real1
store ppc_fp128 %x.imag, ppc_fp128* %imag2
- %0 = load { ppc_fp128, ppc_fp128 }* %retval
+ %0 = load { ppc_fp128, ppc_fp128 }, { ppc_fp128, ppc_fp128 }* %retval
ret { ppc_fp128, ppc_fp128 } %0
}
@@ -33,19 +33,19 @@ define { float, float } @oof() nounwind {
entry:
%retval = alloca { float, float }, align 4
%x = alloca { float, float }, align 4
- %real = getelementptr inbounds { float, float }* %x, i32 0, i32 0
- %imag = getelementptr inbounds { float, float }* %x, i32 0, i32 1
+ %real = getelementptr inbounds { float, float }, { float, float }* %x, i32 0, i32 0
+ %imag = getelementptr inbounds { float, float }, { float, float }* %x, i32 0, i32 1
store float 3.500000e+00, float* %real
store float 0xC00547AE20000000, float* %imag
- %x.realp = getelementptr inbounds { float, float }* %x, i32 0, i32 0
- %x.real = load float* %x.realp
- %x.imagp = getelementptr inbounds { float, float }* %x, i32 0, i32 1
- %x.imag = load float* %x.imagp
- %real1 = getelementptr inbounds { float, float }* %retval, i32 0, i32 0
- %imag2 = getelementptr inbounds { float, float }* %retval, i32 0, i32 1
+ %x.realp = getelementptr inbounds { float, float }, { float, float }* %x, i32 0, i32 0
+ %x.real = load float, float* %x.realp
+ %x.imagp = getelementptr inbounds { float, float }, { float, float }* %x, i32 0, i32 1
+ %x.imag = load float, float* %x.imagp
+ %real1 = getelementptr inbounds { float, float }, { float, float }* %retval, i32 0, i32 0
+ %imag2 = getelementptr inbounds { float, float }, { float, float }* %retval, i32 0, i32 1
store float %x.real, float* %real1
store float %x.imag, float* %imag2
- %0 = load { float, float }* %retval
+ %0 = load { float, float }, { float, float }* %retval
ret { float, float } %0
}
diff --git a/test/CodeGen/PowerPC/cr-spills.ll b/test/CodeGen/PowerPC/cr-spills.ll
index be0dbad6289e..1a903115c0da 100644
--- a/test/CodeGen/PowerPC/cr-spills.ll
+++ b/test/CodeGen/PowerPC/cr-spills.ll
@@ -31,7 +31,7 @@ land.rhs: ; preds = %land.lhs.true, %lan
land.end: ; preds = %land.rhs, %land.lhs.true, %entry
%0 = phi i1 [ %tobool21, %land.rhs ], [ false, %land.lhs.true ], [ false, %entry ]
- %cond = load i32** undef, align 8
+ %cond = load i32*, i32** undef, align 8
br i1 undef, label %if.then95, label %for.body.lr.ph
if.then95: ; preds = %land.end
@@ -52,12 +52,12 @@ for.cond286.preheader: ; preds = %for.body252
br label %for.cond290.preheader
for.cond290.preheader: ; preds = %for.end520, %for.cond286.preheader
- %srcptr.31595 = phi i16* [ getelementptr inbounds ([768 x i16]* @SetupFastFullPelSearch.orig_pels, i64 0, i64 0), %for.cond286.preheader ], [ null, %for.end520 ]
- %1 = load i32* undef, align 4
- %2 = load i32* @weight_luma, align 4
- %3 = load i32* @wp_luma_round, align 4
- %4 = load i32* @luma_log_weight_denom, align 4
- %5 = load i32* @offset_luma, align 4
+ %srcptr.31595 = phi i16* [ getelementptr inbounds ([768 x i16], [768 x i16]* @SetupFastFullPelSearch.orig_pels, i64 0, i64 0), %for.cond286.preheader ], [ null, %for.end520 ]
+ %1 = load i32, i32* undef, align 4
+ %2 = load i32, i32* @weight_luma, align 4
+ %3 = load i32, i32* @wp_luma_round, align 4
+ %4 = load i32, i32* @luma_log_weight_denom, align 4
+ %5 = load i32, i32* @offset_luma, align 4
%incdec.ptr502.sum = add i64 undef, 16
br label %for.body293
@@ -68,7 +68,7 @@ for.body293: ; preds = %for.body293, %for.c
%LineSadBlk1.01587 = phi i32 [ 0, %for.cond290.preheader ], [ %add402, %for.body293 ]
%LineSadBlk3.01586 = phi i32 [ 0, %for.cond290.preheader ], [ %add514, %for.body293 ]
%LineSadBlk2.01585 = phi i32 [ 0, %for.cond290.preheader ], [ %add458, %for.body293 ]
- %6 = load i16* %refptr.11590, align 2
+ %6 = load i16, i16* %refptr.11590, align 2
%conv294 = zext i16 %6 to i32
%mul295 = mul nsw i32 %conv294, %2
%add296 = add nsw i32 %mul295, %3
@@ -78,16 +78,16 @@ for.body293: ; preds = %for.body293, %for.c
%cond.i.i1514 = select i1 %cmp.i.i1513, i32 %add297, i32 0
%cmp.i4.i1515 = icmp slt i32 %cond.i.i1514, %1
%cond.i5.i1516 = select i1 %cmp.i4.i1515, i32 %cond.i.i1514, i32 %1
- %7 = load i16* %srcptr.41591, align 2
+ %7 = load i16, i16* %srcptr.41591, align 2
%conv300 = zext i16 %7 to i32
%sub301 = sub nsw i32 %cond.i5.i1516, %conv300
%idxprom302 = sext i32 %sub301 to i64
- %arrayidx303 = getelementptr inbounds i32* %cond, i64 %idxprom302
- %8 = load i32* %arrayidx303, align 4
+ %arrayidx303 = getelementptr inbounds i32, i32* %cond, i64 %idxprom302
+ %8 = load i32, i32* %arrayidx303, align 4
%add304 = add nsw i32 %8, %LineSadBlk0.01588
- %9 = load i32* undef, align 4
+ %9 = load i32, i32* undef, align 4
%add318 = add nsw i32 %add304, %9
- %10 = load i16* undef, align 2
+ %10 = load i16, i16* undef, align 2
%conv321 = zext i16 %10 to i32
%mul322 = mul nsw i32 %conv321, %2
%add323 = add nsw i32 %mul322, %3
@@ -99,23 +99,23 @@ for.body293: ; preds = %for.body293, %for.c
%cond.i5.i1508 = select i1 %cmp.i4.i1507, i32 %cond.i.i1506, i32 %1
%sub329 = sub nsw i32 %cond.i5.i1508, 0
%idxprom330 = sext i32 %sub329 to i64
- %arrayidx331 = getelementptr inbounds i32* %cond, i64 %idxprom330
- %11 = load i32* %arrayidx331, align 4
+ %arrayidx331 = getelementptr inbounds i32, i32* %cond, i64 %idxprom330
+ %11 = load i32, i32* %arrayidx331, align 4
%add332 = add nsw i32 %add318, %11
%cmp.i.i1501 = icmp sgt i32 undef, 0
%cond.i.i1502 = select i1 %cmp.i.i1501, i32 undef, i32 0
%cmp.i4.i1503 = icmp slt i32 %cond.i.i1502, %1
%cond.i5.i1504 = select i1 %cmp.i4.i1503, i32 %cond.i.i1502, i32 %1
- %incdec.ptr341 = getelementptr inbounds i16* %srcptr.41591, i64 4
- %12 = load i16* null, align 2
+ %incdec.ptr341 = getelementptr inbounds i16, i16* %srcptr.41591, i64 4
+ %12 = load i16, i16* null, align 2
%conv342 = zext i16 %12 to i32
%sub343 = sub nsw i32 %cond.i5.i1504, %conv342
%idxprom344 = sext i32 %sub343 to i64
- %arrayidx345 = getelementptr inbounds i32* %cond, i64 %idxprom344
- %13 = load i32* %arrayidx345, align 4
+ %arrayidx345 = getelementptr inbounds i32, i32* %cond, i64 %idxprom344
+ %13 = load i32, i32* %arrayidx345, align 4
%add346 = add nsw i32 %add332, %13
- %incdec.ptr348 = getelementptr inbounds i16* %refptr.11590, i64 5
- %14 = load i16* null, align 2
+ %incdec.ptr348 = getelementptr inbounds i16, i16* %refptr.11590, i64 5
+ %14 = load i16, i16* null, align 2
%conv349 = zext i16 %14 to i32
%mul350 = mul nsw i32 %conv349, %2
%add351 = add nsw i32 %mul350, %3
@@ -125,16 +125,16 @@ for.body293: ; preds = %for.body293, %for.c
%cond.i.i1498 = select i1 %cmp.i.i1497, i32 %add353, i32 0
%cmp.i4.i1499 = icmp slt i32 %cond.i.i1498, %1
%cond.i5.i1500 = select i1 %cmp.i4.i1499, i32 %cond.i.i1498, i32 %1
- %incdec.ptr355 = getelementptr inbounds i16* %srcptr.41591, i64 5
- %15 = load i16* %incdec.ptr341, align 2
+ %incdec.ptr355 = getelementptr inbounds i16, i16* %srcptr.41591, i64 5
+ %15 = load i16, i16* %incdec.ptr341, align 2
%conv356 = zext i16 %15 to i32
%sub357 = sub nsw i32 %cond.i5.i1500, %conv356
%idxprom358 = sext i32 %sub357 to i64
- %arrayidx359 = getelementptr inbounds i32* %cond, i64 %idxprom358
- %16 = load i32* %arrayidx359, align 4
+ %arrayidx359 = getelementptr inbounds i32, i32* %cond, i64 %idxprom358
+ %16 = load i32, i32* %arrayidx359, align 4
%add360 = add nsw i32 %16, %LineSadBlk1.01587
- %incdec.ptr362 = getelementptr inbounds i16* %refptr.11590, i64 6
- %17 = load i16* %incdec.ptr348, align 2
+ %incdec.ptr362 = getelementptr inbounds i16, i16* %refptr.11590, i64 6
+ %17 = load i16, i16* %incdec.ptr348, align 2
%conv363 = zext i16 %17 to i32
%mul364 = mul nsw i32 %conv363, %2
%add365 = add nsw i32 %mul364, %3
@@ -144,16 +144,16 @@ for.body293: ; preds = %for.body293, %for.c
%cond.i.i1494 = select i1 %cmp.i.i1493, i32 %add367, i32 0
%cmp.i4.i1495 = icmp slt i32 %cond.i.i1494, %1
%cond.i5.i1496 = select i1 %cmp.i4.i1495, i32 %cond.i.i1494, i32 %1
- %incdec.ptr369 = getelementptr inbounds i16* %srcptr.41591, i64 6
- %18 = load i16* %incdec.ptr355, align 2
+ %incdec.ptr369 = getelementptr inbounds i16, i16* %srcptr.41591, i64 6
+ %18 = load i16, i16* %incdec.ptr355, align 2
%conv370 = zext i16 %18 to i32
%sub371 = sub nsw i32 %cond.i5.i1496, %conv370
%idxprom372 = sext i32 %sub371 to i64
- %arrayidx373 = getelementptr inbounds i32* %cond, i64 %idxprom372
- %19 = load i32* %arrayidx373, align 4
+ %arrayidx373 = getelementptr inbounds i32, i32* %cond, i64 %idxprom372
+ %19 = load i32, i32* %arrayidx373, align 4
%add374 = add nsw i32 %add360, %19
- %incdec.ptr376 = getelementptr inbounds i16* %refptr.11590, i64 7
- %20 = load i16* %incdec.ptr362, align 2
+ %incdec.ptr376 = getelementptr inbounds i16, i16* %refptr.11590, i64 7
+ %20 = load i16, i16* %incdec.ptr362, align 2
%conv377 = zext i16 %20 to i32
%mul378 = mul nsw i32 %conv377, %2
%add379 = add nsw i32 %mul378, %3
@@ -163,15 +163,15 @@ for.body293: ; preds = %for.body293, %for.c
%cond.i.i1490 = select i1 %cmp.i.i1489, i32 %add381, i32 0
%cmp.i4.i1491 = icmp slt i32 %cond.i.i1490, %1
%cond.i5.i1492 = select i1 %cmp.i4.i1491, i32 %cond.i.i1490, i32 %1
- %incdec.ptr383 = getelementptr inbounds i16* %srcptr.41591, i64 7
- %21 = load i16* %incdec.ptr369, align 2
+ %incdec.ptr383 = getelementptr inbounds i16, i16* %srcptr.41591, i64 7
+ %21 = load i16, i16* %incdec.ptr369, align 2
%conv384 = zext i16 %21 to i32
%sub385 = sub nsw i32 %cond.i5.i1492, %conv384
%idxprom386 = sext i32 %sub385 to i64
- %arrayidx387 = getelementptr inbounds i32* %cond, i64 %idxprom386
- %22 = load i32* %arrayidx387, align 4
+ %arrayidx387 = getelementptr inbounds i32, i32* %cond, i64 %idxprom386
+ %22 = load i32, i32* %arrayidx387, align 4
%add388 = add nsw i32 %add374, %22
- %23 = load i16* %incdec.ptr376, align 2
+ %23 = load i16, i16* %incdec.ptr376, align 2
%conv391 = zext i16 %23 to i32
%mul392 = mul nsw i32 %conv391, %2
%add395 = add nsw i32 0, %5
@@ -179,26 +179,26 @@ for.body293: ; preds = %for.body293, %for.c
%cond.i.i1486 = select i1 %cmp.i.i1485, i32 %add395, i32 0
%cmp.i4.i1487 = icmp slt i32 %cond.i.i1486, %1
%cond.i5.i1488 = select i1 %cmp.i4.i1487, i32 %cond.i.i1486, i32 %1
- %incdec.ptr397 = getelementptr inbounds i16* %srcptr.41591, i64 8
- %24 = load i16* %incdec.ptr383, align 2
+ %incdec.ptr397 = getelementptr inbounds i16, i16* %srcptr.41591, i64 8
+ %24 = load i16, i16* %incdec.ptr383, align 2
%conv398 = zext i16 %24 to i32
%sub399 = sub nsw i32 %cond.i5.i1488, %conv398
%idxprom400 = sext i32 %sub399 to i64
- %arrayidx401 = getelementptr inbounds i32* %cond, i64 %idxprom400
- %25 = load i32* %arrayidx401, align 4
+ %arrayidx401 = getelementptr inbounds i32, i32* %cond, i64 %idxprom400
+ %25 = load i32, i32* %arrayidx401, align 4
%add402 = add nsw i32 %add388, %25
- %incdec.ptr404 = getelementptr inbounds i16* %refptr.11590, i64 9
+ %incdec.ptr404 = getelementptr inbounds i16, i16* %refptr.11590, i64 9
%cmp.i4.i1483 = icmp slt i32 undef, %1
%cond.i5.i1484 = select i1 %cmp.i4.i1483, i32 undef, i32 %1
- %26 = load i16* %incdec.ptr397, align 2
+ %26 = load i16, i16* %incdec.ptr397, align 2
%conv412 = zext i16 %26 to i32
%sub413 = sub nsw i32 %cond.i5.i1484, %conv412
%idxprom414 = sext i32 %sub413 to i64
- %arrayidx415 = getelementptr inbounds i32* %cond, i64 %idxprom414
- %27 = load i32* %arrayidx415, align 4
+ %arrayidx415 = getelementptr inbounds i32, i32* %cond, i64 %idxprom414
+ %27 = load i32, i32* %arrayidx415, align 4
%add416 = add nsw i32 %27, %LineSadBlk2.01585
- %incdec.ptr418 = getelementptr inbounds i16* %refptr.11590, i64 10
- %28 = load i16* %incdec.ptr404, align 2
+ %incdec.ptr418 = getelementptr inbounds i16, i16* %refptr.11590, i64 10
+ %28 = load i16, i16* %incdec.ptr404, align 2
%conv419 = zext i16 %28 to i32
%mul420 = mul nsw i32 %conv419, %2
%add421 = add nsw i32 %mul420, %3
@@ -208,14 +208,14 @@ for.body293: ; preds = %for.body293, %for.c
%cond.i.i1478 = select i1 %cmp.i.i1477, i32 %add423, i32 0
%cmp.i4.i1479 = icmp slt i32 %cond.i.i1478, %1
%cond.i5.i1480 = select i1 %cmp.i4.i1479, i32 %cond.i.i1478, i32 %1
- %incdec.ptr425 = getelementptr inbounds i16* %srcptr.41591, i64 10
+ %incdec.ptr425 = getelementptr inbounds i16, i16* %srcptr.41591, i64 10
%sub427 = sub nsw i32 %cond.i5.i1480, 0
%idxprom428 = sext i32 %sub427 to i64
- %arrayidx429 = getelementptr inbounds i32* %cond, i64 %idxprom428
- %29 = load i32* %arrayidx429, align 4
+ %arrayidx429 = getelementptr inbounds i32, i32* %cond, i64 %idxprom428
+ %29 = load i32, i32* %arrayidx429, align 4
%add430 = add nsw i32 %add416, %29
- %incdec.ptr432 = getelementptr inbounds i16* %refptr.11590, i64 11
- %30 = load i16* %incdec.ptr418, align 2
+ %incdec.ptr432 = getelementptr inbounds i16, i16* %refptr.11590, i64 11
+ %30 = load i16, i16* %incdec.ptr418, align 2
%conv433 = zext i16 %30 to i32
%mul434 = mul nsw i32 %conv433, %2
%add435 = add nsw i32 %mul434, %3
@@ -225,15 +225,15 @@ for.body293: ; preds = %for.body293, %for.c
%cond.i.i1474 = select i1 %cmp.i.i1473, i32 %add437, i32 0
%cmp.i4.i1475 = icmp slt i32 %cond.i.i1474, %1
%cond.i5.i1476 = select i1 %cmp.i4.i1475, i32 %cond.i.i1474, i32 %1
- %31 = load i16* %incdec.ptr425, align 2
+ %31 = load i16, i16* %incdec.ptr425, align 2
%conv440 = zext i16 %31 to i32
%sub441 = sub nsw i32 %cond.i5.i1476, %conv440
%idxprom442 = sext i32 %sub441 to i64
- %arrayidx443 = getelementptr inbounds i32* %cond, i64 %idxprom442
- %32 = load i32* %arrayidx443, align 4
+ %arrayidx443 = getelementptr inbounds i32, i32* %cond, i64 %idxprom442
+ %32 = load i32, i32* %arrayidx443, align 4
%add444 = add nsw i32 %add430, %32
- %incdec.ptr446 = getelementptr inbounds i16* %refptr.11590, i64 12
- %33 = load i16* %incdec.ptr432, align 2
+ %incdec.ptr446 = getelementptr inbounds i16, i16* %refptr.11590, i64 12
+ %33 = load i16, i16* %incdec.ptr432, align 2
%conv447 = zext i16 %33 to i32
%mul448 = mul nsw i32 %conv447, %2
%add449 = add nsw i32 %mul448, %3
@@ -243,16 +243,16 @@ for.body293: ; preds = %for.body293, %for.c
%cond.i.i1470 = select i1 %cmp.i.i1469, i32 %add451, i32 0
%cmp.i4.i1471 = icmp slt i32 %cond.i.i1470, %1
%cond.i5.i1472 = select i1 %cmp.i4.i1471, i32 %cond.i.i1470, i32 %1
- %incdec.ptr453 = getelementptr inbounds i16* %srcptr.41591, i64 12
- %34 = load i16* undef, align 2
+ %incdec.ptr453 = getelementptr inbounds i16, i16* %srcptr.41591, i64 12
+ %34 = load i16, i16* undef, align 2
%conv454 = zext i16 %34 to i32
%sub455 = sub nsw i32 %cond.i5.i1472, %conv454
%idxprom456 = sext i32 %sub455 to i64
- %arrayidx457 = getelementptr inbounds i32* %cond, i64 %idxprom456
- %35 = load i32* %arrayidx457, align 4
+ %arrayidx457 = getelementptr inbounds i32, i32* %cond, i64 %idxprom456
+ %35 = load i32, i32* %arrayidx457, align 4
%add458 = add nsw i32 %add444, %35
- %incdec.ptr460 = getelementptr inbounds i16* %refptr.11590, i64 13
- %36 = load i16* %incdec.ptr446, align 2
+ %incdec.ptr460 = getelementptr inbounds i16, i16* %refptr.11590, i64 13
+ %36 = load i16, i16* %incdec.ptr446, align 2
%conv461 = zext i16 %36 to i32
%mul462 = mul nsw i32 %conv461, %2
%add463 = add nsw i32 %mul462, %3
@@ -262,15 +262,15 @@ for.body293: ; preds = %for.body293, %for.c
%cond.i.i1466 = select i1 %cmp.i.i1465, i32 %add465, i32 0
%cmp.i4.i1467 = icmp slt i32 %cond.i.i1466, %1
%cond.i5.i1468 = select i1 %cmp.i4.i1467, i32 %cond.i.i1466, i32 %1
- %incdec.ptr467 = getelementptr inbounds i16* %srcptr.41591, i64 13
- %37 = load i16* %incdec.ptr453, align 2
+ %incdec.ptr467 = getelementptr inbounds i16, i16* %srcptr.41591, i64 13
+ %37 = load i16, i16* %incdec.ptr453, align 2
%conv468 = zext i16 %37 to i32
%sub469 = sub nsw i32 %cond.i5.i1468, %conv468
%idxprom470 = sext i32 %sub469 to i64
- %arrayidx471 = getelementptr inbounds i32* %cond, i64 %idxprom470
- %38 = load i32* %arrayidx471, align 4
+ %arrayidx471 = getelementptr inbounds i32, i32* %cond, i64 %idxprom470
+ %38 = load i32, i32* %arrayidx471, align 4
%add472 = add nsw i32 %38, %LineSadBlk3.01586
- %incdec.ptr474 = getelementptr inbounds i16* %refptr.11590, i64 14
+ %incdec.ptr474 = getelementptr inbounds i16, i16* %refptr.11590, i64 14
%add477 = add nsw i32 0, %3
%shr478 = ashr i32 %add477, %4
%add479 = add nsw i32 %shr478, %5
@@ -278,16 +278,16 @@ for.body293: ; preds = %for.body293, %for.c
%cond.i.i1462 = select i1 %cmp.i.i1461, i32 %add479, i32 0
%cmp.i4.i1463 = icmp slt i32 %cond.i.i1462, %1
%cond.i5.i1464 = select i1 %cmp.i4.i1463, i32 %cond.i.i1462, i32 %1
- %incdec.ptr481 = getelementptr inbounds i16* %srcptr.41591, i64 14
- %39 = load i16* %incdec.ptr467, align 2
+ %incdec.ptr481 = getelementptr inbounds i16, i16* %srcptr.41591, i64 14
+ %39 = load i16, i16* %incdec.ptr467, align 2
%conv482 = zext i16 %39 to i32
%sub483 = sub nsw i32 %cond.i5.i1464, %conv482
%idxprom484 = sext i32 %sub483 to i64
- %arrayidx485 = getelementptr inbounds i32* %cond, i64 %idxprom484
- %40 = load i32* %arrayidx485, align 4
+ %arrayidx485 = getelementptr inbounds i32, i32* %cond, i64 %idxprom484
+ %40 = load i32, i32* %arrayidx485, align 4
%add486 = add nsw i32 %add472, %40
- %incdec.ptr488 = getelementptr inbounds i16* %refptr.11590, i64 15
- %41 = load i16* %incdec.ptr474, align 2
+ %incdec.ptr488 = getelementptr inbounds i16, i16* %refptr.11590, i64 15
+ %41 = load i16, i16* %incdec.ptr474, align 2
%conv489 = zext i16 %41 to i32
%mul490 = mul nsw i32 %conv489, %2
%add491 = add nsw i32 %mul490, %3
@@ -297,15 +297,15 @@ for.body293: ; preds = %for.body293, %for.c
%cond.i.i1458 = select i1 %cmp.i.i1457, i32 %add493, i32 0
%cmp.i4.i1459 = icmp slt i32 %cond.i.i1458, %1
%cond.i5.i1460 = select i1 %cmp.i4.i1459, i32 %cond.i.i1458, i32 %1
- %incdec.ptr495 = getelementptr inbounds i16* %srcptr.41591, i64 15
- %42 = load i16* %incdec.ptr481, align 2
+ %incdec.ptr495 = getelementptr inbounds i16, i16* %srcptr.41591, i64 15
+ %42 = load i16, i16* %incdec.ptr481, align 2
%conv496 = zext i16 %42 to i32
%sub497 = sub nsw i32 %cond.i5.i1460, %conv496
%idxprom498 = sext i32 %sub497 to i64
- %arrayidx499 = getelementptr inbounds i32* %cond, i64 %idxprom498
- %43 = load i32* %arrayidx499, align 4
+ %arrayidx499 = getelementptr inbounds i32, i32* %cond, i64 %idxprom498
+ %43 = load i32, i32* %arrayidx499, align 4
%add500 = add nsw i32 %add486, %43
- %44 = load i16* %incdec.ptr488, align 2
+ %44 = load i16, i16* %incdec.ptr488, align 2
%conv503 = zext i16 %44 to i32
%mul504 = mul nsw i32 %conv503, %2
%add505 = add nsw i32 %mul504, %3
@@ -315,14 +315,14 @@ for.body293: ; preds = %for.body293, %for.c
%cond.i.i1454 = select i1 %cmp.i.i1453, i32 %add507, i32 0
%cmp.i4.i1455 = icmp slt i32 %cond.i.i1454, %1
%cond.i5.i1456 = select i1 %cmp.i4.i1455, i32 %cond.i.i1454, i32 %1
- %45 = load i16* %incdec.ptr495, align 2
+ %45 = load i16, i16* %incdec.ptr495, align 2
%conv510 = zext i16 %45 to i32
%sub511 = sub nsw i32 %cond.i5.i1456, %conv510
%idxprom512 = sext i32 %sub511 to i64
- %arrayidx513 = getelementptr inbounds i32* %cond, i64 %idxprom512
- %46 = load i32* %arrayidx513, align 4
+ %arrayidx513 = getelementptr inbounds i32, i32* %cond, i64 %idxprom512
+ %46 = load i32, i32* %arrayidx513, align 4
%add514 = add nsw i32 %add500, %46
- %add.ptr517 = getelementptr inbounds i16* %refptr.11590, i64 %incdec.ptr502.sum
+ %add.ptr517 = getelementptr inbounds i16, i16* %refptr.11590, i64 %incdec.ptr502.sum
%exitcond1692 = icmp eq i32 undef, 4
br i1 %exitcond1692, label %for.end520, label %for.body293
diff --git a/test/CodeGen/PowerPC/cr1eq-no-extra-moves.ll b/test/CodeGen/PowerPC/cr1eq-no-extra-moves.ll
index afa1ea8e75a1..2b3ab9bcceaa 100644
--- a/test/CodeGen/PowerPC/cr1eq-no-extra-moves.ll
+++ b/test/CodeGen/PowerPC/cr1eq-no-extra-moves.ll
@@ -7,14 +7,14 @@ target triple = "powerpc-unknown-linux"
define void @test(i32 %count) nounwind {
entry:
; CHECK: crxor 6, 6, 6
- %call = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([3 x i8]* @.str, i32 0, i32 0), i32 1) nounwind
+ %call = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str, i32 0, i32 0), i32 1) nounwind
%cmp2 = icmp sgt i32 %count, 0
br i1 %cmp2, label %for.body, label %for.end
for.body: ; preds = %entry, %for.body
%i.03 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
; CHECK: crxor 6, 6, 6
- %call1 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([3 x i8]* @.str, i32 0, i32 0), i32 1) nounwind
+ %call1 = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str, i32 0, i32 0), i32 1) nounwind
%inc = add nsw i32 %i.03, 1
%exitcond = icmp eq i32 %inc, %count
br i1 %exitcond, label %for.end, label %for.body
diff --git a/test/CodeGen/PowerPC/cr1eq.ll b/test/CodeGen/PowerPC/cr1eq.ll
index fb9c9695d176..43cd4544424c 100644
--- a/test/CodeGen/PowerPC/cr1eq.ll
+++ b/test/CodeGen/PowerPC/cr1eq.ll
@@ -9,9 +9,9 @@ target triple = "powerpc-unknown-freebsd"
define void @foo() nounwind {
entry:
; CHECK: crxor 6, 6, 6
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i32 1)
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i32 1)
; CHECK: creqv 6, 6, 6
- %call1 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str1, i32 0, i32 0), double 1.100000e+00)
+ %call1 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str1, i32 0, i32 0), double 1.100000e+00)
ret void
}
diff --git a/test/CodeGen/PowerPC/cr_spilling.ll b/test/CodeGen/PowerPC/cr_spilling.ll
index 8bd809fe5948..8ac4e7271ac6 100644
--- a/test/CodeGen/PowerPC/cr_spilling.ll
+++ b/test/CodeGen/PowerPC/cr_spilling.ll
@@ -10,7 +10,7 @@ entry:
br i1 false, label %cond_true94, label %cond_next99
cond_true94: ; preds = %entry
- %tmp98 = call i32 (i8*, ...)* @printf(i8* getelementptr ([3 x i8]* @.str242, i32 0, i32 0), i8* null) ; <i32> [#uses=0]
+ %tmp98 = call i32 (i8*, ...) @printf(i8* getelementptr ([3 x i8], [3 x i8]* @.str242, i32 0, i32 0), i8* null) ; <i32> [#uses=0]
%tmp20971 = icmp sgt i32 %tmp86, 0 ; <i1> [#uses=1]
br i1 %tmp20971, label %bb101, label %bb212
diff --git a/test/CodeGen/PowerPC/crbit-asm.ll b/test/CodeGen/PowerPC/crbit-asm.ll
index 373e334f02bd..36de3435a081 100644
--- a/test/CodeGen/PowerPC/crbit-asm.ll
+++ b/test/CodeGen/PowerPC/crbit-asm.ll
@@ -12,7 +12,7 @@ entry:
; CHECK-LABEL: @testi1
; CHECK-DAG: andi. {{[0-9]+}}, 3, 1
; CHECK-DAG: li [[REG1:[0-9]+]], 0
-; CHECK-DAG: cror [[REG2:[0-9]+]], 1, 1
+; CHECK-DAG: crmove [[REG2:[0-9]+]], 1
; CHECK-DAG: andi. {{[0-9]+}}, 4, 1
; CHECK-DAG: crand [[REG3:[0-9]+]], [[REG2]], 1
; CHECK-DAG: li [[REG4:[0-9]+]], 1
@@ -31,7 +31,7 @@ entry:
; CHECK-LABEL: @testi32
; CHECK-DAG: andi. {{[0-9]+}}, 3, 1
; CHECK-DAG: li [[REG1:[0-9]+]], 0
-; CHECK-DAG: cror [[REG2:[0-9]+]], 1, 1
+; CHECK-DAG: crmove [[REG2:[0-9]+]], 1
; CHECK-DAG: andi. {{[0-9]+}}, 4, 1
; CHECK-DAG: crand [[REG3:[0-9]+]], [[REG2]], 1
; CHECK-DAG: li [[REG4:[0-9]+]], -1
@@ -47,7 +47,7 @@ entry:
; CHECK-LABEL: @testi8
; CHECK-DAG: andi. {{[0-9]+}}, 3, 1
; CHECK-DAG: li [[REG1:[0-9]+]], 0
-; CHECK-DAG: cror [[REG2:[0-9]+]], 1, 1
+; CHECK-DAG: crmove [[REG2:[0-9]+]], 1
; CHECK-DAG: andi. {{[0-9]+}}, 4, 1
; CHECK-DAG: crand [[REG3:[0-9]+]], [[REG2]], 1
; CHECK-DAG: li [[REG4:[0-9]+]], 1
diff --git a/test/CodeGen/PowerPC/crbits.ll b/test/CodeGen/PowerPC/crbits.ll
index 06e90019db76..ab8655c5c8c6 100644
--- a/test/CodeGen/PowerPC/crbits.ll
+++ b/test/CodeGen/PowerPC/crbits.ll
@@ -107,7 +107,7 @@ entry:
; CHECK-LABEL: @test6
; CHECK-DAG: andi. {{[0-9]+}}, 3, 1
; CHECK-DAG: cmpwi {{[0-9]+}}, 5, -2
-; CHECK-DAG: cror [[REG1:[0-9]+]], 1, 1
+; CHECK-DAG: crmove [[REG1:[0-9]+]], 1
; CHECK-DAG: andi. {{[0-9]+}}, 4, 1
; CHECK-DAG: li [[REG2:[0-9]+]], 1
; CHECK-DAG: crorc [[REG4:[0-9]+]], 1,
@@ -145,7 +145,7 @@ entry:
define zeroext i32 @exttest8() #0 {
entry:
- %v0 = load i64* undef, align 8
+ %v0 = load i64, i64* undef, align 8
%sub = sub i64 80, %v0
%div = lshr i64 %sub, 1
%conv13 = trunc i64 %div to i32
diff --git a/test/CodeGen/PowerPC/crsave.ll b/test/CodeGen/PowerPC/crsave.ll
index 602ba94dc094..8121e1b6e639 100644
--- a/test/CodeGen/PowerPC/crsave.ll
+++ b/test/CodeGen/PowerPC/crsave.ll
@@ -9,7 +9,7 @@ entry:
%0 = call i32 asm sideeffect "\0A\09mtcr $4\0A\09cmpw 2,$2,$1\0A\09mfcr $0", "=r,r,r,r,r,~{cr2}"(i32 1, i32 2, i32 3, i32 0) nounwind
store i32 %0, i32* %ret, align 4
call void @foo()
- %1 = load i32* %ret, align 4
+ %1 = load i32, i32* %ret, align 4
ret i32 %1
}
@@ -38,7 +38,7 @@ entry:
%0 = call i32 asm sideeffect "\0A\09mtcr $4\0A\09cmpw 2,$2,$1\0A\09cmpw 3,$2,$2\0A\09cmpw 4,$2,$3\0A\09mfcr $0", "=r,r,r,r,r,~{cr2},~{cr3},~{cr4}"(i32 1, i32 2, i32 3, i32 0) nounwind
store i32 %0, i32* %ret, align 4
call void @foo()
- %1 = load i32* %ret, align 4
+ %1 = load i32, i32* %ret, align 4
ret i32 %1
}
diff --git a/test/CodeGen/PowerPC/crypto_bifs.ll b/test/CodeGen/PowerPC/crypto_bifs.ll
new file mode 100644
index 000000000000..f58935b85b66
--- /dev/null
+++ b/test/CodeGen/PowerPC/crypto_bifs.ll
@@ -0,0 +1,275 @@
+; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 < %s | FileCheck %s
+; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 < %s | FileCheck %s
+; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -mattr=+crypto < %s | FileCheck %s
+; FIXME: llc -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 -mattr=-vsx < %s | FileCheck %s
+; FIXME: The original intent was to add a check-next for the blr after every check.
+; However, this currently fails since we don't eliminate stores of the unused
+; locals. These stores are sometimes scheduled after the crypto instruction.
+
+; Function Attrs: nounwind
+define <16 x i8> @test_vpmsumb() #0 {
+entry:
+ %a = alloca <16 x i8>, align 16
+ %b = alloca <16 x i8>, align 16
+ store <16 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16>, <16 x i8>* %a, align 16
+ store <16 x i8> <i8 113, i8 114, i8 115, i8 116, i8 117, i8 118, i8 119, i8 120, i8 121, i8 122, i8 123, i8 124, i8 125, i8 126, i8 127, i8 112>, <16 x i8>* %b, align 16
+ %0 = load <16 x i8>, <16 x i8>* %a, align 16
+ %1 = load <16 x i8>, <16 x i8>* %b, align 16
+ %2 = call <16 x i8> @llvm.ppc.altivec.crypto.vpmsumb(<16 x i8> %0, <16 x i8> %1)
+ ret <16 x i8> %2
+; CHECK: vpmsumb 2,
+}
+
+; Function Attrs: nounwind readnone
+declare <16 x i8> @llvm.ppc.altivec.crypto.vpmsumb(<16 x i8>, <16 x i8>) #1
+
+; Function Attrs: nounwind
+define <8 x i16> @test_vpmsumh() #0 {
+entry:
+ %a = alloca <8 x i16>, align 16
+ %b = alloca <8 x i16>, align 16
+ store <8 x i16> <i16 258, i16 772, i16 1286, i16 1800, i16 2314, i16 2828, i16 3342, i16 3856>, <8 x i16>* %a, align 16
+ store <8 x i16> <i16 29042, i16 29556, i16 30070, i16 30584, i16 31098, i16 31612, i16 32126, i16 32624>, <8 x i16>* %b, align 16
+ %0 = load <8 x i16>, <8 x i16>* %a, align 16
+ %1 = load <8 x i16>, <8 x i16>* %b, align 16
+ %2 = call <8 x i16> @llvm.ppc.altivec.crypto.vpmsumh(<8 x i16> %0, <8 x i16> %1)
+ ret <8 x i16> %2
+; CHECK: vpmsumh 2,
+}
+
+; Function Attrs: nounwind readnone
+declare <8 x i16> @llvm.ppc.altivec.crypto.vpmsumh(<8 x i16>, <8 x i16>) #1
+
+; Function Attrs: nounwind
+define <4 x i32> @test_vpmsumw() #0 {
+entry:
+ %a = alloca <4 x i32>, align 16
+ %b = alloca <4 x i32>, align 16
+ store <4 x i32> <i32 16909060, i32 84281096, i32 151653132, i32 219025168>, <4 x i32>* %a, align 16
+ store <4 x i32> <i32 1903326068, i32 1970698104, i32 2038070140, i32 2105442160>, <4 x i32>* %b, align 16
+ %0 = load <4 x i32>, <4 x i32>* %a, align 16
+ %1 = load <4 x i32>, <4 x i32>* %b, align 16
+ %2 = call <4 x i32> @llvm.ppc.altivec.crypto.vpmsumw(<4 x i32> %0, <4 x i32> %1)
+ ret <4 x i32> %2
+; CHECK: vpmsumw 2,
+}
+
+; Function Attrs: nounwind readnone
+declare <4 x i32> @llvm.ppc.altivec.crypto.vpmsumw(<4 x i32>, <4 x i32>) #1
+
+; Function Attrs: nounwind
+define <2 x i64> @test_vpmsumd() #0 {
+entry:
+ %a = alloca <2 x i64>, align 16
+ %b = alloca <2 x i64>, align 16
+ store <2 x i64> <i64 72623859790382856, i64 651345242494996240>, <2 x i64>* %a, align 16
+ store <2 x i64> <i64 8174723217654970232, i64 8753444600359583600>, <2 x i64>* %b, align 16
+ %0 = load <2 x i64>, <2 x i64>* %a, align 16
+ %1 = load <2 x i64>, <2 x i64>* %b, align 16
+ %2 = call <2 x i64> @llvm.ppc.altivec.crypto.vpmsumd(<2 x i64> %0, <2 x i64> %1)
+ ret <2 x i64> %2
+; CHECK: vpmsumd 2,
+}
+
+; Function Attrs: nounwind readnone
+declare <2 x i64> @llvm.ppc.altivec.crypto.vpmsumd(<2 x i64>, <2 x i64>) #1
+
+; Function Attrs: nounwind
+define <2 x i64> @test_vsbox() #0 {
+entry:
+ %a = alloca <2 x i64>, align 16
+ store <2 x i64> <i64 72623859790382856, i64 651345242494996240>, <2 x i64>* %a, align 16
+ %0 = load <2 x i64>, <2 x i64>* %a, align 16
+ %1 = call <2 x i64> @llvm.ppc.altivec.crypto.vsbox(<2 x i64> %0)
+ ret <2 x i64> %1
+; CHECK: vsbox 2,
+}
+
+; Function Attrs: nounwind readnone
+declare <2 x i64> @llvm.ppc.altivec.crypto.vsbox(<2 x i64>) #1
+
+; Function Attrs: nounwind
+define <16 x i8> @test_vpermxorb() #0 {
+entry:
+ %a = alloca <16 x i8>, align 16
+ %b = alloca <16 x i8>, align 16
+ %c = alloca <16 x i8>, align 16
+ store <16 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16>, <16 x i8>* %a, align 16
+ store <16 x i8> <i8 113, i8 114, i8 115, i8 116, i8 117, i8 118, i8 119, i8 120, i8 121, i8 122, i8 123, i8 124, i8 125, i8 126, i8 127, i8 112>, <16 x i8>* %b, align 16
+ store <16 x i8> <i8 113, i8 114, i8 115, i8 116, i8 117, i8 118, i8 119, i8 120, i8 121, i8 122, i8 123, i8 124, i8 125, i8 126, i8 127, i8 112>, <16 x i8>* %c, align 16
+ %0 = load <16 x i8>, <16 x i8>* %a, align 16
+ %1 = load <16 x i8>, <16 x i8>* %b, align 16
+ %2 = load <16 x i8>, <16 x i8>* %c, align 16
+ %3 = call <16 x i8> @llvm.ppc.altivec.crypto.vpermxor(<16 x i8> %0, <16 x i8> %1, <16 x i8> %2)
+ ret <16 x i8> %3
+; CHECK: vpermxor 2,
+}
+
+; Function Attrs: nounwind readnone
+declare <16 x i8> @llvm.ppc.altivec.crypto.vpermxor(<16 x i8>, <16 x i8>, <16 x i8>) #1
+
+; Function Attrs: nounwind
+define <8 x i16> @test_vpermxorh() #0 {
+entry:
+ %a = alloca <8 x i16>, align 16
+ %b = alloca <8 x i16>, align 16
+ %c = alloca <8 x i16>, align 16
+ store <8 x i16> <i16 258, i16 772, i16 1286, i16 1800, i16 2314, i16 2828, i16 3342, i16 3856>, <8 x i16>* %a, align 16
+ store <8 x i16> <i16 29042, i16 29556, i16 30070, i16 30584, i16 31098, i16 31612, i16 32126, i16 32624>, <8 x i16>* %b, align 16
+ store <8 x i16> <i16 29042, i16 29556, i16 30070, i16 30584, i16 31098, i16 31612, i16 32126, i16 32624>, <8 x i16>* %c, align 16
+ %0 = load <8 x i16>, <8 x i16>* %a, align 16
+ %1 = bitcast <8 x i16> %0 to <16 x i8>
+ %2 = load <8 x i16>, <8 x i16>* %b, align 16
+ %3 = bitcast <8 x i16> %2 to <16 x i8>
+ %4 = load <8 x i16>, <8 x i16>* %c, align 16
+ %5 = bitcast <8 x i16> %4 to <16 x i8>
+ %6 = call <16 x i8> @llvm.ppc.altivec.crypto.vpermxor(<16 x i8> %1, <16 x i8> %3, <16 x i8> %5)
+ %7 = bitcast <16 x i8> %6 to <8 x i16>
+ ret <8 x i16> %7
+; CHECK: vpermxor 2,
+}
+
+; Function Attrs: nounwind
+define <4 x i32> @test_vpermxorw() #0 {
+entry:
+ %a = alloca <4 x i32>, align 16
+ %b = alloca <4 x i32>, align 16
+ %c = alloca <4 x i32>, align 16
+ store <4 x i32> <i32 16909060, i32 84281096, i32 151653132, i32 219025168>, <4 x i32>* %a, align 16
+ store <4 x i32> <i32 1903326068, i32 1970698104, i32 2038070140, i32 2105442160>, <4 x i32>* %b, align 16
+ store <4 x i32> <i32 1903326068, i32 1970698104, i32 2038070140, i32 2105442160>, <4 x i32>* %c, align 16
+ %0 = load <4 x i32>, <4 x i32>* %a, align 16
+ %1 = bitcast <4 x i32> %0 to <16 x i8>
+ %2 = load <4 x i32>, <4 x i32>* %b, align 16
+ %3 = bitcast <4 x i32> %2 to <16 x i8>
+ %4 = load <4 x i32>, <4 x i32>* %c, align 16
+ %5 = bitcast <4 x i32> %4 to <16 x i8>
+ %6 = call <16 x i8> @llvm.ppc.altivec.crypto.vpermxor(<16 x i8> %1, <16 x i8> %3, <16 x i8> %5)
+ %7 = bitcast <16 x i8> %6 to <4 x i32>
+ ret <4 x i32> %7
+; CHECK: vpermxor 2,
+}
+
+; Function Attrs: nounwind
+define <2 x i64> @test_vpermxord() #0 {
+entry:
+ %a = alloca <2 x i64>, align 16
+ %b = alloca <2 x i64>, align 16
+ %c = alloca <2 x i64>, align 16
+ store <2 x i64> <i64 72623859790382856, i64 651345242494996240>, <2 x i64>* %a, align 16
+ store <2 x i64> <i64 8174723217654970232, i64 8753444600359583600>, <2 x i64>* %b, align 16
+ store <2 x i64> <i64 8174723217654970232, i64 8753444600359583600>, <2 x i64>* %c, align 16
+ %0 = load <2 x i64>, <2 x i64>* %a, align 16
+ %1 = bitcast <2 x i64> %0 to <16 x i8>
+ %2 = load <2 x i64>, <2 x i64>* %b, align 16
+ %3 = bitcast <2 x i64> %2 to <16 x i8>
+ %4 = load <2 x i64>, <2 x i64>* %c, align 16
+ %5 = bitcast <2 x i64> %4 to <16 x i8>
+ %6 = call <16 x i8> @llvm.ppc.altivec.crypto.vpermxor(<16 x i8> %1, <16 x i8> %3, <16 x i8> %5)
+ %7 = bitcast <16 x i8> %6 to <2 x i64>
+ ret <2 x i64> %7
+; CHECK: vpermxor 2,
+}
+
+; Function Attrs: nounwind
+define <2 x i64> @test_vcipher() #0 {
+entry:
+ %a = alloca <2 x i64>, align 16
+ %b = alloca <2 x i64>, align 16
+ store <2 x i64> <i64 72623859790382856, i64 651345242494996240>, <2 x i64>* %a, align 16
+ store <2 x i64> <i64 8174723217654970232, i64 8753444600359583600>, <2 x i64>* %b, align 16
+ %0 = load <2 x i64>, <2 x i64>* %a, align 16
+ %1 = load <2 x i64>, <2 x i64>* %b, align 16
+ %2 = call <2 x i64> @llvm.ppc.altivec.crypto.vcipher(<2 x i64> %0, <2 x i64> %1)
+ ret <2 x i64> %2
+; CHECK: vcipher 2,
+}
+
+; Function Attrs: nounwind readnone
+declare <2 x i64> @llvm.ppc.altivec.crypto.vcipher(<2 x i64>, <2 x i64>) #1
+
+; Function Attrs: nounwind
+define <2 x i64> @test_vcipherlast() #0 {
+entry:
+ %a = alloca <2 x i64>, align 16
+ %b = alloca <2 x i64>, align 16
+ store <2 x i64> <i64 72623859790382856, i64 651345242494996240>, <2 x i64>* %a, align 16
+ store <2 x i64> <i64 8174723217654970232, i64 8753444600359583600>, <2 x i64>* %b, align 16
+ %0 = load <2 x i64>, <2 x i64>* %a, align 16
+ %1 = load <2 x i64>, <2 x i64>* %b, align 16
+ %2 = call <2 x i64> @llvm.ppc.altivec.crypto.vcipherlast(<2 x i64> %0, <2 x i64> %1)
+ ret <2 x i64> %2
+; CHECK: vcipherlast 2,
+}
+
+; Function Attrs: nounwind readnone
+declare <2 x i64> @llvm.ppc.altivec.crypto.vcipherlast(<2 x i64>, <2 x i64>) #1
+
+; Function Attrs: nounwind
+define <2 x i64> @test_vncipher() #0 {
+entry:
+ %a = alloca <2 x i64>, align 16
+ %b = alloca <2 x i64>, align 16
+ store <2 x i64> <i64 72623859790382856, i64 651345242494996240>, <2 x i64>* %a, align 16
+ store <2 x i64> <i64 8174723217654970232, i64 8753444600359583600>, <2 x i64>* %b, align 16
+ %0 = load <2 x i64>, <2 x i64>* %a, align 16
+ %1 = load <2 x i64>, <2 x i64>* %b, align 16
+ %2 = call <2 x i64> @llvm.ppc.altivec.crypto.vncipher(<2 x i64> %0, <2 x i64> %1)
+ ret <2 x i64> %2
+; CHECK: vncipher 2,
+}
+
+; Function Attrs: nounwind readnone
+declare <2 x i64> @llvm.ppc.altivec.crypto.vncipher(<2 x i64>, <2 x i64>) #1
+
+; Function Attrs: nounwind
+define <2 x i64> @test_vncipherlast() #0 {
+entry:
+ %a = alloca <2 x i64>, align 16
+ %b = alloca <2 x i64>, align 16
+ store <2 x i64> <i64 72623859790382856, i64 651345242494996240>, <2 x i64>* %a, align 16
+ store <2 x i64> <i64 8174723217654970232, i64 8753444600359583600>, <2 x i64>* %b, align 16
+ %0 = load <2 x i64>, <2 x i64>* %a, align 16
+ %1 = load <2 x i64>, <2 x i64>* %b, align 16
+ %2 = call <2 x i64> @llvm.ppc.altivec.crypto.vncipherlast(<2 x i64> %0, <2 x i64> %1)
+ ret <2 x i64> %2
+; CHECK: vncipherlast 2,
+}
+
+; Function Attrs: nounwind readnone
+declare <2 x i64> @llvm.ppc.altivec.crypto.vncipherlast(<2 x i64>, <2 x i64>) #1
+
+; Function Attrs: nounwind
+define <4 x i32> @test_vshasigmaw() #0 {
+entry:
+ %a = alloca <4 x i32>, align 16
+ store <4 x i32> <i32 16909060, i32 84281096, i32 151653132, i32 219025168>, <4 x i32>* %a, align 16
+ %0 = load <4 x i32>, <4 x i32>* %a, align 16
+ %1 = call <4 x i32> @llvm.ppc.altivec.crypto.vshasigmaw(<4 x i32> %0, i32 1, i32 15)
+ ret <4 x i32> %1
+; CHECK: vshasigmaw 2,
+}
+
+; Function Attrs: nounwind readnone
+declare <4 x i32> @llvm.ppc.altivec.crypto.vshasigmaw(<4 x i32>, i32, i32) #1
+
+; Function Attrs: nounwind
+define <2 x i64> @test_vshasigmad() #0 {
+entry:
+ %a = alloca <2 x i64>, align 16
+ store <2 x i64> <i64 8174723217654970232, i64 8753444600359583600>, <2 x i64>* %a, align 16
+ %0 = load <2 x i64>, <2 x i64>* %a, align 16
+ %1 = call <2 x i64> @llvm.ppc.altivec.crypto.vshasigmad(<2 x i64> %0, i32 1, i32 15)
+ ret <2 x i64> %1
+; CHECK: vshasigmad 2,
+}
+
+; Function Attrs: nounwind readnone
+declare <2 x i64> @llvm.ppc.altivec.crypto.vshasigmad(<2 x i64>, i32, i32) #1
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone }
+
+!llvm.ident = !{!0}
+
+!0 = !{!"clang version 3.7.0 (trunk 230949) (llvm/trunk 230946)"}
diff --git a/test/CodeGen/PowerPC/ctrloop-cpsgn.ll b/test/CodeGen/PowerPC/ctrloop-cpsgn.ll
index 2f0440912cc9..fcfcf154ef58 100644
--- a/test/CodeGen/PowerPC/ctrloop-cpsgn.ll
+++ b/test/CodeGen/PowerPC/ctrloop-cpsgn.ll
@@ -10,8 +10,8 @@ entry:
for.body: ; preds = %for.body, %entry
%i.06 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
%x.05 = phi ppc_fp128 [ %d, %entry ], [ %conv, %for.body ]
- %arrayidx = getelementptr inbounds ppc_fp128* %n, i32 %i.06
- %0 = load ppc_fp128* %arrayidx, align 8
+ %arrayidx = getelementptr inbounds ppc_fp128, ppc_fp128* %n, i32 %i.06
+ %0 = load ppc_fp128, ppc_fp128* %arrayidx, align 8
%conv = tail call ppc_fp128 @copysignl(ppc_fp128 %x.05, ppc_fp128 %d) nounwind readonly
%inc = add nsw i32 %i.06, 1
%exitcond = icmp eq i32 %inc, 2048
diff --git a/test/CodeGen/PowerPC/ctrloop-fp64.ll b/test/CodeGen/PowerPC/ctrloop-fp64.ll
index 77555ac58de2..6128d7cbdf6f 100644
--- a/test/CodeGen/PowerPC/ctrloop-fp64.ll
+++ b/test/CodeGen/PowerPC/ctrloop-fp64.ll
@@ -10,8 +10,8 @@ entry:
for.body: ; preds = %for.body, %entry
%i.06 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
%x.05 = phi i64 [ 0, %entry ], [ %conv1, %for.body ]
- %arrayidx = getelementptr inbounds double* %n, i32 %i.06
- %0 = load double* %arrayidx, align 8
+ %arrayidx = getelementptr inbounds double, double* %n, i32 %i.06
+ %0 = load double, double* %arrayidx, align 8
%conv = sitofp i64 %x.05 to double
%add = fadd double %conv, %0
%conv1 = fptosi double %add to i64
@@ -31,7 +31,7 @@ for.end: ; preds = %for.body
define i32 @main(i32 %argc, i8** nocapture %argv) {
entry:
- %0 = load double* @init_value, align 8
+ %0 = load double, double* @init_value, align 8
%conv = fptosi double %0 to i64
%broadcast.splatinsert.i = insertelement <2 x i64> undef, i64 %conv, i32 0
%broadcast.splat.i = shufflevector <2 x i64> %broadcast.splatinsert.i, <2 x i64> undef, <2 x i32> zeroinitializer
@@ -39,11 +39,11 @@ entry:
vector.body.i: ; preds = %vector.body.i, %entry
%index.i = phi i32 [ 0, %entry ], [ %index.next.i, %vector.body.i ]
- %next.gep.i = getelementptr [8000 x i64]* @data64, i32 0, i32 %index.i
+ %next.gep.i = getelementptr [8000 x i64], [8000 x i64]* @data64, i32 0, i32 %index.i
%1 = bitcast i64* %next.gep.i to <2 x i64>*
store <2 x i64> %broadcast.splat.i, <2 x i64>* %1, align 8
%next.gep.sum24.i = or i32 %index.i, 2
- %2 = getelementptr [8000 x i64]* @data64, i32 0, i32 %next.gep.sum24.i
+ %2 = getelementptr [8000 x i64], [8000 x i64]* @data64, i32 0, i32 %next.gep.sum24.i
%3 = bitcast i64* %2 to <2 x i64>*
store <2 x i64> %broadcast.splat.i, <2 x i64>* %3, align 8
%index.next.i = add i32 %index.i, 4
diff --git a/test/CodeGen/PowerPC/ctrloop-i64.ll b/test/CodeGen/PowerPC/ctrloop-i64.ll
index 9e01392a458f..5c66a6865ae2 100644
--- a/test/CodeGen/PowerPC/ctrloop-i64.ll
+++ b/test/CodeGen/PowerPC/ctrloop-i64.ll
@@ -10,8 +10,8 @@ entry:
for.body: ; preds = %for.body, %entry
%i.06 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
%x.05 = phi i64 [ 0, %entry ], [ %conv1, %for.body ]
- %arrayidx = getelementptr inbounds i64* %n, i32 %i.06
- %0 = load i64* %arrayidx, align 8
+ %arrayidx = getelementptr inbounds i64, i64* %n, i32 %i.06
+ %0 = load i64, i64* %arrayidx, align 8
%conv = udiv i64 %x.05, %d
%conv1 = add i64 %conv, %0
%inc = add nsw i32 %i.06, 1
@@ -32,8 +32,8 @@ entry:
for.body: ; preds = %for.body, %entry
%i.06 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
%x.05 = phi i64 [ 0, %entry ], [ %conv1, %for.body ]
- %arrayidx = getelementptr inbounds i64* %n, i32 %i.06
- %0 = load i64* %arrayidx, align 8
+ %arrayidx = getelementptr inbounds i64, i64* %n, i32 %i.06
+ %0 = load i64, i64* %arrayidx, align 8
%conv = sdiv i64 %x.05, %d
%conv1 = add i64 %conv, %0
%inc = add nsw i32 %i.06, 1
@@ -54,8 +54,8 @@ entry:
for.body: ; preds = %for.body, %entry
%i.06 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
%x.05 = phi i64 [ 0, %entry ], [ %conv1, %for.body ]
- %arrayidx = getelementptr inbounds i64* %n, i32 %i.06
- %0 = load i64* %arrayidx, align 8
+ %arrayidx = getelementptr inbounds i64, i64* %n, i32 %i.06
+ %0 = load i64, i64* %arrayidx, align 8
%conv = urem i64 %x.05, %d
%conv1 = add i64 %conv, %0
%inc = add nsw i32 %i.06, 1
@@ -76,8 +76,8 @@ entry:
for.body: ; preds = %for.body, %entry
%i.06 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
%x.05 = phi i64 [ 0, %entry ], [ %conv1, %for.body ]
- %arrayidx = getelementptr inbounds i64* %n, i32 %i.06
- %0 = load i64* %arrayidx, align 8
+ %arrayidx = getelementptr inbounds i64, i64* %n, i32 %i.06
+ %0 = load i64, i64* %arrayidx, align 8
%conv = srem i64 %x.05, %d
%conv1 = add i64 %conv, %0
%inc = add nsw i32 %i.06, 1
diff --git a/test/CodeGen/PowerPC/ctrloop-le.ll b/test/CodeGen/PowerPC/ctrloop-le.ll
index 60b0536f9924..bef043703247 100644
--- a/test/CodeGen/PowerPC/ctrloop-le.ll
+++ b/test/CodeGen/PowerPC/ctrloop-le.ll
@@ -18,8 +18,8 @@ for.body.lr.ph: ; preds = %entry
for.body: ; preds = %for.body.lr.ph, %for.body
%i.04 = phi i32 [ 28395, %for.body.lr.ph ], [ %inc, %for.body ]
- %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
- %0 = load i8* %arrayidx, align 1
+ %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
+ %0 = load i8, i8* %arrayidx, align 1
%conv = zext i8 %0 to i32
%add = add nsw i32 %conv, 1
%conv1 = trunc i32 %add to i8
@@ -47,8 +47,8 @@ for.body.lr.ph: ; preds = %entry
for.body: ; preds = %for.body.lr.ph, %for.body
%i.04 = phi i32 [ 9073, %for.body.lr.ph ], [ %inc, %for.body ]
- %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
- %0 = load i8* %arrayidx, align 1
+ %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
+ %0 = load i8, i8* %arrayidx, align 1
%conv = zext i8 %0 to i32
%add = add nsw i32 %conv, 1
%conv1 = trunc i32 %add to i8
@@ -76,8 +76,8 @@ for.body.lr.ph: ; preds = %entry
for.body: ; preds = %for.body.lr.ph, %for.body
%i.04 = phi i32 [ 21956, %for.body.lr.ph ], [ %inc, %for.body ]
- %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
- %0 = load i8* %arrayidx, align 1
+ %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
+ %0 = load i8, i8* %arrayidx, align 1
%conv = zext i8 %0 to i32
%add = add nsw i32 %conv, 1
%conv1 = trunc i32 %add to i8
@@ -105,8 +105,8 @@ for.body.lr.ph: ; preds = %entry
for.body: ; preds = %for.body.lr.ph, %for.body
%i.04 = phi i32 [ 16782, %for.body.lr.ph ], [ %inc, %for.body ]
- %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
- %0 = load i8* %arrayidx, align 1
+ %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
+ %0 = load i8, i8* %arrayidx, align 1
%conv = zext i8 %0 to i32
%add = add nsw i32 %conv, 1
%conv1 = trunc i32 %add to i8
@@ -134,8 +134,8 @@ for.body.lr.ph: ; preds = %entry
for.body: ; preds = %for.body.lr.ph, %for.body
%i.04 = phi i32 [ 19097, %for.body.lr.ph ], [ %inc, %for.body ]
- %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
- %0 = load i8* %arrayidx, align 1
+ %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
+ %0 = load i8, i8* %arrayidx, align 1
%conv = zext i8 %0 to i32
%add = add nsw i32 %conv, 1
%conv1 = trunc i32 %add to i8
@@ -163,8 +163,8 @@ for.body.lr.ph: ; preds = %entry
for.body: ; preds = %for.body.lr.ph, %for.body
%i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
- %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
- %0 = load i8* %arrayidx, align 1
+ %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
+ %0 = load i8, i8* %arrayidx, align 1
%conv = zext i8 %0 to i32
%add = add nsw i32 %conv, 1
%conv1 = trunc i32 %add to i8
@@ -192,8 +192,8 @@ for.body.lr.ph: ; preds = %entry
for.body: ; preds = %for.body.lr.ph, %for.body
%i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
- %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
- %0 = load i8* %arrayidx, align 1
+ %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
+ %0 = load i8, i8* %arrayidx, align 1
%conv = zext i8 %0 to i32
%add = add nsw i32 %conv, 1
%conv1 = trunc i32 %add to i8
@@ -221,8 +221,8 @@ for.body.lr.ph: ; preds = %entry
for.body: ; preds = %for.body.lr.ph, %for.body
%i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
- %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
- %0 = load i8* %arrayidx, align 1
+ %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
+ %0 = load i8, i8* %arrayidx, align 1
%conv = zext i8 %0 to i32
%add = add nsw i32 %conv, 1
%conv1 = trunc i32 %add to i8
@@ -250,8 +250,8 @@ for.body.lr.ph: ; preds = %entry
for.body: ; preds = %for.body.lr.ph, %for.body
%i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
- %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
- %0 = load i8* %arrayidx, align 1
+ %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
+ %0 = load i8, i8* %arrayidx, align 1
%conv = zext i8 %0 to i32
%add = add nsw i32 %conv, 1
%conv1 = trunc i32 %add to i8
@@ -279,8 +279,8 @@ for.body.lr.ph: ; preds = %entry
for.body: ; preds = %for.body.lr.ph, %for.body
%i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
- %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
- %0 = load i8* %arrayidx, align 1
+ %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
+ %0 = load i8, i8* %arrayidx, align 1
%conv = zext i8 %0 to i32
%add = add nsw i32 %conv, 1
%conv1 = trunc i32 %add to i8
@@ -309,8 +309,8 @@ for.body.lr.ph: ; preds = %entry
for.body: ; preds = %for.body.lr.ph, %for.body
%i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
- %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
- %0 = load i8* %arrayidx, align 1
+ %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
+ %0 = load i8, i8* %arrayidx, align 1
%conv = zext i8 %0 to i32
%add = add nsw i32 %conv, 1
%conv1 = trunc i32 %add to i8
@@ -339,8 +339,8 @@ for.body.lr.ph: ; preds = %entry
for.body: ; preds = %for.body.lr.ph, %for.body
%i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
- %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
- %0 = load i8* %arrayidx, align 1
+ %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
+ %0 = load i8, i8* %arrayidx, align 1
%conv = zext i8 %0 to i32
%add = add nsw i32 %conv, 1
%conv1 = trunc i32 %add to i8
@@ -369,8 +369,8 @@ for.body.lr.ph: ; preds = %entry
for.body: ; preds = %for.body.lr.ph, %for.body
%i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
- %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
- %0 = load i8* %arrayidx, align 1
+ %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
+ %0 = load i8, i8* %arrayidx, align 1
%conv = zext i8 %0 to i32
%add = add nsw i32 %conv, 1
%conv1 = trunc i32 %add to i8
@@ -399,8 +399,8 @@ for.body.lr.ph: ; preds = %entry
for.body: ; preds = %for.body.lr.ph, %for.body
%i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
- %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
- %0 = load i8* %arrayidx, align 1
+ %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
+ %0 = load i8, i8* %arrayidx, align 1
%conv = zext i8 %0 to i32
%add = add nsw i32 %conv, 1
%conv1 = trunc i32 %add to i8
@@ -429,8 +429,8 @@ for.body.lr.ph: ; preds = %entry
for.body: ; preds = %for.body.lr.ph, %for.body
%i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
- %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
- %0 = load i8* %arrayidx, align 1
+ %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
+ %0 = load i8, i8* %arrayidx, align 1
%conv = zext i8 %0 to i32
%add = add nsw i32 %conv, 1
%conv1 = trunc i32 %add to i8
diff --git a/test/CodeGen/PowerPC/ctrloop-lt.ll b/test/CodeGen/PowerPC/ctrloop-lt.ll
index a9dc42c1c971..fa910aab4e0d 100644
--- a/test/CodeGen/PowerPC/ctrloop-lt.ll
+++ b/test/CodeGen/PowerPC/ctrloop-lt.ll
@@ -18,8 +18,8 @@ for.body.lr.ph: ; preds = %entry
for.body: ; preds = %for.body.lr.ph, %for.body
%i.04 = phi i32 [ 8531, %for.body.lr.ph ], [ %inc, %for.body ]
- %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
- %0 = load i8* %arrayidx, align 1
+ %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
+ %0 = load i8, i8* %arrayidx, align 1
%conv = zext i8 %0 to i32
%add = add nsw i32 %conv, 1
%conv1 = trunc i32 %add to i8
@@ -48,8 +48,8 @@ for.body.lr.ph: ; preds = %entry
for.body: ; preds = %for.body.lr.ph, %for.body
%i.04 = phi i32 [ 9152, %for.body.lr.ph ], [ %inc, %for.body ]
- %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
- %0 = load i8* %arrayidx, align 1
+ %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
+ %0 = load i8, i8* %arrayidx, align 1
%conv = zext i8 %0 to i32
%add = add nsw i32 %conv, 1
%conv1 = trunc i32 %add to i8
@@ -78,8 +78,8 @@ for.body.lr.ph: ; preds = %entry
for.body: ; preds = %for.body.lr.ph, %for.body
%i.04 = phi i32 [ 18851, %for.body.lr.ph ], [ %inc, %for.body ]
- %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
- %0 = load i8* %arrayidx, align 1
+ %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
+ %0 = load i8, i8* %arrayidx, align 1
%conv = zext i8 %0 to i32
%add = add nsw i32 %conv, 1
%conv1 = trunc i32 %add to i8
@@ -107,8 +107,8 @@ for.body.lr.ph: ; preds = %entry
for.body: ; preds = %for.body.lr.ph, %for.body
%i.04 = phi i32 [ 25466, %for.body.lr.ph ], [ %inc, %for.body ]
- %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
- %0 = load i8* %arrayidx, align 1
+ %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
+ %0 = load i8, i8* %arrayidx, align 1
%conv = zext i8 %0 to i32
%add = add nsw i32 %conv, 1
%conv1 = trunc i32 %add to i8
@@ -136,8 +136,8 @@ for.body.lr.ph: ; preds = %entry
for.body: ; preds = %for.body.lr.ph, %for.body
%i.04 = phi i32 [ 9295, %for.body.lr.ph ], [ %inc, %for.body ]
- %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
- %0 = load i8* %arrayidx, align 1
+ %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
+ %0 = load i8, i8* %arrayidx, align 1
%conv = zext i8 %0 to i32
%add = add nsw i32 %conv, 1
%conv1 = trunc i32 %add to i8
@@ -165,8 +165,8 @@ for.body.lr.ph: ; preds = %entry
for.body: ; preds = %for.body.lr.ph, %for.body
%i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
- %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
- %0 = load i8* %arrayidx, align 1
+ %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
+ %0 = load i8, i8* %arrayidx, align 1
%conv = zext i8 %0 to i32
%add = add nsw i32 %conv, 1
%conv1 = trunc i32 %add to i8
@@ -194,8 +194,8 @@ for.body.lr.ph: ; preds = %entry
for.body: ; preds = %for.body.lr.ph, %for.body
%i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
- %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
- %0 = load i8* %arrayidx, align 1
+ %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
+ %0 = load i8, i8* %arrayidx, align 1
%conv = zext i8 %0 to i32
%add = add nsw i32 %conv, 1
%conv1 = trunc i32 %add to i8
@@ -223,8 +223,8 @@ for.body.lr.ph: ; preds = %entry
for.body: ; preds = %for.body.lr.ph, %for.body
%i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
- %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
- %0 = load i8* %arrayidx, align 1
+ %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
+ %0 = load i8, i8* %arrayidx, align 1
%conv = zext i8 %0 to i32
%add = add nsw i32 %conv, 1
%conv1 = trunc i32 %add to i8
@@ -252,8 +252,8 @@ for.body.lr.ph: ; preds = %entry
for.body: ; preds = %for.body.lr.ph, %for.body
%i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
- %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
- %0 = load i8* %arrayidx, align 1
+ %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
+ %0 = load i8, i8* %arrayidx, align 1
%conv = zext i8 %0 to i32
%add = add nsw i32 %conv, 1
%conv1 = trunc i32 %add to i8
@@ -281,8 +281,8 @@ for.body.lr.ph: ; preds = %entry
for.body: ; preds = %for.body.lr.ph, %for.body
%i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
- %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
- %0 = load i8* %arrayidx, align 1
+ %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
+ %0 = load i8, i8* %arrayidx, align 1
%conv = zext i8 %0 to i32
%add = add nsw i32 %conv, 1
%conv1 = trunc i32 %add to i8
@@ -310,8 +310,8 @@ for.body.lr.ph: ; preds = %entry
for.body: ; preds = %for.body.lr.ph, %for.body
%i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
- %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
- %0 = load i8* %arrayidx, align 1
+ %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
+ %0 = load i8, i8* %arrayidx, align 1
%conv = zext i8 %0 to i32
%add = add nsw i32 %conv, 1
%conv1 = trunc i32 %add to i8
@@ -339,8 +339,8 @@ for.body.lr.ph: ; preds = %entry
for.body: ; preds = %for.body.lr.ph, %for.body
%i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
- %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
- %0 = load i8* %arrayidx, align 1
+ %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
+ %0 = load i8, i8* %arrayidx, align 1
%conv = zext i8 %0 to i32
%add = add nsw i32 %conv, 1
%conv1 = trunc i32 %add to i8
@@ -368,8 +368,8 @@ for.body.lr.ph: ; preds = %entry
for.body: ; preds = %for.body.lr.ph, %for.body
%i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
- %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
- %0 = load i8* %arrayidx, align 1
+ %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
+ %0 = load i8, i8* %arrayidx, align 1
%conv = zext i8 %0 to i32
%add = add nsw i32 %conv, 1
%conv1 = trunc i32 %add to i8
@@ -397,8 +397,8 @@ for.body.lr.ph: ; preds = %entry
for.body: ; preds = %for.body.lr.ph, %for.body
%i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
- %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
- %0 = load i8* %arrayidx, align 1
+ %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
+ %0 = load i8, i8* %arrayidx, align 1
%conv = zext i8 %0 to i32
%add = add nsw i32 %conv, 1
%conv1 = trunc i32 %add to i8
@@ -426,8 +426,8 @@ for.body.lr.ph: ; preds = %entry
for.body: ; preds = %for.body.lr.ph, %for.body
%i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
- %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
- %0 = load i8* %arrayidx, align 1
+ %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
+ %0 = load i8, i8* %arrayidx, align 1
%conv = zext i8 %0 to i32
%add = add nsw i32 %conv, 1
%conv1 = trunc i32 %add to i8
diff --git a/test/CodeGen/PowerPC/ctrloop-ne.ll b/test/CodeGen/PowerPC/ctrloop-ne.ll
index 636030a15dd2..13a9909c3d61 100644
--- a/test/CodeGen/PowerPC/ctrloop-ne.ll
+++ b/test/CodeGen/PowerPC/ctrloop-ne.ll
@@ -15,8 +15,8 @@ for.body.lr.ph: ; preds = %entry
for.body: ; preds = %for.body.lr.ph, %for.body
%i.04 = phi i32 [ 32623, %for.body.lr.ph ], [ %inc, %for.body ]
- %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
- %0 = load i8* %arrayidx, align 1
+ %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
+ %0 = load i8, i8* %arrayidx, align 1
%conv = zext i8 %0 to i32
%add = add nsw i32 %conv, 1
%conv1 = trunc i32 %add to i8
@@ -45,8 +45,8 @@ for.body.lr.ph: ; preds = %entry
for.body: ; preds = %for.body.lr.ph, %for.body
%i.04 = phi i32 [ 29554, %for.body.lr.ph ], [ %inc, %for.body ]
- %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
- %0 = load i8* %arrayidx, align 1
+ %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
+ %0 = load i8, i8* %arrayidx, align 1
%conv = zext i8 %0 to i32
%add = add nsw i32 %conv, 1
%conv1 = trunc i32 %add to i8
@@ -75,8 +75,8 @@ for.body.lr.ph: ; preds = %entry
for.body: ; preds = %for.body.lr.ph, %for.body
%i.04 = phi i32 [ 15692, %for.body.lr.ph ], [ %inc, %for.body ]
- %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
- %0 = load i8* %arrayidx, align 1
+ %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
+ %0 = load i8, i8* %arrayidx, align 1
%conv = zext i8 %0 to i32
%add = add nsw i32 %conv, 1
%conv1 = trunc i32 %add to i8
@@ -105,8 +105,8 @@ for.body.lr.ph: ; preds = %entry
for.body: ; preds = %for.body.lr.ph, %for.body
%i.04 = phi i32 [ 10449, %for.body.lr.ph ], [ %inc, %for.body ]
- %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
- %0 = load i8* %arrayidx, align 1
+ %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
+ %0 = load i8, i8* %arrayidx, align 1
%conv = zext i8 %0 to i32
%add = add nsw i32 %conv, 1
%conv1 = trunc i32 %add to i8
@@ -135,8 +135,8 @@ for.body.lr.ph: ; preds = %entry
for.body: ; preds = %for.body.lr.ph, %for.body
%i.04 = phi i32 [ 32087, %for.body.lr.ph ], [ %inc, %for.body ]
- %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
- %0 = load i8* %arrayidx, align 1
+ %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
+ %0 = load i8, i8* %arrayidx, align 1
%conv = zext i8 %0 to i32
%add = add nsw i32 %conv, 1
%conv1 = trunc i32 %add to i8
@@ -164,8 +164,8 @@ for.body.lr.ph: ; preds = %entry
for.body: ; preds = %for.body.lr.ph, %for.body
%i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
- %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
- %0 = load i8* %arrayidx, align 1
+ %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
+ %0 = load i8, i8* %arrayidx, align 1
%conv = zext i8 %0 to i32
%add = add nsw i32 %conv, 1
%conv1 = trunc i32 %add to i8
@@ -194,8 +194,8 @@ for.body.lr.ph: ; preds = %entry
for.body: ; preds = %for.body.lr.ph, %for.body
%i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
- %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
- %0 = load i8* %arrayidx, align 1
+ %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
+ %0 = load i8, i8* %arrayidx, align 1
%conv = zext i8 %0 to i32
%add = add nsw i32 %conv, 1
%conv1 = trunc i32 %add to i8
@@ -224,8 +224,8 @@ for.body.lr.ph: ; preds = %entry
for.body: ; preds = %for.body.lr.ph, %for.body
%i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
- %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
- %0 = load i8* %arrayidx, align 1
+ %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
+ %0 = load i8, i8* %arrayidx, align 1
%conv = zext i8 %0 to i32
%add = add nsw i32 %conv, 1
%conv1 = trunc i32 %add to i8
@@ -254,8 +254,8 @@ for.body.lr.ph: ; preds = %entry
for.body: ; preds = %for.body.lr.ph, %for.body
%i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
- %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
- %0 = load i8* %arrayidx, align 1
+ %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
+ %0 = load i8, i8* %arrayidx, align 1
%conv = zext i8 %0 to i32
%add = add nsw i32 %conv, 1
%conv1 = trunc i32 %add to i8
@@ -284,8 +284,8 @@ for.body.lr.ph: ; preds = %entry
for.body: ; preds = %for.body.lr.ph, %for.body
%i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
- %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
- %0 = load i8* %arrayidx, align 1
+ %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
+ %0 = load i8, i8* %arrayidx, align 1
%conv = zext i8 %0 to i32
%add = add nsw i32 %conv, 1
%conv1 = trunc i32 %add to i8
@@ -313,8 +313,8 @@ for.body.lr.ph: ; preds = %entry
for.body: ; preds = %for.body.lr.ph, %for.body
%i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
- %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
- %0 = load i8* %arrayidx, align 1
+ %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
+ %0 = load i8, i8* %arrayidx, align 1
%conv = zext i8 %0 to i32
%add = add nsw i32 %conv, 1
%conv1 = trunc i32 %add to i8
@@ -343,8 +343,8 @@ for.body.lr.ph: ; preds = %entry
for.body: ; preds = %for.body.lr.ph, %for.body
%i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
- %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
- %0 = load i8* %arrayidx, align 1
+ %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
+ %0 = load i8, i8* %arrayidx, align 1
%conv = zext i8 %0 to i32
%add = add nsw i32 %conv, 1
%conv1 = trunc i32 %add to i8
@@ -373,8 +373,8 @@ for.body.lr.ph: ; preds = %entry
for.body: ; preds = %for.body.lr.ph, %for.body
%i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
- %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
- %0 = load i8* %arrayidx, align 1
+ %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
+ %0 = load i8, i8* %arrayidx, align 1
%conv = zext i8 %0 to i32
%add = add nsw i32 %conv, 1
%conv1 = trunc i32 %add to i8
@@ -403,8 +403,8 @@ for.body.lr.ph: ; preds = %entry
for.body: ; preds = %for.body.lr.ph, %for.body
%i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
- %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
- %0 = load i8* %arrayidx, align 1
+ %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
+ %0 = load i8, i8* %arrayidx, align 1
%conv = zext i8 %0 to i32
%add = add nsw i32 %conv, 1
%conv1 = trunc i32 %add to i8
@@ -433,8 +433,8 @@ for.body.lr.ph: ; preds = %entry
for.body: ; preds = %for.body.lr.ph, %for.body
%i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
- %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
- %0 = load i8* %arrayidx, align 1
+ %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
+ %0 = load i8, i8* %arrayidx, align 1
%conv = zext i8 %0 to i32
%add = add nsw i32 %conv, 1
%conv1 = trunc i32 %add to i8
diff --git a/test/CodeGen/PowerPC/ctrloop-s000.ll b/test/CodeGen/PowerPC/ctrloop-s000.ll
index 4d8ef50501f2..344bbf3b341d 100644
--- a/test/CodeGen/PowerPC/ctrloop-s000.ll
+++ b/test/CodeGen/PowerPC/ctrloop-s000.ll
@@ -35,100 +35,100 @@ for.cond1.preheader: ; preds = %for.end, %entry
for.body3: ; preds = %for.body3, %for.cond1.preheader
%indvars.iv = phi i64 [ 0, %for.cond1.preheader ], [ %indvars.iv.next.15, %for.body3 ]
- %arrayidx = getelementptr inbounds [16000 x double]* @Y, i64 0, i64 %indvars.iv
- %0 = load double* %arrayidx, align 32
+ %arrayidx = getelementptr inbounds [16000 x double], [16000 x double]* @Y, i64 0, i64 %indvars.iv
+ %0 = load double, double* %arrayidx, align 32
%add = fadd double %0, 1.000000e+00
- %arrayidx5 = getelementptr inbounds [16000 x double]* @X, i64 0, i64 %indvars.iv
+ %arrayidx5 = getelementptr inbounds [16000 x double], [16000 x double]* @X, i64 0, i64 %indvars.iv
store double %add, double* %arrayidx5, align 32
%indvars.iv.next11 = or i64 %indvars.iv, 1
- %arrayidx.1 = getelementptr inbounds [16000 x double]* @Y, i64 0, i64 %indvars.iv.next11
- %1 = load double* %arrayidx.1, align 8
+ %arrayidx.1 = getelementptr inbounds [16000 x double], [16000 x double]* @Y, i64 0, i64 %indvars.iv.next11
+ %1 = load double, double* %arrayidx.1, align 8
%add.1 = fadd double %1, 1.000000e+00
- %arrayidx5.1 = getelementptr inbounds [16000 x double]* @X, i64 0, i64 %indvars.iv.next11
+ %arrayidx5.1 = getelementptr inbounds [16000 x double], [16000 x double]* @X, i64 0, i64 %indvars.iv.next11
store double %add.1, double* %arrayidx5.1, align 8
%indvars.iv.next.112 = or i64 %indvars.iv, 2
- %arrayidx.2 = getelementptr inbounds [16000 x double]* @Y, i64 0, i64 %indvars.iv.next.112
- %2 = load double* %arrayidx.2, align 16
+ %arrayidx.2 = getelementptr inbounds [16000 x double], [16000 x double]* @Y, i64 0, i64 %indvars.iv.next.112
+ %2 = load double, double* %arrayidx.2, align 16
%add.2 = fadd double %2, 1.000000e+00
- %arrayidx5.2 = getelementptr inbounds [16000 x double]* @X, i64 0, i64 %indvars.iv.next.112
+ %arrayidx5.2 = getelementptr inbounds [16000 x double], [16000 x double]* @X, i64 0, i64 %indvars.iv.next.112
store double %add.2, double* %arrayidx5.2, align 16
%indvars.iv.next.213 = or i64 %indvars.iv, 3
- %arrayidx.3 = getelementptr inbounds [16000 x double]* @Y, i64 0, i64 %indvars.iv.next.213
- %3 = load double* %arrayidx.3, align 8
+ %arrayidx.3 = getelementptr inbounds [16000 x double], [16000 x double]* @Y, i64 0, i64 %indvars.iv.next.213
+ %3 = load double, double* %arrayidx.3, align 8
%add.3 = fadd double %3, 1.000000e+00
- %arrayidx5.3 = getelementptr inbounds [16000 x double]* @X, i64 0, i64 %indvars.iv.next.213
+ %arrayidx5.3 = getelementptr inbounds [16000 x double], [16000 x double]* @X, i64 0, i64 %indvars.iv.next.213
store double %add.3, double* %arrayidx5.3, align 8
%indvars.iv.next.314 = or i64 %indvars.iv, 4
- %arrayidx.4 = getelementptr inbounds [16000 x double]* @Y, i64 0, i64 %indvars.iv.next.314
- %4 = load double* %arrayidx.4, align 32
+ %arrayidx.4 = getelementptr inbounds [16000 x double], [16000 x double]* @Y, i64 0, i64 %indvars.iv.next.314
+ %4 = load double, double* %arrayidx.4, align 32
%add.4 = fadd double %4, 1.000000e+00
- %arrayidx5.4 = getelementptr inbounds [16000 x double]* @X, i64 0, i64 %indvars.iv.next.314
+ %arrayidx5.4 = getelementptr inbounds [16000 x double], [16000 x double]* @X, i64 0, i64 %indvars.iv.next.314
store double %add.4, double* %arrayidx5.4, align 32
%indvars.iv.next.415 = or i64 %indvars.iv, 5
- %arrayidx.5 = getelementptr inbounds [16000 x double]* @Y, i64 0, i64 %indvars.iv.next.415
- %5 = load double* %arrayidx.5, align 8
+ %arrayidx.5 = getelementptr inbounds [16000 x double], [16000 x double]* @Y, i64 0, i64 %indvars.iv.next.415
+ %5 = load double, double* %arrayidx.5, align 8
%add.5 = fadd double %5, 1.000000e+00
- %arrayidx5.5 = getelementptr inbounds [16000 x double]* @X, i64 0, i64 %indvars.iv.next.415
+ %arrayidx5.5 = getelementptr inbounds [16000 x double], [16000 x double]* @X, i64 0, i64 %indvars.iv.next.415
store double %add.5, double* %arrayidx5.5, align 8
%indvars.iv.next.516 = or i64 %indvars.iv, 6
- %arrayidx.6 = getelementptr inbounds [16000 x double]* @Y, i64 0, i64 %indvars.iv.next.516
- %6 = load double* %arrayidx.6, align 16
+ %arrayidx.6 = getelementptr inbounds [16000 x double], [16000 x double]* @Y, i64 0, i64 %indvars.iv.next.516
+ %6 = load double, double* %arrayidx.6, align 16
%add.6 = fadd double %6, 1.000000e+00
- %arrayidx5.6 = getelementptr inbounds [16000 x double]* @X, i64 0, i64 %indvars.iv.next.516
+ %arrayidx5.6 = getelementptr inbounds [16000 x double], [16000 x double]* @X, i64 0, i64 %indvars.iv.next.516
store double %add.6, double* %arrayidx5.6, align 16
%indvars.iv.next.617 = or i64 %indvars.iv, 7
- %arrayidx.7 = getelementptr inbounds [16000 x double]* @Y, i64 0, i64 %indvars.iv.next.617
- %7 = load double* %arrayidx.7, align 8
+ %arrayidx.7 = getelementptr inbounds [16000 x double], [16000 x double]* @Y, i64 0, i64 %indvars.iv.next.617
+ %7 = load double, double* %arrayidx.7, align 8
%add.7 = fadd double %7, 1.000000e+00
- %arrayidx5.7 = getelementptr inbounds [16000 x double]* @X, i64 0, i64 %indvars.iv.next.617
+ %arrayidx5.7 = getelementptr inbounds [16000 x double], [16000 x double]* @X, i64 0, i64 %indvars.iv.next.617
store double %add.7, double* %arrayidx5.7, align 8
%indvars.iv.next.718 = or i64 %indvars.iv, 8
- %arrayidx.8 = getelementptr inbounds [16000 x double]* @Y, i64 0, i64 %indvars.iv.next.718
- %8 = load double* %arrayidx.8, align 32
+ %arrayidx.8 = getelementptr inbounds [16000 x double], [16000 x double]* @Y, i64 0, i64 %indvars.iv.next.718
+ %8 = load double, double* %arrayidx.8, align 32
%add.8 = fadd double %8, 1.000000e+00
- %arrayidx5.8 = getelementptr inbounds [16000 x double]* @X, i64 0, i64 %indvars.iv.next.718
+ %arrayidx5.8 = getelementptr inbounds [16000 x double], [16000 x double]* @X, i64 0, i64 %indvars.iv.next.718
store double %add.8, double* %arrayidx5.8, align 32
%indvars.iv.next.819 = or i64 %indvars.iv, 9
- %arrayidx.9 = getelementptr inbounds [16000 x double]* @Y, i64 0, i64 %indvars.iv.next.819
- %9 = load double* %arrayidx.9, align 8
+ %arrayidx.9 = getelementptr inbounds [16000 x double], [16000 x double]* @Y, i64 0, i64 %indvars.iv.next.819
+ %9 = load double, double* %arrayidx.9, align 8
%add.9 = fadd double %9, 1.000000e+00
- %arrayidx5.9 = getelementptr inbounds [16000 x double]* @X, i64 0, i64 %indvars.iv.next.819
+ %arrayidx5.9 = getelementptr inbounds [16000 x double], [16000 x double]* @X, i64 0, i64 %indvars.iv.next.819
store double %add.9, double* %arrayidx5.9, align 8
%indvars.iv.next.920 = or i64 %indvars.iv, 10
- %arrayidx.10 = getelementptr inbounds [16000 x double]* @Y, i64 0, i64 %indvars.iv.next.920
- %10 = load double* %arrayidx.10, align 16
+ %arrayidx.10 = getelementptr inbounds [16000 x double], [16000 x double]* @Y, i64 0, i64 %indvars.iv.next.920
+ %10 = load double, double* %arrayidx.10, align 16
%add.10 = fadd double %10, 1.000000e+00
- %arrayidx5.10 = getelementptr inbounds [16000 x double]* @X, i64 0, i64 %indvars.iv.next.920
+ %arrayidx5.10 = getelementptr inbounds [16000 x double], [16000 x double]* @X, i64 0, i64 %indvars.iv.next.920
store double %add.10, double* %arrayidx5.10, align 16
%indvars.iv.next.1021 = or i64 %indvars.iv, 11
- %arrayidx.11 = getelementptr inbounds [16000 x double]* @Y, i64 0, i64 %indvars.iv.next.1021
- %11 = load double* %arrayidx.11, align 8
+ %arrayidx.11 = getelementptr inbounds [16000 x double], [16000 x double]* @Y, i64 0, i64 %indvars.iv.next.1021
+ %11 = load double, double* %arrayidx.11, align 8
%add.11 = fadd double %11, 1.000000e+00
- %arrayidx5.11 = getelementptr inbounds [16000 x double]* @X, i64 0, i64 %indvars.iv.next.1021
+ %arrayidx5.11 = getelementptr inbounds [16000 x double], [16000 x double]* @X, i64 0, i64 %indvars.iv.next.1021
store double %add.11, double* %arrayidx5.11, align 8
%indvars.iv.next.1122 = or i64 %indvars.iv, 12
- %arrayidx.12 = getelementptr inbounds [16000 x double]* @Y, i64 0, i64 %indvars.iv.next.1122
- %12 = load double* %arrayidx.12, align 32
+ %arrayidx.12 = getelementptr inbounds [16000 x double], [16000 x double]* @Y, i64 0, i64 %indvars.iv.next.1122
+ %12 = load double, double* %arrayidx.12, align 32
%add.12 = fadd double %12, 1.000000e+00
- %arrayidx5.12 = getelementptr inbounds [16000 x double]* @X, i64 0, i64 %indvars.iv.next.1122
+ %arrayidx5.12 = getelementptr inbounds [16000 x double], [16000 x double]* @X, i64 0, i64 %indvars.iv.next.1122
store double %add.12, double* %arrayidx5.12, align 32
%indvars.iv.next.1223 = or i64 %indvars.iv, 13
- %arrayidx.13 = getelementptr inbounds [16000 x double]* @Y, i64 0, i64 %indvars.iv.next.1223
- %13 = load double* %arrayidx.13, align 8
+ %arrayidx.13 = getelementptr inbounds [16000 x double], [16000 x double]* @Y, i64 0, i64 %indvars.iv.next.1223
+ %13 = load double, double* %arrayidx.13, align 8
%add.13 = fadd double %13, 1.000000e+00
- %arrayidx5.13 = getelementptr inbounds [16000 x double]* @X, i64 0, i64 %indvars.iv.next.1223
+ %arrayidx5.13 = getelementptr inbounds [16000 x double], [16000 x double]* @X, i64 0, i64 %indvars.iv.next.1223
store double %add.13, double* %arrayidx5.13, align 8
%indvars.iv.next.1324 = or i64 %indvars.iv, 14
- %arrayidx.14 = getelementptr inbounds [16000 x double]* @Y, i64 0, i64 %indvars.iv.next.1324
- %14 = load double* %arrayidx.14, align 16
+ %arrayidx.14 = getelementptr inbounds [16000 x double], [16000 x double]* @Y, i64 0, i64 %indvars.iv.next.1324
+ %14 = load double, double* %arrayidx.14, align 16
%add.14 = fadd double %14, 1.000000e+00
- %arrayidx5.14 = getelementptr inbounds [16000 x double]* @X, i64 0, i64 %indvars.iv.next.1324
+ %arrayidx5.14 = getelementptr inbounds [16000 x double], [16000 x double]* @X, i64 0, i64 %indvars.iv.next.1324
store double %add.14, double* %arrayidx5.14, align 16
%indvars.iv.next.1425 = or i64 %indvars.iv, 15
- %arrayidx.15 = getelementptr inbounds [16000 x double]* @Y, i64 0, i64 %indvars.iv.next.1425
- %15 = load double* %arrayidx.15, align 8
+ %arrayidx.15 = getelementptr inbounds [16000 x double], [16000 x double]* @Y, i64 0, i64 %indvars.iv.next.1425
+ %15 = load double, double* %arrayidx.15, align 8
%add.15 = fadd double %15, 1.000000e+00
- %arrayidx5.15 = getelementptr inbounds [16000 x double]* @X, i64 0, i64 %indvars.iv.next.1425
+ %arrayidx5.15 = getelementptr inbounds [16000 x double], [16000 x double]* @X, i64 0, i64 %indvars.iv.next.1425
store double %add.15, double* %arrayidx5.15, align 8
%indvars.iv.next.15 = add i64 %indvars.iv, 16
%lftr.wideiv.15 = trunc i64 %indvars.iv.next.15 to i32
@@ -136,7 +136,7 @@ for.body3: ; preds = %for.body3, %for.con
br i1 %exitcond.15, label %for.end, label %for.body3
for.end: ; preds = %for.body3
- %call = tail call i32 @dummy(double* getelementptr inbounds ([16000 x double]* @X, i64 0, i64 0), double* getelementptr inbounds ([16000 x double]* @Y, i64 0, i64 0), double* getelementptr inbounds ([16000 x double]* @Z, i64 0, i64 0), double* getelementptr inbounds ([16000 x double]* @U, i64 0, i64 0), double* getelementptr inbounds ([16000 x double]* @V, i64 0, i64 0), [256 x double]* getelementptr inbounds ([256 x [256 x double]]* @aa, i64 0, i64 0), [256 x double]* getelementptr inbounds ([256 x [256 x double]]* @bb, i64 0, i64 0), [256 x double]* getelementptr inbounds ([256 x [256 x double]]* @cc, i64 0, i64 0), double 0.000000e+00) nounwind
+ %call = tail call i32 @dummy(double* getelementptr inbounds ([16000 x double], [16000 x double]* @X, i64 0, i64 0), double* getelementptr inbounds ([16000 x double], [16000 x double]* @Y, i64 0, i64 0), double* getelementptr inbounds ([16000 x double], [16000 x double]* @Z, i64 0, i64 0), double* getelementptr inbounds ([16000 x double], [16000 x double]* @U, i64 0, i64 0), double* getelementptr inbounds ([16000 x double], [16000 x double]* @V, i64 0, i64 0), [256 x double]* getelementptr inbounds ([256 x [256 x double]], [256 x [256 x double]]* @aa, i64 0, i64 0), [256 x double]* getelementptr inbounds ([256 x [256 x double]], [256 x [256 x double]]* @bb, i64 0, i64 0), [256 x double]* getelementptr inbounds ([256 x [256 x double]], [256 x [256 x double]]* @cc, i64 0, i64 0), double 0.000000e+00) nounwind
%inc7 = add nsw i32 %nl.010, 1
%exitcond = icmp eq i32 %inc7, 400000
br i1 %exitcond, label %for.end8, label %for.cond1.preheader
diff --git a/test/CodeGen/PowerPC/ctrloop-sh.ll b/test/CodeGen/PowerPC/ctrloop-sh.ll
index d8e6fc79a665..540f0d6b57e6 100644
--- a/test/CodeGen/PowerPC/ctrloop-sh.ll
+++ b/test/CodeGen/PowerPC/ctrloop-sh.ll
@@ -9,8 +9,8 @@ entry:
for.body: ; preds = %for.body, %entry
%i.02 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
- %0 = load i128* %b, align 16
- %1 = load i128* %c, align 16
+ %0 = load i128, i128* %b, align 16
+ %1 = load i128, i128* %c, align 16
%shl = shl i128 %0, %1
store i128 %shl, i128* %a, align 16
%inc = add nsw i32 %i.02, 1
@@ -31,8 +31,8 @@ entry:
for.body: ; preds = %for.body, %entry
%i.02 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
- %0 = load i128* %b, align 16
- %1 = load i128* %c, align 16
+ %0 = load i128, i128* %b, align 16
+ %1 = load i128, i128* %c, align 16
%shl = ashr i128 %0, %1
store i128 %shl, i128* %a, align 16
%inc = add nsw i32 %i.02, 1
@@ -53,8 +53,8 @@ entry:
for.body: ; preds = %for.body, %entry
%i.02 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
- %0 = load i128* %b, align 16
- %1 = load i128* %c, align 16
+ %0 = load i128, i128* %b, align 16
+ %1 = load i128, i128* %c, align 16
%shl = lshr i128 %0, %1
store i128 %shl, i128* %a, align 16
%inc = add nsw i32 %i.02, 1
diff --git a/test/CodeGen/PowerPC/ctrloop-sums.ll b/test/CodeGen/PowerPC/ctrloop-sums.ll
index d9965f280e72..056ee3448c75 100644
--- a/test/CodeGen/PowerPC/ctrloop-sums.ll
+++ b/test/CodeGen/PowerPC/ctrloop-sums.ll
@@ -23,8 +23,8 @@ for.inc6.us: ; preds = %for.body3.us
for.body3.us: ; preds = %for.body3.us, %for.body3.lr.ph.us
%indvars.iv = phi i64 [ 0, %for.body3.lr.ph.us ], [ %indvars.iv.next, %for.body3.us ]
%Result.111.us = phi i32 [ %Result.014.us, %for.body3.lr.ph.us ], [ %add.us, %for.body3.us ]
- %arrayidx5.us = getelementptr inbounds [100 x i32]* %Array, i64 %indvars.iv16, i64 %indvars.iv
- %0 = load i32* %arrayidx5.us, align 4
+ %arrayidx5.us = getelementptr inbounds [100 x i32], [100 x i32]* %Array, i64 %indvars.iv16, i64 %indvars.iv
+ %0 = load i32, i32* %arrayidx5.us, align 4
%add.us = add nsw i32 %0, %Result.111.us
%indvars.iv.next = add i64 %indvars.iv, 1
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
@@ -59,7 +59,7 @@ for.body: ; preds = %for.body, %entry
%indvars.iv33 = phi i64 [ 0, %entry ], [ %indvars.iv.next34, %for.body ]
%0 = trunc i64 %indvars.iv33 to i32
%sub = sub i32 0, %0
- %arrayidx2 = getelementptr inbounds [100 x [100 x i32]]* %Array, i64 0, i64 %indvars.iv33, i64 %indvars.iv33
+ %arrayidx2 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* %Array, i64 0, i64 %indvars.iv33, i64 %indvars.iv33
store i32 %sub, i32* %arrayidx2, align 4
%indvars.iv.next34 = add i64 %indvars.iv33, 1
%lftr.wideiv35 = trunc i64 %indvars.iv.next34 to i32
@@ -79,7 +79,7 @@ for.body8: ; preds = %for.inc14, %for.con
if.then: ; preds = %for.body8
%3 = add i64 %indvars.iv, %indvars.iv29
- %arrayidx13 = getelementptr inbounds [100 x [100 x i32]]* %Array, i64 0, i64 %indvars.iv29, i64 %indvars.iv
+ %arrayidx13 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* %Array, i64 0, i64 %indvars.iv29, i64 %indvars.iv
%4 = trunc i64 %3 to i32
store i32 %4, i32* %arrayidx13, align 4
br label %for.inc14
@@ -105,8 +105,8 @@ for.inc6.us.i: ; preds = %for.body3.us.i
for.body3.us.i: ; preds = %for.body3.lr.ph.us.i, %for.body3.us.i
%indvars.iv.i = phi i64 [ 0, %for.body3.lr.ph.us.i ], [ %indvars.iv.next.i, %for.body3.us.i ]
%Result.111.us.i = phi i32 [ %Result.014.us.i, %for.body3.lr.ph.us.i ], [ %add.us.i, %for.body3.us.i ]
- %arrayidx5.us.i = getelementptr inbounds [100 x [100 x i32]]* %Array, i64 0, i64 %indvars.iv16.i, i64 %indvars.iv.i
- %5 = load i32* %arrayidx5.us.i, align 4
+ %arrayidx5.us.i = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* %Array, i64 0, i64 %indvars.iv16.i, i64 %indvars.iv.i
+ %5 = load i32, i32* %arrayidx5.us.i, align 4
%add.us.i = add nsw i32 %5, %Result.111.us.i
%indvars.iv.next.i = add i64 %indvars.iv.i, 1
%lftr.wideiv = trunc i64 %indvars.iv.next.i to i32
@@ -119,7 +119,7 @@ for.body3.lr.ph.us.i: ; preds = %for.inc17, %for.inc
br label %for.body3.us.i
SumArray.exit: ; preds = %for.inc6.us.i
- %call20 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([23 x i8]* @.str, i64 0, i64 0), i32 100, i32 100, i32 %add.us.i) nounwind
+ %call20 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([23 x i8], [23 x i8]* @.str, i64 0, i64 0), i32 100, i32 100, i32 %add.us.i) nounwind
ret i32 0
; CHECK: @main
diff --git a/test/CodeGen/PowerPC/ctrloops.ll b/test/CodeGen/PowerPC/ctrloops.ll
index ccab7cb7a0ba..fff9e20d2626 100644
--- a/test/CodeGen/PowerPC/ctrloops.ll
+++ b/test/CodeGen/PowerPC/ctrloops.ll
@@ -10,7 +10,7 @@ entry:
for.body: ; preds = %for.body, %entry
%i.01 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
- %0 = load volatile i32* @a, align 4
+ %0 = load volatile i32, i32* @a, align 4
%add = add nsw i32 %0, %c
store volatile i32 %add, i32* @a, align 4
%inc = add nsw i32 %i.01, 1
@@ -34,7 +34,7 @@ entry:
for.body: ; preds = %entry, %for.body
%i.02 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
- %0 = load volatile i32* @a, align 4
+ %0 = load volatile i32, i32* @a, align 4
%add = add nsw i32 %0, %c
store volatile i32 %add, i32* @a, align 4
%inc = add nsw i32 %i.02, 1
@@ -58,7 +58,7 @@ entry:
for.body: ; preds = %entry, %for.body
%i.02 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
%mul = mul nsw i32 %i.02, %c
- %0 = load volatile i32* @a, align 4
+ %0 = load volatile i32, i32* @a, align 4
%add = add nsw i32 %0, %mul
store volatile i32 %add, i32* @a, align 4
%inc = add nsw i32 %i.02, 1
diff --git a/test/CodeGen/PowerPC/cttz-ctlz-spec.ll b/test/CodeGen/PowerPC/cttz-ctlz-spec.ll
deleted file mode 100644
index 13b017a746ec..000000000000
--- a/test/CodeGen/PowerPC/cttz-ctlz-spec.ll
+++ /dev/null
@@ -1,41 +0,0 @@
-; RUN: opt -S -codegenprepare < %s | FileCheck %s
-target datalayout = "E-m:e-i64:64-n32:64"
-target triple = "powerpc64-unknown-linux-gnu"
-
-define i64 @test1(i64 %A) {
-; CHECK-LABEL: @test1(
-; CHECK: [[CTLZ:%[A-Za-z0-9]+]] = call i64 @llvm.ctlz.i64(i64 %A, i1 false)
-; CHECK-NEXT: ret i64 [[CTLZ]]
-entry:
- %tobool = icmp eq i64 %A, 0
- br i1 %tobool, label %cond.end, label %cond.true
-
-cond.true: ; preds = %entry
- %0 = tail call i64 @llvm.ctlz.i64(i64 %A, i1 true)
- br label %cond.end
-
-cond.end: ; preds = %entry, %cond.true
- %cond = phi i64 [ %0, %cond.true ], [ 64, %entry ]
- ret i64 %cond
-}
-
-define i64 @test1b(i64 %A) {
-; CHECK-LABEL: @test1b(
-; CHECK: [[CTTZ:%[A-Za-z0-9]+]] = call i64 @llvm.cttz.i64(i64 %A, i1 false)
-; CHECK-NEXT: ret i64 [[CTTZ]]
-entry:
- %tobool = icmp eq i64 %A, 0
- br i1 %tobool, label %cond.end, label %cond.true
-
-cond.true: ; preds = %entry
- %0 = tail call i64 @llvm.cttz.i64(i64 %A, i1 true)
- br label %cond.end
-
-cond.end: ; preds = %entry, %cond.true
- %cond = phi i64 [ %0, %cond.true ], [ 64, %entry ]
- ret i64 %cond
-}
-
-declare i64 @llvm.ctlz.i64(i64, i1)
-declare i64 @llvm.cttz.i64(i64, i1)
-
diff --git a/test/CodeGen/PowerPC/cttz.ll b/test/CodeGen/PowerPC/cttz.ll
index 3757fa3e2f29..60de982d91a1 100644
--- a/test/CodeGen/PowerPC/cttz.ll
+++ b/test/CodeGen/PowerPC/cttz.ll
@@ -6,7 +6,7 @@ declare i32 @llvm.cttz.i32(i32, i1)
define i32 @bar(i32 %x) {
entry:
; CHECK: @bar
-; CHECK: cntlzw
+; CHECK: cntlz
%tmp.1 = call i32 @llvm.cttz.i32( i32 %x, i1 true ) ; <i32> [#uses=1]
ret i32 %tmp.1
}
diff --git a/test/CodeGen/PowerPC/dbg.ll b/test/CodeGen/PowerPC/dbg.ll
index bd153674eab1..87914025b733 100644
--- a/test/CodeGen/PowerPC/dbg.ll
+++ b/test/CodeGen/PowerPC/dbg.ll
@@ -6,8 +6,8 @@ target triple = "powerpc64-unknown-linux-gnu"
define i32 @main(i32 %argc, i8** nocapture %argv) nounwind readnone {
entry:
- tail call void @llvm.dbg.value(metadata i32 %argc, i64 0, metadata !15, metadata !{!"0x102"}), !dbg !17
- tail call void @llvm.dbg.value(metadata i8** %argv, i64 0, metadata !16, metadata !{!"0x102"}), !dbg !18
+ tail call void @llvm.dbg.value(metadata i32 %argc, i64 0, metadata !15, metadata !DIExpression()), !dbg !17
+ tail call void @llvm.dbg.value(metadata i8** %argv, i64 0, metadata !16, metadata !DIExpression()), !dbg !18
%add = add nsw i32 %argc, 1, !dbg !19
ret i32 %add, !dbg !19
}
@@ -17,23 +17,23 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnon
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!22}
-!0 = !{!"0x11\0012\00clang version 3.1\001\00\000\00\000", !21, !1, !1, !3, !1, !""} ; [ DW_TAG_compile_unit ]
+!0 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.1", isOptimized: true, emissionKind: 0, file: !21, enums: !1, retainedTypes: !1, subprograms: !3, globals: !1, imports: !1)
!1 = !{}
!3 = !{!5}
-!5 = !{!"0x2e\00main\00main\00\001\000\001\000\006\00256\001\000", !21, null, !7, null, i32 (i32, i8**)* @main, null, null, !13} ; [ DW_TAG_subprogram ]
-!6 = !{!"0x29", !21} ; [ DW_TAG_file_type ]
-!7 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !8, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!5 = !DISubprogram(name: "main", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, file: !21, scope: null, type: !7, function: i32 (i32, i8**)* @main, variables: !13)
+!6 = !DIFile(filename: "dbg.c", directory: "/src")
+!7 = !DISubroutineType(types: !8)
!8 = !{!9, !9, !10}
-!9 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ]
-!10 = !{!"0xf\00\000\0064\0064\000\000", null, null, !11} ; [ DW_TAG_pointer_type ]
-!11 = !{!"0xf\00\000\0064\0064\000\000", null, null, !12} ; [ DW_TAG_pointer_type ]
-!12 = !{!"0x24\00char\000\008\008\000\000\008", null, null} ; [ DW_TAG_base_type ]
+!9 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!10 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, baseType: !11)
+!11 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, baseType: !12)
+!12 = !DIBasicType(tag: DW_TAG_base_type, name: "char", size: 8, align: 8, encoding: DW_ATE_unsigned_char)
!13 = !{!15, !16}
-!15 = !{!"0x101\00argc\0016777217\000", !5, !6, !9} ; [ DW_TAG_arg_variable ]
-!16 = !{!"0x101\00argv\0033554433\000", !5, !6, !10} ; [ DW_TAG_arg_variable ]
-!17 = !MDLocation(line: 1, column: 14, scope: !5)
-!18 = !MDLocation(line: 1, column: 26, scope: !5)
-!19 = !MDLocation(line: 2, column: 3, scope: !20)
-!20 = !{!"0xb\001\0034\000", !21, !5} ; [ DW_TAG_lexical_block ]
-!21 = !{!"dbg.c", !"/src"}
-!22 = !{i32 1, !"Debug Info Version", i32 2}
+!15 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "argc", line: 1, arg: 1, scope: !5, file: !6, type: !9)
+!16 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "argv", line: 1, arg: 2, scope: !5, file: !6, type: !10)
+!17 = !DILocation(line: 1, column: 14, scope: !5)
+!18 = !DILocation(line: 1, column: 26, scope: !5)
+!19 = !DILocation(line: 2, column: 3, scope: !20)
+!20 = distinct !DILexicalBlock(line: 1, column: 34, file: !21, scope: !5)
+!21 = !DIFile(filename: "dbg.c", directory: "/src")
+!22 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/CodeGen/PowerPC/dcbt-sched.ll b/test/CodeGen/PowerPC/dcbt-sched.ll
index dfa1b75bd7db..51d58b47ee96 100644
--- a/test/CodeGen/PowerPC/dcbt-sched.ll
+++ b/test/CodeGen/PowerPC/dcbt-sched.ll
@@ -4,9 +4,9 @@ target triple = "powerpc64-unknown-linux-gnu"
define i8 @test1(i8* noalias %a, i8* noalias %b, i8* noalias %c) nounwind {
entry:
- %q = load i8* %b
+ %q = load i8, i8* %b
call void @llvm.prefetch(i8* %a, i32 0, i32 3, i32 1)
- %r = load i8* %c
+ %r = load i8, i8* %c
%s = add i8 %q, %r
ret i8 %s
}
diff --git a/test/CodeGen/PowerPC/delete-node.ll b/test/CodeGen/PowerPC/delete-node.ll
index a26c21154824..999af54a8a94 100644
--- a/test/CodeGen/PowerPC/delete-node.ll
+++ b/test/CodeGen/PowerPC/delete-node.ll
@@ -9,11 +9,11 @@ entry:
br label %bb1
bb1: ; preds = %bb1, %entry
- %0 = load i16* null, align 2 ; <i16> [#uses=1]
+ %0 = load i16, i16* null, align 2 ; <i16> [#uses=1]
%1 = ashr i16 %0, 4 ; <i16> [#uses=1]
%2 = sext i16 %1 to i32 ; <i32> [#uses=1]
- %3 = getelementptr i8* null, i32 %2 ; <i8*> [#uses=1]
- %4 = load i8* %3, align 1 ; <i8> [#uses=1]
+ %3 = getelementptr i8, i8* null, i32 %2 ; <i8*> [#uses=1]
+ %4 = load i8, i8* %3, align 1 ; <i8> [#uses=1]
%5 = zext i8 %4 to i32 ; <i32> [#uses=1]
%6 = shl i32 %5, 24 ; <i32> [#uses=1]
%7 = or i32 0, %6 ; <i32> [#uses=1]
diff --git a/test/CodeGen/PowerPC/div-e-32.ll b/test/CodeGen/PowerPC/div-e-32.ll
new file mode 100644
index 000000000000..588756bb8dd8
--- /dev/null
+++ b/test/CodeGen/PowerPC/div-e-32.ll
@@ -0,0 +1,31 @@
+; RUN: llc -mtriple=powerpc-unknown-linux-gnu -mcpu=pwr7 < %s | FileCheck %s
+; RUN: llc -mtriple=powerpc-unknown-linux-gnu -mcpu=pwr8 < %s | FileCheck %s
+
+; Function Attrs: nounwind
+define signext i32 @test1() #0 {
+entry:
+ %0 = call i32 @llvm.ppc.divwe(i32 32, i32 16)
+ ret i32 %0
+; CHECK: divwe 3, 4, 3
+}
+
+; Function Attrs: nounwind readnone
+declare i32 @llvm.ppc.divwe(i32, i32) #1
+
+; Function Attrs: nounwind
+define signext i32 @test2() #0 {
+entry:
+ %0 = call i32 @llvm.ppc.divweu(i32 32, i32 16)
+ ret i32 %0
+; CHECK: divweu 3, 4, 3
+}
+
+; Function Attrs: nounwind readnone
+declare i32 @llvm.ppc.divweu(i32, i32) #1
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone }
+
+!llvm.ident = !{!0}
+
+!0 = !{!"clang version 3.7.0 (trunk 231831) (llvm/trunk 231828:231843M)"}
diff --git a/test/CodeGen/PowerPC/div-e-all.ll b/test/CodeGen/PowerPC/div-e-all.ll
new file mode 100644
index 000000000000..912deeb2b3e0
--- /dev/null
+++ b/test/CodeGen/PowerPC/div-e-all.ll
@@ -0,0 +1,54 @@
+; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 < %s | FileCheck %s
+; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 < %s | FileCheck %s
+; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 < %s | FileCheck %s
+
+; Function Attrs: nounwind
+define signext i32 @test1() #0 {
+entry:
+ %0 = call i32 @llvm.ppc.divwe(i32 32, i32 16)
+ ret i32 %0
+; CHECK: divwe 3, 4, 3
+}
+
+; Function Attrs: nounwind readnone
+declare i32 @llvm.ppc.divwe(i32, i32) #1
+
+; Function Attrs: nounwind
+define signext i32 @test2() #0 {
+entry:
+ %0 = call i32 @llvm.ppc.divweu(i32 32, i32 16)
+ ret i32 %0
+; CHECK: divweu 3, 4, 3
+}
+
+; Function Attrs: nounwind readnone
+declare i32 @llvm.ppc.divweu(i32, i32) #1
+
+; Function Attrs: nounwind
+define i64 @test3() #0 {
+entry:
+ %0 = call i64 @llvm.ppc.divde(i64 32, i64 16)
+ ret i64 %0
+; CHECK: divde 3, 4, 3
+}
+
+; Function Attrs: nounwind readnone
+declare i64 @llvm.ppc.divde(i64, i64) #1
+
+; Function Attrs: nounwind
+define i64 @test4() #0 {
+entry:
+ %0 = call i64 @llvm.ppc.divdeu(i64 32, i64 16)
+ ret i64 %0
+; CHECK: divdeu 3, 4, 3
+}
+
+; Function Attrs: nounwind readnone
+declare i64 @llvm.ppc.divdeu(i64, i64) #1
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone }
+
+!llvm.ident = !{!0}
+
+!0 = !{!"clang version 3.7.0 (trunk 231831) (llvm/trunk 231828:231843M)"}
diff --git a/test/CodeGen/PowerPC/dyn-alloca-aligned.ll b/test/CodeGen/PowerPC/dyn-alloca-aligned.ll
index a5d45b8e94a0..98b0a175f369 100644
--- a/test/CodeGen/PowerPC/dyn-alloca-aligned.ll
+++ b/test/CodeGen/PowerPC/dyn-alloca-aligned.ll
@@ -11,12 +11,12 @@ entry:
%0 = zext i32 %n to i64
%vla = alloca i32, i64 %0, align 128
%vla1 = alloca i32, i64 %0, align 128
- %a2 = getelementptr inbounds %struct.s* %a, i64 0, i32 0
- %1 = load i32* %a2, align 4
+ %a2 = getelementptr inbounds %struct.s, %struct.s* %a, i64 0, i32 0
+ %1 = load i32, i32* %a2, align 4
store i32 %1, i32* %vla1, align 128
- %b = getelementptr inbounds %struct.s* %a, i64 0, i32 1
- %2 = load i32* %b, align 4
- %arrayidx3 = getelementptr inbounds i32* %vla1, i64 1
+ %b = getelementptr inbounds %struct.s, %struct.s* %a, i64 0, i32 1
+ %2 = load i32, i32* %b, align 4
+ %arrayidx3 = getelementptr inbounds i32, i32* %vla1, i64 1
store i32 %2, i32* %arrayidx3, align 4
call void @bar(i32* %vla1, i32* %vla) #0
ret void
diff --git a/test/CodeGen/PowerPC/early-ret.ll b/test/CodeGen/PowerPC/early-ret.ll
index 7d3e225a1e29..52cf464b9fd5 100644
--- a/test/CodeGen/PowerPC/early-ret.ll
+++ b/test/CodeGen/PowerPC/early-ret.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu | FileCheck %s
target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
target triple = "powerpc64-unknown-linux-gnu"
@@ -45,4 +45,37 @@ if.end3: ; preds = %if.then, %if.then2,
; CHECK: blr
}
+
+@.str0 = private unnamed_addr constant [2 x i8] c"a\00"
+@.str1 = private unnamed_addr constant [2 x i8] c"b\00"
+@.str2 = private unnamed_addr constant [2 x i8] c"c\00"
+@.str3 = private unnamed_addr constant [2 x i8] c"d\00"
+@.str4 = private unnamed_addr constant [2 x i8] c"e\00"
+define i8* @dont_assert(i32 %x) {
+; LLVM would assert due to moving an early return into the jump table block and
+; removing one of its predecessors despite that block ending with an indirect
+; branch.
+entry:
+ switch i32 %x, label %sw.epilog [
+ i32 1, label %return
+ i32 2, label %sw.bb1
+ i32 3, label %sw.bb2
+ i32 4, label %sw.bb3
+ i32 255, label %sw.bb4
+ ]
+sw.bb1: br label %return
+sw.bb2: br label %return
+sw.bb3: br label %return
+sw.bb4: br label %return
+sw.epilog: br label %return
+return:
+ %retval.0 = phi i8* [ null, %sw.epilog ],
+ [ getelementptr inbounds ([2 x i8], [2 x i8]* @.str4, i64 0, i64 0), %sw.bb4 ],
+ [ getelementptr inbounds ([2 x i8], [2 x i8]* @.str3, i64 0, i64 0), %sw.bb3 ],
+ [ getelementptr inbounds ([2 x i8], [2 x i8]* @.str2, i64 0, i64 0), %sw.bb2 ],
+ [ getelementptr inbounds ([2 x i8], [2 x i8]* @.str1, i64 0, i64 0), %sw.bb1 ],
+ [ getelementptr inbounds ([2 x i8], [2 x i8]* @.str0, i64 0, i64 0), %entry ]
+ ret i8* %retval.0
+}
+
attributes #0 = { nounwind }
diff --git a/test/CodeGen/PowerPC/ec-input.ll b/test/CodeGen/PowerPC/ec-input.ll
new file mode 100644
index 000000000000..a57f69be12da
--- /dev/null
+++ b/test/CodeGen/PowerPC/ec-input.ll
@@ -0,0 +1,155 @@
+; RUN: llc -verify-machineinstrs < %s | FileCheck %s
+; This test case used to fail both with and without -verify-machineinstrs
+; (-verify-machineinstrs would catch the problem right after instruction
+; scheduling because the live intervals would not be right for the registers
+; that were both inputs to the inline asm and also early-clobber outputs).
+
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-bgq-linux"
+
+%struct._IO_FILE.119.8249.32639.195239.200117.211499.218003.221255.222881.224507.226133.240767.244019.245645.248897.260279.271661.281417.283043.302555.304181.325319.326945.344713 = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker.118.8248.32638.195238.200116.211498.218002.221254.222880.224506.226132.240766.244018.245644.248896.260278.271660.281416.283042.302554.304180.325318.326944.344712*, %struct._IO_FILE.119.8249.32639.195239.200117.211499.218003.221255.222881.224507.226133.240767.244019.245645.248897.260279.271661.281417.283043.302555.304181.325319.326945.344713*, i32, i32, i64, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i64, i32, [20 x i8] }
+%struct._IO_marker.118.8248.32638.195238.200116.211498.218002.221254.222880.224506.226132.240766.244018.245644.248896.260278.271660.281416.283042.302554.304180.325318.326944.344712 = type { %struct._IO_marker.118.8248.32638.195238.200116.211498.218002.221254.222880.224506.226132.240766.244018.245644.248896.260278.271660.281416.283042.302554.304180.325318.326944.344712*, %struct._IO_FILE.119.8249.32639.195239.200117.211499.218003.221255.222881.224507.226133.240767.244019.245645.248897.260279.271661.281417.283043.302555.304181.325319.326945.344713*, i32 }
+
+@.str236 = external unnamed_addr constant [121 x i8], align 1
+@.str294 = external unnamed_addr constant [49 x i8], align 1
+
+; Function Attrs: nounwind
+declare void @fprintf(%struct._IO_FILE.119.8249.32639.195239.200117.211499.218003.221255.222881.224507.226133.240767.244019.245645.248897.260279.271661.281417.283043.302555.304181.325319.326945.344713* nocapture, i8* nocapture readonly, ...) #0
+
+; Function Attrs: inlinehint nounwind
+define void @_ZN4PAMI6Device2MU15ResourceManager46calculatePerCoreMUResourcesBasedOnAvailabilityEv() #1 align 2 {
+; CHECK-LABEL: @_ZN4PAMI6Device2MU15ResourceManager46calculatePerCoreMUResourcesBasedOnAvailabilityEv
+; CHECK: sc
+
+entry:
+ %numFreeResourcesInSubgroup = alloca i32, align 4
+ %0 = ptrtoint i32* %numFreeResourcesInSubgroup to i64
+ br label %for.cond2.preheader
+
+for.cond2.preheader: ; preds = %if.end23.3, %entry
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %if.end23.3 ]
+ %group.098 = phi i32 [ 0, %entry ], [ %inc37, %if.end23.3 ]
+ %minFreeBatIdsPerCore.097 = phi i64 [ 32, %entry ], [ %numFreeBatIdsInGroup.0.minFreeBatIdsPerCore.0, %if.end23.3 ]
+ %minFreeRecFifosPerCore.096 = phi i64 [ 16, %entry ], [ %minFreeRecFifosPerCore.1, %if.end23.3 ]
+ %minFreeInjFifosPerCore.095 = phi i64 [ 32, %entry ], [ %numFreeInjFifosInGroup.0.minFreeInjFifosPerCore.0, %if.end23.3 ]
+ %cmp5 = icmp eq i32 undef, 0
+ br i1 %cmp5, label %if.end, label %if.then
+
+if.then: ; preds = %if.end23.2, %if.end23.1, %if.end23, %for.cond2.preheader
+ unreachable
+
+if.end: ; preds = %for.cond2.preheader
+ %1 = load i32, i32* %numFreeResourcesInSubgroup, align 4
+ %conv = zext i32 %1 to i64
+ %2 = call { i64, i64, i64, i64 } asm sideeffect "sc", "=&{r0},=&{r3},=&{r4},=&{r5},{r0},{r3},{r4},{r5},~{r6},~{r7},~{r8},~{r9},~{r10},~{r11},~{r12},~{cr0},~{memory}"(i64 1034, i64 %indvars.iv, i64 %0, i64 undef) #2
+ %cmp10 = icmp eq i32 0, 0
+ br i1 %cmp10, label %if.end14, label %if.then11
+
+if.then11: ; preds = %if.end.3, %if.end.2, %if.end.1, %if.end
+ unreachable
+
+if.end14: ; preds = %if.end
+ %3 = load i32, i32* %numFreeResourcesInSubgroup, align 4
+ %cmp19 = icmp eq i32 undef, 0
+ br i1 %cmp19, label %if.end23, label %if.then20
+
+if.then20: ; preds = %if.end14.3, %if.end14.2, %if.end14.1, %if.end14
+ %conv4.i65.lcssa = phi i32 [ undef, %if.end14 ], [ 0, %if.end14.1 ], [ %conv4.i65.2, %if.end14.2 ], [ %conv4.i65.3, %if.end14.3 ]
+ call void (%struct._IO_FILE.119.8249.32639.195239.200117.211499.218003.221255.222881.224507.226133.240767.244019.245645.248897.260279.271661.281417.283043.302555.304181.325319.326945.344713*, i8*, ...) @fprintf(%struct._IO_FILE.119.8249.32639.195239.200117.211499.218003.221255.222881.224507.226133.240767.244019.245645.248897.260279.271661.281417.283043.302555.304181.325319.326945.344713* undef, i8* getelementptr inbounds ([121 x i8], [121 x i8]* @.str236, i64 0, i64 0), i32 signext 2503) #3
+ call void (%struct._IO_FILE.119.8249.32639.195239.200117.211499.218003.221255.222881.224507.226133.240767.244019.245645.248897.260279.271661.281417.283043.302555.304181.325319.326945.344713*, i8*, ...) @fprintf(%struct._IO_FILE.119.8249.32639.195239.200117.211499.218003.221255.222881.224507.226133.240767.244019.245645.248897.260279.271661.281417.283043.302555.304181.325319.326945.344713* undef, i8* getelementptr inbounds ([49 x i8], [49 x i8]* @.str294, i64 0, i64 0), i32 signext %conv4.i65.lcssa) #3
+ unreachable
+
+if.end23: ; preds = %if.end14
+ %conv15 = zext i32 %3 to i64
+ %4 = load i32, i32* %numFreeResourcesInSubgroup, align 4
+ %conv24 = zext i32 %4 to i64
+ %5 = call { i64, i64, i64, i64 } asm sideeffect "sc", "=&{r0},=&{r3},=&{r4},=&{r5},{r0},{r3},{r4},{r5},~{r6},~{r7},~{r8},~{r9},~{r10},~{r11},~{r12},~{cr0},~{memory}"(i64 1033, i64 0, i64 %0, i64 undef) #2
+ %cmp5.1 = icmp eq i32 0, 0
+ br i1 %cmp5.1, label %if.end.1, label %if.then
+
+for.end38: ; preds = %if.end23.3
+ ret void
+
+if.end.1: ; preds = %if.end23
+ %6 = load i32, i32* %numFreeResourcesInSubgroup, align 4
+ %conv.1 = zext i32 %6 to i64
+ %add.1 = add nuw nsw i64 %conv.1, %conv
+ %7 = call { i64, i64, i64, i64 } asm sideeffect "sc", "=&{r0},=&{r3},=&{r4},=&{r5},{r0},{r3},{r4},{r5},~{r6},~{r7},~{r8},~{r9},~{r10},~{r11},~{r12},~{cr0},~{memory}"(i64 1034, i64 0, i64 %0, i64 undef) #2
+ %cmp10.1 = icmp eq i32 undef, 0
+ br i1 %cmp10.1, label %if.end14.1, label %if.then11
+
+if.end14.1: ; preds = %if.end.1
+ %8 = load i32, i32* %numFreeResourcesInSubgroup, align 4
+ %cmp19.1 = icmp eq i32 0, 0
+ br i1 %cmp19.1, label %if.end23.1, label %if.then20
+
+if.end23.1: ; preds = %if.end14.1
+ %conv15.1 = zext i32 %8 to i64
+ %add16.1 = add nuw nsw i64 %conv15.1, %conv15
+ %9 = load i32, i32* %numFreeResourcesInSubgroup, align 4
+ %conv24.1 = zext i32 %9 to i64
+ %add25.1 = add nuw nsw i64 %conv24.1, %conv24
+ %cmp5.2 = icmp eq i32 undef, 0
+ br i1 %cmp5.2, label %if.end.2, label %if.then
+
+if.end.2: ; preds = %if.end23.1
+ %10 = load i32, i32* %numFreeResourcesInSubgroup, align 4
+ %conv.2 = zext i32 %10 to i64
+ %add.2 = add nuw nsw i64 %conv.2, %add.1
+ %11 = call { i64, i64, i64, i64 } asm sideeffect "sc", "=&{r0},=&{r3},=&{r4},=&{r5},{r0},{r3},{r4},{r5},~{r6},~{r7},~{r8},~{r9},~{r10},~{r11},~{r12},~{cr0},~{memory}"(i64 1034, i64 undef, i64 %0, i64 undef) #2
+ %cmp10.2 = icmp eq i32 0, 0
+ br i1 %cmp10.2, label %if.end14.2, label %if.then11
+
+if.end14.2: ; preds = %if.end.2
+ %12 = load i32, i32* %numFreeResourcesInSubgroup, align 4
+ %13 = call { i64, i64, i64, i64 } asm sideeffect "sc", "=&{r0},=&{r3},=&{r4},=&{r5},{r0},{r3},{r4},{r5},~{r6},~{r7},~{r8},~{r9},~{r10},~{r11},~{r12},~{cr0},~{memory}"(i64 1035, i64 undef, i64 %0, i64 0) #2
+ %asmresult1.i64.2 = extractvalue { i64, i64, i64, i64 } %13, 1
+ %conv4.i65.2 = trunc i64 %asmresult1.i64.2 to i32
+ %cmp19.2 = icmp eq i32 %conv4.i65.2, 0
+ br i1 %cmp19.2, label %if.end23.2, label %if.then20
+
+if.end23.2: ; preds = %if.end14.2
+ %conv15.2 = zext i32 %12 to i64
+ %add16.2 = add nuw nsw i64 %conv15.2, %add16.1
+ %14 = load i32, i32* %numFreeResourcesInSubgroup, align 4
+ %conv24.2 = zext i32 %14 to i64
+ %add25.2 = add nuw nsw i64 %conv24.2, %add25.1
+ %cmp5.3 = icmp eq i32 0, 0
+ br i1 %cmp5.3, label %if.end.3, label %if.then
+
+if.end.3: ; preds = %if.end23.2
+ %15 = load i32, i32* %numFreeResourcesInSubgroup, align 4
+ %conv.3 = zext i32 %15 to i64
+ %add.3 = add nuw nsw i64 %conv.3, %add.2
+ %cmp10.3 = icmp eq i32 undef, 0
+ br i1 %cmp10.3, label %if.end14.3, label %if.then11
+
+if.end14.3: ; preds = %if.end.3
+ %16 = load i32, i32* %numFreeResourcesInSubgroup, align 4
+ %17 = call { i64, i64, i64, i64 } asm sideeffect "sc", "=&{r0},=&{r3},=&{r4},=&{r5},{r0},{r3},{r4},{r5},~{r6},~{r7},~{r8},~{r9},~{r10},~{r11},~{r12},~{cr0},~{memory}"(i64 1035, i64 0, i64 %0, i64 0) #2
+ %asmresult1.i64.3 = extractvalue { i64, i64, i64, i64 } %17, 1
+ %conv4.i65.3 = trunc i64 %asmresult1.i64.3 to i32
+ %cmp19.3 = icmp eq i32 %conv4.i65.3, 0
+ br i1 %cmp19.3, label %if.end23.3, label %if.then20
+
+if.end23.3: ; preds = %if.end14.3
+ %conv15.3 = zext i32 %16 to i64
+ %add16.3 = add nuw nsw i64 %conv15.3, %add16.2
+ %add25.3 = add nuw nsw i64 0, %add25.2
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 4
+ %cmp27 = icmp ult i64 %add.3, %minFreeInjFifosPerCore.095
+ %numFreeInjFifosInGroup.0.minFreeInjFifosPerCore.0 = select i1 %cmp27, i64 %add.3, i64 %minFreeInjFifosPerCore.095
+ %cmp30 = icmp ult i64 %add16.3, %minFreeRecFifosPerCore.096
+ %minFreeRecFifosPerCore.1 = select i1 %cmp30, i64 %add16.3, i64 %minFreeRecFifosPerCore.096
+ %cmp33 = icmp ult i64 %add25.3, %minFreeBatIdsPerCore.097
+ %numFreeBatIdsInGroup.0.minFreeBatIdsPerCore.0 = select i1 %cmp33, i64 %add25.3, i64 %minFreeBatIdsPerCore.097
+ %inc37 = add nuw nsw i32 %group.098, 1
+ %cmp = icmp ult i32 %inc37, 16
+ br i1 %cmp, label %for.cond2.preheader, label %for.end38
+}
+
+attributes #0 = { nounwind "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "target-cpu"="a2q" }
+attributes #1 = { inlinehint nounwind "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "target-cpu"="a2q" }
+attributes #2 = { nounwind }
+attributes #3 = { cold nounwind }
+
diff --git a/test/CodeGen/PowerPC/empty-functions.ll b/test/CodeGen/PowerPC/empty-functions.ll
index e32a8472b835..aa760d82b1c7 100644
--- a/test/CodeGen/PowerPC/empty-functions.ll
+++ b/test/CodeGen/PowerPC/empty-functions.ll
@@ -17,16 +17,18 @@ entry:
; An empty function is perfectly fine on ELF.
; LINUX-NO-FP: func:
+; LINUX-NO-FP-NEXT: {{^}}.L[[BEGIN:.*]]:{{$}}
; LINUX-NO-FP-NEXT: .cfi_startproc
; LINUX-NO-FP-NEXT: {{^}}#
-; LINUX-NO-FP-NEXT: {{^}}.L{{.*}}:{{$}}
-; LINUX-NO-FP-NEXT: .size func, .L{{.*}}-func
+; LINUX-NO-FP-NEXT: {{^}}.L[[END:.*]]:{{$}}
+; LINUX-NO-FP-NEXT: .size func, .L[[END]]-.L[[BEGIN]]
; LINUX-NO-FP-NEXT: .cfi_endproc
; A cfi directive can point to the end of a function. It (and in fact the
; entire body) could be optimized out because of the unreachable, but we
; don't do it right now.
; LINUX-FP: func:
+; LINUX-FP-NEXT: {{^}}.L[[BEGIN:.*]]:{{$}}
; LINUX-FP-NEXT: .cfi_startproc
; LINUX-FP-NEXT: {{^}}#
; LINUX-FP-NEXT: stw 31, -4(1)
@@ -38,6 +40,6 @@ entry:
; LINUX-FP-NEXT: mr 31, 1
; LINUX-FP-NEXT:{{^}}.L{{.*}}:{{$}}
; LINUX-FP-NEXT: .cfi_def_cfa_register r31
-; LINUX-FP-NEXT:{{^}}.L{{.*}}:{{$}}
-; LINUX-FP-NEXT: .size func, .Ltmp3-func
+; LINUX-FP-NEXT: {{^}}.L[[END:.*]]:{{$}}
+; LINUX-FP-NEXT: .size func, .L[[END]]-.L[[BEGIN]]
; LINUX-FP-NEXT: .cfi_endproc
diff --git a/test/CodeGen/PowerPC/emptystruct.ll b/test/CodeGen/PowerPC/emptystruct.ll
index 47cfadd0a7bb..66cada14f9d2 100644
--- a/test/CodeGen/PowerPC/emptystruct.ll
+++ b/test/CodeGen/PowerPC/emptystruct.ll
@@ -18,7 +18,7 @@ define void @callee(%struct.empty* noalias sret %agg.result, %struct.empty* byva
entry:
%a2.addr = alloca %struct.empty*, align 8
store %struct.empty* %a2, %struct.empty** %a2.addr, align 8
- %0 = load %struct.empty** %a2.addr, align 8
+ %0 = load %struct.empty*, %struct.empty** %a2.addr, align 8
%1 = bitcast %struct.empty* %agg.result to i8*
%2 = bitcast %struct.empty* %0 to i8*
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* %2, i64 0, i32 1, i1 false)
diff --git a/test/CodeGen/PowerPC/eqv-andc-orc-nor.ll b/test/CodeGen/PowerPC/eqv-andc-orc-nor.ll
index f99089b3bb02..f90eccb359a8 100644
--- a/test/CodeGen/PowerPC/eqv-andc-orc-nor.ll
+++ b/test/CodeGen/PowerPC/eqv-andc-orc-nor.ll
@@ -69,9 +69,9 @@ define i32 @NAND1(i32 %X, i32 %Y) nounwind {
}
define void @VNOR(<4 x float>* %P, <4 x float>* %Q) nounwind {
- %tmp = load <4 x float>* %P ; <<4 x float>> [#uses=1]
+ %tmp = load <4 x float>, <4 x float>* %P ; <<4 x float>> [#uses=1]
%tmp.upgrd.1 = bitcast <4 x float> %tmp to <4 x i32> ; <<4 x i32>> [#uses=1]
- %tmp2 = load <4 x float>* %Q ; <<4 x float>> [#uses=1]
+ %tmp2 = load <4 x float>, <4 x float>* %Q ; <<4 x float>> [#uses=1]
%tmp2.upgrd.2 = bitcast <4 x float> %tmp2 to <4 x i32> ; <<4 x i32>> [#uses=1]
%tmp3 = or <4 x i32> %tmp.upgrd.1, %tmp2.upgrd.2 ; <<4 x i32>> [#uses=1]
%tmp4 = xor <4 x i32> %tmp3, < i32 -1, i32 -1, i32 -1, i32 -1 > ; <<4 x i32>> [#uses=1]
@@ -81,9 +81,9 @@ define void @VNOR(<4 x float>* %P, <4 x float>* %Q) nounwind {
}
define void @VANDC(<4 x float>* %P, <4 x float>* %Q) nounwind {
- %tmp = load <4 x float>* %P ; <<4 x float>> [#uses=1]
+ %tmp = load <4 x float>, <4 x float>* %P ; <<4 x float>> [#uses=1]
%tmp.upgrd.4 = bitcast <4 x float> %tmp to <4 x i32> ; <<4 x i32>> [#uses=1]
- %tmp2 = load <4 x float>* %Q ; <<4 x float>> [#uses=1]
+ %tmp2 = load <4 x float>, <4 x float>* %Q ; <<4 x float>> [#uses=1]
%tmp2.upgrd.5 = bitcast <4 x float> %tmp2 to <4 x i32> ; <<4 x i32>> [#uses=1]
%tmp4 = xor <4 x i32> %tmp2.upgrd.5, < i32 -1, i32 -1, i32 -1, i32 -1 > ; <<4 x i32>> [#uses=1]
%tmp3 = and <4 x i32> %tmp.upgrd.4, %tmp4 ; <<4 x i32>> [#uses=1]
diff --git a/test/CodeGen/PowerPC/extra-toc-reg-deps.ll b/test/CodeGen/PowerPC/extra-toc-reg-deps.ll
new file mode 100644
index 000000000000..1056c5a57aac
--- /dev/null
+++ b/test/CodeGen/PowerPC/extra-toc-reg-deps.ll
@@ -0,0 +1,430 @@
+; RUN: llc < %s | FileCheck %s
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-bgq-linux"
+
+%"class.Foam::messageStream.6" = type <{ %"class.Foam::string.5", i32, i32, i32, [4 x i8] }>
+%"class.Foam::string.5" = type { %"class.std::basic_string.4" }
+%"class.std::basic_string.4" = type { %"struct.std::basic_string<char, std::char_traits<char>, std::allocator<char> >::_Alloc_hider.3" }
+%"struct.std::basic_string<char, std::char_traits<char>, std::allocator<char> >::_Alloc_hider.3" = type { i8* }
+%"class.Foam::prefixOSstream.27" = type { %"class.Foam::OSstream.26", i8, %"class.Foam::string.5" }
+%"class.Foam::OSstream.26" = type { %"class.Foam::Ostream.base.9", %"class.Foam::fileName.10", %"class.std::basic_ostream.25"* }
+%"class.Foam::Ostream.base.9" = type <{ %"class.Foam::IOstream.8", i16 }>
+%"class.Foam::IOstream.8" = type { i32 (...)**, i32, [4 x i8], %"class.Foam::IOstream::versionNumber.7", i32, i32, i32, i32 }
+%"class.Foam::IOstream::versionNumber.7" = type <{ double, i32, [4 x i8] }>
+%"class.Foam::fileName.10" = type { %"class.Foam::string.5" }
+%"class.std::basic_ostream.25" = type { i32 (...)**, %"class.std::basic_ios.24" }
+%"class.std::basic_ios.24" = type { %"class.std::ios_base.16", %"class.std::basic_ostream.25"*, i8, i8, %"class.std::basic_streambuf.17"*, %"class.std::ctype.21"*, %"class.std::__gnu_cxx_ldbl128::num_put.22"*, %"class.std::__gnu_cxx_ldbl128::num_get.23"* }
+%"class.std::ios_base.16" = type { i32 (...)**, i64, i64, i32, i32, i32, %"struct.std::ios_base::_Callback_list.11"*, %"struct.std::ios_base::_Words.12", [8 x %"struct.std::ios_base::_Words.12"], i32, %"struct.std::ios_base::_Words.12"*, %"class.std::locale.15" }
+%"struct.std::ios_base::_Callback_list.11" = type { %"struct.std::ios_base::_Callback_list.11"*, void (i32, %"class.std::ios_base.16"*, i32)*, i32, i32 }
+%"struct.std::ios_base::_Words.12" = type { i8*, i64 }
+%"class.std::locale.15" = type { %"class.std::locale::_Impl.14"* }
+%"class.std::locale::_Impl.14" = type { i32, %"class.std::locale::facet.13"**, i64, %"class.std::locale::facet.13"**, i8** }
+%"class.std::locale::facet.13" = type <{ i32 (...)**, i32, [4 x i8] }>
+%"class.std::basic_streambuf.17" = type { i32 (...)**, i8*, i8*, i8*, i8*, i8*, i8*, %"class.std::locale.15" }
+%"class.std::ctype.21" = type <{ %"class.std::locale::facet.base.18", [4 x i8], %struct.__locale_struct.20*, i8, [7 x i8], i32*, i32*, i16*, i8, [256 x i8], [256 x i8], i8, [6 x i8] }>
+%"class.std::locale::facet.base.18" = type <{ i32 (...)**, i32 }>
+%struct.__locale_struct.20 = type { [13 x %struct.__locale_data.19*], i16*, i32*, i32*, [13 x i8*] }
+%struct.__locale_data.19 = type opaque
+%"class.std::__gnu_cxx_ldbl128::num_put.22" = type { %"class.std::locale::facet.base.18", [4 x i8] }
+%"class.std::__gnu_cxx_ldbl128::num_get.23" = type { %"class.std::locale::facet.base.18", [4 x i8] }
+%"class.Foam::primitiveMesh.135" = type { i32 (...)**, i32, i32, i32, i32, i32, i32, i32, i32, i32, %"class.Foam::List.116"*, %"class.Foam::List.0"*, %"class.Foam::List.1"*, %"class.Foam::List.1"*, %"class.Foam::List.1"*, %"class.Foam::List.5"*, %"class.Foam::List.1"*, %"class.Foam::List.1"*, %"class.Foam::List.1"*, %"class.Foam::List.1"*, %"class.Foam::List.1"*, %"class.Foam::List.1"*, %"class.Foam::List.1"*, %"class.Foam::DynamicList.40", %"class.Foam::HashSet.127", %"class.Foam::Field.131"*, %"class.Foam::Field.131"*, %"class.Foam::Field.11"*, %"class.Foam::Field.131"* }
+%"class.Foam::List.116" = type opaque
+%"class.Foam::List.0" = type { %"class.Foam::UList.119" }
+%"class.Foam::UList.119" = type { i32, %"class.Foam::edge.118"* }
+%"class.Foam::edge.118" = type { %"class.Foam::FixedList.117" }
+%"class.Foam::FixedList.117" = type { [2 x i32] }
+%"class.Foam::List.5" = type { %"class.Foam::UList.6" }
+%"class.Foam::UList.6" = type { i32, %"class.Foam::cell.121"* }
+%"class.Foam::cell.121" = type { %"class.Foam::List.3" }
+%"class.Foam::List.3" = type { %"class.Foam::UList.4" }
+%"class.Foam::UList.4" = type { i32, i32* }
+%"class.Foam::List.1" = type { %"class.Foam::UList.2" }
+%"class.Foam::UList.2" = type { i32, %"class.Foam::List.3"* }
+%"class.Foam::DynamicList.40" = type <{ %"class.Foam::List.3", i32, [4 x i8] }>
+%"class.Foam::HashSet.127" = type { %"class.Foam::HashTable.7" }
+%"class.Foam::HashTable.7" = type { i32, i32, %"struct.Foam::HashTable<Foam::nil, int, Foam::Hash<Foam::label> >::hashedEntry.125"** }
+%"struct.Foam::HashTable<Foam::nil, int, Foam::Hash<Foam::label> >::hashedEntry.125" = type <{ i32, [4 x i8], %"struct.Foam::HashTable<Foam::nil, int, Foam::Hash<Foam::label> >::hashedEntry.125"*, %"class.Foam::nil.124", [7 x i8] }>
+%"class.Foam::nil.124" = type { i8 }
+%"class.Foam::Field.11" = type { %"class.Foam::refCount.128", %"class.Foam::List.12" }
+%"class.Foam::refCount.128" = type { i32 }
+%"class.Foam::List.12" = type { %"class.Foam::UList.13" }
+%"class.Foam::UList.13" = type { i32, double* }
+%"class.Foam::Field.131" = type { %"class.Foam::refCount.128", %"class.Foam::List.8" }
+%"class.Foam::List.8" = type { %"class.Foam::UList.9" }
+%"class.Foam::UList.9" = type { i32, %"class.Foam::Vector.29"* }
+%"class.Foam::Vector.29" = type { %"class.Foam::VectorSpace.10" }
+%"class.Foam::VectorSpace.10" = type { [3 x double] }
+%"class.Foam::Ostream.189" = type <{ %"class.Foam::IOstream.8", i16, [6 x i8] }>
+
+@_ZN4Foam4InfoE = external global %"class.Foam::messageStream.6", align 8
+@.str27 = external unnamed_addr constant [24 x i8], align 1
+@.str28 = external unnamed_addr constant [7 x i8], align 1
+@_ZN4Foam4PoutE = external global %"class.Foam::prefixOSstream.27", align 8
+
+define void @_ZN4Foam13checkTopologyERKNS_8polyMeshEbb(i1 zeroext %allTopology) #0 {
+entry:
+ br i1 undef, label %for.body, label %for.cond.cleanup
+
+; CHECK-LABEL: @_ZN4Foam13checkTopologyERKNS_8polyMeshEbb
+
+; CHECK: addis [[REG1:[0-9]+]], 2, .LC0@toc@ha
+; CHECK: std 2, 40(1)
+; CHECK: ld {{[0-9]+}}, .LC0@toc@l([[REG1]])
+; CHECK: {{mr|ld}} 2,
+; CHECK: mtctr
+; CHECK: bctrl
+; CHECK: ld 2, 40(1)
+
+; CHECK: addis [[REG1:[0-9]+]], 2, .LC0@toc@ha
+; CHECK: std 2, 40(1)
+; CHECK: ld {{[0-9]+}}, .LC0@toc@l([[REG1]])
+; CHECK: {{mr|ld}} 2,
+; CHECK: mtctr
+; CHECK: bctrl
+; CHECK: ld 2, 40(1)
+
+for.cond.cleanup: ; preds = %entry
+ br i1 undef, label %if.then.i, label %if.else.i
+
+if.then.i: ; preds = %for.cond.cleanup
+ br i1 undef, label %if.then.i1435, label %if.else.i1436
+
+if.else.i: ; preds = %for.cond.cleanup
+ unreachable
+
+if.then.i1435: ; preds = %if.then.i
+ br label %_ZN4Foam12returnReduceIiNS_5sumOpIiEEEET_RKS3_RKT0_ii.exit
+
+if.else.i1436: ; preds = %if.then.i
+ br label %_ZN4Foam12returnReduceIiNS_5sumOpIiEEEET_RKS3_RKT0_ii.exit
+
+_ZN4Foam12returnReduceIiNS_5sumOpIiEEEET_RKS3_RKT0_ii.exit: ; preds = %if.else.i1436, %if.then.i1435
+ br i1 undef, label %for.body.i, label %_ZNK4Foam8ZoneMeshINS_8cellZoneENS_8polyMeshEE15checkDefinitionEb.exit
+
+for.body: ; preds = %entry
+ unreachable
+
+for.body.i: ; preds = %_ZN4Foam12returnReduceIiNS_5sumOpIiEEEET_RKS3_RKT0_ii.exit
+ unreachable
+
+_ZNK4Foam8ZoneMeshINS_8cellZoneENS_8polyMeshEE15checkDefinitionEb.exit: ; preds = %_ZN4Foam12returnReduceIiNS_5sumOpIiEEEET_RKS3_RKT0_ii.exit
+ br i1 undef, label %for.body.i1480, label %_ZNK4Foam8ZoneMeshINS_8faceZoneENS_8polyMeshEE15checkDefinitionEb.exit
+
+for.body.i1480: ; preds = %_ZNK4Foam8ZoneMeshINS_8cellZoneENS_8polyMeshEE15checkDefinitionEb.exit
+ unreachable
+
+_ZNK4Foam8ZoneMeshINS_8faceZoneENS_8polyMeshEE15checkDefinitionEb.exit: ; preds = %_ZNK4Foam8ZoneMeshINS_8cellZoneENS_8polyMeshEE15checkDefinitionEb.exit
+ br i1 undef, label %for.body.i1504, label %_ZNK4Foam8ZoneMeshINS_9pointZoneENS_8polyMeshEE15checkDefinitionEb.exit
+
+for.body.i1504: ; preds = %_ZNK4Foam8ZoneMeshINS_8faceZoneENS_8polyMeshEE15checkDefinitionEb.exit
+ unreachable
+
+_ZNK4Foam8ZoneMeshINS_9pointZoneENS_8polyMeshEE15checkDefinitionEb.exit: ; preds = %_ZNK4Foam8ZoneMeshINS_8faceZoneENS_8polyMeshEE15checkDefinitionEb.exit
+ invoke void @_ZN4Foam4word12stripInvalidEv()
+ to label %_ZN4Foam4wordC2EPKcb.exit unwind label %lpad.i
+
+lpad.i: ; preds = %_ZNK4Foam8ZoneMeshINS_9pointZoneENS_8polyMeshEE15checkDefinitionEb.exit
+ %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+ cleanup
+ resume { i8*, i32 } %0
+
+_ZN4Foam4wordC2EPKcb.exit: ; preds = %_ZNK4Foam8ZoneMeshINS_9pointZoneENS_8polyMeshEE15checkDefinitionEb.exit
+ invoke void @_ZN4Foam7cellSetC1ERKNS_8polyMeshERKNS_4wordEiNS_8IOobject11writeOptionE()
+ to label %invoke.cont59 unwind label %lpad
+
+invoke.cont59: ; preds = %_ZN4Foam4wordC2EPKcb.exit
+ br i1 undef, label %_ZNSsD2Ev.exit, label %if.then.i.i, !prof !1
+
+if.then.i.i: ; preds = %invoke.cont59
+ br i1 true, label %if.then.i.i.i1508, label %if.else.i.i.i
+
+if.then.i.i.i1508: ; preds = %if.then.i.i
+ br label %_ZN9__gnu_cxxL27__exchange_and_add_dispatchEPii.exit.i.i
+
+if.else.i.i.i: ; preds = %if.then.i.i
+ br label %_ZN9__gnu_cxxL27__exchange_and_add_dispatchEPii.exit.i.i
+
+_ZN9__gnu_cxxL27__exchange_and_add_dispatchEPii.exit.i.i: ; preds = %if.else.i.i.i, %if.then.i.i.i1508
+ br i1 undef, label %if.then4.i.i, label %_ZNSsD2Ev.exit
+
+if.then4.i.i: ; preds = %_ZN9__gnu_cxxL27__exchange_and_add_dispatchEPii.exit.i.i
+ br label %_ZNSsD2Ev.exit
+
+_ZNSsD2Ev.exit: ; preds = %if.then4.i.i, %_ZN9__gnu_cxxL27__exchange_and_add_dispatchEPii.exit.i.i, %invoke.cont59
+ br i1 undef, label %for.body70, label %for.cond.cleanup69
+
+for.cond.cleanup69: ; preds = %_ZNSsD2Ev.exit
+ br i1 undef, label %if.then121, label %if.else
+
+lpad: ; preds = %_ZN4Foam4wordC2EPKcb.exit
+ %1 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+ cleanup
+ br i1 undef, label %_ZNSsD2Ev.exit1578, label %if.then.i.i1570, !prof !1
+
+if.then.i.i1570: ; preds = %lpad
+ br i1 undef, label %if.then4.i.i1577, label %_ZNSsD2Ev.exit1578
+
+if.then4.i.i1577: ; preds = %if.then.i.i1570
+ unreachable
+
+_ZNSsD2Ev.exit1578: ; preds = %if.then.i.i1570, %lpad
+ unreachable
+
+for.body70: ; preds = %_ZNSsD2Ev.exit
+ unreachable
+
+if.then121: ; preds = %for.cond.cleanup69
+ unreachable
+
+if.else: ; preds = %for.cond.cleanup69
+ invoke void @_ZN4Foam4word12stripInvalidEv()
+ to label %_ZN4Foam4wordC2EPKcb.exit1701 unwind label %lpad.i1689
+
+lpad.i1689: ; preds = %if.else
+ %2 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+ cleanup
+ unreachable
+
+_ZN4Foam4wordC2EPKcb.exit1701: ; preds = %if.else
+ invoke void @_ZN4Foam8pointSetC1ERKNS_8polyMeshERKNS_4wordEiNS_8IOobject11writeOptionE()
+ to label %invoke.cont169 unwind label %lpad165
+
+invoke.cont169: ; preds = %_ZN4Foam4wordC2EPKcb.exit1701
+ %call177 = invoke zeroext i1 undef(%"class.Foam::primitiveMesh.135"* undef, i1 zeroext true, %"class.Foam::HashSet.127"* undef)
+ to label %invoke.cont176 unwind label %lpad175
+
+invoke.cont176: ; preds = %invoke.cont169
+ br i1 %call177, label %if.then178, label %if.end213
+
+if.then178: ; preds = %invoke.cont176
+ unreachable
+
+lpad165: ; preds = %_ZN4Foam4wordC2EPKcb.exit1701
+ %3 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+ cleanup
+ unreachable
+
+lpad175: ; preds = %invoke.cont169
+ %4 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+ cleanup
+ invoke void @_ZN4Foam8pointSetD1Ev()
+ to label %eh.resume unwind label %terminate.lpad
+
+if.end213: ; preds = %invoke.cont176
+ invoke void @_ZN4Foam4word12stripInvalidEv()
+ to label %_ZN4Foam4wordC2EPKcb.exit1777 unwind label %lpad.i1765
+
+lpad.i1765: ; preds = %if.end213
+ %5 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+ cleanup
+ br i1 undef, label %eh.resume.i1776, label %if.then.i.i.i1767, !prof !1
+
+if.then.i.i.i1767: ; preds = %lpad.i1765
+ unreachable
+
+eh.resume.i1776: ; preds = %lpad.i1765
+ resume { i8*, i32 } %5
+
+_ZN4Foam4wordC2EPKcb.exit1777: ; preds = %if.end213
+ invoke void @_ZN4Foam7faceSetC1ERKNS_8polyMeshERKNS_4wordEiNS_8IOobject11writeOptionE()
+ to label %invoke.cont221 unwind label %lpad217
+
+invoke.cont221: ; preds = %_ZN4Foam4wordC2EPKcb.exit1777
+ br i1 undef, label %_ZNSsD2Ev.exit1792, label %if.then.i.i1784, !prof !1
+
+if.then.i.i1784: ; preds = %invoke.cont221
+ br i1 undef, label %if.then4.i.i1791, label %_ZNSsD2Ev.exit1792
+
+if.then4.i.i1791: ; preds = %if.then.i.i1784
+ br label %_ZNSsD2Ev.exit1792
+
+_ZNSsD2Ev.exit1792: ; preds = %if.then4.i.i1791, %if.then.i.i1784, %invoke.cont221
+ %call232 = invoke zeroext i1 undef(%"class.Foam::primitiveMesh.135"* undef, i1 zeroext true, %"class.Foam::HashSet.127"* undef)
+ to label %invoke.cont231 unwind label %lpad230
+
+invoke.cont231: ; preds = %_ZNSsD2Ev.exit1792
+ invoke void @_ZN4Foam6reduceIiNS_5sumOpIiEEEEvRKNS_4ListINS_8UPstream11commsStructEEERT_RKT0_ii()
+ to label %invoke.cont243 unwind label %lpad230
+
+lpad217: ; preds = %_ZN4Foam4wordC2EPKcb.exit1777
+ %6 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+ cleanup
+ br label %eh.resume
+
+lpad230: ; preds = %invoke.cont231, %_ZNSsD2Ev.exit1792
+ %7 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+ cleanup
+ invoke void @_ZN4Foam7faceSetD1Ev()
+ to label %eh.resume unwind label %terminate.lpad
+
+invoke.cont243: ; preds = %invoke.cont231
+ invoke void @_ZN4Foam4word12stripInvalidEv()
+ to label %_ZN4Foam4wordC2EPKcb.exit1862 unwind label %lpad.i1850
+
+lpad.i1850: ; preds = %invoke.cont243
+ %8 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+ cleanup
+ unreachable
+
+_ZN4Foam4wordC2EPKcb.exit1862: ; preds = %invoke.cont243
+ invoke void @_ZN4Foam7faceSetC1ERKNS_8polyMeshERKNS_4wordEiNS_8IOobject11writeOptionE()
+ to label %invoke.cont280 unwind label %lpad276
+
+invoke.cont280: ; preds = %_ZN4Foam4wordC2EPKcb.exit1862
+ br i1 undef, label %_ZNSsD2Ev.exit1877, label %if.then.i.i1869, !prof !1
+
+if.then.i.i1869: ; preds = %invoke.cont280
+ unreachable
+
+_ZNSsD2Ev.exit1877: ; preds = %invoke.cont280
+ br i1 undef, label %if.then292, label %if.end328
+
+if.then292: ; preds = %_ZNSsD2Ev.exit1877
+ unreachable
+
+lpad276: ; preds = %_ZN4Foam4wordC2EPKcb.exit1862
+ %9 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+ cleanup
+ unreachable
+
+if.end328: ; preds = %_ZNSsD2Ev.exit1877
+ br i1 %allTopology, label %if.then331, label %if.end660
+
+if.then331: ; preds = %if.end328
+ unreachable
+
+if.end660: ; preds = %if.end328
+ invoke void @_ZN4Foam13messageStreamcvRNS_8OSstreamEEv()
+ to label %invoke.cont668 unwind label %lpad663
+
+invoke.cont668: ; preds = %if.end660
+ %call671 = invoke dereferenceable(56) %"class.Foam::Ostream.189"* @_ZN4FoamlsERNS_7OstreamEPKc()
+ to label %invoke.cont670 unwind label %lpad663
+
+invoke.cont670: ; preds = %invoke.cont668
+ invoke void @_ZN4FoamlsERNS_7OstreamEi()
+ to label %invoke.cont674 unwind label %lpad663
+
+invoke.cont674: ; preds = %invoke.cont670
+ %call677 = invoke dereferenceable(56) %"class.Foam::Ostream.189"* @_ZN4FoamlsERNS_7OstreamEPKc()
+ to label %invoke.cont676 unwind label %lpad663
+
+invoke.cont676: ; preds = %invoke.cont674
+ invoke void undef(%"class.Foam::Ostream.189"* %call677)
+ to label %if.end878 unwind label %lpad663
+
+lpad663: ; preds = %invoke.cont670, %if.end660, %invoke.cont668, %invoke.cont674, %invoke.cont676
+ %10 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+ cleanup
+ br i1 undef, label %_ZN4Foam4ListIiED2Ev.exit.i3073, label %delete.notnull.i.i3071
+
+if.end878: ; preds = %invoke.cont676
+ br i1 undef, label %_ZN4Foam11regionSplitD2Ev.exit, label %delete.notnull.i.i3056
+
+delete.notnull.i.i3056: ; preds = %if.end878
+ unreachable
+
+_ZN4Foam11regionSplitD2Ev.exit: ; preds = %if.end878
+ br i1 undef, label %if.then883, label %if.else888
+
+if.then883: ; preds = %_ZN4Foam11regionSplitD2Ev.exit
+ unreachable
+
+delete.notnull.i.i3071: ; preds = %lpad663
+ unreachable
+
+_ZN4Foam4ListIiED2Ev.exit.i3073: ; preds = %lpad663
+ invoke void @_ZN4Foam11regIOobjectD2Ev()
+ to label %eh.resume unwind label %terminate.lpad
+
+if.else888: ; preds = %_ZN4Foam11regionSplitD2Ev.exit
+ invoke void @_ZN4Foam4word12stripInvalidEv()
+ to label %_ZN4Foam4wordC2EPKcb.exit3098 unwind label %lpad.i3086
+
+lpad.i3086: ; preds = %if.else888
+ %11 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+ cleanup
+ unreachable
+
+_ZN4Foam4wordC2EPKcb.exit3098: ; preds = %if.else888
+ invoke void @_ZN4Foam8pointSetC1ERKNS_8polyMeshERKNS_4wordEiNS_8IOobject11writeOptionE()
+ to label %invoke.cont902 unwind label %lpad898
+
+invoke.cont902: ; preds = %_ZN4Foam4wordC2EPKcb.exit3098
+ br i1 undef, label %_ZNSsD2Ev.exit3113, label %if.then.i.i3105, !prof !1
+
+if.then.i.i3105: ; preds = %invoke.cont902
+ br i1 undef, label %if.then4.i.i3112, label %_ZNSsD2Ev.exit3113
+
+if.then4.i.i3112: ; preds = %if.then.i.i3105
+ unreachable
+
+_ZNSsD2Ev.exit3113: ; preds = %if.then.i.i3105, %invoke.cont902
+ %call.i31163117 = invoke zeroext i32 undef(%"class.Foam::IOstream.8"* getelementptr inbounds (%"class.Foam::prefixOSstream.27", %"class.Foam::prefixOSstream.27"* @_ZN4Foam4PoutE, i64 0, i32 0, i32 0, i32 0))
+ to label %call.i3116.noexc unwind label %lpad905.loopexit.split-lp
+
+call.i3116.noexc: ; preds = %_ZNSsD2Ev.exit3113
+ %call5.i3118 = invoke zeroext i32 null(%"class.Foam::IOstream.8"* getelementptr inbounds (%"class.Foam::prefixOSstream.27", %"class.Foam::prefixOSstream.27"* @_ZN4Foam4PoutE, i64 0, i32 0, i32 0, i32 0), i32 zeroext undef)
+ to label %invoke.cont906 unwind label %lpad905.loopexit.split-lp
+
+invoke.cont906: ; preds = %call.i3116.noexc
+ unreachable
+
+lpad898: ; preds = %_ZN4Foam4wordC2EPKcb.exit3098
+ %12 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+ cleanup
+ br i1 undef, label %_ZNSsD2Ev.exit3204, label %if.then.i.i3196, !prof !1
+
+if.then.i.i3196: ; preds = %lpad898
+ unreachable
+
+_ZNSsD2Ev.exit3204: ; preds = %lpad898
+ unreachable
+
+lpad905.loopexit.split-lp: ; preds = %call.i3116.noexc, %_ZNSsD2Ev.exit3113
+ %lpad.loopexit.split-lp = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+ cleanup
+ invoke void @_ZN4Foam8pointSetD1Ev()
+ to label %eh.resume unwind label %terminate.lpad
+
+eh.resume: ; preds = %_ZN4Foam4ListIiED2Ev.exit.i3073, %lpad230, %lpad175, %lpad905.loopexit.split-lp, %lpad217
+ resume { i8*, i32 } undef
+
+terminate.lpad: ; preds = %_ZN4Foam4ListIiED2Ev.exit.i3073, %lpad230, %lpad175, %lpad905.loopexit.split-lp
+ %13 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+ catch i8* null
+ unreachable
+}
+
+declare dereferenceable(56) %"class.Foam::Ostream.189"* @_ZN4FoamlsERNS_7OstreamEPKc() #0
+
+declare void @_ZN4Foam13messageStreamcvRNS_8OSstreamEEv() #0
+
+declare i32 @__gxx_personality_v0(...)
+
+declare void @_ZN4Foam7cellSetC1ERKNS_8polyMeshERKNS_4wordEiNS_8IOobject11writeOptionE() #0
+
+declare void @_ZN4FoamlsERNS_7OstreamEi() #0
+
+declare void @_ZN4Foam8pointSetC1ERKNS_8polyMeshERKNS_4wordEiNS_8IOobject11writeOptionE() #0
+
+declare void @_ZN4Foam8pointSetD1Ev() #0
+
+declare void @_ZN4Foam7faceSetC1ERKNS_8polyMeshERKNS_4wordEiNS_8IOobject11writeOptionE() #0
+
+declare void @_ZN4Foam7faceSetD1Ev() #0
+
+; Function Attrs: inlinehint
+declare void @_ZN4Foam4word12stripInvalidEv() #1 align 2
+
+declare void @_ZN4Foam11regIOobjectD2Ev() #0
+
+declare void @_ZN4Foam6reduceIiNS_5sumOpIiEEEEvRKNS_4ListINS_8UPstream11commsStructEEERT_RKT0_ii() #0
+
+attributes #0 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="a2q" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { inlinehint "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="a2q" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!llvm.module.flags = !{!0}
+
+!0 = !{i32 1, !"PIC Level", i32 2}
+!1 = !{!"branch_weights", i32 64, i32 4}
diff --git a/test/CodeGen/PowerPC/f32-to-i64.ll b/test/CodeGen/PowerPC/f32-to-i64.ll
new file mode 100644
index 000000000000..c1381880b757
--- /dev/null
+++ b/test/CodeGen/PowerPC/f32-to-i64.ll
@@ -0,0 +1,23 @@
+; RUN: llc < %s | FileCheck %s
+target datalayout = "E-m:e-p:32:32-i64:64-n32"
+target triple = "powerpc-unknown-unknown"
+
+; Function Attrs: nounwind
+define i64 @testullf(float %arg) #0 {
+entry:
+ %arg.addr = alloca float, align 4
+ store float %arg, float* %arg.addr, align 4
+ %0 = load float, float* %arg.addr, align 4
+ %conv = fptoui float %0 to i64
+ ret i64 %conv
+
+; CHECK-LABEL: @testullf
+; CHECK: fctiduz [[REG1:[0-9]+]], 1
+; CHECK: stfd [[REG1]], [[OFF:[0-9]+]](1)
+; CHECK-DAG: lwz 3, [[OFF]](1)
+; CHECK-DAG: lwz 4, {{[0-9]+}}(1)
+; CHECK: blr
+}
+
+attributes #0 = { nounwind "target-cpu"="a2" }
+
diff --git a/test/CodeGen/PowerPC/fast-isel-GEP-coalesce.ll b/test/CodeGen/PowerPC/fast-isel-GEP-coalesce.ll
index 7bdda0494b8f..3e0e5250d2fb 100644
--- a/test/CodeGen/PowerPC/fast-isel-GEP-coalesce.ll
+++ b/test/CodeGen/PowerPC/fast-isel-GEP-coalesce.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s --check-prefix=ELF64
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort=1 -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s --check-prefix=ELF64
%struct.A = type { i32, [2 x [2 x i32]], i8, [3 x [3 x [3 x i32]]] }
%struct.B = type { i32, [2 x [2 x [2 x %struct.A]]] }
@@ -11,9 +11,9 @@ define i32* @t1() nounwind {
entry:
; ELF64: t1
%addr = alloca i32*, align 4
- store i32* getelementptr inbounds ([2 x [2 x [2 x [2 x [2 x i32]]]]]* @arr, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1), i32** %addr, align 4
+ store i32* getelementptr inbounds ([2 x [2 x [2 x [2 x [2 x i32]]]]], [2 x [2 x [2 x [2 x [2 x i32]]]]]* @arr, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1), i32** %addr, align 4
; ELF64: addi {{[0-9]+}}, {{[0-9]+}}, 124
- %0 = load i32** %addr, align 4
+ %0 = load i32*, i32** %addr, align 4
ret i32* %0
}
@@ -21,9 +21,9 @@ define i32* @t2() nounwind {
entry:
; ELF64: t2
%addr = alloca i32*, align 4
- store i32* getelementptr inbounds ([3 x [3 x %struct.A]]* @A, i32 0, i32 2, i32 2, i32 3, i32 1, i32 2, i32 2), i32** %addr, align 4
+ store i32* getelementptr inbounds ([3 x [3 x %struct.A]], [3 x [3 x %struct.A]]* @A, i32 0, i32 2, i32 2, i32 3, i32 1, i32 2, i32 2), i32** %addr, align 4
; ELF64: addi {{[0-9]+}}, {{[0-9]+}}, 1148
- %0 = load i32** %addr, align 4
+ %0 = load i32*, i32** %addr, align 4
ret i32* %0
}
@@ -31,9 +31,9 @@ define i32* @t3() nounwind {
entry:
; ELF64: t3
%addr = alloca i32*, align 4
- store i32* getelementptr inbounds ([3 x [3 x %struct.A]]* @A, i32 0, i32 0, i32 1, i32 1, i32 0, i32 1), i32** %addr, align 4
+ store i32* getelementptr inbounds ([3 x [3 x %struct.A]], [3 x [3 x %struct.A]]* @A, i32 0, i32 0, i32 1, i32 1, i32 0, i32 1), i32** %addr, align 4
; ELF64: addi {{[0-9]+}}, {{[0-9]+}}, 140
- %0 = load i32** %addr, align 4
+ %0 = load i32*, i32** %addr, align 4
ret i32* %0
}
@@ -41,8 +41,8 @@ define i32* @t4() nounwind {
entry:
; ELF64: t4
%addr = alloca i32*, align 4
- store i32* getelementptr inbounds ([2 x [2 x [2 x %struct.B]]]* @B, i32 0, i32 0, i32 0, i32 1, i32 1, i32 0, i32 0, i32 1, i32 3, i32 1, i32 2, i32 1), i32** %addr, align 4
+ store i32* getelementptr inbounds ([2 x [2 x [2 x %struct.B]]], [2 x [2 x [2 x %struct.B]]]* @B, i32 0, i32 0, i32 0, i32 1, i32 1, i32 0, i32 0, i32 1, i32 3, i32 1, i32 2, i32 1), i32** %addr, align 4
; ELF64: addi {{[0-9]+}}, {{[0-9]+}}, 1284
- %0 = load i32** %addr, align 4
+ %0 = load i32*, i32** %addr, align 4
ret i32* %0
}
diff --git a/test/CodeGen/PowerPC/fast-isel-binary.ll b/test/CodeGen/PowerPC/fast-isel-binary.ll
index 43a6cd085055..2f1513f8aa11 100644
--- a/test/CodeGen/PowerPC/fast-isel-binary.ll
+++ b/test/CodeGen/PowerPC/fast-isel-binary.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s --check-prefix=ELF64
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort=1 -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s --check-prefix=ELF64
; Test add with non-legal types
diff --git a/test/CodeGen/PowerPC/fast-isel-br-const.ll b/test/CodeGen/PowerPC/fast-isel-br-const.ll
index 2cfb8a225745..6be7fbf9e02f 100644
--- a/test/CodeGen/PowerPC/fast-isel-br-const.ll
+++ b/test/CodeGen/PowerPC/fast-isel-br-const.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s --check-prefix=ELF64
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort=1 -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s --check-prefix=ELF64
define i32 @t1(i32 %a, i32 %b) nounwind uwtable ssp {
entry:
diff --git a/test/CodeGen/PowerPC/fast-isel-call.ll b/test/CodeGen/PowerPC/fast-isel-call.ll
index b2cc75e26114..64d8f6e79195 100644
--- a/test/CodeGen/PowerPC/fast-isel-call.ll
+++ b/test/CodeGen/PowerPC/fast-isel-call.ll
@@ -1,8 +1,8 @@
; FIXME: FastISel currently returns false if it hits code that uses VSX
-; registers and with -fast-isel-abort turned on the test case will then fail.
+; registers and with -fast-isel-abort=1 turned on the test case will then fail.
; When fastisel better supports VSX fix up this test case.
;
-; RUN: llc < %s -O0 -verify-machineinstrs -mattr=-vsx -fast-isel-abort -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s --check-prefix=ELF64
+; RUN: llc < %s -O0 -verify-machineinstrs -mattr=-vsx -fast-isel-abort=1 -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s --check-prefix=ELF64
define i32 @t1(i8 signext %a) nounwind {
%1 = sext i8 %a to i32
@@ -85,7 +85,7 @@ define i32 @bar0(i32 %i) nounwind {
;define void @foo3() uwtable {
; %fptr = alloca i32 (i32)*, align 8
; store i32 (i32)* @bar0, i32 (i32)** %fptr, align 8
-; %1 = load i32 (i32)** %fptr, align 8
+; %1 = load i32 (i32)*, i32 (i32)** %fptr, align 8
; %call = call i32 %1(i32 0)
; ret void
;}
diff --git a/test/CodeGen/PowerPC/fast-isel-cmp-imm.ll b/test/CodeGen/PowerPC/fast-isel-cmp-imm.ll
index c1f6b6327a44..5a9d15868b6b 100644
--- a/test/CodeGen/PowerPC/fast-isel-cmp-imm.ll
+++ b/test/CodeGen/PowerPC/fast-isel-cmp-imm.ll
@@ -1,8 +1,8 @@
; FIXME: FastISel currently returns false if it hits code that uses VSX
-; registers and with -fast-isel-abort turned on the test case will then fail.
+; registers and with -fast-isel-abort=1 turned on the test case will then fail.
; When fastisel better supports VSX fix up this test case.
;
-; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -mattr=-vsx | FileCheck %s --check-prefix=ELF64
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort=1 -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -mattr=-vsx | FileCheck %s --check-prefix=ELF64
define void @t1a(float %a) uwtable ssp {
entry:
; ELF64: t1a
@@ -201,7 +201,7 @@ define void @t12(i8 %a) uwtable ssp {
entry:
; ELF64: t12
%cmp = icmp ugt i8 %a, -113
-; ELF64: rlwinm
+; ELF64: clrlwi
; ELF64: cmplwi
br i1 %cmp, label %if.then, label %if.end
diff --git a/test/CodeGen/PowerPC/fast-isel-const.ll b/test/CodeGen/PowerPC/fast-isel-const.ll
index 1057d0a0ce2b..a751a2be6c69 100644
--- a/test/CodeGen/PowerPC/fast-isel-const.ll
+++ b/test/CodeGen/PowerPC/fast-isel-const.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -mattr=-vsx | FileCheck %s --check-prefix=ELF64
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort=1 -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -mattr=-vsx | FileCheck %s --check-prefix=ELF64
define zeroext i1 @testi1(i8 %in) nounwind uwtable ssp {
entry:
diff --git a/test/CodeGen/PowerPC/fast-isel-conversion-p5.ll b/test/CodeGen/PowerPC/fast-isel-conversion-p5.ll
index ac41e8c27700..cfb934c6ab02 100644
--- a/test/CodeGen/PowerPC/fast-isel-conversion-p5.ll
+++ b/test/CodeGen/PowerPC/fast-isel-conversion-p5.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr5 | FileCheck %s --check-prefix=ELF64
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort=1 -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr5 | FileCheck %s --check-prefix=ELF64
; Test sitofp
diff --git a/test/CodeGen/PowerPC/fast-isel-conversion.ll b/test/CodeGen/PowerPC/fast-isel-conversion.ll
index b0e29c1274a2..f7557d456858 100644
--- a/test/CodeGen/PowerPC/fast-isel-conversion.ll
+++ b/test/CodeGen/PowerPC/fast-isel-conversion.ll
@@ -1,12 +1,12 @@
; FIXME: FastISel currently returns false if it hits code that uses VSX
-; registers and with -fast-isel-abort turned on the test case will then fail.
+; registers and with -fast-isel-abort=1 turned on the test case will then fail.
; When fastisel better supports VSX fix up this test case.
;
-; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -mattr=-vsx | FileCheck %s --check-prefix=ELF64
-; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 -mattr=-vsx | FileCheck %s --check-prefix=ELF64LE
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort=1 -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -mattr=-vsx | FileCheck %s --check-prefix=ELF64
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort=1 -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 -mattr=-vsx | FileCheck %s --check-prefix=ELF64LE
; RUN: llc < %s -O0 -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -mcpu=970 -mattr=-vsx | FileCheck %s --check-prefix=PPC970
-;; Tests for 970 don't use -fast-isel-abort because we intentionally punt
+;; Tests for 970 don't use -fast-isel-abort=1 because we intentionally punt
;; to SelectionDAG in some cases.
; Test sitofp
@@ -253,7 +253,7 @@ entry:
; ELF64LE: std
; ELF64LE: lfd
; ELF64LE: fcfidus
-; PPC970: rlwinm {{[0-9]+}}, {{[0-9]+}}, 0, 16, 31
+; PPC970: clrlwi {{[0-9]+}}, {{[0-9]+}}, 16
; PPC970: std
; PPC970: lfd
; PPC970: fcfid
@@ -277,7 +277,7 @@ entry:
; ELF64LE: std
; ELF64LE: lfd
; ELF64LE: fcfidus
-; PPC970: rlwinm {{[0-9]+}}, {{[0-9]+}}, 0, 24, 31
+; PPC970: clrlwi {{[0-9]+}}, {{[0-9]+}}, 24
; PPC970: std
; PPC970: lfd
; PPC970: fcfid
@@ -342,7 +342,7 @@ entry:
; ELF64LE: std
; ELF64LE: lfd
; ELF64LE: fcfidu
-; PPC970: rlwinm {{[0-9]+}}, {{[0-9]+}}, 0, 16, 31
+; PPC970: clrlwi {{[0-9]+}}, {{[0-9]+}}, 16
; PPC970: std
; PPC970: lfd
; PPC970: fcfid
@@ -365,7 +365,7 @@ entry:
; ELF64LE: std
; ELF64LE: lfd
; ELF64LE: fcfidu
-; PPC970: rlwinm {{[0-9]+}}, {{[0-9]+}}, 0, 24, 31
+; PPC970: clrlwi {{[0-9]+}}, {{[0-9]+}}, 24
; PPC970: std
; PPC970: lfd
; PPC970: fcfid
diff --git a/test/CodeGen/PowerPC/fast-isel-crash.ll b/test/CodeGen/PowerPC/fast-isel-crash.ll
index 1813fc96acee..55e87effcd82 100644
--- a/test/CodeGen/PowerPC/fast-isel-crash.ll
+++ b/test/CodeGen/PowerPC/fast-isel-crash.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort=1 -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7
; Ensure this doesn't crash.
diff --git a/test/CodeGen/PowerPC/fast-isel-ext.ll b/test/CodeGen/PowerPC/fast-isel-ext.ll
index 753305a68dda..6fd3b4035122 100644
--- a/test/CodeGen/PowerPC/fast-isel-ext.ll
+++ b/test/CodeGen/PowerPC/fast-isel-ext.ll
@@ -1,18 +1,18 @@
-; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s --check-prefix=ELF64
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort=1 -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s --check-prefix=ELF64
; zext
define i32 @zext_8_32(i8 %a) nounwind ssp {
; ELF64: zext_8_32
%r = zext i8 %a to i32
-; ELF64: rlwinm {{[0-9]+}}, {{[0-9]+}}, 0, 24, 31
+; ELF64: clrlwi {{[0-9]+}}, {{[0-9]+}}, 24
ret i32 %r
}
define i32 @zext_16_32(i16 %a) nounwind ssp {
; ELF64: zext_16_32
%r = zext i16 %a to i32
-; ELF64: rlwinm {{[0-9]+}}, {{[0-9]+}}, 0, 16, 31
+; ELF64: clrlwi {{[0-9]+}}, {{[0-9]+}}, 16
ret i32 %r
}
diff --git a/test/CodeGen/PowerPC/fast-isel-fold.ll b/test/CodeGen/PowerPC/fast-isel-fold.ll
index 4de345f309af..e56101a28e2b 100644
--- a/test/CodeGen/PowerPC/fast-isel-fold.ll
+++ b/test/CodeGen/PowerPC/fast-isel-fold.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s --check-prefix=ELF64
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort=1 -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s --check-prefix=ELF64
@a = global i8 1, align 1
@b = global i16 2, align 2
@@ -6,7 +6,7 @@
define void @t1() nounwind uwtable ssp {
; ELF64: t1
- %1 = load i8* @a, align 1
+ %1 = load i8, i8* @a, align 1
call void @foo1(i8 zeroext %1)
; ELF64: lbz
; ELF64-NOT: rldicl
@@ -16,7 +16,7 @@ define void @t1() nounwind uwtable ssp {
define void @t2() nounwind uwtable ssp {
; ELF64: t2
- %1 = load i16* @b, align 2
+ %1 = load i16, i16* @b, align 2
call void @foo2(i16 zeroext %1)
; ELF64: lhz
; ELF64-NOT: rldicl
@@ -26,7 +26,7 @@ define void @t2() nounwind uwtable ssp {
define void @t2a() nounwind uwtable ssp {
; ELF64: t2a
- %1 = load i32* @c, align 4
+ %1 = load i32, i32* @c, align 4
call void @foo3(i32 zeroext %1)
; ELF64: lwz
; ELF64-NOT: rldicl
@@ -40,7 +40,7 @@ declare void @foo3(i32 zeroext)
define i32 @t3() nounwind uwtable ssp {
; ELF64: t3
- %1 = load i8* @a, align 1
+ %1 = load i8, i8* @a, align 1
%2 = zext i8 %1 to i32
; ELF64: lbz
; ELF64-NOT: rlwinm
@@ -49,7 +49,7 @@ define i32 @t3() nounwind uwtable ssp {
define i32 @t4() nounwind uwtable ssp {
; ELF64: t4
- %1 = load i16* @b, align 2
+ %1 = load i16, i16* @b, align 2
%2 = zext i16 %1 to i32
; ELF64: lhz
; ELF64-NOT: rlwinm
@@ -58,7 +58,7 @@ define i32 @t4() nounwind uwtable ssp {
define i32 @t5() nounwind uwtable ssp {
; ELF64: t5
- %1 = load i16* @b, align 2
+ %1 = load i16, i16* @b, align 2
%2 = sext i16 %1 to i32
; ELF64: lha
; ELF64-NOT: rlwinm
@@ -67,7 +67,7 @@ define i32 @t5() nounwind uwtable ssp {
define i32 @t6() nounwind uwtable ssp {
; ELF64: t6
- %1 = load i8* @a, align 2
+ %1 = load i8, i8* @a, align 2
%2 = sext i8 %1 to i32
; ELF64: lbz
; ELF64-NOT: rlwinm
@@ -76,7 +76,7 @@ define i32 @t6() nounwind uwtable ssp {
define i64 @t7() nounwind uwtable ssp {
; ELF64: t7
- %1 = load i8* @a, align 1
+ %1 = load i8, i8* @a, align 1
%2 = zext i8 %1 to i64
; ELF64: lbz
; ELF64-NOT: rldicl
@@ -85,7 +85,7 @@ define i64 @t7() nounwind uwtable ssp {
define i64 @t8() nounwind uwtable ssp {
; ELF64: t8
- %1 = load i16* @b, align 2
+ %1 = load i16, i16* @b, align 2
%2 = zext i16 %1 to i64
; ELF64: lhz
; ELF64-NOT: rldicl
@@ -94,7 +94,7 @@ define i64 @t8() nounwind uwtable ssp {
define i64 @t9() nounwind uwtable ssp {
; ELF64: t9
- %1 = load i16* @b, align 2
+ %1 = load i16, i16* @b, align 2
%2 = sext i16 %1 to i64
; ELF64: lha
; ELF64-NOT: extsh
@@ -103,7 +103,7 @@ define i64 @t9() nounwind uwtable ssp {
define i64 @t10() nounwind uwtable ssp {
; ELF64: t10
- %1 = load i8* @a, align 2
+ %1 = load i8, i8* @a, align 2
%2 = sext i8 %1 to i64
; ELF64: lbz
; ELF64: extsb
@@ -112,7 +112,7 @@ define i64 @t10() nounwind uwtable ssp {
define i64 @t11() nounwind uwtable ssp {
; ELF64: t11
- %1 = load i32* @c, align 4
+ %1 = load i32, i32* @c, align 4
%2 = zext i32 %1 to i64
; ELF64: lwz
; ELF64-NOT: rldicl
@@ -121,7 +121,7 @@ define i64 @t11() nounwind uwtable ssp {
define i64 @t12() nounwind uwtable ssp {
; ELF64: t12
- %1 = load i32* @c, align 4
+ %1 = load i32, i32* @c, align 4
%2 = sext i32 %1 to i64
; ELF64: lwa
; ELF64-NOT: extsw
diff --git a/test/CodeGen/PowerPC/fast-isel-icmp-split.ll b/test/CodeGen/PowerPC/fast-isel-icmp-split.ll
new file mode 100644
index 000000000000..459616eb9698
--- /dev/null
+++ b/test/CodeGen/PowerPC/fast-isel-icmp-split.ll
@@ -0,0 +1,72 @@
+; RUN: llc -O0 -relocation-model=pic < %s | FileCheck %s
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-bgq-linux"
+
+%"class.std::__1::__tree_node.130.151" = type { %"class.std::__1::__tree_node_base.base.128.149", %"class.boost::serialization::extended_type_info.129.150"* }
+%"class.std::__1::__tree_node_base.base.128.149" = type <{ %"class.std::__1::__tree_end_node.127.148", %"class.std::__1::__tree_node_base.126.147"*, %"class.std::__1::__tree_node_base.126.147"*, i8 }>
+%"class.std::__1::__tree_end_node.127.148" = type { %"class.std::__1::__tree_node_base.126.147"* }
+%"class.std::__1::__tree_node_base.126.147" = type <{ %"class.std::__1::__tree_end_node.127.148", %"class.std::__1::__tree_node_base.126.147"*, %"class.std::__1::__tree_node_base.126.147"*, i8, [7 x i8] }>
+%"class.boost::serialization::extended_type_info.129.150" = type { i32 (...)**, i32, i8* }
+
+; Function Attrs: noinline
+define void @_ZN5boost13serialization18extended_type_info4findEPKc() #0 align 2 {
+entry:
+ br i1 undef, label %cond.true, label %cond.false
+
+; CHECK: @_ZN5boost13serialization18extended_type_info4findEPKc
+
+cond.true: ; preds = %entry
+ br label %cond.end
+
+cond.false: ; preds = %entry
+ unreachable
+ ; No predecessors!
+ br label %cond.end
+
+cond.end: ; preds = %0, %cond.true
+ invoke void @_ZNKSt3__16__treeIPKN5boost13serialization18extended_type_infoENS2_6detail11key_compareENS_9allocatorIS5_EEE4findIS5_EENS_21__tree_const_iteratorIS5_PNS_11__tree_nodeIS5_PvEElEERKT_()
+ to label %_ZNKSt3__18multisetIPKN5boost13serialization18extended_type_infoENS2_6detail11key_compareENS_9allocatorIS5_EEE4findERKS5_.exit unwind label %lpad
+
+_ZNKSt3__18multisetIPKN5boost13serialization18extended_type_infoENS2_6detail11key_compareENS_9allocatorIS5_EEE4findERKS5_.exit: ; preds = %cond.end
+ br label %invoke.cont
+
+invoke.cont: ; preds = %_ZNKSt3__18multisetIPKN5boost13serialization18extended_type_infoENS2_6detail11key_compareENS_9allocatorIS5_EEE4findERKS5_.exit
+ %1 = load %"class.std::__1::__tree_node.130.151"*, %"class.std::__1::__tree_node.130.151"** undef, align 8
+ %cmp.i = icmp eq %"class.std::__1::__tree_node.130.151"* undef, %1
+ br label %invoke.cont.2
+
+invoke.cont.2: ; preds = %invoke.cont
+ br i1 %cmp.i, label %if.then, label %if.end
+
+if.then: ; preds = %invoke.cont.2
+ br label %cleanup
+
+lpad: ; preds = %cond.end
+ %2 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+ cleanup
+ br label %eh.resume
+
+if.end: ; preds = %invoke.cont.2
+ br label %invoke.cont.4
+
+invoke.cont.4: ; preds = %if.end
+ br label %cleanup
+
+cleanup: ; preds = %invoke.cont.4, %if.then
+ ret void
+
+eh.resume: ; preds = %lpad
+ resume { i8*, i32 } undef
+}
+
+declare i32 @__gxx_personality_v0(...)
+
+; Function Attrs: noinline
+declare void @_ZNKSt3__16__treeIPKN5boost13serialization18extended_type_infoENS2_6detail11key_compareENS_9allocatorIS5_EEE4findIS5_EENS_21__tree_const_iteratorIS5_PNS_11__tree_nodeIS5_PvEElEERKT_() #0 align 2
+
+attributes #0 = { noinline "target-cpu"="a2q" }
+
+!llvm.module.flags = !{!0}
+
+!0 = !{i32 1, !"PIC Level", i32 2}
+
diff --git a/test/CodeGen/PowerPC/fast-isel-indirectbr.ll b/test/CodeGen/PowerPC/fast-isel-indirectbr.ll
index 88ccf918ae96..b5477134c517 100644
--- a/test/CodeGen/PowerPC/fast-isel-indirectbr.ll
+++ b/test/CodeGen/PowerPC/fast-isel-indirectbr.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s --check-prefix=ELF64
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort=1 -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s --check-prefix=ELF64
define void @t1(i8* %x) {
entry:
diff --git a/test/CodeGen/PowerPC/fast-isel-load-store-vsx.ll b/test/CodeGen/PowerPC/fast-isel-load-store-vsx.ll
new file mode 100644
index 000000000000..8a873daa6c7a
--- /dev/null
+++ b/test/CodeGen/PowerPC/fast-isel-load-store-vsx.ll
@@ -0,0 +1,28 @@
+; RUN: llc < %s -O0 -fast-isel -mattr=+vsx -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s --check-prefix=ELF64VSX
+
+;; The semantics of a VSX store involving R0 differ depending on whether R0 is
+;; used as the base or as the offset register. Used as the base, the effective
+;; address computation substitutes zero regardless of the content of R0; used as
+;; the offset, its content enters the effective address. We observed that for
+;; some constructors the initialization values were stored without an offset
+;; register being specified, causing R0 to be used as the offset in regions
+;; where it still held the link register value. This test verifies that R0 is
+;; used as the base in these situations (see the addressing note after this test).
+
+%SomeStruct = type { double }
+
+; ELF64VSX-LABEL: SomeStructCtor
+define linkonce_odr void @SomeStructCtor(%SomeStruct* %this, double %V) unnamed_addr align 2 {
+entry:
+ %this.addr = alloca %SomeStruct*, align 8
+ %V.addr = alloca double, align 8
+ store %SomeStruct* %this, %SomeStruct** %this.addr, align 8
+; ELF64VSX: stxsdx {{[0-9][0-9]?}}, 0, {{[1-9][0-9]?}}
+ store double %V, double* %V.addr, align 8
+ %this1 = load %SomeStruct*, %SomeStruct** %this.addr
+ %Val = getelementptr inbounds %SomeStruct, %SomeStruct* %this1, i32 0, i32 0
+; ELF64VSX: stxsdx {{[0-9][0-9]?}}, 0, {{[1-9][0-9]?}}
+ %0 = load double, double* %V.addr, align 8
+ store double %0, double* %Val, align 8
+ ret void
+ }
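
A brief addressing note for the test above (editorial, not part of the imported
file; the encoding shown is the standard Power ISA behaviour for X-form stores,
and the register roles are illustrative):

;;   stxsdx XS, RA, RB     ; EA = ((RA == 0) ? 0 : GPR[RA]) + GPR[RB]
;; With RA = 0 the base contributes a literal zero, so a stale value left in R0
;; (for example a saved link-register value) cannot perturb the address, which
;; is what the "stxsdx {{...}}, 0, {{...}}" checks pin down. With R0 in the RB
;; slot its content would be added into the effective address instead.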
diff --git a/test/CodeGen/PowerPC/fast-isel-load-store.ll b/test/CodeGen/PowerPC/fast-isel-load-store.ll
index ef702e21d6a1..f6a55f06b2cb 100644
--- a/test/CodeGen/PowerPC/fast-isel-load-store.ll
+++ b/test/CodeGen/PowerPC/fast-isel-load-store.ll
@@ -1,8 +1,8 @@
; FIXME: FastISel currently returns false if it hits code that uses VSX
-; registers and with -fast-isel-abort turned on the test case will then fail.
+; registers and with -fast-isel-abort=1 turned on the test case will then fail.
; When fastisel better supports VSX fix up this test case.
;
-; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel -fast-isel-abort -mattr=-vsx -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s --check-prefix=ELF64
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel -fast-isel-abort=1 -mattr=-vsx -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s --check-prefix=ELF64
; This test verifies that load/store instructions are properly generated,
; and that they pass MI verification.
@@ -26,7 +26,7 @@
define i8 @t1() nounwind uwtable ssp {
; ELF64: t1
- %1 = load i8* @a, align 1
+ %1 = load i8, i8* @a, align 1
; ELF64: lbz
%2 = add nsw i8 %1, 1
; ELF64: addi
@@ -35,7 +35,7 @@ define i8 @t1() nounwind uwtable ssp {
define i16 @t2() nounwind uwtable ssp {
; ELF64: t2
- %1 = load i16* @b, align 2
+ %1 = load i16, i16* @b, align 2
; ELF64: lhz
%2 = add nsw i16 %1, 1
; ELF64: addi
@@ -44,7 +44,7 @@ define i16 @t2() nounwind uwtable ssp {
define i32 @t3() nounwind uwtable ssp {
; ELF64: t3
- %1 = load i32* @c, align 4
+ %1 = load i32, i32* @c, align 4
; ELF64: lwz
%2 = add nsw i32 %1, 1
; ELF64: addi
@@ -53,7 +53,7 @@ define i32 @t3() nounwind uwtable ssp {
define i64 @t4() nounwind uwtable ssp {
; ELF64: t4
- %1 = load i64* @d, align 4
+ %1 = load i64, i64* @d, align 4
; ELF64: ld
%2 = add nsw i64 %1, 1
; ELF64: addi
@@ -62,7 +62,7 @@ define i64 @t4() nounwind uwtable ssp {
define float @t5() nounwind uwtable ssp {
; ELF64: t5
- %1 = load float* @e, align 4
+ %1 = load float, float* @e, align 4
; ELF64: lfs
%2 = fadd float %1, 1.0
; ELF64: fadds
@@ -71,7 +71,7 @@ define float @t5() nounwind uwtable ssp {
define double @t6() nounwind uwtable ssp {
; ELF64: t6
- %1 = load double* @f, align 8
+ %1 = load double, double* @f, align 8
; ELF64: lfd
%2 = fadd double %1, 1.0
; ELF64: fadd
@@ -145,7 +145,7 @@ define void @t12(double %v) nounwind uwtable ssp {
;; lwa requires an offset divisible by 4, so we need lwax here.
define i64 @t13() nounwind uwtable ssp {
; ELF64: t13
- %1 = load i32* getelementptr inbounds (%struct.s* @g, i32 0, i32 1), align 1
+ %1 = load i32, i32* getelementptr inbounds (%struct.s, %struct.s* @g, i32 0, i32 1), align 1
%2 = sext i32 %1 to i64
; ELF64: li
; ELF64: lwax
@@ -157,7 +157,7 @@ define i64 @t13() nounwind uwtable ssp {
;; ld requires an offset divisible by 4, so we need ldx here.
define i64 @t14() nounwind uwtable ssp {
; ELF64: t14
- %1 = load i64* getelementptr inbounds (%struct.t* @h, i32 0, i32 1), align 1
+ %1 = load i64, i64* getelementptr inbounds (%struct.t, %struct.t* @h, i32 0, i32 1), align 1
; ELF64: li
; ELF64: ldx
%2 = add nsw i64 %1, 1
@@ -169,7 +169,7 @@ define i64 @t14() nounwind uwtable ssp {
define void @t15(i64 %v) nounwind uwtable ssp {
; ELF64: t15
%1 = add nsw i64 %v, 1
- store i64 %1, i64* getelementptr inbounds (%struct.t* @h, i32 0, i32 1), align 1
+ store i64 %1, i64* getelementptr inbounds (%struct.t, %struct.t* @h, i32 0, i32 1), align 1
; ELF64: addis
; ELF64: addi
; ELF64: addi
@@ -181,7 +181,7 @@ define void @t15(i64 %v) nounwind uwtable ssp {
;; ld requires an offset that fits in 16 bits, so we need ldx here.
define i64 @t16() nounwind uwtable ssp {
; ELF64: t16
- %1 = load i64* getelementptr inbounds ([8192 x i64]* @i, i32 0, i64 5000), align 8
+ %1 = load i64, i64* getelementptr inbounds ([8192 x i64], [8192 x i64]* @i, i32 0, i64 5000), align 8
; ELF64: lis
; ELF64: ori
; ELF64: ldx
@@ -194,7 +194,7 @@ define i64 @t16() nounwind uwtable ssp {
define void @t17(i64 %v) nounwind uwtable ssp {
; ELF64: t17
%1 = add nsw i64 %v, 1
- store i64 %1, i64* getelementptr inbounds ([8192 x i64]* @i, i32 0, i64 5000), align 8
+ store i64 %1, i64* getelementptr inbounds ([8192 x i64], [8192 x i64]* @i, i32 0, i64 5000), align 8
; ELF64: addis
; ELF64: ld
; ELF64: addi
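
A quick reference on the lwa/ld offset constraints exercised by t13-t16 above
(editorial, not part of the imported file; the encodings are standard Power ISA
forms and the arithmetic is illustrative):

;  DS-form:  ld  RT, DS(RA)    ; DS is a signed 16-bit displacement whose low
;                              ; two bits must be zero (offset divisible by 4)
;  X-form:   ldx RT, RA, RB    ; EA = (RA|0) + GPR[RB], no displacement field
; In t16 the element offset is 5000 * 8 = 40000 bytes, which does not fit in a
; signed 16-bit field, so the constant is built with lis/ori and the load uses
; the indexed form (ldx), matching the CHECK lines above.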
diff --git a/test/CodeGen/PowerPC/fast-isel-redefinition.ll b/test/CodeGen/PowerPC/fast-isel-redefinition.ll
index 72422bda4433..60706a6e1438 100644
--- a/test/CodeGen/PowerPC/fast-isel-redefinition.ll
+++ b/test/CodeGen/PowerPC/fast-isel-redefinition.ll
@@ -1,10 +1,10 @@
-; RUN: llc -O0 -verify-machineinstrs -fast-isel-abort -optimize-regalloc -regalloc=basic -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 < %s
+; RUN: llc -O0 -verify-machineinstrs -fast-isel-abort=1 -optimize-regalloc -regalloc=basic -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 < %s
; This isn't exactly a useful set of command-line options, but check that it
; doesn't crash. (It crashed formerly on ARM, and proved useful in
; discovering a bug on PowerPC as well.)
define i32 @f(i32* %x) nounwind ssp {
- %y = getelementptr inbounds i32* %x, i32 5000
- %tmp103 = load i32* %y, align 4
+ %y = getelementptr inbounds i32, i32* %x, i32 5000
+ %tmp103 = load i32, i32* %y, align 4
ret i32 %tmp103
}
diff --git a/test/CodeGen/PowerPC/fast-isel-ret.ll b/test/CodeGen/PowerPC/fast-isel-ret.ll
index ae34fbf7bfe1..1e4566d94dfd 100644
--- a/test/CodeGen/PowerPC/fast-isel-ret.ll
+++ b/test/CodeGen/PowerPC/fast-isel-ret.ll
@@ -1,8 +1,8 @@
; FIXME: FastISel currently returns false if it hits code that uses VSX
-; registers and with -fast-isel-abort turned on the test case will then fail.
+; registers and with -fast-isel-abort=1 turned on the test case will then fail.
; When fastisel better supports VSX fix up this test case.
;
-; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -mattr=-vsx | FileCheck %s --check-prefix=ELF64
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort=1 -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -mattr=-vsx | FileCheck %s --check-prefix=ELF64
define zeroext i1 @rettrue() nounwind uwtable ssp {
entry:
diff --git a/test/CodeGen/PowerPC/fast-isel-shifter.ll b/test/CodeGen/PowerPC/fast-isel-shifter.ll
index 198bfbecda63..c18f659dde13 100644
--- a/test/CodeGen/PowerPC/fast-isel-shifter.ll
+++ b/test/CodeGen/PowerPC/fast-isel-shifter.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s --check-prefix=ELF64
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort=1 -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s --check-prefix=ELF64
define i32 @shl() nounwind ssp {
entry:
diff --git a/test/CodeGen/PowerPC/fastisel-gep-promote-before-add.ll b/test/CodeGen/PowerPC/fastisel-gep-promote-before-add.ll
index 4bcacf009746..96cf67c869f9 100644
--- a/test/CodeGen/PowerPC/fastisel-gep-promote-before-add.ll
+++ b/test/CodeGen/PowerPC/fastisel-gep-promote-before-add.ll
@@ -6,12 +6,12 @@ define zeroext i8 @gep_promotion(i8* %ptr) nounwind uwtable ssp {
entry:
%ptr.addr = alloca i8*, align 8
%add = add i8 64, 64 ; 0x40 + 0x40
- %0 = load i8** %ptr.addr, align 8
+ %0 = load i8*, i8** %ptr.addr, align 8
; CHECK-LABEL: gep_promotion:
; CHECK: lbz {{[0-9]+}}, 0({{.*}})
- %arrayidx = getelementptr inbounds i8* %0, i8 %add
+ %arrayidx = getelementptr inbounds i8, i8* %0, i8 %add
- %1 = load i8* %arrayidx, align 1
+ %1 = load i8, i8* %arrayidx, align 1
ret i8 %1
}
diff --git a/test/CodeGen/PowerPC/floatPSA.ll b/test/CodeGen/PowerPC/floatPSA.ll
index f14c73630a6f..cff95d591c9a 100644
--- a/test/CodeGen/PowerPC/floatPSA.ll
+++ b/test/CodeGen/PowerPC/floatPSA.ll
@@ -37,7 +37,7 @@ entry:
store float %l, float* %l.addr, align 4
store float %m, float* %m.addr, align 4
store float %n, float* %n.addr, align 4
- %0 = load float* %n.addr, align 4
+ %0 = load float, float* %n.addr, align 4
ret float %0
}
@@ -73,20 +73,20 @@ entry:
store float 1.200000e+01, float* %l, align 4
store float 1.300000e+01, float* %m, align 4
store float 1.400000e+01, float* %n, align 4
- %0 = load float* %a, align 4
- %1 = load float* %b, align 4
- %2 = load float* %c, align 4
- %3 = load float* %d, align 4
- %4 = load float* %e, align 4
- %5 = load float* %f, align 4
- %6 = load float* %g, align 4
- %7 = load float* %h, align 4
- %8 = load float* %i, align 4
- %9 = load float* %j, align 4
- %10 = load float* %k, align 4
- %11 = load float* %l, align 4
- %12 = load float* %m, align 4
- %13 = load float* %n, align 4
+ %0 = load float, float* %a, align 4
+ %1 = load float, float* %b, align 4
+ %2 = load float, float* %c, align 4
+ %3 = load float, float* %d, align 4
+ %4 = load float, float* %e, align 4
+ %5 = load float, float* %f, align 4
+ %6 = load float, float* %g, align 4
+ %7 = load float, float* %h, align 4
+ %8 = load float, float* %i, align 4
+ %9 = load float, float* %j, align 4
+ %10 = load float, float* %k, align 4
+ %11 = load float, float* %l, align 4
+ %12 = load float, float* %m, align 4
+ %13 = load float, float* %n, align 4
%call = call float @bar(float %0, float %1, float %2, float %3, float %4, float %5, float %6, float %7, float %8, float %9, float %10, float %11, float %12, float %13)
ret float %call
}
diff --git a/test/CodeGen/PowerPC/flt-preinc.ll b/test/CodeGen/PowerPC/flt-preinc.ll
new file mode 100644
index 000000000000..c0e3d3a21e19
--- /dev/null
+++ b/test/CodeGen/PowerPC/flt-preinc.ll
@@ -0,0 +1,40 @@
+; RUN: llc -mcpu=ppc64 < %s | FileCheck %s
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+; Function Attrs: nounwind readonly
+define float @tf(float* nocapture readonly %i, i32 signext %o) #0 {
+entry:
+ %idx.ext = sext i32 %o to i64
+ %add.ptr = getelementptr inbounds float, float* %i, i64 %idx.ext
+ %0 = load float, float* %add.ptr, align 4
+ %add.ptr.sum = add nsw i64 %idx.ext, 1
+ %add.ptr3 = getelementptr inbounds float, float* %i, i64 %add.ptr.sum
+ %1 = load float, float* %add.ptr3, align 4
+ %add = fadd float %0, %1
+ ret float %add
+
+; CHECK-LABEL: @tf
+; CHECK: lfsux
+; CHECK: blr
+}
+
+; Function Attrs: nounwind readonly
+define double @td(double* nocapture readonly %i, i32 signext %o) #0 {
+entry:
+ %idx.ext = sext i32 %o to i64
+ %add.ptr = getelementptr inbounds double, double* %i, i64 %idx.ext
+ %0 = load double, double* %add.ptr, align 8
+ %add.ptr.sum = add nsw i64 %idx.ext, 1
+ %add.ptr3 = getelementptr inbounds double, double* %i, i64 %add.ptr.sum
+ %1 = load double, double* %add.ptr3, align 8
+ %add = fadd double %0, %1
+ ret double %add
+
+; CHECK-LABEL: @td
+; CHECK: lfdux
+; CHECK: blr
+}
+
+attributes #0 = { nounwind readonly }
+
diff --git a/test/CodeGen/PowerPC/fma-assoc.ll b/test/CodeGen/PowerPC/fma-assoc.ll
index dc1316e5e24f..3044dd09128c 100644
--- a/test/CodeGen/PowerPC/fma-assoc.ll
+++ b/test/CodeGen/PowerPC/fma-assoc.ll
@@ -3,11 +3,11 @@
define double @test_FMADD_ASSOC1(double %A, double %B, double %C,
double %D, double %E) {
- %F = fmul double %A, %B ; <double> [#uses=1]
- %G = fmul double %C, %D ; <double> [#uses=1]
- %H = fadd double %F, %G ; <double> [#uses=1]
- %I = fadd double %H, %E ; <double> [#uses=1]
- ret double %I
+ %F = fmul double %A, %B ; <double> [#uses=1]
+ %G = fmul double %C, %D ; <double> [#uses=1]
+ %H = fadd double %F, %G ; <double> [#uses=1]
+ %I = fadd double %H, %E ; <double> [#uses=1]
+ ret double %I
; CHECK-LABEL: test_FMADD_ASSOC1:
; CHECK: fmadd
; CHECK-NEXT: fmadd
@@ -22,11 +22,11 @@ define double @test_FMADD_ASSOC1(double %A, double %B, double %C,
define double @test_FMADD_ASSOC2(double %A, double %B, double %C,
double %D, double %E) {
- %F = fmul double %A, %B ; <double> [#uses=1]
- %G = fmul double %C, %D ; <double> [#uses=1]
- %H = fadd double %F, %G ; <double> [#uses=1]
- %I = fadd double %E, %H ; <double> [#uses=1]
- ret double %I
+ %F = fmul double %A, %B ; <double> [#uses=1]
+ %G = fmul double %C, %D ; <double> [#uses=1]
+ %H = fadd double %F, %G ; <double> [#uses=1]
+ %I = fadd double %E, %H ; <double> [#uses=1]
+ ret double %I
; CHECK-LABEL: test_FMADD_ASSOC2:
; CHECK: fmadd
; CHECK-NEXT: fmadd
@@ -41,11 +41,11 @@ define double @test_FMADD_ASSOC2(double %A, double %B, double %C,
define double @test_FMSUB_ASSOC1(double %A, double %B, double %C,
double %D, double %E) {
- %F = fmul double %A, %B ; <double> [#uses=1]
- %G = fmul double %C, %D ; <double> [#uses=1]
- %H = fadd double %F, %G ; <double> [#uses=1]
- %I = fsub double %H, %E ; <double> [#uses=1]
- ret double %I
+ %F = fmul double %A, %B ; <double> [#uses=1]
+ %G = fmul double %C, %D ; <double> [#uses=1]
+ %H = fadd double %F, %G ; <double> [#uses=1]
+ %I = fsub double %H, %E ; <double> [#uses=1]
+ ret double %I
; CHECK-LABEL: test_FMSUB_ASSOC1:
; CHECK: fmsub
; CHECK-NEXT: fmadd
@@ -60,11 +60,11 @@ define double @test_FMSUB_ASSOC1(double %A, double %B, double %C,
define double @test_FMSUB_ASSOC2(double %A, double %B, double %C,
double %D, double %E) {
- %F = fmul double %A, %B ; <double> [#uses=1]
- %G = fmul double %C, %D ; <double> [#uses=1]
- %H = fadd double %F, %G ; <double> [#uses=1]
- %I = fsub double %E, %H ; <double> [#uses=1]
- ret double %I
+ %F = fmul double %A, %B ; <double> [#uses=1]
+ %G = fmul double %C, %D ; <double> [#uses=1]
+ %H = fadd double %F, %G ; <double> [#uses=1]
+ %I = fsub double %E, %H ; <double> [#uses=1]
+ ret double %I
; CHECK-LABEL: test_FMSUB_ASSOC2:
; CHECK: fnmsub
; CHECK-NEXT: fnmsub
@@ -77,3 +77,159 @@ define double @test_FMSUB_ASSOC2(double %A, double %B, double %C,
; CHECK-VSX-NEXT: blr
}
+define double @test_FMADD_ASSOC_EXT1(float %A, float %B, double %C,
+ double %D, double %E) {
+ %F = fmul float %A, %B ; <float> [#uses=1]
+ %G = fpext float %F to double ; <double> [#uses=1]
+ %H = fmul double %C, %D ; <double> [#uses=1]
+ %I = fadd double %H, %G ; <double> [#uses=1]
+ %J = fadd double %I, %E ; <double> [#uses=1]
+ ret double %J
+; CHECK-LABEL: test_FMADD_ASSOC_EXT1:
+; CHECK: fmadd
+; CHECK-NEXT: fmadd
+; CHECK-NEXT: blr
+
+; CHECK-VSX-LABEL: test_FMADD_ASSOC_EXT1:
+; CHECK-VSX: xsmaddmdp
+; CHECK-VSX-NEXT: xsmaddadp
+; CHECK-VSX-NEXT: blr
+}
+
+define double @test_FMADD_ASSOC_EXT2(float %A, float %B, float %C,
+ float %D, double %E) {
+ %F = fmul float %A, %B ; <float> [#uses=1]
+ %G = fmul float %C, %D ; <float> [#uses=1]
+ %H = fadd float %F, %G ; <float> [#uses=1]
+ %I = fpext float %H to double ; <double> [#uses=1]
+ %J = fadd double %I, %E ; <double> [#uses=1]
+ ret double %J
+; CHECK-LABEL: test_FMADD_ASSOC_EXT2:
+; CHECK: fmadd
+; CHECK-NEXT: fmadd
+; CHECK-NEXT: blr
+
+; CHECK-VSX-LABEL: test_FMADD_ASSOC_EXT2:
+; CHECK-VSX: xsmaddmdp
+; CHECK-VSX-NEXT: xsmaddadp
+; CHECK-VSX-NEXT: fmr
+; CHECK-VSX-NEXT: blr
+}
+
+define double @test_FMADD_ASSOC_EXT3(float %A, float %B, double %C,
+ double %D, double %E) {
+ %F = fmul float %A, %B ; <float> [#uses=1]
+ %G = fpext float %F to double ; <double> [#uses=1]
+ %H = fmul double %C, %D ; <double> [#uses=1]
+ %I = fadd double %H, %G ; <double> [#uses=1]
+ %J = fadd double %E, %I ; <double> [#uses=1]
+ ret double %J
+; CHECK-LABEL: test_FMADD_ASSOC_EXT3:
+; CHECK: fmadd
+; CHECK-NEXT: fmadd
+; CHECK-NEXT: blr
+
+; CHECK-VSX-LABEL: test_FMADD_ASSOC_EXT3:
+; CHECK-VSX: xsmaddmdp
+; CHECK-VSX-NEXT: xsmaddadp
+; CHECK-VSX-NEXT: blr
+}
+
+define double @test_FMADD_ASSOC_EXT4(float %A, float %B, float %C,
+ float %D, double %E) {
+ %F = fmul float %A, %B ; <float> [#uses=1]
+ %G = fmul float %C, %D ; <float> [#uses=1]
+ %H = fadd float %F, %G ; <float> [#uses=1]
+ %I = fpext float %H to double ; <double> [#uses=1]
+ %J = fadd double %E, %I ; <double> [#uses=1]
+ ret double %J
+; CHECK-LABEL: test_FMADD_ASSOC_EXT4:
+; CHECK: fmadd
+; CHECK-NEXT: fmadd
+; CHECK-NEXT: blr
+
+; CHECK-VSX-LABEL: test_FMADD_ASSOC_EXT4:
+; CHECK-VSX: xsmaddmdp
+; CHECK-VSX-NEXT: xsmaddadp
+; CHECK-VSX-NEXT: fmr
+; CHECK-VSX-NEXT: blr
+}
+
+define double @test_FMSUB_ASSOC_EXT1(float %A, float %B, double %C,
+ double %D, double %E) {
+ %F = fmul float %A, %B ; <float> [#uses=1]
+ %G = fpext float %F to double ; <double> [#uses=1]
+ %H = fmul double %C, %D ; <double> [#uses=1]
+ %I = fadd double %H, %G ; <double> [#uses=1]
+ %J = fsub double %I, %E ; <double> [#uses=1]
+ ret double %J
+; CHECK-LABEL: test_FMSUB_ASSOC_EXT1:
+; CHECK: fmsub
+; CHECK-NEXT: fmadd
+; CHECK-NEXT: blr
+
+; CHECK-VSX-LABEL: test_FMSUB_ASSOC_EXT1:
+; CHECK-VSX: xsmsubmdp
+; CHECK-VSX-NEXT: xsmaddadp
+; CHECK-VSX-NEXT: blr
+}
+
+define double @test_FMSUB_ASSOC_EXT2(float %A, float %B, float %C,
+ float %D, double %E) {
+ %F = fmul float %A, %B ; <float> [#uses=1]
+ %G = fmul float %C, %D ; <float> [#uses=1]
+ %H = fadd float %F, %G ; <float> [#uses=1]
+ %I = fpext float %H to double ; <double> [#uses=1]
+ %J = fsub double %I, %E ; <double> [#uses=1]
+ ret double %J
+; CHECK-LABEL: test_FMSUB_ASSOC_EXT2:
+; CHECK: fmsub
+; CHECK-NEXT: fmadd
+; CHECK-NEXT: blr
+
+; CHECK-VSX-LABEL: test_FMSUB_ASSOC_EXT2:
+; CHECK-VSX: xsmsubmdp
+; CHECK-VSX-NEXT: xsmaddadp
+; CHECK-VSX-NEXT: fmr
+; CHECK-VSX-NEXT: blr
+}
+
+define double @test_FMSUB_ASSOC_EXT3(float %A, float %B, double %C,
+ double %D, double %E) {
+ %F = fmul float %A, %B ; <float> [#uses=1]
+ %G = fpext float %F to double ; <double> [#uses=1]
+ %H = fmul double %C, %D ; <double> [#uses=1]
+ %I = fadd double %H, %G ; <double> [#uses=1]
+ %J = fsub double %E, %I ; <double> [#uses=1]
+ ret double %J
+; CHECK-LABEL: test_FMSUB_ASSOC_EXT3:
+; CHECK: fnmsub
+; CHECK-NEXT: fnmsub
+; CHECK-NEXT: blr
+
+; CHECK-VSX-LABEL: test_FMSUB_ASSOC_EXT3:
+; CHECK-VSX: xsnmsubmdp
+; CHECK-VSX-NEXT: xsnmsubadp
+; CHECK-VSX-NEXT: fmr
+; CHECK-VSX-NEXT: blr
+}
+
+define double @test_FMSUB_ASSOC_EXT4(float %A, float %B, float %C,
+ float %D, double %E) {
+ %F = fmul float %A, %B ; <float> [#uses=1]
+ %G = fmul float %C, %D ; <float> [#uses=1]
+ %H = fadd float %F, %G ; <float> [#uses=1]
+ %I = fpext float %H to double ; <double> [#uses=1]
+ %J = fsub double %E, %I ; <double> [#uses=1]
+ ret double %J
+; CHECK-LABEL: test_FMSUB_ASSOC_EXT4:
+; CHECK: fnmsub
+; CHECK-NEXT: fnmsub
+; CHECK-NEXT: blr
+
+; CHECK-VSX-LABEL: test_FMSUB_ASSOC_EXT4:
+; CHECK-VSX: xsnmsubmdp
+; CHECK-VSX-NEXT: xsnmsubadp
+; CHECK-VSX-NEXT: fmr
+; CHECK-VSX-NEXT: blr
+}
diff --git a/test/CodeGen/PowerPC/fma-ext.ll b/test/CodeGen/PowerPC/fma-ext.ll
index 56825ce8f227..da7c34ccb9d8 100644
--- a/test/CodeGen/PowerPC/fma-ext.ll
+++ b/test/CodeGen/PowerPC/fma-ext.ll
@@ -60,34 +60,34 @@ define double @test_FMSUB_EXT2(float %A, float %B, double %C) {
define double @test_FMSUB_EXT3(float %A, float %B, double %C) {
%D = fmul float %A, %B ; <float> [#uses=1]
- %E = fsub float -0.000000e+00, %D ; <float> [#uses=1]
+ %E = fsub float -0.000000e+00, %D ; <float> [#uses=1]
%F = fpext float %E to double ; <double> [#uses=1]
%G = fsub double %F, %C ; <double> [#uses=1]
ret double %G
; CHECK-LABEL: test_FMSUB_EXT3:
-; CHECK: fneg
-; CHECK-NEXT: fmsub
+; CHECK: fnmadd
+
; CHECK-NEXT: blr
; CHECK-VSX-LABEL: test_FMSUB_EXT3:
-; CHECK-VSX: xsnegdp
-; CHECK-VSX-NEXT: xsmsubmdp
+; CHECK-VSX: xsnmaddmdp
+
; CHECK-VSX-NEXT: blr
}
define double @test_FMSUB_EXT4(float %A, float %B, double %C) {
%D = fmul float %A, %B ; <float> [#uses=1]
%E = fpext float %D to double ; <double> [#uses=1]
- %F = fsub double -0.000000e+00, %E ; <double> [#uses=1]
+ %F = fsub double -0.000000e+00, %E ; <double> [#uses=1]
%G = fsub double %F, %C ; <double> [#uses=1]
ret double %G
; CHECK-LABEL: test_FMSUB_EXT4:
-; CHECK: fneg
-; CHECK-NEXT: fmsub
+; CHECK: fnmadd
+
; CHECK-NEXT: blr
; CHECK-VSX-LABEL: test_FMSUB_EXT4:
-; CHECK-VSX: xsnegdp
-; CHECK-VSX-NEXT: xsmsubmdp
+; CHECK-VSX: xsnmaddmdp
+
; CHECK-VSX-NEXT: blr
-}
\ No newline at end of file
+}
diff --git a/test/CodeGen/PowerPC/fp-int-conversions-direct-moves.ll b/test/CodeGen/PowerPC/fp-int-conversions-direct-moves.ll
new file mode 100644
index 000000000000..1d9b64823140
--- /dev/null
+++ b/test/CodeGen/PowerPC/fp-int-conversions-direct-moves.ll
@@ -0,0 +1,426 @@
+; RUN: llc -mcpu=pwr8 -mtriple=powerpc64-unknown-unknown < %s | FileCheck %s
+; RUN: llc -mcpu=pwr8 -mtriple=powerpc64le-unknown-unknown < %s | FileCheck %s
+
+; Function Attrs: nounwind
+define zeroext i8 @_Z6testcff(float %arg) {
+entry:
+ %arg.addr = alloca float, align 4
+ store float %arg, float* %arg.addr, align 4
+ %0 = load float, float* %arg.addr, align 4
+ %conv = fptoui float %0 to i8
+ ret i8 %conv
+; CHECK-LABEL: @_Z6testcff
+; CHECK: xscvdpsxws [[CONVREG01:[0-9]+]], 1
+; CHECK: mfvsrwz 3, [[CONVREG01]]
+}
+
+; Function Attrs: nounwind
+define float @_Z6testfcc(i8 zeroext %arg) {
+entry:
+ %arg.addr = alloca i8, align 1
+ store i8 %arg, i8* %arg.addr, align 1
+ %0 = load i8, i8* %arg.addr, align 1
+ %conv = uitofp i8 %0 to float
+ ret float %conv
+; CHECK-LABEL: @_Z6testfcc
+; CHECK: mtvsrwz [[MOVEREG01:[0-9]+]], 3
+; FIXME: Once we have XSCVUXDSP implemented, this will change
+; CHECK: fcfidus 1, [[MOVEREG01]]
+}
+
+; Function Attrs: nounwind
+define zeroext i8 @_Z6testcdd(double %arg) {
+entry:
+ %arg.addr = alloca double, align 8
+ store double %arg, double* %arg.addr, align 8
+ %0 = load double, double* %arg.addr, align 8
+ %conv = fptoui double %0 to i8
+ ret i8 %conv
+; CHECK-LABEL: @_Z6testcdd
+; CHECK: xscvdpsxws [[CONVREG02:[0-9]+]], 1
+; CHECK: mfvsrwz 3, [[CONVREG02]]
+}
+
+; Function Attrs: nounwind
+define double @_Z6testdcc(i8 zeroext %arg) {
+entry:
+ %arg.addr = alloca i8, align 1
+ store i8 %arg, i8* %arg.addr, align 1
+ %0 = load i8, i8* %arg.addr, align 1
+ %conv = uitofp i8 %0 to double
+ ret double %conv
+; CHECK-LABEL: @_Z6testdcc
+; CHECK: mtvsrwz [[MOVEREG02:[0-9]+]], 3
+; CHECK: xscvuxddp 1, [[MOVEREG02]]
+}
+
+; Function Attrs: nounwind
+define zeroext i8 @_Z7testucff(float %arg) {
+entry:
+ %arg.addr = alloca float, align 4
+ store float %arg, float* %arg.addr, align 4
+ %0 = load float, float* %arg.addr, align 4
+ %conv = fptoui float %0 to i8
+ ret i8 %conv
+; CHECK-LABEL: @_Z7testucff
+; CHECK: xscvdpsxws [[CONVREG03:[0-9]+]], 1
+; CHECK: mfvsrwz 3, [[CONVREG03]]
+}
+
+; Function Attrs: nounwind
+define float @_Z7testfuch(i8 zeroext %arg) {
+entry:
+ %arg.addr = alloca i8, align 1
+ store i8 %arg, i8* %arg.addr, align 1
+ %0 = load i8, i8* %arg.addr, align 1
+ %conv = uitofp i8 %0 to float
+ ret float %conv
+; CHECK-LABEL: @_Z7testfuch
+; CHECK: mtvsrwz [[MOVEREG03:[0-9]+]], 3
+; FIXME: Once we have XSCVUXDSP implemented, this will change
+; CHECK: fcfidus 1, [[MOVEREG03]]
+}
+
+; Function Attrs: nounwind
+define zeroext i8 @_Z7testucdd(double %arg) {
+entry:
+ %arg.addr = alloca double, align 8
+ store double %arg, double* %arg.addr, align 8
+ %0 = load double, double* %arg.addr, align 8
+ %conv = fptoui double %0 to i8
+ ret i8 %conv
+; CHECK-LABEL: @_Z7testucdd
+; CHECK: xscvdpsxws [[CONVREG04:[0-9]+]], 1
+; CHECK: mfvsrwz 3, [[CONVREG04]]
+}
+
+; Function Attrs: nounwind
+define double @_Z7testduch(i8 zeroext %arg) {
+entry:
+ %arg.addr = alloca i8, align 1
+ store i8 %arg, i8* %arg.addr, align 1
+ %0 = load i8, i8* %arg.addr, align 1
+ %conv = uitofp i8 %0 to double
+ ret double %conv
+; CHECK-LABEL: @_Z7testduch
+; CHECK: mtvsrwz [[MOVEREG04:[0-9]+]], 3
+; CHECK: xscvuxddp 1, [[MOVEREG04]]
+}
+
+; Function Attrs: nounwind
+define signext i16 @_Z6testsff(float %arg) {
+entry:
+ %arg.addr = alloca float, align 4
+ store float %arg, float* %arg.addr, align 4
+ %0 = load float, float* %arg.addr, align 4
+ %conv = fptosi float %0 to i16
+ ret i16 %conv
+; CHECK-LABEL: @_Z6testsff
+; CHECK: xscvdpsxws [[CONVREG05:[0-9]+]], 1
+; CHECK: mfvsrwz 3, [[CONVREG05]]
+}
+
+; Function Attrs: nounwind
+define float @_Z6testfss(i16 signext %arg) {
+entry:
+ %arg.addr = alloca i16, align 2
+ store i16 %arg, i16* %arg.addr, align 2
+ %0 = load i16, i16* %arg.addr, align 2
+ %conv = sitofp i16 %0 to float
+ ret float %conv
+; CHECK-LABEL: @_Z6testfss
+; CHECK: mtvsrwa [[MOVEREG05:[0-9]+]], 3
+; FIXME: Once we have XSCVSXDSP implemented, this will change
+; CHECK: fcfids 1, [[MOVEREG05]]
+}
+
+; Function Attrs: nounwind
+define signext i16 @_Z6testsdd(double %arg) {
+entry:
+ %arg.addr = alloca double, align 8
+ store double %arg, double* %arg.addr, align 8
+ %0 = load double, double* %arg.addr, align 8
+ %conv = fptosi double %0 to i16
+ ret i16 %conv
+; CHECK-LABEL: @_Z6testsdd
+; CHECK: xscvdpsxws [[CONVREG06:[0-9]+]], 1
+; CHECK: mfvsrwz 3, [[CONVREG06]]
+}
+
+; Function Attrs: nounwind
+define double @_Z6testdss(i16 signext %arg) {
+entry:
+ %arg.addr = alloca i16, align 2
+ store i16 %arg, i16* %arg.addr, align 2
+ %0 = load i16, i16* %arg.addr, align 2
+ %conv = sitofp i16 %0 to double
+ ret double %conv
+; CHECK-LABEL: @_Z6testdss
+; CHECK: mtvsrwa [[MOVEREG06:[0-9]+]], 3
+; CHECK: xscvsxddp 1, [[MOVEREG06]]
+}
+
+; Function Attrs: nounwind
+define zeroext i16 @_Z7testusff(float %arg) {
+entry:
+ %arg.addr = alloca float, align 4
+ store float %arg, float* %arg.addr, align 4
+ %0 = load float, float* %arg.addr, align 4
+ %conv = fptoui float %0 to i16
+ ret i16 %conv
+; CHECK-LABEL: @_Z7testusff
+; CHECK: xscvdpsxws [[CONVREG07:[0-9]+]], 1
+; CHECK: mfvsrwz 3, [[CONVREG07]]
+}
+
+; Function Attrs: nounwind
+define float @_Z7testfust(i16 zeroext %arg) {
+entry:
+ %arg.addr = alloca i16, align 2
+ store i16 %arg, i16* %arg.addr, align 2
+ %0 = load i16, i16* %arg.addr, align 2
+ %conv = uitofp i16 %0 to float
+ ret float %conv
+; CHECK-LABEL: @_Z7testfust
+; CHECK: mtvsrwz [[MOVEREG07:[0-9]+]], 3
+; FIXME: Once we have XSCVUXDSP implemented, this will change
+; CHECK: fcfidus 1, [[MOVEREG07]]
+}
+
+; Function Attrs: nounwind
+define zeroext i16 @_Z7testusdd(double %arg) {
+entry:
+ %arg.addr = alloca double, align 8
+ store double %arg, double* %arg.addr, align 8
+ %0 = load double, double* %arg.addr, align 8
+ %conv = fptoui double %0 to i16
+ ret i16 %conv
+; CHECK-LABEL: @_Z7testusdd
+; CHECK: xscvdpsxws [[CONVREG08:[0-9]+]], 1
+; CHECK: mfvsrwz 3, [[CONVREG08]]
+}
+
+; Function Attrs: nounwind
+define double @_Z7testdust(i16 zeroext %arg) {
+entry:
+ %arg.addr = alloca i16, align 2
+ store i16 %arg, i16* %arg.addr, align 2
+ %0 = load i16, i16* %arg.addr, align 2
+ %conv = uitofp i16 %0 to double
+ ret double %conv
+; CHECK-LABEL: @_Z7testdust
+; CHECK: mtvsrwz [[MOVEREG08:[0-9]+]], 3
+; CHECK: xscvuxddp 1, [[MOVEREG08]]
+}
+
+; Function Attrs: nounwind
+define signext i32 @_Z6testiff(float %arg) {
+entry:
+ %arg.addr = alloca float, align 4
+ store float %arg, float* %arg.addr, align 4
+ %0 = load float, float* %arg.addr, align 4
+ %conv = fptosi float %0 to i32
+ ret i32 %conv
+; CHECK-LABEL: @_Z6testiff
+; CHECK: xscvdpsxws [[CONVREG09:[0-9]+]], 1
+; CHECK: mfvsrwz 3, [[CONVREG09]]
+}
+
+; Function Attrs: nounwind
+define float @_Z6testfii(i32 signext %arg) {
+entry:
+ %arg.addr = alloca i32, align 4
+ store i32 %arg, i32* %arg.addr, align 4
+ %0 = load i32, i32* %arg.addr, align 4
+ %conv = sitofp i32 %0 to float
+ ret float %conv
+; CHECK-LABEL: @_Z6testfii
+; CHECK: mtvsrwa [[MOVEREG09:[0-9]+]], 3
+; FIXME: Once we have XSCVSXDSP implemented, this will change
+; CHECK: fcfids 1, [[MOVEREG09]]
+}
+
+; Function Attrs: nounwind
+define signext i32 @_Z6testidd(double %arg) {
+entry:
+ %arg.addr = alloca double, align 8
+ store double %arg, double* %arg.addr, align 8
+ %0 = load double, double* %arg.addr, align 8
+ %conv = fptosi double %0 to i32
+ ret i32 %conv
+; CHECK-LABEL: @_Z6testidd
+; CHECK: xscvdpsxws [[CONVREG10:[0-9]+]], 1
+; CHECK: mfvsrwz 3, [[CONVREG10]]
+}
+
+; Function Attrs: nounwind
+define double @_Z6testdii(i32 signext %arg) {
+entry:
+ %arg.addr = alloca i32, align 4
+ store i32 %arg, i32* %arg.addr, align 4
+ %0 = load i32, i32* %arg.addr, align 4
+ %conv = sitofp i32 %0 to double
+ ret double %conv
+; CHECK-LABEL: @_Z6testdii
+; CHECK: mtvsrwa [[MOVEREG10:[0-9]+]], 3
+; CHECK: xscvsxddp 1, [[MOVEREG10]]
+}
+
+; Function Attrs: nounwind
+define zeroext i32 @_Z7testuiff(float %arg) {
+entry:
+ %arg.addr = alloca float, align 4
+ store float %arg, float* %arg.addr, align 4
+ %0 = load float, float* %arg.addr, align 4
+ %conv = fptoui float %0 to i32
+ ret i32 %conv
+; CHECK-LABEL: @_Z7testuiff
+; CHECK: xscvdpuxws [[CONVREG11:[0-9]+]], 1
+; CHECK: mfvsrwz 3, [[CONVREG11]]
+}
+
+; Function Attrs: nounwind
+define float @_Z7testfuij(i32 zeroext %arg) {
+entry:
+ %arg.addr = alloca i32, align 4
+ store i32 %arg, i32* %arg.addr, align 4
+ %0 = load i32, i32* %arg.addr, align 4
+ %conv = uitofp i32 %0 to float
+ ret float %conv
+; CHECK-LABEL: @_Z7testfuij
+; CHECK: mtvsrwz [[MOVEREG11:[0-9]+]], 3
+; FIXME: Once we have XSCVUXDSP implemented, this will change
+; CHECK: fcfidus 1, [[MOVEREG11]]
+}
+
+; Function Attrs: nounwind
+define zeroext i32 @_Z7testuidd(double %arg) {
+entry:
+ %arg.addr = alloca double, align 8
+ store double %arg, double* %arg.addr, align 8
+ %0 = load double, double* %arg.addr, align 8
+ %conv = fptoui double %0 to i32
+ ret i32 %conv
+; CHECK-LABEL: @_Z7testuidd
+; CHECK: xscvdpuxws [[CONVREG12:[0-9]+]], 1
+; CHECK: mfvsrwz 3, [[CONVREG12]]
+}
+
+; Function Attrs: nounwind
+define double @_Z7testduij(i32 zeroext %arg) {
+entry:
+ %arg.addr = alloca i32, align 4
+ store i32 %arg, i32* %arg.addr, align 4
+ %0 = load i32, i32* %arg.addr, align 4
+ %conv = uitofp i32 %0 to double
+ ret double %conv
+; CHECK-LABEL: @_Z7testduij
+; CHECK: mtvsrwz [[MOVEREG12:[0-9]+]], 3
+; CHECK: xscvuxddp 1, [[MOVEREG12]]
+}
+
+; Function Attrs: nounwind
+define i64 @_Z7testllff(float %arg) {
+entry:
+ %arg.addr = alloca float, align 4
+ store float %arg, float* %arg.addr, align 4
+ %0 = load float, float* %arg.addr, align 4
+ %conv = fptosi float %0 to i64
+ ret i64 %conv
+; CHECK-LABEL: @_Z7testllff
+; CHECK: xscvdpsxds [[CONVREG13:[0-9]+]], 1
+; CHECK: mfvsrd 3, [[CONVREG13]]
+}
+
+; Function Attrs: nounwind
+define float @_Z7testfllx(i64 %arg) {
+entry:
+ %arg.addr = alloca i64, align 8
+ store i64 %arg, i64* %arg.addr, align 8
+ %0 = load i64, i64* %arg.addr, align 8
+ %conv = sitofp i64 %0 to float
+ ret float %conv
+; CHECK-LABEL: @_Z7testfllx
+; CHECK: mtvsrd [[MOVEREG13:[0-9]+]], 3
+; FIXME: Once we have XSCVSXDSP implemented, this will change
+; CHECK: fcfids 1, [[MOVEREG13]]
+}
+
+; Function Attrs: nounwind
+define i64 @_Z7testlldd(double %arg) {
+entry:
+ %arg.addr = alloca double, align 8
+ store double %arg, double* %arg.addr, align 8
+ %0 = load double, double* %arg.addr, align 8
+ %conv = fptosi double %0 to i64
+ ret i64 %conv
+; CHECK-LABEL: @_Z7testlldd
+; CHECK: xscvdpsxds [[CONVREG14:[0-9]+]], 1
+; CHECK: mfvsrd 3, [[CONVREG14]]
+}
+
+; Function Attrs: nounwind
+define double @_Z7testdllx(i64 %arg) {
+entry:
+ %arg.addr = alloca i64, align 8
+ store i64 %arg, i64* %arg.addr, align 8
+ %0 = load i64, i64* %arg.addr, align 8
+ %conv = sitofp i64 %0 to double
+ ret double %conv
+; CHECK-LABEL: @_Z7testdllx
+; CHECK: mtvsrd [[MOVEREG14:[0-9]+]], 3
+; CHECK: xscvsxddp 1, [[MOVEREG14]]
+}
+
+; Function Attrs: nounwind
+define i64 @_Z8testullff(float %arg) {
+entry:
+ %arg.addr = alloca float, align 4
+ store float %arg, float* %arg.addr, align 4
+ %0 = load float, float* %arg.addr, align 4
+ %conv = fptoui float %0 to i64
+ ret i64 %conv
+; CHECK-LABEL: @_Z8testullff
+; CHECK: xscvdpuxds [[CONVREG15:[0-9]+]], 1
+; CHECK: mfvsrd 3, [[CONVREG15]]
+}
+
+; Function Attrs: nounwind
+define float @_Z8testfully(i64 %arg) {
+entry:
+ %arg.addr = alloca i64, align 8
+ store i64 %arg, i64* %arg.addr, align 8
+ %0 = load i64, i64* %arg.addr, align 8
+ %conv = uitofp i64 %0 to float
+ ret float %conv
+; CHECK-LABEL: @_Z8testfully
+; CHECK: mtvsrd [[MOVEREG15:[0-9]+]], 3
+; FIXME: Once we have XSCVUXDSP implemented, this will change
+; CHECK: fcfidus 1, [[MOVEREG15]]
+}
+
+; Function Attrs: nounwind
+define i64 @_Z8testulldd(double %arg) {
+entry:
+ %arg.addr = alloca double, align 8
+ store double %arg, double* %arg.addr, align 8
+ %0 = load double, double* %arg.addr, align 8
+ %conv = fptoui double %0 to i64
+ ret i64 %conv
+; CHECK-LABEL: @_Z8testulldd
+; CHECK: xscvdpuxds [[CONVREG16:[0-9]+]], 1
+; CHECK: mfvsrd 3, [[CONVREG16]]
+}
+
+; Function Attrs: nounwind
+define double @_Z8testdully(i64 %arg) {
+entry:
+ %arg.addr = alloca i64, align 8
+ store i64 %arg, i64* %arg.addr, align 8
+ %0 = load i64, i64* %arg.addr, align 8
+ %conv = uitofp i64 %0 to double
+ ret double %conv
+; CHECK-LABEL: @_Z8testdully
+; CHECK: mtvsrd [[MOVEREG16:[0-9]+]], 3
+; CHECK: xscvuxddp 1, [[MOVEREG16]]
+}
diff --git a/test/CodeGen/PowerPC/fp-to-int-ext.ll b/test/CodeGen/PowerPC/fp-to-int-ext.ll
index bfacd89ca1a2..393fe04d41c6 100644
--- a/test/CodeGen/PowerPC/fp-to-int-ext.ll
+++ b/test/CodeGen/PowerPC/fp-to-int-ext.ll
@@ -5,7 +5,7 @@ target triple = "powerpc64-unknown-linux-gnu"
; Function Attrs: nounwind
define double @foo1(i32* %x) #0 {
entry:
- %0 = load i32* %x, align 4
+ %0 = load i32, i32* %x, align 4
%conv = sext i32 %0 to i64
%conv1 = sitofp i64 %conv to double
ret double %conv1
@@ -18,7 +18,7 @@ entry:
define double @foo2(i32* %x) #0 {
entry:
- %0 = load i32* %x, align 4
+ %0 = load i32, i32* %x, align 4
%conv = zext i32 %0 to i64
%conv1 = sitofp i64 %conv to double
ret double %conv1
@@ -31,7 +31,7 @@ entry:
define double @foo3(i32* %x) #0 {
entry:
- %0 = load i32* %x, align 4
+ %0 = load i32, i32* %x, align 4
%1 = add i32 %0, 8
%conv = zext i32 %1 to i64
%conv1 = sitofp i64 %conv to double
@@ -49,7 +49,7 @@ entry:
define double @foo4(i32* %x) #0 {
entry:
- %0 = load i32* %x, align 4
+ %0 = load i32, i32* %x, align 4
%1 = add i32 %0, 8
%conv = sext i32 %1 to i64
%conv1 = sitofp i64 %conv to double
diff --git a/test/CodeGen/PowerPC/frounds.ll b/test/CodeGen/PowerPC/frounds.ll
index 8eeadc3a3469..49f3465e9e22 100644
--- a/test/CodeGen/PowerPC/frounds.ll
+++ b/test/CodeGen/PowerPC/frounds.ll
@@ -7,12 +7,12 @@ entry:
%"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
%tmp1 = call i32 @llvm.flt.rounds( ) ; <i32> [#uses=1]
store i32 %tmp1, i32* %tmp, align 4
- %tmp2 = load i32* %tmp, align 4 ; <i32> [#uses=1]
+ %tmp2 = load i32, i32* %tmp, align 4 ; <i32> [#uses=1]
store i32 %tmp2, i32* %retval, align 4
br label %return
return: ; preds = %entry
- %retval3 = load i32* %retval ; <i32> [#uses=1]
+ %retval3 = load i32, i32* %retval ; <i32> [#uses=1]
ret i32 %retval3
}
diff --git a/test/CodeGen/PowerPC/glob-comp-aa-crash.ll b/test/CodeGen/PowerPC/glob-comp-aa-crash.ll
index 2ea036f83496..66df6bb8669d 100644
--- a/test/CodeGen/PowerPC/glob-comp-aa-crash.ll
+++ b/test/CodeGen/PowerPC/glob-comp-aa-crash.ll
@@ -23,17 +23,17 @@ entry:
%ref.tmp = alloca %"class.std::__exception_ptr::exception_ptr", align 8
%tmp = alloca { i64, i64 }, align 8
%agg.tmp = alloca %"class.std::__exception_ptr::exception_ptr", align 8
- %__mut_ = getelementptr inbounds %"class.std::__1::__assoc_sub_state"* %this, i64 0, i32 2
- %__m_.i.i = getelementptr inbounds %"class.std::__1::unique_lock"* %__lk, i64 0, i32 0
+ %__mut_ = getelementptr inbounds %"class.std::__1::__assoc_sub_state", %"class.std::__1::__assoc_sub_state"* %this, i64 0, i32 2
+ %__m_.i.i = getelementptr inbounds %"class.std::__1::unique_lock", %"class.std::__1::unique_lock"* %__lk, i64 0, i32 0
store %"class.std::__1::mutex"* %__mut_, %"class.std::__1::mutex"** %__m_.i.i, align 8, !tbaa !5
- %__owns_.i.i = getelementptr inbounds %"class.std::__1::unique_lock"* %__lk, i64 0, i32 1
+ %__owns_.i.i = getelementptr inbounds %"class.std::__1::unique_lock", %"class.std::__1::unique_lock"* %__lk, i64 0, i32 1
store i8 1, i8* %__owns_.i.i, align 8, !tbaa !6
call void @_ZNSt3__15mutex4lockEv(%"class.std::__1::mutex"* %__mut_) #4
invoke void @_ZNSt3__117__assoc_sub_state10__sub_waitERNS_11unique_lockINS_5mutexEEE(%"class.std::__1::__assoc_sub_state"* %this, %"class.std::__1::unique_lock"* %__lk) #4
to label %invoke.cont unwind label %lpad
invoke.cont: ; preds = %entry
- %__exception_ = getelementptr inbounds %"class.std::__1::__assoc_sub_state"* %this, i64 0, i32 1
+ %__exception_ = getelementptr inbounds %"class.std::__1::__assoc_sub_state", %"class.std::__1::__assoc_sub_state"* %this, i64 0, i32 1
%0 = bitcast { i64, i64 }* %tmp to i8*
call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 16, i32 8, i1 false)
call void @_ZNSt15__exception_ptr13exception_ptrC1EMS0_FvvE(%"class.std::__exception_ptr::exception_ptr"* %ref.tmp, { i64, i64 }* byval %tmp) #5
@@ -65,12 +65,12 @@ lpad3: ; preds = %if.then
br label %ehcleanup
if.end: ; preds = %invoke.cont
- %7 = load i8* %__owns_.i.i, align 8, !tbaa !6, !range !4
+ %7 = load i8, i8* %__owns_.i.i, align 8, !tbaa !6, !range !4
%tobool.i.i = icmp eq i8 %7, 0
br i1 %tobool.i.i, label %_ZNSt3__111unique_lockINS_5mutexEED1Ev.exit, label %if.then.i.i
if.then.i.i: ; preds = %if.end
- %8 = load %"class.std::__1::mutex"** %__m_.i.i, align 8, !tbaa !5
+ %8 = load %"class.std::__1::mutex"*, %"class.std::__1::mutex"** %__m_.i.i, align 8, !tbaa !5
call void @_ZNSt3__15mutex6unlockEv(%"class.std::__1::mutex"* %8) #5
br label %_ZNSt3__111unique_lockINS_5mutexEED1Ev.exit
@@ -80,12 +80,12 @@ _ZNSt3__111unique_lockINS_5mutexEED1Ev.exit: ; preds = %if.then.i.i, %if.en
ehcleanup: ; preds = %lpad3, %lpad
%exn.slot.0 = phi i8* [ %5, %lpad3 ], [ %2, %lpad ]
%ehselector.slot.0 = phi i32 [ %6, %lpad3 ], [ %3, %lpad ]
- %9 = load i8* %__owns_.i.i, align 8, !tbaa !6, !range !4
+ %9 = load i8, i8* %__owns_.i.i, align 8, !tbaa !6, !range !4
%tobool.i.i9 = icmp eq i8 %9, 0
br i1 %tobool.i.i9, label %_ZNSt3__111unique_lockINS_5mutexEED1Ev.exit12, label %if.then.i.i11
if.then.i.i11: ; preds = %ehcleanup
- %10 = load %"class.std::__1::mutex"** %__m_.i.i, align 8, !tbaa !5
+ %10 = load %"class.std::__1::mutex"*, %"class.std::__1::mutex"** %__m_.i.i, align 8, !tbaa !5
call void @_ZNSt3__15mutex6unlockEv(%"class.std::__1::mutex"* %10) #5
br label %_ZNSt3__111unique_lockINS_5mutexEED1Ev.exit12
diff --git a/test/CodeGen/PowerPC/hello.ll b/test/CodeGen/PowerPC/hello.ll
index ea27e9257a65..da7fb31526b9 100644
--- a/test/CodeGen/PowerPC/hello.ll
+++ b/test/CodeGen/PowerPC/hello.ll
@@ -5,7 +5,7 @@
@.str = internal constant [13 x i8] c"Hello World!\00"
define i32 @main() {
- %tmp2 = tail call i32 @puts( i8* getelementptr ([13 x i8]* @.str, i32 0, i64 0) )
+ %tmp2 = tail call i32 @puts( i8* getelementptr ([13 x i8], [13 x i8]* @.str, i32 0, i64 0) )
ret i32 0
}
diff --git a/test/CodeGen/PowerPC/hidden-vis-2.ll b/test/CodeGen/PowerPC/hidden-vis-2.ll
index e9e2c0a93a0d..3eb9dbd21ade 100644
--- a/test/CodeGen/PowerPC/hidden-vis-2.ll
+++ b/test/CodeGen/PowerPC/hidden-vis-2.ll
@@ -5,8 +5,8 @@
define i32 @t() nounwind readonly {
entry:
- %0 = load i32* @x, align 4 ; <i32> [#uses=1]
- %1 = load i32* @y, align 4 ; <i32> [#uses=1]
+ %0 = load i32, i32* @x, align 4 ; <i32> [#uses=1]
+ %1 = load i32, i32* @y, align 4 ; <i32> [#uses=1]
%2 = add i32 %1, %0 ; <i32> [#uses=1]
ret i32 %2
}
diff --git a/test/CodeGen/PowerPC/hidden-vis.ll b/test/CodeGen/PowerPC/hidden-vis.ll
index b2cc1431ebde..bcb6723402b0 100644
--- a/test/CodeGen/PowerPC/hidden-vis.ll
+++ b/test/CodeGen/PowerPC/hidden-vis.ll
@@ -4,6 +4,6 @@
define i32 @t() nounwind readonly {
entry:
- %0 = load i32* @x, align 4 ; <i32> [#uses=1]
+ %0 = load i32, i32* @x, align 4 ; <i32> [#uses=1]
ret i32 %0
}
diff --git a/test/CodeGen/PowerPC/htm.ll b/test/CodeGen/PowerPC/htm.ll
new file mode 100644
index 000000000000..0e4304dc163b
--- /dev/null
+++ b/test/CodeGen/PowerPC/htm.ll
@@ -0,0 +1,125 @@
+; RUN: llc -mcpu=pwr8 -mattr=+htm < %s | FileCheck %s
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+define zeroext i32 @test1() {
+entry:
+ %0 = tail call i32 @llvm.ppc.tbegin(i32 0)
+ ret i32 %0
+
+; CHECK-LABEL: @test1
+; CHECK: tbegin. 0
+; CHECK: mfocrf [[REGISTER1:[0-9]+]], 128
+; CHECK: rlwinm [[REGISTER2:[0-9]+]], [[REGISTER1]], 3, 31, 31
+; CHECK: xori {{[0-9]+}}, [[REGISTER2]], 1
+}
+
+declare i32 @llvm.ppc.tbegin(i32) #1
+
+
+define zeroext i32 @test2() {
+entry:
+ %0 = tail call i32 @llvm.ppc.tend(i32 0)
+ ret i32 %0
+; CHECK-LABEL: @test2
+; CHECK: tend. 0
+; CHECK: mfocrf {{[0-9]+}}, 128
+}
+
+declare i32 @llvm.ppc.tend(i32)
+
+
+define void @test3() {
+entry:
+ %0 = tail call i32 @llvm.ppc.tabort(i32 0)
+ %1 = tail call i32 @llvm.ppc.tabortdc(i32 0, i32 1, i32 2)
+ %2 = tail call i32 @llvm.ppc.tabortdci(i32 0, i32 1, i32 2)
+ %3 = tail call i32 @llvm.ppc.tabortwc(i32 0, i32 1, i32 2)
+ %4 = tail call i32 @llvm.ppc.tabortwci(i32 0, i32 1, i32 2)
+ ret void
+; CHECK-LABEL: @test3
+; CHECK: tabort. {{[0-9]+}}
+; CHECK: tabortdc. 0, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: tabortdci. 0, {{[0-9]+}}, 2
+; CHECK: tabortwc. 0, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: tabortwci. 0, {{[0-9]+}}, 2
+}
+
+declare i32 @llvm.ppc.tabort(i32)
+declare i32 @llvm.ppc.tabortdc(i32, i32, i32)
+declare i32 @llvm.ppc.tabortdci(i32, i32, i32)
+declare i32 @llvm.ppc.tabortwc(i32, i32, i32)
+declare i32 @llvm.ppc.tabortwci(i32, i32, i32)
+
+
+define void @test4() {
+entry:
+ %0 = tail call i32 @llvm.ppc.tendall()
+ %1 = tail call i32 @llvm.ppc.tresume()
+ %2 = tail call i32 @llvm.ppc.tsuspend()
+ ret void
+; CHECK-LABEL: @test4
+; CHECK: tend. 1
+; CHECK: tsr. 1
+; CHECK: tsr. 0
+}
+
+declare i32 @llvm.ppc.tendall()
+declare i32 @llvm.ppc.tresume()
+declare i32 @llvm.ppc.tsuspend()
+
+
+define void @test5(i64 %v) {
+entry:
+ tail call void @llvm.ppc.set.texasr(i64 %v)
+ tail call void @llvm.ppc.set.texasru(i64 %v)
+ tail call void @llvm.ppc.set.tfhar(i64 %v)
+ tail call void @llvm.ppc.set.tfiar(i64 %v)
+ ret void
+; CHECK-LABEL: @test5
+; CHECK: mtspr 130, [[REG1:[0-9]+]]
+; CHECK: mtspr 131, [[REG2:[0-9]+]]
+; CHECK: mtspr 128, [[REG3:[0-9]+]]
+; CHECK: mtspr 129, [[REG4:[0-9]+]]
+}
+
+define i64 @test6() {
+entry:
+ %0 = tail call i64 @llvm.ppc.get.texasr()
+ ret i64 %0
+; CHECK-LABEL: @test6
+; CHECK: mfspr [[REG1:[0-9]+]], 130
+}
+
+define i64 @test7() {
+entry:
+ %0 = tail call i64 @llvm.ppc.get.texasru()
+ ret i64 %0
+; CHECK-LABEL: @test7
+; CHECK: mfspr [[REG1:[0-9]+]], 131
+}
+
+define i64 @test8() {
+entry:
+ %0 = tail call i64 @llvm.ppc.get.tfhar()
+ ret i64 %0
+; CHECK-LABEL: @test8
+; CHECK: mfspr [[REG1:[0-9]+]], 128
+}
+
+define i64 @test9() {
+entry:
+ %0 = tail call i64 @llvm.ppc.get.tfiar()
+ ret i64 %0
+; CHECK-LABEL: @test9
+; CHECK: mfspr [[REG1:[0-9]+]], 129
+}
+
+declare void @llvm.ppc.set.texasr(i64)
+declare void @llvm.ppc.set.texasru(i64)
+declare void @llvm.ppc.set.tfhar(i64)
+declare void @llvm.ppc.set.tfiar(i64)
+declare i64 @llvm.ppc.get.texasr()
+declare i64 @llvm.ppc.get.texasru()
+declare i64 @llvm.ppc.get.tfhar()
+declare i64 @llvm.ppc.get.tfiar()
diff --git a/test/CodeGen/PowerPC/i64_fp_round.ll b/test/CodeGen/PowerPC/i64_fp_round.ll
index 5770d788caf7..2530b8a92825 100644
--- a/test/CodeGen/PowerPC/i64_fp_round.ll
+++ b/test/CodeGen/PowerPC/i64_fp_round.ll
@@ -14,7 +14,7 @@ entry:
; CHECK: sradi [[REG1:[0-9]+]], 3, 53
; CHECK: addi [[REG2:[0-9]+]], [[REG1]], 1
-; CHECK: cmpldi 0, [[REG2]], 1
+; CHECK: cmpldi [[REG2]], 1
; CHECK: isel [[REG3:[0-9]+]], {{[0-9]+}}, 3, 1
; CHECK: std [[REG3]], -{{[0-9]+}}(1)
diff --git a/test/CodeGen/PowerPC/ia-mem-r0.ll b/test/CodeGen/PowerPC/ia-mem-r0.ll
index 4ab17edc5b10..a007fc1aa631 100644
--- a/test/CodeGen/PowerPC/ia-mem-r0.ll
+++ b/test/CodeGen/PowerPC/ia-mem-r0.ll
@@ -16,78 +16,78 @@ define void @test1({ i8*, void (i8*, i8*)* } %fn_arg) {
%1 = bitcast [18 x i64]* %regs to i64*
call void asm sideeffect "std 14, $0", "=*m"(i64* %1)
%2 = bitcast [18 x i64]* %regs to i8*
- %3 = getelementptr i8* %2, i32 8
+ %3 = getelementptr i8, i8* %2, i32 8
%4 = bitcast i8* %3 to i64*
call void asm sideeffect "std 15, $0", "=*m"(i64* %4)
%5 = bitcast [18 x i64]* %regs to i8*
- %6 = getelementptr i8* %5, i32 16
+ %6 = getelementptr i8, i8* %5, i32 16
%7 = bitcast i8* %6 to i64*
call void asm sideeffect "std 16, $0", "=*m"(i64* %7)
%8 = bitcast [18 x i64]* %regs to i8*
- %9 = getelementptr i8* %8, i32 24
+ %9 = getelementptr i8, i8* %8, i32 24
%10 = bitcast i8* %9 to i64*
call void asm sideeffect "std 17, $0", "=*m"(i64* %10)
%11 = bitcast [18 x i64]* %regs to i8*
- %12 = getelementptr i8* %11, i32 32
+ %12 = getelementptr i8, i8* %11, i32 32
%13 = bitcast i8* %12 to i64*
call void asm sideeffect "std 18, $0", "=*m"(i64* %13)
%14 = bitcast [18 x i64]* %regs to i8*
- %15 = getelementptr i8* %14, i32 40
+ %15 = getelementptr i8, i8* %14, i32 40
%16 = bitcast i8* %15 to i64*
call void asm sideeffect "std 19, $0", "=*m"(i64* %16)
%17 = bitcast [18 x i64]* %regs to i8*
- %18 = getelementptr i8* %17, i32 48
+ %18 = getelementptr i8, i8* %17, i32 48
%19 = bitcast i8* %18 to i64*
call void asm sideeffect "std 20, $0", "=*m"(i64* %19)
%20 = bitcast [18 x i64]* %regs to i8*
- %21 = getelementptr i8* %20, i32 56
+ %21 = getelementptr i8, i8* %20, i32 56
%22 = bitcast i8* %21 to i64*
call void asm sideeffect "std 21, $0", "=*m"(i64* %22)
%23 = bitcast [18 x i64]* %regs to i8*
- %24 = getelementptr i8* %23, i32 64
+ %24 = getelementptr i8, i8* %23, i32 64
%25 = bitcast i8* %24 to i64*
call void asm sideeffect "std 22, $0", "=*m"(i64* %25)
%26 = bitcast [18 x i64]* %regs to i8*
- %27 = getelementptr i8* %26, i32 72
+ %27 = getelementptr i8, i8* %26, i32 72
%28 = bitcast i8* %27 to i64*
call void asm sideeffect "std 23, $0", "=*m"(i64* %28)
%29 = bitcast [18 x i64]* %regs to i8*
- %30 = getelementptr i8* %29, i32 80
+ %30 = getelementptr i8, i8* %29, i32 80
%31 = bitcast i8* %30 to i64*
call void asm sideeffect "std 24, $0", "=*m"(i64* %31)
%32 = bitcast [18 x i64]* %regs to i8*
- %33 = getelementptr i8* %32, i32 88
+ %33 = getelementptr i8, i8* %32, i32 88
%34 = bitcast i8* %33 to i64*
call void asm sideeffect "std 25, $0", "=*m"(i64* %34)
%35 = bitcast [18 x i64]* %regs to i8*
- %36 = getelementptr i8* %35, i32 96
+ %36 = getelementptr i8, i8* %35, i32 96
%37 = bitcast i8* %36 to i64*
call void asm sideeffect "std 26, $0", "=*m"(i64* %37)
%38 = bitcast [18 x i64]* %regs to i8*
- %39 = getelementptr i8* %38, i32 104
+ %39 = getelementptr i8, i8* %38, i32 104
%40 = bitcast i8* %39 to i64*
call void asm sideeffect "std 27, $0", "=*m"(i64* %40)
%41 = bitcast [18 x i64]* %regs to i8*
- %42 = getelementptr i8* %41, i32 112
+ %42 = getelementptr i8, i8* %41, i32 112
%43 = bitcast i8* %42 to i64*
call void asm sideeffect "std 28, $0", "=*m"(i64* %43)
%44 = bitcast [18 x i64]* %regs to i8*
- %45 = getelementptr i8* %44, i32 120
+ %45 = getelementptr i8, i8* %44, i32 120
%46 = bitcast i8* %45 to i64*
call void asm sideeffect "std 29, $0", "=*m"(i64* %46)
%47 = bitcast [18 x i64]* %regs to i8*
- %48 = getelementptr i8* %47, i32 128
+ %48 = getelementptr i8, i8* %47, i32 128
%49 = bitcast i8* %48 to i64*
call void asm sideeffect "std 30, $0", "=*m"(i64* %49)
%50 = bitcast [18 x i64]* %regs to i8*
- %51 = getelementptr i8* %50, i32 136
+ %51 = getelementptr i8, i8* %50, i32 136
%52 = bitcast i8* %51 to i64*
call void asm sideeffect "std 31, $0", "=*m"(i64* %52)
- %53 = getelementptr { i8*, void (i8*, i8*)* }* %fn, i32 0, i32 1
- %.funcptr = load void (i8*, i8*)** %53
- %54 = getelementptr { i8*, void (i8*, i8*)* }* %fn, i32 0, i32 0
- %.ptr = load i8** %54
- %55 = load i8** %sp
+ %53 = getelementptr { i8*, void (i8*, i8*)* }, { i8*, void (i8*, i8*)* }* %fn, i32 0, i32 1
+ %.funcptr = load void (i8*, i8*)*, void (i8*, i8*)** %53
+ %54 = getelementptr { i8*, void (i8*, i8*)* }, { i8*, void (i8*, i8*)* }* %fn, i32 0, i32 0
+ %.ptr = load i8*, i8** %54
+ %55 = load i8*, i8** %sp
call void %.funcptr(i8* %.ptr, i8* %55)
ret void
}
diff --git a/test/CodeGen/PowerPC/indexed-load.ll b/test/CodeGen/PowerPC/indexed-load.ll
index 59fc058c9414..ce386d76cf17 100644
--- a/test/CodeGen/PowerPC/indexed-load.ll
+++ b/test/CodeGen/PowerPC/indexed-load.ll
@@ -13,9 +13,9 @@ target triple = "powerpc64le-unknown-linux-gnu"
; CHECK-NOT: stwx {{[0-9]+}}, {{[0-9]+}}, 64
define void @f(%class.test* %this) {
entry:
- %Subminor.i.i = getelementptr inbounds %class.test* %this, i64 0, i32 1
+ %Subminor.i.i = getelementptr inbounds %class.test, %class.test* %this, i64 0, i32 1
%0 = bitcast [5 x i8]* %Subminor.i.i to i40*
- %bf.load2.i.i = load i40* %0, align 4
+ %bf.load2.i.i = load i40, i40* %0, align 4
%bf.clear7.i.i = and i40 %bf.load2.i.i, -8589934592
store i40 %bf.clear7.i.i, i40* %0, align 4
ret void
diff --git a/test/CodeGen/PowerPC/indirectbr.ll b/test/CodeGen/PowerPC/indirectbr.ll
index fd06fd9b7f46..d1e03ca7773a 100644
--- a/test/CodeGen/PowerPC/indirectbr.ll
+++ b/test/CodeGen/PowerPC/indirectbr.ll
@@ -10,7 +10,7 @@ define internal i32 @foo(i32 %i) nounwind {
; STATIC-LABEL: foo:
; PPC64-LABEL: foo:
entry:
- %0 = load i8** @nextaddr, align 4 ; <i8*> [#uses=2]
+ %0 = load i8*, i8** @nextaddr, align 4 ; <i8*> [#uses=2]
%1 = icmp eq i8* %0, null ; <i1> [#uses=1]
br i1 %1, label %bb3, label %bb2
@@ -37,8 +37,8 @@ bb2: ; preds = %entry, %bb3
indirectbr i8* %gotovar.4.0, [label %L5, label %L4, label %L3, label %L2, label %L1]
bb3: ; preds = %entry
- %2 = getelementptr inbounds [5 x i8*]* @C.0.2070, i32 0, i32 %i ; <i8**> [#uses=1]
- %gotovar.4.0.pre = load i8** %2, align 4 ; <i8*> [#uses=1]
+ %2 = getelementptr inbounds [5 x i8*], [5 x i8*]* @C.0.2070, i32 0, i32 %i ; <i8**> [#uses=1]
+ %gotovar.4.0.pre = load i8*, i8** %2, align 4 ; <i8*> [#uses=1]
br label %bb2
L5: ; preds = %bb2
diff --git a/test/CodeGen/PowerPC/inlineasm-i64-reg.ll b/test/CodeGen/PowerPC/inlineasm-i64-reg.ll
index 4d8e704f07a0..05f2a197cd51 100644
--- a/test/CodeGen/PowerPC/inlineasm-i64-reg.ll
+++ b/test/CodeGen/PowerPC/inlineasm-i64-reg.ll
@@ -19,18 +19,18 @@ entry:
store %struct.BG_CoordinateMapping_t* %map, %struct.BG_CoordinateMapping_t** %map.addr, align 8
store i64* %numentries, i64** %numentries.addr, align 8
store i64 1055, i64* %r0, align 8
- %0 = load i64* %mapsize.addr, align 8
+ %0 = load i64, i64* %mapsize.addr, align 8
store i64 %0, i64* %r3, align 8
- %1 = load %struct.BG_CoordinateMapping_t** %map.addr, align 8
+ %1 = load %struct.BG_CoordinateMapping_t*, %struct.BG_CoordinateMapping_t** %map.addr, align 8
%2 = ptrtoint %struct.BG_CoordinateMapping_t* %1 to i64
store i64 %2, i64* %r4, align 8
- %3 = load i64** %numentries.addr, align 8
+ %3 = load i64*, i64** %numentries.addr, align 8
%4 = ptrtoint i64* %3 to i64
store i64 %4, i64* %r5, align 8
- %5 = load i64* %r0, align 8
- %6 = load i64* %r3, align 8
- %7 = load i64* %r4, align 8
- %8 = load i64* %r5, align 8
+ %5 = load i64, i64* %r0, align 8
+ %6 = load i64, i64* %r3, align 8
+ %7 = load i64, i64* %r4, align 8
+ %8 = load i64, i64* %r5, align 8
%9 = call { i64, i64, i64, i64 } asm sideeffect "sc", "={r0},={r3},={r4},={r5},{r0},{r3},{r4},{r5},~{r6},~{r7},~{r8},~{r9},~{r10},~{r11},~{r12},~{cr0},~{memory}"(i64 %5, i64 %6, i64 %7, i64 %8) #1, !srcloc !0
; CHECK-LABEL: @Kernel_RanksToCoords
@@ -52,9 +52,9 @@ entry:
store i64 %asmresult1, i64* %r3, align 8
store i64 %asmresult2, i64* %r4, align 8
store i64 %asmresult3, i64* %r5, align 8
- %10 = load i64* %r3, align 8
+ %10 = load i64, i64* %r3, align 8
store i64 %10, i64* %tmp
- %11 = load i64* %tmp
+ %11 = load i64, i64* %tmp
%conv = trunc i64 %11 to i32
ret i32 %conv
}
@@ -87,7 +87,7 @@ entry:
if.then: ; preds = %entry
call void @mtrace()
- %.pre = load i32* %argc.addr, align 4
+ %.pre = load i32, i32* %argc.addr, align 4
br label %if.end
if.end: ; preds = %if.then, %entry
diff --git a/test/CodeGen/PowerPC/isel-rc-nox0.ll b/test/CodeGen/PowerPC/isel-rc-nox0.ll
index ac99aa408bdd..7475e12c4477 100644
--- a/test/CodeGen/PowerPC/isel-rc-nox0.ll
+++ b/test/CodeGen/PowerPC/isel-rc-nox0.ll
@@ -17,12 +17,12 @@ for.cond1.preheader.i: ; preds = %for.cond1.preheader
crc32_gentab.exit: ; preds = %for.cond1.preheader.i
%tobool.i19.i.i = icmp eq i32 undef, 0
- %retval.0.i.i.i = select i1 %tobool.i19.i.i, i32* getelementptr inbounds ([1 x [9 x i32]]* @g_62, i64 0, i64 0, i64 6), i32* getelementptr inbounds ([1 x [9 x i32]]* @g_62, i64 0, i64 0, i64 8)
+ %retval.0.i.i.i = select i1 %tobool.i19.i.i, i32* getelementptr inbounds ([1 x [9 x i32]], [1 x [9 x i32]]* @g_62, i64 0, i64 0, i64 6), i32* getelementptr inbounds ([1 x [9 x i32]], [1 x [9 x i32]]* @g_62, i64 0, i64 0, i64 8)
br label %for.cond1.preheader.i2961.i
for.cond1.preheader.i2961.i: ; preds = %for.inc44.i2977.i, %crc32_gentab.exit
call void @llvm.memset.p0i8.i64(i8* bitcast ([1 x [9 x i32]]* @g_62 to i8*), i8 -1, i64 36, i32 4, i1 false) #1
- %0 = load i32* %retval.0.i.i.i, align 4
+ %0 = load i32, i32* %retval.0.i.i.i, align 4
%tobool.i2967.i = icmp eq i32 %0, 0
br label %for.body21.i2968.i
diff --git a/test/CodeGen/PowerPC/lbz-from-ld-shift.ll b/test/CodeGen/PowerPC/lbz-from-ld-shift.ll
index 3eacd6a45fb4..7696b84708d8 100644
--- a/test/CodeGen/PowerPC/lbz-from-ld-shift.ll
+++ b/test/CodeGen/PowerPC/lbz-from-ld-shift.ll
@@ -5,7 +5,7 @@ target triple = "powerpc64-unknown-linux-gnu"
; Function Attrs: nounwind readonly
define signext i32 @test(i32* nocapture readonly %P) #0 {
entry:
- %0 = load i32* %P, align 4
+ %0 = load i32, i32* %P, align 4
%shr = lshr i32 %0, 24
ret i32 %shr
diff --git a/test/CodeGen/PowerPC/lbzux.ll b/test/CodeGen/PowerPC/lbzux.ll
index f3158b32f390..4bd9cb6ab18a 100644
--- a/test/CodeGen/PowerPC/lbzux.ll
+++ b/test/CodeGen/PowerPC/lbzux.ll
@@ -4,7 +4,7 @@ target triple = "powerpc64-unknown-linux-gnu"
define fastcc void @allocateSpace(i1 %cond1, i1 %cond2) nounwind {
entry:
- %0 = load i8** undef, align 8
+ %0 = load i8*, i8** undef, align 8
br i1 undef, label %return, label %lor.lhs.false
lor.lhs.false: ; preds = %entry
@@ -18,7 +18,7 @@ if.then15: ; preds = %if.end7
while.cond: ; preds = %while.body, %if.then15
%idxprom17 = sext i32 0 to i64
- %arrayidx18 = getelementptr inbounds i8* %0, i64 %idxprom17
+ %arrayidx18 = getelementptr inbounds i8, i8* %0, i64 %idxprom17
%or = or i32 undef, undef
br i1 %cond1, label %if.end71, label %while.body
@@ -27,10 +27,10 @@ while.body: ; preds = %while.cond
if.then45: ; preds = %while.body
%idxprom48139 = zext i32 %or to i64
- %arrayidx49 = getelementptr inbounds i8* %0, i64 %idxprom48139
+ %arrayidx49 = getelementptr inbounds i8, i8* %0, i64 %idxprom48139
%1 = bitcast i8* %arrayidx49 to i16*
%2 = bitcast i8* %arrayidx18 to i16*
- %3 = load i16* %1, align 1
+ %3 = load i16, i16* %1, align 1
store i16 %3, i16* %2, align 1
br label %return
diff --git a/test/CodeGen/PowerPC/ld-st-upd.ll b/test/CodeGen/PowerPC/ld-st-upd.ll
index 24f31aca05ad..be0c94a54fe0 100644
--- a/test/CodeGen/PowerPC/ld-st-upd.ll
+++ b/test/CodeGen/PowerPC/ld-st-upd.ll
@@ -4,8 +4,8 @@ target triple = "powerpc-unknown-linux-gnu"
; Function Attrs: nounwind
define i32* @test4(i32* readonly %X, i32* nocapture %dest) #0 {
- %Y = getelementptr i32* %X, i64 4
- %A = load i32* %Y, align 4
+ %Y = getelementptr i32, i32* %X, i64 4
+ %A = load i32, i32* %Y, align 4
store i32 %A, i32* %dest, align 4
ret i32* %Y
diff --git a/test/CodeGen/PowerPC/ldtoc-inv.ll b/test/CodeGen/PowerPC/ldtoc-inv.ll
new file mode 100644
index 000000000000..07a1686cc704
--- /dev/null
+++ b/test/CodeGen/PowerPC/ldtoc-inv.ll
@@ -0,0 +1,39 @@
+; RUN: llc -mcpu=pwr7 < %s | FileCheck %s
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+@phasor = external constant [4096 x i32]
+
+; Function Attrs: nounwind
+define void @test(i32* nocapture %out, i32 zeroext %step_size) #0 {
+entry:
+ %shl = shl i32 %step_size, 2
+ %idxprom = zext i32 %shl to i64
+ br label %for.body
+
+; Make sure that the TOC load has been hoisted out of the loop.
+; CHECK-LABEL: @test
+; CHECK: ld {{[0-9]+}}, .LC{{[0-9]+}}@toc@l
+; CHECK: %for.body
+; CHECK: blr
+
+for.body: ; preds = %entry, %for.body
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %0 = trunc i64 %indvars.iv to i32
+ %shl1 = shl i32 %0, %step_size
+ %idxprom2 = sext i32 %shl1 to i64
+ %arrayidx.sum = add nsw i64 %idxprom2, %idxprom
+ %arrayidx3 = getelementptr inbounds [4096 x i32], [4096 x i32]* @phasor, i64 0, i64 %arrayidx.sum
+ %1 = load i32, i32* %arrayidx3, align 4
+ %arrayidx5 = getelementptr inbounds i32, i32* %out, i64 %indvars.iv
+ store i32 %1, i32* %arrayidx5, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 4
+ %cmp = icmp slt i64 %indvars.iv.next, 1020
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body
+ ret void
+}
+
+attributes #0 = { nounwind }
+
diff --git a/test/CodeGen/PowerPC/lha.ll b/test/CodeGen/PowerPC/lha.ll
index 3a100c1aae6d..c5c5f5905f67 100644
--- a/test/CodeGen/PowerPC/lha.ll
+++ b/test/CodeGen/PowerPC/lha.ll
@@ -1,7 +1,7 @@
; RUN: llc < %s -march=ppc32 | grep lha
define i32 @test(i16* %a) {
- %tmp.1 = load i16* %a ; <i16> [#uses=1]
+ %tmp.1 = load i16, i16* %a ; <i16> [#uses=1]
%tmp.2 = sext i16 %tmp.1 to i32 ; <i32> [#uses=1]
ret i32 %tmp.2
}
diff --git a/test/CodeGen/PowerPC/load-constant-addr.ll b/test/CodeGen/PowerPC/load-constant-addr.ll
index f1d061c1ad5a..783443008cae 100644
--- a/test/CodeGen/PowerPC/load-constant-addr.ll
+++ b/test/CodeGen/PowerPC/load-constant-addr.ll
@@ -3,7 +3,7 @@
; RUN: llc < %s -march=ppc32 | not grep ori
define float @test() {
- %tmp.i = load float* inttoptr (i32 186018016 to float*) ; <float> [#uses=1]
+ %tmp.i = load float, float* inttoptr (i32 186018016 to float*) ; <float> [#uses=1]
ret float %tmp.i
}
diff --git a/test/CodeGen/PowerPC/load-shift-combine.ll b/test/CodeGen/PowerPC/load-shift-combine.ll
index a5d1224864a6..8d1f8146db95 100644
--- a/test/CodeGen/PowerPC/load-shift-combine.ll
+++ b/test/CodeGen/PowerPC/load-shift-combine.ll
@@ -16,19 +16,19 @@
define void @test1847() nounwind {
entry:
%j = alloca i32, align 4
- %0 = load i64* getelementptr inbounds (%struct.Info* @info, i32 0, i32 8), align 8
- %1 = load i32* @fails, align 4
- %bf.load1 = load i96* bitcast (%struct.S1847* getelementptr inbounds ([5 x %struct.S1847]* @a1847, i32 0, i64 2) to i96*), align 8
+ %0 = load i64, i64* getelementptr inbounds (%struct.Info, %struct.Info* @info, i32 0, i32 8), align 8
+ %1 = load i32, i32* @fails, align 4
+ %bf.load1 = load i96, i96* bitcast (%struct.S1847* getelementptr inbounds ([5 x %struct.S1847], [5 x %struct.S1847]* @a1847, i32 0, i64 2) to i96*), align 8
%bf.clear2 = and i96 %bf.load1, 302231454903657293676543
%bf.set3 = or i96 %bf.clear2, -38383394772764476296921088
- store i96 %bf.set3, i96* bitcast (%struct.S1847* getelementptr inbounds ([5 x %struct.S1847]* @a1847, i32 0, i64 2) to i96*), align 8
- %2 = load i32* %j, align 4
- %3 = load i32* %j, align 4
+ store i96 %bf.set3, i96* bitcast (%struct.S1847* getelementptr inbounds ([5 x %struct.S1847], [5 x %struct.S1847]* @a1847, i32 0, i64 2) to i96*), align 8
+ %2 = load i32, i32* %j, align 4
+ %3 = load i32, i32* %j, align 4
%inc11 = add nsw i32 %3, 1
store i32 %inc11, i32* %j, align 4
- %bf.load15 = load i96* bitcast (%struct.S1847* getelementptr inbounds ([5 x %struct.S1847]* @a1847, i32 0, i64 2) to i96*), align 8
+ %bf.load15 = load i96, i96* bitcast (%struct.S1847* getelementptr inbounds ([5 x %struct.S1847], [5 x %struct.S1847]* @a1847, i32 0, i64 2) to i96*), align 8
%bf.clear16 = and i96 %bf.load15, -18446744069414584321
%bf.set17 = or i96 %bf.clear16, 18446743532543672320
- store i96 %bf.set17, i96* bitcast (%struct.S1847* getelementptr inbounds ([5 x %struct.S1847]* @a1847, i32 0, i64 2) to i96*), align 8
+ store i96 %bf.set17, i96* bitcast (%struct.S1847* getelementptr inbounds ([5 x %struct.S1847], [5 x %struct.S1847]* @a1847, i32 0, i64 2) to i96*), align 8
ret void
}
diff --git a/test/CodeGen/PowerPC/long-compare.ll b/test/CodeGen/PowerPC/long-compare.ll
index 915595f6dbac..e53356a5ddf2 100644
--- a/test/CodeGen/PowerPC/long-compare.ll
+++ b/test/CodeGen/PowerPC/long-compare.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=ppc32 | grep cntlzw
+; RUN: llc < %s -march=ppc32 | grep cntlz
; RUN: llc < %s -march=ppc32 | not grep xori
; RUN: llc < %s -march=ppc32 | not grep "li "
; RUN: llc < %s -march=ppc32 | not grep "mr "
diff --git a/test/CodeGen/PowerPC/loop-data-prefetch-inner.ll b/test/CodeGen/PowerPC/loop-data-prefetch-inner.ll
new file mode 100644
index 000000000000..adcc7b90bc48
--- /dev/null
+++ b/test/CodeGen/PowerPC/loop-data-prefetch-inner.ll
@@ -0,0 +1,66 @@
+; RUN: llc < %s | FileCheck %s
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-bgq-linux"
+
+; Function Attrs: nounwind
+define void @foo(double* %x, double* nocapture readonly %y) #0 {
+entry:
+ %scevgep = getelementptr double, double* %x, i64 1599
+ %scevgep20 = getelementptr double, double* %y, i64 1599
+ br label %vector.memcheck
+
+vector.memcheck: ; preds = %for.end, %entry
+ %j.015 = phi i32 [ 0, %entry ], [ %inc7, %for.end ]
+ %bound0 = icmp uge double* %scevgep20, %x
+ %bound1 = icmp uge double* %scevgep, %y
+ %memcheck.conflict = and i1 %bound0, %bound1
+ br i1 %memcheck.conflict, label %middle.block, label %vector.body
+
+vector.body: ; preds = %vector.memcheck, %vector.body
+ %index = phi i64 [ %index.next, %vector.body ], [ 0, %vector.memcheck ]
+ %0 = getelementptr inbounds double, double* %y, i64 %index
+ %1 = bitcast double* %0 to <4 x double>*
+ %wide.load = load <4 x double>, <4 x double>* %1, align 8
+ %2 = fadd <4 x double> %wide.load, <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>
+ %3 = getelementptr inbounds double, double* %x, i64 %index
+ %4 = bitcast double* %3 to <4 x double>*
+ store <4 x double> %2, <4 x double>* %4, align 8
+ %index.next = add i64 %index, 4
+ %5 = icmp eq i64 %index.next, 1600
+ br i1 %5, label %middle.block, label %vector.body
+
+middle.block: ; preds = %vector.body, %vector.memcheck
+ %resume.val = phi i1 [ false, %vector.memcheck ], [ true, %vector.body ]
+ %trunc.resume.val = phi i64 [ 0, %vector.memcheck ], [ 1600, %vector.body ]
+ br i1 %resume.val, label %for.end, label %for.body3
+
+for.body3: ; preds = %middle.block, %for.body3
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body3 ], [ %trunc.resume.val, %middle.block ]
+ %arrayidx = getelementptr inbounds double, double* %y, i64 %indvars.iv
+ %6 = load double, double* %arrayidx, align 8
+ %add = fadd double %6, 1.000000e+00
+ %arrayidx5 = getelementptr inbounds double, double* %x, i64 %indvars.iv
+ store double %add, double* %arrayidx5, align 8
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond = icmp eq i64 %indvars.iv.next, 1600
+ br i1 %exitcond, label %for.end, label %for.body3
+
+for.end: ; preds = %middle.block, %for.body3
+ tail call void @bar(double* %x) #2
+ %inc7 = add nuw nsw i32 %j.015, 1
+ %exitcond16 = icmp eq i32 %inc7, 100
+ br i1 %exitcond16, label %for.end8, label %vector.memcheck
+
+for.end8: ; preds = %for.end
+ ret void
+
+; CHECK-LABEL: @foo
+; CHECK: dcbt
+}
+
+declare void @bar(double*) #1
+
+attributes #0 = { nounwind "target-cpu"="a2q" }
+attributes #1 = { "target-cpu"="a2q" }
+attributes #2 = { nounwind }
+
diff --git a/test/CodeGen/PowerPC/loop-data-prefetch.ll b/test/CodeGen/PowerPC/loop-data-prefetch.ll
new file mode 100644
index 000000000000..59b74e67251b
--- /dev/null
+++ b/test/CodeGen/PowerPC/loop-data-prefetch.ll
@@ -0,0 +1,29 @@
+; RUN: llc -mcpu=a2 < %s | FileCheck %s
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-bgq-linux"
+
+; Function Attrs: nounwind
+define void @foo(double* nocapture %a, double* nocapture readonly %b) #0 {
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %arrayidx = getelementptr inbounds double, double* %b, i64 %indvars.iv
+ %0 = load double, double* %arrayidx, align 8
+ %add = fadd double %0, 1.000000e+00
+ %arrayidx2 = getelementptr inbounds double, double* %a, i64 %indvars.iv
+ store double %add, double* %arrayidx2, align 8
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond = icmp eq i64 %indvars.iv.next, 1600
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body
+ ret void
+
+; CHECK-LABEL: @foo
+; CHECK: dcbt
+}
+
+attributes #0 = { nounwind }
+
diff --git a/test/CodeGen/PowerPC/loop-prep-all.ll b/test/CodeGen/PowerPC/loop-prep-all.ll
new file mode 100644
index 000000000000..895daff63ad9
--- /dev/null
+++ b/test/CodeGen/PowerPC/loop-prep-all.ll
@@ -0,0 +1,48 @@
+; RUN: llc < %s | FileCheck %s
+; RUN: llc -mtriple=powerpc64-bgq-linux < %s | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-BGQ
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+; Function Attrs: nounwind
+define void @foo(double* nocapture %x, double* nocapture readonly %y) #0 {
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %arrayidx = getelementptr inbounds double, double* %y, i64 %indvars.iv
+ %0 = load double, double* %arrayidx, align 8
+ %add = fadd double %0, 1.000000e+00
+ %arrayidx2 = getelementptr inbounds double, double* %x, i64 %indvars.iv
+ store double %add, double* %arrayidx2, align 8
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond19 = icmp eq i64 %indvars.iv.next, 1600
+ br i1 %exitcond19, label %for.body7, label %for.body
+
+; CHECK-LABEL: @foo
+
+; CHECK-BGQ-DAG: dcbt 4, 5
+; CHECK-DAG: lfdu [[REG1:[0-9]+]], 8({{[0-9]+}})
+; CHECK-DAG: fadd [[REG2:[0-9]+]], [[REG1]], 0
+; CHECK-DAG: stfdu [[REG2]], 8({{[0-9]+}})
+; CHECK: bdnz
+
+; CHECK: blr
+
+for.cond.cleanup6: ; preds = %for.body7
+ ret void
+
+for.body7: ; preds = %for.body, %for.body7
+ %i3.017 = phi i32 [ %inc9, %for.body7 ], [ 0, %for.body ]
+ tail call void bitcast (void (...)* @bar to void ()*)() #2
+ %inc9 = add nuw nsw i32 %i3.017, 1
+ %exitcond = icmp eq i32 %inc9, 1024
+ br i1 %exitcond, label %for.cond.cleanup6, label %for.body7
+}
+
+declare void @bar(...) #1
+
+attributes #0 = { nounwind "target-cpu"="a2q" }
+attributes #1 = { "target-cpu"="a2q" }
+attributes #2 = { nounwind }
+
diff --git a/test/CodeGen/PowerPC/lsa.ll b/test/CodeGen/PowerPC/lsa.ll
index a892a4cf4140..f4d61c014dd9 100644
--- a/test/CodeGen/PowerPC/lsa.ll
+++ b/test/CodeGen/PowerPC/lsa.ll
@@ -13,13 +13,13 @@ entry:
call void @llvm.lifetime.start(i64 32800, i8* %1) #0
%2 = bitcast [8200 x i32]* %q to i8*
call void @llvm.lifetime.start(i64 32800, i8* %2) #0
- %arraydecay = getelementptr inbounds [8200 x i32]* %q, i64 0, i64 0
- %arraydecay1 = getelementptr inbounds [8200 x i32]* %v, i64 0, i64 0
- %arraydecay2 = getelementptr inbounds [8200 x i32]* %w, i64 0, i64 0
+ %arraydecay = getelementptr inbounds [8200 x i32], [8200 x i32]* %q, i64 0, i64 0
+ %arraydecay1 = getelementptr inbounds [8200 x i32], [8200 x i32]* %v, i64 0, i64 0
+ %arraydecay2 = getelementptr inbounds [8200 x i32], [8200 x i32]* %w, i64 0, i64 0
call void @bar(i32* %arraydecay, i32* %arraydecay1, i32* %arraydecay2) #0
- %3 = load i32* %arraydecay2, align 4
- %arrayidx3 = getelementptr inbounds [8200 x i32]* %w, i64 0, i64 1
- %4 = load i32* %arrayidx3, align 4
+ %3 = load i32, i32* %arraydecay2, align 4
+ %arrayidx3 = getelementptr inbounds [8200 x i32], [8200 x i32]* %w, i64 0, i64 1
+ %4 = load i32, i32* %arrayidx3, align 4
; CHECK: @foo
; CHECK-NOT: lwzx
diff --git a/test/CodeGen/PowerPC/lsr-postinc-pos.ll b/test/CodeGen/PowerPC/lsr-postinc-pos.ll
index 42472c58fe8b..7831df154606 100644
--- a/test/CodeGen/PowerPC/lsr-postinc-pos.ll
+++ b/test/CodeGen/PowerPC/lsr-postinc-pos.ll
@@ -3,8 +3,8 @@
; The icmp is a post-inc use, and the increment is in %bb11, but the
; scevgep needs to be inserted in %bb so that it is dominated by %t.
-; CHECK: %t = load i8** undef
-; CHECK: %scevgep = getelementptr i8* %t, i32 %lsr.iv.next
+; CHECK: %t = load i8*, i8** undef
+; CHECK: %scevgep = getelementptr i8, i8* %t, i32 %lsr.iv.next
; CHECK: %c1 = icmp ult i8* %scevgep, undef
target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128-n32"
@@ -21,8 +21,8 @@ bb11:
br i1 %c0, label %bb13, label %bb
bb:
- %t = load i8** undef, align 16 ; <i8*> [#uses=1]
- %p = getelementptr i8* %t, i32 %ii ; <i8*> [#uses=1]
+ %t = load i8*, i8** undef, align 16 ; <i8*> [#uses=1]
+ %p = getelementptr i8, i8* %t, i32 %ii ; <i8*> [#uses=1]
%c1 = icmp ult i8* %p, undef ; <i1> [#uses=1]
%i.next = add i32 %i, 1 ; <i32> [#uses=1]
br i1 %c1, label %bb11, label %bb13
diff --git a/test/CodeGen/PowerPC/mask64.ll b/test/CodeGen/PowerPC/mask64.ll
index 139621af1f22..600cecd3fe7b 100644
--- a/test/CodeGen/PowerPC/mask64.ll
+++ b/test/CodeGen/PowerPC/mask64.ll
@@ -9,8 +9,8 @@ entry:
br i1 false, label %bb16, label %bb49
bb16: ; preds = %entry
- %tmp19 = load i8** null, align 1 ; <i8*> [#uses=1]
- %tmp21 = load i8* %tmp19, align 1 ; <i8> [#uses=1]
+ %tmp19 = load i8*, i8** null, align 1 ; <i8*> [#uses=1]
+ %tmp21 = load i8, i8* %tmp19, align 1 ; <i8> [#uses=1]
switch i8 %tmp21, label %bb49 [
i8 0, label %bb45
i8 1, label %bb34
diff --git a/test/CodeGen/PowerPC/mature-mc-support.ll b/test/CodeGen/PowerPC/mature-mc-support.ll
index 7c83e184a6f8..aa387f6e2666 100644
--- a/test/CodeGen/PowerPC/mature-mc-support.ll
+++ b/test/CodeGen/PowerPC/mature-mc-support.ll
@@ -10,6 +10,10 @@
; RUN: not llc -march=ppc32 -filetype=obj < %s > /dev/null 2> %t2
; RUN: FileCheck %s < %t2
+; Test that we don't try to produce COFF for ppc.
+; RUN: not llc -mtriple=powerpc-mingw32 -filetype=obj < %s > /dev/null 2> %t2
+; RUN: FileCheck %s < %t2
+
; SKIP: not llc -march=ppc64 < %s > /dev/null 2> %t3
; SKIP: FileCheck %s < %t3
diff --git a/test/CodeGen/PowerPC/mcm-1.ll b/test/CodeGen/PowerPC/mcm-1.ll
index 4e31550c40d4..7ef4a8e957e6 100644
--- a/test/CodeGen/PowerPC/mcm-1.ll
+++ b/test/CodeGen/PowerPC/mcm-1.ll
@@ -11,7 +11,7 @@ target triple = "powerpc64-unknown-linux-gnu"
define signext i32 @test_external() nounwind {
entry:
- %0 = load i32* @ei, align 4
+ %0 = load i32, i32* @ei, align 4
%inc = add nsw i32 %0, 1
store i32 %inc, i32* @ei, align 4
ret i32 %0
diff --git a/test/CodeGen/PowerPC/mcm-10.ll b/test/CodeGen/PowerPC/mcm-10.ll
index 9565ebc780bf..affa249e32b5 100644
--- a/test/CodeGen/PowerPC/mcm-10.ll
+++ b/test/CodeGen/PowerPC/mcm-10.ll
@@ -10,7 +10,7 @@ target triple = "powerpc64-unknown-linux-gnu"
define signext i32 @test_fn_static() nounwind {
entry:
- %0 = load i32* @test_fn_static.si, align 4
+ %0 = load i32, i32* @test_fn_static.si, align 4
%inc = add nsw i32 %0, 1
store i32 %inc, i32* @test_fn_static.si, align 4
ret i32 %0
diff --git a/test/CodeGen/PowerPC/mcm-11.ll b/test/CodeGen/PowerPC/mcm-11.ll
index 033045c74c8a..457c60afb57e 100644
--- a/test/CodeGen/PowerPC/mcm-11.ll
+++ b/test/CodeGen/PowerPC/mcm-11.ll
@@ -10,7 +10,7 @@ target triple = "powerpc64-unknown-linux-gnu"
define signext i32 @test_file_static() nounwind {
entry:
- %0 = load i32* @gi, align 4
+ %0 = load i32, i32* @gi, align 4
%inc = add nsw i32 %0, 1
store i32 %inc, i32* @gi, align 4
ret i32 %0
diff --git a/test/CodeGen/PowerPC/mcm-2.ll b/test/CodeGen/PowerPC/mcm-2.ll
index 811600ecdbf6..03ce1c65851e 100644
--- a/test/CodeGen/PowerPC/mcm-2.ll
+++ b/test/CodeGen/PowerPC/mcm-2.ll
@@ -11,7 +11,7 @@ target triple = "powerpc64-unknown-linux-gnu"
define signext i32 @test_fn_static() nounwind {
entry:
- %0 = load i32* @test_fn_static.si, align 4
+ %0 = load i32, i32* @test_fn_static.si, align 4
%inc = add nsw i32 %0, 1
store i32 %inc, i32* @test_fn_static.si, align 4
ret i32 %0
diff --git a/test/CodeGen/PowerPC/mcm-3.ll b/test/CodeGen/PowerPC/mcm-3.ll
index b6d681d580ad..40188bcb5a0b 100644
--- a/test/CodeGen/PowerPC/mcm-3.ll
+++ b/test/CodeGen/PowerPC/mcm-3.ll
@@ -11,7 +11,7 @@ target triple = "powerpc64-unknown-linux-gnu"
define signext i32 @test_file_static() nounwind {
entry:
- %0 = load i32* @gi, align 4
+ %0 = load i32, i32* @gi, align 4
%inc = add nsw i32 %0, 1
store i32 %inc, i32* @gi, align 4
ret i32 %0
diff --git a/test/CodeGen/PowerPC/mcm-5.ll b/test/CodeGen/PowerPC/mcm-5.ll
index 92ddecaeb8c8..19adbe5b7d93 100644
--- a/test/CodeGen/PowerPC/mcm-5.ll
+++ b/test/CodeGen/PowerPC/mcm-5.ll
@@ -1,5 +1,5 @@
-; RUN: llc -mcpu=pwr7 -O0 -code-model=medium <%s | FileCheck %s
-; RUN: llc -mcpu=pwr7 -O0 -code-model=large <%s | FileCheck %s
+; RUN: llc -mcpu=pwr7 -code-model=medium <%s | FileCheck %s
+; RUN: llc -mcpu=pwr7 -code-model=large <%s | FileCheck %s
; Test correct code generation for medium and large code model
; for loading the address of a jump table from the TOC.
@@ -11,7 +11,7 @@ define signext i32 @test_jump_table(i32 signext %i) nounwind {
entry:
%i.addr = alloca i32, align 4
store i32 %i, i32* %i.addr, align 4
- %0 = load i32* %i.addr, align 4
+ %0 = load i32, i32* %i.addr, align 4
switch i32 %0, label %sw.default [
i32 3, label %sw.bb
i32 4, label %sw.bb1
@@ -23,31 +23,31 @@ sw.default: ; preds = %entry
br label %sw.epilog
sw.bb: ; preds = %entry
- %1 = load i32* %i.addr, align 4
+ %1 = load i32, i32* %i.addr, align 4
%mul = mul nsw i32 %1, 7
store i32 %mul, i32* %i.addr, align 4
br label %sw.bb1
sw.bb1: ; preds = %entry, %sw.bb
- %2 = load i32* %i.addr, align 4
+ %2 = load i32, i32* %i.addr, align 4
%dec = add nsw i32 %2, -1
store i32 %dec, i32* %i.addr, align 4
br label %sw.bb2
sw.bb2: ; preds = %entry, %sw.bb1
- %3 = load i32* %i.addr, align 4
+ %3 = load i32, i32* %i.addr, align 4
%add = add nsw i32 %3, 3
store i32 %add, i32* %i.addr, align 4
br label %sw.bb3
sw.bb3: ; preds = %entry, %sw.bb2
- %4 = load i32* %i.addr, align 4
+ %4 = load i32, i32* %i.addr, align 4
%shl = shl i32 %4, 1
store i32 %shl, i32* %i.addr, align 4
br label %sw.epilog
sw.epilog: ; preds = %sw.bb3, %sw.default
- %5 = load i32* %i.addr, align 4
+ %5 = load i32, i32* %i.addr, align 4
ret i32 %5
}
diff --git a/test/CodeGen/PowerPC/mcm-6.ll b/test/CodeGen/PowerPC/mcm-6.ll
index f7838b4b2527..6a50d1264109 100644
--- a/test/CodeGen/PowerPC/mcm-6.ll
+++ b/test/CodeGen/PowerPC/mcm-6.ll
@@ -11,7 +11,7 @@ target triple = "powerpc64-unknown-linux-gnu"
define signext i32 @test_tentative() nounwind {
entry:
- %0 = load i32* @ti, align 4
+ %0 = load i32, i32* @ti, align 4
%inc = add nsw i32 %0, 1
store i32 %inc, i32* @ti, align 4
ret i32 %0
diff --git a/test/CodeGen/PowerPC/mcm-7.ll b/test/CodeGen/PowerPC/mcm-7.ll
index 7caa13bcdcf8..9c8158df77b3 100644
--- a/test/CodeGen/PowerPC/mcm-7.ll
+++ b/test/CodeGen/PowerPC/mcm-7.ll
@@ -11,7 +11,7 @@ define i8* @test_fnaddr() nounwind {
entry:
%func = alloca i32 (i32)*, align 8
store i32 (i32)* @foo, i32 (i32)** %func, align 8
- %0 = load i32 (i32)** %func, align 8
+ %0 = load i32 (i32)*, i32 (i32)** %func, align 8
%1 = bitcast i32 (i32)* %0 to i8*
ret i8* %1
}
diff --git a/test/CodeGen/PowerPC/mcm-8.ll b/test/CodeGen/PowerPC/mcm-8.ll
index 643548f6b125..b265c77e2d31 100644
--- a/test/CodeGen/PowerPC/mcm-8.ll
+++ b/test/CodeGen/PowerPC/mcm-8.ll
@@ -11,8 +11,8 @@ target triple = "powerpc64-unknown-linux-gnu"
define signext i8 @test_avext() nounwind {
entry:
- %0 = getelementptr inbounds [13 x i8]* @x, i32 0, i32 0
- %1 = load i8* %0, align 1
+ %0 = getelementptr inbounds [13 x i8], [13 x i8]* @x, i32 0, i32 0
+ %1 = load i8, i8* %0, align 1
ret i8 %1
}
diff --git a/test/CodeGen/PowerPC/mcm-9.ll b/test/CodeGen/PowerPC/mcm-9.ll
index 7906b6abea6a..45a4e699a3db 100644
--- a/test/CodeGen/PowerPC/mcm-9.ll
+++ b/test/CodeGen/PowerPC/mcm-9.ll
@@ -11,7 +11,7 @@ target triple = "powerpc64-unknown-linux-gnu"
define signext i32 @test_external() nounwind {
entry:
- %0 = load i32* @a, align 4
+ %0 = load i32, i32* @a, align 4
%inc = add nsw i32 %0, 1
store i32 %inc, i32* @a, align 4
ret i32 %0
diff --git a/test/CodeGen/PowerPC/mcm-default.ll b/test/CodeGen/PowerPC/mcm-default.ll
index 8d4ff14118f3..49e6513a7c13 100644
--- a/test/CodeGen/PowerPC/mcm-default.ll
+++ b/test/CodeGen/PowerPC/mcm-default.ll
@@ -10,7 +10,7 @@ target triple = "powerpc64-unknown-linux-gnu"
define signext i32 @test_external() nounwind {
entry:
- %0 = load i32* @ei, align 4
+ %0 = load i32, i32* @ei, align 4
%inc = add nsw i32 %0, 1
store i32 %inc, i32* @ei, align 4
ret i32 %0
diff --git a/test/CodeGen/PowerPC/mcm-obj-2.ll b/test/CodeGen/PowerPC/mcm-obj-2.ll
index c42cf0c36ea8..36c58560ebff 100644
--- a/test/CodeGen/PowerPC/mcm-obj-2.ll
+++ b/test/CodeGen/PowerPC/mcm-obj-2.ll
@@ -10,7 +10,7 @@ target triple = "powerpc64-unknown-linux-gnu"
define signext i32 @test_fn_static() nounwind {
entry:
- %0 = load i32* @test_fn_static.si, align 4
+ %0 = load i32, i32* @test_fn_static.si, align 4
%inc = add nsw i32 %0, 1
store i32 %inc, i32* @test_fn_static.si, align 4
ret i32 %0
@@ -20,7 +20,7 @@ entry:
; accessing function-scoped variable si.
;
; CHECK: Relocations [
-; CHECK: Section (2) .rela.text {
+; CHECK: Section {{.*}} .rela.text {
; CHECK: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_HA [[SYM2:[^ ]+]]
; CHECK: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO_DS [[SYM2]]
; CHECK: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO [[SYM2]]
@@ -29,7 +29,7 @@ entry:
define signext i32 @test_file_static() nounwind {
entry:
- %0 = load i32* @gi, align 4
+ %0 = load i32, i32* @gi, align 4
%inc = add nsw i32 %0, 1
store i32 %inc, i32* @gi, align 4
ret i32 %0
diff --git a/test/CodeGen/PowerPC/mcm-obj.ll b/test/CodeGen/PowerPC/mcm-obj.ll
index d3d05eb48d32..1ececf84926e 100644
--- a/test/CodeGen/PowerPC/mcm-obj.ll
+++ b/test/CodeGen/PowerPC/mcm-obj.ll
@@ -3,6 +3,12 @@
; RUN: llc -O0 -mcpu=pwr7 -code-model=large -filetype=obj -fast-isel=false %s -o - | \
; RUN: llvm-readobj -r | FileCheck -check-prefix=LARGE %s
+; Run jump table test separately since jump tables aren't generated at -O0.
+; RUN: llc -mcpu=pwr7 -code-model=medium -filetype=obj -fast-isel=false %s -o - | \
+; RUN: llvm-readobj -r | FileCheck -check-prefix=MEDIUM-JT %s
+; RUN: llc -mcpu=pwr7 -code-model=large -filetype=obj -fast-isel=false %s -o - | \
+; RUN: llvm-readobj -r | FileCheck -check-prefix=LARGE-JT %s
+
; FIXME: When asm-parse is available, could make this an assembly test.
target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
@@ -12,7 +18,7 @@ target triple = "powerpc64-unknown-linux-gnu"
define signext i32 @test_external() nounwind {
entry:
- %0 = load i32* @ei, align 4
+ %0 = load i32, i32* @ei, align 4
%inc = add nsw i32 %0, 1
store i32 %inc, i32* @ei, align 4
ret i32 %0
@@ -22,12 +28,12 @@ entry:
; accessing external variable ei.
;
; MEDIUM: Relocations [
-; MEDIUM: Section (2) .rela.text {
+; MEDIUM: Section {{.*}} .rela.text {
; MEDIUM-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_HA [[SYM1:[^ ]+]]
; MEDIUM-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO_DS [[SYM1]]
;
; LARGE: Relocations [
-; LARGE: Section (2) .rela.text {
+; LARGE: Section {{.*}} .rela.text {
; LARGE-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_HA [[SYM1:[^ ]+]]
; LARGE-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO_DS [[SYM1]]
@@ -35,7 +41,7 @@ entry:
define signext i32 @test_fn_static() nounwind {
entry:
- %0 = load i32* @test_fn_static.si, align 4
+ %0 = load i32, i32* @test_fn_static.si, align 4
%inc = add nsw i32 %0, 1
store i32 %inc, i32* @test_fn_static.si, align 4
ret i32 %0
@@ -57,7 +63,7 @@ entry:
define signext i32 @test_file_static() nounwind {
entry:
- %0 = load i32* @gi, align 4
+ %0 = load i32, i32* @gi, align 4
%inc = add nsw i32 %0, 1
store i32 %inc, i32* @gi, align 4
ret i32 %0
@@ -92,11 +98,51 @@ entry:
; LARGE-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_HA [[SYM4:[^ ]+]]
; LARGE-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO_DS [[SYM4]]
+@ti = common global i32 0, align 4
+
+define signext i32 @test_tentative() nounwind {
+entry:
+ %0 = load i32, i32* @ti, align 4
+ %inc = add nsw i32 %0, 1
+ store i32 %inc, i32* @ti, align 4
+ ret i32 %0
+}
+
+; Verify generation of R_PPC64_TOC16_HA and R_PPC64_TOC16_LO_DS for
+; accessing tentatively declared variable ti.
+;
+; MEDIUM-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_HA [[SYM6:[^ ]+]]
+; MEDIUM-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO_DS [[SYM6]]
+;
+; LARGE-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_HA [[SYM6:[^ ]+]]
+; LARGE-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO_DS [[SYM6]]
+
+define i8* @test_fnaddr() nounwind {
+entry:
+ %func = alloca i32 (i32)*, align 8
+ store i32 (i32)* @foo, i32 (i32)** %func, align 8
+ %0 = load i32 (i32)*, i32 (i32)** %func, align 8
+ %1 = bitcast i32 (i32)* %0 to i8*
+ ret i8* %1
+}
+
+declare signext i32 @foo(i32 signext)
+
+; Verify generation of R_PPC64_TOC16_HA and R_PPC64_TOC16_LO_DS for
+; accessing function address foo.
+;
+; MEDIUM-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_HA [[SYM7:[^ ]+]]
+; MEDIUM-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO_DS [[SYM7]]
+;
+; LARGE-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_HA [[SYM7:[^ ]+]]
+; LARGE-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO_DS [[SYM7]]
+
+
define signext i32 @test_jump_table(i32 signext %i) nounwind {
entry:
%i.addr = alloca i32, align 4
store i32 %i, i32* %i.addr, align 4
- %0 = load i32* %i.addr, align 4
+ %0 = load i32, i32* %i.addr, align 4
switch i32 %0, label %sw.default [
i32 3, label %sw.bb
i32 4, label %sw.bb1
@@ -108,78 +154,43 @@ sw.default: ; preds = %entry
br label %sw.epilog
sw.bb: ; preds = %entry
- %1 = load i32* %i.addr, align 4
+ %1 = load i32, i32* %i.addr, align 4
%mul = mul nsw i32 %1, 7
store i32 %mul, i32* %i.addr, align 4
br label %sw.bb1
sw.bb1: ; preds = %entry, %sw.bb
- %2 = load i32* %i.addr, align 4
+ %2 = load i32, i32* %i.addr, align 4
%dec = add nsw i32 %2, -1
store i32 %dec, i32* %i.addr, align 4
br label %sw.bb2
sw.bb2: ; preds = %entry, %sw.bb1
- %3 = load i32* %i.addr, align 4
+ %3 = load i32, i32* %i.addr, align 4
%add = add nsw i32 %3, 3
store i32 %add, i32* %i.addr, align 4
br label %sw.bb3
sw.bb3: ; preds = %entry, %sw.bb2
- %4 = load i32* %i.addr, align 4
+ %4 = load i32, i32* %i.addr, align 4
%shl = shl i32 %4, 1
store i32 %shl, i32* %i.addr, align 4
br label %sw.epilog
sw.epilog: ; preds = %sw.bb3, %sw.default
- %5 = load i32* %i.addr, align 4
+ %5 = load i32, i32* %i.addr, align 4
ret i32 %5
}
; Verify generation of R_PPC64_TOC16_HA and R_PPC64_TOC16_LO_DS for
; accessing a jump table address.
;
-; MEDIUM-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_HA [[SYM5:[^ ]+]]
-; MEDIUM-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO_DS [[SYM5]]
-;
-; LARGE-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_HA [[SYM5:[^ ]+]]
-; LARGE-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO_DS [[SYM5]]
-
-@ti = common global i32 0, align 4
-
-define signext i32 @test_tentative() nounwind {
-entry:
- %0 = load i32* @ti, align 4
- %inc = add nsw i32 %0, 1
- store i32 %inc, i32* @ti, align 4
- ret i32 %0
-}
-
-; Verify generation of R_PPC64_TOC16_HA and R_PPC64_TOC16_LO_DS for
-; accessing tentatively declared variable ti.
-;
-; MEDIUM-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_HA [[SYM6:[^ ]+]]
-; MEDIUM-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO_DS [[SYM6]]
-;
-; LARGE-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_HA [[SYM6:[^ ]+]]
-; LARGE-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO_DS [[SYM6]]
-
-define i8* @test_fnaddr() nounwind {
-entry:
- %func = alloca i32 (i32)*, align 8
- store i32 (i32)* @foo, i32 (i32)** %func, align 8
- %0 = load i32 (i32)** %func, align 8
- %1 = bitcast i32 (i32)* %0 to i8*
- ret i8* %1
-}
-
-declare signext i32 @foo(i32 signext)
-
-; Verify generation of R_PPC64_TOC16_HA and R_PPC64_TOC16_LO_DS for
-; accessing function address foo.
-;
-; MEDIUM-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_HA [[SYM7:[^ ]+]]
-; MEDIUM-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO_DS [[SYM7]]
+; MEDIUM-JT: Relocations [
+; MEDIUM-JT: Section ({{.*}}) .rela.text {
+; MEDIUM-JT-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_HA [[SYM:[^ ]+]]
+; MEDIUM-JT-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO_DS [[SYM]]
;
-; LARGE-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_HA [[SYM7:[^ ]+]]
-; LARGE-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO_DS [[SYM7]]
+; LARGE-JT: Relocations [
+; LARGE-JT: Section ({{.*}}) .rela.text {
+; LARGE-JT-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_HA [[SYM:[^ ]+]]
+; LARGE-JT-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO_DS [[SYM]]
diff --git a/test/CodeGen/PowerPC/mem-rr-addr-mode.ll b/test/CodeGen/PowerPC/mem-rr-addr-mode.ll
index 5661ef9768d1..60a4bdba2296 100644
--- a/test/CodeGen/PowerPC/mem-rr-addr-mode.ll
+++ b/test/CodeGen/PowerPC/mem-rr-addr-mode.ll
@@ -5,12 +5,12 @@
; This shares the 16 between the two loads.
define void @func(<4 x float>* %a, <4 x float>* %b) {
- %tmp1 = getelementptr <4 x float>* %b, i32 1 ; <<4 x float>*> [#uses=1]
- %tmp = load <4 x float>* %tmp1 ; <<4 x float>> [#uses=1]
- %tmp3 = getelementptr <4 x float>* %a, i32 1 ; <<4 x float>*> [#uses=1]
- %tmp4 = load <4 x float>* %tmp3 ; <<4 x float>> [#uses=1]
+ %tmp1 = getelementptr <4 x float>, <4 x float>* %b, i32 1 ; <<4 x float>*> [#uses=1]
+ %tmp = load <4 x float>, <4 x float>* %tmp1 ; <<4 x float>> [#uses=1]
+ %tmp3 = getelementptr <4 x float>, <4 x float>* %a, i32 1 ; <<4 x float>*> [#uses=1]
+ %tmp4 = load <4 x float>, <4 x float>* %tmp3 ; <<4 x float>> [#uses=1]
%tmp5 = fmul <4 x float> %tmp, %tmp4 ; <<4 x float>> [#uses=1]
- %tmp8 = load <4 x float>* %b ; <<4 x float>> [#uses=1]
+ %tmp8 = load <4 x float>, <4 x float>* %b ; <<4 x float>> [#uses=1]
%tmp9 = fadd <4 x float> %tmp5, %tmp8 ; <<4 x float>> [#uses=1]
store <4 x float> %tmp9, <4 x float>* %a
ret void
diff --git a/test/CodeGen/PowerPC/mem_update.ll b/test/CodeGen/PowerPC/mem_update.ll
index fcf53da67fc2..2fa01402579a 100644
--- a/test/CodeGen/PowerPC/mem_update.ll
+++ b/test/CodeGen/PowerPC/mem_update.ll
@@ -6,58 +6,58 @@
@Glob = global i64 4
define i32* @test0(i32* %X, i32* %dest) nounwind {
- %Y = getelementptr i32* %X, i32 4
- %A = load i32* %Y
+ %Y = getelementptr i32, i32* %X, i32 4
+ %A = load i32, i32* %Y
store i32 %A, i32* %dest
ret i32* %Y
}
define i32* @test1(i32* %X, i32* %dest) nounwind {
- %Y = getelementptr i32* %X, i32 4
- %A = load i32* %Y
+ %Y = getelementptr i32, i32* %X, i32 4
+ %A = load i32, i32* %Y
store i32 %A, i32* %dest
ret i32* %Y
}
define i16* @test2(i16* %X, i32* %dest) nounwind {
- %Y = getelementptr i16* %X, i32 4
- %A = load i16* %Y
+ %Y = getelementptr i16, i16* %X, i32 4
+ %A = load i16, i16* %Y
%B = sext i16 %A to i32
store i32 %B, i32* %dest
ret i16* %Y
}
define i16* @test3(i16* %X, i32* %dest) nounwind {
- %Y = getelementptr i16* %X, i32 4
- %A = load i16* %Y
+ %Y = getelementptr i16, i16* %X, i32 4
+ %A = load i16, i16* %Y
%B = zext i16 %A to i32
store i32 %B, i32* %dest
ret i16* %Y
}
define i16* @test3a(i16* %X, i64* %dest) nounwind {
- %Y = getelementptr i16* %X, i32 4
- %A = load i16* %Y
+ %Y = getelementptr i16, i16* %X, i32 4
+ %A = load i16, i16* %Y
%B = sext i16 %A to i64
store i64 %B, i64* %dest
ret i16* %Y
}
define i64* @test4(i64* %X, i64* %dest) nounwind {
- %Y = getelementptr i64* %X, i32 4
- %A = load i64* %Y
+ %Y = getelementptr i64, i64* %X, i32 4
+ %A = load i64, i64* %Y
store i64 %A, i64* %dest
ret i64* %Y
}
define i16* @test5(i16* %X) nounwind {
- %Y = getelementptr i16* %X, i32 4
+ %Y = getelementptr i16, i16* %X, i32 4
store i16 7, i16* %Y
ret i16* %Y
}
define i64* @test6(i64* %X, i64 %A) nounwind {
- %Y = getelementptr i64* %X, i32 4
+ %Y = getelementptr i64, i64* %X, i32 4
store i64 %A, i64* %Y
ret i64* %Y
}
diff --git a/test/CodeGen/PowerPC/memcpy-vec.ll b/test/CodeGen/PowerPC/memcpy-vec.ll
new file mode 100644
index 000000000000..70b8ea931a27
--- /dev/null
+++ b/test/CodeGen/PowerPC/memcpy-vec.ll
@@ -0,0 +1,110 @@
+; RUN: llc -mcpu=pwr7 < %s | FileCheck %s -check-prefix=PWR7
+; RUN: llc -mcpu=pwr8 < %s | FileCheck %s -check-prefix=PWR8
+; RUN: llc -mcpu=a2q < %s | FileCheck %s -check-prefix=A2Q
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+; Function Attrs: nounwind
+define void @foo1(double* nocapture %x, double* nocapture readonly %y) #0 {
+entry:
+ %0 = bitcast double* %x to i8*
+ %1 = bitcast double* %y to i8*
+ tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 32, i32 8, i1 false)
+ ret void
+
+; PWR7-LABEL: @foo1
+; PWR7-NOT: bl memcpy
+; PWR7: ld {{[0-9]+}}, {{[0-9]+}}(4)
+; PWR7: std {{[0-9]+}}, {{[0-9]+}}(3)
+; PWR7: blr
+
+; PWR8-LABEL: @foo1
+; PWR8: lxvw4x
+; PWR8: stxvw4x
+; PWR8: blr
+
+; A2Q-LABEL: @foo1
+; A2Q-NOT: bl memcpy
+; A2Q: ld {{[0-9]+}}, {{[0-9]+}}(4)
+; A2Q: std {{[0-9]+}}, {{[0-9]+}}(3)
+; A2Q: blr
+}
+
+; Function Attrs: nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1) #0
+
+; Function Attrs: nounwind
+define void @foo2(double* nocapture %x, double* nocapture readonly %y) #0 {
+entry:
+ %0 = bitcast double* %x to i8*
+ %1 = bitcast double* %y to i8*
+ tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 128, i32 8, i1 false)
+ ret void
+
+; PWR7-LABEL: @foo2
+; PWR7: bl memcpy
+; PWR7: blr
+
+; PWR8-LABEL: @foo2
+; PWR8: lxvw4x
+; PWR8: stxvw4x
+; PWR8: blr
+
+; A2Q-LABEL: @foo2
+; A2Q-NOT: bl memcpy
+; A2Q: ld {{[0-9]+}}, {{[0-9]+}}(4)
+; A2Q: std {{[0-9]+}}, {{[0-9]+}}(3)
+; A2Q: blr
+}
+
+; Function Attrs: nounwind
+define void @bar1(double* nocapture %x) #0 {
+entry:
+ %0 = bitcast double* %x to i8*
+ tail call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 128, i32 8, i1 false)
+ ret void
+
+; PWR7-LABEL: @bar1
+; PWR7-NOT: bl memset
+; PWR7: stxvw4x
+; PWR7: blr
+
+; PWR8-LABEL: @bar1
+; PWR8-NOT: bl memset
+; PWR8: stxvw4x
+; PWR8: blr
+
+; A2Q-LABEL: @bar1
+; A2Q-NOT: bl memset
+; A2Q: std {{[0-9]+}}, {{[0-9]+}}(3)
+; A2Q: blr
+}
+
+; Function Attrs: nounwind
+define void @bar2(double* nocapture %x) #0 {
+entry:
+ %0 = bitcast double* %x to i8*
+ tail call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 128, i32 32, i1 false)
+ ret void
+
+; PWR7-LABEL: @bar2
+; PWR7-NOT: bl memset
+; PWR7: stxvw4x
+; PWR7: blr
+
+; PWR8-LABEL: @bar2
+; PWR8-NOT: bl memset
+; PWR8: stxvw4x
+; PWR8: blr
+
+; A2Q-LABEL: @bar2
+; A2Q-NOT: bl memset
+; A2Q: qvstfdx
+; A2Q: blr
+}
+
+; Function Attrs: nounwind
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) #0
+
+attributes #0 = { nounwind }
+
diff --git a/test/CodeGen/PowerPC/memset-nc-le.ll b/test/CodeGen/PowerPC/memset-nc-le.ll
new file mode 100644
index 000000000000..af8e9c3fb4fd
--- /dev/null
+++ b/test/CodeGen/PowerPC/memset-nc-le.ll
@@ -0,0 +1,24 @@
+; RUN: llc < %s | FileCheck %s
+target datalayout = "e-m:e-i64:64-n32:64"
+target triple = "powerpc64le"
+
+; Function Attrs: nounwind
+define void @test_vsx() unnamed_addr #0 align 2 {
+entry:
+ %0 = load i32, i32* undef, align 4
+ %1 = trunc i32 %0 to i8
+ call void @llvm.memset.p0i8.i64(i8* null, i8 %1, i64 32, i32 1, i1 false)
+ ret void
+
+; CHECK-LABEL: @test_vsx
+; CHECK: stxvd2x
+; CHECK: stxvd2x
+; CHECK: blr
+}
+
+; Function Attrs: nounwind
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) #1
+
+attributes #0 = { nounwind "target-cpu"="pwr8" }
+attributes #1 = { nounwind }
+
diff --git a/test/CodeGen/PowerPC/memset-nc.ll b/test/CodeGen/PowerPC/memset-nc.ll
new file mode 100644
index 000000000000..414a987a56fe
--- /dev/null
+++ b/test/CodeGen/PowerPC/memset-nc.ll
@@ -0,0 +1,48 @@
+; RUN: llc < %s | FileCheck %s
+; RUN: llc -O0 < %s | FileCheck %s -check-prefix=CHECK-O0
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-bgq-linux"
+
+; Function Attrs: nounwind
+define void @test_qpx() unnamed_addr #0 align 2 {
+entry:
+ %0 = load i32, i32* undef, align 4
+ %1 = trunc i32 %0 to i8
+ call void @llvm.memset.p0i8.i64(i8* null, i8 %1, i64 64, i32 32, i1 false)
+ ret void
+
+; CHECK-LABEL: @test_qpx
+; CHECK: qvstfdx
+; CHECK: qvstfdx
+; CHECK: blr
+
+; CHECK-O0-LABEL: @test_qpx
+; CHECK-O0-NOT: qvstfdx
+; CHECK-O0: blr
+}
+
+; Function Attrs: nounwind
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) #1
+
+; Function Attrs: nounwind
+define void @test_vsx() unnamed_addr #2 align 2 {
+entry:
+ %0 = load i32, i32* undef, align 4
+ %1 = trunc i32 %0 to i8
+ call void @llvm.memset.p0i8.i64(i8* null, i8 %1, i64 32, i32 1, i1 false)
+ ret void
+
+; CHECK-LABEL: @test_vsx
+; CHECK: stxvw4x
+; CHECK: stxvw4x
+; CHECK: blr
+
+; CHECK-O0-LABEL: @test_vsx
+; CHECK-O0-NOT: stxvw4x
+; CHECK-O0: blr
+}
+
+attributes #0 = { nounwind "target-cpu"="a2q" }
+attributes #1 = { nounwind }
+attributes #2 = { nounwind "target-cpu"="pwr7" }
+
diff --git a/test/CodeGen/PowerPC/misched-inorder-latency.ll b/test/CodeGen/PowerPC/misched-inorder-latency.ll
index b259ff182c0c..0f57e90abebd 100644
--- a/test/CodeGen/PowerPC/misched-inorder-latency.ll
+++ b/test/CodeGen/PowerPC/misched-inorder-latency.ll
@@ -15,13 +15,13 @@ target triple = "powerpc64-bgq-linux"
define i32 @testload(i32 *%ptr, i32 %sumin) {
entry:
%sum1 = add i32 %sumin, 1
- %val1 = load i32* %ptr
+ %val1 = load i32, i32* %ptr
%p = icmp eq i32 %sumin, 0
br i1 %p, label %true, label %end
true:
%sum2 = add i32 %sum1, 1
- %ptr2 = getelementptr i32* %ptr, i32 1
- %val = load i32* %ptr2
+ %ptr2 = getelementptr i32, i32* %ptr, i32 1
+ %val = load i32, i32* %ptr2
%val2 = add i32 %val1, %val
br label %end
end:
diff --git a/test/CodeGen/PowerPC/misched.ll b/test/CodeGen/PowerPC/misched.ll
index d6fb3b30464f..1c868b3f171c 100644
--- a/test/CodeGen/PowerPC/misched.ll
+++ b/test/CodeGen/PowerPC/misched.ll
@@ -18,7 +18,7 @@ for.body24.i: ; preds = %for.body24.i, %for.
br i1 undef, label %for.body24.i58, label %for.body24.i
for.body24.i58: ; preds = %for.body24.i58, %for.body24.i
- %arrayidx26.i55.1 = getelementptr inbounds [16000 x double]* @b, i64 0, i64 undef
+ %arrayidx26.i55.1 = getelementptr inbounds [16000 x double], [16000 x double]* @b, i64 0, i64 undef
store double 1.000000e+00, double* %arrayidx26.i55.1, align 8
br i1 undef, label %for.body24.i64, label %for.body24.i58
diff --git a/test/CodeGen/PowerPC/mult-alt-generic-powerpc.ll b/test/CodeGen/PowerPC/mult-alt-generic-powerpc.ll
index 743cc62ddba7..36aecf1806bd 100644
--- a/test/CodeGen/PowerPC/mult-alt-generic-powerpc.ll
+++ b/test/CodeGen/PowerPC/mult-alt-generic-powerpc.ll
@@ -33,10 +33,10 @@ entry:
%in1 = alloca i32, align 4
store i32 0, i32* %out0, align 4
store i32 1, i32* %in1, align 4
- %tmp = load i32* %in1, align 4
+ %tmp = load i32, i32* %in1, align 4
%0 = call i32 asm "foo $1,$0", "=r,<r"(i32 %tmp) nounwind
store i32 %0, i32* %out0, align 4
- %tmp1 = load i32* %in1, align 4
+ %tmp1 = load i32, i32* %in1, align 4
%1 = call i32 asm "foo $1,$0", "=r,r<"(i32 %tmp1) nounwind
store i32 %1, i32* %out0, align 4
ret void
@@ -48,10 +48,10 @@ entry:
%in1 = alloca i32, align 4
store i32 0, i32* %out0, align 4
store i32 1, i32* %in1, align 4
- %tmp = load i32* %in1, align 4
+ %tmp = load i32, i32* %in1, align 4
%0 = call i32 asm "foo $1,$0", "=r,>r"(i32 %tmp) nounwind
store i32 %0, i32* %out0, align 4
- %tmp1 = load i32* %in1, align 4
+ %tmp1 = load i32, i32* %in1, align 4
%1 = call i32 asm "foo $1,$0", "=r,r>"(i32 %tmp1) nounwind
store i32 %1, i32* %out0, align 4
ret void
@@ -63,7 +63,7 @@ entry:
%in1 = alloca i32, align 4
store i32 0, i32* %out0, align 4
store i32 1, i32* %in1, align 4
- %tmp = load i32* %in1, align 4
+ %tmp = load i32, i32* %in1, align 4
%0 = call i32 asm "foo $1,$0", "=r,r"(i32 %tmp) nounwind
store i32 %0, i32* %out0, align 4
ret void
@@ -120,10 +120,10 @@ entry:
%in1 = alloca i32, align 4
store i32 0, i32* %out0, align 4
store i32 1, i32* %in1, align 4
- %tmp = load i32* %in1, align 4
+ %tmp = load i32, i32* %in1, align 4
%0 = call i32 asm "foo $1,$0", "=r,imr"(i32 %tmp) nounwind
store i32 %0, i32* %out0, align 4
- %tmp1 = load i32* @min1, align 4
+ %tmp1 = load i32, i32* @min1, align 4
%1 = call i32 asm "foo $1,$0", "=r,imr"(i32 %tmp1) nounwind
store i32 %1, i32* %out0, align 4
%2 = call i32 asm "foo $1,$0", "=r,imr"(i32 1) nounwind
@@ -137,15 +137,15 @@ entry:
%in1 = alloca i32, align 4
store i32 0, i32* %out0, align 4
store i32 1, i32* %in1, align 4
- %tmp = load i32* %in1, align 4
+ %tmp = load i32, i32* %in1, align 4
%0 = call i32 asm "foo $1,$0", "=r,X"(i32 %tmp) nounwind
store i32 %0, i32* %out0, align 4
- %tmp1 = load i32* @min1, align 4
+ %tmp1 = load i32, i32* @min1, align 4
%1 = call i32 asm "foo $1,$0", "=r,X"(i32 %tmp1) nounwind
store i32 %1, i32* %out0, align 4
%2 = call i32 asm "foo $1,$0", "=r,X"(i32 1) nounwind
store i32 %2, i32* %out0, align 4
- %3 = call i32 asm "foo $1,$0", "=r,X"(i32* getelementptr inbounds ([2 x i32]* @marray, i32 0, i32 0)) nounwind
+ %3 = call i32 asm "foo $1,$0", "=r,X"(i32* getelementptr inbounds ([2 x i32], [2 x i32]* @marray, i32 0, i32 0)) nounwind
store i32 %3, i32* %out0, align 4
%4 = call i32 asm "foo $1,$0", "=r,X"(double 1.000000e+001) nounwind
store i32 %4, i32* %out0, align 4
@@ -158,14 +158,14 @@ define void @single_p() nounwind {
entry:
%out0 = alloca i32, align 4
store i32 0, i32* %out0, align 4
- %0 = call i32 asm "foo $1,$0", "=r,r"(i32* getelementptr inbounds ([2 x i32]* @marray, i32 0, i32 0)) nounwind
+ %0 = call i32 asm "foo $1,$0", "=r,r"(i32* getelementptr inbounds ([2 x i32], [2 x i32]* @marray, i32 0, i32 0)) nounwind
store i32 %0, i32* %out0, align 4
ret void
}
define void @multi_m() nounwind {
entry:
- %tmp = load i32* @min1, align 4
+ %tmp = load i32, i32* @min1, align 4
call void asm "foo $1,$0", "=*m|r,m|r"(i32* @mout0, i32 %tmp) nounwind
ret void
}
@@ -190,10 +190,10 @@ entry:
%in1 = alloca i32, align 4
store i32 0, i32* %out0, align 4
store i32 1, i32* %in1, align 4
- %tmp = load i32* %in1, align 4
+ %tmp = load i32, i32* %in1, align 4
%0 = call i32 asm "foo $1,$0", "=r|r,r|<r"(i32 %tmp) nounwind
store i32 %0, i32* %out0, align 4
- %tmp1 = load i32* %in1, align 4
+ %tmp1 = load i32, i32* %in1, align 4
%1 = call i32 asm "foo $1,$0", "=r|r,r|r<"(i32 %tmp1) nounwind
store i32 %1, i32* %out0, align 4
ret void
@@ -205,10 +205,10 @@ entry:
%in1 = alloca i32, align 4
store i32 0, i32* %out0, align 4
store i32 1, i32* %in1, align 4
- %tmp = load i32* %in1, align 4
+ %tmp = load i32, i32* %in1, align 4
%0 = call i32 asm "foo $1,$0", "=r|r,r|>r"(i32 %tmp) nounwind
store i32 %0, i32* %out0, align 4
- %tmp1 = load i32* %in1, align 4
+ %tmp1 = load i32, i32* %in1, align 4
%1 = call i32 asm "foo $1,$0", "=r|r,r|r>"(i32 %tmp1) nounwind
store i32 %1, i32* %out0, align 4
ret void
@@ -220,7 +220,7 @@ entry:
%in1 = alloca i32, align 4
store i32 0, i32* %out0, align 4
store i32 1, i32* %in1, align 4
- %tmp = load i32* %in1, align 4
+ %tmp = load i32, i32* %in1, align 4
%0 = call i32 asm "foo $1,$0", "=r|r,r|m"(i32 %tmp) nounwind
store i32 %0, i32* %out0, align 4
ret void
@@ -277,10 +277,10 @@ entry:
%in1 = alloca i32, align 4
store i32 0, i32* %out0, align 4
store i32 1, i32* %in1, align 4
- %tmp = load i32* %in1, align 4
+ %tmp = load i32, i32* %in1, align 4
%0 = call i32 asm "foo $1,$0", "=r|r,r|imr"(i32 %tmp) nounwind
store i32 %0, i32* %out0, align 4
- %tmp1 = load i32* @min1, align 4
+ %tmp1 = load i32, i32* @min1, align 4
%1 = call i32 asm "foo $1,$0", "=r|r,r|imr"(i32 %tmp1) nounwind
store i32 %1, i32* %out0, align 4
%2 = call i32 asm "foo $1,$0", "=r|r,r|imr"(i32 1) nounwind
@@ -294,15 +294,15 @@ entry:
%in1 = alloca i32, align 4
store i32 0, i32* %out0, align 4
store i32 1, i32* %in1, align 4
- %tmp = load i32* %in1, align 4
+ %tmp = load i32, i32* %in1, align 4
%0 = call i32 asm "foo $1,$0", "=r|r,r|X"(i32 %tmp) nounwind
store i32 %0, i32* %out0, align 4
- %tmp1 = load i32* @min1, align 4
+ %tmp1 = load i32, i32* @min1, align 4
%1 = call i32 asm "foo $1,$0", "=r|r,r|X"(i32 %tmp1) nounwind
store i32 %1, i32* %out0, align 4
%2 = call i32 asm "foo $1,$0", "=r|r,r|X"(i32 1) nounwind
store i32 %2, i32* %out0, align 4
- %3 = call i32 asm "foo $1,$0", "=r|r,r|X"(i32* getelementptr inbounds ([2 x i32]* @marray, i32 0, i32 0)) nounwind
+ %3 = call i32 asm "foo $1,$0", "=r|r,r|X"(i32* getelementptr inbounds ([2 x i32], [2 x i32]* @marray, i32 0, i32 0)) nounwind
store i32 %3, i32* %out0, align 4
%4 = call i32 asm "foo $1,$0", "=r|r,r|X"(double 1.000000e+001) nounwind
store i32 %4, i32* %out0, align 4
@@ -315,7 +315,7 @@ define void @multi_p() nounwind {
entry:
%out0 = alloca i32, align 4
store i32 0, i32* %out0, align 4
- %0 = call i32 asm "foo $1,$0", "=r|r,r|r"(i32* getelementptr inbounds ([2 x i32]* @marray, i32 0, i32 0)) nounwind
+ %0 = call i32 asm "foo $1,$0", "=r|r,r|r"(i32* getelementptr inbounds ([2 x i32], [2 x i32]* @marray, i32 0, i32 0)) nounwind
store i32 %0, i32* %out0, align 4
ret void
}
diff --git a/test/CodeGen/PowerPC/mult-alt-generic-powerpc64.ll b/test/CodeGen/PowerPC/mult-alt-generic-powerpc64.ll
index 29a57867f567..ab7e414ead55 100644
--- a/test/CodeGen/PowerPC/mult-alt-generic-powerpc64.ll
+++ b/test/CodeGen/PowerPC/mult-alt-generic-powerpc64.ll
@@ -33,10 +33,10 @@ entry:
%in1 = alloca i32, align 4
store i32 0, i32* %out0, align 4
store i32 1, i32* %in1, align 4
- %tmp = load i32* %in1, align 4
+ %tmp = load i32, i32* %in1, align 4
%0 = call i32 asm "foo $1,$0", "=r,<r"(i32 %tmp) nounwind
store i32 %0, i32* %out0, align 4
- %tmp1 = load i32* %in1, align 4
+ %tmp1 = load i32, i32* %in1, align 4
%1 = call i32 asm "foo $1,$0", "=r,r<"(i32 %tmp1) nounwind
store i32 %1, i32* %out0, align 4
ret void
@@ -48,10 +48,10 @@ entry:
%in1 = alloca i32, align 4
store i32 0, i32* %out0, align 4
store i32 1, i32* %in1, align 4
- %tmp = load i32* %in1, align 4
+ %tmp = load i32, i32* %in1, align 4
%0 = call i32 asm "foo $1,$0", "=r,>r"(i32 %tmp) nounwind
store i32 %0, i32* %out0, align 4
- %tmp1 = load i32* %in1, align 4
+ %tmp1 = load i32, i32* %in1, align 4
%1 = call i32 asm "foo $1,$0", "=r,r>"(i32 %tmp1) nounwind
store i32 %1, i32* %out0, align 4
ret void
@@ -63,7 +63,7 @@ entry:
%in1 = alloca i32, align 4
store i32 0, i32* %out0, align 4
store i32 1, i32* %in1, align 4
- %tmp = load i32* %in1, align 4
+ %tmp = load i32, i32* %in1, align 4
%0 = call i32 asm "foo $1,$0", "=r,r"(i32 %tmp) nounwind
store i32 %0, i32* %out0, align 4
ret void
@@ -120,10 +120,10 @@ entry:
%in1 = alloca i32, align 4
store i32 0, i32* %out0, align 4
store i32 1, i32* %in1, align 4
- %tmp = load i32* %in1, align 4
+ %tmp = load i32, i32* %in1, align 4
%0 = call i32 asm "foo $1,$0", "=r,imr"(i32 %tmp) nounwind
store i32 %0, i32* %out0, align 4
- %tmp1 = load i32* @min1, align 4
+ %tmp1 = load i32, i32* @min1, align 4
%1 = call i32 asm "foo $1,$0", "=r,imr"(i32 %tmp1) nounwind
store i32 %1, i32* %out0, align 4
%2 = call i32 asm "foo $1,$0", "=r,imr"(i32 1) nounwind
@@ -137,15 +137,15 @@ entry:
%in1 = alloca i32, align 4
store i32 0, i32* %out0, align 4
store i32 1, i32* %in1, align 4
- %tmp = load i32* %in1, align 4
+ %tmp = load i32, i32* %in1, align 4
%0 = call i32 asm "foo $1,$0", "=r,X"(i32 %tmp) nounwind
store i32 %0, i32* %out0, align 4
- %tmp1 = load i32* @min1, align 4
+ %tmp1 = load i32, i32* @min1, align 4
%1 = call i32 asm "foo $1,$0", "=r,X"(i32 %tmp1) nounwind
store i32 %1, i32* %out0, align 4
%2 = call i32 asm "foo $1,$0", "=r,X"(i32 1) nounwind
store i32 %2, i32* %out0, align 4
- %3 = call i32 asm "foo $1,$0", "=r,X"(i32* getelementptr inbounds ([2 x i32]* @marray, i32 0, i32 0)) nounwind
+ %3 = call i32 asm "foo $1,$0", "=r,X"(i32* getelementptr inbounds ([2 x i32], [2 x i32]* @marray, i32 0, i32 0)) nounwind
store i32 %3, i32* %out0, align 4
%4 = call i32 asm "foo $1,$0", "=r,X"(double 1.000000e+001) nounwind
store i32 %4, i32* %out0, align 4
@@ -158,14 +158,14 @@ define void @single_p() nounwind {
entry:
%out0 = alloca i32, align 4
store i32 0, i32* %out0, align 4
- %0 = call i32 asm "foo $1,$0", "=r,r"(i32* getelementptr inbounds ([2 x i32]* @marray, i32 0, i32 0)) nounwind
+ %0 = call i32 asm "foo $1,$0", "=r,r"(i32* getelementptr inbounds ([2 x i32], [2 x i32]* @marray, i32 0, i32 0)) nounwind
store i32 %0, i32* %out0, align 4
ret void
}
define void @multi_m() nounwind {
entry:
- %tmp = load i32* @min1, align 4
+ %tmp = load i32, i32* @min1, align 4
call void asm "foo $1,$0", "=*m|r,m|r"(i32* @mout0, i32 %tmp) nounwind
ret void
}
@@ -190,10 +190,10 @@ entry:
%in1 = alloca i32, align 4
store i32 0, i32* %out0, align 4
store i32 1, i32* %in1, align 4
- %tmp = load i32* %in1, align 4
+ %tmp = load i32, i32* %in1, align 4
%0 = call i32 asm "foo $1,$0", "=r|r,r|<r"(i32 %tmp) nounwind
store i32 %0, i32* %out0, align 4
- %tmp1 = load i32* %in1, align 4
+ %tmp1 = load i32, i32* %in1, align 4
%1 = call i32 asm "foo $1,$0", "=r|r,r|r<"(i32 %tmp1) nounwind
store i32 %1, i32* %out0, align 4
ret void
@@ -205,10 +205,10 @@ entry:
%in1 = alloca i32, align 4
store i32 0, i32* %out0, align 4
store i32 1, i32* %in1, align 4
- %tmp = load i32* %in1, align 4
+ %tmp = load i32, i32* %in1, align 4
%0 = call i32 asm "foo $1,$0", "=r|r,r|>r"(i32 %tmp) nounwind
store i32 %0, i32* %out0, align 4
- %tmp1 = load i32* %in1, align 4
+ %tmp1 = load i32, i32* %in1, align 4
%1 = call i32 asm "foo $1,$0", "=r|r,r|r>"(i32 %tmp1) nounwind
store i32 %1, i32* %out0, align 4
ret void
@@ -220,7 +220,7 @@ entry:
%in1 = alloca i32, align 4
store i32 0, i32* %out0, align 4
store i32 1, i32* %in1, align 4
- %tmp = load i32* %in1, align 4
+ %tmp = load i32, i32* %in1, align 4
%0 = call i32 asm "foo $1,$0", "=r|r,r|m"(i32 %tmp) nounwind
store i32 %0, i32* %out0, align 4
ret void
@@ -277,10 +277,10 @@ entry:
%in1 = alloca i32, align 4
store i32 0, i32* %out0, align 4
store i32 1, i32* %in1, align 4
- %tmp = load i32* %in1, align 4
+ %tmp = load i32, i32* %in1, align 4
%0 = call i32 asm "foo $1,$0", "=r|r,r|imr"(i32 %tmp) nounwind
store i32 %0, i32* %out0, align 4
- %tmp1 = load i32* @min1, align 4
+ %tmp1 = load i32, i32* @min1, align 4
%1 = call i32 asm "foo $1,$0", "=r|r,r|imr"(i32 %tmp1) nounwind
store i32 %1, i32* %out0, align 4
%2 = call i32 asm "foo $1,$0", "=r|r,r|imr"(i32 1) nounwind
@@ -294,15 +294,15 @@ entry:
%in1 = alloca i32, align 4
store i32 0, i32* %out0, align 4
store i32 1, i32* %in1, align 4
- %tmp = load i32* %in1, align 4
+ %tmp = load i32, i32* %in1, align 4
%0 = call i32 asm "foo $1,$0", "=r|r,r|X"(i32 %tmp) nounwind
store i32 %0, i32* %out0, align 4
- %tmp1 = load i32* @min1, align 4
+ %tmp1 = load i32, i32* @min1, align 4
%1 = call i32 asm "foo $1,$0", "=r|r,r|X"(i32 %tmp1) nounwind
store i32 %1, i32* %out0, align 4
%2 = call i32 asm "foo $1,$0", "=r|r,r|X"(i32 1) nounwind
store i32 %2, i32* %out0, align 4
- %3 = call i32 asm "foo $1,$0", "=r|r,r|X"(i32* getelementptr inbounds ([2 x i32]* @marray, i32 0, i32 0)) nounwind
+ %3 = call i32 asm "foo $1,$0", "=r|r,r|X"(i32* getelementptr inbounds ([2 x i32], [2 x i32]* @marray, i32 0, i32 0)) nounwind
store i32 %3, i32* %out0, align 4
%4 = call i32 asm "foo $1,$0", "=r|r,r|X"(double 1.000000e+001) nounwind
store i32 %4, i32* %out0, align 4
@@ -315,7 +315,7 @@ define void @multi_p() nounwind {
entry:
%out0 = alloca i32, align 4
store i32 0, i32* %out0, align 4
- %0 = call i32 asm "foo $1,$0", "=r|r,r|r"(i32* getelementptr inbounds ([2 x i32]* @marray, i32 0, i32 0)) nounwind
+ %0 = call i32 asm "foo $1,$0", "=r|r,r|r"(i32* getelementptr inbounds ([2 x i32], [2 x i32]* @marray, i32 0, i32 0)) nounwind
store i32 %0, i32* %out0, align 4
ret void
}
diff --git a/test/CodeGen/PowerPC/named-reg-alloc-r2-64.ll b/test/CodeGen/PowerPC/named-reg-alloc-r2-64.ll
index d4ed05b9e50a..834df8b4d91c 100644
--- a/test/CodeGen/PowerPC/named-reg-alloc-r2-64.ll
+++ b/test/CodeGen/PowerPC/named-reg-alloc-r2-64.ll
@@ -1,15 +1,12 @@
-; RUN: not llc < %s -mtriple=powerpc64-apple-darwin 2>&1 | FileCheck %s --check-prefix=CHECK-DARWIN
-; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu 2>&1 | FileCheck %s
+; RUN: not llc < %s -mtriple=powerpc64-apple-darwin 2>&1 | FileCheck %s
+; RUN: not llc < %s -mtriple=powerpc64-unknown-linux-gnu 2>&1 | FileCheck %s
define i64 @get_reg() nounwind {
entry:
; FIXME: Include an allocatable-specific error message
-; CHECK-DARWIN: Invalid register name global variable
+; CHECK: Invalid register name global variable
%reg = call i64 @llvm.read_register.i64(metadata !0)
ret i64 %reg
-
-; CHECK-LABEL: @get_reg
-; CHECK: mr 3, 2
}
declare i64 @llvm.read_register.i64(metadata) nounwind
diff --git a/test/CodeGen/PowerPC/named-reg-alloc-r2.ll b/test/CodeGen/PowerPC/named-reg-alloc-r2.ll
index 262d034e16bd..45d9816793c4 100644
--- a/test/CodeGen/PowerPC/named-reg-alloc-r2.ll
+++ b/test/CodeGen/PowerPC/named-reg-alloc-r2.ll
@@ -1,11 +1,11 @@
-; RUN: not llc < %s -mtriple=powerpc-apple-darwin 2>&1 | FileCheck %s --check-prefix=CHECK-DARWIN
+; RUN: not llc < %s -mtriple=powerpc-apple-darwin 2>&1 | FileCheck %s --check-prefix=CHECK-NOTPPC32
; RUN: llc < %s -mtriple=powerpc-unknown-linux-gnu 2>&1 | FileCheck %s
-; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu 2>&1 | FileCheck %s
+; RUN: not llc < %s -mtriple=powerpc64-unknown-linux-gnu 2>&1 | FileCheck %s --check-prefix=CHECK-NOTPPC32
define i32 @get_reg() nounwind {
entry:
; FIXME: Include an allocatable-specific error message
-; CHECK-DARWIN: Invalid register name global variable
+; CHECK-NOTPPC32: Invalid register name global variable
%reg = call i32 @llvm.read_register.i32(metadata !0)
ret i32 %reg
diff --git a/test/CodeGen/PowerPC/no-extra-fp-conv-ldst.ll b/test/CodeGen/PowerPC/no-extra-fp-conv-ldst.ll
index 6beee253a2ec..26739bf958b5 100644
--- a/test/CodeGen/PowerPC/no-extra-fp-conv-ldst.ll
+++ b/test/CodeGen/PowerPC/no-extra-fp-conv-ldst.ll
@@ -5,7 +5,7 @@ target triple = "powerpc64-unknown-linux-gnu"
; Function Attrs: nounwind readonly
define double @test1(i64* nocapture readonly %x) #0 {
entry:
- %0 = load i64* %x, align 8
+ %0 = load i64, i64* %x, align 8
%conv = sitofp i64 %0 to double
ret double %conv
@@ -18,7 +18,7 @@ entry:
; Function Attrs: nounwind readonly
define double @test2(i32* nocapture readonly %x) #0 {
entry:
- %0 = load i32* %x, align 4
+ %0 = load i32, i32* %x, align 4
%conv = sitofp i32 %0 to double
ret double %conv
diff --git a/test/CodeGen/PowerPC/no-pref-jumps.ll b/test/CodeGen/PowerPC/no-pref-jumps.ll
new file mode 100644
index 000000000000..d9490f16e8f9
--- /dev/null
+++ b/test/CodeGen/PowerPC/no-pref-jumps.ll
@@ -0,0 +1,36 @@
+; RUN: llc -mcpu=pwr7 < %s | FileCheck %s
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+; Function Attrs: nounwind
+define void @foo(i32 signext %a, i32 signext %b) #0 {
+entry:
+ %cmp = icmp sgt i32 %a, 5
+ %cmp1 = icmp slt i32 %b, 3
+ %or.cond = or i1 %cmp, %cmp1
+ br i1 %or.cond, label %if.then, label %if.else
+
+; CHECK-LABEL: @foo
+; CHECK: cmpwi
+; CHECK: cmpwi
+; CHECK: cror
+; CHECK: blr
+
+if.then: ; preds = %entry
+ tail call void bitcast (void (...)* @bar to void ()*)() #0
+ br label %if.end
+
+if.else: ; preds = %entry
+ tail call void bitcast (void (...)* @car to void ()*)() #0
+ br label %if.end
+
+if.end: ; preds = %if.else, %if.then
+ ret void
+}
+
+declare void @bar(...)
+
+declare void @car(...)
+
+attributes #0 = { nounwind }
+
diff --git a/test/CodeGen/PowerPC/novrsave.ll b/test/CodeGen/PowerPC/novrsave.ll
index a70576a291e9..50be2a162ed8 100644
--- a/test/CodeGen/PowerPC/novrsave.ll
+++ b/test/CodeGen/PowerPC/novrsave.ll
@@ -7,7 +7,7 @@ define <4 x float> @bar(<4 x float> %v) nounwind {
entry:
%v.addr = alloca <4 x float>, align 16
store <4 x float> %v, <4 x float>* %v.addr, align 16
- %0 = load <4 x float>* %v.addr, align 16
+ %0 = load <4 x float>, <4 x float>* %v.addr, align 16
ret <4 x float> %0
}
diff --git a/test/CodeGen/PowerPC/optnone-crbits-i1-ret.ll b/test/CodeGen/PowerPC/optnone-crbits-i1-ret.ll
new file mode 100644
index 000000000000..745a038d6ce0
--- /dev/null
+++ b/test/CodeGen/PowerPC/optnone-crbits-i1-ret.ll
@@ -0,0 +1,37 @@
+; RUN: llc < %s | FileCheck %s
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-bgq-linux"
+
+declare zeroext i1 @ri1()
+declare void @se1()
+declare void @se2()
+
+define void @test() #0 {
+entry:
+ %b = call zeroext i1 @ri1()
+ br label %next
+
+; CHECK-LABEL: @test
+; CHECK: bl ri1
+; CHECK-NEXT: nop
+; CHECK: andi. 3, 3, 1
+
+next:
+ br i1 %b, label %case1, label %case2
+
+case1:
+ call void @se1()
+ br label %end
+
+case2:
+ call void @se2()
+ br label %end
+
+end:
+ ret void
+
+; CHECK: blr
+}
+
+attributes #0 = { noinline optnone }
+
diff --git a/test/CodeGen/PowerPC/or-addressing-mode.ll b/test/CodeGen/PowerPC/or-addressing-mode.ll
index e50374e30696..f98b34cfdb6a 100644
--- a/test/CodeGen/PowerPC/or-addressing-mode.ll
+++ b/test/CodeGen/PowerPC/or-addressing-mode.ll
@@ -8,7 +8,7 @@ define i32 @test1(i8* %P) {
%tmp.11.i = and i32 %tmp.10.i, 2040 ; <i32> [#uses=1]
%tmp.13.i = or i32 %tmp.11.i, %tmp.4.i ; <i32> [#uses=1]
%tmp.14.i = inttoptr i32 %tmp.13.i to i32* ; <i32*> [#uses=1]
- %tmp.3 = load i32* %tmp.14.i ; <i32> [#uses=1]
+ %tmp.3 = load i32, i32* %tmp.14.i ; <i32> [#uses=1]
ret i32 %tmp.3
}
@@ -16,7 +16,7 @@ define i32 @test2(i32 %P) {
%tmp.2 = shl i32 %P, 4 ; <i32> [#uses=1]
%tmp.3 = or i32 %tmp.2, 2 ; <i32> [#uses=1]
%tmp.4 = inttoptr i32 %tmp.3 to i32* ; <i32*> [#uses=1]
- %tmp.5 = load i32* %tmp.4 ; <i32> [#uses=1]
+ %tmp.5 = load i32, i32* %tmp.4 ; <i32> [#uses=1]
ret i32 %tmp.5
}
diff --git a/test/CodeGen/PowerPC/p8-isel-sched.ll b/test/CodeGen/PowerPC/p8-isel-sched.ll
new file mode 100644
index 000000000000..034fe3c5fcbf
--- /dev/null
+++ b/test/CodeGen/PowerPC/p8-isel-sched.ll
@@ -0,0 +1,33 @@
+; RUN: llc -mcpu=pwr8 < %s | FileCheck %s
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+; Function Attrs: nounwind
+define void @foo(i32* nocapture %r1, i32* nocapture %r2, i32* nocapture %r3, i32* nocapture %r4, i32 signext %a, i32 signext %b, i32 signext %c, i32 signext %d) #0 {
+entry:
+ %tobool = icmp ne i32 %a, 0
+ %cond = select i1 %tobool, i32 %b, i32 %c
+ store i32 %cond, i32* %r1, align 4
+ %cond5 = select i1 %tobool, i32 %b, i32 %d
+ store i32 %cond5, i32* %r2, align 4
+ %add = add nsw i32 %b, 1
+ %sub = add nsw i32 %d, -2
+ %cond10 = select i1 %tobool, i32 %add, i32 %sub
+ store i32 %cond10, i32* %r3, align 4
+ %add13 = add nsw i32 %b, 3
+ %sub15 = add nsw i32 %d, -5
+ %cond17 = select i1 %tobool, i32 %add13, i32 %sub15
+ store i32 %cond17, i32* %r4, align 4
+ ret void
+}
+
+; Make sure that we don't schedule all of the isels together, they should be
+; intermixed with the adds because each isel starts a new dispatch group.
+; CHECK-LABEL: @foo
+; CHECK: isel
+; CHECK: addi
+; CHECK: isel
+; CHECK: blr
+
+attributes #0 = { nounwind }
+
diff --git a/test/CodeGen/PowerPC/pip-inner.ll b/test/CodeGen/PowerPC/pip-inner.ll
new file mode 100644
index 000000000000..930f0d371472
--- /dev/null
+++ b/test/CodeGen/PowerPC/pip-inner.ll
@@ -0,0 +1,52 @@
+; RUN: llc < %s | FileCheck %s
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+; Function Attrs: nounwind
+define void @foo(double* %x, double* nocapture readonly %y) #0 {
+entry:
+ br label %for.cond1.preheader
+
+for.cond1.preheader: ; preds = %for.end, %entry
+ %i.015 = phi i32 [ 0, %entry ], [ %inc7, %for.end ]
+ br label %for.body3
+
+for.body3: ; preds = %for.body3, %for.cond1.preheader
+ %indvars.iv = phi i64 [ 0, %for.cond1.preheader ], [ %indvars.iv.next, %for.body3 ]
+ %arrayidx = getelementptr inbounds double, double* %y, i64 %indvars.iv
+ %0 = load double, double* %arrayidx, align 8
+ %add = fadd double %0, 1.000000e+00
+ %arrayidx5 = getelementptr inbounds double, double* %x, i64 %indvars.iv
+ store double %add, double* %arrayidx5, align 8
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond = icmp eq i64 %indvars.iv.next, 16000
+ br i1 %exitcond, label %for.end, label %for.body3
+
+for.end: ; preds = %for.body3
+ tail call void @bar(double* %x) #2
+ %inc7 = add nuw nsw i32 %i.015, 1
+ %exitcond16 = icmp eq i32 %inc7, 1000
+ br i1 %exitcond16, label %for.end8, label %for.cond1.preheader
+
+for.end8: ; preds = %for.end
+ ret void
+
+; CHECK-LABEL: @foo
+
+; CHECK: lfdu [[REG1:[0-9]+]], 8({{[0-9]+}})
+; CHECK: fadd [[REG2:[0-9]+]], [[REG1]], {{[0-9]+}}
+; CHECK: stfdu [[REG2]], 8({{[0-9]+}})
+; CHECK: bdnz
+
+; CHECK: bl bar
+; CHECK-NEXT: nop
+
+; CHECK: blr
+}
+
+declare void @bar(double*) #1
+
+attributes #0 = { nounwind "target-cpu"="a2" }
+attributes #1 = { "target-cpu"="a2" }
+attributes #2 = { nounwind }
+
diff --git a/test/CodeGen/PowerPC/post-ra-ec.ll b/test/CodeGen/PowerPC/post-ra-ec.ll
index 9c61677650ba..e32441be90fd 100644
--- a/test/CodeGen/PowerPC/post-ra-ec.ll
+++ b/test/CodeGen/PowerPC/post-ra-ec.ll
@@ -16,12 +16,12 @@ entry:
br i1 undef, label %if.end, label %if.then
if.then: ; preds = %entry
- %0 = load i64* undef, align 8
+ %0 = load i64, i64* undef, align 8
%conv.i = trunc i64 %0 to i32
- %1 = load i32* null, align 4
+ %1 = load i32, i32* null, align 4
%add = add i32 %1, %conv.i
store i32 %add, i32* null, align 4
- %counter.i.i = getelementptr inbounds %struct.task_struct.4.16.124* %call1.i, i64 0, i32 1, i32 0
+ %counter.i.i = getelementptr inbounds %struct.task_struct.4.16.124, %struct.task_struct.4.16.124* %call1.i, i64 0, i32 1, i32 0
%2 = tail call i32 asm sideeffect "\09lwsync\0A1:\09lwarx\09$0,0,$1\09\09# atomic_dec_return\0A\09addic\09$0,$0,-1\0A\09stwcx.\09$0,0,$1\0A\09bne-\091b\0A\09sync\0A", "=&r,r,~{cr0},~{xer},~{memory}"(i32* %counter.i.i) #0
%cmp.i = icmp eq i32 %2, 0
br i1 %cmp.i, label %if.then.i, label %if.end
diff --git a/test/CodeGen/PowerPC/ppc-crbits-onoff.ll b/test/CodeGen/PowerPC/ppc-crbits-onoff.ll
new file mode 100644
index 000000000000..88648df5fa36
--- /dev/null
+++ b/test/CodeGen/PowerPC/ppc-crbits-onoff.ll
@@ -0,0 +1,43 @@
+; RUN: llc -mcpu=pwr7 < %s | FileCheck %s
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+; Function Attrs: nounwind readnone
+define signext i32 @crbitsoff(i32 signext %v1, i32 signext %v2) #0 {
+entry:
+ %tobool = icmp ne i32 %v1, 0
+ %lnot = icmp eq i32 %v2, 0
+ %and3 = and i1 %tobool, %lnot
+ %and = zext i1 %and3 to i32
+ ret i32 %and
+
+; CHECK-LABEL: @crbitsoff
+; CHECK-DAG: cmplwi {{[0-9]+}}, 3, 0
+; CHECK-DAG: li [[REG2:[0-9]+]], 1
+; CHECK-DAG: cntlzw [[REG3:[0-9]+]],
+; CHECK: isel 3, 0, [[REG2]]
+; CHECK: and 3, 3, [[REG3]]
+; CHECK: blr
+}
+
+define signext i32 @crbitson(i32 signext %v1, i32 signext %v2) #1 {
+entry:
+ %tobool = icmp ne i32 %v1, 0
+ %lnot = icmp eq i32 %v2, 0
+ %and3 = and i1 %tobool, %lnot
+ %and = zext i1 %and3 to i32
+ ret i32 %and
+
+; CHECK-LABEL: @crbitson
+; CHECK-DAG: cmpwi {{[0-9]+}}, 3, 0
+; CHECK-DAG: cmpwi {{[0-9]+}}, 4, 0
+; CHECK-DAG: li [[REG2:[0-9]+]], 1
+; CHECK-DAG: crorc [[REG3:[0-9]+]],
+; CHECK: isel 3, 0, [[REG2]], [[REG3]]
+; CHECK: blr
+}
+
+
+attributes #0 = { nounwind readnone "target-features"="-crbits" }
+attributes #1 = { nounwind readnone }
+
diff --git a/test/CodeGen/PowerPC/ppc-empty-fs.ll b/test/CodeGen/PowerPC/ppc-empty-fs.ll
new file mode 100644
index 000000000000..8c0a2fb1dc06
--- /dev/null
+++ b/test/CodeGen/PowerPC/ppc-empty-fs.ll
@@ -0,0 +1,32 @@
+; RUN: llc < %s | FileCheck %s
+; This guarantees that we add the default set of features to the current feature
+; string. We won't successfully legalize the types here without +64bit being
+; silently added.
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+%struct.fab = type { float, float }
+
+; Function Attrs: nounwind
+define void @func_fab(%struct.fab* noalias sret %agg.result, i64 %x.coerce) #0 {
+entry:
+ %x = alloca %struct.fab, align 8
+ %0 = bitcast %struct.fab* %x to i64*
+ store i64 %x.coerce, i64* %0, align 1
+ %1 = bitcast %struct.fab* %agg.result to i8*
+ %2 = bitcast %struct.fab* %x to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* %2, i64 8, i32 4, i1 false)
+ ret void
+}
+
+; CHECK: func_fab
+
+; Function Attrs: nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1) #1
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "target-features"="" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind }
+
+!llvm.ident = !{!0}
+
+!0 = !{!"clang version 3.7.0 (trunk 233227) (llvm/trunk 233226)"}
diff --git a/test/CodeGen/PowerPC/ppc-prologue.ll b/test/CodeGen/PowerPC/ppc-prologue.ll
index 553837121a36..c84e6fbd4b60 100644
--- a/test/CodeGen/PowerPC/ppc-prologue.ll
+++ b/test/CodeGen/PowerPC/ppc-prologue.ll
@@ -14,12 +14,12 @@ entry:
store i32 %a, i32* %a_addr
%1 = call i32 @_Z3barPi(i32* %a_addr) ; <i32> [#uses=1]
store i32 %1, i32* %0, align 4
- %2 = load i32* %0, align 4 ; <i32> [#uses=1]
+ %2 = load i32, i32* %0, align 4 ; <i32> [#uses=1]
store i32 %2, i32* %retval, align 4
br label %return
return: ; preds = %entry
- %retval1 = load i32* %retval ; <i32> [#uses=1]
+ %retval1 = load i32, i32* %retval ; <i32> [#uses=1]
ret i32 %retval1
}
diff --git a/test/CodeGen/PowerPC/ppc32-cyclecounter.ll b/test/CodeGen/PowerPC/ppc32-cyclecounter.ll
index 9e2cd0b12880..ea50a1bdbdc1 100644
--- a/test/CodeGen/PowerPC/ppc32-cyclecounter.ll
+++ b/test/CodeGen/PowerPC/ppc32-cyclecounter.ll
@@ -13,8 +13,8 @@ entry:
; CHECK: mfspr 3, 269
; CHECK: mfspr 4, 268
; CHECK: mfspr [[REG:[0-9]+]], 269
-; CHECK: cmpw [[CR:[0-9]+]], 3, [[REG]]
-; CHECK: bne [[CR]], .LBB
+; CHECK: cmpw 3, [[REG]]
+; CHECK: bne 0, .LBB
declare i64 @llvm.readcyclecounter()
diff --git a/test/CodeGen/PowerPC/ppc32-i1-vaarg.ll b/test/CodeGen/PowerPC/ppc32-i1-vaarg.ll
index 6e0aec27b7c1..ad8ed38da7fa 100644
--- a/test/CodeGen/PowerPC/ppc32-i1-vaarg.ll
+++ b/test/CodeGen/PowerPC/ppc32-i1-vaarg.ll
@@ -5,7 +5,7 @@ target triple = "powerpc-unknown-linux-gnu"
declare void @printf(i8*, ...)
define void @main() {
- call void (i8*, ...)* @printf(i8* undef, i1 false)
+ call void (i8*, ...) @printf(i8* undef, i1 false)
ret void
}
diff --git a/test/CodeGen/PowerPC/ppc32-lshrti3.ll b/test/CodeGen/PowerPC/ppc32-lshrti3.ll
index f773cce81be3..a2a280f5168e 100644
--- a/test/CodeGen/PowerPC/ppc32-lshrti3.ll
+++ b/test/CodeGen/PowerPC/ppc32-lshrti3.ll
@@ -9,7 +9,7 @@ target triple = "powerpc--netbsd"
; Function Attrs: nounwind uwtable
define i32 @fn1() #0 {
entry:
- %.promoted = load i72* inttoptr (i32 1 to i72*), align 4
+ %.promoted = load i72, i72* inttoptr (i32 1 to i72*), align 4
br label %while.cond
while.cond: ; preds = %while.cond, %entry
diff --git a/test/CodeGen/PowerPC/ppc32-pic-large.ll b/test/CodeGen/PowerPC/ppc32-pic-large.ll
index bb906ec78d11..6a069451a410 100644
--- a/test/CodeGen/PowerPC/ppc32-pic-large.ll
+++ b/test/CodeGen/PowerPC/ppc32-pic-large.ll
@@ -5,8 +5,8 @@ declare i32 @call_foo(i32, ...)
define i32 @foo() {
entry:
- %0 = load i32* @bar, align 4
- %call = call i32 (i32, ...)* @call_foo(i32 %0, i32 0, i32 1, i32 2, i32 4, i32 8, i32 16, i32 32, i32 64)
+ %0 = load i32, i32* @bar, align 4
+ %call = call i32 (i32, ...) @call_foo(i32 %0, i32 0, i32 1, i32 2, i32 4, i32 8, i32 16, i32 32, i32 64)
ret i32 %0
}
diff --git a/test/CodeGen/PowerPC/ppc32-pic.ll b/test/CodeGen/PowerPC/ppc32-pic.ll
index abc136757177..63f521cbea38 100644
--- a/test/CodeGen/PowerPC/ppc32-pic.ll
+++ b/test/CodeGen/PowerPC/ppc32-pic.ll
@@ -5,8 +5,8 @@ declare i32 @call_foo(i32, ...)
define i32 @foo() {
entry:
- %0 = load i32* @bar, align 4
- %call = call i32 (i32, ...)* @call_foo(i32 %0, i32 0, i32 1, i32 2, i32 4, i32 8, i32 16, i32 32, i32 64)
+ %0 = load i32, i32* @bar, align 4
+ %call = call i32 (i32, ...) @call_foo(i32 %0, i32 0, i32 1, i32 2, i32 4, i32 8, i32 16, i32 32, i32 64)
ret i32 0
}
diff --git a/test/CodeGen/PowerPC/ppc440-fp-basic.ll b/test/CodeGen/PowerPC/ppc440-fp-basic.ll
index 77b726c5ae38..95773570d33c 100644
--- a/test/CodeGen/PowerPC/ppc440-fp-basic.ll
+++ b/test/CodeGen/PowerPC/ppc440-fp-basic.ll
@@ -4,28 +4,28 @@
define void @maybe_an_fma(%0* sret %agg.result, %0* byval %a, %0* byval %b, %0* byval %c) nounwind {
entry:
- %a.realp = getelementptr inbounds %0* %a, i32 0, i32 0
- %a.real = load double* %a.realp
- %a.imagp = getelementptr inbounds %0* %a, i32 0, i32 1
- %a.imag = load double* %a.imagp
- %b.realp = getelementptr inbounds %0* %b, i32 0, i32 0
- %b.real = load double* %b.realp
- %b.imagp = getelementptr inbounds %0* %b, i32 0, i32 1
- %b.imag = load double* %b.imagp
+ %a.realp = getelementptr inbounds %0, %0* %a, i32 0, i32 0
+ %a.real = load double, double* %a.realp
+ %a.imagp = getelementptr inbounds %0, %0* %a, i32 0, i32 1
+ %a.imag = load double, double* %a.imagp
+ %b.realp = getelementptr inbounds %0, %0* %b, i32 0, i32 0
+ %b.real = load double, double* %b.realp
+ %b.imagp = getelementptr inbounds %0, %0* %b, i32 0, i32 1
+ %b.imag = load double, double* %b.imagp
%mul.rl = fmul double %a.real, %b.real
%mul.rr = fmul double %a.imag, %b.imag
%mul.r = fsub double %mul.rl, %mul.rr
%mul.il = fmul double %a.imag, %b.real
%mul.ir = fmul double %a.real, %b.imag
%mul.i = fadd double %mul.il, %mul.ir
- %c.realp = getelementptr inbounds %0* %c, i32 0, i32 0
- %c.real = load double* %c.realp
- %c.imagp = getelementptr inbounds %0* %c, i32 0, i32 1
- %c.imag = load double* %c.imagp
+ %c.realp = getelementptr inbounds %0, %0* %c, i32 0, i32 0
+ %c.real = load double, double* %c.realp
+ %c.imagp = getelementptr inbounds %0, %0* %c, i32 0, i32 1
+ %c.imag = load double, double* %c.imagp
%add.r = fadd double %mul.r, %c.real
%add.i = fadd double %mul.i, %c.imag
- %real = getelementptr inbounds %0* %agg.result, i32 0, i32 0
- %imag = getelementptr inbounds %0* %agg.result, i32 0, i32 1
+ %real = getelementptr inbounds %0, %0* %agg.result, i32 0, i32 0
+ %imag = getelementptr inbounds %0, %0* %agg.result, i32 0, i32 1
store double %add.r, double* %real
store double %add.i, double* %imag
ret void
diff --git a/test/CodeGen/PowerPC/ppc64-abi-extend.ll b/test/CodeGen/PowerPC/ppc64-abi-extend.ll
index 8baf1c613e78..f8e72e3a108e 100644
--- a/test/CodeGen/PowerPC/ppc64-abi-extend.ll
+++ b/test/CodeGen/PowerPC/ppc64-abi-extend.ll
@@ -15,7 +15,7 @@ declare zeroext i32 @ret_ui()
define void @pass_arg_si() nounwind {
entry:
- %0 = load i32* @si, align 4
+ %0 = load i32, i32* @si, align 4
tail call void @arg_si(i32 signext %0) nounwind
ret void
}
@@ -25,7 +25,7 @@ entry:
define void @pass_arg_ui() nounwind {
entry:
- %0 = load i32* @ui, align 4
+ %0 = load i32, i32* @ui, align 4
tail call void @arg_ui(i32 zeroext %0) nounwind
ret void
}
@@ -53,7 +53,7 @@ entry:
define signext i32 @pass_ret_si() nounwind readonly {
entry:
- %0 = load i32* @si, align 4
+ %0 = load i32, i32* @si, align 4
ret i32 %0
}
; CHECK: @pass_ret_si
@@ -62,7 +62,7 @@ entry:
define zeroext i32 @pass_ret_ui() nounwind readonly {
entry:
- %0 = load i32* @ui, align 4
+ %0 = load i32, i32* @ui, align 4
ret i32 %0
}
; CHECK: @pass_ret_ui
diff --git a/test/CodeGen/PowerPC/ppc64-align-long-double.ll b/test/CodeGen/PowerPC/ppc64-align-long-double.ll
index 5ed029cc9702..b9fd6707f041 100644
--- a/test/CodeGen/PowerPC/ppc64-align-long-double.ll
+++ b/test/CodeGen/PowerPC/ppc64-align-long-double.ll
@@ -13,8 +13,8 @@ target triple = "powerpc64-unknown-linux-gnu"
define ppc_fp128 @test(%struct.S* byval %x) nounwind {
entry:
- %b = getelementptr inbounds %struct.S* %x, i32 0, i32 1
- %0 = load ppc_fp128* %b, align 16
+ %b = getelementptr inbounds %struct.S, %struct.S* %x, i32 0, i32 1
+ %0 = load ppc_fp128, ppc_fp128* %b, align 16
ret ppc_fp128 %0
}
diff --git a/test/CodeGen/PowerPC/ppc64-anyregcc-crash.ll b/test/CodeGen/PowerPC/ppc64-anyregcc-crash.ll
index 479c7a7af25f..dfa6ec058b92 100644
--- a/test/CodeGen/PowerPC/ppc64-anyregcc-crash.ll
+++ b/test/CodeGen/PowerPC/ppc64-anyregcc-crash.ll
@@ -8,7 +8,7 @@ define i64 @anyreglimit(i64 %v1, i64 %v2, i64 %v3, i64 %v4, i64 %v5, i64 %v6, i6
i64 %v17, i64 %v18, i64 %v19, i64 %v20, i64 %v21, i64 %v22, i64 %v23, i64 %v24,
i64 %v25, i64 %v26, i64 %v27, i64 %v28, i64 %v29, i64 %v30, i64 %v31, i64 %v32) {
entry:
- %result = tail call anyregcc i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 12, i32 15, i8* inttoptr (i64 0 to i8*), i32 32,
+ %result = tail call anyregcc i64 (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.i64(i64 12, i32 15, i8* inttoptr (i64 0 to i8*), i32 32,
i64 %v1, i64 %v2, i64 %v3, i64 %v4, i64 %v5, i64 %v6, i64 %v7, i64 %v8,
i64 %v9, i64 %v10, i64 %v11, i64 %v12, i64 %v13, i64 %v14, i64 %v15, i64 %v16,
i64 %v17, i64 %v18, i64 %v19, i64 %v20, i64 %v21, i64 %v22, i64 %v23, i64 %v24,
diff --git a/test/CodeGen/PowerPC/ppc64-anyregcc.ll b/test/CodeGen/PowerPC/ppc64-anyregcc.ll
index 7cd3c4b38200..66f6a2c790c6 100644
--- a/test/CodeGen/PowerPC/ppc64-anyregcc.ll
+++ b/test/CodeGen/PowerPC/ppc64-anyregcc.ll
@@ -2,6 +2,31 @@
target datalayout = "E-m:e-i64:64-n32:64"
target triple = "powerpc64-unknown-linux-gnu"
+; CHECK-LABEL: test:
+; CHECK: {{^}}.L[[test_BEGIN:.*]]:{{$}}
+
+; CHECK-LABEL: property_access1:
+; CHECK: {{^}}.L[[property_access1_BEGIN:.*]]:{{$}}
+
+; CHECK-LABEL: property_access2:
+; CHECK: {{^}}.L[[property_access2_BEGIN:.*]]:{{$}}
+
+; CHECK-LABEL: property_access3:
+; CHECK: {{^}}.L[[property_access3_BEGIN:.*]]:{{$}}
+
+; CHECK-LABEL: anyreg_test1:
+; CHECK: {{^}}.L[[anyreg_test1_BEGIN:.*]]:{{$}}
+
+; CHECK-LABEL: anyreg_test2:
+; CHECK: {{^}}.L[[anyreg_test2_BEGIN:.*]]:{{$}}
+
+; CHECK-LABEL: patchpoint_spilldef:
+; CHECK: {{^}}.L[[patchpoint_spilldef_BEGIN:.*]]:{{$}}
+
+; CHECK-LABEL: patchpoint_spillargs:
+; CHECK: {{^}}.L[[patchpoint_spillargs_BEGIN:.*]]:{{$}}
+
+
; Stackmap Header: no constants - 6 callsites
; CHECK-LABEL: .section .llvm_stackmaps
; CHECK-NEXT: __LLVM_StackMaps:
@@ -26,9 +51,9 @@ target triple = "powerpc64-unknown-linux-gnu"
; CHECK-NEXT: .quad property_access3
; CHECK-NEXT: .quad 128
; CHECK-NEXT: .quad anyreg_test1
-; CHECK-NEXT: .quad 160
+; CHECK-NEXT: .quad 144
; CHECK-NEXT: .quad anyreg_test2
-; CHECK-NEXT: .quad 160
+; CHECK-NEXT: .quad 144
; CHECK-NEXT: .quad patchpoint_spilldef
; CHECK-NEXT: .quad 256
; CHECK-NEXT: .quad patchpoint_spillargs
@@ -36,7 +61,7 @@ target triple = "powerpc64-unknown-linux-gnu"
; test
-; CHECK-LABEL: .long .L{{.*}}-.L.test
+; CHECK: .long .L{{.*}}-.L[[test_BEGIN]]
; CHECK-NEXT: .short 0
; 3 locations
; CHECK-NEXT: .short 3
@@ -57,12 +82,12 @@ target triple = "powerpc64-unknown-linux-gnu"
; CHECK-NEXT: .long 3
define i64 @test() nounwind ssp uwtable {
entry:
- call anyregcc void (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i64 0, i32 24, i8* null, i32 2, i32 1, i32 2, i64 3)
+ call anyregcc void (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.void(i64 0, i32 24, i8* null, i32 2, i32 1, i32 2, i64 3)
ret i64 0
}
; property access 1 - %obj is an anyreg call argument and should therefore be in a register
-; CHECK-LABEL: .long .L{{.*}}-.L.property_access1
+; CHECK: .long .L{{.*}}-.L[[property_access1_BEGIN]]
; CHECK-NEXT: .short 0
; 2 locations
; CHECK-NEXT: .short 2
@@ -79,12 +104,12 @@ entry:
define i64 @property_access1(i8* %obj) nounwind ssp uwtable {
entry:
%f = inttoptr i64 281474417671919 to i8*
- %ret = call anyregcc i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 1, i32 24, i8* %f, i32 1, i8* %obj)
+ %ret = call anyregcc i64 (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.i64(i64 1, i32 24, i8* %f, i32 1, i8* %obj)
ret i64 %ret
}
; property access 2 - %obj is an anyreg call argument and should therefore be in a register
-; CHECK-LABEL: .long .L{{.*}}-.L.property_access2
+; CHECK: .long .L{{.*}}-.L[[property_access2_BEGIN]]
; CHECK-NEXT: .short 0
; 2 locations
; CHECK-NEXT: .short 2
@@ -102,12 +127,12 @@ define i64 @property_access2() nounwind ssp uwtable {
entry:
%obj = alloca i64, align 8
%f = inttoptr i64 281474417671919 to i8*
- %ret = call anyregcc i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 2, i32 24, i8* %f, i32 1, i64* %obj)
+ %ret = call anyregcc i64 (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.i64(i64 2, i32 24, i8* %f, i32 1, i64* %obj)
ret i64 %ret
}
; property access 3 - %obj is a frame index
-; CHECK-LABEL: .long .L{{.*}}-.L.property_access3
+; CHECK: .long .L{{.*}}-.L[[property_access3_BEGIN]]
; CHECK-NEXT: .short 0
; 2 locations
; CHECK-NEXT: .short 2
@@ -125,12 +150,12 @@ define i64 @property_access3() nounwind ssp uwtable {
entry:
%obj = alloca i64, align 8
%f = inttoptr i64 281474417671919 to i8*
- %ret = call anyregcc i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 3, i32 24, i8* %f, i32 0, i64* %obj)
+ %ret = call anyregcc i64 (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.i64(i64 3, i32 24, i8* %f, i32 0, i64* %obj)
ret i64 %ret
}
; anyreg_test1
-; CHECK-LABEL: .long .L{{.*}}-.L.anyreg_test1
+; CHECK: .long .L{{.*}}-.L[[anyreg_test1_BEGIN]]
; CHECK-NEXT: .short 0
; 14 locations
; CHECK-NEXT: .short 14
@@ -207,12 +232,12 @@ entry:
define i64 @anyreg_test1(i8* %a1, i8* %a2, i8* %a3, i8* %a4, i8* %a5, i8* %a6, i8* %a7, i8* %a8, i8* %a9, i8* %a10, i8* %a11, i8* %a12, i8* %a13) nounwind ssp uwtable {
entry:
%f = inttoptr i64 281474417671919 to i8*
- %ret = call anyregcc i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 4, i32 24, i8* %f, i32 13, i8* %a1, i8* %a2, i8* %a3, i8* %a4, i8* %a5, i8* %a6, i8* %a7, i8* %a8, i8* %a9, i8* %a10, i8* %a11, i8* %a12, i8* %a13)
+ %ret = call anyregcc i64 (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.i64(i64 4, i32 24, i8* %f, i32 13, i8* %a1, i8* %a2, i8* %a3, i8* %a4, i8* %a5, i8* %a6, i8* %a7, i8* %a8, i8* %a9, i8* %a10, i8* %a11, i8* %a12, i8* %a13)
ret i64 %ret
}
; anyreg_test2
-; CHECK-LABEL: .long .L{{.*}}-.L.anyreg_test2
+; CHECK: .long .L{{.*}}-.L[[anyreg_test2_BEGIN]]
; CHECK-NEXT: .short 0
; 14 locations
; CHECK-NEXT: .short 14
@@ -289,7 +314,7 @@ entry:
define i64 @anyreg_test2(i8* %a1, i8* %a2, i8* %a3, i8* %a4, i8* %a5, i8* %a6, i8* %a7, i8* %a8, i8* %a9, i8* %a10, i8* %a11, i8* %a12, i8* %a13) nounwind ssp uwtable {
entry:
%f = inttoptr i64 281474417671919 to i8*
- %ret = call anyregcc i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 5, i32 24, i8* %f, i32 8, i8* %a1, i8* %a2, i8* %a3, i8* %a4, i8* %a5, i8* %a6, i8* %a7, i8* %a8, i8* %a9, i8* %a10, i8* %a11, i8* %a12, i8* %a13)
+ %ret = call anyregcc i64 (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.i64(i64 5, i32 24, i8* %f, i32 8, i8* %a1, i8* %a2, i8* %a3, i8* %a4, i8* %a5, i8* %a6, i8* %a7, i8* %a8, i8* %a9, i8* %a10, i8* %a11, i8* %a12, i8* %a13)
ret i64 %ret
}
@@ -297,7 +322,7 @@ entry:
;
; <rdar://problem/15432754> [JS] Assertion: "Folded a def to a non-store!"
;
-; CHECK-LABEL: .long .L{{.*}}-.L.patchpoint_spilldef
+; CHECK: .long .L{{.*}}-.L[[patchpoint_spilldef_BEGIN]]
; CHECK-NEXT: .short 0
; CHECK-NEXT: .short 3
; Loc 0: Register (some register that will be spilled to the stack)
@@ -317,7 +342,7 @@ entry:
; CHECK-NEXT: .long 0
define i64 @patchpoint_spilldef(i64 %p1, i64 %p2, i64 %p3, i64 %p4) {
entry:
- %result = tail call anyregcc i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 12, i32 24, i8* inttoptr (i64 0 to i8*), i32 2, i64 %p1, i64 %p2)
+ %result = tail call anyregcc i64 (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.i64(i64 12, i32 24, i8* inttoptr (i64 0 to i8*), i32 2, i64 %p1, i64 %p2)
  tail call void asm sideeffect "nop", "~{r0},~{r3},~{r4},~{r5},~{r6},~{r7},~{r8},~{r9},~{r10},~{r11},~{r12},~{r14},~{r15},~{r16},~{r17},~{r18},~{r19},~{r20},~{r21},~{r22},~{r23},~{r24},~{r25},~{r26},~{r27},~{r28},~{r29},~{r30},~{r31}"() nounwind
ret i64 %result
@@ -327,7 +352,7 @@ entry:
;
; <rdar://problem/15487687> [JS] AnyRegCC argument ends up being spilled
;
-; CHECK-LABEL: .long .L{{.*}}-.L.patchpoint_spillargs
+; CHECK: .long .L{{.*}}-.L[[patchpoint_spillargs_BEGIN]]
; CHECK-NEXT: .short 0
; CHECK-NEXT: .short 5
; Loc 0: Return a register
@@ -359,7 +384,7 @@ define i64 @patchpoint_spillargs(i64 %p1, i64 %p2, i64 %p3, i64 %p4) {
entry:
  tail call void asm sideeffect "nop", "~{r0},~{r3},~{r4},~{r5},~{r6},~{r7},~{r8},~{r9},~{r10},~{r11},~{r12},~{r14},~{r15},~{r16},~{r17},~{r18},~{r19},~{r20},~{r21},~{r22},~{r23},~{r24},~{r25},~{r26},~{r27},~{r28},~{r29},~{r30},~{r31}"() nounwind
- %result = tail call anyregcc i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 13, i32 24, i8* inttoptr (i64 0 to i8*), i32 2, i64 %p1, i64 %p2, i64 %p3, i64 %p4)
+ %result = tail call anyregcc i64 (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.i64(i64 13, i32 24, i8* inttoptr (i64 0 to i8*), i32 2, i64 %p1, i64 %p2, i64 %p3, i64 %p4)
ret i64 %result
}
diff --git a/test/CodeGen/PowerPC/ppc64-byval-align.ll b/test/CodeGen/PowerPC/ppc64-byval-align.ll
index 0e73cf2b0e05..7170f5906581 100644
--- a/test/CodeGen/PowerPC/ppc64-byval-align.ll
+++ b/test/CodeGen/PowerPC/ppc64-byval-align.ll
@@ -30,8 +30,8 @@ entry:
define i64 @callee2(%struct.pad* byval nocapture readnone %x, i32 signext %y, %struct.test* byval align 16 nocapture readonly %z) {
entry:
- %x1 = getelementptr inbounds %struct.test* %z, i64 0, i32 0
- %0 = load i64* %x1, align 16
+ %x1 = getelementptr inbounds %struct.test, %struct.test* %z, i64 0, i32 0
+ %0 = load i64, i64* %x1, align 16
ret i64 %0
}
; CHECK-LABEL: @callee2
@@ -43,14 +43,16 @@ declare i64 @test2(%struct.pad* byval, i32 signext, %struct.test* byval align 16
define void @caller2(i64 %z) {
entry:
%tmp = alloca %struct.test, align 16
- %.compoundliteral.sroa.0.0..sroa_idx = getelementptr inbounds %struct.test* %tmp, i64 0, i32 0
+ %.compoundliteral.sroa.0.0..sroa_idx = getelementptr inbounds %struct.test, %struct.test* %tmp, i64 0, i32 0
store i64 %z, i64* %.compoundliteral.sroa.0.0..sroa_idx, align 16
%call = call i64 @test2(%struct.pad* byval @gp, i32 signext 0, %struct.test* byval align 16 %tmp)
ret void
}
; CHECK-LABEL: @caller2
; CHECK: std 3, [[OFF:[0-9]+]](1)
-; CHECK: ld [[REG:[0-9]+]], [[OFF]](1)
-; CHECK: std [[REG]], 128(1)
+; CHECK: addi [[REG1:[0-9]+]], 1, [[OFF]]
+; CHECK: lxvw4x [[REG2:[0-9]+]], 0, [[REG1]]
+; CHECK: li [[REG3:[0-9]+]], 128
+; CHECK: stxvw4x 0, 1, [[REG3]]
; CHECK: bl test2
diff --git a/test/CodeGen/PowerPC/ppc64-calls.ll b/test/CodeGen/PowerPC/ppc64-calls.ll
index 707ba95235f2..23a14e6687d6 100644
--- a/test/CodeGen/PowerPC/ppc64-calls.ll
+++ b/test/CodeGen/PowerPC/ppc64-calls.ll
@@ -73,7 +73,7 @@ define double @test_external(double %x) nounwind {
@g = external global void ()*
declare void @h(i64)
define void @test_indir_toc_reload(i64 %x) {
- %1 = load void ()** @g
+ %1 = load void ()*, void ()** @g
call void %1()
call void @h(i64 %x)
ret void
diff --git a/test/CodeGen/PowerPC/ppc64-elf-abi.ll b/test/CodeGen/PowerPC/ppc64-elf-abi.ll
index d82122d58ee5..53443376e4d5 100644
--- a/test/CodeGen/PowerPC/ppc64-elf-abi.ll
+++ b/test/CodeGen/PowerPC/ppc64-elf-abi.ll
@@ -1,9 +1,9 @@
; RUN: llc -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s -check-prefix=CHECK-ELFv1
-; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mattr=+elfv1 < %s | FileCheck %s -check-prefix=CHECK-ELFv1
-; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mattr=+elfv2 < %s | FileCheck %s -check-prefix=CHECK-ELFv2
+; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -target-abi elfv1 < %s | FileCheck %s -check-prefix=CHECK-ELFv1
+; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -target-abi elfv2 < %s | FileCheck %s -check-prefix=CHECK-ELFv2
; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s -check-prefix=CHECK-ELFv2
-; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -mattr=+elfv1 < %s | FileCheck %s -check-prefix=CHECK-ELFv1
-; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -mattr=+elfv2 < %s | FileCheck %s -check-prefix=CHECK-ELFv2
+; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -target-abi elfv1 < %s | FileCheck %s -check-prefix=CHECK-ELFv1
+; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -target-abi elfv2 < %s | FileCheck %s -check-prefix=CHECK-ELFv2
; CHECK-ELFv2: .abiversion 2
; CHECK-ELFv1-NOT: .abiversion 2
diff --git a/test/CodeGen/PowerPC/ppc64-fastcc-fast-isel.ll b/test/CodeGen/PowerPC/ppc64-fastcc-fast-isel.ll
new file mode 100644
index 000000000000..f90519836c25
--- /dev/null
+++ b/test/CodeGen/PowerPC/ppc64-fastcc-fast-isel.ll
@@ -0,0 +1,56 @@
+; RUN: llc -mcpu=pwr7 -mattr=-vsx -fast-isel -fast-isel-abort=1 < %s | FileCheck %s
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+define fastcc i64 @g1(i64 %g1, double %f1, i64 %g2, double %f2, i64 %g3, double %f3, i64 %g4, double %f4) #0 {
+ ret i64 %g1
+
+; CHECK-LABEL: @g1
+; CHECK-NOT: mr 3,
+; CHECK: blr
+}
+
+define fastcc i64 @g2(i64 %g1, double %f1, i64 %g2, double %f2, i64 %g3, double %f3, i64 %g4, double %f4) #0 {
+ ret i64 %g2
+
+; CHECK-LABEL: @g2
+; CHECK: mr 3, 4
+; CHECK-NEXT: blr
+}
+
+define fastcc i64 @g3(i64 %g1, double %f1, i64 %g2, double %f2, i64 %g3, double %f3, i64 %g4, double %f4) #0 {
+ ret i64 %g3
+
+; CHECK-LABEL: @g3
+; CHECK: mr 3, 5
+; CHECK-NEXT: blr
+}
+
+define fastcc double @f2(i64 %g1, double %f1, i64 %g2, double %f2, i64 %g3, double %f3, i64 %g4, double %f4) #0 {
+ ret double %f2
+
+; CHECK-LABEL: @f2
+; CHECK: fmr 1, 2
+; CHECK-NEXT: blr
+}
+
+define void @cg2(i64 %v) #0 {
+ tail call fastcc i64 @g1(i64 0, double 0.0, i64 %v, double 0.0, i64 0, double 0.0, i64 0, double 0.0)
+ ret void
+
+; CHECK-LABEL: @cg2
+; CHECK: mr 4, 3
+; CHECK: blr
+}
+
+define void @cf2(double %v) #0 {
+ tail call fastcc i64 @g1(i64 0, double 0.0, i64 0, double %v, i64 0, double 0.0, i64 0, double 0.0)
+ ret void
+
+; CHECK-LABEL: @cf2
+; CHECK: mr 2, 1
+; CHECK: blr
+}
+
+attributes #0 = { nounwind }
+
diff --git a/test/CodeGen/PowerPC/ppc64-fastcc.ll b/test/CodeGen/PowerPC/ppc64-fastcc.ll
new file mode 100644
index 000000000000..bb1365a3b675
--- /dev/null
+++ b/test/CodeGen/PowerPC/ppc64-fastcc.ll
@@ -0,0 +1,540 @@
+; RUN: llc -mcpu=pwr7 -mattr=-vsx < %s | FileCheck %s
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+define fastcc i64 @g1(i64 %g1, double %f1, <4 x i32> %v1, i64 %g2, double %f2, <4 x i32> %v2, i64 %g3, double %f3, <4 x i32> %v3, i64 %g4, double %f4, <4 x i32> %v4, i64 %g5, double %f5, <4 x i32> %v5, i64 %g6, double %f6, <4 x i32> %v6, i64 %g7, double %f7, <4 x i32> %v7, i64 %g8, double %f8, <4 x i32> %v8, i64 %g9, double %f9, <4 x i32> %v9, i64 %g10, double %f10, <4 x i32> %v10, i64 %g11, double %f11, <4 x i32> %v11, i64 %g12, double %f12, <4 x i32> %v12, i64 %g13, double %f13, <4 x i32> %v13, i64 %g14, double %f14, <4 x i32> %v14, i64 %g15, double %f15, <4 x i32> %v15, i64 %g16, double %f16, <4 x i32> %v16) #0 {
+ ret i64 %g1
+
+; CHECK-LABEL: @g1
+; CHECK-NOT: mr 3,
+; CHECK: blr
+}
+
+define fastcc i64 @g2(i64 %g1, double %f1, <4 x i32> %v1, i64 %g2, double %f2, <4 x i32> %v2, i64 %g3, double %f3, <4 x i32> %v3, i64 %g4, double %f4, <4 x i32> %v4, i64 %g5, double %f5, <4 x i32> %v5, i64 %g6, double %f6, <4 x i32> %v6, i64 %g7, double %f7, <4 x i32> %v7, i64 %g8, double %f8, <4 x i32> %v8, i64 %g9, double %f9, <4 x i32> %v9, i64 %g10, double %f10, <4 x i32> %v10, i64 %g11, double %f11, <4 x i32> %v11, i64 %g12, double %f12, <4 x i32> %v12, i64 %g13, double %f13, <4 x i32> %v13, i64 %g14, double %f14, <4 x i32> %v14, i64 %g15, double %f15, <4 x i32> %v15, i64 %g16, double %f16, <4 x i32> %v16) #0 {
+ ret i64 %g2
+
+; CHECK-LABEL: @g2
+; CHECK: mr 3, 4
+; CHECK-NEXT: blr
+}
+
+define fastcc i64 @g3(i64 %g1, double %f1, <4 x i32> %v1, i64 %g2, double %f2, <4 x i32> %v2, i64 %g3, double %f3, <4 x i32> %v3, i64 %g4, double %f4, <4 x i32> %v4, i64 %g5, double %f5, <4 x i32> %v5, i64 %g6, double %f6, <4 x i32> %v6, i64 %g7, double %f7, <4 x i32> %v7, i64 %g8, double %f8, <4 x i32> %v8, i64 %g9, double %f9, <4 x i32> %v9, i64 %g10, double %f10, <4 x i32> %v10, i64 %g11, double %f11, <4 x i32> %v11, i64 %g12, double %f12, <4 x i32> %v12, i64 %g13, double %f13, <4 x i32> %v13, i64 %g14, double %f14, <4 x i32> %v14, i64 %g15, double %f15, <4 x i32> %v15, i64 %g16, double %f16, <4 x i32> %v16) #0 {
+ ret i64 %g3
+
+; CHECK-LABEL: @g3
+; CHECK: mr 3, 5
+; CHECK-NEXT: blr
+}
+
+define fastcc i64 @g4(i64 %g1, double %f1, <4 x i32> %v1, i64 %g2, double %f2, <4 x i32> %v2, i64 %g3, double %f3, <4 x i32> %v3, i64 %g4, double %f4, <4 x i32> %v4, i64 %g5, double %f5, <4 x i32> %v5, i64 %g6, double %f6, <4 x i32> %v6, i64 %g7, double %f7, <4 x i32> %v7, i64 %g8, double %f8, <4 x i32> %v8, i64 %g9, double %f9, <4 x i32> %v9, i64 %g10, double %f10, <4 x i32> %v10, i64 %g11, double %f11, <4 x i32> %v11, i64 %g12, double %f12, <4 x i32> %v12, i64 %g13, double %f13, <4 x i32> %v13, i64 %g14, double %f14, <4 x i32> %v14, i64 %g15, double %f15, <4 x i32> %v15, i64 %g16, double %f16, <4 x i32> %v16) #0 {
+ ret i64 %g4
+
+; CHECK-LABEL: @g4
+; CHECK: mr 3, 6
+; CHECK-NEXT: blr
+}
+
+define fastcc i64 @g5(i64 %g1, double %f1, <4 x i32> %v1, i64 %g2, double %f2, <4 x i32> %v2, i64 %g3, double %f3, <4 x i32> %v3, i64 %g4, double %f4, <4 x i32> %v4, i64 %g5, double %f5, <4 x i32> %v5, i64 %g6, double %f6, <4 x i32> %v6, i64 %g7, double %f7, <4 x i32> %v7, i64 %g8, double %f8, <4 x i32> %v8, i64 %g9, double %f9, <4 x i32> %v9, i64 %g10, double %f10, <4 x i32> %v10, i64 %g11, double %f11, <4 x i32> %v11, i64 %g12, double %f12, <4 x i32> %v12, i64 %g13, double %f13, <4 x i32> %v13, i64 %g14, double %f14, <4 x i32> %v14, i64 %g15, double %f15, <4 x i32> %v15, i64 %g16, double %f16, <4 x i32> %v16) #0 {
+ ret i64 %g5
+
+; CHECK-LABEL: @g5
+; CHECK: mr 3, 7
+; CHECK-NEXT: blr
+}
+
+define fastcc i64 @g6(i64 %g1, double %f1, <4 x i32> %v1, i64 %g2, double %f2, <4 x i32> %v2, i64 %g3, double %f3, <4 x i32> %v3, i64 %g4, double %f4, <4 x i32> %v4, i64 %g5, double %f5, <4 x i32> %v5, i64 %g6, double %f6, <4 x i32> %v6, i64 %g7, double %f7, <4 x i32> %v7, i64 %g8, double %f8, <4 x i32> %v8, i64 %g9, double %f9, <4 x i32> %v9, i64 %g10, double %f10, <4 x i32> %v10, i64 %g11, double %f11, <4 x i32> %v11, i64 %g12, double %f12, <4 x i32> %v12, i64 %g13, double %f13, <4 x i32> %v13, i64 %g14, double %f14, <4 x i32> %v14, i64 %g15, double %f15, <4 x i32> %v15, i64 %g16, double %f16, <4 x i32> %v16) #0 {
+ ret i64 %g6
+
+; CHECK-LABEL: @g6
+; CHECK: mr 3, 8
+; CHECK-NEXT: blr
+}
+
+define fastcc i64 @g7(i64 %g1, double %f1, <4 x i32> %v1, i64 %g2, double %f2, <4 x i32> %v2, i64 %g3, double %f3, <4 x i32> %v3, i64 %g4, double %f4, <4 x i32> %v4, i64 %g5, double %f5, <4 x i32> %v5, i64 %g6, double %f6, <4 x i32> %v6, i64 %g7, double %f7, <4 x i32> %v7, i64 %g8, double %f8, <4 x i32> %v8, i64 %g9, double %f9, <4 x i32> %v9, i64 %g10, double %f10, <4 x i32> %v10, i64 %g11, double %f11, <4 x i32> %v11, i64 %g12, double %f12, <4 x i32> %v12, i64 %g13, double %f13, <4 x i32> %v13, i64 %g14, double %f14, <4 x i32> %v14, i64 %g15, double %f15, <4 x i32> %v15, i64 %g16, double %f16, <4 x i32> %v16) #0 {
+ ret i64 %g7
+
+; CHECK-LABEL: @g7
+; CHECK: mr 3, 9
+; CHECK-NEXT: blr
+}
+
+define fastcc i64 @g8(i64 %g1, double %f1, <4 x i32> %v1, i64 %g2, double %f2, <4 x i32> %v2, i64 %g3, double %f3, <4 x i32> %v3, i64 %g4, double %f4, <4 x i32> %v4, i64 %g5, double %f5, <4 x i32> %v5, i64 %g6, double %f6, <4 x i32> %v6, i64 %g7, double %f7, <4 x i32> %v7, i64 %g8, double %f8, <4 x i32> %v8, i64 %g9, double %f9, <4 x i32> %v9, i64 %g10, double %f10, <4 x i32> %v10, i64 %g11, double %f11, <4 x i32> %v11, i64 %g12, double %f12, <4 x i32> %v12, i64 %g13, double %f13, <4 x i32> %v13, i64 %g14, double %f14, <4 x i32> %v14, i64 %g15, double %f15, <4 x i32> %v15, i64 %g16, double %f16, <4 x i32> %v16) #0 {
+ ret i64 %g8
+
+; CHECK-LABEL: @g8
+; CHECK: mr 3, 10
+; CHECK-NEXT: blr
+}
+
+define fastcc i64 @g9(i64 %g1, double %f1, <4 x i32> %v1, i64 %g2, double %f2, <4 x i32> %v2, i64 %g3, double %f3, <4 x i32> %v3, i64 %g4, double %f4, <4 x i32> %v4, i64 %g5, double %f5, <4 x i32> %v5, i64 %g6, double %f6, <4 x i32> %v6, i64 %g7, double %f7, <4 x i32> %v7, i64 %g8, double %f8, <4 x i32> %v8, i64 %g9, double %f9, <4 x i32> %v9, i64 %g10, double %f10, <4 x i32> %v10, i64 %g11, double %f11, <4 x i32> %v11, i64 %g12, double %f12, <4 x i32> %v12, i64 %g13, double %f13, <4 x i32> %v13, i64 %g14, double %f14, <4 x i32> %v14, i64 %g15, double %f15, <4 x i32> %v15, i64 %g16, double %f16, <4 x i32> %v16) #0 {
+ ret i64 %g9
+
+; CHECK-LABEL: @g9
+; CHECK: ld 3, 48(1)
+; CHECK-NEXT: blr
+}
+
+define fastcc i64 @g10(i64 %g1, double %f1, <4 x i32> %v1, i64 %g2, double %f2, <4 x i32> %v2, i64 %g3, double %f3, <4 x i32> %v3, i64 %g4, double %f4, <4 x i32> %v4, i64 %g5, double %f5, <4 x i32> %v5, i64 %g6, double %f6, <4 x i32> %v6, i64 %g7, double %f7, <4 x i32> %v7, i64 %g8, double %f8, <4 x i32> %v8, i64 %g9, double %f9, <4 x i32> %v9, i64 %g10, double %f10, <4 x i32> %v10, i64 %g11, double %f11, <4 x i32> %v11, i64 %g12, double %f12, <4 x i32> %v12, i64 %g13, double %f13, <4 x i32> %v13, i64 %g14, double %f14, <4 x i32> %v14, i64 %g15, double %f15, <4 x i32> %v15, i64 %g16, double %f16, <4 x i32> %v16) #0 {
+ ret i64 %g10
+
+; CHECK-LABEL: @g10
+; CHECK: ld 3, 56(1)
+; CHECK-NEXT: blr
+}
+
+define fastcc i64 @g11(i64 %g1, double %f1, <4 x i32> %v1, i64 %g2, double %f2, <4 x i32> %v2, i64 %g3, double %f3, <4 x i32> %v3, i64 %g4, double %f4, <4 x i32> %v4, i64 %g5, double %f5, <4 x i32> %v5, i64 %g6, double %f6, <4 x i32> %v6, i64 %g7, double %f7, <4 x i32> %v7, i64 %g8, double %f8, <4 x i32> %v8, i64 %g9, double %f9, <4 x i32> %v9, i64 %g10, double %f10, <4 x i32> %v10, i64 %g11, double %f11, <4 x i32> %v11, i64 %g12, double %f12, <4 x i32> %v12, i64 %g13, double %f13, <4 x i32> %v13, i64 %g14, double %f14, <4 x i32> %v14, i64 %g15, double %f15, <4 x i32> %v15, i64 %g16, double %f16, <4 x i32> %v16) #0 {
+ ret i64 %g11
+
+; CHECK-LABEL: @g11
+; CHECK: ld 3, 64(1)
+; CHECK-NEXT: blr
+}
+
+define fastcc double @f1(i64 %g1, double %f1, <4 x i32> %v1, i64 %g2, double %f2, <4 x i32> %v2, i64 %g3, double %f3, <4 x i32> %v3, i64 %g4, double %f4, <4 x i32> %v4, i64 %g5, double %f5, <4 x i32> %v5, i64 %g6, double %f6, <4 x i32> %v6, i64 %g7, double %f7, <4 x i32> %v7, i64 %g8, double %f8, <4 x i32> %v8, i64 %g9, double %f9, <4 x i32> %v9, i64 %g10, double %f10, <4 x i32> %v10, i64 %g11, double %f11, <4 x i32> %v11, i64 %g12, double %f12, <4 x i32> %v12, i64 %g13, double %f13, <4 x i32> %v13, i64 %g14, double %f14, <4 x i32> %v14, i64 %g15, double %f15, <4 x i32> %v15, i64 %g16, double %f16, <4 x i32> %v16) #0 {
+ ret double %f1
+
+; CHECK-LABEL: @f1
+; CHECK-NOT: fmr 1,
+; CHECK: blr
+}
+
+define fastcc double @f2(i64 %g1, double %f1, <4 x i32> %v1, i64 %g2, double %f2, <4 x i32> %v2, i64 %g3, double %f3, <4 x i32> %v3, i64 %g4, double %f4, <4 x i32> %v4, i64 %g5, double %f5, <4 x i32> %v5, i64 %g6, double %f6, <4 x i32> %v6, i64 %g7, double %f7, <4 x i32> %v7, i64 %g8, double %f8, <4 x i32> %v8, i64 %g9, double %f9, <4 x i32> %v9, i64 %g10, double %f10, <4 x i32> %v10, i64 %g11, double %f11, <4 x i32> %v11, i64 %g12, double %f12, <4 x i32> %v12, i64 %g13, double %f13, <4 x i32> %v13, i64 %g14, double %f14, <4 x i32> %v14, i64 %g15, double %f15, <4 x i32> %v15, i64 %g16, double %f16, <4 x i32> %v16) #0 {
+ ret double %f2
+
+; CHECK-LABEL: @f2
+; CHECK: fmr 1, 2
+; CHECK-NEXT: blr
+}
+
+define fastcc double @f3(i64 %g1, double %f1, <4 x i32> %v1, i64 %g2, double %f2, <4 x i32> %v2, i64 %g3, double %f3, <4 x i32> %v3, i64 %g4, double %f4, <4 x i32> %v4, i64 %g5, double %f5, <4 x i32> %v5, i64 %g6, double %f6, <4 x i32> %v6, i64 %g7, double %f7, <4 x i32> %v7, i64 %g8, double %f8, <4 x i32> %v8, i64 %g9, double %f9, <4 x i32> %v9, i64 %g10, double %f10, <4 x i32> %v10, i64 %g11, double %f11, <4 x i32> %v11, i64 %g12, double %f12, <4 x i32> %v12, i64 %g13, double %f13, <4 x i32> %v13, i64 %g14, double %f14, <4 x i32> %v14, i64 %g15, double %f15, <4 x i32> %v15, i64 %g16, double %f16, <4 x i32> %v16) #0 {
+ ret double %f3
+
+; CHECK-LABEL: @f3
+; CHECK: fmr 1, 3
+; CHECK-NEXT: blr
+}
+
+define fastcc double @f4(i64 %g1, double %f1, <4 x i32> %v1, i64 %g2, double %f2, <4 x i32> %v2, i64 %g3, double %f3, <4 x i32> %v3, i64 %g4, double %f4, <4 x i32> %v4, i64 %g5, double %f5, <4 x i32> %v5, i64 %g6, double %f6, <4 x i32> %v6, i64 %g7, double %f7, <4 x i32> %v7, i64 %g8, double %f8, <4 x i32> %v8, i64 %g9, double %f9, <4 x i32> %v9, i64 %g10, double %f10, <4 x i32> %v10, i64 %g11, double %f11, <4 x i32> %v11, i64 %g12, double %f12, <4 x i32> %v12, i64 %g13, double %f13, <4 x i32> %v13, i64 %g14, double %f14, <4 x i32> %v14, i64 %g15, double %f15, <4 x i32> %v15, i64 %g16, double %f16, <4 x i32> %v16) #0 {
+ ret double %f4
+
+; CHECK-LABEL: @f4
+; CHECK: fmr 1, 4
+; CHECK-NEXT: blr
+}
+
+define fastcc double @f5(i64 %g1, double %f1, <4 x i32> %v1, i64 %g2, double %f2, <4 x i32> %v2, i64 %g3, double %f3, <4 x i32> %v3, i64 %g4, double %f4, <4 x i32> %v4, i64 %g5, double %f5, <4 x i32> %v5, i64 %g6, double %f6, <4 x i32> %v6, i64 %g7, double %f7, <4 x i32> %v7, i64 %g8, double %f8, <4 x i32> %v8, i64 %g9, double %f9, <4 x i32> %v9, i64 %g10, double %f10, <4 x i32> %v10, i64 %g11, double %f11, <4 x i32> %v11, i64 %g12, double %f12, <4 x i32> %v12, i64 %g13, double %f13, <4 x i32> %v13, i64 %g14, double %f14, <4 x i32> %v14, i64 %g15, double %f15, <4 x i32> %v15, i64 %g16, double %f16, <4 x i32> %v16) #0 {
+ ret double %f5
+
+; CHECK-LABEL: @f5
+; CHECK: fmr 1, 5
+; CHECK-NEXT: blr
+}
+
+define fastcc double @f6(i64 %g1, double %f1, <4 x i32> %v1, i64 %g2, double %f2, <4 x i32> %v2, i64 %g3, double %f3, <4 x i32> %v3, i64 %g4, double %f4, <4 x i32> %v4, i64 %g5, double %f5, <4 x i32> %v5, i64 %g6, double %f6, <4 x i32> %v6, i64 %g7, double %f7, <4 x i32> %v7, i64 %g8, double %f8, <4 x i32> %v8, i64 %g9, double %f9, <4 x i32> %v9, i64 %g10, double %f10, <4 x i32> %v10, i64 %g11, double %f11, <4 x i32> %v11, i64 %g12, double %f12, <4 x i32> %v12, i64 %g13, double %f13, <4 x i32> %v13, i64 %g14, double %f14, <4 x i32> %v14, i64 %g15, double %f15, <4 x i32> %v15, i64 %g16, double %f16, <4 x i32> %v16) #0 {
+ ret double %f6
+
+; CHECK-LABEL: @f6
+; CHECK: fmr 1, 6
+; CHECK-NEXT: blr
+}
+
+define fastcc double @f7(i64 %g1, double %f1, <4 x i32> %v1, i64 %g2, double %f2, <4 x i32> %v2, i64 %g3, double %f3, <4 x i32> %v3, i64 %g4, double %f4, <4 x i32> %v4, i64 %g5, double %f5, <4 x i32> %v5, i64 %g6, double %f6, <4 x i32> %v6, i64 %g7, double %f7, <4 x i32> %v7, i64 %g8, double %f8, <4 x i32> %v8, i64 %g9, double %f9, <4 x i32> %v9, i64 %g10, double %f10, <4 x i32> %v10, i64 %g11, double %f11, <4 x i32> %v11, i64 %g12, double %f12, <4 x i32> %v12, i64 %g13, double %f13, <4 x i32> %v13, i64 %g14, double %f14, <4 x i32> %v14, i64 %g15, double %f15, <4 x i32> %v15, i64 %g16, double %f16, <4 x i32> %v16) #0 {
+ ret double %f7
+
+; CHECK-LABEL: @f7
+; CHECK: fmr 1, 7
+; CHECK-NEXT: blr
+}
+
+define fastcc double @f8(i64 %g1, double %f1, <4 x i32> %v1, i64 %g2, double %f2, <4 x i32> %v2, i64 %g3, double %f3, <4 x i32> %v3, i64 %g4, double %f4, <4 x i32> %v4, i64 %g5, double %f5, <4 x i32> %v5, i64 %g6, double %f6, <4 x i32> %v6, i64 %g7, double %f7, <4 x i32> %v7, i64 %g8, double %f8, <4 x i32> %v8, i64 %g9, double %f9, <4 x i32> %v9, i64 %g10, double %f10, <4 x i32> %v10, i64 %g11, double %f11, <4 x i32> %v11, i64 %g12, double %f12, <4 x i32> %v12, i64 %g13, double %f13, <4 x i32> %v13, i64 %g14, double %f14, <4 x i32> %v14, i64 %g15, double %f15, <4 x i32> %v15, i64 %g16, double %f16, <4 x i32> %v16) #0 {
+ ret double %f8
+
+; CHECK-LABEL: @f8
+; CHECK: fmr 1, 8
+; CHECK-NEXT: blr
+}
+
+define fastcc double @f9(i64 %g1, double %f1, <4 x i32> %v1, i64 %g2, double %f2, <4 x i32> %v2, i64 %g3, double %f3, <4 x i32> %v3, i64 %g4, double %f4, <4 x i32> %v4, i64 %g5, double %f5, <4 x i32> %v5, i64 %g6, double %f6, <4 x i32> %v6, i64 %g7, double %f7, <4 x i32> %v7, i64 %g8, double %f8, <4 x i32> %v8, i64 %g9, double %f9, <4 x i32> %v9, i64 %g10, double %f10, <4 x i32> %v10, i64 %g11, double %f11, <4 x i32> %v11, i64 %g12, double %f12, <4 x i32> %v12, i64 %g13, double %f13, <4 x i32> %v13, i64 %g14, double %f14, <4 x i32> %v14, i64 %g15, double %f15, <4 x i32> %v15, i64 %g16, double %f16, <4 x i32> %v16) #0 {
+ ret double %f9
+
+; CHECK-LABEL: @f9
+; CHECK: fmr 1, 9
+; CHECK-NEXT: blr
+}
+
+define fastcc double @f10(i64 %g1, double %f1, <4 x i32> %v1, i64 %g2, double %f2, <4 x i32> %v2, i64 %g3, double %f3, <4 x i32> %v3, i64 %g4, double %f4, <4 x i32> %v4, i64 %g5, double %f5, <4 x i32> %v5, i64 %g6, double %f6, <4 x i32> %v6, i64 %g7, double %f7, <4 x i32> %v7, i64 %g8, double %f8, <4 x i32> %v8, i64 %g9, double %f9, <4 x i32> %v9, i64 %g10, double %f10, <4 x i32> %v10, i64 %g11, double %f11, <4 x i32> %v11, i64 %g12, double %f12, <4 x i32> %v12, i64 %g13, double %f13, <4 x i32> %v13, i64 %g14, double %f14, <4 x i32> %v14, i64 %g15, double %f15, <4 x i32> %v15, i64 %g16, double %f16, <4 x i32> %v16) #0 {
+ ret double %f10
+
+; CHECK-LABEL: @f10
+; CHECK: fmr 1, 10
+; CHECK-NEXT: blr
+}
+
+define fastcc double @f11(i64 %g1, double %f1, <4 x i32> %v1, i64 %g2, double %f2, <4 x i32> %v2, i64 %g3, double %f3, <4 x i32> %v3, i64 %g4, double %f4, <4 x i32> %v4, i64 %g5, double %f5, <4 x i32> %v5, i64 %g6, double %f6, <4 x i32> %v6, i64 %g7, double %f7, <4 x i32> %v7, i64 %g8, double %f8, <4 x i32> %v8, i64 %g9, double %f9, <4 x i32> %v9, i64 %g10, double %f10, <4 x i32> %v10, i64 %g11, double %f11, <4 x i32> %v11, i64 %g12, double %f12, <4 x i32> %v12, i64 %g13, double %f13, <4 x i32> %v13, i64 %g14, double %f14, <4 x i32> %v14, i64 %g15, double %f15, <4 x i32> %v15, i64 %g16, double %f16, <4 x i32> %v16) #0 {
+ ret double %f11
+
+; CHECK-LABEL: @f11
+; CHECK: fmr 1, 11
+; CHECK-NEXT: blr
+}
+
+define fastcc double @f12(i64 %g1, double %f1, <4 x i32> %v1, i64 %g2, double %f2, <4 x i32> %v2, i64 %g3, double %f3, <4 x i32> %v3, i64 %g4, double %f4, <4 x i32> %v4, i64 %g5, double %f5, <4 x i32> %v5, i64 %g6, double %f6, <4 x i32> %v6, i64 %g7, double %f7, <4 x i32> %v7, i64 %g8, double %f8, <4 x i32> %v8, i64 %g9, double %f9, <4 x i32> %v9, i64 %g10, double %f10, <4 x i32> %v10, i64 %g11, double %f11, <4 x i32> %v11, i64 %g12, double %f12, <4 x i32> %v12, i64 %g13, double %f13, <4 x i32> %v13, i64 %g14, double %f14, <4 x i32> %v14, i64 %g15, double %f15, <4 x i32> %v15, i64 %g16, double %f16, <4 x i32> %v16) #0 {
+ ret double %f12
+
+; CHECK-LABEL: @f12
+; CHECK: fmr 1, 12
+; CHECK-NEXT: blr
+}
+
+define fastcc double @f13(i64 %g1, double %f1, <4 x i32> %v1, i64 %g2, double %f2, <4 x i32> %v2, i64 %g3, double %f3, <4 x i32> %v3, i64 %g4, double %f4, <4 x i32> %v4, i64 %g5, double %f5, <4 x i32> %v5, i64 %g6, double %f6, <4 x i32> %v6, i64 %g7, double %f7, <4 x i32> %v7, i64 %g8, double %f8, <4 x i32> %v8, i64 %g9, double %f9, <4 x i32> %v9, i64 %g10, double %f10, <4 x i32> %v10, i64 %g11, double %f11, <4 x i32> %v11, i64 %g12, double %f12, <4 x i32> %v12, i64 %g13, double %f13, <4 x i32> %v13, i64 %g14, double %f14, <4 x i32> %v14, i64 %g15, double %f15, <4 x i32> %v15, i64 %g16, double %f16, <4 x i32> %v16) #0 {
+ ret double %f13
+
+; CHECK-LABEL: @f13
+; CHECK: fmr 1, 13
+; CHECK-NEXT: blr
+}
+
+define fastcc double @f14(i64 %g1, double %f1, <4 x i32> %v1, i64 %g2, double %f2, <4 x i32> %v2, i64 %g3, double %f3, <4 x i32> %v3, i64 %g4, double %f4, <4 x i32> %v4, i64 %g5, double %f5, <4 x i32> %v5, i64 %g6, double %f6, <4 x i32> %v6, i64 %g7, double %f7, <4 x i32> %v7, i64 %g8, double %f8, <4 x i32> %v8, i64 %g9, double %f9, <4 x i32> %v9, i64 %g10, double %f10, <4 x i32> %v10, i64 %g11, double %f11, <4 x i32> %v11, i64 %g12, double %f12, <4 x i32> %v12, i64 %g13, double %f13, <4 x i32> %v13, i64 %g14, double %f14, <4 x i32> %v14, i64 %g15, double %f15, <4 x i32> %v15, i64 %g16, double %f16, <4 x i32> %v16) #0 {
+ ret double %f14
+
+; CHECK-LABEL: @f14
+; CHECK: lfd 1, 120(1)
+; CHECK-NEXT: blr
+}
+
+define fastcc double @f15(i64 %g1, double %f1, <4 x i32> %v1, i64 %g2, double %f2, <4 x i32> %v2, i64 %g3, double %f3, <4 x i32> %v3, i64 %g4, double %f4, <4 x i32> %v4, i64 %g5, double %f5, <4 x i32> %v5, i64 %g6, double %f6, <4 x i32> %v6, i64 %g7, double %f7, <4 x i32> %v7, i64 %g8, double %f8, <4 x i32> %v8, i64 %g9, double %f9, <4 x i32> %v9, i64 %g10, double %f10, <4 x i32> %v10, i64 %g11, double %f11, <4 x i32> %v11, i64 %g12, double %f12, <4 x i32> %v12, i64 %g13, double %f13, <4 x i32> %v13, i64 %g14, double %f14, <4 x i32> %v14, i64 %g15, double %f15, <4 x i32> %v15, i64 %g16, double %f16, <4 x i32> %v16) #0 {
+ ret double %f15
+
+; CHECK-LABEL: @f15
+; CHECK: lfd 1, 152(1)
+; CHECK-NEXT: blr
+}
+
+define fastcc <4 x i32> @v1(i64 %g1, double %f1, <4 x i32> %v1, i64 %g2, double %f2, <4 x i32> %v2, i64 %g3, double %f3, <4 x i32> %v3, i64 %g4, double %f4, <4 x i32> %v4, i64 %g5, double %f5, <4 x i32> %v5, i64 %g6, double %f6, <4 x i32> %v6, i64 %g7, double %f7, <4 x i32> %v7, i64 %g8, double %f8, <4 x i32> %v8, i64 %g9, double %f9, <4 x i32> %v9, i64 %g10, double %f10, <4 x i32> %v10, i64 %g11, double %f11, <4 x i32> %v11, i64 %g12, double %f12, <4 x i32> %v12, i64 %g13, double %f13, <4 x i32> %v13, i64 %g14, double %f14, <4 x i32> %v14, i64 %g15, double %f15, <4 x i32> %v15, i64 %g16, double %f16, <4 x i32> %v16) #0 {
+ ret <4 x i32> %v1
+
+; CHECK-LABEL: @v1
+; CHECK-NOT: vor 2,
+; CHECK: blr
+}
+
+define fastcc <4 x i32> @v2(i64 %g1, double %f1, <4 x i32> %v1, i64 %g2, double %f2, <4 x i32> %v2, i64 %g3, double %f3, <4 x i32> %v3, i64 %g4, double %f4, <4 x i32> %v4, i64 %g5, double %f5, <4 x i32> %v5, i64 %g6, double %f6, <4 x i32> %v6, i64 %g7, double %f7, <4 x i32> %v7, i64 %g8, double %f8, <4 x i32> %v8, i64 %g9, double %f9, <4 x i32> %v9, i64 %g10, double %f10, <4 x i32> %v10, i64 %g11, double %f11, <4 x i32> %v11, i64 %g12, double %f12, <4 x i32> %v12, i64 %g13, double %f13, <4 x i32> %v13, i64 %g14, double %f14, <4 x i32> %v14, i64 %g15, double %f15, <4 x i32> %v15, i64 %g16, double %f16, <4 x i32> %v16) #0 {
+ ret <4 x i32> %v2
+
+; CHECK-LABEL: @v2
+; CHECK: vor 2, 3, 3
+; CHECK-NEXT: blr
+}
+
+define fastcc <4 x i32> @v3(i64 %g1, double %f1, <4 x i32> %v1, i64 %g2, double %f2, <4 x i32> %v2, i64 %g3, double %f3, <4 x i32> %v3, i64 %g4, double %f4, <4 x i32> %v4, i64 %g5, double %f5, <4 x i32> %v5, i64 %g6, double %f6, <4 x i32> %v6, i64 %g7, double %f7, <4 x i32> %v7, i64 %g8, double %f8, <4 x i32> %v8, i64 %g9, double %f9, <4 x i32> %v9, i64 %g10, double %f10, <4 x i32> %v10, i64 %g11, double %f11, <4 x i32> %v11, i64 %g12, double %f12, <4 x i32> %v12, i64 %g13, double %f13, <4 x i32> %v13, i64 %g14, double %f14, <4 x i32> %v14, i64 %g15, double %f15, <4 x i32> %v15, i64 %g16, double %f16, <4 x i32> %v16) #0 {
+ ret <4 x i32> %v3
+
+; CHECK-LABEL: @v3
+; CHECK: vor 2, 4, 4
+; CHECK-NEXT: blr
+}
+
+define fastcc <4 x i32> @v4(i64 %g1, double %f1, <4 x i32> %v1, i64 %g2, double %f2, <4 x i32> %v2, i64 %g3, double %f3, <4 x i32> %v3, i64 %g4, double %f4, <4 x i32> %v4, i64 %g5, double %f5, <4 x i32> %v5, i64 %g6, double %f6, <4 x i32> %v6, i64 %g7, double %f7, <4 x i32> %v7, i64 %g8, double %f8, <4 x i32> %v8, i64 %g9, double %f9, <4 x i32> %v9, i64 %g10, double %f10, <4 x i32> %v10, i64 %g11, double %f11, <4 x i32> %v11, i64 %g12, double %f12, <4 x i32> %v12, i64 %g13, double %f13, <4 x i32> %v13, i64 %g14, double %f14, <4 x i32> %v14, i64 %g15, double %f15, <4 x i32> %v15, i64 %g16, double %f16, <4 x i32> %v16) #0 {
+ ret <4 x i32> %v4
+
+; CHECK-LABEL: @v4
+; CHECK: vor 2, 5, 5
+; CHECK-NEXT: blr
+}
+
+define fastcc <4 x i32> @v5(i64 %g1, double %f1, <4 x i32> %v1, i64 %g2, double %f2, <4 x i32> %v2, i64 %g3, double %f3, <4 x i32> %v3, i64 %g4, double %f4, <4 x i32> %v4, i64 %g5, double %f5, <4 x i32> %v5, i64 %g6, double %f6, <4 x i32> %v6, i64 %g7, double %f7, <4 x i32> %v7, i64 %g8, double %f8, <4 x i32> %v8, i64 %g9, double %f9, <4 x i32> %v9, i64 %g10, double %f10, <4 x i32> %v10, i64 %g11, double %f11, <4 x i32> %v11, i64 %g12, double %f12, <4 x i32> %v12, i64 %g13, double %f13, <4 x i32> %v13, i64 %g14, double %f14, <4 x i32> %v14, i64 %g15, double %f15, <4 x i32> %v15, i64 %g16, double %f16, <4 x i32> %v16) #0 {
+ ret <4 x i32> %v5
+
+; CHECK-LABEL: @v5
+; CHECK: vor 2, 6, 6
+; CHECK-NEXT: blr
+}
+
+define fastcc <4 x i32> @v6(i64 %g1, double %f1, <4 x i32> %v1, i64 %g2, double %f2, <4 x i32> %v2, i64 %g3, double %f3, <4 x i32> %v3, i64 %g4, double %f4, <4 x i32> %v4, i64 %g5, double %f5, <4 x i32> %v5, i64 %g6, double %f6, <4 x i32> %v6, i64 %g7, double %f7, <4 x i32> %v7, i64 %g8, double %f8, <4 x i32> %v8, i64 %g9, double %f9, <4 x i32> %v9, i64 %g10, double %f10, <4 x i32> %v10, i64 %g11, double %f11, <4 x i32> %v11, i64 %g12, double %f12, <4 x i32> %v12, i64 %g13, double %f13, <4 x i32> %v13, i64 %g14, double %f14, <4 x i32> %v14, i64 %g15, double %f15, <4 x i32> %v15, i64 %g16, double %f16, <4 x i32> %v16) #0 {
+ ret <4 x i32> %v6
+
+; CHECK-LABEL: @v6
+; CHECK: vor 2, 7, 7
+; CHECK-NEXT: blr
+}
+
+define fastcc <4 x i32> @v7(i64 %g1, double %f1, <4 x i32> %v1, i64 %g2, double %f2, <4 x i32> %v2, i64 %g3, double %f3, <4 x i32> %v3, i64 %g4, double %f4, <4 x i32> %v4, i64 %g5, double %f5, <4 x i32> %v5, i64 %g6, double %f6, <4 x i32> %v6, i64 %g7, double %f7, <4 x i32> %v7, i64 %g8, double %f8, <4 x i32> %v8, i64 %g9, double %f9, <4 x i32> %v9, i64 %g10, double %f10, <4 x i32> %v10, i64 %g11, double %f11, <4 x i32> %v11, i64 %g12, double %f12, <4 x i32> %v12, i64 %g13, double %f13, <4 x i32> %v13, i64 %g14, double %f14, <4 x i32> %v14, i64 %g15, double %f15, <4 x i32> %v15, i64 %g16, double %f16, <4 x i32> %v16) #0 {
+ ret <4 x i32> %v7
+
+; CHECK-LABEL: @v7
+; CHECK: vor 2, 8, 8
+; CHECK-NEXT: blr
+}
+
+define fastcc <4 x i32> @v8(i64 %g1, double %f1, <4 x i32> %v1, i64 %g2, double %f2, <4 x i32> %v2, i64 %g3, double %f3, <4 x i32> %v3, i64 %g4, double %f4, <4 x i32> %v4, i64 %g5, double %f5, <4 x i32> %v5, i64 %g6, double %f6, <4 x i32> %v6, i64 %g7, double %f7, <4 x i32> %v7, i64 %g8, double %f8, <4 x i32> %v8, i64 %g9, double %f9, <4 x i32> %v9, i64 %g10, double %f10, <4 x i32> %v10, i64 %g11, double %f11, <4 x i32> %v11, i64 %g12, double %f12, <4 x i32> %v12, i64 %g13, double %f13, <4 x i32> %v13, i64 %g14, double %f14, <4 x i32> %v14, i64 %g15, double %f15, <4 x i32> %v15, i64 %g16, double %f16, <4 x i32> %v16) #0 {
+ ret <4 x i32> %v8
+
+; CHECK-LABEL: @v8
+; CHECK: vor 2, 9, 9
+; CHECK-NEXT: blr
+}
+
+define fastcc <4 x i32> @v9(i64 %g1, double %f1, <4 x i32> %v1, i64 %g2, double %f2, <4 x i32> %v2, i64 %g3, double %f3, <4 x i32> %v3, i64 %g4, double %f4, <4 x i32> %v4, i64 %g5, double %f5, <4 x i32> %v5, i64 %g6, double %f6, <4 x i32> %v6, i64 %g7, double %f7, <4 x i32> %v7, i64 %g8, double %f8, <4 x i32> %v8, i64 %g9, double %f9, <4 x i32> %v9, i64 %g10, double %f10, <4 x i32> %v10, i64 %g11, double %f11, <4 x i32> %v11, i64 %g12, double %f12, <4 x i32> %v12, i64 %g13, double %f13, <4 x i32> %v13, i64 %g14, double %f14, <4 x i32> %v14, i64 %g15, double %f15, <4 x i32> %v15, i64 %g16, double %f16, <4 x i32> %v16) #0 {
+ ret <4 x i32> %v9
+
+; CHECK-LABEL: @v9
+; CHECK: vor 2, 10, 10
+; CHECK-NEXT: blr
+}
+
+define fastcc <4 x i32> @v10(i64 %g1, double %f1, <4 x i32> %v1, i64 %g2, double %f2, <4 x i32> %v2, i64 %g3, double %f3, <4 x i32> %v3, i64 %g4, double %f4, <4 x i32> %v4, i64 %g5, double %f5, <4 x i32> %v5, i64 %g6, double %f6, <4 x i32> %v6, i64 %g7, double %f7, <4 x i32> %v7, i64 %g8, double %f8, <4 x i32> %v8, i64 %g9, double %f9, <4 x i32> %v9, i64 %g10, double %f10, <4 x i32> %v10, i64 %g11, double %f11, <4 x i32> %v11, i64 %g12, double %f12, <4 x i32> %v12, i64 %g13, double %f13, <4 x i32> %v13, i64 %g14, double %f14, <4 x i32> %v14, i64 %g15, double %f15, <4 x i32> %v15, i64 %g16, double %f16, <4 x i32> %v16) #0 {
+ ret <4 x i32> %v10
+
+; CHECK-LABEL: @v10
+; CHECK: vor 2, 11, 11
+; CHECK-NEXT: blr
+}
+
+define fastcc <4 x i32> @v11(i64 %g1, double %f1, <4 x i32> %v1, i64 %g2, double %f2, <4 x i32> %v2, i64 %g3, double %f3, <4 x i32> %v3, i64 %g4, double %f4, <4 x i32> %v4, i64 %g5, double %f5, <4 x i32> %v5, i64 %g6, double %f6, <4 x i32> %v6, i64 %g7, double %f7, <4 x i32> %v7, i64 %g8, double %f8, <4 x i32> %v8, i64 %g9, double %f9, <4 x i32> %v9, i64 %g10, double %f10, <4 x i32> %v10, i64 %g11, double %f11, <4 x i32> %v11, i64 %g12, double %f12, <4 x i32> %v12, i64 %g13, double %f13, <4 x i32> %v13, i64 %g14, double %f14, <4 x i32> %v14, i64 %g15, double %f15, <4 x i32> %v15, i64 %g16, double %f16, <4 x i32> %v16) #0 {
+ ret <4 x i32> %v11
+
+; CHECK-LABEL: @v11
+; CHECK: vor 2, 12, 12
+; CHECK-NEXT: blr
+}
+
+define fastcc <4 x i32> @v12(i64 %g1, double %f1, <4 x i32> %v1, i64 %g2, double %f2, <4 x i32> %v2, i64 %g3, double %f3, <4 x i32> %v3, i64 %g4, double %f4, <4 x i32> %v4, i64 %g5, double %f5, <4 x i32> %v5, i64 %g6, double %f6, <4 x i32> %v6, i64 %g7, double %f7, <4 x i32> %v7, i64 %g8, double %f8, <4 x i32> %v8, i64 %g9, double %f9, <4 x i32> %v9, i64 %g10, double %f10, <4 x i32> %v10, i64 %g11, double %f11, <4 x i32> %v11, i64 %g12, double %f12, <4 x i32> %v12, i64 %g13, double %f13, <4 x i32> %v13, i64 %g14, double %f14, <4 x i32> %v14, i64 %g15, double %f15, <4 x i32> %v15, i64 %g16, double %f16, <4 x i32> %v16) #0 {
+ ret <4 x i32> %v12
+
+; CHECK-LABEL: @v12
+; CHECK: vor 2, 13, 13
+; CHECK-NEXT: blr
+}
+
+define fastcc <4 x i32> @v13(i64 %g1, double %f1, <4 x i32> %v1, i64 %g2, double %f2, <4 x i32> %v2, i64 %g3, double %f3, <4 x i32> %v3, i64 %g4, double %f4, <4 x i32> %v4, i64 %g5, double %f5, <4 x i32> %v5, i64 %g6, double %f6, <4 x i32> %v6, i64 %g7, double %f7, <4 x i32> %v7, i64 %g8, double %f8, <4 x i32> %v8, i64 %g9, double %f9, <4 x i32> %v9, i64 %g10, double %f10, <4 x i32> %v10, i64 %g11, double %f11, <4 x i32> %v11, i64 %g12, double %f12, <4 x i32> %v12, i64 %g13, double %f13, <4 x i32> %v13, i64 %g14, double %f14, <4 x i32> %v14, i64 %g15, double %f15, <4 x i32> %v15, i64 %g16, double %f16, <4 x i32> %v16) #0 {
+ ret <4 x i32> %v13
+
+; CHECK-LABEL: @v13
+; CHECK: addi [[REG1:[0-9]+]], 1, 96
+; CHECK-NEXT: lvx 2, 0, [[REG1]]
+; CHECK-NEXT: blr
+}
+
+define fastcc <4 x i32> @v14(i64 %g1, double %f1, <4 x i32> %v1, i64 %g2, double %f2, <4 x i32> %v2, i64 %g3, double %f3, <4 x i32> %v3, i64 %g4, double %f4, <4 x i32> %v4, i64 %g5, double %f5, <4 x i32> %v5, i64 %g6, double %f6, <4 x i32> %v6, i64 %g7, double %f7, <4 x i32> %v7, i64 %g8, double %f8, <4 x i32> %v8, i64 %g9, double %f9, <4 x i32> %v9, i64 %g10, double %f10, <4 x i32> %v10, i64 %g11, double %f11, <4 x i32> %v11, i64 %g12, double %f12, <4 x i32> %v12, i64 %g13, double %f13, <4 x i32> %v13, i64 %g14, double %f14, <4 x i32> %v14, i64 %g15, double %f15, <4 x i32> %v15, i64 %g16, double %f16, <4 x i32> %v16) #0 {
+ ret <4 x i32> %v14
+
+; CHECK-LABEL: @v14
+; CHECK: addi [[REG1:[0-9]+]], 1, 128
+; CHECK-NEXT: lvx 2, 0, [[REG1]]
+; CHECK-NEXT: blr
+}
+
+define fastcc <4 x i32> @v15(i64 %g1, double %f1, <4 x i32> %v1, i64 %g2, double %f2, <4 x i32> %v2, i64 %g3, double %f3, <4 x i32> %v3, i64 %g4, double %f4, <4 x i32> %v4, i64 %g5, double %f5, <4 x i32> %v5, i64 %g6, double %f6, <4 x i32> %v6, i64 %g7, double %f7, <4 x i32> %v7, i64 %g8, double %f8, <4 x i32> %v8, i64 %g9, double %f9, <4 x i32> %v9, i64 %g10, double %f10, <4 x i32> %v10, i64 %g11, double %f11, <4 x i32> %v11, i64 %g12, double %f12, <4 x i32> %v12, i64 %g13, double %f13, <4 x i32> %v13, i64 %g14, double %f14, <4 x i32> %v14, i64 %g15, double %f15, <4 x i32> %v15, i64 %g16, double %f16, <4 x i32> %v16) #0 {
+ ret <4 x i32> %v15
+
+; CHECK-LABEL: @v15
+; CHECK: addi [[REG1:[0-9]+]], 1, 160
+; CHECK-NEXT: lvx 2, 0, [[REG1]]
+; CHECK-NEXT: blr
+}
+
+define void @cg1(i64 %v) #0 {
+ tail call fastcc i64 @g1(i64 %v, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>)
+ ret void
+
+; CHECK-LABEL: @cg1
+; CHECK-NOT: {{^[ \t]*}}mr 3,
+; CHECK: blr
+}
+
+define void @cg2(i64 %v) #0 {
+ tail call fastcc i64 @g1(i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 %v, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>)
+ ret void
+
+; CHECK-LABEL: @cg2
+; CHECK: mr 4, 3
+; CHECK: blr
+}
+
+define void @cg3(i64 %v) #0 {
+ tail call fastcc i64 @g1(i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 %v, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>)
+ ret void
+
+; CHECK-LABEL: @cg3
+; CHECK: mr 5, 3
+; CHECK: blr
+}
+
+define void @cg4(i64 %v) #0 {
+ tail call fastcc i64 @g1(i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 %v, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>)
+ ret void
+
+; CHECK-LABEL: @cg4
+; CHECK: mr 6, 3
+; CHECK: blr
+}
+
+define void @cg5(i64 %v) #0 {
+ tail call fastcc i64 @g1(i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 %v, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>)
+ ret void
+
+; CHECK-LABEL: @cg5
+; CHECK: mr 7, 3
+; CHECK: blr
+}
+
+define void @cg6(i64 %v) #0 {
+ tail call fastcc i64 @g1(i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 %v, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>)
+ ret void
+
+; CHECK-LABEL: @cg6
+; CHECK: mr 8, 3
+; CHECK: blr
+}
+
+define void @cg7(i64 %v) #0 {
+ tail call fastcc i64 @g1(i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 %v, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>)
+ ret void
+
+; CHECK-LABEL: @cg7
+; CHECK: mr 9, 3
+; CHECK: blr
+}
+
+define void @cg8(i64 %v) #0 {
+ tail call fastcc i64 @g1(i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 %v, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>)
+ ret void
+
+; CHECK-LABEL: @cg8
+; CHECK: mr 10, 3
+; CHECK: blr
+}
+
+define void @cg9(i64 %v) #0 {
+ tail call fastcc i64 @g1(i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 %v, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>)
+ ret void
+
+; CHECK-LABEL: @cg9
+; CHECK: mr [[REG1:[0-9]+]], 3
+; CHECK: std [[REG1]], 48(1)
+; CHECK: blr
+}
+
+define void @cg10(i64 %v) #0 {
+ tail call fastcc i64 @g1(i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 %v, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>)
+ ret void
+
+; CHECK-LABEL: @cg10
+; CHECK: mr [[REG1:[0-9]+]], 3
+; CHECK: std [[REG1]], 56(1)
+; CHECK: blr
+}
+
+define void @cg11(i64 %v) #0 {
+ tail call fastcc i64 @g1(i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 %v, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>)
+ ret void
+
+; CHECK-LABEL: @cg11
+; CHECK: mr [[REG1:[0-9]+]], 3
+; CHECK: std [[REG1]], 64(1)
+; CHECK: blr
+}
+
+define void @cf1(double %v) #0 {
+ tail call fastcc i64 @g1(i64 0, double %v, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>)
+ ret void
+
+; CHECK-LABEL: @cf1
+; CHECK-NOT: fmr 1,
+; CHECK: blr
+}
+
+define void @cf2(double %v) #0 {
+ tail call fastcc i64 @g1(i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double %v, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>)
+ ret void
+
+; CHECK-LABEL: @cf2
+; CHECK: fmr 2, 1
+; CHECK: blr
+}
+
+define void @cf3(double %v) #0 {
+ tail call fastcc i64 @g1(i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double %v, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>)
+ ret void
+
+; CHECK-LABEL: @cf3
+; CHECK: fmr 3, 1
+; CHECK: blr
+}
+
+define void @cf4(double %v) #0 {
+ tail call fastcc i64 @g1(i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double %v, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>)
+ ret void
+
+; CHECK-LABEL: @cf4
+; CHECK: fmr 4, 1
+; CHECK: blr
+}
+
+define void @cf5(double %v) #0 {
+ tail call fastcc i64 @g1(i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double %v, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>)
+ ret void
+
+; CHECK-LABEL: @cf5
+; CHECK: fmr 5, 1
+; CHECK: blr
+}
+
+define void @cf14(double %v) #0 {
+ tail call fastcc i64 @g1(i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double %v, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>)
+ ret void
+
+; CHECK-LABEL: @cf14
+; CHECK: stfd 1, 120(1)
+; CHECK: blr
+}
+
+define void @cf15(double %v) #0 {
+ tail call fastcc i64 @g1(i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double %v, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>)
+ ret void
+
+; CHECK-LABEL: @cf15
+; CHECK: stfd 1, 152(1)
+; CHECK: blr
+}
+
+define void @cv2(<4 x i32> %v) #0 {
+ tail call fastcc i64 @g1(i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> %v, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>)
+ ret void
+
+; CHECK-LABEL: @cv2
+; CHECK: vor 3, 2, 2
+; CHECK: blr
+}
+
+define void @cv3(<4 x i32> %v) #0 {
+ tail call fastcc i64 @g1(i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> %v, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>)
+ ret void
+
+; CHECK-LABEL: @cv3
+; CHECK: vor 4, 2, 2
+; CHECK: blr
+}
+
+define void @cv13(<4 x i32> %v) #0 {
+ tail call fastcc i64 @g1(i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> %v, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>)
+ ret void
+
+; CHECK-LABEL: @cv13
+; CHECK: li [[REG1:[0-9]+]], 96
+; CHECK: stvx 2, 1, [[REG1]]
+; CHECK: blr
+}
+
+define void @cv14(<4 x i32> %v) #0 {
+ tail call fastcc i64 @g1(i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> %v, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i64 0, double 0.0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>)
+ ret void
+
+; CHECK-LABEL: @cv14
+; CHECK: li [[REG1:[0-9]+]], 128
+; CHECK: stvx 2, 1, [[REG1]]
+; CHECK: blr
+}
+
+attributes #0 = { nounwind }
+
diff --git a/test/CodeGen/PowerPC/ppc64-func-desc-hoist.ll b/test/CodeGen/PowerPC/ppc64-func-desc-hoist.ll
new file mode 100644
index 000000000000..57577f90109c
--- /dev/null
+++ b/test/CodeGen/PowerPC/ppc64-func-desc-hoist.ll
@@ -0,0 +1,47 @@
+; RUN: llc -mcpu=a2 < %s | FileCheck %s -check-prefix=INVFUNCDESC
+; RUN: llc -mcpu=a2 -mattr=-invariant-function-descriptors < %s | FileCheck %s -check-prefix=NONINVFUNCDESC
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+; Function Attrs: nounwind
+define void @bar(void (...)* nocapture %x) #0 {
+entry:
+ %callee.knr.cast = bitcast void (...)* %x to void ()*
+ br label %for.body
+
+; INVFUNCDESC-LABEL: @bar
+; INVFUNCDESC-DAG: ld [[REG1:[0-9]+]], 8(3)
+; INVFUNCDESC-DAG: ld [[REG2:[0-9]+]], 16(3)
+; INVFUNCDESC-DAG: ld [[REG3:[0-9]+]], 0(3)
+
+; INVFUNCDESC: %for.body
+; INVFUNCDESC: std 2, 40(1)
+; INVFUNCDESC-DAG: mtctr [[REG3]]
+; INVFUNCDESC-DAG: mr 11, [[REG2]]
+; INVFUNCDESC-DAG: mr 2, [[REG1]]
+; INVFUNCDESC: bctrl
+; INVFUNCDESC-NEXT: ld 2, 40(1)
+
+; NONINVFUNCDESC-LABEL: @bar
+; NONINVFUNCDESC: %for.body
+; NONINVFUNCDESC: std 2, 40(1)
+; NONINVFUNCDESC-DAG: ld 3, 0(30)
+; NONINVFUNCDESC-DAG: ld 11, 16(30)
+; NONINVFUNCDESC-DAG: ld 2, 8(30)
+; NONINVFUNCDESC: mtctr 3
+; NONINVFUNCDESC: bctrl
+; NONINVFUNCDESC-NEXT: ld 2, 40(1)
+
+for.body: ; preds = %for.body, %entry
+ %i.02 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+ tail call void %callee.knr.cast() #0
+ %inc = add nuw nsw i32 %i.02, 1
+ %exitcond = icmp eq i32 %inc, 1600000000
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body
+ ret void
+}
+
+attributes #0 = { nounwind }
+
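For reference, the 0/8/16 offsets in the checks above are the three doublewords of an ELFv1 function descriptor. A sketch of that layout as an IR type (illustrative only, not part of the imported test):

; offset 0:  entry-point address (loaded and moved to CTR)
; offset 8:  TOC pointer for the callee (moved to r2)
; offset 16: environment pointer (moved to r11)
%fn_descriptor = type { i64, i64, i64 }

The std 2, 40(1) / ld 2, 40(1) pair around bctrl saves and restores the caller's own TOC pointer in the stack linkage area across the indirect call.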
diff --git a/test/CodeGen/PowerPC/ppc64-gep-opt.ll b/test/CodeGen/PowerPC/ppc64-gep-opt.ll
index 14cf9a7e8382..f238908fcaf2 100644
--- a/test/CodeGen/PowerPC/ppc64-gep-opt.ll
+++ b/test/CodeGen/PowerPC/ppc64-gep-opt.ll
@@ -14,14 +14,14 @@ target triple = "powerpc64-unknown-linux-gnu"
; Check that when two complex GEPs are used in two basic blocks, LLVM can
; eliminate the common subexpression for the second use.
define void @test_GEP_CSE([240 x %struct]* %string, i32* %adj, i32 %lib, i64 %idxprom) {
- %liberties = getelementptr [240 x %struct]* %string, i64 1, i64 %idxprom, i32 3
- %1 = load i32* %liberties, align 4
+ %liberties = getelementptr [240 x %struct], [240 x %struct]* %string, i64 1, i64 %idxprom, i32 3
+ %1 = load i32, i32* %liberties, align 4
%cmp = icmp eq i32 %1, %lib
br i1 %cmp, label %if.then, label %if.end
if.then: ; preds = %entry
- %origin = getelementptr [240 x %struct]* %string, i64 1, i64 %idxprom, i32 2
- %2 = load i32* %origin, align 4
+ %origin = getelementptr [240 x %struct], [240 x %struct]* %string, i64 1, i64 %idxprom, i32 2
+ %2 = load i32, i32* %origin, align 4
store i32 %2, i32* %adj, align 4
br label %if.end
@@ -44,11 +44,11 @@ if.end: ; preds = %if.then, %entry
; CHECK-UseAA-LABEL: @test_GEP_CSE(
; CHECK-UseAA: [[PTR0:%[a-zA-Z0-9]+]] = bitcast [240 x %struct]* %string to i8*
; CHECK-UseAA: [[IDX:%[a-zA-Z0-9]+]] = mul i64 %idxprom, 96
-; CHECK-UseAA: [[PTR1:%[a-zA-Z0-9]+]] = getelementptr i8* [[PTR0]], i64 [[IDX]]
-; CHECK-UseAA: getelementptr i8* [[PTR1]], i64 23052
+; CHECK-UseAA: [[PTR1:%[a-zA-Z0-9]+]] = getelementptr i8, i8* [[PTR0]], i64 [[IDX]]
+; CHECK-UseAA: getelementptr i8, i8* [[PTR1]], i64 23052
; CHECK-UseAA: bitcast
; CHECK-UseAA: if.then:
-; CHECK-UseAA: getelementptr i8* [[PTR1]], i64 23048
+; CHECK-UseAA: getelementptr i8, i8* [[PTR1]], i64 23048
; CHECK-UseAA: bitcast
%class.my = type { i32, [128 x i32], i32, [256 x %struct.pt]}
@@ -59,10 +59,10 @@ if.end: ; preds = %if.then, %entry
; calculation and code gen can generate a better addressing mode for the second
; use.
define void @test_GEP_across_BB(%class.my* %this, i64 %idx) {
- %1 = getelementptr %class.my* %this, i64 0, i32 3, i64 %idx, i32 1
- %2 = load i32* %1, align 4
- %3 = getelementptr %class.my* %this, i64 0, i32 3, i64 %idx, i32 2
- %4 = load i32* %3, align 4
+ %1 = getelementptr %class.my, %class.my* %this, i64 0, i32 3, i64 %idx, i32 1
+ %2 = load i32, i32* %1, align 4
+ %3 = getelementptr %class.my, %class.my* %this, i64 0, i32 3, i64 %idx, i32 2
+ %4 = load i32, i32* %3, align 4
%5 = icmp eq i32 %2, %4
br i1 %5, label %if.true, label %exit
@@ -90,12 +90,12 @@ exit:
; CHECK-UseAA-LABEL: test_GEP_across_BB(
; CHECK-UseAA: [[PTR0:%[a-zA-Z0-9]+]] = getelementptr
-; CHECK-UseAA: getelementptr i8* [[PTR0]], i64 528
-; CHECK-UseAA: getelementptr i8* [[PTR0]], i64 532
+; CHECK-UseAA: getelementptr i8, i8* [[PTR0]], i64 528
+; CHECK-UseAA: getelementptr i8, i8* [[PTR0]], i64 532
; CHECK-UseAA: if.true:
-; CHECK-UseAA: {{%sunk[a-zA-Z0-9]+}} = getelementptr i8* [[PTR0]], i64 532
+; CHECK-UseAA: {{%sunk[a-zA-Z0-9]+}} = getelementptr i8, i8* [[PTR0]], i64 532
; CHECK-UseAA: exit:
-; CHECK-UseAA: {{%sunk[a-zA-Z0-9]+}} = getelementptr i8* [[PTR0]], i64 528
+; CHECK-UseAA: {{%sunk[a-zA-Z0-9]+}} = getelementptr i8, i8* [[PTR0]], i64 528
%struct.S = type { float, double }
@struct_array = global [1024 x %struct.S] zeroinitializer, align 16
@@ -109,7 +109,7 @@ define double* @test-struct_1(i32 %i) {
entry:
%add = add nsw i32 %i, 5
%idxprom = sext i32 %add to i64
- %p = getelementptr [1024 x %struct.S]* @struct_array, i64 0, i64 %idxprom, i32 1
+ %p = getelementptr [1024 x %struct.S], [1024 x %struct.S]* @struct_array, i64 0, i64 %idxprom, i32 1
ret double* %p
}
; CHECK-NoAA-LABEL: @test-struct_1(
@@ -117,7 +117,7 @@ entry:
; CHECK-NoAA: add i64 %{{[a-zA-Z0-9]+}}, 88
; CHECK-UseAA-LABEL: @test-struct_1(
-; CHECK-UseAA: getelementptr i8* %{{[a-zA-Z0-9]+}}, i64 88
+; CHECK-UseAA: getelementptr i8, i8* %{{[a-zA-Z0-9]+}}, i64 88
%struct3 = type { i64, i32 }
%struct2 = type { %struct3, i32 }
@@ -131,7 +131,7 @@ entry:
define %struct2* @test-struct_2(%struct0* %ptr, i64 %idx) {
entry:
%arrayidx = add nsw i64 %idx, -2
- %ptr2 = getelementptr %struct0* %ptr, i64 0, i32 3, i64 %arrayidx, i32 1
+ %ptr2 = getelementptr %struct0, %struct0* %ptr, i64 0, i32 3, i64 %arrayidx, i32 1
ret %struct2* %ptr2
}
; CHECK-NoAA-LABEL: @test-struct_2(
@@ -139,14 +139,14 @@ entry:
; CHECK-NoAA: add i64 %{{[a-zA-Z0-9]+}}, -40
; CHECK-UseAA-LABEL: @test-struct_2(
-; CHECK-UseAA: getelementptr i8* %{{[a-zA-Z0-9]+}}, i64 -40
+; CHECK-UseAA: getelementptr i8, i8* %{{[a-zA-Z0-9]+}}, i64 -40
; Test that when an index is computed from two constants, the
; SeparateConstOffsetFromGEP pass does not generate an incorrect result.
define void @test_const_add([3 x i32]* %in) {
%inc = add nsw i32 2, 1
%idxprom = sext i32 %inc to i64
- %arrayidx = getelementptr [3 x i32]* %in, i64 %idxprom, i64 2
+ %arrayidx = getelementptr [3 x i32], [3 x i32]* %in, i64 %idxprom, i64 2
store i32 0, i32* %arrayidx, align 4
ret void
}
diff --git a/test/CodeGen/PowerPC/ppc64-i128-abi.ll b/test/CodeGen/PowerPC/ppc64-i128-abi.ll
new file mode 100644
index 000000000000..993aec24c8fb
--- /dev/null
+++ b/test/CodeGen/PowerPC/ppc64-i128-abi.ll
@@ -0,0 +1,274 @@
+; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 < %s | FileCheck %s -check-prefix=CHECK-LE
+; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 < %s | FileCheck %s -check-prefix=CHECK-BE
+; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 -mattr=-vsx < %s | FileCheck %s -check-prefix=CHECK-NOVSX
+; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 -mattr=-vsx < %s | FileCheck %s -check-prefix=CHECK-NOVSX
+; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 -mattr=-vsx < %s | FileCheck %s -check-prefix=CHECK-BE-NOVSX
+; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 -mattr=-vsx < %s | FileCheck %s -check-prefix=CHECK-LE-NOVSX
+
+@x = common global <1 x i128> zeroinitializer, align 16
+@y = common global <1 x i128> zeroinitializer, align 16
+@a = common global i128 zeroinitializer, align 16
+@b = common global i128 zeroinitializer, align 16
+
+; VSX:
+; %a is passed in register 34
+; The value of 1 is stored in the TOC.
+; On LE, ensure the value of 1 is swapped before being used (using xxswapd).
+; VMX (no VSX):
+; %a is passed in register 2
+; The value of 1 is stored in the TOC.
+; No swaps are necessary when using P8 Vector instructions on LE
+define <1 x i128> @v1i128_increment_by_one(<1 x i128> %a) nounwind {
+ %tmp = add <1 x i128> %a, <i128 1>
+ ret <1 x i128> %tmp
+
+; FIXME: Seems a 128-bit literal is materialized by loading from the TOC. There
+; should be a better way of doing this.
+
+; CHECK-LE-LABEL: @v1i128_increment_by_one
+; CHECK-LE: lxvd2x [[VAL:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
+; CHECK-LE: xxswapd 35, [[VAL]]
+; CHECK-LE: vadduqm 2, 2, 3
+; CHECK-LE: blr
+
+; CHECK-BE-LABEL: @v1i128_increment_by_one
+; CHECK-BE: lxvd2x 35, {{[0-9]+}}, {{[0-9]+}}
+; CHECK-BE-NOT: xxswapd
+; CHECK-BE: vadduqm 2, 2, 3
+; CHECK-BE-NOT: xxswapd 34, {{[0-9]+}}
+; CHECK-BE: blr
+
+; CHECK-NOVSX-LABEL: @v1i128_increment_by_one
+; CHECK-NOVSX-NOT: xxswapd {{[0-9]+}}, {{[0-9]+}}
+; CHECK-NOVSX-NOT: stxvd2x {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK-NOVSX: lvx [[VAL:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
+; CHECK-NOVSX-NOT: lxvd2x {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK-NOVSX-NOT: xxswapd {{[0-9]+}}, {{[0-9]+}}
+; CHECK-NOVSX: vadduqm 2, 2, [[VAL]]
+; CHECK-NOVSX: blr
+}
+
+; VSX:
+; %a is passed in register 34
+; %b is passed in register 35
+; No swaps are necessary when using P8 Vector instructions on LE
+; VMX (no VSX):
+; %a is passed in register 2
+; %b is passed in register 3
+; On LE, there is no need to swap the contents of 2 and 3 because the lvx/stvx
+; instructions do not swap elements
+define <1 x i128> @v1i128_increment_by_val(<1 x i128> %a, <1 x i128> %b) nounwind {
+ %tmp = add <1 x i128> %a, %b
+ ret <1 x i128> %tmp
+
+; CHECK-LE-LABEL: @v1i128_increment_by_val
+; CHECK-LE-NOT: xxswapd
+; CHECK-LE: adduqm 2, 2, 3
+; CHECK-LE: blr
+
+; CHECK-BE-LABEL: @v1i128_increment_by_val
+; CHECK-BE-NOT: xxswapd {{[0-9]+}}, 34
+; CHECK-BE-NOT: xxswapd {{[0-9]+}}, 35
+; CHECK-BE-NOT: xxswapd 34, [[RESULT]]
+; CHECK-BE: adduqm 2, 2, 3
+; CHECK-BE: blr
+
+; CHECK-NOVSX-LABEL: @v1i128_increment_by_val
+; CHECK-NOVSX-NOT: xxswapd 34, [[RESULT]]
+; CHECK-NOVSX: adduqm 2, 2, 3
+; CHECK-NOVSX: blr
+}
+
+; Little Endian (VSX and VMX):
+; Lower 64-bits of %a are passed in register 3
+; Upper 64-bits of %a are passed in register 4
+; Increment lower 64-bits using addic (immediate value of 1)
+; Increment upper 64-bits using add zero extended
+; Results are placed in registers 3 and 4
+; Big Endian (VSX and VMX)
+; Lower 64-bits of %a are passed in register 4
+; Upper 64-bits of %a are passed in register 3
+; Increment lower 64-bits using addic (immediate value of 1)
+; Increment upper 64-bits using add zero extended
+; Results are placed in registers 3 and 4
+define i128 @i128_increment_by_one(i128 %a) nounwind {
+ %tmp = add i128 %a, 1
+ ret i128 %tmp
+; CHECK-LE-LABEL: @i128_increment_by_one
+; CHECK-LE: addic 3, 3, 1
+; CHECK-LE-NEXT: addze 4, 4
+; CHECK-LE: blr
+
+; CHECK-BE-LABEL: @i128_increment_by_one
+; CHECK-BE: addic 4, 4, 1
+; CHECK-BE-NEXT: addze 3, 3
+; CHECK-BE: blr
+
+; CHECK-LE-NOVSX-LABEL: @i128_increment_by_one
+; CHECK-LE-NOVSX: addic 3, 3, 1
+; CHECK-LE-NOVSX-NEXT: addze 4, 4
+; CHECK-LE-NOVSX: blr
+
+; CHECK-BE-NOVSX-LABEL: @i128_increment_by_one
+; CHECK-BE-NOVSX: addic 4, 4, 1
+; CHECK-BE-NOVSX-NEXT: addze 3, 3
+; CHECK-BE-NOVSX: blr
+}
+
+; Little Endian (VSX and VMX):
+; Lower 64-bits of %a are passed in register 3
+; Upper 64-bits of %a are passed in register 4
+; Lower 64-bits of %b are passed in register 5
+; Upper 64-bits of %b are passed in register 6
+; Add the lower 64-bits using addc on registers 3 and 5
+; Add the upper 64-bits using adde on registers 4 and 6
+; Registers 3 and 4 should hold the result
+; Big Endian (VSX and VMX):
+; Upper 64-bits of %a are passed in register 3
+; Lower 64-bits of %a are passed in register 4
+; Upper 64-bits of %b are passed in register 5
+; Lower 64-bits of %b are passed in register 6
+; Add the lower 64-bits using addc on registers 4 and 6
+; Add the upper 64-bits using adde on registers 3 and 5
+; Registers 3 and 4 should hold the result
+define i128 @i128_increment_by_val(i128 %a, i128 %b) nounwind {
+ %tmp = add i128 %a, %b
+ ret i128 %tmp
+; CHECK-LE-LABEL: @i128_increment_by_val
+; CHECK-LE: addc 3, 3, 5
+; CHECK-LE-NEXT: adde 4, 4, 6
+; CHECK-LE: blr
+
+; CHECK-BE-LABEL: @i128_increment_by_val
+; CHECK-BE: addc 4, 4, 6
+; CHECK-BE-NEXT: adde 3, 3, 5
+; CHECK-BE: blr
+
+; CHECK-LE-NOVSX-LABEL: @i128_increment_by_val
+; CHECK-LE-NOVSX: addc 3, 3, 5
+; CHECK-LE-NOVSX-NEXT: adde 4, 4, 6
+; CHECK-LE-NOVSX: blr
+
+; CHECK-BE-NOVSX-LABEL: @i128_increment_by_val
+; CHECK-BE-NOVSX: addc 4, 4, 6
+; CHECK-BE-NOVSX-NEXT: adde 3, 3, 5
+; CHECK-BE-NOVSX: blr
+}
+
+
+; Callsites for the routines defined above.
+; Ensure the parameters are loaded in the same order that is expected by the
+; callee. See comments for individual functions above for details on registers
+; used for parameters.
+define <1 x i128> @call_v1i128_increment_by_one() nounwind {
+ %tmp = load <1 x i128>, <1 x i128>* @x, align 16
+ %ret = call <1 x i128> @v1i128_increment_by_one(<1 x i128> %tmp)
+ ret <1 x i128> %ret
+
+; CHECK-LE-LABEL: @call_v1i128_increment_by_one
+; CHECK-LE: lxvd2x [[PARAM:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
+; CHECK-LE: xxswapd 34, [[PARAM]]
+; CHECK-LE: bl v1i128_increment_by_one
+; CHECK-LE: blr
+
+; CHECK-BE-LABEL: @call_v1i128_increment_by_one
+; CHECK-BE: lxvw4x 34, {{[0-9]+}}, {{[0-9]+}}
+; CHECK-BE-NOT: xxswapd 34, {{[0-9]+}}
+; CHECK-BE: bl v1i128_increment_by_one
+; CHECK-BE: blr
+
+; CHECK-NOVSX-LABEL: @call_v1i128_increment_by_one
+; CHECK-NOVSX: lvx 2, {{[0-9]+}}, {{[0-9]+}}
+; CHECK-NOVSX-NOT: xxswapd {{[0-9]+}}, {{[0-9]+}}
+; CHECK-NOVSX: bl v1i128_increment_by_one
+; CHECK-NOVSX: blr
+}
+
+define <1 x i128> @call_v1i128_increment_by_val() nounwind {
+ %tmp = load <1 x i128>, <1 x i128>* @x, align 16
+ %tmp2 = load <1 x i128>, <1 x i128>* @y, align 16
+ %ret = call <1 x i128> @v1i128_increment_by_val(<1 x i128> %tmp, <1 x i128> %tmp2)
+ ret <1 x i128> %ret
+
+; CHECK-LE-LABEL: @call_v1i128_increment_by_val
+; CHECK-LE: lxvd2x [[PARAM1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
+; CHECK-LE: lxvd2x [[PARAM2:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
+; CHECK-LE-DAG: xxswapd 34, [[PARAM1]]
+; CHECK-LE-DAG: xxswapd 35, [[PARAM2]]
+; CHECK-LE: bl v1i128_increment_by_val
+; CHECK-LE: blr
+
+; CHECK-BE-LABEL: @call_v1i128_increment_by_val
+
+
+; CHECK-BE-DAG: lxvw4x 35, {{[0-9]+}}, {{[0-9]+}}
+; CHECK-BE-NOT: xxswapd 34, {{[0-9]+}}
+; CHECK-BE-NOT: xxswapd 35, {{[0-9]+}}
+; CHECK-BE: bl v1i128_increment_by_val
+; CHECK-BE: blr
+
+; CHECK-NOVSX-LABEL: @call_v1i128_increment_by_val
+; CHECK-NOVSX-DAG: lvx 2, {{[0-9]+}}, {{[0-9]+}}
+; CHECK-NOVSX-DAG: lvx 3, {{[0-9]+}}, {{[0-9]+}}
+; CHECK-NOVSX-NOT: xxswapd 34, {{[0-9]+}}
+; CHECK-NOVSX-NOT: xxswapd 35, {{[0-9]+}}
+; CHECK-NOVSX: bl v1i128_increment_by_val
+; CHECK-NOVSX: blr
+
+}
+
+define i128 @call_i128_increment_by_one() nounwind {
+ %tmp = load i128, i128* @a, align 16
+ %ret = call i128 @i128_increment_by_one(i128 %tmp)
+ ret i128 %ret
+; %ret4 = call i128 @i128_increment_by_val(i128 %tmp2, i128 %tmp2)
+; CHECK-LE-LABEL: @call_i128_increment_by_one
+; CHECK-LE-DAG: ld 3, 0([[BASEREG:[0-9]+]])
+; CHECK-LE-DAG: ld 4, 8([[BASEREG]])
+; CHECK-LE: bl i128_increment_by_one
+; CHECK-LE: blr
+
+; CHECK-BE-LABEL: @call_i128_increment_by_one
+; CHECK-BE-DAG: ld 3, 0([[BASEREG:[0-9]+]])
+; CHECK-BE-DAG: ld 4, 8([[BASEREG]])
+; CHECK-BE: bl i128_increment_by_one
+; CHECK-BE: blr
+
+; CHECK-NOVSX-LABEL: @call_i128_increment_by_one
+; CHECK-NOVSX-DAG: ld 3, 0([[BASEREG:[0-9]+]])
+; CHECK-NOVSX-DAG: ld 4, 8([[BASEREG]])
+; CHECK-NOVSX: bl i128_increment_by_one
+; CHECK-NOVSX: blr
+}
+
+define i128 @call_i128_increment_by_val() nounwind {
+ %tmp = load i128, i128* @a, align 16
+ %tmp2 = load i128, i128* @b, align 16
+ %ret = call i128 @i128_increment_by_val(i128 %tmp, i128 %tmp2)
+ ret i128 %ret
+; CHECK-LE-LABEL: @call_i128_increment_by_val
+; CHECK-LE-DAG: ld 3, 0([[P1BASEREG:[0-9]+]])
+; CHECK-LE-DAG: ld 4, 8([[P1BASEREG]])
+; CHECK-LE-DAG: ld 5, 0([[P2BASEREG:[0-9]+]])
+; CHECK-LE-DAG: ld 6, 8([[P2BASEREG]])
+; CHECK-LE: bl i128_increment_by_val
+; CHECK-LE: blr
+
+; CHECK-BE-LABEL: @call_i128_increment_by_val
+; CHECK-BE-DAG: ld 3, 0([[P1BASEREG:[0-9]+]])
+; CHECK-BE-DAG: ld 4, 8([[P1BASEREG]])
+; CHECK-BE-DAG: ld 5, 0([[P2BASEREG:[0-9]+]])
+; CHECK-BE-DAG: ld 6, 8([[P2BASEREG]])
+; CHECK-BE: bl i128_increment_by_val
+; CHECK-BE: blr
+
+; CHECK-NOVSX-LABEL: @call_i128_increment_by_val
+; CHECK-NOVSX-DAG: ld 3, 0([[P1BASEREG:[0-9]+]])
+; CHECK-NOVSX-DAG: ld 4, 8([[P1BASEREG]])
+; CHECK-NOVSX-DAG: ld 5, 0([[P2BASEREG:[0-9]+]])
+; CHECK-NOVSX-DAG: ld 6, 8([[P2BASEREG]])
+; CHECK-NOVSX: bl i128_increment_by_val
+; CHECK-NOVSX: blr
+}
+
+
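For reference, the addc/adde pairs checked above are ordinary carry propagation between the two 64-bit halves of an i128. A minimal target-independent sketch of the same computation (hypothetical function and value names, not part of the imported test), with the carry made explicit through llvm.uadd.with.overflow:

declare { i64, i1 } @llvm.uadd.with.overflow.i64(i64, i64)

; Hypothetical helper: add two i128 values supplied as 64-bit halves.
define { i64, i64 } @add_i128_halves(i64 %a_lo, i64 %a_hi, i64 %b_lo, i64 %b_hi) nounwind {
entry:
  ; Low halves: plain 64-bit add, capturing the carry-out.
  %pair = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %a_lo, i64 %b_lo)
  %lo = extractvalue { i64, i1 } %pair, 0
  %carry = extractvalue { i64, i1 } %pair, 1
  %carry.ext = zext i1 %carry to i64
  ; High halves: add, then fold in the carry (what adde does in one step).
  %hi.noc = add i64 %a_hi, %b_hi
  %hi = add i64 %hi.noc, %carry.ext
  %r0 = insertvalue { i64, i64 } undef, i64 %lo, 0
  %r1 = insertvalue { i64, i64 } %r0, i64 %hi, 1
  ret { i64, i64 } %r1
}

Incrementing by one is the special case %b_lo = 1, %b_hi = 0, which is why those tests expect addic/addze rather than addc/adde.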
diff --git a/test/CodeGen/PowerPC/ppc64-icbt-pwr7.ll b/test/CodeGen/PowerPC/ppc64-icbt-pwr7.ll
new file mode 100644
index 000000000000..e8617ccfc8a5
--- /dev/null
+++ b/test/CodeGen/PowerPC/ppc64-icbt-pwr7.ll
@@ -0,0 +1,19 @@
+; Test that the ICBT instruction is not emitted on POWER7
+; Based on the ppc64-prefetch.ll test
+; RUN: not llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 < %s 2>&1 | FileCheck %s
+
+declare void @llvm.prefetch(i8*, i32, i32, i32)
+
+define void @test(i8* %a, ...) nounwind {
+entry:
+ call void @llvm.prefetch(i8* %a, i32 0, i32 3, i32 0)
+ ret void
+
+; FIXME: Crashing is not really the correct behavior here; we should really just emit nothing
+; CHECK: Cannot select: 0x{{[0-9,a-f]+}}: ch = Prefetch
+; CHECK: 0x{{[0-9,a-f]+}}: i32 = Constant<0>
+; CHECK-NEXT: 0x{{[0-9,a-f]+}}: i32 = Constant<3>
+; CHECK-NEXT: 0x{{[0-9,a-f]+}}: i32 = Constant<0>
+
+}
+
diff --git a/test/CodeGen/PowerPC/ppc64-icbt-pwr8.ll b/test/CodeGen/PowerPC/ppc64-icbt-pwr8.ll
new file mode 100644
index 000000000000..a0f084a6bf96
--- /dev/null
+++ b/test/CodeGen/PowerPC/ppc64-icbt-pwr8.ll
@@ -0,0 +1,16 @@
+; Test the ICBT instruction on POWER8
+; Copied from the ppc64-prefetch.ll test
+; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 < %s | FileCheck %s
+
+declare void @llvm.prefetch(i8*, i32, i32, i32)
+
+define void @test(i8* %a, ...) nounwind {
+entry:
+ call void @llvm.prefetch(i8* %a, i32 0, i32 3, i32 0)
+ ret void
+
+; CHECK-LABEL: @test
+; CHECK: icbt
+}
+
+
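For reference, @llvm.prefetch takes (address, rw, locality, cache type): rw is 0 for a read and 1 for a write, locality runs from 0 (none) to 3 (high), and the cache type selects the instruction cache (0) or the data cache (1). A minimal sketch contrasting the two forms (hypothetical function name, not part of the imported tests):

declare void @llvm.prefetch(i8*, i32, i32, i32)

define void @prefetch_both(i8* %p) nounwind {
entry:
  ; Data-cache read prefetch, high locality; on PowerPC this typically becomes dcbt.
  call void @llvm.prefetch(i8* %p, i32 0, i32 3, i32 1)
  ; Instruction-cache prefetch; lowered to icbt on POWER8 and unsupported on
  ; POWER7, which is why the pwr7 test above runs llc under "not".
  call void @llvm.prefetch(i8* %p, i32 0, i32 3, i32 0)
  ret void
}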
diff --git a/test/CodeGen/PowerPC/ppc64-linux-func-size.ll b/test/CodeGen/PowerPC/ppc64-linux-func-size.ll
index e1d50bac51a2..fb017bc224ba 100644
--- a/test/CodeGen/PowerPC/ppc64-linux-func-size.ll
+++ b/test/CodeGen/PowerPC/ppc64-linux-func-size.ll
@@ -3,11 +3,11 @@
; CHECK: .section .opd,"aw",@progbits
; CHECK-NEXT: test1:
; CHECK-NEXT: .align 3
-; CHECK-NEXT: .quad .L.test1
+; CHECK-NEXT: .quad .L[[BEGIN:.*]]
; CHECK-NEXT: .quad .TOC.@tocbase
; CHECK-NEXT: .quad 0
; CHECK-NEXT: .text
-; CHECK-NEXT: .L.test1:
+; CHECK-NEXT: .L[[BEGIN]]:
define i32 @test1(i32 %a) nounwind {
entry:
@@ -19,4 +19,4 @@ entry:
; however, using this directive with recent binutils will result in the error:
; .size expression for XXX does not evaluate to a constant
; so we must use the label which actually tags the start of the function.
-; CHECK: .size test1, .Ltmp0-.L.test1
+; CHECK: .size test1, .Lfunc_end0-.L[[BEGIN]]
diff --git a/test/CodeGen/PowerPC/ppc64-nonfunc-calls.ll b/test/CodeGen/PowerPC/ppc64-nonfunc-calls.ll
index 9f56f0102b7c..b1d3f39e2f89 100644
--- a/test/CodeGen/PowerPC/ppc64-nonfunc-calls.ll
+++ b/test/CodeGen/PowerPC/ppc64-nonfunc-calls.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mcpu=ppc64 < %s | FileCheck %s
+; RUN: llc -mcpu=pwr7 < %s | FileCheck %s
target datalayout = "E-m:e-i64:64-n32:64"
target triple = "powerpc64-unknown-linux-gnu"
diff --git a/test/CodeGen/PowerPC/ppc64-patchpoint.ll b/test/CodeGen/PowerPC/ppc64-patchpoint.ll
index 5e58fdab2168..67b26268a3a3 100644
--- a/test/CodeGen/PowerPC/ppc64-patchpoint.ll
+++ b/test/CodeGen/PowerPC/ppc64-patchpoint.ll
@@ -1,6 +1,8 @@
-; RUN: llc < %s | FileCheck %s
-; RUN: llc -fast-isel -fast-isel-abort < %s | FileCheck %s
-target datalayout = "E-m:e-i64:64-n32:64"
+; RUN: llc < %s | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-BE
+; RUN: llc -fast-isel -fast-isel-abort=1 < %s | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-BE
+; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-LE
+; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -fast-isel -fast-isel-abort=1 < %s | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-LE
+
target triple = "powerpc64-unknown-linux-gnu"
; Trivial patchpoint codegen
@@ -9,26 +11,26 @@ define i64 @trivial_patchpoint_codegen(i64 %p1, i64 %p2, i64 %p3, i64 %p4) {
entry:
; CHECK-LABEL: trivial_patchpoint_codegen:
-; CHECK: li 11, -8531
-; CHECK-NEXT: rldic 11, 11, 32, 16
-; CHECK-NEXT: oris 11, 11, 48879
-; CHECK-NEXT: ori 11, 11, 51966
-; CHECK-NEXT: mtctr 11
+; CHECK: li 12, -8531
+; CHECK-NEXT: rldic 12, 12, 32, 16
+; CHECK-NEXT: oris 12, 12, 48879
+; CHECK-NEXT: ori 12, 12, 51966
+; CHECK-NEXT: mtctr 12
; CHECK-NEXT: bctrl
-; CHECK: li 11, -8531
-; CHECK-NEXT: rldic 11, 11, 32, 16
-; CHECK-NEXT: oris 11, 11, 48879
-; CHECK-NEXT: ori 11, 11, 51967
-; CHECK-NEXT: mtctr 11
+; CHECK: li 12, -8531
+; CHECK-NEXT: rldic 12, 12, 32, 16
+; CHECK-NEXT: oris 12, 12, 48879
+; CHECK-NEXT: ori 12, 12, 51967
+; CHECK-NEXT: mtctr 12
; CHECK-NEXT: bctrl
; CHECK: blr
%resolveCall2 = inttoptr i64 244837814094590 to i8*
- %result = tail call i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 2, i32 24, i8* %resolveCall2, i32 4, i64 %p1, i64 %p2, i64 %p3, i64 %p4)
+ %result = tail call i64 (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.i64(i64 2, i32 24, i8* %resolveCall2, i32 4, i64 %p1, i64 %p2, i64 %p3, i64 %p4)
%resolveCall3 = inttoptr i64 244837814094591 to i8*
- tail call void (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i64 3, i32 24, i8* %resolveCall3, i32 2, i64 %p1, i64 %result)
+ tail call void (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.void(i64 3, i32 24, i8* %resolveCall3, i32 2, i64 %p1, i64 %result)
ret i64 %result
}
@@ -36,9 +38,11 @@ entry:
; as a leaf function.
;
; CHECK-LABEL: caller_meta_leaf
-; CHECK: stdu 1, -80(1)
+; CHECK-BE: stdu 1, -80(1)
+; CHECK-LE: stdu 1, -64(1)
; CHECK: Ltmp
-; CHECK: addi 1, 1, 80
+; CHECK-BE: addi 1, 1, 80
+; CHECK-LE: addi 1, 1, 64
; CHECK: blr
define void @caller_meta_leaf() {
@@ -47,7 +51,7 @@ entry:
store i64 11, i64* %metadata
store i64 12, i64* %metadata
store i64 13, i64* %metadata
- call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 4, i32 0, i64* %metadata)
+ call void (i64, i32, ...) @llvm.experimental.stackmap(i64 4, i32 0, i64* %metadata)
ret void
}
@@ -59,15 +63,15 @@ define i64 @testLowerConstant(i64 %arg, i64 %tmp2, i64 %tmp10, i64* %tmp33, i64
entry:
%tmp80 = add i64 %tmp79, -16
%tmp81 = inttoptr i64 %tmp80 to i64*
- %tmp82 = load i64* %tmp81, align 8
- tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 14, i32 8, i64 %arg, i64 %tmp2, i64 %tmp10, i64 %tmp82)
- tail call void (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i64 15, i32 32, i8* null, i32 3, i64 %arg, i64 %tmp10, i64 %tmp82)
- %tmp83 = load i64* %tmp33, align 8
+ %tmp82 = load i64, i64* %tmp81, align 8
+ tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 14, i32 8, i64 %arg, i64 %tmp2, i64 %tmp10, i64 %tmp82)
+ tail call void (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.void(i64 15, i32 32, i8* null, i32 3, i64 %arg, i64 %tmp10, i64 %tmp82)
+ %tmp83 = load i64, i64* %tmp33, align 8
%tmp84 = add i64 %tmp83, -24
%tmp85 = inttoptr i64 %tmp84 to i64*
- %tmp86 = load i64* %tmp85, align 8
- tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 17, i32 8, i64 %arg, i64 %tmp10, i64 %tmp86)
- tail call void (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i64 18, i32 32, i8* null, i32 3, i64 %arg, i64 %tmp10, i64 %tmp86)
+ %tmp86 = load i64, i64* %tmp85, align 8
+ tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 17, i32 8, i64 %arg, i64 %tmp10, i64 %tmp86)
+ tail call void (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.void(i64 18, i32 32, i8* null, i32 3, i64 %arg, i64 %tmp10, i64 %tmp86)
ret i64 10
}
@@ -83,7 +87,7 @@ entry:
; CHECK-NEXT: nop
; CHECK-NOT: nop
; CHECK: blr
- %result = tail call i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 5, i32 20, i8* null, i32 2, i64 %p1, i64 %p2)
+ %result = tail call i64 (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.i64(i64 5, i32 20, i8* null, i32 2, i64 %p1, i64 %p2)
ret void
}
diff --git a/test/CodeGen/PowerPC/ppc64-r2-alloc.ll b/test/CodeGen/PowerPC/ppc64-r2-alloc.ll
new file mode 100644
index 000000000000..87292d821294
--- /dev/null
+++ b/test/CodeGen/PowerPC/ppc64-r2-alloc.ll
@@ -0,0 +1,81 @@
+; RUN: llc -mcpu=pwr7 < %s | FileCheck %s
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+define signext i32 @foo(i32 signext %a, i32 signext %d) #0 {
+entry:
+ %div = sdiv i32 %a, %d
+ %div1 = sdiv i32 %div, %d
+ %div2 = sdiv i32 %div1, %d
+ %div3 = sdiv i32 %div2, %d
+ %div4 = sdiv i32 %div3, %d
+ %div5 = sdiv i32 %div4, %d
+ %div6 = sdiv i32 %div5, %d
+ %div7 = sdiv i32 %div6, %d
+ %div8 = sdiv i32 %div7, %d
+ %div9 = sdiv i32 %div8, %d
+ %div10 = sdiv i32 %div9, %d
+ %div11 = sdiv i32 %div10, %d
+ %div12 = sdiv i32 %div11, %d
+ %div13 = sdiv i32 %div12, %d
+ %div14 = sdiv i32 %div13, %d
+ %div15 = sdiv i32 %div14, %d
+ %div16 = sdiv i32 %div15, %d
+ %div17 = sdiv i32 %div16, %d
+ %div18 = sdiv i32 %div17, %d
+ %div19 = sdiv i32 %div18, %d
+ %div20 = sdiv i32 %div19, %d
+ %div21 = sdiv i32 %div20, %d
+ %div22 = sdiv i32 %div21, %d
+ %div23 = sdiv i32 %div22, %d
+ %div24 = sdiv i32 %div23, %d
+ %div25 = sdiv i32 %div24, %d
+ %div26 = sdiv i32 %div25, %d
+ %div27 = sdiv i32 %div26, %d
+ %div28 = sdiv i32 %div27, %d
+ %div29 = sdiv i32 %div28, %d
+ %div30 = sdiv i32 %div29, %d
+ %div31 = sdiv i32 %div30, %d
+ %div32 = sdiv i32 %div31, %d
+ %div33 = sdiv i32 %div32, %div31
+ %div34 = sdiv i32 %div33, %div30
+ %div35 = sdiv i32 %div34, %div29
+ %div36 = sdiv i32 %div35, %div28
+ %div37 = sdiv i32 %div36, %div27
+ %div38 = sdiv i32 %div37, %div26
+ %div39 = sdiv i32 %div38, %div25
+ %div40 = sdiv i32 %div39, %div24
+ %div41 = sdiv i32 %div40, %div23
+ %div42 = sdiv i32 %div41, %div22
+ %div43 = sdiv i32 %div42, %div21
+ %div44 = sdiv i32 %div43, %div20
+ %div45 = sdiv i32 %div44, %div19
+ %div46 = sdiv i32 %div45, %div18
+ %div47 = sdiv i32 %div46, %div17
+ %div48 = sdiv i32 %div47, %div16
+ %div49 = sdiv i32 %div48, %div15
+ %div50 = sdiv i32 %div49, %div14
+ %div51 = sdiv i32 %div50, %div13
+ %div52 = sdiv i32 %div51, %div12
+ %div53 = sdiv i32 %div52, %div11
+ %div54 = sdiv i32 %div53, %div10
+ %div55 = sdiv i32 %div54, %div9
+ %div56 = sdiv i32 %div55, %div8
+ %div57 = sdiv i32 %div56, %div7
+ %div58 = sdiv i32 %div57, %div6
+ %div59 = sdiv i32 %div58, %div5
+ %div60 = sdiv i32 %div59, %div4
+ %div61 = sdiv i32 %div60, %div3
+ %div62 = sdiv i32 %div61, %div2
+ %div63 = sdiv i32 %div62, %div1
+ %div64 = sdiv i32 %div63, %div
+ ret i32 %div64
+}
+
+; This function will need to use all non-reserved GPRs (and then some); make
+; sure that r2 is among them.
+; CHECK-LABEL: @foo
+; CHECK: std 2,
+; CHECK: ld 2,
+; CHECK: blr
+
diff --git a/test/CodeGen/PowerPC/ppc64-smallarg.ll b/test/CodeGen/PowerPC/ppc64-smallarg.ll
index 0d5b078e217a..27aca1077cd6 100644
--- a/test/CodeGen/PowerPC/ppc64-smallarg.ll
+++ b/test/CodeGen/PowerPC/ppc64-smallarg.ll
@@ -17,7 +17,7 @@ define void @callee1(%struct.small_arg* noalias nocapture sret %agg.result, %str
entry:
%0 = bitcast %struct.small_arg* %x to i32*
%1 = bitcast %struct.small_arg* %agg.result to i32*
- %2 = load i32* %0, align 2
+ %2 = load i32, i32* %0, align 2
store i32 %2, i32* %1, align 2
ret void
}
@@ -47,7 +47,7 @@ entry:
define void @caller2() {
entry:
- %0 = load float* @gf, align 4
+ %0 = load float, float* @gf, align 4
%call = tail call float @test2(float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float %0)
ret void
}
diff --git a/test/CodeGen/PowerPC/ppc64-stackmap-nops.ll b/test/CodeGen/PowerPC/ppc64-stackmap-nops.ll
index 368ddc5c8335..19d65b983b0a 100644
--- a/test/CodeGen/PowerPC/ppc64-stackmap-nops.ll
+++ b/test/CodeGen/PowerPC/ppc64-stackmap-nops.ll
@@ -16,7 +16,7 @@ entry:
; CHECK: mtlr [[REG1]]
; CHECK: blr
- tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 0, i32 32)
+ tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 0, i32 32)
ret void
}
diff --git a/test/CodeGen/PowerPC/ppc64-stackmap.ll b/test/CodeGen/PowerPC/ppc64-stackmap.ll
index 9be8d0c8ad44..917fa7422512 100644
--- a/test/CodeGen/PowerPC/ppc64-stackmap.ll
+++ b/test/CodeGen/PowerPC/ppc64-stackmap.ll
@@ -7,6 +7,40 @@
target datalayout = "E-m:e-i64:64-n32:64"
target triple = "powerpc64-unknown-linux-gnu"
+; CHECK-LABEL: constantargs:
+; CHECK: {{^}}.L[[constantargs_BEGIN:.*]]:{{$}}
+
+; CHECK-LABEL: osrinline:
+; CHECK: {{^}}.L[[osrinline_BEGIN:.*]]:{{$}}
+
+; CHECK-LABEL: osrcold:
+; CHECK: {{^}}.L[[osrcold_BEGIN:.*]]:{{$}}
+
+; CHECK-LABEL: propertyRead:
+; CHECK: {{^}}.L[[propertyRead_BEGIN:.*]]:{{$}}
+
+; CHECK-LABEL: propertyWrite:
+; CHECK: {{^}}.L[[propertyWrite_BEGIN:.*]]:{{$}}
+
+; CHECK-LABEL: jsVoidCall:
+; CHECK: {{^}}.L[[jsVoidCall_BEGIN:.*]]:{{$}}
+
+; CHECK-LABEL: jsIntCall:
+; CHECK: {{^}}.L[[jsIntCall_BEGIN:.*]]:{{$}}
+
+; CHECK-LABEL: spilledValue:
+; CHECK: {{^}}.L[[spilledValue_BEGIN:.*]]:{{$}}
+
+; CHECK-LABEL: spilledStackMapValue:
+; CHECK: {{^}}.L[[spilledStackMapValue_BEGIN:.*]]:{{$}}
+
+; CHECK-LABEL: liveConstant:
+; CHECK: {{^}}.L[[liveConstant_BEGIN:.*]]:{{$}}
+
+; CHECK-LABEL: clobberLR:
+; CHECK: {{^}}.L[[clobberLR_BEGIN:.*]]:{{$}}
+
+
; CHECK-LABEL: .section .llvm_stackmaps
; CHECK-NEXT: __LLVM_StackMaps:
; Header
@@ -36,7 +70,7 @@ target triple = "powerpc64-unknown-linux-gnu"
; CHECK-NEXT: .quad jsIntCall
; CHECK-NEXT: .quad 128
; CHECK-NEXT: .quad spilledValue
-; CHECK-NEXT: .quad 320
+; CHECK-NEXT: .quad 304
; CHECK-NEXT: .quad spilledStackMapValue
; CHECK-NEXT: .quad 224
; CHECK-NEXT: .quad liveConstant
@@ -51,7 +85,7 @@ target triple = "powerpc64-unknown-linux-gnu"
; Constant arguments
;
; CHECK-NEXT: .quad 1
-; CHECK-NEXT: .long .L{{.*}}-.L.constantargs
+; CHECK-NEXT: .long .L{{.*}}-.L[[constantargs_BEGIN]]
; CHECK-NEXT: .short 0
; CHECK-NEXT: .short 4
; SmallConstant
@@ -78,13 +112,13 @@ target triple = "powerpc64-unknown-linux-gnu"
define void @constantargs() {
entry:
%0 = inttoptr i64 244837814094590 to i8*
- tail call void (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i64 1, i32 24, i8* %0, i32 0, i64 65535, i64 65536, i64 4294967295, i64 4294967296)
+ tail call void (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.void(i64 1, i32 24, i8* %0, i32 0, i64 65535, i64 65536, i64 4294967295, i64 4294967296)
ret void
}
; Inline OSR Exit
;
-; CHECK-LABEL: .long .L{{.*}}-.L.osrinline
+; CHECK: .long .L{{.*}}-.L[[osrinline_BEGIN]]
; CHECK-NEXT: .short 0
; CHECK-NEXT: .short 2
; CHECK-NEXT: .byte 1
@@ -100,7 +134,7 @@ entry:
; Runtime void->void call.
call void inttoptr (i64 244837814094590 to void ()*)()
; Followed by inline OSR patchpoint with 12-byte shadow and 2 live vars.
- call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 3, i32 12, i64 %a, i64 %b)
+ call void (i64, i32, ...) @llvm.experimental.stackmap(i64 3, i32 12, i64 %a, i64 %b)
ret void
}
@@ -108,7 +142,7 @@ entry:
;
; 2 live variables in register.
;
-; CHECK-LABEL: .long .L{{.*}}-.L.osrcold
+; CHECK: .long .L{{.*}}-.L[[osrcold_BEGIN]]
; CHECK-NEXT: .short 0
; CHECK-NEXT: .short 2
; CHECK-NEXT: .byte 1
@@ -126,14 +160,14 @@ entry:
cold:
; OSR patchpoint with 12-byte nop-slide and 2 live vars.
%thunk = inttoptr i64 244837814094590 to i8*
- call void (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i64 4, i32 24, i8* %thunk, i32 0, i64 %a, i64 %b)
+ call void (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.void(i64 4, i32 24, i8* %thunk, i32 0, i64 %a, i64 %b)
unreachable
ret:
ret void
}
; Property Read
-; CHECK-LABEL: .long .L{{.*}}-.L.propertyRead
+; CHECK: .long .L{{.*}}-.L[[propertyRead_BEGIN]]
; CHECK-NEXT: .short 0
; CHECK-NEXT: .short 0
;
@@ -142,13 +176,13 @@ ret:
define i64 @propertyRead(i64* %obj) {
entry:
%resolveRead = inttoptr i64 244837814094590 to i8*
- %result = call i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 5, i32 24, i8* %resolveRead, i32 1, i64* %obj)
+ %result = call i64 (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.i64(i64 5, i32 24, i8* %resolveRead, i32 1, i64* %obj)
%add = add i64 %result, 3
ret i64 %add
}
; Property Write
-; CHECK-LABEL: .long .L{{.*}}-.L.propertyWrite
+; CHECK: .long .L{{.*}}-.L[[propertyWrite_BEGIN]]
; CHECK-NEXT: .short 0
; CHECK-NEXT: .short 2
; CHECK-NEXT: .byte 1
@@ -162,7 +196,7 @@ entry:
define void @propertyWrite(i64 %dummy1, i64* %obj, i64 %dummy2, i64 %a) {
entry:
%resolveWrite = inttoptr i64 244837814094590 to i8*
- call anyregcc void (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i64 6, i32 24, i8* %resolveWrite, i32 2, i64* %obj, i64 %a)
+ call anyregcc void (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.void(i64 6, i32 24, i8* %resolveWrite, i32 2, i64* %obj, i64 %a)
ret void
}
@@ -170,7 +204,7 @@ entry:
;
; 2 live variables in registers.
;
-; CHECK-LABEL: .long .L{{.*}}-.L.jsVoidCall
+; CHECK: .long .L{{.*}}-.L[[jsVoidCall_BEGIN]]
; CHECK-NEXT: .short 0
; CHECK-NEXT: .short 2
; CHECK-NEXT: .byte 1
@@ -184,7 +218,7 @@ entry:
define void @jsVoidCall(i64 %dummy1, i64* %obj, i64 %arg, i64 %l1, i64 %l2) {
entry:
%resolveCall = inttoptr i64 244837814094590 to i8*
- call void (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i64 7, i32 24, i8* %resolveCall, i32 2, i64* %obj, i64 %arg, i64 %l1, i64 %l2)
+ call void (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.void(i64 7, i32 24, i8* %resolveCall, i32 2, i64* %obj, i64 %arg, i64 %l1, i64 %l2)
ret void
}
@@ -192,7 +226,7 @@ entry:
;
; 2 live variables in registers.
;
-; CHECK-LABEL: .long .L{{.*}}-.L.jsIntCall
+; CHECK: .long .L{{.*}}-.L[[jsIntCall_BEGIN]]
; CHECK-NEXT: .short 0
; CHECK-NEXT: .short 2
; CHECK-NEXT: .byte 1
@@ -206,7 +240,7 @@ entry:
define i64 @jsIntCall(i64 %dummy1, i64* %obj, i64 %arg, i64 %l1, i64 %l2) {
entry:
%resolveCall = inttoptr i64 244837814094590 to i8*
- %result = call i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 8, i32 24, i8* %resolveCall, i32 2, i64* %obj, i64 %arg, i64 %l1, i64 %l2)
+ %result = call i64 (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.i64(i64 8, i32 24, i8* %resolveCall, i32 2, i64* %obj, i64 %arg, i64 %l1, i64 %l2)
%add = add i64 %result, 3
ret i64 %add
}
@@ -215,7 +249,7 @@ entry:
;
; Verify 28 stack map entries.
;
-; CHECK-LABEL: .long .L{{.*}}-.L.spilledValue
+; CHECK: .long .L{{.*}}-.L[[spilledValue_BEGIN]]
; CHECK-NEXT: .short 0
; CHECK-NEXT: .short 28
;
@@ -226,7 +260,7 @@ entry:
; CHECK-NEXT: .short 31
define void @spilledValue(i64 %arg0, i64 %arg1, i64 %arg2, i64 %arg3, i64 %arg4, i64 %l0, i64 %l1, i64 %l2, i64 %l3, i64 %l4, i64 %l5, i64 %l6, i64 %l7, i64 %l8, i64 %l9, i64 %l10, i64 %l11, i64 %l12, i64 %l13, i64 %l14, i64 %l15, i64 %l16, i64 %l17, i64 %l18, i64 %l19, i64 %l20, i64 %l21, i64 %l22, i64 %l23, i64 %l24, i64 %l25, i64 %l26, i64 %l27) {
entry:
- call void (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i64 11, i32 24, i8* null, i32 5, i64 %arg0, i64 %arg1, i64 %arg2, i64 %arg3, i64 %arg4, i64 %l0, i64 %l1, i64 %l2, i64 %l3, i64 %l4, i64 %l5, i64 %l6, i64 %l7, i64 %l8, i64 %l9, i64 %l10, i64 %l11, i64 %l12, i64 %l13, i64 %l14, i64 %l15, i64 %l16, i64 %l17, i64 %l18, i64 %l19, i64 %l20, i64 %l21, i64 %l22, i64 %l23, i64 %l24, i64 %l25, i64 %l26, i64 %l27)
+ call void (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.void(i64 11, i32 24, i8* null, i32 5, i64 %arg0, i64 %arg1, i64 %arg2, i64 %arg3, i64 %arg4, i64 %l0, i64 %l1, i64 %l2, i64 %l3, i64 %l4, i64 %l5, i64 %l6, i64 %l7, i64 %l8, i64 %l9, i64 %l10, i64 %l11, i64 %l12, i64 %l13, i64 %l14, i64 %l15, i64 %l16, i64 %l17, i64 %l18, i64 %l19, i64 %l20, i64 %l21, i64 %l22, i64 %l23, i64 %l24, i64 %l25, i64 %l26, i64 %l27)
ret void
}
@@ -234,7 +268,7 @@ entry:
;
; Verify 30 stack map entries.
;
-; CHECK-LABEL: .long .L{{.*}}-.L.spilledStackMapValue
+; CHECK: .long .L{{.*}}-.L[[spilledStackMapValue_BEGIN]]
; CHECK-NEXT: .short 0
; CHECK-NEXT: .short 30
;
@@ -245,14 +279,14 @@ entry:
; CHECK-NEXT: .short 31
define webkit_jscc void @spilledStackMapValue(i64 %l0, i64 %l1, i64 %l2, i64 %l3, i64 %l4, i64 %l5, i64 %l6, i64 %l7, i64 %l8, i64 %l9, i64 %l10, i64 %l11, i64 %l12, i64 %l13, i64 %l14, i64 %l15, i64 %l16, i64 %l17, i64 %l18, i64 %l19, i64 %l20, i64 %l21, i64 %l22, i64 %l23, i64 %l24, i64 %l25, i64 %l26, i64 %l27, i64 %l28, i64 %l29) {
entry:
- call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 12, i32 16, i64 %l0, i64 %l1, i64 %l2, i64 %l3, i64 %l4, i64 %l5, i64 %l6, i64 %l7, i64 %l8, i64 %l9, i64 %l10, i64 %l11, i64 %l12, i64 %l13, i64 %l14, i64 %l15, i64 %l16, i64 %l17, i64 %l18, i64 %l19, i64 %l20, i64 %l21, i64 %l22, i64 %l23, i64 %l24, i64 %l25, i64 %l26, i64 %l27, i64 %l28, i64 %l29)
+ call void (i64, i32, ...) @llvm.experimental.stackmap(i64 12, i32 16, i64 %l0, i64 %l1, i64 %l2, i64 %l3, i64 %l4, i64 %l5, i64 %l6, i64 %l7, i64 %l8, i64 %l9, i64 %l10, i64 %l11, i64 %l12, i64 %l13, i64 %l14, i64 %l15, i64 %l16, i64 %l17, i64 %l18, i64 %l19, i64 %l20, i64 %l21, i64 %l22, i64 %l23, i64 %l24, i64 %l25, i64 %l26, i64 %l27, i64 %l28, i64 %l29)
ret void
}
; Map a constant value.
;
-; CHECK-LABEL: .long .L{{.*}}-.L.liveConstant
+; CHECK: .long .L{{.*}}-.L[[liveConstant_BEGIN]]
; CHECK-NEXT: .short 0
; 1 location
; CHECK-NEXT: .short 1
@@ -263,13 +297,13 @@ entry:
; CHECK-NEXT: .long 33
define void @liveConstant() {
- tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 15, i32 8, i32 33)
+ tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 15, i32 8, i32 33)
ret void
}
; Map a value when LR is the only free register.
;
-; CHECK-LABEL: .long .L{{.*}}-.L.clobberLR
+; CHECK: .long .L{{.*}}-.L[[clobberLR_BEGIN]]
; CHECK-NEXT: .short 0
; 1 location
; CHECK-NEXT: .short 1
@@ -280,7 +314,7 @@ define void @liveConstant() {
; CHECK-NEXT: .long {{[0-9]+}}
define void @clobberLR(i32 %a) {
tail call void asm sideeffect "nop", "~{r0},~{r3},~{r4},~{r5},~{r6},~{r7},~{r8},~{r9},~{r10},~{r11},~{r12},~{r14},~{r15},~{r16},~{r17},~{r18},~{r19},~{r20},~{r21},~{r22},~{r23},~{r24},~{r25},~{r26},~{r27},~{r28},~{r29},~{r30},~{r31}"() nounwind
- tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 16, i32 8, i32 %a)
+ tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 16, i32 8, i32 %a)
ret void
}
diff --git a/test/CodeGen/PowerPC/ppc64-toc.ll b/test/CodeGen/PowerPC/ppc64-toc.ll
index f349919b7e99..7500ed606636 100644
--- a/test/CodeGen/PowerPC/ppc64-toc.ll
+++ b/test/CodeGen/PowerPC/ppc64-toc.ll
@@ -10,11 +10,12 @@ define i64 @access_int64(i64 %a) nounwind readonly {
entry:
; CHECK-LABEL: access_int64:
; CHECK-NEXT: .align 3
-; CHECK-NEXT: .quad .L.access_int64
+; CHECK-NEXT: .quad .L[[BEGIN:.*]]
; CHECK-NEXT: .quad .TOC.@tocbase
; CHECK-NEXT: .quad 0
; CHECK-NEXT: .text
- %0 = load i64* @number64, align 8
+; CHECK-NEXT: .L[[BEGIN]]:
+ %0 = load i64, i64* @number64, align 8
; CHECK: ld {{[0-9]+}}, .LC{{[0-9]+}}@toc(2)
%cmp = icmp eq i64 %0, %a
%conv1 = zext i1 %cmp to i64
@@ -25,7 +26,7 @@ define i64 @internal_static_var(i64 %a) nounwind {
entry:
; CHECK-LABEL: internal_static_var:
; CHECK: ld {{[0-9]+}}, .LC{{[0-9]+}}@toc(2)
- %0 = load i64* @internal_static_var.x, align 8
+ %0 = load i64, i64* @internal_static_var.x, align 8
%cmp = icmp eq i64 %0, %a
%conv1 = zext i1 %cmp to i64
ret i64 %conv1
@@ -45,8 +46,8 @@ define i32 @access_double_array(double %a, i32 %i) nounwind readonly {
entry:
; CHECK-LABEL: access_double_array:
%idxprom = sext i32 %i to i64
- %arrayidx = getelementptr inbounds [32 x double]* @double_array, i64 0, i64 %idxprom
- %0 = load double* %arrayidx, align 8
+ %arrayidx = getelementptr inbounds [32 x double], [32 x double]* @double_array, i64 0, i64 %idxprom
+ %0 = load double, double* %arrayidx, align 8
; CHECK: ld {{[0-9]+}}, .LC{{[0-9]+}}@toc(2)
%cmp = fcmp oeq double %0, %a
%conv = zext i1 %cmp to i32
diff --git a/test/CodeGen/PowerPC/ppc64-zext.ll b/test/CodeGen/PowerPC/ppc64-zext.ll
index eb55445cc6c9..bbd4856babde 100644
--- a/test/CodeGen/PowerPC/ppc64-zext.ll
+++ b/test/CodeGen/PowerPC/ppc64-zext.ll
@@ -4,7 +4,7 @@ target triple = "powerpc64-unknown-linux"
define i64 @fun(i32 %arg32) nounwind {
entry:
-; CHECK: rldicl {{[0-9]+}}, {{[0-9]+}}, 0, 32
+; CHECK: clrldi {{[0-9]+}}, {{[0-9]+}}, 32
%o = zext i32 %arg32 to i64
ret i64 %o
}
diff --git a/test/CodeGen/PowerPC/ppc64le-aggregates.ll b/test/CodeGen/PowerPC/ppc64le-aggregates.ll
index 4edd8d59e526..a4bec759050b 100644
--- a/test/CodeGen/PowerPC/ppc64le-aggregates.ll
+++ b/test/CodeGen/PowerPC/ppc64le-aggregates.ll
@@ -1,8 +1,11 @@
; RUN: llc < %s -march=ppc64le -mcpu=pwr8 -mattr=+altivec -mattr=-vsx | FileCheck %s
+; RUN: llc < %s -march=ppc64le -mattr=+altivec -mattr=-vsx | FileCheck %s
; Currently VSX support is disabled for this test because we generate lxsdx
; instead of lfd, and stxsdx instead of stfd. That is a poor choice when we
; have reg+imm addressing, and is on the list of things to be fixed.
+; The second run step is to ensure that -march=ppc64le is adequate to select
+; the same feature set as with -mcpu=pwr8 since that is the baseline for ppc64le.
target datalayout = "e-m:e-i64:64-n32:64"
target triple = "powerpc64le-unknown-linux-gnu"
@@ -254,33 +257,33 @@ entry:
define void @caller2() {
entry:
- %0 = load [8 x float]* getelementptr inbounds (%struct.float8* @g8, i64 0, i32 0), align 4
- %1 = load [5 x float]* getelementptr inbounds (%struct.float5* @g5, i64 0, i32 0), align 4
- %2 = load [2 x float]* getelementptr inbounds (%struct.float2* @g2, i64 0, i32 0), align 4
+ %0 = load [8 x float], [8 x float]* getelementptr inbounds (%struct.float8, %struct.float8* @g8, i64 0, i32 0), align 4
+ %1 = load [5 x float], [5 x float]* getelementptr inbounds (%struct.float5, %struct.float5* @g5, i64 0, i32 0), align 4
+ %2 = load [2 x float], [2 x float]* getelementptr inbounds (%struct.float2, %struct.float2* @g2, i64 0, i32 0), align 4
tail call void @test2([8 x float] %0, [5 x float] %1, [2 x float] %2)
ret void
}
; CHECK-LABEL: @caller2
-; CHECK: ld [[REG:[0-9]+]], .LC
-; CHECK-DAG: lfs 1, 0([[REG]])
-; CHECK-DAG: lfs 2, 4([[REG]])
-; CHECK-DAG: lfs 3, 8([[REG]])
-; CHECK-DAG: lfs 4, 12([[REG]])
-; CHECK-DAG: lfs 5, 16([[REG]])
-; CHECK-DAG: lfs 6, 20([[REG]])
-; CHECK-DAG: lfs 7, 24([[REG]])
-; CHECK-DAG: lfs 8, 28([[REG]])
-; CHECK: ld [[REG:[0-9]+]], .LC
-; CHECK-DAG: lfs 9, 0([[REG]])
-; CHECK-DAG: lfs 10, 4([[REG]])
-; CHECK-DAG: lfs 11, 8([[REG]])
-; CHECK-DAG: lfs 12, 12([[REG]])
-; CHECK-DAG: lfs 13, 16([[REG]])
-; CHECK: ld [[REG:[0-9]+]], .LC
-; CHECK-DAG: lwz [[REG0:[0-9]+]], 0([[REG]])
-; CHECK-DAG: lwz [[REG1:[0-9]+]], 4([[REG]])
-; CHECK-DAG: sldi [[REG1]], [[REG1]], 32
-; CHECK-DAG: or 10, [[REG0]], [[REG1]]
+; CHECK: ld {{[0-9]+}}, .LC
+; CHECK-DAG: lfs 1, 0({{[0-9]+}})
+; CHECK-DAG: lfs 2, 4({{[0-9]+}})
+; CHECK-DAG: lfs 3, 8({{[0-9]+}})
+; CHECK-DAG: lfs 4, 12({{[0-9]+}})
+; CHECK-DAG: lfs 5, 16({{[0-9]+}})
+; CHECK-DAG: lfs 6, 20({{[0-9]+}})
+; CHECK-DAG: lfs 7, 24({{[0-9]+}})
+; CHECK-DAG: lfs 8, 28({{[0-9]+}})
+
+; CHECK-DAG: lfs 9, 0({{[0-9]+}})
+; CHECK-DAG: lfs 10, 4({{[0-9]+}})
+; CHECK-DAG: lfs 11, 8({{[0-9]+}})
+; CHECK-DAG: lfs 12, 12({{[0-9]+}})
+; CHECK-DAG: lfs 13, 16({{[0-9]+}})
+
+; CHECK-DAG: lwz [[REG0:[0-9]+]], 0({{[0-9]+}})
+; CHECK-DAG: lwz [[REG1:[0-9]+]], 4({{[0-9]+}})
+; CHECK-DAG: sldi [[REG2:[0-9]+]], [[REG1]], 32
+; CHECK-DAG: or 10, [[REG0]], [[REG2]]
; CHECK: bl test2
declare void @test2([8 x float], [5 x float], [2 x float])
@@ -296,8 +299,8 @@ entry:
define void @caller3(double %d) {
entry:
- %0 = load [8 x float]* getelementptr inbounds (%struct.float8* @g8, i64 0, i32 0), align 4
- %1 = load [5 x float]* getelementptr inbounds (%struct.float5* @g5, i64 0, i32 0), align 4
+ %0 = load [8 x float], [8 x float]* getelementptr inbounds (%struct.float8, %struct.float8* @g8, i64 0, i32 0), align 4
+ %1 = load [5 x float], [5 x float]* getelementptr inbounds (%struct.float5, %struct.float5* @g5, i64 0, i32 0), align 4
tail call void @test3([8 x float] %0, [5 x float] %1, double %d)
ret void
}
@@ -319,8 +322,8 @@ entry:
define void @caller4(float %f) {
entry:
- %0 = load [8 x float]* getelementptr inbounds (%struct.float8* @g8, i64 0, i32 0), align 4
- %1 = load [5 x float]* getelementptr inbounds (%struct.float5* @g5, i64 0, i32 0), align 4
+ %0 = load [8 x float], [8 x float]* getelementptr inbounds (%struct.float8, %struct.float8* @g8, i64 0, i32 0), align 4
+ %1 = load [5 x float], [5 x float]* getelementptr inbounds (%struct.float5, %struct.float5* @g5, i64 0, i32 0), align 4
tail call void @test4([8 x float] %0, [5 x float] %1, float %f)
ret void
}
diff --git a/test/CodeGen/PowerPC/ppc64le-calls.ll b/test/CodeGen/PowerPC/ppc64le-calls.ll
index 0d667dde96b4..b65b9549b6b1 100644
--- a/test/CodeGen/PowerPC/ppc64le-calls.ll
+++ b/test/CodeGen/PowerPC/ppc64le-calls.ll
@@ -1,4 +1,8 @@
; RUN: llc -march=ppc64le -mcpu=pwr8 < %s | FileCheck %s
+; RUN: llc -march=ppc64le < %s | FileCheck %s
+
+; The second run of the test case is to ensure the behaviour is the same
+; without specifying -mcpu=pwr8 as that is now the baseline for ppc64le.
target datalayout = "e-m:e-i64:64-n32:64"
target triple = "powerpc64le-unknown-linux-gnu"
diff --git a/test/CodeGen/PowerPC/ppc64le-localentry.ll b/test/CodeGen/PowerPC/ppc64le-localentry.ll
index 4676ce8eadc6..be64f1151769 100644
--- a/test/CodeGen/PowerPC/ppc64le-localentry.ll
+++ b/test/CodeGen/PowerPC/ppc64le-localentry.ll
@@ -1,5 +1,10 @@
; RUN: llc -march=ppc64le -mcpu=pwr8 < %s | FileCheck %s
; RUN: llc -march=ppc64le -mcpu=pwr8 -O0 < %s | FileCheck %s
+; RUN: llc -march=ppc64le < %s | FileCheck %s
+; RUN: llc -march=ppc64le -O0 < %s | FileCheck %s
+
+; The second run of the test case is to ensure the behaviour is the same
+; without specifying -mcpu=pwr8 as that is now the baseline for ppc64le.
target datalayout = "e-m:e-i64:64-n32:64"
target triple = "powerpc64le-unknown-linux-gnu"
@@ -11,13 +16,14 @@ target triple = "powerpc64le-unknown-linux-gnu"
define i64 @use_toc(i64 %a) nounwind {
entry:
; CHECK-LABEL: @use_toc
+; CHECK-NEXT: .L{{.*}}:
; CHECK-NEXT: .Ltmp[[TMP1:[0-9]+]]:
; CHECK-NEXT: addis 2, 12, .TOC.-.Ltmp[[TMP1]]@ha
; CHECK-NEXT: addi 2, 2, .TOC.-.Ltmp[[TMP1]]@l
; CHECK-NEXT: .Ltmp[[TMP2:[0-9]+]]:
; CHECK-NEXT: .localentry use_toc, .Ltmp[[TMP2]]-.Ltmp[[TMP1]]
; CHECK-NEXT: %entry
- %0 = load i64* @number64, align 8
+ %0 = load i64, i64* @number64, align 8
%cmp = icmp eq i64 %0, %a
%conv1 = zext i1 %cmp to i64
ret i64 %conv1
@@ -27,6 +33,7 @@ declare void @callee()
define void @use_toc_implicit() nounwind {
entry:
; CHECK-LABEL: @use_toc_implicit
+; CHECK-NEXT: .L{{.*}}:
; CHECK-NEXT: .Ltmp[[TMP1:[0-9]+]]:
; CHECK-NEXT: addis 2, 12, .TOC.-.Ltmp[[TMP1]]@ha
; CHECK-NEXT: addi 2, 2, .TOC.-.Ltmp[[TMP1]]@l
@@ -40,6 +47,7 @@ entry:
define i64 @no_toc(i64 %a) nounwind {
entry:
; CHECK-LABEL: @no_toc
+; CHECK-NEXT: .L{{.*}}:
; CHECK-NEXT: %entry
ret i64 %a
}
diff --git a/test/CodeGen/PowerPC/ppc64le-smallarg.ll b/test/CodeGen/PowerPC/ppc64le-smallarg.ll
index 120c14039f99..070a617ffe4f 100644
--- a/test/CodeGen/PowerPC/ppc64le-smallarg.ll
+++ b/test/CodeGen/PowerPC/ppc64le-smallarg.ll
@@ -17,7 +17,7 @@ define void @callee1(%struct.small_arg* noalias nocapture sret %agg.result, %str
entry:
%0 = bitcast %struct.small_arg* %x to i32*
%1 = bitcast %struct.small_arg* %agg.result to i32*
- %2 = load i32* %0, align 2
+ %2 = load i32, i32* %0, align 2
store i32 %2, i32* %1, align 2
ret void
}
@@ -42,17 +42,19 @@ entry:
ret float %x
}
; CHECK: @callee2
-; CHECK: lfs {{[0-9]+}}, 136(1)
+; CHECK: addi [[TOCREG:[0-9]+]], 1, 136
+; CHECK: lxsspx {{[0-9]+}}, {{[0-9]+}}, [[TOCREG]]
; CHECK: blr
define void @caller2() {
entry:
- %0 = load float* @gf, align 4
+ %0 = load float, float* @gf, align 4
%call = tail call float @test2(float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float %0)
ret void
}
; CHECK: @caller2
-; CHECK: stfs {{[0-9]+}}, 136(1)
+; CHECK: li [[TOCOFF:[0-9]+]], 136
+; CHECK: stxsspx {{[0-9]+}}, 1, [[TOCOFF]]
; CHECK: bl test2
declare float @test2(float, float, float, float, float, float, float, float, float, float, float, float, float, float)
diff --git a/test/CodeGen/PowerPC/ppcf128-1.ll b/test/CodeGen/PowerPC/ppcf128-1.ll
index 2cec934c66fd..f0e58f61a867 100644
--- a/test/CodeGen/PowerPC/ppcf128-1.ll
+++ b/test/CodeGen/PowerPC/ppcf128-1.ll
@@ -12,16 +12,16 @@ entry:
%"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
store ppc_fp128 %x, ppc_fp128* %x_addr
store ppc_fp128 %y, ppc_fp128* %y_addr
- %tmp1 = load ppc_fp128* %x_addr, align 16 ; <ppc_fp128> [#uses=1]
- %tmp2 = load ppc_fp128* %y_addr, align 16 ; <ppc_fp128> [#uses=1]
+ %tmp1 = load ppc_fp128, ppc_fp128* %x_addr, align 16 ; <ppc_fp128> [#uses=1]
+ %tmp2 = load ppc_fp128, ppc_fp128* %y_addr, align 16 ; <ppc_fp128> [#uses=1]
%tmp3 = fadd ppc_fp128 %tmp1, %tmp2 ; <ppc_fp128> [#uses=1]
store ppc_fp128 %tmp3, ppc_fp128* %tmp, align 16
- %tmp4 = load ppc_fp128* %tmp, align 16 ; <ppc_fp128> [#uses=1]
+ %tmp4 = load ppc_fp128, ppc_fp128* %tmp, align 16 ; <ppc_fp128> [#uses=1]
store ppc_fp128 %tmp4, ppc_fp128* %retval, align 16
br label %return
return: ; preds = %entry
- %retval5 = load ppc_fp128* %retval ; <ppc_fp128> [#uses=1]
+ %retval5 = load ppc_fp128, ppc_fp128* %retval ; <ppc_fp128> [#uses=1]
ret ppc_fp128 %retval5
}
@@ -34,16 +34,16 @@ entry:
%"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
store ppc_fp128 %x, ppc_fp128* %x_addr
store ppc_fp128 %y, ppc_fp128* %y_addr
- %tmp1 = load ppc_fp128* %x_addr, align 16 ; <ppc_fp128> [#uses=1]
- %tmp2 = load ppc_fp128* %y_addr, align 16 ; <ppc_fp128> [#uses=1]
+ %tmp1 = load ppc_fp128, ppc_fp128* %x_addr, align 16 ; <ppc_fp128> [#uses=1]
+ %tmp2 = load ppc_fp128, ppc_fp128* %y_addr, align 16 ; <ppc_fp128> [#uses=1]
%tmp3 = fsub ppc_fp128 %tmp1, %tmp2 ; <ppc_fp128> [#uses=1]
store ppc_fp128 %tmp3, ppc_fp128* %tmp, align 16
- %tmp4 = load ppc_fp128* %tmp, align 16 ; <ppc_fp128> [#uses=1]
+ %tmp4 = load ppc_fp128, ppc_fp128* %tmp, align 16 ; <ppc_fp128> [#uses=1]
store ppc_fp128 %tmp4, ppc_fp128* %retval, align 16
br label %return
return: ; preds = %entry
- %retval5 = load ppc_fp128* %retval ; <ppc_fp128> [#uses=1]
+ %retval5 = load ppc_fp128, ppc_fp128* %retval ; <ppc_fp128> [#uses=1]
ret ppc_fp128 %retval5
}
@@ -56,16 +56,16 @@ entry:
%"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
store ppc_fp128 %x, ppc_fp128* %x_addr
store ppc_fp128 %y, ppc_fp128* %y_addr
- %tmp1 = load ppc_fp128* %x_addr, align 16 ; <ppc_fp128> [#uses=1]
- %tmp2 = load ppc_fp128* %y_addr, align 16 ; <ppc_fp128> [#uses=1]
+ %tmp1 = load ppc_fp128, ppc_fp128* %x_addr, align 16 ; <ppc_fp128> [#uses=1]
+ %tmp2 = load ppc_fp128, ppc_fp128* %y_addr, align 16 ; <ppc_fp128> [#uses=1]
%tmp3 = fmul ppc_fp128 %tmp1, %tmp2 ; <ppc_fp128> [#uses=1]
store ppc_fp128 %tmp3, ppc_fp128* %tmp, align 16
- %tmp4 = load ppc_fp128* %tmp, align 16 ; <ppc_fp128> [#uses=1]
+ %tmp4 = load ppc_fp128, ppc_fp128* %tmp, align 16 ; <ppc_fp128> [#uses=1]
store ppc_fp128 %tmp4, ppc_fp128* %retval, align 16
br label %return
return: ; preds = %entry
- %retval5 = load ppc_fp128* %retval ; <ppc_fp128> [#uses=1]
+ %retval5 = load ppc_fp128, ppc_fp128* %retval ; <ppc_fp128> [#uses=1]
ret ppc_fp128 %retval5
}
@@ -78,15 +78,15 @@ entry:
%"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
store ppc_fp128 %x, ppc_fp128* %x_addr
store ppc_fp128 %y, ppc_fp128* %y_addr
- %tmp1 = load ppc_fp128* %x_addr, align 16 ; <ppc_fp128> [#uses=1]
- %tmp2 = load ppc_fp128* %y_addr, align 16 ; <ppc_fp128> [#uses=1]
+ %tmp1 = load ppc_fp128, ppc_fp128* %x_addr, align 16 ; <ppc_fp128> [#uses=1]
+ %tmp2 = load ppc_fp128, ppc_fp128* %y_addr, align 16 ; <ppc_fp128> [#uses=1]
%tmp3 = fdiv ppc_fp128 %tmp1, %tmp2 ; <ppc_fp128> [#uses=1]
store ppc_fp128 %tmp3, ppc_fp128* %tmp, align 16
- %tmp4 = load ppc_fp128* %tmp, align 16 ; <ppc_fp128> [#uses=1]
+ %tmp4 = load ppc_fp128, ppc_fp128* %tmp, align 16 ; <ppc_fp128> [#uses=1]
store ppc_fp128 %tmp4, ppc_fp128* %retval, align 16
br label %return
return: ; preds = %entry
- %retval5 = load ppc_fp128* %retval ; <ppc_fp128> [#uses=1]
+ %retval5 = load ppc_fp128, ppc_fp128* %retval ; <ppc_fp128> [#uses=1]
ret ppc_fp128 %retval5
}
diff --git a/test/CodeGen/PowerPC/ppcf128-3.ll b/test/CodeGen/PowerPC/ppcf128-3.ll
index 5043b622584b..fe3b4188d11c 100644
--- a/test/CodeGen/PowerPC/ppcf128-3.ll
+++ b/test/CodeGen/PowerPC/ppcf128-3.ll
@@ -4,28 +4,28 @@
define i32 @stp_sequence_set_short_data(%struct.stp_sequence* %sequence, i32 %count, i16* %data) {
entry:
%tmp1112 = sitofp i16 0 to ppc_fp128 ; <ppc_fp128> [#uses=1]
- %tmp13 = call i32 (...)* @__inline_isfinite( ppc_fp128 %tmp1112 ) nounwind ; <i32> [#uses=0]
+ %tmp13 = call i32 (...) @__inline_isfinite( ppc_fp128 %tmp1112 ) nounwind ; <i32> [#uses=0]
ret i32 0
}
define i32 @stp_sequence_set_short_data2(%struct.stp_sequence* %sequence, i32 %count, i16* %data) {
entry:
%tmp1112 = sitofp i8 0 to ppc_fp128 ; <ppc_fp128> [#uses=1]
- %tmp13 = call i32 (...)* @__inline_isfinite( ppc_fp128 %tmp1112 ) nounwind ; <i32> [#uses=0]
+ %tmp13 = call i32 (...) @__inline_isfinite( ppc_fp128 %tmp1112 ) nounwind ; <i32> [#uses=0]
ret i32 0
}
define i32 @stp_sequence_set_short_data3(%struct.stp_sequence* %sequence, i32 %count, i16* %data) {
entry:
%tmp1112 = uitofp i16 0 to ppc_fp128 ; <ppc_fp128> [#uses=1]
- %tmp13 = call i32 (...)* @__inline_isfinite( ppc_fp128 %tmp1112 ) nounwind ; <i32> [#uses=0]
+ %tmp13 = call i32 (...) @__inline_isfinite( ppc_fp128 %tmp1112 ) nounwind ; <i32> [#uses=0]
ret i32 0
}
define i32 @stp_sequence_set_short_data4(%struct.stp_sequence* %sequence, i32 %count, i16* %data) {
entry:
%tmp1112 = uitofp i8 0 to ppc_fp128 ; <ppc_fp128> [#uses=1]
- %tmp13 = call i32 (...)* @__inline_isfinite( ppc_fp128 %tmp1112 ) nounwind ; <i32> [#uses=0]
+ %tmp13 = call i32 (...) @__inline_isfinite( ppc_fp128 %tmp1112 ) nounwind ; <i32> [#uses=0]
ret i32 0
}
diff --git a/test/CodeGen/PowerPC/ppcf128-endian.ll b/test/CodeGen/PowerPC/ppcf128-endian.ll
index 180fedf5c9f4..ee314c1db58b 100644
--- a/test/CodeGen/PowerPC/ppcf128-endian.ll
+++ b/test/CodeGen/PowerPC/ppcf128-endian.ll
@@ -9,7 +9,7 @@ define void @callee(ppc_fp128 %x) {
entry:
%x.addr = alloca ppc_fp128, align 16
store ppc_fp128 %x, ppc_fp128* %x.addr, align 16
- %0 = load ppc_fp128* %x.addr, align 16
+ %0 = load ppc_fp128, ppc_fp128* %x.addr, align 16
store ppc_fp128 %0, ppc_fp128* @g, align 16
ret void
}
@@ -21,7 +21,7 @@ entry:
define void @caller() {
entry:
- %0 = load ppc_fp128* @g, align 16
+ %0 = load ppc_fp128, ppc_fp128* @g, align 16
call void @test(ppc_fp128 %0)
ret void
}
@@ -51,7 +51,7 @@ entry:
define ppc_fp128 @result() {
entry:
- %0 = load ppc_fp128* @g, align 16
+ %0 = load ppc_fp128, ppc_fp128* @g, align 16
ret ppc_fp128 %0
}
; CHECK: @result
diff --git a/test/CodeGen/PowerPC/pr13891.ll b/test/CodeGen/PowerPC/pr13891.ll
index 4be65dd43d6a..5b695eb9f0df 100644
--- a/test/CodeGen/PowerPC/pr13891.ll
+++ b/test/CodeGen/PowerPC/pr13891.ll
@@ -10,7 +10,7 @@ define void @_Z5check3foos(%struct.foo* nocapture byval %f, i16 signext %i) noin
; CHECK: lha {{[0-9]+}}, {{[0-9]+}}(1)
entry:
%0 = bitcast %struct.foo* %f to i16*
- %1 = load i16* %0, align 2
+ %1 = load i16, i16* %0, align 2
%bf.val.sext = ashr i16 %1, 8
%cmp = icmp eq i16 %bf.val.sext, %i
br i1 %cmp, label %if.end, label %if.then
diff --git a/test/CodeGen/PowerPC/pr15031.ll b/test/CodeGen/PowerPC/pr15031.ll
index e58ad80e139b..d1b9932ca22b 100644
--- a/test/CodeGen/PowerPC/pr15031.ll
+++ b/test/CodeGen/PowerPC/pr15031.ll
@@ -298,50 +298,50 @@ declare zeroext i32 @_ZNK4llvm14MCRegisterInfo9getSubRegEjj(%"class.llvm::MCRegi
define void @_ZN4llvm14MachineOperand12substPhysRegEjRKNS_18TargetRegisterInfoE(%"class.llvm::MachineOperand"* %this, i32 zeroext %Reg, %"class.llvm::TargetRegisterInfo"* %TRI) align 2 {
entry:
- %SubReg_TargetFlags.i = getelementptr inbounds %"class.llvm::MachineOperand"* %this, i64 0, i32 1
+ %SubReg_TargetFlags.i = getelementptr inbounds %"class.llvm::MachineOperand", %"class.llvm::MachineOperand"* %this, i64 0, i32 1
%0 = bitcast [3 x i8]* %SubReg_TargetFlags.i to i24*
- %bf.load.i = load i24* %0, align 1
+ %bf.load.i = load i24, i24* %0, align 1
%bf.lshr.i = lshr i24 %bf.load.i, 12
%tobool = icmp eq i24 %bf.lshr.i, 0
br i1 %tobool, label %if.end, label %if.then
if.then: ; preds = %entry
%bf.cast.i = zext i24 %bf.lshr.i to i32
- %add.ptr = getelementptr inbounds %"class.llvm::TargetRegisterInfo"* %TRI, i64 0, i32 1
+ %add.ptr = getelementptr inbounds %"class.llvm::TargetRegisterInfo", %"class.llvm::TargetRegisterInfo"* %TRI, i64 0, i32 1
%call3 = tail call zeroext i32 @_ZNK4llvm14MCRegisterInfo9getSubRegEjj(%"class.llvm::MCRegisterInfo"* %add.ptr, i32 zeroext %Reg, i32 zeroext %bf.cast.i)
- %bf.load.i10 = load i24* %0, align 1
+ %bf.load.i10 = load i24, i24* %0, align 1
%bf.clear.i = and i24 %bf.load.i10, 4095
store i24 %bf.clear.i, i24* %0, align 1
br label %if.end
if.end: ; preds = %entry, %if.then
%Reg.addr.0 = phi i32 [ %call3, %if.then ], [ %Reg, %entry ]
- %RegNo.i.i = getelementptr inbounds %"class.llvm::MachineOperand"* %this, i64 0, i32 2, i32 0
- %1 = load i32* %RegNo.i.i, align 4
+ %RegNo.i.i = getelementptr inbounds %"class.llvm::MachineOperand", %"class.llvm::MachineOperand"* %this, i64 0, i32 2, i32 0
+ %1 = load i32, i32* %RegNo.i.i, align 4
%cmp.i = icmp eq i32 %1, %Reg.addr.0
br i1 %cmp.i, label %_ZN4llvm14MachineOperand6setRegEj.exit, label %if.end.i
if.end.i: ; preds = %if.end
- %ParentMI.i.i = getelementptr inbounds %"class.llvm::MachineOperand"* %this, i64 0, i32 3
- %2 = load %"class.llvm::MachineInstr"** %ParentMI.i.i, align 8
+ %ParentMI.i.i = getelementptr inbounds %"class.llvm::MachineOperand", %"class.llvm::MachineOperand"* %this, i64 0, i32 3
+ %2 = load %"class.llvm::MachineInstr"*, %"class.llvm::MachineInstr"** %ParentMI.i.i, align 8
%tobool.i = icmp eq %"class.llvm::MachineInstr"* %2, null
br i1 %tobool.i, label %if.end13.i, label %if.then3.i
if.then3.i: ; preds = %if.end.i
- %Parent.i.i = getelementptr inbounds %"class.llvm::MachineInstr"* %2, i64 0, i32 2
- %3 = load %"class.llvm::MachineBasicBlock"** %Parent.i.i, align 8
+ %Parent.i.i = getelementptr inbounds %"class.llvm::MachineInstr", %"class.llvm::MachineInstr"* %2, i64 0, i32 2
+ %3 = load %"class.llvm::MachineBasicBlock"*, %"class.llvm::MachineBasicBlock"** %Parent.i.i, align 8
%tobool5.i = icmp eq %"class.llvm::MachineBasicBlock"* %3, null
br i1 %tobool5.i, label %if.end13.i, label %if.then6.i
if.then6.i: ; preds = %if.then3.i
- %xParent.i.i = getelementptr inbounds %"class.llvm::MachineBasicBlock"* %3, i64 0, i32 4
- %4 = load %"class.llvm::MachineFunction"** %xParent.i.i, align 8
+ %xParent.i.i = getelementptr inbounds %"class.llvm::MachineBasicBlock", %"class.llvm::MachineBasicBlock"* %3, i64 0, i32 4
+ %4 = load %"class.llvm::MachineFunction"*, %"class.llvm::MachineFunction"** %xParent.i.i, align 8
%tobool8.i = icmp eq %"class.llvm::MachineFunction"* %4, null
br i1 %tobool8.i, label %if.end13.i, label %if.then9.i
if.then9.i: ; preds = %if.then6.i
- %RegInfo.i.i = getelementptr inbounds %"class.llvm::MachineFunction"* %4, i64 0, i32 5
- %5 = load %"class.llvm::MachineRegisterInfo"** %RegInfo.i.i, align 8
+ %RegInfo.i.i = getelementptr inbounds %"class.llvm::MachineFunction", %"class.llvm::MachineFunction"* %4, i64 0, i32 5
+ %5 = load %"class.llvm::MachineRegisterInfo"*, %"class.llvm::MachineRegisterInfo"** %RegInfo.i.i, align 8
tail call void @_ZN4llvm19MachineRegisterInfo27removeRegOperandFromUseListEPNS_14MachineOperandE(%"class.llvm::MachineRegisterInfo"* %5, %"class.llvm::MachineOperand"* %this)
store i32 %Reg.addr.0, i32* %RegNo.i.i, align 4
tail call void @_ZN4llvm19MachineRegisterInfo22addRegOperandToUseListEPNS_14MachineOperandE(%"class.llvm::MachineRegisterInfo"* %5, %"class.llvm::MachineOperand"* %this)
diff --git a/test/CodeGen/PowerPC/pr15630.ll b/test/CodeGen/PowerPC/pr15630.ll
index 3c1b604f0090..54a1b36868e9 100644
--- a/test/CodeGen/PowerPC/pr15630.ll
+++ b/test/CodeGen/PowerPC/pr15630.ll
@@ -8,7 +8,7 @@ entry:
%newval = alloca i8
%ordering = alloca i32, align 4
store i8 %newval_arg, i8* %newval
- %tmp = load i8* %newval
+ %tmp = load i8, i8* %newval
store atomic volatile i8 %tmp, i8* %val_arg seq_cst, align 1
ret void
}
diff --git a/test/CodeGen/PowerPC/pr16556-2.ll b/test/CodeGen/PowerPC/pr16556-2.ll
index e2dae4573c72..9155ed5926f5 100644
--- a/test/CodeGen/PowerPC/pr16556-2.ll
+++ b/test/CodeGen/PowerPC/pr16556-2.ll
@@ -11,7 +11,7 @@ target triple = "powerpc-unknown-linux-gnu"
@_D4core4time12TickDuration11ticksPerSecyl = global i64 0
@.str5 = internal unnamed_addr constant [40 x i8] c"..\5Cldc\5Cruntime\5Cdruntime\5Csrc\5Ccore\5Ctime.d\00"
@.str83 = internal constant [10 x i8] c"null this\00"
-@.modulefilename = internal constant { i32, i8* } { i32 39, i8* getelementptr inbounds ([40 x i8]* @.str5, i32 0, i32 0) }
+@.modulefilename = internal constant { i32, i8* } { i32 39, i8* getelementptr inbounds ([40 x i8], [40 x i8]* @.str5, i32 0, i32 0) }
declare i8* @_d_assert_msg({ i32, i8* }, { i32, i8* }, i32)
@@ -23,15 +23,15 @@ entry:
br i1 %tmp, label %noassert, label %assert
assert: ; preds = %entry
- %tmp1 = load { i32, i8* }* @.modulefilename
- %0 = call i8* @_d_assert_msg({ i32, i8* } { i32 9, i8* getelementptr inbounds ([10 x i8]* @.str83, i32 0, i32 0) }, { i32, i8* } %tmp1, i32 1586)
+ %tmp1 = load { i32, i8* }, { i32, i8* }* @.modulefilename
+ %0 = call i8* @_d_assert_msg({ i32, i8* } { i32 9, i8* getelementptr inbounds ([10 x i8], [10 x i8]* @.str83, i32 0, i32 0) }, { i32, i8* } %tmp1, i32 1586)
unreachable
noassert: ; preds = %entry
- %tmp2 = getelementptr %core.time.TickDuration* %.this_arg, i32 0, i32 0
- %tmp3 = load i64* %tmp2
+ %tmp2 = getelementptr %core.time.TickDuration, %core.time.TickDuration* %.this_arg, i32 0, i32 0
+ %tmp3 = load i64, i64* %tmp2
%tmp4 = sitofp i64 %tmp3 to ppc_fp128
- %tmp5 = load i64* @_D4core4time12TickDuration11ticksPerSecyl
+ %tmp5 = load i64, i64* @_D4core4time12TickDuration11ticksPerSecyl
%tmp6 = sitofp i64 %tmp5 to ppc_fp128
%tmp7 = fdiv ppc_fp128 %tmp6, 0xM80000000000000000000000000000000
%tmp8 = fdiv ppc_fp128 %tmp4, %tmp7
diff --git a/test/CodeGen/PowerPC/pr17168.ll b/test/CodeGen/PowerPC/pr17168.ll
index 62a9ede0200b..096895491381 100644
--- a/test/CodeGen/PowerPC/pr17168.ll
+++ b/test/CodeGen/PowerPC/pr17168.ll
@@ -24,8 +24,8 @@ for.cond968.preheader: ; preds = %for.cond968.prehead
for.end1042: ; preds = %for.cond968.preheader, %for.cond964.preheader, %entry
%0 = phi i32 [ undef, %for.cond964.preheader ], [ undef, %for.cond968.preheader ], [ undef, %entry ]
- %1 = load i32* getelementptr inbounds ([3 x i32]* @grid_points, i64 0, i64 0), align 4, !dbg !443, !tbaa !444
- tail call void @llvm.dbg.value(metadata i32 1, i64 0, metadata !119, metadata !{!"0x102"}), !dbg !448
+ %1 = load i32, i32* getelementptr inbounds ([3 x i32], [3 x i32]* @grid_points, i64 0, i64 0), align 4, !dbg !443, !tbaa !444
+ tail call void @llvm.dbg.value(metadata i32 1, i64 0, metadata !119, metadata !DIExpression()), !dbg !448
%sub10454270 = add nsw i32 %0, -1, !dbg !448
%cmp10464271 = icmp sgt i32 %sub10454270, 1, !dbg !448
%sub11134263 = add nsw i32 %1, -1, !dbg !450
@@ -54,468 +54,468 @@ attributes #1 = { nounwind readnone }
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!438, !464}
-!0 = !{!"0x11\0012\00clang version 3.4 (trunk 190311)\001\00\000\00\000", !1, !2, !2, !3, !298, !2} ; [ DW_TAG_compile_unit ] [/home/hfinkel/src/NPB2.3-omp-C/BT/bt.c] [DW_LANG_C99]
-!1 = !{!"bt.c", !"/home/hfinkel/src/NPB2.3-omp-C/BT"}
+!0 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.4 (trunk 190311)", isOptimized: true, emissionKind: 0, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !298, imports: !2)
+!1 = !DIFile(filename: "bt.c", directory: "/home/hfinkel/src/NPB2.3-omp-C/BT")
!2 = !{}
!3 = !{!4, !82, !102, !114, !132, !145, !154, !155, !162, !183, !200, !201, !207, !208, !215, !221, !230, !238, !246, !255, !260, !261, !268, !274, !279, !280, !287, !293}
-!4 = !{!"0x2e\00main\00main\00\0074\000\001\000\006\00256\001\0074", !1, !5, !6, null, null, null, null, !12} ; [ DW_TAG_subprogram ] [line 74] [def] [main]
-!5 = !{!"0x29", !1} ; [ DW_TAG_file_type ] [/home/hfinkel/src/NPB2.3-omp-C/BT/bt.c]
-!6 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !7, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!4 = !DISubprogram(name: "main", line: 74, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 74, file: !1, scope: !5, type: !6, variables: !12)
+!5 = !DIFile(filename: "bt.c", directory: "/home/hfinkel/src/NPB2.3-omp-C/BT")
+!6 = !DISubroutineType(types: !7)
!7 = !{!8, !8, !9}
-!8 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
-!9 = !{!"0xf\00\000\0064\0064\000\000", null, null, !10} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from ]
-!10 = !{!"0xf\00\000\0064\0064\000\000", null, null, !11} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from char]
-!11 = !{!"0x24\00char\000\008\008\000\000\008", null, null} ; [ DW_TAG_base_type ] [char] [line 0, size 8, align 8, offset 0, enc DW_ATE_unsigned_char]
+!8 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!9 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, baseType: !10)
+!10 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, baseType: !11)
+!11 = !DIBasicType(tag: DW_TAG_base_type, name: "char", size: 8, align: 8, encoding: DW_ATE_unsigned_char)
!12 = !{!13, !14, !15, !16, !17, !18, !19, !21, !22, !23, !25, !26}
-!13 = !{!"0x101\00argc\0016777290\000", !4, !5, !8} ; [ DW_TAG_arg_variable ] [argc] [line 74]
-!14 = !{!"0x101\00argv\0033554506\000", !4, !5, !9} ; [ DW_TAG_arg_variable ] [argv] [line 74]
-!15 = !{!"0x100\00niter\0076\000", !4, !5, !8} ; [ DW_TAG_auto_variable ] [niter] [line 76]
-!16 = !{!"0x100\00step\0076\000", !4, !5, !8} ; [ DW_TAG_auto_variable ] [step] [line 76]
-!17 = !{!"0x100\00n3\0076\000", !4, !5, !8} ; [ DW_TAG_auto_variable ] [n3] [line 76]
-!18 = !{!"0x100\00nthreads\0077\000", !4, !5, !8} ; [ DW_TAG_auto_variable ] [nthreads] [line 77]
-!19 = !{!"0x100\00navg\0078\000", !4, !5, !20} ; [ DW_TAG_auto_variable ] [navg] [line 78]
-!20 = !{!"0x24\00double\000\0064\0064\000\000\004", null, null} ; [ DW_TAG_base_type ] [double] [line 0, size 64, align 64, offset 0, enc DW_ATE_float]
-!21 = !{!"0x100\00mflops\0078\000", !4, !5, !20} ; [ DW_TAG_auto_variable ] [mflops] [line 78]
-!22 = !{!"0x100\00tmax\0080\000", !4, !5, !20} ; [ DW_TAG_auto_variable ] [tmax] [line 80]
-!23 = !{!"0x100\00verified\0081\000", !4, !5, !24} ; [ DW_TAG_auto_variable ] [verified] [line 81]
-!24 = !{!"0x16\00boolean\0012\000\000\000\000", !1, null, !8} ; [ DW_TAG_typedef ] [boolean] [line 12, size 0, align 0, offset 0] [from int]
-!25 = !{!"0x100\00class\0082\000", !4, !5, !11} ; [ DW_TAG_auto_variable ] [class] [line 82]
-!26 = !{!"0x100\00fp\0083\000", !4, !5, !27} ; [ DW_TAG_auto_variable ] [fp] [line 83]
-!27 = !{!"0xf\00\000\0064\0064\000\000", null, null, !28} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from FILE]
-!28 = !{!"0x16\00FILE\0049\000\000\000\000", !1, null, !29} ; [ DW_TAG_typedef ] [FILE] [line 49, size 0, align 0, offset 0] [from _IO_FILE]
-!29 = !{!"0x13\00_IO_FILE\00271\001728\0064\000\000\000", !30, null, null, !31, null, null, null} ; [ DW_TAG_structure_type ] [_IO_FILE] [line 271, size 1728, align 64, offset 0] [def] [from ]
-!30 = !{!"/usr/include/libio.h", !"/home/hfinkel/src/NPB2.3-omp-C/BT"}
+!13 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "argc", line: 74, arg: 1, scope: !4, file: !5, type: !8)
+!14 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "argv", line: 74, arg: 2, scope: !4, file: !5, type: !9)
+!15 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "niter", line: 76, scope: !4, file: !5, type: !8)
+!16 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "step", line: 76, scope: !4, file: !5, type: !8)
+!17 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "n3", line: 76, scope: !4, file: !5, type: !8)
+!18 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "nthreads", line: 77, scope: !4, file: !5, type: !8)
+!19 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "navg", line: 78, scope: !4, file: !5, type: !20)
+!20 = !DIBasicType(tag: DW_TAG_base_type, name: "double", size: 64, align: 64, encoding: DW_ATE_float)
+!21 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "mflops", line: 78, scope: !4, file: !5, type: !20)
+!22 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "tmax", line: 80, scope: !4, file: !5, type: !20)
+!23 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "verified", line: 81, scope: !4, file: !5, type: !24)
+!24 = !DIDerivedType(tag: DW_TAG_typedef, name: "boolean", line: 12, file: !1, baseType: !8)
+!25 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "class", line: 82, scope: !4, file: !5, type: !11)
+!26 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "fp", line: 83, scope: !4, file: !5, type: !27)
+!27 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, baseType: !28)
+!28 = !DIDerivedType(tag: DW_TAG_typedef, name: "FILE", line: 49, file: !1, baseType: !29)
+!29 = !DICompositeType(tag: DW_TAG_structure_type, name: "_IO_FILE", line: 271, size: 1728, align: 64, file: !30, elements: !31)
+!30 = !DIFile(filename: "/usr/include/libio.h", directory: "/home/hfinkel/src/NPB2.3-omp-C/BT")
!31 = !{!32, !33, !34, !35, !36, !37, !38, !39, !40, !41, !42, !43, !44, !52, !53, !54, !55, !58, !60, !62, !66, !68, !70, !71, !72, !73, !74, !77, !78}
-!32 = !{!"0xd\00_flags\00272\0032\0032\000\000", !30, !29, !8} ; [ DW_TAG_member ] [_flags] [line 272, size 32, align 32, offset 0] [from int]
-!33 = !{!"0xd\00_IO_read_ptr\00277\0064\0064\0064\000", !30, !29, !10} ; [ DW_TAG_member ] [_IO_read_ptr] [line 277, size 64, align 64, offset 64] [from ]
-!34 = !{!"0xd\00_IO_read_end\00278\0064\0064\00128\000", !30, !29, !10} ; [ DW_TAG_member ] [_IO_read_end] [line 278, size 64, align 64, offset 128] [from ]
-!35 = !{!"0xd\00_IO_read_base\00279\0064\0064\00192\000", !30, !29, !10} ; [ DW_TAG_member ] [_IO_read_base] [line 279, size 64, align 64, offset 192] [from ]
-!36 = !{!"0xd\00_IO_write_base\00280\0064\0064\00256\000", !30, !29, !10} ; [ DW_TAG_member ] [_IO_write_base] [line 280, size 64, align 64, offset 256] [from ]
-!37 = !{!"0xd\00_IO_write_ptr\00281\0064\0064\00320\000", !30, !29, !10} ; [ DW_TAG_member ] [_IO_write_ptr] [line 281, size 64, align 64, offset 320] [from ]
-!38 = !{!"0xd\00_IO_write_end\00282\0064\0064\00384\000", !30, !29, !10} ; [ DW_TAG_member ] [_IO_write_end] [line 282, size 64, align 64, offset 384] [from ]
-!39 = !{!"0xd\00_IO_buf_base\00283\0064\0064\00448\000", !30, !29, !10} ; [ DW_TAG_member ] [_IO_buf_base] [line 283, size 64, align 64, offset 448] [from ]
-!40 = !{!"0xd\00_IO_buf_end\00284\0064\0064\00512\000", !30, !29, !10} ; [ DW_TAG_member ] [_IO_buf_end] [line 284, size 64, align 64, offset 512] [from ]
-!41 = !{!"0xd\00_IO_save_base\00286\0064\0064\00576\000", !30, !29, !10} ; [ DW_TAG_member ] [_IO_save_base] [line 286, size 64, align 64, offset 576] [from ]
-!42 = !{!"0xd\00_IO_backup_base\00287\0064\0064\00640\000", !30, !29, !10} ; [ DW_TAG_member ] [_IO_backup_base] [line 287, size 64, align 64, offset 640] [from ]
-!43 = !{!"0xd\00_IO_save_end\00288\0064\0064\00704\000", !30, !29, !10} ; [ DW_TAG_member ] [_IO_save_end] [line 288, size 64, align 64, offset 704] [from ]
-!44 = !{!"0xd\00_markers\00290\0064\0064\00768\000", !30, !29, !45} ; [ DW_TAG_member ] [_markers] [line 290, size 64, align 64, offset 768] [from ]
-!45 = !{!"0xf\00\000\0064\0064\000\000", null, null, !46} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from _IO_marker]
-!46 = !{!"0x13\00_IO_marker\00186\00192\0064\000\000\000", !30, null, null, !47, null, null, null} ; [ DW_TAG_structure_type ] [_IO_marker] [line 186, size 192, align 64, offset 0] [def] [from ]
+!32 = !DIDerivedType(tag: DW_TAG_member, name: "_flags", line: 272, size: 32, align: 32, file: !30, scope: !29, baseType: !8)
+!33 = !DIDerivedType(tag: DW_TAG_member, name: "_IO_read_ptr", line: 277, size: 64, align: 64, offset: 64, file: !30, scope: !29, baseType: !10)
+!34 = !DIDerivedType(tag: DW_TAG_member, name: "_IO_read_end", line: 278, size: 64, align: 64, offset: 128, file: !30, scope: !29, baseType: !10)
+!35 = !DIDerivedType(tag: DW_TAG_member, name: "_IO_read_base", line: 279, size: 64, align: 64, offset: 192, file: !30, scope: !29, baseType: !10)
+!36 = !DIDerivedType(tag: DW_TAG_member, name: "_IO_write_base", line: 280, size: 64, align: 64, offset: 256, file: !30, scope: !29, baseType: !10)
+!37 = !DIDerivedType(tag: DW_TAG_member, name: "_IO_write_ptr", line: 281, size: 64, align: 64, offset: 320, file: !30, scope: !29, baseType: !10)
+!38 = !DIDerivedType(tag: DW_TAG_member, name: "_IO_write_end", line: 282, size: 64, align: 64, offset: 384, file: !30, scope: !29, baseType: !10)
+!39 = !DIDerivedType(tag: DW_TAG_member, name: "_IO_buf_base", line: 283, size: 64, align: 64, offset: 448, file: !30, scope: !29, baseType: !10)
+!40 = !DIDerivedType(tag: DW_TAG_member, name: "_IO_buf_end", line: 284, size: 64, align: 64, offset: 512, file: !30, scope: !29, baseType: !10)
+!41 = !DIDerivedType(tag: DW_TAG_member, name: "_IO_save_base", line: 286, size: 64, align: 64, offset: 576, file: !30, scope: !29, baseType: !10)
+!42 = !DIDerivedType(tag: DW_TAG_member, name: "_IO_backup_base", line: 287, size: 64, align: 64, offset: 640, file: !30, scope: !29, baseType: !10)
+!43 = !DIDerivedType(tag: DW_TAG_member, name: "_IO_save_end", line: 288, size: 64, align: 64, offset: 704, file: !30, scope: !29, baseType: !10)
+!44 = !DIDerivedType(tag: DW_TAG_member, name: "_markers", line: 290, size: 64, align: 64, offset: 768, file: !30, scope: !29, baseType: !45)
+!45 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, baseType: !46)
+!46 = !DICompositeType(tag: DW_TAG_structure_type, name: "_IO_marker", line: 186, size: 192, align: 64, file: !30, elements: !47)
!47 = !{!48, !49, !51}
-!48 = !{!"0xd\00_next\00187\0064\0064\000\000", !30, !46, !45} ; [ DW_TAG_member ] [_next] [line 187, size 64, align 64, offset 0] [from ]
-!49 = !{!"0xd\00_sbuf\00188\0064\0064\0064\000", !30, !46, !50} ; [ DW_TAG_member ] [_sbuf] [line 188, size 64, align 64, offset 64] [from ]
-!50 = !{!"0xf\00\000\0064\0064\000\000", null, null, !29} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from _IO_FILE]
-!51 = !{!"0xd\00_pos\00192\0032\0032\00128\000", !30, !46, !8} ; [ DW_TAG_member ] [_pos] [line 192, size 32, align 32, offset 128] [from int]
-!52 = !{!"0xd\00_chain\00292\0064\0064\00832\000", !30, !29, !50} ; [ DW_TAG_member ] [_chain] [line 292, size 64, align 64, offset 832] [from ]
-!53 = !{!"0xd\00_fileno\00294\0032\0032\00896\000", !30, !29, !8} ; [ DW_TAG_member ] [_fileno] [line 294, size 32, align 32, offset 896] [from int]
-!54 = !{!"0xd\00_flags2\00298\0032\0032\00928\000", !30, !29, !8} ; [ DW_TAG_member ] [_flags2] [line 298, size 32, align 32, offset 928] [from int]
-!55 = !{!"0xd\00_old_offset\00300\0064\0064\00960\000", !30, !29, !56} ; [ DW_TAG_member ] [_old_offset] [line 300, size 64, align 64, offset 960] [from __off_t]
-!56 = !{!"0x16\00__off_t\00141\000\000\000\000", !30, null, !57} ; [ DW_TAG_typedef ] [__off_t] [line 141, size 0, align 0, offset 0] [from long int]
-!57 = !{!"0x24\00long int\000\0064\0064\000\000\005", null, null} ; [ DW_TAG_base_type ] [long int] [line 0, size 64, align 64, offset 0, enc DW_ATE_signed]
-!58 = !{!"0xd\00_cur_column\00304\0016\0016\001024\000", !30, !29, !59} ; [ DW_TAG_member ] [_cur_column] [line 304, size 16, align 16, offset 1024] [from unsigned short]
-!59 = !{!"0x24\00unsigned short\000\0016\0016\000\000\007", null, null} ; [ DW_TAG_base_type ] [unsigned short] [line 0, size 16, align 16, offset 0, enc DW_ATE_unsigned]
-!60 = !{!"0xd\00_vtable_offset\00305\008\008\001040\000", !30, !29, !61} ; [ DW_TAG_member ] [_vtable_offset] [line 305, size 8, align 8, offset 1040] [from signed char]
-!61 = !{!"0x24\00signed char\000\008\008\000\000\006", null, null} ; [ DW_TAG_base_type ] [signed char] [line 0, size 8, align 8, offset 0, enc DW_ATE_signed_char]
-!62 = !{!"0xd\00_shortbuf\00306\008\008\001048\000", !30, !29, !63} ; [ DW_TAG_member ] [_shortbuf] [line 306, size 8, align 8, offset 1048] [from ]
-!63 = !{!"0x1\00\000\008\008\000\000", null, null, !11, !64, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 8, align 8, offset 0] [from char]
+!48 = !DIDerivedType(tag: DW_TAG_member, name: "_next", line: 187, size: 64, align: 64, file: !30, scope: !46, baseType: !45)
+!49 = !DIDerivedType(tag: DW_TAG_member, name: "_sbuf", line: 188, size: 64, align: 64, offset: 64, file: !30, scope: !46, baseType: !50)
+!50 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, baseType: !29)
+!51 = !DIDerivedType(tag: DW_TAG_member, name: "_pos", line: 192, size: 32, align: 32, offset: 128, file: !30, scope: !46, baseType: !8)
+!52 = !DIDerivedType(tag: DW_TAG_member, name: "_chain", line: 292, size: 64, align: 64, offset: 832, file: !30, scope: !29, baseType: !50)
+!53 = !DIDerivedType(tag: DW_TAG_member, name: "_fileno", line: 294, size: 32, align: 32, offset: 896, file: !30, scope: !29, baseType: !8)
+!54 = !DIDerivedType(tag: DW_TAG_member, name: "_flags2", line: 298, size: 32, align: 32, offset: 928, file: !30, scope: !29, baseType: !8)
+!55 = !DIDerivedType(tag: DW_TAG_member, name: "_old_offset", line: 300, size: 64, align: 64, offset: 960, file: !30, scope: !29, baseType: !56)
+!56 = !DIDerivedType(tag: DW_TAG_typedef, name: "__off_t", line: 141, file: !30, baseType: !57)
+!57 = !DIBasicType(tag: DW_TAG_base_type, name: "long int", size: 64, align: 64, encoding: DW_ATE_signed)
+!58 = !DIDerivedType(tag: DW_TAG_member, name: "_cur_column", line: 304, size: 16, align: 16, offset: 1024, file: !30, scope: !29, baseType: !59)
+!59 = !DIBasicType(tag: DW_TAG_base_type, name: "unsigned short", size: 16, align: 16, encoding: DW_ATE_unsigned)
+!60 = !DIDerivedType(tag: DW_TAG_member, name: "_vtable_offset", line: 305, size: 8, align: 8, offset: 1040, file: !30, scope: !29, baseType: !61)
+!61 = !DIBasicType(tag: DW_TAG_base_type, name: "signed char", size: 8, align: 8, encoding: DW_ATE_signed_char)
+!62 = !DIDerivedType(tag: DW_TAG_member, name: "_shortbuf", line: 306, size: 8, align: 8, offset: 1048, file: !30, scope: !29, baseType: !63)
+!63 = !DICompositeType(tag: DW_TAG_array_type, size: 8, align: 8, baseType: !11, elements: !64)
!64 = !{!65}
-!65 = !{!"0x21\000\001"} ; [ DW_TAG_subrange_type ] [0, 0]
-!66 = !{!"0xd\00_lock\00310\0064\0064\001088\000", !30, !29, !67} ; [ DW_TAG_member ] [_lock] [line 310, size 64, align 64, offset 1088] [from ]
-!67 = !{!"0xf\00\000\0064\0064\000\000", null, null, null} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from ]
-!68 = !{!"0xd\00_offset\00319\0064\0064\001152\000", !30, !29, !69} ; [ DW_TAG_member ] [_offset] [line 319, size 64, align 64, offset 1152] [from __off64_t]
-!69 = !{!"0x16\00__off64_t\00142\000\000\000\000", !30, null, !57} ; [ DW_TAG_typedef ] [__off64_t] [line 142, size 0, align 0, offset 0] [from long int]
-!70 = !{!"0xd\00__pad1\00328\0064\0064\001216\000", !30, !29, !67} ; [ DW_TAG_member ] [__pad1] [line 328, size 64, align 64, offset 1216] [from ]
-!71 = !{!"0xd\00__pad2\00329\0064\0064\001280\000", !30, !29, !67} ; [ DW_TAG_member ] [__pad2] [line 329, size 64, align 64, offset 1280] [from ]
-!72 = !{!"0xd\00__pad3\00330\0064\0064\001344\000", !30, !29, !67} ; [ DW_TAG_member ] [__pad3] [line 330, size 64, align 64, offset 1344] [from ]
-!73 = !{!"0xd\00__pad4\00331\0064\0064\001408\000", !30, !29, !67} ; [ DW_TAG_member ] [__pad4] [line 331, size 64, align 64, offset 1408] [from ]
-!74 = !{!"0xd\00__pad5\00332\0064\0064\001472\000", !30, !29, !75} ; [ DW_TAG_member ] [__pad5] [line 332, size 64, align 64, offset 1472] [from size_t]
-!75 = !{!"0x16\00size_t\0042\000\000\000\000", !30, null, !76} ; [ DW_TAG_typedef ] [size_t] [line 42, size 0, align 0, offset 0] [from long unsigned int]
-!76 = !{!"0x24\00long unsigned int\000\0064\0064\000\000\007", null, null} ; [ DW_TAG_base_type ] [long unsigned int] [line 0, size 64, align 64, offset 0, enc DW_ATE_unsigned]
-!77 = !{!"0xd\00_mode\00334\0032\0032\001536\000", !30, !29, !8} ; [ DW_TAG_member ] [_mode] [line 334, size 32, align 32, offset 1536] [from int]
-!78 = !{!"0xd\00_unused2\00336\00160\008\001568\000", !30, !29, !79} ; [ DW_TAG_member ] [_unused2] [line 336, size 160, align 8, offset 1568] [from ]
-!79 = !{!"0x1\00\000\00160\008\000\000", null, null, !11, !80, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 160, align 8, offset 0] [from char]
+!65 = !DISubrange(count: 1)
+!66 = !DIDerivedType(tag: DW_TAG_member, name: "_lock", line: 310, size: 64, align: 64, offset: 1088, file: !30, scope: !29, baseType: !67)
+!67 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, baseType: null)
+!68 = !DIDerivedType(tag: DW_TAG_member, name: "_offset", line: 319, size: 64, align: 64, offset: 1152, file: !30, scope: !29, baseType: !69)
+!69 = !DIDerivedType(tag: DW_TAG_typedef, name: "__off64_t", line: 142, file: !30, baseType: !57)
+!70 = !DIDerivedType(tag: DW_TAG_member, name: "__pad1", line: 328, size: 64, align: 64, offset: 1216, file: !30, scope: !29, baseType: !67)
+!71 = !DIDerivedType(tag: DW_TAG_member, name: "__pad2", line: 329, size: 64, align: 64, offset: 1280, file: !30, scope: !29, baseType: !67)
+!72 = !DIDerivedType(tag: DW_TAG_member, name: "__pad3", line: 330, size: 64, align: 64, offset: 1344, file: !30, scope: !29, baseType: !67)
+!73 = !DIDerivedType(tag: DW_TAG_member, name: "__pad4", line: 331, size: 64, align: 64, offset: 1408, file: !30, scope: !29, baseType: !67)
+!74 = !DIDerivedType(tag: DW_TAG_member, name: "__pad5", line: 332, size: 64, align: 64, offset: 1472, file: !30, scope: !29, baseType: !75)
+!75 = !DIDerivedType(tag: DW_TAG_typedef, name: "size_t", line: 42, file: !30, baseType: !76)
+!76 = !DIBasicType(tag: DW_TAG_base_type, name: "long unsigned int", size: 64, align: 64, encoding: DW_ATE_unsigned)
+!77 = !DIDerivedType(tag: DW_TAG_member, name: "_mode", line: 334, size: 32, align: 32, offset: 1536, file: !30, scope: !29, baseType: !8)
+!78 = !DIDerivedType(tag: DW_TAG_member, name: "_unused2", line: 336, size: 160, align: 8, offset: 1568, file: !30, scope: !29, baseType: !79)
+!79 = !DICompositeType(tag: DW_TAG_array_type, size: 160, align: 8, baseType: !11, elements: !80)
!80 = !{!81}
-!81 = !{!"0x21\000\0020"} ; [ DW_TAG_subrange_type ] [0, 19]
-!82 = !{!"0x2e\00verify\00verify\00\002388\001\001\000\006\00256\001\002388", !1, !5, !83, null, null, null, null, !86} ; [ DW_TAG_subprogram ] [line 2388] [local] [def] [verify]
-!83 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !84, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!81 = !DISubrange(count: 20)
+!82 = !DISubprogram(name: "verify", line: 2388, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 2388, file: !1, scope: !5, type: !83, variables: !86)
+!83 = !DISubroutineType(types: !84)
!84 = !{null, !8, !10, !85}
-!85 = !{!"0xf\00\000\0064\0064\000\000", null, null, !24} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from boolean]
+!85 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, baseType: !24)
!86 = !{!87, !88, !89, !90, !94, !95, !96, !97, !98, !99, !100, !101}
-!87 = !{!"0x101\00no_time_steps\0016779604\000", !82, !5, !8} ; [ DW_TAG_arg_variable ] [no_time_steps] [line 2388]
-!88 = !{!"0x101\00class\0033556820\000", !82, !5, !10} ; [ DW_TAG_arg_variable ] [class] [line 2388]
-!89 = !{!"0x101\00verified\0050334036\000", !82, !5, !85} ; [ DW_TAG_arg_variable ] [verified] [line 2388]
-!90 = !{!"0x100\00xcrref\002397\000", !82, !5, !91} ; [ DW_TAG_auto_variable ] [xcrref] [line 2397]
-!91 = !{!"0x1\00\000\00320\0064\000\000", null, null, !20, !92, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 320, align 64, offset 0] [from double]
+!87 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "no_time_steps", line: 2388, arg: 1, scope: !82, file: !5, type: !8)
+!88 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "class", line: 2388, arg: 2, scope: !82, file: !5, type: !10)
+!89 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "verified", line: 2388, arg: 3, scope: !82, file: !5, type: !85)
+!90 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "xcrref", line: 2397, scope: !82, file: !5, type: !91)
+!91 = !DICompositeType(tag: DW_TAG_array_type, size: 320, align: 64, baseType: !20, elements: !92)
!92 = !{!93}
-!93 = !{!"0x21\000\005"} ; [ DW_TAG_subrange_type ] [0, 4]
-!94 = !{!"0x100\00xceref\002397\000", !82, !5, !91} ; [ DW_TAG_auto_variable ] [xceref] [line 2397]
-!95 = !{!"0x100\00xcrdif\002397\000", !82, !5, !91} ; [ DW_TAG_auto_variable ] [xcrdif] [line 2397]
-!96 = !{!"0x100\00xcedif\002397\000", !82, !5, !91} ; [ DW_TAG_auto_variable ] [xcedif] [line 2397]
-!97 = !{!"0x100\00epsilon\002398\000", !82, !5, !20} ; [ DW_TAG_auto_variable ] [epsilon] [line 2398]
-!98 = !{!"0x100\00xce\002398\000", !82, !5, !91} ; [ DW_TAG_auto_variable ] [xce] [line 2398]
-!99 = !{!"0x100\00xcr\002398\000", !82, !5, !91} ; [ DW_TAG_auto_variable ] [xcr] [line 2398]
-!100 = !{!"0x100\00dtref\002398\000", !82, !5, !20} ; [ DW_TAG_auto_variable ] [dtref] [line 2398]
-!101 = !{!"0x100\00m\002399\000", !82, !5, !8} ; [ DW_TAG_auto_variable ] [m] [line 2399]
-!102 = !{!"0x2e\00rhs_norm\00rhs_norm\00\00266\001\001\000\006\00256\001\00266", !1, !5, !103, null, null, null, null, !106} ; [ DW_TAG_subprogram ] [line 266] [local] [def] [rhs_norm]
-!103 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !104, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!93 = !DISubrange(count: 5)
+!94 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "xceref", line: 2397, scope: !82, file: !5, type: !91)
+!95 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "xcrdif", line: 2397, scope: !82, file: !5, type: !91)
+!96 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "xcedif", line: 2397, scope: !82, file: !5, type: !91)
+!97 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "epsilon", line: 2398, scope: !82, file: !5, type: !20)
+!98 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "xce", line: 2398, scope: !82, file: !5, type: !91)
+!99 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "xcr", line: 2398, scope: !82, file: !5, type: !91)
+!100 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "dtref", line: 2398, scope: !82, file: !5, type: !20)
+!101 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "m", line: 2399, scope: !82, file: !5, type: !8)
+!102 = !DISubprogram(name: "rhs_norm", line: 266, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 266, file: !1, scope: !5, type: !103, variables: !106)
+!103 = !DISubroutineType(types: !104)
!104 = !{null, !105}
-!105 = !{!"0xf\00\000\0064\0064\000\000", null, null, !20} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from double]
+!105 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, baseType: !20)
!106 = !{!107, !108, !109, !110, !111, !112, !113}
-!107 = !{!"0x101\00rms\0016777482\000", !102, !5, !105} ; [ DW_TAG_arg_variable ] [rms] [line 266]
-!108 = !{!"0x100\00i\00271\000", !102, !5, !8} ; [ DW_TAG_auto_variable ] [i] [line 271]
-!109 = !{!"0x100\00j\00271\000", !102, !5, !8} ; [ DW_TAG_auto_variable ] [j] [line 271]
-!110 = !{!"0x100\00k\00271\000", !102, !5, !8} ; [ DW_TAG_auto_variable ] [k] [line 271]
-!111 = !{!"0x100\00d\00271\000", !102, !5, !8} ; [ DW_TAG_auto_variable ] [d] [line 271]
-!112 = !{!"0x100\00m\00271\000", !102, !5, !8} ; [ DW_TAG_auto_variable ] [m] [line 271]
-!113 = !{!"0x100\00add\00272\000", !102, !5, !20} ; [ DW_TAG_auto_variable ] [add] [line 272]
-!114 = !{!"0x2e\00compute_rhs\00compute_rhs\00\001767\001\001\000\006\00256\001\001767", !1, !5, !115, null, void ()* @compute_rhs, null, null, !117} ; [ DW_TAG_subprogram ] [line 1767] [local] [def] [compute_rhs]
-!115 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !116, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!107 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "rms", line: 266, arg: 1, scope: !102, file: !5, type: !105)
+!108 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "i", line: 271, scope: !102, file: !5, type: !8)
+!109 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "j", line: 271, scope: !102, file: !5, type: !8)
+!110 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "k", line: 271, scope: !102, file: !5, type: !8)
+!111 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "d", line: 271, scope: !102, file: !5, type: !8)
+!112 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "m", line: 271, scope: !102, file: !5, type: !8)
+!113 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "add", line: 272, scope: !102, file: !5, type: !20)
+!114 = !DISubprogram(name: "compute_rhs", line: 1767, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 1767, file: !1, scope: !5, type: !115, function: void ()* @compute_rhs, variables: !117)
+!115 = !DISubroutineType(types: !116)
!116 = !{null}
!117 = !{!118, !119, !120, !121, !122, !123, !124, !125, !126, !127, !128, !129, !130, !131}
-!118 = !{!"0x100\00i\001769\000", !114, !5, !8} ; [ DW_TAG_auto_variable ] [i] [line 1769]
-!119 = !{!"0x100\00j\001769\000", !114, !5, !8} ; [ DW_TAG_auto_variable ] [j] [line 1769]
-!120 = !{!"0x100\00k\001769\000", !114, !5, !8} ; [ DW_TAG_auto_variable ] [k] [line 1769]
-!121 = !{!"0x100\00m\001769\000", !114, !5, !8} ; [ DW_TAG_auto_variable ] [m] [line 1769]
-!122 = !{!"0x100\00rho_inv\001770\000", !114, !5, !20} ; [ DW_TAG_auto_variable ] [rho_inv] [line 1770]
-!123 = !{!"0x100\00uijk\001770\000", !114, !5, !20} ; [ DW_TAG_auto_variable ] [uijk] [line 1770]
-!124 = !{!"0x100\00up1\001770\000", !114, !5, !20} ; [ DW_TAG_auto_variable ] [up1] [line 1770]
-!125 = !{!"0x100\00um1\001770\000", !114, !5, !20} ; [ DW_TAG_auto_variable ] [um1] [line 1770]
-!126 = !{!"0x100\00vijk\001770\000", !114, !5, !20} ; [ DW_TAG_auto_variable ] [vijk] [line 1770]
-!127 = !{!"0x100\00vp1\001770\000", !114, !5, !20} ; [ DW_TAG_auto_variable ] [vp1] [line 1770]
-!128 = !{!"0x100\00vm1\001770\000", !114, !5, !20} ; [ DW_TAG_auto_variable ] [vm1] [line 1770]
-!129 = !{!"0x100\00wijk\001770\000", !114, !5, !20} ; [ DW_TAG_auto_variable ] [wijk] [line 1770]
-!130 = !{!"0x100\00wp1\001770\000", !114, !5, !20} ; [ DW_TAG_auto_variable ] [wp1] [line 1770]
-!131 = !{!"0x100\00wm1\001770\000", !114, !5, !20} ; [ DW_TAG_auto_variable ] [wm1] [line 1770]
-!132 = !{!"0x2e\00error_norm\00error_norm\00\00225\001\001\000\006\00256\001\00225", !1, !5, !103, null, null, null, null, !133} ; [ DW_TAG_subprogram ] [line 225] [local] [def] [error_norm]
+!118 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "i", line: 1769, scope: !114, file: !5, type: !8)
+!119 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "j", line: 1769, scope: !114, file: !5, type: !8)
+!120 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "k", line: 1769, scope: !114, file: !5, type: !8)
+!121 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "m", line: 1769, scope: !114, file: !5, type: !8)
+!122 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "rho_inv", line: 1770, scope: !114, file: !5, type: !20)
+!123 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "uijk", line: 1770, scope: !114, file: !5, type: !20)
+!124 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "up1", line: 1770, scope: !114, file: !5, type: !20)
+!125 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "um1", line: 1770, scope: !114, file: !5, type: !20)
+!126 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "vijk", line: 1770, scope: !114, file: !5, type: !20)
+!127 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "vp1", line: 1770, scope: !114, file: !5, type: !20)
+!128 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "vm1", line: 1770, scope: !114, file: !5, type: !20)
+!129 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "wijk", line: 1770, scope: !114, file: !5, type: !20)
+!130 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "wp1", line: 1770, scope: !114, file: !5, type: !20)
+!131 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "wm1", line: 1770, scope: !114, file: !5, type: !20)
+!132 = !DISubprogram(name: "error_norm", line: 225, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 225, file: !1, scope: !5, type: !103, variables: !133)
!133 = !{!134, !135, !136, !137, !138, !139, !140, !141, !142, !143, !144}
-!134 = !{!"0x101\00rms\0016777441\000", !132, !5, !105} ; [ DW_TAG_arg_variable ] [rms] [line 225]
-!135 = !{!"0x100\00i\00232\000", !132, !5, !8} ; [ DW_TAG_auto_variable ] [i] [line 232]
-!136 = !{!"0x100\00j\00232\000", !132, !5, !8} ; [ DW_TAG_auto_variable ] [j] [line 232]
-!137 = !{!"0x100\00k\00232\000", !132, !5, !8} ; [ DW_TAG_auto_variable ] [k] [line 232]
-!138 = !{!"0x100\00m\00232\000", !132, !5, !8} ; [ DW_TAG_auto_variable ] [m] [line 232]
-!139 = !{!"0x100\00d\00232\000", !132, !5, !8} ; [ DW_TAG_auto_variable ] [d] [line 232]
-!140 = !{!"0x100\00xi\00233\000", !132, !5, !20} ; [ DW_TAG_auto_variable ] [xi] [line 233]
-!141 = !{!"0x100\00eta\00233\000", !132, !5, !20} ; [ DW_TAG_auto_variable ] [eta] [line 233]
-!142 = !{!"0x100\00zeta\00233\000", !132, !5, !20} ; [ DW_TAG_auto_variable ] [zeta] [line 233]
-!143 = !{!"0x100\00u_exact\00233\000", !132, !5, !91} ; [ DW_TAG_auto_variable ] [u_exact] [line 233]
-!144 = !{!"0x100\00add\00233\000", !132, !5, !20} ; [ DW_TAG_auto_variable ] [add] [line 233]
-!145 = !{!"0x2e\00exact_solution\00exact_solution\00\00643\001\001\000\006\00256\001\00644", !1, !5, !146, null, null, null, null, !148} ; [ DW_TAG_subprogram ] [line 643] [local] [def] [scope 644] [exact_solution]
-!146 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !147, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!134 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "rms", line: 225, arg: 1, scope: !132, file: !5, type: !105)
+!135 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "i", line: 232, scope: !132, file: !5, type: !8)
+!136 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "j", line: 232, scope: !132, file: !5, type: !8)
+!137 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "k", line: 232, scope: !132, file: !5, type: !8)
+!138 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "m", line: 232, scope: !132, file: !5, type: !8)
+!139 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "d", line: 232, scope: !132, file: !5, type: !8)
+!140 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "xi", line: 233, scope: !132, file: !5, type: !20)
+!141 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "eta", line: 233, scope: !132, file: !5, type: !20)
+!142 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "zeta", line: 233, scope: !132, file: !5, type: !20)
+!143 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "u_exact", line: 233, scope: !132, file: !5, type: !91)
+!144 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "add", line: 233, scope: !132, file: !5, type: !20)
+!145 = !DISubprogram(name: "exact_solution", line: 643, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 644, file: !1, scope: !5, type: !146, variables: !148)
+!146 = !DISubroutineType(types: !147)
!147 = !{null, !20, !20, !20, !105}
!148 = !{!149, !150, !151, !152, !153}
-!149 = !{!"0x101\00xi\0016777859\000", !145, !5, !20} ; [ DW_TAG_arg_variable ] [xi] [line 643]
-!150 = !{!"0x101\00eta\0033555075\000", !145, !5, !20} ; [ DW_TAG_arg_variable ] [eta] [line 643]
-!151 = !{!"0x101\00zeta\0050332291\000", !145, !5, !20} ; [ DW_TAG_arg_variable ] [zeta] [line 643]
-!152 = !{!"0x101\00dtemp\0067109508\000", !145, !5, !105} ; [ DW_TAG_arg_variable ] [dtemp] [line 644]
-!153 = !{!"0x100\00m\00653\000", !145, !5, !8} ; [ DW_TAG_auto_variable ] [m] [line 653]
-!154 = !{!"0x2e\00set_constants\00set_constants\00\002191\001\001\000\006\00256\001\002191", !1, !5, !115, null, null, null, null, !2} ; [ DW_TAG_subprogram ] [line 2191] [local] [def] [set_constants]
-!155 = !{!"0x2e\00lhsinit\00lhsinit\00\00855\001\001\000\006\00256\001\00855", !1, !5, !115, null, null, null, null, !156} ; [ DW_TAG_subprogram ] [line 855] [local] [def] [lhsinit]
+!149 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "xi", line: 643, arg: 1, scope: !145, file: !5, type: !20)
+!150 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "eta", line: 643, arg: 2, scope: !145, file: !5, type: !20)
+!151 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "zeta", line: 643, arg: 3, scope: !145, file: !5, type: !20)
+!152 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "dtemp", line: 644, arg: 4, scope: !145, file: !5, type: !105)
+!153 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "m", line: 653, scope: !145, file: !5, type: !8)
+!154 = !DISubprogram(name: "set_constants", line: 2191, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 2191, file: !1, scope: !5, type: !115, variables: !2)
+!155 = !DISubprogram(name: "lhsinit", line: 855, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 855, file: !1, scope: !5, type: !115, variables: !156)
!156 = !{!157, !158, !159, !160, !161}
-!157 = !{!"0x100\00i\00857\000", !155, !5, !8} ; [ DW_TAG_auto_variable ] [i] [line 857]
-!158 = !{!"0x100\00j\00857\000", !155, !5, !8} ; [ DW_TAG_auto_variable ] [j] [line 857]
-!159 = !{!"0x100\00k\00857\000", !155, !5, !8} ; [ DW_TAG_auto_variable ] [k] [line 857]
-!160 = !{!"0x100\00m\00857\000", !155, !5, !8} ; [ DW_TAG_auto_variable ] [m] [line 857]
-!161 = !{!"0x100\00n\00857\000", !155, !5, !8} ; [ DW_TAG_auto_variable ] [n] [line 857]
-!162 = !{!"0x2e\00initialize\00initialize\00\00669\001\001\000\006\00256\001\00669", !1, !5, !115, null, null, null, null, !163} ; [ DW_TAG_subprogram ] [line 669] [local] [def] [initialize]
+!157 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "i", line: 857, scope: !155, file: !5, type: !8)
+!158 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "j", line: 857, scope: !155, file: !5, type: !8)
+!159 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "k", line: 857, scope: !155, file: !5, type: !8)
+!160 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "m", line: 857, scope: !155, file: !5, type: !8)
+!161 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "n", line: 857, scope: !155, file: !5, type: !8)
+!162 = !DISubprogram(name: "initialize", line: 669, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 669, file: !1, scope: !5, type: !115, variables: !163)
!163 = !{!164, !165, !166, !167, !168, !169, !170, !171, !172, !173, !174, !179, !180, !181, !182}
-!164 = !{!"0x100\00i\00679\000", !162, !5, !8} ; [ DW_TAG_auto_variable ] [i] [line 679]
-!165 = !{!"0x100\00j\00679\000", !162, !5, !8} ; [ DW_TAG_auto_variable ] [j] [line 679]
-!166 = !{!"0x100\00k\00679\000", !162, !5, !8} ; [ DW_TAG_auto_variable ] [k] [line 679]
-!167 = !{!"0x100\00m\00679\000", !162, !5, !8} ; [ DW_TAG_auto_variable ] [m] [line 679]
-!168 = !{!"0x100\00ix\00679\000", !162, !5, !8} ; [ DW_TAG_auto_variable ] [ix] [line 679]
-!169 = !{!"0x100\00iy\00679\000", !162, !5, !8} ; [ DW_TAG_auto_variable ] [iy] [line 679]
-!170 = !{!"0x100\00iz\00679\000", !162, !5, !8} ; [ DW_TAG_auto_variable ] [iz] [line 679]
-!171 = !{!"0x100\00xi\00680\000", !162, !5, !20} ; [ DW_TAG_auto_variable ] [xi] [line 680]
-!172 = !{!"0x100\00eta\00680\000", !162, !5, !20} ; [ DW_TAG_auto_variable ] [eta] [line 680]
-!173 = !{!"0x100\00zeta\00680\000", !162, !5, !20} ; [ DW_TAG_auto_variable ] [zeta] [line 680]
-!174 = !{!"0x100\00Pface\00680\000", !162, !5, !175} ; [ DW_TAG_auto_variable ] [Pface] [line 680]
-!175 = !{!"0x1\00\000\001920\0064\000\000", null, null, !20, !176, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 1920, align 64, offset 0] [from double]
+!164 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "i", line: 679, scope: !162, file: !5, type: !8)
+!165 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "j", line: 679, scope: !162, file: !5, type: !8)
+!166 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "k", line: 679, scope: !162, file: !5, type: !8)
+!167 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "m", line: 679, scope: !162, file: !5, type: !8)
+!168 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "ix", line: 679, scope: !162, file: !5, type: !8)
+!169 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "iy", line: 679, scope: !162, file: !5, type: !8)
+!170 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "iz", line: 679, scope: !162, file: !5, type: !8)
+!171 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "xi", line: 680, scope: !162, file: !5, type: !20)
+!172 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "eta", line: 680, scope: !162, file: !5, type: !20)
+!173 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "zeta", line: 680, scope: !162, file: !5, type: !20)
+!174 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "Pface", line: 680, scope: !162, file: !5, type: !175)
+!175 = !DICompositeType(tag: DW_TAG_array_type, size: 1920, align: 64, baseType: !20, elements: !176)
!176 = !{!177, !178, !93}
-!177 = !{!"0x21\000\002"} ; [ DW_TAG_subrange_type ] [0, 1]
-!178 = !{!"0x21\000\003"} ; [ DW_TAG_subrange_type ] [0, 2]
-!179 = !{!"0x100\00Pxi\00680\000", !162, !5, !20} ; [ DW_TAG_auto_variable ] [Pxi] [line 680]
-!180 = !{!"0x100\00Peta\00680\000", !162, !5, !20} ; [ DW_TAG_auto_variable ] [Peta] [line 680]
-!181 = !{!"0x100\00Pzeta\00680\000", !162, !5, !20} ; [ DW_TAG_auto_variable ] [Pzeta] [line 680]
-!182 = !{!"0x100\00temp\00680\000", !162, !5, !91} ; [ DW_TAG_auto_variable ] [temp] [line 680]
-!183 = !{!"0x2e\00exact_rhs\00exact_rhs\00\00301\001\001\000\006\00256\001\00301", !1, !5, !115, null, null, null, null, !184} ; [ DW_TAG_subprogram ] [line 301] [local] [def] [exact_rhs]
+!177 = !DISubrange(count: 2)
+!178 = !DISubrange(count: 3)
+!179 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "Pxi", line: 680, scope: !162, file: !5, type: !20)
+!180 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "Peta", line: 680, scope: !162, file: !5, type: !20)
+!181 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "Pzeta", line: 680, scope: !162, file: !5, type: !20)
+!182 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "temp", line: 680, scope: !162, file: !5, type: !91)
+!183 = !DISubprogram(name: "exact_rhs", line: 301, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 301, file: !1, scope: !5, type: !115, variables: !184)
!184 = !{!185, !186, !187, !188, !189, !190, !191, !192, !193, !194, !195, !196, !197, !198, !199}
-!185 = !{!"0x100\00dtemp\00310\000", !183, !5, !91} ; [ DW_TAG_auto_variable ] [dtemp] [line 310]
-!186 = !{!"0x100\00xi\00310\000", !183, !5, !20} ; [ DW_TAG_auto_variable ] [xi] [line 310]
-!187 = !{!"0x100\00eta\00310\000", !183, !5, !20} ; [ DW_TAG_auto_variable ] [eta] [line 310]
-!188 = !{!"0x100\00zeta\00310\000", !183, !5, !20} ; [ DW_TAG_auto_variable ] [zeta] [line 310]
-!189 = !{!"0x100\00dtpp\00310\000", !183, !5, !20} ; [ DW_TAG_auto_variable ] [dtpp] [line 310]
-!190 = !{!"0x100\00m\00311\000", !183, !5, !8} ; [ DW_TAG_auto_variable ] [m] [line 311]
-!191 = !{!"0x100\00i\00311\000", !183, !5, !8} ; [ DW_TAG_auto_variable ] [i] [line 311]
-!192 = !{!"0x100\00j\00311\000", !183, !5, !8} ; [ DW_TAG_auto_variable ] [j] [line 311]
-!193 = !{!"0x100\00k\00311\000", !183, !5, !8} ; [ DW_TAG_auto_variable ] [k] [line 311]
-!194 = !{!"0x100\00ip1\00311\000", !183, !5, !8} ; [ DW_TAG_auto_variable ] [ip1] [line 311]
-!195 = !{!"0x100\00im1\00311\000", !183, !5, !8} ; [ DW_TAG_auto_variable ] [im1] [line 311]
-!196 = !{!"0x100\00jp1\00311\000", !183, !5, !8} ; [ DW_TAG_auto_variable ] [jp1] [line 311]
-!197 = !{!"0x100\00jm1\00311\000", !183, !5, !8} ; [ DW_TAG_auto_variable ] [jm1] [line 311]
-!198 = !{!"0x100\00km1\00311\000", !183, !5, !8} ; [ DW_TAG_auto_variable ] [km1] [line 311]
-!199 = !{!"0x100\00kp1\00311\000", !183, !5, !8} ; [ DW_TAG_auto_variable ] [kp1] [line 311]
-!200 = !{!"0x2e\00adi\00adi\00\00210\001\001\000\006\00256\001\00210", !1, !5, !115, null, null, null, null, !2} ; [ DW_TAG_subprogram ] [line 210] [local] [def] [adi]
-!201 = !{!"0x2e\00add\00add\00\00187\001\001\000\006\00256\001\00187", !1, !5, !115, null, null, null, null, !202} ; [ DW_TAG_subprogram ] [line 187] [local] [def] [add]
+!185 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "dtemp", line: 310, scope: !183, file: !5, type: !91)
+!186 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "xi", line: 310, scope: !183, file: !5, type: !20)
+!187 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "eta", line: 310, scope: !183, file: !5, type: !20)
+!188 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "zeta", line: 310, scope: !183, file: !5, type: !20)
+!189 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "dtpp", line: 310, scope: !183, file: !5, type: !20)
+!190 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "m", line: 311, scope: !183, file: !5, type: !8)
+!191 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "i", line: 311, scope: !183, file: !5, type: !8)
+!192 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "j", line: 311, scope: !183, file: !5, type: !8)
+!193 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "k", line: 311, scope: !183, file: !5, type: !8)
+!194 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "ip1", line: 311, scope: !183, file: !5, type: !8)
+!195 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "im1", line: 311, scope: !183, file: !5, type: !8)
+!196 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "jp1", line: 311, scope: !183, file: !5, type: !8)
+!197 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "jm1", line: 311, scope: !183, file: !5, type: !8)
+!198 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "km1", line: 311, scope: !183, file: !5, type: !8)
+!199 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "kp1", line: 311, scope: !183, file: !5, type: !8)
+!200 = !DISubprogram(name: "adi", line: 210, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 210, file: !1, scope: !5, type: !115, variables: !2)
+!201 = !DISubprogram(name: "add", line: 187, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 187, file: !1, scope: !5, type: !115, variables: !202)
!202 = !{!203, !204, !205, !206}
-!203 = !{!"0x100\00i\00193\000", !201, !5, !8} ; [ DW_TAG_auto_variable ] [i] [line 193]
-!204 = !{!"0x100\00j\00193\000", !201, !5, !8} ; [ DW_TAG_auto_variable ] [j] [line 193]
-!205 = !{!"0x100\00k\00193\000", !201, !5, !8} ; [ DW_TAG_auto_variable ] [k] [line 193]
-!206 = !{!"0x100\00m\00193\000", !201, !5, !8} ; [ DW_TAG_auto_variable ] [m] [line 193]
-!207 = !{!"0x2e\00z_solve\00z_solve\00\003457\001\001\000\006\00256\001\003457", !1, !5, !115, null, null, null, null, !2} ; [ DW_TAG_subprogram ] [line 3457] [local] [def] [z_solve]
-!208 = !{!"0x2e\00z_backsubstitute\00z_backsubstitute\00\003480\001\001\000\006\00256\001\003480", !1, !5, !115, null, null, null, null, !209} ; [ DW_TAG_subprogram ] [line 3480] [local] [def] [z_backsubstitute]
+!203 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "i", line: 193, scope: !201, file: !5, type: !8)
+!204 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "j", line: 193, scope: !201, file: !5, type: !8)
+!205 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "k", line: 193, scope: !201, file: !5, type: !8)
+!206 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "m", line: 193, scope: !201, file: !5, type: !8)
+!207 = !DISubprogram(name: "z_solve", line: 3457, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 3457, file: !1, scope: !5, type: !115, variables: !2)
+!208 = !DISubprogram(name: "z_backsubstitute", line: 3480, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 3480, file: !1, scope: !5, type: !115, variables: !209)
!209 = !{!210, !211, !212, !213, !214}
-!210 = !{!"0x100\00i\003492\000", !208, !5, !8} ; [ DW_TAG_auto_variable ] [i] [line 3492]
-!211 = !{!"0x100\00j\003492\000", !208, !5, !8} ; [ DW_TAG_auto_variable ] [j] [line 3492]
-!212 = !{!"0x100\00k\003492\000", !208, !5, !8} ; [ DW_TAG_auto_variable ] [k] [line 3492]
-!213 = !{!"0x100\00m\003492\000", !208, !5, !8} ; [ DW_TAG_auto_variable ] [m] [line 3492]
-!214 = !{!"0x100\00n\003492\000", !208, !5, !8} ; [ DW_TAG_auto_variable ] [n] [line 3492]
-!215 = !{!"0x2e\00z_solve_cell\00z_solve_cell\00\003512\001\001\000\006\00256\001\003512", !1, !5, !115, null, null, null, null, !216} ; [ DW_TAG_subprogram ] [line 3512] [local] [def] [z_solve_cell]
+!210 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "i", line: 3492, scope: !208, file: !5, type: !8)
+!211 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "j", line: 3492, scope: !208, file: !5, type: !8)
+!212 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "k", line: 3492, scope: !208, file: !5, type: !8)
+!213 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "m", line: 3492, scope: !208, file: !5, type: !8)
+!214 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "n", line: 3492, scope: !208, file: !5, type: !8)
+!215 = !DISubprogram(name: "z_solve_cell", line: 3512, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 3512, file: !1, scope: !5, type: !115, variables: !216)
!216 = !{!217, !218, !219, !220}
-!217 = !{!"0x100\00i\003527\000", !215, !5, !8} ; [ DW_TAG_auto_variable ] [i] [line 3527]
-!218 = !{!"0x100\00j\003527\000", !215, !5, !8} ; [ DW_TAG_auto_variable ] [j] [line 3527]
-!219 = !{!"0x100\00k\003527\000", !215, !5, !8} ; [ DW_TAG_auto_variable ] [k] [line 3527]
-!220 = !{!"0x100\00ksize\003527\000", !215, !5, !8} ; [ DW_TAG_auto_variable ] [ksize] [line 3527]
-!221 = !{!"0x2e\00binvrhs\00binvrhs\00\003154\001\001\000\006\00256\001\003154", !1, !5, !222, null, null, null, null, !225} ; [ DW_TAG_subprogram ] [line 3154] [local] [def] [binvrhs]
-!222 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !223, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!217 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "i", line: 3527, scope: !215, file: !5, type: !8)
+!218 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "j", line: 3527, scope: !215, file: !5, type: !8)
+!219 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "k", line: 3527, scope: !215, file: !5, type: !8)
+!220 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "ksize", line: 3527, scope: !215, file: !5, type: !8)
+!221 = !DISubprogram(name: "binvrhs", line: 3154, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 3154, file: !1, scope: !5, type: !222, variables: !225)
+!222 = !DISubroutineType(types: !223)
!223 = !{null, !224, !105}
-!224 = !{!"0xf\00\000\0064\0064\000\000", null, null, !91} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from ]
+!224 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, baseType: !91)
!225 = !{!226, !227, !228, !229}
-!226 = !{!"0x101\00lhs\0016780370\000", !221, !5, !224} ; [ DW_TAG_arg_variable ] [lhs] [line 3154]
-!227 = !{!"0x101\00r\0033557586\000", !221, !5, !105} ; [ DW_TAG_arg_variable ] [r] [line 3154]
-!228 = !{!"0x100\00pivot\003159\000", !221, !5, !20} ; [ DW_TAG_auto_variable ] [pivot] [line 3159]
-!229 = !{!"0x100\00coeff\003159\000", !221, !5, !20} ; [ DW_TAG_auto_variable ] [coeff] [line 3159]
-!230 = !{!"0x2e\00matmul_sub\00matmul_sub\00\002841\001\001\000\006\00256\001\002842", !1, !5, !231, null, null, null, null, !233} ; [ DW_TAG_subprogram ] [line 2841] [local] [def] [scope 2842] [matmul_sub]
-!231 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !232, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!226 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "lhs", line: 3154, arg: 1, scope: !221, file: !5, type: !224)
+!227 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "r", line: 3154, arg: 2, scope: !221, file: !5, type: !105)
+!228 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "pivot", line: 3159, scope: !221, file: !5, type: !20)
+!229 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "coeff", line: 3159, scope: !221, file: !5, type: !20)
+!230 = !DISubprogram(name: "matmul_sub", line: 2841, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 2842, file: !1, scope: !5, type: !231, variables: !233)
+!231 = !DISubroutineType(types: !232)
!232 = !{null, !224, !224, !224}
!233 = !{!234, !235, !236, !237}
-!234 = !{!"0x101\00ablock\0016780057\000", !230, !5, !224} ; [ DW_TAG_arg_variable ] [ablock] [line 2841]
-!235 = !{!"0x101\00bblock\0033557273\000", !230, !5, !224} ; [ DW_TAG_arg_variable ] [bblock] [line 2841]
-!236 = !{!"0x101\00cblock\0050334490\000", !230, !5, !224} ; [ DW_TAG_arg_variable ] [cblock] [line 2842]
-!237 = !{!"0x100\00j\002851\000", !230, !5, !8} ; [ DW_TAG_auto_variable ] [j] [line 2851]
-!238 = !{!"0x2e\00matvec_sub\00matvec_sub\00\002814\001\001\000\006\00256\001\002814", !1, !5, !239, null, null, null, null, !241} ; [ DW_TAG_subprogram ] [line 2814] [local] [def] [matvec_sub]
-!239 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !240, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!234 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "ablock", line: 2841, arg: 1, scope: !230, file: !5, type: !224)
+!235 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "bblock", line: 2841, arg: 2, scope: !230, file: !5, type: !224)
+!236 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "cblock", line: 2842, arg: 3, scope: !230, file: !5, type: !224)
+!237 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "j", line: 2851, scope: !230, file: !5, type: !8)
+!238 = !DISubprogram(name: "matvec_sub", line: 2814, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 2814, file: !1, scope: !5, type: !239, variables: !241)
+!239 = !DISubroutineType(types: !240)
!240 = !{null, !224, !105, !105}
!241 = !{!242, !243, !244, !245}
-!242 = !{!"0x101\00ablock\0016780030\000", !238, !5, !224} ; [ DW_TAG_arg_variable ] [ablock] [line 2814]
-!243 = !{!"0x101\00avec\0033557246\000", !238, !5, !105} ; [ DW_TAG_arg_variable ] [avec] [line 2814]
-!244 = !{!"0x101\00bvec\0050334462\000", !238, !5, !105} ; [ DW_TAG_arg_variable ] [bvec] [line 2814]
-!245 = !{!"0x100\00i\002823\000", !238, !5, !8} ; [ DW_TAG_auto_variable ] [i] [line 2823]
-!246 = !{!"0x2e\00binvcrhs\00binvcrhs\00\002885\001\001\000\006\00256\001\002885", !1, !5, !247, null, null, null, null, !249} ; [ DW_TAG_subprogram ] [line 2885] [local] [def] [binvcrhs]
-!247 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !248, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!242 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "ablock", line: 2814, arg: 1, scope: !238, file: !5, type: !224)
+!243 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "avec", line: 2814, arg: 2, scope: !238, file: !5, type: !105)
+!244 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "bvec", line: 2814, arg: 3, scope: !238, file: !5, type: !105)
+!245 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "i", line: 2823, scope: !238, file: !5, type: !8)
+!246 = !DISubprogram(name: "binvcrhs", line: 2885, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 2885, file: !1, scope: !5, type: !247, variables: !249)
+!247 = !DISubroutineType(types: !248)
!248 = !{null, !224, !224, !105}
!249 = !{!250, !251, !252, !253, !254}
-!250 = !{!"0x101\00lhs\0016780101\000", !246, !5, !224} ; [ DW_TAG_arg_variable ] [lhs] [line 2885]
-!251 = !{!"0x101\00c\0033557317\000", !246, !5, !224} ; [ DW_TAG_arg_variable ] [c] [line 2885]
-!252 = !{!"0x101\00r\0050334533\000", !246, !5, !105} ; [ DW_TAG_arg_variable ] [r] [line 2885]
-!253 = !{!"0x100\00pivot\002890\000", !246, !5, !20} ; [ DW_TAG_auto_variable ] [pivot] [line 2890]
-!254 = !{!"0x100\00coeff\002890\000", !246, !5, !20} ; [ DW_TAG_auto_variable ] [coeff] [line 2890]
-!255 = !{!"0x2e\00lhsz\00lhsz\00\001475\001\001\000\006\00256\001\001475", !1, !5, !115, null, null, null, null, !256} ; [ DW_TAG_subprogram ] [line 1475] [local] [def] [lhsz]
+!250 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "lhs", line: 2885, arg: 1, scope: !246, file: !5, type: !224)
+!251 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "c", line: 2885, arg: 2, scope: !246, file: !5, type: !224)
+!252 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "r", line: 2885, arg: 3, scope: !246, file: !5, type: !105)
+!253 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "pivot", line: 2890, scope: !246, file: !5, type: !20)
+!254 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "coeff", line: 2890, scope: !246, file: !5, type: !20)
+!255 = !DISubprogram(name: "lhsz", line: 1475, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 1475, file: !1, scope: !5, type: !115, variables: !256)
!256 = !{!257, !258, !259}
-!257 = !{!"0x100\00i\001484\000", !255, !5, !8} ; [ DW_TAG_auto_variable ] [i] [line 1484]
-!258 = !{!"0x100\00j\001484\000", !255, !5, !8} ; [ DW_TAG_auto_variable ] [j] [line 1484]
-!259 = !{!"0x100\00k\001484\000", !255, !5, !8} ; [ DW_TAG_auto_variable ] [k] [line 1484]
-!260 = !{!"0x2e\00y_solve\00y_solve\00\003299\001\001\000\006\00256\001\003299", !1, !5, !115, null, null, null, null, !2} ; [ DW_TAG_subprogram ] [line 3299] [local] [def] [y_solve]
-!261 = !{!"0x2e\00y_backsubstitute\00y_backsubstitute\00\003323\001\001\000\006\00256\001\003323", !1, !5, !115, null, null, null, null, !262} ; [ DW_TAG_subprogram ] [line 3323] [local] [def] [y_backsubstitute]
+!257 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "i", line: 1484, scope: !255, file: !5, type: !8)
+!258 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "j", line: 1484, scope: !255, file: !5, type: !8)
+!259 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "k", line: 1484, scope: !255, file: !5, type: !8)
+!260 = !DISubprogram(name: "y_solve", line: 3299, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 3299, file: !1, scope: !5, type: !115, variables: !2)
+!261 = !DISubprogram(name: "y_backsubstitute", line: 3323, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 3323, file: !1, scope: !5, type: !115, variables: !262)
!262 = !{!263, !264, !265, !266, !267}
-!263 = !{!"0x100\00i\003335\000", !261, !5, !8} ; [ DW_TAG_auto_variable ] [i] [line 3335]
-!264 = !{!"0x100\00j\003335\000", !261, !5, !8} ; [ DW_TAG_auto_variable ] [j] [line 3335]
-!265 = !{!"0x100\00k\003335\000", !261, !5, !8} ; [ DW_TAG_auto_variable ] [k] [line 3335]
-!266 = !{!"0x100\00m\003335\000", !261, !5, !8} ; [ DW_TAG_auto_variable ] [m] [line 3335]
-!267 = !{!"0x100\00n\003335\000", !261, !5, !8} ; [ DW_TAG_auto_variable ] [n] [line 3335]
-!268 = !{!"0x2e\00y_solve_cell\00y_solve_cell\00\003355\001\001\000\006\00256\001\003355", !1, !5, !115, null, null, null, null, !269} ; [ DW_TAG_subprogram ] [line 3355] [local] [def] [y_solve_cell]
+!263 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "i", line: 3335, scope: !261, file: !5, type: !8)
+!264 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "j", line: 3335, scope: !261, file: !5, type: !8)
+!265 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "k", line: 3335, scope: !261, file: !5, type: !8)
+!266 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "m", line: 3335, scope: !261, file: !5, type: !8)
+!267 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "n", line: 3335, scope: !261, file: !5, type: !8)
+!268 = !DISubprogram(name: "y_solve_cell", line: 3355, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 3355, file: !1, scope: !5, type: !115, variables: !269)
!269 = !{!270, !271, !272, !273}
-!270 = !{!"0x100\00i\003370\000", !268, !5, !8} ; [ DW_TAG_auto_variable ] [i] [line 3370]
-!271 = !{!"0x100\00j\003370\000", !268, !5, !8} ; [ DW_TAG_auto_variable ] [j] [line 3370]
-!272 = !{!"0x100\00k\003370\000", !268, !5, !8} ; [ DW_TAG_auto_variable ] [k] [line 3370]
-!273 = !{!"0x100\00jsize\003370\000", !268, !5, !8} ; [ DW_TAG_auto_variable ] [jsize] [line 3370]
-!274 = !{!"0x2e\00lhsy\00lhsy\00\001181\001\001\000\006\00256\001\001181", !1, !5, !115, null, null, null, null, !275} ; [ DW_TAG_subprogram ] [line 1181] [local] [def] [lhsy]
+!270 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "i", line: 3370, scope: !268, file: !5, type: !8)
+!271 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "j", line: 3370, scope: !268, file: !5, type: !8)
+!272 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "k", line: 3370, scope: !268, file: !5, type: !8)
+!273 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "jsize", line: 3370, scope: !268, file: !5, type: !8)
+!274 = !DISubprogram(name: "lhsy", line: 1181, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 1181, file: !1, scope: !5, type: !115, variables: !275)
!275 = !{!276, !277, !278}
-!276 = !{!"0x100\00i\001190\000", !274, !5, !8} ; [ DW_TAG_auto_variable ] [i] [line 1190]
-!277 = !{!"0x100\00j\001190\000", !274, !5, !8} ; [ DW_TAG_auto_variable ] [j] [line 1190]
-!278 = !{!"0x100\00k\001190\000", !274, !5, !8} ; [ DW_TAG_auto_variable ] [k] [line 1190]
-!279 = !{!"0x2e\00x_solve\00x_solve\00\002658\001\001\000\006\00256\001\002658", !1, !5, !115, null, null, null, null, !2} ; [ DW_TAG_subprogram ] [line 2658] [local] [def] [x_solve]
-!280 = !{!"0x2e\00x_backsubstitute\00x_backsubstitute\00\002684\001\001\000\006\00256\001\002684", !1, !5, !115, null, null, null, null, !281} ; [ DW_TAG_subprogram ] [line 2684] [local] [def] [x_backsubstitute]
+!276 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "i", line: 1190, scope: !274, file: !5, type: !8)
+!277 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "j", line: 1190, scope: !274, file: !5, type: !8)
+!278 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "k", line: 1190, scope: !274, file: !5, type: !8)
+!279 = !DISubprogram(name: "x_solve", line: 2658, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 2658, file: !1, scope: !5, type: !115, variables: !2)
+!280 = !DISubprogram(name: "x_backsubstitute", line: 2684, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 2684, file: !1, scope: !5, type: !115, variables: !281)
!281 = !{!282, !283, !284, !285, !286}
-!282 = !{!"0x100\00i\002696\000", !280, !5, !8} ; [ DW_TAG_auto_variable ] [i] [line 2696]
-!283 = !{!"0x100\00j\002696\000", !280, !5, !8} ; [ DW_TAG_auto_variable ] [j] [line 2696]
-!284 = !{!"0x100\00k\002696\000", !280, !5, !8} ; [ DW_TAG_auto_variable ] [k] [line 2696]
-!285 = !{!"0x100\00m\002696\000", !280, !5, !8} ; [ DW_TAG_auto_variable ] [m] [line 2696]
-!286 = !{!"0x100\00n\002696\000", !280, !5, !8} ; [ DW_TAG_auto_variable ] [n] [line 2696]
-!287 = !{!"0x2e\00x_solve_cell\00x_solve_cell\00\002716\001\001\000\006\00256\001\002716", !1, !5, !115, null, null, null, null, !288} ; [ DW_TAG_subprogram ] [line 2716] [local] [def] [x_solve_cell]
+!282 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "i", line: 2696, scope: !280, file: !5, type: !8)
+!283 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "j", line: 2696, scope: !280, file: !5, type: !8)
+!284 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "k", line: 2696, scope: !280, file: !5, type: !8)
+!285 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "m", line: 2696, scope: !280, file: !5, type: !8)
+!286 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "n", line: 2696, scope: !280, file: !5, type: !8)
+!287 = !DISubprogram(name: "x_solve_cell", line: 2716, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 2716, file: !1, scope: !5, type: !115, variables: !288)
!288 = !{!289, !290, !291, !292}
-!289 = !{!"0x100\00i\002728\000", !287, !5, !8} ; [ DW_TAG_auto_variable ] [i] [line 2728]
-!290 = !{!"0x100\00j\002728\000", !287, !5, !8} ; [ DW_TAG_auto_variable ] [j] [line 2728]
-!291 = !{!"0x100\00k\002728\000", !287, !5, !8} ; [ DW_TAG_auto_variable ] [k] [line 2728]
-!292 = !{!"0x100\00isize\002728\000", !287, !5, !8} ; [ DW_TAG_auto_variable ] [isize] [line 2728]
-!293 = !{!"0x2e\00lhsx\00lhsx\00\00898\001\001\000\006\00256\001\00898", !1, !5, !115, null, null, null, null, !294} ; [ DW_TAG_subprogram ] [line 898] [local] [def] [lhsx]
+!289 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "i", line: 2728, scope: !287, file: !5, type: !8)
+!290 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "j", line: 2728, scope: !287, file: !5, type: !8)
+!291 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "k", line: 2728, scope: !287, file: !5, type: !8)
+!292 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "isize", line: 2728, scope: !287, file: !5, type: !8)
+!293 = !DISubprogram(name: "lhsx", line: 898, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 898, file: !1, scope: !5, type: !115, variables: !294)
!294 = !{!295, !296, !297}
-!295 = !{!"0x100\00i\00907\000", !293, !5, !8} ; [ DW_TAG_auto_variable ] [i] [line 907]
-!296 = !{!"0x100\00j\00907\000", !293, !5, !8} ; [ DW_TAG_auto_variable ] [j] [line 907]
-!297 = !{!"0x100\00k\00907\000", !293, !5, !8} ; [ DW_TAG_auto_variable ] [k] [line 907]
+!295 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "i", line: 907, scope: !293, file: !5, type: !8)
+!296 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "j", line: 907, scope: !293, file: !5, type: !8)
+!297 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "k", line: 907, scope: !293, file: !5, type: !8)
!298 = !{!299, !304, !305, !309, !310, !311, !312, !313, !314, !315, !316, !317, !318, !319, !320, !321, !322, !323, !324, !325, !326, !327, !328, !329, !330, !331, !332, !333, !334, !335, !336, !337, !338, !339, !340, !341, !342, !343, !347, !350, !351, !352, !353, !354, !355, !356, !360, !361, !362, !363, !364, !365, !366, !367, !368, !369, !370, !371, !372, !373, !374, !375, !376, !377, !378, !379, !380, !381, !382, !383, !384, !385, !386, !387, !388, !389, !390, !391, !392, !393, !394, !395, !396, !397, !398, !399, !400, !401, !402, !403, !404, !405, !406, !407, !408, !409, !410, !411, !412, !413, !414, !415, !416, !417, !418, !419, !422, !426, !427, !430, !431, !434, !435, !436, !437}
-!299 = !{!"0x34\00grid_points\00grid_points\00\0028\001\001", null, !300, !302, [3 x i32]* @grid_points, null} ; [ DW_TAG_variable ] [grid_points] [line 28] [local] [def]
-!300 = !{!"0x29", !301} ; [ DW_TAG_file_type ] [/home/hfinkel/src/NPB2.3-omp-C/BT/./header.h]
+!299 = !DIGlobalVariable(name: "grid_points", line: 28, isLocal: true, isDefinition: true, scope: null, file: !300, type: !302, variable: [3 x i32]* @grid_points)
+!300 = !DIFile(filename: "./header.h", directory: "/home/hfinkel/src/NPB2.3-omp-C/BT")
!301 = !{!"./header.h", !"/home/hfinkel/src/NPB2.3-omp-C/BT"}
-!302 = !{!"0x1\00\000\0096\0032\000\000", null, null, !8, !303, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 96, align 32, offset 0] [from int]
+!302 = !DICompositeType(tag: DW_TAG_array_type, size: 96, align: 32, baseType: !8, elements: !303)
!303 = !{!178}
-!304 = !{!"0x34\00dt\00dt\00\0035\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [dt] [line 35] [local] [def]
-!305 = !{!"0x34\00rhs\00rhs\00\0068\001\001", null, !300, !306, null, null} ; [ DW_TAG_variable ] [rhs] [line 68] [local] [def]
-!306 = !{!"0x1\00\000\001385839040\0064\000\000", null, null, !20, !307, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 1385839040, align 64, offset 0] [from double]
+!304 = !DIGlobalVariable(name: "dt", line: 35, isLocal: true, isDefinition: true, scope: null, file: !300, type: !20)
+!305 = !DIGlobalVariable(name: "rhs", line: 68, isLocal: true, isDefinition: true, scope: null, file: !300, type: !306)
+!306 = !DICompositeType(tag: DW_TAG_array_type, size: 1385839040, align: 64, baseType: !20, elements: !307)
!307 = !{!308, !308, !308, !93}
-!308 = !{!"0x21\000\00163"} ; [ DW_TAG_subrange_type ] [0, 162]
-!309 = !{!"0x34\00zzcon5\00zzcon5\00\0042\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [zzcon5] [line 42] [local] [def]
-!310 = !{!"0x34\00zzcon4\00zzcon4\00\0042\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [zzcon4] [line 42] [local] [def]
-!311 = !{!"0x34\00zzcon3\00zzcon3\00\0042\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [zzcon3] [line 42] [local] [def]
-!312 = !{!"0x34\00dz5tz1\00dz5tz1\00\0043\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [dz5tz1] [line 43] [local] [def]
-!313 = !{!"0x34\00dz4tz1\00dz4tz1\00\0043\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [dz4tz1] [line 43] [local] [def]
-!314 = !{!"0x34\00dz3tz1\00dz3tz1\00\0043\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [dz3tz1] [line 43] [local] [def]
-!315 = !{!"0x34\00zzcon2\00zzcon2\00\0042\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [zzcon2] [line 42] [local] [def]
-!316 = !{!"0x34\00dz2tz1\00dz2tz1\00\0043\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [dz2tz1] [line 43] [local] [def]
-!317 = !{!"0x34\00tz2\00tz2\00\0031\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [tz2] [line 31] [local] [def]
-!318 = !{!"0x34\00dz1tz1\00dz1tz1\00\0043\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [dz1tz1] [line 43] [local] [def]
-!319 = !{!"0x34\00yycon5\00yycon5\00\0040\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [yycon5] [line 40] [local] [def]
-!320 = !{!"0x34\00yycon4\00yycon4\00\0040\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [yycon4] [line 40] [local] [def]
-!321 = !{!"0x34\00yycon3\00yycon3\00\0040\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [yycon3] [line 40] [local] [def]
-!322 = !{!"0x34\00dy5ty1\00dy5ty1\00\0041\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [dy5ty1] [line 41] [local] [def]
-!323 = !{!"0x34\00dy4ty1\00dy4ty1\00\0041\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [dy4ty1] [line 41] [local] [def]
-!324 = !{!"0x34\00dy3ty1\00dy3ty1\00\0041\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [dy3ty1] [line 41] [local] [def]
-!325 = !{!"0x34\00yycon2\00yycon2\00\0040\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [yycon2] [line 40] [local] [def]
-!326 = !{!"0x34\00dy2ty1\00dy2ty1\00\0041\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [dy2ty1] [line 41] [local] [def]
-!327 = !{!"0x34\00ty2\00ty2\00\0031\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [ty2] [line 31] [local] [def]
-!328 = !{!"0x34\00dy1ty1\00dy1ty1\00\0041\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [dy1ty1] [line 41] [local] [def]
-!329 = !{!"0x34\00dssp\00dssp\00\0035\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [dssp] [line 35] [local] [def]
-!330 = !{!"0x34\00c1\00c1\00\0045\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [c1] [line 45] [local] [def]
-!331 = !{!"0x34\00xxcon5\00xxcon5\00\0038\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [xxcon5] [line 38] [local] [def]
-!332 = !{!"0x34\00xxcon4\00xxcon4\00\0038\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [xxcon4] [line 38] [local] [def]
-!333 = !{!"0x34\00xxcon3\00xxcon3\00\0038\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [xxcon3] [line 38] [local] [def]
-!334 = !{!"0x34\00dx5tx1\00dx5tx1\00\0039\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [dx5tx1] [line 39] [local] [def]
-!335 = !{!"0x34\00dx4tx1\00dx4tx1\00\0039\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [dx4tx1] [line 39] [local] [def]
-!336 = !{!"0x34\00dx3tx1\00dx3tx1\00\0039\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [dx3tx1] [line 39] [local] [def]
-!337 = !{!"0x34\00c2\00c2\00\0045\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [c2] [line 45] [local] [def]
-!338 = !{!"0x34\00con43\00con43\00\0048\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [con43] [line 48] [local] [def]
-!339 = !{!"0x34\00xxcon2\00xxcon2\00\0038\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [xxcon2] [line 38] [local] [def]
-!340 = !{!"0x34\00dx2tx1\00dx2tx1\00\0039\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [dx2tx1] [line 39] [local] [def]
-!341 = !{!"0x34\00tx2\00tx2\00\0031\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [tx2] [line 31] [local] [def]
-!342 = !{!"0x34\00dx1tx1\00dx1tx1\00\0039\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [dx1tx1] [line 39] [local] [def]
-!343 = !{!"0x34\00forcing\00forcing\00\0066\001\001", null, !300, !344, null, null} ; [ DW_TAG_variable ] [forcing] [line 66] [local] [def]
-!344 = !{!"0x1\00\000\001663006848\0064\000\000", null, null, !20, !345, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 1663006848, align 64, offset 0] [from double]
+!308 = !DISubrange(count: 163)
+!309 = !DIGlobalVariable(name: "zzcon5", line: 42, isLocal: true, isDefinition: true, scope: null, file: !300, type: !20)
+!310 = !DIGlobalVariable(name: "zzcon4", line: 42, isLocal: true, isDefinition: true, scope: null, file: !300, type: !20)
+!311 = !DIGlobalVariable(name: "zzcon3", line: 42, isLocal: true, isDefinition: true, scope: null, file: !300, type: !20)
+!312 = !DIGlobalVariable(name: "dz5tz1", line: 43, isLocal: true, isDefinition: true, scope: null, file: !300, type: !20)
+!313 = !DIGlobalVariable(name: "dz4tz1", line: 43, isLocal: true, isDefinition: true, scope: null, file: !300, type: !20)
+!314 = !DIGlobalVariable(name: "dz3tz1", line: 43, isLocal: true, isDefinition: true, scope: null, file: !300, type: !20)
+!315 = !DIGlobalVariable(name: "zzcon2", line: 42, isLocal: true, isDefinition: true, scope: null, file: !300, type: !20)
+!316 = !DIGlobalVariable(name: "dz2tz1", line: 43, isLocal: true, isDefinition: true, scope: null, file: !300, type: !20)
+!317 = !DIGlobalVariable(name: "tz2", line: 31, isLocal: true, isDefinition: true, scope: null, file: !300, type: !20)
+!318 = !DIGlobalVariable(name: "dz1tz1", line: 43, isLocal: true, isDefinition: true, scope: null, file: !300, type: !20)
+!319 = !DIGlobalVariable(name: "yycon5", line: 40, isLocal: true, isDefinition: true, scope: null, file: !300, type: !20)
+!320 = !DIGlobalVariable(name: "yycon4", line: 40, isLocal: true, isDefinition: true, scope: null, file: !300, type: !20)
+!321 = !DIGlobalVariable(name: "yycon3", line: 40, isLocal: true, isDefinition: true, scope: null, file: !300, type: !20)
+!322 = !DIGlobalVariable(name: "dy5ty1", line: 41, isLocal: true, isDefinition: true, scope: null, file: !300, type: !20)
+!323 = !DIGlobalVariable(name: "dy4ty1", line: 41, isLocal: true, isDefinition: true, scope: null, file: !300, type: !20)
+!324 = !DIGlobalVariable(name: "dy3ty1", line: 41, isLocal: true, isDefinition: true, scope: null, file: !300, type: !20)
+!325 = !DIGlobalVariable(name: "yycon2", line: 40, isLocal: true, isDefinition: true, scope: null, file: !300, type: !20)
+!326 = !DIGlobalVariable(name: "dy2ty1", line: 41, isLocal: true, isDefinition: true, scope: null, file: !300, type: !20)
+!327 = !DIGlobalVariable(name: "ty2", line: 31, isLocal: true, isDefinition: true, scope: null, file: !300, type: !20)
+!328 = !DIGlobalVariable(name: "dy1ty1", line: 41, isLocal: true, isDefinition: true, scope: null, file: !300, type: !20)
+!329 = !DIGlobalVariable(name: "dssp", line: 35, isLocal: true, isDefinition: true, scope: null, file: !300, type: !20)
+!330 = !DIGlobalVariable(name: "c1", line: 45, isLocal: true, isDefinition: true, scope: null, file: !300, type: !20)
+!331 = !DIGlobalVariable(name: "xxcon5", line: 38, isLocal: true, isDefinition: true, scope: null, file: !300, type: !20)
+!332 = !DIGlobalVariable(name: "xxcon4", line: 38, isLocal: true, isDefinition: true, scope: null, file: !300, type: !20)
+!333 = !DIGlobalVariable(name: "xxcon3", line: 38, isLocal: true, isDefinition: true, scope: null, file: !300, type: !20)
+!334 = !DIGlobalVariable(name: "dx5tx1", line: 39, isLocal: true, isDefinition: true, scope: null, file: !300, type: !20)
+!335 = !DIGlobalVariable(name: "dx4tx1", line: 39, isLocal: true, isDefinition: true, scope: null, file: !300, type: !20)
+!336 = !DIGlobalVariable(name: "dx3tx1", line: 39, isLocal: true, isDefinition: true, scope: null, file: !300, type: !20)
+!337 = !DIGlobalVariable(name: "c2", line: 45, isLocal: true, isDefinition: true, scope: null, file: !300, type: !20)
+!338 = !DIGlobalVariable(name: "con43", line: 48, isLocal: true, isDefinition: true, scope: null, file: !300, type: !20)
+!339 = !DIGlobalVariable(name: "xxcon2", line: 38, isLocal: true, isDefinition: true, scope: null, file: !300, type: !20)
+!340 = !DIGlobalVariable(name: "dx2tx1", line: 39, isLocal: true, isDefinition: true, scope: null, file: !300, type: !20)
+!341 = !DIGlobalVariable(name: "tx2", line: 31, isLocal: true, isDefinition: true, scope: null, file: !300, type: !20)
+!342 = !DIGlobalVariable(name: "dx1tx1", line: 39, isLocal: true, isDefinition: true, scope: null, file: !300, type: !20)
+!343 = !DIGlobalVariable(name: "forcing", line: 66, isLocal: true, isDefinition: true, scope: null, file: !300, type: !344)
+!344 = !DICompositeType(tag: DW_TAG_array_type, size: 1663006848, align: 64, baseType: !20, elements: !345)
!345 = !{!308, !308, !308, !346}
-!346 = !{!"0x21\000\006"} ; [ DW_TAG_subrange_type ] [0, 5]
-!347 = !{!"0x34\00qs\00qs\00\0063\001\001", null, !300, !348, null, null} ; [ DW_TAG_variable ] [qs] [line 63] [local] [def]
-!348 = !{!"0x1\00\000\00277167808\0064\000\000", null, null, !20, !349, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 277167808, align 64, offset 0] [from double]
+!346 = !DISubrange(count: 6)
+!347 = !DIGlobalVariable(name: "qs", line: 63, isLocal: true, isDefinition: true, scope: null, file: !300, type: !348)
+!348 = !DICompositeType(tag: DW_TAG_array_type, size: 277167808, align: 64, baseType: !20, elements: !349)
!349 = !{!308, !308, !308}
-!350 = !{!"0x34\00square\00square\00\0065\001\001", null, !300, !348, null, null} ; [ DW_TAG_variable ] [square] [line 65] [local] [def]
-!351 = !{!"0x34\00ws\00ws\00\0062\001\001", null, !300, !348, null, null} ; [ DW_TAG_variable ] [ws] [line 62] [local] [def]
-!352 = !{!"0x34\00vs\00vs\00\0061\001\001", null, !300, !348, null, null} ; [ DW_TAG_variable ] [vs] [line 61] [local] [def]
-!353 = !{!"0x34\00us\00us\00\0060\001\001", null, !300, !348, null, null} ; [ DW_TAG_variable ] [us] [line 60] [local] [def]
-!354 = !{!"0x34\00rho_i\00rho_i\00\0064\001\001", null, !300, !348, null, null} ; [ DW_TAG_variable ] [rho_i] [line 64] [local] [def]
-!355 = !{!"0x34\00u\00u\00\0067\001\001", null, !300, !306, null, null} ; [ DW_TAG_variable ] [u] [line 67] [local] [def]
-!356 = !{!"0x34\00ce\00ce\00\0036\001\001", null, !300, !357, null, null} ; [ DW_TAG_variable ] [ce] [line 36] [local] [def]
-!357 = !{!"0x1\00\000\004160\0064\000\000", null, null, !20, !358, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 4160, align 64, offset 0] [from double]
+!350 = !DIGlobalVariable(name: "square", line: 65, isLocal: true, isDefinition: true, scope: null, file: !300, type: !348)
+!351 = !DIGlobalVariable(name: "ws", line: 62, isLocal: true, isDefinition: true, scope: null, file: !300, type: !348)
+!352 = !DIGlobalVariable(name: "vs", line: 61, isLocal: true, isDefinition: true, scope: null, file: !300, type: !348)
+!353 = !DIGlobalVariable(name: "us", line: 60, isLocal: true, isDefinition: true, scope: null, file: !300, type: !348)
+!354 = !DIGlobalVariable(name: "rho_i", line: 64, isLocal: true, isDefinition: true, scope: null, file: !300, type: !348)
+!355 = !DIGlobalVariable(name: "u", line: 67, isLocal: true, isDefinition: true, scope: null, file: !300, type: !306)
+!356 = !DIGlobalVariable(name: "ce", line: 36, isLocal: true, isDefinition: true, scope: null, file: !300, type: !357)
+!357 = !DICompositeType(tag: DW_TAG_array_type, size: 4160, align: 64, baseType: !20, elements: !358)
!358 = !{!93, !359}
-!359 = !{!"0x21\000\0013"} ; [ DW_TAG_subrange_type ] [0, 12]
-!360 = !{!"0x34\00dnzm1\00dnzm1\00\0044\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [dnzm1] [line 44] [local] [def]
-!361 = !{!"0x34\00dnym1\00dnym1\00\0044\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [dnym1] [line 44] [local] [def]
-!362 = !{!"0x34\00dnxm1\00dnxm1\00\0044\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [dnxm1] [line 44] [local] [def]
-!363 = !{!"0x34\00zzcon1\00zzcon1\00\0042\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [zzcon1] [line 42] [local] [def]
-!364 = !{!"0x34\00yycon1\00yycon1\00\0040\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [yycon1] [line 40] [local] [def]
-!365 = !{!"0x34\00xxcon1\00xxcon1\00\0038\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [xxcon1] [line 38] [local] [def]
-!366 = !{!"0x34\00con16\00con16\00\0048\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [con16] [line 48] [local] [def]
-!367 = !{!"0x34\00c2iv\00c2iv\00\0048\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [c2iv] [line 48] [local] [def]
-!368 = !{!"0x34\00c3c4tz3\00c3c4tz3\00\0048\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [c3c4tz3] [line 48] [local] [def]
-!369 = !{!"0x34\00c3c4ty3\00c3c4ty3\00\0048\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [c3c4ty3] [line 48] [local] [def]
-!370 = !{!"0x34\00c3c4tx3\00c3c4tx3\00\0048\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [c3c4tx3] [line 48] [local] [def]
-!371 = !{!"0x34\00comz6\00comz6\00\0047\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [comz6] [line 47] [local] [def]
-!372 = !{!"0x34\00comz5\00comz5\00\0047\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [comz5] [line 47] [local] [def]
-!373 = !{!"0x34\00comz4\00comz4\00\0047\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [comz4] [line 47] [local] [def]
-!374 = !{!"0x34\00comz1\00comz1\00\0047\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [comz1] [line 47] [local] [def]
-!375 = !{!"0x34\00dtdssp\00dtdssp\00\0045\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [dtdssp] [line 45] [local] [def]
-!376 = !{!"0x34\00c2dttz1\00c2dttz1\00\0047\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [c2dttz1] [line 47] [local] [def]
-!377 = !{!"0x34\00c2dtty1\00c2dtty1\00\0047\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [c2dtty1] [line 47] [local] [def]
-!378 = !{!"0x34\00c2dttx1\00c2dttx1\00\0047\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [c2dttx1] [line 47] [local] [def]
-!379 = !{!"0x34\00dttz2\00dttz2\00\0046\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [dttz2] [line 46] [local] [def]
-!380 = !{!"0x34\00dttz1\00dttz1\00\0046\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [dttz1] [line 46] [local] [def]
-!381 = !{!"0x34\00dtty2\00dtty2\00\0046\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [dtty2] [line 46] [local] [def]
-!382 = !{!"0x34\00dtty1\00dtty1\00\0046\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [dtty1] [line 46] [local] [def]
-!383 = !{!"0x34\00dttx2\00dttx2\00\0046\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [dttx2] [line 46] [local] [def]
-!384 = !{!"0x34\00dttx1\00dttx1\00\0046\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [dttx1] [line 46] [local] [def]
-!385 = !{!"0x34\00c5dssp\00c5dssp\00\0045\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [c5dssp] [line 45] [local] [def]
-!386 = !{!"0x34\00c4dssp\00c4dssp\00\0045\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [c4dssp] [line 45] [local] [def]
-!387 = !{!"0x34\00dzmax\00dzmax\00\0037\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [dzmax] [line 37] [local] [def]
-!388 = !{!"0x34\00dymax\00dymax\00\0037\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [dymax] [line 37] [local] [def]
-!389 = !{!"0x34\00dxmax\00dxmax\00\0037\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [dxmax] [line 37] [local] [def]
-!390 = !{!"0x34\00dz5\00dz5\00\0034\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [dz5] [line 34] [local] [def]
-!391 = !{!"0x34\00dz4\00dz4\00\0034\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [dz4] [line 34] [local] [def]
-!392 = !{!"0x34\00dz3\00dz3\00\0034\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [dz3] [line 34] [local] [def]
-!393 = !{!"0x34\00dz2\00dz2\00\0034\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [dz2] [line 34] [local] [def]
-!394 = !{!"0x34\00dz1\00dz1\00\0034\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [dz1] [line 34] [local] [def]
-!395 = !{!"0x34\00dy5\00dy5\00\0033\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [dy5] [line 33] [local] [def]
-!396 = !{!"0x34\00dy4\00dy4\00\0033\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [dy4] [line 33] [local] [def]
-!397 = !{!"0x34\00dy3\00dy3\00\0033\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [dy3] [line 33] [local] [def]
-!398 = !{!"0x34\00dy2\00dy2\00\0033\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [dy2] [line 33] [local] [def]
-!399 = !{!"0x34\00dy1\00dy1\00\0033\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [dy1] [line 33] [local] [def]
-!400 = !{!"0x34\00dx5\00dx5\00\0032\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [dx5] [line 32] [local] [def]
-!401 = !{!"0x34\00dx4\00dx4\00\0032\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [dx4] [line 32] [local] [def]
-!402 = !{!"0x34\00dx3\00dx3\00\0032\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [dx3] [line 32] [local] [def]
-!403 = !{!"0x34\00dx2\00dx2\00\0032\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [dx2] [line 32] [local] [def]
-!404 = !{!"0x34\00dx1\00dx1\00\0032\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [dx1] [line 32] [local] [def]
-!405 = !{!"0x34\00tz3\00tz3\00\0031\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [tz3] [line 31] [local] [def]
-!406 = !{!"0x34\00tz1\00tz1\00\0031\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [tz1] [line 31] [local] [def]
-!407 = !{!"0x34\00ty3\00ty3\00\0031\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [ty3] [line 31] [local] [def]
-!408 = !{!"0x34\00ty1\00ty1\00\0031\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [ty1] [line 31] [local] [def]
-!409 = !{!"0x34\00tx3\00tx3\00\0031\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [tx3] [line 31] [local] [def]
-!410 = !{!"0x34\00tx1\00tx1\00\0031\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [tx1] [line 31] [local] [def]
-!411 = !{!"0x34\00conz1\00conz1\00\0045\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [conz1] [line 45] [local] [def]
-!412 = !{!"0x34\00c1345\00c1345\00\0044\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [c1345] [line 44] [local] [def]
-!413 = !{!"0x34\00c3c4\00c3c4\00\0044\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [c3c4] [line 44] [local] [def]
-!414 = !{!"0x34\00c1c5\00c1c5\00\0044\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [c1c5] [line 44] [local] [def]
-!415 = !{!"0x34\00c1c2\00c1c2\00\0044\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [c1c2] [line 44] [local] [def]
-!416 = !{!"0x34\00c5\00c5\00\0045\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [c5] [line 45] [local] [def]
-!417 = !{!"0x34\00c4\00c4\00\0045\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [c4] [line 45] [local] [def]
-!418 = !{!"0x34\00c3\00c3\00\0045\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [c3] [line 45] [local] [def]
-!419 = !{!"0x34\00lhs\00lhs\00\0069\001\001", null, !300, !420, null, null} ; [ DW_TAG_variable ] [lhs] [line 69] [local] [def]
-!420 = !{!"0x1\00\000\0020787585600\0064\000\000", null, null, !20, !421, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 20787585600, align 64, offset 0] [from double]
+!359 = !DISubrange(count: 13)
+!360 = !DIGlobalVariable(name: "dnzm1", line: 44, isLocal: true, isDefinition: true, scope: null, file: !300, type: !20)
+!361 = !DIGlobalVariable(name: "dnym1", line: 44, isLocal: true, isDefinition: true, scope: null, file: !300, type: !20)
+!362 = !DIGlobalVariable(name: "dnxm1", line: 44, isLocal: true, isDefinition: true, scope: null, file: !300, type: !20)
+!363 = !DIGlobalVariable(name: "zzcon1", line: 42, isLocal: true, isDefinition: true, scope: null, file: !300, type: !20)
+!364 = !DIGlobalVariable(name: "yycon1", line: 40, isLocal: true, isDefinition: true, scope: null, file: !300, type: !20)
+!365 = !DIGlobalVariable(name: "xxcon1", line: 38, isLocal: true, isDefinition: true, scope: null, file: !300, type: !20)
+!366 = !DIGlobalVariable(name: "con16", line: 48, isLocal: true, isDefinition: true, scope: null, file: !300, type: !20)
+!367 = !DIGlobalVariable(name: "c2iv", line: 48, isLocal: true, isDefinition: true, scope: null, file: !300, type: !20)
+!368 = !DIGlobalVariable(name: "c3c4tz3", line: 48, isLocal: true, isDefinition: true, scope: null, file: !300, type: !20)
+!369 = !DIGlobalVariable(name: "c3c4ty3", line: 48, isLocal: true, isDefinition: true, scope: null, file: !300, type: !20)
+!370 = !DIGlobalVariable(name: "c3c4tx3", line: 48, isLocal: true, isDefinition: true, scope: null, file: !300, type: !20)
+!371 = !DIGlobalVariable(name: "comz6", line: 47, isLocal: true, isDefinition: true, scope: null, file: !300, type: !20)
+!372 = !DIGlobalVariable(name: "comz5", line: 47, isLocal: true, isDefinition: true, scope: null, file: !300, type: !20)
+!373 = !DIGlobalVariable(name: "comz4", line: 47, isLocal: true, isDefinition: true, scope: null, file: !300, type: !20)
+!374 = !DIGlobalVariable(name: "comz1", line: 47, isLocal: true, isDefinition: true, scope: null, file: !300, type: !20)
+!375 = !DIGlobalVariable(name: "dtdssp", line: 45, isLocal: true, isDefinition: true, scope: null, file: !300, type: !20)
+!376 = !DIGlobalVariable(name: "c2dttz1", line: 47, isLocal: true, isDefinition: true, scope: null, file: !300, type: !20)
+!377 = !DIGlobalVariable(name: "c2dtty1", line: 47, isLocal: true, isDefinition: true, scope: null, file: !300, type: !20)
+!378 = !DIGlobalVariable(name: "c2dttx1", line: 47, isLocal: true, isDefinition: true, scope: null, file: !300, type: !20)
+!379 = !DIGlobalVariable(name: "dttz2", line: 46, isLocal: true, isDefinition: true, scope: null, file: !300, type: !20)
+!380 = !DIGlobalVariable(name: "dttz1", line: 46, isLocal: true, isDefinition: true, scope: null, file: !300, type: !20)
+!381 = !DIGlobalVariable(name: "dtty2", line: 46, isLocal: true, isDefinition: true, scope: null, file: !300, type: !20)
+!382 = !DIGlobalVariable(name: "dtty1", line: 46, isLocal: true, isDefinition: true, scope: null, file: !300, type: !20)
+!383 = !DIGlobalVariable(name: "dttx2", line: 46, isLocal: true, isDefinition: true, scope: null, file: !300, type: !20)
+!384 = !DIGlobalVariable(name: "dttx1", line: 46, isLocal: true, isDefinition: true, scope: null, file: !300, type: !20)
+!385 = !DIGlobalVariable(name: "c5dssp", line: 45, isLocal: true, isDefinition: true, scope: null, file: !300, type: !20)
+!386 = !DIGlobalVariable(name: "c4dssp", line: 45, isLocal: true, isDefinition: true, scope: null, file: !300, type: !20)
+!387 = !DIGlobalVariable(name: "dzmax", line: 37, isLocal: true, isDefinition: true, scope: null, file: !300, type: !20)
+!388 = !DIGlobalVariable(name: "dymax", line: 37, isLocal: true, isDefinition: true, scope: null, file: !300, type: !20)
+!389 = !DIGlobalVariable(name: "dxmax", line: 37, isLocal: true, isDefinition: true, scope: null, file: !300, type: !20)
+!390 = !DIGlobalVariable(name: "dz5", line: 34, isLocal: true, isDefinition: true, scope: null, file: !300, type: !20)
+!391 = !DIGlobalVariable(name: "dz4", line: 34, isLocal: true, isDefinition: true, scope: null, file: !300, type: !20)
+!392 = !DIGlobalVariable(name: "dz3", line: 34, isLocal: true, isDefinition: true, scope: null, file: !300, type: !20)
+!393 = !DIGlobalVariable(name: "dz2", line: 34, isLocal: true, isDefinition: true, scope: null, file: !300, type: !20)
+!394 = !DIGlobalVariable(name: "dz1", line: 34, isLocal: true, isDefinition: true, scope: null, file: !300, type: !20)
+!395 = !DIGlobalVariable(name: "dy5", line: 33, isLocal: true, isDefinition: true, scope: null, file: !300, type: !20)
+!396 = !DIGlobalVariable(name: "dy4", line: 33, isLocal: true, isDefinition: true, scope: null, file: !300, type: !20)
+!397 = !DIGlobalVariable(name: "dy3", line: 33, isLocal: true, isDefinition: true, scope: null, file: !300, type: !20)
+!398 = !DIGlobalVariable(name: "dy2", line: 33, isLocal: true, isDefinition: true, scope: null, file: !300, type: !20)
+!399 = !DIGlobalVariable(name: "dy1", line: 33, isLocal: true, isDefinition: true, scope: null, file: !300, type: !20)
+!400 = !DIGlobalVariable(name: "dx5", line: 32, isLocal: true, isDefinition: true, scope: null, file: !300, type: !20)
+!401 = !DIGlobalVariable(name: "dx4", line: 32, isLocal: true, isDefinition: true, scope: null, file: !300, type: !20)
+!402 = !DIGlobalVariable(name: "dx3", line: 32, isLocal: true, isDefinition: true, scope: null, file: !300, type: !20)
+!403 = !DIGlobalVariable(name: "dx2", line: 32, isLocal: true, isDefinition: true, scope: null, file: !300, type: !20)
+!404 = !DIGlobalVariable(name: "dx1", line: 32, isLocal: true, isDefinition: true, scope: null, file: !300, type: !20)
+!405 = !DIGlobalVariable(name: "tz3", line: 31, isLocal: true, isDefinition: true, scope: null, file: !300, type: !20)
+!406 = !DIGlobalVariable(name: "tz1", line: 31, isLocal: true, isDefinition: true, scope: null, file: !300, type: !20)
+!407 = !DIGlobalVariable(name: "ty3", line: 31, isLocal: true, isDefinition: true, scope: null, file: !300, type: !20)
+!408 = !DIGlobalVariable(name: "ty1", line: 31, isLocal: true, isDefinition: true, scope: null, file: !300, type: !20)
+!409 = !DIGlobalVariable(name: "tx3", line: 31, isLocal: true, isDefinition: true, scope: null, file: !300, type: !20)
+!410 = !DIGlobalVariable(name: "tx1", line: 31, isLocal: true, isDefinition: true, scope: null, file: !300, type: !20)
+!411 = !DIGlobalVariable(name: "conz1", line: 45, isLocal: true, isDefinition: true, scope: null, file: !300, type: !20)
+!412 = !DIGlobalVariable(name: "c1345", line: 44, isLocal: true, isDefinition: true, scope: null, file: !300, type: !20)
+!413 = !DIGlobalVariable(name: "c3c4", line: 44, isLocal: true, isDefinition: true, scope: null, file: !300, type: !20)
+!414 = !DIGlobalVariable(name: "c1c5", line: 44, isLocal: true, isDefinition: true, scope: null, file: !300, type: !20)
+!415 = !DIGlobalVariable(name: "c1c2", line: 44, isLocal: true, isDefinition: true, scope: null, file: !300, type: !20)
+!416 = !DIGlobalVariable(name: "c5", line: 45, isLocal: true, isDefinition: true, scope: null, file: !300, type: !20)
+!417 = !DIGlobalVariable(name: "c4", line: 45, isLocal: true, isDefinition: true, scope: null, file: !300, type: !20)
+!418 = !DIGlobalVariable(name: "c3", line: 45, isLocal: true, isDefinition: true, scope: null, file: !300, type: !20)
+!419 = !DIGlobalVariable(name: "lhs", line: 69, isLocal: true, isDefinition: true, scope: null, file: !300, type: !420)
+!420 = !DICompositeType(tag: DW_TAG_array_type, size: 20787585600, align: 64, baseType: !20, elements: !421)
!421 = !{!308, !308, !308, !178, !93, !93}
-!422 = !{!"0x34\00q\00q\00\0073\001\001", null, !300, !423, null, null} ; [ DW_TAG_variable ] [q] [line 73] [local] [def]
-!423 = !{!"0x1\00\000\0010368\0064\000\000", null, null, !20, !424, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 10368, align 64, offset 0] [from double]
+!422 = !DIGlobalVariable(name: "q", line: 73, isLocal: true, isDefinition: true, scope: null, file: !300, type: !423)
+!423 = !DICompositeType(tag: DW_TAG_array_type, size: 10368, align: 64, baseType: !20, elements: !424)
!424 = !{!425}
-!425 = !{!"0x21\000\00162"} ; [ DW_TAG_subrange_type ] [0, 161]
-!426 = !{!"0x34\00cuf\00cuf\00\0072\001\001", null, !300, !423, null, null} ; [ DW_TAG_variable ] [cuf] [line 72] [local] [def]
-!427 = !{!"0x34\00buf\00buf\00\0075\001\001", null, !300, !428, null, null} ; [ DW_TAG_variable ] [buf] [line 75] [local] [def]
-!428 = !{!"0x1\00\000\0051840\0064\000\000", null, null, !20, !429, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 51840, align 64, offset 0] [from double]
+!425 = !DISubrange(count: 162)
+!426 = !DIGlobalVariable(name: "cuf", line: 72, isLocal: true, isDefinition: true, scope: null, file: !300, type: !423)
+!427 = !DIGlobalVariable(name: "buf", line: 75, isLocal: true, isDefinition: true, scope: null, file: !300, type: !428)
+!428 = !DICompositeType(tag: DW_TAG_array_type, size: 51840, align: 64, baseType: !20, elements: !429)
!429 = !{!425, !93}
-!430 = !{!"0x34\00ue\00ue\00\0074\001\001", null, !300, !428, null, null} ; [ DW_TAG_variable ] [ue] [line 74] [local] [def]
-!431 = !{!"0x34\00njac\00njac\00\0086\001\001", null, !300, !432, null, null} ; [ DW_TAG_variable ] [njac] [line 86] [local] [def]
-!432 = !{!"0x1\00\000\006886684800\0064\000\000", null, null, !20, !433, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 6886684800, align 64, offset 0] [from double]
+!430 = !DIGlobalVariable(name: "ue", line: 74, isLocal: true, isDefinition: true, scope: null, file: !300, type: !428)
+!431 = !DIGlobalVariable(name: "njac", line: 86, isLocal: true, isDefinition: true, scope: null, file: !300, type: !432)
+!432 = !DICompositeType(tag: DW_TAG_array_type, size: 6886684800, align: 64, baseType: !20, elements: !433)
!433 = !{!308, !308, !425, !93, !93}
-!434 = !{!"0x34\00fjac\00fjac\00\0084\001\001", null, !300, !432, null, null} ; [ DW_TAG_variable ] [fjac] [line 84] [local] [def]
-!435 = !{!"0x34\00tmp3\00tmp3\00\0088\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [tmp3] [line 88] [local] [def]
-!436 = !{!"0x34\00tmp2\00tmp2\00\0088\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [tmp2] [line 88] [local] [def]
-!437 = !{!"0x34\00tmp1\00tmp1\00\0088\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [tmp1] [line 88] [local] [def]
+!434 = !DIGlobalVariable(name: "fjac", line: 84, isLocal: true, isDefinition: true, scope: null, file: !300, type: !432)
+!435 = !DIGlobalVariable(name: "tmp3", line: 88, isLocal: true, isDefinition: true, scope: null, file: !300, type: !20)
+!436 = !DIGlobalVariable(name: "tmp2", line: 88, isLocal: true, isDefinition: true, scope: null, file: !300, type: !20)
+!437 = !DIGlobalVariable(name: "tmp1", line: 88, isLocal: true, isDefinition: true, scope: null, file: !300, type: !20)
!438 = !{i32 2, !"Dwarf Version", i32 4}
-!439 = !MDLocation(line: 1898, scope: !440)
-!440 = !{!"0xb\001898\000\00107", !1, !114} ; [ DW_TAG_lexical_block ] [/home/hfinkel/src/NPB2.3-omp-C/BT/bt.c]
-!441 = !MDLocation(line: 1913, scope: !442)
-!442 = !{!"0xb\001913\000\00115", !1, !114} ; [ DW_TAG_lexical_block ] [/home/hfinkel/src/NPB2.3-omp-C/BT/bt.c]
-!443 = !MDLocation(line: 1923, scope: !114)
+!439 = !DILocation(line: 1898, scope: !440)
+!440 = distinct !DILexicalBlock(line: 1898, column: 0, file: !1, scope: !114)
+!441 = !DILocation(line: 1913, scope: !442)
+!442 = distinct !DILexicalBlock(line: 1913, column: 0, file: !1, scope: !114)
+!443 = !DILocation(line: 1923, scope: !114)
!444 = !{!"int", !445}
!445 = !{!"omnipotent char", !446}
!446 = !{!"Simple C/C++ TBAA"}
!447 = !{i32 1}
-!448 = !MDLocation(line: 1925, scope: !449)
-!449 = !{!"0xb\001925\000\00121", !1, !114} ; [ DW_TAG_lexical_block ] [/home/hfinkel/src/NPB2.3-omp-C/BT/bt.c]
-!450 = !MDLocation(line: 1939, scope: !451)
-!451 = !{!"0xb\001939\000\00127", !1, !114} ; [ DW_TAG_lexical_block ] [/home/hfinkel/src/NPB2.3-omp-C/BT/bt.c]
-!452 = !MDLocation(line: 1940, scope: !453)
-!453 = !{!"0xb\001940\000\00129", !1, !454} ; [ DW_TAG_lexical_block ] [/home/hfinkel/src/NPB2.3-omp-C/BT/bt.c]
-!454 = !{!"0xb\001939\000\00128", !1, !451} ; [ DW_TAG_lexical_block ] [/home/hfinkel/src/NPB2.3-omp-C/BT/bt.c]
-!455 = !MDLocation(line: 1941, scope: !456)
-!456 = !{!"0xb\001941\000\00131", !1, !457} ; [ DW_TAG_lexical_block ] [/home/hfinkel/src/NPB2.3-omp-C/BT/bt.c]
-!457 = !{!"0xb\001940\000\00130", !1, !453} ; [ DW_TAG_lexical_block ] [/home/hfinkel/src/NPB2.3-omp-C/BT/bt.c]
-!458 = !MDLocation(line: 2020, scope: !459)
-!459 = !{!"0xb\002020\000\00149", !1, !460} ; [ DW_TAG_lexical_block ] [/home/hfinkel/src/NPB2.3-omp-C/BT/bt.c]
-!460 = !{!"0xb\002019\000\00148", !1, !461} ; [ DW_TAG_lexical_block ] [/home/hfinkel/src/NPB2.3-omp-C/BT/bt.c]
-!461 = !{!"0xb\002019\000\00147", !1, !462} ; [ DW_TAG_lexical_block ] [/home/hfinkel/src/NPB2.3-omp-C/BT/bt.c]
-!462 = !{!"0xb\002018\000\00146", !1, !463} ; [ DW_TAG_lexical_block ] [/home/hfinkel/src/NPB2.3-omp-C/BT/bt.c]
-!463 = !{!"0xb\002018\000\00145", !1, !114} ; [ DW_TAG_lexical_block ] [/home/hfinkel/src/NPB2.3-omp-C/BT/bt.c]
-!464 = !{i32 1, !"Debug Info Version", i32 2}
+!448 = !DILocation(line: 1925, scope: !449)
+!449 = distinct !DILexicalBlock(line: 1925, column: 0, file: !1, scope: !114)
+!450 = !DILocation(line: 1939, scope: !451)
+!451 = distinct !DILexicalBlock(line: 1939, column: 0, file: !1, scope: !114)
+!452 = !DILocation(line: 1940, scope: !453)
+!453 = distinct !DILexicalBlock(line: 1940, column: 0, file: !1, scope: !454)
+!454 = distinct !DILexicalBlock(line: 1939, column: 0, file: !1, scope: !451)
+!455 = !DILocation(line: 1941, scope: !456)
+!456 = distinct !DILexicalBlock(line: 1941, column: 0, file: !1, scope: !457)
+!457 = distinct !DILexicalBlock(line: 1940, column: 0, file: !1, scope: !453)
+!458 = !DILocation(line: 2020, scope: !459)
+!459 = distinct !DILexicalBlock(line: 2020, column: 0, file: !1, scope: !460)
+!460 = distinct !DILexicalBlock(line: 2019, column: 0, file: !1, scope: !461)
+!461 = distinct !DILexicalBlock(line: 2019, column: 0, file: !1, scope: !462)
+!462 = distinct !DILexicalBlock(line: 2018, column: 0, file: !1, scope: !463)
+!463 = distinct !DILexicalBlock(line: 2018, column: 0, file: !1, scope: !114)
+!464 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/CodeGen/PowerPC/pr17354.ll b/test/CodeGen/PowerPC/pr17354.ll
index dca81b1c2ca6..ed6fd3480dd4 100644
--- a/test/CodeGen/PowerPC/pr17354.ll
+++ b/test/CodeGen/PowerPC/pr17354.ll
@@ -14,7 +14,7 @@ target triple = "powerpc64-unknown-linux-gnu"
define internal void @__cxx_global_var_init() section ".text.startup" {
entry:
- call void @_Z4funcv(%struct.CS* sret getelementptr inbounds ([1 x %struct.CS]* @_ZL3glb, i64 0, i64 0))
+ call void @_Z4funcv(%struct.CS* sret getelementptr inbounds ([1 x %struct.CS], [1 x %struct.CS]* @_ZL3glb, i64 0, i64 0))
ret void
}
@@ -25,7 +25,7 @@ entry:
; Function Attrs: nounwind
define void @_Z4funcv(%struct.CS* noalias sret %agg.result) #0 {
entry:
- %a_ = getelementptr inbounds %struct.CS* %agg.result, i32 0, i32 0
+ %a_ = getelementptr inbounds %struct.CS, %struct.CS* %agg.result, i32 0, i32 0
store i32 0, i32* %a_, align 4
ret void
}
diff --git a/test/CodeGen/PowerPC/pr18663.ll b/test/CodeGen/PowerPC/pr18663.ll
index 1b85223aa09a..04bc39276f90 100644
--- a/test/CodeGen/PowerPC/pr18663.ll
+++ b/test/CodeGen/PowerPC/pr18663.ll
@@ -61,21 +61,21 @@
define void @_ZNK18TriaObjectAccessorILi3ELi3EE10barycenterEv(%class.Point.1* noalias nocapture sret %agg.result, %class.TriaObjectAccessor.57* %this) #0 align 2 {
entry:
- %0 = load double* null, align 8
- %1 = load double* undef, align 8
+ %0 = load double, double* null, align 8
+ %1 = load double, double* undef, align 8
%call18 = tail call dereferenceable(24) %class.Point.1* @_ZNK18TriaObjectAccessorILi3ELi3EE6vertexEj(%class.TriaObjectAccessor.57* %this, i32 zeroext 6)
- %2 = load double* undef, align 8
+ %2 = load double, double* undef, align 8
%call21 = tail call dereferenceable(24) %class.Point.1* @_ZNK18TriaObjectAccessorILi3ELi3EE6vertexEj(%class.TriaObjectAccessor.57* %this, i32 zeroext 7)
- %3 = load double* undef, align 8
+ %3 = load double, double* undef, align 8
%call33 = tail call dereferenceable(24) %class.Point.1* @_ZNK18TriaObjectAccessorILi3ELi3EE6vertexEj(%class.TriaObjectAccessor.57* %this, i32 zeroext 3)
- %4 = load double* null, align 8
- %5 = load double* undef, align 8
+ %4 = load double, double* null, align 8
+ %5 = load double, double* undef, align 8
%call45 = tail call dereferenceable(24) %class.Point.1* @_ZNK18TriaObjectAccessorILi3ELi3EE6vertexEj(%class.TriaObjectAccessor.57* %this, i32 zeroext 7)
- %6 = load double* undef, align 8
+ %6 = load double, double* undef, align 8
%call48 = tail call dereferenceable(24) %class.Point.1* @_ZNK18TriaObjectAccessorILi3ELi3EE6vertexEj(%class.TriaObjectAccessor.57* %this, i32 zeroext 0)
- %7 = load double* undef, align 8
+ %7 = load double, double* undef, align 8
%call66 = tail call dereferenceable(24) %class.Point.1* @_ZNK18TriaObjectAccessorILi3ELi3EE6vertexEj(%class.TriaObjectAccessor.57* %this, i32 zeroext 6)
- %8 = load double* undef, align 8
+ %8 = load double, double* undef, align 8
%mul334 = fmul double undef, 2.000000e+00
%mul579 = fmul double %2, %5
%mul597 = fmul double undef, %mul579
diff --git a/test/CodeGen/PowerPC/pr20442.ll b/test/CodeGen/PowerPC/pr20442.ll
index ad43a04e70c4..555c3dae1f47 100644
--- a/test/CodeGen/PowerPC/pr20442.ll
+++ b/test/CodeGen/PowerPC/pr20442.ll
@@ -20,15 +20,15 @@ target triple = "powerpc-unknown-linux-gnu"
; Function Attrs: nounwind readonly uwtable
define i32 @fn1() #0 {
entry:
- %0 = load %struct.anon** @b, align 4
+ %0 = load %struct.anon*, %struct.anon** @b, align 4
%1 = ptrtoint %struct.anon* %0 to i32
%cmp = icmp sgt %struct.anon* %0, null
- %2 = load %struct.anon.0** @a, align 4
+ %2 = load %struct.anon.0*, %struct.anon.0** @a, align 4
br i1 %cmp, label %for.bodythread-pre-split, label %if.end8
for.bodythread-pre-split: ; preds = %entry
- %aclass = getelementptr inbounds %struct.anon.0* %2, i32 0, i32 0
- %.pr = load i32* %aclass, align 4
+ %aclass = getelementptr inbounds %struct.anon.0, %struct.anon.0* %2, i32 0, i32 0
+ %.pr = load i32, i32* %aclass, align 4
br label %for.body
for.body: ; preds = %for.bodythread-pre-split, %for.body
@@ -51,10 +51,10 @@ while.cond: ; preds = %while.body
while.body: ; preds = %while.body.lr.ph, %while.cond
%j.110 = phi i32 [ %j.1.ph13, %while.body.lr.ph ], [ %inc7, %while.cond ]
- %aclass_index = getelementptr inbounds %struct.anon* %0, i32 %j.110, i32 0
- %3 = load i32* %aclass_index, align 4
- %aclass5 = getelementptr inbounds %struct.anon.0* %2, i32 %3, i32 0
- %4 = load i32* %aclass5, align 4
+ %aclass_index = getelementptr inbounds %struct.anon, %struct.anon* %0, i32 %j.110, i32 0
+ %3 = load i32, i32* %aclass_index, align 4
+ %aclass5 = getelementptr inbounds %struct.anon.0, %struct.anon.0* %2, i32 %3, i32 0
+ %4 = load i32, i32* %aclass5, align 4
%tobool = icmp eq i32 %4, 0
%inc7 = add nsw i32 %j.110, 1
br i1 %tobool, label %while.cond, label %if.then6
diff --git a/test/CodeGen/PowerPC/pr22711.ll b/test/CodeGen/PowerPC/pr22711.ll
new file mode 100644
index 000000000000..fb1e971d4416
--- /dev/null
+++ b/test/CodeGen/PowerPC/pr22711.ll
@@ -0,0 +1,78 @@
+; Verify that the .toc section is aligned on an 8-byte boundary.
+
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 -filetype=obj -o - | llvm-readobj --sections | FileCheck %s
+
+define void @test(i32* %a) {
+entry:
+ %a.addr = alloca i32*, align 8
+ store i32* %a, i32** %a.addr, align 8
+ %0 = load i32*, i32** %a.addr, align 8
+ %incdec.ptr = getelementptr inbounds i32, i32* %0, i32 1
+ store i32* %incdec.ptr, i32** %a.addr, align 8
+ %1 = load i32, i32* %0, align 4
+ switch i32 %1, label %sw.epilog [
+ i32 17, label %sw.bb
+ i32 13, label %sw.bb1
+ i32 11, label %sw.bb2
+ i32 7, label %sw.bb3
+ i32 5, label %sw.bb4
+ i32 3, label %sw.bb5
+ i32 2, label %sw.bb6
+ ]
+
+sw.bb: ; preds = %entry
+ %2 = load i32*, i32** %a.addr, align 8
+ store i32 2, i32* %2, align 4
+ br label %sw.epilog
+
+sw.bb1: ; preds = %entry
+ %3 = load i32*, i32** %a.addr, align 8
+ store i32 3, i32* %3, align 4
+ br label %sw.epilog
+
+sw.bb2: ; preds = %entry
+ %4 = load i32*, i32** %a.addr, align 8
+ store i32 5, i32* %4, align 4
+ br label %sw.epilog
+
+sw.bb3: ; preds = %entry
+ %5 = load i32*, i32** %a.addr, align 8
+ store i32 7, i32* %5, align 4
+ br label %sw.epilog
+
+sw.bb4: ; preds = %entry
+ %6 = load i32*, i32** %a.addr, align 8
+ store i32 11, i32* %6, align 4
+ br label %sw.epilog
+
+sw.bb5: ; preds = %entry
+ %7 = load i32*, i32** %a.addr, align 8
+ store i32 13, i32* %7, align 4
+ br label %sw.epilog
+
+sw.bb6: ; preds = %entry
+ %8 = load i32*, i32** %a.addr, align 8
+ store i32 17, i32* %8, align 4
+ br label %sw.epilog
+
+sw.epilog: ; preds = %entry, %sw.bb6, %sw.bb5, %sw.bb4, %sw.bb3, %sw.bb2, %sw.bb1, %sw.bb
+ ret void
+}
+
+; CHECK: Name: .toc
+; CHECK: AddressAlignment: 8
+; CHECK: Name: .rela.toc
+
+; This test was generated from the following C code from PR22711:
+
+;void test(int *a) {
+; switch (*a++) {
+; case 17: *a = 2; break;
+; case 13: *a = 3; break;
+; case 11: *a = 5; break;
+; case 7: *a = 7; break;
+; case 5: *a = 11; break;
+; case 3: *a = 13; break;
+; case 2: *a = 17; break;
+; }
+;}
diff --git a/test/CodeGen/PowerPC/preinc-ld-sel-crash.ll b/test/CodeGen/PowerPC/preinc-ld-sel-crash.ll
new file mode 100644
index 000000000000..cb2f035e5662
--- /dev/null
+++ b/test/CodeGen/PowerPC/preinc-ld-sel-crash.ll
@@ -0,0 +1,63 @@
+; RUN: llc < %s | FileCheck %s
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-bgq-linux"
+
+%t1 = type { %t2*, %t3* }
+%t2 = type <{ %t3*, i32, [4 x i8] }>
+%t3 = type { %t3* }
+
+@_ZN4Foam10SLListBase13endConstIter_E = external global %t1
+
+define void @_ZN4FoamrsIbEERNS_7IstreamES2_RNS_4ListIT_EE() #0 {
+entry:
+ switch i32 undef, label %if.else82 [
+ i32 9, label %if.then
+ i32 6, label %invoke.cont10
+ i32 1, label %invoke.cont61
+ ]
+
+if.then: ; preds = %entry
+ unreachable
+
+invoke.cont10: ; preds = %entry
+ unreachable
+
+invoke.cont61: ; preds = %entry
+ br i1 undef, label %if.end75, label %if.then64
+
+if.then64: ; preds = %invoke.cont61
+ unreachable
+
+if.end75: ; preds = %invoke.cont61
+ br i1 undef, label %if.then17.i, label %if.then.i181
+
+if.then.i181: ; preds = %if.end75
+ unreachable
+
+if.then17.i: ; preds = %if.end75
+ %tobool.i.i.i = icmp eq i32 undef, 0
+ %0 = load i64*, i64** undef, align 8
+ %agg.tmp.sroa.3.0.copyload33.in.i = select i1 %tobool.i.i.i, i64* bitcast (%t3** getelementptr inbounds (%t1, %t1* @_ZN4Foam10SLListBase13endConstIter_E, i64 0, i32 1) to i64*), i64* %0
+ %agg.tmp.sroa.3.0.copyload33.i = load i64, i64* %agg.tmp.sroa.3.0.copyload33.in.i, align 8
+ %1 = inttoptr i64 %agg.tmp.sroa.3.0.copyload33.i to %t3*
+ %2 = load %t3*, %t3** getelementptr inbounds (%t1, %t1* @_ZN4Foam10SLListBase13endConstIter_E, i64 0, i32 1), align 8
+ %cmp.i37.i = icmp eq %t3* %1, %2
+ br i1 %cmp.i37.i, label %invoke.cont79, label %for.body.lr.ph.i
+
+; CHECK-LABEL: @_ZN4FoamrsIbEERNS_7IstreamES2_RNS_4ListIT_EE
+
+for.body.lr.ph.i: ; preds = %if.then17.i
+ br label %for.body.i
+
+for.body.i: ; preds = %for.body.i, %for.body.lr.ph.i
+ br i1 undef, label %invoke.cont79, label %for.body.i
+
+invoke.cont79: ; preds = %for.body.i, %if.then17.i
+ unreachable
+
+if.else82: ; preds = %entry
+ ret void
+}
+
+attributes #0 = { "target-cpu"="a2q" }
+
diff --git a/test/CodeGen/PowerPC/preincprep-invoke.ll b/test/CodeGen/PowerPC/preincprep-invoke.ll
new file mode 100644
index 000000000000..0e09ff1b774a
--- /dev/null
+++ b/test/CodeGen/PowerPC/preincprep-invoke.ll
@@ -0,0 +1,50 @@
+; RUN: llc -mcpu=pwr7 < %s | FileCheck %s
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+@.str1 = external unnamed_addr constant [1 x i8], align 1
+@.str2 = external unnamed_addr constant [39 x i8], align 1
+
+declare void @_ZN13CStdOutStreamlsEPKc()
+
+declare void @_ZN13CStdOutStream5FlushEv()
+
+declare i32 @__gxx_personality_v0(...)
+
+define void @_Z11GetPasswordP13CStdOutStreamb() {
+entry:
+ br label %for.cond.i.i
+
+for.cond.i.i: ; preds = %for.cond.i.i, %entry
+ br i1 undef, label %_ZN11CStringBaseIcEC2EPKc.exit.critedge, label %for.cond.i.i
+
+_ZN11CStringBaseIcEC2EPKc.exit.critedge: ; preds = %for.cond.i.i
+ invoke void @_ZN13CStdOutStreamlsEPKc()
+ to label %invoke.cont unwind label %lpad
+
+invoke.cont: ; preds = %_ZN11CStringBaseIcEC2EPKc.exit.critedge
+ invoke void @_ZN13CStdOutStream5FlushEv()
+ to label %invoke.cont4 unwind label %lpad
+
+invoke.cont4: ; preds = %invoke.cont
+ %call7 = invoke i8* @getpass()
+ to label %for.cond.i.i30 unwind label %lpad
+
+; CHECK-LABEL: @_Z11GetPasswordP13CStdOutStreamb
+; CHECK: addi {{[0-9]+}}, 3, -1
+
+for.cond.i.i30: ; preds = %for.cond.i.i30, %invoke.cont4
+ %indvars.iv.i.i26 = phi i64 [ %indvars.iv.next.i.i29, %for.cond.i.i30 ], [ 0, %invoke.cont4 ]
+ %arrayidx.i.i27 = getelementptr inbounds i8, i8* %call7, i64 %indvars.iv.i.i26
+ %0 = load i8, i8* %arrayidx.i.i27, align 1
+ %indvars.iv.next.i.i29 = add nuw nsw i64 %indvars.iv.i.i26, 1
+ br label %for.cond.i.i30
+
+lpad: ; preds = %invoke.cont4, %invoke.cont, %_ZN11CStringBaseIcEC2EPKc.exit.critedge
+ %1 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+ cleanup
+ resume { i8*, i32 } undef
+}
+
+declare i8* @getpass()
+
diff --git a/test/CodeGen/PowerPC/private.ll b/test/CodeGen/PowerPC/private.ll
index 633fa651037f..4665fd246f33 100644
--- a/test/CodeGen/PowerPC/private.ll
+++ b/test/CodeGen/PowerPC/private.ll
@@ -19,7 +19,7 @@ define i32 @bar() nounwind {
; LINUX: lis{{.*}}.Lbaz
; OSX: lis{{.*}}l_baz
- %1 = load i32* @baz, align 4
+ %1 = load i32, i32* @baz, align 4
ret i32 %1
}
diff --git a/test/CodeGen/PowerPC/pwr7-gt-nop.ll b/test/CodeGen/PowerPC/pwr7-gt-nop.ll
index 8c8545d60df7..70f6dad362bf 100644
--- a/test/CodeGen/PowerPC/pwr7-gt-nop.ll
+++ b/test/CodeGen/PowerPC/pwr7-gt-nop.ll
@@ -8,11 +8,11 @@ define void @foo(float* nocapture %a, float* nocapture %b, float* nocapture read
; CHECK-LABEL: @foo
entry:
- %0 = load float* %b, align 4
+ %0 = load float, float* %b, align 4
store float %0, float* %a, align 4
- %1 = load float* %c, align 4
+ %1 = load float, float* %c, align 4
store float %1, float* %b, align 4
- %2 = load float* %a, align 4
+ %2 = load float, float* %a, align 4
store float %2, float* %d, align 4
ret void
diff --git a/test/CodeGen/PowerPC/qpx-bv-sint.ll b/test/CodeGen/PowerPC/qpx-bv-sint.ll
new file mode 100644
index 000000000000..0bc14ed4351a
--- /dev/null
+++ b/test/CodeGen/PowerPC/qpx-bv-sint.ll
@@ -0,0 +1,33 @@
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-bgq-linux"
+; RUN: llc < %s -march=ppc64 -mcpu=a2q | FileCheck %s
+
+define void @s452() nounwind {
+entry:
+ br label %for.body4
+
+for.body4: ; preds = %for.body4, %entry
+ %conv.4 = sitofp i32 undef to double
+ %conv.5 = sitofp i32 undef to double
+ %mul.4.v.i0.1 = insertelement <2 x double> undef, double %conv.4, i32 0
+ %mul.4.v.i0.2 = insertelement <2 x double> %mul.4.v.i0.1, double %conv.5, i32 1
+ %mul.4 = fmul <2 x double> %mul.4.v.i0.2, undef
+ %add7.4 = fadd <2 x double> undef, %mul.4
+ store <2 x double> %add7.4, <2 x double>* undef, align 16
+ br i1 undef, label %for.end, label %for.body4
+
+for.end: ; preds = %for.body4
+ unreachable
+; CHECK-LABEL: @s452
+; CHECK: lfiwax [[REG1:[0-9]+]],
+; CHECK: fcfid [[REG2:[0-9]+]], [[REG1]]
+; FIXME: We could 'promote' this to a vector earlier and remove this splat.
+; CHECK: qvesplati {{[0-9]+}}, [[REG2]], 0
+; CHECK: qvfmul
+; CHECK: qvfadd
+; CHECK: qvesplati {{[0-9]+}},
+; FIXME: We can use qvstfcdx here instead of two stores.
+; CHECK: stfd
+; CHECK: stfd
+}
+
diff --git a/test/CodeGen/PowerPC/qpx-bv.ll b/test/CodeGen/PowerPC/qpx-bv.ll
new file mode 100644
index 000000000000..ae181de383b5
--- /dev/null
+++ b/test/CodeGen/PowerPC/qpx-bv.ll
@@ -0,0 +1,37 @@
+; RUN: llc < %s -mcpu=a2q | FileCheck %s
+
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-bgq-linux"
+
+define <4 x double> @foo(double %f1, double %f2, double %f3, double %f4) {
+ %v1 = insertelement <4 x double> undef, double %f1, i32 0
+ %v2 = insertelement <4 x double> %v1, double %f2, i32 1
+ %v3 = insertelement <4 x double> %v2, double %f3, i32 2
+ %v4 = insertelement <4 x double> %v3, double %f4, i32 3
+ ret <4 x double> %v4
+
+; CHECK-LABEL: @foo
+; CHECK: qvgpci [[REG1:[0-9]+]], 275
+; CHECK-DAG: qvgpci [[REG2:[0-9]+]], 101
+; CHECK-DAG: qvfperm [[REG3:[0-9]+]], 3, 4, [[REG1]]
+; CHECK-DAG: qvfperm [[REG4:[0-9]+]], 1, 2, [[REG1]]
+; CHECK-DAG: qvfperm 1, [[REG4]], [[REG3]], [[REG2]]
+; CHECK: blr
+}
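+
+; The build_vector above is expected to be assembled with qvgpci-generated
+; permute controls and a small tree of qvfperm operations that merges the four
+; scalar inputs (in registers 1-4) pairwise and then into the final result, as
+; the CHECK lines require.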
+
+define <4 x float> @goo(float %f1, float %f2, float %f3, float %f4) {
+ %v1 = insertelement <4 x float> undef, float %f1, i32 0
+ %v2 = insertelement <4 x float> %v1, float %f2, i32 1
+ %v3 = insertelement <4 x float> %v2, float %f3, i32 2
+ %v4 = insertelement <4 x float> %v3, float %f4, i32 3
+ ret <4 x float> %v4
+
+; CHECK-LABEL: @goo
+; CHECK: qvgpci [[REG1:[0-9]+]], 275
+; CHECK-DAG: qvgpci [[REG2:[0-9]+]], 101
+; CHECK-DAG: qvfperm [[REG3:[0-9]+]], 3, 4, [[REG1]]
+; CHECK-DAG: qvfperm [[REG4:[0-9]+]], 1, 2, [[REG1]]
+; CHECK-DAG: qvfperm 1, [[REG4]], [[REG3]], [[REG2]]
+; CHECK: blr
+}
+
diff --git a/test/CodeGen/PowerPC/qpx-func-clobber.ll b/test/CodeGen/PowerPC/qpx-func-clobber.ll
new file mode 100644
index 000000000000..511fa3827b0c
--- /dev/null
+++ b/test/CodeGen/PowerPC/qpx-func-clobber.ll
@@ -0,0 +1,22 @@
+; RUN: llc < %s -march=ppc64 -mcpu=a2q | FileCheck %s
+target triple = "powerpc64-bgq-linux"
+
+declare <4 x double> @foo(<4 x double> %p)
+
+define <4 x double> @bar(<4 x double> %p, <4 x double> %q) {
+entry:
+ %v = call <4 x double> @foo(<4 x double> %p)
+ %w = call <4 x double> @foo(<4 x double> %q)
+ %x = fadd <4 x double> %v, %w
+ ret <4 x double> %x
+
+; CHECK-LABEL: @bar
+; CHECK: qvstfdx 2,
+; CHECK: bl foo
+; CHECK: qvstfdx 1,
+; CHECK: qvlfdx 1,
+; CHECK: bl foo
+; CHECK: qvlfdx [[REG:[0-9]+]],
+; CHECK: qvfadd 1, [[REG]], 1
+}
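+
+; The stack traffic checked for above (qvstfdx/qvlfdx around the calls)
+; presumably reflects that the full QPX vector registers are not preserved
+; across calls, so %q and then the first call's result must be spilled and
+; reloaded.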
+
diff --git a/test/CodeGen/PowerPC/qpx-load.ll b/test/CodeGen/PowerPC/qpx-load.ll
new file mode 100644
index 000000000000..7637c43850cc
--- /dev/null
+++ b/test/CodeGen/PowerPC/qpx-load.ll
@@ -0,0 +1,26 @@
+; RUN: llc < %s -march=ppc64 -mcpu=a2q | FileCheck %s
+target triple = "powerpc64-bgq-linux"
+
+define <4 x double> @foo(<4 x double>* %p) {
+entry:
+ %v = load <4 x double>, <4 x double>* %p, align 8
+ ret <4 x double> %v
+}
+
+; CHECK: @foo
+; CHECK-DAG: li [[REG1:[0-9]+]], 31
+; CHECK-DAG: qvlfdx [[REG4:[0-9]+]], 0, 3
+; CHECK-DAG: qvlfdx [[REG2:[0-9]+]], 3, [[REG1]]
+; CHECK-DAG: qvlpcldx [[REG3:[0-9]+]], 0, 3
+; CHECK-DAG: qvfperm 1, [[REG4]], [[REG2]], [[REG3]]
+; CHECK: blr
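+;
+; For the misaligned @foo case, the expected sequence loads the two aligned
+; vectors that straddle %p (qvlfdx at offsets 0 and 31), computes a permute
+; control from the low address bits with qvlpcldx, and merges the two halves
+; with qvfperm -- essentially the same realignment idiom as Altivec's
+; lvsl/vperm. The 32-byte-aligned @bar below needs only a single qvlfdx.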
+
+define <4 x double> @bar(<4 x double>* %p) {
+entry:
+ %v = load <4 x double>, <4 x double>* %p, align 32
+ ret <4 x double> %v
+}
+
+; CHECK: @bar
+; CHECK: qvlfdx
+
diff --git a/test/CodeGen/PowerPC/qpx-recipest.ll b/test/CodeGen/PowerPC/qpx-recipest.ll
new file mode 100644
index 000000000000..0e01358e5791
--- /dev/null
+++ b/test/CodeGen/PowerPC/qpx-recipest.ll
@@ -0,0 +1,194 @@
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=a2q -enable-unsafe-fp-math | FileCheck %s
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=a2q | FileCheck -check-prefix=CHECK-SAFE %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+declare <4 x double> @llvm.sqrt.v4f64(<4 x double>)
+declare <4 x float> @llvm.sqrt.v4f32(<4 x float>)
+
+define <4 x double> @foo(<4 x double> %a, <4 x double> %b) nounwind {
+entry:
+ %x = call <4 x double> @llvm.sqrt.v4f64(<4 x double> %b)
+ %r = fdiv <4 x double> %a, %x
+ ret <4 x double> %r
+
+; CHECK-LABEL: @foo
+; CHECK: qvfrsqrte
+; CHECK: qvfmul
+; FIXME: We're currently loading two constants here (1.5 and -1.5), and using
+; a qvfmadd instead of a qvfnmsub
+; CHECK: qvfmadd
+; CHECK: qvfmadd
+; CHECK: qvfmul
+; CHECK: qvfmul
+; CHECK: qvfmadd
+; CHECK: qvfmul
+; CHECK: qvfmul
+; CHECK: blr
+
+; CHECK-SAFE-LABEL: @foo
+; CHECK-SAFE: fsqrt
+; CHECK-SAFE: fdiv
+; CHECK-SAFE: blr
+}
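+
+; The qvfrsqrte estimate checked for above is refined iteratively; a
+; Newton-Raphson step for 1/sqrt(b) has roughly the form
+;   x1 = x0 * (1.5 - 0.5 * b * x0 * x0)
+; which is presumably where the 1.5 and -1.5 constants mentioned in the FIXME
+; come from.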
+
+define <4 x double> @foof(<4 x double> %a, <4 x float> %b) nounwind {
+entry:
+ %x = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %b)
+ %y = fpext <4 x float> %x to <4 x double>
+ %r = fdiv <4 x double> %a, %y
+ ret <4 x double> %r
+
+; CHECK-LABEL: @foof
+; CHECK: qvfrsqrtes
+; CHECK: qvfmuls
+; FIXME: We're currently loading two constants here (1.5 and -1.5), and using
+; a qvfmadd instead of a qvfnmsubs
+; CHECK: qvfmadds
+; CHECK: qvfmadds
+; CHECK: qvfmuls
+; CHECK: qvfmul
+; CHECK: blr
+
+; CHECK-SAFE-LABEL: @foof
+; CHECK-SAFE: fsqrts
+; CHECK-SAFE: fdiv
+; CHECK-SAFE: blr
+}
+
+define <4 x float> @food(<4 x float> %a, <4 x double> %b) nounwind {
+entry:
+ %x = call <4 x double> @llvm.sqrt.v4f64(<4 x double> %b)
+ %y = fptrunc <4 x double> %x to <4 x float>
+ %r = fdiv <4 x float> %a, %y
+ ret <4 x float> %r
+
+; CHECK-LABEL: @food
+; CHECK: qvfrsqrte
+; CHECK: qvfmul
+; FIXME: We're currently loading two constants here (1.5 and -1.5), and using
+; a qvfmadd instead of a qvfnmsub
+; CHECK: qvfmadd
+; CHECK: qvfmadd
+; CHECK: qvfmul
+; CHECK: qvfmul
+; CHECK: qvfmadd
+; CHECK: qvfmul
+; CHECK: qvfrsp
+; CHECK: qvfmuls
+; CHECK: blr
+
+; CHECK-SAFE-LABEL: @food
+; CHECK-SAFE: fsqrt
+; CHECK-SAFE: fdivs
+; CHECK-SAFE: blr
+}
+
+define <4 x float> @goo(<4 x float> %a, <4 x float> %b) nounwind {
+entry:
+ %x = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %b)
+ %r = fdiv <4 x float> %a, %x
+ ret <4 x float> %r
+
+; CHECK-LABEL: @goo
+; CHECK: qvfrsqrtes
+; CHECK: qvfmuls
+; FIXME: We're currently loading two constants here (1.5 and -1.5), and using
+; a qvfmadd instead of a qvfnmsubs
+; CHECK: qvfmadds
+; CHECK: qvfmadds
+; CHECK: qvfmuls
+; CHECK: qvfmuls
+; CHECK: blr
+
+; CHECK-SAFE-LABEL: @goo
+; CHECK-SAFE: fsqrts
+; CHECK-SAFE: fdivs
+; CHECK-SAFE: blr
+}
+
+define <4 x double> @foo2(<4 x double> %a, <4 x double> %b) nounwind {
+entry:
+ %r = fdiv <4 x double> %a, %b
+ ret <4 x double> %r
+
+; CHECK-LABEL: @foo2
+; CHECK: qvfre
+; CHECK: qvfnmsub
+; CHECK: qvfmadd
+; CHECK: qvfnmsub
+; CHECK: qvfmadd
+; CHECK: qvfmul
+; CHECK: blr
+
+; CHECK-SAFE-LABEL: @foo2
+; CHECK-SAFE: fdiv
+; CHECK-SAFE: blr
+}
+
+define <4 x float> @goo2(<4 x float> %a, <4 x float> %b) nounwind {
+entry:
+ %r = fdiv <4 x float> %a, %b
+ ret <4 x float> %r
+
+; CHECK-LABEL: @goo2
+; CHECK: qvfres
+; CHECK: qvfnmsubs
+; CHECK: qvfmadds
+; CHECK: qvfmuls
+; CHECK: blr
+
+; CHECK-SAFE-LABEL: @goo2
+; CHECK-SAFE: fdivs
+; CHECK-SAFE: blr
+}
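+
+; For the plain divisions in @foo2 and @goo2 above, the qvfre estimate is
+; likewise refined; a Newton-Raphson step for 1/b has roughly the form
+;   x1 = x0 + x0 * (1.0 - b * x0)
+; which is consistent with the qvfnmsub/qvfmadd pairs checked for above. The
+; refinement is only expected with -enable-unsafe-fp-math; the CHECK-SAFE run
+; expects a plain fdiv.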
+
+define <4 x double> @foo3(<4 x double> %a) nounwind {
+entry:
+ %r = call <4 x double> @llvm.sqrt.v4f64(<4 x double> %a)
+ ret <4 x double> %r
+
+; CHECK-LABEL: @foo3
+; CHECK: qvfrsqrte
+; CHECK: qvfmul
+; FIXME: We're currently loading two constants here (1.5 and -1.5), and using
+; a qvfmadd instead of a qvfnmsub
+; CHECK-DAG: qvfmadd
+; CHECK-DAG: qvfcmpeq
+; CHECK-DAG: qvfmadd
+; CHECK-DAG: qvfmul
+; CHECK-DAG: qvfmul
+; CHECK-DAG: qvfmadd
+; CHECK-DAG: qvfmul
+; CHECK-DAG: qvfmul
+; CHECK: qvfsel
+; CHECK: blr
+
+; CHECK-SAFE-LABEL: @foo3
+; CHECK-SAFE: fsqrt
+; CHECK-SAFE: blr
+}
+
+define <4 x float> @goo3(<4 x float> %a) nounwind {
+entry:
+ %r = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %a)
+ ret <4 x float> %r
+
+; CHECK-LABEL: @goo3
+; CHECK: qvfrsqrtes
+; CHECK: qvfmuls
+; FIXME: We're currently loading two constants here (1.5 and -1.5), and using
+; a qvfmadds instead of a qvfnmsubs
+; CHECK-DAG: qvfmadds
+; CHECK-DAG: qvfcmpeq
+; CHECK-DAG: qvfmadds
+; CHECK-DAG: qvfmuls
+; CHECK-DAG: qvfmuls
+; CHECK: qvfsel
+; CHECK: blr
+
+; CHECK-SAFE-LABEL: @goo3
+; CHECK-SAFE: fsqrts
+; CHECK-SAFE: blr
+}
+
diff --git a/test/CodeGen/PowerPC/qpx-rounding-ops.ll b/test/CodeGen/PowerPC/qpx-rounding-ops.ll
new file mode 100644
index 000000000000..6fdd8e6a7147
--- /dev/null
+++ b/test/CodeGen/PowerPC/qpx-rounding-ops.ll
@@ -0,0 +1,109 @@
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=a2q | FileCheck %s
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=a2q -enable-unsafe-fp-math | FileCheck -check-prefix=CHECK-FM %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+define <4 x float> @test1(<4 x float> %x) nounwind {
+ %call = tail call <4 x float> @llvm.floor.v4f32(<4 x float> %x) nounwind readnone
+ ret <4 x float> %call
+
+; CHECK: test1:
+; CHECK: qvfrim 1, 1
+
+; CHECK-FM: test1:
+; CHECK-FM: qvfrim 1, 1
+}
+
+declare <4 x float> @llvm.floor.v4f32(<4 x float>) nounwind readnone
+
+define <4 x double> @test2(<4 x double> %x) nounwind {
+ %call = tail call <4 x double> @llvm.floor.v4f64(<4 x double> %x) nounwind readnone
+ ret <4 x double> %call
+
+; CHECK: test2:
+; CHECK: qvfrim 1, 1
+
+; CHECK-FM: test2:
+; CHECK-FM: qvfrim 1, 1
+}
+
+declare <4 x double> @llvm.floor.v4f64(<4 x double>) nounwind readnone
+
+define <4 x float> @test3(<4 x float> %x) nounwind {
+ %call = tail call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %x) nounwind readnone
+ ret <4 x float> %call
+
+; CHECK: test3:
+; CHECK-NOT: qvfrin
+
+; CHECK-FM: test3:
+; CHECK-FM-NOT: qvfrin
+}
+
+declare <4 x float> @llvm.nearbyint.v4f32(<4 x float>) nounwind readnone
+
+define <4 x double> @test4(<4 x double> %x) nounwind {
+ %call = tail call <4 x double> @llvm.nearbyint.v4f64(<4 x double> %x) nounwind readnone
+ ret <4 x double> %call
+
+; CHECK: test4:
+; CHECK-NOT: qvfrin
+
+; CHECK-FM: test4:
+; CHECK-FM-NOT: qvfrin
+}
+
+declare <4 x double> @llvm.nearbyint.v4f64(<4 x double>) nounwind readnone
+
+define <4 x float> @test5(<4 x float> %x) nounwind {
+ %call = tail call <4 x float> @llvm.ceil.v4f32(<4 x float> %x) nounwind readnone
+ ret <4 x float> %call
+
+; CHECK: test5:
+; CHECK: qvfrip 1, 1
+
+; CHECK-FM: test5:
+; CHECK-FM: qvfrip 1, 1
+}
+
+declare <4 x float> @llvm.ceil.v4f32(<4 x float>) nounwind readnone
+
+define <4 x double> @test6(<4 x double> %x) nounwind {
+ %call = tail call <4 x double> @llvm.ceil.v4f64(<4 x double> %x) nounwind readnone
+ ret <4 x double> %call
+
+; CHECK: test6:
+; CHECK: qvfrip 1, 1
+
+; CHECK-FM: test6:
+; CHECK-FM: qvfrip 1, 1
+}
+
+declare <4 x double> @llvm.ceil.v4f64(<4 x double>) nounwind readnone
+
+define <4 x float> @test9(<4 x float> %x) nounwind {
+ %call = tail call <4 x float> @llvm.trunc.v4f32(<4 x float> %x) nounwind readnone
+ ret <4 x float> %call
+
+; CHECK: test9:
+; CHECK: qvfriz 1, 1
+
+; CHECK-FM: test9:
+; CHECK-FM: qvfriz 1, 1
+}
+
+declare <4 x float> @llvm.trunc.v4f32(<4 x float>) nounwind readnone
+
+define <4 x double> @test10(<4 x double> %x) nounwind {
+ %call = tail call <4 x double> @llvm.trunc.v4f64(<4 x double> %x) nounwind readnone
+ ret <4 x double> %call
+
+; CHECK: test10:
+; CHECK: qvfriz 1, 1
+
+; CHECK-FM: test10:
+; CHECK-FM: qvfriz 1, 1
+}
+
+declare <4 x double> @llvm.trunc.v4f64(<4 x double>) nounwind readnone
+
diff --git a/test/CodeGen/PowerPC/qpx-s-load.ll b/test/CodeGen/PowerPC/qpx-s-load.ll
new file mode 100644
index 000000000000..db147126c1ec
--- /dev/null
+++ b/test/CodeGen/PowerPC/qpx-s-load.ll
@@ -0,0 +1,26 @@
+; RUN: llc < %s -march=ppc64 -mcpu=a2q | FileCheck %s
+target triple = "powerpc64-bgq-linux"
+
+define <4 x float> @foo(<4 x float>* %p) {
+entry:
+ %v = load <4 x float>, <4 x float>* %p, align 4
+ ret <4 x float> %v
+}
+
+; CHECK: @foo
+; CHECK-DAG: li [[REG1:[0-9]+]], 15
+; CHECK-DAG: qvlfsx [[REG4:[0-9]+]], 0, 3
+; CHECK-DAG: qvlfsx [[REG2:[0-9]+]], 3, [[REG1]]
+; CHECK-DAG: qvlpclsx [[REG3:[0-9]+]], 0, 3
+; CHECK-DAG: qvfperm 1, [[REG4]], [[REG2]], [[REG3]]
+; CHECK: blr
+
+define <4 x float> @bar(<4 x float>* %p) {
+entry:
+ %v = load <4 x float>, <4 x float>* %p, align 16
+ ret <4 x float> %v
+}
+
+; CHECK: @bar
+; CHECK: qvlfsx
+
diff --git a/test/CodeGen/PowerPC/qpx-s-sel.ll b/test/CodeGen/PowerPC/qpx-s-sel.ll
new file mode 100644
index 000000000000..09a615c4597d
--- /dev/null
+++ b/test/CodeGen/PowerPC/qpx-s-sel.ll
@@ -0,0 +1,144 @@
+; RUN: llc < %s -march=ppc64 -mcpu=a2q | FileCheck %s
+target triple = "powerpc64-bgq-linux"
+
+@Q = constant <4 x i1> <i1 0, i1 undef, i1 1, i1 1>, align 16
+@R = global <4 x i1> <i1 0, i1 0, i1 0, i1 0>, align 16
+
+define <4 x float> @test1(<4 x float> %a, <4 x float> %b, <4 x i1> %c) nounwind readnone {
+entry:
+ %r = select <4 x i1> %c, <4 x float> %a, <4 x float> %b
+ ret <4 x float> %r
+
+; CHECK-LABEL: @test1
+; CHECK: qvfsel 1, 3, 1, 2
+; CHECK: blr
+}
+
+define <4 x float> @test2(<4 x float> %a, <4 x float> %b, i1 %c1, i1 %c2, i1 %c3, i1 %c4) nounwind readnone {
+entry:
+ %v = insertelement <4 x i1> undef, i1 %c1, i32 0
+ %v2 = insertelement <4 x i1> %v, i1 %c2, i32 1
+ %v3 = insertelement <4 x i1> %v2, i1 %c3, i32 2
+ %v4 = insertelement <4 x i1> %v3, i1 %c4, i32 3
+ %r = select <4 x i1> %v4, <4 x float> %a, <4 x float> %b
+ ret <4 x float> %r
+
+; CHECK-LABEL: @test2
+; CHECK: stw
+; CHECK-DAG: qvlfiwzx [[REG1:[0-9]+]],
+; CHECK-DAG: qvlfdx [[REG2:[0-9]+]],
+; CHECK-DAG: qvfcfidu [[REG3:[0-9]+]], [[REG1]]
+; CHECK: qvfcmpeq [[REG4:[0-9]+]], [[REG3]], [[REG2]]
+; CHECK: qvfsel 1, [[REG4]], 1, 2
+; CHECK: blr
+}
+
+define <4 x i1> @test3(<4 x i1> %a) nounwind readnone {
+entry:
+ %v = and <4 x i1> %a, <i1 0, i1 undef, i1 1, i1 1>
+ ret <4 x i1> %v
+
+; CHECK-LABEL: @test3
+; CHECK: qvlfsx [[REG:[0-9]+]],
+; qvflogical 1, 1, [[REG]], 1
+; blr
+}
+
+define <4 x i1> @test4(<4 x i1> %a) nounwind {
+entry:
+ %q = load <4 x i1>, <4 x i1>* @Q, align 16
+ %v = and <4 x i1> %a, %q
+ ret <4 x i1> %v
+
+; CHECK-LABEL: @test4
+; CHECK-DAG: lbz
+; CHECK-DAG: qvlfdx [[REG1:[0-9]+]],
+; CHECK-DAG: stw
+; CHECK-DAG: qvlfiwzx [[REG2:[0-9]+]],
+; CHECK-DAG: qvfcfidu [[REG3:[0-9]+]], [[REG2]]
+; CHECK: qvfcmpeq [[REG4:[0-9]+]], [[REG3]], [[REG1]]
+; CHECK: qvflogical 1, 1, [[REG4]], 1
+; CHECK: blr
+}
+
+define void @test5(<4 x i1> %a) nounwind {
+entry:
+ store <4 x i1> %a, <4 x i1>* @R
+ ret void
+
+; CHECK-LABEL: @test5
+; CHECK: qvlfdx [[REG1:[0-9]+]],
+; CHECK: qvfmadd [[REG2:[0-9]+]], 1, [[REG1]], [[REG1]]
+; CHECK: qvfctiwu [[REG3:[0-9]+]], [[REG2]]
+; CHECK: qvstfiwx [[REG3]],
+; CHECK: lwz
+; CHECK: stb
+; CHECK: blr
+}
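+
+; The checks for test2, test4, and test5 suggest that QPX carries vector
+; booleans as floating-point values: building a <4 x i1> from scalar i1s goes
+; through stw/qvlfiwzx/qvfcfidu, and storing one back to memory goes through
+; qvfctiwu/qvstfiwx before the final bytes are written with stb.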
+
+define i1 @test6(<4 x i1> %a) nounwind {
+entry:
+ %r = extractelement <4 x i1> %a, i32 2
+ ret i1 %r
+
+; CHECK-LABEL: @test6
+; CHECK: qvlfdx [[REG1:[0-9]+]],
+; CHECK: qvfmadd [[REG2:[0-9]+]], 1, [[REG1]], [[REG1]]
+; CHECK: qvfctiwu [[REG3:[0-9]+]], [[REG2]]
+; CHECK: qvstfiwx [[REG3]],
+; CHECK: lwz
+; CHECK: blr
+}
+
+define i1 @test7(<4 x i1> %a) nounwind {
+entry:
+ %r = extractelement <4 x i1> %a, i32 2
+ %s = extractelement <4 x i1> %a, i32 3
+ %q = and i1 %r, %s
+ ret i1 %q
+
+; CHECK-LABEL: @test7
+; CHECK: qvlfdx [[REG1:[0-9]+]],
+; CHECK: qvfmadd [[REG2:[0-9]+]], 1, [[REG1]], [[REG1]]
+; CHECK: qvfctiwu [[REG3:[0-9]+]], [[REG2]]
+; CHECK: qvstfiwx [[REG3]],
+; CHECK-DAG: lwz [[REG4:[0-9]+]],
+; FIXME: We're storing the vector twice, and that's silly.
+; CHECK-DAG: qvstfiwx [[REG3]],
+; CHECK: lwz [[REG5:[0-9]+]],
+; CHECK: and 3,
+; CHECK: blr
+}
+
+define i1 @test8(<3 x i1> %a) nounwind {
+entry:
+ %r = extractelement <3 x i1> %a, i32 2
+ ret i1 %r
+
+; CHECK-LABEL: @test8
+; CHECK: qvlfdx [[REG1:[0-9]+]],
+; CHECK: qvfmadd [[REG2:[0-9]+]], 1, [[REG1]], [[REG1]]
+; CHECK: qvfctiwu [[REG3:[0-9]+]], [[REG2]]
+; CHECK: qvstfiwx [[REG3]],
+; CHECK: lwz
+; CHECK: blr
+}
+
+define <3 x float> @test9(<3 x float> %a, <3 x float> %b, i1 %c1, i1 %c2, i1 %c3) nounwind readnone {
+entry:
+ %v = insertelement <3 x i1> undef, i1 %c1, i32 0
+ %v2 = insertelement <3 x i1> %v, i1 %c2, i32 1
+ %v3 = insertelement <3 x i1> %v2, i1 %c3, i32 2
+ %r = select <3 x i1> %v3, <3 x float> %a, <3 x float> %b
+ ret <3 x float> %r
+
+; CHECK-LABEL: @test9
+; CHECK: stw
+; CHECK-DAG: qvlfiwzx [[REG1:[0-9]+]],
+; CHECK-DAG: qvlfdx [[REG2:[0-9]+]],
+; CHECK-DAG: qvfcfidu [[REG3:[0-9]+]], [[REG1]]
+; CHECK: qvfcmpeq [[REG4:[0-9]+]], [[REG3]], [[REG2]]
+; CHECK: qvfsel 1, [[REG4]], 1, 2
+; CHECK: blr
+}
+
diff --git a/test/CodeGen/PowerPC/qpx-s-store.ll b/test/CodeGen/PowerPC/qpx-s-store.ll
new file mode 100644
index 000000000000..0bd6201f767c
--- /dev/null
+++ b/test/CodeGen/PowerPC/qpx-s-store.ll
@@ -0,0 +1,25 @@
+; RUN: llc < %s -march=ppc64 -mcpu=a2q | FileCheck %s
+target triple = "powerpc64-bgq-linux"
+
+define void @foo(<4 x float> %v, <4 x float>* %p) {
+entry:
+ store <4 x float> %v, <4 x float>* %p, align 4
+ ret void
+}
+
+; CHECK: @foo
+; CHECK: stfs
+; CHECK: stfs
+; CHECK: stfs
+; CHECK: stfs
+; CHECK: blr
+
+define void @bar(<4 x float> %v, <4 x float>* %p) {
+entry:
+ store <4 x float> %v, <4 x float>* %p, align 16
+ ret void
+}
+
+; CHECK: @bar
+; CHECK: qvstfsx
+
diff --git a/test/CodeGen/PowerPC/qpx-sel.ll b/test/CodeGen/PowerPC/qpx-sel.ll
new file mode 100644
index 000000000000..a375e6effbae
--- /dev/null
+++ b/test/CodeGen/PowerPC/qpx-sel.ll
@@ -0,0 +1,152 @@
+; RUN: llc < %s -march=ppc64 -mcpu=a2q | FileCheck %s
+target triple = "powerpc64-bgq-linux"
+
+@Q = constant <4 x i1> <i1 0, i1 undef, i1 1, i1 1>, align 16
+@R = global <4 x i1> <i1 0, i1 0, i1 0, i1 0>, align 16
+
+define <4 x double> @test1(<4 x double> %a, <4 x double> %b, <4 x i1> %c) nounwind readnone {
+entry:
+ %r = select <4 x i1> %c, <4 x double> %a, <4 x double> %b
+ ret <4 x double> %r
+
+; CHECK-LABEL: @test1
+; CHECK: qvfsel 1, 3, 1, 2
+; CHECK: blr
+}
+
+define <4 x double> @test2(<4 x double> %a, <4 x double> %b, i1 %c1, i1 %c2, i1 %c3, i1 %c4) nounwind readnone {
+entry:
+ %v = insertelement <4 x i1> undef, i1 %c1, i32 0
+ %v2 = insertelement <4 x i1> %v, i1 %c2, i32 1
+ %v3 = insertelement <4 x i1> %v2, i1 %c3, i32 2
+ %v4 = insertelement <4 x i1> %v3, i1 %c4, i32 3
+ %r = select <4 x i1> %v4, <4 x double> %a, <4 x double> %b
+ ret <4 x double> %r
+
+; CHECK-LABEL: @test2
+
+; FIXME: This load/store sequence is unnecessary.
+; CHECK-DAG: lbz
+; CHECK-DAG: stw
+
+; CHECK-DAG: qvlfiwzx [[REG1:[0-9]+]],
+; CHECK-DAG: qvlfdx [[REG2:[0-9]+]],
+; CHECK-DAG: qvfcfidu [[REG3:[0-9]+]], [[REG1]]
+; CHECK: qvfcmpeq [[REG4:[0-9]+]], [[REG3]], [[REG2]]
+; CHECK: qvfsel 1, [[REG4]], 1, 2
+; CHECK: blr
+}
+
+define <4 x i1> @test3(<4 x i1> %a) nounwind readnone {
+entry:
+ %v = and <4 x i1> %a, <i1 0, i1 undef, i1 1, i1 1>
+ ret <4 x i1> %v
+
+; CHECK-LABEL: @test3
+; CHECK: qvlfsx [[REG:[0-9]+]],
+; qvflogical 1, 1, [[REG]], 1
+; blr
+}
+
+define <4 x i1> @test4(<4 x i1> %a) nounwind {
+entry:
+ %q = load <4 x i1>, <4 x i1>* @Q, align 16
+ %v = and <4 x i1> %a, %q
+ ret <4 x i1> %v
+
+; CHECK-LABEL: @test4
+; CHECK-DAG: lbz
+; CHECK-DAG: qvlfdx [[REG1:[0-9]+]],
+; CHECK-DAG: stw
+; CHECK-DAG: qvlfiwzx [[REG2:[0-9]+]],
+; CHECK-DAG: qvfcfidu [[REG3:[0-9]+]], [[REG2]]
+; CHECK: qvfcmpeq [[REG4:[0-9]+]], [[REG3]], [[REG1]]
+; CHECK: qvflogical 1, 1, [[REG4]], 1
+; CHECK: blr
+}
+
+define void @test5(<4 x i1> %a) nounwind {
+entry:
+ store <4 x i1> %a, <4 x i1>* @R
+ ret void
+
+; CHECK-LABEL: @test5
+; CHECK: qvlfdx [[REG1:[0-9]+]],
+; CHECK: qvfmadd [[REG2:[0-9]+]], 1, [[REG1]], [[REG1]]
+; CHECK: qvfctiwu [[REG3:[0-9]+]], [[REG2]]
+; CHECK: qvstfiwx [[REG3]],
+; CHECK: lwz
+; CHECK: stb
+; CHECK: blr
+}
+
+define i1 @test6(<4 x i1> %a) nounwind {
+entry:
+ %r = extractelement <4 x i1> %a, i32 2
+ ret i1 %r
+
+; CHECK-LABEL: @test6
+; CHECK: qvlfdx [[REG1:[0-9]+]],
+; CHECK: qvfmadd [[REG2:[0-9]+]], 1, [[REG1]], [[REG1]]
+; CHECK: qvfctiwu [[REG3:[0-9]+]], [[REG2]]
+; CHECK: qvstfiwx [[REG3]],
+; CHECK: lwz
+; CHECK: blr
+}
+
+define i1 @test7(<4 x i1> %a) nounwind {
+entry:
+ %r = extractelement <4 x i1> %a, i32 2
+ %s = extractelement <4 x i1> %a, i32 3
+ %q = and i1 %r, %s
+ ret i1 %q
+
+; CHECK-LABEL: @test7
+; CHECK: qvlfdx [[REG1:[0-9]+]],
+; CHECK: qvfmadd [[REG2:[0-9]+]], 1, [[REG1]], [[REG1]]
+; CHECK: qvfctiwu [[REG3:[0-9]+]], [[REG2]]
+; CHECK: qvstfiwx [[REG3]],
+; CHECK-DAG: lwz [[REG4:[0-9]+]],
+; FIXME: We're storing the vector twice, and that's silly.
+; CHECK-DAG: qvstfiwx [[REG3]],
+; CHECK-DAG: lwz [[REG5:[0-9]+]],
+; CHECK: and 3,
+; CHECK: blr
+}
+
+define i1 @test8(<3 x i1> %a) nounwind {
+entry:
+ %r = extractelement <3 x i1> %a, i32 2
+ ret i1 %r
+
+; CHECK-LABEL: @test8
+; CHECK: qvlfdx [[REG1:[0-9]+]],
+; CHECK: qvfmadd [[REG2:[0-9]+]], 1, [[REG1]], [[REG1]]
+; CHECK: qvfctiwu [[REG3:[0-9]+]], [[REG2]]
+; CHECK: qvstfiwx [[REG3]],
+; CHECK: lwz
+; CHECK: blr
+}
+
+define <3 x double> @test9(<3 x double> %a, <3 x double> %b, i1 %c1, i1 %c2, i1 %c3) nounwind readnone {
+entry:
+ %v = insertelement <3 x i1> undef, i1 %c1, i32 0
+ %v2 = insertelement <3 x i1> %v, i1 %c2, i32 1
+ %v3 = insertelement <3 x i1> %v2, i1 %c3, i32 2
+ %r = select <3 x i1> %v3, <3 x double> %a, <3 x double> %b
+ ret <3 x double> %r
+
+; CHECK-LABEL: @test9
+
+; FIXME: This load/store sequence is unnecessary.
+; CHECK-DAG: lbz
+; CHECK-DAG: stw
+
+; CHECK-DAG: qvlfiwzx [[REG1:[0-9]+]],
+; CHECK-DAG: qvlfdx [[REG2:[0-9]+]],
+; CHECK-DAG: qvfcfidu [[REG3:[0-9]+]], [[REG1]]
+; CHECK: qvfcmpeq [[REG4:[0-9]+]], [[REG3]], [[REG2]]
+; CHECK: qvfsel 1, [[REG4]], 1, 2
+; CHECK: blr
+}
+
diff --git a/test/CodeGen/PowerPC/qpx-split-vsetcc.ll b/test/CodeGen/PowerPC/qpx-split-vsetcc.ll
new file mode 100644
index 000000000000..c8cef0faeaa4
--- /dev/null
+++ b/test/CodeGen/PowerPC/qpx-split-vsetcc.ll
@@ -0,0 +1,40 @@
+; RUN: llc -mcpu=a2q < %s | FileCheck %s
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-bgq-linux"
+
+; Function Attrs: nounwind
+define void @gsl_sf_legendre_Pl_deriv_array() #0 {
+entry:
+ br i1 undef, label %do.body.i, label %if.else.i
+
+do.body.i: ; preds = %entry
+ unreachable
+
+if.else.i: ; preds = %entry
+ br i1 undef, label %return, label %for.body46.lr.ph
+
+for.body46.lr.ph: ; preds = %if.else.i
+ br label %vector.body198
+
+vector.body198: ; preds = %vector.body198, %for.body46.lr.ph
+ %0 = icmp ne <4 x i32> undef, zeroinitializer
+ %1 = select <4 x i1> %0, <4 x double> <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>, <4 x double> <double -5.000000e-01, double -5.000000e-01, double -5.000000e-01, double -5.000000e-01>
+ %2 = fmul <4 x double> undef, %1
+ %3 = fmul <4 x double> undef, %2
+ %4 = fmul <4 x double> %3, undef
+ store <4 x double> %4, <4 x double>* undef, align 8
+ br label %vector.body198
+
+; CHECK-LABEL: @gsl_sf_legendre_Pl_deriv_array
+; CHECK: qvlfiwzx
+; CHECK: qvfcfidu
+; CHECK: qvfcmpeq
+; CHECK: qvfsel
+; CHECK: qvfmul
+
+return: ; preds = %if.else.i
+ ret void
+}
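+
+; The vector icmp/select pair above is expected to be re-expressed in the
+; floating-point domain: the integer lanes are loaded with qvlfiwzx and
+; converted with qvfcfidu, the comparison is done with qvfcmpeq, and its
+; result drives qvfsel before the qvfmul, as the CHECK lines require.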
+
+attributes #0 = { nounwind }
+
diff --git a/test/CodeGen/PowerPC/qpx-store.ll b/test/CodeGen/PowerPC/qpx-store.ll
new file mode 100644
index 000000000000..2579d2c681c9
--- /dev/null
+++ b/test/CodeGen/PowerPC/qpx-store.ll
@@ -0,0 +1,25 @@
+; RUN: llc < %s -march=ppc64 -mcpu=a2q | FileCheck %s
+target triple = "powerpc64-bgq-linux"
+
+define void @foo(<4 x double> %v, <4 x double>* %p) {
+entry:
+ store <4 x double> %v, <4 x double>* %p, align 8
+ ret void
+}
+
+; CHECK: @foo
+; CHECK: stfd
+; CHECK: stfd
+; CHECK: stfd
+; CHECK: stfd
+; CHECK: blr
+
+define void @bar(<4 x double> %v, <4 x double>* %p) {
+entry:
+ store <4 x double> %v, <4 x double>* %p, align 32
+ ret void
+}
+
+; CHECK: @bar
+; CHECK: qvstfdx
+
diff --git a/test/CodeGen/PowerPC/qpx-unalperm.ll b/test/CodeGen/PowerPC/qpx-unalperm.ll
new file mode 100644
index 000000000000..51b340c5835c
--- /dev/null
+++ b/test/CodeGen/PowerPC/qpx-unalperm.ll
@@ -0,0 +1,64 @@
+; RUN: llc < %s -mcpu=a2q | FileCheck %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-bgq-linux"
+
+define <4 x double> @foo(<4 x double>* %a) {
+entry:
+ %r = load <4 x double>, <4 x double>* %a, align 32
+ ret <4 x double> %r
+; CHECK: qvlfdx
+; CHECK: blr
+}
+
+define <4 x double> @bar(<4 x double>* %a) {
+entry:
+ %r = load <4 x double>, <4 x double>* %a, align 8
+ %b = getelementptr <4 x double>, <4 x double>* %a, i32 16
+ %s = load <4 x double>, <4 x double>* %b, align 32
+ %t = fadd <4 x double> %r, %s
+ ret <4 x double> %t
+; CHECK: qvlpcldx
+; CHECK: qvlfdx
+; CHECK: qvfperm
+; CHECK: blr
+}
+
+define <4 x double> @bar1(<4 x double>* %a) {
+entry:
+ %r = load <4 x double>, <4 x double>* %a, align 8
+ %b = getelementptr <4 x double>, <4 x double>* %a, i32 16
+ %s = load <4 x double>, <4 x double>* %b, align 8
+ %t = fadd <4 x double> %r, %s
+ ret <4 x double> %t
+}
+
+define <4 x double> @bar2(<4 x double>* %a) {
+entry:
+ %r = load <4 x double>, <4 x double>* %a, align 8
+ %b = getelementptr <4 x double>, <4 x double>* %a, i32 1
+ %s = load <4 x double>, <4 x double>* %b, align 32
+ %t = fadd <4 x double> %r, %s
+ ret <4 x double> %t
+}
+
+define <4 x double> @bar3(<4 x double>* %a) {
+entry:
+ %r = load <4 x double>, <4 x double>* %a, align 8
+ %b = getelementptr <4 x double>, <4 x double>* %a, i32 1
+ %s = load <4 x double>, <4 x double>* %b, align 8
+ %t = fadd <4 x double> %r, %s
+ ret <4 x double> %t
+}
+
+define <4 x double> @bar4(<4 x double>* %a) {
+entry:
+ %r = load <4 x double>, <4 x double>* %a, align 8
+ %b = getelementptr <4 x double>, <4 x double>* %a, i32 1
+ %s = load <4 x double>, <4 x double>* %b, align 8
+ %c = getelementptr <4 x double>, <4 x double>* %b, i32 1
+ %t = load <4 x double>, <4 x double>* %c, align 8
+ %u = fadd <4 x double> %r, %s
+ %v = fadd <4 x double> %u, %t
+ ret <4 x double> %v
+}
+
diff --git a/test/CodeGen/PowerPC/quadint-return.ll b/test/CodeGen/PowerPC/quadint-return.ll
index 03499915e78e..0743ce4a95c1 100644
--- a/test/CodeGen/PowerPC/quadint-return.ll
+++ b/test/CodeGen/PowerPC/quadint-return.ll
@@ -8,7 +8,7 @@ define i128 @foo() nounwind {
entry:
%x = alloca i128, align 16
store i128 27, i128* %x, align 16
- %0 = load i128* %x, align 16
+ %0 = load i128, i128* %x, align 16
ret i128 %0
}
diff --git a/test/CodeGen/PowerPC/reg-coalesce-simple.ll b/test/CodeGen/PowerPC/reg-coalesce-simple.ll
index e0ddb4250fd2..3f9cb8a74270 100644
--- a/test/CodeGen/PowerPC/reg-coalesce-simple.ll
+++ b/test/CodeGen/PowerPC/reg-coalesce-simple.ll
@@ -3,8 +3,8 @@
%struct.foo = type { i32, i32, [0 x i8] }
define i32 @test(%struct.foo* %X) nounwind {
- %tmp1 = getelementptr %struct.foo* %X, i32 0, i32 2, i32 100 ; <i8*> [#uses=1]
- %tmp = load i8* %tmp1 ; <i8> [#uses=1]
+ %tmp1 = getelementptr %struct.foo, %struct.foo* %X, i32 0, i32 2, i32 100 ; <i8*> [#uses=1]
+ %tmp = load i8, i8* %tmp1 ; <i8> [#uses=1]
%tmp2 = zext i8 %tmp to i32 ; <i32> [#uses=1]
ret i32 %tmp2
}
diff --git a/test/CodeGen/PowerPC/reloc-align.ll b/test/CodeGen/PowerPC/reloc-align.ll
index 13d6adadfcae..754997bccbd6 100644
--- a/test/CodeGen/PowerPC/reloc-align.ll
+++ b/test/CodeGen/PowerPC/reloc-align.ll
@@ -24,7 +24,7 @@ entry:
define internal fastcc signext i32 @func_90(%struct.S1* byval nocapture %p_91) #0 {
entry:
%0 = bitcast %struct.S1* %p_91 to i64*
- %bf.load = load i64* %0, align 1
+ %bf.load = load i64, i64* %0, align 1
%bf.shl = shl i64 %bf.load, 26
%bf.ashr = ashr i64 %bf.shl, 54
%bf.cast = trunc i64 %bf.ashr to i32
diff --git a/test/CodeGen/PowerPC/remat-imm.ll b/test/CodeGen/PowerPC/remat-imm.ll
index 520921f57a93..ffae8a97cc83 100644
--- a/test/CodeGen/PowerPC/remat-imm.ll
+++ b/test/CodeGen/PowerPC/remat-imm.ll
@@ -9,7 +9,7 @@ define i32 @main() nounwind {
entry:
; CHECK: li 4, 128
; CHECK-NOT: mr 4, {{.*}}
- %call = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([6 x i8]* @.str, i32 0, i32 0), i32 128, i32 128) nounwind
+ %call = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str, i32 0, i32 0), i32 128, i32 128) nounwind
ret i32 0
}
diff --git a/test/CodeGen/PowerPC/resolvefi-basereg.ll b/test/CodeGen/PowerPC/resolvefi-basereg.ll
index 62c2d139920a..a613c3310a55 100644
--- a/test/CodeGen/PowerPC/resolvefi-basereg.ll
+++ b/test/CodeGen/PowerPC/resolvefi-basereg.ll
@@ -35,21 +35,21 @@ entry:
call void @llvm.memset.p0i8.i64(i8* bitcast (%struct.S1998* @s1998 to i8*), i8 0, i64 5168, i32 16, i1 false)
call void @llvm.memset.p0i8.i64(i8* bitcast ([5 x %struct.S1998]* @a1998 to i8*), i8 0, i64 25840, i32 16, i1 false)
call void @llvm.memset.p0i8.i64(i8* bitcast (%struct.Info* @info to i8*), i8 0, i64 832, i32 8, i1 false)
- store i8* bitcast (%struct.S1998* @s1998 to i8*), i8** getelementptr inbounds (%struct.Info* @info, i32 0, i32 2), align 8
- store i8* bitcast ([5 x %struct.S1998]* @a1998 to i8*), i8** getelementptr inbounds (%struct.Info* @info, i32 0, i32 3), align 8
- store i8* bitcast (%struct.S1998* getelementptr inbounds ([5 x %struct.S1998]* @a1998, i32 0, i64 3) to i8*), i8** getelementptr inbounds (%struct.Info* @info, i32 0, i32 4), align 8
- store i64 5168, i64* getelementptr inbounds (%struct.Info* @info, i32 0, i32 6), align 8
- store i64 16, i64* getelementptr inbounds (%struct.Info* @info, i32 0, i32 8), align 8
- store i64 16, i64* getelementptr inbounds (%struct.Info* @info, i32 0, i32 9), align 8
- store i64 16, i64* getelementptr inbounds (%struct.Info* @info, i32 0, i32 10), align 8
- %0 = load i64* getelementptr inbounds (%struct.Info* @info, i32 0, i32 8), align 8
+ store i8* bitcast (%struct.S1998* @s1998 to i8*), i8** getelementptr inbounds (%struct.Info, %struct.Info* @info, i32 0, i32 2), align 8
+ store i8* bitcast ([5 x %struct.S1998]* @a1998 to i8*), i8** getelementptr inbounds (%struct.Info, %struct.Info* @info, i32 0, i32 3), align 8
+ store i8* bitcast (%struct.S1998* getelementptr inbounds ([5 x %struct.S1998], [5 x %struct.S1998]* @a1998, i32 0, i64 3) to i8*), i8** getelementptr inbounds (%struct.Info, %struct.Info* @info, i32 0, i32 4), align 8
+ store i64 5168, i64* getelementptr inbounds (%struct.Info, %struct.Info* @info, i32 0, i32 6), align 8
+ store i64 16, i64* getelementptr inbounds (%struct.Info, %struct.Info* @info, i32 0, i32 8), align 8
+ store i64 16, i64* getelementptr inbounds (%struct.Info, %struct.Info* @info, i32 0, i32 9), align 8
+ store i64 16, i64* getelementptr inbounds (%struct.Info, %struct.Info* @info, i32 0, i32 10), align 8
+ %0 = load i64, i64* getelementptr inbounds (%struct.Info, %struct.Info* @info, i32 0, i32 8), align 8
%sub = sub i64 %0, 1
- %and = and i64 ptrtoint (%struct.S1998* getelementptr inbounds ([5 x %struct.S1998]* @a1998, i32 0, i64 3) to i64), %sub
+ %and = and i64 ptrtoint (%struct.S1998* getelementptr inbounds ([5 x %struct.S1998], [5 x %struct.S1998]* @a1998, i32 0, i64 3) to i64), %sub
%tobool = icmp ne i64 %and, 0
br i1 %tobool, label %if.then, label %if.end
if.then: ; preds = %entry
- %1 = load i32* @fails, align 4
+ %1 = load i32, i32* @fails, align 4
%inc = add nsw i32 %1, 1
store i32 %inc, i32* @fails, align 4
br label %if.end
@@ -57,299 +57,299 @@ if.then: ; preds = %entry
if.end: ; preds = %if.then, %entry
store i32 0, i32* %i, align 4
store i32 0, i32* %j, align 4
- %2 = load i32* %i, align 4
+ %2 = load i32, i32* %i, align 4
%idxprom = sext i32 %2 to i64
- %arrayidx = getelementptr inbounds [32 x i8*]* getelementptr inbounds (%struct.Info* @info, i32 0, i32 5), i32 0, i64 %idxprom
- store i8* bitcast (i32** getelementptr inbounds (%struct.S1998* @s1998, i32 0, i32 0, i64 1) to i8*), i8** %arrayidx, align 8
- %3 = load i32* %i, align 4
+ %arrayidx = getelementptr inbounds [32 x i8*], [32 x i8*]* getelementptr inbounds (%struct.Info, %struct.Info* @info, i32 0, i32 5), i32 0, i64 %idxprom
+ store i8* bitcast (i32** getelementptr inbounds (%struct.S1998, %struct.S1998* @s1998, i32 0, i32 0, i64 1) to i8*), i8** %arrayidx, align 8
+ %3 = load i32, i32* %i, align 4
%idxprom1 = sext i32 %3 to i64
- %arrayidx2 = getelementptr inbounds [32 x i64]* getelementptr inbounds (%struct.Info* @info, i32 0, i32 7), i32 0, i64 %idxprom1
+ %arrayidx2 = getelementptr inbounds [32 x i64], [32 x i64]* getelementptr inbounds (%struct.Info, %struct.Info* @info, i32 0, i32 7), i32 0, i64 %idxprom1
store i64 8, i64* %arrayidx2, align 8
- %4 = load i32* %i, align 4
+ %4 = load i32, i32* %i, align 4
%idxprom3 = sext i32 %4 to i64
- %arrayidx4 = getelementptr inbounds [32 x i64]* getelementptr inbounds (%struct.Info* @info, i32 0, i32 11), i32 0, i64 %idxprom3
+ %arrayidx4 = getelementptr inbounds [32 x i64], [32 x i64]* getelementptr inbounds (%struct.Info, %struct.Info* @info, i32 0, i32 11), i32 0, i64 %idxprom3
store i64 8, i64* %arrayidx4, align 8
- store i32* getelementptr inbounds ([256 x i32]* @intarray, i32 0, i64 190), i32** getelementptr inbounds (%struct.S1998* @s1998, i32 0, i32 0, i64 1), align 8
- store i32* getelementptr inbounds ([256 x i32]* @intarray, i32 0, i64 241), i32** getelementptr inbounds ([5 x %struct.S1998]* @a1998, i32 0, i64 2, i32 0, i64 1), align 8
- %5 = load i32* %i, align 4
+ store i32* getelementptr inbounds ([256 x i32], [256 x i32]* @intarray, i32 0, i64 190), i32** getelementptr inbounds (%struct.S1998, %struct.S1998* @s1998, i32 0, i32 0, i64 1), align 8
+ store i32* getelementptr inbounds ([256 x i32], [256 x i32]* @intarray, i32 0, i64 241), i32** getelementptr inbounds ([5 x %struct.S1998], [5 x %struct.S1998]* @a1998, i32 0, i64 2, i32 0, i64 1), align 8
+ %5 = load i32, i32* %i, align 4
%inc5 = add nsw i32 %5, 1
store i32 %inc5, i32* %i, align 4
- %6 = load i32* %i, align 4
+ %6 = load i32, i32* %i, align 4
%idxprom6 = sext i32 %6 to i64
- %arrayidx7 = getelementptr inbounds [32 x i8*]* getelementptr inbounds (%struct.Info* @info, i32 0, i32 5), i32 0, i64 %idxprom6
- store i8* bitcast (i64* getelementptr inbounds (%struct.S1998* @s1998, i32 0, i32 1) to i8*), i8** %arrayidx7, align 8
- %7 = load i32* %i, align 4
+ %arrayidx7 = getelementptr inbounds [32 x i8*], [32 x i8*]* getelementptr inbounds (%struct.Info, %struct.Info* @info, i32 0, i32 5), i32 0, i64 %idxprom6
+ store i8* bitcast (i64* getelementptr inbounds (%struct.S1998, %struct.S1998* @s1998, i32 0, i32 1) to i8*), i8** %arrayidx7, align 8
+ %7 = load i32, i32* %i, align 4
%idxprom8 = sext i32 %7 to i64
- %arrayidx9 = getelementptr inbounds [32 x i64]* getelementptr inbounds (%struct.Info* @info, i32 0, i32 7), i32 0, i64 %idxprom8
+ %arrayidx9 = getelementptr inbounds [32 x i64], [32 x i64]* getelementptr inbounds (%struct.Info, %struct.Info* @info, i32 0, i32 7), i32 0, i64 %idxprom8
store i64 8, i64* %arrayidx9, align 8
- %8 = load i32* %i, align 4
+ %8 = load i32, i32* %i, align 4
%idxprom10 = sext i32 %8 to i64
- %arrayidx11 = getelementptr inbounds [32 x i64]* getelementptr inbounds (%struct.Info* @info, i32 0, i32 11), i32 0, i64 %idxprom10
+ %arrayidx11 = getelementptr inbounds [32 x i64], [32 x i64]* getelementptr inbounds (%struct.Info, %struct.Info* @info, i32 0, i32 11), i32 0, i64 %idxprom10
store i64 8, i64* %arrayidx11, align 8
- store i64 -3866974208859106459, i64* getelementptr inbounds (%struct.S1998* @s1998, i32 0, i32 1), align 8
- store i64 -185376695371304091, i64* getelementptr inbounds ([5 x %struct.S1998]* @a1998, i32 0, i64 2, i32 1), align 8
- %9 = load i32* %i, align 4
+ store i64 -3866974208859106459, i64* getelementptr inbounds (%struct.S1998, %struct.S1998* @s1998, i32 0, i32 1), align 8
+ store i64 -185376695371304091, i64* getelementptr inbounds ([5 x %struct.S1998], [5 x %struct.S1998]* @a1998, i32 0, i64 2, i32 1), align 8
+ %9 = load i32, i32* %i, align 4
%inc12 = add nsw i32 %9, 1
store i32 %inc12, i32* %i, align 4
- %10 = load i32* %i, align 4
+ %10 = load i32, i32* %i, align 4
%idxprom13 = sext i32 %10 to i64
- %arrayidx14 = getelementptr inbounds [32 x i8*]* getelementptr inbounds (%struct.Info* @info, i32 0, i32 5), i32 0, i64 %idxprom13
- store i8* bitcast (i64* getelementptr inbounds (%struct.S1998* @s1998, i32 0, i32 2) to i8*), i8** %arrayidx14, align 8
- %11 = load i32* %i, align 4
+ %arrayidx14 = getelementptr inbounds [32 x i8*], [32 x i8*]* getelementptr inbounds (%struct.Info, %struct.Info* @info, i32 0, i32 5), i32 0, i64 %idxprom13
+ store i8* bitcast (i64* getelementptr inbounds (%struct.S1998, %struct.S1998* @s1998, i32 0, i32 2) to i8*), i8** %arrayidx14, align 8
+ %11 = load i32, i32* %i, align 4
%idxprom15 = sext i32 %11 to i64
- %arrayidx16 = getelementptr inbounds [32 x i64]* getelementptr inbounds (%struct.Info* @info, i32 0, i32 7), i32 0, i64 %idxprom15
+ %arrayidx16 = getelementptr inbounds [32 x i64], [32 x i64]* getelementptr inbounds (%struct.Info, %struct.Info* @info, i32 0, i32 7), i32 0, i64 %idxprom15
store i64 8, i64* %arrayidx16, align 8
- %12 = load i32* %i, align 4
+ %12 = load i32, i32* %i, align 4
%idxprom17 = sext i32 %12 to i64
- %arrayidx18 = getelementptr inbounds [32 x i64]* getelementptr inbounds (%struct.Info* @info, i32 0, i32 11), i32 0, i64 %idxprom17
+ %arrayidx18 = getelementptr inbounds [32 x i64], [32 x i64]* getelementptr inbounds (%struct.Info, %struct.Info* @info, i32 0, i32 11), i32 0, i64 %idxprom17
store i64 8, i64* %arrayidx18, align 8
- store i64 -963638028680427187, i64* getelementptr inbounds (%struct.S1998* @s1998, i32 0, i32 2), align 8
- store i64 7510542175772455554, i64* getelementptr inbounds ([5 x %struct.S1998]* @a1998, i32 0, i64 2, i32 2), align 8
- %13 = load i32* %i, align 4
+ store i64 -963638028680427187, i64* getelementptr inbounds (%struct.S1998, %struct.S1998* @s1998, i32 0, i32 2), align 8
+ store i64 7510542175772455554, i64* getelementptr inbounds ([5 x %struct.S1998], [5 x %struct.S1998]* @a1998, i32 0, i64 2, i32 2), align 8
+ %13 = load i32, i32* %i, align 4
%inc19 = add nsw i32 %13, 1
store i32 %inc19, i32* %i, align 4
- %14 = load i32* %i, align 4
+ %14 = load i32, i32* %i, align 4
%idxprom20 = sext i32 %14 to i64
- %arrayidx21 = getelementptr inbounds [32 x i8*]* getelementptr inbounds (%struct.Info* @info, i32 0, i32 5), i32 0, i64 %idxprom20
- store i8* bitcast (double* getelementptr inbounds (%struct.S1998* @s1998, i32 0, i32 3) to i8*), i8** %arrayidx21, align 8
- %15 = load i32* %i, align 4
+ %arrayidx21 = getelementptr inbounds [32 x i8*], [32 x i8*]* getelementptr inbounds (%struct.Info, %struct.Info* @info, i32 0, i32 5), i32 0, i64 %idxprom20
+ store i8* bitcast (double* getelementptr inbounds (%struct.S1998, %struct.S1998* @s1998, i32 0, i32 3) to i8*), i8** %arrayidx21, align 8
+ %15 = load i32, i32* %i, align 4
%idxprom22 = sext i32 %15 to i64
- %arrayidx23 = getelementptr inbounds [32 x i64]* getelementptr inbounds (%struct.Info* @info, i32 0, i32 7), i32 0, i64 %idxprom22
+ %arrayidx23 = getelementptr inbounds [32 x i64], [32 x i64]* getelementptr inbounds (%struct.Info, %struct.Info* @info, i32 0, i32 7), i32 0, i64 %idxprom22
store i64 8, i64* %arrayidx23, align 8
- %16 = load i32* %i, align 4
+ %16 = load i32, i32* %i, align 4
%idxprom24 = sext i32 %16 to i64
- %arrayidx25 = getelementptr inbounds [32 x i64]* getelementptr inbounds (%struct.Info* @info, i32 0, i32 11), i32 0, i64 %idxprom24
+ %arrayidx25 = getelementptr inbounds [32 x i64], [32 x i64]* getelementptr inbounds (%struct.Info, %struct.Info* @info, i32 0, i32 11), i32 0, i64 %idxprom24
store i64 16, i64* %arrayidx25, align 8
- store double 0xC0F8783300000000, double* getelementptr inbounds (%struct.S1998* @s1998, i32 0, i32 3), align 16
- store double 0xC10DF3CCC0000000, double* getelementptr inbounds ([5 x %struct.S1998]* @a1998, i32 0, i64 2, i32 3), align 16
- %17 = load i32* %i, align 4
+ store double 0xC0F8783300000000, double* getelementptr inbounds (%struct.S1998, %struct.S1998* @s1998, i32 0, i32 3), align 16
+ store double 0xC10DF3CCC0000000, double* getelementptr inbounds ([5 x %struct.S1998], [5 x %struct.S1998]* @a1998, i32 0, i64 2, i32 3), align 16
+ %17 = load i32, i32* %i, align 4
%inc26 = add nsw i32 %17, 1
store i32 %inc26, i32* %i, align 4
- %18 = load i32* %i, align 4
+ %18 = load i32, i32* %i, align 4
%idxprom27 = sext i32 %18 to i64
- %arrayidx28 = getelementptr inbounds [32 x i8*]* getelementptr inbounds (%struct.Info* @info, i32 0, i32 5), i32 0, i64 %idxprom27
- store i8* bitcast (i16* getelementptr inbounds (%struct.S1998* @s1998, i32 0, i32 4) to i8*), i8** %arrayidx28, align 8
- %19 = load i32* %i, align 4
+ %arrayidx28 = getelementptr inbounds [32 x i8*], [32 x i8*]* getelementptr inbounds (%struct.Info, %struct.Info* @info, i32 0, i32 5), i32 0, i64 %idxprom27
+ store i8* bitcast (i16* getelementptr inbounds (%struct.S1998, %struct.S1998* @s1998, i32 0, i32 4) to i8*), i8** %arrayidx28, align 8
+ %19 = load i32, i32* %i, align 4
%idxprom29 = sext i32 %19 to i64
- %arrayidx30 = getelementptr inbounds [32 x i64]* getelementptr inbounds (%struct.Info* @info, i32 0, i32 7), i32 0, i64 %idxprom29
+ %arrayidx30 = getelementptr inbounds [32 x i64], [32 x i64]* getelementptr inbounds (%struct.Info, %struct.Info* @info, i32 0, i32 7), i32 0, i64 %idxprom29
store i64 2, i64* %arrayidx30, align 8
- %20 = load i32* %i, align 4
+ %20 = load i32, i32* %i, align 4
%idxprom31 = sext i32 %20 to i64
- %arrayidx32 = getelementptr inbounds [32 x i64]* getelementptr inbounds (%struct.Info* @info, i32 0, i32 11), i32 0, i64 %idxprom31
+ %arrayidx32 = getelementptr inbounds [32 x i64], [32 x i64]* getelementptr inbounds (%struct.Info, %struct.Info* @info, i32 0, i32 11), i32 0, i64 %idxprom31
store i64 2, i64* %arrayidx32, align 8
- store i16 -15897, i16* getelementptr inbounds (%struct.S1998* @s1998, i32 0, i32 4), align 2
- store i16 30935, i16* getelementptr inbounds ([5 x %struct.S1998]* @a1998, i32 0, i64 2, i32 4), align 2
- %21 = load i32* %i, align 4
+ store i16 -15897, i16* getelementptr inbounds (%struct.S1998, %struct.S1998* @s1998, i32 0, i32 4), align 2
+ store i16 30935, i16* getelementptr inbounds ([5 x %struct.S1998], [5 x %struct.S1998]* @a1998, i32 0, i64 2, i32 4), align 2
+ %21 = load i32, i32* %i, align 4
%inc33 = add nsw i32 %21, 1
store i32 %inc33, i32* %i, align 4
- store i32 -419541644, i32* getelementptr inbounds (%struct.S1998* @s1998, i32 0, i32 5), align 4
- store i32 2125926812, i32* getelementptr inbounds ([5 x %struct.S1998]* @a1998, i32 0, i64 2, i32 5), align 4
- %22 = load i32* %j, align 4
+ store i32 -419541644, i32* getelementptr inbounds (%struct.S1998, %struct.S1998* @s1998, i32 0, i32 5), align 4
+ store i32 2125926812, i32* getelementptr inbounds ([5 x %struct.S1998], [5 x %struct.S1998]* @a1998, i32 0, i64 2, i32 5), align 4
+ %22 = load i32, i32* %j, align 4
%inc34 = add nsw i32 %22, 1
store i32 %inc34, i32* %j, align 4
- %23 = load i32* %i, align 4
+ %23 = load i32, i32* %i, align 4
%idxprom35 = sext i32 %23 to i64
- %arrayidx36 = getelementptr inbounds [32 x i8*]* getelementptr inbounds (%struct.Info* @info, i32 0, i32 5), i32 0, i64 %idxprom35
- store i8* bitcast (double* getelementptr inbounds (%struct.S1998* @s1998, i32 0, i32 6, i64 4, i32 0, i64 0) to i8*), i8** %arrayidx36, align 8
- %24 = load i32* %i, align 4
+ %arrayidx36 = getelementptr inbounds [32 x i8*], [32 x i8*]* getelementptr inbounds (%struct.Info, %struct.Info* @info, i32 0, i32 5), i32 0, i64 %idxprom35
+ store i8* bitcast (double* getelementptr inbounds (%struct.S1998, %struct.S1998* @s1998, i32 0, i32 6, i64 4, i32 0, i64 0) to i8*), i8** %arrayidx36, align 8
+ %24 = load i32, i32* %i, align 4
%idxprom37 = sext i32 %24 to i64
- %arrayidx38 = getelementptr inbounds [32 x i64]* getelementptr inbounds (%struct.Info* @info, i32 0, i32 7), i32 0, i64 %idxprom37
+ %arrayidx38 = getelementptr inbounds [32 x i64], [32 x i64]* getelementptr inbounds (%struct.Info, %struct.Info* @info, i32 0, i32 7), i32 0, i64 %idxprom37
store i64 8, i64* %arrayidx38, align 8
- %25 = load i32* %i, align 4
+ %25 = load i32, i32* %i, align 4
%idxprom39 = sext i32 %25 to i64
- %arrayidx40 = getelementptr inbounds [32 x i64]* getelementptr inbounds (%struct.Info* @info, i32 0, i32 11), i32 0, i64 %idxprom39
+ %arrayidx40 = getelementptr inbounds [32 x i64], [32 x i64]* getelementptr inbounds (%struct.Info, %struct.Info* @info, i32 0, i32 11), i32 0, i64 %idxprom39
store i64 8, i64* %arrayidx40, align 8
- store double 0xC0FC765780000000, double* getelementptr inbounds (%struct.S1998* @s1998, i32 0, i32 6, i64 4, i32 0, i64 0), align 8
- store double 0xC1025CD7A0000000, double* getelementptr inbounds ([5 x %struct.S1998]* @a1998, i32 0, i64 2, i32 6, i64 4, i32 0, i64 0), align 8
- %26 = load i32* %i, align 4
+ store double 0xC0FC765780000000, double* getelementptr inbounds (%struct.S1998, %struct.S1998* @s1998, i32 0, i32 6, i64 4, i32 0, i64 0), align 8
+ store double 0xC1025CD7A0000000, double* getelementptr inbounds ([5 x %struct.S1998], [5 x %struct.S1998]* @a1998, i32 0, i64 2, i32 6, i64 4, i32 0, i64 0), align 8
+ %26 = load i32, i32* %i, align 4
%inc41 = add nsw i32 %26, 1
store i32 %inc41, i32* %i, align 4
- %bf.load = load i32* getelementptr inbounds (%struct.S1998* @s1998, i32 0, i32 6, i64 4, i32 1), align 8
+ %bf.load = load i32, i32* getelementptr inbounds (%struct.S1998, %struct.S1998* @s1998, i32 0, i32 6, i64 4, i32 1), align 8
%bf.clear = and i32 %bf.load, 7
%bf.set = or i32 %bf.clear, 16
- store i32 %bf.set, i32* getelementptr inbounds (%struct.S1998* @s1998, i32 0, i32 6, i64 4, i32 1), align 8
- %bf.load42 = load i32* getelementptr inbounds ([5 x %struct.S1998]* @a1998, i32 0, i64 2, i32 6, i64 4, i32 1), align 8
+ store i32 %bf.set, i32* getelementptr inbounds (%struct.S1998, %struct.S1998* @s1998, i32 0, i32 6, i64 4, i32 1), align 8
+ %bf.load42 = load i32, i32* getelementptr inbounds ([5 x %struct.S1998], [5 x %struct.S1998]* @a1998, i32 0, i64 2, i32 6, i64 4, i32 1), align 8
%bf.clear43 = and i32 %bf.load42, 7
%bf.set44 = or i32 %bf.clear43, 24
- store i32 %bf.set44, i32* getelementptr inbounds ([5 x %struct.S1998]* @a1998, i32 0, i64 2, i32 6, i64 4, i32 1), align 8
- %27 = load i32* %j, align 4
+ store i32 %bf.set44, i32* getelementptr inbounds ([5 x %struct.S1998], [5 x %struct.S1998]* @a1998, i32 0, i64 2, i32 6, i64 4, i32 1), align 8
+ %27 = load i32, i32* %j, align 4
%inc45 = add nsw i32 %27, 1
store i32 %inc45, i32* %j, align 4
- %bf.load46 = load i16* getelementptr inbounds (%struct.S1998* @s1998, i32 0, i32 6, i64 4, i32 2), align 4
+ %bf.load46 = load i16, i16* getelementptr inbounds (%struct.S1998, %struct.S1998* @s1998, i32 0, i32 6, i64 4, i32 2), align 4
%bf.clear47 = and i16 %bf.load46, 127
- store i16 %bf.clear47, i16* getelementptr inbounds (%struct.S1998* @s1998, i32 0, i32 6, i64 4, i32 2), align 4
- %bf.load48 = load i16* getelementptr inbounds ([5 x %struct.S1998]* @a1998, i32 0, i64 2, i32 6, i64 4, i32 2), align 4
+ store i16 %bf.clear47, i16* getelementptr inbounds (%struct.S1998, %struct.S1998* @s1998, i32 0, i32 6, i64 4, i32 2), align 4
+ %bf.load48 = load i16, i16* getelementptr inbounds ([5 x %struct.S1998], [5 x %struct.S1998]* @a1998, i32 0, i64 2, i32 6, i64 4, i32 2), align 4
%bf.clear49 = and i16 %bf.load48, 127
- store i16 %bf.clear49, i16* getelementptr inbounds ([5 x %struct.S1998]* @a1998, i32 0, i64 2, i32 6, i64 4, i32 2), align 4
- %28 = load i32* %j, align 4
+ store i16 %bf.clear49, i16* getelementptr inbounds ([5 x %struct.S1998], [5 x %struct.S1998]* @a1998, i32 0, i64 2, i32 6, i64 4, i32 2), align 4
+ %28 = load i32, i32* %j, align 4
%inc50 = add nsw i32 %28, 1
store i32 %inc50, i32* %j, align 4
- %bf.load51 = load i32* getelementptr inbounds (%struct.S1998* @s1998, i32 0, i32 6, i64 4, i32 3), align 8
+ %bf.load51 = load i32, i32* getelementptr inbounds (%struct.S1998, %struct.S1998* @s1998, i32 0, i32 6, i64 4, i32 3), align 8
%bf.clear52 = and i32 %bf.load51, 63
- store i32 %bf.clear52, i32* getelementptr inbounds (%struct.S1998* @s1998, i32 0, i32 6, i64 4, i32 3), align 8
- %bf.load53 = load i32* getelementptr inbounds ([5 x %struct.S1998]* @a1998, i32 0, i64 2, i32 6, i64 4, i32 3), align 8
+ store i32 %bf.clear52, i32* getelementptr inbounds (%struct.S1998, %struct.S1998* @s1998, i32 0, i32 6, i64 4, i32 3), align 8
+ %bf.load53 = load i32, i32* getelementptr inbounds ([5 x %struct.S1998], [5 x %struct.S1998]* @a1998, i32 0, i64 2, i32 6, i64 4, i32 3), align 8
%bf.clear54 = and i32 %bf.load53, 63
%bf.set55 = or i32 %bf.clear54, 64
- store i32 %bf.set55, i32* getelementptr inbounds ([5 x %struct.S1998]* @a1998, i32 0, i64 2, i32 6, i64 4, i32 3), align 8
- %29 = load i32* %j, align 4
+ store i32 %bf.set55, i32* getelementptr inbounds ([5 x %struct.S1998], [5 x %struct.S1998]* @a1998, i32 0, i64 2, i32 6, i64 4, i32 3), align 8
+ %29 = load i32, i32* %j, align 4
%inc56 = add nsw i32 %29, 1
store i32 %inc56, i32* %j, align 4
- %bf.load57 = load i24* bitcast ([3 x i8]* getelementptr inbounds (%struct.S1998* @s1998, i32 0, i32 6, i64 4, i32 4) to i24*), align 4
+ %bf.load57 = load i24, i24* bitcast ([3 x i8]* getelementptr inbounds (%struct.S1998, %struct.S1998* @s1998, i32 0, i32 6, i64 4, i32 4) to i24*), align 4
%bf.clear58 = and i24 %bf.load57, 63
- store i24 %bf.clear58, i24* bitcast ([3 x i8]* getelementptr inbounds (%struct.S1998* @s1998, i32 0, i32 6, i64 4, i32 4) to i24*), align 4
- %bf.load59 = load i24* bitcast ([3 x i8]* getelementptr inbounds ([5 x %struct.S1998]* @a1998, i32 0, i64 2, i32 6, i64 4, i32 4) to i24*), align 4
+ store i24 %bf.clear58, i24* bitcast ([3 x i8]* getelementptr inbounds (%struct.S1998, %struct.S1998* @s1998, i32 0, i32 6, i64 4, i32 4) to i24*), align 4
+ %bf.load59 = load i24, i24* bitcast ([3 x i8]* getelementptr inbounds ([5 x %struct.S1998], [5 x %struct.S1998]* @a1998, i32 0, i64 2, i32 6, i64 4, i32 4) to i24*), align 4
%bf.clear60 = and i24 %bf.load59, 63
- store i24 %bf.clear60, i24* bitcast ([3 x i8]* getelementptr inbounds ([5 x %struct.S1998]* @a1998, i32 0, i64 2, i32 6, i64 4, i32 4) to i24*), align 4
- %30 = load i32* %j, align 4
+ store i24 %bf.clear60, i24* bitcast ([3 x i8]* getelementptr inbounds ([5 x %struct.S1998], [5 x %struct.S1998]* @a1998, i32 0, i64 2, i32 6, i64 4, i32 4) to i24*), align 4
+ %30 = load i32, i32* %j, align 4
%inc61 = add nsw i32 %30, 1
store i32 %inc61, i32* %j, align 4
- %31 = load i32* %i, align 4
+ %31 = load i32, i32* %i, align 4
%idxprom62 = sext i32 %31 to i64
- %arrayidx63 = getelementptr inbounds [32 x i8*]* getelementptr inbounds (%struct.Info* @info, i32 0, i32 5), i32 0, i64 %idxprom62
- store i8* getelementptr inbounds (%struct.S1998* @s1998, i32 0, i32 6, i64 4, i32 5, i64 5), i8** %arrayidx63, align 8
- %32 = load i32* %i, align 4
+ %arrayidx63 = getelementptr inbounds [32 x i8*], [32 x i8*]* getelementptr inbounds (%struct.Info, %struct.Info* @info, i32 0, i32 5), i32 0, i64 %idxprom62
+ store i8* getelementptr inbounds (%struct.S1998, %struct.S1998* @s1998, i32 0, i32 6, i64 4, i32 5, i64 5), i8** %arrayidx63, align 8
+ %32 = load i32, i32* %i, align 4
%idxprom64 = sext i32 %32 to i64
- %arrayidx65 = getelementptr inbounds [32 x i64]* getelementptr inbounds (%struct.Info* @info, i32 0, i32 7), i32 0, i64 %idxprom64
+ %arrayidx65 = getelementptr inbounds [32 x i64], [32 x i64]* getelementptr inbounds (%struct.Info, %struct.Info* @info, i32 0, i32 7), i32 0, i64 %idxprom64
store i64 1, i64* %arrayidx65, align 8
- %33 = load i32* %i, align 4
+ %33 = load i32, i32* %i, align 4
%idxprom66 = sext i32 %33 to i64
- %arrayidx67 = getelementptr inbounds [32 x i64]* getelementptr inbounds (%struct.Info* @info, i32 0, i32 11), i32 0, i64 %idxprom66
+ %arrayidx67 = getelementptr inbounds [32 x i64], [32 x i64]* getelementptr inbounds (%struct.Info, %struct.Info* @info, i32 0, i32 11), i32 0, i64 %idxprom66
store i64 1, i64* %arrayidx67, align 8
- store i8 -83, i8* getelementptr inbounds (%struct.S1998* @s1998, i32 0, i32 6, i64 4, i32 5, i64 5), align 1
- store i8 -67, i8* getelementptr inbounds ([5 x %struct.S1998]* @a1998, i32 0, i64 2, i32 6, i64 4, i32 5, i64 5), align 1
- %34 = load i32* %i, align 4
+ store i8 -83, i8* getelementptr inbounds (%struct.S1998, %struct.S1998* @s1998, i32 0, i32 6, i64 4, i32 5, i64 5), align 1
+ store i8 -67, i8* getelementptr inbounds ([5 x %struct.S1998], [5 x %struct.S1998]* @a1998, i32 0, i64 2, i32 6, i64 4, i32 5, i64 5), align 1
+ %34 = load i32, i32* %i, align 4
%inc68 = add nsw i32 %34, 1
store i32 %inc68, i32* %i, align 4
- %35 = load i32* %i, align 4
+ %35 = load i32, i32* %i, align 4
%idxprom69 = sext i32 %35 to i64
- %arrayidx70 = getelementptr inbounds [32 x i8*]* getelementptr inbounds (%struct.Info* @info, i32 0, i32 5), i32 0, i64 %idxprom69
- store i8* getelementptr inbounds (%struct.S1998* @s1998, i32 0, i32 6, i64 4, i32 5, i64 1), i8** %arrayidx70, align 8
- %36 = load i32* %i, align 4
+ %arrayidx70 = getelementptr inbounds [32 x i8*], [32 x i8*]* getelementptr inbounds (%struct.Info, %struct.Info* @info, i32 0, i32 5), i32 0, i64 %idxprom69
+ store i8* getelementptr inbounds (%struct.S1998, %struct.S1998* @s1998, i32 0, i32 6, i64 4, i32 5, i64 1), i8** %arrayidx70, align 8
+ %36 = load i32, i32* %i, align 4
%idxprom71 = sext i32 %36 to i64
- %arrayidx72 = getelementptr inbounds [32 x i64]* getelementptr inbounds (%struct.Info* @info, i32 0, i32 7), i32 0, i64 %idxprom71
+ %arrayidx72 = getelementptr inbounds [32 x i64], [32 x i64]* getelementptr inbounds (%struct.Info, %struct.Info* @info, i32 0, i32 7), i32 0, i64 %idxprom71
store i64 1, i64* %arrayidx72, align 8
- %37 = load i32* %i, align 4
+ %37 = load i32, i32* %i, align 4
%idxprom73 = sext i32 %37 to i64
- %arrayidx74 = getelementptr inbounds [32 x i64]* getelementptr inbounds (%struct.Info* @info, i32 0, i32 11), i32 0, i64 %idxprom73
+ %arrayidx74 = getelementptr inbounds [32 x i64], [32 x i64]* getelementptr inbounds (%struct.Info, %struct.Info* @info, i32 0, i32 11), i32 0, i64 %idxprom73
store i64 1, i64* %arrayidx74, align 8
- store i8 34, i8* getelementptr inbounds (%struct.S1998* @s1998, i32 0, i32 6, i64 4, i32 5, i64 1), align 1
- store i8 64, i8* getelementptr inbounds ([5 x %struct.S1998]* @a1998, i32 0, i64 2, i32 6, i64 4, i32 5, i64 1), align 1
- %38 = load i32* %i, align 4
+ store i8 34, i8* getelementptr inbounds (%struct.S1998, %struct.S1998* @s1998, i32 0, i32 6, i64 4, i32 5, i64 1), align 1
+ store i8 64, i8* getelementptr inbounds ([5 x %struct.S1998], [5 x %struct.S1998]* @a1998, i32 0, i64 2, i32 6, i64 4, i32 5, i64 1), align 1
+ %38 = load i32, i32* %i, align 4
%inc75 = add nsw i32 %38, 1
store i32 %inc75, i32* %i, align 4
- %39 = load i32* %i, align 4
+ %39 = load i32, i32* %i, align 4
%idxprom76 = sext i32 %39 to i64
- %arrayidx77 = getelementptr inbounds [32 x i8*]* getelementptr inbounds (%struct.Info* @info, i32 0, i32 5), i32 0, i64 %idxprom76
- store i8* bitcast (i32* getelementptr inbounds (%struct.S1998* @s1998, i32 0, i32 6, i64 4, i32 6, i64 3) to i8*), i8** %arrayidx77, align 8
- %40 = load i32* %i, align 4
+ %arrayidx77 = getelementptr inbounds [32 x i8*], [32 x i8*]* getelementptr inbounds (%struct.Info, %struct.Info* @info, i32 0, i32 5), i32 0, i64 %idxprom76
+ store i8* bitcast (i32* getelementptr inbounds (%struct.S1998, %struct.S1998* @s1998, i32 0, i32 6, i64 4, i32 6, i64 3) to i8*), i8** %arrayidx77, align 8
+ %40 = load i32, i32* %i, align 4
%idxprom78 = sext i32 %40 to i64
- %arrayidx79 = getelementptr inbounds [32 x i64]* getelementptr inbounds (%struct.Info* @info, i32 0, i32 7), i32 0, i64 %idxprom78
+ %arrayidx79 = getelementptr inbounds [32 x i64], [32 x i64]* getelementptr inbounds (%struct.Info, %struct.Info* @info, i32 0, i32 7), i32 0, i64 %idxprom78
store i64 4, i64* %arrayidx79, align 8
- %41 = load i32* %i, align 4
+ %41 = load i32, i32* %i, align 4
%idxprom80 = sext i32 %41 to i64
- %arrayidx81 = getelementptr inbounds [32 x i64]* getelementptr inbounds (%struct.Info* @info, i32 0, i32 11), i32 0, i64 %idxprom80
+ %arrayidx81 = getelementptr inbounds [32 x i64], [32 x i64]* getelementptr inbounds (%struct.Info, %struct.Info* @info, i32 0, i32 11), i32 0, i64 %idxprom80
store i64 4, i64* %arrayidx81, align 8
- store i32 -3, i32* getelementptr inbounds (%struct.S1998* @s1998, i32 0, i32 6, i64 4, i32 6, i64 3), align 4
- store i32 -3, i32* getelementptr inbounds ([5 x %struct.S1998]* @a1998, i32 0, i64 2, i32 6, i64 4, i32 6, i64 3), align 4
- %42 = load i32* %i, align 4
+ store i32 -3, i32* getelementptr inbounds (%struct.S1998, %struct.S1998* @s1998, i32 0, i32 6, i64 4, i32 6, i64 3), align 4
+ store i32 -3, i32* getelementptr inbounds ([5 x %struct.S1998], [5 x %struct.S1998]* @a1998, i32 0, i64 2, i32 6, i64 4, i32 6, i64 3), align 4
+ %42 = load i32, i32* %i, align 4
%inc82 = add nsw i32 %42, 1
store i32 %inc82, i32* %i, align 4
- %43 = load i32* %i, align 4
+ %43 = load i32, i32* %i, align 4
%idxprom83 = sext i32 %43 to i64
- %arrayidx84 = getelementptr inbounds [32 x i8*]* getelementptr inbounds (%struct.Info* @info, i32 0, i32 5), i32 0, i64 %idxprom83
- store i8* getelementptr inbounds (%struct.S1998* @s1998, i32 0, i32 6, i64 4, i32 7), i8** %arrayidx84, align 8
- %44 = load i32* %i, align 4
+ %arrayidx84 = getelementptr inbounds [32 x i8*], [32 x i8*]* getelementptr inbounds (%struct.Info, %struct.Info* @info, i32 0, i32 5), i32 0, i64 %idxprom83
+ store i8* getelementptr inbounds (%struct.S1998, %struct.S1998* @s1998, i32 0, i32 6, i64 4, i32 7), i8** %arrayidx84, align 8
+ %44 = load i32, i32* %i, align 4
%idxprom85 = sext i32 %44 to i64
- %arrayidx86 = getelementptr inbounds [32 x i64]* getelementptr inbounds (%struct.Info* @info, i32 0, i32 7), i32 0, i64 %idxprom85
+ %arrayidx86 = getelementptr inbounds [32 x i64], [32 x i64]* getelementptr inbounds (%struct.Info, %struct.Info* @info, i32 0, i32 7), i32 0, i64 %idxprom85
store i64 1, i64* %arrayidx86, align 8
- %45 = load i32* %i, align 4
+ %45 = load i32, i32* %i, align 4
%idxprom87 = sext i32 %45 to i64
- %arrayidx88 = getelementptr inbounds [32 x i64]* getelementptr inbounds (%struct.Info* @info, i32 0, i32 11), i32 0, i64 %idxprom87
+ %arrayidx88 = getelementptr inbounds [32 x i64], [32 x i64]* getelementptr inbounds (%struct.Info, %struct.Info* @info, i32 0, i32 11), i32 0, i64 %idxprom87
store i64 1, i64* %arrayidx88, align 8
- store i8 106, i8* getelementptr inbounds (%struct.S1998* @s1998, i32 0, i32 6, i64 4, i32 7), align 1
- store i8 -102, i8* getelementptr inbounds ([5 x %struct.S1998]* @a1998, i32 0, i64 2, i32 6, i64 4, i32 7), align 1
- %46 = load i32* %i, align 4
+ store i8 106, i8* getelementptr inbounds (%struct.S1998, %struct.S1998* @s1998, i32 0, i32 6, i64 4, i32 7), align 1
+ store i8 -102, i8* getelementptr inbounds ([5 x %struct.S1998], [5 x %struct.S1998]* @a1998, i32 0, i64 2, i32 6, i64 4, i32 7), align 1
+ %46 = load i32, i32* %i, align 4
%inc89 = add nsw i32 %46, 1
store i32 %inc89, i32* %i, align 4
- %47 = load i32* %i, align 4
+ %47 = load i32, i32* %i, align 4
%idxprom90 = sext i32 %47 to i64
- %arrayidx91 = getelementptr inbounds [32 x i8*]* getelementptr inbounds (%struct.Info* @info, i32 0, i32 5), i32 0, i64 %idxprom90
- store i8* bitcast (i16* getelementptr inbounds (%struct.S1998* @s1998, i32 0, i32 7) to i8*), i8** %arrayidx91, align 8
- %48 = load i32* %i, align 4
+ %arrayidx91 = getelementptr inbounds [32 x i8*], [32 x i8*]* getelementptr inbounds (%struct.Info, %struct.Info* @info, i32 0, i32 5), i32 0, i64 %idxprom90
+ store i8* bitcast (i16* getelementptr inbounds (%struct.S1998, %struct.S1998* @s1998, i32 0, i32 7) to i8*), i8** %arrayidx91, align 8
+ %48 = load i32, i32* %i, align 4
%idxprom92 = sext i32 %48 to i64
- %arrayidx93 = getelementptr inbounds [32 x i64]* getelementptr inbounds (%struct.Info* @info, i32 0, i32 7), i32 0, i64 %idxprom92
+ %arrayidx93 = getelementptr inbounds [32 x i64], [32 x i64]* getelementptr inbounds (%struct.Info, %struct.Info* @info, i32 0, i32 7), i32 0, i64 %idxprom92
store i64 2, i64* %arrayidx93, align 8
- %49 = load i32* %i, align 4
+ %49 = load i32, i32* %i, align 4
%idxprom94 = sext i32 %49 to i64
- %arrayidx95 = getelementptr inbounds [32 x i64]* getelementptr inbounds (%struct.Info* @info, i32 0, i32 11), i32 0, i64 %idxprom94
+ %arrayidx95 = getelementptr inbounds [32 x i64], [32 x i64]* getelementptr inbounds (%struct.Info, %struct.Info* @info, i32 0, i32 11), i32 0, i64 %idxprom94
store i64 2, i64* %arrayidx95, align 8
- store i16 29665, i16* getelementptr inbounds (%struct.S1998* @s1998, i32 0, i32 7), align 2
- store i16 7107, i16* getelementptr inbounds ([5 x %struct.S1998]* @a1998, i32 0, i64 2, i32 7), align 2
- %50 = load i32* %i, align 4
+ store i16 29665, i16* getelementptr inbounds (%struct.S1998, %struct.S1998* @s1998, i32 0, i32 7), align 2
+ store i16 7107, i16* getelementptr inbounds ([5 x %struct.S1998], [5 x %struct.S1998]* @a1998, i32 0, i64 2, i32 7), align 2
+ %50 = load i32, i32* %i, align 4
%inc96 = add nsw i32 %50, 1
store i32 %inc96, i32* %i, align 4
- %51 = load i32* %i, align 4
+ %51 = load i32, i32* %i, align 4
%idxprom97 = sext i32 %51 to i64
- %arrayidx98 = getelementptr inbounds [32 x i8*]* getelementptr inbounds (%struct.Info* @info, i32 0, i32 5), i32 0, i64 %idxprom97
- store i8* getelementptr inbounds (%struct.S1998* @s1998, i32 0, i32 8), i8** %arrayidx98, align 8
- %52 = load i32* %i, align 4
+ %arrayidx98 = getelementptr inbounds [32 x i8*], [32 x i8*]* getelementptr inbounds (%struct.Info, %struct.Info* @info, i32 0, i32 5), i32 0, i64 %idxprom97
+ store i8* getelementptr inbounds (%struct.S1998, %struct.S1998* @s1998, i32 0, i32 8), i8** %arrayidx98, align 8
+ %52 = load i32, i32* %i, align 4
%idxprom99 = sext i32 %52 to i64
- %arrayidx100 = getelementptr inbounds [32 x i64]* getelementptr inbounds (%struct.Info* @info, i32 0, i32 7), i32 0, i64 %idxprom99
+ %arrayidx100 = getelementptr inbounds [32 x i64], [32 x i64]* getelementptr inbounds (%struct.Info, %struct.Info* @info, i32 0, i32 7), i32 0, i64 %idxprom99
store i64 1, i64* %arrayidx100, align 8
- %53 = load i32* %i, align 4
+ %53 = load i32, i32* %i, align 4
%idxprom101 = sext i32 %53 to i64
- %arrayidx102 = getelementptr inbounds [32 x i64]* getelementptr inbounds (%struct.Info* @info, i32 0, i32 11), i32 0, i64 %idxprom101
+ %arrayidx102 = getelementptr inbounds [32 x i64], [32 x i64]* getelementptr inbounds (%struct.Info, %struct.Info* @info, i32 0, i32 11), i32 0, i64 %idxprom101
store i64 1, i64* %arrayidx102, align 8
- store i8 52, i8* getelementptr inbounds (%struct.S1998* @s1998, i32 0, i32 8), align 1
- store i8 -86, i8* getelementptr inbounds ([5 x %struct.S1998]* @a1998, i32 0, i64 2, i32 8), align 1
- %54 = load i32* %i, align 4
+ store i8 52, i8* getelementptr inbounds (%struct.S1998, %struct.S1998* @s1998, i32 0, i32 8), align 1
+ store i8 -86, i8* getelementptr inbounds ([5 x %struct.S1998], [5 x %struct.S1998]* @a1998, i32 0, i64 2, i32 8), align 1
+ %54 = load i32, i32* %i, align 4
%inc103 = add nsw i32 %54, 1
store i32 %inc103, i32* %i, align 4
- %55 = load i32* %i, align 4
+ %55 = load i32, i32* %i, align 4
%idxprom104 = sext i32 %55 to i64
- %arrayidx105 = getelementptr inbounds [32 x i8*]* getelementptr inbounds (%struct.Info* @info, i32 0, i32 5), i32 0, i64 %idxprom104
- store i8* bitcast (i32* getelementptr inbounds (%struct.S1998* @s1998, i32 0, i32 9) to i8*), i8** %arrayidx105, align 8
- %56 = load i32* %i, align 4
+ %arrayidx105 = getelementptr inbounds [32 x i8*], [32 x i8*]* getelementptr inbounds (%struct.Info, %struct.Info* @info, i32 0, i32 5), i32 0, i64 %idxprom104
+ store i8* bitcast (i32* getelementptr inbounds (%struct.S1998, %struct.S1998* @s1998, i32 0, i32 9) to i8*), i8** %arrayidx105, align 8
+ %56 = load i32, i32* %i, align 4
%idxprom106 = sext i32 %56 to i64
- %arrayidx107 = getelementptr inbounds [32 x i64]* getelementptr inbounds (%struct.Info* @info, i32 0, i32 7), i32 0, i64 %idxprom106
+ %arrayidx107 = getelementptr inbounds [32 x i64], [32 x i64]* getelementptr inbounds (%struct.Info, %struct.Info* @info, i32 0, i32 7), i32 0, i64 %idxprom106
store i64 4, i64* %arrayidx107, align 8
- %57 = load i32* %i, align 4
+ %57 = load i32, i32* %i, align 4
%idxprom108 = sext i32 %57 to i64
- %arrayidx109 = getelementptr inbounds [32 x i64]* getelementptr inbounds (%struct.Info* @info, i32 0, i32 11), i32 0, i64 %idxprom108
+ %arrayidx109 = getelementptr inbounds [32 x i64], [32 x i64]* getelementptr inbounds (%struct.Info, %struct.Info* @info, i32 0, i32 11), i32 0, i64 %idxprom108
store i64 4, i64* %arrayidx109, align 8
- store i32 -54118453, i32* getelementptr inbounds (%struct.S1998* @s1998, i32 0, i32 9), align 4
- store i32 1668755823, i32* getelementptr inbounds ([5 x %struct.S1998]* @a1998, i32 0, i64 2, i32 9), align 4
- %58 = load i32* %i, align 4
+ store i32 -54118453, i32* getelementptr inbounds (%struct.S1998, %struct.S1998* @s1998, i32 0, i32 9), align 4
+ store i32 1668755823, i32* getelementptr inbounds ([5 x %struct.S1998], [5 x %struct.S1998]* @a1998, i32 0, i64 2, i32 9), align 4
+ %58 = load i32, i32* %i, align 4
%inc110 = add nsw i32 %58, 1
store i32 %inc110, i32* %i, align 4
store i32 %inc110, i32* %tmp
- %59 = load i32* %tmp
- %60 = load i32* %i, align 4
- store i32 %60, i32* getelementptr inbounds (%struct.Info* @info, i32 0, i32 0), align 4
- %61 = load i32* %j, align 4
- store i32 %61, i32* getelementptr inbounds (%struct.Info* @info, i32 0, i32 1), align 4
+ %59 = load i32, i32* %tmp
+ %60 = load i32, i32* %i, align 4
+ store i32 %60, i32* getelementptr inbounds (%struct.Info, %struct.Info* @info, i32 0, i32 0), align 4
+ %61 = load i32, i32* %j, align 4
+ store i32 %61, i32* getelementptr inbounds (%struct.Info, %struct.Info* @info, i32 0, i32 1), align 4
%62 = bitcast %struct.S1998* %agg.tmp111 to i8*
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %62, i8* bitcast (%struct.S1998* @s1998 to i8*), i64 5168, i32 16, i1 false)
%63 = bitcast %struct.S1998* %agg.tmp112 to i8*
- call void @llvm.memcpy.p0i8.p0i8.i64(i8* %63, i8* bitcast (%struct.S1998* getelementptr inbounds ([5 x %struct.S1998]* @a1998, i32 0, i64 2) to i8*), i64 5168, i32 16, i1 false)
- call void @check1998(%struct.S1998* sret %agg.tmp, %struct.S1998* byval align 16 %agg.tmp111, %struct.S1998* getelementptr inbounds ([5 x %struct.S1998]* @a1998, i32 0, i64 1), %struct.S1998* byval align 16 %agg.tmp112)
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* %63, i8* bitcast (%struct.S1998* getelementptr inbounds ([5 x %struct.S1998], [5 x %struct.S1998]* @a1998, i32 0, i64 2) to i8*), i64 5168, i32 16, i1 false)
+ call void @check1998(%struct.S1998* sret %agg.tmp, %struct.S1998* byval align 16 %agg.tmp111, %struct.S1998* getelementptr inbounds ([5 x %struct.S1998], [5 x %struct.S1998]* @a1998, i32 0, i64 1), %struct.S1998* byval align 16 %agg.tmp112)
call void @checkx1998(%struct.S1998* byval align 16 %agg.tmp)
%64 = bitcast %struct.S1998* %agg.tmp113 to i8*
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %64, i8* bitcast (%struct.S1998* @s1998 to i8*), i64 5168, i32 16, i1 false)
%65 = bitcast %struct.S1998* %agg.tmp114 to i8*
- call void @llvm.memcpy.p0i8.p0i8.i64(i8* %65, i8* bitcast (%struct.S1998* getelementptr inbounds ([5 x %struct.S1998]* @a1998, i32 0, i64 2) to i8*), i64 5168, i32 16, i1 false)
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* %65, i8* bitcast (%struct.S1998* getelementptr inbounds ([5 x %struct.S1998], [5 x %struct.S1998]* @a1998, i32 0, i64 2) to i8*), i64 5168, i32 16, i1 false)
%66 = bitcast %struct.S1998* %agg.tmp115 to i8*
- call void @llvm.memcpy.p0i8.p0i8.i64(i8* %66, i8* bitcast (%struct.S1998* getelementptr inbounds ([5 x %struct.S1998]* @a1998, i32 0, i64 2) to i8*), i64 5168, i32 16, i1 false)
- call void (i32, ...)* @check1998va(i32 signext 1, double 1.000000e+00, %struct.S1998* byval align 16 %agg.tmp113, i64 2, %struct.S1998* byval align 16 %agg.tmp114, %struct.S1998* byval align 16 %agg.tmp115)
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* %66, i8* bitcast (%struct.S1998* getelementptr inbounds ([5 x %struct.S1998], [5 x %struct.S1998]* @a1998, i32 0, i64 2) to i8*), i64 5168, i32 16, i1 false)
+ call void (i32, ...) @check1998va(i32 signext 1, double 1.000000e+00, %struct.S1998* byval align 16 %agg.tmp113, i64 2, %struct.S1998* byval align 16 %agg.tmp114, %struct.S1998* byval align 16 %agg.tmp115)
%67 = bitcast %struct.S1998* %agg.tmp116 to i8*
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %67, i8* bitcast (%struct.S1998* @s1998 to i8*), i64 5168, i32 16, i1 false)
%68 = bitcast %struct.S1998* %agg.tmp117 to i8*
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %68, i8* bitcast (%struct.S1998* @s1998 to i8*), i64 5168, i32 16, i1 false)
%69 = bitcast %struct.S1998* %agg.tmp118 to i8*
- call void @llvm.memcpy.p0i8.p0i8.i64(i8* %69, i8* bitcast (%struct.S1998* getelementptr inbounds ([5 x %struct.S1998]* @a1998, i32 0, i64 2) to i8*), i64 5168, i32 16, i1 false)
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* %69, i8* bitcast (%struct.S1998* getelementptr inbounds ([5 x %struct.S1998], [5 x %struct.S1998]* @a1998, i32 0, i64 2) to i8*), i64 5168, i32 16, i1 false)
%70 = bitcast %struct.S1998* %agg.tmp119 to i8*
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %70, i8* bitcast (%struct.S1998* @s1998 to i8*), i64 5168, i32 16, i1 false)
- call void (i32, ...)* @check1998va(i32 signext 2, %struct.S1998* byval align 16 %agg.tmp116, %struct.S1998* byval align 16 %agg.tmp117, ppc_fp128 0xM40000000000000000000000000000000, %struct.S1998* byval align 16 %agg.tmp118, %struct.S1998* byval align 16 %agg.tmp119)
+ call void (i32, ...) @check1998va(i32 signext 2, %struct.S1998* byval align 16 %agg.tmp116, %struct.S1998* byval align 16 %agg.tmp117, ppc_fp128 0xM40000000000000000000000000000000, %struct.S1998* byval align 16 %agg.tmp118, %struct.S1998* byval align 16 %agg.tmp119)
ret void
}
diff --git a/test/CodeGen/PowerPC/resolvefi-disp.ll b/test/CodeGen/PowerPC/resolvefi-disp.ll
index ca42bcd767a0..a1c2070a6f44 100644
--- a/test/CodeGen/PowerPC/resolvefi-disp.ll
+++ b/test/CodeGen/PowerPC/resolvefi-disp.ll
@@ -41,23 +41,23 @@ entry:
call void @llvm.memset.p0i8.i64(i8* %7, i8 0, i64 11104, i32 32, i1 false)
%8 = bitcast %struct.S2760* %b2 to i8*
call void @llvm.memset.p0i8.i64(i8* %8, i8 0, i64 11104, i32 32, i1 false)
- %b = getelementptr inbounds %struct.S2760* %arg0, i32 0, i32 1
- %g = getelementptr inbounds %struct.anon* %b, i32 0, i32 1
- %9 = load i64* %g, align 8
- %10 = load i64* getelementptr inbounds (%struct.S2760* @s2760, i32 0, i32 1, i32 1), align 8
+ %b = getelementptr inbounds %struct.S2760, %struct.S2760* %arg0, i32 0, i32 1
+ %g = getelementptr inbounds %struct.anon, %struct.anon* %b, i32 0, i32 1
+ %9 = load i64, i64* %g, align 8
+ %10 = load i64, i64* getelementptr inbounds (%struct.S2760, %struct.S2760* @s2760, i32 0, i32 1, i32 1), align 8
%cmp = icmp ne i64 %9, %10
br i1 %cmp, label %if.then, label %if.end
if.then: ; preds = %entry
- %11 = load i32* @fails, align 4
+ %11 = load i32, i32* @fails, align 4
%inc = add nsw i32 %11, 1
store i32 %inc, i32* @fails, align 4
br label %if.end
if.end: ; preds = %if.then, %entry
- %12 = load i64* getelementptr inbounds (%struct.S2760* @s2760, i32 0, i32 1, i32 1), align 8
- %b3 = getelementptr inbounds %struct.S2760* %ret, i32 0, i32 1
- %g4 = getelementptr inbounds %struct.anon* %b3, i32 0, i32 1
+ %12 = load i64, i64* getelementptr inbounds (%struct.S2760, %struct.S2760* @s2760, i32 0, i32 1, i32 1), align 8
+ %b3 = getelementptr inbounds %struct.S2760, %struct.S2760* %ret, i32 0, i32 1
+ %g4 = getelementptr inbounds %struct.anon, %struct.anon* %b3, i32 0, i32 1
store i64 %12, i64* %g4, align 8
%13 = bitcast %struct.S2760* %agg.result to i8*
%14 = bitcast %struct.S2760* %ret to i8*
diff --git a/test/CodeGen/PowerPC/retaddr2.ll b/test/CodeGen/PowerPC/retaddr2.ll
index 8fa3b4d13b7e..8581f6cb9a38 100644
--- a/test/CodeGen/PowerPC/retaddr2.ll
+++ b/test/CodeGen/PowerPC/retaddr2.ll
@@ -12,8 +12,7 @@ entry:
; CHECK-LABEL: @test1
; CHECK: mflr 0
; CHECK: std 0, 16(1)
-; FIXME: These next two lines don't both need to load the same value.
-; CHECK-DAG: ld 3, 16(1)
+; CHECK-DAG: ld 3, 64(1)
; CHECK-DAG: ld 0, 16(1)
; CHECK: mtlr 0
; CHECK: blr
diff --git a/test/CodeGen/PowerPC/return-val-i128.ll b/test/CodeGen/PowerPC/return-val-i128.ll
index e14a43809a7b..2f924096661a 100644
--- a/test/CodeGen/PowerPC/return-val-i128.ll
+++ b/test/CodeGen/PowerPC/return-val-i128.ll
@@ -7,29 +7,29 @@ entry:
%tmp = alloca i128, align 16 ; <i128*> [#uses=3]
%"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
store float %a, float* %a_addr
- %tmp1 = load float* %a_addr, align 4 ; <float> [#uses=1]
+ %tmp1 = load float, float* %a_addr, align 4 ; <float> [#uses=1]
%tmp2 = fcmp olt float %tmp1, 0.000000e+00 ; <i1> [#uses=1]
%tmp23 = zext i1 %tmp2 to i8 ; <i8> [#uses=1]
%toBool = icmp ne i8 %tmp23, 0 ; <i1> [#uses=1]
br i1 %toBool, label %bb, label %bb8
bb: ; preds = %entry
- %tmp4 = load float* %a_addr, align 4 ; <float> [#uses=1]
+ %tmp4 = load float, float* %a_addr, align 4 ; <float> [#uses=1]
%tmp5 = fsub float -0.000000e+00, %tmp4 ; <float> [#uses=1]
%tmp6 = call i128 @__fixunssfDI( float %tmp5 ) nounwind ; <i128> [#uses=1]
%tmp7 = sub i128 0, %tmp6 ; <i128> [#uses=1]
store i128 %tmp7, i128* %tmp, align 16
br label %bb11
bb8: ; preds = %entry
- %tmp9 = load float* %a_addr, align 4 ; <float> [#uses=1]
+ %tmp9 = load float, float* %a_addr, align 4 ; <float> [#uses=1]
%tmp10 = call i128 @__fixunssfDI( float %tmp9 ) nounwind ; <i128> [#uses=1]
store i128 %tmp10, i128* %tmp, align 16
br label %bb11
bb11: ; preds = %bb8, %bb
- %tmp12 = load i128* %tmp, align 16 ; <i128> [#uses=1]
+ %tmp12 = load i128, i128* %tmp, align 16 ; <i128> [#uses=1]
store i128 %tmp12, i128* %retval, align 16
br label %return
return: ; preds = %bb11
- %retval13 = load i128* %retval ; <i128> [#uses=1]
+ %retval13 = load i128, i128* %retval ; <i128> [#uses=1]
ret i128 %retval13
}
diff --git a/test/CodeGen/PowerPC/rlwimi-and.ll b/test/CodeGen/PowerPC/rlwimi-and.ll
index 9433f8e3dee2..59f704ee16bc 100644
--- a/test/CodeGen/PowerPC/rlwimi-and.ll
+++ b/test/CodeGen/PowerPC/rlwimi-and.ll
@@ -16,11 +16,11 @@ codeRepl12: ; preds = %codeRepl4
unreachable
codeRepl17: ; preds = %codeRepl4
- %0 = load i8* undef, align 2
+ %0 = load i8, i8* undef, align 2
%1 = and i8 %0, 1
%not.tobool.i.i.i = icmp eq i8 %1, 0
%2 = select i1 %not.tobool.i.i.i, i16 0, i16 256
- %3 = load i8* undef, align 1
+ %3 = load i8, i8* undef, align 1
%4 = and i8 %3, 1
%not.tobool.i.1.i.i = icmp eq i8 %4, 0
%rvml38.sroa.1.1.insert.ext = select i1 %not.tobool.i.1.i.i, i16 0, i16 1
@@ -29,7 +29,7 @@ codeRepl17: ; preds = %codeRepl4
unreachable
; CHECK: @test
-; CHECK: rlwinm [[R1:[0-9]+]], {{[0-9]+}}, 0, 31, 31
+; CHECK: clrlwi [[R1:[0-9]+]], {{[0-9]+}}, 31
; CHECK: rlwimi [[R1]], {{[0-9]+}}, 8, 23, 23
codeRepl29: ; preds = %codeRepl1
diff --git a/test/CodeGen/PowerPC/rlwimi-commute.ll b/test/CodeGen/PowerPC/rlwimi-commute.ll
index 3f90008c006b..cd0f49ed7807 100644
--- a/test/CodeGen/PowerPC/rlwimi-commute.ll
+++ b/test/CodeGen/PowerPC/rlwimi-commute.ll
@@ -4,8 +4,8 @@
; Make sure there is no register-register copies here.
define void @test1(i32* %A, i32* %B, i32* %D, i32* %E) {
- %A.upgrd.1 = load i32* %A ; <i32> [#uses=2]
- %B.upgrd.2 = load i32* %B ; <i32> [#uses=1]
+ %A.upgrd.1 = load i32, i32* %A ; <i32> [#uses=2]
+ %B.upgrd.2 = load i32, i32* %B ; <i32> [#uses=1]
%X = and i32 %A.upgrd.1, 15 ; <i32> [#uses=1]
%Y = and i32 %B.upgrd.2, -16 ; <i32> [#uses=1]
%Z = or i32 %X, %Y ; <i32> [#uses=1]
@@ -15,8 +15,8 @@ define void @test1(i32* %A, i32* %B, i32* %D, i32* %E) {
}
define void @test2(i32* %A, i32* %B, i32* %D, i32* %E) {
- %A.upgrd.3 = load i32* %A ; <i32> [#uses=1]
- %B.upgrd.4 = load i32* %B ; <i32> [#uses=2]
+ %A.upgrd.3 = load i32, i32* %A ; <i32> [#uses=1]
+ %B.upgrd.4 = load i32, i32* %B ; <i32> [#uses=2]
%X = and i32 %A.upgrd.3, 15 ; <i32> [#uses=1]
%Y = and i32 %B.upgrd.4, -16 ; <i32> [#uses=1]
%Z = or i32 %X, %Y ; <i32> [#uses=1]
diff --git a/test/CodeGen/PowerPC/rlwimi-dyn-and.ll b/test/CodeGen/PowerPC/rlwimi-dyn-and.ll
index e02801fafbf5..76f3da66dd04 100644
--- a/test/CodeGen/PowerPC/rlwimi-dyn-and.ll
+++ b/test/CodeGen/PowerPC/rlwimi-dyn-and.ll
@@ -4,13 +4,13 @@ target triple = "powerpc64-unknown-linux-gnu"
define i32 @test1() #0 {
entry:
- %conv67.reload = load i32* undef
+ %conv67.reload = load i32, i32* undef
%const = bitcast i32 65535 to i32
br label %next
next:
%shl161 = shl nuw nsw i32 %conv67.reload, 15
- %0 = load i8* undef, align 1
+ %0 = load i8, i8* undef, align 1
%conv169 = zext i8 %0 to i32
%shl170 = shl nuw nsw i32 %conv169, 7
%const_mat = add i32 %const, -32767
@@ -25,13 +25,13 @@ next:
define i32 @test2() #0 {
entry:
- %conv67.reload = load i32* undef
+ %conv67.reload = load i32, i32* undef
%const = bitcast i32 65535 to i32
br label %next
next:
%shl161 = shl nuw nsw i32 %conv67.reload, 15
- %0 = load i8* undef, align 1
+ %0 = load i8, i8* undef, align 1
%conv169 = zext i8 %0 to i32
%shl170 = shl nuw nsw i32 %conv169, 7
%shl161.masked = and i32 %shl161, 32768
diff --git a/test/CodeGen/PowerPC/rm-zext.ll b/test/CodeGen/PowerPC/rm-zext.ll
index 33995e114d27..97c546c0145f 100644
--- a/test/CodeGen/PowerPC/rm-zext.ll
+++ b/test/CodeGen/PowerPC/rm-zext.ll
@@ -45,7 +45,7 @@ declare i32 @llvm.bswap.i32(i32) #0
; Function Attrs: nounwind readonly
define zeroext i32 @bs32(i32* nocapture readonly %x) #1 {
entry:
- %0 = load i32* %x, align 4
+ %0 = load i32, i32* %x, align 4
%1 = tail call i32 @llvm.bswap.i32(i32 %0)
ret i32 %1
@@ -57,7 +57,7 @@ entry:
; Function Attrs: nounwind readonly
define zeroext i16 @bs16(i16* nocapture readonly %x) #1 {
entry:
- %0 = load i16* %x, align 2
+ %0 = load i16, i16* %x, align 2
%1 = tail call i16 @llvm.bswap.i16(i16 %0)
ret i16 %1
diff --git a/test/CodeGen/PowerPC/rotl-2.ll b/test/CodeGen/PowerPC/rotl-2.ll
index d32ef59be6c4..86539b6c119c 100644
--- a/test/CodeGen/PowerPC/rotl-2.ll
+++ b/test/CodeGen/PowerPC/rotl-2.ll
@@ -1,5 +1,6 @@
-; RUN: llc < %s -march=ppc32 | grep rlwinm | count 4
-; RUN: llc < %s -march=ppc32 | grep rlwnm | count 2
+; RUN: llc < %s -march=ppc32 | grep rotlwi | count 2
+; RUN: llc < %s -march=ppc32 | grep clrlwi | count 2
+; RUN: llc < %s -march=ppc32 | grep rotlw | count 4
; RUN: llc < %s -march=ppc32 | not grep or
define i32 @rotl32(i32 %A, i8 %Amt) nounwind {
diff --git a/test/CodeGen/PowerPC/rotl-64.ll b/test/CodeGen/PowerPC/rotl-64.ll
index 674c9e4cc951..2ccdc29f2cd2 100644
--- a/test/CodeGen/PowerPC/rotl-64.ll
+++ b/test/CodeGen/PowerPC/rotl-64.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -march=ppc64 | grep rldicl
-; RUN: llc < %s -march=ppc64 | grep rldcl
+; RUN: llc < %s -march=ppc64 | grep rotld
+; RUN: llc < %s -march=ppc64 | grep rotldi
; PR1613
define i64 @t1(i64 %A) {
diff --git a/test/CodeGen/PowerPC/rotl.ll b/test/CodeGen/PowerPC/rotl.ll
index 56fc4a8c911f..671f524645a7 100644
--- a/test/CodeGen/PowerPC/rotl.ll
+++ b/test/CodeGen/PowerPC/rotl.ll
@@ -1,5 +1,7 @@
-; RUN: llc < %s -march=ppc32 | grep rlwnm | count 2
-; RUN: llc < %s -march=ppc32 | grep rlwinm | count 2
+; RUN: llc < %s -march=ppc32 | grep rotrw: | count 1
+; RUN: llc < %s -march=ppc32 | grep rotlw: | count 1
+; RUN: llc < %s -march=ppc32 | grep rotlwi: | count 1
+; RUN: llc < %s -march=ppc32 | grep rotrwi: | count 1
define i32 @rotlw(i32 %x, i32 %sh) {
entry:
diff --git a/test/CodeGen/PowerPC/rs-undef-use.ll b/test/CodeGen/PowerPC/rs-undef-use.ll
index 24dd5fd9da99..007931e7407f 100644
--- a/test/CodeGen/PowerPC/rs-undef-use.ll
+++ b/test/CodeGen/PowerPC/rs-undef-use.ll
@@ -15,7 +15,7 @@ CF82.critedge: ; preds = %CF
br label %CF82
CF82: ; preds = %CF82, %CF82.critedge
- %L17 = load i8* %0
+ %L17 = load i8, i8* %0
%E18 = extractelement <2 x i64> undef, i32 0
%PC = bitcast <2 x i1>* %A3 to i64*
br i1 undef, label %CF82, label %CF84.critedge
@@ -25,13 +25,13 @@ CF84.critedge: ; preds = %CF82
br label %CF84
CF84: ; preds = %CF84, %CF84.critedge
- %L40 = load i64* %PC
+ %L40 = load i64, i64* %PC
store i64 -1, i64* %PC
%Sl46 = select i1 undef, i1 undef, i1 false
br i1 %Sl46, label %CF84, label %CF85
CF85: ; preds = %CF84
- %L47 = load i64* %PC
+ %L47 = load i64, i64* %PC
store i64 %E18, i64* %PC
%PC52 = bitcast <8 x i32>* %A2 to ppc_fp128*
store ppc_fp128 0xM4D436562A0416DE00000000000000000, ppc_fp128* %PC52
diff --git a/test/CodeGen/PowerPC/s000-alias-misched.ll b/test/CodeGen/PowerPC/s000-alias-misched.ll
index 3570a11b6271..2e34c65a0a38 100644
--- a/test/CodeGen/PowerPC/s000-alias-misched.ll
+++ b/test/CodeGen/PowerPC/s000-alias-misched.ll
@@ -22,7 +22,7 @@ declare signext i32 @init(i8*) nounwind
define signext i32 @s000() nounwind {
entry:
- %call = tail call signext i32 @init(i8* getelementptr inbounds ([6 x i8]* @.str1, i64 0, i64 0))
+ %call = tail call signext i32 @init(i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str1, i64 0, i64 0))
%call1 = tail call i64 @clock() nounwind
br label %for.cond2.preheader
@@ -34,34 +34,34 @@ for.cond2.preheader: ; preds = %for.end, %entry
for.body4: ; preds = %for.body4, %for.cond2.preheader
%indvars.iv = phi i64 [ 0, %for.cond2.preheader ], [ %indvars.iv.next.15, %for.body4 ]
- %arrayidx = getelementptr inbounds [16000 x double]* @Y, i64 0, i64 %indvars.iv
- %arrayidx6 = getelementptr inbounds [16000 x double]* @X, i64 0, i64 %indvars.iv
+ %arrayidx = getelementptr inbounds [16000 x double], [16000 x double]* @Y, i64 0, i64 %indvars.iv
+ %arrayidx6 = getelementptr inbounds [16000 x double], [16000 x double]* @X, i64 0, i64 %indvars.iv
%0 = bitcast double* %arrayidx to <1 x double>*
- %1 = load <1 x double>* %0, align 32
+ %1 = load <1 x double>, <1 x double>* %0, align 32
%add = fadd <1 x double> %1, <double 1.000000e+00>
%2 = bitcast double* %arrayidx6 to <1 x double>*
store <1 x double> %add, <1 x double>* %2, align 32
%indvars.iv.next.322 = or i64 %indvars.iv, 4
- %arrayidx.4 = getelementptr inbounds [16000 x double]* @Y, i64 0, i64 %indvars.iv.next.322
- %arrayidx6.4 = getelementptr inbounds [16000 x double]* @X, i64 0, i64 %indvars.iv.next.322
+ %arrayidx.4 = getelementptr inbounds [16000 x double], [16000 x double]* @Y, i64 0, i64 %indvars.iv.next.322
+ %arrayidx6.4 = getelementptr inbounds [16000 x double], [16000 x double]* @X, i64 0, i64 %indvars.iv.next.322
%3 = bitcast double* %arrayidx.4 to <1 x double>*
- %4 = load <1 x double>* %3, align 32
+ %4 = load <1 x double>, <1 x double>* %3, align 32
%add.4 = fadd <1 x double> %4, <double 1.000000e+00>
%5 = bitcast double* %arrayidx6.4 to <1 x double>*
store <1 x double> %add.4, <1 x double>* %5, align 32
%indvars.iv.next.726 = or i64 %indvars.iv, 8
- %arrayidx.8 = getelementptr inbounds [16000 x double]* @Y, i64 0, i64 %indvars.iv.next.726
- %arrayidx6.8 = getelementptr inbounds [16000 x double]* @X, i64 0, i64 %indvars.iv.next.726
+ %arrayidx.8 = getelementptr inbounds [16000 x double], [16000 x double]* @Y, i64 0, i64 %indvars.iv.next.726
+ %arrayidx6.8 = getelementptr inbounds [16000 x double], [16000 x double]* @X, i64 0, i64 %indvars.iv.next.726
%6 = bitcast double* %arrayidx.8 to <1 x double>*
- %7 = load <1 x double>* %6, align 32
+ %7 = load <1 x double>, <1 x double>* %6, align 32
%add.8 = fadd <1 x double> %7, <double 1.000000e+00>
%8 = bitcast double* %arrayidx6.8 to <1 x double>*
store <1 x double> %add.8, <1 x double>* %8, align 32
%indvars.iv.next.1130 = or i64 %indvars.iv, 12
- %arrayidx.12 = getelementptr inbounds [16000 x double]* @Y, i64 0, i64 %indvars.iv.next.1130
- %arrayidx6.12 = getelementptr inbounds [16000 x double]* @X, i64 0, i64 %indvars.iv.next.1130
+ %arrayidx.12 = getelementptr inbounds [16000 x double], [16000 x double]* @Y, i64 0, i64 %indvars.iv.next.1130
+ %arrayidx6.12 = getelementptr inbounds [16000 x double], [16000 x double]* @X, i64 0, i64 %indvars.iv.next.1130
%9 = bitcast double* %arrayidx.12 to <1 x double>*
- %10 = load <1 x double>* %9, align 32
+ %10 = load <1 x double>, <1 x double>* %9, align 32
%add.12 = fadd <1 x double> %10, <double 1.000000e+00>
%11 = bitcast double* %arrayidx6.12 to <1 x double>*
store <1 x double> %add.12, <1 x double>* %11, align 32
@@ -77,7 +77,7 @@ for.body4: ; preds = %for.body4, %for.con
; CHECK: bdnz
for.end: ; preds = %for.body4
- %call7 = tail call signext i32 @dummy(double* getelementptr inbounds ([16000 x double]* @X, i64 0, i64 0), double* getelementptr inbounds ([16000 x double]* @Y, i64 0, i64 0), double* getelementptr inbounds ([16000 x double]* @Z, i64 0, i64 0), double* getelementptr inbounds ([16000 x double]* @U, i64 0, i64 0), double* getelementptr inbounds ([16000 x double]* @V, i64 0, i64 0), [256 x double]* getelementptr inbounds ([256 x [256 x double]]* @aa, i64 0, i64 0), [256 x double]* getelementptr inbounds ([256 x [256 x double]]* @bb, i64 0, i64 0), [256 x double]* getelementptr inbounds ([256 x [256 x double]]* @cc, i64 0, i64 0), double 0.000000e+00) nounwind
+ %call7 = tail call signext i32 @dummy(double* getelementptr inbounds ([16000 x double], [16000 x double]* @X, i64 0, i64 0), double* getelementptr inbounds ([16000 x double], [16000 x double]* @Y, i64 0, i64 0), double* getelementptr inbounds ([16000 x double], [16000 x double]* @Z, i64 0, i64 0), double* getelementptr inbounds ([16000 x double], [16000 x double]* @U, i64 0, i64 0), double* getelementptr inbounds ([16000 x double], [16000 x double]* @V, i64 0, i64 0), [256 x double]* getelementptr inbounds ([256 x [256 x double]], [256 x [256 x double]]* @aa, i64 0, i64 0), [256 x double]* getelementptr inbounds ([256 x [256 x double]], [256 x [256 x double]]* @bb, i64 0, i64 0), [256 x double]* getelementptr inbounds ([256 x [256 x double]], [256 x [256 x double]]* @cc, i64 0, i64 0), double 0.000000e+00) nounwind
%inc9 = add nsw i32 %nl.018, 1
%exitcond = icmp eq i32 %inc9, 400000
br i1 %exitcond, label %for.end10, label %for.cond2.preheader
@@ -87,7 +87,7 @@ for.end10: ; preds = %for.end
%sub = sub nsw i64 %call11, %call1
%conv = sitofp i64 %sub to double
%div = fdiv double %conv, 1.000000e+06
- %call12 = tail call signext i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([14 x i8]* @.str137, i64 0, i64 0), double %div) nounwind
+ %call12 = tail call signext i32 (i8*, ...) @printf(i8* getelementptr inbounds ([14 x i8], [14 x i8]* @.str137, i64 0, i64 0), double %div) nounwind
tail call void @check(i32 signext 1)
ret i32 0
}
diff --git a/test/CodeGen/PowerPC/sdag-ppcf128.ll b/test/CodeGen/PowerPC/sdag-ppcf128.ll
index c46bc6b22dde..6d2a04c72936 100644
--- a/test/CodeGen/PowerPC/sdag-ppcf128.ll
+++ b/test/CodeGen/PowerPC/sdag-ppcf128.ll
@@ -5,7 +5,7 @@
define fastcc void @_D3std4math4sqrtFNaNbNfcZc() {
entry:
br i1 undef, label %if, label %else
-; CHECK: cmplwi 0, 3, 0
+; CHECK: cmplwi 3, 0
if: ; preds = %entry
store { ppc_fp128, ppc_fp128 } zeroinitializer, { ppc_fp128, ppc_fp128 }* undef
ret void
diff --git a/test/CodeGen/PowerPC/seteq-0.ll b/test/CodeGen/PowerPC/seteq-0.ll
index b7dd78085eb1..4afb8fee1776 100644
--- a/test/CodeGen/PowerPC/seteq-0.ll
+++ b/test/CodeGen/PowerPC/seteq-0.ll
@@ -5,7 +5,7 @@ define i32 @eq0(i32 %a) {
%tmp.2 = zext i1 %tmp.1 to i32 ; <i32> [#uses=1]
ret i32 %tmp.2
-; CHECK: cntlzw [[REG:r[0-9]+]], r3
+; CHECK: cntlz [[REG:r[0-9]+]], r3
; CHECK: rlwinm r3, [[REG]], 27, 31, 31
; CHECK: blr
}
diff --git a/test/CodeGen/PowerPC/sjlj.ll b/test/CodeGen/PowerPC/sjlj.ll
index f9f887af31f3..62403e711968 100644
--- a/test/CodeGen/PowerPC/sjlj.ll
+++ b/test/CodeGen/PowerPC/sjlj.ll
@@ -37,7 +37,7 @@ entry:
%0 = call i8* @llvm.frameaddress(i32 0)
store i8* %0, i8** bitcast ([1 x %struct.__jmp_buf_tag]* @env_sigill to i8**)
%1 = call i8* @llvm.stacksave()
- store i8* %1, i8** getelementptr (i8** bitcast ([1 x %struct.__jmp_buf_tag]* @env_sigill to i8**), i32 2)
+ store i8* %1, i8** getelementptr (i8*, i8** bitcast ([1 x %struct.__jmp_buf_tag]* @env_sigill to i8**), i32 2)
%2 = call i32 @llvm.eh.sjlj.setjmp(i8* bitcast ([1 x %struct.__jmp_buf_tag]* @env_sigill to i8*))
%tobool = icmp ne i32 %2, 0
br i1 %tobool, label %if.then, label %if.else
@@ -55,7 +55,7 @@ if.end: ; preds = %if.else
br label %return
return: ; preds = %if.end, %if.then
- %3 = load i32* %retval
+ %3 = load i32, i32* %retval
ret i32 %3
; FIXME: We should be saving VRSAVE on Darwin, but we're not!
@@ -110,7 +110,7 @@ entry:
%0 = call i8* @llvm.frameaddress(i32 0)
store i8* %0, i8** bitcast ([1 x %struct.__jmp_buf_tag]* @env_sigill to i8**)
%1 = call i8* @llvm.stacksave()
- store i8* %1, i8** getelementptr (i8** bitcast ([1 x %struct.__jmp_buf_tag]* @env_sigill to i8**), i32 2)
+ store i8* %1, i8** getelementptr (i8*, i8** bitcast ([1 x %struct.__jmp_buf_tag]* @env_sigill to i8**), i32 2)
%2 = call i32 @llvm.eh.sjlj.setjmp(i8* bitcast ([1 x %struct.__jmp_buf_tag]* @env_sigill to i8*))
%tobool = icmp ne i32 %2, 0
br i1 %tobool, label %if.then, label %if.else
@@ -128,7 +128,7 @@ if.end: ; preds = %if.else
br label %return
return: ; preds = %if.end, %if.then
- %3 = load i32* %retval
+ %3 = load i32, i32* %retval
ret i32 %3
; CHECK: @main2
diff --git a/test/CodeGen/PowerPC/small-arguments.ll b/test/CodeGen/PowerPC/small-arguments.ll
index 19ca0985eef1..3cef817689b3 100644
--- a/test/CodeGen/PowerPC/small-arguments.ll
+++ b/test/CodeGen/PowerPC/small-arguments.ll
@@ -26,14 +26,14 @@ UnifiedReturnBlock:
}
define i32 @test4(i16* %P) {
- %tmp.1 = load i16* %P
+ %tmp.1 = load i16, i16* %P
%tmp.2 = zext i16 %tmp.1 to i32
%tmp.3 = and i32 %tmp.2, 255
ret i32 %tmp.3
}
define i32 @test5(i16* %P) {
- %tmp.1 = load i16* %P
+ %tmp.1 = load i16, i16* %P
%tmp.2 = bitcast i16 %tmp.1 to i16
%tmp.3 = zext i16 %tmp.2 to i32
%tmp.4 = and i32 %tmp.3, 255
@@ -41,7 +41,7 @@ define i32 @test5(i16* %P) {
}
define i32 @test6(i32* %P) {
- %tmp.1 = load i32* %P
+ %tmp.1 = load i32, i32* %P
%tmp.2 = and i32 %tmp.1, 255
ret i32 %tmp.2
}
diff --git a/test/CodeGen/PowerPC/split-index-tc.ll b/test/CodeGen/PowerPC/split-index-tc.ll
index 03aff243b231..38be93f28a85 100644
--- a/test/CodeGen/PowerPC/split-index-tc.ll
+++ b/test/CodeGen/PowerPC/split-index-tc.ll
@@ -13,16 +13,16 @@ define void @_ZN4llvm17ScheduleDAGInstrs14addPhysRegDepsEPNS_5SUnitEj() #0 align
; CHECK-NOT: lhzu
entry:
- %0 = load %"class.llvm::MachineOperand"** undef, align 8
+ %0 = load %"class.llvm::MachineOperand"*, %"class.llvm::MachineOperand"** undef, align 8
br i1 undef, label %_ZNK4llvm14MachineOperand6getRegEv.exit, label %cond.false.i123
cond.false.i123: ; preds = %_ZN4llvm12MachineInstr10getOperandEj.exit
unreachable
_ZNK4llvm14MachineOperand6getRegEv.exit: ; preds = %_ZN4llvm12MachineInstr10getOperandEj.exit
- %IsDef.i = getelementptr inbounds %"class.llvm::MachineOperand"* %0, i64 undef, i32 1
+ %IsDef.i = getelementptr inbounds %"class.llvm::MachineOperand", %"class.llvm::MachineOperand"* %0, i64 undef, i32 1
%1 = bitcast [3 x i8]* %IsDef.i to i24*
- %bf.load.i = load i24* %1, align 1
+ %bf.load.i = load i24, i24* %1, align 1
%2 = and i24 %bf.load.i, 128
br i1 undef, label %for.cond.cleanup, label %for.body.lr.ph
@@ -61,7 +61,7 @@ cond.false.i257: ; preds = %if.end55
unreachable
_ZNK4llvm14MachineOperand6isDeadEv.exit262: ; preds = %if.end55
- %bf.load.i259 = load i24* %1, align 1
+ %bf.load.i259 = load i24, i24* %1, align 1
br i1 undef, label %if.then57, label %if.else59
if.then57: ; preds = %_ZNK4llvm14MachineOperand6isDeadEv.exit262
diff --git a/test/CodeGen/PowerPC/stack-protector.ll b/test/CodeGen/PowerPC/stack-protector.ll
index b81d94181cdf..8d255bd9a43b 100644
--- a/test/CodeGen/PowerPC/stack-protector.ll
+++ b/test/CodeGen/PowerPC/stack-protector.ll
@@ -11,10 +11,10 @@ entry:
%"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
store i8* %a, i8** %a_addr
%buf1 = bitcast [8 x i8]* %buf to i8* ; <i8*> [#uses=1]
- %0 = load i8** %a_addr, align 4 ; <i8*> [#uses=1]
+ %0 = load i8*, i8** %a_addr, align 4 ; <i8*> [#uses=1]
%1 = call i8* @strcpy(i8* %buf1, i8* %0) nounwind ; <i8*> [#uses=0]
%buf2 = bitcast [8 x i8]* %buf to i8* ; <i8*> [#uses=1]
- %2 = call i32 (i8*, ...)* @printf(i8* getelementptr ([11 x i8]* @"\01LC", i32 0, i32 0), i8* %buf2) nounwind ; <i32> [#uses=0]
+ %2 = call i32 (i8*, ...) @printf(i8* getelementptr ([11 x i8], [11 x i8]* @"\01LC", i32 0, i32 0), i8* %buf2) nounwind ; <i32> [#uses=0]
br label %return
return: ; preds = %entry
diff --git a/test/CodeGen/PowerPC/stack-realign.ll b/test/CodeGen/PowerPC/stack-realign.ll
index 762f50a9cbe0..e91b563af72e 100644
--- a/test/CodeGen/PowerPC/stack-realign.ll
+++ b/test/CodeGen/PowerPC/stack-realign.ll
@@ -14,14 +14,14 @@ declare void @bar(i32*)
define void @goo(%struct.s* byval nocapture readonly %a) {
entry:
%x = alloca [2 x i32], align 32
- %a1 = getelementptr inbounds %struct.s* %a, i64 0, i32 0
- %0 = load i32* %a1, align 4
- %arrayidx = getelementptr inbounds [2 x i32]* %x, i64 0, i64 0
+ %a1 = getelementptr inbounds %struct.s, %struct.s* %a, i64 0, i32 0
+ %0 = load i32, i32* %a1, align 4
+ %arrayidx = getelementptr inbounds [2 x i32], [2 x i32]* %x, i64 0, i64 0
store i32 %0, i32* %arrayidx, align 32
- %b = getelementptr inbounds %struct.s* %a, i64 0, i32 1
- %1 = load i32* %b, align 4
- %2 = load i32* @barbaz, align 4
- %arrayidx2 = getelementptr inbounds [2 x i32]* %x, i64 0, i64 1
+ %b = getelementptr inbounds %struct.s, %struct.s* %a, i64 0, i32 1
+ %1 = load i32, i32* %b, align 4
+ %2 = load i32, i32* @barbaz, align 4
+ %arrayidx2 = getelementptr inbounds [2 x i32], [2 x i32]* %x, i64 0, i64 1
store i32 %2, i32* %arrayidx2, align 4
call void @bar(i32* %arrayidx)
ret void
@@ -30,7 +30,7 @@ entry:
; CHECK-LABEL: @goo
; CHECK-DAG: mflr 0
-; CHECK-DAG: rldicl [[REG:[0-9]+]], 1, 0, 59
+; CHECK-DAG: clrldi [[REG:[0-9]+]], 1, 59
; CHECK-DAG: std 30, -16(1)
; CHECK-DAG: mr 30, 1
; CHECK-DAG: std 0, 16(1)
@@ -52,7 +52,7 @@ entry:
; CHECK-FP-LABEL: @goo
; CHECK-FP-DAG: mflr 0
-; CHECK-FP-DAG: rldicl [[REG:[0-9]+]], 1, 0, 59
+; CHECK-FP-DAG: clrldi [[REG:[0-9]+]], 1, 59
; CHECK-FP-DAG: std 31, -8(1)
; CHECK-FP-DAG: std 30, -16(1)
; CHECK-FP-DAG: mr 30, 1
@@ -78,7 +78,7 @@ entry:
; CHECK-32-LABEL: @goo
; CHECK-32-DAG: mflr 0
-; CHECK-32-DAG: rlwinm [[REG:[0-9]+]], 1, 0, 27, 31
+; CHECK-32-DAG: clrlwi [[REG:[0-9]+]], 1, 27
; CHECK-32-DAG: stw 30, -8(1)
; CHECK-32-DAG: mr 30, 1
; CHECK-32-DAG: stw 0, 4(1)
@@ -87,7 +87,7 @@ entry:
; CHECK-32-PIC-LABEL: @goo
; CHECK-32-PIC-DAG: mflr 0
-; CHECK-32-PIC-DAG: rlwinm [[REG:[0-9]+]], 1, 0, 27, 31
+; CHECK-32-PIC-DAG: clrlwi [[REG:[0-9]+]], 1, 27
; CHECK-32-PIC-DAG: stw 29, -12(1)
; CHECK-32-PIC-DAG: mr 29, 1
; CHECK-32-PIC-DAG: stw 0, 4(1)
@@ -98,13 +98,13 @@ entry:
define void @hoo(%struct.s* byval nocapture readonly %a) {
entry:
%x = alloca [200000 x i32], align 32
- %a1 = getelementptr inbounds %struct.s* %a, i64 0, i32 0
- %0 = load i32* %a1, align 4
- %arrayidx = getelementptr inbounds [200000 x i32]* %x, i64 0, i64 0
+ %a1 = getelementptr inbounds %struct.s, %struct.s* %a, i64 0, i32 0
+ %0 = load i32, i32* %a1, align 4
+ %arrayidx = getelementptr inbounds [200000 x i32], [200000 x i32]* %x, i64 0, i64 0
store i32 %0, i32* %arrayidx, align 32
- %b = getelementptr inbounds %struct.s* %a, i64 0, i32 1
- %1 = load i32* %b, align 4
- %arrayidx2 = getelementptr inbounds [200000 x i32]* %x, i64 0, i64 1
+ %b = getelementptr inbounds %struct.s, %struct.s* %a, i64 0, i32 1
+ %1 = load i32, i32* %b, align 4
+ %arrayidx2 = getelementptr inbounds [200000 x i32], [200000 x i32]* %x, i64 0, i64 1
store i32 %1, i32* %arrayidx2, align 4
call void @bar(i32* %arrayidx)
ret void
@@ -113,7 +113,7 @@ entry:
; CHECK-LABEL: @hoo
; CHECK-DAG: lis [[REG1:[0-9]+]], -13
-; CHECK-DAG: rldicl [[REG3:[0-9]+]], 1, 0, 59
+; CHECK-DAG: clrldi [[REG3:[0-9]+]], 1, 59
; CHECK-DAG: mflr 0
; CHECK-DAG: ori [[REG2:[0-9]+]], [[REG1]], 51808
; CHECK-DAG: std 30, -16(1)
@@ -129,7 +129,7 @@ entry:
; CHECK-32-LABEL: @hoo
; CHECK-32-DAG: lis [[REG1:[0-9]+]], -13
-; CHECK-32-DAG: rlwinm [[REG3:[0-9]+]], 1, 0, 27, 31
+; CHECK-32-DAG: clrlwi [[REG3:[0-9]+]], 1, 27
; CHECK-32-DAG: mflr 0
; CHECK-32-DAG: ori [[REG2:[0-9]+]], [[REG1]], 51904
; CHECK-32-DAG: stw 30, -8(1)
@@ -143,7 +143,7 @@ entry:
; CHECK-32-PIC-LABEL: @hoo
; CHECK-32-PIC-DAG: lis [[REG1:[0-9]+]], -13
-; CHECK-32-PIC-DAG: rlwinm [[REG3:[0-9]+]], 1, 0, 27, 31
+; CHECK-32-PIC-DAG: clrlwi [[REG3:[0-9]+]], 1, 27
; CHECK-32-PIC-DAG: mflr 0
; CHECK-32-PIC-DAG: ori [[REG2:[0-9]+]], [[REG1]], 51904
; CHECK-32-PIC-DAG: stw 29, -12(1)
@@ -159,13 +159,13 @@ entry:
define void @loo(%struct.s* byval nocapture readonly %a) {
entry:
%x = alloca [2 x i32], align 32
- %a1 = getelementptr inbounds %struct.s* %a, i64 0, i32 0
- %0 = load i32* %a1, align 4
- %arrayidx = getelementptr inbounds [2 x i32]* %x, i64 0, i64 0
+ %a1 = getelementptr inbounds %struct.s, %struct.s* %a, i64 0, i32 0
+ %0 = load i32, i32* %a1, align 4
+ %arrayidx = getelementptr inbounds [2 x i32], [2 x i32]* %x, i64 0, i64 0
store i32 %0, i32* %arrayidx, align 32
- %b = getelementptr inbounds %struct.s* %a, i64 0, i32 1
- %1 = load i32* %b, align 4
- %arrayidx2 = getelementptr inbounds [2 x i32]* %x, i64 0, i64 1
+ %b = getelementptr inbounds %struct.s, %struct.s* %a, i64 0, i32 1
+ %1 = load i32, i32* %b, align 4
+ %arrayidx2 = getelementptr inbounds [2 x i32], [2 x i32]* %x, i64 0, i64 1
store i32 %1, i32* %arrayidx2, align 4
call void @bar(i32* %arrayidx)
call void asm sideeffect "", "~{f30}"() nounwind
@@ -175,7 +175,7 @@ entry:
; CHECK-LABEL: @loo
; CHECK-DAG: mflr 0
-; CHECK-DAG: rldicl [[REG:[0-9]+]], 1, 0, 59
+; CHECK-DAG: clrldi [[REG:[0-9]+]], 1, 59
; CHECK-DAG: std 30, -32(1)
; CHECK-DAG: mr 30, 1
; CHECK-DAG: std 0, 16(1)
@@ -191,7 +191,7 @@ entry:
; CHECK-FP-LABEL: @loo
; CHECK-FP-DAG: mflr 0
-; CHECK-FP-DAG: rldicl [[REG:[0-9]+]], 1, 0, 59
+; CHECK-FP-DAG: clrldi [[REG:[0-9]+]], 1, 59
; CHECK-FP-DAG: std 31, -24(1)
; CHECK-FP-DAG: std 30, -32(1)
; CHECK-FP-DAG: mr 30, 1
diff --git a/test/CodeGen/PowerPC/std-unal-fi.ll b/test/CodeGen/PowerPC/std-unal-fi.ll
index 8b9606e1624f..74ea8cd373bd 100644
--- a/test/CodeGen/PowerPC/std-unal-fi.ll
+++ b/test/CodeGen/PowerPC/std-unal-fi.ll
@@ -9,7 +9,7 @@ BB:
br label %CF
CF: ; preds = %CF80, %CF, %BB
- %L5 = load i64* undef
+ %L5 = load i64, i64* undef
store i8 %0, i8* %A4
%Shuff7 = shufflevector <16 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>, <16 x i32> %Shuff, <16 x i32> <i32 28, i32 30, i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 undef, i32 20, i32 22, i32 24, i32 26>
%PC10 = bitcast i8* %A4 to ppc_fp128*
@@ -19,13 +19,13 @@ CF77: ; preds = %CF81, %CF83, %CF77,
br i1 undef, label %CF77, label %CF82
CF82: ; preds = %CF82, %CF77
- %L19 = load i64* undef
+ %L19 = load i64, i64* undef
store <1 x ppc_fp128> zeroinitializer, <1 x ppc_fp128>* %A
store i8 -65, i8* %A4
br i1 undef, label %CF82, label %CF83
CF83: ; preds = %CF82
- %L34 = load i64* undef
+ %L34 = load i64, i64* undef
br i1 undef, label %CF77, label %CF81
CF81: ; preds = %CF83
@@ -54,7 +54,7 @@ define void @autogen_SD88042(i8*, i32*, i8) {
BB:
%A4 = alloca <2 x i1>
%A = alloca <16 x float>
- %L = load i8* %0
+ %L = load i8, i8* %0
%Sl = select i1 false, <16 x float>* %A, <16 x float>* %A
%PC = bitcast <2 x i1>* %A4 to i64*
%Sl27 = select i1 false, i8 undef, i8 %L
@@ -66,7 +66,7 @@ CF: ; preds = %CF78, %CF, %BB
CF77: ; preds = %CF80, %CF77, %CF
store <16 x float> zeroinitializer, <16 x float>* %Sl
- %L58 = load i32* %PC33
+ %L58 = load i32, i32* %PC33
store i8 0, i8* %0
br i1 undef, label %CF77, label %CF80
@@ -90,7 +90,7 @@ BB:
%A1 = alloca i1
%I8 = insertelement <1 x i32> <i32 -1>, i32 454855, i32 0
%Cmp = icmp ult <4 x i64> <i64 -1, i64 -1, i64 -1, i64 -1>, undef
- %L10 = load i64* %2
+ %L10 = load i64, i64* %2
%E11 = extractelement <4 x i1> %Cmp, i32 2
br label %CF72
@@ -103,7 +103,7 @@ CF72: ; preds = %CF74, %CF72, %BB
CF74: ; preds = %CF72
store i8 0, i8* %0
%PC = bitcast i1* %A1 to i64*
- %L31 = load i64* %PC
+ %L31 = load i64, i64* %PC
store i64 477323, i64* %PC
%Sl37 = select i1 false, i32* undef, i32* %1
%Cmp38 = icmp ugt i1 undef, undef
diff --git a/test/CodeGen/PowerPC/stdux-constuse.ll b/test/CodeGen/PowerPC/stdux-constuse.ll
index e62d438014ee..d4d17956868a 100644
--- a/test/CodeGen/PowerPC/stdux-constuse.ll
+++ b/test/CodeGen/PowerPC/stdux-constuse.ll
@@ -4,7 +4,7 @@ target triple = "powerpc64-unknown-linux-gnu"
define i32 @test1(i64 %add, i64* %ptr) nounwind {
entry:
- %p1 = getelementptr i64* %ptr, i64 144115188075855
+ %p1 = getelementptr i64, i64* %ptr, i64 144115188075855
br label %for.cond2.preheader
for.cond2.preheader:
@@ -14,10 +14,10 @@ for.cond2.preheader:
for.body4:
%lsr.iv = phi i32 [ %lsr.iv.next, %for.body4 ], [ 16000, %for.cond2.preheader ]
%i0 = phi i64* [ %p1, %for.cond2.preheader ], [ %i6, %for.body4 ]
- %i6 = getelementptr i64* %i0, i64 400000
- %i7 = getelementptr i64* %i6, i64 300000
- %i8 = getelementptr i64* %i6, i64 200000
- %i9 = getelementptr i64* %i6, i64 100000
+ %i6 = getelementptr i64, i64* %i0, i64 400000
+ %i7 = getelementptr i64, i64* %i6, i64 300000
+ %i8 = getelementptr i64, i64* %i6, i64 200000
+ %i9 = getelementptr i64, i64* %i6, i64 100000
store i64 %add, i64* %i6, align 32
store i64 %add, i64* %i7, align 32
store i64 %add, i64* %i8, align 32
diff --git a/test/CodeGen/PowerPC/stfiwx.ll b/test/CodeGen/PowerPC/stfiwx.ll
index 588e44fb28d3..5f90dcad032d 100644
--- a/test/CodeGen/PowerPC/stfiwx.ll
+++ b/test/CodeGen/PowerPC/stfiwx.ll
@@ -22,8 +22,8 @@ define void @test1(float %a, i32* %b) nounwind {
define void @test2(float %a, i32* %b, i32 %i) nounwind {
; CHECK-LABEL: @test2
; CHECK-LS-LABEL: @test2
- %tmp.2 = getelementptr i32* %b, i32 1 ; <i32*> [#uses=1]
- %tmp.5 = getelementptr i32* %b, i32 %i ; <i32*> [#uses=1]
+ %tmp.2 = getelementptr i32, i32* %b, i32 1 ; <i32*> [#uses=1]
+ %tmp.5 = getelementptr i32, i32* %b, i32 %i ; <i32*> [#uses=1]
%tmp.7 = fptosi float %a to i32 ; <i32> [#uses=3]
store i32 %tmp.7, i32* %tmp.5
store i32 %tmp.7, i32* %tmp.2
diff --git a/test/CodeGen/PowerPC/store-load-fwd.ll b/test/CodeGen/PowerPC/store-load-fwd.ll
index 25663c1ac68e..62dd79ec18a4 100644
--- a/test/CodeGen/PowerPC/store-load-fwd.ll
+++ b/test/CodeGen/PowerPC/store-load-fwd.ll
@@ -2,7 +2,7 @@
define i32 @test(i32* %P) {
store i32 1, i32* %P
- %V = load i32* %P ; <i32> [#uses=1]
+ %V = load i32, i32* %P ; <i32> [#uses=1]
ret i32 %V
}
diff --git a/test/CodeGen/PowerPC/store-update.ll b/test/CodeGen/PowerPC/store-update.ll
index 7b9e8f720a17..65f052869702 100644
--- a/test/CodeGen/PowerPC/store-update.ll
+++ b/test/CodeGen/PowerPC/store-update.ll
@@ -5,7 +5,7 @@ target triple = "powerpc64-unknown-linux-gnu"
define i8* @test_stbu(i8* %base, i8 zeroext %val) nounwind {
entry:
- %arrayidx = getelementptr inbounds i8* %base, i64 16
+ %arrayidx = getelementptr inbounds i8, i8* %base, i64 16
store i8 %val, i8* %arrayidx, align 1
ret i8* %arrayidx
}
@@ -16,7 +16,7 @@ entry:
define i8* @test_stbux(i8* %base, i8 zeroext %val, i64 %offset) nounwind {
entry:
- %arrayidx = getelementptr inbounds i8* %base, i64 %offset
+ %arrayidx = getelementptr inbounds i8, i8* %base, i64 %offset
store i8 %val, i8* %arrayidx, align 1
ret i8* %arrayidx
}
@@ -27,7 +27,7 @@ entry:
define i16* @test_sthu(i16* %base, i16 zeroext %val) nounwind {
entry:
- %arrayidx = getelementptr inbounds i16* %base, i64 16
+ %arrayidx = getelementptr inbounds i16, i16* %base, i64 16
store i16 %val, i16* %arrayidx, align 2
ret i16* %arrayidx
}
@@ -38,7 +38,7 @@ entry:
define i16* @test_sthux(i16* %base, i16 zeroext %val, i64 %offset) nounwind {
entry:
- %arrayidx = getelementptr inbounds i16* %base, i64 %offset
+ %arrayidx = getelementptr inbounds i16, i16* %base, i64 %offset
store i16 %val, i16* %arrayidx, align 2
ret i16* %arrayidx
}
@@ -50,7 +50,7 @@ entry:
define i32* @test_stwu(i32* %base, i32 zeroext %val) nounwind {
entry:
- %arrayidx = getelementptr inbounds i32* %base, i64 16
+ %arrayidx = getelementptr inbounds i32, i32* %base, i64 16
store i32 %val, i32* %arrayidx, align 4
ret i32* %arrayidx
}
@@ -61,7 +61,7 @@ entry:
define i32* @test_stwux(i32* %base, i32 zeroext %val, i64 %offset) nounwind {
entry:
- %arrayidx = getelementptr inbounds i32* %base, i64 %offset
+ %arrayidx = getelementptr inbounds i32, i32* %base, i64 %offset
store i32 %val, i32* %arrayidx, align 4
ret i32* %arrayidx
}
@@ -74,7 +74,7 @@ entry:
define i8* @test_stbu8(i8* %base, i64 %val) nounwind {
entry:
%conv = trunc i64 %val to i8
- %arrayidx = getelementptr inbounds i8* %base, i64 16
+ %arrayidx = getelementptr inbounds i8, i8* %base, i64 16
store i8 %conv, i8* %arrayidx, align 1
ret i8* %arrayidx
}
@@ -86,7 +86,7 @@ entry:
define i8* @test_stbux8(i8* %base, i64 %val, i64 %offset) nounwind {
entry:
%conv = trunc i64 %val to i8
- %arrayidx = getelementptr inbounds i8* %base, i64 %offset
+ %arrayidx = getelementptr inbounds i8, i8* %base, i64 %offset
store i8 %conv, i8* %arrayidx, align 1
ret i8* %arrayidx
}
@@ -98,7 +98,7 @@ entry:
define i16* @test_sthu8(i16* %base, i64 %val) nounwind {
entry:
%conv = trunc i64 %val to i16
- %arrayidx = getelementptr inbounds i16* %base, i64 16
+ %arrayidx = getelementptr inbounds i16, i16* %base, i64 16
store i16 %conv, i16* %arrayidx, align 2
ret i16* %arrayidx
}
@@ -110,7 +110,7 @@ entry:
define i16* @test_sthux8(i16* %base, i64 %val, i64 %offset) nounwind {
entry:
%conv = trunc i64 %val to i16
- %arrayidx = getelementptr inbounds i16* %base, i64 %offset
+ %arrayidx = getelementptr inbounds i16, i16* %base, i64 %offset
store i16 %conv, i16* %arrayidx, align 2
ret i16* %arrayidx
}
@@ -123,7 +123,7 @@ entry:
define i32* @test_stwu8(i32* %base, i64 %val) nounwind {
entry:
%conv = trunc i64 %val to i32
- %arrayidx = getelementptr inbounds i32* %base, i64 16
+ %arrayidx = getelementptr inbounds i32, i32* %base, i64 16
store i32 %conv, i32* %arrayidx, align 4
ret i32* %arrayidx
}
@@ -135,7 +135,7 @@ entry:
define i32* @test_stwux8(i32* %base, i64 %val, i64 %offset) nounwind {
entry:
%conv = trunc i64 %val to i32
- %arrayidx = getelementptr inbounds i32* %base, i64 %offset
+ %arrayidx = getelementptr inbounds i32, i32* %base, i64 %offset
store i32 %conv, i32* %arrayidx, align 4
ret i32* %arrayidx
}
@@ -147,7 +147,7 @@ entry:
define i64* @test_stdu(i64* %base, i64 %val) nounwind {
entry:
- %arrayidx = getelementptr inbounds i64* %base, i64 16
+ %arrayidx = getelementptr inbounds i64, i64* %base, i64 16
store i64 %val, i64* %arrayidx, align 8
ret i64* %arrayidx
}
@@ -158,7 +158,7 @@ entry:
define i64* @test_stdux(i64* %base, i64 %val, i64 %offset) nounwind {
entry:
- %arrayidx = getelementptr inbounds i64* %base, i64 %offset
+ %arrayidx = getelementptr inbounds i64, i64* %base, i64 %offset
store i64 %val, i64* %arrayidx, align 8
ret i64* %arrayidx
}
diff --git a/test/CodeGen/PowerPC/structsinmem.ll b/test/CodeGen/PowerPC/structsinmem.ll
index b5552af0eb51..bfada4c63714 100644
--- a/test/CodeGen/PowerPC/structsinmem.ll
+++ b/test/CodeGen/PowerPC/structsinmem.ll
@@ -43,7 +43,7 @@ entry:
%p6 = alloca %struct.s6, align 4
%p7 = alloca %struct.s7, align 4
%0 = bitcast %struct.s1* %p1 to i8*
- call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* getelementptr inbounds (%struct.s1* @caller1.p1, i32 0, i32 0), i64 1, i32 1, i1 false)
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* getelementptr inbounds (%struct.s1, %struct.s1* @caller1.p1, i32 0, i32 0), i64 1, i32 1, i1 false)
%1 = bitcast %struct.s2* %p2 to i8*
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* bitcast (%struct.s2* @caller1.p2 to i8*), i64 2, i32 2, i1 false)
%2 = bitcast %struct.s3* %p3 to i8*
@@ -88,28 +88,28 @@ entry:
store i32 %z6, i32* %z6.addr, align 4
store i32 %z7, i32* %z7.addr, align 4
store i32 %z8, i32* %z8.addr, align 4
- %a = getelementptr inbounds %struct.s1* %v1, i32 0, i32 0
- %0 = load i8* %a, align 1
+ %a = getelementptr inbounds %struct.s1, %struct.s1* %v1, i32 0, i32 0
+ %0 = load i8, i8* %a, align 1
%conv = zext i8 %0 to i32
- %a1 = getelementptr inbounds %struct.s2* %v2, i32 0, i32 0
- %1 = load i16* %a1, align 2
+ %a1 = getelementptr inbounds %struct.s2, %struct.s2* %v2, i32 0, i32 0
+ %1 = load i16, i16* %a1, align 2
%conv2 = sext i16 %1 to i32
%add = add nsw i32 %conv, %conv2
- %a3 = getelementptr inbounds %struct.s3* %v3, i32 0, i32 0
- %2 = load i16* %a3, align 2
+ %a3 = getelementptr inbounds %struct.s3, %struct.s3* %v3, i32 0, i32 0
+ %2 = load i16, i16* %a3, align 2
%conv4 = sext i16 %2 to i32
%add5 = add nsw i32 %add, %conv4
- %a6 = getelementptr inbounds %struct.s4* %v4, i32 0, i32 0
- %3 = load i32* %a6, align 4
+ %a6 = getelementptr inbounds %struct.s4, %struct.s4* %v4, i32 0, i32 0
+ %3 = load i32, i32* %a6, align 4
%add7 = add nsw i32 %add5, %3
- %a8 = getelementptr inbounds %struct.s5* %v5, i32 0, i32 0
- %4 = load i32* %a8, align 4
+ %a8 = getelementptr inbounds %struct.s5, %struct.s5* %v5, i32 0, i32 0
+ %4 = load i32, i32* %a8, align 4
%add9 = add nsw i32 %add7, %4
- %a10 = getelementptr inbounds %struct.s6* %v6, i32 0, i32 0
- %5 = load i32* %a10, align 4
+ %a10 = getelementptr inbounds %struct.s6, %struct.s6* %v6, i32 0, i32 0
+ %5 = load i32, i32* %a10, align 4
%add11 = add nsw i32 %add9, %5
- %a12 = getelementptr inbounds %struct.s7* %v7, i32 0, i32 0
- %6 = load i32* %a12, align 4
+ %a12 = getelementptr inbounds %struct.s7, %struct.s7* %v7, i32 0, i32 0
+ %6 = load i32, i32* %a12, align 4
%add13 = add nsw i32 %add11, %6
ret i32 %add13
@@ -132,7 +132,7 @@ entry:
%p6 = alloca %struct.t6, align 1
%p7 = alloca %struct.t7, align 1
%0 = bitcast %struct.t1* %p1 to i8*
- call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* getelementptr inbounds (%struct.t1* @caller2.p1, i32 0, i32 0), i64 1, i32 1, i1 false)
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* getelementptr inbounds (%struct.t1, %struct.t1* @caller2.p1, i32 0, i32 0), i64 1, i32 1, i1 false)
%1 = bitcast %struct.t2* %p2 to i8*
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* bitcast ({ i16 }* @caller2.p2 to i8*), i64 2, i32 1, i1 false)
%2 = bitcast %struct.t3* %p3 to i8*
@@ -180,28 +180,28 @@ entry:
store i32 %z6, i32* %z6.addr, align 4
store i32 %z7, i32* %z7.addr, align 4
store i32 %z8, i32* %z8.addr, align 4
- %a = getelementptr inbounds %struct.t1* %v1, i32 0, i32 0
- %0 = load i8* %a, align 1
+ %a = getelementptr inbounds %struct.t1, %struct.t1* %v1, i32 0, i32 0
+ %0 = load i8, i8* %a, align 1
%conv = zext i8 %0 to i32
- %a1 = getelementptr inbounds %struct.t2* %v2, i32 0, i32 0
- %1 = load i16* %a1, align 1
+ %a1 = getelementptr inbounds %struct.t2, %struct.t2* %v2, i32 0, i32 0
+ %1 = load i16, i16* %a1, align 1
%conv2 = sext i16 %1 to i32
%add = add nsw i32 %conv, %conv2
- %a3 = getelementptr inbounds %struct.t3* %v3, i32 0, i32 0
- %2 = load i16* %a3, align 1
+ %a3 = getelementptr inbounds %struct.t3, %struct.t3* %v3, i32 0, i32 0
+ %2 = load i16, i16* %a3, align 1
%conv4 = sext i16 %2 to i32
%add5 = add nsw i32 %add, %conv4
- %a6 = getelementptr inbounds %struct.t4* %v4, i32 0, i32 0
- %3 = load i32* %a6, align 1
+ %a6 = getelementptr inbounds %struct.t4, %struct.t4* %v4, i32 0, i32 0
+ %3 = load i32, i32* %a6, align 1
%add7 = add nsw i32 %add5, %3
- %a8 = getelementptr inbounds %struct.t5* %v5, i32 0, i32 0
- %4 = load i32* %a8, align 1
+ %a8 = getelementptr inbounds %struct.t5, %struct.t5* %v5, i32 0, i32 0
+ %4 = load i32, i32* %a8, align 1
%add9 = add nsw i32 %add7, %4
- %a10 = getelementptr inbounds %struct.t6* %v6, i32 0, i32 0
- %5 = load i32* %a10, align 1
+ %a10 = getelementptr inbounds %struct.t6, %struct.t6* %v6, i32 0, i32 0
+ %5 = load i32, i32* %a10, align 1
%add11 = add nsw i32 %add9, %5
- %a12 = getelementptr inbounds %struct.t7* %v7, i32 0, i32 0
- %6 = load i32* %a12, align 1
+ %a12 = getelementptr inbounds %struct.t7, %struct.t7* %v7, i32 0, i32 0
+ %6 = load i32, i32* %a12, align 1
%add13 = add nsw i32 %add11, %6
ret i32 %add13
diff --git a/test/CodeGen/PowerPC/structsinregs.ll b/test/CodeGen/PowerPC/structsinregs.ll
index cfe32e9560ae..0fb9895a6227 100644
--- a/test/CodeGen/PowerPC/structsinregs.ll
+++ b/test/CodeGen/PowerPC/structsinregs.ll
@@ -43,7 +43,7 @@ entry:
%p6 = alloca %struct.s6, align 4
%p7 = alloca %struct.s7, align 4
%0 = bitcast %struct.s1* %p1 to i8*
- call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* getelementptr inbounds (%struct.s1* @caller1.p1, i32 0, i32 0), i64 1, i32 1, i1 false)
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* getelementptr inbounds (%struct.s1, %struct.s1* @caller1.p1, i32 0, i32 0), i64 1, i32 1, i1 false)
%1 = bitcast %struct.s2* %p2 to i8*
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* bitcast (%struct.s2* @caller1.p2 to i8*), i64 2, i32 2, i1 false)
%2 = bitcast %struct.s3* %p3 to i8*
@@ -72,28 +72,28 @@ declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32,
define internal i32 @callee1(%struct.s1* byval %v1, %struct.s2* byval %v2, %struct.s3* byval %v3, %struct.s4* byval %v4, %struct.s5* byval %v5, %struct.s6* byval %v6, %struct.s7* byval %v7) nounwind {
entry:
- %a = getelementptr inbounds %struct.s1* %v1, i32 0, i32 0
- %0 = load i8* %a, align 1
+ %a = getelementptr inbounds %struct.s1, %struct.s1* %v1, i32 0, i32 0
+ %0 = load i8, i8* %a, align 1
%conv = zext i8 %0 to i32
- %a1 = getelementptr inbounds %struct.s2* %v2, i32 0, i32 0
- %1 = load i16* %a1, align 2
+ %a1 = getelementptr inbounds %struct.s2, %struct.s2* %v2, i32 0, i32 0
+ %1 = load i16, i16* %a1, align 2
%conv2 = sext i16 %1 to i32
%add = add nsw i32 %conv, %conv2
- %a3 = getelementptr inbounds %struct.s3* %v3, i32 0, i32 0
- %2 = load i16* %a3, align 2
+ %a3 = getelementptr inbounds %struct.s3, %struct.s3* %v3, i32 0, i32 0
+ %2 = load i16, i16* %a3, align 2
%conv4 = sext i16 %2 to i32
%add5 = add nsw i32 %add, %conv4
- %a6 = getelementptr inbounds %struct.s4* %v4, i32 0, i32 0
- %3 = load i32* %a6, align 4
+ %a6 = getelementptr inbounds %struct.s4, %struct.s4* %v4, i32 0, i32 0
+ %3 = load i32, i32* %a6, align 4
%add7 = add nsw i32 %add5, %3
- %a8 = getelementptr inbounds %struct.s5* %v5, i32 0, i32 0
- %4 = load i32* %a8, align 4
+ %a8 = getelementptr inbounds %struct.s5, %struct.s5* %v5, i32 0, i32 0
+ %4 = load i32, i32* %a8, align 4
%add9 = add nsw i32 %add7, %4
- %a10 = getelementptr inbounds %struct.s6* %v6, i32 0, i32 0
- %5 = load i32* %a10, align 4
+ %a10 = getelementptr inbounds %struct.s6, %struct.s6* %v6, i32 0, i32 0
+ %5 = load i32, i32* %a10, align 4
%add11 = add nsw i32 %add9, %5
- %a12 = getelementptr inbounds %struct.s7* %v7, i32 0, i32 0
- %6 = load i32* %a12, align 4
+ %a12 = getelementptr inbounds %struct.s7, %struct.s7* %v7, i32 0, i32 0
+ %6 = load i32, i32* %a12, align 4
%add13 = add nsw i32 %add11, %6
ret i32 %add13
@@ -123,7 +123,7 @@ entry:
%p6 = alloca %struct.t6, align 1
%p7 = alloca %struct.t7, align 1
%0 = bitcast %struct.t1* %p1 to i8*
- call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* getelementptr inbounds (%struct.t1* @caller2.p1, i32 0, i32 0), i64 1, i32 1, i1 false)
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* getelementptr inbounds (%struct.t1, %struct.t1* @caller2.p1, i32 0, i32 0), i64 1, i32 1, i1 false)
%1 = bitcast %struct.t2* %p2 to i8*
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* bitcast ({ i16 }* @caller2.p2 to i8*), i64 2, i32 1, i1 false)
%2 = bitcast %struct.t3* %p3 to i8*
@@ -159,28 +159,28 @@ entry:
define internal i32 @callee2(%struct.t1* byval %v1, %struct.t2* byval %v2, %struct.t3* byval %v3, %struct.t4* byval %v4, %struct.t5* byval %v5, %struct.t6* byval %v6, %struct.t7* byval %v7) nounwind {
entry:
- %a = getelementptr inbounds %struct.t1* %v1, i32 0, i32 0
- %0 = load i8* %a, align 1
+ %a = getelementptr inbounds %struct.t1, %struct.t1* %v1, i32 0, i32 0
+ %0 = load i8, i8* %a, align 1
%conv = zext i8 %0 to i32
- %a1 = getelementptr inbounds %struct.t2* %v2, i32 0, i32 0
- %1 = load i16* %a1, align 1
+ %a1 = getelementptr inbounds %struct.t2, %struct.t2* %v2, i32 0, i32 0
+ %1 = load i16, i16* %a1, align 1
%conv2 = sext i16 %1 to i32
%add = add nsw i32 %conv, %conv2
- %a3 = getelementptr inbounds %struct.t3* %v3, i32 0, i32 0
- %2 = load i16* %a3, align 1
+ %a3 = getelementptr inbounds %struct.t3, %struct.t3* %v3, i32 0, i32 0
+ %2 = load i16, i16* %a3, align 1
%conv4 = sext i16 %2 to i32
%add5 = add nsw i32 %add, %conv4
- %a6 = getelementptr inbounds %struct.t4* %v4, i32 0, i32 0
- %3 = load i32* %a6, align 1
+ %a6 = getelementptr inbounds %struct.t4, %struct.t4* %v4, i32 0, i32 0
+ %3 = load i32, i32* %a6, align 1
%add7 = add nsw i32 %add5, %3
- %a8 = getelementptr inbounds %struct.t5* %v5, i32 0, i32 0
- %4 = load i32* %a8, align 1
+ %a8 = getelementptr inbounds %struct.t5, %struct.t5* %v5, i32 0, i32 0
+ %4 = load i32, i32* %a8, align 1
%add9 = add nsw i32 %add7, %4
- %a10 = getelementptr inbounds %struct.t6* %v6, i32 0, i32 0
- %5 = load i32* %a10, align 1
+ %a10 = getelementptr inbounds %struct.t6, %struct.t6* %v6, i32 0, i32 0
+ %5 = load i32, i32* %a10, align 1
%add11 = add nsw i32 %add9, %5
- %a12 = getelementptr inbounds %struct.t7* %v7, i32 0, i32 0
- %6 = load i32* %a12, align 1
+ %a12 = getelementptr inbounds %struct.t7, %struct.t7* %v7, i32 0, i32 0
+ %6 = load i32, i32* %a12, align 1
%add13 = add nsw i32 %add11, %6
ret i32 %add13
diff --git a/test/CodeGen/PowerPC/stwu-gta.ll b/test/CodeGen/PowerPC/stwu-gta.ll
index 980c1d502853..2b420156f739 100644
--- a/test/CodeGen/PowerPC/stwu-gta.ll
+++ b/test/CodeGen/PowerPC/stwu-gta.ll
@@ -8,8 +8,8 @@ target triple = "powerpc-unknown-linux"
define void @_GLOBAL__I_a() nounwind section ".text.startup" {
entry:
- store i32 5, i32* getelementptr inbounds (%class.Two.0.5* @foo, i32 0, i32 0), align 4
- store i32 6, i32* getelementptr inbounds (%class.Two.0.5* @foo, i32 0, i32 1), align 4
+ store i32 5, i32* getelementptr inbounds (%class.Two.0.5, %class.Two.0.5* @foo, i32 0, i32 0), align 4
+ store i32 6, i32* getelementptr inbounds (%class.Two.0.5, %class.Two.0.5* @foo, i32 0, i32 1), align 4
ret void
}
diff --git a/test/CodeGen/PowerPC/stwu8.ll b/test/CodeGen/PowerPC/stwu8.ll
index b220af2df4a4..bb2748432d79 100644
--- a/test/CodeGen/PowerPC/stwu8.ll
+++ b/test/CodeGen/PowerPC/stwu8.ll
@@ -11,7 +11,7 @@ target triple = "powerpc64-unknown-linux-gnu"
define void @test1(%class.spell_checker.21.103.513.538* %this) unnamed_addr align 2 {
entry:
- %_M_header.i.i.i.i.i.i = getelementptr inbounds %class.spell_checker.21.103.513.538* %this, i64 0, i32 0, i32 0, i32 0, i32 1
+ %_M_header.i.i.i.i.i.i = getelementptr inbounds %class.spell_checker.21.103.513.538, %class.spell_checker.21.103.513.538* %this, i64 0, i32 0, i32 0, i32 0, i32 1
%0 = bitcast %"struct.std::_Rb_tree_node_base.17.99.509.534"* %_M_header.i.i.i.i.i.i to i8*
call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 40, i32 4, i1 false) nounwind
store %"struct.std::_Rb_tree_node_base.17.99.509.534"* %_M_header.i.i.i.i.i.i, %"struct.std::_Rb_tree_node_base.17.99.509.534"** undef, align 8
diff --git a/test/CodeGen/PowerPC/stwux.ll b/test/CodeGen/PowerPC/stwux.ll
index 737e9d9f0ecb..2ed630d8002d 100644
--- a/test/CodeGen/PowerPC/stwux.ll
+++ b/test/CodeGen/PowerPC/stwux.ll
@@ -27,7 +27,7 @@ while.end: ; preds = %if.end12
if.end15: ; preds = %while.end
%idxprom.i.i230 = sext i32 %i.1 to i64
- %arrayidx18 = getelementptr inbounds [100 x i32]* @multvec_i, i64 0, i64 %idxprom.i.i230
+ %arrayidx18 = getelementptr inbounds [100 x i32], [100 x i32]* @multvec_i, i64 0, i64 %idxprom.i.i230
store i32 0, i32* %arrayidx18, align 4
br i1 undef, label %while.body21, label %while.end90
diff --git a/test/CodeGen/PowerPC/subreg-postra-2.ll b/test/CodeGen/PowerPC/subreg-postra-2.ll
index 2faaa6129294..051536443413 100644
--- a/test/CodeGen/PowerPC/subreg-postra-2.ll
+++ b/test/CodeGen/PowerPC/subreg-postra-2.ll
@@ -134,8 +134,8 @@ while.body392.lr.ph: ; preds = %do.body378
br label %while.body392
while.body392: ; preds = %wait_on_buffer.exit1319, %while.body392.lr.ph
- %0 = load i8** undef, align 8
- %add.ptr399 = getelementptr inbounds i8* %0, i64 -72
+ %0 = load i8*, i8** undef, align 8
+ %add.ptr399 = getelementptr inbounds i8, i8* %0, i64 -72
%b_state.i.i1314 = bitcast i8* %add.ptr399 to i64*
%tobool.i1316 = icmp eq i64 undef, 0
br i1 %tobool.i1316, label %wait_on_buffer.exit1319, label %if.then.i1317
@@ -144,7 +144,7 @@ if.then.i1317: ; preds = %while.body392
unreachable
wait_on_buffer.exit1319: ; preds = %while.body392
- %1 = load volatile i64* %b_state.i.i1314, align 8
+ %1 = load volatile i64, i64* %b_state.i.i1314, align 8
%conv.i.i1322 = and i64 %1, 1
%lnot404 = icmp eq i64 %conv.i.i1322, 0
%.err.4 = select i1 %lnot404, i32 -5, i32 undef
@@ -160,7 +160,7 @@ while.end418: ; preds = %wait_on_buffer.exit
; CHECK-LABEL: @jbd2_journal_commit_transaction
; CHECK: andi.
-; CHECK: cror [[REG:[0-9]+]], 1, 1
+; CHECK: crmove [[REG:[0-9]+]], 1
; CHECK: stdcx.
; CHECK: isel {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}, [[REG]]
diff --git a/test/CodeGen/PowerPC/subreg-postra.ll b/test/CodeGen/PowerPC/subreg-postra.ll
index b10fa668cb8d..ba1b967cf204 100644
--- a/test/CodeGen/PowerPC/subreg-postra.ll
+++ b/test/CodeGen/PowerPC/subreg-postra.ll
@@ -120,8 +120,8 @@ while.body392.lr.ph: ; preds = %do.body378
br label %while.body392
while.body392: ; preds = %wait_on_buffer.exit1319, %while.body392.lr.ph
- %0 = load i8** undef, align 8
- %add.ptr399 = getelementptr inbounds i8* %0, i64 -72
+ %0 = load i8*, i8** undef, align 8
+ %add.ptr399 = getelementptr inbounds i8, i8* %0, i64 -72
%b_state.i.i1314 = bitcast i8* %add.ptr399 to i64*
%tobool.i1316 = icmp eq i64 undef, 0
br i1 %tobool.i1316, label %wait_on_buffer.exit1319, label %if.then.i1317
@@ -130,20 +130,20 @@ if.then.i1317: ; preds = %while.body392
unreachable
wait_on_buffer.exit1319: ; preds = %while.body392
- %1 = load volatile i64* %b_state.i.i1314, align 8
+ %1 = load volatile i64, i64* %b_state.i.i1314, align 8
%conv.i.i1322 = and i64 %1, 1
%lnot404 = icmp eq i64 %conv.i.i1322, 0
%.err.4 = select i1 %lnot404, i32 -5, i32 undef
%2 = call i64 asm sideeffect "1:.long 0x7c0000a8 $| ((($0) & 0x1f) << 21) $| (((0) & 0x1f) << 16) $| ((($3) & 0x1f) << 11) $| (((0) & 0x1) << 0) \0Aandc $0,$0,$2\0Astdcx. $0,0,$3\0Abne- 1b\0A", "=&r,=*m,r,r,*m,~{cc},~{memory}"(i64* %b_state.i.i1314, i64 262144, i64* %b_state.i.i1314, i64* %b_state.i.i1314) #1
- %prev.i.i.i1325 = getelementptr inbounds i8* %0, i64 8
- %3 = load i32** null, align 8
+ %prev.i.i.i1325 = getelementptr inbounds i8, i8* %0, i64 8
+ %3 = load i32*, i32** null, align 8
store i32* %3, i32** undef, align 8
call void @__brelse(i32* undef) #1
br i1 undef, label %while.end418, label %while.body392
; CHECK-LABEL: @jbd2_journal_commit_transaction
; CHECK: andi.
-; CHECK: cror [[REG:[0-9]+]], 1, 1
+; CHECK: crmove [[REG:[0-9]+]], 1
; CHECK: stdcx.
; CHECK: isel {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}, [[REG]]
diff --git a/test/CodeGen/PowerPC/subsumes-pred-regs.ll b/test/CodeGen/PowerPC/subsumes-pred-regs.ll
index c510e36cb413..5389c1318445 100644
--- a/test/CodeGen/PowerPC/subsumes-pred-regs.ll
+++ b/test/CodeGen/PowerPC/subsumes-pred-regs.ll
@@ -20,7 +20,7 @@ if.then: ; preds = %lor.end
br i1 undef, label %return, label %if.end.i24
if.end.i24: ; preds = %if.then
- %0 = load i32* undef, align 4
+ %0 = load i32, i32* undef, align 4
%lnot.i.i16.i23 = icmp eq i32 %0, 0
br i1 %lnot.i.i16.i23, label %if.end7.i37, label %test.exit27.i34
diff --git a/test/CodeGen/PowerPC/swaps-le-1.ll b/test/CodeGen/PowerPC/swaps-le-1.ll
new file mode 100644
index 000000000000..0c4163169034
--- /dev/null
+++ b/test/CodeGen/PowerPC/swaps-le-1.ll
@@ -0,0 +1,147 @@
+; RUN: llc -O3 -mcpu=pwr8 -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s
+; RUN: llc -O3 -mcpu=pwr8 -disable-ppc-vsx-swap-removal -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck -check-prefix=NOOPTSWAP %s
+
+; This test was generated from the following source:
+;
+; #define N 4096
+; int ca[N] __attribute__((aligned(16)));
+; int cb[N] __attribute__((aligned(16)));
+; int cc[N] __attribute__((aligned(16)));
+; int cd[N] __attribute__((aligned(16)));
+;
+; void foo ()
+; {
+; int i;
+; for (i = 0; i < N; i++) {
+; ca[i] = (cb[i] + cc[i]) * cd[i];
+; }
+; }
+
+@cb = common global [4096 x i32] zeroinitializer, align 16
+@cc = common global [4096 x i32] zeroinitializer, align 16
+@cd = common global [4096 x i32] zeroinitializer, align 16
+@ca = common global [4096 x i32] zeroinitializer, align 16
+
+define void @foo() {
+entry:
+ br label %vector.body
+
+vector.body:
+ %index = phi i64 [ 0, %entry ], [ %index.next.3, %vector.body ]
+ %0 = getelementptr inbounds [4096 x i32], [4096 x i32]* @cb, i64 0, i64 %index
+ %1 = bitcast i32* %0 to <4 x i32>*
+ %wide.load = load <4 x i32>, <4 x i32>* %1, align 16
+ %2 = getelementptr inbounds [4096 x i32], [4096 x i32]* @cc, i64 0, i64 %index
+ %3 = bitcast i32* %2 to <4 x i32>*
+ %wide.load13 = load <4 x i32>, <4 x i32>* %3, align 16
+ %4 = add nsw <4 x i32> %wide.load13, %wide.load
+ %5 = getelementptr inbounds [4096 x i32], [4096 x i32]* @cd, i64 0, i64 %index
+ %6 = bitcast i32* %5 to <4 x i32>*
+ %wide.load14 = load <4 x i32>, <4 x i32>* %6, align 16
+ %7 = mul nsw <4 x i32> %4, %wide.load14
+ %8 = getelementptr inbounds [4096 x i32], [4096 x i32]* @ca, i64 0, i64 %index
+ %9 = bitcast i32* %8 to <4 x i32>*
+ store <4 x i32> %7, <4 x i32>* %9, align 16
+ %index.next = add nuw nsw i64 %index, 4
+ %10 = getelementptr inbounds [4096 x i32], [4096 x i32]* @cb, i64 0, i64 %index.next
+ %11 = bitcast i32* %10 to <4 x i32>*
+ %wide.load.1 = load <4 x i32>, <4 x i32>* %11, align 16
+ %12 = getelementptr inbounds [4096 x i32], [4096 x i32]* @cc, i64 0, i64 %index.next
+ %13 = bitcast i32* %12 to <4 x i32>*
+ %wide.load13.1 = load <4 x i32>, <4 x i32>* %13, align 16
+ %14 = add nsw <4 x i32> %wide.load13.1, %wide.load.1
+ %15 = getelementptr inbounds [4096 x i32], [4096 x i32]* @cd, i64 0, i64 %index.next
+ %16 = bitcast i32* %15 to <4 x i32>*
+ %wide.load14.1 = load <4 x i32>, <4 x i32>* %16, align 16
+ %17 = mul nsw <4 x i32> %14, %wide.load14.1
+ %18 = getelementptr inbounds [4096 x i32], [4096 x i32]* @ca, i64 0, i64 %index.next
+ %19 = bitcast i32* %18 to <4 x i32>*
+ store <4 x i32> %17, <4 x i32>* %19, align 16
+ %index.next.1 = add nuw nsw i64 %index.next, 4
+ %20 = getelementptr inbounds [4096 x i32], [4096 x i32]* @cb, i64 0, i64 %index.next.1
+ %21 = bitcast i32* %20 to <4 x i32>*
+ %wide.load.2 = load <4 x i32>, <4 x i32>* %21, align 16
+ %22 = getelementptr inbounds [4096 x i32], [4096 x i32]* @cc, i64 0, i64 %index.next.1
+ %23 = bitcast i32* %22 to <4 x i32>*
+ %wide.load13.2 = load <4 x i32>, <4 x i32>* %23, align 16
+ %24 = add nsw <4 x i32> %wide.load13.2, %wide.load.2
+ %25 = getelementptr inbounds [4096 x i32], [4096 x i32]* @cd, i64 0, i64 %index.next.1
+ %26 = bitcast i32* %25 to <4 x i32>*
+ %wide.load14.2 = load <4 x i32>, <4 x i32>* %26, align 16
+ %27 = mul nsw <4 x i32> %24, %wide.load14.2
+ %28 = getelementptr inbounds [4096 x i32], [4096 x i32]* @ca, i64 0, i64 %index.next.1
+ %29 = bitcast i32* %28 to <4 x i32>*
+ store <4 x i32> %27, <4 x i32>* %29, align 16
+ %index.next.2 = add nuw nsw i64 %index.next.1, 4
+ %30 = getelementptr inbounds [4096 x i32], [4096 x i32]* @cb, i64 0, i64 %index.next.2
+ %31 = bitcast i32* %30 to <4 x i32>*
+ %wide.load.3 = load <4 x i32>, <4 x i32>* %31, align 16
+ %32 = getelementptr inbounds [4096 x i32], [4096 x i32]* @cc, i64 0, i64 %index.next.2
+ %33 = bitcast i32* %32 to <4 x i32>*
+ %wide.load13.3 = load <4 x i32>, <4 x i32>* %33, align 16
+ %34 = add nsw <4 x i32> %wide.load13.3, %wide.load.3
+ %35 = getelementptr inbounds [4096 x i32], [4096 x i32]* @cd, i64 0, i64 %index.next.2
+ %36 = bitcast i32* %35 to <4 x i32>*
+ %wide.load14.3 = load <4 x i32>, <4 x i32>* %36, align 16
+ %37 = mul nsw <4 x i32> %34, %wide.load14.3
+ %38 = getelementptr inbounds [4096 x i32], [4096 x i32]* @ca, i64 0, i64 %index.next.2
+ %39 = bitcast i32* %38 to <4 x i32>*
+ store <4 x i32> %37, <4 x i32>* %39, align 16
+ %index.next.3 = add nuw nsw i64 %index.next.2, 4
+ %40 = icmp eq i64 %index.next.3, 4096
+ br i1 %40, label %for.end, label %vector.body
+
+for.end:
+ ret void
+}
+
+; CHECK-LABEL: @foo
+; CHECK-NOT: xxpermdi
+; CHECK-NOT: xxswapd
+
+; CHECK: lxvd2x
+; CHECK: lxvd2x
+; CHECK-DAG: lxvd2x
+; CHECK-DAG: vadduwm
+; CHECK: vmuluwm
+; CHECK: stxvd2x
+
+; CHECK: lxvd2x
+; CHECK: lxvd2x
+; CHECK-DAG: lxvd2x
+; CHECK-DAG: vadduwm
+; CHECK: vmuluwm
+; CHECK: stxvd2x
+
+; CHECK: lxvd2x
+; CHECK: lxvd2x
+; CHECK-DAG: lxvd2x
+; CHECK-DAG: vadduwm
+; CHECK: vmuluwm
+; CHECK: stxvd2x
+
+; CHECK: lxvd2x
+; CHECK: lxvd2x
+; CHECK-DAG: lxvd2x
+; CHECK-DAG: vadduwm
+; CHECK: vmuluwm
+; CHECK: stxvd2x
+
+
+; NOOPTSWAP-LABEL: @foo
+
+; NOOPTSWAP: lxvd2x
+; NOOPTSWAP-DAG: lxvd2x
+; NOOPTSWAP-DAG: lxvd2x
+; NOOPTSWAP-DAG: xxswapd
+; NOOPTSWAP-DAG: xxswapd
+; NOOPTSWAP-DAG: xxswapd
+; NOOPTSWAP-DAG: vadduwm
+; NOOPTSWAP: vmuluwm
+; NOOPTSWAP: xxswapd
+; NOOPTSWAP-DAG: xxswapd
+; NOOPTSWAP-DAG: xxswapd
+; NOOPTSWAP-DAG: stxvd2x
+; NOOPTSWAP-DAG: stxvd2x
+; NOOPTSWAP: stxvd2x
+
diff --git a/test/CodeGen/PowerPC/swaps-le-2.ll b/test/CodeGen/PowerPC/swaps-le-2.ll
new file mode 100644
index 000000000000..08096ed20ddb
--- /dev/null
+++ b/test/CodeGen/PowerPC/swaps-le-2.ll
@@ -0,0 +1,91 @@
+; RUN: llc -O3 -mcpu=pwr8 -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s
+
+; Test swap removal when a vector splat must be adjusted to make it legal.
+;
+; Test generated from following C code:
+;
+; vector char vc = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
+; vector char vcr;
+; vector short vs = {0, 1, 2, 3, 4, 5, 6, 7};
+; vector short vsr;
+; vector int vi = {0, 1, 2, 3};
+; vector int vir;
+;
+; void cfoo ()
+; {
+; vcr = (vector char){vc[5], vc[5], vc[5], vc[5], vc[5], vc[5], vc[5], vc[5],
+; vc[5], vc[5], vc[5], vc[5], vc[5], vc[5], vc[5], vc[5]};
+; }
+;
+; void sfoo ()
+; {
+; vsr = (vector short){vs[6], vs[6], vs[6], vs[6],
+; vs[6], vs[6], vs[6], vs[6]};
+; }
+;
+; void ifoo ()
+; {
+; vir = (vector int){vi[1], vi[1], vi[1], vi[1]};
+; }
+
+@vc = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
+@vs = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16
+@vi = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
+@vcr = common global <16 x i8> zeroinitializer, align 16
+@vsr = common global <8 x i16> zeroinitializer, align 16
+@vir = common global <4 x i32> zeroinitializer, align 16
+
+; Function Attrs: nounwind
+define void @cfoo() {
+entry:
+ %0 = load <16 x i8>, <16 x i8>* @vc, align 16
+ %vecinit30 = shufflevector <16 x i8> %0, <16 x i8> undef, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+ store <16 x i8> %vecinit30, <16 x i8>* @vcr, align 16
+ ret void
+}
+
+; Function Attrs: nounwind
+define void @sfoo() {
+entry:
+ %0 = load <8 x i16>, <8 x i16>* @vs, align 16
+ %vecinit14 = shufflevector <8 x i16> %0, <8 x i16> undef, <8 x i32> <i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6>
+ store <8 x i16> %vecinit14, <8 x i16>* @vsr, align 16
+ ret void
+}
+
+; Function Attrs: nounwind
+define void @ifoo() {
+entry:
+ %0 = load <4 x i32>, <4 x i32>* @vi, align 16
+ %vecinit6 = shufflevector <4 x i32> %0, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+ store <4 x i32> %vecinit6, <4 x i32>* @vir, align 16
+ ret void
+}
+
+; Justification:
+; Byte splat of element 5 (BE) becomes element 15-5 = 10 (LE)
+; which becomes (10+8)%16 = 2 (LE swapped).
+;
+; Halfword splat of element 6 (BE) becomes element 7-6 = 1 (LE)
+; which becomes (1+4)%8 = 5 (LE swapped).
+;
+; Word splat of element 1 (BE) becomes element 3-1 = 2 (LE)
+; which becomes (2+2)%4 = 0 (LE swapped).
+
+; CHECK-NOT: xxpermdi
+; CHECK-NOT: xxswapd
+
+; CHECK-LABEL: @cfoo
+; CHECK: lxvd2x
+; CHECK: vspltb {{[0-9]+}}, {{[0-9]+}}, 2
+; CHECK: stxvd2x
+
+; CHECK-LABEL: @sfoo
+; CHECK: lxvd2x
+; CHECK: vsplth {{[0-9]+}}, {{[0-9]+}}, 5
+; CHECK: stxvd2x
+
+; CHECK-LABEL: @ifoo
+; CHECK: lxvd2x
+; CHECK: vspltw {{[0-9]+}}, {{[0-9]+}}, 0
+; CHECK: stxvd2x
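
The index arithmetic in the justification comments above can be restated as a small standalone check. This is only a sketch; the helper name swapped_splat_index is invented here and is not part of the test or of LLVM.

#include <assert.h>

/* For an n-element vector, big-endian element `be` is little-endian element
 * (n - 1 - be); when the doubleword halves are left swapped (lxvd2x/stxvd2x
 * with the xxswapd removed), that index is further rotated by n/2 modulo n. */
static unsigned swapped_splat_index(unsigned be, unsigned n) {
  unsigned le = (n - 1) - be;   /* BE -> LE element numbering */
  return (le + n / 2) % n;      /* account for the swapped doublewords */
}

int main(void) {
  assert(swapped_splat_index(5, 16) == 2);  /* byte splat: vspltb ..., 2 */
  assert(swapped_splat_index(6, 8) == 5);   /* halfword splat: vsplth ..., 5 */
  assert(swapped_splat_index(1, 4) == 0);   /* word splat: vspltw ..., 0 */
  return 0;
}

The three asserted values are the immediates expected by the vspltb, vsplth, and vspltw CHECK lines above.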
diff --git a/test/CodeGen/PowerPC/tls-cse.ll b/test/CodeGen/PowerPC/tls-cse.ll
new file mode 100644
index 000000000000..7375e9ccbae3
--- /dev/null
+++ b/test/CodeGen/PowerPC/tls-cse.ll
@@ -0,0 +1,52 @@
+; RUN: llc -march=ppc64 -mcpu=pwr7 -O2 -relocation-model=pic < %s | FileCheck %s
+; RUN: llc -march=ppc64 -mcpu=pwr7 -O2 -relocation-model=pic < %s | grep "__tls_get_addr" | count 1
+
+; This test was derived from LLVM's own
+; PrettyStackTraceEntry::~PrettyStackTraceEntry(). It demonstrates an
+; opportunity for CSE of calls to __tls_get_addr().
+
+target datalayout = "e-m:e-i64:64-n32:64"
+target triple = "powerpc64le-unknown-linux-gnu"
+
+%"class.llvm::PrettyStackTraceEntry" = type { i32 (...)**, %"class.llvm::PrettyStackTraceEntry"* }
+
+@_ZTVN4llvm21PrettyStackTraceEntryE = unnamed_addr constant [5 x i8*] [i8* null, i8* null, i8* bitcast (void (%"class.llvm::PrettyStackTraceEntry"*)* @_ZN4llvm21PrettyStackTraceEntryD2Ev to i8*), i8* bitcast (void (%"class.llvm::PrettyStackTraceEntry"*)* @_ZN4llvm21PrettyStackTraceEntryD0Ev to i8*), i8* bitcast (void ()* @__cxa_pure_virtual to i8*)], align 8
+@_ZL20PrettyStackTraceHead = internal thread_local unnamed_addr global %"class.llvm::PrettyStackTraceEntry"* null, align 8
+@.str = private unnamed_addr constant [87 x i8] c"PrettyStackTraceHead == this && \22Pretty stack trace entry destruction is out of order\22\00", align 1
+@.str1 = private unnamed_addr constant [64 x i8] c"/home/wschmidt/llvm/llvm-test2/lib/Support/PrettyStackTrace.cpp\00", align 1
+@__PRETTY_FUNCTION__._ZN4llvm21PrettyStackTraceEntryD2Ev = private unnamed_addr constant [62 x i8] c"virtual llvm::PrettyStackTraceEntry::~PrettyStackTraceEntry()\00", align 1
+
+declare void @_ZN4llvm21PrettyStackTraceEntryD2Ev(%"class.llvm::PrettyStackTraceEntry"* %this) unnamed_addr
+declare void @__cxa_pure_virtual()
+declare void @__assert_fail(i8*, i8*, i32 zeroext, i8*)
+declare void @_ZdlPv(i8*)
+
+define void @_ZN4llvm21PrettyStackTraceEntryD0Ev(%"class.llvm::PrettyStackTraceEntry"* %this) unnamed_addr align 2 {
+entry:
+ %0 = getelementptr inbounds %"class.llvm::PrettyStackTraceEntry", %"class.llvm::PrettyStackTraceEntry"* %this, i64 0, i32 0
+ store i32 (...)** bitcast (i8** getelementptr inbounds ([5 x i8*], [5 x i8*]* @_ZTVN4llvm21PrettyStackTraceEntryE, i64 0, i64 2) to i32 (...)**), i32 (...)*** %0, align 8
+ %1 = load %"class.llvm::PrettyStackTraceEntry"*, %"class.llvm::PrettyStackTraceEntry"** @_ZL20PrettyStackTraceHead, align 8
+ %cmp.i = icmp eq %"class.llvm::PrettyStackTraceEntry"* %1, %this
+ br i1 %cmp.i, label %_ZN4llvm21PrettyStackTraceEntryD2Ev.exit, label %cond.false.i
+
+cond.false.i: ; preds = %entry
+ tail call void @__assert_fail(i8* getelementptr inbounds ([87 x i8], [87 x i8]* @.str, i64 0, i64 0), i8* getelementptr inbounds ([64 x i8], [64 x i8]* @.str1, i64 0, i64 0), i32 zeroext 119, i8* getelementptr inbounds ([62 x i8], [62 x i8]* @__PRETTY_FUNCTION__._ZN4llvm21PrettyStackTraceEntryD2Ev, i64 0, i64 0))
+ unreachable
+
+_ZN4llvm21PrettyStackTraceEntryD2Ev.exit: ; preds = %entry
+ %NextEntry.i.i = getelementptr inbounds %"class.llvm::PrettyStackTraceEntry", %"class.llvm::PrettyStackTraceEntry"* %this, i64 0, i32 1
+ %2 = bitcast %"class.llvm::PrettyStackTraceEntry"** %NextEntry.i.i to i64*
+ %3 = load i64, i64* %2, align 8
+ store i64 %3, i64* bitcast (%"class.llvm::PrettyStackTraceEntry"** @_ZL20PrettyStackTraceHead to i64*), align 8
+ %4 = bitcast %"class.llvm::PrettyStackTraceEntry"* %this to i8*
+ tail call void @_ZdlPv(i8* %4)
+ ret void
+}
+
+; CHECK-LABEL: _ZN4llvm21PrettyStackTraceEntryD0Ev:
+; CHECK: addis [[REG1:[0-9]+]], 2, _ZL20PrettyStackTraceHead@got@tlsld@ha
+; CHECK: addi 3, [[REG1]], _ZL20PrettyStackTraceHead@got@tlsld@l
+; CHECK: bl __tls_get_addr(_ZL20PrettyStackTraceHead@tlsld)
+; CHECK: addis 3, 3, _ZL20PrettyStackTraceHead@dtprel@ha
+; CHECK: ld {{[0-9]+}}, _ZL20PrettyStackTraceHead@dtprel@l(3)
+; CHECK: std {{[0-9]+}}, _ZL20PrettyStackTraceHead@dtprel@l(3)
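
In source terms, the CSE opportunity this test demonstrates is roughly the following sketch; `head` and `pop` are hypothetical stand-ins for _ZL20PrettyStackTraceHead and the destructor above, not the actual PrettyStackTrace code. Two accesses to the same thread-local variable in one function should need only one call to __tls_get_addr, which is what the `grep "__tls_get_addr" | count 1` RUN line checks.

/* Hypothetical reduction of the IR above. */
static __thread void *head;    /* internal TLS variable: local-dynamic model */

void pop(void **next_entry) {
  void *old = head;            /* first access computes the TLS address */
  (void)old;
  head = *next_entry;          /* second access: with CSE, the address from the
                                  first __tls_get_addr call is reused rather
                                  than calling __tls_get_addr a second time */
}

Compiled with -fPIC -O2 for powerpc64le, the expectation is a single bl __tls_get_addr followed by the addis/ld/std against the @dtprel offsets, matching the CHECK lines above.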
diff --git a/test/CodeGen/PowerPC/tls-pic.ll b/test/CodeGen/PowerPC/tls-pic.ll
index 9ba372591e6e..b7d9298685de 100644
--- a/test/CodeGen/PowerPC/tls-pic.ll
+++ b/test/CodeGen/PowerPC/tls-pic.ll
@@ -13,38 +13,38 @@ define signext i32 @main() nounwind {
entry:
%retval = alloca i32, align 4
store i32 0, i32* %retval
- %0 = load i32* @a, align 4
+ %0 = load i32, i32* @a, align 4
ret i32 %0
}
; OPT0-LABEL: main:
; OPT0: addis [[REG:[0-9]+]], 2, a@got@tlsld@ha
-; OPT0-NEXT: addi 3, [[REG]], a@got@tlsld@l
+; OPT0: addi 3, [[REG]], a@got@tlsld@l
; OPT0: bl __tls_get_addr(a@tlsld)
; OPT0-NEXT: nop
; OPT0: addis [[REG2:[0-9]+]], 3, a@dtprel@ha
-; OPT0-NEXT: addi {{[0-9]+}}, [[REG2]], a@dtprel@l
+; OPT0: addi {{[0-9]+}}, [[REG2]], a@dtprel@l
; OPT0-32-LABEL: main
; OPT0-32: addi {{[0-9]+}}, {{[0-9]+}}, a@got@tlsld
; OPT0-32: bl __tls_get_addr(a@tlsld)@PLT
; OPT0-32: addis [[REG:[0-9]+]], 3, a@dtprel@ha
-; OPT0-32-NEXT: addi {{[0-9]+}}, [[REG]], a@dtprel@l
+; OPT0-32: addi {{[0-9]+}}, [[REG]], a@dtprel@l
; OPT1-32-LABEL: main
; OPT1-32: addi 3, {{[0-9]+}}, a@got@tlsld
; OPT1-32: bl __tls_get_addr(a@tlsld)@PLT
; OPT1-32: addis [[REG:[0-9]+]], 3, a@dtprel@ha
-; OPT1-32-NEXT: addi {{[0-9]+}}, [[REG]], a@dtprel@l
+; OPT1-32: addi {{[0-9]+}}, [[REG]], a@dtprel@l
; Test peephole optimization for thread-local storage using the
; local dynamic model.
; OPT1-LABEL: main:
; OPT1: addis [[REG:[0-9]+]], 2, a@got@tlsld@ha
-; OPT1-NEXT: addi 3, [[REG]], a@got@tlsld@l
+; OPT1: addi 3, [[REG]], a@got@tlsld@l
; OPT1: bl __tls_get_addr(a@tlsld)
; OPT1-NEXT: nop
; OPT1: addis [[REG2:[0-9]+]], 3, a@dtprel@ha
-; OPT1-NEXT: lwa {{[0-9]+}}, a@dtprel@l([[REG2]])
+; OPT1: lwa {{[0-9]+}}, a@dtprel@l([[REG2]])
; Test correct assembly code generation for thread-local storage using
; the general dynamic model.
@@ -55,13 +55,13 @@ define signext i32 @main2() nounwind {
entry:
%retval = alloca i32, align 4
store i32 0, i32* %retval
- %0 = load i32* @a2, align 4
+ %0 = load i32, i32* @a2, align 4
ret i32 %0
}
; OPT1-LABEL: main2
-; OPT1: addis [[REG:[0-9]+]], 2, a2@got@tlsgd@ha
-; OPT1-NEXT: addi 3, [[REG]], a2@got@tlsgd@l
+; OPT1: addis [[REG:[0-9]+]], 2, a2@got@tlsgd@ha
+; OPT1: addi 3, [[REG]], a2@got@tlsgd@l
; OPT1: bl __tls_get_addr(a2@tlsgd)
; OPT1-NEXT: nop
; OPT1-32-LABEL: main2
diff --git a/test/CodeGen/PowerPC/tls-store2.ll b/test/CodeGen/PowerPC/tls-store2.ll
index f884dd8a0a17..e9aa17e8c0ff 100644
--- a/test/CodeGen/PowerPC/tls-store2.ll
+++ b/test/CodeGen/PowerPC/tls-store2.ll
@@ -19,13 +19,14 @@ entry:
}
; CHECK-LABEL: call_once:
-; CHECK: addis 3, 2, __once_callable@got@tlsgd@ha
-; CHECK: addi 3, 3, __once_callable@got@tlsgd@l
+; CHECK: addi 3, {{[0-9]+}}, __once_callable@got@tlsgd@l
; CHECK: bl __tls_get_addr(__once_callable@tlsgd)
; CHECK-NEXT: nop
-; CHECK: std {{[0-9]+}}, 0(3)
-; CHECK: addis 3, 2, __once_call@got@tlsgd@ha
-; CHECK: addi 3, 3, __once_call@got@tlsgd@l
+; FIXME: We could check here for 'std {{[0-9]+}}, 0(3)', but that no longer
+; works because, with new scheduling freedom, we create a copy of R3 based on the
+; initial scheduling, but don't coalesce it again after we move the instructions
+; so that the copy is no longer necessary.
+; CHECK: addi 3, {{[0-9]+}}, __once_call@got@tlsgd@l
; CHECK: bl __tls_get_addr(__once_call@tlsgd)
; CHECK-NEXT: nop
; CHECK: std {{[0-9]+}}, 0(3)
diff --git a/test/CodeGen/PowerPC/tls.ll b/test/CodeGen/PowerPC/tls.ll
index 59b4de755988..c96e444e02c8 100644
--- a/test/CodeGen/PowerPC/tls.ll
+++ b/test/CodeGen/PowerPC/tls.ll
@@ -30,7 +30,7 @@ define signext i32 @main2() nounwind {
entry:
%retval = alloca i32, align 4
store i32 0, i32* %retval
- %0 = load i32* @a2, align 4
+ %0 = load i32, i32* @a2, align 4
ret i32 %0
}
diff --git a/test/CodeGen/PowerPC/toc-load-sched-bug.ll b/test/CodeGen/PowerPC/toc-load-sched-bug.ll
index e92c4f4018b1..5ac4e3635023 100644
--- a/test/CodeGen/PowerPC/toc-load-sched-bug.ll
+++ b/test/CodeGen/PowerPC/toc-load-sched-bug.ll
@@ -176,51 +176,51 @@ entry:
%FileOrErr = alloca %"class.llvm::ErrorOr", align 8
%ref.tmp = alloca %"class.llvm::SMDiagnostic", align 8
%ref.tmp5 = alloca %"class.std::basic_string", align 8
- %_M_p.i.i.i = getelementptr inbounds %"class.std::basic_string"* %Filename, i64 0, i32 0, i32 0
- %0 = load i8** %_M_p.i.i.i, align 8, !tbaa !1
+ %_M_p.i.i.i = getelementptr inbounds %"class.std::basic_string", %"class.std::basic_string"* %Filename, i64 0, i32 0, i32 0
+ %0 = load i8*, i8** %_M_p.i.i.i, align 8, !tbaa !1
%1 = ptrtoint i8* %0 to i64
- %arrayidx.i.i.i = getelementptr inbounds i8* %0, i64 -24
+ %arrayidx.i.i.i = getelementptr inbounds i8, i8* %0, i64 -24
%_M_length.i.i = bitcast i8* %arrayidx.i.i.i to i64*
- %2 = load i64* %_M_length.i.i, align 8, !tbaa !7
+ %2 = load i64, i64* %_M_length.i.i, align 8, !tbaa !7
%.fca.0.insert18 = insertvalue [2 x i64] undef, i64 %1, 0
%.fca.1.insert21 = insertvalue [2 x i64] %.fca.0.insert18, i64 %2, 1
call void @_ZN4llvm12MemoryBuffer14getFileOrSTDINENS_9StringRefEl(%"class.llvm::ErrorOr"* sret %FileOrErr, [2 x i64] %.fca.1.insert21, i64 -1) #3
- %HasError.i24 = getelementptr inbounds %"class.llvm::ErrorOr"* %FileOrErr, i64 0, i32 1
- %bf.load.i25 = load i8* %HasError.i24, align 8
+ %HasError.i24 = getelementptr inbounds %"class.llvm::ErrorOr", %"class.llvm::ErrorOr"* %FileOrErr, i64 0, i32 1
+ %bf.load.i25 = load i8, i8* %HasError.i24, align 8
%3 = and i8 %bf.load.i25, 1
%bf.cast.i26 = icmp eq i8 %3, 0
br i1 %bf.cast.i26, label %_ZN4llvm7ErrorOrISt10unique_ptrINS_12MemoryBufferESt14default_deleteIS2_EEE3getEv.exit, label %_ZNK4llvm7ErrorOrISt10unique_ptrINS_12MemoryBufferESt14default_deleteIS2_EEE8getErrorEv.exit
_ZNK4llvm7ErrorOrISt10unique_ptrINS_12MemoryBufferESt14default_deleteIS2_EEE8getErrorEv.exit: ; preds = %entry
%retval.sroa.0.0..sroa_cast.i = bitcast %"class.llvm::ErrorOr"* %FileOrErr to i64*
- %retval.sroa.0.0.copyload.i = load i64* %retval.sroa.0.0..sroa_cast.i, align 8
- %retval.sroa.3.0..sroa_idx.i = getelementptr inbounds %"class.llvm::ErrorOr"* %FileOrErr, i64 0, i32 0, i32 0, i32 0, i32 0, i64 8
+ %retval.sroa.0.0.copyload.i = load i64, i64* %retval.sroa.0.0..sroa_cast.i, align 8
+ %retval.sroa.3.0..sroa_idx.i = getelementptr inbounds %"class.llvm::ErrorOr", %"class.llvm::ErrorOr"* %FileOrErr, i64 0, i32 0, i32 0, i32 0, i32 0, i64 8
%retval.sroa.3.0..sroa_cast.i = bitcast i8* %retval.sroa.3.0..sroa_idx.i to i64*
- %retval.sroa.3.0.copyload.i = load i64* %retval.sroa.3.0..sroa_cast.i, align 8
+ %retval.sroa.3.0.copyload.i = load i64, i64* %retval.sroa.3.0..sroa_cast.i, align 8
%phitmp = trunc i64 %retval.sroa.0.0.copyload.i to i32
%cmp.i = icmp eq i32 %phitmp, 0
br i1 %cmp.i, label %cond.false.i.i, label %if.then
if.then: ; preds = %_ZNK4llvm7ErrorOrISt10unique_ptrINS_12MemoryBufferESt14default_deleteIS2_EEE8getErrorEv.exit
%.c = inttoptr i64 %retval.sroa.3.0.copyload.i to %"class.std::error_category"*
- %4 = load i8** %_M_p.i.i.i, align 8, !tbaa !1
- %arrayidx.i.i.i30 = getelementptr inbounds i8* %4, i64 -24
+ %4 = load i8*, i8** %_M_p.i.i.i, align 8, !tbaa !1
+ %arrayidx.i.i.i30 = getelementptr inbounds i8, i8* %4, i64 -24
%_M_length.i.i31 = bitcast i8* %arrayidx.i.i.i30 to i64*
- %5 = load i64* %_M_length.i.i31, align 8, !tbaa !7
+ %5 = load i64, i64* %_M_length.i.i31, align 8, !tbaa !7
%6 = inttoptr i64 %retval.sroa.3.0.copyload.i to void (%"class.std::basic_string"*, %"class.std::error_category"*, i32)***
- %vtable.i = load void (%"class.std::basic_string"*, %"class.std::error_category"*, i32)*** %6, align 8, !tbaa !11
- %vfn.i = getelementptr inbounds void (%"class.std::basic_string"*, %"class.std::error_category"*, i32)** %vtable.i, i64 3
- %7 = load void (%"class.std::basic_string"*, %"class.std::error_category"*, i32)** %vfn.i, align 8
+ %vtable.i = load void (%"class.std::basic_string"*, %"class.std::error_category"*, i32)**, void (%"class.std::basic_string"*, %"class.std::error_category"*, i32)*** %6, align 8, !tbaa !11
+ %vfn.i = getelementptr inbounds void (%"class.std::basic_string"*, %"class.std::error_category"*, i32)*, void (%"class.std::basic_string"*, %"class.std::error_category"*, i32)** %vtable.i, i64 3
+ %7 = load void (%"class.std::basic_string"*, %"class.std::error_category"*, i32)*, void (%"class.std::basic_string"*, %"class.std::error_category"*, i32)** %vfn.i, align 8
call void %7(%"class.std::basic_string"* sret %ref.tmp5, %"class.std::error_category"* %.c, i32 signext %phitmp) #3
- %call2.i.i = call dereferenceable(8) %"class.std::basic_string"* @_ZNSs6insertEmPKcm(%"class.std::basic_string"* %ref.tmp5, i64 0, i8* getelementptr inbounds ([28 x i8]* @.str, i64 0, i64 0), i64 27) #3
- %_M_p2.i.i.i.i = getelementptr inbounds %"class.std::basic_string"* %call2.i.i, i64 0, i32 0, i32 0
- %8 = load i8** %_M_p2.i.i.i.i, align 8, !tbaa !13
- store i8* bitcast (i64* getelementptr inbounds ([0 x i64]* @_ZNSs4_Rep20_S_empty_rep_storageE, i64 0, i64 3) to i8*), i8** %_M_p2.i.i.i.i, align 8, !tbaa !1
- %arrayidx.i.i.i36 = getelementptr inbounds i8* %8, i64 -24
+ %call2.i.i = call dereferenceable(8) %"class.std::basic_string"* @_ZNSs6insertEmPKcm(%"class.std::basic_string"* %ref.tmp5, i64 0, i8* getelementptr inbounds ([28 x i8], [28 x i8]* @.str, i64 0, i64 0), i64 27) #3
+ %_M_p2.i.i.i.i = getelementptr inbounds %"class.std::basic_string", %"class.std::basic_string"* %call2.i.i, i64 0, i32 0, i32 0
+ %8 = load i8*, i8** %_M_p2.i.i.i.i, align 8, !tbaa !13
+ store i8* bitcast (i64* getelementptr inbounds ([0 x i64], [0 x i64]* @_ZNSs4_Rep20_S_empty_rep_storageE, i64 0, i64 3) to i8*), i8** %_M_p2.i.i.i.i, align 8, !tbaa !1
+ %arrayidx.i.i.i36 = getelementptr inbounds i8, i8* %8, i64 -24
%_M_length.i.i37 = bitcast i8* %arrayidx.i.i.i36 to i64*
- %9 = load i64* %_M_length.i.i37, align 8, !tbaa !7
- %Filename.i = getelementptr inbounds %"class.llvm::SMDiagnostic"* %ref.tmp, i64 0, i32 2
- %10 = getelementptr inbounds %"class.std::allocator"* %ref.tmp.i.i2.i, i64 0, i32 0
+ %9 = load i64, i64* %_M_length.i.i37, align 8, !tbaa !7
+ %Filename.i = getelementptr inbounds %"class.llvm::SMDiagnostic", %"class.llvm::SMDiagnostic"* %ref.tmp, i64 0, i32 2
+ %10 = getelementptr inbounds %"class.std::allocator", %"class.std::allocator"* %ref.tmp.i.i2.i, i64 0, i32 0
%11 = bitcast %"class.llvm::SMDiagnostic"* %ref.tmp to i8*
call void @llvm.memset.p0i8.i64(i8* %11, i8 0, i64 16, i32 8, i1 false) #3
call void @llvm.lifetime.start(i64 1, i8* %10) #3
@@ -228,8 +228,8 @@ if.then: ; preds = %_ZNK4llvm7ErrorOrIS
br i1 %tobool.i.i4.i, label %if.then.i.i6.i, label %if.end.i.i8.i
if.then.i.i6.i: ; preds = %if.then
- %_M_p.i.i.i.i.i.i5.i = getelementptr inbounds %"class.std::basic_string"* %Filename.i, i64 0, i32 0, i32 0
- store i8* bitcast (i64* getelementptr inbounds ([0 x i64]* @_ZNSs4_Rep20_S_empty_rep_storageE, i64 0, i64 3) to i8*), i8** %_M_p.i.i.i.i.i.i5.i, align 8, !tbaa !13
+ %_M_p.i.i.i.i.i.i5.i = getelementptr inbounds %"class.std::basic_string", %"class.std::basic_string"* %Filename.i, i64 0, i32 0, i32 0
+ store i8* bitcast (i64* getelementptr inbounds ([0 x i64], [0 x i64]* @_ZNSs4_Rep20_S_empty_rep_storageE, i64 0, i64 3) to i8*), i8** %_M_p.i.i.i.i.i.i5.i, align 8, !tbaa !13
br label %_ZNK4llvm9StringRefcvSsEv.exit9.i
if.end.i.i8.i: ; preds = %if.then
@@ -238,21 +238,21 @@ if.end.i.i8.i: ; preds = %if.then
_ZNK4llvm9StringRefcvSsEv.exit9.i: ; preds = %if.end.i.i8.i, %if.then.i.i6.i
call void @llvm.lifetime.end(i64 1, i8* %10) #3
- %LineNo.i = getelementptr inbounds %"class.llvm::SMDiagnostic"* %ref.tmp, i64 0, i32 3
+ %LineNo.i = getelementptr inbounds %"class.llvm::SMDiagnostic", %"class.llvm::SMDiagnostic"* %ref.tmp, i64 0, i32 3
store i32 -1, i32* %LineNo.i, align 8, !tbaa !14
- %ColumnNo.i = getelementptr inbounds %"class.llvm::SMDiagnostic"* %ref.tmp, i64 0, i32 4
+ %ColumnNo.i = getelementptr inbounds %"class.llvm::SMDiagnostic", %"class.llvm::SMDiagnostic"* %ref.tmp, i64 0, i32 4
store i32 -1, i32* %ColumnNo.i, align 4, !tbaa !21
- %Kind.i = getelementptr inbounds %"class.llvm::SMDiagnostic"* %ref.tmp, i64 0, i32 5
+ %Kind.i = getelementptr inbounds %"class.llvm::SMDiagnostic", %"class.llvm::SMDiagnostic"* %ref.tmp, i64 0, i32 5
store i32 0, i32* %Kind.i, align 8, !tbaa !22
- %Message.i = getelementptr inbounds %"class.llvm::SMDiagnostic"* %ref.tmp, i64 0, i32 6
- %12 = getelementptr inbounds %"class.std::allocator"* %ref.tmp.i.i.i, i64 0, i32 0
+ %Message.i = getelementptr inbounds %"class.llvm::SMDiagnostic", %"class.llvm::SMDiagnostic"* %ref.tmp, i64 0, i32 6
+ %12 = getelementptr inbounds %"class.std::allocator", %"class.std::allocator"* %ref.tmp.i.i.i, i64 0, i32 0
call void @llvm.lifetime.start(i64 1, i8* %12) #3
%tobool.i.i.i = icmp eq i8* %8, null
br i1 %tobool.i.i.i, label %if.then.i.i.i, label %if.end.i.i.i
if.then.i.i.i: ; preds = %_ZNK4llvm9StringRefcvSsEv.exit9.i
- %_M_p.i.i.i.i.i.i.i = getelementptr inbounds %"class.std::basic_string"* %Message.i, i64 0, i32 0, i32 0
- store i8* bitcast (i64* getelementptr inbounds ([0 x i64]* @_ZNSs4_Rep20_S_empty_rep_storageE, i64 0, i64 3) to i8*), i8** %_M_p.i.i.i.i.i.i.i, align 8, !tbaa !13
+ %_M_p.i.i.i.i.i.i.i = getelementptr inbounds %"class.std::basic_string", %"class.std::basic_string"* %Message.i, i64 0, i32 0, i32 0
+ store i8* bitcast (i64* getelementptr inbounds ([0 x i64], [0 x i64]* @_ZNSs4_Rep20_S_empty_rep_storageE, i64 0, i64 3) to i8*), i8** %_M_p.i.i.i.i.i.i.i, align 8, !tbaa !13
br label %_ZN4llvm12SMDiagnosticC2ENS_9StringRefENS_9SourceMgr8DiagKindES1_.exit
if.end.i.i.i: ; preds = %_ZNK4llvm9StringRefcvSsEv.exit9.i
@@ -261,49 +261,49 @@ if.end.i.i.i: ; preds = %_ZNK4llvm9StringRef
_ZN4llvm12SMDiagnosticC2ENS_9StringRefENS_9SourceMgr8DiagKindES1_.exit: ; preds = %if.then.i.i.i, %if.end.i.i.i
call void @llvm.lifetime.end(i64 1, i8* %12) #3
- %_M_p.i.i.i.i.i = getelementptr inbounds %"class.llvm::SMDiagnostic"* %ref.tmp, i64 0, i32 7, i32 0, i32 0
- store i8* bitcast (i64* getelementptr inbounds ([0 x i64]* @_ZNSs4_Rep20_S_empty_rep_storageE, i64 0, i64 3) to i8*), i8** %_M_p.i.i.i.i.i, align 8, !tbaa !13
- %Ranges.i = getelementptr inbounds %"class.llvm::SMDiagnostic"* %ref.tmp, i64 0, i32 8
+ %_M_p.i.i.i.i.i = getelementptr inbounds %"class.llvm::SMDiagnostic", %"class.llvm::SMDiagnostic"* %ref.tmp, i64 0, i32 7, i32 0, i32 0
+ store i8* bitcast (i64* getelementptr inbounds ([0 x i64], [0 x i64]* @_ZNSs4_Rep20_S_empty_rep_storageE, i64 0, i64 3) to i8*), i8** %_M_p.i.i.i.i.i, align 8, !tbaa !13
+ %Ranges.i = getelementptr inbounds %"class.llvm::SMDiagnostic", %"class.llvm::SMDiagnostic"* %ref.tmp, i64 0, i32 8
%13 = bitcast %"class.std::vector.79"* %Ranges.i to i8*
call void @llvm.memset.p0i8.i64(i8* %13, i8 0, i64 24, i32 8, i1 false) #3
- %14 = getelementptr inbounds %"class.llvm::SMDiagnostic"* %ref.tmp, i64 0, i32 9, i32 0, i32 0, i32 0, i32 1, i32 0, i32 0, i64 0
- %BeginX.i.i.i.i.i.i = getelementptr inbounds %"class.llvm::SMDiagnostic"* %ref.tmp, i64 0, i32 9, i32 0, i32 0, i32 0, i32 0, i32 0
+ %14 = getelementptr inbounds %"class.llvm::SMDiagnostic", %"class.llvm::SMDiagnostic"* %ref.tmp, i64 0, i32 9, i32 0, i32 0, i32 0, i32 1, i32 0, i32 0, i64 0
+ %BeginX.i.i.i.i.i.i = getelementptr inbounds %"class.llvm::SMDiagnostic", %"class.llvm::SMDiagnostic"* %ref.tmp, i64 0, i32 9, i32 0, i32 0, i32 0, i32 0, i32 0
store i8* %14, i8** %BeginX.i.i.i.i.i.i, align 8, !tbaa !23
- %EndX.i.i.i.i.i.i = getelementptr inbounds %"class.llvm::SMDiagnostic"* %ref.tmp, i64 0, i32 9, i32 0, i32 0, i32 0, i32 0, i32 1
+ %EndX.i.i.i.i.i.i = getelementptr inbounds %"class.llvm::SMDiagnostic", %"class.llvm::SMDiagnostic"* %ref.tmp, i64 0, i32 9, i32 0, i32 0, i32 0, i32 0, i32 1
store i8* %14, i8** %EndX.i.i.i.i.i.i, align 8, !tbaa !25
- %CapacityX.i.i.i.i.i.i = getelementptr inbounds %"class.llvm::SMDiagnostic"* %ref.tmp, i64 0, i32 9, i32 0, i32 0, i32 0, i32 0, i32 2
- %add.ptr.i.i.i.i.i.i = getelementptr inbounds %"class.llvm::SMDiagnostic"* %ref.tmp, i64 0, i32 9, i32 0, i32 0, i32 0, i32 1, i32 0, i32 0, i64 96
+ %CapacityX.i.i.i.i.i.i = getelementptr inbounds %"class.llvm::SMDiagnostic", %"class.llvm::SMDiagnostic"* %ref.tmp, i64 0, i32 9, i32 0, i32 0, i32 0, i32 0, i32 2
+ %add.ptr.i.i.i.i.i.i = getelementptr inbounds %"class.llvm::SMDiagnostic", %"class.llvm::SMDiagnostic"* %ref.tmp, i64 0, i32 9, i32 0, i32 0, i32 0, i32 1, i32 0, i32 0, i64 96
store i8* %add.ptr.i.i.i.i.i.i, i8** %CapacityX.i.i.i.i.i.i, align 8, !tbaa !26
%15 = bitcast %"class.llvm::SMDiagnostic"* %Err to i8*
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %15, i8* %11, i64 16, i32 8, i1 false) #3
- %Filename.i38 = getelementptr inbounds %"class.llvm::SMDiagnostic"* %Err, i64 0, i32 2
+ %Filename.i38 = getelementptr inbounds %"class.llvm::SMDiagnostic", %"class.llvm::SMDiagnostic"* %Err, i64 0, i32 2
call void @_ZNSs4swapERSs(%"class.std::basic_string"* %Filename.i38, %"class.std::basic_string"* dereferenceable(8) %Filename.i) #3
- %LineNo.i39 = getelementptr inbounds %"class.llvm::SMDiagnostic"* %Err, i64 0, i32 3
+ %LineNo.i39 = getelementptr inbounds %"class.llvm::SMDiagnostic", %"class.llvm::SMDiagnostic"* %Err, i64 0, i32 3
%16 = bitcast i32* %LineNo.i39 to i8*
%17 = bitcast i32* %LineNo.i to i8*
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %16, i8* %17, i64 12, i32 4, i1 false) #3
- %Message.i40 = getelementptr inbounds %"class.llvm::SMDiagnostic"* %Err, i64 0, i32 6
+ %Message.i40 = getelementptr inbounds %"class.llvm::SMDiagnostic", %"class.llvm::SMDiagnostic"* %Err, i64 0, i32 6
call void @_ZNSs4swapERSs(%"class.std::basic_string"* %Message.i40, %"class.std::basic_string"* dereferenceable(8) %Message.i) #3
- %LineContents.i = getelementptr inbounds %"class.llvm::SMDiagnostic"* %Err, i64 0, i32 7
- %LineContents7.i = getelementptr inbounds %"class.llvm::SMDiagnostic"* %ref.tmp, i64 0, i32 7
+ %LineContents.i = getelementptr inbounds %"class.llvm::SMDiagnostic", %"class.llvm::SMDiagnostic"* %Err, i64 0, i32 7
+ %LineContents7.i = getelementptr inbounds %"class.llvm::SMDiagnostic", %"class.llvm::SMDiagnostic"* %ref.tmp, i64 0, i32 7
call void @_ZNSs4swapERSs(%"class.std::basic_string"* %LineContents.i, %"class.std::basic_string"* dereferenceable(8) %LineContents7.i) #3
- %Ranges.i41 = getelementptr inbounds %"class.llvm::SMDiagnostic"* %Err, i64 0, i32 8
- %_M_start.i7.i.i.i = getelementptr inbounds %"class.std::vector.79"* %Ranges.i41, i64 0, i32 0, i32 0, i32 0
- %18 = load %"struct.std::pair"** %_M_start.i7.i.i.i, align 8, !tbaa !27
- %_M_finish.i9.i.i.i = getelementptr inbounds %"class.llvm::SMDiagnostic"* %Err, i64 0, i32 8, i32 0, i32 0, i32 1
- %_M_end_of_storage.i11.i.i.i = getelementptr inbounds %"class.llvm::SMDiagnostic"* %Err, i64 0, i32 8, i32 0, i32 0, i32 2
- %_M_start2.i.i.i.i = getelementptr inbounds %"class.llvm::SMDiagnostic"* %ref.tmp, i64 0, i32 8, i32 0, i32 0, i32 0
+ %Ranges.i41 = getelementptr inbounds %"class.llvm::SMDiagnostic", %"class.llvm::SMDiagnostic"* %Err, i64 0, i32 8
+ %_M_start.i7.i.i.i = getelementptr inbounds %"class.std::vector.79", %"class.std::vector.79"* %Ranges.i41, i64 0, i32 0, i32 0, i32 0
+ %18 = load %"struct.std::pair"*, %"struct.std::pair"** %_M_start.i7.i.i.i, align 8, !tbaa !27
+ %_M_finish.i9.i.i.i = getelementptr inbounds %"class.llvm::SMDiagnostic", %"class.llvm::SMDiagnostic"* %Err, i64 0, i32 8, i32 0, i32 0, i32 1
+ %_M_end_of_storage.i11.i.i.i = getelementptr inbounds %"class.llvm::SMDiagnostic", %"class.llvm::SMDiagnostic"* %Err, i64 0, i32 8, i32 0, i32 0, i32 2
+ %_M_start2.i.i.i.i = getelementptr inbounds %"class.llvm::SMDiagnostic", %"class.llvm::SMDiagnostic"* %ref.tmp, i64 0, i32 8, i32 0, i32 0, i32 0
%19 = bitcast %"class.std::vector.79"* %Ranges.i41 to i8*
call void @llvm.memset.p0i8.i64(i8* %19, i8 0, i64 16, i32 8, i1 false) #3
- %20 = load %"struct.std::pair"** %_M_start2.i.i.i.i, align 8, !tbaa !27
+ %20 = load %"struct.std::pair"*, %"struct.std::pair"** %_M_start2.i.i.i.i, align 8, !tbaa !27
store %"struct.std::pair"* %20, %"struct.std::pair"** %_M_start.i7.i.i.i, align 8, !tbaa !27
store %"struct.std::pair"* null, %"struct.std::pair"** %_M_start2.i.i.i.i, align 8, !tbaa !27
- %_M_finish3.i.i.i.i = getelementptr inbounds %"class.llvm::SMDiagnostic"* %ref.tmp, i64 0, i32 8, i32 0, i32 0, i32 1
- %21 = load %"struct.std::pair"** %_M_finish3.i.i.i.i, align 8, !tbaa !27
+ %_M_finish3.i.i.i.i = getelementptr inbounds %"class.llvm::SMDiagnostic", %"class.llvm::SMDiagnostic"* %ref.tmp, i64 0, i32 8, i32 0, i32 0, i32 1
+ %21 = load %"struct.std::pair"*, %"struct.std::pair"** %_M_finish3.i.i.i.i, align 8, !tbaa !27
store %"struct.std::pair"* %21, %"struct.std::pair"** %_M_finish.i9.i.i.i, align 8, !tbaa !27
store %"struct.std::pair"* null, %"struct.std::pair"** %_M_finish3.i.i.i.i, align 8, !tbaa !27
- %_M_end_of_storage4.i.i.i.i = getelementptr inbounds %"class.llvm::SMDiagnostic"* %ref.tmp, i64 0, i32 8, i32 0, i32 0, i32 2
- %22 = load %"struct.std::pair"** %_M_end_of_storage4.i.i.i.i, align 8, !tbaa !27
+ %_M_end_of_storage4.i.i.i.i = getelementptr inbounds %"class.llvm::SMDiagnostic", %"class.llvm::SMDiagnostic"* %ref.tmp, i64 0, i32 8, i32 0, i32 0, i32 2
+ %22 = load %"struct.std::pair"*, %"struct.std::pair"** %_M_end_of_storage4.i.i.i.i, align 8, !tbaa !27
store %"struct.std::pair"* %22, %"struct.std::pair"** %_M_end_of_storage.i11.i.i.i, align 8, !tbaa !27
store %"struct.std::pair"* null, %"struct.std::pair"** %_M_end_of_storage4.i.i.i.i, align 8, !tbaa !27
%tobool.i.i.i.i.i.i = icmp eq %"struct.std::pair"* %18, null
@@ -315,18 +315,18 @@ if.then.i.i.i.i.i.i: ; preds = %_ZN4llvm12SMDiagnos
br label %_ZN4llvm12SMDiagnosticaSEOS0_.exit
_ZN4llvm12SMDiagnosticaSEOS0_.exit: ; preds = %_ZN4llvm12SMDiagnosticC2ENS_9StringRefENS_9SourceMgr8DiagKindES1_.exit, %if.then.i.i.i.i.i.i
- %24 = getelementptr inbounds %"class.llvm::SMDiagnostic"* %Err, i64 0, i32 9, i32 0
- %25 = getelementptr inbounds %"class.llvm::SMDiagnostic"* %ref.tmp, i64 0, i32 9, i32 0
+ %24 = getelementptr inbounds %"class.llvm::SMDiagnostic", %"class.llvm::SMDiagnostic"* %Err, i64 0, i32 9, i32 0
+ %25 = getelementptr inbounds %"class.llvm::SMDiagnostic", %"class.llvm::SMDiagnostic"* %ref.tmp, i64 0, i32 9, i32 0
%call2.i.i42 = call dereferenceable(48) %"class.llvm::SmallVectorImpl.85"* @_ZN4llvm15SmallVectorImplINS_7SMFixItEEaSEOS2_(%"class.llvm::SmallVectorImpl.85"* %24, %"class.llvm::SmallVectorImpl.85"* dereferenceable(48) %25) #3
call void @_ZN4llvm12SMDiagnosticD2Ev(%"class.llvm::SMDiagnostic"* %ref.tmp) #3
- %26 = getelementptr inbounds %"class.std::allocator"* %ref.tmp.i.i, i64 0, i32 0
+ %26 = getelementptr inbounds %"class.std::allocator", %"class.std::allocator"* %ref.tmp.i.i, i64 0, i32 0
call void @llvm.lifetime.start(i64 1, i8* %26) #3
%27 = bitcast i8* %arrayidx.i.i.i36 to %"struct.std::basic_string<char, std::char_traits<char>, std::allocator<char> >::_Rep"*
%cmp.i.i.i = icmp eq i8* %arrayidx.i.i.i36, bitcast ([0 x i64]* @_ZNSs4_Rep20_S_empty_rep_storageE to i8*)
br i1 %cmp.i.i.i, label %_ZNSsD1Ev.exit, label %if.then.i.i.i45, !prof !28
if.then.i.i.i45: ; preds = %_ZN4llvm12SMDiagnosticaSEOS0_.exit
- %_M_refcount.i.i.i = getelementptr inbounds i8* %8, i64 -8
+ %_M_refcount.i.i.i = getelementptr inbounds i8, i8* %8, i64 -8
%28 = bitcast i8* %_M_refcount.i.i.i to i32*
br i1 icmp ne (i8* bitcast (i32 (i32*, void (i8*)*)* @__pthread_key_create to i8*), i8* null), label %if.then.i.i.i.i, label %if.else.i.i.i.i
@@ -335,12 +335,12 @@ if.then.i.i.i.i: ; preds = %if.then.i.i.i45
call void @llvm.lifetime.start(i64 4, i8* %.atomicdst.i.i.i.i.i.0..atomicdst.i.i.i.i.0..atomicdst.i.i.i.0..atomicdst.i.i.0..atomicdst.i.0..sroa_cast)
%29 = atomicrmw volatile add i32* %28, i32 -1 acq_rel
store i32 %29, i32* %.atomicdst.i.i.i.i.i, align 4
- %.atomicdst.i.i.i.i.i.0..atomicdst.i.i.i.i.0..atomicdst.i.i.i.0..atomicdst.i.i.0..atomicdst.i.0..atomicdst.0..atomicdst.0..i.i.i.i.i = load volatile i32* %.atomicdst.i.i.i.i.i, align 4
+ %.atomicdst.i.i.i.i.i.0..atomicdst.i.i.i.i.0..atomicdst.i.i.i.0..atomicdst.i.i.0..atomicdst.i.0..atomicdst.0..atomicdst.0..i.i.i.i.i = load volatile i32, i32* %.atomicdst.i.i.i.i.i, align 4
call void @llvm.lifetime.end(i64 4, i8* %.atomicdst.i.i.i.i.i.0..atomicdst.i.i.i.i.0..atomicdst.i.i.i.0..atomicdst.i.i.0..atomicdst.i.0..sroa_cast)
br label %_ZN9__gnu_cxxL27__exchange_and_add_dispatchEPii.exit.i.i.i
if.else.i.i.i.i: ; preds = %if.then.i.i.i45
- %30 = load i32* %28, align 4, !tbaa !29
+ %30 = load i32, i32* %28, align 4, !tbaa !29
%add.i.i.i.i.i = add nsw i32 %30, -1
store i32 %add.i.i.i.i.i, i32* %28, align 4, !tbaa !29
br label %_ZN9__gnu_cxxL27__exchange_and_add_dispatchEPii.exit.i.i.i
@@ -356,17 +356,17 @@ if.then4.i.i.i: ; preds = %_ZN9__gnu_cxxL27__e
_ZNSsD1Ev.exit: ; preds = %_ZN4llvm12SMDiagnosticaSEOS0_.exit, %_ZN9__gnu_cxxL27__exchange_and_add_dispatchEPii.exit.i.i.i, %if.then4.i.i.i
call void @llvm.lifetime.end(i64 1, i8* %26) #3
- %31 = getelementptr inbounds %"class.std::allocator"* %ref.tmp.i.i47, i64 0, i32 0
+ %31 = getelementptr inbounds %"class.std::allocator", %"class.std::allocator"* %ref.tmp.i.i47, i64 0, i32 0
call void @llvm.lifetime.start(i64 1, i8* %31) #3
- %_M_p.i.i.i.i48 = getelementptr inbounds %"class.std::basic_string"* %ref.tmp5, i64 0, i32 0, i32 0
- %32 = load i8** %_M_p.i.i.i.i48, align 8, !tbaa !1
- %arrayidx.i.i.i49 = getelementptr inbounds i8* %32, i64 -24
+ %_M_p.i.i.i.i48 = getelementptr inbounds %"class.std::basic_string", %"class.std::basic_string"* %ref.tmp5, i64 0, i32 0, i32 0
+ %32 = load i8*, i8** %_M_p.i.i.i.i48, align 8, !tbaa !1
+ %arrayidx.i.i.i49 = getelementptr inbounds i8, i8* %32, i64 -24
%33 = bitcast i8* %arrayidx.i.i.i49 to %"struct.std::basic_string<char, std::char_traits<char>, std::allocator<char> >::_Rep"*
%cmp.i.i.i50 = icmp eq i8* %arrayidx.i.i.i49, bitcast ([0 x i64]* @_ZNSs4_Rep20_S_empty_rep_storageE to i8*)
br i1 %cmp.i.i.i50, label %_ZNSsD1Ev.exit62, label %if.then.i.i.i52, !prof !28
if.then.i.i.i52: ; preds = %_ZNSsD1Ev.exit
- %_M_refcount.i.i.i51 = getelementptr inbounds i8* %32, i64 -8
+ %_M_refcount.i.i.i51 = getelementptr inbounds i8, i8* %32, i64 -8
%34 = bitcast i8* %_M_refcount.i.i.i51 to i32*
br i1 icmp ne (i8* bitcast (i32 (i32*, void (i8*)*)* @__pthread_key_create to i8*), i8* null), label %if.then.i.i.i.i55, label %if.else.i.i.i.i57
@@ -375,12 +375,12 @@ if.then.i.i.i.i55: ; preds = %if.then.i.i.i52
call void @llvm.lifetime.start(i64 4, i8* %.atomicdst.i.i.i.i.i46.0..atomicdst.i.i.i.i.0..atomicdst.i.i.i.0..atomicdst.i.i.0..atomicdst.i.0..sroa_cast)
%35 = atomicrmw volatile add i32* %34, i32 -1 acq_rel
store i32 %35, i32* %.atomicdst.i.i.i.i.i46, align 4
- %.atomicdst.i.i.i.i.i46.0..atomicdst.i.i.i.i.0..atomicdst.i.i.i.0..atomicdst.i.i.0..atomicdst.i.0..atomicdst.0..atomicdst.0..i.i.i.i.i54 = load volatile i32* %.atomicdst.i.i.i.i.i46, align 4
+ %.atomicdst.i.i.i.i.i46.0..atomicdst.i.i.i.i.0..atomicdst.i.i.i.0..atomicdst.i.i.0..atomicdst.i.0..atomicdst.0..atomicdst.0..i.i.i.i.i54 = load volatile i32, i32* %.atomicdst.i.i.i.i.i46, align 4
call void @llvm.lifetime.end(i64 4, i8* %.atomicdst.i.i.i.i.i46.0..atomicdst.i.i.i.i.0..atomicdst.i.i.i.0..atomicdst.i.i.0..atomicdst.i.0..sroa_cast)
br label %_ZN9__gnu_cxxL27__exchange_and_add_dispatchEPii.exit.i.i.i60
if.else.i.i.i.i57: ; preds = %if.then.i.i.i52
- %36 = load i32* %34, align 4, !tbaa !29
+ %36 = load i32, i32* %34, align 4, !tbaa !29
%add.i.i.i.i.i56 = add nsw i32 %36, -1
store i32 %add.i.i.i.i.i56, i32* %34, align 4, !tbaa !29
br label %_ZN9__gnu_cxxL27__exchange_and_add_dispatchEPii.exit.i.i.i60
@@ -399,33 +399,33 @@ _ZNSsD1Ev.exit62: ; preds = %_ZNSsD1Ev.exit, %_Z
br label %cleanup
cond.false.i.i: ; preds = %_ZNK4llvm7ErrorOrISt10unique_ptrINS_12MemoryBufferESt14default_deleteIS2_EEE8getErrorEv.exit
- call void @__assert_fail(i8* getelementptr inbounds ([54 x i8]* @.str1, i64 0, i64 0), i8* getelementptr inbounds ([61 x i8]* @.str2, i64 0, i64 0), i32 zeroext 242, i8* getelementptr inbounds ([206 x i8]* @__PRETTY_FUNCTION__._ZN4llvm7ErrorOrISt10unique_ptrINS_12MemoryBufferESt14default_deleteIS2_EEE10getStorageEv, i64 0, i64 0)) #7
+ call void @__assert_fail(i8* getelementptr inbounds ([54 x i8], [54 x i8]* @.str1, i64 0, i64 0), i8* getelementptr inbounds ([61 x i8], [61 x i8]* @.str2, i64 0, i64 0), i32 zeroext 242, i8* getelementptr inbounds ([206 x i8], [206 x i8]* @__PRETTY_FUNCTION__._ZN4llvm7ErrorOrISt10unique_ptrINS_12MemoryBufferESt14default_deleteIS2_EEE10getStorageEv, i64 0, i64 0)) #7
unreachable
_ZN4llvm7ErrorOrISt10unique_ptrINS_12MemoryBufferESt14default_deleteIS2_EEE3getEv.exit: ; preds = %entry
%_M_head_impl.i.i.i.i.i = bitcast %"class.llvm::ErrorOr"* %FileOrErr to %"class.llvm::MemoryBuffer"**
- %37 = load %"class.llvm::MemoryBuffer"** %_M_head_impl.i.i.i.i.i, align 8, !tbaa !27
+ %37 = load %"class.llvm::MemoryBuffer"*, %"class.llvm::MemoryBuffer"** %_M_head_impl.i.i.i.i.i, align 8, !tbaa !27
%call9 = call %"class.llvm::Module"* @_ZN4llvm7ParseIREPNS_12MemoryBufferERNS_12SMDiagnosticERNS_11LLVMContextE(%"class.llvm::MemoryBuffer"* %37, %"class.llvm::SMDiagnostic"* dereferenceable(200) %Err, %"class.llvm::LLVMContext"* dereferenceable(8) %Context)
br label %cleanup
cleanup: ; preds = %_ZN4llvm7ErrorOrISt10unique_ptrINS_12MemoryBufferESt14default_deleteIS2_EEE3getEv.exit, %_ZNSsD1Ev.exit62
%retval.0 = phi %"class.llvm::Module"* [ null, %_ZNSsD1Ev.exit62 ], [ %call9, %_ZN4llvm7ErrorOrISt10unique_ptrINS_12MemoryBufferESt14default_deleteIS2_EEE3getEv.exit ]
- %bf.load.i = load i8* %HasError.i24, align 8
+ %bf.load.i = load i8, i8* %HasError.i24, align 8
%38 = and i8 %bf.load.i, 1
%bf.cast.i = icmp eq i8 %38, 0
br i1 %bf.cast.i, label %_ZN4llvm7ErrorOrISt10unique_ptrINS_12MemoryBufferESt14default_deleteIS2_EEE10getStorageEv.exit.i, label %_ZN4llvm7ErrorOrISt10unique_ptrINS_12MemoryBufferESt14default_deleteIS2_EEED2Ev.exit
_ZN4llvm7ErrorOrISt10unique_ptrINS_12MemoryBufferESt14default_deleteIS2_EEE10getStorageEv.exit.i: ; preds = %cleanup
%_M_head_impl.i.i.i.i.i.i = bitcast %"class.llvm::ErrorOr"* %FileOrErr to %"class.llvm::MemoryBuffer"**
- %39 = load %"class.llvm::MemoryBuffer"** %_M_head_impl.i.i.i.i.i.i, align 8, !tbaa !27
+ %39 = load %"class.llvm::MemoryBuffer"*, %"class.llvm::MemoryBuffer"** %_M_head_impl.i.i.i.i.i.i, align 8, !tbaa !27
%cmp.i.i = icmp eq %"class.llvm::MemoryBuffer"* %39, null
br i1 %cmp.i.i, label %_ZNSt10unique_ptrIN4llvm12MemoryBufferESt14default_deleteIS1_EED2Ev.exit.i, label %_ZNKSt14default_deleteIN4llvm12MemoryBufferEEclEPS1_.exit.i.i
_ZNKSt14default_deleteIN4llvm12MemoryBufferEEclEPS1_.exit.i.i: ; preds = %_ZN4llvm7ErrorOrISt10unique_ptrINS_12MemoryBufferESt14default_deleteIS2_EEE10getStorageEv.exit.i
%40 = bitcast %"class.llvm::MemoryBuffer"* %39 to void (%"class.llvm::MemoryBuffer"*)***
- %vtable.i.i.i = load void (%"class.llvm::MemoryBuffer"*)*** %40, align 8, !tbaa !11
- %vfn.i.i.i = getelementptr inbounds void (%"class.llvm::MemoryBuffer"*)** %vtable.i.i.i, i64 1
- %41 = load void (%"class.llvm::MemoryBuffer"*)** %vfn.i.i.i, align 8
+ %vtable.i.i.i = load void (%"class.llvm::MemoryBuffer"*)**, void (%"class.llvm::MemoryBuffer"*)*** %40, align 8, !tbaa !11
+ %vfn.i.i.i = getelementptr inbounds void (%"class.llvm::MemoryBuffer"*)*, void (%"class.llvm::MemoryBuffer"*)** %vtable.i.i.i, i64 1
+ %41 = load void (%"class.llvm::MemoryBuffer"*)*, void (%"class.llvm::MemoryBuffer"*)** %vfn.i.i.i, align 8
call void %41(%"class.llvm::MemoryBuffer"* %39) #3
br label %_ZNSt10unique_ptrIN4llvm12MemoryBufferESt14default_deleteIS1_EED2Ev.exit.i
diff --git a/test/CodeGen/PowerPC/trampoline.ll b/test/CodeGen/PowerPC/trampoline.ll
index 3ea46f50e0c0..e1a26dae7291 100644
--- a/test/CodeGen/PowerPC/trampoline.ll
+++ b/test/CodeGen/PowerPC/trampoline.ll
@@ -29,20 +29,20 @@ module asm "\09.globl .objc_class_name_NSBitmapImageRep"
%struct.objc_super = type opaque
@_NSConcreteStackBlock = external global i8* ; <i8**> [#uses=1]
@"\01L_OBJC_SELECTOR_REFERENCES_1" = internal global %struct.objc_selector* bitcast ([34 x i8]* @"\01L_OBJC_METH_VAR_NAME_1" to %struct.objc_selector*), section "__OBJC,__message_refs,literal_pointers,no_dead_strip" ; <%struct.objc_selector**> [#uses=2]
-@"\01L_OBJC_CLASS_NSBitmapImageRep" = internal global %struct._objc_class { %struct._objc_class* @"\01L_OBJC_METACLASS_NSBitmapImageRep", %struct._objc_class* bitcast ([11 x i8]* @"\01L_OBJC_CLASS_NAME_1" to %struct._objc_class*), i8* getelementptr ([17 x i8]* @"\01L_OBJC_CLASS_NAME_0", i32 0, i32 0), i32 0, i32 1, i32 0, %struct._objc_ivar_list* null, %struct._objc_method_list* bitcast ({ i8*, i32, [1 x %struct._objc_method] }* @"\01L_OBJC_INSTANCE_METHODS_NSBitmapImageRep" to %struct._objc_method_list*), %struct.objc_cache* null, %struct._objc_protocol** null, i8* null, %struct._objc_class_ext* null }, section "__OBJC,__class,regular,no_dead_strip" ; <%struct._objc_class*> [#uses=3]
+@"\01L_OBJC_CLASS_NSBitmapImageRep" = internal global %struct._objc_class { %struct._objc_class* @"\01L_OBJC_METACLASS_NSBitmapImageRep", %struct._objc_class* bitcast ([11 x i8]* @"\01L_OBJC_CLASS_NAME_1" to %struct._objc_class*), i8* getelementptr ([17 x i8], [17 x i8]* @"\01L_OBJC_CLASS_NAME_0", i32 0, i32 0), i32 0, i32 1, i32 0, %struct._objc_ivar_list* null, %struct._objc_method_list* bitcast ({ i8*, i32, [1 x %struct._objc_method] }* @"\01L_OBJC_INSTANCE_METHODS_NSBitmapImageRep" to %struct._objc_method_list*), %struct.objc_cache* null, %struct._objc_protocol** null, i8* null, %struct._objc_class_ext* null }, section "__OBJC,__class,regular,no_dead_strip" ; <%struct._objc_class*> [#uses=3]
@"\01L_OBJC_SELECTOR_REFERENCES_0" = internal global %struct.objc_selector* bitcast ([14 x i8]* @"\01L_OBJC_METH_VAR_NAME_0" to %struct.objc_selector*), section "__OBJC,__message_refs,literal_pointers,no_dead_strip" ; <%struct.objc_selector**> [#uses=2]
@"\01L_OBJC_SYMBOLS" = internal global { i32, %struct.objc_selector**, i16, i16, [1 x %struct._objc_class*] } { i32 0, %struct.objc_selector** null, i16 1, i16 0, [1 x %struct._objc_class*] [ %struct._objc_class* @"\01L_OBJC_CLASS_NSBitmapImageRep" ] }, section "__OBJC,__symbols,regular,no_dead_strip" ; <{ i32, %struct.objc_selector**, i16, i16, [1 x %struct._objc_class*] }*> [#uses=2]
@"\01L_OBJC_METH_VAR_NAME_0" = internal global [14 x i8] c"copyWithZone:\00", section "__TEXT,__cstring,cstring_literals", align 4 ; <[14 x i8]*> [#uses=2]
@"\01L_OBJC_METH_VAR_TYPE_0" = internal global [20 x i8] c"@12@0:4^{_NSZone=}8\00", section "__TEXT,__cstring,cstring_literals", align 4 ; <[20 x i8]*> [#uses=1]
-@"\01L_OBJC_INSTANCE_METHODS_NSBitmapImageRep" = internal global { i8*, i32, [1 x %struct._objc_method] } { i8* null, i32 1, [1 x %struct._objc_method] [ %struct._objc_method { %struct.objc_selector* bitcast ([14 x i8]* @"\01L_OBJC_METH_VAR_NAME_0" to %struct.objc_selector*), i8* getelementptr ([20 x i8]* @"\01L_OBJC_METH_VAR_TYPE_0", i32 0, i32 0), i8* bitcast (%struct.objc_object* (%struct.NSBitmapImageRep*, %struct.objc_selector*, %struct.NSZone*)* @"-[NSBitmapImageRep copyWithZone:]" to i8*) } ] }, section "__OBJC,__inst_meth,regular,no_dead_strip" ; <{ i8*, i32, [1 x %struct._objc_method] }*> [#uses=2]
+@"\01L_OBJC_INSTANCE_METHODS_NSBitmapImageRep" = internal global { i8*, i32, [1 x %struct._objc_method] } { i8* null, i32 1, [1 x %struct._objc_method] [ %struct._objc_method { %struct.objc_selector* bitcast ([14 x i8]* @"\01L_OBJC_METH_VAR_NAME_0" to %struct.objc_selector*), i8* getelementptr ([20 x i8], [20 x i8]* @"\01L_OBJC_METH_VAR_TYPE_0", i32 0, i32 0), i8* bitcast (%struct.objc_object* (%struct.NSBitmapImageRep*, %struct.objc_selector*, %struct.NSZone*)* @"-[NSBitmapImageRep copyWithZone:]" to i8*) } ] }, section "__OBJC,__inst_meth,regular,no_dead_strip" ; <{ i8*, i32, [1 x %struct._objc_method] }*> [#uses=2]
@"\01L_OBJC_CLASS_NAME_0" = internal global [17 x i8] c"NSBitmapImageRep\00", section "__TEXT,__cstring,cstring_literals", align 4 ; <[17 x i8]*> [#uses=1]
@"\01L_OBJC_CLASS_NAME_1" = internal global [11 x i8] c"NSImageRep\00", section "__TEXT,__cstring,cstring_literals", align 4 ; <[11 x i8]*> [#uses=2]
-@"\01L_OBJC_METACLASS_NSBitmapImageRep" = internal global %struct._objc_class { %struct._objc_class* bitcast ([11 x i8]* @"\01L_OBJC_CLASS_NAME_1" to %struct._objc_class*), %struct._objc_class* bitcast ([11 x i8]* @"\01L_OBJC_CLASS_NAME_1" to %struct._objc_class*), i8* getelementptr ([17 x i8]* @"\01L_OBJC_CLASS_NAME_0", i32 0, i32 0), i32 0, i32 2, i32 48, %struct._objc_ivar_list* null, %struct._objc_method_list* null, %struct.objc_cache* null, %struct._objc_protocol** null, i8* null, %struct._objc_class_ext* null }, section "__OBJC,__meta_class,regular,no_dead_strip" ; <%struct._objc_class*> [#uses=2]
+@"\01L_OBJC_METACLASS_NSBitmapImageRep" = internal global %struct._objc_class { %struct._objc_class* bitcast ([11 x i8]* @"\01L_OBJC_CLASS_NAME_1" to %struct._objc_class*), %struct._objc_class* bitcast ([11 x i8]* @"\01L_OBJC_CLASS_NAME_1" to %struct._objc_class*), i8* getelementptr ([17 x i8], [17 x i8]* @"\01L_OBJC_CLASS_NAME_0", i32 0, i32 0), i32 0, i32 2, i32 48, %struct._objc_ivar_list* null, %struct._objc_method_list* null, %struct.objc_cache* null, %struct._objc_protocol** null, i8* null, %struct._objc_class_ext* null }, section "__OBJC,__meta_class,regular,no_dead_strip" ; <%struct._objc_class*> [#uses=2]
@"\01L_OBJC_METH_VAR_NAME_1" = internal global [34 x i8] c"_performBlockUsingBackingCGImage:\00", section "__TEXT,__cstring,cstring_literals", align 4 ; <[34 x i8]*> [#uses=2]
@"\01L_OBJC_IMAGE_INFO" = internal constant [2 x i32] zeroinitializer, section "__OBJC, __image_info,regular" ; <[2 x i32]*> [#uses=1]
@"\01L_OBJC_CLASS_NAME_2" = internal global [1 x i8] zeroinitializer, section "__TEXT,__cstring,cstring_literals", align 4 ; <[1 x i8]*> [#uses=1]
-@"\01L_OBJC_MODULES" = internal global %struct._objc_module { i32 7, i32 16, i8* getelementptr ([1 x i8]* @"\01L_OBJC_CLASS_NAME_2", i32 0, i32 0), %struct._objc_symtab* bitcast ({ i32, %struct.objc_selector**, i16, i16, [1 x %struct._objc_class*] }* @"\01L_OBJC_SYMBOLS" to %struct._objc_symtab*) }, section "__OBJC,__module_info,regular,no_dead_strip" ; <%struct._objc_module*> [#uses=1]
-@llvm.used = appending global [14 x i8*] [ i8* bitcast (%struct.objc_selector** @"\01L_OBJC_SELECTOR_REFERENCES_1" to i8*), i8* bitcast (%struct._objc_class* @"\01L_OBJC_CLASS_NSBitmapImageRep" to i8*), i8* bitcast (%struct.objc_selector** @"\01L_OBJC_SELECTOR_REFERENCES_0" to i8*), i8* bitcast ({ i32, %struct.objc_selector**, i16, i16, [1 x %struct._objc_class*] }* @"\01L_OBJC_SYMBOLS" to i8*), i8* getelementptr ([14 x i8]* @"\01L_OBJC_METH_VAR_NAME_0", i32 0, i32 0), i8* getelementptr ([20 x i8]* @"\01L_OBJC_METH_VAR_TYPE_0", i32 0, i32 0), i8* bitcast ({ i8*, i32, [1 x %struct._objc_method] }* @"\01L_OBJC_INSTANCE_METHODS_NSBitmapImageRep" to i8*), i8* getelementptr ([17 x i8]* @"\01L_OBJC_CLASS_NAME_0", i32 0, i32 0), i8* getelementptr ([11 x i8]* @"\01L_OBJC_CLASS_NAME_1", i32 0, i32 0), i8* bitcast (%struct._objc_class* @"\01L_OBJC_METACLASS_NSBitmapImageRep" to i8*), i8* getelementptr ([34 x i8]* @"\01L_OBJC_METH_VAR_NAME_1", i32 0, i32 0), i8* bitcast ([2 x i32]* @"\01L_OBJC_IMAGE_INFO" to i8*), i8* getelementptr ([1 x i8]* @"\01L_OBJC_CLASS_NAME_2", i32 0, i32 0), i8* bitcast (%struct._objc_module* @"\01L_OBJC_MODULES" to i8*) ], section "llvm.metadata" ; <[14 x i8*]*> [#uses=0]
+@"\01L_OBJC_MODULES" = internal global %struct._objc_module { i32 7, i32 16, i8* getelementptr ([1 x i8], [1 x i8]* @"\01L_OBJC_CLASS_NAME_2", i32 0, i32 0), %struct._objc_symtab* bitcast ({ i32, %struct.objc_selector**, i16, i16, [1 x %struct._objc_class*] }* @"\01L_OBJC_SYMBOLS" to %struct._objc_symtab*) }, section "__OBJC,__module_info,regular,no_dead_strip" ; <%struct._objc_module*> [#uses=1]
+@llvm.used = appending global [14 x i8*] [ i8* bitcast (%struct.objc_selector** @"\01L_OBJC_SELECTOR_REFERENCES_1" to i8*), i8* bitcast (%struct._objc_class* @"\01L_OBJC_CLASS_NSBitmapImageRep" to i8*), i8* bitcast (%struct.objc_selector** @"\01L_OBJC_SELECTOR_REFERENCES_0" to i8*), i8* bitcast ({ i32, %struct.objc_selector**, i16, i16, [1 x %struct._objc_class*] }* @"\01L_OBJC_SYMBOLS" to i8*), i8* getelementptr ([14 x i8], [14 x i8]* @"\01L_OBJC_METH_VAR_NAME_0", i32 0, i32 0), i8* getelementptr ([20 x i8], [20 x i8]* @"\01L_OBJC_METH_VAR_TYPE_0", i32 0, i32 0), i8* bitcast ({ i8*, i32, [1 x %struct._objc_method] }* @"\01L_OBJC_INSTANCE_METHODS_NSBitmapImageRep" to i8*), i8* getelementptr ([17 x i8], [17 x i8]* @"\01L_OBJC_CLASS_NAME_0", i32 0, i32 0), i8* getelementptr ([11 x i8], [11 x i8]* @"\01L_OBJC_CLASS_NAME_1", i32 0, i32 0), i8* bitcast (%struct._objc_class* @"\01L_OBJC_METACLASS_NSBitmapImageRep" to i8*), i8* getelementptr ([34 x i8], [34 x i8]* @"\01L_OBJC_METH_VAR_NAME_1", i32 0, i32 0), i8* bitcast ([2 x i32]* @"\01L_OBJC_IMAGE_INFO" to i8*), i8* getelementptr ([1 x i8], [1 x i8]* @"\01L_OBJC_CLASS_NAME_2", i32 0, i32 0), i8* bitcast (%struct._objc_module* @"\01L_OBJC_MODULES" to i8*) ], section "llvm.metadata" ; <[14 x i8*]*> [#uses=0]
define internal %struct.objc_object* @"-[NSBitmapImageRep copyWithZone:]"(%struct.NSBitmapImageRep* %self, %struct.objc_selector* %_cmd, %struct.NSZone* %zone) nounwind {
entry:
@@ -62,55 +62,55 @@ entry:
store %struct.NSBitmapImageRep* %self, %struct.NSBitmapImageRep** %self_addr
store %struct.objc_selector* %_cmd, %struct.objc_selector** %_cmd_addr
store %struct.NSZone* %zone, %struct.NSZone** %zone_addr
- %3 = getelementptr %"struct.FRAME.-[NSBitmapImageRep copyWithZone:]"* %FRAME.7, i32 0, i32 0 ; <%struct.NSBitmapImageRep**> [#uses=1]
- %4 = load %struct.NSBitmapImageRep** %self_addr, align 4 ; <%struct.NSBitmapImageRep*> [#uses=1]
+ %3 = getelementptr %"struct.FRAME.-[NSBitmapImageRep copyWithZone:]", %"struct.FRAME.-[NSBitmapImageRep copyWithZone:]"* %FRAME.7, i32 0, i32 0 ; <%struct.NSBitmapImageRep**> [#uses=1]
+ %4 = load %struct.NSBitmapImageRep*, %struct.NSBitmapImageRep** %self_addr, align 4 ; <%struct.NSBitmapImageRep*> [#uses=1]
store %struct.NSBitmapImageRep* %4, %struct.NSBitmapImageRep** %3, align 4
%TRAMP.91 = bitcast %struct.__builtin_trampoline* %TRAMP.9 to i8* ; <i8*> [#uses=1]
%FRAME.72 = bitcast %"struct.FRAME.-[NSBitmapImageRep copyWithZone:]"* %FRAME.7 to i8* ; <i8*> [#uses=1]
call void @llvm.init.trampoline(i8* %TRAMP.91, i8* bitcast (void (%"struct.FRAME.-[NSBitmapImageRep copyWithZone:]"*, %struct.__block_1*, %struct.CGImage*)* @__helper_1.1632 to i8*), i8* %FRAME.72) ; <i8*> [#uses=1]
%tramp = call i8* @llvm.adjust.trampoline(i8* %TRAMP.91)
store i8* %tramp, i8** %0, align 4
- %5 = getelementptr %"struct.FRAME.-[NSBitmapImageRep copyWithZone:]"* %FRAME.7, i32 0, i32 1 ; <void (%struct.__block_1*, %struct.CGImage*)**> [#uses=1]
- %6 = load i8** %0, align 4 ; <i8*> [#uses=1]
+ %5 = getelementptr %"struct.FRAME.-[NSBitmapImageRep copyWithZone:]", %"struct.FRAME.-[NSBitmapImageRep copyWithZone:]"* %FRAME.7, i32 0, i32 1 ; <void (%struct.__block_1*, %struct.CGImage*)**> [#uses=1]
+ %6 = load i8*, i8** %0, align 4 ; <i8*> [#uses=1]
%7 = bitcast i8* %6 to void (%struct.__block_1*, %struct.CGImage*)* ; <void (%struct.__block_1*, %struct.CGImage*)*> [#uses=1]
store void (%struct.__block_1*, %struct.CGImage*)* %7, void (%struct.__block_1*, %struct.CGImage*)** %5, align 4
store %struct.NSBitmapImageRep* null, %struct.NSBitmapImageRep** %new, align 4
- %8 = getelementptr %struct.__block_1* %__block_holder_tmp_1.0, i32 0, i32 0 ; <%struct.__invoke_impl*> [#uses=1]
- %9 = getelementptr %struct.__invoke_impl* %8, i32 0, i32 0 ; <i8**> [#uses=1]
+ %8 = getelementptr %struct.__block_1, %struct.__block_1* %__block_holder_tmp_1.0, i32 0, i32 0 ; <%struct.__invoke_impl*> [#uses=1]
+ %9 = getelementptr %struct.__invoke_impl, %struct.__invoke_impl* %8, i32 0, i32 0 ; <i8**> [#uses=1]
store i8* bitcast (i8** @_NSConcreteStackBlock to i8*), i8** %9, align 4
- %10 = getelementptr %struct.__block_1* %__block_holder_tmp_1.0, i32 0, i32 0 ; <%struct.__invoke_impl*> [#uses=1]
- %11 = getelementptr %struct.__invoke_impl* %10, i32 0, i32 1 ; <i32*> [#uses=1]
+ %10 = getelementptr %struct.__block_1, %struct.__block_1* %__block_holder_tmp_1.0, i32 0, i32 0 ; <%struct.__invoke_impl*> [#uses=1]
+ %11 = getelementptr %struct.__invoke_impl, %struct.__invoke_impl* %10, i32 0, i32 1 ; <i32*> [#uses=1]
store i32 67108864, i32* %11, align 4
- %12 = getelementptr %struct.__block_1* %__block_holder_tmp_1.0, i32 0, i32 0 ; <%struct.__invoke_impl*> [#uses=1]
- %13 = getelementptr %struct.__invoke_impl* %12, i32 0, i32 2 ; <i32*> [#uses=1]
+ %12 = getelementptr %struct.__block_1, %struct.__block_1* %__block_holder_tmp_1.0, i32 0, i32 0 ; <%struct.__invoke_impl*> [#uses=1]
+ %13 = getelementptr %struct.__invoke_impl, %struct.__invoke_impl* %12, i32 0, i32 2 ; <i32*> [#uses=1]
store i32 24, i32* %13, align 4
- %14 = getelementptr %"struct.FRAME.-[NSBitmapImageRep copyWithZone:]"* %FRAME.7, i32 0, i32 1 ; <void (%struct.__block_1*, %struct.CGImage*)**> [#uses=1]
- %15 = load void (%struct.__block_1*, %struct.CGImage*)** %14, align 4 ; <void (%struct.__block_1*, %struct.CGImage*)*> [#uses=1]
+ %14 = getelementptr %"struct.FRAME.-[NSBitmapImageRep copyWithZone:]", %"struct.FRAME.-[NSBitmapImageRep copyWithZone:]"* %FRAME.7, i32 0, i32 1 ; <void (%struct.__block_1*, %struct.CGImage*)**> [#uses=1]
+ %15 = load void (%struct.__block_1*, %struct.CGImage*)*, void (%struct.__block_1*, %struct.CGImage*)** %14, align 4 ; <void (%struct.__block_1*, %struct.CGImage*)*> [#uses=1]
store void (%struct.__block_1*, %struct.CGImage*)* %15, void (%struct.__block_1*, %struct.CGImage*)** %1, align 4
- %16 = getelementptr %struct.__block_1* %__block_holder_tmp_1.0, i32 0, i32 0 ; <%struct.__invoke_impl*> [#uses=1]
- %17 = getelementptr %struct.__invoke_impl* %16, i32 0, i32 3 ; <i8**> [#uses=1]
- %18 = load void (%struct.__block_1*, %struct.CGImage*)** %1, align 4 ; <void (%struct.__block_1*, %struct.CGImage*)*> [#uses=1]
+ %16 = getelementptr %struct.__block_1, %struct.__block_1* %__block_holder_tmp_1.0, i32 0, i32 0 ; <%struct.__invoke_impl*> [#uses=1]
+ %17 = getelementptr %struct.__invoke_impl, %struct.__invoke_impl* %16, i32 0, i32 3 ; <i8**> [#uses=1]
+ %18 = load void (%struct.__block_1*, %struct.CGImage*)*, void (%struct.__block_1*, %struct.CGImage*)** %1, align 4 ; <void (%struct.__block_1*, %struct.CGImage*)*> [#uses=1]
%19 = bitcast void (%struct.__block_1*, %struct.CGImage*)* %18 to i8* ; <i8*> [#uses=1]
store i8* %19, i8** %17, align 4
- %20 = getelementptr %struct.__block_1* %__block_holder_tmp_1.0, i32 0, i32 1 ; <%struct.NSZone**> [#uses=1]
- %21 = load %struct.NSZone** %zone_addr, align 4 ; <%struct.NSZone*> [#uses=1]
+ %20 = getelementptr %struct.__block_1, %struct.__block_1* %__block_holder_tmp_1.0, i32 0, i32 1 ; <%struct.NSZone**> [#uses=1]
+ %21 = load %struct.NSZone*, %struct.NSZone** %zone_addr, align 4 ; <%struct.NSZone*> [#uses=1]
store %struct.NSZone* %21, %struct.NSZone** %20, align 4
- %22 = getelementptr %struct.__block_1* %__block_holder_tmp_1.0, i32 0, i32 2 ; <%struct.NSBitmapImageRep***> [#uses=1]
+ %22 = getelementptr %struct.__block_1, %struct.__block_1* %__block_holder_tmp_1.0, i32 0, i32 2 ; <%struct.NSBitmapImageRep***> [#uses=1]
store %struct.NSBitmapImageRep** %new, %struct.NSBitmapImageRep*** %22, align 4
- %23 = getelementptr %"struct.FRAME.-[NSBitmapImageRep copyWithZone:]"* %FRAME.7, i32 0, i32 0 ; <%struct.NSBitmapImageRep**> [#uses=1]
- %24 = load %struct.NSBitmapImageRep** %23, align 4 ; <%struct.NSBitmapImageRep*> [#uses=1]
+ %23 = getelementptr %"struct.FRAME.-[NSBitmapImageRep copyWithZone:]", %"struct.FRAME.-[NSBitmapImageRep copyWithZone:]"* %FRAME.7, i32 0, i32 0 ; <%struct.NSBitmapImageRep**> [#uses=1]
+ %24 = load %struct.NSBitmapImageRep*, %struct.NSBitmapImageRep** %23, align 4 ; <%struct.NSBitmapImageRep*> [#uses=1]
store %struct.NSBitmapImageRep* %24, %struct.NSBitmapImageRep** %2, align 4
- %25 = load %struct.NSBitmapImageRep** %2, align 4 ; <%struct.NSBitmapImageRep*> [#uses=1]
+ %25 = load %struct.NSBitmapImageRep*, %struct.NSBitmapImageRep** %2, align 4 ; <%struct.NSBitmapImageRep*> [#uses=1]
%26 = bitcast %struct.NSBitmapImageRep* %25 to %struct.objc_object* ; <%struct.objc_object*> [#uses=1]
store %struct.objc_object* %26, %struct.objc_object** %self.1, align 4
- %27 = load %struct.objc_selector** @"\01L_OBJC_SELECTOR_REFERENCES_1", align 4 ; <%struct.objc_selector*> [#uses=1]
+ %27 = load %struct.objc_selector*, %struct.objc_selector** @"\01L_OBJC_SELECTOR_REFERENCES_1", align 4 ; <%struct.objc_selector*> [#uses=1]
%__block_holder_tmp_1.03 = bitcast %struct.__block_1* %__block_holder_tmp_1.0 to void (%struct.CGImage*)* ; <void (%struct.CGImage*)*> [#uses=1]
- %28 = load %struct.objc_object** %self.1, align 4 ; <%struct.objc_object*> [#uses=1]
- %29 = call %struct.objc_object* (%struct.objc_object*, %struct.objc_selector*, ...)* inttoptr (i64 4294901504 to %struct.objc_object* (%struct.objc_object*, %struct.objc_selector*, ...)*)(%struct.objc_object* %28, %struct.objc_selector* %27, void (%struct.CGImage*)* %__block_holder_tmp_1.03) nounwind ; <%struct.objc_object*> [#uses=0]
+ %28 = load %struct.objc_object*, %struct.objc_object** %self.1, align 4 ; <%struct.objc_object*> [#uses=1]
+ %29 = call %struct.objc_object* (%struct.objc_object*, %struct.objc_selector*, ...) inttoptr (i64 4294901504 to %struct.objc_object* (%struct.objc_object*, %struct.objc_selector*, ...)*)(%struct.objc_object* %28, %struct.objc_selector* %27, void (%struct.CGImage*)* %__block_holder_tmp_1.03) nounwind ; <%struct.objc_object*> [#uses=0]
br label %return
return: ; preds = %entry
- %retval5 = load %struct.objc_object** %retval ; <%struct.objc_object*> [#uses=1]
+ %retval5 = load %struct.objc_object*, %struct.objc_object** %retval ; <%struct.objc_object*> [#uses=1]
ret %struct.objc_object* %retval5
}
@@ -131,33 +131,33 @@ entry:
store %"struct.FRAME.-[NSBitmapImageRep copyWithZone:]"* %CHAIN.8, %"struct.FRAME.-[NSBitmapImageRep copyWithZone:]"** %CHAIN.8_addr
store %struct.__block_1* %_self, %struct.__block_1** %_self_addr
store %struct.CGImage* %cgImage, %struct.CGImage** %cgImage_addr
- %1 = load %struct.__block_1** %_self_addr, align 4 ; <%struct.__block_1*> [#uses=1]
- %2 = getelementptr %struct.__block_1* %1, i32 0, i32 2 ; <%struct.NSBitmapImageRep***> [#uses=1]
- %3 = load %struct.NSBitmapImageRep*** %2, align 4 ; <%struct.NSBitmapImageRep**> [#uses=1]
+ %1 = load %struct.__block_1*, %struct.__block_1** %_self_addr, align 4 ; <%struct.__block_1*> [#uses=1]
+ %2 = getelementptr %struct.__block_1, %struct.__block_1* %1, i32 0, i32 2 ; <%struct.NSBitmapImageRep***> [#uses=1]
+ %3 = load %struct.NSBitmapImageRep**, %struct.NSBitmapImageRep*** %2, align 4 ; <%struct.NSBitmapImageRep**> [#uses=1]
store %struct.NSBitmapImageRep** %3, %struct.NSBitmapImageRep*** %new, align 4
- %4 = load %struct.__block_1** %_self_addr, align 4 ; <%struct.__block_1*> [#uses=1]
- %5 = getelementptr %struct.__block_1* %4, i32 0, i32 1 ; <%struct.NSZone**> [#uses=1]
- %6 = load %struct.NSZone** %5, align 4 ; <%struct.NSZone*> [#uses=1]
+ %4 = load %struct.__block_1*, %struct.__block_1** %_self_addr, align 4 ; <%struct.__block_1*> [#uses=1]
+ %5 = getelementptr %struct.__block_1, %struct.__block_1* %4, i32 0, i32 1 ; <%struct.NSZone**> [#uses=1]
+ %6 = load %struct.NSZone*, %struct.NSZone** %5, align 4 ; <%struct.NSZone*> [#uses=1]
store %struct.NSZone* %6, %struct.NSZone** %zone, align 4
- %7 = load %"struct.FRAME.-[NSBitmapImageRep copyWithZone:]"** %CHAIN.8_addr, align 4 ; <%"struct.FRAME.-[NSBitmapImageRep copyWithZone:]"*> [#uses=1]
- %8 = getelementptr %"struct.FRAME.-[NSBitmapImageRep copyWithZone:]"* %7, i32 0, i32 0 ; <%struct.NSBitmapImageRep**> [#uses=1]
- %9 = load %struct.NSBitmapImageRep** %8, align 4 ; <%struct.NSBitmapImageRep*> [#uses=1]
+ %7 = load %"struct.FRAME.-[NSBitmapImageRep copyWithZone:]"*, %"struct.FRAME.-[NSBitmapImageRep copyWithZone:]"** %CHAIN.8_addr, align 4 ; <%"struct.FRAME.-[NSBitmapImageRep copyWithZone:]"*> [#uses=1]
+ %8 = getelementptr %"struct.FRAME.-[NSBitmapImageRep copyWithZone:]", %"struct.FRAME.-[NSBitmapImageRep copyWithZone:]"* %7, i32 0, i32 0 ; <%struct.NSBitmapImageRep**> [#uses=1]
+ %9 = load %struct.NSBitmapImageRep*, %struct.NSBitmapImageRep** %8, align 4 ; <%struct.NSBitmapImageRep*> [#uses=1]
store %struct.NSBitmapImageRep* %9, %struct.NSBitmapImageRep** %0, align 4
- %10 = load %struct.NSBitmapImageRep** %0, align 4 ; <%struct.NSBitmapImageRep*> [#uses=1]
+ %10 = load %struct.NSBitmapImageRep*, %struct.NSBitmapImageRep** %0, align 4 ; <%struct.NSBitmapImageRep*> [#uses=1]
%11 = bitcast %struct.NSBitmapImageRep* %10 to %struct.objc_object* ; <%struct.objc_object*> [#uses=1]
- %12 = getelementptr %struct._objc_super* %objc_super, i32 0, i32 0 ; <%struct.objc_object**> [#uses=1]
+ %12 = getelementptr %struct._objc_super, %struct._objc_super* %objc_super, i32 0, i32 0 ; <%struct.objc_object**> [#uses=1]
store %struct.objc_object* %11, %struct.objc_object** %12, align 4
- %13 = load %struct._objc_class** getelementptr (%struct._objc_class* @"\01L_OBJC_CLASS_NSBitmapImageRep", i32 0, i32 1), align 4 ; <%struct._objc_class*> [#uses=1]
- %14 = getelementptr %struct._objc_super* %objc_super, i32 0, i32 1 ; <%struct._objc_class**> [#uses=1]
+ %13 = load %struct._objc_class*, %struct._objc_class** getelementptr (%struct._objc_class, %struct._objc_class* @"\01L_OBJC_CLASS_NSBitmapImageRep", i32 0, i32 1), align 4 ; <%struct._objc_class*> [#uses=1]
+ %14 = getelementptr %struct._objc_super, %struct._objc_super* %objc_super, i32 0, i32 1 ; <%struct._objc_class**> [#uses=1]
store %struct._objc_class* %13, %struct._objc_class** %14, align 4
%objc_super1 = bitcast %struct._objc_super* %objc_super to %struct.objc_super* ; <%struct.objc_super*> [#uses=1]
store %struct.objc_super* %objc_super1, %struct.objc_super** %objc_super.5, align 4
- %15 = load %struct.objc_selector** @"\01L_OBJC_SELECTOR_REFERENCES_0", align 4 ; <%struct.objc_selector*> [#uses=1]
- %16 = load %struct.objc_super** %objc_super.5, align 4 ; <%struct.objc_super*> [#uses=1]
- %17 = load %struct.NSZone** %zone, align 4 ; <%struct.NSZone*> [#uses=1]
- %18 = call %struct.objc_object* (%struct.objc_super*, %struct.objc_selector*, ...)* @objc_msgSendSuper(%struct.objc_super* %16, %struct.objc_selector* %15, %struct.NSZone* %17) nounwind ; <%struct.objc_object*> [#uses=1]
+ %15 = load %struct.objc_selector*, %struct.objc_selector** @"\01L_OBJC_SELECTOR_REFERENCES_0", align 4 ; <%struct.objc_selector*> [#uses=1]
+ %16 = load %struct.objc_super*, %struct.objc_super** %objc_super.5, align 4 ; <%struct.objc_super*> [#uses=1]
+ %17 = load %struct.NSZone*, %struct.NSZone** %zone, align 4 ; <%struct.NSZone*> [#uses=1]
+ %18 = call %struct.objc_object* (%struct.objc_super*, %struct.objc_selector*, ...) @objc_msgSendSuper(%struct.objc_super* %16, %struct.objc_selector* %15, %struct.NSZone* %17) nounwind ; <%struct.objc_object*> [#uses=1]
%19 = bitcast %struct.objc_object* %18 to %struct.NSBitmapImageRep* ; <%struct.NSBitmapImageRep*> [#uses=1]
- %20 = load %struct.NSBitmapImageRep*** %new, align 4 ; <%struct.NSBitmapImageRep**> [#uses=1]
+ %20 = load %struct.NSBitmapImageRep**, %struct.NSBitmapImageRep*** %new, align 4 ; <%struct.NSBitmapImageRep**> [#uses=1]
store %struct.NSBitmapImageRep* %19, %struct.NSBitmapImageRep** %20, align 4
br label %return
diff --git a/test/CodeGen/PowerPC/unal-altivec-wint.ll b/test/CodeGen/PowerPC/unal-altivec-wint.ll
index 7e0963f54b33..b71a98bc83bb 100644
--- a/test/CodeGen/PowerPC/unal-altivec-wint.ll
+++ b/test/CodeGen/PowerPC/unal-altivec-wint.ll
@@ -6,11 +6,11 @@ declare <4 x i32> @llvm.ppc.altivec.lvx(i8*) #1
define <4 x i32> @test1(<4 x i32>* %h) #0 {
entry:
- %h1 = getelementptr <4 x i32>* %h, i64 1
+ %h1 = getelementptr <4 x i32>, <4 x i32>* %h, i64 1
%hv = bitcast <4 x i32>* %h1 to i8*
%vl = call <4 x i32> @llvm.ppc.altivec.lvx(i8* %hv)
- %v0 = load <4 x i32>* %h, align 8
+ %v0 = load <4 x i32>, <4 x i32>* %h, align 8
%a = add <4 x i32> %v0, %vl
ret <4 x i32> %a
@@ -27,11 +27,11 @@ declare void @llvm.ppc.altivec.stvx(<4 x i32>, i8*) #0
define <4 x i32> @test2(<4 x i32>* %h, <4 x i32> %d) #0 {
entry:
- %h1 = getelementptr <4 x i32>* %h, i64 1
+ %h1 = getelementptr <4 x i32>, <4 x i32>* %h, i64 1
%hv = bitcast <4 x i32>* %h1 to i8*
call void @llvm.ppc.altivec.stvx(<4 x i32> %d, i8* %hv)
- %v0 = load <4 x i32>* %h, align 8
+ %v0 = load <4 x i32>, <4 x i32>* %h, align 8
ret <4 x i32> %v0
diff --git a/test/CodeGen/PowerPC/unal-altivec.ll b/test/CodeGen/PowerPC/unal-altivec.ll
index 7f333a1c508b..02f7ab40f049 100644
--- a/test/CodeGen/PowerPC/unal-altivec.ll
+++ b/test/CodeGen/PowerPC/unal-altivec.ll
@@ -8,20 +8,20 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %0 = getelementptr inbounds float* %b, i64 %index
+ %0 = getelementptr inbounds float, float* %b, i64 %index
%1 = bitcast float* %0 to <4 x float>*
- %wide.load = load <4 x float>* %1, align 4
+ %wide.load = load <4 x float>, <4 x float>* %1, align 4
%.sum11 = or i64 %index, 4
- %2 = getelementptr float* %b, i64 %.sum11
+ %2 = getelementptr float, float* %b, i64 %.sum11
%3 = bitcast float* %2 to <4 x float>*
- %wide.load8 = load <4 x float>* %3, align 4
+ %wide.load8 = load <4 x float>, <4 x float>* %3, align 4
%4 = fadd <4 x float> %wide.load, <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
%5 = fadd <4 x float> %wide.load8, <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
- %6 = getelementptr inbounds float* %a, i64 %index
+ %6 = getelementptr inbounds float, float* %a, i64 %index
%7 = bitcast float* %6 to <4 x float>*
store <4 x float> %4, <4 x float>* %7, align 4
%.sum12 = or i64 %index, 4
- %8 = getelementptr float* %a, i64 %.sum12
+ %8 = getelementptr float, float* %a, i64 %.sum12
%9 = bitcast float* %8 to <4 x float>*
store <4 x float> %5, <4 x float>* %9, align 4
%index.next = add i64 %index, 8
diff --git a/test/CodeGen/PowerPC/unal-altivec2.ll b/test/CodeGen/PowerPC/unal-altivec2.ll
index 7464675470f9..0d15b977ca18 100644
--- a/test/CodeGen/PowerPC/unal-altivec2.ll
+++ b/test/CodeGen/PowerPC/unal-altivec2.ll
@@ -12,131 +12,131 @@ vector.body: ; preds = %vector.body, %entry
; CHECK: lvsl
; CHECK: blr
%index = phi i64 [ 0, %entry ], [ %index.next.15, %vector.body ]
- %0 = getelementptr inbounds float* %y, i64 %index
+ %0 = getelementptr inbounds float, float* %y, i64 %index
%1 = bitcast float* %0 to <4 x float>*
- %wide.load = load <4 x float>* %1, align 4
+ %wide.load = load <4 x float>, <4 x float>* %1, align 4
%2 = call <4 x float> @llvm_cos_v4f32(<4 x float> %wide.load)
- %3 = getelementptr inbounds float* %x, i64 %index
+ %3 = getelementptr inbounds float, float* %x, i64 %index
%4 = bitcast float* %3 to <4 x float>*
store <4 x float> %2, <4 x float>* %4, align 4
%index.next = add i64 %index, 4
- %5 = getelementptr inbounds float* %y, i64 %index.next
+ %5 = getelementptr inbounds float, float* %y, i64 %index.next
%6 = bitcast float* %5 to <4 x float>*
- %wide.load.1 = load <4 x float>* %6, align 4
+ %wide.load.1 = load <4 x float>, <4 x float>* %6, align 4
%7 = call <4 x float> @llvm_cos_v4f32(<4 x float> %wide.load.1)
- %8 = getelementptr inbounds float* %x, i64 %index.next
+ %8 = getelementptr inbounds float, float* %x, i64 %index.next
%9 = bitcast float* %8 to <4 x float>*
store <4 x float> %7, <4 x float>* %9, align 4
%index.next.1 = add i64 %index.next, 4
- %10 = getelementptr inbounds float* %y, i64 %index.next.1
+ %10 = getelementptr inbounds float, float* %y, i64 %index.next.1
%11 = bitcast float* %10 to <4 x float>*
- %wide.load.2 = load <4 x float>* %11, align 4
+ %wide.load.2 = load <4 x float>, <4 x float>* %11, align 4
%12 = call <4 x float> @llvm_cos_v4f32(<4 x float> %wide.load.2)
- %13 = getelementptr inbounds float* %x, i64 %index.next.1
+ %13 = getelementptr inbounds float, float* %x, i64 %index.next.1
%14 = bitcast float* %13 to <4 x float>*
store <4 x float> %12, <4 x float>* %14, align 4
%index.next.2 = add i64 %index.next.1, 4
- %15 = getelementptr inbounds float* %y, i64 %index.next.2
+ %15 = getelementptr inbounds float, float* %y, i64 %index.next.2
%16 = bitcast float* %15 to <4 x float>*
- %wide.load.3 = load <4 x float>* %16, align 4
+ %wide.load.3 = load <4 x float>, <4 x float>* %16, align 4
%17 = call <4 x float> @llvm_cos_v4f32(<4 x float> %wide.load.3)
- %18 = getelementptr inbounds float* %x, i64 %index.next.2
+ %18 = getelementptr inbounds float, float* %x, i64 %index.next.2
%19 = bitcast float* %18 to <4 x float>*
store <4 x float> %17, <4 x float>* %19, align 4
%index.next.3 = add i64 %index.next.2, 4
- %20 = getelementptr inbounds float* %y, i64 %index.next.3
+ %20 = getelementptr inbounds float, float* %y, i64 %index.next.3
%21 = bitcast float* %20 to <4 x float>*
- %wide.load.4 = load <4 x float>* %21, align 4
+ %wide.load.4 = load <4 x float>, <4 x float>* %21, align 4
%22 = call <4 x float> @llvm_cos_v4f32(<4 x float> %wide.load.4)
- %23 = getelementptr inbounds float* %x, i64 %index.next.3
+ %23 = getelementptr inbounds float, float* %x, i64 %index.next.3
%24 = bitcast float* %23 to <4 x float>*
store <4 x float> %22, <4 x float>* %24, align 4
%index.next.4 = add i64 %index.next.3, 4
- %25 = getelementptr inbounds float* %y, i64 %index.next.4
+ %25 = getelementptr inbounds float, float* %y, i64 %index.next.4
%26 = bitcast float* %25 to <4 x float>*
- %wide.load.5 = load <4 x float>* %26, align 4
+ %wide.load.5 = load <4 x float>, <4 x float>* %26, align 4
%27 = call <4 x float> @llvm_cos_v4f32(<4 x float> %wide.load.5)
- %28 = getelementptr inbounds float* %x, i64 %index.next.4
+ %28 = getelementptr inbounds float, float* %x, i64 %index.next.4
%29 = bitcast float* %28 to <4 x float>*
store <4 x float> %27, <4 x float>* %29, align 4
%index.next.5 = add i64 %index.next.4, 4
- %30 = getelementptr inbounds float* %y, i64 %index.next.5
+ %30 = getelementptr inbounds float, float* %y, i64 %index.next.5
%31 = bitcast float* %30 to <4 x float>*
- %wide.load.6 = load <4 x float>* %31, align 4
+ %wide.load.6 = load <4 x float>, <4 x float>* %31, align 4
%32 = call <4 x float> @llvm_cos_v4f32(<4 x float> %wide.load.6)
- %33 = getelementptr inbounds float* %x, i64 %index.next.5
+ %33 = getelementptr inbounds float, float* %x, i64 %index.next.5
%34 = bitcast float* %33 to <4 x float>*
store <4 x float> %32, <4 x float>* %34, align 4
%index.next.6 = add i64 %index.next.5, 4
- %35 = getelementptr inbounds float* %y, i64 %index.next.6
+ %35 = getelementptr inbounds float, float* %y, i64 %index.next.6
%36 = bitcast float* %35 to <4 x float>*
- %wide.load.7 = load <4 x float>* %36, align 4
+ %wide.load.7 = load <4 x float>, <4 x float>* %36, align 4
%37 = call <4 x float> @llvm_cos_v4f32(<4 x float> %wide.load.7)
- %38 = getelementptr inbounds float* %x, i64 %index.next.6
+ %38 = getelementptr inbounds float, float* %x, i64 %index.next.6
%39 = bitcast float* %38 to <4 x float>*
store <4 x float> %37, <4 x float>* %39, align 4
%index.next.7 = add i64 %index.next.6, 4
- %40 = getelementptr inbounds float* %y, i64 %index.next.7
+ %40 = getelementptr inbounds float, float* %y, i64 %index.next.7
%41 = bitcast float* %40 to <4 x float>*
- %wide.load.8 = load <4 x float>* %41, align 4
+ %wide.load.8 = load <4 x float>, <4 x float>* %41, align 4
%42 = call <4 x float> @llvm_cos_v4f32(<4 x float> %wide.load.8)
- %43 = getelementptr inbounds float* %x, i64 %index.next.7
+ %43 = getelementptr inbounds float, float* %x, i64 %index.next.7
%44 = bitcast float* %43 to <4 x float>*
store <4 x float> %42, <4 x float>* %44, align 4
%index.next.8 = add i64 %index.next.7, 4
- %45 = getelementptr inbounds float* %y, i64 %index.next.8
+ %45 = getelementptr inbounds float, float* %y, i64 %index.next.8
%46 = bitcast float* %45 to <4 x float>*
- %wide.load.9 = load <4 x float>* %46, align 4
+ %wide.load.9 = load <4 x float>, <4 x float>* %46, align 4
%47 = call <4 x float> @llvm_cos_v4f32(<4 x float> %wide.load.9)
- %48 = getelementptr inbounds float* %x, i64 %index.next.8
+ %48 = getelementptr inbounds float, float* %x, i64 %index.next.8
%49 = bitcast float* %48 to <4 x float>*
store <4 x float> %47, <4 x float>* %49, align 4
%index.next.9 = add i64 %index.next.8, 4
- %50 = getelementptr inbounds float* %y, i64 %index.next.9
+ %50 = getelementptr inbounds float, float* %y, i64 %index.next.9
%51 = bitcast float* %50 to <4 x float>*
- %wide.load.10 = load <4 x float>* %51, align 4
+ %wide.load.10 = load <4 x float>, <4 x float>* %51, align 4
%52 = call <4 x float> @llvm_cos_v4f32(<4 x float> %wide.load.10)
- %53 = getelementptr inbounds float* %x, i64 %index.next.9
+ %53 = getelementptr inbounds float, float* %x, i64 %index.next.9
%54 = bitcast float* %53 to <4 x float>*
store <4 x float> %52, <4 x float>* %54, align 4
%index.next.10 = add i64 %index.next.9, 4
- %55 = getelementptr inbounds float* %y, i64 %index.next.10
+ %55 = getelementptr inbounds float, float* %y, i64 %index.next.10
%56 = bitcast float* %55 to <4 x float>*
- %wide.load.11 = load <4 x float>* %56, align 4
+ %wide.load.11 = load <4 x float>, <4 x float>* %56, align 4
%57 = call <4 x float> @llvm_cos_v4f32(<4 x float> %wide.load.11)
- %58 = getelementptr inbounds float* %x, i64 %index.next.10
+ %58 = getelementptr inbounds float, float* %x, i64 %index.next.10
%59 = bitcast float* %58 to <4 x float>*
store <4 x float> %57, <4 x float>* %59, align 4
%index.next.11 = add i64 %index.next.10, 4
- %60 = getelementptr inbounds float* %y, i64 %index.next.11
+ %60 = getelementptr inbounds float, float* %y, i64 %index.next.11
%61 = bitcast float* %60 to <4 x float>*
- %wide.load.12 = load <4 x float>* %61, align 4
+ %wide.load.12 = load <4 x float>, <4 x float>* %61, align 4
%62 = call <4 x float> @llvm_cos_v4f32(<4 x float> %wide.load.12)
- %63 = getelementptr inbounds float* %x, i64 %index.next.11
+ %63 = getelementptr inbounds float, float* %x, i64 %index.next.11
%64 = bitcast float* %63 to <4 x float>*
store <4 x float> %62, <4 x float>* %64, align 4
%index.next.12 = add i64 %index.next.11, 4
- %65 = getelementptr inbounds float* %y, i64 %index.next.12
+ %65 = getelementptr inbounds float, float* %y, i64 %index.next.12
%66 = bitcast float* %65 to <4 x float>*
- %wide.load.13 = load <4 x float>* %66, align 4
+ %wide.load.13 = load <4 x float>, <4 x float>* %66, align 4
%67 = call <4 x float> @llvm_cos_v4f32(<4 x float> %wide.load.13)
- %68 = getelementptr inbounds float* %x, i64 %index.next.12
+ %68 = getelementptr inbounds float, float* %x, i64 %index.next.12
%69 = bitcast float* %68 to <4 x float>*
store <4 x float> %67, <4 x float>* %69, align 4
%index.next.13 = add i64 %index.next.12, 4
- %70 = getelementptr inbounds float* %y, i64 %index.next.13
+ %70 = getelementptr inbounds float, float* %y, i64 %index.next.13
%71 = bitcast float* %70 to <4 x float>*
- %wide.load.14 = load <4 x float>* %71, align 4
+ %wide.load.14 = load <4 x float>, <4 x float>* %71, align 4
%72 = call <4 x float> @llvm_cos_v4f32(<4 x float> %wide.load.14)
- %73 = getelementptr inbounds float* %x, i64 %index.next.13
+ %73 = getelementptr inbounds float, float* %x, i64 %index.next.13
%74 = bitcast float* %73 to <4 x float>*
store <4 x float> %72, <4 x float>* %74, align 4
%index.next.14 = add i64 %index.next.13, 4
- %75 = getelementptr inbounds float* %y, i64 %index.next.14
+ %75 = getelementptr inbounds float, float* %y, i64 %index.next.14
%76 = bitcast float* %75 to <4 x float>*
- %wide.load.15 = load <4 x float>* %76, align 4
+ %wide.load.15 = load <4 x float>, <4 x float>* %76, align 4
%77 = call <4 x float> @llvm_cos_v4f32(<4 x float> %wide.load.15)
- %78 = getelementptr inbounds float* %x, i64 %index.next.14
+ %78 = getelementptr inbounds float, float* %x, i64 %index.next.14
%79 = bitcast float* %78 to <4 x float>*
store <4 x float> %77, <4 x float>* %79, align 4
%index.next.15 = add i64 %index.next.14, 4
@@ -153,7 +153,7 @@ declare <4 x float> @llvm_cos_v4f32(<4 x float>) #1
define <2 x double> @bar(double* %x) {
entry:
%p = bitcast double* %x to <2 x double>*
- %r = load <2 x double>* %p, align 8
+ %r = load <2 x double>, <2 x double>* %p, align 8
; CHECK-LABEL: @bar
; CHECK-NOT: lvsl
diff --git a/test/CodeGen/PowerPC/unaligned.ll b/test/CodeGen/PowerPC/unaligned.ll
index 64c03cdda35e..6b23b18762d3 100644
--- a/test/CodeGen/PowerPC/unaligned.ll
+++ b/test/CodeGen/PowerPC/unaligned.ll
@@ -5,7 +5,7 @@ target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f3
define void @foo1(i16* %p, i16* %r) nounwind {
entry:
- %v = load i16* %p, align 1
+ %v = load i16, i16* %p, align 1
store i16 %v, i16* %r, align 1
ret void
@@ -20,7 +20,7 @@ entry:
define void @foo2(i32* %p, i32* %r) nounwind {
entry:
- %v = load i32* %p, align 1
+ %v = load i32, i32* %p, align 1
store i32 %v, i32* %r, align 1
ret void
@@ -35,7 +35,7 @@ entry:
define void @foo3(i64* %p, i64* %r) nounwind {
entry:
- %v = load i64* %p, align 1
+ %v = load i64, i64* %p, align 1
store i64 %v, i64* %r, align 1
ret void
@@ -50,7 +50,7 @@ entry:
define void @foo4(float* %p, float* %r) nounwind {
entry:
- %v = load float* %p, align 1
+ %v = load float, float* %p, align 1
store float %v, float* %r, align 1
ret void
@@ -65,7 +65,7 @@ entry:
define void @foo5(double* %p, double* %r) nounwind {
entry:
- %v = load double* %p, align 1
+ %v = load double, double* %p, align 1
store double %v, double* %r, align 1
ret void
@@ -80,7 +80,7 @@ entry:
define void @foo6(<4 x float>* %p, <4 x float>* %r) nounwind {
entry:
- %v = load <4 x float>* %p, align 1
+ %v = load <4 x float>, <4 x float>* %p, align 1
store <4 x float> %v, <4 x float>* %r, align 1
ret void
diff --git a/test/CodeGen/PowerPC/unwind-dw2-g.ll b/test/CodeGen/PowerPC/unwind-dw2-g.ll
index 4ae6ff24a038..8bd158867c79 100644
--- a/test/CodeGen/PowerPC/unwind-dw2-g.ll
+++ b/test/CodeGen/PowerPC/unwind-dw2-g.ll
@@ -21,15 +21,15 @@ attributes #0 = { nounwind }
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!8, !11}
-!0 = !{!"0x11\0012\00clang version 3.4\000\00\000\00\000", !1, !2, !2, !3, !2, !2} ; [ DW_TAG_compile_unit ] [/tmp/unwind-dw2.c] [DW_LANG_C99]
-!1 = !{!"/tmp/unwind-dw2.c", !"/tmp"}
+!0 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.4", isOptimized: false, emissionKind: 0, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!1 = !DIFile(filename: "/tmp/unwind-dw2.c", directory: "/tmp")
!2 = !{}
!3 = !{!4}
-!4 = !{!"0x2e\00foo\00foo\00\001\000\001\000\006\000\000\001", !1, !5, !6, null, void ()* @foo, null, null, !2} ; [ DW_TAG_subprogram ] [line 1] [def] [foo]
-!5 = !{!"0x29", !1} ; [ DW_TAG_file_type ] [/tmp/unwind-dw2.c]
-!6 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !7, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!4 = !DISubprogram(name: "foo", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, scopeLine: 1, file: !1, scope: !5, type: !6, function: void ()* @foo, variables: !2)
+!5 = !DIFile(filename: "/tmp/unwind-dw2.c", directory: "/tmp")
+!6 = !DISubroutineType(types: !7)
!7 = !{null}
!8 = !{i32 2, !"Dwarf Version", i32 3}
-!9 = !MDLocation(line: 2, scope: !4)
-!10 = !MDLocation(line: 3, scope: !4)
-!11 = !{i32 1, !"Debug Info Version", i32 2}
+!9 = !DILocation(line: 2, scope: !4)
+!10 = !DILocation(line: 3, scope: !4)
+!11 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/CodeGen/PowerPC/vaddsplat.ll b/test/CodeGen/PowerPC/vaddsplat.ll
index 4236fabea0a6..70a7ea0c5533 100644
--- a/test/CodeGen/PowerPC/vaddsplat.ll
+++ b/test/CodeGen/PowerPC/vaddsplat.ll
@@ -10,7 +10,7 @@ target triple = "powerpc64-unknown-linux-gnu"
%v16i8 = type <16 x i8>
define void @test_v4i32_pos_even(%v4i32* %P, %v4i32* %S) {
- %p = load %v4i32* %P
+ %p = load %v4i32, %v4i32* %P
%r = add %v4i32 %p, < i32 18, i32 18, i32 18, i32 18 >
store %v4i32 %r, %v4i32* %S
ret void
@@ -21,7 +21,7 @@ define void @test_v4i32_pos_even(%v4i32* %P, %v4i32* %S) {
; CHECK: vadduwm {{[0-9]+}}, [[REG1]], [[REG1]]
define void @test_v4i32_neg_even(%v4i32* %P, %v4i32* %S) {
- %p = load %v4i32* %P
+ %p = load %v4i32, %v4i32* %P
%r = add %v4i32 %p, < i32 -28, i32 -28, i32 -28, i32 -28 >
store %v4i32 %r, %v4i32* %S
ret void
@@ -32,7 +32,7 @@ define void @test_v4i32_neg_even(%v4i32* %P, %v4i32* %S) {
; CHECK: vadduwm {{[0-9]+}}, [[REG1]], [[REG1]]
define void @test_v8i16_pos_even(%v8i16* %P, %v8i16* %S) {
- %p = load %v8i16* %P
+ %p = load %v8i16, %v8i16* %P
%r = add %v8i16 %p, < i16 30, i16 30, i16 30, i16 30, i16 30, i16 30, i16 30, i16 30 >
store %v8i16 %r, %v8i16* %S
ret void
@@ -43,7 +43,7 @@ define void @test_v8i16_pos_even(%v8i16* %P, %v8i16* %S) {
; CHECK: vadduhm {{[0-9]+}}, [[REG1]], [[REG1]]
define void @test_v8i16_neg_even(%v8i16* %P, %v8i16* %S) {
- %p = load %v8i16* %P
+ %p = load %v8i16, %v8i16* %P
%r = add %v8i16 %p, < i16 -32, i16 -32, i16 -32, i16 -32, i16 -32, i16 -32, i16 -32, i16 -32 >
store %v8i16 %r, %v8i16* %S
ret void
@@ -54,7 +54,7 @@ define void @test_v8i16_neg_even(%v8i16* %P, %v8i16* %S) {
; CHECK: vadduhm {{[0-9]+}}, [[REG1]], [[REG1]]
define void @test_v16i8_pos_even(%v16i8* %P, %v16i8* %S) {
- %p = load %v16i8* %P
+ %p = load %v16i8, %v16i8* %P
%r = add %v16i8 %p, < i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16 >
store %v16i8 %r, %v16i8* %S
ret void
@@ -65,7 +65,7 @@ define void @test_v16i8_pos_even(%v16i8* %P, %v16i8* %S) {
; CHECK: vaddubm {{[0-9]+}}, [[REG1]], [[REG1]]
define void @test_v16i8_neg_even(%v16i8* %P, %v16i8* %S) {
- %p = load %v16i8* %P
+ %p = load %v16i8, %v16i8* %P
%r = add %v16i8 %p, < i8 -18, i8 -18, i8 -18, i8 -18, i8 -18, i8 -18, i8 -18, i8 -18, i8 -18, i8 -18, i8 -18, i8 -18, i8 -18, i8 -18, i8 -18, i8 -18 >
store %v16i8 %r, %v16i8* %S
ret void
@@ -76,7 +76,7 @@ define void @test_v16i8_neg_even(%v16i8* %P, %v16i8* %S) {
; CHECK: vaddubm {{[0-9]+}}, [[REG1]], [[REG1]]
define void @test_v4i32_pos_odd(%v4i32* %P, %v4i32* %S) {
- %p = load %v4i32* %P
+ %p = load %v4i32, %v4i32* %P
%r = add %v4i32 %p, < i32 27, i32 27, i32 27, i32 27 >
store %v4i32 %r, %v4i32* %S
ret void
@@ -88,7 +88,7 @@ define void @test_v4i32_pos_odd(%v4i32* %P, %v4i32* %S) {
; CHECK: vsubuwm {{[0-9]+}}, [[REG1]], [[REG2]]
define void @test_v4i32_neg_odd(%v4i32* %P, %v4i32* %S) {
- %p = load %v4i32* %P
+ %p = load %v4i32, %v4i32* %P
%r = add %v4i32 %p, < i32 -27, i32 -27, i32 -27, i32 -27 >
store %v4i32 %r, %v4i32* %S
ret void
@@ -100,7 +100,7 @@ define void @test_v4i32_neg_odd(%v4i32* %P, %v4i32* %S) {
; CHECK: vadduwm {{[0-9]+}}, [[REG1]], [[REG2]]
define void @test_v8i16_pos_odd(%v8i16* %P, %v8i16* %S) {
- %p = load %v8i16* %P
+ %p = load %v8i16, %v8i16* %P
%r = add %v8i16 %p, < i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31 >
store %v8i16 %r, %v8i16* %S
ret void
@@ -112,7 +112,7 @@ define void @test_v8i16_pos_odd(%v8i16* %P, %v8i16* %S) {
; CHECK: vsubuhm {{[0-9]+}}, [[REG1]], [[REG2]]
define void @test_v8i16_neg_odd(%v8i16* %P, %v8i16* %S) {
- %p = load %v8i16* %P
+ %p = load %v8i16, %v8i16* %P
%r = add %v8i16 %p, < i16 -31, i16 -31, i16 -31, i16 -31, i16 -31, i16 -31, i16 -31, i16 -31 >
store %v8i16 %r, %v8i16* %S
ret void
@@ -124,7 +124,7 @@ define void @test_v8i16_neg_odd(%v8i16* %P, %v8i16* %S) {
; CHECK: vadduhm {{[0-9]+}}, [[REG1]], [[REG2]]
define void @test_v16i8_pos_odd(%v16i8* %P, %v16i8* %S) {
- %p = load %v16i8* %P
+ %p = load %v16i8, %v16i8* %P
%r = add %v16i8 %p, < i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17 >
store %v16i8 %r, %v16i8* %S
ret void
@@ -136,7 +136,7 @@ define void @test_v16i8_pos_odd(%v16i8* %P, %v16i8* %S) {
; CHECK: vsububm {{[0-9]+}}, [[REG1]], [[REG2]]
define void @test_v16i8_neg_odd(%v16i8* %P, %v16i8* %S) {
- %p = load %v16i8* %P
+ %p = load %v16i8, %v16i8* %P
%r = add %v16i8 %p, < i8 -17, i8 -17, i8 -17, i8 -17, i8 -17, i8 -17, i8 -17, i8 -17, i8 -17, i8 -17, i8 -17, i8 -17, i8 -17, i8 -17, i8 -17, i8 -17 >
store %v16i8 %r, %v16i8* %S
ret void
diff --git a/test/CodeGen/PowerPC/varargs-struct-float.ll b/test/CodeGen/PowerPC/varargs-struct-float.ll
index 0fd9fc50892e..7bb5a3444cf5 100644
--- a/test/CodeGen/PowerPC/varargs-struct-float.ll
+++ b/test/CodeGen/PowerPC/varargs-struct-float.ll
@@ -8,11 +8,11 @@ target triple = "powerpc64-unknown-linux-gnu"
define void @foo(float inreg %s.coerce) nounwind {
entry:
%s = alloca %struct.Sf1, align 4
- %coerce.dive = getelementptr %struct.Sf1* %s, i32 0, i32 0
+ %coerce.dive = getelementptr %struct.Sf1, %struct.Sf1* %s, i32 0, i32 0
store float %s.coerce, float* %coerce.dive, align 1
- %coerce.dive1 = getelementptr %struct.Sf1* %s, i32 0, i32 0
- %0 = load float* %coerce.dive1, align 1
- call void (i32, ...)* @testvaSf1(i32 1, float inreg %0)
+ %coerce.dive1 = getelementptr %struct.Sf1, %struct.Sf1* %s, i32 0, i32 0
+ %0 = load float, float* %coerce.dive1, align 1
+ call void (i32, ...) @testvaSf1(i32 1, float inreg %0)
ret void
}
diff --git a/test/CodeGen/PowerPC/vcmp-fold.ll b/test/CodeGen/PowerPC/vcmp-fold.ll
index 7a42c27d2b4a..ee167083d4d6 100644
--- a/test/CodeGen/PowerPC/vcmp-fold.ll
+++ b/test/CodeGen/PowerPC/vcmp-fold.ll
@@ -5,11 +5,11 @@
define void @test(<4 x float>* %x, <4 x float>* %y, i32* %P) {
entry:
- %tmp = load <4 x float>* %x ; <<4 x float>> [#uses=1]
- %tmp2 = load <4 x float>* %y ; <<4 x float>> [#uses=1]
+ %tmp = load <4 x float>, <4 x float>* %x ; <<4 x float>> [#uses=1]
+ %tmp2 = load <4 x float>, <4 x float>* %y ; <<4 x float>> [#uses=1]
%tmp.upgrd.1 = call i32 @llvm.ppc.altivec.vcmpbfp.p( i32 1, <4 x float> %tmp, <4 x float> %tmp2 ) ; <i32> [#uses=1]
- %tmp4 = load <4 x float>* %x ; <<4 x float>> [#uses=1]
- %tmp6 = load <4 x float>* %y ; <<4 x float>> [#uses=1]
+ %tmp4 = load <4 x float>, <4 x float>* %x ; <<4 x float>> [#uses=1]
+ %tmp6 = load <4 x float>, <4 x float>* %y ; <<4 x float>> [#uses=1]
%tmp.upgrd.2 = call <4 x i32> @llvm.ppc.altivec.vcmpbfp( <4 x float> %tmp4, <4 x float> %tmp6 ) ; <<4 x i32>> [#uses=1]
%tmp7 = bitcast <4 x i32> %tmp.upgrd.2 to <4 x float> ; <<4 x float>> [#uses=1]
store <4 x float> %tmp7, <4 x float>* %x
diff --git a/test/CodeGen/PowerPC/vec-abi-align.ll b/test/CodeGen/PowerPC/vec-abi-align.ll
index 5075ff2b8c07..48f1adbe5e59 100644
--- a/test/CodeGen/PowerPC/vec-abi-align.ll
+++ b/test/CodeGen/PowerPC/vec-abi-align.ll
@@ -26,52 +26,52 @@ entry:
; Function Attrs: nounwind
define void @test2(i64 %d1, i64 %d2, i64 %d3, i64 %d4, i64 %d5, i64 %d6, i64 %d7, i64 %d8, %struct.s2* byval nocapture readonly %vs) #0 {
entry:
- %m = getelementptr inbounds %struct.s2* %vs, i64 0, i32 0
- %0 = load i64* %m, align 8
+ %m = getelementptr inbounds %struct.s2, %struct.s2* %vs, i64 0, i32 0
+ %0 = load i64, i64* %m, align 8
store i64 %0, i64* @n, align 8
- %v = getelementptr inbounds %struct.s2* %vs, i64 0, i32 1
- %1 = load <4 x float>* %v, align 16
+ %v = getelementptr inbounds %struct.s2, %struct.s2* %vs, i64 0, i32 1
+ %1 = load <4 x float>, <4 x float>* %v, align 16
store <4 x float> %1, <4 x float>* @ve, align 16
ret void
; CHECK-LABEL: @test2
-; CHECK: ld {{[0-9]+}}, 112(1)
-; CHECK: li [[REG16:[0-9]+]], 16
-; CHECK: addi [[REGB:[0-9]+]], 1, 112
-; CHECK: lvx 2, [[REGB]], [[REG16]]
+; CHECK-DAG: ld {{[0-9]+}}, 112(1)
+; CHECK-DAG: li [[REG16:[0-9]+]], 16
+; CHECK-DAG: addi [[REGB:[0-9]+]], 1, 112
+; CHECK-DAG: lvx 2, [[REGB]], [[REG16]]
; CHECK: blr
; CHECK-VSX-LABEL: @test2
-; CHECK-VSX: ld {{[0-9]+}}, 112(1)
-; CHECK-VSX: li [[REG16:[0-9]+]], 16
-; CHECK-VSX: addi [[REGB:[0-9]+]], 1, 112
-; CHECK-VSX: lxvw4x {{[0-9]+}}, [[REGB]], [[REG16]]
+; CHECK-VSX-DAG: ld {{[0-9]+}}, 112(1)
+; CHECK-VSX-DAG: li [[REG16:[0-9]+]], 16
+; CHECK-VSX-DAG: addi [[REGB:[0-9]+]], 1, 112
+; CHECK-VSX-DAG: lxvw4x {{[0-9]+}}, [[REGB]], [[REG16]]
; CHECK-VSX: blr
}
; Function Attrs: nounwind
define void @test3(i64 %d1, i64 %d2, i64 %d3, i64 %d4, i64 %d5, i64 %d6, i64 %d7, i64 %d8, i64 %d9, %struct.s2* byval nocapture readonly %vs) #0 {
entry:
- %m = getelementptr inbounds %struct.s2* %vs, i64 0, i32 0
- %0 = load i64* %m, align 8
+ %m = getelementptr inbounds %struct.s2, %struct.s2* %vs, i64 0, i32 0
+ %0 = load i64, i64* %m, align 8
store i64 %0, i64* @n, align 8
- %v = getelementptr inbounds %struct.s2* %vs, i64 0, i32 1
- %1 = load <4 x float>* %v, align 16
+ %v = getelementptr inbounds %struct.s2, %struct.s2* %vs, i64 0, i32 1
+ %1 = load <4 x float>, <4 x float>* %v, align 16
store <4 x float> %1, <4 x float>* @ve, align 16
ret void
; CHECK-LABEL: @test3
-; CHECK: ld {{[0-9]+}}, 128(1)
-; CHECK: li [[REG16:[0-9]+]], 16
-; CHECK: addi [[REGB:[0-9]+]], 1, 128
-; CHECK: lvx 2, [[REGB]], [[REG16]]
+; CHECK-DAG: ld {{[0-9]+}}, 128(1)
+; CHECK-DAG: li [[REG16:[0-9]+]], 16
+; CHECK-DAG: addi [[REGB:[0-9]+]], 1, 128
+; CHECK-DAG: lvx 2, [[REGB]], [[REG16]]
; CHECK: blr
; CHECK-VSX-LABEL: @test3
-; CHECK-VSX: ld {{[0-9]+}}, 128(1)
-; CHECK-VSX: li [[REG16:[0-9]+]], 16
-; CHECK-VSX: addi [[REGB:[0-9]+]], 1, 128
-; CHECK-VSX: lxvw4x {{[0-9]+}}, [[REGB]], [[REG16]]
+; CHECK-VSX-DAG: ld {{[0-9]+}}, 128(1)
+; CHECK-VSX-DAG: li [[REG16:[0-9]+]], 16
+; CHECK-VSX-DAG: addi [[REGB:[0-9]+]], 1, 128
+; CHECK-VSX-DAG: lxvw4x {{[0-9]+}}, [[REGB]], [[REG16]]
; CHECK-VSX: blr
}
diff --git a/test/CodeGen/PowerPC/vec_add_sub_doubleword.ll b/test/CodeGen/PowerPC/vec_add_sub_doubleword.ll
new file mode 100644
index 000000000000..6b41141163ad
--- /dev/null
+++ b/test/CodeGen/PowerPC/vec_add_sub_doubleword.ll
@@ -0,0 +1,62 @@
+; Check VMX 64-bit integer operations
+;
+; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 < %s | FileCheck %s
+; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 -mattr=-vsx < %s | FileCheck %s
+
+define <2 x i64> @test_add(<2 x i64> %x, <2 x i64> %y) nounwind {
+ %result = add <2 x i64> %x, %y
+ ret <2 x i64> %result
+; CHECK: vaddudm 2, 2, 3
+}
+
+define <2 x i64> @increment_by_one(<2 x i64> %x) nounwind {
+ %result = add <2 x i64> %x, <i64 1, i64 1>
+ ret <2 x i64> %result
+; CHECK: vaddudm 2, 2, 3
+}
+
+define <2 x i64> @increment_by_val(<2 x i64> %x, i64 %val) nounwind {
+ %tmpvec = insertelement <2 x i64> <i64 0, i64 0>, i64 %val, i32 0
+ %tmpvec2 = insertelement <2 x i64> %tmpvec, i64 %val, i32 1
+ %result = add <2 x i64> %x, %tmpvec2
+ ret <2 x i64> %result
+; CHECK: vaddudm 2, 2, 3
+; FIXME: This is currently generating the following instruction sequence
+;
+; std 5, -8(1)
+; std 5, -16(1)
+; addi 3, 1, -16
+; ori 2, 2, 0
+; lxvd2x 35, 0, 3
+; vaddudm 2, 2, 3
+; blr
+;
+; This will almost certainly cause a load-hit-store hazard.
+; Since val is a value parameter, it should not need to be
+; saved onto the stack at all (unless we're using this to set
+; up the vector register). Instead, it would be better to splat
+; the value into a vector register.
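+;
+; As an illustrative sketch only (assuming VSX is available, which the
+; second RUN line above disables), such a splat might look like:
+;
+;   mtvsrd 35, 5       ; move %val (GPR 5) into VSR 35 (i.e. v3)
+;   xxspltd 35, 35, 0  ; splat doubleword 0 across both doublewords
+;   vaddudm 2, 2, 3
+;
+; keeping %val in registers and avoiding the store/reload round trip.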
+}
+
+define <2 x i64> @test_sub(<2 x i64> %x, <2 x i64> %y) nounwind {
+ %result = sub <2 x i64> %x, %y
+ ret <2 x i64> %result
+; CHECK: vsubudm 2, 2, 3
+}
+
+define <2 x i64> @decrement_by_one(<2 x i64> %x) nounwind {
+ %result = sub <2 x i64> %x, <i64 -1, i64 -1>
+ ret <2 x i64> %result
+; CHECK: vsubudm 2, 2, 3
+}
+
+define <2 x i64> @decrement_by_val(<2 x i64> %x, i64 %val) nounwind {
+ %tmpvec = insertelement <2 x i64> <i64 0, i64 0>, i64 %val, i32 0
+ %tmpvec2 = insertelement <2 x i64> %tmpvec, i64 %val, i32 1
+ %result = sub <2 x i64> %x, %tmpvec2
+ ret <2 x i64> %result
+; CHECK: vsubudm 2, 2, 3
+}
+
+
+
diff --git a/test/CodeGen/PowerPC/vec_add_sub_quadword.ll b/test/CodeGen/PowerPC/vec_add_sub_quadword.ll
new file mode 100644
index 000000000000..f7ebf479755c
--- /dev/null
+++ b/test/CodeGen/PowerPC/vec_add_sub_quadword.ll
@@ -0,0 +1,130 @@
+; Check VMX 128-bit integer operations
+;
+; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 < %s | FileCheck %s
+; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 -mattr=-vsx < %s | FileCheck %s
+
+define <1 x i128> @test_add(<1 x i128> %x, <1 x i128> %y) nounwind {
+ %result = add <1 x i128> %x, %y
+ ret <1 x i128> %result
+; CHECK-LABEL: @test_add
+; CHECK: vadduqm 2, 2, 3
+}
+
+define <1 x i128> @increment_by_one(<1 x i128> %x) nounwind {
+ %result = add <1 x i128> %x, <i128 1>
+ ret <1 x i128> %result
+; CHECK-LABEL: @increment_by_one
+; CHECK: vadduqm 2, 2, 3
+}
+
+define <1 x i128> @increment_by_val(<1 x i128> %x, i128 %val) nounwind {
+ %tmpvec = insertelement <1 x i128> <i128 0>, i128 %val, i32 0
+ %tmpvec2 = insertelement <1 x i128> %tmpvec, i128 %val, i32 1
+ %result = add <1 x i128> %x, %tmpvec2
+ ret <1 x i128> %result
+; CHECK-LABEL: @increment_by_val
+; CHECK: vadduqm 2, 2, 3
+}
+
+define <1 x i128> @test_sub(<1 x i128> %x, <1 x i128> %y) nounwind {
+ %result = sub <1 x i128> %x, %y
+ ret <1 x i128> %result
+; CHECK-LABEL: @test_sub
+; CHECK: vsubuqm 2, 2, 3
+}
+
+define <1 x i128> @decrement_by_one(<1 x i128> %x) nounwind {
+ %result = sub <1 x i128> %x, <i128 1>
+ ret <1 x i128> %result
+; CHECK-LABEL: @decrement_by_one
+; CHECK: vsubuqm 2, 2, 3
+}
+
+define <1 x i128> @decrement_by_val(<1 x i128> %x, i128 %val) nounwind {
+ %tmpvec = insertelement <1 x i128> <i128 0>, i128 %val, i32 0
+ %tmpvec2 = insertelement <1 x i128> %tmpvec, i128 %val, i32 1
+ %result = sub <1 x i128> %x, %tmpvec2
+ ret <1 x i128> %result
+; CHECK-LABEL: @decrement_by_val
+; CHECK: vsubuqm 2, 2, 3
+}
+
+declare <1 x i128> @llvm.ppc.altivec.vaddeuqm(<1 x i128> %x,
+ <1 x i128> %y,
+ <1 x i128> %z) nounwind readnone
+declare <1 x i128> @llvm.ppc.altivec.vaddcuq(<1 x i128> %x,
+ <1 x i128> %y) nounwind readnone
+declare <1 x i128> @llvm.ppc.altivec.vaddecuq(<1 x i128> %x,
+ <1 x i128> %y,
+ <1 x i128> %z) nounwind readnone
+declare <1 x i128> @llvm.ppc.altivec.vsubeuqm(<1 x i128> %x,
+ <1 x i128> %y,
+ <1 x i128> %z) nounwind readnone
+declare <1 x i128> @llvm.ppc.altivec.vsubcuq(<1 x i128> %x,
+ <1 x i128> %y) nounwind readnone
+declare <1 x i128> @llvm.ppc.altivec.vsubecuq(<1 x i128> %x,
+ <1 x i128> %y,
+ <1 x i128> %z) nounwind readnone
+
+define <1 x i128> @test_vaddeuqm(<1 x i128> %x,
+ <1 x i128> %y,
+ <1 x i128> %z) nounwind {
+ %tmp = tail call <1 x i128> @llvm.ppc.altivec.vaddeuqm(<1 x i128> %x,
+ <1 x i128> %y,
+ <1 x i128> %z)
+ ret <1 x i128> %tmp
+; CHECK-LABEL: @test_vaddeuqm
+; CHECK: vaddeuqm 2, 2, 3, 4
+}
+
+define <1 x i128> @test_vaddcuq(<1 x i128> %x,
+ <1 x i128> %y) nounwind {
+ %tmp = tail call <1 x i128> @llvm.ppc.altivec.vaddcuq(<1 x i128> %x,
+ <1 x i128> %y)
+ ret <1 x i128> %tmp
+; CHECK-LABEL: @test_vaddcuq
+; CHECK: vaddcuq 2, 2, 3
+}
+
+define <1 x i128> @test_vaddecuq(<1 x i128> %x,
+ <1 x i128> %y,
+ <1 x i128> %z) nounwind {
+ %tmp = tail call <1 x i128> @llvm.ppc.altivec.vaddecuq(<1 x i128> %x,
+ <1 x i128> %y,
+ <1 x i128> %z)
+ ret <1 x i128> %tmp
+; CHECK-LABEL: @test_vaddecuq
+; CHECK: vaddecuq 2, 2, 3, 4
+}
+
+define <1 x i128> @test_vsubeuqm(<1 x i128> %x,
+ <1 x i128> %y,
+ <1 x i128> %z) nounwind {
+ %tmp = tail call <1 x i128> @llvm.ppc.altivec.vsubeuqm(<1 x i128> %x,
+ <1 x i128> %y,
+ <1 x i128> %z)
+ ret <1 x i128> %tmp
+; CHECK-LABEL: @test_vsubeuqm
+; CHECK: vsubeuqm 2, 2, 3, 4
+}
+
+define <1 x i128> @test_vsubcuq(<1 x i128> %x,
+ <1 x i128> %y) nounwind {
+ %tmp = tail call <1 x i128> @llvm.ppc.altivec.vsubcuq(<1 x i128> %x,
+ <1 x i128> %y)
+ ret <1 x i128> %tmp
+; CHECK-LABEL: @test_vsubcuq
+; CHECK: vsubcuq 2, 2, 3
+}
+
+define <1 x i128> @test_vsubecuq(<1 x i128> %x,
+ <1 x i128> %y,
+ <1 x i128> %z) nounwind {
+ %tmp = tail call <1 x i128> @llvm.ppc.altivec.vsubecuq(<1 x i128> %x,
+ <1 x i128> %y,
+ <1 x i128> %z)
+ ret <1 x i128> %tmp
+; CHECK-LABEL: @test_vsubecuq
+; CHECK: vsubecuq 2, 2, 3, 4
+}
+
diff --git a/test/CodeGen/PowerPC/vec_auto_constant.ll b/test/CodeGen/PowerPC/vec_auto_constant.ll
index 973f0890b139..ba8ef531014f 100644
--- a/test/CodeGen/PowerPC/vec_auto_constant.ll
+++ b/test/CodeGen/PowerPC/vec_auto_constant.ll
@@ -25,8 +25,8 @@ entry:
%"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
store <16 x i8> %x, <16 x i8>* %x_addr
store <16 x i8> <i8 22, i8 21, i8 20, i8 3, i8 25, i8 24, i8 23, i8 3, i8 28, i8 27, i8 26, i8 3, i8 31, i8 30, i8 29, i8 3>, <16 x i8>* %temp, align 16
- %0 = load <16 x i8>* %x_addr, align 16 ; <<16 x i8>> [#uses=1]
- %1 = load <16 x i8>* %temp, align 16 ; <<16 x i8>> [#uses=1]
+ %0 = load <16 x i8>, <16 x i8>* %x_addr, align 16 ; <<16 x i8>> [#uses=1]
+ %1 = load <16 x i8>, <16 x i8>* %temp, align 16 ; <<16 x i8>> [#uses=1]
%tmp = add <16 x i8> %0, %1 ; <<16 x i8>> [#uses=1]
store <16 x i8> %tmp, <16 x i8>* @baz, align 16
br label %return
diff --git a/test/CodeGen/PowerPC/vec_br_cmp.ll b/test/CodeGen/PowerPC/vec_br_cmp.ll
index c34d850c0ac7..14c9620143a1 100644
--- a/test/CodeGen/PowerPC/vec_br_cmp.ll
+++ b/test/CodeGen/PowerPC/vec_br_cmp.ll
@@ -5,8 +5,8 @@
; A predicate compare used immediately by a branch should not generate an mfcr.
define void @test(<4 x float>* %A, <4 x float>* %B) {
- %tmp = load <4 x float>* %A ; <<4 x float>> [#uses=1]
- %tmp3 = load <4 x float>* %B ; <<4 x float>> [#uses=1]
+ %tmp = load <4 x float>, <4 x float>* %A ; <<4 x float>> [#uses=1]
+ %tmp3 = load <4 x float>, <4 x float>* %B ; <<4 x float>> [#uses=1]
%tmp.upgrd.1 = tail call i32 @llvm.ppc.altivec.vcmpeqfp.p( i32 1, <4 x float> %tmp, <4 x float> %tmp3 ) ; <i32> [#uses=1]
%tmp.upgrd.2 = icmp eq i32 %tmp.upgrd.1, 0 ; <i1> [#uses=1]
br i1 %tmp.upgrd.2, label %cond_true, label %UnifiedReturnBlock
diff --git a/test/CodeGen/PowerPC/vec_buildvector_loadstore.ll b/test/CodeGen/PowerPC/vec_buildvector_loadstore.ll
index 7e58ec0bdef4..3b8507728aea 100644
--- a/test/CodeGen/PowerPC/vec_buildvector_loadstore.ll
+++ b/test/CodeGen/PowerPC/vec_buildvector_loadstore.ll
@@ -8,7 +8,7 @@ define void @foo() nounwind ssp {
; CHECK: _foo:
; CHECK-NOT: stw
entry:
- %tmp0 = load <16 x i8>* @a, align 16
+ %tmp0 = load <16 x i8>, <16 x i8>* @a, align 16
%tmp180.i = extractelement <16 x i8> %tmp0, i32 0 ; <i8> [#uses=1]
%tmp181.i = insertelement <16 x i8> <i8 0, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, i8 %tmp180.i, i32 2 ; <<16 x i8>> [#uses=1]
%tmp182.i = extractelement <16 x i8> %tmp0, i32 1 ; <i8> [#uses=1]
diff --git a/test/CodeGen/PowerPC/vec_clz.ll b/test/CodeGen/PowerPC/vec_clz.ll
new file mode 100644
index 000000000000..01cdecdbb762
--- /dev/null
+++ b/test/CodeGen/PowerPC/vec_clz.ll
@@ -0,0 +1,40 @@
+; Check the vclz* instructions that were added in P8
+; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 < %s | FileCheck %s
+; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 -mattr=-vsx < %s | FileCheck %s
+
+declare <16 x i8> @llvm.ctlz.v16i8(<16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.ctlz.v8i16(<8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.ctlz.v4i32(<4 x i32>) nounwind readnone
+declare <2 x i64> @llvm.ctlz.v2i64(<2 x i64>) nounwind readnone
+
+define <16 x i8> @test_v16i8(<16 x i8> %x) nounwind readnone {
+ %vcnt = tail call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %x)
+ ret <16 x i8> %vcnt
+; CHECK: @test_v16i8
+; CHECK: vclzb 2, 2
+; CHECK: blr
+}
+
+define <8 x i16> @test_v8i16(<8 x i16> %x) nounwind readnone {
+ %vcnt = tail call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %x)
+ ret <8 x i16> %vcnt
+; CHECK: @test_v8i16
+; CHECK: vclzh 2, 2
+; CHECK: blr
+}
+
+define <4 x i32> @test_v4i32(<4 x i32> %x) nounwind readnone {
+ %vcnt = tail call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %x)
+ ret <4 x i32> %vcnt
+; CHECK: @test_v4i32
+; CHECK: vclzw 2, 2
+; CHECK: blr
+}
+
+define <2 x i64> @test_v2i64(<2 x i64> %x) nounwind readnone {
+ %vcnt = tail call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %x)
+ ret <2 x i64> %vcnt
+; CHECK: @test_v2i64
+; CHECK: vclzd 2, 2
+; CHECK: blr
+}
diff --git a/test/CodeGen/PowerPC/vec_cmpd.ll b/test/CodeGen/PowerPC/vec_cmpd.ll
new file mode 100644
index 000000000000..4a06ed9ffafd
--- /dev/null
+++ b/test/CodeGen/PowerPC/vec_cmpd.ll
@@ -0,0 +1,258 @@
+; Test the doubleword comparison instructions that were added in POWER8
+;
+; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 < %s | FileCheck %s
+; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 -mattr=-vsx < %s | FileCheck %s
+
+define <2 x i64> @v2si64_cmp(<2 x i64> %x, <2 x i64> %y) nounwind readnone {
+ %cmp = icmp eq <2 x i64> %x, %y
+ %result = sext <2 x i1> %cmp to <2 x i64>
+ ret <2 x i64> %result
+; CHECK-LABEL: v2si64_cmp:
+; CHECK: vcmpequd 2, 2, 3
+}
+
+define <4 x i64> @v4si64_cmp(<4 x i64> %x, <4 x i64> %y) nounwind readnone {
+ %cmp = icmp eq <4 x i64> %x, %y
+ %result = sext <4 x i1> %cmp to <4 x i64>
+ ret <4 x i64> %result
+; CHECK-LABEL: v4si64_cmp
+; CHECK: vcmpequd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpequd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+}
+
+define <8 x i64> @v8si64_cmp(<8 x i64> %x, <8 x i64> %y) nounwind readnone {
+ %cmp = icmp eq <8 x i64> %x, %y
+ %result = sext <8 x i1> %cmp to <8 x i64>
+ ret <8 x i64> %result
+; CHECK-LABEL: v8si64_cmp
+; CHECK: vcmpequd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpequd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpequd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpequd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+}
+
+define <16 x i64> @v16si64_cmp(<16 x i64> %x, <16 x i64> %y) nounwind readnone {
+ %cmp = icmp eq <16 x i64> %x, %y
+ %result = sext <16 x i1> %cmp to <16 x i64>
+ ret <16 x i64> %result
+; CHECK-LABEL: v16si64_cmp
+; CHECK: vcmpequd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpequd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpequd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpequd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpequd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpequd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpequd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpequd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+}
+
+define <32 x i64> @v32si64_cmp(<32 x i64> %x, <32 x i64> %y) nounwind readnone {
+ %cmp = icmp eq <32 x i64> %x, %y
+ %result = sext <32 x i1> %cmp to <32 x i64>
+ ret <32 x i64> %result
+; CHECK-LABEL: v32si64_cmp
+; CHECK: vcmpequd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpequd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpequd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpequd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpequd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpequd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpequd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpequd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpequd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpequd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpequd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpequd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpequd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpequd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpequd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpequd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+}
+
+; Greater than signed
+define <2 x i64> @v2si64_cmp_gt(<2 x i64> %x, <2 x i64> %y) nounwind readnone {
+ %cmp = icmp sgt <2 x i64> %x, %y
+ %result = sext <2 x i1> %cmp to <2 x i64>
+ ret <2 x i64> %result
+; CHECK-LABEL: v2si64_cmp_gt
+; CHECK: vcmpgtsd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+}
+
+define <4 x i64> @v4si64_cmp_gt(<4 x i64> %x, <4 x i64> %y) nounwind readnone {
+ %cmp = icmp sgt <4 x i64> %x, %y
+ %result = sext <4 x i1> %cmp to <4 x i64>
+ ret <4 x i64> %result
+; CHECK-LABEL: v4si64_cmp_gt
+; CHECK: vcmpgtsd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpgtsd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+}
+
+define <8 x i64> @v8si64_cmp_gt(<8 x i64> %x, <8 x i64> %y) nounwind readnone {
+ %cmp = icmp sgt <8 x i64> %x, %y
+ %result = sext <8 x i1> %cmp to <8 x i64>
+ ret <8 x i64> %result
+; CHECK-LABEL: v8si64_cmp_gt
+; CHECK: vcmpgtsd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpgtsd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpgtsd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpgtsd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+}
+
+define <16 x i64> @v16si64_cmp_gt(<16 x i64> %x, <16 x i64> %y) nounwind readnone {
+ %cmp = icmp sgt <16 x i64> %x, %y
+ %result = sext <16 x i1> %cmp to <16 x i64>
+ ret <16 x i64> %result
+; CHECK-LABEL: v16si64_cmp_gt
+; CHECK: vcmpgtsd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpgtsd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpgtsd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpgtsd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpgtsd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpgtsd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpgtsd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpgtsd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+}
+
+define <32 x i64> @v32si64_cmp_gt(<32 x i64> %x, <32 x i64> %y) nounwind readnone {
+ %cmp = icmp sgt <32 x i64> %x, %y
+ %result = sext <32 x i1> %cmp to <32 x i64>
+ ret <32 x i64> %result
+; CHECK-LABEL: v32si64_cmp_gt
+; CHECK: vcmpgtsd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpgtsd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpgtsd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpgtsd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpgtsd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpgtsd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpgtsd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpgtsd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpgtsd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpgtsd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpgtsd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpgtsd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpgtsd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpgtsd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpgtsd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpgtsd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+}
+
+; Greater than unsigned
+define <2 x i64> @v2ui64_cmp_gt(<2 x i64> %x, <2 x i64> %y) nounwind readnone {
+ %cmp = icmp ugt <2 x i64> %x, %y
+ %result = sext <2 x i1> %cmp to <2 x i64>
+ ret <2 x i64> %result
+; CHECK-LABEL: v2ui64_cmp_gt
+; CHECK: vcmpgtud {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+}
+
+define <4 x i64> @v4ui64_cmp_gt(<4 x i64> %x, <4 x i64> %y) nounwind readnone {
+ %cmp = icmp ugt <4 x i64> %x, %y
+ %result = sext <4 x i1> %cmp to <4 x i64>
+ ret <4 x i64> %result
+; CHECK-LABEL: v4ui64_cmp_gt
+; CHECK: vcmpgtud {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpgtud {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+}
+
+define <8 x i64> @v8ui64_cmp_gt(<8 x i64> %x, <8 x i64> %y) nounwind readnone {
+ %cmp = icmp ugt <8 x i64> %x, %y
+ %result = sext <8 x i1> %cmp to <8 x i64>
+ ret <8 x i64> %result
+; CHECK-LABEL: v8ui64_cmp_gt
+; CHECK: vcmpgtud {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpgtud {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpgtud {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpgtud {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+}
+
+define <16 x i64> @v16ui64_cmp_gt(<16 x i64> %x, <16 x i64> %y) nounwind readnone {
+ %cmp = icmp ugt <16 x i64> %x, %y
+ %result = sext <16 x i1> %cmp to <16 x i64>
+ ret <16 x i64> %result
+; CHECK-LABEL: v16ui64_cmp_gt
+; CHECK: vcmpgtud {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpgtud {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpgtud {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpgtud {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpgtud {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpgtud {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpgtud {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpgtud {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+}
+
+define <32 x i64> @v32ui64_cmp_gt(<32 x i64> %x, <32 x i64> %y) nounwind readnone {
+ %cmp = icmp ugt <32 x i64> %x, %y
+ %result = sext <32 x i1> %cmp to <32 x i64>
+ ret <32 x i64> %result
+; CHECK-LABEL: v32ui64_cmp_gt
+; CHECK: vcmpgtud {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpgtud {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpgtud {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpgtud {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpgtud {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpgtud {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpgtud {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpgtud {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpgtud {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpgtud {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpgtud {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpgtud {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpgtud {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpgtud {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpgtud {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpgtud {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+}
+
+; Check the intrinsics also
+declare <2 x i64> @llvm.ppc.altivec.vcmpequd(<2 x i64>, <2 x i64>) nounwind readnone
+declare i32 @llvm.ppc.altivec.vcmpequd.p(i32, <2 x i64>, <2 x i64>) nounwind readnone
+declare <2 x i64> @llvm.ppc.altivec.vcmpgtsd(<2 x i64>, <2 x i64>) nounwind readnone
+declare i32 @llvm.ppc.altivec.vcmpgtsd.p(i32, <2 x i64>, <2 x i64>) nounwind readnone
+declare <2 x i64> @llvm.ppc.altivec.vcmpgtud(<2 x i64>, <2 x i64>) nounwind readnone
+declare i32 @llvm.ppc.altivec.vcmpgtud.p(i32, <2 x i64>, <2 x i64>) nounwind readnone
+
+define <2 x i64> @test_vcmpequd(<2 x i64> %x, <2 x i64> %y) {
+ %tmp = tail call <2 x i64> @llvm.ppc.altivec.vcmpequd(<2 x i64> %x, <2 x i64> %y)
+ ret <2 x i64> %tmp
+; CHECK-LABEL: test_vcmpequd:
+; CHECK: vcmpequd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+}
+
+define i32 @test_vcmpequd_p(<2 x i64> %x, <2 x i64> %y) {
+ %tmp = tail call i32 @llvm.ppc.altivec.vcmpequd.p(i32 2, <2 x i64> %x, <2 x i64> %y)
+ ret i32 %tmp
+; CHECK-LABEL: test_vcmpequd_p:
+; CHECK: vcmpequd. {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+}
+
+define <2 x i64> @test_vcmpgtsd(<2 x i64> %x, <2 x i64> %y) {
+ %tmp = tail call <2 x i64> @llvm.ppc.altivec.vcmpgtsd(<2 x i64> %x, <2 x i64> %y)
+ ret <2 x i64> %tmp
+; CHECK-LABEL: test_vcmpgtsd
+; CHECK: vcmpgtsd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+}
+
+define i32 @test_vcmpgtsd_p(<2 x i64> %x, <2 x i64> %y) {
+ %tmp = tail call i32 @llvm.ppc.altivec.vcmpgtsd.p(i32 2, <2 x i64> %x, <2 x i64> %y)
+ ret i32 %tmp
+; CHECK-LABEL: test_vcmpgtsd_p
+; CHECK: vcmpgtsd. {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+}
+
+define <2 x i64> @test_vcmpgtud(<2 x i64> %x, <2 x i64> %y) {
+ %tmp = tail call <2 x i64> @llvm.ppc.altivec.vcmpgtud(<2 x i64> %x, <2 x i64> %y)
+ ret <2 x i64> %tmp
+; CHECK-LABEL: test_vcmpgtud
+; CHECK: vcmpgtud {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+}
+
+define i32 @test_vcmpgtud_p(<2 x i64> %x, <2 x i64> %y) {
+ %tmp = tail call i32 @llvm.ppc.altivec.vcmpgtud.p(i32 2, <2 x i64> %x, <2 x i64> %y)
+ ret i32 %tmp
+; CHECK-LABEL: test_vcmpgtud_p
+; CHECK: vcmpgtud. {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+}
+
+
+
+
diff --git a/test/CodeGen/PowerPC/vec_constants.ll b/test/CodeGen/PowerPC/vec_constants.ll
index f16b9f511f53..858b85dce543 100644
--- a/test/CodeGen/PowerPC/vec_constants.ll
+++ b/test/CodeGen/PowerPC/vec_constants.ll
@@ -1,16 +1,14 @@
-; RUN: llc -O0 -mcpu=pwr7 < %s | FileCheck %s
-
-target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
-target triple = "powerpc64-unknown-linux-gnu"
+; RUN: llc -O0 -mcpu=pwr7 -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s
+; RUN: llc -O0 -mcpu=pwr7 -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s
define void @test1(<4 x i32>* %P1, <4 x i32>* %P2, <4 x float>* %P3) nounwind {
- %tmp = load <4 x i32>* %P1 ; <<4 x i32>> [#uses=1]
+ %tmp = load <4 x i32>, <4 x i32>* %P1 ; <<4 x i32>> [#uses=1]
%tmp4 = and <4 x i32> %tmp, < i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648 > ; <<4 x i32>> [#uses=1]
store <4 x i32> %tmp4, <4 x i32>* %P1
- %tmp7 = load <4 x i32>* %P2 ; <<4 x i32>> [#uses=1]
+ %tmp7 = load <4 x i32>, <4 x i32>* %P2 ; <<4 x i32>> [#uses=1]
%tmp9 = and <4 x i32> %tmp7, < i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647 > ; <<4 x i32>> [#uses=1]
store <4 x i32> %tmp9, <4 x i32>* %P2
- %tmp.upgrd.1 = load <4 x float>* %P3 ; <<4 x float>> [#uses=1]
+ %tmp.upgrd.1 = load <4 x float>, <4 x float>* %P3 ; <<4 x float>> [#uses=1]
%tmp11 = bitcast <4 x float> %tmp.upgrd.1 to <4 x i32> ; <<4 x i32>> [#uses=1]
%tmp12 = and <4 x i32> %tmp11, < i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647 > ; <<4 x i32>> [#uses=1]
%tmp13 = bitcast <4 x i32> %tmp12 to <4 x float> ; <<4 x float>> [#uses=1]
diff --git a/test/CodeGen/PowerPC/vec_conv.ll b/test/CodeGen/PowerPC/vec_conv.ll
index a39ae9100355..6e19f5a010d2 100644
--- a/test/CodeGen/PowerPC/vec_conv.ll
+++ b/test/CodeGen/PowerPC/vec_conv.ll
@@ -11,7 +11,7 @@ target triple = "powerpc64-unknown-linux-gnu"
define void @v4f32_to_v4i32(<4 x float> %x, <4 x i32>* nocapture %y) nounwind {
entry:
- %0 = load <4 x float>* @cte_float, align 16
+ %0 = load <4 x float>, <4 x float>* @cte_float, align 16
%mul = fmul <4 x float> %0, %x
%1 = fptosi <4 x float> %mul to <4 x i32>
store <4 x i32> %1, <4 x i32>* %y, align 16
@@ -23,7 +23,7 @@ entry:
define void @v4f32_to_v4u32(<4 x float> %x, <4 x i32>* nocapture %y) nounwind {
entry:
- %0 = load <4 x float>* @cte_float, align 16
+ %0 = load <4 x float>, <4 x float>* @cte_float, align 16
%mul = fmul <4 x float> %0, %x
%1 = fptoui <4 x float> %mul to <4 x i32>
store <4 x i32> %1, <4 x i32>* %y, align 16
@@ -35,7 +35,7 @@ entry:
define void @v4i32_to_v4f32(<4 x i32> %x, <4 x float>* nocapture %y) nounwind {
entry:
- %0 = load <4 x i32>* @cte_int, align 16
+ %0 = load <4 x i32>, <4 x i32>* @cte_int, align 16
%mul = mul <4 x i32> %0, %x
%1 = sitofp <4 x i32> %mul to <4 x float>
store <4 x float> %1, <4 x float>* %y, align 16
@@ -47,7 +47,7 @@ entry:
define void @v4u32_to_v4f32(<4 x i32> %x, <4 x float>* nocapture %y) nounwind {
entry:
- %0 = load <4 x i32>* @cte_int, align 16
+ %0 = load <4 x i32>, <4 x i32>* @cte_int, align 16
%mul = mul <4 x i32> %0, %x
%1 = uitofp <4 x i32> %mul to <4 x float>
store <4 x float> %1, <4 x float>* %y, align 16
diff --git a/test/CodeGen/PowerPC/vec_fneg.ll b/test/CodeGen/PowerPC/vec_fneg.ll
index e01e65979f6f..d6f6def64ea2 100644
--- a/test/CodeGen/PowerPC/vec_fneg.ll
+++ b/test/CodeGen/PowerPC/vec_fneg.ll
@@ -1,7 +1,7 @@
; RUN: llc < %s -march=ppc32 -mcpu=g5 | grep vsubfp
define void @t(<4 x float>* %A) {
- %tmp2 = load <4 x float>* %A
+ %tmp2 = load <4 x float>, <4 x float>* %A
%tmp3 = fsub <4 x float> < float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00 >, %tmp2
store <4 x float> %tmp3, <4 x float>* %A
ret void
diff --git a/test/CodeGen/PowerPC/vec_minmax.ll b/test/CodeGen/PowerPC/vec_minmax.ll
new file mode 100644
index 000000000000..e9ba6a01a9b8
--- /dev/null
+++ b/test/CodeGen/PowerPC/vec_minmax.ll
@@ -0,0 +1,34 @@
+; Test the vector min/max doubleword instructions added for P8
+; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 < %s | FileCheck %s
+; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 -mattr=-vsx < %s | FileCheck %s
+
+declare <2 x i64> @llvm.ppc.altivec.vmaxsd(<2 x i64>, <2 x i64>) nounwind readnone
+declare <2 x i64> @llvm.ppc.altivec.vmaxud(<2 x i64>, <2 x i64>) nounwind readnone
+declare <2 x i64> @llvm.ppc.altivec.vminsd(<2 x i64>, <2 x i64>) nounwind readnone
+declare <2 x i64> @llvm.ppc.altivec.vminud(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <2 x i64> @test_vmaxsd(<2 x i64> %x, <2 x i64> %y) {
+ %tmp = tail call <2 x i64> @llvm.ppc.altivec.vmaxsd(<2 x i64> %x, <2 x i64> %y)
+ ret <2 x i64> %tmp
+; CHECK: vmaxsd 2, 2, 3
+}
+
+define <2 x i64> @test_vmaxud(<2 x i64> %x, <2 x i64> %y) {
+ %tmp = tail call <2 x i64> @llvm.ppc.altivec.vmaxud(<2 x i64> %x, <2 x i64> %y)
+ ret <2 x i64> %tmp
+; CHECK: vmaxud 2, 2, 3
+}
+
+define <2 x i64> @test_vminsd(<2 x i64> %x, <2 x i64> %y) {
+ %tmp = tail call <2 x i64> @llvm.ppc.altivec.vminsd(<2 x i64> %x, <2 x i64> %y)
+ ret <2 x i64> %tmp
+; CHECK: vminsd 2, 2, 3
+}
+
+define <2 x i64> @test_vminud(<2 x i64> %x, <2 x i64> %y) {
+ %tmp = tail call <2 x i64> @llvm.ppc.altivec.vminud(<2 x i64> %x, <2 x i64> %y)
+ ret <2 x i64> %tmp
+; CHECK: vminud 2, 2, 3
+}
+
+
diff --git a/test/CodeGen/PowerPC/vec_misaligned.ll b/test/CodeGen/PowerPC/vec_misaligned.ll
index 49f11e4e2604..ac639d719911 100644
--- a/test/CodeGen/PowerPC/vec_misaligned.ll
+++ b/test/CodeGen/PowerPC/vec_misaligned.ll
@@ -19,18 +19,18 @@ entry:
store i32 %x, i32* %x_addr
%ap1 = bitcast i8** %ap to i8* ; <i8*> [#uses=1]
call void @llvm.va_start( i8* %ap1 )
- %tmp = load i8** %ap, align 4 ; <i8*> [#uses=1]
+ %tmp = load i8*, i8** %ap, align 4 ; <i8*> [#uses=1]
store i8* %tmp, i8** %ap.0, align 4
- %tmp2 = load i8** %ap.0, align 4 ; <i8*> [#uses=1]
- %tmp3 = getelementptr i8* %tmp2, i64 16 ; <i8*> [#uses=1]
+ %tmp2 = load i8*, i8** %ap.0, align 4 ; <i8*> [#uses=1]
+ %tmp3 = getelementptr i8, i8* %tmp2, i64 16 ; <i8*> [#uses=1]
store i8* %tmp3, i8** %ap, align 4
- %tmp4 = load i8** %ap.0, align 4 ; <i8*> [#uses=1]
+ %tmp4 = load i8*, i8** %ap.0, align 4 ; <i8*> [#uses=1]
%tmp45 = bitcast i8* %tmp4 to %struct.S2203* ; <%struct.S2203*> [#uses=1]
- %tmp6 = getelementptr %struct.S2203* @s, i32 0, i32 0 ; <%struct.u16qi*> [#uses=1]
- %tmp7 = getelementptr %struct.S2203* %tmp45, i32 0, i32 0 ; <%struct.u16qi*> [#uses=1]
- %tmp8 = getelementptr %struct.u16qi* %tmp6, i32 0, i32 0 ; <<16 x i8>*> [#uses=1]
- %tmp9 = getelementptr %struct.u16qi* %tmp7, i32 0, i32 0 ; <<16 x i8>*> [#uses=1]
- %tmp10 = load <16 x i8>* %tmp9, align 4 ; <<16 x i8>> [#uses=1]
+ %tmp6 = getelementptr %struct.S2203, %struct.S2203* @s, i32 0, i32 0 ; <%struct.u16qi*> [#uses=1]
+ %tmp7 = getelementptr %struct.S2203, %struct.S2203* %tmp45, i32 0, i32 0 ; <%struct.u16qi*> [#uses=1]
+ %tmp8 = getelementptr %struct.u16qi, %struct.u16qi* %tmp6, i32 0, i32 0 ; <<16 x i8>*> [#uses=1]
+ %tmp9 = getelementptr %struct.u16qi, %struct.u16qi* %tmp7, i32 0, i32 0 ; <<16 x i8>*> [#uses=1]
+ %tmp10 = load <16 x i8>, <16 x i8>* %tmp9, align 4 ; <<16 x i8>> [#uses=1]
; CHECK: lvsl
; CHECK: vperm
; CHECK-LE: lvsr
diff --git a/test/CodeGen/PowerPC/vec_mul.ll b/test/CodeGen/PowerPC/vec_mul.ll
index 86596d4b0a87..e1c9217ff135 100644
--- a/test/CodeGen/PowerPC/vec_mul.ll
+++ b/test/CodeGen/PowerPC/vec_mul.ll
@@ -1,12 +1,12 @@
-; RUN: llc < %s -mtriple=powerpc-unknown-linux-gnu -march=ppc32 -mattr=+altivec -mattr=-vsx | FileCheck %s
+; RUN: llc < %s -mtriple=powerpc-unknown-linux-gnu -march=ppc32 -mattr=+altivec -mattr=-vsx -mattr=-power8-altivec | FileCheck %s
; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -march=ppc64 -mattr=+altivec -mattr=-vsx -mcpu=pwr7 | FileCheck %s
-; RUN: llc < %s -mtriple=powerpc64le-unknown-linux-gnu -march=ppc64 -mattr=+altivec -mattr=-vsx -mcpu=pwr8 | FileCheck %s -check-prefix=CHECK-LE
+; RUN: llc < %s -mtriple=powerpc64le-unknown-linux-gnu -march=ppc64 -mattr=+altivec -mattr=-vsx -mcpu=pwr8 -mattr=-power8-altivec | FileCheck %s -check-prefix=CHECK-LE
; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -march=ppc64 -mattr=+altivec -mattr=+vsx -mcpu=pwr7 | FileCheck %s -check-prefix=CHECK-VSX
-; RUN: llc < %s -mtriple=powerpc64le-unknown-linux-gnu -march=ppc64 -mattr=+altivec -mattr=+vsx -mcpu=pwr8 | FileCheck %s -check-prefix=CHECK-LE-VSX
+; RUN: llc < %s -mtriple=powerpc64le-unknown-linux-gnu -march=ppc64 -mattr=+altivec -mattr=+vsx -mcpu=pwr8 -mattr=-power8-altivec | FileCheck %s -check-prefix=CHECK-LE-VSX
define <4 x i32> @test_v4i32(<4 x i32>* %X, <4 x i32>* %Y) {
- %tmp = load <4 x i32>* %X ; <<4 x i32>> [#uses=1]
- %tmp2 = load <4 x i32>* %Y ; <<4 x i32>> [#uses=1]
+ %tmp = load <4 x i32>, <4 x i32>* %X ; <<4 x i32>> [#uses=1]
+ %tmp2 = load <4 x i32>, <4 x i32>* %Y ; <<4 x i32>> [#uses=1]
%tmp3 = mul <4 x i32> %tmp, %tmp2 ; <<4 x i32>> [#uses=1]
ret <4 x i32> %tmp3
}
@@ -24,8 +24,8 @@ define <4 x i32> @test_v4i32(<4 x i32>* %X, <4 x i32>* %Y) {
; CHECK-LE-VSX-NOT: mullw
define <8 x i16> @test_v8i16(<8 x i16>* %X, <8 x i16>* %Y) {
- %tmp = load <8 x i16>* %X ; <<8 x i16>> [#uses=1]
- %tmp2 = load <8 x i16>* %Y ; <<8 x i16>> [#uses=1]
+ %tmp = load <8 x i16>, <8 x i16>* %X ; <<8 x i16>> [#uses=1]
+ %tmp2 = load <8 x i16>, <8 x i16>* %Y ; <<8 x i16>> [#uses=1]
%tmp3 = mul <8 x i16> %tmp, %tmp2 ; <<8 x i16>> [#uses=1]
ret <8 x i16> %tmp3
}
@@ -43,8 +43,8 @@ define <8 x i16> @test_v8i16(<8 x i16>* %X, <8 x i16>* %Y) {
; CHECK-LE-VSX-NOT: mullw
define <16 x i8> @test_v16i8(<16 x i8>* %X, <16 x i8>* %Y) {
- %tmp = load <16 x i8>* %X ; <<16 x i8>> [#uses=1]
- %tmp2 = load <16 x i8>* %Y ; <<16 x i8>> [#uses=1]
+ %tmp = load <16 x i8>, <16 x i8>* %X ; <<16 x i8>> [#uses=1]
+ %tmp2 = load <16 x i8>, <16 x i8>* %Y ; <<16 x i8>> [#uses=1]
%tmp3 = mul <16 x i8> %tmp, %tmp2 ; <<16 x i8>> [#uses=1]
ret <16 x i8> %tmp3
}
@@ -68,8 +68,8 @@ define <16 x i8> @test_v16i8(<16 x i8>* %X, <16 x i8>* %Y) {
; CHECK-LE-VSX-NOT: mullw
define <4 x float> @test_float(<4 x float>* %X, <4 x float>* %Y) {
- %tmp = load <4 x float>* %X
- %tmp2 = load <4 x float>* %Y
+ %tmp = load <4 x float>, <4 x float>* %X
+ %tmp2 = load <4 x float>, <4 x float>* %Y
%tmp3 = fmul <4 x float> %tmp, %tmp2
ret <4 x float> %tmp3
}
diff --git a/test/CodeGen/PowerPC/vec_mul_even_odd.ll b/test/CodeGen/PowerPC/vec_mul_even_odd.ll
new file mode 100644
index 000000000000..b24bafd196dd
--- /dev/null
+++ b/test/CodeGen/PowerPC/vec_mul_even_odd.ll
@@ -0,0 +1,42 @@
+; Check the vector multiply even/odd word instructions that were added in P8
+;
+; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 < %s | FileCheck %s
+; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 -mattr=-vsx < %s | FileCheck %s
+
+declare <2 x i64> @llvm.ppc.altivec.vmuleuw(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i64> @llvm.ppc.altivec.vmulesw(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i64> @llvm.ppc.altivec.vmulouw(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i64> @llvm.ppc.altivec.vmulosw(<4 x i32>, <4 x i32>) nounwind readnone
+declare <4 x i32> @llvm.ppc.altivec.vmuluwm(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <2 x i64> @test_vmuleuw(<4 x i32> %x, <4 x i32> %y) nounwind readnone {
+ %tmp = tail call <2 x i64> @llvm.ppc.altivec.vmuleuw(<4 x i32> %x, <4 x i32> %y)
+ ret <2 x i64> %tmp
+; CHECK: vmuleuw 2, 2, 3
+}
+
+define <2 x i64> @test_vmulesw(<4 x i32> %x, <4 x i32> %y) nounwind readnone {
+ %tmp = tail call <2 x i64> @llvm.ppc.altivec.vmulesw(<4 x i32> %x, <4 x i32> %y)
+ ret <2 x i64> %tmp
+; CHECK: vmulesw 2, 2, 3
+}
+
+define <2 x i64> @test_vmulouw(<4 x i32> %x, <4 x i32> %y) nounwind readnone {
+ %tmp = tail call <2 x i64> @llvm.ppc.altivec.vmulouw(<4 x i32> %x, <4 x i32> %y)
+ ret <2 x i64> %tmp
+; CHECK: vmulouw 2, 2, 3
+}
+
+define <2 x i64> @test_vmulosw(<4 x i32> %x, <4 x i32> %y) nounwind readnone {
+ %tmp = tail call <2 x i64> @llvm.ppc.altivec.vmulosw(<4 x i32> %x, <4 x i32> %y)
+ ret <2 x i64> %tmp
+; CHECK: vmulosw 2, 2, 3
+}
+
+define <4 x i32> @test_vmuluwm(<4 x i32> %x, <4 x i32> %y) nounwind readnone {
+ %tmp = mul <4 x i32> %x, %y
+ ret <4 x i32> %tmp
+; CHECK-LABEL: test_vmuluwm
+; CHECK: vmuluwm 2, 2, 3
+}
+
diff --git a/test/CodeGen/PowerPC/vec_perf_shuffle.ll b/test/CodeGen/PowerPC/vec_perf_shuffle.ll
index 2c3594d224fe..f8b37fae7c23 100644
--- a/test/CodeGen/PowerPC/vec_perf_shuffle.ll
+++ b/test/CodeGen/PowerPC/vec_perf_shuffle.ll
@@ -1,36 +1,36 @@
; RUN: llc < %s -march=ppc32 -mcpu=g5 | not grep vperm
define <4 x float> @test_uu72(<4 x float>* %P1, <4 x float>* %P2) {
- %V1 = load <4 x float>* %P1 ; <<4 x float>> [#uses=1]
- %V2 = load <4 x float>* %P2 ; <<4 x float>> [#uses=1]
+ %V1 = load <4 x float>, <4 x float>* %P1 ; <<4 x float>> [#uses=1]
+ %V2 = load <4 x float>, <4 x float>* %P2 ; <<4 x float>> [#uses=1]
%V3 = shufflevector <4 x float> %V1, <4 x float> %V2, <4 x i32> < i32 undef, i32 undef, i32 7, i32 2 > ; <<4 x float>> [#uses=1]
ret <4 x float> %V3
}
define <4 x float> @test_30u5(<4 x float>* %P1, <4 x float>* %P2) {
- %V1 = load <4 x float>* %P1 ; <<4 x float>> [#uses=1]
- %V2 = load <4 x float>* %P2 ; <<4 x float>> [#uses=1]
+ %V1 = load <4 x float>, <4 x float>* %P1 ; <<4 x float>> [#uses=1]
+ %V2 = load <4 x float>, <4 x float>* %P2 ; <<4 x float>> [#uses=1]
%V3 = shufflevector <4 x float> %V1, <4 x float> %V2, <4 x i32> < i32 3, i32 0, i32 undef, i32 5 > ; <<4 x float>> [#uses=1]
ret <4 x float> %V3
}
define <4 x float> @test_3u73(<4 x float>* %P1, <4 x float>* %P2) {
- %V1 = load <4 x float>* %P1 ; <<4 x float>> [#uses=1]
- %V2 = load <4 x float>* %P2 ; <<4 x float>> [#uses=1]
+ %V1 = load <4 x float>, <4 x float>* %P1 ; <<4 x float>> [#uses=1]
+ %V2 = load <4 x float>, <4 x float>* %P2 ; <<4 x float>> [#uses=1]
%V3 = shufflevector <4 x float> %V1, <4 x float> %V2, <4 x i32> < i32 3, i32 undef, i32 7, i32 3 > ; <<4 x float>> [#uses=1]
ret <4 x float> %V3
}
define <4 x float> @test_3774(<4 x float>* %P1, <4 x float>* %P2) {
- %V1 = load <4 x float>* %P1 ; <<4 x float>> [#uses=1]
- %V2 = load <4 x float>* %P2 ; <<4 x float>> [#uses=1]
+ %V1 = load <4 x float>, <4 x float>* %P1 ; <<4 x float>> [#uses=1]
+ %V2 = load <4 x float>, <4 x float>* %P2 ; <<4 x float>> [#uses=1]
%V3 = shufflevector <4 x float> %V1, <4 x float> %V2, <4 x i32> < i32 3, i32 7, i32 7, i32 4 > ; <<4 x float>> [#uses=1]
ret <4 x float> %V3
}
define <4 x float> @test_4450(<4 x float>* %P1, <4 x float>* %P2) {
- %V1 = load <4 x float>* %P1 ; <<4 x float>> [#uses=1]
- %V2 = load <4 x float>* %P2 ; <<4 x float>> [#uses=1]
+ %V1 = load <4 x float>, <4 x float>* %P1 ; <<4 x float>> [#uses=1]
+ %V2 = load <4 x float>, <4 x float>* %P2 ; <<4 x float>> [#uses=1]
%V3 = shufflevector <4 x float> %V1, <4 x float> %V2, <4 x i32> < i32 4, i32 4, i32 5, i32 0 > ; <<4 x float>> [#uses=1]
ret <4 x float> %V3
}
diff --git a/test/CodeGen/PowerPC/vec_popcnt.ll b/test/CodeGen/PowerPC/vec_popcnt.ll
new file mode 100644
index 000000000000..0ce9dfac1df6
--- /dev/null
+++ b/test/CodeGen/PowerPC/vec_popcnt.ll
@@ -0,0 +1,72 @@
+; Check the vpopcnt* instructions that were added in P8
+; In addition, check the conversions to/from the v2i64 VMX register type, which was also added in P8.
+; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 < %s | FileCheck %s
+; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 -mattr=-vsx < %s | FileCheck %s
+
+declare <16 x i8> @llvm.ctpop.v16i8(<16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.ctpop.v8i16(<8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.ctpop.v4i32(<4 x i32>) nounwind readnone
+declare <2 x i64> @llvm.ctpop.v2i64(<2 x i64>) nounwind readnone
+
+define <16 x i8> @test_v16i8_v2i64(<2 x i64> %x) nounwind readnone {
+ %tmp = bitcast <2 x i64> %x to <16 x i8>;
+ %vcnt = tail call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %tmp)
+ ret <16 x i8> %vcnt
+; CHECK: @test_v16i8_v2i64
+; CHECK: vpopcntb 2, 2
+; CHECK: blr
+}
+
+define <8 x i16> @test_v8i16_v2i64(<2 x i64> %x) nounwind readnone {
+ %tmp = bitcast <2 x i64> %x to <8 x i16>
+ %vcnt = tail call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %tmp)
+ ret <8 x i16> %vcnt
+; CHECK: @test_v8i16_v2i64
+; CHECK: vpopcnth 2, 2
+; CHECK: blr
+}
+
+define <4 x i32> @test_v4i32_v2i64(<2 x i64> %x) nounwind readnone {
+ %tmp = bitcast <2 x i64> %x to <4 x i32>
+ %vcnt = tail call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %tmp)
+ ret <4 x i32> %vcnt
+; CHECK: @test_v4i32_v2i64
+; CHECK: vpopcntw 2, 2
+; CHECK: blr
+}
+
+define <2 x i64> @test_v2i64_v2i64(<2 x i64> %x) nounwind readnone {
+ %vcnt = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %x)
+ ret <2 x i64> %vcnt
+; CHECK: @test_v2i64_v2i64
+; CHECK: vpopcntd 2, 2
+; CHECK: blr
+}
+
+define <2 x i64> @test_v2i64_v4i32(<4 x i32> %x) nounwind readnone {
+ %tmp = bitcast <4 x i32> %x to <2 x i64>
+ %vcnt = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %tmp)
+ ret <2 x i64> %vcnt
+; CHECK: @test_v2i64_v4i32
+; CHECK: vpopcntd 2, 2
+; CHECK: blr
+}
+
+
+define <2 x i64> @test_v2i64_v8i16(<8 x i16> %x) nounwind readnone {
+ %tmp = bitcast <8 x i16> %x to <2 x i64>
+ %vcnt = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %tmp)
+ ret <2 x i64> %vcnt
+; CHECK: @test_v2i64_v8i16
+; CHECK: vpopcntd 2, 2
+; CHECK: blr
+}
+
+define <2 x i64> @test_v2i64_v16i8(<16 x i8> %x) nounwind readnone {
+ %tmp = bitcast <16 x i8> %x to <2 x i64>
+ %vcnt = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %tmp)
+ ret <2 x i64> %vcnt
+; CHECK: @test_v2i64_v16i8
+; CHECK: vpopcntd 2, 2
+; CHECK: blr
+}
diff --git a/test/CodeGen/PowerPC/vec_rotate_shift.ll b/test/CodeGen/PowerPC/vec_rotate_shift.ll
new file mode 100644
index 000000000000..1a2e9578e039
--- /dev/null
+++ b/test/CodeGen/PowerPC/vec_rotate_shift.ll
@@ -0,0 +1,36 @@
+; Test the vector rotate and shift doubleword instructions that were added in P8
+; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 < %s | FileCheck %s
+; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 -mattr=-vsx < %s | FileCheck %s
+
+declare <2 x i64> @llvm.ppc.altivec.vrld(<2 x i64>, <2 x i64>) nounwind readnone
+declare <2 x i64> @llvm.ppc.altivec.vsld(<2 x i64>, <2 x i64>) nounwind readnone
+declare <2 x i64> @llvm.ppc.altivec.vsrd(<2 x i64>, <2 x i64>) nounwind readnone
+declare <2 x i64> @llvm.ppc.altivec.vsrad(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <2 x i64> @test_vrld(<2 x i64> %x, <2 x i64> %y) nounwind readnone {
+ %tmp = tail call <2 x i64> @llvm.ppc.altivec.vrld(<2 x i64> %x, <2 x i64> %y)
+ ret <2 x i64> %tmp
+; CHECK: vrld 2, 2, 3
+}
+
+define <2 x i64> @test_vsld(<2 x i64> %x, <2 x i64> %y) nounwind readnone {
+ %tmp = shl <2 x i64> %x, %y
+ ret <2 x i64> %tmp
+; CHECK-LABEL: @test_vsld
+; CHECK: vsld 2, 2, 3
+}
+
+define <2 x i64> @test_vsrd(<2 x i64> %x, <2 x i64> %y) nounwind readnone {
+ %tmp = lshr <2 x i64> %x, %y
+ ret <2 x i64> %tmp
+; CHECK-LABEL: @test_vsrd
+; CHECK: vsrd 2, 2, 3
+}
+
+define <2 x i64> @test_vsrad(<2 x i64> %x, <2 x i64> %y) nounwind readnone {
+ %tmp = ashr <2 x i64> %x, %y
+ ret <2 x i64> %tmp
+; CHECK-LABEL: @test_vsrad
+; CHECK: vsrad 2, 2, 3
+}
+
diff --git a/test/CodeGen/PowerPC/vec_shuffle.ll b/test/CodeGen/PowerPC/vec_shuffle.ll
index 82706321c1c1..a942dd1c41c9 100644
--- a/test/CodeGen/PowerPC/vec_shuffle.ll
+++ b/test/CodeGen/PowerPC/vec_shuffle.ll
@@ -9,8 +9,8 @@
define void @VSLDOI_xy(<8 x i16>* %A, <8 x i16>* %B) {
entry:
- %tmp = load <8 x i16>* %A ; <<8 x i16>> [#uses=1]
- %tmp2 = load <8 x i16>* %B ; <<8 x i16>> [#uses=1]
+ %tmp = load <8 x i16>, <8 x i16>* %A ; <<8 x i16>> [#uses=1]
+ %tmp2 = load <8 x i16>, <8 x i16>* %B ; <<8 x i16>> [#uses=1]
%tmp.upgrd.1 = bitcast <8 x i16> %tmp to <16 x i8> ; <<16 x i8>> [#uses=11]
%tmp2.upgrd.2 = bitcast <8 x i16> %tmp2 to <16 x i8> ; <<16 x i8>> [#uses=5]
%tmp.upgrd.3 = extractelement <16 x i8> %tmp.upgrd.1, i32 5 ; <i8> [#uses=1]
@@ -51,8 +51,8 @@ entry:
}
define void @VSLDOI_xx(<8 x i16>* %A, <8 x i16>* %B) {
- %tmp = load <8 x i16>* %A ; <<8 x i16>> [#uses=1]
- %tmp2 = load <8 x i16>* %A ; <<8 x i16>> [#uses=1]
+ %tmp = load <8 x i16>, <8 x i16>* %A ; <<8 x i16>> [#uses=1]
+ %tmp2 = load <8 x i16>, <8 x i16>* %A ; <<8 x i16>> [#uses=1]
%tmp.upgrd.5 = bitcast <8 x i16> %tmp to <16 x i8> ; <<16 x i8>> [#uses=11]
%tmp2.upgrd.6 = bitcast <8 x i16> %tmp2 to <16 x i8> ; <<16 x i8>> [#uses=5]
%tmp.upgrd.7 = extractelement <16 x i8> %tmp.upgrd.5, i32 5 ; <i8> [#uses=1]
@@ -94,9 +94,9 @@ define void @VSLDOI_xx(<8 x i16>* %A, <8 x i16>* %B) {
define void @VPERM_promote(<8 x i16>* %A, <8 x i16>* %B) {
entry:
- %tmp = load <8 x i16>* %A ; <<8 x i16>> [#uses=1]
+ %tmp = load <8 x i16>, <8 x i16>* %A ; <<8 x i16>> [#uses=1]
%tmp.upgrd.9 = bitcast <8 x i16> %tmp to <4 x i32> ; <<4 x i32>> [#uses=1]
- %tmp2 = load <8 x i16>* %B ; <<8 x i16>> [#uses=1]
+ %tmp2 = load <8 x i16>, <8 x i16>* %B ; <<8 x i16>> [#uses=1]
%tmp2.upgrd.10 = bitcast <8 x i16> %tmp2 to <4 x i32> ; <<4 x i32>> [#uses=1]
%tmp3 = call <4 x i32> @llvm.ppc.altivec.vperm( <4 x i32> %tmp.upgrd.9, <4 x i32> %tmp2.upgrd.10, <16 x i8> < i8 14, i8 14, i8 14, i8 14, i8 14, i8 14, i8 14, i8 14, i8 14, i8 14, i8 14, i8 14, i8 14, i8 14, i8 14, i8 14 > ) ; <<4 x i32>> [#uses=1]
%tmp3.upgrd.11 = bitcast <4 x i32> %tmp3 to <8 x i16> ; <<8 x i16>> [#uses=1]
@@ -108,8 +108,8 @@ declare <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32>, <4 x i32>, <16 x i8>)
define void @tb_l(<16 x i8>* %A, <16 x i8>* %B) {
entry:
- %tmp = load <16 x i8>* %A ; <<16 x i8>> [#uses=8]
- %tmp2 = load <16 x i8>* %B ; <<16 x i8>> [#uses=8]
+ %tmp = load <16 x i8>, <16 x i8>* %A ; <<16 x i8>> [#uses=8]
+ %tmp2 = load <16 x i8>, <16 x i8>* %B ; <<16 x i8>> [#uses=8]
%tmp.upgrd.12 = extractelement <16 x i8> %tmp, i32 8 ; <i8> [#uses=1]
%tmp3 = extractelement <16 x i8> %tmp2, i32 8 ; <i8> [#uses=1]
%tmp4 = extractelement <16 x i8> %tmp, i32 9 ; <i8> [#uses=1]
@@ -148,8 +148,8 @@ entry:
define void @th_l(<8 x i16>* %A, <8 x i16>* %B) {
entry:
- %tmp = load <8 x i16>* %A ; <<8 x i16>> [#uses=4]
- %tmp2 = load <8 x i16>* %B ; <<8 x i16>> [#uses=4]
+ %tmp = load <8 x i16>, <8 x i16>* %A ; <<8 x i16>> [#uses=4]
+ %tmp2 = load <8 x i16>, <8 x i16>* %B ; <<8 x i16>> [#uses=4]
%tmp.upgrd.13 = extractelement <8 x i16> %tmp, i32 4 ; <i16> [#uses=1]
%tmp3 = extractelement <8 x i16> %tmp2, i32 4 ; <i16> [#uses=1]
%tmp4 = extractelement <8 x i16> %tmp, i32 5 ; <i16> [#uses=1]
@@ -172,8 +172,8 @@ entry:
define void @tw_l(<4 x i32>* %A, <4 x i32>* %B) {
entry:
- %tmp = load <4 x i32>* %A ; <<4 x i32>> [#uses=2]
- %tmp2 = load <4 x i32>* %B ; <<4 x i32>> [#uses=2]
+ %tmp = load <4 x i32>, <4 x i32>* %A ; <<4 x i32>> [#uses=2]
+ %tmp2 = load <4 x i32>, <4 x i32>* %B ; <<4 x i32>> [#uses=2]
%tmp.upgrd.14 = extractelement <4 x i32> %tmp, i32 2 ; <i32> [#uses=1]
%tmp3 = extractelement <4 x i32> %tmp2, i32 2 ; <i32> [#uses=1]
%tmp4 = extractelement <4 x i32> %tmp, i32 3 ; <i32> [#uses=1]
@@ -188,8 +188,8 @@ entry:
define void @tb_h(<16 x i8>* %A, <16 x i8>* %B) {
entry:
- %tmp = load <16 x i8>* %A ; <<16 x i8>> [#uses=8]
- %tmp2 = load <16 x i8>* %B ; <<16 x i8>> [#uses=8]
+ %tmp = load <16 x i8>, <16 x i8>* %A ; <<16 x i8>> [#uses=8]
+ %tmp2 = load <16 x i8>, <16 x i8>* %B ; <<16 x i8>> [#uses=8]
%tmp.upgrd.15 = extractelement <16 x i8> %tmp, i32 0 ; <i8> [#uses=1]
%tmp3 = extractelement <16 x i8> %tmp2, i32 0 ; <i8> [#uses=1]
%tmp4 = extractelement <16 x i8> %tmp, i32 1 ; <i8> [#uses=1]
@@ -228,8 +228,8 @@ entry:
define void @th_h(<8 x i16>* %A, <8 x i16>* %B) {
entry:
- %tmp = load <8 x i16>* %A ; <<8 x i16>> [#uses=4]
- %tmp2 = load <8 x i16>* %B ; <<8 x i16>> [#uses=4]
+ %tmp = load <8 x i16>, <8 x i16>* %A ; <<8 x i16>> [#uses=4]
+ %tmp2 = load <8 x i16>, <8 x i16>* %B ; <<8 x i16>> [#uses=4]
%tmp.upgrd.16 = extractelement <8 x i16> %tmp, i32 0 ; <i16> [#uses=1]
%tmp3 = extractelement <8 x i16> %tmp2, i32 0 ; <i16> [#uses=1]
%tmp4 = extractelement <8 x i16> %tmp, i32 1 ; <i16> [#uses=1]
@@ -252,8 +252,8 @@ entry:
define void @tw_h(<4 x i32>* %A, <4 x i32>* %B) {
entry:
- %tmp = load <4 x i32>* %A ; <<4 x i32>> [#uses=2]
- %tmp2 = load <4 x i32>* %B ; <<4 x i32>> [#uses=2]
+ %tmp = load <4 x i32>, <4 x i32>* %A ; <<4 x i32>> [#uses=2]
+ %tmp2 = load <4 x i32>, <4 x i32>* %B ; <<4 x i32>> [#uses=2]
%tmp.upgrd.17 = extractelement <4 x i32> %tmp2, i32 0 ; <i32> [#uses=1]
%tmp3 = extractelement <4 x i32> %tmp, i32 0 ; <i32> [#uses=1]
%tmp4 = extractelement <4 x i32> %tmp2, i32 1 ; <i32> [#uses=1]
@@ -267,8 +267,8 @@ entry:
}
define void @tw_h_flop(<4 x i32>* %A, <4 x i32>* %B) {
- %tmp = load <4 x i32>* %A ; <<4 x i32>> [#uses=2]
- %tmp2 = load <4 x i32>* %B ; <<4 x i32>> [#uses=2]
+ %tmp = load <4 x i32>, <4 x i32>* %A ; <<4 x i32>> [#uses=2]
+ %tmp2 = load <4 x i32>, <4 x i32>* %B ; <<4 x i32>> [#uses=2]
%tmp.upgrd.18 = extractelement <4 x i32> %tmp, i32 0 ; <i32> [#uses=1]
%tmp3 = extractelement <4 x i32> %tmp2, i32 0 ; <i32> [#uses=1]
%tmp4 = extractelement <4 x i32> %tmp, i32 1 ; <i32> [#uses=1]
@@ -283,7 +283,7 @@ define void @tw_h_flop(<4 x i32>* %A, <4 x i32>* %B) {
define void @VMRG_UNARY_tb_l(<16 x i8>* %A, <16 x i8>* %B) {
entry:
- %tmp = load <16 x i8>* %A ; <<16 x i8>> [#uses=16]
+ %tmp = load <16 x i8>, <16 x i8>* %A ; <<16 x i8>> [#uses=16]
%tmp.upgrd.19 = extractelement <16 x i8> %tmp, i32 8 ; <i8> [#uses=1]
%tmp3 = extractelement <16 x i8> %tmp, i32 8 ; <i8> [#uses=1]
%tmp4 = extractelement <16 x i8> %tmp, i32 9 ; <i8> [#uses=1]
@@ -322,7 +322,7 @@ entry:
define void @VMRG_UNARY_th_l(<8 x i16>* %A, <8 x i16>* %B) {
entry:
- %tmp = load <8 x i16>* %A ; <<8 x i16>> [#uses=8]
+ %tmp = load <8 x i16>, <8 x i16>* %A ; <<8 x i16>> [#uses=8]
%tmp.upgrd.20 = extractelement <8 x i16> %tmp, i32 4 ; <i16> [#uses=1]
%tmp3 = extractelement <8 x i16> %tmp, i32 4 ; <i16> [#uses=1]
%tmp4 = extractelement <8 x i16> %tmp, i32 5 ; <i16> [#uses=1]
@@ -345,7 +345,7 @@ entry:
define void @VMRG_UNARY_tw_l(<4 x i32>* %A, <4 x i32>* %B) {
entry:
- %tmp = load <4 x i32>* %A ; <<4 x i32>> [#uses=4]
+ %tmp = load <4 x i32>, <4 x i32>* %A ; <<4 x i32>> [#uses=4]
%tmp.upgrd.21 = extractelement <4 x i32> %tmp, i32 2 ; <i32> [#uses=1]
%tmp3 = extractelement <4 x i32> %tmp, i32 2 ; <i32> [#uses=1]
%tmp4 = extractelement <4 x i32> %tmp, i32 3 ; <i32> [#uses=1]
@@ -360,7 +360,7 @@ entry:
define void @VMRG_UNARY_tb_h(<16 x i8>* %A, <16 x i8>* %B) {
entry:
- %tmp = load <16 x i8>* %A ; <<16 x i8>> [#uses=16]
+ %tmp = load <16 x i8>, <16 x i8>* %A ; <<16 x i8>> [#uses=16]
%tmp.upgrd.22 = extractelement <16 x i8> %tmp, i32 0 ; <i8> [#uses=1]
%tmp3 = extractelement <16 x i8> %tmp, i32 0 ; <i8> [#uses=1]
%tmp4 = extractelement <16 x i8> %tmp, i32 1 ; <i8> [#uses=1]
@@ -399,7 +399,7 @@ entry:
define void @VMRG_UNARY_th_h(<8 x i16>* %A, <8 x i16>* %B) {
entry:
- %tmp = load <8 x i16>* %A ; <<8 x i16>> [#uses=8]
+ %tmp = load <8 x i16>, <8 x i16>* %A ; <<8 x i16>> [#uses=8]
%tmp.upgrd.23 = extractelement <8 x i16> %tmp, i32 0 ; <i16> [#uses=1]
%tmp3 = extractelement <8 x i16> %tmp, i32 0 ; <i16> [#uses=1]
%tmp4 = extractelement <8 x i16> %tmp, i32 1 ; <i16> [#uses=1]
@@ -422,7 +422,7 @@ entry:
define void @VMRG_UNARY_tw_h(<4 x i32>* %A, <4 x i32>* %B) {
entry:
- %tmp = load <4 x i32>* %A ; <<4 x i32>> [#uses=4]
+ %tmp = load <4 x i32>, <4 x i32>* %A ; <<4 x i32>> [#uses=4]
%tmp.upgrd.24 = extractelement <4 x i32> %tmp, i32 0 ; <i32> [#uses=1]
%tmp3 = extractelement <4 x i32> %tmp, i32 0 ; <i32> [#uses=1]
%tmp4 = extractelement <4 x i32> %tmp, i32 1 ; <i32> [#uses=1]
@@ -437,7 +437,7 @@ entry:
define void @VPCKUHUM_unary(<8 x i16>* %A, <8 x i16>* %B) {
entry:
- %tmp = load <8 x i16>* %A ; <<8 x i16>> [#uses=2]
+ %tmp = load <8 x i16>, <8 x i16>* %A ; <<8 x i16>> [#uses=2]
%tmp.upgrd.25 = bitcast <8 x i16> %tmp to <16 x i8> ; <<16 x i8>> [#uses=8]
%tmp3 = bitcast <8 x i16> %tmp to <16 x i8> ; <<16 x i8>> [#uses=8]
%tmp.upgrd.26 = extractelement <16 x i8> %tmp.upgrd.25, i32 1 ; <i8> [#uses=1]
@@ -479,7 +479,7 @@ entry:
define void @VPCKUWUM_unary(<4 x i32>* %A, <4 x i32>* %B) {
entry:
- %tmp = load <4 x i32>* %A ; <<4 x i32>> [#uses=2]
+ %tmp = load <4 x i32>, <4 x i32>* %A ; <<4 x i32>> [#uses=2]
%tmp.upgrd.28 = bitcast <4 x i32> %tmp to <8 x i16> ; <<8 x i16>> [#uses=4]
%tmp3 = bitcast <4 x i32> %tmp to <8 x i16> ; <<8 x i16>> [#uses=4]
%tmp.upgrd.29 = extractelement <8 x i16> %tmp.upgrd.28, i32 1 ; <i16> [#uses=1]
diff --git a/test/CodeGen/PowerPC/vec_shuffle_le.ll b/test/CodeGen/PowerPC/vec_shuffle_le.ll
index c7fc1c60c5ea..46d451ff1573 100644
--- a/test/CodeGen/PowerPC/vec_shuffle_le.ll
+++ b/test/CodeGen/PowerPC/vec_shuffle_le.ll
@@ -3,8 +3,8 @@
define void @VPKUHUM_xy(<16 x i8>* %A, <16 x i8>* %B) {
entry:
; CHECK: VPKUHUM_xy:
- %tmp = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
+ %tmp = load <16 x i8>, <16 x i8>* %A
+ %tmp2 = load <16 x i8>, <16 x i8>* %B
%tmp3 = shufflevector <16 x i8> %tmp, <16 x i8> %tmp2, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
; CHECK: lvx [[REG1:[0-9]+]]
; CHECK: lvx [[REG2:[0-9]+]]
@@ -16,7 +16,7 @@ entry:
define void @VPKUHUM_xx(<16 x i8>* %A) {
entry:
; CHECK: VPKUHUM_xx:
- %tmp = load <16 x i8>* %A
+ %tmp = load <16 x i8>, <16 x i8>* %A
%tmp2 = shufflevector <16 x i8> %tmp, <16 x i8> %tmp, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
; CHECK: vpkuhum
store <16 x i8> %tmp2, <16 x i8>* %A
@@ -26,8 +26,8 @@ entry:
define void @VPKUWUM_xy(<16 x i8>* %A, <16 x i8>* %B) {
entry:
; CHECK: VPKUWUM_xy:
- %tmp = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
+ %tmp = load <16 x i8>, <16 x i8>* %A
+ %tmp2 = load <16 x i8>, <16 x i8>* %B
%tmp3 = shufflevector <16 x i8> %tmp, <16 x i8> %tmp2, <16 x i32> <i32 0, i32 1, i32 4, i32 5, i32 8, i32 9, i32 12, i32 13, i32 16, i32 17, i32 20, i32 21, i32 24, i32 25, i32 28, i32 29>
; CHECK: lvx [[REG1:[0-9]+]]
; CHECK: lvx [[REG2:[0-9]+]]
@@ -39,7 +39,7 @@ entry:
define void @VPKUWUM_xx(<16 x i8>* %A) {
entry:
; CHECK: VPKUWUM_xx:
- %tmp = load <16 x i8>* %A
+ %tmp = load <16 x i8>, <16 x i8>* %A
%tmp2 = shufflevector <16 x i8> %tmp, <16 x i8> %tmp, <16 x i32> <i32 0, i32 1, i32 4, i32 5, i32 8, i32 9, i32 12, i32 13, i32 0, i32 1, i32 4, i32 5, i32 8, i32 9, i32 12, i32 13>
; CHECK: vpkuwum
store <16 x i8> %tmp2, <16 x i8>* %A
@@ -49,8 +49,8 @@ entry:
define void @VMRGLB_xy(<16 x i8>* %A, <16 x i8>* %B) {
entry:
; CHECK: VMRGLB_xy:
- %tmp = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
+ %tmp = load <16 x i8>, <16 x i8>* %A
+ %tmp2 = load <16 x i8>, <16 x i8>* %B
%tmp3 = shufflevector <16 x i8> %tmp, <16 x i8> %tmp2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
; CHECK: lvx [[REG1:[0-9]+]]
; CHECK: lvx [[REG2:[0-9]+]]
@@ -62,7 +62,7 @@ entry:
define void @VMRGLB_xx(<16 x i8>* %A) {
entry:
; CHECK: VMRGLB_xx:
- %tmp = load <16 x i8>* %A
+ %tmp = load <16 x i8>, <16 x i8>* %A
%tmp2 = shufflevector <16 x i8> %tmp, <16 x i8> %tmp, <16 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7>
; CHECK: vmrglb
store <16 x i8> %tmp2, <16 x i8>* %A
@@ -72,8 +72,8 @@ entry:
define void @VMRGHB_xy(<16 x i8>* %A, <16 x i8>* %B) {
entry:
; CHECK: VMRGHB_xy:
- %tmp = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
+ %tmp = load <16 x i8>, <16 x i8>* %A
+ %tmp2 = load <16 x i8>, <16 x i8>* %B
%tmp3 = shufflevector <16 x i8> %tmp, <16 x i8> %tmp2, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
; CHECK: lvx [[REG1:[0-9]+]]
; CHECK: lvx [[REG2:[0-9]+]]
@@ -85,7 +85,7 @@ entry:
define void @VMRGHB_xx(<16 x i8>* %A) {
entry:
; CHECK: VMRGHB_xx:
- %tmp = load <16 x i8>* %A
+ %tmp = load <16 x i8>, <16 x i8>* %A
%tmp2 = shufflevector <16 x i8> %tmp, <16 x i8> %tmp, <16 x i32> <i32 8, i32 8, i32 9, i32 9, i32 10, i32 10, i32 11, i32 11, i32 12, i32 12, i32 13, i32 13, i32 14, i32 14, i32 15, i32 15>
; CHECK: vmrghb
store <16 x i8> %tmp2, <16 x i8>* %A
@@ -95,8 +95,8 @@ entry:
define void @VMRGLH_xy(<16 x i8>* %A, <16 x i8>* %B) {
entry:
; CHECK: VMRGLH_xy:
- %tmp = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
+ %tmp = load <16 x i8>, <16 x i8>* %A
+ %tmp2 = load <16 x i8>, <16 x i8>* %B
%tmp3 = shufflevector <16 x i8> %tmp, <16 x i8> %tmp2, <16 x i32> <i32 0, i32 1, i32 16, i32 17, i32 2, i32 3, i32 18, i32 19, i32 4, i32 5, i32 20, i32 21, i32 6, i32 7, i32 22, i32 23>
; CHECK: lvx [[REG1:[0-9]+]]
; CHECK: lvx [[REG2:[0-9]+]]
@@ -108,7 +108,7 @@ entry:
define void @VMRGLH_xx(<16 x i8>* %A) {
entry:
; CHECK: VMRGLH_xx:
- %tmp = load <16 x i8>* %A
+ %tmp = load <16 x i8>, <16 x i8>* %A
%tmp2 = shufflevector <16 x i8> %tmp, <16 x i8> %tmp, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 4, i32 5, i32 4, i32 5, i32 6, i32 7, i32 6, i32 7>
; CHECK: vmrglh
store <16 x i8> %tmp2, <16 x i8>* %A
@@ -118,8 +118,8 @@ entry:
define void @VMRGHH_xy(<16 x i8>* %A, <16 x i8>* %B) {
entry:
; CHECK: VMRGHH_xy:
- %tmp = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
+ %tmp = load <16 x i8>, <16 x i8>* %A
+ %tmp2 = load <16 x i8>, <16 x i8>* %B
%tmp3 = shufflevector <16 x i8> %tmp, <16 x i8> %tmp2, <16 x i32> <i32 8, i32 9, i32 24, i32 25, i32 10, i32 11, i32 26, i32 27, i32 12, i32 13, i32 28, i32 29, i32 14, i32 15, i32 30, i32 31>
; CHECK: lvx [[REG1:[0-9]+]]
; CHECK: lvx [[REG2:[0-9]+]]
@@ -131,7 +131,7 @@ entry:
define void @VMRGHH_xx(<16 x i8>* %A) {
entry:
; CHECK: VMRGHH_xx:
- %tmp = load <16 x i8>* %A
+ %tmp = load <16 x i8>, <16 x i8>* %A
%tmp2 = shufflevector <16 x i8> %tmp, <16 x i8> %tmp, <16 x i32> <i32 8, i32 9, i32 8, i32 9, i32 10, i32 11, i32 10, i32 11, i32 12, i32 13, i32 12, i32 13, i32 14, i32 15, i32 14, i32 15>
; CHECK: vmrghh
store <16 x i8> %tmp2, <16 x i8>* %A
@@ -141,8 +141,8 @@ entry:
define void @VMRGLW_xy(<16 x i8>* %A, <16 x i8>* %B) {
entry:
; CHECK: VMRGLW_xy:
- %tmp = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
+ %tmp = load <16 x i8>, <16 x i8>* %A
+ %tmp2 = load <16 x i8>, <16 x i8>* %B
%tmp3 = shufflevector <16 x i8> %tmp, <16 x i8> %tmp2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 18, i32 19, i32 4, i32 5, i32 6, i32 7, i32 20, i32 21, i32 22, i32 23>
; CHECK: lvx [[REG1:[0-9]+]]
; CHECK: lvx [[REG2:[0-9]+]]
@@ -154,7 +154,7 @@ entry:
define void @VMRGLW_xx(<16 x i8>* %A) {
entry:
; CHECK: VMRGLW_xx:
- %tmp = load <16 x i8>* %A
+ %tmp = load <16 x i8>, <16 x i8>* %A
%tmp2 = shufflevector <16 x i8> %tmp, <16 x i8> %tmp, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
; CHECK: vmrglw
store <16 x i8> %tmp2, <16 x i8>* %A
@@ -164,8 +164,8 @@ entry:
define void @VMRGHW_xy(<16 x i8>* %A, <16 x i8>* %B) {
entry:
; CHECK: VMRGHW_xy:
- %tmp = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
+ %tmp = load <16 x i8>, <16 x i8>* %A
+ %tmp2 = load <16 x i8>, <16 x i8>* %B
%tmp3 = shufflevector <16 x i8> %tmp, <16 x i8> %tmp2, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 24, i32 25, i32 26, i32 27, i32 12, i32 13, i32 14, i32 15, i32 28, i32 29, i32 30, i32 31>
; CHECK: lvx [[REG1:[0-9]+]]
; CHECK: lvx [[REG2:[0-9]+]]
@@ -177,7 +177,7 @@ entry:
define void @VMRGHW_xx(<16 x i8>* %A) {
entry:
; CHECK: VMRGHW_xx:
- %tmp = load <16 x i8>* %A
+ %tmp = load <16 x i8>, <16 x i8>* %A
%tmp2 = shufflevector <16 x i8> %tmp, <16 x i8> %tmp, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 12, i32 13, i32 14, i32 15>
; CHECK: vmrghw
store <16 x i8> %tmp2, <16 x i8>* %A
@@ -187,8 +187,8 @@ entry:
define void @VSLDOI_xy(<16 x i8>* %A, <16 x i8>* %B) {
entry:
; CHECK: VSLDOI_xy:
- %tmp = load <16 x i8>* %A
- %tmp2 = load <16 x i8>* %B
+ %tmp = load <16 x i8>, <16 x i8>* %A
+ %tmp2 = load <16 x i8>, <16 x i8>* %B
%tmp3 = shufflevector <16 x i8> %tmp, <16 x i8> %tmp2, <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27>
; CHECK: lvx [[REG1:[0-9]+]]
; CHECK: lvx [[REG2:[0-9]+]]
@@ -200,7 +200,7 @@ entry:
define void @VSLDOI_xx(<16 x i8>* %A) {
entry:
; CHECK: VSLDOI_xx:
- %tmp = load <16 x i8>* %A
+ %tmp = load <16 x i8>, <16 x i8>* %A
%tmp2 = shufflevector <16 x i8> %tmp, <16 x i8> %tmp, <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
; CHECK: vsldoi
store <16 x i8> %tmp2, <16 x i8>* %A
diff --git a/test/CodeGen/PowerPC/vec_shuffle_p8vector.ll b/test/CodeGen/PowerPC/vec_shuffle_p8vector.ll
new file mode 100644
index 000000000000..77802348d8e3
--- /dev/null
+++ b/test/CodeGen/PowerPC/vec_shuffle_p8vector.ll
@@ -0,0 +1,54 @@
+; RUN: llc -mcpu=pwr8 -mtriple=powerpc64-unknown-linux-gnu -mattr=+power8-vector < %s | FileCheck %s
+; RUN: llc -mcpu=pwr7 -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck -check-prefix=CHECK-PWR7 %s
+
+define void @VPKUDUM_unary(<2 x i64>* %A) {
+entry:
+ %tmp = load <2 x i64>, <2 x i64>* %A
+ %tmp2 = bitcast <2 x i64> %tmp to <4 x i32>
+ %tmp3 = extractelement <4 x i32> %tmp2, i32 1
+ %tmp4 = extractelement <4 x i32> %tmp2, i32 3
+ %tmp5 = insertelement <4 x i32> undef, i32 %tmp3, i32 0
+ %tmp6 = insertelement <4 x i32> %tmp5, i32 %tmp4, i32 1
+ %tmp7 = insertelement <4 x i32> %tmp6, i32 %tmp3, i32 2
+ %tmp8 = insertelement <4 x i32> %tmp7, i32 %tmp4, i32 3
+ %tmp9 = bitcast <4 x i32> %tmp8 to <2 x i64>
+ store <2 x i64> %tmp9, <2 x i64>* %A
+ ret void
+}
+
+; CHECK-LABEL: @VPKUDUM_unary
+; CHECK-NOT: vperm
+; CHECK-NOT: vmrglw
+; CHECK-NOT: vmrghw
+; CHECK: vpkudum
+; CHECK-PWR7: vmrglw
+; CHECK-PWR7: vmrghw
+; CHECK-PWR7: vmrglw
+
+define void @VPKUDUM(<2 x i64>* %A, <2 x i64>* %B) {
+entry:
+ %tmp = load <2 x i64>, <2 x i64>* %A
+ %tmp2 = bitcast <2 x i64> %tmp to <4 x i32>
+ %tmp3 = load <2 x i64>, <2 x i64>* %B
+ %tmp4 = bitcast <2 x i64> %tmp3 to <4 x i32>
+ %tmp5 = extractelement <4 x i32> %tmp2, i32 1
+ %tmp6 = extractelement <4 x i32> %tmp2, i32 3
+ %tmp7 = extractelement <4 x i32> %tmp4, i32 1
+ %tmp8 = extractelement <4 x i32> %tmp4, i32 3
+ %tmp9 = insertelement <4 x i32> undef, i32 %tmp5, i32 0
+ %tmp10 = insertelement <4 x i32> %tmp9, i32 %tmp6, i32 1
+ %tmp11 = insertelement <4 x i32> %tmp10, i32 %tmp7, i32 2
+ %tmp12 = insertelement <4 x i32> %tmp11, i32 %tmp8, i32 3
+ %tmp13 = bitcast <4 x i32> %tmp12 to <2 x i64>
+ store <2 x i64> %tmp13, <2 x i64>* %A
+ ret void
+}
+
+; CHECK-LABEL: @VPKUDUM
+; CHECK-NOT: vperm
+; CHECK-NOT: vmrglw
+; CHECK-NOT: vmrghw
+; CHECK: vpkudum
+; CHECK-PWR7: vmrglw
+; CHECK-PWR7: vmrghw
+; CHECK-PWR7: vmrglw
diff --git a/test/CodeGen/PowerPC/vec_shuffle_p8vector_le.ll b/test/CodeGen/PowerPC/vec_shuffle_p8vector_le.ll
new file mode 100644
index 000000000000..709388675f64
--- /dev/null
+++ b/test/CodeGen/PowerPC/vec_shuffle_p8vector_le.ll
@@ -0,0 +1,43 @@
+; RUN: llc -mcpu=pwr8 -mtriple=powerpc64le-unknown-linux-gnu -mattr=+power8-vector < %s | FileCheck %s
+
+define void @VPKUDUM_unary(<2 x i64>* %A) {
+entry:
+ %tmp = load <2 x i64>, <2 x i64>* %A
+ %tmp2 = bitcast <2 x i64> %tmp to <4 x i32>
+ %tmp3 = extractelement <4 x i32> %tmp2, i32 0
+ %tmp4 = extractelement <4 x i32> %tmp2, i32 2
+ %tmp5 = insertelement <4 x i32> undef, i32 %tmp3, i32 0
+ %tmp6 = insertelement <4 x i32> %tmp5, i32 %tmp4, i32 1
+ %tmp7 = insertelement <4 x i32> %tmp6, i32 %tmp3, i32 2
+ %tmp8 = insertelement <4 x i32> %tmp7, i32 %tmp4, i32 3
+ %tmp9 = bitcast <4 x i32> %tmp8 to <2 x i64>
+ store <2 x i64> %tmp9, <2 x i64>* %A
+ ret void
+}
+
+; CHECK-LABEL: @VPKUDUM_unary
+; CHECK-NOT: vperm
+; CHECK: vpkudum
+
+define void @VPKUDUM(<2 x i64>* %A, <2 x i64>* %B) {
+entry:
+ %tmp = load <2 x i64>, <2 x i64>* %A
+ %tmp2 = bitcast <2 x i64> %tmp to <4 x i32>
+ %tmp3 = load <2 x i64>, <2 x i64>* %B
+ %tmp4 = bitcast <2 x i64> %tmp3 to <4 x i32>
+ %tmp5 = extractelement <4 x i32> %tmp2, i32 0
+ %tmp6 = extractelement <4 x i32> %tmp2, i32 2
+ %tmp7 = extractelement <4 x i32> %tmp4, i32 0
+ %tmp8 = extractelement <4 x i32> %tmp4, i32 2
+ %tmp9 = insertelement <4 x i32> undef, i32 %tmp5, i32 0
+ %tmp10 = insertelement <4 x i32> %tmp9, i32 %tmp6, i32 1
+ %tmp11 = insertelement <4 x i32> %tmp10, i32 %tmp7, i32 2
+ %tmp12 = insertelement <4 x i32> %tmp11, i32 %tmp8, i32 3
+ %tmp13 = bitcast <4 x i32> %tmp12 to <2 x i64>
+ store <2 x i64> %tmp13, <2 x i64>* %A
+ ret void
+}
+
+; CHECK-LABEL: @VPKUDUM
+; CHECK-NOT: vperm
+; CHECK: vpkudum
diff --git a/test/CodeGen/PowerPC/vec_splat.ll b/test/CodeGen/PowerPC/vec_splat.ll
index 61237284d36c..aeed94c91f40 100644
--- a/test/CodeGen/PowerPC/vec_splat.ll
+++ b/test/CodeGen/PowerPC/vec_splat.ll
@@ -14,7 +14,7 @@ define void @splat(%f4* %P, %f4* %Q, float %X) nounwind {
%tmp2 = insertelement %f4 %tmp, float %X, i32 1 ; <%f4> [#uses=1]
%tmp4 = insertelement %f4 %tmp2, float %X, i32 2 ; <%f4> [#uses=1]
%tmp6 = insertelement %f4 %tmp4, float %X, i32 3 ; <%f4> [#uses=1]
- %q = load %f4* %Q ; <%f4> [#uses=1]
+ %q = load %f4, %f4* %Q ; <%f4> [#uses=1]
%R = fadd %f4 %q, %tmp6 ; <%f4> [#uses=1]
store %f4 %R, %f4* %P
ret void
@@ -25,21 +25,21 @@ define void @splat_i4(%i4* %P, %i4* %Q, i32 %X) nounwind {
%tmp2 = insertelement %i4 %tmp, i32 %X, i32 1 ; <%i4> [#uses=1]
%tmp4 = insertelement %i4 %tmp2, i32 %X, i32 2 ; <%i4> [#uses=1]
%tmp6 = insertelement %i4 %tmp4, i32 %X, i32 3 ; <%i4> [#uses=1]
- %q = load %i4* %Q ; <%i4> [#uses=1]
+ %q = load %i4, %i4* %Q ; <%i4> [#uses=1]
%R = add %i4 %q, %tmp6 ; <%i4> [#uses=1]
store %i4 %R, %i4* %P
ret void
}
define void @splat_imm_i32(%i4* %P, %i4* %Q, i32 %X) nounwind {
- %q = load %i4* %Q ; <%i4> [#uses=1]
+ %q = load %i4, %i4* %Q ; <%i4> [#uses=1]
%R = add %i4 %q, < i32 -1, i32 -1, i32 -1, i32 -1 > ; <%i4> [#uses=1]
store %i4 %R, %i4* %P
ret void
}
define void @splat_imm_i16(%i4* %P, %i4* %Q, i32 %X) nounwind {
- %q = load %i4* %Q ; <%i4> [#uses=1]
+ %q = load %i4, %i4* %Q ; <%i4> [#uses=1]
%R = add %i4 %q, < i32 65537, i32 65537, i32 65537, i32 65537 > ; <%i4> [#uses=1]
store %i4 %R, %i4* %P
ret void
@@ -60,7 +60,7 @@ define void @splat_h(i16 %tmp, <16 x i8>* %dst) nounwind {
}
define void @spltish(<16 x i8>* %A, <16 x i8>* %B) nounwind {
- %tmp = load <16 x i8>* %B ; <<16 x i8>> [#uses=1]
+ %tmp = load <16 x i8>, <16 x i8>* %B ; <<16 x i8>> [#uses=1]
%tmp.s = bitcast <16 x i8> %tmp to <16 x i8> ; <<16 x i8>> [#uses=1]
%tmp4 = sub <16 x i8> %tmp.s, bitcast (<8 x i16> < i16 15, i16 15, i16 15, i16 15, i16 15, i16
15, i16 15, i16 15 > to <16 x i8>) ; <<16 x i8>> [#uses=1]
diff --git a/test/CodeGen/PowerPC/vec_splat_constant.ll b/test/CodeGen/PowerPC/vec_splat_constant.ll
index b227794421f2..53676fcbba4a 100644
--- a/test/CodeGen/PowerPC/vec_splat_constant.ll
+++ b/test/CodeGen/PowerPC/vec_splat_constant.ll
@@ -12,8 +12,8 @@ entry:
%"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
store <16 x i8> %x, <16 x i8>* %x_addr
store <16 x i8> <i8 0, i8 0, i8 0, i8 14, i8 0, i8 0, i8 0, i8 14, i8 0, i8 0, i8 0, i8 14, i8 0, i8 0, i8 0, i8 14>, <16 x i8>* %temp, align 16
- %0 = load <16 x i8>* %x_addr, align 16 ; <<16 x i8>> [#uses=1]
- %1 = load <16 x i8>* %temp, align 16 ; <<16 x i8>> [#uses=1]
+ %0 = load <16 x i8>, <16 x i8>* %x_addr, align 16 ; <<16 x i8>> [#uses=1]
+ %1 = load <16 x i8>, <16 x i8>* %temp, align 16 ; <<16 x i8>> [#uses=1]
%tmp = add <16 x i8> %0, %1 ; <<16 x i8>> [#uses=1]
store <16 x i8> %tmp, <16 x i8>* @baz, align 16
br label %return
diff --git a/test/CodeGen/PowerPC/vec_veqv_vnand_vorc.ll b/test/CodeGen/PowerPC/vec_veqv_vnand_vorc.ll
new file mode 100644
index 000000000000..f7d5a51c11d4
--- /dev/null
+++ b/test/CodeGen/PowerPC/vec_veqv_vnand_vorc.ll
@@ -0,0 +1,29 @@
+; Check the miscellaneous logical vector operations added in P8
+;
+; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 -mattr=-vsx < %s | FileCheck %s
+; Test x eqv y
+define <4 x i32> @test_veqv(<4 x i32> %x, <4 x i32> %y) nounwind {
+ %tmp = xor <4 x i32> %x, %y
+ %ret_val = xor <4 x i32> %tmp, < i32 -1, i32 -1, i32 -1, i32 -1>
+ ret <4 x i32> %ret_val
+; CHECK: veqv 2, 2, 3
+}
+
+; Test x vnand y
+define <4 x i32> @test_vnand(<4 x i32> %x, <4 x i32> %y) nounwind {
+ %tmp = and <4 x i32> %x, %y
+ %ret_val = xor <4 x i32> %tmp, <i32 -1, i32 -1, i32 -1, i32 -1>
+ ret <4 x i32> %ret_val
+; CHECK: vnand 2, 2, 3
+}
+
+; Test x vorc y and variants
+define <4 x i32> @test_vorc(<4 x i32> %x, <4 x i32> %y) nounwind {
+ %tmp1 = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
+ %tmp2 = or <4 x i32> %x, %tmp1
+; CHECK: vorc 3, 2, 3
+ %tmp3 = xor <4 x i32> %tmp2, <i32 -1, i32 -1, i32 -1, i32 -1>
+ %tmp4 = or <4 x i32> %tmp3, %x
+; CHECK: vorc 2, 2, 3
+ ret <4 x i32> %tmp4
+}
diff --git a/test/CodeGen/PowerPC/vec_zero.ll b/test/CodeGen/PowerPC/vec_zero.ll
index f862b2cb4c4b..aec61fbd6bd6 100644
--- a/test/CodeGen/PowerPC/vec_zero.ll
+++ b/test/CodeGen/PowerPC/vec_zero.ll
@@ -1,7 +1,7 @@
; RUN: llc < %s -march=ppc32 -mcpu=g5 | grep vxor
define void @foo(<4 x float>* %P) {
- %T = load <4 x float>* %P ; <<4 x float>> [#uses=1]
+ %T = load <4 x float>, <4 x float>* %P ; <<4 x float>> [#uses=1]
%S = fadd <4 x float> zeroinitializer, %T ; <<4 x float>> [#uses=1]
store <4 x float> %S, <4 x float>* %P
ret void
diff --git a/test/CodeGen/PowerPC/vector-identity-shuffle.ll b/test/CodeGen/PowerPC/vector-identity-shuffle.ll
index dfa2e35435a8..35979f68a886 100644
--- a/test/CodeGen/PowerPC/vector-identity-shuffle.ll
+++ b/test/CodeGen/PowerPC/vector-identity-shuffle.ll
@@ -2,7 +2,7 @@
; RUN: llc < %s -march=ppc32 -mcpu=g5 | not grep vperm
define void @test(<4 x float>* %tmp2.i) {
- %tmp2.i.upgrd.1 = load <4 x float>* %tmp2.i ; <<4 x float>> [#uses=4]
+ %tmp2.i.upgrd.1 = load <4 x float>, <4 x float>* %tmp2.i ; <<4 x float>> [#uses=4]
%xFloat0.48 = extractelement <4 x float> %tmp2.i.upgrd.1, i32 0 ; <float> [#uses=1]
%inFloat0.49 = insertelement <4 x float> undef, float %xFloat0.48, i32 0 ; <<4 x float>> [#uses=1]
%xFloat1.50 = extractelement <4 x float> %tmp2.i.upgrd.1, i32 1 ; <float> [#uses=1]
diff --git a/test/CodeGen/PowerPC/vector.ll b/test/CodeGen/PowerPC/vector.ll
index 859a85a14101..723ca54c02ba 100644
--- a/test/CodeGen/PowerPC/vector.ll
+++ b/test/CodeGen/PowerPC/vector.ll
@@ -12,56 +12,56 @@
;;; TEST HANDLING OF VARIOUS VECTOR SIZES
define void @test_f1(%f1* %P, %f1* %Q, %f1* %S) {
- %p = load %f1* %P ; <%f1> [#uses=1]
- %q = load %f1* %Q ; <%f1> [#uses=1]
+ %p = load %f1, %f1* %P ; <%f1> [#uses=1]
+ %q = load %f1, %f1* %Q ; <%f1> [#uses=1]
%R = fadd %f1 %p, %q ; <%f1> [#uses=1]
store %f1 %R, %f1* %S
ret void
}
define void @test_f2(%f2* %P, %f2* %Q, %f2* %S) {
- %p = load %f2* %P ; <%f2> [#uses=1]
- %q = load %f2* %Q ; <%f2> [#uses=1]
+ %p = load %f2, %f2* %P ; <%f2> [#uses=1]
+ %q = load %f2, %f2* %Q ; <%f2> [#uses=1]
%R = fadd %f2 %p, %q ; <%f2> [#uses=1]
store %f2 %R, %f2* %S
ret void
}
define void @test_f4(%f4* %P, %f4* %Q, %f4* %S) {
- %p = load %f4* %P ; <%f4> [#uses=1]
- %q = load %f4* %Q ; <%f4> [#uses=1]
+ %p = load %f4, %f4* %P ; <%f4> [#uses=1]
+ %q = load %f4, %f4* %Q ; <%f4> [#uses=1]
%R = fadd %f4 %p, %q ; <%f4> [#uses=1]
store %f4 %R, %f4* %S
ret void
}
define void @test_f8(%f8* %P, %f8* %Q, %f8* %S) {
- %p = load %f8* %P ; <%f8> [#uses=1]
- %q = load %f8* %Q ; <%f8> [#uses=1]
+ %p = load %f8, %f8* %P ; <%f8> [#uses=1]
+ %q = load %f8, %f8* %Q ; <%f8> [#uses=1]
%R = fadd %f8 %p, %q ; <%f8> [#uses=1]
store %f8 %R, %f8* %S
ret void
}
define void @test_fmul(%f8* %P, %f8* %Q, %f8* %S) {
- %p = load %f8* %P ; <%f8> [#uses=1]
- %q = load %f8* %Q ; <%f8> [#uses=1]
+ %p = load %f8, %f8* %P ; <%f8> [#uses=1]
+ %q = load %f8, %f8* %Q ; <%f8> [#uses=1]
%R = fmul %f8 %p, %q ; <%f8> [#uses=1]
store %f8 %R, %f8* %S
ret void
}
define void @test_div(%f8* %P, %f8* %Q, %f8* %S) {
- %p = load %f8* %P ; <%f8> [#uses=1]
- %q = load %f8* %Q ; <%f8> [#uses=1]
+ %p = load %f8, %f8* %P ; <%f8> [#uses=1]
+ %q = load %f8, %f8* %Q ; <%f8> [#uses=1]
%R = fdiv %f8 %p, %q ; <%f8> [#uses=1]
store %f8 %R, %f8* %S
ret void
}
define void @test_rem(%f8* %P, %f8* %Q, %f8* %S) {
- %p = load %f8* %P ; <%f8> [#uses=1]
- %q = load %f8* %Q ; <%f8> [#uses=1]
+ %p = load %f8, %f8* %P ; <%f8> [#uses=1]
+ %q = load %f8, %f8* %Q ; <%f8> [#uses=1]
%R = frem %f8 %p, %q ; <%f8> [#uses=1]
store %f8 %R, %f8* %S
ret void
@@ -70,7 +70,7 @@ define void @test_rem(%f8* %P, %f8* %Q, %f8* %S) {
;;; TEST VECTOR CONSTRUCTS
define void @test_cst(%f4* %P, %f4* %S) {
- %p = load %f4* %P ; <%f4> [#uses=1]
+ %p = load %f4, %f4* %P ; <%f4> [#uses=1]
%R = fadd %f4 %p, < float 0x3FB99999A0000000, float 1.000000e+00, float
2.000000e+00, float 4.500000e+00 > ; <%f4> [#uses=1]
store %f4 %R, %f4* %S
@@ -78,14 +78,14 @@ define void @test_cst(%f4* %P, %f4* %S) {
}
define void @test_zero(%f4* %P, %f4* %S) {
- %p = load %f4* %P ; <%f4> [#uses=1]
+ %p = load %f4, %f4* %P ; <%f4> [#uses=1]
%R = fadd %f4 %p, zeroinitializer ; <%f4> [#uses=1]
store %f4 %R, %f4* %S
ret void
}
define void @test_undef(%f4* %P, %f4* %S) {
- %p = load %f4* %P ; <%f4> [#uses=1]
+ %p = load %f4, %f4* %P ; <%f4> [#uses=1]
%R = fadd %f4 %p, undef ; <%f4> [#uses=1]
store %f4 %R, %f4* %S
ret void
@@ -111,19 +111,19 @@ define void @test_scalar_to_vector(float %F, %f4* %S) {
}
define float @test_extract_elt(%f8* %P) {
- %p = load %f8* %P ; <%f8> [#uses=1]
+ %p = load %f8, %f8* %P ; <%f8> [#uses=1]
%R = extractelement %f8 %p, i32 3 ; <float> [#uses=1]
ret float %R
}
define double @test_extract_elt2(%d8* %P) {
- %p = load %d8* %P ; <%d8> [#uses=1]
+ %p = load %d8, %d8* %P ; <%d8> [#uses=1]
%R = extractelement %d8 %p, i32 3 ; <double> [#uses=1]
ret double %R
}
define void @test_cast_1(%f4* %b, %i4* %a) {
- %tmp = load %f4* %b ; <%f4> [#uses=1]
+ %tmp = load %f4, %f4* %b ; <%f4> [#uses=1]
%tmp2 = fadd %f4 %tmp, < float 1.000000e+00, float 2.000000e+00, float
3.000000e+00, float 4.000000e+00 > ; <%f4> [#uses=1]
%tmp3 = bitcast %f4 %tmp2 to %i4 ; <%i4> [#uses=1]
@@ -133,7 +133,7 @@ define void @test_cast_1(%f4* %b, %i4* %a) {
}
define void @test_cast_2(%f8* %a, <8 x i32>* %b) {
- %T = load %f8* %a ; <%f8> [#uses=1]
+ %T = load %f8, %f8* %a ; <%f8> [#uses=1]
%T2 = bitcast %f8 %T to <8 x i32>
store <8 x i32> %T2, <8 x i32>* %b
ret void
@@ -147,7 +147,7 @@ define void @splat(%f4* %P, %f4* %Q, float %X) {
%tmp2 = insertelement %f4 %tmp, float %X, i32 1
%tmp4 = insertelement %f4 %tmp2, float %X, i32 2
%tmp6 = insertelement %f4 %tmp4, float %X, i32 3
- %q = load %f4* %Q ; <%f4> [#uses=1]
+ %q = load %f4, %f4* %Q ; <%f4> [#uses=1]
%R = fadd %f4 %q, %tmp6 ; <%f4> [#uses=1]
store %f4 %R, %f4* %P
ret void
@@ -158,7 +158,7 @@ define void @splat_i4(%i4* %P, %i4* %Q, i32 %X) {
%tmp2 = insertelement %i4 %tmp, i32 %X, i32 1
%tmp4 = insertelement %i4 %tmp2, i32 %X, i32 2
%tmp6 = insertelement %i4 %tmp4, i32 %X, i32 3
- %q = load %i4* %Q ; <%i4> [#uses=1]
+ %q = load %i4, %i4* %Q ; <%i4> [#uses=1]
%R = add %i4 %q, %tmp6 ; <%i4> [#uses=1]
store %i4 %R, %i4* %P
ret void
diff --git a/test/CodeGen/PowerPC/vperm-lowering.ll b/test/CodeGen/PowerPC/vperm-lowering.ll
index d55d26c959b6..c78ffdd0c073 100644
--- a/test/CodeGen/PowerPC/vperm-lowering.ll
+++ b/test/CodeGen/PowerPC/vperm-lowering.ll
@@ -9,58 +9,23 @@ define <16 x i8> @foo() nounwind ssp {
}
; CHECK: .LCPI0_0:
-; CHECK: .byte 31
-; CHECK: .byte 26
-; CHECK: .byte 21
-; CHECK: .byte 16
-; CHECK: .byte 11
-; CHECK: .byte 6
-; CHECK: .byte 1
-; CHECK: .byte 28
-; CHECK: .byte 23
-; CHECK: .byte 18
-; CHECK: .byte 13
-; CHECK: .byte 8
-; CHECK: .byte 3
-; CHECK: .byte 30
-; CHECK: .byte 25
-; CHECK: .byte 20
-; CHECK: .LCPI0_1:
; CHECK: .byte 0
-; CHECK: .byte 1
-; CHECK: .byte 2
-; CHECK: .byte 3
-; CHECK: .byte 4
; CHECK: .byte 5
-; CHECK: .byte 6
-; CHECK: .byte 7
-; CHECK: .byte 8
-; CHECK: .byte 9
; CHECK: .byte 10
-; CHECK: .byte 11
-; CHECK: .byte 12
-; CHECK: .byte 13
-; CHECK: .byte 14
; CHECK: .byte 15
-; CHECK: .LCPI0_2:
-; CHECK: .byte 16
-; CHECK: .byte 17
-; CHECK: .byte 18
-; CHECK: .byte 19
; CHECK: .byte 20
-; CHECK: .byte 21
-; CHECK: .byte 22
-; CHECK: .byte 23
-; CHECK: .byte 24
; CHECK: .byte 25
-; CHECK: .byte 26
-; CHECK: .byte 27
-; CHECK: .byte 28
-; CHECK: .byte 29
; CHECK: .byte 30
-; CHECK: .byte 31
+; CHECK: .byte 3
+; CHECK: .byte 8
+; CHECK: .byte 13
+; CHECK: .byte 18
+; CHECK: .byte 23
+; CHECK: .byte 28
+; CHECK: .byte 1
+; CHECK: .byte 6
+; CHECK: .byte 11
; CHECK: foo:
-; CHECK: addis [[REG1:[0-9]+]], 2, .LCPI0_2@toc@ha
-; CHECK: addi [[REG2:[0-9]+]], [[REG1]], .LCPI0_2@toc@l
+; CHECK: addis [[REG1:[0-9]+]], 2, .LCPI0_0@toc@ha
+; CHECK: addi [[REG2:[0-9]+]], [[REG1]], .LCPI0_0@toc@l
; CHECK: lvx [[REG3:[0-9]+]], 0, [[REG2]]
-; CHECK: vperm {{[0-9]+}}, [[REG3]], {{[0-9]+}}, {{[0-9]+}}
diff --git a/test/CodeGen/PowerPC/vsx-div.ll b/test/CodeGen/PowerPC/vsx-div.ll
index 8a9578e5ed80..0e8388543a2f 100644
--- a/test/CodeGen/PowerPC/vsx-div.ll
+++ b/test/CodeGen/PowerPC/vsx-div.ll
@@ -7,7 +7,7 @@
define void @test1() {
entry:
- %0 = load <4 x float>* @vf, align 16
+ %0 = load <4 x float>, <4 x float>* @vf, align 16
%1 = tail call <4 x float> @llvm.ppc.vsx.xvdivsp(<4 x float> %0, <4 x float> %0)
store <4 x float> %1, <4 x float>* @vf_res, align 16
ret void
@@ -17,7 +17,7 @@ entry:
define void @test2() {
entry:
- %0 = load <2 x double>* @vd, align 16
+ %0 = load <2 x double>, <2 x double>* @vd, align 16
%1 = tail call <2 x double> @llvm.ppc.vsx.xvdivdp(<2 x double> %0, <2 x double> %0)
store <2 x double> %1, <2 x double>* @vd_res, align 16
ret void
diff --git a/test/CodeGen/PowerPC/vsx-elementary-arith.ll b/test/CodeGen/PowerPC/vsx-elementary-arith.ll
new file mode 100644
index 000000000000..d8f76bb989e7
--- /dev/null
+++ b/test/CodeGen/PowerPC/vsx-elementary-arith.ll
@@ -0,0 +1,120 @@
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 | FileCheck %s
+; RUN: llc < %s -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 | FileCheck %s
+@a = global float 3.000000e+00, align 4
+@b = global float 4.000000e+00, align 4
+@c = global double 3.000000e+00, align 8
+@d = global double 4.000000e+00, align 8
+
+; Function Attrs: nounwind
+define float @emit_xsaddsp() {
+entry:
+ %0 = load float, float* @a, align 4
+ %1 = load float, float* @b, align 4
+ %add = fadd float %0, %1
+ ret float %add
+; CHECK-LABEL: @emit_xsaddsp
+; CHECK: xsaddsp {{[0-9]+}}
+}
+
+; Function Attrs: nounwind
+define float @emit_xssubsp() {
+entry:
+ %0 = load float, float* @a, align 4
+ %1 = load float, float* @b, align 4
+ %sub = fsub float %0, %1
+ ret float %sub
+; CHECK-LABEL: @emit_xssubsp
+; CHECK: xssubsp {{[0-9]+}}
+}
+
+; Function Attrs: nounwind
+define float @emit_xsdivsp() {
+entry:
+ %0 = load float, float* @a, align 4
+ %1 = load float, float* @b, align 4
+ %div = fdiv float %0, %1
+ ret float %div
+; CHECK-LABEL: @emit_xsdivsp
+; CHECK: xsdivsp {{[0-9]+}}
+}
+
+; Function Attrs: nounwind
+define float @emit_xsmulsp() {
+entry:
+ %0 = load float, float* @a, align 4
+ %1 = load float, float* @b, align 4
+ %mul = fmul float %0, %1
+ ret float %mul
+; CHECK-LABEL: @emit_xsmulsp
+; CHECK: xsmulsp {{[0-9]+}}
+}
+
+; Function Attrs: nounwind
+define float @emit_xssqrtsp() {
+entry:
+ %0 = load float, float* @b, align 4
+ %call = call float @sqrtf(float %0)
+ ret float %call
+; CHECK-LABEL: @emit_xssqrtsp
+; CHECK: xssqrtsp {{[0-9]+}}
+}
+
+; Function Attrs: nounwind
+declare float @sqrtf(float)
+
+; Function Attrs: nounwind
+define double @emit_xsadddp() {
+entry:
+ %0 = load double, double* @c, align 8
+ %1 = load double, double* @d, align 8
+ %add = fadd double %0, %1
+ ret double %add
+; CHECK-LABEL: @emit_xsadddp
+; CHECK: xsadddp {{[0-9]+}}
+}
+
+; Function Attrs: nounwind
+define double @emit_xssubdp() {
+entry:
+ %0 = load double, double* @c, align 8
+ %1 = load double, double* @d, align 8
+ %sub = fsub double %0, %1
+ ret double %sub
+; CHECK-LABEL: @emit_xssubdp
+; CHECK: xssubdp {{[0-9]+}}
+}
+
+; Function Attrs: nounwind
+define double @emit_xsdivdp() {
+entry:
+ %0 = load double, double* @c, align 8
+ %1 = load double, double* @d, align 8
+ %div = fdiv double %0, %1
+ ret double %div
+; CHECK-LABEL: @emit_xsdivdp
+; CHECK: xsdivdp {{[0-9]+}}
+}
+
+; Function Attrs: nounwind
+define double @emit_xsmuldp() {
+entry:
+ %0 = load double, double* @c, align 8
+ %1 = load double, double* @d, align 8
+ %mul = fmul double %0, %1
+ ret double %mul
+; CHECK-LABEL: @emit_xsmuldp
+; CHECK: xsmuldp {{[0-9]+}}
+}
+
+; Function Attrs: nounwind
+define double @emit_xssqrtdp() {
+entry:
+ %0 = load double, double* @d, align 8
+ %call = call double @sqrt(double %0)
+ ret double %call
+; CHECK-LABEL: @emit_xssqrtdp
+; CHECK: xssqrtdp {{[0-9]+}}
+}
+
+; Function Attrs: nounwind
+declare double @sqrt(double)
diff --git a/test/CodeGen/PowerPC/vsx-fma-m.ll b/test/CodeGen/PowerPC/vsx-fma-m.ll
index c492e169e10f..d85927396e3e 100644
--- a/test/CodeGen/PowerPC/vsx-fma-m.ll
+++ b/test/CodeGen/PowerPC/vsx-fma-m.ll
@@ -12,7 +12,7 @@ entry:
%0 = tail call double @llvm.fma.f64(double %b, double %c, double %a)
store double %0, double* %d, align 8
%1 = tail call double @llvm.fma.f64(double %b, double %e, double %a)
- %arrayidx1 = getelementptr inbounds double* %d, i64 1
+ %arrayidx1 = getelementptr inbounds double, double* %d, i64 1
store double %1, double* %arrayidx1, align 8
ret void
@@ -39,10 +39,10 @@ entry:
%0 = tail call double @llvm.fma.f64(double %b, double %c, double %a)
store double %0, double* %d, align 8
%1 = tail call double @llvm.fma.f64(double %b, double %e, double %a)
- %arrayidx1 = getelementptr inbounds double* %d, i64 1
+ %arrayidx1 = getelementptr inbounds double, double* %d, i64 1
store double %1, double* %arrayidx1, align 8
%2 = tail call double @llvm.fma.f64(double %b, double %f, double %a)
- %arrayidx2 = getelementptr inbounds double* %d, i64 2
+ %arrayidx2 = getelementptr inbounds double, double* %d, i64 2
store double %2, double* %arrayidx2, align 8
ret void
@@ -77,12 +77,12 @@ entry:
store double %0, double* %d, align 8
%1 = tail call double @llvm.fma.f64(double %b, double %e, double %a)
%2 = tail call double @llvm.fma.f64(double %b, double %c, double %1)
- %arrayidx1 = getelementptr inbounds double* %d, i64 3
+ %arrayidx1 = getelementptr inbounds double, double* %d, i64 3
store double %2, double* %arrayidx1, align 8
%3 = tail call double @llvm.fma.f64(double %b, double %f, double %a)
- %arrayidx2 = getelementptr inbounds double* %d, i64 2
+ %arrayidx2 = getelementptr inbounds double, double* %d, i64 2
store double %3, double* %arrayidx2, align 8
- %arrayidx3 = getelementptr inbounds double* %d, i64 1
+ %arrayidx3 = getelementptr inbounds double, double* %d, i64 1
store double %1, double* %arrayidx3, align 8
ret void
@@ -98,9 +98,9 @@ entry:
; re-ordering the instructions.
; CHECK-DAG: xsmaddadp [[F1]], 2, 3
-; CHECK-DAG: xsmaddmdp 2, 3, 4
+; CHECK-DAG: xsmaddmdp 3, 2, 4
; CHECK-DAG: stxsdx [[F1]], 0, 8
-; CHECK-DAG: stxsdx 2, 8, [[C1]]
+; CHECK-DAG: stxsdx 3, 8, [[C1]]
; CHECK-DAG: stxsdx 1, 8, [[C2]]
; CHECK-DAG: stxsdx 4, 8, [[C3]]
; CHECK: blr
@@ -125,13 +125,13 @@ entry:
%0 = tail call double @llvm.fma.f64(double %b, double %c, double %a)
store double %0, double* %d, align 8
%1 = tail call double @llvm.fma.f64(double %b, double %e, double %a)
- %arrayidx1 = getelementptr inbounds double* %d, i64 1
+ %arrayidx1 = getelementptr inbounds double, double* %d, i64 1
store double %1, double* %arrayidx1, align 8
%2 = tail call double @llvm.fma.f64(double %b, double %c, double %1)
- %arrayidx3 = getelementptr inbounds double* %d, i64 3
+ %arrayidx3 = getelementptr inbounds double, double* %d, i64 3
store double %2, double* %arrayidx3, align 8
%3 = tail call double @llvm.fma.f64(double %b, double %f, double %a)
- %arrayidx4 = getelementptr inbounds double* %d, i64 2
+ %arrayidx4 = getelementptr inbounds double, double* %d, i64 2
store double %3, double* %arrayidx4, align 8
ret void
@@ -178,7 +178,7 @@ entry:
%0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> %c, <2 x double> %a)
store <2 x double> %0, <2 x double>* %d, align 8
%1 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> %e, <2 x double> %a)
- %arrayidx1 = getelementptr inbounds <2 x double>* %d, i64 1
+ %arrayidx1 = getelementptr inbounds <2 x double>, <2 x double>* %d, i64 1
store <2 x double> %1, <2 x double>* %arrayidx1, align 8
ret void
@@ -205,10 +205,10 @@ entry:
%0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> %c, <2 x double> %a)
store <2 x double> %0, <2 x double>* %d, align 8
%1 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> %e, <2 x double> %a)
- %arrayidx1 = getelementptr inbounds <2 x double>* %d, i64 1
+ %arrayidx1 = getelementptr inbounds <2 x double>, <2 x double>* %d, i64 1
store <2 x double> %1, <2 x double>* %arrayidx1, align 8
%2 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> %f, <2 x double> %a)
- %arrayidx2 = getelementptr inbounds <2 x double>* %d, i64 2
+ %arrayidx2 = getelementptr inbounds <2 x double>, <2 x double>* %d, i64 2
store <2 x double> %2, <2 x double>* %arrayidx2, align 8
ret void
@@ -243,12 +243,12 @@ entry:
store <2 x double> %0, <2 x double>* %d, align 8
%1 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> %e, <2 x double> %a)
%2 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> %c, <2 x double> %1)
- %arrayidx1 = getelementptr inbounds <2 x double>* %d, i64 3
+ %arrayidx1 = getelementptr inbounds <2 x double>, <2 x double>* %d, i64 3
store <2 x double> %2, <2 x double>* %arrayidx1, align 8
%3 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> %f, <2 x double> %a)
- %arrayidx2 = getelementptr inbounds <2 x double>* %d, i64 2
+ %arrayidx2 = getelementptr inbounds <2 x double>, <2 x double>* %d, i64 2
store <2 x double> %3, <2 x double>* %arrayidx2, align 8
- %arrayidx3 = getelementptr inbounds <2 x double>* %d, i64 1
+ %arrayidx3 = getelementptr inbounds <2 x double>, <2 x double>* %d, i64 1
store <2 x double> %1, <2 x double>* %arrayidx3, align 8
ret void
@@ -300,13 +300,13 @@ entry:
%0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> %c, <2 x double> %a)
store <2 x double> %0, <2 x double>* %d, align 8
%1 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> %e, <2 x double> %a)
- %arrayidx1 = getelementptr inbounds <2 x double>* %d, i64 1
+ %arrayidx1 = getelementptr inbounds <2 x double>, <2 x double>* %d, i64 1
store <2 x double> %1, <2 x double>* %arrayidx1, align 8
%2 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> %c, <2 x double> %1)
- %arrayidx3 = getelementptr inbounds <2 x double>* %d, i64 3
+ %arrayidx3 = getelementptr inbounds <2 x double>, <2 x double>* %d, i64 3
store <2 x double> %2, <2 x double>* %arrayidx3, align 8
%3 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> %f, <2 x double> %a)
- %arrayidx4 = getelementptr inbounds <2 x double>* %d, i64 2
+ %arrayidx4 = getelementptr inbounds <2 x double>, <2 x double>* %d, i64 2
store <2 x double> %3, <2 x double>* %arrayidx4, align 8
ret void
diff --git a/test/CodeGen/PowerPC/vsx-infl-copy1.ll b/test/CodeGen/PowerPC/vsx-infl-copy1.ll
new file mode 100644
index 000000000000..531e3ad2d87c
--- /dev/null
+++ b/test/CodeGen/PowerPC/vsx-infl-copy1.ll
@@ -0,0 +1,133 @@
+; RUN: llc -mcpu=pwr7 < %s | FileCheck %s
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+@ub = external global [1024 x i32], align 4
+@uc = external global [1024 x i32], align 4
+
+; Function Attrs: noinline nounwind
+define void @_Z8example9Pj() #0 {
+entry:
+ br label %vector.body
+
+; CHECK-LABEL: @_Z8example9Pj
+; CHECK: xxlor
+
+vector.body: ; preds = %vector.body, %entry
+ %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
+ %vec.phi = phi <4 x i32> [ zeroinitializer, %entry ], [ %43, %vector.body ]
+ %vec.phi20 = phi <4 x i32> [ zeroinitializer, %entry ], [ %44, %vector.body ]
+ %vec.phi21 = phi <4 x i32> [ zeroinitializer, %entry ], [ %45, %vector.body ]
+ %vec.phi23 = phi <4 x i32> [ zeroinitializer, %entry ], [ %46, %vector.body ]
+ %vec.phi24 = phi <4 x i32> [ zeroinitializer, %entry ], [ %47, %vector.body ]
+ %vec.phi25 = phi <4 x i32> [ zeroinitializer, %entry ], [ %48, %vector.body ]
+ %vec.phi26 = phi <4 x i32> [ zeroinitializer, %entry ], [ %49, %vector.body ]
+ %vec.phi27 = phi <4 x i32> [ zeroinitializer, %entry ], [ %50, %vector.body ]
+ %vec.phi28 = phi <4 x i32> [ zeroinitializer, %entry ], [ %51, %vector.body ]
+ %vec.phi29 = phi <4 x i32> [ zeroinitializer, %entry ], [ %52, %vector.body ]
+ %vec.phi30 = phi <4 x i32> [ zeroinitializer, %entry ], [ %53, %vector.body ]
+ %wide.load32 = load <4 x i32>, <4 x i32>* null, align 4
+ %.sum82 = add i64 %index, 24
+ %0 = getelementptr [1024 x i32], [1024 x i32]* @ub, i64 0, i64 %.sum82
+ %1 = bitcast i32* %0 to <4 x i32>*
+ %wide.load36 = load <4 x i32>, <4 x i32>* %1, align 4
+ %wide.load37 = load <4 x i32>, <4 x i32>* undef, align 4
+ %.sum84 = add i64 %index, 32
+ %2 = getelementptr [1024 x i32], [1024 x i32]* @ub, i64 0, i64 %.sum84
+ %3 = bitcast i32* %2 to <4 x i32>*
+ %wide.load38 = load <4 x i32>, <4 x i32>* %3, align 4
+ %.sum85 = add i64 %index, 36
+ %4 = getelementptr [1024 x i32], [1024 x i32]* @ub, i64 0, i64 %.sum85
+ %5 = bitcast i32* %4 to <4 x i32>*
+ %wide.load39 = load <4 x i32>, <4 x i32>* %5, align 4
+ %6 = getelementptr [1024 x i32], [1024 x i32]* @ub, i64 0, i64 undef
+ %7 = bitcast i32* %6 to <4 x i32>*
+ %wide.load40 = load <4 x i32>, <4 x i32>* %7, align 4
+ %.sum87 = add i64 %index, 44
+ %8 = getelementptr [1024 x i32], [1024 x i32]* @ub, i64 0, i64 %.sum87
+ %9 = bitcast i32* %8 to <4 x i32>*
+ %wide.load41 = load <4 x i32>, <4 x i32>* %9, align 4
+ %10 = getelementptr inbounds [1024 x i32], [1024 x i32]* @uc, i64 0, i64 %index
+ %11 = bitcast i32* %10 to <4 x i32>*
+ %wide.load42 = load <4 x i32>, <4 x i32>* %11, align 4
+ %.sum8889 = or i64 %index, 4
+ %12 = getelementptr [1024 x i32], [1024 x i32]* @uc, i64 0, i64 %.sum8889
+ %13 = bitcast i32* %12 to <4 x i32>*
+ %wide.load43 = load <4 x i32>, <4 x i32>* %13, align 4
+ %.sum9091 = or i64 %index, 8
+ %14 = getelementptr [1024 x i32], [1024 x i32]* @uc, i64 0, i64 %.sum9091
+ %15 = bitcast i32* %14 to <4 x i32>*
+ %wide.load44 = load <4 x i32>, <4 x i32>* %15, align 4
+ %.sum94 = add i64 %index, 16
+ %16 = getelementptr [1024 x i32], [1024 x i32]* @uc, i64 0, i64 %.sum94
+ %17 = bitcast i32* %16 to <4 x i32>*
+ %wide.load46 = load <4 x i32>, <4 x i32>* %17, align 4
+ %.sum95 = add i64 %index, 20
+ %18 = getelementptr [1024 x i32], [1024 x i32]* @uc, i64 0, i64 %.sum95
+ %19 = bitcast i32* %18 to <4 x i32>*
+ %wide.load47 = load <4 x i32>, <4 x i32>* %19, align 4
+ %20 = getelementptr [1024 x i32], [1024 x i32]* @uc, i64 0, i64 undef
+ %21 = bitcast i32* %20 to <4 x i32>*
+ %wide.load48 = load <4 x i32>, <4 x i32>* %21, align 4
+ %.sum97 = add i64 %index, 28
+ %22 = getelementptr [1024 x i32], [1024 x i32]* @uc, i64 0, i64 %.sum97
+ %23 = bitcast i32* %22 to <4 x i32>*
+ %wide.load49 = load <4 x i32>, <4 x i32>* %23, align 4
+ %.sum98 = add i64 %index, 32
+ %24 = getelementptr [1024 x i32], [1024 x i32]* @uc, i64 0, i64 %.sum98
+ %25 = bitcast i32* %24 to <4 x i32>*
+ %wide.load50 = load <4 x i32>, <4 x i32>* %25, align 4
+ %.sum99 = add i64 %index, 36
+ %26 = getelementptr [1024 x i32], [1024 x i32]* @uc, i64 0, i64 %.sum99
+ %27 = bitcast i32* %26 to <4 x i32>*
+ %wide.load51 = load <4 x i32>, <4 x i32>* %27, align 4
+ %.sum100 = add i64 %index, 40
+ %28 = getelementptr [1024 x i32], [1024 x i32]* @uc, i64 0, i64 %.sum100
+ %29 = bitcast i32* %28 to <4 x i32>*
+ %wide.load52 = load <4 x i32>, <4 x i32>* %29, align 4
+ %.sum101 = add i64 %index, 44
+ %30 = getelementptr [1024 x i32], [1024 x i32]* @uc, i64 0, i64 %.sum101
+ %31 = bitcast i32* %30 to <4 x i32>*
+ %wide.load53 = load <4 x i32>, <4 x i32>* %31, align 4
+ %32 = add <4 x i32> zeroinitializer, %vec.phi
+ %33 = add <4 x i32> zeroinitializer, %vec.phi20
+ %34 = add <4 x i32> %wide.load32, %vec.phi21
+ %35 = add <4 x i32> zeroinitializer, %vec.phi23
+ %36 = add <4 x i32> zeroinitializer, %vec.phi24
+ %37 = add <4 x i32> %wide.load36, %vec.phi25
+ %38 = add <4 x i32> %wide.load37, %vec.phi26
+ %39 = add <4 x i32> %wide.load38, %vec.phi27
+ %40 = add <4 x i32> %wide.load39, %vec.phi28
+ %41 = add <4 x i32> %wide.load40, %vec.phi29
+ %42 = add <4 x i32> %wide.load41, %vec.phi30
+ %43 = sub <4 x i32> %32, %wide.load42
+ %44 = sub <4 x i32> %33, %wide.load43
+ %45 = sub <4 x i32> %34, %wide.load44
+ %46 = sub <4 x i32> %35, %wide.load46
+ %47 = sub <4 x i32> %36, %wide.load47
+ %48 = sub <4 x i32> %37, %wide.load48
+ %49 = sub <4 x i32> %38, %wide.load49
+ %50 = sub <4 x i32> %39, %wide.load50
+ %51 = sub <4 x i32> %40, %wide.load51
+ %52 = sub <4 x i32> %41, %wide.load52
+ %53 = sub <4 x i32> %42, %wide.load53
+ %index.next = add i64 %index, 48
+ br i1 false, label %middle.block, label %vector.body
+
+middle.block: ; preds = %vector.body
+ %.lcssa112 = phi <4 x i32> [ %53, %vector.body ]
+ %.lcssa111 = phi <4 x i32> [ %52, %vector.body ]
+ %.lcssa110 = phi <4 x i32> [ %51, %vector.body ]
+ %.lcssa109 = phi <4 x i32> [ %50, %vector.body ]
+ %.lcssa108 = phi <4 x i32> [ %49, %vector.body ]
+ %.lcssa107 = phi <4 x i32> [ %48, %vector.body ]
+ %.lcssa106 = phi <4 x i32> [ %47, %vector.body ]
+ %.lcssa105 = phi <4 x i32> [ %46, %vector.body ]
+ %.lcssa103 = phi <4 x i32> [ %45, %vector.body ]
+ %.lcssa102 = phi <4 x i32> [ %44, %vector.body ]
+ %.lcssa = phi <4 x i32> [ %43, %vector.body ]
+ ret void
+}
+
+attributes #0 = { noinline nounwind }
+
diff --git a/test/CodeGen/PowerPC/vsx-infl-copy2.ll b/test/CodeGen/PowerPC/vsx-infl-copy2.ll
new file mode 100644
index 000000000000..32d6f1e68bd8
--- /dev/null
+++ b/test/CodeGen/PowerPC/vsx-infl-copy2.ll
@@ -0,0 +1,114 @@
+; RUN: llc -mcpu=pwr7 < %s | FileCheck %s
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+; Function Attrs: nounwind
+define void @_Z28test_goto_loop_unroll_factorILi22EiEvPKT0_iPKc(i32* nocapture readonly %first) #0 {
+entry:
+ br i1 false, label %loop2_start, label %if.end5
+
+; CHECK-LABEL: @_Z28test_goto_loop_unroll_factorILi22EiEvPKT0_iPKc
+
+loop2_start: ; preds = %loop2_start, %entry
+ br i1 undef, label %loop2_start, label %if.then.i31
+
+if.end5: ; preds = %entry
+ br i1 undef, label %loop_start.preheader, label %if.then.i31
+
+loop_start.preheader: ; preds = %if.end5
+ br i1 false, label %middle.block, label %vector.body
+
+vector.body: ; preds = %vector.body, %loop_start.preheader
+ %vec.phi61 = phi <4 x i32> [ %34, %vector.body ], [ zeroinitializer, %loop_start.preheader ]
+ %vec.phi62 = phi <4 x i32> [ %35, %vector.body ], [ zeroinitializer, %loop_start.preheader ]
+ %vec.phi63 = phi <4 x i32> [ %36, %vector.body ], [ zeroinitializer, %loop_start.preheader ]
+ %vec.phi65 = phi <4 x i32> [ %37, %vector.body ], [ zeroinitializer, %loop_start.preheader ]
+ %vec.phi67 = phi <4 x i32> [ %38, %vector.body ], [ zeroinitializer, %loop_start.preheader ]
+ %vec.phi68 = phi <4 x i32> [ %39, %vector.body ], [ zeroinitializer, %loop_start.preheader ]
+ %vec.phi69 = phi <4 x i32> [ %40, %vector.body ], [ zeroinitializer, %loop_start.preheader ]
+ %vec.phi70 = phi <4 x i32> [ %41, %vector.body ], [ zeroinitializer, %loop_start.preheader ]
+ %vec.phi71 = phi <4 x i32> [ %42, %vector.body ], [ zeroinitializer, %loop_start.preheader ]
+ %.sum = add i64 0, 4
+ %wide.load72 = load <4 x i32>, <4 x i32>* null, align 4
+ %.sum109 = add i64 0, 8
+ %0 = getelementptr i32, i32* %first, i64 %.sum109
+ %1 = bitcast i32* %0 to <4 x i32>*
+ %wide.load73 = load <4 x i32>, <4 x i32>* %1, align 4
+ %.sum110 = add i64 0, 12
+ %2 = getelementptr i32, i32* %first, i64 %.sum110
+ %3 = bitcast i32* %2 to <4 x i32>*
+ %wide.load74 = load <4 x i32>, <4 x i32>* %3, align 4
+ %.sum112 = add i64 0, 20
+ %4 = getelementptr i32, i32* %first, i64 %.sum112
+ %5 = bitcast i32* %4 to <4 x i32>*
+ %wide.load76 = load <4 x i32>, <4 x i32>* %5, align 4
+ %.sum114 = add i64 0, 28
+ %6 = getelementptr i32, i32* %first, i64 %.sum114
+ %7 = bitcast i32* %6 to <4 x i32>*
+ %wide.load78 = load <4 x i32>, <4 x i32>* %7, align 4
+ %.sum115 = add i64 0, 32
+ %8 = getelementptr i32, i32* %first, i64 %.sum115
+ %9 = bitcast i32* %8 to <4 x i32>*
+ %wide.load79 = load <4 x i32>, <4 x i32>* %9, align 4
+ %.sum116 = add i64 0, 36
+ %10 = getelementptr i32, i32* %first, i64 %.sum116
+ %11 = bitcast i32* %10 to <4 x i32>*
+ %wide.load80 = load <4 x i32>, <4 x i32>* %11, align 4
+ %.sum117 = add i64 0, 40
+ %12 = getelementptr i32, i32* %first, i64 %.sum117
+ %13 = bitcast i32* %12 to <4 x i32>*
+ %wide.load81 = load <4 x i32>, <4 x i32>* %13, align 4
+ %.sum118 = add i64 0, 44
+ %14 = getelementptr i32, i32* %first, i64 %.sum118
+ %15 = bitcast i32* %14 to <4 x i32>*
+ %wide.load82 = load <4 x i32>, <4 x i32>* %15, align 4
+ %16 = mul <4 x i32> %wide.load72, <i32 269850533, i32 269850533, i32 269850533, i32 269850533>
+ %17 = mul <4 x i32> %wide.load73, <i32 269850533, i32 269850533, i32 269850533, i32 269850533>
+ %18 = mul <4 x i32> %wide.load74, <i32 269850533, i32 269850533, i32 269850533, i32 269850533>
+ %19 = mul <4 x i32> %wide.load76, <i32 269850533, i32 269850533, i32 269850533, i32 269850533>
+ %20 = mul <4 x i32> %wide.load78, <i32 269850533, i32 269850533, i32 269850533, i32 269850533>
+ %21 = mul <4 x i32> %wide.load79, <i32 269850533, i32 269850533, i32 269850533, i32 269850533>
+ %22 = mul <4 x i32> %wide.load80, <i32 269850533, i32 269850533, i32 269850533, i32 269850533>
+ %23 = mul <4 x i32> %wide.load81, <i32 269850533, i32 269850533, i32 269850533, i32 269850533>
+ %24 = mul <4 x i32> %wide.load82, <i32 269850533, i32 269850533, i32 269850533, i32 269850533>
+ %25 = add <4 x i32> %16, <i32 -1138325064, i32 -1138325064, i32 -1138325064, i32 -1138325064>
+ %26 = add <4 x i32> %17, <i32 -1138325064, i32 -1138325064, i32 -1138325064, i32 -1138325064>
+ %27 = add <4 x i32> %18, <i32 -1138325064, i32 -1138325064, i32 -1138325064, i32 -1138325064>
+ %28 = add <4 x i32> %19, <i32 -1138325064, i32 -1138325064, i32 -1138325064, i32 -1138325064>
+ %29 = add <4 x i32> %20, <i32 -1138325064, i32 -1138325064, i32 -1138325064, i32 -1138325064>
+ %30 = add <4 x i32> %21, <i32 -1138325064, i32 -1138325064, i32 -1138325064, i32 -1138325064>
+ %31 = add <4 x i32> %22, <i32 -1138325064, i32 -1138325064, i32 -1138325064, i32 -1138325064>
+ %32 = add <4 x i32> %23, <i32 -1138325064, i32 -1138325064, i32 -1138325064, i32 -1138325064>
+ %33 = add <4 x i32> %24, <i32 -1138325064, i32 -1138325064, i32 -1138325064, i32 -1138325064>
+ %34 = add nsw <4 x i32> %25, %vec.phi61
+ %35 = add nsw <4 x i32> %26, %vec.phi62
+ %36 = add nsw <4 x i32> %27, %vec.phi63
+ %37 = add nsw <4 x i32> %28, %vec.phi65
+ %38 = add nsw <4 x i32> %29, %vec.phi67
+ %39 = add nsw <4 x i32> %30, %vec.phi68
+ %40 = add nsw <4 x i32> %31, %vec.phi69
+ %41 = add nsw <4 x i32> %32, %vec.phi70
+ %42 = add nsw <4 x i32> %33, %vec.phi71
+ br i1 false, label %middle.block, label %vector.body
+
+middle.block: ; preds = %vector.body, %loop_start.preheader
+ %rdx.vec.exit.phi85 = phi <4 x i32> [ zeroinitializer, %loop_start.preheader ], [ %34, %vector.body ]
+ %rdx.vec.exit.phi86 = phi <4 x i32> [ zeroinitializer, %loop_start.preheader ], [ %35, %vector.body ]
+ %rdx.vec.exit.phi87 = phi <4 x i32> [ zeroinitializer, %loop_start.preheader ], [ %36, %vector.body ]
+ %rdx.vec.exit.phi89 = phi <4 x i32> [ zeroinitializer, %loop_start.preheader ], [ %37, %vector.body ]
+ %rdx.vec.exit.phi91 = phi <4 x i32> [ zeroinitializer, %loop_start.preheader ], [ %38, %vector.body ]
+ %rdx.vec.exit.phi92 = phi <4 x i32> [ zeroinitializer, %loop_start.preheader ], [ %39, %vector.body ]
+ %rdx.vec.exit.phi93 = phi <4 x i32> [ zeroinitializer, %loop_start.preheader ], [ %40, %vector.body ]
+ %rdx.vec.exit.phi94 = phi <4 x i32> [ zeroinitializer, %loop_start.preheader ], [ %41, %vector.body ]
+ %rdx.vec.exit.phi95 = phi <4 x i32> [ zeroinitializer, %loop_start.preheader ], [ %42, %vector.body ]
+ br i1 false, label %if.then.i31, label %loop_start.prol
+
+loop_start.prol: ; preds = %loop_start.prol, %middle.block
+ br label %loop_start.prol
+
+if.then.i31: ; preds = %middle.block, %if.end5, %loop2_start
+ unreachable
+}
+
+attributes #0 = { nounwind }
+
diff --git a/test/CodeGen/PowerPC/vsx-ldst-builtin-le.ll b/test/CodeGen/PowerPC/vsx-ldst-builtin-le.ll
index 7367672eab8b..d6940e46df37 100644
--- a/test/CodeGen/PowerPC/vsx-ldst-builtin-le.ll
+++ b/test/CodeGen/PowerPC/vsx-ldst-builtin-le.ll
@@ -1,7 +1,6 @@
; RUN: llc -mcpu=pwr8 -mattr=+vsx -O2 -mtriple=powerpc64le-unknown-linux-gnu < %s > %t
; RUN: grep lxvd2x < %t | count 18
; RUN: grep stxvd2x < %t | count 18
-; RUN: grep xxpermdi < %t | count 36
@vf = global <4 x float> <float -1.500000e+00, float 2.500000e+00, float -3.500000e+00, float 4.500000e+00>, align 16
@vd = global <2 x double> <double 3.500000e+00, double -7.500000e+00>, align 16
@@ -51,117 +50,117 @@ entry:
%__b.addr.i = alloca <4 x i32>*, align 8
store i32 0, i32* %__a.addr.i, align 4
store <4 x i32>* @vsi, <4 x i32>** %__b.addr.i, align 8
- %0 = load i32* %__a.addr.i, align 4
- %1 = load <4 x i32>** %__b.addr.i, align 8
+ %0 = load i32, i32* %__a.addr.i, align 4
+ %1 = load <4 x i32>*, <4 x i32>** %__b.addr.i, align 8
%2 = bitcast <4 x i32>* %1 to i8*
- %3 = getelementptr i8* %2, i32 %0
+ %3 = getelementptr i8, i8* %2, i32 %0
%4 = call <4 x i32> @llvm.ppc.vsx.lxvw4x(i8* %3)
store <4 x i32> %4, <4 x i32>* @res_vsi, align 16
store i32 0, i32* %__a.addr.i31, align 4
store <4 x i32>* @vui, <4 x i32>** %__b.addr.i32, align 8
- %5 = load i32* %__a.addr.i31, align 4
- %6 = load <4 x i32>** %__b.addr.i32, align 8
+ %5 = load i32, i32* %__a.addr.i31, align 4
+ %6 = load <4 x i32>*, <4 x i32>** %__b.addr.i32, align 8
%7 = bitcast <4 x i32>* %6 to i8*
- %8 = getelementptr i8* %7, i32 %5
+ %8 = getelementptr i8, i8* %7, i32 %5
%9 = call <4 x i32> @llvm.ppc.vsx.lxvw4x(i8* %8)
store <4 x i32> %9, <4 x i32>* @res_vui, align 16
store i32 0, i32* %__a.addr.i29, align 4
store <4 x float>* @vf, <4 x float>** %__b.addr.i30, align 8
- %10 = load i32* %__a.addr.i29, align 4
- %11 = load <4 x float>** %__b.addr.i30, align 8
+ %10 = load i32, i32* %__a.addr.i29, align 4
+ %11 = load <4 x float>*, <4 x float>** %__b.addr.i30, align 8
%12 = bitcast <4 x float>* %11 to i8*
- %13 = getelementptr i8* %12, i32 %10
+ %13 = getelementptr i8, i8* %12, i32 %10
%14 = call <4 x i32> @llvm.ppc.vsx.lxvw4x(i8* %13)
%15 = bitcast <4 x i32> %14 to <4 x float>
store <4 x float> %15, <4 x float>* @res_vf, align 16
store i32 0, i32* %__a.addr.i27, align 4
store <2 x i64>* @vsll, <2 x i64>** %__b.addr.i28, align 8
- %16 = load i32* %__a.addr.i27, align 4
- %17 = load <2 x i64>** %__b.addr.i28, align 8
+ %16 = load i32, i32* %__a.addr.i27, align 4
+ %17 = load <2 x i64>*, <2 x i64>** %__b.addr.i28, align 8
%18 = bitcast <2 x i64>* %17 to i8*
- %19 = getelementptr i8* %18, i32 %16
+ %19 = getelementptr i8, i8* %18, i32 %16
%20 = call <2 x double> @llvm.ppc.vsx.lxvd2x(i8* %19)
%21 = bitcast <2 x double> %20 to <2 x i64>
store <2 x i64> %21, <2 x i64>* @res_vsll, align 16
store i32 0, i32* %__a.addr.i25, align 4
store <2 x i64>* @vull, <2 x i64>** %__b.addr.i26, align 8
- %22 = load i32* %__a.addr.i25, align 4
- %23 = load <2 x i64>** %__b.addr.i26, align 8
+ %22 = load i32, i32* %__a.addr.i25, align 4
+ %23 = load <2 x i64>*, <2 x i64>** %__b.addr.i26, align 8
%24 = bitcast <2 x i64>* %23 to i8*
- %25 = getelementptr i8* %24, i32 %22
+ %25 = getelementptr i8, i8* %24, i32 %22
%26 = call <2 x double> @llvm.ppc.vsx.lxvd2x(i8* %25)
%27 = bitcast <2 x double> %26 to <2 x i64>
store <2 x i64> %27, <2 x i64>* @res_vull, align 16
store i32 0, i32* %__a.addr.i23, align 4
store <2 x double>* @vd, <2 x double>** %__b.addr.i24, align 8
- %28 = load i32* %__a.addr.i23, align 4
- %29 = load <2 x double>** %__b.addr.i24, align 8
+ %28 = load i32, i32* %__a.addr.i23, align 4
+ %29 = load <2 x double>*, <2 x double>** %__b.addr.i24, align 8
%30 = bitcast <2 x double>* %29 to i8*
- %31 = getelementptr i8* %30, i32 %28
+ %31 = getelementptr i8, i8* %30, i32 %28
%32 = call <2 x double> @llvm.ppc.vsx.lxvd2x(i8* %31)
store <2 x double> %32, <2 x double>* @res_vd, align 16
- %33 = load <4 x i32>* @vsi, align 16
+ %33 = load <4 x i32>, <4 x i32>* @vsi, align 16
store <4 x i32> %33, <4 x i32>* %__a.addr.i20, align 16
store i32 0, i32* %__b.addr.i21, align 4
store <4 x i32>* @res_vsi, <4 x i32>** %__c.addr.i22, align 8
- %34 = load <4 x i32>* %__a.addr.i20, align 16
- %35 = load i32* %__b.addr.i21, align 4
- %36 = load <4 x i32>** %__c.addr.i22, align 8
+ %34 = load <4 x i32>, <4 x i32>* %__a.addr.i20, align 16
+ %35 = load i32, i32* %__b.addr.i21, align 4
+ %36 = load <4 x i32>*, <4 x i32>** %__c.addr.i22, align 8
%37 = bitcast <4 x i32>* %36 to i8*
- %38 = getelementptr i8* %37, i32 %35
+ %38 = getelementptr i8, i8* %37, i32 %35
call void @llvm.ppc.vsx.stxvw4x(<4 x i32> %34, i8* %38)
- %39 = load <4 x i32>* @vui, align 16
+ %39 = load <4 x i32>, <4 x i32>* @vui, align 16
store <4 x i32> %39, <4 x i32>* %__a.addr.i17, align 16
store i32 0, i32* %__b.addr.i18, align 4
store <4 x i32>* @res_vui, <4 x i32>** %__c.addr.i19, align 8
- %40 = load <4 x i32>* %__a.addr.i17, align 16
- %41 = load i32* %__b.addr.i18, align 4
- %42 = load <4 x i32>** %__c.addr.i19, align 8
+ %40 = load <4 x i32>, <4 x i32>* %__a.addr.i17, align 16
+ %41 = load i32, i32* %__b.addr.i18, align 4
+ %42 = load <4 x i32>*, <4 x i32>** %__c.addr.i19, align 8
%43 = bitcast <4 x i32>* %42 to i8*
- %44 = getelementptr i8* %43, i32 %41
+ %44 = getelementptr i8, i8* %43, i32 %41
call void @llvm.ppc.vsx.stxvw4x(<4 x i32> %40, i8* %44)
- %45 = load <4 x float>* @vf, align 16
+ %45 = load <4 x float>, <4 x float>* @vf, align 16
store <4 x float> %45, <4 x float>* %__a.addr.i14, align 16
store i32 0, i32* %__b.addr.i15, align 4
store <4 x float>* @res_vf, <4 x float>** %__c.addr.i16, align 8
- %46 = load <4 x float>* %__a.addr.i14, align 16
+ %46 = load <4 x float>, <4 x float>* %__a.addr.i14, align 16
%47 = bitcast <4 x float> %46 to <4 x i32>
- %48 = load i32* %__b.addr.i15, align 4
- %49 = load <4 x float>** %__c.addr.i16, align 8
+ %48 = load i32, i32* %__b.addr.i15, align 4
+ %49 = load <4 x float>*, <4 x float>** %__c.addr.i16, align 8
%50 = bitcast <4 x float>* %49 to i8*
- %51 = getelementptr i8* %50, i32 %48
+ %51 = getelementptr i8, i8* %50, i32 %48
call void @llvm.ppc.vsx.stxvw4x(<4 x i32> %47, i8* %51) #1
- %52 = load <2 x i64>* @vsll, align 16
+ %52 = load <2 x i64>, <2 x i64>* @vsll, align 16
store <2 x i64> %52, <2 x i64>* %__a.addr.i11, align 16
store i32 0, i32* %__b.addr.i12, align 4
store <2 x i64>* @res_vsll, <2 x i64>** %__c.addr.i13, align 8
- %53 = load <2 x i64>* %__a.addr.i11, align 16
+ %53 = load <2 x i64>, <2 x i64>* %__a.addr.i11, align 16
%54 = bitcast <2 x i64> %53 to <2 x double>
- %55 = load i32* %__b.addr.i12, align 4
- %56 = load <2 x i64>** %__c.addr.i13, align 8
+ %55 = load i32, i32* %__b.addr.i12, align 4
+ %56 = load <2 x i64>*, <2 x i64>** %__c.addr.i13, align 8
%57 = bitcast <2 x i64>* %56 to i8*
- %58 = getelementptr i8* %57, i32 %55
+ %58 = getelementptr i8, i8* %57, i32 %55
call void @llvm.ppc.vsx.stxvd2x(<2 x double> %54, i8* %58)
- %59 = load <2 x i64>* @vull, align 16
+ %59 = load <2 x i64>, <2 x i64>* @vull, align 16
store <2 x i64> %59, <2 x i64>* %__a.addr.i8, align 16
store i32 0, i32* %__b.addr.i9, align 4
store <2 x i64>* @res_vull, <2 x i64>** %__c.addr.i10, align 8
- %60 = load <2 x i64>* %__a.addr.i8, align 16
+ %60 = load <2 x i64>, <2 x i64>* %__a.addr.i8, align 16
%61 = bitcast <2 x i64> %60 to <2 x double>
- %62 = load i32* %__b.addr.i9, align 4
- %63 = load <2 x i64>** %__c.addr.i10, align 8
+ %62 = load i32, i32* %__b.addr.i9, align 4
+ %63 = load <2 x i64>*, <2 x i64>** %__c.addr.i10, align 8
%64 = bitcast <2 x i64>* %63 to i8*
- %65 = getelementptr i8* %64, i32 %62
+ %65 = getelementptr i8, i8* %64, i32 %62
call void @llvm.ppc.vsx.stxvd2x(<2 x double> %61, i8* %65)
- %66 = load <2 x double>* @vd, align 16
+ %66 = load <2 x double>, <2 x double>* @vd, align 16
store <2 x double> %66, <2 x double>* %__a.addr.i6, align 16
store i32 0, i32* %__b.addr.i7, align 4
store <2 x double>* @res_vd, <2 x double>** %__c.addr.i, align 8
- %67 = load <2 x double>* %__a.addr.i6, align 16
- %68 = load i32* %__b.addr.i7, align 4
- %69 = load <2 x double>** %__c.addr.i, align 8
+ %67 = load <2 x double>, <2 x double>* %__a.addr.i6, align 16
+ %68 = load i32, i32* %__b.addr.i7, align 4
+ %69 = load <2 x double>*, <2 x double>** %__c.addr.i, align 8
%70 = bitcast <2 x double>* %69 to i8*
- %71 = getelementptr i8* %70, i32 %68
+ %71 = getelementptr i8, i8* %70, i32 %68
call void @llvm.ppc.vsx.stxvd2x(<2 x double> %67, i8* %71)
ret void
}
diff --git a/test/CodeGen/PowerPC/vsx-ldst.ll b/test/CodeGen/PowerPC/vsx-ldst.ll
index 688187d1fcb6..7f12b0480e13 100644
--- a/test/CodeGen/PowerPC/vsx-ldst.ll
+++ b/test/CodeGen/PowerPC/vsx-ldst.ll
@@ -12,7 +12,6 @@
; RUN: llc -mcpu=pwr8 -mattr=+vsx -O2 -mtriple=powerpc64le-unknown-linux-gnu < %s > %t
; RUN: grep lxvd2x < %t | count 6
; RUN: grep stxvd2x < %t | count 6
-; RUN: grep xxpermdi < %t | count 12
@vsi = global <4 x i32> <i32 -1, i32 2, i32 -3, i32 4>, align 16
@vui = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
@@ -30,12 +29,12 @@
; Function Attrs: nounwind
define void @test1() {
entry:
- %0 = load <4 x i32>* @vsi, align 16
- %1 = load <4 x i32>* @vui, align 16
- %2 = load <4 x i32>* bitcast (<4 x float>* @vf to <4 x i32>*), align 16
- %3 = load <2 x double>* bitcast (<2 x i64>* @vsll to <2 x double>*), align 16
- %4 = load <2 x double>* bitcast (<2 x i64>* @vull to <2 x double>*), align 16
- %5 = load <2 x double>* @vd, align 16
+ %0 = load <4 x i32>, <4 x i32>* @vsi, align 16
+ %1 = load <4 x i32>, <4 x i32>* @vui, align 16
+ %2 = load <4 x i32>, <4 x i32>* bitcast (<4 x float>* @vf to <4 x i32>*), align 16
+ %3 = load <2 x double>, <2 x double>* bitcast (<2 x i64>* @vsll to <2 x double>*), align 16
+ %4 = load <2 x double>, <2 x double>* bitcast (<2 x i64>* @vull to <2 x double>*), align 16
+ %5 = load <2 x double>, <2 x double>* @vd, align 16
store <4 x i32> %0, <4 x i32>* @res_vsi, align 16
store <4 x i32> %1, <4 x i32>* @res_vui, align 16
store <4 x i32> %2, <4 x i32>* bitcast (<4 x float>* @res_vf to <4 x i32>*), align 16
diff --git a/test/CodeGen/PowerPC/vsx-minmax.ll b/test/CodeGen/PowerPC/vsx-minmax.ll
index 47f50abbc2a2..ad72cacae2a0 100644
--- a/test/CodeGen/PowerPC/vsx-minmax.ll
+++ b/test/CodeGen/PowerPC/vsx-minmax.ll
@@ -18,35 +18,35 @@ target triple = "powerpc64-unknown-linux-gnu"
define void @test1() #0 {
; CHECK-LABEL: @test1
entry:
- %0 = load volatile <4 x float>* @vf, align 16
- %1 = load volatile <4 x float>* @vf, align 16
+ %0 = load volatile <4 x float>, <4 x float>* @vf, align 16
+ %1 = load volatile <4 x float>, <4 x float>* @vf, align 16
%2 = tail call <4 x float> @llvm.ppc.vsx.xvmaxsp(<4 x float> %0, <4 x float> %1)
; CHECK: xvmaxsp
store <4 x float> %2, <4 x float>* @vf1, align 16
- %3 = load <2 x double>* @vd, align 16
+ %3 = load <2 x double>, <2 x double>* @vd, align 16
%4 = tail call <2 x double> @llvm.ppc.vsx.xvmaxdp(<2 x double> %3, <2 x double> %3)
; CHECK: xvmaxdp
store <2 x double> %4, <2 x double>* @vd1, align 16
- %5 = load volatile <4 x float>* @vf, align 16
- %6 = load volatile <4 x float>* @vf, align 16
+ %5 = load volatile <4 x float>, <4 x float>* @vf, align 16
+ %6 = load volatile <4 x float>, <4 x float>* @vf, align 16
%7 = tail call <4 x float> @llvm.ppc.vsx.xvmaxsp(<4 x float> %5, <4 x float> %6)
; CHECK: xvmaxsp
store <4 x float> %7, <4 x float>* @vf2, align 16
- %8 = load volatile <4 x float>* @vf, align 16
- %9 = load volatile <4 x float>* @vf, align 16
+ %8 = load volatile <4 x float>, <4 x float>* @vf, align 16
+ %9 = load volatile <4 x float>, <4 x float>* @vf, align 16
%10 = tail call <4 x float> @llvm.ppc.vsx.xvminsp(<4 x float> %8, <4 x float> %9)
; CHECK: xvminsp
store <4 x float> %10, <4 x float>* @vf3, align 16
- %11 = load <2 x double>* @vd, align 16
+ %11 = load <2 x double>, <2 x double>* @vd, align 16
%12 = tail call <2 x double> @llvm.ppc.vsx.xvmindp(<2 x double> %11, <2 x double> %11)
; CHECK: xvmindp
store <2 x double> %12, <2 x double>* @vd2, align 16
- %13 = load volatile <4 x float>* @vf, align 16
- %14 = load volatile <4 x float>* @vf, align 16
+ %13 = load volatile <4 x float>, <4 x float>* @vf, align 16
+ %14 = load volatile <4 x float>, <4 x float>* @vf, align 16
%15 = tail call <4 x float> @llvm.ppc.vsx.xvminsp(<4 x float> %13, <4 x float> %14)
; CHECK: xvminsp
store <4 x float> %15, <4 x float>* @vf4, align 16
- %16 = load double* @d, align 8
+ %16 = load double, double* @d, align 8
%17 = tail call double @llvm.ppc.vsx.xsmaxdp(double %16, double %16)
; CHECK: xsmaxdp
store double %17, double* @d1, align 8
diff --git a/test/CodeGen/PowerPC/vsx-p8.ll b/test/CodeGen/PowerPC/vsx-p8.ll
index d5a19059c60d..878714baab72 100644
--- a/test/CodeGen/PowerPC/vsx-p8.ll
+++ b/test/CodeGen/PowerPC/vsx-p8.ll
@@ -8,7 +8,7 @@ target triple = "powerpc64-unknown-linux-gnu"
; Unaligned loads/stores on P8 and later should use VSX where possible.
define <2 x double> @test28u(<2 x double>* %a) {
- %v = load <2 x double>* %a, align 8
+ %v = load <2 x double>, <2 x double>* %a, align 8
ret <2 x double> %v
; CHECK-LABEL: @test28u
@@ -26,7 +26,7 @@ define void @test29u(<2 x double>* %a, <2 x double> %b) {
}
define <4 x float> @test32u(<4 x float>* %a) {
- %v = load <4 x float>* %a, align 8
+ %v = load <4 x float>, <4 x float>* %a, align 8
ret <4 x float> %v
; CHECK-REG-LABEL: @test32u
diff --git a/test/CodeGen/PowerPC/vsx-recip-est.ll b/test/CodeGen/PowerPC/vsx-recip-est.ll
new file mode 100644
index 000000000000..f589c6c103e8
--- /dev/null
+++ b/test/CodeGen/PowerPC/vsx-recip-est.ll
@@ -0,0 +1,62 @@
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 -enable-unsafe-fp-math | FileCheck %s
+; RUN: llc < %s -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 -enable-unsafe-fp-math | FileCheck %s
+@a = global float 3.000000e+00, align 4
+@b = global float 4.000000e+00, align 4
+@c = global double 3.000000e+00, align 8
+@d = global double 4.000000e+00, align 8
+
+; Function Attrs: nounwind
+define float @emit_xsresp() {
+entry:
+ %0 = load float, float* @a, align 4
+ %1 = load float, float* @b, align 4
+ %div = fdiv fast float %0, %1
+ ret float %div
+; CHECK-LABEL: @emit_xsresp
+; CHECK: xsresp {{[0-9]+}}
+}
+
+; Function Attrs: nounwind
+define float @emit_xsrsqrtesp(float %f) {
+entry:
+ %f.addr = alloca float, align 4
+ store float %f, float* %f.addr, align 4
+ %0 = load float, float* %f.addr, align 4
+ %1 = load float, float* @b, align 4
+ %2 = call float @llvm.sqrt.f32(float %1)
+ %div = fdiv fast float %0, %2
+ ret float %div
+; CHECK-LABEL: @emit_xsrsqrtesp
+; CHECK: xsrsqrtesp {{[0-9]+}}
+}
+
+; Function Attrs: nounwind readnone
+declare float @llvm.sqrt.f32(float)
+
+; Function Attrs: nounwind
+define double @emit_xsredp() {
+entry:
+ %0 = load double, double* @c, align 8
+ %1 = load double, double* @d, align 8
+ %div = fdiv fast double %0, %1
+ ret double %div
+; CHECK-LABEL: @emit_xsredp
+; CHECK: xsredp {{[0-9]+}}
+}
+
+; Function Attrs: nounwind
+define double @emit_xsrsqrtedp(double %f) {
+entry:
+ %f.addr = alloca double, align 8
+ store double %f, double* %f.addr, align 8
+ %0 = load double, double* %f.addr, align 8
+ %1 = load double, double* @d, align 8
+ %2 = call double @llvm.sqrt.f64(double %1)
+ %div = fdiv fast double %0, %2
+ ret double %div
+; CHECK-LABEL: @emit_xsrsqrtedp
+; CHECK: xsrsqrtedp {{[0-9]+}}
+}
+
+; Function Attrs: nounwind readnone
+declare double @llvm.sqrt.f64(double) #1
diff --git a/test/CodeGen/PowerPC/vsx-spill-norwstore.ll b/test/CodeGen/PowerPC/vsx-spill-norwstore.ll
new file mode 100644
index 000000000000..77b6cb29b24b
--- /dev/null
+++ b/test/CodeGen/PowerPC/vsx-spill-norwstore.ll
@@ -0,0 +1,63 @@
+; RUN: llc -mcpu=pwr7 -verify-machineinstrs < %s | FileCheck %s
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+@.str1 = external unnamed_addr constant [5 x i8], align 1
+@.str10 = external unnamed_addr constant [9 x i8], align 1
+
+; Function Attrs: nounwind
+define void @main() #0 {
+; CHECK-LABEL: @main
+; Make sure that the stxvd2x passes -verify-machineinstrs
+; CHECK: stxvd2x
+
+entry:
+ %0 = tail call <8 x i16> @llvm.ppc.altivec.vupkhsb(<16 x i8> <i8 0, i8 -1, i8 -1, i8 0, i8 0, i8 0, i8 -1, i8 0, i8 -1, i8 0, i8 0, i8 -1, i8 -1, i8 -1, i8 0, i8 -1>) #0
+ %1 = tail call <8 x i16> @llvm.ppc.altivec.vupklsb(<16 x i8> <i8 0, i8 -1, i8 -1, i8 0, i8 0, i8 0, i8 -1, i8 0, i8 -1, i8 0, i8 0, i8 -1, i8 -1, i8 -1, i8 0, i8 -1>) #0
+ br i1 false, label %if.then.i68.i, label %check.exit69.i
+
+if.then.i68.i: ; preds = %entry
+ unreachable
+
+check.exit69.i: ; preds = %entry
+ br i1 undef, label %if.then.i63.i, label %check.exit64.i
+
+if.then.i63.i: ; preds = %check.exit69.i
+ tail call void (i8*, ...) @printf(i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str10, i64 0, i64 0), i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str1, i64 0, i64 0)) #0
+ br label %check.exit64.i
+
+check.exit64.i: ; preds = %if.then.i63.i, %check.exit69.i
+ %2 = tail call i32 @llvm.ppc.altivec.vcmpequh.p(i32 2, <8 x i16> %0, <8 x i16> <i16 0, i16 -1, i16 -1, i16 0, i16 0, i16 0, i16 -1, i16 0>) #0
+ %tobool.i55.i = icmp eq i32 %2, 0
+ br i1 %tobool.i55.i, label %if.then.i58.i, label %check.exit59.i
+
+if.then.i58.i: ; preds = %check.exit64.i
+ unreachable
+
+check.exit59.i: ; preds = %check.exit64.i
+ %3 = tail call i32 @llvm.ppc.altivec.vcmpequh.p(i32 2, <8 x i16> %1, <8 x i16> <i16 -1, i16 0, i16 0, i16 -1, i16 -1, i16 -1, i16 0, i16 -1>) #0
+ %tobool.i50.i = icmp eq i32 %3, 0
+ br i1 %tobool.i50.i, label %if.then.i53.i, label %check.exit54.i
+
+if.then.i53.i: ; preds = %check.exit59.i
+ unreachable
+
+check.exit54.i: ; preds = %check.exit59.i
+ unreachable
+}
+
+; Function Attrs: nounwind readnone
+declare <8 x i16> @llvm.ppc.altivec.vupkhsb(<16 x i8>) #1
+
+; Function Attrs: nounwind readnone
+declare <8 x i16> @llvm.ppc.altivec.vupklsb(<16 x i8>) #1
+
+; Function Attrs: nounwind
+declare void @printf(i8* nocapture readonly, ...) #0
+
+; Function Attrs: nounwind readnone
+declare i32 @llvm.ppc.altivec.vcmpequh.p(i32, <8 x i16>, <8 x i16>) #1
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readnone }
+
diff --git a/test/CodeGen/PowerPC/vsx.ll b/test/CodeGen/PowerPC/vsx.ll
index f91ffdb960bb..b185fed4cd5b 100644
--- a/test/CodeGen/PowerPC/vsx.ll
+++ b/test/CodeGen/PowerPC/vsx.ll
@@ -501,7 +501,7 @@ define <2 x i64> @test27(<2 x i64> %a, <2 x i64> %b) {
}
define <2 x double> @test28(<2 x double>* %a) {
- %v = load <2 x double>* %a, align 16
+ %v = load <2 x double>, <2 x double>* %a, align 16
ret <2 x double> %v
; CHECK-LABEL: @test28
@@ -519,7 +519,7 @@ define void @test29(<2 x double>* %a, <2 x double> %b) {
}
define <2 x double> @test28u(<2 x double>* %a) {
- %v = load <2 x double>* %a, align 8
+ %v = load <2 x double>, <2 x double>* %a, align 8
ret <2 x double> %v
; CHECK-LABEL: @test28u
@@ -537,7 +537,7 @@ define void @test29u(<2 x double>* %a, <2 x double> %b) {
}
define <2 x i64> @test30(<2 x i64>* %a) {
- %v = load <2 x i64>* %a, align 16
+ %v = load <2 x i64>, <2 x i64>* %a, align 16
ret <2 x i64> %v
; CHECK-REG-LABEL: @test30
@@ -562,7 +562,7 @@ define void @test31(<2 x i64>* %a, <2 x i64> %b) {
}
define <4 x float> @test32(<4 x float>* %a) {
- %v = load <4 x float>* %a, align 16
+ %v = load <4 x float>, <4 x float>* %a, align 16
ret <4 x float> %v
; CHECK-REG-LABEL: @test32
@@ -590,7 +590,7 @@ define void @test33(<4 x float>* %a, <4 x float> %b) {
}
define <4 x float> @test32u(<4 x float>* %a) {
- %v = load <4 x float>* %a, align 8
+ %v = load <4 x float>, <4 x float>* %a, align 8
ret <4 x float> %v
; CHECK-LABEL: @test32u
@@ -616,7 +616,7 @@ define void @test33u(<4 x float>* %a, <4 x float> %b) {
}
define <4 x i32> @test34(<4 x i32>* %a) {
- %v = load <4 x i32>* %a, align 16
+ %v = load <4 x i32>, <4 x i32>* %a, align 16
ret <4 x i32> %v
; CHECK-REG-LABEL: @test34
@@ -718,7 +718,7 @@ define <2 x i64> @test47(<2 x float> %a) {
}
define <2 x double> @test50(double* %a) {
- %v = load double* %a, align 8
+ %v = load double, double* %a, align 8
%w = insertelement <2 x double> undef, double %v, i32 0
%x = insertelement <2 x double> %w, double %v, i32 1
ret <2 x double> %x
@@ -733,7 +733,7 @@ define <2 x double> @test51(<2 x double> %a, <2 x double> %b) {
ret <2 x double> %v
; CHECK-LABEL: @test51
-; CHECK: xxpermdi 34, 34, 34, 0
+; CHECK: xxspltd 34, 34, 0
; CHECK: blr
}
@@ -742,7 +742,7 @@ define <2 x double> @test52(<2 x double> %a, <2 x double> %b) {
ret <2 x double> %v
; CHECK-LABEL: @test52
-; CHECK: xxpermdi 34, 34, 35, 0
+; CHECK: xxmrghd 34, 34, 35
; CHECK: blr
}
@@ -751,7 +751,7 @@ define <2 x double> @test53(<2 x double> %a, <2 x double> %b) {
ret <2 x double> %v
; CHECK-LABEL: @test53
-; CHECK: xxpermdi 34, 35, 34, 0
+; CHECK: xxmrghd 34, 35, 34
; CHECK: blr
}
@@ -769,7 +769,7 @@ define <2 x double> @test55(<2 x double> %a, <2 x double> %b) {
ret <2 x double> %v
; CHECK-LABEL: @test55
-; CHECK: xxpermdi 34, 34, 35, 3
+; CHECK: xxmrgld 34, 34, 35
; CHECK: blr
}
@@ -778,7 +778,7 @@ define <2 x i64> @test56(<2 x i64> %a, <2 x i64> %b) {
ret <2 x i64> %v
; CHECK-LABEL: @test56
-; CHECK: xxpermdi 34, 34, 35, 3
+; CHECK: xxmrgld 34, 34, 35
; CHECK: blr
}
@@ -843,11 +843,11 @@ define double @test64(<2 x double> %a) {
ret double %v
; CHECK-REG-LABEL: @test64
-; CHECK-REG: xxpermdi 1, 34, 34, 2
+; CHECK-REG: xxswapd 1, 34
; CHECK-REG: blr
; CHECK-FISL-LABEL: @test64
-; CHECK-FISL: xxpermdi 34, 34, 34, 2
+; CHECK-FISL: xxswapd 34, 34
; CHECK-FISL: xxlor 0, 34, 34
; CHECK-FISL: fmr 1, 0
; CHECK-FISL: blr
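The CHECK-line updates in this vsx.ll diff, and in the vsx_insert_extract_le.ll and vsx_shuffle_le.ll diffs that follow, also replace raw xxpermdi encodings with their extended mnemonics. Judging only from the pairs rewritten in these hunks, the correspondence is:

  xxpermdi T, A, A, 2  ->  xxswapd T, A
  xxpermdi T, A, A, 0  ->  xxspltd T, A, 0   (immediate 3 with equal sources -> xxspltd T, A, 1)
  xxpermdi T, A, B, 0  ->  xxmrghd T, A, B
  xxpermdi T, A, B, 3  ->  xxmrgld T, A, B

Other immediates (1, and 2 with distinct sources) keep the plain xxpermdi spelling in the updated checks.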
diff --git a/test/CodeGen/PowerPC/vsx_insert_extract_le.ll b/test/CodeGen/PowerPC/vsx_insert_extract_le.ll
index 0a9df3779116..84bbdd75b0f7 100644
--- a/test/CodeGen/PowerPC/vsx_insert_extract_le.ll
+++ b/test/CodeGen/PowerPC/vsx_insert_extract_le.ll
@@ -1,35 +1,35 @@
; RUN: llc -mcpu=pwr8 -mattr=+vsx -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s
define <2 x double> @testi0(<2 x double>* %p1, double* %p2) {
- %v = load <2 x double>* %p1
- %s = load double* %p2
+ %v = load <2 x double>, <2 x double>* %p1
+ %s = load double, double* %p2
%r = insertelement <2 x double> %v, double %s, i32 0
ret <2 x double> %r
; CHECK-LABEL: testi0
; CHECK: lxvd2x 0, 0, 3
; CHECK: lxsdx 34, 0, 4
-; CHECK: xxpermdi 0, 0, 0, 2
-; CHECK: xxpermdi 1, 34, 34, 0
+; CHECK: xxswapd 0, 0
+; CHECK: xxspltd 1, 34, 0
; CHECK: xxpermdi 34, 0, 1, 1
}
define <2 x double> @testi1(<2 x double>* %p1, double* %p2) {
- %v = load <2 x double>* %p1
- %s = load double* %p2
+ %v = load <2 x double>, <2 x double>* %p1
+ %s = load double, double* %p2
%r = insertelement <2 x double> %v, double %s, i32 1
ret <2 x double> %r
; CHECK-LABEL: testi1
; CHECK: lxvd2x 0, 0, 3
; CHECK: lxsdx 34, 0, 4
-; CHECK: xxpermdi 0, 0, 0, 2
-; CHECK: xxpermdi 1, 34, 34, 0
-; CHECK: xxpermdi 34, 1, 0, 3
+; CHECK: xxswapd 0, 0
+; CHECK: xxspltd 1, 34, 0
+; CHECK: xxmrgld 34, 1, 0
}
define double @teste0(<2 x double>* %p1) {
- %v = load <2 x double>* %p1
+ %v = load <2 x double>, <2 x double>* %p1
%r = extractelement <2 x double> %v, i32 0
ret double %r
@@ -37,16 +37,16 @@ define double @teste0(<2 x double>* %p1) {
; CHECK-LABEL: teste0
; CHECK: lxvd2x 0, 0, 3
-; CHECK: xxpermdi 0, 0, 0, 2
-; CHECK: xxpermdi 1, 0, 0, 2
+; CHECK: xxswapd 0, 0
+; CHECK: xxswapd 1, 0
}
define double @teste1(<2 x double>* %p1) {
- %v = load <2 x double>* %p1
+ %v = load <2 x double>, <2 x double>* %p1
%r = extractelement <2 x double> %v, i32 1
ret double %r
; CHECK-LABEL: teste1
; CHECK: lxvd2x 0, 0, 3
-; CHECK: xxpermdi 1, 0, 0, 2
+; CHECK: xxswapd 1, 0
}
diff --git a/test/CodeGen/PowerPC/vsx_scalar_ld_st.ll b/test/CodeGen/PowerPC/vsx_scalar_ld_st.ll
new file mode 100644
index 000000000000..102970885963
--- /dev/null
+++ b/test/CodeGen/PowerPC/vsx_scalar_ld_st.ll
@@ -0,0 +1,139 @@
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 -mattr=-direct-move | FileCheck %s
+; RUN: llc < %s -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 -mattr=-direct-move | FileCheck %s
+
+@d = common global double 0.000000e+00, align 8
+@f = common global float 0.000000e+00, align 4
+@i = common global i32 0, align 4
+@ui = common global i32 0, align 4
+
+; Function Attrs: nounwind
+define void @dblToInt() #0 {
+entry:
+ %ii = alloca i32, align 4
+ %0 = load double, double* @d, align 8
+ %conv = fptosi double %0 to i32
+ store volatile i32 %conv, i32* %ii, align 4
+ ret void
+; CHECK-LABEL: @dblToInt
+; CHECK: xscvdpsxws [[REGCONV1:[0-9]+]],
+; CHECK: stxsiwx [[REGCONV1]],
+}
+
+; Function Attrs: nounwind
+define void @fltToInt() #0 {
+entry:
+ %ii = alloca i32, align 4
+ %0 = load float, float* @f, align 4
+ %conv = fptosi float %0 to i32
+ store volatile i32 %conv, i32* %ii, align 4
+ ret void
+; CHECK-LABEL: @fltToInt
+; CHECK: xscvdpsxws [[REGCONV2:[0-9]+]],
+; CHECK: stxsiwx [[REGCONV2]],
+}
+
+; Function Attrs: nounwind
+define void @intToDbl() #0 {
+entry:
+ %dd = alloca double, align 8
+ %0 = load i32, i32* @i, align 4
+ %conv = sitofp i32 %0 to double
+ store volatile double %conv, double* %dd, align 8
+ ret void
+; CHECK-LABEL: @intToDbl
+; CHECK: lxsiwax [[REGLD1:[0-9]+]],
+; CHECK: xscvsxddp {{[0-9]+}}, [[REGLD1]]
+}
+
+; Function Attrs: nounwind
+define void @intToFlt() #0 {
+entry:
+ %ff = alloca float, align 4
+ %0 = load i32, i32* @i, align 4
+ %conv = sitofp i32 %0 to float
+ store volatile float %conv, float* %ff, align 4
+ ret void
+; CHECK-LABEL: @intToFlt
+; CHECK: lxsiwax [[REGLD2:[0-9]+]],
+; FIXME: the below will change when the VSX form is implemented
+; CHECK: fcfids {{[0-9]}}, [[REGLD2]]
+}
+
+; Function Attrs: nounwind
+define void @dblToUInt() #0 {
+entry:
+ %uiui = alloca i32, align 4
+ %0 = load double, double* @d, align 8
+ %conv = fptoui double %0 to i32
+ store volatile i32 %conv, i32* %uiui, align 4
+ ret void
+; CHECK-LABEL: @dblToUInt
+; CHECK: xscvdpuxws [[REGCONV3:[0-9]+]],
+; CHECK: stxsiwx [[REGCONV3]],
+}
+
+; Function Attrs: nounwind
+define void @fltToUInt() #0 {
+entry:
+ %uiui = alloca i32, align 4
+ %0 = load float, float* @f, align 4
+ %conv = fptoui float %0 to i32
+ store volatile i32 %conv, i32* %uiui, align 4
+ ret void
+; CHECK-LABEL: @fltToUInt
+; CHECK: xscvdpuxws [[REGCONV4:[0-9]+]],
+; CHECK: stxsiwx [[REGCONV4]],
+}
+
+; Function Attrs: nounwind
+define void @uIntToDbl() #0 {
+entry:
+ %dd = alloca double, align 8
+ %0 = load i32, i32* @ui, align 4
+ %conv = uitofp i32 %0 to double
+ store volatile double %conv, double* %dd, align 8
+ ret void
+; CHECK-LABEL: @uIntToDbl
+; CHECK: lxsiwzx [[REGLD3:[0-9]+]],
+; CHECK: xscvuxddp {{[0-9]+}}, [[REGLD3]]
+}
+
+; Function Attrs: nounwind
+define void @uIntToFlt() #0 {
+entry:
+ %ff = alloca float, align 4
+ %0 = load i32, i32* @ui, align 4
+ %conv = uitofp i32 %0 to float
+ store volatile float %conv, float* %ff, align 4
+ ret void
+; CHECK-LABEL: @uIntToFlt
+; CHECK: lxsiwzx [[REGLD4:[0-9]+]],
+; FIXME: the below will change when the VSX form is implemented
+; CHECK: fcfidus {{[0-9]+}}, [[REGLD4]]
+}
+
+; Function Attrs: nounwind
+define void @dblToFloat() #0 {
+entry:
+ %ff = alloca float, align 4
+ %0 = load double, double* @d, align 8
+ %conv = fptrunc double %0 to float
+ store volatile float %conv, float* %ff, align 4
+ ret void
+; CHECK-LABEL: @dblToFloat
+; CHECK: lxsdx [[REGLD5:[0-9]+]],
+; CHECK: stxsspx [[REGLD5]],
+}
+
+; Function Attrs: nounwind
+define void @floatToDbl() #0 {
+entry:
+ %dd = alloca double, align 8
+ %0 = load float, float* @f, align 4
+ %conv = fpext float %0 to double
+ store volatile double %conv, double* %dd, align 8
+ ret void
+; CHECK-LABEL: @floatToDbl
+; CHECK: lxsspx [[REGLD5:[0-9]+]],
+; CHECK: stxsdx [[REGLD5]],
+}
diff --git a/test/CodeGen/PowerPC/vsx_shuffle_le.ll b/test/CodeGen/PowerPC/vsx_shuffle_le.ll
index 588cfdad7853..dcfa0e788867 100644
--- a/test/CodeGen/PowerPC/vsx_shuffle_le.ll
+++ b/test/CodeGen/PowerPC/vsx_shuffle_le.ll
@@ -1,207 +1,207 @@
; RUN: llc -mcpu=pwr8 -mattr=+vsx -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s
define <2 x double> @test00(<2 x double>* %p1, <2 x double>* %p2) {
- %v1 = load <2 x double>* %p1
- %v2 = load <2 x double>* %p2
+ %v1 = load <2 x double>, <2 x double>* %p1
+ %v2 = load <2 x double>, <2 x double>* %p2
%v3 = shufflevector <2 x double> %v1, <2 x double> %v2, <2 x i32> < i32 0, i32 0>
ret <2 x double> %v3
; CHECK-LABEL: test00
; CHECK: lxvd2x 0, 0, 3
-; CHECK: xxpermdi 0, 0, 0, 2
-; CHECK: xxpermdi 34, 0, 0, 3
+; CHECK: xxswapd 0, 0
+; CHECK: xxspltd 34, 0, 1
}
define <2 x double> @test01(<2 x double>* %p1, <2 x double>* %p2) {
- %v1 = load <2 x double>* %p1
- %v2 = load <2 x double>* %p2
+ %v1 = load <2 x double>, <2 x double>* %p1
+ %v2 = load <2 x double>, <2 x double>* %p2
%v3 = shufflevector <2 x double> %v1, <2 x double> %v2, <2 x i32> < i32 0, i32 1>
ret <2 x double> %v3
; CHECK-LABEL: test01
; CHECK: lxvd2x 0, 0, 3
-; CHECK: xxpermdi 34, 0, 0, 2
+; CHECK: xxswapd 34, 0
}
define <2 x double> @test02(<2 x double>* %p1, <2 x double>* %p2) {
- %v1 = load <2 x double>* %p1
- %v2 = load <2 x double>* %p2
+ %v1 = load <2 x double>, <2 x double>* %p1
+ %v2 = load <2 x double>, <2 x double>* %p2
%v3 = shufflevector <2 x double> %v1, <2 x double> %v2, <2 x i32> < i32 0, i32 2>
ret <2 x double> %v3
; CHECK-LABEL: @test02
; CHECK: lxvd2x 0, 0, 3
; CHECK: lxvd2x 1, 0, 4
-; CHECK: xxpermdi 0, 0, 0, 2
-; CHECK: xxpermdi 1, 1, 1, 2
-; CHECK: xxpermdi 34, 1, 0, 3
+; CHECK: xxswapd 0, 0
+; CHECK: xxswapd 1, 1
+; CHECK: xxmrgld 34, 1, 0
}
define <2 x double> @test03(<2 x double>* %p1, <2 x double>* %p2) {
- %v1 = load <2 x double>* %p1
- %v2 = load <2 x double>* %p2
+ %v1 = load <2 x double>, <2 x double>* %p1
+ %v2 = load <2 x double>, <2 x double>* %p2
%v3 = shufflevector <2 x double> %v1, <2 x double> %v2, <2 x i32> < i32 0, i32 3>
ret <2 x double> %v3
; CHECK-LABEL: @test03
; CHECK: lxvd2x 0, 0, 3
; CHECK: lxvd2x 1, 0, 4
-; CHECK: xxpermdi 0, 0, 0, 2
-; CHECK: xxpermdi 1, 1, 1, 2
+; CHECK: xxswapd 0, 0
+; CHECK: xxswapd 1, 1
; CHECK: xxpermdi 34, 1, 0, 1
}
define <2 x double> @test10(<2 x double>* %p1, <2 x double>* %p2) {
- %v1 = load <2 x double>* %p1
- %v2 = load <2 x double>* %p2
+ %v1 = load <2 x double>, <2 x double>* %p1
+ %v2 = load <2 x double>, <2 x double>* %p2
%v3 = shufflevector <2 x double> %v1, <2 x double> %v2, <2 x i32> < i32 1, i32 0>
ret <2 x double> %v3
; CHECK-LABEL: @test10
; CHECK: lxvd2x 0, 0, 3
-; CHECK: xxpermdi 0, 0, 0, 2
-; CHECK: xxpermdi 34, 0, 0, 2
+; CHECK: xxswapd 0, 0
+; CHECK: xxswapd 34, 0
}
define <2 x double> @test11(<2 x double>* %p1, <2 x double>* %p2) {
- %v1 = load <2 x double>* %p1
- %v2 = load <2 x double>* %p2
+ %v1 = load <2 x double>, <2 x double>* %p1
+ %v2 = load <2 x double>, <2 x double>* %p2
%v3 = shufflevector <2 x double> %v1, <2 x double> %v2, <2 x i32> < i32 1, i32 1>
ret <2 x double> %v3
; CHECK-LABEL: @test11
; CHECK: lxvd2x 0, 0, 3
-; CHECK: xxpermdi 0, 0, 0, 2
-; CHECK: xxpermdi 34, 0, 0, 0
+; CHECK: xxswapd 0, 0
+; CHECK: xxspltd 34, 0, 0
}
define <2 x double> @test12(<2 x double>* %p1, <2 x double>* %p2) {
- %v1 = load <2 x double>* %p1
- %v2 = load <2 x double>* %p2
+ %v1 = load <2 x double>, <2 x double>* %p1
+ %v2 = load <2 x double>, <2 x double>* %p2
%v3 = shufflevector <2 x double> %v1, <2 x double> %v2, <2 x i32> < i32 1, i32 2>
ret <2 x double> %v3
; CHECK-LABEL: @test12
; CHECK: lxvd2x 0, 0, 3
; CHECK: lxvd2x 1, 0, 4
-; CHECK: xxpermdi 0, 0, 0, 2
-; CHECK: xxpermdi 1, 1, 1, 2
+; CHECK: xxswapd 0, 0
+; CHECK: xxswapd 1, 1
; CHECK: xxpermdi 34, 1, 0, 2
}
define <2 x double> @test13(<2 x double>* %p1, <2 x double>* %p2) {
- %v1 = load <2 x double>* %p1
- %v2 = load <2 x double>* %p2
+ %v1 = load <2 x double>, <2 x double>* %p1
+ %v2 = load <2 x double>, <2 x double>* %p2
%v3 = shufflevector <2 x double> %v1, <2 x double> %v2, <2 x i32> < i32 1, i32 3>
ret <2 x double> %v3
; CHECK-LABEL: @test13
; CHECK: lxvd2x 0, 0, 3
; CHECK: lxvd2x 1, 0, 4
-; CHECK: xxpermdi 0, 0, 0, 2
-; CHECK: xxpermdi 1, 1, 1, 2
-; CHECK: xxpermdi 34, 1, 0, 0
+; CHECK: xxswapd 0, 0
+; CHECK: xxswapd 1, 1
+; CHECK: xxmrghd 34, 1, 0
}
define <2 x double> @test20(<2 x double>* %p1, <2 x double>* %p2) {
- %v1 = load <2 x double>* %p1
- %v2 = load <2 x double>* %p2
+ %v1 = load <2 x double>, <2 x double>* %p1
+ %v2 = load <2 x double>, <2 x double>* %p2
%v3 = shufflevector <2 x double> %v1, <2 x double> %v2, <2 x i32> < i32 2, i32 0>
ret <2 x double> %v3
; CHECK-LABEL: @test20
; CHECK: lxvd2x 0, 0, 3
; CHECK: lxvd2x 1, 0, 4
-; CHECK: xxpermdi 0, 0, 0, 2
-; CHECK: xxpermdi 1, 1, 1, 2
-; CHECK: xxpermdi 34, 0, 1, 3
+; CHECK: xxswapd 0, 0
+; CHECK: xxswapd 1, 1
+; CHECK: xxmrgld 34, 0, 1
}
define <2 x double> @test21(<2 x double>* %p1, <2 x double>* %p2) {
- %v1 = load <2 x double>* %p1
- %v2 = load <2 x double>* %p2
+ %v1 = load <2 x double>, <2 x double>* %p1
+ %v2 = load <2 x double>, <2 x double>* %p2
%v3 = shufflevector <2 x double> %v1, <2 x double> %v2, <2 x i32> < i32 2, i32 1>
ret <2 x double> %v3
; CHECK-LABEL: @test21
; CHECK: lxvd2x 0, 0, 3
; CHECK: lxvd2x 1, 0, 4
-; CHECK: xxpermdi 0, 0, 0, 2
-; CHECK: xxpermdi 1, 1, 1, 2
+; CHECK: xxswapd 0, 0
+; CHECK: xxswapd 1, 1
; CHECK: xxpermdi 34, 0, 1, 1
}
define <2 x double> @test22(<2 x double>* %p1, <2 x double>* %p2) {
- %v1 = load <2 x double>* %p1
- %v2 = load <2 x double>* %p2
+ %v1 = load <2 x double>, <2 x double>* %p1
+ %v2 = load <2 x double>, <2 x double>* %p2
%v3 = shufflevector <2 x double> %v1, <2 x double> %v2, <2 x i32> < i32 2, i32 2>
ret <2 x double> %v3
; CHECK-LABEL: @test22
; CHECK: lxvd2x 0, 0, 4
-; CHECK: xxpermdi 0, 0, 0, 2
-; CHECK: xxpermdi 34, 0, 0, 3
+; CHECK: xxswapd 0, 0
+; CHECK: xxspltd 34, 0, 1
}
define <2 x double> @test23(<2 x double>* %p1, <2 x double>* %p2) {
- %v1 = load <2 x double>* %p1
- %v2 = load <2 x double>* %p2
+ %v1 = load <2 x double>, <2 x double>* %p1
+ %v2 = load <2 x double>, <2 x double>* %p2
%v3 = shufflevector <2 x double> %v1, <2 x double> %v2, <2 x i32> < i32 2, i32 3>
ret <2 x double> %v3
; CHECK-LABEL: @test23
; CHECK: lxvd2x 0, 0, 4
-; CHECK: xxpermdi 34, 0, 0, 2
+; CHECK: xxswapd 34, 0
}
define <2 x double> @test30(<2 x double>* %p1, <2 x double>* %p2) {
- %v1 = load <2 x double>* %p1
- %v2 = load <2 x double>* %p2
+ %v1 = load <2 x double>, <2 x double>* %p1
+ %v2 = load <2 x double>, <2 x double>* %p2
%v3 = shufflevector <2 x double> %v1, <2 x double> %v2, <2 x i32> < i32 3, i32 0>
ret <2 x double> %v3
; CHECK-LABEL: @test30
; CHECK: lxvd2x 0, 0, 3
; CHECK: lxvd2x 1, 0, 4
-; CHECK: xxpermdi 0, 0, 0, 2
-; CHECK: xxpermdi 1, 1, 1, 2
+; CHECK: xxswapd 0, 0
+; CHECK: xxswapd 1, 1
; CHECK: xxpermdi 34, 0, 1, 2
}
define <2 x double> @test31(<2 x double>* %p1, <2 x double>* %p2) {
- %v1 = load <2 x double>* %p1
- %v2 = load <2 x double>* %p2
+ %v1 = load <2 x double>, <2 x double>* %p1
+ %v2 = load <2 x double>, <2 x double>* %p2
%v3 = shufflevector <2 x double> %v1, <2 x double> %v2, <2 x i32> < i32 3, i32 1>
ret <2 x double> %v3
; CHECK-LABEL: @test31
; CHECK: lxvd2x 0, 0, 3
; CHECK: lxvd2x 1, 0, 4
-; CHECK: xxpermdi 0, 0, 0, 2
-; CHECK: xxpermdi 1, 1, 1, 2
-; CHECK: xxpermdi 34, 0, 1, 0
+; CHECK: xxswapd 0, 0
+; CHECK: xxswapd 1, 1
+; CHECK: xxmrghd 34, 0, 1
}
define <2 x double> @test32(<2 x double>* %p1, <2 x double>* %p2) {
- %v1 = load <2 x double>* %p1
- %v2 = load <2 x double>* %p2
+ %v1 = load <2 x double>, <2 x double>* %p1
+ %v2 = load <2 x double>, <2 x double>* %p2
%v3 = shufflevector <2 x double> %v1, <2 x double> %v2, <2 x i32> < i32 3, i32 2>
ret <2 x double> %v3
; CHECK-LABEL: @test32
; CHECK: lxvd2x 0, 0, 4
-; CHECK: xxpermdi 0, 0, 0, 2
-; CHECK: xxpermdi 34, 0, 0, 2
+; CHECK: xxswapd 0, 0
+; CHECK: xxswapd 34, 0
}
define <2 x double> @test33(<2 x double>* %p1, <2 x double>* %p2) {
- %v1 = load <2 x double>* %p1
- %v2 = load <2 x double>* %p2
+ %v1 = load <2 x double>, <2 x double>* %p1
+ %v2 = load <2 x double>, <2 x double>* %p2
%v3 = shufflevector <2 x double> %v1, <2 x double> %v2, <2 x i32> < i32 3, i32 3>
ret <2 x double> %v3
; CHECK-LABEL: @test33
; CHECK: lxvd2x 0, 0, 4
-; CHECK: xxpermdi 0, 0, 0, 2
-; CHECK: xxpermdi 34, 0, 0, 0
+; CHECK: xxswapd 0, 0
+; CHECK: xxspltd 34, 0, 0
}
diff --git a/test/CodeGen/PowerPC/weak_def_can_be_hidden.ll b/test/CodeGen/PowerPC/weak_def_can_be_hidden.ll
index e038b3f2fb25..0b87613bb4d8 100644
--- a/test/CodeGen/PowerPC/weak_def_can_be_hidden.ll
+++ b/test/CodeGen/PowerPC/weak_def_can_be_hidden.ll
@@ -11,7 +11,7 @@
; CHECK-D89: .weak_definition _v1
define i32 @f1() {
- %x = load i32 * @v1
+ %x = load i32 , i32 * @v1
ret i32 %x
}
@@ -45,6 +45,6 @@ define i32* @f3() {
; CHECK-D89: .weak_definition _v4
define i32 @f4() {
- %x = load i32 * @v4
+ %x = load i32 , i32 * @v4
ret i32 %x
}
diff --git a/test/CodeGen/PowerPC/xxleqv_xxlnand_xxlorc.ll b/test/CodeGen/PowerPC/xxleqv_xxlnand_xxlorc.ll
new file mode 100644
index 000000000000..4d929c627f1c
--- /dev/null
+++ b/test/CodeGen/PowerPC/xxleqv_xxlnand_xxlorc.ll
@@ -0,0 +1,52 @@
+; Check the miscellaneous logical vector operations added in P8
+;
+; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 < %s | FileCheck %s
+; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s
+; Test x eqv y
+define <4 x i32> @test_xxleqv(<4 x i32> %x, <4 x i32> %y) nounwind {
+ %tmp = xor <4 x i32> %x, %y
+ %ret_val = xor <4 x i32> %tmp, < i32 -1, i32 -1, i32 -1, i32 -1>
+ ret <4 x i32> %ret_val
+; CHECK: xxleqv 34, 34, 35
+}
+
+; Test x xxlnand y
+define <4 x i32> @test_xxlnand(<4 x i32> %x, <4 x i32> %y) nounwind {
+ %tmp = and <4 x i32> %x, %y
+ %ret_val = xor <4 x i32> %tmp, <i32 -1, i32 -1, i32 -1, i32 -1>
+ ret <4 x i32> %ret_val
+; CHECK: xxlnand 34, 34, 35
+}
+
+; Test x xxlorc y
+define <4 x i32> @test_xxlorc(<4 x i32> %x, <4 x i32> %y) nounwind {
+ %tmp = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
+ %ret_val = or <4 x i32> %x, %tmp
+ ret <4 x i32> %ret_val
+; CHECK: xxlorc 34, 34, 35
+}
+
+; Test x eqv y
+define <8 x i16> @test_xxleqvv8i16(<8 x i16> %x, <8 x i16> %y) nounwind {
+ %tmp = xor <8 x i16> %x, %y
+ %ret_val = xor <8 x i16> %tmp, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+ ret <8 x i16> %ret_val
+; CHECK: xxleqv 34, 34, 35
+}
+
+; Test x xxlnand y
+define <8 x i16> @test_xxlnandv8i16(<8 x i16> %x, <8 x i16> %y) nounwind {
+ %tmp = and <8 x i16> %x, %y
+ %ret_val = xor <8 x i16> %tmp, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+ ret <8 x i16> %ret_val
+; CHECK: xxlnand 34, 34, 35
+}
+
+; Test x xxlorc y
+define <8 x i16> @test_xxlorcv8i16(<8 x i16> %x, <8 x i16> %y) nounwind {
+ %tmp = xor <8 x i16> %y, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+ %ret_val = or <8 x i16> %x, %tmp
+ ret <8 x i16> %ret_val
+; CHECK: xxlorc 34, 34, 35
+}
+
diff --git a/test/CodeGen/PowerPC/zero-not-run.ll b/test/CodeGen/PowerPC/zero-not-run.ll
index 9df0d6e004ef..b3b7634f41a8 100644
--- a/test/CodeGen/PowerPC/zero-not-run.ll
+++ b/test/CodeGen/PowerPC/zero-not-run.ll
@@ -8,7 +8,7 @@ entry:
br i1 undef, label %for.body, label %for.end731
for.body: ; preds = %entry
- %0 = load i32* undef, align 4
+ %0 = load i32, i32* undef, align 4
%or31 = or i32 %0, 319143828
store i32 %or31, i32* undef, align 4
%cmp32 = icmp eq i32 319143828, %or31
diff --git a/test/CodeGen/PowerPC/zext-free.ll b/test/CodeGen/PowerPC/zext-free.ll
index 080dbaa58da1..ffbbb5445019 100644
--- a/test/CodeGen/PowerPC/zext-free.ll
+++ b/test/CodeGen/PowerPC/zext-free.ll
@@ -5,22 +5,22 @@ target triple = "powerpc64-unknown-linux-gnu"
; Function Attrs: noreturn nounwind
define signext i32 @_Z1fRPc(i8** nocapture dereferenceable(8) %p) #0 {
entry:
- %.pre = load i8** %p, align 8
+ %.pre = load i8*, i8** %p, align 8
br label %loop
loop: ; preds = %loop.backedge, %entry
%0 = phi i8* [ %.pre, %entry ], [ %.be, %loop.backedge ]
- %1 = load i8* %0, align 1
+ %1 = load i8, i8* %0, align 1
%tobool = icmp eq i8 %1, 0
- %incdec.ptr = getelementptr inbounds i8* %0, i64 1
+ %incdec.ptr = getelementptr inbounds i8, i8* %0, i64 1
store i8* %incdec.ptr, i8** %p, align 8
- %2 = load i8* %incdec.ptr, align 1
+ %2 = load i8, i8* %incdec.ptr, align 1
%tobool2 = icmp ne i8 %2, 0
%or.cond = and i1 %tobool, %tobool2
br i1 %or.cond, label %if.then3, label %loop.backedge
if.then3: ; preds = %loop
- %incdec.ptr4 = getelementptr inbounds i8* %0, i64 2
+ %incdec.ptr4 = getelementptr inbounds i8, i8* %0, i64 2
store i8* %incdec.ptr4, i8** %p, align 8
br label %loop.backedge
diff --git a/test/CodeGen/R600/128bit-kernel-args.ll b/test/CodeGen/R600/128bit-kernel-args.ll
deleted file mode 100644
index 557d86aa8376..000000000000
--- a/test/CodeGen/R600/128bit-kernel-args.ll
+++ /dev/null
@@ -1,29 +0,0 @@
-; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=R600
-; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI
-; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s --check-prefix=SI
-
-; R600: {{^}}v4i32_kernel_arg:
-; R600-DAG: MOV {{[* ]*}}T[[GPR:[0-9]]].X, KC0[3].Y
-; R600-DAG: MOV {{[* ]*}}T[[GPR]].Y, KC0[3].Z
-; R600-DAG: MOV {{[* ]*}}T[[GPR]].Z, KC0[3].W
-; R600-DAG: MOV {{[* ]*}}T[[GPR]].W, KC0[4].X
-; SI: {{^}}v4i32_kernel_arg:
-; SI: buffer_store_dwordx4
-define void @v4i32_kernel_arg(<4 x i32> addrspace(1)* %out, <4 x i32> %in) {
-entry:
- store <4 x i32> %in, <4 x i32> addrspace(1)* %out
- ret void
-}
-
-; R600: {{^}}v4f32_kernel_arg:
-; R600-DAG: MOV {{[* ]*}}T[[GPR:[0-9]]].X, KC0[3].Y
-; R600-DAG: MOV {{[* ]*}}T[[GPR]].Y, KC0[3].Z
-; R600-DAG: MOV {{[* ]*}}T[[GPR]].Z, KC0[3].W
-; R600-DAG: MOV {{[* ]*}}T[[GPR]].W, KC0[4].X
-; SI: {{^}}v4f32_kernel_arg:
-; SI: buffer_store_dwordx4
-define void @v4f32_kernel_arg(<4 x float> addrspace(1)* %out, <4 x float> %in) {
-entry:
- store <4 x float> %in, <4 x float> addrspace(1)* %out
- ret void
-}
diff --git a/test/CodeGen/R600/32-bit-local-address-space.ll b/test/CodeGen/R600/32-bit-local-address-space.ll
index 71940fd88f26..c7bcfd2ddab2 100644
--- a/test/CodeGen/R600/32-bit-local-address-space.ll
+++ b/test/CodeGen/R600/32-bit-local-address-space.ll
@@ -15,7 +15,7 @@
; SI: ds_read_b32 v{{[0-9]+}}, [[PTR]]
define void @local_address_load(i32 addrspace(1)* %out, i32 addrspace(3)* %in) {
entry:
- %0 = load i32 addrspace(3)* %in
+ %0 = load i32, i32 addrspace(3)* %in
store i32 %0, i32 addrspace(1)* %out
ret void
}
@@ -26,8 +26,8 @@ entry:
; SI: ds_read_b32 [[VPTR]]
define void @local_address_gep(i32 addrspace(1)* %out, i32 addrspace(3)* %in, i32 %offset) {
entry:
- %0 = getelementptr i32 addrspace(3)* %in, i32 %offset
- %1 = load i32 addrspace(3)* %0
+ %0 = getelementptr i32, i32 addrspace(3)* %in, i32 %offset
+ %1 = load i32, i32 addrspace(3)* %0
store i32 %1, i32 addrspace(1)* %out
ret void
}
@@ -37,8 +37,8 @@ entry:
; SI: ds_read_b32 v{{[0-9]+}}, [[VPTR]] offset:4
define void @local_address_gep_const_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %in) {
entry:
- %0 = getelementptr i32 addrspace(3)* %in, i32 1
- %1 = load i32 addrspace(3)* %0
+ %0 = getelementptr i32, i32 addrspace(3)* %in, i32 1
+ %1 = load i32, i32 addrspace(3)* %0
store i32 %1, i32 addrspace(1)* %out
ret void
}
@@ -50,8 +50,8 @@ entry:
; SI: ds_read_b32 [[VPTR]]
define void @local_address_gep_large_const_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %in) {
entry:
- %0 = getelementptr i32 addrspace(3)* %in, i32 16385
- %1 = load i32 addrspace(3)* %0
+ %0 = getelementptr i32, i32 addrspace(3)* %in, i32 16385
+ %1 = load i32, i32 addrspace(3)* %0
store i32 %1, i32 addrspace(1)* %out
ret void
}
@@ -72,8 +72,8 @@ define void @null_32bit_lds_ptr(i32 addrspace(1)* %out, i32 addrspace(3)* %lds)
; SI-NEXT: s_add_i32
; SI: ds_read_b32
define void @mul_32bit_ptr(float addrspace(1)* %out, [3 x float] addrspace(3)* %lds, i32 %tid) {
- %ptr = getelementptr [3 x float] addrspace(3)* %lds, i32 %tid, i32 0
- %val = load float addrspace(3)* %ptr
+ %ptr = getelementptr [3 x float], [3 x float] addrspace(3)* %lds, i32 %tid, i32 0
+ %val = load float, float addrspace(3)* %ptr
store float %val, float addrspace(1)* %out
ret void
}
@@ -84,7 +84,7 @@ define void @mul_32bit_ptr(float addrspace(1)* %out, [3 x float] addrspace(3)* %
; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0
; SI: ds_read_b32 v{{[0-9]+}}, [[REG]]
define void @infer_ptr_alignment_global_offset(float addrspace(1)* %out, i32 %tid) {
- %val = load float addrspace(3)* @g_lds
+ %val = load float, float addrspace(3)* @g_lds
store float %val, float addrspace(1)* %out
ret void
}
@@ -96,7 +96,7 @@ define void @infer_ptr_alignment_global_offset(float addrspace(1)* %out, i32 %ti
; FUNC-LABEL: {{^}}global_ptr:
; SI: ds_write_b32
define void @global_ptr() nounwind {
- store i32 addrspace(3)* getelementptr ([16384 x i32] addrspace(3)* @dst, i32 0, i32 16), i32 addrspace(3)* addrspace(3)* @ptr
+ store i32 addrspace(3)* getelementptr ([16384 x i32], [16384 x i32] addrspace(3)* @dst, i32 0, i32 16), i32 addrspace(3)* addrspace(3)* @ptr
ret void
}
@@ -112,7 +112,7 @@ define void @local_address_store(i32 addrspace(3)* %out, i32 %val) {
; SI: v_mov_b32_e32 [[ADDR:v[0-9]+]], [[SADDR]]
; SI: ds_write_b32 [[ADDR]], v{{[0-9]+}}
define void @local_address_gep_store(i32 addrspace(3)* %out, i32, i32 %val, i32 %offset) {
- %gep = getelementptr i32 addrspace(3)* %out, i32 %offset
+ %gep = getelementptr i32, i32 addrspace(3)* %out, i32 %offset
store i32 %val, i32 addrspace(3)* %gep, align 4
ret void
}
@@ -122,7 +122,7 @@ define void @local_address_gep_store(i32 addrspace(3)* %out, i32, i32 %val, i32
; SI: v_mov_b32_e32 [[VAL:v[0-9]+]], s{{[0-9]+}}
; SI: ds_write_b32 [[VPTR]], [[VAL]] offset:4
define void @local_address_gep_const_offset_store(i32 addrspace(3)* %out, i32 %val) {
- %gep = getelementptr i32 addrspace(3)* %out, i32 1
+ %gep = getelementptr i32, i32 addrspace(3)* %out, i32 1
store i32 %val, i32 addrspace(3)* %gep, align 4
ret void
}
@@ -131,9 +131,9 @@ define void @local_address_gep_const_offset_store(i32 addrspace(3)* %out, i32 %v
; FUNC-LABEL: {{^}}local_address_gep_large_const_offset_store:
; SI: s_add_i32 [[SPTR:s[0-9]]], s{{[0-9]+}}, 0x10004
; SI: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
-; SI: ds_write_b32 [[VPTR]], v{{[0-9]+}} [M0]{{$}}
+; SI: ds_write_b32 [[VPTR]], v{{[0-9]+$}}
define void @local_address_gep_large_const_offset_store(i32 addrspace(3)* %out, i32 %val) {
- %gep = getelementptr i32 addrspace(3)* %out, i32 16385
+ %gep = getelementptr i32, i32 addrspace(3)* %out, i32 16385
store i32 %val, i32 addrspace(3)* %gep, align 4
ret void
}
diff --git a/test/CodeGen/R600/64bit-kernel-args.ll b/test/CodeGen/R600/64bit-kernel-args.ll
deleted file mode 100644
index 9f2738edb6eb..000000000000
--- a/test/CodeGen/R600/64bit-kernel-args.ll
+++ /dev/null
@@ -1,11 +0,0 @@
-; RUN: llc < %s -march=amdgcn -mcpu=tahiti -verify-machineinstrs | FileCheck %s --check-prefix=SI
-
-; SI: {{^}}f64_kernel_arg:
-; SI-DAG: s_load_dwordx2 s[{{[0-9]:[0-9]}}], s[0:1], 0x9
-; SI-DAG: s_load_dwordx2 s[{{[0-9]:[0-9]}}], s[0:1], 0xb
-; SI: buffer_store_dwordx2
-define void @f64_kernel_arg(double addrspace(1)* %out, double %in) {
-entry:
- store double %in, double addrspace(1)* %out
- ret void
-}
diff --git a/test/CodeGen/R600/add-debug.ll b/test/CodeGen/R600/add-debug.ll
index a83c689eb182..529905dd36a2 100644
--- a/test/CodeGen/R600/add-debug.ll
+++ b/test/CodeGen/R600/add-debug.ll
@@ -9,7 +9,7 @@ entry:
br i1 %0, label %if, label %else
if:
- %1 = load i64 addrspace(1)* %in
+ %1 = load i64, i64 addrspace(1)* %in
br label %endif
else:
diff --git a/test/CodeGen/R600/add.ll b/test/CodeGen/R600/add.ll
index 3a8b97cd87e8..655e75dbc1a4 100644
--- a/test/CodeGen/R600/add.ll
+++ b/test/CodeGen/R600/add.ll
@@ -9,9 +9,9 @@
;SI-NOT: [[REG]]
;SI: buffer_store_dword [[REG]],
define void @test1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
- %b_ptr = getelementptr i32 addrspace(1)* %in, i32 1
- %a = load i32 addrspace(1)* %in
- %b = load i32 addrspace(1)* %b_ptr
+ %b_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
+ %a = load i32, i32 addrspace(1)* %in
+ %b = load i32, i32 addrspace(1)* %b_ptr
%result = add i32 %a, %b
store i32 %result, i32 addrspace(1)* %out
ret void
@@ -25,9 +25,9 @@ define void @test1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
;SI: v_add_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
define void @test2(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
- %b_ptr = getelementptr <2 x i32> addrspace(1)* %in, i32 1
- %a = load <2 x i32> addrspace(1)* %in
- %b = load <2 x i32> addrspace(1)* %b_ptr
+ %b_ptr = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %in, i32 1
+ %a = load <2 x i32>, <2 x i32> addrspace(1)* %in
+ %b = load <2 x i32>, <2 x i32> addrspace(1)* %b_ptr
%result = add <2 x i32> %a, %b
store <2 x i32> %result, <2 x i32> addrspace(1)* %out
ret void
@@ -45,9 +45,9 @@ define void @test2(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
;SI: v_add_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
define void @test4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
- %b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1
- %a = load <4 x i32> addrspace(1)* %in
- %b = load <4 x i32> addrspace(1)* %b_ptr
+ %b_ptr = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 1
+ %a = load <4 x i32>, <4 x i32> addrspace(1)* %in
+ %b = load <4 x i32>, <4 x i32> addrspace(1)* %b_ptr
%result = add <4 x i32> %a, %b
store <4 x i32> %result, <4 x i32> addrspace(1)* %out
ret void
@@ -62,6 +62,7 @@ define void @test4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
; EG: ADD_INT
; EG: ADD_INT
; EG: ADD_INT
+
; SI: s_add_i32
; SI: s_add_i32
; SI: s_add_i32
@@ -94,6 +95,7 @@ entry:
; EG: ADD_INT
; EG: ADD_INT
; EG: ADD_INT
+
; SI: s_add_i32
; SI: s_add_i32
; SI: s_add_i32
@@ -120,6 +122,14 @@ entry:
; FUNC-LABEL: {{^}}add64:
; SI: s_add_u32
; SI: s_addc_u32
+
+; EG: MEM_RAT_CACHELESS STORE_RAW [[LO:T[0-9]+\.[XYZW]]]
+; EG: MEM_RAT_CACHELESS STORE_RAW [[HI:T[0-9]+\.[XYZW]]]
+; EG-DAG: ADD_INT {{[* ]*}}[[LO]]
+; EG-DAG: ADDC_UINT
+; EG-DAG: ADD_INT
+; EG-DAG: ADD_INT {{[* ]*}}[[HI]]
+; EG-NOT: SUB
define void @add64(i64 addrspace(1)* %out, i64 %a, i64 %b) {
entry:
%0 = add i64 %a, %b
@@ -134,9 +144,17 @@ entry:
; FUNC-LABEL: {{^}}add64_sgpr_vgpr:
; SI-NOT: v_addc_u32_e32 s
+
+; EG: MEM_RAT_CACHELESS STORE_RAW [[LO:T[0-9]+\.[XYZW]]]
+; EG: MEM_RAT_CACHELESS STORE_RAW [[HI:T[0-9]+\.[XYZW]]]
+; EG-DAG: ADD_INT {{[* ]*}}[[LO]]
+; EG-DAG: ADDC_UINT
+; EG-DAG: ADD_INT
+; EG-DAG: ADD_INT {{[* ]*}}[[HI]]
+; EG-NOT: SUB
define void @add64_sgpr_vgpr(i64 addrspace(1)* %out, i64 %a, i64 addrspace(1)* %in) {
entry:
- %0 = load i64 addrspace(1)* %in
+ %0 = load i64, i64 addrspace(1)* %in
%1 = add i64 %a, %0
store i64 %1, i64 addrspace(1)* %out
ret void
@@ -146,13 +164,21 @@ entry:
; FUNC-LABEL: {{^}}add64_in_branch:
; SI: s_add_u32
; SI: s_addc_u32
+
+; EG: MEM_RAT_CACHELESS STORE_RAW [[LO:T[0-9]+\.[XYZW]]]
+; EG: MEM_RAT_CACHELESS STORE_RAW [[HI:T[0-9]+\.[XYZW]]]
+; EG-DAG: ADD_INT {{[* ]*}}[[LO]]
+; EG-DAG: ADDC_UINT
+; EG-DAG: ADD_INT
+; EG-DAG: ADD_INT {{[* ]*}}[[HI]]
+; EG-NOT: SUB
define void @add64_in_branch(i64 addrspace(1)* %out, i64 addrspace(1)* %in, i64 %a, i64 %b, i64 %c) {
entry:
%0 = icmp eq i64 %a, 0
br i1 %0, label %if, label %else
if:
- %1 = load i64 addrspace(1)* %in
+ %1 = load i64, i64 addrspace(1)* %in
br label %endif
else:
diff --git a/test/CodeGen/R600/add_i64.ll b/test/CodeGen/R600/add_i64.ll
index 1769409f5ef1..8346add7df97 100644
--- a/test/CodeGen/R600/add_i64.ll
+++ b/test/CodeGen/R600/add_i64.ll
@@ -8,10 +8,10 @@ declare i32 @llvm.r600.read.tidig.x() readnone
; SI: v_addc_u32
define void @test_i64_vreg(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %inA, i64 addrspace(1)* noalias %inB) {
%tid = call i32 @llvm.r600.read.tidig.x() readnone
- %a_ptr = getelementptr i64 addrspace(1)* %inA, i32 %tid
- %b_ptr = getelementptr i64 addrspace(1)* %inB, i32 %tid
- %a = load i64 addrspace(1)* %a_ptr
- %b = load i64 addrspace(1)* %b_ptr
+ %a_ptr = getelementptr i64, i64 addrspace(1)* %inA, i32 %tid
+ %b_ptr = getelementptr i64, i64 addrspace(1)* %inB, i32 %tid
+ %a = load i64, i64 addrspace(1)* %a_ptr
+ %b = load i64, i64 addrspace(1)* %b_ptr
%result = add i64 %a, %b
store i64 %result, i64 addrspace(1)* %out
ret void
@@ -22,7 +22,7 @@ define void @test_i64_vreg(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noa
; SI: v_add_i32
; SI: v_addc_u32
define void @sgpr_operand(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in, i64 addrspace(1)* noalias %in_bar, i64 %a) {
- %foo = load i64 addrspace(1)* %in, align 8
+ %foo = load i64, i64 addrspace(1)* %in, align 8
%result = add i64 %foo, %a
store i64 %result, i64 addrspace(1)* %out
ret void
@@ -35,7 +35,7 @@ define void @sgpr_operand(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noal
; SI: v_add_i32
; SI: v_addc_u32
define void @sgpr_operand_reversed(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in, i64 %a) {
- %foo = load i64 addrspace(1)* %in, align 8
+ %foo = load i64, i64 addrspace(1)* %in, align 8
%result = add i64 %a, %foo
store i64 %result, i64 addrspace(1)* %out
ret void
@@ -60,10 +60,10 @@ define void @test_v2i64_sreg(<2 x i64> addrspace(1)* noalias %out, <2 x i64> %a,
; SI: v_addc_u32
define void @test_v2i64_vreg(<2 x i64> addrspace(1)* noalias %out, <2 x i64> addrspace(1)* noalias %inA, <2 x i64> addrspace(1)* noalias %inB) {
%tid = call i32 @llvm.r600.read.tidig.x() readnone
- %a_ptr = getelementptr <2 x i64> addrspace(1)* %inA, i32 %tid
- %b_ptr = getelementptr <2 x i64> addrspace(1)* %inB, i32 %tid
- %a = load <2 x i64> addrspace(1)* %a_ptr
- %b = load <2 x i64> addrspace(1)* %b_ptr
+ %a_ptr = getelementptr <2 x i64>, <2 x i64> addrspace(1)* %inA, i32 %tid
+ %b_ptr = getelementptr <2 x i64>, <2 x i64> addrspace(1)* %inB, i32 %tid
+ %a = load <2 x i64>, <2 x i64> addrspace(1)* %a_ptr
+ %b = load <2 x i64>, <2 x i64> addrspace(1)* %b_ptr
%result = add <2 x i64> %a, %b
store <2 x i64> %result, <2 x i64> addrspace(1)* %out
ret void
diff --git a/test/CodeGen/R600/address-space.ll b/test/CodeGen/R600/address-space.ll
index aaa0628ccdc9..4be8c5847529 100644
--- a/test/CodeGen/R600/address-space.ll
+++ b/test/CodeGen/R600/address-space.ll
@@ -10,18 +10,19 @@
; CHECK-LABEL: {{^}}do_as_ptr_calcs:
; CHECK: s_load_dword [[SREG1:s[0-9]+]],
+; CHECK: v_mov_b32_e32 [[VREG2:v[0-9]+]], [[SREG1]]
; CHECK: v_mov_b32_e32 [[VREG1:v[0-9]+]], [[SREG1]]
; CHECK-DAG: ds_read_b32 v{{[0-9]+}}, [[VREG1]] offset:12
-; CHECK-DAG: ds_read_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:20
+; CHECK-DAG: ds_read_b32 v{{[0-9]+}}, [[VREG2]] offset:20
define void @do_as_ptr_calcs(%struct.foo addrspace(3)* nocapture %ptr) nounwind {
entry:
- %x = getelementptr inbounds %struct.foo addrspace(3)* %ptr, i32 0, i32 1, i32 0
- %y = getelementptr inbounds %struct.foo addrspace(3)* %ptr, i32 0, i32 1, i32 2
+ %x = getelementptr inbounds %struct.foo, %struct.foo addrspace(3)* %ptr, i32 0, i32 1, i32 0
+ %y = getelementptr inbounds %struct.foo, %struct.foo addrspace(3)* %ptr, i32 0, i32 1, i32 2
br label %bb32
bb32:
- %a = load float addrspace(3)* %x, align 4
- %b = load float addrspace(3)* %y, align 4
+ %a = load float, float addrspace(3)* %x, align 4
+ %b = load float, float addrspace(3)* %y, align 4
%cmp = fcmp one float %a, %b
br i1 %cmp, label %bb34, label %bb33
diff --git a/test/CodeGen/R600/and.ll b/test/CodeGen/R600/and.ll
index 7a395ccb38d0..5672d470bd7e 100644
--- a/test/CodeGen/R600/and.ll
+++ b/test/CodeGen/R600/and.ll
@@ -10,9 +10,9 @@
; SI: v_and_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
define void @test2(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
- %b_ptr = getelementptr <2 x i32> addrspace(1)* %in, i32 1
- %a = load <2 x i32> addrspace(1) * %in
- %b = load <2 x i32> addrspace(1) * %b_ptr
+ %b_ptr = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %in, i32 1
+ %a = load <2 x i32>, <2 x i32> addrspace(1) * %in
+ %b = load <2 x i32>, <2 x i32> addrspace(1) * %b_ptr
%result = and <2 x i32> %a, %b
store <2 x i32> %result, <2 x i32> addrspace(1)* %out
ret void
@@ -30,9 +30,9 @@ define void @test2(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
; SI: v_and_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
define void @test4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
- %b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1
- %a = load <4 x i32> addrspace(1) * %in
- %b = load <4 x i32> addrspace(1) * %b_ptr
+ %b_ptr = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 1
+ %a = load <4 x i32>, <4 x i32> addrspace(1) * %in
+ %b = load <4 x i32>, <4 x i32> addrspace(1) * %b_ptr
%result = and <4 x i32> %a, %b
store <4 x i32> %result, <4 x i32> addrspace(1)* %out
ret void
@@ -57,23 +57,41 @@ define void @s_and_constant_i32(i32 addrspace(1)* %out, i32 %a) {
; FUNC-LABEL: {{^}}v_and_i32:
; SI: v_and_b32
define void @v_and_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) {
- %a = load i32 addrspace(1)* %aptr, align 4
- %b = load i32 addrspace(1)* %bptr, align 4
+ %a = load i32, i32 addrspace(1)* %aptr, align 4
+ %b = load i32, i32 addrspace(1)* %bptr, align 4
%and = and i32 %a, %b
store i32 %and, i32 addrspace(1)* %out, align 4
ret void
}
-; FUNC-LABEL: {{^}}v_and_constant_i32:
-; SI: v_and_b32
+; FUNC-LABEL: {{^}}v_and_constant_i32
+; SI: v_and_b32_e32 v{{[0-9]+}}, 0x12d687, v{{[0-9]+}}
define void @v_and_constant_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr) {
- %a = load i32 addrspace(1)* %aptr, align 4
+ %a = load i32, i32 addrspace(1)* %aptr, align 4
%and = and i32 %a, 1234567
store i32 %and, i32 addrspace(1)* %out, align 4
ret void
}
-; FUNC-LABEL: {{^}}s_and_i64:
+; FUNC-LABEL: {{^}}v_and_inline_imm_64_i32
+; SI: v_and_b32_e32 v{{[0-9]+}}, 64, v{{[0-9]+}}
+define void @v_and_inline_imm_64_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr) {
+ %a = load i32, i32 addrspace(1)* %aptr, align 4
+ %and = and i32 %a, 64
+ store i32 %and, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: {{^}}v_and_inline_imm_neg_16_i32
+; SI: v_and_b32_e32 v{{[0-9]+}}, -16, v{{[0-9]+}}
+define void @v_and_inline_imm_neg_16_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr) {
+ %a = load i32, i32 addrspace(1)* %aptr, align 4
+ %and = and i32 %a, -16
+ store i32 %and, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: {{^}}s_and_i64
; SI: s_and_b64
define void @s_and_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) {
%and = and i64 %a, %b
@@ -90,8 +108,8 @@ define void @s_and_i1(i1 addrspace(1)* %out, i1 %a, i1 %b) {
ret void
}
-; FUNC-LABEL: {{^}}s_and_constant_i64:
-; SI: s_and_b64
+; FUNC-LABEL: {{^}}s_and_constant_i64
+; SI: s_and_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}
define void @s_and_constant_i64(i64 addrspace(1)* %out, i64 %a) {
%and = and i64 %a, 281474976710655
store i64 %and, i64 addrspace(1)* %out, align 8
@@ -102,8 +120,8 @@ define void @s_and_constant_i64(i64 addrspace(1)* %out, i64 %a) {
; SI: v_and_b32
; SI: v_and_b32
define void @v_and_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) {
- %a = load i64 addrspace(1)* %aptr, align 8
- %b = load i64 addrspace(1)* %bptr, align 8
+ %a = load i64, i64 addrspace(1)* %aptr, align 8
+ %b = load i64, i64 addrspace(1)* %bptr, align 8
%and = and i64 %a, %b
store i64 %and, i64 addrspace(1)* %out, align 8
ret void
@@ -118,8 +136,8 @@ entry:
br i1 %tmp0, label %if, label %endif
if:
- %a = load i64 addrspace(1)* %aptr, align 8
- %b = load i64 addrspace(1)* %bptr, align 8
+ %a = load i64, i64 addrspace(1)* %aptr, align 8
+ %b = load i64, i64 addrspace(1)* %bptr, align 8
%and = and i64 %a, %b
br label %endif
@@ -133,7 +151,7 @@ endif:
; SI: v_and_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
; SI: v_and_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
define void @v_and_constant_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr) {
- %a = load i64 addrspace(1)* %aptr, align 8
+ %a = load i64, i64 addrspace(1)* %aptr, align 8
%and = and i64 %a, 1234567
store i64 %and, i64 addrspace(1)* %out, align 8
ret void
@@ -144,16 +162,135 @@ define void @v_and_constant_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr)
; SI: v_and_b32_e32 {{v[0-9]+}}, 64, {{v[0-9]+}}
; SI: v_and_b32_e32 {{v[0-9]+}}, 0, {{v[0-9]+}}
define void @v_and_inline_imm_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr) {
- %a = load i64 addrspace(1)* %aptr, align 8
+ %a = load i64, i64 addrspace(1)* %aptr, align 8
%and = and i64 %a, 64
store i64 %and, i64 addrspace(1)* %out, align 8
ret void
}
-; FUNC-LABEL: {{^}}s_and_inline_imm_i64:
+; FUNC-LABEL: {{^}}s_and_inline_imm_64_i64
; SI: s_and_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 64
-define void @s_and_inline_imm_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
+define void @s_and_inline_imm_64_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
%and = and i64 %a, 64
store i64 %and, i64 addrspace(1)* %out, align 8
ret void
}
+
+; FUNC-LABEL: {{^}}s_and_inline_imm_1_i64
+; SI: s_and_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 1
+define void @s_and_inline_imm_1_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
+ %and = and i64 %a, 1
+ store i64 %and, i64 addrspace(1)* %out, align 8
+ ret void
+}
+
+; FUNC-LABEL: {{^}}s_and_inline_imm_1.0_i64
+; SI: s_and_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 1.0
+define void @s_and_inline_imm_1.0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
+ %and = and i64 %a, 4607182418800017408
+ store i64 %and, i64 addrspace(1)* %out, align 8
+ ret void
+}
+
+; FUNC-LABEL: {{^}}s_and_inline_imm_neg_1.0_i64
+; SI: s_and_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, -1.0
+define void @s_and_inline_imm_neg_1.0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
+ %and = and i64 %a, 13830554455654793216
+ store i64 %and, i64 addrspace(1)* %out, align 8
+ ret void
+}
+
+; FUNC-LABEL: {{^}}s_and_inline_imm_0.5_i64
+; SI: s_and_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0.5
+define void @s_and_inline_imm_0.5_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
+ %and = and i64 %a, 4602678819172646912
+ store i64 %and, i64 addrspace(1)* %out, align 8
+ ret void
+}
+
+; FUNC-LABEL: {{^}}s_and_inline_imm_neg_0.5_i64
+; SI: s_and_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, -0.5
+define void @s_and_inline_imm_neg_0.5_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
+ %and = and i64 %a, 13826050856027422720
+ store i64 %and, i64 addrspace(1)* %out, align 8
+ ret void
+}
+
+; FUNC-LABEL: {{^}}s_and_inline_imm_2.0_i64
+; SI: s_and_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 2.0
+define void @s_and_inline_imm_2.0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
+ %and = and i64 %a, 4611686018427387904
+ store i64 %and, i64 addrspace(1)* %out, align 8
+ ret void
+}
+
+; FUNC-LABEL: {{^}}s_and_inline_imm_neg_2.0_i64
+; SI: s_and_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, -2.0
+define void @s_and_inline_imm_neg_2.0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
+ %and = and i64 %a, 13835058055282163712
+ store i64 %and, i64 addrspace(1)* %out, align 8
+ ret void
+}
+
+; FUNC-LABEL: {{^}}s_and_inline_imm_4.0_i64
+; SI: s_and_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 4.0
+define void @s_and_inline_imm_4.0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
+ %and = and i64 %a, 4616189618054758400
+ store i64 %and, i64 addrspace(1)* %out, align 8
+ ret void
+}
+
+; FUNC-LABEL: {{^}}s_and_inline_imm_neg_4.0_i64
+; SI: s_and_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, -4.0
+define void @s_and_inline_imm_neg_4.0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
+ %and = and i64 %a, 13839561654909534208
+ store i64 %and, i64 addrspace(1)* %out, align 8
+ ret void
+}
+
+
+; Test with the 64-bit integer bitpattern for a 32-bit float in the
+; low 32-bits, which is not a valid 64-bit inline immediate.
+
+; FUNC-LABEL: {{^}}s_and_inline_imm_f32_4.0_i64
+; SI-DAG: s_mov_b32 s[[K_LO:[0-9]+]], 4.0
+; SI-DAG: s_mov_b32 s[[K_HI:[0-9]+]], 0{{$}}
+; SI: s_and_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{\[}}[[K_LO]]:[[K_HI]]{{\]}}
+define void @s_and_inline_imm_f32_4.0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
+ %and = and i64 %a, 1082130432
+ store i64 %and, i64 addrspace(1)* %out, align 8
+ ret void
+}
+
+; FIXME: Copy of -1 register
+; FUNC-LABEL: {{^}}s_and_inline_imm_f32_neg_4.0_i64
+; SI-DAG: s_mov_b32 s[[K_LO:[0-9]+]], -4.0
+; SI-DAG: s_mov_b32 s[[K_HI:[0-9]+]], -1{{$}}
+; SI-DAG: s_mov_b32 s[[K_HI_COPY:[0-9]+]], s[[K_HI]]
+; SI: s_and_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{\[}}[[K_LO]]:[[K_HI_COPY]]{{\]}}
+define void @s_and_inline_imm_f32_neg_4.0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
+ %and = and i64 %a, -1065353216
+ store i64 %and, i64 addrspace(1)* %out, align 8
+ ret void
+}
+
+; Shift into upper 32-bits
+; FUNC-LABEL: {{^}}s_and_inline_high_imm_f32_4.0_i64
+; SI-DAG: s_mov_b32 s[[K_HI:[0-9]+]], 4.0
+; SI-DAG: s_mov_b32 s[[K_LO:[0-9]+]], 0{{$}}
+; SI: s_and_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{\[}}[[K_LO]]:[[K_HI]]{{\]}}
+define void @s_and_inline_high_imm_f32_4.0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
+ %and = and i64 %a, 4647714815446351872
+ store i64 %and, i64 addrspace(1)* %out, align 8
+ ret void
+}
+
+; FUNC-LABEL: {{^}}s_and_inline_high_imm_f32_neg_4.0_i64
+; SI-DAG: s_mov_b32 s[[K_HI:[0-9]+]], -4.0
+; SI-DAG: s_mov_b32 s[[K_LO:[0-9]+]], 0{{$}}
+; SI: s_and_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{\[}}[[K_LO]]:[[K_HI]]{{\]}}
+define void @s_and_inline_high_imm_f32_neg_4.0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
+ %and = and i64 %a, 13871086852301127680
+ store i64 %and, i64 addrspace(1)* %out, align 8
+ ret void
+}
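The new s_and_inline_imm tests above use integer constants that are the raw IEEE-754 bit patterns of small floating-point values (the candidates for SI's floating-point inline immediates). A small sketch of that correspondence, illustrative only and not part of the patch, using constant bitcasts:

define i64 @bits_of_double_one() {
  ret i64 bitcast (double 1.000000e+00 to i64)  ; 4607182418800017408 = 0x3FF0000000000000
}

define i64 @bits_of_double_four() {
  ret i64 bitcast (double 4.000000e+00 to i64)  ; 4616189618054758400 = 0x4010000000000000
}

define i32 @bits_of_float_four() {
  ret i32 bitcast (float 4.000000e+00 to i32)   ; 1082130432 = 0x40800000
}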
diff --git a/test/CodeGen/R600/array-ptr-calc-i32.ll b/test/CodeGen/R600/array-ptr-calc-i32.ll
index 33a8aee0164d..8c2a0795860d 100644
--- a/test/CodeGen/R600/array-ptr-calc-i32.ll
+++ b/test/CodeGen/R600/array-ptr-calc-i32.ll
@@ -20,24 +20,24 @@ declare void @llvm.AMDGPU.barrier.local() nounwind noduplicate
; FIXME: The AMDGPUPromoteAlloca pass should be able to convert this
; alloca to a vector. It currently fails because it does not know how
; to interpret:
-; getelementptr [4 x i32]* %alloca, i32 1, i32 %b
+; getelementptr [4 x i32], [4 x i32]* %alloca, i32 1, i32 %b
; SI-PROMOTE: v_add_i32_e32 [[PTRREG:v[0-9]+]], 16
; SI-PROMOTE: ds_write_b32 [[PTRREG]]
define void @test_private_array_ptr_calc(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %inA, i32 addrspace(1)* noalias %inB) {
%alloca = alloca [4 x i32], i32 4, align 16
%tid = call i32 @llvm.SI.tid() readnone
- %a_ptr = getelementptr i32 addrspace(1)* %inA, i32 %tid
- %b_ptr = getelementptr i32 addrspace(1)* %inB, i32 %tid
- %a = load i32 addrspace(1)* %a_ptr
- %b = load i32 addrspace(1)* %b_ptr
+ %a_ptr = getelementptr i32, i32 addrspace(1)* %inA, i32 %tid
+ %b_ptr = getelementptr i32, i32 addrspace(1)* %inB, i32 %tid
+ %a = load i32, i32 addrspace(1)* %a_ptr
+ %b = load i32, i32 addrspace(1)* %b_ptr
%result = add i32 %a, %b
- %alloca_ptr = getelementptr [4 x i32]* %alloca, i32 1, i32 %b
+ %alloca_ptr = getelementptr [4 x i32], [4 x i32]* %alloca, i32 1, i32 %b
store i32 %result, i32* %alloca_ptr, align 4
; Dummy call
call void @llvm.AMDGPU.barrier.local() nounwind noduplicate
- %reload = load i32* %alloca_ptr, align 4
- %out_ptr = getelementptr i32 addrspace(1)* %out, i32 %tid
+ %reload = load i32, i32* %alloca_ptr, align 4
+ %out_ptr = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
store i32 %reload, i32 addrspace(1)* %out_ptr, align 4
ret void
}
diff --git a/test/CodeGen/R600/array-ptr-calc-i64.ll b/test/CodeGen/R600/array-ptr-calc-i64.ll
index 32e657db7bc6..eae095eb8449 100644
--- a/test/CodeGen/R600/array-ptr-calc-i64.ll
+++ b/test/CodeGen/R600/array-ptr-calc-i64.ll
@@ -7,10 +7,10 @@ declare i32 @llvm.SI.tid() readnone
; SI: v_mul_hi_i32
define void @test_array_ptr_calc(i32 addrspace(1)* noalias %out, [1025 x i32] addrspace(1)* noalias %inA, i32 addrspace(1)* noalias %inB) {
%tid = call i32 @llvm.SI.tid() readnone
- %a_ptr = getelementptr [1025 x i32] addrspace(1)* %inA, i32 %tid, i32 0
- %b_ptr = getelementptr i32 addrspace(1)* %inB, i32 %tid
- %a = load i32 addrspace(1)* %a_ptr
- %b = load i32 addrspace(1)* %b_ptr
+ %a_ptr = getelementptr [1025 x i32], [1025 x i32] addrspace(1)* %inA, i32 %tid, i32 0
+ %b_ptr = getelementptr i32, i32 addrspace(1)* %inB, i32 %tid
+ %a = load i32, i32 addrspace(1)* %a_ptr
+ %b = load i32, i32 addrspace(1)* %b_ptr
%result = add i32 %a, %b
store i32 %result, i32 addrspace(1)* %out
ret void
diff --git a/test/CodeGen/R600/atomic_cmp_swap_local.ll b/test/CodeGen/R600/atomic_cmp_swap_local.ll
index 0d5ece4b0e0b..ef2560ef1849 100644
--- a/test/CodeGen/R600/atomic_cmp_swap_local.ll
+++ b/test/CodeGen/R600/atomic_cmp_swap_local.ll
@@ -1,16 +1,19 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=CI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=SICI -check-prefix=GCN -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=SICI -check-prefix=CIVI -check-prefix=GCN -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=CIVI -check-prefix=GCN -check-prefix=FUNC %s
; FUNC-LABEL: {{^}}lds_atomic_cmpxchg_ret_i32_offset:
-; SI: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
-; SI: s_load_dword [[SWAP:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
-; SI-DAG: v_mov_b32_e32 [[VCMP:v[0-9]+]], 7
-; SI-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]
-; SI-DAG: v_mov_b32_e32 [[VSWAP:v[0-9]+]], [[SWAP]]
-; SI: ds_cmpst_rtn_b32 [[RESULT:v[0-9]+]], [[VPTR]], [[VCMP]], [[VSWAP]] offset:16 [M0]
-; SI: s_endpgm
+; GCN: v_mov_b32_e32 [[VCMP:v[0-9]+]], 7
+; SICI: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
+; SICI: s_load_dword [[SWAP:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
+; VI: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c
+; VI: s_load_dword [[SWAP:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x30
+; GCN-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]
+; GCN-DAG: v_mov_b32_e32 [[VSWAP:v[0-9]+]], [[SWAP]]
+; GCN: ds_cmpst_rtn_b32 [[RESULT:v[0-9]+]], [[VPTR]], [[VCMP]], [[VSWAP]] offset:16
+; GCN: s_endpgm
define void @lds_atomic_cmpxchg_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr, i32 %swap) nounwind {
- %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
+ %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
%pair = cmpxchg i32 addrspace(3)* %gep, i32 7, i32 %swap seq_cst monotonic
%result = extractvalue { i32, i1 } %pair, 0
store i32 %result, i32 addrspace(1)* %out, align 4
@@ -18,18 +21,20 @@ define void @lds_atomic_cmpxchg_ret_i32_offset(i32 addrspace(1)* %out, i32 addrs
}
; FUNC-LABEL: {{^}}lds_atomic_cmpxchg_ret_i64_offset:
-; SI: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
-; SI: s_load_dwordx2 s{{\[}}[[LOSWAP:[0-9]+]]:[[HISWAP:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0xd
-; SI-DAG: v_mov_b32_e32 v[[LOVCMP:[0-9]+]], 7
-; SI-DAG: v_mov_b32_e32 v[[HIVCMP:[0-9]+]], 0
-; SI-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]
-; SI-DAG: v_mov_b32_e32 v[[LOSWAPV:[0-9]+]], s[[LOSWAP]]
-; SI-DAG: v_mov_b32_e32 v[[HISWAPV:[0-9]+]], s[[HISWAP]]
-; SI: ds_cmpst_rtn_b64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[VPTR]], v{{\[}}[[LOVCMP]]:[[HIVCMP]]{{\]}}, v{{\[}}[[LOSWAPV]]:[[HISWAPV]]{{\]}} offset:32 [M0]
-; SI: buffer_store_dwordx2 [[RESULT]],
-; SI: s_endpgm
+; GCN-DAG: v_mov_b32_e32 v[[LOVCMP:[0-9]+]], 7
+; GCN-DAG: v_mov_b32_e32 v[[HIVCMP:[0-9]+]], 0
+; SICI: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
+; SICI: s_load_dwordx2 s{{\[}}[[LOSWAP:[0-9]+]]:[[HISWAP:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0xd
+; VI: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c
+; VI: s_load_dwordx2 s{{\[}}[[LOSWAP:[0-9]+]]:[[HISWAP:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x34
+; GCN-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]
+; GCN-DAG: v_mov_b32_e32 v[[LOSWAPV:[0-9]+]], s[[LOSWAP]]
+; GCN-DAG: v_mov_b32_e32 v[[HISWAPV:[0-9]+]], s[[HISWAP]]
+; GCN: ds_cmpst_rtn_b64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[VPTR]], v{{\[}}[[LOVCMP]]:[[HIVCMP]]{{\]}}, v{{\[}}[[LOSWAPV]]:[[HISWAPV]]{{\]}} offset:32
+; GCN: buffer_store_dwordx2 [[RESULT]],
+; GCN: s_endpgm
define void @lds_atomic_cmpxchg_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr, i64 %swap) nounwind {
- %gep = getelementptr i64 addrspace(3)* %ptr, i32 4
+ %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
%pair = cmpxchg i64 addrspace(3)* %gep, i64 7, i64 %swap seq_cst monotonic
%result = extractvalue { i64, i1 } %pair, 0
store i64 %result, i64 addrspace(1)* %out, align 8
@@ -38,12 +43,12 @@ define void @lds_atomic_cmpxchg_ret_i64_offset(i64 addrspace(1)* %out, i64 addrs
; FUNC-LABEL: {{^}}lds_atomic_cmpxchg_ret_i32_bad_si_offset
; SI: ds_cmpst_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-; CI: ds_cmpst_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16 [M0]
-; SI: s_endpgm
+; CIVI: ds_cmpst_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
+; GCN: s_endpgm
define void @lds_atomic_cmpxchg_ret_i32_bad_si_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr, i32 %swap, i32 %a, i32 %b) nounwind {
%sub = sub i32 %a, %b
%add = add i32 %sub, 4
- %gep = getelementptr i32 addrspace(3)* %ptr, i32 %add
+ %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 %add
%pair = cmpxchg i32 addrspace(3)* %gep, i32 7, i32 %swap seq_cst monotonic
%result = extractvalue { i32, i1 } %pair, 0
store i32 %result, i32 addrspace(1)* %out, align 4
@@ -51,32 +56,36 @@ define void @lds_atomic_cmpxchg_ret_i32_bad_si_offset(i32 addrspace(1)* %out, i3
}
; FUNC-LABEL: {{^}}lds_atomic_cmpxchg_noret_i32_offset:
-; SI: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x9
-; SI: s_load_dword [[SWAP:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xa
-; SI-DAG: v_mov_b32_e32 [[VCMP:v[0-9]+]], 7
-; SI-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]
-; SI-DAG: v_mov_b32_e32 [[VSWAP:v[0-9]+]], [[SWAP]]
-; SI: ds_cmpst_b32 [[VPTR]], [[VCMP]], [[VSWAP]] offset:16 [M0]
-; SI: s_endpgm
+; SICI: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x9
+; SICI: s_load_dword [[SWAP:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xa
+; VI: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x24
+; VI: s_load_dword [[SWAP:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x28
+; GCN-DAG: v_mov_b32_e32 [[VCMP:v[0-9]+]], 7
+; GCN-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]
+; GCN-DAG: v_mov_b32_e32 [[VSWAP:v[0-9]+]], [[SWAP]]
+; GCN: ds_cmpst_b32 [[VPTR]], [[VCMP]], [[VSWAP]] offset:16
+; GCN: s_endpgm
define void @lds_atomic_cmpxchg_noret_i32_offset(i32 addrspace(3)* %ptr, i32 %swap) nounwind {
- %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
+ %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
%pair = cmpxchg i32 addrspace(3)* %gep, i32 7, i32 %swap seq_cst monotonic
%result = extractvalue { i32, i1 } %pair, 0
ret void
}
; FUNC-LABEL: {{^}}lds_atomic_cmpxchg_noret_i64_offset:
-; SI: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x9
-; SI: s_load_dwordx2 s{{\[}}[[LOSWAP:[0-9]+]]:[[HISWAP:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0xb
-; SI-DAG: v_mov_b32_e32 v[[LOVCMP:[0-9]+]], 7
-; SI-DAG: v_mov_b32_e32 v[[HIVCMP:[0-9]+]], 0
-; SI-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]
-; SI-DAG: v_mov_b32_e32 v[[LOSWAPV:[0-9]+]], s[[LOSWAP]]
-; SI-DAG: v_mov_b32_e32 v[[HISWAPV:[0-9]+]], s[[HISWAP]]
-; SI: ds_cmpst_b64 [[VPTR]], v{{\[}}[[LOVCMP]]:[[HIVCMP]]{{\]}}, v{{\[}}[[LOSWAPV]]:[[HISWAPV]]{{\]}} offset:32 [M0]
-; SI: s_endpgm
+; SICI: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x9
+; SICI: s_load_dwordx2 s{{\[}}[[LOSWAP:[0-9]+]]:[[HISWAP:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0xb
+; VI: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x24
+; VI: s_load_dwordx2 s{{\[}}[[LOSWAP:[0-9]+]]:[[HISWAP:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x2c
+; GCN-DAG: v_mov_b32_e32 v[[LOVCMP:[0-9]+]], 7
+; GCN-DAG: v_mov_b32_e32 v[[HIVCMP:[0-9]+]], 0
+; GCN-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]
+; GCN-DAG: v_mov_b32_e32 v[[LOSWAPV:[0-9]+]], s[[LOSWAP]]
+; GCN-DAG: v_mov_b32_e32 v[[HISWAPV:[0-9]+]], s[[HISWAP]]
+; GCN: ds_cmpst_b64 [[VPTR]], v{{\[}}[[LOVCMP]]:[[HIVCMP]]{{\]}}, v{{\[}}[[LOSWAPV]]:[[HISWAPV]]{{\]}} offset:32
+; GCN: s_endpgm
define void @lds_atomic_cmpxchg_noret_i64_offset(i64 addrspace(3)* %ptr, i64 %swap) nounwind {
- %gep = getelementptr i64 addrspace(3)* %ptr, i32 4
+ %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
%pair = cmpxchg i64 addrspace(3)* %gep, i64 7, i64 %swap seq_cst monotonic
%result = extractvalue { i64, i1 } %pair, 0
ret void
diff --git a/test/CodeGen/R600/atomic_load_add.ll b/test/CodeGen/R600/atomic_load_add.ll
index 5fe05f2996af..20c685447eef 100644
--- a/test/CodeGen/R600/atomic_load_add.ll
+++ b/test/CodeGen/R600/atomic_load_add.ll
@@ -14,7 +14,7 @@ define void @atomic_add_local(i32 addrspace(3)* %local) {
; R600: LDS_ADD *
; SI: ds_add_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
define void @atomic_add_local_const_offset(i32 addrspace(3)* %local) {
- %gep = getelementptr i32 addrspace(3)* %local, i32 4
+ %gep = getelementptr i32, i32 addrspace(3)* %local, i32 4
%val = atomicrmw volatile add i32 addrspace(3)* %gep, i32 5 seq_cst
ret void
}
@@ -32,7 +32,7 @@ define void @atomic_add_ret_local(i32 addrspace(1)* %out, i32 addrspace(3)* %loc
; R600: LDS_ADD_RET *
; SI: ds_add_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:20
define void @atomic_add_ret_local_const_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %local) {
- %gep = getelementptr i32 addrspace(3)* %local, i32 5
+ %gep = getelementptr i32, i32 addrspace(3)* %local, i32 5
%val = atomicrmw volatile add i32 addrspace(3)* %gep, i32 5 seq_cst
store i32 %val, i32 addrspace(1)* %out
ret void
diff --git a/test/CodeGen/R600/atomic_load_sub.ll b/test/CodeGen/R600/atomic_load_sub.ll
index 40722833d265..4c6f45525b9e 100644
--- a/test/CodeGen/R600/atomic_load_sub.ll
+++ b/test/CodeGen/R600/atomic_load_sub.ll
@@ -14,7 +14,7 @@ define void @atomic_sub_local(i32 addrspace(3)* %local) {
; R600: LDS_SUB *
; SI: ds_sub_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
define void @atomic_sub_local_const_offset(i32 addrspace(3)* %local) {
- %gep = getelementptr i32 addrspace(3)* %local, i32 4
+ %gep = getelementptr i32, i32 addrspace(3)* %local, i32 4
%val = atomicrmw volatile sub i32 addrspace(3)* %gep, i32 5 seq_cst
ret void
}
@@ -32,7 +32,7 @@ define void @atomic_sub_ret_local(i32 addrspace(1)* %out, i32 addrspace(3)* %loc
; R600: LDS_SUB_RET *
; SI: ds_sub_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:20
define void @atomic_sub_ret_local_const_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %local) {
- %gep = getelementptr i32 addrspace(3)* %local, i32 5
+ %gep = getelementptr i32, i32 addrspace(3)* %local, i32 5
%val = atomicrmw volatile sub i32 addrspace(3)* %gep, i32 5 seq_cst
store i32 %val, i32 addrspace(1)* %out
ret void
diff --git a/test/CodeGen/R600/bfe_uint.ll b/test/CodeGen/R600/bfe_uint.ll
index 6fe23e912952..32e3fc26106f 100644
--- a/test/CodeGen/R600/bfe_uint.ll
+++ b/test/CodeGen/R600/bfe_uint.ll
@@ -1,7 +1,5 @@
; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
-; XFAIL: *
-
; CHECK: {{^}}bfe_def:
; CHECK: BFE_UINT
define void @bfe_def(i32 addrspace(1)* %out, i32 %x) {
diff --git a/test/CodeGen/R600/big_alu.ll b/test/CodeGen/R600/big_alu.ll
index 28be216e76f2..2671c5d102b3 100644
--- a/test/CodeGen/R600/big_alu.ll
+++ b/test/CodeGen/R600/big_alu.ll
@@ -51,29 +51,29 @@ main_body:
%43 = extractelement <4 x float> %reg7, i32 1
%44 = extractelement <4 x float> %reg7, i32 2
%45 = extractelement <4 x float> %reg7, i32 3
- %46 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 11)
+ %46 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 11)
%47 = extractelement <4 x float> %46, i32 0
- %48 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 11)
+ %48 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 11)
%49 = extractelement <4 x float> %48, i32 1
- %50 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 11)
+ %50 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 11)
%51 = extractelement <4 x float> %50, i32 2
- %52 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 12)
+ %52 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 12)
%53 = extractelement <4 x float> %52, i32 0
- %54 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 14)
+ %54 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 14)
%55 = extractelement <4 x float> %54, i32 0
- %56 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 14)
+ %56 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 14)
%57 = extractelement <4 x float> %56, i32 1
- %58 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 14)
+ %58 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 14)
%59 = extractelement <4 x float> %58, i32 2
- %60 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 14)
+ %60 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 14)
%61 = extractelement <4 x float> %60, i32 3
- %62 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 16)
+ %62 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 16)
%63 = extractelement <4 x float> %62, i32 0
- %64 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 16)
+ %64 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 16)
%65 = extractelement <4 x float> %64, i32 1
- %66 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 16)
+ %66 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 16)
%67 = extractelement <4 x float> %66, i32 2
- %68 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9)
+ %68 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9)
%69 = extractelement <4 x float> %68, i32 0
%70 = fcmp oge float %69, 3.500000e+00
%71 = sext i1 %70 to i32
@@ -81,7 +81,7 @@ main_body:
%73 = bitcast float %72 to i32
%74 = icmp ne i32 %73, 0
%. = select i1 %74, float 0.000000e+00, float 0.000000e+00
- %75 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9)
+ %75 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9)
%76 = extractelement <4 x float> %75, i32 0
%77 = fcmp oge float %76, 2.000000e+00
%78 = sext i1 %77 to i32
@@ -135,7 +135,7 @@ IF137: ; preds = %main_body
%123 = insertelement <4 x float> %122, float 0.000000e+00, i32 3
%124 = call float @llvm.AMDGPU.dp4(<4 x float> %119, <4 x float> %123)
%125 = fdiv float 1.000000e+00, %124
- %126 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5)
+ %126 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5)
%127 = extractelement <4 x float> %126, i32 0
%128 = fmul float %127, %125
%129 = fmul float %103, %128
@@ -347,15 +347,15 @@ ENDIF136: ; preds = %main_body, %ENDIF15
%329 = fmul float %314, %328
%330 = fmul float %316, %328
%331 = fmul float %318, %328
- %332 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6)
+ %332 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6)
%333 = extractelement <4 x float> %332, i32 0
%334 = fsub float -0.000000e+00, %333
%335 = fadd float 1.000000e+00, %334
- %336 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7)
+ %336 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7)
%337 = extractelement <4 x float> %336, i32 0
%338 = fsub float -0.000000e+00, %337
%339 = fadd float 1.000000e+00, %338
- %340 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 8)
+ %340 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 8)
%341 = extractelement <4 x float> %340, i32 0
%342 = fsub float -0.000000e+00, %341
%343 = fadd float 1.000000e+00, %342
@@ -1018,7 +1018,7 @@ ENDIF175: ; preds = %ENDIF172, %IF176
%temp92.11 = phi float [ %877, %IF176 ], [ %temp92.10, %ENDIF172 ]
%temp93.5 = phi float [ %878, %IF176 ], [ %temp93.4, %ENDIF172 ]
%temp94.5 = phi float [ %879, %IF176 ], [ %temp94.4, %ENDIF172 ]
- %880 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 10)
+ %880 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 10)
%881 = extractelement <4 x float> %880, i32 0
%882 = fcmp olt float %881, %179
%883 = sext i1 %882 to i32
@@ -1114,12 +1114,12 @@ ENDIF178: ; preds = %ENDIF175, %IF179
%960 = fmul float %temp87.6, %956
%961 = fmul float %2, -2.000000e+00
%962 = fadd float %961, 1.000000e+00
- %963 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 23)
+ %963 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 23)
%964 = extractelement <4 x float> %963, i32 2
%965 = fsub float -0.000000e+00, %964
%966 = fadd float %962, %965
%967 = fdiv float 1.000000e+00, %966
- %968 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 24)
+ %968 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 24)
%969 = extractelement <4 x float> %968, i32 2
%970 = fmul float %969, %967
%971 = fsub float -0.000000e+00, %53
diff --git a/test/CodeGen/R600/bitcast.ll b/test/CodeGen/R600/bitcast.ll
index 1ba64af7dca3..fd56d956bf31 100644
--- a/test/CodeGen/R600/bitcast.ll
+++ b/test/CodeGen/R600/bitcast.ll
@@ -9,7 +9,7 @@ declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float
; SI: s_endpgm
define void @v32i8_to_v8i32(<32 x i8> addrspace(2)* inreg) #0 {
entry:
- %1 = load <32 x i8> addrspace(2)* %0
+ %1 = load <32 x i8>, <32 x i8> addrspace(2)* %0
%2 = bitcast <32 x i8> %1 to <8 x i32>
%3 = extractelement <8 x i32> %2, i32 1
%4 = icmp ne i32 %3, 0
@@ -23,34 +23,34 @@ entry:
define void @i8ptr_v16i8ptr(<16 x i8> addrspace(1)* %out, i8 addrspace(1)* %in) {
entry:
%0 = bitcast i8 addrspace(1)* %in to <16 x i8> addrspace(1)*
- %1 = load <16 x i8> addrspace(1)* %0
+ %1 = load <16 x i8>, <16 x i8> addrspace(1)* %0
store <16 x i8> %1, <16 x i8> addrspace(1)* %out
ret void
}
define void @f32_to_v2i16(<2 x i16> addrspace(1)* %out, float addrspace(1)* %in) nounwind {
- %load = load float addrspace(1)* %in, align 4
+ %load = load float, float addrspace(1)* %in, align 4
%bc = bitcast float %load to <2 x i16>
store <2 x i16> %bc, <2 x i16> addrspace(1)* %out, align 4
ret void
}
define void @v2i16_to_f32(float addrspace(1)* %out, <2 x i16> addrspace(1)* %in) nounwind {
- %load = load <2 x i16> addrspace(1)* %in, align 4
+ %load = load <2 x i16>, <2 x i16> addrspace(1)* %in, align 4
%bc = bitcast <2 x i16> %load to float
store float %bc, float addrspace(1)* %out, align 4
ret void
}
define void @v4i8_to_i32(i32 addrspace(1)* %out, <4 x i8> addrspace(1)* %in) nounwind {
- %load = load <4 x i8> addrspace(1)* %in, align 4
+ %load = load <4 x i8>, <4 x i8> addrspace(1)* %in, align 4
%bc = bitcast <4 x i8> %load to i32
store i32 %bc, i32 addrspace(1)* %out, align 4
ret void
}
define void @i32_to_v4i8(<4 x i8> addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
- %load = load i32 addrspace(1)* %in, align 4
+ %load = load i32, i32 addrspace(1)* %in, align 4
%bc = bitcast i32 %load to <4 x i8>
store <4 x i8> %bc, <4 x i8> addrspace(1)* %out, align 4
ret void
@@ -59,7 +59,7 @@ define void @i32_to_v4i8(<4 x i8> addrspace(1)* %out, i32 addrspace(1)* %in) nou
; FUNC-LABEL: {{^}}bitcast_v2i32_to_f64:
; SI: s_endpgm
define void @bitcast_v2i32_to_f64(double addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
- %val = load <2 x i32> addrspace(1)* %in, align 8
+ %val = load <2 x i32>, <2 x i32> addrspace(1)* %in, align 8
%add = add <2 x i32> %val, <i32 4, i32 9>
%bc = bitcast <2 x i32> %add to double
store double %bc, double addrspace(1)* %out, align 8
@@ -69,7 +69,7 @@ define void @bitcast_v2i32_to_f64(double addrspace(1)* %out, <2 x i32> addrspace
; FUNC-LABEL: {{^}}bitcast_f64_to_v2i32:
; SI: s_endpgm
define void @bitcast_f64_to_v2i32(<2 x i32> addrspace(1)* %out, double addrspace(1)* %in) {
- %val = load double addrspace(1)* %in, align 8
+ %val = load double, double addrspace(1)* %in, align 8
%add = fadd double %val, 4.0
%bc = bitcast double %add to <2 x i32>
store <2 x i32> %bc, <2 x i32> addrspace(1)* %out, align 8
diff --git a/test/CodeGen/R600/bswap.ll b/test/CodeGen/R600/bswap.ll
index e93543de49da..4cf8e4bfed50 100644
--- a/test/CodeGen/R600/bswap.ll
+++ b/test/CodeGen/R600/bswap.ll
@@ -18,7 +18,7 @@ declare <4 x i64> @llvm.bswap.v4i64(<4 x i64>) nounwind readnone
; SI: buffer_store_dword [[RESULT]]
; SI: s_endpgm
define void @test_bswap_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
- %val = load i32 addrspace(1)* %in, align 4
+ %val = load i32, i32 addrspace(1)* %in, align 4
%bswap = call i32 @llvm.bswap.i32(i32 %val) nounwind readnone
store i32 %bswap, i32 addrspace(1)* %out, align 4
ret void
@@ -33,7 +33,7 @@ define void @test_bswap_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounw
; SI-DAG: v_bfi_b32
; SI: s_endpgm
define void @test_bswap_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) nounwind {
- %val = load <2 x i32> addrspace(1)* %in, align 8
+ %val = load <2 x i32>, <2 x i32> addrspace(1)* %in, align 8
%bswap = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %val) nounwind readnone
store <2 x i32> %bswap, <2 x i32> addrspace(1)* %out, align 8
ret void
@@ -54,7 +54,7 @@ define void @test_bswap_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(
; SI-DAG: v_bfi_b32
; SI: s_endpgm
define void @test_bswap_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) nounwind {
- %val = load <4 x i32> addrspace(1)* %in, align 16
+ %val = load <4 x i32>, <4 x i32> addrspace(1)* %in, align 16
%bswap = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %val) nounwind readnone
store <4 x i32> %bswap, <4 x i32> addrspace(1)* %out, align 16
ret void
@@ -87,28 +87,28 @@ define void @test_bswap_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(
; SI-DAG: v_bfi_b32
; SI: s_endpgm
define void @test_bswap_v8i32(<8 x i32> addrspace(1)* %out, <8 x i32> addrspace(1)* %in) nounwind {
- %val = load <8 x i32> addrspace(1)* %in, align 32
+ %val = load <8 x i32>, <8 x i32> addrspace(1)* %in, align 32
%bswap = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %val) nounwind readnone
store <8 x i32> %bswap, <8 x i32> addrspace(1)* %out, align 32
ret void
}
define void @test_bswap_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) nounwind {
- %val = load i64 addrspace(1)* %in, align 8
+ %val = load i64, i64 addrspace(1)* %in, align 8
%bswap = call i64 @llvm.bswap.i64(i64 %val) nounwind readnone
store i64 %bswap, i64 addrspace(1)* %out, align 8
ret void
}
define void @test_bswap_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %in) nounwind {
- %val = load <2 x i64> addrspace(1)* %in, align 16
+ %val = load <2 x i64>, <2 x i64> addrspace(1)* %in, align 16
%bswap = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %val) nounwind readnone
store <2 x i64> %bswap, <2 x i64> addrspace(1)* %out, align 16
ret void
}
define void @test_bswap_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(1)* %in) nounwind {
- %val = load <4 x i64> addrspace(1)* %in, align 32
+ %val = load <4 x i64>, <4 x i64> addrspace(1)* %in, align 32
%bswap = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %val) nounwind readnone
store <4 x i64> %bswap, <4 x i64> addrspace(1)* %out, align 32
ret void
diff --git a/test/CodeGen/R600/call.ll b/test/CodeGen/R600/call.ll
index 9a0eb1cc3fa0..e769fd11c282 100644
--- a/test/CodeGen/R600/call.ll
+++ b/test/CodeGen/R600/call.ll
@@ -8,9 +8,9 @@
declare i32 @external_function(i32) nounwind
define void @test_call_external(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
- %b_ptr = getelementptr i32 addrspace(1)* %in, i32 1
- %a = load i32 addrspace(1)* %in
- %b = load i32 addrspace(1)* %b_ptr
+ %b_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
+ %a = load i32, i32 addrspace(1)* %in
+ %b = load i32, i32 addrspace(1)* %b_ptr
%c = call i32 @external_function(i32 %b) nounwind
%result = add i32 %a, %c
store i32 %result, i32 addrspace(1)* %out
@@ -23,9 +23,9 @@ define i32 @defined_function(i32 %x) nounwind noinline {
}
define void @test_call(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
- %b_ptr = getelementptr i32 addrspace(1)* %in, i32 1
- %a = load i32 addrspace(1)* %in
- %b = load i32 addrspace(1)* %b_ptr
+ %b_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
+ %a = load i32, i32 addrspace(1)* %in
+ %b = load i32, i32 addrspace(1)* %b_ptr
%c = call i32 @defined_function(i32 %b) nounwind
%result = add i32 %a, %c
store i32 %result, i32 addrspace(1)* %out
diff --git a/test/CodeGen/R600/call_fs.ll b/test/CodeGen/R600/call_fs.ll
index db2cb6e5011c..87bebbc49d52 100644
--- a/test/CodeGen/R600/call_fs.ll
+++ b/test/CodeGen/R600/call_fs.ll
@@ -2,11 +2,11 @@
; RUN: llc < %s -march=r600 -mcpu=redwood -show-mc-encoding -o - | FileCheck --check-prefix=EG %s
; RUN: llc < %s -march=r600 -mcpu=rv710 -show-mc-encoding -o - | FileCheck --check-prefix=R600 %s
-; EG: {{^}}call_fs:
; EG: .long 257
+; EG: {{^}}call_fs:
; EG: CALL_FS ; encoding: [0x00,0x00,0x00,0x00,0x00,0x00,0xc0,0x84]
-; R600: {{^}}call_fs:
; R600: .long 257
+; R600: {{^}}call_fs:
; R600:CALL_FS ; encoding: [0x00,0x00,0x00,0x00,0x00,0x00,0x80,0x89]
diff --git a/test/CodeGen/R600/coalescer_remat.ll b/test/CodeGen/R600/coalescer_remat.ll
new file mode 100644
index 000000000000..f78a77b36154
--- /dev/null
+++ b/test/CodeGen/R600/coalescer_remat.ll
@@ -0,0 +1,57 @@
+; RUN: llc -march=amdgcn -verify-machineinstrs -o - %s | FileCheck %s
+target triple="amdgcn--"
+
+declare float @llvm.fma.f32(float, float, float)
+
+; This checks that the rematerialization support of the coalescer does not
+; unnecessarily widen the register class. Without those fixes, more than
+; 20 VGPRs are used here.
+; Also check that some rematerialization of the 0 constant happened.
+; CHECK-LABEL: foobar
+; CHECK: v_mov_b32_e32 v{{[0-9]+}}, 0
+; CHECK: v_mov_b32_e32 v{{[0-9]+}}, 0
+; CHECK: v_mov_b32_e32 v{{[0-9]+}}, 0
+; CHECK: v_mov_b32_e32 v{{[0-9]+}}, 0
+; CHECK: ; NumVgprs: 12
+define void @foobar(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in, i32 %flag) {
+entry:
+ %cmpflag = icmp eq i32 %flag, 1
+ br i1 %cmpflag, label %loop, label %exit
+
+loop:
+ %c = phi i32 [0, %entry], [%cnext, %loop]
+ %v0 = phi float [0.0, %entry], [%fma.0, %loop]
+ %v1 = phi float [0.0, %entry], [%fma.1, %loop]
+ %v2 = phi float [0.0, %entry], [%fma.2, %loop]
+ %v3 = phi float [0.0, %entry], [%fma.3, %loop]
+
+ ; Try to get the 0 constant coalesced into a wide register
+ %blup = insertelement <4 x float> undef, float %v0, i32 0
+ store <4 x float> %blup, <4 x float> addrspace(1)* %out
+
+ %load = load <4 x float>, <4 x float> addrspace(1)* %in
+ %load.0 = extractelement <4 x float> %load, i32 0
+ %load.1 = extractelement <4 x float> %load, i32 1
+ %load.2 = extractelement <4 x float> %load, i32 2
+ %load.3 = extractelement <4 x float> %load, i32 3
+ %fma.0 = call float @llvm.fma.f32(float %v0, float %load.0, float %v0)
+ %fma.1 = call float @llvm.fma.f32(float %v1, float %load.1, float %v1)
+ %fma.2 = call float @llvm.fma.f32(float %v2, float %load.2, float %v2)
+ %fma.3 = call float @llvm.fma.f32(float %v3, float %load.3, float %v3)
+
+ %cnext = add nsw i32 %c, 1
+ %cmp = icmp eq i32 %cnext, 42
+ br i1 %cmp, label %exit, label %loop
+
+exit:
+ %ev0 = phi float [0.0, %entry], [%fma.0, %loop]
+ %ev1 = phi float [0.0, %entry], [%fma.1, %loop]
+ %ev2 = phi float [0.0, %entry], [%fma.2, %loop]
+ %ev3 = phi float [0.0, %entry], [%fma.3, %loop]
+ %dst.0 = insertelement <4 x float> undef, float %ev0, i32 0
+ %dst.1 = insertelement <4 x float> %dst.0, float %ev1, i32 1
+ %dst.2 = insertelement <4 x float> %dst.1, float %ev2, i32 2
+ %dst.3 = insertelement <4 x float> %dst.2, float %ev3, i32 3
+ store <4 x float> %dst.3, <4 x float> addrspace(1)* %out
+ ret void
+}
diff --git a/test/CodeGen/R600/codegen-prepare-addrmode-sext.ll b/test/CodeGen/R600/codegen-prepare-addrmode-sext.ll
index e16a397bb5a4..0aecc189e0bf 100644
--- a/test/CodeGen/R600/codegen-prepare-addrmode-sext.ll
+++ b/test/CodeGen/R600/codegen-prepare-addrmode-sext.ll
@@ -14,7 +14,7 @@ define void @test(i8 addrspace(1)* nocapture readonly %in, i32 %a, i8 %b) {
entry:
%0 = mul nsw i32 %a, 3
%1 = sext i32 %0 to i64
- %2 = getelementptr i8 addrspace(1)* %in, i64 %1
+ %2 = getelementptr i8, i8 addrspace(1)* %in, i64 %1
store i8 %b, i8 addrspace(1)* %2
ret void
}
diff --git a/test/CodeGen/R600/combine_vloads.ll b/test/CodeGen/R600/combine_vloads.ll
index 38420b25cba9..01572afa6205 100644
--- a/test/CodeGen/R600/combine_vloads.ll
+++ b/test/CodeGen/R600/combine_vloads.ll
@@ -23,7 +23,7 @@ for.body: ; preds = %for.body, %entry
%i.01 = phi i32 [ 0, %entry ], [ %tmp19, %for.body ]
%arrayidx_v4 = bitcast <8 x i8> addrspace(1)* %src to <32 x i8> addrspace(1)*
%0 = bitcast <32 x i8> addrspace(1)* %arrayidx_v4 to <8 x i32> addrspace(1)*
- %vecload2 = load <8 x i32> addrspace(1)* %0, align 32
+ %vecload2 = load <8 x i32>, <8 x i32> addrspace(1)* %0, align 32
%1 = bitcast <8 x i32> %vecload2 to <32 x i8>
%tmp5 = shufflevector <32 x i8> %1, <32 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%tmp8 = shufflevector <32 x i8> %1, <32 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
@@ -32,7 +32,7 @@ for.body: ; preds = %for.body, %entry
%tmp13 = add nsw <8 x i8> %tmp9, %tmp12
%tmp16 = shufflevector <32 x i8> %1, <32 x i8> undef, <8 x i32> <i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%tmp17 = add nsw <8 x i8> %tmp13, %tmp16
- %scevgep = getelementptr <8 x i8> addrspace(1)* %result, i32 %i.01
+ %scevgep = getelementptr <8 x i8>, <8 x i8> addrspace(1)* %result, i32 %i.01
%2 = bitcast <8 x i8> %tmp17 to <2 x i32>
%3 = bitcast <8 x i8> addrspace(1)* %scevgep to <2 x i32> addrspace(1)*
store <2 x i32> %2, <2 x i32> addrspace(1)* %3, align 8
diff --git a/test/CodeGen/R600/commute-compares.ll b/test/CodeGen/R600/commute-compares.ll
new file mode 100644
index 000000000000..31766047a358
--- /dev/null
+++ b/test/CodeGen/R600/commute-compares.ll
@@ -0,0 +1,697 @@
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
+
+declare i32 @llvm.r600.read.tidig.x() #0
+
+; --------------------------------------------------------------------------------
+; i32 compares
+; --------------------------------------------------------------------------------
+
+; GCN-LABEL: {{^}}commute_eq_64_i32:
+; GCN: v_cmp_eq_i32_e32 vcc, 64, v{{[0-9]+}}
+define void @commute_eq_64_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1 {
+ %tid = call i32 @llvm.r600.read.tidig.x() #0
+ %gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
+ %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %val = load i32, i32 addrspace(1)* %gep.in
+ %cmp = icmp eq i32 %val, 64
+ %ext = sext i1 %cmp to i32
+ store i32 %ext, i32 addrspace(1)* %gep.out
+ ret void
+}
+
+; GCN-LABEL: {{^}}commute_ne_64_i32:
+; GCN: v_cmp_ne_i32_e32 vcc, 64, v{{[0-9]+}}
+define void @commute_ne_64_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1 {
+ %tid = call i32 @llvm.r600.read.tidig.x() #0
+ %gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
+ %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %val = load i32, i32 addrspace(1)* %gep.in
+ %cmp = icmp ne i32 %val, 64
+ %ext = sext i1 %cmp to i32
+ store i32 %ext, i32 addrspace(1)* %gep.out
+ ret void
+}
+
+; FIXME: Why isn't this being folded as a constant?
+; GCN-LABEL: {{^}}commute_ne_litk_i32:
+; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 0x3039
+; GCN: v_cmp_ne_i32_e32 vcc, [[K]], v{{[0-9]+}}
+define void @commute_ne_litk_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1 {
+ %tid = call i32 @llvm.r600.read.tidig.x() #0
+ %gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
+ %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %val = load i32, i32 addrspace(1)* %gep.in
+ %cmp = icmp ne i32 %val, 12345
+ %ext = sext i1 %cmp to i32
+ store i32 %ext, i32 addrspace(1)* %gep.out
+ ret void
+}
+
+; GCN-LABEL: {{^}}commute_ugt_64_i32:
+; GCN: v_cmp_lt_u32_e32 vcc, 64, v{{[0-9]+}}
+define void @commute_ugt_64_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1 {
+ %tid = call i32 @llvm.r600.read.tidig.x() #0
+ %gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
+ %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %val = load i32, i32 addrspace(1)* %gep.in
+ %cmp = icmp ugt i32 %val, 64
+ %ext = sext i1 %cmp to i32
+ store i32 %ext, i32 addrspace(1)* %gep.out
+ ret void
+}
+
+; GCN-LABEL: {{^}}commute_uge_64_i32:
+; GCN: v_cmp_lt_u32_e32 vcc, 63, v{{[0-9]+}}
+define void @commute_uge_64_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1 {
+ %tid = call i32 @llvm.r600.read.tidig.x() #0
+ %gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
+ %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %val = load i32, i32 addrspace(1)* %gep.in
+ %cmp = icmp uge i32 %val, 64
+ %ext = sext i1 %cmp to i32
+ store i32 %ext, i32 addrspace(1)* %gep.out
+ ret void
+}
+
+; GCN-LABEL: {{^}}commute_ult_64_i32:
+; GCN: v_cmp_gt_u32_e32 vcc, 64, v{{[0-9]+}}
+define void @commute_ult_64_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1 {
+ %tid = call i32 @llvm.r600.read.tidig.x() #0
+ %gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
+ %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %val = load i32, i32 addrspace(1)* %gep.in
+ %cmp = icmp ult i32 %val, 64
+ %ext = sext i1 %cmp to i32
+ store i32 %ext, i32 addrspace(1)* %gep.out
+ ret void
+}
+
+; GCN-LABEL: {{^}}commute_ule_63_i32:
+; GCN: v_cmp_gt_u32_e32 vcc, 64, v{{[0-9]+}}
+define void @commute_ule_63_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1 {
+ %tid = call i32 @llvm.r600.read.tidig.x() #0
+ %gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
+ %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %val = load i32, i32 addrspace(1)* %gep.in
+ %cmp = icmp ule i32 %val, 63
+ %ext = sext i1 %cmp to i32
+ store i32 %ext, i32 addrspace(1)* %gep.out
+ ret void
+}
+
+; FIXME: Undo canonicalization to gt (x + 1) since it doesn't use the inline imm
+
+; GCN-LABEL: {{^}}commute_ule_64_i32:
+; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 0x41{{$}}
+; GCN: v_cmp_gt_u32_e32 vcc, [[K]], v{{[0-9]+}}
+define void @commute_ule_64_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1 {
+ %tid = call i32 @llvm.r600.read.tidig.x() #0
+ %gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
+ %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %val = load i32, i32 addrspace(1)* %gep.in
+ %cmp = icmp ule i32 %val, 64
+ %ext = sext i1 %cmp to i32
+ store i32 %ext, i32 addrspace(1)* %gep.out
+ ret void
+}
+
+; GCN-LABEL: {{^}}commute_sgt_neg1_i32:
+; GCN: v_cmp_lt_i32_e32 vcc, -1, v{{[0-9]+}}
+define void @commute_sgt_neg1_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1 {
+ %tid = call i32 @llvm.r600.read.tidig.x() #0
+ %gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
+ %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %val = load i32, i32 addrspace(1)* %gep.in
+ %cmp = icmp sgt i32 %val, -1
+ %ext = sext i1 %cmp to i32
+ store i32 %ext, i32 addrspace(1)* %gep.out
+ ret void
+}
+
+; GCN-LABEL: {{^}}commute_sge_neg2_i32:
+; GCN: v_cmp_lt_i32_e32 vcc, -3, v{{[0-9]+}}
+define void @commute_sge_neg2_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1 {
+ %tid = call i32 @llvm.r600.read.tidig.x() #0
+ %gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
+ %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %val = load i32, i32 addrspace(1)* %gep.in
+ %cmp = icmp sge i32 %val, -2
+ %ext = sext i1 %cmp to i32
+ store i32 %ext, i32 addrspace(1)* %gep.out
+ ret void
+}
+
+; GCN-LABEL: {{^}}commute_slt_neg16_i32:
+; GCN: v_cmp_gt_i32_e32 vcc, -16, v{{[0-9]+}}
+define void @commute_slt_neg16_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1 {
+ %tid = call i32 @llvm.r600.read.tidig.x() #0
+ %gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
+ %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %val = load i32, i32 addrspace(1)* %gep.in
+ %cmp = icmp slt i32 %val, -16
+ %ext = sext i1 %cmp to i32
+ store i32 %ext, i32 addrspace(1)* %gep.out
+ ret void
+}
+
+; GCN-LABEL: {{^}}commute_sle_5_i32:
+; GCN: v_cmp_gt_i32_e32 vcc, 6, v{{[0-9]+}}
+define void @commute_sle_5_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1 {
+ %tid = call i32 @llvm.r600.read.tidig.x() #0
+ %gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
+ %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %val = load i32, i32 addrspace(1)* %gep.in
+ %cmp = icmp sle i32 %val, 5
+ %ext = sext i1 %cmp to i32
+ store i32 %ext, i32 addrspace(1)* %gep.out
+ ret void
+}
+
+; --------------------------------------------------------------------------------
+; i64 compares
+; --------------------------------------------------------------------------------
+
+; GCN-LABEL: {{^}}commute_eq_64_i64:
+; GCN: v_cmp_eq_i64_e32 vcc, 64, v{{\[[0-9]+:[0-9]+\]}}
+define void @commute_eq_64_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
+ %tid = call i32 @llvm.r600.read.tidig.x() #0
+ %gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
+ %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %val = load i64, i64 addrspace(1)* %gep.in
+ %cmp = icmp eq i64 %val, 64
+ %ext = sext i1 %cmp to i32
+ store i32 %ext, i32 addrspace(1)* %gep.out
+ ret void
+}
+
+; GCN-LABEL: {{^}}commute_ne_64_i64:
+; GCN: v_cmp_ne_i64_e32 vcc, 64, v{{\[[0-9]+:[0-9]+\]}}
+define void @commute_ne_64_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
+ %tid = call i32 @llvm.r600.read.tidig.x() #0
+ %gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
+ %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %val = load i64, i64 addrspace(1)* %gep.in
+ %cmp = icmp ne i64 %val, 64
+ %ext = sext i1 %cmp to i32
+ store i32 %ext, i32 addrspace(1)* %gep.out
+ ret void
+}
+
+; GCN-LABEL: {{^}}commute_ugt_64_i64:
+; GCN: v_cmp_lt_u64_e32 vcc, 64, v{{\[[0-9]+:[0-9]+\]}}
+define void @commute_ugt_64_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
+ %tid = call i32 @llvm.r600.read.tidig.x() #0
+ %gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
+ %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %val = load i64, i64 addrspace(1)* %gep.in
+ %cmp = icmp ugt i64 %val, 64
+ %ext = sext i1 %cmp to i32
+ store i32 %ext, i32 addrspace(1)* %gep.out
+ ret void
+}
+
+; GCN-LABEL: {{^}}commute_uge_64_i64:
+; GCN: v_cmp_lt_u64_e32 vcc, 63, v{{\[[0-9]+:[0-9]+\]}}
+define void @commute_uge_64_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
+ %tid = call i32 @llvm.r600.read.tidig.x() #0
+ %gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
+ %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %val = load i64, i64 addrspace(1)* %gep.in
+ %cmp = icmp uge i64 %val, 64
+ %ext = sext i1 %cmp to i32
+ store i32 %ext, i32 addrspace(1)* %gep.out
+ ret void
+}
+
+; GCN-LABEL: {{^}}commute_ult_64_i64:
+; GCN: v_cmp_gt_u64_e32 vcc, 64, v{{\[[0-9]+:[0-9]+\]}}
+define void @commute_ult_64_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
+ %tid = call i32 @llvm.r600.read.tidig.x() #0
+ %gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
+ %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %val = load i64, i64 addrspace(1)* %gep.in
+ %cmp = icmp ult i64 %val, 64
+ %ext = sext i1 %cmp to i32
+ store i32 %ext, i32 addrspace(1)* %gep.out
+ ret void
+}
+
+; GCN-LABEL: {{^}}commute_ule_63_i64:
+; GCN: v_cmp_gt_u64_e32 vcc, 64, v{{\[[0-9]+:[0-9]+\]}}
+define void @commute_ule_63_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
+ %tid = call i32 @llvm.r600.read.tidig.x() #0
+ %gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
+ %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %val = load i64, i64 addrspace(1)* %gep.in
+ %cmp = icmp ule i64 %val, 63
+ %ext = sext i1 %cmp to i32
+ store i32 %ext, i32 addrspace(1)* %gep.out
+ ret void
+}
+
+; FIXME: Undo canonicalization to gt (x + 1) since it doesn't use the inline imm
+
+; GCN-LABEL: {{^}}commute_ule_64_i64:
+; GCN-DAG: s_movk_i32 s[[KLO:[0-9]+]], 0x41{{$}}
+; GCN: v_cmp_gt_u64_e32 vcc, s{{\[}}[[KLO]]:{{[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}
+define void @commute_ule_64_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
+ %tid = call i32 @llvm.r600.read.tidig.x() #0
+ %gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
+ %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %val = load i64, i64 addrspace(1)* %gep.in
+ %cmp = icmp ule i64 %val, 64
+ %ext = sext i1 %cmp to i32
+ store i32 %ext, i32 addrspace(1)* %gep.out
+ ret void
+}
+
+; GCN-LABEL: {{^}}commute_sgt_neg1_i64:
+; GCN: v_cmp_lt_i64_e32 vcc, -1, v{{\[[0-9]+:[0-9]+\]}}
+define void @commute_sgt_neg1_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
+ %tid = call i32 @llvm.r600.read.tidig.x() #0
+ %gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
+ %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %val = load i64, i64 addrspace(1)* %gep.in
+ %cmp = icmp sgt i64 %val, -1
+ %ext = sext i1 %cmp to i32
+ store i32 %ext, i32 addrspace(1)* %gep.out
+ ret void
+}
+
+; GCN-LABEL: {{^}}commute_sge_neg2_i64:
+; GCN: v_cmp_lt_i64_e32 vcc, -3, v{{\[[0-9]+:[0-9]+\]}}
+define void @commute_sge_neg2_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
+ %tid = call i32 @llvm.r600.read.tidig.x() #0
+ %gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
+ %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %val = load i64, i64 addrspace(1)* %gep.in
+ %cmp = icmp sge i64 %val, -2
+ %ext = sext i1 %cmp to i32
+ store i32 %ext, i32 addrspace(1)* %gep.out
+ ret void
+}
+
+; GCN-LABEL: {{^}}commute_slt_neg16_i64:
+; GCN: v_cmp_gt_i64_e32 vcc, -16, v{{\[[0-9]+:[0-9]+\]}}
+define void @commute_slt_neg16_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
+ %tid = call i32 @llvm.r600.read.tidig.x() #0
+ %gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
+ %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %val = load i64, i64 addrspace(1)* %gep.in
+ %cmp = icmp slt i64 %val, -16
+ %ext = sext i1 %cmp to i32
+ store i32 %ext, i32 addrspace(1)* %gep.out
+ ret void
+}
+
+; GCN-LABEL: {{^}}commute_sle_5_i64:
+; GCN: v_cmp_gt_i64_e32 vcc, 6, v{{\[[0-9]+:[0-9]+\]}}
+define void @commute_sle_5_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
+ %tid = call i32 @llvm.r600.read.tidig.x() #0
+ %gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
+ %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %val = load i64, i64 addrspace(1)* %gep.in
+ %cmp = icmp sle i64 %val, 5
+ %ext = sext i1 %cmp to i32
+ store i32 %ext, i32 addrspace(1)* %gep.out
+ ret void
+}
+
+; --------------------------------------------------------------------------------
+; f32 compares
+; --------------------------------------------------------------------------------
+
+
+; GCN-LABEL: {{^}}commute_oeq_2.0_f32:
+; GCN: v_cmp_eq_f32_e32 vcc, 2.0, v{{[0-9]+}}
+define void @commute_oeq_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #1 {
+ %tid = call i32 @llvm.r600.read.tidig.x() #0
+ %gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid
+ %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %val = load float, float addrspace(1)* %gep.in
+ %cmp = fcmp oeq float %val, 2.0
+ %ext = sext i1 %cmp to i32
+ store i32 %ext, i32 addrspace(1)* %gep.out
+ ret void
+}
+
+
+; GCN-LABEL: {{^}}commute_ogt_2.0_f32:
+; GCN: v_cmp_lt_f32_e32 vcc, 2.0, v{{[0-9]+}}
+define void @commute_ogt_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #1 {
+ %tid = call i32 @llvm.r600.read.tidig.x() #0
+ %gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid
+ %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %val = load float, float addrspace(1)* %gep.in
+ %cmp = fcmp ogt float %val, 2.0
+ %ext = sext i1 %cmp to i32
+ store i32 %ext, i32 addrspace(1)* %gep.out
+ ret void
+}
+
+; GCN-LABEL: {{^}}commute_oge_2.0_f32:
+; GCN: v_cmp_le_f32_e32 vcc, 2.0, v{{[0-9]+}}
+define void @commute_oge_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #1 {
+ %tid = call i32 @llvm.r600.read.tidig.x() #0
+ %gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid
+ %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %val = load float, float addrspace(1)* %gep.in
+ %cmp = fcmp oge float %val, 2.0
+ %ext = sext i1 %cmp to i32
+ store i32 %ext, i32 addrspace(1)* %gep.out
+ ret void
+}
+
+; GCN-LABEL: {{^}}commute_olt_2.0_f32:
+; GCN: v_cmp_gt_f32_e32 vcc, 2.0, v{{[0-9]+}}
+define void @commute_olt_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #1 {
+ %tid = call i32 @llvm.r600.read.tidig.x() #0
+ %gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid
+ %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %val = load float, float addrspace(1)* %gep.in
+ %cmp = fcmp olt float %val, 2.0
+ %ext = sext i1 %cmp to i32
+ store i32 %ext, i32 addrspace(1)* %gep.out
+ ret void
+}
+
+; GCN-LABEL: {{^}}commute_ole_2.0_f32:
+; GCN: v_cmp_ge_f32_e32 vcc, 2.0, v{{[0-9]+}}
+define void @commute_ole_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #1 {
+ %tid = call i32 @llvm.r600.read.tidig.x() #0
+ %gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid
+ %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %val = load float, float addrspace(1)* %gep.in
+ %cmp = fcmp ole float %val, 2.0
+ %ext = sext i1 %cmp to i32
+ store i32 %ext, i32 addrspace(1)* %gep.out
+ ret void
+}
+
+; GCN-LABEL: {{^}}commute_one_2.0_f32:
+; GCN: v_cmp_lg_f32_e32 vcc, 2.0, v{{[0-9]+}}
+define void @commute_one_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #1 {
+ %tid = call i32 @llvm.r600.read.tidig.x() #0
+ %gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid
+ %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %val = load float, float addrspace(1)* %gep.in
+ %cmp = fcmp one float %val, 2.0
+ %ext = sext i1 %cmp to i32
+ store i32 %ext, i32 addrspace(1)* %gep.out
+ ret void
+}
+
+; GCN-LABEL: {{^}}commute_ord_2.0_f32:
+; GCN: v_cmp_o_f32_e32 vcc, [[REG:v[0-9]+]], [[REG]]
+define void @commute_ord_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #1 {
+ %tid = call i32 @llvm.r600.read.tidig.x() #0
+ %gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid
+ %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %val = load float, float addrspace(1)* %gep.in
+ %cmp = fcmp ord float %val, 2.0
+ %ext = sext i1 %cmp to i32
+ store i32 %ext, i32 addrspace(1)* %gep.out
+ ret void
+}
+
+; GCN-LABEL: {{^}}commute_ueq_2.0_f32:
+; GCN: v_cmp_nlg_f32_e32 vcc, 2.0, v{{[0-9]+}}
+define void @commute_ueq_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #1 {
+ %tid = call i32 @llvm.r600.read.tidig.x() #0
+ %gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid
+ %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %val = load float, float addrspace(1)* %gep.in
+ %cmp = fcmp ueq float %val, 2.0
+ %ext = sext i1 %cmp to i32
+ store i32 %ext, i32 addrspace(1)* %gep.out
+ ret void
+}
+
+; GCN-LABEL: {{^}}commute_ugt_2.0_f32:
+; GCN: v_cmp_nge_f32_e32 vcc, 2.0, v{{[0-9]+}}
+define void @commute_ugt_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #1 {
+ %tid = call i32 @llvm.r600.read.tidig.x() #0
+ %gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid
+ %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %val = load float, float addrspace(1)* %gep.in
+ %cmp = fcmp ugt float %val, 2.0
+ %ext = sext i1 %cmp to i32
+ store i32 %ext, i32 addrspace(1)* %gep.out
+ ret void
+}
+
+; GCN-LABEL: {{^}}commute_uge_2.0_f32:
+; GCN: v_cmp_ngt_f32_e32 vcc, 2.0, v{{[0-9]+}}
+define void @commute_uge_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #1 {
+ %tid = call i32 @llvm.r600.read.tidig.x() #0
+ %gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid
+ %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %val = load float, float addrspace(1)* %gep.in
+ %cmp = fcmp uge float %val, 2.0
+ %ext = sext i1 %cmp to i32
+ store i32 %ext, i32 addrspace(1)* %gep.out
+ ret void
+}
+
+; GCN-LABEL: {{^}}commute_ult_2.0_f32:
+; GCN: v_cmp_nle_f32_e32 vcc, 2.0, v{{[0-9]+}}
+define void @commute_ult_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #1 {
+ %tid = call i32 @llvm.r600.read.tidig.x() #0
+ %gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid
+ %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %val = load float, float addrspace(1)* %gep.in
+ %cmp = fcmp ult float %val, 2.0
+ %ext = sext i1 %cmp to i32
+ store i32 %ext, i32 addrspace(1)* %gep.out
+ ret void
+}
+
+; GCN-LABEL: {{^}}commute_ule_2.0_f32:
+; GCN: v_cmp_nlt_f32_e32 vcc, 2.0, v{{[0-9]+}}
+define void @commute_ule_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #1 {
+ %tid = call i32 @llvm.r600.read.tidig.x() #0
+ %gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid
+ %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %val = load float, float addrspace(1)* %gep.in
+ %cmp = fcmp ule float %val, 2.0
+ %ext = sext i1 %cmp to i32
+ store i32 %ext, i32 addrspace(1)* %gep.out
+ ret void
+}
+
+; GCN-LABEL: {{^}}commute_une_2.0_f32:
+; GCN: v_cmp_neq_f32_e32 vcc, 2.0, v{{[0-9]+}}
+define void @commute_une_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #1 {
+ %tid = call i32 @llvm.r600.read.tidig.x() #0
+ %gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid
+ %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %val = load float, float addrspace(1)* %gep.in
+ %cmp = fcmp une float %val, 2.0
+ %ext = sext i1 %cmp to i32
+ store i32 %ext, i32 addrspace(1)* %gep.out
+ ret void
+}
+
+; GCN-LABEL: {{^}}commute_uno_2.0_f32:
+; GCN: v_cmp_u_f32_e32 vcc, [[REG:v[0-9]+]], [[REG]]
+define void @commute_uno_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #1 {
+ %tid = call i32 @llvm.r600.read.tidig.x() #0
+ %gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid
+ %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %val = load float, float addrspace(1)* %gep.in
+ %cmp = fcmp uno float %val, 2.0
+ %ext = sext i1 %cmp to i32
+ store i32 %ext, i32 addrspace(1)* %gep.out
+ ret void
+}
+
+; --------------------------------------------------------------------------------
+; f64 compares
+; --------------------------------------------------------------------------------
+
+
+; GCN-LABEL: {{^}}commute_oeq_2.0_f64:
+; GCN: v_cmp_eq_f64_e32 vcc, 2.0, v{{\[[0-9]+:[0-9]+\]}}
+define void @commute_oeq_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #1 {
+ %tid = call i32 @llvm.r600.read.tidig.x() #0
+ %gep.in = getelementptr double, double addrspace(1)* %in, i32 %tid
+ %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %val = load double, double addrspace(1)* %gep.in
+ %cmp = fcmp oeq double %val, 2.0
+ %ext = sext i1 %cmp to i32
+ store i32 %ext, i32 addrspace(1)* %gep.out
+ ret void
+}
+
+
+; GCN-LABEL: {{^}}commute_ogt_2.0_f64:
+; GCN: v_cmp_lt_f64_e32 vcc, 2.0, v{{\[[0-9]+:[0-9]+\]}}
+define void @commute_ogt_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #1 {
+ %tid = call i32 @llvm.r600.read.tidig.x() #0
+ %gep.in = getelementptr double, double addrspace(1)* %in, i32 %tid
+ %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %val = load double, double addrspace(1)* %gep.in
+ %cmp = fcmp ogt double %val, 2.0
+ %ext = sext i1 %cmp to i32
+ store i32 %ext, i32 addrspace(1)* %gep.out
+ ret void
+}
+
+; GCN-LABEL: {{^}}commute_oge_2.0_f64:
+; GCN: v_cmp_le_f64_e32 vcc, 2.0, v{{\[[0-9]+:[0-9]+\]}}
+define void @commute_oge_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #1 {
+ %tid = call i32 @llvm.r600.read.tidig.x() #0
+ %gep.in = getelementptr double, double addrspace(1)* %in, i32 %tid
+ %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %val = load double, double addrspace(1)* %gep.in
+ %cmp = fcmp oge double %val, 2.0
+ %ext = sext i1 %cmp to i32
+ store i32 %ext, i32 addrspace(1)* %gep.out
+ ret void
+}
+
+; GCN-LABEL: {{^}}commute_olt_2.0_f64:
+; GCN: v_cmp_gt_f64_e32 vcc, 2.0, v{{\[[0-9]+:[0-9]+\]}}
+define void @commute_olt_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #1 {
+ %tid = call i32 @llvm.r600.read.tidig.x() #0
+ %gep.in = getelementptr double, double addrspace(1)* %in, i32 %tid
+ %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %val = load double, double addrspace(1)* %gep.in
+ %cmp = fcmp olt double %val, 2.0
+ %ext = sext i1 %cmp to i32
+ store i32 %ext, i32 addrspace(1)* %gep.out
+ ret void
+}
+
+; GCN-LABEL: {{^}}commute_ole_2.0_f64:
+; GCN: v_cmp_ge_f64_e32 vcc, 2.0, v{{\[[0-9]+:[0-9]+\]}}
+define void @commute_ole_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #1 {
+ %tid = call i32 @llvm.r600.read.tidig.x() #0
+ %gep.in = getelementptr double, double addrspace(1)* %in, i32 %tid
+ %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %val = load double, double addrspace(1)* %gep.in
+ %cmp = fcmp ole double %val, 2.0
+ %ext = sext i1 %cmp to i32
+ store i32 %ext, i32 addrspace(1)* %gep.out
+ ret void
+}
+
+; GCN-LABEL: {{^}}commute_one_2.0_f64:
+; GCN: v_cmp_lg_f64_e32 vcc, 2.0, v{{\[[0-9]+:[0-9]+\]}}
+define void @commute_one_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #1 {
+ %tid = call i32 @llvm.r600.read.tidig.x() #0
+ %gep.in = getelementptr double, double addrspace(1)* %in, i32 %tid
+ %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %val = load double, double addrspace(1)* %gep.in
+ %cmp = fcmp one double %val, 2.0
+ %ext = sext i1 %cmp to i32
+ store i32 %ext, i32 addrspace(1)* %gep.out
+ ret void
+}
+
+; GCN-LABEL: {{^}}commute_ord_2.0_f64:
+; GCN: v_cmp_o_f64_e32 vcc, [[REG:v\[[0-9]+:[0-9]+\]]], [[REG]]
+define void @commute_ord_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #1 {
+ %tid = call i32 @llvm.r600.read.tidig.x() #0
+ %gep.in = getelementptr double, double addrspace(1)* %in, i32 %tid
+ %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %val = load double, double addrspace(1)* %gep.in
+ %cmp = fcmp ord double %val, 2.0
+ %ext = sext i1 %cmp to i32
+ store i32 %ext, i32 addrspace(1)* %gep.out
+ ret void
+}
+
+; GCN-LABEL: {{^}}commute_ueq_2.0_f64:
+; GCN: v_cmp_nlg_f64_e32 vcc, 2.0, v{{\[[0-9]+:[0-9]+\]}}
+define void @commute_ueq_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #1 {
+ %tid = call i32 @llvm.r600.read.tidig.x() #0
+ %gep.in = getelementptr double, double addrspace(1)* %in, i32 %tid
+ %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %val = load double, double addrspace(1)* %gep.in
+ %cmp = fcmp ueq double %val, 2.0
+ %ext = sext i1 %cmp to i32
+ store i32 %ext, i32 addrspace(1)* %gep.out
+ ret void
+}
+
+; GCN-LABEL: {{^}}commute_ugt_2.0_f64:
+; GCN: v_cmp_nge_f64_e32 vcc, 2.0, v{{\[[0-9]+:[0-9]+\]}}
+define void @commute_ugt_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #1 {
+ %tid = call i32 @llvm.r600.read.tidig.x() #0
+ %gep.in = getelementptr double, double addrspace(1)* %in, i32 %tid
+ %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %val = load double, double addrspace(1)* %gep.in
+ %cmp = fcmp ugt double %val, 2.0
+ %ext = sext i1 %cmp to i32
+ store i32 %ext, i32 addrspace(1)* %gep.out
+ ret void
+}
+
+; GCN-LABEL: {{^}}commute_uge_2.0_f64:
+; GCN: v_cmp_ngt_f64_e32 vcc, 2.0, v{{\[[0-9]+:[0-9]+\]}}
+define void @commute_uge_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #1 {
+ %tid = call i32 @llvm.r600.read.tidig.x() #0
+ %gep.in = getelementptr double, double addrspace(1)* %in, i32 %tid
+ %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %val = load double, double addrspace(1)* %gep.in
+ %cmp = fcmp uge double %val, 2.0
+ %ext = sext i1 %cmp to i32
+ store i32 %ext, i32 addrspace(1)* %gep.out
+ ret void
+}
+
+; GCN-LABEL: {{^}}commute_ult_2.0_f64:
+; GCN: v_cmp_nle_f64_e32 vcc, 2.0, v{{\[[0-9]+:[0-9]+\]}}
+define void @commute_ult_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #1 {
+ %tid = call i32 @llvm.r600.read.tidig.x() #0
+ %gep.in = getelementptr double, double addrspace(1)* %in, i32 %tid
+ %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %val = load double, double addrspace(1)* %gep.in
+ %cmp = fcmp ult double %val, 2.0
+ %ext = sext i1 %cmp to i32
+ store i32 %ext, i32 addrspace(1)* %gep.out
+ ret void
+}
+
+; GCN-LABEL: {{^}}commute_ule_2.0_f64:
+; GCN: v_cmp_nlt_f64_e32 vcc, 2.0, v{{\[[0-9]+:[0-9]+\]}}
+define void @commute_ule_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #1 {
+ %tid = call i32 @llvm.r600.read.tidig.x() #0
+ %gep.in = getelementptr double, double addrspace(1)* %in, i32 %tid
+ %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %val = load double, double addrspace(1)* %gep.in
+ %cmp = fcmp ule double %val, 2.0
+ %ext = sext i1 %cmp to i32
+ store i32 %ext, i32 addrspace(1)* %gep.out
+ ret void
+}
+
+; GCN-LABEL: {{^}}commute_une_2.0_f64:
+; GCN: v_cmp_neq_f64_e32 vcc, 2.0, v{{\[[0-9]+:[0-9]+\]}}
+define void @commute_une_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #1 {
+ %tid = call i32 @llvm.r600.read.tidig.x() #0
+ %gep.in = getelementptr double, double addrspace(1)* %in, i32 %tid
+ %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %val = load double, double addrspace(1)* %gep.in
+ %cmp = fcmp une double %val, 2.0
+ %ext = sext i1 %cmp to i32
+ store i32 %ext, i32 addrspace(1)* %gep.out
+ ret void
+}
+
+; GCN-LABEL: {{^}}commute_uno_2.0_f64:
+; GCN: v_cmp_u_f64_e32 vcc, [[REG:v\[[0-9]+:[0-9]+\]]], [[REG]]
+define void @commute_uno_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #1 {
+ %tid = call i32 @llvm.r600.read.tidig.x() #0
+ %gep.in = getelementptr double, double addrspace(1)* %in, i32 %tid
+ %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %val = load double, double addrspace(1)* %gep.in
+ %cmp = fcmp uno double %val, 2.0
+ %ext = sext i1 %cmp to i32
+ store i32 %ext, i32 addrspace(1)* %gep.out
+ ret void
+}
+
+attributes #0 = { nounwind readnone }
+attributes #1 = { nounwind }
diff --git a/test/CodeGen/R600/commute_modifiers.ll b/test/CodeGen/R600/commute_modifiers.ll
index 6fddb6d595c9..7fc36eabb780 100644
--- a/test/CodeGen/R600/commute_modifiers.ll
+++ b/test/CodeGen/R600/commute_modifiers.ll
@@ -10,8 +10,8 @@ declare float @llvm.fma.f32(float, float, float) nounwind readnone
; SI-NEXT: buffer_store_dword [[REG]]
define void @commute_add_imm_fabs_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
%tid = call i32 @llvm.r600.read.tidig.x() #1
- %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid
- %x = load float addrspace(1)* %gep.0
+ %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
+ %x = load float, float addrspace(1)* %gep.0
%x.fabs = call float @llvm.fabs.f32(float %x) #1
%z = fadd float 2.0, %x.fabs
store float %z, float addrspace(1)* %out
@@ -24,8 +24,8 @@ define void @commute_add_imm_fabs_f32(float addrspace(1)* %out, float addrspace(
; SI-NEXT: buffer_store_dword [[REG]]
define void @commute_mul_imm_fneg_fabs_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
%tid = call i32 @llvm.r600.read.tidig.x() #1
- %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid
- %x = load float addrspace(1)* %gep.0
+ %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
+ %x = load float, float addrspace(1)* %gep.0
%x.fabs = call float @llvm.fabs.f32(float %x) #1
%x.fneg.fabs = fsub float -0.000000e+00, %x.fabs
%z = fmul float 4.0, %x.fneg.fabs
@@ -39,8 +39,8 @@ define void @commute_mul_imm_fneg_fabs_f32(float addrspace(1)* %out, float addrs
; SI-NEXT: buffer_store_dword [[REG]]
define void @commute_mul_imm_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
%tid = call i32 @llvm.r600.read.tidig.x() #1
- %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid
- %x = load float addrspace(1)* %gep.0
+ %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
+ %x = load float, float addrspace(1)* %gep.0
%x.fneg = fsub float -0.000000e+00, %x
%z = fmul float 4.0, %x.fneg
store float %z, float addrspace(1)* %out
@@ -55,8 +55,8 @@ define void @commute_mul_imm_fneg_f32(float addrspace(1)* %out, float addrspace(
; SI-NEXT: buffer_store_dword [[REG]]
define void @commute_add_lit_fabs_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
%tid = call i32 @llvm.r600.read.tidig.x() #1
- %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid
- %x = load float addrspace(1)* %gep.0
+ %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
+ %x = load float, float addrspace(1)* %gep.0
%x.fabs = call float @llvm.fabs.f32(float %x) #1
%z = fadd float 1024.0, %x.fabs
store float %z, float addrspace(1)* %out
@@ -70,10 +70,10 @@ define void @commute_add_lit_fabs_f32(float addrspace(1)* %out, float addrspace(
; SI-NEXT: buffer_store_dword [[REG]]
define void @commute_add_fabs_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
%tid = call i32 @llvm.r600.read.tidig.x() #1
- %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid
- %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1
- %x = load float addrspace(1)* %gep.0
- %y = load float addrspace(1)* %gep.1
+ %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
+ %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
+ %x = load float, float addrspace(1)* %gep.0
+ %y = load float, float addrspace(1)* %gep.1
%y.fabs = call float @llvm.fabs.f32(float %y) #1
%z = fadd float %x, %y.fabs
store float %z, float addrspace(1)* %out
@@ -87,10 +87,10 @@ define void @commute_add_fabs_f32(float addrspace(1)* %out, float addrspace(1)*
; SI-NEXT: buffer_store_dword [[REG]]
define void @commute_mul_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
%tid = call i32 @llvm.r600.read.tidig.x() #1
- %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid
- %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1
- %x = load float addrspace(1)* %gep.0
- %y = load float addrspace(1)* %gep.1
+ %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
+ %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
+ %x = load float, float addrspace(1)* %gep.0
+ %y = load float, float addrspace(1)* %gep.1
%y.fneg = fsub float -0.000000e+00, %y
%z = fmul float %x, %y.fneg
store float %z, float addrspace(1)* %out
@@ -104,10 +104,10 @@ define void @commute_mul_fneg_f32(float addrspace(1)* %out, float addrspace(1)*
; SI-NEXT: buffer_store_dword [[REG]]
define void @commute_mul_fabs_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
%tid = call i32 @llvm.r600.read.tidig.x() #1
- %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid
- %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1
- %x = load float addrspace(1)* %gep.0
- %y = load float addrspace(1)* %gep.1
+ %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
+ %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
+ %x = load float, float addrspace(1)* %gep.0
+ %y = load float, float addrspace(1)* %gep.1
%y.fabs = call float @llvm.fabs.f32(float %y) #1
%y.fabs.fneg = fsub float -0.000000e+00, %y.fabs
%z = fmul float %x, %y.fabs.fneg
@@ -123,10 +123,10 @@ define void @commute_mul_fabs_fneg_f32(float addrspace(1)* %out, float addrspace
; SI-NEXT: buffer_store_dword [[REG]]
define void @commute_mul_fabs_x_fabs_y_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
%tid = call i32 @llvm.r600.read.tidig.x() #1
- %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid
- %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1
- %x = load float addrspace(1)* %gep.0
- %y = load float addrspace(1)* %gep.1
+ %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
+ %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
+ %x = load float, float addrspace(1)* %gep.0
+ %y = load float, float addrspace(1)* %gep.1
%x.fabs = call float @llvm.fabs.f32(float %x) #1
%y.fabs = call float @llvm.fabs.f32(float %y) #1
%z = fmul float %x.fabs, %y.fabs
@@ -141,10 +141,10 @@ define void @commute_mul_fabs_x_fabs_y_f32(float addrspace(1)* %out, float addrs
; SI-NEXT: buffer_store_dword [[REG]]
define void @commute_mul_fabs_x_fneg_fabs_y_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
%tid = call i32 @llvm.r600.read.tidig.x() #1
- %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid
- %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1
- %x = load float addrspace(1)* %gep.0
- %y = load float addrspace(1)* %gep.1
+ %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
+ %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
+ %x = load float, float addrspace(1)* %gep.0
+ %y = load float, float addrspace(1)* %gep.1
%x.fabs = call float @llvm.fabs.f32(float %x) #1
%y.fabs = call float @llvm.fabs.f32(float %y) #1
%y.fabs.fneg = fsub float -0.000000e+00, %y.fabs
@@ -163,12 +163,12 @@ define void @commute_mul_fabs_x_fneg_fabs_y_f32(float addrspace(1)* %out, float
; SI: buffer_store_dword [[RESULT]]
define void @fma_a_2.0_neg_b_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep.0 = getelementptr float addrspace(1)* %out, i32 %tid
- %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1
- %gep.out = getelementptr float addrspace(1)* %out, i32 %tid
+ %gep.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
+ %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
+ %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid
- %r1 = load float addrspace(1)* %gep.0
- %r2 = load float addrspace(1)* %gep.1
+ %r1 = load float, float addrspace(1)* %gep.0
+ %r2 = load float, float addrspace(1)* %gep.1
%r2.fabs = call float @llvm.fabs.f32(float %r2)
diff --git a/test/CodeGen/R600/concat_vectors.ll b/test/CodeGen/R600/concat_vectors.ll
index b27bed3d4265..a09ed1f73857 100644
--- a/test/CodeGen/R600/concat_vectors.ll
+++ b/test/CodeGen/R600/concat_vectors.ll
@@ -288,7 +288,7 @@ define void @test_concat_v16i16(<32 x i16> addrspace(1)* %out, <16 x i16> %a, <1
; SI: s_endpgm
define void @concat_vector_crash(<8 x float> addrspace(1)* %out, <2 x float> addrspace(1)* %in) {
bb:
- %tmp = load <2 x float> addrspace(1)* %in, align 4
+ %tmp = load <2 x float>, <2 x float> addrspace(1)* %in, align 4
%tmp1 = shufflevector <2 x float> %tmp, <2 x float> undef, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
%tmp2 = shufflevector <8 x float> undef, <8 x float> %tmp1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 8, i32 9>
store <8 x float> %tmp2, <8 x float> addrspace(1)* %out, align 32
diff --git a/test/CodeGen/R600/copy-illegal-type.ll b/test/CodeGen/R600/copy-illegal-type.ll
index 56c43d23b4a1..8b397566066a 100644
--- a/test/CodeGen/R600/copy-illegal-type.ll
+++ b/test/CodeGen/R600/copy-illegal-type.ll
@@ -6,7 +6,7 @@
; SI: buffer_store_dword [[REG]]
; SI: s_endpgm
define void @test_copy_v4i8(<4 x i8> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) nounwind {
- %val = load <4 x i8> addrspace(1)* %in, align 4
+ %val = load <4 x i8>, <4 x i8> addrspace(1)* %in, align 4
store <4 x i8> %val, <4 x i8> addrspace(1)* %out, align 4
ret void
}
@@ -17,7 +17,7 @@ define void @test_copy_v4i8(<4 x i8> addrspace(1)* %out, <4 x i8> addrspace(1)*
; SI: buffer_store_dword [[REG]]
; SI: s_endpgm
define void @test_copy_v4i8_x2(<4 x i8> addrspace(1)* %out0, <4 x i8> addrspace(1)* %out1, <4 x i8> addrspace(1)* %in) nounwind {
- %val = load <4 x i8> addrspace(1)* %in, align 4
+ %val = load <4 x i8>, <4 x i8> addrspace(1)* %in, align 4
store <4 x i8> %val, <4 x i8> addrspace(1)* %out0, align 4
store <4 x i8> %val, <4 x i8> addrspace(1)* %out1, align 4
ret void
@@ -30,7 +30,7 @@ define void @test_copy_v4i8_x2(<4 x i8> addrspace(1)* %out0, <4 x i8> addrspace(
; SI: buffer_store_dword [[REG]]
; SI: s_endpgm
define void @test_copy_v4i8_x3(<4 x i8> addrspace(1)* %out0, <4 x i8> addrspace(1)* %out1, <4 x i8> addrspace(1)* %out2, <4 x i8> addrspace(1)* %in) nounwind {
- %val = load <4 x i8> addrspace(1)* %in, align 4
+ %val = load <4 x i8>, <4 x i8> addrspace(1)* %in, align 4
store <4 x i8> %val, <4 x i8> addrspace(1)* %out0, align 4
store <4 x i8> %val, <4 x i8> addrspace(1)* %out1, align 4
store <4 x i8> %val, <4 x i8> addrspace(1)* %out2, align 4
@@ -45,7 +45,7 @@ define void @test_copy_v4i8_x3(<4 x i8> addrspace(1)* %out0, <4 x i8> addrspace(
; SI: buffer_store_dword [[REG]]
; SI: s_endpgm
define void @test_copy_v4i8_x4(<4 x i8> addrspace(1)* %out0, <4 x i8> addrspace(1)* %out1, <4 x i8> addrspace(1)* %out2, <4 x i8> addrspace(1)* %out3, <4 x i8> addrspace(1)* %in) nounwind {
- %val = load <4 x i8> addrspace(1)* %in, align 4
+ %val = load <4 x i8>, <4 x i8> addrspace(1)* %in, align 4
store <4 x i8> %val, <4 x i8> addrspace(1)* %out0, align 4
store <4 x i8> %val, <4 x i8> addrspace(1)* %out1, align 4
store <4 x i8> %val, <4 x i8> addrspace(1)* %out2, align 4
@@ -82,7 +82,7 @@ define void @test_copy_v4i8_x4(<4 x i8> addrspace(1)* %out0, <4 x i8> addrspace(
; SI: s_endpgm
define void @test_copy_v4i8_extra_use(<4 x i8> addrspace(1)* %out0, <4 x i8> addrspace(1)* %out1, <4 x i8> addrspace(1)* %in) nounwind {
- %val = load <4 x i8> addrspace(1)* %in, align 4
+ %val = load <4 x i8>, <4 x i8> addrspace(1)* %in, align 4
%add = add <4 x i8> %val, <i8 9, i8 9, i8 9, i8 9>
store <4 x i8> %val, <4 x i8> addrspace(1)* %out0, align 4
store <4 x i8> %add, <4 x i8> addrspace(1)* %out1, align 4
@@ -120,7 +120,7 @@ define void @test_copy_v4i8_extra_use(<4 x i8> addrspace(1)* %out0, <4 x i8> add
; SI: s_endpgm
define void @test_copy_v4i8_x2_extra_use(<4 x i8> addrspace(1)* %out0, <4 x i8> addrspace(1)* %out1, <4 x i8> addrspace(1)* %out2, <4 x i8> addrspace(1)* %in) nounwind {
- %val = load <4 x i8> addrspace(1)* %in, align 4
+ %val = load <4 x i8>, <4 x i8> addrspace(1)* %in, align 4
%add = add <4 x i8> %val, <i8 9, i8 9, i8 9, i8 9>
store <4 x i8> %val, <4 x i8> addrspace(1)* %out0, align 4
store <4 x i8> %add, <4 x i8> addrspace(1)* %out1, align 4
@@ -133,7 +133,7 @@ define void @test_copy_v4i8_x2_extra_use(<4 x i8> addrspace(1)* %out0, <4 x i8>
; SI-NOT: bfi
; SI: s_endpgm
define void @test_copy_v3i8(<3 x i8> addrspace(1)* %out, <3 x i8> addrspace(1)* %in) nounwind {
- %val = load <3 x i8> addrspace(1)* %in, align 4
+ %val = load <3 x i8>, <3 x i8> addrspace(1)* %in, align 4
store <3 x i8> %val, <3 x i8> addrspace(1)* %out, align 4
ret void
}
@@ -145,7 +145,7 @@ define void @test_copy_v3i8(<3 x i8> addrspace(1)* %out, <3 x i8> addrspace(1)*
; SI: buffer_load_ubyte
; SI: s_endpgm
define void @test_copy_v4i8_volatile_load(<4 x i8> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) nounwind {
- %val = load volatile <4 x i8> addrspace(1)* %in, align 4
+ %val = load volatile <4 x i8>, <4 x i8> addrspace(1)* %in, align 4
store <4 x i8> %val, <4 x i8> addrspace(1)* %out, align 4
ret void
}
@@ -161,7 +161,7 @@ define void @test_copy_v4i8_volatile_load(<4 x i8> addrspace(1)* %out, <4 x i8>
; SI: buffer_store_byte
; SI: s_endpgm
define void @test_copy_v4i8_volatile_store(<4 x i8> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) nounwind {
- %val = load <4 x i8> addrspace(1)* %in, align 4
+ %val = load <4 x i8>, <4 x i8> addrspace(1)* %in, align 4
store volatile <4 x i8> %val, <4 x i8> addrspace(1)* %out, align 4
ret void
}
diff --git a/test/CodeGen/R600/copy-to-reg.ll b/test/CodeGen/R600/copy-to-reg.ll
index 9c1de73b3b1b..fc875f6ef7a3 100644
--- a/test/CodeGen/R600/copy-to-reg.ll
+++ b/test/CodeGen/R600/copy-to-reg.ll
@@ -13,15 +13,15 @@ entry:
loop:
%inc = phi i32 [0, %entry], [%inc.i, %loop]
- %ptr = getelementptr [16 x i32]* %alloca, i32 0, i32 %inc
+ %ptr = getelementptr [16 x i32], [16 x i32]* %alloca, i32 0, i32 %inc
store i32 %inc, i32* %ptr
%inc.i = add i32 %inc, 1
%cnd = icmp uge i32 %inc.i, 16
br i1 %cnd, label %done, label %loop
done:
- %tmp0 = getelementptr [16 x i32]* %alloca, i32 0, i32 0
- %tmp1 = load i32* %tmp0
+ %tmp0 = getelementptr [16 x i32], [16 x i32]* %alloca, i32 0, i32 0
+ %tmp1 = load i32, i32* %tmp0
store i32 %tmp1, i32 addrspace(1)* %out
ret void
}
diff --git a/test/CodeGen/R600/ctlz_zero_undef.ll b/test/CodeGen/R600/ctlz_zero_undef.ll
index 1a4317b8095c..bd26c302fe5a 100644
--- a/test/CodeGen/R600/ctlz_zero_undef.ll
+++ b/test/CodeGen/R600/ctlz_zero_undef.ll
@@ -28,7 +28,7 @@ define void @s_ctlz_zero_undef_i32(i32 addrspace(1)* noalias %out, i32 %val) nou
; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT:T[0-9]+\.[XYZW]]]
; EG: FFBH_UINT {{\*? *}}[[RESULT]]
define void @v_ctlz_zero_undef_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
- %val = load i32 addrspace(1)* %valptr, align 4
+ %val = load i32, i32 addrspace(1)* %valptr, align 4
%ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 true) nounwind readnone
store i32 %ctlz, i32 addrspace(1)* %out, align 4
ret void
@@ -44,7 +44,7 @@ define void @v_ctlz_zero_undef_i32(i32 addrspace(1)* noalias %out, i32 addrspace
; EG: FFBH_UINT {{\*? *}}[[RESULT]]
; EG: FFBH_UINT {{\*? *}}[[RESULT]]
define void @v_ctlz_zero_undef_v2i32(<2 x i32> addrspace(1)* noalias %out, <2 x i32> addrspace(1)* noalias %valptr) nounwind {
- %val = load <2 x i32> addrspace(1)* %valptr, align 8
+ %val = load <2 x i32>, <2 x i32> addrspace(1)* %valptr, align 8
%ctlz = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %val, i1 true) nounwind readnone
store <2 x i32> %ctlz, <2 x i32> addrspace(1)* %out, align 8
ret void
@@ -64,7 +64,7 @@ define void @v_ctlz_zero_undef_v2i32(<2 x i32> addrspace(1)* noalias %out, <2 x
; EG: FFBH_UINT {{\*? *}}[[RESULT]]
; EG: FFBH_UINT {{\*? *}}[[RESULT]]
define void @v_ctlz_zero_undef_v4i32(<4 x i32> addrspace(1)* noalias %out, <4 x i32> addrspace(1)* noalias %valptr) nounwind {
- %val = load <4 x i32> addrspace(1)* %valptr, align 16
+ %val = load <4 x i32>, <4 x i32> addrspace(1)* %valptr, align 16
%ctlz = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %val, i1 true) nounwind readnone
store <4 x i32> %ctlz, <4 x i32> addrspace(1)* %out, align 16
ret void
diff --git a/test/CodeGen/R600/ctpop.ll b/test/CodeGen/R600/ctpop.ll
index c64f443ad697..0a031c5e24d1 100644
--- a/test/CodeGen/R600/ctpop.ll
+++ b/test/CodeGen/R600/ctpop.ll
@@ -1,4 +1,5 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC -check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC -check-prefix=VI %s
; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
declare i32 @llvm.ctpop.i32(i32) nounwind readnone
@@ -8,11 +9,11 @@ declare <8 x i32> @llvm.ctpop.v8i32(<8 x i32>) nounwind readnone
declare <16 x i32> @llvm.ctpop.v16i32(<16 x i32>) nounwind readnone
; FUNC-LABEL: {{^}}s_ctpop_i32:
-; SI: s_load_dword [[SVAL:s[0-9]+]],
-; SI: s_bcnt1_i32_b32 [[SRESULT:s[0-9]+]], [[SVAL]]
-; SI: v_mov_b32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]]
-; SI: buffer_store_dword [[VRESULT]],
-; SI: s_endpgm
+; GCN: s_load_dword [[SVAL:s[0-9]+]],
+; GCN: s_bcnt1_i32_b32 [[SRESULT:s[0-9]+]], [[SVAL]]
+; GCN: v_mov_b32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]]
+; GCN: buffer_store_dword [[VRESULT]],
+; GCN: s_endpgm
; EG: BCNT_INT
define void @s_ctpop_i32(i32 addrspace(1)* noalias %out, i32 %val) nounwind {
@@ -23,32 +24,33 @@ define void @s_ctpop_i32(i32 addrspace(1)* noalias %out, i32 %val) nounwind {
; XXX - Why 0 in register?
; FUNC-LABEL: {{^}}v_ctpop_i32:
-; SI: buffer_load_dword [[VAL:v[0-9]+]],
-; SI: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL]], 0
-; SI: buffer_store_dword [[RESULT]],
-; SI: s_endpgm
+; GCN: buffer_load_dword [[VAL:v[0-9]+]],
+; GCN: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL]], 0
+; GCN: buffer_store_dword [[RESULT]],
+; GCN: s_endpgm
; EG: BCNT_INT
define void @v_ctpop_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
- %val = load i32 addrspace(1)* %in, align 4
+ %val = load i32, i32 addrspace(1)* %in, align 4
%ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone
store i32 %ctpop, i32 addrspace(1)* %out, align 4
ret void
}
; FUNC-LABEL: {{^}}v_ctpop_add_chain_i32:
-; SI: buffer_load_dword [[VAL0:v[0-9]+]],
-; SI: buffer_load_dword [[VAL1:v[0-9]+]],
-; SI: v_bcnt_u32_b32_e64 [[MIDRESULT:v[0-9]+]], [[VAL1]], 0
-; SI-NEXT: v_bcnt_u32_b32_e32 [[RESULT:v[0-9]+]], [[VAL0]], [[MIDRESULT]]
-; SI: buffer_store_dword [[RESULT]],
-; SI: s_endpgm
+; GCN: buffer_load_dword [[VAL1:v[0-9]+]],
+; GCN: buffer_load_dword [[VAL0:v[0-9]+]],
+; GCN: v_bcnt_u32_b32_e64 [[MIDRESULT:v[0-9]+]], [[VAL1]], 0
+; SI: v_bcnt_u32_b32_e32 [[RESULT:v[0-9]+]], [[VAL0]], [[MIDRESULT]]
+; VI: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL0]], [[MIDRESULT]]
+; GCN: buffer_store_dword [[RESULT]],
+; GCN: s_endpgm
; EG: BCNT_INT
; EG: BCNT_INT
define void @v_ctpop_add_chain_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in0, i32 addrspace(1)* noalias %in1) nounwind {
- %val0 = load i32 addrspace(1)* %in0, align 4
- %val1 = load i32 addrspace(1)* %in1, align 4
+ %val0 = load i32, i32 addrspace(1)* %in0, align 4
+ %val1 = load i32, i32 addrspace(1)* %in1, align 4
%ctpop0 = call i32 @llvm.ctpop.i32(i32 %val0) nounwind readnone
%ctpop1 = call i32 @llvm.ctpop.i32(i32 %val1) nounwind readnone
%add = add i32 %ctpop0, %ctpop1
@@ -57,13 +59,13 @@ define void @v_ctpop_add_chain_i32(i32 addrspace(1)* noalias %out, i32 addrspace
}
; FUNC-LABEL: {{^}}v_ctpop_add_sgpr_i32:
-; SI: buffer_load_dword [[VAL0:v[0-9]+]],
-; SI-NEXT: s_waitcnt
-; SI-NEXT: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL0]], s{{[0-9]+}}
-; SI-NEXT: buffer_store_dword [[RESULT]],
-; SI: s_endpgm
+; GCN: buffer_load_dword [[VAL0:v[0-9]+]],
+; GCN-NEXT: s_waitcnt
+; GCN-NEXT: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL0]], s{{[0-9]+}}
+; GCN-NEXT: buffer_store_dword [[RESULT]],
+; GCN: s_endpgm
define void @v_ctpop_add_sgpr_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in0, i32 addrspace(1)* noalias %in1, i32 %sval) nounwind {
- %val0 = load i32 addrspace(1)* %in0, align 4
+ %val0 = load i32, i32 addrspace(1)* %in0, align 4
%ctpop0 = call i32 @llvm.ctpop.i32(i32 %val0) nounwind readnone
%add = add i32 %ctpop0, %sval
store i32 %add, i32 addrspace(1)* %out, align 4
@@ -71,47 +73,47 @@ define void @v_ctpop_add_sgpr_i32(i32 addrspace(1)* noalias %out, i32 addrspace(
}
; FUNC-LABEL: {{^}}v_ctpop_v2i32:
-; SI: v_bcnt_u32_b32_e64
-; SI: v_bcnt_u32_b32_e64
-; SI: s_endpgm
+; GCN: v_bcnt_u32_b32_e64
+; GCN: v_bcnt_u32_b32_e64
+; GCN: s_endpgm
; EG: BCNT_INT
; EG: BCNT_INT
define void @v_ctpop_v2i32(<2 x i32> addrspace(1)* noalias %out, <2 x i32> addrspace(1)* noalias %in) nounwind {
- %val = load <2 x i32> addrspace(1)* %in, align 8
+ %val = load <2 x i32>, <2 x i32> addrspace(1)* %in, align 8
%ctpop = call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> %val) nounwind readnone
store <2 x i32> %ctpop, <2 x i32> addrspace(1)* %out, align 8
ret void
}
; FUNC-LABEL: {{^}}v_ctpop_v4i32:
-; SI: v_bcnt_u32_b32_e64
-; SI: v_bcnt_u32_b32_e64
-; SI: v_bcnt_u32_b32_e64
-; SI: v_bcnt_u32_b32_e64
-; SI: s_endpgm
+; GCN: v_bcnt_u32_b32_e64
+; GCN: v_bcnt_u32_b32_e64
+; GCN: v_bcnt_u32_b32_e64
+; GCN: v_bcnt_u32_b32_e64
+; GCN: s_endpgm
; EG: BCNT_INT
; EG: BCNT_INT
; EG: BCNT_INT
; EG: BCNT_INT
define void @v_ctpop_v4i32(<4 x i32> addrspace(1)* noalias %out, <4 x i32> addrspace(1)* noalias %in) nounwind {
- %val = load <4 x i32> addrspace(1)* %in, align 16
+ %val = load <4 x i32>, <4 x i32> addrspace(1)* %in, align 16
%ctpop = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %val) nounwind readnone
store <4 x i32> %ctpop, <4 x i32> addrspace(1)* %out, align 16
ret void
}
; FUNC-LABEL: {{^}}v_ctpop_v8i32:
-; SI: v_bcnt_u32_b32_e64
-; SI: v_bcnt_u32_b32_e64
-; SI: v_bcnt_u32_b32_e64
-; SI: v_bcnt_u32_b32_e64
-; SI: v_bcnt_u32_b32_e64
-; SI: v_bcnt_u32_b32_e64
-; SI: v_bcnt_u32_b32_e64
-; SI: v_bcnt_u32_b32_e64
-; SI: s_endpgm
+; GCN: v_bcnt_u32_b32_e64
+; GCN: v_bcnt_u32_b32_e64
+; GCN: v_bcnt_u32_b32_e64
+; GCN: v_bcnt_u32_b32_e64
+; GCN: v_bcnt_u32_b32_e64
+; GCN: v_bcnt_u32_b32_e64
+; GCN: v_bcnt_u32_b32_e64
+; GCN: v_bcnt_u32_b32_e64
+; GCN: s_endpgm
; EG: BCNT_INT
; EG: BCNT_INT
@@ -122,30 +124,30 @@ define void @v_ctpop_v4i32(<4 x i32> addrspace(1)* noalias %out, <4 x i32> addrs
; EG: BCNT_INT
; EG: BCNT_INT
define void @v_ctpop_v8i32(<8 x i32> addrspace(1)* noalias %out, <8 x i32> addrspace(1)* noalias %in) nounwind {
- %val = load <8 x i32> addrspace(1)* %in, align 32
+ %val = load <8 x i32>, <8 x i32> addrspace(1)* %in, align 32
%ctpop = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %val) nounwind readnone
store <8 x i32> %ctpop, <8 x i32> addrspace(1)* %out, align 32
ret void
}
; FUNC-LABEL: {{^}}v_ctpop_v16i32:
-; SI: v_bcnt_u32_b32_e64
-; SI: v_bcnt_u32_b32_e64
-; SI: v_bcnt_u32_b32_e64
-; SI: v_bcnt_u32_b32_e64
-; SI: v_bcnt_u32_b32_e64
-; SI: v_bcnt_u32_b32_e64
-; SI: v_bcnt_u32_b32_e64
-; SI: v_bcnt_u32_b32_e64
-; SI: v_bcnt_u32_b32_e64
-; SI: v_bcnt_u32_b32_e64
-; SI: v_bcnt_u32_b32_e64
-; SI: v_bcnt_u32_b32_e64
-; SI: v_bcnt_u32_b32_e64
-; SI: v_bcnt_u32_b32_e64
-; SI: v_bcnt_u32_b32_e64
-; SI: v_bcnt_u32_b32_e64
-; SI: s_endpgm
+; GCN: v_bcnt_u32_b32_e64
+; GCN: v_bcnt_u32_b32_e64
+; GCN: v_bcnt_u32_b32_e64
+; GCN: v_bcnt_u32_b32_e64
+; GCN: v_bcnt_u32_b32_e64
+; GCN: v_bcnt_u32_b32_e64
+; GCN: v_bcnt_u32_b32_e64
+; GCN: v_bcnt_u32_b32_e64
+; GCN: v_bcnt_u32_b32_e64
+; GCN: v_bcnt_u32_b32_e64
+; GCN: v_bcnt_u32_b32_e64
+; GCN: v_bcnt_u32_b32_e64
+; GCN: v_bcnt_u32_b32_e64
+; GCN: v_bcnt_u32_b32_e64
+; GCN: v_bcnt_u32_b32_e64
+; GCN: v_bcnt_u32_b32_e64
+; GCN: s_endpgm
; EG: BCNT_INT
; EG: BCNT_INT
@@ -164,21 +166,21 @@ define void @v_ctpop_v8i32(<8 x i32> addrspace(1)* noalias %out, <8 x i32> addrs
; EG: BCNT_INT
; EG: BCNT_INT
define void @v_ctpop_v16i32(<16 x i32> addrspace(1)* noalias %out, <16 x i32> addrspace(1)* noalias %in) nounwind {
- %val = load <16 x i32> addrspace(1)* %in, align 32
+ %val = load <16 x i32>, <16 x i32> addrspace(1)* %in, align 32
%ctpop = call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %val) nounwind readnone
store <16 x i32> %ctpop, <16 x i32> addrspace(1)* %out, align 32
ret void
}
; FUNC-LABEL: {{^}}v_ctpop_i32_add_inline_constant:
-; SI: buffer_load_dword [[VAL:v[0-9]+]],
-; SI: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL]], 4
-; SI: buffer_store_dword [[RESULT]],
-; SI: s_endpgm
+; GCN: buffer_load_dword [[VAL:v[0-9]+]],
+; GCN: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL]], 4
+; GCN: buffer_store_dword [[RESULT]],
+; GCN: s_endpgm
; EG: BCNT_INT
define void @v_ctpop_i32_add_inline_constant(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
- %val = load i32 addrspace(1)* %in, align 4
+ %val = load i32, i32 addrspace(1)* %in, align 4
%ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone
%add = add i32 %ctpop, 4
store i32 %add, i32 addrspace(1)* %out, align 4
@@ -186,14 +188,14 @@ define void @v_ctpop_i32_add_inline_constant(i32 addrspace(1)* noalias %out, i32
}
; FUNC-LABEL: {{^}}v_ctpop_i32_add_inline_constant_inv:
-; SI: buffer_load_dword [[VAL:v[0-9]+]],
-; SI: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL]], 4
-; SI: buffer_store_dword [[RESULT]],
-; SI: s_endpgm
+; GCN: buffer_load_dword [[VAL:v[0-9]+]],
+; GCN: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL]], 4
+; GCN: buffer_store_dword [[RESULT]],
+; GCN: s_endpgm
; EG: BCNT_INT
define void @v_ctpop_i32_add_inline_constant_inv(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
- %val = load i32 addrspace(1)* %in, align 4
+ %val = load i32, i32 addrspace(1)* %in, align 4
%ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone
%add = add i32 4, %ctpop
store i32 %add, i32 addrspace(1)* %out, align 4
@@ -201,13 +203,14 @@ define void @v_ctpop_i32_add_inline_constant_inv(i32 addrspace(1)* noalias %out,
}
; FUNC-LABEL: {{^}}v_ctpop_i32_add_literal:
-; SI: buffer_load_dword [[VAL:v[0-9]+]],
-; SI: v_mov_b32_e32 [[LIT:v[0-9]+]], 0x1869f
+; GCN: buffer_load_dword [[VAL:v[0-9]+]],
+; GCN: v_mov_b32_e32 [[LIT:v[0-9]+]], 0x1869f
; SI: v_bcnt_u32_b32_e32 [[RESULT:v[0-9]+]], [[VAL]], [[LIT]]
-; SI: buffer_store_dword [[RESULT]],
-; SI: s_endpgm
+; VI: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL]], [[LIT]]
+; GCN: buffer_store_dword [[RESULT]],
+; GCN: s_endpgm
define void @v_ctpop_i32_add_literal(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
- %val = load i32 addrspace(1)* %in, align 4
+ %val = load i32, i32 addrspace(1)* %in, align 4
%ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone
%add = add i32 %ctpop, 99999
store i32 %add, i32 addrspace(1)* %out, align 4
@@ -215,15 +218,15 @@ define void @v_ctpop_i32_add_literal(i32 addrspace(1)* noalias %out, i32 addrspa
}
; FUNC-LABEL: {{^}}v_ctpop_i32_add_var:
-; SI-DAG: buffer_load_dword [[VAL:v[0-9]+]],
-; SI-DAG: s_load_dword [[VAR:s[0-9]+]],
-; SI: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL]], [[VAR]]
-; SI: buffer_store_dword [[RESULT]],
-; SI: s_endpgm
+; GCN-DAG: buffer_load_dword [[VAL:v[0-9]+]],
+; GCN-DAG: s_load_dword [[VAR:s[0-9]+]],
+; GCN: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL]], [[VAR]]
+; GCN: buffer_store_dword [[RESULT]],
+; GCN: s_endpgm
; EG: BCNT_INT
define void @v_ctpop_i32_add_var(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, i32 %const) nounwind {
- %val = load i32 addrspace(1)* %in, align 4
+ %val = load i32, i32 addrspace(1)* %in, align 4
%ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone
%add = add i32 %ctpop, %const
store i32 %add, i32 addrspace(1)* %out, align 4
@@ -231,15 +234,15 @@ define void @v_ctpop_i32_add_var(i32 addrspace(1)* noalias %out, i32 addrspace(1
}
; FUNC-LABEL: {{^}}v_ctpop_i32_add_var_inv:
-; SI-DAG: buffer_load_dword [[VAL:v[0-9]+]],
-; SI-DAG: s_load_dword [[VAR:s[0-9]+]],
-; SI: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL]], [[VAR]]
-; SI: buffer_store_dword [[RESULT]],
-; SI: s_endpgm
+; GCN-DAG: buffer_load_dword [[VAL:v[0-9]+]],
+; GCN-DAG: s_load_dword [[VAR:s[0-9]+]],
+; GCN: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL]], [[VAR]]
+; GCN: buffer_store_dword [[RESULT]],
+; GCN: s_endpgm
; EG: BCNT_INT
define void @v_ctpop_i32_add_var_inv(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, i32 %const) nounwind {
- %val = load i32 addrspace(1)* %in, align 4
+ %val = load i32, i32 addrspace(1)* %in, align 4
%ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone
%add = add i32 %const, %ctpop
store i32 %add, i32 addrspace(1)* %out, align 4
@@ -247,18 +250,19 @@ define void @v_ctpop_i32_add_var_inv(i32 addrspace(1)* noalias %out, i32 addrspa
}
; FUNC-LABEL: {{^}}v_ctpop_i32_add_vvar_inv:
-; SI-DAG: buffer_load_dword [[VAL:v[0-9]+]], s[{{[0-9]+:[0-9]+}}], {{0$}}
-; SI-DAG: buffer_load_dword [[VAR:v[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0 offset:16
+; GCN-DAG: buffer_load_dword [[VAL:v[0-9]+]], s[{{[0-9]+:[0-9]+}}], {{0$}}
+; GCN-DAG: buffer_load_dword [[VAR:v[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0 offset:16
; SI: v_bcnt_u32_b32_e32 [[RESULT:v[0-9]+]], [[VAL]], [[VAR]]
-; SI: buffer_store_dword [[RESULT]],
-; SI: s_endpgm
+; VI: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL]], [[VAR]]
+; GCN: buffer_store_dword [[RESULT]],
+; GCN: s_endpgm
; EG: BCNT_INT
define void @v_ctpop_i32_add_vvar_inv(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, i32 addrspace(1)* noalias %constptr) nounwind {
- %val = load i32 addrspace(1)* %in, align 4
+ %val = load i32, i32 addrspace(1)* %in, align 4
%ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone
- %gep = getelementptr i32 addrspace(1)* %constptr, i32 4
- %const = load i32 addrspace(1)* %gep, align 4
+ %gep = getelementptr i32, i32 addrspace(1)* %constptr, i32 4
+ %const = load i32, i32 addrspace(1)* %gep, align 4
%add = add i32 %const, %ctpop
store i32 %add, i32 addrspace(1)* %out, align 4
ret void
@@ -269,10 +273,11 @@ define void @v_ctpop_i32_add_vvar_inv(i32 addrspace(1)* noalias %out, i32 addrsp
; FUNC-LABEL: {{^}}ctpop_i32_in_br:
; SI: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xd
-; SI: s_bcnt1_i32_b32 [[SRESULT:s[0-9]+]], [[VAL]]
-; SI: v_mov_b32_e32 [[RESULT]], [[SRESULT]]
-; SI: buffer_store_dword [[RESULT]],
-; SI: s_endpgm
+; VI: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x34
+; GCN: s_bcnt1_i32_b32 [[SRESULT:s[0-9]+]], [[VAL]]
+; GCN: v_mov_b32_e32 [[RESULT]], [[SRESULT]]
+; GCN: buffer_store_dword [[RESULT]],
+; GCN: s_endpgm
; EG: BCNT_INT
define void @ctpop_i32_in_br(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %ctpop_arg, i32 %cond) {
entry:
@@ -284,8 +289,8 @@ if:
br label %endif
else:
- %tmp3 = getelementptr i32 addrspace(1)* %in, i32 1
- %tmp4 = load i32 addrspace(1)* %tmp3
+ %tmp3 = getelementptr i32, i32 addrspace(1)* %in, i32 1
+ %tmp4 = load i32, i32 addrspace(1)* %tmp3
br label %endif
endif:
diff --git a/test/CodeGen/R600/ctpop64.ll b/test/CodeGen/R600/ctpop64.ll
index 9758ac96ea9b..e1a0ee3ea217 100644
--- a/test/CodeGen/R600/ctpop64.ll
+++ b/test/CodeGen/R600/ctpop64.ll
@@ -1,4 +1,5 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN -check-prefix=FUNC %s
declare i64 @llvm.ctpop.i64(i64) nounwind readnone
declare <2 x i64> @llvm.ctpop.v2i64(<2 x i64>) nounwind readnone
@@ -8,10 +9,11 @@ declare <16 x i64> @llvm.ctpop.v16i64(<16 x i64>) nounwind readnone
; FUNC-LABEL: {{^}}s_ctpop_i64:
; SI: s_load_dwordx2 [[SVAL:s\[[0-9]+:[0-9]+\]]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
-; SI: s_bcnt1_i32_b64 [[SRESULT:s[0-9]+]], [[SVAL]]
-; SI: v_mov_b32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]]
-; SI: buffer_store_dword [[VRESULT]],
-; SI: s_endpgm
+; VI: s_load_dwordx2 [[SVAL:s\[[0-9]+:[0-9]+\]]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c
+; GCN: s_bcnt1_i32_b64 [[SRESULT:s[0-9]+]], [[SVAL]]
+; GCN: v_mov_b32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]]
+; GCN: buffer_store_dword [[VRESULT]],
+; GCN: s_endpgm
define void @s_ctpop_i64(i32 addrspace(1)* noalias %out, i64 %val) nounwind {
%ctpop = call i64 @llvm.ctpop.i64(i64 %val) nounwind readnone
%truncctpop = trunc i64 %ctpop to i32
@@ -20,13 +22,14 @@ define void @s_ctpop_i64(i32 addrspace(1)* noalias %out, i64 %val) nounwind {
}
; FUNC-LABEL: {{^}}v_ctpop_i64:
-; SI: buffer_load_dwordx2 v{{\[}}[[LOVAL:[0-9]+]]:[[HIVAL:[0-9]+]]{{\]}},
-; SI: v_bcnt_u32_b32_e64 [[MIDRESULT:v[0-9]+]], v[[LOVAL]], 0
+; GCN: buffer_load_dwordx2 v{{\[}}[[LOVAL:[0-9]+]]:[[HIVAL:[0-9]+]]{{\]}},
+; GCN: v_bcnt_u32_b32_e64 [[MIDRESULT:v[0-9]+]], v[[LOVAL]], 0
; SI-NEXT: v_bcnt_u32_b32_e32 [[RESULT:v[0-9]+]], v[[HIVAL]], [[MIDRESULT]]
-; SI: buffer_store_dword [[RESULT]],
-; SI: s_endpgm
+; VI-NEXT: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], v[[HIVAL]], [[MIDRESULT]]
+; GCN: buffer_store_dword [[RESULT]],
+; GCN: s_endpgm
define void @v_ctpop_i64(i32 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
- %val = load i64 addrspace(1)* %in, align 8
+ %val = load i64, i64 addrspace(1)* %in, align 8
%ctpop = call i64 @llvm.ctpop.i64(i64 %val) nounwind readnone
%truncctpop = trunc i64 %ctpop to i32
store i32 %truncctpop, i32 addrspace(1)* %out, align 4
@@ -34,9 +37,9 @@ define void @v_ctpop_i64(i32 addrspace(1)* noalias %out, i64 addrspace(1)* noali
}
; FUNC-LABEL: {{^}}s_ctpop_v2i64:
-; SI: s_bcnt1_i32_b64
-; SI: s_bcnt1_i32_b64
-; SI: s_endpgm
+; GCN: s_bcnt1_i32_b64
+; GCN: s_bcnt1_i32_b64
+; GCN: s_endpgm
define void @s_ctpop_v2i64(<2 x i32> addrspace(1)* noalias %out, <2 x i64> %val) nounwind {
%ctpop = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %val) nounwind readnone
%truncctpop = trunc <2 x i64> %ctpop to <2 x i32>
@@ -45,11 +48,11 @@ define void @s_ctpop_v2i64(<2 x i32> addrspace(1)* noalias %out, <2 x i64> %val)
}
; FUNC-LABEL: {{^}}s_ctpop_v4i64:
-; SI: s_bcnt1_i32_b64
-; SI: s_bcnt1_i32_b64
-; SI: s_bcnt1_i32_b64
-; SI: s_bcnt1_i32_b64
-; SI: s_endpgm
+; GCN: s_bcnt1_i32_b64
+; GCN: s_bcnt1_i32_b64
+; GCN: s_bcnt1_i32_b64
+; GCN: s_bcnt1_i32_b64
+; GCN: s_endpgm
define void @s_ctpop_v4i64(<4 x i32> addrspace(1)* noalias %out, <4 x i64> %val) nounwind {
%ctpop = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %val) nounwind readnone
%truncctpop = trunc <4 x i64> %ctpop to <4 x i32>
@@ -58,13 +61,13 @@ define void @s_ctpop_v4i64(<4 x i32> addrspace(1)* noalias %out, <4 x i64> %val)
}
; FUNC-LABEL: {{^}}v_ctpop_v2i64:
-; SI: v_bcnt_u32_b32
-; SI: v_bcnt_u32_b32
-; SI: v_bcnt_u32_b32
-; SI: v_bcnt_u32_b32
-; SI: s_endpgm
+; GCN: v_bcnt_u32_b32
+; GCN: v_bcnt_u32_b32
+; GCN: v_bcnt_u32_b32
+; GCN: v_bcnt_u32_b32
+; GCN: s_endpgm
define void @v_ctpop_v2i64(<2 x i32> addrspace(1)* noalias %out, <2 x i64> addrspace(1)* noalias %in) nounwind {
- %val = load <2 x i64> addrspace(1)* %in, align 16
+ %val = load <2 x i64>, <2 x i64> addrspace(1)* %in, align 16
%ctpop = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %val) nounwind readnone
%truncctpop = trunc <2 x i64> %ctpop to <2 x i32>
store <2 x i32> %truncctpop, <2 x i32> addrspace(1)* %out, align 8
@@ -72,17 +75,17 @@ define void @v_ctpop_v2i64(<2 x i32> addrspace(1)* noalias %out, <2 x i64> addrs
}
; FUNC-LABEL: {{^}}v_ctpop_v4i64:
-; SI: v_bcnt_u32_b32
-; SI: v_bcnt_u32_b32
-; SI: v_bcnt_u32_b32
-; SI: v_bcnt_u32_b32
-; SI: v_bcnt_u32_b32
-; SI: v_bcnt_u32_b32
-; SI: v_bcnt_u32_b32
-; SI: v_bcnt_u32_b32
-; SI: s_endpgm
+; GCN: v_bcnt_u32_b32
+; GCN: v_bcnt_u32_b32
+; GCN: v_bcnt_u32_b32
+; GCN: v_bcnt_u32_b32
+; GCN: v_bcnt_u32_b32
+; GCN: v_bcnt_u32_b32
+; GCN: v_bcnt_u32_b32
+; GCN: v_bcnt_u32_b32
+; GCN: s_endpgm
define void @v_ctpop_v4i64(<4 x i32> addrspace(1)* noalias %out, <4 x i64> addrspace(1)* noalias %in) nounwind {
- %val = load <4 x i64> addrspace(1)* %in, align 32
+ %val = load <4 x i64>, <4 x i64> addrspace(1)* %in, align 32
%ctpop = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %val) nounwind readnone
%truncctpop = trunc <4 x i64> %ctpop to <4 x i32>
store <4 x i32> %truncctpop, <4 x i32> addrspace(1)* %out, align 16
@@ -94,11 +97,12 @@ define void @v_ctpop_v4i64(<4 x i32> addrspace(1)* noalias %out, <4 x i64> addrs
; FUNC-LABEL: {{^}}ctpop_i64_in_br:
; SI: s_load_dwordx2 s{{\[}}[[LOVAL:[0-9]+]]:[[HIVAL:[0-9]+]]{{\]}}, s[{{[0-9]+:[0-9]+}}], 0xd
-; SI: s_bcnt1_i32_b64 [[RESULT:s[0-9]+]], {{s\[}}[[LOVAL]]:[[HIVAL]]{{\]}}
-; SI: v_mov_b32_e32 v[[VLO:[0-9]+]], [[RESULT]]
-; SI: v_mov_b32_e32 v[[VHI:[0-9]+]], s[[HIVAL]]
-; SI: buffer_store_dwordx2 {{v\[}}[[VLO]]:[[VHI]]{{\]}}
-; SI: s_endpgm
+; VI: s_load_dwordx2 s{{\[}}[[LOVAL:[0-9]+]]:[[HIVAL:[0-9]+]]{{\]}}, s[{{[0-9]+:[0-9]+}}], 0x34
+; GCN: s_bcnt1_i32_b64 [[RESULT:s[0-9]+]], {{s\[}}[[LOVAL]]:[[HIVAL]]{{\]}}
+; GCN: v_mov_b32_e32 v[[VLO:[0-9]+]], [[RESULT]]
+; GCN: v_mov_b32_e32 v[[VHI:[0-9]+]], s[[HIVAL]]
+; GCN: buffer_store_dwordx2 {{v\[}}[[VLO]]:[[VHI]]{{\]}}
+; GCN: s_endpgm
define void @ctpop_i64_in_br(i64 addrspace(1)* %out, i64 addrspace(1)* %in, i64 %ctpop_arg, i32 %cond) {
entry:
%tmp0 = icmp eq i32 %cond, 0
@@ -109,8 +113,8 @@ if:
br label %endif
else:
- %tmp3 = getelementptr i64 addrspace(1)* %in, i32 1
- %tmp4 = load i64 addrspace(1)* %tmp3
+ %tmp3 = getelementptr i64, i64 addrspace(1)* %in, i32 1
+ %tmp4 = load i64, i64 addrspace(1)* %tmp3
br label %endif
endif:
diff --git a/test/CodeGen/R600/cttz-ctlz.ll b/test/CodeGen/R600/cttz-ctlz.ll
deleted file mode 100644
index c957a033c5d7..000000000000
--- a/test/CodeGen/R600/cttz-ctlz.ll
+++ /dev/null
@@ -1,225 +0,0 @@
-; RUN: opt -S -codegenprepare -mtriple=r600-unknown-unknown -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=ALL %s
-; RUN: opt -S -codegenprepare -mtriple=r600-unknown-unknown -mcpu=tonga < %s | FileCheck -check-prefix=SI -check-prefix=ALL %s
-
-
-define i64 @test1(i64 %A) {
-; ALL-LABEL: @test1(
-; SI: [[CTLZ:%[A-Za-z0-9]+]] = call i64 @llvm.ctlz.i64(i64 %A, i1 false)
-; SI-NEXT: ret i64 [[CTLZ]]
-entry:
- %tobool = icmp eq i64 %A, 0
- br i1 %tobool, label %cond.end, label %cond.true
-
-cond.true: ; preds = %entry
- %0 = tail call i64 @llvm.ctlz.i64(i64 %A, i1 true)
- br label %cond.end
-
-cond.end: ; preds = %entry, %cond.true
- %cond = phi i64 [ %0, %cond.true ], [ 64, %entry ]
- ret i64 %cond
-}
-
-
-define i32 @test2(i32 %A) {
-; ALL-LABEL: @test2(
-; SI: [[CTLZ:%[A-Za-z0-9]+]] = call i32 @llvm.ctlz.i32(i32 %A, i1 false)
-; SI-NEXT: ret i32 [[CTLZ]]
-entry:
- %tobool = icmp eq i32 %A, 0
- br i1 %tobool, label %cond.end, label %cond.true
-
-cond.true: ; preds = %entry
- %0 = tail call i32 @llvm.ctlz.i32(i32 %A, i1 true)
- br label %cond.end
-
-cond.end: ; preds = %entry, %cond.true
- %cond = phi i32 [ %0, %cond.true ], [ 32, %entry ]
- ret i32 %cond
-}
-
-
-define signext i16 @test3(i16 signext %A) {
-; ALL-LABEL: @test3(
-; SI: [[CTLZ:%[A-Za-z0-9]+]] = call i16 @llvm.ctlz.i16(i16 %A, i1 false)
-; SI-NEXT: ret i16 [[CTLZ]]
-entry:
- %tobool = icmp eq i16 %A, 0
- br i1 %tobool, label %cond.end, label %cond.true
-
-cond.true: ; preds = %entry
- %0 = tail call i16 @llvm.ctlz.i16(i16 %A, i1 true)
- br label %cond.end
-
-cond.end: ; preds = %entry, %cond.true
- %cond = phi i16 [ %0, %cond.true ], [ 16, %entry ]
- ret i16 %cond
-}
-
-
-define i64 @test1b(i64 %A) {
-; ALL-LABEL: @test1b(
-; SI: [[CTTZ:%[A-Za-z0-9]+]] = call i64 @llvm.cttz.i64(i64 %A, i1 false)
-; SI-NEXT: ret i64 [[CTTZ]]
-entry:
- %tobool = icmp eq i64 %A, 0
- br i1 %tobool, label %cond.end, label %cond.true
-
-cond.true: ; preds = %entry
- %0 = tail call i64 @llvm.cttz.i64(i64 %A, i1 true)
- br label %cond.end
-
-cond.end: ; preds = %entry, %cond.true
- %cond = phi i64 [ %0, %cond.true ], [ 64, %entry ]
- ret i64 %cond
-}
-
-
-define i32 @test2b(i32 %A) {
-; ALL-LABEL: @test2b(
-; SI: [[CTTZ:%[A-Za-z0-9]+]] = call i32 @llvm.cttz.i32(i32 %A, i1 false)
-; SI-NEXT: ret i32 [[CTTZ]]
-entry:
- %tobool = icmp eq i32 %A, 0
- br i1 %tobool, label %cond.end, label %cond.true
-
-cond.true: ; preds = %entry
- %0 = tail call i32 @llvm.cttz.i32(i32 %A, i1 true)
- br label %cond.end
-
-cond.end: ; preds = %entry, %cond.true
- %cond = phi i32 [ %0, %cond.true ], [ 32, %entry ]
- ret i32 %cond
-}
-
-
-define signext i16 @test3b(i16 signext %A) {
-; ALL-LABEL: @test3b(
-; SI: [[CTTZ:%[A-Za-z0-9]+]] = call i16 @llvm.cttz.i16(i16 %A, i1 false)
-; SI-NEXT: ret i16 [[CTTZ]]
-entry:
- %tobool = icmp eq i16 %A, 0
- br i1 %tobool, label %cond.end, label %cond.true
-
-cond.true: ; preds = %entry
- %0 = tail call i16 @llvm.cttz.i16(i16 %A, i1 true)
- br label %cond.end
-
-cond.end: ; preds = %entry, %cond.true
- %cond = phi i16 [ %0, %cond.true ], [ 16, %entry ]
- ret i16 %cond
-}
-
-
-define i64 @test1c(i64 %A) {
-; ALL-LABEL: @test1c(
-; ALL: icmp eq i64 %A, 0
-; ALL: call i64 @llvm.ctlz.i64(i64 %A, i1 true)
-entry:
- %tobool = icmp eq i64 %A, 0
- br i1 %tobool, label %cond.end, label %cond.true
-
-cond.true: ; preds = %entry
- %0 = tail call i64 @llvm.ctlz.i64(i64 %A, i1 true)
- br label %cond.end
-
-cond.end: ; preds = %entry, %cond.true
- %cond = phi i64 [ %0, %cond.true ], [ 63, %entry ]
- ret i64 %cond
-}
-
-define i32 @test2c(i32 %A) {
-; ALL-LABEL: @test2c(
-; ALL: icmp eq i32 %A, 0
-; ALL: call i32 @llvm.ctlz.i32(i32 %A, i1 true)
-entry:
- %tobool = icmp eq i32 %A, 0
- br i1 %tobool, label %cond.end, label %cond.true
-
-cond.true: ; preds = %entry
- %0 = tail call i32 @llvm.ctlz.i32(i32 %A, i1 true)
- br label %cond.end
-
-cond.end: ; preds = %entry, %cond.true
- %cond = phi i32 [ %0, %cond.true ], [ 31, %entry ]
- ret i32 %cond
-}
-
-
-define signext i16 @test3c(i16 signext %A) {
-; ALL-LABEL: @test3c(
-; ALL: icmp eq i16 %A, 0
-; ALL: call i16 @llvm.ctlz.i16(i16 %A, i1 true)
-entry:
- %tobool = icmp eq i16 %A, 0
- br i1 %tobool, label %cond.end, label %cond.true
-
-cond.true: ; preds = %entry
- %0 = tail call i16 @llvm.ctlz.i16(i16 %A, i1 true)
- br label %cond.end
-
-cond.end: ; preds = %entry, %cond.true
- %cond = phi i16 [ %0, %cond.true ], [ 15, %entry ]
- ret i16 %cond
-}
-
-
-define i64 @test1d(i64 %A) {
-; ALL-LABEL: @test1d(
-; ALL: icmp eq i64 %A, 0
-; ALL: call i64 @llvm.cttz.i64(i64 %A, i1 true)
-entry:
- %tobool = icmp eq i64 %A, 0
- br i1 %tobool, label %cond.end, label %cond.true
-
-cond.true: ; preds = %entry
- %0 = tail call i64 @llvm.cttz.i64(i64 %A, i1 true)
- br label %cond.end
-
-cond.end: ; preds = %entry, %cond.true
- %cond = phi i64 [ %0, %cond.true ], [ 63, %entry ]
- ret i64 %cond
-}
-
-
-define i32 @test2d(i32 %A) {
-; ALL-LABEL: @test2d(
-; ALL: icmp eq i32 %A, 0
-; ALL: call i32 @llvm.cttz.i32(i32 %A, i1 true)
-entry:
- %tobool = icmp eq i32 %A, 0
- br i1 %tobool, label %cond.end, label %cond.true
-
-cond.true: ; preds = %entry
- %0 = tail call i32 @llvm.cttz.i32(i32 %A, i1 true)
- br label %cond.end
-
-cond.end: ; preds = %entry, %cond.true
- %cond = phi i32 [ %0, %cond.true ], [ 31, %entry ]
- ret i32 %cond
-}
-
-
-define signext i16 @test3d(i16 signext %A) {
-; ALL-LABEL: @test3d(
-; ALL: icmp eq i16 %A, 0
-; ALL: call i16 @llvm.cttz.i16(i16 %A, i1 true)
-entry:
- %tobool = icmp eq i16 %A, 0
- br i1 %tobool, label %cond.end, label %cond.true
-
-cond.true: ; preds = %entry
- %0 = tail call i16 @llvm.cttz.i16(i16 %A, i1 true)
- br label %cond.end
-
-cond.end: ; preds = %entry, %cond.true
- %cond = phi i16 [ %0, %cond.true ], [ 15, %entry ]
- ret i16 %cond
-}
-
-
-declare i64 @llvm.ctlz.i64(i64, i1)
-declare i32 @llvm.ctlz.i32(i32, i1)
-declare i16 @llvm.ctlz.i16(i16, i1)
-declare i64 @llvm.cttz.i64(i64, i1)
-declare i32 @llvm.cttz.i32(i32, i1)
-declare i16 @llvm.cttz.i16(i16, i1)
diff --git a/test/CodeGen/R600/cttz_zero_undef.ll b/test/CodeGen/R600/cttz_zero_undef.ll
index d9d284c58865..56fcb51fe14e 100644
--- a/test/CodeGen/R600/cttz_zero_undef.ll
+++ b/test/CodeGen/R600/cttz_zero_undef.ll
@@ -28,7 +28,7 @@ define void @s_cttz_zero_undef_i32(i32 addrspace(1)* noalias %out, i32 %val) nou
; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT:T[0-9]+\.[XYZW]]]
; EG: FFBL_INT {{\*? *}}[[RESULT]]
define void @v_cttz_zero_undef_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
- %val = load i32 addrspace(1)* %valptr, align 4
+ %val = load i32, i32 addrspace(1)* %valptr, align 4
%cttz = call i32 @llvm.cttz.i32(i32 %val, i1 true) nounwind readnone
store i32 %cttz, i32 addrspace(1)* %out, align 4
ret void
@@ -44,7 +44,7 @@ define void @v_cttz_zero_undef_i32(i32 addrspace(1)* noalias %out, i32 addrspace
; EG: FFBL_INT {{\*? *}}[[RESULT]]
; EG: FFBL_INT {{\*? *}}[[RESULT]]
define void @v_cttz_zero_undef_v2i32(<2 x i32> addrspace(1)* noalias %out, <2 x i32> addrspace(1)* noalias %valptr) nounwind {
- %val = load <2 x i32> addrspace(1)* %valptr, align 8
+ %val = load <2 x i32>, <2 x i32> addrspace(1)* %valptr, align 8
%cttz = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %val, i1 true) nounwind readnone
store <2 x i32> %cttz, <2 x i32> addrspace(1)* %out, align 8
ret void
@@ -64,7 +64,7 @@ define void @v_cttz_zero_undef_v2i32(<2 x i32> addrspace(1)* noalias %out, <2 x
; EG: FFBL_INT {{\*? *}}[[RESULT]]
; EG: FFBL_INT {{\*? *}}[[RESULT]]
define void @v_cttz_zero_undef_v4i32(<4 x i32> addrspace(1)* noalias %out, <4 x i32> addrspace(1)* noalias %valptr) nounwind {
- %val = load <4 x i32> addrspace(1)* %valptr, align 16
+ %val = load <4 x i32>, <4 x i32> addrspace(1)* %valptr, align 16
%cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %val, i1 true) nounwind readnone
store <4 x i32> %cttz, <4 x i32> addrspace(1)* %out, align 16
ret void
diff --git a/test/CodeGen/R600/cvt_f32_ubyte.ll b/test/CodeGen/R600/cvt_f32_ubyte.ll
index 69eea5919c05..3399d9da29e3 100644
--- a/test/CodeGen/R600/cvt_f32_ubyte.ll
+++ b/test/CodeGen/R600/cvt_f32_ubyte.ll
@@ -8,7 +8,7 @@
; SI: v_cvt_f32_ubyte0_e32 [[CONV:v[0-9]+]], [[LOADREG]]
; SI: buffer_store_dword [[CONV]],
define void @load_i8_to_f32(float addrspace(1)* noalias %out, i8 addrspace(1)* noalias %in) nounwind {
- %load = load i8 addrspace(1)* %in, align 1
+ %load = load i8, i8 addrspace(1)* %in, align 1
%cvt = uitofp i8 %load to float
store float %cvt, float addrspace(1)* %out, align 4
ret void
@@ -23,7 +23,7 @@ define void @load_i8_to_f32(float addrspace(1)* noalias %out, i8 addrspace(1)* n
; SI-DAG: v_cvt_f32_ubyte0_e32 v[[LORESULT:[0-9]+]], [[LOADREG]]
; SI: buffer_store_dwordx2 v{{\[}}[[LORESULT]]:[[HIRESULT]]{{\]}},
define void @load_v2i8_to_v2f32(<2 x float> addrspace(1)* noalias %out, <2 x i8> addrspace(1)* noalias %in) nounwind {
- %load = load <2 x i8> addrspace(1)* %in, align 2
+ %load = load <2 x i8>, <2 x i8> addrspace(1)* %in, align 2
%cvt = uitofp <2 x i8> %load to <2 x float>
store <2 x float> %cvt, <2 x float> addrspace(1)* %out, align 16
ret void
@@ -37,7 +37,7 @@ define void @load_v2i8_to_v2f32(<2 x float> addrspace(1)* noalias %out, <2 x i8>
; SI-DAG: v_cvt_f32_ubyte0_e32
; SI: buffer_store_dwordx2 v{{\[}}[[LORESULT]]:[[HIRESULT]]{{\]}},
define void @load_v3i8_to_v3f32(<3 x float> addrspace(1)* noalias %out, <3 x i8> addrspace(1)* noalias %in) nounwind {
- %load = load <3 x i8> addrspace(1)* %in, align 4
+ %load = load <3 x i8>, <3 x i8> addrspace(1)* %in, align 4
%cvt = uitofp <3 x i8> %load to <3 x float>
store <3 x float> %cvt, <3 x float> addrspace(1)* %out, align 16
ret void
@@ -53,7 +53,7 @@ define void @load_v3i8_to_v3f32(<3 x float> addrspace(1)* noalias %out, <3 x i8>
; SI-DAG: v_cvt_f32_ubyte0_e32 v[[LORESULT:[0-9]+]], [[LOADREG]]
; SI: buffer_store_dwordx4 v{{\[}}[[LORESULT]]:[[HIRESULT]]{{\]}},
define void @load_v4i8_to_v4f32(<4 x float> addrspace(1)* noalias %out, <4 x i8> addrspace(1)* noalias %in) nounwind {
- %load = load <4 x i8> addrspace(1)* %in, align 4
+ %load = load <4 x i8>, <4 x i8> addrspace(1)* %in, align 4
%cvt = uitofp <4 x i8> %load to <4 x float>
store <4 x float> %cvt, <4 x float> addrspace(1)* %out, align 16
ret void
@@ -63,10 +63,10 @@ define void @load_v4i8_to_v4f32(<4 x float> addrspace(1)* noalias %out, <4 x i8>
; position in the word for the component.
; SI-LABEL: {{^}}load_v4i8_to_v4f32_unaligned:
-; SI: buffer_load_ubyte [[LOADREG0:v[0-9]+]]
-; SI: buffer_load_ubyte [[LOADREG1:v[0-9]+]]
-; SI: buffer_load_ubyte [[LOADREG2:v[0-9]+]]
; SI: buffer_load_ubyte [[LOADREG3:v[0-9]+]]
+; SI: buffer_load_ubyte [[LOADREG2:v[0-9]+]]
+; SI: buffer_load_ubyte [[LOADREG1:v[0-9]+]]
+; SI: buffer_load_ubyte [[LOADREG0:v[0-9]+]]
; SI-NOT: v_lshlrev_b32
; SI-NOT: v_or_b32
@@ -77,7 +77,7 @@ define void @load_v4i8_to_v4f32(<4 x float> addrspace(1)* noalias %out, <4 x i8>
; SI: buffer_store_dwordx4 v{{\[}}[[LORESULT]]:[[HIRESULT]]{{\]}},
define void @load_v4i8_to_v4f32_unaligned(<4 x float> addrspace(1)* noalias %out, <4 x i8> addrspace(1)* noalias %in) nounwind {
- %load = load <4 x i8> addrspace(1)* %in, align 1
+ %load = load <4 x i8>, <4 x i8> addrspace(1)* %in, align 1
%cvt = uitofp <4 x i8> %load to <4 x float>
store <4 x float> %cvt, <4 x float> addrspace(1)* %out, align 16
ret void
@@ -105,7 +105,7 @@ define void @load_v4i8_to_v4f32_unaligned(<4 x float> addrspace(1)* noalias %out
; XSI: v_cvt_f32_u32_e32
; SI: s_endpgm
define void @load_v4i8_to_v4f32_2_uses(<4 x float> addrspace(1)* noalias %out, <4 x i8> addrspace(1)* noalias %out2, <4 x i8> addrspace(1)* noalias %in) nounwind {
- %load = load <4 x i8> addrspace(1)* %in, align 4
+ %load = load <4 x i8>, <4 x i8> addrspace(1)* %in, align 4
%cvt = uitofp <4 x i8> %load to <4 x float>
store <4 x float> %cvt, <4 x float> addrspace(1)* %out, align 16
%add = add <4 x i8> %load, <i8 9, i8 9, i8 9, i8 9> ; Second use of %load
@@ -117,7 +117,7 @@ define void @load_v4i8_to_v4f32_2_uses(<4 x float> addrspace(1)* noalias %out, <
; SI-LABEL: {{^}}load_v7i8_to_v7f32:
; SI: s_endpgm
define void @load_v7i8_to_v7f32(<7 x float> addrspace(1)* noalias %out, <7 x i8> addrspace(1)* noalias %in) nounwind {
- %load = load <7 x i8> addrspace(1)* %in, align 1
+ %load = load <7 x i8>, <7 x i8> addrspace(1)* %in, align 1
%cvt = uitofp <7 x i8> %load to <7 x float>
store <7 x float> %cvt, <7 x float> addrspace(1)* %out, align 16
ret void
@@ -146,7 +146,7 @@ define void @load_v7i8_to_v7f32(<7 x float> addrspace(1)* noalias %out, <7 x i8>
; SI: buffer_store_dword
; SI: buffer_store_dword
define void @load_v8i8_to_v8f32(<8 x float> addrspace(1)* noalias %out, <8 x i8> addrspace(1)* noalias %in) nounwind {
- %load = load <8 x i8> addrspace(1)* %in, align 8
+ %load = load <8 x i8>, <8 x i8> addrspace(1)* %in, align 8
%cvt = uitofp <8 x i8> %load to <8 x float>
store <8 x float> %cvt, <8 x float> addrspace(1)* %out, align 16
ret void
@@ -158,7 +158,7 @@ define void @load_v8i8_to_v8f32(<8 x float> addrspace(1)* noalias %out, <8 x i8>
; SI-NEXT: v_cvt_f32_ubyte0_e32 [[CONV:v[0-9]+]], [[ADD]]
; SI: buffer_store_dword [[CONV]],
define void @i8_zext_inreg_i32_to_f32(float addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
- %load = load i32 addrspace(1)* %in, align 4
+ %load = load i32, i32 addrspace(1)* %in, align 4
%add = add i32 %load, 2
%inreg = and i32 %add, 255
%cvt = uitofp i32 %inreg to float
@@ -168,7 +168,7 @@ define void @i8_zext_inreg_i32_to_f32(float addrspace(1)* noalias %out, i32 addr
; SI-LABEL: {{^}}i8_zext_inreg_hi1_to_f32:
define void @i8_zext_inreg_hi1_to_f32(float addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
- %load = load i32 addrspace(1)* %in, align 4
+ %load = load i32, i32 addrspace(1)* %in, align 4
%inreg = and i32 %load, 65280
%shr = lshr i32 %inreg, 8
%cvt = uitofp i32 %shr to float
@@ -180,7 +180,7 @@ define void @i8_zext_inreg_hi1_to_f32(float addrspace(1)* noalias %out, i32 addr
; We don't get these ones because of the zext, but instcombine removes
; them so it shouldn't really matter.
define void @i8_zext_i32_to_f32(float addrspace(1)* noalias %out, i8 addrspace(1)* noalias %in) nounwind {
- %load = load i8 addrspace(1)* %in, align 1
+ %load = load i8, i8 addrspace(1)* %in, align 1
%ext = zext i8 %load to i32
%cvt = uitofp i32 %ext to float
store float %cvt, float addrspace(1)* %out, align 4
@@ -188,7 +188,7 @@ define void @i8_zext_i32_to_f32(float addrspace(1)* noalias %out, i8 addrspace(1
}
define void @v4i8_zext_v4i32_to_v4f32(<4 x float> addrspace(1)* noalias %out, <4 x i8> addrspace(1)* noalias %in) nounwind {
- %load = load <4 x i8> addrspace(1)* %in, align 1
+ %load = load <4 x i8>, <4 x i8> addrspace(1)* %in, align 1
%ext = zext <4 x i8> %load to <4 x i32>
%cvt = uitofp <4 x i32> %ext to <4 x float>
store <4 x float> %cvt, <4 x float> addrspace(1)* %out, align 16
diff --git a/test/CodeGen/R600/cvt_flr_i32_f32.ll b/test/CodeGen/R600/cvt_flr_i32_f32.ll
new file mode 100644
index 000000000000..2dd3a9f2a776
--- /dev/null
+++ b/test/CodeGen/R600/cvt_flr_i32_f32.ll
@@ -0,0 +1,86 @@
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI-SAFE -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI -enable-no-nans-fp-math -verify-machineinstrs < %s | FileCheck -check-prefix=SI-NONAN -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+
+declare float @llvm.fabs.f32(float) #1
+declare float @llvm.floor.f32(float) #1
+
+; FUNC-LABEL: {{^}}cvt_flr_i32_f32_0:
+; SI-SAFE-NOT: v_cvt_flr_i32_f32
+; SI-NOT: add
+; SI-NONAN: v_cvt_flr_i32_f32_e32 v{{[0-9]+}}, s{{[0-9]+}}
+; SI: s_endpgm
+define void @cvt_flr_i32_f32_0(i32 addrspace(1)* %out, float %x) #0 {
+ %floor = call float @llvm.floor.f32(float %x) #1
+ %cvt = fptosi float %floor to i32
+ store i32 %cvt, i32 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}cvt_flr_i32_f32_1:
+; SI: v_add_f32_e64 [[TMP:v[0-9]+]], 1.0, s{{[0-9]+}}
+; SI-SAFE-NOT: v_cvt_flr_i32_f32
+; SI-NONAN: v_cvt_flr_i32_f32_e32 v{{[0-9]+}}, [[TMP]]
+; SI: s_endpgm
+define void @cvt_flr_i32_f32_1(i32 addrspace(1)* %out, float %x) #0 {
+ %fadd = fadd float %x, 1.0
+ %floor = call float @llvm.floor.f32(float %fadd) #1
+ %cvt = fptosi float %floor to i32
+ store i32 %cvt, i32 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}cvt_flr_i32_f32_fabs:
+; SI-NOT: add
+; SI-SAFE-NOT: v_cvt_flr_i32_f32
+; SI-NONAN: v_cvt_flr_i32_f32_e64 v{{[0-9]+}}, |s{{[0-9]+}}|
+; SI: s_endpgm
+define void @cvt_flr_i32_f32_fabs(i32 addrspace(1)* %out, float %x) #0 {
+ %x.fabs = call float @llvm.fabs.f32(float %x) #1
+ %floor = call float @llvm.floor.f32(float %x.fabs) #1
+ %cvt = fptosi float %floor to i32
+ store i32 %cvt, i32 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}cvt_flr_i32_f32_fneg:
+; SI-NOT: add
+; SI-SAFE-NOT: v_cvt_flr_i32_f32
+; SI-NONAN: v_cvt_flr_i32_f32_e64 v{{[0-9]+}}, -s{{[0-9]+}}
+; SI: s_endpgm
+define void @cvt_flr_i32_f32_fneg(i32 addrspace(1)* %out, float %x) #0 {
+ %x.fneg = fsub float -0.000000e+00, %x
+ %floor = call float @llvm.floor.f32(float %x.fneg) #1
+ %cvt = fptosi float %floor to i32
+ store i32 %cvt, i32 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}cvt_flr_i32_f32_fabs_fneg:
+; SI-NOT: add
+; SI-SAFE-NOT: v_cvt_flr_i32_f32
+; SI-NONAN: v_cvt_flr_i32_f32_e64 v{{[0-9]+}}, -|s{{[0-9]+}}|
+; SI: s_endpgm
+define void @cvt_flr_i32_f32_fabs_fneg(i32 addrspace(1)* %out, float %x) #0 {
+ %x.fabs = call float @llvm.fabs.f32(float %x) #1
+ %x.fabs.fneg = fsub float -0.000000e+00, %x.fabs
+ %floor = call float @llvm.floor.f32(float %x.fabs.fneg) #1
+ %cvt = fptosi float %floor to i32
+ store i32 %cvt, i32 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}no_cvt_flr_i32_f32_0:
+; SI-NOT: v_cvt_flr_i32_f32
+; SI: v_floor_f32
+; SI: v_cvt_u32_f32_e32
+; SI: s_endpgm
+define void @no_cvt_flr_i32_f32_0(i32 addrspace(1)* %out, float %x) #0 {
+ %floor = call float @llvm.floor.f32(float %x) #1
+ %cvt = fptoui float %floor to i32
+ store i32 %cvt, i32 addrspace(1)* %out
+ ret void
+}
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readnone }
diff --git a/test/CodeGen/R600/cvt_rpi_i32_f32.ll b/test/CodeGen/R600/cvt_rpi_i32_f32.ll
new file mode 100644
index 000000000000..864ac40260b3
--- /dev/null
+++ b/test/CodeGen/R600/cvt_rpi_i32_f32.ll
@@ -0,0 +1,83 @@
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI-SAFE -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI -enable-no-nans-fp-math -verify-machineinstrs < %s | FileCheck -check-prefix=SI-NONAN -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=SI-SAFE -check-prefix=SI -check-prefix=FUNC %s
+
+declare float @llvm.fabs.f32(float) #1
+declare float @llvm.floor.f32(float) #1
+
+; FUNC-LABEL: {{^}}cvt_rpi_i32_f32:
+; SI-SAFE-NOT: v_cvt_rpi_i32_f32
+; SI-NONAN: v_cvt_rpi_i32_f32_e32 v{{[0-9]+}}, s{{[0-9]+}}
+; SI: s_endpgm
+define void @cvt_rpi_i32_f32(i32 addrspace(1)* %out, float %x) #0 {
+ %fadd = fadd float %x, 0.5
+ %floor = call float @llvm.floor.f32(float %fadd) #1
+ %cvt = fptosi float %floor to i32
+ store i32 %cvt, i32 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}cvt_rpi_i32_f32_fabs:
+; SI-SAFE-NOT: v_cvt_rpi_i32_f32
+; SI-NONAN: v_cvt_rpi_i32_f32_e64 v{{[0-9]+}}, |s{{[0-9]+}}|{{$}}
+; SI: s_endpgm
+define void @cvt_rpi_i32_f32_fabs(i32 addrspace(1)* %out, float %x) #0 {
+ %x.fabs = call float @llvm.fabs.f32(float %x) #1
+ %fadd = fadd float %x.fabs, 0.5
+ %floor = call float @llvm.floor.f32(float %fadd) #1
+ %cvt = fptosi float %floor to i32
+ store i32 %cvt, i32 addrspace(1)* %out
+ ret void
+}
+
+; FIXME: This doesn't work because it forms fsub 0.5, x
+; FUNC-LABEL: {{^}}cvt_rpi_i32_f32_fneg:
+; XSI-NONAN: v_cvt_rpi_i32_f32_e64 v{{[0-9]+}}, -s{{[0-9]+}}
+; SI: v_sub_f32_e64 [[TMP:v[0-9]+]], 0.5, s{{[0-9]+}}
+; SI-SAFE-NOT: v_cvt_flr_i32_f32
+; SI-NONAN: v_cvt_flr_i32_f32_e32 {{v[0-9]+}}, [[TMP]]
+; SI: s_endpgm
+define void @cvt_rpi_i32_f32_fneg(i32 addrspace(1)* %out, float %x) #0 {
+ %x.fneg = fsub float -0.000000e+00, %x
+ %fadd = fadd float %x.fneg, 0.5
+ %floor = call float @llvm.floor.f32(float %fadd) #1
+ %cvt = fptosi float %floor to i32
+ store i32 %cvt, i32 addrspace(1)* %out
+ ret void
+}
+
+; FIXME: This doesn't work for same reason as above
+; FUNC-LABEL: {{^}}cvt_rpi_i32_f32_fabs_fneg:
+; SI-SAFE-NOT: v_cvt_rpi_i32_f32
+; XSI-NONAN: v_cvt_rpi_i32_f32_e64 v{{[0-9]+}}, -|s{{[0-9]+}}|
+
+; SI: v_sub_f32_e64 [[TMP:v[0-9]+]], 0.5, |s{{[0-9]+}}|
+; SI-SAFE-NOT: v_cvt_flr_i32_f32
+; SI-NONAN: v_cvt_flr_i32_f32_e32 {{v[0-9]+}}, [[TMP]]
+; SI: s_endpgm
+define void @cvt_rpi_i32_f32_fabs_fneg(i32 addrspace(1)* %out, float %x) #0 {
+ %x.fabs = call float @llvm.fabs.f32(float %x) #1
+ %x.fabs.fneg = fsub float -0.000000e+00, %x.fabs
+ %fadd = fadd float %x.fabs.fneg, 0.5
+ %floor = call float @llvm.floor.f32(float %fadd) #1
+ %cvt = fptosi float %floor to i32
+ store i32 %cvt, i32 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}no_cvt_rpi_i32_f32_0:
+; SI-NOT: v_cvt_rpi_i32_f32
+; SI: v_add_f32
+; SI: v_floor_f32
+; SI: v_cvt_u32_f32
+; SI: s_endpgm
+define void @no_cvt_rpi_i32_f32_0(i32 addrspace(1)* %out, float %x) #0 {
+ %fadd = fadd float %x, 0.5
+ %floor = call float @llvm.floor.f32(float %fadd) #1
+ %cvt = fptoui float %floor to i32
+ store i32 %cvt, i32 addrspace(1)* %out
+ ret void
+}
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readnone }
diff --git a/test/CodeGen/R600/dagcombiner-bug-illegal-vec4-int-to-fp.ll b/test/CodeGen/R600/dagcombiner-bug-illegal-vec4-int-to-fp.ll
index 1e47bfa0c779..fb43ff4fbddd 100644
--- a/test/CodeGen/R600/dagcombiner-bug-illegal-vec4-int-to-fp.ll
+++ b/test/CodeGen/R600/dagcombiner-bug-illegal-vec4-int-to-fp.ll
@@ -12,8 +12,8 @@
define void @sint(<4 x float> addrspace(1)* %out, i32 addrspace(1)* %in) {
entry:
- %ptr = getelementptr i32 addrspace(1)* %in, i32 1
- %sint = load i32 addrspace(1) * %in
+ %ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
+ %sint = load i32, i32 addrspace(1) * %in
%conv = sitofp i32 %sint to float
%0 = insertelement <4 x float> undef, float %conv, i32 0
%splat = shufflevector <4 x float> %0, <4 x float> undef, <4 x i32> zeroinitializer
@@ -26,8 +26,8 @@ entry:
define void @uint(<4 x float> addrspace(1)* %out, i32 addrspace(1)* %in) {
entry:
- %ptr = getelementptr i32 addrspace(1)* %in, i32 1
- %uint = load i32 addrspace(1) * %in
+ %ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
+ %uint = load i32, i32 addrspace(1) * %in
%conv = uitofp i32 %uint to float
%0 = insertelement <4 x float> undef, float %conv, i32 0
%splat = shufflevector <4 x float> %0, <4 x float> undef, <4 x i32> zeroinitializer
diff --git a/test/CodeGen/R600/debug.ll b/test/CodeGen/R600/debug.ll
new file mode 100644
index 000000000000..a2e0e878b740
--- /dev/null
+++ b/test/CodeGen/R600/debug.ll
@@ -0,0 +1,10 @@
+; RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs -mattr=dumpcode -filetype=obj | FileCheck --check-prefix=SI --check-prefix=FUNC %s
+; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs -mattr=dumpcode -filetype=obj | FileCheck --check-prefix=SI --check-prefix=FUNC %s
+
+; Test for a crash in the custom assembly dump code.
+
+; SI: s_endpgm
+define void @test(i32 addrspace(1)* %out) {
+ store i32 0, i32 addrspace(1)* %out
+ ret void
+}
diff --git a/test/CodeGen/R600/disconnected-predset-break-bug.ll b/test/CodeGen/R600/disconnected-predset-break-bug.ll
index 858e4b98f3ab..cdd2c0cd4f43 100644
--- a/test/CodeGen/R600/disconnected-predset-break-bug.ll
+++ b/test/CodeGen/R600/disconnected-predset-break-bug.ll
@@ -18,7 +18,7 @@ for.body: ; preds = %for.body, %entry
%i.07.in = phi i32 [ %i.07, %for.body ], [ %iterations, %entry ]
%ai.06 = phi i32 [ %add, %for.body ], [ 0, %entry ]
%i.07 = add nsw i32 %i.07.in, -1
- %arrayidx = getelementptr inbounds i32 addrspace(1)* %out, i32 %ai.06
+ %arrayidx = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 %ai.06
store i32 %i.07, i32 addrspace(1)* %arrayidx, align 4
%add = add nsw i32 %ai.06, 1
%exitcond = icmp eq i32 %add, %iterations
diff --git a/test/CodeGen/R600/dot4-folding.ll b/test/CodeGen/R600/dot4-folding.ll
index dca6a59c6e6a..4df7b63bf98e 100644
--- a/test/CodeGen/R600/dot4-folding.ll
+++ b/test/CodeGen/R600/dot4-folding.ll
@@ -14,8 +14,8 @@
define void @main(float addrspace(1)* %out) {
main_body:
- %0 = load <4 x float> addrspace(8)* null
- %1 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+ %0 = load <4 x float>, <4 x float> addrspace(8)* null
+ %1 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
%2 = call float @llvm.AMDGPU.dp4(<4 x float> %0,<4 x float> %1)
%3 = insertelement <4 x float> undef, float %2, i32 0
call void @llvm.R600.store.swizzle(<4 x float> %3, i32 0, i32 0)
diff --git a/test/CodeGen/R600/ds-negative-offset-addressing-mode-loop.ll b/test/CodeGen/R600/ds-negative-offset-addressing-mode-loop.ll
index 41afd503ef88..e7e13d6178c4 100644
--- a/test/CodeGen/R600/ds-negative-offset-addressing-mode-loop.ll
+++ b/test/CodeGen/R600/ds-negative-offset-addressing-mode-loop.ll
@@ -18,7 +18,7 @@ declare void @llvm.AMDGPU.barrier.local() #1
; SI-DAG: v_add_i32_e32 [[VADDR0x100:v[0-9]+]], 0x100, [[VADDR]]
; SI-DAG: ds_read_b32 v{{[0-9]+}}, [[VADDR0x100]]
-; CI-DAG: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[VADDR]] offset0:0 offset1:1
+; CI-DAG: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[VADDR]] offset1:1
; CI-DAG: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[VADDR]] offset0:32 offset1:33
; CI-DAG: ds_read_b32 v{{[0-9]+}}, [[VADDR]] offset:256
; CHECK: s_endpgm
@@ -33,20 +33,20 @@ for.body: ; preds = %for.body, %entry
%offset.02 = phi i32 [ %mul, %entry ], [ %add14, %for.body ]
%k.01 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
tail call void @llvm.AMDGPU.barrier.local() #1
- %arrayidx = getelementptr inbounds float addrspace(3)* %lptr, i32 %offset.02
- %tmp = load float addrspace(3)* %arrayidx, align 4
+ %arrayidx = getelementptr inbounds float, float addrspace(3)* %lptr, i32 %offset.02
+ %tmp = load float, float addrspace(3)* %arrayidx, align 4
%add1 = add nsw i32 %offset.02, 1
- %arrayidx2 = getelementptr inbounds float addrspace(3)* %lptr, i32 %add1
- %tmp1 = load float addrspace(3)* %arrayidx2, align 4
+ %arrayidx2 = getelementptr inbounds float, float addrspace(3)* %lptr, i32 %add1
+ %tmp1 = load float, float addrspace(3)* %arrayidx2, align 4
%add3 = add nsw i32 %offset.02, 32
- %arrayidx4 = getelementptr inbounds float addrspace(3)* %lptr, i32 %add3
- %tmp2 = load float addrspace(3)* %arrayidx4, align 4
+ %arrayidx4 = getelementptr inbounds float, float addrspace(3)* %lptr, i32 %add3
+ %tmp2 = load float, float addrspace(3)* %arrayidx4, align 4
%add5 = add nsw i32 %offset.02, 33
- %arrayidx6 = getelementptr inbounds float addrspace(3)* %lptr, i32 %add5
- %tmp3 = load float addrspace(3)* %arrayidx6, align 4
+ %arrayidx6 = getelementptr inbounds float, float addrspace(3)* %lptr, i32 %add5
+ %tmp3 = load float, float addrspace(3)* %arrayidx6, align 4
%add7 = add nsw i32 %offset.02, 64
- %arrayidx8 = getelementptr inbounds float addrspace(3)* %lptr, i32 %add7
- %tmp4 = load float addrspace(3)* %arrayidx8, align 4
+ %arrayidx8 = getelementptr inbounds float, float addrspace(3)* %lptr, i32 %add7
+ %tmp4 = load float, float addrspace(3)* %arrayidx8, align 4
%add9 = fadd float %tmp, %tmp1
%add10 = fadd float %add9, %tmp2
%add11 = fadd float %add10, %tmp3
@@ -59,7 +59,7 @@ for.body: ; preds = %for.body, %entry
for.end: ; preds = %for.body
%tmp5 = sext i32 %x.i to i64
- %arrayidx15 = getelementptr inbounds float addrspace(1)* %out, i64 %tmp5
+ %arrayidx15 = getelementptr inbounds float, float addrspace(1)* %out, i64 %tmp5
store float %add13, float addrspace(1)* %arrayidx15, align 4
ret void
}
diff --git a/test/CodeGen/R600/ds_read2.ll b/test/CodeGen/R600/ds_read2.ll
index c06b0b1392e2..5929898f8bd8 100644
--- a/test/CodeGen/R600/ds_read2.ll
+++ b/test/CodeGen/R600/ds_read2.ll
@@ -7,39 +7,39 @@
@lds.f64 = addrspace(3) global [512 x double] undef, align 8
; SI-LABEL: @simple_read2_f32
-; SI: ds_read2_b32 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:0 offset1:8
+; SI: ds_read2_b32 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset1:8
; SI: s_waitcnt lgkmcnt(0)
; SI: v_add_f32_e32 [[RESULT:v[0-9]+]], v[[HI_VREG]], v[[LO_VREG]]
; SI: buffer_store_dword [[RESULT]]
; SI: s_endpgm
define void @simple_read2_f32(float addrspace(1)* %out) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
- %arrayidx0 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
- %val0 = load float addrspace(3)* %arrayidx0, align 4
+ %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
+ %val0 = load float, float addrspace(3)* %arrayidx0, align 4
%add.x = add nsw i32 %x.i, 8
- %arrayidx1 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
- %val1 = load float addrspace(3)* %arrayidx1, align 4
+ %arrayidx1 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
+ %val1 = load float, float addrspace(3)* %arrayidx1, align 4
%sum = fadd float %val0, %val1
- %out.gep = getelementptr inbounds float addrspace(1)* %out, i32 %x.i
+ %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i32 %x.i
store float %sum, float addrspace(1)* %out.gep, align 4
ret void
}
; SI-LABEL: @simple_read2_f32_max_offset
-; SI: ds_read2_b32 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:0 offset1:255
+; SI: ds_read2_b32 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset1:255
; SI: s_waitcnt lgkmcnt(0)
; SI: v_add_f32_e32 [[RESULT:v[0-9]+]], v[[HI_VREG]], v[[LO_VREG]]
; SI: buffer_store_dword [[RESULT]]
; SI: s_endpgm
define void @simple_read2_f32_max_offset(float addrspace(1)* %out) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
- %arrayidx0 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
- %val0 = load float addrspace(3)* %arrayidx0, align 4
+ %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
+ %val0 = load float, float addrspace(3)* %arrayidx0, align 4
%add.x = add nsw i32 %x.i, 255
- %arrayidx1 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
- %val1 = load float addrspace(3)* %arrayidx1, align 4
+ %arrayidx1 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
+ %val1 = load float, float addrspace(3)* %arrayidx1, align 4
%sum = fadd float %val0, %val1
- %out.gep = getelementptr inbounds float addrspace(1)* %out, i32 %x.i
+ %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i32 %x.i
store float %sum, float addrspace(1)* %out.gep, align 4
ret void
}
@@ -51,77 +51,77 @@ define void @simple_read2_f32_max_offset(float addrspace(1)* %out) #0 {
; SI: s_endpgm
define void @simple_read2_f32_too_far(float addrspace(1)* %out) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
- %arrayidx0 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
- %val0 = load float addrspace(3)* %arrayidx0, align 4
+ %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
+ %val0 = load float, float addrspace(3)* %arrayidx0, align 4
%add.x = add nsw i32 %x.i, 257
- %arrayidx1 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
- %val1 = load float addrspace(3)* %arrayidx1, align 4
+ %arrayidx1 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
+ %val1 = load float, float addrspace(3)* %arrayidx1, align 4
%sum = fadd float %val0, %val1
- %out.gep = getelementptr inbounds float addrspace(1)* %out, i32 %x.i
+ %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i32 %x.i
store float %sum, float addrspace(1)* %out.gep, align 4
ret void
}
; SI-LABEL: @simple_read2_f32_x2
-; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[BASEADDR:v[0-9]+]] offset0:0 offset1:8
+; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[BASEADDR:v[0-9]+]] offset1:8
; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[BASEADDR]] offset0:11 offset1:27
; SI: s_endpgm
define void @simple_read2_f32_x2(float addrspace(1)* %out) #0 {
%tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
%idx.0 = add nsw i32 %tid.x, 0
- %arrayidx0 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.0
- %val0 = load float addrspace(3)* %arrayidx0, align 4
+ %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.0
+ %val0 = load float, float addrspace(3)* %arrayidx0, align 4
%idx.1 = add nsw i32 %tid.x, 8
- %arrayidx1 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.1
- %val1 = load float addrspace(3)* %arrayidx1, align 4
+ %arrayidx1 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.1
+ %val1 = load float, float addrspace(3)* %arrayidx1, align 4
%sum.0 = fadd float %val0, %val1
%idx.2 = add nsw i32 %tid.x, 11
- %arrayidx2 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.2
- %val2 = load float addrspace(3)* %arrayidx2, align 4
+ %arrayidx2 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.2
+ %val2 = load float, float addrspace(3)* %arrayidx2, align 4
%idx.3 = add nsw i32 %tid.x, 27
- %arrayidx3 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.3
- %val3 = load float addrspace(3)* %arrayidx3, align 4
+ %arrayidx3 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.3
+ %val3 = load float, float addrspace(3)* %arrayidx3, align 4
%sum.1 = fadd float %val2, %val3
%sum = fadd float %sum.0, %sum.1
- %out.gep = getelementptr inbounds float addrspace(1)* %out, i32 %idx.0
+ %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i32 %idx.0
store float %sum, float addrspace(1)* %out.gep, align 4
ret void
}
; Make sure there is an instruction between the two sets of reads.
; SI-LABEL: @simple_read2_f32_x2_barrier
-; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[BASEADDR:v[0-9]+]] offset0:0 offset1:8
+; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[BASEADDR:v[0-9]+]] offset1:8
; SI: s_barrier
; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[BASEADDR]] offset0:11 offset1:27
; SI: s_endpgm
define void @simple_read2_f32_x2_barrier(float addrspace(1)* %out) #0 {
%tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
%idx.0 = add nsw i32 %tid.x, 0
- %arrayidx0 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.0
- %val0 = load float addrspace(3)* %arrayidx0, align 4
+ %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.0
+ %val0 = load float, float addrspace(3)* %arrayidx0, align 4
%idx.1 = add nsw i32 %tid.x, 8
- %arrayidx1 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.1
- %val1 = load float addrspace(3)* %arrayidx1, align 4
+ %arrayidx1 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.1
+ %val1 = load float, float addrspace(3)* %arrayidx1, align 4
%sum.0 = fadd float %val0, %val1
call void @llvm.AMDGPU.barrier.local() #2
%idx.2 = add nsw i32 %tid.x, 11
- %arrayidx2 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.2
- %val2 = load float addrspace(3)* %arrayidx2, align 4
+ %arrayidx2 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.2
+ %val2 = load float, float addrspace(3)* %arrayidx2, align 4
%idx.3 = add nsw i32 %tid.x, 27
- %arrayidx3 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.3
- %val3 = load float addrspace(3)* %arrayidx3, align 4
+ %arrayidx3 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.3
+ %val3 = load float, float addrspace(3)* %arrayidx3, align 4
%sum.1 = fadd float %val2, %val3
%sum = fadd float %sum.0, %sum.1
- %out.gep = getelementptr inbounds float addrspace(1)* %out, i32 %idx.0
+ %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i32 %idx.0
store float %sum, float addrspace(1)* %out.gep, align 4
ret void
}
@@ -136,25 +136,25 @@ define void @simple_read2_f32_x2_barrier(float addrspace(1)* %out) #0 {
define void @simple_read2_f32_x2_nonzero_base(float addrspace(1)* %out) #0 {
%tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
%idx.0 = add nsw i32 %tid.x, 2
- %arrayidx0 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.0
- %val0 = load float addrspace(3)* %arrayidx0, align 4
+ %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.0
+ %val0 = load float, float addrspace(3)* %arrayidx0, align 4
%idx.1 = add nsw i32 %tid.x, 8
- %arrayidx1 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.1
- %val1 = load float addrspace(3)* %arrayidx1, align 4
+ %arrayidx1 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.1
+ %val1 = load float, float addrspace(3)* %arrayidx1, align 4
%sum.0 = fadd float %val0, %val1
%idx.2 = add nsw i32 %tid.x, 11
- %arrayidx2 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.2
- %val2 = load float addrspace(3)* %arrayidx2, align 4
+ %arrayidx2 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.2
+ %val2 = load float, float addrspace(3)* %arrayidx2, align 4
%idx.3 = add nsw i32 %tid.x, 27
- %arrayidx3 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.3
- %val3 = load float addrspace(3)* %arrayidx3, align 4
+ %arrayidx3 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.3
+ %val3 = load float, float addrspace(3)* %arrayidx3, align 4
%sum.1 = fadd float %val2, %val3
%sum = fadd float %sum.0, %sum.1
- %out.gep = getelementptr inbounds float addrspace(1)* %out, i32 %idx.0
+ %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i32 %idx.0
store float %sum, float addrspace(1)* %out.gep, align 4
ret void
}
@@ -174,14 +174,14 @@ define void @read2_ptr_is_subreg_arg_f32(float addrspace(1)* %out, <2 x float ad
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
%index.0 = insertelement <2 x i32> undef, i32 %x.i, i32 0
%index.1 = insertelement <2 x i32> %index.0, i32 8, i32 0
- %gep = getelementptr inbounds <2 x float addrspace(3)*> %lds.ptr, <2 x i32> %index.1
+ %gep = getelementptr inbounds float, <2 x float addrspace(3)*> %lds.ptr, <2 x i32> %index.1
%gep.0 = extractelement <2 x float addrspace(3)*> %gep, i32 0
%gep.1 = extractelement <2 x float addrspace(3)*> %gep, i32 1
- %val0 = load float addrspace(3)* %gep.0, align 4
- %val1 = load float addrspace(3)* %gep.1, align 4
+ %val0 = load float, float addrspace(3)* %gep.0, align 4
+ %val1 = load float, float addrspace(3)* %gep.1, align 4
%add.x = add nsw i32 %x.i, 8
%sum = fadd float %val0, %val1
- %out.gep = getelementptr inbounds float addrspace(1)* %out, i32 %x.i
+ %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i32 %x.i
store float %sum, float addrspace(1)* %out.gep, align 4
ret void
}
@@ -200,18 +200,18 @@ define void @read2_ptr_is_subreg_arg_offset_f32(float addrspace(1)* %out, <2 x f
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
%index.0 = insertelement <2 x i32> undef, i32 %x.i, i32 0
%index.1 = insertelement <2 x i32> %index.0, i32 8, i32 0
- %gep = getelementptr inbounds <2 x float addrspace(3)*> %lds.ptr, <2 x i32> %index.1
+ %gep = getelementptr inbounds float, <2 x float addrspace(3)*> %lds.ptr, <2 x i32> %index.1
%gep.0 = extractelement <2 x float addrspace(3)*> %gep, i32 0
%gep.1 = extractelement <2 x float addrspace(3)*> %gep, i32 1
; Apply an additional offset after the vector that will be more obviously folded.
- %gep.1.offset = getelementptr float addrspace(3)* %gep.1, i32 8
+ %gep.1.offset = getelementptr float, float addrspace(3)* %gep.1, i32 8
- %val0 = load float addrspace(3)* %gep.0, align 4
- %val1 = load float addrspace(3)* %gep.1.offset, align 4
+ %val0 = load float, float addrspace(3)* %gep.0, align 4
+ %val1 = load float, float addrspace(3)* %gep.1.offset, align 4
%add.x = add nsw i32 %x.i, 8
%sum = fadd float %val0, %val1
- %out.gep = getelementptr inbounds float addrspace(1)* %out, i32 %x.i
+ %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i32 %x.i
store float %sum, float addrspace(1)* %out.gep, align 4
ret void
}
@@ -228,14 +228,14 @@ define void @read2_ptr_is_subreg_f32(float addrspace(1)* %out) #0 {
%x.i.v.0 = insertelement <2 x i32> undef, i32 %x.i, i32 0
%x.i.v.1 = insertelement <2 x i32> %x.i.v.0, i32 %x.i, i32 1
%idx = add <2 x i32> %x.i.v.1, <i32 0, i32 8>
- %gep = getelementptr inbounds <2 x [512 x float] addrspace(3)*> %ptr.1, <2 x i32> <i32 0, i32 0>, <2 x i32> %idx
+ %gep = getelementptr inbounds [512 x float], <2 x [512 x float] addrspace(3)*> %ptr.1, <2 x i32> <i32 0, i32 0>, <2 x i32> %idx
%gep.0 = extractelement <2 x float addrspace(3)*> %gep, i32 0
%gep.1 = extractelement <2 x float addrspace(3)*> %gep, i32 1
- %val0 = load float addrspace(3)* %gep.0, align 4
- %val1 = load float addrspace(3)* %gep.1, align 4
+ %val0 = load float, float addrspace(3)* %gep.0, align 4
+ %val1 = load float, float addrspace(3)* %gep.1, align 4
%add.x = add nsw i32 %x.i, 8
%sum = fadd float %val0, %val1
- %out.gep = getelementptr inbounds float addrspace(1)* %out, i32 %x.i
+ %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i32 %x.i
store float %sum, float addrspace(1)* %out.gep, align 4
ret void
}
@@ -247,13 +247,13 @@ define void @read2_ptr_is_subreg_f32(float addrspace(1)* %out) #0 {
; SI: s_endpgm
define void @simple_read2_f32_volatile_0(float addrspace(1)* %out) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
- %arrayidx0 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
- %val0 = load volatile float addrspace(3)* %arrayidx0, align 4
+ %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
+ %val0 = load volatile float, float addrspace(3)* %arrayidx0, align 4
%add.x = add nsw i32 %x.i, 8
- %arrayidx1 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
- %val1 = load float addrspace(3)* %arrayidx1, align 4
+ %arrayidx1 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
+ %val1 = load float, float addrspace(3)* %arrayidx1, align 4
%sum = fadd float %val0, %val1
- %out.gep = getelementptr inbounds float addrspace(1)* %out, i32 %x.i
+ %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i32 %x.i
store float %sum, float addrspace(1)* %out.gep, align 4
ret void
}
@@ -265,13 +265,13 @@ define void @simple_read2_f32_volatile_0(float addrspace(1)* %out) #0 {
; SI: s_endpgm
define void @simple_read2_f32_volatile_1(float addrspace(1)* %out) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
- %arrayidx0 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
- %val0 = load float addrspace(3)* %arrayidx0, align 4
+ %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
+ %val0 = load float, float addrspace(3)* %arrayidx0, align 4
%add.x = add nsw i32 %x.i, 8
- %arrayidx1 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
- %val1 = load volatile float addrspace(3)* %arrayidx1, align 4
+ %arrayidx1 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
+ %val1 = load volatile float, float addrspace(3)* %arrayidx1, align 4
%sum = fadd float %val0, %val1
- %out.gep = getelementptr inbounds float addrspace(1)* %out, i32 %x.i
+ %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i32 %x.i
store float %sum, float addrspace(1)* %out.gep, align 4
ret void
}
@@ -284,13 +284,13 @@ define void @simple_read2_f32_volatile_1(float addrspace(1)* %out) #0 {
; SI: s_endpgm
define void @unaligned_read2_f32(float addrspace(1)* %out, float addrspace(3)* %lds) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
- %arrayidx0 = getelementptr inbounds float addrspace(3)* %lds, i32 %x.i
- %val0 = load float addrspace(3)* %arrayidx0, align 1
+ %arrayidx0 = getelementptr inbounds float, float addrspace(3)* %lds, i32 %x.i
+ %val0 = load float, float addrspace(3)* %arrayidx0, align 1
%add.x = add nsw i32 %x.i, 8
- %arrayidx1 = getelementptr inbounds float addrspace(3)* %lds, i32 %add.x
- %val1 = load float addrspace(3)* %arrayidx1, align 1
+ %arrayidx1 = getelementptr inbounds float, float addrspace(3)* %lds, i32 %add.x
+ %val1 = load float, float addrspace(3)* %arrayidx1, align 1
%sum = fadd float %val0, %val1
- %out.gep = getelementptr inbounds float addrspace(1)* %out, i32 %x.i
+ %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i32 %x.i
store float %sum, float addrspace(1)* %out.gep, align 4
ret void
}
@@ -300,48 +300,48 @@ define void @unaligned_read2_f32(float addrspace(1)* %out, float addrspace(3)* %
; SI: s_endpgm
define void @misaligned_2_simple_read2_f32(float addrspace(1)* %out, float addrspace(3)* %lds) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
- %arrayidx0 = getelementptr inbounds float addrspace(3)* %lds, i32 %x.i
- %val0 = load float addrspace(3)* %arrayidx0, align 2
+ %arrayidx0 = getelementptr inbounds float, float addrspace(3)* %lds, i32 %x.i
+ %val0 = load float, float addrspace(3)* %arrayidx0, align 2
%add.x = add nsw i32 %x.i, 8
- %arrayidx1 = getelementptr inbounds float addrspace(3)* %lds, i32 %add.x
- %val1 = load float addrspace(3)* %arrayidx1, align 2
+ %arrayidx1 = getelementptr inbounds float, float addrspace(3)* %lds, i32 %add.x
+ %val1 = load float, float addrspace(3)* %arrayidx1, align 2
%sum = fadd float %val0, %val1
- %out.gep = getelementptr inbounds float addrspace(1)* %out, i32 %x.i
+ %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i32 %x.i
store float %sum, float addrspace(1)* %out.gep, align 4
ret void
}
; SI-LABEL: @simple_read2_f64
; SI: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 3, {{v[0-9]+}}
-; SI: ds_read2_b64 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, [[VPTR]] offset0:0 offset1:8
+; SI: ds_read2_b64 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, [[VPTR]] offset1:8
; SI: v_add_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], v{{\[}}[[LO_VREG]]:{{[0-9]+\]}}, v{{\[[0-9]+}}:[[HI_VREG]]{{\]}}
; SI: buffer_store_dwordx2 [[RESULT]]
; SI: s_endpgm
define void @simple_read2_f64(double addrspace(1)* %out) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
- %arrayidx0 = getelementptr inbounds [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %x.i
- %val0 = load double addrspace(3)* %arrayidx0, align 8
+ %arrayidx0 = getelementptr inbounds [512 x double], [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %x.i
+ %val0 = load double, double addrspace(3)* %arrayidx0, align 8
%add.x = add nsw i32 %x.i, 8
- %arrayidx1 = getelementptr inbounds [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %add.x
- %val1 = load double addrspace(3)* %arrayidx1, align 8
+ %arrayidx1 = getelementptr inbounds [512 x double], [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %add.x
+ %val1 = load double, double addrspace(3)* %arrayidx1, align 8
%sum = fadd double %val0, %val1
- %out.gep = getelementptr inbounds double addrspace(1)* %out, i32 %x.i
+ %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i32 %x.i
store double %sum, double addrspace(1)* %out.gep, align 8
ret void
}
; SI-LABEL: @simple_read2_f64_max_offset
-; SI: ds_read2_b64 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:0 offset1:255
+; SI: ds_read2_b64 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset1:255
; SI: s_endpgm
define void @simple_read2_f64_max_offset(double addrspace(1)* %out) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
- %arrayidx0 = getelementptr inbounds [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %x.i
- %val0 = load double addrspace(3)* %arrayidx0, align 8
+ %arrayidx0 = getelementptr inbounds [512 x double], [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %x.i
+ %val0 = load double, double addrspace(3)* %arrayidx0, align 8
%add.x = add nsw i32 %x.i, 255
- %arrayidx1 = getelementptr inbounds [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %add.x
- %val1 = load double addrspace(3)* %arrayidx1, align 8
+ %arrayidx1 = getelementptr inbounds [512 x double], [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %add.x
+ %val1 = load double, double addrspace(3)* %arrayidx1, align 8
%sum = fadd double %val0, %val1
- %out.gep = getelementptr inbounds double addrspace(1)* %out, i32 %x.i
+ %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i32 %x.i
store double %sum, double addrspace(1)* %out.gep, align 8
ret void
}
@@ -353,31 +353,31 @@ define void @simple_read2_f64_max_offset(double addrspace(1)* %out) #0 {
; SI: s_endpgm
define void @simple_read2_f64_too_far(double addrspace(1)* %out) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
- %arrayidx0 = getelementptr inbounds [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %x.i
- %val0 = load double addrspace(3)* %arrayidx0, align 8
+ %arrayidx0 = getelementptr inbounds [512 x double], [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %x.i
+ %val0 = load double, double addrspace(3)* %arrayidx0, align 8
%add.x = add nsw i32 %x.i, 257
- %arrayidx1 = getelementptr inbounds [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %add.x
- %val1 = load double addrspace(3)* %arrayidx1, align 8
+ %arrayidx1 = getelementptr inbounds [512 x double], [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %add.x
+ %val1 = load double, double addrspace(3)* %arrayidx1, align 8
%sum = fadd double %val0, %val1
- %out.gep = getelementptr inbounds double addrspace(1)* %out, i32 %x.i
+ %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i32 %x.i
store double %sum, double addrspace(1)* %out.gep, align 8
ret void
}
; Alignment only 4
; SI-LABEL: @misaligned_read2_f64
-; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset0:0 offset1:1
+; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset1:1
; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset0:14 offset1:15
; SI: s_endpgm
define void @misaligned_read2_f64(double addrspace(1)* %out, double addrspace(3)* %lds) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
- %arrayidx0 = getelementptr inbounds double addrspace(3)* %lds, i32 %x.i
- %val0 = load double addrspace(3)* %arrayidx0, align 4
+ %arrayidx0 = getelementptr inbounds double, double addrspace(3)* %lds, i32 %x.i
+ %val0 = load double, double addrspace(3)* %arrayidx0, align 4
%add.x = add nsw i32 %x.i, 7
- %arrayidx1 = getelementptr inbounds double addrspace(3)* %lds, i32 %add.x
- %val1 = load double addrspace(3)* %arrayidx1, align 4
+ %arrayidx1 = getelementptr inbounds double, double addrspace(3)* %lds, i32 %add.x
+ %val1 = load double, double addrspace(3)* %arrayidx1, align 4
%sum = fadd double %val0, %val1
- %out.gep = getelementptr inbounds double addrspace(1)* %out, i32 %x.i
+ %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i32 %x.i
store double %sum, double addrspace(1)* %out.gep, align 4
ret void
}
@@ -386,10 +386,10 @@ define void @misaligned_read2_f64(double addrspace(1)* %out, double addrspace(3)
; SI-LABEL: @load_constant_adjacent_offsets
; SI: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
-; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]] offset0:0 offset1:1
+; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]] offset1:1
define void @load_constant_adjacent_offsets(i32 addrspace(1)* %out) {
- %val0 = load i32 addrspace(3)* getelementptr inbounds ([4 x i32] addrspace(3)* @foo, i32 0, i32 0), align 4
- %val1 = load i32 addrspace(3)* getelementptr inbounds ([4 x i32] addrspace(3)* @foo, i32 0, i32 1), align 4
+ %val0 = load i32, i32 addrspace(3)* getelementptr inbounds ([4 x i32], [4 x i32] addrspace(3)* @foo, i32 0, i32 0), align 4
+ %val1 = load i32, i32 addrspace(3)* getelementptr inbounds ([4 x i32], [4 x i32] addrspace(3)* @foo, i32 0, i32 1), align 4
%sum = add i32 %val0, %val1
store i32 %sum, i32 addrspace(1)* %out, align 4
ret void
@@ -397,10 +397,10 @@ define void @load_constant_adjacent_offsets(i32 addrspace(1)* %out) {
; SI-LABEL: @load_constant_disjoint_offsets
; SI: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
-; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]] offset0:0 offset1:2
+; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]] offset1:2
define void @load_constant_disjoint_offsets(i32 addrspace(1)* %out) {
- %val0 = load i32 addrspace(3)* getelementptr inbounds ([4 x i32] addrspace(3)* @foo, i32 0, i32 0), align 4
- %val1 = load i32 addrspace(3)* getelementptr inbounds ([4 x i32] addrspace(3)* @foo, i32 0, i32 2), align 4
+ %val0 = load i32, i32 addrspace(3)* getelementptr inbounds ([4 x i32], [4 x i32] addrspace(3)* @foo, i32 0, i32 0), align 4
+ %val1 = load i32, i32 addrspace(3)* getelementptr inbounds ([4 x i32], [4 x i32] addrspace(3)* @foo, i32 0, i32 2), align 4
%sum = add i32 %val0, %val1
store i32 %sum, i32 addrspace(1)* %out, align 4
ret void
@@ -410,11 +410,11 @@ define void @load_constant_disjoint_offsets(i32 addrspace(1)* %out) {
; SI-LABEL: @load_misaligned64_constant_offsets
; SI: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
-; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]] offset0:0 offset1:1
+; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]] offset1:1
; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]] offset0:2 offset1:3
define void @load_misaligned64_constant_offsets(i64 addrspace(1)* %out) {
- %val0 = load i64 addrspace(3)* getelementptr inbounds ([4 x i64] addrspace(3)* @bar, i32 0, i32 0), align 4
- %val1 = load i64 addrspace(3)* getelementptr inbounds ([4 x i64] addrspace(3)* @bar, i32 0, i32 1), align 4
+ %val0 = load i64, i64 addrspace(3)* getelementptr inbounds ([4 x i64], [4 x i64] addrspace(3)* @bar, i32 0, i32 0), align 4
+ %val1 = load i64, i64 addrspace(3)* getelementptr inbounds ([4 x i64], [4 x i64] addrspace(3)* @bar, i32 0, i32 1), align 4
%sum = add i64 %val0, %val1
store i64 %sum, i64 addrspace(1)* %out, align 8
ret void
@@ -425,12 +425,12 @@ define void @load_misaligned64_constant_offsets(i64 addrspace(1)* %out) {
; SI-LABEL: @load_misaligned64_constant_large_offsets
; SI-DAG: v_mov_b32_e32 [[BASE0:v[0-9]+]], 0x7ff8{{$}}
; SI-DAG: v_mov_b32_e32 [[BASE1:v[0-9]+]], 0x4000
-; SI-DAG: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[BASE0]] offset0:0 offset1:1
-; SI-DAG: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[BASE1]] offset0:0 offset1:1
+; SI-DAG: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[BASE0]] offset1:1
+; SI-DAG: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[BASE1]] offset1:1
; SI: s_endpgm
define void @load_misaligned64_constant_large_offsets(i64 addrspace(1)* %out) {
- %val0 = load i64 addrspace(3)* getelementptr inbounds ([4096 x i64] addrspace(3)* @bar.large, i32 0, i32 2048), align 4
- %val1 = load i64 addrspace(3)* getelementptr inbounds ([4096 x i64] addrspace(3)* @bar.large, i32 0, i32 4095), align 4
+ %val0 = load i64, i64 addrspace(3)* getelementptr inbounds ([4096 x i64], [4096 x i64] addrspace(3)* @bar.large, i32 0, i32 2048), align 4
+ %val1 = load i64, i64 addrspace(3)* getelementptr inbounds ([4096 x i64], [4096 x i64] addrspace(3)* @bar.large, i32 0, i32 4095), align 4
%sum = add i64 %val0, %val1
store i64 %sum, i64 addrspace(1)* %out, align 8
ret void
@@ -442,34 +442,34 @@ define void @load_misaligned64_constant_large_offsets(i64 addrspace(1)* %out) {
define void @sgemm_inner_loop_read2_sequence(float addrspace(1)* %C, i32 %lda, i32 %ldb) #0 {
%x.i = tail call i32 @llvm.r600.read.tgid.x() #1
%y.i = tail call i32 @llvm.r600.read.tidig.y() #1
- %arrayidx44 = getelementptr inbounds [264 x float] addrspace(3)* @sgemm.lA, i32 0, i32 %x.i
- %tmp16 = load float addrspace(3)* %arrayidx44, align 4
+ %arrayidx44 = getelementptr inbounds [264 x float], [264 x float] addrspace(3)* @sgemm.lA, i32 0, i32 %x.i
+ %tmp16 = load float, float addrspace(3)* %arrayidx44, align 4
%add47 = add nsw i32 %x.i, 1
- %arrayidx48 = getelementptr inbounds [264 x float] addrspace(3)* @sgemm.lA, i32 0, i32 %add47
- %tmp17 = load float addrspace(3)* %arrayidx48, align 4
+ %arrayidx48 = getelementptr inbounds [264 x float], [264 x float] addrspace(3)* @sgemm.lA, i32 0, i32 %add47
+ %tmp17 = load float, float addrspace(3)* %arrayidx48, align 4
%add51 = add nsw i32 %x.i, 16
- %arrayidx52 = getelementptr inbounds [264 x float] addrspace(3)* @sgemm.lA, i32 0, i32 %add51
- %tmp18 = load float addrspace(3)* %arrayidx52, align 4
+ %arrayidx52 = getelementptr inbounds [264 x float], [264 x float] addrspace(3)* @sgemm.lA, i32 0, i32 %add51
+ %tmp18 = load float, float addrspace(3)* %arrayidx52, align 4
%add55 = add nsw i32 %x.i, 17
- %arrayidx56 = getelementptr inbounds [264 x float] addrspace(3)* @sgemm.lA, i32 0, i32 %add55
- %tmp19 = load float addrspace(3)* %arrayidx56, align 4
- %arrayidx60 = getelementptr inbounds [776 x float] addrspace(3)* @sgemm.lB, i32 0, i32 %y.i
- %tmp20 = load float addrspace(3)* %arrayidx60, align 4
+ %arrayidx56 = getelementptr inbounds [264 x float], [264 x float] addrspace(3)* @sgemm.lA, i32 0, i32 %add55
+ %tmp19 = load float, float addrspace(3)* %arrayidx56, align 4
+ %arrayidx60 = getelementptr inbounds [776 x float], [776 x float] addrspace(3)* @sgemm.lB, i32 0, i32 %y.i
+ %tmp20 = load float, float addrspace(3)* %arrayidx60, align 4
%add63 = add nsw i32 %y.i, 1
- %arrayidx64 = getelementptr inbounds [776 x float] addrspace(3)* @sgemm.lB, i32 0, i32 %add63
- %tmp21 = load float addrspace(3)* %arrayidx64, align 4
+ %arrayidx64 = getelementptr inbounds [776 x float], [776 x float] addrspace(3)* @sgemm.lB, i32 0, i32 %add63
+ %tmp21 = load float, float addrspace(3)* %arrayidx64, align 4
%add67 = add nsw i32 %y.i, 32
- %arrayidx68 = getelementptr inbounds [776 x float] addrspace(3)* @sgemm.lB, i32 0, i32 %add67
- %tmp22 = load float addrspace(3)* %arrayidx68, align 4
+ %arrayidx68 = getelementptr inbounds [776 x float], [776 x float] addrspace(3)* @sgemm.lB, i32 0, i32 %add67
+ %tmp22 = load float, float addrspace(3)* %arrayidx68, align 4
%add71 = add nsw i32 %y.i, 33
- %arrayidx72 = getelementptr inbounds [776 x float] addrspace(3)* @sgemm.lB, i32 0, i32 %add71
- %tmp23 = load float addrspace(3)* %arrayidx72, align 4
+ %arrayidx72 = getelementptr inbounds [776 x float], [776 x float] addrspace(3)* @sgemm.lB, i32 0, i32 %add71
+ %tmp23 = load float, float addrspace(3)* %arrayidx72, align 4
%add75 = add nsw i32 %y.i, 64
- %arrayidx76 = getelementptr inbounds [776 x float] addrspace(3)* @sgemm.lB, i32 0, i32 %add75
- %tmp24 = load float addrspace(3)* %arrayidx76, align 4
+ %arrayidx76 = getelementptr inbounds [776 x float], [776 x float] addrspace(3)* @sgemm.lB, i32 0, i32 %add75
+ %tmp24 = load float, float addrspace(3)* %arrayidx76, align 4
%add79 = add nsw i32 %y.i, 65
- %arrayidx80 = getelementptr inbounds [776 x float] addrspace(3)* @sgemm.lB, i32 0, i32 %add79
- %tmp25 = load float addrspace(3)* %arrayidx80, align 4
+ %arrayidx80 = getelementptr inbounds [776 x float], [776 x float] addrspace(3)* @sgemm.lB, i32 0, i32 %add79
+ %tmp25 = load float, float addrspace(3)* %arrayidx80, align 4
%sum.0 = fadd float %tmp16, %tmp17
%sum.1 = fadd float %sum.0, %tmp18
%sum.2 = fadd float %sum.1, %tmp19
@@ -484,13 +484,13 @@ define void @sgemm_inner_loop_read2_sequence(float addrspace(1)* %C, i32 %lda, i
}
define void @misaligned_read2_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(3)* %in) #0 {
- %load = load <2 x i32> addrspace(3)* %in, align 4
+ %load = load <2 x i32>, <2 x i32> addrspace(3)* %in, align 4
store <2 x i32> %load, <2 x i32> addrspace(1)* %out, align 8
ret void
}
define void @misaligned_read2_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %in) #0 {
- %load = load i64 addrspace(3)* %in, align 4
+ %load = load i64, i64 addrspace(3)* %in, align 4
store i64 %load, i64 addrspace(1)* %out, align 8
ret void
}
diff --git a/test/CodeGen/R600/ds_read2_offset_order.ll b/test/CodeGen/R600/ds_read2_offset_order.ll
index 44306bc9d38f..9ea9a5a2617b 100644
--- a/test/CodeGen/R600/ds_read2_offset_order.ll
+++ b/test/CodeGen/R600/ds_read2_offset_order.ll
@@ -14,31 +14,31 @@
define void @offset_order(float addrspace(1)* %out) {
entry:
- %ptr0 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 0
- %val0 = load float addrspace(3)* %ptr0
+ %ptr0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 0
+ %val0 = load float, float addrspace(3)* %ptr0
- %ptr1 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 256
- %val1 = load float addrspace(3)* %ptr1
+ %ptr1 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 256
+ %val1 = load float, float addrspace(3)* %ptr1
%add1 = fadd float %val0, %val1
- %ptr2 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 3
- %val2 = load float addrspace(3)* %ptr2
+ %ptr2 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 3
+ %val2 = load float, float addrspace(3)* %ptr2
%add2 = fadd float %add1, %val2
- %ptr3 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 2
- %val3 = load float addrspace(3)* %ptr3
+ %ptr3 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 2
+ %val3 = load float, float addrspace(3)* %ptr3
%add3 = fadd float %add2, %val3
- %ptr4 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 12
- %val4 = load float addrspace(3)* %ptr4
+ %ptr4 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 12
+ %val4 = load float, float addrspace(3)* %ptr4
%add4 = fadd float %add3, %val4
- %ptr5 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 14
- %val5 = load float addrspace(3)* %ptr5
+ %ptr5 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 14
+ %val5 = load float, float addrspace(3)* %ptr5
%add5 = fadd float %add4, %val5
- %ptr6 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 11
- %val6 = load float addrspace(3)* %ptr6
+ %ptr6 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 11
+ %val6 = load float, float addrspace(3)* %ptr6
%add6 = fadd float %add5, %val6
store float %add6, float addrspace(1)* %out
ret void
diff --git a/test/CodeGen/R600/ds_read2st64.ll b/test/CodeGen/R600/ds_read2st64.ll
index efd875e93176..54b3b45636d6 100644
--- a/test/CodeGen/R600/ds_read2st64.ll
+++ b/test/CodeGen/R600/ds_read2st64.ll
@@ -5,20 +5,20 @@
; SI-LABEL: @simple_read2st64_f32_0_1
-; SI: ds_read2st64_b32 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:0 offset1:1
+; SI: ds_read2st64_b32 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset1:1
; SI: s_waitcnt lgkmcnt(0)
; SI: v_add_f32_e32 [[RESULT:v[0-9]+]], v[[HI_VREG]], v[[LO_VREG]]
; SI: buffer_store_dword [[RESULT]]
; SI: s_endpgm
define void @simple_read2st64_f32_0_1(float addrspace(1)* %out) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
- %arrayidx0 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
- %val0 = load float addrspace(3)* %arrayidx0, align 4
+ %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
+ %val0 = load float, float addrspace(3)* %arrayidx0, align 4
%add.x = add nsw i32 %x.i, 64
- %arrayidx1 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
- %val1 = load float addrspace(3)* %arrayidx1, align 4
+ %arrayidx1 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
+ %val1 = load float, float addrspace(3)* %arrayidx1, align 4
%sum = fadd float %val0, %val1
- %out.gep = getelementptr inbounds float addrspace(1)* %out, i32 %x.i
+ %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i32 %x.i
store float %sum, float addrspace(1)* %out.gep, align 4
ret void
}
@@ -32,13 +32,13 @@ define void @simple_read2st64_f32_0_1(float addrspace(1)* %out) #0 {
define void @simple_read2st64_f32_1_2(float addrspace(1)* %out, float addrspace(3)* %lds) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
%add.x.0 = add nsw i32 %x.i, 64
- %arrayidx0 = getelementptr inbounds float addrspace(3)* %lds, i32 %add.x.0
- %val0 = load float addrspace(3)* %arrayidx0, align 4
+ %arrayidx0 = getelementptr inbounds float, float addrspace(3)* %lds, i32 %add.x.0
+ %val0 = load float, float addrspace(3)* %arrayidx0, align 4
%add.x.1 = add nsw i32 %x.i, 128
- %arrayidx1 = getelementptr inbounds float addrspace(3)* %lds, i32 %add.x.1
- %val1 = load float addrspace(3)* %arrayidx1, align 4
+ %arrayidx1 = getelementptr inbounds float, float addrspace(3)* %lds, i32 %add.x.1
+ %val1 = load float, float addrspace(3)* %arrayidx1, align 4
%sum = fadd float %val0, %val1
- %out.gep = getelementptr inbounds float addrspace(1)* %out, i32 %x.i
+ %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i32 %x.i
store float %sum, float addrspace(1)* %out.gep, align 4
ret void
}
@@ -52,13 +52,13 @@ define void @simple_read2st64_f32_1_2(float addrspace(1)* %out, float addrspace(
define void @simple_read2st64_f32_max_offset(float addrspace(1)* %out, float addrspace(3)* %lds) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
%add.x.0 = add nsw i32 %x.i, 64
- %arrayidx0 = getelementptr inbounds float addrspace(3)* %lds, i32 %add.x.0
- %val0 = load float addrspace(3)* %arrayidx0, align 4
+ %arrayidx0 = getelementptr inbounds float, float addrspace(3)* %lds, i32 %add.x.0
+ %val0 = load float, float addrspace(3)* %arrayidx0, align 4
%add.x.1 = add nsw i32 %x.i, 16320
- %arrayidx1 = getelementptr inbounds float addrspace(3)* %lds, i32 %add.x.1
- %val1 = load float addrspace(3)* %arrayidx1, align 4
+ %arrayidx1 = getelementptr inbounds float, float addrspace(3)* %lds, i32 %add.x.1
+ %val1 = load float, float addrspace(3)* %arrayidx1, align 4
%sum = fadd float %val0, %val1
- %out.gep = getelementptr inbounds float addrspace(1)* %out, i32 %x.i
+ %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i32 %x.i
store float %sum, float addrspace(1)* %out.gep, align 4
ret void
}
@@ -72,13 +72,13 @@ define void @simple_read2st64_f32_max_offset(float addrspace(1)* %out, float add
define void @simple_read2st64_f32_over_max_offset(float addrspace(1)* %out, float addrspace(3)* %lds) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
%add.x.0 = add nsw i32 %x.i, 64
- %arrayidx0 = getelementptr inbounds float addrspace(3)* %lds, i32 %add.x.0
- %val0 = load float addrspace(3)* %arrayidx0, align 4
+ %arrayidx0 = getelementptr inbounds float, float addrspace(3)* %lds, i32 %add.x.0
+ %val0 = load float, float addrspace(3)* %arrayidx0, align 4
%add.x.1 = add nsw i32 %x.i, 16384
- %arrayidx1 = getelementptr inbounds float addrspace(3)* %lds, i32 %add.x.1
- %val1 = load float addrspace(3)* %arrayidx1, align 4
+ %arrayidx1 = getelementptr inbounds float, float addrspace(3)* %lds, i32 %add.x.1
+ %val1 = load float, float addrspace(3)* %arrayidx1, align 4
%sum = fadd float %val0, %val1
- %out.gep = getelementptr inbounds float addrspace(1)* %out, i32 %x.i
+ %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i32 %x.i
store float %sum, float addrspace(1)* %out.gep, align 4
ret void
}
@@ -88,13 +88,13 @@ define void @simple_read2st64_f32_over_max_offset(float addrspace(1)* %out, floa
; SI: s_endpgm
define void @odd_invalid_read2st64_f32_0(float addrspace(1)* %out) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
- %arrayidx0 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
- %val0 = load float addrspace(3)* %arrayidx0, align 4
+ %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
+ %val0 = load float, float addrspace(3)* %arrayidx0, align 4
%add.x = add nsw i32 %x.i, 63
- %arrayidx1 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
- %val1 = load float addrspace(3)* %arrayidx1, align 4
+ %arrayidx1 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
+ %val1 = load float, float addrspace(3)* %arrayidx1, align 4
%sum = fadd float %val0, %val1
- %out.gep = getelementptr inbounds float addrspace(1)* %out, i32 %x.i
+ %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i32 %x.i
store float %sum, float addrspace(1)* %out.gep, align 4
ret void
}
@@ -105,32 +105,32 @@ define void @odd_invalid_read2st64_f32_0(float addrspace(1)* %out) #0 {
define void @odd_invalid_read2st64_f32_1(float addrspace(1)* %out) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
%add.x.0 = add nsw i32 %x.i, 64
- %arrayidx0 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x.0
- %val0 = load float addrspace(3)* %arrayidx0, align 4
+ %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x.0
+ %val0 = load float, float addrspace(3)* %arrayidx0, align 4
%add.x.1 = add nsw i32 %x.i, 127
- %arrayidx1 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x.1
- %val1 = load float addrspace(3)* %arrayidx1, align 4
+ %arrayidx1 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x.1
+ %val1 = load float, float addrspace(3)* %arrayidx1, align 4
%sum = fadd float %val0, %val1
- %out.gep = getelementptr inbounds float addrspace(1)* %out, i32 %x.i
+ %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i32 %x.i
store float %sum, float addrspace(1)* %out.gep, align 4
ret void
}
; SI-LABEL: @simple_read2st64_f64_0_1
-; SI: ds_read2st64_b64 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:0 offset1:1
+; SI: ds_read2st64_b64 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset1:1
; SI: s_waitcnt lgkmcnt(0)
; SI: v_add_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], v{{\[}}[[LO_VREG]]:{{[0-9]+\]}}, v{{\[[0-9]+}}:[[HI_VREG]]{{\]}}
; SI: buffer_store_dwordx2 [[RESULT]]
; SI: s_endpgm
define void @simple_read2st64_f64_0_1(double addrspace(1)* %out) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
- %arrayidx0 = getelementptr inbounds [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %x.i
- %val0 = load double addrspace(3)* %arrayidx0, align 8
+ %arrayidx0 = getelementptr inbounds [512 x double], [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %x.i
+ %val0 = load double, double addrspace(3)* %arrayidx0, align 8
%add.x = add nsw i32 %x.i, 64
- %arrayidx1 = getelementptr inbounds [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %add.x
- %val1 = load double addrspace(3)* %arrayidx1, align 8
+ %arrayidx1 = getelementptr inbounds [512 x double], [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %add.x
+ %val1 = load double, double addrspace(3)* %arrayidx1, align 8
%sum = fadd double %val0, %val1
- %out.gep = getelementptr inbounds double addrspace(1)* %out, i32 %x.i
+ %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i32 %x.i
store double %sum, double addrspace(1)* %out.gep, align 8
ret void
}
@@ -144,13 +144,13 @@ define void @simple_read2st64_f64_0_1(double addrspace(1)* %out) #0 {
define void @simple_read2st64_f64_1_2(double addrspace(1)* %out, double addrspace(3)* %lds) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
%add.x.0 = add nsw i32 %x.i, 64
- %arrayidx0 = getelementptr inbounds double addrspace(3)* %lds, i32 %add.x.0
- %val0 = load double addrspace(3)* %arrayidx0, align 8
+ %arrayidx0 = getelementptr inbounds double, double addrspace(3)* %lds, i32 %add.x.0
+ %val0 = load double, double addrspace(3)* %arrayidx0, align 8
%add.x.1 = add nsw i32 %x.i, 128
- %arrayidx1 = getelementptr inbounds double addrspace(3)* %lds, i32 %add.x.1
- %val1 = load double addrspace(3)* %arrayidx1, align 8
+ %arrayidx1 = getelementptr inbounds double, double addrspace(3)* %lds, i32 %add.x.1
+ %val1 = load double, double addrspace(3)* %arrayidx1, align 8
%sum = fadd double %val0, %val1
- %out.gep = getelementptr inbounds double addrspace(1)* %out, i32 %x.i
+ %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i32 %x.i
store double %sum, double addrspace(1)* %out.gep, align 8
ret void
}
@@ -158,18 +158,18 @@ define void @simple_read2st64_f64_1_2(double addrspace(1)* %out, double addrspac
; Alignment only
; SI-LABEL: @misaligned_read2st64_f64
-; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset0:0 offset1:1
+; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset1:1
; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset0:128 offset1:129
; SI: s_endpgm
define void @misaligned_read2st64_f64(double addrspace(1)* %out, double addrspace(3)* %lds) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
- %arrayidx0 = getelementptr inbounds double addrspace(3)* %lds, i32 %x.i
- %val0 = load double addrspace(3)* %arrayidx0, align 4
+ %arrayidx0 = getelementptr inbounds double, double addrspace(3)* %lds, i32 %x.i
+ %val0 = load double, double addrspace(3)* %arrayidx0, align 4
%add.x = add nsw i32 %x.i, 64
- %arrayidx1 = getelementptr inbounds double addrspace(3)* %lds, i32 %add.x
- %val1 = load double addrspace(3)* %arrayidx1, align 4
+ %arrayidx1 = getelementptr inbounds double, double addrspace(3)* %lds, i32 %add.x
+ %val1 = load double, double addrspace(3)* %arrayidx1, align 4
%sum = fadd double %val0, %val1
- %out.gep = getelementptr inbounds double addrspace(1)* %out, i32 %x.i
+ %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i32 %x.i
store double %sum, double addrspace(1)* %out.gep, align 4
ret void
}
@@ -184,13 +184,13 @@ define void @misaligned_read2st64_f64(double addrspace(1)* %out, double addrspac
define void @simple_read2st64_f64_max_offset(double addrspace(1)* %out, double addrspace(3)* %lds) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
%add.x.0 = add nsw i32 %x.i, 256
- %arrayidx0 = getelementptr inbounds double addrspace(3)* %lds, i32 %add.x.0
- %val0 = load double addrspace(3)* %arrayidx0, align 8
+ %arrayidx0 = getelementptr inbounds double, double addrspace(3)* %lds, i32 %add.x.0
+ %val0 = load double, double addrspace(3)* %arrayidx0, align 8
%add.x.1 = add nsw i32 %x.i, 8128
- %arrayidx1 = getelementptr inbounds double addrspace(3)* %lds, i32 %add.x.1
- %val1 = load double addrspace(3)* %arrayidx1, align 8
+ %arrayidx1 = getelementptr inbounds double, double addrspace(3)* %lds, i32 %add.x.1
+ %val1 = load double, double addrspace(3)* %arrayidx1, align 8
%sum = fadd double %val0, %val1
- %out.gep = getelementptr inbounds double addrspace(1)* %out, i32 %x.i
+ %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i32 %x.i
store double %sum, double addrspace(1)* %out.gep, align 8
ret void
}
@@ -204,13 +204,13 @@ define void @simple_read2st64_f64_max_offset(double addrspace(1)* %out, double a
define void @simple_read2st64_f64_over_max_offset(double addrspace(1)* %out, double addrspace(3)* %lds) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
%add.x.0 = add nsw i32 %x.i, 64
- %arrayidx0 = getelementptr inbounds double addrspace(3)* %lds, i32 %add.x.0
- %val0 = load double addrspace(3)* %arrayidx0, align 8
+ %arrayidx0 = getelementptr inbounds double, double addrspace(3)* %lds, i32 %add.x.0
+ %val0 = load double, double addrspace(3)* %arrayidx0, align 8
%add.x.1 = add nsw i32 %x.i, 8192
- %arrayidx1 = getelementptr inbounds double addrspace(3)* %lds, i32 %add.x.1
- %val1 = load double addrspace(3)* %arrayidx1, align 8
+ %arrayidx1 = getelementptr inbounds double, double addrspace(3)* %lds, i32 %add.x.1
+ %val1 = load double, double addrspace(3)* %arrayidx1, align 8
%sum = fadd double %val0, %val1
- %out.gep = getelementptr inbounds double addrspace(1)* %out, i32 %x.i
+ %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i32 %x.i
store double %sum, double addrspace(1)* %out.gep, align 8
ret void
}
@@ -221,13 +221,13 @@ define void @simple_read2st64_f64_over_max_offset(double addrspace(1)* %out, dou
define void @invalid_read2st64_f64_odd_offset(double addrspace(1)* %out, double addrspace(3)* %lds) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
%add.x.0 = add nsw i32 %x.i, 64
- %arrayidx0 = getelementptr inbounds double addrspace(3)* %lds, i32 %add.x.0
- %val0 = load double addrspace(3)* %arrayidx0, align 8
+ %arrayidx0 = getelementptr inbounds double, double addrspace(3)* %lds, i32 %add.x.0
+ %val0 = load double, double addrspace(3)* %arrayidx0, align 8
%add.x.1 = add nsw i32 %x.i, 8129
- %arrayidx1 = getelementptr inbounds double addrspace(3)* %lds, i32 %add.x.1
- %val1 = load double addrspace(3)* %arrayidx1, align 8
+ %arrayidx1 = getelementptr inbounds double, double addrspace(3)* %lds, i32 %add.x.1
+ %val1 = load double, double addrspace(3)* %arrayidx1, align 8
%sum = fadd double %val0, %val1
- %out.gep = getelementptr inbounds double addrspace(1)* %out, i32 %x.i
+ %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i32 %x.i
store double %sum, double addrspace(1)* %out.gep, align 8
ret void
}
@@ -237,17 +237,17 @@ define void @invalid_read2st64_f64_odd_offset(double addrspace(1)* %out, double
; SI-LABEL: @byte_size_only_divisible_64_read2_f64
; SI-NOT: ds_read2st_b64
-; SI: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:0 offset1:8
+; SI: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset1:8
; SI: s_endpgm
define void @byte_size_only_divisible_64_read2_f64(double addrspace(1)* %out, double addrspace(3)* %lds) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
- %arrayidx0 = getelementptr inbounds double addrspace(3)* %lds, i32 %x.i
- %val0 = load double addrspace(3)* %arrayidx0, align 8
+ %arrayidx0 = getelementptr inbounds double, double addrspace(3)* %lds, i32 %x.i
+ %val0 = load double, double addrspace(3)* %arrayidx0, align 8
%add.x = add nsw i32 %x.i, 8
- %arrayidx1 = getelementptr inbounds double addrspace(3)* %lds, i32 %add.x
- %val1 = load double addrspace(3)* %arrayidx1, align 8
+ %arrayidx1 = getelementptr inbounds double, double addrspace(3)* %lds, i32 %add.x
+ %val1 = load double, double addrspace(3)* %arrayidx1, align 8
%sum = fadd double %val0, %val1
- %out.gep = getelementptr inbounds double addrspace(1)* %out, i32 %x.i
+ %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i32 %x.i
store double %sum, double addrspace(1)* %out.gep, align 4
ret void
}
diff --git a/test/CodeGen/R600/ds_write2.ll b/test/CodeGen/R600/ds_write2.ll
index 27273e7c674d..b553d3459e40 100644
--- a/test/CodeGen/R600/ds_write2.ll
+++ b/test/CodeGen/R600/ds_write2.ll
@@ -7,16 +7,16 @@
; SI-LABEL: @simple_write2_one_val_f32
; SI-DAG: buffer_load_dword [[VAL:v[0-9]+]]
; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
-; SI: ds_write2_b32 [[VPTR]], [[VAL]], [[VAL]] offset0:0 offset1:8 [M0]
+; SI: ds_write2_b32 [[VPTR]], [[VAL]], [[VAL]] offset1:8
; SI: s_endpgm
define void @simple_write2_one_val_f32(float addrspace(1)* %C, float addrspace(1)* %in) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
- %in.gep = getelementptr float addrspace(1)* %in, i32 %x.i
- %val = load float addrspace(1)* %in.gep, align 4
- %arrayidx0 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
+ %in.gep = getelementptr float, float addrspace(1)* %in, i32 %x.i
+ %val = load float, float addrspace(1)* %in.gep, align 4
+ %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
store float %val, float addrspace(3)* %arrayidx0, align 4
%add.x = add nsw i32 %x.i, 8
- %arrayidx1 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
+ %arrayidx1 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
store float %val, float addrspace(3)* %arrayidx1, align 4
ret void
}
@@ -25,18 +25,18 @@ define void @simple_write2_one_val_f32(float addrspace(1)* %C, float addrspace(1
; SI-DAG: buffer_load_dword [[VAL0:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_load_dword [[VAL1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
-; SI: ds_write2_b32 [[VPTR]], [[VAL0]], [[VAL1]] offset0:0 offset1:8 [M0]
+; SI: ds_write2_b32 [[VPTR]], [[VAL0]], [[VAL1]] offset1:8
; SI: s_endpgm
define void @simple_write2_two_val_f32(float addrspace(1)* %C, float addrspace(1)* %in) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
- %in.gep.0 = getelementptr float addrspace(1)* %in, i32 %x.i
- %in.gep.1 = getelementptr float addrspace(1)* %in.gep.0, i32 1
- %val0 = load float addrspace(1)* %in.gep.0, align 4
- %val1 = load float addrspace(1)* %in.gep.1, align 4
- %arrayidx0 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
+ %in.gep.0 = getelementptr float, float addrspace(1)* %in, i32 %x.i
+ %in.gep.1 = getelementptr float, float addrspace(1)* %in.gep.0, i32 1
+ %val0 = load float, float addrspace(1)* %in.gep.0, align 4
+ %val1 = load float, float addrspace(1)* %in.gep.1, align 4
+ %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
store float %val0, float addrspace(3)* %arrayidx0, align 4
%add.x = add nsw i32 %x.i, 8
- %arrayidx1 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
+ %arrayidx1 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
store float %val1, float addrspace(3)* %arrayidx1, align 4
ret void
}
@@ -48,14 +48,14 @@ define void @simple_write2_two_val_f32(float addrspace(1)* %C, float addrspace(1
; SI: s_endpgm
define void @simple_write2_two_val_f32_volatile_0(float addrspace(1)* %C, float addrspace(1)* %in0, float addrspace(1)* %in1) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
- %in0.gep = getelementptr float addrspace(1)* %in0, i32 %x.i
- %in1.gep = getelementptr float addrspace(1)* %in1, i32 %x.i
- %val0 = load float addrspace(1)* %in0.gep, align 4
- %val1 = load float addrspace(1)* %in1.gep, align 4
- %arrayidx0 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
+ %in0.gep = getelementptr float, float addrspace(1)* %in0, i32 %x.i
+ %in1.gep = getelementptr float, float addrspace(1)* %in1, i32 %x.i
+ %val0 = load float, float addrspace(1)* %in0.gep, align 4
+ %val1 = load float, float addrspace(1)* %in1.gep, align 4
+ %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
store volatile float %val0, float addrspace(3)* %arrayidx0, align 4
%add.x = add nsw i32 %x.i, 8
- %arrayidx1 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
+ %arrayidx1 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
store float %val1, float addrspace(3)* %arrayidx1, align 4
ret void
}
@@ -67,14 +67,14 @@ define void @simple_write2_two_val_f32_volatile_0(float addrspace(1)* %C, float
; SI: s_endpgm
define void @simple_write2_two_val_f32_volatile_1(float addrspace(1)* %C, float addrspace(1)* %in0, float addrspace(1)* %in1) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
- %in0.gep = getelementptr float addrspace(1)* %in0, i32 %x.i
- %in1.gep = getelementptr float addrspace(1)* %in1, i32 %x.i
- %val0 = load float addrspace(1)* %in0.gep, align 4
- %val1 = load float addrspace(1)* %in1.gep, align 4
- %arrayidx0 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
+ %in0.gep = getelementptr float, float addrspace(1)* %in0, i32 %x.i
+ %in1.gep = getelementptr float, float addrspace(1)* %in1, i32 %x.i
+ %val0 = load float, float addrspace(1)* %in0.gep, align 4
+ %val1 = load float, float addrspace(1)* %in1.gep, align 4
+ %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
store float %val0, float addrspace(3)* %arrayidx0, align 4
%add.x = add nsw i32 %x.i, 8
- %arrayidx1 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
+ %arrayidx1 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
store volatile float %val1, float addrspace(3)* %arrayidx1, align 4
ret void
}
@@ -84,20 +84,20 @@ define void @simple_write2_two_val_f32_volatile_1(float addrspace(1)* %C, float
; SI: buffer_load_dwordx2 v{{\[}}[[VAL0:[0-9]+]]:{{[0-9]+\]}}
; SI: buffer_load_dwordx2 v{{\[[0-9]+}}:[[VAL1:[0-9]+]]{{\]}}
; SI: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
-; SI: ds_write2_b32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset0:0 offset1:8 [M0]
+; SI: ds_write2_b32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset1:8
; SI: s_endpgm
define void @simple_write2_two_val_subreg2_mixed_f32(float addrspace(1)* %C, <2 x float> addrspace(1)* %in) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
- %in.gep.0 = getelementptr <2 x float> addrspace(1)* %in, i32 %x.i
- %in.gep.1 = getelementptr <2 x float> addrspace(1)* %in.gep.0, i32 1
- %val0 = load <2 x float> addrspace(1)* %in.gep.0, align 8
- %val1 = load <2 x float> addrspace(1)* %in.gep.1, align 8
+ %in.gep.0 = getelementptr <2 x float>, <2 x float> addrspace(1)* %in, i32 %x.i
+ %in.gep.1 = getelementptr <2 x float>, <2 x float> addrspace(1)* %in.gep.0, i32 1
+ %val0 = load <2 x float>, <2 x float> addrspace(1)* %in.gep.0, align 8
+ %val1 = load <2 x float>, <2 x float> addrspace(1)* %in.gep.1, align 8
%val0.0 = extractelement <2 x float> %val0, i32 0
%val1.1 = extractelement <2 x float> %val1, i32 1
- %arrayidx0 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
+ %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
store float %val0.0, float addrspace(3)* %arrayidx0, align 4
%add.x = add nsw i32 %x.i, 8
- %arrayidx1 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
+ %arrayidx1 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
store float %val1.1, float addrspace(3)* %arrayidx1, align 4
ret void
}
@@ -105,18 +105,18 @@ define void @simple_write2_two_val_subreg2_mixed_f32(float addrspace(1)* %C, <2
; SI-LABEL: @simple_write2_two_val_subreg2_f32
; SI-DAG: buffer_load_dwordx2 v{{\[}}[[VAL0:[0-9]+]]:[[VAL1:[0-9]+]]{{\]}}
; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
-; SI: ds_write2_b32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset0:0 offset1:8 [M0]
+; SI: ds_write2_b32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset1:8
; SI: s_endpgm
define void @simple_write2_two_val_subreg2_f32(float addrspace(1)* %C, <2 x float> addrspace(1)* %in) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
- %in.gep = getelementptr <2 x float> addrspace(1)* %in, i32 %x.i
- %val = load <2 x float> addrspace(1)* %in.gep, align 8
+ %in.gep = getelementptr <2 x float>, <2 x float> addrspace(1)* %in, i32 %x.i
+ %val = load <2 x float>, <2 x float> addrspace(1)* %in.gep, align 8
%val0 = extractelement <2 x float> %val, i32 0
%val1 = extractelement <2 x float> %val, i32 1
- %arrayidx0 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
+ %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
store float %val0, float addrspace(3)* %arrayidx0, align 4
%add.x = add nsw i32 %x.i, 8
- %arrayidx1 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
+ %arrayidx1 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
store float %val1, float addrspace(3)* %arrayidx1, align 4
ret void
}
@@ -124,18 +124,18 @@ define void @simple_write2_two_val_subreg2_f32(float addrspace(1)* %C, <2 x floa
; SI-LABEL: @simple_write2_two_val_subreg4_f32
; SI-DAG: buffer_load_dwordx4 v{{\[}}[[VAL0:[0-9]+]]:[[VAL1:[0-9]+]]{{\]}}
; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
-; SI: ds_write2_b32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset0:0 offset1:8 [M0]
+; SI: ds_write2_b32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset1:8
; SI: s_endpgm
define void @simple_write2_two_val_subreg4_f32(float addrspace(1)* %C, <4 x float> addrspace(1)* %in) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
- %in.gep = getelementptr <4 x float> addrspace(1)* %in, i32 %x.i
- %val = load <4 x float> addrspace(1)* %in.gep, align 16
+ %in.gep = getelementptr <4 x float>, <4 x float> addrspace(1)* %in, i32 %x.i
+ %val = load <4 x float>, <4 x float> addrspace(1)* %in.gep, align 16
%val0 = extractelement <4 x float> %val, i32 0
%val1 = extractelement <4 x float> %val, i32 3
- %arrayidx0 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
+ %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
store float %val0, float addrspace(3)* %arrayidx0, align 4
%add.x = add nsw i32 %x.i, 8
- %arrayidx1 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
+ %arrayidx1 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
store float %val1, float addrspace(3)* %arrayidx1, align 4
ret void
}
@@ -144,18 +144,18 @@ define void @simple_write2_two_val_subreg4_f32(float addrspace(1)* %C, <4 x floa
; SI-DAG: buffer_load_dword [[VAL0:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_load_dword [[VAL1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
-; SI: ds_write2_b32 [[VPTR]], [[VAL0]], [[VAL1]] offset0:0 offset1:255 [M0]
+; SI: ds_write2_b32 [[VPTR]], [[VAL0]], [[VAL1]] offset1:255
; SI: s_endpgm
define void @simple_write2_two_val_max_offset_f32(float addrspace(1)* %C, float addrspace(1)* %in) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
- %in.gep.0 = getelementptr float addrspace(1)* %in, i32 %x.i
- %in.gep.1 = getelementptr float addrspace(1)* %in.gep.0, i32 1
- %val0 = load float addrspace(1)* %in.gep.0, align 4
- %val1 = load float addrspace(1)* %in.gep.1, align 4
- %arrayidx0 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
+ %in.gep.0 = getelementptr float, float addrspace(1)* %in, i32 %x.i
+ %in.gep.1 = getelementptr float, float addrspace(1)* %in.gep.0, i32 1
+ %val0 = load float, float addrspace(1)* %in.gep.0, align 4
+ %val1 = load float, float addrspace(1)* %in.gep.1, align 4
+ %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
store float %val0, float addrspace(3)* %arrayidx0, align 4
%add.x = add nsw i32 %x.i, 255
- %arrayidx1 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
+ %arrayidx1 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
store float %val1, float addrspace(3)* %arrayidx1, align 4
ret void
}
@@ -166,43 +166,43 @@ define void @simple_write2_two_val_max_offset_f32(float addrspace(1)* %C, float
; SI: s_endpgm
define void @simple_write2_two_val_too_far_f32(float addrspace(1)* %C, float addrspace(1)* %in0, float addrspace(1)* %in1) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
- %in0.gep = getelementptr float addrspace(1)* %in0, i32 %x.i
- %in1.gep = getelementptr float addrspace(1)* %in1, i32 %x.i
- %val0 = load float addrspace(1)* %in0.gep, align 4
- %val1 = load float addrspace(1)* %in1.gep, align 4
- %arrayidx0 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
+ %in0.gep = getelementptr float, float addrspace(1)* %in0, i32 %x.i
+ %in1.gep = getelementptr float, float addrspace(1)* %in1, i32 %x.i
+ %val0 = load float, float addrspace(1)* %in0.gep, align 4
+ %val1 = load float, float addrspace(1)* %in1.gep, align 4
+ %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
store float %val0, float addrspace(3)* %arrayidx0, align 4
%add.x = add nsw i32 %x.i, 257
- %arrayidx1 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
+ %arrayidx1 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
store float %val1, float addrspace(3)* %arrayidx1, align 4
ret void
}
; SI-LABEL: @simple_write2_two_val_f32_x2
-; SI: ds_write2_b32 [[BASEADDR:v[0-9]+]], [[VAL0:v[0-9]+]], [[VAL1:v[0-9]+]] offset0:0 offset1:8
+; SI: ds_write2_b32 [[BASEADDR:v[0-9]+]], [[VAL0:v[0-9]+]], [[VAL1:v[0-9]+]] offset1:8
; SI-NEXT: ds_write2_b32 [[BASEADDR]], [[VAL0]], [[VAL1]] offset0:11 offset1:27
; SI: s_endpgm
define void @simple_write2_two_val_f32_x2(float addrspace(1)* %C, float addrspace(1)* %in0, float addrspace(1)* %in1) #0 {
%tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
- %in0.gep = getelementptr float addrspace(1)* %in0, i32 %tid.x
- %in1.gep = getelementptr float addrspace(1)* %in1, i32 %tid.x
- %val0 = load float addrspace(1)* %in0.gep, align 4
- %val1 = load float addrspace(1)* %in1.gep, align 4
+ %in0.gep = getelementptr float, float addrspace(1)* %in0, i32 %tid.x
+ %in1.gep = getelementptr float, float addrspace(1)* %in1, i32 %tid.x
+ %val0 = load float, float addrspace(1)* %in0.gep, align 4
+ %val1 = load float, float addrspace(1)* %in1.gep, align 4
%idx.0 = add nsw i32 %tid.x, 0
- %arrayidx0 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.0
+ %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.0
store float %val0, float addrspace(3)* %arrayidx0, align 4
%idx.1 = add nsw i32 %tid.x, 8
- %arrayidx1 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.1
+ %arrayidx1 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.1
store float %val1, float addrspace(3)* %arrayidx1, align 4
%idx.2 = add nsw i32 %tid.x, 11
- %arrayidx2 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.2
+ %arrayidx2 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.2
store float %val0, float addrspace(3)* %arrayidx2, align 4
%idx.3 = add nsw i32 %tid.x, 27
- %arrayidx3 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.3
+ %arrayidx3 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.3
store float %val1, float addrspace(3)* %arrayidx3, align 4
ret void
}
@@ -214,25 +214,25 @@ define void @simple_write2_two_val_f32_x2(float addrspace(1)* %C, float addrspac
; SI: s_endpgm
define void @simple_write2_two_val_f32_x2_nonzero_base(float addrspace(1)* %C, float addrspace(1)* %in0, float addrspace(1)* %in1) #0 {
%tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
- %in0.gep = getelementptr float addrspace(1)* %in0, i32 %tid.x
- %in1.gep = getelementptr float addrspace(1)* %in1, i32 %tid.x
- %val0 = load float addrspace(1)* %in0.gep, align 4
- %val1 = load float addrspace(1)* %in1.gep, align 4
+ %in0.gep = getelementptr float, float addrspace(1)* %in0, i32 %tid.x
+ %in1.gep = getelementptr float, float addrspace(1)* %in1, i32 %tid.x
+ %val0 = load float, float addrspace(1)* %in0.gep, align 4
+ %val1 = load float, float addrspace(1)* %in1.gep, align 4
%idx.0 = add nsw i32 %tid.x, 3
- %arrayidx0 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.0
+ %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.0
store float %val0, float addrspace(3)* %arrayidx0, align 4
%idx.1 = add nsw i32 %tid.x, 8
- %arrayidx1 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.1
+ %arrayidx1 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.1
store float %val1, float addrspace(3)* %arrayidx1, align 4
%idx.2 = add nsw i32 %tid.x, 11
- %arrayidx2 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.2
+ %arrayidx2 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.2
store float %val0, float addrspace(3)* %arrayidx2, align 4
%idx.3 = add nsw i32 %tid.x, 27
- %arrayidx3 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.3
+ %arrayidx3 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.3
store float %val1, float addrspace(3)* %arrayidx3, align 4
ret void
}
@@ -245,19 +245,19 @@ define void @simple_write2_two_val_f32_x2_nonzero_base(float addrspace(1)* %C, f
; SI: s_endpgm
define void @write2_ptr_subreg_arg_two_val_f32(float addrspace(1)* %C, float addrspace(1)* %in0, float addrspace(1)* %in1, <2 x float addrspace(3)*> %lds.ptr) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
- %in0.gep = getelementptr float addrspace(1)* %in0, i32 %x.i
- %in1.gep = getelementptr float addrspace(1)* %in1, i32 %x.i
- %val0 = load float addrspace(1)* %in0.gep, align 4
- %val1 = load float addrspace(1)* %in1.gep, align 4
+ %in0.gep = getelementptr float, float addrspace(1)* %in0, i32 %x.i
+ %in1.gep = getelementptr float, float addrspace(1)* %in1, i32 %x.i
+ %val0 = load float, float addrspace(1)* %in0.gep, align 4
+ %val1 = load float, float addrspace(1)* %in1.gep, align 4
%index.0 = insertelement <2 x i32> undef, i32 %x.i, i32 0
%index.1 = insertelement <2 x i32> %index.0, i32 8, i32 0
- %gep = getelementptr inbounds <2 x float addrspace(3)*> %lds.ptr, <2 x i32> %index.1
+ %gep = getelementptr inbounds float, <2 x float addrspace(3)*> %lds.ptr, <2 x i32> %index.1
%gep.0 = extractelement <2 x float addrspace(3)*> %gep, i32 0
%gep.1 = extractelement <2 x float addrspace(3)*> %gep, i32 1
; Apply an additional offset after the vector that will be more obviously folded.
- %gep.1.offset = getelementptr float addrspace(3)* %gep.1, i32 8
+ %gep.1.offset = getelementptr float, float addrspace(3)* %gep.1, i32 8
store float %val0, float addrspace(3)* %gep.0, align 4
%add.x = add nsw i32 %x.i, 8
@@ -268,16 +268,16 @@ define void @write2_ptr_subreg_arg_two_val_f32(float addrspace(1)* %C, float add
; SI-LABEL: @simple_write2_one_val_f64
; SI: buffer_load_dwordx2 [[VAL:v\[[0-9]+:[0-9]+\]]],
; SI: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 3, v{{[0-9]+}}
-; SI: ds_write2_b64 [[VPTR]], [[VAL]], [[VAL]] offset0:0 offset1:8 [M0]
+; SI: ds_write2_b64 [[VPTR]], [[VAL]], [[VAL]] offset1:8
; SI: s_endpgm
define void @simple_write2_one_val_f64(double addrspace(1)* %C, double addrspace(1)* %in) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
- %in.gep = getelementptr double addrspace(1)* %in, i32 %x.i
- %val = load double addrspace(1)* %in.gep, align 8
- %arrayidx0 = getelementptr inbounds [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %x.i
+ %in.gep = getelementptr double, double addrspace(1)* %in, i32 %x.i
+ %val = load double, double addrspace(1)* %in.gep, align 8
+ %arrayidx0 = getelementptr inbounds [512 x double], [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %x.i
store double %val, double addrspace(3)* %arrayidx0, align 8
%add.x = add nsw i32 %x.i, 8
- %arrayidx1 = getelementptr inbounds [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %add.x
+ %arrayidx1 = getelementptr inbounds [512 x double], [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %add.x
store double %val, double addrspace(3)* %arrayidx1, align 8
ret void
}
@@ -285,17 +285,17 @@ define void @simple_write2_one_val_f64(double addrspace(1)* %C, double addrspace
; SI-LABEL: @misaligned_simple_write2_one_val_f64
; SI-DAG: buffer_load_dwordx2 v{{\[}}[[VAL0:[0-9]+]]:[[VAL1:[0-9]+]]{{\]}}
; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 3, v{{[0-9]+}}
-; SI: ds_write2_b32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset0:0 offset1:1 [M0]
-; SI: ds_write2_b32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset0:14 offset1:15 [M0]
+; SI: ds_write2_b32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset1:1
+; SI: ds_write2_b32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset0:14 offset1:15
; SI: s_endpgm
define void @misaligned_simple_write2_one_val_f64(double addrspace(1)* %C, double addrspace(1)* %in, double addrspace(3)* %lds) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
- %in.gep = getelementptr double addrspace(1)* %in, i32 %x.i
- %val = load double addrspace(1)* %in.gep, align 8
- %arrayidx0 = getelementptr inbounds double addrspace(3)* %lds, i32 %x.i
+ %in.gep = getelementptr double, double addrspace(1)* %in, i32 %x.i
+ %val = load double, double addrspace(1)* %in.gep, align 8
+ %arrayidx0 = getelementptr inbounds double, double addrspace(3)* %lds, i32 %x.i
store double %val, double addrspace(3)* %arrayidx0, align 4
%add.x = add nsw i32 %x.i, 7
- %arrayidx1 = getelementptr inbounds double addrspace(3)* %lds, i32 %add.x
+ %arrayidx1 = getelementptr inbounds double, double addrspace(3)* %lds, i32 %add.x
store double %val, double addrspace(3)* %arrayidx1, align 4
ret void
}
@@ -304,18 +304,18 @@ define void @misaligned_simple_write2_one_val_f64(double addrspace(1)* %C, doubl
; SI-DAG: buffer_load_dwordx2 [[VAL0:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_load_dwordx2 [[VAL1:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8
; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 3, v{{[0-9]+}}
-; SI: ds_write2_b64 [[VPTR]], [[VAL0]], [[VAL1]] offset0:0 offset1:8 [M0]
+; SI: ds_write2_b64 [[VPTR]], [[VAL0]], [[VAL1]] offset1:8
; SI: s_endpgm
define void @simple_write2_two_val_f64(double addrspace(1)* %C, double addrspace(1)* %in) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
- %in.gep.0 = getelementptr double addrspace(1)* %in, i32 %x.i
- %in.gep.1 = getelementptr double addrspace(1)* %in.gep.0, i32 1
- %val0 = load double addrspace(1)* %in.gep.0, align 8
- %val1 = load double addrspace(1)* %in.gep.1, align 8
- %arrayidx0 = getelementptr inbounds [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %x.i
+ %in.gep.0 = getelementptr double, double addrspace(1)* %in, i32 %x.i
+ %in.gep.1 = getelementptr double, double addrspace(1)* %in.gep.0, i32 1
+ %val0 = load double, double addrspace(1)* %in.gep.0, align 8
+ %val1 = load double, double addrspace(1)* %in.gep.1, align 8
+ %arrayidx0 = getelementptr inbounds [512 x double], [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %x.i
store double %val0, double addrspace(3)* %arrayidx0, align 8
%add.x = add nsw i32 %x.i, 8
- %arrayidx1 = getelementptr inbounds [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %add.x
+ %arrayidx1 = getelementptr inbounds [512 x double], [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %add.x
store double %val1, double addrspace(3)* %arrayidx1, align 8
ret void
}
@@ -324,20 +324,20 @@ define void @simple_write2_two_val_f64(double addrspace(1)* %C, double addrspace
; SI-LABEL: @store_constant_adjacent_offsets
; SI: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
-; SI: ds_write2_b32 [[ZERO]], v{{[0-9]+}}, v{{[0-9]+}} offset0:0 offset1:1
+; SI: ds_write2_b32 [[ZERO]], v{{[0-9]+}}, v{{[0-9]+}} offset1:1
define void @store_constant_adjacent_offsets() {
- store i32 123, i32 addrspace(3)* getelementptr inbounds ([4 x i32] addrspace(3)* @foo, i32 0, i32 0), align 4
- store i32 123, i32 addrspace(3)* getelementptr inbounds ([4 x i32] addrspace(3)* @foo, i32 0, i32 1), align 4
+ store i32 123, i32 addrspace(3)* getelementptr inbounds ([4 x i32], [4 x i32] addrspace(3)* @foo, i32 0, i32 0), align 4
+ store i32 123, i32 addrspace(3)* getelementptr inbounds ([4 x i32], [4 x i32] addrspace(3)* @foo, i32 0, i32 1), align 4
ret void
}
; SI-LABEL: @store_constant_disjoint_offsets
; SI-DAG: v_mov_b32_e32 [[VAL:v[0-9]+]], 0x7b{{$}}
; SI-DAG: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
-; SI: ds_write2_b32 [[ZERO]], [[VAL]], [[VAL]] offset0:0 offset1:2
+; SI: ds_write2_b32 [[ZERO]], [[VAL]], [[VAL]] offset1:2
define void @store_constant_disjoint_offsets() {
- store i32 123, i32 addrspace(3)* getelementptr inbounds ([4 x i32] addrspace(3)* @foo, i32 0, i32 0), align 4
- store i32 123, i32 addrspace(3)* getelementptr inbounds ([4 x i32] addrspace(3)* @foo, i32 0, i32 2), align 4
+ store i32 123, i32 addrspace(3)* getelementptr inbounds ([4 x i32], [4 x i32] addrspace(3)* @foo, i32 0, i32 0), align 4
+ store i32 123, i32 addrspace(3)* getelementptr inbounds ([4 x i32], [4 x i32] addrspace(3)* @foo, i32 0, i32 2), align 4
ret void
}
@@ -345,11 +345,11 @@ define void @store_constant_disjoint_offsets() {
; SI-LABEL: @store_misaligned64_constant_offsets
; SI: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
-; SI: ds_write2_b32 [[ZERO]], v{{[0-9]+}}, v{{[0-9]+}} offset0:0 offset1:1
+; SI: ds_write2_b32 [[ZERO]], v{{[0-9]+}}, v{{[0-9]+}} offset1:1
; SI: ds_write2_b32 [[ZERO]], v{{[0-9]+}}, v{{[0-9]+}} offset0:2 offset1:3
define void @store_misaligned64_constant_offsets() {
- store i64 123, i64 addrspace(3)* getelementptr inbounds ([4 x i64] addrspace(3)* @bar, i32 0, i32 0), align 4
- store i64 123, i64 addrspace(3)* getelementptr inbounds ([4 x i64] addrspace(3)* @bar, i32 0, i32 1), align 4
+ store i64 123, i64 addrspace(3)* getelementptr inbounds ([4 x i64], [4 x i64] addrspace(3)* @bar, i32 0, i32 0), align 4
+ store i64 123, i64 addrspace(3)* getelementptr inbounds ([4 x i64], [4 x i64] addrspace(3)* @bar, i32 0, i32 1), align 4
ret void
}
@@ -358,12 +358,12 @@ define void @store_misaligned64_constant_offsets() {
; SI-LABEL: @store_misaligned64_constant_large_offsets
; SI-DAG: v_mov_b32_e32 [[BASE0:v[0-9]+]], 0x7ff8{{$}}
; SI-DAG: v_mov_b32_e32 [[BASE1:v[0-9]+]], 0x4000{{$}}
-; SI-DAG: ds_write2_b32 [[BASE0]], v{{[0-9]+}}, v{{[0-9]+}} offset0:0 offset1:1
-; SI-DAG: ds_write2_b32 [[BASE1]], v{{[0-9]+}}, v{{[0-9]+}} offset0:0 offset1:1
+; SI-DAG: ds_write2_b32 [[BASE0]], v{{[0-9]+}}, v{{[0-9]+}} offset1:1
+; SI-DAG: ds_write2_b32 [[BASE1]], v{{[0-9]+}}, v{{[0-9]+}} offset1:1
; SI: s_endpgm
define void @store_misaligned64_constant_large_offsets() {
- store i64 123, i64 addrspace(3)* getelementptr inbounds ([4096 x i64] addrspace(3)* @bar.large, i32 0, i32 2048), align 4
- store i64 123, i64 addrspace(3)* getelementptr inbounds ([4096 x i64] addrspace(3)* @bar.large, i32 0, i32 4095), align 4
+ store i64 123, i64 addrspace(3)* getelementptr inbounds ([4096 x i64], [4096 x i64] addrspace(3)* @bar.large, i32 0, i32 2048), align 4
+ store i64 123, i64 addrspace(3)* getelementptr inbounds ([4096 x i64], [4096 x i64] addrspace(3)* @bar.large, i32 0, i32 4095), align 4
ret void
}
@@ -373,34 +373,34 @@ define void @store_misaligned64_constant_large_offsets() {
define void @write2_sgemm_sequence(float addrspace(1)* %C, i32 %lda, i32 %ldb, float addrspace(1)* %in) #0 {
%x.i = tail call i32 @llvm.r600.read.tgid.x() #1
%y.i = tail call i32 @llvm.r600.read.tidig.y() #1
- %val = load float addrspace(1)* %in
- %arrayidx44 = getelementptr inbounds [264 x float] addrspace(3)* @sgemm.lA, i32 0, i32 %x.i
+ %val = load float, float addrspace(1)* %in
+ %arrayidx44 = getelementptr inbounds [264 x float], [264 x float] addrspace(3)* @sgemm.lA, i32 0, i32 %x.i
store float %val, float addrspace(3)* %arrayidx44, align 4
%add47 = add nsw i32 %x.i, 1
- %arrayidx48 = getelementptr inbounds [264 x float] addrspace(3)* @sgemm.lA, i32 0, i32 %add47
+ %arrayidx48 = getelementptr inbounds [264 x float], [264 x float] addrspace(3)* @sgemm.lA, i32 0, i32 %add47
store float %val, float addrspace(3)* %arrayidx48, align 4
%add51 = add nsw i32 %x.i, 16
- %arrayidx52 = getelementptr inbounds [264 x float] addrspace(3)* @sgemm.lA, i32 0, i32 %add51
+ %arrayidx52 = getelementptr inbounds [264 x float], [264 x float] addrspace(3)* @sgemm.lA, i32 0, i32 %add51
store float %val, float addrspace(3)* %arrayidx52, align 4
%add55 = add nsw i32 %x.i, 17
- %arrayidx56 = getelementptr inbounds [264 x float] addrspace(3)* @sgemm.lA, i32 0, i32 %add55
+ %arrayidx56 = getelementptr inbounds [264 x float], [264 x float] addrspace(3)* @sgemm.lA, i32 0, i32 %add55
store float %val, float addrspace(3)* %arrayidx56, align 4
- %arrayidx60 = getelementptr inbounds [776 x float] addrspace(3)* @sgemm.lB, i32 0, i32 %y.i
+ %arrayidx60 = getelementptr inbounds [776 x float], [776 x float] addrspace(3)* @sgemm.lB, i32 0, i32 %y.i
store float %val, float addrspace(3)* %arrayidx60, align 4
%add63 = add nsw i32 %y.i, 1
- %arrayidx64 = getelementptr inbounds [776 x float] addrspace(3)* @sgemm.lB, i32 0, i32 %add63
+ %arrayidx64 = getelementptr inbounds [776 x float], [776 x float] addrspace(3)* @sgemm.lB, i32 0, i32 %add63
store float %val, float addrspace(3)* %arrayidx64, align 4
%add67 = add nsw i32 %y.i, 32
- %arrayidx68 = getelementptr inbounds [776 x float] addrspace(3)* @sgemm.lB, i32 0, i32 %add67
+ %arrayidx68 = getelementptr inbounds [776 x float], [776 x float] addrspace(3)* @sgemm.lB, i32 0, i32 %add67
store float %val, float addrspace(3)* %arrayidx68, align 4
%add71 = add nsw i32 %y.i, 33
- %arrayidx72 = getelementptr inbounds [776 x float] addrspace(3)* @sgemm.lB, i32 0, i32 %add71
+ %arrayidx72 = getelementptr inbounds [776 x float], [776 x float] addrspace(3)* @sgemm.lB, i32 0, i32 %add71
store float %val, float addrspace(3)* %arrayidx72, align 4
%add75 = add nsw i32 %y.i, 64
- %arrayidx76 = getelementptr inbounds [776 x float] addrspace(3)* @sgemm.lB, i32 0, i32 %add75
+ %arrayidx76 = getelementptr inbounds [776 x float], [776 x float] addrspace(3)* @sgemm.lB, i32 0, i32 %add75
store float %val, float addrspace(3)* %arrayidx76, align 4
%add79 = add nsw i32 %y.i, 65
- %arrayidx80 = getelementptr inbounds [776 x float] addrspace(3)* @sgemm.lB, i32 0, i32 %add79
+ %arrayidx80 = getelementptr inbounds [776 x float], [776 x float] addrspace(3)* @sgemm.lB, i32 0, i32 %add79
store float %val, float addrspace(3)* %arrayidx80, align 4
ret void
}
diff --git a/test/CodeGen/R600/ds_write2st64.ll b/test/CodeGen/R600/ds_write2st64.ll
index de5f4efcbcd3..1d9d881c5c7e 100644
--- a/test/CodeGen/R600/ds_write2st64.ll
+++ b/test/CodeGen/R600/ds_write2st64.ll
@@ -7,16 +7,16 @@
; SI-LABEL: @simple_write2st64_one_val_f32_0_1
; SI-DAG: buffer_load_dword [[VAL:v[0-9]+]]
; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
-; SI: ds_write2st64_b32 [[VPTR]], [[VAL]], [[VAL]] offset0:0 offset1:1 [M0]
+; SI: ds_write2st64_b32 [[VPTR]], [[VAL]], [[VAL]] offset1:1
; SI: s_endpgm
define void @simple_write2st64_one_val_f32_0_1(float addrspace(1)* %C, float addrspace(1)* %in) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
- %in.gep = getelementptr float addrspace(1)* %in, i32 %x.i
- %val = load float addrspace(1)* %in.gep, align 4
- %arrayidx0 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
+ %in.gep = getelementptr float, float addrspace(1)* %in, i32 %x.i
+ %val = load float, float addrspace(1)* %in.gep, align 4
+ %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
store float %val, float addrspace(3)* %arrayidx0, align 4
%add.x = add nsw i32 %x.i, 64
- %arrayidx1 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
+ %arrayidx1 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
store float %val, float addrspace(3)* %arrayidx1, align 4
ret void
}
@@ -25,19 +25,19 @@ define void @simple_write2st64_one_val_f32_0_1(float addrspace(1)* %C, float add
; SI-DAG: buffer_load_dword [[VAL0:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_load_dword [[VAL1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
-; SI: ds_write2st64_b32 [[VPTR]], [[VAL0]], [[VAL1]] offset0:2 offset1:5 [M0]
+; SI: ds_write2st64_b32 [[VPTR]], [[VAL0]], [[VAL1]] offset0:2 offset1:5
; SI: s_endpgm
define void @simple_write2st64_two_val_f32_2_5(float addrspace(1)* %C, float addrspace(1)* %in) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
- %in.gep.0 = getelementptr float addrspace(1)* %in, i32 %x.i
- %in.gep.1 = getelementptr float addrspace(1)* %in.gep.0, i32 1
- %val0 = load float addrspace(1)* %in.gep.0, align 4
- %val1 = load float addrspace(1)* %in.gep.1, align 4
+ %in.gep.0 = getelementptr float, float addrspace(1)* %in, i32 %x.i
+ %in.gep.1 = getelementptr float, float addrspace(1)* %in.gep.0, i32 1
+ %val0 = load float, float addrspace(1)* %in.gep.0, align 4
+ %val1 = load float, float addrspace(1)* %in.gep.1, align 4
%add.x.0 = add nsw i32 %x.i, 128
- %arrayidx0 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x.0
+ %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x.0
store float %val0, float addrspace(3)* %arrayidx0, align 4
%add.x.1 = add nsw i32 %x.i, 320
- %arrayidx1 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x.1
+ %arrayidx1 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x.1
store float %val1, float addrspace(3)* %arrayidx1, align 4
ret void
}
@@ -46,18 +46,18 @@ define void @simple_write2st64_two_val_f32_2_5(float addrspace(1)* %C, float add
; SI-DAG: buffer_load_dword [[VAL0:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_load_dword [[VAL1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
-; SI: ds_write2st64_b32 [[VPTR]], [[VAL0]], [[VAL1]] offset0:0 offset1:255 [M0]
+; SI: ds_write2st64_b32 [[VPTR]], [[VAL0]], [[VAL1]] offset1:255
; SI: s_endpgm
define void @simple_write2st64_two_val_max_offset_f32(float addrspace(1)* %C, float addrspace(1)* %in, float addrspace(3)* %lds) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
- %in.gep.0 = getelementptr float addrspace(1)* %in, i32 %x.i
- %in.gep.1 = getelementptr float addrspace(1)* %in.gep.0, i32 1
- %val0 = load float addrspace(1)* %in.gep.0, align 4
- %val1 = load float addrspace(1)* %in.gep.1, align 4
- %arrayidx0 = getelementptr inbounds float addrspace(3)* %lds, i32 %x.i
+ %in.gep.0 = getelementptr float, float addrspace(1)* %in, i32 %x.i
+ %in.gep.1 = getelementptr float, float addrspace(1)* %in.gep.0, i32 1
+ %val0 = load float, float addrspace(1)* %in.gep.0, align 4
+ %val1 = load float, float addrspace(1)* %in.gep.1, align 4
+ %arrayidx0 = getelementptr inbounds float, float addrspace(3)* %lds, i32 %x.i
store float %val0, float addrspace(3)* %arrayidx0, align 4
%add.x = add nsw i32 %x.i, 16320
- %arrayidx1 = getelementptr inbounds float addrspace(3)* %lds, i32 %add.x
+ %arrayidx1 = getelementptr inbounds float, float addrspace(3)* %lds, i32 %add.x
store float %val1, float addrspace(3)* %arrayidx1, align 4
ret void
}
@@ -66,35 +66,35 @@ define void @simple_write2st64_two_val_max_offset_f32(float addrspace(1)* %C, fl
; SI-DAG: buffer_load_dwordx2 [[VAL0:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_load_dwordx2 [[VAL1:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8
; SI-DAG: v_add_i32_e32 [[VPTR:v[0-9]+]],
-; SI: ds_write2st64_b64 [[VPTR]], [[VAL0]], [[VAL1]] offset0:4 offset1:127 [M0]
+; SI: ds_write2st64_b64 [[VPTR]], [[VAL0]], [[VAL1]] offset0:4 offset1:127
; SI: s_endpgm
define void @simple_write2st64_two_val_max_offset_f64(double addrspace(1)* %C, double addrspace(1)* %in, double addrspace(3)* %lds) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
- %in.gep.0 = getelementptr double addrspace(1)* %in, i32 %x.i
- %in.gep.1 = getelementptr double addrspace(1)* %in.gep.0, i32 1
- %val0 = load double addrspace(1)* %in.gep.0, align 8
- %val1 = load double addrspace(1)* %in.gep.1, align 8
+ %in.gep.0 = getelementptr double, double addrspace(1)* %in, i32 %x.i
+ %in.gep.1 = getelementptr double, double addrspace(1)* %in.gep.0, i32 1
+ %val0 = load double, double addrspace(1)* %in.gep.0, align 8
+ %val1 = load double, double addrspace(1)* %in.gep.1, align 8
%add.x.0 = add nsw i32 %x.i, 256
- %arrayidx0 = getelementptr inbounds double addrspace(3)* %lds, i32 %add.x.0
+ %arrayidx0 = getelementptr inbounds double, double addrspace(3)* %lds, i32 %add.x.0
store double %val0, double addrspace(3)* %arrayidx0, align 8
%add.x.1 = add nsw i32 %x.i, 8128
- %arrayidx1 = getelementptr inbounds double addrspace(3)* %lds, i32 %add.x.1
+ %arrayidx1 = getelementptr inbounds double, double addrspace(3)* %lds, i32 %add.x.1
store double %val1, double addrspace(3)* %arrayidx1, align 8
ret void
}
; SI-LABEL: @byte_size_only_divisible_64_write2st64_f64
; SI-NOT: ds_write2st64_b64
-; SI: ds_write2_b64 {{v[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}} offset0:0 offset1:8
+; SI: ds_write2_b64 {{v[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}} offset1:8
; SI: s_endpgm
define void @byte_size_only_divisible_64_write2st64_f64(double addrspace(1)* %C, double addrspace(1)* %in, double addrspace(3)* %lds) #0 {
%x.i = tail call i32 @llvm.r600.read.tidig.x() #1
- %in.gep = getelementptr double addrspace(1)* %in, i32 %x.i
- %val = load double addrspace(1)* %in.gep, align 8
- %arrayidx0 = getelementptr inbounds double addrspace(3)* %lds, i32 %x.i
+ %in.gep = getelementptr double, double addrspace(1)* %in, i32 %x.i
+ %val = load double, double addrspace(1)* %in.gep, align 8
+ %arrayidx0 = getelementptr inbounds double, double addrspace(3)* %lds, i32 %x.i
store double %val, double addrspace(3)* %arrayidx0, align 8
%add.x = add nsw i32 %x.i, 8
- %arrayidx1 = getelementptr inbounds double addrspace(3)* %lds, i32 %add.x
+ %arrayidx1 = getelementptr inbounds double, double addrspace(3)* %lds, i32 %add.x
store double %val, double addrspace(3)* %arrayidx1, align 8
ret void
}
diff --git a/test/CodeGen/R600/elf.ll b/test/CodeGen/R600/elf.ll
index f801b3f57357..d0fd06a34379 100644
--- a/test/CodeGen/R600/elf.ll
+++ b/test/CodeGen/R600/elf.ll
@@ -5,6 +5,9 @@
; RUN: llc < %s -march=amdgcn -mcpu=carrizo -verify-machineinstrs -filetype=obj | llvm-readobj -s -symbols - | FileCheck --check-prefix=ELF %s
; RUN: llc < %s -march=amdgcn -mcpu=carrizo -verify-machineinstrs -o - | FileCheck --check-prefix=CONFIG --check-prefix=TYPICAL %s
+; Test that we don't try to produce a COFF file on Windows.
+; RUN: llc < %s -mtriple=amdgcn-pc-mingw -mcpu=SI -verify-machineinstrs -filetype=obj | llvm-readobj -s -symbols - | FileCheck --check-prefix=ELF %s
+
; ELF: Format: ELF32
; ELF: Name: .AMDGPU.config
; ELF: Type: SHT_PROGBITS
@@ -13,12 +16,12 @@
; ELF: Name: test
; ELF: Binding: Global
-; CONFIG: .align 256
-; CONFIG: test:
; CONFIG: .section .AMDGPU.config
; CONFIG-NEXT: .long 45096
; TYPICAL-NEXT: .long 0
; TONGA-NEXT: .long 576
+; CONFIG: .align 256
+; CONFIG: test:
define void @test(i32 %p) #0 {
%i = add i32 %p, 2
%r = bitcast i32 %i to float
diff --git a/test/CodeGen/R600/empty-function.ll b/test/CodeGen/R600/empty-function.ll
index b5593eb87ae4..a060900811ea 100644
--- a/test/CodeGen/R600/empty-function.ll
+++ b/test/CodeGen/R600/empty-function.ll
@@ -3,16 +3,16 @@
; Make sure we don't assert on empty functions
-; SI-LABEL: {{^}}empty_function_ret:
; SI: .text
+; SI-LABEL: {{^}}empty_function_ret:
; SI: s_endpgm
; SI: codeLenInByte = 4
define void @empty_function_ret() #0 {
ret void
}
-; SI-LABEL: {{^}}empty_function_unreachable:
; SI: .text
+; SI-LABEL: {{^}}empty_function_unreachable:
; SI: codeLenInByte = 0
define void @empty_function_unreachable() #0 {
unreachable
diff --git a/test/CodeGen/R600/endcf-loop-header.ll b/test/CodeGen/R600/endcf-loop-header.ll
index e3c5b3c1c364..267a323c5063 100644
--- a/test/CodeGen/R600/endcf-loop-header.ll
+++ b/test/CodeGen/R600/endcf-loop-header.ll
@@ -28,7 +28,7 @@ loop:
br i1 %tmp2, label %done, label %loop
done:
- %tmp3 = getelementptr i32 addrspace(1)* %out, i64 1
+ %tmp3 = getelementptr i32, i32 addrspace(1)* %out, i64 1
store i32 %inc, i32 addrspace(1)* %tmp3
ret void
}
diff --git a/test/CodeGen/R600/extload-private.ll b/test/CodeGen/R600/extload-private.ll
index fec868232507..294c3a9c6782 100644
--- a/test/CodeGen/R600/extload-private.ll
+++ b/test/CodeGen/R600/extload-private.ll
@@ -6,7 +6,7 @@
define void @load_i8_sext_private(i32 addrspace(1)* %out) {
entry:
%tmp0 = alloca i8
- %tmp1 = load i8* %tmp0
+ %tmp1 = load i8, i8* %tmp0
%tmp2 = sext i8 %tmp1 to i32
store i32 %tmp2, i32 addrspace(1)* %out
ret void
@@ -17,7 +17,7 @@ entry:
define void @load_i8_zext_private(i32 addrspace(1)* %out) {
entry:
%tmp0 = alloca i8
- %tmp1 = load i8* %tmp0
+ %tmp1 = load i8, i8* %tmp0
%tmp2 = zext i8 %tmp1 to i32
store i32 %tmp2, i32 addrspace(1)* %out
ret void
@@ -28,7 +28,7 @@ entry:
define void @load_i16_sext_private(i32 addrspace(1)* %out) {
entry:
%tmp0 = alloca i16
- %tmp1 = load i16* %tmp0
+ %tmp1 = load i16, i16* %tmp0
%tmp2 = sext i16 %tmp1 to i32
store i32 %tmp2, i32 addrspace(1)* %out
ret void
@@ -39,7 +39,7 @@ entry:
define void @load_i16_zext_private(i32 addrspace(1)* %out) {
entry:
%tmp0 = alloca i16
- %tmp1 = load i16* %tmp0
+ %tmp1 = load i16, i16* %tmp0
%tmp2 = zext i16 %tmp1 to i32
store i32 %tmp2, i32 addrspace(1)* %out
ret void
diff --git a/test/CodeGen/R600/extload.ll b/test/CodeGen/R600/extload.ll
index 73d6701bfb5b..662eb7a9716b 100644
--- a/test/CodeGen/R600/extload.ll
+++ b/test/CodeGen/R600/extload.ll
@@ -3,11 +3,12 @@
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; FUNC-LABEL: {{^}}anyext_load_i8:
-; EG: AND_INT
-; EG: 255
+; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+.[XYZW]]],
+; EG: VTX_READ_32 [[VAL]]
+
define void @anyext_load_i8(i8 addrspace(1)* nocapture noalias %out, i8 addrspace(1)* nocapture noalias %src) nounwind {
%cast = bitcast i8 addrspace(1)* %src to i32 addrspace(1)*
- %load = load i32 addrspace(1)* %cast, align 1
+ %load = load i32, i32 addrspace(1)* %cast, align 1
%x = bitcast i32 %load to <4 x i8>
%castOut = bitcast i8 addrspace(1)* %out to <4 x i8> addrspace(1)*
store <4 x i8> %x, <4 x i8> addrspace(1)* %castOut, align 1
@@ -15,13 +16,12 @@ define void @anyext_load_i8(i8 addrspace(1)* nocapture noalias %out, i8 addrspac
}
; FUNC-LABEL: {{^}}anyext_load_i16:
-; EG: AND_INT
-; EG: AND_INT
-; EG-DAG: 65535
-; EG-DAG: -65536
+; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+.[XYZW]]],
+; EG: VTX_READ_32 [[VAL]]
+
define void @anyext_load_i16(i16 addrspace(1)* nocapture noalias %out, i16 addrspace(1)* nocapture noalias %src) nounwind {
%cast = bitcast i16 addrspace(1)* %src to i32 addrspace(1)*
- %load = load i32 addrspace(1)* %cast, align 1
+ %load = load i32, i32 addrspace(1)* %cast, align 1
%x = bitcast i32 %load to <2 x i16>
%castOut = bitcast i16 addrspace(1)* %out to <2 x i16> addrspace(1)*
store <2 x i16> %x, <2 x i16> addrspace(1)* %castOut, align 1
@@ -29,11 +29,11 @@ define void @anyext_load_i16(i16 addrspace(1)* nocapture noalias %out, i16 addrs
}
; FUNC-LABEL: {{^}}anyext_load_lds_i8:
-; EG: AND_INT
-; EG: 255
+; EG: LDS_READ_RET {{.*}}, [[VAL:T[0-9]+.[XYZW]]]
+; EG: LDS_WRITE * [[VAL]]
define void @anyext_load_lds_i8(i8 addrspace(3)* nocapture noalias %out, i8 addrspace(3)* nocapture noalias %src) nounwind {
%cast = bitcast i8 addrspace(3)* %src to i32 addrspace(3)*
- %load = load i32 addrspace(3)* %cast, align 1
+ %load = load i32, i32 addrspace(3)* %cast, align 1
%x = bitcast i32 %load to <4 x i8>
%castOut = bitcast i8 addrspace(3)* %out to <4 x i8> addrspace(3)*
store <4 x i8> %x, <4 x i8> addrspace(3)* %castOut, align 1
@@ -41,13 +41,11 @@ define void @anyext_load_lds_i8(i8 addrspace(3)* nocapture noalias %out, i8 addr
}
; FUNC-LABEL: {{^}}anyext_load_lds_i16:
-; EG: AND_INT
-; EG: AND_INT
-; EG-DAG: 65535
-; EG-DAG: -65536
+; EG: LDS_READ_RET {{.*}}, [[VAL:T[0-9]+.[XYZW]]]
+; EG: LDS_WRITE * [[VAL]]
define void @anyext_load_lds_i16(i16 addrspace(3)* nocapture noalias %out, i16 addrspace(3)* nocapture noalias %src) nounwind {
%cast = bitcast i16 addrspace(3)* %src to i32 addrspace(3)*
- %load = load i32 addrspace(3)* %cast, align 1
+ %load = load i32, i32 addrspace(3)* %cast, align 1
%x = bitcast i32 %load to <2 x i16>
%castOut = bitcast i16 addrspace(3)* %out to <2 x i16> addrspace(3)*
store <2 x i16> %x, <2 x i16> addrspace(3)* %castOut, align 1
diff --git a/test/CodeGen/R600/extract_vector_elt_i16.ll b/test/CodeGen/R600/extract_vector_elt_i16.ll
index 0774a9ae852b..c7572efc6f5b 100644
--- a/test/CodeGen/R600/extract_vector_elt_i16.ll
+++ b/test/CodeGen/R600/extract_vector_elt_i16.ll
@@ -9,7 +9,7 @@
define void @extract_vector_elt_v2i16(i16 addrspace(1)* %out, <2 x i16> %foo) nounwind {
%p0 = extractelement <2 x i16> %foo, i32 0
%p1 = extractelement <2 x i16> %foo, i32 1
- %out1 = getelementptr i16 addrspace(1)* %out, i32 1
+ %out1 = getelementptr i16, i16 addrspace(1)* %out, i32 1
store i16 %p1, i16 addrspace(1)* %out, align 2
store i16 %p0, i16 addrspace(1)* %out1, align 2
ret void
@@ -23,7 +23,7 @@ define void @extract_vector_elt_v2i16(i16 addrspace(1)* %out, <2 x i16> %foo) no
define void @extract_vector_elt_v4i16(i16 addrspace(1)* %out, <4 x i16> %foo) nounwind {
%p0 = extractelement <4 x i16> %foo, i32 0
%p1 = extractelement <4 x i16> %foo, i32 2
- %out1 = getelementptr i16 addrspace(1)* %out, i32 1
+ %out1 = getelementptr i16, i16 addrspace(1)* %out, i32 1
store i16 %p1, i16 addrspace(1)* %out, align 2
store i16 %p0, i16 addrspace(1)* %out1, align 2
ret void
diff --git a/test/CodeGen/R600/fabs.f64.ll b/test/CodeGen/R600/fabs.f64.ll
index d87c08260b4c..3c6136c1a7bd 100644
--- a/test/CodeGen/R600/fabs.f64.ll
+++ b/test/CodeGen/R600/fabs.f64.ll
@@ -13,8 +13,8 @@ declare <4 x double> @llvm.fabs.v4f64(<4 x double>) readnone
define void @v_fabs_f64(double addrspace(1)* %out, double addrspace(1)* %in) {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
%tidext = sext i32 %tid to i64
- %gep = getelementptr double addrspace(1)* %in, i64 %tidext
- %val = load double addrspace(1)* %gep, align 8
+ %gep = getelementptr double, double addrspace(1)* %in, i64 %tidext
+ %val = load double, double addrspace(1)* %gep, align 8
%fabs = call double @llvm.fabs.f64(double %val)
store double %fabs, double addrspace(1)* %out
ret void
diff --git a/test/CodeGen/R600/fabs.ll b/test/CodeGen/R600/fabs.ll
index add6b75d22ae..419a73d02669 100644
--- a/test/CodeGen/R600/fabs.ll
+++ b/test/CodeGen/R600/fabs.ll
@@ -1,4 +1,5 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s
@@ -10,7 +11,7 @@
; R600-NOT: AND
; R600: |PV.{{[XYZW]}}|
-; SI: v_and_b32
+; GCN: v_and_b32
define void @fabs_fn_free(float addrspace(1)* %out, i32 %in) {
%bc= bitcast i32 %in to float
@@ -23,7 +24,7 @@ define void @fabs_fn_free(float addrspace(1)* %out, i32 %in) {
; R600-NOT: AND
; R600: |PV.{{[XYZW]}}|
-; SI: v_and_b32
+; GCN: v_and_b32
define void @fabs_free(float addrspace(1)* %out, i32 %in) {
%bc= bitcast i32 %in to float
@@ -35,7 +36,7 @@ define void @fabs_free(float addrspace(1)* %out, i32 %in) {
; FUNC-LABEL: {{^}}fabs_f32:
; R600: |{{(PV|T[0-9])\.[XYZW]}}|
-; SI: v_and_b32
+; GCN: v_and_b32
define void @fabs_f32(float addrspace(1)* %out, float %in) {
%fabs = call float @llvm.fabs.f32(float %in)
store float %fabs, float addrspace(1)* %out
@@ -46,8 +47,8 @@ define void @fabs_f32(float addrspace(1)* %out, float %in) {
; R600: |{{(PV|T[0-9])\.[XYZW]}}|
; R600: |{{(PV|T[0-9])\.[XYZW]}}|
-; SI: v_and_b32
-; SI: v_and_b32
+; GCN: v_and_b32
+; GCN: v_and_b32
define void @fabs_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %in) {
%fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %in)
store <2 x float> %fabs, <2 x float> addrspace(1)* %out
@@ -60,20 +61,21 @@ define void @fabs_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %in) {
; R600: |{{(PV|T[0-9])\.[XYZW]}}|
; R600: |{{(PV|T[0-9])\.[XYZW]}}|
-; SI: v_and_b32
-; SI: v_and_b32
-; SI: v_and_b32
-; SI: v_and_b32
+; GCN: v_and_b32
+; GCN: v_and_b32
+; GCN: v_and_b32
+; GCN: v_and_b32
define void @fabs_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %in) {
%fabs = call <4 x float> @llvm.fabs.v4f32(<4 x float> %in)
store <4 x float> %fabs, <4 x float> addrspace(1)* %out
ret void
}
-; SI-LABEL: {{^}}fabs_fn_fold:
+; GCN-LABEL: {{^}}fabs_fn_fold:
; SI: s_load_dword [[ABS_VALUE:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xb
-; SI-NOT: and
-; SI: v_mul_f32_e64 v{{[0-9]+}}, |[[ABS_VALUE]]|, v{{[0-9]+}}
+; VI: s_load_dword [[ABS_VALUE:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x2c
+; GCN-NOT: and
+; GCN: v_mul_f32_e64 v{{[0-9]+}}, |[[ABS_VALUE]]|, v{{[0-9]+}}
define void @fabs_fn_fold(float addrspace(1)* %out, float %in0, float %in1) {
%fabs = call float @fabs(float %in0)
%fmul = fmul float %fabs, %in1
@@ -81,10 +83,11 @@ define void @fabs_fn_fold(float addrspace(1)* %out, float %in0, float %in1) {
ret void
}
-; SI-LABEL: {{^}}fabs_fold:
+; GCN-LABEL: {{^}}fabs_fold:
; SI: s_load_dword [[ABS_VALUE:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xb
-; SI-NOT: and
-; SI: v_mul_f32_e64 v{{[0-9]+}}, |[[ABS_VALUE]]|, v{{[0-9]+}}
+; VI: s_load_dword [[ABS_VALUE:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x2c
+; GCN-NOT: and
+; GCN: v_mul_f32_e64 v{{[0-9]+}}, |[[ABS_VALUE]]|, v{{[0-9]+}}
define void @fabs_fold(float addrspace(1)* %out, float %in0, float %in1) {
%fabs = call float @llvm.fabs.f32(float %in0)
%fmul = fmul float %fabs, %in1
diff --git a/test/CodeGen/R600/fadd.ll b/test/CodeGen/R600/fadd.ll
index 365af9b73cc0..5fac328c5981 100644
--- a/test/CodeGen/R600/fadd.ll
+++ b/test/CodeGen/R600/fadd.ll
@@ -32,9 +32,9 @@ define void @fadd_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x flo
; SI: v_add_f32
; SI: v_add_f32
define void @fadd_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
- %b_ptr = getelementptr <4 x float> addrspace(1)* %in, i32 1
- %a = load <4 x float> addrspace(1)* %in, align 16
- %b = load <4 x float> addrspace(1)* %b_ptr, align 16
+ %b_ptr = getelementptr <4 x float>, <4 x float> addrspace(1)* %in, i32 1
+ %a = load <4 x float>, <4 x float> addrspace(1)* %in, align 16
+ %b = load <4 x float>, <4 x float> addrspace(1)* %b_ptr, align 16
%result = fadd <4 x float> %a, %b
store <4 x float> %result, <4 x float> addrspace(1)* %out, align 16
ret void
diff --git a/test/CodeGen/R600/fadd64.ll b/test/CodeGen/R600/fadd64.ll
index f1f6fef54766..485c55870c47 100644
--- a/test/CodeGen/R600/fadd64.ll
+++ b/test/CodeGen/R600/fadd64.ll
@@ -6,8 +6,8 @@
define void @fadd_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
double addrspace(1)* %in2) {
- %r0 = load double addrspace(1)* %in1
- %r1 = load double addrspace(1)* %in2
+ %r0 = load double, double addrspace(1)* %in1
+ %r1 = load double, double addrspace(1)* %in2
%r2 = fadd double %r0, %r1
store double %r2, double addrspace(1)* %out
ret void
diff --git a/test/CodeGen/R600/fceil64.ll b/test/CodeGen/R600/fceil64.ll
index 77cd8eae402c..e8c34f0141e4 100644
--- a/test/CodeGen/R600/fceil64.ll
+++ b/test/CodeGen/R600/fceil64.ll
@@ -1,5 +1,6 @@
-; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s
declare double @llvm.ceil.f64(double) nounwind readnone
declare <2 x double> @llvm.ceil.v2f64(<2 x double>) nounwind readnone
@@ -11,19 +12,19 @@ declare <16 x double> @llvm.ceil.v16f64(<16 x double>) nounwind readnone
; FUNC-LABEL: {{^}}fceil_f64:
; CI: v_ceil_f64_e32
; SI: s_bfe_u32 [[SEXP:s[0-9]+]], {{s[0-9]+}}, 0xb0014
+; SI: s_and_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80000000
; SI: s_add_i32 s{{[0-9]+}}, [[SEXP]], 0xfffffc01
; SI: s_lshr_b64
; SI: s_not_b64
; SI: s_and_b64
-; SI-DAG: s_and_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80000000
-; SI-DAG: cmp_lt_i32
+; SI: cmp_gt_i32
; SI: cndmask_b32
; SI: cndmask_b32
-; SI: cmp_gt_i32
+; SI: cmp_lt_i32
; SI: cndmask_b32
; SI: cndmask_b32
-; SI: v_cmp_lg_f64
-; SI: v_cmp_gt_f64
+; SI-DAG: v_cmp_lt_f64
+; SI-DAG: v_cmp_lg_f64
; SI: s_and_b64
; SI: v_cndmask_b32
; SI: v_cndmask_b32
diff --git a/test/CodeGen/R600/fcmp-cnd.ll b/test/CodeGen/R600/fcmp-cnd.ll
index 1d4e323d3abf..530274f920f0 100644
--- a/test/CodeGen/R600/fcmp-cnd.ll
+++ b/test/CodeGen/R600/fcmp-cnd.ll
@@ -6,7 +6,7 @@
define void @test(i32 addrspace(1)* %out, float addrspace(1)* %in) {
entry:
- %0 = load float addrspace(1)* %in
+ %0 = load float, float addrspace(1)* %in
%cmp = fcmp oeq float %0, 0.000000e+00
%value = select i1 %cmp, i32 2, i32 3
store i32 %value, i32 addrspace(1)* %out
diff --git a/test/CodeGen/R600/fcmp-cnde-int-args.ll b/test/CodeGen/R600/fcmp-cnde-int-args.ll
index 55aba0d72d39..c402805feb39 100644
--- a/test/CodeGen/R600/fcmp-cnde-int-args.ll
+++ b/test/CodeGen/R600/fcmp-cnde-int-args.ll
@@ -8,7 +8,7 @@
define void @test(i32 addrspace(1)* %out, float addrspace(1)* %in) {
entry:
- %0 = load float addrspace(1)* %in
+ %0 = load float, float addrspace(1)* %in
%cmp = fcmp oeq float %0, 0.000000e+00
%value = select i1 %cmp, i32 -1, i32 0
store i32 %value, i32 addrspace(1)* %out
diff --git a/test/CodeGen/R600/fcmp.ll b/test/CodeGen/R600/fcmp.ll
index 33992181e0d9..5207ab57bade 100644
--- a/test/CodeGen/R600/fcmp.ll
+++ b/test/CodeGen/R600/fcmp.ll
@@ -5,9 +5,9 @@
define void @fcmp_sext(i32 addrspace(1)* %out, float addrspace(1)* %in) {
entry:
- %0 = load float addrspace(1)* %in
- %arrayidx1 = getelementptr inbounds float addrspace(1)* %in, i32 1
- %1 = load float addrspace(1)* %arrayidx1
+ %0 = load float, float addrspace(1)* %in
+ %arrayidx1 = getelementptr inbounds float, float addrspace(1)* %in, i32 1
+ %1 = load float, float addrspace(1)* %arrayidx1
%cmp = fcmp oeq float %0, %1
%sext = sext i1 %cmp to i32
store i32 %sext, i32 addrspace(1)* %out
@@ -28,7 +28,7 @@ entry:
br i1 %0, label %IF, label %ENDIF
IF:
- %1 = getelementptr i32 addrspace(1)* %out, i32 1
+ %1 = getelementptr i32, i32 addrspace(1)* %out, i32 1
store i32 0, i32 addrspace(1)* %1
br label %ENDIF
diff --git a/test/CodeGen/R600/fcmp64.ll b/test/CodeGen/R600/fcmp64.ll
index 9dc8b50513f2..053ab0ed7aaf 100644
--- a/test/CodeGen/R600/fcmp64.ll
+++ b/test/CodeGen/R600/fcmp64.ll
@@ -5,8 +5,8 @@
; CHECK: v_cmp_nge_f64_e32 vcc, {{v[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+]}}
define void @flt_f64(i32 addrspace(1)* %out, double addrspace(1)* %in1,
double addrspace(1)* %in2) {
- %r0 = load double addrspace(1)* %in1
- %r1 = load double addrspace(1)* %in2
+ %r0 = load double, double addrspace(1)* %in1
+ %r1 = load double, double addrspace(1)* %in2
%r2 = fcmp ult double %r0, %r1
%r3 = zext i1 %r2 to i32
store i32 %r3, i32 addrspace(1)* %out
@@ -17,8 +17,8 @@ define void @flt_f64(i32 addrspace(1)* %out, double addrspace(1)* %in1,
; CHECK: v_cmp_ngt_f64_e32 vcc, {{v[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+]}}
define void @fle_f64(i32 addrspace(1)* %out, double addrspace(1)* %in1,
double addrspace(1)* %in2) {
- %r0 = load double addrspace(1)* %in1
- %r1 = load double addrspace(1)* %in2
+ %r0 = load double, double addrspace(1)* %in1
+ %r1 = load double, double addrspace(1)* %in2
%r2 = fcmp ule double %r0, %r1
%r3 = zext i1 %r2 to i32
store i32 %r3, i32 addrspace(1)* %out
@@ -29,8 +29,8 @@ define void @fle_f64(i32 addrspace(1)* %out, double addrspace(1)* %in1,
; CHECK: v_cmp_nle_f64_e32 vcc, {{v[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+]}}
define void @fgt_f64(i32 addrspace(1)* %out, double addrspace(1)* %in1,
double addrspace(1)* %in2) {
- %r0 = load double addrspace(1)* %in1
- %r1 = load double addrspace(1)* %in2
+ %r0 = load double, double addrspace(1)* %in1
+ %r1 = load double, double addrspace(1)* %in2
%r2 = fcmp ugt double %r0, %r1
%r3 = zext i1 %r2 to i32
store i32 %r3, i32 addrspace(1)* %out
@@ -41,8 +41,8 @@ define void @fgt_f64(i32 addrspace(1)* %out, double addrspace(1)* %in1,
; CHECK: v_cmp_nlt_f64_e32 vcc, {{v[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+]}}
define void @fge_f64(i32 addrspace(1)* %out, double addrspace(1)* %in1,
double addrspace(1)* %in2) {
- %r0 = load double addrspace(1)* %in1
- %r1 = load double addrspace(1)* %in2
+ %r0 = load double, double addrspace(1)* %in1
+ %r1 = load double, double addrspace(1)* %in2
%r2 = fcmp uge double %r0, %r1
%r3 = zext i1 %r2 to i32
store i32 %r3, i32 addrspace(1)* %out
@@ -53,8 +53,8 @@ define void @fge_f64(i32 addrspace(1)* %out, double addrspace(1)* %in1,
; CHECK: v_cmp_neq_f64_e32 vcc, {{v[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+]}}
define void @fne_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
double addrspace(1)* %in2) {
- %r0 = load double addrspace(1)* %in1
- %r1 = load double addrspace(1)* %in2
+ %r0 = load double, double addrspace(1)* %in1
+ %r1 = load double, double addrspace(1)* %in2
%r2 = fcmp une double %r0, %r1
%r3 = select i1 %r2, double %r0, double %r1
store double %r3, double addrspace(1)* %out
@@ -65,8 +65,8 @@ define void @fne_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
; CHECK: v_cmp_nlg_f64_e32 vcc, {{v[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+]}}
define void @feq_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
double addrspace(1)* %in2) {
- %r0 = load double addrspace(1)* %in1
- %r1 = load double addrspace(1)* %in2
+ %r0 = load double, double addrspace(1)* %in1
+ %r1 = load double, double addrspace(1)* %in2
%r2 = fcmp ueq double %r0, %r1
%r3 = select i1 %r2, double %r0, double %r1
store double %r3, double addrspace(1)* %out
diff --git a/test/CodeGen/R600/fconst64.ll b/test/CodeGen/R600/fconst64.ll
index 28e0c909747f..89af37545c99 100644
--- a/test/CodeGen/R600/fconst64.ll
+++ b/test/CodeGen/R600/fconst64.ll
@@ -6,7 +6,7 @@
; CHECK-DAG: s_mov_b32 {{s[0-9]+}}, 0
define void @fconst_f64(double addrspace(1)* %out, double addrspace(1)* %in) {
- %r1 = load double addrspace(1)* %in
+ %r1 = load double, double addrspace(1)* %in
%r2 = fadd double %r1, 5.000000e+00
store double %r2, double addrspace(1)* %out
ret void
diff --git a/test/CodeGen/R600/fcopysign.f32.ll b/test/CodeGen/R600/fcopysign.f32.ll
index 4bc5145bd4de..b719d5a39785 100644
--- a/test/CodeGen/R600/fcopysign.f32.ll
+++ b/test/CodeGen/R600/fcopysign.f32.ll
@@ -1,4 +1,5 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
@@ -10,12 +11,14 @@ declare <4 x float> @llvm.copysign.v4f32(<4 x float>, <4 x float>) nounwind read
; FUNC-LABEL: {{^}}test_copysign_f32:
; SI: s_load_dword [[SMAG:s[0-9]+]], {{.*}} 0xb
; SI: s_load_dword [[SSIGN:s[0-9]+]], {{.*}} 0xc
-; SI-DAG: v_mov_b32_e32 [[VSIGN:v[0-9]+]], [[SSIGN]]
-; SI-DAG: v_mov_b32_e32 [[VMAG:v[0-9]+]], [[SMAG]]
-; SI-DAG: s_mov_b32 [[SCONST:s[0-9]+]], 0x7fffffff
-; SI: v_bfi_b32 [[RESULT:v[0-9]+]], [[SCONST]], [[VMAG]], [[VSIGN]]
-; SI: buffer_store_dword [[RESULT]],
-; SI: s_endpgm
+; VI: s_load_dword [[SMAG:s[0-9]+]], {{.*}} 0x2c
+; VI: s_load_dword [[SSIGN:s[0-9]+]], {{.*}} 0x30
+; GCN-DAG: v_mov_b32_e32 [[VSIGN:v[0-9]+]], [[SSIGN]]
+; GCN-DAG: v_mov_b32_e32 [[VMAG:v[0-9]+]], [[SMAG]]
+; GCN-DAG: s_mov_b32 [[SCONST:s[0-9]+]], 0x7fffffff
+; GCN: v_bfi_b32 [[RESULT:v[0-9]+]], [[SCONST]], [[VMAG]], [[VSIGN]]
+; GCN: buffer_store_dword [[RESULT]],
+; GCN: s_endpgm
; EG: BFI_INT
define void @test_copysign_f32(float addrspace(1)* %out, float %mag, float %sign) nounwind {
@@ -25,7 +28,7 @@ define void @test_copysign_f32(float addrspace(1)* %out, float %mag, float %sign
}
; FUNC-LABEL: {{^}}test_copysign_v2f32:
-; SI: s_endpgm
+; GCN: s_endpgm
; EG: BFI_INT
; EG: BFI_INT
@@ -36,7 +39,7 @@ define void @test_copysign_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %ma
}
; FUNC-LABEL: {{^}}test_copysign_v4f32:
-; SI: s_endpgm
+; GCN: s_endpgm
; EG: BFI_INT
; EG: BFI_INT
diff --git a/test/CodeGen/R600/fcopysign.f64.ll b/test/CodeGen/R600/fcopysign.f64.ll
index a14a493f72c8..3d8c55993089 100644
--- a/test/CodeGen/R600/fcopysign.f64.ll
+++ b/test/CodeGen/R600/fcopysign.f64.ll
@@ -1,4 +1,5 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN -check-prefix=FUNC %s
declare double @llvm.copysign.f64(double, double) nounwind readnone
declare <2 x double> @llvm.copysign.v2f64(<2 x double>, <2 x double>) nounwind readnone
@@ -7,13 +8,15 @@ declare <4 x double> @llvm.copysign.v4f64(<4 x double>, <4 x double>) nounwind r
; FUNC-LABEL: {{^}}test_copysign_f64:
; SI-DAG: s_load_dwordx2 s{{\[}}[[SMAG_LO:[0-9]+]]:[[SMAG_HI:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0xb
; SI-DAG: s_load_dwordx2 s{{\[}}[[SSIGN_LO:[0-9]+]]:[[SSIGN_HI:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0xd
-; SI-DAG: v_mov_b32_e32 v[[VSIGN_HI:[0-9]+]], s[[SSIGN_HI]]
-; SI-DAG: v_mov_b32_e32 v[[VMAG_HI:[0-9]+]], s[[SMAG_HI]]
-; SI-DAG: s_mov_b32 [[SCONST:s[0-9]+]], 0x7fffffff
-; SI: v_bfi_b32 v[[VRESULT_HI:[0-9]+]], [[SCONST]], v[[VMAG_HI]], v[[VSIGN_HI]]
-; SI: v_mov_b32_e32 v[[VMAG_LO:[0-9]+]], s[[SMAG_LO]]
-; SI: buffer_store_dwordx2 v{{\[}}[[VMAG_LO]]:[[VRESULT_HI]]{{\]}}
-; SI: s_endpgm
+; VI-DAG: s_load_dwordx2 s{{\[}}[[SMAG_LO:[0-9]+]]:[[SMAG_HI:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x2c
+; VI-DAG: s_load_dwordx2 s{{\[}}[[SSIGN_LO:[0-9]+]]:[[SSIGN_HI:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x34
+; GCN-DAG: v_mov_b32_e32 v[[VSIGN_HI:[0-9]+]], s[[SSIGN_HI]]
+; GCN-DAG: v_mov_b32_e32 v[[VMAG_HI:[0-9]+]], s[[SMAG_HI]]
+; GCN-DAG: s_mov_b32 [[SCONST:s[0-9]+]], 0x7fffffff
+; GCN: v_bfi_b32 v[[VRESULT_HI:[0-9]+]], [[SCONST]], v[[VMAG_HI]], v[[VSIGN_HI]]
+; GCN: v_mov_b32_e32 v[[VMAG_LO:[0-9]+]], s[[SMAG_LO]]
+; GCN: buffer_store_dwordx2 v{{\[}}[[VMAG_LO]]:[[VRESULT_HI]]{{\]}}
+; GCN: s_endpgm
define void @test_copysign_f64(double addrspace(1)* %out, double %mag, double %sign) nounwind {
%result = call double @llvm.copysign.f64(double %mag, double %sign)
store double %result, double addrspace(1)* %out, align 8
@@ -21,7 +24,7 @@ define void @test_copysign_f64(double addrspace(1)* %out, double %mag, double %s
}
; FUNC-LABEL: {{^}}test_copysign_v2f64:
-; SI: s_endpgm
+; GCN: s_endpgm
define void @test_copysign_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %mag, <2 x double> %sign) nounwind {
%result = call <2 x double> @llvm.copysign.v2f64(<2 x double> %mag, <2 x double> %sign)
store <2 x double> %result, <2 x double> addrspace(1)* %out, align 8
@@ -29,7 +32,7 @@ define void @test_copysign_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %
}
; FUNC-LABEL: {{^}}test_copysign_v4f64:
-; SI: s_endpgm
+; GCN: s_endpgm
define void @test_copysign_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %mag, <4 x double> %sign) nounwind {
%result = call <4 x double> @llvm.copysign.v4f64(<4 x double> %mag, <4 x double> %sign)
store <4 x double> %result, <4 x double> addrspace(1)* %out, align 8
diff --git a/test/CodeGen/R600/fdiv.f64.ll b/test/CodeGen/R600/fdiv.f64.ll
index 276642f99014..7c022e38c808 100644
--- a/test/CodeGen/R600/fdiv.f64.ll
+++ b/test/CodeGen/R600/fdiv.f64.ll
@@ -25,14 +25,14 @@
; COMMON-DAG: v_fma_f64 [[FMA3:v\[[0-9]+:[0-9]+\]]], [[FMA1]], [[FMA2]], [[FMA1]]
; COMMON-DAG: v_mul_f64 [[MUL:v\[[0-9]+:[0-9]+\]]], [[SCALE1]], [[FMA3]]
; COMMON-DAG: v_fma_f64 [[FMA4:v\[[0-9]+:[0-9]+\]]], -[[SCALE0]], [[MUL]], [[SCALE1]]
-; COMMON: v_div_fmas_f64 [[FMAS:v\[[0-9]+:[0-9]+\]]], [[FMA3]], [[FMA4]], [[MUL]]
+; COMMON: v_div_fmas_f64 [[FMAS:v\[[0-9]+:[0-9]+\]]], [[FMA4]], [[FMA3]], [[MUL]]
; COMMON: v_div_fixup_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[FMAS]], [[DEN]], [[NUM]]
; COMMON: buffer_store_dwordx2 [[RESULT]]
; COMMON: s_endpgm
define void @fdiv_f64(double addrspace(1)* %out, double addrspace(1)* %in) nounwind {
- %gep.1 = getelementptr double addrspace(1)* %in, i32 1
- %num = load double addrspace(1)* %in
- %den = load double addrspace(1)* %gep.1
+ %gep.1 = getelementptr double, double addrspace(1)* %in, i32 1
+ %num = load double, double addrspace(1)* %in
+ %den = load double, double addrspace(1)* %gep.1
%result = fdiv double %num, %den
store double %result, double addrspace(1)* %out
ret void
@@ -40,7 +40,7 @@ define void @fdiv_f64(double addrspace(1)* %out, double addrspace(1)* %in) nounw
; COMMON-LABEL: {{^}}fdiv_f64_s_v:
define void @fdiv_f64_s_v(double addrspace(1)* %out, double addrspace(1)* %in, double %num) nounwind {
- %den = load double addrspace(1)* %in
+ %den = load double, double addrspace(1)* %in
%result = fdiv double %num, %den
store double %result, double addrspace(1)* %out
ret void
@@ -48,7 +48,7 @@ define void @fdiv_f64_s_v(double addrspace(1)* %out, double addrspace(1)* %in, d
; COMMON-LABEL: {{^}}fdiv_f64_v_s:
define void @fdiv_f64_v_s(double addrspace(1)* %out, double addrspace(1)* %in, double %den) nounwind {
- %num = load double addrspace(1)* %in
+ %num = load double, double addrspace(1)* %in
%result = fdiv double %num, %den
store double %result, double addrspace(1)* %out
ret void
@@ -63,9 +63,9 @@ define void @fdiv_f64_s_s(double addrspace(1)* %out, double %num, double %den) n
; COMMON-LABEL: {{^}}v_fdiv_v2f64:
define void @v_fdiv_v2f64(<2 x double> addrspace(1)* %out, <2 x double> addrspace(1)* %in) nounwind {
- %gep.1 = getelementptr <2 x double> addrspace(1)* %in, i32 1
- %num = load <2 x double> addrspace(1)* %in
- %den = load <2 x double> addrspace(1)* %gep.1
+ %gep.1 = getelementptr <2 x double>, <2 x double> addrspace(1)* %in, i32 1
+ %num = load <2 x double>, <2 x double> addrspace(1)* %in
+ %den = load <2 x double>, <2 x double> addrspace(1)* %gep.1
%result = fdiv <2 x double> %num, %den
store <2 x double> %result, <2 x double> addrspace(1)* %out
ret void
@@ -80,9 +80,9 @@ define void @s_fdiv_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %num, <2
; COMMON-LABEL: {{^}}v_fdiv_v4f64:
define void @v_fdiv_v4f64(<4 x double> addrspace(1)* %out, <4 x double> addrspace(1)* %in) nounwind {
- %gep.1 = getelementptr <4 x double> addrspace(1)* %in, i32 1
- %num = load <4 x double> addrspace(1)* %in
- %den = load <4 x double> addrspace(1)* %gep.1
+ %gep.1 = getelementptr <4 x double>, <4 x double> addrspace(1)* %in, i32 1
+ %num = load <4 x double>, <4 x double> addrspace(1)* %in
+ %den = load <4 x double>, <4 x double> addrspace(1)* %gep.1
%result = fdiv <4 x double> %num, %den
store <4 x double> %result, <4 x double> addrspace(1)* %out
ret void
diff --git a/test/CodeGen/R600/fdiv.ll b/test/CodeGen/R600/fdiv.ll
index 603287fbdf4f..7cbf87336399 100644
--- a/test/CodeGen/R600/fdiv.ll
+++ b/test/CodeGen/R600/fdiv.ll
@@ -59,9 +59,9 @@ entry:
; SI-DAG: v_rcp_f32
; SI-DAG: v_mul_f32
define void @fdiv_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
- %b_ptr = getelementptr <4 x float> addrspace(1)* %in, i32 1
- %a = load <4 x float> addrspace(1) * %in
- %b = load <4 x float> addrspace(1) * %b_ptr
+ %b_ptr = getelementptr <4 x float>, <4 x float> addrspace(1)* %in, i32 1
+ %a = load <4 x float>, <4 x float> addrspace(1) * %in
+ %b = load <4 x float>, <4 x float> addrspace(1) * %b_ptr
%result = fdiv <4 x float> %a, %b
store <4 x float> %result, <4 x float> addrspace(1)* %out
ret void
diff --git a/test/CodeGen/R600/fetch-limits.r600.ll b/test/CodeGen/R600/fetch-limits.r600.ll
index d35573e818d4..e7160ef5d726 100644
--- a/test/CodeGen/R600/fetch-limits.r600.ll
+++ b/test/CodeGen/R600/fetch-limits.r600.ll
@@ -9,15 +9,15 @@
define void @fetch_limits_r600() #0 {
entry:
- %0 = load <4 x float> addrspace(8)* null
- %1 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
- %2 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
- %3 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
- %4 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
- %5 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5)
- %6 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6)
- %7 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7)
- %8 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 8)
+ %0 = load <4 x float>, <4 x float> addrspace(8)* null
+ %1 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+ %2 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
+ %3 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
+ %4 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
+ %5 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5)
+ %6 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6)
+ %7 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7)
+ %8 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 8)
%res0 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %0, i32 0, i32 0, i32 1)
%res1 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %1, i32 0, i32 0, i32 1)
%res2 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %2, i32 0, i32 0, i32 1)
diff --git a/test/CodeGen/R600/fetch-limits.r700+.ll b/test/CodeGen/R600/fetch-limits.r700+.ll
index 17760a05caa4..acaea2aa7943 100644
--- a/test/CodeGen/R600/fetch-limits.r700+.ll
+++ b/test/CodeGen/R600/fetch-limits.r700+.ll
@@ -18,23 +18,23 @@
define void @fetch_limits_r700() #0 {
entry:
- %0 = load <4 x float> addrspace(8)* null
- %1 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
- %2 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
- %3 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
- %4 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
- %5 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5)
- %6 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6)
- %7 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7)
- %8 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 8)
- %9 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9)
- %10 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 10)
- %11 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 11)
- %12 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 12)
- %13 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 13)
- %14 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 14)
- %15 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 15)
- %16 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 16)
+ %0 = load <4 x float>, <4 x float> addrspace(8)* null
+ %1 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+ %2 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
+ %3 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
+ %4 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
+ %5 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5)
+ %6 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6)
+ %7 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7)
+ %8 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 8)
+ %9 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9)
+ %10 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 10)
+ %11 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 11)
+ %12 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 12)
+ %13 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 13)
+ %14 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 14)
+ %15 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 15)
+ %16 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 16)
%res0 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %0, i32 0, i32 0, i32 1)
%res1 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %1, i32 0, i32 0, i32 1)
%res2 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %2, i32 0, i32 0, i32 1)
diff --git a/test/CodeGen/R600/ffloor.f64.ll b/test/CodeGen/R600/ffloor.f64.ll
new file mode 100644
index 000000000000..45f8382c3929
--- /dev/null
+++ b/test/CodeGen/R600/ffloor.f64.ll
@@ -0,0 +1,127 @@
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s
+
+declare double @llvm.fabs.f64(double %Val)
+declare double @llvm.floor.f64(double) nounwind readnone
+declare <2 x double> @llvm.floor.v2f64(<2 x double>) nounwind readnone
+declare <3 x double> @llvm.floor.v3f64(<3 x double>) nounwind readnone
+declare <4 x double> @llvm.floor.v4f64(<4 x double>) nounwind readnone
+declare <8 x double> @llvm.floor.v8f64(<8 x double>) nounwind readnone
+declare <16 x double> @llvm.floor.v16f64(<16 x double>) nounwind readnone
+
+; FUNC-LABEL: {{^}}ffloor_f64:
+; CI: v_floor_f64_e32
+; SI: v_fract_f64_e32
+; SI: v_min_f64
+; SI: v_cmp_class_f64_e64
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
+; SI: v_add_f64
+; SI: s_endpgm
+define void @ffloor_f64(double addrspace(1)* %out, double %x) {
+ %y = call double @llvm.floor.f64(double %x) nounwind readnone
+ store double %y, double addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}ffloor_f64_neg:
+; CI: v_floor_f64_e64
+; SI: v_fract_f64_e64 {{v[[0-9]+:[0-9]+]}}, -[[INPUT:s[[0-9]+:[0-9]+]]]
+; SI: v_min_f64
+; SI: v_cmp_class_f64_e64
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
+; SI: v_add_f64 {{v[[0-9]+:[0-9]+]}}, -[[INPUT]]
+; SI: s_endpgm
+define void @ffloor_f64_neg(double addrspace(1)* %out, double %x) {
+ %neg = fsub double 0.0, %x
+ %y = call double @llvm.floor.f64(double %neg) nounwind readnone
+ store double %y, double addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}ffloor_f64_neg_abs:
+; CI: v_floor_f64_e64
+; SI: v_fract_f64_e64 {{v[[0-9]+:[0-9]+]}}, -|[[INPUT:s[[0-9]+:[0-9]+]]]|
+; SI: v_min_f64
+; SI: v_cmp_class_f64_e64
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
+; SI: v_add_f64 {{v[[0-9]+:[0-9]+]}}, -|[[INPUT]]|
+; SI: s_endpgm
+define void @ffloor_f64_neg_abs(double addrspace(1)* %out, double %x) {
+ %abs = call double @llvm.fabs.f64(double %x)
+ %neg = fsub double 0.0, %abs
+ %y = call double @llvm.floor.f64(double %neg) nounwind readnone
+ store double %y, double addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}ffloor_v2f64:
+; CI: v_floor_f64_e32
+; CI: v_floor_f64_e32
+define void @ffloor_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %x) {
+ %y = call <2 x double> @llvm.floor.v2f64(<2 x double> %x) nounwind readnone
+ store <2 x double> %y, <2 x double> addrspace(1)* %out
+ ret void
+}
+
+; FIXME-FUNC-LABEL: {{^}}ffloor_v3f64:
+; FIXME-CI: v_floor_f64_e32
+; FIXME-CI: v_floor_f64_e32
+; FIXME-CI: v_floor_f64_e32
+; define void @ffloor_v3f64(<3 x double> addrspace(1)* %out, <3 x double> %x) {
+; %y = call <3 x double> @llvm.floor.v3f64(<3 x double> %x) nounwind readnone
+; store <3 x double> %y, <3 x double> addrspace(1)* %out
+; ret void
+; }
+
+; FUNC-LABEL: {{^}}ffloor_v4f64:
+; CI: v_floor_f64_e32
+; CI: v_floor_f64_e32
+; CI: v_floor_f64_e32
+; CI: v_floor_f64_e32
+define void @ffloor_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %x) {
+ %y = call <4 x double> @llvm.floor.v4f64(<4 x double> %x) nounwind readnone
+ store <4 x double> %y, <4 x double> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}ffloor_v8f64:
+; CI: v_floor_f64_e32
+; CI: v_floor_f64_e32
+; CI: v_floor_f64_e32
+; CI: v_floor_f64_e32
+; CI: v_floor_f64_e32
+; CI: v_floor_f64_e32
+; CI: v_floor_f64_e32
+; CI: v_floor_f64_e32
+define void @ffloor_v8f64(<8 x double> addrspace(1)* %out, <8 x double> %x) {
+ %y = call <8 x double> @llvm.floor.v8f64(<8 x double> %x) nounwind readnone
+ store <8 x double> %y, <8 x double> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}ffloor_v16f64:
+; CI: v_floor_f64_e32
+; CI: v_floor_f64_e32
+; CI: v_floor_f64_e32
+; CI: v_floor_f64_e32
+; CI: v_floor_f64_e32
+; CI: v_floor_f64_e32
+; CI: v_floor_f64_e32
+; CI: v_floor_f64_e32
+; CI: v_floor_f64_e32
+; CI: v_floor_f64_e32
+; CI: v_floor_f64_e32
+; CI: v_floor_f64_e32
+; CI: v_floor_f64_e32
+; CI: v_floor_f64_e32
+; CI: v_floor_f64_e32
+; CI: v_floor_f64_e32
+define void @ffloor_v16f64(<16 x double> addrspace(1)* %out, <16 x double> %x) {
+ %y = call <16 x double> @llvm.floor.v16f64(<16 x double> %x) nounwind readnone
+ store <16 x double> %y, <16 x double> addrspace(1)* %out
+ ret void
+}
diff --git a/test/CodeGen/R600/ffloor.ll b/test/CodeGen/R600/ffloor.ll
index 9038ff81b073..61c46ac2bc03 100644
--- a/test/CodeGen/R600/ffloor.ll
+++ b/test/CodeGen/R600/ffloor.ll
@@ -1,106 +1,49 @@
-; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s
-
-declare double @llvm.floor.f64(double) nounwind readnone
-declare <2 x double> @llvm.floor.v2f64(<2 x double>) nounwind readnone
-declare <3 x double> @llvm.floor.v3f64(<3 x double>) nounwind readnone
-declare <4 x double> @llvm.floor.v4f64(<4 x double>) nounwind readnone
-declare <8 x double> @llvm.floor.v8f64(<8 x double>) nounwind readnone
-declare <16 x double> @llvm.floor.v16f64(<16 x double>) nounwind readnone
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s
+
+; FUNC-LABEL: {{^}}floor_f32:
+; SI: v_floor_f32_e32
+; R600: FLOOR
+define void @floor_f32(float addrspace(1)* %out, float %in) {
+ %tmp = call float @llvm.floor.f32(float %in) #0
+ store float %tmp, float addrspace(1)* %out
+ ret void
+}
-; FUNC-LABEL: {{^}}ffloor_f64:
-; CI: v_floor_f64_e32
+; FUNC-LABEL: {{^}}floor_v2f32:
+; SI: v_floor_f32_e32
+; SI: v_floor_f32_e32
-; SI: s_bfe_u32 [[SEXP:s[0-9]+]], {{s[0-9]+}}, 0xb0014
-; SI: s_add_i32 s{{[0-9]+}}, [[SEXP]], 0xfffffc01
-; SI: s_lshr_b64
-; SI: s_not_b64
-; SI: s_and_b64
-; SI-DAG: s_and_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80000000
-; SI-DAG: cmp_lt_i32
-; SI: cndmask_b32
-; SI: cndmask_b32
-; SI: cmp_gt_i32
-; SI: cndmask_b32
-; SI: cndmask_b32
-; SI: v_cmp_lg_f64
-; SI: v_cmp_lt_f64
-; SI: s_and_b64
-; SI: v_cndmask_b32
-; SI: v_cndmask_b32
-; SI: v_add_f64
-; SI: s_endpgm
-define void @ffloor_f64(double addrspace(1)* %out, double %x) {
- %y = call double @llvm.floor.f64(double %x) nounwind readnone
- store double %y, double addrspace(1)* %out
+define void @floor_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %in) {
+ %tmp = call <2 x float> @llvm.floor.v2f32(<2 x float> %in) #0
+ store <2 x float> %tmp, <2 x float> addrspace(1)* %out
ret void
}
-; FUNC-LABEL: {{^}}ffloor_v2f64:
-; CI: v_floor_f64_e32
-; CI: v_floor_f64_e32
-define void @ffloor_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %x) {
- %y = call <2 x double> @llvm.floor.v2f64(<2 x double> %x) nounwind readnone
- store <2 x double> %y, <2 x double> addrspace(1)* %out
+; FUNC-LABEL: {{^}}floor_v4f32:
+; SI: v_floor_f32_e32
+; SI: v_floor_f32_e32
+; SI: v_floor_f32_e32
+; SI: v_floor_f32_e32
+
+; R600: FLOOR
+; R600: FLOOR
+; R600: FLOOR
+; R600: FLOOR
+define void @floor_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %in) {
+ %tmp = call <4 x float> @llvm.floor.v4f32(<4 x float> %in) #0
+ store <4 x float> %tmp, <4 x float> addrspace(1)* %out
ret void
}
-; FIXME-FUNC-LABEL: {{^}}ffloor_v3f64:
-; FIXME-CI: v_floor_f64_e32
-; FIXME-CI: v_floor_f64_e32
-; FIXME-CI: v_floor_f64_e32
-; define void @ffloor_v3f64(<3 x double> addrspace(1)* %out, <3 x double> %x) {
-; %y = call <3 x double> @llvm.floor.v3f64(<3 x double> %x) nounwind readnone
-; store <3 x double> %y, <3 x double> addrspace(1)* %out
-; ret void
-; }
+; Function Attrs: nounwind readonly
+declare float @llvm.floor.f32(float) #0
-; FUNC-LABEL: {{^}}ffloor_v4f64:
-; CI: v_floor_f64_e32
-; CI: v_floor_f64_e32
-; CI: v_floor_f64_e32
-; CI: v_floor_f64_e32
-define void @ffloor_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %x) {
- %y = call <4 x double> @llvm.floor.v4f64(<4 x double> %x) nounwind readnone
- store <4 x double> %y, <4 x double> addrspace(1)* %out
- ret void
-}
+; Function Attrs: nounwind readonly
+declare <2 x float> @llvm.floor.v2f32(<2 x float>) #0
-; FUNC-LABEL: {{^}}ffloor_v8f64:
-; CI: v_floor_f64_e32
-; CI: v_floor_f64_e32
-; CI: v_floor_f64_e32
-; CI: v_floor_f64_e32
-; CI: v_floor_f64_e32
-; CI: v_floor_f64_e32
-; CI: v_floor_f64_e32
-; CI: v_floor_f64_e32
-define void @ffloor_v8f64(<8 x double> addrspace(1)* %out, <8 x double> %x) {
- %y = call <8 x double> @llvm.floor.v8f64(<8 x double> %x) nounwind readnone
- store <8 x double> %y, <8 x double> addrspace(1)* %out
- ret void
-}
+; Function Attrs: nounwind readonly
+declare <4 x float> @llvm.floor.v4f32(<4 x float>) #0
-; FUNC-LABEL: {{^}}ffloor_v16f64:
-; CI: v_floor_f64_e32
-; CI: v_floor_f64_e32
-; CI: v_floor_f64_e32
-; CI: v_floor_f64_e32
-; CI: v_floor_f64_e32
-; CI: v_floor_f64_e32
-; CI: v_floor_f64_e32
-; CI: v_floor_f64_e32
-; CI: v_floor_f64_e32
-; CI: v_floor_f64_e32
-; CI: v_floor_f64_e32
-; CI: v_floor_f64_e32
-; CI: v_floor_f64_e32
-; CI: v_floor_f64_e32
-; CI: v_floor_f64_e32
-; CI: v_floor_f64_e32
-define void @ffloor_v16f64(<16 x double> addrspace(1)* %out, <16 x double> %x) {
- %y = call <16 x double> @llvm.floor.v16f64(<16 x double> %x) nounwind readnone
- store <16 x double> %y, <16 x double> addrspace(1)* %out
- ret void
-}
+attributes #0 = { nounwind readnone }
diff --git a/test/CodeGen/R600/flat-address-space.ll b/test/CodeGen/R600/flat-address-space.ll
index 2e98bf51b23b..425d67d5b07c 100644
--- a/test/CodeGen/R600/flat-address-space.ll
+++ b/test/CodeGen/R600/flat-address-space.ll
@@ -26,7 +26,7 @@ global:
end:
%fptr = phi i32 addrspace(4)* [ %flat_local, %local ], [ %flat_global, %global ]
store i32 %x, i32 addrspace(4)* %fptr, align 4
-; %val = load i32 addrspace(4)* %fptr, align 4
+; %val = load i32, i32 addrspace(4)* %fptr, align 4
; store i32 %val, i32 addrspace(1)* %out, align 4
ret void
}
@@ -87,7 +87,7 @@ define void @store_flat_trunc_i8(i8 addrspace(1)* %gptr, i32 %x) #0 {
; CHECK: flat_load_dword
define void @load_flat_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %gptr) #0 {
%fptr = addrspacecast i32 addrspace(1)* %gptr to i32 addrspace(4)*
- %fload = load i32 addrspace(4)* %fptr, align 4
+ %fload = load i32, i32 addrspace(4)* %fptr, align 4
store i32 %fload, i32 addrspace(1)* %out, align 4
ret void
}
@@ -96,7 +96,7 @@ define void @load_flat_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noa
; CHECK: flat_load_dwordx2
define void @load_flat_i64(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %gptr) #0 {
%fptr = addrspacecast i64 addrspace(1)* %gptr to i64 addrspace(4)*
- %fload = load i64 addrspace(4)* %fptr, align 4
+ %fload = load i64, i64 addrspace(4)* %fptr, align 4
store i64 %fload, i64 addrspace(1)* %out, align 8
ret void
}
@@ -105,7 +105,7 @@ define void @load_flat_i64(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noa
; CHECK: flat_load_dwordx4
define void @load_flat_v4i32(<4 x i32> addrspace(1)* noalias %out, <4 x i32> addrspace(1)* noalias %gptr) #0 {
%fptr = addrspacecast <4 x i32> addrspace(1)* %gptr to <4 x i32> addrspace(4)*
- %fload = load <4 x i32> addrspace(4)* %fptr, align 4
+ %fload = load <4 x i32>, <4 x i32> addrspace(4)* %fptr, align 4
store <4 x i32> %fload, <4 x i32> addrspace(1)* %out, align 8
ret void
}
@@ -114,7 +114,7 @@ define void @load_flat_v4i32(<4 x i32> addrspace(1)* noalias %out, <4 x i32> add
; CHECK: flat_load_sbyte
define void @sextload_flat_i8(i32 addrspace(1)* noalias %out, i8 addrspace(1)* noalias %gptr) #0 {
%fptr = addrspacecast i8 addrspace(1)* %gptr to i8 addrspace(4)*
- %fload = load i8 addrspace(4)* %fptr, align 4
+ %fload = load i8, i8 addrspace(4)* %fptr, align 4
%ext = sext i8 %fload to i32
store i32 %ext, i32 addrspace(1)* %out, align 4
ret void
@@ -124,7 +124,7 @@ define void @sextload_flat_i8(i32 addrspace(1)* noalias %out, i8 addrspace(1)* n
; CHECK: flat_load_ubyte
define void @zextload_flat_i8(i32 addrspace(1)* noalias %out, i8 addrspace(1)* noalias %gptr) #0 {
%fptr = addrspacecast i8 addrspace(1)* %gptr to i8 addrspace(4)*
- %fload = load i8 addrspace(4)* %fptr, align 4
+ %fload = load i8, i8 addrspace(4)* %fptr, align 4
%ext = zext i8 %fload to i32
store i32 %ext, i32 addrspace(1)* %out, align 4
ret void
@@ -134,7 +134,7 @@ define void @zextload_flat_i8(i32 addrspace(1)* noalias %out, i8 addrspace(1)* n
; CHECK: flat_load_sshort
define void @sextload_flat_i16(i32 addrspace(1)* noalias %out, i16 addrspace(1)* noalias %gptr) #0 {
%fptr = addrspacecast i16 addrspace(1)* %gptr to i16 addrspace(4)*
- %fload = load i16 addrspace(4)* %fptr, align 4
+ %fload = load i16, i16 addrspace(4)* %fptr, align 4
%ext = sext i16 %fload to i32
store i32 %ext, i32 addrspace(1)* %out, align 4
ret void
@@ -144,7 +144,7 @@ define void @sextload_flat_i16(i32 addrspace(1)* noalias %out, i16 addrspace(1)*
; CHECK: flat_load_ushort
define void @zextload_flat_i16(i32 addrspace(1)* noalias %out, i16 addrspace(1)* noalias %gptr) #0 {
%fptr = addrspacecast i16 addrspace(1)* %gptr to i16 addrspace(4)*
- %fload = load i16 addrspace(4)* %fptr, align 4
+ %fload = load i16, i16 addrspace(4)* %fptr, align 4
%ext = zext i16 %fload to i32
store i32 %ext, i32 addrspace(1)* %out, align 4
ret void
@@ -166,12 +166,12 @@ define void @zextload_flat_i16(i32 addrspace(1)* noalias %out, i16 addrspace(1)*
define void @store_flat_scratch(i32 addrspace(1)* noalias %out, i32) #0 {
%alloca = alloca i32, i32 9, align 4
%x = call i32 @llvm.r600.read.tidig.x() #3
- %pptr = getelementptr i32* %alloca, i32 %x
+ %pptr = getelementptr i32, i32* %alloca, i32 %x
%fptr = addrspacecast i32* %pptr to i32 addrspace(4)*
store i32 %x, i32 addrspace(4)* %fptr
; Dummy call
call void @llvm.AMDGPU.barrier.local() #1
- %reload = load i32 addrspace(4)* %fptr, align 4
+ %reload = load i32, i32 addrspace(4)* %fptr, align 4
store i32 %reload, i32 addrspace(1)* %out, align 4
ret void
}
diff --git a/test/CodeGen/R600/floor.ll b/test/CodeGen/R600/floor.ll
index 67e86c41fdcf..c6bfb8567a0f 100644
--- a/test/CodeGen/R600/floor.ll
+++ b/test/CodeGen/R600/floor.ll
@@ -1,7 +1,6 @@
-;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
-
-;CHECK: FLOOR * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck %s
+; CHECK: FLOOR * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
define void @test(<4 x float> inreg %reg0) #0 {
%r0 = extractelement <4 x float> %reg0, i32 0
%r1 = call float @floor(float %r0)
@@ -13,4 +12,4 @@ define void @test(<4 x float> inreg %reg0) #0 {
declare float @floor(float) readonly
declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
-attributes #0 = { "ShaderType"="0" } \ No newline at end of file
+attributes #0 = { "ShaderType"="0" }
diff --git a/test/CodeGen/R600/fma-combine.ll b/test/CodeGen/R600/fma-combine.ll
new file mode 100644
index 000000000000..bd574b877117
--- /dev/null
+++ b/test/CodeGen/R600/fma-combine.ll
@@ -0,0 +1,368 @@
+; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs -fp-contract=fast < %s | FileCheck -check-prefix=SI-FASTFMAF -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs -fp-contract=fast < %s | FileCheck -check-prefix=SI-SLOWFMAF -check-prefix=SI -check-prefix=FUNC %s
+
+declare i32 @llvm.r600.read.tidig.x() #0
+declare double @llvm.fabs.f64(double) #0
+declare double @llvm.fma.f64(double, double, double) #0
+declare float @llvm.fma.f32(float, float, float) #0
+
+; (fadd (fmul x, y), z) -> (fma x, y, z)
+; FUNC-LABEL: {{^}}combine_to_fma_f64_0:
+; SI-DAG: buffer_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; SI-DAG: buffer_load_dwordx2 [[B:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
+; SI-DAG: buffer_load_dwordx2 [[C:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:16{{$}}
+; SI: v_fma_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[A]], [[B]], [[C]]
+; SI: buffer_store_dwordx2 [[RESULT]]
+define void @combine_to_fma_f64_0(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #1 {
+ %tid = tail call i32 @llvm.r600.read.tidig.x() #0
+ %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
+ %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
+ %gep.2 = getelementptr double, double addrspace(1)* %gep.0, i32 2
+ %gep.out = getelementptr double, double addrspace(1)* %out, i32 %tid
+
+ %a = load double, double addrspace(1)* %gep.0
+ %b = load double, double addrspace(1)* %gep.1
+ %c = load double, double addrspace(1)* %gep.2
+
+ %mul = fmul double %a, %b
+ %fma = fadd double %mul, %c
+ store double %fma, double addrspace(1)* %gep.out
+ ret void
+}
+
+; (fadd (fmul x, y), z) -> (fma x, y, z)
+; FUNC-LABEL: {{^}}combine_to_fma_f64_0_2use:
+; SI-DAG: buffer_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; SI-DAG: buffer_load_dwordx2 [[B:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
+; SI-DAG: buffer_load_dwordx2 [[C:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:16{{$}}
+; SI-DAG: buffer_load_dwordx2 [[D:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:24{{$}}
+; SI-DAG: v_fma_f64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], [[A]], [[B]], [[C]]
+; SI-DAG: v_fma_f64 [[RESULT1:v\[[0-9]+:[0-9]+\]]], [[A]], [[B]], [[D]]
+; SI-DAG: buffer_store_dwordx2 [[RESULT0]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; SI-DAG: buffer_store_dwordx2 [[RESULT1]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
+; SI: s_endpgm
+define void @combine_to_fma_f64_0_2use(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #1 {
+ %tid = tail call i32 @llvm.r600.read.tidig.x() #0
+ %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
+ %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
+ %gep.2 = getelementptr double, double addrspace(1)* %gep.0, i32 2
+ %gep.3 = getelementptr double, double addrspace(1)* %gep.0, i32 3
+ %gep.out.0 = getelementptr double, double addrspace(1)* %out, i32 %tid
+ %gep.out.1 = getelementptr double, double addrspace(1)* %gep.out.0, i32 1
+
+ %a = load double, double addrspace(1)* %gep.0
+ %b = load double, double addrspace(1)* %gep.1
+ %c = load double, double addrspace(1)* %gep.2
+ %d = load double, double addrspace(1)* %gep.3
+
+ %mul = fmul double %a, %b
+ %fma0 = fadd double %mul, %c
+ %fma1 = fadd double %mul, %d
+ store double %fma0, double addrspace(1)* %gep.out.0
+ store double %fma1, double addrspace(1)* %gep.out.1
+ ret void
+}
+
+; (fadd x, (fmul y, z)) -> (fma y, z, x)
+; FUNC-LABEL: {{^}}combine_to_fma_f64_1:
+; SI-DAG: buffer_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; SI-DAG: buffer_load_dwordx2 [[B:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
+; SI-DAG: buffer_load_dwordx2 [[C:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:16{{$}}
+; SI: v_fma_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[A]], [[B]], [[C]]
+; SI: buffer_store_dwordx2 [[RESULT]]
+define void @combine_to_fma_f64_1(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #1 {
+ %tid = tail call i32 @llvm.r600.read.tidig.x() #0
+ %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
+ %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
+ %gep.2 = getelementptr double, double addrspace(1)* %gep.0, i32 2
+ %gep.out = getelementptr double, double addrspace(1)* %out, i32 %tid
+
+ %a = load double, double addrspace(1)* %gep.0
+ %b = load double, double addrspace(1)* %gep.1
+ %c = load double, double addrspace(1)* %gep.2
+
+ %mul = fmul double %a, %b
+ %fma = fadd double %c, %mul
+ store double %fma, double addrspace(1)* %gep.out
+ ret void
+}
+
+; (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
+; FUNC-LABEL: {{^}}combine_to_fma_fsub_0_f64:
+; SI-DAG: buffer_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; SI-DAG: buffer_load_dwordx2 [[B:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
+; SI-DAG: buffer_load_dwordx2 [[C:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:16{{$}}
+; SI: v_fma_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[A]], [[B]], -[[C]]
+; SI: buffer_store_dwordx2 [[RESULT]]
+define void @combine_to_fma_fsub_0_f64(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #1 {
+ %tid = tail call i32 @llvm.r600.read.tidig.x() #0
+ %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
+ %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
+ %gep.2 = getelementptr double, double addrspace(1)* %gep.0, i32 2
+ %gep.out = getelementptr double, double addrspace(1)* %out, i32 %tid
+
+ %a = load double, double addrspace(1)* %gep.0
+ %b = load double, double addrspace(1)* %gep.1
+ %c = load double, double addrspace(1)* %gep.2
+
+ %mul = fmul double %a, %b
+ %fma = fsub double %mul, %c
+ store double %fma, double addrspace(1)* %gep.out
+ ret void
+}
+
+; (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
+; FUNC-LABEL: {{^}}combine_to_fma_fsub_f64_0_2use:
+; SI-DAG: buffer_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; SI-DAG: buffer_load_dwordx2 [[B:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
+; SI-DAG: buffer_load_dwordx2 [[C:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:16{{$}}
+; SI-DAG: buffer_load_dwordx2 [[D:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:24{{$}}
+; SI-DAG: v_fma_f64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], [[A]], [[B]], -[[C]]
+; SI-DAG: v_fma_f64 [[RESULT1:v\[[0-9]+:[0-9]+\]]], [[A]], [[B]], -[[D]]
+; SI-DAG: buffer_store_dwordx2 [[RESULT0]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; SI-DAG: buffer_store_dwordx2 [[RESULT1]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
+; SI: s_endpgm
+define void @combine_to_fma_fsub_f64_0_2use(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #1 {
+ %tid = tail call i32 @llvm.r600.read.tidig.x() #0
+ %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
+ %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
+ %gep.2 = getelementptr double, double addrspace(1)* %gep.0, i32 2
+ %gep.3 = getelementptr double, double addrspace(1)* %gep.0, i32 3
+ %gep.out.0 = getelementptr double, double addrspace(1)* %out, i32 %tid
+ %gep.out.1 = getelementptr double, double addrspace(1)* %gep.out.0, i32 1
+
+ %a = load double, double addrspace(1)* %gep.0
+ %b = load double, double addrspace(1)* %gep.1
+ %c = load double, double addrspace(1)* %gep.2
+ %d = load double, double addrspace(1)* %gep.3
+
+ %mul = fmul double %a, %b
+ %fma0 = fsub double %mul, %c
+ %fma1 = fsub double %mul, %d
+ store double %fma0, double addrspace(1)* %gep.out.0
+ store double %fma1, double addrspace(1)* %gep.out.1
+ ret void
+}
+
+; (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
+; FUNC-LABEL: {{^}}combine_to_fma_fsub_1_f64:
+; SI-DAG: buffer_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; SI-DAG: buffer_load_dwordx2 [[B:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
+; SI-DAG: buffer_load_dwordx2 [[C:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:16{{$}}
+; SI: v_fma_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], -[[A]], [[B]], [[C]]
+; SI: buffer_store_dwordx2 [[RESULT]]
+define void @combine_to_fma_fsub_1_f64(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #1 {
+ %tid = tail call i32 @llvm.r600.read.tidig.x() #0
+ %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
+ %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
+ %gep.2 = getelementptr double, double addrspace(1)* %gep.0, i32 2
+ %gep.out = getelementptr double, double addrspace(1)* %out, i32 %tid
+
+ %a = load double, double addrspace(1)* %gep.0
+ %b = load double, double addrspace(1)* %gep.1
+ %c = load double, double addrspace(1)* %gep.2
+
+ %mul = fmul double %a, %b
+ %fma = fsub double %c, %mul
+ store double %fma, double addrspace(1)* %gep.out
+ ret void
+}
+
+; (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
+; FUNC-LABEL: {{^}}combine_to_fma_fsub_1_f64_2use:
+; SI-DAG: buffer_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; SI-DAG: buffer_load_dwordx2 [[B:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
+; SI-DAG: buffer_load_dwordx2 [[C:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:16{{$}}
+; SI-DAG: buffer_load_dwordx2 [[D:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:24{{$}}
+; SI-DAG: v_fma_f64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], -[[A]], [[B]], [[C]]
+; SI-DAG: v_fma_f64 [[RESULT1:v\[[0-9]+:[0-9]+\]]], -[[A]], [[B]], [[D]]
+; SI-DAG: buffer_store_dwordx2 [[RESULT0]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; SI-DAG: buffer_store_dwordx2 [[RESULT1]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
+; SI: s_endpgm
+define void @combine_to_fma_fsub_1_f64_2use(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #1 {
+ %tid = tail call i32 @llvm.r600.read.tidig.x() #0
+ %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
+ %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
+ %gep.2 = getelementptr double, double addrspace(1)* %gep.0, i32 2
+ %gep.3 = getelementptr double, double addrspace(1)* %gep.0, i32 3
+ %gep.out.0 = getelementptr double, double addrspace(1)* %out, i32 %tid
+ %gep.out.1 = getelementptr double, double addrspace(1)* %gep.out.0, i32 1
+
+ %a = load double, double addrspace(1)* %gep.0
+ %b = load double, double addrspace(1)* %gep.1
+ %c = load double, double addrspace(1)* %gep.2
+ %d = load double, double addrspace(1)* %gep.3
+
+ %mul = fmul double %a, %b
+ %fma0 = fsub double %c, %mul
+ %fma1 = fsub double %d, %mul
+ store double %fma0, double addrspace(1)* %gep.out.0
+ store double %fma1, double addrspace(1)* %gep.out.1
+ ret void
+}
+
+; (fsub (fneg (fmul x, y)), z) -> (fma (fneg x), y, (fneg z))
+; FUNC-LABEL: {{^}}combine_to_fma_fsub_2_f64:
+; SI-DAG: buffer_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; SI-DAG: buffer_load_dwordx2 [[B:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
+; SI-DAG: buffer_load_dwordx2 [[C:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:16{{$}}
+; SI: v_fma_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], -[[A]], [[B]], -[[C]]
+; SI: buffer_store_dwordx2 [[RESULT]]
+define void @combine_to_fma_fsub_2_f64(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #1 {
+ %tid = tail call i32 @llvm.r600.read.tidig.x() #0
+ %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
+ %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
+ %gep.2 = getelementptr double, double addrspace(1)* %gep.0, i32 2
+ %gep.out = getelementptr double, double addrspace(1)* %out, i32 %tid
+
+ %a = load double, double addrspace(1)* %gep.0
+ %b = load double, double addrspace(1)* %gep.1
+ %c = load double, double addrspace(1)* %gep.2
+
+ %mul = fmul double %a, %b
+ %mul.neg = fsub double -0.0, %mul
+ %fma = fsub double %mul.neg, %c
+
+ store double %fma, double addrspace(1)* %gep.out
+ ret void
+}
+
+; (fsub (fneg (fmul x, y)), z) -> (fma (fneg x), y, (fneg z))
+; FUNC-LABEL: {{^}}combine_to_fma_fsub_2_f64_2uses_neg:
+; SI-DAG: buffer_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; SI-DAG: buffer_load_dwordx2 [[B:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
+; SI-DAG: buffer_load_dwordx2 [[C:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:16{{$}}
+; SI-DAG: v_fma_f64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], -[[A]], [[B]], -[[C]]
+; SI-DAG: v_fma_f64 [[RESULT1:v\[[0-9]+:[0-9]+\]]], -[[A]], [[B]], -[[D]]
+; SI-DAG: buffer_store_dwordx2 [[RESULT0]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; SI-DAG: buffer_store_dwordx2 [[RESULT1]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
+; SI: s_endpgm
+define void @combine_to_fma_fsub_2_f64_2uses_neg(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #1 {
+ %tid = tail call i32 @llvm.r600.read.tidig.x() #0
+ %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
+ %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
+ %gep.2 = getelementptr double, double addrspace(1)* %gep.0, i32 2
+ %gep.3 = getelementptr double, double addrspace(1)* %gep.0, i32 3
+ %gep.out.0 = getelementptr double, double addrspace(1)* %out, i32 %tid
+ %gep.out.1 = getelementptr double, double addrspace(1)* %gep.out.0, i32 1
+
+ %a = load double, double addrspace(1)* %gep.0
+ %b = load double, double addrspace(1)* %gep.1
+ %c = load double, double addrspace(1)* %gep.2
+ %d = load double, double addrspace(1)* %gep.3
+
+ %mul = fmul double %a, %b
+ %mul.neg = fsub double -0.0, %mul
+ %fma0 = fsub double %mul.neg, %c
+ %fma1 = fsub double %mul.neg, %d
+
+ store double %fma0, double addrspace(1)* %gep.out.0
+ store double %fma1, double addrspace(1)* %gep.out.1
+ ret void
+}
+
+; (fsub (fneg (fmul x, y)), z) -> (fma (fneg x), y, (fneg z))
+; FUNC-LABEL: {{^}}combine_to_fma_fsub_2_f64_2uses_mul:
+; SI-DAG: buffer_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; SI-DAG: buffer_load_dwordx2 [[B:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
+; SI-DAG: buffer_load_dwordx2 [[C:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:16{{$}}
+; SI-DAG: v_fma_f64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], -[[A]], [[B]], -[[C]]
+; SI-DAG: v_fma_f64 [[RESULT1:v\[[0-9]+:[0-9]+\]]], [[A]], [[B]], -[[D]]
+; SI-DAG: buffer_store_dwordx2 [[RESULT0]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; SI-DAG: buffer_store_dwordx2 [[RESULT1]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
+; SI: s_endpgm
+define void @combine_to_fma_fsub_2_f64_2uses_mul(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #1 {
+ %tid = tail call i32 @llvm.r600.read.tidig.x() #0
+ %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
+ %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
+ %gep.2 = getelementptr double, double addrspace(1)* %gep.0, i32 2
+ %gep.3 = getelementptr double, double addrspace(1)* %gep.0, i32 3
+ %gep.out.0 = getelementptr double, double addrspace(1)* %out, i32 %tid
+ %gep.out.1 = getelementptr double, double addrspace(1)* %gep.out.0, i32 1
+
+ %a = load double, double addrspace(1)* %gep.0
+ %b = load double, double addrspace(1)* %gep.1
+ %c = load double, double addrspace(1)* %gep.2
+ %d = load double, double addrspace(1)* %gep.3
+
+ %mul = fmul double %a, %b
+ %mul.neg = fsub double -0.0, %mul
+ %fma0 = fsub double %mul.neg, %c
+ %fma1 = fsub double %mul, %d
+
+ store double %fma0, double addrspace(1)* %gep.out.0
+ store double %fma1, double addrspace(1)* %gep.out.1
+ ret void
+}
+
+; fold (fsub (fma x, y, (fmul u, v)), z) -> (fma x, y (fma u, v, (fneg z)))
+
+; FUNC-LABEL: {{^}}aggressive_combine_to_fma_fsub_0_f64:
+; SI-DAG: buffer_load_dwordx2 [[X:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; SI-DAG: buffer_load_dwordx2 [[Y:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
+; SI-DAG: buffer_load_dwordx2 [[Z:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:16{{$}}
+; SI-DAG: buffer_load_dwordx2 [[U:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:24{{$}}
+; SI-DAG: buffer_load_dwordx2 [[V:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:32{{$}}
+; SI: v_fma_f64 [[FMA0:v\[[0-9]+:[0-9]+\]]], [[U]], [[V]], -[[Z]]
+; SI: v_fma_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[X]], [[Y]], [[FMA0]]
+; SI: buffer_store_dwordx2 [[RESULT]]
+define void @aggressive_combine_to_fma_fsub_0_f64(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #1 {
+ %tid = tail call i32 @llvm.r600.read.tidig.x() #0
+ %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
+ %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
+ %gep.2 = getelementptr double, double addrspace(1)* %gep.0, i32 2
+ %gep.3 = getelementptr double, double addrspace(1)* %gep.0, i32 3
+ %gep.4 = getelementptr double, double addrspace(1)* %gep.0, i32 4
+ %gep.out = getelementptr double, double addrspace(1)* %out, i32 %tid
+
+ %x = load double, double addrspace(1)* %gep.0
+ %y = load double, double addrspace(1)* %gep.1
+ %z = load double, double addrspace(1)* %gep.2
+ %u = load double, double addrspace(1)* %gep.3
+ %v = load double, double addrspace(1)* %gep.4
+
+ %tmp0 = fmul double %u, %v
+ %tmp1 = call double @llvm.fma.f64(double %x, double %y, double %tmp0) #0
+ %tmp2 = fsub double %tmp1, %z
+
+ store double %tmp2, double addrspace(1)* %gep.out
+ ret void
+}
+
+; fold (fsub x, (fma y, z, (fmul u, v)))
+; -> (fma (fneg y), z, (fma (fneg u), v, x))
+
+; FUNC-LABEL: {{^}}aggressive_combine_to_fma_fsub_1_f64:
+; SI-DAG: buffer_load_dwordx2 [[X:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; SI-DAG: buffer_load_dwordx2 [[Y:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
+; SI-DAG: buffer_load_dwordx2 [[Z:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:16{{$}}
+; SI-DAG: buffer_load_dwordx2 [[U:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:24{{$}}
+; SI-DAG: buffer_load_dwordx2 [[V:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:32{{$}}
+; SI: v_fma_f64 [[FMA0:v\[[0-9]+:[0-9]+\]]], -[[U]], [[V]], [[X]]
+; SI: v_fma_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], -[[Y]], [[Z]], [[FMA0]]
+; SI: buffer_store_dwordx2 [[RESULT]]
+define void @aggressive_combine_to_fma_fsub_1_f64(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #1 {
+ %tid = tail call i32 @llvm.r600.read.tidig.x() #0
+ %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
+ %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
+ %gep.2 = getelementptr double, double addrspace(1)* %gep.0, i32 2
+ %gep.3 = getelementptr double, double addrspace(1)* %gep.0, i32 3
+ %gep.4 = getelementptr double, double addrspace(1)* %gep.0, i32 4
+ %gep.out = getelementptr double, double addrspace(1)* %out, i32 %tid
+
+ %x = load double, double addrspace(1)* %gep.0
+ %y = load double, double addrspace(1)* %gep.1
+ %z = load double, double addrspace(1)* %gep.2
+ %u = load double, double addrspace(1)* %gep.3
+ %v = load double, double addrspace(1)* %gep.4
+
+ %tmp0 = fmul double %u, %v
+ %tmp1 = call double @llvm.fma.f64(double %y, double %z, double %tmp0) #0
+ %tmp2 = fsub double %x, %tmp1
+
+ store double %tmp2, double addrspace(1)* %gep.out
+ ret void
+}
+
+attributes #0 = { nounwind readnone }
+attributes #1 = { nounwind }
diff --git a/test/CodeGen/R600/fma.f64.ll b/test/CodeGen/R600/fma.f64.ll
index bca312bfa751..0a55ef778557 100644
--- a/test/CodeGen/R600/fma.f64.ll
+++ b/test/CodeGen/R600/fma.f64.ll
@@ -10,9 +10,9 @@ declare <4 x double> @llvm.fma.v4f64(<4 x double>, <4 x double>, <4 x double>) n
; SI: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
define void @fma_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
double addrspace(1)* %in2, double addrspace(1)* %in3) {
- %r0 = load double addrspace(1)* %in1
- %r1 = load double addrspace(1)* %in2
- %r2 = load double addrspace(1)* %in3
+ %r0 = load double, double addrspace(1)* %in1
+ %r1 = load double, double addrspace(1)* %in2
+ %r2 = load double, double addrspace(1)* %in3
%r3 = tail call double @llvm.fma.f64(double %r0, double %r1, double %r2)
store double %r3, double addrspace(1)* %out
ret void
@@ -23,9 +23,9 @@ define void @fma_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
; SI: v_fma_f64
define void @fma_v2f64(<2 x double> addrspace(1)* %out, <2 x double> addrspace(1)* %in1,
<2 x double> addrspace(1)* %in2, <2 x double> addrspace(1)* %in3) {
- %r0 = load <2 x double> addrspace(1)* %in1
- %r1 = load <2 x double> addrspace(1)* %in2
- %r2 = load <2 x double> addrspace(1)* %in3
+ %r0 = load <2 x double>, <2 x double> addrspace(1)* %in1
+ %r1 = load <2 x double>, <2 x double> addrspace(1)* %in2
+ %r2 = load <2 x double>, <2 x double> addrspace(1)* %in3
%r3 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %r0, <2 x double> %r1, <2 x double> %r2)
store <2 x double> %r3, <2 x double> addrspace(1)* %out
ret void
@@ -38,9 +38,9 @@ define void @fma_v2f64(<2 x double> addrspace(1)* %out, <2 x double> addrspace(1
; SI: v_fma_f64
define void @fma_v4f64(<4 x double> addrspace(1)* %out, <4 x double> addrspace(1)* %in1,
<4 x double> addrspace(1)* %in2, <4 x double> addrspace(1)* %in3) {
- %r0 = load <4 x double> addrspace(1)* %in1
- %r1 = load <4 x double> addrspace(1)* %in2
- %r2 = load <4 x double> addrspace(1)* %in3
+ %r0 = load <4 x double>, <4 x double> addrspace(1)* %in1
+ %r1 = load <4 x double>, <4 x double> addrspace(1)* %in2
+ %r2 = load <4 x double>, <4 x double> addrspace(1)* %in3
%r3 = tail call <4 x double> @llvm.fma.v4f64(<4 x double> %r0, <4 x double> %r1, <4 x double> %r2)
store <4 x double> %r3, <4 x double> addrspace(1)* %out
ret void
diff --git a/test/CodeGen/R600/fma.ll b/test/CodeGen/R600/fma.ll
index f3861ffa2835..d6024aa0b4c5 100644
--- a/test/CodeGen/R600/fma.ll
+++ b/test/CodeGen/R600/fma.ll
@@ -14,9 +14,9 @@ declare i32 @llvm.r600.read.tidig.x() nounwind readnone
; EG: FMA {{\*? *}}[[RES]]
define void @fma_f32(float addrspace(1)* %out, float addrspace(1)* %in1,
float addrspace(1)* %in2, float addrspace(1)* %in3) {
- %r0 = load float addrspace(1)* %in1
- %r1 = load float addrspace(1)* %in2
- %r2 = load float addrspace(1)* %in3
+ %r0 = load float, float addrspace(1)* %in1
+ %r1 = load float, float addrspace(1)* %in2
+ %r2 = load float, float addrspace(1)* %in3
%r3 = tail call float @llvm.fma.f32(float %r0, float %r1, float %r2)
store float %r3, float addrspace(1)* %out
ret void
@@ -31,9 +31,9 @@ define void @fma_f32(float addrspace(1)* %out, float addrspace(1)* %in1,
; EG-DAG: FMA {{\*? *}}[[RES]].[[CHHI]]
define void @fma_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)* %in1,
<2 x float> addrspace(1)* %in2, <2 x float> addrspace(1)* %in3) {
- %r0 = load <2 x float> addrspace(1)* %in1
- %r1 = load <2 x float> addrspace(1)* %in2
- %r2 = load <2 x float> addrspace(1)* %in3
+ %r0 = load <2 x float>, <2 x float> addrspace(1)* %in1
+ %r1 = load <2 x float>, <2 x float> addrspace(1)* %in2
+ %r2 = load <2 x float>, <2 x float> addrspace(1)* %in3
%r3 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %r0, <2 x float> %r1, <2 x float> %r2)
store <2 x float> %r3, <2 x float> addrspace(1)* %out
ret void
@@ -52,9 +52,9 @@ define void @fma_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)*
; EG-DAG: FMA {{\*? *}}[[RES]].W
define void @fma_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in1,
<4 x float> addrspace(1)* %in2, <4 x float> addrspace(1)* %in3) {
- %r0 = load <4 x float> addrspace(1)* %in1
- %r1 = load <4 x float> addrspace(1)* %in2
- %r2 = load <4 x float> addrspace(1)* %in3
+ %r0 = load <4 x float>, <4 x float> addrspace(1)* %in1
+ %r1 = load <4 x float>, <4 x float> addrspace(1)* %in2
+ %r2 = load <4 x float>, <4 x float> addrspace(1)* %in3
%r3 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %r0, <4 x float> %r1, <4 x float> %r2)
store <4 x float> %r3, <4 x float> addrspace(1)* %out
ret void
@@ -64,12 +64,12 @@ define void @fma_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)*
; SI: v_fma_f32 {{v[0-9]+}}, 2.0, {{v[0-9]+}}, {{v[0-9]+}}
define void @fma_commute_mul_inline_imm_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a, float addrspace(1)* noalias %in.b) nounwind {
%tid = tail call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %in.a.gep = getelementptr float addrspace(1)* %in.a, i32 %tid
- %in.b.gep = getelementptr float addrspace(1)* %in.b, i32 %tid
- %out.gep = getelementptr float addrspace(1)* %out, i32 %tid
+ %in.a.gep = getelementptr float, float addrspace(1)* %in.a, i32 %tid
+ %in.b.gep = getelementptr float, float addrspace(1)* %in.b, i32 %tid
+ %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
- %a = load float addrspace(1)* %in.a.gep, align 4
- %b = load float addrspace(1)* %in.b.gep, align 4
+ %a = load float, float addrspace(1)* %in.a.gep, align 4
+ %b = load float, float addrspace(1)* %in.b.gep, align 4
%fma = call float @llvm.fma.f32(float %a, float 2.0, float %b)
store float %fma, float addrspace(1)* %out.gep, align 4
@@ -79,12 +79,12 @@ define void @fma_commute_mul_inline_imm_f32(float addrspace(1)* noalias %out, fl
; FUNC-LABEL: @fma_commute_mul_s_f32
define void @fma_commute_mul_s_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a, float addrspace(1)* noalias %in.b, float %b) nounwind {
%tid = tail call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %in.a.gep = getelementptr float addrspace(1)* %in.a, i32 %tid
- %in.b.gep = getelementptr float addrspace(1)* %in.b, i32 %tid
- %out.gep = getelementptr float addrspace(1)* %out, i32 %tid
+ %in.a.gep = getelementptr float, float addrspace(1)* %in.a, i32 %tid
+ %in.b.gep = getelementptr float, float addrspace(1)* %in.b, i32 %tid
+ %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
- %a = load float addrspace(1)* %in.a.gep, align 4
- %c = load float addrspace(1)* %in.b.gep, align 4
+ %a = load float, float addrspace(1)* %in.a.gep, align 4
+ %c = load float, float addrspace(1)* %in.b.gep, align 4
%fma = call float @llvm.fma.f32(float %a, float %b, float %c)
store float %fma, float addrspace(1)* %out.gep, align 4
diff --git a/test/CodeGen/R600/fmax3.f64.ll b/test/CodeGen/R600/fmax3.f64.ll
index 5ca789de2a08..f78c71b28264 100644
--- a/test/CodeGen/R600/fmax3.f64.ll
+++ b/test/CodeGen/R600/fmax3.f64.ll
@@ -12,11 +12,11 @@ declare double @llvm.maxnum.f64(double, double) nounwind readnone
; SI: buffer_store_dwordx2 [[RESULT]],
; SI: s_endpgm
define void @test_fmax3_f64(double addrspace(1)* %out, double addrspace(1)* %aptr) nounwind {
- %bptr = getelementptr double addrspace(1)* %aptr, i32 1
- %cptr = getelementptr double addrspace(1)* %aptr, i32 2
- %a = load double addrspace(1)* %aptr, align 8
- %b = load double addrspace(1)* %bptr, align 8
- %c = load double addrspace(1)* %cptr, align 8
+ %bptr = getelementptr double, double addrspace(1)* %aptr, i32 1
+ %cptr = getelementptr double, double addrspace(1)* %aptr, i32 2
+ %a = load double, double addrspace(1)* %aptr, align 8
+ %b = load double, double addrspace(1)* %bptr, align 8
+ %c = load double, double addrspace(1)* %cptr, align 8
%f0 = call double @llvm.maxnum.f64(double %a, double %b) nounwind readnone
%f1 = call double @llvm.maxnum.f64(double %f0, double %c) nounwind readnone
store double %f1, double addrspace(1)* %out, align 8
diff --git a/test/CodeGen/R600/fmax3.ll b/test/CodeGen/R600/fmax3.ll
index e1b477c5921e..c3028a6217d5 100644
--- a/test/CodeGen/R600/fmax3.ll
+++ b/test/CodeGen/R600/fmax3.ll
@@ -4,16 +4,16 @@
declare float @llvm.maxnum.f32(float, float) nounwind readnone
; SI-LABEL: {{^}}test_fmax3_olt_0:
-; SI: buffer_load_dword [[REGA:v[0-9]+]]
-; SI: buffer_load_dword [[REGB:v[0-9]+]]
; SI: buffer_load_dword [[REGC:v[0-9]+]]
+; SI: buffer_load_dword [[REGB:v[0-9]+]]
+; SI: buffer_load_dword [[REGA:v[0-9]+]]
; SI: v_max3_f32 [[RESULT:v[0-9]+]], [[REGC]], [[REGB]], [[REGA]]
; SI: buffer_store_dword [[RESULT]],
; SI: s_endpgm
define void @test_fmax3_olt_0(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) nounwind {
- %a = load float addrspace(1)* %aptr, align 4
- %b = load float addrspace(1)* %bptr, align 4
- %c = load float addrspace(1)* %cptr, align 4
+ %a = load float, float addrspace(1)* %aptr, align 4
+ %b = load float, float addrspace(1)* %bptr, align 4
+ %c = load float, float addrspace(1)* %cptr, align 4
%f0 = call float @llvm.maxnum.f32(float %a, float %b) nounwind readnone
%f1 = call float @llvm.maxnum.f32(float %f0, float %c) nounwind readnone
store float %f1, float addrspace(1)* %out, align 4
@@ -22,16 +22,16 @@ define void @test_fmax3_olt_0(float addrspace(1)* %out, float addrspace(1)* %apt
; Commute operand of second fmax
; SI-LABEL: {{^}}test_fmax3_olt_1:
-; SI: buffer_load_dword [[REGA:v[0-9]+]]
; SI: buffer_load_dword [[REGB:v[0-9]+]]
+; SI: buffer_load_dword [[REGA:v[0-9]+]]
; SI: buffer_load_dword [[REGC:v[0-9]+]]
; SI: v_max3_f32 [[RESULT:v[0-9]+]], [[REGC]], [[REGB]], [[REGA]]
; SI: buffer_store_dword [[RESULT]],
; SI: s_endpgm
define void @test_fmax3_olt_1(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) nounwind {
- %a = load float addrspace(1)* %aptr, align 4
- %b = load float addrspace(1)* %bptr, align 4
- %c = load float addrspace(1)* %cptr, align 4
+ %a = load float, float addrspace(1)* %aptr, align 4
+ %b = load float, float addrspace(1)* %bptr, align 4
+ %c = load float, float addrspace(1)* %cptr, align 4
%f0 = call float @llvm.maxnum.f32(float %a, float %b) nounwind readnone
%f1 = call float @llvm.maxnum.f32(float %c, float %f0) nounwind readnone
store float %f1, float addrspace(1)* %out, align 4
diff --git a/test/CodeGen/R600/fmax_legacy.f64.ll b/test/CodeGen/R600/fmax_legacy.f64.ll
index a615825a45d3..828243888ac7 100644
--- a/test/CodeGen/R600/fmax_legacy.f64.ll
+++ b/test/CodeGen/R600/fmax_legacy.f64.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; Make sure we don't try to form FMAX_LEGACY nodes with f64
declare i32 @llvm.r600.read.tidig.x() #1
@@ -6,11 +6,11 @@ declare i32 @llvm.r600.read.tidig.x() #1
; FUNC-LABEL: @test_fmax_legacy_uge_f64
define void @test_fmax_legacy_uge_f64(double addrspace(1)* %out, double addrspace(1)* %in) #0 {
%tid = call i32 @llvm.r600.read.tidig.x() #1
- %gep.0 = getelementptr double addrspace(1)* %in, i32 %tid
- %gep.1 = getelementptr double addrspace(1)* %gep.0, i32 1
+ %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
+ %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
- %a = load double addrspace(1)* %gep.0, align 8
- %b = load double addrspace(1)* %gep.1, align 8
+ %a = load double, double addrspace(1)* %gep.0, align 8
+ %b = load double, double addrspace(1)* %gep.1, align 8
%cmp = fcmp uge double %a, %b
%val = select i1 %cmp, double %a, double %b
@@ -21,11 +21,11 @@ define void @test_fmax_legacy_uge_f64(double addrspace(1)* %out, double addrspac
; FUNC-LABEL: @test_fmax_legacy_oge_f64
define void @test_fmax_legacy_oge_f64(double addrspace(1)* %out, double addrspace(1)* %in) #0 {
%tid = call i32 @llvm.r600.read.tidig.x() #1
- %gep.0 = getelementptr double addrspace(1)* %in, i32 %tid
- %gep.1 = getelementptr double addrspace(1)* %gep.0, i32 1
+ %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
+ %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
- %a = load double addrspace(1)* %gep.0, align 8
- %b = load double addrspace(1)* %gep.1, align 8
+ %a = load double, double addrspace(1)* %gep.0, align 8
+ %b = load double, double addrspace(1)* %gep.1, align 8
%cmp = fcmp oge double %a, %b
%val = select i1 %cmp, double %a, double %b
@@ -36,11 +36,11 @@ define void @test_fmax_legacy_oge_f64(double addrspace(1)* %out, double addrspac
; FUNC-LABEL: @test_fmax_legacy_ugt_f64
define void @test_fmax_legacy_ugt_f64(double addrspace(1)* %out, double addrspace(1)* %in) #0 {
%tid = call i32 @llvm.r600.read.tidig.x() #1
- %gep.0 = getelementptr double addrspace(1)* %in, i32 %tid
- %gep.1 = getelementptr double addrspace(1)* %gep.0, i32 1
+ %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
+ %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
- %a = load double addrspace(1)* %gep.0, align 8
- %b = load double addrspace(1)* %gep.1, align 8
+ %a = load double, double addrspace(1)* %gep.0, align 8
+ %b = load double, double addrspace(1)* %gep.1, align 8
%cmp = fcmp ugt double %a, %b
%val = select i1 %cmp, double %a, double %b
@@ -51,11 +51,11 @@ define void @test_fmax_legacy_ugt_f64(double addrspace(1)* %out, double addrspac
; FUNC-LABEL: @test_fmax_legacy_ogt_f64
define void @test_fmax_legacy_ogt_f64(double addrspace(1)* %out, double addrspace(1)* %in) #0 {
%tid = call i32 @llvm.r600.read.tidig.x() #1
- %gep.0 = getelementptr double addrspace(1)* %in, i32 %tid
- %gep.1 = getelementptr double addrspace(1)* %gep.0, i32 1
+ %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
+ %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
- %a = load double addrspace(1)* %gep.0, align 8
- %b = load double addrspace(1)* %gep.1, align 8
+ %a = load double, double addrspace(1)* %gep.0, align 8
+ %b = load double, double addrspace(1)* %gep.1, align 8
%cmp = fcmp ogt double %a, %b
%val = select i1 %cmp, double %a, double %b
diff --git a/test/CodeGen/R600/fmax_legacy.ll b/test/CodeGen/R600/fmax_legacy.ll
index 46f0e9831e6a..413957d2982a 100644
--- a/test/CodeGen/R600/fmax_legacy.ll
+++ b/test/CodeGen/R600/fmax_legacy.ll
@@ -15,11 +15,11 @@ declare i32 @llvm.r600.read.tidig.x() #1
; EG: MAX
define void @test_fmax_legacy_uge_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
%tid = call i32 @llvm.r600.read.tidig.x() #1
- %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid
- %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1
+ %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
+ %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
- %a = load float addrspace(1)* %gep.0, align 4
- %b = load float addrspace(1)* %gep.1, align 4
+ %a = load float, float addrspace(1)* %gep.0, align 4
+ %b = load float, float addrspace(1)* %gep.1, align 4
%cmp = fcmp uge float %a, %b
%val = select i1 %cmp, float %a, float %b
@@ -35,11 +35,11 @@ define void @test_fmax_legacy_uge_f32(float addrspace(1)* %out, float addrspace(
; EG: MAX
define void @test_fmax_legacy_oge_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
%tid = call i32 @llvm.r600.read.tidig.x() #1
- %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid
- %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1
+ %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
+ %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
- %a = load float addrspace(1)* %gep.0, align 4
- %b = load float addrspace(1)* %gep.1, align 4
+ %a = load float, float addrspace(1)* %gep.0, align 4
+ %b = load float, float addrspace(1)* %gep.1, align 4
%cmp = fcmp oge float %a, %b
%val = select i1 %cmp, float %a, float %b
@@ -55,11 +55,11 @@ define void @test_fmax_legacy_oge_f32(float addrspace(1)* %out, float addrspace(
; EG: MAX
define void @test_fmax_legacy_ugt_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
%tid = call i32 @llvm.r600.read.tidig.x() #1
- %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid
- %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1
+ %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
+ %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
- %a = load float addrspace(1)* %gep.0, align 4
- %b = load float addrspace(1)* %gep.1, align 4
+ %a = load float, float addrspace(1)* %gep.0, align 4
+ %b = load float, float addrspace(1)* %gep.1, align 4
%cmp = fcmp ugt float %a, %b
%val = select i1 %cmp, float %a, float %b
@@ -75,11 +75,11 @@ define void @test_fmax_legacy_ugt_f32(float addrspace(1)* %out, float addrspace(
; EG: MAX
define void @test_fmax_legacy_ogt_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
%tid = call i32 @llvm.r600.read.tidig.x() #1
- %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid
- %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1
+ %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
+ %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
- %a = load float addrspace(1)* %gep.0, align 4
- %b = load float addrspace(1)* %gep.1, align 4
+ %a = load float, float addrspace(1)* %gep.0, align 4
+ %b = load float, float addrspace(1)* %gep.1, align 4
%cmp = fcmp ogt float %a, %b
%val = select i1 %cmp, float %a, float %b
@@ -99,11 +99,11 @@ define void @test_fmax_legacy_ogt_f32(float addrspace(1)* %out, float addrspace(
; EG: MAX
define void @test_fmax_legacy_ogt_f32_multi_use(float addrspace(1)* %out0, i1 addrspace(1)* %out1, float addrspace(1)* %in) #0 {
%tid = call i32 @llvm.r600.read.tidig.x() #1
- %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid
- %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1
+ %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
+ %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
- %a = load float addrspace(1)* %gep.0, align 4
- %b = load float addrspace(1)* %gep.1, align 4
+ %a = load float, float addrspace(1)* %gep.0, align 4
+ %b = load float, float addrspace(1)* %gep.1, align 4
%cmp = fcmp ogt float %a, %b
%val = select i1 %cmp, float %a, float %b
diff --git a/test/CodeGen/R600/fmaxnum.ll b/test/CodeGen/R600/fmaxnum.ll
index c105598ff811..3029bd02e4db 100644
--- a/test/CodeGen/R600/fmaxnum.ll
+++ b/test/CodeGen/R600/fmaxnum.ll
@@ -11,6 +11,9 @@ declare double @llvm.maxnum.f64(double, double)
; FUNC-LABEL: @test_fmax_f32
; SI: v_max_f32_e32
+
+; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
+; EG: MAX_DX10 {{.*}}[[OUT]]
define void @test_fmax_f32(float addrspace(1)* %out, float %a, float %b) nounwind {
%val = call float @llvm.maxnum.f32(float %a, float %b) #0
store float %val, float addrspace(1)* %out, align 4
@@ -20,6 +23,10 @@ define void @test_fmax_f32(float addrspace(1)* %out, float %a, float %b) nounwin
; FUNC-LABEL: @test_fmax_v2f32
; SI: v_max_f32_e32
; SI: v_max_f32_e32
+
+; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+]]
+; EG: MAX_DX10 {{.*}}[[OUT]]
+; EG: MAX_DX10 {{.*}}[[OUT]]
define void @test_fmax_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) nounwind {
%val = call <2 x float> @llvm.maxnum.v2f32(<2 x float> %a, <2 x float> %b) #0
store <2 x float> %val, <2 x float> addrspace(1)* %out, align 8
@@ -31,6 +38,12 @@ define void @test_fmax_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2
; SI: v_max_f32_e32
; SI: v_max_f32_e32
; SI: v_max_f32_e32
+
+; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+]]
+; EG: MAX_DX10 {{.*}}[[OUT]]
+; EG: MAX_DX10 {{.*}}[[OUT]]
+; EG: MAX_DX10 {{.*}}[[OUT]]
+; EG: MAX_DX10 {{.*}}[[OUT]]
define void @test_fmax_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %a, <4 x float> %b) nounwind {
%val = call <4 x float> @llvm.maxnum.v4f32(<4 x float> %a, <4 x float> %b) #0
store <4 x float> %val, <4 x float> addrspace(1)* %out, align 16
@@ -46,6 +59,17 @@ define void @test_fmax_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %a, <4
; SI: v_max_f32_e32
; SI: v_max_f32_e32
; SI: v_max_f32_e32
+
+; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT1:T[0-9]+]]
+; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT2:T[0-9]+]]
+; EG-DAG: MAX_DX10 {{.*}}[[OUT1]].X
+; EG-DAG: MAX_DX10 {{.*}}[[OUT1]].Y
+; EG-DAG: MAX_DX10 {{.*}}[[OUT1]].Z
+; EG-DAG: MAX_DX10 {{.*}}[[OUT1]].W
+; EG-DAG: MAX_DX10 {{.*}}[[OUT2]].X
+; EG-DAG: MAX_DX10 {{.*}}[[OUT2]].Y
+; EG-DAG: MAX_DX10 {{.*}}[[OUT2]].Z
+; EG-DAG: MAX_DX10 {{.*}}[[OUT2]].W
define void @test_fmax_v8f32(<8 x float> addrspace(1)* %out, <8 x float> %a, <8 x float> %b) nounwind {
%val = call <8 x float> @llvm.maxnum.v8f32(<8 x float> %a, <8 x float> %b) #0
store <8 x float> %val, <8 x float> addrspace(1)* %out, align 32
@@ -69,6 +93,27 @@ define void @test_fmax_v8f32(<8 x float> addrspace(1)* %out, <8 x float> %a, <8
; SI: v_max_f32_e32
; SI: v_max_f32_e32
; SI: v_max_f32_e32
+
+; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT1:T[0-9]+]]
+; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT2:T[0-9]+]]
+; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT3:T[0-9]+]]
+; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT4:T[0-9]+]]
+; EG-DAG: MAX_DX10 {{.*}}[[OUT1]].X
+; EG-DAG: MAX_DX10 {{.*}}[[OUT1]].Y
+; EG-DAG: MAX_DX10 {{.*}}[[OUT1]].Z
+; EG-DAG: MAX_DX10 {{.*}}[[OUT1]].W
+; EG-DAG: MAX_DX10 {{.*}}[[OUT2]].X
+; EG-DAG: MAX_DX10 {{.*}}[[OUT2]].Y
+; EG-DAG: MAX_DX10 {{.*}}[[OUT2]].Z
+; EG-DAG: MAX_DX10 {{.*}}[[OUT2]].W
+; EG-DAG: MAX_DX10 {{.*}}[[OUT3]].X
+; EG-DAG: MAX_DX10 {{.*}}[[OUT3]].Y
+; EG-DAG: MAX_DX10 {{.*}}[[OUT3]].Z
+; EG-DAG: MAX_DX10 {{.*}}[[OUT3]].W
+; EG-DAG: MAX_DX10 {{.*}}[[OUT4]].X
+; EG-DAG: MAX_DX10 {{.*}}[[OUT4]].Y
+; EG-DAG: MAX_DX10 {{.*}}[[OUT4]].Z
+; EG-DAG: MAX_DX10 {{.*}}[[OUT4]].W
define void @test_fmax_v16f32(<16 x float> addrspace(1)* %out, <16 x float> %a, <16 x float> %b) nounwind {
%val = call <16 x float> @llvm.maxnum.v16f32(<16 x float> %a, <16 x float> %b) #0
store <16 x float> %val, <16 x float> addrspace(1)* %out, align 64
@@ -79,6 +124,10 @@ define void @test_fmax_v16f32(<16 x float> addrspace(1)* %out, <16 x float> %a,
; SI-NOT: v_max_f32_e32
; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 2.0
; SI: buffer_store_dword [[REG]]
+
+; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
+; EG-NOT: MAX_DX10
+; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
define void @constant_fold_fmax_f32(float addrspace(1)* %out) nounwind {
%val = call float @llvm.maxnum.f32(float 1.0, float 2.0) #0
store float %val, float addrspace(1)* %out, align 4
@@ -89,6 +138,11 @@ define void @constant_fold_fmax_f32(float addrspace(1)* %out) nounwind {
; SI-NOT: v_max_f32_e32
; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7fc00000
; SI: buffer_store_dword [[REG]]
+
+; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
+; EG-NOT: MAX_DX10
+; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
+; EG: 2143289344(nan)
define void @constant_fold_fmax_f32_nan_nan(float addrspace(1)* %out) nounwind {
%val = call float @llvm.maxnum.f32(float 0x7FF8000000000000, float 0x7FF8000000000000) #0
store float %val, float addrspace(1)* %out, align 4
@@ -99,6 +153,10 @@ define void @constant_fold_fmax_f32_nan_nan(float addrspace(1)* %out) nounwind {
; SI-NOT: v_max_f32_e32
; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 1.0
; SI: buffer_store_dword [[REG]]
+
+; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
+; EG-NOT: MAX_DX10
+; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
define void @constant_fold_fmax_f32_val_nan(float addrspace(1)* %out) nounwind {
%val = call float @llvm.maxnum.f32(float 1.0, float 0x7FF8000000000000) #0
store float %val, float addrspace(1)* %out, align 4
@@ -109,6 +167,10 @@ define void @constant_fold_fmax_f32_val_nan(float addrspace(1)* %out) nounwind {
; SI-NOT: v_max_f32_e32
; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 1.0
; SI: buffer_store_dword [[REG]]
+
+; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
+; EG-NOT: MAX_DX10
+; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
define void @constant_fold_fmax_f32_nan_val(float addrspace(1)* %out) nounwind {
%val = call float @llvm.maxnum.f32(float 0x7FF8000000000000, float 1.0) #0
store float %val, float addrspace(1)* %out, align 4
@@ -119,6 +181,10 @@ define void @constant_fold_fmax_f32_nan_val(float addrspace(1)* %out) nounwind {
; SI-NOT: v_max_f32_e32
; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0
; SI: buffer_store_dword [[REG]]
+
+; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
+; EG-NOT: MAX_DX10
+; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
define void @constant_fold_fmax_f32_p0_p0(float addrspace(1)* %out) nounwind {
%val = call float @llvm.maxnum.f32(float 0.0, float 0.0) #0
store float %val, float addrspace(1)* %out, align 4
@@ -129,6 +195,10 @@ define void @constant_fold_fmax_f32_p0_p0(float addrspace(1)* %out) nounwind {
; SI-NOT: v_max_f32_e32
; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0
; SI: buffer_store_dword [[REG]]
+
+; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
+; EG-NOT: MAX_DX10
+; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
define void @constant_fold_fmax_f32_p0_n0(float addrspace(1)* %out) nounwind {
%val = call float @llvm.maxnum.f32(float 0.0, float -0.0) #0
store float %val, float addrspace(1)* %out, align 4
@@ -139,6 +209,10 @@ define void @constant_fold_fmax_f32_p0_n0(float addrspace(1)* %out) nounwind {
; SI-NOT: v_max_f32_e32
; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0x80000000
; SI: buffer_store_dword [[REG]]
+
+; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
+; EG-NOT: MAX_DX10
+; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
define void @constant_fold_fmax_f32_n0_p0(float addrspace(1)* %out) nounwind {
%val = call float @llvm.maxnum.f32(float -0.0, float 0.0) #0
store float %val, float addrspace(1)* %out, align 4
@@ -149,6 +223,10 @@ define void @constant_fold_fmax_f32_n0_p0(float addrspace(1)* %out) nounwind {
; SI-NOT: v_max_f32_e32
; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0x80000000
; SI: buffer_store_dword [[REG]]
+
+; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
+; EG-NOT: MAX_DX10
+; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
define void @constant_fold_fmax_f32_n0_n0(float addrspace(1)* %out) nounwind {
%val = call float @llvm.maxnum.f32(float -0.0, float -0.0) #0
store float %val, float addrspace(1)* %out, align 4
@@ -157,6 +235,10 @@ define void @constant_fold_fmax_f32_n0_n0(float addrspace(1)* %out) nounwind {
; FUNC-LABEL: @fmax_var_immediate_f32
; SI: v_max_f32_e64 {{v[0-9]+}}, 2.0, {{s[0-9]+}}
+
+; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
+; EG-NOT: MAX_DX10
+; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
define void @fmax_var_immediate_f32(float addrspace(1)* %out, float %a) nounwind {
%val = call float @llvm.maxnum.f32(float %a, float 2.0) #0
store float %val, float addrspace(1)* %out, align 4
@@ -165,6 +247,9 @@ define void @fmax_var_immediate_f32(float addrspace(1)* %out, float %a) nounwind
; FUNC-LABEL: @fmax_immediate_var_f32
; SI: v_max_f32_e64 {{v[0-9]+}}, 2.0, {{s[0-9]+}}
+
+; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
+; EG: MAX_DX10 {{.*}}[[OUT]], {{KC0\[[0-9]\].[XYZW]}}, literal.{{[xy]}}
define void @fmax_immediate_var_f32(float addrspace(1)* %out, float %a) nounwind {
%val = call float @llvm.maxnum.f32(float 2.0, float %a) #0
store float %val, float addrspace(1)* %out, align 4
@@ -174,6 +259,9 @@ define void @fmax_immediate_var_f32(float addrspace(1)* %out, float %a) nounwind
; FUNC-LABEL: @fmax_var_literal_f32
; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0x42c60000
; SI: v_max_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}, [[REG]]
+
+; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
+; EG: MAX_DX10 {{.*}}[[OUT]], {{KC0\[[0-9]\].[XYZW]}}, literal.{{[xy]}}
define void @fmax_var_literal_f32(float addrspace(1)* %out, float %a) nounwind {
%val = call float @llvm.maxnum.f32(float %a, float 99.0) #0
store float %val, float addrspace(1)* %out, align 4
@@ -183,6 +271,9 @@ define void @fmax_var_literal_f32(float addrspace(1)* %out, float %a) nounwind {
; FUNC-LABEL: @fmax_literal_var_f32
; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0x42c60000
; SI: v_max_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}, [[REG]]
+
+; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
+; EG: MAX_DX10 {{.*}}[[OUT]], {{KC0\[[0-9]\].[XYZW]}}, literal.{{[xy]}}
define void @fmax_literal_var_f32(float addrspace(1)* %out, float %a) nounwind {
%val = call float @llvm.maxnum.f32(float 99.0, float %a) #0
store float %val, float addrspace(1)* %out, align 4
diff --git a/test/CodeGen/R600/fmin3.ll b/test/CodeGen/R600/fmin3.ll
index 716beb16bb10..0a76699b43e1 100644
--- a/test/CodeGen/R600/fmin3.ll
+++ b/test/CodeGen/R600/fmin3.ll
@@ -5,16 +5,16 @@
declare float @llvm.minnum.f32(float, float) nounwind readnone
; SI-LABEL: {{^}}test_fmin3_olt_0:
-; SI: buffer_load_dword [[REGA:v[0-9]+]]
-; SI: buffer_load_dword [[REGB:v[0-9]+]]
; SI: buffer_load_dword [[REGC:v[0-9]+]]
+; SI: buffer_load_dword [[REGB:v[0-9]+]]
+; SI: buffer_load_dword [[REGA:v[0-9]+]]
; SI: v_min3_f32 [[RESULT:v[0-9]+]], [[REGC]], [[REGB]], [[REGA]]
; SI: buffer_store_dword [[RESULT]],
; SI: s_endpgm
define void @test_fmin3_olt_0(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) nounwind {
- %a = load float addrspace(1)* %aptr, align 4
- %b = load float addrspace(1)* %bptr, align 4
- %c = load float addrspace(1)* %cptr, align 4
+ %a = load float, float addrspace(1)* %aptr, align 4
+ %b = load float, float addrspace(1)* %bptr, align 4
+ %c = load float, float addrspace(1)* %cptr, align 4
%f0 = call float @llvm.minnum.f32(float %a, float %b) nounwind readnone
%f1 = call float @llvm.minnum.f32(float %f0, float %c) nounwind readnone
store float %f1, float addrspace(1)* %out, align 4
@@ -23,16 +23,16 @@ define void @test_fmin3_olt_0(float addrspace(1)* %out, float addrspace(1)* %apt
; Commute operand of second fmin
; SI-LABEL: {{^}}test_fmin3_olt_1:
-; SI: buffer_load_dword [[REGA:v[0-9]+]]
; SI: buffer_load_dword [[REGB:v[0-9]+]]
+; SI: buffer_load_dword [[REGA:v[0-9]+]]
; SI: buffer_load_dword [[REGC:v[0-9]+]]
; SI: v_min3_f32 [[RESULT:v[0-9]+]], [[REGC]], [[REGB]], [[REGA]]
; SI: buffer_store_dword [[RESULT]],
; SI: s_endpgm
define void @test_fmin3_olt_1(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) nounwind {
- %a = load float addrspace(1)* %aptr, align 4
- %b = load float addrspace(1)* %bptr, align 4
- %c = load float addrspace(1)* %cptr, align 4
+ %a = load float, float addrspace(1)* %aptr, align 4
+ %b = load float, float addrspace(1)* %bptr, align 4
+ %c = load float, float addrspace(1)* %cptr, align 4
%f0 = call float @llvm.minnum.f32(float %a, float %b) nounwind readnone
%f1 = call float @llvm.minnum.f32(float %c, float %f0) nounwind readnone
store float %f1, float addrspace(1)* %out, align 4
diff --git a/test/CodeGen/R600/fmin_legacy.f64.ll b/test/CodeGen/R600/fmin_legacy.f64.ll
index 51dcd06f9397..e19a48f3f7e2 100644
--- a/test/CodeGen/R600/fmin_legacy.f64.ll
+++ b/test/CodeGen/R600/fmin_legacy.f64.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
declare i32 @llvm.r600.read.tidig.x() #1
@@ -16,11 +16,11 @@ define void @test_fmin_legacy_f64(<4 x double> addrspace(1)* %out, <4 x double>
; FUNC-LABEL: @test_fmin_legacy_ule_f64
define void @test_fmin_legacy_ule_f64(double addrspace(1)* %out, double addrspace(1)* %in) #0 {
%tid = call i32 @llvm.r600.read.tidig.x() #1
- %gep.0 = getelementptr double addrspace(1)* %in, i32 %tid
- %gep.1 = getelementptr double addrspace(1)* %gep.0, i32 1
+ %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
+ %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
- %a = load double addrspace(1)* %gep.0, align 8
- %b = load double addrspace(1)* %gep.1, align 8
+ %a = load double, double addrspace(1)* %gep.0, align 8
+ %b = load double, double addrspace(1)* %gep.1, align 8
%cmp = fcmp ule double %a, %b
%val = select i1 %cmp, double %a, double %b
@@ -31,11 +31,11 @@ define void @test_fmin_legacy_ule_f64(double addrspace(1)* %out, double addrspac
; FUNC-LABEL: @test_fmin_legacy_ole_f64
define void @test_fmin_legacy_ole_f64(double addrspace(1)* %out, double addrspace(1)* %in) #0 {
%tid = call i32 @llvm.r600.read.tidig.x() #1
- %gep.0 = getelementptr double addrspace(1)* %in, i32 %tid
- %gep.1 = getelementptr double addrspace(1)* %gep.0, i32 1
+ %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
+ %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
- %a = load double addrspace(1)* %gep.0, align 8
- %b = load double addrspace(1)* %gep.1, align 8
+ %a = load double, double addrspace(1)* %gep.0, align 8
+ %b = load double, double addrspace(1)* %gep.1, align 8
%cmp = fcmp ole double %a, %b
%val = select i1 %cmp, double %a, double %b
@@ -46,11 +46,11 @@ define void @test_fmin_legacy_ole_f64(double addrspace(1)* %out, double addrspac
; FUNC-LABEL: @test_fmin_legacy_olt_f64
define void @test_fmin_legacy_olt_f64(double addrspace(1)* %out, double addrspace(1)* %in) #0 {
%tid = call i32 @llvm.r600.read.tidig.x() #1
- %gep.0 = getelementptr double addrspace(1)* %in, i32 %tid
- %gep.1 = getelementptr double addrspace(1)* %gep.0, i32 1
+ %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
+ %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
- %a = load double addrspace(1)* %gep.0, align 8
- %b = load double addrspace(1)* %gep.1, align 8
+ %a = load double, double addrspace(1)* %gep.0, align 8
+ %b = load double, double addrspace(1)* %gep.1, align 8
%cmp = fcmp olt double %a, %b
%val = select i1 %cmp, double %a, double %b
@@ -61,11 +61,11 @@ define void @test_fmin_legacy_olt_f64(double addrspace(1)* %out, double addrspac
; FUNC-LABEL: @test_fmin_legacy_ult_f64
define void @test_fmin_legacy_ult_f64(double addrspace(1)* %out, double addrspace(1)* %in) #0 {
%tid = call i32 @llvm.r600.read.tidig.x() #1
- %gep.0 = getelementptr double addrspace(1)* %in, i32 %tid
- %gep.1 = getelementptr double addrspace(1)* %gep.0, i32 1
+ %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
+ %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
- %a = load double addrspace(1)* %gep.0, align 8
- %b = load double addrspace(1)* %gep.1, align 8
+ %a = load double, double addrspace(1)* %gep.0, align 8
+ %b = load double, double addrspace(1)* %gep.1, align 8
%cmp = fcmp ult double %a, %b
%val = select i1 %cmp, double %a, double %b
diff --git a/test/CodeGen/R600/fmin_legacy.ll b/test/CodeGen/R600/fmin_legacy.ll
index 5014f6c55329..6a625c239d76 100644
--- a/test/CodeGen/R600/fmin_legacy.ll
+++ b/test/CodeGen/R600/fmin_legacy.ll
@@ -27,11 +27,11 @@ define void @test_fmin_legacy_f32(<4 x float> addrspace(1)* %out, <4 x float> in
; SI-NONAN: v_min_f32_e32 {{v[0-9]+}}, [[B]], [[A]]
define void @test_fmin_legacy_ule_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
%tid = call i32 @llvm.r600.read.tidig.x() #1
- %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid
- %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1
+ %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
+ %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
- %a = load float addrspace(1)* %gep.0, align 4
- %b = load float addrspace(1)* %gep.1, align 4
+ %a = load float, float addrspace(1)* %gep.0, align 4
+ %b = load float, float addrspace(1)* %gep.1, align 4
%cmp = fcmp ule float %a, %b
%val = select i1 %cmp, float %a, float %b
@@ -46,11 +46,11 @@ define void @test_fmin_legacy_ule_f32(float addrspace(1)* %out, float addrspace(
; SI-NONAN: v_min_f32_e32 {{v[0-9]+}}, [[B]], [[A]]
define void @test_fmin_legacy_ole_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
%tid = call i32 @llvm.r600.read.tidig.x() #1
- %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid
- %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1
+ %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
+ %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
- %a = load float addrspace(1)* %gep.0, align 4
- %b = load float addrspace(1)* %gep.1, align 4
+ %a = load float, float addrspace(1)* %gep.0, align 4
+ %b = load float, float addrspace(1)* %gep.1, align 4
%cmp = fcmp ole float %a, %b
%val = select i1 %cmp, float %a, float %b
@@ -65,11 +65,11 @@ define void @test_fmin_legacy_ole_f32(float addrspace(1)* %out, float addrspace(
; SI-NONAN: v_min_f32_e32 {{v[0-9]+}}, [[B]], [[A]]
define void @test_fmin_legacy_olt_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
%tid = call i32 @llvm.r600.read.tidig.x() #1
- %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid
- %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1
+ %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
+ %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
- %a = load float addrspace(1)* %gep.0, align 4
- %b = load float addrspace(1)* %gep.1, align 4
+ %a = load float, float addrspace(1)* %gep.0, align 4
+ %b = load float, float addrspace(1)* %gep.1, align 4
%cmp = fcmp olt float %a, %b
%val = select i1 %cmp, float %a, float %b
@@ -84,11 +84,11 @@ define void @test_fmin_legacy_olt_f32(float addrspace(1)* %out, float addrspace(
; SI-NONAN: v_min_f32_e32 {{v[0-9]+}}, [[B]], [[A]]
define void @test_fmin_legacy_ult_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
%tid = call i32 @llvm.r600.read.tidig.x() #1
- %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid
- %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1
+ %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
+ %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
- %a = load float addrspace(1)* %gep.0, align 4
- %b = load float addrspace(1)* %gep.1, align 4
+ %a = load float, float addrspace(1)* %gep.0, align 4
+ %b = load float, float addrspace(1)* %gep.1, align 4
%cmp = fcmp ult float %a, %b
%val = select i1 %cmp, float %a, float %b
@@ -106,11 +106,11 @@ define void @test_fmin_legacy_ult_f32(float addrspace(1)* %out, float addrspace(
; SI: s_endpgm
define void @test_fmin_legacy_ole_f32_multi_use(float addrspace(1)* %out0, i1 addrspace(1)* %out1, float addrspace(1)* %in) #0 {
%tid = call i32 @llvm.r600.read.tidig.x() #1
- %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid
- %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1
+ %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
+ %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
- %a = load float addrspace(1)* %gep.0, align 4
- %b = load float addrspace(1)* %gep.1, align 4
+ %a = load float, float addrspace(1)* %gep.0, align 4
+ %b = load float, float addrspace(1)* %gep.1, align 4
%cmp = fcmp ole float %a, %b
%val0 = select i1 %cmp, float %a, float %b
diff --git a/test/CodeGen/R600/fminnum.ll b/test/CodeGen/R600/fminnum.ll
index 6b93b830033b..4d7b52540d85 100644
--- a/test/CodeGen/R600/fminnum.ll
+++ b/test/CodeGen/R600/fminnum.ll
@@ -1,5 +1,6 @@
; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
declare float @llvm.minnum.f32(float, float) #0
declare <2 x float> @llvm.minnum.v2f32(<2 x float>, <2 x float>) #0
@@ -9,6 +10,9 @@ declare <16 x float> @llvm.minnum.v16f32(<16 x float>, <16 x float>) #0
; FUNC-LABEL: @test_fmin_f32
; SI: v_min_f32_e32
+
+; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
+; EG: MIN_DX10 {{.*}}[[OUT]]
define void @test_fmin_f32(float addrspace(1)* %out, float %a, float %b) nounwind {
%val = call float @llvm.minnum.f32(float %a, float %b) #0
store float %val, float addrspace(1)* %out, align 4
@@ -18,6 +22,10 @@ define void @test_fmin_f32(float addrspace(1)* %out, float %a, float %b) nounwin
; FUNC-LABEL: @test_fmin_v2f32
; SI: v_min_f32_e32
; SI: v_min_f32_e32
+
+; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+]]
+; EG: MIN_DX10 {{.*}}[[OUT]]
+; EG: MIN_DX10 {{.*}}[[OUT]]
define void @test_fmin_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) nounwind {
%val = call <2 x float> @llvm.minnum.v2f32(<2 x float> %a, <2 x float> %b) #0
store <2 x float> %val, <2 x float> addrspace(1)* %out, align 8
@@ -29,6 +37,12 @@ define void @test_fmin_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2
; SI: v_min_f32_e32
; SI: v_min_f32_e32
; SI: v_min_f32_e32
+
+; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+]]
+; EG: MIN_DX10 {{.*}}[[OUT]]
+; EG: MIN_DX10 {{.*}}[[OUT]]
+; EG: MIN_DX10 {{.*}}[[OUT]]
+; EG: MIN_DX10 {{.*}}[[OUT]]
define void @test_fmin_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %a, <4 x float> %b) nounwind {
%val = call <4 x float> @llvm.minnum.v4f32(<4 x float> %a, <4 x float> %b) #0
store <4 x float> %val, <4 x float> addrspace(1)* %out, align 16
@@ -44,6 +58,17 @@ define void @test_fmin_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %a, <4
; SI: v_min_f32_e32
; SI: v_min_f32_e32
; SI: v_min_f32_e32
+
+; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT1:T[0-9]+]]
+; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT2:T[0-9]+]]
+; EG-DAG: MIN_DX10 {{.*}}[[OUT1]].X
+; EG-DAG: MIN_DX10 {{.*}}[[OUT1]].Y
+; EG-DAG: MIN_DX10 {{.*}}[[OUT1]].Z
+; EG-DAG: MIN_DX10 {{.*}}[[OUT1]].W
+; EG-DAG: MIN_DX10 {{.*}}[[OUT2]].X
+; EG-DAG: MIN_DX10 {{.*}}[[OUT2]].Y
+; EG-DAG: MIN_DX10 {{.*}}[[OUT2]].Z
+; EG-DAG: MIN_DX10 {{.*}}[[OUT2]].W
define void @test_fmin_v8f32(<8 x float> addrspace(1)* %out, <8 x float> %a, <8 x float> %b) nounwind {
%val = call <8 x float> @llvm.minnum.v8f32(<8 x float> %a, <8 x float> %b) #0
store <8 x float> %val, <8 x float> addrspace(1)* %out, align 32
@@ -67,6 +92,27 @@ define void @test_fmin_v8f32(<8 x float> addrspace(1)* %out, <8 x float> %a, <8
; SI: v_min_f32_e32
; SI: v_min_f32_e32
; SI: v_min_f32_e32
+
+; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT1:T[0-9]+]]
+; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT2:T[0-9]+]]
+; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT3:T[0-9]+]]
+; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT4:T[0-9]+]]
+; EG-DAG: MIN_DX10 {{.*}}[[OUT1]].X
+; EG-DAG: MIN_DX10 {{.*}}[[OUT1]].Y
+; EG-DAG: MIN_DX10 {{.*}}[[OUT1]].Z
+; EG-DAG: MIN_DX10 {{.*}}[[OUT1]].W
+; EG-DAG: MIN_DX10 {{.*}}[[OUT2]].X
+; EG-DAG: MIN_DX10 {{.*}}[[OUT2]].Y
+; EG-DAG: MIN_DX10 {{.*}}[[OUT2]].Z
+; EG-DAG: MIN_DX10 {{.*}}[[OUT2]].W
+; EG-DAG: MIN_DX10 {{.*}}[[OUT3]].X
+; EG-DAG: MIN_DX10 {{.*}}[[OUT3]].Y
+; EG-DAG: MIN_DX10 {{.*}}[[OUT3]].Z
+; EG-DAG: MIN_DX10 {{.*}}[[OUT3]].W
+; EG-DAG: MIN_DX10 {{.*}}[[OUT4]].X
+; EG-DAG: MIN_DX10 {{.*}}[[OUT4]].Y
+; EG-DAG: MIN_DX10 {{.*}}[[OUT4]].Z
+; EG-DAG: MIN_DX10 {{.*}}[[OUT4]].W
define void @test_fmin_v16f32(<16 x float> addrspace(1)* %out, <16 x float> %a, <16 x float> %b) nounwind {
%val = call <16 x float> @llvm.minnum.v16f32(<16 x float> %a, <16 x float> %b) #0
store <16 x float> %val, <16 x float> addrspace(1)* %out, align 64
@@ -77,6 +123,10 @@ define void @test_fmin_v16f32(<16 x float> addrspace(1)* %out, <16 x float> %a,
; SI-NOT: v_min_f32_e32
; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 1.0
; SI: buffer_store_dword [[REG]]
+
+; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
+; EG-NOT: MIN_DX10
+; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
define void @constant_fold_fmin_f32(float addrspace(1)* %out) nounwind {
%val = call float @llvm.minnum.f32(float 1.0, float 2.0) #0
store float %val, float addrspace(1)* %out, align 4
@@ -87,6 +137,11 @@ define void @constant_fold_fmin_f32(float addrspace(1)* %out) nounwind {
; SI-NOT: v_min_f32_e32
; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7fc00000
; SI: buffer_store_dword [[REG]]
+
+; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
+; EG-NOT: MIN_DX10
+; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
+; EG: 2143289344({{nan|1\.#QNAN0e\+00}})
define void @constant_fold_fmin_f32_nan_nan(float addrspace(1)* %out) nounwind {
%val = call float @llvm.minnum.f32(float 0x7FF8000000000000, float 0x7FF8000000000000) #0
store float %val, float addrspace(1)* %out, align 4
@@ -97,6 +152,10 @@ define void @constant_fold_fmin_f32_nan_nan(float addrspace(1)* %out) nounwind {
; SI-NOT: v_min_f32_e32
; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 1.0
; SI: buffer_store_dword [[REG]]
+
+; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
+; EG-NOT: MIN_DX10
+; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
define void @constant_fold_fmin_f32_val_nan(float addrspace(1)* %out) nounwind {
%val = call float @llvm.minnum.f32(float 1.0, float 0x7FF8000000000000) #0
store float %val, float addrspace(1)* %out, align 4
@@ -107,6 +166,10 @@ define void @constant_fold_fmin_f32_val_nan(float addrspace(1)* %out) nounwind {
; SI-NOT: v_min_f32_e32
; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 1.0
; SI: buffer_store_dword [[REG]]
+
+; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
+; EG-NOT: MIN_DX10
+; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
define void @constant_fold_fmin_f32_nan_val(float addrspace(1)* %out) nounwind {
%val = call float @llvm.minnum.f32(float 0x7FF8000000000000, float 1.0) #0
store float %val, float addrspace(1)* %out, align 4
@@ -117,6 +180,10 @@ define void @constant_fold_fmin_f32_nan_val(float addrspace(1)* %out) nounwind {
; SI-NOT: v_min_f32_e32
; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0
; SI: buffer_store_dword [[REG]]
+
+; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
+; EG-NOT: MIN_DX10
+; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
define void @constant_fold_fmin_f32_p0_p0(float addrspace(1)* %out) nounwind {
%val = call float @llvm.minnum.f32(float 0.0, float 0.0) #0
store float %val, float addrspace(1)* %out, align 4
@@ -127,6 +194,10 @@ define void @constant_fold_fmin_f32_p0_p0(float addrspace(1)* %out) nounwind {
; SI-NOT: v_min_f32_e32
; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0
; SI: buffer_store_dword [[REG]]
+
+; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
+; EG-NOT: MIN_DX10
+; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
define void @constant_fold_fmin_f32_p0_n0(float addrspace(1)* %out) nounwind {
%val = call float @llvm.minnum.f32(float 0.0, float -0.0) #0
store float %val, float addrspace(1)* %out, align 4
@@ -137,6 +208,10 @@ define void @constant_fold_fmin_f32_p0_n0(float addrspace(1)* %out) nounwind {
; SI-NOT: v_min_f32_e32
; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0x80000000
; SI: buffer_store_dword [[REG]]
+
+; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
+; EG-NOT: MIN_DX10
+; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
define void @constant_fold_fmin_f32_n0_p0(float addrspace(1)* %out) nounwind {
%val = call float @llvm.minnum.f32(float -0.0, float 0.0) #0
store float %val, float addrspace(1)* %out, align 4
@@ -147,6 +222,10 @@ define void @constant_fold_fmin_f32_n0_p0(float addrspace(1)* %out) nounwind {
; SI-NOT: v_min_f32_e32
; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0x80000000
; SI: buffer_store_dword [[REG]]
+
+; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
+; EG-NOT: MIN_DX10
+; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
define void @constant_fold_fmin_f32_n0_n0(float addrspace(1)* %out) nounwind {
%val = call float @llvm.minnum.f32(float -0.0, float -0.0) #0
store float %val, float addrspace(1)* %out, align 4
@@ -155,6 +234,9 @@ define void @constant_fold_fmin_f32_n0_n0(float addrspace(1)* %out) nounwind {
; FUNC-LABEL: @fmin_var_immediate_f32
; SI: v_min_f32_e64 {{v[0-9]+}}, 2.0, {{s[0-9]+}}
+
+; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
+; EG: MIN_DX10 {{.*}}[[OUT]], {{KC0\[[0-9]\].[XYZW]}}, literal.{{[xy]}}
define void @fmin_var_immediate_f32(float addrspace(1)* %out, float %a) nounwind {
%val = call float @llvm.minnum.f32(float %a, float 2.0) #0
store float %val, float addrspace(1)* %out, align 4
@@ -163,6 +245,9 @@ define void @fmin_var_immediate_f32(float addrspace(1)* %out, float %a) nounwind
; FUNC-LABEL: @fmin_immediate_var_f32
; SI: v_min_f32_e64 {{v[0-9]+}}, 2.0, {{s[0-9]+}}
+
+; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
+; EG: MIN_DX10 {{.*}}[[OUT]], {{KC0\[[0-9]\].[XYZW]}}, literal.{{[xy]}}
define void @fmin_immediate_var_f32(float addrspace(1)* %out, float %a) nounwind {
%val = call float @llvm.minnum.f32(float 2.0, float %a) #0
store float %val, float addrspace(1)* %out, align 4
@@ -172,6 +257,9 @@ define void @fmin_immediate_var_f32(float addrspace(1)* %out, float %a) nounwind
; FUNC-LABEL: @fmin_var_literal_f32
; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0x42c60000
; SI: v_min_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}, [[REG]]
+
+; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
+; EG: MIN_DX10 {{.*}}[[OUT]], {{KC0\[[0-9]\].[XYZW]}}, literal.{{[xy]}}
define void @fmin_var_literal_f32(float addrspace(1)* %out, float %a) nounwind {
%val = call float @llvm.minnum.f32(float %a, float 99.0) #0
store float %val, float addrspace(1)* %out, align 4
@@ -181,6 +269,9 @@ define void @fmin_var_literal_f32(float addrspace(1)* %out, float %a) nounwind {
; FUNC-LABEL: @fmin_literal_var_f32
; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0x42c60000
; SI: v_min_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}, [[REG]]
+
+; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
+; EG: MIN_DX10 {{.*}}[[OUT]], {{KC0\[[0-9]\].[XYZW]}}, literal.{{[xy]}}
define void @fmin_literal_var_f32(float addrspace(1)* %out, float %a) nounwind {
%val = call float @llvm.minnum.f32(float 99.0, float %a) #0
store float %val, float addrspace(1)* %out, align 4
diff --git a/test/CodeGen/R600/fmul.ll b/test/CodeGen/R600/fmul.ll
index 6c09aa242677..68ebc4dedfe0 100644
--- a/test/CodeGen/R600/fmul.ll
+++ b/test/CodeGen/R600/fmul.ll
@@ -42,9 +42,9 @@ entry:
; SI: v_mul_f32
; SI: v_mul_f32
define void @fmul_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
- %b_ptr = getelementptr <4 x float> addrspace(1)* %in, i32 1
- %a = load <4 x float> addrspace(1) * %in
- %b = load <4 x float> addrspace(1) * %b_ptr
+ %b_ptr = getelementptr <4 x float>, <4 x float> addrspace(1)* %in, i32 1
+ %a = load <4 x float>, <4 x float> addrspace(1) * %in
+ %b = load <4 x float>, <4 x float> addrspace(1) * %b_ptr
%result = fmul <4 x float> %a, %b
store <4 x float> %result, <4 x float> addrspace(1)* %out
ret void
diff --git a/test/CodeGen/R600/fmul64.ll b/test/CodeGen/R600/fmul64.ll
index 9d7787ccbe1f..3c222eaba89d 100644
--- a/test/CodeGen/R600/fmul64.ll
+++ b/test/CodeGen/R600/fmul64.ll
@@ -5,8 +5,8 @@
; SI: v_mul_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
define void @fmul_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
double addrspace(1)* %in2) {
- %r0 = load double addrspace(1)* %in1
- %r1 = load double addrspace(1)* %in2
+ %r0 = load double, double addrspace(1)* %in1
+ %r1 = load double, double addrspace(1)* %in2
%r2 = fmul double %r0, %r1
store double %r2, double addrspace(1)* %out
ret void
@@ -17,8 +17,8 @@ define void @fmul_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
; SI: v_mul_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
define void @fmul_v2f64(<2 x double> addrspace(1)* %out, <2 x double> addrspace(1)* %in1,
<2 x double> addrspace(1)* %in2) {
- %r0 = load <2 x double> addrspace(1)* %in1
- %r1 = load <2 x double> addrspace(1)* %in2
+ %r0 = load <2 x double>, <2 x double> addrspace(1)* %in1
+ %r1 = load <2 x double>, <2 x double> addrspace(1)* %in2
%r2 = fmul <2 x double> %r0, %r1
store <2 x double> %r2, <2 x double> addrspace(1)* %out
ret void
@@ -31,8 +31,8 @@ define void @fmul_v2f64(<2 x double> addrspace(1)* %out, <2 x double> addrspace(
; SI: v_mul_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
define void @fmul_v4f64(<4 x double> addrspace(1)* %out, <4 x double> addrspace(1)* %in1,
<4 x double> addrspace(1)* %in2) {
- %r0 = load <4 x double> addrspace(1)* %in1
- %r1 = load <4 x double> addrspace(1)* %in2
+ %r0 = load <4 x double>, <4 x double> addrspace(1)* %in1
+ %r1 = load <4 x double>, <4 x double> addrspace(1)* %in2
%r2 = fmul <4 x double> %r0, %r1
store <4 x double> %r2, <4 x double> addrspace(1)* %out
ret void
diff --git a/test/CodeGen/R600/fmuladd.ll b/test/CodeGen/R600/fmuladd.ll
index 2b708639b122..ae84d841021d 100644
--- a/test/CodeGen/R600/fmuladd.ll
+++ b/test/CodeGen/R600/fmuladd.ll
@@ -10,9 +10,9 @@ declare float @llvm.fabs.f32(float) nounwind readnone
define void @fmuladd_f32(float addrspace(1)* %out, float addrspace(1)* %in1,
float addrspace(1)* %in2, float addrspace(1)* %in3) {
- %r0 = load float addrspace(1)* %in1
- %r1 = load float addrspace(1)* %in2
- %r2 = load float addrspace(1)* %in3
+ %r0 = load float, float addrspace(1)* %in1
+ %r1 = load float, float addrspace(1)* %in2
+ %r2 = load float, float addrspace(1)* %in3
%r3 = tail call float @llvm.fmuladd.f32(float %r0, float %r1, float %r2)
store float %r3, float addrspace(1)* %out
ret void
@@ -23,9 +23,9 @@ define void @fmuladd_f32(float addrspace(1)* %out, float addrspace(1)* %in1,
define void @fmuladd_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
double addrspace(1)* %in2, double addrspace(1)* %in3) {
- %r0 = load double addrspace(1)* %in1
- %r1 = load double addrspace(1)* %in2
- %r2 = load double addrspace(1)* %in3
+ %r0 = load double, double addrspace(1)* %in1
+ %r1 = load double, double addrspace(1)* %in2
+ %r2 = load double, double addrspace(1)* %in3
%r3 = tail call double @llvm.fmuladd.f64(double %r0, double %r1, double %r2)
store double %r3, double addrspace(1)* %out
ret void
@@ -38,12 +38,12 @@ define void @fmuladd_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
; CHECK: buffer_store_dword [[RESULT]]
define void @fmuladd_2.0_a_b_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep.0 = getelementptr float addrspace(1)* %out, i32 %tid
- %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1
- %gep.out = getelementptr float addrspace(1)* %out, i32 %tid
+ %gep.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
+ %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
+ %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid
- %r1 = load float addrspace(1)* %gep.0
- %r2 = load float addrspace(1)* %gep.1
+ %r1 = load float, float addrspace(1)* %gep.0
+ %r2 = load float, float addrspace(1)* %gep.1
%r3 = tail call float @llvm.fmuladd.f32(float 2.0, float %r1, float %r2)
store float %r3, float addrspace(1)* %gep.out
@@ -57,12 +57,12 @@ define void @fmuladd_2.0_a_b_f32(float addrspace(1)* %out, float addrspace(1)* %
; CHECK: buffer_store_dword [[RESULT]]
define void @fmuladd_a_2.0_b_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep.0 = getelementptr float addrspace(1)* %out, i32 %tid
- %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1
- %gep.out = getelementptr float addrspace(1)* %out, i32 %tid
+ %gep.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
+ %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
+ %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid
- %r1 = load float addrspace(1)* %gep.0
- %r2 = load float addrspace(1)* %gep.1
+ %r1 = load float, float addrspace(1)* %gep.0
+ %r2 = load float, float addrspace(1)* %gep.1
%r3 = tail call float @llvm.fmuladd.f32(float %r1, float 2.0, float %r2)
store float %r3, float addrspace(1)* %gep.out
@@ -78,12 +78,12 @@ define void @fadd_a_a_b_f32(float addrspace(1)* %out,
float addrspace(1)* %in1,
float addrspace(1)* %in2) {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep.0 = getelementptr float addrspace(1)* %out, i32 %tid
- %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1
- %gep.out = getelementptr float addrspace(1)* %out, i32 %tid
+ %gep.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
+ %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
+ %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid
- %r0 = load float addrspace(1)* %gep.0
- %r1 = load float addrspace(1)* %gep.1
+ %r0 = load float, float addrspace(1)* %gep.0
+ %r1 = load float, float addrspace(1)* %gep.1
%add.0 = fadd float %r0, %r0
%add.1 = fadd float %add.0, %r1
@@ -100,12 +100,12 @@ define void @fadd_b_a_a_f32(float addrspace(1)* %out,
float addrspace(1)* %in1,
float addrspace(1)* %in2) {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep.0 = getelementptr float addrspace(1)* %out, i32 %tid
- %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1
- %gep.out = getelementptr float addrspace(1)* %out, i32 %tid
+ %gep.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
+ %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
+ %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid
- %r0 = load float addrspace(1)* %gep.0
- %r1 = load float addrspace(1)* %gep.1
+ %r0 = load float, float addrspace(1)* %gep.0
+ %r1 = load float, float addrspace(1)* %gep.1
%add.0 = fadd float %r0, %r0
%add.1 = fadd float %r1, %add.0
@@ -120,12 +120,12 @@ define void @fadd_b_a_a_f32(float addrspace(1)* %out,
; CHECK: buffer_store_dword [[RESULT]]
define void @fmuladd_neg_2.0_a_b_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep.0 = getelementptr float addrspace(1)* %out, i32 %tid
- %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1
- %gep.out = getelementptr float addrspace(1)* %out, i32 %tid
+ %gep.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
+ %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
+ %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid
- %r1 = load float addrspace(1)* %gep.0
- %r2 = load float addrspace(1)* %gep.1
+ %r1 = load float, float addrspace(1)* %gep.0
+ %r2 = load float, float addrspace(1)* %gep.1
%r3 = tail call float @llvm.fmuladd.f32(float -2.0, float %r1, float %r2)
store float %r3, float addrspace(1)* %gep.out
@@ -140,12 +140,12 @@ define void @fmuladd_neg_2.0_a_b_f32(float addrspace(1)* %out, float addrspace(1
; CHECK: buffer_store_dword [[RESULT]]
define void @fmuladd_neg_2.0_neg_a_b_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep.0 = getelementptr float addrspace(1)* %out, i32 %tid
- %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1
- %gep.out = getelementptr float addrspace(1)* %out, i32 %tid
+ %gep.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
+ %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
+ %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid
- %r1 = load float addrspace(1)* %gep.0
- %r2 = load float addrspace(1)* %gep.1
+ %r1 = load float, float addrspace(1)* %gep.0
+ %r2 = load float, float addrspace(1)* %gep.1
%r1.fneg = fsub float -0.000000e+00, %r1
@@ -162,12 +162,12 @@ define void @fmuladd_neg_2.0_neg_a_b_f32(float addrspace(1)* %out, float addrspa
; CHECK: buffer_store_dword [[RESULT]]
define void @fmuladd_2.0_neg_a_b_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep.0 = getelementptr float addrspace(1)* %out, i32 %tid
- %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1
- %gep.out = getelementptr float addrspace(1)* %out, i32 %tid
+ %gep.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
+ %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
+ %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid
- %r1 = load float addrspace(1)* %gep.0
- %r2 = load float addrspace(1)* %gep.1
+ %r1 = load float, float addrspace(1)* %gep.0
+ %r2 = load float, float addrspace(1)* %gep.1
%r1.fneg = fsub float -0.000000e+00, %r1
@@ -184,12 +184,12 @@ define void @fmuladd_2.0_neg_a_b_f32(float addrspace(1)* %out, float addrspace(1
; CHECK: buffer_store_dword [[RESULT]]
define void @fmuladd_2.0_a_neg_b_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep.0 = getelementptr float addrspace(1)* %out, i32 %tid
- %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1
- %gep.out = getelementptr float addrspace(1)* %out, i32 %tid
+ %gep.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
+ %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
+ %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid
- %r1 = load float addrspace(1)* %gep.0
- %r2 = load float addrspace(1)* %gep.1
+ %r1 = load float, float addrspace(1)* %gep.0
+ %r2 = load float, float addrspace(1)* %gep.1
%r2.fneg = fsub float -0.000000e+00, %r2
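
The hunks above are part of the tree-wide switch to LLVM's explicit pointee-type syntax: load and getelementptr now name the value type as a separate first operand instead of inferring it from the pointer operand. A minimal sketch of the mapping, kept illustrative rather than copied from any one test:

; old form, type inferred from the pointer operand
;   %gep = getelementptr float addrspace(1)* %ptr, i32 %tid
;   %val = load float addrspace(1)* %gep, align 4
; new form used throughout this import
  %gep = getelementptr float, float addrspace(1)* %ptr, i32 %tid
  %val = load float, float addrspace(1)* %gep, align 4

Only the textual form changes; the indexing and the loaded value are the same, which is why these hunks leave the CHECK lines untouched.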
diff --git a/test/CodeGen/R600/fneg-fabs.f64.ll b/test/CodeGen/R600/fneg-fabs.f64.ll
index 7430e7ffb33d..8830e8273661 100644
--- a/test/CodeGen/R600/fneg-fabs.f64.ll
+++ b/test/CodeGen/R600/fneg-fabs.f64.ll
@@ -5,9 +5,7 @@
; into 2 modifiers, although theoretically that should work.
; FUNC-LABEL: {{^}}fneg_fabs_fadd_f64:
-; SI: v_mov_b32_e32 [[IMMREG:v[0-9]+]], 0x7fffffff
-; SI: v_and_b32_e32 v[[FABS:[0-9]+]], {{s[0-9]+}}, [[IMMREG]]
-; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, -v{{\[[0-9]+}}:[[FABS]]{{\]}}
+; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, -|v{{\[[0-9]+:[0-9]+\]}}|
define void @fneg_fabs_fadd_f64(double addrspace(1)* %out, double %x, double %y) {
%fabs = call double @llvm.fabs.f64(double %x)
%fsub = fsub double -0.000000e+00, %fabs
@@ -17,8 +15,8 @@ define void @fneg_fabs_fadd_f64(double addrspace(1)* %out, double %x, double %y)
}
define void @v_fneg_fabs_fadd_f64(double addrspace(1)* %out, double addrspace(1)* %xptr, double addrspace(1)* %yptr) {
- %x = load double addrspace(1)* %xptr, align 8
- %y = load double addrspace(1)* %xptr, align 8
+ %x = load double, double addrspace(1)* %xptr, align 8
+ %y = load double, double addrspace(1)* %xptr, align 8
%fabs = call double @llvm.fabs.f64(double %x)
%fsub = fsub double -0.000000e+00, %fabs
%fadd = fadd double %y, %fsub
@@ -57,8 +55,8 @@ define void @fneg_fabs_fn_free_f64(double addrspace(1)* %out, i64 %in) {
}
; FUNC-LABEL: {{^}}fneg_fabs_f64:
-; SI: s_load_dwordx2
; SI: s_load_dwordx2 s{{\[}}[[LO_X:[0-9]+]]:[[HI_X:[0-9]+]]{{\]}}
+; SI: s_load_dwordx2
; SI: v_mov_b32_e32 [[IMMREG:v[0-9]+]], 0x80000000
; SI-DAG: v_or_b32_e32 v[[HI_V:[0-9]+]], s[[HI_X]], [[IMMREG]]
; SI-DAG: v_mov_b32_e32 v[[LO_V:[0-9]+]], s[[LO_X]]
diff --git a/test/CodeGen/R600/fneg-fabs.ll b/test/CodeGen/R600/fneg-fabs.ll
index 4fde0484567c..3b4930d9897d 100644
--- a/test/CodeGen/R600/fneg-fabs.ll
+++ b/test/CodeGen/R600/fneg-fabs.ll
@@ -72,7 +72,7 @@ define void @fneg_fabs_f32(float addrspace(1)* %out, float %in) {
; FUNC-LABEL: {{^}}v_fneg_fabs_f32:
; SI: v_or_b32_e32 v{{[0-9]+}}, 0x80000000, v{{[0-9]+}}
define void @v_fneg_fabs_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
- %val = load float addrspace(1)* %in, align 4
+ %val = load float, float addrspace(1)* %in, align 4
%fabs = call float @llvm.fabs.f32(float %val)
%fsub = fsub float -0.000000e+00, %fabs
store float %fsub, float addrspace(1)* %out, align 4
diff --git a/test/CodeGen/R600/fneg.f64.ll b/test/CodeGen/R600/fneg.f64.ll
index eb2eb08b88b1..aa6df209035b 100644
--- a/test/CodeGen/R600/fneg.f64.ll
+++ b/test/CodeGen/R600/fneg.f64.ll
@@ -1,7 +1,8 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN -check-prefix=FUNC %s
; FUNC-LABEL: {{^}}fneg_f64:
-; SI: v_xor_b32
+; GCN: v_xor_b32
define void @fneg_f64(double addrspace(1)* %out, double %in) {
%fneg = fsub double -0.000000e+00, %in
store double %fneg, double addrspace(1)* %out
@@ -9,8 +10,8 @@ define void @fneg_f64(double addrspace(1)* %out, double %in) {
}
; FUNC-LABEL: {{^}}fneg_v2f64:
-; SI: v_xor_b32
-; SI: v_xor_b32
+; GCN: v_xor_b32
+; GCN: v_xor_b32
define void @fneg_v2f64(<2 x double> addrspace(1)* nocapture %out, <2 x double> %in) {
%fneg = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %in
store <2 x double> %fneg, <2 x double> addrspace(1)* %out
@@ -23,10 +24,10 @@ define void @fneg_v2f64(<2 x double> addrspace(1)* nocapture %out, <2 x double>
; R600: -PV
; R600: -PV
-; SI: v_xor_b32
-; SI: v_xor_b32
-; SI: v_xor_b32
-; SI: v_xor_b32
+; GCN: v_xor_b32
+; GCN: v_xor_b32
+; GCN: v_xor_b32
+; GCN: v_xor_b32
define void @fneg_v4f64(<4 x double> addrspace(1)* nocapture %out, <4 x double> %in) {
%fneg = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %in
store <4 x double> %fneg, <4 x double> addrspace(1)* %out
@@ -38,8 +39,7 @@ define void @fneg_v4f64(<4 x double> addrspace(1)* nocapture %out, <4 x double>
; unless the target returns true for isNegFree()
; FUNC-LABEL: {{^}}fneg_free_f64:
-; FIXME: Unnecessary copy to VGPRs
-; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, -{{v\[[0-9]+:[0-9]+\]$}}
+; GCN: v_add_f64 {{v\[[0-9]+:[0-9]+\]}}, 0, -{{s\[[0-9]+:[0-9]+\]$}}
define void @fneg_free_f64(double addrspace(1)* %out, i64 %in) {
%bc = bitcast i64 %in to double
%fsub = fsub double 0.0, %bc
@@ -47,10 +47,11 @@ define void @fneg_free_f64(double addrspace(1)* %out, i64 %in) {
ret void
}
-; SI-LABEL: {{^}}fneg_fold_f64:
+; GCN-LABEL: {{^}}fneg_fold_f64:
; SI: s_load_dwordx2 [[NEG_VALUE:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
-; SI-NOT: xor
-; SI: v_mul_f64 {{v\[[0-9]+:[0-9]+\]}}, -[[NEG_VALUE]], [[NEG_VALUE]]
+; VI: s_load_dwordx2 [[NEG_VALUE:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c
+; GCN-NOT: xor
+; GCN: v_mul_f64 {{v\[[0-9]+:[0-9]+\]}}, -[[NEG_VALUE]], [[NEG_VALUE]]
define void @fneg_fold_f64(double addrspace(1)* %out, double %in) {
%fsub = fsub double -0.0, %in
%fmul = fmul double %fsub, %in
diff --git a/test/CodeGen/R600/fneg.ll b/test/CodeGen/R600/fneg.ll
index ca3350dd7f48..a0fd539863c6 100644
--- a/test/CodeGen/R600/fneg.ll
+++ b/test/CodeGen/R600/fneg.ll
@@ -1,10 +1,11 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s
; FUNC-LABEL: {{^}}fneg_f32:
; R600: -PV
-; SI: v_xor_b32
+; GCN: v_xor_b32
define void @fneg_f32(float addrspace(1)* %out, float %in) {
%fneg = fsub float -0.000000e+00, %in
store float %fneg, float addrspace(1)* %out
@@ -15,8 +16,8 @@ define void @fneg_f32(float addrspace(1)* %out, float %in) {
; R600: -PV
; R600: -PV
-; SI: v_xor_b32
-; SI: v_xor_b32
+; GCN: v_xor_b32
+; GCN: v_xor_b32
define void @fneg_v2f32(<2 x float> addrspace(1)* nocapture %out, <2 x float> %in) {
%fneg = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %in
store <2 x float> %fneg, <2 x float> addrspace(1)* %out
@@ -29,10 +30,10 @@ define void @fneg_v2f32(<2 x float> addrspace(1)* nocapture %out, <2 x float> %i
; R600: -PV
; R600: -PV
-; SI: v_xor_b32
-; SI: v_xor_b32
-; SI: v_xor_b32
-; SI: v_xor_b32
+; GCN: v_xor_b32
+; GCN: v_xor_b32
+; GCN: v_xor_b32
+; GCN: v_xor_b32
define void @fneg_v4f32(<4 x float> addrspace(1)* nocapture %out, <4 x float> %in) {
%fneg = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %in
store <4 x float> %fneg, <4 x float> addrspace(1)* %out
@@ -48,7 +49,7 @@ define void @fneg_v4f32(<4 x float> addrspace(1)* nocapture %out, <4 x float> %i
; R600: -KC0[2].Z
; XXX: We could use v_add_f32_e64 with the negate bit here instead.
-; SI: v_sub_f32_e64 v{{[0-9]}}, 0, s{{[0-9]+$}}
+; GCN: v_sub_f32_e64 v{{[0-9]}}, 0, s{{[0-9]+$}}
define void @fneg_free_f32(float addrspace(1)* %out, i32 %in) {
%bc = bitcast i32 %in to float
%fsub = fsub float 0.0, %bc
@@ -58,8 +59,9 @@ define void @fneg_free_f32(float addrspace(1)* %out, i32 %in) {
; FUNC-LABEL: {{^}}fneg_fold_f32:
; SI: s_load_dword [[NEG_VALUE:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xb
-; SI-NOT: xor
-; SI: v_mul_f32_e64 v{{[0-9]+}}, -[[NEG_VALUE]], [[NEG_VALUE]]
+; VI: s_load_dword [[NEG_VALUE:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x2c
+; GCN-NOT: xor
+; GCN: v_mul_f32_e64 v{{[0-9]+}}, -[[NEG_VALUE]], [[NEG_VALUE]]
define void @fneg_fold_f32(float addrspace(1)* %out, float %in) {
%fsub = fsub float -0.0, %in
%fmul = fmul float %fsub, %in
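
Two RUN-line cleanups recur in the fneg tests above and elsewhere in this import: GCN-generation targets now use -march=amdgcn (with -mcpu=SI or -mcpu=tonga) instead of -march=r600, and checks shared by both generations move from the SI prefix to a common GCN prefix supplied through multiple -check-prefix flags. A hedged skeleton of the convention, not an actual test from this tree, reusing the fneg pattern whose GCN check appears above:

; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN -check-prefix=FUNC %s

; FUNC-LABEL: {{^}}fneg_example_f32:
; GCN: v_xor_b32        ; matched by both the SI and the tonga run
define void @fneg_example_f32(float addrspace(1)* %out, float %in) {
  %fneg = fsub float -0.000000e+00, %in
  store float %fneg, float addrspace(1)* %out
  ret void
}

Prefix-specific lines (SI: versus VI:) remain available for details that differ between generations, such as the 0xb versus 0x2c kernel-argument offsets checked in fneg_fold_f32 above.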
diff --git a/test/CodeGen/R600/fp-classify.ll b/test/CodeGen/R600/fp-classify.ll
index c1de85203104..4fac5176fac9 100644
--- a/test/CodeGen/R600/fp-classify.ll
+++ b/test/CodeGen/R600/fp-classify.ll
@@ -1,5 +1,5 @@
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
-; RUN: llc -march=r600 -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
declare i1 @llvm.AMDGPU.class.f32(float, i32) #1
declare i1 @llvm.AMDGPU.class.f64(double, i32) #1
diff --git a/test/CodeGen/R600/fp16_to_fp.ll b/test/CodeGen/R600/fp16_to_fp.ll
index da78f6155c85..5a79ca82bc29 100644
--- a/test/CodeGen/R600/fp16_to_fp.ll
+++ b/test/CodeGen/R600/fp16_to_fp.ll
@@ -9,7 +9,7 @@ declare double @llvm.convert.from.fp16.f64(i16) nounwind readnone
; SI: v_cvt_f32_f16_e32 [[RESULT:v[0-9]+]], [[VAL]]
; SI: buffer_store_dword [[RESULT]]
define void @test_convert_fp16_to_fp32(float addrspace(1)* noalias %out, i16 addrspace(1)* noalias %in) nounwind {
- %val = load i16 addrspace(1)* %in, align 2
+ %val = load i16, i16 addrspace(1)* %in, align 2
%cvt = call float @llvm.convert.from.fp16.f32(i16 %val) nounwind readnone
store float %cvt, float addrspace(1)* %out, align 4
ret void
@@ -22,7 +22,7 @@ define void @test_convert_fp16_to_fp32(float addrspace(1)* noalias %out, i16 add
; SI: v_cvt_f64_f32_e32 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[RESULT32]]
; SI: buffer_store_dwordx2 [[RESULT]]
define void @test_convert_fp16_to_fp64(double addrspace(1)* noalias %out, i16 addrspace(1)* noalias %in) nounwind {
- %val = load i16 addrspace(1)* %in, align 2
+ %val = load i16, i16 addrspace(1)* %in, align 2
%cvt = call double @llvm.convert.from.fp16.f64(i16 %val) nounwind readnone
store double %cvt, double addrspace(1)* %out, align 4
ret void
diff --git a/test/CodeGen/R600/fp32_to_fp16.ll b/test/CodeGen/R600/fp32_to_fp16.ll
index c3c65aece082..67925ebd82b6 100644
--- a/test/CodeGen/R600/fp32_to_fp16.ll
+++ b/test/CodeGen/R600/fp32_to_fp16.ll
@@ -8,7 +8,7 @@ declare i16 @llvm.convert.to.fp16.f32(float) nounwind readnone
; SI: v_cvt_f16_f32_e32 [[RESULT:v[0-9]+]], [[VAL]]
; SI: buffer_store_short [[RESULT]]
define void @test_convert_fp32_to_fp16(i16 addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind {
- %val = load float addrspace(1)* %in, align 4
+ %val = load float, float addrspace(1)* %in, align 4
%cvt = call i16 @llvm.convert.to.fp16.f32(float %val) nounwind readnone
store i16 %cvt, i16 addrspace(1)* %out, align 2
ret void
diff --git a/test/CodeGen/R600/fp_to_sint.f64.ll b/test/CodeGen/R600/fp_to_sint.f64.ll
index e6418477a9b4..12df6606e8ff 100644
--- a/test/CodeGen/R600/fp_to_sint.f64.ll
+++ b/test/CodeGen/R600/fp_to_sint.f64.ll
@@ -48,8 +48,8 @@ define void @fp_to_sint_v4f64_v4i32(<4 x i32> addrspace(1)* %out, <4 x double> %
; CI: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
define void @fp_to_sint_i64_f64(i64 addrspace(1)* %out, double addrspace(1)* %in) {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep = getelementptr double addrspace(1)* %in, i32 %tid
- %val = load double addrspace(1)* %gep, align 8
+ %gep = getelementptr double, double addrspace(1)* %in, i32 %tid
+ %val = load double, double addrspace(1)* %gep, align 8
%cast = fptosi double %val to i64
store i64 %cast, i64 addrspace(1)* %out, align 8
ret void
diff --git a/test/CodeGen/R600/fp_to_sint.ll b/test/CodeGen/R600/fp_to_sint.ll
index 16549c392b00..301a94b4904c 100644
--- a/test/CodeGen/R600/fp_to_sint.ll
+++ b/test/CodeGen/R600/fp_to_sint.ll
@@ -44,7 +44,7 @@ define void @fp_to_sint_v2i32(<2 x i32> addrspace(1)* %out, <2 x float> %in) {
; SI: v_cvt_i32_f32_e32
; SI: v_cvt_i32_f32_e32
define void @fp_to_sint_v4i32(<4 x i32> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
- %value = load <4 x float> addrspace(1) * %in
+ %value = load <4 x float>, <4 x float> addrspace(1) * %in
%result = fptosi <4 x float> %value to <4 x i32>
store <4 x i32> %result, <4 x i32> addrspace(1)* %out
ret void
diff --git a/test/CodeGen/R600/fp_to_uint.f64.ll b/test/CodeGen/R600/fp_to_uint.f64.ll
index 1ffe2faadf33..41bc2a780014 100644
--- a/test/CodeGen/R600/fp_to_uint.f64.ll
+++ b/test/CodeGen/R600/fp_to_uint.f64.ll
@@ -48,8 +48,8 @@ define void @fp_to_uint_v4i32_v4f64(<4 x i32> addrspace(1)* %out, <4 x double> %
; CI: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
define void @fp_to_uint_i64_f64(i64 addrspace(1)* %out, double addrspace(1)* %in) {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep = getelementptr double addrspace(1)* %in, i32 %tid
- %val = load double addrspace(1)* %gep, align 8
+ %gep = getelementptr double, double addrspace(1)* %in, i32 %tid
+ %val = load double, double addrspace(1)* %gep, align 8
%cast = fptoui double %val to i64
store i64 %cast, i64 addrspace(1)* %out, align 4
ret void
diff --git a/test/CodeGen/R600/fp_to_uint.ll b/test/CodeGen/R600/fp_to_uint.ll
index 804d90f476da..b7b6ccc238b3 100644
--- a/test/CodeGen/R600/fp_to_uint.ll
+++ b/test/CodeGen/R600/fp_to_uint.ll
@@ -36,7 +36,7 @@ define void @fp_to_uint_v2f32_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x float>
; SI: v_cvt_u32_f32_e32
define void @fp_to_uint_v4f32_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
- %value = load <4 x float> addrspace(1) * %in
+ %value = load <4 x float>, <4 x float> addrspace(1) * %in
%result = fptoui <4 x float> %value to <4 x i32>
store <4 x i32> %result, <4 x i32> addrspace(1)* %out
ret void
diff --git a/test/CodeGen/R600/fpext.ll b/test/CodeGen/R600/fpext.ll
index 21c7bfd48df8..734a43be2296 100644
--- a/test/CodeGen/R600/fpext.ll
+++ b/test/CodeGen/R600/fpext.ll
@@ -1,10 +1,45 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-; CHECK: {{^}}fpext:
-; CHECK: v_cvt_f64_f32_e32
-define void @fpext(double addrspace(1)* %out, float %in) {
+; FUNC-LABEL: {{^}}fpext_f32_to_f64:
+; SI: v_cvt_f64_f32_e32 {{v\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}}
+define void @fpext_f32_to_f64(double addrspace(1)* %out, float %in) {
%result = fpext float %in to double
store double %result, double addrspace(1)* %out
ret void
}
+
+; FUNC-LABEL: {{^}}fpext_v2f32_to_v2f64:
+; SI: v_cvt_f64_f32_e32
+; SI: v_cvt_f64_f32_e32
+define void @fpext_v2f32_to_v2f64(<2 x double> addrspace(1)* %out, <2 x float> %in) {
+ %result = fpext <2 x float> %in to <2 x double>
+ store <2 x double> %result, <2 x double> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}fpext_v4f32_to_v4f64:
+; SI: v_cvt_f64_f32_e32
+; SI: v_cvt_f64_f32_e32
+; SI: v_cvt_f64_f32_e32
+; SI: v_cvt_f64_f32_e32
+define void @fpext_v4f32_to_v4f64(<4 x double> addrspace(1)* %out, <4 x float> %in) {
+ %result = fpext <4 x float> %in to <4 x double>
+ store <4 x double> %result, <4 x double> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}fpext_v8f32_to_v8f64:
+; SI: v_cvt_f64_f32_e32
+; SI: v_cvt_f64_f32_e32
+; SI: v_cvt_f64_f32_e32
+; SI: v_cvt_f64_f32_e32
+; SI: v_cvt_f64_f32_e32
+; SI: v_cvt_f64_f32_e32
+; SI: v_cvt_f64_f32_e32
+; SI: v_cvt_f64_f32_e32
+define void @fpext_v8f32_to_v8f64(<8 x double> addrspace(1)* %out, <8 x float> %in) {
+ %result = fpext <8 x float> %in to <8 x double>
+ store <8 x double> %result, <8 x double> addrspace(1)* %out
+ ret void
+}
diff --git a/test/CodeGen/R600/fptrunc.ll b/test/CodeGen/R600/fptrunc.ll
index 94fcdab9c52f..385e10e7baae 100644
--- a/test/CodeGen/R600/fptrunc.ll
+++ b/test/CodeGen/R600/fptrunc.ll
@@ -1,10 +1,45 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-; CHECK: {{^}}fptrunc:
-; CHECK: v_cvt_f32_f64_e32
-define void @fptrunc(float addrspace(1)* %out, double %in) {
+; FUNC-LABEL: {{^}}fptrunc_f64_to_f32:
+; SI: v_cvt_f32_f64_e32 {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}
+define void @fptrunc_f64_to_f32(float addrspace(1)* %out, double %in) {
%result = fptrunc double %in to float
store float %result, float addrspace(1)* %out
ret void
}
+
+; FUNC-LABEL: {{^}}fptrunc_v2f64_to_v2f32:
+; SI: v_cvt_f32_f64_e32
+; SI: v_cvt_f32_f64_e32
+define void @fptrunc_v2f64_to_v2f32(<2 x float> addrspace(1)* %out, <2 x double> %in) {
+ %result = fptrunc <2 x double> %in to <2 x float>
+ store <2 x float> %result, <2 x float> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}fptrunc_v4f64_to_v4f32:
+; SI: v_cvt_f32_f64_e32
+; SI: v_cvt_f32_f64_e32
+; SI: v_cvt_f32_f64_e32
+; SI: v_cvt_f32_f64_e32
+define void @fptrunc_v4f64_to_v4f32(<4 x float> addrspace(1)* %out, <4 x double> %in) {
+ %result = fptrunc <4 x double> %in to <4 x float>
+ store <4 x float> %result, <4 x float> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}fptrunc_v8f64_to_v8f32:
+; SI: v_cvt_f32_f64_e32
+; SI: v_cvt_f32_f64_e32
+; SI: v_cvt_f32_f64_e32
+; SI: v_cvt_f32_f64_e32
+; SI: v_cvt_f32_f64_e32
+; SI: v_cvt_f32_f64_e32
+; SI: v_cvt_f32_f64_e32
+; SI: v_cvt_f32_f64_e32
+define void @fptrunc_v8f64_to_v8f32(<8 x float> addrspace(1)* %out, <8 x double> %in) {
+ %result = fptrunc <8 x double> %in to <8 x float>
+ store <8 x float> %result, <8 x float> addrspace(1)* %out
+ ret void
+}
diff --git a/test/CodeGen/R600/frem.ll b/test/CodeGen/R600/frem.ll
index 564634178656..f245ef08cb9d 100644
--- a/test/CodeGen/R600/frem.ll
+++ b/test/CodeGen/R600/frem.ll
@@ -1,73 +1,78 @@
-; RUN: llc -march=amdgcn -mcpu=SI -enable-misched < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI -enable-misched < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=bonaire -enable-misched < %s | FileCheck -check-prefix=CI -check-prefix=GCN -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -enable-misched < %s | FileCheck -check-prefix=CI -check-prefix=GCN -check-prefix=FUNC %s
; FUNC-LABEL: {{^}}frem_f32:
-; SI-DAG: buffer_load_dword [[X:v[0-9]+]], {{.*$}}
-; SI-DAG: buffer_load_dword [[Y:v[0-9]+]], {{.*}} offset:16
-; SI-DAG: v_cmp
-; SI-DAG: v_mul_f32
-; SI: v_rcp_f32_e32
-; SI: v_mul_f32_e32
-; SI: v_mul_f32_e32
-; SI: v_trunc_f32_e32
-; SI: v_mad_f32
-; SI: s_endpgm
+; GCN-DAG: buffer_load_dword [[X:v[0-9]+]], {{.*$}}
+; GCN-DAG: buffer_load_dword [[Y:v[0-9]+]], {{.*}} offset:16
+; GCN-DAG: v_cmp
+; GCN-DAG: v_mul_f32
+; GCN: v_rcp_f32_e32
+; GCN: v_mul_f32_e32
+; GCN: v_mul_f32_e32
+; GCN: v_trunc_f32_e32
+; GCN: v_mad_f32
+; GCN: s_endpgm
define void @frem_f32(float addrspace(1)* %out, float addrspace(1)* %in1,
float addrspace(1)* %in2) #0 {
- %gep2 = getelementptr float addrspace(1)* %in2, i32 4
- %r0 = load float addrspace(1)* %in1, align 4
- %r1 = load float addrspace(1)* %gep2, align 4
+ %gep2 = getelementptr float, float addrspace(1)* %in2, i32 4
+ %r0 = load float, float addrspace(1)* %in1, align 4
+ %r1 = load float, float addrspace(1)* %gep2, align 4
%r2 = frem float %r0, %r1
store float %r2, float addrspace(1)* %out, align 4
ret void
}
; FUNC-LABEL: {{^}}unsafe_frem_f32:
-; SI: buffer_load_dword [[Y:v[0-9]+]], {{.*}} offset:16
-; SI: buffer_load_dword [[X:v[0-9]+]], {{.*}}
-; SI: v_rcp_f32_e32 [[INVY:v[0-9]+]], [[Y]]
-; SI: v_mul_f32_e32 [[DIV:v[0-9]+]], [[INVY]], [[X]]
-; SI: v_trunc_f32_e32 [[TRUNC:v[0-9]+]], [[DIV]]
-; SI: v_mad_f32 [[RESULT:v[0-9]+]], -[[TRUNC]], [[Y]], [[X]]
-; SI: buffer_store_dword [[RESULT]]
-; SI: s_endpgm
+; GCN: buffer_load_dword [[Y:v[0-9]+]], {{.*}} offset:16
+; GCN: buffer_load_dword [[X:v[0-9]+]], {{.*}}
+; GCN: v_rcp_f32_e32 [[INVY:v[0-9]+]], [[Y]]
+; GCN: v_mul_f32_e32 [[DIV:v[0-9]+]], [[INVY]], [[X]]
+; GCN: v_trunc_f32_e32 [[TRUNC:v[0-9]+]], [[DIV]]
+; GCN: v_mad_f32 [[RESULT:v[0-9]+]], -[[TRUNC]], [[Y]], [[X]]
+; GCN: buffer_store_dword [[RESULT]]
+; GCN: s_endpgm
define void @unsafe_frem_f32(float addrspace(1)* %out, float addrspace(1)* %in1,
float addrspace(1)* %in2) #1 {
- %gep2 = getelementptr float addrspace(1)* %in2, i32 4
- %r0 = load float addrspace(1)* %in1, align 4
- %r1 = load float addrspace(1)* %gep2, align 4
+ %gep2 = getelementptr float, float addrspace(1)* %in2, i32 4
+ %r0 = load float, float addrspace(1)* %in1, align 4
+ %r1 = load float, float addrspace(1)* %gep2, align 4
%r2 = frem float %r0, %r1
store float %r2, float addrspace(1)* %out, align 4
ret void
}
; FUNC-LABEL: {{^}}frem_f64:
-; SI: buffer_load_dwordx2 [[Y:v\[[0-9]+:[0-9]+\]]], {{.*}}, 0
-; SI: buffer_load_dwordx2 [[X:v\[[0-9]+:[0-9]+\]]], {{.*}}, 0
-; SI-DAG: v_div_fmas_f64
-; SI-DAG: v_div_scale_f64
-; SI-DAG: v_mul_f64
-; SI: v_add_f64
-; SI: buffer_store_dwordx2
-; SI: s_endpgm
+; GCN: buffer_load_dwordx2 [[Y:v\[[0-9]+:[0-9]+\]]], {{.*}}, 0
+; GCN: buffer_load_dwordx2 [[X:v\[[0-9]+:[0-9]+\]]], {{.*}}, 0
+; GCN-DAG: v_div_fmas_f64
+; GCN-DAG: v_div_scale_f64
+; GCN-DAG: v_mul_f64
+; CI: v_trunc_f64_e32
+; CI: v_mul_f64
+; GCN: v_add_f64
+; GCN: buffer_store_dwordx2
+; GCN: s_endpgm
define void @frem_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
double addrspace(1)* %in2) #0 {
- %r0 = load double addrspace(1)* %in1, align 8
- %r1 = load double addrspace(1)* %in2, align 8
+ %r0 = load double, double addrspace(1)* %in1, align 8
+ %r1 = load double, double addrspace(1)* %in2, align 8
%r2 = frem double %r0, %r1
store double %r2, double addrspace(1)* %out, align 8
ret void
}
; FUNC-LABEL: {{^}}unsafe_frem_f64:
-; SI: v_rcp_f64_e32
-; SI: v_mul_f64
+; GCN: v_rcp_f64_e32
+; GCN: v_mul_f64
; SI: v_bfe_u32
-; SI: v_fma_f64
-; SI: s_endpgm
+; CI: v_trunc_f64_e32
+; GCN: v_fma_f64
+; GCN: s_endpgm
define void @unsafe_frem_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
double addrspace(1)* %in2) #1 {
- %r0 = load double addrspace(1)* %in1, align 8
- %r1 = load double addrspace(1)* %in2, align 8
+ %r0 = load double, double addrspace(1)* %in1, align 8
+ %r1 = load double, double addrspace(1)* %in2, align 8
%r2 = frem double %r0, %r1
store double %r2, double addrspace(1)* %out, align 8
ret void
@@ -75,9 +80,9 @@ define void @unsafe_frem_f64(double addrspace(1)* %out, double addrspace(1)* %in
define void @frem_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)* %in1,
<2 x float> addrspace(1)* %in2) #0 {
- %gep2 = getelementptr <2 x float> addrspace(1)* %in2, i32 4
- %r0 = load <2 x float> addrspace(1)* %in1, align 8
- %r1 = load <2 x float> addrspace(1)* %gep2, align 8
+ %gep2 = getelementptr <2 x float>, <2 x float> addrspace(1)* %in2, i32 4
+ %r0 = load <2 x float>, <2 x float> addrspace(1)* %in1, align 8
+ %r1 = load <2 x float>, <2 x float> addrspace(1)* %gep2, align 8
%r2 = frem <2 x float> %r0, %r1
store <2 x float> %r2, <2 x float> addrspace(1)* %out, align 8
ret void
@@ -85,9 +90,9 @@ define void @frem_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)
define void @frem_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in1,
<4 x float> addrspace(1)* %in2) #0 {
- %gep2 = getelementptr <4 x float> addrspace(1)* %in2, i32 4
- %r0 = load <4 x float> addrspace(1)* %in1, align 16
- %r1 = load <4 x float> addrspace(1)* %gep2, align 16
+ %gep2 = getelementptr <4 x float>, <4 x float> addrspace(1)* %in2, i32 4
+ %r0 = load <4 x float>, <4 x float> addrspace(1)* %in1, align 16
+ %r1 = load <4 x float>, <4 x float> addrspace(1)* %gep2, align 16
%r2 = frem <4 x float> %r0, %r1
store <4 x float> %r2, <4 x float> addrspace(1)* %out, align 16
ret void
@@ -95,9 +100,9 @@ define void @frem_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)
define void @frem_v2f64(<2 x double> addrspace(1)* %out, <2 x double> addrspace(1)* %in1,
<2 x double> addrspace(1)* %in2) #0 {
- %gep2 = getelementptr <2 x double> addrspace(1)* %in2, i32 4
- %r0 = load <2 x double> addrspace(1)* %in1, align 16
- %r1 = load <2 x double> addrspace(1)* %gep2, align 16
+ %gep2 = getelementptr <2 x double>, <2 x double> addrspace(1)* %in2, i32 4
+ %r0 = load <2 x double>, <2 x double> addrspace(1)* %in1, align 16
+ %r1 = load <2 x double>, <2 x double> addrspace(1)* %gep2, align 16
%r2 = frem <2 x double> %r0, %r1
store <2 x double> %r2, <2 x double> addrspace(1)* %out, align 16
ret void
diff --git a/test/CodeGen/R600/fsqrt.ll b/test/CodeGen/R600/fsqrt.ll
index 1fdf3e453bf3..04101346cdf9 100644
--- a/test/CodeGen/R600/fsqrt.ll
+++ b/test/CodeGen/R600/fsqrt.ll
@@ -9,7 +9,7 @@
; CHECK: v_sqrt_f32_e32 {{v[0-9]+, v[0-9]+}}
define void @fsqrt_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
- %r0 = load float addrspace(1)* %in
+ %r0 = load float, float addrspace(1)* %in
%r1 = call float @llvm.sqrt.f32(float %r0)
store float %r1, float addrspace(1)* %out
ret void
@@ -19,7 +19,7 @@ define void @fsqrt_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
; CHECK: v_sqrt_f64_e32 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
define void @fsqrt_f64(double addrspace(1)* %out, double addrspace(1)* %in) {
- %r0 = load double addrspace(1)* %in
+ %r0 = load double, double addrspace(1)* %in
%r1 = call double @llvm.sqrt.f64(double %r0)
store double %r1, double addrspace(1)* %out
ret void
diff --git a/test/CodeGen/R600/fsub.ll b/test/CodeGen/R600/fsub.ll
index ef90fea67900..dfe41cb5b111 100644
--- a/test/CodeGen/R600/fsub.ll
+++ b/test/CodeGen/R600/fsub.ll
@@ -6,9 +6,9 @@
; FUNC-LABEL: {{^}}v_fsub_f32:
; SI: v_subrev_f32_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
define void @v_fsub_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
- %b_ptr = getelementptr float addrspace(1)* %in, i32 1
- %a = load float addrspace(1)* %in, align 4
- %b = load float addrspace(1)* %b_ptr, align 4
+ %b_ptr = getelementptr float, float addrspace(1)* %in, i32 1
+ %a = load float, float addrspace(1)* %in, align 4
+ %b = load float, float addrspace(1)* %b_ptr, align 4
%result = fsub float %a, %b
store float %result, float addrspace(1)* %out, align 4
ret void
@@ -52,9 +52,9 @@ define void @fsub_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x flo
; SI: v_subrev_f32_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
; SI: v_subrev_f32_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
define void @v_fsub_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
- %b_ptr = getelementptr <4 x float> addrspace(1)* %in, i32 1
- %a = load <4 x float> addrspace(1)* %in, align 16
- %b = load <4 x float> addrspace(1)* %b_ptr, align 16
+ %b_ptr = getelementptr <4 x float>, <4 x float> addrspace(1)* %in, i32 1
+ %a = load <4 x float>, <4 x float> addrspace(1)* %in, align 16
+ %b = load <4 x float>, <4 x float> addrspace(1)* %b_ptr, align 16
%result = fsub <4 x float> %a, %b
store <4 x float> %result, <4 x float> addrspace(1)* %out, align 16
ret void
diff --git a/test/CodeGen/R600/fsub64.ll b/test/CodeGen/R600/fsub64.ll
index 62f46142fe0d..f34a48e30a86 100644
--- a/test/CodeGen/R600/fsub64.ll
+++ b/test/CodeGen/R600/fsub64.ll
@@ -1,13 +1,107 @@
; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+declare double @llvm.fabs.f64(double) #0
+
; SI-LABEL: {{^}}fsub_f64:
; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], -v\[[0-9]+:[0-9]+\]}}
define void @fsub_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
double addrspace(1)* %in2) {
- %r0 = load double addrspace(1)* %in1
- %r1 = load double addrspace(1)* %in2
- %r2 = fsub double %r0, %r1
- store double %r2, double addrspace(1)* %out
- ret void
+ %r0 = load double, double addrspace(1)* %in1
+ %r1 = load double, double addrspace(1)* %in2
+ %r2 = fsub double %r0, %r1
+ store double %r2, double addrspace(1)* %out
+ ret void
+}
+
+; SI-LABEL: {{^}}fsub_fabs_f64:
+; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], -\|v\[[0-9]+:[0-9]+\]\|}}
+define void @fsub_fabs_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
+ double addrspace(1)* %in2) {
+ %r0 = load double, double addrspace(1)* %in1
+ %r1 = load double, double addrspace(1)* %in2
+ %r1.fabs = call double @llvm.fabs.f64(double %r1) #0
+ %r2 = fsub double %r0, %r1.fabs
+ store double %r2, double addrspace(1)* %out
+ ret void
+}
+
+; SI-LABEL: {{^}}fsub_fabs_inv_f64:
+; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\], |v\[[0-9]+:[0-9]+\]|, -v\[[0-9]+:[0-9]+\]}}
+define void @fsub_fabs_inv_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
+ double addrspace(1)* %in2) {
+ %r0 = load double, double addrspace(1)* %in1
+ %r1 = load double, double addrspace(1)* %in2
+ %r0.fabs = call double @llvm.fabs.f64(double %r0) #0
+ %r2 = fsub double %r0.fabs, %r1
+ store double %r2, double addrspace(1)* %out
+ ret void
+}
+
+; SI-LABEL: {{^}}s_fsub_f64:
+; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\], s\[[0-9]+:[0-9]+\], -v\[[0-9]+:[0-9]+\]}}
+define void @s_fsub_f64(double addrspace(1)* %out, double %a, double %b) {
+ %sub = fsub double %a, %b
+ store double %sub, double addrspace(1)* %out
+ ret void
+}
+
+; SI-LABEL: {{^}}s_fsub_imm_f64:
+; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\], 4.0, -s\[[0-9]+:[0-9]+\]}}
+define void @s_fsub_imm_f64(double addrspace(1)* %out, double %a, double %b) {
+ %sub = fsub double 4.0, %a
+ store double %sub, double addrspace(1)* %out
+ ret void
+}
+
+; SI-LABEL: {{^}}s_fsub_imm_inv_f64:
+; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\], -4.0, s\[[0-9]+:[0-9]+\]}}
+define void @s_fsub_imm_inv_f64(double addrspace(1)* %out, double %a, double %b) {
+ %sub = fsub double %a, 4.0
+ store double %sub, double addrspace(1)* %out
+ ret void
+}
+
+; SI-LABEL: {{^}}s_fsub_self_f64:
+; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\], s\[[0-9]+:[0-9]+\], -s\[[0-9]+:[0-9]+\]}}
+define void @s_fsub_self_f64(double addrspace(1)* %out, double %a) {
+ %sub = fsub double %a, %a
+ store double %sub, double addrspace(1)* %out
+ ret void
+}
+
+; SI-LABEL: {{^}}fsub_v2f64:
+; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\], s\[[0-9]+:[0-9]+\], -v\[[0-9]+:[0-9]+\]}}
+; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\], s\[[0-9]+:[0-9]+\], -v\[[0-9]+:[0-9]+\]}}
+define void @fsub_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %a, <2 x double> %b) {
+ %sub = fsub <2 x double> %a, %b
+ store <2 x double> %sub, <2 x double> addrspace(1)* %out
+ ret void
+}
+
+; SI-LABEL: {{^}}fsub_v4f64:
+; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], -v\[[0-9]+:[0-9]+\]}}
+; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], -v\[[0-9]+:[0-9]+\]}}
+; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], -v\[[0-9]+:[0-9]+\]}}
+; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], -v\[[0-9]+:[0-9]+\]}}
+define void @fsub_v4f64(<4 x double> addrspace(1)* %out, <4 x double> addrspace(1)* %in) {
+ %b_ptr = getelementptr <4 x double>, <4 x double> addrspace(1)* %in, i32 1
+ %a = load <4 x double>, <4 x double> addrspace(1)* %in
+ %b = load <4 x double>, <4 x double> addrspace(1)* %b_ptr
+ %result = fsub <4 x double> %a, %b
+ store <4 x double> %result, <4 x double> addrspace(1)* %out
+ ret void
}
+
+; SI-LABEL: {{^}}s_fsub_v4f64:
+; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\], s\[[0-9]+:[0-9]+\], -v\[[0-9]+:[0-9]+\]}}
+; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\], s\[[0-9]+:[0-9]+\], -v\[[0-9]+:[0-9]+\]}}
+; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\], s\[[0-9]+:[0-9]+\], -v\[[0-9]+:[0-9]+\]}}
+; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\], s\[[0-9]+:[0-9]+\], -v\[[0-9]+:[0-9]+\]}}
+define void @s_fsub_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %a, <4 x double> %b) {
+ %result = fsub <4 x double> %a, %b
+ store <4 x double> %result, <4 x double> addrspace(1)* %out, align 16
+ ret void
+}
+
+attributes #0 = { nounwind readnone }
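
The new fsub64 cases all check for v_add_f64 rather than a subtract because the SI/GCN ISA has no v_sub_f64: a double-precision fsub is selected as v_add_f64 with the negate modifier on the second source, and llvm.fabs.f64 folds into the |...| absolute-value modifier, as the fsub_fabs checks show. A minimal sketch of that pattern, under the same assumptions as the tests above:

; SI-LABEL: {{^}}fsub_example_f64:
; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\], s\[[0-9]+:[0-9]+\], -v\[[0-9]+:[0-9]+\]}}
define void @fsub_example_f64(double addrspace(1)* %out, double %a, double %b) {
  ; no f64 subtract instruction exists; the second source is negated instead
  %sub = fsub double %a, %b
  store double %sub, double addrspace(1)* %out
  ret void
}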
diff --git a/test/CodeGen/R600/ftrunc.f64.ll b/test/CodeGen/R600/ftrunc.f64.ll
index 2c7217ef0561..6618d8b5e57e 100644
--- a/test/CodeGen/R600/ftrunc.f64.ll
+++ b/test/CodeGen/R600/ftrunc.f64.ll
@@ -1,5 +1,6 @@
-; RUN: llc -march=amdgcn -mcpu=bonaire < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=bonaire < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s
declare double @llvm.trunc.f64(double) nounwind readnone
declare <2 x double> @llvm.trunc.v2f64(<2 x double>) nounwind readnone
@@ -13,7 +14,7 @@ declare <16 x double> @llvm.trunc.v16f64(<16 x double>) nounwind readnone
; SI: v_bfe_u32 {{v[0-9]+}}, {{v[0-9]+}}, 20, 11
; SI: s_endpgm
define void @v_ftrunc_f64(double addrspace(1)* %out, double addrspace(1)* %in) {
- %x = load double addrspace(1)* %in, align 8
+ %x = load double, double addrspace(1)* %in, align 8
%y = call double @llvm.trunc.f64(double %x) nounwind readnone
store double %y, double addrspace(1)* %out, align 8
ret void
@@ -23,15 +24,15 @@ define void @v_ftrunc_f64(double addrspace(1)* %out, double addrspace(1)* %in) {
; CI: v_trunc_f64_e32
; SI: s_bfe_u32 [[SEXP:s[0-9]+]], {{s[0-9]+}}, 0xb0014
+; SI: s_and_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80000000
; SI: s_add_i32 s{{[0-9]+}}, [[SEXP]], 0xfffffc01
; SI: s_lshr_b64
; SI: s_not_b64
; SI: s_and_b64
-; SI: s_and_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80000000
-; SI: cmp_lt_i32
+; SI: cmp_gt_i32
; SI: cndmask_b32
; SI: cndmask_b32
-; SI: cmp_gt_i32
+; SI: cmp_lt_i32
; SI: cndmask_b32
; SI: cndmask_b32
; SI: s_endpgm
diff --git a/test/CodeGen/R600/gep-address-space.ll b/test/CodeGen/R600/gep-address-space.ll
index 2d1892534dc5..471b0f6b13e7 100644
--- a/test/CodeGen/R600/gep-address-space.ll
+++ b/test/CodeGen/R600/gep-address-space.ll
@@ -1,11 +1,12 @@
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs< %s | FileCheck --check-prefix=SI --check-prefix=CHECK %s
; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs< %s | FileCheck --check-prefix=CI --check-prefix=CHECK %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs< %s | FileCheck --check-prefix=CI --check-prefix=CHECK %s
define void @use_gep_address_space([1024 x i32] addrspace(3)* %array) nounwind {
; CHECK-LABEL: {{^}}use_gep_address_space:
; CHECK: v_mov_b32_e32 [[PTR:v[0-9]+]], s{{[0-9]+}}
; CHECK: ds_write_b32 [[PTR]], v{{[0-9]+}} offset:64
- %p = getelementptr [1024 x i32] addrspace(3)* %array, i16 0, i16 16
+ %p = getelementptr [1024 x i32], [1024 x i32] addrspace(3)* %array, i16 0, i16 16
store i32 99, i32 addrspace(3)* %p
ret void
}
@@ -17,7 +18,7 @@ define void @use_gep_address_space_large_offset([1024 x i32] addrspace(3)* %arra
; SI: s_or_b32
; CI: s_add_i32
; CHECK: ds_write_b32
- %p = getelementptr [1024 x i32] addrspace(3)* %array, i16 0, i16 16384
+ %p = getelementptr [1024 x i32], [1024 x i32] addrspace(3)* %array, i16 0, i16 16384
store i32 99, i32 addrspace(3)* %p
ret void
}
@@ -28,7 +29,7 @@ define void @gep_as_vector_v4(<4 x [1024 x i32] addrspace(3)*> %array) nounwind
; CHECK: s_add_i32
; CHECK: s_add_i32
; CHECK: s_add_i32
- %p = getelementptr <4 x [1024 x i32] addrspace(3)*> %array, <4 x i16> zeroinitializer, <4 x i16> <i16 16, i16 16, i16 16, i16 16>
+ %p = getelementptr [1024 x i32], <4 x [1024 x i32] addrspace(3)*> %array, <4 x i16> zeroinitializer, <4 x i16> <i16 16, i16 16, i16 16, i16 16>
%p0 = extractelement <4 x i32 addrspace(3)*> %p, i32 0
%p1 = extractelement <4 x i32 addrspace(3)*> %p, i32 1
%p2 = extractelement <4 x i32 addrspace(3)*> %p, i32 2
@@ -44,7 +45,7 @@ define void @gep_as_vector_v2(<2 x [1024 x i32] addrspace(3)*> %array) nounwind
; CHECK-LABEL: {{^}}gep_as_vector_v2:
; CHECK: s_add_i32
; CHECK: s_add_i32
- %p = getelementptr <2 x [1024 x i32] addrspace(3)*> %array, <2 x i16> zeroinitializer, <2 x i16> <i16 16, i16 16>
+ %p = getelementptr [1024 x i32], <2 x [1024 x i32] addrspace(3)*> %array, <2 x i16> zeroinitializer, <2 x i16> <i16 16, i16 16>
%p0 = extractelement <2 x i32 addrspace(3)*> %p, i32 0
%p1 = extractelement <2 x i32 addrspace(3)*> %p, i32 1
store i32 99, i32 addrspace(3)* %p0
diff --git a/test/CodeGen/R600/global-directive.ll b/test/CodeGen/R600/global-directive.ll
index 3ba12c206ad3..be775cf9292f 100644
--- a/test/CodeGen/R600/global-directive.ll
+++ b/test/CodeGen/R600/global-directive.ll
@@ -6,9 +6,9 @@
; SI: .globl foo
; SI: {{^}}foo:
define void @foo(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
- %b_ptr = getelementptr i32 addrspace(1)* %in, i32 1
- %a = load i32 addrspace(1)* %in
- %b = load i32 addrspace(1)* %b_ptr
+ %b_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
+ %a = load i32, i32 addrspace(1)* %in
+ %b = load i32, i32 addrspace(1)* %b_ptr
%result = add i32 %a, %b
store i32 %result, i32 addrspace(1)* %out
ret void
diff --git a/test/CodeGen/R600/global-extload-i1.ll b/test/CodeGen/R600/global-extload-i1.ll
index 5dc494900ce8..bd9557d730fb 100644
--- a/test/CodeGen/R600/global-extload-i1.ll
+++ b/test/CodeGen/R600/global-extload-i1.ll
@@ -1,5 +1,5 @@
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -march=r600 -mcpu=tonga -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; XUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
; FIXME: Evergreen broken
@@ -8,7 +8,7 @@
; SI: buffer_store_dword
; SI: s_endpgm
define void @zextload_global_i1_to_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
- %a = load i1 addrspace(1)* %in
+ %a = load i1, i1 addrspace(1)* %in
%ext = zext i1 %a to i32
store i32 %ext, i32 addrspace(1)* %out
ret void
@@ -20,7 +20,7 @@ define void @zextload_global_i1_to_i32(i32 addrspace(1)* %out, i1 addrspace(1)*
; SI: buffer_store_dword
; SI: s_endpgm
define void @sextload_global_i1_to_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
- %a = load i1 addrspace(1)* %in
+ %a = load i1, i1 addrspace(1)* %in
%ext = sext i1 %a to i32
store i32 %ext, i32 addrspace(1)* %out
ret void
@@ -29,7 +29,7 @@ define void @sextload_global_i1_to_i32(i32 addrspace(1)* %out, i1 addrspace(1)*
; FUNC-LABEL: {{^}}zextload_global_v1i1_to_v1i32:
; SI: s_endpgm
define void @zextload_global_v1i1_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i1> addrspace(1)* nocapture %in) nounwind {
- %load = load <1 x i1> addrspace(1)* %in
+ %load = load <1 x i1>, <1 x i1> addrspace(1)* %in
%ext = zext <1 x i1> %load to <1 x i32>
store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
ret void
@@ -38,7 +38,7 @@ define void @zextload_global_v1i1_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i1
; FUNC-LABEL: {{^}}sextload_global_v1i1_to_v1i32:
; SI: s_endpgm
define void @sextload_global_v1i1_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i1> addrspace(1)* nocapture %in) nounwind {
- %load = load <1 x i1> addrspace(1)* %in
+ %load = load <1 x i1>, <1 x i1> addrspace(1)* %in
%ext = sext <1 x i1> %load to <1 x i32>
store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
ret void
@@ -47,7 +47,7 @@ define void @sextload_global_v1i1_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i1
; FUNC-LABEL: {{^}}zextload_global_v2i1_to_v2i32:
; SI: s_endpgm
define void @zextload_global_v2i1_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i1> addrspace(1)* nocapture %in) nounwind {
- %load = load <2 x i1> addrspace(1)* %in
+ %load = load <2 x i1>, <2 x i1> addrspace(1)* %in
%ext = zext <2 x i1> %load to <2 x i32>
store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
ret void
@@ -56,7 +56,7 @@ define void @zextload_global_v2i1_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i1
; FUNC-LABEL: {{^}}sextload_global_v2i1_to_v2i32:
; SI: s_endpgm
define void @sextload_global_v2i1_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i1> addrspace(1)* nocapture %in) nounwind {
- %load = load <2 x i1> addrspace(1)* %in
+ %load = load <2 x i1>, <2 x i1> addrspace(1)* %in
%ext = sext <2 x i1> %load to <2 x i32>
store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
ret void
@@ -65,7 +65,7 @@ define void @sextload_global_v2i1_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i1
; FUNC-LABEL: {{^}}zextload_global_v4i1_to_v4i32:
; SI: s_endpgm
define void @zextload_global_v4i1_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i1> addrspace(1)* nocapture %in) nounwind {
- %load = load <4 x i1> addrspace(1)* %in
+ %load = load <4 x i1>, <4 x i1> addrspace(1)* %in
%ext = zext <4 x i1> %load to <4 x i32>
store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
ret void
@@ -74,7 +74,7 @@ define void @zextload_global_v4i1_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i1
; FUNC-LABEL: {{^}}sextload_global_v4i1_to_v4i32:
; SI: s_endpgm
define void @sextload_global_v4i1_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i1> addrspace(1)* nocapture %in) nounwind {
- %load = load <4 x i1> addrspace(1)* %in
+ %load = load <4 x i1>, <4 x i1> addrspace(1)* %in
%ext = sext <4 x i1> %load to <4 x i32>
store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
ret void
@@ -83,7 +83,7 @@ define void @sextload_global_v4i1_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i1
; FUNC-LABEL: {{^}}zextload_global_v8i1_to_v8i32:
; SI: s_endpgm
define void @zextload_global_v8i1_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i1> addrspace(1)* nocapture %in) nounwind {
- %load = load <8 x i1> addrspace(1)* %in
+ %load = load <8 x i1>, <8 x i1> addrspace(1)* %in
%ext = zext <8 x i1> %load to <8 x i32>
store <8 x i32> %ext, <8 x i32> addrspace(1)* %out
ret void
@@ -92,7 +92,7 @@ define void @zextload_global_v8i1_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i1
; FUNC-LABEL: {{^}}sextload_global_v8i1_to_v8i32:
; SI: s_endpgm
define void @sextload_global_v8i1_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i1> addrspace(1)* nocapture %in) nounwind {
- %load = load <8 x i1> addrspace(1)* %in
+ %load = load <8 x i1>, <8 x i1> addrspace(1)* %in
%ext = sext <8 x i1> %load to <8 x i32>
store <8 x i32> %ext, <8 x i32> addrspace(1)* %out
ret void
@@ -101,7 +101,7 @@ define void @sextload_global_v8i1_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i1
; FUNC-LABEL: {{^}}zextload_global_v16i1_to_v16i32:
; SI: s_endpgm
define void @zextload_global_v16i1_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i1> addrspace(1)* nocapture %in) nounwind {
- %load = load <16 x i1> addrspace(1)* %in
+ %load = load <16 x i1>, <16 x i1> addrspace(1)* %in
%ext = zext <16 x i1> %load to <16 x i32>
store <16 x i32> %ext, <16 x i32> addrspace(1)* %out
ret void
@@ -110,7 +110,7 @@ define void @zextload_global_v16i1_to_v16i32(<16 x i32> addrspace(1)* %out, <16
; FUNC-LABEL: {{^}}sextload_global_v16i1_to_v16i32:
; SI: s_endpgm
define void @sextload_global_v16i1_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i1> addrspace(1)* nocapture %in) nounwind {
- %load = load <16 x i1> addrspace(1)* %in
+ %load = load <16 x i1>, <16 x i1> addrspace(1)* %in
%ext = sext <16 x i1> %load to <16 x i32>
store <16 x i32> %ext, <16 x i32> addrspace(1)* %out
ret void
@@ -119,7 +119,7 @@ define void @sextload_global_v16i1_to_v16i32(<16 x i32> addrspace(1)* %out, <16
; XFUNC-LABEL: {{^}}zextload_global_v32i1_to_v32i32:
; XSI: s_endpgm
; define void @zextload_global_v32i1_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i1> addrspace(1)* nocapture %in) nounwind {
-; %load = load <32 x i1> addrspace(1)* %in
+; %load = load <32 x i1>, <32 x i1> addrspace(1)* %in
; %ext = zext <32 x i1> %load to <32 x i32>
; store <32 x i32> %ext, <32 x i32> addrspace(1)* %out
; ret void
@@ -128,7 +128,7 @@ define void @sextload_global_v16i1_to_v16i32(<16 x i32> addrspace(1)* %out, <16
; XFUNC-LABEL: {{^}}sextload_global_v32i1_to_v32i32:
; XSI: s_endpgm
; define void @sextload_global_v32i1_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i1> addrspace(1)* nocapture %in) nounwind {
-; %load = load <32 x i1> addrspace(1)* %in
+; %load = load <32 x i1>, <32 x i1> addrspace(1)* %in
; %ext = sext <32 x i1> %load to <32 x i32>
; store <32 x i32> %ext, <32 x i32> addrspace(1)* %out
; ret void
@@ -137,7 +137,7 @@ define void @sextload_global_v16i1_to_v16i32(<16 x i32> addrspace(1)* %out, <16
; XFUNC-LABEL: {{^}}zextload_global_v64i1_to_v64i32:
; XSI: s_endpgm
; define void @zextload_global_v64i1_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i1> addrspace(1)* nocapture %in) nounwind {
-; %load = load <64 x i1> addrspace(1)* %in
+; %load = load <64 x i1>, <64 x i1> addrspace(1)* %in
; %ext = zext <64 x i1> %load to <64 x i32>
; store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
; ret void
@@ -146,7 +146,7 @@ define void @sextload_global_v16i1_to_v16i32(<16 x i32> addrspace(1)* %out, <16
; XFUNC-LABEL: {{^}}sextload_global_v64i1_to_v64i32:
; XSI: s_endpgm
; define void @sextload_global_v64i1_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i1> addrspace(1)* nocapture %in) nounwind {
-; %load = load <64 x i1> addrspace(1)* %in
+; %load = load <64 x i1>, <64 x i1> addrspace(1)* %in
; %ext = sext <64 x i1> %load to <64 x i32>
; store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
; ret void
@@ -157,7 +157,7 @@ define void @sextload_global_v16i1_to_v16i32(<16 x i32> addrspace(1)* %out, <16
; SI: v_mov_b32_e32 {{v[0-9]+}}, 0{{$}}
; SI: buffer_store_dwordx2
define void @zextload_global_i1_to_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
- %a = load i1 addrspace(1)* %in
+ %a = load i1, i1 addrspace(1)* %in
%ext = zext i1 %a to i64
store i64 %ext, i64 addrspace(1)* %out
ret void
@@ -169,7 +169,7 @@ define void @zextload_global_i1_to_i64(i64 addrspace(1)* %out, i1 addrspace(1)*
; SI: v_ashrrev_i32_e32 v{{[0-9]+}}, 31, [[BFE]]
; SI: buffer_store_dwordx2
define void @sextload_global_i1_to_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
- %a = load i1 addrspace(1)* %in
+ %a = load i1, i1 addrspace(1)* %in
%ext = sext i1 %a to i64
store i64 %ext, i64 addrspace(1)* %out
ret void
@@ -178,7 +178,7 @@ define void @sextload_global_i1_to_i64(i64 addrspace(1)* %out, i1 addrspace(1)*
; FUNC-LABEL: {{^}}zextload_global_v1i1_to_v1i64:
; SI: s_endpgm
define void @zextload_global_v1i1_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i1> addrspace(1)* nocapture %in) nounwind {
- %load = load <1 x i1> addrspace(1)* %in
+ %load = load <1 x i1>, <1 x i1> addrspace(1)* %in
%ext = zext <1 x i1> %load to <1 x i64>
store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
ret void
@@ -187,7 +187,7 @@ define void @zextload_global_v1i1_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i1
; FUNC-LABEL: {{^}}sextload_global_v1i1_to_v1i64:
; SI: s_endpgm
define void @sextload_global_v1i1_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i1> addrspace(1)* nocapture %in) nounwind {
- %load = load <1 x i1> addrspace(1)* %in
+ %load = load <1 x i1>, <1 x i1> addrspace(1)* %in
%ext = sext <1 x i1> %load to <1 x i64>
store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
ret void
@@ -196,7 +196,7 @@ define void @sextload_global_v1i1_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i1
; FUNC-LABEL: {{^}}zextload_global_v2i1_to_v2i64:
; SI: s_endpgm
define void @zextload_global_v2i1_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i1> addrspace(1)* nocapture %in) nounwind {
- %load = load <2 x i1> addrspace(1)* %in
+ %load = load <2 x i1>, <2 x i1> addrspace(1)* %in
%ext = zext <2 x i1> %load to <2 x i64>
store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
ret void
@@ -205,7 +205,7 @@ define void @zextload_global_v2i1_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i1
; FUNC-LABEL: {{^}}sextload_global_v2i1_to_v2i64:
; SI: s_endpgm
define void @sextload_global_v2i1_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i1> addrspace(1)* nocapture %in) nounwind {
- %load = load <2 x i1> addrspace(1)* %in
+ %load = load <2 x i1>, <2 x i1> addrspace(1)* %in
%ext = sext <2 x i1> %load to <2 x i64>
store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
ret void
@@ -214,7 +214,7 @@ define void @sextload_global_v2i1_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i1
; FUNC-LABEL: {{^}}zextload_global_v4i1_to_v4i64:
; SI: s_endpgm
define void @zextload_global_v4i1_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i1> addrspace(1)* nocapture %in) nounwind {
- %load = load <4 x i1> addrspace(1)* %in
+ %load = load <4 x i1>, <4 x i1> addrspace(1)* %in
%ext = zext <4 x i1> %load to <4 x i64>
store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
ret void
@@ -223,7 +223,7 @@ define void @zextload_global_v4i1_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i1
; FUNC-LABEL: {{^}}sextload_global_v4i1_to_v4i64:
; SI: s_endpgm
define void @sextload_global_v4i1_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i1> addrspace(1)* nocapture %in) nounwind {
- %load = load <4 x i1> addrspace(1)* %in
+ %load = load <4 x i1>, <4 x i1> addrspace(1)* %in
%ext = sext <4 x i1> %load to <4 x i64>
store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
ret void
@@ -232,7 +232,7 @@ define void @sextload_global_v4i1_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i1
; FUNC-LABEL: {{^}}zextload_global_v8i1_to_v8i64:
; SI: s_endpgm
define void @zextload_global_v8i1_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i1> addrspace(1)* nocapture %in) nounwind {
- %load = load <8 x i1> addrspace(1)* %in
+ %load = load <8 x i1>, <8 x i1> addrspace(1)* %in
%ext = zext <8 x i1> %load to <8 x i64>
store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
ret void
@@ -241,7 +241,7 @@ define void @zextload_global_v8i1_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i1
; FUNC-LABEL: {{^}}sextload_global_v8i1_to_v8i64:
; SI: s_endpgm
define void @sextload_global_v8i1_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i1> addrspace(1)* nocapture %in) nounwind {
- %load = load <8 x i1> addrspace(1)* %in
+ %load = load <8 x i1>, <8 x i1> addrspace(1)* %in
%ext = sext <8 x i1> %load to <8 x i64>
store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
ret void
@@ -250,7 +250,7 @@ define void @sextload_global_v8i1_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i1
; FUNC-LABEL: {{^}}zextload_global_v16i1_to_v16i64:
; SI: s_endpgm
define void @zextload_global_v16i1_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i1> addrspace(1)* nocapture %in) nounwind {
- %load = load <16 x i1> addrspace(1)* %in
+ %load = load <16 x i1>, <16 x i1> addrspace(1)* %in
%ext = zext <16 x i1> %load to <16 x i64>
store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
ret void
@@ -259,7 +259,7 @@ define void @zextload_global_v16i1_to_v16i64(<16 x i64> addrspace(1)* %out, <16
; FUNC-LABEL: {{^}}sextload_global_v16i1_to_v16i64:
; SI: s_endpgm
define void @sextload_global_v16i1_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i1> addrspace(1)* nocapture %in) nounwind {
- %load = load <16 x i1> addrspace(1)* %in
+ %load = load <16 x i1>, <16 x i1> addrspace(1)* %in
%ext = sext <16 x i1> %load to <16 x i64>
store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
ret void
@@ -268,7 +268,7 @@ define void @sextload_global_v16i1_to_v16i64(<16 x i64> addrspace(1)* %out, <16
; XFUNC-LABEL: {{^}}zextload_global_v32i1_to_v32i64:
; XSI: s_endpgm
; define void @zextload_global_v32i1_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i1> addrspace(1)* nocapture %in) nounwind {
-; %load = load <32 x i1> addrspace(1)* %in
+; %load = load <32 x i1>, <32 x i1> addrspace(1)* %in
; %ext = zext <32 x i1> %load to <32 x i64>
; store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
; ret void
@@ -277,7 +277,7 @@ define void @sextload_global_v16i1_to_v16i64(<16 x i64> addrspace(1)* %out, <16
; XFUNC-LABEL: {{^}}sextload_global_v32i1_to_v32i64:
; XSI: s_endpgm
; define void @sextload_global_v32i1_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i1> addrspace(1)* nocapture %in) nounwind {
-; %load = load <32 x i1> addrspace(1)* %in
+; %load = load <32 x i1>, <32 x i1> addrspace(1)* %in
; %ext = sext <32 x i1> %load to <32 x i64>
; store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
; ret void
@@ -286,7 +286,7 @@ define void @sextload_global_v16i1_to_v16i64(<16 x i64> addrspace(1)* %out, <16
; XFUNC-LABEL: {{^}}zextload_global_v64i1_to_v64i64:
; XSI: s_endpgm
; define void @zextload_global_v64i1_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i1> addrspace(1)* nocapture %in) nounwind {
-; %load = load <64 x i1> addrspace(1)* %in
+; %load = load <64 x i1>, <64 x i1> addrspace(1)* %in
; %ext = zext <64 x i1> %load to <64 x i64>
; store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
; ret void
@@ -295,7 +295,7 @@ define void @sextload_global_v16i1_to_v16i64(<16 x i64> addrspace(1)* %out, <16
; XFUNC-LABEL: {{^}}sextload_global_v64i1_to_v64i64:
; XSI: s_endpgm
; define void @sextload_global_v64i1_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i1> addrspace(1)* nocapture %in) nounwind {
-; %load = load <64 x i1> addrspace(1)* %in
+; %load = load <64 x i1>, <64 x i1> addrspace(1)* %in
; %ext = sext <64 x i1> %load to <64 x i64>
; store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
; ret void
diff --git a/test/CodeGen/R600/global-extload-i16.ll b/test/CodeGen/R600/global-extload-i16.ll
index a1740ec8236a..103a40dee270 100644
--- a/test/CodeGen/R600/global-extload-i16.ll
+++ b/test/CodeGen/R600/global-extload-i16.ll
@@ -1,5 +1,5 @@
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -march=r600 -mcpu=tonga -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; XUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
; FIXME: cypress is broken because the bigger testcases spill and it's not implemented
@@ -8,7 +8,7 @@
; SI: buffer_store_dword
; SI: s_endpgm
define void @zextload_global_i16_to_i32(i32 addrspace(1)* %out, i16 addrspace(1)* %in) nounwind {
- %a = load i16 addrspace(1)* %in
+ %a = load i16, i16 addrspace(1)* %in
%ext = zext i16 %a to i32
store i32 %ext, i32 addrspace(1)* %out
ret void
@@ -19,7 +19,7 @@ define void @zextload_global_i16_to_i32(i32 addrspace(1)* %out, i16 addrspace(1)
; SI: buffer_store_dword
; SI: s_endpgm
define void @sextload_global_i16_to_i32(i32 addrspace(1)* %out, i16 addrspace(1)* %in) nounwind {
- %a = load i16 addrspace(1)* %in
+ %a = load i16, i16 addrspace(1)* %in
%ext = sext i16 %a to i32
store i32 %ext, i32 addrspace(1)* %out
ret void
@@ -29,7 +29,7 @@ define void @sextload_global_i16_to_i32(i32 addrspace(1)* %out, i16 addrspace(1)
; SI: buffer_load_ushort
; SI: s_endpgm
define void @zextload_global_v1i16_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i16> addrspace(1)* nocapture %in) nounwind {
- %load = load <1 x i16> addrspace(1)* %in
+ %load = load <1 x i16>, <1 x i16> addrspace(1)* %in
%ext = zext <1 x i16> %load to <1 x i32>
store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
ret void
@@ -39,7 +39,7 @@ define void @zextload_global_v1i16_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i
; SI: buffer_load_sshort
; SI: s_endpgm
define void @sextload_global_v1i16_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i16> addrspace(1)* nocapture %in) nounwind {
- %load = load <1 x i16> addrspace(1)* %in
+ %load = load <1 x i16>, <1 x i16> addrspace(1)* %in
%ext = sext <1 x i16> %load to <1 x i32>
store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
ret void
@@ -48,7 +48,7 @@ define void @sextload_global_v1i16_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i
; FUNC-LABEL: {{^}}zextload_global_v2i16_to_v2i32:
; SI: s_endpgm
define void @zextload_global_v2i16_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(1)* nocapture %in) nounwind {
- %load = load <2 x i16> addrspace(1)* %in
+ %load = load <2 x i16>, <2 x i16> addrspace(1)* %in
%ext = zext <2 x i16> %load to <2 x i32>
store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
ret void
@@ -57,7 +57,7 @@ define void @zextload_global_v2i16_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i
; FUNC-LABEL: {{^}}sextload_global_v2i16_to_v2i32:
; SI: s_endpgm
define void @sextload_global_v2i16_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(1)* nocapture %in) nounwind {
- %load = load <2 x i16> addrspace(1)* %in
+ %load = load <2 x i16>, <2 x i16> addrspace(1)* %in
%ext = sext <2 x i16> %load to <2 x i32>
store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
ret void
@@ -66,7 +66,7 @@ define void @sextload_global_v2i16_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i
; FUNC-LABEL: {{^}}zextload_global_v4i16_to_v4i32:
; SI: s_endpgm
define void @zextload_global_v4i16_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(1)* nocapture %in) nounwind {
- %load = load <4 x i16> addrspace(1)* %in
+ %load = load <4 x i16>, <4 x i16> addrspace(1)* %in
%ext = zext <4 x i16> %load to <4 x i32>
store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
ret void
@@ -75,7 +75,7 @@ define void @zextload_global_v4i16_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i
; FUNC-LABEL: {{^}}sextload_global_v4i16_to_v4i32:
; SI: s_endpgm
define void @sextload_global_v4i16_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(1)* nocapture %in) nounwind {
- %load = load <4 x i16> addrspace(1)* %in
+ %load = load <4 x i16>, <4 x i16> addrspace(1)* %in
%ext = sext <4 x i16> %load to <4 x i32>
store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
ret void
@@ -84,7 +84,7 @@ define void @sextload_global_v4i16_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i
; FUNC-LABEL: {{^}}zextload_global_v8i16_to_v8i32:
; SI: s_endpgm
define void @zextload_global_v8i16_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i16> addrspace(1)* nocapture %in) nounwind {
- %load = load <8 x i16> addrspace(1)* %in
+ %load = load <8 x i16>, <8 x i16> addrspace(1)* %in
%ext = zext <8 x i16> %load to <8 x i32>
store <8 x i32> %ext, <8 x i32> addrspace(1)* %out
ret void
@@ -93,7 +93,7 @@ define void @zextload_global_v8i16_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i
; FUNC-LABEL: {{^}}sextload_global_v8i16_to_v8i32:
; SI: s_endpgm
define void @sextload_global_v8i16_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i16> addrspace(1)* nocapture %in) nounwind {
- %load = load <8 x i16> addrspace(1)* %in
+ %load = load <8 x i16>, <8 x i16> addrspace(1)* %in
%ext = sext <8 x i16> %load to <8 x i32>
store <8 x i32> %ext, <8 x i32> addrspace(1)* %out
ret void
@@ -102,7 +102,7 @@ define void @sextload_global_v8i16_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i
; FUNC-LABEL: {{^}}zextload_global_v16i16_to_v16i32:
; SI: s_endpgm
define void @zextload_global_v16i16_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i16> addrspace(1)* nocapture %in) nounwind {
- %load = load <16 x i16> addrspace(1)* %in
+ %load = load <16 x i16>, <16 x i16> addrspace(1)* %in
%ext = zext <16 x i16> %load to <16 x i32>
store <16 x i32> %ext, <16 x i32> addrspace(1)* %out
ret void
@@ -111,7 +111,7 @@ define void @zextload_global_v16i16_to_v16i32(<16 x i32> addrspace(1)* %out, <16
; FUNC-LABEL: {{^}}sextload_global_v16i16_to_v16i32:
; SI: s_endpgm
define void @sextload_global_v16i16_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i16> addrspace(1)* nocapture %in) nounwind {
- %load = load <16 x i16> addrspace(1)* %in
+ %load = load <16 x i16>, <16 x i16> addrspace(1)* %in
%ext = sext <16 x i16> %load to <16 x i32>
store <16 x i32> %ext, <16 x i32> addrspace(1)* %out
ret void
@@ -120,7 +120,7 @@ define void @sextload_global_v16i16_to_v16i32(<16 x i32> addrspace(1)* %out, <16
; FUNC-LABEL: {{^}}zextload_global_v32i16_to_v32i32:
; SI: s_endpgm
define void @zextload_global_v32i16_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i16> addrspace(1)* nocapture %in) nounwind {
- %load = load <32 x i16> addrspace(1)* %in
+ %load = load <32 x i16>, <32 x i16> addrspace(1)* %in
%ext = zext <32 x i16> %load to <32 x i32>
store <32 x i32> %ext, <32 x i32> addrspace(1)* %out
ret void
@@ -129,7 +129,7 @@ define void @zextload_global_v32i16_to_v32i32(<32 x i32> addrspace(1)* %out, <32
; FUNC-LABEL: {{^}}sextload_global_v32i16_to_v32i32:
; SI: s_endpgm
define void @sextload_global_v32i16_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i16> addrspace(1)* nocapture %in) nounwind {
- %load = load <32 x i16> addrspace(1)* %in
+ %load = load <32 x i16>, <32 x i16> addrspace(1)* %in
%ext = sext <32 x i16> %load to <32 x i32>
store <32 x i32> %ext, <32 x i32> addrspace(1)* %out
ret void
@@ -138,7 +138,7 @@ define void @sextload_global_v32i16_to_v32i32(<32 x i32> addrspace(1)* %out, <32
; FUNC-LABEL: {{^}}zextload_global_v64i16_to_v64i32:
; SI: s_endpgm
define void @zextload_global_v64i16_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i16> addrspace(1)* nocapture %in) nounwind {
- %load = load <64 x i16> addrspace(1)* %in
+ %load = load <64 x i16>, <64 x i16> addrspace(1)* %in
%ext = zext <64 x i16> %load to <64 x i32>
store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
ret void
@@ -147,7 +147,7 @@ define void @zextload_global_v64i16_to_v64i32(<64 x i32> addrspace(1)* %out, <64
; FUNC-LABEL: {{^}}sextload_global_v64i16_to_v64i32:
; SI: s_endpgm
define void @sextload_global_v64i16_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i16> addrspace(1)* nocapture %in) nounwind {
- %load = load <64 x i16> addrspace(1)* %in
+ %load = load <64 x i16>, <64 x i16> addrspace(1)* %in
%ext = sext <64 x i16> %load to <64 x i32>
store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
ret void
@@ -158,7 +158,7 @@ define void @sextload_global_v64i16_to_v64i32(<64 x i32> addrspace(1)* %out, <64
; SI: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
; SI: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]]
define void @zextload_global_i16_to_i64(i64 addrspace(1)* %out, i16 addrspace(1)* %in) nounwind {
- %a = load i16 addrspace(1)* %in
+ %a = load i16, i16 addrspace(1)* %in
%ext = zext i16 %a to i64
store i64 %ext, i64 addrspace(1)* %out
ret void
@@ -169,7 +169,7 @@ define void @zextload_global_i16_to_i64(i64 addrspace(1)* %out, i16 addrspace(1)
; SI: v_ashrrev_i32_e32 v{{[0-9]+}}, 31, [[LOAD]]
; SI: buffer_store_dwordx2
define void @sextload_global_i16_to_i64(i64 addrspace(1)* %out, i16 addrspace(1)* %in) nounwind {
- %a = load i16 addrspace(1)* %in
+ %a = load i16, i16 addrspace(1)* %in
%ext = sext i16 %a to i64
store i64 %ext, i64 addrspace(1)* %out
ret void
@@ -178,7 +178,7 @@ define void @sextload_global_i16_to_i64(i64 addrspace(1)* %out, i16 addrspace(1)
; FUNC-LABEL: {{^}}zextload_global_v1i16_to_v1i64:
; SI: s_endpgm
define void @zextload_global_v1i16_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i16> addrspace(1)* nocapture %in) nounwind {
- %load = load <1 x i16> addrspace(1)* %in
+ %load = load <1 x i16>, <1 x i16> addrspace(1)* %in
%ext = zext <1 x i16> %load to <1 x i64>
store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
ret void
@@ -187,7 +187,7 @@ define void @zextload_global_v1i16_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i
; FUNC-LABEL: {{^}}sextload_global_v1i16_to_v1i64:
; SI: s_endpgm
define void @sextload_global_v1i16_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i16> addrspace(1)* nocapture %in) nounwind {
- %load = load <1 x i16> addrspace(1)* %in
+ %load = load <1 x i16>, <1 x i16> addrspace(1)* %in
%ext = sext <1 x i16> %load to <1 x i64>
store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
ret void
@@ -196,7 +196,7 @@ define void @sextload_global_v1i16_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i
; FUNC-LABEL: {{^}}zextload_global_v2i16_to_v2i64:
; SI: s_endpgm
define void @zextload_global_v2i16_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i16> addrspace(1)* nocapture %in) nounwind {
- %load = load <2 x i16> addrspace(1)* %in
+ %load = load <2 x i16>, <2 x i16> addrspace(1)* %in
%ext = zext <2 x i16> %load to <2 x i64>
store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
ret void
@@ -205,7 +205,7 @@ define void @zextload_global_v2i16_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i
; FUNC-LABEL: {{^}}sextload_global_v2i16_to_v2i64:
; SI: s_endpgm
define void @sextload_global_v2i16_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i16> addrspace(1)* nocapture %in) nounwind {
- %load = load <2 x i16> addrspace(1)* %in
+ %load = load <2 x i16>, <2 x i16> addrspace(1)* %in
%ext = sext <2 x i16> %load to <2 x i64>
store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
ret void
@@ -214,7 +214,7 @@ define void @sextload_global_v2i16_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i
; FUNC-LABEL: {{^}}zextload_global_v4i16_to_v4i64:
; SI: s_endpgm
define void @zextload_global_v4i16_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i16> addrspace(1)* nocapture %in) nounwind {
- %load = load <4 x i16> addrspace(1)* %in
+ %load = load <4 x i16>, <4 x i16> addrspace(1)* %in
%ext = zext <4 x i16> %load to <4 x i64>
store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
ret void
@@ -223,7 +223,7 @@ define void @zextload_global_v4i16_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i
; FUNC-LABEL: {{^}}sextload_global_v4i16_to_v4i64:
; SI: s_endpgm
define void @sextload_global_v4i16_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i16> addrspace(1)* nocapture %in) nounwind {
- %load = load <4 x i16> addrspace(1)* %in
+ %load = load <4 x i16>, <4 x i16> addrspace(1)* %in
%ext = sext <4 x i16> %load to <4 x i64>
store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
ret void
@@ -232,7 +232,7 @@ define void @sextload_global_v4i16_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i
; FUNC-LABEL: {{^}}zextload_global_v8i16_to_v8i64:
; SI: s_endpgm
define void @zextload_global_v8i16_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i16> addrspace(1)* nocapture %in) nounwind {
- %load = load <8 x i16> addrspace(1)* %in
+ %load = load <8 x i16>, <8 x i16> addrspace(1)* %in
%ext = zext <8 x i16> %load to <8 x i64>
store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
ret void
@@ -241,7 +241,7 @@ define void @zextload_global_v8i16_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i
; FUNC-LABEL: {{^}}sextload_global_v8i16_to_v8i64:
; SI: s_endpgm
define void @sextload_global_v8i16_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i16> addrspace(1)* nocapture %in) nounwind {
- %load = load <8 x i16> addrspace(1)* %in
+ %load = load <8 x i16>, <8 x i16> addrspace(1)* %in
%ext = sext <8 x i16> %load to <8 x i64>
store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
ret void
@@ -250,7 +250,7 @@ define void @sextload_global_v8i16_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i
; FUNC-LABEL: {{^}}zextload_global_v16i16_to_v16i64:
; SI: s_endpgm
define void @zextload_global_v16i16_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i16> addrspace(1)* nocapture %in) nounwind {
- %load = load <16 x i16> addrspace(1)* %in
+ %load = load <16 x i16>, <16 x i16> addrspace(1)* %in
%ext = zext <16 x i16> %load to <16 x i64>
store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
ret void
@@ -259,7 +259,7 @@ define void @zextload_global_v16i16_to_v16i64(<16 x i64> addrspace(1)* %out, <16
; FUNC-LABEL: {{^}}sextload_global_v16i16_to_v16i64:
; SI: s_endpgm
define void @sextload_global_v16i16_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i16> addrspace(1)* nocapture %in) nounwind {
- %load = load <16 x i16> addrspace(1)* %in
+ %load = load <16 x i16>, <16 x i16> addrspace(1)* %in
%ext = sext <16 x i16> %load to <16 x i64>
store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
ret void
@@ -268,7 +268,7 @@ define void @sextload_global_v16i16_to_v16i64(<16 x i64> addrspace(1)* %out, <16
; FUNC-LABEL: {{^}}zextload_global_v32i16_to_v32i64:
; SI: s_endpgm
define void @zextload_global_v32i16_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i16> addrspace(1)* nocapture %in) nounwind {
- %load = load <32 x i16> addrspace(1)* %in
+ %load = load <32 x i16>, <32 x i16> addrspace(1)* %in
%ext = zext <32 x i16> %load to <32 x i64>
store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
ret void
@@ -277,7 +277,7 @@ define void @zextload_global_v32i16_to_v32i64(<32 x i64> addrspace(1)* %out, <32
; FUNC-LABEL: {{^}}sextload_global_v32i16_to_v32i64:
; SI: s_endpgm
define void @sextload_global_v32i16_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i16> addrspace(1)* nocapture %in) nounwind {
- %load = load <32 x i16> addrspace(1)* %in
+ %load = load <32 x i16>, <32 x i16> addrspace(1)* %in
%ext = sext <32 x i16> %load to <32 x i64>
store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
ret void
@@ -286,7 +286,7 @@ define void @sextload_global_v32i16_to_v32i64(<32 x i64> addrspace(1)* %out, <32
; FUNC-LABEL: {{^}}zextload_global_v64i16_to_v64i64:
; SI: s_endpgm
define void @zextload_global_v64i16_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i16> addrspace(1)* nocapture %in) nounwind {
- %load = load <64 x i16> addrspace(1)* %in
+ %load = load <64 x i16>, <64 x i16> addrspace(1)* %in
%ext = zext <64 x i16> %load to <64 x i64>
store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
ret void
@@ -295,7 +295,7 @@ define void @zextload_global_v64i16_to_v64i64(<64 x i64> addrspace(1)* %out, <64
; FUNC-LABEL: {{^}}sextload_global_v64i16_to_v64i64:
; SI: s_endpgm
define void @sextload_global_v64i16_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i16> addrspace(1)* nocapture %in) nounwind {
- %load = load <64 x i16> addrspace(1)* %in
+ %load = load <64 x i16>, <64 x i16> addrspace(1)* %in
%ext = sext <64 x i16> %load to <64 x i64>
store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
ret void
diff --git a/test/CodeGen/R600/global-extload-i32.ll b/test/CodeGen/R600/global-extload-i32.ll
index f56b6ac8dc38..79b83452939e 100644
--- a/test/CodeGen/R600/global-extload-i32.ll
+++ b/test/CodeGen/R600/global-extload-i32.ll
@@ -1,5 +1,5 @@
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -march=r600 -mcpu=tonga -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
; FUNC-LABEL: {{^}}zextload_global_i32_to_i64:
@@ -7,7 +7,7 @@
; SI: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
; SI: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]]
define void @zextload_global_i32_to_i64(i64 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
- %a = load i32 addrspace(1)* %in
+ %a = load i32, i32 addrspace(1)* %in
%ext = zext i32 %a to i64
store i64 %ext, i64 addrspace(1)* %out
ret void
@@ -18,7 +18,7 @@ define void @zextload_global_i32_to_i64(i64 addrspace(1)* %out, i32 addrspace(1)
; SI: v_ashrrev_i32_e32 v{{[0-9]+}}, 31, [[LOAD]]
; SI: buffer_store_dwordx2
define void @sextload_global_i32_to_i64(i64 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
- %a = load i32 addrspace(1)* %in
+ %a = load i32, i32 addrspace(1)* %in
%ext = sext i32 %a to i64
store i64 %ext, i64 addrspace(1)* %out
ret void
@@ -29,7 +29,7 @@ define void @sextload_global_i32_to_i64(i64 addrspace(1)* %out, i32 addrspace(1)
; SI: buffer_store_dwordx2
; SI: s_endpgm
define void @zextload_global_v1i32_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i32> addrspace(1)* nocapture %in) nounwind {
- %load = load <1 x i32> addrspace(1)* %in
+ %load = load <1 x i32>, <1 x i32> addrspace(1)* %in
%ext = zext <1 x i32> %load to <1 x i64>
store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
ret void
@@ -41,7 +41,7 @@ define void @zextload_global_v1i32_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i
; SI: buffer_store_dwordx2
; SI: s_endpgm
define void @sextload_global_v1i32_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i32> addrspace(1)* nocapture %in) nounwind {
- %load = load <1 x i32> addrspace(1)* %in
+ %load = load <1 x i32>, <1 x i32> addrspace(1)* %in
%ext = sext <1 x i32> %load to <1 x i64>
store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
ret void
@@ -53,7 +53,7 @@ define void @sextload_global_v1i32_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i
; SI: buffer_store_dwordx2
; SI: s_endpgm
define void @zextload_global_v2i32_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i32> addrspace(1)* nocapture %in) nounwind {
- %load = load <2 x i32> addrspace(1)* %in
+ %load = load <2 x i32>, <2 x i32> addrspace(1)* %in
%ext = zext <2 x i32> %load to <2 x i64>
store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
ret void
@@ -67,7 +67,7 @@ define void @zextload_global_v2i32_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i
; SI-DAG: buffer_store_dwordx2
; SI: s_endpgm
define void @sextload_global_v2i32_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i32> addrspace(1)* nocapture %in) nounwind {
- %load = load <2 x i32> addrspace(1)* %in
+ %load = load <2 x i32>, <2 x i32> addrspace(1)* %in
%ext = sext <2 x i32> %load to <2 x i64>
store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
ret void
@@ -81,7 +81,7 @@ define void @sextload_global_v2i32_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i
; SI: buffer_store_dwordx2
; SI: s_endpgm
define void @zextload_global_v4i32_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i32> addrspace(1)* nocapture %in) nounwind {
- %load = load <4 x i32> addrspace(1)* %in
+ %load = load <4 x i32>, <4 x i32> addrspace(1)* %in
%ext = zext <4 x i32> %load to <4 x i64>
store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
ret void
@@ -99,7 +99,7 @@ define void @zextload_global_v4i32_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i
; SI-DAG: buffer_store_dwordx2
; SI: s_endpgm
define void @sextload_global_v4i32_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i32> addrspace(1)* nocapture %in) nounwind {
- %load = load <4 x i32> addrspace(1)* %in
+ %load = load <4 x i32>, <4 x i32> addrspace(1)* %in
%ext = sext <4 x i32> %load to <4 x i64>
store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
ret void
@@ -124,7 +124,7 @@ define void @sextload_global_v4i32_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i
; SI-DAG: buffer_store_dwordx2
; SI: s_endpgm
define void @zextload_global_v8i32_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i32> addrspace(1)* nocapture %in) nounwind {
- %load = load <8 x i32> addrspace(1)* %in
+ %load = load <8 x i32>, <8 x i32> addrspace(1)* %in
%ext = zext <8 x i32> %load to <8 x i64>
store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
ret void
@@ -159,7 +159,7 @@ define void @zextload_global_v8i32_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i
; SI: s_endpgm
define void @sextload_global_v8i32_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i32> addrspace(1)* nocapture %in) nounwind {
- %load = load <8 x i32> addrspace(1)* %in
+ %load = load <8 x i32>, <8 x i32> addrspace(1)* %in
%ext = sext <8 x i32> %load to <8 x i64>
store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
ret void
@@ -212,7 +212,7 @@ define void @sextload_global_v8i32_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i
; SI-DAG: buffer_store_dwordx2
; SI: s_endpgm
define void @sextload_global_v16i32_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i32> addrspace(1)* nocapture %in) nounwind {
- %load = load <16 x i32> addrspace(1)* %in
+ %load = load <16 x i32>, <16 x i32> addrspace(1)* %in
%ext = sext <16 x i32> %load to <16 x i64>
store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
ret void
@@ -255,7 +255,7 @@ define void @sextload_global_v16i32_to_v16i64(<16 x i64> addrspace(1)* %out, <16
; SI: s_endpgm
define void @zextload_global_v16i32_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i32> addrspace(1)* nocapture %in) nounwind {
- %load = load <16 x i32> addrspace(1)* %in
+ %load = load <16 x i32>, <16 x i32> addrspace(1)* %in
%ext = zext <16 x i32> %load to <16 x i64>
store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
ret void
@@ -369,7 +369,7 @@ define void @zextload_global_v16i32_to_v16i64(<16 x i64> addrspace(1)* %out, <16
; SI: s_endpgm
define void @sextload_global_v32i32_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i32> addrspace(1)* nocapture %in) nounwind {
- %load = load <32 x i32> addrspace(1)* %in
+ %load = load <32 x i32>, <32 x i32> addrspace(1)* %in
%ext = sext <32 x i32> %load to <32 x i64>
store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
ret void
@@ -450,7 +450,7 @@ define void @sextload_global_v32i32_to_v32i64(<32 x i64> addrspace(1)* %out, <32
; SI: s_endpgm
define void @zextload_global_v32i32_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i32> addrspace(1)* nocapture %in) nounwind {
- %load = load <32 x i32> addrspace(1)* %in
+ %load = load <32 x i32>, <32 x i32> addrspace(1)* %in
%ext = zext <32 x i32> %load to <32 x i64>
store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
ret void
diff --git a/test/CodeGen/R600/global-extload-i8.ll b/test/CodeGen/R600/global-extload-i8.ll
index 86245232d3e4..b31d5361d5a2 100644
--- a/test/CodeGen/R600/global-extload-i8.ll
+++ b/test/CodeGen/R600/global-extload-i8.ll
@@ -1,5 +1,5 @@
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -march=r600 -mcpu=tonga -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
; FUNC-LABEL: {{^}}zextload_global_i8_to_i32:
@@ -7,7 +7,7 @@
; SI: buffer_store_dword
; SI: s_endpgm
define void @zextload_global_i8_to_i32(i32 addrspace(1)* %out, i8 addrspace(1)* %in) nounwind {
- %a = load i8 addrspace(1)* %in
+ %a = load i8, i8 addrspace(1)* %in
%ext = zext i8 %a to i32
store i32 %ext, i32 addrspace(1)* %out
ret void
@@ -18,7 +18,7 @@ define void @zextload_global_i8_to_i32(i32 addrspace(1)* %out, i8 addrspace(1)*
; SI: buffer_store_dword
; SI: s_endpgm
define void @sextload_global_i8_to_i32(i32 addrspace(1)* %out, i8 addrspace(1)* %in) nounwind {
- %a = load i8 addrspace(1)* %in
+ %a = load i8, i8 addrspace(1)* %in
%ext = sext i8 %a to i32
store i32 %ext, i32 addrspace(1)* %out
ret void
@@ -27,7 +27,7 @@ define void @sextload_global_i8_to_i32(i32 addrspace(1)* %out, i8 addrspace(1)*
; FUNC-LABEL: {{^}}zextload_global_v1i8_to_v1i32:
; SI: s_endpgm
define void @zextload_global_v1i8_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i8> addrspace(1)* nocapture %in) nounwind {
- %load = load <1 x i8> addrspace(1)* %in
+ %load = load <1 x i8>, <1 x i8> addrspace(1)* %in
%ext = zext <1 x i8> %load to <1 x i32>
store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
ret void
@@ -36,7 +36,7 @@ define void @zextload_global_v1i8_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i8
; FUNC-LABEL: {{^}}sextload_global_v1i8_to_v1i32:
; SI: s_endpgm
define void @sextload_global_v1i8_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i8> addrspace(1)* nocapture %in) nounwind {
- %load = load <1 x i8> addrspace(1)* %in
+ %load = load <1 x i8>, <1 x i8> addrspace(1)* %in
%ext = sext <1 x i8> %load to <1 x i32>
store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
ret void
@@ -45,7 +45,7 @@ define void @sextload_global_v1i8_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i8
; FUNC-LABEL: {{^}}zextload_global_v2i8_to_v2i32:
; SI: s_endpgm
define void @zextload_global_v2i8_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(1)* nocapture %in) nounwind {
- %load = load <2 x i8> addrspace(1)* %in
+ %load = load <2 x i8>, <2 x i8> addrspace(1)* %in
%ext = zext <2 x i8> %load to <2 x i32>
store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
ret void
@@ -54,7 +54,7 @@ define void @zextload_global_v2i8_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i8
; FUNC-LABEL: {{^}}sextload_global_v2i8_to_v2i32:
; SI: s_endpgm
define void @sextload_global_v2i8_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(1)* nocapture %in) nounwind {
- %load = load <2 x i8> addrspace(1)* %in
+ %load = load <2 x i8>, <2 x i8> addrspace(1)* %in
%ext = sext <2 x i8> %load to <2 x i32>
store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
ret void
@@ -63,7 +63,7 @@ define void @sextload_global_v2i8_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i8
; FUNC-LABEL: {{^}}zextload_global_v4i8_to_v4i32:
; SI: s_endpgm
define void @zextload_global_v4i8_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(1)* nocapture %in) nounwind {
- %load = load <4 x i8> addrspace(1)* %in
+ %load = load <4 x i8>, <4 x i8> addrspace(1)* %in
%ext = zext <4 x i8> %load to <4 x i32>
store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
ret void
@@ -72,7 +72,7 @@ define void @zextload_global_v4i8_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i8
; FUNC-LABEL: {{^}}sextload_global_v4i8_to_v4i32:
; SI: s_endpgm
define void @sextload_global_v4i8_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(1)* nocapture %in) nounwind {
- %load = load <4 x i8> addrspace(1)* %in
+ %load = load <4 x i8>, <4 x i8> addrspace(1)* %in
%ext = sext <4 x i8> %load to <4 x i32>
store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
ret void
@@ -81,7 +81,7 @@ define void @sextload_global_v4i8_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i8
; FUNC-LABEL: {{^}}zextload_global_v8i8_to_v8i32:
; SI: s_endpgm
define void @zextload_global_v8i8_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i8> addrspace(1)* nocapture %in) nounwind {
- %load = load <8 x i8> addrspace(1)* %in
+ %load = load <8 x i8>, <8 x i8> addrspace(1)* %in
%ext = zext <8 x i8> %load to <8 x i32>
store <8 x i32> %ext, <8 x i32> addrspace(1)* %out
ret void
@@ -90,7 +90,7 @@ define void @zextload_global_v8i8_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i8
; FUNC-LABEL: {{^}}sextload_global_v8i8_to_v8i32:
; SI: s_endpgm
define void @sextload_global_v8i8_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i8> addrspace(1)* nocapture %in) nounwind {
- %load = load <8 x i8> addrspace(1)* %in
+ %load = load <8 x i8>, <8 x i8> addrspace(1)* %in
%ext = sext <8 x i8> %load to <8 x i32>
store <8 x i32> %ext, <8 x i32> addrspace(1)* %out
ret void
@@ -99,7 +99,7 @@ define void @sextload_global_v8i8_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i8
; FUNC-LABEL: {{^}}zextload_global_v16i8_to_v16i32:
; SI: s_endpgm
define void @zextload_global_v16i8_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i8> addrspace(1)* nocapture %in) nounwind {
- %load = load <16 x i8> addrspace(1)* %in
+ %load = load <16 x i8>, <16 x i8> addrspace(1)* %in
%ext = zext <16 x i8> %load to <16 x i32>
store <16 x i32> %ext, <16 x i32> addrspace(1)* %out
ret void
@@ -108,7 +108,7 @@ define void @zextload_global_v16i8_to_v16i32(<16 x i32> addrspace(1)* %out, <16
; FUNC-LABEL: {{^}}sextload_global_v16i8_to_v16i32:
; SI: s_endpgm
define void @sextload_global_v16i8_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i8> addrspace(1)* nocapture %in) nounwind {
- %load = load <16 x i8> addrspace(1)* %in
+ %load = load <16 x i8>, <16 x i8> addrspace(1)* %in
%ext = sext <16 x i8> %load to <16 x i32>
store <16 x i32> %ext, <16 x i32> addrspace(1)* %out
ret void
@@ -117,7 +117,7 @@ define void @sextload_global_v16i8_to_v16i32(<16 x i32> addrspace(1)* %out, <16
; XFUNC-LABEL: {{^}}zextload_global_v32i8_to_v32i32:
; XSI: s_endpgm
; define void @zextload_global_v32i8_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i8> addrspace(1)* nocapture %in) nounwind {
-; %load = load <32 x i8> addrspace(1)* %in
+; %load = load <32 x i8>, <32 x i8> addrspace(1)* %in
; %ext = zext <32 x i8> %load to <32 x i32>
; store <32 x i32> %ext, <32 x i32> addrspace(1)* %out
; ret void
@@ -126,7 +126,7 @@ define void @sextload_global_v16i8_to_v16i32(<16 x i32> addrspace(1)* %out, <16
; XFUNC-LABEL: {{^}}sextload_global_v32i8_to_v32i32:
; XSI: s_endpgm
; define void @sextload_global_v32i8_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i8> addrspace(1)* nocapture %in) nounwind {
-; %load = load <32 x i8> addrspace(1)* %in
+; %load = load <32 x i8>, <32 x i8> addrspace(1)* %in
; %ext = sext <32 x i8> %load to <32 x i32>
; store <32 x i32> %ext, <32 x i32> addrspace(1)* %out
; ret void
@@ -135,7 +135,7 @@ define void @sextload_global_v16i8_to_v16i32(<16 x i32> addrspace(1)* %out, <16
; XFUNC-LABEL: {{^}}zextload_global_v64i8_to_v64i32:
; XSI: s_endpgm
; define void @zextload_global_v64i8_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i8> addrspace(1)* nocapture %in) nounwind {
-; %load = load <64 x i8> addrspace(1)* %in
+; %load = load <64 x i8>, <64 x i8> addrspace(1)* %in
; %ext = zext <64 x i8> %load to <64 x i32>
; store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
; ret void
@@ -144,7 +144,7 @@ define void @sextload_global_v16i8_to_v16i32(<16 x i32> addrspace(1)* %out, <16
; XFUNC-LABEL: {{^}}sextload_global_v64i8_to_v64i32:
; XSI: s_endpgm
; define void @sextload_global_v64i8_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i8> addrspace(1)* nocapture %in) nounwind {
-; %load = load <64 x i8> addrspace(1)* %in
+; %load = load <64 x i8>, <64 x i8> addrspace(1)* %in
; %ext = sext <64 x i8> %load to <64 x i32>
; store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
; ret void
@@ -155,7 +155,7 @@ define void @sextload_global_v16i8_to_v16i32(<16 x i32> addrspace(1)* %out, <16
; SI: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
; SI: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]]
define void @zextload_global_i8_to_i64(i64 addrspace(1)* %out, i8 addrspace(1)* %in) nounwind {
- %a = load i8 addrspace(1)* %in
+ %a = load i8, i8 addrspace(1)* %in
%ext = zext i8 %a to i64
store i64 %ext, i64 addrspace(1)* %out
ret void
@@ -166,7 +166,7 @@ define void @zextload_global_i8_to_i64(i64 addrspace(1)* %out, i8 addrspace(1)*
; SI: v_ashrrev_i32_e32 v{{[0-9]+}}, 31, [[LOAD]]
; SI: buffer_store_dwordx2
define void @sextload_global_i8_to_i64(i64 addrspace(1)* %out, i8 addrspace(1)* %in) nounwind {
- %a = load i8 addrspace(1)* %in
+ %a = load i8, i8 addrspace(1)* %in
%ext = sext i8 %a to i64
store i64 %ext, i64 addrspace(1)* %out
ret void
@@ -175,7 +175,7 @@ define void @sextload_global_i8_to_i64(i64 addrspace(1)* %out, i8 addrspace(1)*
; FUNC-LABEL: {{^}}zextload_global_v1i8_to_v1i64:
; SI: s_endpgm
define void @zextload_global_v1i8_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i8> addrspace(1)* nocapture %in) nounwind {
- %load = load <1 x i8> addrspace(1)* %in
+ %load = load <1 x i8>, <1 x i8> addrspace(1)* %in
%ext = zext <1 x i8> %load to <1 x i64>
store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
ret void
@@ -184,7 +184,7 @@ define void @zextload_global_v1i8_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i8
; FUNC-LABEL: {{^}}sextload_global_v1i8_to_v1i64:
; SI: s_endpgm
define void @sextload_global_v1i8_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i8> addrspace(1)* nocapture %in) nounwind {
- %load = load <1 x i8> addrspace(1)* %in
+ %load = load <1 x i8>, <1 x i8> addrspace(1)* %in
%ext = sext <1 x i8> %load to <1 x i64>
store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
ret void
@@ -193,7 +193,7 @@ define void @sextload_global_v1i8_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i8
; FUNC-LABEL: {{^}}zextload_global_v2i8_to_v2i64:
; SI: s_endpgm
define void @zextload_global_v2i8_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i8> addrspace(1)* nocapture %in) nounwind {
- %load = load <2 x i8> addrspace(1)* %in
+ %load = load <2 x i8>, <2 x i8> addrspace(1)* %in
%ext = zext <2 x i8> %load to <2 x i64>
store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
ret void
@@ -202,7 +202,7 @@ define void @zextload_global_v2i8_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i8
; FUNC-LABEL: {{^}}sextload_global_v2i8_to_v2i64:
; SI: s_endpgm
define void @sextload_global_v2i8_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i8> addrspace(1)* nocapture %in) nounwind {
- %load = load <2 x i8> addrspace(1)* %in
+ %load = load <2 x i8>, <2 x i8> addrspace(1)* %in
%ext = sext <2 x i8> %load to <2 x i64>
store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
ret void
@@ -211,7 +211,7 @@ define void @sextload_global_v2i8_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i8
; FUNC-LABEL: {{^}}zextload_global_v4i8_to_v4i64:
; SI: s_endpgm
define void @zextload_global_v4i8_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i8> addrspace(1)* nocapture %in) nounwind {
- %load = load <4 x i8> addrspace(1)* %in
+ %load = load <4 x i8>, <4 x i8> addrspace(1)* %in
%ext = zext <4 x i8> %load to <4 x i64>
store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
ret void
@@ -220,7 +220,7 @@ define void @zextload_global_v4i8_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i8
; FUNC-LABEL: {{^}}sextload_global_v4i8_to_v4i64:
; SI: s_endpgm
define void @sextload_global_v4i8_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i8> addrspace(1)* nocapture %in) nounwind {
- %load = load <4 x i8> addrspace(1)* %in
+ %load = load <4 x i8>, <4 x i8> addrspace(1)* %in
%ext = sext <4 x i8> %load to <4 x i64>
store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
ret void
@@ -229,7 +229,7 @@ define void @sextload_global_v4i8_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i8
; FUNC-LABEL: {{^}}zextload_global_v8i8_to_v8i64:
; SI: s_endpgm
define void @zextload_global_v8i8_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i8> addrspace(1)* nocapture %in) nounwind {
- %load = load <8 x i8> addrspace(1)* %in
+ %load = load <8 x i8>, <8 x i8> addrspace(1)* %in
%ext = zext <8 x i8> %load to <8 x i64>
store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
ret void
@@ -238,7 +238,7 @@ define void @zextload_global_v8i8_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i8
; FUNC-LABEL: {{^}}sextload_global_v8i8_to_v8i64:
; SI: s_endpgm
define void @sextload_global_v8i8_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i8> addrspace(1)* nocapture %in) nounwind {
- %load = load <8 x i8> addrspace(1)* %in
+ %load = load <8 x i8>, <8 x i8> addrspace(1)* %in
%ext = sext <8 x i8> %load to <8 x i64>
store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
ret void
@@ -247,7 +247,7 @@ define void @sextload_global_v8i8_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i8
; FUNC-LABEL: {{^}}zextload_global_v16i8_to_v16i64:
; SI: s_endpgm
define void @zextload_global_v16i8_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i8> addrspace(1)* nocapture %in) nounwind {
- %load = load <16 x i8> addrspace(1)* %in
+ %load = load <16 x i8>, <16 x i8> addrspace(1)* %in
%ext = zext <16 x i8> %load to <16 x i64>
store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
ret void
@@ -256,7 +256,7 @@ define void @zextload_global_v16i8_to_v16i64(<16 x i64> addrspace(1)* %out, <16
; FUNC-LABEL: {{^}}sextload_global_v16i8_to_v16i64:
; SI: s_endpgm
define void @sextload_global_v16i8_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i8> addrspace(1)* nocapture %in) nounwind {
- %load = load <16 x i8> addrspace(1)* %in
+ %load = load <16 x i8>, <16 x i8> addrspace(1)* %in
%ext = sext <16 x i8> %load to <16 x i64>
store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
ret void
@@ -265,7 +265,7 @@ define void @sextload_global_v16i8_to_v16i64(<16 x i64> addrspace(1)* %out, <16
; XFUNC-LABEL: {{^}}zextload_global_v32i8_to_v32i64:
; XSI: s_endpgm
; define void @zextload_global_v32i8_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i8> addrspace(1)* nocapture %in) nounwind {
-; %load = load <32 x i8> addrspace(1)* %in
+; %load = load <32 x i8>, <32 x i8> addrspace(1)* %in
; %ext = zext <32 x i8> %load to <32 x i64>
; store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
; ret void
@@ -274,7 +274,7 @@ define void @sextload_global_v16i8_to_v16i64(<16 x i64> addrspace(1)* %out, <16
; XFUNC-LABEL: {{^}}sextload_global_v32i8_to_v32i64:
; XSI: s_endpgm
; define void @sextload_global_v32i8_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i8> addrspace(1)* nocapture %in) nounwind {
-; %load = load <32 x i8> addrspace(1)* %in
+; %load = load <32 x i8>, <32 x i8> addrspace(1)* %in
; %ext = sext <32 x i8> %load to <32 x i64>
; store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
; ret void
@@ -283,7 +283,7 @@ define void @sextload_global_v16i8_to_v16i64(<16 x i64> addrspace(1)* %out, <16
; XFUNC-LABEL: {{^}}zextload_global_v64i8_to_v64i64:
; XSI: s_endpgm
; define void @zextload_global_v64i8_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i8> addrspace(1)* nocapture %in) nounwind {
-; %load = load <64 x i8> addrspace(1)* %in
+; %load = load <64 x i8>, <64 x i8> addrspace(1)* %in
; %ext = zext <64 x i8> %load to <64 x i64>
; store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
; ret void
@@ -292,7 +292,7 @@ define void @sextload_global_v16i8_to_v16i64(<16 x i64> addrspace(1)* %out, <16
; XFUNC-LABEL: {{^}}sextload_global_v64i8_to_v64i64:
; XSI: s_endpgm
; define void @sextload_global_v64i8_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i8> addrspace(1)* nocapture %in) nounwind {
-; %load = load <64 x i8> addrspace(1)* %in
+; %load = load <64 x i8>, <64 x i8> addrspace(1)* %in
; %ext = sext <64 x i8> %load to <64 x i64>
; store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
; ret void
diff --git a/test/CodeGen/R600/global-zero-initializer.ll b/test/CodeGen/R600/global-zero-initializer.ll
index 6909c58354c5..45aa8bf4e1d7 100644
--- a/test/CodeGen/R600/global-zero-initializer.ll
+++ b/test/CodeGen/R600/global-zero-initializer.ll
@@ -6,8 +6,8 @@
@lds = addrspace(1) global [256 x i32] zeroinitializer
define void @load_init_global_global(i32 addrspace(1)* %out, i1 %p) {
- %gep = getelementptr [256 x i32] addrspace(1)* @lds, i32 0, i32 10
- %ld = load i32 addrspace(1)* %gep
+ %gep = getelementptr [256 x i32], [256 x i32] addrspace(1)* @lds, i32 0, i32 10
+ %ld = load i32, i32 addrspace(1)* %gep
store i32 %ld, i32 addrspace(1)* %out
ret void
}
diff --git a/test/CodeGen/R600/global_atomics.ll b/test/CodeGen/R600/global_atomics.ll
index 5a07a028f44f..847950f6376e 100644
--- a/test/CodeGen/R600/global_atomics.ll
+++ b/test/CodeGen/R600/global_atomics.ll
@@ -4,7 +4,7 @@
; SI: buffer_atomic_add v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
define void @atomic_add_i32_offset(i32 addrspace(1)* %out, i32 %in) {
entry:
- %gep = getelementptr i32 addrspace(1)* %out, i32 4
+ %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4
%0 = atomicrmw volatile add i32 addrspace(1)* %gep, i32 %in seq_cst
ret void
}
@@ -14,7 +14,7 @@ entry:
; SI: buffer_store_dword [[RET]]
define void @atomic_add_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
- %gep = getelementptr i32 addrspace(1)* %out, i32 4
+ %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4
%0 = atomicrmw volatile add i32 addrspace(1)* %gep, i32 %in seq_cst
store i32 %0, i32 addrspace(1)* %out2
ret void
@@ -24,8 +24,8 @@ entry:
; SI: buffer_atomic_add v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
define void @atomic_add_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32 addrspace(1)* %out, i64 %index
- %gep = getelementptr i32 addrspace(1)* %ptr, i32 4
+ %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
+ %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 4
%0 = atomicrmw volatile add i32 addrspace(1)* %gep, i32 %in seq_cst
ret void
}
@@ -35,8 +35,8 @@ entry:
; SI: buffer_store_dword [[RET]]
define void @atomic_add_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32 addrspace(1)* %out, i64 %index
- %gep = getelementptr i32 addrspace(1)* %ptr, i32 4
+ %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
+ %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 4
%0 = atomicrmw volatile add i32 addrspace(1)* %gep, i32 %in seq_cst
store i32 %0, i32 addrspace(1)* %out2
ret void
@@ -64,7 +64,7 @@ entry:
; SI: buffer_atomic_add v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
define void @atomic_add_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32 addrspace(1)* %out, i64 %index
+ %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
%0 = atomicrmw volatile add i32 addrspace(1)* %ptr, i32 %in seq_cst
ret void
}
@@ -74,7 +74,7 @@ entry:
; SI: buffer_store_dword [[RET]]
define void @atomic_add_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32 addrspace(1)* %out, i64 %index
+ %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
%0 = atomicrmw volatile add i32 addrspace(1)* %ptr, i32 %in seq_cst
store i32 %0, i32 addrspace(1)* %out2
ret void
@@ -84,7 +84,7 @@ entry:
; SI: buffer_atomic_and v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
define void @atomic_and_i32_offset(i32 addrspace(1)* %out, i32 %in) {
entry:
- %gep = getelementptr i32 addrspace(1)* %out, i32 4
+ %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4
%0 = atomicrmw volatile and i32 addrspace(1)* %gep, i32 %in seq_cst
ret void
}
@@ -94,7 +94,7 @@ entry:
; SI: buffer_store_dword [[RET]]
define void @atomic_and_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
- %gep = getelementptr i32 addrspace(1)* %out, i32 4
+ %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4
%0 = atomicrmw volatile and i32 addrspace(1)* %gep, i32 %in seq_cst
store i32 %0, i32 addrspace(1)* %out2
ret void
@@ -104,8 +104,8 @@ entry:
; SI: buffer_atomic_and v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
define void @atomic_and_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32 addrspace(1)* %out, i64 %index
- %gep = getelementptr i32 addrspace(1)* %ptr, i32 4
+ %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
+ %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 4
%0 = atomicrmw volatile and i32 addrspace(1)* %gep, i32 %in seq_cst
ret void
}
@@ -115,8 +115,8 @@ entry:
; SI: buffer_store_dword [[RET]]
define void @atomic_and_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32 addrspace(1)* %out, i64 %index
- %gep = getelementptr i32 addrspace(1)* %ptr, i32 4
+ %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
+ %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 4
%0 = atomicrmw volatile and i32 addrspace(1)* %gep, i32 %in seq_cst
store i32 %0, i32 addrspace(1)* %out2
ret void
@@ -144,7 +144,7 @@ entry:
; SI: buffer_atomic_and v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
define void @atomic_and_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32 addrspace(1)* %out, i64 %index
+ %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
%0 = atomicrmw volatile and i32 addrspace(1)* %ptr, i32 %in seq_cst
ret void
}
@@ -154,7 +154,7 @@ entry:
; SI: buffer_store_dword [[RET]]
define void @atomic_and_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32 addrspace(1)* %out, i64 %index
+ %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
%0 = atomicrmw volatile and i32 addrspace(1)* %ptr, i32 %in seq_cst
store i32 %0, i32 addrspace(1)* %out2
ret void
@@ -164,7 +164,7 @@ entry:
; SI: buffer_atomic_sub v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
define void @atomic_sub_i32_offset(i32 addrspace(1)* %out, i32 %in) {
entry:
- %gep = getelementptr i32 addrspace(1)* %out, i32 4
+ %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4
%0 = atomicrmw volatile sub i32 addrspace(1)* %gep, i32 %in seq_cst
ret void
}
@@ -174,7 +174,7 @@ entry:
; SI: buffer_store_dword [[RET]]
define void @atomic_sub_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
- %gep = getelementptr i32 addrspace(1)* %out, i32 4
+ %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4
%0 = atomicrmw volatile sub i32 addrspace(1)* %gep, i32 %in seq_cst
store i32 %0, i32 addrspace(1)* %out2
ret void
@@ -184,8 +184,8 @@ entry:
; SI: buffer_atomic_sub v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
define void @atomic_sub_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32 addrspace(1)* %out, i64 %index
- %gep = getelementptr i32 addrspace(1)* %ptr, i32 4
+ %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
+ %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 4
%0 = atomicrmw volatile sub i32 addrspace(1)* %gep, i32 %in seq_cst
ret void
}
@@ -195,8 +195,8 @@ entry:
; SI: buffer_store_dword [[RET]]
define void @atomic_sub_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32 addrspace(1)* %out, i64 %index
- %gep = getelementptr i32 addrspace(1)* %ptr, i32 4
+ %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
+ %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 4
%0 = atomicrmw volatile sub i32 addrspace(1)* %gep, i32 %in seq_cst
store i32 %0, i32 addrspace(1)* %out2
ret void
@@ -224,7 +224,7 @@ entry:
; SI: buffer_atomic_sub v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
define void @atomic_sub_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32 addrspace(1)* %out, i64 %index
+ %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
%0 = atomicrmw volatile sub i32 addrspace(1)* %ptr, i32 %in seq_cst
ret void
}
@@ -234,7 +234,7 @@ entry:
; SI: buffer_store_dword [[RET]]
define void @atomic_sub_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32 addrspace(1)* %out, i64 %index
+ %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
%0 = atomicrmw volatile sub i32 addrspace(1)* %ptr, i32 %in seq_cst
store i32 %0, i32 addrspace(1)* %out2
ret void
@@ -244,7 +244,7 @@ entry:
; SI: buffer_atomic_smax v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
define void @atomic_max_i32_offset(i32 addrspace(1)* %out, i32 %in) {
entry:
- %gep = getelementptr i32 addrspace(1)* %out, i32 4
+ %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4
%0 = atomicrmw volatile max i32 addrspace(1)* %gep, i32 %in seq_cst
ret void
}
@@ -254,7 +254,7 @@ entry:
; SI: buffer_store_dword [[RET]]
define void @atomic_max_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
- %gep = getelementptr i32 addrspace(1)* %out, i32 4
+ %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4
%0 = atomicrmw volatile max i32 addrspace(1)* %gep, i32 %in seq_cst
store i32 %0, i32 addrspace(1)* %out2
ret void
@@ -264,8 +264,8 @@ entry:
; SI: buffer_atomic_smax v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
define void @atomic_max_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32 addrspace(1)* %out, i64 %index
- %gep = getelementptr i32 addrspace(1)* %ptr, i32 4
+ %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
+ %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 4
%0 = atomicrmw volatile max i32 addrspace(1)* %gep, i32 %in seq_cst
ret void
}
@@ -275,8 +275,8 @@ entry:
; SI: buffer_store_dword [[RET]]
define void @atomic_max_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32 addrspace(1)* %out, i64 %index
- %gep = getelementptr i32 addrspace(1)* %ptr, i32 4
+ %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
+ %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 4
%0 = atomicrmw volatile max i32 addrspace(1)* %gep, i32 %in seq_cst
store i32 %0, i32 addrspace(1)* %out2
ret void
@@ -304,7 +304,7 @@ entry:
; SI: buffer_atomic_smax v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
define void @atomic_max_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32 addrspace(1)* %out, i64 %index
+ %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
%0 = atomicrmw volatile max i32 addrspace(1)* %ptr, i32 %in seq_cst
ret void
}
@@ -314,7 +314,7 @@ entry:
; SI: buffer_store_dword [[RET]]
define void @atomic_max_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32 addrspace(1)* %out, i64 %index
+ %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
%0 = atomicrmw volatile max i32 addrspace(1)* %ptr, i32 %in seq_cst
store i32 %0, i32 addrspace(1)* %out2
ret void
@@ -324,7 +324,7 @@ entry:
; SI: buffer_atomic_umax v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
define void @atomic_umax_i32_offset(i32 addrspace(1)* %out, i32 %in) {
entry:
- %gep = getelementptr i32 addrspace(1)* %out, i32 4
+ %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4
%0 = atomicrmw volatile umax i32 addrspace(1)* %gep, i32 %in seq_cst
ret void
}
@@ -334,7 +334,7 @@ entry:
; SI: buffer_store_dword [[RET]]
define void @atomic_umax_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
- %gep = getelementptr i32 addrspace(1)* %out, i32 4
+ %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4
%0 = atomicrmw volatile umax i32 addrspace(1)* %gep, i32 %in seq_cst
store i32 %0, i32 addrspace(1)* %out2
ret void
@@ -344,8 +344,8 @@ entry:
; SI: buffer_atomic_umax v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
define void @atomic_umax_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32 addrspace(1)* %out, i64 %index
- %gep = getelementptr i32 addrspace(1)* %ptr, i32 4
+ %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
+ %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 4
%0 = atomicrmw volatile umax i32 addrspace(1)* %gep, i32 %in seq_cst
ret void
}
@@ -355,8 +355,8 @@ entry:
; SI: buffer_store_dword [[RET]]
define void @atomic_umax_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32 addrspace(1)* %out, i64 %index
- %gep = getelementptr i32 addrspace(1)* %ptr, i32 4
+ %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
+ %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 4
%0 = atomicrmw volatile umax i32 addrspace(1)* %gep, i32 %in seq_cst
store i32 %0, i32 addrspace(1)* %out2
ret void
@@ -384,7 +384,7 @@ entry:
; SI: buffer_atomic_umax v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
define void @atomic_umax_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32 addrspace(1)* %out, i64 %index
+ %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
%0 = atomicrmw volatile umax i32 addrspace(1)* %ptr, i32 %in seq_cst
ret void
}
@@ -394,7 +394,7 @@ entry:
; SI: buffer_store_dword [[RET]]
define void @atomic_umax_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32 addrspace(1)* %out, i64 %index
+ %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
%0 = atomicrmw volatile umax i32 addrspace(1)* %ptr, i32 %in seq_cst
store i32 %0, i32 addrspace(1)* %out2
ret void
@@ -404,7 +404,7 @@ entry:
; SI: buffer_atomic_smin v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
define void @atomic_min_i32_offset(i32 addrspace(1)* %out, i32 %in) {
entry:
- %gep = getelementptr i32 addrspace(1)* %out, i32 4
+ %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4
%0 = atomicrmw volatile min i32 addrspace(1)* %gep, i32 %in seq_cst
ret void
}
@@ -414,7 +414,7 @@ entry:
; SI: buffer_store_dword [[RET]]
define void @atomic_min_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
- %gep = getelementptr i32 addrspace(1)* %out, i32 4
+ %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4
%0 = atomicrmw volatile min i32 addrspace(1)* %gep, i32 %in seq_cst
store i32 %0, i32 addrspace(1)* %out2
ret void
@@ -424,8 +424,8 @@ entry:
; SI: buffer_atomic_smin v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
define void @atomic_min_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32 addrspace(1)* %out, i64 %index
- %gep = getelementptr i32 addrspace(1)* %ptr, i32 4
+ %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
+ %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 4
%0 = atomicrmw volatile min i32 addrspace(1)* %gep, i32 %in seq_cst
ret void
}
@@ -435,8 +435,8 @@ entry:
; SI: buffer_store_dword [[RET]]
define void @atomic_min_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32 addrspace(1)* %out, i64 %index
- %gep = getelementptr i32 addrspace(1)* %ptr, i32 4
+ %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
+ %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 4
%0 = atomicrmw volatile min i32 addrspace(1)* %gep, i32 %in seq_cst
store i32 %0, i32 addrspace(1)* %out2
ret void
@@ -464,7 +464,7 @@ entry:
; SI: buffer_atomic_smin v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
define void @atomic_min_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32 addrspace(1)* %out, i64 %index
+ %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
%0 = atomicrmw volatile min i32 addrspace(1)* %ptr, i32 %in seq_cst
ret void
}
@@ -474,7 +474,7 @@ entry:
; SI: buffer_store_dword [[RET]]
define void @atomic_min_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32 addrspace(1)* %out, i64 %index
+ %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
%0 = atomicrmw volatile min i32 addrspace(1)* %ptr, i32 %in seq_cst
store i32 %0, i32 addrspace(1)* %out2
ret void
@@ -484,7 +484,7 @@ entry:
; SI: buffer_atomic_umin v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
define void @atomic_umin_i32_offset(i32 addrspace(1)* %out, i32 %in) {
entry:
- %gep = getelementptr i32 addrspace(1)* %out, i32 4
+ %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4
%0 = atomicrmw volatile umin i32 addrspace(1)* %gep, i32 %in seq_cst
ret void
}
@@ -494,7 +494,7 @@ entry:
; SI: buffer_store_dword [[RET]]
define void @atomic_umin_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
- %gep = getelementptr i32 addrspace(1)* %out, i32 4
+ %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4
%0 = atomicrmw volatile umin i32 addrspace(1)* %gep, i32 %in seq_cst
store i32 %0, i32 addrspace(1)* %out2
ret void
@@ -504,8 +504,8 @@ entry:
; SI: buffer_atomic_umin v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
define void @atomic_umin_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32 addrspace(1)* %out, i64 %index
- %gep = getelementptr i32 addrspace(1)* %ptr, i32 4
+ %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
+ %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 4
%0 = atomicrmw volatile umin i32 addrspace(1)* %gep, i32 %in seq_cst
ret void
}
@@ -515,8 +515,8 @@ entry:
; SI: buffer_store_dword [[RET]]
define void @atomic_umin_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32 addrspace(1)* %out, i64 %index
- %gep = getelementptr i32 addrspace(1)* %ptr, i32 4
+ %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
+ %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 4
%0 = atomicrmw volatile umin i32 addrspace(1)* %gep, i32 %in seq_cst
store i32 %0, i32 addrspace(1)* %out2
ret void
@@ -544,7 +544,7 @@ entry:
; SI: buffer_atomic_umin v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
define void @atomic_umin_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32 addrspace(1)* %out, i64 %index
+ %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
%0 = atomicrmw volatile umin i32 addrspace(1)* %ptr, i32 %in seq_cst
ret void
}
@@ -554,7 +554,7 @@ entry:
; SI: buffer_store_dword [[RET]]
define void @atomic_umin_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32 addrspace(1)* %out, i64 %index
+ %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
%0 = atomicrmw volatile umin i32 addrspace(1)* %ptr, i32 %in seq_cst
store i32 %0, i32 addrspace(1)* %out2
ret void
@@ -564,7 +564,7 @@ entry:
; SI: buffer_atomic_or v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
define void @atomic_or_i32_offset(i32 addrspace(1)* %out, i32 %in) {
entry:
- %gep = getelementptr i32 addrspace(1)* %out, i32 4
+ %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4
%0 = atomicrmw volatile or i32 addrspace(1)* %gep, i32 %in seq_cst
ret void
}
@@ -574,7 +574,7 @@ entry:
; SI: buffer_store_dword [[RET]]
define void @atomic_or_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
- %gep = getelementptr i32 addrspace(1)* %out, i32 4
+ %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4
%0 = atomicrmw volatile or i32 addrspace(1)* %gep, i32 %in seq_cst
store i32 %0, i32 addrspace(1)* %out2
ret void
@@ -584,8 +584,8 @@ entry:
; SI: buffer_atomic_or v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
define void @atomic_or_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32 addrspace(1)* %out, i64 %index
- %gep = getelementptr i32 addrspace(1)* %ptr, i32 4
+ %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
+ %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 4
%0 = atomicrmw volatile or i32 addrspace(1)* %gep, i32 %in seq_cst
ret void
}
@@ -595,8 +595,8 @@ entry:
; SI: buffer_store_dword [[RET]]
define void @atomic_or_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32 addrspace(1)* %out, i64 %index
- %gep = getelementptr i32 addrspace(1)* %ptr, i32 4
+ %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
+ %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 4
%0 = atomicrmw volatile or i32 addrspace(1)* %gep, i32 %in seq_cst
store i32 %0, i32 addrspace(1)* %out2
ret void
@@ -624,7 +624,7 @@ entry:
; SI: buffer_atomic_or v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
define void @atomic_or_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32 addrspace(1)* %out, i64 %index
+ %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
%0 = atomicrmw volatile or i32 addrspace(1)* %ptr, i32 %in seq_cst
ret void
}
@@ -634,7 +634,7 @@ entry:
; SI: buffer_store_dword [[RET]]
define void @atomic_or_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32 addrspace(1)* %out, i64 %index
+ %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
%0 = atomicrmw volatile or i32 addrspace(1)* %ptr, i32 %in seq_cst
store i32 %0, i32 addrspace(1)* %out2
ret void
@@ -644,7 +644,7 @@ entry:
; SI: buffer_atomic_swap v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
define void @atomic_xchg_i32_offset(i32 addrspace(1)* %out, i32 %in) {
entry:
- %gep = getelementptr i32 addrspace(1)* %out, i32 4
+ %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4
%0 = atomicrmw volatile xchg i32 addrspace(1)* %gep, i32 %in seq_cst
ret void
}
@@ -654,7 +654,7 @@ entry:
; SI: buffer_store_dword [[RET]]
define void @atomic_xchg_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
- %gep = getelementptr i32 addrspace(1)* %out, i32 4
+ %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4
%0 = atomicrmw volatile xchg i32 addrspace(1)* %gep, i32 %in seq_cst
store i32 %0, i32 addrspace(1)* %out2
ret void
@@ -664,8 +664,8 @@ entry:
; SI: buffer_atomic_swap v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
define void @atomic_xchg_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32 addrspace(1)* %out, i64 %index
- %gep = getelementptr i32 addrspace(1)* %ptr, i32 4
+ %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
+ %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 4
%0 = atomicrmw volatile xchg i32 addrspace(1)* %gep, i32 %in seq_cst
ret void
}
@@ -675,8 +675,8 @@ entry:
; SI: buffer_store_dword [[RET]]
define void @atomic_xchg_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32 addrspace(1)* %out, i64 %index
- %gep = getelementptr i32 addrspace(1)* %ptr, i32 4
+ %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
+ %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 4
%0 = atomicrmw volatile xchg i32 addrspace(1)* %gep, i32 %in seq_cst
store i32 %0, i32 addrspace(1)* %out2
ret void
@@ -704,7 +704,7 @@ entry:
; SI: buffer_atomic_swap v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
define void @atomic_xchg_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32 addrspace(1)* %out, i64 %index
+ %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
%0 = atomicrmw volatile xchg i32 addrspace(1)* %ptr, i32 %in seq_cst
ret void
}
@@ -714,7 +714,7 @@ entry:
; SI: buffer_store_dword [[RET]]
define void @atomic_xchg_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32 addrspace(1)* %out, i64 %index
+ %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
%0 = atomicrmw volatile xchg i32 addrspace(1)* %ptr, i32 %in seq_cst
store i32 %0, i32 addrspace(1)* %out2
ret void
@@ -724,7 +724,7 @@ entry:
; SI: buffer_atomic_xor v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
define void @atomic_xor_i32_offset(i32 addrspace(1)* %out, i32 %in) {
entry:
- %gep = getelementptr i32 addrspace(1)* %out, i32 4
+ %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4
%0 = atomicrmw volatile xor i32 addrspace(1)* %gep, i32 %in seq_cst
ret void
}
@@ -734,7 +734,7 @@ entry:
; SI: buffer_store_dword [[RET]]
define void @atomic_xor_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
- %gep = getelementptr i32 addrspace(1)* %out, i32 4
+ %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4
%0 = atomicrmw volatile xor i32 addrspace(1)* %gep, i32 %in seq_cst
store i32 %0, i32 addrspace(1)* %out2
ret void
@@ -744,8 +744,8 @@ entry:
; SI: buffer_atomic_xor v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
define void @atomic_xor_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32 addrspace(1)* %out, i64 %index
- %gep = getelementptr i32 addrspace(1)* %ptr, i32 4
+ %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
+ %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 4
%0 = atomicrmw volatile xor i32 addrspace(1)* %gep, i32 %in seq_cst
ret void
}
@@ -755,8 +755,8 @@ entry:
; SI: buffer_store_dword [[RET]]
define void @atomic_xor_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32 addrspace(1)* %out, i64 %index
- %gep = getelementptr i32 addrspace(1)* %ptr, i32 4
+ %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
+ %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 4
%0 = atomicrmw volatile xor i32 addrspace(1)* %gep, i32 %in seq_cst
store i32 %0, i32 addrspace(1)* %out2
ret void
@@ -784,7 +784,7 @@ entry:
; SI: buffer_atomic_xor v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
define void @atomic_xor_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32 addrspace(1)* %out, i64 %index
+ %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
%0 = atomicrmw volatile xor i32 addrspace(1)* %ptr, i32 %in seq_cst
ret void
}
@@ -794,7 +794,7 @@ entry:
; SI: buffer_store_dword [[RET]]
define void @atomic_xor_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
- %ptr = getelementptr i32 addrspace(1)* %out, i64 %index
+ %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
%0 = atomicrmw volatile xor i32 addrspace(1)* %ptr, i32 %in seq_cst
store i32 %0, i32 addrspace(1)* %out2
ret void
diff --git a/test/CodeGen/R600/gv-const-addrspace-fail.ll b/test/CodeGen/R600/gv-const-addrspace-fail.ll
index af0df413ca58..014b0a5482ab 100644
--- a/test/CodeGen/R600/gv-const-addrspace-fail.ll
+++ b/test/CodeGen/R600/gv-const-addrspace-fail.ll
@@ -9,8 +9,8 @@
; SI: buffer_store_byte
; SI: s_endpgm
define void @test_i8( i32 %s, i8 addrspace(1)* %out) #3 {
- %arrayidx = getelementptr inbounds [1 x i8] addrspace(2)* @a, i32 0, i32 %s
- %1 = load i8 addrspace(2)* %arrayidx, align 1
+ %arrayidx = getelementptr inbounds [1 x i8], [1 x i8] addrspace(2)* @a, i32 0, i32 %s
+ %1 = load i8, i8 addrspace(2)* %arrayidx, align 1
store i8 %1, i8 addrspace(1)* %out
ret void
}
@@ -22,8 +22,8 @@ define void @test_i8( i32 %s, i8 addrspace(1)* %out) #3 {
; SI: buffer_store_short
; SI: s_endpgm
define void @test_i16( i32 %s, i16 addrspace(1)* %out) #3 {
- %arrayidx = getelementptr inbounds [1 x i16] addrspace(2)* @b, i32 0, i32 %s
- %1 = load i16 addrspace(2)* %arrayidx, align 2
+ %arrayidx = getelementptr inbounds [1 x i16], [1 x i16] addrspace(2)* @b, i32 0, i32 %s
+ %1 = load i16, i16 addrspace(2)* %arrayidx, align 2
store i16 %1, i16 addrspace(1)* %out
ret void
}
@@ -35,8 +35,8 @@ define void @test_i16( i32 %s, i16 addrspace(1)* %out) #3 {
; FUNC-LABEL: {{^}}struct_bar_gv_load:
define void @struct_bar_gv_load(i8 addrspace(1)* %out, i32 %index) {
- %gep = getelementptr inbounds [1 x %struct.bar] addrspace(2)* @struct_bar_gv, i32 0, i32 0, i32 1, i32 %index
- %load = load i8 addrspace(2)* %gep, align 1
+ %gep = getelementptr inbounds [1 x %struct.bar], [1 x %struct.bar] addrspace(2)* @struct_bar_gv, i32 0, i32 0, i32 1, i32 %index
+ %load = load i8, i8 addrspace(2)* %gep, align 1
store i8 %load, i8 addrspace(1)* %out, align 1
ret void
}
@@ -50,8 +50,8 @@ define void @struct_bar_gv_load(i8 addrspace(1)* %out, i32 %index) {
; FUNC-LABEL: {{^}}array_vector_gv_load:
define void @array_vector_gv_load(<4 x i32> addrspace(1)* %out, i32 %index) {
- %gep = getelementptr inbounds [4 x <4 x i32>] addrspace(2)* @array_vector_gv, i32 0, i32 %index
- %load = load <4 x i32> addrspace(2)* %gep, align 16
+ %gep = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>] addrspace(2)* @array_vector_gv, i32 0, i32 %index
+ %load = load <4 x i32>, <4 x i32> addrspace(2)* %gep, align 16
store <4 x i32> %load, <4 x i32> addrspace(1)* %out, align 16
ret void
}
diff --git a/test/CodeGen/R600/gv-const-addrspace.ll b/test/CodeGen/R600/gv-const-addrspace.ll
index c58e5846d98c..3c1fc6c98f74 100644
--- a/test/CodeGen/R600/gv-const-addrspace.ll
+++ b/test/CodeGen/R600/gv-const-addrspace.ll
@@ -1,5 +1,6 @@
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
@b = internal addrspace(2) constant [1 x i16] [ i16 7 ], align 2
@@ -9,6 +10,7 @@
; FUNC-LABEL: {{^}}float:
; FIXME: We should be using s_load_dword here.
; SI: buffer_load_dword
+; VI: s_load_dword
; EG-DAG: MOV {{\** *}}T2.X
; EG-DAG: MOV {{\** *}}T3.X
@@ -19,8 +21,8 @@
define void @float(float addrspace(1)* %out, i32 %index) {
entry:
- %0 = getelementptr inbounds [5 x float] addrspace(2)* @float_gv, i32 0, i32 %index
- %1 = load float addrspace(2)* %0
+ %0 = getelementptr inbounds [5 x float], [5 x float] addrspace(2)* @float_gv, i32 0, i32 %index
+ %1 = load float, float addrspace(2)* %0
store float %1, float addrspace(1)* %out
ret void
}
@@ -31,6 +33,7 @@ entry:
; FIXME: We should be using s_load_dword here.
; SI: buffer_load_dword
+; VI: s_load_dword
; EG-DAG: MOV {{\** *}}T2.X
; EG-DAG: MOV {{\** *}}T3.X
@@ -41,8 +44,8 @@ entry:
define void @i32(i32 addrspace(1)* %out, i32 %index) {
entry:
- %0 = getelementptr inbounds [5 x i32] addrspace(2)* @i32_gv, i32 0, i32 %index
- %1 = load i32 addrspace(2)* %0
+ %0 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(2)* @i32_gv, i32 0, i32 %index
+ %1 = load i32, i32 addrspace(2)* %0
store i32 %1, i32 addrspace(1)* %out
ret void
}
@@ -53,11 +56,11 @@ entry:
@struct_foo_gv = internal unnamed_addr addrspace(2) constant [1 x %struct.foo] [ %struct.foo { float 16.0, [5 x i32] [i32 0, i32 1, i32 2, i32 3, i32 4] } ]
; FUNC-LABEL: {{^}}struct_foo_gv_load:
-; SI: s_load_dword
+; GCN: s_load_dword
define void @struct_foo_gv_load(i32 addrspace(1)* %out, i32 %index) {
- %gep = getelementptr inbounds [1 x %struct.foo] addrspace(2)* @struct_foo_gv, i32 0, i32 0, i32 1, i32 %index
- %load = load i32 addrspace(2)* %gep, align 4
+ %gep = getelementptr inbounds [1 x %struct.foo], [1 x %struct.foo] addrspace(2)* @struct_foo_gv, i32 0, i32 0, i32 1, i32 %index
+ %load = load i32, i32 addrspace(2)* %gep, align 4
store i32 %load, i32 addrspace(1)* %out, align 4
ret void
}
@@ -70,9 +73,10 @@ define void @struct_foo_gv_load(i32 addrspace(1)* %out, i32 %index) {
; FUNC-LABEL: {{^}}array_v1_gv_load:
; FIXME: We should be using s_load_dword here.
; SI: buffer_load_dword
+; VI: s_load_dword
define void @array_v1_gv_load(<1 x i32> addrspace(1)* %out, i32 %index) {
- %gep = getelementptr inbounds [4 x <1 x i32>] addrspace(2)* @array_v1_gv, i32 0, i32 %index
- %load = load <1 x i32> addrspace(2)* %gep, align 4
+ %gep = getelementptr inbounds [4 x <1 x i32>], [4 x <1 x i32>] addrspace(2)* @array_v1_gv, i32 0, i32 %index
+ %load = load <1 x i32>, <1 x i32> addrspace(2)* %gep, align 4
store <1 x i32> %load, <1 x i32> addrspace(1)* %out, align 4
ret void
}
@@ -83,8 +87,8 @@ entry:
br i1 %0, label %if, label %else
if:
- %1 = getelementptr inbounds [5 x float] addrspace(2)* @float_gv, i32 0, i32 %index
- %2 = load float addrspace(2)* %1
+ %1 = getelementptr inbounds [5 x float], [5 x float] addrspace(2)* @float_gv, i32 0, i32 %index
+ %2 = load float, float addrspace(2)* %1
store float %2, float addrspace(1)* %out
br label %endif
diff --git a/test/CodeGen/R600/half.ll b/test/CodeGen/R600/half.ll
index 35a41c5cd0b0..42ee788e88d5 100644
--- a/test/CodeGen/R600/half.ll
+++ b/test/CodeGen/R600/half.ll
@@ -5,7 +5,7 @@ define void @test_load_store(half addrspace(1)* %in, half addrspace(1)* %out) {
; CHECK-LABEL: {{^}}test_load_store:
; CHECK: buffer_load_ushort [[TMP:v[0-9]+]]
; CHECK: buffer_store_short [[TMP]]
- %val = load half addrspace(1)* %in
+ %val = load half, half addrspace(1)* %in
store half %val, half addrspace(1) * %out
ret void
}
@@ -14,7 +14,7 @@ define void @test_bitcast_from_half(half addrspace(1)* %in, i16 addrspace(1)* %o
; CHECK-LABEL: {{^}}test_bitcast_from_half:
; CHECK: buffer_load_ushort [[TMP:v[0-9]+]]
; CHECK: buffer_store_short [[TMP]]
- %val = load half addrspace(1) * %in
+ %val = load half, half addrspace(1) * %in
%val_int = bitcast half %val to i16
store i16 %val_int, i16 addrspace(1)* %out
ret void
@@ -24,7 +24,7 @@ define void @test_bitcast_to_half(half addrspace(1)* %out, i16 addrspace(1)* %in
; CHECK-LABEL: {{^}}test_bitcast_to_half:
; CHECK: buffer_load_ushort [[TMP:v[0-9]+]]
; CHECK: buffer_store_short [[TMP]]
- %val = load i16 addrspace(1)* %in
+ %val = load i16, i16 addrspace(1)* %in
%val_fp = bitcast i16 %val to half
store half %val_fp, half addrspace(1)* %out
ret void
@@ -34,7 +34,7 @@ define void @test_extend32(half addrspace(1)* %in, float addrspace(1)* %out) {
; CHECK-LABEL: {{^}}test_extend32:
; CHECK: v_cvt_f32_f16_e32
- %val16 = load half addrspace(1)* %in
+ %val16 = load half, half addrspace(1)* %in
%val32 = fpext half %val16 to float
store float %val32, float addrspace(1)* %out
ret void
@@ -45,7 +45,7 @@ define void @test_extend64(half addrspace(1)* %in, double addrspace(1)* %out) {
; CHECK: v_cvt_f32_f16_e32
; CHECK: v_cvt_f64_f32_e32
- %val16 = load half addrspace(1)* %in
+ %val16 = load half, half addrspace(1)* %in
%val64 = fpext half %val16 to double
store double %val64, double addrspace(1)* %out
ret void
@@ -55,7 +55,7 @@ define void @test_trunc32(float addrspace(1)* %in, half addrspace(1)* %out) {
; CHECK-LABEL: {{^}}test_trunc32:
; CHECK: v_cvt_f16_f32_e32
- %val32 = load float addrspace(1)* %in
+ %val32 = load float, float addrspace(1)* %in
%val16 = fptrunc float %val32 to half
store half %val16, half addrspace(1)* %out
ret void
diff --git a/test/CodeGen/R600/hsa.ll b/test/CodeGen/R600/hsa.ll
index 5ce3beaa16c0..f9113399afe8 100644
--- a/test/CodeGen/R600/hsa.ll
+++ b/test/CodeGen/R600/hsa.ll
@@ -1,8 +1,8 @@
-; RUN: llc < %s -mtriple=r600--amdhsa -mcpu=kaveri | FileCheck --check-prefix=HSA %s
+; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kaveri | FileCheck --check-prefix=HSA %s
-; HSA: {{^}}simple:
; HSA: .section .hsa.version
; HSA-NEXT: .ascii "HSA Code Unit:0.0:AMD:0.1:GFX8.1:0"
+; HSA: {{^}}simple:
; Make sure we are setting the ATC bit:
; HSA: s_mov_b32 s[[HI:[0-9]]], 0x100f000
; HSA: buffer_store_dword v{{[0-9]+}}, s[0:[[HI]]], 0
diff --git a/test/CodeGen/R600/i1-copy-phi.ll b/test/CodeGen/R600/i1-copy-phi.ll
index 430466e9f80e..105cd06b330a 100644
--- a/test/CodeGen/R600/i1-copy-phi.ll
+++ b/test/CodeGen/R600/i1-copy-phi.ll
@@ -6,7 +6,7 @@
; SI: s_and_saveexec_b64
; SI: s_xor_b64
; SI: v_mov_b32_e32 [[REG]], -1{{$}}
-; SI: v_cmp_ne_i32_e64 {{s\[[0-9]+:[0-9]+\]}}, [[REG]], 0
+; SI: v_cmp_ne_i32_e32 vcc, 0, [[REG]]
; SI: s_and_saveexec_b64
; SI: s_xor_b64
; SI: s_endpgm
diff --git a/test/CodeGen/R600/i8-to-double-to-float.ll b/test/CodeGen/R600/i8-to-double-to-float.ll
index 604746627666..c218e1918bb0 100644
--- a/test/CodeGen/R600/i8-to-double-to-float.ll
+++ b/test/CodeGen/R600/i8-to-double-to-float.ll
@@ -3,7 +3,7 @@
;CHECK: UINT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
define void @test(float addrspace(1)* %out, i8 addrspace(1)* %in) {
- %1 = load i8 addrspace(1)* %in
+ %1 = load i8, i8 addrspace(1)* %in
%2 = uitofp i8 %1 to double
%3 = fptrunc double %2 to float
store float %3, float addrspace(1)* %out
diff --git a/test/CodeGen/R600/icmp-select-sete-reverse-args.ll b/test/CodeGen/R600/icmp-select-sete-reverse-args.ll
index 71705a64f50e..60e59a5a5286 100644
--- a/test/CodeGen/R600/icmp-select-sete-reverse-args.ll
+++ b/test/CodeGen/R600/icmp-select-sete-reverse-args.ll
@@ -8,9 +8,9 @@
define void @test(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
entry:
- %0 = load i32 addrspace(1)* %in
- %arrayidx1 = getelementptr inbounds i32 addrspace(1)* %in, i32 1
- %1 = load i32 addrspace(1)* %arrayidx1
+ %0 = load i32, i32 addrspace(1)* %in
+ %arrayidx1 = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 1
+ %1 = load i32, i32 addrspace(1)* %arrayidx1
%cmp = icmp eq i32 %0, %1
%value = select i1 %cmp, i32 0, i32 -1
store i32 %value, i32 addrspace(1)* %out
diff --git a/test/CodeGen/R600/imm.ll b/test/CodeGen/R600/imm.ll
index 6e4fa3cc60cb..8917cd6dba33 100644
--- a/test/CodeGen/R600/imm.ll
+++ b/test/CodeGen/R600/imm.ll
@@ -1,4 +1,5 @@
-; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=CHECK %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=CHECK %s
; Use a 64-bit value with lo bits that can be represented as an inline constant
; CHECK-LABEL: {{^}}i64_imm_inline_lo:
@@ -22,81 +23,100 @@ entry:
ret void
}
-; CHECK-LABEL: {{^}}store_inline_imm_0.0_f32
-; CHECK: v_mov_b32_e32 [[REG:v[0-9]+]], 0{{$}}
+; CHECK-LABEL: {{^}}store_imm_neg_0.0_i64:
+; CHECK-DAG: s_mov_b32 s[[HI_SREG:[0-9]+]], 0x80000000
+; CHECK-DAG: s_mov_b32 s[[LO_SREG:[0-9]+]], 0{{$}}
+; CHECK-DAG: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], s[[LO_SREG]]
+; CHECK-DAG: v_mov_b32_e32 v[[HI_VREG:[0-9]+]], s[[HI_SREG]]
+; CHECK: buffer_store_dwordx2 v{{\[}}[[LO_VREG]]:[[HI_VREG]]{{\]}}
+define void @store_imm_neg_0.0_i64(i64 addrspace(1) *%out) {
+ store i64 -9223372036854775808, i64 addrspace(1) *%out
+ ret void
+}
+
+; CHECK-LABEL: {{^}}store_inline_imm_neg_0.0_i32:
+; CHECK: v_mov_b32_e32 [[REG:v[0-9]+]], 0x80000000
; CHECK-NEXT: buffer_store_dword [[REG]]
+define void @store_inline_imm_neg_0.0_i32(i32 addrspace(1)* %out) {
+ store i32 -2147483648, i32 addrspace(1)* %out
+ ret void
+}
+
+; CHECK-LABEL: {{^}}store_inline_imm_0.0_f32:
+; CHECK: v_mov_b32_e32 [[REG:v[0-9]+]], 0{{$}}
+; CHECK: buffer_store_dword [[REG]]
define void @store_inline_imm_0.0_f32(float addrspace(1)* %out) {
store float 0.0, float addrspace(1)* %out
ret void
}
-; CHECK-LABEL: {{^}}store_imm_neg_0.0_f32
+; CHECK-LABEL: {{^}}store_imm_neg_0.0_f32:
; CHECK: v_mov_b32_e32 [[REG:v[0-9]+]], 0x80000000
-; CHECK-NEXT: buffer_store_dword [[REG]]
+; CHECK: buffer_store_dword [[REG]]
define void @store_imm_neg_0.0_f32(float addrspace(1)* %out) {
store float -0.0, float addrspace(1)* %out
ret void
}
-; CHECK-LABEL: {{^}}store_inline_imm_0.5_f32
+; CHECK-LABEL: {{^}}store_inline_imm_0.5_f32:
; CHECK: v_mov_b32_e32 [[REG:v[0-9]+]], 0.5{{$}}
-; CHECK-NEXT: buffer_store_dword [[REG]]
+; CHECK: buffer_store_dword [[REG]]
define void @store_inline_imm_0.5_f32(float addrspace(1)* %out) {
store float 0.5, float addrspace(1)* %out
ret void
}
-; CHECK-LABEL: {{^}}store_inline_imm_m_0.5_f32
+; CHECK-LABEL: {{^}}store_inline_imm_m_0.5_f32:
; CHECK: v_mov_b32_e32 [[REG:v[0-9]+]], -0.5{{$}}
-; CHECK-NEXT: buffer_store_dword [[REG]]
+; CHECK: buffer_store_dword [[REG]]
define void @store_inline_imm_m_0.5_f32(float addrspace(1)* %out) {
store float -0.5, float addrspace(1)* %out
ret void
}
-; CHECK-LABEL: {{^}}store_inline_imm_1.0_f32
+; CHECK-LABEL: {{^}}store_inline_imm_1.0_f32:
; CHECK: v_mov_b32_e32 [[REG:v[0-9]+]], 1.0{{$}}
-; CHECK-NEXT: buffer_store_dword [[REG]]
+; CHECK: buffer_store_dword [[REG]]
define void @store_inline_imm_1.0_f32(float addrspace(1)* %out) {
store float 1.0, float addrspace(1)* %out
ret void
}
-; CHECK-LABEL: {{^}}store_inline_imm_m_1.0_f32
+; CHECK-LABEL: {{^}}store_inline_imm_m_1.0_f32:
; CHECK: v_mov_b32_e32 [[REG:v[0-9]+]], -1.0{{$}}
-; CHECK-NEXT: buffer_store_dword [[REG]]
+; CHECK: buffer_store_dword [[REG]]
define void @store_inline_imm_m_1.0_f32(float addrspace(1)* %out) {
store float -1.0, float addrspace(1)* %out
ret void
}
-; CHECK-LABEL: {{^}}store_inline_imm_2.0_f32
+; CHECK-LABEL: {{^}}store_inline_imm_2.0_f32:
; CHECK: v_mov_b32_e32 [[REG:v[0-9]+]], 2.0{{$}}
-; CHECK-NEXT: buffer_store_dword [[REG]]
+; CHECK: buffer_store_dword [[REG]]
define void @store_inline_imm_2.0_f32(float addrspace(1)* %out) {
store float 2.0, float addrspace(1)* %out
ret void
}
-; CHECK-LABEL: {{^}}store_inline_imm_m_2.0_f32
+; CHECK-LABEL: {{^}}store_inline_imm_m_2.0_f32:
; CHECK: v_mov_b32_e32 [[REG:v[0-9]+]], -2.0{{$}}
-; CHECK-NEXT: buffer_store_dword [[REG]]
+; CHECK: buffer_store_dword [[REG]]
define void @store_inline_imm_m_2.0_f32(float addrspace(1)* %out) {
store float -2.0, float addrspace(1)* %out
ret void
}
-; CHECK-LABEL: {{^}}store_inline_imm_4.0_f32
+; CHECK-LABEL: {{^}}store_inline_imm_4.0_f32:
; CHECK: v_mov_b32_e32 [[REG:v[0-9]+]], 4.0{{$}}
-; CHECK-NEXT: buffer_store_dword [[REG]]
+; CHECK: buffer_store_dword [[REG]]
define void @store_inline_imm_4.0_f32(float addrspace(1)* %out) {
store float 4.0, float addrspace(1)* %out
ret void
}
-; CHECK-LABEL: {{^}}store_inline_imm_m_4.0_f32
+; CHECK-LABEL: {{^}}store_inline_imm_m_4.0_f32:
; CHECK: v_mov_b32_e32 [[REG:v[0-9]+]], -4.0{{$}}
-; CHECK-NEXT: buffer_store_dword [[REG]]
+; CHECK: buffer_store_dword [[REG]]
define void @store_inline_imm_m_4.0_f32(float addrspace(1)* %out) {
store float -4.0, float addrspace(1)* %out
ret void
@@ -104,288 +124,298 @@ define void @store_inline_imm_m_4.0_f32(float addrspace(1)* %out) {
; CHECK-LABEL: {{^}}store_literal_imm_f32:
; CHECK: v_mov_b32_e32 [[REG:v[0-9]+]], 0x45800000
-; CHECK-NEXT: buffer_store_dword [[REG]]
+; CHECK: buffer_store_dword [[REG]]
define void @store_literal_imm_f32(float addrspace(1)* %out) {
store float 4096.0, float addrspace(1)* %out
ret void
}
-; CHECK-LABEL: {{^}}add_inline_imm_0.0_f32
+; CHECK-LABEL: {{^}}add_inline_imm_0.0_f32:
; CHECK: s_load_dword [[VAL:s[0-9]+]]
; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], 0, [[VAL]]{{$}}
-; CHECK-NEXT: buffer_store_dword [[REG]]
+; CHECK: buffer_store_dword [[REG]]
define void @add_inline_imm_0.0_f32(float addrspace(1)* %out, float %x) {
%y = fadd float %x, 0.0
store float %y, float addrspace(1)* %out
ret void
}
-; CHECK-LABEL: {{^}}add_inline_imm_0.5_f32
+; CHECK-LABEL: {{^}}add_inline_imm_0.5_f32:
; CHECK: s_load_dword [[VAL:s[0-9]+]]
; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], 0.5, [[VAL]]{{$}}
-; CHECK-NEXT: buffer_store_dword [[REG]]
+; CHECK: buffer_store_dword [[REG]]
define void @add_inline_imm_0.5_f32(float addrspace(1)* %out, float %x) {
%y = fadd float %x, 0.5
store float %y, float addrspace(1)* %out
ret void
}
-; CHECK-LABEL: {{^}}add_inline_imm_neg_0.5_f32
+; CHECK-LABEL: {{^}}add_inline_imm_neg_0.5_f32:
; CHECK: s_load_dword [[VAL:s[0-9]+]]
; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], -0.5, [[VAL]]{{$}}
-; CHECK-NEXT: buffer_store_dword [[REG]]
+; CHECK: buffer_store_dword [[REG]]
define void @add_inline_imm_neg_0.5_f32(float addrspace(1)* %out, float %x) {
%y = fadd float %x, -0.5
store float %y, float addrspace(1)* %out
ret void
}
-; CHECK-LABEL: {{^}}add_inline_imm_1.0_f32
+; CHECK-LABEL: {{^}}add_inline_imm_1.0_f32:
; CHECK: s_load_dword [[VAL:s[0-9]+]]
; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], 1.0, [[VAL]]{{$}}
-; CHECK-NEXT: buffer_store_dword [[REG]]
+; CHECK: buffer_store_dword [[REG]]
define void @add_inline_imm_1.0_f32(float addrspace(1)* %out, float %x) {
%y = fadd float %x, 1.0
store float %y, float addrspace(1)* %out
ret void
}
-; CHECK-LABEL: {{^}}add_inline_imm_neg_1.0_f32
+; CHECK-LABEL: {{^}}add_inline_imm_neg_1.0_f32:
; CHECK: s_load_dword [[VAL:s[0-9]+]]
; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], -1.0, [[VAL]]{{$}}
-; CHECK-NEXT: buffer_store_dword [[REG]]
+; CHECK: buffer_store_dword [[REG]]
define void @add_inline_imm_neg_1.0_f32(float addrspace(1)* %out, float %x) {
%y = fadd float %x, -1.0
store float %y, float addrspace(1)* %out
ret void
}
-; CHECK-LABEL: {{^}}add_inline_imm_2.0_f32
+; CHECK-LABEL: {{^}}add_inline_imm_2.0_f32:
; CHECK: s_load_dword [[VAL:s[0-9]+]]
; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], 2.0, [[VAL]]{{$}}
-; CHECK-NEXT: buffer_store_dword [[REG]]
+; CHECK: buffer_store_dword [[REG]]
define void @add_inline_imm_2.0_f32(float addrspace(1)* %out, float %x) {
%y = fadd float %x, 2.0
store float %y, float addrspace(1)* %out
ret void
}
-; CHECK-LABEL: {{^}}add_inline_imm_neg_2.0_f32
+; CHECK-LABEL: {{^}}add_inline_imm_neg_2.0_f32:
; CHECK: s_load_dword [[VAL:s[0-9]+]]
; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], -2.0, [[VAL]]{{$}}
-; CHECK-NEXT: buffer_store_dword [[REG]]
+; CHECK: buffer_store_dword [[REG]]
define void @add_inline_imm_neg_2.0_f32(float addrspace(1)* %out, float %x) {
%y = fadd float %x, -2.0
store float %y, float addrspace(1)* %out
ret void
}
-; CHECK-LABEL: {{^}}add_inline_imm_4.0_f32
+; CHECK-LABEL: {{^}}add_inline_imm_4.0_f32:
; CHECK: s_load_dword [[VAL:s[0-9]+]]
; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], 4.0, [[VAL]]{{$}}
-; CHECK-NEXT: buffer_store_dword [[REG]]
+; CHECK: buffer_store_dword [[REG]]
define void @add_inline_imm_4.0_f32(float addrspace(1)* %out, float %x) {
%y = fadd float %x, 4.0
store float %y, float addrspace(1)* %out
ret void
}
-; CHECK-LABEL: {{^}}add_inline_imm_neg_4.0_f32
+; CHECK-LABEL: {{^}}add_inline_imm_neg_4.0_f32:
; CHECK: s_load_dword [[VAL:s[0-9]+]]
; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], -4.0, [[VAL]]{{$}}
-; CHECK-NEXT: buffer_store_dword [[REG]]
+; CHECK: buffer_store_dword [[REG]]
define void @add_inline_imm_neg_4.0_f32(float addrspace(1)* %out, float %x) {
%y = fadd float %x, -4.0
store float %y, float addrspace(1)* %out
ret void
}
-; CHECK-LABEL: @commute_add_inline_imm_0.5_f32
+; CHECK-LABEL: {{^}}commute_add_inline_imm_0.5_f32:
; CHECK: buffer_load_dword [[VAL:v[0-9]+]]
; CHECK: v_add_f32_e32 [[REG:v[0-9]+]], 0.5, [[VAL]]
-; CHECK-NEXT: buffer_store_dword [[REG]]
+; CHECK: buffer_store_dword [[REG]]
define void @commute_add_inline_imm_0.5_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
- %x = load float addrspace(1)* %in
+ %x = load float, float addrspace(1)* %in
%y = fadd float %x, 0.5
store float %y, float addrspace(1)* %out
ret void
}
-; CHECK-LABEL: @commute_add_literal_f32
+; CHECK-LABEL: {{^}}commute_add_literal_f32:
; CHECK: buffer_load_dword [[VAL:v[0-9]+]]
; CHECK: v_add_f32_e32 [[REG:v[0-9]+]], 0x44800000, [[VAL]]
-; CHECK-NEXT: buffer_store_dword [[REG]]
+; CHECK: buffer_store_dword [[REG]]
define void @commute_add_literal_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
- %x = load float addrspace(1)* %in
+ %x = load float, float addrspace(1)* %in
%y = fadd float %x, 1024.0
store float %y, float addrspace(1)* %out
ret void
}
-; CHECK-LABEL: {{^}}add_inline_imm_1_f32
+; CHECK-LABEL: {{^}}add_inline_imm_1_f32:
; CHECK: s_load_dword [[VAL:s[0-9]+]]
; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], 1, [[VAL]]{{$}}
-; CHECK-NEXT: buffer_store_dword [[REG]]
+; CHECK: buffer_store_dword [[REG]]
define void @add_inline_imm_1_f32(float addrspace(1)* %out, float %x) {
%y = fadd float %x, 0x36a0000000000000
store float %y, float addrspace(1)* %out
ret void
}
-; CHECK-LABEL: {{^}}add_inline_imm_2_f32
+; CHECK-LABEL: {{^}}add_inline_imm_2_f32:
; CHECK: s_load_dword [[VAL:s[0-9]+]]
; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], 2, [[VAL]]{{$}}
-; CHECK-NEXT: buffer_store_dword [[REG]]
+; CHECK: buffer_store_dword [[REG]]
define void @add_inline_imm_2_f32(float addrspace(1)* %out, float %x) {
%y = fadd float %x, 0x36b0000000000000
store float %y, float addrspace(1)* %out
ret void
}
-; CHECK-LABEL: {{^}}add_inline_imm_16_f32
+; CHECK-LABEL: {{^}}add_inline_imm_16_f32:
; CHECK: s_load_dword [[VAL:s[0-9]+]]
; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], 16, [[VAL]]
-; CHECK-NEXT: buffer_store_dword [[REG]]
+; CHECK: buffer_store_dword [[REG]]
define void @add_inline_imm_16_f32(float addrspace(1)* %out, float %x) {
%y = fadd float %x, 0x36e0000000000000
store float %y, float addrspace(1)* %out
ret void
}
-; CHECK-LABEL: {{^}}add_inline_imm_neg_1_f32
+; CHECK-LABEL: {{^}}add_inline_imm_neg_1_f32:
; CHECK: s_load_dword [[VAL:s[0-9]+]]
; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], -1, [[VAL]]
-; CHECK-NEXT: buffer_store_dword [[REG]]
+; CHECK: buffer_store_dword [[REG]]
define void @add_inline_imm_neg_1_f32(float addrspace(1)* %out, float %x) {
%y = fadd float %x, 0xffffffffe0000000
store float %y, float addrspace(1)* %out
ret void
}
-; CHECK-LABEL: {{^}}add_inline_imm_neg_2_f32
+; CHECK-LABEL: {{^}}add_inline_imm_neg_2_f32:
; CHECK: s_load_dword [[VAL:s[0-9]+]]
; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], -2, [[VAL]]
-; CHECK-NEXT: buffer_store_dword [[REG]]
+; CHECK: buffer_store_dword [[REG]]
define void @add_inline_imm_neg_2_f32(float addrspace(1)* %out, float %x) {
%y = fadd float %x, 0xffffffffc0000000
store float %y, float addrspace(1)* %out
ret void
}
-; CHECK-LABEL: {{^}}add_inline_imm_neg_16_f32
+; CHECK-LABEL: {{^}}add_inline_imm_neg_16_f32:
; CHECK: s_load_dword [[VAL:s[0-9]+]]
; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], -16, [[VAL]]
-; CHECK-NEXT: buffer_store_dword [[REG]]
+; CHECK: buffer_store_dword [[REG]]
define void @add_inline_imm_neg_16_f32(float addrspace(1)* %out, float %x) {
%y = fadd float %x, 0xfffffffe00000000
store float %y, float addrspace(1)* %out
ret void
}
-; CHECK-LABEL: {{^}}add_inline_imm_63_f32
+; CHECK-LABEL: {{^}}add_inline_imm_63_f32:
; CHECK: s_load_dword [[VAL:s[0-9]+]]
; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], 63, [[VAL]]
-; CHECK-NEXT: buffer_store_dword [[REG]]
+; CHECK: buffer_store_dword [[REG]]
define void @add_inline_imm_63_f32(float addrspace(1)* %out, float %x) {
%y = fadd float %x, 0x36ff800000000000
store float %y, float addrspace(1)* %out
ret void
}
-; CHECK-LABEL: {{^}}add_inline_imm_64_f32
+; CHECK-LABEL: {{^}}add_inline_imm_64_f32:
; CHECK: s_load_dword [[VAL:s[0-9]+]]
; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], 64, [[VAL]]
-; CHECK-NEXT: buffer_store_dword [[REG]]
+; CHECK: buffer_store_dword [[REG]]
define void @add_inline_imm_64_f32(float addrspace(1)* %out, float %x) {
%y = fadd float %x, 0x3700000000000000
store float %y, float addrspace(1)* %out
ret void
}
-; CHECK-LABEL: {{^}}add_inline_imm_0.0_f64
-; CHECK: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
+
+; CHECK-LABEL: {{^}}add_inline_imm_0.0_f64:
+; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
+; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c
; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], 0, [[VAL]]
-; CHECK-NEXT: buffer_store_dwordx2 [[REG]]
+; CHECK: buffer_store_dwordx2 [[REG]]
define void @add_inline_imm_0.0_f64(double addrspace(1)* %out, double %x) {
%y = fadd double %x, 0.0
store double %y, double addrspace(1)* %out
ret void
}
-; CHECK-LABEL: {{^}}add_inline_imm_0.5_f64
-; CHECK: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
+; CHECK-LABEL: {{^}}add_inline_imm_0.5_f64:
+; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
+; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c
; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], 0.5, [[VAL]]
-; CHECK-NEXT: buffer_store_dwordx2 [[REG]]
+; CHECK: buffer_store_dwordx2 [[REG]]
define void @add_inline_imm_0.5_f64(double addrspace(1)* %out, double %x) {
%y = fadd double %x, 0.5
store double %y, double addrspace(1)* %out
ret void
}
-; CHECK-LABEL: {{^}}add_inline_imm_neg_0.5_f64
-; CHECK: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
+; CHECK-LABEL: {{^}}add_inline_imm_neg_0.5_f64:
+; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
+; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c
; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], -0.5, [[VAL]]
-; CHECK-NEXT: buffer_store_dwordx2 [[REG]]
+; CHECK: buffer_store_dwordx2 [[REG]]
define void @add_inline_imm_neg_0.5_f64(double addrspace(1)* %out, double %x) {
%y = fadd double %x, -0.5
store double %y, double addrspace(1)* %out
ret void
}
-; CHECK-LABEL: {{^}}add_inline_imm_1.0_f64
-; CHECK: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
+; CHECK-LABEL: {{^}}add_inline_imm_1.0_f64:
+; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
+; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c
; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], 1.0, [[VAL]]
-; CHECK-NEXT: buffer_store_dwordx2 [[REG]]
+; CHECK: buffer_store_dwordx2 [[REG]]
define void @add_inline_imm_1.0_f64(double addrspace(1)* %out, double %x) {
%y = fadd double %x, 1.0
store double %y, double addrspace(1)* %out
ret void
}
-; CHECK-LABEL: {{^}}add_inline_imm_neg_1.0_f64
-; CHECK: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
+; CHECK-LABEL: {{^}}add_inline_imm_neg_1.0_f64:
+; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
+; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c
; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], -1.0, [[VAL]]
-; CHECK-NEXT: buffer_store_dwordx2 [[REG]]
+; CHECK: buffer_store_dwordx2 [[REG]]
define void @add_inline_imm_neg_1.0_f64(double addrspace(1)* %out, double %x) {
%y = fadd double %x, -1.0
store double %y, double addrspace(1)* %out
ret void
}
-; CHECK-LABEL: {{^}}add_inline_imm_2.0_f64
-; CHECK: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
+; CHECK-LABEL: {{^}}add_inline_imm_2.0_f64:
+; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
+; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c
; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], 2.0, [[VAL]]
-; CHECK-NEXT: buffer_store_dwordx2 [[REG]]
+; CHECK: buffer_store_dwordx2 [[REG]]
define void @add_inline_imm_2.0_f64(double addrspace(1)* %out, double %x) {
%y = fadd double %x, 2.0
store double %y, double addrspace(1)* %out
ret void
}
-; CHECK-LABEL: {{^}}add_inline_imm_neg_2.0_f64
-; CHECK: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
+; CHECK-LABEL: {{^}}add_inline_imm_neg_2.0_f64:
+; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
+; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c
; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], -2.0, [[VAL]]
-; CHECK-NEXT: buffer_store_dwordx2 [[REG]]
+; CHECK: buffer_store_dwordx2 [[REG]]
define void @add_inline_imm_neg_2.0_f64(double addrspace(1)* %out, double %x) {
%y = fadd double %x, -2.0
store double %y, double addrspace(1)* %out
ret void
}
-; CHECK-LABEL: {{^}}add_inline_imm_4.0_f64
-; CHECK: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
+; CHECK-LABEL: {{^}}add_inline_imm_4.0_f64:
+; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
+; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c
; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], 4.0, [[VAL]]
-; CHECK-NEXT: buffer_store_dwordx2 [[REG]]
+; CHECK: buffer_store_dwordx2 [[REG]]
define void @add_inline_imm_4.0_f64(double addrspace(1)* %out, double %x) {
%y = fadd double %x, 4.0
store double %y, double addrspace(1)* %out
ret void
}
-; CHECK-LABEL: {{^}}add_inline_imm_neg_4.0_f64
-; CHECK: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
+; CHECK-LABEL: {{^}}add_inline_imm_neg_4.0_f64:
+; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
+; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c
; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], -4.0, [[VAL]]
-; CHECK-NEXT: buffer_store_dwordx2 [[REG]]
+; CHECK: buffer_store_dwordx2 [[REG]]
define void @add_inline_imm_neg_4.0_f64(double addrspace(1)* %out, double %x) {
%y = fadd double %x, -4.0
store double %y, double addrspace(1)* %out
@@ -393,80 +423,88 @@ define void @add_inline_imm_neg_4.0_f64(double addrspace(1)* %out, double %x) {
}
-; CHECK-LABEL: {{^}}add_inline_imm_1_f64
-; CHECK: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
+; CHECK-LABEL: {{^}}add_inline_imm_1_f64:
+; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
+; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c
; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], 1, [[VAL]]
-; CHECK-NEXT: buffer_store_dwordx2 [[REG]]
+; CHECK: buffer_store_dwordx2 [[REG]]
define void @add_inline_imm_1_f64(double addrspace(1)* %out, double %x) {
%y = fadd double %x, 0x0000000000000001
store double %y, double addrspace(1)* %out
ret void
}
-; CHECK-LABEL: {{^}}add_inline_imm_2_f64
-; CHECK: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
+; CHECK-LABEL: {{^}}add_inline_imm_2_f64:
+; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
+; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c
; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], 2, [[VAL]]
-; CHECK-NEXT: buffer_store_dwordx2 [[REG]]
+; CHECK: buffer_store_dwordx2 [[REG]]
define void @add_inline_imm_2_f64(double addrspace(1)* %out, double %x) {
%y = fadd double %x, 0x0000000000000002
store double %y, double addrspace(1)* %out
ret void
}
-; CHECK-LABEL: {{^}}add_inline_imm_16_f64
-; CHECK: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
+; CHECK-LABEL: {{^}}add_inline_imm_16_f64:
+; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
+; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c
; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], 16, [[VAL]]
-; CHECK-NEXT: buffer_store_dwordx2 [[REG]]
+; CHECK: buffer_store_dwordx2 [[REG]]
define void @add_inline_imm_16_f64(double addrspace(1)* %out, double %x) {
%y = fadd double %x, 0x0000000000000010
store double %y, double addrspace(1)* %out
ret void
}
-; CHECK-LABEL: {{^}}add_inline_imm_neg_1_f64
-; CHECK: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
+; CHECK-LABEL: {{^}}add_inline_imm_neg_1_f64:
+; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
+; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c
; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], -1, [[VAL]]
-; CHECK-NEXT: buffer_store_dwordx2 [[REG]]
+; CHECK: buffer_store_dwordx2 [[REG]]
define void @add_inline_imm_neg_1_f64(double addrspace(1)* %out, double %x) {
%y = fadd double %x, 0xffffffffffffffff
store double %y, double addrspace(1)* %out
ret void
}
-; CHECK-LABEL: {{^}}add_inline_imm_neg_2_f64
-; CHECK: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
+; CHECK-LABEL: {{^}}add_inline_imm_neg_2_f64:
+; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
+; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c
; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], -2, [[VAL]]
-; CHECK-NEXT: buffer_store_dwordx2 [[REG]]
+; CHECK: buffer_store_dwordx2 [[REG]]
define void @add_inline_imm_neg_2_f64(double addrspace(1)* %out, double %x) {
%y = fadd double %x, 0xfffffffffffffffe
store double %y, double addrspace(1)* %out
ret void
}
-; CHECK-LABEL: {{^}}add_inline_imm_neg_16_f64
-; CHECK: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
+; CHECK-LABEL: {{^}}add_inline_imm_neg_16_f64:
+; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
+; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c
; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], -16, [[VAL]]
-; CHECK-NEXT: buffer_store_dwordx2 [[REG]]
+; CHECK: buffer_store_dwordx2 [[REG]]
define void @add_inline_imm_neg_16_f64(double addrspace(1)* %out, double %x) {
%y = fadd double %x, 0xfffffffffffffff0
store double %y, double addrspace(1)* %out
ret void
}
-; CHECK-LABEL: {{^}}add_inline_imm_63_f64
-; CHECK: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
+; CHECK-LABEL: {{^}}add_inline_imm_63_f64:
+; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
+; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c
; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], 63, [[VAL]]
-; CHECK-NEXT: buffer_store_dwordx2 [[REG]]
+; CHECK: buffer_store_dwordx2 [[REG]]
define void @add_inline_imm_63_f64(double addrspace(1)* %out, double %x) {
%y = fadd double %x, 0x000000000000003F
store double %y, double addrspace(1)* %out
ret void
}
-; CHECK-LABEL: {{^}}add_inline_imm_64_f64
-; CHECK: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
+; CHECK-LABEL: {{^}}add_inline_imm_64_f64:
+; SI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
+; VI: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x2c
; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], 64, [[VAL]]
-; CHECK-NEXT: buffer_store_dwordx2 [[REG]]
+; CHECK: buffer_store_dwordx2 [[REG]]
define void @add_inline_imm_64_f64(double addrspace(1)* %out, double %x) {
%y = fadd double %x, 0x0000000000000040
store double %y, double addrspace(1)* %out
@@ -474,7 +512,7 @@ define void @add_inline_imm_64_f64(double addrspace(1)* %out, double %x) {
}
-; CHECK-LABEL: {{^}}store_inline_imm_0.0_f64
+; CHECK-LABEL: {{^}}store_inline_imm_0.0_f64:
; CHECK: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], 0
; CHECK: v_mov_b32_e32 v[[HI_VREG:[0-9]+]], 0
; CHECK: buffer_store_dwordx2 v{{\[}}[[LO_VREG]]:[[HI_VREG]]{{\]}}
@@ -482,3 +520,98 @@ define void @store_inline_imm_0.0_f64(double addrspace(1)* %out) {
store double 0.0, double addrspace(1)* %out
ret void
}
+
+
+; CHECK-LABEL: {{^}}store_literal_imm_neg_0.0_f64:
+; CHECK-DAG: s_mov_b32 s[[HI_SREG:[0-9]+]], 0x80000000
+; CHECK-DAG: s_mov_b32 s[[LO_SREG:[0-9]+]], 0{{$}}
+; CHECK-DAG: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], s[[LO_SREG]]
+; CHECK-DAG: v_mov_b32_e32 v[[HI_VREG:[0-9]+]], s[[HI_SREG]]
+; CHECK: buffer_store_dwordx2 v{{\[}}[[LO_VREG]]:[[HI_VREG]]{{\]}}
+define void @store_literal_imm_neg_0.0_f64(double addrspace(1)* %out) {
+ store double -0.0, double addrspace(1)* %out
+ ret void
+}
+
+; CHECK-LABEL: {{^}}store_inline_imm_0.5_f64:
+; CHECK-DAG: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], 0{{$}}
+; CHECK-DAG: v_mov_b32_e32 v[[HI_VREG:[0-9]+]], 0x3fe00000
+; CHECK: buffer_store_dwordx2 v{{\[}}[[LO_VREG]]:[[HI_VREG]]{{\]}}
+define void @store_inline_imm_0.5_f64(double addrspace(1)* %out) {
+ store double 0.5, double addrspace(1)* %out
+ ret void
+}
+
+; CHECK-LABEL: {{^}}store_inline_imm_m_0.5_f64:
+; CHECK-DAG: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], 0{{$}}
+; CHECK-DAG: v_mov_b32_e32 v[[HI_VREG:[0-9]+]], 0xbfe00000
+; CHECK: buffer_store_dwordx2 v{{\[}}[[LO_VREG]]:[[HI_VREG]]{{\]}}
+define void @store_inline_imm_m_0.5_f64(double addrspace(1)* %out) {
+ store double -0.5, double addrspace(1)* %out
+ ret void
+}
+
+; CHECK-LABEL: {{^}}store_inline_imm_1.0_f64:
+; CHECK-DAG: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], 0{{$}}
+; CHECK-DAG: v_mov_b32_e32 v[[HI_VREG:[0-9]+]], 0x3ff00000
+; CHECK: buffer_store_dwordx2 v{{\[}}[[LO_VREG]]:[[HI_VREG]]{{\]}}
+define void @store_inline_imm_1.0_f64(double addrspace(1)* %out) {
+ store double 1.0, double addrspace(1)* %out
+ ret void
+}
+
+; CHECK-LABEL: {{^}}store_inline_imm_m_1.0_f64:
+; CHECK-DAG: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], 0{{$}}
+; CHECK-DAG: v_mov_b32_e32 v[[HI_VREG:[0-9]+]], 0xbff00000
+; CHECK: buffer_store_dwordx2 v{{\[}}[[LO_VREG]]:[[HI_VREG]]{{\]}}
+define void @store_inline_imm_m_1.0_f64(double addrspace(1)* %out) {
+ store double -1.0, double addrspace(1)* %out
+ ret void
+}
+
+; CHECK-LABEL: {{^}}store_inline_imm_2.0_f64:
+; CHECK-DAG: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], 0{{$}}
+; CHECK-DAG: v_mov_b32_e32 v[[HI_VREG:[0-9]+]], 2.0
+; CHECK: buffer_store_dwordx2 v{{\[}}[[LO_VREG]]:[[HI_VREG]]{{\]}}
+define void @store_inline_imm_2.0_f64(double addrspace(1)* %out) {
+ store double 2.0, double addrspace(1)* %out
+ ret void
+}
+
+; CHECK-LABEL: {{^}}store_inline_imm_m_2.0_f64:
+; CHECK-DAG: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], 0{{$}}
+; CHECK-DAG: v_mov_b32_e32 v[[HI_VREG:[0-9]+]], -2.0
+; CHECK: buffer_store_dwordx2 v{{\[}}[[LO_VREG]]:[[HI_VREG]]{{\]}}
+define void @store_inline_imm_m_2.0_f64(double addrspace(1)* %out) {
+ store double -2.0, double addrspace(1)* %out
+ ret void
+}
+
+; CHECK-LABEL: {{^}}store_inline_imm_4.0_f64:
+; CHECK-DAG: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], 0{{$}}
+; CHECK-DAG: v_mov_b32_e32 v[[HI_VREG:[0-9]+]], 0x40100000
+; CHECK: buffer_store_dwordx2 v{{\[}}[[LO_VREG]]:[[HI_VREG]]{{\]}}
+define void @store_inline_imm_4.0_f64(double addrspace(1)* %out) {
+ store double 4.0, double addrspace(1)* %out
+ ret void
+}
+
+; CHECK-LABEL: {{^}}store_inline_imm_m_4.0_f64:
+; CHECK-DAG: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], 0{{$}}
+; CHECK-DAG: v_mov_b32_e32 v[[HI_VREG:[0-9]+]], 0xc0100000
+; CHECK: buffer_store_dwordx2 v{{\[}}[[LO_VREG]]:[[HI_VREG]]{{\]}}
+define void @store_inline_imm_m_4.0_f64(double addrspace(1)* %out) {
+ store double -4.0, double addrspace(1)* %out
+ ret void
+}
+
+; CHECK-LABEL: {{^}}store_literal_imm_f64:
+; CHECK-DAG: s_mov_b32 s[[HI_SREG:[0-9]+]], 0x40b00000
+; CHECK-DAG: s_mov_b32 s[[LO_SREG:[0-9]+]], 0{{$}}
+; CHECK-DAG: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], s[[LO_SREG]]
+; CHECK-DAG: v_mov_b32_e32 v[[HI_VREG:[0-9]+]], s[[HI_SREG]]
+; CHECK: buffer_store_dwordx2 v{{\[}}[[LO_VREG]]:[[HI_VREG]]{{\]}}
+define void @store_literal_imm_f64(double addrspace(1)* %out) {
+ store double 4096.0, double addrspace(1)* %out
+ ret void
+}
diff --git a/test/CodeGen/R600/indirect-private-64.ll b/test/CodeGen/R600/indirect-private-64.ll
index cb06d609da49..d63e1b6c5212 100644
--- a/test/CodeGen/R600/indirect-private-64.ll
+++ b/test/CodeGen/R600/indirect-private-64.ll
@@ -14,12 +14,12 @@ declare void @llvm.AMDGPU.barrier.local() noduplicate nounwind
; SI-PROMOTE: ds_write_b64
; SI-PROMOTE: ds_read_b64
define void @private_access_f64_alloca(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in, i32 %b) nounwind {
- %val = load double addrspace(1)* %in, align 8
+ %val = load double, double addrspace(1)* %in, align 8
%array = alloca double, i32 16, align 8
- %ptr = getelementptr double* %array, i32 %b
+ %ptr = getelementptr double, double* %array, i32 %b
store double %val, double* %ptr, align 8
call void @llvm.AMDGPU.barrier.local() noduplicate nounwind
- %result = load double* %ptr, align 8
+ %result = load double, double* %ptr, align 8
store double %result, double addrspace(1)* %out, align 8
ret void
}
@@ -38,12 +38,12 @@ define void @private_access_f64_alloca(double addrspace(1)* noalias %out, double
; SI-PROMOTE: ds_read_b32
; SI-PROMOTE: ds_read_b32
define void @private_access_v2f64_alloca(<2 x double> addrspace(1)* noalias %out, <2 x double> addrspace(1)* noalias %in, i32 %b) nounwind {
- %val = load <2 x double> addrspace(1)* %in, align 16
+ %val = load <2 x double>, <2 x double> addrspace(1)* %in, align 16
%array = alloca <2 x double>, i32 16, align 16
- %ptr = getelementptr <2 x double>* %array, i32 %b
+ %ptr = getelementptr <2 x double>, <2 x double>* %array, i32 %b
store <2 x double> %val, <2 x double>* %ptr, align 16
call void @llvm.AMDGPU.barrier.local() noduplicate nounwind
- %result = load <2 x double>* %ptr, align 16
+ %result = load <2 x double>, <2 x double>* %ptr, align 16
store <2 x double> %result, <2 x double> addrspace(1)* %out, align 16
ret void
}
@@ -56,12 +56,12 @@ define void @private_access_v2f64_alloca(<2 x double> addrspace(1)* noalias %out
; SI-PROMOTE: ds_write_b64
; SI-PROMOTE: ds_read_b64
define void @private_access_i64_alloca(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in, i32 %b) nounwind {
- %val = load i64 addrspace(1)* %in, align 8
+ %val = load i64, i64 addrspace(1)* %in, align 8
%array = alloca i64, i32 16, align 8
- %ptr = getelementptr i64* %array, i32 %b
+ %ptr = getelementptr i64, i64* %array, i32 %b
store i64 %val, i64* %ptr, align 8
call void @llvm.AMDGPU.barrier.local() noduplicate nounwind
- %result = load i64* %ptr, align 8
+ %result = load i64, i64* %ptr, align 8
store i64 %result, i64 addrspace(1)* %out, align 8
ret void
}
@@ -80,12 +80,12 @@ define void @private_access_i64_alloca(i64 addrspace(1)* noalias %out, i64 addrs
; SI-PROMOTE: ds_read_b32
; SI-PROMOTE: ds_read_b32
define void @private_access_v2i64_alloca(<2 x i64> addrspace(1)* noalias %out, <2 x i64> addrspace(1)* noalias %in, i32 %b) nounwind {
- %val = load <2 x i64> addrspace(1)* %in, align 16
+ %val = load <2 x i64>, <2 x i64> addrspace(1)* %in, align 16
%array = alloca <2 x i64>, i32 16, align 16
- %ptr = getelementptr <2 x i64>* %array, i32 %b
+ %ptr = getelementptr <2 x i64>, <2 x i64>* %array, i32 %b
store <2 x i64> %val, <2 x i64>* %ptr, align 16
call void @llvm.AMDGPU.barrier.local() noduplicate nounwind
- %result = load <2 x i64>* %ptr, align 16
+ %result = load <2 x i64>, <2 x i64>* %ptr, align 16
store <2 x i64> %result, <2 x i64> addrspace(1)* %out, align 16
ret void
}
diff --git a/test/CodeGen/R600/inline-asm.ll b/test/CodeGen/R600/inline-asm.ll
index 37e4486db380..efc2292de3a5 100644
--- a/test/CodeGen/R600/inline-asm.ll
+++ b/test/CodeGen/R600/inline-asm.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s
-; RUN: llc < %s -march=r600 -mcpu=tonga -verify-machineinstrs | FileCheck %s
+; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck %s
+; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s
; CHECK: {{^}}inline_asm:
; CHECK: s_endpgm
diff --git a/test/CodeGen/R600/insert_vector_elt.ll b/test/CodeGen/R600/insert_vector_elt.ll
index 64afddcca21d..6de3d408c486 100644
--- a/test/CodeGen/R600/insert_vector_elt.ll
+++ b/test/CodeGen/R600/insert_vector_elt.ll
@@ -185,13 +185,13 @@ entry:
br i1 %1, label %if, label %else
if:
- %2 = load i32 addrspace(1)* %in
+ %2 = load i32, i32 addrspace(1)* %in
%3 = insertelement <2 x i32> %0, i32 %2, i32 1
br label %endif
else:
- %4 = getelementptr i32 addrspace(1)* %in, i32 1
- %5 = load i32 addrspace(1)* %4
+ %4 = getelementptr i32, i32 addrspace(1)* %in, i32 1
+ %5 = load i32, i32 addrspace(1)* %4
%6 = insertelement <2 x i32> %0, i32 %5, i32 1
br label %endif
diff --git a/test/CodeGen/R600/jump-address.ll b/test/CodeGen/R600/jump-address.ll
index a1cd3882443a..f55912e37401 100644
--- a/test/CodeGen/R600/jump-address.ll
+++ b/test/CodeGen/R600/jump-address.ll
@@ -6,7 +6,7 @@
define void @main() #0 {
main_body:
- %0 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+ %0 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
%1 = extractelement <4 x float> %0, i32 0
%2 = bitcast float %1 to i32
%3 = icmp eq i32 %2, 0
@@ -17,7 +17,7 @@ main_body:
br i1 %7, label %ENDIF, label %ELSE
ELSE: ; preds = %main_body
- %8 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+ %8 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
%9 = extractelement <4 x float> %8, i32 0
%10 = bitcast float %9 to i32
%11 = icmp eq i32 %10, 1
@@ -40,7 +40,7 @@ ENDIF: ; preds = %IF13, %ELSE, %main_
ret void
IF13: ; preds = %ELSE
- %20 = load <4 x float> addrspace(8)* null
+ %20 = load <4 x float>, <4 x float> addrspace(8)* null
%21 = extractelement <4 x float> %20, i32 0
%22 = fsub float -0.000000e+00, %21
%23 = fadd float 0xFFF8000000000000, %22
diff --git a/test/CodeGen/R600/kcache-fold.ll b/test/CodeGen/R600/kcache-fold.ll
index 27840b2e1609..7e2291cfdc35 100644
--- a/test/CodeGen/R600/kcache-fold.ll
+++ b/test/CodeGen/R600/kcache-fold.ll
@@ -4,35 +4,35 @@
; CHECK: MOV * T{{[0-9]+\.[XYZW], KC0}}
define void @main1() {
main_body:
- %0 = load <4 x float> addrspace(8)* null
+ %0 = load <4 x float>, <4 x float> addrspace(8)* null
%1 = extractelement <4 x float> %0, i32 0
- %2 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+ %2 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
%3 = extractelement <4 x float> %2, i32 0
- %4 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
+ %4 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
%5 = extractelement <4 x float> %4, i32 0
%6 = fcmp ogt float %1, 0.000000e+00
%7 = select i1 %6, float %3, float %5
- %8 = load <4 x float> addrspace(8)* null
+ %8 = load <4 x float>, <4 x float> addrspace(8)* null
%9 = extractelement <4 x float> %8, i32 1
- %10 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+ %10 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
%11 = extractelement <4 x float> %10, i32 1
- %12 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
+ %12 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
%13 = extractelement <4 x float> %12, i32 1
%14 = fcmp ogt float %9, 0.000000e+00
%15 = select i1 %14, float %11, float %13
- %16 = load <4 x float> addrspace(8)* null
+ %16 = load <4 x float>, <4 x float> addrspace(8)* null
%17 = extractelement <4 x float> %16, i32 2
- %18 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+ %18 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
%19 = extractelement <4 x float> %18, i32 2
- %20 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
+ %20 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
%21 = extractelement <4 x float> %20, i32 2
%22 = fcmp ogt float %17, 0.000000e+00
%23 = select i1 %22, float %19, float %21
- %24 = load <4 x float> addrspace(8)* null
+ %24 = load <4 x float>, <4 x float> addrspace(8)* null
%25 = extractelement <4 x float> %24, i32 3
- %26 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+ %26 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
%27 = extractelement <4 x float> %26, i32 3
- %28 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
+ %28 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
%29 = extractelement <4 x float> %28, i32 3
%30 = fcmp ogt float %25, 0.000000e+00
%31 = select i1 %30, float %27, float %29
@@ -52,35 +52,35 @@ main_body:
; CHECK-NOT: MOV
define void @main2() {
main_body:
- %0 = load <4 x float> addrspace(8)* null
+ %0 = load <4 x float>, <4 x float> addrspace(8)* null
%1 = extractelement <4 x float> %0, i32 0
- %2 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+ %2 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
%3 = extractelement <4 x float> %2, i32 0
- %4 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+ %4 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
%5 = extractelement <4 x float> %4, i32 1
%6 = fcmp ogt float %1, 0.000000e+00
%7 = select i1 %6, float %3, float %5
- %8 = load <4 x float> addrspace(8)* null
+ %8 = load <4 x float>, <4 x float> addrspace(8)* null
%9 = extractelement <4 x float> %8, i32 1
- %10 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
+ %10 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
%11 = extractelement <4 x float> %10, i32 0
- %12 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
+ %12 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
%13 = extractelement <4 x float> %12, i32 1
%14 = fcmp ogt float %9, 0.000000e+00
%15 = select i1 %14, float %11, float %13
- %16 = load <4 x float> addrspace(8)* null
+ %16 = load <4 x float>, <4 x float> addrspace(8)* null
%17 = extractelement <4 x float> %16, i32 2
- %18 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+ %18 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
%19 = extractelement <4 x float> %18, i32 3
- %20 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+ %20 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
%21 = extractelement <4 x float> %20, i32 2
%22 = fcmp ogt float %17, 0.000000e+00
%23 = select i1 %22, float %19, float %21
- %24 = load <4 x float> addrspace(8)* null
+ %24 = load <4 x float>, <4 x float> addrspace(8)* null
%25 = extractelement <4 x float> %24, i32 3
- %26 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
+ %26 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
%27 = extractelement <4 x float> %26, i32 3
- %28 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
+ %28 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
%29 = extractelement <4 x float> %28, i32 2
%30 = fcmp ogt float %25, 0.000000e+00
%31 = select i1 %30, float %27, float %29
diff --git a/test/CodeGen/R600/kernel-args.ll b/test/CodeGen/R600/kernel-args.ll
index 42d289d4ef23..1dd7c2cb7995 100644
--- a/test/CodeGen/R600/kernel-args.ll
+++ b/test/CodeGen/R600/kernel-args.ll
@@ -1,11 +1,11 @@
-; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=EG
-; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s --check-prefix=EG
-; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI
+; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI --check-prefix=GCN --check-prefix=FUNC
+; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s --check-prefix=VI --check-prefix=GCN --check-prefix=FUNC
+; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=EG --check-prefix=FUNC
+; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s --check-prefix=EG --check-prefix=FUNC
-; EG-LABEL: {{^}}i8_arg:
+; FUNC-LABEL: {{^}}i8_arg:
; EG: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
-; SI-LABEL: {{^}}i8_arg:
-; SI: buffer_load_ubyte
+; GCN: buffer_load_ubyte
define void @i8_arg(i32 addrspace(1)* nocapture %out, i8 %in) nounwind {
entry:
@@ -14,10 +14,10 @@ entry:
ret void
}
-; EG-LABEL: {{^}}i8_zext_arg:
+; FUNC-LABEL: {{^}}i8_zext_arg:
; EG: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
-; SI-LABEL: {{^}}i8_zext_arg:
; SI: s_load_dword s{{[0-9]}}, s[0:1], 0xb
+; VI: s_load_dword s{{[0-9]}}, s[0:1], 0x2c
define void @i8_zext_arg(i32 addrspace(1)* nocapture %out, i8 zeroext %in) nounwind {
entry:
@@ -26,10 +26,10 @@ entry:
ret void
}
-; EG-LABEL: {{^}}i8_sext_arg:
+; FUNC-LABEL: {{^}}i8_sext_arg:
; EG: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
-; SI-LABEL: {{^}}i8_sext_arg:
; SI: s_load_dword s{{[0-9]}}, s[0:1], 0xb
+; VI: s_load_dword s{{[0-9]}}, s[0:1], 0x2c
define void @i8_sext_arg(i32 addrspace(1)* nocapture %out, i8 signext %in) nounwind {
entry:
@@ -38,10 +38,9 @@ entry:
ret void
}
-; EG-LABEL: {{^}}i16_arg:
+; FUNC-LABEL: {{^}}i16_arg:
; EG: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
-; SI-LABEL: {{^}}i16_arg:
-; SI: buffer_load_ushort
+; GCN: buffer_load_ushort
define void @i16_arg(i32 addrspace(1)* nocapture %out, i16 %in) nounwind {
entry:
@@ -50,10 +49,10 @@ entry:
ret void
}
-; EG-LABEL: {{^}}i16_zext_arg:
+; FUNC-LABEL: {{^}}i16_zext_arg:
; EG: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
-; SI-LABEL: {{^}}i16_zext_arg:
; SI: s_load_dword s{{[0-9]}}, s[0:1], 0xb
+; VI: s_load_dword s{{[0-9]}}, s[0:1], 0x2c
define void @i16_zext_arg(i32 addrspace(1)* nocapture %out, i16 zeroext %in) nounwind {
entry:
@@ -62,10 +61,10 @@ entry:
ret void
}
-; EG-LABEL: {{^}}i16_sext_arg:
+; FUNC-LABEL: {{^}}i16_sext_arg:
; EG: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
-; SI-LABEL: {{^}}i16_sext_arg:
; SI: s_load_dword s{{[0-9]}}, s[0:1], 0xb
+; VI: s_load_dword s{{[0-9]}}, s[0:1], 0x2c
define void @i16_sext_arg(i32 addrspace(1)* nocapture %out, i16 signext %in) nounwind {
entry:
@@ -74,176 +73,170 @@ entry:
ret void
}
-; EG-LABEL: {{^}}i32_arg:
+; FUNC-LABEL: {{^}}i32_arg:
; EG: T{{[0-9]\.[XYZW]}}, KC0[2].Z
-; SI-LABEL: {{^}}i32_arg:
-; s_load_dword s{{[0-9]}}, s[0:1], 0xb
+; SI: s_load_dword s{{[0-9]}}, s[0:1], 0xb
+; VI: s_load_dword s{{[0-9]}}, s[0:1], 0x2c
define void @i32_arg(i32 addrspace(1)* nocapture %out, i32 %in) nounwind {
entry:
store i32 %in, i32 addrspace(1)* %out, align 4
ret void
}
-; EG-LABEL: {{^}}f32_arg:
+; FUNC-LABEL: {{^}}f32_arg:
; EG: T{{[0-9]\.[XYZW]}}, KC0[2].Z
-; SI-LABEL: {{^}}f32_arg:
-; s_load_dword s{{[0-9]}}, s[0:1], 0xb
+; SI: s_load_dword s{{[0-9]}}, s[0:1], 0xb
+; VI: s_load_dword s{{[0-9]}}, s[0:1], 0x2c
define void @f32_arg(float addrspace(1)* nocapture %out, float %in) nounwind {
entry:
store float %in, float addrspace(1)* %out, align 4
ret void
}
-; EG-LABEL: {{^}}v2i8_arg:
+; FUNC-LABEL: {{^}}v2i8_arg:
; EG: VTX_READ_8
; EG: VTX_READ_8
-; SI-LABEL: {{^}}v2i8_arg:
-; SI: buffer_load_ubyte
-; SI: buffer_load_ubyte
+; GCN: buffer_load_ubyte
+; GCN: buffer_load_ubyte
define void @v2i8_arg(<2 x i8> addrspace(1)* %out, <2 x i8> %in) {
entry:
store <2 x i8> %in, <2 x i8> addrspace(1)* %out
ret void
}
-; EG-LABEL: {{^}}v2i16_arg:
+; FUNC-LABEL: {{^}}v2i16_arg:
; EG: VTX_READ_16
; EG: VTX_READ_16
-; SI-LABEL: {{^}}v2i16_arg:
-; SI-DAG: buffer_load_ushort
-; SI-DAG: buffer_load_ushort
+; GCN-DAG: buffer_load_ushort
+; GCN-DAG: buffer_load_ushort
define void @v2i16_arg(<2 x i16> addrspace(1)* %out, <2 x i16> %in) {
entry:
store <2 x i16> %in, <2 x i16> addrspace(1)* %out
ret void
}
-; EG-LABEL: {{^}}v2i32_arg:
+; FUNC-LABEL: {{^}}v2i32_arg:
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].X
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[2].W
-; SI-LABEL: {{^}}v2i32_arg:
; SI: s_load_dwordx2 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0xb
+; VI: s_load_dwordx2 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0x2c
define void @v2i32_arg(<2 x i32> addrspace(1)* nocapture %out, <2 x i32> %in) nounwind {
entry:
store <2 x i32> %in, <2 x i32> addrspace(1)* %out, align 4
ret void
}
-; EG-LABEL: {{^}}v2f32_arg:
+; FUNC-LABEL: {{^}}v2f32_arg:
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].X
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[2].W
-; SI-LABEL: {{^}}v2f32_arg:
; SI: s_load_dwordx2 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0xb
+; VI: s_load_dwordx2 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0x2c
define void @v2f32_arg(<2 x float> addrspace(1)* nocapture %out, <2 x float> %in) nounwind {
entry:
store <2 x float> %in, <2 x float> addrspace(1)* %out, align 4
ret void
}
-; EG-LABEL: {{^}}v3i8_arg:
+; FUNC-LABEL: {{^}}v3i8_arg:
; VTX_READ_8 T{{[0-9]}}.X, T{{[0-9]}}.X, 40
; VTX_READ_8 T{{[0-9]}}.X, T{{[0-9]}}.X, 41
; VTX_READ_8 T{{[0-9]}}.X, T{{[0-9]}}.X, 42
-; SI-LABEL: {{^}}v3i8_arg:
define void @v3i8_arg(<3 x i8> addrspace(1)* nocapture %out, <3 x i8> %in) nounwind {
entry:
store <3 x i8> %in, <3 x i8> addrspace(1)* %out, align 4
ret void
}
-; EG-LABEL: {{^}}v3i16_arg:
+; FUNC-LABEL: {{^}}v3i16_arg:
; VTX_READ_16 T{{[0-9]}}.X, T{{[0-9]}}.X, 44
; VTX_READ_16 T{{[0-9]}}.X, T{{[0-9]}}.X, 46
; VTX_READ_16 T{{[0-9]}}.X, T{{[0-9]}}.X, 48
-; SI-LABEL: {{^}}v3i16_arg:
define void @v3i16_arg(<3 x i16> addrspace(1)* nocapture %out, <3 x i16> %in) nounwind {
entry:
store <3 x i16> %in, <3 x i16> addrspace(1)* %out, align 4
ret void
}
-; EG-LABEL: {{^}}v3i32_arg:
+; FUNC-LABEL: {{^}}v3i32_arg:
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Y
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Z
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].W
-; SI-LABEL: {{^}}v3i32_arg:
; SI: s_load_dwordx4 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0xd
+; VI: s_load_dwordx4 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x34
define void @v3i32_arg(<3 x i32> addrspace(1)* nocapture %out, <3 x i32> %in) nounwind {
entry:
store <3 x i32> %in, <3 x i32> addrspace(1)* %out, align 4
ret void
}
-; EG-LABEL: {{^}}v3f32_arg:
+; FUNC-LABEL: {{^}}v3f32_arg:
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Y
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Z
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].W
-; SI-LABEL: {{^}}v3f32_arg:
; SI: s_load_dwordx4 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0xd
+; VI: s_load_dwordx4 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x34
define void @v3f32_arg(<3 x float> addrspace(1)* nocapture %out, <3 x float> %in) nounwind {
entry:
store <3 x float> %in, <3 x float> addrspace(1)* %out, align 4
ret void
}
-; EG-LABEL: {{^}}v4i8_arg:
+; FUNC-LABEL: {{^}}v4i8_arg:
; EG: VTX_READ_8
; EG: VTX_READ_8
; EG: VTX_READ_8
; EG: VTX_READ_8
-; SI-LABEL: {{^}}v4i8_arg:
-; SI: buffer_load_ubyte
-; SI: buffer_load_ubyte
-; SI: buffer_load_ubyte
-; SI: buffer_load_ubyte
+; GCN: buffer_load_ubyte
+; GCN: buffer_load_ubyte
+; GCN: buffer_load_ubyte
+; GCN: buffer_load_ubyte
define void @v4i8_arg(<4 x i8> addrspace(1)* %out, <4 x i8> %in) {
entry:
store <4 x i8> %in, <4 x i8> addrspace(1)* %out
ret void
}
-; EG-LABEL: {{^}}v4i16_arg:
+; FUNC-LABEL: {{^}}v4i16_arg:
; EG: VTX_READ_16
; EG: VTX_READ_16
; EG: VTX_READ_16
; EG: VTX_READ_16
-; SI-LABEL: {{^}}v4i16_arg:
-; SI: buffer_load_ushort
-; SI: buffer_load_ushort
-; SI: buffer_load_ushort
-; SI: buffer_load_ushort
+; GCN: buffer_load_ushort
+; GCN: buffer_load_ushort
+; GCN: buffer_load_ushort
+; GCN: buffer_load_ushort
define void @v4i16_arg(<4 x i16> addrspace(1)* %out, <4 x i16> %in) {
entry:
store <4 x i16> %in, <4 x i16> addrspace(1)* %out
ret void
}
-; EG-LABEL: {{^}}v4i32_arg:
+; FUNC-LABEL: {{^}}v4i32_arg:
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Y
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Z
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].W
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].X
-; SI-LABEL: {{^}}v4i32_arg:
; SI: s_load_dwordx4 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0xd
+; VI: s_load_dwordx4 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0x34
define void @v4i32_arg(<4 x i32> addrspace(1)* nocapture %out, <4 x i32> %in) nounwind {
entry:
store <4 x i32> %in, <4 x i32> addrspace(1)* %out, align 4
ret void
}
-; EG-LABEL: {{^}}v4f32_arg:
+; FUNC-LABEL: {{^}}v4f32_arg:
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Y
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Z
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].W
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].X
-; SI-LABEL: {{^}}v4f32_arg:
; SI: s_load_dwordx4 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0xd
+; VI: s_load_dwordx4 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0x34
define void @v4f32_arg(<4 x float> addrspace(1)* nocapture %out, <4 x float> %in) nounwind {
entry:
store <4 x float> %in, <4 x float> addrspace(1)* %out, align 4
ret void
}
-; EG-LABEL: {{^}}v8i8_arg:
+; FUNC-LABEL: {{^}}v8i8_arg:
; EG: VTX_READ_8
; EG: VTX_READ_8
; EG: VTX_READ_8
@@ -252,21 +245,20 @@ entry:
; EG: VTX_READ_8
; EG: VTX_READ_8
; EG: VTX_READ_8
-; SI-LABEL: {{^}}v8i8_arg:
-; SI: buffer_load_ubyte
-; SI: buffer_load_ubyte
-; SI: buffer_load_ubyte
-; SI: buffer_load_ubyte
-; SI: buffer_load_ubyte
-; SI: buffer_load_ubyte
-; SI: buffer_load_ubyte
+; GCN: buffer_load_ubyte
+; GCN: buffer_load_ubyte
+; GCN: buffer_load_ubyte
+; GCN: buffer_load_ubyte
+; GCN: buffer_load_ubyte
+; GCN: buffer_load_ubyte
+; GCN: buffer_load_ubyte
define void @v8i8_arg(<8 x i8> addrspace(1)* %out, <8 x i8> %in) {
entry:
store <8 x i8> %in, <8 x i8> addrspace(1)* %out
ret void
}
-; EG-LABEL: {{^}}v8i16_arg:
+; FUNC-LABEL: {{^}}v8i16_arg:
; EG: VTX_READ_16
; EG: VTX_READ_16
; EG: VTX_READ_16
@@ -275,22 +267,21 @@ entry:
; EG: VTX_READ_16
; EG: VTX_READ_16
; EG: VTX_READ_16
-; SI-LABEL: {{^}}v8i16_arg:
-; SI: buffer_load_ushort
-; SI: buffer_load_ushort
-; SI: buffer_load_ushort
-; SI: buffer_load_ushort
-; SI: buffer_load_ushort
-; SI: buffer_load_ushort
-; SI: buffer_load_ushort
-; SI: buffer_load_ushort
+; GCN: buffer_load_ushort
+; GCN: buffer_load_ushort
+; GCN: buffer_load_ushort
+; GCN: buffer_load_ushort
+; GCN: buffer_load_ushort
+; GCN: buffer_load_ushort
+; GCN: buffer_load_ushort
+; GCN: buffer_load_ushort
define void @v8i16_arg(<8 x i16> addrspace(1)* %out, <8 x i16> %in) {
entry:
store <8 x i16> %in, <8 x i16> addrspace(1)* %out
ret void
}
-; EG-LABEL: {{^}}v8i32_arg:
+; FUNC-LABEL: {{^}}v8i32_arg:
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].Y
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].Z
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].W
@@ -299,15 +290,15 @@ entry:
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].Z
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].W
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].X
-; SI-LABEL: {{^}}v8i32_arg:
; SI: s_load_dwordx8 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x11
+; VI: s_load_dwordx8 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x44
define void @v8i32_arg(<8 x i32> addrspace(1)* nocapture %out, <8 x i32> %in) nounwind {
entry:
store <8 x i32> %in, <8 x i32> addrspace(1)* %out, align 4
ret void
}
-; EG-LABEL: {{^}}v8f32_arg:
+; FUNC-LABEL: {{^}}v8f32_arg:
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].Y
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].Z
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].W
@@ -316,7 +307,6 @@ entry:
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].Z
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].W
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].X
-; SI-LABEL: {{^}}v8f32_arg:
; SI: s_load_dwordx8 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x11
define void @v8f32_arg(<8 x float> addrspace(1)* nocapture %out, <8 x float> %in) nounwind {
entry:
@@ -324,7 +314,7 @@ entry:
ret void
}
-; EG-LABEL: {{^}}v16i8_arg:
+; FUNC-LABEL: {{^}}v16i8_arg:
; EG: VTX_READ_8
; EG: VTX_READ_8
; EG: VTX_READ_8
@@ -341,30 +331,29 @@ entry:
; EG: VTX_READ_8
; EG: VTX_READ_8
; EG: VTX_READ_8
-; SI-LABEL: {{^}}v16i8_arg:
-; SI: buffer_load_ubyte
-; SI: buffer_load_ubyte
-; SI: buffer_load_ubyte
-; SI: buffer_load_ubyte
-; SI: buffer_load_ubyte
-; SI: buffer_load_ubyte
-; SI: buffer_load_ubyte
-; SI: buffer_load_ubyte
-; SI: buffer_load_ubyte
-; SI: buffer_load_ubyte
-; SI: buffer_load_ubyte
-; SI: buffer_load_ubyte
-; SI: buffer_load_ubyte
-; SI: buffer_load_ubyte
-; SI: buffer_load_ubyte
-; SI: buffer_load_ubyte
+; GCN: buffer_load_ubyte
+; GCN: buffer_load_ubyte
+; GCN: buffer_load_ubyte
+; GCN: buffer_load_ubyte
+; GCN: buffer_load_ubyte
+; GCN: buffer_load_ubyte
+; GCN: buffer_load_ubyte
+; GCN: buffer_load_ubyte
+; GCN: buffer_load_ubyte
+; GCN: buffer_load_ubyte
+; GCN: buffer_load_ubyte
+; GCN: buffer_load_ubyte
+; GCN: buffer_load_ubyte
+; GCN: buffer_load_ubyte
+; GCN: buffer_load_ubyte
+; GCN: buffer_load_ubyte
define void @v16i8_arg(<16 x i8> addrspace(1)* %out, <16 x i8> %in) {
entry:
store <16 x i8> %in, <16 x i8> addrspace(1)* %out
ret void
}
-; EG-LABEL: {{^}}v16i16_arg:
+; FUNC-LABEL: {{^}}v16i16_arg:
; EG: VTX_READ_16
; EG: VTX_READ_16
; EG: VTX_READ_16
@@ -381,30 +370,29 @@ entry:
; EG: VTX_READ_16
; EG: VTX_READ_16
; EG: VTX_READ_16
-; SI-LABEL: {{^}}v16i16_arg:
-; SI: buffer_load_ushort
-; SI: buffer_load_ushort
-; SI: buffer_load_ushort
-; SI: buffer_load_ushort
-; SI: buffer_load_ushort
-; SI: buffer_load_ushort
-; SI: buffer_load_ushort
-; SI: buffer_load_ushort
-; SI: buffer_load_ushort
-; SI: buffer_load_ushort
-; SI: buffer_load_ushort
-; SI: buffer_load_ushort
-; SI: buffer_load_ushort
-; SI: buffer_load_ushort
-; SI: buffer_load_ushort
-; SI: buffer_load_ushort
+; GCN: buffer_load_ushort
+; GCN: buffer_load_ushort
+; GCN: buffer_load_ushort
+; GCN: buffer_load_ushort
+; GCN: buffer_load_ushort
+; GCN: buffer_load_ushort
+; GCN: buffer_load_ushort
+; GCN: buffer_load_ushort
+; GCN: buffer_load_ushort
+; GCN: buffer_load_ushort
+; GCN: buffer_load_ushort
+; GCN: buffer_load_ushort
+; GCN: buffer_load_ushort
+; GCN: buffer_load_ushort
+; GCN: buffer_load_ushort
+; GCN: buffer_load_ushort
define void @v16i16_arg(<16 x i16> addrspace(1)* %out, <16 x i16> %in) {
entry:
store <16 x i16> %in, <16 x i16> addrspace(1)* %out
ret void
}
-; EG-LABEL: {{^}}v16i32_arg:
+; FUNC-LABEL: {{^}}v16i32_arg:
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].Y
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].Z
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].W
@@ -421,15 +409,15 @@ entry:
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].Z
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].W
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[10].X
-; SI-LABEL: {{^}}v16i32_arg:
; SI: s_load_dwordx16 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x19
+; VI: s_load_dwordx16 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x64
define void @v16i32_arg(<16 x i32> addrspace(1)* nocapture %out, <16 x i32> %in) nounwind {
entry:
store <16 x i32> %in, <16 x i32> addrspace(1)* %out, align 4
ret void
}
-; EG-LABEL: {{^}}v16f32_arg:
+; FUNC-LABEL: {{^}}v16f32_arg:
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].Y
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].Z
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].W
@@ -446,8 +434,8 @@ entry:
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].Z
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].W
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[10].X
-; SI-LABEL: {{^}}v16f32_arg:
; SI: s_load_dwordx16 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x19
+; VI: s_load_dwordx16 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x64
define void @v16f32_arg(<16 x float> addrspace(1)* nocapture %out, <16 x float> %in) nounwind {
entry:
store <16 x float> %in, <16 x float> addrspace(1)* %out, align 4
@@ -455,18 +443,30 @@ entry:
}
; FUNC-LABEL: {{^}}kernel_arg_i64:
-; SI: s_load_dwordx2
-; SI: s_load_dwordx2
-; SI: buffer_store_dwordx2
+; GCN: s_load_dwordx2
+; GCN: s_load_dwordx2
+; GCN: buffer_store_dwordx2
define void @kernel_arg_i64(i64 addrspace(1)* %out, i64 %a) nounwind {
store i64 %a, i64 addrspace(1)* %out, align 8
ret void
}
+; FUNC-LABEL: {{^}}f64_kernel_arg:
+; SI-DAG: s_load_dwordx2 s[{{[0-9]:[0-9]}}], s[0:1], 0x9
+; SI-DAG: s_load_dwordx2 s[{{[0-9]:[0-9]}}], s[0:1], 0xb
+; VI-DAG: s_load_dwordx2 s[{{[0-9]:[0-9]}}], s[0:1], 0x24
+; VI-DAG: s_load_dwordx2 s[{{[0-9]:[0-9]}}], s[0:1], 0x2c
+; GCN: buffer_store_dwordx2
+define void @f64_kernel_arg(double addrspace(1)* %out, double %in) {
+entry:
+ store double %in, double addrspace(1)* %out
+ ret void
+}
+
; XFUNC-LABEL: {{^}}kernel_arg_v1i64:
-; XSI: s_load_dwordx2
-; XSI: s_load_dwordx2
-; XSI: buffer_store_dwordx2
+; XGCN: s_load_dwordx2
+; XGCN: s_load_dwordx2
+; XGCN: buffer_store_dwordx2
; define void @kernel_arg_v1i64(<1 x i64> addrspace(1)* %out, <1 x i64> %a) nounwind {
; store <1 x i64> %a, <1 x i64> addrspace(1)* %out, align 8
; ret void
diff --git a/test/CodeGen/R600/large-alloca.ll b/test/CodeGen/R600/large-alloca.ll
index 788816cf723f..671833d1a33a 100644
--- a/test/CodeGen/R600/large-alloca.ll
+++ b/test/CodeGen/R600/large-alloca.ll
@@ -5,10 +5,10 @@
define void @large_alloca(i32 addrspace(1)* %out, i32 %x, i32 %y) nounwind {
%large = alloca [8192 x i32], align 4
- %gep = getelementptr [8192 x i32]* %large, i32 0, i32 8191
+ %gep = getelementptr [8192 x i32], [8192 x i32]* %large, i32 0, i32 8191
store i32 %x, i32* %gep
- %gep1 = getelementptr [8192 x i32]* %large, i32 0, i32 %y
- %0 = load i32* %gep1
+ %gep1 = getelementptr [8192 x i32], [8192 x i32]* %large, i32 0, i32 %y
+ %0 = load i32, i32* %gep1
store i32 %0, i32 addrspace(1)* %out
ret void
}
diff --git a/test/CodeGen/R600/large-constant-initializer.ll b/test/CodeGen/R600/large-constant-initializer.ll
index c8671efbe6f9..9975b1b7f5cc 100644
--- a/test/CodeGen/R600/large-constant-initializer.ll
+++ b/test/CodeGen/R600/large-constant-initializer.ll
@@ -5,7 +5,7 @@
@gv = external unnamed_addr addrspace(2) constant [239 x i32], align 4
define void @opencv_cvtfloat_crash(i32 addrspace(1)* %out, i32 %x) nounwind {
- %val = load i32 addrspace(2)* getelementptr ([239 x i32] addrspace(2)* @gv, i64 0, i64 239), align 4
+ %val = load i32, i32 addrspace(2)* getelementptr ([239 x i32], [239 x i32] addrspace(2)* @gv, i64 0, i64 239), align 4
%mul12 = mul nsw i32 %val, 7
br i1 undef, label %exit, label %bb
diff --git a/test/CodeGen/R600/lds-initializer.ll b/test/CodeGen/R600/lds-initializer.ll
index 7344eff2572f..bf8df63be9fd 100644
--- a/test/CodeGen/R600/lds-initializer.ll
+++ b/test/CodeGen/R600/lds-initializer.ll
@@ -6,8 +6,8 @@
@lds = addrspace(3) global [8 x i32] [i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8]
define void @load_init_lds_global(i32 addrspace(1)* %out, i1 %p) {
- %gep = getelementptr [8 x i32] addrspace(3)* @lds, i32 0, i32 10
- %ld = load i32 addrspace(3)* %gep
+ %gep = getelementptr [8 x i32], [8 x i32] addrspace(3)* @lds, i32 0, i32 10
+ %ld = load i32, i32 addrspace(3)* %gep
store i32 %ld, i32 addrspace(1)* %out
ret void
}
diff --git a/test/CodeGen/R600/lds-oqap-crash.ll b/test/CodeGen/R600/lds-oqap-crash.ll
index fbcd778de2c2..6ff6fc3d7afc 100644
--- a/test/CodeGen/R600/lds-oqap-crash.ll
+++ b/test/CodeGen/R600/lds-oqap-crash.ll
@@ -12,7 +12,7 @@
; CHECK: {{^}}lds_crash:
define void @lds_crash(i32 addrspace(1)* %out, i32 addrspace(3)* %in, i32 %a, i32 %b, i32 %c) {
entry:
- %0 = load i32 addrspace(3)* %in
+ %0 = load i32, i32 addrspace(3)* %in
; This block needs to be > 115 ISA instructions to hit the bug,
; so we'll use udiv instructions.
%div0 = udiv i32 %0, %b
diff --git a/test/CodeGen/R600/lds-output-queue.ll b/test/CodeGen/R600/lds-output-queue.ll
index cda75b0e0ccc..44ffc36af149 100644
--- a/test/CodeGen/R600/lds-output-queue.ll
+++ b/test/CodeGen/R600/lds-output-queue.ll
@@ -12,12 +12,12 @@
define void @lds_input_queue(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %index) {
entry:
- %0 = getelementptr inbounds [2 x i32] addrspace(3)* @local_mem, i32 0, i32 %index
- %1 = load i32 addrspace(3)* %0
+ %0 = getelementptr inbounds [2 x i32], [2 x i32] addrspace(3)* @local_mem, i32 0, i32 %index
+ %1 = load i32, i32 addrspace(3)* %0
call void @llvm.AMDGPU.barrier.local()
; This will start a new clause for the vertex fetch
- %2 = load i32 addrspace(1)* %in
+ %2 = load i32, i32 addrspace(1)* %in
%3 = add i32 %1, %2
store i32 %3, i32 addrspace(1)* %out
ret void
@@ -40,9 +40,9 @@ declare void @llvm.AMDGPU.barrier.local()
; load from global memory which immediately follows a load from a global value that
; has been declared in the local memory space:
;
-; %0 = getelementptr inbounds [2 x i32] addrspace(3)* @local_mem, i32 0, i32 %index
-; %1 = load i32 addrspace(3)* %0
-; %2 = load i32 addrspace(1)* %in
+; %0 = getelementptr inbounds [2 x i32], [2 x i32] addrspace(3)* @local_mem, i32 0, i32 %index
+; %1 = load i32, i32 addrspace(3)* %0
+; %2 = load i32, i32 addrspace(1)* %in
;
; The instruction selection phase will generate ISA that looks like this:
; %OQAP = LDS_READ_RET
@@ -90,9 +90,9 @@ declare void @llvm.AMDGPU.barrier.local()
; CHECK: MOV * T{{[0-9]\.[XYZW]}}, OQAP
define void @local_global_alias(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
entry:
- %0 = getelementptr inbounds [2 x i32] addrspace(3)* @local_mem, i32 0, i32 0
- %1 = load i32 addrspace(3)* %0
- %2 = load i32 addrspace(1)* %in
+ %0 = getelementptr inbounds [2 x i32], [2 x i32] addrspace(3)* @local_mem, i32 0, i32 0
+ %1 = load i32, i32 addrspace(3)* %0
+ %2 = load i32, i32 addrspace(1)* %in
%3 = add i32 %2, %1
store i32 %3, i32 addrspace(1)* %out
ret void
diff --git a/test/CodeGen/R600/lds-size.ll b/test/CodeGen/R600/lds-size.ll
index 5287723ce191..3e8328659fdb 100644
--- a/test/CodeGen/R600/lds-size.ll
+++ b/test/CodeGen/R600/lds-size.ll
@@ -3,9 +3,9 @@
; This test makes sure we do not double count global values when they are
; used in different basic blocks.
-; CHECK-LABEL: {{^}}test:
; CHECK: .long 166120
; CHECK-NEXT: .long 1
+; CHECK-LABEL: {{^}}test:
@lds = internal unnamed_addr addrspace(3) global i32 undef, align 4
define void @test(i32 addrspace(1)* %out, i32 %cond) {
diff --git a/test/CodeGen/R600/lds-zero-initializer.ll b/test/CodeGen/R600/lds-zero-initializer.ll
index 1fb6f52f29b9..fb51bc0e50c2 100644
--- a/test/CodeGen/R600/lds-zero-initializer.ll
+++ b/test/CodeGen/R600/lds-zero-initializer.ll
@@ -6,8 +6,8 @@
@lds = addrspace(3) global [256 x i32] zeroinitializer
define void @load_zeroinit_lds_global(i32 addrspace(1)* %out, i1 %p) {
- %gep = getelementptr [256 x i32] addrspace(3)* @lds, i32 0, i32 10
- %ld = load i32 addrspace(3)* %gep
+ %gep = getelementptr [256 x i32], [256 x i32] addrspace(3)* @lds, i32 0, i32 10
+ %ld = load i32, i32 addrspace(3)* %gep
store i32 %ld, i32 addrspace(1)* %out
ret void
}
diff --git a/test/CodeGen/R600/legalizedag-bug-expand-setcc.ll b/test/CodeGen/R600/legalizedag-bug-expand-setcc.ll
index b9fa8e938ae4..4244c48d240e 100644
--- a/test/CodeGen/R600/legalizedag-bug-expand-setcc.ll
+++ b/test/CodeGen/R600/legalizedag-bug-expand-setcc.ll
@@ -16,7 +16,7 @@ entry:
%0 = icmp eq i32 %in, 5
br i1 %0, label %IF, label %ENDIF
IF:
- %1 = getelementptr i32 addrspace(1)* %out, i32 1
+ %1 = getelementptr i32, i32 addrspace(1)* %out, i32 1
store i32 0, i32 addrspace(1)* %1
br label %ENDIF
diff --git a/test/CodeGen/R600/llvm.AMDGPU.abs.ll b/test/CodeGen/R600/llvm.AMDGPU.abs.ll
index 8bc2583899bd..8bf094b8bc7b 100644
--- a/test/CodeGen/R600/llvm.AMDGPU.abs.ll
+++ b/test/CodeGen/R600/llvm.AMDGPU.abs.ll
@@ -28,7 +28,7 @@ define void @s_abs_i32(i32 addrspace(1)* %out, i32 %src) nounwind {
; EG: SUB_INT
; EG: MAX_INT
define void @v_abs_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %src) nounwind {
- %val = load i32 addrspace(1)* %src, align 4
+ %val = load i32, i32 addrspace(1)* %src, align 4
%abs = call i32 @llvm.AMDGPU.abs(i32 %val) nounwind readnone
store i32 %abs, i32 addrspace(1)* %out, align 4
ret void
@@ -42,7 +42,7 @@ define void @v_abs_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %src) nounwind
; EG: SUB_INT
; EG: MAX_INT
define void @abs_i32_legacy_amdil(i32 addrspace(1)* %out, i32 addrspace(1)* %src) nounwind {
- %val = load i32 addrspace(1)* %src, align 4
+ %val = load i32, i32 addrspace(1)* %src, align 4
%abs = call i32 @llvm.AMDIL.abs.i32(i32 %val) nounwind readnone
store i32 %abs, i32 addrspace(1)* %out, align 4
ret void
diff --git a/test/CodeGen/R600/llvm.AMDGPU.barrier.global.ll b/test/CodeGen/R600/llvm.AMDGPU.barrier.global.ll
index a11d9ae7af08..db883972d646 100644
--- a/test/CodeGen/R600/llvm.AMDGPU.barrier.global.ll
+++ b/test/CodeGen/R600/llvm.AMDGPU.barrier.global.ll
@@ -10,14 +10,14 @@
define void @test_barrier_global(i32 addrspace(1)* %out) {
entry:
%0 = call i32 @llvm.r600.read.tidig.x()
- %1 = getelementptr i32 addrspace(1)* %out, i32 %0
+ %1 = getelementptr i32, i32 addrspace(1)* %out, i32 %0
store i32 %0, i32 addrspace(1)* %1
call void @llvm.AMDGPU.barrier.global()
%2 = call i32 @llvm.r600.read.local.size.x()
%3 = sub i32 %2, 1
%4 = sub i32 %3, %0
- %5 = getelementptr i32 addrspace(1)* %out, i32 %4
- %6 = load i32 addrspace(1)* %5
+ %5 = getelementptr i32, i32 addrspace(1)* %out, i32 %4
+ %6 = load i32, i32 addrspace(1)* %5
store i32 %6, i32 addrspace(1)* %1
ret void
}
diff --git a/test/CodeGen/R600/llvm.AMDGPU.barrier.local.ll b/test/CodeGen/R600/llvm.AMDGPU.barrier.local.ll
index 76c2453d089f..48fb2e0b1a8d 100644
--- a/test/CodeGen/R600/llvm.AMDGPU.barrier.local.ll
+++ b/test/CodeGen/R600/llvm.AMDGPU.barrier.local.ll
@@ -11,14 +11,14 @@
define void @test_barrier_local(i32 addrspace(1)* %out) {
entry:
%0 = call i32 @llvm.r600.read.tidig.x()
- %1 = getelementptr i32 addrspace(1)* %out, i32 %0
+ %1 = getelementptr i32, i32 addrspace(1)* %out, i32 %0
store i32 %0, i32 addrspace(1)* %1
call void @llvm.AMDGPU.barrier.local()
%2 = call i32 @llvm.r600.read.local.size.x()
%3 = sub i32 %2, 1
%4 = sub i32 %3, %0
- %5 = getelementptr i32 addrspace(1)* %out, i32 %4
- %6 = load i32 addrspace(1)* %5
+ %5 = getelementptr i32, i32 addrspace(1)* %out, i32 %4
+ %6 = load i32, i32 addrspace(1)* %5
store i32 %6, i32 addrspace(1)* %1
ret void
}
diff --git a/test/CodeGen/R600/llvm.AMDGPU.bfe.i32.ll b/test/CodeGen/R600/llvm.AMDGPU.bfe.i32.ll
index 2ec2546be39b..1168713ca66e 100644
--- a/test/CodeGen/R600/llvm.AMDGPU.bfe.i32.ll
+++ b/test/CodeGen/R600/llvm.AMDGPU.bfe.i32.ll
@@ -44,7 +44,7 @@ define void @bfe_i32_imm_arg_arg(i32 addrspace(1)* %out, i32 %src1, i32 %src2) n
; FUNC-LABEL: {{^}}v_bfe_print_arg:
; SI: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 2, 8
define void @v_bfe_print_arg(i32 addrspace(1)* %out, i32 addrspace(1)* %src0) nounwind {
- %load = load i32 addrspace(1)* %src0, align 4
+ %load = load i32, i32 addrspace(1)* %src0, align 4
%bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 %load, i32 2, i32 8) nounwind readnone
store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
ret void
@@ -75,7 +75,7 @@ define void @bfe_i32_arg_0_width_imm_offset(i32 addrspace(1)* %out, i32 %src0, i
; SI: v_ashrrev_i32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}}
; SI: s_endpgm
define void @bfe_i32_test_6(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
- %x = load i32 addrspace(1)* %in, align 4
+ %x = load i32, i32 addrspace(1)* %in, align 4
%shl = shl i32 %x, 31
%bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %shl, i32 1, i32 31)
store i32 %bfe, i32 addrspace(1)* %out, align 4
@@ -89,20 +89,19 @@ define void @bfe_i32_test_6(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounw
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
define void @bfe_i32_test_7(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
- %x = load i32 addrspace(1)* %in, align 4
+ %x = load i32, i32 addrspace(1)* %in, align 4
%shl = shl i32 %x, 31
%bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %shl, i32 0, i32 31)
store i32 %bfe, i32 addrspace(1)* %out, align 4
ret void
}
-; FIXME: The shifts should be 1 BFE
; FUNC-LABEL: {{^}}bfe_i32_test_8:
; SI: buffer_load_dword
; SI: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 1
; SI: s_endpgm
define void @bfe_i32_test_8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
- %x = load i32 addrspace(1)* %in, align 4
+ %x = load i32, i32 addrspace(1)* %in, align 4
%shl = shl i32 %x, 31
%bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %shl, i32 31, i32 1)
store i32 %bfe, i32 addrspace(1)* %out, align 4
@@ -115,7 +114,7 @@ define void @bfe_i32_test_8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounw
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @bfe_i32_test_9(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
- %x = load i32 addrspace(1)* %in, align 4
+ %x = load i32, i32 addrspace(1)* %in, align 4
%bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %x, i32 31, i32 1)
store i32 %bfe, i32 addrspace(1)* %out, align 4
ret void
@@ -127,7 +126,7 @@ define void @bfe_i32_test_9(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounw
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @bfe_i32_test_10(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
- %x = load i32 addrspace(1)* %in, align 4
+ %x = load i32, i32 addrspace(1)* %in, align 4
%bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %x, i32 1, i32 31)
store i32 %bfe, i32 addrspace(1)* %out, align 4
ret void
@@ -139,7 +138,7 @@ define void @bfe_i32_test_10(i32 addrspace(1)* %out, i32 addrspace(1)* %in) noun
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @bfe_i32_test_11(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
- %x = load i32 addrspace(1)* %in, align 4
+ %x = load i32, i32 addrspace(1)* %in, align 4
%bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %x, i32 8, i32 24)
store i32 %bfe, i32 addrspace(1)* %out, align 4
ret void
@@ -151,7 +150,7 @@ define void @bfe_i32_test_11(i32 addrspace(1)* %out, i32 addrspace(1)* %in) noun
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @bfe_i32_test_12(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
- %x = load i32 addrspace(1)* %in, align 4
+ %x = load i32, i32 addrspace(1)* %in, align 4
%bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %x, i32 24, i32 8)
store i32 %bfe, i32 addrspace(1)* %out, align 4
ret void
@@ -162,7 +161,7 @@ define void @bfe_i32_test_12(i32 addrspace(1)* %out, i32 addrspace(1)* %in) noun
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @bfe_i32_test_13(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
- %x = load i32 addrspace(1)* %in, align 4
+ %x = load i32, i32 addrspace(1)* %in, align 4
%shl = ashr i32 %x, 31
%bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %shl, i32 31, i32 1)
store i32 %bfe, i32 addrspace(1)* %out, align 4 ret void
@@ -173,7 +172,7 @@ define void @bfe_i32_test_13(i32 addrspace(1)* %out, i32 addrspace(1)* %in) noun
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @bfe_i32_test_14(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
- %x = load i32 addrspace(1)* %in, align 4
+ %x = load i32, i32 addrspace(1)* %in, align 4
%shl = lshr i32 %x, 31
%bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %shl, i32 31, i32 1)
store i32 %bfe, i32 addrspace(1)* %out, align 4 ret void
@@ -407,18 +406,14 @@ define void @bfe_i32_constant_fold_test_18(i32 addrspace(1)* %out) nounwind {
ret void
}
-; XXX - This should really be a single BFE, but the sext_inreg of the
-; extended type i24 is never custom lowered.
; FUNC-LABEL: {{^}}bfe_sext_in_reg_i24:
; SI: buffer_load_dword [[LOAD:v[0-9]+]],
-; SI: v_lshlrev_b32_e32 {{v[0-9]+}}, 8, {{v[0-9]+}}
-; SI: v_ashrrev_i32_e32 {{v[0-9]+}}, 8, {{v[0-9]+}}
-; XSI: v_bfe_i32 [[BFE:v[0-9]+]], [[LOAD]], 0, 8
-; XSI-NOT: SHL
-; XSI-NOT: SHR
-; XSI: buffer_store_dword [[BFE]],
+; SI-NOT: v_lshl
+; SI-NOT: v_ashr
+; SI: v_bfe_i32 [[BFE:v[0-9]+]], [[LOAD]], 0, 24
+; SI: buffer_store_dword [[BFE]],
define void @bfe_sext_in_reg_i24(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
- %x = load i32 addrspace(1)* %in, align 4
+ %x = load i32, i32 addrspace(1)* %in, align 4
%bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %x, i32 0, i32 24)
%shl = shl i32 %bfe, 8
%ashr = ashr i32 %shl, 8
@@ -434,7 +429,7 @@ define void @bfe_sext_in_reg_i24(i32 addrspace(1)* %out, i32 addrspace(1)* %in)
; SI: v_ashrrev_i32_e32 [[TMP2:v[0-9]+]], 1, [[TMP1]]
; SI: buffer_store_dword [[TMP2]]
define void @simplify_demanded_bfe_sdiv(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
- %src = load i32 addrspace(1)* %in, align 4
+ %src = load i32, i32 addrspace(1)* %in, align 4
%bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %src, i32 1, i32 16) nounwind readnone
%div = sdiv i32 %bfe, 2
store i32 %div, i32 addrspace(1)* %out, align 4
diff --git a/test/CodeGen/R600/llvm.AMDGPU.bfe.u32.ll b/test/CodeGen/R600/llvm.AMDGPU.bfe.u32.ll
index 6cd0108def2d..541119242a94 100644
--- a/test/CodeGen/R600/llvm.AMDGPU.bfe.u32.ll
+++ b/test/CodeGen/R600/llvm.AMDGPU.bfe.u32.ll
@@ -65,7 +65,7 @@ define void @bfe_u32_arg_0_width_imm_offset(i32 addrspace(1)* %out, i32 %src0, i
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @bfe_u32_zextload_i8(i32 addrspace(1)* %out, i8 addrspace(1)* %in) nounwind {
- %load = load i8 addrspace(1)* %in
+ %load = load i8, i8 addrspace(1)* %in
%ext = zext i8 %load to i32
%bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %ext, i32 0, i32 8)
store i32 %bfe, i32 addrspace(1)* %out, align 4
@@ -79,7 +79,7 @@ define void @bfe_u32_zextload_i8(i32 addrspace(1)* %out, i8 addrspace(1)* %in) n
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @bfe_u32_zext_in_reg_i8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
- %load = load i32 addrspace(1)* %in, align 4
+ %load = load i32, i32 addrspace(1)* %in, align 4
%add = add i32 %load, 1
%ext = and i32 %add, 255
%bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %ext, i32 0, i32 8)
@@ -94,7 +94,7 @@ define void @bfe_u32_zext_in_reg_i8(i32 addrspace(1)* %out, i32 addrspace(1)* %i
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @bfe_u32_zext_in_reg_i16(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
- %load = load i32 addrspace(1)* %in, align 4
+ %load = load i32, i32 addrspace(1)* %in, align 4
%add = add i32 %load, 1
%ext = and i32 %add, 65535
%bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %ext, i32 0, i32 16)
@@ -108,7 +108,7 @@ define void @bfe_u32_zext_in_reg_i16(i32 addrspace(1)* %out, i32 addrspace(1)* %
; SI: bfe
; SI: s_endpgm
define void @bfe_u32_zext_in_reg_i8_offset_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
- %load = load i32 addrspace(1)* %in, align 4
+ %load = load i32, i32 addrspace(1)* %in, align 4
%add = add i32 %load, 1
%ext = and i32 %add, 255
%bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %ext, i32 1, i32 8)
@@ -123,7 +123,7 @@ define void @bfe_u32_zext_in_reg_i8_offset_1(i32 addrspace(1)* %out, i32 addrspa
; SI-NEXT: bfe
; SI: s_endpgm
define void @bfe_u32_zext_in_reg_i8_offset_3(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
- %load = load i32 addrspace(1)* %in, align 4
+ %load = load i32, i32 addrspace(1)* %in, align 4
%add = add i32 %load, 1
%ext = and i32 %add, 255
%bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %ext, i32 3, i32 8)
@@ -138,7 +138,7 @@ define void @bfe_u32_zext_in_reg_i8_offset_3(i32 addrspace(1)* %out, i32 addrspa
; SI-NEXT: bfe
; SI: s_endpgm
define void @bfe_u32_zext_in_reg_i8_offset_7(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
- %load = load i32 addrspace(1)* %in, align 4
+ %load = load i32, i32 addrspace(1)* %in, align 4
%add = add i32 %load, 1
%ext = and i32 %add, 255
%bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %ext, i32 7, i32 8)
@@ -152,7 +152,7 @@ define void @bfe_u32_zext_in_reg_i8_offset_7(i32 addrspace(1)* %out, i32 addrspa
; SI-NEXT: bfe
; SI: s_endpgm
define void @bfe_u32_zext_in_reg_i16_offset_8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
- %load = load i32 addrspace(1)* %in, align 4
+ %load = load i32, i32 addrspace(1)* %in, align 4
%add = add i32 %load, 1
%ext = and i32 %add, 65535
%bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %ext, i32 8, i32 8)
@@ -166,14 +166,14 @@ define void @bfe_u32_zext_in_reg_i16_offset_8(i32 addrspace(1)* %out, i32 addrsp
; SI: s_endpgm
; EG: AND_INT T{{[0-9]\.[XYZW]}}, T{{[0-9]\.[XYZW]}}, 1,
define void @bfe_u32_test_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
- %x = load i32 addrspace(1)* %in, align 4
+ %x = load i32, i32 addrspace(1)* %in, align 4
%bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %x, i32 0, i32 1)
store i32 %bfe, i32 addrspace(1)* %out, align 4
ret void
}
define void @bfe_u32_test_2(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
- %x = load i32 addrspace(1)* %in, align 4
+ %x = load i32, i32 addrspace(1)* %in, align 4
%shl = shl i32 %x, 31
%bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %shl, i32 0, i32 8)
store i32 %bfe, i32 addrspace(1)* %out, align 4
@@ -181,7 +181,7 @@ define void @bfe_u32_test_2(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounw
}
define void @bfe_u32_test_3(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
- %x = load i32 addrspace(1)* %in, align 4
+ %x = load i32, i32 addrspace(1)* %in, align 4
%shl = shl i32 %x, 31
%bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %shl, i32 0, i32 1)
store i32 %bfe, i32 addrspace(1)* %out, align 4
@@ -196,7 +196,7 @@ define void @bfe_u32_test_3(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounw
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
define void @bfe_u32_test_4(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
- %x = load i32 addrspace(1)* %in, align 4
+ %x = load i32, i32 addrspace(1)* %in, align 4
%shl = shl i32 %x, 31
%shr = lshr i32 %shl, 31
%bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %shr, i32 31, i32 1)
@@ -211,7 +211,7 @@ define void @bfe_u32_test_4(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounw
; SI: v_bfe_i32 {{v[0-9]+}}, {{v[0-9]+}}, 0, 1
; SI: s_endpgm
define void @bfe_u32_test_5(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
- %x = load i32 addrspace(1)* %in, align 4
+ %x = load i32, i32 addrspace(1)* %in, align 4
%shl = shl i32 %x, 31
%shr = ashr i32 %shl, 31
%bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %shr, i32 0, i32 1)
@@ -224,7 +224,7 @@ define void @bfe_u32_test_5(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounw
; SI: v_lshrrev_b32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}}
; SI: s_endpgm
define void @bfe_u32_test_6(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
- %x = load i32 addrspace(1)* %in, align 4
+ %x = load i32, i32 addrspace(1)* %in, align 4
%shl = shl i32 %x, 31
%bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %shl, i32 1, i32 31)
store i32 %bfe, i32 addrspace(1)* %out, align 4
@@ -236,7 +236,7 @@ define void @bfe_u32_test_6(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounw
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @bfe_u32_test_7(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
- %x = load i32 addrspace(1)* %in, align 4
+ %x = load i32, i32 addrspace(1)* %in, align 4
%shl = shl i32 %x, 31
%bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %shl, i32 0, i32 31)
store i32 %bfe, i32 addrspace(1)* %out, align 4
@@ -249,7 +249,7 @@ define void @bfe_u32_test_7(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounw
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @bfe_u32_test_8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
- %x = load i32 addrspace(1)* %in, align 4
+ %x = load i32, i32 addrspace(1)* %in, align 4
%shl = shl i32 %x, 31
%bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %shl, i32 31, i32 1)
store i32 %bfe, i32 addrspace(1)* %out, align 4
@@ -262,7 +262,7 @@ define void @bfe_u32_test_8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounw
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @bfe_u32_test_9(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
- %x = load i32 addrspace(1)* %in, align 4
+ %x = load i32, i32 addrspace(1)* %in, align 4
%bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %x, i32 31, i32 1)
store i32 %bfe, i32 addrspace(1)* %out, align 4
ret void
@@ -274,7 +274,7 @@ define void @bfe_u32_test_9(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounw
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @bfe_u32_test_10(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
- %x = load i32 addrspace(1)* %in, align 4
+ %x = load i32, i32 addrspace(1)* %in, align 4
%bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %x, i32 1, i32 31)
store i32 %bfe, i32 addrspace(1)* %out, align 4
ret void
@@ -286,7 +286,7 @@ define void @bfe_u32_test_10(i32 addrspace(1)* %out, i32 addrspace(1)* %in) noun
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @bfe_u32_test_11(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
- %x = load i32 addrspace(1)* %in, align 4
+ %x = load i32, i32 addrspace(1)* %in, align 4
%bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %x, i32 8, i32 24)
store i32 %bfe, i32 addrspace(1)* %out, align 4
ret void
@@ -298,7 +298,7 @@ define void @bfe_u32_test_11(i32 addrspace(1)* %out, i32 addrspace(1)* %in) noun
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @bfe_u32_test_12(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
- %x = load i32 addrspace(1)* %in, align 4
+ %x = load i32, i32 addrspace(1)* %in, align 4
%bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %x, i32 24, i32 8)
store i32 %bfe, i32 addrspace(1)* %out, align 4
ret void
@@ -309,7 +309,7 @@ define void @bfe_u32_test_12(i32 addrspace(1)* %out, i32 addrspace(1)* %in) noun
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @bfe_u32_test_13(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
- %x = load i32 addrspace(1)* %in, align 4
+ %x = load i32, i32 addrspace(1)* %in, align 4
%shl = ashr i32 %x, 31
%bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %shl, i32 31, i32 1)
store i32 %bfe, i32 addrspace(1)* %out, align 4 ret void
@@ -320,7 +320,7 @@ define void @bfe_u32_test_13(i32 addrspace(1)* %out, i32 addrspace(1)* %in) noun
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @bfe_u32_test_14(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
- %x = load i32 addrspace(1)* %in, align 4
+ %x = load i32, i32 addrspace(1)* %in, align 4
%shl = lshr i32 %x, 31
%bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %shl, i32 31, i32 1)
store i32 %bfe, i32 addrspace(1)* %out, align 4 ret void
@@ -439,7 +439,7 @@ define void @bfe_u32_constant_fold_test_8(i32 addrspace(1)* %out) nounwind {
; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 1
; SI: buffer_store_dword [[VREG]],
; SI: s_endpgm
-; EG-NOT: BFEfppppppppppppp
+; EG-NOT: BFE
define void @bfe_u32_constant_fold_test_9(i32 addrspace(1)* %out) nounwind {
%bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 65536, i32 16, i32 8) nounwind readnone
store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
@@ -568,10 +568,60 @@ define void @bfe_u32_constant_fold_test_18(i32 addrspace(1)* %out) nounwind {
define void @simplify_bfe_u32_multi_use_arg(i32 addrspace(1)* %out0,
i32 addrspace(1)* %out1,
i32 addrspace(1)* %in) nounwind {
- %src = load i32 addrspace(1)* %in, align 4
+ %src = load i32, i32 addrspace(1)* %in, align 4
%and = and i32 %src, 63
%bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 %and, i32 2, i32 2) nounwind readnone
store i32 %bfe_u32, i32 addrspace(1)* %out0, align 4
store i32 %and, i32 addrspace(1)* %out1, align 4
ret void
}
+
+; FUNC-LABEL: {{^}}lshr_and:
+; SI: s_bfe_u32 {{s[0-9]+}}, {{s[0-9]+}}, 0x30006
+; SI: buffer_store_dword
+define void @lshr_and(i32 addrspace(1)* %out, i32 %a) nounwind {
+ %b = lshr i32 %a, 6
+ %c = and i32 %b, 7
+ store i32 %c, i32 addrspace(1)* %out, align 8
+ ret void
+}
+
+; FUNC-LABEL: {{^}}v_lshr_and:
+; SI: v_bfe_u32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}, 3
+; SI: buffer_store_dword
+define void @v_lshr_and(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
+ %c = lshr i32 %a, %b
+ %d = and i32 %c, 7
+ store i32 %d, i32 addrspace(1)* %out, align 8
+ ret void
+}
+
+; FUNC-LABEL: {{^}}and_lshr:
+; SI: s_bfe_u32 {{s[0-9]+}}, {{s[0-9]+}}, 0x30006
+; SI: buffer_store_dword
+define void @and_lshr(i32 addrspace(1)* %out, i32 %a) nounwind {
+ %b = and i32 %a, 448
+ %c = lshr i32 %b, 6
+ store i32 %c, i32 addrspace(1)* %out, align 8
+ ret void
+}
+
+; FUNC-LABEL: {{^}}and_lshr2:
+; SI: s_bfe_u32 {{s[0-9]+}}, {{s[0-9]+}}, 0x30006
+; SI: buffer_store_dword
+define void @and_lshr2(i32 addrspace(1)* %out, i32 %a) nounwind {
+ %b = and i32 %a, 511
+ %c = lshr i32 %b, 6
+ store i32 %c, i32 addrspace(1)* %out, align 8
+ ret void
+}
+
+; FUNC-LABEL: {{^}}shl_lshr:
+; SI: s_bfe_u32 {{s[0-9]+}}, {{s[0-9]+}}, 0x150002
+; SI: buffer_store_dword
+define void @shl_lshr(i32 addrspace(1)* %out, i32 %a) nounwind {
+ %b = shl i32 %a, 9
+ %c = lshr i32 %b, 11
+ store i32 %c, i32 addrspace(1)* %out, align 8
+ ret void
+}
diff --git a/test/CodeGen/R600/llvm.AMDGPU.bfm.ll b/test/CodeGen/R600/llvm.AMDGPU.bfm.ll
index 2346f408ec44..50492289d744 100644
--- a/test/CodeGen/R600/llvm.AMDGPU.bfm.ll
+++ b/test/CodeGen/R600/llvm.AMDGPU.bfm.ll
@@ -5,7 +5,7 @@
declare i32 @llvm.AMDGPU.bfm(i32, i32) nounwind readnone
; FUNC-LABEL: {{^}}bfm_arg_arg:
-; SI: v_bfm
+; SI: s_bfm_b32 {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
; EG: BFM_INT
define void @bfm_arg_arg(i32 addrspace(1)* %out, i32 %src0, i32 %src1) nounwind {
%bfm = call i32 @llvm.AMDGPU.bfm(i32 %src0, i32 %src1) nounwind readnone
@@ -14,7 +14,7 @@ define void @bfm_arg_arg(i32 addrspace(1)* %out, i32 %src0, i32 %src1) nounwind
}
; FUNC-LABEL: {{^}}bfm_arg_imm:
-; SI: v_bfm
+; SI: s_bfm_b32 {{s[0-9]+}}, {{s[0-9]+}}, 0x7b
; EG: BFM_INT
define void @bfm_arg_imm(i32 addrspace(1)* %out, i32 %src0) nounwind {
%bfm = call i32 @llvm.AMDGPU.bfm(i32 %src0, i32 123) nounwind readnone
@@ -23,7 +23,7 @@ define void @bfm_arg_imm(i32 addrspace(1)* %out, i32 %src0) nounwind {
}
; FUNC-LABEL: {{^}}bfm_imm_arg:
-; SI: v_bfm
+; SI: s_bfm_b32 {{s[0-9]+}}, 0x7b, {{s[0-9]+}}
; EG: BFM_INT
define void @bfm_imm_arg(i32 addrspace(1)* %out, i32 %src1) nounwind {
%bfm = call i32 @llvm.AMDGPU.bfm(i32 123, i32 %src1) nounwind readnone
@@ -32,10 +32,29 @@ define void @bfm_imm_arg(i32 addrspace(1)* %out, i32 %src1) nounwind {
}
; FUNC-LABEL: {{^}}bfm_imm_imm:
-; SI: v_bfm
+; SI: s_bfm_b32 {{s[0-9]+}}, 0x7b, 0x1c8
; EG: BFM_INT
define void @bfm_imm_imm(i32 addrspace(1)* %out) nounwind {
%bfm = call i32 @llvm.AMDGPU.bfm(i32 123, i32 456) nounwind readnone
store i32 %bfm, i32 addrspace(1)* %out, align 4
ret void
}
+
+; FUNC-LABEL: {{^}}bfm_pattern:
+; SI: s_bfm_b32 {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+define void @bfm_pattern(i32 addrspace(1)* %out, i32 %x, i32 %y) {
+ %a = shl i32 1, %x
+ %b = sub i32 %a, 1
+ %c = shl i32 %b, %y
+ store i32 %c, i32 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}bfm_pattern_simple:
+; SI: s_bfm_b32 {{s[0-9]+}}, {{s[0-9]+}}, 0
+define void @bfm_pattern_simple(i32 addrspace(1)* %out, i32 %x) {
+ %a = shl i32 1, %x
+ %b = sub i32 %a, 1
+ store i32 %b, i32 addrspace(1)* %out
+ ret void
+}
diff --git a/test/CodeGen/R600/llvm.AMDGPU.brev.ll b/test/CodeGen/R600/llvm.AMDGPU.brev.ll
index 3973f539c135..301de4b1c82d 100644
--- a/test/CodeGen/R600/llvm.AMDGPU.brev.ll
+++ b/test/CodeGen/R600/llvm.AMDGPU.brev.ll
@@ -21,7 +21,7 @@ define void @s_brev_i32(i32 addrspace(1)* noalias %out, i32 %val) nounwind {
; SI: buffer_store_dword [[RESULT]],
; SI: s_endpgm
define void @v_brev_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
- %val = load i32 addrspace(1)* %valptr, align 4
+ %val = load i32, i32 addrspace(1)* %valptr, align 4
%ctlz = call i32 @llvm.AMDGPU.brev(i32 %val) nounwind readnone
store i32 %ctlz, i32 addrspace(1)* %out, align 4
ret void
diff --git a/test/CodeGen/R600/llvm.AMDGPU.class.ll b/test/CodeGen/R600/llvm.AMDGPU.class.ll
index 974e3c71e622..805a88b59c72 100644
--- a/test/CodeGen/R600/llvm.AMDGPU.class.ll
+++ b/test/CodeGen/R600/llvm.AMDGPU.class.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
declare i1 @llvm.AMDGPU.class.f32(float, i32) #1
declare i1 @llvm.AMDGPU.class.f64(double, i32) #1
@@ -134,9 +134,9 @@ define void @test_class_9bit_mask_f32(i32 addrspace(1)* %out, float %a) #0 {
; SI: s_endpgm
define void @v_test_class_full_mask_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #0 {
%tid = call i32 @llvm.r600.read.tidig.x() #1
- %gep.in = getelementptr float addrspace(1)* %in, i32 %tid
- %gep.out = getelementptr i32 addrspace(1)* %out, i32 %tid
- %a = load float addrspace(1)* %gep.in
+ %gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid
+ %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %a = load float, float addrspace(1)* %gep.in
%result = call i1 @llvm.AMDGPU.class.f32(float %a, i32 511) #1
%sext = sext i1 %result to i32
@@ -152,9 +152,9 @@ define void @v_test_class_full_mask_f32(i32 addrspace(1)* %out, float addrspace(
; SI: s_endpgm
define void @test_class_inline_imm_constant_dynamic_mask_f32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
%tid = call i32 @llvm.r600.read.tidig.x() #1
- %gep.in = getelementptr i32 addrspace(1)* %in, i32 %tid
- %gep.out = getelementptr i32 addrspace(1)* %out, i32 %tid
- %b = load i32 addrspace(1)* %gep.in
+ %gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
+ %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %b = load i32, i32 addrspace(1)* %gep.in
%result = call i1 @llvm.AMDGPU.class.f32(float 1.0, i32 %b) #1
%sext = sext i1 %result to i32
@@ -172,9 +172,9 @@ define void @test_class_inline_imm_constant_dynamic_mask_f32(i32 addrspace(1)* %
; SI: s_endpgm
define void @test_class_lit_constant_dynamic_mask_f32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
%tid = call i32 @llvm.r600.read.tidig.x() #1
- %gep.in = getelementptr i32 addrspace(1)* %in, i32 %tid
- %gep.out = getelementptr i32 addrspace(1)* %out, i32 %tid
- %b = load i32 addrspace(1)* %gep.in
+ %gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
+ %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %b = load i32, i32 addrspace(1)* %gep.in
%result = call i1 @llvm.AMDGPU.class.f32(float 1024.0, i32 %b) #1
%sext = sext i1 %result to i32
@@ -290,9 +290,9 @@ define void @test_class_full_mask_f64(i32 addrspace(1)* %out, double %a) #0 {
; SI: s_endpgm
define void @v_test_class_full_mask_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #0 {
%tid = call i32 @llvm.r600.read.tidig.x() #1
- %gep.in = getelementptr double addrspace(1)* %in, i32 %tid
- %gep.out = getelementptr i32 addrspace(1)* %out, i32 %tid
- %a = load double addrspace(1)* %in
+ %gep.in = getelementptr double, double addrspace(1)* %in, i32 %tid
+ %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %a = load double, double addrspace(1)* %in
%result = call i1 @llvm.AMDGPU.class.f64(double %a, i32 511) #1
%sext = sext i1 %result to i32
@@ -306,9 +306,9 @@ define void @v_test_class_full_mask_f64(i32 addrspace(1)* %out, double addrspace
; SI: s_endpgm
define void @test_class_inline_imm_constant_dynamic_mask_f64(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
%tid = call i32 @llvm.r600.read.tidig.x() #1
- %gep.in = getelementptr i32 addrspace(1)* %in, i32 %tid
- %gep.out = getelementptr i32 addrspace(1)* %out, i32 %tid
- %b = load i32 addrspace(1)* %gep.in
+ %gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
+ %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %b = load i32, i32 addrspace(1)* %gep.in
%result = call i1 @llvm.AMDGPU.class.f64(double 1.0, i32 %b) #1
%sext = sext i1 %result to i32
@@ -321,9 +321,9 @@ define void @test_class_inline_imm_constant_dynamic_mask_f64(i32 addrspace(1)* %
; SI: s_endpgm
define void @test_class_lit_constant_dynamic_mask_f64(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
%tid = call i32 @llvm.r600.read.tidig.x() #1
- %gep.in = getelementptr i32 addrspace(1)* %in, i32 %tid
- %gep.out = getelementptr i32 addrspace(1)* %out, i32 %tid
- %b = load i32 addrspace(1)* %gep.in
+ %gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
+ %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %b = load i32, i32 addrspace(1)* %gep.in
%result = call i1 @llvm.AMDGPU.class.f64(double 1024.0, i32 %b) #1
%sext = sext i1 %result to i32
@@ -338,9 +338,9 @@ define void @test_class_lit_constant_dynamic_mask_f64(i32 addrspace(1)* %out, i3
; SI: s_endpgm
define void @test_fold_or_class_f32_0(i32 addrspace(1)* %out, float addrspace(1)* %in) #0 {
%tid = call i32 @llvm.r600.read.tidig.x() #1
- %gep.in = getelementptr float addrspace(1)* %in, i32 %tid
- %gep.out = getelementptr i32 addrspace(1)* %out, i32 %tid
- %a = load float addrspace(1)* %gep.in
+ %gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid
+ %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %a = load float, float addrspace(1)* %gep.in
%class0 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 1) #1
%class1 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 3) #1
@@ -358,9 +358,9 @@ define void @test_fold_or_class_f32_0(i32 addrspace(1)* %out, float addrspace(1)
; SI: s_endpgm
define void @test_fold_or3_class_f32_0(i32 addrspace(1)* %out, float addrspace(1)* %in) #0 {
%tid = call i32 @llvm.r600.read.tidig.x() #1
- %gep.in = getelementptr float addrspace(1)* %in, i32 %tid
- %gep.out = getelementptr i32 addrspace(1)* %out, i32 %tid
- %a = load float addrspace(1)* %gep.in
+ %gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid
+ %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %a = load float, float addrspace(1)* %gep.in
%class0 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 1) #1
%class1 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 2) #1
@@ -381,9 +381,9 @@ define void @test_fold_or3_class_f32_0(i32 addrspace(1)* %out, float addrspace(1
; SI: s_endpgm
define void @test_fold_or_all_tests_class_f32_0(i32 addrspace(1)* %out, float addrspace(1)* %in) #0 {
%tid = call i32 @llvm.r600.read.tidig.x() #1
- %gep.in = getelementptr float addrspace(1)* %in, i32 %tid
- %gep.out = getelementptr i32 addrspace(1)* %out, i32 %tid
- %a = load float addrspace(1)* %gep.in
+ %gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid
+ %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %a = load float, float addrspace(1)* %gep.in
%class0 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 1) #1
%class1 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 2) #1
@@ -416,9 +416,9 @@ define void @test_fold_or_all_tests_class_f32_0(i32 addrspace(1)* %out, float ad
; SI: s_endpgm
define void @test_fold_or_class_f32_1(i32 addrspace(1)* %out, float addrspace(1)* %in) #0 {
%tid = call i32 @llvm.r600.read.tidig.x() #1
- %gep.in = getelementptr float addrspace(1)* %in, i32 %tid
- %gep.out = getelementptr i32 addrspace(1)* %out, i32 %tid
- %a = load float addrspace(1)* %gep.in
+ %gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid
+ %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %a = load float, float addrspace(1)* %gep.in
%class0 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 4) #1
%class1 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 8) #1
@@ -436,9 +436,9 @@ define void @test_fold_or_class_f32_1(i32 addrspace(1)* %out, float addrspace(1)
; SI: s_endpgm
define void @test_fold_or_class_f32_2(i32 addrspace(1)* %out, float addrspace(1)* %in) #0 {
%tid = call i32 @llvm.r600.read.tidig.x() #1
- %gep.in = getelementptr float addrspace(1)* %in, i32 %tid
- %gep.out = getelementptr i32 addrspace(1)* %out, i32 %tid
- %a = load float addrspace(1)* %gep.in
+ %gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid
+ %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %a = load float, float addrspace(1)* %gep.in
%class0 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 7) #1
%class1 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 7) #1
@@ -456,9 +456,9 @@ define void @test_fold_or_class_f32_2(i32 addrspace(1)* %out, float addrspace(1)
; SI: s_endpgm
define void @test_no_fold_or_class_f32_0(i32 addrspace(1)* %out, float addrspace(1)* %in, float %b) #0 {
%tid = call i32 @llvm.r600.read.tidig.x() #1
- %gep.in = getelementptr float addrspace(1)* %in, i32 %tid
- %gep.out = getelementptr i32 addrspace(1)* %out, i32 %tid
- %a = load float addrspace(1)* %gep.in
+ %gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid
+ %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %a = load float, float addrspace(1)* %gep.in
%class0 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 4) #1
%class1 = call i1 @llvm.AMDGPU.class.f32(float %b, i32 8) #1
@@ -472,7 +472,7 @@ define void @test_no_fold_or_class_f32_0(i32 addrspace(1)* %out, float addrspace
; SI-LABEL: {{^}}test_class_0_f32:
; SI-NOT: v_cmp_class
; SI: v_mov_b32_e32 [[RESULT:v[0-9]+]], 0{{$}}
-; SI-NEXT: buffer_store_dword [[RESULT]]
+; SI: buffer_store_dword [[RESULT]]
; SI: s_endpgm
define void @test_class_0_f32(i32 addrspace(1)* %out, float %a) #0 {
%result = call i1 @llvm.AMDGPU.class.f32(float %a, i32 0) #1
diff --git a/test/CodeGen/R600/llvm.AMDGPU.cube.ll b/test/CodeGen/R600/llvm.AMDGPU.cube.ll
index aa07afdebea6..e95a51093cb7 100644
--- a/test/CodeGen/R600/llvm.AMDGPU.cube.ll
+++ b/test/CodeGen/R600/llvm.AMDGPU.cube.ll
@@ -8,15 +8,15 @@
; CHECK: CUBE * T{{[0-9]}}.W
define void @cube() #0 {
main_body:
- %0 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9)
+ %0 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9)
%1 = extractelement <4 x float> %0, i32 3
- %2 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9)
+ %2 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9)
%3 = extractelement <4 x float> %2, i32 0
%4 = fdiv float %3, %1
- %5 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9)
+ %5 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9)
%6 = extractelement <4 x float> %5, i32 1
%7 = fdiv float %6, %1
- %8 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9)
+ %8 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9)
%9 = extractelement <4 x float> %8, i32 2
%10 = fdiv float %9, %1
%11 = insertelement <4 x float> undef, float %4, i32 0
diff --git a/test/CodeGen/R600/llvm.AMDGPU.cvt_f32_ubyte.ll b/test/CodeGen/R600/llvm.AMDGPU.cvt_f32_ubyte.ll
index 799817e01096..8b32f696449e 100644
--- a/test/CodeGen/R600/llvm.AMDGPU.cvt_f32_ubyte.ll
+++ b/test/CodeGen/R600/llvm.AMDGPU.cvt_f32_ubyte.ll
@@ -9,7 +9,7 @@ declare float @llvm.AMDGPU.cvt.f32.ubyte3(i32) nounwind readnone
; SI-LABEL: {{^}}test_unpack_byte0_to_float:
; SI: v_cvt_f32_ubyte0
define void @test_unpack_byte0_to_float(float addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
- %val = load i32 addrspace(1)* %in, align 4
+ %val = load i32, i32 addrspace(1)* %in, align 4
%cvt = call float @llvm.AMDGPU.cvt.f32.ubyte0(i32 %val) nounwind readnone
store float %cvt, float addrspace(1)* %out, align 4
ret void
@@ -18,7 +18,7 @@ define void @test_unpack_byte0_to_float(float addrspace(1)* %out, i32 addrspace(
; SI-LABEL: {{^}}test_unpack_byte1_to_float:
; SI: v_cvt_f32_ubyte1
define void @test_unpack_byte1_to_float(float addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
- %val = load i32 addrspace(1)* %in, align 4
+ %val = load i32, i32 addrspace(1)* %in, align 4
%cvt = call float @llvm.AMDGPU.cvt.f32.ubyte1(i32 %val) nounwind readnone
store float %cvt, float addrspace(1)* %out, align 4
ret void
@@ -27,7 +27,7 @@ define void @test_unpack_byte1_to_float(float addrspace(1)* %out, i32 addrspace(
; SI-LABEL: {{^}}test_unpack_byte2_to_float:
; SI: v_cvt_f32_ubyte2
define void @test_unpack_byte2_to_float(float addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
- %val = load i32 addrspace(1)* %in, align 4
+ %val = load i32, i32 addrspace(1)* %in, align 4
%cvt = call float @llvm.AMDGPU.cvt.f32.ubyte2(i32 %val) nounwind readnone
store float %cvt, float addrspace(1)* %out, align 4
ret void
@@ -36,7 +36,7 @@ define void @test_unpack_byte2_to_float(float addrspace(1)* %out, i32 addrspace(
; SI-LABEL: {{^}}test_unpack_byte3_to_float:
; SI: v_cvt_f32_ubyte3
define void @test_unpack_byte3_to_float(float addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
- %val = load i32 addrspace(1)* %in, align 4
+ %val = load i32, i32 addrspace(1)* %in, align 4
%cvt = call float @llvm.AMDGPU.cvt.f32.ubyte3(i32 %val) nounwind readnone
store float %cvt, float addrspace(1)* %out, align 4
ret void
diff --git a/test/CodeGen/R600/llvm.AMDGPU.div_fixup.ll b/test/CodeGen/R600/llvm.AMDGPU.div_fixup.ll
index 52d0519ef277..55ca9c7536e5 100644
--- a/test/CodeGen/R600/llvm.AMDGPU.div_fixup.ll
+++ b/test/CodeGen/R600/llvm.AMDGPU.div_fixup.ll
@@ -1,25 +1,29 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN %s
declare float @llvm.AMDGPU.div.fixup.f32(float, float, float) nounwind readnone
declare double @llvm.AMDGPU.div.fixup.f64(double, double, double) nounwind readnone
-; SI-LABEL: {{^}}test_div_fixup_f32:
+; GCN-LABEL: {{^}}test_div_fixup_f32:
; SI-DAG: s_load_dword [[SA:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
; SI-DAG: s_load_dword [[SC:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xd
; SI-DAG: s_load_dword [[SB:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
-; SI-DAG: v_mov_b32_e32 [[VC:v[0-9]+]], [[SC]]
-; SI-DAG: v_mov_b32_e32 [[VB:v[0-9]+]], [[SB]]
-; SI: v_div_fixup_f32 [[RESULT:v[0-9]+]], [[SA]], [[VB]], [[VC]]
-; SI: buffer_store_dword [[RESULT]],
-; SI: s_endpgm
+; VI-DAG: s_load_dword [[SA:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c
+; VI-DAG: s_load_dword [[SC:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x34
+; VI-DAG: s_load_dword [[SB:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x30
+; GCN-DAG: v_mov_b32_e32 [[VC:v[0-9]+]], [[SC]]
+; GCN-DAG: v_mov_b32_e32 [[VB:v[0-9]+]], [[SB]]
+; GCN: v_div_fixup_f32 [[RESULT:v[0-9]+]], [[SA]], [[VB]], [[VC]]
+; GCN: buffer_store_dword [[RESULT]],
+; GCN: s_endpgm
define void @test_div_fixup_f32(float addrspace(1)* %out, float %a, float %b, float %c) nounwind {
%result = call float @llvm.AMDGPU.div.fixup.f32(float %a, float %b, float %c) nounwind readnone
store float %result, float addrspace(1)* %out, align 4
ret void
}
-; SI-LABEL: {{^}}test_div_fixup_f64:
-; SI: v_div_fixup_f64
+; GCN-LABEL: {{^}}test_div_fixup_f64:
+; GCN: v_div_fixup_f64
define void @test_div_fixup_f64(double addrspace(1)* %out, double %a, double %b, double %c) nounwind {
%result = call double @llvm.AMDGPU.div.fixup.f64(double %a, double %b, double %c) nounwind readnone
store double %result, double addrspace(1)* %out, align 8
diff --git a/test/CodeGen/R600/llvm.AMDGPU.div_fmas.ll b/test/CodeGen/R600/llvm.AMDGPU.div_fmas.ll
index bbe910a45e8f..bcb7f870f1f4 100644
--- a/test/CodeGen/R600/llvm.AMDGPU.div_fmas.ll
+++ b/test/CodeGen/R600/llvm.AMDGPU.div_fmas.ll
@@ -9,13 +9,16 @@ declare float @llvm.AMDGPU.div.fmas.f32(float, float, float, i1) nounwind readno
declare double @llvm.AMDGPU.div.fmas.f64(double, double, double, i1) nounwind readnone
; GCN-LABEL: {{^}}test_div_fmas_f32:
-; GCN-DAG: s_load_dword [[SA:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
-; GCN-DAG: s_load_dword [[SC:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xd
-; GCN-DAG: s_load_dword [[SB:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
+; SI-DAG: s_load_dword [[SA:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
+; SI-DAG: s_load_dword [[SC:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xd
+; SI-DAG: s_load_dword [[SB:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
+; VI-DAG: s_load_dword [[SA:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c
+; VI-DAG: s_load_dword [[SC:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x34
+; VI-DAG: s_load_dword [[SB:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x30
; GCN-DAG: v_mov_b32_e32 [[VC:v[0-9]+]], [[SC]]
; GCN-DAG: v_mov_b32_e32 [[VB:v[0-9]+]], [[SB]]
; GCN-DAG: v_mov_b32_e32 [[VA:v[0-9]+]], [[SA]]
-; GCN: v_div_fmas_f32 [[RESULT:v[0-9]+]], [[VA]], [[VB]], [[VC]]
+; GCN: v_div_fmas_f32 [[RESULT:v[0-9]+]], [[VB]], [[VA]], [[VC]]
; GCN: buffer_store_dword [[RESULT]],
; GCN: s_endpgm
define void @test_div_fmas_f32(float addrspace(1)* %out, float %a, float %b, float %c, i1 %d) nounwind {
@@ -24,8 +27,50 @@ define void @test_div_fmas_f32(float addrspace(1)* %out, float %a, float %b, flo
ret void
}
-; SI-LABEL: {{^}}test_div_fmas_f64:
-; SI: v_div_fmas_f64
+; GCN-LABEL: {{^}}test_div_fmas_f32_inline_imm_0:
+; SI-DAG: s_load_dword [[SC:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xd
+; SI-DAG: s_load_dword [[SB:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
+; SI-DAG: v_mov_b32_e32 [[VC:v[0-9]+]], [[SC]]
+; SI-DAG: v_mov_b32_e32 [[VB:v[0-9]+]], [[SB]]
+; SI: v_div_fmas_f32 [[RESULT:v[0-9]+]], 1.0, [[VB]], [[VC]]
+; SI: buffer_store_dword [[RESULT]],
+; SI: s_endpgm
+define void @test_div_fmas_f32_inline_imm_0(float addrspace(1)* %out, float %a, float %b, float %c, i1 %d) nounwind {
+ %result = call float @llvm.AMDGPU.div.fmas.f32(float 1.0, float %b, float %c, i1 %d) nounwind readnone
+ store float %result, float addrspace(1)* %out, align 4
+ ret void
+}
+
+; GCN-LABEL: {{^}}test_div_fmas_f32_inline_imm_1:
+; SI-DAG: s_load_dword [[SA:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
+; SI-DAG: s_load_dword [[SC:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xd
+; SI-DAG: v_mov_b32_e32 [[VC:v[0-9]+]], [[SC]]
+; SI-DAG: v_mov_b32_e32 [[VA:v[0-9]+]], [[SA]]
+; SI: v_div_fmas_f32 [[RESULT:v[0-9]+]], 1.0, [[VA]], [[VC]]
+; SI: buffer_store_dword [[RESULT]],
+; SI: s_endpgm
+define void @test_div_fmas_f32_inline_imm_1(float addrspace(1)* %out, float %a, float %b, float %c, i1 %d) nounwind {
+ %result = call float @llvm.AMDGPU.div.fmas.f32(float %a, float 1.0, float %c, i1 %d) nounwind readnone
+ store float %result, float addrspace(1)* %out, align 4
+ ret void
+}
+
+; GCN-LABEL: {{^}}test_div_fmas_f32_inline_imm_2:
+; SI-DAG: s_load_dword [[SA:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
+; SI-DAG: s_load_dword [[SB:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
+; SI-DAG: v_mov_b32_e32 [[VA:v[0-9]+]], [[SA]]
+; SI-DAG: v_mov_b32_e32 [[VB:v[0-9]+]], [[SB]]
+; SI: v_div_fmas_f32 [[RESULT:v[0-9]+]], [[VA]], [[VB]], 1.0
+; SI: buffer_store_dword [[RESULT]],
+; SI: s_endpgm
+define void @test_div_fmas_f32_inline_imm_2(float addrspace(1)* %out, float %a, float %b, float %c, i1 %d) nounwind {
+ %result = call float @llvm.AMDGPU.div.fmas.f32(float %a, float %b, float 1.0, i1 %d) nounwind readnone
+ store float %result, float addrspace(1)* %out, align 4
+ ret void
+}
+
+; GCN-LABEL: {{^}}test_div_fmas_f64:
+; GCN: v_div_fmas_f64
define void @test_div_fmas_f64(double addrspace(1)* %out, double %a, double %b, double %c, i1 %d) nounwind {
%result = call double @llvm.AMDGPU.div.fmas.f64(double %a, double %b, double %c, i1 %d) nounwind readnone
store double %result, double addrspace(1)* %out, align 8
@@ -33,7 +78,7 @@ define void @test_div_fmas_f64(double addrspace(1)* %out, double %a, double %b,
}
; GCN-LABEL: {{^}}test_div_fmas_f32_cond_to_vcc:
-; SI: v_cmp_eq_i32_e64 vcc, s{{[0-9]+}}, 0
+; SI: v_cmp_eq_i32_e64 vcc, 0, s{{[0-9]+}}
; SI: v_div_fmas_f32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
define void @test_div_fmas_f32_cond_to_vcc(float addrspace(1)* %out, float %a, float %b, float %c, i32 %i) nounwind {
%cmp = icmp eq i32 %i, 0
@@ -61,25 +106,25 @@ define void @test_div_fmas_f32_imm_true_cond_to_vcc(float addrspace(1)* %out, fl
}
; GCN-LABEL: {{^}}test_div_fmas_f32_logical_cond_to_vcc:
-; SI-DAG: v_cmp_eq_i32_e64 [[CMP0:s\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, 0
-; SI-DAG: v_cmp_ne_i32_e64 [[CMP1:s\[[0-9]+:[0-9]+\]]], s{{[0-9]+}}, 0
-; SI: s_and_b64 vcc, [[CMP0]], [[CMP1]]
; SI-DAG: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
; SI-DAG: buffer_load_dword [[C:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
-; SI: v_div_fmas_f32 {{v[0-9]+}}, [[B]], [[A]], [[C]]
+; SI-DAG: v_cmp_eq_i32_e32 [[CMP0:vcc]], 0, v{{[0-9]+}}
+; SI-DAG: v_cmp_ne_i32_e64 [[CMP1:s\[[0-9]+:[0-9]+\]]], 0, s{{[0-9]+}}
+; SI: s_and_b64 vcc, [[CMP0]], [[CMP1]]
+; SI: v_div_fmas_f32 {{v[0-9]+}}, [[A]], [[B]], [[C]]
; SI: s_endpgm
define void @test_div_fmas_f32_logical_cond_to_vcc(float addrspace(1)* %out, float addrspace(1)* %in, i32 %d) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep.a = getelementptr float addrspace(1)* %in, i32 %tid
- %gep.b = getelementptr float addrspace(1)* %gep.a, i32 1
- %gep.c = getelementptr float addrspace(1)* %gep.a, i32 2
- %gep.out = getelementptr float addrspace(1)* %out, i32 2
+ %gep.a = getelementptr float, float addrspace(1)* %in, i32 %tid
+ %gep.b = getelementptr float, float addrspace(1)* %gep.a, i32 1
+ %gep.c = getelementptr float, float addrspace(1)* %gep.a, i32 2
+ %gep.out = getelementptr float, float addrspace(1)* %out, i32 2
- %a = load float addrspace(1)* %gep.a
- %b = load float addrspace(1)* %gep.b
- %c = load float addrspace(1)* %gep.c
+ %a = load float, float addrspace(1)* %gep.a
+ %b = load float, float addrspace(1)* %gep.b
+ %c = load float, float addrspace(1)* %gep.c
%cmp0 = icmp eq i32 %tid, 0
%cmp1 = icmp ne i32 %d, 0
@@ -91,17 +136,17 @@ define void @test_div_fmas_f32_logical_cond_to_vcc(float addrspace(1)* %out, flo
}
; GCN-LABEL: {{^}}test_div_fmas_f32_i1_phi_vcc:
-; SI: v_cmp_eq_i32_e64 [[CMPTID:s\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, 0
-; SI: s_and_saveexec_b64 [[CMPTID]], [[CMPTID]]
-; SI: s_xor_b64 [[CMPTID]], exec, [[CMPTID]]
+; SI: v_cmp_eq_i32_e32 vcc, 0, v{{[0-9]+}}
+; SI: s_and_saveexec_b64 [[SAVE:s\[[0-9]+:[0-9]+\]]], vcc
+; SI: s_xor_b64 [[SAVE]], exec, [[SAVE]]
; SI: buffer_load_dword [[LOAD:v[0-9]+]]
-; SI: v_cmp_ne_i32_e64 [[CMPLOAD:s\[[0-9]+:[0-9]+\]]], [[LOAD]], 0
-; SI: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1, [[CMPLOAD]]
+; SI: v_cmp_ne_i32_e32 vcc, 0, [[LOAD]]
+; SI: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1, vcc
-; SI: BB6_2:
-; SI: s_or_b64 exec, exec, [[CMPTID]]
+; SI: BB9_2:
+; SI: s_or_b64 exec, exec, [[SAVE]]
; SI: v_cmp_ne_i32_e32 vcc, 0, v0
; SI: v_div_fmas_f32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
; SI: buffer_store_dword
@@ -109,20 +154,20 @@ define void @test_div_fmas_f32_logical_cond_to_vcc(float addrspace(1)* %out, flo
define void @test_div_fmas_f32_i1_phi_vcc(float addrspace(1)* %out, float addrspace(1)* %in, i32 addrspace(1)* %dummy) nounwind {
entry:
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep.out = getelementptr float addrspace(1)* %out, i32 2
- %gep.a = getelementptr float addrspace(1)* %in, i32 %tid
- %gep.b = getelementptr float addrspace(1)* %gep.a, i32 1
- %gep.c = getelementptr float addrspace(1)* %gep.a, i32 2
+ %gep.out = getelementptr float, float addrspace(1)* %out, i32 2
+ %gep.a = getelementptr float, float addrspace(1)* %in, i32 %tid
+ %gep.b = getelementptr float, float addrspace(1)* %gep.a, i32 1
+ %gep.c = getelementptr float, float addrspace(1)* %gep.a, i32 2
- %a = load float addrspace(1)* %gep.a
- %b = load float addrspace(1)* %gep.b
- %c = load float addrspace(1)* %gep.c
+ %a = load float, float addrspace(1)* %gep.a
+ %b = load float, float addrspace(1)* %gep.b
+ %c = load float, float addrspace(1)* %gep.c
%cmp0 = icmp eq i32 %tid, 0
br i1 %cmp0, label %bb, label %exit
bb:
- %val = load i32 addrspace(1)* %dummy
+ %val = load i32, i32 addrspace(1)* %dummy
%cmp1 = icmp ne i32 %val, 0
br label %exit
diff --git a/test/CodeGen/R600/llvm.AMDGPU.div_scale.ll b/test/CodeGen/R600/llvm.AMDGPU.div_scale.ll
index 5773da0bb2e4..de830de039c7 100644
--- a/test/CodeGen/R600/llvm.AMDGPU.div_scale.ll
+++ b/test/CodeGen/R600/llvm.AMDGPU.div_scale.ll
@@ -13,11 +13,11 @@ declare float @llvm.fabs.f32(float) nounwind readnone
; SI: s_endpgm
define void @test_div_scale_f32_1(float addrspace(1)* %out, float addrspace(1)* %in) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid
- %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1
+ %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
+ %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
- %a = load float addrspace(1)* %gep.0, align 4
- %b = load float addrspace(1)* %gep.1, align 4
+ %a = load float, float addrspace(1)* %gep.0, align 4
+ %b = load float, float addrspace(1)* %gep.1, align 4
%result = call { float, i1 } @llvm.AMDGPU.div.scale.f32(float %a, float %b, i1 false) nounwind readnone
%result0 = extractvalue { float, i1 } %result, 0
@@ -33,11 +33,11 @@ define void @test_div_scale_f32_1(float addrspace(1)* %out, float addrspace(1)*
; SI: s_endpgm
define void @test_div_scale_f32_2(float addrspace(1)* %out, float addrspace(1)* %in) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid
- %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1
+ %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
+ %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
- %a = load float addrspace(1)* %gep.0, align 4
- %b = load float addrspace(1)* %gep.1, align 4
+ %a = load float, float addrspace(1)* %gep.0, align 4
+ %b = load float, float addrspace(1)* %gep.1, align 4
%result = call { float, i1 } @llvm.AMDGPU.div.scale.f32(float %a, float %b, i1 true) nounwind readnone
%result0 = extractvalue { float, i1 } %result, 0
@@ -53,11 +53,11 @@ define void @test_div_scale_f32_2(float addrspace(1)* %out, float addrspace(1)*
; SI: s_endpgm
define void @test_div_scale_f64_1(double addrspace(1)* %out, double addrspace(1)* %aptr, double addrspace(1)* %in) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep.0 = getelementptr double addrspace(1)* %in, i32 %tid
- %gep.1 = getelementptr double addrspace(1)* %gep.0, i32 1
+ %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
+ %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
- %a = load double addrspace(1)* %gep.0, align 8
- %b = load double addrspace(1)* %gep.1, align 8
+ %a = load double, double addrspace(1)* %gep.0, align 8
+ %b = load double, double addrspace(1)* %gep.1, align 8
%result = call { double, i1 } @llvm.AMDGPU.div.scale.f64(double %a, double %b, i1 false) nounwind readnone
%result0 = extractvalue { double, i1 } %result, 0
@@ -73,11 +73,11 @@ define void @test_div_scale_f64_1(double addrspace(1)* %out, double addrspace(1)
; SI: s_endpgm
define void @test_div_scale_f64_2(double addrspace(1)* %out, double addrspace(1)* %aptr, double addrspace(1)* %in) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep.0 = getelementptr double addrspace(1)* %in, i32 %tid
- %gep.1 = getelementptr double addrspace(1)* %gep.0, i32 1
+ %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
+ %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
- %a = load double addrspace(1)* %gep.0, align 8
- %b = load double addrspace(1)* %gep.1, align 8
+ %a = load double, double addrspace(1)* %gep.0, align 8
+ %b = load double, double addrspace(1)* %gep.1, align 8
%result = call { double, i1 } @llvm.AMDGPU.div.scale.f64(double %a, double %b, i1 true) nounwind readnone
%result0 = extractvalue { double, i1 } %result, 0
@@ -93,9 +93,9 @@ define void @test_div_scale_f64_2(double addrspace(1)* %out, double addrspace(1)
; SI: s_endpgm
define void @test_div_scale_f32_scalar_num_1(float addrspace(1)* %out, float addrspace(1)* %in, float %a) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep = getelementptr float addrspace(1)* %in, i32 %tid
+ %gep = getelementptr float, float addrspace(1)* %in, i32 %tid
- %b = load float addrspace(1)* %gep, align 4
+ %b = load float, float addrspace(1)* %gep, align 4
%result = call { float, i1 } @llvm.AMDGPU.div.scale.f32(float %a, float %b, i1 false) nounwind readnone
%result0 = extractvalue { float, i1 } %result, 0
@@ -111,9 +111,9 @@ define void @test_div_scale_f32_scalar_num_1(float addrspace(1)* %out, float add
; SI: s_endpgm
define void @test_div_scale_f32_scalar_num_2(float addrspace(1)* %out, float addrspace(1)* %in, float %a) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep = getelementptr float addrspace(1)* %in, i32 %tid
+ %gep = getelementptr float, float addrspace(1)* %in, i32 %tid
- %b = load float addrspace(1)* %gep, align 4
+ %b = load float, float addrspace(1)* %gep, align 4
%result = call { float, i1 } @llvm.AMDGPU.div.scale.f32(float %a, float %b, i1 true) nounwind readnone
%result0 = extractvalue { float, i1 } %result, 0
@@ -129,9 +129,9 @@ define void @test_div_scale_f32_scalar_num_2(float addrspace(1)* %out, float add
; SI: s_endpgm
define void @test_div_scale_f32_scalar_den_1(float addrspace(1)* %out, float addrspace(1)* %in, float %b) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep = getelementptr float addrspace(1)* %in, i32 %tid
+ %gep = getelementptr float, float addrspace(1)* %in, i32 %tid
- %a = load float addrspace(1)* %gep, align 4
+ %a = load float, float addrspace(1)* %gep, align 4
%result = call { float, i1 } @llvm.AMDGPU.div.scale.f32(float %a, float %b, i1 false) nounwind readnone
%result0 = extractvalue { float, i1 } %result, 0
@@ -147,9 +147,9 @@ define void @test_div_scale_f32_scalar_den_1(float addrspace(1)* %out, float add
; SI: s_endpgm
define void @test_div_scale_f32_scalar_den_2(float addrspace(1)* %out, float addrspace(1)* %in, float %b) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep = getelementptr float addrspace(1)* %in, i32 %tid
+ %gep = getelementptr float, float addrspace(1)* %in, i32 %tid
- %a = load float addrspace(1)* %gep, align 4
+ %a = load float, float addrspace(1)* %gep, align 4
%result = call { float, i1 } @llvm.AMDGPU.div.scale.f32(float %a, float %b, i1 true) nounwind readnone
%result0 = extractvalue { float, i1 } %result, 0
@@ -165,9 +165,9 @@ define void @test_div_scale_f32_scalar_den_2(float addrspace(1)* %out, float add
; SI: s_endpgm
define void @test_div_scale_f64_scalar_num_1(double addrspace(1)* %out, double addrspace(1)* %in, double %a) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep = getelementptr double addrspace(1)* %in, i32 %tid
+ %gep = getelementptr double, double addrspace(1)* %in, i32 %tid
- %b = load double addrspace(1)* %gep, align 8
+ %b = load double, double addrspace(1)* %gep, align 8
%result = call { double, i1 } @llvm.AMDGPU.div.scale.f64(double %a, double %b, i1 false) nounwind readnone
%result0 = extractvalue { double, i1 } %result, 0
@@ -183,9 +183,9 @@ define void @test_div_scale_f64_scalar_num_1(double addrspace(1)* %out, double a
; SI: s_endpgm
define void @test_div_scale_f64_scalar_num_2(double addrspace(1)* %out, double addrspace(1)* %in, double %a) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep = getelementptr double addrspace(1)* %in, i32 %tid
+ %gep = getelementptr double, double addrspace(1)* %in, i32 %tid
- %b = load double addrspace(1)* %gep, align 8
+ %b = load double, double addrspace(1)* %gep, align 8
%result = call { double, i1 } @llvm.AMDGPU.div.scale.f64(double %a, double %b, i1 true) nounwind readnone
%result0 = extractvalue { double, i1 } %result, 0
@@ -201,9 +201,9 @@ define void @test_div_scale_f64_scalar_num_2(double addrspace(1)* %out, double a
; SI: s_endpgm
define void @test_div_scale_f64_scalar_den_1(double addrspace(1)* %out, double addrspace(1)* %in, double %b) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep = getelementptr double addrspace(1)* %in, i32 %tid
+ %gep = getelementptr double, double addrspace(1)* %in, i32 %tid
- %a = load double addrspace(1)* %gep, align 8
+ %a = load double, double addrspace(1)* %gep, align 8
%result = call { double, i1 } @llvm.AMDGPU.div.scale.f64(double %a, double %b, i1 false) nounwind readnone
%result0 = extractvalue { double, i1 } %result, 0
@@ -219,9 +219,9 @@ define void @test_div_scale_f64_scalar_den_1(double addrspace(1)* %out, double a
; SI: s_endpgm
define void @test_div_scale_f64_scalar_den_2(double addrspace(1)* %out, double addrspace(1)* %in, double %b) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep = getelementptr double addrspace(1)* %in, i32 %tid
+ %gep = getelementptr double, double addrspace(1)* %in, i32 %tid
- %a = load double addrspace(1)* %gep, align 8
+ %a = load double, double addrspace(1)* %gep, align 8
%result = call { double, i1 } @llvm.AMDGPU.div.scale.f64(double %a, double %b, i1 true) nounwind readnone
%result0 = extractvalue { double, i1 } %result, 0
@@ -294,8 +294,8 @@ define void @test_div_scale_f64_all_scalar_2(double addrspace(1)* %out, double %
; SI: s_endpgm
define void @test_div_scale_f32_inline_imm_num(float addrspace(1)* %out, float addrspace(1)* %in) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid
- %a = load float addrspace(1)* %gep.0, align 4
+ %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
+ %a = load float, float addrspace(1)* %gep.0, align 4
%result = call { float, i1 } @llvm.AMDGPU.div.scale.f32(float 1.0, float %a, i1 false) nounwind readnone
%result0 = extractvalue { float, i1 } %result, 0
@@ -310,8 +310,8 @@ define void @test_div_scale_f32_inline_imm_num(float addrspace(1)* %out, float a
; SI: s_endpgm
define void @test_div_scale_f32_inline_imm_den(float addrspace(1)* %out, float addrspace(1)* %in) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid
- %a = load float addrspace(1)* %gep.0, align 4
+ %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
+ %a = load float, float addrspace(1)* %gep.0, align 4
%result = call { float, i1 } @llvm.AMDGPU.div.scale.f32(float %a, float 2.0, i1 false) nounwind readnone
%result0 = extractvalue { float, i1 } %result, 0
@@ -327,11 +327,11 @@ define void @test_div_scale_f32_inline_imm_den(float addrspace(1)* %out, float a
; SI: s_endpgm
define void @test_div_scale_f32_fabs_num(float addrspace(1)* %out, float addrspace(1)* %in) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid
- %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1
+ %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
+ %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
- %a = load float addrspace(1)* %gep.0, align 4
- %b = load float addrspace(1)* %gep.1, align 4
+ %a = load float, float addrspace(1)* %gep.0, align 4
+ %b = load float, float addrspace(1)* %gep.1, align 4
%a.fabs = call float @llvm.fabs.f32(float %a) nounwind readnone
@@ -349,11 +349,11 @@ define void @test_div_scale_f32_fabs_num(float addrspace(1)* %out, float addrspa
; SI: s_endpgm
define void @test_div_scale_f32_fabs_den(float addrspace(1)* %out, float addrspace(1)* %in) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid
- %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1
+ %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
+ %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
- %a = load float addrspace(1)* %gep.0, align 4
- %b = load float addrspace(1)* %gep.1, align 4
+ %a = load float, float addrspace(1)* %gep.0, align 4
+ %b = load float, float addrspace(1)* %gep.1, align 4
%b.fabs = call float @llvm.fabs.f32(float %b) nounwind readnone
diff --git a/test/CodeGen/R600/llvm.AMDGPU.flbit.i32.ll b/test/CodeGen/R600/llvm.AMDGPU.flbit.i32.ll
index 19fbee8913b4..20c7af8ade5e 100644
--- a/test/CodeGen/R600/llvm.AMDGPU.flbit.i32.ll
+++ b/test/CodeGen/R600/llvm.AMDGPU.flbit.i32.ll
@@ -21,7 +21,7 @@ define void @s_flbit(i32 addrspace(1)* noalias %out, i32 %val) nounwind {
; SI: buffer_store_dword [[RESULT]],
; SI: s_endpgm
define void @v_flbit(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
- %val = load i32 addrspace(1)* %valptr, align 4
+ %val = load i32, i32 addrspace(1)* %valptr, align 4
%r = call i32 @llvm.AMDGPU.flbit.i32(i32 %val) nounwind readnone
store i32 %r, i32 addrspace(1)* %out, align 4
ret void
diff --git a/test/CodeGen/R600/llvm.AMDGPU.fract.f64.ll b/test/CodeGen/R600/llvm.AMDGPU.fract.f64.ll
new file mode 100644
index 000000000000..e098dd35d6da
--- /dev/null
+++ b/test/CodeGen/R600/llvm.AMDGPU.fract.f64.ll
@@ -0,0 +1,60 @@
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=CI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=CI -check-prefix=FUNC %s
+
+declare double @llvm.fabs.f64(double %Val)
+declare double @llvm.AMDGPU.fract.f64(double) nounwind readnone
+
+; FUNC-LABEL: {{^}}fract_f64:
+; GCN: v_fract_f64_e32 [[FRC:v\[[0-9]+:[0-9]+\]]], v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]]
+; SI: v_mov_b32_e32 v[[UPLO:[0-9]+]], -1
+; SI: v_mov_b32_e32 v[[UPHI:[0-9]+]], 0x3fefffff
+; SI: v_min_f64 v{{\[}}[[MINLO:[0-9]+]]:[[MINHI:[0-9]+]]], v{{\[}}[[UPLO]]:[[UPHI]]], [[FRC]]
+; SI: v_cmp_class_f64_e64 [[COND:s\[[0-9]+:[0-9]+\]]], v{{\[}}[[LO]]:[[HI]]], 3
+; SI: v_cndmask_b32_e64 v[[RESLO:[0-9]+]], v[[LO]], v[[MINLO]], [[COND]]
+; SI: v_cndmask_b32_e64 v[[RESHI:[0-9]+]], v[[HI]], v[[MINHI]], [[COND]]
+; SI: buffer_store_dwordx2 v{{\[}}[[RESLO]]:[[RESHI]]]
+; CI: buffer_store_dwordx2 [[FRC]]
+define void @fract_f64(double addrspace(1)* %out, double addrspace(1)* %src) nounwind {
+ %val = load double, double addrspace(1)* %src, align 4
+ %fract = call double @llvm.AMDGPU.fract.f64(double %val) nounwind readnone
+ store double %fract, double addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: {{^}}fract_f64_neg:
+; GCN: v_fract_f64_e64 [[FRC:v\[[0-9]+:[0-9]+\]]], -v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]]
+; SI: v_mov_b32_e32 v[[UPLO:[0-9]+]], -1
+; SI: v_mov_b32_e32 v[[UPHI:[0-9]+]], 0x3fefffff
+; SI: v_min_f64 v{{\[}}[[MINLO:[0-9]+]]:[[MINHI:[0-9]+]]], v{{\[}}[[UPLO]]:[[UPHI]]], [[FRC]]
+; SI: v_cmp_class_f64_e64 [[COND:s\[[0-9]+:[0-9]+\]]], v{{\[}}[[LO]]:[[HI]]], 3
+; SI: v_cndmask_b32_e64 v[[RESLO:[0-9]+]], v[[LO]], v[[MINLO]], [[COND]]
+; SI: v_cndmask_b32_e64 v[[RESHI:[0-9]+]], v[[HI]], v[[MINHI]], [[COND]]
+; SI: buffer_store_dwordx2 v{{\[}}[[RESLO]]:[[RESHI]]]
+; CI: buffer_store_dwordx2 [[FRC]]
+define void @fract_f64_neg(double addrspace(1)* %out, double addrspace(1)* %src) nounwind {
+ %val = load double, double addrspace(1)* %src, align 4
+ %neg = fsub double 0.0, %val
+ %fract = call double @llvm.AMDGPU.fract.f64(double %neg) nounwind readnone
+ store double %fract, double addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: {{^}}fract_f64_neg_abs:
+; GCN: v_fract_f64_e64 [[FRC:v\[[0-9]+:[0-9]+\]]], -|v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]]|
+; SI: v_mov_b32_e32 v[[UPLO:[0-9]+]], -1
+; SI: v_mov_b32_e32 v[[UPHI:[0-9]+]], 0x3fefffff
+; SI: v_min_f64 v{{\[}}[[MINLO:[0-9]+]]:[[MINHI:[0-9]+]]], v{{\[}}[[UPLO]]:[[UPHI]]], [[FRC]]
+; SI: v_cmp_class_f64_e64 [[COND:s\[[0-9]+:[0-9]+\]]], v{{\[}}[[LO]]:[[HI]]], 3
+; SI: v_cndmask_b32_e64 v[[RESLO:[0-9]+]], v[[LO]], v[[MINLO]], [[COND]]
+; SI: v_cndmask_b32_e64 v[[RESHI:[0-9]+]], v[[HI]], v[[MINHI]], [[COND]]
+; SI: buffer_store_dwordx2 v{{\[}}[[RESLO]]:[[RESHI]]]
+; CI: buffer_store_dwordx2 [[FRC]]
+define void @fract_f64_neg_abs(double addrspace(1)* %out, double addrspace(1)* %src) nounwind {
+ %val = load double, double addrspace(1)* %src, align 4
+ %abs = call double @llvm.fabs.f64(double %val)
+ %neg = fsub double 0.0, %abs
+ %fract = call double @llvm.AMDGPU.fract.f64(double %neg) nounwind readnone
+ store double %fract, double addrspace(1)* %out, align 4
+ ret void
+}
diff --git a/test/CodeGen/R600/llvm.AMDGPU.fract.ll b/test/CodeGen/R600/llvm.AMDGPU.fract.ll
index ef89742441c6..7501b4b75465 100644
--- a/test/CodeGen/R600/llvm.AMDGPU.fract.ll
+++ b/test/CodeGen/R600/llvm.AMDGPU.fract.ll
@@ -16,7 +16,7 @@ declare float @llvm.AMDIL.fraction.f32(float) nounwind readnone
; GCN: buffer_store_dword [[RESULT]]
; EG: FRACT
define void @fract_f32(float addrspace(1)* %out, float addrspace(1)* %src) nounwind {
- %val = load float addrspace(1)* %src, align 4
+ %val = load float, float addrspace(1)* %src, align 4
%fract = call float @llvm.AMDGPU.fract.f32(float %val) nounwind readnone
store float %fract, float addrspace(1)* %out, align 4
ret void
@@ -29,7 +29,7 @@ define void @fract_f32(float addrspace(1)* %out, float addrspace(1)* %src) nounw
; GCN: buffer_store_dword [[RESULT]]
; EG: FRACT
define void @fract_f32_legacy_amdil(float addrspace(1)* %out, float addrspace(1)* %src) nounwind {
- %val = load float addrspace(1)* %src, align 4
+ %val = load float, float addrspace(1)* %src, align 4
%fract = call float @llvm.AMDIL.fraction.f32(float %val) nounwind readnone
store float %fract, float addrspace(1)* %out, align 4
ret void
@@ -42,7 +42,7 @@ define void @fract_f32_legacy_amdil(float addrspace(1)* %out, float addrspace(1)
; GCN: buffer_store_dword [[RESULT]]
; EG: FRACT
define void @fract_f32_neg(float addrspace(1)* %out, float addrspace(1)* %src) nounwind {
- %val = load float addrspace(1)* %src, align 4
+ %val = load float, float addrspace(1)* %src, align 4
%neg = fsub float 0.0, %val
%fract = call float @llvm.AMDGPU.fract.f32(float %neg) nounwind readnone
store float %fract, float addrspace(1)* %out, align 4
@@ -56,7 +56,7 @@ define void @fract_f32_neg(float addrspace(1)* %out, float addrspace(1)* %src) n
; GCN: buffer_store_dword [[RESULT]]
; EG: FRACT
define void @fract_f32_neg_abs(float addrspace(1)* %out, float addrspace(1)* %src) nounwind {
- %val = load float addrspace(1)* %src, align 4
+ %val = load float, float addrspace(1)* %src, align 4
%abs = call float @llvm.fabs.f32(float %val)
%neg = fsub float 0.0, %abs
%fract = call float @llvm.AMDGPU.fract.f32(float %neg) nounwind readnone
diff --git a/test/CodeGen/R600/llvm.AMDGPU.imax.ll b/test/CodeGen/R600/llvm.AMDGPU.imax.ll
index ce7fca056a02..46662f96c290 100644
--- a/test/CodeGen/R600/llvm.AMDGPU.imax.ll
+++ b/test/CodeGen/R600/llvm.AMDGPU.imax.ll
@@ -5,7 +5,7 @@
; SI: v_max_i32_e32
define void @vector_imax(i32 %p0, i32 %p1, i32 addrspace(1)* %in) #0 {
main_body:
- %load = load i32 addrspace(1)* %in, align 4
+ %load = load i32, i32 addrspace(1)* %in, align 4
%max = call i32 @llvm.AMDGPU.imax(i32 %p0, i32 %load)
%bc = bitcast i32 %max to float
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %bc, float %bc, float %bc, float %bc)
diff --git a/test/CodeGen/R600/llvm.AMDGPU.imin.ll b/test/CodeGen/R600/llvm.AMDGPU.imin.ll
index 15cd38b19d7e..34b454e23755 100644
--- a/test/CodeGen/R600/llvm.AMDGPU.imin.ll
+++ b/test/CodeGen/R600/llvm.AMDGPU.imin.ll
@@ -5,7 +5,7 @@
; SI: v_min_i32_e32
define void @vector_imin(i32 %p0, i32 %p1, i32 addrspace(1)* %in) #0 {
main_body:
- %load = load i32 addrspace(1)* %in, align 4
+ %load = load i32, i32 addrspace(1)* %in, align 4
%min = call i32 @llvm.AMDGPU.imin(i32 %p0, i32 %load)
%bc = bitcast i32 %min to float
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %bc, float %bc, float %bc, float %bc)
diff --git a/test/CodeGen/R600/llvm.AMDGPU.kill.ll b/test/CodeGen/R600/llvm.AMDGPU.kill.ll
index 30b0fc2bd73b..057708e7b5cc 100644
--- a/test/CodeGen/R600/llvm.AMDGPU.kill.ll
+++ b/test/CodeGen/R600/llvm.AMDGPU.kill.ll
@@ -18,8 +18,8 @@ main_body:
; SI-LABEL: {{^}}kill_vcc_implicit_def:
; SI-NOT: v_cmp_gt_f32_e32 vcc,
+; SI: v_cmp_gt_f32_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], 0, v{{[0-9]+}}
; SI: v_cmpx_le_f32_e32 vcc, 0, v{{[0-9]+}}
-; SI: v_cmp_lt_f32_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, 0
; SI: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1.0, [[CMP]]
define void @kill_vcc_implicit_def([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #1 {
entry:
diff --git a/test/CodeGen/R600/llvm.AMDGPU.rsq.clamped.f64.ll b/test/CodeGen/R600/llvm.AMDGPU.rsq.clamped.f64.ll
index 4318aeaac786..67f1d22c7178 100644
--- a/test/CodeGen/R600/llvm.AMDGPU.rsq.clamped.f64.ll
+++ b/test/CodeGen/R600/llvm.AMDGPU.rsq.clamped.f64.ll
@@ -1,9 +1,21 @@
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=FUNC %s
declare double @llvm.AMDGPU.rsq.clamped.f64(double) nounwind readnone
; FUNC-LABEL: {{^}}rsq_clamped_f64:
; SI: v_rsq_clamp_f64_e32
+
+; VI: v_rsq_f64_e32 [[RSQ:v\[[0-9]+:[0-9]+\]]], s[2:3]
+; TODO: this constant should be folded:
+; VI: s_mov_b32 s[[ALLBITS:[0-9]+]], -1
+; VI: s_mov_b32 s[[HIGH1:[0-9]+]], 0x7fefffff
+; VI: s_mov_b32 s[[LOW1:[0-9]+]], s[[ALLBITS]]
+; VI: v_min_f64 v[0:1], [[RSQ]], s{{\[}}[[LOW1]]:[[HIGH1]]]
+; VI: s_mov_b32 s[[HIGH2:[0-9]+]], 0xffefffff
+; VI: s_mov_b32 s[[LOW2:[0-9]+]], s[[ALLBITS]]
+; VI: v_max_f64 v[0:1], v[0:1], s{{\[}}[[LOW2]]:[[HIGH2]]]
+
define void @rsq_clamped_f64(double addrspace(1)* %out, double %src) nounwind {
%rsq_clamped = call double @llvm.AMDGPU.rsq.clamped.f64(double %src) nounwind readnone
store double %rsq_clamped, double addrspace(1)* %out, align 8
diff --git a/test/CodeGen/R600/llvm.AMDGPU.rsq.clamped.ll b/test/CodeGen/R600/llvm.AMDGPU.rsq.clamped.ll
index 9336baffc97f..eeff2536b232 100644
--- a/test/CodeGen/R600/llvm.AMDGPU.rsq.clamped.ll
+++ b/test/CodeGen/R600/llvm.AMDGPU.rsq.clamped.ll
@@ -1,4 +1,5 @@
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
@@ -6,7 +7,15 @@ declare float @llvm.AMDGPU.rsq.clamped.f32(float) nounwind readnone
; FUNC-LABEL: {{^}}rsq_clamped_f32:
; SI: v_rsq_clamp_f32_e32
+
+; VI: v_rsq_f32_e32 [[RSQ:v[0-9]+]], {{s[0-9]+}}
+; VI: v_min_f32_e32 [[MIN:v[0-9]+]], 0x7f7fffff, [[RSQ]]
+; TODO: this constant should be folded:
+; VI: v_mov_b32_e32 [[MINFLT:v[0-9]+]], 0xff7fffff
+; VI: v_max_f32_e32 {{v[0-9]+}}, [[MIN]], [[MINFLT]]
+
; EG: RECIPSQRT_CLAMPED
+
define void @rsq_clamped_f32(float addrspace(1)* %out, float %src) nounwind {
%rsq_clamped = call float @llvm.AMDGPU.rsq.clamped.f32(float %src) nounwind readnone
store float %rsq_clamped, float addrspace(1)* %out, align 4
diff --git a/test/CodeGen/R600/llvm.AMDGPU.tex.ll b/test/CodeGen/R600/llvm.AMDGPU.tex.ll
index aac014bde456..10206609bb57 100644
--- a/test/CodeGen/R600/llvm.AMDGPU.tex.ll
+++ b/test/CodeGen/R600/llvm.AMDGPU.tex.ll
@@ -18,7 +18,7 @@
;CHECK: TEX_SAMPLE T{{[0-9]+\.XYZW, T[0-9]+\.XYZW}} RID:0 SID:0 CT:NNUN
define void @test(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
- %addr = load <4 x float> addrspace(1)* %in
+ %addr = load <4 x float>, <4 x float> addrspace(1)* %in
%res1 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %addr, i32 0, i32 0, i32 1)
%res2 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %res1, i32 0, i32 0, i32 2)
%res3 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %res2, i32 0, i32 0, i32 3)
diff --git a/test/CodeGen/R600/llvm.AMDGPU.trig_preop.ll b/test/CodeGen/R600/llvm.AMDGPU.trig_preop.ll
index 5829f7348df9..6b546a7e17c1 100644
--- a/test/CodeGen/R600/llvm.AMDGPU.trig_preop.ll
+++ b/test/CodeGen/R600/llvm.AMDGPU.trig_preop.ll
@@ -10,8 +10,8 @@ declare double @llvm.AMDGPU.trig.preop.f64(double, i32) nounwind readnone
; SI: buffer_store_dwordx2 [[RESULT]],
; SI: s_endpgm
define void @test_trig_preop_f64(double addrspace(1)* %out, double addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
- %a = load double addrspace(1)* %aptr, align 8
- %b = load i32 addrspace(1)* %bptr, align 4
+ %a = load double, double addrspace(1)* %aptr, align 8
+ %b = load i32, i32 addrspace(1)* %bptr, align 4
%result = call double @llvm.AMDGPU.trig.preop.f64(double %a, i32 %b) nounwind readnone
store double %result, double addrspace(1)* %out, align 8
ret void
@@ -23,7 +23,7 @@ define void @test_trig_preop_f64(double addrspace(1)* %out, double addrspace(1)*
; SI: buffer_store_dwordx2 [[RESULT]],
; SI: s_endpgm
define void @test_trig_preop_f64_imm_segment(double addrspace(1)* %out, double addrspace(1)* %aptr) nounwind {
- %a = load double addrspace(1)* %aptr, align 8
+ %a = load double, double addrspace(1)* %aptr, align 8
%result = call double @llvm.AMDGPU.trig.preop.f64(double %a, i32 7) nounwind readnone
store double %result, double addrspace(1)* %out, align 8
ret void
diff --git a/test/CodeGen/R600/llvm.AMDGPU.umad24.ll b/test/CodeGen/R600/llvm.AMDGPU.umad24.ll
index 88613db2161f..77a073b0cb03 100644
--- a/test/CodeGen/R600/llvm.AMDGPU.umad24.ll
+++ b/test/CodeGen/R600/llvm.AMDGPU.umad24.ll
@@ -25,12 +25,12 @@ define void @test_umad24(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2
; SI: buffer_store_dword [[RESULT]]
define void @commute_umad24(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %out.gep = getelementptr i32 addrspace(1)* %out, i32 %tid
- %src0.gep = getelementptr i32 addrspace(1)* %out, i32 %tid
- %src2.gep = getelementptr i32 addrspace(1)* %src0.gep, i32 1
+ %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %src0.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %src2.gep = getelementptr i32, i32 addrspace(1)* %src0.gep, i32 1
- %src0 = load i32 addrspace(1)* %src0.gep, align 4
- %src2 = load i32 addrspace(1)* %src2.gep, align 4
+ %src0 = load i32, i32 addrspace(1)* %src0.gep, align 4
+ %src2 = load i32, i32 addrspace(1)* %src2.gep, align 4
%mad = call i32 @llvm.AMDGPU.umad24(i32 %src0, i32 4, i32 %src2) nounwind readnone
store i32 %mad, i32 addrspace(1)* %out.gep, align 4
ret void
diff --git a/test/CodeGen/R600/llvm.AMDGPU.umax.ll b/test/CodeGen/R600/llvm.AMDGPU.umax.ll
index 4320dfe669d8..a97d103016d3 100644
--- a/test/CodeGen/R600/llvm.AMDGPU.umax.ll
+++ b/test/CodeGen/R600/llvm.AMDGPU.umax.ll
@@ -5,7 +5,7 @@
; SI: v_max_u32_e32
define void @vector_umax(i32 %p0, i32 %p1, i32 addrspace(1)* %in) #0 {
main_body:
- %load = load i32 addrspace(1)* %in, align 4
+ %load = load i32, i32 addrspace(1)* %in, align 4
%max = call i32 @llvm.AMDGPU.umax(i32 %p0, i32 %load)
%bc = bitcast i32 %max to float
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %bc, float %bc, float %bc, float %bc)
@@ -28,7 +28,7 @@ entry:
; SI-NOT: and
; SI: buffer_store_short [[RESULT]],
define void @trunc_zext_umax(i16 addrspace(1)* nocapture %out, i8 addrspace(1)* nocapture %src) nounwind {
- %tmp5 = load i8 addrspace(1)* %src, align 1
+ %tmp5 = load i8, i8 addrspace(1)* %src, align 1
%tmp2 = zext i8 %tmp5 to i32
%tmp3 = tail call i32 @llvm.AMDGPU.umax(i32 %tmp2, i32 0) nounwind readnone
%tmp4 = trunc i32 %tmp3 to i8
diff --git a/test/CodeGen/R600/llvm.AMDGPU.umin.ll b/test/CodeGen/R600/llvm.AMDGPU.umin.ll
index e4cac33a07a7..2acd10e0c631 100644
--- a/test/CodeGen/R600/llvm.AMDGPU.umin.ll
+++ b/test/CodeGen/R600/llvm.AMDGPU.umin.ll
@@ -5,7 +5,7 @@
; SI: v_min_u32_e32
define void @vector_umin(i32 %p0, i32 %p1, i32 addrspace(1)* %in) #0 {
main_body:
- %load = load i32 addrspace(1)* %in, align 4
+ %load = load i32, i32 addrspace(1)* %in, align 4
%min = call i32 @llvm.AMDGPU.umin(i32 %p0, i32 %load)
%bc = bitcast i32 %min to float
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %bc, float %bc, float %bc, float %bc)
@@ -28,7 +28,7 @@ entry:
; SI-NOT: and
; SI: buffer_store_short [[RESULT]],
define void @trunc_zext_umin(i16 addrspace(1)* nocapture %out, i8 addrspace(1)* nocapture %src) nounwind {
- %tmp5 = load i8 addrspace(1)* %src, align 1
+ %tmp5 = load i8, i8 addrspace(1)* %src, align 1
%tmp2 = zext i8 %tmp5 to i32
%tmp3 = tail call i32 @llvm.AMDGPU.umin(i32 %tmp2, i32 0) nounwind readnone
%tmp4 = trunc i32 %tmp3 to i8
diff --git a/test/CodeGen/R600/llvm.SI.fs.interp.ll b/test/CodeGen/R600/llvm.SI.fs.interp.ll
index 9f87a41de247..3d05da616e4e 100644
--- a/test/CodeGen/R600/llvm.SI.fs.interp.ll
+++ b/test/CodeGen/R600/llvm.SI.fs.interp.ll
@@ -1,11 +1,13 @@
-;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s
-;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s
+;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=GCN %s
+;RUN: llc < %s -march=amdgcn -mcpu=kabini -verify-machineinstrs | FileCheck --check-prefix=GCN --check-prefix=16BANK %s
+;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefix=GCN %s
-;CHECK-NOT: s_wqm
-;CHECK: s_mov_b32
-;CHECK: v_interp_p1_f32
-;CHECK: v_interp_p2_f32
-;CHECK: v_interp_mov_f32
+;GCN-LABEL: {{^}}main:
+;GCN-NOT: s_wqm
+;GCN: s_mov_b32
+;GCN-NEXT: v_interp_mov_f32
+;GCN: v_interp_p1_f32
+;GCN: v_interp_p2_f32
define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>) #0 {
main_body:
@@ -16,7 +18,33 @@ main_body:
ret void
}
-declare void @llvm.AMDGPU.shader.type(i32)
+; Test that v_interp_p1 uses different source and destination registers
+; on 16 bank LDS chips.
+
+; 16BANK-LABEL: {{^}}v_interp_p1_bank16_bug:
+; 16BANK-NOT: v_interp_p1_f32 [[DST:v[0-9]+]], [[DST]]
+
+define void @v_interp_p1_bank16_bug([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 {
+main_body:
+ %22 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7)
+ %23 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7)
+ %24 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7)
+ %25 = call float @fabs(float %22)
+ %26 = call float @fabs(float %23)
+ %27 = call float @fabs(float %24)
+ %28 = call i32 @llvm.SI.packf16(float %25, float %26)
+ %29 = bitcast i32 %28 to float
+ %30 = call i32 @llvm.SI.packf16(float %27, float 1.000000e+00)
+ %31 = bitcast i32 %30 to float
+ call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %29, float %31, float %29, float %31)
+ ret void
+}
+
+; Function Attrs: readnone
+declare float @fabs(float) #2
+
+; Function Attrs: nounwind readnone
+declare i32 @llvm.SI.packf16(float, float) #1
; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.constant(i32, i32, i32) #1
@@ -28,3 +56,4 @@ declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float
attributes #0 = { "ShaderType"="0" }
attributes #1 = { nounwind readnone }
+attributes #2 = { readnone }
diff --git a/test/CodeGen/R600/llvm.SI.imageload.ll b/test/CodeGen/R600/llvm.SI.imageload.ll
index 35e4591bb1fa..b67716c3b665 100644
--- a/test/CodeGen/R600/llvm.SI.imageload.ll
+++ b/test/CodeGen/R600/llvm.SI.imageload.ll
@@ -88,16 +88,16 @@ define void @test(i32 %a1, i32 %a2, i32 %a3, i32 %a4) {
; CHECK: image_load_mip {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}
define void @vgpr_coords(float addrspace(2)* addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
- %20 = getelementptr float addrspace(2)* addrspace(2)* %0, i32 0
- %21 = load float addrspace(2)* addrspace(2)* %20, !tbaa !2
- %22 = getelementptr float addrspace(2)* %21, i32 0
- %23 = load float addrspace(2)* %22, !tbaa !2, !invariant.load !1
- %24 = getelementptr float addrspace(2)* %21, i32 1
- %25 = load float addrspace(2)* %24, !tbaa !2, !invariant.load !1
- %26 = getelementptr float addrspace(2)* %21, i32 4
- %27 = load float addrspace(2)* %26, !tbaa !2, !invariant.load !1
- %28 = getelementptr <32 x i8> addrspace(2)* %2, i32 0
- %29 = load <32 x i8> addrspace(2)* %28, !tbaa !2
+ %20 = getelementptr float addrspace(2)*, float addrspace(2)* addrspace(2)* %0, i32 0
+ %21 = load float addrspace(2)*, float addrspace(2)* addrspace(2)* %20, !tbaa !2
+ %22 = getelementptr float, float addrspace(2)* %21, i32 0
+ %23 = load float, float addrspace(2)* %22, !tbaa !2, !invariant.load !1
+ %24 = getelementptr float, float addrspace(2)* %21, i32 1
+ %25 = load float, float addrspace(2)* %24, !tbaa !2, !invariant.load !1
+ %26 = getelementptr float, float addrspace(2)* %21, i32 4
+ %27 = load float, float addrspace(2)* %26, !tbaa !2, !invariant.load !1
+ %28 = getelementptr <32 x i8>, <32 x i8> addrspace(2)* %2, i32 0
+ %29 = load <32 x i8>, <32 x i8> addrspace(2)* %28, !tbaa !2
%30 = bitcast float %27 to i32
%31 = bitcast float %23 to i32
%32 = bitcast float %25 to i32
diff --git a/test/CodeGen/R600/llvm.SI.load.dword.ll b/test/CodeGen/R600/llvm.SI.load.dword.ll
index 8c8f2eed7d9d..f6c258539d5b 100644
--- a/test/CodeGen/R600/llvm.SI.load.dword.ll
+++ b/test/CodeGen/R600/llvm.SI.load.dword.ll
@@ -1,29 +1,41 @@
-;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s
-;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s
+; RUN: llc -march=amdgcn -mcpu=verde -show-mc-encoding -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -march=amdgcn -mcpu=tonga -show-mc-encoding -verify-machineinstrs < %s | FileCheck %s
; Example of a simple geometry shader loading vertex attributes from the
; ESGS ring buffer
-; CHECK-LABEL: {{^}}main:
-; CHECK: buffer_load_dword
-; CHECK: buffer_load_dword
-; CHECK: buffer_load_dword
-; CHECK: buffer_load_dword
+; FIXME: Out of bounds immediate offset crashes
-define void @main([17 x <16 x i8>] addrspace(2)* byval, [32 x <16 x i8>] addrspace(2)* byval, [16 x <32 x i8>] addrspace(2)* byval, [2 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* inreg, [17 x <16 x i8>] addrspace(2)* inreg, i32, i32, i32, i32) #0 {
+; CHECK-LABEL: {{^}}main:
+; CHECK: buffer_load_dword {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 glc slc
+; CHECK: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen glc slc
+; CHECK: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 idxen glc slc
+; CHECK: buffer_load_dword {{v[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 idxen offen glc slc
+; CHECK: s_movk_i32 [[K:s[0-9]+]], 0x4d2 ; encoding
+; CHECK: buffer_load_dword {{v[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, [[K]] idxen offen offset:65535 glc slc
+
+define void @main([17 x <16 x i8>] addrspace(2)* byval %arg, [32 x <16 x i8>] addrspace(2)* byval %arg1, [16 x <32 x i8>] addrspace(2)* byval %arg2, [2 x <16 x i8>] addrspace(2)* byval %arg3, [17 x <16 x i8>] addrspace(2)* inreg %arg4, [17 x <16 x i8>] addrspace(2)* inreg %arg5, i32 %arg6, i32 %arg7, i32 %arg8, i32 %arg9) #0 {
main_body:
- %10 = getelementptr [2 x <16 x i8>] addrspace(2)* %3, i64 0, i32 1
- %11 = load <16 x i8> addrspace(2)* %10, !tbaa !0
- %12 = shl i32 %6, 2
- %13 = call i32 @llvm.SI.buffer.load.dword.i32.i32(<16 x i8> %11, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 0)
- %14 = bitcast i32 %13 to float
- %15 = call i32 @llvm.SI.buffer.load.dword.i32.i32(<16 x i8> %11, i32 %12, i32 0, i32 0, i32 1, i32 0, i32 1, i32 1, i32 0)
- %16 = bitcast i32 %15 to float
- %17 = call i32 @llvm.SI.buffer.load.dword.i32.i32(<16 x i8> %11, i32 %12, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 0)
- %18 = bitcast i32 %17 to float
- %19 = call i32 @llvm.SI.buffer.load.dword.i32.v2i32(<16 x i8> %11, <2 x i32> <i32 0, i32 0>, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 0)
- %20 = bitcast i32 %19 to float
- call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %14, float %16, float %18, float %20)
+ %tmp = getelementptr [2 x <16 x i8>], [2 x <16 x i8>] addrspace(2)* %arg3, i64 0, i32 1
+ %tmp10 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp, !tbaa !0
+ %tmp11 = shl i32 %arg6, 2
+ %tmp12 = call i32 @llvm.SI.buffer.load.dword.i32.i32(<16 x i8> %tmp10, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 0)
+ %tmp13 = bitcast i32 %tmp12 to float
+ %tmp14 = call i32 @llvm.SI.buffer.load.dword.i32.i32(<16 x i8> %tmp10, i32 %tmp11, i32 0, i32 0, i32 1, i32 0, i32 1, i32 1, i32 0)
+ %tmp15 = bitcast i32 %tmp14 to float
+ %tmp16 = call i32 @llvm.SI.buffer.load.dword.i32.i32(<16 x i8> %tmp10, i32 %tmp11, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 0)
+ %tmp17 = bitcast i32 %tmp16 to float
+ %tmp18 = call i32 @llvm.SI.buffer.load.dword.i32.v2i32(<16 x i8> %tmp10, <2 x i32> zeroinitializer, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 0)
+ %tmp19 = bitcast i32 %tmp18 to float
+
+ %tmp20 = call i32 @llvm.SI.buffer.load.dword.i32.v2i32(<16 x i8> %tmp10, <2 x i32> zeroinitializer, i32 0, i32 123, i32 1, i32 1, i32 1, i32 1, i32 0)
+ %tmp21 = bitcast i32 %tmp20 to float
+
+ %tmp22 = call i32 @llvm.SI.buffer.load.dword.i32.v2i32(<16 x i8> %tmp10, <2 x i32> zeroinitializer, i32 1234, i32 65535, i32 1, i32 1, i32 1, i32 1, i32 0)
+ %tmp23 = bitcast i32 %tmp22 to float
+
+ call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %tmp13, float %tmp15, float %tmp17, float %tmp19)
+ call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %tmp21, float %tmp23, float %tmp23, float %tmp23)
ret void
}
diff --git a/test/CodeGen/R600/llvm.SI.sendmsg.ll b/test/CodeGen/R600/llvm.SI.sendmsg.ll
index ce3800241953..09675d503355 100644
--- a/test/CodeGen/R600/llvm.SI.sendmsg.ll
+++ b/test/CodeGen/R600/llvm.SI.sendmsg.ll
@@ -2,6 +2,8 @@
;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s
; CHECK-LABEL: {{^}}main:
+; CHECK: s_mov_b32 m0, 0
+; CHECK-NOT: s_mov_b32 m0
; CHECK: s_sendmsg Gs(emit stream 0)
; CHECK: s_sendmsg Gs(cut stream 1)
; CHECK: s_sendmsg Gs(emit-cut stream 2)
diff --git a/test/CodeGen/R600/llvm.SI.tid.ll b/test/CodeGen/R600/llvm.SI.tid.ll
index 64efd2daf338..f6e6d7050ba7 100644
--- a/test/CodeGen/R600/llvm.SI.tid.ll
+++ b/test/CodeGen/R600/llvm.SI.tid.ll
@@ -1,7 +1,9 @@
-;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s
+;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=GCN %s
+;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefix=VI --check-prefix=GCN %s
-;CHECK: v_mbcnt_lo_u32_b32_e64
-;CHECK: v_mbcnt_hi_u32_b32_e32
+;GCN: v_mbcnt_lo_u32_b32_e64
+;SI: v_mbcnt_hi_u32_b32_e32
+;VI: v_mbcnt_hi_u32_b32_e64
define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg) "ShaderType"="0" {
main_body:
diff --git a/test/CodeGen/R600/llvm.amdgpu.dp4.ll b/test/CodeGen/R600/llvm.amdgpu.dp4.ll
index 812b6a40ee59..036cd2ca82a6 100644
--- a/test/CodeGen/R600/llvm.amdgpu.dp4.ll
+++ b/test/CodeGen/R600/llvm.amdgpu.dp4.ll
@@ -3,8 +3,8 @@
declare float @llvm.AMDGPU.dp4(<4 x float>, <4 x float>) nounwind readnone
define void @test_dp4(float addrspace(1)* %out, <4 x float> addrspace(1)* %a, <4 x float> addrspace(1)* %b) nounwind {
- %src0 = load <4 x float> addrspace(1)* %a, align 16
- %src1 = load <4 x float> addrspace(1)* %b, align 16
+ %src0 = load <4 x float>, <4 x float> addrspace(1)* %a, align 16
+ %src1 = load <4 x float>, <4 x float> addrspace(1)* %b, align 16
%dp4 = call float @llvm.AMDGPU.dp4(<4 x float> %src0, <4 x float> %src1) nounwind readnone
store float %dp4, float addrspace(1)* %out, align 4
ret void
diff --git a/test/CodeGen/R600/llvm.floor.ll b/test/CodeGen/R600/llvm.floor.ll
deleted file mode 100644
index 1016ff75ce9b..000000000000
--- a/test/CodeGen/R600/llvm.floor.ll
+++ /dev/null
@@ -1,54 +0,0 @@
-; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=R600-CHECK
-; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI-CHECK
-
-; R600-CHECK: {{^}}f32:
-; R600-CHECK: FLOOR
-; SI-CHECK: {{^}}f32:
-; SI-CHECK: v_floor_f32_e32
-define void @f32(float addrspace(1)* %out, float %in) {
-entry:
- %0 = call float @llvm.floor.f32(float %in)
- store float %0, float addrspace(1)* %out
- ret void
-}
-
-; R600-CHECK: {{^}}v2f32:
-; R600-CHECK: FLOOR
-; R600-CHECK: FLOOR
-; SI-CHECK: {{^}}v2f32:
-; SI-CHECK: v_floor_f32_e32
-; SI-CHECK: v_floor_f32_e32
-define void @v2f32(<2 x float> addrspace(1)* %out, <2 x float> %in) {
-entry:
- %0 = call <2 x float> @llvm.floor.v2f32(<2 x float> %in)
- store <2 x float> %0, <2 x float> addrspace(1)* %out
- ret void
-}
-
-; R600-CHECK: {{^}}v4f32:
-; R600-CHECK: FLOOR
-; R600-CHECK: FLOOR
-; R600-CHECK: FLOOR
-; R600-CHECK: FLOOR
-; SI-CHECK: {{^}}v4f32:
-; SI-CHECK: v_floor_f32_e32
-; SI-CHECK: v_floor_f32_e32
-; SI-CHECK: v_floor_f32_e32
-; SI-CHECK: v_floor_f32_e32
-define void @v4f32(<4 x float> addrspace(1)* %out, <4 x float> %in) {
-entry:
- %0 = call <4 x float> @llvm.floor.v4f32(<4 x float> %in)
- store <4 x float> %0, <4 x float> addrspace(1)* %out
- ret void
-}
-
-; Function Attrs: nounwind readonly
-declare float @llvm.floor.f32(float) #0
-
-; Function Attrs: nounwind readonly
-declare <2 x float> @llvm.floor.v2f32(<2 x float>) #0
-
-; Function Attrs: nounwind readonly
-declare <4 x float> @llvm.floor.v4f32(<4 x float>) #0
-
-attributes #0 = { nounwind readonly }
diff --git a/test/CodeGen/R600/llvm.memcpy.ll b/test/CodeGen/R600/llvm.memcpy.ll
index d6f5f6275acf..e491732cf9c5 100644
--- a/test/CodeGen/R600/llvm.memcpy.ll
+++ b/test/CodeGen/R600/llvm.memcpy.ll
@@ -7,39 +7,23 @@ declare void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* nocapture, i8 addrspace
; FUNC-LABEL: {{^}}test_small_memcpy_i64_lds_to_lds_align1:
; SI: ds_read_u8
-; SI: ds_write_b8
; SI: ds_read_u8
-; SI: ds_write_b8
; SI: ds_read_u8
-; SI: ds_write_b8
; SI: ds_read_u8
-; SI: ds_write_b8
; SI: ds_read_u8
-; SI: ds_write_b8
-
; SI: ds_read_u8
-; SI: ds_write_b8
; SI: ds_read_u8
-; SI: ds_write_b8
; SI: ds_read_u8
-; SI: ds_write_b8
+
; SI: ds_read_u8
-; SI: ds_write_b8
; SI: ds_read_u8
-; SI: ds_write_b8
-
; SI: ds_read_u8
-; SI: ds_write_b8
; SI: ds_read_u8
-; SI: ds_write_b8
; SI: ds_read_u8
-; SI: ds_write_b8
; SI: ds_read_u8
-; SI: ds_write_b8
; SI: ds_read_u8
; SI: ds_read_u8
-
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
@@ -66,6 +50,14 @@ declare void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* nocapture, i8 addrspace
; SI: ds_write_b8
; SI: ds_write_b8
; SI: ds_write_b8
+
+; SI: ds_write_b8
+; SI: ds_write_b8
+; SI: ds_write_b8
+; SI: ds_write_b8
+; SI: ds_write_b8
+; SI: ds_write_b8
+; SI: ds_write_b8
; SI: ds_write_b8
; SI: ds_write_b8
@@ -76,6 +68,14 @@ declare void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* nocapture, i8 addrspace
; SI: ds_write_b8
; SI: ds_write_b8
; SI: ds_write_b8
+
+; SI: ds_write_b8
+; SI: ds_write_b8
+; SI: ds_write_b8
+; SI: ds_write_b8
+; SI: ds_write_b8
+; SI: ds_write_b8
+; SI: ds_write_b8
; SI: ds_write_b8
; SI: s_endpgm
diff --git a/test/CodeGen/R600/llvm.rint.f64.ll b/test/CodeGen/R600/llvm.rint.f64.ll
index 2c926341f78a..c63fb1727940 100644
--- a/test/CodeGen/R600/llvm.rint.f64.ll
+++ b/test/CodeGen/R600/llvm.rint.f64.ll
@@ -1,3 +1,4 @@
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
diff --git a/test/CodeGen/R600/llvm.round.f64.ll b/test/CodeGen/R600/llvm.round.f64.ll
new file mode 100644
index 000000000000..3d0f57e33280
--- /dev/null
+++ b/test/CodeGen/R600/llvm.round.f64.ll
@@ -0,0 +1,74 @@
+; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+
+; FUNC-LABEL: {{^}}round_f64:
+; SI: s_endpgm
+define void @round_f64(double addrspace(1)* %out, double %x) #0 {
+ %result = call double @llvm.round.f64(double %x) #1
+ store double %result, double addrspace(1)* %out
+ ret void
+}
+
+; This is a pretty large function, so just test a few of the
+; instructions that are necessary.
+
+; FUNC-LABEL: {{^}}v_round_f64:
+; SI: buffer_load_dwordx2
+; SI: v_bfe_u32 [[EXP:v[0-9]+]], v{{[0-9]+}}, 20, 11
+
+; SI-DAG: v_not_b32_e32
+; SI-DAG: v_not_b32_e32
+
+; SI-DAG: v_cmp_eq_i32
+
+; SI-DAG: s_mov_b32 [[BFIMASK:s[0-9]+]], 0x7fffffff
+; SI-DAG: v_cmp_gt_i32_e64
+; SI-DAG: v_bfi_b32 [[COPYSIGN:v[0-9]+]], [[BFIMASK]]
+
+; SI-DAG: v_cmp_gt_i32_e64
+
+
+; SI: buffer_store_dwordx2
+; SI: s_endpgm
+define void @v_round_f64(double addrspace(1)* %out, double addrspace(1)* %in) #0 {
+ %tid = call i32 @llvm.r600.read.tidig.x() #1
+ %gep = getelementptr double, double addrspace(1)* %in, i32 %tid
+ %out.gep = getelementptr double, double addrspace(1)* %out, i32 %tid
+ %x = load double, double addrspace(1)* %gep
+ %result = call double @llvm.round.f64(double %x) #1
+ store double %result, double addrspace(1)* %out.gep
+ ret void
+}
+
+; FUNC-LABEL: {{^}}round_v2f64:
+; SI: s_endpgm
+define void @round_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %in) #0 {
+ %result = call <2 x double> @llvm.round.v2f64(<2 x double> %in) #1
+ store <2 x double> %result, <2 x double> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}round_v4f64:
+; SI: s_endpgm
+define void @round_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %in) #0 {
+ %result = call <4 x double> @llvm.round.v4f64(<4 x double> %in) #1
+ store <4 x double> %result, <4 x double> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}round_v8f64:
+; SI: s_endpgm
+define void @round_v8f64(<8 x double> addrspace(1)* %out, <8 x double> %in) #0 {
+ %result = call <8 x double> @llvm.round.v8f64(<8 x double> %in) #1
+ store <8 x double> %result, <8 x double> addrspace(1)* %out
+ ret void
+}
+
+declare i32 @llvm.r600.read.tidig.x() #1
+
+declare double @llvm.round.f64(double) #1
+declare <2 x double> @llvm.round.v2f64(<2 x double>) #1
+declare <4 x double> @llvm.round.v4f64(<4 x double>) #1
+declare <8 x double> @llvm.round.v8f64(<8 x double>) #1
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readnone }
diff --git a/test/CodeGen/R600/llvm.round.ll b/test/CodeGen/R600/llvm.round.ll
index bedf4ba72ae4..f5f124d915a5 100644
--- a/test/CodeGen/R600/llvm.round.ll
+++ b/test/CodeGen/R600/llvm.round.ll
@@ -1,17 +1,28 @@
-; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck %s --check-prefix=R600 --check-prefix=FUNC
-
-; FUNC-LABEL: {{^}}f32:
-; R600: FRACT {{.*}}, [[ARG:KC[0-9]\[[0-9]+\]\.[XYZW]]]
-; R600-DAG: ADD {{.*}}, -0.5
-; R600-DAG: CEIL {{.*}} [[ARG]]
-; R600-DAG: FLOOR {{.*}} [[ARG]]
-; R600-DAG: CNDGE
-; R600-DAG: CNDGT
-; R600: CNDGE {{[^,]+}}, [[ARG]]
-define void @f32(float addrspace(1)* %out, float %in) {
-entry:
- %0 = call float @llvm.round.f32(float %in)
- store float %0, float addrspace(1)* %out
+; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s
+
+; FUNC-LABEL: {{^}}round_f32:
+; SI-DAG: s_load_dword [[SX:s[0-9]+]]
+; SI-DAG: s_mov_b32 [[K:s[0-9]+]], 0x7fffffff
+; SI: v_trunc_f32_e32 [[TRUNC:v[0-9]+]], [[SX]]
+; SI: v_sub_f32_e32 [[SUB:v[0-9]+]], [[SX]], [[TRUNC]]
+; SI: v_mov_b32_e32 [[VX:v[0-9]+]], [[SX]]
+; SI: v_bfi_b32 [[COPYSIGN:v[0-9]+]], [[K]], 1.0, [[VX]]
+; SI: v_cmp_le_f32_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], 0.5, |[[SUB]]|
+; SI: v_cndmask_b32_e64 [[SEL:v[0-9]+]], 0, [[VX]], [[CMP]]
+; SI: v_add_f32_e32 [[RESULT:v[0-9]+]], [[SEL]], [[TRUNC]]
+; SI: buffer_store_dword [[RESULT]]
+
+; R600: TRUNC {{.*}}, [[ARG:KC[0-9]\[[0-9]+\]\.[XYZW]]]
+; R600-DAG: ADD {{.*}},
+; R600-DAG: BFI_INT
+; R600-DAG: SETGE
+; R600-DAG: CNDE
+; R600-DAG: ADD
+define void @round_f32(float addrspace(1)* %out, float %x) #0 {
+ %result = call float @llvm.round.f32(float %x) #1
+ store float %result, float addrspace(1)* %out
ret void
}
@@ -20,24 +31,37 @@ entry:
; a test for the scalar case, so the vector tests just check that the
; compiler doesn't crash.
-; FUNC-LABEL: v2f32
+; FUNC-LABEL: {{^}}round_v2f32:
+; SI: s_endpgm
; R600: CF_END
-define void @v2f32(<2 x float> addrspace(1)* %out, <2 x float> %in) {
-entry:
- %0 = call <2 x float> @llvm.round.v2f32(<2 x float> %in)
- store <2 x float> %0, <2 x float> addrspace(1)* %out
+define void @round_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %in) #0 {
+ %result = call <2 x float> @llvm.round.v2f32(<2 x float> %in) #1
+ store <2 x float> %result, <2 x float> addrspace(1)* %out
ret void
}
-; FUNC-LABEL: v4f32
+; FUNC-LABEL: {{^}}round_v4f32:
+; SI: s_endpgm
; R600: CF_END
-define void @v4f32(<4 x float> addrspace(1)* %out, <4 x float> %in) {
-entry:
- %0 = call <4 x float> @llvm.round.v4f32(<4 x float> %in)
- store <4 x float> %0, <4 x float> addrspace(1)* %out
+define void @round_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %in) #0 {
+ %result = call <4 x float> @llvm.round.v4f32(<4 x float> %in) #1
+ store <4 x float> %result, <4 x float> addrspace(1)* %out
ret void
}
-declare float @llvm.round.f32(float)
-declare <2 x float> @llvm.round.v2f32(<2 x float>)
-declare <4 x float> @llvm.round.v4f32(<4 x float>)
+; FUNC-LABEL: {{^}}round_v8f32:
+; SI: s_endpgm
+; R600: CF_END
+define void @round_v8f32(<8 x float> addrspace(1)* %out, <8 x float> %in) #0 {
+ %result = call <8 x float> @llvm.round.v8f32(<8 x float> %in) #1
+ store <8 x float> %result, <8 x float> addrspace(1)* %out
+ ret void
+}
+
+declare float @llvm.round.f32(float) #1
+declare <2 x float> @llvm.round.v2f32(<2 x float>) #1
+declare <4 x float> @llvm.round.v4f32(<4 x float>) #1
+declare <8 x float> @llvm.round.v8f32(<8 x float>) #1
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readnone }
diff --git a/test/CodeGen/R600/llvm.sqrt.ll b/test/CodeGen/R600/llvm.sqrt.ll
index 1f8df891654b..c6da047f5392 100644
--- a/test/CodeGen/R600/llvm.sqrt.ll
+++ b/test/CodeGen/R600/llvm.sqrt.ll
@@ -1,6 +1,6 @@
; RUN: llc < %s -march=r600 --mcpu=redwood | FileCheck %s --check-prefix=R600
-; RUN: llc < %s -march=r600 --mcpu=SI -verify-machineinstrs| FileCheck %s --check-prefix=SI
-; RUN: llc < %s -march=r600 --mcpu=tonga -verify-machineinstrs| FileCheck %s --check-prefix=SI
+; RUN: llc < %s -march=amdgcn --mcpu=SI -verify-machineinstrs| FileCheck %s --check-prefix=SI
+; RUN: llc < %s -march=amdgcn --mcpu=tonga -verify-machineinstrs| FileCheck %s --check-prefix=SI
; R600-LABEL: {{^}}sqrt_f32:
; R600: RECIPSQRT_CLAMPED * T{{[0-9]\.[XYZW]}}, KC0[2].Z
@@ -50,6 +50,56 @@ entry:
ret void
}
+; SI-LABEL: {{^}}elim_redun_check:
+; SI: v_sqrt_f32_e32
+; SI-NOT: v_cndmask
+define void @elim_redun_check(float addrspace(1)* %out, float %in) {
+entry:
+ %sqrt = call float @llvm.sqrt.f32(float %in)
+ %cmp = fcmp olt float %in, -0.000000e+00
+ %res = select i1 %cmp, float 0x7FF8000000000000, float %sqrt
+ store float %res, float addrspace(1)* %out
+ ret void
+}
+
+; SI-LABEL: {{^}}elim_redun_check_ult:
+; SI: v_sqrt_f32_e32
+; SI-NOT: v_cndmask
+define void @elim_redun_check_ult(float addrspace(1)* %out, float %in) {
+entry:
+ %sqrt = call float @llvm.sqrt.f32(float %in)
+ %cmp = fcmp ult float %in, -0.000000e+00
+ %res = select i1 %cmp, float 0x7FF8000000000000, float %sqrt
+ store float %res, float addrspace(1)* %out
+ ret void
+}
+
+; SI-LABEL: {{^}}elim_redun_check_v2:
+; SI: v_sqrt_f32_e32
+; SI: v_sqrt_f32_e32
+; SI-NOT: v_cndmask
+define void @elim_redun_check_v2(<2 x float> addrspace(1)* %out, <2 x float> %in) {
+entry:
+ %sqrt = call <2 x float> @llvm.sqrt.v2f32(<2 x float> %in)
+ %cmp = fcmp olt <2 x float> %in, <float -0.000000e+00, float -0.000000e+00>
+ %res = select <2 x i1> %cmp, <2 x float> <float 0x7FF8000000000000, float 0x7FF8000000000000>, <2 x float> %sqrt
+ store <2 x float> %res, <2 x float> addrspace(1)* %out
+ ret void
+}
+
+; SI-LABEL: {{^}}elim_redun_check_v2_ult:
+; SI: v_sqrt_f32_e32
+; SI: v_sqrt_f32_e32
+; SI-NOT: v_cndmask
+define void @elim_redun_check_v2_ult(<2 x float> addrspace(1)* %out, <2 x float> %in) {
+entry:
+ %sqrt = call <2 x float> @llvm.sqrt.v2f32(<2 x float> %in)
+ %cmp = fcmp ult <2 x float> %in, <float -0.000000e+00, float -0.000000e+00>
+ %res = select <2 x i1> %cmp, <2 x float> <float 0x7FF8000000000000, float 0x7FF8000000000000>, <2 x float> %sqrt
+ store <2 x float> %res, <2 x float> addrspace(1)* %out
+ ret void
+}
+
declare float @llvm.sqrt.f32(float %in)
declare <2 x float> @llvm.sqrt.v2f32(<2 x float> %in)
declare <4 x float> @llvm.sqrt.v4f32(<4 x float> %in)
diff --git a/test/CodeGen/R600/llvm.trunc.ll b/test/CodeGen/R600/llvm.trunc.ll
deleted file mode 100644
index 5585477ef294..000000000000
--- a/test/CodeGen/R600/llvm.trunc.ll
+++ /dev/null
@@ -1,13 +0,0 @@
-; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
-
-; CHECK-LABEL: {{^}}trunc_f32:
-; CHECK: TRUNC
-
-define void @trunc_f32(float addrspace(1)* %out, float %in) {
-entry:
- %0 = call float @llvm.trunc.f32(float %in)
- store float %0, float addrspace(1)* %out
- ret void
-}
-
-declare float @llvm.trunc.f32(float)
diff --git a/test/CodeGen/R600/load-i1.ll b/test/CodeGen/R600/load-i1.ll
index 315c0a37ebf3..0ca49fde3e7b 100644
--- a/test/CodeGen/R600/load-i1.ll
+++ b/test/CodeGen/R600/load-i1.ll
@@ -11,7 +11,7 @@
; EG: VTX_READ_8
; EG: AND_INT
define void @global_copy_i1_to_i1(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
- %load = load i1 addrspace(1)* %in
+ %load = load i1, i1 addrspace(1)* %in
store i1 %load, i1 addrspace(1)* %out, align 1
ret void
}
@@ -26,7 +26,7 @@ define void @global_copy_i1_to_i1(i1 addrspace(1)* %out, i1 addrspace(1)* %in) n
; EG: AND_INT
; EG: LDS_BYTE_WRITE
define void @local_copy_i1_to_i1(i1 addrspace(3)* %out, i1 addrspace(3)* %in) nounwind {
- %load = load i1 addrspace(3)* %in
+ %load = load i1, i1 addrspace(3)* %in
store i1 %load, i1 addrspace(3)* %out, align 1
ret void
}
@@ -40,7 +40,7 @@ define void @local_copy_i1_to_i1(i1 addrspace(3)* %out, i1 addrspace(3)* %in) no
; EG: VTX_READ_8
; EG: AND_INT
define void @constant_copy_i1_to_i1(i1 addrspace(1)* %out, i1 addrspace(2)* %in) nounwind {
- %load = load i1 addrspace(2)* %in
+ %load = load i1, i1 addrspace(2)* %in
store i1 %load, i1 addrspace(1)* %out, align 1
ret void
}
@@ -54,7 +54,7 @@ define void @constant_copy_i1_to_i1(i1 addrspace(1)* %out, i1 addrspace(2)* %in)
; EG: VTX_READ_8
; EG: BFE_INT
define void @global_sextload_i1_to_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
- %load = load i1 addrspace(1)* %in
+ %load = load i1, i1 addrspace(1)* %in
%ext = sext i1 %load to i32
store i32 %ext, i32 addrspace(1)* %out, align 4
ret void
@@ -66,7 +66,7 @@ define void @global_sextload_i1_to_i32(i32 addrspace(1)* %out, i1 addrspace(1)*
; SI: s_endpgm
define void @global_zextload_i1_to_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
- %load = load i1 addrspace(1)* %in
+ %load = load i1, i1 addrspace(1)* %in
%ext = zext i1 %load to i32
store i32 %ext, i32 addrspace(1)* %out, align 4
ret void
@@ -78,7 +78,7 @@ define void @global_zextload_i1_to_i32(i32 addrspace(1)* %out, i1 addrspace(1)*
; SI: buffer_store_dwordx2
; SI: s_endpgm
define void @global_sextload_i1_to_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
- %load = load i1 addrspace(1)* %in
+ %load = load i1, i1 addrspace(1)* %in
%ext = sext i1 %load to i64
store i64 %ext, i64 addrspace(1)* %out, align 4
ret void
@@ -90,7 +90,7 @@ define void @global_sextload_i1_to_i64(i64 addrspace(1)* %out, i1 addrspace(1)*
; SI: buffer_store_dwordx2
; SI: s_endpgm
define void @global_zextload_i1_to_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
- %load = load i1 addrspace(1)* %in
+ %load = load i1, i1 addrspace(1)* %in
%ext = zext i1 %load to i64
store i64 %ext, i64 addrspace(1)* %out, align 4
ret void
diff --git a/test/CodeGen/R600/load-input-fold.ll b/test/CodeGen/R600/load-input-fold.ll
index 265fa9bfeb42..1daf0e6527b9 100644
--- a/test/CodeGen/R600/load-input-fold.ll
+++ b/test/CodeGen/R600/load-input-fold.ll
@@ -14,71 +14,71 @@ main_body:
%9 = extractelement <4 x float> %reg3, i32 1
%10 = extractelement <4 x float> %reg3, i32 2
%11 = extractelement <4 x float> %reg3, i32 3
- %12 = load <4 x float> addrspace(8)* null
+ %12 = load <4 x float>, <4 x float> addrspace(8)* null
%13 = extractelement <4 x float> %12, i32 0
%14 = fmul float %0, %13
- %15 = load <4 x float> addrspace(8)* null
+ %15 = load <4 x float>, <4 x float> addrspace(8)* null
%16 = extractelement <4 x float> %15, i32 1
%17 = fmul float %0, %16
- %18 = load <4 x float> addrspace(8)* null
+ %18 = load <4 x float>, <4 x float> addrspace(8)* null
%19 = extractelement <4 x float> %18, i32 2
%20 = fmul float %0, %19
- %21 = load <4 x float> addrspace(8)* null
+ %21 = load <4 x float>, <4 x float> addrspace(8)* null
%22 = extractelement <4 x float> %21, i32 3
%23 = fmul float %0, %22
- %24 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+ %24 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
%25 = extractelement <4 x float> %24, i32 0
%26 = fmul float %1, %25
%27 = fadd float %26, %14
- %28 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+ %28 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
%29 = extractelement <4 x float> %28, i32 1
%30 = fmul float %1, %29
%31 = fadd float %30, %17
- %32 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+ %32 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
%33 = extractelement <4 x float> %32, i32 2
%34 = fmul float %1, %33
%35 = fadd float %34, %20
- %36 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+ %36 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
%37 = extractelement <4 x float> %36, i32 3
%38 = fmul float %1, %37
%39 = fadd float %38, %23
- %40 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
+ %40 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
%41 = extractelement <4 x float> %40, i32 0
%42 = fmul float %2, %41
%43 = fadd float %42, %27
- %44 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
+ %44 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
%45 = extractelement <4 x float> %44, i32 1
%46 = fmul float %2, %45
%47 = fadd float %46, %31
- %48 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
+ %48 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
%49 = extractelement <4 x float> %48, i32 2
%50 = fmul float %2, %49
%51 = fadd float %50, %35
- %52 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
+ %52 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
%53 = extractelement <4 x float> %52, i32 3
%54 = fmul float %2, %53
%55 = fadd float %54, %39
- %56 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
+ %56 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
%57 = extractelement <4 x float> %56, i32 0
%58 = fmul float %3, %57
%59 = fadd float %58, %43
- %60 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
+ %60 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
%61 = extractelement <4 x float> %60, i32 1
%62 = fmul float %3, %61
%63 = fadd float %62, %47
- %64 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
+ %64 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
%65 = extractelement <4 x float> %64, i32 2
%66 = fmul float %3, %65
%67 = fadd float %66, %51
- %68 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
+ %68 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
%69 = extractelement <4 x float> %68, i32 3
%70 = fmul float %3, %69
%71 = fadd float %70, %55
- %72 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
+ %72 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
%73 = extractelement <4 x float> %72, i32 0
- %74 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
+ %74 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
%75 = extractelement <4 x float> %74, i32 1
- %76 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
+ %76 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
%77 = extractelement <4 x float> %76, i32 2
%78 = insertelement <4 x float> undef, float %4, i32 0
%79 = insertelement <4 x float> %78, float %5, i32 1
diff --git a/test/CodeGen/R600/load.ll b/test/CodeGen/R600/load.ll
index b71b7cb24c49..93b1b51a0d07 100644
--- a/test/CodeGen/R600/load.ll
+++ b/test/CodeGen/R600/load.ll
@@ -13,7 +13,7 @@
; SI: buffer_load_ubyte v{{[0-9]+}},
define void @load_i8(i32 addrspace(1)* %out, i8 addrspace(1)* %in) {
- %1 = load i8 addrspace(1)* %in
+ %1 = load i8, i8 addrspace(1)* %in
%2 = zext i8 %1 to i32
store i32 %2, i32 addrspace(1)* %out
ret void
@@ -21,14 +21,12 @@ define void @load_i8(i32 addrspace(1)* %out, i8 addrspace(1)* %in) {
; FUNC-LABEL: {{^}}load_i8_sext:
; R600: VTX_READ_8 [[DST:T[0-9]\.[XYZW]]], [[DST]]
-; R600: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_CHAN:[XYZW]]], [[DST]]
-; R600: 24
-; R600: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_CHAN]]
-; R600: 24
+; R600: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST]], 0.0, literal
+; R600: 8
; SI: buffer_load_sbyte
define void @load_i8_sext(i32 addrspace(1)* %out, i8 addrspace(1)* %in) {
entry:
- %0 = load i8 addrspace(1)* %in
+ %0 = load i8, i8 addrspace(1)* %in
%1 = sext i8 %0 to i32
store i32 %1, i32 addrspace(1)* %out
ret void
@@ -41,7 +39,7 @@ entry:
; SI: buffer_load_ubyte
define void @load_v2i8(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(1)* %in) {
entry:
- %0 = load <2 x i8> addrspace(1)* %in
+ %0 = load <2 x i8>, <2 x i8> addrspace(1)* %in
%1 = zext <2 x i8> %0 to <2 x i32>
store <2 x i32> %1, <2 x i32> addrspace(1)* %out
ret void
@@ -50,19 +48,16 @@ entry:
; FUNC-LABEL: {{^}}load_v2i8_sext:
; R600-DAG: VTX_READ_8 [[DST_X:T[0-9]\.[XYZW]]], [[DST_X]]
; R600-DAG: VTX_READ_8 [[DST_Y:T[0-9]\.[XYZW]]], [[DST_Y]]
-; R600-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_X_CHAN:[XYZW]]], [[DST_X]]
-; R600-DAG: 24
-; R600-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_X_CHAN]]
-; R600-DAG: 24
-; R600-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_Y_CHAN:[XYZW]]], [[DST_Y]]
-; R600-DAG: 24
-; R600-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_Y_CHAN]]
-; R600-DAG: 24
+; R600-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST_X]], 0.0, literal
+; R600-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST_Y]], 0.0, literal
+; R600-DAG: 8
+; R600-DAG: 8
+
; SI: buffer_load_sbyte
; SI: buffer_load_sbyte
define void @load_v2i8_sext(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(1)* %in) {
entry:
- %0 = load <2 x i8> addrspace(1)* %in
+ %0 = load <2 x i8>, <2 x i8> addrspace(1)* %in
%1 = sext <2 x i8> %0 to <2 x i32>
store <2 x i32> %1, <2 x i32> addrspace(1)* %out
ret void
@@ -79,7 +74,7 @@ entry:
; SI: buffer_load_ubyte
define void @load_v4i8(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) {
entry:
- %0 = load <4 x i8> addrspace(1)* %in
+ %0 = load <4 x i8>, <4 x i8> addrspace(1)* %in
%1 = zext <4 x i8> %0 to <4 x i32>
store <4 x i32> %1, <4 x i32> addrspace(1)* %out
ret void
@@ -90,29 +85,21 @@ entry:
; R600-DAG: VTX_READ_8 [[DST_Y:T[0-9]\.[XYZW]]], [[DST_Y]]
; R600-DAG: VTX_READ_8 [[DST_Z:T[0-9]\.[XYZW]]], [[DST_Z]]
; R600-DAG: VTX_READ_8 [[DST_W:T[0-9]\.[XYZW]]], [[DST_W]]
-; R600-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_X_CHAN:[XYZW]]], [[DST_X]]
-; R600-DAG: 24
-; R600-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_X_CHAN]]
-; R600-DAG: 24
-; R600-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_Y_CHAN:[XYZW]]], [[DST_Y]]
-; R600-DAG: 24
-; R600-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_Y_CHAN]]
-; R600-DAG: 24
-; R600-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_Z_CHAN:[XYZW]]], [[DST_Z]]
-; R600-DAG: 24
-; R600-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_Z_CHAN]]
-; R600-DAG: 24
-; R600-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_W_CHAN:[XYZW]]], [[DST_W]]
-; R600-DAG: 24
-; R600-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_W_CHAN]]
-; R600-DAG: 24
+; R600-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST_X]], 0.0, literal
+; R600-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST_Y]], 0.0, literal
+; R600-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST_Z]], 0.0, literal
+; R600-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST_W]], 0.0, literal
+; R600-DAG: 8
+; R600-DAG: 8
+; R600-DAG: 8
+; R600-DAG: 8
; SI: buffer_load_sbyte
; SI: buffer_load_sbyte
; SI: buffer_load_sbyte
; SI: buffer_load_sbyte
define void @load_v4i8_sext(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) {
entry:
- %0 = load <4 x i8> addrspace(1)* %in
+ %0 = load <4 x i8>, <4 x i8> addrspace(1)* %in
%1 = sext <4 x i8> %0 to <4 x i32>
store <4 x i32> %1, <4 x i32> addrspace(1)* %out
ret void
@@ -124,7 +111,7 @@ entry:
; SI: buffer_load_ushort
define void @load_i16(i32 addrspace(1)* %out, i16 addrspace(1)* %in) {
entry:
- %0 = load i16 addrspace(1)* %in
+ %0 = load i16, i16 addrspace(1)* %in
%1 = zext i16 %0 to i32
store i32 %1, i32 addrspace(1)* %out
ret void
@@ -132,14 +119,12 @@ entry:
; FUNC-LABEL: {{^}}load_i16_sext:
; R600: VTX_READ_16 [[DST:T[0-9]\.[XYZW]]], [[DST]]
-; R600: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_CHAN:[XYZW]]], [[DST]]
-; R600: 16
-; R600: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_CHAN]]
+; R600: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST]], 0.0, literal
; R600: 16
; SI: buffer_load_sshort
define void @load_i16_sext(i32 addrspace(1)* %out, i16 addrspace(1)* %in) {
entry:
- %0 = load i16 addrspace(1)* %in
+ %0 = load i16, i16 addrspace(1)* %in
%1 = sext i16 %0 to i32
store i32 %1, i32 addrspace(1)* %out
ret void
@@ -152,7 +137,7 @@ entry:
; SI: buffer_load_ushort
define void @load_v2i16(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) {
entry:
- %0 = load <2 x i16> addrspace(1)* %in
+ %0 = load <2 x i16>, <2 x i16> addrspace(1)* %in
%1 = zext <2 x i16> %0 to <2 x i32>
store <2 x i32> %1, <2 x i32> addrspace(1)* %out
ret void
@@ -161,19 +146,15 @@ entry:
; FUNC-LABEL: {{^}}load_v2i16_sext:
; R600-DAG: VTX_READ_16 [[DST_X:T[0-9]\.[XYZW]]], [[DST_X]]
; R600-DAG: VTX_READ_16 [[DST_Y:T[0-9]\.[XYZW]]], [[DST_Y]]
-; R600-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_X_CHAN:[XYZW]]], [[DST_X]]
-; R600-DAG: 16
-; R600-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_X_CHAN]]
+; R600-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST_X]], 0.0, literal
+; R600-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST_Y]], 0.0, literal
; R600-DAG: 16
-; R600-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_Y_CHAN:[XYZW]]], [[DST_Y]]
-; R600-DAG: 16
-; R600-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_Y_CHAN]]
; R600-DAG: 16
; SI: buffer_load_sshort
; SI: buffer_load_sshort
define void @load_v2i16_sext(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) {
entry:
- %0 = load <2 x i16> addrspace(1)* %in
+ %0 = load <2 x i16>, <2 x i16> addrspace(1)* %in
%1 = sext <2 x i16> %0 to <2 x i32>
store <2 x i32> %1, <2 x i32> addrspace(1)* %out
ret void
@@ -190,7 +171,7 @@ entry:
; SI: buffer_load_ushort
define void @load_v4i16(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) {
entry:
- %0 = load <4 x i16> addrspace(1)* %in
+ %0 = load <4 x i16>, <4 x i16> addrspace(1)* %in
%1 = zext <4 x i16> %0 to <4 x i32>
store <4 x i32> %1, <4 x i32> addrspace(1)* %out
ret void
@@ -201,21 +182,13 @@ entry:
; R600-DAG: VTX_READ_16 [[DST_Y:T[0-9]\.[XYZW]]], [[DST_Y]]
; R600-DAG: VTX_READ_16 [[DST_Z:T[0-9]\.[XYZW]]], [[DST_Z]]
; R600-DAG: VTX_READ_16 [[DST_W:T[0-9]\.[XYZW]]], [[DST_W]]
-; R600-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_X_CHAN:[XYZW]]], [[DST_X]]
-; R600-DAG: 16
-; R600-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_X_CHAN]]
+; R600-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST_X]], 0.0, literal
+; R600-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST_Y]], 0.0, literal
+; R600-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST_Z]], 0.0, literal
+; R600-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST_W]], 0.0, literal
; R600-DAG: 16
-; R600-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_Y_CHAN:[XYZW]]], [[DST_Y]]
; R600-DAG: 16
-; R600-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_Y_CHAN]]
; R600-DAG: 16
-; R600-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_Z_CHAN:[XYZW]]], [[DST_Z]]
-; R600-DAG: 16
-; R600-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_Z_CHAN]]
-; R600-DAG: 16
-; R600-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_W_CHAN:[XYZW]]], [[DST_W]]
-; R600-DAG: 16
-; R600-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_W_CHAN]]
; R600-DAG: 16
; SI: buffer_load_sshort
; SI: buffer_load_sshort
@@ -223,7 +196,7 @@ entry:
; SI: buffer_load_sshort
define void @load_v4i16_sext(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) {
entry:
- %0 = load <4 x i16> addrspace(1)* %in
+ %0 = load <4 x i16>, <4 x i16> addrspace(1)* %in
%1 = sext <4 x i16> %0 to <4 x i32>
store <4 x i32> %1, <4 x i32> addrspace(1)* %out
ret void
@@ -236,7 +209,7 @@ entry:
; SI: buffer_load_dword v{{[0-9]+}}
define void @load_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
entry:
- %0 = load i32 addrspace(1)* %in
+ %0 = load i32, i32 addrspace(1)* %in
store i32 %0, i32 addrspace(1)* %out
ret void
}
@@ -248,7 +221,7 @@ entry:
; SI: buffer_load_dword v{{[0-9]+}}
define void @load_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
entry:
- %0 = load float addrspace(1)* %in
+ %0 = load float, float addrspace(1)* %in
store float %0, float addrspace(1)* %out
ret void
}
@@ -260,7 +233,7 @@ entry:
; SI: buffer_load_dwordx2
define void @load_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)* %in) {
entry:
- %0 = load <2 x float> addrspace(1)* %in
+ %0 = load <2 x float>, <2 x float> addrspace(1)* %in
store <2 x float> %0, <2 x float> addrspace(1)* %out
ret void
}
@@ -270,7 +243,7 @@ entry:
; SI: buffer_load_dwordx2
define void @load_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
entry:
- %0 = load i64 addrspace(1)* %in
+ %0 = load i64, i64 addrspace(1)* %in
store i64 %0, i64 addrspace(1)* %out
ret void
}
@@ -284,7 +257,7 @@ entry:
define void @load_i64_sext(i64 addrspace(1)* %out, i32 addrspace(1)* %in) {
entry:
- %0 = load i32 addrspace(1)* %in
+ %0 = load i32, i32 addrspace(1)* %in
%1 = sext i32 %0 to i64
store i64 %1, i64 addrspace(1)* %out
ret void
@@ -295,7 +268,7 @@ entry:
; R600: MEM_RAT
define void @load_i64_zext(i64 addrspace(1)* %out, i32 addrspace(1)* %in) {
entry:
- %0 = load i32 addrspace(1)* %in
+ %0 = load i32, i32 addrspace(1)* %in
%1 = zext i32 %0 to i64
store i64 %1, i64 addrspace(1)* %out
ret void
@@ -315,7 +288,7 @@ entry:
; SI: buffer_load_dword
define void @load_v8i32(<8 x i32> addrspace(1)* %out, <8 x i32> addrspace(1)* %in) {
entry:
- %0 = load <8 x i32> addrspace(1)* %in
+ %0 = load <8 x i32>, <8 x i32> addrspace(1)* %in
store <8 x i32> %0, <8 x i32> addrspace(1)* %out
ret void
}
@@ -344,7 +317,7 @@ entry:
; SI: buffer_load_dword
define void @load_v16i32(<16 x i32> addrspace(1)* %out, <16 x i32> addrspace(1)* %in) {
entry:
- %0 = load <16 x i32> addrspace(1)* %in
+ %0 = load <16 x i32>, <16 x i32> addrspace(1)* %in
store <16 x i32> %0, <16 x i32> addrspace(1)* %out
ret void
}
@@ -356,14 +329,12 @@ entry:
; Load a sign-extended i8 value
; FUNC-LABEL: {{^}}load_const_i8_sext:
; R600: VTX_READ_8 [[DST:T[0-9]\.[XYZW]]], [[DST]]
-; R600: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_CHAN:[XYZW]]], [[DST]]
-; R600: 24
-; R600: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_CHAN]]
-; R600: 24
+; R600: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST]], 0.0, literal
+; R600: 8
; SI: buffer_load_sbyte v{{[0-9]+}},
define void @load_const_i8_sext(i32 addrspace(1)* %out, i8 addrspace(2)* %in) {
entry:
- %0 = load i8 addrspace(2)* %in
+ %0 = load i8, i8 addrspace(2)* %in
%1 = sext i8 %0 to i32
store i32 %1, i32 addrspace(1)* %out
ret void
@@ -375,7 +346,7 @@ entry:
; SI: buffer_load_ubyte v{{[0-9]+}},
define void @load_const_i8_aligned(i32 addrspace(1)* %out, i8 addrspace(2)* %in) {
entry:
- %0 = load i8 addrspace(2)* %in
+ %0 = load i8, i8 addrspace(2)* %in
%1 = zext i8 %0 to i32
store i32 %1, i32 addrspace(1)* %out
ret void
@@ -387,8 +358,8 @@ entry:
; SI: buffer_load_ubyte v{{[0-9]+}},
define void @load_const_i8_unaligned(i32 addrspace(1)* %out, i8 addrspace(2)* %in) {
entry:
- %0 = getelementptr i8 addrspace(2)* %in, i32 1
- %1 = load i8 addrspace(2)* %0
+ %0 = getelementptr i8, i8 addrspace(2)* %in, i32 1
+ %1 = load i8, i8 addrspace(2)* %0
%2 = zext i8 %1 to i32
store i32 %2, i32 addrspace(1)* %out
ret void
@@ -397,14 +368,12 @@ entry:
; Load a sign-extended i16 value
; FUNC-LABEL: {{^}}load_const_i16_sext:
; R600: VTX_READ_16 [[DST:T[0-9]\.[XYZW]]], [[DST]]
-; R600: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_CHAN:[XYZW]]], [[DST]]
-; R600: 16
-; R600: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_CHAN]]
+; R600: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST]], 0.0, literal
; R600: 16
; SI: buffer_load_sshort
define void @load_const_i16_sext(i32 addrspace(1)* %out, i16 addrspace(2)* %in) {
entry:
- %0 = load i16 addrspace(2)* %in
+ %0 = load i16, i16 addrspace(2)* %in
%1 = sext i16 %0 to i32
store i32 %1, i32 addrspace(1)* %out
ret void
@@ -416,7 +385,7 @@ entry:
; SI: buffer_load_ushort
define void @load_const_i16_aligned(i32 addrspace(1)* %out, i16 addrspace(2)* %in) {
entry:
- %0 = load i16 addrspace(2)* %in
+ %0 = load i16, i16 addrspace(2)* %in
%1 = zext i16 %0 to i32
store i32 %1, i32 addrspace(1)* %out
ret void
@@ -428,8 +397,8 @@ entry:
; SI: buffer_load_ushort
define void @load_const_i16_unaligned(i32 addrspace(1)* %out, i16 addrspace(2)* %in) {
entry:
- %0 = getelementptr i16 addrspace(2)* %in, i32 1
- %1 = load i16 addrspace(2)* %0
+ %0 = getelementptr i16, i16 addrspace(2)* %in, i32 1
+ %1 = load i16, i16 addrspace(2)* %0
%2 = zext i16 %1 to i32
store i32 %2, i32 addrspace(1)* %out
ret void
@@ -442,7 +411,7 @@ entry:
; SI: s_load_dword s{{[0-9]+}}
define void @load_const_addrspace_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) {
entry:
- %0 = load i32 addrspace(2)* %in
+ %0 = load i32, i32 addrspace(2)* %in
store i32 %0, i32 addrspace(1)* %out
ret void
}
@@ -453,7 +422,7 @@ entry:
; SI: s_load_dword s{{[0-9]+}}
define void @load_const_addrspace_f32(float addrspace(1)* %out, float addrspace(2)* %in) {
- %1 = load float addrspace(2)* %in
+ %1 = load float, float addrspace(2)* %in
store float %1, float addrspace(1)* %out
ret void
}
@@ -469,7 +438,7 @@ define void @load_const_addrspace_f32(float addrspace(1)* %out, float addrspace(
; SI: s_mov_b32 m0
; SI: ds_read_u8
define void @load_i8_local(i32 addrspace(1)* %out, i8 addrspace(3)* %in) {
- %1 = load i8 addrspace(3)* %in
+ %1 = load i8, i8 addrspace(3)* %in
%2 = zext i8 %1 to i32
store i32 %2, i32 addrspace(1)* %out
ret void
@@ -477,13 +446,13 @@ define void @load_i8_local(i32 addrspace(1)* %out, i8 addrspace(3)* %in) {
; FUNC-LABEL: {{^}}load_i8_sext_local:
; R600: LDS_UBYTE_READ_RET
-; R600: ASHR
+; R600: BFE_INT
; SI-NOT: s_wqm_b64
; SI: s_mov_b32 m0
; SI: ds_read_i8
define void @load_i8_sext_local(i32 addrspace(1)* %out, i8 addrspace(3)* %in) {
entry:
- %0 = load i8 addrspace(3)* %in
+ %0 = load i8, i8 addrspace(3)* %in
%1 = sext i8 %0 to i32
store i32 %1, i32 addrspace(1)* %out
ret void
@@ -498,7 +467,7 @@ entry:
; SI: ds_read_u8
define void @load_v2i8_local(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(3)* %in) {
entry:
- %0 = load <2 x i8> addrspace(3)* %in
+ %0 = load <2 x i8>, <2 x i8> addrspace(3)* %in
%1 = zext <2 x i8> %0 to <2 x i32>
store <2 x i32> %1, <2 x i32> addrspace(1)* %out
ret void
@@ -507,15 +476,15 @@ entry:
; FUNC-LABEL: {{^}}load_v2i8_sext_local:
; R600-DAG: LDS_UBYTE_READ_RET
; R600-DAG: LDS_UBYTE_READ_RET
-; R600-DAG: ASHR
-; R600-DAG: ASHR
+; R600-DAG: BFE_INT
+; R600-DAG: BFE_INT
; SI-NOT: s_wqm_b64
; SI: s_mov_b32 m0
; SI: ds_read_i8
; SI: ds_read_i8
define void @load_v2i8_sext_local(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(3)* %in) {
entry:
- %0 = load <2 x i8> addrspace(3)* %in
+ %0 = load <2 x i8>, <2 x i8> addrspace(3)* %in
%1 = sext <2 x i8> %0 to <2 x i32>
store <2 x i32> %1, <2 x i32> addrspace(1)* %out
ret void
@@ -534,7 +503,7 @@ entry:
; SI: ds_read_u8
define void @load_v4i8_local(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(3)* %in) {
entry:
- %0 = load <4 x i8> addrspace(3)* %in
+ %0 = load <4 x i8>, <4 x i8> addrspace(3)* %in
%1 = zext <4 x i8> %0 to <4 x i32>
store <4 x i32> %1, <4 x i32> addrspace(1)* %out
ret void
@@ -545,10 +514,10 @@ entry:
; R600-DAG: LDS_UBYTE_READ_RET
; R600-DAG: LDS_UBYTE_READ_RET
; R600-DAG: LDS_UBYTE_READ_RET
-; R600-DAG: ASHR
-; R600-DAG: ASHR
-; R600-DAG: ASHR
-; R600-DAG: ASHR
+; R600-DAG: BFE_INT
+; R600-DAG: BFE_INT
+; R600-DAG: BFE_INT
+; R600-DAG: BFE_INT
; SI-NOT: s_wqm_b64
; SI: s_mov_b32 m0
; SI: ds_read_i8
@@ -557,7 +526,7 @@ entry:
; SI: ds_read_i8
define void @load_v4i8_sext_local(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(3)* %in) {
entry:
- %0 = load <4 x i8> addrspace(3)* %in
+ %0 = load <4 x i8>, <4 x i8> addrspace(3)* %in
%1 = sext <4 x i8> %0 to <4 x i32>
store <4 x i32> %1, <4 x i32> addrspace(1)* %out
ret void
@@ -571,7 +540,7 @@ entry:
; SI: ds_read_u16
define void @load_i16_local(i32 addrspace(1)* %out, i16 addrspace(3)* %in) {
entry:
- %0 = load i16 addrspace(3)* %in
+ %0 = load i16, i16 addrspace(3)* %in
%1 = zext i16 %0 to i32
store i32 %1, i32 addrspace(1)* %out
ret void
@@ -579,13 +548,13 @@ entry:
; FUNC-LABEL: {{^}}load_i16_sext_local:
; R600: LDS_USHORT_READ_RET
-; R600: ASHR
+; R600: BFE_INT
; SI-NOT: s_wqm_b64
; SI: s_mov_b32 m0
; SI: ds_read_i16
define void @load_i16_sext_local(i32 addrspace(1)* %out, i16 addrspace(3)* %in) {
entry:
- %0 = load i16 addrspace(3)* %in
+ %0 = load i16, i16 addrspace(3)* %in
%1 = sext i16 %0 to i32
store i32 %1, i32 addrspace(1)* %out
ret void
@@ -600,7 +569,7 @@ entry:
; SI: ds_read_u16
define void @load_v2i16_local(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(3)* %in) {
entry:
- %0 = load <2 x i16> addrspace(3)* %in
+ %0 = load <2 x i16>, <2 x i16> addrspace(3)* %in
%1 = zext <2 x i16> %0 to <2 x i32>
store <2 x i32> %1, <2 x i32> addrspace(1)* %out
ret void
@@ -609,15 +578,15 @@ entry:
; FUNC-LABEL: {{^}}load_v2i16_sext_local:
; R600-DAG: LDS_USHORT_READ_RET
; R600-DAG: LDS_USHORT_READ_RET
-; R600-DAG: ASHR
-; R600-DAG: ASHR
+; R600-DAG: BFE_INT
+; R600-DAG: BFE_INT
; SI-NOT: s_wqm_b64
; SI: s_mov_b32 m0
; SI: ds_read_i16
; SI: ds_read_i16
define void @load_v2i16_sext_local(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(3)* %in) {
entry:
- %0 = load <2 x i16> addrspace(3)* %in
+ %0 = load <2 x i16>, <2 x i16> addrspace(3)* %in
%1 = sext <2 x i16> %0 to <2 x i32>
store <2 x i32> %1, <2 x i32> addrspace(1)* %out
ret void
@@ -636,7 +605,7 @@ entry:
; SI: ds_read_u16
define void @load_v4i16_local(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(3)* %in) {
entry:
- %0 = load <4 x i16> addrspace(3)* %in
+ %0 = load <4 x i16>, <4 x i16> addrspace(3)* %in
%1 = zext <4 x i16> %0 to <4 x i32>
store <4 x i32> %1, <4 x i32> addrspace(1)* %out
ret void
@@ -647,10 +616,10 @@ entry:
; R600-DAG: LDS_USHORT_READ_RET
; R600-DAG: LDS_USHORT_READ_RET
; R600-DAG: LDS_USHORT_READ_RET
-; R600-DAG: ASHR
-; R600-DAG: ASHR
-; R600-DAG: ASHR
-; R600-DAG: ASHR
+; R600-DAG: BFE_INT
+; R600-DAG: BFE_INT
+; R600-DAG: BFE_INT
+; R600-DAG: BFE_INT
; SI-NOT: s_wqm_b64
; SI: s_mov_b32 m0
; SI: ds_read_i16
@@ -659,7 +628,7 @@ entry:
; SI: ds_read_i16
define void @load_v4i16_sext_local(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(3)* %in) {
entry:
- %0 = load <4 x i16> addrspace(3)* %in
+ %0 = load <4 x i16>, <4 x i16> addrspace(3)* %in
%1 = sext <4 x i16> %0 to <4 x i32>
store <4 x i32> %1, <4 x i32> addrspace(1)* %out
ret void
@@ -673,7 +642,7 @@ entry:
; SI: ds_read_b32
define void @load_i32_local(i32 addrspace(1)* %out, i32 addrspace(3)* %in) {
entry:
- %0 = load i32 addrspace(3)* %in
+ %0 = load i32, i32 addrspace(3)* %in
store i32 %0, i32 addrspace(1)* %out
ret void
}
@@ -685,7 +654,7 @@ entry:
; SI: ds_read_b32
define void @load_f32_local(float addrspace(1)* %out, float addrspace(3)* %in) {
entry:
- %0 = load float addrspace(3)* %in
+ %0 = load float, float addrspace(3)* %in
store float %0, float addrspace(1)* %out
ret void
}
@@ -698,7 +667,7 @@ entry:
; SI: ds_read_b64
define void @load_v2f32_local(<2 x float> addrspace(1)* %out, <2 x float> addrspace(3)* %in) {
entry:
- %0 = load <2 x float> addrspace(3)* %in
+ %0 = load <2 x float>, <2 x float> addrspace(3)* %in
store <2 x float> %0, <2 x float> addrspace(1)* %out
ret void
}
@@ -711,10 +680,10 @@ entry:
; SI-DAG: ds_read_b32
; SI-DAG: ds_read2_b32
define void @load_i32_v2i32_local(<2 x i32> addrspace(1)* %out, i32 addrspace(3)* %in) {
- %scalar = load i32 addrspace(3)* %in
+ %scalar = load i32, i32 addrspace(3)* %in
%tmp0 = bitcast i32 addrspace(3)* %in to <2 x i32> addrspace(3)*
- %vec_ptr = getelementptr <2 x i32> addrspace(3)* %tmp0, i32 2
- %vec0 = load <2 x i32> addrspace(3)* %vec_ptr, align 4
+ %vec_ptr = getelementptr <2 x i32>, <2 x i32> addrspace(3)* %tmp0, i32 2
+ %vec0 = load <2 x i32>, <2 x i32> addrspace(3)* %vec_ptr, align 4
%vec1 = insertelement <2 x i32> <i32 0, i32 0>, i32 %scalar, i32 0
%vec = add <2 x i32> %vec0, %vec1
store <2 x i32> %vec, <2 x i32> addrspace(1)* %out
@@ -732,9 +701,9 @@ define void @load_i32_v2i32_local(<2 x i32> addrspace(1)* %out, i32 addrspace(3)
; R600: LDS_READ_RET
define void @load_i32_local_const_ptr(i32 addrspace(1)* %out, i32 addrspace(3)* %in) {
entry:
- %tmp0 = getelementptr [512 x i32] addrspace(3)* @lds, i32 0, i32 1
- %tmp1 = load i32 addrspace(3)* %tmp0
- %tmp2 = getelementptr i32 addrspace(1)* %out, i32 1
+ %tmp0 = getelementptr [512 x i32], [512 x i32] addrspace(3)* @lds, i32 0, i32 1
+ %tmp1 = load i32, i32 addrspace(3)* %tmp0
+ %tmp2 = getelementptr i32, i32 addrspace(1)* %out, i32 1
store i32 %tmp1, i32 addrspace(1)* %tmp2
ret void
}
diff --git a/test/CodeGen/R600/load.vec.ll b/test/CodeGen/R600/load.vec.ll
index 346d8dc0c6e4..02f883cd8e9c 100644
--- a/test/CodeGen/R600/load.vec.ll
+++ b/test/CodeGen/R600/load.vec.ll
@@ -8,7 +8,7 @@
; SI: {{^}}load_v2i32:
; SI: buffer_load_dwordx2 v[{{[0-9]+:[0-9]+}}]
define void @load_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
- %a = load <2 x i32> addrspace(1) * %in
+ %a = load <2 x i32>, <2 x i32> addrspace(1) * %in
store <2 x i32> %a, <2 x i32> addrspace(1)* %out
ret void
}
@@ -19,7 +19,7 @@ define void @load_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %i
; SI: {{^}}load_v4i32:
; SI: buffer_load_dwordx4 v[{{[0-9]+:[0-9]+}}]
define void @load_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
- %a = load <4 x i32> addrspace(1) * %in
+ %a = load <4 x i32>, <4 x i32> addrspace(1) * %in
store <4 x i32> %a, <4 x i32> addrspace(1)* %out
ret void
}
diff --git a/test/CodeGen/R600/load64.ll b/test/CodeGen/R600/load64.ll
index cb3d65466061..74beabdc0076 100644
--- a/test/CodeGen/R600/load64.ll
+++ b/test/CodeGen/R600/load64.ll
@@ -6,7 +6,7 @@
; CHECK: buffer_load_dwordx2 v[{{[0-9]+:[0-9]+}}]
; CHECK: buffer_store_dwordx2 v[{{[0-9]+:[0-9]+}}]
define void @load_f64(double addrspace(1)* %out, double addrspace(1)* %in) {
- %1 = load double addrspace(1)* %in
+ %1 = load double, double addrspace(1)* %in
store double %1, double addrspace(1)* %out
ret void
}
@@ -15,7 +15,7 @@ define void @load_f64(double addrspace(1)* %out, double addrspace(1)* %in) {
; CHECK: buffer_load_dwordx2 v[{{[0-9]+:[0-9]+}}]
; CHECK: buffer_store_dwordx2 v[{{[0-9]+:[0-9]+}}]
define void @load_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
- %tmp = load i64 addrspace(1)* %in
+ %tmp = load i64, i64 addrspace(1)* %in
store i64 %tmp, i64 addrspace(1)* %out, align 8
ret void
}
@@ -25,7 +25,7 @@ define void @load_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
; CHECK: s_load_dwordx2 s[{{[0-9]+:[0-9]+}}]
; CHECK: buffer_store_dwordx2 v[{{[0-9]+:[0-9]+}}]
define void @load_const_addrspace_f64(double addrspace(1)* %out, double addrspace(2)* %in) {
- %1 = load double addrspace(2)* %in
+ %1 = load double, double addrspace(2)* %in
store double %1, double addrspace(1)* %out
ret void
}
diff --git a/test/CodeGen/R600/local-64.ll b/test/CodeGen/R600/local-64.ll
index f975bc1f56b0..33f3159d13eb 100644
--- a/test/CodeGen/R600/local-64.ll
+++ b/test/CodeGen/R600/local-64.ll
@@ -1,32 +1,33 @@
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs< %s | FileCheck --check-prefix=SI --check-prefix=BOTH %s
; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs< %s | FileCheck --check-prefix=CI --check-prefix=BOTH %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs< %s | FileCheck --check-prefix=CI --check-prefix=BOTH %s
; BOTH-LABEL: {{^}}local_i32_load
-; BOTH: ds_read_b32 [[REG:v[0-9]+]], v{{[0-9]+}} offset:28 [M0]
+; BOTH: ds_read_b32 [[REG:v[0-9]+]], v{{[0-9]+}} offset:28
; BOTH: buffer_store_dword [[REG]],
define void @local_i32_load(i32 addrspace(1)* %out, i32 addrspace(3)* %in) nounwind {
- %gep = getelementptr i32 addrspace(3)* %in, i32 7
- %val = load i32 addrspace(3)* %gep, align 4
+ %gep = getelementptr i32, i32 addrspace(3)* %in, i32 7
+ %val = load i32, i32 addrspace(3)* %gep, align 4
store i32 %val, i32 addrspace(1)* %out, align 4
ret void
}
; BOTH-LABEL: {{^}}local_i32_load_0_offset
-; BOTH: ds_read_b32 [[REG:v[0-9]+]], v{{[0-9]+}} [M0]
+; BOTH: ds_read_b32 [[REG:v[0-9]+]], v{{[0-9]+}}
; BOTH: buffer_store_dword [[REG]],
define void @local_i32_load_0_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %in) nounwind {
- %val = load i32 addrspace(3)* %in, align 4
+ %val = load i32, i32 addrspace(3)* %in, align 4
store i32 %val, i32 addrspace(1)* %out, align 4
ret void
}
; BOTH-LABEL: {{^}}local_i8_load_i16_max_offset:
; BOTH-NOT: ADD
-; BOTH: ds_read_u8 [[REG:v[0-9]+]], {{v[0-9]+}} offset:65535 [M0]
+; BOTH: ds_read_u8 [[REG:v[0-9]+]], {{v[0-9]+}} offset:65535
; BOTH: buffer_store_byte [[REG]],
define void @local_i8_load_i16_max_offset(i8 addrspace(1)* %out, i8 addrspace(3)* %in) nounwind {
- %gep = getelementptr i8 addrspace(3)* %in, i32 65535
- %val = load i8 addrspace(3)* %gep, align 4
+ %gep = getelementptr i8, i8 addrspace(3)* %in, i32 65535
+ %val = load i8, i8 addrspace(3)* %gep, align 4
store i8 %val, i8 addrspace(1)* %out, align 4
ret void
}
@@ -37,67 +38,67 @@ define void @local_i8_load_i16_max_offset(i8 addrspace(1)* %out, i8 addrspace(3)
; SI: s_or_b32 [[ADDR:s[0-9]+]], s{{[0-9]+}}, 0x10000
; CI: s_add_i32 [[ADDR:s[0-9]+]], s{{[0-9]+}}, 0x10000
; BOTH: v_mov_b32_e32 [[VREGADDR:v[0-9]+]], [[ADDR]]
-; BOTH: ds_read_u8 [[REG:v[0-9]+]], [[VREGADDR]] [M0]
+; BOTH: ds_read_u8 [[REG:v[0-9]+]], [[VREGADDR]]
; BOTH: buffer_store_byte [[REG]],
define void @local_i8_load_over_i16_max_offset(i8 addrspace(1)* %out, i8 addrspace(3)* %in) nounwind {
- %gep = getelementptr i8 addrspace(3)* %in, i32 65536
- %val = load i8 addrspace(3)* %gep, align 4
+ %gep = getelementptr i8, i8 addrspace(3)* %in, i32 65536
+ %val = load i8, i8 addrspace(3)* %gep, align 4
store i8 %val, i8 addrspace(1)* %out, align 4
ret void
}
; BOTH-LABEL: {{^}}local_i64_load:
; BOTH-NOT: ADD
-; BOTH: ds_read_b64 [[REG:v[[0-9]+:[0-9]+]]], v{{[0-9]+}} offset:56 [M0]
+; BOTH: ds_read_b64 [[REG:v[[0-9]+:[0-9]+]]], v{{[0-9]+}} offset:56
; BOTH: buffer_store_dwordx2 [[REG]],
define void @local_i64_load(i64 addrspace(1)* %out, i64 addrspace(3)* %in) nounwind {
- %gep = getelementptr i64 addrspace(3)* %in, i32 7
- %val = load i64 addrspace(3)* %gep, align 8
+ %gep = getelementptr i64, i64 addrspace(3)* %in, i32 7
+ %val = load i64, i64 addrspace(3)* %gep, align 8
store i64 %val, i64 addrspace(1)* %out, align 8
ret void
}
; BOTH-LABEL: {{^}}local_i64_load_0_offset
-; BOTH: ds_read_b64 [[REG:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}} [M0]
+; BOTH: ds_read_b64 [[REG:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}
; BOTH: buffer_store_dwordx2 [[REG]],
define void @local_i64_load_0_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %in) nounwind {
- %val = load i64 addrspace(3)* %in, align 8
+ %val = load i64, i64 addrspace(3)* %in, align 8
store i64 %val, i64 addrspace(1)* %out, align 8
ret void
}
; BOTH-LABEL: {{^}}local_f64_load:
; BOTH-NOT: ADD
-; BOTH: ds_read_b64 [[REG:v[[0-9]+:[0-9]+]]], v{{[0-9]+}} offset:56 [M0]
+; BOTH: ds_read_b64 [[REG:v[[0-9]+:[0-9]+]]], v{{[0-9]+}} offset:56
; BOTH: buffer_store_dwordx2 [[REG]],
define void @local_f64_load(double addrspace(1)* %out, double addrspace(3)* %in) nounwind {
- %gep = getelementptr double addrspace(3)* %in, i32 7
- %val = load double addrspace(3)* %gep, align 8
+ %gep = getelementptr double, double addrspace(3)* %in, i32 7
+ %val = load double, double addrspace(3)* %gep, align 8
store double %val, double addrspace(1)* %out, align 8
ret void
}
; BOTH-LABEL: {{^}}local_f64_load_0_offset
-; BOTH: ds_read_b64 [[REG:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}} [M0]
+; BOTH: ds_read_b64 [[REG:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}
; BOTH: buffer_store_dwordx2 [[REG]],
define void @local_f64_load_0_offset(double addrspace(1)* %out, double addrspace(3)* %in) nounwind {
- %val = load double addrspace(3)* %in, align 8
+ %val = load double, double addrspace(3)* %in, align 8
store double %val, double addrspace(1)* %out, align 8
ret void
}
; BOTH-LABEL: {{^}}local_i64_store:
; BOTH-NOT: ADD
-; BOTH: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:56 [M0]
+; BOTH: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:56
define void @local_i64_store(i64 addrspace(3)* %out) nounwind {
- %gep = getelementptr i64 addrspace(3)* %out, i32 7
+ %gep = getelementptr i64, i64 addrspace(3)* %out, i32 7
store i64 5678, i64 addrspace(3)* %gep, align 8
ret void
}
; BOTH-LABEL: {{^}}local_i64_store_0_offset:
; BOTH-NOT: ADD
-; BOTH: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} [M0]
+; BOTH: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}
define void @local_i64_store_0_offset(i64 addrspace(3)* %out) nounwind {
store i64 1234, i64 addrspace(3)* %out, align 8
ret void
@@ -105,15 +106,15 @@ define void @local_i64_store_0_offset(i64 addrspace(3)* %out) nounwind {
; BOTH-LABEL: {{^}}local_f64_store:
; BOTH-NOT: ADD
-; BOTH: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:56 [M0]
+; BOTH: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:56
define void @local_f64_store(double addrspace(3)* %out) nounwind {
- %gep = getelementptr double addrspace(3)* %out, i32 7
+ %gep = getelementptr double, double addrspace(3)* %out, i32 7
store double 16.0, double addrspace(3)* %gep, align 8
ret void
}
; BOTH-LABEL: {{^}}local_f64_store_0_offset
-; BOTH: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} [M0]
+; BOTH: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}
define void @local_f64_store_0_offset(double addrspace(3)* %out) nounwind {
store double 20.0, double addrspace(3)* %out, align 8
ret void
@@ -121,19 +122,19 @@ define void @local_f64_store_0_offset(double addrspace(3)* %out) nounwind {
; BOTH-LABEL: {{^}}local_v2i64_store:
; BOTH-NOT: ADD
-; BOTH-DAG: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:112 [M0]
-; BOTH-DAG: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:120 [M0]
+; BOTH-DAG: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:112
+; BOTH-DAG: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:120
; BOTH: s_endpgm
define void @local_v2i64_store(<2 x i64> addrspace(3)* %out) nounwind {
- %gep = getelementptr <2 x i64> addrspace(3)* %out, i32 7
+ %gep = getelementptr <2 x i64>, <2 x i64> addrspace(3)* %out, i32 7
store <2 x i64> <i64 5678, i64 5678>, <2 x i64> addrspace(3)* %gep, align 16
ret void
}
; BOTH-LABEL: {{^}}local_v2i64_store_0_offset:
; BOTH-NOT: ADD
-; BOTH-DAG: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} [M0]
-; BOTH-DAG: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:8 [M0]
+; BOTH-DAG: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}
+; BOTH-DAG: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:8
; BOTH: s_endpgm
define void @local_v2i64_store_0_offset(<2 x i64> addrspace(3)* %out) nounwind {
store <2 x i64> <i64 1234, i64 1234>, <2 x i64> addrspace(3)* %out, align 16
@@ -142,23 +143,23 @@ define void @local_v2i64_store_0_offset(<2 x i64> addrspace(3)* %out) nounwind {
; BOTH-LABEL: {{^}}local_v4i64_store:
; BOTH-NOT: ADD
-; BOTH-DAG: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:224 [M0]
-; BOTH-DAG: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:232 [M0]
-; BOTH-DAG: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:240 [M0]
-; BOTH-DAG: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:248 [M0]
+; BOTH-DAG: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:224
+; BOTH-DAG: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:232
+; BOTH-DAG: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:240
+; BOTH-DAG: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:248
; BOTH: s_endpgm
define void @local_v4i64_store(<4 x i64> addrspace(3)* %out) nounwind {
- %gep = getelementptr <4 x i64> addrspace(3)* %out, i32 7
+ %gep = getelementptr <4 x i64>, <4 x i64> addrspace(3)* %out, i32 7
store <4 x i64> <i64 5678, i64 5678, i64 5678, i64 5678>, <4 x i64> addrspace(3)* %gep, align 16
ret void
}
; BOTH-LABEL: {{^}}local_v4i64_store_0_offset:
; BOTH-NOT: ADD
-; BOTH-DAG: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} [M0]
-; BOTH-DAG: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:8 [M0]
-; BOTH-DAG: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:16 [M0]
-; BOTH-DAG: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:24 [M0]
+; BOTH-DAG: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}
+; BOTH-DAG: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:8
+; BOTH-DAG: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:16
+; BOTH-DAG: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:24
; BOTH: s_endpgm
define void @local_v4i64_store_0_offset(<4 x i64> addrspace(3)* %out) nounwind {
store <4 x i64> <i64 1234, i64 1234, i64 1234, i64 1234>, <4 x i64> addrspace(3)* %out, align 16
diff --git a/test/CodeGen/R600/local-atomics.ll b/test/CodeGen/R600/local-atomics.ll
index 16d3173f3692..2aaf977ab903 100644
--- a/test/CodeGen/R600/local-atomics.ll
+++ b/test/CodeGen/R600/local-atomics.ll
@@ -1,15 +1,16 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=CI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=CIVI -check-prefix=GCN -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=CIVI -check-prefix=GCN -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
; FUNC-LABEL: {{^}}lds_atomic_xchg_ret_i32:
; EG: LDS_WRXCHG_RET *
-; SI: s_load_dword [[SPTR:s[0-9]+]],
-; SI: v_mov_b32_e32 [[DATA:v[0-9]+]], 4
-; SI: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
-; SI: ds_wrxchg_rtn_b32 [[RESULT:v[0-9]+]], [[VPTR]], [[DATA]] [M0]
-; SI: buffer_store_dword [[RESULT]],
-; SI: s_endpgm
+; GCN: v_mov_b32_e32 [[DATA:v[0-9]+]], 4
+; GCN: s_load_dword [[SPTR:s[0-9]+]],
+; GCN: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
+; GCN: ds_wrxchg_rtn_b32 [[RESULT:v[0-9]+]], [[VPTR]], [[DATA]]
+; GCN: buffer_store_dword [[RESULT]],
+; GCN: s_endpgm
define void @lds_atomic_xchg_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
%result = atomicrmw xchg i32 addrspace(3)* %ptr, i32 4 seq_cst
store i32 %result, i32 addrspace(1)* %out, align 4
@@ -18,10 +19,10 @@ define void @lds_atomic_xchg_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %
; FUNC-LABEL: {{^}}lds_atomic_xchg_ret_i32_offset:
; EG: LDS_WRXCHG_RET *
-; SI: ds_wrxchg_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
-; SI: s_endpgm
+; GCN: ds_wrxchg_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
+; GCN: s_endpgm
define void @lds_atomic_xchg_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
- %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
+ %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
%result = atomicrmw xchg i32 addrspace(3)* %gep, i32 4 seq_cst
store i32 %result, i32 addrspace(1)* %out, align 4
ret void
@@ -30,12 +31,12 @@ define void @lds_atomic_xchg_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspac
; XXX - Is it really necessary to load 4 into a VGPR?
; FUNC-LABEL: {{^}}lds_atomic_add_ret_i32:
; EG: LDS_ADD_RET *
-; SI: s_load_dword [[SPTR:s[0-9]+]],
-; SI: v_mov_b32_e32 [[DATA:v[0-9]+]], 4
-; SI: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
-; SI: ds_add_rtn_u32 [[RESULT:v[0-9]+]], [[VPTR]], [[DATA]] [M0]
-; SI: buffer_store_dword [[RESULT]],
-; SI: s_endpgm
+; GCN: v_mov_b32_e32 [[DATA:v[0-9]+]], 4
+; GCN: s_load_dword [[SPTR:s[0-9]+]],
+; GCN: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
+; GCN: ds_add_rtn_u32 [[RESULT:v[0-9]+]], [[VPTR]], [[DATA]]
+; GCN: buffer_store_dword [[RESULT]],
+; GCN: s_endpgm
define void @lds_atomic_add_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
%result = atomicrmw add i32 addrspace(3)* %ptr, i32 4 seq_cst
store i32 %result, i32 addrspace(1)* %out, align 4
@@ -44,10 +45,10 @@ define void @lds_atomic_add_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %p
; FUNC-LABEL: {{^}}lds_atomic_add_ret_i32_offset:
; EG: LDS_ADD_RET *
-; SI: ds_add_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
-; SI: s_endpgm
+; GCN: ds_add_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
+; GCN: s_endpgm
define void @lds_atomic_add_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
- %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
+ %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
%result = atomicrmw add i32 addrspace(3)* %gep, i32 4 seq_cst
store i32 %result, i32 addrspace(1)* %out, align 4
ret void
@@ -55,13 +56,13 @@ define void @lds_atomic_add_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace
; FUNC-LABEL: {{^}}lds_atomic_add_ret_i32_bad_si_offset:
; EG: LDS_ADD_RET *
-; SI: ds_add_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} [M0]
-; CI: ds_add_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
-; SI: s_endpgm
+; SI: ds_add_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
+; CIVI: ds_add_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
+; GCN: s_endpgm
define void @lds_atomic_add_ret_i32_bad_si_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr, i32 %a, i32 %b) nounwind {
%sub = sub i32 %a, %b
%add = add i32 %sub, 4
- %gep = getelementptr i32 addrspace(3)* %ptr, i32 %add
+ %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 %add
%result = atomicrmw add i32 addrspace(3)* %gep, i32 4 seq_cst
store i32 %result, i32 addrspace(1)* %out, align 4
ret void
@@ -69,9 +70,9 @@ define void @lds_atomic_add_ret_i32_bad_si_offset(i32 addrspace(1)* %out, i32 ad
; FUNC-LABEL: {{^}}lds_atomic_inc_ret_i32:
; EG: LDS_ADD_RET *
-; SI: v_mov_b32_e32 [[NEGONE:v[0-9]+]], -1
-; SI: ds_inc_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[NEGONE]] [M0]
-; SI: s_endpgm
+; GCN: v_mov_b32_e32 [[NEGONE:v[0-9]+]], -1
+; GCN: ds_inc_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[NEGONE]]
+; GCN: s_endpgm
define void @lds_atomic_inc_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
%result = atomicrmw add i32 addrspace(3)* %ptr, i32 1 seq_cst
store i32 %result, i32 addrspace(1)* %out, align 4
@@ -80,11 +81,11 @@ define void @lds_atomic_inc_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %p
; FUNC-LABEL: {{^}}lds_atomic_inc_ret_i32_offset:
; EG: LDS_ADD_RET *
-; SI: v_mov_b32_e32 [[NEGONE:v[0-9]+]], -1
-; SI: ds_inc_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[NEGONE]] offset:16
-; SI: s_endpgm
+; GCN: v_mov_b32_e32 [[NEGONE:v[0-9]+]], -1
+; GCN: ds_inc_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[NEGONE]] offset:16
+; GCN: s_endpgm
define void @lds_atomic_inc_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
- %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
+ %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
%result = atomicrmw add i32 addrspace(3)* %gep, i32 1 seq_cst
store i32 %result, i32 addrspace(1)* %out, align 4
ret void
@@ -92,13 +93,13 @@ define void @lds_atomic_inc_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace
; FUNC-LABEL: {{^}}lds_atomic_inc_ret_i32_bad_si_offset:
; EG: LDS_ADD_RET *
-; SI: ds_inc_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} [M0]
-; CI: ds_inc_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
-; SI: s_endpgm
+; SI: ds_inc_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
+; CIVI: ds_inc_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
+; GCN: s_endpgm
define void @lds_atomic_inc_ret_i32_bad_si_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr, i32 %a, i32 %b) nounwind {
%sub = sub i32 %a, %b
%add = add i32 %sub, 4
- %gep = getelementptr i32 addrspace(3)* %ptr, i32 %add
+ %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 %add
%result = atomicrmw add i32 addrspace(3)* %gep, i32 1 seq_cst
store i32 %result, i32 addrspace(1)* %out, align 4
ret void
@@ -106,8 +107,8 @@ define void @lds_atomic_inc_ret_i32_bad_si_offset(i32 addrspace(1)* %out, i32 ad
; FUNC-LABEL: {{^}}lds_atomic_sub_ret_i32:
; EG: LDS_SUB_RET *
-; SI: ds_sub_rtn_u32
-; SI: s_endpgm
+; GCN: ds_sub_rtn_u32
+; GCN: s_endpgm
define void @lds_atomic_sub_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
%result = atomicrmw sub i32 addrspace(3)* %ptr, i32 4 seq_cst
store i32 %result, i32 addrspace(1)* %out, align 4
@@ -116,10 +117,10 @@ define void @lds_atomic_sub_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %p
; FUNC-LABEL: {{^}}lds_atomic_sub_ret_i32_offset:
; EG: LDS_SUB_RET *
-; SI: ds_sub_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
-; SI: s_endpgm
+; GCN: ds_sub_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
+; GCN: s_endpgm
define void @lds_atomic_sub_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
- %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
+ %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
%result = atomicrmw sub i32 addrspace(3)* %gep, i32 4 seq_cst
store i32 %result, i32 addrspace(1)* %out, align 4
ret void
@@ -127,9 +128,9 @@ define void @lds_atomic_sub_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace
; FUNC-LABEL: {{^}}lds_atomic_dec_ret_i32:
; EG: LDS_SUB_RET *
-; SI: v_mov_b32_e32 [[NEGONE:v[0-9]+]], -1
-; SI: ds_dec_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[NEGONE]] [M0]
-; SI: s_endpgm
+; GCN: v_mov_b32_e32 [[NEGONE:v[0-9]+]], -1
+; GCN: ds_dec_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[NEGONE]]
+; GCN: s_endpgm
define void @lds_atomic_dec_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
%result = atomicrmw sub i32 addrspace(3)* %ptr, i32 1 seq_cst
store i32 %result, i32 addrspace(1)* %out, align 4
@@ -138,11 +139,11 @@ define void @lds_atomic_dec_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %p
; FUNC-LABEL: {{^}}lds_atomic_dec_ret_i32_offset:
; EG: LDS_SUB_RET *
-; SI: v_mov_b32_e32 [[NEGONE:v[0-9]+]], -1
-; SI: ds_dec_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[NEGONE]] offset:16
-; SI: s_endpgm
+; GCN: v_mov_b32_e32 [[NEGONE:v[0-9]+]], -1
+; GCN: ds_dec_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[NEGONE]] offset:16
+; GCN: s_endpgm
define void @lds_atomic_dec_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
- %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
+ %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
%result = atomicrmw sub i32 addrspace(3)* %gep, i32 1 seq_cst
store i32 %result, i32 addrspace(1)* %out, align 4
ret void
@@ -150,8 +151,8 @@ define void @lds_atomic_dec_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace
; FUNC-LABEL: {{^}}lds_atomic_and_ret_i32:
; EG: LDS_AND_RET *
-; SI: ds_and_rtn_b32
-; SI: s_endpgm
+; GCN: ds_and_rtn_b32
+; GCN: s_endpgm
define void @lds_atomic_and_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
%result = atomicrmw and i32 addrspace(3)* %ptr, i32 4 seq_cst
store i32 %result, i32 addrspace(1)* %out, align 4
@@ -160,10 +161,10 @@ define void @lds_atomic_and_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %p
; FUNC-LABEL: {{^}}lds_atomic_and_ret_i32_offset:
; EG: LDS_AND_RET *
-; SI: ds_and_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
-; SI: s_endpgm
+; GCN: ds_and_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
+; GCN: s_endpgm
define void @lds_atomic_and_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
- %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
+ %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
%result = atomicrmw and i32 addrspace(3)* %gep, i32 4 seq_cst
store i32 %result, i32 addrspace(1)* %out, align 4
ret void
@@ -171,8 +172,8 @@ define void @lds_atomic_and_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace
; FUNC-LABEL: {{^}}lds_atomic_or_ret_i32:
; EG: LDS_OR_RET *
-; SI: ds_or_rtn_b32
-; SI: s_endpgm
+; GCN: ds_or_rtn_b32
+; GCN: s_endpgm
define void @lds_atomic_or_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
%result = atomicrmw or i32 addrspace(3)* %ptr, i32 4 seq_cst
store i32 %result, i32 addrspace(1)* %out, align 4
@@ -181,10 +182,10 @@ define void @lds_atomic_or_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %pt
; FUNC-LABEL: {{^}}lds_atomic_or_ret_i32_offset:
; EG: LDS_OR_RET *
-; SI: ds_or_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
-; SI: s_endpgm
+; GCN: ds_or_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
+; GCN: s_endpgm
define void @lds_atomic_or_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
- %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
+ %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
%result = atomicrmw or i32 addrspace(3)* %gep, i32 4 seq_cst
store i32 %result, i32 addrspace(1)* %out, align 4
ret void
@@ -192,8 +193,8 @@ define void @lds_atomic_or_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(
; FUNC-LABEL: {{^}}lds_atomic_xor_ret_i32:
; EG: LDS_XOR_RET *
-; SI: ds_xor_rtn_b32
-; SI: s_endpgm
+; GCN: ds_xor_rtn_b32
+; GCN: s_endpgm
define void @lds_atomic_xor_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
%result = atomicrmw xor i32 addrspace(3)* %ptr, i32 4 seq_cst
store i32 %result, i32 addrspace(1)* %out, align 4
@@ -202,10 +203,10 @@ define void @lds_atomic_xor_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %p
; FUNC-LABEL: {{^}}lds_atomic_xor_ret_i32_offset:
; EG: LDS_XOR_RET *
-; SI: ds_xor_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
-; SI: s_endpgm
+; GCN: ds_xor_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
+; GCN: s_endpgm
define void @lds_atomic_xor_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
- %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
+ %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
%result = atomicrmw xor i32 addrspace(3)* %gep, i32 4 seq_cst
store i32 %result, i32 addrspace(1)* %out, align 4
ret void
@@ -221,8 +222,8 @@ define void @lds_atomic_xor_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace
; FUNC-LABEL: {{^}}lds_atomic_min_ret_i32:
; EG: LDS_MIN_INT_RET *
-; SI: ds_min_rtn_i32
-; SI: s_endpgm
+; GCN: ds_min_rtn_i32
+; GCN: s_endpgm
define void @lds_atomic_min_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
%result = atomicrmw min i32 addrspace(3)* %ptr, i32 4 seq_cst
store i32 %result, i32 addrspace(1)* %out, align 4
@@ -231,10 +232,10 @@ define void @lds_atomic_min_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %p
; FUNC-LABEL: {{^}}lds_atomic_min_ret_i32_offset:
; EG: LDS_MIN_INT_RET *
-; SI: ds_min_rtn_i32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
-; SI: s_endpgm
+; GCN: ds_min_rtn_i32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
+; GCN: s_endpgm
define void @lds_atomic_min_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
- %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
+ %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
%result = atomicrmw min i32 addrspace(3)* %gep, i32 4 seq_cst
store i32 %result, i32 addrspace(1)* %out, align 4
ret void
@@ -242,8 +243,8 @@ define void @lds_atomic_min_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace
; FUNC-LABEL: {{^}}lds_atomic_max_ret_i32:
; EG: LDS_MAX_INT_RET *
-; SI: ds_max_rtn_i32
-; SI: s_endpgm
+; GCN: ds_max_rtn_i32
+; GCN: s_endpgm
define void @lds_atomic_max_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
%result = atomicrmw max i32 addrspace(3)* %ptr, i32 4 seq_cst
store i32 %result, i32 addrspace(1)* %out, align 4
@@ -252,10 +253,10 @@ define void @lds_atomic_max_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %p
; FUNC-LABEL: {{^}}lds_atomic_max_ret_i32_offset:
; EG: LDS_MAX_INT_RET *
-; SI: ds_max_rtn_i32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
-; SI: s_endpgm
+; GCN: ds_max_rtn_i32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
+; GCN: s_endpgm
define void @lds_atomic_max_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
- %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
+ %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
%result = atomicrmw max i32 addrspace(3)* %gep, i32 4 seq_cst
store i32 %result, i32 addrspace(1)* %out, align 4
ret void
@@ -263,8 +264,8 @@ define void @lds_atomic_max_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace
; FUNC-LABEL: {{^}}lds_atomic_umin_ret_i32:
; EG: LDS_MIN_UINT_RET *
-; SI: ds_min_rtn_u32
-; SI: s_endpgm
+; GCN: ds_min_rtn_u32
+; GCN: s_endpgm
define void @lds_atomic_umin_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
%result = atomicrmw umin i32 addrspace(3)* %ptr, i32 4 seq_cst
store i32 %result, i32 addrspace(1)* %out, align 4
@@ -273,10 +274,10 @@ define void @lds_atomic_umin_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %
; FUNC-LABEL: {{^}}lds_atomic_umin_ret_i32_offset:
; EG: LDS_MIN_UINT_RET *
-; SI: ds_min_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
-; SI: s_endpgm
+; GCN: ds_min_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
+; GCN: s_endpgm
define void @lds_atomic_umin_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
- %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
+ %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
%result = atomicrmw umin i32 addrspace(3)* %gep, i32 4 seq_cst
store i32 %result, i32 addrspace(1)* %out, align 4
ret void
@@ -284,8 +285,8 @@ define void @lds_atomic_umin_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspac
; FUNC-LABEL: {{^}}lds_atomic_umax_ret_i32:
; EG: LDS_MAX_UINT_RET *
-; SI: ds_max_rtn_u32
-; SI: s_endpgm
+; GCN: ds_max_rtn_u32
+; GCN: s_endpgm
define void @lds_atomic_umax_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
%result = atomicrmw umax i32 addrspace(3)* %ptr, i32 4 seq_cst
store i32 %result, i32 addrspace(1)* %out, align 4
@@ -294,182 +295,182 @@ define void @lds_atomic_umax_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %
; FUNC-LABEL: {{^}}lds_atomic_umax_ret_i32_offset:
; EG: LDS_MAX_UINT_RET *
-; SI: ds_max_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
-; SI: s_endpgm
+; GCN: ds_max_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
+; GCN: s_endpgm
define void @lds_atomic_umax_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
- %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
+ %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
%result = atomicrmw umax i32 addrspace(3)* %gep, i32 4 seq_cst
store i32 %result, i32 addrspace(1)* %out, align 4
ret void
}
; FUNC-LABEL: {{^}}lds_atomic_xchg_noret_i32:
-; SI: s_load_dword [[SPTR:s[0-9]+]],
-; SI: v_mov_b32_e32 [[DATA:v[0-9]+]], 4
-; SI: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
-; SI: ds_wrxchg_rtn_b32 [[RESULT:v[0-9]+]], [[VPTR]], [[DATA]] [M0]
-; SI: s_endpgm
+; GCN: s_load_dword [[SPTR:s[0-9]+]],
+; GCN: v_mov_b32_e32 [[DATA:v[0-9]+]], 4
+; GCN: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
+; GCN: ds_wrxchg_rtn_b32 [[RESULT:v[0-9]+]], [[VPTR]], [[DATA]]
+; GCN: s_endpgm
define void @lds_atomic_xchg_noret_i32(i32 addrspace(3)* %ptr) nounwind {
%result = atomicrmw xchg i32 addrspace(3)* %ptr, i32 4 seq_cst
ret void
}
; FUNC-LABEL: {{^}}lds_atomic_xchg_noret_i32_offset:
-; SI: ds_wrxchg_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
-; SI: s_endpgm
+; GCN: ds_wrxchg_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
+; GCN: s_endpgm
define void @lds_atomic_xchg_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
- %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
+ %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
%result = atomicrmw xchg i32 addrspace(3)* %gep, i32 4 seq_cst
ret void
}
; XXX - Is it really necessary to load 4 into a VGPR?
; FUNC-LABEL: {{^}}lds_atomic_add_noret_i32:
-; SI: s_load_dword [[SPTR:s[0-9]+]],
-; SI: v_mov_b32_e32 [[DATA:v[0-9]+]], 4
-; SI: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
-; SI: ds_add_u32 [[VPTR]], [[DATA]] [M0]
-; SI: s_endpgm
+; GCN: s_load_dword [[SPTR:s[0-9]+]],
+; GCN: v_mov_b32_e32 [[DATA:v[0-9]+]], 4
+; GCN: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
+; GCN: ds_add_u32 [[VPTR]], [[DATA]]
+; GCN: s_endpgm
define void @lds_atomic_add_noret_i32(i32 addrspace(3)* %ptr) nounwind {
%result = atomicrmw add i32 addrspace(3)* %ptr, i32 4 seq_cst
ret void
}
; FUNC-LABEL: {{^}}lds_atomic_add_noret_i32_offset:
-; SI: ds_add_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
-; SI: s_endpgm
+; GCN: ds_add_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
+; GCN: s_endpgm
define void @lds_atomic_add_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
- %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
+ %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
%result = atomicrmw add i32 addrspace(3)* %gep, i32 4 seq_cst
ret void
}
; FUNC-LABEL: {{^}}lds_atomic_add_noret_i32_bad_si_offset
-; SI: ds_add_u32 v{{[0-9]+}}, v{{[0-9]+}} [M0]
-; CI: ds_add_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16 [M0]
-; SI: s_endpgm
+; SI: ds_add_u32 v{{[0-9]+}}, v{{[0-9]+}}
+; CIVI: ds_add_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
+; GCN: s_endpgm
define void @lds_atomic_add_noret_i32_bad_si_offset(i32 addrspace(3)* %ptr, i32 %a, i32 %b) nounwind {
%sub = sub i32 %a, %b
%add = add i32 %sub, 4
- %gep = getelementptr i32 addrspace(3)* %ptr, i32 %add
+ %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 %add
%result = atomicrmw add i32 addrspace(3)* %gep, i32 4 seq_cst
ret void
}
; FUNC-LABEL: {{^}}lds_atomic_inc_noret_i32:
-; SI: v_mov_b32_e32 [[NEGONE:v[0-9]+]], -1
-; SI: ds_inc_u32 v{{[0-9]+}}, [[NEGONE]] [M0]
-; SI: s_endpgm
+; GCN: v_mov_b32_e32 [[NEGONE:v[0-9]+]], -1
+; GCN: ds_inc_u32 v{{[0-9]+}}, [[NEGONE]]
+; GCN: s_endpgm
define void @lds_atomic_inc_noret_i32(i32 addrspace(3)* %ptr) nounwind {
%result = atomicrmw add i32 addrspace(3)* %ptr, i32 1 seq_cst
ret void
}
; FUNC-LABEL: {{^}}lds_atomic_inc_noret_i32_offset:
-; SI: v_mov_b32_e32 [[NEGONE:v[0-9]+]], -1
-; SI: ds_inc_u32 v{{[0-9]+}}, [[NEGONE]] offset:16
-; SI: s_endpgm
+; GCN: v_mov_b32_e32 [[NEGONE:v[0-9]+]], -1
+; GCN: ds_inc_u32 v{{[0-9]+}}, [[NEGONE]] offset:16
+; GCN: s_endpgm
define void @lds_atomic_inc_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
- %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
+ %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
%result = atomicrmw add i32 addrspace(3)* %gep, i32 1 seq_cst
ret void
}
; FUNC-LABEL: {{^}}lds_atomic_inc_noret_i32_bad_si_offset:
; SI: ds_inc_u32 v{{[0-9]+}}, v{{[0-9]+}}
-; CI: ds_inc_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
-; SI: s_endpgm
+; CIVI: ds_inc_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
+; GCN: s_endpgm
define void @lds_atomic_inc_noret_i32_bad_si_offset(i32 addrspace(3)* %ptr, i32 %a, i32 %b) nounwind {
%sub = sub i32 %a, %b
%add = add i32 %sub, 4
- %gep = getelementptr i32 addrspace(3)* %ptr, i32 %add
+ %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 %add
%result = atomicrmw add i32 addrspace(3)* %gep, i32 1 seq_cst
ret void
}
; FUNC-LABEL: {{^}}lds_atomic_sub_noret_i32:
-; SI: ds_sub_u32
-; SI: s_endpgm
+; GCN: ds_sub_u32
+; GCN: s_endpgm
define void @lds_atomic_sub_noret_i32(i32 addrspace(3)* %ptr) nounwind {
%result = atomicrmw sub i32 addrspace(3)* %ptr, i32 4 seq_cst
ret void
}
; FUNC-LABEL: {{^}}lds_atomic_sub_noret_i32_offset:
-; SI: ds_sub_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
-; SI: s_endpgm
+; GCN: ds_sub_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
+; GCN: s_endpgm
define void @lds_atomic_sub_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
- %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
+ %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
%result = atomicrmw sub i32 addrspace(3)* %gep, i32 4 seq_cst
ret void
}
; FUNC-LABEL: {{^}}lds_atomic_dec_noret_i32:
-; SI: v_mov_b32_e32 [[NEGONE:v[0-9]+]], -1
-; SI: ds_dec_u32 v{{[0-9]+}}, [[NEGONE]]
-; SI: s_endpgm
+; GCN: v_mov_b32_e32 [[NEGONE:v[0-9]+]], -1
+; GCN: ds_dec_u32 v{{[0-9]+}}, [[NEGONE]]
+; GCN: s_endpgm
define void @lds_atomic_dec_noret_i32(i32 addrspace(3)* %ptr) nounwind {
%result = atomicrmw sub i32 addrspace(3)* %ptr, i32 1 seq_cst
ret void
}
; FUNC-LABEL: {{^}}lds_atomic_dec_noret_i32_offset:
-; SI: v_mov_b32_e32 [[NEGONE:v[0-9]+]], -1
-; SI: ds_dec_u32 v{{[0-9]+}}, [[NEGONE]] offset:16
-; SI: s_endpgm
+; GCN: v_mov_b32_e32 [[NEGONE:v[0-9]+]], -1
+; GCN: ds_dec_u32 v{{[0-9]+}}, [[NEGONE]] offset:16
+; GCN: s_endpgm
define void @lds_atomic_dec_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
- %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
+ %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
%result = atomicrmw sub i32 addrspace(3)* %gep, i32 1 seq_cst
ret void
}
; FUNC-LABEL: {{^}}lds_atomic_and_noret_i32:
-; SI: ds_and_b32
-; SI: s_endpgm
+; GCN: ds_and_b32
+; GCN: s_endpgm
define void @lds_atomic_and_noret_i32(i32 addrspace(3)* %ptr) nounwind {
%result = atomicrmw and i32 addrspace(3)* %ptr, i32 4 seq_cst
ret void
}
; FUNC-LABEL: {{^}}lds_atomic_and_noret_i32_offset:
-; SI: ds_and_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
-; SI: s_endpgm
+; GCN: ds_and_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
+; GCN: s_endpgm
define void @lds_atomic_and_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
- %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
+ %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
%result = atomicrmw and i32 addrspace(3)* %gep, i32 4 seq_cst
ret void
}
; FUNC-LABEL: {{^}}lds_atomic_or_noret_i32:
-; SI: ds_or_b32
-; SI: s_endpgm
+; GCN: ds_or_b32
+; GCN: s_endpgm
define void @lds_atomic_or_noret_i32(i32 addrspace(3)* %ptr) nounwind {
%result = atomicrmw or i32 addrspace(3)* %ptr, i32 4 seq_cst
ret void
}
; FUNC-LABEL: {{^}}lds_atomic_or_noret_i32_offset:
-; SI: ds_or_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
-; SI: s_endpgm
+; GCN: ds_or_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
+; GCN: s_endpgm
define void @lds_atomic_or_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
- %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
+ %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
%result = atomicrmw or i32 addrspace(3)* %gep, i32 4 seq_cst
ret void
}
; FUNC-LABEL: {{^}}lds_atomic_xor_noret_i32:
-; SI: ds_xor_b32
-; SI: s_endpgm
+; GCN: ds_xor_b32
+; GCN: s_endpgm
define void @lds_atomic_xor_noret_i32(i32 addrspace(3)* %ptr) nounwind {
%result = atomicrmw xor i32 addrspace(3)* %ptr, i32 4 seq_cst
ret void
}
; FUNC-LABEL: {{^}}lds_atomic_xor_noret_i32_offset:
-; SI: ds_xor_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
-; SI: s_endpgm
+; GCN: ds_xor_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
+; GCN: s_endpgm
define void @lds_atomic_xor_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
- %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
+ %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
%result = atomicrmw xor i32 addrspace(3)* %gep, i32 4 seq_cst
ret void
}
@@ -482,69 +483,69 @@ define void @lds_atomic_xor_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
; }
; FUNC-LABEL: {{^}}lds_atomic_min_noret_i32:
-; SI: ds_min_i32
-; SI: s_endpgm
+; GCN: ds_min_i32
+; GCN: s_endpgm
define void @lds_atomic_min_noret_i32(i32 addrspace(3)* %ptr) nounwind {
%result = atomicrmw min i32 addrspace(3)* %ptr, i32 4 seq_cst
ret void
}
; FUNC-LABEL: {{^}}lds_atomic_min_noret_i32_offset:
-; SI: ds_min_i32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
-; SI: s_endpgm
+; GCN: ds_min_i32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
+; GCN: s_endpgm
define void @lds_atomic_min_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
- %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
+ %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
%result = atomicrmw min i32 addrspace(3)* %gep, i32 4 seq_cst
ret void
}
; FUNC-LABEL: {{^}}lds_atomic_max_noret_i32:
-; SI: ds_max_i32
-; SI: s_endpgm
+; GCN: ds_max_i32
+; GCN: s_endpgm
define void @lds_atomic_max_noret_i32(i32 addrspace(3)* %ptr) nounwind {
%result = atomicrmw max i32 addrspace(3)* %ptr, i32 4 seq_cst
ret void
}
; FUNC-LABEL: {{^}}lds_atomic_max_noret_i32_offset:
-; SI: ds_max_i32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
-; SI: s_endpgm
+; GCN: ds_max_i32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
+; GCN: s_endpgm
define void @lds_atomic_max_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
- %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
+ %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
%result = atomicrmw max i32 addrspace(3)* %gep, i32 4 seq_cst
ret void
}
; FUNC-LABEL: {{^}}lds_atomic_umin_noret_i32:
-; SI: ds_min_u32
-; SI: s_endpgm
+; GCN: ds_min_u32
+; GCN: s_endpgm
define void @lds_atomic_umin_noret_i32(i32 addrspace(3)* %ptr) nounwind {
%result = atomicrmw umin i32 addrspace(3)* %ptr, i32 4 seq_cst
ret void
}
; FUNC-LABEL: {{^}}lds_atomic_umin_noret_i32_offset:
-; SI: ds_min_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
-; SI: s_endpgm
+; GCN: ds_min_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
+; GCN: s_endpgm
define void @lds_atomic_umin_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
- %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
+ %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
%result = atomicrmw umin i32 addrspace(3)* %gep, i32 4 seq_cst
ret void
}
; FUNC-LABEL: {{^}}lds_atomic_umax_noret_i32:
-; SI: ds_max_u32
-; SI: s_endpgm
+; GCN: ds_max_u32
+; GCN: s_endpgm
define void @lds_atomic_umax_noret_i32(i32 addrspace(3)* %ptr) nounwind {
%result = atomicrmw umax i32 addrspace(3)* %ptr, i32 4 seq_cst
ret void
}
; FUNC-LABEL: {{^}}lds_atomic_umax_noret_i32_offset:
-; SI: ds_max_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
-; SI: s_endpgm
+; GCN: ds_max_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
+; GCN: s_endpgm
define void @lds_atomic_umax_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
- %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
+ %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
%result = atomicrmw umax i32 addrspace(3)* %gep, i32 4 seq_cst
ret void
}
diff --git a/test/CodeGen/R600/local-atomics64.ll b/test/CodeGen/R600/local-atomics64.ll
index ce6ddbd66265..0ffa5e751b7d 100644
--- a/test/CodeGen/R600/local-atomics64.ll
+++ b/test/CodeGen/R600/local-atomics64.ll
@@ -1,8 +1,9 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=SI -check-prefix=GCN %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=VI -check-prefix=GCN %s
; FUNC-LABEL: {{^}}lds_atomic_xchg_ret_i64:
-; SI: ds_wrxchg_rtn_b64
-; SI: s_endpgm
+; GCN: ds_wrxchg_rtn_b64
+; GCN: s_endpgm
define void @lds_atomic_xchg_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
%result = atomicrmw xchg i64 addrspace(3)* %ptr, i64 4 seq_cst
store i64 %result, i64 addrspace(1)* %out, align 8
@@ -10,18 +11,18 @@ define void @lds_atomic_xchg_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %
}
; FUNC-LABEL: {{^}}lds_atomic_xchg_ret_i64_offset:
-; SI: ds_wrxchg_rtn_b64 {{.*}} offset:32
-; SI: s_endpgm
+; GCN: ds_wrxchg_rtn_b64 {{.*}} offset:32
+; GCN: s_endpgm
define void @lds_atomic_xchg_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
- %gep = getelementptr i64 addrspace(3)* %ptr, i32 4
+ %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
%result = atomicrmw xchg i64 addrspace(3)* %gep, i64 4 seq_cst
store i64 %result, i64 addrspace(1)* %out, align 8
ret void
}
; FUNC-LABEL: {{^}}lds_atomic_add_ret_i64:
-; SI: ds_add_rtn_u64
-; SI: s_endpgm
+; GCN: ds_add_rtn_u64
+; GCN: s_endpgm
define void @lds_atomic_add_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
%result = atomicrmw add i64 addrspace(3)* %ptr, i64 4 seq_cst
store i64 %result, i64 addrspace(1)* %out, align 8
@@ -29,26 +30,27 @@ define void @lds_atomic_add_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %p
}
; FUNC-LABEL: {{^}}lds_atomic_add_ret_i64_offset:
+; GCN: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], 9
+; GCN: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], 0
; SI: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
-; SI-DAG: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], 9
-; SI-DAG: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], 0
-; SI-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]
-; SI: ds_add_rtn_u64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}} offset:32 [M0]
-; SI: buffer_store_dwordx2 [[RESULT]],
-; SI: s_endpgm
+; VI: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c
+; GCN-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]
+; GCN: ds_add_rtn_u64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}} offset:32
+; GCN: buffer_store_dwordx2 [[RESULT]],
+; GCN: s_endpgm
define void @lds_atomic_add_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
- %gep = getelementptr i64 addrspace(3)* %ptr, i64 4
+ %gep = getelementptr i64, i64 addrspace(3)* %ptr, i64 4
%result = atomicrmw add i64 addrspace(3)* %gep, i64 9 seq_cst
store i64 %result, i64 addrspace(1)* %out, align 8
ret void
}
; FUNC-LABEL: {{^}}lds_atomic_inc_ret_i64:
-; SI: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], -1
-; SI: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], -1
-; SI: ds_inc_rtn_u64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}}
-; SI: buffer_store_dwordx2 [[RESULT]],
-; SI: s_endpgm
+; GCN: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], -1
+; GCN: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], -1
+; GCN: ds_inc_rtn_u64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}}
+; GCN: buffer_store_dwordx2 [[RESULT]],
+; GCN: s_endpgm
define void @lds_atomic_inc_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
%result = atomicrmw add i64 addrspace(3)* %ptr, i64 1 seq_cst
store i64 %result, i64 addrspace(1)* %out, align 8
@@ -56,18 +58,18 @@ define void @lds_atomic_inc_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %p
}
; FUNC-LABEL: {{^}}lds_atomic_inc_ret_i64_offset:
-; SI: ds_inc_rtn_u64 {{.*}} offset:32
-; SI: s_endpgm
+; GCN: ds_inc_rtn_u64 {{.*}} offset:32
+; GCN: s_endpgm
define void @lds_atomic_inc_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
- %gep = getelementptr i64 addrspace(3)* %ptr, i32 4
+ %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
%result = atomicrmw add i64 addrspace(3)* %gep, i64 1 seq_cst
store i64 %result, i64 addrspace(1)* %out, align 8
ret void
}
; FUNC-LABEL: {{^}}lds_atomic_sub_ret_i64:
-; SI: ds_sub_rtn_u64
-; SI: s_endpgm
+; GCN: ds_sub_rtn_u64
+; GCN: s_endpgm
define void @lds_atomic_sub_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
%result = atomicrmw sub i64 addrspace(3)* %ptr, i64 4 seq_cst
store i64 %result, i64 addrspace(1)* %out, align 8
@@ -75,21 +77,21 @@ define void @lds_atomic_sub_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %p
}
; FUNC-LABEL: {{^}}lds_atomic_sub_ret_i64_offset:
-; SI: ds_sub_rtn_u64 {{.*}} offset:32
-; SI: s_endpgm
+; GCN: ds_sub_rtn_u64 {{.*}} offset:32
+; GCN: s_endpgm
define void @lds_atomic_sub_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
- %gep = getelementptr i64 addrspace(3)* %ptr, i32 4
+ %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
%result = atomicrmw sub i64 addrspace(3)* %gep, i64 4 seq_cst
store i64 %result, i64 addrspace(1)* %out, align 8
ret void
}
; FUNC-LABEL: {{^}}lds_atomic_dec_ret_i64:
-; SI: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], -1
-; SI: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], -1
-; SI: ds_dec_rtn_u64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}}
-; SI: buffer_store_dwordx2 [[RESULT]],
-; SI: s_endpgm
+; GCN: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], -1
+; GCN: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], -1
+; GCN: ds_dec_rtn_u64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}}
+; GCN: buffer_store_dwordx2 [[RESULT]],
+; GCN: s_endpgm
define void @lds_atomic_dec_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
%result = atomicrmw sub i64 addrspace(3)* %ptr, i64 1 seq_cst
store i64 %result, i64 addrspace(1)* %out, align 8
@@ -97,18 +99,18 @@ define void @lds_atomic_dec_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %p
}
; FUNC-LABEL: {{^}}lds_atomic_dec_ret_i64_offset:
-; SI: ds_dec_rtn_u64 {{.*}} offset:32
-; SI: s_endpgm
+; GCN: ds_dec_rtn_u64 {{.*}} offset:32
+; GCN: s_endpgm
define void @lds_atomic_dec_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
- %gep = getelementptr i64 addrspace(3)* %ptr, i32 4
+ %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
%result = atomicrmw sub i64 addrspace(3)* %gep, i64 1 seq_cst
store i64 %result, i64 addrspace(1)* %out, align 8
ret void
}
; FUNC-LABEL: {{^}}lds_atomic_and_ret_i64:
-; SI: ds_and_rtn_b64
-; SI: s_endpgm
+; GCN: ds_and_rtn_b64
+; GCN: s_endpgm
define void @lds_atomic_and_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
%result = atomicrmw and i64 addrspace(3)* %ptr, i64 4 seq_cst
store i64 %result, i64 addrspace(1)* %out, align 8
@@ -116,18 +118,18 @@ define void @lds_atomic_and_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %p
}
; FUNC-LABEL: {{^}}lds_atomic_and_ret_i64_offset:
-; SI: ds_and_rtn_b64 {{.*}} offset:32
-; SI: s_endpgm
+; GCN: ds_and_rtn_b64 {{.*}} offset:32
+; GCN: s_endpgm
define void @lds_atomic_and_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
- %gep = getelementptr i64 addrspace(3)* %ptr, i32 4
+ %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
%result = atomicrmw and i64 addrspace(3)* %gep, i64 4 seq_cst
store i64 %result, i64 addrspace(1)* %out, align 8
ret void
}
; FUNC-LABEL: {{^}}lds_atomic_or_ret_i64:
-; SI: ds_or_rtn_b64
-; SI: s_endpgm
+; GCN: ds_or_rtn_b64
+; GCN: s_endpgm
define void @lds_atomic_or_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
%result = atomicrmw or i64 addrspace(3)* %ptr, i64 4 seq_cst
store i64 %result, i64 addrspace(1)* %out, align 8
@@ -135,18 +137,18 @@ define void @lds_atomic_or_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %pt
}
; FUNC-LABEL: {{^}}lds_atomic_or_ret_i64_offset:
-; SI: ds_or_rtn_b64 {{.*}} offset:32
-; SI: s_endpgm
+; GCN: ds_or_rtn_b64 {{.*}} offset:32
+; GCN: s_endpgm
define void @lds_atomic_or_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
- %gep = getelementptr i64 addrspace(3)* %ptr, i32 4
+ %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
%result = atomicrmw or i64 addrspace(3)* %gep, i64 4 seq_cst
store i64 %result, i64 addrspace(1)* %out, align 8
ret void
}
; FUNC-LABEL: {{^}}lds_atomic_xor_ret_i64:
-; SI: ds_xor_rtn_b64
-; SI: s_endpgm
+; GCN: ds_xor_rtn_b64
+; GCN: s_endpgm
define void @lds_atomic_xor_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
%result = atomicrmw xor i64 addrspace(3)* %ptr, i64 4 seq_cst
store i64 %result, i64 addrspace(1)* %out, align 8
@@ -154,10 +156,10 @@ define void @lds_atomic_xor_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %p
}
; FUNC-LABEL: {{^}}lds_atomic_xor_ret_i64_offset:
-; SI: ds_xor_rtn_b64 {{.*}} offset:32
-; SI: s_endpgm
+; GCN: ds_xor_rtn_b64 {{.*}} offset:32
+; GCN: s_endpgm
define void @lds_atomic_xor_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
- %gep = getelementptr i64 addrspace(3)* %ptr, i32 4
+ %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
%result = atomicrmw xor i64 addrspace(3)* %gep, i64 4 seq_cst
store i64 %result, i64 addrspace(1)* %out, align 8
ret void
@@ -172,8 +174,8 @@ define void @lds_atomic_xor_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace
; }
; FUNC-LABEL: {{^}}lds_atomic_min_ret_i64:
-; SI: ds_min_rtn_i64
-; SI: s_endpgm
+; GCN: ds_min_rtn_i64
+; GCN: s_endpgm
define void @lds_atomic_min_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
%result = atomicrmw min i64 addrspace(3)* %ptr, i64 4 seq_cst
store i64 %result, i64 addrspace(1)* %out, align 8
@@ -181,18 +183,18 @@ define void @lds_atomic_min_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %p
}
; FUNC-LABEL: {{^}}lds_atomic_min_ret_i64_offset:
-; SI: ds_min_rtn_i64 {{.*}} offset:32
-; SI: s_endpgm
+; GCN: ds_min_rtn_i64 {{.*}} offset:32
+; GCN: s_endpgm
define void @lds_atomic_min_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
- %gep = getelementptr i64 addrspace(3)* %ptr, i32 4
+ %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
%result = atomicrmw min i64 addrspace(3)* %gep, i64 4 seq_cst
store i64 %result, i64 addrspace(1)* %out, align 8
ret void
}
; FUNC-LABEL: {{^}}lds_atomic_max_ret_i64:
-; SI: ds_max_rtn_i64
-; SI: s_endpgm
+; GCN: ds_max_rtn_i64
+; GCN: s_endpgm
define void @lds_atomic_max_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
%result = atomicrmw max i64 addrspace(3)* %ptr, i64 4 seq_cst
store i64 %result, i64 addrspace(1)* %out, align 8
@@ -200,18 +202,18 @@ define void @lds_atomic_max_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %p
}
; FUNC-LABEL: {{^}}lds_atomic_max_ret_i64_offset:
-; SI: ds_max_rtn_i64 {{.*}} offset:32
-; SI: s_endpgm
+; GCN: ds_max_rtn_i64 {{.*}} offset:32
+; GCN: s_endpgm
define void @lds_atomic_max_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
- %gep = getelementptr i64 addrspace(3)* %ptr, i32 4
+ %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
%result = atomicrmw max i64 addrspace(3)* %gep, i64 4 seq_cst
store i64 %result, i64 addrspace(1)* %out, align 8
ret void
}
; FUNC-LABEL: {{^}}lds_atomic_umin_ret_i64:
-; SI: ds_min_rtn_u64
-; SI: s_endpgm
+; GCN: ds_min_rtn_u64
+; GCN: s_endpgm
define void @lds_atomic_umin_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
%result = atomicrmw umin i64 addrspace(3)* %ptr, i64 4 seq_cst
store i64 %result, i64 addrspace(1)* %out, align 8
@@ -219,18 +221,18 @@ define void @lds_atomic_umin_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %
}
; FUNC-LABEL: {{^}}lds_atomic_umin_ret_i64_offset:
-; SI: ds_min_rtn_u64 {{.*}} offset:32
-; SI: s_endpgm
+; GCN: ds_min_rtn_u64 {{.*}} offset:32
+; GCN: s_endpgm
define void @lds_atomic_umin_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
- %gep = getelementptr i64 addrspace(3)* %ptr, i32 4
+ %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
%result = atomicrmw umin i64 addrspace(3)* %gep, i64 4 seq_cst
store i64 %result, i64 addrspace(1)* %out, align 8
ret void
}
; FUNC-LABEL: {{^}}lds_atomic_umax_ret_i64:
-; SI: ds_max_rtn_u64
-; SI: s_endpgm
+; GCN: ds_max_rtn_u64
+; GCN: s_endpgm
define void @lds_atomic_umax_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
%result = atomicrmw umax i64 addrspace(3)* %ptr, i64 4 seq_cst
store i64 %result, i64 addrspace(1)* %out, align 8
@@ -238,35 +240,35 @@ define void @lds_atomic_umax_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %
}
; FUNC-LABEL: {{^}}lds_atomic_umax_ret_i64_offset:
-; SI: ds_max_rtn_u64 {{.*}} offset:32
-; SI: s_endpgm
+; GCN: ds_max_rtn_u64 {{.*}} offset:32
+; GCN: s_endpgm
define void @lds_atomic_umax_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
- %gep = getelementptr i64 addrspace(3)* %ptr, i32 4
+ %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
%result = atomicrmw umax i64 addrspace(3)* %gep, i64 4 seq_cst
store i64 %result, i64 addrspace(1)* %out, align 8
ret void
}
; FUNC-LABEL: {{^}}lds_atomic_xchg_noret_i64:
-; SI: ds_wrxchg_rtn_b64
-; SI: s_endpgm
+; GCN: ds_wrxchg_rtn_b64
+; GCN: s_endpgm
define void @lds_atomic_xchg_noret_i64(i64 addrspace(3)* %ptr) nounwind {
%result = atomicrmw xchg i64 addrspace(3)* %ptr, i64 4 seq_cst
ret void
}
; FUNC-LABEL: {{^}}lds_atomic_xchg_noret_i64_offset:
-; SI: ds_wrxchg_rtn_b64 {{.*}} offset:32
-; SI: s_endpgm
+; GCN: ds_wrxchg_rtn_b64 {{.*}} offset:32
+; GCN: s_endpgm
define void @lds_atomic_xchg_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
- %gep = getelementptr i64 addrspace(3)* %ptr, i32 4
+ %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
%result = atomicrmw xchg i64 addrspace(3)* %gep, i64 4 seq_cst
ret void
}
; FUNC-LABEL: {{^}}lds_atomic_add_noret_i64:
-; SI: ds_add_u64
-; SI: s_endpgm
+; GCN: ds_add_u64
+; GCN: s_endpgm
define void @lds_atomic_add_noret_i64(i64 addrspace(3)* %ptr) nounwind {
%result = atomicrmw add i64 addrspace(3)* %ptr, i64 4 seq_cst
ret void
@@ -274,119 +276,120 @@ define void @lds_atomic_add_noret_i64(i64 addrspace(3)* %ptr) nounwind {
; FUNC-LABEL: {{^}}lds_atomic_add_noret_i64_offset:
; SI: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x9
-; SI: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]
-; SI: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], 9
-; SI: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], 0
-; SI: ds_add_u64 [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}} offset:32 [M0]
-; SI: s_endpgm
+; VI: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x24
+; GCN: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], 9
+; GCN: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], 0
+; GCN: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]
+; GCN: ds_add_u64 [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}} offset:32
+; GCN: s_endpgm
define void @lds_atomic_add_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
- %gep = getelementptr i64 addrspace(3)* %ptr, i64 4
+ %gep = getelementptr i64, i64 addrspace(3)* %ptr, i64 4
%result = atomicrmw add i64 addrspace(3)* %gep, i64 9 seq_cst
ret void
}
; FUNC-LABEL: {{^}}lds_atomic_inc_noret_i64:
-; SI: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], -1
-; SI: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], -1
-; SI: ds_inc_u64 [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}}
-; SI: s_endpgm
+; GCN: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], -1
+; GCN: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], -1
+; GCN: ds_inc_u64 [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}}
+; GCN: s_endpgm
define void @lds_atomic_inc_noret_i64(i64 addrspace(3)* %ptr) nounwind {
%result = atomicrmw add i64 addrspace(3)* %ptr, i64 1 seq_cst
ret void
}
; FUNC-LABEL: {{^}}lds_atomic_inc_noret_i64_offset:
-; SI: ds_inc_u64 {{.*}} offset:32
-; SI: s_endpgm
+; GCN: ds_inc_u64 {{.*}} offset:32
+; GCN: s_endpgm
define void @lds_atomic_inc_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
- %gep = getelementptr i64 addrspace(3)* %ptr, i32 4
+ %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
%result = atomicrmw add i64 addrspace(3)* %gep, i64 1 seq_cst
ret void
}
; FUNC-LABEL: {{^}}lds_atomic_sub_noret_i64:
-; SI: ds_sub_u64
-; SI: s_endpgm
+; GCN: ds_sub_u64
+; GCN: s_endpgm
define void @lds_atomic_sub_noret_i64(i64 addrspace(3)* %ptr) nounwind {
%result = atomicrmw sub i64 addrspace(3)* %ptr, i64 4 seq_cst
ret void
}
; FUNC-LABEL: {{^}}lds_atomic_sub_noret_i64_offset:
-; SI: ds_sub_u64 {{.*}} offset:32
-; SI: s_endpgm
+; GCN: ds_sub_u64 {{.*}} offset:32
+; GCN: s_endpgm
define void @lds_atomic_sub_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
- %gep = getelementptr i64 addrspace(3)* %ptr, i32 4
+ %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
%result = atomicrmw sub i64 addrspace(3)* %gep, i64 4 seq_cst
ret void
}
; FUNC-LABEL: {{^}}lds_atomic_dec_noret_i64:
-; SI: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], -1
-; SI: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], -1
-; SI: ds_dec_u64 [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}}
-; SI: s_endpgm
+; GCN: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], -1
+; GCN: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], -1
+; GCN: ds_dec_u64 [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}}
+; GCN: s_endpgm
define void @lds_atomic_dec_noret_i64(i64 addrspace(3)* %ptr) nounwind {
%result = atomicrmw sub i64 addrspace(3)* %ptr, i64 1 seq_cst
ret void
}
; FUNC-LABEL: {{^}}lds_atomic_dec_noret_i64_offset:
-; SI: ds_dec_u64 {{.*}} offset:32
-; SI: s_endpgm
+; GCN: ds_dec_u64 {{.*}} offset:32
+; GCN: s_endpgm
define void @lds_atomic_dec_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
- %gep = getelementptr i64 addrspace(3)* %ptr, i32 4
+ %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
%result = atomicrmw sub i64 addrspace(3)* %gep, i64 1 seq_cst
ret void
}
; FUNC-LABEL: {{^}}lds_atomic_and_noret_i64:
-; SI: ds_and_b64
-; SI: s_endpgm
+; GCN: ds_and_b64
+; GCN: s_endpgm
define void @lds_atomic_and_noret_i64(i64 addrspace(3)* %ptr) nounwind {
%result = atomicrmw and i64 addrspace(3)* %ptr, i64 4 seq_cst
ret void
}
; FUNC-LABEL: {{^}}lds_atomic_and_noret_i64_offset:
-; SI: ds_and_b64 {{.*}} offset:32
-; SI: s_endpgm
+; GCN: ds_and_b64 {{.*}} offset:32
+; GCN: s_endpgm
define void @lds_atomic_and_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
- %gep = getelementptr i64 addrspace(3)* %ptr, i32 4
+ %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
%result = atomicrmw and i64 addrspace(3)* %gep, i64 4 seq_cst
ret void
}
; FUNC-LABEL: {{^}}lds_atomic_or_noret_i64:
-; SI: ds_or_b64
-; SI: s_endpgm
+; GCN: ds_or_b64
+; GCN: s_endpgm
define void @lds_atomic_or_noret_i64(i64 addrspace(3)* %ptr) nounwind {
%result = atomicrmw or i64 addrspace(3)* %ptr, i64 4 seq_cst
ret void
}
; FUNC-LABEL: {{^}}lds_atomic_or_noret_i64_offset:
-; SI: ds_or_b64 {{.*}} offset:32
-; SI: s_endpgm
+; GCN: ds_or_b64 {{.*}} offset:32
+; GCN: s_endpgm
define void @lds_atomic_or_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
- %gep = getelementptr i64 addrspace(3)* %ptr, i32 4
+ %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
%result = atomicrmw or i64 addrspace(3)* %gep, i64 4 seq_cst
ret void
}
; FUNC-LABEL: {{^}}lds_atomic_xor_noret_i64:
-; SI: ds_xor_b64
-; SI: s_endpgm
+; GCN: ds_xor_b64
+; GCN: s_endpgm
define void @lds_atomic_xor_noret_i64(i64 addrspace(3)* %ptr) nounwind {
%result = atomicrmw xor i64 addrspace(3)* %ptr, i64 4 seq_cst
ret void
}
; FUNC-LABEL: {{^}}lds_atomic_xor_noret_i64_offset:
-; SI: ds_xor_b64 {{.*}} offset:32
-; SI: s_endpgm
+; GCN: ds_xor_b64 {{.*}} offset:32
+; GCN: s_endpgm
define void @lds_atomic_xor_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
- %gep = getelementptr i64 addrspace(3)* %ptr, i32 4
+ %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
%result = atomicrmw xor i64 addrspace(3)* %gep, i64 4 seq_cst
ret void
}
@@ -399,69 +402,69 @@ define void @lds_atomic_xor_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
; }
; FUNC-LABEL: {{^}}lds_atomic_min_noret_i64:
-; SI: ds_min_i64
-; SI: s_endpgm
+; GCN: ds_min_i64
+; GCN: s_endpgm
define void @lds_atomic_min_noret_i64(i64 addrspace(3)* %ptr) nounwind {
%result = atomicrmw min i64 addrspace(3)* %ptr, i64 4 seq_cst
ret void
}
; FUNC-LABEL: {{^}}lds_atomic_min_noret_i64_offset:
-; SI: ds_min_i64 {{.*}} offset:32
-; SI: s_endpgm
+; GCN: ds_min_i64 {{.*}} offset:32
+; GCN: s_endpgm
define void @lds_atomic_min_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
- %gep = getelementptr i64 addrspace(3)* %ptr, i32 4
+ %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
%result = atomicrmw min i64 addrspace(3)* %gep, i64 4 seq_cst
ret void
}
; FUNC-LABEL: {{^}}lds_atomic_max_noret_i64:
-; SI: ds_max_i64
-; SI: s_endpgm
+; GCN: ds_max_i64
+; GCN: s_endpgm
define void @lds_atomic_max_noret_i64(i64 addrspace(3)* %ptr) nounwind {
%result = atomicrmw max i64 addrspace(3)* %ptr, i64 4 seq_cst
ret void
}
; FUNC-LABEL: {{^}}lds_atomic_max_noret_i64_offset:
-; SI: ds_max_i64 {{.*}} offset:32
-; SI: s_endpgm
+; GCN: ds_max_i64 {{.*}} offset:32
+; GCN: s_endpgm
define void @lds_atomic_max_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
- %gep = getelementptr i64 addrspace(3)* %ptr, i32 4
+ %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
%result = atomicrmw max i64 addrspace(3)* %gep, i64 4 seq_cst
ret void
}
; FUNC-LABEL: {{^}}lds_atomic_umin_noret_i64:
-; SI: ds_min_u64
-; SI: s_endpgm
+; GCN: ds_min_u64
+; GCN: s_endpgm
define void @lds_atomic_umin_noret_i64(i64 addrspace(3)* %ptr) nounwind {
%result = atomicrmw umin i64 addrspace(3)* %ptr, i64 4 seq_cst
ret void
}
; FUNC-LABEL: {{^}}lds_atomic_umin_noret_i64_offset:
-; SI: ds_min_u64 {{.*}} offset:32
-; SI: s_endpgm
+; GCN: ds_min_u64 {{.*}} offset:32
+; GCN: s_endpgm
define void @lds_atomic_umin_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
- %gep = getelementptr i64 addrspace(3)* %ptr, i32 4
+ %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
%result = atomicrmw umin i64 addrspace(3)* %gep, i64 4 seq_cst
ret void
}
; FUNC-LABEL: {{^}}lds_atomic_umax_noret_i64:
-; SI: ds_max_u64
-; SI: s_endpgm
+; GCN: ds_max_u64
+; GCN: s_endpgm
define void @lds_atomic_umax_noret_i64(i64 addrspace(3)* %ptr) nounwind {
%result = atomicrmw umax i64 addrspace(3)* %ptr, i64 4 seq_cst
ret void
}
; FUNC-LABEL: {{^}}lds_atomic_umax_noret_i64_offset:
-; SI: ds_max_u64 {{.*}} offset:32
-; SI: s_endpgm
+; GCN: ds_max_u64 {{.*}} offset:32
+; GCN: s_endpgm
define void @lds_atomic_umax_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
- %gep = getelementptr i64 addrspace(3)* %ptr, i32 4
+ %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
%result = atomicrmw umax i64 addrspace(3)* %gep, i64 4 seq_cst
ret void
}
diff --git a/test/CodeGen/R600/local-memory-two-objects.ll b/test/CodeGen/R600/local-memory-two-objects.ll
index 60f1a0a4963a..06a8b1246e63 100644
--- a/test/CodeGen/R600/local-memory-two-objects.ll
+++ b/test/CodeGen/R600/local-memory-two-objects.ll
@@ -5,7 +5,6 @@
@local_memory_two_objects.local_mem0 = internal unnamed_addr addrspace(3) global [4 x i32] undef, align 4
@local_memory_two_objects.local_mem1 = internal unnamed_addr addrspace(3) global [4 x i32] undef, align 4
-; EG: {{^}}local_memory_two_objects:
; Check that the LDS size is emitted correctly
; EG: .long 166120
@@ -13,6 +12,8 @@
; GCN: .long 47180
; GCN-NEXT: .long 38792
+; EG: {{^}}local_memory_two_objects:
+
; We would like to check that the lds writes are using different
; addresses, but due to variations in the scheduler, we can't do
; this consistently on evergreen GPUs.
@@ -30,28 +31,28 @@
; EG: LDS_READ_RET {{[*]*}} OQAP, {{PV|T}}[[ADDRR:[0-9]*\.[XYZW]]]
; EG-NOT: LDS_READ_RET {{[*]*}} OQAP, T[[ADDRR]]
; SI: v_add_i32_e32 [[SIPTR:v[0-9]+]], 16, v{{[0-9]+}}
-; SI: ds_read_b32 {{v[0-9]+}}, [[SIPTR]] [M0]
-; CI: ds_read_b32 {{v[0-9]+}}, [[ADDRR:v[0-9]+]] offset:16 [M0]
-; CI: ds_read_b32 {{v[0-9]+}}, [[ADDRR]] [M0]
+; SI: ds_read_b32 {{v[0-9]+}}, [[SIPTR]]
+; CI: ds_read_b32 {{v[0-9]+}}, [[ADDRR:v[0-9]+]] offset:16
+; CI: ds_read_b32 {{v[0-9]+}}, [[ADDRR]]
define void @local_memory_two_objects(i32 addrspace(1)* %out) {
entry:
%x.i = call i32 @llvm.r600.read.tidig.x() #0
- %arrayidx = getelementptr inbounds [4 x i32] addrspace(3)* @local_memory_two_objects.local_mem0, i32 0, i32 %x.i
+ %arrayidx = getelementptr inbounds [4 x i32], [4 x i32] addrspace(3)* @local_memory_two_objects.local_mem0, i32 0, i32 %x.i
store i32 %x.i, i32 addrspace(3)* %arrayidx, align 4
%mul = shl nsw i32 %x.i, 1
- %arrayidx1 = getelementptr inbounds [4 x i32] addrspace(3)* @local_memory_two_objects.local_mem1, i32 0, i32 %x.i
+ %arrayidx1 = getelementptr inbounds [4 x i32], [4 x i32] addrspace(3)* @local_memory_two_objects.local_mem1, i32 0, i32 %x.i
store i32 %mul, i32 addrspace(3)* %arrayidx1, align 4
%sub = sub nsw i32 3, %x.i
call void @llvm.AMDGPU.barrier.local()
- %arrayidx2 = getelementptr inbounds [4 x i32] addrspace(3)* @local_memory_two_objects.local_mem0, i32 0, i32 %sub
- %0 = load i32 addrspace(3)* %arrayidx2, align 4
- %arrayidx3 = getelementptr inbounds i32 addrspace(1)* %out, i32 %x.i
+ %arrayidx2 = getelementptr inbounds [4 x i32], [4 x i32] addrspace(3)* @local_memory_two_objects.local_mem0, i32 0, i32 %sub
+ %0 = load i32, i32 addrspace(3)* %arrayidx2, align 4
+ %arrayidx3 = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 %x.i
store i32 %0, i32 addrspace(1)* %arrayidx3, align 4
- %arrayidx4 = getelementptr inbounds [4 x i32] addrspace(3)* @local_memory_two_objects.local_mem1, i32 0, i32 %sub
- %1 = load i32 addrspace(3)* %arrayidx4, align 4
+ %arrayidx4 = getelementptr inbounds [4 x i32], [4 x i32] addrspace(3)* @local_memory_two_objects.local_mem1, i32 0, i32 %sub
+ %1 = load i32, i32 addrspace(3)* %arrayidx4, align 4
%add = add nsw i32 %x.i, 4
- %arrayidx5 = getelementptr inbounds i32 addrspace(1)* %out, i32 %add
+ %arrayidx5 = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 %add
store i32 %1, i32 addrspace(1)* %arrayidx5, align 4
ret void
}
diff --git a/test/CodeGen/R600/local-memory.ll b/test/CodeGen/R600/local-memory.ll
index 68e72c556f66..9494ed75bd0c 100644
--- a/test/CodeGen/R600/local-memory.ll
+++ b/test/CodeGen/R600/local-memory.ll
@@ -4,7 +4,6 @@
@local_memory.local_mem = internal unnamed_addr addrspace(3) global [128 x i32] undef, align 4
-; FUNC-LABEL: {{^}}local_memory:
; Check that the LDS size is emitted correctly
; EG: .long 166120
@@ -14,6 +13,8 @@
; CI: .long 47180
; CI-NEXT: .long 38792
+; FUNC-LABEL: {{^}}local_memory:
+
; EG: LDS_WRITE
; SI-NOT: s_wqm_b64
; SI: ds_write_b32
@@ -29,15 +30,15 @@
define void @local_memory(i32 addrspace(1)* %out) {
entry:
%y.i = call i32 @llvm.r600.read.tidig.x() #0
- %arrayidx = getelementptr inbounds [128 x i32] addrspace(3)* @local_memory.local_mem, i32 0, i32 %y.i
+ %arrayidx = getelementptr inbounds [128 x i32], [128 x i32] addrspace(3)* @local_memory.local_mem, i32 0, i32 %y.i
store i32 %y.i, i32 addrspace(3)* %arrayidx, align 4
%add = add nsw i32 %y.i, 1
%cmp = icmp eq i32 %add, 16
%.add = select i1 %cmp, i32 0, i32 %add
call void @llvm.AMDGPU.barrier.local()
- %arrayidx1 = getelementptr inbounds [128 x i32] addrspace(3)* @local_memory.local_mem, i32 0, i32 %.add
- %0 = load i32 addrspace(3)* %arrayidx1, align 4
- %arrayidx2 = getelementptr inbounds i32 addrspace(1)* %out, i32 %y.i
+ %arrayidx1 = getelementptr inbounds [128 x i32], [128 x i32] addrspace(3)* @local_memory.local_mem, i32 0, i32 %.add
+ %0 = load i32, i32 addrspace(3)* %arrayidx1, align 4
+ %arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 %y.i
store i32 %0, i32 addrspace(1)* %arrayidx2, align 4
ret void
}
diff --git a/test/CodeGen/R600/loop-address.ll b/test/CodeGen/R600/loop-address.ll
index 03e0f011fffc..7fadb8dba7b8 100644
--- a/test/CodeGen/R600/loop-address.ll
+++ b/test/CodeGen/R600/loop-address.ll
@@ -17,7 +17,7 @@ for.body: ; preds = %for.body, %entry
%i.07.in = phi i32 [ %i.07, %for.body ], [ %iterations, %entry ]
%ai.06 = phi i32 [ %add, %for.body ], [ 0, %entry ]
%i.07 = add nsw i32 %i.07.in, -1
- %arrayidx = getelementptr inbounds i32 addrspace(1)* %out, i32 %ai.06
+ %arrayidx = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 %ai.06
store i32 %i.07, i32 addrspace(1)* %arrayidx, align 4
%add = add nsw i32 %ai.06, 1
%exitcond = icmp eq i32 %add, %iterations
diff --git a/test/CodeGen/R600/loop-idiom.ll b/test/CodeGen/R600/loop-idiom.ll
index a0b00ab644b4..810b34fed865 100644
--- a/test/CodeGen/R600/loop-idiom.ll
+++ b/test/CodeGen/R600/loop-idiom.ll
@@ -20,9 +20,9 @@ entry:
for.body:
%0 = phi i32 [0, %entry], [%4, %for.body]
- %1 = getelementptr i8 addrspace(3)* %in, i32 %0
- %2 = getelementptr i8* %dest, i32 %0
- %3 = load i8 addrspace(3)* %1
+ %1 = getelementptr i8, i8 addrspace(3)* %in, i32 %0
+ %2 = getelementptr i8, i8* %dest, i32 %0
+ %3 = load i8, i8 addrspace(3)* %1
store i8 %3, i8* %2
%4 = add i32 %0, 1
%5 = icmp eq i32 %4, %size
@@ -44,7 +44,7 @@ entry:
for.body:
%0 = phi i32 [0, %entry], [%2, %for.body]
- %1 = getelementptr i8* %dest, i32 %0
+ %1 = getelementptr i8, i8* %dest, i32 %0
store i8 0, i8* %1
%2 = add i32 %0, 1
%3 = icmp eq i32 %2, %size
diff --git a/test/CodeGen/R600/m0-spill.ll b/test/CodeGen/R600/m0-spill.ll
index 4dade82325ce..1dddc85f775d 100644
--- a/test/CodeGen/R600/m0-spill.ll
+++ b/test/CodeGen/R600/m0-spill.ll
@@ -12,8 +12,8 @@ main_body:
br i1 %cmp, label %if, label %else
if:
- %lds_ptr = getelementptr [64 x float] addrspace(3)* @lds, i32 0, i32 0
- %lds_data = load float addrspace(3)* %lds_ptr
+ %lds_ptr = getelementptr [64 x float], [64 x float] addrspace(3)* @lds, i32 0, i32 0
+ %lds_data = load float, float addrspace(3)* %lds_ptr
br label %endif
else:
diff --git a/test/CodeGen/R600/mad-combine.ll b/test/CodeGen/R600/mad-combine.ll
new file mode 100644
index 000000000000..bc071628ead0
--- /dev/null
+++ b/test/CodeGen/R600/mad-combine.ll
@@ -0,0 +1,567 @@
+; Make sure we still form mad, rather than fma, even when unsafe math or fp-contract is allowed.
+
+; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=SI-STD -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs -fp-contract=fast < %s | FileCheck -check-prefix=SI -check-prefix=SI-STD -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=SI -check-prefix=SI-STD -check-prefix=FUNC %s
+
+; Make sure we don't form mad with denormals
+; RUN: llc -march=amdgcn -mcpu=tahiti -mattr=+fp32-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=SI-DENORM -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=verde -mattr=+fp32-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=SI-DENORM-SLOWFMAF -check-prefix=FUNC %s
+
+declare i32 @llvm.r600.read.tidig.x() #0
+declare float @llvm.fabs.f32(float) #0
+declare float @llvm.fma.f32(float, float, float) #0
+declare float @llvm.fmuladd.f32(float, float, float) #0
+
+; (fadd (fmul x, y), z) -> (fma x, y, z)
+; FUNC-LABEL: {{^}}combine_to_mad_f32_0:
+; SI-DAG: buffer_load_dword [[A:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; SI-DAG: buffer_load_dword [[B:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
+; SI-DAG: buffer_load_dword [[C:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
+
+; SI-STD: v_mad_f32 [[RESULT:v[0-9]+]], [[A]], [[B]], [[C]]
+
+; SI-DENORM: v_fma_f32 [[RESULT:v[0-9]+]], [[A]], [[B]], [[C]]
+
+; SI-DENORM-SLOWFMAF-NOT: v_fma
+; SI-DENORM-SLOWFMAF-NOT: v_mad
+
+; SI-DENORM-SLOWFMAF: v_mul_f32_e32 [[TMP:v[0-9]+]], [[B]], [[A]]
+; SI-DENORM-SLOWFMAF: v_add_f32_e32 [[RESULT:v[0-9]+]], [[C]], [[TMP]]
+
+; SI: buffer_store_dword [[RESULT]]
+define void @combine_to_mad_f32_0(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 {
+ %tid = tail call i32 @llvm.r600.read.tidig.x() #0
+ %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
+ %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
+ %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2
+ %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid
+
+ %a = load float, float addrspace(1)* %gep.0
+ %b = load float, float addrspace(1)* %gep.1
+ %c = load float, float addrspace(1)* %gep.2
+
+ %mul = fmul float %a, %b
+ %fma = fadd float %mul, %c
+ store float %fma, float addrspace(1)* %gep.out
+ ret void
+}
+
+; (fadd (fmul x, y), z) -> (fma x, y, z)
+; FUNC-LABEL: {{^}}combine_to_mad_f32_0_2use:
+; SI-DAG: buffer_load_dword [[A:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; SI-DAG: buffer_load_dword [[B:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
+; SI-DAG: buffer_load_dword [[C:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
+; SI-DAG: buffer_load_dword [[D:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:12{{$}}
+
+; SI-STD-DAG: v_mad_f32 [[RESULT0:v[0-9]+]], [[A]], [[B]], [[C]]
+; SI-STD-DAG: v_mad_f32 [[RESULT1:v[0-9]+]], [[A]], [[B]], [[D]]
+
+; SI-DENORM-DAG: v_fma_f32 [[RESULT0:v[0-9]+]], [[A]], [[B]], [[C]]
+; SI-DENORM-DAG: v_fma_f32 [[RESULT1:v[0-9]+]], [[A]], [[B]], [[D]]
+
+; SI-DENORM-SLOWFMAF: v_mul_f32_e32 [[TMP:v[0-9]+]], [[B]], [[A]]
+; SI-DENORM-SLOWFMAF-DAG: v_add_f32_e32 [[RESULT0:v[0-9]+]], [[C]], [[TMP]]
+; SI-DENORM-SLOWFMAF-DAG: v_add_f32_e32 [[RESULT1:v[0-9]+]], [[D]], [[TMP]]
+
+; SI-DAG: buffer_store_dword [[RESULT0]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; SI-DAG: buffer_store_dword [[RESULT1]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
+; SI: s_endpgm
+define void @combine_to_mad_f32_0_2use(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 {
+ %tid = tail call i32 @llvm.r600.read.tidig.x() #0
+ %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
+ %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
+ %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2
+ %gep.3 = getelementptr float, float addrspace(1)* %gep.0, i32 3
+ %gep.out.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
+ %gep.out.1 = getelementptr float, float addrspace(1)* %gep.out.0, i32 1
+
+ %a = load float, float addrspace(1)* %gep.0
+ %b = load float, float addrspace(1)* %gep.1
+ %c = load float, float addrspace(1)* %gep.2
+ %d = load float, float addrspace(1)* %gep.3
+
+ %mul = fmul float %a, %b
+ %fma0 = fadd float %mul, %c
+ %fma1 = fadd float %mul, %d
+
+ store float %fma0, float addrspace(1)* %gep.out.0
+ store float %fma1, float addrspace(1)* %gep.out.1
+ ret void
+}
+
+; (fadd x, (fmul y, z)) -> (fma y, z, x)
+; FUNC-LABEL: {{^}}combine_to_mad_f32_1:
+; SI-DAG: buffer_load_dword [[A:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; SI-DAG: buffer_load_dword [[B:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
+; SI-DAG: buffer_load_dword [[C:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
+
+; SI-STD: v_mad_f32 [[RESULT:v[0-9]+]], [[A]], [[B]], [[C]]
+; SI-DENORM: v_fma_f32 [[RESULT:v[0-9]+]], [[A]], [[B]], [[C]]
+
+; SI-DENORM-SLOWFMAF: v_mul_f32_e32 [[TMP:v[0-9]+]], [[B]], [[A]]
+; SI-DENORM-SLOWFMAF: v_add_f32_e32 [[RESULT:v[0-9]+]], [[TMP]], [[C]]
+
+; SI: buffer_store_dword [[RESULT]]
+define void @combine_to_mad_f32_1(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 {
+ %tid = tail call i32 @llvm.r600.read.tidig.x() #0
+ %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
+ %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
+ %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2
+ %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid
+
+ %a = load float, float addrspace(1)* %gep.0
+ %b = load float, float addrspace(1)* %gep.1
+ %c = load float, float addrspace(1)* %gep.2
+
+ %mul = fmul float %a, %b
+ %fma = fadd float %c, %mul
+ store float %fma, float addrspace(1)* %gep.out
+ ret void
+}
+
+; (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
+; FUNC-LABEL: {{^}}combine_to_mad_fsub_0_f32:
+; SI-DAG: buffer_load_dword [[A:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; SI-DAG: buffer_load_dword [[B:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
+; SI-DAG: buffer_load_dword [[C:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
+
+; SI-STD: v_mad_f32 [[RESULT:v[0-9]+]], [[A]], [[B]], -[[C]]
+; SI-DENORM: v_fma_f32 [[RESULT:v[0-9]+]], [[A]], [[B]], -[[C]]
+
+; SI-DENORM-SLOWFMAF: v_mul_f32_e32 [[TMP:v[0-9]+]], [[B]], [[A]]
+; SI-DENORM-SLOWFMAF: v_subrev_f32_e32 [[RESULT:v[0-9]+]], [[C]], [[TMP]]
+
+; SI: buffer_store_dword [[RESULT]]
+define void @combine_to_mad_fsub_0_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 {
+ %tid = tail call i32 @llvm.r600.read.tidig.x() #0
+ %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
+ %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
+ %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2
+ %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid
+
+ %a = load float, float addrspace(1)* %gep.0
+ %b = load float, float addrspace(1)* %gep.1
+ %c = load float, float addrspace(1)* %gep.2
+
+ %mul = fmul float %a, %b
+ %fma = fsub float %mul, %c
+ store float %fma, float addrspace(1)* %gep.out
+ ret void
+}
+
+; (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
+; FUNC-LABEL: {{^}}combine_to_mad_fsub_0_f32_2use:
+; SI-DAG: buffer_load_dword [[A:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; SI-DAG: buffer_load_dword [[B:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
+; SI-DAG: buffer_load_dword [[C:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
+; SI-DAG: buffer_load_dword [[D:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:12{{$}}
+
+; SI-STD-DAG: v_mad_f32 [[RESULT0:v[0-9]+]], [[A]], [[B]], -[[C]]
+; SI-STD-DAG: v_mad_f32 [[RESULT1:v[0-9]+]], [[A]], [[B]], -[[D]]
+
+; SI-DENORM-DAG: v_fma_f32 [[RESULT0:v[0-9]+]], [[A]], [[B]], -[[C]]
+; SI-DENORM-DAG: v_fma_f32 [[RESULT1:v[0-9]+]], [[A]], [[B]], -[[D]]
+
+; SI-DENORM-SLOWFMAF: v_mul_f32_e32 [[TMP:v[0-9]+]], [[B]], [[A]]
+; SI-DENORM-SLOWFMAF-DAG: v_subrev_f32_e32 [[RESULT0:v[0-9]+]], [[C]], [[TMP]]
+; SI-DENORM-SLOWFMAF-DAG: v_subrev_f32_e32 [[RESULT1:v[0-9]+]], [[D]], [[TMP]]
+
+; SI-DAG: buffer_store_dword [[RESULT0]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; SI-DAG: buffer_store_dword [[RESULT1]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
+; SI: s_endpgm
+define void @combine_to_mad_fsub_0_f32_2use(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 {
+ %tid = tail call i32 @llvm.r600.read.tidig.x() #0
+ %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
+ %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
+ %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2
+ %gep.3 = getelementptr float, float addrspace(1)* %gep.0, i32 3
+ %gep.out.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
+ %gep.out.1 = getelementptr float, float addrspace(1)* %gep.out.0, i32 1
+
+ %a = load float, float addrspace(1)* %gep.0
+ %b = load float, float addrspace(1)* %gep.1
+ %c = load float, float addrspace(1)* %gep.2
+ %d = load float, float addrspace(1)* %gep.3
+
+ %mul = fmul float %a, %b
+ %fma0 = fsub float %mul, %c
+ %fma1 = fsub float %mul, %d
+ store float %fma0, float addrspace(1)* %gep.out.0
+ store float %fma1, float addrspace(1)* %gep.out.1
+ ret void
+}
+
+; (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
+; FUNC-LABEL: {{^}}combine_to_mad_fsub_1_f32:
+; SI-DAG: buffer_load_dword [[A:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; SI-DAG: buffer_load_dword [[B:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
+; SI-DAG: buffer_load_dword [[C:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
+
+; SI-STD: v_mad_f32 [[RESULT:v[0-9]+]], -[[A]], [[B]], [[C]]
+; SI-DENORM: v_fma_f32 [[RESULT:v[0-9]+]], -[[A]], [[B]], [[C]]
+
+; SI-DENORM-SLOWFMAF: v_mul_f32_e32 [[TMP:v[0-9]+]], [[B]], [[A]]
+; SI-DENORM-SLOWFMAF: v_subrev_f32_e32 [[RESULT:v[0-9]+]], [[TMP]], [[C]]
+
+; SI: buffer_store_dword [[RESULT]]
+define void @combine_to_mad_fsub_1_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 {
+ %tid = tail call i32 @llvm.r600.read.tidig.x() #0
+ %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
+ %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
+ %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2
+ %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid
+
+ %a = load float, float addrspace(1)* %gep.0
+ %b = load float, float addrspace(1)* %gep.1
+ %c = load float, float addrspace(1)* %gep.2
+
+ %mul = fmul float %a, %b
+ %fma = fsub float %c, %mul
+ store float %fma, float addrspace(1)* %gep.out
+ ret void
+}
+
+; (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
+; FUNC-LABEL: {{^}}combine_to_mad_fsub_1_f32_2use:
+; SI-DAG: buffer_load_dword [[A:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; SI-DAG: buffer_load_dword [[B:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
+; SI-DAG: buffer_load_dword [[C:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
+; SI-DAG: buffer_load_dword [[D:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:12{{$}}
+
+; SI-STD-DAG: v_mad_f32 [[RESULT0:v[0-9]+]], -[[A]], [[B]], [[C]]
+; SI-STD-DAG: v_mad_f32 [[RESULT1:v[0-9]+]], -[[A]], [[B]], [[D]]
+
+; SI-DENORM-DAG: v_fma_f32 [[RESULT0:v[0-9]+]], -[[A]], [[B]], [[C]]
+; SI-DENORM-DAG: v_fma_f32 [[RESULT1:v[0-9]+]], -[[A]], [[B]], [[D]]
+
+; SI-DENORM-SLOWFMAF: v_mul_f32_e32 [[TMP:v[0-9]+]], [[B]], [[A]]
+; SI-DENORM-SLOWFMAF-DAG: v_subrev_f32_e32 [[RESULT0:v[0-9]+]], [[TMP]], [[C]]
+; SI-DENORM-SLOWFMAF-DAG: v_subrev_f32_e32 [[RESULT1:v[0-9]+]], [[TMP]], [[D]]
+
+; SI-DAG: buffer_store_dword [[RESULT0]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; SI-DAG: buffer_store_dword [[RESULT1]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
+; SI: s_endpgm
+define void @combine_to_mad_fsub_1_f32_2use(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 {
+ %tid = tail call i32 @llvm.r600.read.tidig.x() #0
+ %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
+ %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
+ %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2
+ %gep.3 = getelementptr float, float addrspace(1)* %gep.0, i32 3
+ %gep.out.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
+ %gep.out.1 = getelementptr float, float addrspace(1)* %gep.out.0, i32 1
+
+ %a = load float, float addrspace(1)* %gep.0
+ %b = load float, float addrspace(1)* %gep.1
+ %c = load float, float addrspace(1)* %gep.2
+ %d = load float, float addrspace(1)* %gep.3
+
+ %mul = fmul float %a, %b
+ %fma0 = fsub float %c, %mul
+ %fma1 = fsub float %d, %mul
+ store float %fma0, float addrspace(1)* %gep.out.0
+ store float %fma1, float addrspace(1)* %gep.out.1
+ ret void
+}
+
+; (fsub (fneg (fmul x, y)), z) -> (fma (fneg x), y, (fneg z))
+; FUNC-LABEL: {{^}}combine_to_mad_fsub_2_f32:
+; SI-DAG: buffer_load_dword [[A:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; SI-DAG: buffer_load_dword [[B:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
+; SI-DAG: buffer_load_dword [[C:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
+
+; SI-STD: v_mad_f32 [[RESULT:v[0-9]+]], -[[A]], [[B]], -[[C]]
+
+; SI-DENORM: v_fma_f32 [[RESULT:v[0-9]+]], -[[A]], [[B]], -[[C]]
+
+; SI-DENORM-SLOWFMAF: v_mul_f32_e32 [[TMP:v[0-9]+]], [[B]], [[A]]
+; SI-DENORM-SLOWFMAF: v_sub_f32_e64 [[RESULT:v[0-9]+]], -[[TMP]], [[C]]
+
+; SI: buffer_store_dword [[RESULT]]
+define void @combine_to_mad_fsub_2_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 {
+ %tid = tail call i32 @llvm.r600.read.tidig.x() #0
+ %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
+ %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
+ %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2
+ %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid
+
+ %a = load float, float addrspace(1)* %gep.0
+ %b = load float, float addrspace(1)* %gep.1
+ %c = load float, float addrspace(1)* %gep.2
+
+ %mul = fmul float %a, %b
+ %mul.neg = fsub float -0.0, %mul
+ %fma = fsub float %mul.neg, %c
+
+ store float %fma, float addrspace(1)* %gep.out
+ ret void
+}
+
+; (fsub (fneg (fmul x, y)), z) -> (fma (fneg x), y, (fneg z))
+; FUNC-LABEL: {{^}}combine_to_mad_fsub_2_f32_2uses_neg:
+; SI-DAG: buffer_load_dword [[A:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; SI-DAG: buffer_load_dword [[B:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
+; SI-DAG: buffer_load_dword [[C:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
+; SI-DAG: buffer_load_dword [[D:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:12{{$}}
+
+; SI-STD-DAG: v_mad_f32 [[RESULT0:v[0-9]+]], -[[A]], [[B]], -[[C]]
+; SI-STD-DAG: v_mad_f32 [[RESULT1:v[0-9]+]], -[[A]], [[B]], -[[D]]
+
+; SI-DENORM-DAG: v_fma_f32 [[RESULT0:v[0-9]+]], -[[A]], [[B]], -[[C]]
+; SI-DENORM-DAG: v_fma_f32 [[RESULT1:v[0-9]+]], -[[A]], [[B]], -[[D]]
+
+; SI-DENORM-SLOWFMAF: v_mul_f32_e32 [[TMP:v[0-9]+]], [[B]], [[A]]
+; SI-DENORM-SLOWFMAF-DAG: v_sub_f32_e64 [[RESULT0:v[0-9]+]], -[[TMP]], [[C]]
+; SI-DENORM-SLOWFMAF-DAG: v_sub_f32_e64 [[RESULT1:v[0-9]+]], -[[TMP]], [[D]]
+
+; SI-DAG: buffer_store_dword [[RESULT0]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; SI-DAG: buffer_store_dword [[RESULT1]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
+; SI: s_endpgm
+define void @combine_to_mad_fsub_2_f32_2uses_neg(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 {
+ %tid = tail call i32 @llvm.r600.read.tidig.x() #0
+ %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
+ %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
+ %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2
+ %gep.3 = getelementptr float, float addrspace(1)* %gep.0, i32 3
+ %gep.out.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
+ %gep.out.1 = getelementptr float, float addrspace(1)* %gep.out.0, i32 1
+
+ %a = load float, float addrspace(1)* %gep.0
+ %b = load float, float addrspace(1)* %gep.1
+ %c = load float, float addrspace(1)* %gep.2
+ %d = load float, float addrspace(1)* %gep.3
+
+ %mul = fmul float %a, %b
+ %mul.neg = fsub float -0.0, %mul
+ %fma0 = fsub float %mul.neg, %c
+ %fma1 = fsub float %mul.neg, %d
+
+ store float %fma0, float addrspace(1)* %gep.out.0
+ store float %fma1, float addrspace(1)* %gep.out.1
+ ret void
+}
+
+; (fsub (fneg (fmul x, y)), z) -> (fma (fneg x), y, (fneg z))
+; FUNC-LABEL: {{^}}combine_to_mad_fsub_2_f32_2uses_mul:
+; SI-DAG: buffer_load_dword [[A:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; SI-DAG: buffer_load_dword [[B:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
+; SI-DAG: buffer_load_dword [[C:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
+; SI-DAG: buffer_load_dword [[D:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:12{{$}}
+
+; SI-STD-DAG: v_mad_f32 [[RESULT0:v[0-9]+]], -[[A]], [[B]], -[[C]]
+; SI-STD-DAG: v_mad_f32 [[RESULT1:v[0-9]+]], [[A]], [[B]], -[[D]]
+
+; SI-DENORM-DAG: v_fma_f32 [[RESULT0:v[0-9]+]], -[[A]], [[B]], -[[C]]
+; SI-DENORM-DAG: v_fma_f32 [[RESULT1:v[0-9]+]], [[A]], [[B]], -[[D]]
+
+; SI-DENORM-SLOWFMAF: v_mul_f32_e32 [[TMP:v[0-9]+]], [[B]], [[A]]
+; SI-DENORM-SLOWFMAF-DAG: v_sub_f32_e64 [[RESULT0:v[0-9]+]], -[[TMP]], [[C]]
+; SI-DENORM-SLOWFMAF-DAG: v_subrev_f32_e32 [[RESULT1:v[0-9]+]], [[D]], [[TMP]]
+
+; SI-DAG: buffer_store_dword [[RESULT0]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; SI-DAG: buffer_store_dword [[RESULT1]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
+; SI: s_endpgm
+define void @combine_to_mad_fsub_2_f32_2uses_mul(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 {
+ %tid = tail call i32 @llvm.r600.read.tidig.x() #0
+ %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
+ %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
+ %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2
+ %gep.3 = getelementptr float, float addrspace(1)* %gep.0, i32 3
+ %gep.out.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
+ %gep.out.1 = getelementptr float, float addrspace(1)* %gep.out.0, i32 1
+
+ %a = load float, float addrspace(1)* %gep.0
+ %b = load float, float addrspace(1)* %gep.1
+ %c = load float, float addrspace(1)* %gep.2
+ %d = load float, float addrspace(1)* %gep.3
+
+ %mul = fmul float %a, %b
+ %mul.neg = fsub float -0.0, %mul
+ %fma0 = fsub float %mul.neg, %c
+ %fma1 = fsub float %mul, %d
+
+ store float %fma0, float addrspace(1)* %gep.out.0
+ store float %fma1, float addrspace(1)* %gep.out.1
+ ret void
+}
+
+; fold (fsub (fma x, y, (fmul u, v)), z) -> (fma x, y, (fma u, v, (fneg z)))
+
+; FUNC-LABEL: {{^}}aggressive_combine_to_mad_fsub_0_f32:
+; SI-DAG: buffer_load_dword [[A:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; SI-DAG: buffer_load_dword [[B:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
+; SI-DAG: buffer_load_dword [[C:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
+; SI-DAG: buffer_load_dword [[D:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:12{{$}}
+; SI-DAG: buffer_load_dword [[E:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:16{{$}}
+
+; SI-STD: v_mul_f32_e32 [[TMP0:v[0-9]+]], [[E]], [[D]]
+; SI-STD: v_fma_f32 [[TMP1:v[0-9]+]], [[A]], [[B]], [[TMP0]]
+; SI-STD: v_subrev_f32_e32 [[RESULT:v[0-9]+]], [[C]], [[TMP1]]
+
+; SI-DENORM: v_fma_f32 [[TMP0:v[0-9]+]], [[D]], [[E]], -[[C]]
+; SI-DENORM: v_fma_f32 [[RESULT:v[0-9]+]], [[A]], [[B]], [[TMP0]]
+
+; SI-DENORM-SLOWFMAF: v_mul_f32_e32 [[TMP0:v[0-9]+]], [[E]], [[D]]
+; SI-DENORM-SLOWFMAF: v_fma_f32 [[TMP1:v[0-9]+]], [[A]], [[B]], [[TMP0]]
+; SI-DENORM-SLOWFMAF: v_subrev_f32_e32 [[RESULT1:v[0-9]+]], [[C]], [[TMP1]]
+
+; SI: buffer_store_dword [[RESULT]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+define void @aggressive_combine_to_mad_fsub_0_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 {
+ %tid = tail call i32 @llvm.r600.read.tidig.x() #0
+ %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
+ %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
+ %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2
+ %gep.3 = getelementptr float, float addrspace(1)* %gep.0, i32 3
+ %gep.4 = getelementptr float, float addrspace(1)* %gep.0, i32 4
+ %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid
+
+ %x = load float, float addrspace(1)* %gep.0
+ %y = load float, float addrspace(1)* %gep.1
+ %z = load float, float addrspace(1)* %gep.2
+ %u = load float, float addrspace(1)* %gep.3
+ %v = load float, float addrspace(1)* %gep.4
+
+ %tmp0 = fmul float %u, %v
+ %tmp1 = call float @llvm.fma.f32(float %x, float %y, float %tmp0) #0
+ %tmp2 = fsub float %tmp1, %z
+
+ store float %tmp2, float addrspace(1)* %gep.out
+ ret void
+}
+
+; fold (fsub x, (fma y, z, (fmul u, v)))
+; -> (fma (fneg y), z, (fma (fneg u), v, x))
+
+; FUNC-LABEL: {{^}}aggressive_combine_to_mad_fsub_1_f32:
+; SI-DAG: buffer_load_dword [[A:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; SI-DAG: buffer_load_dword [[B:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
+; SI-DAG: buffer_load_dword [[C:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
+; SI-DAG: buffer_load_dword [[D:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:12{{$}}
+; SI-DAG: buffer_load_dword [[E:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:16{{$}}
+
+; SI-STD: v_mul_f32_e32 [[TMP0:v[0-9]+]], [[E]], [[D]]
+; SI-STD: v_fma_f32 [[TMP1:v[0-9]+]], [[B]], [[C]], [[TMP0]]
+; SI-STD: v_subrev_f32_e32 [[RESULT:v[0-9]+]], [[TMP1]], [[A]]
+
+; SI-DENORM: v_fma_f32 [[TMP0:v[0-9]+]], -[[D]], [[E]], [[A]]
+; SI-DENORM: v_fma_f32 [[RESULT:v[0-9]+]], -[[B]], [[C]], [[TMP0]]
+
+; SI-DENORM-SLOWFMAF: v_mul_f32_e32 [[TMP0:v[0-9]+]], [[E]], [[D]]
+; SI-DENORM-SLOWFMAF: v_fma_f32 [[TMP1:v[0-9]+]], [[B]], [[C]], [[TMP0]]
+; SI-DENORM-SLOWFMAF: v_subrev_f32_e32 [[RESULT:v[0-9]+]], [[TMP1]], [[A]]
+
+; SI: buffer_store_dword [[RESULT]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; SI: s_endpgm
+define void @aggressive_combine_to_mad_fsub_1_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 {
+ %tid = tail call i32 @llvm.r600.read.tidig.x() #0
+ %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
+ %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
+ %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2
+ %gep.3 = getelementptr float, float addrspace(1)* %gep.0, i32 3
+ %gep.4 = getelementptr float, float addrspace(1)* %gep.0, i32 4
+ %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid
+
+ %x = load float, float addrspace(1)* %gep.0
+ %y = load float, float addrspace(1)* %gep.1
+ %z = load float, float addrspace(1)* %gep.2
+ %u = load float, float addrspace(1)* %gep.3
+ %v = load float, float addrspace(1)* %gep.4
+
+ %tmp0 = fmul float %u, %v
+ %tmp1 = call float @llvm.fma.f32(float %y, float %z, float %tmp0) #0
+ %tmp2 = fsub float %x, %tmp1
+
+ store float %tmp2, float addrspace(1)* %gep.out
+ ret void
+}
+
+; fold (fsub (fma x, y, (fmul u, v)), z) -> (fma x, y, (fma u, v, (fneg z)))
+
+; FUNC-LABEL: {{^}}aggressive_combine_to_mad_fsub_2_f32:
+; SI-DAG: buffer_load_dword [[A:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; SI-DAG: buffer_load_dword [[B:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
+; SI-DAG: buffer_load_dword [[C:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
+; SI-DAG: buffer_load_dword [[D:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:12{{$}}
+; SI-DAG: buffer_load_dword [[E:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:16{{$}}
+
+; SI-STD: v_mad_f32 [[TMP:v[0-9]+]], [[D]], [[E]], -[[C]]
+; SI-STD: v_mad_f32 [[RESULT:v[0-9]+]], [[A]], [[B]], [[TMP]]
+
+; SI-DENORM: v_fma_f32 [[TMP:v[0-9]+]], [[D]], [[E]], -[[C]]
+; SI-DENORM: v_fma_f32 [[RESULT:v[0-9]+]], [[A]], [[B]], [[TMP]]
+
+; SI-DENORM-SLOWFMAF: v_mul_f32_e32 [[TMP0:v[0-9]+]], [[E]], [[D]]
+; SI-DENORM-SLOWFMAF: v_mul_f32_e32 [[TMP1:v[0-9]+]], [[B]], [[A]]
+; SI-DENORM-SLOWFMAF: v_add_f32_e32 [[TMP2:v[0-9]+]], [[TMP0]], [[TMP1]]
+; SI-DENORM-SLOWFMAF: v_subrev_f32_e32 [[RESULT:v[0-9]+]], [[C]], [[TMP2]]
+
+; SI: buffer_store_dword [[RESULT]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; SI: s_endpgm
+define void @aggressive_combine_to_mad_fsub_2_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 {
+ %tid = tail call i32 @llvm.r600.read.tidig.x() #0
+ %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
+ %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
+ %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2
+ %gep.3 = getelementptr float, float addrspace(1)* %gep.0, i32 3
+ %gep.4 = getelementptr float, float addrspace(1)* %gep.0, i32 4
+ %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid
+
+ %x = load float, float addrspace(1)* %gep.0
+ %y = load float, float addrspace(1)* %gep.1
+ %z = load float, float addrspace(1)* %gep.2
+ %u = load float, float addrspace(1)* %gep.3
+ %v = load float, float addrspace(1)* %gep.4
+
+ %tmp0 = fmul float %u, %v
+ %tmp1 = call float @llvm.fmuladd.f32(float %x, float %y, float %tmp0) #0
+ %tmp2 = fsub float %tmp1, %z
+
+ store float %tmp2, float addrspace(1)* %gep.out
+ ret void
+}
+
+; fold (fsub x, (fmuladd y, z, (fmul u, v)))
+; -> (fmuladd (fneg y), z, (fmuladd (fneg u), v, x))
+
+; FUNC-LABEL: {{^}}aggressive_combine_to_mad_fsub_3_f32:
+; SI-DAG: buffer_load_dword [[A:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; SI-DAG: buffer_load_dword [[B:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}}
+; SI-DAG: buffer_load_dword [[C:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
+; SI-DAG: buffer_load_dword [[D:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:12{{$}}
+; SI-DAG: buffer_load_dword [[E:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:16{{$}}
+
+; SI-STD: v_mad_f32 [[TMP:v[0-9]+]], -[[D]], [[E]], [[A]]
+; SI-STD: v_mad_f32 [[RESULT:v[0-9]+]], -[[B]], [[C]], [[TMP]]
+
+; SI-DENORM: v_fma_f32 [[TMP:v[0-9]+]], -[[D]], [[E]], [[A]]
+; SI-DENORM: v_fma_f32 [[RESULT:v[0-9]+]], -[[B]], [[C]], [[TMP]]
+
+; SI-DENORM-SLOWFMAF: v_mul_f32_e32 [[TMP0:v[0-9]+]], [[E]], [[D]]
+; SI-DENORM-SLOWFMAF: v_mul_f32_e32 [[TMP1:v[0-9]+]], [[C]], [[B]]
+; SI-DENORM-SLOWFMAF: v_add_f32_e32 [[TMP2:v[0-9]+]], [[TMP0]], [[TMP1]]
+; SI-DENORM-SLOWFMAF: v_subrev_f32_e32 [[RESULT:v[0-9]+]], [[TMP2]], [[A]]
+
+; SI: buffer_store_dword [[RESULT]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; SI: s_endpgm
+define void @aggressive_combine_to_mad_fsub_3_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 {
+ %tid = tail call i32 @llvm.r600.read.tidig.x() #0
+ %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
+ %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
+ %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2
+ %gep.3 = getelementptr float, float addrspace(1)* %gep.0, i32 3
+ %gep.4 = getelementptr float, float addrspace(1)* %gep.0, i32 4
+ %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid
+
+ %x = load float, float addrspace(1)* %gep.0
+ %y = load float, float addrspace(1)* %gep.1
+ %z = load float, float addrspace(1)* %gep.2
+ %u = load float, float addrspace(1)* %gep.3
+ %v = load float, float addrspace(1)* %gep.4
+
+ %tmp0 = fmul float %u, %v
+ %tmp1 = call float @llvm.fmuladd.f32(float %y, float %z, float %tmp0) #0
+ %tmp2 = fsub float %x, %tmp1
+
+ store float %tmp2, float addrspace(1)* %gep.out
+ ret void
+}
+
+attributes #0 = { nounwind readnone }
+attributes #1 = { nounwind }
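The mad-combine.ll tests added above all follow the same shape: load the operands, build a separate fmul/fadd (or fsub) pair, and check that the SI backend selects a single v_mad_f32 (or v_fma_f32 when fp32 denormals are enabled). As a minimal sketch of that pattern outside the test harness -- the function name @mad_sketch is made up for illustration and is not part of the imported suite -- feeding something like the following to llc -march=amdgcn -mcpu=tahiti would be expected to produce one v_mad_f32:

; Illustrative only: a separate fmul/fadd pair that the DAG combiner is
; expected to select as a single v_mad_f32 on tahiti (no fp32 denormals).
define void @mad_sketch(float addrspace(1)* %out, float addrspace(1)* %in) {
  %p1 = getelementptr float, float addrspace(1)* %in, i32 1
  %p2 = getelementptr float, float addrspace(1)* %in, i32 2
  %x = load float, float addrspace(1)* %in
  %y = load float, float addrspace(1)* %p1
  %z = load float, float addrspace(1)* %p2
  %mul = fmul float %x, %y
  %mad = fadd float %mul, %z
  store float %mad, float addrspace(1)* %out
  ret void
}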
diff --git a/test/CodeGen/R600/mad-sub.ll b/test/CodeGen/R600/mad-sub.ll
index 7b4020d2973c..aa4194ff6106 100644
--- a/test/CodeGen/R600/mad-sub.ll
+++ b/test/CodeGen/R600/mad-sub.ll
@@ -12,15 +12,15 @@ declare float @llvm.fabs.f32(float) #0
define void @mad_sub_f32(float addrspace(1)* noalias nocapture %out, float addrspace(1)* noalias nocapture readonly %ptr) #1 {
%tid = tail call i32 @llvm.r600.read.tidig.x() #0
%tid.ext = sext i32 %tid to i64
- %gep0 = getelementptr float addrspace(1)* %ptr, i64 %tid.ext
+ %gep0 = getelementptr float, float addrspace(1)* %ptr, i64 %tid.ext
%add1 = add i64 %tid.ext, 1
- %gep1 = getelementptr float addrspace(1)* %ptr, i64 %add1
+ %gep1 = getelementptr float, float addrspace(1)* %ptr, i64 %add1
%add2 = add i64 %tid.ext, 2
- %gep2 = getelementptr float addrspace(1)* %ptr, i64 %add2
- %outgep = getelementptr float addrspace(1)* %out, i64 %tid.ext
- %a = load float addrspace(1)* %gep0, align 4
- %b = load float addrspace(1)* %gep1, align 4
- %c = load float addrspace(1)* %gep2, align 4
+ %gep2 = getelementptr float, float addrspace(1)* %ptr, i64 %add2
+ %outgep = getelementptr float, float addrspace(1)* %out, i64 %tid.ext
+ %a = load float, float addrspace(1)* %gep0, align 4
+ %b = load float, float addrspace(1)* %gep1, align 4
+ %c = load float, float addrspace(1)* %gep2, align 4
%mul = fmul float %a, %b
%sub = fsub float %mul, %c
store float %sub, float addrspace(1)* %outgep, align 4
@@ -36,15 +36,15 @@ define void @mad_sub_f32(float addrspace(1)* noalias nocapture %out, float addrs
define void @mad_sub_inv_f32(float addrspace(1)* noalias nocapture %out, float addrspace(1)* noalias nocapture readonly %ptr) #1 {
%tid = tail call i32 @llvm.r600.read.tidig.x() #0
%tid.ext = sext i32 %tid to i64
- %gep0 = getelementptr float addrspace(1)* %ptr, i64 %tid.ext
+ %gep0 = getelementptr float, float addrspace(1)* %ptr, i64 %tid.ext
%add1 = add i64 %tid.ext, 1
- %gep1 = getelementptr float addrspace(1)* %ptr, i64 %add1
+ %gep1 = getelementptr float, float addrspace(1)* %ptr, i64 %add1
%add2 = add i64 %tid.ext, 2
- %gep2 = getelementptr float addrspace(1)* %ptr, i64 %add2
- %outgep = getelementptr float addrspace(1)* %out, i64 %tid.ext
- %a = load float addrspace(1)* %gep0, align 4
- %b = load float addrspace(1)* %gep1, align 4
- %c = load float addrspace(1)* %gep2, align 4
+ %gep2 = getelementptr float, float addrspace(1)* %ptr, i64 %add2
+ %outgep = getelementptr float, float addrspace(1)* %out, i64 %tid.ext
+ %a = load float, float addrspace(1)* %gep0, align 4
+ %b = load float, float addrspace(1)* %gep1, align 4
+ %c = load float, float addrspace(1)* %gep2, align 4
%mul = fmul float %a, %b
%sub = fsub float %c, %mul
store float %sub, float addrspace(1)* %outgep, align 4
@@ -57,15 +57,15 @@ define void @mad_sub_inv_f32(float addrspace(1)* noalias nocapture %out, float a
define void @mad_sub_f64(double addrspace(1)* noalias nocapture %out, double addrspace(1)* noalias nocapture readonly %ptr) #1 {
%tid = tail call i32 @llvm.r600.read.tidig.x() #0
%tid.ext = sext i32 %tid to i64
- %gep0 = getelementptr double addrspace(1)* %ptr, i64 %tid.ext
+ %gep0 = getelementptr double, double addrspace(1)* %ptr, i64 %tid.ext
%add1 = add i64 %tid.ext, 1
- %gep1 = getelementptr double addrspace(1)* %ptr, i64 %add1
+ %gep1 = getelementptr double, double addrspace(1)* %ptr, i64 %add1
%add2 = add i64 %tid.ext, 2
- %gep2 = getelementptr double addrspace(1)* %ptr, i64 %add2
- %outgep = getelementptr double addrspace(1)* %out, i64 %tid.ext
- %a = load double addrspace(1)* %gep0, align 8
- %b = load double addrspace(1)* %gep1, align 8
- %c = load double addrspace(1)* %gep2, align 8
+ %gep2 = getelementptr double, double addrspace(1)* %ptr, i64 %add2
+ %outgep = getelementptr double, double addrspace(1)* %out, i64 %tid.ext
+ %a = load double, double addrspace(1)* %gep0, align 8
+ %b = load double, double addrspace(1)* %gep1, align 8
+ %c = load double, double addrspace(1)* %gep2, align 8
%mul = fmul double %a, %b
%sub = fsub double %mul, %c
store double %sub, double addrspace(1)* %outgep, align 8
@@ -81,15 +81,15 @@ define void @mad_sub_f64(double addrspace(1)* noalias nocapture %out, double add
define void @mad_sub_fabs_f32(float addrspace(1)* noalias nocapture %out, float addrspace(1)* noalias nocapture readonly %ptr) #1 {
%tid = tail call i32 @llvm.r600.read.tidig.x() #0
%tid.ext = sext i32 %tid to i64
- %gep0 = getelementptr float addrspace(1)* %ptr, i64 %tid.ext
+ %gep0 = getelementptr float, float addrspace(1)* %ptr, i64 %tid.ext
%add1 = add i64 %tid.ext, 1
- %gep1 = getelementptr float addrspace(1)* %ptr, i64 %add1
+ %gep1 = getelementptr float, float addrspace(1)* %ptr, i64 %add1
%add2 = add i64 %tid.ext, 2
- %gep2 = getelementptr float addrspace(1)* %ptr, i64 %add2
- %outgep = getelementptr float addrspace(1)* %out, i64 %tid.ext
- %a = load float addrspace(1)* %gep0, align 4
- %b = load float addrspace(1)* %gep1, align 4
- %c = load float addrspace(1)* %gep2, align 4
+ %gep2 = getelementptr float, float addrspace(1)* %ptr, i64 %add2
+ %outgep = getelementptr float, float addrspace(1)* %out, i64 %tid.ext
+ %a = load float, float addrspace(1)* %gep0, align 4
+ %b = load float, float addrspace(1)* %gep1, align 4
+ %c = load float, float addrspace(1)* %gep2, align 4
%c.abs = call float @llvm.fabs.f32(float %c) #0
%mul = fmul float %a, %b
%sub = fsub float %mul, %c.abs
@@ -106,15 +106,15 @@ define void @mad_sub_fabs_f32(float addrspace(1)* noalias nocapture %out, float
define void @mad_sub_fabs_inv_f32(float addrspace(1)* noalias nocapture %out, float addrspace(1)* noalias nocapture readonly %ptr) #1 {
%tid = tail call i32 @llvm.r600.read.tidig.x() #0
%tid.ext = sext i32 %tid to i64
- %gep0 = getelementptr float addrspace(1)* %ptr, i64 %tid.ext
+ %gep0 = getelementptr float, float addrspace(1)* %ptr, i64 %tid.ext
%add1 = add i64 %tid.ext, 1
- %gep1 = getelementptr float addrspace(1)* %ptr, i64 %add1
+ %gep1 = getelementptr float, float addrspace(1)* %ptr, i64 %add1
%add2 = add i64 %tid.ext, 2
- %gep2 = getelementptr float addrspace(1)* %ptr, i64 %add2
- %outgep = getelementptr float addrspace(1)* %out, i64 %tid.ext
- %a = load float addrspace(1)* %gep0, align 4
- %b = load float addrspace(1)* %gep1, align 4
- %c = load float addrspace(1)* %gep2, align 4
+ %gep2 = getelementptr float, float addrspace(1)* %ptr, i64 %add2
+ %outgep = getelementptr float, float addrspace(1)* %out, i64 %tid.ext
+ %a = load float, float addrspace(1)* %gep0, align 4
+ %b = load float, float addrspace(1)* %gep1, align 4
+ %c = load float, float addrspace(1)* %gep2, align 4
%c.abs = call float @llvm.fabs.f32(float %c) #0
%mul = fmul float %a, %b
%sub = fsub float %c.abs, %mul
@@ -127,15 +127,15 @@ define void @mad_sub_fabs_inv_f32(float addrspace(1)* noalias nocapture %out, fl
define void @neg_neg_mad_f32(float addrspace(1)* noalias nocapture %out, float addrspace(1)* noalias nocapture readonly %ptr) #1 {
%tid = tail call i32 @llvm.r600.read.tidig.x() #0
%tid.ext = sext i32 %tid to i64
- %gep0 = getelementptr float addrspace(1)* %ptr, i64 %tid.ext
+ %gep0 = getelementptr float, float addrspace(1)* %ptr, i64 %tid.ext
%add1 = add i64 %tid.ext, 1
- %gep1 = getelementptr float addrspace(1)* %ptr, i64 %add1
+ %gep1 = getelementptr float, float addrspace(1)* %ptr, i64 %add1
%add2 = add i64 %tid.ext, 2
- %gep2 = getelementptr float addrspace(1)* %ptr, i64 %add2
- %outgep = getelementptr float addrspace(1)* %out, i64 %tid.ext
- %a = load float addrspace(1)* %gep0, align 4
- %b = load float addrspace(1)* %gep1, align 4
- %c = load float addrspace(1)* %gep2, align 4
+ %gep2 = getelementptr float, float addrspace(1)* %ptr, i64 %add2
+ %outgep = getelementptr float, float addrspace(1)* %out, i64 %tid.ext
+ %a = load float, float addrspace(1)* %gep0, align 4
+ %b = load float, float addrspace(1)* %gep1, align 4
+ %c = load float, float addrspace(1)* %gep2, align 4
%nega = fsub float -0.000000e+00, %a
%negb = fsub float -0.000000e+00, %b
%mul = fmul float %nega, %negb
@@ -153,15 +153,15 @@ define void @neg_neg_mad_f32(float addrspace(1)* noalias nocapture %out, float a
define void @mad_fabs_sub_f32(float addrspace(1)* noalias nocapture %out, float addrspace(1)* noalias nocapture readonly %ptr) #1 {
%tid = tail call i32 @llvm.r600.read.tidig.x() #0
%tid.ext = sext i32 %tid to i64
- %gep0 = getelementptr float addrspace(1)* %ptr, i64 %tid.ext
+ %gep0 = getelementptr float, float addrspace(1)* %ptr, i64 %tid.ext
%add1 = add i64 %tid.ext, 1
- %gep1 = getelementptr float addrspace(1)* %ptr, i64 %add1
+ %gep1 = getelementptr float, float addrspace(1)* %ptr, i64 %add1
%add2 = add i64 %tid.ext, 2
- %gep2 = getelementptr float addrspace(1)* %ptr, i64 %add2
- %outgep = getelementptr float addrspace(1)* %out, i64 %tid.ext
- %a = load float addrspace(1)* %gep0, align 4
- %b = load float addrspace(1)* %gep1, align 4
- %c = load float addrspace(1)* %gep2, align 4
+ %gep2 = getelementptr float, float addrspace(1)* %ptr, i64 %add2
+ %outgep = getelementptr float, float addrspace(1)* %out, i64 %tid.ext
+ %a = load float, float addrspace(1)* %gep0, align 4
+ %b = load float, float addrspace(1)* %gep1, align 4
+ %c = load float, float addrspace(1)* %gep2, align 4
%b.abs = call float @llvm.fabs.f32(float %b) #0
%mul = fmul float %a, %b.abs
%sub = fsub float %mul, %c
@@ -176,12 +176,12 @@ define void @mad_fabs_sub_f32(float addrspace(1)* noalias nocapture %out, float
; SI: buffer_store_dword [[RESULT]]
define void @fsub_c_fadd_a_a(float addrspace(1)* %out, float addrspace(1)* %in) {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep.0 = getelementptr float addrspace(1)* %out, i32 %tid
- %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1
- %gep.out = getelementptr float addrspace(1)* %out, i32 %tid
+ %gep.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
+ %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
+ %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid
- %r1 = load float addrspace(1)* %gep.0
- %r2 = load float addrspace(1)* %gep.1
+ %r1 = load float, float addrspace(1)* %gep.0
+ %r2 = load float, float addrspace(1)* %gep.1
%add = fadd float %r1, %r1
%r3 = fsub float %r2, %add
@@ -197,12 +197,12 @@ define void @fsub_c_fadd_a_a(float addrspace(1)* %out, float addrspace(1)* %in)
; SI: buffer_store_dword [[RESULT]]
define void @fsub_fadd_a_a_c(float addrspace(1)* %out, float addrspace(1)* %in) {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep.0 = getelementptr float addrspace(1)* %out, i32 %tid
- %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1
- %gep.out = getelementptr float addrspace(1)* %out, i32 %tid
+ %gep.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
+ %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
+ %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid
- %r1 = load float addrspace(1)* %gep.0
- %r2 = load float addrspace(1)* %gep.1
+ %r1 = load float, float addrspace(1)* %gep.0
+ %r2 = load float, float addrspace(1)* %gep.1
%add = fadd float %r1, %r1
%r3 = fsub float %add, %r2
diff --git a/test/CodeGen/R600/madak.ll b/test/CodeGen/R600/madak.ll
new file mode 100644
index 000000000000..933bb016d2c9
--- /dev/null
+++ b/test/CodeGen/R600/madak.ll
@@ -0,0 +1,193 @@
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN %s
+; XUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN %s
+
+; FIXME: Enable VI
+
+declare i32 @llvm.r600.read.tidig.x() nounwind readnone
+declare float @llvm.fabs.f32(float) nounwind readnone
+
+; GCN-LABEL: {{^}}madak_f32:
+; GCN: buffer_load_dword [[VA:v[0-9]+]]
+; GCN: buffer_load_dword [[VB:v[0-9]+]]
+; GCN: v_madak_f32_e32 {{v[0-9]+}}, [[VB]], [[VA]], 0x41200000
+define void @madak_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a, float addrspace(1)* noalias %in.b) nounwind {
+ %tid = tail call i32 @llvm.r600.read.tidig.x() nounwind readnone
+ %in.a.gep = getelementptr float, float addrspace(1)* %in.a, i32 %tid
+ %in.b.gep = getelementptr float, float addrspace(1)* %in.b, i32 %tid
+ %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
+
+ %a = load float, float addrspace(1)* %in.a.gep, align 4
+ %b = load float, float addrspace(1)* %in.b.gep, align 4
+
+ %mul = fmul float %a, %b
+ %madak = fadd float %mul, 10.0
+ store float %madak, float addrspace(1)* %out.gep, align 4
+ ret void
+}
+
+; Make sure this is only folded with one use. This is a code size
+; optimization and if we fold the immediate multiple times, we'll undo
+; it.
+
+; GCN-LABEL: {{^}}madak_2_use_f32:
+; GCN-DAG: buffer_load_dword [[VA:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; GCN-DAG: buffer_load_dword [[VB:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
+; GCN-DAG: buffer_load_dword [[VC:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8
+; GCN-DAG: v_mov_b32_e32 [[VK:v[0-9]+]], 0x41200000
+; GCN-DAG: v_mad_f32 {{v[0-9]+}}, [[VA]], [[VB]], [[VK]]
+; GCN-DAG: v_mad_f32 {{v[0-9]+}}, [[VA]], [[VC]], [[VK]]
+; GCN: s_endpgm
+define void @madak_2_use_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind {
+ %tid = tail call i32 @llvm.r600.read.tidig.x() nounwind readnone
+
+ %in.gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
+ %in.gep.1 = getelementptr float, float addrspace(1)* %in.gep.0, i32 1
+ %in.gep.2 = getelementptr float, float addrspace(1)* %in.gep.0, i32 2
+
+ %out.gep.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
+ %out.gep.1 = getelementptr float, float addrspace(1)* %in.gep.0, i32 1
+
+ %a = load float, float addrspace(1)* %in.gep.0, align 4
+ %b = load float, float addrspace(1)* %in.gep.1, align 4
+ %c = load float, float addrspace(1)* %in.gep.2, align 4
+
+ %mul0 = fmul float %a, %b
+ %mul1 = fmul float %a, %c
+ %madak0 = fadd float %mul0, 10.0
+ %madak1 = fadd float %mul1, 10.0
+
+ store float %madak0, float addrspace(1)* %out.gep.0, align 4
+ store float %madak1, float addrspace(1)* %out.gep.1, align 4
+ ret void
+}
+
+; GCN-LABEL: {{^}}madak_m_inline_imm_f32:
+; GCN: buffer_load_dword [[VA:v[0-9]+]]
+; GCN: v_madak_f32_e32 {{v[0-9]+}}, 4.0, [[VA]], 0x41200000
+define void @madak_m_inline_imm_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a) nounwind {
+ %tid = tail call i32 @llvm.r600.read.tidig.x() nounwind readnone
+ %in.a.gep = getelementptr float, float addrspace(1)* %in.a, i32 %tid
+ %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
+
+ %a = load float, float addrspace(1)* %in.a.gep, align 4
+
+ %mul = fmul float 4.0, %a
+ %madak = fadd float %mul, 10.0
+ store float %madak, float addrspace(1)* %out.gep, align 4
+ ret void
+}
+
+; Make sure nothing weird happens with a value that is also allowed as
+; an inline immediate.
+
+; GCN-LABEL: {{^}}madak_inline_imm_f32:
+; GCN: buffer_load_dword [[VA:v[0-9]+]]
+; GCN: buffer_load_dword [[VB:v[0-9]+]]
+; GCN: v_mad_f32 {{v[0-9]+}}, [[VA]], [[VB]], 4.0
+define void @madak_inline_imm_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a, float addrspace(1)* noalias %in.b) nounwind {
+ %tid = tail call i32 @llvm.r600.read.tidig.x() nounwind readnone
+ %in.a.gep = getelementptr float, float addrspace(1)* %in.a, i32 %tid
+ %in.b.gep = getelementptr float, float addrspace(1)* %in.b, i32 %tid
+ %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
+
+ %a = load float, float addrspace(1)* %in.a.gep, align 4
+ %b = load float, float addrspace(1)* %in.b.gep, align 4
+
+ %mul = fmul float %a, %b
+ %madak = fadd float %mul, 4.0
+ store float %madak, float addrspace(1)* %out.gep, align 4
+ ret void
+}
+
+; We can't use an SGPR when forming madak
+; GCN-LABEL: {{^}}s_v_madak_f32:
+; GCN: s_load_dword [[SB:s[0-9]+]]
+; GCN-DAG: v_mov_b32_e32 [[VK:v[0-9]+]], 0x41200000
+; GCN-DAG: buffer_load_dword [[VA:v[0-9]+]]
+; GCN-NOT: v_madak_f32
+; GCN: v_mad_f32 {{v[0-9]+}}, [[SB]], [[VA]], [[VK]]
+define void @s_v_madak_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a, float %b) nounwind {
+ %tid = tail call i32 @llvm.r600.read.tidig.x() nounwind readnone
+ %in.a.gep = getelementptr float, float addrspace(1)* %in.a, i32 %tid
+ %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
+
+ %a = load float, float addrspace(1)* %in.a.gep, align 4
+
+ %mul = fmul float %a, %b
+ %madak = fadd float %mul, 10.0
+ store float %madak, float addrspace(1)* %out.gep, align 4
+ ret void
+}
+
+; GCN-LABEL: @v_s_madak_f32
+; GCN-DAG: s_load_dword [[SB:s[0-9]+]]
+; GCN-DAG: v_mov_b32_e32 [[VK:v[0-9]+]], 0x41200000
+; GCN-DAG: buffer_load_dword [[VA:v[0-9]+]]
+; GCN-NOT: v_madak_f32
+; GCN: v_mad_f32 {{v[0-9]+}}, [[VA]], [[SB]], [[VK]]
+define void @v_s_madak_f32(float addrspace(1)* noalias %out, float %a, float addrspace(1)* noalias %in.b) nounwind {
+ %tid = tail call i32 @llvm.r600.read.tidig.x() nounwind readnone
+ %in.b.gep = getelementptr float, float addrspace(1)* %in.b, i32 %tid
+ %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
+
+ %b = load float, float addrspace(1)* %in.b.gep, align 4
+
+ %mul = fmul float %a, %b
+ %madak = fadd float %mul, 10.0
+ store float %madak, float addrspace(1)* %out.gep, align 4
+ ret void
+}
+
+; GCN-LABEL: {{^}}s_s_madak_f32:
+; GCN-NOT: v_madak_f32
+; GCN: v_mad_f32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
+define void @s_s_madak_f32(float addrspace(1)* %out, float %a, float %b) nounwind {
+ %mul = fmul float %a, %b
+ %madak = fadd float %mul, 10.0
+ store float %madak, float addrspace(1)* %out, align 4
+ ret void
+}
+
+; GCN-LABEL: {{^}}no_madak_src0_modifier_f32:
+; GCN: buffer_load_dword [[VA:v[0-9]+]]
+; GCN: buffer_load_dword [[VB:v[0-9]+]]
+; GCN: v_mad_f32 {{v[0-9]+}}, |{{v[0-9]+}}|, {{v[0-9]+}}, {{[sv][0-9]+}}
+; GCN: s_endpgm
+define void @no_madak_src0_modifier_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a, float addrspace(1)* noalias %in.b) nounwind {
+ %tid = tail call i32 @llvm.r600.read.tidig.x() nounwind readnone
+ %in.a.gep = getelementptr float, float addrspace(1)* %in.a, i32 %tid
+ %in.b.gep = getelementptr float, float addrspace(1)* %in.b, i32 %tid
+ %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
+
+ %a = load float, float addrspace(1)* %in.a.gep, align 4
+ %b = load float, float addrspace(1)* %in.b.gep, align 4
+
+ %a.fabs = call float @llvm.fabs.f32(float %a) nounwind readnone
+
+ %mul = fmul float %a.fabs, %b
+ %madak = fadd float %mul, 10.0
+ store float %madak, float addrspace(1)* %out.gep, align 4
+ ret void
+}
+
+; GCN-LABEL: {{^}}no_madak_src1_modifier_f32:
+; GCN: buffer_load_dword [[VA:v[0-9]+]]
+; GCN: buffer_load_dword [[VB:v[0-9]+]]
+; GCN: v_mad_f32 {{v[0-9]+}}, {{v[0-9]+}}, |{{v[0-9]+}}|, {{[sv][0-9]+}}
+; GCN: s_endpgm
+define void @no_madak_src1_modifier_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a, float addrspace(1)* noalias %in.b) nounwind {
+ %tid = tail call i32 @llvm.r600.read.tidig.x() nounwind readnone
+ %in.a.gep = getelementptr float, float addrspace(1)* %in.a, i32 %tid
+ %in.b.gep = getelementptr float, float addrspace(1)* %in.b, i32 %tid
+ %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
+
+ %a = load float, float addrspace(1)* %in.a.gep, align 4
+ %b = load float, float addrspace(1)* %in.b.gep, align 4
+
+ %b.fabs = call float @llvm.fabs.f32(float %b) nounwind readnone
+
+ %mul = fmul float %a, %b.fabs
+ %madak = fadd float %mul, 10.0
+ store float %madak, float addrspace(1)* %out.gep, align 4
+ ret void
+}
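madak.ll above covers v_madak_f32, the MAD encoding that carries a 32-bit literal constant as the final addend (d = a * b + K), so the fadd constant (10.0, i.e. 0x41200000) can be folded directly into the instruction when the literal has a single use and both multiplicands are VGPRs. A minimal sketch of the profitable case -- @madak_sketch is a hypothetical name, not one of the imported tests -- looks like:

; Illustrative only: with VGPR multiplicands and a single use of the literal,
; llc -march=amdgcn is expected to fold 10.0 (0x41200000) into v_madak_f32.
define void @madak_sketch(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) {
  %a = load float, float addrspace(1)* %a.ptr
  %b = load float, float addrspace(1)* %b.ptr
  %mul = fmul float %a, %b
  %madak = fadd float %mul, 10.0
  store float %madak, float addrspace(1)* %out
  ret void
}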
diff --git a/test/CodeGen/R600/madmk.ll b/test/CodeGen/R600/madmk.ll
new file mode 100644
index 000000000000..ba7bb221a99a
--- /dev/null
+++ b/test/CodeGen/R600/madmk.ll
@@ -0,0 +1,205 @@
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
+; XUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
+
+declare i32 @llvm.r600.read.tidig.x() nounwind readnone
+declare float @llvm.fabs.f32(float) nounwind readnone
+
+; GCN-LABEL: {{^}}madmk_f32:
+; GCN-DAG: buffer_load_dword [[VA:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; GCN-DAG: buffer_load_dword [[VB:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
+; GCN: v_madmk_f32_e32 {{v[0-9]+}}, [[VA]], [[VB]], 0x41200000
+define void @madmk_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind {
+ %tid = tail call i32 @llvm.r600.read.tidig.x() nounwind readnone
+ %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
+ %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
+ %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
+
+ %a = load float, float addrspace(1)* %gep.0, align 4
+ %b = load float, float addrspace(1)* %gep.1, align 4
+
+ %mul = fmul float %a, 10.0
+ %madmk = fadd float %mul, %b
+ store float %madmk, float addrspace(1)* %out.gep, align 4
+ ret void
+}
+
+; GCN-LABEL: {{^}}madmk_2_use_f32:
+; GCN-DAG: buffer_load_dword [[VA:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; GCN-DAG: buffer_load_dword [[VB:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
+; GCN-DAG: buffer_load_dword [[VC:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8
+; GCN-DAG: v_mov_b32_e32 [[VK:v[0-9]+]], 0x41200000
+; GCN-DAG: v_mad_f32 {{v[0-9]+}}, [[VA]], [[VK]], [[VB]]
+; GCN-DAG: v_mad_f32 {{v[0-9]+}}, [[VA]], [[VK]], [[VC]]
+; GCN: s_endpgm
+define void @madmk_2_use_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind {
+ %tid = tail call i32 @llvm.r600.read.tidig.x() nounwind readnone
+
+ %in.gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
+ %in.gep.1 = getelementptr float, float addrspace(1)* %in.gep.0, i32 1
+ %in.gep.2 = getelementptr float, float addrspace(1)* %in.gep.0, i32 2
+
+ %out.gep.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
+ %out.gep.1 = getelementptr float, float addrspace(1)* %in.gep.0, i32 1
+
+ %a = load float, float addrspace(1)* %in.gep.0, align 4
+ %b = load float, float addrspace(1)* %in.gep.1, align 4
+ %c = load float, float addrspace(1)* %in.gep.2, align 4
+
+ %mul0 = fmul float %a, 10.0
+ %mul1 = fmul float %a, 10.0
+ %madmk0 = fadd float %mul0, %b
+ %madmk1 = fadd float %mul1, %c
+
+ store float %madmk0, float addrspace(1)* %out.gep.0, align 4
+ store float %madmk1, float addrspace(1)* %out.gep.1, align 4
+ ret void
+}
+
+; We don't get any benefit if the constant is an inline immediate.
+; GCN-LABEL: {{^}}madmk_inline_imm_f32:
+; GCN-DAG: buffer_load_dword [[VA:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; GCN-DAG: buffer_load_dword [[VB:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
+; GCN: v_mad_f32 {{v[0-9]+}}, 4.0, [[VA]], [[VB]]
+define void @madmk_inline_imm_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind {
+ %tid = tail call i32 @llvm.r600.read.tidig.x() nounwind readnone
+ %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
+ %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
+ %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
+
+ %a = load float, float addrspace(1)* %gep.0, align 4
+ %b = load float, float addrspace(1)* %gep.1, align 4
+
+ %mul = fmul float %a, 4.0
+ %madmk = fadd float %mul, %b
+ store float %madmk, float addrspace(1)* %out.gep, align 4
+ ret void
+}
+
+; GCN-LABEL: {{^}}s_s_madmk_f32:
+; GCN-NOT: v_madmk_f32
+; GCN: v_mad_f32
+; GCN: s_endpgm
+define void @s_s_madmk_f32(float addrspace(1)* noalias %out, float %a, float %b) nounwind {
+ %tid = tail call i32 @llvm.r600.read.tidig.x() nounwind readnone
+ %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
+
+ %mul = fmul float %a, 10.0
+ %madmk = fadd float %mul, %b
+ store float %madmk, float addrspace(1)* %out.gep, align 4
+ ret void
+}
+
+; GCN-LABEL: {{^}}v_s_madmk_f32:
+; GCN-NOT: v_madmk_f32
+; GCN: v_mad_f32
+; GCN: s_endpgm
+define void @v_s_madmk_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in, float %b) nounwind {
+ %tid = tail call i32 @llvm.r600.read.tidig.x() nounwind readnone
+ %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
+ %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
+ %a = load float, float addrspace(1)* %gep.0, align 4
+
+ %mul = fmul float %a, 10.0
+ %madmk = fadd float %mul, %b
+ store float %madmk, float addrspace(1)* %out.gep, align 4
+ ret void
+}
+
+; GCN-LABEL: {{^}}scalar_vector_madmk_f32:
+; GCN-NOT: v_madmk_f32
+; GCN: v_mad_f32
+; GCN: s_endpgm
+define void @scalar_vector_madmk_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in, float %a) nounwind {
+ %tid = tail call i32 @llvm.r600.read.tidig.x() nounwind readnone
+ %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
+ %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
+ %b = load float, float addrspace(1)* %gep.0, align 4
+
+ %mul = fmul float %a, 10.0
+ %madmk = fadd float %mul, %b
+ store float %madmk, float addrspace(1)* %out.gep, align 4
+ ret void
+}
+
+; GCN-LABEL: {{^}}no_madmk_src0_modifier_f32:
+; GCN-DAG: buffer_load_dword [[VA:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; GCN-DAG: buffer_load_dword [[VB:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
+; GCN: v_mad_f32 {{v[0-9]+}}, |{{v[0-9]+}}|, {{v[0-9]+}}, {{[sv][0-9]+}}
+define void @no_madmk_src0_modifier_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind {
+ %tid = tail call i32 @llvm.r600.read.tidig.x() nounwind readnone
+ %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
+ %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
+ %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
+
+ %a = load float, float addrspace(1)* %gep.0, align 4
+ %b = load float, float addrspace(1)* %gep.1, align 4
+
+ %a.fabs = call float @llvm.fabs.f32(float %a) nounwind readnone
+
+ %mul = fmul float %a.fabs, 10.0
+ %madmk = fadd float %mul, %b
+ store float %madmk, float addrspace(1)* %out.gep, align 4
+ ret void
+}
+
+; GCN-LABEL: {{^}}no_madmk_src2_modifier_f32:
+; GCN-DAG: buffer_load_dword [[VA:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; GCN-DAG: buffer_load_dword [[VB:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
+; GCN: v_mad_f32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, |{{[sv][0-9]+}}|
+define void @no_madmk_src2_modifier_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind {
+ %tid = tail call i32 @llvm.r600.read.tidig.x() nounwind readnone
+ %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
+ %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
+ %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
+
+ %a = load float, float addrspace(1)* %gep.0, align 4
+ %b = load float, float addrspace(1)* %gep.1, align 4
+
+ %b.fabs = call float @llvm.fabs.f32(float %b) nounwind readnone
+
+ %mul = fmul float %a, 10.0
+ %madmk = fadd float %mul, %b.fabs
+ store float %madmk, float addrspace(1)* %out.gep, align 4
+ ret void
+}
+
+; GCN-LABEL: {{^}}madmk_add_inline_imm_f32:
+; GCN: buffer_load_dword [[A:v[0-9]+]]
+; GCN: v_mov_b32_e32 [[VK:v[0-9]+]], 0x41200000
+; GCN: v_mad_f32 {{v[0-9]+}}, [[VK]], [[A]], 2.0
+define void @madmk_add_inline_imm_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind {
+ %tid = tail call i32 @llvm.r600.read.tidig.x() nounwind readnone
+ %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
+ %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
+
+ %a = load float, float addrspace(1)* %gep.0, align 4
+
+ %mul = fmul float %a, 10.0
+ %madmk = fadd float %mul, 2.0
+ store float %madmk, float addrspace(1)* %out.gep, align 4
+ ret void
+}
+
+; SI-LABEL: {{^}}kill_madmk_verifier_error:
+; SI: s_xor_b64
+; SI: v_madmk_f32_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, 0x472aee8c
+; SI: s_or_b64
+define void @kill_madmk_verifier_error() nounwind {
+bb:
+ br label %bb2
+
+bb1: ; preds = %bb2
+ ret void
+
+bb2: ; preds = %bb6, %bb
+ %tmp = phi float [ undef, %bb ], [ %tmp8, %bb6 ]
+ %tmp3 = fsub float undef, %tmp
+ %tmp5 = fcmp oeq float %tmp3, 1.000000e+04
+ br i1 %tmp5, label %bb1, label %bb6
+
+bb6: ; preds = %bb2
+ %tmp4 = fmul float %tmp, undef
+ %tmp7 = fmul float %tmp4, 0x40E55DD180000000
+ %tmp8 = fadd float %tmp7, undef
+ br label %bb2
+}
diff --git a/test/CodeGen/R600/max.ll b/test/CodeGen/R600/max.ll
index 20af99332453..1aa9e6883011 100644
--- a/test/CodeGen/R600/max.ll
+++ b/test/CodeGen/R600/max.ll
@@ -6,11 +6,11 @@ declare i32 @llvm.r600.read.tidig.x() nounwind readnone
; SI: v_max_i32_e32
define void @v_test_imax_sge_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep0 = getelementptr i32 addrspace(1)* %aptr, i32 %tid
- %gep1 = getelementptr i32 addrspace(1)* %bptr, i32 %tid
- %outgep = getelementptr i32 addrspace(1)* %out, i32 %tid
- %a = load i32 addrspace(1)* %gep0, align 4
- %b = load i32 addrspace(1)* %gep1, align 4
+ %gep0 = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
+ %gep1 = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid
+ %outgep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %a = load i32, i32 addrspace(1)* %gep0, align 4
+ %b = load i32, i32 addrspace(1)* %gep1, align 4
%cmp = icmp sge i32 %a, %b
%val = select i1 %cmp, i32 %a, i32 %b
store i32 %val, i32 addrspace(1)* %outgep, align 4
@@ -26,15 +26,33 @@ define void @s_test_imax_sge_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwin
ret void
}
+; FUNC-LABEL: {{^}}s_test_imax_sge_imm_i32:
+; SI: s_max_i32 {{s[0-9]+}}, {{s[0-9]+}}, 9
+define void @s_test_imax_sge_imm_i32(i32 addrspace(1)* %out, i32 %a) nounwind {
+ %cmp = icmp sge i32 %a, 9
+ %val = select i1 %cmp, i32 %a, i32 9
+ store i32 %val, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: {{^}}s_test_imax_sgt_imm_i32:
+; SI: s_max_i32 {{s[0-9]+}}, {{s[0-9]+}}, 9
+define void @s_test_imax_sgt_imm_i32(i32 addrspace(1)* %out, i32 %a) nounwind {
+ %cmp = icmp sgt i32 %a, 9
+ %val = select i1 %cmp, i32 %a, i32 9
+ store i32 %val, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
; FUNC-LABEL: @v_test_imax_sgt_i32
; SI: v_max_i32_e32
define void @v_test_imax_sgt_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep0 = getelementptr i32 addrspace(1)* %aptr, i32 %tid
- %gep1 = getelementptr i32 addrspace(1)* %bptr, i32 %tid
- %outgep = getelementptr i32 addrspace(1)* %out, i32 %tid
- %a = load i32 addrspace(1)* %gep0, align 4
- %b = load i32 addrspace(1)* %gep1, align 4
+ %gep0 = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
+ %gep1 = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid
+ %outgep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %a = load i32, i32 addrspace(1)* %gep0, align 4
+ %b = load i32, i32 addrspace(1)* %gep1, align 4
%cmp = icmp sgt i32 %a, %b
%val = select i1 %cmp, i32 %a, i32 %b
store i32 %val, i32 addrspace(1)* %outgep, align 4
@@ -54,11 +72,11 @@ define void @s_test_imax_sgt_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwin
; SI: v_max_u32_e32
define void @v_test_umax_uge_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep0 = getelementptr i32 addrspace(1)* %aptr, i32 %tid
- %gep1 = getelementptr i32 addrspace(1)* %bptr, i32 %tid
- %outgep = getelementptr i32 addrspace(1)* %out, i32 %tid
- %a = load i32 addrspace(1)* %gep0, align 4
- %b = load i32 addrspace(1)* %gep1, align 4
+ %gep0 = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
+ %gep1 = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid
+ %outgep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %a = load i32, i32 addrspace(1)* %gep0, align 4
+ %b = load i32, i32 addrspace(1)* %gep1, align 4
%cmp = icmp uge i32 %a, %b
%val = select i1 %cmp, i32 %a, i32 %b
store i32 %val, i32 addrspace(1)* %outgep, align 4
@@ -78,11 +96,11 @@ define void @s_test_umax_uge_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwin
; SI: v_max_u32_e32
define void @v_test_umax_ugt_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep0 = getelementptr i32 addrspace(1)* %aptr, i32 %tid
- %gep1 = getelementptr i32 addrspace(1)* %bptr, i32 %tid
- %outgep = getelementptr i32 addrspace(1)* %out, i32 %tid
- %a = load i32 addrspace(1)* %gep0, align 4
- %b = load i32 addrspace(1)* %gep1, align 4
+ %gep0 = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
+ %gep1 = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid
+ %outgep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %a = load i32, i32 addrspace(1)* %gep0, align 4
+ %b = load i32, i32 addrspace(1)* %gep1, align 4
%cmp = icmp ugt i32 %a, %b
%val = select i1 %cmp, i32 %a, i32 %b
store i32 %val, i32 addrspace(1)* %outgep, align 4
diff --git a/test/CodeGen/R600/max3.ll b/test/CodeGen/R600/max3.ll
index f905e171b334..cfb94b272e51 100644
--- a/test/CodeGen/R600/max3.ll
+++ b/test/CodeGen/R600/max3.ll
@@ -6,13 +6,13 @@ declare i32 @llvm.r600.read.tidig.x() nounwind readnone
; SI: v_max3_i32
define void @v_test_imax3_sgt_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr, i32 addrspace(1)* %cptr) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep0 = getelementptr i32 addrspace(1)* %aptr, i32 %tid
- %gep1 = getelementptr i32 addrspace(1)* %bptr, i32 %tid
- %gep2 = getelementptr i32 addrspace(1)* %cptr, i32 %tid
- %outgep = getelementptr i32 addrspace(1)* %out, i32 %tid
- %a = load i32 addrspace(1)* %gep0, align 4
- %b = load i32 addrspace(1)* %gep1, align 4
- %c = load i32 addrspace(1)* %gep2, align 4
+ %gep0 = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
+ %gep1 = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid
+ %gep2 = getelementptr i32, i32 addrspace(1)* %cptr, i32 %tid
+ %outgep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %a = load i32, i32 addrspace(1)* %gep0, align 4
+ %b = load i32, i32 addrspace(1)* %gep1, align 4
+ %c = load i32, i32 addrspace(1)* %gep2, align 4
%icmp0 = icmp sgt i32 %a, %b
%i0 = select i1 %icmp0, i32 %a, i32 %b
%icmp1 = icmp sgt i32 %i0, %c
@@ -25,13 +25,13 @@ define void @v_test_imax3_sgt_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %apt
; SI: v_max3_u32
define void @v_test_umax3_ugt_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr, i32 addrspace(1)* %cptr) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep0 = getelementptr i32 addrspace(1)* %aptr, i32 %tid
- %gep1 = getelementptr i32 addrspace(1)* %bptr, i32 %tid
- %gep2 = getelementptr i32 addrspace(1)* %cptr, i32 %tid
- %outgep = getelementptr i32 addrspace(1)* %out, i32 %tid
- %a = load i32 addrspace(1)* %gep0, align 4
- %b = load i32 addrspace(1)* %gep1, align 4
- %c = load i32 addrspace(1)* %gep2, align 4
+ %gep0 = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
+ %gep1 = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid
+ %gep2 = getelementptr i32, i32 addrspace(1)* %cptr, i32 %tid
+ %outgep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %a = load i32, i32 addrspace(1)* %gep0, align 4
+ %b = load i32, i32 addrspace(1)* %gep1, align 4
+ %c = load i32, i32 addrspace(1)* %gep2, align 4
%icmp0 = icmp ugt i32 %a, %b
%i0 = select i1 %icmp0, i32 %a, i32 %b
%icmp1 = icmp ugt i32 %i0, %c
diff --git a/test/CodeGen/R600/merge-stores.ll b/test/CodeGen/R600/merge-stores.ll
new file mode 100644
index 000000000000..dbf9d4481ffb
--- /dev/null
+++ b/test/CodeGen/R600/merge-stores.ll
@@ -0,0 +1,536 @@
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN %s
+; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN %s
+
+; Run on devices with different unaligned load restrictions.
+
+; TODO: Vector element tests
+; TODO: Non-zero base offset for load and store combinations
+; TODO: Same base addrspacecasted
+
+
+; GCN-LABEL: {{^}}merge_global_store_2_constants_i8:
+; GCN: buffer_store_byte
+; GCN: buffer_store_byte
+; GCN: s_endpgm
+define void @merge_global_store_2_constants_i8(i8 addrspace(1)* %out) #0 {
+ %out.gep.1 = getelementptr i8, i8 addrspace(1)* %out, i32 1
+
+ store i8 123, i8 addrspace(1)* %out.gep.1
+ store i8 456, i8 addrspace(1)* %out, align 2
+ ret void
+}
+
+; GCN-LABEL: {{^}}merge_global_store_2_constants_i8_natural_align:
+; GCN: buffer_store_byte
+; GCN: buffer_store_byte
+; GCN: s_endpgm
+define void @merge_global_store_2_constants_i8_natural_align(i8 addrspace(1)* %out) #0 {
+ %out.gep.1 = getelementptr i8, i8 addrspace(1)* %out, i32 1
+
+ store i8 123, i8 addrspace(1)* %out.gep.1
+ store i8 456, i8 addrspace(1)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}merge_global_store_2_constants_i16:
+; GCN: buffer_store_dword v
+define void @merge_global_store_2_constants_i16(i16 addrspace(1)* %out) #0 {
+ %out.gep.1 = getelementptr i16, i16 addrspace(1)* %out, i32 1
+
+ store i16 123, i16 addrspace(1)* %out.gep.1
+ store i16 456, i16 addrspace(1)* %out, align 4
+ ret void
+}
+
+; GCN-LABEL: {{^}}merge_global_store_2_constants_0_i16:
+; GCN: buffer_store_dword v
+define void @merge_global_store_2_constants_0_i16(i16 addrspace(1)* %out) #0 {
+ %out.gep.1 = getelementptr i16, i16 addrspace(1)* %out, i32 1
+
+ store i16 0, i16 addrspace(1)* %out.gep.1
+ store i16 0, i16 addrspace(1)* %out, align 4
+ ret void
+}
+
+; GCN-LABEL: {{^}}merge_global_store_2_constants_i16_natural_align:
+; GCN: buffer_store_short
+; GCN: buffer_store_short
+; GCN: s_endpgm
+define void @merge_global_store_2_constants_i16_natural_align(i16 addrspace(1)* %out) #0 {
+ %out.gep.1 = getelementptr i16, i16 addrspace(1)* %out, i32 1
+
+ store i16 123, i16 addrspace(1)* %out.gep.1
+ store i16 456, i16 addrspace(1)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}merge_global_store_2_constants_i32:
+; SI-DAG: s_movk_i32 [[SLO:s[0-9]+]], 0x1c8
+; SI-DAG: s_movk_i32 [[SHI:s[0-9]+]], 0x7b
+; SI-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], [[SLO]]
+; SI-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], [[SHI]]
+; GCN: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
+define void @merge_global_store_2_constants_i32(i32 addrspace(1)* %out) #0 {
+ %out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i32 1
+
+ store i32 123, i32 addrspace(1)* %out.gep.1
+ store i32 456, i32 addrspace(1)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}merge_global_store_2_constants_i32_f32:
+; GCN: buffer_store_dwordx2
+define void @merge_global_store_2_constants_i32_f32(i32 addrspace(1)* %out) #0 {
+ %out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i32 1
+ %out.gep.1.bc = bitcast i32 addrspace(1)* %out.gep.1 to float addrspace(1)*
+ store float 1.0, float addrspace(1)* %out.gep.1.bc
+ store i32 456, i32 addrspace(1)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}merge_global_store_2_constants_f32_i32:
+; GCN: buffer_store_dwordx2
+define void @merge_global_store_2_constants_f32_i32(float addrspace(1)* %out) #0 {
+ %out.gep.1 = getelementptr float, float addrspace(1)* %out, i32 1
+ %out.gep.1.bc = bitcast float addrspace(1)* %out.gep.1 to i32 addrspace(1)*
+ store i32 123, i32 addrspace(1)* %out.gep.1.bc
+ store float 4.0, float addrspace(1)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}merge_global_store_4_constants_i32:
+; GCN: buffer_store_dwordx4
+define void @merge_global_store_4_constants_i32(i32 addrspace(1)* %out) #0 {
+ %out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i32 1
+ %out.gep.2 = getelementptr i32, i32 addrspace(1)* %out, i32 2
+ %out.gep.3 = getelementptr i32, i32 addrspace(1)* %out, i32 3
+
+ store i32 123, i32 addrspace(1)* %out.gep.1
+ store i32 456, i32 addrspace(1)* %out.gep.2
+ store i32 333, i32 addrspace(1)* %out.gep.3
+ store i32 1234, i32 addrspace(1)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}merge_global_store_4_constants_f32_order:
+; XGCN: buffer_store_dwordx4
+; GCN: buffer_store_dword v
+; GCN: buffer_store_dword v
+; GCN: buffer_store_dwordx2 v
+define void @merge_global_store_4_constants_f32_order(float addrspace(1)* %out) #0 {
+ %out.gep.1 = getelementptr float, float addrspace(1)* %out, i32 1
+ %out.gep.2 = getelementptr float, float addrspace(1)* %out, i32 2
+ %out.gep.3 = getelementptr float, float addrspace(1)* %out, i32 3
+
+ store float 8.0, float addrspace(1)* %out
+ store float 1.0, float addrspace(1)* %out.gep.1
+ store float 2.0, float addrspace(1)* %out.gep.2
+ store float 4.0, float addrspace(1)* %out.gep.3
+ ret void
+}
+
+; The first store is out of order. Because of the order of combines, the
+; consecutive-store merge fails: only some of the stores have been
+; replaced with integer constant stores, and those then won't merge because
+; the types are different.
+
+; GCN-LABEL: {{^}}merge_global_store_4_constants_f32:
+; XGCN: buffer_store_dwordx4
+; GCN: buffer_store_dword v
+; GCN: buffer_store_dword v
+; GCN: buffer_store_dword v
+; GCN: buffer_store_dword v
+define void @merge_global_store_4_constants_f32(float addrspace(1)* %out) #0 {
+ %out.gep.1 = getelementptr float, float addrspace(1)* %out, i32 1
+ %out.gep.2 = getelementptr float, float addrspace(1)* %out, i32 2
+ %out.gep.3 = getelementptr float, float addrspace(1)* %out, i32 3
+
+ store float 1.0, float addrspace(1)* %out.gep.1
+ store float 2.0, float addrspace(1)* %out.gep.2
+ store float 4.0, float addrspace(1)* %out.gep.3
+ store float 8.0, float addrspace(1)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}merge_global_store_3_constants_i32:
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dword
+; SI-NOT: buffer_store_dword
+; GCN: s_endpgm
+define void @merge_global_store_3_constants_i32(i32 addrspace(1)* %out) #0 {
+ %out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i32 1
+ %out.gep.2 = getelementptr i32, i32 addrspace(1)* %out, i32 2
+
+ store i32 123, i32 addrspace(1)* %out.gep.1
+ store i32 456, i32 addrspace(1)* %out.gep.2
+ store i32 1234, i32 addrspace(1)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}merge_global_store_2_constants_i64:
+; XGCN: buffer_store_dwordx4
+; GCN: buffer_store_dwordx2
+; GCN: buffer_store_dwordx2
+define void @merge_global_store_2_constants_i64(i64 addrspace(1)* %out) #0 {
+ %out.gep.1 = getelementptr i64, i64 addrspace(1)* %out, i64 1
+
+ store i64 123, i64 addrspace(1)* %out.gep.1
+ store i64 456, i64 addrspace(1)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}merge_global_store_4_constants_i64:
+; XGCN: buffer_store_dwordx4
+; XGCN: buffer_store_dwordx4
+
+; GCN: buffer_store_dwordx2
+; GCN: buffer_store_dwordx2
+; GCN: buffer_store_dwordx2
+; GCN: buffer_store_dwordx2
+define void @merge_global_store_4_constants_i64(i64 addrspace(1)* %out) #0 {
+ %out.gep.1 = getelementptr i64, i64 addrspace(1)* %out, i64 1
+ %out.gep.2 = getelementptr i64, i64 addrspace(1)* %out, i64 2
+ %out.gep.3 = getelementptr i64, i64 addrspace(1)* %out, i64 3
+
+ store i64 123, i64 addrspace(1)* %out.gep.1
+ store i64 456, i64 addrspace(1)* %out.gep.2
+ store i64 333, i64 addrspace(1)* %out.gep.3
+ store i64 1234, i64 addrspace(1)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}merge_global_store_2_adjacent_loads_i32:
+; GCN: buffer_load_dwordx2 [[LOAD:v\[[0-9]+:[0-9]+\]]]
+; GCN: buffer_store_dwordx2 [[LOAD]]
+define void @merge_global_store_2_adjacent_loads_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
+ %out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i32 1
+ %in.gep.1 = getelementptr i32, i32 addrspace(1)* %in, i32 1
+
+ %lo = load i32, i32 addrspace(1)* %in
+ %hi = load i32, i32 addrspace(1)* %in.gep.1
+
+ store i32 %lo, i32 addrspace(1)* %out
+ store i32 %hi, i32 addrspace(1)* %out.gep.1
+ ret void
+}
+
+; GCN-LABEL: {{^}}merge_global_store_2_adjacent_loads_i32_nonzero_base:
+; GCN: buffer_load_dwordx2 [[LOAD:v\[[0-9]+:[0-9]+\]]], s{{\[[0-9]+:[0-9]+\]}}, 0 offset:8
+; GCN: buffer_store_dwordx2 [[LOAD]], s{{\[[0-9]+:[0-9]+\]}}, 0 offset:8
+define void @merge_global_store_2_adjacent_loads_i32_nonzero_base(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
+ %in.gep.0 = getelementptr i32, i32 addrspace(1)* %in, i32 2
+ %in.gep.1 = getelementptr i32, i32 addrspace(1)* %in, i32 3
+
+ %out.gep.0 = getelementptr i32, i32 addrspace(1)* %out, i32 2
+ %out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i32 3
+ %lo = load i32, i32 addrspace(1)* %in.gep.0
+ %hi = load i32, i32 addrspace(1)* %in.gep.1
+
+ store i32 %lo, i32 addrspace(1)* %out.gep.0
+ store i32 %hi, i32 addrspace(1)* %out.gep.1
+ ret void
+}
+
+; GCN-LABEL: {{^}}merge_global_store_2_adjacent_loads_shuffle_i32:
+; GCN: buffer_load_dword v
+; GCN: buffer_load_dword v
+; GCN: buffer_store_dword v
+; GCN: buffer_store_dword v
+define void @merge_global_store_2_adjacent_loads_shuffle_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
+ %out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i32 1
+ %in.gep.1 = getelementptr i32, i32 addrspace(1)* %in, i32 1
+
+ %lo = load i32, i32 addrspace(1)* %in
+ %hi = load i32, i32 addrspace(1)* %in.gep.1
+
+ store i32 %hi, i32 addrspace(1)* %out
+ store i32 %lo, i32 addrspace(1)* %out.gep.1
+ ret void
+}
+
+; GCN-LABEL: {{^}}merge_global_store_4_adjacent_loads_i32:
+; GCN: buffer_load_dwordx4 [[LOAD:v\[[0-9]+:[0-9]+\]]]
+; GCN: buffer_store_dwordx4 [[LOAD]]
+define void @merge_global_store_4_adjacent_loads_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
+ %out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i32 1
+ %out.gep.2 = getelementptr i32, i32 addrspace(1)* %out, i32 2
+ %out.gep.3 = getelementptr i32, i32 addrspace(1)* %out, i32 3
+ %in.gep.1 = getelementptr i32, i32 addrspace(1)* %in, i32 1
+ %in.gep.2 = getelementptr i32, i32 addrspace(1)* %in, i32 2
+ %in.gep.3 = getelementptr i32, i32 addrspace(1)* %in, i32 3
+
+ %x = load i32, i32 addrspace(1)* %in
+ %y = load i32, i32 addrspace(1)* %in.gep.1
+ %z = load i32, i32 addrspace(1)* %in.gep.2
+ %w = load i32, i32 addrspace(1)* %in.gep.3
+
+ store i32 %x, i32 addrspace(1)* %out
+ store i32 %y, i32 addrspace(1)* %out.gep.1
+ store i32 %z, i32 addrspace(1)* %out.gep.2
+ store i32 %w, i32 addrspace(1)* %out.gep.3
+ ret void
+}
+
+; GCN-LABEL: {{^}}merge_global_store_3_adjacent_loads_i32:
+; SI-DAG: buffer_load_dwordx2
+; SI-DAG: buffer_load_dword v
+; GCN: s_waitcnt
+; SI-DAG: buffer_store_dword v
+; SI-DAG: buffer_store_dwordx2 v
+; GCN: s_endpgm
+define void @merge_global_store_3_adjacent_loads_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
+ %out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i32 1
+ %out.gep.2 = getelementptr i32, i32 addrspace(1)* %out, i32 2
+ %in.gep.1 = getelementptr i32, i32 addrspace(1)* %in, i32 1
+ %in.gep.2 = getelementptr i32, i32 addrspace(1)* %in, i32 2
+
+ %x = load i32, i32 addrspace(1)* %in
+ %y = load i32, i32 addrspace(1)* %in.gep.1
+ %z = load i32, i32 addrspace(1)* %in.gep.2
+
+ store i32 %x, i32 addrspace(1)* %out
+ store i32 %y, i32 addrspace(1)* %out.gep.1
+ store i32 %z, i32 addrspace(1)* %out.gep.2
+ ret void
+}
+
+; GCN-LABEL: {{^}}merge_global_store_4_adjacent_loads_f32:
+; GCN: buffer_load_dwordx4 [[LOAD:v\[[0-9]+:[0-9]+\]]]
+; GCN: buffer_store_dwordx4 [[LOAD]]
+define void @merge_global_store_4_adjacent_loads_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
+ %out.gep.1 = getelementptr float, float addrspace(1)* %out, i32 1
+ %out.gep.2 = getelementptr float, float addrspace(1)* %out, i32 2
+ %out.gep.3 = getelementptr float, float addrspace(1)* %out, i32 3
+ %in.gep.1 = getelementptr float, float addrspace(1)* %in, i32 1
+ %in.gep.2 = getelementptr float, float addrspace(1)* %in, i32 2
+ %in.gep.3 = getelementptr float, float addrspace(1)* %in, i32 3
+
+ %x = load float, float addrspace(1)* %in
+ %y = load float, float addrspace(1)* %in.gep.1
+ %z = load float, float addrspace(1)* %in.gep.2
+ %w = load float, float addrspace(1)* %in.gep.3
+
+ store float %x, float addrspace(1)* %out
+ store float %y, float addrspace(1)* %out.gep.1
+ store float %z, float addrspace(1)* %out.gep.2
+ store float %w, float addrspace(1)* %out.gep.3
+ ret void
+}
+
+; GCN-LABEL: {{^}}merge_global_store_4_adjacent_loads_i32_nonzero_base:
+; GCN: buffer_load_dwordx4 [[LOAD:v\[[0-9]+:[0-9]+\]]], s{{\[[0-9]+:[0-9]+\]}}, 0 offset:44
+; GCN: buffer_store_dwordx4 [[LOAD]], s{{\[[0-9]+:[0-9]+\]}}, 0 offset:28
+define void @merge_global_store_4_adjacent_loads_i32_nonzero_base(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
+ %in.gep.0 = getelementptr i32, i32 addrspace(1)* %in, i32 11
+ %in.gep.1 = getelementptr i32, i32 addrspace(1)* %in, i32 12
+ %in.gep.2 = getelementptr i32, i32 addrspace(1)* %in, i32 13
+ %in.gep.3 = getelementptr i32, i32 addrspace(1)* %in, i32 14
+ %out.gep.0 = getelementptr i32, i32 addrspace(1)* %out, i32 7
+ %out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i32 8
+ %out.gep.2 = getelementptr i32, i32 addrspace(1)* %out, i32 9
+ %out.gep.3 = getelementptr i32, i32 addrspace(1)* %out, i32 10
+
+ %x = load i32, i32 addrspace(1)* %in.gep.0
+ %y = load i32, i32 addrspace(1)* %in.gep.1
+ %z = load i32, i32 addrspace(1)* %in.gep.2
+ %w = load i32, i32 addrspace(1)* %in.gep.3
+
+ store i32 %x, i32 addrspace(1)* %out.gep.0
+ store i32 %y, i32 addrspace(1)* %out.gep.1
+ store i32 %z, i32 addrspace(1)* %out.gep.2
+ store i32 %w, i32 addrspace(1)* %out.gep.3
+ ret void
+}
+
+; GCN-LABEL: {{^}}merge_global_store_4_adjacent_loads_inverse_i32:
+; GCN: buffer_load_dwordx4 [[LOAD:v\[[0-9]+:[0-9]+\]]]
+; GCN: s_barrier
+; GCN: buffer_store_dwordx4 [[LOAD]]
+define void @merge_global_store_4_adjacent_loads_inverse_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
+ %out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i32 1
+ %out.gep.2 = getelementptr i32, i32 addrspace(1)* %out, i32 2
+ %out.gep.3 = getelementptr i32, i32 addrspace(1)* %out, i32 3
+ %in.gep.1 = getelementptr i32, i32 addrspace(1)* %in, i32 1
+ %in.gep.2 = getelementptr i32, i32 addrspace(1)* %in, i32 2
+ %in.gep.3 = getelementptr i32, i32 addrspace(1)* %in, i32 3
+
+ %x = load i32, i32 addrspace(1)* %in
+ %y = load i32, i32 addrspace(1)* %in.gep.1
+ %z = load i32, i32 addrspace(1)* %in.gep.2
+ %w = load i32, i32 addrspace(1)* %in.gep.3
+
+ ; Make sure the barrier doesn't stop this
+ tail call void @llvm.AMDGPU.barrier.local() #1
+
+ store i32 %w, i32 addrspace(1)* %out.gep.3
+ store i32 %z, i32 addrspace(1)* %out.gep.2
+ store i32 %y, i32 addrspace(1)* %out.gep.1
+ store i32 %x, i32 addrspace(1)* %out
+
+ ret void
+}
+
+; TODO: Re-packing of loaded register required. Maybe an IR pass
+; should catch this?
+
+; GCN-LABEL: {{^}}merge_global_store_4_adjacent_loads_shuffle_i32:
+; GCN: buffer_load_dword v
+; GCN: buffer_load_dword v
+; GCN: buffer_load_dword v
+; GCN: buffer_load_dword v
+; GCN: s_barrier
+; GCN: buffer_store_dword v
+; GCN: buffer_store_dword v
+; GCN: buffer_store_dword v
+; GCN: buffer_store_dword v
+define void @merge_global_store_4_adjacent_loads_shuffle_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
+ %out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i32 1
+ %out.gep.2 = getelementptr i32, i32 addrspace(1)* %out, i32 2
+ %out.gep.3 = getelementptr i32, i32 addrspace(1)* %out, i32 3
+ %in.gep.1 = getelementptr i32, i32 addrspace(1)* %in, i32 1
+ %in.gep.2 = getelementptr i32, i32 addrspace(1)* %in, i32 2
+ %in.gep.3 = getelementptr i32, i32 addrspace(1)* %in, i32 3
+
+ %x = load i32, i32 addrspace(1)* %in
+ %y = load i32, i32 addrspace(1)* %in.gep.1
+ %z = load i32, i32 addrspace(1)* %in.gep.2
+ %w = load i32, i32 addrspace(1)* %in.gep.3
+
+ ; Make sure the barrier doesn't stop this
+ tail call void @llvm.AMDGPU.barrier.local() #1
+
+ store i32 %w, i32 addrspace(1)* %out
+ store i32 %z, i32 addrspace(1)* %out.gep.1
+ store i32 %y, i32 addrspace(1)* %out.gep.2
+ store i32 %x, i32 addrspace(1)* %out.gep.3
+
+ ret void
+}
+
+; GCN-LABEL: {{^}}merge_global_store_4_adjacent_loads_i8:
+; GCN: buffer_load_dword [[LOAD:v[0-9]+]]
+; GCN: buffer_store_dword [[LOAD]]
+; GCN: s_endpgm
+define void @merge_global_store_4_adjacent_loads_i8(i8 addrspace(1)* %out, i8 addrspace(1)* %in) #0 {
+ %out.gep.1 = getelementptr i8, i8 addrspace(1)* %out, i8 1
+ %out.gep.2 = getelementptr i8, i8 addrspace(1)* %out, i8 2
+ %out.gep.3 = getelementptr i8, i8 addrspace(1)* %out, i8 3
+ %in.gep.1 = getelementptr i8, i8 addrspace(1)* %in, i8 1
+ %in.gep.2 = getelementptr i8, i8 addrspace(1)* %in, i8 2
+ %in.gep.3 = getelementptr i8, i8 addrspace(1)* %in, i8 3
+
+ %x = load i8, i8 addrspace(1)* %in, align 4
+ %y = load i8, i8 addrspace(1)* %in.gep.1
+ %z = load i8, i8 addrspace(1)* %in.gep.2
+ %w = load i8, i8 addrspace(1)* %in.gep.3
+
+ store i8 %x, i8 addrspace(1)* %out, align 4
+ store i8 %y, i8 addrspace(1)* %out.gep.1
+ store i8 %z, i8 addrspace(1)* %out.gep.2
+ store i8 %w, i8 addrspace(1)* %out.gep.3
+ ret void
+}
+
+; GCN-LABEL: {{^}}merge_global_store_4_adjacent_loads_i8_natural_align:
+; GCN: buffer_load_ubyte
+; GCN: buffer_load_ubyte
+; GCN: buffer_load_ubyte
+; GCN: buffer_load_ubyte
+; GCN: buffer_store_byte
+; GCN: buffer_store_byte
+; GCN: buffer_store_byte
+; GCN: buffer_store_byte
+; GCN: s_endpgm
+define void @merge_global_store_4_adjacent_loads_i8_natural_align(i8 addrspace(1)* %out, i8 addrspace(1)* %in) #0 {
+ %out.gep.1 = getelementptr i8, i8 addrspace(1)* %out, i8 1
+ %out.gep.2 = getelementptr i8, i8 addrspace(1)* %out, i8 2
+ %out.gep.3 = getelementptr i8, i8 addrspace(1)* %out, i8 3
+ %in.gep.1 = getelementptr i8, i8 addrspace(1)* %in, i8 1
+ %in.gep.2 = getelementptr i8, i8 addrspace(1)* %in, i8 2
+ %in.gep.3 = getelementptr i8, i8 addrspace(1)* %in, i8 3
+
+ %x = load i8, i8 addrspace(1)* %in
+ %y = load i8, i8 addrspace(1)* %in.gep.1
+ %z = load i8, i8 addrspace(1)* %in.gep.2
+ %w = load i8, i8 addrspace(1)* %in.gep.3
+
+ store i8 %x, i8 addrspace(1)* %out
+ store i8 %y, i8 addrspace(1)* %out.gep.1
+ store i8 %z, i8 addrspace(1)* %out.gep.2
+ store i8 %w, i8 addrspace(1)* %out.gep.3
+ ret void
+}
+
+; This works once AA is enabled on the subtarget
+; GCN-LABEL: {{^}}merge_global_store_4_vector_elts_loads_v4i32:
+; GCN: buffer_load_dwordx4 [[LOAD:v\[[0-9]+:[0-9]+\]]]
+; XGCN: buffer_store_dwordx4 [[LOAD]]
+; GCN: buffer_store_dword v
+; GCN: buffer_store_dword v
+; GCN: buffer_store_dword v
+; GCN: buffer_store_dword v
+define void @merge_global_store_4_vector_elts_loads_v4i32(i32 addrspace(1)* %out, <4 x i32> addrspace(1)* %in) #0 {
+ %out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i32 1
+ %out.gep.2 = getelementptr i32, i32 addrspace(1)* %out, i32 2
+ %out.gep.3 = getelementptr i32, i32 addrspace(1)* %out, i32 3
+ %vec = load <4 x i32>, <4 x i32> addrspace(1)* %in
+
+ %x = extractelement <4 x i32> %vec, i32 0
+ %y = extractelement <4 x i32> %vec, i32 1
+ %z = extractelement <4 x i32> %vec, i32 2
+ %w = extractelement <4 x i32> %vec, i32 3
+
+ store i32 %x, i32 addrspace(1)* %out
+ store i32 %y, i32 addrspace(1)* %out.gep.1
+ store i32 %z, i32 addrspace(1)* %out.gep.2
+ store i32 %w, i32 addrspace(1)* %out.gep.3
+ ret void
+}
+
+; GCN-LABEL: {{^}}merge_local_store_2_constants_i8:
+; GCN: ds_write_b8
+; GCN: ds_write_b8
+; GCN: s_endpgm
+define void @merge_local_store_2_constants_i8(i8 addrspace(3)* %out) #0 {
+ %out.gep.1 = getelementptr i8, i8 addrspace(3)* %out, i32 1
+
+ store i8 123, i8 addrspace(3)* %out.gep.1
+ store i8 456, i8 addrspace(3)* %out, align 2
+ ret void
+}
+
+; GCN-LABEL: {{^}}merge_local_store_2_constants_i32:
+; GCN-DAG: s_movk_i32 [[SLO:s[0-9]+]], 0x1c8
+; GCN-DAG: s_movk_i32 [[SHI:s[0-9]+]], 0x7b
+; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], [[SLO]]
+; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], [[SHI]]
+; GCN: ds_write2_b32 v{{[0-9]+}}, v[[LO]], v[[HI]] offset1:1{{$}}
+define void @merge_local_store_2_constants_i32(i32 addrspace(3)* %out) #0 {
+ %out.gep.1 = getelementptr i32, i32 addrspace(3)* %out, i32 1
+
+ store i32 123, i32 addrspace(3)* %out.gep.1
+ store i32 456, i32 addrspace(3)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}merge_local_store_4_constants_i32:
+; GCN: ds_write_b32
+; GCN: ds_write_b32
+; GCN: ds_write_b32
+; GCN: ds_write_b32
+define void @merge_local_store_4_constants_i32(i32 addrspace(3)* %out) #0 {
+ %out.gep.1 = getelementptr i32, i32 addrspace(3)* %out, i32 1
+ %out.gep.2 = getelementptr i32, i32 addrspace(3)* %out, i32 2
+ %out.gep.3 = getelementptr i32, i32 addrspace(3)* %out, i32 3
+
+ store i32 123, i32 addrspace(3)* %out.gep.1
+ store i32 456, i32 addrspace(3)* %out.gep.2
+ store i32 333, i32 addrspace(3)* %out.gep.3
+ store i32 1234, i32 addrspace(3)* %out
+ ret void
+}
+
+declare void @llvm.AMDGPU.barrier.local() #1
+
+attributes #0 = { nounwind }
+attributes #1 = { noduplicate nounwind }
diff --git a/test/CodeGen/R600/min.ll b/test/CodeGen/R600/min.ll
index 00ba5c6cddb4..275e9a7d899b 100644
--- a/test/CodeGen/R600/min.ll
+++ b/test/CodeGen/R600/min.ll
@@ -6,11 +6,11 @@ declare i32 @llvm.r600.read.tidig.x() nounwind readnone
; SI: v_min_i32_e32
define void @v_test_imin_sle_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep0 = getelementptr i32 addrspace(1)* %aptr, i32 %tid
- %gep1 = getelementptr i32 addrspace(1)* %bptr, i32 %tid
- %outgep = getelementptr i32 addrspace(1)* %out, i32 %tid
- %a = load i32 addrspace(1)* %gep0, align 4
- %b = load i32 addrspace(1)* %gep1, align 4
+ %gep0 = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
+ %gep1 = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid
+ %outgep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %a = load i32, i32 addrspace(1)* %gep0, align 4
+ %b = load i32, i32 addrspace(1)* %gep1, align 4
%cmp = icmp sle i32 %a, %b
%val = select i1 %cmp, i32 %a, i32 %b
store i32 %val, i32 addrspace(1)* %outgep, align 4
@@ -30,11 +30,11 @@ define void @s_test_imin_sle_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwin
; SI: v_min_i32_e32
define void @v_test_imin_slt_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep0 = getelementptr i32 addrspace(1)* %aptr, i32 %tid
- %gep1 = getelementptr i32 addrspace(1)* %bptr, i32 %tid
- %outgep = getelementptr i32 addrspace(1)* %out, i32 %tid
- %a = load i32 addrspace(1)* %gep0, align 4
- %b = load i32 addrspace(1)* %gep1, align 4
+ %gep0 = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
+ %gep1 = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid
+ %outgep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %a = load i32, i32 addrspace(1)* %gep0, align 4
+ %b = load i32, i32 addrspace(1)* %gep1, align 4
%cmp = icmp slt i32 %a, %b
%val = select i1 %cmp, i32 %a, i32 %b
store i32 %val, i32 addrspace(1)* %outgep, align 4
@@ -50,15 +50,33 @@ define void @s_test_imin_slt_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwin
ret void
}
+; FUNC-LABEL: {{^}}s_test_imin_slt_imm_i32:
+; SI: s_min_i32 {{s[0-9]+}}, {{s[0-9]+}}, 8
+define void @s_test_imin_slt_imm_i32(i32 addrspace(1)* %out, i32 %a) nounwind {
+ %cmp = icmp slt i32 %a, 8
+ %val = select i1 %cmp, i32 %a, i32 8
+ store i32 %val, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: {{^}}s_test_imin_sle_imm_i32:
+; SI: s_min_i32 {{s[0-9]+}}, {{s[0-9]+}}, 8
+define void @s_test_imin_sle_imm_i32(i32 addrspace(1)* %out, i32 %a) nounwind {
+ %cmp = icmp sle i32 %a, 8
+ %val = select i1 %cmp, i32 %a, i32 8
+ store i32 %val, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
; FUNC-LABEL: @v_test_umin_ule_i32
; SI: v_min_u32_e32
define void @v_test_umin_ule_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep0 = getelementptr i32 addrspace(1)* %aptr, i32 %tid
- %gep1 = getelementptr i32 addrspace(1)* %bptr, i32 %tid
- %outgep = getelementptr i32 addrspace(1)* %out, i32 %tid
- %a = load i32 addrspace(1)* %gep0, align 4
- %b = load i32 addrspace(1)* %gep1, align 4
+ %gep0 = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
+ %gep1 = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid
+ %outgep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %a = load i32, i32 addrspace(1)* %gep0, align 4
+ %b = load i32, i32 addrspace(1)* %gep1, align 4
%cmp = icmp ule i32 %a, %b
%val = select i1 %cmp, i32 %a, i32 %b
store i32 %val, i32 addrspace(1)* %outgep, align 4
@@ -78,11 +96,11 @@ define void @s_test_umin_ule_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwin
; SI: v_min_u32_e32
define void @v_test_umin_ult_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep0 = getelementptr i32 addrspace(1)* %aptr, i32 %tid
- %gep1 = getelementptr i32 addrspace(1)* %bptr, i32 %tid
- %outgep = getelementptr i32 addrspace(1)* %out, i32 %tid
- %a = load i32 addrspace(1)* %gep0, align 4
- %b = load i32 addrspace(1)* %gep1, align 4
+ %gep0 = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
+ %gep1 = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid
+ %outgep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %a = load i32, i32 addrspace(1)* %gep0, align 4
+ %b = load i32, i32 addrspace(1)* %gep1, align 4
%cmp = icmp ult i32 %a, %b
%val = select i1 %cmp, i32 %a, i32 %b
store i32 %val, i32 addrspace(1)* %outgep, align 4
@@ -106,12 +124,12 @@ define void @s_test_umin_ult_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwin
; SI: s_endpgm
define void @v_test_umin_ult_i32_multi_use(i32 addrspace(1)* %out0, i1 addrspace(1)* %out1, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep0 = getelementptr i32 addrspace(1)* %aptr, i32 %tid
- %gep1 = getelementptr i32 addrspace(1)* %bptr, i32 %tid
- %outgep0 = getelementptr i32 addrspace(1)* %out0, i32 %tid
- %outgep1 = getelementptr i1 addrspace(1)* %out1, i32 %tid
- %a = load i32 addrspace(1)* %gep0, align 4
- %b = load i32 addrspace(1)* %gep1, align 4
+ %gep0 = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
+ %gep1 = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid
+ %outgep0 = getelementptr i32, i32 addrspace(1)* %out0, i32 %tid
+ %outgep1 = getelementptr i1, i1 addrspace(1)* %out1, i32 %tid
+ %a = load i32, i32 addrspace(1)* %gep0, align 4
+ %b = load i32, i32 addrspace(1)* %gep1, align 4
%cmp = icmp ult i32 %a, %b
%val = select i1 %cmp, i32 %a, i32 %b
store i32 %val, i32 addrspace(1)* %outgep0, align 4
diff --git a/test/CodeGen/R600/min3.ll b/test/CodeGen/R600/min3.ll
index 6c11a650fcbb..38ef46d1bdd6 100644
--- a/test/CodeGen/R600/min3.ll
+++ b/test/CodeGen/R600/min3.ll
@@ -6,13 +6,13 @@ declare i32 @llvm.r600.read.tidig.x() nounwind readnone
; SI: v_min3_i32
define void @v_test_imin3_slt_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr, i32 addrspace(1)* %cptr) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep0 = getelementptr i32 addrspace(1)* %aptr, i32 %tid
- %gep1 = getelementptr i32 addrspace(1)* %bptr, i32 %tid
- %gep2 = getelementptr i32 addrspace(1)* %cptr, i32 %tid
- %outgep = getelementptr i32 addrspace(1)* %out, i32 %tid
- %a = load i32 addrspace(1)* %gep0, align 4
- %b = load i32 addrspace(1)* %gep1, align 4
- %c = load i32 addrspace(1)* %gep2, align 4
+ %gep0 = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
+ %gep1 = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid
+ %gep2 = getelementptr i32, i32 addrspace(1)* %cptr, i32 %tid
+ %outgep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %a = load i32, i32 addrspace(1)* %gep0, align 4
+ %b = load i32, i32 addrspace(1)* %gep1, align 4
+ %c = load i32, i32 addrspace(1)* %gep2, align 4
%icmp0 = icmp slt i32 %a, %b
%i0 = select i1 %icmp0, i32 %a, i32 %b
%icmp1 = icmp slt i32 %i0, %c
@@ -25,13 +25,13 @@ define void @v_test_imin3_slt_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %apt
; SI: v_min3_u32
define void @v_test_umin3_ult_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr, i32 addrspace(1)* %cptr) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep0 = getelementptr i32 addrspace(1)* %aptr, i32 %tid
- %gep1 = getelementptr i32 addrspace(1)* %bptr, i32 %tid
- %gep2 = getelementptr i32 addrspace(1)* %cptr, i32 %tid
- %outgep = getelementptr i32 addrspace(1)* %out, i32 %tid
- %a = load i32 addrspace(1)* %gep0, align 4
- %b = load i32 addrspace(1)* %gep1, align 4
- %c = load i32 addrspace(1)* %gep2, align 4
+ %gep0 = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
+ %gep1 = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid
+ %gep2 = getelementptr i32, i32 addrspace(1)* %cptr, i32 %tid
+ %outgep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %a = load i32, i32 addrspace(1)* %gep0, align 4
+ %b = load i32, i32 addrspace(1)* %gep1, align 4
+ %c = load i32, i32 addrspace(1)* %gep2, align 4
%icmp0 = icmp ult i32 %a, %b
%i0 = select i1 %icmp0, i32 %a, i32 %b
%icmp1 = icmp ult i32 %i0, %c
@@ -46,21 +46,21 @@ define void @v_test_umin3_ult_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %apt
define void @v_test_umin_umin_umin(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr, i32 addrspace(1)* %cptr) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
%tid2 = mul i32 %tid, 2
- %gep0 = getelementptr i32 addrspace(1)* %aptr, i32 %tid
- %gep1 = getelementptr i32 addrspace(1)* %bptr, i32 %tid
- %gep2 = getelementptr i32 addrspace(1)* %cptr, i32 %tid
+ %gep0 = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
+ %gep1 = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid
+ %gep2 = getelementptr i32, i32 addrspace(1)* %cptr, i32 %tid
- %gep3 = getelementptr i32 addrspace(1)* %aptr, i32 %tid2
- %gep4 = getelementptr i32 addrspace(1)* %bptr, i32 %tid2
- %gep5 = getelementptr i32 addrspace(1)* %cptr, i32 %tid2
+ %gep3 = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid2
+ %gep4 = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid2
+ %gep5 = getelementptr i32, i32 addrspace(1)* %cptr, i32 %tid2
- %outgep0 = getelementptr i32 addrspace(1)* %out, i32 %tid
- %outgep1 = getelementptr i32 addrspace(1)* %out, i32 %tid2
+ %outgep0 = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %outgep1 = getelementptr i32, i32 addrspace(1)* %out, i32 %tid2
- %a = load i32 addrspace(1)* %gep0, align 4
- %b = load i32 addrspace(1)* %gep1, align 4
- %c = load i32 addrspace(1)* %gep2, align 4
- %d = load i32 addrspace(1)* %gep3, align 4
+ %a = load i32, i32 addrspace(1)* %gep0, align 4
+ %b = load i32, i32 addrspace(1)* %gep1, align 4
+ %c = load i32, i32 addrspace(1)* %gep2, align 4
+ %d = load i32, i32 addrspace(1)* %gep3, align 4
%icmp0 = icmp slt i32 %a, %b
%i0 = select i1 %icmp0, i32 %a, i32 %b
@@ -80,21 +80,21 @@ define void @v_test_umin_umin_umin(i32 addrspace(1)* %out, i32 addrspace(1)* %ap
define void @v_test_umin3_2_uses(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr, i32 addrspace(1)* %cptr) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
%tid2 = mul i32 %tid, 2
- %gep0 = getelementptr i32 addrspace(1)* %aptr, i32 %tid
- %gep1 = getelementptr i32 addrspace(1)* %bptr, i32 %tid
- %gep2 = getelementptr i32 addrspace(1)* %cptr, i32 %tid
+ %gep0 = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
+ %gep1 = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid
+ %gep2 = getelementptr i32, i32 addrspace(1)* %cptr, i32 %tid
- %gep3 = getelementptr i32 addrspace(1)* %aptr, i32 %tid2
- %gep4 = getelementptr i32 addrspace(1)* %bptr, i32 %tid2
- %gep5 = getelementptr i32 addrspace(1)* %cptr, i32 %tid2
+ %gep3 = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid2
+ %gep4 = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid2
+ %gep5 = getelementptr i32, i32 addrspace(1)* %cptr, i32 %tid2
- %outgep0 = getelementptr i32 addrspace(1)* %out, i32 %tid
- %outgep1 = getelementptr i32 addrspace(1)* %out, i32 %tid2
+ %outgep0 = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %outgep1 = getelementptr i32, i32 addrspace(1)* %out, i32 %tid2
- %a = load i32 addrspace(1)* %gep0, align 4
- %b = load i32 addrspace(1)* %gep1, align 4
- %c = load i32 addrspace(1)* %gep2, align 4
- %d = load i32 addrspace(1)* %gep3, align 4
+ %a = load i32, i32 addrspace(1)* %gep0, align 4
+ %b = load i32, i32 addrspace(1)* %gep1, align 4
+ %c = load i32, i32 addrspace(1)* %gep2, align 4
+ %d = load i32, i32 addrspace(1)* %gep3, align 4
%icmp0 = icmp slt i32 %a, %b
%i0 = select i1 %icmp0, i32 %a, i32 %b
diff --git a/test/CodeGen/R600/misaligned-load.ll b/test/CodeGen/R600/misaligned-load.ll
deleted file mode 100644
index 6290ca09d502..000000000000
--- a/test/CodeGen/R600/misaligned-load.ll
+++ /dev/null
@@ -1,18 +0,0 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
-
-; SI: @byte_aligned_load64
-; SI: ds_read_u8
-; SI: ds_read_u8
-; SI: ds_read_u8
-; SI: ds_read_u8
-; SI: ds_read_u8
-; SI: ds_read_u8
-; SI: ds_read_u8
-; SI: ds_read_u8
-; SI: s_endpgm
-define void @byte_aligned_load64(i64 addrspace(1)* %out, i64 addrspace(3)* %in) {
-entry:
- %0 = load i64 addrspace(3)* %in, align 1
- store i64 %0, i64 addrspace(1)* %out
- ret void
-}
diff --git a/test/CodeGen/R600/missing-store.ll b/test/CodeGen/R600/missing-store.ll
index 8ddef35a694a..4af9cdf1b960 100644
--- a/test/CodeGen/R600/missing-store.ll
+++ b/test/CodeGen/R600/missing-store.ll
@@ -12,11 +12,11 @@
; SI: buffer_store_dword
; SI: s_endpgm
define void @missing_store_reduced(i32 addrspace(1)* %out, i32 addrspace(1)* %gptr) #0 {
- %ptr0 = load i32 addrspace(2)* addrspace(3)* @ptr_load, align 8
- %ptr2 = getelementptr inbounds i32 addrspace(2)* %ptr0, i64 2
+ %ptr0 = load i32 addrspace(2)*, i32 addrspace(2)* addrspace(3)* @ptr_load, align 8
+ %ptr2 = getelementptr inbounds i32, i32 addrspace(2)* %ptr0, i64 2
store i32 99, i32 addrspace(1)* %gptr, align 4
- %tmp2 = load i32 addrspace(2)* %ptr2, align 4
+ %tmp2 = load i32, i32 addrspace(2)* %ptr2, align 4
store i32 %tmp2, i32 addrspace(1)* %out, align 4
ret void
diff --git a/test/CodeGen/R600/mubuf.ll b/test/CodeGen/R600/mubuf.ll
index 9c2a17ce04f4..b19163f294e0 100644
--- a/test/CodeGen/R600/mubuf.ll
+++ b/test/CodeGen/R600/mubuf.ll
@@ -11,8 +11,8 @@ declare i32 @llvm.r600.read.tidig.x() readnone
; CHECK: buffer_load_dword v{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0 offset:4 ; encoding: [0x04,0x00,0x30,0xe0
define void @mubuf_load0(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
entry:
- %0 = getelementptr i32 addrspace(1)* %in, i64 1
- %1 = load i32 addrspace(1)* %0
+ %0 = getelementptr i32, i32 addrspace(1)* %in, i64 1
+ %1 = load i32, i32 addrspace(1)* %0
store i32 %1, i32 addrspace(1)* %out
ret void
}
@@ -22,19 +22,20 @@ entry:
; CHECK: buffer_load_ubyte v{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0 offset:4095 ; encoding: [0xff,0x0f,0x20,0xe0
define void @mubuf_load1(i8 addrspace(1)* %out, i8 addrspace(1)* %in) {
entry:
- %0 = getelementptr i8 addrspace(1)* %in, i64 4095
- %1 = load i8 addrspace(1)* %0
+ %0 = getelementptr i8, i8 addrspace(1)* %in, i64 4095
+ %1 = load i8, i8 addrspace(1)* %0
store i8 %1, i8 addrspace(1)* %out
ret void
}
; MUBUF load with an immediate byte offset that doesn't fit into 12-bits
; CHECK-LABEL: {{^}}mubuf_load2:
-; CHECK: buffer_load_dword v{{[0-9]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0 addr64 ; encoding: [0x00,0x80,0x30,0xe0
+; CHECK: s_movk_i32 [[SOFFSET:s[0-9]+]], 0x1000
+; CHECK: buffer_load_dword v{{[0-9]}}, s[{{[0-9]+:[0-9]+}}], [[SOFFSET]] ; encoding: [0x00,0x00,0x30,0xe0
define void @mubuf_load2(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
entry:
- %0 = getelementptr i32 addrspace(1)* %in, i64 1024
- %1 = load i32 addrspace(1)* %0
+ %0 = getelementptr i32, i32 addrspace(1)* %in, i64 1024
+ %1 = load i32, i32 addrspace(1)* %0
store i32 %1, i32 addrspace(1)* %out
ret void
}
@@ -45,9 +46,9 @@ entry:
; CHECK: buffer_load_dword v{{[0-9]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0 addr64 offset:4 ; encoding: [0x04,0x80,0x30,0xe0
define void @mubuf_load3(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i64 %offset) {
entry:
- %0 = getelementptr i32 addrspace(1)* %in, i64 %offset
- %1 = getelementptr i32 addrspace(1)* %0, i64 1
- %2 = load i32 addrspace(1)* %1
+ %0 = getelementptr i32, i32 addrspace(1)* %in, i64 %offset
+ %1 = getelementptr i32, i32 addrspace(1)* %0, i64 1
+ %2 = load i32, i32 addrspace(1)* %1
store i32 %2, i32 addrspace(1)* %out
ret void
}
@@ -56,8 +57,8 @@ entry:
; CHECK: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 64 offen glc
define void @soffset_max_imm([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32, i32) #1 {
main_body:
- %tmp0 = getelementptr [6 x <16 x i8>] addrspace(2)* %0, i32 0, i32 0
- %tmp1 = load <16 x i8> addrspace(2)* %tmp0
+ %tmp0 = getelementptr [6 x <16 x i8>], [6 x <16 x i8>] addrspace(2)* %0, i32 0, i32 0
+ %tmp1 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp0
%tmp2 = shl i32 %6, 2
%tmp3 = call i32 @llvm.SI.buffer.load.dword.i32.i32(<16 x i8> %tmp1, i32 %tmp2, i32 64, i32 0, i32 1, i32 0, i32 1, i32 0, i32 0)
%tmp4 = add i32 %6, 16
@@ -75,8 +76,8 @@ main_body:
; CHECK: buffer_load_dword v{{[0-9+]}}, v{{[0-9+]}}, s[{{[0-9]+}}:{{[0-9]+}}], [[SOFFSET]] offen glc
define void @soffset_no_fold([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32, i32) #1 {
main_body:
- %tmp0 = getelementptr [6 x <16 x i8>] addrspace(2)* %0, i32 0, i32 0
- %tmp1 = load <16 x i8> addrspace(2)* %tmp0
+ %tmp0 = getelementptr [6 x <16 x i8>], [6 x <16 x i8>] addrspace(2)* %0, i32 0, i32 0
+ %tmp1 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp0
%tmp2 = shl i32 %6, 2
%tmp3 = call i32 @llvm.SI.buffer.load.dword.i32.i32(<16 x i8> %tmp1, i32 %tmp2, i32 65, i32 0, i32 1, i32 0, i32 1, i32 0, i32 0)
%tmp4 = add i32 %6, 16
@@ -85,12 +86,6 @@ main_body:
ret void
}
-declare i32 @llvm.SI.buffer.load.dword.i32.i32(<16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #3
-declare void @llvm.SI.tbuffer.store.i32(<16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32)
-
-attributes #1 = { "ShaderType"="2" "unsafe-fp-math"="true" }
-attributes #3 = { nounwind readonly }
-
;;;==========================================================================;;;
;;; MUBUF STORE TESTS
;;;==========================================================================;;;
@@ -100,7 +95,7 @@ attributes #3 = { nounwind readonly }
; CHECK: buffer_store_dword v{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0 offset:4 ; encoding: [0x04,0x00,0x70,0xe0
define void @mubuf_store0(i32 addrspace(1)* %out) {
entry:
- %0 = getelementptr i32 addrspace(1)* %out, i64 1
+ %0 = getelementptr i32, i32 addrspace(1)* %out, i64 1
store i32 0, i32 addrspace(1)* %0
ret void
}
@@ -111,17 +106,18 @@ entry:
define void @mubuf_store1(i8 addrspace(1)* %out) {
entry:
- %0 = getelementptr i8 addrspace(1)* %out, i64 4095
+ %0 = getelementptr i8, i8 addrspace(1)* %out, i64 4095
store i8 0, i8 addrspace(1)* %0
ret void
}
; MUBUF store with an immediate byte offset that doesn't fit into 12-bits
; CHECK-LABEL: {{^}}mubuf_store2:
-; CHECK: buffer_store_dword v{{[0-9]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]:[0-9]}}], 0 addr64 ; encoding: [0x00,0x80,0x70,0xe0
+; CHECK: s_movk_i32 [[SOFFSET:s[0-9]+]], 0x1000
+; CHECK: buffer_store_dword v{{[0-9]}}, s[{{[0-9]:[0-9]}}], [[SOFFSET]] ; encoding: [0x00,0x00,0x70,0xe0
define void @mubuf_store2(i32 addrspace(1)* %out) {
entry:
- %0 = getelementptr i32 addrspace(1)* %out, i64 1024
+ %0 = getelementptr i32, i32 addrspace(1)* %out, i64 1024
store i32 0, i32 addrspace(1)* %0
ret void
}
@@ -132,8 +128,8 @@ entry:
; CHECK: buffer_store_dword v{{[0-9]}}, v[{{[0-9]:[0-9]}}], s[{{[0-9]:[0-9]}}], 0 addr64 offset:4 ; encoding: [0x04,0x80,0x70,0xe0
define void @mubuf_store3(i32 addrspace(1)* %out, i64 %offset) {
entry:
- %0 = getelementptr i32 addrspace(1)* %out, i64 %offset
- %1 = getelementptr i32 addrspace(1)* %0, i64 1
+ %0 = getelementptr i32, i32 addrspace(1)* %out, i64 %offset
+ %1 = getelementptr i32, i32 addrspace(1)* %0, i64 1
store i32 0, i32 addrspace(1)* %1
ret void
}
@@ -148,24 +144,40 @@ define void @store_sgpr_ptr(i32 addrspace(1)* %out) #0 {
; CHECK-LABEL: {{^}}store_sgpr_ptr_offset:
; CHECK: buffer_store_dword v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:40
define void @store_sgpr_ptr_offset(i32 addrspace(1)* %out) #0 {
- %out.gep = getelementptr i32 addrspace(1)* %out, i32 10
+ %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 10
store i32 99, i32 addrspace(1)* %out.gep, align 4
ret void
}
; CHECK-LABEL: {{^}}store_sgpr_ptr_large_offset:
-; CHECK: buffer_store_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64
+; CHECK: s_mov_b32 [[SOFFSET:s[0-9]+]], 0x20000
+; CHECK: buffer_store_dword v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, [[SOFFSET]]
define void @store_sgpr_ptr_large_offset(i32 addrspace(1)* %out) #0 {
- %out.gep = getelementptr i32 addrspace(1)* %out, i32 32768
+ %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 32768
store i32 99, i32 addrspace(1)* %out.gep, align 4
ret void
}
+; CHECK-LABEL: {{^}}store_sgpr_ptr_large_offset_atomic:
+; CHECK: s_mov_b32 [[SOFFSET:s[0-9]+]], 0x20000
+; CHECK: buffer_atomic_add v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, [[SOFFSET]]
+define void @store_sgpr_ptr_large_offset_atomic(i32 addrspace(1)* %out) #0 {
+ %gep = getelementptr i32, i32 addrspace(1)* %out, i32 32768
+ %val = atomicrmw volatile add i32 addrspace(1)* %gep, i32 5 seq_cst
+ ret void
+}
+
; CHECK-LABEL: {{^}}store_vgpr_ptr:
; CHECK: buffer_store_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64
define void @store_vgpr_ptr(i32 addrspace(1)* %out) #0 {
%tid = call i32 @llvm.r600.read.tidig.x() readnone
- %out.gep = getelementptr i32 addrspace(1)* %out, i32 %tid
+ %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
store i32 99, i32 addrspace(1)* %out.gep, align 4
ret void
}
+
+declare i32 @llvm.SI.buffer.load.dword.i32.i32(<16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #3
+declare void @llvm.SI.tbuffer.store.i32(<16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32)
+
+attributes #1 = { "ShaderType"="2" "unsafe-fp-math"="true" }
+attributes #3 = { nounwind readonly }
diff --git a/test/CodeGen/R600/mul.ll b/test/CodeGen/R600/mul.ll
index 6f15e706dff8..94e0f96b323e 100644
--- a/test/CodeGen/R600/mul.ll
+++ b/test/CodeGen/R600/mul.ll
@@ -12,9 +12,9 @@
; SI: v_mul_lo_i32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
define void @test_mul_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
- %b_ptr = getelementptr <2 x i32> addrspace(1)* %in, i32 1
- %a = load <2 x i32> addrspace(1) * %in
- %b = load <2 x i32> addrspace(1) * %b_ptr
+ %b_ptr = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %in, i32 1
+ %a = load <2 x i32>, <2 x i32> addrspace(1) * %in
+ %b = load <2 x i32>, <2 x i32> addrspace(1) * %b_ptr
%result = mul <2 x i32> %a, %b
store <2 x i32> %result, <2 x i32> addrspace(1)* %out
ret void
@@ -32,9 +32,9 @@ define void @test_mul_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)
; SI: v_mul_lo_i32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
define void @v_mul_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
- %b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1
- %a = load <4 x i32> addrspace(1) * %in
- %b = load <4 x i32> addrspace(1) * %b_ptr
+ %b_ptr = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 1
+ %a = load <4 x i32>, <4 x i32> addrspace(1) * %in
+ %b = load <4 x i32>, <4 x i32> addrspace(1) * %b_ptr
%result = mul <4 x i32> %a, %b
store <4 x i32> %result, <4 x i32> addrspace(1)* %out
ret void
@@ -58,8 +58,8 @@ define void @s_trunc_i64_mul_to_i32(i32 addrspace(1)* %out, i64 %a, i64 %b) {
; SI: v_mul_lo_i32
; SI: buffer_store_dword
define void @v_trunc_i64_mul_to_i32(i32 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) nounwind {
- %a = load i64 addrspace(1)* %aptr, align 8
- %b = load i64 addrspace(1)* %bptr, align 8
+ %a = load i64, i64 addrspace(1)* %aptr, align 8
+ %b = load i64, i64 addrspace(1)* %bptr, align 8
%mul = mul i64 %b, %a
%trunc = trunc i64 %mul to i32
store i32 %trunc, i32 addrspace(1)* %out, align 8
@@ -88,7 +88,7 @@ entry:
; SI-DAG: v_mul_hi_i32
; SI: s_endpgm
define void @v_mul64_sext_c(i64 addrspace(1)* %out, i32 addrspace(1)* %in) {
- %val = load i32 addrspace(1)* %in, align 4
+ %val = load i32, i32 addrspace(1)* %in, align 4
%ext = sext i32 %val to i64
%mul = mul i64 %ext, 80
store i64 %mul, i64 addrspace(1)* %out, align 8
@@ -100,7 +100,7 @@ define void @v_mul64_sext_c(i64 addrspace(1)* %out, i32 addrspace(1)* %in) {
; SI-DAG: v_mul_hi_i32 v{{[0-9]+}}, 9, v{{[0-9]+}}
; SI: s_endpgm
define void @v_mul64_sext_inline_imm(i64 addrspace(1)* %out, i32 addrspace(1)* %in) {
- %val = load i32 addrspace(1)* %in, align 4
+ %val = load i32, i32 addrspace(1)* %in, align 4
%ext = sext i32 %val to i64
%mul = mul i64 %ext, 9
store i64 %mul, i64 addrspace(1)* %out, align 8
@@ -123,9 +123,9 @@ define void @s_mul_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
; FUNC-LABEL: {{^}}v_mul_i32:
; SI: v_mul_lo_i32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
define void @v_mul_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
- %b_ptr = getelementptr i32 addrspace(1)* %in, i32 1
- %a = load i32 addrspace(1)* %in
- %b = load i32 addrspace(1)* %b_ptr
+ %b_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
+ %a = load i32, i32 addrspace(1)* %in
+ %b = load i32, i32 addrspace(1)* %b_ptr
%result = mul i32 %a, %b
store i32 %result, i32 addrspace(1)* %out
ret void
@@ -148,8 +148,8 @@ define void @s_mul_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
; FUNC-LABEL: {{^}}v_mul_i64:
; SI: v_mul_lo_i32
define void @v_mul_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) {
- %a = load i64 addrspace(1)* %aptr, align 8
- %b = load i64 addrspace(1)* %bptr, align 8
+ %a = load i64, i64 addrspace(1)* %aptr, align 8
+ %b = load i64, i64 addrspace(1)* %bptr, align 8
%mul = mul i64 %a, %b
store i64 %mul, i64 addrspace(1)* %out, align 8
ret void
@@ -163,7 +163,7 @@ entry:
br i1 %0, label %if, label %else
if:
- %1 = load i32 addrspace(1)* %in
+ %1 = load i32, i32 addrspace(1)* %in
br label %endif
else:
@@ -186,7 +186,7 @@ entry:
br i1 %0, label %if, label %else
if:
- %1 = load i64 addrspace(1)* %in
+ %1 = load i64, i64 addrspace(1)* %in
br label %endif
else:
diff --git a/test/CodeGen/R600/no-initializer-constant-addrspace.ll b/test/CodeGen/R600/no-initializer-constant-addrspace.ll
index 532edf07c301..9a814b579deb 100644
--- a/test/CodeGen/R600/no-initializer-constant-addrspace.ll
+++ b/test/CodeGen/R600/no-initializer-constant-addrspace.ll
@@ -6,7 +6,7 @@
; FUNC-LABEL: {{^}}load_extern_const_init:
define void @load_extern_const_init(i32 addrspace(1)* %out) nounwind {
- %val = load i32 addrspace(2)* getelementptr ([5 x i32] addrspace(2)* @extern_const_addrspace, i64 0, i64 3), align 4
+ %val = load i32, i32 addrspace(2)* getelementptr ([5 x i32], [5 x i32] addrspace(2)* @extern_const_addrspace, i64 0, i64 3), align 4
store i32 %val, i32 addrspace(1)* %out, align 4
ret void
}
@@ -15,7 +15,7 @@ define void @load_extern_const_init(i32 addrspace(1)* %out) nounwind {
; FUNC-LABEL: {{^}}load_undef_const_init:
define void @load_undef_const_init(i32 addrspace(1)* %out) nounwind {
- %val = load i32 addrspace(2)* getelementptr ([5 x i32] addrspace(2)* @undef_const_addrspace, i64 0, i64 3), align 4
+ %val = load i32, i32 addrspace(2)* getelementptr ([5 x i32], [5 x i32] addrspace(2)* @undef_const_addrspace, i64 0, i64 3), align 4
store i32 %val, i32 addrspace(1)* %out, align 4
ret void
}
diff --git a/test/CodeGen/R600/no-shrink-extloads.ll b/test/CodeGen/R600/no-shrink-extloads.ll
index 135d22d3036d..e4328ecbaca8 100644
--- a/test/CodeGen/R600/no-shrink-extloads.ll
+++ b/test/CodeGen/R600/no-shrink-extloads.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
declare i32 @llvm.r600.read.tidig.x() nounwind readnone
@@ -23,9 +23,9 @@ define void @truncate_kernarg_i32_to_i16(i16 addrspace(1)* %out, i32 %arg) nounw
; SI: buffer_store_short v
define void @truncate_buffer_load_i32_to_i16(i16 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep.in = getelementptr i32 addrspace(1)* %in, i32 %tid
- %gep.out = getelementptr i16 addrspace(1)* %out, i32 %tid
- %load = load i32 addrspace(1)* %gep.in
+ %gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
+ %gep.out = getelementptr i16, i16 addrspace(1)* %out, i32 %tid
+ %load = load i32, i32 addrspace(1)* %gep.in
%trunc = trunc i32 %load to i16
store i16 %trunc, i16 addrspace(1)* %gep.out
ret void
@@ -45,9 +45,9 @@ define void @truncate_kernarg_i32_to_i8(i8 addrspace(1)* %out, i32 %arg) nounwin
; SI: buffer_store_byte v
define void @truncate_buffer_load_i32_to_i8(i8 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep.in = getelementptr i32 addrspace(1)* %in, i32 %tid
- %gep.out = getelementptr i8 addrspace(1)* %out, i32 %tid
- %load = load i32 addrspace(1)* %gep.in
+ %gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
+ %gep.out = getelementptr i8, i8 addrspace(1)* %out, i32 %tid
+ %load = load i32, i32 addrspace(1)* %gep.in
%trunc = trunc i32 %load to i8
store i8 %trunc, i8 addrspace(1)* %gep.out
ret void
@@ -67,9 +67,9 @@ define void @truncate_kernarg_i32_to_i1(i1 addrspace(1)* %out, i32 %arg) nounwin
; SI: buffer_store_byte v
define void @truncate_buffer_load_i32_to_i1(i1 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep.in = getelementptr i32 addrspace(1)* %in, i32 %tid
- %gep.out = getelementptr i1 addrspace(1)* %out, i32 %tid
- %load = load i32 addrspace(1)* %gep.in
+ %gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
+ %gep.out = getelementptr i1, i1 addrspace(1)* %out, i32 %tid
+ %load = load i32, i32 addrspace(1)* %gep.in
%trunc = trunc i32 %load to i1
store i1 %trunc, i1 addrspace(1)* %gep.out
ret void
@@ -89,9 +89,9 @@ define void @truncate_kernarg_i64_to_i32(i32 addrspace(1)* %out, i64 %arg) nounw
; SI: buffer_store_dword v
define void @truncate_buffer_load_i64_to_i32(i32 addrspace(1)* %out, i64 addrspace(1)* %in) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep.in = getelementptr i64 addrspace(1)* %in, i32 %tid
- %gep.out = getelementptr i32 addrspace(1)* %out, i32 %tid
- %load = load i64 addrspace(1)* %gep.in
+ %gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
+ %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %load = load i64, i64 addrspace(1)* %gep.in
%trunc = trunc i64 %load to i32
store i32 %trunc, i32 addrspace(1)* %gep.out
ret void
@@ -112,9 +112,9 @@ define void @srl_kernarg_i64_to_i32(i32 addrspace(1)* %out, i64 %arg) nounwind {
; SI: buffer_store_dword v
define void @srl_buffer_load_i64_to_i32(i32 addrspace(1)* %out, i64 addrspace(1)* %in) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep.in = getelementptr i64 addrspace(1)* %in, i32 %tid
- %gep.out = getelementptr i32 addrspace(1)* %out, i32 %tid
- %load = load i64 addrspace(1)* %gep.in
+ %gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
+ %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %load = load i64, i64 addrspace(1)* %gep.in
%srl = lshr i64 %load, 32
%trunc = trunc i64 %srl to i32
store i32 %trunc, i32 addrspace(1)* %gep.out
@@ -136,9 +136,9 @@ define void @truncate_kernarg_i16_to_i8(i8 addrspace(1)* %out, i16 %arg) nounwin
; SI: buffer_store_byte v
define void @truncate_buffer_load_i16_to_i8(i8 addrspace(1)* %out, i16 addrspace(1)* %in) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep.in = getelementptr i16 addrspace(1)* %in, i32 %tid
- %gep.out = getelementptr i8 addrspace(1)* %out, i32 %tid
- %load = load i16 addrspace(1)* %gep.in
+ %gep.in = getelementptr i16, i16 addrspace(1)* %in, i32 %tid
+ %gep.out = getelementptr i8, i8 addrspace(1)* %out, i32 %tid
+ %load = load i16, i16 addrspace(1)* %gep.in
%trunc = trunc i16 %load to i8
store i8 %trunc, i8 addrspace(1)* %gep.out
ret void
@@ -159,9 +159,9 @@ define void @srl_kernarg_i64_to_i8(i8 addrspace(1)* %out, i64 %arg) nounwind {
; SI: buffer_store_byte v
define void @srl_buffer_load_i64_to_i8(i8 addrspace(1)* %out, i64 addrspace(1)* %in) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep.in = getelementptr i64 addrspace(1)* %in, i32 %tid
- %gep.out = getelementptr i8 addrspace(1)* %out, i32 %tid
- %load = load i64 addrspace(1)* %gep.in
+ %gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
+ %gep.out = getelementptr i8, i8 addrspace(1)* %out, i32 %tid
+ %load = load i64, i64 addrspace(1)* %gep.in
%srl = lshr i64 %load, 32
%trunc = trunc i64 %srl to i8
store i8 %trunc, i8 addrspace(1)* %gep.out
@@ -182,9 +182,9 @@ define void @truncate_kernarg_i64_to_i8(i8 addrspace(1)* %out, i64 %arg) nounwin
; SI: buffer_store_byte v
define void @truncate_buffer_load_i64_to_i8(i8 addrspace(1)* %out, i64 addrspace(1)* %in) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep.in = getelementptr i64 addrspace(1)* %in, i32 %tid
- %gep.out = getelementptr i8 addrspace(1)* %out, i32 %tid
- %load = load i64 addrspace(1)* %gep.in
+ %gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
+ %gep.out = getelementptr i8, i8 addrspace(1)* %out, i32 %tid
+ %load = load i64, i64 addrspace(1)* %gep.in
%trunc = trunc i64 %load to i8
store i8 %trunc, i8 addrspace(1)* %gep.out
ret void
diff --git a/test/CodeGen/R600/operand-folding.ll b/test/CodeGen/R600/operand-folding.ll
index 88a8145dcd62..816755efb07c 100644
--- a/test/CodeGen/R600/operand-folding.ll
+++ b/test/CodeGen/R600/operand-folding.ll
@@ -10,7 +10,7 @@ entry:
if:
%id = call i32 @llvm.r600.read.tidig.x()
%offset = add i32 %fold, %id
- %tmp1 = getelementptr i32 addrspace(1)* %out, i32 %offset
+ %tmp1 = getelementptr i32, i32 addrspace(1)* %out, i32 %offset
store i32 0, i32 addrspace(1)* %tmp1
br label %endif
@@ -19,7 +19,7 @@ endif:
}
; CHECK-LABEL: {{^}}fold_imm:
-; CHECK v_or_i32_e32 v{{[0-9]+}}, 5
+; CHECK: v_or_b32_e32 v{{[0-9]+}}, 5
define void @fold_imm(i32 addrspace(1)* %out, i32 %cmp) {
entry:
%fold = add i32 3, 2
diff --git a/test/CodeGen/R600/operand-spacing.ll b/test/CodeGen/R600/operand-spacing.ll
index dd9f25aad7f2..20420a84de6f 100644
--- a/test/CodeGen/R600/operand-spacing.ll
+++ b/test/CodeGen/R600/operand-spacing.ll
@@ -1,13 +1,16 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=SI -check-prefix=GCN %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=VI -check-prefix=GCN %s
; Make sure there isn't an extra space between the instruction name and first operands.
-; SI-LABEL: {{^}}add_f32:
+; GCN-LABEL: {{^}}add_f32:
; SI-DAG: s_load_dword [[SREGA:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
; SI-DAG: s_load_dword [[SREGB:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
-; SI: v_mov_b32_e32 [[VREGB:v[0-9]+]], [[SREGB]]
-; SI: v_add_f32_e32 [[RESULT:v[0-9]+]], [[SREGA]], [[VREGB]]
-; SI: buffer_store_dword [[RESULT]],
+; VI-DAG: s_load_dword [[SREGA:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c
+; VI-DAG: s_load_dword [[SREGB:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x30
+; GCN: v_mov_b32_e32 [[VREGB:v[0-9]+]], [[SREGB]]
+; GCN: v_add_f32_e32 [[RESULT:v[0-9]+]], [[SREGA]], [[VREGB]]
+; GCN: buffer_store_dword [[RESULT]],
define void @add_f32(float addrspace(1)* %out, float %a, float %b) {
%result = fadd float %a, %b
store float %result, float addrspace(1)* %out
diff --git a/test/CodeGen/R600/or.ll b/test/CodeGen/R600/or.ll
index 0d9a6992a6bd..1c04090b407f 100644
--- a/test/CodeGen/R600/or.ll
+++ b/test/CodeGen/R600/or.ll
@@ -2,45 +2,42 @@
; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
-; EG-LABEL: {{^}}or_v2i32:
+
+; FUNC-LABEL: {{^}}or_v2i32:
; EG: OR_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; EG: OR_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; SI-LABEL: {{^}}or_v2i32:
; SI: v_or_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
; SI: v_or_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-
define void @or_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
- %b_ptr = getelementptr <2 x i32> addrspace(1)* %in, i32 1
- %a = load <2 x i32> addrspace(1) * %in
- %b = load <2 x i32> addrspace(1) * %b_ptr
+ %b_ptr = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %in, i32 1
+ %a = load <2 x i32>, <2 x i32> addrspace(1) * %in
+ %b = load <2 x i32>, <2 x i32> addrspace(1) * %b_ptr
%result = or <2 x i32> %a, %b
store <2 x i32> %result, <2 x i32> addrspace(1)* %out
ret void
}
-; EG-LABEL: {{^}}or_v4i32:
+; FUNC-LABEL: {{^}}or_v4i32:
; EG: OR_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; EG: OR_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; EG: OR_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; EG: OR_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; SI-LABEL: {{^}}or_v4i32:
; SI: v_or_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
; SI: v_or_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
; SI: v_or_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
; SI: v_or_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-
define void @or_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
- %b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1
- %a = load <4 x i32> addrspace(1) * %in
- %b = load <4 x i32> addrspace(1) * %b_ptr
+ %b_ptr = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 1
+ %a = load <4 x i32>, <4 x i32> addrspace(1) * %in
+ %b = load <4 x i32>, <4 x i32> addrspace(1) * %b_ptr
%result = or <4 x i32> %a, %b
store <4 x i32> %result, <4 x i32> addrspace(1)* %out
ret void
}
-; SI-LABEL: {{^}}scalar_or_i32:
+; FUNC-LABEL: {{^}}scalar_or_i32:
; SI: s_or_b32
define void @scalar_or_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) {
%or = or i32 %a, %b
@@ -48,16 +45,16 @@ define void @scalar_or_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) {
ret void
}
-; SI-LABEL: {{^}}vector_or_i32:
+; FUNC-LABEL: {{^}}vector_or_i32:
; SI: v_or_b32_e32 v{{[0-9]}}
define void @vector_or_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %a, i32 %b) {
- %loada = load i32 addrspace(1)* %a
+ %loada = load i32, i32 addrspace(1)* %a
%or = or i32 %loada, %b
store i32 %or, i32 addrspace(1)* %out
ret void
}
-; SI-LABEL: {{^}}scalar_or_literal_i32:
+; FUNC-LABEL: {{^}}scalar_or_literal_i32:
; SI: s_or_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x1869f
define void @scalar_or_literal_i32(i32 addrspace(1)* %out, i32 %a) {
%or = or i32 %a, 99999
@@ -65,28 +62,28 @@ define void @scalar_or_literal_i32(i32 addrspace(1)* %out, i32 %a) {
ret void
}
-; SI-LABEL: {{^}}vector_or_literal_i32:
+; FUNC-LABEL: {{^}}vector_or_literal_i32:
; SI: v_or_b32_e32 v{{[0-9]+}}, 0xffff, v{{[0-9]+}}
define void @vector_or_literal_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %a, i32 addrspace(1)* %b) {
- %loada = load i32 addrspace(1)* %a, align 4
+ %loada = load i32, i32 addrspace(1)* %a, align 4
%or = or i32 %loada, 65535
store i32 %or, i32 addrspace(1)* %out, align 4
ret void
}
-; SI-LABEL: {{^}}vector_or_inline_immediate_i32:
+; FUNC-LABEL: {{^}}vector_or_inline_immediate_i32:
; SI: v_or_b32_e32 v{{[0-9]+}}, 4, v{{[0-9]+}}
define void @vector_or_inline_immediate_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %a, i32 addrspace(1)* %b) {
- %loada = load i32 addrspace(1)* %a, align 4
+ %loada = load i32, i32 addrspace(1)* %a, align 4
%or = or i32 %loada, 4
store i32 %or, i32 addrspace(1)* %out, align 4
ret void
}
-; EG-LABEL: {{^}}scalar_or_i64:
+; FUNC-LABEL: {{^}}scalar_or_i64:
; EG-DAG: OR_INT * T{{[0-9]\.[XYZW]}}, KC0[2].W, KC0[3].Y
; EG-DAG: OR_INT * T{{[0-9]\.[XYZW]}}, KC0[3].X, KC0[3].Z
-; SI-LABEL: {{^}}scalar_or_i64:
+
; SI: s_or_b64
define void @scalar_or_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) {
%or = or i64 %a, %b
@@ -94,28 +91,28 @@ define void @scalar_or_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) {
ret void
}
-; SI-LABEL: {{^}}vector_or_i64:
+; FUNC-LABEL: {{^}}vector_or_i64:
; SI: v_or_b32_e32 v{{[0-9]}}
; SI: v_or_b32_e32 v{{[0-9]}}
define void @vector_or_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
- %loada = load i64 addrspace(1)* %a, align 8
- %loadb = load i64 addrspace(1)* %a, align 8
+ %loada = load i64, i64 addrspace(1)* %a, align 8
+ %loadb = load i64, i64 addrspace(1)* %a, align 8
%or = or i64 %loada, %loadb
store i64 %or, i64 addrspace(1)* %out
ret void
}
-; SI-LABEL: {{^}}scalar_vector_or_i64:
+; FUNC-LABEL: {{^}}scalar_vector_or_i64:
; SI: v_or_b32_e32 v{{[0-9]}}
; SI: v_or_b32_e32 v{{[0-9]}}
define void @scalar_vector_or_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 %b) {
- %loada = load i64 addrspace(1)* %a
+ %loada = load i64, i64 addrspace(1)* %a
%or = or i64 %loada, %b
store i64 %or, i64 addrspace(1)* %out
ret void
}
-; SI-LABEL: {{^}}vector_or_i64_loadimm:
+; FUNC-LABEL: {{^}}vector_or_i64_loadimm:
; SI-DAG: s_mov_b32 [[LO_S_IMM:s[0-9]+]], 0xdf77987f
; SI-DAG: s_movk_i32 [[HI_S_IMM:s[0-9]+]], 0x146f
; SI-DAG: buffer_load_dwordx2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}},
@@ -123,26 +120,26 @@ define void @scalar_vector_or_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %a,
; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
; SI: s_endpgm
define void @vector_or_i64_loadimm(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
- %loada = load i64 addrspace(1)* %a, align 8
+ %loada = load i64, i64 addrspace(1)* %a, align 8
%or = or i64 %loada, 22470723082367
store i64 %or, i64 addrspace(1)* %out
ret void
}
; FIXME: The or 0 should really be removed.
-; SI-LABEL: {{^}}vector_or_i64_imm:
+; FUNC-LABEL: {{^}}vector_or_i64_imm:
; SI: buffer_load_dwordx2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}},
; SI: v_or_b32_e32 {{v[0-9]+}}, 8, v[[LO_VREG]]
; SI: v_or_b32_e32 {{v[0-9]+}}, 0, {{.*}}
; SI: s_endpgm
define void @vector_or_i64_imm(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
- %loada = load i64 addrspace(1)* %a, align 8
+ %loada = load i64, i64 addrspace(1)* %a, align 8
%or = or i64 %loada, 8
store i64 %or, i64 addrspace(1)* %out
ret void
}
-; SI-LABEL: {{^}}trunc_i64_or_to_i32:
+; FUNC-LABEL: {{^}}trunc_i64_or_to_i32:
; SI: s_load_dword s[[SREG0:[0-9]+]]
; SI: s_load_dword s[[SREG1:[0-9]+]]
; SI: s_or_b32 s[[SRESULT:[0-9]+]], s[[SREG1]], s[[SREG0]]
@@ -155,18 +152,27 @@ define void @trunc_i64_or_to_i32(i32 addrspace(1)* %out, i64 %a, i64 %b) {
ret void
}
-; EG-CHECK: {{^}}or_i1:
-; EG-CHECK: OR_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], PS}}
+; FUNC-LABEL: {{^}}or_i1:
+; EG: OR_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], PS}}
-; SI-CHECK: {{^}}or_i1:
-; SI-CHECK: s_or_b64 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}]
-define void @or_i1(float addrspace(1)* %out, float addrspace(1)* %in0, float addrspace(1)* %in1) {
- %a = load float addrspace(1) * %in0
- %b = load float addrspace(1) * %in1
+; SI: s_or_b64 s[{{[0-9]+:[0-9]+}}], vcc, s[{{[0-9]+:[0-9]+}}]
+define void @or_i1(i32 addrspace(1)* %out, float addrspace(1)* %in0, float addrspace(1)* %in1) {
+ %a = load float, float addrspace(1)* %in0
+ %b = load float, float addrspace(1)* %in1
%acmp = fcmp oge float %a, 0.000000e+00
%bcmp = fcmp oge float %b, 0.000000e+00
%or = or i1 %acmp, %bcmp
- %result = select i1 %or, float %a, float %b
- store float %result, float addrspace(1)* %out
+ %result = zext i1 %or to i32
+ store i32 %result, i32 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}s_or_i1:
+; SI: s_or_b64 s[{{[0-9]+:[0-9]+}}], vcc, s[{{[0-9]+:[0-9]+}}]
+define void @s_or_i1(i1 addrspace(1)* %out, i32 %a, i32 %b, i32 %c, i32 %d) {
+ %cmp0 = icmp eq i32 %a, %b
+ %cmp1 = icmp eq i32 %c, %d
+ %or = or i1 %cmp0, %cmp1
+ store i1 %or, i1 addrspace(1)* %out
ret void
}
diff --git a/test/CodeGen/R600/parallelandifcollapse.ll b/test/CodeGen/R600/parallelandifcollapse.ll
index 82b11501e865..f32b044198ab 100644
--- a/test/CodeGen/R600/parallelandifcollapse.ll
+++ b/test/CodeGen/R600/parallelandifcollapse.ll
@@ -23,14 +23,14 @@ entry:
%c1 = alloca i32, align 4
%d1 = alloca i32, align 4
%data = alloca i32, align 4
- %0 = load i32* %a0, align 4
- %1 = load i32* %b0, align 4
+ %0 = load i32, i32* %a0, align 4
+ %1 = load i32, i32* %b0, align 4
%cmp = icmp ne i32 %0, %1
br i1 %cmp, label %land.lhs.true, label %if.end
land.lhs.true: ; preds = %entry
- %2 = load i32* %c0, align 4
- %3 = load i32* %d0, align 4
+ %2 = load i32, i32* %c0, align 4
+ %3 = load i32, i32* %d0, align 4
%cmp1 = icmp ne i32 %2, %3
br i1 %cmp1, label %if.then, label %if.end
@@ -39,14 +39,14 @@ if.then: ; preds = %land.lhs.true
br label %if.end
if.end: ; preds = %if.then, %land.lhs.true, %entry
- %4 = load i32* %a1, align 4
- %5 = load i32* %b1, align 4
+ %4 = load i32, i32* %a1, align 4
+ %5 = load i32, i32* %b1, align 4
%cmp2 = icmp ne i32 %4, %5
br i1 %cmp2, label %land.lhs.true3, label %if.end6
land.lhs.true3: ; preds = %if.end
- %6 = load i32* %c1, align 4
- %7 = load i32* %d1, align 4
+ %6 = load i32, i32* %c1, align 4
+ %7 = load i32, i32* %d1, align 4
%cmp4 = icmp ne i32 %6, %7
br i1 %cmp4, label %if.then5, label %if.end6
diff --git a/test/CodeGen/R600/parallelorifcollapse.ll b/test/CodeGen/R600/parallelorifcollapse.ll
index feca688c30aa..1da1e91b8ab8 100644
--- a/test/CodeGen/R600/parallelorifcollapse.ll
+++ b/test/CodeGen/R600/parallelorifcollapse.ll
@@ -23,14 +23,14 @@ entry:
%c1 = alloca i32, align 4
%d1 = alloca i32, align 4
%data = alloca i32, align 4
- %0 = load i32* %a0, align 4
- %1 = load i32* %b0, align 4
+ %0 = load i32, i32* %a0, align 4
+ %1 = load i32, i32* %b0, align 4
%cmp = icmp ne i32 %0, %1
br i1 %cmp, label %land.lhs.true, label %if.else
land.lhs.true: ; preds = %entry
- %2 = load i32* %c0, align 4
- %3 = load i32* %d0, align 4
+ %2 = load i32, i32* %c0, align 4
+ %3 = load i32, i32* %d0, align 4
%cmp1 = icmp ne i32 %2, %3
br i1 %cmp1, label %if.then, label %if.else
@@ -42,14 +42,14 @@ if.else: ; preds = %land.lhs.true, %ent
br label %if.end
if.end: ; preds = %if.else, %if.then
- %4 = load i32* %a1, align 4
- %5 = load i32* %b1, align 4
+ %4 = load i32, i32* %a1, align 4
+ %5 = load i32, i32* %b1, align 4
%cmp2 = icmp ne i32 %4, %5
br i1 %cmp2, label %land.lhs.true3, label %if.else6
land.lhs.true3: ; preds = %if.end
- %6 = load i32* %c1, align 4
- %7 = load i32* %d1, align 4
+ %6 = load i32, i32* %c1, align 4
+ %7 = load i32, i32* %d1, align 4
%cmp4 = icmp ne i32 %6, %7
br i1 %cmp4, label %if.then5, label %if.else6
diff --git a/test/CodeGen/R600/private-memory-atomics.ll b/test/CodeGen/R600/private-memory-atomics.ll
index 3ceb0c00d114..a008ac98a43b 100644
--- a/test/CodeGen/R600/private-memory-atomics.ll
+++ b/test/CodeGen/R600/private-memory-atomics.ll
@@ -7,11 +7,11 @@
define void @atomicrmw_private(i32 addrspace(1)* %out, i32 %in) nounwind {
entry:
%tmp = alloca [2 x i32]
- %tmp1 = getelementptr [2 x i32]* %tmp, i32 0, i32 0
- %tmp2 = getelementptr [2 x i32]* %tmp, i32 0, i32 1
+ %tmp1 = getelementptr [2 x i32], [2 x i32]* %tmp, i32 0, i32 0
+ %tmp2 = getelementptr [2 x i32], [2 x i32]* %tmp, i32 0, i32 1
store i32 0, i32* %tmp1
store i32 1, i32* %tmp2
- %tmp3 = getelementptr [2 x i32]* %tmp, i32 0, i32 %in
+ %tmp3 = getelementptr [2 x i32], [2 x i32]* %tmp, i32 0, i32 %in
%tmp4 = atomicrmw add i32* %tmp3, i32 7 acq_rel
store i32 %tmp4, i32 addrspace(1)* %out
ret void
@@ -20,11 +20,11 @@ entry:
define void @cmpxchg_private(i32 addrspace(1)* %out, i32 %in) nounwind {
entry:
%tmp = alloca [2 x i32]
- %tmp1 = getelementptr [2 x i32]* %tmp, i32 0, i32 0
- %tmp2 = getelementptr [2 x i32]* %tmp, i32 0, i32 1
+ %tmp1 = getelementptr [2 x i32], [2 x i32]* %tmp, i32 0, i32 0
+ %tmp2 = getelementptr [2 x i32], [2 x i32]* %tmp, i32 0, i32 1
store i32 0, i32* %tmp1
store i32 1, i32* %tmp2
- %tmp3 = getelementptr [2 x i32]* %tmp, i32 0, i32 %in
+ %tmp3 = getelementptr [2 x i32], [2 x i32]* %tmp, i32 0, i32 %in
%tmp4 = cmpxchg i32* %tmp3, i32 0, i32 1 acq_rel monotonic
%val = extractvalue { i32, i1 } %tmp4, 0
store i32 %val, i32 addrspace(1)* %out
diff --git a/test/CodeGen/R600/private-memory-broken.ll b/test/CodeGen/R600/private-memory-broken.ll
index 10590a9802fb..6b18a19f1956 100644
--- a/test/CodeGen/R600/private-memory-broken.ll
+++ b/test/CodeGen/R600/private-memory-broken.ll
@@ -10,11 +10,11 @@ declare i32 @foo(i32*) nounwind
define void @call_private(i32 addrspace(1)* %out, i32 %in) nounwind {
entry:
%tmp = alloca [2 x i32]
- %tmp1 = getelementptr [2 x i32]* %tmp, i32 0, i32 0
- %tmp2 = getelementptr [2 x i32]* %tmp, i32 0, i32 1
+ %tmp1 = getelementptr [2 x i32], [2 x i32]* %tmp, i32 0, i32 0
+ %tmp2 = getelementptr [2 x i32], [2 x i32]* %tmp, i32 0, i32 1
store i32 0, i32* %tmp1
store i32 1, i32* %tmp2
- %tmp3 = getelementptr [2 x i32]* %tmp, i32 0, i32 %in
+ %tmp3 = getelementptr [2 x i32], [2 x i32]* %tmp, i32 0, i32 %in
%val = call i32 @foo(i32* %tmp3) nounwind
store i32 %val, i32 addrspace(1)* %out
ret void
diff --git a/test/CodeGen/R600/private-memory.ll b/test/CodeGen/R600/private-memory.ll
index 15153c69a48d..1c5629780508 100644
--- a/test/CodeGen/R600/private-memory.ll
+++ b/test/CodeGen/R600/private-memory.ll
@@ -1,6 +1,8 @@
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck %s -check-prefix=R600 -check-prefix=FUNC
; RUN: llc -show-mc-encoding -mattr=+promote-alloca -verify-machineinstrs -march=amdgcn -mcpu=SI < %s | FileCheck %s -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC
; RUN: llc -show-mc-encoding -mattr=-promote-alloca -verify-machineinstrs -march=amdgcn -mcpu=SI < %s | FileCheck %s -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC
+; RUN: llc -show-mc-encoding -mattr=+promote-alloca -verify-machineinstrs -march=amdgcn -mcpu=tonga < %s | FileCheck %s -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC
+; RUN: llc -show-mc-encoding -mattr=-promote-alloca -verify-machineinstrs -march=amdgcn -mcpu=tonga < %s | FileCheck %s -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC
declare i32 @llvm.r600.read.tidig.x() nounwind readnone
@@ -21,19 +23,19 @@ declare i32 @llvm.r600.read.tidig.x() nounwind readnone
define void @mova_same_clause(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) {
entry:
%stack = alloca [5 x i32], align 4
- %0 = load i32 addrspace(1)* %in, align 4
- %arrayidx1 = getelementptr inbounds [5 x i32]* %stack, i32 0, i32 %0
+ %0 = load i32, i32 addrspace(1)* %in, align 4
+ %arrayidx1 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 %0
store i32 4, i32* %arrayidx1, align 4
- %arrayidx2 = getelementptr inbounds i32 addrspace(1)* %in, i32 1
- %1 = load i32 addrspace(1)* %arrayidx2, align 4
- %arrayidx3 = getelementptr inbounds [5 x i32]* %stack, i32 0, i32 %1
+ %arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 1
+ %1 = load i32, i32 addrspace(1)* %arrayidx2, align 4
+ %arrayidx3 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 %1
store i32 5, i32* %arrayidx3, align 4
- %arrayidx10 = getelementptr inbounds [5 x i32]* %stack, i32 0, i32 0
- %2 = load i32* %arrayidx10, align 4
+ %arrayidx10 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 0
+ %2 = load i32, i32* %arrayidx10, align 4
store i32 %2, i32 addrspace(1)* %out, align 4
- %arrayidx12 = getelementptr inbounds [5 x i32]* %stack, i32 0, i32 1
- %3 = load i32* %arrayidx12
- %arrayidx13 = getelementptr inbounds i32 addrspace(1)* %out, i32 1
+ %arrayidx12 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 1
+ %3 = load i32, i32* %arrayidx12
+ %arrayidx13 = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 1
store i32 %3, i32 addrspace(1)* %arrayidx13
ret void
}
@@ -55,18 +57,18 @@ define void @multiple_structs(i32 addrspace(1)* %out) {
entry:
%a = alloca %struct.point
%b = alloca %struct.point
- %a.x.ptr = getelementptr %struct.point* %a, i32 0, i32 0
- %a.y.ptr = getelementptr %struct.point* %a, i32 0, i32 1
- %b.x.ptr = getelementptr %struct.point* %b, i32 0, i32 0
- %b.y.ptr = getelementptr %struct.point* %b, i32 0, i32 1
+ %a.x.ptr = getelementptr %struct.point, %struct.point* %a, i32 0, i32 0
+ %a.y.ptr = getelementptr %struct.point, %struct.point* %a, i32 0, i32 1
+ %b.x.ptr = getelementptr %struct.point, %struct.point* %b, i32 0, i32 0
+ %b.y.ptr = getelementptr %struct.point, %struct.point* %b, i32 0, i32 1
store i32 0, i32* %a.x.ptr
store i32 1, i32* %a.y.ptr
store i32 2, i32* %b.x.ptr
store i32 3, i32* %b.y.ptr
- %a.indirect.ptr = getelementptr %struct.point* %a, i32 0, i32 0
- %b.indirect.ptr = getelementptr %struct.point* %b, i32 0, i32 0
- %a.indirect = load i32* %a.indirect.ptr
- %b.indirect = load i32* %b.indirect.ptr
+ %a.indirect.ptr = getelementptr %struct.point, %struct.point* %a, i32 0, i32 0
+ %b.indirect.ptr = getelementptr %struct.point, %struct.point* %b, i32 0, i32 0
+ %a.indirect = load i32, i32* %a.indirect.ptr
+ %b.indirect = load i32, i32* %b.indirect.ptr
%0 = add i32 %a.indirect, %b.indirect
store i32 %0, i32 addrspace(1)* %out
ret void
@@ -84,21 +86,21 @@ define void @direct_loop(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
entry:
%prv_array_const = alloca [2 x i32]
%prv_array = alloca [2 x i32]
- %a = load i32 addrspace(1)* %in
- %b_src_ptr = getelementptr i32 addrspace(1)* %in, i32 1
- %b = load i32 addrspace(1)* %b_src_ptr
- %a_dst_ptr = getelementptr [2 x i32]* %prv_array_const, i32 0, i32 0
+ %a = load i32, i32 addrspace(1)* %in
+ %b_src_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
+ %b = load i32, i32 addrspace(1)* %b_src_ptr
+ %a_dst_ptr = getelementptr [2 x i32], [2 x i32]* %prv_array_const, i32 0, i32 0
store i32 %a, i32* %a_dst_ptr
- %b_dst_ptr = getelementptr [2 x i32]* %prv_array_const, i32 0, i32 1
+ %b_dst_ptr = getelementptr [2 x i32], [2 x i32]* %prv_array_const, i32 0, i32 1
store i32 %b, i32* %b_dst_ptr
br label %for.body
for.body:
%inc = phi i32 [0, %entry], [%count, %for.body]
- %x_ptr = getelementptr [2 x i32]* %prv_array_const, i32 0, i32 0
- %x = load i32* %x_ptr
- %y_ptr = getelementptr [2 x i32]* %prv_array, i32 0, i32 0
- %y = load i32* %y_ptr
+ %x_ptr = getelementptr [2 x i32], [2 x i32]* %prv_array_const, i32 0, i32 0
+ %x = load i32, i32* %x_ptr
+ %y_ptr = getelementptr [2 x i32], [2 x i32]* %prv_array, i32 0, i32 0
+ %y = load i32, i32* %y_ptr
%xy = add i32 %x, %y
store i32 %xy, i32* %y_ptr
%count = add i32 %inc, 1
@@ -106,8 +108,8 @@ for.body:
br i1 %done, label %for.end, label %for.body
for.end:
- %value_ptr = getelementptr [2 x i32]* %prv_array, i32 0, i32 0
- %value = load i32* %value_ptr
+ %value_ptr = getelementptr [2 x i32], [2 x i32]* %prv_array, i32 0, i32 0
+ %value = load i32, i32* %value_ptr
store i32 %value, i32 addrspace(1)* %out
ret void
}
@@ -122,12 +124,12 @@ for.end:
define void @short_array(i32 addrspace(1)* %out, i32 %index) {
entry:
%0 = alloca [2 x i16]
- %1 = getelementptr [2 x i16]* %0, i32 0, i32 0
- %2 = getelementptr [2 x i16]* %0, i32 0, i32 1
+ %1 = getelementptr [2 x i16], [2 x i16]* %0, i32 0, i32 0
+ %2 = getelementptr [2 x i16], [2 x i16]* %0, i32 0, i32 1
store i16 0, i16* %1
store i16 1, i16* %2
- %3 = getelementptr [2 x i16]* %0, i32 0, i32 %index
- %4 = load i16* %3
+ %3 = getelementptr [2 x i16], [2 x i16]* %0, i32 0, i32 %index
+ %4 = load i16, i16* %3
%5 = sext i16 %4 to i32
store i32 %5, i32 addrspace(1)* %out
ret void
@@ -142,12 +144,12 @@ entry:
define void @char_array(i32 addrspace(1)* %out, i32 %index) {
entry:
%0 = alloca [2 x i8]
- %1 = getelementptr [2 x i8]* %0, i32 0, i32 0
- %2 = getelementptr [2 x i8]* %0, i32 0, i32 1
+ %1 = getelementptr [2 x i8], [2 x i8]* %0, i32 0, i32 0
+ %2 = getelementptr [2 x i8], [2 x i8]* %0, i32 0, i32 1
store i8 0, i8* %1
store i8 1, i8* %2
- %3 = getelementptr [2 x i8]* %0, i32 0, i32 %index
- %4 = load i8* %3
+ %3 = getelementptr [2 x i8], [2 x i8]* %0, i32 0, i32 %index
+ %4 = load i8, i8* %3
%5 = sext i8 %4 to i32
store i32 %5, i32 addrspace(1)* %out
ret void
@@ -165,12 +167,12 @@ entry:
define void @work_item_info(i32 addrspace(1)* %out, i32 %in) {
entry:
%0 = alloca [2 x i32]
- %1 = getelementptr [2 x i32]* %0, i32 0, i32 0
- %2 = getelementptr [2 x i32]* %0, i32 0, i32 1
+ %1 = getelementptr [2 x i32], [2 x i32]* %0, i32 0, i32 0
+ %2 = getelementptr [2 x i32], [2 x i32]* %0, i32 0, i32 1
store i32 0, i32* %1
store i32 1, i32* %2
- %3 = getelementptr [2 x i32]* %0, i32 0, i32 %in
- %4 = load i32* %3
+ %3 = getelementptr [2 x i32], [2 x i32]* %0, i32 0, i32 %in
+ %4 = load i32, i32* %3
%5 = call i32 @llvm.r600.read.tidig.x()
%6 = add i32 %4, %5
store i32 %6, i32 addrspace(1)* %out
@@ -188,20 +190,20 @@ define void @no_overlap(i32 addrspace(1)* %out, i32 %in) {
entry:
%0 = alloca [3 x i8], align 1
%1 = alloca [2 x i8], align 1
- %2 = getelementptr [3 x i8]* %0, i32 0, i32 0
- %3 = getelementptr [3 x i8]* %0, i32 0, i32 1
- %4 = getelementptr [3 x i8]* %0, i32 0, i32 2
- %5 = getelementptr [2 x i8]* %1, i32 0, i32 0
- %6 = getelementptr [2 x i8]* %1, i32 0, i32 1
+ %2 = getelementptr [3 x i8], [3 x i8]* %0, i32 0, i32 0
+ %3 = getelementptr [3 x i8], [3 x i8]* %0, i32 0, i32 1
+ %4 = getelementptr [3 x i8], [3 x i8]* %0, i32 0, i32 2
+ %5 = getelementptr [2 x i8], [2 x i8]* %1, i32 0, i32 0
+ %6 = getelementptr [2 x i8], [2 x i8]* %1, i32 0, i32 1
store i8 0, i8* %2
store i8 1, i8* %3
store i8 2, i8* %4
store i8 1, i8* %5
store i8 0, i8* %6
- %7 = getelementptr [3 x i8]* %0, i32 0, i32 %in
- %8 = getelementptr [2 x i8]* %1, i32 0, i32 %in
- %9 = load i8* %7
- %10 = load i8* %8
+ %7 = getelementptr [3 x i8], [3 x i8]* %0, i32 0, i32 %in
+ %8 = getelementptr [2 x i8], [2 x i8]* %1, i32 0, i32 %in
+ %9 = load i8, i8* %7
+ %10 = load i8, i8* %8
%11 = add i8 %9, %10
%12 = sext i8 %11 to i32
store i32 %12, i32 addrspace(1)* %out
@@ -211,12 +213,12 @@ entry:
define void @char_array_array(i32 addrspace(1)* %out, i32 %index) {
entry:
%alloca = alloca [2 x [2 x i8]]
- %gep0 = getelementptr [2 x [2 x i8]]* %alloca, i32 0, i32 0, i32 0
- %gep1 = getelementptr [2 x [2 x i8]]* %alloca, i32 0, i32 0, i32 1
+ %gep0 = getelementptr [2 x [2 x i8]], [2 x [2 x i8]]* %alloca, i32 0, i32 0, i32 0
+ %gep1 = getelementptr [2 x [2 x i8]], [2 x [2 x i8]]* %alloca, i32 0, i32 0, i32 1
store i8 0, i8* %gep0
store i8 1, i8* %gep1
- %gep2 = getelementptr [2 x [2 x i8]]* %alloca, i32 0, i32 0, i32 %index
- %load = load i8* %gep2
+ %gep2 = getelementptr [2 x [2 x i8]], [2 x [2 x i8]]* %alloca, i32 0, i32 0, i32 %index
+ %load = load i8, i8* %gep2
%sext = sext i8 %load to i32
store i32 %sext, i32 addrspace(1)* %out
ret void
@@ -225,12 +227,12 @@ entry:
define void @i32_array_array(i32 addrspace(1)* %out, i32 %index) {
entry:
%alloca = alloca [2 x [2 x i32]]
- %gep0 = getelementptr [2 x [2 x i32]]* %alloca, i32 0, i32 0, i32 0
- %gep1 = getelementptr [2 x [2 x i32]]* %alloca, i32 0, i32 0, i32 1
+ %gep0 = getelementptr [2 x [2 x i32]], [2 x [2 x i32]]* %alloca, i32 0, i32 0, i32 0
+ %gep1 = getelementptr [2 x [2 x i32]], [2 x [2 x i32]]* %alloca, i32 0, i32 0, i32 1
store i32 0, i32* %gep0
store i32 1, i32* %gep1
- %gep2 = getelementptr [2 x [2 x i32]]* %alloca, i32 0, i32 0, i32 %index
- %load = load i32* %gep2
+ %gep2 = getelementptr [2 x [2 x i32]], [2 x [2 x i32]]* %alloca, i32 0, i32 0, i32 %index
+ %load = load i32, i32* %gep2
store i32 %load, i32 addrspace(1)* %out
ret void
}
@@ -238,12 +240,12 @@ entry:
define void @i64_array_array(i64 addrspace(1)* %out, i32 %index) {
entry:
%alloca = alloca [2 x [2 x i64]]
- %gep0 = getelementptr [2 x [2 x i64]]* %alloca, i32 0, i32 0, i32 0
- %gep1 = getelementptr [2 x [2 x i64]]* %alloca, i32 0, i32 0, i32 1
+ %gep0 = getelementptr [2 x [2 x i64]], [2 x [2 x i64]]* %alloca, i32 0, i32 0, i32 0
+ %gep1 = getelementptr [2 x [2 x i64]], [2 x [2 x i64]]* %alloca, i32 0, i32 0, i32 1
store i64 0, i64* %gep0
store i64 1, i64* %gep1
- %gep2 = getelementptr [2 x [2 x i64]]* %alloca, i32 0, i32 0, i32 %index
- %load = load i64* %gep2
+ %gep2 = getelementptr [2 x [2 x i64]], [2 x [2 x i64]]* %alloca, i32 0, i32 0, i32 %index
+ %load = load i64, i64* %gep2
store i64 %load, i64 addrspace(1)* %out
ret void
}
@@ -253,12 +255,12 @@ entry:
define void @struct_array_array(i32 addrspace(1)* %out, i32 %index) {
entry:
%alloca = alloca [2 x [2 x %struct.pair32]]
- %gep0 = getelementptr [2 x [2 x %struct.pair32]]* %alloca, i32 0, i32 0, i32 0, i32 1
- %gep1 = getelementptr [2 x [2 x %struct.pair32]]* %alloca, i32 0, i32 0, i32 1, i32 1
+ %gep0 = getelementptr [2 x [2 x %struct.pair32]], [2 x [2 x %struct.pair32]]* %alloca, i32 0, i32 0, i32 0, i32 1
+ %gep1 = getelementptr [2 x [2 x %struct.pair32]], [2 x [2 x %struct.pair32]]* %alloca, i32 0, i32 0, i32 1, i32 1
store i32 0, i32* %gep0
store i32 1, i32* %gep1
- %gep2 = getelementptr [2 x [2 x %struct.pair32]]* %alloca, i32 0, i32 0, i32 %index, i32 0
- %load = load i32* %gep2
+ %gep2 = getelementptr [2 x [2 x %struct.pair32]], [2 x [2 x %struct.pair32]]* %alloca, i32 0, i32 0, i32 %index, i32 0
+ %load = load i32, i32* %gep2
store i32 %load, i32 addrspace(1)* %out
ret void
}
@@ -266,12 +268,12 @@ entry:
define void @struct_pair32_array(i32 addrspace(1)* %out, i32 %index) {
entry:
%alloca = alloca [2 x %struct.pair32]
- %gep0 = getelementptr [2 x %struct.pair32]* %alloca, i32 0, i32 0, i32 1
- %gep1 = getelementptr [2 x %struct.pair32]* %alloca, i32 0, i32 1, i32 0
+ %gep0 = getelementptr [2 x %struct.pair32], [2 x %struct.pair32]* %alloca, i32 0, i32 0, i32 1
+ %gep1 = getelementptr [2 x %struct.pair32], [2 x %struct.pair32]* %alloca, i32 0, i32 1, i32 0
store i32 0, i32* %gep0
store i32 1, i32* %gep1
- %gep2 = getelementptr [2 x %struct.pair32]* %alloca, i32 0, i32 %index, i32 0
- %load = load i32* %gep2
+ %gep2 = getelementptr [2 x %struct.pair32], [2 x %struct.pair32]* %alloca, i32 0, i32 %index, i32 0
+ %load = load i32, i32* %gep2
store i32 %load, i32 addrspace(1)* %out
ret void
}
@@ -279,13 +281,13 @@ entry:
define void @select_private(i32 addrspace(1)* %out, i32 %in) nounwind {
entry:
%tmp = alloca [2 x i32]
- %tmp1 = getelementptr [2 x i32]* %tmp, i32 0, i32 0
- %tmp2 = getelementptr [2 x i32]* %tmp, i32 0, i32 1
+ %tmp1 = getelementptr [2 x i32], [2 x i32]* %tmp, i32 0, i32 0
+ %tmp2 = getelementptr [2 x i32], [2 x i32]* %tmp, i32 0, i32 1
store i32 0, i32* %tmp1
store i32 1, i32* %tmp2
%cmp = icmp eq i32 %in, 0
%sel = select i1 %cmp, i32* %tmp1, i32* %tmp2
- %load = load i32* %sel
+ %load = load i32, i32* %sel
store i32 %load, i32 addrspace(1)* %out
ret void
}
@@ -299,13 +301,13 @@ entry:
; SI: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen offset:5
define void @ptrtoint(i32 addrspace(1)* %out, i32 %a, i32 %b) {
%alloca = alloca [16 x i32]
- %tmp0 = getelementptr [16 x i32]* %alloca, i32 0, i32 %a
+ %tmp0 = getelementptr [16 x i32], [16 x i32]* %alloca, i32 0, i32 %a
store i32 5, i32* %tmp0
%tmp1 = ptrtoint [16 x i32]* %alloca to i32
%tmp2 = add i32 %tmp1, 5
%tmp3 = inttoptr i32 %tmp2 to i32*
- %tmp4 = getelementptr i32* %tmp3, i32 %b
- %tmp5 = load i32* %tmp4
+ %tmp4 = getelementptr i32, i32* %tmp3, i32 %b
+ %tmp5 = load i32, i32* %tmp4
store i32 %tmp5, i32 addrspace(1)* %out
ret void
}
diff --git a/test/CodeGen/R600/pv-packing.ll b/test/CodeGen/R600/pv-packing.ll
index e5615b99728e..abeae563ff3f 100644
--- a/test/CodeGen/R600/pv-packing.ll
+++ b/test/CodeGen/R600/pv-packing.ll
@@ -14,8 +14,8 @@ main_body:
%6 = extractelement <4 x float> %reg3, i32 0
%7 = extractelement <4 x float> %reg3, i32 1
%8 = extractelement <4 x float> %reg3, i32 2
- %9 = load <4 x float> addrspace(8)* null
- %10 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+ %9 = load <4 x float>, <4 x float> addrspace(8)* null
+ %10 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
%11 = call float @llvm.AMDGPU.dp4(<4 x float> %9, <4 x float> %9)
%12 = fmul float %0, %3
%13 = fadd float %12, %6
diff --git a/test/CodeGen/R600/pv.ll b/test/CodeGen/R600/pv.ll
index 1908f15949a2..9a57dd19765a 100644
--- a/test/CodeGen/R600/pv.ll
+++ b/test/CodeGen/R600/pv.ll
@@ -33,63 +33,63 @@ main_body:
%25 = extractelement <4 x float> %reg7, i32 1
%26 = extractelement <4 x float> %reg7, i32 2
%27 = extractelement <4 x float> %reg7, i32 3
- %28 = load <4 x float> addrspace(8)* null
+ %28 = load <4 x float>, <4 x float> addrspace(8)* null
%29 = extractelement <4 x float> %28, i32 0
%30 = fmul float %0, %29
- %31 = load <4 x float> addrspace(8)* null
+ %31 = load <4 x float>, <4 x float> addrspace(8)* null
%32 = extractelement <4 x float> %31, i32 1
%33 = fmul float %0, %32
- %34 = load <4 x float> addrspace(8)* null
+ %34 = load <4 x float>, <4 x float> addrspace(8)* null
%35 = extractelement <4 x float> %34, i32 2
%36 = fmul float %0, %35
- %37 = load <4 x float> addrspace(8)* null
+ %37 = load <4 x float>, <4 x float> addrspace(8)* null
%38 = extractelement <4 x float> %37, i32 3
%39 = fmul float %0, %38
- %40 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+ %40 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
%41 = extractelement <4 x float> %40, i32 0
%42 = fmul float %1, %41
%43 = fadd float %42, %30
- %44 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+ %44 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
%45 = extractelement <4 x float> %44, i32 1
%46 = fmul float %1, %45
%47 = fadd float %46, %33
- %48 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+ %48 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
%49 = extractelement <4 x float> %48, i32 2
%50 = fmul float %1, %49
%51 = fadd float %50, %36
- %52 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+ %52 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
%53 = extractelement <4 x float> %52, i32 3
%54 = fmul float %1, %53
%55 = fadd float %54, %39
- %56 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
+ %56 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
%57 = extractelement <4 x float> %56, i32 0
%58 = fmul float %2, %57
%59 = fadd float %58, %43
- %60 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
+ %60 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
%61 = extractelement <4 x float> %60, i32 1
%62 = fmul float %2, %61
%63 = fadd float %62, %47
- %64 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
+ %64 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
%65 = extractelement <4 x float> %64, i32 2
%66 = fmul float %2, %65
%67 = fadd float %66, %51
- %68 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
+ %68 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
%69 = extractelement <4 x float> %68, i32 3
%70 = fmul float %2, %69
%71 = fadd float %70, %55
- %72 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
+ %72 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
%73 = extractelement <4 x float> %72, i32 0
%74 = fmul float %3, %73
%75 = fadd float %74, %59
- %76 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
+ %76 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
%77 = extractelement <4 x float> %76, i32 1
%78 = fmul float %3, %77
%79 = fadd float %78, %63
- %80 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
+ %80 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
%81 = extractelement <4 x float> %80, i32 2
%82 = fmul float %3, %81
%83 = fadd float %82, %67
- %84 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
+ %84 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
%85 = extractelement <4 x float> %84, i32 3
%86 = fmul float %3, %85
%87 = fadd float %86, %71
@@ -107,15 +107,15 @@ main_body:
%99 = fmul float %4, %98
%100 = fmul float %5, %98
%101 = fmul float %6, %98
- %102 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
+ %102 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
%103 = extractelement <4 x float> %102, i32 0
%104 = fmul float %103, %8
%105 = fadd float %104, %20
- %106 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
+ %106 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
%107 = extractelement <4 x float> %106, i32 1
%108 = fmul float %107, %9
%109 = fadd float %108, %21
- %110 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
+ %110 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
%111 = extractelement <4 x float> %110, i32 2
%112 = fmul float %111, %10
%113 = fadd float %112, %22
@@ -123,11 +123,11 @@ main_body:
%115 = call float @llvm.AMDIL.clamp.(float %109, float 0.000000e+00, float 1.000000e+00)
%116 = call float @llvm.AMDIL.clamp.(float %113, float 0.000000e+00, float 1.000000e+00)
%117 = call float @llvm.AMDIL.clamp.(float %15, float 0.000000e+00, float 1.000000e+00)
- %118 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5)
+ %118 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5)
%119 = extractelement <4 x float> %118, i32 0
- %120 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5)
+ %120 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5)
%121 = extractelement <4 x float> %120, i32 1
- %122 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5)
+ %122 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5)
%123 = extractelement <4 x float> %122, i32 2
%124 = insertelement <4 x float> undef, float %99, i32 0
%125 = insertelement <4 x float> %124, float %100, i32 1
@@ -138,11 +138,11 @@ main_body:
%130 = insertelement <4 x float> %129, float %123, i32 2
%131 = insertelement <4 x float> %130, float 0.000000e+00, i32 3
%132 = call float @llvm.AMDGPU.dp4(<4 x float> %127, <4 x float> %131)
- %133 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7)
+ %133 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7)
%134 = extractelement <4 x float> %133, i32 0
- %135 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7)
+ %135 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7)
%136 = extractelement <4 x float> %135, i32 1
- %137 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7)
+ %137 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7)
%138 = extractelement <4 x float> %137, i32 2
%139 = insertelement <4 x float> undef, float %99, i32 0
%140 = insertelement <4 x float> %139, float %100, i32 1
@@ -153,31 +153,31 @@ main_body:
%145 = insertelement <4 x float> %144, float %138, i32 2
%146 = insertelement <4 x float> %145, float 0.000000e+00, i32 3
%147 = call float @llvm.AMDGPU.dp4(<4 x float> %142, <4 x float> %146)
- %148 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 8)
+ %148 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 8)
%149 = extractelement <4 x float> %148, i32 0
%150 = fmul float %149, %8
- %151 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 8)
+ %151 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 8)
%152 = extractelement <4 x float> %151, i32 1
%153 = fmul float %152, %9
- %154 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 8)
+ %154 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 8)
%155 = extractelement <4 x float> %154, i32 2
%156 = fmul float %155, %10
- %157 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9)
+ %157 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9)
%158 = extractelement <4 x float> %157, i32 0
%159 = fmul float %158, %12
- %160 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9)
+ %160 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9)
%161 = extractelement <4 x float> %160, i32 1
%162 = fmul float %161, %13
- %163 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9)
+ %163 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9)
%164 = extractelement <4 x float> %163, i32 2
%165 = fmul float %164, %14
- %166 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 10)
+ %166 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 10)
%167 = extractelement <4 x float> %166, i32 0
%168 = fmul float %167, %16
- %169 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 10)
+ %169 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 10)
%170 = extractelement <4 x float> %169, i32 1
%171 = fmul float %170, %17
- %172 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 10)
+ %172 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 10)
%173 = extractelement <4 x float> %172, i32 2
%174 = fmul float %173, %18
%175 = fcmp uge float %132, 0.000000e+00
diff --git a/test/CodeGen/R600/r600-export-fix.ll b/test/CodeGen/R600/r600-export-fix.ll
index 7d7285632078..7cb80195b368 100644
--- a/test/CodeGen/R600/r600-export-fix.ll
+++ b/test/CodeGen/R600/r600-export-fix.ll
@@ -16,83 +16,83 @@ main_body:
%1 = extractelement <4 x float> %reg1, i32 1
%2 = extractelement <4 x float> %reg1, i32 2
%3 = extractelement <4 x float> %reg1, i32 3
- %4 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
+ %4 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
%5 = extractelement <4 x float> %4, i32 0
%6 = fmul float %5, %0
- %7 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
+ %7 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
%8 = extractelement <4 x float> %7, i32 1
%9 = fmul float %8, %0
- %10 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
+ %10 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
%11 = extractelement <4 x float> %10, i32 2
%12 = fmul float %11, %0
- %13 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
+ %13 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
%14 = extractelement <4 x float> %13, i32 3
%15 = fmul float %14, %0
- %16 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5)
+ %16 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5)
%17 = extractelement <4 x float> %16, i32 0
%18 = fmul float %17, %1
%19 = fadd float %18, %6
- %20 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5)
+ %20 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5)
%21 = extractelement <4 x float> %20, i32 1
%22 = fmul float %21, %1
%23 = fadd float %22, %9
- %24 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5)
+ %24 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5)
%25 = extractelement <4 x float> %24, i32 2
%26 = fmul float %25, %1
%27 = fadd float %26, %12
- %28 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5)
+ %28 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5)
%29 = extractelement <4 x float> %28, i32 3
%30 = fmul float %29, %1
%31 = fadd float %30, %15
- %32 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6)
+ %32 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6)
%33 = extractelement <4 x float> %32, i32 0
%34 = fmul float %33, %2
%35 = fadd float %34, %19
- %36 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6)
+ %36 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6)
%37 = extractelement <4 x float> %36, i32 1
%38 = fmul float %37, %2
%39 = fadd float %38, %23
- %40 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6)
+ %40 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6)
%41 = extractelement <4 x float> %40, i32 2
%42 = fmul float %41, %2
%43 = fadd float %42, %27
- %44 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6)
+ %44 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6)
%45 = extractelement <4 x float> %44, i32 3
%46 = fmul float %45, %2
%47 = fadd float %46, %31
- %48 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7)
+ %48 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7)
%49 = extractelement <4 x float> %48, i32 0
%50 = fmul float %49, %3
%51 = fadd float %50, %35
- %52 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7)
+ %52 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7)
%53 = extractelement <4 x float> %52, i32 1
%54 = fmul float %53, %3
%55 = fadd float %54, %39
- %56 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7)
+ %56 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7)
%57 = extractelement <4 x float> %56, i32 2
%58 = fmul float %57, %3
%59 = fadd float %58, %43
- %60 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7)
+ %60 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7)
%61 = extractelement <4 x float> %60, i32 3
%62 = fmul float %61, %3
%63 = fadd float %62, %47
- %64 = load <4 x float> addrspace(8)* null
+ %64 = load <4 x float>, <4 x float> addrspace(8)* null
%65 = extractelement <4 x float> %64, i32 0
- %66 = load <4 x float> addrspace(8)* null
+ %66 = load <4 x float>, <4 x float> addrspace(8)* null
%67 = extractelement <4 x float> %66, i32 1
- %68 = load <4 x float> addrspace(8)* null
+ %68 = load <4 x float>, <4 x float> addrspace(8)* null
%69 = extractelement <4 x float> %68, i32 2
- %70 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
+ %70 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
%71 = extractelement <4 x float> %70, i32 0
- %72 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
+ %72 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
%73 = extractelement <4 x float> %72, i32 1
- %74 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
+ %74 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
%75 = extractelement <4 x float> %74, i32 2
- %76 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
+ %76 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
%77 = extractelement <4 x float> %76, i32 0
- %78 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
+ %78 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
%79 = extractelement <4 x float> %78, i32 1
- %80 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
+ %80 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
%81 = extractelement <4 x float> %80, i32 2
%82 = insertelement <4 x float> undef, float %51, i32 0
%83 = insertelement <4 x float> %82, float %55, i32 1
diff --git a/test/CodeGen/R600/r600cfg.ll b/test/CodeGen/R600/r600cfg.ll
index dddc9de7e963..c7b9d65220f3 100644
--- a/test/CodeGen/R600/r600cfg.ll
+++ b/test/CodeGen/R600/r600cfg.ll
@@ -83,7 +83,7 @@ ELSE45: ; preds = %ENDIF40
ENDIF43: ; preds = %ELSE45, %IF44
%.sink = phi i32 [ %49, %IF44 ], [ %51, %ELSE45 ]
%52 = bitcast i32 %.sink to float
- %53 = load <4 x float> addrspace(8)* null
+ %53 = load <4 x float>, <4 x float> addrspace(8)* null
%54 = extractelement <4 x float> %53, i32 0
%55 = bitcast float %54 to i32
br label %LOOP47
diff --git a/test/CodeGen/R600/register-count-comments.ll b/test/CodeGen/R600/register-count-comments.ll
index 2b49f977def7..de6bfb310883 100644
--- a/test/CodeGen/R600/register-count-comments.ll
+++ b/test/CodeGen/R600/register-count-comments.ll
@@ -9,11 +9,11 @@ declare i32 @llvm.SI.tid() nounwind readnone
; SI: ; NumVgprs: {{[0-9]+}}
define void @foo(i32 addrspace(1)* noalias %out, i32 addrspace(1)* %abase, i32 addrspace(1)* %bbase) nounwind {
%tid = call i32 @llvm.SI.tid() nounwind readnone
- %aptr = getelementptr i32 addrspace(1)* %abase, i32 %tid
- %bptr = getelementptr i32 addrspace(1)* %bbase, i32 %tid
- %outptr = getelementptr i32 addrspace(1)* %out, i32 %tid
- %a = load i32 addrspace(1)* %aptr, align 4
- %b = load i32 addrspace(1)* %bptr, align 4
+ %aptr = getelementptr i32, i32 addrspace(1)* %abase, i32 %tid
+ %bptr = getelementptr i32, i32 addrspace(1)* %bbase, i32 %tid
+ %outptr = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %a = load i32, i32 addrspace(1)* %aptr, align 4
+ %b = load i32, i32 addrspace(1)* %bptr, align 4
%result = add i32 %a, %b
store i32 %result, i32 addrspace(1)* %outptr, align 4
ret void
diff --git a/test/CodeGen/R600/reorder-stores.ll b/test/CodeGen/R600/reorder-stores.ll
index ea50d5eed4df..187650ff9a53 100644
--- a/test/CodeGen/R600/reorder-stores.ll
+++ b/test/CodeGen/R600/reorder-stores.ll
@@ -12,8 +12,8 @@
; SI: buffer_store_dwordx2
; SI: s_endpgm
define void @no_reorder_v2f64_global_load_store(<2 x double> addrspace(1)* nocapture %x, <2 x double> addrspace(1)* nocapture %y) nounwind {
- %tmp1 = load <2 x double> addrspace(1)* %x, align 16
- %tmp4 = load <2 x double> addrspace(1)* %y, align 16
+ %tmp1 = load <2 x double>, <2 x double> addrspace(1)* %x, align 16
+ %tmp4 = load <2 x double>, <2 x double> addrspace(1)* %y, align 16
store <2 x double> %tmp4, <2 x double> addrspace(1)* %x, align 16
store <2 x double> %tmp1, <2 x double> addrspace(1)* %y, align 16
ret void
@@ -26,8 +26,8 @@ define void @no_reorder_v2f64_global_load_store(<2 x double> addrspace(1)* nocap
; SI: ds_write_b64
; SI: s_endpgm
define void @no_reorder_scalarized_v2f64_local_load_store(<2 x double> addrspace(3)* nocapture %x, <2 x double> addrspace(3)* nocapture %y) nounwind {
- %tmp1 = load <2 x double> addrspace(3)* %x, align 16
- %tmp4 = load <2 x double> addrspace(3)* %y, align 16
+ %tmp1 = load <2 x double>, <2 x double> addrspace(3)* %x, align 16
+ %tmp4 = load <2 x double>, <2 x double> addrspace(3)* %y, align 16
store <2 x double> %tmp4, <2 x double> addrspace(3)* %x, align 16
store <2 x double> %tmp1, <2 x double> addrspace(3)* %y, align 16
ret void
@@ -76,8 +76,8 @@ define void @no_reorder_scalarized_v2f64_local_load_store(<2 x double> addrspace
; SI: buffer_store_dword
; SI: s_endpgm
define void @no_reorder_split_v8i32_global_load_store(<8 x i32> addrspace(1)* nocapture %x, <8 x i32> addrspace(1)* nocapture %y) nounwind {
- %tmp1 = load <8 x i32> addrspace(1)* %x, align 32
- %tmp4 = load <8 x i32> addrspace(1)* %y, align 32
+ %tmp1 = load <8 x i32>, <8 x i32> addrspace(1)* %x, align 32
+ %tmp4 = load <8 x i32>, <8 x i32> addrspace(1)* %y, align 32
store <8 x i32> %tmp4, <8 x i32> addrspace(1)* %x, align 32
store <8 x i32> %tmp1, <8 x i32> addrspace(1)* %y, align 32
ret void
@@ -91,8 +91,8 @@ define void @no_reorder_split_v8i32_global_load_store(<8 x i32> addrspace(1)* no
; SI: ds_write_b64
; SI: s_endpgm
define void @no_reorder_extload_64(<2 x i32> addrspace(3)* nocapture %x, <2 x i32> addrspace(3)* nocapture %y) nounwind {
- %tmp1 = load <2 x i32> addrspace(3)* %x, align 8
- %tmp4 = load <2 x i32> addrspace(3)* %y, align 8
+ %tmp1 = load <2 x i32>, <2 x i32> addrspace(3)* %x, align 8
+ %tmp4 = load <2 x i32>, <2 x i32> addrspace(3)* %y, align 8
%tmp1ext = zext <2 x i32> %tmp1 to <2 x i64>
%tmp4ext = zext <2 x i32> %tmp4 to <2 x i64>
%tmp7 = add <2 x i64> %tmp1ext, <i64 1, i64 1>
diff --git a/test/CodeGen/R600/rotl.i64.ll b/test/CodeGen/R600/rotl.i64.ll
index 6da17a4fea93..3f4ceb7e0310 100644
--- a/test/CodeGen/R600/rotl.i64.ll
+++ b/test/CodeGen/R600/rotl.i64.ll
@@ -28,8 +28,8 @@ entry:
; BOTH: s_endpgm
define void @v_rotl_i64(i64 addrspace(1)* %in, i64 addrspace(1)* %xptr, i64 addrspace(1)* %yptr) {
entry:
- %x = load i64 addrspace(1)* %xptr, align 8
- %y = load i64 addrspace(1)* %yptr, align 8
+ %x = load i64, i64 addrspace(1)* %xptr, align 8
+ %y = load i64, i64 addrspace(1)* %yptr, align 8
%tmp0 = shl i64 %x, %y
%tmp1 = sub i64 64, %y
%tmp2 = lshr i64 %x, %tmp1
diff --git a/test/CodeGen/R600/rotr.i64.ll b/test/CodeGen/R600/rotr.i64.ll
index f1d1d265f366..586de44a566c 100644
--- a/test/CodeGen/R600/rotr.i64.ll
+++ b/test/CodeGen/R600/rotr.i64.ll
@@ -26,8 +26,8 @@ entry:
; BOTH: v_or_b32
define void @v_rotr_i64(i64 addrspace(1)* %in, i64 addrspace(1)* %xptr, i64 addrspace(1)* %yptr) {
entry:
- %x = load i64 addrspace(1)* %xptr, align 8
- %y = load i64 addrspace(1)* %yptr, align 8
+ %x = load i64, i64 addrspace(1)* %xptr, align 8
+ %y = load i64, i64 addrspace(1)* %yptr, align 8
%tmp0 = sub i64 64, %y
%tmp1 = shl i64 %x, %tmp0
%tmp2 = lshr i64 %x, %y
@@ -50,8 +50,8 @@ entry:
; BOTH-LABEL: {{^}}v_rotr_v2i64:
define void @v_rotr_v2i64(<2 x i64> addrspace(1)* %in, <2 x i64> addrspace(1)* %xptr, <2 x i64> addrspace(1)* %yptr) {
entry:
- %x = load <2 x i64> addrspace(1)* %xptr, align 8
- %y = load <2 x i64> addrspace(1)* %yptr, align 8
+ %x = load <2 x i64>, <2 x i64> addrspace(1)* %xptr, align 8
+ %y = load <2 x i64>, <2 x i64> addrspace(1)* %yptr, align 8
%tmp0 = sub <2 x i64> <i64 64, i64 64>, %y
%tmp1 = shl <2 x i64> %x, %tmp0
%tmp2 = lshr <2 x i64> %x, %y
diff --git a/test/CodeGen/R600/rsq.ll b/test/CodeGen/R600/rsq.ll
index b8a23df63d83..b67b800c7374 100644
--- a/test/CodeGen/R600/rsq.ll
+++ b/test/CodeGen/R600/rsq.ll
@@ -9,7 +9,7 @@ declare double @llvm.sqrt.f64(double) nounwind readnone
; SI: v_rsq_f32_e32
; SI: s_endpgm
define void @rsq_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind {
- %val = load float addrspace(1)* %in, align 4
+ %val = load float, float addrspace(1)* %in, align 4
%sqrt = call float @llvm.sqrt.f32(float %val) nounwind readnone
%div = fdiv float 1.0, %sqrt
store float %div, float addrspace(1)* %out, align 4
@@ -21,7 +21,7 @@ define void @rsq_f32(float addrspace(1)* noalias %out, float addrspace(1)* noali
; SI-SAFE: v_sqrt_f64_e32
; SI: s_endpgm
define void @rsq_f64(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) nounwind {
- %val = load double addrspace(1)* %in, align 4
+ %val = load double, double addrspace(1)* %in, align 4
%sqrt = call double @llvm.sqrt.f64(double %val) nounwind readnone
%div = fdiv double 1.0, %sqrt
store double %div, double addrspace(1)* %out, align 4
@@ -57,14 +57,14 @@ define void @rsq_f32_sgpr(float addrspace(1)* noalias %out, float %val) nounwind
; SI: s_endpgm
define void @rsqrt_fmul(float addrspace(1)* %out, float addrspace(1)* %in) {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %out.gep = getelementptr float addrspace(1)* %out, i32 %tid
- %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid
- %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1
- %gep.2 = getelementptr float addrspace(1)* %gep.0, i32 2
+ %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
+ %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
+ %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
+ %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2
- %a = load float addrspace(1)* %gep.0
- %b = load float addrspace(1)* %gep.1
- %c = load float addrspace(1)* %gep.2
+ %a = load float, float addrspace(1)* %gep.0
+ %b = load float, float addrspace(1)* %gep.1
+ %c = load float, float addrspace(1)* %gep.2
%x = call float @llvm.sqrt.f32(float %a)
%y = fmul float %x, %b
diff --git a/test/CodeGen/R600/s_movk_i32.ll b/test/CodeGen/R600/s_movk_i32.ll
index 8be2d1d923cc..6b1a36c979c2 100644
--- a/test/CodeGen/R600/s_movk_i32.ll
+++ b/test/CodeGen/R600/s_movk_i32.ll
@@ -9,7 +9,7 @@
; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
; SI: s_endpgm
define void @s_movk_i32_k0(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
- %loada = load i64 addrspace(1)* %a, align 4
+ %loada = load i64, i64 addrspace(1)* %a, align 4
%or = or i64 %loada, 4295032831 ; ((1 << 16) - 1) | (1 << 32)
store i64 %or, i64 addrspace(1)* %out
ret void
@@ -23,7 +23,7 @@ define void @s_movk_i32_k0(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 add
; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
; SI: s_endpgm
define void @s_movk_i32_k1(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
- %loada = load i64 addrspace(1)* %a, align 4
+ %loada = load i64, i64 addrspace(1)* %a, align 4
%or = or i64 %loada, 4295000063 ; ((1 << 15) - 1) | (1 << 32)
store i64 %or, i64 addrspace(1)* %out
ret void
@@ -37,7 +37,7 @@ define void @s_movk_i32_k1(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 add
; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
; SI: s_endpgm
define void @s_movk_i32_k2(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
- %loada = load i64 addrspace(1)* %a, align 4
+ %loada = load i64, i64 addrspace(1)* %a, align 4
%or = or i64 %loada, 274877939711 ; ((1 << 15) - 1) | (64 << 32)
store i64 %or, i64 addrspace(1)* %out
ret void
@@ -51,7 +51,7 @@ define void @s_movk_i32_k2(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 add
; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
; SI: s_endpgm
define void @s_movk_i32_k3(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
- %loada = load i64 addrspace(1)* %a, align 4
+ %loada = load i64, i64 addrspace(1)* %a, align 4
%or = or i64 %loada, 4295000064 ; (1 << 15) | (1 << 32)
store i64 %or, i64 addrspace(1)* %out
ret void
@@ -65,7 +65,7 @@ define void @s_movk_i32_k3(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 add
; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
; SI: s_endpgm
define void @s_movk_i32_k4(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
- %loada = load i64 addrspace(1)* %a, align 4
+ %loada = load i64, i64 addrspace(1)* %a, align 4
%or = or i64 %loada, 4295098368 ; (1 << 17) | (1 << 32)
store i64 %or, i64 addrspace(1)* %out
ret void
@@ -79,7 +79,7 @@ define void @s_movk_i32_k4(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 add
; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
; SI: s_endpgm
define void @s_movk_i32_k5(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
- %loada = load i64 addrspace(1)* %a, align 4
+ %loada = load i64, i64 addrspace(1)* %a, align 4
%or = or i64 %loada, 18374967954648334319 ; -17 & 0xff00ffffffffffff
store i64 %or, i64 addrspace(1)* %out
ret void
@@ -93,7 +93,7 @@ define void @s_movk_i32_k5(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 add
; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
; SI: s_endpgm
define void @s_movk_i32_k6(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
- %loada = load i64 addrspace(1)* %a, align 4
+ %loada = load i64, i64 addrspace(1)* %a, align 4
%or = or i64 %loada, 270582939713 ; 65 | (63 << 32)
store i64 %or, i64 addrspace(1)* %out
ret void
@@ -107,7 +107,7 @@ define void @s_movk_i32_k6(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 add
; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
; SI: s_endpgm
define void @s_movk_i32_k7(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
- %loada = load i64 addrspace(1)* %a, align 4
+ %loada = load i64, i64 addrspace(1)* %a, align 4
%or = or i64 %loada, 70368744185856; ((1 << 13)) | ((1 << 14) << 32)
store i64 %or, i64 addrspace(1)* %out
ret void
@@ -122,7 +122,7 @@ define void @s_movk_i32_k7(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 add
; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
; SI: s_endpgm
define void @s_movk_i32_k8(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
- %loada = load i64 addrspace(1)* %a, align 4
+ %loada = load i64, i64 addrspace(1)* %a, align 4
%or = or i64 %loada, 1229782942255906816 ; 0x11111111ffff8000
store i64 %or, i64 addrspace(1)* %out
ret void
@@ -136,7 +136,7 @@ define void @s_movk_i32_k8(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 add
; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
; SI: s_endpgm
define void @s_movk_i32_k9(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
- %loada = load i64 addrspace(1)* %a, align 4
+ %loada = load i64, i64 addrspace(1)* %a, align 4
%or = or i64 %loada, 1229782942255906817 ; 0x11111111ffff8001
store i64 %or, i64 addrspace(1)* %out
ret void
@@ -150,7 +150,7 @@ define void @s_movk_i32_k9(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 add
; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
; SI: s_endpgm
define void @s_movk_i32_k10(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
- %loada = load i64 addrspace(1)* %a, align 4
+ %loada = load i64, i64 addrspace(1)* %a, align 4
%or = or i64 %loada, 1229782942255909000 ; 0x11111111ffff8888
store i64 %or, i64 addrspace(1)* %out
ret void
@@ -164,7 +164,7 @@ define void @s_movk_i32_k10(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 ad
; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
; SI: s_endpgm
define void @s_movk_i32_k11(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
- %loada = load i64 addrspace(1)* %a, align 4
+ %loada = load i64, i64 addrspace(1)* %a, align 4
%or = or i64 %loada, 1229782942255910911 ; 0x11111111ffff8fff
store i64 %or, i64 addrspace(1)* %out
ret void
@@ -178,7 +178,7 @@ define void @s_movk_i32_k11(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 ad
; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
; SI: s_endpgm
define void @s_movk_i32_k12(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
- %loada = load i64 addrspace(1)* %a, align 4
+ %loada = load i64, i64 addrspace(1)* %a, align 4
%or = or i64 %loada, 1229782942255902721 ; 0x11111111ffff7001
store i64 %or, i64 addrspace(1)* %out
ret void
diff --git a/test/CodeGen/R600/saddo.ll b/test/CodeGen/R600/saddo.ll
index 8e625c1110a6..f8ced7942a60 100644
--- a/test/CodeGen/R600/saddo.ll
+++ b/test/CodeGen/R600/saddo.ll
@@ -28,8 +28,8 @@ define void @s_saddo_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %carryout, i32
; FUNC-LABEL: {{^}}v_saddo_i32:
define void @v_saddo_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %carryout, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
- %a = load i32 addrspace(1)* %aptr, align 4
- %b = load i32 addrspace(1)* %bptr, align 4
+ %a = load i32, i32 addrspace(1)* %aptr, align 4
+ %b = load i32, i32 addrspace(1)* %bptr, align 4
%sadd = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a, i32 %b) nounwind
%val = extractvalue { i32, i1 } %sadd, 0
%carry = extractvalue { i32, i1 } %sadd, 1
@@ -52,8 +52,8 @@ define void @s_saddo_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %carryout, i64
; SI: v_add_i32
; SI: v_addc_u32
define void @v_saddo_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %carryout, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) nounwind {
- %a = load i64 addrspace(1)* %aptr, align 4
- %b = load i64 addrspace(1)* %bptr, align 4
+ %a = load i64, i64 addrspace(1)* %aptr, align 4
+ %b = load i64, i64 addrspace(1)* %bptr, align 4
%sadd = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 %a, i64 %b) nounwind
%val = extractvalue { i64, i1 } %sadd, 0
%carry = extractvalue { i64, i1 } %sadd, 1
diff --git a/test/CodeGen/R600/salu-to-valu.ll b/test/CodeGen/R600/salu-to-valu.ll
index dfb181da1801..0b9649576545 100644
--- a/test/CodeGen/R600/salu-to-valu.ll
+++ b/test/CodeGen/R600/salu-to-valu.ll
@@ -27,11 +27,11 @@ entry:
loop:
%4 = phi i64 [0, %entry], [%5, %loop]
%5 = add i64 %2, %4
- %6 = getelementptr i8 addrspace(1)* %in, i64 %5
- %7 = load i8 addrspace(1)* %6, align 1
+ %6 = getelementptr i8, i8 addrspace(1)* %in, i64 %5
+ %7 = load i8, i8 addrspace(1)* %6, align 1
%8 = or i64 %5, 1
- %9 = getelementptr i8 addrspace(1)* %in, i64 %8
- %10 = load i8 addrspace(1)* %9, align 1
+ %9 = getelementptr i8, i8 addrspace(1)* %in, i64 %8
+ %10 = load i8, i8 addrspace(1)* %9, align 1
%11 = add i8 %7, %10
%12 = sext i8 %11 to i32
store i32 %12, i32 addrspace(1)* %out
@@ -59,18 +59,18 @@ entry:
br i1 %0, label %if, label %else
if:
- %1 = load i32 addrspace(2)* addrspace(1)* %in
+ %1 = load i32 addrspace(2)*, i32 addrspace(2)* addrspace(1)* %in
br label %endif
else:
- %2 = getelementptr i32 addrspace(2)* addrspace(1)* %in
- %3 = load i32 addrspace(2)* addrspace(1)* %2
+ %2 = getelementptr i32 addrspace(2)*, i32 addrspace(2)* addrspace(1)* %in
+ %3 = load i32 addrspace(2)*, i32 addrspace(2)* addrspace(1)* %2
br label %endif
endif:
%4 = phi i32 addrspace(2)* [%1, %if], [%3, %else]
- %5 = getelementptr i32 addrspace(2)* %4, i32 3000
- %6 = load i32 addrspace(2)* %5
+ %5 = getelementptr i32, i32 addrspace(2)* %4, i32 3000
+ %6 = load i32, i32 addrspace(2)* %5
store i32 %6, i32 addrspace(1)* %out
ret void
}
@@ -83,8 +83,8 @@ define void @smrd_valu2(i32 addrspace(1)* %out, [8 x i32] addrspace(2)* %in) {
entry:
%0 = call i32 @llvm.r600.read.tidig.x() nounwind readnone
%1 = add i32 %0, 4
- %2 = getelementptr [8 x i32] addrspace(2)* %in, i32 %0, i32 4
- %3 = load i32 addrspace(2)* %2
+ %2 = getelementptr [8 x i32], [8 x i32] addrspace(2)* %in, i32 %0, i32 4
+ %3 = load i32, i32 addrspace(2)* %2
store i32 %3, i32 addrspace(1)* %out
ret void
}
@@ -95,9 +95,9 @@ entry:
define void @s_load_imm_v8i32(<8 x i32> addrspace(1)* %out, i32 addrspace(2)* nocapture readonly %in) {
entry:
%tmp0 = tail call i32 @llvm.r600.read.tidig.x() #1
- %tmp1 = getelementptr inbounds i32 addrspace(2)* %in, i32 %tmp0
+ %tmp1 = getelementptr inbounds i32, i32 addrspace(2)* %in, i32 %tmp0
%tmp2 = bitcast i32 addrspace(2)* %tmp1 to <8 x i32> addrspace(2)*
- %tmp3 = load <8 x i32> addrspace(2)* %tmp2, align 4
+ %tmp3 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp2, align 4
store <8 x i32> %tmp3, <8 x i32> addrspace(1)* %out, align 32
ret void
}
@@ -110,9 +110,9 @@ entry:
define void @s_load_imm_v16i32(<16 x i32> addrspace(1)* %out, i32 addrspace(2)* nocapture readonly %in) {
entry:
%tmp0 = tail call i32 @llvm.r600.read.tidig.x() #1
- %tmp1 = getelementptr inbounds i32 addrspace(2)* %in, i32 %tmp0
+ %tmp1 = getelementptr inbounds i32, i32 addrspace(2)* %in, i32 %tmp0
%tmp2 = bitcast i32 addrspace(2)* %tmp1 to <16 x i32> addrspace(2)*
- %tmp3 = load <16 x i32> addrspace(2)* %tmp2, align 4
+ %tmp3 = load <16 x i32>, <16 x i32> addrspace(2)* %tmp2, align 4
store <16 x i32> %tmp3, <16 x i32> addrspace(1)* %out, align 32
ret void
}
diff --git a/test/CodeGen/R600/scalar_to_vector.ll b/test/CodeGen/R600/scalar_to_vector.ll
index b82e5526f751..0970e5d30630 100644
--- a/test/CodeGen/R600/scalar_to_vector.ll
+++ b/test/CodeGen/R600/scalar_to_vector.ll
@@ -11,7 +11,7 @@
; SI: buffer_store_short [[RESULT]]
; SI: s_endpgm
define void @scalar_to_vector_v2i32(<4 x i16> addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
- %tmp1 = load i32 addrspace(1)* %in, align 4
+ %tmp1 = load i32, i32 addrspace(1)* %in, align 4
%bc = bitcast i32 %tmp1 to <2 x i16>
%tmp2 = shufflevector <2 x i16> %bc, <2 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
store <4 x i16> %tmp2, <4 x i16> addrspace(1)* %out, align 8
@@ -27,7 +27,7 @@ define void @scalar_to_vector_v2i32(<4 x i16> addrspace(1)* %out, i32 addrspace(
; SI: buffer_store_short [[RESULT]]
; SI: s_endpgm
define void @scalar_to_vector_v2f32(<4 x i16> addrspace(1)* %out, float addrspace(1)* %in) nounwind {
- %tmp1 = load float addrspace(1)* %in, align 4
+ %tmp1 = load float, float addrspace(1)* %in, align 4
%bc = bitcast float %tmp1 to <2 x i16>
%tmp2 = shufflevector <2 x i16> %bc, <2 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
store <4 x i16> %tmp2, <4 x i16> addrspace(1)* %out, align 8
@@ -39,7 +39,7 @@ define void @scalar_to_vector_v2f32(<4 x i16> addrspace(1)* %out, float addrspac
; define void @scalar_to_vector_test2(<8 x i8> addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
-; %tmp1 = load i32 addrspace(1)* %in, align 4
+; %tmp1 = load i32, i32 addrspace(1)* %in, align 4
; %bc = bitcast i32 %tmp1 to <4 x i8>
; %tmp2 = shufflevector <4 x i8> %bc, <4 x i8> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
diff --git a/test/CodeGen/R600/schedule-fs-loop-nested.ll b/test/CodeGen/R600/schedule-fs-loop-nested.ll
index b917ec6413e9..759197ca61f7 100644
--- a/test/CodeGen/R600/schedule-fs-loop-nested.ll
+++ b/test/CodeGen/R600/schedule-fs-loop-nested.ll
@@ -3,7 +3,7 @@
define void @main() {
main_body:
- %0 = load <4 x float> addrspace(9)* null
+ %0 = load <4 x float>, <4 x float> addrspace(9)* null
%1 = extractelement <4 x float> %0, i32 3
%2 = fptosi float %1 to i32
%3 = bitcast i32 %2 to float
@@ -20,11 +20,11 @@ main_body:
%14 = bitcast float %12 to i32
%15 = add i32 %13, %14
%16 = bitcast i32 %15 to float
- %17 = load <4 x float> addrspace(9)* null
+ %17 = load <4 x float>, <4 x float> addrspace(9)* null
%18 = extractelement <4 x float> %17, i32 0
- %19 = load <4 x float> addrspace(9)* null
+ %19 = load <4 x float>, <4 x float> addrspace(9)* null
%20 = extractelement <4 x float> %19, i32 1
- %21 = load <4 x float> addrspace(9)* null
+ %21 = load <4 x float>, <4 x float> addrspace(9)* null
%22 = extractelement <4 x float> %21, i32 2
br label %LOOP
diff --git a/test/CodeGen/R600/schedule-fs-loop.ll b/test/CodeGen/R600/schedule-fs-loop.ll
index d6c194b19b27..28cc08abc022 100644
--- a/test/CodeGen/R600/schedule-fs-loop.ll
+++ b/test/CodeGen/R600/schedule-fs-loop.ll
@@ -3,15 +3,15 @@
define void @main() {
main_body:
- %0 = load <4 x float> addrspace(9)* null
+ %0 = load <4 x float>, <4 x float> addrspace(9)* null
%1 = extractelement <4 x float> %0, i32 3
%2 = fptosi float %1 to i32
%3 = bitcast i32 %2 to float
- %4 = load <4 x float> addrspace(9)* null
+ %4 = load <4 x float>, <4 x float> addrspace(9)* null
%5 = extractelement <4 x float> %4, i32 0
- %6 = load <4 x float> addrspace(9)* null
+ %6 = load <4 x float>, <4 x float> addrspace(9)* null
%7 = extractelement <4 x float> %6, i32 1
- %8 = load <4 x float> addrspace(9)* null
+ %8 = load <4 x float>, <4 x float> addrspace(9)* null
%9 = extractelement <4 x float> %8, i32 2
br label %LOOP
diff --git a/test/CodeGen/R600/schedule-global-loads.ll b/test/CodeGen/R600/schedule-global-loads.ll
index b6437d25b8cb..3f728fd873b3 100644
--- a/test/CodeGen/R600/schedule-global-loads.ll
+++ b/test/CodeGen/R600/schedule-global-loads.ll
@@ -14,9 +14,9 @@ declare i32 @llvm.r600.read.tidig.x() #1
; SI: buffer_store_dword [[REG0]]
; SI: buffer_store_dword [[REG1]]
define void @cluster_global_arg_loads(i32 addrspace(1)* %out0, i32 addrspace(1)* %out1, i32 addrspace(1)* %ptr) #0 {
- %load0 = load i32 addrspace(1)* %ptr, align 4
- %gep = getelementptr i32 addrspace(1)* %ptr, i32 1
- %load1 = load i32 addrspace(1)* %gep, align 4
+ %load0 = load i32, i32 addrspace(1)* %ptr, align 4
+ %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 1
+ %load1 = load i32, i32 addrspace(1)* %gep, align 4
store i32 %load0, i32 addrspace(1)* %out0, align 4
store i32 %load1, i32 addrspace(1)* %out1, align 4
ret void
@@ -29,9 +29,9 @@ define void @cluster_global_arg_loads(i32 addrspace(1)* %out0, i32 addrspace(1)*
; SI: buffer_load_dword
define void @same_base_ptr_crash(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %offset) {
entry:
- %out1 = getelementptr i32 addrspace(1)* %out, i32 %offset
- %tmp0 = load i32 addrspace(1)* %out
- %tmp1 = load i32 addrspace(1)* %out1
+ %out1 = getelementptr i32, i32 addrspace(1)* %out, i32 %offset
+ %tmp0 = load i32, i32 addrspace(1)* %out
+ %tmp1 = load i32, i32 addrspace(1)* %out1
%tmp2 = add i32 %tmp0, %tmp1
store i32 %tmp2, i32 addrspace(1)* %out
ret void
diff --git a/test/CodeGen/R600/schedule-if-2.ll b/test/CodeGen/R600/schedule-if-2.ll
index 38aad1850f81..549465096833 100644
--- a/test/CodeGen/R600/schedule-if-2.ll
+++ b/test/CodeGen/R600/schedule-if-2.ll
@@ -3,10 +3,10 @@
define void @main() {
main_body:
- %0 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
+ %0 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
%1 = extractelement <4 x float> %0, i32 0
%2 = fadd float 1.000000e+03, %1
- %3 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+ %3 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
%4 = extractelement <4 x float> %3, i32 0
%5 = bitcast float %4 to i32
%6 = icmp eq i32 %5, 0
@@ -47,7 +47,7 @@ IF: ; preds = %main_body
br label %ENDIF
ELSE: ; preds = %main_body
- %36 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+ %36 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
%37 = extractelement <4 x float> %36, i32 0
%38 = bitcast float %37 to i32
%39 = icmp eq i32 %38, 1
@@ -80,7 +80,7 @@ IF23: ; preds = %ELSE
%.28 = select i1 %54, float 0x36A0000000000000, float 0.000000e+00
%55 = bitcast float %.28 to i32
%56 = sitofp i32 %55 to float
- %57 = load <4 x float> addrspace(8)* null
+ %57 = load <4 x float>, <4 x float> addrspace(8)* null
%58 = extractelement <4 x float> %57, i32 0
%59 = fsub float -0.000000e+00, %58
%60 = fadd float %2, %59
diff --git a/test/CodeGen/R600/schedule-if.ll b/test/CodeGen/R600/schedule-if.ll
index f960c9323940..94c653c8f25b 100644
--- a/test/CodeGen/R600/schedule-if.ll
+++ b/test/CodeGen/R600/schedule-if.ll
@@ -3,7 +3,7 @@
define void @main() {
main_body:
- %0 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+ %0 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
%1 = extractelement <4 x float> %0, i32 0
%2 = bitcast float %1 to i32
%3 = icmp eq i32 %2, 0
@@ -14,7 +14,7 @@ main_body:
br i1 %7, label %ENDIF, label %ELSE
ELSE: ; preds = %main_body
- %8 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+ %8 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
%9 = extractelement <4 x float> %8, i32 0
%10 = bitcast float %9 to i32
%11 = icmp eq i32 %10, 1
@@ -36,7 +36,7 @@ ENDIF: ; preds = %IF13, %ELSE, %main_
ret void
IF13: ; preds = %ELSE
- %20 = load <4 x float> addrspace(8)* null
+ %20 = load <4 x float>, <4 x float> addrspace(8)* null
%21 = extractelement <4 x float> %20, i32 0
%22 = fsub float -0.000000e+00, %21
%23 = fadd float 1.000000e+03, %22
diff --git a/test/CodeGen/R600/schedule-kernel-arg-loads.ll b/test/CodeGen/R600/schedule-kernel-arg-loads.ll
index 01d897ff18cb..6b3e0814c380 100644
--- a/test/CodeGen/R600/schedule-kernel-arg-loads.ll
+++ b/test/CodeGen/R600/schedule-kernel-arg-loads.ll
@@ -6,6 +6,13 @@
; SI-NEXT: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0xb
; SI-NEXT: s_load_dword s{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, 0xd
; SI-NEXT: s_load_dword s{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, 0xe
+; VI: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x24
+; VI-NEXT: s_nop 0
+; VI-NEXT: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x2c
+; VI-NEXT: s_nop 0
+; VI-NEXT: s_load_dword s{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, 0x34
+; VI-NEXT: s_nop 0
+; VI-NEXT: s_load_dword s{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, 0x38
define void @cluster_arg_loads(i32 addrspace(1)* %out0, i32 addrspace(1)* %out1, i32 %x, i32 %y) nounwind {
store i32 %x, i32 addrspace(1)* %out0, align 4
store i32 %y, i32 addrspace(1)* %out1, align 4
diff --git a/test/CodeGen/R600/schedule-vs-if-nested-loop-failure.ll b/test/CodeGen/R600/schedule-vs-if-nested-loop-failure.ll
index 76b655d712d0..3863afda5dd3 100644
--- a/test/CodeGen/R600/schedule-vs-if-nested-loop-failure.ll
+++ b/test/CodeGen/R600/schedule-vs-if-nested-loop-failure.ll
@@ -39,63 +39,63 @@ ENDIF: ; preds = %main_body, %Flow2
%temp3.0 = phi float [ 0.000000e+00, %main_body ], [ %101, %Flow2 ]
%15 = extractelement <4 x float> %reg1, i32 1
%16 = extractelement <4 x float> %reg1, i32 3
- %17 = load <4 x float> addrspace(9)* null
+ %17 = load <4 x float>, <4 x float> addrspace(9)* null
%18 = extractelement <4 x float> %17, i32 0
%19 = fmul float %18, %0
- %20 = load <4 x float> addrspace(9)* null
+ %20 = load <4 x float>, <4 x float> addrspace(9)* null
%21 = extractelement <4 x float> %20, i32 1
%22 = fmul float %21, %0
- %23 = load <4 x float> addrspace(9)* null
+ %23 = load <4 x float>, <4 x float> addrspace(9)* null
%24 = extractelement <4 x float> %23, i32 2
%25 = fmul float %24, %0
- %26 = load <4 x float> addrspace(9)* null
+ %26 = load <4 x float>, <4 x float> addrspace(9)* null
%27 = extractelement <4 x float> %26, i32 3
%28 = fmul float %27, %0
- %29 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 1)
+ %29 = load <4 x float>, <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(9)* null, i64 0, i32 1)
%30 = extractelement <4 x float> %29, i32 0
%31 = fmul float %30, %15
%32 = fadd float %31, %19
- %33 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 1)
+ %33 = load <4 x float>, <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(9)* null, i64 0, i32 1)
%34 = extractelement <4 x float> %33, i32 1
%35 = fmul float %34, %15
%36 = fadd float %35, %22
- %37 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 1)
+ %37 = load <4 x float>, <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(9)* null, i64 0, i32 1)
%38 = extractelement <4 x float> %37, i32 2
%39 = fmul float %38, %15
%40 = fadd float %39, %25
- %41 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 1)
+ %41 = load <4 x float>, <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(9)* null, i64 0, i32 1)
%42 = extractelement <4 x float> %41, i32 3
%43 = fmul float %42, %15
%44 = fadd float %43, %28
- %45 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 2)
+ %45 = load <4 x float>, <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(9)* null, i64 0, i32 2)
%46 = extractelement <4 x float> %45, i32 0
%47 = fmul float %46, %1
%48 = fadd float %47, %32
- %49 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 2)
+ %49 = load <4 x float>, <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(9)* null, i64 0, i32 2)
%50 = extractelement <4 x float> %49, i32 1
%51 = fmul float %50, %1
%52 = fadd float %51, %36
- %53 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 2)
+ %53 = load <4 x float>, <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(9)* null, i64 0, i32 2)
%54 = extractelement <4 x float> %53, i32 2
%55 = fmul float %54, %1
%56 = fadd float %55, %40
- %57 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 2)
+ %57 = load <4 x float>, <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(9)* null, i64 0, i32 2)
%58 = extractelement <4 x float> %57, i32 3
%59 = fmul float %58, %1
%60 = fadd float %59, %44
- %61 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 3)
+ %61 = load <4 x float>, <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(9)* null, i64 0, i32 3)
%62 = extractelement <4 x float> %61, i32 0
%63 = fmul float %62, %16
%64 = fadd float %63, %48
- %65 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 3)
+ %65 = load <4 x float>, <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(9)* null, i64 0, i32 3)
%66 = extractelement <4 x float> %65, i32 1
%67 = fmul float %66, %16
%68 = fadd float %67, %52
- %69 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 3)
+ %69 = load <4 x float>, <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(9)* null, i64 0, i32 3)
%70 = extractelement <4 x float> %69, i32 2
%71 = fmul float %70, %16
%72 = fadd float %71, %56
- %73 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 3)
+ %73 = load <4 x float>, <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(9)* null, i64 0, i32 3)
%74 = extractelement <4 x float> %73, i32 3
%75 = fmul float %74, %16
%76 = fadd float %75, %60
diff --git a/test/CodeGen/R600/schedule-vs-if-nested-loop.ll b/test/CodeGen/R600/schedule-vs-if-nested-loop.ll
index 33b20d36737b..8d980dbf8995 100644
--- a/test/CodeGen/R600/schedule-vs-if-nested-loop.ll
+++ b/test/CodeGen/R600/schedule-vs-if-nested-loop.ll
@@ -21,63 +21,63 @@ ENDIF: ; preds = %ENDIF16, %LOOP, %ma
%temp1.0 = phi float [ 1.000000e+00, %main_body ], [ %temp1.1, %LOOP ], [ %temp1.1, %ENDIF16 ]
%temp2.0 = phi float [ 0.000000e+00, %main_body ], [ %temp2.1, %LOOP ], [ %temp2.1, %ENDIF16 ]
%temp3.0 = phi float [ 0.000000e+00, %main_body ], [ %temp3.1, %LOOP ], [ %temp3.1, %ENDIF16 ]
- %11 = load <4 x float> addrspace(9)* null
+ %11 = load <4 x float>, <4 x float> addrspace(9)* null
%12 = extractelement <4 x float> %11, i32 0
%13 = fmul float %12, %0
- %14 = load <4 x float> addrspace(9)* null
+ %14 = load <4 x float>, <4 x float> addrspace(9)* null
%15 = extractelement <4 x float> %14, i32 1
%16 = fmul float %15, %0
- %17 = load <4 x float> addrspace(9)* null
+ %17 = load <4 x float>, <4 x float> addrspace(9)* null
%18 = extractelement <4 x float> %17, i32 2
%19 = fmul float %18, %0
- %20 = load <4 x float> addrspace(9)* null
+ %20 = load <4 x float>, <4 x float> addrspace(9)* null
%21 = extractelement <4 x float> %20, i32 3
%22 = fmul float %21, %0
- %23 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 1)
+ %23 = load <4 x float>, <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(9)* null, i64 0, i32 1)
%24 = extractelement <4 x float> %23, i32 0
%25 = fmul float %24, %1
%26 = fadd float %25, %13
- %27 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 1)
+ %27 = load <4 x float>, <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(9)* null, i64 0, i32 1)
%28 = extractelement <4 x float> %27, i32 1
%29 = fmul float %28, %1
%30 = fadd float %29, %16
- %31 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 1)
+ %31 = load <4 x float>, <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(9)* null, i64 0, i32 1)
%32 = extractelement <4 x float> %31, i32 2
%33 = fmul float %32, %1
%34 = fadd float %33, %19
- %35 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 1)
+ %35 = load <4 x float>, <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(9)* null, i64 0, i32 1)
%36 = extractelement <4 x float> %35, i32 3
%37 = fmul float %36, %1
%38 = fadd float %37, %22
- %39 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 2)
+ %39 = load <4 x float>, <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(9)* null, i64 0, i32 2)
%40 = extractelement <4 x float> %39, i32 0
%41 = fmul float %40, %2
%42 = fadd float %41, %26
- %43 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 2)
+ %43 = load <4 x float>, <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(9)* null, i64 0, i32 2)
%44 = extractelement <4 x float> %43, i32 1
%45 = fmul float %44, %2
%46 = fadd float %45, %30
- %47 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 2)
+ %47 = load <4 x float>, <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(9)* null, i64 0, i32 2)
%48 = extractelement <4 x float> %47, i32 2
%49 = fmul float %48, %2
%50 = fadd float %49, %34
- %51 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 2)
+ %51 = load <4 x float>, <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(9)* null, i64 0, i32 2)
%52 = extractelement <4 x float> %51, i32 3
%53 = fmul float %52, %2
%54 = fadd float %53, %38
- %55 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 3)
+ %55 = load <4 x float>, <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(9)* null, i64 0, i32 3)
%56 = extractelement <4 x float> %55, i32 0
%57 = fmul float %56, %3
%58 = fadd float %57, %42
- %59 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 3)
+ %59 = load <4 x float>, <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(9)* null, i64 0, i32 3)
%60 = extractelement <4 x float> %59, i32 1
%61 = fmul float %60, %3
%62 = fadd float %61, %46
- %63 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 3)
+ %63 = load <4 x float>, <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(9)* null, i64 0, i32 3)
%64 = extractelement <4 x float> %63, i32 2
%65 = fmul float %64, %3
%66 = fadd float %65, %50
- %67 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 3)
+ %67 = load <4 x float>, <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(9)* null, i64 0, i32 3)
%68 = extractelement <4 x float> %67, i32 3
%69 = fmul float %68, %3
%70 = fadd float %69, %54
diff --git a/test/CodeGen/R600/scratch-buffer.ll b/test/CodeGen/R600/scratch-buffer.ll
index 740328a495da..56088718ada8 100644
--- a/test/CodeGen/R600/scratch-buffer.ll
+++ b/test/CodeGen/R600/scratch-buffer.ll
@@ -1,4 +1,5 @@
; RUN: llc -verify-machineinstrs -march=amdgcn -mcpu=SI < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -march=amdgcn -mcpu=tonga < %s | FileCheck %s
; When a frame index offset is more than 12-bits, make sure we don't store
; it in mubuf's offset field.
@@ -18,23 +19,23 @@ entry:
%scratch0 = alloca [8192 x i32]
%scratch1 = alloca [8192 x i32]
- %scratchptr0 = getelementptr [8192 x i32]* %scratch0, i32 0, i32 0
+ %scratchptr0 = getelementptr [8192 x i32], [8192 x i32]* %scratch0, i32 0, i32 0
store i32 1, i32* %scratchptr0
- %scratchptr1 = getelementptr [8192 x i32]* %scratch1, i32 0, i32 0
+ %scratchptr1 = getelementptr [8192 x i32], [8192 x i32]* %scratch1, i32 0, i32 0
store i32 2, i32* %scratchptr1
%cmp = icmp eq i32 %cond, 0
br i1 %cmp, label %if, label %else
if:
- %if_ptr = getelementptr [8192 x i32]* %scratch0, i32 0, i32 %if_offset
- %if_value = load i32* %if_ptr
+ %if_ptr = getelementptr [8192 x i32], [8192 x i32]* %scratch0, i32 0, i32 %if_offset
+ %if_value = load i32, i32* %if_ptr
br label %done
else:
- %else_ptr = getelementptr [8192 x i32]* %scratch1, i32 0, i32 %else_offset
- %else_value = load i32* %else_ptr
+ %else_ptr = getelementptr [8192 x i32], [8192 x i32]* %scratch1, i32 0, i32 %else_offset
+ %else_value = load i32, i32* %else_ptr
br label %done
done:
@@ -56,26 +57,26 @@ entry:
%scratch0 = alloca [8192 x i32]
%scratch1 = alloca [8192 x i32]
- %offset0 = load i32 addrspace(1)* %offsets
- %scratchptr0 = getelementptr [8192 x i32]* %scratch0, i32 0, i32 %offset0
+ %offset0 = load i32, i32 addrspace(1)* %offsets
+ %scratchptr0 = getelementptr [8192 x i32], [8192 x i32]* %scratch0, i32 0, i32 %offset0
store i32 %offset0, i32* %scratchptr0
- %offsetptr1 = getelementptr i32 addrspace(1)* %offsets, i32 1
- %offset1 = load i32 addrspace(1)* %offsetptr1
- %scratchptr1 = getelementptr [8192 x i32]* %scratch1, i32 0, i32 %offset1
+ %offsetptr1 = getelementptr i32, i32 addrspace(1)* %offsets, i32 1
+ %offset1 = load i32, i32 addrspace(1)* %offsetptr1
+ %scratchptr1 = getelementptr [8192 x i32], [8192 x i32]* %scratch1, i32 0, i32 %offset1
store i32 %offset1, i32* %scratchptr1
%cmp = icmp eq i32 %cond, 0
br i1 %cmp, label %if, label %else
if:
- %if_ptr = getelementptr [8192 x i32]* %scratch0, i32 0, i32 %if_offset
- %if_value = load i32* %if_ptr
+ %if_ptr = getelementptr [8192 x i32], [8192 x i32]* %scratch0, i32 0, i32 %if_offset
+ %if_value = load i32, i32* %if_ptr
br label %done
else:
- %else_ptr = getelementptr [8192 x i32]* %scratch1, i32 0, i32 %else_offset
- %else_value = load i32* %else_ptr
+ %else_ptr = getelementptr [8192 x i32], [8192 x i32]* %scratch1, i32 0, i32 %else_offset
+ %else_value = load i32, i32* %else_ptr
br label %done
done:
diff --git a/test/CodeGen/R600/sdiv.ll b/test/CodeGen/R600/sdiv.ll
index 07bb41768ee3..de645353a401 100644
--- a/test/CodeGen/R600/sdiv.ll
+++ b/test/CodeGen/R600/sdiv.ll
@@ -14,9 +14,9 @@
; FUNC-LABEL: {{^}}sdiv_i32:
; EG: CF_END
define void @sdiv_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
- %den_ptr = getelementptr i32 addrspace(1)* %in, i32 1
- %num = load i32 addrspace(1) * %in
- %den = load i32 addrspace(1) * %den_ptr
+ %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
+ %num = load i32, i32 addrspace(1) * %in
+ %den = load i32, i32 addrspace(1) * %den_ptr
%result = sdiv i32 %num, %den
store i32 %result, i32 addrspace(1)* %out
ret void
@@ -24,7 +24,7 @@ define void @sdiv_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
; FUNC-LABEL: {{^}}sdiv_i32_4:
define void @sdiv_i32_4(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
- %num = load i32 addrspace(1) * %in
+ %num = load i32, i32 addrspace(1) * %in
%result = sdiv i32 %num, 4
store i32 %result, i32 addrspace(1)* %out
ret void
@@ -44,39 +44,39 @@ define void @sdiv_i32_4(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
; SI: buffer_store_dword
; SI: s_endpgm
define void @slow_sdiv_i32_3435(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
- %num = load i32 addrspace(1) * %in
+ %num = load i32, i32 addrspace(1) * %in
%result = sdiv i32 %num, 3435
store i32 %result, i32 addrspace(1)* %out
ret void
}
define void @sdiv_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
- %den_ptr = getelementptr <2 x i32> addrspace(1)* %in, i32 1
- %num = load <2 x i32> addrspace(1) * %in
- %den = load <2 x i32> addrspace(1) * %den_ptr
+ %den_ptr = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %in, i32 1
+ %num = load <2 x i32>, <2 x i32> addrspace(1) * %in
+ %den = load <2 x i32>, <2 x i32> addrspace(1) * %den_ptr
%result = sdiv <2 x i32> %num, %den
store <2 x i32> %result, <2 x i32> addrspace(1)* %out
ret void
}
define void @sdiv_v2i32_4(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
- %num = load <2 x i32> addrspace(1) * %in
+ %num = load <2 x i32>, <2 x i32> addrspace(1) * %in
%result = sdiv <2 x i32> %num, <i32 4, i32 4>
store <2 x i32> %result, <2 x i32> addrspace(1)* %out
ret void
}
define void @sdiv_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
- %den_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1
- %num = load <4 x i32> addrspace(1) * %in
- %den = load <4 x i32> addrspace(1) * %den_ptr
+ %den_ptr = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 1
+ %num = load <4 x i32>, <4 x i32> addrspace(1) * %in
+ %den = load <4 x i32>, <4 x i32> addrspace(1) * %den_ptr
%result = sdiv <4 x i32> %num, %den
store <4 x i32> %result, <4 x i32> addrspace(1)* %out
ret void
}
define void @sdiv_v4i32_4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
- %num = load <4 x i32> addrspace(1) * %in
+ %num = load <4 x i32>, <4 x i32> addrspace(1) * %in
%result = sdiv <4 x i32> %num, <i32 4, i32 4, i32 4, i32 4>
store <4 x i32> %result, <4 x i32> addrspace(1)* %out
ret void
diff --git a/test/CodeGen/R600/sdivrem24.ll b/test/CodeGen/R600/sdivrem24.ll
index e8c5c252bd72..ad5df39f5505 100644
--- a/test/CodeGen/R600/sdivrem24.ll
+++ b/test/CodeGen/R600/sdivrem24.ll
@@ -13,9 +13,9 @@
; EG-DAG: RECIP_IEEE
; EG: FLT_TO_INT
define void @sdiv24_i8(i8 addrspace(1)* %out, i8 addrspace(1)* %in) {
- %den_ptr = getelementptr i8 addrspace(1)* %in, i8 1
- %num = load i8 addrspace(1) * %in
- %den = load i8 addrspace(1) * %den_ptr
+ %den_ptr = getelementptr i8, i8 addrspace(1)* %in, i8 1
+ %num = load i8, i8 addrspace(1) * %in
+ %den = load i8, i8 addrspace(1) * %den_ptr
%result = sdiv i8 %num, %den
store i8 %result, i8 addrspace(1)* %out
ret void
@@ -32,9 +32,9 @@ define void @sdiv24_i8(i8 addrspace(1)* %out, i8 addrspace(1)* %in) {
; EG-DAG: RECIP_IEEE
; EG: FLT_TO_INT
define void @sdiv24_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %in) {
- %den_ptr = getelementptr i16 addrspace(1)* %in, i16 1
- %num = load i16 addrspace(1) * %in, align 2
- %den = load i16 addrspace(1) * %den_ptr, align 2
+ %den_ptr = getelementptr i16, i16 addrspace(1)* %in, i16 1
+ %num = load i16, i16 addrspace(1) * %in, align 2
+ %den = load i16, i16 addrspace(1) * %den_ptr, align 2
%result = sdiv i16 %num, %den
store i16 %result, i16 addrspace(1)* %out, align 2
ret void
@@ -51,9 +51,9 @@ define void @sdiv24_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %in) {
; EG-DAG: RECIP_IEEE
; EG: FLT_TO_INT
define void @sdiv24_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
- %den_ptr = getelementptr i32 addrspace(1)* %in, i32 1
- %num = load i32 addrspace(1) * %in, align 4
- %den = load i32 addrspace(1) * %den_ptr, align 4
+ %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
+ %num = load i32, i32 addrspace(1) * %in, align 4
+ %den = load i32, i32 addrspace(1) * %den_ptr, align 4
%num.i24.0 = shl i32 %num, 8
%den.i24.0 = shl i32 %den, 8
%num.i24 = ashr i32 %num.i24.0, 8
@@ -70,9 +70,9 @@ define void @sdiv24_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
; EG-NOT: INT_TO_FLT
; EG-NOT: RECIP_IEEE
define void @sdiv25_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
- %den_ptr = getelementptr i32 addrspace(1)* %in, i32 1
- %num = load i32 addrspace(1) * %in, align 4
- %den = load i32 addrspace(1) * %den_ptr, align 4
+ %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
+ %num = load i32, i32 addrspace(1) * %in, align 4
+ %den = load i32, i32 addrspace(1) * %den_ptr, align 4
%num.i24.0 = shl i32 %num, 7
%den.i24.0 = shl i32 %den, 7
%num.i24 = ashr i32 %num.i24.0, 7
@@ -89,9 +89,9 @@ define void @sdiv25_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
; EG-NOT: INT_TO_FLT
; EG-NOT: RECIP_IEEE
define void @test_no_sdiv24_i32_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
- %den_ptr = getelementptr i32 addrspace(1)* %in, i32 1
- %num = load i32 addrspace(1) * %in, align 4
- %den = load i32 addrspace(1) * %den_ptr, align 4
+ %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
+ %num = load i32, i32 addrspace(1) * %in, align 4
+ %den = load i32, i32 addrspace(1) * %den_ptr, align 4
%num.i24.0 = shl i32 %num, 8
%den.i24.0 = shl i32 %den, 7
%num.i24 = ashr i32 %num.i24.0, 8
@@ -108,9 +108,9 @@ define void @test_no_sdiv24_i32_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in)
; EG-NOT: INT_TO_FLT
; EG-NOT: RECIP_IEEE
define void @test_no_sdiv24_i32_2(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
- %den_ptr = getelementptr i32 addrspace(1)* %in, i32 1
- %num = load i32 addrspace(1) * %in, align 4
- %den = load i32 addrspace(1) * %den_ptr, align 4
+ %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
+ %num = load i32, i32 addrspace(1) * %in, align 4
+ %den = load i32, i32 addrspace(1) * %den_ptr, align 4
%num.i24.0 = shl i32 %num, 7
%den.i24.0 = shl i32 %den, 8
%num.i24 = ashr i32 %num.i24.0, 7
@@ -131,9 +131,9 @@ define void @test_no_sdiv24_i32_2(i32 addrspace(1)* %out, i32 addrspace(1)* %in)
; EG-DAG: RECIP_IEEE
; EG: FLT_TO_INT
define void @srem24_i8(i8 addrspace(1)* %out, i8 addrspace(1)* %in) {
- %den_ptr = getelementptr i8 addrspace(1)* %in, i8 1
- %num = load i8 addrspace(1) * %in
- %den = load i8 addrspace(1) * %den_ptr
+ %den_ptr = getelementptr i8, i8 addrspace(1)* %in, i8 1
+ %num = load i8, i8 addrspace(1) * %in
+ %den = load i8, i8 addrspace(1) * %den_ptr
%result = srem i8 %num, %den
store i8 %result, i8 addrspace(1)* %out
ret void
@@ -150,9 +150,9 @@ define void @srem24_i8(i8 addrspace(1)* %out, i8 addrspace(1)* %in) {
; EG-DAG: RECIP_IEEE
; EG: FLT_TO_INT
define void @srem24_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %in) {
- %den_ptr = getelementptr i16 addrspace(1)* %in, i16 1
- %num = load i16 addrspace(1) * %in, align 2
- %den = load i16 addrspace(1) * %den_ptr, align 2
+ %den_ptr = getelementptr i16, i16 addrspace(1)* %in, i16 1
+ %num = load i16, i16 addrspace(1) * %in, align 2
+ %den = load i16, i16 addrspace(1) * %den_ptr, align 2
%result = srem i16 %num, %den
store i16 %result, i16 addrspace(1)* %out, align 2
ret void
@@ -169,9 +169,9 @@ define void @srem24_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %in) {
; EG-DAG: RECIP_IEEE
; EG: FLT_TO_INT
define void @srem24_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
- %den_ptr = getelementptr i32 addrspace(1)* %in, i32 1
- %num = load i32 addrspace(1) * %in, align 4
- %den = load i32 addrspace(1) * %den_ptr, align 4
+ %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
+ %num = load i32, i32 addrspace(1) * %in, align 4
+ %den = load i32, i32 addrspace(1) * %den_ptr, align 4
%num.i24.0 = shl i32 %num, 8
%den.i24.0 = shl i32 %den, 8
%num.i24 = ashr i32 %num.i24.0, 8
@@ -188,9 +188,9 @@ define void @srem24_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
; EG-NOT: INT_TO_FLT
; EG-NOT: RECIP_IEEE
define void @srem25_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
- %den_ptr = getelementptr i32 addrspace(1)* %in, i32 1
- %num = load i32 addrspace(1) * %in, align 4
- %den = load i32 addrspace(1) * %den_ptr, align 4
+ %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
+ %num = load i32, i32 addrspace(1) * %in, align 4
+ %den = load i32, i32 addrspace(1) * %den_ptr, align 4
%num.i24.0 = shl i32 %num, 7
%den.i24.0 = shl i32 %den, 7
%num.i24 = ashr i32 %num.i24.0, 7
@@ -207,9 +207,9 @@ define void @srem25_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
; EG-NOT: INT_TO_FLT
; EG-NOT: RECIP_IEEE
define void @test_no_srem24_i32_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
- %den_ptr = getelementptr i32 addrspace(1)* %in, i32 1
- %num = load i32 addrspace(1) * %in, align 4
- %den = load i32 addrspace(1) * %den_ptr, align 4
+ %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
+ %num = load i32, i32 addrspace(1) * %in, align 4
+ %den = load i32, i32 addrspace(1) * %den_ptr, align 4
%num.i24.0 = shl i32 %num, 8
%den.i24.0 = shl i32 %den, 7
%num.i24 = ashr i32 %num.i24.0, 8
@@ -226,9 +226,9 @@ define void @test_no_srem24_i32_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in)
; EG-NOT: INT_TO_FLT
; EG-NOT: RECIP_IEEE
define void @test_no_srem24_i32_2(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
- %den_ptr = getelementptr i32 addrspace(1)* %in, i32 1
- %num = load i32 addrspace(1) * %in, align 4
- %den = load i32 addrspace(1) * %den_ptr, align 4
+ %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
+ %num = load i32, i32 addrspace(1) * %in, align 4
+ %den = load i32, i32 addrspace(1) * %den_ptr, align 4
%num.i24.0 = shl i32 %num, 7
%den.i24.0 = shl i32 %den, 8
%num.i24 = ashr i32 %num.i24.0, 7
diff --git a/test/CodeGen/R600/sdivrem64.ll b/test/CodeGen/R600/sdivrem64.ll
new file mode 100644
index 000000000000..a9b2b7f9df55
--- /dev/null
+++ b/test/CodeGen/R600/sdivrem64.ll
@@ -0,0 +1,225 @@
+;RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck --check-prefix=SI --check-prefix=GCN --check-prefix=FUNC %s
+;RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck --check-prefix=VI --check-prefix=GCN --check-prefix=FUNC %s
+;RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck --check-prefix=EG --check-prefix=FUNC %s
+
+;FUNC-LABEL: {{^}}test_sdiv:
+;EG: RECIP_UINT
+;EG: LSHL {{.*}}, 1,
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+
+;GCN: v_bfe_u32
+;GCN: v_bfe_u32
+;GCN: v_bfe_u32
+;GCN: v_bfe_u32
+;GCN: v_bfe_u32
+;GCN: v_bfe_u32
+;GCN: v_bfe_u32
+;GCN: v_bfe_u32
+;GCN: v_bfe_u32
+;GCN: v_bfe_u32
+;GCN: v_bfe_u32
+;GCN: v_bfe_u32
+;GCN: v_bfe_u32
+;GCN: v_bfe_u32
+;GCN: v_bfe_u32
+;GCN: v_bfe_u32
+;GCN: v_bfe_u32
+;GCN: v_bfe_u32
+;GCN: v_bfe_u32
+;GCN: v_bfe_u32
+;GCN: v_bfe_u32
+;GCN: v_bfe_u32
+;GCN: v_bfe_u32
+;GCN: v_bfe_u32
+;GCN: v_bfe_u32
+;GCN: v_bfe_u32
+;GCN: v_bfe_u32
+;GCN: v_bfe_u32
+;GCN: v_bfe_u32
+;GCN: v_bfe_u32
+;GCN-NOT: v_mad_f32
+;SI-NOT: v_lshr_b64
+;VI-NOT: v_lshrrev_b64
+;GCN: s_endpgm
+define void @test_sdiv(i64 addrspace(1)* %out, i64 %x, i64 %y) {
+ %result = sdiv i64 %x, %y
+ store i64 %result, i64 addrspace(1)* %out
+ ret void
+}
+
+;FUNC-LABEL: {{^}}test_srem:
+;EG: RECIP_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: BFE_UINT
+;EG: AND_INT {{.*}}, 1,
+
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN-NOT: v_mad_f32
+;SI-NOT: v_lshr_b64
+;VI-NOT: v_lshrrev_b64
+;GCN: s_endpgm
+define void @test_srem(i64 addrspace(1)* %out, i64 %x, i64 %y) {
+ %result = urem i64 %x, %y
+ store i64 %result, i64 addrspace(1)* %out
+ ret void
+}
+
+;FUNC-LABEL: {{^}}test_sdiv3264:
+;EG: RECIP_UINT
+;EG-NOT: BFE_UINT
+
+;GCN-NOT: s_bfe_u32
+;GCN-NOT: v_mad_f32
+;SI-NOT: v_lshr_b64
+;VI-NOT: v_lshrrev_b64
+;GCN: s_endpgm
+define void @test_sdiv3264(i64 addrspace(1)* %out, i64 %x, i64 %y) {
+ %1 = ashr i64 %x, 33
+ %2 = ashr i64 %y, 33
+ %result = sdiv i64 %1, %2
+ store i64 %result, i64 addrspace(1)* %out
+ ret void
+}
+
+;FUNC-LABEL: {{^}}test_srem3264:
+;EG: RECIP_UINT
+;EG-NOT: BFE_UINT
+
+;GCN-NOT: s_bfe_u32
+;GCN-NOT: v_mad_f32
+;SI-NOT: v_lshr_b64
+;VI-NOT: v_lshrrev_b64
+;GCN: s_endpgm
+define void @test_srem3264(i64 addrspace(1)* %out, i64 %x, i64 %y) {
+ %1 = ashr i64 %x, 33
+ %2 = ashr i64 %y, 33
+ %result = srem i64 %1, %2
+ store i64 %result, i64 addrspace(1)* %out
+ ret void
+}
+
+;FUNC-LABEL: {{^}}test_sdiv2464:
+;EG: INT_TO_FLT
+;EG: INT_TO_FLT
+;EG: FLT_TO_INT
+;EG-NOT: RECIP_UINT
+;EG-NOT: BFE_UINT
+
+;GCN-NOT: s_bfe_u32
+;GCN: v_mad_f32
+;SI-NOT: v_lshr_b64
+;VI-NOT: v_lshrrev_b64
+;GCN: s_endpgm
+define void @test_sdiv2464(i64 addrspace(1)* %out, i64 %x, i64 %y) {
+ %1 = ashr i64 %x, 40
+ %2 = ashr i64 %y, 40
+ %result = sdiv i64 %1, %2
+ store i64 %result, i64 addrspace(1)* %out
+ ret void
+}
+
+;FUNC-LABEL: {{^}}test_srem2464:
+;EG: INT_TO_FLT
+;EG: INT_TO_FLT
+;EG: FLT_TO_INT
+;EG-NOT: RECIP_UINT
+;EG-NOT: BFE_UINT
+
+;GCN-NOT: s_bfe_u32
+;GCN: v_mad_f32
+;SI-NOT: v_lshr_b64
+;VI-NOT: v_lshrrev_b64
+;GCN: s_endpgm
+define void @test_srem2464(i64 addrspace(1)* %out, i64 %x, i64 %y) {
+ %1 = ashr i64 %x, 40
+ %2 = ashr i64 %y, 40
+ %result = srem i64 %1, %2
+ store i64 %result, i64 addrspace(1)* %out
+ ret void
+}
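
A short note on the widths the new sdivrem64.ll cases pin down, as the EG-NOT/GCN-NOT check lines above suggest:

; ashr i64 %x, 33 leaves a value in [-2^30, 2^30 - 1], at most 31 significant bits,
;   so the *3264 cases can be done with a 32-bit expansion and the BFE sequences are absent.
; ashr i64 %x, 40 leaves a value in [-2^23, 2^23 - 1], at most 24 significant bits,
;   so the *2464 cases take the float-reciprocal path (INT_TO_FLT / v_mad_f32) instead.
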
diff --git a/test/CodeGen/R600/select64.ll b/test/CodeGen/R600/select64.ll
index 3fd648139fe2..5cebb30dc72e 100644
--- a/test/CodeGen/R600/select64.ll
+++ b/test/CodeGen/R600/select64.ll
@@ -42,10 +42,27 @@ define void @select_trunc_i64_2(i32 addrspace(1)* %out, i32 %cond, i64 %a, i64 %
; CHECK-NOT: v_cndmask_b32
define void @v_select_trunc_i64_2(i32 addrspace(1)* %out, i32 %cond, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) nounwind {
%cmp = icmp ugt i32 %cond, 5
- %a = load i64 addrspace(1)* %aptr, align 8
- %b = load i64 addrspace(1)* %bptr, align 8
+ %a = load i64, i64 addrspace(1)* %aptr, align 8
+ %b = load i64, i64 addrspace(1)* %bptr, align 8
%sel = select i1 %cmp, i64 %a, i64 %b
%trunc = trunc i64 %sel to i32
store i32 %trunc, i32 addrspace(1)* %out, align 4
ret void
}
+
+; CHECK-LABEL: {{^}}v_select_i64_split_imm:
+; CHECK: s_mov_b32 [[SHI:s[0-9]+]], 63
+; CHECK: s_mov_b32 [[SLO:s[0-9]+]], 0
+; CHECK-DAG: v_mov_b32_e32 [[VHI:v[0-9]+]], [[SHI]]
+; CHECK-DAG: v_mov_b32_e32 [[VLO:v[0-9]+]], [[SLO]]
+; CHECK-DAG: v_cndmask_b32_e64 {{v[0-9]+}}, [[VLO]], {{v[0-9]+}}
+; CHECK-DAG: v_cndmask_b32_e64 {{v[0-9]+}}, [[VHI]], {{v[0-9]+}}
+; CHECK: s_endpgm
+define void @v_select_i64_split_imm(i64 addrspace(1)* %out, i32 %cond, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) nounwind {
+ %cmp = icmp ugt i32 %cond, 5
+ %a = load i64, i64 addrspace(1)* %aptr, align 8
+ %b = load i64, i64 addrspace(1)* %bptr, align 8
+ %sel = select i1 %cmp, i64 %a, i64 270582939648 ; 63 << 32
+ store i64 %sel, i64 addrspace(1)* %out, align 8
+ ret void
+}
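
The new v_select_i64_split_imm checks encode one piece of arithmetic worth spelling out: 63 << 32 = 63 * 2^32 = 270582939648, so the i64 immediate splits into a high word of 63 and a low word of 0, which is why the test expects two s_mov_b32 values ([[SHI]]/[[SLO]]) feeding a separate v_cndmask_b32 select for each half.
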
diff --git a/test/CodeGen/R600/selectcc-cnd.ll b/test/CodeGen/R600/selectcc-cnd.ll
index 0bfca6937488..94d0ace75697 100644
--- a/test/CodeGen/R600/selectcc-cnd.ll
+++ b/test/CodeGen/R600/selectcc-cnd.ll
@@ -4,7 +4,7 @@
;CHECK: CNDE {{\*?}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, 1.0, literal.x,
;CHECK: 1073741824
define void @test(float addrspace(1)* %out, float addrspace(1)* %in) {
- %1 = load float addrspace(1)* %in
+ %1 = load float, float addrspace(1)* %in
%2 = fcmp oeq float %1, 0.0
%3 = select i1 %2, float 1.0, float 2.0
store float %3, float addrspace(1)* %out
diff --git a/test/CodeGen/R600/selectcc-cnde-int.ll b/test/CodeGen/R600/selectcc-cnde-int.ll
index d568888f7cb2..58a4ee7d62b2 100644
--- a/test/CodeGen/R600/selectcc-cnde-int.ll
+++ b/test/CodeGen/R600/selectcc-cnde-int.ll
@@ -4,7 +4,7 @@
;CHECK: CNDE_INT {{\*?}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, 1, literal.x,
;CHECK-NEXT: 2
define void @test(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
- %1 = load i32 addrspace(1)* %in
+ %1 = load i32, i32 addrspace(1)* %in
%2 = icmp eq i32 %1, 0
%3 = select i1 %2, i32 1, i32 2
store i32 %3, i32 addrspace(1)* %out
diff --git a/test/CodeGen/R600/selectcc-icmp-select-float.ll b/test/CodeGen/R600/selectcc-icmp-select-float.ll
index 6743800490b3..e870ee891e66 100644
--- a/test/CodeGen/R600/selectcc-icmp-select-float.ll
+++ b/test/CodeGen/R600/selectcc-icmp-select-float.ll
@@ -8,7 +8,7 @@
define void @test(float addrspace(1)* %out, i32 addrspace(1)* %in) {
entry:
- %0 = load i32 addrspace(1)* %in
+ %0 = load i32, i32 addrspace(1)* %in
%1 = icmp sge i32 %0, 0
%2 = select i1 %1, float 1.0, float 0.0
store float %2, float addrspace(1)* %out
diff --git a/test/CodeGen/R600/selectcc-opt.ll b/test/CodeGen/R600/selectcc-opt.ll
index 7780371329ce..65be4a626a18 100644
--- a/test/CodeGen/R600/selectcc-opt.ll
+++ b/test/CodeGen/R600/selectcc-opt.ll
@@ -19,7 +19,7 @@ entry:
br i1 %6, label %IF, label %ENDIF
IF:
- %7 = getelementptr i32 addrspace(1)* %out, i32 1
+ %7 = getelementptr i32, i32 addrspace(1)* %out, i32 1
store i32 0, i32 addrspace(1)* %7
br label %ENDIF
@@ -47,7 +47,7 @@ entry:
br i1 %6, label %ENDIF, label %IF
IF:
- %7 = getelementptr i32 addrspace(1)* %out, i32 1
+ %7 = getelementptr i32, i32 addrspace(1)* %out, i32 1
store i32 0, i32 addrspace(1)* %7
br label %ENDIF
diff --git a/test/CodeGen/R600/setcc-opt.ll b/test/CodeGen/R600/setcc-opt.ll
index a44c89f72cf5..4e6a10d6b78d 100644
--- a/test/CodeGen/R600/setcc-opt.ll
+++ b/test/CodeGen/R600/setcc-opt.ll
@@ -1,12 +1,13 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
; FUNC-LABEL: {{^}}sext_bool_icmp_eq_0:
-; SI-NOT: v_cmp
-; SI: v_cmp_ne_i32_e32 vcc,
-; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
-; SI-NEXT:buffer_store_byte [[RESULT]]
-; SI-NEXT: s_endpgm
+; GCN-NOT: v_cmp
+; GCN: v_cmp_ne_i32_e32 vcc,
+; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
+; GCN-NEXT:buffer_store_byte [[RESULT]]
+; GCN-NEXT: s_endpgm
; EG: SETNE_INT * [[CMP:T[0-9]+]].[[CMPCHAN:[XYZW]]], KC0[2].Z, KC0[2].W
; EG: AND_INT T{{[0-9]+.[XYZW]}}, PS, 1
@@ -19,11 +20,11 @@ define void @sext_bool_icmp_eq_0(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind
}
; FUNC-LABEL: {{^}}sext_bool_icmp_ne_0:
-; SI-NOT: v_cmp
-; SI: v_cmp_ne_i32_e32 vcc,
-; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
-; SI-NEXT: buffer_store_byte [[RESULT]]
-; SI-NEXT: s_endpgm
+; GCN-NOT: v_cmp
+; GCN: v_cmp_ne_i32_e32 vcc,
+; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
+; GCN-NEXT: buffer_store_byte [[RESULT]]
+; GCN-NEXT: s_endpgm
; EG: SETNE_INT * [[CMP:T[0-9]+]].[[CMPCHAN:[XYZW]]], KC0[2].Z, KC0[2].W
; EG: AND_INT T{{[0-9]+.[XYZW]}}, PS, 1
@@ -37,12 +38,12 @@ define void @sext_bool_icmp_ne_0(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind
; This really folds away to false
; FUNC-LABEL: {{^}}sext_bool_icmp_eq_1:
-; SI: v_cmp_eq_i32_e32 vcc,
-; SI-NEXT: v_cndmask_b32_e64 [[TMP:v[0-9]+]], 0, -1, vcc
-; SI-NEXT: v_cmp_eq_i32_e64 {{s\[[0-9]+:[0-9]+\]}}, [[TMP]], 1{{$}}
-; SI-NEXT: v_cndmask_b32_e64 [[TMP:v[0-9]+]], 0, 1,
-; SI-NEXT: buffer_store_byte [[TMP]]
-; SI-NEXT: s_endpgm
+; GCN: v_cmp_eq_i32_e32 vcc,
+; GCN-NEXT: v_cndmask_b32_e64 [[TMP:v[0-9]+]], 0, -1, vcc
+; GCN-NEXT: v_cmp_eq_i32_e32 vcc, 1, [[TMP]]{{$}}
+; GCN-NEXT: v_cndmask_b32_e64 [[TMP:v[0-9]+]], 0, 1,
+; GCN-NEXT: buffer_store_byte [[TMP]]
+; GCN-NEXT: s_endpgm
define void @sext_bool_icmp_eq_1(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
%icmp0 = icmp eq i32 %a, %b
%ext = sext i1 %icmp0 to i32
@@ -53,12 +54,12 @@ define void @sext_bool_icmp_eq_1(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind
; This really folds away to true
; FUNC-LABEL: {{^}}sext_bool_icmp_ne_1:
-; SI: v_cmp_ne_i32_e32 vcc,
-; SI-NEXT: v_cndmask_b32_e64 [[TMP:v[0-9]+]], 0, -1, vcc
-; SI-NEXT: v_cmp_ne_i32_e64 {{s\[[0-9]+:[0-9]+\]}}, [[TMP]], 1{{$}}
-; SI-NEXT: v_cndmask_b32_e64 [[TMP:v[0-9]+]], 0, 1,
-; SI-NEXT: buffer_store_byte [[TMP]]
-; SI-NEXT: s_endpgm
+; GCN: v_cmp_ne_i32_e32 vcc,
+; GCN-NEXT: v_cndmask_b32_e64 [[TMP:v[0-9]+]], 0, -1, vcc
+; GCN-NEXT: v_cmp_ne_i32_e32 vcc, 1, [[TMP]]{{$}}
+; GCN-NEXT: v_cndmask_b32_e64 [[TMP:v[0-9]+]], 0, 1,
+; GCN-NEXT: buffer_store_byte [[TMP]]
+; GCN-NEXT: s_endpgm
define void @sext_bool_icmp_ne_1(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
%icmp0 = icmp ne i32 %a, %b
%ext = sext i1 %icmp0 to i32
@@ -68,11 +69,11 @@ define void @sext_bool_icmp_ne_1(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind
}
; FUNC-LABEL: {{^}}zext_bool_icmp_eq_0:
-; SI-NOT: v_cmp
-; SI: v_cmp_ne_i32_e32 vcc,
-; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
-; SI-NEXT: buffer_store_byte [[RESULT]]
-; SI-NEXT: s_endpgm
+; GCN-NOT: v_cmp
+; GCN: v_cmp_ne_i32_e32 vcc,
+; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
+; GCN-NEXT: buffer_store_byte [[RESULT]]
+; GCN-NEXT: s_endpgm
define void @zext_bool_icmp_eq_0(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
%icmp0 = icmp eq i32 %a, %b
%ext = zext i1 %icmp0 to i32
@@ -82,11 +83,11 @@ define void @zext_bool_icmp_eq_0(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind
}
; FUNC-LABEL: {{^}}zext_bool_icmp_ne_0:
-; SI-NOT: v_cmp
-; SI: v_cmp_ne_i32_e32 vcc,
-; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
-; SI-NEXT: buffer_store_byte [[RESULT]]
-; SI-NEXT: s_endpgm
+; GCN-NOT: v_cmp
+; GCN: v_cmp_ne_i32_e32 vcc,
+; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
+; GCN-NEXT: buffer_store_byte [[RESULT]]
+; GCN-NEXT: s_endpgm
define void @zext_bool_icmp_ne_0(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
%icmp0 = icmp ne i32 %a, %b
%ext = zext i1 %icmp0 to i32
@@ -96,11 +97,11 @@ define void @zext_bool_icmp_ne_0(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind
}
; FUNC-LABEL: {{^}}zext_bool_icmp_eq_1:
-; SI-NOT: v_cmp
-; SI: v_cmp_eq_i32_e32 vcc,
-; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
-; SI-NEXT: buffer_store_byte [[RESULT]]
-; SI-NEXT: s_endpgm
+; GCN-NOT: v_cmp
+; GCN: v_cmp_eq_i32_e32 vcc,
+; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
+; GCN-NEXT: buffer_store_byte [[RESULT]]
+; GCN-NEXT: s_endpgm
define void @zext_bool_icmp_eq_1(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
%icmp0 = icmp eq i32 %a, %b
%ext = zext i1 %icmp0 to i32
@@ -110,10 +111,10 @@ define void @zext_bool_icmp_eq_1(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind
}
; FUNC-LABEL: {{^}}zext_bool_icmp_ne_1:
-; SI-NOT: v_cmp
-; SI: v_cmp_eq_i32_e32 vcc,
-; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
-; SI-NEXT: buffer_store_byte [[RESULT]]
+; GCN-NOT: v_cmp
+; GCN: v_cmp_eq_i32_e32 vcc,
+; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
+; GCN-NEXT: buffer_store_byte [[RESULT]]
define void @zext_bool_icmp_ne_1(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
%icmp0 = icmp ne i32 %a, %b
%ext = zext i1 %icmp0 to i32
@@ -125,11 +126,13 @@ define void @zext_bool_icmp_ne_1(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind
; FUNC-LABEL: {{^}}sext_bool_icmp_ne_k:
; SI-DAG: s_load_dword [[A:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
; SI-DAG: s_load_dword [[B:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
-; SI: v_mov_b32_e32 [[VB:v[0-9]+]], [[B]]
-; SI: v_cmp_ne_i32_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], [[VB]], 2{{$}}
-; SI: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, [[CMP]]
-; SI: buffer_store_byte
-; SI: s_endpgm
+; VI-DAG: s_load_dword [[A:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c
+; VI-DAG: s_load_dword [[B:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x30
+; GCN: v_mov_b32_e32 [[VB:v[0-9]+]], [[B]]
+; GCN: v_cmp_ne_i32_e32 vcc, 2, [[VB]]{{$}}
+; GCN: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
+; GCN: buffer_store_byte
+; GCN: s_endpgm
define void @sext_bool_icmp_ne_k(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
%icmp0 = icmp ne i32 %a, %b
%ext = sext i1 %icmp0 to i32
@@ -139,12 +142,12 @@ define void @sext_bool_icmp_ne_k(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind
}
; FUNC-LABEL: {{^}}cmp_zext_k_i8max:
-; SI: buffer_load_ubyte [[B:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0 offset:44
-; SI: v_mov_b32_e32 [[K255:v[0-9]+]], 0xff{{$}}
-; SI: v_cmp_ne_i32_e32 vcc, [[B]], [[K255]]
-; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
-; SI-NEXT: buffer_store_byte [[RESULT]]
-; SI: s_endpgm
+; GCN: buffer_load_ubyte [[B:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0 offset:44
+; GCN: v_mov_b32_e32 [[K255:v[0-9]+]], 0xff{{$}}
+; GCN: v_cmp_ne_i32_e32 vcc, [[K255]], [[B]]
+; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
+; GCN-NEXT: buffer_store_byte [[RESULT]]
+; GCN: s_endpgm
define void @cmp_zext_k_i8max(i1 addrspace(1)* %out, i8 %b) nounwind {
%b.ext = zext i8 %b to i32
%icmp0 = icmp ne i32 %b.ext, 255
@@ -153,13 +156,13 @@ define void @cmp_zext_k_i8max(i1 addrspace(1)* %out, i8 %b) nounwind {
}
; FUNC-LABEL: {{^}}cmp_sext_k_neg1:
-; SI: buffer_load_sbyte [[B:v[0-9]+]]
-; SI: v_cmp_ne_i32_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], [[B]], -1{{$}}
-; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, [[CMP]]
-; SI-NEXT: buffer_store_byte [[RESULT]]
-; SI: s_endpgm
+; GCN: buffer_load_sbyte [[B:v[0-9]+]]
+; GCN: v_cmp_ne_i32_e32 vcc, -1, [[B]]{{$}}
+; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
+; GCN-NEXT: buffer_store_byte [[RESULT]]
+; GCN: s_endpgm
define void @cmp_sext_k_neg1(i1 addrspace(1)* %out, i8 addrspace(1)* %b.ptr) nounwind {
- %b = load i8 addrspace(1)* %b.ptr
+ %b = load i8, i8 addrspace(1)* %b.ptr
%b.ext = sext i8 %b to i32
%icmp0 = icmp ne i32 %b.ext, -1
store i1 %icmp0, i1 addrspace(1)* %out
@@ -167,11 +170,11 @@ define void @cmp_sext_k_neg1(i1 addrspace(1)* %out, i8 addrspace(1)* %b.ptr) nou
}
; FUNC-LABEL: {{^}}cmp_sext_k_neg1_i8_sext_arg:
-; SI: s_load_dword [[B:s[0-9]+]]
-; SI: v_cmp_ne_i32_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], [[B]], -1{{$}}
-; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, [[CMP]]
-; SI-NEXT: buffer_store_byte [[RESULT]]
-; SI: s_endpgm
+; GCN: s_load_dword [[B:s[0-9]+]]
+; GCN: v_cmp_ne_i32_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], -1, [[B]]
+; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, [[CMP]]
+; GCN-NEXT: buffer_store_byte [[RESULT]]
+; GCN: s_endpgm
define void @cmp_sext_k_neg1_i8_sext_arg(i1 addrspace(1)* %out, i8 signext %b) nounwind {
%b.ext = sext i8 %b to i32
%icmp0 = icmp ne i32 %b.ext, -1
@@ -184,12 +187,12 @@ define void @cmp_sext_k_neg1_i8_sext_arg(i1 addrspace(1)* %out, i8 signext %b) n
; Should do a buffer_load_sbyte and compare with -1
; FUNC-LABEL: {{^}}cmp_sext_k_neg1_i8_arg:
-; SI-DAG: buffer_load_ubyte [[B:v[0-9]+]]
-; SI-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0xff{{$}}
-; SI: v_cmp_ne_i32_e32 vcc, [[B]], [[K]]{{$}}
-; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
-; SI-NEXT: buffer_store_byte [[RESULT]]
-; SI: s_endpgm
+; GCN-DAG: buffer_load_ubyte [[B:v[0-9]+]]
+; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0xff{{$}}
+; GCN: v_cmp_ne_i32_e32 vcc, [[K]], [[B]]{{$}}
+; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
+; GCN-NEXT: buffer_store_byte [[RESULT]]
+; GCN: s_endpgm
define void @cmp_sext_k_neg1_i8_arg(i1 addrspace(1)* %out, i8 %b) nounwind {
%b.ext = sext i8 %b to i32
%icmp0 = icmp ne i32 %b.ext, -1
@@ -198,9 +201,9 @@ define void @cmp_sext_k_neg1_i8_arg(i1 addrspace(1)* %out, i8 %b) nounwind {
}
; FUNC-LABEL: {{^}}cmp_zext_k_neg1:
-; SI: v_mov_b32_e32 [[RESULT:v[0-9]+]], 1{{$}}
-; SI-NEXT: buffer_store_byte [[RESULT]]
-; SI: s_endpgm
+; GCN: v_mov_b32_e32 [[RESULT:v[0-9]+]], 1{{$}}
+; GCN: buffer_store_byte [[RESULT]]
+; GCN: s_endpgm
define void @cmp_zext_k_neg1(i1 addrspace(1)* %out, i8 %b) nounwind {
%b.ext = zext i8 %b to i32
%icmp0 = icmp ne i32 %b.ext, -1
@@ -209,9 +212,9 @@ define void @cmp_zext_k_neg1(i1 addrspace(1)* %out, i8 %b) nounwind {
}
; FUNC-LABEL: {{^}}zext_bool_icmp_ne_k:
-; SI: v_mov_b32_e32 [[RESULT:v[0-9]+]], 1{{$}}
-; SI-NEXT: buffer_store_byte [[RESULT]]
-; SI-NEXT: s_endpgm
+; GCN: v_mov_b32_e32 [[RESULT:v[0-9]+]], 1{{$}}
+; GCN: buffer_store_byte [[RESULT]]
+; GCN-NEXT: s_endpgm
define void @zext_bool_icmp_ne_k(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
%icmp0 = icmp ne i32 %a, %b
%ext = zext i1 %icmp0 to i32
@@ -221,9 +224,9 @@ define void @zext_bool_icmp_ne_k(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind
}
; FUNC-LABEL: {{^}}zext_bool_icmp_eq_k:
-; SI: v_mov_b32_e32 [[RESULT:v[0-9]+]], 0{{$}}
-; SI-NEXT: buffer_store_byte [[RESULT]]
-; SI-NEXT: s_endpgm
+; GCN: v_mov_b32_e32 [[RESULT:v[0-9]+]], 0{{$}}
+; GCN: buffer_store_byte [[RESULT]]
+; GCN-NEXT: s_endpgm
define void @zext_bool_icmp_eq_k(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
%icmp0 = icmp ne i32 %a, %b
%ext = zext i1 %icmp0 to i32
diff --git a/test/CodeGen/R600/setcc.ll b/test/CodeGen/R600/setcc.ll
index f9c7e4f36128..f33a82df5ffb 100644
--- a/test/CodeGen/R600/setcc.ll
+++ b/test/CodeGen/R600/setcc.ll
@@ -21,9 +21,9 @@ define void @setcc_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %a, <2 x i32> %
; R600-DAG: SETE_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
define void @setcc_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
- %b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1
- %a = load <4 x i32> addrspace(1) * %in
- %b = load <4 x i32> addrspace(1) * %b_ptr
+ %b_ptr = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 1
+ %a = load <4 x i32>, <4 x i32> addrspace(1) * %in
+ %b = load <4 x i32>, <4 x i32> addrspace(1) * %b_ptr
%result = icmp eq <4 x i32> %a, %b
%sext = sext <4 x i1> %result to <4 x i32>
store <4 x i32> %sext, <4 x i32> addrspace(1)* %out
@@ -344,11 +344,11 @@ entry:
; SI: s_endpgm
define void @v3i32_eq(<3 x i32> addrspace(1)* %out, <3 x i32> addrspace(1)* %ptra, <3 x i32> addrspace(1)* %ptrb) {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep.a = getelementptr <3 x i32> addrspace(1)* %ptra, i32 %tid
- %gep.b = getelementptr <3 x i32> addrspace(1)* %ptrb, i32 %tid
- %gep.out = getelementptr <3 x i32> addrspace(1)* %out, i32 %tid
- %a = load <3 x i32> addrspace(1)* %gep.a
- %b = load <3 x i32> addrspace(1)* %gep.b
+ %gep.a = getelementptr <3 x i32>, <3 x i32> addrspace(1)* %ptra, i32 %tid
+ %gep.b = getelementptr <3 x i32>, <3 x i32> addrspace(1)* %ptrb, i32 %tid
+ %gep.out = getelementptr <3 x i32>, <3 x i32> addrspace(1)* %out, i32 %tid
+ %a = load <3 x i32>, <3 x i32> addrspace(1)* %gep.a
+ %b = load <3 x i32>, <3 x i32> addrspace(1)* %gep.b
%cmp = icmp eq <3 x i32> %a, %b
%ext = sext <3 x i1> %cmp to <3 x i32>
store <3 x i32> %ext, <3 x i32> addrspace(1)* %gep.out
@@ -365,11 +365,11 @@ define void @v3i32_eq(<3 x i32> addrspace(1)* %out, <3 x i32> addrspace(1)* %ptr
; SI: s_endpgm
define void @v3i8_eq(<3 x i8> addrspace(1)* %out, <3 x i8> addrspace(1)* %ptra, <3 x i8> addrspace(1)* %ptrb) {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep.a = getelementptr <3 x i8> addrspace(1)* %ptra, i32 %tid
- %gep.b = getelementptr <3 x i8> addrspace(1)* %ptrb, i32 %tid
- %gep.out = getelementptr <3 x i8> addrspace(1)* %out, i32 %tid
- %a = load <3 x i8> addrspace(1)* %gep.a
- %b = load <3 x i8> addrspace(1)* %gep.b
+ %gep.a = getelementptr <3 x i8>, <3 x i8> addrspace(1)* %ptra, i32 %tid
+ %gep.b = getelementptr <3 x i8>, <3 x i8> addrspace(1)* %ptrb, i32 %tid
+ %gep.out = getelementptr <3 x i8>, <3 x i8> addrspace(1)* %out, i32 %tid
+ %a = load <3 x i8>, <3 x i8> addrspace(1)* %gep.a
+ %b = load <3 x i8>, <3 x i8> addrspace(1)* %gep.b
%cmp = icmp eq <3 x i8> %a, %b
%ext = sext <3 x i1> %cmp to <3 x i8>
store <3 x i8> %ext, <3 x i8> addrspace(1)* %gep.out
diff --git a/test/CodeGen/R600/sext-in-reg.ll b/test/CodeGen/R600/sext-in-reg.ll
index 3260179921f9..d9ad4935968d 100644
--- a/test/CodeGen/R600/sext-in-reg.ll
+++ b/test/CodeGen/R600/sext-in-reg.ll
@@ -187,11 +187,11 @@ define void @sext_in_reg_i32_to_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) noun
; SI: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
define void @v_sext_in_reg_i1_to_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x()
- %a.gep = getelementptr i64 addrspace(1)* %aptr, i32 %tid
- %b.gep = getelementptr i64 addrspace(1)* %aptr, i32 %tid
- %out.gep = getelementptr i64 addrspace(1)* %out, i32 %tid
- %a = load i64 addrspace(1)* %a.gep, align 8
- %b = load i64 addrspace(1)* %b.gep, align 8
+ %a.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
+ %b.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
+ %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %tid
+ %a = load i64, i64 addrspace(1)* %a.gep, align 8
+ %b = load i64, i64 addrspace(1)* %b.gep, align 8
%c = shl i64 %a, %b
%shl = shl i64 %c, 63
@@ -208,11 +208,11 @@ define void @v_sext_in_reg_i1_to_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %
; SI: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
define void @v_sext_in_reg_i8_to_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x()
- %a.gep = getelementptr i64 addrspace(1)* %aptr, i32 %tid
- %b.gep = getelementptr i64 addrspace(1)* %aptr, i32 %tid
- %out.gep = getelementptr i64 addrspace(1)* %out, i32 %tid
- %a = load i64 addrspace(1)* %a.gep, align 8
- %b = load i64 addrspace(1)* %b.gep, align 8
+ %a.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
+ %b.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
+ %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %tid
+ %a = load i64, i64 addrspace(1)* %a.gep, align 8
+ %b = load i64, i64 addrspace(1)* %b.gep, align 8
%c = shl i64 %a, %b
%shl = shl i64 %c, 56
@@ -229,11 +229,11 @@ define void @v_sext_in_reg_i8_to_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %
; SI: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
define void @v_sext_in_reg_i16_to_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x()
- %a.gep = getelementptr i64 addrspace(1)* %aptr, i32 %tid
- %b.gep = getelementptr i64 addrspace(1)* %aptr, i32 %tid
- %out.gep = getelementptr i64 addrspace(1)* %out, i32 %tid
- %a = load i64 addrspace(1)* %a.gep, align 8
- %b = load i64 addrspace(1)* %b.gep, align 8
+ %a.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
+ %b.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
+ %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %tid
+ %a = load i64, i64 addrspace(1)* %a.gep, align 8
+ %b = load i64, i64 addrspace(1)* %b.gep, align 8
%c = shl i64 %a, %b
%shl = shl i64 %c, 48
@@ -249,11 +249,11 @@ define void @v_sext_in_reg_i16_to_i64(i64 addrspace(1)* %out, i64 addrspace(1)*
; SI: buffer_store_dwordx2 v{{\[}}[[LO]]:[[SHR]]{{\]}}
define void @v_sext_in_reg_i32_to_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x()
- %a.gep = getelementptr i64 addrspace(1)* %aptr, i32 %tid
- %b.gep = getelementptr i64 addrspace(1)* %aptr, i32 %tid
- %out.gep = getelementptr i64 addrspace(1)* %out, i32 %tid
- %a = load i64 addrspace(1)* %a.gep, align 8
- %b = load i64 addrspace(1)* %b.gep, align 8
+ %a.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
+ %b.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
+ %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %tid
+ %a = load i64, i64 addrspace(1)* %a.gep, align 8
+ %b = load i64, i64 addrspace(1)* %b.gep, align 8
%c = shl i64 %a, %b
%shl = shl i64 %c, 32
@@ -263,9 +263,9 @@ define void @v_sext_in_reg_i32_to_i64(i64 addrspace(1)* %out, i64 addrspace(1)*
}
; FUNC-LABEL: {{^}}sext_in_reg_i1_in_i32_other_amount:
-; SI-NOT: {{[^@]}}bfe
-; SI: s_lshl_b32 [[REG:s[0-9]+]], {{s[0-9]+}}, 6
-; SI: s_ashr_i32 {{s[0-9]+}}, [[REG]], 7
+; SI-NOT: s_lshl
+; SI-NOT: s_ashr
+; SI: s_bfe_i32 {{s[0-9]+}}, {{s[0-9]+}}, 0x190001
; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+\.[XYZW]]], [[ADDR:T[0-9]+.[XYZW]]]
; EG-NOT: BFE
@@ -282,10 +282,10 @@ define void @sext_in_reg_i1_in_i32_other_amount(i32 addrspace(1)* %out, i32 %a,
}
; FUNC-LABEL: {{^}}sext_in_reg_v2i1_in_v2i32_other_amount:
-; SI-DAG: s_lshl_b32 [[REG0:s[0-9]+]], {{s[0-9]}}, 6
-; SI-DAG: s_ashr_i32 {{s[0-9]+}}, [[REG0]], 7
-; SI-DAG: s_lshl_b32 [[REG1:s[0-9]+]], {{s[0-9]}}, 6
-; SI-DAG: s_ashr_i32 {{s[0-9]+}}, [[REG1]], 7
+; SI-NOT: s_lshl
+; SI-NOT: s_ashr
+; SI-DAG: s_bfe_i32 {{s[0-9]+}}, {{s[0-9]+}}, 0x190001
+; SI-DAG: s_bfe_i32 {{s[0-9]+}}, {{s[0-9]+}}, 0x190001
; SI: s_endpgm
; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+]]{{\.[XYZW][XYZW]}}, [[ADDR:T[0-9]+.[XYZW]]]
@@ -428,8 +428,8 @@ define void @testcase_3(i8 addrspace(1)* %out, i8 %a) nounwind {
; SI: v_bfe_i32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 8
; SI: v_bfe_i32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 8
define void @vgpr_sext_in_reg_v4i8_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %a, <4 x i32> addrspace(1)* %b) nounwind {
- %loada = load <4 x i32> addrspace(1)* %a, align 16
- %loadb = load <4 x i32> addrspace(1)* %b, align 16
+ %loada = load <4 x i32>, <4 x i32> addrspace(1)* %a, align 16
+ %loadb = load <4 x i32>, <4 x i32> addrspace(1)* %b, align 16
%c = add <4 x i32> %loada, %loadb ; add to prevent folding into extload
%shl = shl <4 x i32> %c, <i32 24, i32 24, i32 24, i32 24>
%ashr = ashr <4 x i32> %shl, <i32 24, i32 24, i32 24, i32 24>
@@ -441,8 +441,8 @@ define void @vgpr_sext_in_reg_v4i8_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i
; SI: v_bfe_i32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 16
; SI: v_bfe_i32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 16
define void @vgpr_sext_in_reg_v4i16_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %a, <4 x i32> addrspace(1)* %b) nounwind {
- %loada = load <4 x i32> addrspace(1)* %a, align 16
- %loadb = load <4 x i32> addrspace(1)* %b, align 16
+ %loada = load <4 x i32>, <4 x i32> addrspace(1)* %a, align 16
+ %loadb = load <4 x i32>, <4 x i32> addrspace(1)* %b, align 16
%c = add <4 x i32> %loada, %loadb ; add to prevent folding into extload
%shl = shl <4 x i32> %c, <i32 16, i32 16, i32 16, i32 16>
%ashr = ashr <4 x i32> %shl, <i32 16, i32 16, i32 16, i32 16>
@@ -459,7 +459,7 @@ define void @vgpr_sext_in_reg_v4i16_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x
; SI: v_bfe_i32
; SI: buffer_store_short
define void @sext_in_reg_to_illegal_type(i16 addrspace(1)* nocapture %out, i8 addrspace(1)* nocapture %src) nounwind {
- %tmp5 = load i8 addrspace(1)* %src, align 1
+ %tmp5 = load i8, i8 addrspace(1)* %src, align 1
%tmp2 = sext i8 %tmp5 to i32
%tmp3 = tail call i32 @llvm.AMDGPU.imax(i32 %tmp2, i32 0) nounwind readnone
%tmp4 = trunc i32 %tmp3 to i8
@@ -474,7 +474,7 @@ declare i32 @llvm.AMDGPU.bfe.i32(i32, i32, i32) nounwind readnone
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @bfe_0_width(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) nounwind {
- %load = load i32 addrspace(1)* %ptr, align 4
+ %load = load i32, i32 addrspace(1)* %ptr, align 4
%bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %load, i32 8, i32 0) nounwind readnone
store i32 %bfe, i32 addrspace(1)* %out, align 4
ret void
@@ -485,7 +485,7 @@ define void @bfe_0_width(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) nounwin
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @bfe_8_bfe_8(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) nounwind {
- %load = load i32 addrspace(1)* %ptr, align 4
+ %load = load i32, i32 addrspace(1)* %ptr, align 4
%bfe0 = call i32 @llvm.AMDGPU.bfe.i32(i32 %load, i32 0, i32 8) nounwind readnone
%bfe1 = call i32 @llvm.AMDGPU.bfe.i32(i32 %bfe0, i32 0, i32 8) nounwind readnone
store i32 %bfe1, i32 addrspace(1)* %out, align 4
@@ -496,7 +496,7 @@ define void @bfe_8_bfe_8(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) nounwin
; SI: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 8
; SI: s_endpgm
define void @bfe_8_bfe_16(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) nounwind {
- %load = load i32 addrspace(1)* %ptr, align 4
+ %load = load i32, i32 addrspace(1)* %ptr, align 4
%bfe0 = call i32 @llvm.AMDGPU.bfe.i32(i32 %load, i32 0, i32 8) nounwind readnone
%bfe1 = call i32 @llvm.AMDGPU.bfe.i32(i32 %bfe0, i32 0, i32 16) nounwind readnone
store i32 %bfe1, i32 addrspace(1)* %out, align 4
@@ -509,7 +509,7 @@ define void @bfe_8_bfe_16(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) nounwi
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @bfe_16_bfe_8(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) nounwind {
- %load = load i32 addrspace(1)* %ptr, align 4
+ %load = load i32, i32 addrspace(1)* %ptr, align 4
%bfe0 = call i32 @llvm.AMDGPU.bfe.i32(i32 %load, i32 0, i32 16) nounwind readnone
%bfe1 = call i32 @llvm.AMDGPU.bfe.i32(i32 %bfe0, i32 0, i32 8) nounwind readnone
store i32 %bfe1, i32 addrspace(1)* %out, align 4
@@ -545,7 +545,7 @@ define void @sext_in_reg_i8_to_i32_bfe_wrong(i32 addrspace(1)* %out, i32 %a, i32
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @sextload_i8_to_i32_bfe(i32 addrspace(1)* %out, i8 addrspace(1)* %ptr) nounwind {
- %load = load i8 addrspace(1)* %ptr, align 1
+ %load = load i8, i8 addrspace(1)* %ptr, align 1
%sext = sext i8 %load to i32
%bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %sext, i32 0, i32 8) nounwind readnone
%shl = shl i32 %bfe, 24
@@ -554,12 +554,12 @@ define void @sextload_i8_to_i32_bfe(i32 addrspace(1)* %out, i8 addrspace(1)* %pt
ret void
}
-; FUNC-LABEL: {{^}}sextload_i8_to_i32_bfe_0:
; SI: .text
+; FUNC-LABEL: {{^}}sextload_i8_to_i32_bfe_0:{{.*$}}
; SI-NOT: {{[^@]}}bfe
; SI: s_endpgm
define void @sextload_i8_to_i32_bfe_0(i32 addrspace(1)* %out, i8 addrspace(1)* %ptr) nounwind {
- %load = load i8 addrspace(1)* %ptr, align 1
+ %load = load i8, i8 addrspace(1)* %ptr, align 1
%sext = sext i8 %load to i32
%bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %sext, i32 8, i32 0) nounwind readnone
%shl = shl i32 %bfe, 24
@@ -574,7 +574,7 @@ define void @sextload_i8_to_i32_bfe_0(i32 addrspace(1)* %out, i8 addrspace(1)* %
; SI: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 1
; SI: s_endpgm
define void @sext_in_reg_i1_bfe_offset_0(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
- %x = load i32 addrspace(1)* %in, align 4
+ %x = load i32, i32 addrspace(1)* %in, align 4
%shl = shl i32 %x, 31
%shr = ashr i32 %shl, 31
%bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %shr, i32 0, i32 1)
@@ -589,7 +589,7 @@ define void @sext_in_reg_i1_bfe_offset_0(i32 addrspace(1)* %out, i32 addrspace(1
; SI: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 1, 1
; SI: s_endpgm
define void @sext_in_reg_i1_bfe_offset_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
- %x = load i32 addrspace(1)* %in, align 4
+ %x = load i32, i32 addrspace(1)* %in, align 4
%shl = shl i32 %x, 30
%shr = ashr i32 %shl, 30
%bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %shr, i32 1, i32 1)
@@ -599,12 +599,13 @@ define void @sext_in_reg_i1_bfe_offset_1(i32 addrspace(1)* %out, i32 addrspace(1
; FUNC-LABEL: {{^}}sext_in_reg_i2_bfe_offset_1:
; SI: buffer_load_dword
-; SI: v_lshlrev_b32_e32 v{{[0-9]+}}, 30, v{{[0-9]+}}
-; SI: v_ashrrev_i32_e32 v{{[0-9]+}}, 30, v{{[0-9]+}}
+; SI-NOT: v_lshl
+; SI-NOT: v_ashr
+; SI: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 2
; SI: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 1, 2
; SI: s_endpgm
define void @sext_in_reg_i2_bfe_offset_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
- %x = load i32 addrspace(1)* %in, align 4
+ %x = load i32, i32 addrspace(1)* %in, align 4
%shl = shl i32 %x, 30
%shr = ashr i32 %shl, 30
%bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %shr, i32 1, i32 2)
diff --git a/test/CodeGen/R600/sgpr-control-flow.ll b/test/CodeGen/R600/sgpr-control-flow.ll
index f0236acc6daa..38289ced632a 100644
--- a/test/CodeGen/R600/sgpr-control-flow.ll
+++ b/test/CodeGen/R600/sgpr-control-flow.ll
@@ -64,15 +64,15 @@ endif:
; SI-LABEL: {{^}}sgpr_if_else_valu_cmp_phi_br:
; SI: buffer_load_dword [[AVAL:v[0-9]+]]
-; SI: v_cmp_lt_i32_e64 [[CMP_IF:s\[[0-9]+:[0-9]+\]]], [[AVAL]], 0
+; SI: v_cmp_gt_i32_e32 [[CMP_IF:vcc]], 0, [[AVAL]]
; SI: v_cndmask_b32_e64 [[V_CMP:v[0-9]+]], 0, -1, [[CMP_IF]]
; SI: BB2_1:
; SI: buffer_load_dword [[AVAL:v[0-9]+]]
-; SI: v_cmp_eq_i32_e64 [[CMP_ELSE:s\[[0-9]+:[0-9]+\]]], [[AVAL]], 0
+; SI: v_cmp_eq_i32_e32 [[CMP_ELSE:vcc]], 0, [[AVAL]]
; SI: v_cndmask_b32_e64 [[V_CMP]], 0, -1, [[CMP_ELSE]]
-; SI: v_cmp_ne_i32_e64 [[CMP_CMP:s\[[0-9]+:[0-9]+\]]], [[V_CMP]], 0
+; SI: v_cmp_ne_i32_e32 [[CMP_CMP:vcc]], 0, [[V_CMP]]
; SI: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, [[CMP_CMP]]
; SI: buffer_store_dword [[RESULT]]
define void @sgpr_if_else_valu_cmp_phi_br(i32 addrspace(1)* %out, i32 addrspace(1)* %a, i32 addrspace(1)* %b) {
@@ -82,14 +82,14 @@ entry:
br i1 %tmp1, label %if, label %else
if:
- %gep.if = getelementptr i32 addrspace(1)* %a, i32 %tid
- %a.val = load i32 addrspace(1)* %gep.if
+ %gep.if = getelementptr i32, i32 addrspace(1)* %a, i32 %tid
+ %a.val = load i32, i32 addrspace(1)* %gep.if
%cmp.if = icmp eq i32 %a.val, 0
br label %endif
else:
- %gep.else = getelementptr i32 addrspace(1)* %b, i32 %tid
- %b.val = load i32 addrspace(1)* %gep.else
+ %gep.else = getelementptr i32, i32 addrspace(1)* %b, i32 %tid
+ %b.val = load i32, i32 addrspace(1)* %gep.else
%cmp.else = icmp slt i32 %b.val, 0
br label %endif
diff --git a/test/CodeGen/R600/sgpr-copy-duplicate-operand.ll b/test/CodeGen/R600/sgpr-copy-duplicate-operand.ll
index 893f5a3c50db..df67fcca22fe 100644
--- a/test/CodeGen/R600/sgpr-copy-duplicate-operand.ll
+++ b/test/CodeGen/R600/sgpr-copy-duplicate-operand.ll
@@ -7,7 +7,7 @@
; SI-LABEL: {{^}}test_dup_operands:
; SI: v_add_i32_e32
define void @test_dup_operands(<2 x i32> addrspace(1)* noalias %out, <2 x i32> addrspace(1)* noalias %in) {
- %a = load <2 x i32> addrspace(1)* %in
+ %a = load <2 x i32>, <2 x i32> addrspace(1)* %in
%lo = extractelement <2 x i32> %a, i32 0
%hi = extractelement <2 x i32> %a, i32 1
%add = add i32 %lo, %lo
diff --git a/test/CodeGen/R600/sgpr-copy.ll b/test/CodeGen/R600/sgpr-copy.ll
index 57cbadd9239d..b849c4038bc7 100644
--- a/test/CodeGen/R600/sgpr-copy.ll
+++ b/test/CodeGen/R600/sgpr-copy.ll
@@ -9,8 +9,8 @@
define void @phi1(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
- %20 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
- %21 = load <16 x i8> addrspace(2)* %20, !tbaa !1
+ %20 = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %0, i32 0
+ %21 = load <16 x i8>, <16 x i8> addrspace(2)* %20, !tbaa !1
%22 = call float @llvm.SI.load.const(<16 x i8> %21, i32 0)
%23 = call float @llvm.SI.load.const(<16 x i8> %21, i32 16)
%24 = call float @llvm.SI.load.const(<16 x i8> %21, i32 32)
@@ -33,8 +33,8 @@ ENDIF: ; preds = %main_body, %ELSE
; CHECK-LABEL: {{^}}phi2:
define void @phi2(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
- %20 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
- %21 = load <16 x i8> addrspace(2)* %20, !tbaa !1
+ %20 = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %0, i32 0
+ %21 = load <16 x i8>, <16 x i8> addrspace(2)* %20, !tbaa !1
%22 = call float @llvm.SI.load.const(<16 x i8> %21, i32 16)
%23 = call float @llvm.SI.load.const(<16 x i8> %21, i32 32)
%24 = call float @llvm.SI.load.const(<16 x i8> %21, i32 36)
@@ -50,10 +50,10 @@ main_body:
%34 = call float @llvm.SI.load.const(<16 x i8> %21, i32 84)
%35 = call float @llvm.SI.load.const(<16 x i8> %21, i32 88)
%36 = call float @llvm.SI.load.const(<16 x i8> %21, i32 92)
- %37 = getelementptr <32 x i8> addrspace(2)* %2, i32 0
- %38 = load <32 x i8> addrspace(2)* %37, !tbaa !1
- %39 = getelementptr <16 x i8> addrspace(2)* %1, i32 0
- %40 = load <16 x i8> addrspace(2)* %39, !tbaa !1
+ %37 = getelementptr <32 x i8>, <32 x i8> addrspace(2)* %2, i32 0
+ %38 = load <32 x i8>, <32 x i8> addrspace(2)* %37, !tbaa !1
+ %39 = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %1, i32 0
+ %40 = load <16 x i8>, <16 x i8> addrspace(2)* %39, !tbaa !1
%41 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %3, <2 x i32> %5)
%42 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %3, <2 x i32> %5)
%43 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %3, <2 x i32> %5)
@@ -154,8 +154,8 @@ ENDIF24: ; preds = %ENDIF, %IF25
define void @loop(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
- %20 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
- %21 = load <16 x i8> addrspace(2)* %20, !tbaa !1
+ %20 = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %0, i32 0
+ %21 = load <16 x i8>, <16 x i8> addrspace(2)* %20, !tbaa !1
%22 = call float @llvm.SI.load.const(<16 x i8> %21, i32 0)
%23 = call float @llvm.SI.load.const(<16 x i8> %21, i32 4)
%24 = call float @llvm.SI.load.const(<16 x i8> %21, i32 8)
@@ -236,13 +236,13 @@ declare i32 @llvm.SI.packf16(float, float) #1
define void @sample_v3([17 x <16 x i8>] addrspace(2)* byval, [32 x <16 x i8>] addrspace(2)* byval, [16 x <32 x i8>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
entry:
- %21 = getelementptr [17 x <16 x i8>] addrspace(2)* %0, i64 0, i32 0
- %22 = load <16 x i8> addrspace(2)* %21, !tbaa !2
+ %21 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %0, i64 0, i32 0
+ %22 = load <16 x i8>, <16 x i8> addrspace(2)* %21, !tbaa !2
%23 = call float @llvm.SI.load.const(<16 x i8> %22, i32 16)
- %24 = getelementptr [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 0
- %25 = load <32 x i8> addrspace(2)* %24, !tbaa !2
- %26 = getelementptr [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 0
- %27 = load <16 x i8> addrspace(2)* %26, !tbaa !2
+ %24 = getelementptr [16 x <32 x i8>], [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 0
+ %25 = load <32 x i8>, <32 x i8> addrspace(2)* %24, !tbaa !2
+ %26 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 0
+ %27 = load <16 x i8>, <16 x i8> addrspace(2)* %26, !tbaa !2
%28 = fcmp oeq float %23, 0.0
br i1 %28, label %if, label %else
@@ -276,7 +276,7 @@ endif:
; CHECK: s_endpgm
define void @copy1(float addrspace(1)* %out, float addrspace(1)* %in0) {
entry:
- %0 = load float addrspace(1)* %in0
+ %0 = load float, float addrspace(1)* %in0
%1 = fcmp oeq float %0, 0.0
br i1 %1, label %if0, label %endif
@@ -334,13 +334,13 @@ attributes #0 = { "ShaderType"="0" }
; CHECK: s_endpgm
define void @sample_rsrc([6 x <16 x i8>] addrspace(2)* byval %arg, [17 x <16 x i8>] addrspace(2)* byval %arg1, [16 x <4 x i32>] addrspace(2)* byval %arg2, [32 x <8 x i32>] addrspace(2)* byval %arg3, float inreg %arg4, i32 inreg %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <3 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, <2 x i32> %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, i32 %arg19, float %arg20, float %arg21) #0 {
bb:
- %tmp = getelementptr [17 x <16 x i8>] addrspace(2)* %arg1, i32 0, i32 0
- %tmp22 = load <16 x i8> addrspace(2)* %tmp, !tbaa !0
+ %tmp = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %arg1, i32 0, i32 0
+ %tmp22 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp, !tbaa !0
%tmp23 = call float @llvm.SI.load.const(<16 x i8> %tmp22, i32 16)
- %tmp25 = getelementptr [32 x <8 x i32>] addrspace(2)* %arg3, i32 0, i32 0
- %tmp26 = load <8 x i32> addrspace(2)* %tmp25, !tbaa !0
- %tmp27 = getelementptr [16 x <4 x i32>] addrspace(2)* %arg2, i32 0, i32 0
- %tmp28 = load <4 x i32> addrspace(2)* %tmp27, !tbaa !0
+ %tmp25 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %arg3, i32 0, i32 0
+ %tmp26 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp25, !tbaa !0
+ %tmp27 = getelementptr [16 x <4 x i32>], [16 x <4 x i32>] addrspace(2)* %arg2, i32 0, i32 0
+ %tmp28 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp27, !tbaa !0
%tmp29 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %arg5, <2 x i32> %arg7)
%tmp30 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %arg5, <2 x i32> %arg7)
%tmp31 = bitcast float %tmp23 to i32
diff --git a/test/CodeGen/R600/shl.ll b/test/CodeGen/R600/shl.ll
index f89353b10844..53b63dc4b8ad 100644
--- a/test/CodeGen/R600/shl.ll
+++ b/test/CodeGen/R600/shl.ll
@@ -15,9 +15,9 @@
;VI: v_lshlrev_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
define void @shl_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
- %b_ptr = getelementptr <2 x i32> addrspace(1)* %in, i32 1
- %a = load <2 x i32> addrspace(1) * %in
- %b = load <2 x i32> addrspace(1) * %b_ptr
+ %b_ptr = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %in, i32 1
+ %a = load <2 x i32>, <2 x i32> addrspace(1) * %in
+ %b = load <2 x i32>, <2 x i32> addrspace(1) * %b_ptr
%result = shl <2 x i32> %a, %b
store <2 x i32> %result, <2 x i32> addrspace(1)* %out
ret void
@@ -42,9 +42,9 @@ define void @shl_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in
;VI: v_lshlrev_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
define void @shl_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
- %b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1
- %a = load <4 x i32> addrspace(1) * %in
- %b = load <4 x i32> addrspace(1) * %b_ptr
+ %b_ptr = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 1
+ %a = load <4 x i32>, <4 x i32> addrspace(1) * %in
+ %b = load <4 x i32>, <4 x i32> addrspace(1) * %b_ptr
%result = shl <4 x i32> %a, %b
store <4 x i32> %result, <4 x i32> addrspace(1)* %out
ret void
@@ -69,9 +69,9 @@ define void @shl_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in
;VI: v_lshlrev_b64 {{v\[[0-9]+:[0-9]+\], v[0-9]+, v\[[0-9]+:[0-9]+\]}}
define void @shl_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
- %b_ptr = getelementptr i64 addrspace(1)* %in, i64 1
- %a = load i64 addrspace(1) * %in
- %b = load i64 addrspace(1) * %b_ptr
+ %b_ptr = getelementptr i64, i64 addrspace(1)* %in, i64 1
+ %a = load i64, i64 addrspace(1) * %in
+ %b = load i64, i64 addrspace(1) * %b_ptr
%result = shl i64 %a, %b
store i64 %result, i64 addrspace(1)* %out
ret void
@@ -108,9 +108,9 @@ define void @shl_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
;VI: v_lshlrev_b64 {{v\[[0-9]+:[0-9]+\], v[0-9]+, v\[[0-9]+:[0-9]+\]}}
define void @shl_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %in) {
- %b_ptr = getelementptr <2 x i64> addrspace(1)* %in, i64 1
- %a = load <2 x i64> addrspace(1) * %in
- %b = load <2 x i64> addrspace(1) * %b_ptr
+ %b_ptr = getelementptr <2 x i64>, <2 x i64> addrspace(1)* %in, i64 1
+ %a = load <2 x i64>, <2 x i64> addrspace(1) * %in
+ %b = load <2 x i64>, <2 x i64> addrspace(1) * %b_ptr
%result = shl <2 x i64> %a, %b
store <2 x i64> %result, <2 x i64> addrspace(1)* %out
ret void
@@ -171,9 +171,9 @@ define void @shl_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %in
;VI: v_lshlrev_b64 {{v\[[0-9]+:[0-9]+\], v[0-9]+, v\[[0-9]+:[0-9]+\]}}
define void @shl_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(1)* %in) {
- %b_ptr = getelementptr <4 x i64> addrspace(1)* %in, i64 1
- %a = load <4 x i64> addrspace(1) * %in
- %b = load <4 x i64> addrspace(1) * %b_ptr
+ %b_ptr = getelementptr <4 x i64>, <4 x i64> addrspace(1)* %in, i64 1
+ %a = load <4 x i64>, <4 x i64> addrspace(1) * %in
+ %b = load <4 x i64>, <4 x i64> addrspace(1) * %b_ptr
%result = shl <4 x i64> %a, %b
store <4 x i64> %result, <4 x i64> addrspace(1)* %out
ret void
diff --git a/test/CodeGen/R600/shl_add_constant.ll b/test/CodeGen/R600/shl_add_constant.ll
index 6915495beece..b1485bfaaebb 100644
--- a/test/CodeGen/R600/shl_add_constant.ll
+++ b/test/CodeGen/R600/shl_add_constant.ll
@@ -11,8 +11,8 @@ declare i32 @llvm.r600.read.tidig.x() #1
; SI: s_endpgm
define void @shl_2_add_9_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
%tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
- %ptr = getelementptr i32 addrspace(1)* %in, i32 %tid.x
- %val = load i32 addrspace(1)* %ptr, align 4
+ %ptr = getelementptr i32, i32 addrspace(1)* %in, i32 %tid.x
+ %val = load i32, i32 addrspace(1)* %ptr, align 4
%add = add i32 %val, 9
%result = shl i32 %add, 2
store i32 %result, i32 addrspace(1)* %out, align 4
@@ -27,8 +27,8 @@ define void @shl_2_add_9_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
; SI: s_endpgm
define void @shl_2_add_9_i32_2_add_uses(i32 addrspace(1)* %out0, i32 addrspace(1)* %out1, i32 addrspace(1)* %in) #0 {
%tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
- %ptr = getelementptr i32 addrspace(1)* %in, i32 %tid.x
- %val = load i32 addrspace(1)* %ptr, align 4
+ %ptr = getelementptr i32, i32 addrspace(1)* %in, i32 %tid.x
+ %val = load i32, i32 addrspace(1)* %ptr, align 4
%add = add i32 %val, 9
%result = shl i32 %add, 2
store i32 %result, i32 addrspace(1)* %out0, align 4
@@ -45,8 +45,8 @@ define void @shl_2_add_9_i32_2_add_uses(i32 addrspace(1)* %out0, i32 addrspace(1
; SI: s_endpgm
define void @shl_2_add_999_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
%tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
- %ptr = getelementptr i32 addrspace(1)* %in, i32 %tid.x
- %val = load i32 addrspace(1)* %ptr, align 4
+ %ptr = getelementptr i32, i32 addrspace(1)* %in, i32 %tid.x
+ %val = load i32, i32 addrspace(1)* %ptr, align 4
%shl = add i32 %val, 999
%result = shl i32 %shl, 2
store i32 %result, i32 addrspace(1)* %out, align 4
diff --git a/test/CodeGen/R600/shl_add_ptr.ll b/test/CodeGen/R600/shl_add_ptr.ll
index 15602e820608..6671e909cd1d 100644
--- a/test/CodeGen/R600/shl_add_ptr.ll
+++ b/test/CodeGen/R600/shl_add_ptr.ll
@@ -17,13 +17,13 @@ declare i32 @llvm.r600.read.tidig.x() #1
; SI-LABEL: {{^}}load_shl_base_lds_0:
; SI: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
-; SI: ds_read_b32 {{v[0-9]+}}, [[PTR]] offset:8 [M0]
+; SI: ds_read_b32 {{v[0-9]+}}, [[PTR]] offset:8
; SI: s_endpgm
define void @load_shl_base_lds_0(float addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
%tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
%idx.0 = add nsw i32 %tid.x, 2
- %arrayidx0 = getelementptr inbounds [512 x float] addrspace(3)* @lds0, i32 0, i32 %idx.0
- %val0 = load float addrspace(3)* %arrayidx0, align 4
+ %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds0, i32 0, i32 %idx.0
+ %val0 = load float, float addrspace(3)* %arrayidx0, align 4
store i32 %idx.0, i32 addrspace(1)* %add_use, align 4
store float %val0, float addrspace(1)* %out
ret void
@@ -34,7 +34,7 @@ define void @load_shl_base_lds_0(float addrspace(1)* %out, i32 addrspace(1)* %ad
; SI-LABEL: {{^}}load_shl_base_lds_1:
; SI: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
-; SI: ds_read_b32 [[RESULT:v[0-9]+]], [[PTR]] offset:8 [M0]
+; SI: ds_read_b32 [[RESULT:v[0-9]+]], [[PTR]] offset:8
; SI: v_add_i32_e32 [[ADDUSE:v[0-9]+]], 8, v{{[0-9]+}}
; SI-DAG: buffer_store_dword [[RESULT]]
; SI-DAG: buffer_store_dword [[ADDUSE]]
@@ -42,8 +42,8 @@ define void @load_shl_base_lds_0(float addrspace(1)* %out, i32 addrspace(1)* %ad
define void @load_shl_base_lds_1(float addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
%tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
%idx.0 = add nsw i32 %tid.x, 2
- %arrayidx0 = getelementptr inbounds [512 x float] addrspace(3)* @lds0, i32 0, i32 %idx.0
- %val0 = load float addrspace(3)* %arrayidx0, align 4
+ %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds0, i32 0, i32 %idx.0
+ %val0 = load float, float addrspace(3)* %arrayidx0, align 4
%shl_add_use = shl i32 %idx.0, 2
store i32 %shl_add_use, i32 addrspace(1)* %add_use, align 4
store float %val0, float addrspace(1)* %out
@@ -58,8 +58,8 @@ define void @load_shl_base_lds_1(float addrspace(1)* %out, i32 addrspace(1)* %ad
define void @load_shl_base_lds_max_offset(i8 addrspace(1)* %out, i8 addrspace(3)* %lds, i32 addrspace(1)* %add_use) #0 {
%tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
%idx.0 = add nsw i32 %tid.x, 65535
- %arrayidx0 = getelementptr inbounds [65536 x i8] addrspace(3)* @maxlds, i32 0, i32 %idx.0
- %val0 = load i8 addrspace(3)* %arrayidx0
+ %arrayidx0 = getelementptr inbounds [65536 x i8], [65536 x i8] addrspace(3)* @maxlds, i32 0, i32 %idx.0
+ %val0 = load i8, i8 addrspace(3)* %arrayidx0
store i32 %idx.0, i32 addrspace(1)* %add_use
store i8 %val0, i8 addrspace(1)* %out
ret void
@@ -69,17 +69,17 @@ define void @load_shl_base_lds_max_offset(i8 addrspace(1)* %out, i8 addrspace(3)
; pointer can be used with an offset into the second one.
; SI-LABEL: {{^}}load_shl_base_lds_2:
+; SI: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
; SI: s_mov_b32 m0, -1
-; SI-NEXT: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
-; SI-NEXT: ds_read2st64_b32 {{v\[[0-9]+:[0-9]+\]}}, [[PTR]] offset0:1 offset1:9 [M0]
+; SI-NEXT: ds_read2st64_b32 {{v\[[0-9]+:[0-9]+\]}}, [[PTR]] offset0:1 offset1:9
; SI: s_endpgm
define void @load_shl_base_lds_2(float addrspace(1)* %out) #0 {
%tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
%idx.0 = add nsw i32 %tid.x, 64
- %arrayidx0 = getelementptr inbounds [512 x float] addrspace(3)* @lds0, i32 0, i32 %idx.0
- %val0 = load float addrspace(3)* %arrayidx0, align 4
- %arrayidx1 = getelementptr inbounds [512 x float] addrspace(3)* @lds1, i32 0, i32 %idx.0
- %val1 = load float addrspace(3)* %arrayidx1, align 4
+ %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds0, i32 0, i32 %idx.0
+ %val0 = load float, float addrspace(3)* %arrayidx0, align 4
+ %arrayidx1 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds1, i32 0, i32 %idx.0
+ %val1 = load float, float addrspace(3)* %arrayidx1, align 4
%sum = fadd float %val0, %val1
store float %sum, float addrspace(1)* %out, align 4
ret void
@@ -87,12 +87,12 @@ define void @load_shl_base_lds_2(float addrspace(1)* %out) #0 {
; SI-LABEL: {{^}}store_shl_base_lds_0:
; SI: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
-; SI: ds_write_b32 [[PTR]], {{v[0-9]+}} offset:8 [M0]
+; SI: ds_write_b32 [[PTR]], {{v[0-9]+}} offset:8
; SI: s_endpgm
define void @store_shl_base_lds_0(float addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
%tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
%idx.0 = add nsw i32 %tid.x, 2
- %arrayidx0 = getelementptr inbounds [512 x float] addrspace(3)* @lds0, i32 0, i32 %idx.0
+ %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds0, i32 0, i32 %idx.0
store float 1.0, float addrspace(3)* %arrayidx0, align 4
store i32 %idx.0, i32 addrspace(1)* %add_use, align 4
ret void
@@ -107,8 +107,8 @@ define void @store_shl_base_lds_0(float addrspace(1)* %out, i32 addrspace(1)* %a
; define void @atomic_load_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
; %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
; %idx.0 = add nsw i32 %tid.x, 2
-; %arrayidx0 = getelementptr inbounds [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
-; %val = load atomic i32 addrspace(3)* %arrayidx0 seq_cst, align 4
+; %arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
+; %val = load atomic i32, i32 addrspace(3)* %arrayidx0 seq_cst, align 4
; store i32 %val, i32 addrspace(1)* %out, align 4
; store i32 %idx.0, i32 addrspace(1)* %add_use, align 4
; ret void
@@ -122,7 +122,7 @@ define void @store_shl_base_lds_0(float addrspace(1)* %out, i32 addrspace(1)* %a
define void @atomic_cmpxchg_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use, i32 %swap) #0 {
%tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
%idx.0 = add nsw i32 %tid.x, 2
- %arrayidx0 = getelementptr inbounds [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
+ %arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
%pair = cmpxchg i32 addrspace(3)* %arrayidx0, i32 7, i32 %swap seq_cst monotonic
%result = extractvalue { i32, i1 } %pair, 0
store i32 %result, i32 addrspace(1)* %out, align 4
@@ -137,7 +137,7 @@ define void @atomic_cmpxchg_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace
define void @atomic_swap_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
%tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
%idx.0 = add nsw i32 %tid.x, 2
- %arrayidx0 = getelementptr inbounds [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
+ %arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
%val = atomicrmw xchg i32 addrspace(3)* %arrayidx0, i32 3 seq_cst
store i32 %val, i32 addrspace(1)* %out, align 4
store i32 %idx.0, i32 addrspace(1)* %add_use, align 4
@@ -151,7 +151,7 @@ define void @atomic_swap_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)
define void @atomic_add_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
%tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
%idx.0 = add nsw i32 %tid.x, 2
- %arrayidx0 = getelementptr inbounds [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
+ %arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
%val = atomicrmw add i32 addrspace(3)* %arrayidx0, i32 3 seq_cst
store i32 %val, i32 addrspace(1)* %out, align 4
store i32 %idx.0, i32 addrspace(1)* %add_use, align 4
@@ -165,7 +165,7 @@ define void @atomic_add_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)*
define void @atomic_sub_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
%tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
%idx.0 = add nsw i32 %tid.x, 2
- %arrayidx0 = getelementptr inbounds [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
+ %arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
%val = atomicrmw sub i32 addrspace(3)* %arrayidx0, i32 3 seq_cst
store i32 %val, i32 addrspace(1)* %out, align 4
store i32 %idx.0, i32 addrspace(1)* %add_use, align 4
@@ -179,7 +179,7 @@ define void @atomic_sub_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)*
define void @atomic_and_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
%tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
%idx.0 = add nsw i32 %tid.x, 2
- %arrayidx0 = getelementptr inbounds [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
+ %arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
%val = atomicrmw and i32 addrspace(3)* %arrayidx0, i32 3 seq_cst
store i32 %val, i32 addrspace(1)* %out, align 4
store i32 %idx.0, i32 addrspace(1)* %add_use, align 4
@@ -193,7 +193,7 @@ define void @atomic_and_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)*
define void @atomic_or_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
%tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
%idx.0 = add nsw i32 %tid.x, 2
- %arrayidx0 = getelementptr inbounds [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
+ %arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
%val = atomicrmw or i32 addrspace(3)* %arrayidx0, i32 3 seq_cst
store i32 %val, i32 addrspace(1)* %out, align 4
store i32 %idx.0, i32 addrspace(1)* %add_use, align 4
@@ -207,7 +207,7 @@ define void @atomic_or_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)*
define void @atomic_xor_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
%tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
%idx.0 = add nsw i32 %tid.x, 2
- %arrayidx0 = getelementptr inbounds [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
+ %arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
%val = atomicrmw xor i32 addrspace(3)* %arrayidx0, i32 3 seq_cst
store i32 %val, i32 addrspace(1)* %out, align 4
store i32 %idx.0, i32 addrspace(1)* %add_use, align 4
@@ -217,7 +217,7 @@ define void @atomic_xor_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)*
; define void @atomic_nand_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
; %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
; %idx.0 = add nsw i32 %tid.x, 2
-; %arrayidx0 = getelementptr inbounds [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
+; %arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
; %val = atomicrmw nand i32 addrspace(3)* %arrayidx0, i32 3 seq_cst
; store i32 %val, i32 addrspace(1)* %out, align 4
; store i32 %idx.0, i32 addrspace(1)* %add_use, align 4
@@ -231,7 +231,7 @@ define void @atomic_xor_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)*
define void @atomic_min_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
%tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
%idx.0 = add nsw i32 %tid.x, 2
- %arrayidx0 = getelementptr inbounds [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
+ %arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
%val = atomicrmw min i32 addrspace(3)* %arrayidx0, i32 3 seq_cst
store i32 %val, i32 addrspace(1)* %out, align 4
store i32 %idx.0, i32 addrspace(1)* %add_use, align 4
@@ -245,7 +245,7 @@ define void @atomic_min_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)*
define void @atomic_max_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
%tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
%idx.0 = add nsw i32 %tid.x, 2
- %arrayidx0 = getelementptr inbounds [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
+ %arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
%val = atomicrmw max i32 addrspace(3)* %arrayidx0, i32 3 seq_cst
store i32 %val, i32 addrspace(1)* %out, align 4
store i32 %idx.0, i32 addrspace(1)* %add_use, align 4
@@ -259,7 +259,7 @@ define void @atomic_max_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)*
define void @atomic_umin_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
%tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
%idx.0 = add nsw i32 %tid.x, 2
- %arrayidx0 = getelementptr inbounds [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
+ %arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
%val = atomicrmw umin i32 addrspace(3)* %arrayidx0, i32 3 seq_cst
store i32 %val, i32 addrspace(1)* %out, align 4
store i32 %idx.0, i32 addrspace(1)* %add_use, align 4
@@ -273,7 +273,7 @@ define void @atomic_umin_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)
define void @atomic_umax_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
%tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
%idx.0 = add nsw i32 %tid.x, 2
- %arrayidx0 = getelementptr inbounds [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
+ %arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
%val = atomicrmw umax i32 addrspace(3)* %arrayidx0, i32 3 seq_cst
store i32 %val, i32 addrspace(1)* %out, align 4
store i32 %idx.0, i32 addrspace(1)* %add_use, align 4
diff --git a/test/CodeGen/R600/si-annotate-cf.ll b/test/CodeGen/R600/si-annotate-cf.ll
index 1b49a8272fa3..bbcb861f37dc 100644
--- a/test/CodeGen/R600/si-annotate-cf.ll
+++ b/test/CodeGen/R600/si-annotate-cf.ll
@@ -29,10 +29,10 @@ ENDIF:
; FIXME: This could be folded into the s_or_b64 instruction
; SI: s_mov_b64 [[ZERO:s\[[0-9]+:[0-9]+\]]], 0
; SI: [[LOOP_LABEL:[A-Z0-9]+]]
-; SI: v_cmp_ne_i32_e64 [[COND:s\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, 0
+; SI: v_cmp_ne_i32_e32 vcc, 0, v{{[0-9]+}}
; SI_IF_BREAK instruction:
-; SI: s_or_b64 [[BREAK:s\[[0-9]+:[0-9]+\]]], [[COND]], [[ZERO]]
+; SI: s_or_b64 [[BREAK:s\[[0-9]+:[0-9]+\]]], vcc, [[ZERO]]
; SI_LOOP instruction:
; SI: s_andn2_b64 exec, exec, [[BREAK]]
diff --git a/test/CodeGen/R600/si-lod-bias.ll b/test/CodeGen/R600/si-lod-bias.ll
index d6cbd0fd367d..944499a11461 100644
--- a/test/CodeGen/R600/si-lod-bias.ll
+++ b/test/CodeGen/R600/si-lod-bias.ll
@@ -9,13 +9,13 @@
define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
- %20 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
- %21 = load <16 x i8> addrspace(2)* %20, !tbaa !1
+ %20 = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %0, i32 0
+ %21 = load <16 x i8>, <16 x i8> addrspace(2)* %20, !tbaa !1
%22 = call float @llvm.SI.load.const(<16 x i8> %21, i32 16)
- %23 = getelementptr <32 x i8> addrspace(2)* %2, i32 0
- %24 = load <32 x i8> addrspace(2)* %23, !tbaa !1
- %25 = getelementptr <16 x i8> addrspace(2)* %1, i32 0
- %26 = load <16 x i8> addrspace(2)* %25, !tbaa !1
+ %23 = getelementptr <32 x i8>, <32 x i8> addrspace(2)* %2, i32 0
+ %24 = load <32 x i8>, <32 x i8> addrspace(2)* %23, !tbaa !1
+ %25 = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %1, i32 0
+ %26 = load <16 x i8>, <16 x i8> addrspace(2)* %25, !tbaa !1
%27 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %3, <2 x i32> %5)
%28 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %3, <2 x i32> %5)
%29 = bitcast float %22 to i32
diff --git a/test/CodeGen/R600/si-sgpr-spill.ll b/test/CodeGen/R600/si-sgpr-spill.ll
index 18fda20b0d14..84652701f773 100644
--- a/test/CodeGen/R600/si-sgpr-spill.ll
+++ b/test/CodeGen/R600/si-sgpr-spill.ll
@@ -13,8 +13,8 @@
define void @main([17 x <16 x i8>] addrspace(2)* byval, [32 x <16 x i8>] addrspace(2)* byval, [16 x <32 x i8>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
- %21 = getelementptr [17 x <16 x i8>] addrspace(2)* %0, i64 0, i32 0
- %22 = load <16 x i8> addrspace(2)* %21, !tbaa !0
+ %21 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %0, i64 0, i32 0
+ %22 = load <16 x i8>, <16 x i8> addrspace(2)* %21, !tbaa !0
%23 = call float @llvm.SI.load.const(<16 x i8> %22, i32 96)
%24 = call float @llvm.SI.load.const(<16 x i8> %22, i32 100)
%25 = call float @llvm.SI.load.const(<16 x i8> %22, i32 104)
@@ -53,38 +53,38 @@ main_body:
%58 = call float @llvm.SI.load.const(<16 x i8> %22, i32 372)
%59 = call float @llvm.SI.load.const(<16 x i8> %22, i32 376)
%60 = call float @llvm.SI.load.const(<16 x i8> %22, i32 384)
- %61 = getelementptr [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 0
- %62 = load <32 x i8> addrspace(2)* %61, !tbaa !0
- %63 = getelementptr [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 0
- %64 = load <16 x i8> addrspace(2)* %63, !tbaa !0
- %65 = getelementptr [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 1
- %66 = load <32 x i8> addrspace(2)* %65, !tbaa !0
- %67 = getelementptr [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 1
- %68 = load <16 x i8> addrspace(2)* %67, !tbaa !0
- %69 = getelementptr [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 2
- %70 = load <32 x i8> addrspace(2)* %69, !tbaa !0
- %71 = getelementptr [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 2
- %72 = load <16 x i8> addrspace(2)* %71, !tbaa !0
- %73 = getelementptr [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 3
- %74 = load <32 x i8> addrspace(2)* %73, !tbaa !0
- %75 = getelementptr [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 3
- %76 = load <16 x i8> addrspace(2)* %75, !tbaa !0
- %77 = getelementptr [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 4
- %78 = load <32 x i8> addrspace(2)* %77, !tbaa !0
- %79 = getelementptr [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 4
- %80 = load <16 x i8> addrspace(2)* %79, !tbaa !0
- %81 = getelementptr [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 5
- %82 = load <32 x i8> addrspace(2)* %81, !tbaa !0
- %83 = getelementptr [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 5
- %84 = load <16 x i8> addrspace(2)* %83, !tbaa !0
- %85 = getelementptr [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 6
- %86 = load <32 x i8> addrspace(2)* %85, !tbaa !0
- %87 = getelementptr [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 6
- %88 = load <16 x i8> addrspace(2)* %87, !tbaa !0
- %89 = getelementptr [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 7
- %90 = load <32 x i8> addrspace(2)* %89, !tbaa !0
- %91 = getelementptr [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 7
- %92 = load <16 x i8> addrspace(2)* %91, !tbaa !0
+ %61 = getelementptr [16 x <32 x i8>], [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 0
+ %62 = load <32 x i8>, <32 x i8> addrspace(2)* %61, !tbaa !0
+ %63 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 0
+ %64 = load <16 x i8>, <16 x i8> addrspace(2)* %63, !tbaa !0
+ %65 = getelementptr [16 x <32 x i8>], [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 1
+ %66 = load <32 x i8>, <32 x i8> addrspace(2)* %65, !tbaa !0
+ %67 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 1
+ %68 = load <16 x i8>, <16 x i8> addrspace(2)* %67, !tbaa !0
+ %69 = getelementptr [16 x <32 x i8>], [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 2
+ %70 = load <32 x i8>, <32 x i8> addrspace(2)* %69, !tbaa !0
+ %71 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 2
+ %72 = load <16 x i8>, <16 x i8> addrspace(2)* %71, !tbaa !0
+ %73 = getelementptr [16 x <32 x i8>], [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 3
+ %74 = load <32 x i8>, <32 x i8> addrspace(2)* %73, !tbaa !0
+ %75 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 3
+ %76 = load <16 x i8>, <16 x i8> addrspace(2)* %75, !tbaa !0
+ %77 = getelementptr [16 x <32 x i8>], [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 4
+ %78 = load <32 x i8>, <32 x i8> addrspace(2)* %77, !tbaa !0
+ %79 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 4
+ %80 = load <16 x i8>, <16 x i8> addrspace(2)* %79, !tbaa !0
+ %81 = getelementptr [16 x <32 x i8>], [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 5
+ %82 = load <32 x i8>, <32 x i8> addrspace(2)* %81, !tbaa !0
+ %83 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 5
+ %84 = load <16 x i8>, <16 x i8> addrspace(2)* %83, !tbaa !0
+ %85 = getelementptr [16 x <32 x i8>], [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 6
+ %86 = load <32 x i8>, <32 x i8> addrspace(2)* %85, !tbaa !0
+ %87 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 6
+ %88 = load <16 x i8>, <16 x i8> addrspace(2)* %87, !tbaa !0
+ %89 = getelementptr [16 x <32 x i8>], [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 7
+ %90 = load <32 x i8>, <32 x i8> addrspace(2)* %89, !tbaa !0
+ %91 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 7
+ %92 = load <16 x i8>, <16 x i8> addrspace(2)* %91, !tbaa !0
%93 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %4, <2 x i32> %6)
%94 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %4, <2 x i32> %6)
%95 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %4, <2 x i32> %6)
@@ -103,29 +103,29 @@ main_body:
%108 = call float @llvm.SI.fs.interp(i32 1, i32 5, i32 %4, <2 x i32> %6)
%109 = call float @llvm.SI.fs.interp(i32 2, i32 5, i32 %4, <2 x i32> %6)
%110 = call i32 @llvm.SI.tid()
- %111 = getelementptr [64 x i32] addrspace(3)* @ddxy_lds, i32 0, i32 %110
+ %111 = getelementptr [64 x i32], [64 x i32] addrspace(3)* @ddxy_lds, i32 0, i32 %110
%112 = bitcast float %93 to i32
store i32 %112, i32 addrspace(3)* %111
%113 = bitcast float %94 to i32
store i32 %113, i32 addrspace(3)* %111
%114 = call i32 @llvm.SI.tid()
- %115 = getelementptr [64 x i32] addrspace(3)* @ddxy_lds, i32 0, i32 %114
+ %115 = getelementptr [64 x i32], [64 x i32] addrspace(3)* @ddxy_lds, i32 0, i32 %114
%116 = and i32 %114, -4
- %117 = getelementptr [64 x i32] addrspace(3)* @ddxy_lds, i32 0, i32 %116
+ %117 = getelementptr [64 x i32], [64 x i32] addrspace(3)* @ddxy_lds, i32 0, i32 %116
%118 = add i32 %116, 1
- %119 = getelementptr [64 x i32] addrspace(3)* @ddxy_lds, i32 0, i32 %118
+ %119 = getelementptr [64 x i32], [64 x i32] addrspace(3)* @ddxy_lds, i32 0, i32 %118
%120 = bitcast float %93 to i32
store i32 %120, i32 addrspace(3)* %115
- %121 = load i32 addrspace(3)* %117
+ %121 = load i32, i32 addrspace(3)* %117
%122 = bitcast i32 %121 to float
- %123 = load i32 addrspace(3)* %119
+ %123 = load i32, i32 addrspace(3)* %119
%124 = bitcast i32 %123 to float
%125 = fsub float %124, %122
%126 = bitcast float %94 to i32
store i32 %126, i32 addrspace(3)* %115
- %127 = load i32 addrspace(3)* %117
+ %127 = load i32, i32 addrspace(3)* %117
%128 = bitcast i32 %127 to float
- %129 = load i32 addrspace(3)* %119
+ %129 = load i32, i32 addrspace(3)* %119
%130 = bitcast i32 %129 to float
%131 = fsub float %130, %128
%132 = insertelement <4 x float> undef, float %125, i32 0
@@ -139,7 +139,7 @@ main_body:
%140 = fmul float %60, %94
%141 = fmul float %60, %94
%142 = call i32 @llvm.SI.tid()
- %143 = getelementptr [64 x i32] addrspace(3)* @ddxy_lds, i32 0, i32 %142
+ %143 = getelementptr [64 x i32], [64 x i32] addrspace(3)* @ddxy_lds, i32 0, i32 %142
%144 = bitcast float %138 to i32
store i32 %144, i32 addrspace(3)* %143
%145 = bitcast float %139 to i32
@@ -149,37 +149,37 @@ main_body:
%147 = bitcast float %141 to i32
store i32 %147, i32 addrspace(3)* %143
%148 = call i32 @llvm.SI.tid()
- %149 = getelementptr [64 x i32] addrspace(3)* @ddxy_lds, i32 0, i32 %148
+ %149 = getelementptr [64 x i32], [64 x i32] addrspace(3)* @ddxy_lds, i32 0, i32 %148
%150 = and i32 %148, -4
- %151 = getelementptr [64 x i32] addrspace(3)* @ddxy_lds, i32 0, i32 %150
+ %151 = getelementptr [64 x i32], [64 x i32] addrspace(3)* @ddxy_lds, i32 0, i32 %150
%152 = add i32 %150, 2
- %153 = getelementptr [64 x i32] addrspace(3)* @ddxy_lds, i32 0, i32 %152
+ %153 = getelementptr [64 x i32], [64 x i32] addrspace(3)* @ddxy_lds, i32 0, i32 %152
%154 = bitcast float %138 to i32
store i32 %154, i32 addrspace(3)* %149
- %155 = load i32 addrspace(3)* %151
+ %155 = load i32, i32 addrspace(3)* %151
%156 = bitcast i32 %155 to float
- %157 = load i32 addrspace(3)* %153
+ %157 = load i32, i32 addrspace(3)* %153
%158 = bitcast i32 %157 to float
%159 = fsub float %158, %156
%160 = bitcast float %139 to i32
store i32 %160, i32 addrspace(3)* %149
- %161 = load i32 addrspace(3)* %151
+ %161 = load i32, i32 addrspace(3)* %151
%162 = bitcast i32 %161 to float
- %163 = load i32 addrspace(3)* %153
+ %163 = load i32, i32 addrspace(3)* %153
%164 = bitcast i32 %163 to float
%165 = fsub float %164, %162
%166 = bitcast float %140 to i32
store i32 %166, i32 addrspace(3)* %149
- %167 = load i32 addrspace(3)* %151
+ %167 = load i32, i32 addrspace(3)* %151
%168 = bitcast i32 %167 to float
- %169 = load i32 addrspace(3)* %153
+ %169 = load i32, i32 addrspace(3)* %153
%170 = bitcast i32 %169 to float
%171 = fsub float %170, %168
%172 = bitcast float %141 to i32
store i32 %172, i32 addrspace(3)* %149
- %173 = load i32 addrspace(3)* %151
+ %173 = load i32, i32 addrspace(3)* %151
%174 = bitcast i32 %173 to float
- %175 = load i32 addrspace(3)* %153
+ %175 = load i32, i32 addrspace(3)* %153
%176 = bitcast i32 %175 to float
%177 = fsub float %176, %174
%178 = insertelement <4 x float> undef, float %159, i32 0
@@ -694,8 +694,8 @@ attributes #4 = { nounwind readonly }
; CHECK: s_endpgm
define void @main1([17 x <16 x i8>] addrspace(2)* byval, [32 x <16 x i8>] addrspace(2)* byval, [16 x <32 x i8>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
- %21 = getelementptr [17 x <16 x i8>] addrspace(2)* %0, i64 0, i32 0
- %22 = load <16 x i8> addrspace(2)* %21, !tbaa !0
+ %21 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %0, i64 0, i32 0
+ %22 = load <16 x i8>, <16 x i8> addrspace(2)* %21, !tbaa !0
%23 = call float @llvm.SI.load.const(<16 x i8> %22, i32 0)
%24 = call float @llvm.SI.load.const(<16 x i8> %22, i32 4)
%25 = call float @llvm.SI.load.const(<16 x i8> %22, i32 8)
@@ -799,42 +799,42 @@ main_body:
%123 = call float @llvm.SI.load.const(<16 x i8> %22, i32 716)
%124 = call float @llvm.SI.load.const(<16 x i8> %22, i32 864)
%125 = call float @llvm.SI.load.const(<16 x i8> %22, i32 868)
- %126 = getelementptr [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 0
- %127 = load <32 x i8> addrspace(2)* %126, !tbaa !0
- %128 = getelementptr [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 0
- %129 = load <16 x i8> addrspace(2)* %128, !tbaa !0
- %130 = getelementptr [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 1
- %131 = load <32 x i8> addrspace(2)* %130, !tbaa !0
- %132 = getelementptr [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 1
- %133 = load <16 x i8> addrspace(2)* %132, !tbaa !0
- %134 = getelementptr [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 2
- %135 = load <32 x i8> addrspace(2)* %134, !tbaa !0
- %136 = getelementptr [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 2
- %137 = load <16 x i8> addrspace(2)* %136, !tbaa !0
- %138 = getelementptr [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 3
- %139 = load <32 x i8> addrspace(2)* %138, !tbaa !0
- %140 = getelementptr [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 3
- %141 = load <16 x i8> addrspace(2)* %140, !tbaa !0
- %142 = getelementptr [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 4
- %143 = load <32 x i8> addrspace(2)* %142, !tbaa !0
- %144 = getelementptr [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 4
- %145 = load <16 x i8> addrspace(2)* %144, !tbaa !0
- %146 = getelementptr [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 5
- %147 = load <32 x i8> addrspace(2)* %146, !tbaa !0
- %148 = getelementptr [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 5
- %149 = load <16 x i8> addrspace(2)* %148, !tbaa !0
- %150 = getelementptr [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 6
- %151 = load <32 x i8> addrspace(2)* %150, !tbaa !0
- %152 = getelementptr [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 6
- %153 = load <16 x i8> addrspace(2)* %152, !tbaa !0
- %154 = getelementptr [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 7
- %155 = load <32 x i8> addrspace(2)* %154, !tbaa !0
- %156 = getelementptr [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 7
- %157 = load <16 x i8> addrspace(2)* %156, !tbaa !0
- %158 = getelementptr [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 8
- %159 = load <32 x i8> addrspace(2)* %158, !tbaa !0
- %160 = getelementptr [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 8
- %161 = load <16 x i8> addrspace(2)* %160, !tbaa !0
+ %126 = getelementptr [16 x <32 x i8>], [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 0
+ %127 = load <32 x i8>, <32 x i8> addrspace(2)* %126, !tbaa !0
+ %128 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 0
+ %129 = load <16 x i8>, <16 x i8> addrspace(2)* %128, !tbaa !0
+ %130 = getelementptr [16 x <32 x i8>], [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 1
+ %131 = load <32 x i8>, <32 x i8> addrspace(2)* %130, !tbaa !0
+ %132 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 1
+ %133 = load <16 x i8>, <16 x i8> addrspace(2)* %132, !tbaa !0
+ %134 = getelementptr [16 x <32 x i8>], [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 2
+ %135 = load <32 x i8>, <32 x i8> addrspace(2)* %134, !tbaa !0
+ %136 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 2
+ %137 = load <16 x i8>, <16 x i8> addrspace(2)* %136, !tbaa !0
+ %138 = getelementptr [16 x <32 x i8>], [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 3
+ %139 = load <32 x i8>, <32 x i8> addrspace(2)* %138, !tbaa !0
+ %140 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 3
+ %141 = load <16 x i8>, <16 x i8> addrspace(2)* %140, !tbaa !0
+ %142 = getelementptr [16 x <32 x i8>], [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 4
+ %143 = load <32 x i8>, <32 x i8> addrspace(2)* %142, !tbaa !0
+ %144 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 4
+ %145 = load <16 x i8>, <16 x i8> addrspace(2)* %144, !tbaa !0
+ %146 = getelementptr [16 x <32 x i8>], [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 5
+ %147 = load <32 x i8>, <32 x i8> addrspace(2)* %146, !tbaa !0
+ %148 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 5
+ %149 = load <16 x i8>, <16 x i8> addrspace(2)* %148, !tbaa !0
+ %150 = getelementptr [16 x <32 x i8>], [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 6
+ %151 = load <32 x i8>, <32 x i8> addrspace(2)* %150, !tbaa !0
+ %152 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 6
+ %153 = load <16 x i8>, <16 x i8> addrspace(2)* %152, !tbaa !0
+ %154 = getelementptr [16 x <32 x i8>], [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 7
+ %155 = load <32 x i8>, <32 x i8> addrspace(2)* %154, !tbaa !0
+ %156 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 7
+ %157 = load <16 x i8>, <16 x i8> addrspace(2)* %156, !tbaa !0
+ %158 = getelementptr [16 x <32 x i8>], [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 8
+ %159 = load <32 x i8>, <32 x i8> addrspace(2)* %158, !tbaa !0
+ %160 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 8
+ %161 = load <16 x i8>, <16 x i8> addrspace(2)* %160, !tbaa !0
%162 = fcmp ugt float %17, 0.000000e+00
%163 = select i1 %162, float 1.000000e+00, float 0.000000e+00
%164 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %4, <2 x i32> %6)
diff --git a/test/CodeGen/R600/si-spill-cf.ll b/test/CodeGen/R600/si-spill-cf.ll
new file mode 100644
index 000000000000..4b2d8ec6bf0a
--- /dev/null
+++ b/test/CodeGen/R600/si-spill-cf.ll
@@ -0,0 +1,501 @@
+; RUN: llc -march=amdgcn -mcpu=SI < %s -verify-machineinstrs | FileCheck -check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=tonga < %s -verify-machineinstrs | FileCheck -check-prefix=SI %s
+
+; If this occurs it is likely due to reordering and the restore was
+; originally supposed to happen before SI_END_CF.
+; SI: s_or_b64 exec, exec, [[SAVED:s\[[0-9]+:[0-9]+\]|[a-z]+]]
+; SI-NOT: v_readlane_b32 [[SAVED]]
+
+define void @main() #0 {
+main_body:
+ %0 = call float @llvm.SI.load.const(<16 x i8> undef, i32 16)
+ %1 = call float @llvm.SI.load.const(<16 x i8> undef, i32 32)
+ %2 = call float @llvm.SI.load.const(<16 x i8> undef, i32 80)
+ %3 = call float @llvm.SI.load.const(<16 x i8> undef, i32 84)
+ %4 = call float @llvm.SI.load.const(<16 x i8> undef, i32 88)
+ %5 = call float @llvm.SI.load.const(<16 x i8> undef, i32 96)
+ %6 = call float @llvm.SI.load.const(<16 x i8> undef, i32 100)
+ %7 = call float @llvm.SI.load.const(<16 x i8> undef, i32 104)
+ %8 = call float @llvm.SI.load.const(<16 x i8> undef, i32 112)
+ %9 = call float @llvm.SI.load.const(<16 x i8> undef, i32 116)
+ %10 = call float @llvm.SI.load.const(<16 x i8> undef, i32 120)
+ %11 = call float @llvm.SI.load.const(<16 x i8> undef, i32 128)
+ %12 = call float @llvm.SI.load.const(<16 x i8> undef, i32 132)
+ %13 = call float @llvm.SI.load.const(<16 x i8> undef, i32 136)
+ %14 = call float @llvm.SI.load.const(<16 x i8> undef, i32 144)
+ %15 = call float @llvm.SI.load.const(<16 x i8> undef, i32 148)
+ %16 = call float @llvm.SI.load.const(<16 x i8> undef, i32 152)
+ %17 = call float @llvm.SI.load.const(<16 x i8> undef, i32 160)
+ %18 = call float @llvm.SI.load.const(<16 x i8> undef, i32 164)
+ %19 = call float @llvm.SI.load.const(<16 x i8> undef, i32 168)
+ %20 = call float @llvm.SI.load.const(<16 x i8> undef, i32 176)
+ %21 = call float @llvm.SI.load.const(<16 x i8> undef, i32 180)
+ %22 = call float @llvm.SI.load.const(<16 x i8> undef, i32 184)
+ %23 = call float @llvm.SI.load.const(<16 x i8> undef, i32 192)
+ %24 = call float @llvm.SI.load.const(<16 x i8> undef, i32 196)
+ %25 = call float @llvm.SI.load.const(<16 x i8> undef, i32 200)
+ %26 = call float @llvm.SI.load.const(<16 x i8> undef, i32 208)
+ %27 = call float @llvm.SI.load.const(<16 x i8> undef, i32 212)
+ %28 = call float @llvm.SI.load.const(<16 x i8> undef, i32 216)
+ %29 = call float @llvm.SI.load.const(<16 x i8> undef, i32 224)
+ %30 = call float @llvm.SI.load.const(<16 x i8> undef, i32 228)
+ %31 = call float @llvm.SI.load.const(<16 x i8> undef, i32 232)
+ %32 = call float @llvm.SI.load.const(<16 x i8> undef, i32 240)
+ %33 = call float @llvm.SI.load.const(<16 x i8> undef, i32 244)
+ %34 = call float @llvm.SI.load.const(<16 x i8> undef, i32 248)
+ %35 = call float @llvm.SI.load.const(<16 x i8> undef, i32 256)
+ %36 = call float @llvm.SI.load.const(<16 x i8> undef, i32 260)
+ %37 = call float @llvm.SI.load.const(<16 x i8> undef, i32 264)
+ %38 = call float @llvm.SI.load.const(<16 x i8> undef, i32 272)
+ %39 = call float @llvm.SI.load.const(<16 x i8> undef, i32 276)
+ %40 = call float @llvm.SI.load.const(<16 x i8> undef, i32 280)
+ %41 = call float @llvm.SI.load.const(<16 x i8> undef, i32 288)
+ %42 = call float @llvm.SI.load.const(<16 x i8> undef, i32 292)
+ %43 = call float @llvm.SI.load.const(<16 x i8> undef, i32 296)
+ %44 = call float @llvm.SI.load.const(<16 x i8> undef, i32 304)
+ %45 = call float @llvm.SI.load.const(<16 x i8> undef, i32 308)
+ %46 = call float @llvm.SI.load.const(<16 x i8> undef, i32 312)
+ %47 = call float @llvm.SI.load.const(<16 x i8> undef, i32 320)
+ %48 = call float @llvm.SI.load.const(<16 x i8> undef, i32 324)
+ %49 = call float @llvm.SI.load.const(<16 x i8> undef, i32 328)
+ %50 = call float @llvm.SI.load.const(<16 x i8> undef, i32 336)
+ %51 = call float @llvm.SI.load.const(<16 x i8> undef, i32 340)
+ %52 = call float @llvm.SI.load.const(<16 x i8> undef, i32 344)
+ %53 = call float @llvm.SI.load.const(<16 x i8> undef, i32 352)
+ %54 = call float @llvm.SI.load.const(<16 x i8> undef, i32 356)
+ %55 = call float @llvm.SI.load.const(<16 x i8> undef, i32 360)
+ %56 = call float @llvm.SI.load.const(<16 x i8> undef, i32 368)
+ %57 = call float @llvm.SI.load.const(<16 x i8> undef, i32 372)
+ %58 = call float @llvm.SI.load.const(<16 x i8> undef, i32 376)
+ %59 = call float @llvm.SI.load.const(<16 x i8> undef, i32 384)
+ %60 = call float @llvm.SI.load.const(<16 x i8> undef, i32 388)
+ %61 = call float @llvm.SI.load.const(<16 x i8> undef, i32 392)
+ %62 = call float @llvm.SI.load.const(<16 x i8> undef, i32 400)
+ %63 = call float @llvm.SI.load.const(<16 x i8> undef, i32 404)
+ %64 = call float @llvm.SI.load.const(<16 x i8> undef, i32 408)
+ %65 = call float @llvm.SI.load.const(<16 x i8> undef, i32 416)
+ %66 = call float @llvm.SI.load.const(<16 x i8> undef, i32 420)
+ br label %LOOP
+
+LOOP: ; preds = %ENDIF2795, %main_body
+ %temp894.0 = phi float [ 0.000000e+00, %main_body ], [ %temp894.1, %ENDIF2795 ]
+ %temp18.0 = phi float [ undef, %main_body ], [ %temp18.1, %ENDIF2795 ]
+ %67 = icmp sgt i32 undef, 4
+ br i1 %67, label %ENDLOOP, label %ENDIF
+
+ENDLOOP: ; preds = %ELSE2566, %LOOP
+ %68 = call float @llvm.AMDGPU.lrp(float %0, float undef, float undef)
+ call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float undef, float %68, float undef, float 1.000000e+00)
+ ret void
+
+ENDIF: ; preds = %LOOP
+ %69 = fsub float %2, undef
+ %70 = fsub float %3, undef
+ %71 = fsub float %4, undef
+ %72 = fmul float %69, 0.000000e+00
+ %73 = fmul float %70, undef
+ %74 = fmul float %71, undef
+ %75 = fsub float %6, undef
+ %76 = fsub float %7, undef
+ %77 = fmul float %75, undef
+ %78 = fmul float %76, 0.000000e+00
+ %79 = call float @llvm.minnum.f32(float %74, float %78)
+ %80 = call float @llvm.maxnum.f32(float %72, float 0.000000e+00)
+ %81 = call float @llvm.maxnum.f32(float %73, float %77)
+ %82 = call float @llvm.maxnum.f32(float undef, float %79)
+ %83 = call float @llvm.minnum.f32(float %80, float %81)
+ %84 = call float @llvm.minnum.f32(float %83, float undef)
+ %85 = fsub float %14, undef
+ %86 = fsub float %15, undef
+ %87 = fsub float %16, undef
+ %88 = fmul float %85, undef
+ %89 = fmul float %86, undef
+ %90 = fmul float %87, undef
+ %91 = fsub float %17, undef
+ %92 = fsub float %18, undef
+ %93 = fsub float %19, undef
+ %94 = fmul float %91, 0.000000e+00
+ %95 = fmul float %92, undef
+ %96 = fmul float %93, undef
+ %97 = call float @llvm.minnum.f32(float %89, float %95)
+ %98 = call float @llvm.maxnum.f32(float %88, float %94)
+ %99 = call float @llvm.maxnum.f32(float %90, float %96)
+ %100 = call float @llvm.maxnum.f32(float undef, float %97)
+ %101 = call float @llvm.maxnum.f32(float %100, float undef)
+ %102 = call float @llvm.minnum.f32(float %98, float undef)
+ %103 = call float @llvm.minnum.f32(float %102, float %99)
+ %104 = fsub float %30, undef
+ %105 = fsub float %31, undef
+ %106 = fmul float %104, 0.000000e+00
+ %107 = fmul float %105, 0.000000e+00
+ %108 = call float @llvm.minnum.f32(float undef, float %106)
+ %109 = call float @llvm.maxnum.f32(float undef, float %107)
+ %110 = call float @llvm.maxnum.f32(float undef, float %108)
+ %111 = call float @llvm.maxnum.f32(float %110, float undef)
+ %112 = call float @llvm.minnum.f32(float undef, float %109)
+ %113 = fsub float %32, undef
+ %114 = fsub float %33, undef
+ %115 = fsub float %34, undef
+ %116 = fmul float %113, 0.000000e+00
+ %117 = fmul float %114, undef
+ %118 = fmul float %115, undef
+ %119 = fsub float %35, undef
+ %120 = fsub float %36, undef
+ %121 = fsub float %37, undef
+ %122 = fmul float %119, undef
+ %123 = fmul float %120, undef
+ %124 = fmul float %121, undef
+ %125 = call float @llvm.minnum.f32(float %116, float %122)
+ %126 = call float @llvm.minnum.f32(float %117, float %123)
+ %127 = call float @llvm.minnum.f32(float %118, float %124)
+ %128 = call float @llvm.maxnum.f32(float %125, float %126)
+ %129 = call float @llvm.maxnum.f32(float %128, float %127)
+ %130 = fsub float %38, undef
+ %131 = fsub float %39, undef
+ %132 = fsub float %40, undef
+ %133 = fmul float %130, 0.000000e+00
+ %134 = fmul float %131, undef
+ %135 = fmul float %132, undef
+ %136 = fsub float %41, undef
+ %137 = fsub float %42, undef
+ %138 = fsub float %43, undef
+ %139 = fmul float %136, undef
+ %140 = fmul float %137, undef
+ %141 = fmul float %138, undef
+ %142 = call float @llvm.minnum.f32(float %133, float %139)
+ %143 = call float @llvm.minnum.f32(float %134, float %140)
+ %144 = call float @llvm.minnum.f32(float %135, float %141)
+ %145 = call float @llvm.maxnum.f32(float %142, float %143)
+ %146 = call float @llvm.maxnum.f32(float %145, float %144)
+ %147 = fsub float %44, undef
+ %148 = fsub float %45, undef
+ %149 = fsub float %46, undef
+ %150 = fmul float %147, 0.000000e+00
+ %151 = fmul float %148, 0.000000e+00
+ %152 = fmul float %149, undef
+ %153 = fsub float %47, undef
+ %154 = fsub float %48, undef
+ %155 = fsub float %49, undef
+ %156 = fmul float %153, undef
+ %157 = fmul float %154, 0.000000e+00
+ %158 = fmul float %155, undef
+ %159 = call float @llvm.minnum.f32(float %150, float %156)
+ %160 = call float @llvm.minnum.f32(float %151, float %157)
+ %161 = call float @llvm.minnum.f32(float %152, float %158)
+ %162 = call float @llvm.maxnum.f32(float %159, float %160)
+ %163 = call float @llvm.maxnum.f32(float %162, float %161)
+ %164 = fsub float %50, undef
+ %165 = fsub float %51, undef
+ %166 = fsub float %52, undef
+ %167 = fmul float %164, undef
+ %168 = fmul float %165, 0.000000e+00
+ %169 = fmul float %166, 0.000000e+00
+ %170 = fsub float %53, undef
+ %171 = fsub float %54, undef
+ %172 = fsub float %55, undef
+ %173 = fdiv float 1.000000e+00, %temp18.0
+ %174 = fmul float %170, undef
+ %175 = fmul float %171, undef
+ %176 = fmul float %172, %173
+ %177 = call float @llvm.minnum.f32(float %167, float %174)
+ %178 = call float @llvm.minnum.f32(float %168, float %175)
+ %179 = call float @llvm.minnum.f32(float %169, float %176)
+ %180 = call float @llvm.maxnum.f32(float %177, float %178)
+ %181 = call float @llvm.maxnum.f32(float %180, float %179)
+ %182 = fsub float %62, undef
+ %183 = fsub float %63, undef
+ %184 = fsub float %64, undef
+ %185 = fmul float %182, 0.000000e+00
+ %186 = fmul float %183, undef
+ %187 = fmul float %184, undef
+ %188 = fsub float %65, undef
+ %189 = fsub float %66, undef
+ %190 = fmul float %188, undef
+ %191 = fmul float %189, undef
+ %192 = call float @llvm.maxnum.f32(float %185, float %190)
+ %193 = call float @llvm.maxnum.f32(float %186, float %191)
+ %194 = call float @llvm.maxnum.f32(float %187, float undef)
+ %195 = call float @llvm.minnum.f32(float %192, float %193)
+ %196 = call float @llvm.minnum.f32(float %195, float %194)
+ %.temp292.7 = select i1 undef, float %163, float undef
+ %temp292.9 = select i1 false, float %181, float %.temp292.7
+ %.temp292.9 = select i1 undef, float undef, float %temp292.9
+ %197 = fcmp ogt float undef, 0.000000e+00
+ %198 = fcmp olt float undef, %196
+ %199 = and i1 %197, %198
+ %200 = fcmp olt float undef, %.temp292.9
+ %201 = and i1 %199, %200
+ %temp292.11 = select i1 %201, float undef, float %.temp292.9
+ br i1 undef, label %IF2565, label %ELSE2566
+
+IF2565: ; preds = %ENDIF
+ br i1 false, label %ENDIF2582, label %ELSE2584
+
+ELSE2566: ; preds = %ENDIF
+ %202 = fcmp oeq float %temp292.11, 1.000000e+04
+ br i1 %202, label %ENDLOOP, label %ELSE2593
+
+ENDIF2564: ; preds = %ENDIF2594, %ENDIF2588
+ %temp894.1 = phi float [ undef, %ENDIF2588 ], [ %temp894.2, %ENDIF2594 ]
+ %temp18.1 = phi float [ %219, %ENDIF2588 ], [ undef, %ENDIF2594 ]
+ %203 = fsub float %5, undef
+ %204 = fmul float %203, undef
+ %205 = call float @llvm.maxnum.f32(float undef, float %204)
+ %206 = call float @llvm.minnum.f32(float %205, float undef)
+ %207 = call float @llvm.minnum.f32(float %206, float undef)
+ %208 = fcmp ogt float undef, 0.000000e+00
+ %209 = fcmp olt float undef, 1.000000e+00
+ %210 = and i1 %208, %209
+ %211 = fcmp olt float undef, %207
+ %212 = and i1 %210, %211
+ br i1 %212, label %ENDIF2795, label %ELSE2797
+
+ELSE2584: ; preds = %IF2565
+ br label %ENDIF2582
+
+ENDIF2582: ; preds = %ELSE2584, %IF2565
+ %213 = fadd float %1, undef
+ %214 = fadd float 0.000000e+00, %213
+ %215 = call float @llvm.AMDIL.fraction.(float %214)
+ br i1 undef, label %IF2589, label %ELSE2590
+
+IF2589: ; preds = %ENDIF2582
+ br label %ENDIF2588
+
+ELSE2590: ; preds = %ENDIF2582
+ br label %ENDIF2588
+
+ENDIF2588: ; preds = %ELSE2590, %IF2589
+ %216 = fsub float 1.000000e+00, %215
+ %217 = call float @llvm.sqrt.f32(float %216)
+ %218 = fmul float %217, undef
+ %219 = fadd float %218, undef
+ br label %ENDIF2564
+
+ELSE2593: ; preds = %ELSE2566
+ %220 = fcmp oeq float %temp292.11, %82
+ %221 = fcmp olt float %82, %84
+ %222 = and i1 %220, %221
+ br i1 %222, label %ENDIF2594, label %ELSE2596
+
+ELSE2596: ; preds = %ELSE2593
+ %223 = fcmp oeq float %temp292.11, %101
+ %224 = fcmp olt float %101, %103
+ %225 = and i1 %223, %224
+ br i1 %225, label %ENDIF2594, label %ELSE2632
+
+ENDIF2594: ; preds = %ELSE2788, %ELSE2785, %ELSE2782, %ELSE2779, %IF2775, %ELSE2761, %ELSE2758, %IF2757, %ELSE2704, %ELSE2686, %ELSE2671, %ELSE2668, %IF2667, %ELSE2632, %ELSE2596, %ELSE2593
+ %temp894.2 = phi float [ 0.000000e+00, %IF2667 ], [ 0.000000e+00, %ELSE2671 ], [ 0.000000e+00, %IF2757 ], [ 0.000000e+00, %ELSE2761 ], [ %temp894.0, %ELSE2758 ], [ 0.000000e+00, %IF2775 ], [ 0.000000e+00, %ELSE2779 ], [ 0.000000e+00, %ELSE2782 ], [ %.2848, %ELSE2788 ], [ 0.000000e+00, %ELSE2785 ], [ 0.000000e+00, %ELSE2593 ], [ 0.000000e+00, %ELSE2632 ], [ 0.000000e+00, %ELSE2704 ], [ 0.000000e+00, %ELSE2686 ], [ 0.000000e+00, %ELSE2668 ], [ 0.000000e+00, %ELSE2596 ]
+ %226 = fmul float %temp894.2, undef
+ br label %ENDIF2564
+
+ELSE2632: ; preds = %ELSE2596
+ br i1 undef, label %ENDIF2594, label %ELSE2650
+
+ELSE2650: ; preds = %ELSE2632
+ %227 = fcmp oeq float %temp292.11, %111
+ %228 = fcmp olt float %111, %112
+ %229 = and i1 %227, %228
+ br i1 %229, label %IF2667, label %ELSE2668
+
+IF2667: ; preds = %ELSE2650
+ br i1 undef, label %ENDIF2594, label %ELSE2671
+
+ELSE2668: ; preds = %ELSE2650
+ %230 = fcmp oeq float %temp292.11, %129
+ %231 = fcmp olt float %129, undef
+ %232 = and i1 %230, %231
+ br i1 %232, label %ENDIF2594, label %ELSE2686
+
+ELSE2671: ; preds = %IF2667
+ br label %ENDIF2594
+
+ELSE2686: ; preds = %ELSE2668
+ %233 = fcmp oeq float %temp292.11, %146
+ %234 = fcmp olt float %146, undef
+ %235 = and i1 %233, %234
+ br i1 %235, label %ENDIF2594, label %ELSE2704
+
+ELSE2704: ; preds = %ELSE2686
+ %236 = fcmp oeq float %temp292.11, %181
+ %237 = fcmp olt float %181, undef
+ %238 = and i1 %236, %237
+ br i1 %238, label %ENDIF2594, label %ELSE2740
+
+ELSE2740: ; preds = %ELSE2704
+ br i1 undef, label %IF2757, label %ELSE2758
+
+IF2757: ; preds = %ELSE2740
+ br i1 undef, label %ENDIF2594, label %ELSE2761
+
+ELSE2758: ; preds = %ELSE2740
+ br i1 undef, label %IF2775, label %ENDIF2594
+
+ELSE2761: ; preds = %IF2757
+ br label %ENDIF2594
+
+IF2775: ; preds = %ELSE2758
+ %239 = fcmp olt float undef, undef
+ br i1 %239, label %ENDIF2594, label %ELSE2779
+
+ELSE2779: ; preds = %IF2775
+ br i1 undef, label %ENDIF2594, label %ELSE2782
+
+ELSE2782: ; preds = %ELSE2779
+ br i1 undef, label %ENDIF2594, label %ELSE2785
+
+ELSE2785: ; preds = %ELSE2782
+ %240 = fcmp olt float undef, 0.000000e+00
+ br i1 %240, label %ENDIF2594, label %ELSE2788
+
+ELSE2788: ; preds = %ELSE2785
+ %241 = fcmp olt float 0.000000e+00, undef
+ %.2848 = select i1 %241, float -1.000000e+00, float 1.000000e+00
+ br label %ENDIF2594
+
+ELSE2797: ; preds = %ENDIF2564
+ %242 = fsub float %8, undef
+ %243 = fsub float %9, undef
+ %244 = fsub float %10, undef
+ %245 = fmul float %242, undef
+ %246 = fmul float %243, undef
+ %247 = fmul float %244, undef
+ %248 = fsub float %11, undef
+ %249 = fsub float %12, undef
+ %250 = fsub float %13, undef
+ %251 = fmul float %248, undef
+ %252 = fmul float %249, undef
+ %253 = fmul float %250, undef
+ %254 = call float @llvm.minnum.f32(float %245, float %251)
+ %255 = call float @llvm.minnum.f32(float %246, float %252)
+ %256 = call float @llvm.maxnum.f32(float %247, float %253)
+ %257 = call float @llvm.maxnum.f32(float %254, float %255)
+ %258 = call float @llvm.maxnum.f32(float %257, float undef)
+ %259 = call float @llvm.minnum.f32(float undef, float %256)
+ %260 = fcmp ogt float %258, 0.000000e+00
+ %261 = fcmp olt float %258, 1.000000e+00
+ %262 = and i1 %260, %261
+ %263 = fcmp olt float %258, %259
+ %264 = and i1 %262, %263
+ br i1 %264, label %ENDIF2795, label %ELSE2800
+
+ENDIF2795: ; preds = %ELSE2824, %ELSE2821, %ELSE2818, %ELSE2815, %ELSE2812, %ELSE2809, %ELSE2806, %ELSE2803, %ELSE2800, %ELSE2797, %ENDIF2564
+ br label %LOOP
+
+ELSE2800: ; preds = %ELSE2797
+ br i1 undef, label %ENDIF2795, label %ELSE2803
+
+ELSE2803: ; preds = %ELSE2800
+ %265 = fsub float %20, undef
+ %266 = fsub float %21, undef
+ %267 = fsub float %22, undef
+ %268 = fmul float %265, undef
+ %269 = fmul float %266, undef
+ %270 = fmul float %267, 0.000000e+00
+ %271 = fsub float %23, undef
+ %272 = fsub float %24, undef
+ %273 = fsub float %25, undef
+ %274 = fmul float %271, undef
+ %275 = fmul float %272, undef
+ %276 = fmul float %273, undef
+ %277 = call float @llvm.minnum.f32(float %268, float %274)
+ %278 = call float @llvm.maxnum.f32(float %269, float %275)
+ %279 = call float @llvm.maxnum.f32(float %270, float %276)
+ %280 = call float @llvm.maxnum.f32(float %277, float undef)
+ %281 = call float @llvm.maxnum.f32(float %280, float undef)
+ %282 = call float @llvm.minnum.f32(float undef, float %278)
+ %283 = call float @llvm.minnum.f32(float %282, float %279)
+ %284 = fcmp ogt float %281, 0.000000e+00
+ %285 = fcmp olt float %281, 1.000000e+00
+ %286 = and i1 %284, %285
+ %287 = fcmp olt float %281, %283
+ %288 = and i1 %286, %287
+ br i1 %288, label %ENDIF2795, label %ELSE2806
+
+ELSE2806: ; preds = %ELSE2803
+ %289 = fsub float %26, undef
+ %290 = fsub float %27, undef
+ %291 = fsub float %28, undef
+ %292 = fmul float %289, undef
+ %293 = fmul float %290, 0.000000e+00
+ %294 = fmul float %291, undef
+ %295 = fsub float %29, undef
+ %296 = fmul float %295, undef
+ %297 = call float @llvm.minnum.f32(float %292, float %296)
+ %298 = call float @llvm.minnum.f32(float %293, float undef)
+ %299 = call float @llvm.maxnum.f32(float %294, float undef)
+ %300 = call float @llvm.maxnum.f32(float %297, float %298)
+ %301 = call float @llvm.maxnum.f32(float %300, float undef)
+ %302 = call float @llvm.minnum.f32(float undef, float %299)
+ %303 = fcmp ogt float %301, 0.000000e+00
+ %304 = fcmp olt float %301, 1.000000e+00
+ %305 = and i1 %303, %304
+ %306 = fcmp olt float %301, %302
+ %307 = and i1 %305, %306
+ br i1 %307, label %ENDIF2795, label %ELSE2809
+
+ELSE2809: ; preds = %ELSE2806
+ br i1 undef, label %ENDIF2795, label %ELSE2812
+
+ELSE2812: ; preds = %ELSE2809
+ br i1 undef, label %ENDIF2795, label %ELSE2815
+
+ELSE2815: ; preds = %ELSE2812
+ br i1 undef, label %ENDIF2795, label %ELSE2818
+
+ELSE2818: ; preds = %ELSE2815
+ br i1 undef, label %ENDIF2795, label %ELSE2821
+
+ELSE2821: ; preds = %ELSE2818
+ %308 = fsub float %56, undef
+ %309 = fsub float %57, undef
+ %310 = fsub float %58, undef
+ %311 = fmul float %308, undef
+ %312 = fmul float %309, 0.000000e+00
+ %313 = fmul float %310, undef
+ %314 = fsub float %59, undef
+ %315 = fsub float %60, undef
+ %316 = fsub float %61, undef
+ %317 = fmul float %314, undef
+ %318 = fmul float %315, undef
+ %319 = fmul float %316, undef
+ %320 = call float @llvm.maxnum.f32(float %311, float %317)
+ %321 = call float @llvm.maxnum.f32(float %312, float %318)
+ %322 = call float @llvm.maxnum.f32(float %313, float %319)
+ %323 = call float @llvm.minnum.f32(float %320, float %321)
+ %324 = call float @llvm.minnum.f32(float %323, float %322)
+ %325 = fcmp ogt float undef, 0.000000e+00
+ %326 = fcmp olt float undef, 1.000000e+00
+ %327 = and i1 %325, %326
+ %328 = fcmp olt float undef, %324
+ %329 = and i1 %327, %328
+ br i1 %329, label %ENDIF2795, label %ELSE2824
+
+ELSE2824: ; preds = %ELSE2821
+ %.2849 = select i1 undef, float 0.000000e+00, float 1.000000e+00
+ br label %ENDIF2795
+}
+
+; Function Attrs: nounwind readnone
+declare float @llvm.SI.load.const(<16 x i8>, i32) #1
+
+; Function Attrs: readnone
+declare float @llvm.AMDIL.fraction.(float) #2
+
+; Function Attrs: nounwind readnone
+declare float @llvm.sqrt.f32(float) #1
+
+; Function Attrs: nounwind readnone
+declare float @llvm.minnum.f32(float, float) #1
+
+; Function Attrs: nounwind readnone
+declare float @llvm.maxnum.f32(float, float) #1
+
+; Function Attrs: readnone
+declare float @llvm.AMDGPU.lrp(float, float, float) #2
+
+declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
+
+attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" }
+attributes #1 = { nounwind readnone }
+attributes #2 = { readnone }
diff --git a/test/CodeGen/R600/si-triv-disjoint-mem-access.ll b/test/CodeGen/R600/si-triv-disjoint-mem-access.ll
index f6dcb388248a..5a6129aaa3fa 100644
--- a/test/CodeGen/R600/si-triv-disjoint-mem-access.ll
+++ b/test/CodeGen/R600/si-triv-disjoint-mem-access.ll
@@ -14,14 +14,14 @@ declare void @llvm.AMDGPU.barrier.local() #2
; CI-NEXT: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:8
; CI: buffer_store_dword
define void @reorder_local_load_global_store_local_load(i32 addrspace(1)* %out, i32 addrspace(1)* %gptr) #0 {
- %ptr0 = load i32 addrspace(3)* addrspace(3)* @stored_lds_ptr, align 4
+ %ptr0 = load i32 addrspace(3)*, i32 addrspace(3)* addrspace(3)* @stored_lds_ptr, align 4
- %ptr1 = getelementptr inbounds i32 addrspace(3)* %ptr0, i32 1
- %ptr2 = getelementptr inbounds i32 addrspace(3)* %ptr0, i32 2
+ %ptr1 = getelementptr inbounds i32, i32 addrspace(3)* %ptr0, i32 1
+ %ptr2 = getelementptr inbounds i32, i32 addrspace(3)* %ptr0, i32 2
- %tmp1 = load i32 addrspace(3)* %ptr1, align 4
+ %tmp1 = load i32, i32 addrspace(3)* %ptr1, align 4
store i32 99, i32 addrspace(1)* %gptr, align 4
- %tmp2 = load i32 addrspace(3)* %ptr2, align 4
+ %tmp2 = load i32, i32 addrspace(3)* %ptr2, align 4
%add = add nsw i32 %tmp1, %tmp2
@@ -34,14 +34,14 @@ define void @reorder_local_load_global_store_local_load(i32 addrspace(1)* %out,
; CI: buffer_store_dword
; CI: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:8
define void @no_reorder_local_load_volatile_global_store_local_load(i32 addrspace(1)* %out, i32 addrspace(1)* %gptr) #0 {
- %ptr0 = load i32 addrspace(3)* addrspace(3)* @stored_lds_ptr, align 4
+ %ptr0 = load i32 addrspace(3)*, i32 addrspace(3)* addrspace(3)* @stored_lds_ptr, align 4
- %ptr1 = getelementptr inbounds i32 addrspace(3)* %ptr0, i32 1
- %ptr2 = getelementptr inbounds i32 addrspace(3)* %ptr0, i32 2
+ %ptr1 = getelementptr inbounds i32, i32 addrspace(3)* %ptr0, i32 1
+ %ptr2 = getelementptr inbounds i32, i32 addrspace(3)* %ptr0, i32 2
- %tmp1 = load i32 addrspace(3)* %ptr1, align 4
+ %tmp1 = load i32, i32 addrspace(3)* %ptr1, align 4
store volatile i32 99, i32 addrspace(1)* %gptr, align 4
- %tmp2 = load i32 addrspace(3)* %ptr2, align 4
+ %tmp2 = load i32, i32 addrspace(3)* %ptr2, align 4
%add = add nsw i32 %tmp1, %tmp2
@@ -54,15 +54,15 @@ define void @no_reorder_local_load_volatile_global_store_local_load(i32 addrspac
; CI: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:8
; CI: buffer_store_dword
define void @no_reorder_barrier_local_load_global_store_local_load(i32 addrspace(1)* %out, i32 addrspace(1)* %gptr) #0 {
- %ptr0 = load i32 addrspace(3)* addrspace(3)* @stored_lds_ptr, align 4
+ %ptr0 = load i32 addrspace(3)*, i32 addrspace(3)* addrspace(3)* @stored_lds_ptr, align 4
- %ptr1 = getelementptr inbounds i32 addrspace(3)* %ptr0, i32 1
- %ptr2 = getelementptr inbounds i32 addrspace(3)* %ptr0, i32 2
+ %ptr1 = getelementptr inbounds i32, i32 addrspace(3)* %ptr0, i32 1
+ %ptr2 = getelementptr inbounds i32, i32 addrspace(3)* %ptr0, i32 2
- %tmp1 = load i32 addrspace(3)* %ptr1, align 4
+ %tmp1 = load i32, i32 addrspace(3)* %ptr1, align 4
store i32 99, i32 addrspace(1)* %gptr, align 4
call void @llvm.AMDGPU.barrier.local() #2
- %tmp2 = load i32 addrspace(3)* %ptr2, align 4
+ %tmp2 = load i32, i32 addrspace(3)* %ptr2, align 4
%add = add nsw i32 %tmp1, %tmp2
@@ -79,14 +79,14 @@ define void @no_reorder_barrier_local_load_global_store_local_load(i32 addrspace
; CI: buffer_load_dword
; CI: buffer_store_dword
define void @no_reorder_constant_load_global_store_constant_load(i32 addrspace(1)* %out, i32 addrspace(1)* %gptr) #0 {
- %ptr0 = load i32 addrspace(2)* addrspace(3)* @stored_constant_ptr, align 8
+ %ptr0 = load i32 addrspace(2)*, i32 addrspace(2)* addrspace(3)* @stored_constant_ptr, align 8
- %ptr1 = getelementptr inbounds i32 addrspace(2)* %ptr0, i64 1
- %ptr2 = getelementptr inbounds i32 addrspace(2)* %ptr0, i64 2
+ %ptr1 = getelementptr inbounds i32, i32 addrspace(2)* %ptr0, i64 1
+ %ptr2 = getelementptr inbounds i32, i32 addrspace(2)* %ptr0, i64 2
- %tmp1 = load i32 addrspace(2)* %ptr1, align 4
+ %tmp1 = load i32, i32 addrspace(2)* %ptr1, align 4
store i32 99, i32 addrspace(1)* %gptr, align 4
- %tmp2 = load i32 addrspace(2)* %ptr2, align 4
+ %tmp2 = load i32, i32 addrspace(2)* %ptr2, align 4
%add = add nsw i32 %tmp1, %tmp2
@@ -94,22 +94,20 @@ define void @no_reorder_constant_load_global_store_constant_load(i32 addrspace(1
ret void
}
-; XXX: Should be able to reorder this, but the laods count as ordered
-
; FUNC-LABEL: @reorder_constant_load_local_store_constant_load
; CI: buffer_load_dword
-; CI: ds_write_b32
; CI: buffer_load_dword
+; CI: ds_write_b32
; CI: buffer_store_dword
define void @reorder_constant_load_local_store_constant_load(i32 addrspace(1)* %out, i32 addrspace(3)* %lptr) #0 {
- %ptr0 = load i32 addrspace(2)* addrspace(3)* @stored_constant_ptr, align 8
+ %ptr0 = load i32 addrspace(2)*, i32 addrspace(2)* addrspace(3)* @stored_constant_ptr, align 8
- %ptr1 = getelementptr inbounds i32 addrspace(2)* %ptr0, i64 1
- %ptr2 = getelementptr inbounds i32 addrspace(2)* %ptr0, i64 2
+ %ptr1 = getelementptr inbounds i32, i32 addrspace(2)* %ptr0, i64 1
+ %ptr2 = getelementptr inbounds i32, i32 addrspace(2)* %ptr0, i64 2
- %tmp1 = load i32 addrspace(2)* %ptr1, align 4
+ %tmp1 = load i32, i32 addrspace(2)* %ptr1, align 4
store i32 99, i32 addrspace(3)* %lptr, align 4
- %tmp2 = load i32 addrspace(2)* %ptr2, align 4
+ %tmp2 = load i32, i32 addrspace(2)* %ptr2, align 4
%add = add nsw i32 %tmp1, %tmp2
@@ -124,12 +122,12 @@ define void @reorder_constant_load_local_store_constant_load(i32 addrspace(1)* %
; CI: ds_write_b32
; CI: buffer_store_dword
define void @reorder_smrd_load_local_store_smrd_load(i32 addrspace(1)* %out, i32 addrspace(3)* noalias %lptr, i32 addrspace(2)* %ptr0) #0 {
- %ptr1 = getelementptr inbounds i32 addrspace(2)* %ptr0, i64 1
- %ptr2 = getelementptr inbounds i32 addrspace(2)* %ptr0, i64 2
+ %ptr1 = getelementptr inbounds i32, i32 addrspace(2)* %ptr0, i64 1
+ %ptr2 = getelementptr inbounds i32, i32 addrspace(2)* %ptr0, i64 2
- %tmp1 = load i32 addrspace(2)* %ptr1, align 4
+ %tmp1 = load i32, i32 addrspace(2)* %ptr1, align 4
store i32 99, i32 addrspace(3)* %lptr, align 4
- %tmp2 = load i32 addrspace(2)* %ptr2, align 4
+ %tmp2 = load i32, i32 addrspace(2)* %ptr2, align 4
%add = add nsw i32 %tmp1, %tmp2
@@ -143,12 +141,12 @@ define void @reorder_smrd_load_local_store_smrd_load(i32 addrspace(1)* %out, i32
; CI: ds_write_b32
; CI: buffer_store_dword
define void @reorder_global_load_local_store_global_load(i32 addrspace(1)* %out, i32 addrspace(3)* %lptr, i32 addrspace(1)* %ptr0) #0 {
- %ptr1 = getelementptr inbounds i32 addrspace(1)* %ptr0, i64 1
- %ptr2 = getelementptr inbounds i32 addrspace(1)* %ptr0, i64 2
+ %ptr1 = getelementptr inbounds i32, i32 addrspace(1)* %ptr0, i64 1
+ %ptr2 = getelementptr inbounds i32, i32 addrspace(1)* %ptr0, i64 2
- %tmp1 = load i32 addrspace(1)* %ptr1, align 4
+ %tmp1 = load i32, i32 addrspace(1)* %ptr1, align 4
store i32 99, i32 addrspace(3)* %lptr, align 4
- %tmp2 = load i32 addrspace(1)* %ptr2, align 4
+ %tmp2 = load i32, i32 addrspace(1)* %ptr2, align 4
%add = add nsw i32 %tmp1, %tmp2
@@ -165,15 +163,15 @@ define void @reorder_global_load_local_store_global_load(i32 addrspace(1)* %out,
; CI: buffer_store_dword
; CI: s_endpgm
define void @reorder_local_offsets(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* noalias nocapture readnone %gptr, i32 addrspace(3)* noalias nocapture %ptr0) #0 {
- %ptr1 = getelementptr inbounds i32 addrspace(3)* %ptr0, i32 3
- %ptr2 = getelementptr inbounds i32 addrspace(3)* %ptr0, i32 100
- %ptr3 = getelementptr inbounds i32 addrspace(3)* %ptr0, i32 101
+ %ptr1 = getelementptr inbounds i32, i32 addrspace(3)* %ptr0, i32 3
+ %ptr2 = getelementptr inbounds i32, i32 addrspace(3)* %ptr0, i32 100
+ %ptr3 = getelementptr inbounds i32, i32 addrspace(3)* %ptr0, i32 101
store i32 123, i32 addrspace(3)* %ptr1, align 4
- %tmp1 = load i32 addrspace(3)* %ptr2, align 4
- %tmp2 = load i32 addrspace(3)* %ptr3, align 4
+ %tmp1 = load i32, i32 addrspace(3)* %ptr2, align 4
+ %tmp2 = load i32, i32 addrspace(3)* %ptr3, align 4
store i32 123, i32 addrspace(3)* %ptr2, align 4
- %tmp3 = load i32 addrspace(3)* %ptr1, align 4
+ %tmp3 = load i32, i32 addrspace(3)* %ptr1, align 4
store i32 789, i32 addrspace(3)* %ptr3, align 4
%add.0 = add nsw i32 %tmp2, %tmp1
@@ -191,15 +189,15 @@ define void @reorder_local_offsets(i32 addrspace(1)* nocapture %out, i32 addrspa
; CI: buffer_store_dword
; CI: s_endpgm
define void @reorder_global_offsets(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* noalias nocapture readnone %gptr, i32 addrspace(1)* noalias nocapture %ptr0) #0 {
- %ptr1 = getelementptr inbounds i32 addrspace(1)* %ptr0, i32 3
- %ptr2 = getelementptr inbounds i32 addrspace(1)* %ptr0, i32 100
- %ptr3 = getelementptr inbounds i32 addrspace(1)* %ptr0, i32 101
+ %ptr1 = getelementptr inbounds i32, i32 addrspace(1)* %ptr0, i32 3
+ %ptr2 = getelementptr inbounds i32, i32 addrspace(1)* %ptr0, i32 100
+ %ptr3 = getelementptr inbounds i32, i32 addrspace(1)* %ptr0, i32 101
store i32 123, i32 addrspace(1)* %ptr1, align 4
- %tmp1 = load i32 addrspace(1)* %ptr2, align 4
- %tmp2 = load i32 addrspace(1)* %ptr3, align 4
+ %tmp1 = load i32, i32 addrspace(1)* %ptr2, align 4
+ %tmp2 = load i32, i32 addrspace(1)* %ptr3, align 4
store i32 123, i32 addrspace(1)* %ptr2, align 4
- %tmp3 = load i32 addrspace(1)* %ptr1, align 4
+ %tmp3 = load i32, i32 addrspace(1)* %ptr1, align 4
store i32 789, i32 addrspace(1)* %ptr3, align 4
%add.0 = add nsw i32 %tmp2, %tmp1
@@ -213,19 +211,19 @@ define void @reorder_global_offsets(i32 addrspace(1)* nocapture %out, i32 addrsp
; XCI: TBUFFER_STORE_FORMAT
; XCI: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}}, 0x8
; define void @reorder_local_load_tbuffer_store_local_load(i32 addrspace(1)* %out, i32 %a1, i32 %vaddr) #1 {
-; %ptr0 = load i32 addrspace(3)* addrspace(3)* @stored_lds_ptr, align 4
+; %ptr0 = load i32 addrspace(3)*, i32 addrspace(3)* addrspace(3)* @stored_lds_ptr, align 4
-; %ptr1 = getelementptr inbounds i32 addrspace(3)* %ptr0, i32 1
-; %ptr2 = getelementptr inbounds i32 addrspace(3)* %ptr0, i32 2
+; %ptr1 = getelementptr inbounds i32, i32 addrspace(3)* %ptr0, i32 1
+; %ptr2 = getelementptr inbounds i32, i32 addrspace(3)* %ptr0, i32 2
-; %tmp1 = load i32 addrspace(3)* %ptr1, align 4
+; %tmp1 = load i32, i32 addrspace(3)* %ptr1, align 4
; %vdata = insertelement <4 x i32> undef, i32 %a1, i32 0
; call void @llvm.SI.tbuffer.store.v4i32(<16 x i8> undef, <4 x i32> %vdata,
; i32 4, i32 %vaddr, i32 0, i32 32, i32 14, i32 4, i32 1, i32 0, i32 1,
; i32 1, i32 0)
-; %tmp2 = load i32 addrspace(3)* %ptr2, align 4
+; %tmp2 = load i32, i32 addrspace(3)* %ptr2, align 4
; %add = add nsw i32 %tmp1, %tmp2
diff --git a/test/CodeGen/R600/si-vector-hang.ll b/test/CodeGen/R600/si-vector-hang.ll
index 61812c61ba19..94c47fe3c600 100644
--- a/test/CodeGen/R600/si-vector-hang.ll
+++ b/test/CodeGen/R600/si-vector-hang.ll
@@ -17,52 +17,52 @@ target triple = "r600--"
; Function Attrs: nounwind
define void @test_8_min_char(i8 addrspace(1)* nocapture %out, i8 addrspace(1)* nocapture readonly %in0, i8 addrspace(1)* nocapture readonly %in1) #0 {
entry:
- %0 = load i8 addrspace(1)* %in0, align 1
+ %0 = load i8, i8 addrspace(1)* %in0, align 1
%1 = insertelement <8 x i8> undef, i8 %0, i32 0
- %arrayidx2.i.i = getelementptr inbounds i8 addrspace(1)* %in0, i64 1
- %2 = load i8 addrspace(1)* %arrayidx2.i.i, align 1
+ %arrayidx2.i.i = getelementptr inbounds i8, i8 addrspace(1)* %in0, i64 1
+ %2 = load i8, i8 addrspace(1)* %arrayidx2.i.i, align 1
%3 = insertelement <8 x i8> %1, i8 %2, i32 1
- %arrayidx6.i.i = getelementptr inbounds i8 addrspace(1)* %in0, i64 2
- %4 = load i8 addrspace(1)* %arrayidx6.i.i, align 1
+ %arrayidx6.i.i = getelementptr inbounds i8, i8 addrspace(1)* %in0, i64 2
+ %4 = load i8, i8 addrspace(1)* %arrayidx6.i.i, align 1
%5 = insertelement <8 x i8> %3, i8 %4, i32 2
- %arrayidx10.i.i = getelementptr inbounds i8 addrspace(1)* %in0, i64 3
- %6 = load i8 addrspace(1)* %arrayidx10.i.i, align 1
+ %arrayidx10.i.i = getelementptr inbounds i8, i8 addrspace(1)* %in0, i64 3
+ %6 = load i8, i8 addrspace(1)* %arrayidx10.i.i, align 1
%7 = insertelement <8 x i8> %5, i8 %6, i32 3
- %arrayidx.i.i = getelementptr inbounds i8 addrspace(1)* %in0, i64 4
- %8 = load i8 addrspace(1)* %arrayidx.i.i, align 1
+ %arrayidx.i.i = getelementptr inbounds i8, i8 addrspace(1)* %in0, i64 4
+ %8 = load i8, i8 addrspace(1)* %arrayidx.i.i, align 1
%9 = insertelement <8 x i8> undef, i8 %8, i32 0
- %arrayidx2.i9.i = getelementptr inbounds i8 addrspace(1)* %in0, i64 5
- %10 = load i8 addrspace(1)* %arrayidx2.i9.i, align 1
+ %arrayidx2.i9.i = getelementptr inbounds i8, i8 addrspace(1)* %in0, i64 5
+ %10 = load i8, i8 addrspace(1)* %arrayidx2.i9.i, align 1
%11 = insertelement <8 x i8> %9, i8 %10, i32 1
- %arrayidx6.i11.i = getelementptr inbounds i8 addrspace(1)* %in0, i64 6
- %12 = load i8 addrspace(1)* %arrayidx6.i11.i, align 1
+ %arrayidx6.i11.i = getelementptr inbounds i8, i8 addrspace(1)* %in0, i64 6
+ %12 = load i8, i8 addrspace(1)* %arrayidx6.i11.i, align 1
%13 = insertelement <8 x i8> %11, i8 %12, i32 2
- %arrayidx10.i13.i = getelementptr inbounds i8 addrspace(1)* %in0, i64 7
- %14 = load i8 addrspace(1)* %arrayidx10.i13.i, align 1
+ %arrayidx10.i13.i = getelementptr inbounds i8, i8 addrspace(1)* %in0, i64 7
+ %14 = load i8, i8 addrspace(1)* %arrayidx10.i13.i, align 1
%15 = insertelement <8 x i8> %13, i8 %14, i32 3
%vecinit5.i = shufflevector <8 x i8> %7, <8 x i8> %15, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
- %16 = load i8 addrspace(1)* %in1, align 1
+ %16 = load i8, i8 addrspace(1)* %in1, align 1
%17 = insertelement <8 x i8> undef, i8 %16, i32 0
- %arrayidx2.i.i4 = getelementptr inbounds i8 addrspace(1)* %in1, i64 1
- %18 = load i8 addrspace(1)* %arrayidx2.i.i4, align 1
+ %arrayidx2.i.i4 = getelementptr inbounds i8, i8 addrspace(1)* %in1, i64 1
+ %18 = load i8, i8 addrspace(1)* %arrayidx2.i.i4, align 1
%19 = insertelement <8 x i8> %17, i8 %18, i32 1
- %arrayidx6.i.i5 = getelementptr inbounds i8 addrspace(1)* %in1, i64 2
- %20 = load i8 addrspace(1)* %arrayidx6.i.i5, align 1
+ %arrayidx6.i.i5 = getelementptr inbounds i8, i8 addrspace(1)* %in1, i64 2
+ %20 = load i8, i8 addrspace(1)* %arrayidx6.i.i5, align 1
%21 = insertelement <8 x i8> %19, i8 %20, i32 2
- %arrayidx10.i.i6 = getelementptr inbounds i8 addrspace(1)* %in1, i64 3
- %22 = load i8 addrspace(1)* %arrayidx10.i.i6, align 1
+ %arrayidx10.i.i6 = getelementptr inbounds i8, i8 addrspace(1)* %in1, i64 3
+ %22 = load i8, i8 addrspace(1)* %arrayidx10.i.i6, align 1
%23 = insertelement <8 x i8> %21, i8 %22, i32 3
- %arrayidx.i.i7 = getelementptr inbounds i8 addrspace(1)* %in1, i64 4
- %24 = load i8 addrspace(1)* %arrayidx.i.i7, align 1
+ %arrayidx.i.i7 = getelementptr inbounds i8, i8 addrspace(1)* %in1, i64 4
+ %24 = load i8, i8 addrspace(1)* %arrayidx.i.i7, align 1
%25 = insertelement <8 x i8> undef, i8 %24, i32 0
- %arrayidx2.i9.i8 = getelementptr inbounds i8 addrspace(1)* %in1, i64 5
- %26 = load i8 addrspace(1)* %arrayidx2.i9.i8, align 1
+ %arrayidx2.i9.i8 = getelementptr inbounds i8, i8 addrspace(1)* %in1, i64 5
+ %26 = load i8, i8 addrspace(1)* %arrayidx2.i9.i8, align 1
%27 = insertelement <8 x i8> %25, i8 %26, i32 1
- %arrayidx6.i11.i9 = getelementptr inbounds i8 addrspace(1)* %in1, i64 6
- %28 = load i8 addrspace(1)* %arrayidx6.i11.i9, align 1
+ %arrayidx6.i11.i9 = getelementptr inbounds i8, i8 addrspace(1)* %in1, i64 6
+ %28 = load i8, i8 addrspace(1)* %arrayidx6.i11.i9, align 1
%29 = insertelement <8 x i8> %27, i8 %28, i32 2
- %arrayidx10.i13.i10 = getelementptr inbounds i8 addrspace(1)* %in1, i64 7
- %30 = load i8 addrspace(1)* %arrayidx10.i13.i10, align 1
+ %arrayidx10.i13.i10 = getelementptr inbounds i8, i8 addrspace(1)* %in1, i64 7
+ %30 = load i8, i8 addrspace(1)* %arrayidx10.i13.i10, align 1
%31 = insertelement <8 x i8> %29, i8 %30, i32 3
%vecinit5.i11 = shufflevector <8 x i8> %23, <8 x i8> %31, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
%cmp.i = icmp slt <8 x i8> %vecinit5.i, %vecinit5.i11
@@ -70,25 +70,25 @@ entry:
%32 = extractelement <8 x i8> %cond.i, i32 0
store i8 %32, i8 addrspace(1)* %out, align 1
%33 = extractelement <8 x i8> %cond.i, i32 1
- %arrayidx2.i.i.i = getelementptr inbounds i8 addrspace(1)* %out, i64 1
+ %arrayidx2.i.i.i = getelementptr inbounds i8, i8 addrspace(1)* %out, i64 1
store i8 %33, i8 addrspace(1)* %arrayidx2.i.i.i, align 1
%34 = extractelement <8 x i8> %cond.i, i32 2
- %arrayidx.i.i.i = getelementptr inbounds i8 addrspace(1)* %out, i64 2
+ %arrayidx.i.i.i = getelementptr inbounds i8, i8 addrspace(1)* %out, i64 2
store i8 %34, i8 addrspace(1)* %arrayidx.i.i.i, align 1
%35 = extractelement <8 x i8> %cond.i, i32 3
- %arrayidx2.i6.i.i = getelementptr inbounds i8 addrspace(1)* %out, i64 3
+ %arrayidx2.i6.i.i = getelementptr inbounds i8, i8 addrspace(1)* %out, i64 3
store i8 %35, i8 addrspace(1)* %arrayidx2.i6.i.i, align 1
- %arrayidx.i.i3 = getelementptr inbounds i8 addrspace(1)* %out, i64 4
+ %arrayidx.i.i3 = getelementptr inbounds i8, i8 addrspace(1)* %out, i64 4
%36 = extractelement <8 x i8> %cond.i, i32 4
store i8 %36, i8 addrspace(1)* %arrayidx.i.i3, align 1
%37 = extractelement <8 x i8> %cond.i, i32 5
- %arrayidx2.i.i6.i = getelementptr inbounds i8 addrspace(1)* %out, i64 5
+ %arrayidx2.i.i6.i = getelementptr inbounds i8, i8 addrspace(1)* %out, i64 5
store i8 %37, i8 addrspace(1)* %arrayidx2.i.i6.i, align 1
%38 = extractelement <8 x i8> %cond.i, i32 6
- %arrayidx.i.i7.i = getelementptr inbounds i8 addrspace(1)* %out, i64 6
+ %arrayidx.i.i7.i = getelementptr inbounds i8, i8 addrspace(1)* %out, i64 6
store i8 %38, i8 addrspace(1)* %arrayidx.i.i7.i, align 1
%39 = extractelement <8 x i8> %cond.i, i32 7
- %arrayidx2.i6.i8.i = getelementptr inbounds i8 addrspace(1)* %out, i64 7
+ %arrayidx2.i6.i8.i = getelementptr inbounds i8, i8 addrspace(1)* %out, i64 7
store i8 %39, i8 addrspace(1)* %arrayidx2.i6.i8.i, align 1
ret void
}
diff --git a/test/CodeGen/R600/sign_extend.ll b/test/CodeGen/R600/sign_extend.ll
index 9550c2a7f061..06bee114c23a 100644
--- a/test/CodeGen/R600/sign_extend.ll
+++ b/test/CodeGen/R600/sign_extend.ll
@@ -24,8 +24,9 @@ entry:
}
; SI-LABEL: {{^}}s_sext_i1_to_i64:
-; SI: v_cndmask_b32_e64
-; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64 v[[LOREG:[0-9]+]], 0, -1, vcc
+; SI: v_mov_b32_e32 v[[HIREG:[0-9]+]], v[[LOREG]]
+; SI: buffer_store_dwordx2 v{{\[}}[[LOREG]]:[[HIREG]]{{\]}}
; SI: s_endpgm
define void @s_sext_i1_to_i64(i64 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
%cmp = icmp eq i32 %a, %b
@@ -47,7 +48,7 @@ define void @s_sext_i32_to_i64(i64 addrspace(1)* %out, i32 %a) nounwind {
; SI: v_ashr
; SI: s_endpgm
define void @v_sext_i32_to_i64(i64 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
- %val = load i32 addrspace(1)* %in, align 4
+ %val = load i32, i32 addrspace(1)* %in, align 4
%sext = sext i32 %val to i64
store i64 %sext, i64 addrspace(1)* %out, align 8
ret void
diff --git a/test/CodeGen/R600/simplify-demanded-bits-build-pair.ll b/test/CodeGen/R600/simplify-demanded-bits-build-pair.ll
index 28a413cd1b3c..dffee70b6b02 100644
--- a/test/CodeGen/R600/simplify-demanded-bits-build-pair.ll
+++ b/test/CodeGen/R600/simplify-demanded-bits-build-pair.ll
@@ -22,16 +22,16 @@ define void @trunc_select_i64(i32 addrspace(1)* %out, i64 %a, i64 %b, i32 %c) {
define void @trunc_load_alloca_i64(i64 addrspace(1)* %out, i32 %a, i32 %b) {
%idx = add i32 %a, %b
%alloca = alloca i64, i32 4
- %gep0 = getelementptr i64* %alloca, i64 0
- %gep1 = getelementptr i64* %alloca, i64 1
- %gep2 = getelementptr i64* %alloca, i64 2
- %gep3 = getelementptr i64* %alloca, i64 3
+ %gep0 = getelementptr i64, i64* %alloca, i64 0
+ %gep1 = getelementptr i64, i64* %alloca, i64 1
+ %gep2 = getelementptr i64, i64* %alloca, i64 2
+ %gep3 = getelementptr i64, i64* %alloca, i64 3
store i64 24, i64* %gep0, align 8
store i64 9334, i64* %gep1, align 8
store i64 3935, i64* %gep2, align 8
store i64 9342, i64* %gep3, align 8
- %gep = getelementptr i64* %alloca, i32 %idx
- %load = load i64* %gep, align 8
+ %gep = getelementptr i64, i64* %alloca, i32 %idx
+ %load = load i64, i64* %gep, align 8
%mask = and i64 %load, 4294967296
%add = add i64 %mask, -1
store i64 %add, i64 addrspace(1)* %out, align 4
diff --git a/test/CodeGen/R600/sint_to_fp.f64.ll b/test/CodeGen/R600/sint_to_fp.f64.ll
index 77844a6aa384..da4e91db3a38 100644
--- a/test/CodeGen/R600/sint_to_fp.f64.ll
+++ b/test/CodeGen/R600/sint_to_fp.f64.ll
@@ -10,12 +10,13 @@ define void @sint_to_fp_i32_to_f64(double addrspace(1)* %out, i32 %in) {
ret void
}
+; FIXME: select on 0, 0
; SI-LABEL: {{^}}sint_to_fp_i1_f64:
; SI: v_cmp_eq_i32_e64 [[CMP:s\[[0-9]+:[0-9]\]]],
; We can't fold the SGPRs into v_cndmask_b32_e64, because it already
; uses an SGPR for [[CMP]]
; SI: v_cndmask_b32_e64 v{{[0-9]+}}, 0, v{{[0-9]+}}, [[CMP]]
-; SI: v_cndmask_b32_e64 v{{[0-9]+}}, 0, v{{[0-9]+}}, [[CMP]]
+; SI: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 0, [[CMP]]
; SI: buffer_store_dwordx2
; SI: s_endpgm
define void @sint_to_fp_i1_f64(double addrspace(1)* %out, i32 %in) {
@@ -52,8 +53,8 @@ define void @s_sint_to_fp_i64_to_f64(double addrspace(1)* %out, i64 %in) {
; SI: buffer_store_dwordx2 [[RESULT]]
define void @v_sint_to_fp_i64_to_f64(double addrspace(1)* %out, i64 addrspace(1)* %in) {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep = getelementptr i64 addrspace(1)* %in, i32 %tid
- %val = load i64 addrspace(1)* %gep, align 8
+ %gep = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
+ %val = load i64, i64 addrspace(1)* %gep, align 8
%result = sitofp i64 %val to double
store double %result, double addrspace(1)* %out
ret void
diff --git a/test/CodeGen/R600/sint_to_fp.ll b/test/CodeGen/R600/sint_to_fp.ll
index 6a291cfe9269..8506441d1361 100644
--- a/test/CodeGen/R600/sint_to_fp.ll
+++ b/test/CodeGen/R600/sint_to_fp.ll
@@ -35,7 +35,7 @@ define void @sint_to_fp_v2i32(<2 x float> addrspace(1)* %out, <2 x i32> %in) {
; SI: v_cvt_f32_i32_e32
; SI: v_cvt_f32_i32_e32
define void @sint_to_fp_v4i32(<4 x float> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
- %value = load <4 x i32> addrspace(1) * %in
+ %value = load <4 x i32>, <4 x i32> addrspace(1) * %in
%result = sitofp <4 x i32> %value to <4 x float>
store <4 x float> %result, <4 x float> addrspace(1)* %out
ret void
diff --git a/test/CodeGen/R600/smrd.ll b/test/CodeGen/R600/smrd.ll
index a66ad0201bf9..b0c18ca5959c 100644
--- a/test/CodeGen/R600/smrd.ll
+++ b/test/CodeGen/R600/smrd.ll
@@ -1,65 +1,73 @@
-; RUN: llc < %s -march=amdgcn -mcpu=SI -show-mc-encoding -verify-machineinstrs | FileCheck %s
+; RUN: llc < %s -march=amdgcn -mcpu=SI -show-mc-encoding -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=GCN %s
+; RUN: llc < %s -march=amdgcn -mcpu=tonga -show-mc-encoding -verify-machineinstrs | FileCheck --check-prefix=VI --check-prefix=GCN %s
; SMRD load with an immediate offset.
-; CHECK-LABEL: {{^}}smrd0:
-; CHECK: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x1 ; encoding: [0x01
+; GCN-LABEL: {{^}}smrd0:
+; SI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x1 ; encoding: [0x01
+; VI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x4
define void @smrd0(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) {
entry:
- %0 = getelementptr i32 addrspace(2)* %ptr, i64 1
- %1 = load i32 addrspace(2)* %0
+ %0 = getelementptr i32, i32 addrspace(2)* %ptr, i64 1
+ %1 = load i32, i32 addrspace(2)* %0
store i32 %1, i32 addrspace(1)* %out
ret void
}
; SMRD load with the largest possible immediate offset.
-; CHECK-LABEL: {{^}}smrd1:
-; CHECK: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0xff ; encoding: [0xff
+; GCN-LABEL: {{^}}smrd1:
+; SI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0xff ; encoding: [0xff
+; VI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x3fc
define void @smrd1(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) {
entry:
- %0 = getelementptr i32 addrspace(2)* %ptr, i64 255
- %1 = load i32 addrspace(2)* %0
+ %0 = getelementptr i32, i32 addrspace(2)* %ptr, i64 255
+ %1 = load i32, i32 addrspace(2)* %0
store i32 %1, i32 addrspace(1)* %out
ret void
}
; SMRD load with an offset greater than the largest possible immediate.
-; CHECK-LABEL: {{^}}smrd2:
-; CHECK: s_movk_i32 s[[OFFSET:[0-9]]], 0x400
-; CHECK: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], s[[OFFSET]] ; encoding: [0x0[[OFFSET]]
-; CHECK: s_endpgm
+; GCN-LABEL: {{^}}smrd2:
+; SI: s_movk_i32 s[[OFFSET:[0-9]]], 0x400
+; SI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], s[[OFFSET]] ; encoding: [0x0[[OFFSET]]
+; VI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x400
+; GCN: s_endpgm
define void @smrd2(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) {
entry:
- %0 = getelementptr i32 addrspace(2)* %ptr, i64 256
- %1 = load i32 addrspace(2)* %0
+ %0 = getelementptr i32, i32 addrspace(2)* %ptr, i64 256
+ %1 = load i32, i32 addrspace(2)* %0
store i32 %1, i32 addrspace(1)* %out
ret void
}
; SMRD load with a 64-bit offset
-; CHECK-LABEL: {{^}}smrd3:
-; CHECK-DAG: s_mov_b32 s[[SHI:[0-9]+]], 4
-; CHECK-DAG: s_mov_b32 s[[SLO:[0-9]+]], 0 ;
-; FIXME: We don't need to copy these values to VGPRs
-; CHECK-DAG: v_mov_b32_e32 v[[VLO:[0-9]+]], s[[SLO]]
-; CHECK-DAG: v_mov_b32_e32 v[[VHI:[0-9]+]], s[[SHI]]
+; GCN-LABEL: {{^}}smrd3:
+; FIXME: There are too many copies here because we don't fold immediates
+; through REG_SEQUENCE
+; SI: s_mov_b32 s[[SLO:[0-9]+]], 0 ;
+; SI: s_mov_b32 s[[SHI:[0-9]+]], 4
+; SI: s_mov_b32 s[[SSLO:[0-9]+]], s[[SLO]]
+; SI-DAG: v_mov_b32_e32 v[[VLO:[0-9]+]], s[[SSLO]]
+; SI-DAG: v_mov_b32_e32 v[[VHI:[0-9]+]], s[[SHI]]
; FIXME: We should be able to use s_load_dword here
-; CHECK: buffer_load_dword v{{[0-9]+}}, v{{\[}}[[VLO]]:[[VHI]]{{\]}}, s[{{[0-9]+:[0-9]+}}], 0 addr64
-; CHECK: s_endpgm
+; SI: buffer_load_dword v{{[0-9]+}}, v{{\[}}[[VLO]]:[[VHI]]{{\]}}, s[{{[0-9]+:[0-9]+}}], 0 addr64
+; TODO: Add VI checks
+; GCN: s_endpgm
define void @smrd3(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) {
entry:
- %0 = getelementptr i32 addrspace(2)* %ptr, i64 4294967296 ; 2 ^ 32
- %1 = load i32 addrspace(2)* %0
+ %0 = getelementptr i32, i32 addrspace(2)* %ptr, i64 4294967296 ; 2 ^ 32
+ %1 = load i32, i32 addrspace(2)* %0
store i32 %1, i32 addrspace(1)* %out
ret void
}
; SMRD load using the load.const intrinsic with an immediate offset
-; CHECK-LABEL: {{^}}smrd_load_const0:
-; CHECK: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x4 ; encoding: [0x04
+; GCN-LABEL: {{^}}smrd_load_const0:
+; SI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x4 ; encoding: [0x04
+; VI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x10
define void @smrd_load_const0(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
- %20 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
- %21 = load <16 x i8> addrspace(2)* %20
+ %20 = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %0, i32 0
+ %21 = load <16 x i8>, <16 x i8> addrspace(2)* %20
%22 = call float @llvm.SI.load.const(<16 x i8> %21, i32 16)
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %22, float %22, float %22, float %22)
ret void
@@ -67,12 +75,13 @@ main_body:
; SMRD load using the load.const intrinsic with the largest possible immediate
; offset.
-; CHECK-LABEL: {{^}}smrd_load_const1:
-; CHECK: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0xff ; encoding: [0xff
+; GCN-LABEL: {{^}}smrd_load_const1:
+; SI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0xff ; encoding: [0xff
+; VI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x3fc
define void @smrd_load_const1(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
- %20 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
- %21 = load <16 x i8> addrspace(2)* %20
+ %20 = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %0, i32 0
+ %21 = load <16 x i8>, <16 x i8> addrspace(2)* %20
%22 = call float @llvm.SI.load.const(<16 x i8> %21, i32 1020)
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %22, float %22, float %22, float %22)
ret void
@@ -80,13 +89,14 @@ main_body:
; SMRD load using the load.const intrinsic with an offset greater than the
; largest possible
; immediate offset.
-; CHECK-LABEL: {{^}}smrd_load_const2:
-; CHECK: s_movk_i32 s[[OFFSET:[0-9]]], 0x400
-; CHECK: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], s[[OFFSET]] ; encoding: [0x0[[OFFSET]]
+; GCN-LABEL: {{^}}smrd_load_const2:
+; SI: s_movk_i32 s[[OFFSET:[0-9]]], 0x400
+; SI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], s[[OFFSET]] ; encoding: [0x0[[OFFSET]]
+; VI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x400
define void @smrd_load_const2(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
- %20 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
- %21 = load <16 x i8> addrspace(2)* %20
+ %20 = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %0, i32 0
+ %21 = load <16 x i8>, <16 x i8> addrspace(2)* %20
%22 = call float @llvm.SI.load.const(<16 x i8> %21, i32 1024)
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %22, float %22, float %22, float %22)
ret void
diff --git a/test/CodeGen/R600/split-scalar-i64-add.ll b/test/CodeGen/R600/split-scalar-i64-add.ll
index ec50fd9f4c1e..46409cdfae1c 100644
--- a/test/CodeGen/R600/split-scalar-i64-add.ll
+++ b/test/CodeGen/R600/split-scalar-i64-add.ll
@@ -37,8 +37,8 @@ define void @imp_def_vcc_split_i64_add_1(i64 addrspace(1)* %out, i32 %val0, i64
; SI: v_addc_u32
define void @imp_def_vcc_split_i64_add_2(i64 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %val0, i64 %val1) {
%tid = call i32 @llvm.r600.read.tidig.x() readnone
- %gep = getelementptr i32 addrspace(1)* %in, i32 %tid
- %load = load i32 addrspace(1)* %gep
+ %gep = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
+ %load = load i32, i32 addrspace(1)* %gep
%vec.0 = insertelement <2 x i32> undef, i32 %val0, i32 0
%vec.1 = insertelement <2 x i32> %vec.0, i32 %load, i32 1
%bc = bitcast <2 x i32> %vec.1 to i64
diff --git a/test/CodeGen/R600/sra.ll b/test/CodeGen/R600/sra.ll
index d6c6ccd28382..bcbc32f4c053 100644
--- a/test/CodeGen/R600/sra.ll
+++ b/test/CodeGen/R600/sra.ll
@@ -15,9 +15,9 @@
;VI: v_ashrrev_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
define void @ashr_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
- %b_ptr = getelementptr <2 x i32> addrspace(1)* %in, i32 1
- %a = load <2 x i32> addrspace(1) * %in
- %b = load <2 x i32> addrspace(1) * %b_ptr
+ %b_ptr = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %in, i32 1
+ %a = load <2 x i32>, <2 x i32> addrspace(1) * %in
+ %b = load <2 x i32>, <2 x i32> addrspace(1) * %b_ptr
%result = ashr <2 x i32> %a, %b
store <2 x i32> %result, <2 x i32> addrspace(1)* %out
ret void
@@ -42,9 +42,9 @@ define void @ashr_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %i
;VI: v_ashrrev_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
define void @ashr_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
- %b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1
- %a = load <4 x i32> addrspace(1) * %in
- %b = load <4 x i32> addrspace(1) * %b_ptr
+ %b_ptr = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 1
+ %a = load <4 x i32>, <4 x i32> addrspace(1) * %in
+ %b = load <4 x i32>, <4 x i32> addrspace(1) * %b_ptr
%result = ashr <4 x i32> %a, %b
store <4 x i32> %result, <4 x i32> addrspace(1)* %out
ret void
@@ -89,9 +89,9 @@ entry:
define void @ashr_i64_2(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
entry:
- %b_ptr = getelementptr i64 addrspace(1)* %in, i64 1
- %a = load i64 addrspace(1) * %in
- %b = load i64 addrspace(1) * %b_ptr
+ %b_ptr = getelementptr i64, i64 addrspace(1)* %in, i64 1
+ %a = load i64, i64 addrspace(1) * %in
+ %b = load i64, i64 addrspace(1) * %b_ptr
%result = ashr i64 %a, %b
store i64 %result, i64 addrspace(1)* %out
ret void
@@ -132,9 +132,9 @@ entry:
;VI: v_ashrrev_i64 {{v\[[0-9]+:[0-9]+\], v[0-9]+, v\[[0-9]+:[0-9]+\]}}
define void @ashr_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %in) {
- %b_ptr = getelementptr <2 x i64> addrspace(1)* %in, i64 1
- %a = load <2 x i64> addrspace(1) * %in
- %b = load <2 x i64> addrspace(1) * %b_ptr
+ %b_ptr = getelementptr <2 x i64>, <2 x i64> addrspace(1)* %in, i64 1
+ %a = load <2 x i64>, <2 x i64> addrspace(1) * %in
+ %b = load <2 x i64>, <2 x i64> addrspace(1) * %b_ptr
%result = ashr <2 x i64> %a, %b
store <2 x i64> %result, <2 x i64> addrspace(1)* %out
ret void
@@ -203,9 +203,9 @@ define void @ashr_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %i
;VI: v_ashrrev_i64 {{v\[[0-9]+:[0-9]+\], v[0-9]+, v\[[0-9]+:[0-9]+\]}}
define void @ashr_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(1)* %in) {
- %b_ptr = getelementptr <4 x i64> addrspace(1)* %in, i64 1
- %a = load <4 x i64> addrspace(1) * %in
- %b = load <4 x i64> addrspace(1) * %b_ptr
+ %b_ptr = getelementptr <4 x i64>, <4 x i64> addrspace(1)* %in, i64 1
+ %a = load <4 x i64>, <4 x i64> addrspace(1) * %in
+ %b = load <4 x i64>, <4 x i64> addrspace(1) * %b_ptr
%result = ashr <4 x i64> %a, %b
store <4 x i64> %result, <4 x i64> addrspace(1)* %out
ret void
diff --git a/test/CodeGen/R600/srem.ll b/test/CodeGen/R600/srem.ll
index 2aa8c7452542..c78fd549b316 100644
--- a/test/CodeGen/R600/srem.ll
+++ b/test/CodeGen/R600/srem.ll
@@ -1,51 +1,112 @@
-; RUN: llc -march=amdgcn -mcpu=SI < %s
-; RUN: llc -march=amdgcn -mcpu=tonga < %s
+; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=redwood < %s
define void @srem_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
- %den_ptr = getelementptr i32 addrspace(1)* %in, i32 1
- %num = load i32 addrspace(1) * %in
- %den = load i32 addrspace(1) * %den_ptr
+ %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
+ %num = load i32, i32 addrspace(1) * %in
+ %den = load i32, i32 addrspace(1) * %den_ptr
%result = srem i32 %num, %den
store i32 %result, i32 addrspace(1)* %out
ret void
}
define void @srem_i32_4(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
- %num = load i32 addrspace(1) * %in
+ %num = load i32, i32 addrspace(1) * %in
%result = srem i32 %num, 4
store i32 %result, i32 addrspace(1)* %out
ret void
}
+; FUNC-LABEL: {{^}}srem_i32_7:
+; SI: v_mov_b32_e32 [[MAGIC:v[0-9]+]], 0x92492493
+; SI: v_mul_hi_i32 {{v[0-9]+}}, [[MAGIC]],
+; SI: v_mul_lo_i32
+; SI: v_sub_i32
+; SI: s_endpgm
+define void @srem_i32_7(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
+ %num = load i32, i32 addrspace(1) * %in
+ %result = srem i32 %num, 7
+ store i32 %result, i32 addrspace(1)* %out
+ ret void
+}
+
define void @srem_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
- %den_ptr = getelementptr <2 x i32> addrspace(1)* %in, i32 1
- %num = load <2 x i32> addrspace(1) * %in
- %den = load <2 x i32> addrspace(1) * %den_ptr
+ %den_ptr = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %in, i32 1
+ %num = load <2 x i32>, <2 x i32> addrspace(1) * %in
+ %den = load <2 x i32>, <2 x i32> addrspace(1) * %den_ptr
%result = srem <2 x i32> %num, %den
store <2 x i32> %result, <2 x i32> addrspace(1)* %out
ret void
}
define void @srem_v2i32_4(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
- %num = load <2 x i32> addrspace(1) * %in
+ %num = load <2 x i32>, <2 x i32> addrspace(1) * %in
%result = srem <2 x i32> %num, <i32 4, i32 4>
store <2 x i32> %result, <2 x i32> addrspace(1)* %out
ret void
}
define void @srem_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
- %den_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1
- %num = load <4 x i32> addrspace(1) * %in
- %den = load <4 x i32> addrspace(1) * %den_ptr
+ %den_ptr = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 1
+ %num = load <4 x i32>, <4 x i32> addrspace(1) * %in
+ %den = load <4 x i32>, <4 x i32> addrspace(1) * %den_ptr
%result = srem <4 x i32> %num, %den
store <4 x i32> %result, <4 x i32> addrspace(1)* %out
ret void
}
define void @srem_v4i32_4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
- %num = load <4 x i32> addrspace(1) * %in
+ %num = load <4 x i32>, <4 x i32> addrspace(1) * %in
%result = srem <4 x i32> %num, <i32 4, i32 4, i32 4, i32 4>
store <4 x i32> %result, <4 x i32> addrspace(1)* %out
ret void
}
+
+define void @srem_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
+ %den_ptr = getelementptr i64, i64 addrspace(1)* %in, i64 1
+ %num = load i64, i64 addrspace(1) * %in
+ %den = load i64, i64 addrspace(1) * %den_ptr
+ %result = srem i64 %num, %den
+ store i64 %result, i64 addrspace(1)* %out
+ ret void
+}
+
+define void @srem_i64_4(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
+ %num = load i64, i64 addrspace(1) * %in
+ %result = srem i64 %num, 4
+ store i64 %result, i64 addrspace(1)* %out
+ ret void
+}
+
+define void @srem_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %in) {
+ %den_ptr = getelementptr <2 x i64>, <2 x i64> addrspace(1)* %in, i64 1
+ %num = load <2 x i64>, <2 x i64> addrspace(1) * %in
+ %den = load <2 x i64>, <2 x i64> addrspace(1) * %den_ptr
+ %result = srem <2 x i64> %num, %den
+ store <2 x i64> %result, <2 x i64> addrspace(1)* %out
+ ret void
+}
+
+define void @srem_v2i64_4(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %in) {
+ %num = load <2 x i64>, <2 x i64> addrspace(1) * %in
+ %result = srem <2 x i64> %num, <i64 4, i64 4>
+ store <2 x i64> %result, <2 x i64> addrspace(1)* %out
+ ret void
+}
+
+define void @srem_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(1)* %in) {
+ %den_ptr = getelementptr <4 x i64>, <4 x i64> addrspace(1)* %in, i64 1
+ %num = load <4 x i64>, <4 x i64> addrspace(1) * %in
+ %den = load <4 x i64>, <4 x i64> addrspace(1) * %den_ptr
+ %result = srem <4 x i64> %num, %den
+ store <4 x i64> %result, <4 x i64> addrspace(1)* %out
+ ret void
+}
+
+define void @srem_v4i64_4(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(1)* %in) {
+ %num = load <4 x i64>, <4 x i64> addrspace(1) * %in
+ %result = srem <4 x i64> %num, <i64 4, i64 4, i64 4, i64 4>
+ store <4 x i64> %result, <4 x i64> addrspace(1)* %out
+ ret void
+}
diff --git a/test/CodeGen/R600/srl.ll b/test/CodeGen/R600/srl.ll
index 9e7b35e8338a..4904d7fa1bd0 100644
--- a/test/CodeGen/R600/srl.ll
+++ b/test/CodeGen/R600/srl.ll
@@ -1,13 +1,15 @@
; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
; FUNC-LABEL: {{^}}lshr_i32:
; SI: v_lshrrev_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+; VI: v_lshrrev_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
; EG: LSHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
define void @lshr_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
- %b_ptr = getelementptr i32 addrspace(1)* %in, i32 1
- %a = load i32 addrspace(1)* %in
- %b = load i32 addrspace(1)* %b_ptr
+ %b_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
+ %a = load i32, i32 addrspace(1)* %in
+ %b = load i32, i32 addrspace(1)* %b_ptr
%result = lshr i32 %a, %b
store i32 %result, i32 addrspace(1)* %out
ret void
@@ -17,12 +19,15 @@ define void @lshr_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
; SI: v_lshr_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
; SI: v_lshr_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+; VI: v_lshrrev_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+; VI: v_lshrrev_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+
; EG: LSHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; EG: LSHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
define void @lshr_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
- %b_ptr = getelementptr <2 x i32> addrspace(1)* %in, i32 1
- %a = load <2 x i32> addrspace(1)* %in
- %b = load <2 x i32> addrspace(1)* %b_ptr
+ %b_ptr = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %in, i32 1
+ %a = load <2 x i32>, <2 x i32> addrspace(1)* %in
+ %b = load <2 x i32>, <2 x i32> addrspace(1)* %b_ptr
%result = lshr <2 x i32> %a, %b
store <2 x i32> %result, <2 x i32> addrspace(1)* %out
ret void
@@ -34,14 +39,19 @@ define void @lshr_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %i
; SI: v_lshr_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
; SI: v_lshr_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+; VI: v_lshrrev_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+; VI: v_lshrrev_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+; VI: v_lshrrev_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+; VI: v_lshrrev_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+
; EG: LSHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; EG: LSHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; EG: LSHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; EG: LSHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
define void @lshr_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
- %b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1
- %a = load <4 x i32> addrspace(1)* %in
- %b = load <4 x i32> addrspace(1)* %b_ptr
+ %b_ptr = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 1
+ %a = load <4 x i32>, <4 x i32> addrspace(1)* %in
+ %b = load <4 x i32>, <4 x i32> addrspace(1)* %b_ptr
%result = lshr <4 x i32> %a, %b
store <4 x i32> %result, <4 x i32> addrspace(1)* %out
ret void
@@ -49,6 +59,7 @@ define void @lshr_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %i
; FUNC-LABEL: {{^}}lshr_i64:
; SI: v_lshr_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
+; VI: v_lshrrev_b64 {{v\[[0-9]+:[0-9]+\], v[0-9]+, v\[[0-9]+:[0-9]+\]}}
; EG: SUB_INT {{\*? *}}[[COMPSH:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHIFT:T[0-9]+\.[XYZW]]]
; EG: LSHL {{\* *}}[[TEMP:T[0-9]+\.[XYZW]]], [[OPHI:T[0-9]+\.[XYZW]]], {{[[COMPSH]]|PV.[XYZW]}}
@@ -62,9 +73,9 @@ define void @lshr_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %i
; EG-DAG: CNDE_INT {{\*? *}}[[RESLO:T[0-9]+\.[XYZW]]], {{T[0-9]+\.[XYZW]}}
; EG-DAG: CNDE_INT {{\*? *}}[[RESHI:T[0-9]+\.[XYZW]]], {{T[0-9]+\.[XYZW], .*}}, 0.0
define void @lshr_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
- %b_ptr = getelementptr i64 addrspace(1)* %in, i64 1
- %a = load i64 addrspace(1)* %in
- %b = load i64 addrspace(1)* %b_ptr
+ %b_ptr = getelementptr i64, i64 addrspace(1)* %in, i64 1
+ %a = load i64, i64 addrspace(1)* %in
+ %b = load i64, i64 addrspace(1)* %b_ptr
%result = lshr i64 %a, %b
store i64 %result, i64 addrspace(1)* %out
ret void
@@ -74,6 +85,9 @@ define void @lshr_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
; SI: v_lshr_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
; SI: v_lshr_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
+; VI: v_lshrrev_b64 {{v\[[0-9]+:[0-9]+\], v[0-9]+, v\[[0-9]+:[0-9]+\]}}
+; VI: v_lshrrev_b64 {{v\[[0-9]+:[0-9]+\], v[0-9]+, v\[[0-9]+:[0-9]+\]}}
+
; EG-DAG: SUB_INT {{\*? *}}[[COMPSHA:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHA:T[0-9]+\.[XYZW]]]
; EG-DAG: SUB_INT {{\*? *}}[[COMPSHB:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHB:T[0-9]+\.[XYZW]]]
; EG-DAG: LSHL {{\*? *}}[[COMPSHA]]
@@ -97,9 +111,9 @@ define void @lshr_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
; EG-DAG: CNDE_INT
; EG-DAG: CNDE_INT
define void @lshr_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %in) {
- %b_ptr = getelementptr <2 x i64> addrspace(1)* %in, i64 1
- %a = load <2 x i64> addrspace(1)* %in
- %b = load <2 x i64> addrspace(1)* %b_ptr
+ %b_ptr = getelementptr <2 x i64>, <2 x i64> addrspace(1)* %in, i64 1
+ %a = load <2 x i64>, <2 x i64> addrspace(1)* %in
+ %b = load <2 x i64>, <2 x i64> addrspace(1)* %b_ptr
%result = lshr <2 x i64> %a, %b
store <2 x i64> %result, <2 x i64> addrspace(1)* %out
ret void
@@ -111,6 +125,11 @@ define void @lshr_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %i
; SI: v_lshr_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
; SI: v_lshr_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
+; VI: v_lshrrev_b64 {{v\[[0-9]+:[0-9]+\], v[0-9]+, v\[[0-9]+:[0-9]+\]}}
+; VI: v_lshrrev_b64 {{v\[[0-9]+:[0-9]+\], v[0-9]+, v\[[0-9]+:[0-9]+\]}}
+; VI: v_lshrrev_b64 {{v\[[0-9]+:[0-9]+\], v[0-9]+, v\[[0-9]+:[0-9]+\]}}
+; VI: v_lshrrev_b64 {{v\[[0-9]+:[0-9]+\], v[0-9]+, v\[[0-9]+:[0-9]+\]}}
+
; EG-DAG: SUB_INT {{\*? *}}[[COMPSHA:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHA:T[0-9]+\.[XYZW]]]
; EG-DAG: SUB_INT {{\*? *}}[[COMPSHB:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHB:T[0-9]+\.[XYZW]]]
; EG-DAG: SUB_INT {{\*? *}}[[COMPSHC:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHC:T[0-9]+\.[XYZW]]]
@@ -158,9 +177,9 @@ define void @lshr_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %i
; EG-DAG: CNDE_INT
; EG-DAG: CNDE_INT
define void @lshr_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(1)* %in) {
- %b_ptr = getelementptr <4 x i64> addrspace(1)* %in, i64 1
- %a = load <4 x i64> addrspace(1)* %in
- %b = load <4 x i64> addrspace(1)* %b_ptr
+ %b_ptr = getelementptr <4 x i64>, <4 x i64> addrspace(1)* %in, i64 1
+ %a = load <4 x i64>, <4 x i64> addrspace(1)* %in
+ %b = load <4 x i64>, <4 x i64> addrspace(1)* %b_ptr
%result = lshr <4 x i64> %a, %b
store <4 x i64> %result, <4 x i64> addrspace(1)* %out
ret void
diff --git a/test/CodeGen/R600/ssubo.ll b/test/CodeGen/R600/ssubo.ll
index 09d3959b2b3d..26884a1b7761 100644
--- a/test/CodeGen/R600/ssubo.ll
+++ b/test/CodeGen/R600/ssubo.ll
@@ -28,8 +28,8 @@ define void @s_ssubo_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %carryout, i32
; FUNC-LABEL: {{^}}v_ssubo_i32:
define void @v_ssubo_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %carryout, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
- %a = load i32 addrspace(1)* %aptr, align 4
- %b = load i32 addrspace(1)* %bptr, align 4
+ %a = load i32, i32 addrspace(1)* %aptr, align 4
+ %b = load i32, i32 addrspace(1)* %bptr, align 4
%ssub = call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 %a, i32 %b) nounwind
%val = extractvalue { i32, i1 } %ssub, 0
%carry = extractvalue { i32, i1 } %ssub, 1
@@ -54,8 +54,8 @@ define void @s_ssubo_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %carryout, i64
; SI: v_sub_i32_e32
; SI: v_subb_u32_e32
define void @v_ssubo_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %carryout, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) nounwind {
- %a = load i64 addrspace(1)* %aptr, align 4
- %b = load i64 addrspace(1)* %bptr, align 4
+ %a = load i64, i64 addrspace(1)* %aptr, align 4
+ %b = load i64, i64 addrspace(1)* %bptr, align 4
%ssub = call { i64, i1 } @llvm.ssub.with.overflow.i64(i64 %a, i64 %b) nounwind
%val = extractvalue { i64, i1 } %ssub, 0
%carry = extractvalue { i64, i1 } %ssub, 1
diff --git a/test/CodeGen/R600/store-barrier.ll b/test/CodeGen/R600/store-barrier.ll
index 350b006ba5e0..4a72b4d090ad 100644
--- a/test/CodeGen/R600/store-barrier.ll
+++ b/test/CodeGen/R600/store-barrier.ll
@@ -1,5 +1,5 @@
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs -mattr=+load-store-opt -enable-misched < %s | FileCheck --check-prefix=CHECK %s
-; RUN: llc -march=r600 -mcpu=bonaire -verify-machineinstrs -mattr=+load-store-opt -enable-misched < %s | FileCheck --check-prefix=CHECK %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs -mattr=+load-store-opt -enable-misched < %s | FileCheck --check-prefix=CHECK %s
+; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs -mattr=+load-store-opt -enable-misched < %s | FileCheck --check-prefix=CHECK %s
; This test is for a bug in the machine scheduler where stores without
; an underlying object would be moved across the barrier. In this
@@ -14,24 +14,24 @@
; Function Attrs: nounwind
define void @test(<2 x i8> addrspace(3)* nocapture %arg, <2 x i8> addrspace(1)* nocapture readonly %arg1, i32 addrspace(1)* nocapture readonly %arg2, <2 x i8> addrspace(1)* nocapture %arg3, i32 %arg4, i64 %tmp9) {
bb:
- %tmp10 = getelementptr inbounds i32 addrspace(1)* %arg2, i64 %tmp9
- %tmp13 = load i32 addrspace(1)* %tmp10, align 2
- %tmp14 = getelementptr inbounds <2 x i8> addrspace(3)* %arg, i32 %tmp13
- %tmp15 = load <2 x i8> addrspace(3)* %tmp14, align 2
+ %tmp10 = getelementptr inbounds i32, i32 addrspace(1)* %arg2, i64 %tmp9
+ %tmp13 = load i32, i32 addrspace(1)* %tmp10, align 2
+ %tmp14 = getelementptr inbounds <2 x i8>, <2 x i8> addrspace(3)* %arg, i32 %tmp13
+ %tmp15 = load <2 x i8>, <2 x i8> addrspace(3)* %tmp14, align 2
%tmp16 = add i32 %tmp13, 1
- %tmp17 = getelementptr inbounds <2 x i8> addrspace(3)* %arg, i32 %tmp16
+ %tmp17 = getelementptr inbounds <2 x i8>, <2 x i8> addrspace(3)* %arg, i32 %tmp16
store <2 x i8> %tmp15, <2 x i8> addrspace(3)* %tmp17, align 2
tail call void @llvm.AMDGPU.barrier.local() #2
- %tmp25 = load i32 addrspace(1)* %tmp10, align 4
+ %tmp25 = load i32, i32 addrspace(1)* %tmp10, align 4
%tmp26 = sext i32 %tmp25 to i64
%tmp27 = sext i32 %arg4 to i64
- %tmp28 = getelementptr inbounds <2 x i8> addrspace(3)* %arg, i32 %tmp25, i32 %arg4
- %tmp29 = load i8 addrspace(3)* %tmp28, align 1
- %tmp30 = getelementptr inbounds <2 x i8> addrspace(1)* %arg3, i64 %tmp26, i64 %tmp27
+ %tmp28 = getelementptr inbounds <2 x i8>, <2 x i8> addrspace(3)* %arg, i32 %tmp25, i32 %arg4
+ %tmp29 = load i8, i8 addrspace(3)* %tmp28, align 1
+ %tmp30 = getelementptr inbounds <2 x i8>, <2 x i8> addrspace(1)* %arg3, i64 %tmp26, i64 %tmp27
store i8 %tmp29, i8 addrspace(1)* %tmp30, align 1
- %tmp32 = getelementptr inbounds <2 x i8> addrspace(3)* %arg, i32 %tmp25, i32 0
- %tmp33 = load i8 addrspace(3)* %tmp32, align 1
- %tmp35 = getelementptr inbounds <2 x i8> addrspace(1)* %arg3, i64 %tmp26, i64 0
+ %tmp32 = getelementptr inbounds <2 x i8>, <2 x i8> addrspace(3)* %arg, i32 %tmp25, i32 0
+ %tmp33 = load i8, i8 addrspace(3)* %tmp32, align 1
+ %tmp35 = getelementptr inbounds <2 x i8>, <2 x i8> addrspace(1)* %arg3, i64 %tmp26, i64 0
store i8 %tmp33, i8 addrspace(1)* %tmp35, align 1
ret void
}
diff --git a/test/CodeGen/R600/store-v3i64.ll b/test/CodeGen/R600/store-v3i64.ll
index 4db9b67e0118..e0c554ad2c17 100644
--- a/test/CodeGen/R600/store-v3i64.ll
+++ b/test/CodeGen/R600/store-v3i64.ll
@@ -1,6 +1,6 @@
; XFAIL: *
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI
-; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs< %s | FileCheck -check-prefix=SI
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs< %s | FileCheck -check-prefix=SI %s
; SI-LABEL: {{^}}global_store_v3i64:
; SI: buffer_store_dwordx4
diff --git a/test/CodeGen/R600/store-vector-ptrs.ll b/test/CodeGen/R600/store-vector-ptrs.ll
index ba4d94f73245..d5af3b29118a 100644
--- a/test/CodeGen/R600/store-vector-ptrs.ll
+++ b/test/CodeGen/R600/store-vector-ptrs.ll
@@ -6,7 +6,7 @@
; scratch loads and stores.
; CHECK-LABEL: {{^}}store_vector_ptrs:
define void @store_vector_ptrs(<4 x i32*>* %out, <4 x [1024 x i32]*> %array) nounwind {
- %p = getelementptr <4 x [1024 x i32]*> %array, <4 x i16> zeroinitializer, <4 x i16> <i16 16, i16 16, i16 16, i16 16>
+ %p = getelementptr [1024 x i32], <4 x [1024 x i32]*> %array, <4 x i16> zeroinitializer, <4 x i16> <i16 16, i16 16, i16 16, i16 16>
store <4 x i32*> %p, <4 x i32*>* %out
ret void
}
diff --git a/test/CodeGen/R600/store.ll b/test/CodeGen/R600/store.ll
index e4cb31365786..0f89405e073b 100644
--- a/test/CodeGen/R600/store.ll
+++ b/test/CodeGen/R600/store.ll
@@ -16,7 +16,7 @@ entry:
}
; i8 store
-; EG-LABEL: {{^}}store_i8:
+; FUNC-LABEL: {{^}}store_i8:
; EG: MEM_RAT MSKOR T[[RW_GPR:[0-9]]].XW, T{{[0-9]}}.X
; IG 0: Get the byte index and truncate the value
@@ -37,7 +37,6 @@ entry:
; EG: MOV T[[RW_GPR]].Y, 0.0
; EG: MOV * T[[RW_GPR]].Z, 0.0
-; SI-LABEL: {{^}}store_i8:
; SI: buffer_store_byte
define void @store_i8(i8 addrspace(1)* %out, i8 %in) {
@@ -47,7 +46,7 @@ entry:
}
; i16 store
-; EG-LABEL: {{^}}store_i16:
+; FUNC-LABEL: {{^}}store_i16:
; EG: MEM_RAT MSKOR T[[RW_GPR:[0-9]]].XW, T{{[0-9]}}.X
; IG 0: Get the byte index and truncate the value
@@ -71,7 +70,6 @@ entry:
; EG: MOV T[[RW_GPR]].Y, 0.0
; EG: MOV * T[[RW_GPR]].Z, 0.0
-; SI-LABEL: {{^}}store_i16:
; SI: buffer_store_short
define void @store_i16(i16 addrspace(1)* %out, i16 %in) {
entry:
@@ -79,10 +77,10 @@ entry:
ret void
}
-; EG-LABEL: {{^}}store_v2i8:
+; FUNC-LABEL: {{^}}store_v2i8:
; EG: MEM_RAT MSKOR
; EG-NOT: MEM_RAT MSKOR
-; SI-LABEL: {{^}}store_v2i8:
+
; SI: buffer_store_byte
; SI: buffer_store_byte
define void @store_v2i8(<2 x i8> addrspace(1)* %out, <2 x i32> %in) {
@@ -93,11 +91,11 @@ entry:
}
-; EG-LABEL: {{^}}store_v2i16:
+; FUNC-LABEL: {{^}}store_v2i16:
; EG: MEM_RAT_CACHELESS STORE_RAW
-; CM-LABEL: {{^}}store_v2i16:
+
; CM: MEM_RAT_CACHELESS STORE_DWORD
-; SI-LABEL: {{^}}store_v2i16:
+
; SI: buffer_store_short
; SI: buffer_store_short
define void @store_v2i16(<2 x i16> addrspace(1)* %out, <2 x i32> %in) {
@@ -107,11 +105,11 @@ entry:
ret void
}
-; EG-LABEL: {{^}}store_v4i8:
+; FUNC-LABEL: {{^}}store_v4i8:
; EG: MEM_RAT_CACHELESS STORE_RAW
-; CM-LABEL: {{^}}store_v4i8:
+
; CM: MEM_RAT_CACHELESS STORE_DWORD
-; SI-LABEL: {{^}}store_v4i8:
+
; SI: buffer_store_byte
; SI: buffer_store_byte
; SI: buffer_store_byte
@@ -124,11 +122,11 @@ entry:
}
; floating-point store
-; EG-LABEL: {{^}}store_f32:
+; FUNC-LABEL: {{^}}store_f32:
; EG: MEM_RAT_CACHELESS STORE_RAW T{{[0-9]+\.X, T[0-9]+\.X}}, 1
-; CM-LABEL: {{^}}store_f32:
+
; CM: MEM_RAT_CACHELESS STORE_DWORD T{{[0-9]+\.X, T[0-9]+\.X}}
-; SI-LABEL: {{^}}store_f32:
+
; SI: buffer_store_dword
define void @store_f32(float addrspace(1)* %out, float %in) {
@@ -136,13 +134,13 @@ define void @store_f32(float addrspace(1)* %out, float %in) {
ret void
}
-; EG-LABEL: {{^}}store_v4i16:
+; FUNC-LABEL: {{^}}store_v4i16:
; EG: MEM_RAT MSKOR
; EG: MEM_RAT MSKOR
; EG: MEM_RAT MSKOR
; EG: MEM_RAT MSKOR
; EG-NOT: MEM_RAT MSKOR
-; SI-LABEL: {{^}}store_v4i16:
+
; SI: buffer_store_short
; SI: buffer_store_short
; SI: buffer_store_short
@@ -156,11 +154,11 @@ entry:
}
; vec2 floating-point stores
-; EG-LABEL: {{^}}store_v2f32:
+; FUNC-LABEL: {{^}}store_v2f32:
; EG: MEM_RAT_CACHELESS STORE_RAW
-; CM-LABEL: {{^}}store_v2f32:
+
; CM: MEM_RAT_CACHELESS STORE_DWORD
-; SI-LABEL: {{^}}store_v2f32:
+
; SI: buffer_store_dwordx2
define void @store_v2f32(<2 x float> addrspace(1)* %out, float %a, float %b) {
@@ -171,13 +169,13 @@ entry:
ret void
}
-; EG-LABEL: {{^}}store_v4i32:
+; FUNC-LABEL: {{^}}store_v4i32:
; EG: MEM_RAT_CACHELESS STORE_RAW
; EG-NOT: MEM_RAT_CACHELESS STORE_RAW
-; CM-LABEL: {{^}}store_v4i32:
+
; CM: MEM_RAT_CACHELESS STORE_DWORD
; CM-NOT: MEM_RAT_CACHELESS STORE_DWORD
-; SI-LABEL: {{^}}store_v4i32:
+
; SI: buffer_store_dwordx4
define void @store_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %in) {
entry:
@@ -218,29 +216,29 @@ entry:
ret void
}
-; EG-LABEL: {{^}}store_local_i8:
+; FUNC-LABEL: {{^}}store_local_i8:
; EG: LDS_BYTE_WRITE
-; SI-LABEL: {{^}}store_local_i8:
+
; SI: ds_write_b8
define void @store_local_i8(i8 addrspace(3)* %out, i8 %in) {
store i8 %in, i8 addrspace(3)* %out
ret void
}
-; EG-LABEL: {{^}}store_local_i16:
+; FUNC-LABEL: {{^}}store_local_i16:
; EG: LDS_SHORT_WRITE
-; SI-LABEL: {{^}}store_local_i16:
+
; SI: ds_write_b16
define void @store_local_i16(i16 addrspace(3)* %out, i16 %in) {
store i16 %in, i16 addrspace(3)* %out
ret void
}
-; EG-LABEL: {{^}}store_local_v2i16:
+; FUNC-LABEL: {{^}}store_local_v2i16:
; EG: LDS_WRITE
-; CM-LABEL: {{^}}store_local_v2i16:
+
; CM: LDS_WRITE
-; SI-LABEL: {{^}}store_local_v2i16:
+
; SI: ds_write_b16
; SI: ds_write_b16
define void @store_local_v2i16(<2 x i16> addrspace(3)* %out, <2 x i16> %in) {
@@ -249,11 +247,11 @@ entry:
ret void
}
-; EG-LABEL: {{^}}store_local_v4i8:
+; FUNC-LABEL: {{^}}store_local_v4i8:
; EG: LDS_WRITE
-; CM-LABEL: {{^}}store_local_v4i8:
+
; CM: LDS_WRITE
-; SI-LABEL: {{^}}store_local_v4i8:
+
; SI: ds_write_b8
; SI: ds_write_b8
; SI: ds_write_b8
@@ -264,13 +262,13 @@ entry:
ret void
}
-; EG-LABEL: {{^}}store_local_v2i32:
+; FUNC-LABEL: {{^}}store_local_v2i32:
; EG: LDS_WRITE
; EG: LDS_WRITE
-; CM-LABEL: {{^}}store_local_v2i32:
+
; CM: LDS_WRITE
; CM: LDS_WRITE
-; SI-LABEL: {{^}}store_local_v2i32:
+
; SI: ds_write_b64
define void @store_local_v2i32(<2 x i32> addrspace(3)* %out, <2 x i32> %in) {
entry:
@@ -278,17 +276,17 @@ entry:
ret void
}
-; EG-LABEL: {{^}}store_local_v4i32:
+; FUNC-LABEL: {{^}}store_local_v4i32:
; EG: LDS_WRITE
; EG: LDS_WRITE
; EG: LDS_WRITE
; EG: LDS_WRITE
-; CM-LABEL: {{^}}store_local_v4i32:
+
; CM: LDS_WRITE
; CM: LDS_WRITE
; CM: LDS_WRITE
; CM: LDS_WRITE
-; SI-LABEL: {{^}}store_local_v4i32:
+
; SI: ds_write_b32
; SI: ds_write_b32
; SI: ds_write_b32
@@ -326,19 +324,19 @@ entry:
; Evergreen / Northern Islands don't support 64-bit stores yet, so there should
; be two 32-bit stores.
-; EG-LABEL: {{^}}vecload2:
+; FUNC-LABEL: {{^}}vecload2:
; EG: MEM_RAT_CACHELESS STORE_RAW
-; CM-LABEL: {{^}}vecload2:
+
; CM: MEM_RAT_CACHELESS STORE_DWORD
-; SI-LABEL: {{^}}vecload2:
+
; SI: buffer_store_dwordx2
define void @vecload2(i32 addrspace(1)* nocapture %out, i32 addrspace(2)* nocapture %mem) #0 {
entry:
- %0 = load i32 addrspace(2)* %mem, align 4
- %arrayidx1.i = getelementptr inbounds i32 addrspace(2)* %mem, i64 1
- %1 = load i32 addrspace(2)* %arrayidx1.i, align 4
+ %0 = load i32, i32 addrspace(2)* %mem, align 4
+ %arrayidx1.i = getelementptr inbounds i32, i32 addrspace(2)* %mem, i64 1
+ %1 = load i32, i32 addrspace(2)* %arrayidx1.i, align 4
store i32 %0, i32 addrspace(1)* %out, align 4
- %arrayidx1 = getelementptr inbounds i32 addrspace(1)* %out, i64 1
+ %arrayidx1 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 1
store i32 %1, i32 addrspace(1)* %arrayidx1, align 4
ret void
}
@@ -357,16 +355,15 @@ attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"=
; CM: STORE_DWORD
; CM: STORE_DWORD
; CM: STORE_DWORD
-; SI: buffer_store_dwordx2
-; SI: buffer_store_dwordx2
+; SI: buffer_store_dwordx4
define void @i128-const-store(i32 addrspace(1)* %out) {
entry:
store i32 1, i32 addrspace(1)* %out, align 4
- %arrayidx2 = getelementptr inbounds i32 addrspace(1)* %out, i64 1
+ %arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 1
store i32 1, i32 addrspace(1)* %arrayidx2, align 4
- %arrayidx4 = getelementptr inbounds i32 addrspace(1)* %out, i64 2
+ %arrayidx4 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 2
store i32 2, i32 addrspace(1)* %arrayidx4, align 4
- %arrayidx6 = getelementptr inbounds i32 addrspace(1)* %out, i64 3
+ %arrayidx6 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 3
store i32 2, i32 addrspace(1)* %arrayidx6, align 4
ret void
}
diff --git a/test/CodeGen/R600/store.r600.ll b/test/CodeGen/R600/store.r600.ll
index 21972603cac9..696fb033b5ec 100644
--- a/test/CodeGen/R600/store.r600.ll
+++ b/test/CodeGen/R600/store.r600.ll
@@ -7,7 +7,7 @@
; EG: MEM_RAT_CACHELESS STORE_RAW T{{[0-9]+\.XYZW, T[0-9]+\.X}}, 1
define void @store_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
- %1 = load <4 x i32> addrspace(1) * %in
+ %1 = load <4 x i32>, <4 x i32> addrspace(1) * %in
store <4 x i32> %1, <4 x i32> addrspace(1)* %out
ret void
}
@@ -16,7 +16,7 @@ define void @store_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %
; EG: {{^}}store_v4f32:
; EG: MEM_RAT_CACHELESS STORE_RAW T{{[0-9]+\.XYZW, T[0-9]+\.X}}, 1
define void @store_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
- %1 = load <4 x float> addrspace(1) * %in
+ %1 = load <4 x float>, <4 x float> addrspace(1) * %in
store <4 x float> %1, <4 x float> addrspace(1)* %out
ret void
}
diff --git a/test/CodeGen/R600/sub.ll b/test/CodeGen/R600/sub.ll
index be48e186e870..b7fba0efa5b2 100644
--- a/test/CodeGen/R600/sub.ll
+++ b/test/CodeGen/R600/sub.ll
@@ -9,9 +9,9 @@ declare i32 @llvm.r600.read.tidig.x() readnone
; SI: v_subrev_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
define void @test_sub_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
- %b_ptr = getelementptr i32 addrspace(1)* %in, i32 1
- %a = load i32 addrspace(1)* %in
- %b = load i32 addrspace(1)* %b_ptr
+ %b_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
+ %a = load i32, i32 addrspace(1)* %in
+ %b = load i32, i32 addrspace(1)* %b_ptr
%result = sub i32 %a, %b
store i32 %result, i32 addrspace(1)* %out
ret void
@@ -26,9 +26,9 @@ define void @test_sub_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
; SI: v_sub_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
define void @test_sub_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
- %b_ptr = getelementptr <2 x i32> addrspace(1)* %in, i32 1
- %a = load <2 x i32> addrspace(1) * %in
- %b = load <2 x i32> addrspace(1) * %b_ptr
+ %b_ptr = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %in, i32 1
+ %a = load <2 x i32>, <2 x i32> addrspace(1) * %in
+ %b = load <2 x i32>, <2 x i32> addrspace(1) * %b_ptr
%result = sub <2 x i32> %a, %b
store <2 x i32> %result, <2 x i32> addrspace(1)* %out
ret void
@@ -46,9 +46,9 @@ define void @test_sub_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)
; SI: v_sub_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
define void @test_sub_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
- %b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1
- %a = load <4 x i32> addrspace(1) * %in
- %b = load <4 x i32> addrspace(1) * %b_ptr
+ %b_ptr = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 1
+ %a = load <4 x i32>, <4 x i32> addrspace(1) * %in
+ %b = load <4 x i32>, <4 x i32> addrspace(1) * %b_ptr
%result = sub <4 x i32> %a, %b
store <4 x i32> %result, <4 x i32> addrspace(1)* %out
ret void
@@ -58,11 +58,13 @@ define void @test_sub_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)
; SI: s_sub_u32
; SI: s_subb_u32
-; EG-DAG: SETGE_UINT
-; EG-DAG: CNDE_INT
-; EG-DAG: SUB_INT
-; EG-DAG: SUB_INT
+; EG: MEM_RAT_CACHELESS STORE_RAW [[LO:T[0-9]+\.[XYZW]]]
+; EG: MEM_RAT_CACHELESS STORE_RAW [[HI:T[0-9]+\.[XYZW]]]
+; EG-DAG: SUB_INT {{[* ]*}}[[LO]]
+; EG-DAG: SUBB_UINT
; EG-DAG: SUB_INT
+; EG-DAG: SUB_INT {{[* ]*}}[[HI]]
+; EG-NOT: SUB
define void @s_sub_i64(i64 addrspace(1)* noalias %out, i64 %a, i64 %b) nounwind {
%result = sub i64 %a, %b
store i64 %result, i64 addrspace(1)* %out, align 8
@@ -73,17 +75,19 @@ define void @s_sub_i64(i64 addrspace(1)* noalias %out, i64 %a, i64 %b) nounwind
; SI: v_sub_i32_e32
; SI: v_subb_u32_e32
-; EG-DAG: SETGE_UINT
-; EG-DAG: CNDE_INT
-; EG-DAG: SUB_INT
-; EG-DAG: SUB_INT
+; EG: MEM_RAT_CACHELESS STORE_RAW [[LO:T[0-9]+\.[XYZW]]]
+; EG: MEM_RAT_CACHELESS STORE_RAW [[HI:T[0-9]+\.[XYZW]]]
+; EG-DAG: SUB_INT {{[* ]*}}[[LO]]
+; EG-DAG: SUBB_UINT
; EG-DAG: SUB_INT
+; EG-DAG: SUB_INT {{[* ]*}}[[HI]]
+; EG-NOT: SUB
define void @v_sub_i64(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %inA, i64 addrspace(1)* noalias %inB) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x() readnone
- %a_ptr = getelementptr i64 addrspace(1)* %inA, i32 %tid
- %b_ptr = getelementptr i64 addrspace(1)* %inB, i32 %tid
- %a = load i64 addrspace(1)* %a_ptr
- %b = load i64 addrspace(1)* %b_ptr
+ %a_ptr = getelementptr i64, i64 addrspace(1)* %inA, i32 %tid
+ %b_ptr = getelementptr i64, i64 addrspace(1)* %inB, i32 %tid
+ %a = load i64, i64 addrspace(1)* %a_ptr
+ %b = load i64, i64 addrspace(1)* %b_ptr
%result = sub i64 %a, %b
store i64 %result, i64 addrspace(1)* %out, align 8
ret void
@@ -96,10 +100,10 @@ define void @v_sub_i64(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias
; SI: v_subb_u32_e32
define void @v_test_sub_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* noalias %inA, <2 x i64> addrspace(1)* noalias %inB) {
%tid = call i32 @llvm.r600.read.tidig.x() readnone
- %a_ptr = getelementptr <2 x i64> addrspace(1)* %inA, i32 %tid
- %b_ptr = getelementptr <2 x i64> addrspace(1)* %inB, i32 %tid
- %a = load <2 x i64> addrspace(1)* %a_ptr
- %b = load <2 x i64> addrspace(1)* %b_ptr
+ %a_ptr = getelementptr <2 x i64>, <2 x i64> addrspace(1)* %inA, i32 %tid
+ %b_ptr = getelementptr <2 x i64>, <2 x i64> addrspace(1)* %inB, i32 %tid
+ %a = load <2 x i64>, <2 x i64> addrspace(1)* %a_ptr
+ %b = load <2 x i64>, <2 x i64> addrspace(1)* %b_ptr
%result = sub <2 x i64> %a, %b
store <2 x i64> %result, <2 x i64> addrspace(1)* %out
ret void
@@ -116,10 +120,10 @@ define void @v_test_sub_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(
; SI: v_subb_u32_e32
define void @v_test_sub_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(1)* noalias %inA, <4 x i64> addrspace(1)* noalias %inB) {
%tid = call i32 @llvm.r600.read.tidig.x() readnone
- %a_ptr = getelementptr <4 x i64> addrspace(1)* %inA, i32 %tid
- %b_ptr = getelementptr <4 x i64> addrspace(1)* %inB, i32 %tid
- %a = load <4 x i64> addrspace(1)* %a_ptr
- %b = load <4 x i64> addrspace(1)* %b_ptr
+ %a_ptr = getelementptr <4 x i64>, <4 x i64> addrspace(1)* %inA, i32 %tid
+ %b_ptr = getelementptr <4 x i64>, <4 x i64> addrspace(1)* %inB, i32 %tid
+ %a = load <4 x i64>, <4 x i64> addrspace(1)* %a_ptr
+ %b = load <4 x i64>, <4 x i64> addrspace(1)* %b_ptr
%result = sub <4 x i64> %a, %b
store <4 x i64> %result, <4 x i64> addrspace(1)* %out
ret void
diff --git a/test/CodeGen/R600/subreg-coalescer-crash.ll b/test/CodeGen/R600/subreg-coalescer-crash.ll
index a9eec7908b6c..c4dae4736cfa 100644
--- a/test/CodeGen/R600/subreg-coalescer-crash.ll
+++ b/test/CodeGen/R600/subreg-coalescer-crash.ll
@@ -1,10 +1,10 @@
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs -o - %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs -o - %s
-; ModuleID = 'bugpoint-reduced-simplified.bc'
+; SI-LABEL: {{^}}row_filter_C1_D0:
; SI: s_endpgm
; Function Attrs: nounwind
-define void @row_filter_C1_D0() #0 {
+define void @row_filter_C1_D0() {
entry:
br i1 undef, label %for.inc.1, label %do.body.preheader
@@ -42,3 +42,68 @@ for.inc.1: ; preds = %do.body.1562.prehea
unreachable
}
+; SI-LABEL: {{^}}foo:
+; SI: s_endpgm
+define void @foo() #0 {
+bb:
+ br i1 undef, label %bb2, label %bb1
+
+bb1: ; preds = %bb
+ br i1 undef, label %bb4, label %bb6
+
+bb2: ; preds = %bb4, %bb
+ %tmp = phi float [ %tmp5, %bb4 ], [ 0.000000e+00, %bb ]
+ br i1 undef, label %bb9, label %bb13
+
+bb4: ; preds = %bb7, %bb6, %bb1
+ %tmp5 = phi float [ undef, %bb1 ], [ undef, %bb6 ], [ %tmp8, %bb7 ]
+ br label %bb2
+
+bb6: ; preds = %bb1
+ br i1 undef, label %bb7, label %bb4
+
+bb7: ; preds = %bb6
+ %tmp8 = fmul float undef, undef
+ br label %bb4
+
+bb9: ; preds = %bb2
+ %tmp10 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 2)
+ %tmp11 = extractelement <4 x float> %tmp10, i32 1
+ %tmp12 = extractelement <4 x float> %tmp10, i32 3
+ br label %bb14
+
+bb13: ; preds = %bb2
+ br i1 undef, label %bb23, label %bb24
+
+bb14: ; preds = %bb27, %bb24, %bb9
+ %tmp15 = phi float [ %tmp12, %bb9 ], [ undef, %bb27 ], [ 0.000000e+00, %bb24 ]
+ %tmp16 = phi float [ %tmp11, %bb9 ], [ undef, %bb27 ], [ %tmp25, %bb24 ]
+ %tmp17 = fmul float 10.5, %tmp16
+ %tmp18 = fmul float 11.5, %tmp15
+ call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %tmp18, float %tmp17, float %tmp17, float %tmp17)
+ ret void
+
+bb23: ; preds = %bb13
+ br i1 undef, label %bb24, label %bb26
+
+bb24: ; preds = %bb26, %bb23, %bb13
+ %tmp25 = phi float [ %tmp, %bb13 ], [ %tmp, %bb26 ], [ 0.000000e+00, %bb23 ]
+ br i1 undef, label %bb27, label %bb14
+
+bb26: ; preds = %bb23
+ br label %bb24
+
+bb27: ; preds = %bb24
+ br label %bb14
+}
+
+; Function Attrs: nounwind readnone
+declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1
+
+; Function Attrs: nounwind readnone
+declare i32 @llvm.SI.packf16(float, float) #1
+
+declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
+
+attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" "unsafe-fp-math"="true" }
+attributes #1 = { nounwind readnone }
diff --git a/test/CodeGen/R600/swizzle-export.ll b/test/CodeGen/R600/swizzle-export.ll
index 5eaca7675237..000ee2faa478 100644
--- a/test/CodeGen/R600/swizzle-export.ll
+++ b/test/CodeGen/R600/swizzle-export.ll
@@ -12,56 +12,56 @@ main_body:
%1 = extractelement <4 x float> %reg1, i32 1
%2 = extractelement <4 x float> %reg1, i32 2
%3 = extractelement <4 x float> %reg1, i32 3
- %4 = load <4 x float> addrspace(8)* null
+ %4 = load <4 x float>, <4 x float> addrspace(8)* null
%5 = extractelement <4 x float> %4, i32 1
- %6 = load <4 x float> addrspace(8)* null
+ %6 = load <4 x float>, <4 x float> addrspace(8)* null
%7 = extractelement <4 x float> %6, i32 2
- %8 = load <4 x float> addrspace(8)* null
+ %8 = load <4 x float>, <4 x float> addrspace(8)* null
%9 = extractelement <4 x float> %8, i32 0
%10 = fmul float 0.000000e+00, %9
- %11 = load <4 x float> addrspace(8)* null
+ %11 = load <4 x float>, <4 x float> addrspace(8)* null
%12 = extractelement <4 x float> %11, i32 0
%13 = fmul float %5, %12
- %14 = load <4 x float> addrspace(8)* null
+ %14 = load <4 x float>, <4 x float> addrspace(8)* null
%15 = extractelement <4 x float> %14, i32 0
%16 = fmul float 0.000000e+00, %15
- %17 = load <4 x float> addrspace(8)* null
+ %17 = load <4 x float>, <4 x float> addrspace(8)* null
%18 = extractelement <4 x float> %17, i32 0
%19 = fmul float 0.000000e+00, %18
- %20 = load <4 x float> addrspace(8)* null
+ %20 = load <4 x float>, <4 x float> addrspace(8)* null
%21 = extractelement <4 x float> %20, i32 0
%22 = fmul float %7, %21
- %23 = load <4 x float> addrspace(8)* null
+ %23 = load <4 x float>, <4 x float> addrspace(8)* null
%24 = extractelement <4 x float> %23, i32 0
%25 = fmul float 0.000000e+00, %24
- %26 = load <4 x float> addrspace(8)* null
+ %26 = load <4 x float>, <4 x float> addrspace(8)* null
%27 = extractelement <4 x float> %26, i32 0
%28 = fmul float 0.000000e+00, %27
- %29 = load <4 x float> addrspace(8)* null
+ %29 = load <4 x float>, <4 x float> addrspace(8)* null
%30 = extractelement <4 x float> %29, i32 0
%31 = fmul float 0.000000e+00, %30
- %32 = load <4 x float> addrspace(8)* null
+ %32 = load <4 x float>, <4 x float> addrspace(8)* null
%33 = extractelement <4 x float> %32, i32 0
%34 = fmul float 0.000000e+00, %33
- %35 = load <4 x float> addrspace(8)* null
+ %35 = load <4 x float>, <4 x float> addrspace(8)* null
%36 = extractelement <4 x float> %35, i32 0
%37 = fmul float 0.000000e+00, %36
- %38 = load <4 x float> addrspace(8)* null
+ %38 = load <4 x float>, <4 x float> addrspace(8)* null
%39 = extractelement <4 x float> %38, i32 0
%40 = fmul float 1.000000e+00, %39
- %41 = load <4 x float> addrspace(8)* null
+ %41 = load <4 x float>, <4 x float> addrspace(8)* null
%42 = extractelement <4 x float> %41, i32 0
%43 = fmul float 0.000000e+00, %42
- %44 = load <4 x float> addrspace(8)* null
+ %44 = load <4 x float>, <4 x float> addrspace(8)* null
%45 = extractelement <4 x float> %44, i32 0
%46 = fmul float 0.000000e+00, %45
- %47 = load <4 x float> addrspace(8)* null
+ %47 = load <4 x float>, <4 x float> addrspace(8)* null
%48 = extractelement <4 x float> %47, i32 0
%49 = fmul float 0.000000e+00, %48
- %50 = load <4 x float> addrspace(8)* null
+ %50 = load <4 x float>, <4 x float> addrspace(8)* null
%51 = extractelement <4 x float> %50, i32 0
%52 = fmul float 0.000000e+00, %51
- %53 = load <4 x float> addrspace(8)* null
+ %53 = load <4 x float>, <4 x float> addrspace(8)* null
%54 = extractelement <4 x float> %53, i32 0
%55 = fmul float 1.000000e+00, %54
%56 = insertelement <4 x float> undef, float %0, i32 0
@@ -102,12 +102,12 @@ main_body:
%1 = extractelement <4 x float> %reg1, i32 1
%2 = fadd float %0, 2.5
%3 = fmul float %1, 3.5
- %4 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+ %4 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
%5 = extractelement <4 x float> %4, i32 0
%6 = call float @llvm.cos.f32(float %5)
- %7 = load <4 x float> addrspace(8)* null
+ %7 = load <4 x float>, <4 x float> addrspace(8)* null
%8 = extractelement <4 x float> %7, i32 0
- %9 = load <4 x float> addrspace(8)* null
+ %9 = load <4 x float>, <4 x float> addrspace(8)* null
%10 = extractelement <4 x float> %9, i32 1
%11 = insertelement <4 x float> undef, float %2, i32 0
%12 = insertelement <4 x float> %11, float %3, i32 1
diff --git a/test/CodeGen/R600/trunc-cmp-constant.ll b/test/CodeGen/R600/trunc-cmp-constant.ll
index 67a9aaffb6ff..dac74728b3ce 100644
--- a/test/CodeGen/R600/trunc-cmp-constant.ll
+++ b/test/CodeGen/R600/trunc-cmp-constant.ll
@@ -1,15 +1,15 @@
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -march=r600 -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; FUNC-LABEL {{^}}sextload_i1_to_i32_trunc_cmp_eq_0:
; SI: buffer_load_ubyte [[LOAD:v[0-9]+]]
; SI: v_and_b32_e32 [[TMP:v[0-9]+]], 1, [[LOAD]]
-; SI: v_cmp_eq_i32_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], [[TMP]], 1{{$}}
-; SI: s_xor_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, -1{{$}}
+; SI: v_cmp_eq_i32_e32 vcc, 1, [[TMP]]{{$}}
+; SI: s_xor_b64 s{{\[[0-9]+:[0-9]+\]}}, vcc, -1{{$}}
; SI: v_cndmask_b32_e64
; SI: buffer_store_byte
define void @sextload_i1_to_i32_trunc_cmp_eq_0(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
- %load = load i1 addrspace(1)* %in
+ %load = load i1, i1 addrspace(1)* %in
%ext = sext i1 %load to i32
%cmp = icmp eq i32 %ext, 0
store i1 %cmp, i1 addrspace(1)* %out
@@ -20,12 +20,12 @@ define void @sextload_i1_to_i32_trunc_cmp_eq_0(i1 addrspace(1)* %out, i1 addrspa
; FUNC-LABEL: {{^}}zextload_i1_to_i32_trunc_cmp_eq_0:
; SI: buffer_load_ubyte [[LOAD:v[0-9]+]]
; SI: v_and_b32_e32 [[TMP:v[0-9]+]], 1, [[LOAD]]
-; SI: v_cmp_eq_i32_e64 [[CMP0:s\[[0-9]+:[0-9]+\]]], [[TMP]], 1{{$}}
-; SI-NEXT: s_xor_b64 [[NEG:s\[[0-9]+:[0-9]+\]]], [[CMP0]], -1
+; SI: v_cmp_eq_i32_e32 vcc, 1, [[TMP]]{{$}}
+; SI-NEXT: s_xor_b64 [[NEG:s\[[0-9]+:[0-9]+\]]], vcc, -1
; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, [[NEG]]
; SI-NEXT: buffer_store_byte [[RESULT]]
define void @zextload_i1_to_i32_trunc_cmp_eq_0(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
- %load = load i1 addrspace(1)* %in
+ %load = load i1, i1 addrspace(1)* %in
%ext = zext i1 %load to i32
%cmp = icmp eq i32 %ext, 0
store i1 %cmp, i1 addrspace(1)* %out
@@ -34,9 +34,9 @@ define void @zextload_i1_to_i32_trunc_cmp_eq_0(i1 addrspace(1)* %out, i1 addrspa
; FUNC-LABEL: {{^}}sextload_i1_to_i32_trunc_cmp_eq_1:
; SI: v_mov_b32_e32 [[RESULT:v[0-9]+]], 0{{$}}
-; SI-NEXT: buffer_store_byte [[RESULT]]
+; SI: buffer_store_byte [[RESULT]]
define void @sextload_i1_to_i32_trunc_cmp_eq_1(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
- %load = load i1 addrspace(1)* %in
+ %load = load i1, i1 addrspace(1)* %in
%ext = sext i1 %load to i32
%cmp = icmp eq i32 %ext, 1
store i1 %cmp, i1 addrspace(1)* %out
@@ -48,7 +48,7 @@ define void @sextload_i1_to_i32_trunc_cmp_eq_1(i1 addrspace(1)* %out, i1 addrspa
; SI: v_and_b32_e32 [[RESULT:v[0-9]+]], 1, [[LOAD]]
; SI-NEXT: buffer_store_byte [[RESULT]]
define void @zextload_i1_to_i32_trunc_cmp_eq_1(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
- %load = load i1 addrspace(1)* %in
+ %load = load i1, i1 addrspace(1)* %in
%ext = zext i1 %load to i32
%cmp = icmp eq i32 %ext, 1
store i1 %cmp, i1 addrspace(1)* %out
@@ -60,7 +60,7 @@ define void @zextload_i1_to_i32_trunc_cmp_eq_1(i1 addrspace(1)* %out, i1 addrspa
; SI: v_and_b32_e32 [[RESULT:v[0-9]+]], 1, [[LOAD]]
; SI-NEXT: buffer_store_byte [[RESULT]]
define void @sextload_i1_to_i32_trunc_cmp_eq_neg1(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
- %load = load i1 addrspace(1)* %in
+ %load = load i1, i1 addrspace(1)* %in
%ext = sext i1 %load to i32
%cmp = icmp eq i32 %ext, -1
store i1 %cmp, i1 addrspace(1)* %out
@@ -69,9 +69,9 @@ define void @sextload_i1_to_i32_trunc_cmp_eq_neg1(i1 addrspace(1)* %out, i1 addr
; FUNC-LABEL: {{^}}zextload_i1_to_i32_trunc_cmp_eq_neg1:
; SI: v_mov_b32_e32 [[RESULT:v[0-9]+]], 0{{$}}
-; SI-NEXT: buffer_store_byte [[RESULT]]
+; SI: buffer_store_byte [[RESULT]]
define void @zextload_i1_to_i32_trunc_cmp_eq_neg1(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
- %load = load i1 addrspace(1)* %in
+ %load = load i1, i1 addrspace(1)* %in
%ext = zext i1 %load to i32
%cmp = icmp eq i32 %ext, -1
store i1 %cmp, i1 addrspace(1)* %out
@@ -84,7 +84,7 @@ define void @zextload_i1_to_i32_trunc_cmp_eq_neg1(i1 addrspace(1)* %out, i1 addr
; SI: v_and_b32_e32 [[TMP:v[0-9]+]], 1, [[LOAD]]
; SI-NEXT: buffer_store_byte [[RESULT]]
define void @sextload_i1_to_i32_trunc_cmp_ne_0(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
- %load = load i1 addrspace(1)* %in
+ %load = load i1, i1 addrspace(1)* %in
%ext = sext i1 %load to i32
%cmp = icmp ne i32 %ext, 0
store i1 %cmp, i1 addrspace(1)* %out
@@ -96,7 +96,7 @@ define void @sextload_i1_to_i32_trunc_cmp_ne_0(i1 addrspace(1)* %out, i1 addrspa
; SI: v_and_b32_e32 [[TMP:v[0-9]+]], 1, [[LOAD]]
; SI-NEXT: buffer_store_byte [[RESULT]]
define void @zextload_i1_to_i32_trunc_cmp_ne_0(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
- %load = load i1 addrspace(1)* %in
+ %load = load i1, i1 addrspace(1)* %in
%ext = zext i1 %load to i32
%cmp = icmp ne i32 %ext, 0
store i1 %cmp, i1 addrspace(1)* %out
@@ -105,9 +105,9 @@ define void @zextload_i1_to_i32_trunc_cmp_ne_0(i1 addrspace(1)* %out, i1 addrspa
; FUNC-LABEL: {{^}}sextload_i1_to_i32_trunc_cmp_ne_1:
; SI: v_mov_b32_e32 [[RESULT:v[0-9]+]], 1{{$}}
-; SI-NEXT: buffer_store_byte [[RESULT]]
+; SI: buffer_store_byte [[RESULT]]
define void @sextload_i1_to_i32_trunc_cmp_ne_1(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
- %load = load i1 addrspace(1)* %in
+ %load = load i1, i1 addrspace(1)* %in
%ext = sext i1 %load to i32
%cmp = icmp ne i32 %ext, 1
store i1 %cmp, i1 addrspace(1)* %out
@@ -117,12 +117,12 @@ define void @sextload_i1_to_i32_trunc_cmp_ne_1(i1 addrspace(1)* %out, i1 addrspa
; FUNC-LABEL: {{^}}zextload_i1_to_i32_trunc_cmp_ne_1:
; SI: buffer_load_ubyte [[LOAD:v[0-9]+]]
; SI: v_and_b32_e32 [[TMP:v[0-9]+]], 1, [[LOAD]]
-; SI: v_cmp_eq_i32_e64 [[CMP0:s\[[0-9]+:[0-9]+\]]], [[TMP]], 1{{$}}
-; SI-NEXT: s_xor_b64 [[NEG:s\[[0-9]+:[0-9]+\]]], [[CMP0]], -1
+; SI: v_cmp_eq_i32_e32 vcc, 1, [[TMP]]{{$}}
+; SI-NEXT: s_xor_b64 [[NEG:s\[[0-9]+:[0-9]+\]]], vcc, -1
; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, [[NEG]]
; SI-NEXT: buffer_store_byte [[RESULT]]
define void @zextload_i1_to_i32_trunc_cmp_ne_1(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
- %load = load i1 addrspace(1)* %in
+ %load = load i1, i1 addrspace(1)* %in
%ext = zext i1 %load to i32
%cmp = icmp ne i32 %ext, 1
store i1 %cmp, i1 addrspace(1)* %out
@@ -137,7 +137,7 @@ define void @zextload_i1_to_i32_trunc_cmp_ne_1(i1 addrspace(1)* %out, i1 addrspa
; XSI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, [[CMP0]]
; XSI-NEXT: buffer_store_byte [[RESULT]]
define void @sextload_i1_to_i32_trunc_cmp_ne_neg1(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
- %load = load i1 addrspace(1)* %in
+ %load = load i1, i1 addrspace(1)* %in
%ext = sext i1 %load to i32
%cmp = icmp ne i32 %ext, -1
store i1 %cmp, i1 addrspace(1)* %out
@@ -146,9 +146,9 @@ define void @sextload_i1_to_i32_trunc_cmp_ne_neg1(i1 addrspace(1)* %out, i1 addr
; FUNC-LABEL: {{^}}zextload_i1_to_i32_trunc_cmp_ne_neg1:
; SI: v_mov_b32_e32 [[RESULT:v[0-9]+]], 1{{$}}
-; SI-NEXT: buffer_store_byte [[RESULT]]
+; SI: buffer_store_byte [[RESULT]]
define void @zextload_i1_to_i32_trunc_cmp_ne_neg1(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
- %load = load i1 addrspace(1)* %in
+ %load = load i1, i1 addrspace(1)* %in
%ext = zext i1 %load to i32
%cmp = icmp ne i32 %ext, -1
store i1 %cmp, i1 addrspace(1)* %out
@@ -157,11 +157,11 @@ define void @zextload_i1_to_i32_trunc_cmp_ne_neg1(i1 addrspace(1)* %out, i1 addr
; FUNC-LABEL: {{^}}masked_load_i1_to_i32_trunc_cmp_ne_neg1:
; SI: buffer_load_sbyte [[LOAD:v[0-9]+]]
-; SI: v_cmp_ne_i32_e64 {{s\[[0-9]+:[0-9]+\]}}, [[LOAD]], -1{{$}}
+; SI: v_cmp_ne_i32_e32 vcc, -1, [[LOAD]]{{$}}
; SI-NEXT: v_cndmask_b32_e64
; SI-NEXT: buffer_store_byte
define void @masked_load_i1_to_i32_trunc_cmp_ne_neg1(i1 addrspace(1)* %out, i8 addrspace(1)* %in) nounwind {
- %load = load i8 addrspace(1)* %in
+ %load = load i8, i8 addrspace(1)* %in
%masked = and i8 %load, 255
%ext = sext i8 %masked to i32
%cmp = icmp ne i32 %ext, -1
diff --git a/test/CodeGen/R600/trunc.ll b/test/CodeGen/R600/trunc.ll
index bc00db7dbeef..bf690ca4cb28 100644
--- a/test/CodeGen/R600/trunc.ll
+++ b/test/CodeGen/R600/trunc.ll
@@ -36,6 +36,8 @@ define void @trunc_load_shl_i64(i32 addrspace(1)* %out, i64 %a) {
; SI: s_lshl_b64 s{{\[}}[[LO_SHL:[0-9]+]]:{{[0-9]+\]}}, s{{\[}}[[LO_SREG]]:{{[0-9]+\]}}, 2
; SI: s_add_u32 s[[LO_SREG2:[0-9]+]], s[[LO_SHL]],
; SI: s_addc_u32
+; SI: v_mov_b32_e32
+; SI: v_mov_b32_e32
; SI: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], s[[LO_SREG2]]
; SI: buffer_store_dword v[[LO_VREG]],
define void @trunc_shl_i64(i64 addrspace(1)* %out2, i32 addrspace(1)* %out, i64 %a) {
@@ -51,7 +53,7 @@ define void @trunc_shl_i64(i64 addrspace(1)* %out2, i32 addrspace(1)* %out, i64
; SI: v_and_b32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}}
; SI: v_cmp_eq_i32
define void @trunc_i32_to_i1(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) {
- %a = load i32 addrspace(1)* %ptr, align 4
+ %a = load i32, i32 addrspace(1)* %ptr, align 4
%trunc = trunc i32 %a to i1
%result = select i1 %trunc, i32 1, i32 0
store i32 %result, i32 addrspace(1)* %out, align 4
@@ -71,8 +73,8 @@ define void @sgpr_trunc_i32_to_i1(i32 addrspace(1)* %out, i32 %a) {
; SI-LABEL: {{^}}s_trunc_i64_to_i1:
; SI: s_load_dwordx2 s{{\[}}[[SLO:[0-9]+]]:{{[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0xb
; SI: v_and_b32_e64 [[MASKED:v[0-9]+]], 1, s[[SLO]]
-; SI: v_cmp_eq_i32_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], [[MASKED]], 1
-; SI: v_cndmask_b32_e64 {{v[0-9]+}}, -12, 63, [[CMP]]
+; SI: v_cmp_eq_i32_e32 vcc, 1, [[MASKED]]
+; SI: v_cndmask_b32_e64 {{v[0-9]+}}, -12, 63, vcc
define void @s_trunc_i64_to_i1(i32 addrspace(1)* %out, i64 %x) {
%trunc = trunc i64 %x to i1
%sel = select i1 %trunc, i32 63, i32 -12
@@ -83,13 +85,13 @@ define void @s_trunc_i64_to_i1(i32 addrspace(1)* %out, i64 %x) {
; SI-LABEL: {{^}}v_trunc_i64_to_i1:
; SI: buffer_load_dwordx2 v{{\[}}[[VLO:[0-9]+]]:{{[0-9]+\]}}
; SI: v_and_b32_e32 [[MASKED:v[0-9]+]], 1, v[[VLO]]
-; SI: v_cmp_eq_i32_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], [[MASKED]], 1
-; SI: v_cndmask_b32_e64 {{v[0-9]+}}, -12, 63, [[CMP]]
+; SI: v_cmp_eq_i32_e32 vcc, 1, [[MASKED]]
+; SI: v_cndmask_b32_e64 {{v[0-9]+}}, -12, 63, vcc
define void @v_trunc_i64_to_i1(i32 addrspace(1)* %out, i64 addrspace(1)* %in) {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep = getelementptr i64 addrspace(1)* %in, i32 %tid
- %out.gep = getelementptr i32 addrspace(1)* %out, i32 %tid
- %x = load i64 addrspace(1)* %gep
+ %gep = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
+ %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %x = load i64, i64 addrspace(1)* %gep
%trunc = trunc i64 %x to i1
%sel = select i1 %trunc, i32 63, i32 -12
diff --git a/test/CodeGen/R600/tti-unroll-prefs.ll b/test/CodeGen/R600/tti-unroll-prefs.ll
index 0009c42f79bc..76c32afc1f21 100644
--- a/test/CodeGen/R600/tti-unroll-prefs.ll
+++ b/test/CodeGen/R600/tti-unroll-prefs.ll
@@ -39,7 +39,7 @@ if.then4: ; preds = %if.then4.lr.ph, %if
%add2 = add nsw i32 %b.addr.014, 1
%1 = sext i32 %b.addr.014 to i64
%add.ptr.sum = add nsw i64 %1, %0
- %add.ptr5 = getelementptr inbounds i8 addrspace(1)* %dst, i64 %add.ptr.sum
+ %add.ptr5 = getelementptr inbounds i8, i8 addrspace(1)* %dst, i64 %add.ptr.sum
store i8 0, i8 addrspace(1)* %add.ptr5, align 1
%inc = add nsw i32 %i.015, 1
%cmp1 = icmp slt i32 %inc, 4
diff --git a/test/CodeGen/R600/uaddo.ll b/test/CodeGen/R600/uaddo.ll
index 57d7835f99fb..11438f267ad0 100644
--- a/test/CodeGen/R600/uaddo.ll
+++ b/test/CodeGen/R600/uaddo.ll
@@ -1,6 +1,6 @@
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs< %s
+; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs< %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
declare { i32, i1 } @llvm.uadd.with.overflow.i32(i32, i32) nounwind readnone
declare { i64, i1 } @llvm.uadd.with.overflow.i64(i64, i64) nounwind readnone
@@ -9,6 +9,9 @@ declare { i64, i1 } @llvm.uadd.with.overflow.i64(i64, i64) nounwind readnone
; SI: add
; SI: addc
; SI: addc
+
+; EG: ADDC_UINT
+; EG: ADDC_UINT
define void @uaddo_i64_zext(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
%uadd = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %a, i64 %b) nounwind
%val = extractvalue { i64, i1 } %uadd, 0
@@ -21,6 +24,9 @@ define void @uaddo_i64_zext(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
; FUNC-LABEL: {{^}}s_uaddo_i32:
; SI: s_add_i32
+
+; EG: ADDC_UINT
+; EG: ADD_INT
define void @s_uaddo_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %carryout, i32 %a, i32 %b) nounwind {
%uadd = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %a, i32 %b) nounwind
%val = extractvalue { i32, i1 } %uadd, 0
@@ -32,9 +38,12 @@ define void @s_uaddo_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %carryout, i32
; FUNC-LABEL: {{^}}v_uaddo_i32:
; SI: v_add_i32
+
+; EG: ADDC_UINT
+; EG: ADD_INT
define void @v_uaddo_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %carryout, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
- %a = load i32 addrspace(1)* %aptr, align 4
- %b = load i32 addrspace(1)* %bptr, align 4
+ %a = load i32, i32 addrspace(1)* %aptr, align 4
+ %b = load i32, i32 addrspace(1)* %bptr, align 4
%uadd = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %a, i32 %b) nounwind
%val = extractvalue { i32, i1 } %uadd, 0
%carry = extractvalue { i32, i1 } %uadd, 1
@@ -46,6 +55,9 @@ define void @v_uaddo_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %carryout, i32
; FUNC-LABEL: {{^}}s_uaddo_i64:
; SI: s_add_u32
; SI: s_addc_u32
+
+; EG: ADDC_UINT
+; EG: ADD_INT
define void @s_uaddo_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %carryout, i64 %a, i64 %b) nounwind {
%uadd = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %a, i64 %b) nounwind
%val = extractvalue { i64, i1 } %uadd, 0
@@ -58,9 +70,12 @@ define void @s_uaddo_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %carryout, i64
; FUNC-LABEL: {{^}}v_uaddo_i64:
; SI: v_add_i32
; SI: v_addc_u32
+
+; EG: ADDC_UINT
+; EG: ADD_INT
define void @v_uaddo_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %carryout, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) nounwind {
- %a = load i64 addrspace(1)* %aptr, align 4
- %b = load i64 addrspace(1)* %bptr, align 4
+ %a = load i64, i64 addrspace(1)* %aptr, align 4
+ %b = load i64, i64 addrspace(1)* %bptr, align 4
%uadd = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %a, i64 %b) nounwind
%val = extractvalue { i64, i1 } %uadd, 0
%carry = extractvalue { i64, i1 } %uadd, 1
diff --git a/test/CodeGen/R600/udiv.ll b/test/CodeGen/R600/udiv.ll
index 0c2c65bb7bf6..de22a22e5029 100644
--- a/test/CodeGen/R600/udiv.ll
+++ b/test/CodeGen/R600/udiv.ll
@@ -7,9 +7,9 @@
;EG: CF_END
define void @test(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
- %b_ptr = getelementptr i32 addrspace(1)* %in, i32 1
- %a = load i32 addrspace(1) * %in
- %b = load i32 addrspace(1) * %b_ptr
+ %b_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
+ %a = load i32, i32 addrspace(1) * %in
+ %b = load i32, i32 addrspace(1) * %b_ptr
%result = udiv i32 %a, %b
store i32 %result, i32 addrspace(1)* %out
ret void
@@ -25,9 +25,9 @@ define void @test(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
;SI: s_endpgm
define void @test2(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
- %b_ptr = getelementptr <2 x i32> addrspace(1)* %in, i32 1
- %a = load <2 x i32> addrspace(1) * %in
- %b = load <2 x i32> addrspace(1) * %b_ptr
+ %b_ptr = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %in, i32 1
+ %a = load <2 x i32>, <2 x i32> addrspace(1) * %in
+ %b = load <2 x i32>, <2 x i32> addrspace(1) * %b_ptr
%result = udiv <2 x i32> %a, %b
store <2 x i32> %result, <2 x i32> addrspace(1)* %out
ret void
@@ -39,9 +39,9 @@ define void @test2(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
;SI: s_endpgm
define void @test4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
- %b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1
- %a = load <4 x i32> addrspace(1) * %in
- %b = load <4 x i32> addrspace(1) * %b_ptr
+ %b_ptr = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 1
+ %a = load <4 x i32>, <4 x i32> addrspace(1) * %in
+ %b = load <4 x i32>, <4 x i32> addrspace(1) * %b_ptr
%result = udiv <4 x i32> %a, %b
store <4 x i32> %result, <4 x i32> addrspace(1)* %out
ret void
diff --git a/test/CodeGen/R600/udivrem.ll b/test/CodeGen/R600/udivrem.ll
index b439d7aa892a..b3837f28209a 100644
--- a/test/CodeGen/R600/udivrem.ll
+++ b/test/CodeGen/R600/udivrem.ll
@@ -118,7 +118,7 @@ define void @test_udivrem(i32 addrspace(1)* %out, i32 %x, i32 %y) {
; SI-DAG: v_cndmask_b32_e64
; SI-DAG: v_mul_hi_u32 [[FIRST_Quotient:v[0-9]+]]
; SI-DAG: v_mul_lo_i32 [[FIRST_Num_S_Remainder:v[0-9]+]]
-; SI-DAG: v_sub_i32_e32 [[FIRST_Remainder:v[0-9]+]], {{[vs][0-9]+}}, [[FIRST_Num_S_Remainder]]
+; SI-DAG: v_subrev_i32_e32 [[FIRST_Remainder:v[0-9]+]], [[FIRST_Num_S_Remainder]], v{{[0-9]+}}
; SI-DAG: v_cndmask_b32_e64
; SI-DAG: v_cndmask_b32_e64
; SI-DAG: v_and_b32_e32 [[FIRST_Tmp1:v[0-9]+]]
@@ -141,7 +141,7 @@ define void @test_udivrem(i32 addrspace(1)* %out, i32 %x, i32 %y) {
; SI-DAG: v_cndmask_b32_e64
; SI-DAG: v_mul_hi_u32 [[SECOND_Quotient:v[0-9]+]]
; SI-DAG: v_mul_lo_i32 [[SECOND_Num_S_Remainder:v[0-9]+]]
-; SI-DAG: v_sub_i32_e32 [[SECOND_Remainder:v[0-9]+]], {{[vs][0-9]+}}, [[SECOND_Num_S_Remainder]]
+; SI-DAG: v_subrev_i32_e32 [[SECOND_Remainder:v[0-9]+]], [[SECOND_Num_S_Remainder]], v{{[0-9]+}}
; SI-DAG: v_cndmask_b32_e64
; SI-DAG: v_cndmask_b32_e64
; SI-DAG: v_and_b32_e32 [[SECOND_Tmp1:v[0-9]+]]
@@ -268,7 +268,7 @@ define void @test_udivrem_v2(<2 x i32> addrspace(1)* %out, <2 x i32> %x, <2 x i3
; SI-DAG: v_cndmask_b32_e64
; SI-DAG: v_mul_hi_u32 [[FIRST_Quotient:v[0-9]+]]
; SI-DAG: v_mul_lo_i32 [[FIRST_Num_S_Remainder:v[0-9]+]]
-; SI-DAG: v_sub_i32_e32 [[FIRST_Remainder:v[0-9]+]], {{[vs][0-9]+}}, [[FIRST_Num_S_Remainder]]
+; SI-DAG: v_subrev_i32_e32 [[FIRST_Remainder:v[0-9]+]], [[FIRST_Num_S_Remainder]], v{{[0-9]+}}
; SI-DAG: v_cndmask_b32_e64
; SI-DAG: v_cndmask_b32_e64
; SI-DAG: v_and_b32_e32 [[FIRST_Tmp1:v[0-9]+]]
@@ -291,7 +291,7 @@ define void @test_udivrem_v2(<2 x i32> addrspace(1)* %out, <2 x i32> %x, <2 x i3
; SI-DAG: v_cndmask_b32_e64
; SI-DAG: v_mul_hi_u32 [[SECOND_Quotient:v[0-9]+]]
; SI-DAG: v_mul_lo_i32 [[SECOND_Num_S_Remainder:v[0-9]+]]
-; SI-DAG: v_sub_i32_e32 [[SECOND_Remainder:v[0-9]+]], {{[vs][0-9]+}}, [[SECOND_Num_S_Remainder]]
+; SI-DAG: v_subrev_i32_e32 [[SECOND_Remainder:v[0-9]+]], [[SECOND_Num_S_Remainder]], v{{[0-9]+}}
; SI-DAG: v_cndmask_b32_e64
; SI-DAG: v_cndmask_b32_e64
; SI-DAG: v_and_b32_e32 [[SECOND_Tmp1:v[0-9]+]]
@@ -314,7 +314,7 @@ define void @test_udivrem_v2(<2 x i32> addrspace(1)* %out, <2 x i32> %x, <2 x i3
; SI-DAG: v_cndmask_b32_e64
; SI-DAG: v_mul_hi_u32 [[THIRD_Quotient:v[0-9]+]]
; SI-DAG: v_mul_lo_i32 [[THIRD_Num_S_Remainder:v[0-9]+]]
-; SI-DAG: v_sub_i32_e32 [[THIRD_Remainder:v[0-9]+]], {{[vs][0-9]+}}, [[THIRD_Num_S_Remainder]]
+; SI-DAG: v_subrev_i32_e32 [[THIRD_Remainder:v[0-9]+]], [[THIRD_Num_S_Remainder]], {{v[0-9]+}}
; SI-DAG: v_cndmask_b32_e64
; SI-DAG: v_cndmask_b32_e64
; SI-DAG: v_and_b32_e32 [[THIRD_Tmp1:v[0-9]+]]
@@ -335,20 +335,6 @@ define void @test_udivrem_v2(<2 x i32> addrspace(1)* %out, <2 x i32> %x, <2 x i3
; SI-DAG: v_add_i32_e32 [[FOURTH_RCP_A_E:v[0-9]+]], [[FOURTH_E]], [[FOURTH_RCP]]
; SI-DAG: v_subrev_i32_e32 [[FOURTH_RCP_S_E:v[0-9]+]], [[FOURTH_E]], [[FOURTH_RCP]]
; SI-DAG: v_cndmask_b32_e64
-; SI-DAG: v_mul_hi_u32 [[FOURTH_Quotient:v[0-9]+]]
-; SI-DAG: v_mul_lo_i32 [[FOURTH_Num_S_Remainder:v[0-9]+]]
-; SI-DAG: v_sub_i32_e32 [[FOURTH_Remainder:v[0-9]+]], {{[vs][0-9]+}}, [[FOURTH_Num_S_Remainder]]
-; SI-DAG: v_cndmask_b32_e64
-; SI-DAG: v_cndmask_b32_e64
-; SI-DAG: v_and_b32_e32 [[FOURTH_Tmp1:v[0-9]+]]
-; SI-DAG: v_add_i32_e32 [[FOURTH_Quotient_A_One:v[0-9]+]], {{.*}}, [[FOURTH_Quotient]]
-; SI-DAG: v_subrev_i32_e32 [[FOURTH_Quotient_S_One:v[0-9]+]],
-; SI-DAG: v_cndmask_b32_e64
-; SI-DAG: v_cndmask_b32_e64
-; SI-DAG: v_add_i32_e32 [[FOURTH_Remainder_A_Den:v[0-9]+]],
-; SI-DAG: v_subrev_i32_e32 [[FOURTH_Remainder_S_Den:v[0-9]+]],
-; SI-DAG: v_cndmask_b32_e64
-; SI-DAG: v_cndmask_b32_e64
; SI: s_endpgm
define void @test_udivrem_v4(<4 x i32> addrspace(1)* %out, <4 x i32> %x, <4 x i32> %y) {
%result0 = udiv <4 x i32> %x, %y
diff --git a/test/CodeGen/R600/udivrem24.ll b/test/CodeGen/R600/udivrem24.ll
index 4b98ac67b220..4de881b66f10 100644
--- a/test/CodeGen/R600/udivrem24.ll
+++ b/test/CodeGen/R600/udivrem24.ll
@@ -13,9 +13,9 @@
; EG-DAG: RECIP_IEEE
; EG: FLT_TO_UINT
define void @udiv24_i8(i8 addrspace(1)* %out, i8 addrspace(1)* %in) {
- %den_ptr = getelementptr i8 addrspace(1)* %in, i8 1
- %num = load i8 addrspace(1) * %in
- %den = load i8 addrspace(1) * %den_ptr
+ %den_ptr = getelementptr i8, i8 addrspace(1)* %in, i8 1
+ %num = load i8, i8 addrspace(1) * %in
+ %den = load i8, i8 addrspace(1) * %den_ptr
%result = udiv i8 %num, %den
store i8 %result, i8 addrspace(1)* %out
ret void
@@ -32,9 +32,9 @@ define void @udiv24_i8(i8 addrspace(1)* %out, i8 addrspace(1)* %in) {
; EG-DAG: RECIP_IEEE
; EG: FLT_TO_UINT
define void @udiv24_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %in) {
- %den_ptr = getelementptr i16 addrspace(1)* %in, i16 1
- %num = load i16 addrspace(1) * %in, align 2
- %den = load i16 addrspace(1) * %den_ptr, align 2
+ %den_ptr = getelementptr i16, i16 addrspace(1)* %in, i16 1
+ %num = load i16, i16 addrspace(1) * %in, align 2
+ %den = load i16, i16 addrspace(1) * %den_ptr, align 2
%result = udiv i16 %num, %den
store i16 %result, i16 addrspace(1)* %out, align 2
ret void
@@ -51,9 +51,9 @@ define void @udiv24_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %in) {
; EG-DAG: RECIP_IEEE
; EG: FLT_TO_UINT
define void @udiv24_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
- %den_ptr = getelementptr i32 addrspace(1)* %in, i32 1
- %num = load i32 addrspace(1) * %in, align 4
- %den = load i32 addrspace(1) * %den_ptr, align 4
+ %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
+ %num = load i32, i32 addrspace(1) * %in, align 4
+ %den = load i32, i32 addrspace(1) * %den_ptr, align 4
%num.i24.0 = shl i32 %num, 8
%den.i24.0 = shl i32 %den, 8
%num.i24 = lshr i32 %num.i24.0, 8
@@ -71,9 +71,9 @@ define void @udiv24_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
; EG-NOT: UINT_TO_FLT
; EG-NOT: RECIP_IEEE
define void @udiv25_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
- %den_ptr = getelementptr i32 addrspace(1)* %in, i32 1
- %num = load i32 addrspace(1) * %in, align 4
- %den = load i32 addrspace(1) * %den_ptr, align 4
+ %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
+ %num = load i32, i32 addrspace(1) * %in, align 4
+ %den = load i32, i32 addrspace(1) * %den_ptr, align 4
%num.i24.0 = shl i32 %num, 7
%den.i24.0 = shl i32 %den, 7
%num.i24 = lshr i32 %num.i24.0, 7
@@ -91,9 +91,9 @@ define void @udiv25_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
; EG-NOT: UINT_TO_FLT
; EG-NOT: RECIP_IEEE
define void @test_no_udiv24_i32_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
- %den_ptr = getelementptr i32 addrspace(1)* %in, i32 1
- %num = load i32 addrspace(1) * %in, align 4
- %den = load i32 addrspace(1) * %den_ptr, align 4
+ %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
+ %num = load i32, i32 addrspace(1) * %in, align 4
+ %den = load i32, i32 addrspace(1) * %den_ptr, align 4
%num.i24.0 = shl i32 %num, 8
%den.i24.0 = shl i32 %den, 7
%num.i24 = lshr i32 %num.i24.0, 8
@@ -111,9 +111,9 @@ define void @test_no_udiv24_i32_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in)
; EG-NOT: UINT_TO_FLT
; EG-NOT: RECIP_IEEE
define void @test_no_udiv24_i32_2(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
- %den_ptr = getelementptr i32 addrspace(1)* %in, i32 1
- %num = load i32 addrspace(1) * %in, align 4
- %den = load i32 addrspace(1) * %den_ptr, align 4
+ %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
+ %num = load i32, i32 addrspace(1) * %in, align 4
+ %den = load i32, i32 addrspace(1) * %den_ptr, align 4
%num.i24.0 = shl i32 %num, 7
%den.i24.0 = shl i32 %den, 8
%num.i24 = lshr i32 %num.i24.0, 7
@@ -134,9 +134,9 @@ define void @test_no_udiv24_i32_2(i32 addrspace(1)* %out, i32 addrspace(1)* %in)
; EG-DAG: RECIP_IEEE
; EG: FLT_TO_UINT
define void @urem24_i8(i8 addrspace(1)* %out, i8 addrspace(1)* %in) {
- %den_ptr = getelementptr i8 addrspace(1)* %in, i8 1
- %num = load i8 addrspace(1) * %in
- %den = load i8 addrspace(1) * %den_ptr
+ %den_ptr = getelementptr i8, i8 addrspace(1)* %in, i8 1
+ %num = load i8, i8 addrspace(1) * %in
+ %den = load i8, i8 addrspace(1) * %den_ptr
%result = urem i8 %num, %den
store i8 %result, i8 addrspace(1)* %out
ret void
@@ -153,9 +153,9 @@ define void @urem24_i8(i8 addrspace(1)* %out, i8 addrspace(1)* %in) {
; EG-DAG: RECIP_IEEE
; EG: FLT_TO_UINT
define void @urem24_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %in) {
- %den_ptr = getelementptr i16 addrspace(1)* %in, i16 1
- %num = load i16 addrspace(1) * %in, align 2
- %den = load i16 addrspace(1) * %den_ptr, align 2
+ %den_ptr = getelementptr i16, i16 addrspace(1)* %in, i16 1
+ %num = load i16, i16 addrspace(1) * %in, align 2
+ %den = load i16, i16 addrspace(1) * %den_ptr, align 2
%result = urem i16 %num, %den
store i16 %result, i16 addrspace(1)* %out, align 2
ret void
@@ -172,9 +172,9 @@ define void @urem24_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %in) {
; EG-DAG: RECIP_IEEE
; EG: FLT_TO_UINT
define void @urem24_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
- %den_ptr = getelementptr i32 addrspace(1)* %in, i32 1
- %num = load i32 addrspace(1) * %in, align 4
- %den = load i32 addrspace(1) * %den_ptr, align 4
+ %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
+ %num = load i32, i32 addrspace(1) * %in, align 4
+ %den = load i32, i32 addrspace(1) * %den_ptr, align 4
%num.i24.0 = shl i32 %num, 8
%den.i24.0 = shl i32 %den, 8
%num.i24 = lshr i32 %num.i24.0, 8
@@ -192,9 +192,9 @@ define void @urem24_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
; EG-NOT: UINT_TO_FLT
; EG-NOT: RECIP_IEEE
define void @urem25_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
- %den_ptr = getelementptr i32 addrspace(1)* %in, i32 1
- %num = load i32 addrspace(1) * %in, align 4
- %den = load i32 addrspace(1) * %den_ptr, align 4
+ %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
+ %num = load i32, i32 addrspace(1) * %in, align 4
+ %den = load i32, i32 addrspace(1) * %den_ptr, align 4
%num.i24.0 = shl i32 %num, 7
%den.i24.0 = shl i32 %den, 7
%num.i24 = lshr i32 %num.i24.0, 7
@@ -212,9 +212,9 @@ define void @urem25_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
; EG-NOT: UINT_TO_FLT
; EG-NOT: RECIP_IEEE
define void @test_no_urem24_i32_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
- %den_ptr = getelementptr i32 addrspace(1)* %in, i32 1
- %num = load i32 addrspace(1) * %in, align 4
- %den = load i32 addrspace(1) * %den_ptr, align 4
+ %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
+ %num = load i32, i32 addrspace(1) * %in, align 4
+ %den = load i32, i32 addrspace(1) * %den_ptr, align 4
%num.i24.0 = shl i32 %num, 8
%den.i24.0 = shl i32 %den, 7
%num.i24 = lshr i32 %num.i24.0, 8
@@ -232,9 +232,9 @@ define void @test_no_urem24_i32_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in)
; EG-NOT: UINT_TO_FLT
; EG-NOT: RECIP_IEEE
define void @test_no_urem24_i32_2(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
- %den_ptr = getelementptr i32 addrspace(1)* %in, i32 1
- %num = load i32 addrspace(1) * %in, align 4
- %den = load i32 addrspace(1) * %den_ptr, align 4
+ %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
+ %num = load i32, i32 addrspace(1) * %in, align 4
+ %den = load i32, i32 addrspace(1) * %den_ptr, align 4
%num.i24.0 = shl i32 %num, 7
%den.i24.0 = shl i32 %den, 8
%num.i24 = lshr i32 %num.i24.0, 7
diff --git a/test/CodeGen/R600/udivrem64.ll b/test/CodeGen/R600/udivrem64.ll
index 77922fe8dab6..9f3069bdf80c 100644
--- a/test/CodeGen/R600/udivrem64.ll
+++ b/test/CodeGen/R600/udivrem64.ll
@@ -1,5 +1,5 @@
-;RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck --check-prefix=SI --check-prefix=FUNC %s
-;RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck --check-prefix=SI --check-prefix=FUNC %s
+;RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck --check-prefix=SI --check-prefix=GCN --check-prefix=FUNC %s
+;RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck --check-prefix=VI --check-prefix=GCN --check-prefix=FUNC %s
;RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck --check-prefix=EG --check-prefix=FUNC %s
;FUNC-LABEL: {{^}}test_udiv:
@@ -35,7 +35,41 @@
;EG: BFE_UINT
;EG: BFE_UINT
;EG: BFE_UINT
-;SI: s_endpgm
+
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN-NOT: v_mad_f32
+;SI-NOT: v_lshr_b64
+;VI-NOT: v_lshrrev_b64
+;GCN: s_endpgm
define void @test_udiv(i64 addrspace(1)* %out, i64 %x, i64 %y) {
%result = udiv i64 %x, %y
store i64 %result, i64 addrspace(1)* %out
@@ -75,9 +109,115 @@ define void @test_udiv(i64 addrspace(1)* %out, i64 %x, i64 %y) {
;EG: BFE_UINT
;EG: BFE_UINT
;EG: AND_INT {{.*}}, 1,
-;SI: s_endpgm
+
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN: s_bfe_u32
+;GCN-NOT: v_mad_f32
+;SI-NOT: v_lshr_b64
+;VI-NOT: v_lshrrev_b64
+;GCN: s_endpgm
define void @test_urem(i64 addrspace(1)* %out, i64 %x, i64 %y) {
%result = urem i64 %x, %y
store i64 %result, i64 addrspace(1)* %out
ret void
}
+
+;FUNC-LABEL: {{^}}test_udiv3264:
+;EG: RECIP_UINT
+;EG-NOT: BFE_UINT
+
+;GCN-NOT: s_bfe_u32
+;GCN-NOT: v_mad_f32
+;SI-NOT: v_lshr_b64
+;VI-NOT: v_lshrrev_b64
+;GCN: s_endpgm
+define void @test_udiv3264(i64 addrspace(1)* %out, i64 %x, i64 %y) {
+ %1 = lshr i64 %x, 33
+ %2 = lshr i64 %y, 33
+ %result = udiv i64 %1, %2
+ store i64 %result, i64 addrspace(1)* %out
+ ret void
+}
+
+;FUNC-LABEL: {{^}}test_urem3264:
+;EG: RECIP_UINT
+;EG-NOT: BFE_UINT
+
+;GCN-NOT: s_bfe_u32
+;GCN-NOT: v_mad_f32
+;SI-NOT: v_lshr_b64
+;VI-NOT: v_lshrrev_b64
+;GCN: s_endpgm
+define void @test_urem3264(i64 addrspace(1)* %out, i64 %x, i64 %y) {
+ %1 = lshr i64 %x, 33
+ %2 = lshr i64 %y, 33
+ %result = urem i64 %1, %2
+ store i64 %result, i64 addrspace(1)* %out
+ ret void
+}
+
+;FUNC-LABEL: {{^}}test_udiv2464:
+;EG: UINT_TO_FLT
+;EG: UINT_TO_FLT
+;EG: FLT_TO_UINT
+;EG-NOT: RECIP_UINT
+;EG-NOT: BFE_UINT
+
+;SI-NOT: v_lshr_b64
+;VI-NOT: v_lshrrev_b64
+;GCN: v_mad_f32
+;GCN: s_endpgm
+define void @test_udiv2464(i64 addrspace(1)* %out, i64 %x, i64 %y) {
+ %1 = lshr i64 %x, 40
+ %2 = lshr i64 %y, 40
+ %result = udiv i64 %1, %2
+ store i64 %result, i64 addrspace(1)* %out
+ ret void
+}
+
+;FUNC-LABEL: {{^}}test_urem2464:
+;EG: UINT_TO_FLT
+;EG: UINT_TO_FLT
+;EG: FLT_TO_UINT
+;EG-NOT: RECIP_UINT
+;EG-NOT: BFE_UINT
+
+;SI-NOT: v_lshr_b64
+;VI-NOT: v_lshrrev_b64
+;GCN: v_mad_f32
+;GCN: s_endpgm
+define void @test_urem2464(i64 addrspace(1)* %out, i64 %x, i64 %y) {
+ %1 = lshr i64 %x, 40
+ %2 = lshr i64 %y, 40
+ %result = urem i64 %1, %2
+ store i64 %result, i64 addrspace(1)* %out
+ ret void
+}
diff --git a/test/CodeGen/R600/uint_to_fp.f64.ll b/test/CodeGen/R600/uint_to_fp.f64.ll
index 09e987dd14da..dfec8eb15cb7 100644
--- a/test/CodeGen/R600/uint_to_fp.f64.ll
+++ b/test/CodeGen/R600/uint_to_fp.f64.ll
@@ -11,8 +11,8 @@ declare i32 @llvm.r600.read.tidig.x() nounwind readnone
; SI: buffer_store_dwordx2 [[RESULT]]
define void @v_uint_to_fp_i64_to_f64(double addrspace(1)* %out, i64 addrspace(1)* %in) {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
- %gep = getelementptr i64 addrspace(1)* %in, i32 %tid
- %val = load i64 addrspace(1)* %gep, align 8
+ %gep = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
+ %val = load i64, i64 addrspace(1)* %gep, align 8
%result = uitofp i64 %val to double
store double %result, double addrspace(1)* %out
ret void
@@ -70,12 +70,13 @@ define void @s_uint_to_fp_v4i32_to_v4f64(<4 x double> addrspace(1)* %out, <4 x i
ret void
}
+; FIXME: select on 0, 0
; SI-LABEL: {{^}}uint_to_fp_i1_to_f64:
; SI: v_cmp_eq_i32_e64 [[CMP:s\[[0-9]+:[0-9]\]]],
; We can't fold the SGPRs into v_cndmask_b32_e64, because it already
; uses an SGPR for [[CMP]]
; SI: v_cndmask_b32_e64 v{{[0-9]+}}, 0, v{{[0-9]+}}, [[CMP]]
-; SI: v_cndmask_b32_e64 v{{[0-9]+}}, 0, v{{[0-9]+}}, [[CMP]]
+; SI: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 0, [[CMP]]
; SI: buffer_store_dwordx2
; SI: s_endpgm
define void @uint_to_fp_i1_to_f64(double addrspace(1)* %out, i32 %in) {
diff --git a/test/CodeGen/R600/uint_to_fp.ll b/test/CodeGen/R600/uint_to_fp.ll
index cf14c25759f7..00fea80b1bc8 100644
--- a/test/CodeGen/R600/uint_to_fp.ll
+++ b/test/CodeGen/R600/uint_to_fp.ll
@@ -38,7 +38,7 @@ define void @uint_to_fp_v2i32_to_v2f32(<2 x float> addrspace(1)* %out, <2 x i32>
; SI: v_cvt_f32_u32_e32
; SI: s_endpgm
define void @uint_to_fp_v4i32_to_v4f32(<4 x float> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
- %value = load <4 x i32> addrspace(1) * %in
+ %value = load <4 x i32>, <4 x i32> addrspace(1) * %in
%result = uitofp <4 x i32> %value to <4 x float>
store <4 x float> %result, <4 x float> addrspace(1)* %out
ret void
@@ -50,7 +50,7 @@ define void @uint_to_fp_v4i32_to_v4f32(<4 x float> addrspace(1)* %out, <4 x i32>
; R600: MULADD_IEEE
; SI: v_cvt_f32_u32_e32
; SI: v_cvt_f32_u32_e32
-; SI: v_mad_f32
+; SI: v_madmk_f32_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, 0x4f800000
; SI: s_endpgm
define void @uint_to_fp_i64_to_f32(float addrspace(1)* %out, i64 %in) {
entry:
diff --git a/test/CodeGen/R600/unaligned-load-store.ll b/test/CodeGen/R600/unaligned-load-store.ll
index 665dc37c200a..82d88ebd3ae7 100644
--- a/test/CodeGen/R600/unaligned-load-store.ll
+++ b/test/CodeGen/R600/unaligned-load-store.ll
@@ -8,7 +8,7 @@
; SI: ds_write_b8
; SI: s_endpgm
define void @unaligned_load_store_i16_local(i16 addrspace(3)* %p, i16 addrspace(3)* %r) nounwind {
- %v = load i16 addrspace(3)* %p, align 1
+ %v = load i16, i16 addrspace(3)* %p, align 1
store i16 %v, i16 addrspace(3)* %r, align 1
ret void
}
@@ -20,7 +20,7 @@ define void @unaligned_load_store_i16_local(i16 addrspace(3)* %p, i16 addrspace(
; SI: buffer_store_byte
; SI: s_endpgm
define void @unaligned_load_store_i16_global(i16 addrspace(1)* %p, i16 addrspace(1)* %r) nounwind {
- %v = load i16 addrspace(1)* %p, align 1
+ %v = load i16, i16 addrspace(1)* %p, align 1
store i16 %v, i16 addrspace(1)* %r, align 1
ret void
}
@@ -36,7 +36,7 @@ define void @unaligned_load_store_i16_global(i16 addrspace(1)* %p, i16 addrspace
; SI: ds_write_b8
; SI: s_endpgm
define void @unaligned_load_store_i32_local(i32 addrspace(3)* %p, i32 addrspace(3)* %r) nounwind {
- %v = load i32 addrspace(3)* %p, align 1
+ %v = load i32, i32 addrspace(3)* %p, align 1
store i32 %v, i32 addrspace(3)* %r, align 1
ret void
}
@@ -51,7 +51,7 @@ define void @unaligned_load_store_i32_local(i32 addrspace(3)* %p, i32 addrspace(
; SI: buffer_store_byte
; SI: buffer_store_byte
define void @unaligned_load_store_i32_global(i32 addrspace(1)* %p, i32 addrspace(1)* %r) nounwind {
- %v = load i32 addrspace(1)* %p, align 1
+ %v = load i32, i32 addrspace(1)* %p, align 1
store i32 %v, i32 addrspace(1)* %r, align 1
ret void
}
@@ -75,7 +75,7 @@ define void @unaligned_load_store_i32_global(i32 addrspace(1)* %p, i32 addrspace
; SI: ds_write_b8
; SI: s_endpgm
define void @unaligned_load_store_i64_local(i64 addrspace(3)* %p, i64 addrspace(3)* %r) {
- %v = load i64 addrspace(3)* %p, align 1
+ %v = load i64, i64 addrspace(3)* %p, align 1
store i64 %v, i64 addrspace(3)* %r, align 1
ret void
}
@@ -98,7 +98,7 @@ define void @unaligned_load_store_i64_local(i64 addrspace(3)* %p, i64 addrspace(
; SI: buffer_store_byte
; SI: buffer_store_byte
define void @unaligned_load_store_i64_global(i64 addrspace(1)* %p, i64 addrspace(1)* %r) {
- %v = load i64 addrspace(1)* %p, align 1
+ %v = load i64, i64 addrspace(1)* %p, align 1
store i64 %v, i64 addrspace(1)* %r, align 1
ret void
}
@@ -145,7 +145,7 @@ define void @unaligned_load_store_i64_global(i64 addrspace(1)* %p, i64 addrspace
; SI: ds_write_b8
; SI: s_endpgm
define void @unaligned_load_store_v4i32_local(<4 x i32> addrspace(3)* %p, <4 x i32> addrspace(3)* %r) nounwind {
- %v = load <4 x i32> addrspace(3)* %p, align 1
+ %v = load <4 x i32>, <4 x i32> addrspace(3)* %p, align 1
store <4 x i32> %v, <4 x i32> addrspace(3)* %r, align 1
ret void
}
@@ -169,7 +169,7 @@ define void @unaligned_load_store_v4i32_local(<4 x i32> addrspace(3)* %p, <4 x i
; FIXME-SI: buffer_load_ubyte
; FIXME-SI: buffer_load_ubyte
define void @unaligned_load_store_v4i32_global(<4 x i32> addrspace(1)* %p, <4 x i32> addrspace(1)* %r) nounwind {
- %v = load <4 x i32> addrspace(1)* %p, align 1
+ %v = load <4 x i32>, <4 x i32> addrspace(1)* %p, align 1
store <4 x i32> %v, <4 x i32> addrspace(1)* %r, align 1
ret void
}
@@ -178,7 +178,7 @@ define void @unaligned_load_store_v4i32_global(<4 x i32> addrspace(1)* %p, <4 x
; SI: ds_read2_b32
; SI: s_endpgm
define void @load_lds_i64_align_4(i64 addrspace(1)* nocapture %out, i64 addrspace(3)* %in) #0 {
- %val = load i64 addrspace(3)* %in, align 4
+ %val = load i64, i64 addrspace(3)* %in, align 4
store i64 %val, i64 addrspace(1)* %out, align 8
ret void
}
@@ -187,21 +187,21 @@ define void @load_lds_i64_align_4(i64 addrspace(1)* nocapture %out, i64 addrspac
; SI: ds_read2_b32 v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]}} offset0:8 offset1:9
; SI: s_endpgm
define void @load_lds_i64_align_4_with_offset(i64 addrspace(1)* nocapture %out, i64 addrspace(3)* %in) #0 {
- %ptr = getelementptr i64 addrspace(3)* %in, i32 4
- %val = load i64 addrspace(3)* %ptr, align 4
+ %ptr = getelementptr i64, i64 addrspace(3)* %in, i32 4
+ %val = load i64, i64 addrspace(3)* %ptr, align 4
store i64 %val, i64 addrspace(1)* %out, align 8
ret void
}
; SI-LABEL: {{^}}load_lds_i64_align_4_with_split_offset:
; The tests for the case where the lo offset is 8-bits, but the hi offset is 9-bits
-; SI: ds_read2_b32 v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]}} offset0:0 offset1:1
+; SI: ds_read2_b32 v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]}} offset1:1
; SI: s_endpgm
define void @load_lds_i64_align_4_with_split_offset(i64 addrspace(1)* nocapture %out, i64 addrspace(3)* %in) #0 {
%ptr = bitcast i64 addrspace(3)* %in to i32 addrspace(3)*
- %ptr255 = getelementptr i32 addrspace(3)* %ptr, i32 255
+ %ptr255 = getelementptr i32, i32 addrspace(3)* %ptr, i32 255
%ptri64 = bitcast i32 addrspace(3)* %ptr255 to i64 addrspace(3)*
- %val = load i64 addrspace(3)* %ptri64, align 4
+ %val = load i64, i64 addrspace(3)* %ptri64, align 4
store i64 %val, i64 addrspace(1)* %out, align 8
ret void
}
@@ -219,7 +219,7 @@ define void @load_lds_i64_align_4_with_split_offset(i64 addrspace(1)* nocapture
; SI: s_endpgm
define void @load_lds_i64_align_1(i64 addrspace(1)* nocapture %out, i64 addrspace(3)* %in) #0 {
- %val = load i64 addrspace(3)* %in, align 1
+ %val = load i64, i64 addrspace(3)* %in, align 1
store i64 %val, i64 addrspace(1)* %out, align 8
ret void
}
@@ -236,18 +236,18 @@ define void @store_lds_i64_align_4(i64 addrspace(3)* %out, i64 %val) #0 {
; SI: ds_write2_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset0:8 offset1:9
; SI: s_endpgm
define void @store_lds_i64_align_4_with_offset(i64 addrspace(3)* %out) #0 {
- %ptr = getelementptr i64 addrspace(3)* %out, i32 4
+ %ptr = getelementptr i64, i64 addrspace(3)* %out, i32 4
store i64 0, i64 addrspace(3)* %ptr, align 4
ret void
}
; SI-LABEL: {{^}}store_lds_i64_align_4_with_split_offset:
; The tests for the case where the lo offset is 8-bits, but the hi offset is 9-bits
-; SI: ds_write2_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset0:0 offset1:1
+; SI: ds_write2_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset1:1
; SI: s_endpgm
define void @store_lds_i64_align_4_with_split_offset(i64 addrspace(3)* %out) #0 {
%ptr = bitcast i64 addrspace(3)* %out to i32 addrspace(3)*
- %ptr255 = getelementptr i32 addrspace(3)* %ptr, i32 255
+ %ptr255 = getelementptr i32, i32 addrspace(3)* %ptr, i32 255
%ptri64 = bitcast i32 addrspace(3)* %ptr255 to i64 addrspace(3)*
store i64 0, i64 addrspace(3)* %out, align 4
ret void
diff --git a/test/CodeGen/R600/unhandled-loop-condition-assertion.ll b/test/CodeGen/R600/unhandled-loop-condition-assertion.ll
index c615f0b84913..036a7e91b47f 100644
--- a/test/CodeGen/R600/unhandled-loop-condition-assertion.ll
+++ b/test/CodeGen/R600/unhandled-loop-condition-assertion.ll
@@ -20,20 +20,20 @@ for.body.lr.ph: ; preds = %entry
for.body: ; preds = %for.body, %for.body.lr.ph
%main.addr.011 = phi i8 addrspace(1)* [ %main, %for.body.lr.ph ], [ %add.ptr6, %for.body ]
%0 = bitcast i8 addrspace(1)* %main.addr.011 to i32 addrspace(1)*
- %1 = load i32 addrspace(1)* %0, align 4
- %add.ptr = getelementptr inbounds i8 addrspace(1)* %main.addr.011, i32 %main_stride
+ %1 = load i32, i32 addrspace(1)* %0, align 4
+ %add.ptr = getelementptr inbounds i8, i8 addrspace(1)* %main.addr.011, i32 %main_stride
%2 = bitcast i8 addrspace(1)* %add.ptr to i32 addrspace(1)*
- %3 = load i32 addrspace(1)* %2, align 4
- %add.ptr1 = getelementptr inbounds i8 addrspace(1)* %main.addr.011, i32 %add.ptr.sum
+ %3 = load i32, i32 addrspace(1)* %2, align 4
+ %add.ptr1 = getelementptr inbounds i8, i8 addrspace(1)* %main.addr.011, i32 %add.ptr.sum
%4 = bitcast i8 addrspace(1)* %add.ptr1 to i32 addrspace(1)*
- %5 = load i32 addrspace(1)* %4, align 4
- %add.ptr2 = getelementptr inbounds i8 addrspace(1)* %main.addr.011, i32 %add.ptr1.sum
+ %5 = load i32, i32 addrspace(1)* %4, align 4
+ %add.ptr2 = getelementptr inbounds i8, i8 addrspace(1)* %main.addr.011, i32 %add.ptr1.sum
%6 = bitcast i8 addrspace(1)* %add.ptr2 to i32 addrspace(1)*
- %7 = load i32 addrspace(1)* %6, align 4
- %add.ptr3 = getelementptr inbounds i8 addrspace(1)* %main.addr.011, i32 %add.ptr4.sum
+ %7 = load i32, i32 addrspace(1)* %6, align 4
+ %add.ptr3 = getelementptr inbounds i8, i8 addrspace(1)* %main.addr.011, i32 %add.ptr4.sum
%8 = bitcast i8 addrspace(1)* %add.ptr3 to i32 addrspace(1)*
- %9 = load i32 addrspace(1)* %8, align 4
- %add.ptr6 = getelementptr inbounds i8 addrspace(1)* %main.addr.011, i32 undef
+ %9 = load i32, i32 addrspace(1)* %8, align 4
+ %add.ptr6 = getelementptr inbounds i8, i8 addrspace(1)* %main.addr.011, i32 undef
br i1 undef, label %for.end, label %for.body
for.end: ; preds = %for.body, %entry
@@ -56,20 +56,20 @@ for.body.lr.ph: ; preds = %entry
for.body: ; preds = %for.body, %for.body.lr.ph
%main.addr.011 = phi i8 addrspace(1)* [ %main, %for.body.lr.ph ], [ %add.ptr6, %for.body ]
%0 = bitcast i8 addrspace(1)* %main.addr.011 to i32 addrspace(1)*
- %1 = load i32 addrspace(1)* %0, align 4
- %add.ptr = getelementptr inbounds i8 addrspace(1)* %main.addr.011, i32 %main_stride
+ %1 = load i32, i32 addrspace(1)* %0, align 4
+ %add.ptr = getelementptr inbounds i8, i8 addrspace(1)* %main.addr.011, i32 %main_stride
%2 = bitcast i8 addrspace(1)* %add.ptr to i32 addrspace(1)*
- %3 = load i32 addrspace(1)* %2, align 4
- %add.ptr1 = getelementptr inbounds i8 addrspace(1)* %main.addr.011, i32 %add.ptr.sum
+ %3 = load i32, i32 addrspace(1)* %2, align 4
+ %add.ptr1 = getelementptr inbounds i8, i8 addrspace(1)* %main.addr.011, i32 %add.ptr.sum
%4 = bitcast i8 addrspace(1)* %add.ptr1 to i32 addrspace(1)*
- %5 = load i32 addrspace(1)* %4, align 4
- %add.ptr2 = getelementptr inbounds i8 addrspace(1)* %main.addr.011, i32 %add.ptr1.sum
+ %5 = load i32, i32 addrspace(1)* %4, align 4
+ %add.ptr2 = getelementptr inbounds i8, i8 addrspace(1)* %main.addr.011, i32 %add.ptr1.sum
%6 = bitcast i8 addrspace(1)* %add.ptr2 to i32 addrspace(1)*
- %7 = load i32 addrspace(1)* %6, align 4
- %add.ptr3 = getelementptr inbounds i8 addrspace(1)* %main.addr.011, i32 %add.ptr4.sum
+ %7 = load i32, i32 addrspace(1)* %6, align 4
+ %add.ptr3 = getelementptr inbounds i8, i8 addrspace(1)* %main.addr.011, i32 %add.ptr4.sum
%8 = bitcast i8 addrspace(1)* %add.ptr3 to i32 addrspace(1)*
- %9 = load i32 addrspace(1)* %8, align 4
- %add.ptr6 = getelementptr inbounds i8 addrspace(1)* %main.addr.011, i32 undef
+ %9 = load i32, i32 addrspace(1)* %8, align 4
+ %add.ptr6 = getelementptr inbounds i8, i8 addrspace(1)* %main.addr.011, i32 undef
br i1 undef, label %for.end, label %for.body
for.end: ; preds = %for.body, %entry
@@ -92,20 +92,20 @@ for.body.lr.ph: ; preds = %entry
for.body: ; preds = %for.body, %for.body.lr.ph
%main.addr.011 = phi i8 addrspace(1)* [ %main, %for.body.lr.ph ], [ %add.ptr6, %for.body ]
%0 = bitcast i8 addrspace(1)* %main.addr.011 to i32 addrspace(1)*
- %1 = load i32 addrspace(1)* %0, align 4
- %add.ptr = getelementptr inbounds i8 addrspace(1)* %main.addr.011, i32 %main_stride
+ %1 = load i32, i32 addrspace(1)* %0, align 4
+ %add.ptr = getelementptr inbounds i8, i8 addrspace(1)* %main.addr.011, i32 %main_stride
%2 = bitcast i8 addrspace(1)* %add.ptr to i32 addrspace(1)*
- %3 = load i32 addrspace(1)* %2, align 4
- %add.ptr1 = getelementptr inbounds i8 addrspace(1)* %main.addr.011, i32 %add.ptr.sum
+ %3 = load i32, i32 addrspace(1)* %2, align 4
+ %add.ptr1 = getelementptr inbounds i8, i8 addrspace(1)* %main.addr.011, i32 %add.ptr.sum
%4 = bitcast i8 addrspace(1)* %add.ptr1 to i32 addrspace(1)*
- %5 = load i32 addrspace(1)* %4, align 4
- %add.ptr2 = getelementptr inbounds i8 addrspace(1)* %main.addr.011, i32 %add.ptr1.sum
+ %5 = load i32, i32 addrspace(1)* %4, align 4
+ %add.ptr2 = getelementptr inbounds i8, i8 addrspace(1)* %main.addr.011, i32 %add.ptr1.sum
%6 = bitcast i8 addrspace(1)* %add.ptr2 to i32 addrspace(1)*
- %7 = load i32 addrspace(1)* %6, align 4
- %add.ptr3 = getelementptr inbounds i8 addrspace(1)* %main.addr.011, i32 %add.ptr4.sum
+ %7 = load i32, i32 addrspace(1)* %6, align 4
+ %add.ptr3 = getelementptr inbounds i8, i8 addrspace(1)* %main.addr.011, i32 %add.ptr4.sum
%8 = bitcast i8 addrspace(1)* %add.ptr3 to i32 addrspace(1)*
- %9 = load i32 addrspace(1)* %8, align 4
- %add.ptr6 = getelementptr inbounds i8 addrspace(1)* %main.addr.011, i32 undef
+ %9 = load i32, i32 addrspace(1)* %8, align 4
+ %add.ptr6 = getelementptr inbounds i8, i8 addrspace(1)* %main.addr.011, i32 undef
br i1 undef, label %for.end, label %for.body
for.end: ; preds = %for.body, %entry
diff --git a/test/CodeGen/R600/unroll.ll b/test/CodeGen/R600/unroll.ll
index e0035eae71cf..ca8d822ec7ed 100644
--- a/test/CodeGen/R600/unroll.ll
+++ b/test/CodeGen/R600/unroll.ll
@@ -20,7 +20,7 @@ loop.header:
br label %loop.body
loop.body:
- %ptr = getelementptr [32 x i32]* %0, i32 0, i32 %counter
+ %ptr = getelementptr [32 x i32], [32 x i32]* %0, i32 0, i32 %counter
store i32 %counter, i32* %ptr
br label %loop.inc
@@ -30,8 +30,8 @@ loop.inc:
br i1 %1, label %exit, label %loop.header
exit:
- %2 = getelementptr [32 x i32]* %0, i32 0, i32 5
- %3 = load i32* %2
+ %2 = getelementptr [32 x i32], [32 x i32]* %0, i32 0, i32 5
+ %3 = load i32, i32* %2
store i32 %3, i32 addrspace(1)* %out
ret void
}
diff --git a/test/CodeGen/R600/urem.ll b/test/CodeGen/R600/urem.ll
index dce517fcd823..62841ec2d6c5 100644
--- a/test/CodeGen/R600/urem.ll
+++ b/test/CodeGen/R600/urem.ll
@@ -10,21 +10,36 @@
; SI: s_endpgm
; EG: CF_END
define void @test_urem_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
- %b_ptr = getelementptr i32 addrspace(1)* %in, i32 1
- %a = load i32 addrspace(1)* %in
- %b = load i32 addrspace(1)* %b_ptr
+ %b_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
+ %a = load i32, i32 addrspace(1)* %in
+ %b = load i32, i32 addrspace(1)* %b_ptr
%result = urem i32 %a, %b
store i32 %result, i32 addrspace(1)* %out
ret void
}
+; FUNC-LABEL: {{^}}test_urem_i32_7:
+; SI: v_mov_b32_e32 [[MAGIC:v[0-9]+]], 0x24924925
+; SI: v_mul_hi_u32 {{v[0-9]+}}, [[MAGIC]]
+; SI: v_subrev_i32
+; SI: v_mul_lo_i32
+; SI: v_sub_i32
+; SI: buffer_store_dword
+; SI: s_endpgm
+define void @test_urem_i32_7(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
+ %num = load i32, i32 addrspace(1) * %in
+ %result = urem i32 %num, 7
+ store i32 %result, i32 addrspace(1)* %out
+ ret void
+}
+
; FUNC-LABEL: {{^}}test_urem_v2i32:
; SI: s_endpgm
; EG: CF_END
define void @test_urem_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
- %b_ptr = getelementptr <2 x i32> addrspace(1)* %in, i32 1
- %a = load <2 x i32> addrspace(1)* %in
- %b = load <2 x i32> addrspace(1)* %b_ptr
+ %b_ptr = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %in, i32 1
+ %a = load <2 x i32>, <2 x i32> addrspace(1)* %in
+ %b = load <2 x i32>, <2 x i32> addrspace(1)* %b_ptr
%result = urem <2 x i32> %a, %b
store <2 x i32> %result, <2 x i32> addrspace(1)* %out
ret void
@@ -34,9 +49,9 @@ define void @test_urem_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1
; SI: s_endpgm
; EG: CF_END
define void @test_urem_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
- %b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1
- %a = load <4 x i32> addrspace(1)* %in
- %b = load <4 x i32> addrspace(1)* %b_ptr
+ %b_ptr = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 1
+ %a = load <4 x i32>, <4 x i32> addrspace(1)* %in
+ %b = load <4 x i32>, <4 x i32> addrspace(1)* %b_ptr
%result = urem <4 x i32> %a, %b
store <4 x i32> %result, <4 x i32> addrspace(1)* %out
ret void
@@ -46,9 +61,9 @@ define void @test_urem_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1
; SI: s_endpgm
; EG: CF_END
define void @test_urem_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
- %b_ptr = getelementptr i64 addrspace(1)* %in, i64 1
- %a = load i64 addrspace(1)* %in
- %b = load i64 addrspace(1)* %b_ptr
+ %b_ptr = getelementptr i64, i64 addrspace(1)* %in, i64 1
+ %a = load i64, i64 addrspace(1)* %in
+ %b = load i64, i64 addrspace(1)* %b_ptr
%result = urem i64 %a, %b
store i64 %result, i64 addrspace(1)* %out
ret void
@@ -58,9 +73,9 @@ define void @test_urem_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
; SI: s_endpgm
; EG: CF_END
define void @test_urem_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %in) {
- %b_ptr = getelementptr <2 x i64> addrspace(1)* %in, i64 1
- %a = load <2 x i64> addrspace(1)* %in
- %b = load <2 x i64> addrspace(1)* %b_ptr
+ %b_ptr = getelementptr <2 x i64>, <2 x i64> addrspace(1)* %in, i64 1
+ %a = load <2 x i64>, <2 x i64> addrspace(1)* %in
+ %b = load <2 x i64>, <2 x i64> addrspace(1)* %b_ptr
%result = urem <2 x i64> %a, %b
store <2 x i64> %result, <2 x i64> addrspace(1)* %out
ret void
@@ -70,9 +85,9 @@ define void @test_urem_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1
; SI: s_endpgm
; EG: CF_END
define void @test_urem_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(1)* %in) {
- %b_ptr = getelementptr <4 x i64> addrspace(1)* %in, i64 1
- %a = load <4 x i64> addrspace(1)* %in
- %b = load <4 x i64> addrspace(1)* %b_ptr
+ %b_ptr = getelementptr <4 x i64>, <4 x i64> addrspace(1)* %in, i64 1
+ %a = load <4 x i64>, <4 x i64> addrspace(1)* %in
+ %b = load <4 x i64>, <4 x i64> addrspace(1)* %b_ptr
%result = urem <4 x i64> %a, %b
store <4 x i64> %result, <4 x i64> addrspace(1)* %out
ret void
diff --git a/test/CodeGen/R600/use-sgpr-multiple-times.ll b/test/CodeGen/R600/use-sgpr-multiple-times.ll
index 97d73ba74bc5..f26f30022b4f 100644
--- a/test/CodeGen/R600/use-sgpr-multiple-times.ll
+++ b/test/CodeGen/R600/use-sgpr-multiple-times.ll
@@ -1,80 +1,87 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN %s
declare float @llvm.fma.f32(float, float, float) #1
declare float @llvm.fmuladd.f32(float, float, float) #1
declare i32 @llvm.AMDGPU.imad24(i32, i32, i32) #1
-; SI-LABEL: {{^}}test_sgpr_use_twice_binop:
-; SI: s_load_dword [[SGPR:s[0-9]+]],
-; SI: v_add_f32_e64 [[RESULT:v[0-9]+]], [[SGPR]], [[SGPR]]
-; SI: buffer_store_dword [[RESULT]]
+; GCN-LABEL: {{^}}test_sgpr_use_twice_binop:
+; GCN: s_load_dword [[SGPR:s[0-9]+]],
+; GCN: v_add_f32_e64 [[RESULT:v[0-9]+]], [[SGPR]], [[SGPR]]
+; GCN: buffer_store_dword [[RESULT]]
define void @test_sgpr_use_twice_binop(float addrspace(1)* %out, float %a) #0 {
%dbl = fadd float %a, %a
store float %dbl, float addrspace(1)* %out, align 4
ret void
}
-; SI-LABEL: {{^}}test_sgpr_use_three_ternary_op:
-; SI: s_load_dword [[SGPR:s[0-9]+]],
-; SI: v_fma_f32 [[RESULT:v[0-9]+]], [[SGPR]], [[SGPR]], [[SGPR]]
-; SI: buffer_store_dword [[RESULT]]
+; GCN-LABEL: {{^}}test_sgpr_use_three_ternary_op:
+; GCN: s_load_dword [[SGPR:s[0-9]+]],
+; GCN: v_fma_f32 [[RESULT:v[0-9]+]], [[SGPR]], [[SGPR]], [[SGPR]]
+; GCN: buffer_store_dword [[RESULT]]
define void @test_sgpr_use_three_ternary_op(float addrspace(1)* %out, float %a) #0 {
%fma = call float @llvm.fma.f32(float %a, float %a, float %a) #1
store float %fma, float addrspace(1)* %out, align 4
ret void
}
-; SI-LABEL: {{^}}test_sgpr_use_twice_ternary_op_a_a_b:
+; GCN-LABEL: {{^}}test_sgpr_use_twice_ternary_op_a_a_b:
; SI: s_load_dword [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
; SI: s_load_dword [[SGPR1:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
-; SI: v_mov_b32_e32 [[VGPR1:v[0-9]+]], [[SGPR1]]
-; SI: v_fma_f32 [[RESULT:v[0-9]+]], [[SGPR0]], [[SGPR0]], [[VGPR1]]
-; SI: buffer_store_dword [[RESULT]]
+; VI: s_load_dword [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c
+; VI: s_load_dword [[SGPR1:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x30
+; GCN: v_mov_b32_e32 [[VGPR1:v[0-9]+]], [[SGPR1]]
+; GCN: v_fma_f32 [[RESULT:v[0-9]+]], [[SGPR0]], [[SGPR0]], [[VGPR1]]
+; GCN: buffer_store_dword [[RESULT]]
define void @test_sgpr_use_twice_ternary_op_a_a_b(float addrspace(1)* %out, float %a, float %b) #0 {
%fma = call float @llvm.fma.f32(float %a, float %a, float %b) #1
store float %fma, float addrspace(1)* %out, align 4
ret void
}
-; SI-LABEL: {{^}}test_sgpr_use_twice_ternary_op_a_b_a:
+; GCN-LABEL: {{^}}test_sgpr_use_twice_ternary_op_a_b_a:
; SI: s_load_dword [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
; SI: s_load_dword [[SGPR1:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
-; SI: v_mov_b32_e32 [[VGPR1:v[0-9]+]], [[SGPR1]]
-; SI: v_fma_f32 [[RESULT:v[0-9]+]], [[VGPR1]], [[SGPR0]], [[SGPR0]]
-; SI: buffer_store_dword [[RESULT]]
+; VI: s_load_dword [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c
+; VI: s_load_dword [[SGPR1:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x30
+; GCN: v_mov_b32_e32 [[VGPR1:v[0-9]+]], [[SGPR1]]
+; GCN: v_fma_f32 [[RESULT:v[0-9]+]], [[VGPR1]], [[SGPR0]], [[SGPR0]]
+; GCN: buffer_store_dword [[RESULT]]
define void @test_sgpr_use_twice_ternary_op_a_b_a(float addrspace(1)* %out, float %a, float %b) #0 {
%fma = call float @llvm.fma.f32(float %a, float %b, float %a) #1
store float %fma, float addrspace(1)* %out, align 4
ret void
}
-; SI-LABEL: {{^}}test_sgpr_use_twice_ternary_op_b_a_a:
+; GCN-LABEL: {{^}}test_sgpr_use_twice_ternary_op_b_a_a:
; SI: s_load_dword [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
; SI: s_load_dword [[SGPR1:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
-; SI: v_mov_b32_e32 [[VGPR1:v[0-9]+]], [[SGPR1]]
-; SI: v_fma_f32 [[RESULT:v[0-9]+]], [[SGPR0]], [[VGPR1]], [[SGPR0]]
-; SI: buffer_store_dword [[RESULT]]
+; VI: s_load_dword [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c
+; VI: s_load_dword [[SGPR1:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x30
+; GCN: v_mov_b32_e32 [[VGPR1:v[0-9]+]], [[SGPR1]]
+; GCN: v_fma_f32 [[RESULT:v[0-9]+]], [[SGPR0]], [[VGPR1]], [[SGPR0]]
+; GCN: buffer_store_dword [[RESULT]]
define void @test_sgpr_use_twice_ternary_op_b_a_a(float addrspace(1)* %out, float %a, float %b) #0 {
%fma = call float @llvm.fma.f32(float %b, float %a, float %a) #1
store float %fma, float addrspace(1)* %out, align 4
ret void
}
-; SI-LABEL: {{^}}test_sgpr_use_twice_ternary_op_a_a_imm:
-; SI: s_load_dword [[SGPR:s[0-9]+]]
-; SI: v_fma_f32 [[RESULT:v[0-9]+]], [[SGPR]], [[SGPR]], 2.0
-; SI: buffer_store_dword [[RESULT]]
+; GCN-LABEL: {{^}}test_sgpr_use_twice_ternary_op_a_a_imm:
+; GCN: s_load_dword [[SGPR:s[0-9]+]]
+; GCN: v_fma_f32 [[RESULT:v[0-9]+]], [[SGPR]], [[SGPR]], 2.0
+; GCN: buffer_store_dword [[RESULT]]
define void @test_sgpr_use_twice_ternary_op_a_a_imm(float addrspace(1)* %out, float %a) #0 {
%fma = call float @llvm.fma.f32(float %a, float %a, float 2.0) #1
store float %fma, float addrspace(1)* %out, align 4
ret void
}
-; SI-LABEL: {{^}}test_sgpr_use_twice_ternary_op_a_imm_a:
-; SI: s_load_dword [[SGPR:s[0-9]+]]
-; SI: v_fma_f32 [[RESULT:v[0-9]+]], 2.0, [[SGPR]], [[SGPR]]
-; SI: buffer_store_dword [[RESULT]]
+; GCN-LABEL: {{^}}test_sgpr_use_twice_ternary_op_a_imm_a:
+; GCN: s_load_dword [[SGPR:s[0-9]+]]
+; GCN: v_fma_f32 [[RESULT:v[0-9]+]], 2.0, [[SGPR]], [[SGPR]]
+; GCN: buffer_store_dword [[RESULT]]
define void @test_sgpr_use_twice_ternary_op_a_imm_a(float addrspace(1)* %out, float %a) #0 {
%fma = call float @llvm.fma.f32(float %a, float 2.0, float %a) #1
store float %fma, float addrspace(1)* %out, align 4
@@ -82,10 +89,10 @@ define void @test_sgpr_use_twice_ternary_op_a_imm_a(float addrspace(1)* %out, fl
}
; Don't use fma since fma c, x, y is canonicalized to fma x, c, y
-; SI-LABEL: {{^}}test_sgpr_use_twice_ternary_op_imm_a_a:
-; SI: s_load_dword [[SGPR:s[0-9]+]]
-; SI: v_mad_i32_i24 [[RESULT:v[0-9]+]], 2, [[SGPR]], [[SGPR]]
-; SI: buffer_store_dword [[RESULT]]
+; GCN-LABEL: {{^}}test_sgpr_use_twice_ternary_op_imm_a_a:
+; GCN: s_load_dword [[SGPR:s[0-9]+]]
+; GCN: v_mad_i32_i24 [[RESULT:v[0-9]+]], 2, [[SGPR]], [[SGPR]]
+; GCN: buffer_store_dword [[RESULT]]
define void @test_sgpr_use_twice_ternary_op_imm_a_a(i32 addrspace(1)* %out, i32 %a) #0 {
%fma = call i32 @llvm.AMDGPU.imad24(i32 2, i32 %a, i32 %a) #1
store i32 %fma, i32 addrspace(1)* %out, align 4
diff --git a/test/CodeGen/R600/usubo.ll b/test/CodeGen/R600/usubo.ll
index be1e66673bc9..3c9b1622a076 100644
--- a/test/CodeGen/R600/usubo.ll
+++ b/test/CodeGen/R600/usubo.ll
@@ -1,11 +1,14 @@
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs< %s
+; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs< %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
declare { i32, i1 } @llvm.usub.with.overflow.i32(i32, i32) nounwind readnone
declare { i64, i1 } @llvm.usub.with.overflow.i64(i64, i64) nounwind readnone
; FUNC-LABEL: {{^}}usubo_i64_zext:
+
+; EG: SUBB_UINT
+; EG: ADDC_UINT
define void @usubo_i64_zext(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
%usub = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 %a, i64 %b) nounwind
%val = extractvalue { i64, i1 } %usub, 0
@@ -18,6 +21,9 @@ define void @usubo_i64_zext(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
; FUNC-LABEL: {{^}}s_usubo_i32:
; SI: s_sub_i32
+
+; EG-DAG: SUBB_UINT
+; EG-DAG: SUB_INT
define void @s_usubo_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %carryout, i32 %a, i32 %b) nounwind {
%usub = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %a, i32 %b) nounwind
%val = extractvalue { i32, i1 } %usub, 0
@@ -29,9 +35,12 @@ define void @s_usubo_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %carryout, i32
; FUNC-LABEL: {{^}}v_usubo_i32:
; SI: v_subrev_i32_e32
+
+; EG-DAG: SUBB_UINT
+; EG-DAG: SUB_INT
define void @v_usubo_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %carryout, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
- %a = load i32 addrspace(1)* %aptr, align 4
- %b = load i32 addrspace(1)* %bptr, align 4
+ %a = load i32, i32 addrspace(1)* %aptr, align 4
+ %b = load i32, i32 addrspace(1)* %bptr, align 4
%usub = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %a, i32 %b) nounwind
%val = extractvalue { i32, i1 } %usub, 0
%carry = extractvalue { i32, i1 } %usub, 1
@@ -43,6 +52,11 @@ define void @v_usubo_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %carryout, i32
; FUNC-LABEL: {{^}}s_usubo_i64:
; SI: s_sub_u32
; SI: s_subb_u32
+
+; EG-DAG: SUBB_UINT
+; EG-DAG: SUB_INT
+; EG-DAG: SUB_INT
+; EG: SUB_INT
define void @s_usubo_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %carryout, i64 %a, i64 %b) nounwind {
%usub = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 %a, i64 %b) nounwind
%val = extractvalue { i64, i1 } %usub, 0
@@ -55,9 +69,14 @@ define void @s_usubo_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %carryout, i64
; FUNC-LABEL: {{^}}v_usubo_i64:
; SI: v_sub_i32
; SI: v_subb_u32
+
+; EG-DAG: SUBB_UINT
+; EG-DAG: SUB_INT
+; EG-DAG: SUB_INT
+; EG: SUB_INT
define void @v_usubo_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %carryout, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) nounwind {
- %a = load i64 addrspace(1)* %aptr, align 4
- %b = load i64 addrspace(1)* %bptr, align 4
+ %a = load i64, i64 addrspace(1)* %aptr, align 4
+ %b = load i64, i64 addrspace(1)* %bptr, align 4
%usub = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 %a, i64 %b) nounwind
%val = extractvalue { i64, i1 } %usub, 0
%carry = extractvalue { i64, i1 } %usub, 1
diff --git a/test/CodeGen/R600/v_cndmask.ll b/test/CodeGen/R600/v_cndmask.ll
index 85936ecda774..c368c5aaf7dc 100644
--- a/test/CodeGen/R600/v_cndmask.ll
+++ b/test/CodeGen/R600/v_cndmask.ll
@@ -10,8 +10,8 @@ declare i32 @llvm.r600.read.tidig.x() #1
; SI: s_endpgm
define void @v_cnd_nan_nosgpr(float addrspace(1)* %out, i32 %c, float addrspace(1)* %fptr) #0 {
%idx = call i32 @llvm.r600.read.tidig.x() #1
- %f.gep = getelementptr float addrspace(1)* %fptr, i32 %idx
- %f = load float addrspace(1)* %fptr
+ %f.gep = getelementptr float, float addrspace(1)* %fptr, i32 %idx
+ %f = load float, float addrspace(1)* %fptr
%setcc = icmp ne i32 %c, 0
%select = select i1 %setcc, float 0xFFFFFFFFE0000000, float %f
store float %select, float addrspace(1)* %out
diff --git a/test/CodeGen/R600/valu-i1.ll b/test/CodeGen/R600/valu-i1.ll
index a4027178431b..7d0ebd139f51 100644
--- a/test/CodeGen/R600/valu-i1.ll
+++ b/test/CodeGen/R600/valu-i1.ll
@@ -15,18 +15,18 @@ entry:
]
case0:
- %arrayidx1 = getelementptr i32 addrspace(1)* %dst, i32 %b
+ %arrayidx1 = getelementptr i32, i32 addrspace(1)* %dst, i32 %b
store i32 0, i32 addrspace(1)* %arrayidx1, align 4
br label %end
case1:
- %arrayidx5 = getelementptr i32 addrspace(1)* %dst, i32 %b
+ %arrayidx5 = getelementptr i32, i32 addrspace(1)* %dst, i32 %b
store i32 1, i32 addrspace(1)* %arrayidx5, align 4
br label %end
default:
%cmp8 = icmp eq i32 %a, 2
- %arrayidx10 = getelementptr i32 addrspace(1)* %dst, i32 %b
+ %arrayidx10 = getelementptr i32, i32 addrspace(1)* %dst, i32 %b
br i1 %cmp8, label %if, label %else
if:
@@ -42,8 +42,8 @@ end:
}
; SI-LABEL: @simple_test_v_if
-; SI: v_cmp_ne_i32_e64 [[BR_SREG:s\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, 0
-; SI: s_and_saveexec_b64 [[BR_SREG]], [[BR_SREG]]
+; SI: v_cmp_ne_i32_e32 vcc, 0, v{{[0-9]+}}
+; SI: s_and_saveexec_b64 [[BR_SREG:s\[[0-9]+:[0-9]+\]]], vcc
; SI: s_xor_b64 [[BR_SREG]], exec, [[BR_SREG]]
; SI: ; BB#1
@@ -59,7 +59,7 @@ define void @simple_test_v_if(i32 addrspace(1)* %dst, i32 addrspace(1)* %src) #1
br i1 %is.0, label %store, label %exit
store:
- %gep = getelementptr i32 addrspace(1)* %dst, i32 %tid
+ %gep = getelementptr i32, i32 addrspace(1)* %dst, i32 %tid
store i32 999, i32 addrspace(1)* %gep
ret void
@@ -68,8 +68,8 @@ exit:
}
; SI-LABEL: @simple_test_v_loop
-; SI: v_cmp_ne_i32_e64 [[BR_SREG:s\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, 0
-; SI: s_and_saveexec_b64 [[BR_SREG]], [[BR_SREG]]
+; SI: v_cmp_ne_i32_e32 vcc, 0, v{{[0-9]+}}
+; SI: s_and_saveexec_b64 [[BR_SREG:s\[[0-9]+:[0-9]+\]]], vcc
; SI: s_xor_b64 [[BR_SREG]], exec, [[BR_SREG]]
; SI: s_cbranch_execz BB2_2
@@ -81,7 +81,6 @@ exit:
; SI: buffer_store_dword
; SI: v_cmp_eq_i32_e32 vcc,
; SI: s_or_b64 [[OR_SREG:s\[[0-9]+:[0-9]+\]]]
-; SI: v_add_i32_e32 {{v[0-9]+}}, 1, {{v[0-9]+}}
; SI: s_andn2_b64 exec, exec, [[OR_SREG]]
; SI: s_cbranch_execnz BB2_3
@@ -94,9 +93,9 @@ entry:
loop:
%i = phi i32 [%tid, %entry], [%i.inc, %loop]
- %gep.src = getelementptr i32 addrspace(1)* %src, i32 %i
- %gep.dst = getelementptr i32 addrspace(1)* %dst, i32 %i
- %load = load i32 addrspace(1)* %src
+ %gep.src = getelementptr i32, i32 addrspace(1)* %src, i32 %i
+ %gep.dst = getelementptr i32, i32 addrspace(1)* %dst, i32 %i
+ %load = load i32, i32 addrspace(1)* %src
store i32 %load, i32 addrspace(1)* %gep.dst
%i.inc = add nsw i32 %i, 1
%cmp = icmp eq i32 %limit, %i.inc
@@ -112,8 +111,8 @@ exit:
; Branch to exit if uniformly not taken
; SI: ; BB#0:
; SI: buffer_load_dword [[VBOUND:v[0-9]+]]
-; SI: v_cmp_gt_i32_e64 [[OUTER_CMP_SREG:s\[[0-9]+:[0-9]+\]]]
-; SI: s_and_saveexec_b64 [[OUTER_CMP_SREG]], [[OUTER_CMP_SREG]]
+; SI: v_cmp_lt_i32_e32 vcc
+; SI: s_and_saveexec_b64 [[OUTER_CMP_SREG:s\[[0-9]+:[0-9]+\]]], vcc
; SI: s_xor_b64 [[OUTER_CMP_SREG]], exec, [[OUTER_CMP_SREG]]
; SI: s_cbranch_execz BB3_2
@@ -124,10 +123,10 @@ exit:
; Clear exec bits for workitems that load -1s
; SI: BB3_3:
-; SI: buffer_load_dword [[A:v[0-9]+]]
; SI: buffer_load_dword [[B:v[0-9]+]]
-; SI-DAG: v_cmp_ne_i32_e64 [[NEG1_CHECK_0:s\[[0-9]+:[0-9]+\]]], [[A]], -1
-; SI-DAG: v_cmp_ne_i32_e64 [[NEG1_CHECK_1:s\[[0-9]+:[0-9]+\]]], [[B]], -1
+; SI: buffer_load_dword [[A:v[0-9]+]]
+; SI-DAG: v_cmp_ne_i32_e64 [[NEG1_CHECK_0:s\[[0-9]+:[0-9]+\]]], -1, [[A]]
+; SI-DAG: v_cmp_ne_i32_e32 [[NEG1_CHECK_1:vcc]], -1, [[B]]
; SI: s_and_b64 [[ORNEG1:s\[[0-9]+:[0-9]+\]]], [[NEG1_CHECK_1]], [[NEG1_CHECK_0]]
; SI: s_and_saveexec_b64 [[ORNEG1]], [[ORNEG1]]
; SI: s_xor_b64 [[ORNEG1]], exec, [[ORNEG1]]
@@ -155,8 +154,8 @@ define void @multi_vcond_loop(i32 addrspace(1)* noalias nocapture %arg, i32 addr
bb:
%tmp = tail call i32 @llvm.r600.read.tidig.x() #0
%tmp4 = sext i32 %tmp to i64
- %tmp5 = getelementptr inbounds i32 addrspace(1)* %arg3, i64 %tmp4
- %tmp6 = load i32 addrspace(1)* %tmp5, align 4
+ %tmp5 = getelementptr inbounds i32, i32 addrspace(1)* %arg3, i64 %tmp4
+ %tmp6 = load i32, i32 addrspace(1)* %tmp5, align 4
%tmp7 = icmp sgt i32 %tmp6, 0
%tmp8 = sext i32 %tmp6 to i64
br i1 %tmp7, label %bb10, label %bb26
@@ -164,10 +163,10 @@ bb:
bb10: ; preds = %bb, %bb20
%tmp11 = phi i64 [ %tmp23, %bb20 ], [ 0, %bb ]
%tmp12 = add nsw i64 %tmp11, %tmp4
- %tmp13 = getelementptr inbounds i32 addrspace(1)* %arg1, i64 %tmp12
- %tmp14 = load i32 addrspace(1)* %tmp13, align 4
- %tmp15 = getelementptr inbounds i32 addrspace(1)* %arg2, i64 %tmp12
- %tmp16 = load i32 addrspace(1)* %tmp15, align 4
+ %tmp13 = getelementptr inbounds i32, i32 addrspace(1)* %arg1, i64 %tmp12
+ %tmp14 = load i32, i32 addrspace(1)* %tmp13, align 4
+ %tmp15 = getelementptr inbounds i32, i32 addrspace(1)* %arg2, i64 %tmp12
+ %tmp16 = load i32, i32 addrspace(1)* %tmp15, align 4
%tmp17 = icmp ne i32 %tmp14, -1
%tmp18 = icmp ne i32 %tmp16, -1
%tmp19 = and i1 %tmp17, %tmp18
@@ -175,7 +174,7 @@ bb10: ; preds = %bb, %bb20
bb20: ; preds = %bb10
%tmp21 = add nsw i32 %tmp16, %tmp14
- %tmp22 = getelementptr inbounds i32 addrspace(1)* %arg, i64 %tmp12
+ %tmp22 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 %tmp12
store i32 %tmp21, i32 addrspace(1)* %tmp22, align 4
%tmp23 = add nuw nsw i64 %tmp11, 1
%tmp24 = icmp slt i64 %tmp23, %tmp8
diff --git a/test/CodeGen/R600/vector-alloca.ll b/test/CodeGen/R600/vector-alloca.ll
index 228868aa7feb..6f3b4847fbdf 100644
--- a/test/CodeGen/R600/vector-alloca.ll
+++ b/test/CodeGen/R600/vector-alloca.ll
@@ -13,16 +13,16 @@
define void @vector_read(i32 addrspace(1)* %out, i32 %index) {
entry:
%0 = alloca [4 x i32]
- %x = getelementptr [4 x i32]* %0, i32 0, i32 0
- %y = getelementptr [4 x i32]* %0, i32 0, i32 1
- %z = getelementptr [4 x i32]* %0, i32 0, i32 2
- %w = getelementptr [4 x i32]* %0, i32 0, i32 3
+ %x = getelementptr [4 x i32], [4 x i32]* %0, i32 0, i32 0
+ %y = getelementptr [4 x i32], [4 x i32]* %0, i32 0, i32 1
+ %z = getelementptr [4 x i32], [4 x i32]* %0, i32 0, i32 2
+ %w = getelementptr [4 x i32], [4 x i32]* %0, i32 0, i32 3
store i32 0, i32* %x
store i32 1, i32* %y
store i32 2, i32* %z
store i32 3, i32* %w
- %1 = getelementptr [4 x i32]* %0, i32 0, i32 %index
- %2 = load i32* %1
+ %1 = getelementptr [4 x i32], [4 x i32]* %0, i32 0, i32 %index
+ %2 = load i32, i32* %1
store i32 %2, i32 addrspace(1)* %out
ret void
}
@@ -37,18 +37,18 @@ entry:
define void @vector_write(i32 addrspace(1)* %out, i32 %w_index, i32 %r_index) {
entry:
%0 = alloca [4 x i32]
- %x = getelementptr [4 x i32]* %0, i32 0, i32 0
- %y = getelementptr [4 x i32]* %0, i32 0, i32 1
- %z = getelementptr [4 x i32]* %0, i32 0, i32 2
- %w = getelementptr [4 x i32]* %0, i32 0, i32 3
+ %x = getelementptr [4 x i32], [4 x i32]* %0, i32 0, i32 0
+ %y = getelementptr [4 x i32], [4 x i32]* %0, i32 0, i32 1
+ %z = getelementptr [4 x i32], [4 x i32]* %0, i32 0, i32 2
+ %w = getelementptr [4 x i32], [4 x i32]* %0, i32 0, i32 3
store i32 0, i32* %x
store i32 0, i32* %y
store i32 0, i32* %z
store i32 0, i32* %w
- %1 = getelementptr [4 x i32]* %0, i32 0, i32 %w_index
+ %1 = getelementptr [4 x i32], [4 x i32]* %0, i32 0, i32 %w_index
store i32 1, i32* %1
- %2 = getelementptr [4 x i32]* %0, i32 0, i32 %r_index
- %3 = load i32* %2
+ %2 = getelementptr [4 x i32], [4 x i32]* %0, i32 0, i32 %r_index
+ %3 = load i32, i32* %2
store i32 %3, i32 addrspace(1)* %out
ret void
}
@@ -60,18 +60,18 @@ entry:
define void @bitcast_gep(i32 addrspace(1)* %out, i32 %w_index, i32 %r_index) {
entry:
%0 = alloca [4 x i32]
- %x = getelementptr [4 x i32]* %0, i32 0, i32 0
- %y = getelementptr [4 x i32]* %0, i32 0, i32 1
- %z = getelementptr [4 x i32]* %0, i32 0, i32 2
- %w = getelementptr [4 x i32]* %0, i32 0, i32 3
+ %x = getelementptr [4 x i32], [4 x i32]* %0, i32 0, i32 0
+ %y = getelementptr [4 x i32], [4 x i32]* %0, i32 0, i32 1
+ %z = getelementptr [4 x i32], [4 x i32]* %0, i32 0, i32 2
+ %w = getelementptr [4 x i32], [4 x i32]* %0, i32 0, i32 3
store i32 0, i32* %x
store i32 0, i32* %y
store i32 0, i32* %z
store i32 0, i32* %w
- %1 = getelementptr [4 x i32]* %0, i32 0, i32 1
+ %1 = getelementptr [4 x i32], [4 x i32]* %0, i32 0, i32 1
%2 = bitcast i32* %1 to [4 x i32]*
- %3 = getelementptr [4 x i32]* %2, i32 0, i32 0
- %4 = load i32* %3
+ %3 = getelementptr [4 x i32], [4 x i32]* %2, i32 0, i32 0
+ %4 = load i32, i32* %3
store i32 %4, i32 addrspace(1)* %out
ret void
}
diff --git a/test/CodeGen/R600/vertex-fetch-encoding.ll b/test/CodeGen/R600/vertex-fetch-encoding.ll
index e4d117f6310b..fb6a17e67146 100644
--- a/test/CodeGen/R600/vertex-fetch-encoding.ll
+++ b/test/CodeGen/R600/vertex-fetch-encoding.ll
@@ -8,7 +8,7 @@
define void @vtx_fetch32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
entry:
- %0 = load i32 addrspace(1)* %in
+ %0 = load i32, i32 addrspace(1)* %in
store i32 %0, i32 addrspace(1)* %out
ret void
}
@@ -19,7 +19,7 @@ entry:
define void @vtx_fetch128(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
entry:
- %0 = load <4 x i32> addrspace(1)* %in
+ %0 = load <4 x i32>, <4 x i32> addrspace(1)* %in
store <4 x i32> %0, <4 x i32> addrspace(1)* %out
ret void
}
diff --git a/test/CodeGen/R600/vop-shrink.ll b/test/CodeGen/R600/vop-shrink.ll
index d5a46e38ce26..9b2f229c05af 100644
--- a/test/CodeGen/R600/vop-shrink.ll
+++ b/test/CodeGen/R600/vop-shrink.ll
@@ -15,7 +15,7 @@ entry:
br i1 %tmp, label %if, label %else
if: ; preds = %entry
- %tmp1 = getelementptr i32 addrspace(1)* %out, i32 1
+ %tmp1 = getelementptr i32, i32 addrspace(1)* %out, i32 1
%tmp2 = extractelement <4 x i32> %sgpr, i32 1
store i32 %tmp2, i32 addrspace(1)* %out
br label %endif
diff --git a/test/CodeGen/R600/vselect.ll b/test/CodeGen/R600/vselect.ll
index a6152f7881ef..a3014b03d2b3 100644
--- a/test/CodeGen/R600/vselect.ll
+++ b/test/CodeGen/R600/vselect.ll
@@ -12,8 +12,8 @@
define void @test_select_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in0, <2 x i32> addrspace(1)* %in1) {
entry:
- %0 = load <2 x i32> addrspace(1)* %in0
- %1 = load <2 x i32> addrspace(1)* %in1
+ %0 = load <2 x i32>, <2 x i32> addrspace(1)* %in0
+ %1 = load <2 x i32>, <2 x i32> addrspace(1)* %in1
%cmp = icmp ne <2 x i32> %0, %1
%result = select <2 x i1> %cmp, <2 x i32> %0, <2 x i32> %1
store <2 x i32> %result, <2 x i32> addrspace(1)* %out
@@ -30,8 +30,8 @@ entry:
define void @test_select_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)* %in0, <2 x float> addrspace(1)* %in1) {
entry:
- %0 = load <2 x float> addrspace(1)* %in0
- %1 = load <2 x float> addrspace(1)* %in1
+ %0 = load <2 x float>, <2 x float> addrspace(1)* %in0
+ %1 = load <2 x float>, <2 x float> addrspace(1)* %in1
%cmp = fcmp une <2 x float> %0, %1
%result = select <2 x i1> %cmp, <2 x float> %0, <2 x float> %1
store <2 x float> %result, <2 x float> addrspace(1)* %out
@@ -52,8 +52,8 @@ entry:
define void @test_select_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in0, <4 x i32> addrspace(1)* %in1) {
entry:
- %0 = load <4 x i32> addrspace(1)* %in0
- %1 = load <4 x i32> addrspace(1)* %in1
+ %0 = load <4 x i32>, <4 x i32> addrspace(1)* %in0
+ %1 = load <4 x i32>, <4 x i32> addrspace(1)* %in1
%cmp = icmp ne <4 x i32> %0, %1
%result = select <4 x i1> %cmp, <4 x i32> %0, <4 x i32> %1
store <4 x i32> %result, <4 x i32> addrspace(1)* %out
@@ -68,8 +68,8 @@ entry:
define void @test_select_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in0, <4 x float> addrspace(1)* %in1) {
entry:
- %0 = load <4 x float> addrspace(1)* %in0
- %1 = load <4 x float> addrspace(1)* %in1
+ %0 = load <4 x float>, <4 x float> addrspace(1)* %in0
+ %1 = load <4 x float>, <4 x float> addrspace(1)* %in1
%cmp = fcmp une <4 x float> %0, %1
%result = select <4 x i1> %cmp, <4 x float> %0, <4 x float> %1
store <4 x float> %result, <4 x float> addrspace(1)* %out
diff --git a/test/CodeGen/R600/vtx-fetch-branch.ll b/test/CodeGen/R600/vtx-fetch-branch.ll
index bcbe34ea543b..4584d6e25254 100644
--- a/test/CodeGen/R600/vtx-fetch-branch.ll
+++ b/test/CodeGen/R600/vtx-fetch-branch.ll
@@ -16,7 +16,7 @@ entry:
br i1 %0, label %endif, label %if
if:
- %1 = load i32 addrspace(1)* %in
+ %1 = load i32, i32 addrspace(1)* %in
br label %endif
endif:
diff --git a/test/CodeGen/R600/vtx-schedule.ll b/test/CodeGen/R600/vtx-schedule.ll
index 8254c9923477..912e258ebb83 100644
--- a/test/CodeGen/R600/vtx-schedule.ll
+++ b/test/CodeGen/R600/vtx-schedule.ll
@@ -11,8 +11,8 @@
; CHECK: VTX_READ_32 [[IN1:T[0-9]+\.X]], [[IN1]], 0
define void @test(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* addrspace(1)* nocapture %in0) {
entry:
- %0 = load i32 addrspace(1)* addrspace(1)* %in0
- %1 = load i32 addrspace(1)* %0
+ %0 = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(1)* %in0
+ %1 = load i32, i32 addrspace(1)* %0
store i32 %1, i32 addrspace(1)* %out
ret void
}
diff --git a/test/CodeGen/R600/wait.ll b/test/CodeGen/R600/wait.ll
index 93cfdd46093e..5cc7577cad33 100644
--- a/test/CodeGen/R600/wait.ll
+++ b/test/CodeGen/R600/wait.ll
@@ -4,22 +4,21 @@
; CHECK-LABEL: {{^}}main:
; CHECK: s_load_dwordx4
; CHECK: s_load_dwordx4
-; CHECK: s_waitcnt lgkmcnt(0){{$}}
-; CHECK: s_waitcnt vmcnt(0){{$}}
-; CHECK: s_waitcnt expcnt(0) lgkmcnt(0){{$}}
+; CHECK: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
+; CHECK: s_endpgm
define void @main(<16 x i8> addrspace(2)* inreg %arg, <16 x i8> addrspace(2)* inreg %arg1, <32 x i8> addrspace(2)* inreg %arg2, <16 x i8> addrspace(2)* inreg %arg3, <16 x i8> addrspace(2)* inreg %arg4, i32 inreg %arg5, i32 %arg6, i32 %arg7, i32 %arg8, i32 %arg9, float addrspace(2)* inreg %constptr) #0 {
main_body:
- %tmp = getelementptr <16 x i8> addrspace(2)* %arg3, i32 0
- %tmp10 = load <16 x i8> addrspace(2)* %tmp, !tbaa !0
+ %tmp = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %arg3, i32 0
+ %tmp10 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp, !tbaa !0
%tmp11 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %tmp10, i32 0, i32 %arg6)
%tmp12 = extractelement <4 x float> %tmp11, i32 0
%tmp13 = extractelement <4 x float> %tmp11, i32 1
call void @llvm.AMDGPU.barrier.global() #1
%tmp14 = extractelement <4 x float> %tmp11, i32 2
; %tmp15 = extractelement <4 x float> %tmp11, i32 3
- %tmp15 = load float addrspace(2)* %constptr, align 4 ; Force waiting for expcnt and lgkmcnt
- %tmp16 = getelementptr <16 x i8> addrspace(2)* %arg3, i32 1
- %tmp17 = load <16 x i8> addrspace(2)* %tmp16, !tbaa !0
+ %tmp15 = load float, float addrspace(2)* %constptr, align 4 ; Force waiting for expcnt and lgkmcnt
+ %tmp16 = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %arg3, i32 1
+ %tmp17 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp16, !tbaa !0
%tmp18 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %tmp17, i32 0, i32 %arg6)
%tmp19 = extractelement <4 x float> %tmp18, i32 0
%tmp20 = extractelement <4 x float> %tmp18, i32 1
diff --git a/test/CodeGen/R600/work-item-intrinsics.ll b/test/CodeGen/R600/work-item-intrinsics.ll
index 37c0e0f304ce..4328e964c1bf 100644
--- a/test/CodeGen/R600/work-item-intrinsics.ll
+++ b/test/CodeGen/R600/work-item-intrinsics.ll
@@ -1,14 +1,15 @@
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; FUNC-LABEL: {{^}}ngroups_x:
; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
; EG: MOV [[VAL]], KC0[0].X
-; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0
-; SI: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
-; SI: buffer_store_dword [[VVAL]]
+; GCN: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0
+; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
+; GCN: buffer_store_dword [[VVAL]]
define void @ngroups_x (i32 addrspace(1)* %out) {
entry:
%0 = call i32 @llvm.r600.read.ngroups.x() #0
@@ -21,8 +22,9 @@ entry:
; EG: MOV [[VAL]], KC0[0].Y
; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x1
-; SI: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
-; SI: buffer_store_dword [[VVAL]]
+; VI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x4
+; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
+; GCN: buffer_store_dword [[VVAL]]
define void @ngroups_y (i32 addrspace(1)* %out) {
entry:
%0 = call i32 @llvm.r600.read.ngroups.y() #0
@@ -35,8 +37,9 @@ entry:
; EG: MOV [[VAL]], KC0[0].Z
; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x2
-; SI: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
-; SI: buffer_store_dword [[VVAL]]
+; VI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x8
+; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
+; GCN: buffer_store_dword [[VVAL]]
define void @ngroups_z (i32 addrspace(1)* %out) {
entry:
%0 = call i32 @llvm.r600.read.ngroups.z() #0
@@ -49,8 +52,9 @@ entry:
; EG: MOV [[VAL]], KC0[0].W
; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x3
-; SI: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
-; SI: buffer_store_dword [[VVAL]]
+; VI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0xc
+; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
+; GCN: buffer_store_dword [[VVAL]]
define void @global_size_x (i32 addrspace(1)* %out) {
entry:
%0 = call i32 @llvm.r600.read.global.size.x() #0
@@ -63,8 +67,9 @@ entry:
; EG: MOV [[VAL]], KC0[1].X
; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x4
-; SI: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
-; SI: buffer_store_dword [[VVAL]]
+; VI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x10
+; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
+; GCN: buffer_store_dword [[VVAL]]
define void @global_size_y (i32 addrspace(1)* %out) {
entry:
%0 = call i32 @llvm.r600.read.global.size.y() #0
@@ -77,8 +82,9 @@ entry:
; EG: MOV [[VAL]], KC0[1].Y
; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x5
-; SI: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
-; SI: buffer_store_dword [[VVAL]]
+; VI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x14
+; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
+; GCN: buffer_store_dword [[VVAL]]
define void @global_size_z (i32 addrspace(1)* %out) {
entry:
%0 = call i32 @llvm.r600.read.global.size.z() #0
@@ -91,8 +97,9 @@ entry:
; EG: MOV [[VAL]], KC0[1].Z
; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x6
-; SI: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
-; SI: buffer_store_dword [[VVAL]]
+; VI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x18
+; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
+; GCN: buffer_store_dword [[VVAL]]
define void @local_size_x (i32 addrspace(1)* %out) {
entry:
%0 = call i32 @llvm.r600.read.local.size.x() #0
@@ -105,8 +112,9 @@ entry:
; EG: MOV [[VAL]], KC0[1].W
; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x7
-; SI: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
-; SI: buffer_store_dword [[VVAL]]
+; VI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x1c
+; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
+; GCN: buffer_store_dword [[VVAL]]
define void @local_size_y (i32 addrspace(1)* %out) {
entry:
%0 = call i32 @llvm.r600.read.local.size.y() #0
@@ -119,8 +127,9 @@ entry:
; EG: MOV [[VAL]], KC0[2].X
; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x8
-; SI: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
-; SI: buffer_store_dword [[VVAL]]
+; VI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x20
+; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
+; GCN: buffer_store_dword [[VVAL]]
define void @local_size_z (i32 addrspace(1)* %out) {
entry:
%0 = call i32 @llvm.r600.read.local.size.z() #0
@@ -133,8 +142,9 @@ entry:
; EG: MOV [[VAL]], KC0[2].Z
; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0xb
-; SI: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
-; SI: buffer_store_dword [[VVAL]]
+; VI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x2c
+; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
+; GCN: buffer_store_dword [[VVAL]]
define void @get_work_dim (i32 addrspace(1)* %out) {
entry:
%0 = call i32 @llvm.AMDGPU.read.workdim() #0
@@ -147,8 +157,8 @@ entry:
; kernel arguments, but this may change in the future.
; FUNC-LABEL: {{^}}tgid_x:
-; SI: v_mov_b32_e32 [[VVAL:v[0-9]+]], s4
-; SI: buffer_store_dword [[VVAL]]
+; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], s4
+; GCN: buffer_store_dword [[VVAL]]
define void @tgid_x (i32 addrspace(1)* %out) {
entry:
%0 = call i32 @llvm.r600.read.tgid.x() #0
@@ -157,8 +167,8 @@ entry:
}
; FUNC-LABEL: {{^}}tgid_y:
-; SI: v_mov_b32_e32 [[VVAL:v[0-9]+]], s5
-; SI: buffer_store_dword [[VVAL]]
+; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], s5
+; GCN: buffer_store_dword [[VVAL]]
define void @tgid_y (i32 addrspace(1)* %out) {
entry:
%0 = call i32 @llvm.r600.read.tgid.y() #0
@@ -167,8 +177,8 @@ entry:
}
; FUNC-LABEL: {{^}}tgid_z:
-; SI: v_mov_b32_e32 [[VVAL:v[0-9]+]], s6
-; SI: buffer_store_dword [[VVAL]]
+; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], s6
+; GCN: buffer_store_dword [[VVAL]]
define void @tgid_z (i32 addrspace(1)* %out) {
entry:
%0 = call i32 @llvm.r600.read.tgid.z() #0
@@ -177,7 +187,7 @@ entry:
}
; FUNC-LABEL: {{^}}tidig_x:
-; SI: buffer_store_dword v0
+; GCN: buffer_store_dword v0
define void @tidig_x (i32 addrspace(1)* %out) {
entry:
%0 = call i32 @llvm.r600.read.tidig.x() #0
@@ -186,7 +196,7 @@ entry:
}
; FUNC-LABEL: {{^}}tidig_y:
-; SI: buffer_store_dword v1
+; GCN: buffer_store_dword v1
define void @tidig_y (i32 addrspace(1)* %out) {
entry:
%0 = call i32 @llvm.r600.read.tidig.y() #0
@@ -195,7 +205,7 @@ entry:
}
; FUNC-LABEL: {{^}}tidig_z:
-; SI: buffer_store_dword v2
+; GCN: buffer_store_dword v2
define void @tidig_z (i32 addrspace(1)* %out) {
entry:
%0 = call i32 @llvm.r600.read.tidig.z() #0
diff --git a/test/CodeGen/R600/wrong-transalu-pos-fix.ll b/test/CodeGen/R600/wrong-transalu-pos-fix.ll
index 4e77c07c0ea1..5ab465338e15 100644
--- a/test/CodeGen/R600/wrong-transalu-pos-fix.ll
+++ b/test/CodeGen/R600/wrong-transalu-pos-fix.ll
@@ -35,7 +35,7 @@ entry:
%z.i8.i = tail call i32 @llvm.r600.read.tidig.z() #1
%add.i = add i32 %z.i8.i, %mul33.i
%add13 = add i32 %add.i, %add
- %arrayidx = getelementptr inbounds i32 addrspace(1)* %out, i32 %add13
+ %arrayidx = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 %add13
store i32 %mul3, i32 addrspace(1)* %arrayidx, align 4
ret void
}
diff --git a/test/CodeGen/R600/xor.ll b/test/CodeGen/R600/xor.ll
index b43ff4006473..089db59eabc7 100644
--- a/test/CodeGen/R600/xor.ll
+++ b/test/CodeGen/R600/xor.ll
@@ -11,8 +11,8 @@
; SI: v_xor_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
define void @xor_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in0, <2 x i32> addrspace(1)* %in1) {
- %a = load <2 x i32> addrspace(1) * %in0
- %b = load <2 x i32> addrspace(1) * %in1
+ %a = load <2 x i32>, <2 x i32> addrspace(1) * %in0
+ %b = load <2 x i32>, <2 x i32> addrspace(1) * %in1
%result = xor <2 x i32> %a, %b
store <2 x i32> %result, <2 x i32> addrspace(1)* %out
ret void
@@ -30,8 +30,8 @@ define void @xor_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in
; SI: v_xor_b32_e32 {{v[0-9]+, v[0-9]+, v[0-9]+}}
define void @xor_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in0, <4 x i32> addrspace(1)* %in1) {
- %a = load <4 x i32> addrspace(1) * %in0
- %b = load <4 x i32> addrspace(1) * %in1
+ %a = load <4 x i32>, <4 x i32> addrspace(1) * %in0
+ %b = load <4 x i32>, <4 x i32> addrspace(1) * %in1
%result = xor <4 x i32> %a, %b
store <4 x i32> %result, <4 x i32> addrspace(1)* %out
ret void
@@ -40,15 +40,15 @@ define void @xor_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in
; FUNC-LABEL: {{^}}xor_i1:
; EG: XOR_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], PS}}
-; SI-DAG: v_cmp_ge_f32_e64 [[CMP0:s\[[0-9]+:[0-9]+\]]], {{v[0-9]+}}, 0
-; SI-DAG: v_cmp_ge_f32_e64 [[CMP1:s\[[0-9]+:[0-9]+\]]], {{v[0-9]+}}, 1.0
+; SI-DAG: v_cmp_le_f32_e32 [[CMP0:vcc]], 0, {{v[0-9]+}}
+; SI-DAG: v_cmp_le_f32_e64 [[CMP1:s\[[0-9]+:[0-9]+\]]], 1.0, {{v[0-9]+}}
; SI: s_xor_b64 [[XOR:s\[[0-9]+:[0-9]+\]]], [[CMP0]], [[CMP1]]
; SI: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}, [[XOR]]
; SI: buffer_store_dword [[RESULT]]
; SI: s_endpgm
define void @xor_i1(float addrspace(1)* %out, float addrspace(1)* %in0, float addrspace(1)* %in1) {
- %a = load float addrspace(1) * %in0
- %b = load float addrspace(1) * %in1
+ %a = load float, float addrspace(1) * %in0
+ %b = load float, float addrspace(1) * %in1
%acmp = fcmp oge float %a, 0.000000e+00
%bcmp = fcmp oge float %b, 1.000000e+00
%xor = xor i1 %acmp, %bcmp
@@ -58,14 +58,14 @@ define void @xor_i1(float addrspace(1)* %out, float addrspace(1)* %in0, float ad
}
; FUNC-LABEL: {{^}}v_xor_i1:
-; SI: buffer_load_ubyte [[A:v[0-9]+]]
; SI: buffer_load_ubyte [[B:v[0-9]+]]
+; SI: buffer_load_ubyte [[A:v[0-9]+]]
; SI: v_xor_b32_e32 [[XOR:v[0-9]+]], [[A]], [[B]]
; SI: v_and_b32_e32 [[RESULT:v[0-9]+]], 1, [[XOR]]
; SI: buffer_store_byte [[RESULT]]
define void @v_xor_i1(i1 addrspace(1)* %out, i1 addrspace(1)* %in0, i1 addrspace(1)* %in1) {
- %a = load i1 addrspace(1)* %in0
- %b = load i1 addrspace(1)* %in1
+ %a = load i1, i1 addrspace(1)* %in0
+ %b = load i1, i1 addrspace(1)* %in1
%xor = xor i1 %a, %b
store i1 %xor, i1 addrspace(1)* %out
ret void
@@ -74,8 +74,8 @@ define void @v_xor_i1(i1 addrspace(1)* %out, i1 addrspace(1)* %in0, i1 addrspace
; FUNC-LABEL: {{^}}vector_xor_i32:
; SI: v_xor_b32_e32
define void @vector_xor_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in0, i32 addrspace(1)* %in1) {
- %a = load i32 addrspace(1)* %in0
- %b = load i32 addrspace(1)* %in1
+ %a = load i32, i32 addrspace(1)* %in0
+ %b = load i32, i32 addrspace(1)* %in1
%result = xor i32 %a, %b
store i32 %result, i32 addrspace(1)* %out
ret void
@@ -100,8 +100,8 @@ define void @scalar_not_i32(i32 addrspace(1)* %out, i32 %a) {
; FUNC-LABEL: {{^}}vector_not_i32:
; SI: v_not_b32
define void @vector_not_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in0, i32 addrspace(1)* %in1) {
- %a = load i32 addrspace(1)* %in0
- %b = load i32 addrspace(1)* %in1
+ %a = load i32, i32 addrspace(1)* %in0
+ %b = load i32, i32 addrspace(1)* %in1
%result = xor i32 %a, -1
store i32 %result, i32 addrspace(1)* %out
ret void
@@ -112,8 +112,8 @@ define void @vector_not_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in0, i32
; SI: v_xor_b32_e32
; SI: s_endpgm
define void @vector_xor_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in0, i64 addrspace(1)* %in1) {
- %a = load i64 addrspace(1)* %in0
- %b = load i64 addrspace(1)* %in1
+ %a = load i64, i64 addrspace(1)* %in0
+ %b = load i64, i64 addrspace(1)* %in1
%result = xor i64 %a, %b
store i64 %result, i64 addrspace(1)* %out
ret void
@@ -140,8 +140,8 @@ define void @scalar_not_i64(i64 addrspace(1)* %out, i64 %a) {
; SI: v_not_b32
; SI: v_not_b32
define void @vector_not_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in0, i64 addrspace(1)* %in1) {
- %a = load i64 addrspace(1)* %in0
- %b = load i64 addrspace(1)* %in1
+ %a = load i64, i64 addrspace(1)* %in0
+ %b = load i64, i64 addrspace(1)* %in1
%result = xor i64 %a, -1
store i64 %result, i64 addrspace(1)* %out
ret void
@@ -163,7 +163,7 @@ if:
br label %endif
else:
- %2 = load i64 addrspace(1)* %in
+ %2 = load i64, i64 addrspace(1)* %in
br label %endif
endif:
diff --git a/test/CodeGen/R600/zero_extend.ll b/test/CodeGen/R600/zero_extend.ll
index d052ee64c6f1..033055db185a 100644
--- a/test/CodeGen/R600/zero_extend.ll
+++ b/test/CodeGen/R600/zero_extend.ll
@@ -30,9 +30,9 @@ entry:
}
; SI-LABEL: {{^}}zext_i1_to_i64:
+; SI: s_mov_b32 s{{[0-9]+}}, 0
; SI: v_cmp_eq_i32
; SI: v_cndmask_b32
-; SI: s_mov_b32 s{{[0-9]+}}, 0
define void @zext_i1_to_i64(i64 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
%cmp = icmp eq i32 %a, %b
%ext = zext i1 %cmp to i64
diff --git a/test/CodeGen/SPARC/2008-10-10-InlineAsmMemoryOperand.ll b/test/CodeGen/SPARC/2008-10-10-InlineAsmMemoryOperand.ll
index e8315f17ebb6..07e250b3c98a 100644
--- a/test/CodeGen/SPARC/2008-10-10-InlineAsmMemoryOperand.ll
+++ b/test/CodeGen/SPARC/2008-10-10-InlineAsmMemoryOperand.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=sparc
+; RUN: llc < %s -march=sparc -no-integrated-as
; PR 1557
target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-f128:128:128"
@@ -8,7 +8,7 @@ define internal void @set_fast_math() nounwind {
entry:
%fsr = alloca i32 ; <i32*> [#uses=4]
call void asm "st %fsr, $0", "=*m"(i32* %fsr) nounwind
- %0 = load i32* %fsr, align 4 ; <i32> [#uses=1]
+ %0 = load i32, i32* %fsr, align 4 ; <i32> [#uses=1]
%1 = or i32 %0, 4194304 ; <i32> [#uses=1]
store i32 %1, i32* %fsr, align 4
call void asm sideeffect "ld $0, %fsr", "*m"(i32* %fsr) nounwind
diff --git a/test/CodeGen/SPARC/2008-10-10-InlineAsmRegOperand.ll b/test/CodeGen/SPARC/2008-10-10-InlineAsmRegOperand.ll
index c12e9c13409b..7975ee468232 100644
--- a/test/CodeGen/SPARC/2008-10-10-InlineAsmRegOperand.ll
+++ b/test/CodeGen/SPARC/2008-10-10-InlineAsmRegOperand.ll
@@ -7,7 +7,7 @@ module asm "\09.section\09\22.dtors\22,#alloc,#write"
define void @frame_dummy() nounwind {
entry:
- %asmtmp = tail call void (i8*)* (void (i8*)*)* asm "", "=r,0"(void (i8*)* @_Jv_RegisterClasses) nounwind ; <void (i8*)*> [#uses=0]
+ %asmtmp = tail call void (i8*)* (void (i8*)*) asm "", "=r,0"(void (i8*)* @_Jv_RegisterClasses) nounwind ; <void (i8*)*> [#uses=0]
unreachable
}
diff --git a/test/CodeGen/SPARC/2009-08-28-PIC.ll b/test/CodeGen/SPARC/2009-08-28-PIC.ll
index b004b11b853b..baad2ae507d3 100644
--- a/test/CodeGen/SPARC/2009-08-28-PIC.ll
+++ b/test/CodeGen/SPARC/2009-08-28-PIC.ll
@@ -14,7 +14,7 @@
define i32 @func(i32 %a) nounwind readonly {
entry:
- %0 = load i32* @foo, align 4 ; <i32> [#uses=1]
+ %0 = load i32, i32* @foo, align 4 ; <i32> [#uses=1]
ret i32 %0
}
@@ -36,7 +36,7 @@ entry:
br i1 %cmp, label %if.then, label %if.end
if.then:
- %ret = load i32* @foo, align 4
+ %ret = load i32, i32* @foo, align 4
ret i32 %ret
if.end:
diff --git a/test/CodeGen/SPARC/2011-01-11-CC.ll b/test/CodeGen/SPARC/2011-01-11-CC.ll
index 50f3a65ff9a9..6ea78dd7e169 100755
--- a/test/CodeGen/SPARC/2011-01-11-CC.ll
+++ b/test/CodeGen/SPARC/2011-01-11-CC.ll
@@ -177,13 +177,13 @@ define void @test_adde_sube(i8* %a, i8* %b, i8* %sum, i8* %diff) {
entry:
%0 = bitcast i8* %a to i128*
%1 = bitcast i8* %b to i128*
- %2 = load i128* %0
- %3 = load i128* %1
+ %2 = load i128, i128* %0
+ %3 = load i128, i128* %1
%4 = add i128 %2, %3
%5 = bitcast i8* %sum to i128*
store i128 %4, i128* %5
tail call void asm sideeffect "", "=*m,*m"(i128 *%0, i128* %5) nounwind
- %6 = load i128* %0
+ %6 = load i128, i128* %0
%7 = sub i128 %2, %6
%8 = bitcast i8* %diff to i128*
store i128 %7, i128* %8
diff --git a/test/CodeGen/SPARC/2011-01-11-Call.ll b/test/CodeGen/SPARC/2011-01-11-Call.ll
index 067bade16609..8097e49ad3f7 100644
--- a/test/CodeGen/SPARC/2011-01-11-Call.ll
+++ b/test/CodeGen/SPARC/2011-01-11-Call.ll
@@ -22,8 +22,8 @@
define void @test() nounwind {
entry:
- %0 = tail call i32 (...)* @foo() nounwind
- tail call void (...)* @bar() nounwind
+ %0 = tail call i32 (...) @foo() nounwind
+ tail call void (...) @bar() nounwind
ret void
}
@@ -48,6 +48,6 @@ declare void @bar(...)
define i32 @test_tail_call_with_return() nounwind {
entry:
- %0 = tail call i32 (...)* @foo() nounwind
+ %0 = tail call i32 (...) @foo() nounwind
ret i32 %0
}
diff --git a/test/CodeGen/SPARC/2011-01-19-DelaySlot.ll b/test/CodeGen/SPARC/2011-01-19-DelaySlot.ll
index 8a3edc64b2da..29bca67e2d24 100644
--- a/test/CodeGen/SPARC/2011-01-19-DelaySlot.ll
+++ b/test/CodeGen/SPARC/2011-01-19-DelaySlot.ll
@@ -66,7 +66,7 @@ entry:
br i1 %0, label %bb, label %bb1
bb: ; preds = %entry
- %1 = tail call i32 (...)* @foo(i32 %a) nounwind
+ %1 = tail call i32 (...) @foo(i32 %a) nounwind
ret i32 %1
bb1: ; preds = %entry
diff --git a/test/CodeGen/SPARC/2011-01-22-SRet.ll b/test/CodeGen/SPARC/2011-01-22-SRet.ll
index fc44bc495f46..ae9764e82084 100644
--- a/test/CodeGen/SPARC/2011-01-22-SRet.ll
+++ b/test/CodeGen/SPARC/2011-01-22-SRet.ll
@@ -7,11 +7,11 @@ entry:
;CHECK-LABEL: make_foo:
;CHECK: ld [%sp+64], {{.+}}
;CHECK: jmp %o7+12
- %0 = getelementptr inbounds %struct.foo_t* %agg.result, i32 0, i32 0
+ %0 = getelementptr inbounds %struct.foo_t, %struct.foo_t* %agg.result, i32 0, i32 0
store i32 %a, i32* %0, align 4
- %1 = getelementptr inbounds %struct.foo_t* %agg.result, i32 0, i32 1
+ %1 = getelementptr inbounds %struct.foo_t, %struct.foo_t* %agg.result, i32 0, i32 1
store i32 %b, i32* %1, align 4
- %2 = getelementptr inbounds %struct.foo_t* %agg.result, i32 0, i32 2
+ %2 = getelementptr inbounds %struct.foo_t, %struct.foo_t* %agg.result, i32 0, i32 2
store i32 %c, i32* %2, align 4
ret void
}
@@ -24,12 +24,12 @@ entry:
;CHECK: unimp 12
%f = alloca %struct.foo_t, align 8
call void @make_foo(%struct.foo_t* noalias sret %f, i32 10, i32 20, i32 30) nounwind
- %0 = getelementptr inbounds %struct.foo_t* %f, i32 0, i32 0
- %1 = load i32* %0, align 8
- %2 = getelementptr inbounds %struct.foo_t* %f, i32 0, i32 1
- %3 = load i32* %2, align 4
- %4 = getelementptr inbounds %struct.foo_t* %f, i32 0, i32 2
- %5 = load i32* %4, align 8
+ %0 = getelementptr inbounds %struct.foo_t, %struct.foo_t* %f, i32 0, i32 0
+ %1 = load i32, i32* %0, align 8
+ %2 = getelementptr inbounds %struct.foo_t, %struct.foo_t* %f, i32 0, i32 1
+ %3 = load i32, i32* %2, align 4
+ %4 = getelementptr inbounds %struct.foo_t, %struct.foo_t* %f, i32 0, i32 2
+ %5 = load i32, i32* %4, align 8
%6 = add nsw i32 %3, %1
%7 = add nsw i32 %6, %5
ret i32 %7
diff --git a/test/CodeGen/SPARC/64abi.ll b/test/CodeGen/SPARC/64abi.ll
index a88e19a5e2d4..7c08998a1427 100644
--- a/test/CodeGen/SPARC/64abi.ll
+++ b/test/CodeGen/SPARC/64abi.ll
@@ -240,10 +240,10 @@ define void @call_inreg_ii(i32* %p, i32 %i1, i32 %i2) {
; CHECK: ldx [%i2], %i0
; CHECK: ldx [%i3], %i1
define { i64, i64 } @ret_i64_pair(i32 %a0, i32 %a1, i64* %p, i64* %q) {
- %r1 = load i64* %p
+ %r1 = load i64, i64* %p
%rv1 = insertvalue { i64, i64 } undef, i64 %r1, 0
store i64 0, i64* %p
- %r2 = load i64* %q
+ %r2 = load i64, i64* %q
%rv2 = insertvalue { i64, i64 } %rv1, i64 %r2, 1
ret { i64, i64 } %rv2
}
@@ -268,10 +268,10 @@ define void @call_ret_i64_pair(i64* %i0) {
; CHECK: ld [%i3], %f2
define { i32, float } @ret_i32_float_pair(i32 %a0, i32 %a1,
i32* %p, float* %q) {
- %r1 = load i32* %p
+ %r1 = load i32, i32* %p
%rv1 = insertvalue { i32, float } undef, i32 %r1, 0
store i32 0, i32* %p
- %r2 = load float* %q
+ %r2 = load float, float* %q
%rv2 = insertvalue { i32, float } %rv1, float %r2, 1
ret { i32, float } %rv2
}
@@ -297,10 +297,10 @@ define void @call_ret_i32_float_pair(i32* %i0, float* %i1) {
; CHECK: ld [%i3], %f1
define inreg { i32, float } @ret_i32_float_packed(i32 %a0, i32 %a1,
i32* %p, float* %q) {
- %r1 = load i32* %p
+ %r1 = load i32, i32* %p
%rv1 = insertvalue { i32, float } undef, i32 %r1, 0
store i32 0, i32* %p
- %r2 = load float* %q
+ %r2 = load float, float* %q
%rv2 = insertvalue { i32, float } %rv1, float %r2, 1
ret { i32, float } %rv2
}
@@ -329,10 +329,10 @@ define void @call_ret_i32_float_packed(i32* %i0, float* %i1) {
; CHECK: or [[R3]], [[R1]], %i0
define inreg { i32, i32 } @ret_i32_packed(i32 %a0, i32 %a1,
i32* %p, i32* %q) {
- %r1 = load i32* %p
+ %r1 = load i32, i32* %p
%rv1 = insertvalue { i32, i32 } undef, i32 %r1, 1
store i32 0, i32* %p
- %r2 = load i32* %q
+ %r2 = load i32, i32* %q
%rv2 = insertvalue { i32, i32 } %rv1, i32 %r2, 0
ret { i32, i32 } %rv2
}
@@ -405,7 +405,7 @@ entry:
define i32 @test_large_stack() {
entry:
%buffer1 = alloca [16384 x i8], align 8
- %buffer1.sub = getelementptr inbounds [16384 x i8]* %buffer1, i32 0, i32 0
+ %buffer1.sub = getelementptr inbounds [16384 x i8], [16384 x i8]* %buffer1, i32 0, i32 0
%0 = call i32 @use_buf(i32 16384, i8* %buffer1.sub)
ret i32 %0
}
@@ -436,7 +436,7 @@ declare i64 @receive_fp128(i64 %a, ...)
; CHECK: call receive_fp128
define i64 @test_fp128_variable_args(i64 %a, fp128 %b) {
entry:
- %0 = call i64 (i64, ...)* @receive_fp128(i64 %a, fp128 %b)
+ %0 = call i64 (i64, ...) @receive_fp128(i64 %a, fp128 %b)
ret i64 %0
}
diff --git a/test/CodeGen/SPARC/64bit.ll b/test/CodeGen/SPARC/64bit.ll
index b18f1bc0e837..274fa32fad35 100644
--- a/test/CodeGen/SPARC/64bit.ll
+++ b/test/CodeGen/SPARC/64bit.ll
@@ -140,17 +140,17 @@ define i64 @reg_imm_alu(i64 %x, i64 %y, i64 %z) {
; CHECK: ldsh [%i3]
; CHECK: sth %
define i64 @loads(i64* %p, i32* %q, i32* %r, i16* %s) {
- %a = load i64* %p
+ %a = load i64, i64* %p
%ai = add i64 1, %a
store i64 %ai, i64* %p
- %b = load i32* %q
+ %b = load i32, i32* %q
%b2 = zext i32 %b to i64
%bi = trunc i64 %ai to i32
store i32 %bi, i32* %q
- %c = load i32* %r
+ %c = load i32, i32* %r
%c2 = sext i32 %c to i64
store i64 %ai, i64* %p
- %d = load i16* %s
+ %d = load i16, i16* %s
%d2 = sext i16 %d to i64
%di = trunc i64 %ai to i16
store i16 %di, i16* %s
@@ -164,7 +164,7 @@ define i64 @loads(i64* %p, i32* %q, i32* %r, i16* %s) {
; CHECK: load_bool
; CHECK: ldub [%i0], %i0
define i64 @load_bool(i1* %p) {
- %a = load i1* %p
+ %a = load i1, i1* %p
%b = zext i1 %a to i64
ret i64 %b
}
@@ -176,20 +176,20 @@ define i64 @load_bool(i1* %p) {
; CHECK: sth [[R]], [%i2+40]
; CHECK: stb [[R]], [%i3+-20]
define void @stores(i64* %p, i32* %q, i16* %r, i8* %s) {
- %p1 = getelementptr i64* %p, i64 1
- %p2 = getelementptr i64* %p, i64 2
- %pv = load i64* %p1
+ %p1 = getelementptr i64, i64* %p, i64 1
+ %p2 = getelementptr i64, i64* %p, i64 2
+ %pv = load i64, i64* %p1
store i64 %pv, i64* %p2
- %q2 = getelementptr i32* %q, i32 -2
+ %q2 = getelementptr i32, i32* %q, i32 -2
%qv = trunc i64 %pv to i32
store i32 %qv, i32* %q2
- %r2 = getelementptr i16* %r, i16 20
+ %r2 = getelementptr i16, i16* %r, i16 20
%rv = trunc i64 %pv to i16
store i16 %rv, i16* %r2
- %s2 = getelementptr i8* %s, i8 -20
+ %s2 = getelementptr i8, i8* %s, i8 -20
%sv = trunc i64 %pv to i8
store i8 %sv, i8* %s2
@@ -200,8 +200,8 @@ define void @stores(i64* %p, i32* %q, i16* %r, i8* %s) {
; CHECK: ldub [%i0], [[R:%[goli][0-7]]]
; CHECK: sll [[R]], [[R]], %i0
define i8 @promote_shifts(i8* %p) {
- %L24 = load i8* %p
- %L32 = load i8* %p
+ %L24 = load i8, i8* %p
+ %L32 = load i8, i8* %p
%B36 = shl i8 %L24, %L32
ret i8 %B36
}
@@ -230,7 +230,7 @@ define i64 @unsigned_divide(i64 %a, i64 %b) {
define void @access_fi() {
entry:
%b = alloca [32 x i8], align 1
- %arraydecay = getelementptr inbounds [32 x i8]* %b, i64 0, i64 0
+ %arraydecay = getelementptr inbounds [32 x i8], [32 x i8]* %b, i64 0, i64 0
call void @g(i8* %arraydecay) #2
ret void
}
@@ -281,7 +281,7 @@ define double @bitcast_f64_i64(i64 %x) {
define i64 @store_zero(i64* nocapture %a, i64* nocapture %b) {
entry:
store i64 0, i64* %a, align 8
- %0 = getelementptr inbounds i64* %b, i32 1
+ %0 = getelementptr inbounds i64, i64* %b, i32 1
store i64 0, i64* %0, align 8
ret i64 0
}
diff --git a/test/CodeGen/SPARC/atomics.ll b/test/CodeGen/SPARC/atomics.ll
index ee6c1f8999b0..bea9a3374696 100644
--- a/test/CodeGen/SPARC/atomics.ll
+++ b/test/CodeGen/SPARC/atomics.ll
@@ -9,8 +9,8 @@
; CHECK: st {{.+}}, [%o2]
define i32 @test_atomic_i32(i32* %ptr1, i32* %ptr2, i32* %ptr3) {
entry:
- %0 = load atomic i32* %ptr1 acquire, align 8
- %1 = load atomic i32* %ptr2 acquire, align 8
+ %0 = load atomic i32, i32* %ptr1 acquire, align 8
+ %1 = load atomic i32, i32* %ptr2 acquire, align 8
%2 = add i32 %0, %1
store atomic i32 %2, i32* %ptr3 release, align 8
ret i32 %2
@@ -25,8 +25,8 @@ entry:
; CHECK: stx {{.+}}, [%o2]
define i64 @test_atomic_i64(i64* %ptr1, i64* %ptr2, i64* %ptr3) {
entry:
- %0 = load atomic i64* %ptr1 acquire, align 8
- %1 = load atomic i64* %ptr2 acquire, align 8
+ %0 = load atomic i64, i64* %ptr1 acquire, align 8
+ %1 = load atomic i64, i64* %ptr2 acquire, align 8
%2 = add i64 %0, %1
store atomic i64 %2, i64* %ptr3 release, align 8
ret i64 %2
diff --git a/test/CodeGen/SPARC/basictest.ll b/test/CodeGen/SPARC/basictest.ll
index ba858253287a..7b540074a35f 100644
--- a/test/CodeGen/SPARC/basictest.ll
+++ b/test/CodeGen/SPARC/basictest.ll
@@ -31,8 +31,56 @@ define i32 @test2(i32 %X, i32 %Y) {
define i32 @store_zero(i32* %a, i32* %b) {
entry:
store i32 0, i32* %a, align 4
- %0 = getelementptr inbounds i32* %b, i32 1
+ %0 = getelementptr inbounds i32, i32* %b, i32 1
store i32 0, i32* %0, align 4
ret i32 0
}
+; CHECK-LABEL: signed_divide:
+; CHECK: sra %o0, 31, %o2
+; CHECK: wr %o2, %g0, %y
+; CHECK: sdiv %o0, %o1, %o0
+define i32 @signed_divide(i32 %a, i32 %b) {
+ %r = sdiv i32 %a, %b
+ ret i32 %r
+}
+
+; CHECK-LABEL: unsigned_divide:
+; CHECK: wr %g0, %g0, %y
+; CHECK: udiv %o0, %o1, %o0
+define i32 @unsigned_divide(i32 %a, i32 %b) {
+ %r = udiv i32 %a, %b
+ ret i32 %r
+}
+
+; CHECK-LABEL: multiply_32x32:
+; CHECK: smul %o0, %o1, %o0
+define i32 @multiply_32x32(i32 %a, i32 %b) {
+ %r = mul i32 %a, %b
+ ret i32 %r
+}
+
+; CHECK-LABEL: signed_multiply_32x32_64:
+; CHECK: smul %o0, %o1, %o1
+; CHECK: rd %y, %o0
+define i64 @signed_multiply_32x32_64(i32 %a, i32 %b) {
+ %xa = sext i32 %a to i64
+ %xb = sext i32 %b to i64
+ %r = mul i64 %xa, %xb
+ ret i64 %r
+}
+
+; CHECK-LABEL: unsigned_multiply_32x32_64:
+; CHECK: umul %o0, %o1, %o2
+; CHECK: rd %y, %o2
+; FIXME: the smul in the output is totally redundant and should not be there.
+; CHECK: smul %o0, %o1, %o1
+; CHECK: retl
+; CHECK: mov %o2, %o0
+define i64 @unsigned_multiply_32x32_64(i32 %a, i32 %b) {
+ %xa = zext i32 %a to i64
+ %xb = zext i32 %b to i64
+ %r = mul i64 %xa, %xb
+ ret i64 %r
+}
+
diff --git a/test/CodeGen/SPARC/empty-functions.ll b/test/CodeGen/SPARC/empty-functions.ll
index 38d288903d4a..1f8c5e3a312d 100644
--- a/test/CodeGen/SPARC/empty-functions.ll
+++ b/test/CodeGen/SPARC/empty-functions.ll
@@ -28,5 +28,5 @@ entry:
; LINUX-FP-NEXT: {{^}}.L{{.*}}:{{$}}
; LINUX-FP-NEXT: .cfi_register 15, 31
; LINUX-FP-NEXT: {{^}}.L{{.*}}:{{$}}
-; LINUX-FP-NEXT: .size func, .Ltmp3-func
+; LINUX-FP-NEXT: .size func, .Lfunc_end0-func
; LINUX-FP-NEXT: .cfi_endproc
diff --git a/test/CodeGen/SPARC/exception.ll b/test/CodeGen/SPARC/exception.ll
index eca9c8bf739f..0af48d0b64b8 100644
--- a/test/CodeGen/SPARC/exception.ll
+++ b/test/CodeGen/SPARC/exception.ll
@@ -121,13 +121,13 @@ entry:
"11": ; preds = %"8"
%10 = tail call i8* @__cxa_begin_catch(i8* %exc_ptr12) #1
- %11 = tail call i32 @puts(i8* getelementptr inbounds ([12 x i8]* @.cst, i32 0, i32 0))
+ %11 = tail call i32 @puts(i8* getelementptr inbounds ([12 x i8], [12 x i8]* @.cst, i32 0, i32 0))
tail call void @__cxa_end_catch() #1
br label %"5"
"13": ; preds = %8
%12 = tail call i8* @__cxa_begin_catch(i8* %exc_ptr12) #1
- %13 = tail call i32 @puts(i8* getelementptr inbounds ([14 x i8]* @.cst1, i32 0, i32 0))
+ %13 = tail call i32 @puts(i8* getelementptr inbounds ([14 x i8], [14 x i8]* @.cst1, i32 0, i32 0))
tail call void @__cxa_end_catch() #1
br label %"5"
}
diff --git a/test/CodeGen/SPARC/float.ll b/test/CodeGen/SPARC/float.ll
index 66367042cad9..d7a79cb05a82 100644
--- a/test/CodeGen/SPARC/float.ll
+++ b/test/CodeGen/SPARC/float.ll
@@ -154,11 +154,11 @@ entry:
; SPARC64: fitod
; SPARC64: fdtoi
-define void @test_itod_dtoi(i32 %a, i32* %ptr0, double* %ptr1) {
+define void @test_itod_dtoi(i32 %a, double %b, i32* %ptr0, double* %ptr1) {
entry:
%0 = sitofp i32 %a to double
store double %0, double* %ptr1, align 8
- %1 = fptosi double %0 to i32
+ %1 = fptosi double %b to i32
store i32 %1, i32* %ptr0, align 8
ret void
}
diff --git a/test/CodeGen/SPARC/fp128.ll b/test/CodeGen/SPARC/fp128.ll
index abd89bf264e1..c864cb7d599b 100644
--- a/test/CodeGen/SPARC/fp128.ll
+++ b/test/CodeGen/SPARC/fp128.ll
@@ -28,10 +28,10 @@
define void @f128_ops(fp128* noalias sret %scalar.result, fp128* byval %a, fp128* byval %b, fp128* byval %c, fp128* byval %d) {
entry:
- %0 = load fp128* %a, align 8
- %1 = load fp128* %b, align 8
- %2 = load fp128* %c, align 8
- %3 = load fp128* %d, align 8
+ %0 = load fp128, fp128* %a, align 8
+ %1 = load fp128, fp128* %b, align 8
+ %2 = load fp128, fp128* %c, align 8
+ %3 = load fp128, fp128* %d, align 8
%4 = fadd fp128 %0, %1
%5 = fsub fp128 %4, %2
%6 = fmul fp128 %5, %3
@@ -56,7 +56,7 @@ entry:
define void @f128_spill(fp128* noalias sret %scalar.result, fp128* byval %a) {
entry:
- %0 = load fp128* %a, align 8
+ %0 = load fp128, fp128* %a, align 8
call void asm sideeffect "", "~{f0},~{f1},~{f2},~{f3},~{f4},~{f5},~{f6},~{f7},~{f8},~{f9},~{f10},~{f11},~{f12},~{f13},~{f14},~{f15},~{f16},~{f17},~{f18},~{f19},~{f20},~{f21},~{f22},~{f23},~{f24},~{f25},~{f26},~{f27},~{f28},~{f29},~{f30},~{f31}"()
store fp128 %0, fp128* %scalar.result, align 8
ret void
@@ -71,8 +71,8 @@ entry:
define i32 @f128_compare(fp128* byval %f0, fp128* byval %f1, i32 %a, i32 %b) {
entry:
- %0 = load fp128* %f0, align 8
- %1 = load fp128* %f1, align 8
+ %0 = load fp128, fp128* %f0, align 8
+ %1 = load fp128, fp128* %f1, align 8
%cond = fcmp ult fp128 %0, %1
%ret = select i1 %cond, i32 %a, i32 %b
ret i32 %ret
@@ -107,7 +107,7 @@ entry:
define void @f128_abs(fp128* noalias sret %scalar.result, fp128* byval %a) {
entry:
- %0 = load fp128* %a, align 8
+ %0 = load fp128, fp128* %a, align 8
%1 = tail call fp128 @llvm.fabs.f128(fp128 %0)
store fp128 %1, fp128* %scalar.result, align 8
ret void
@@ -142,8 +142,8 @@ entry:
define void @fp128_unaligned(fp128* %a, fp128* %b, fp128* %c) {
entry:
- %0 = load fp128* %a, align 1
- %1 = load fp128* %b, align 1
+ %0 = load fp128, fp128* %a, align 1
+ %1 = load fp128, fp128* %b, align 1
%2 = fadd fp128 %0, %1
store fp128 %2, fp128* %c, align 1
ret void
@@ -173,8 +173,8 @@ entry:
define i32 @f128_to_i32(fp128* %a, fp128* %b) {
entry:
- %0 = load fp128* %a, align 8
- %1 = load fp128* %b, align 8
+ %0 = load fp128, fp128* %a, align 8
+ %1 = load fp128, fp128* %b, align 8
%2 = fptoui fp128 %0 to i32
%3 = fptosi fp128 %1 to i32
%4 = add i32 %2, %3
@@ -182,26 +182,28 @@ entry:
}
; HARD-LABEL: test_itoq_qtoi
-; HARD: call _Q_lltoq
-; HARD: call _Q_qtoll
-; HARD: fitoq
-; HARD: fqtoi
+; HARD-DAG: call _Q_lltoq
+; HARD-DAG: call _Q_qtoll
+; HARD-DAG: fitoq
+; HARD-DAG: fqtoi
; SOFT-LABEL: test_itoq_qtoi
-; SOFT: call _Q_lltoq
-; SOFT: call _Q_qtoll
-; SOFT: call _Q_itoq
-; SOFT: call _Q_qtoi
+; SOFT-DAG: call _Q_lltoq
+; SOFT-DAG: call _Q_qtoll
+; SOFT-DAG: call _Q_itoq
+; SOFT-DAG: call _Q_qtoi
-define void @test_itoq_qtoi(i64 %a, i32 %b, i64* %ptr0, fp128* %ptr1) {
+define void @test_itoq_qtoi(i64 %a, i32 %b, fp128* %c, fp128* %d, i64* %ptr0, fp128* %ptr1) {
entry:
%0 = sitofp i64 %a to fp128
store fp128 %0, fp128* %ptr1, align 8
- %1 = fptosi fp128 %0 to i64
+ %cval = load fp128, fp128* %c, align 8
+ %1 = fptosi fp128 %cval to i64
store i64 %1, i64* %ptr0, align 8
%2 = sitofp i32 %b to fp128
store fp128 %2, fp128* %ptr1, align 8
- %3 = fptosi fp128 %2 to i32
+ %dval = load fp128, fp128* %d, align 8
+ %3 = fptosi fp128 %dval to i32
%4 = bitcast i64* %ptr0 to i32*
store i32 %3, i32* %4, align 8
ret void
@@ -219,15 +221,17 @@ entry:
; SOFT-DAG: call _Q_utoq
; SOFT-DAG: call _Q_qtou
-define void @test_utoq_qtou(i64 %a, i32 %b, i64* %ptr0, fp128* %ptr1) {
+define void @test_utoq_qtou(i64 %a, i32 %b, fp128* %c, fp128* %d, i64* %ptr0, fp128* %ptr1) {
entry:
%0 = uitofp i64 %a to fp128
store fp128 %0, fp128* %ptr1, align 8
- %1 = fptoui fp128 %0 to i64
+ %cval = load fp128, fp128* %c, align 8
+ %1 = fptoui fp128 %cval to i64
store i64 %1, i64* %ptr0, align 8
%2 = uitofp i32 %b to fp128
store fp128 %2, fp128* %ptr1, align 8
- %3 = fptoui fp128 %2 to i32
+ %dval = load fp128, fp128* %d, align 8
+ %3 = fptoui fp128 %dval to i32
%4 = bitcast i64* %ptr0 to i32*
store i32 %3, i32* %4, align 8
ret void
@@ -238,7 +242,7 @@ entry:
define void @f128_neg(fp128* noalias sret %scalar.result, fp128* byval %a) {
entry:
- %0 = load fp128* %a, align 8
+ %0 = load fp128, fp128* %a, align 8
%1 = fsub fp128 0xL00000000000000008000000000000000, %0
store fp128 %1, fp128* %scalar.result, align 8
ret void
diff --git a/test/CodeGen/SPARC/globals.ll b/test/CodeGen/SPARC/globals.ll
index 3d3eba28af62..3ef135f6d70b 100644
--- a/test/CodeGen/SPARC/globals.ll
+++ b/test/CodeGen/SPARC/globals.ll
@@ -8,7 +8,7 @@
@G = external global i8
define zeroext i8 @loadG() {
- %tmp = load i8* @G
+ %tmp = load i8, i8* @G
ret i8 %tmp
}
diff --git a/test/CodeGen/SPARC/inlineasm.ll b/test/CodeGen/SPARC/inlineasm.ll
index 2650533b7fec..526cde8de8b4 100644
--- a/test/CodeGen/SPARC/inlineasm.ll
+++ b/test/CodeGen/SPARC/inlineasm.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=sparc <%s | FileCheck %s
+; RUN: llc -march=sparc -no-integrated-as <%s | FileCheck %s
; CHECK-LABEL: test_constraint_r
; CHECK: add %o1, %o0, %o0
diff --git a/test/CodeGen/SPARC/leafproc.ll b/test/CodeGen/SPARC/leafproc.ll
index abb8ed9be439..fd74e5ca723e 100644
--- a/test/CodeGen/SPARC/leafproc.ll
+++ b/test/CodeGen/SPARC/leafproc.ll
@@ -70,11 +70,11 @@ define i32 @leaf_proc_with_local_array(i32 %a, i32 %b, i32 %c) {
entry:
%array = alloca [2 x i32], align 4
%0 = sub nsw i32 %b, %c
- %1 = getelementptr inbounds [2 x i32]* %array, i32 0, i32 0
+ %1 = getelementptr inbounds [2 x i32], [2 x i32]* %array, i32 0, i32 0
store i32 1, i32* %1, align 4
- %2 = getelementptr inbounds [2 x i32]* %array, i32 0, i32 1
+ %2 = getelementptr inbounds [2 x i32], [2 x i32]* %array, i32 0, i32 1
store i32 2, i32* %2, align 4
- %3 = getelementptr inbounds [2 x i32]* %array, i32 0, i32 %a
- %4 = load i32* %3, align 4
+ %3 = getelementptr inbounds [2 x i32], [2 x i32]* %array, i32 0, i32 %a
+ %4 = load i32, i32* %3, align 4
ret i32 %4
}
diff --git a/test/CodeGen/SPARC/mult-alt-generic-sparc.ll b/test/CodeGen/SPARC/mult-alt-generic-sparc.ll
index 6013b17d9372..49496c304d23 100644
--- a/test/CodeGen/SPARC/mult-alt-generic-sparc.ll
+++ b/test/CodeGen/SPARC/mult-alt-generic-sparc.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=sparc
+; RUN: llc < %s -march=sparc -no-integrated-as
; ModuleID = 'mult-alt-generic.c'
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-n32"
target triple = "sparc"
@@ -33,10 +33,10 @@ entry:
%in1 = alloca i32, align 4
store i32 0, i32* %out0, align 4
store i32 1, i32* %in1, align 4
- %tmp = load i32* %in1, align 4
+ %tmp = load i32, i32* %in1, align 4
%0 = call i32 asm "foo $1,$0", "=r,<r"(i32 %tmp) nounwind
store i32 %0, i32* %out0, align 4
- %tmp1 = load i32* %in1, align 4
+ %tmp1 = load i32, i32* %in1, align 4
%1 = call i32 asm "foo $1,$0", "=r,r<"(i32 %tmp1) nounwind
store i32 %1, i32* %out0, align 4
ret void
@@ -48,10 +48,10 @@ entry:
%in1 = alloca i32, align 4
store i32 0, i32* %out0, align 4
store i32 1, i32* %in1, align 4
- %tmp = load i32* %in1, align 4
+ %tmp = load i32, i32* %in1, align 4
%0 = call i32 asm "foo $1,$0", "=r,>r"(i32 %tmp) nounwind
store i32 %0, i32* %out0, align 4
- %tmp1 = load i32* %in1, align 4
+ %tmp1 = load i32, i32* %in1, align 4
%1 = call i32 asm "foo $1,$0", "=r,r>"(i32 %tmp1) nounwind
store i32 %1, i32* %out0, align 4
ret void
@@ -63,7 +63,7 @@ entry:
%in1 = alloca i32, align 4
store i32 0, i32* %out0, align 4
store i32 1, i32* %in1, align 4
- %tmp = load i32* %in1, align 4
+ %tmp = load i32, i32* %in1, align 4
%0 = call i32 asm "foo $1,$0", "=r,r"(i32 %tmp) nounwind
store i32 %0, i32* %out0, align 4
ret void
@@ -120,10 +120,10 @@ entry:
%in1 = alloca i32, align 4
store i32 0, i32* %out0, align 4
store i32 1, i32* %in1, align 4
- %tmp = load i32* %in1, align 4
+ %tmp = load i32, i32* %in1, align 4
%0 = call i32 asm "foo $1,$0", "=r,imr"(i32 %tmp) nounwind
store i32 %0, i32* %out0, align 4
- %tmp1 = load i32* @min1, align 4
+ %tmp1 = load i32, i32* @min1, align 4
%1 = call i32 asm "foo $1,$0", "=r,imr"(i32 %tmp1) nounwind
store i32 %1, i32* %out0, align 4
%2 = call i32 asm "foo $1,$0", "=r,imr"(i32 1) nounwind
@@ -137,15 +137,15 @@ entry:
%in1 = alloca i32, align 4
store i32 0, i32* %out0, align 4
store i32 1, i32* %in1, align 4
- %tmp = load i32* %in1, align 4
+ %tmp = load i32, i32* %in1, align 4
%0 = call i32 asm "foo $1,$0", "=r,X"(i32 %tmp) nounwind
store i32 %0, i32* %out0, align 4
- %tmp1 = load i32* @min1, align 4
+ %tmp1 = load i32, i32* @min1, align 4
%1 = call i32 asm "foo $1,$0", "=r,X"(i32 %tmp1) nounwind
store i32 %1, i32* %out0, align 4
%2 = call i32 asm "foo $1,$0", "=r,X"(i32 1) nounwind
store i32 %2, i32* %out0, align 4
- %3 = call i32 asm "foo $1,$0", "=r,X"(i32* getelementptr inbounds ([2 x i32]* @marray, i32 0, i32 0)) nounwind
+ %3 = call i32 asm "foo $1,$0", "=r,X"(i32* getelementptr inbounds ([2 x i32], [2 x i32]* @marray, i32 0, i32 0)) nounwind
store i32 %3, i32* %out0, align 4
; No lowering support.
; %4 = call i32 asm "foo $1,$0", "=r,X"(double 1.000000e+001) nounwind
@@ -159,14 +159,14 @@ define void @single_p() nounwind {
entry:
%out0 = alloca i32, align 4
store i32 0, i32* %out0, align 4
- %0 = call i32 asm "foo $1,$0", "=r,r"(i32* getelementptr inbounds ([2 x i32]* @marray, i32 0, i32 0)) nounwind
+ %0 = call i32 asm "foo $1,$0", "=r,r"(i32* getelementptr inbounds ([2 x i32], [2 x i32]* @marray, i32 0, i32 0)) nounwind
store i32 %0, i32* %out0, align 4
ret void
}
define void @multi_m() nounwind {
entry:
- %tmp = load i32* @min1, align 4
+ %tmp = load i32, i32* @min1, align 4
call void asm "foo $1,$0", "=*m|r,m|r"(i32* @mout0, i32 %tmp) nounwind
ret void
}
@@ -191,10 +191,10 @@ entry:
%in1 = alloca i32, align 4
store i32 0, i32* %out0, align 4
store i32 1, i32* %in1, align 4
- %tmp = load i32* %in1, align 4
+ %tmp = load i32, i32* %in1, align 4
%0 = call i32 asm "foo $1,$0", "=r|r,r|<r"(i32 %tmp) nounwind
store i32 %0, i32* %out0, align 4
- %tmp1 = load i32* %in1, align 4
+ %tmp1 = load i32, i32* %in1, align 4
%1 = call i32 asm "foo $1,$0", "=r|r,r|r<"(i32 %tmp1) nounwind
store i32 %1, i32* %out0, align 4
ret void
@@ -206,10 +206,10 @@ entry:
%in1 = alloca i32, align 4
store i32 0, i32* %out0, align 4
store i32 1, i32* %in1, align 4
- %tmp = load i32* %in1, align 4
+ %tmp = load i32, i32* %in1, align 4
%0 = call i32 asm "foo $1,$0", "=r|r,r|>r"(i32 %tmp) nounwind
store i32 %0, i32* %out0, align 4
- %tmp1 = load i32* %in1, align 4
+ %tmp1 = load i32, i32* %in1, align 4
%1 = call i32 asm "foo $1,$0", "=r|r,r|r>"(i32 %tmp1) nounwind
store i32 %1, i32* %out0, align 4
ret void
@@ -221,7 +221,7 @@ entry:
%in1 = alloca i32, align 4
store i32 0, i32* %out0, align 4
store i32 1, i32* %in1, align 4
- %tmp = load i32* %in1, align 4
+ %tmp = load i32, i32* %in1, align 4
%0 = call i32 asm "foo $1,$0", "=r|r,r|m"(i32 %tmp) nounwind
store i32 %0, i32* %out0, align 4
ret void
@@ -278,10 +278,10 @@ entry:
%in1 = alloca i32, align 4
store i32 0, i32* %out0, align 4
store i32 1, i32* %in1, align 4
- %tmp = load i32* %in1, align 4
+ %tmp = load i32, i32* %in1, align 4
%0 = call i32 asm "foo $1,$0", "=r|r,r|imr"(i32 %tmp) nounwind
store i32 %0, i32* %out0, align 4
- %tmp1 = load i32* @min1, align 4
+ %tmp1 = load i32, i32* @min1, align 4
%1 = call i32 asm "foo $1,$0", "=r|r,r|imr"(i32 %tmp1) nounwind
store i32 %1, i32* %out0, align 4
%2 = call i32 asm "foo $1,$0", "=r|r,r|imr"(i32 1) nounwind
@@ -295,15 +295,15 @@ entry:
%in1 = alloca i32, align 4
store i32 0, i32* %out0, align 4
store i32 1, i32* %in1, align 4
- %tmp = load i32* %in1, align 4
+ %tmp = load i32, i32* %in1, align 4
%0 = call i32 asm "foo $1,$0", "=r|r,r|X"(i32 %tmp) nounwind
store i32 %0, i32* %out0, align 4
- %tmp1 = load i32* @min1, align 4
+ %tmp1 = load i32, i32* @min1, align 4
%1 = call i32 asm "foo $1,$0", "=r|r,r|X"(i32 %tmp1) nounwind
store i32 %1, i32* %out0, align 4
%2 = call i32 asm "foo $1,$0", "=r|r,r|X"(i32 1) nounwind
store i32 %2, i32* %out0, align 4
- %3 = call i32 asm "foo $1,$0", "=r|r,r|X"(i32* getelementptr inbounds ([2 x i32]* @marray, i32 0, i32 0)) nounwind
+ %3 = call i32 asm "foo $1,$0", "=r|r,r|X"(i32* getelementptr inbounds ([2 x i32], [2 x i32]* @marray, i32 0, i32 0)) nounwind
store i32 %3, i32* %out0, align 4
; No lowering support.
; %4 = call i32 asm "foo $1,$0", "=r|r,r|X"(double 1.000000e+001) nounwind
@@ -317,7 +317,7 @@ define void @multi_p() nounwind {
entry:
%out0 = alloca i32, align 4
store i32 0, i32* %out0, align 4
- %0 = call i32 asm "foo $1,$0", "=r|r,r|r"(i32* getelementptr inbounds ([2 x i32]* @marray, i32 0, i32 0)) nounwind
+ %0 = call i32 asm "foo $1,$0", "=r|r,r|r"(i32* getelementptr inbounds ([2 x i32], [2 x i32]* @marray, i32 0, i32 0)) nounwind
store i32 %0, i32* %out0, align 4
ret void
}
diff --git a/test/CodeGen/SPARC/obj-relocs.ll b/test/CodeGen/SPARC/obj-relocs.ll
index 6d57598795d4..115263ac5d46 100644
--- a/test/CodeGen/SPARC/obj-relocs.ll
+++ b/test/CodeGen/SPARC/obj-relocs.ll
@@ -21,7 +21,7 @@
define i64 @foo(i64 %a) {
entry:
- %0 = load i64* @AGlobalVar, align 4
+ %0 = load i64, i64* @AGlobalVar, align 4
%1 = add i64 %a, %0
%2 = call i64 @bar(i64 %1)
ret i64 %2
diff --git a/test/CodeGen/SPARC/private.ll b/test/CodeGen/SPARC/private.ll
index 38cea4ca6e4f..400d907e150f 100644
--- a/test/CodeGen/SPARC/private.ll
+++ b/test/CodeGen/SPARC/private.ll
@@ -11,7 +11,7 @@ define private void @foo() {
define i32 @bar() {
call void @foo()
- %1 = load i32* @baz, align 4
+ %1 = load i32, i32* @baz, align 4
ret i32 %1
}
diff --git a/test/CodeGen/SPARC/setjmp.ll b/test/CodeGen/SPARC/setjmp.ll
index 17afb36ca74c..17519c516273 100644
--- a/test/CodeGen/SPARC/setjmp.ll
+++ b/test/CodeGen/SPARC/setjmp.ll
@@ -26,16 +26,16 @@
; Function Attrs: nounwind
define i32 @foo(%struct.jmpbuf_env* byval %inbuf) #0 {
entry:
- %0 = getelementptr inbounds %struct.jmpbuf_env* %inbuf, i32 0, i32 0
+ %0 = getelementptr inbounds %struct.jmpbuf_env, %struct.jmpbuf_env* %inbuf, i32 0, i32 0
store i32 0, i32* %0, align 4, !tbaa !4
- %1 = getelementptr inbounds %struct.jmpbuf_env* %inbuf, i32 0, i32 1
+ %1 = getelementptr inbounds %struct.jmpbuf_env, %struct.jmpbuf_env* %inbuf, i32 0, i32 1
store i32 1, i32* %1, align 4, !tbaa !4
- %2 = getelementptr inbounds %struct.jmpbuf_env* %inbuf, i32 0, i32 2, i32 0
+ %2 = getelementptr inbounds %struct.jmpbuf_env, %struct.jmpbuf_env* %inbuf, i32 0, i32 2, i32 0
%3 = call i32 @_setjmp(%struct.__jmp_buf_tag* %2) #2
- %4 = getelementptr inbounds %struct.jmpbuf_env* %inbuf, i32 0, i32 3
+ %4 = getelementptr inbounds %struct.jmpbuf_env, %struct.jmpbuf_env* %inbuf, i32 0, i32 3
store i32 %3, i32* %4, align 4, !tbaa !4
store %struct.jmpbuf_env* %inbuf, %struct.jmpbuf_env** @jenv, align 4, !tbaa !3
- %5 = load i32* %1, align 4, !tbaa !4
+ %5 = load i32, i32* %1, align 4, !tbaa !4
%6 = icmp eq i32 %5, 1
%7 = icmp eq i32 %3, 0
%or.cond = and i1 %6, %7
@@ -46,8 +46,8 @@ entry:
unreachable
bar.exit: ; preds = %entry
- %8 = load i32* %0, align 4, !tbaa !4
- %9 = call i32 (i8*, ...)* @printf(i8* noalias getelementptr inbounds ([30 x i8]* @.cst, i32 0, i32 0), i32 %8) #0
+ %8 = load i32, i32* %0, align 4, !tbaa !4
+ %9 = call i32 (i8*, ...) @printf(i8* noalias getelementptr inbounds ([30 x i8], [30 x i8]* @.cst, i32 0, i32 0), i32 %8) #0
ret i32 0
}
diff --git a/test/CodeGen/SPARC/spillsize.ll b/test/CodeGen/SPARC/spillsize.ll
index 64f63f97d509..a82e5098ffd0 100644
--- a/test/CodeGen/SPARC/spillsize.ll
+++ b/test/CodeGen/SPARC/spillsize.ll
@@ -11,13 +11,13 @@ target triple = "sparcv9"
; CHECK: ldx [%fp+
define void @spill4(i64* nocapture %p) {
entry:
- %val0 = load i64* %p
+ %val0 = load i64, i64* %p
%cmp0 = icmp ult i64 %val0, 385672958347594845
%cm80 = zext i1 %cmp0 to i64
store i64 %cm80, i64* %p, align 8
tail call void asm sideeffect "", "~{i0},~{i1},~{i2},~{i3},~{i4},~{i5},~{g2},~{g3},~{g4},~{g5},~{l0},~{l1},~{l2},~{l3},~{l4},~{l5},~{l6},~{l7},~{o0},~{o1},~{o2},~{o3},~{o4},~{o5},~{o7}"()
- %arrayidx1 = getelementptr inbounds i64* %p, i64 1
- %val = load i64* %arrayidx1
+ %arrayidx1 = getelementptr inbounds i64, i64* %p, i64 1
+ %val = load i64, i64* %arrayidx1
%cmp = icmp ult i64 %val, 385672958347594845
%cm8 = select i1 %cmp, i64 10, i64 20
store i64 %cm8, i64* %arrayidx1, align 8
diff --git a/test/CodeGen/SPARC/tls.ll b/test/CodeGen/SPARC/tls.ll
index ce3e00539845..a70637b283f5 100644
--- a/test/CodeGen/SPARC/tls.ll
+++ b/test/CodeGen/SPARC/tls.ll
@@ -34,7 +34,7 @@
define i32 @test_tls_local() {
entry:
- %0 = load i32* @local_symbol, align 4
+ %0 = load i32, i32* @local_symbol, align 4
%1 = add i32 %0, 1
store i32 %1, i32* @local_symbol, align 4
ret i32 %1
@@ -68,7 +68,7 @@ entry:
define i32 @test_tls_extern() {
entry:
- %0 = load i32* @extern_symbol, align 4
+ %0 = load i32, i32* @extern_symbol, align 4
%1 = add i32 %0, 1
store i32 %1, i32* @extern_symbol, align 4
ret i32 %1
@@ -99,7 +99,7 @@ entry:
; v9abs-obj: ]
; pic-obj: Relocations [
-; pic-obj: Section (2) .rela.text {
+; pic-obj: Section {{.*}} .rela.text {
; pic-obj: 0x{{[0-9,A-F]+}} R_SPARC_PC22 _GLOBAL_OFFSET_TABLE_ 0x4
; pic-obj: 0x{{[0-9,A-F]+}} R_SPARC_PC10 _GLOBAL_OFFSET_TABLE_ 0x8
; pic-obj: 0x{{[0-9,A-F]+}} R_SPARC_TLS_LDO_HIX22 local_symbol 0x0
diff --git a/test/CodeGen/SPARC/varargs.ll b/test/CodeGen/SPARC/varargs.ll
index 76e16cd44f6a..c2d1e98b698b 100644
--- a/test/CodeGen/SPARC/varargs.ll
+++ b/test/CodeGen/SPARC/varargs.ll
@@ -24,8 +24,8 @@ entry:
for.cond:
%fmt.addr.0 = phi i8* [ %fmt, %entry ], [ %incdec.ptr, %for.cond.backedge ]
%sum.addr.0 = phi double [ %sum, %entry ], [ %sum.addr.0.be, %for.cond.backedge ]
- %incdec.ptr = getelementptr inbounds i8* %fmt.addr.0, i64 1
- %0 = load i8* %fmt.addr.0, align 1
+ %incdec.ptr = getelementptr inbounds i8, i8* %fmt.addr.0, i64 1
+ %0 = load i8, i8* %fmt.addr.0, align 1
%conv = sext i8 %0 to i32
switch i32 %conv, label %sw.default [
i32 105, label %sw.bb
@@ -71,6 +71,6 @@ declare void @llvm.va_start(i8*)
; CHECK: , %f2
define i32 @call_1d() #0 {
entry:
- %call = call double (i8*, double, ...)* @varargsfunc(i8* undef, double 1.000000e+00, double 2.000000e+00)
+ %call = call double (i8*, double, ...) @varargsfunc(i8* undef, double 1.000000e+00, double 2.000000e+00)
ret i32 1
}
diff --git a/test/CodeGen/SystemZ/Large/branch-range-01.py b/test/CodeGen/SystemZ/Large/branch-range-01.py
index edb631d8c6d5..365d7e420818 100644
--- a/test/CodeGen/SystemZ/Large/branch-range-01.py
+++ b/test/CodeGen/SystemZ/Large/branch-range-01.py
@@ -78,8 +78,8 @@ print ''
for i in xrange(branch_blocks):
next = 'before%d' % (i + 1) if i + 1 < branch_blocks else 'main'
print 'before%d:' % i
- print ' %%bstop%d = getelementptr i32 *%%stop, i64 %d' % (i, i)
- print ' %%bcur%d = load i32 *%%bstop%d' % (i, i)
+ print ' %%bstop%d = getelementptr i32, i32 *%%stop, i64 %d' % (i, i)
+ print ' %%bcur%d = load i32 , i32 *%%bstop%d' % (i, i)
print ' %%btest%d = icmp eq i32 %%limit, %%bcur%d' % (i, i)
print ' br i1 %%btest%d, label %%after0, label %%%s' % (i, next)
print ''
@@ -90,12 +90,12 @@ for i in xrange(0, main_size, 6):
a, b = b, a + b
offset = 4096 + b % 500000
value = a % 256
- print ' %%ptr%d = getelementptr i8 *%%base, i64 %d' % (i, offset)
+ print ' %%ptr%d = getelementptr i8, i8 *%%base, i64 %d' % (i, offset)
print ' store volatile i8 %d, i8 *%%ptr%d' % (value, i)
for i in xrange(branch_blocks):
- print ' %%astop%d = getelementptr i32 *%%stop, i64 %d' % (i, i + 25)
- print ' %%acur%d = load i32 *%%astop%d' % (i, i)
+ print ' %%astop%d = getelementptr i32, i32 *%%stop, i64 %d' % (i, i + 25)
+ print ' %%acur%d = load i32 , i32 *%%astop%d' % (i, i)
print ' %%atest%d = icmp eq i32 %%limit, %%acur%d' % (i, i)
print ' br i1 %%atest%d, label %%main, label %%after%d' % (i, i)
print ''
diff --git a/test/CodeGen/SystemZ/Large/branch-range-02.py b/test/CodeGen/SystemZ/Large/branch-range-02.py
index 743e12de0f1f..7f7b09954a61 100644
--- a/test/CodeGen/SystemZ/Large/branch-range-02.py
+++ b/test/CodeGen/SystemZ/Large/branch-range-02.py
@@ -71,8 +71,8 @@ for i in xrange(blocks):
other = 'end' if 2 * i < blocks else 'b0'
print 'b%d:' % i
print ' store volatile i8 %d, i8 *%%base' % value
- print ' %%astop%d = getelementptr i32 *%%stop, i64 %d' % (i, i)
- print ' %%acur%d = load i32 *%%astop%d' % (i, i)
+ print ' %%astop%d = getelementptr i32, i32 *%%stop, i64 %d' % (i, i)
+ print ' %%acur%d = load i32 , i32 *%%astop%d' % (i, i)
print ' %%atest%d = icmp eq i32 %%limit, %%acur%d' % (i, i)
print ' br i1 %%atest%d, label %%%s, label %%%s' % (i, other, next)
diff --git a/test/CodeGen/SystemZ/Large/branch-range-03.py b/test/CodeGen/SystemZ/Large/branch-range-03.py
index 5c9a93b87f73..745d733211ff 100644
--- a/test/CodeGen/SystemZ/Large/branch-range-03.py
+++ b/test/CodeGen/SystemZ/Large/branch-range-03.py
@@ -78,8 +78,8 @@ print ''
for i in xrange(branch_blocks):
next = 'before%d' % (i + 1) if i + 1 < branch_blocks else 'main'
print 'before%d:' % i
- print ' %%bstop%d = getelementptr i8 *%%stop, i64 %d' % (i, i)
- print ' %%bcur%d = load i8 *%%bstop%d' % (i, i)
+ print ' %%bstop%d = getelementptr i8, i8 *%%stop, i64 %d' % (i, i)
+ print ' %%bcur%d = load i8 , i8 *%%bstop%d' % (i, i)
print ' %%bext%d = sext i8 %%bcur%d to i32' % (i, i)
print ' %%btest%d = icmp eq i32 %%limit, %%bext%d' % (i, i)
print ' br i1 %%btest%d, label %%after0, label %%%s' % (i, next)
@@ -91,12 +91,12 @@ for i in xrange(0, main_size, 6):
a, b = b, a + b
offset = 4096 + b % 500000
value = a % 256
- print ' %%ptr%d = getelementptr i8 *%%base, i64 %d' % (i, offset)
+ print ' %%ptr%d = getelementptr i8, i8 *%%base, i64 %d' % (i, offset)
print ' store volatile i8 %d, i8 *%%ptr%d' % (value, i)
for i in xrange(branch_blocks):
- print ' %%astop%d = getelementptr i8 *%%stop, i64 %d' % (i, i + 25)
- print ' %%acur%d = load i8 *%%astop%d' % (i, i)
+ print ' %%astop%d = getelementptr i8, i8 *%%stop, i64 %d' % (i, i + 25)
+ print ' %%acur%d = load i8 , i8 *%%astop%d' % (i, i)
print ' %%aext%d = sext i8 %%acur%d to i32' % (i, i)
print ' %%atest%d = icmp eq i32 %%limit, %%aext%d' % (i, i)
print ' br i1 %%atest%d, label %%main, label %%after%d' % (i, i)
diff --git a/test/CodeGen/SystemZ/Large/branch-range-04.py b/test/CodeGen/SystemZ/Large/branch-range-04.py
index 2c9090fa2067..a0c9c4426456 100644
--- a/test/CodeGen/SystemZ/Large/branch-range-04.py
+++ b/test/CodeGen/SystemZ/Large/branch-range-04.py
@@ -82,8 +82,8 @@ print ''
for i in xrange(branch_blocks):
next = 'before%d' % (i + 1) if i + 1 < branch_blocks else 'main'
print 'before%d:' % i
- print ' %%bstop%d = getelementptr i8 *%%stop, i64 %d' % (i, i)
- print ' %%bcur%d = load i8 *%%bstop%d' % (i, i)
+ print ' %%bstop%d = getelementptr i8, i8 *%%stop, i64 %d' % (i, i)
+ print ' %%bcur%d = load i8 , i8 *%%bstop%d' % (i, i)
print ' %%bext%d = sext i8 %%bcur%d to i64' % (i, i)
print ' %%btest%d = icmp eq i64 %%limit, %%bext%d' % (i, i)
print ' br i1 %%btest%d, label %%after0, label %%%s' % (i, next)
@@ -95,12 +95,12 @@ for i in xrange(0, main_size, 6):
a, b = b, a + b
offset = 4096 + b % 500000
value = a % 256
- print ' %%ptr%d = getelementptr i8 *%%base, i64 %d' % (i, offset)
+ print ' %%ptr%d = getelementptr i8, i8 *%%base, i64 %d' % (i, offset)
print ' store volatile i8 %d, i8 *%%ptr%d' % (value, i)
for i in xrange(branch_blocks):
- print ' %%astop%d = getelementptr i8 *%%stop, i64 %d' % (i, i + 25)
- print ' %%acur%d = load i8 *%%astop%d' % (i, i)
+ print ' %%astop%d = getelementptr i8, i8 *%%stop, i64 %d' % (i, i + 25)
+ print ' %%acur%d = load i8 , i8 *%%astop%d' % (i, i)
print ' %%aext%d = sext i8 %%acur%d to i64' % (i, i)
print ' %%atest%d = icmp eq i64 %%limit, %%aext%d' % (i, i)
print ' br i1 %%atest%d, label %%main, label %%after%d' % (i, i)
diff --git a/test/CodeGen/SystemZ/Large/branch-range-05.py b/test/CodeGen/SystemZ/Large/branch-range-05.py
index 52f4a961c88f..69a8112162a0 100644
--- a/test/CodeGen/SystemZ/Large/branch-range-05.py
+++ b/test/CodeGen/SystemZ/Large/branch-range-05.py
@@ -82,7 +82,7 @@ print ''
for i in xrange(branch_blocks):
next = 'before%d' % (i + 1) if i + 1 < branch_blocks else 'main'
print 'before%d:' % i
- print ' %%bcur%d = load i8 *%%stop' % i
+ print ' %%bcur%d = load i8 , i8 *%%stop' % i
print ' %%bext%d = sext i8 %%bcur%d to i32' % (i, i)
print ' %%btest%d = icmp slt i32 %%bext%d, %d' % (i, i, i + 50)
print ' br i1 %%btest%d, label %%after0, label %%%s' % (i, next)
@@ -94,11 +94,11 @@ for i in xrange(0, main_size, 6):
a, b = b, a + b
offset = 4096 + b % 500000
value = a % 256
- print ' %%ptr%d = getelementptr i8 *%%base, i64 %d' % (i, offset)
+ print ' %%ptr%d = getelementptr i8, i8 *%%base, i64 %d' % (i, offset)
print ' store volatile i8 %d, i8 *%%ptr%d' % (value, i)
for i in xrange(branch_blocks):
- print ' %%acur%d = load i8 *%%stop' % i
+ print ' %%acur%d = load i8 , i8 *%%stop' % i
print ' %%aext%d = sext i8 %%acur%d to i32' % (i, i)
print ' %%atest%d = icmp slt i32 %%aext%d, %d' % (i, i, i + 100)
print ' br i1 %%atest%d, label %%main, label %%after%d' % (i, i)
diff --git a/test/CodeGen/SystemZ/Large/branch-range-06.py b/test/CodeGen/SystemZ/Large/branch-range-06.py
index c34ebac4ce36..b08bc119c454 100644
--- a/test/CodeGen/SystemZ/Large/branch-range-06.py
+++ b/test/CodeGen/SystemZ/Large/branch-range-06.py
@@ -82,7 +82,7 @@ print ''
for i in xrange(branch_blocks):
next = 'before%d' % (i + 1) if i + 1 < branch_blocks else 'main'
print 'before%d:' % i
- print ' %%bcur%d = load i8 *%%stop' % i
+ print ' %%bcur%d = load i8 , i8 *%%stop' % i
print ' %%bext%d = sext i8 %%bcur%d to i64' % (i, i)
print ' %%btest%d = icmp slt i64 %%bext%d, %d' % (i, i, i + 50)
print ' br i1 %%btest%d, label %%after0, label %%%s' % (i, next)
@@ -94,11 +94,11 @@ for i in xrange(0, main_size, 6):
a, b = b, a + b
offset = 4096 + b % 500000
value = a % 256
- print ' %%ptr%d = getelementptr i8 *%%base, i64 %d' % (i, offset)
+ print ' %%ptr%d = getelementptr i8, i8 *%%base, i64 %d' % (i, offset)
print ' store volatile i8 %d, i8 *%%ptr%d' % (value, i)
for i in xrange(branch_blocks):
- print ' %%acur%d = load i8 *%%stop' % i
+ print ' %%acur%d = load i8 , i8 *%%stop' % i
print ' %%aext%d = sext i8 %%acur%d to i64' % (i, i)
print ' %%atest%d = icmp slt i64 %%aext%d, %d' % (i, i, i + 100)
print ' br i1 %%atest%d, label %%main, label %%after%d' % (i, i)
diff --git a/test/CodeGen/SystemZ/Large/branch-range-07.py b/test/CodeGen/SystemZ/Large/branch-range-07.py
index 90c442092e82..c5fef10577f6 100644
--- a/test/CodeGen/SystemZ/Large/branch-range-07.py
+++ b/test/CodeGen/SystemZ/Large/branch-range-07.py
@@ -39,8 +39,8 @@ print 'define void @f1(i8 *%base, i32 *%counts) {'
print 'entry:'
for i in xrange(branch_blocks - 1, -1, -1):
- print ' %%countptr%d = getelementptr i32 *%%counts, i64 %d' % (i, i)
- print ' %%initcount%d = load i32 *%%countptr%d' % (i, i)
+ print ' %%countptr%d = getelementptr i32, i32 *%%counts, i64 %d' % (i, i)
+ print ' %%initcount%d = load i32 , i32 *%%countptr%d' % (i, i)
print ' br label %%loop%d' % i
print 'loop%d:' % i
@@ -54,7 +54,7 @@ for i in xrange(0, main_size, 6):
a, b = b, a + b
offset = 4096 + b % 500000
value = a % 256
- print ' %%ptr%d = getelementptr i8 *%%base, i64 %d' % (i, offset)
+ print ' %%ptr%d = getelementptr i8, i8 *%%base, i64 %d' % (i, offset)
print ' store volatile i8 %d, i8 *%%ptr%d' % (value, i)
for i in xrange(branch_blocks):
diff --git a/test/CodeGen/SystemZ/Large/branch-range-08.py b/test/CodeGen/SystemZ/Large/branch-range-08.py
index ac1b1370a3e3..8b6b67398337 100644
--- a/test/CodeGen/SystemZ/Large/branch-range-08.py
+++ b/test/CodeGen/SystemZ/Large/branch-range-08.py
@@ -40,8 +40,8 @@ print 'define void @f1(i8 *%base, i64 *%counts) {'
print 'entry:'
for i in xrange(branch_blocks - 1, -1, -1):
- print ' %%countptr%d = getelementptr i64 *%%counts, i64 %d' % (i, i)
- print ' %%initcount%d = load i64 *%%countptr%d' % (i, i)
+ print ' %%countptr%d = getelementptr i64, i64 *%%counts, i64 %d' % (i, i)
+ print ' %%initcount%d = load i64 , i64 *%%countptr%d' % (i, i)
print ' br label %%loop%d' % i
print 'loop%d:' % i
@@ -55,7 +55,7 @@ for i in xrange(0, main_size, 6):
a, b = b, a + b
offset = 4096 + b % 500000
value = a % 256
- print ' %%ptr%d = getelementptr i8 *%%base, i64 %d' % (i, offset)
+ print ' %%ptr%d = getelementptr i8, i8 *%%base, i64 %d' % (i, offset)
print ' store volatile i8 %d, i8 *%%ptr%d' % (value, i)
for i in xrange(branch_blocks):
diff --git a/test/CodeGen/SystemZ/Large/branch-range-09.py b/test/CodeGen/SystemZ/Large/branch-range-09.py
index bc712cb164ea..d4693358f502 100644
--- a/test/CodeGen/SystemZ/Large/branch-range-09.py
+++ b/test/CodeGen/SystemZ/Large/branch-range-09.py
@@ -78,8 +78,8 @@ print ''
for i in xrange(branch_blocks):
next = 'before%d' % (i + 1) if i + 1 < branch_blocks else 'main'
print 'before%d:' % i
- print ' %%bstop%d = getelementptr i8 *%%stop, i64 %d' % (i, i)
- print ' %%bcur%d = load i8 *%%bstop%d' % (i, i)
+ print ' %%bstop%d = getelementptr i8, i8 *%%stop, i64 %d' % (i, i)
+ print ' %%bcur%d = load i8 , i8 *%%bstop%d' % (i, i)
print ' %%bext%d = sext i8 %%bcur%d to i32' % (i, i)
print ' %%btest%d = icmp ult i32 %%limit, %%bext%d' % (i, i)
print ' br i1 %%btest%d, label %%after0, label %%%s' % (i, next)
@@ -91,12 +91,12 @@ for i in xrange(0, main_size, 6):
a, b = b, a + b
offset = 4096 + b % 500000
value = a % 256
- print ' %%ptr%d = getelementptr i8 *%%base, i64 %d' % (i, offset)
+ print ' %%ptr%d = getelementptr i8, i8 *%%base, i64 %d' % (i, offset)
print ' store volatile i8 %d, i8 *%%ptr%d' % (value, i)
for i in xrange(branch_blocks):
- print ' %%astop%d = getelementptr i8 *%%stop, i64 %d' % (i, i + 25)
- print ' %%acur%d = load i8 *%%astop%d' % (i, i)
+ print ' %%astop%d = getelementptr i8, i8 *%%stop, i64 %d' % (i, i + 25)
+ print ' %%acur%d = load i8 , i8 *%%astop%d' % (i, i)
print ' %%aext%d = sext i8 %%acur%d to i32' % (i, i)
print ' %%atest%d = icmp ult i32 %%limit, %%aext%d' % (i, i)
print ' br i1 %%atest%d, label %%main, label %%after%d' % (i, i)
diff --git a/test/CodeGen/SystemZ/Large/branch-range-10.py b/test/CodeGen/SystemZ/Large/branch-range-10.py
index 8c483c33724c..c928081f5544 100644
--- a/test/CodeGen/SystemZ/Large/branch-range-10.py
+++ b/test/CodeGen/SystemZ/Large/branch-range-10.py
@@ -82,8 +82,8 @@ print ''
for i in xrange(branch_blocks):
next = 'before%d' % (i + 1) if i + 1 < branch_blocks else 'main'
print 'before%d:' % i
- print ' %%bstop%d = getelementptr i8 *%%stop, i64 %d' % (i, i)
- print ' %%bcur%d = load i8 *%%bstop%d' % (i, i)
+ print ' %%bstop%d = getelementptr i8, i8 *%%stop, i64 %d' % (i, i)
+ print ' %%bcur%d = load i8 , i8 *%%bstop%d' % (i, i)
print ' %%bext%d = sext i8 %%bcur%d to i64' % (i, i)
print ' %%btest%d = icmp ult i64 %%limit, %%bext%d' % (i, i)
print ' br i1 %%btest%d, label %%after0, label %%%s' % (i, next)
@@ -95,12 +95,12 @@ for i in xrange(0, main_size, 6):
a, b = b, a + b
offset = 4096 + b % 500000
value = a % 256
- print ' %%ptr%d = getelementptr i8 *%%base, i64 %d' % (i, offset)
+ print ' %%ptr%d = getelementptr i8, i8 *%%base, i64 %d' % (i, offset)
print ' store volatile i8 %d, i8 *%%ptr%d' % (value, i)
for i in xrange(branch_blocks):
- print ' %%astop%d = getelementptr i8 *%%stop, i64 %d' % (i, i + 25)
- print ' %%acur%d = load i8 *%%astop%d' % (i, i)
+ print ' %%astop%d = getelementptr i8, i8 *%%stop, i64 %d' % (i, i + 25)
+ print ' %%acur%d = load i8 , i8 *%%astop%d' % (i, i)
print ' %%aext%d = sext i8 %%acur%d to i64' % (i, i)
print ' %%atest%d = icmp ult i64 %%limit, %%aext%d' % (i, i)
print ' br i1 %%atest%d, label %%main, label %%after%d' % (i, i)
diff --git a/test/CodeGen/SystemZ/Large/branch-range-11.py b/test/CodeGen/SystemZ/Large/branch-range-11.py
index 054610380e31..85166bc15656 100644
--- a/test/CodeGen/SystemZ/Large/branch-range-11.py
+++ b/test/CodeGen/SystemZ/Large/branch-range-11.py
@@ -98,8 +98,8 @@ print ''
for i in xrange(branch_blocks):
next = 'before%d' % (i + 1) if i + 1 < branch_blocks else 'main'
print 'before%d:' % i
- print ' %%bcur%da = load i32 *%%stopa' % i
- print ' %%bcur%db = load i32 *%%stopb' % i
+ print ' %%bcur%da = load i32 , i32 *%%stopa' % i
+ print ' %%bcur%db = load i32 , i32 *%%stopb' % i
print ' %%bsub%d = sub i32 %%bcur%da, %%bcur%db' % (i, i, i)
print ' %%btest%d = icmp ult i32 %%bsub%d, %d' % (i, i, i + 50)
print ' br i1 %%btest%d, label %%after0, label %%%s' % (i, next)
@@ -111,12 +111,12 @@ for i in xrange(0, main_size, 6):
a, b = b, a + b
offset = 4096 + b % 500000
value = a % 256
- print ' %%ptr%d = getelementptr i8 *%%base, i64 %d' % (i, offset)
+ print ' %%ptr%d = getelementptr i8, i8 *%%base, i64 %d' % (i, offset)
print ' store volatile i8 %d, i8 *%%ptr%d' % (value, i)
for i in xrange(branch_blocks):
- print ' %%acur%da = load i32 *%%stopa' % i
- print ' %%acur%db = load i32 *%%stopb' % i
+ print ' %%acur%da = load i32 , i32 *%%stopa' % i
+ print ' %%acur%db = load i32 , i32 *%%stopb' % i
print ' %%asub%d = sub i32 %%acur%da, %%acur%db' % (i, i, i)
print ' %%atest%d = icmp ult i32 %%asub%d, %d' % (i, i, i + 100)
print ' br i1 %%atest%d, label %%main, label %%after%d' % (i, i)
diff --git a/test/CodeGen/SystemZ/Large/branch-range-12.py b/test/CodeGen/SystemZ/Large/branch-range-12.py
index 626c8998d5d4..e1d9e2977d41 100644
--- a/test/CodeGen/SystemZ/Large/branch-range-12.py
+++ b/test/CodeGen/SystemZ/Large/branch-range-12.py
@@ -98,8 +98,8 @@ print ''
for i in xrange(branch_blocks):
next = 'before%d' % (i + 1) if i + 1 < branch_blocks else 'main'
print 'before%d:' % i
- print ' %%bcur%da = load i64 *%%stopa' % i
- print ' %%bcur%db = load i64 *%%stopb' % i
+ print ' %%bcur%da = load i64 , i64 *%%stopa' % i
+ print ' %%bcur%db = load i64 , i64 *%%stopb' % i
print ' %%bsub%d = sub i64 %%bcur%da, %%bcur%db' % (i, i, i)
print ' %%btest%d = icmp ult i64 %%bsub%d, %d' % (i, i, i + 50)
print ' br i1 %%btest%d, label %%after0, label %%%s' % (i, next)
@@ -111,12 +111,12 @@ for i in xrange(0, main_size, 6):
a, b = b, a + b
offset = 4096 + b % 500000
value = a % 256
- print ' %%ptr%d = getelementptr i8 *%%base, i64 %d' % (i, offset)
+ print ' %%ptr%d = getelementptr i8, i8 *%%base, i64 %d' % (i, offset)
print ' store volatile i8 %d, i8 *%%ptr%d' % (value, i)
for i in xrange(branch_blocks):
- print ' %%acur%da = load i64 *%%stopa' % i
- print ' %%acur%db = load i64 *%%stopb' % i
+ print ' %%acur%da = load i64 , i64 *%%stopa' % i
+ print ' %%acur%db = load i64 , i64 *%%stopb' % i
print ' %%asub%d = sub i64 %%acur%da, %%acur%db' % (i, i, i)
print ' %%atest%d = icmp ult i64 %%asub%d, %d' % (i, i, i + 100)
print ' br i1 %%atest%d, label %%main, label %%after%d' % (i, i)
diff --git a/test/CodeGen/SystemZ/Large/lit.local.cfg b/test/CodeGen/SystemZ/Large/lit.local.cfg
index 4f22a970c3a6..d9d464726135 100644
--- a/test/CodeGen/SystemZ/Large/lit.local.cfg
+++ b/test/CodeGen/SystemZ/Large/lit.local.cfg
@@ -1,8 +1,8 @@
config.suffixes = ['.py']
# These tests take on the order of seconds to run, so skip them unless
-# running natively.
-if config.root.host_arch not in ['SystemZ']:
+# we're running long tests.
+if 'long_tests' not in config.available_features:
config.unsupported = True
if not 'SystemZ' in config.root.targets:
diff --git a/test/CodeGen/SystemZ/Large/spill-01.py b/test/CodeGen/SystemZ/Large/spill-01.py
index 3c1d0b611bb4..f59f607d5beb 100644
--- a/test/CodeGen/SystemZ/Large/spill-01.py
+++ b/test/CodeGen/SystemZ/Large/spill-01.py
@@ -25,8 +25,8 @@ print ''
print 'define void @f1(i64 *%base0, i64 *%base1) {'
for i in range(count):
- print ' %%ptr%d = getelementptr i64 *%%base%d, i64 %d' % (i, i % 2, i / 2)
- print ' %%val%d = load i64 *%%ptr%d' % (i, i)
+ print ' %%ptr%d = getelementptr i64, i64 *%%base%d, i64 %d' % (i, i % 2, i / 2)
+ print ' %%val%d = load i64 , i64 *%%ptr%d' % (i, i)
print ''
print ' call void @foo()'
diff --git a/test/CodeGen/SystemZ/Large/spill-02.py b/test/CodeGen/SystemZ/Large/spill-02.py
index 0aa43d18054b..4ccfa11a0d3e 100644
--- a/test/CodeGen/SystemZ/Large/spill-02.py
+++ b/test/CodeGen/SystemZ/Large/spill-02.py
@@ -29,7 +29,7 @@ print 'entry:'
# Make the allocation big, so that it goes at the top of the frame.
print ' %array = alloca [1000 x i64]'
-print ' %area = getelementptr [1000 x i64] *%array, i64 0, i64 0'
+print ' %area = getelementptr [1000 x i64], [1000 x i64] *%array, i64 0, i64 0'
print ' %%base = call i64 *@foo(i64 *%%area%s)' % (', i64 0' * args)
print ''
@@ -37,8 +37,8 @@ print ''
# another for %base, so we need 14 live values.
count = 14
for i in range(count):
- print ' %%ptr%d = getelementptr i64 *%%base, i64 %d' % (i, i / 2)
- print ' %%val%d = load volatile i64 *%%ptr%d' % (i, i)
+ print ' %%ptr%d = getelementptr i64, i64 *%%base, i64 %d' % (i, i / 2)
+ print ' %%val%d = load volatile i64 , i64 *%%ptr%d' % (i, i)
print ''
# Encourage the register allocator to give preference to these %vals
diff --git a/test/CodeGen/SystemZ/addr-01.ll b/test/CodeGen/SystemZ/addr-01.ll
index d0960cdb1047..736efe8887d6 100644
--- a/test/CodeGen/SystemZ/addr-01.ll
+++ b/test/CodeGen/SystemZ/addr-01.ll
@@ -10,7 +10,7 @@ define void @f1(i64 %addr, i64 %index) {
; CHECK: br %r14
%add = add i64 %addr, %index
%ptr = inttoptr i64 %add to i8 *
- %a = load volatile i8 *%ptr
+ %a = load volatile i8 , i8 *%ptr
ret void
}
@@ -22,7 +22,7 @@ define void @f2(i64 %addr, i64 %index) {
%add1 = add i64 %addr, %index
%add2 = add i64 %add1, 100
%ptr = inttoptr i64 %add2 to i8 *
- %a = load volatile i8 *%ptr
+ %a = load volatile i8 , i8 *%ptr
ret void
}
@@ -34,7 +34,7 @@ define void @f3(i64 %addr, i64 %index) {
%add1 = add i64 %addr, 100
%add2 = add i64 %add1, %index
%ptr = inttoptr i64 %add2 to i8 *
- %a = load volatile i8 *%ptr
+ %a = load volatile i8 , i8 *%ptr
ret void
}
@@ -46,7 +46,7 @@ define void @f4(i64 %addr, i64 %index) {
%add1 = add i64 %addr, %index
%add2 = sub i64 %add1, 100
%ptr = inttoptr i64 %add2 to i8 *
- %a = load volatile i8 *%ptr
+ %a = load volatile i8 , i8 *%ptr
ret void
}
@@ -58,7 +58,7 @@ define void @f5(i64 %addr, i64 %index) {
%add1 = sub i64 %addr, 100
%add2 = add i64 %add1, %index
%ptr = inttoptr i64 %add2 to i8 *
- %a = load volatile i8 *%ptr
+ %a = load volatile i8 , i8 *%ptr
ret void
}
@@ -72,7 +72,7 @@ define void @f6(i64 %addr, i64 %index) {
%or = or i64 %aligned, 6
%add = add i64 %or, %index
%ptr = inttoptr i64 %add to i8 *
- %a = load volatile i8 *%ptr
+ %a = load volatile i8 , i8 *%ptr
ret void
}
@@ -85,7 +85,7 @@ define void @f7(i64 %addr, i64 %index) {
%or = or i64 %addr, 6
%add = add i64 %or, %index
%ptr = inttoptr i64 %add to i8 *
- %a = load volatile i8 *%ptr
+ %a = load volatile i8 , i8 *%ptr
ret void
}
@@ -102,6 +102,6 @@ define void @f8(i64 %addr, i64 %index) {
%add = add i64 %aligned, %index
%or = or i64 %add, 6
%ptr = inttoptr i64 %or to i8 *
- %a = load volatile i8 *%ptr
+ %a = load volatile i8 , i8 *%ptr
ret void
}
diff --git a/test/CodeGen/SystemZ/addr-02.ll b/test/CodeGen/SystemZ/addr-02.ll
index 56c48794b072..7e9b2f18ef82 100644
--- a/test/CodeGen/SystemZ/addr-02.ll
+++ b/test/CodeGen/SystemZ/addr-02.ll
@@ -11,7 +11,7 @@ define void @f1(i64 %addr, i64 %index, i8 **%dst) {
; CHECK: br %r14
%add = add i64 %addr, %index
%ptr = inttoptr i64 %add to i8 *
- %a = load volatile i8 *%ptr
+ %a = load volatile i8 , i8 *%ptr
store volatile i8 *%ptr, i8 **%dst
ret void
}
@@ -24,7 +24,7 @@ define void @f2(i64 %addr, i64 %index, i8 **%dst) {
%add1 = add i64 %addr, %index
%add2 = add i64 %add1, 100
%ptr = inttoptr i64 %add2 to i8 *
- %a = load volatile i8 *%ptr
+ %a = load volatile i8 , i8 *%ptr
store volatile i8 *%ptr, i8 **%dst
ret void
}
@@ -37,7 +37,7 @@ define void @f3(i64 %addr, i64 %index, i8 **%dst) {
%add1 = add i64 %addr, 100
%add2 = add i64 %add1, %index
%ptr = inttoptr i64 %add2 to i8 *
- %a = load volatile i8 *%ptr
+ %a = load volatile i8 , i8 *%ptr
store volatile i8 *%ptr, i8 **%dst
ret void
}
@@ -50,7 +50,7 @@ define void @f4(i64 %addr, i64 %index, i8 **%dst) {
%add1 = add i64 %addr, %index
%add2 = sub i64 %add1, 100
%ptr = inttoptr i64 %add2 to i8 *
- %a = load volatile i8 *%ptr
+ %a = load volatile i8 , i8 *%ptr
store volatile i8 *%ptr, i8 **%dst
ret void
}
@@ -63,7 +63,7 @@ define void @f5(i64 %addr, i64 %index, i8 **%dst) {
%add1 = sub i64 %addr, 100
%add2 = add i64 %add1, %index
%ptr = inttoptr i64 %add2 to i8 *
- %a = load volatile i8 *%ptr
+ %a = load volatile i8 , i8 *%ptr
store volatile i8 *%ptr, i8 **%dst
ret void
}
@@ -78,7 +78,7 @@ define void @f6(i64 %addr, i64 %index, i8 **%dst) {
%or = or i64 %aligned, 6
%add = add i64 %or, %index
%ptr = inttoptr i64 %add to i8 *
- %a = load volatile i8 *%ptr
+ %a = load volatile i8 , i8 *%ptr
store volatile i8 *%ptr, i8 **%dst
ret void
}
@@ -92,7 +92,7 @@ define void @f7(i64 %addr, i64 %index, i8 **%dst) {
%or = or i64 %addr, 6
%add = add i64 %or, %index
%ptr = inttoptr i64 %add to i8 *
- %a = load volatile i8 *%ptr
+ %a = load volatile i8 , i8 *%ptr
store volatile i8 *%ptr, i8 **%dst
ret void
}
@@ -110,7 +110,7 @@ define void @f8(i64 %addr, i64 %index, i8 **%dst) {
%add = add i64 %aligned, %index
%or = or i64 %add, 6
%ptr = inttoptr i64 %or to i8 *
- %a = load volatile i8 *%ptr
+ %a = load volatile i8 , i8 *%ptr
store volatile i8 *%ptr, i8 **%dst
ret void
}
diff --git a/test/CodeGen/SystemZ/addr-03.ll b/test/CodeGen/SystemZ/addr-03.ll
index 1146926a4c2e..b2fd400c6bda 100644
--- a/test/CodeGen/SystemZ/addr-03.ll
+++ b/test/CodeGen/SystemZ/addr-03.ll
@@ -7,7 +7,7 @@ define void @f1() {
; CHECK: lb %r0, 0
; CHECK: br %r14
%ptr = inttoptr i64 0 to i8 *
- %val = load volatile i8 *%ptr
+ %val = load volatile i8 , i8 *%ptr
ret void
}
@@ -16,7 +16,7 @@ define void @f2() {
; CHECK: lb %r0, -524288
; CHECK: br %r14
%ptr = inttoptr i64 -524288 to i8 *
- %val = load volatile i8 *%ptr
+ %val = load volatile i8 , i8 *%ptr
ret void
}
@@ -25,7 +25,7 @@ define void @f3() {
; CHECK-NOT: lb %r0, -524289
; CHECK: br %r14
%ptr = inttoptr i64 -524289 to i8 *
- %val = load volatile i8 *%ptr
+ %val = load volatile i8 , i8 *%ptr
ret void
}
@@ -34,7 +34,7 @@ define void @f4() {
; CHECK: lb %r0, 524287
; CHECK: br %r14
%ptr = inttoptr i64 524287 to i8 *
- %val = load volatile i8 *%ptr
+ %val = load volatile i8 , i8 *%ptr
ret void
}
@@ -43,6 +43,6 @@ define void @f5() {
; CHECK-NOT: lb %r0, 524288
; CHECK: br %r14
%ptr = inttoptr i64 524288 to i8 *
- %val = load volatile i8 *%ptr
+ %val = load volatile i8 , i8 *%ptr
ret void
}
diff --git a/test/CodeGen/SystemZ/alias-01.ll b/test/CodeGen/SystemZ/alias-01.ll
index 89a731830187..852d18e8de79 100644
--- a/test/CodeGen/SystemZ/alias-01.ll
+++ b/test/CodeGen/SystemZ/alias-01.ll
@@ -7,7 +7,7 @@ define void @f1(<16 x i32> *%src1, <16 x float> *%dest) {
; CHECK-LABEL: f1:
; CHECK-NOT: %r15
; CHECK: br %r14
- %val = load <16 x i32> *%src1, !tbaa !1
+ %val = load <16 x i32> , <16 x i32> *%src1, !tbaa !1
%add = add <16 x i32> %val, %val
%res = bitcast <16 x i32> %add to <16 x float>
store <16 x float> %res, <16 x float> *%dest, !tbaa !2
diff --git a/test/CodeGen/SystemZ/alloca-01.ll b/test/CodeGen/SystemZ/alloca-01.ll
index 2ddefd70cc9d..06c336a331d8 100644
--- a/test/CodeGen/SystemZ/alloca-01.ll
+++ b/test/CodeGen/SystemZ/alloca-01.ll
@@ -52,13 +52,13 @@ define i64 @f1(i64 %length, i64 %index) {
; CHECK-FP: lgr %r11, %r15
; CHECK-FP: lmg %r6, %r15, 224(%r11)
%a = alloca i8, i64 %length
- %b = getelementptr i8 *%a, i64 1
+ %b = getelementptr i8, i8 *%a, i64 1
%cindex = add i64 %index, 3919
- %c = getelementptr i8 *%a, i64 %cindex
+ %c = getelementptr i8, i8 *%a, i64 %cindex
%dindex = add i64 %index, 3920
- %d = getelementptr i8 *%a, i64 %dindex
+ %d = getelementptr i8, i8 *%a, i64 %dindex
%eindex = add i64 %index, 4095
- %e = getelementptr i8 *%a, i64 %eindex
+ %e = getelementptr i8, i8 *%a, i64 %eindex
%count = call i64 @bar(i8 *%a, i8 *%b, i8 *%c, i8 *%d, i8 *%e, i64 0, i64 0)
%res = add i64 %count, 1
ret i64 %res
diff --git a/test/CodeGen/SystemZ/alloca-02.ll b/test/CodeGen/SystemZ/alloca-02.ll
index b5787b102358..092ad86e8529 100644
--- a/test/CodeGen/SystemZ/alloca-02.ll
+++ b/test/CodeGen/SystemZ/alloca-02.ll
@@ -38,13 +38,13 @@ define i64 @f1(i64 %length, i64 %index) {
; CHECK-E: stcy [[TMP]], 4096({{%r3,%r2|%r2,%r3}})
%a = alloca i8, i64 %length
store volatile i8 0, i8 *%a
- %b = getelementptr i8 *%a, i64 4095
+ %b = getelementptr i8, i8 *%a, i64 4095
store volatile i8 1, i8 *%b
- %c = getelementptr i8 *%a, i64 %index
+ %c = getelementptr i8, i8 *%a, i64 %index
store volatile i8 2, i8 *%c
- %d = getelementptr i8 *%c, i64 4095
+ %d = getelementptr i8, i8 *%c, i64 4095
store volatile i8 3, i8 *%d
- %e = getelementptr i8 *%d, i64 1
+ %e = getelementptr i8, i8 *%d, i64 1
store volatile i8 4, i8 *%e
%count = call i64 @bar(i8 *%a)
%res = add i64 %count, 1
diff --git a/test/CodeGen/SystemZ/and-01.ll b/test/CodeGen/SystemZ/and-01.ll
index 3b230ba1081f..56fe2799fdc0 100644
--- a/test/CodeGen/SystemZ/and-01.ll
+++ b/test/CodeGen/SystemZ/and-01.ll
@@ -19,7 +19,7 @@ define i32 @f2(i32 %a, i32 *%src) {
; CHECK-LABEL: f2:
; CHECK: n %r2, 0(%r3)
; CHECK: br %r14
- %b = load i32 *%src
+ %b = load i32 , i32 *%src
%and = and i32 %a, %b
ret i32 %and
}
@@ -29,8 +29,8 @@ define i32 @f3(i32 %a, i32 *%src) {
; CHECK-LABEL: f3:
; CHECK: n %r2, 4092(%r3)
; CHECK: br %r14
- %ptr = getelementptr i32 *%src, i64 1023
- %b = load i32 *%ptr
+ %ptr = getelementptr i32, i32 *%src, i64 1023
+ %b = load i32 , i32 *%ptr
%and = and i32 %a, %b
ret i32 %and
}
@@ -40,8 +40,8 @@ define i32 @f4(i32 %a, i32 *%src) {
; CHECK-LABEL: f4:
; CHECK: ny %r2, 4096(%r3)
; CHECK: br %r14
- %ptr = getelementptr i32 *%src, i64 1024
- %b = load i32 *%ptr
+ %ptr = getelementptr i32, i32 *%src, i64 1024
+ %b = load i32 , i32 *%ptr
%and = and i32 %a, %b
ret i32 %and
}
@@ -51,8 +51,8 @@ define i32 @f5(i32 %a, i32 *%src) {
; CHECK-LABEL: f5:
; CHECK: ny %r2, 524284(%r3)
; CHECK: br %r14
- %ptr = getelementptr i32 *%src, i64 131071
- %b = load i32 *%ptr
+ %ptr = getelementptr i32, i32 *%src, i64 131071
+ %b = load i32 , i32 *%ptr
%and = and i32 %a, %b
ret i32 %and
}
@@ -64,8 +64,8 @@ define i32 @f6(i32 %a, i32 *%src) {
; CHECK: agfi %r3, 524288
; CHECK: n %r2, 0(%r3)
; CHECK: br %r14
- %ptr = getelementptr i32 *%src, i64 131072
- %b = load i32 *%ptr
+ %ptr = getelementptr i32, i32 *%src, i64 131072
+ %b = load i32 , i32 *%ptr
%and = and i32 %a, %b
ret i32 %and
}
@@ -75,8 +75,8 @@ define i32 @f7(i32 %a, i32 *%src) {
; CHECK-LABEL: f7:
; CHECK: ny %r2, -4(%r3)
; CHECK: br %r14
- %ptr = getelementptr i32 *%src, i64 -1
- %b = load i32 *%ptr
+ %ptr = getelementptr i32, i32 *%src, i64 -1
+ %b = load i32 , i32 *%ptr
%and = and i32 %a, %b
ret i32 %and
}
@@ -86,8 +86,8 @@ define i32 @f8(i32 %a, i32 *%src) {
; CHECK-LABEL: f8:
; CHECK: ny %r2, -524288(%r3)
; CHECK: br %r14
- %ptr = getelementptr i32 *%src, i64 -131072
- %b = load i32 *%ptr
+ %ptr = getelementptr i32, i32 *%src, i64 -131072
+ %b = load i32 , i32 *%ptr
%and = and i32 %a, %b
ret i32 %and
}
@@ -99,8 +99,8 @@ define i32 @f9(i32 %a, i32 *%src) {
; CHECK: agfi %r3, -524292
; CHECK: n %r2, 0(%r3)
; CHECK: br %r14
- %ptr = getelementptr i32 *%src, i64 -131073
- %b = load i32 *%ptr
+ %ptr = getelementptr i32, i32 *%src, i64 -131073
+ %b = load i32 , i32 *%ptr
%and = and i32 %a, %b
ret i32 %and
}
@@ -113,7 +113,7 @@ define i32 @f10(i32 %a, i64 %src, i64 %index) {
%add1 = add i64 %src, %index
%add2 = add i64 %add1, 4092
%ptr = inttoptr i64 %add2 to i32 *
- %b = load i32 *%ptr
+ %b = load i32 , i32 *%ptr
%and = and i32 %a, %b
ret i32 %and
}
@@ -126,7 +126,7 @@ define i32 @f11(i32 %a, i64 %src, i64 %index) {
%add1 = add i64 %src, %index
%add2 = add i64 %add1, 4096
%ptr = inttoptr i64 %add2 to i32 *
- %b = load i32 *%ptr
+ %b = load i32 , i32 *%ptr
%and = and i32 %a, %b
ret i32 %and
}
@@ -137,26 +137,26 @@ define i32 @f12(i32 *%ptr0) {
; CHECK: brasl %r14, foo@PLT
; CHECK: n %r2, 16{{[04]}}(%r15)
; CHECK: br %r14
- %ptr1 = getelementptr i32 *%ptr0, i64 2
- %ptr2 = getelementptr i32 *%ptr0, i64 4
- %ptr3 = getelementptr i32 *%ptr0, i64 6
- %ptr4 = getelementptr i32 *%ptr0, i64 8
- %ptr5 = getelementptr i32 *%ptr0, i64 10
- %ptr6 = getelementptr i32 *%ptr0, i64 12
- %ptr7 = getelementptr i32 *%ptr0, i64 14
- %ptr8 = getelementptr i32 *%ptr0, i64 16
- %ptr9 = getelementptr i32 *%ptr0, i64 18
-
- %val0 = load i32 *%ptr0
- %val1 = load i32 *%ptr1
- %val2 = load i32 *%ptr2
- %val3 = load i32 *%ptr3
- %val4 = load i32 *%ptr4
- %val5 = load i32 *%ptr5
- %val6 = load i32 *%ptr6
- %val7 = load i32 *%ptr7
- %val8 = load i32 *%ptr8
- %val9 = load i32 *%ptr9
+ %ptr1 = getelementptr i32, i32 *%ptr0, i64 2
+ %ptr2 = getelementptr i32, i32 *%ptr0, i64 4
+ %ptr3 = getelementptr i32, i32 *%ptr0, i64 6
+ %ptr4 = getelementptr i32, i32 *%ptr0, i64 8
+ %ptr5 = getelementptr i32, i32 *%ptr0, i64 10
+ %ptr6 = getelementptr i32, i32 *%ptr0, i64 12
+ %ptr7 = getelementptr i32, i32 *%ptr0, i64 14
+ %ptr8 = getelementptr i32, i32 *%ptr0, i64 16
+ %ptr9 = getelementptr i32, i32 *%ptr0, i64 18
+
+ %val0 = load i32 , i32 *%ptr0
+ %val1 = load i32 , i32 *%ptr1
+ %val2 = load i32 , i32 *%ptr2
+ %val3 = load i32 , i32 *%ptr3
+ %val4 = load i32 , i32 *%ptr4
+ %val5 = load i32 , i32 *%ptr5
+ %val6 = load i32 , i32 *%ptr6
+ %val7 = load i32 , i32 *%ptr7
+ %val8 = load i32 , i32 *%ptr8
+ %val9 = load i32 , i32 *%ptr9
%ret = call i32 @foo()
diff --git a/test/CodeGen/SystemZ/and-03.ll b/test/CodeGen/SystemZ/and-03.ll
index a0560d46e4ea..5c15d2462b89 100644
--- a/test/CodeGen/SystemZ/and-03.ll
+++ b/test/CodeGen/SystemZ/and-03.ll
@@ -19,7 +19,7 @@ define i64 @f2(i64 %a, i64 *%src) {
; CHECK-LABEL: f2:
; CHECK: ng %r2, 0(%r3)
; CHECK: br %r14
- %b = load i64 *%src
+ %b = load i64 , i64 *%src
%and = and i64 %a, %b
ret i64 %and
}
@@ -29,8 +29,8 @@ define i64 @f3(i64 %a, i64 *%src) {
; CHECK-LABEL: f3:
; CHECK: ng %r2, 524280(%r3)
; CHECK: br %r14
- %ptr = getelementptr i64 *%src, i64 65535
- %b = load i64 *%ptr
+ %ptr = getelementptr i64, i64 *%src, i64 65535
+ %b = load i64 , i64 *%ptr
%and = and i64 %a, %b
ret i64 %and
}
@@ -42,8 +42,8 @@ define i64 @f4(i64 %a, i64 *%src) {
; CHECK: agfi %r3, 524288
; CHECK: ng %r2, 0(%r3)
; CHECK: br %r14
- %ptr = getelementptr i64 *%src, i64 65536
- %b = load i64 *%ptr
+ %ptr = getelementptr i64, i64 *%src, i64 65536
+ %b = load i64 , i64 *%ptr
%and = and i64 %a, %b
ret i64 %and
}
@@ -53,8 +53,8 @@ define i64 @f5(i64 %a, i64 *%src) {
; CHECK-LABEL: f5:
; CHECK: ng %r2, -8(%r3)
; CHECK: br %r14
- %ptr = getelementptr i64 *%src, i64 -1
- %b = load i64 *%ptr
+ %ptr = getelementptr i64, i64 *%src, i64 -1
+ %b = load i64 , i64 *%ptr
%and = and i64 %a, %b
ret i64 %and
}
@@ -64,8 +64,8 @@ define i64 @f6(i64 %a, i64 *%src) {
; CHECK-LABEL: f6:
; CHECK: ng %r2, -524288(%r3)
; CHECK: br %r14
- %ptr = getelementptr i64 *%src, i64 -65536
- %b = load i64 *%ptr
+ %ptr = getelementptr i64, i64 *%src, i64 -65536
+ %b = load i64 , i64 *%ptr
%and = and i64 %a, %b
ret i64 %and
}
@@ -77,8 +77,8 @@ define i64 @f7(i64 %a, i64 *%src) {
; CHECK: agfi %r3, -524296
; CHECK: ng %r2, 0(%r3)
; CHECK: br %r14
- %ptr = getelementptr i64 *%src, i64 -65537
- %b = load i64 *%ptr
+ %ptr = getelementptr i64, i64 *%src, i64 -65537
+ %b = load i64 , i64 *%ptr
%and = and i64 %a, %b
ret i64 %and
}
@@ -91,7 +91,7 @@ define i64 @f8(i64 %a, i64 %src, i64 %index) {
%add1 = add i64 %src, %index
%add2 = add i64 %add1, 524280
%ptr = inttoptr i64 %add2 to i64 *
- %b = load i64 *%ptr
+ %b = load i64 , i64 *%ptr
%and = and i64 %a, %b
ret i64 %and
}
@@ -102,26 +102,26 @@ define i64 @f9(i64 *%ptr0) {
; CHECK: brasl %r14, foo@PLT
; CHECK: ng %r2, 160(%r15)
; CHECK: br %r14
- %ptr1 = getelementptr i64 *%ptr0, i64 2
- %ptr2 = getelementptr i64 *%ptr0, i64 4
- %ptr3 = getelementptr i64 *%ptr0, i64 6
- %ptr4 = getelementptr i64 *%ptr0, i64 8
- %ptr5 = getelementptr i64 *%ptr0, i64 10
- %ptr6 = getelementptr i64 *%ptr0, i64 12
- %ptr7 = getelementptr i64 *%ptr0, i64 14
- %ptr8 = getelementptr i64 *%ptr0, i64 16
- %ptr9 = getelementptr i64 *%ptr0, i64 18
+ %ptr1 = getelementptr i64, i64 *%ptr0, i64 2
+ %ptr2 = getelementptr i64, i64 *%ptr0, i64 4
+ %ptr3 = getelementptr i64, i64 *%ptr0, i64 6
+ %ptr4 = getelementptr i64, i64 *%ptr0, i64 8
+ %ptr5 = getelementptr i64, i64 *%ptr0, i64 10
+ %ptr6 = getelementptr i64, i64 *%ptr0, i64 12
+ %ptr7 = getelementptr i64, i64 *%ptr0, i64 14
+ %ptr8 = getelementptr i64, i64 *%ptr0, i64 16
+ %ptr9 = getelementptr i64, i64 *%ptr0, i64 18
- %val0 = load i64 *%ptr0
- %val1 = load i64 *%ptr1
- %val2 = load i64 *%ptr2
- %val3 = load i64 *%ptr3
- %val4 = load i64 *%ptr4
- %val5 = load i64 *%ptr5
- %val6 = load i64 *%ptr6
- %val7 = load i64 *%ptr7
- %val8 = load i64 *%ptr8
- %val9 = load i64 *%ptr9
+ %val0 = load i64 , i64 *%ptr0
+ %val1 = load i64 , i64 *%ptr1
+ %val2 = load i64 , i64 *%ptr2
+ %val3 = load i64 , i64 *%ptr3
+ %val4 = load i64 , i64 *%ptr4
+ %val5 = load i64 , i64 *%ptr5
+ %val6 = load i64 , i64 *%ptr6
+ %val7 = load i64 , i64 *%ptr7
+ %val8 = load i64 , i64 *%ptr8
+ %val9 = load i64 , i64 *%ptr9
%ret = call i64 @foo()
diff --git a/test/CodeGen/SystemZ/and-05.ll b/test/CodeGen/SystemZ/and-05.ll
index dafd9d5c51b0..488ec5bc9ae6 100644
--- a/test/CodeGen/SystemZ/and-05.ll
+++ b/test/CodeGen/SystemZ/and-05.ll
@@ -7,7 +7,7 @@ define void @f1(i8 *%ptr) {
; CHECK-LABEL: f1:
; CHECK: ni 0(%r2), 1
; CHECK: br %r14
- %val = load i8 *%ptr
+ %val = load i8 , i8 *%ptr
%and = and i8 %val, -255
store i8 %and, i8 *%ptr
ret void
@@ -18,7 +18,7 @@ define void @f2(i8 *%ptr) {
; CHECK-LABEL: f2:
; CHECK: ni 0(%r2), 254
; CHECK: br %r14
- %val = load i8 *%ptr
+ %val = load i8 , i8 *%ptr
%and = and i8 %val, -2
store i8 %and, i8 *%ptr
ret void
@@ -29,7 +29,7 @@ define void @f3(i8 *%ptr) {
; CHECK-LABEL: f3:
; CHECK: ni 0(%r2), 1
; CHECK: br %r14
- %val = load i8 *%ptr
+ %val = load i8 , i8 *%ptr
%and = and i8 %val, 1
store i8 %and, i8 *%ptr
ret void
@@ -40,7 +40,7 @@ define void @f4(i8 *%ptr) {
; CHECK-LABEL: f4:
; CHECK: ni 0(%r2), 254
; CHECK: br %r14
- %val = load i8 *%ptr
+ %val = load i8 , i8 *%ptr
%and = and i8 %val, 254
store i8 %and, i8 *%ptr
ret void
@@ -51,8 +51,8 @@ define void @f5(i8 *%src) {
; CHECK-LABEL: f5:
; CHECK: ni 4095(%r2), 127
; CHECK: br %r14
- %ptr = getelementptr i8 *%src, i64 4095
- %val = load i8 *%ptr
+ %ptr = getelementptr i8, i8 *%src, i64 4095
+ %val = load i8 , i8 *%ptr
%and = and i8 %val, 127
store i8 %and, i8 *%ptr
ret void
@@ -63,8 +63,8 @@ define void @f6(i8 *%src) {
; CHECK-LABEL: f6:
; CHECK: niy 4096(%r2), 127
; CHECK: br %r14
- %ptr = getelementptr i8 *%src, i64 4096
- %val = load i8 *%ptr
+ %ptr = getelementptr i8, i8 *%src, i64 4096
+ %val = load i8 , i8 *%ptr
%and = and i8 %val, 127
store i8 %and, i8 *%ptr
ret void
@@ -75,8 +75,8 @@ define void @f7(i8 *%src) {
; CHECK-LABEL: f7:
; CHECK: niy 524287(%r2), 127
; CHECK: br %r14
- %ptr = getelementptr i8 *%src, i64 524287
- %val = load i8 *%ptr
+ %ptr = getelementptr i8, i8 *%src, i64 524287
+ %val = load i8 , i8 *%ptr
%and = and i8 %val, 127
store i8 %and, i8 *%ptr
ret void
@@ -89,8 +89,8 @@ define void @f8(i8 *%src) {
; CHECK: agfi %r2, 524288
; CHECK: ni 0(%r2), 127
; CHECK: br %r14
- %ptr = getelementptr i8 *%src, i64 524288
- %val = load i8 *%ptr
+ %ptr = getelementptr i8, i8 *%src, i64 524288
+ %val = load i8 , i8 *%ptr
%and = and i8 %val, 127
store i8 %and, i8 *%ptr
ret void
@@ -101,8 +101,8 @@ define void @f9(i8 *%src) {
; CHECK-LABEL: f9:
; CHECK: niy -1(%r2), 127
; CHECK: br %r14
- %ptr = getelementptr i8 *%src, i64 -1
- %val = load i8 *%ptr
+ %ptr = getelementptr i8, i8 *%src, i64 -1
+ %val = load i8 , i8 *%ptr
%and = and i8 %val, 127
store i8 %and, i8 *%ptr
ret void
@@ -113,8 +113,8 @@ define void @f10(i8 *%src) {
; CHECK-LABEL: f10:
; CHECK: niy -524288(%r2), 127
; CHECK: br %r14
- %ptr = getelementptr i8 *%src, i64 -524288
- %val = load i8 *%ptr
+ %ptr = getelementptr i8, i8 *%src, i64 -524288
+ %val = load i8 , i8 *%ptr
%and = and i8 %val, 127
store i8 %and, i8 *%ptr
ret void
@@ -127,8 +127,8 @@ define void @f11(i8 *%src) {
; CHECK: agfi %r2, -524289
; CHECK: ni 0(%r2), 127
; CHECK: br %r14
- %ptr = getelementptr i8 *%src, i64 -524289
- %val = load i8 *%ptr
+ %ptr = getelementptr i8, i8 *%src, i64 -524289
+ %val = load i8 , i8 *%ptr
%and = and i8 %val, 127
store i8 %and, i8 *%ptr
ret void
@@ -143,7 +143,7 @@ define void @f12(i64 %src, i64 %index) {
%add1 = add i64 %src, %index
%add2 = add i64 %add1, 4095
%ptr = inttoptr i64 %add2 to i8 *
- %val = load i8 *%ptr
+ %val = load i8 , i8 *%ptr
%and = and i8 %val, 127
store i8 %and, i8 *%ptr
ret void
@@ -158,7 +158,7 @@ define void @f13(i64 %src, i64 %index) {
%add1 = add i64 %src, %index
%add2 = add i64 %add1, 4096
%ptr = inttoptr i64 %add2 to i8 *
- %val = load i8 *%ptr
+ %val = load i8 , i8 *%ptr
%and = and i8 %val, 127
store i8 %and, i8 *%ptr
ret void
diff --git a/test/CodeGen/SystemZ/and-06.ll b/test/CodeGen/SystemZ/and-06.ll
index f796618dd4f4..537ee100589a 100644
--- a/test/CodeGen/SystemZ/and-06.ll
+++ b/test/CodeGen/SystemZ/and-06.ll
@@ -8,7 +8,7 @@ define void @f1(i8 *%ptr) {
; CHECK-LABEL: f1:
; CHECK: ni 0(%r2), 254
; CHECK: br %r14
- %val = load i8 *%ptr
+ %val = load i8 , i8 *%ptr
%ext = zext i8 %val to i32
%and = and i32 %ext, -2
%trunc = trunc i32 %and to i8
@@ -21,7 +21,7 @@ define void @f2(i8 *%ptr) {
; CHECK-LABEL: f2:
; CHECK: ni 0(%r2), 254
; CHECK: br %r14
- %val = load i8 *%ptr
+ %val = load i8 , i8 *%ptr
%ext = zext i8 %val to i64
%and = and i64 %ext, -2
%trunc = trunc i64 %and to i8
@@ -34,7 +34,7 @@ define void @f3(i8 *%ptr) {
; CHECK-LABEL: f3:
; CHECK: ni 0(%r2), 254
; CHECK: br %r14
- %val = load i8 *%ptr
+ %val = load i8 , i8 *%ptr
%ext = zext i8 %val to i32
%and = and i32 %ext, 254
%trunc = trunc i32 %and to i8
@@ -47,7 +47,7 @@ define void @f4(i8 *%ptr) {
; CHECK-LABEL: f4:
; CHECK: ni 0(%r2), 254
; CHECK: br %r14
- %val = load i8 *%ptr
+ %val = load i8 , i8 *%ptr
%ext = zext i8 %val to i64
%and = and i64 %ext, 254
%trunc = trunc i64 %and to i8
@@ -60,7 +60,7 @@ define void @f5(i8 *%ptr) {
; CHECK-LABEL: f5:
; CHECK: ni 0(%r2), 254
; CHECK: br %r14
- %val = load i8 *%ptr
+ %val = load i8 , i8 *%ptr
%ext = sext i8 %val to i32
%and = and i32 %ext, -2
%trunc = trunc i32 %and to i8
@@ -73,7 +73,7 @@ define void @f6(i8 *%ptr) {
; CHECK-LABEL: f6:
; CHECK: ni 0(%r2), 254
; CHECK: br %r14
- %val = load i8 *%ptr
+ %val = load i8 , i8 *%ptr
%ext = sext i8 %val to i64
%and = and i64 %ext, -2
%trunc = trunc i64 %and to i8
@@ -86,7 +86,7 @@ define void @f7(i8 *%ptr) {
; CHECK-LABEL: f7:
; CHECK: ni 0(%r2), 254
; CHECK: br %r14
- %val = load i8 *%ptr
+ %val = load i8 , i8 *%ptr
%ext = sext i8 %val to i32
%and = and i32 %ext, 254
%trunc = trunc i32 %and to i8
@@ -99,7 +99,7 @@ define void @f8(i8 *%ptr) {
; CHECK-LABEL: f8:
; CHECK: ni 0(%r2), 254
; CHECK: br %r14
- %val = load i8 *%ptr
+ %val = load i8 , i8 *%ptr
%ext = sext i8 %val to i64
%and = and i64 %ext, 254
%trunc = trunc i64 %and to i8
diff --git a/test/CodeGen/SystemZ/and-08.ll b/test/CodeGen/SystemZ/and-08.ll
index a328c4ea2046..0622950243ed 100644
--- a/test/CodeGen/SystemZ/and-08.ll
+++ b/test/CodeGen/SystemZ/and-08.ll
@@ -12,9 +12,9 @@ define void @f1(i8 *%ptr1) {
; CHECK-LABEL: f1:
; CHECK: nc 1(1,%r2), 0(%r2)
; CHECK: br %r14
- %ptr2 = getelementptr i8 *%ptr1, i64 1
- %val = load i8 *%ptr1
- %old = load i8 *%ptr2
+ %ptr2 = getelementptr i8, i8 *%ptr1, i64 1
+ %val = load i8 , i8 *%ptr1
+ %old = load i8 , i8 *%ptr2
%and = and i8 %val, %old
store i8 %and, i8 *%ptr2
ret void
@@ -25,9 +25,9 @@ define void @f2(i8 *%ptr1) {
; CHECK-LABEL: f2:
; CHECK: nc 1(1,%r2), 0(%r2)
; CHECK: br %r14
- %ptr2 = getelementptr i8 *%ptr1, i64 1
- %val = load i8 *%ptr1
- %old = load i8 *%ptr2
+ %ptr2 = getelementptr i8, i8 *%ptr1, i64 1
+ %val = load i8 , i8 *%ptr1
+ %old = load i8 , i8 *%ptr2
%and = and i8 %old, %val
store i8 %and, i8 *%ptr2
ret void
@@ -39,10 +39,10 @@ define void @f3(i8 *%ptr1) {
; CHECK-LABEL: f3:
; CHECK: nc 1(1,%r2), 0(%r2)
; CHECK: br %r14
- %ptr2 = getelementptr i8 *%ptr1, i64 1
- %val = load i8 *%ptr1
+ %ptr2 = getelementptr i8, i8 *%ptr1, i64 1
+ %val = load i8 , i8 *%ptr1
%extval = zext i8 %val to i32
- %old = load i8 *%ptr2
+ %old = load i8 , i8 *%ptr2
%extold = sext i8 %old to i32
%and = and i32 %extval, %extold
%trunc = trunc i32 %and to i8
@@ -55,10 +55,10 @@ define void @f4(i8 *%ptr1) {
; CHECK-LABEL: f4:
; CHECK: nc 1(1,%r2), 0(%r2)
; CHECK: br %r14
- %ptr2 = getelementptr i8 *%ptr1, i64 1
- %val = load i8 *%ptr1
+ %ptr2 = getelementptr i8, i8 *%ptr1, i64 1
+ %val = load i8 , i8 *%ptr1
%extval = sext i8 %val to i32
- %old = load i8 *%ptr2
+ %old = load i8 , i8 *%ptr2
%extold = zext i8 %old to i32
%and = and i32 %extval, %extold
%trunc = trunc i32 %and to i8
@@ -71,10 +71,10 @@ define void @f5(i8 *%ptr1) {
; CHECK-LABEL: f5:
; CHECK: nc 1(1,%r2), 0(%r2)
; CHECK: br %r14
- %ptr2 = getelementptr i8 *%ptr1, i64 1
- %val = load i8 *%ptr1
+ %ptr2 = getelementptr i8, i8 *%ptr1, i64 1
+ %val = load i8 , i8 *%ptr1
%extval = sext i8 %val to i32
- %old = load i8 *%ptr2
+ %old = load i8 , i8 *%ptr2
%extold = sext i8 %old to i32
%and = and i32 %extval, %extold
%trunc = trunc i32 %and to i8
@@ -87,10 +87,10 @@ define void @f6(i8 *%ptr1) {
; CHECK-LABEL: f6:
; CHECK: nc 1(1,%r2), 0(%r2)
; CHECK: br %r14
- %ptr2 = getelementptr i8 *%ptr1, i64 1
- %val = load i8 *%ptr1
+ %ptr2 = getelementptr i8, i8 *%ptr1, i64 1
+ %val = load i8 , i8 *%ptr1
%extval = zext i8 %val to i32
- %old = load i8 *%ptr2
+ %old = load i8 , i8 *%ptr2
%extold = zext i8 %old to i32
%and = and i32 %extval, %extold
%trunc = trunc i32 %and to i8
@@ -104,10 +104,10 @@ define void @f7(i8 *%ptr1) {
; CHECK-LABEL: f7:
; CHECK: nc 1(1,%r2), 0(%r2)
; CHECK: br %r14
- %ptr2 = getelementptr i8 *%ptr1, i64 1
- %val = load i8 *%ptr1
+ %ptr2 = getelementptr i8, i8 *%ptr1, i64 1
+ %val = load i8 , i8 *%ptr1
%extval = sext i8 %val to i64
- %old = load i8 *%ptr2
+ %old = load i8 , i8 *%ptr2
%extold = zext i8 %old to i64
%and = and i64 %extval, %extold
%trunc = trunc i64 %and to i8
@@ -120,9 +120,9 @@ define void @f8(i16 *%ptr1) {
; CHECK-LABEL: f8:
; CHECK: nc 2(2,%r2), 0(%r2)
; CHECK: br %r14
- %ptr2 = getelementptr i16 *%ptr1, i64 1
- %val = load i16 *%ptr1
- %old = load i16 *%ptr2
+ %ptr2 = getelementptr i16, i16 *%ptr1, i64 1
+ %val = load i16 , i16 *%ptr1
+ %old = load i16 , i16 *%ptr2
%and = and i16 %val, %old
store i16 %and, i16 *%ptr2
ret void
@@ -133,10 +133,10 @@ define void @f9(i16 *%ptr1) {
; CHECK-LABEL: f9:
; CHECK: nc 2(2,%r2), 0(%r2)
; CHECK: br %r14
- %ptr2 = getelementptr i16 *%ptr1, i64 1
- %val = load i16 *%ptr1
+ %ptr2 = getelementptr i16, i16 *%ptr1, i64 1
+ %val = load i16 , i16 *%ptr1
%extval = zext i16 %val to i32
- %old = load i16 *%ptr2
+ %old = load i16 , i16 *%ptr2
%extold = sext i16 %old to i32
%and = and i32 %extval, %extold
%trunc = trunc i32 %and to i16
@@ -149,10 +149,10 @@ define void @f10(i16 *%ptr1) {
; CHECK-LABEL: f10:
; CHECK: nc 2(2,%r2), 0(%r2)
; CHECK: br %r14
- %ptr2 = getelementptr i16 *%ptr1, i64 1
- %val = load i16 *%ptr1
+ %ptr2 = getelementptr i16, i16 *%ptr1, i64 1
+ %val = load i16 , i16 *%ptr1
%extval = sext i16 %val to i64
- %old = load i16 *%ptr2
+ %old = load i16 , i16 *%ptr2
%extold = zext i16 %old to i64
%and = and i64 %extval, %extold
%trunc = trunc i64 %and to i16
@@ -165,9 +165,9 @@ define void @f11(i32 *%ptr1) {
; CHECK-LABEL: f11:
; CHECK: nc 4(4,%r2), 0(%r2)
; CHECK: br %r14
- %ptr2 = getelementptr i32 *%ptr1, i64 1
- %val = load i32 *%ptr1
- %old = load i32 *%ptr2
+ %ptr2 = getelementptr i32, i32 *%ptr1, i64 1
+ %val = load i32 , i32 *%ptr1
+ %old = load i32 , i32 *%ptr2
%and = and i32 %old, %val
store i32 %and, i32 *%ptr2
ret void
@@ -178,10 +178,10 @@ define void @f12(i32 *%ptr1) {
; CHECK-LABEL: f12:
; CHECK: nc 4(4,%r2), 0(%r2)
; CHECK: br %r14
- %ptr2 = getelementptr i32 *%ptr1, i64 1
- %val = load i32 *%ptr1
+ %ptr2 = getelementptr i32, i32 *%ptr1, i64 1
+ %val = load i32 , i32 *%ptr1
%extval = sext i32 %val to i64
- %old = load i32 *%ptr2
+ %old = load i32 , i32 *%ptr2
%extold = zext i32 %old to i64
%and = and i64 %extval, %extold
%trunc = trunc i64 %and to i32
@@ -194,9 +194,9 @@ define void @f13(i64 *%ptr1) {
; CHECK-LABEL: f13:
; CHECK: nc 8(8,%r2), 0(%r2)
; CHECK: br %r14
- %ptr2 = getelementptr i64 *%ptr1, i64 1
- %val = load i64 *%ptr1
- %old = load i64 *%ptr2
+ %ptr2 = getelementptr i64, i64 *%ptr1, i64 1
+ %val = load i64 , i64 *%ptr1
+ %old = load i64 , i64 *%ptr2
%and = and i64 %old, %val
store i64 %and, i64 *%ptr2
ret void
@@ -207,9 +207,9 @@ define void @f14(i64 *%ptr1) {
; CHECK-LABEL: f14:
; CHECK-NOT: nc
; CHECK: br %r14
- %ptr2 = getelementptr i64 *%ptr1, i64 1
- %val = load volatile i64 *%ptr1
- %old = load i64 *%ptr2
+ %ptr2 = getelementptr i64, i64 *%ptr1, i64 1
+ %val = load volatile i64 , i64 *%ptr1
+ %old = load i64 , i64 *%ptr2
%and = and i64 %old, %val
store i64 %and, i64 *%ptr2
ret void
@@ -220,9 +220,9 @@ define void @f15(i64 *%ptr1) {
; CHECK-LABEL: f15:
; CHECK-NOT: nc
; CHECK: br %r14
- %ptr2 = getelementptr i64 *%ptr1, i64 1
- %val = load i64 *%ptr1
- %old = load volatile i64 *%ptr2
+ %ptr2 = getelementptr i64, i64 *%ptr1, i64 1
+ %val = load i64 , i64 *%ptr1
+ %old = load volatile i64 , i64 *%ptr2
%and = and i64 %old, %val
store i64 %and, i64 *%ptr2
ret void
@@ -233,9 +233,9 @@ define void @f16(i64 *%ptr1) {
; CHECK-LABEL: f16:
; CHECK-NOT: nc
; CHECK: br %r14
- %ptr2 = getelementptr i64 *%ptr1, i64 1
- %val = load i64 *%ptr1
- %old = load i64 *%ptr2
+ %ptr2 = getelementptr i64, i64 *%ptr1, i64 1
+ %val = load i64 , i64 *%ptr1
+ %old = load i64 , i64 *%ptr2
%and = and i64 %old, %val
store volatile i64 %and, i64 *%ptr2
ret void
@@ -248,8 +248,8 @@ define void @f17(i64 *%ptr1, i64 *%ptr2) {
; CHECK-LABEL: f17:
; CHECK-NOT: nc
; CHECK: br %r14
- %val = load i64 *%ptr1
- %old = load i64 *%ptr2
+ %val = load i64 , i64 *%ptr1
+ %old = load i64 , i64 *%ptr2
%and = and i64 %old, %val
store i64 %and, i64 *%ptr2
ret void
@@ -260,8 +260,8 @@ define void @f18(i64 *%ptr1, i64 *%ptr2) {
; CHECK-LABEL: f18:
; CHECK-NOT: nc
; CHECK: br %r14
- %val = load i64 *%ptr1, align 2
- %old = load i64 *%ptr2
+ %val = load i64 , i64 *%ptr1, align 2
+ %old = load i64 , i64 *%ptr2
%and = and i64 %old, %val
store i64 %and, i64 *%ptr2
ret void
@@ -272,8 +272,8 @@ define void @f19(i64 *%ptr1, i64 *%ptr2) {
; CHECK-LABEL: f19:
; CHECK-NOT: nc
; CHECK: br %r14
- %val = load i64 *%ptr1, align 2
- %old = load i64 *%ptr2
+ %val = load i64 , i64 *%ptr1, align 2
+ %old = load i64 , i64 *%ptr2
%and = and i64 %val, %old
store i64 %and, i64 *%ptr2
ret void
@@ -284,8 +284,8 @@ define void @f20(i64 *%ptr1, i64 *%ptr2) {
; CHECK-LABEL: f20:
; CHECK-NOT: nc
; CHECK: br %r14
- %val = load i64 *%ptr1
- %old = load i64 *%ptr2, align 2
+ %val = load i64 , i64 *%ptr1
+ %old = load i64 , i64 *%ptr2, align 2
%and = and i64 %val, %old
store i64 %and, i64 *%ptr2, align 2
ret void
@@ -299,8 +299,8 @@ define void @f21(i64 %base) {
%add = add i64 %base, 1
%ptr1 = inttoptr i64 %base to i64 *
%ptr2 = inttoptr i64 %add to i64 *
- %val = load i64 *%ptr1
- %old = load i64 *%ptr2, align 1
+ %val = load i64 , i64 *%ptr1
+ %old = load i64 , i64 *%ptr2, align 1
%and = and i64 %old, %val
store i64 %and, i64 *%ptr2, align 1
ret void
@@ -313,8 +313,8 @@ define void @f22(i8 *%ptr) {
; CHECK-DAG: larl [[DST:%r[0-5]]], g1dst
; CHECK: nc 0(1,[[DST]]), 0([[SRC]])
; CHECK: br %r14
- %val = load i8 *@g1src
- %old = load i8 *@g1dst
+ %val = load i8 , i8 *@g1src
+ %old = load i8 , i8 *@g1dst
%and = and i8 %val, %old
store i8 %and, i8 *@g1dst
ret void
@@ -327,8 +327,8 @@ define void @f23(i16 *%ptr) {
; CHECK-DAG: larl [[DST:%r[0-5]]], g2dst
; CHECK: nc 0(2,[[DST]]), 0([[SRC]])
; CHECK: br %r14
- %val = load i16 *@g2src
- %old = load i16 *@g2dst
+ %val = load i16 , i16 *@g2src
+ %old = load i16 , i16 *@g2dst
%and = and i16 %val, %old
store i16 %and, i16 *@g2dst
ret void
@@ -339,9 +339,9 @@ define void @f24(i64 *%ptr1) {
; CHECK-LABEL: f24:
; CHECK: nc 8(8,%r2), 0(%r2)
; CHECK: br %r14
- %ptr2 = getelementptr i64 *%ptr1, i64 1
- %val = load i64 *%ptr1, align 1
- %old = load i64 *%ptr2, align 1
+ %ptr2 = getelementptr i64, i64 *%ptr1, i64 1
+ %val = load i64 , i64 *%ptr1, align 1
+ %old = load i64 , i64 *%ptr2, align 1
%and = and i64 %old, %val
store i64 %and, i64 *%ptr2, align 1
ret void
@@ -352,8 +352,8 @@ define void @f25(i64 *%ptr1, i64 *%ptr2) {
; CHECK-LABEL: f25:
; CHECK: nc 0(8,%r3), 0(%r2)
; CHECK: br %r14
- %val = load i64 *%ptr1, align 2, !tbaa !3
- %old = load i64 *%ptr2, align 2, !tbaa !4
+ %val = load i64 , i64 *%ptr1, align 2, !tbaa !3
+ %old = load i64 , i64 *%ptr2, align 2, !tbaa !4
%and = and i64 %old, %val
store i64 %and, i64 *%ptr2, align 2, !tbaa !4
ret void
@@ -364,8 +364,8 @@ define void @f26(i64 *%ptr1, i64 *%ptr2) {
; CHECK-LABEL: f26:
; CHECK-NOT: nc
; CHECK: br %r14
- %val = load i64 *%ptr1, align 2, !tbaa !3
- %old = load i64 *%ptr2, align 2, !tbaa !3
+ %val = load i64 , i64 *%ptr1, align 2, !tbaa !3
+ %old = load i64 , i64 *%ptr2, align 2, !tbaa !3
%and = and i64 %old, %val
store i64 %and, i64 *%ptr2, align 2, !tbaa !3
ret void
diff --git a/test/CodeGen/SystemZ/asm-18.ll b/test/CodeGen/SystemZ/asm-18.ll
index 71e145a285ff..999984be88d4 100644
--- a/test/CodeGen/SystemZ/asm-18.ll
+++ b/test/CodeGen/SystemZ/asm-18.ll
@@ -16,12 +16,12 @@ define void @f1(i32 *%ptr1, i32 *%ptr2) {
; CHECK-DAG: stfh [[REG3]], 4096(%r2)
; CHECK-DAG: sty [[REG4]], 524284(%r3)
; CHECK: br %r14
- %ptr3 = getelementptr i32 *%ptr1, i64 1024
- %ptr4 = getelementptr i32 *%ptr2, i64 131071
- %old1 = load i32 *%ptr1
- %old2 = load i32 *%ptr2
- %old3 = load i32 *%ptr3
- %old4 = load i32 *%ptr4
+ %ptr3 = getelementptr i32, i32 *%ptr1, i64 1024
+ %ptr4 = getelementptr i32, i32 *%ptr2, i64 131071
+ %old1 = load i32 , i32 *%ptr1
+ %old2 = load i32 , i32 *%ptr2
+ %old3 = load i32 , i32 *%ptr3
+ %old4 = load i32 , i32 *%ptr4
%res = call { i32, i32, i32, i32 } asm "blah $0, $1, $2, $3",
"=h,=r,=h,=r,0,1,2,3"(i32 %old1, i32 %old2, i32 %old3, i32 %old4)
%new1 = extractvalue { i32, i32, i32, i32 } %res, 0
@@ -60,12 +60,12 @@ define void @f3(i8 *%ptr1, i8 *%ptr2) {
; CHECK-DAG: lb [[REG4:%r[0-5]]], 524287(%r3)
; CHECK: blah [[REG1]], [[REG2]]
; CHECK: br %r14
- %ptr3 = getelementptr i8 *%ptr1, i64 4096
- %ptr4 = getelementptr i8 *%ptr2, i64 524287
- %val1 = load i8 *%ptr1
- %val2 = load i8 *%ptr2
- %val3 = load i8 *%ptr3
- %val4 = load i8 *%ptr4
+ %ptr3 = getelementptr i8, i8 *%ptr1, i64 4096
+ %ptr4 = getelementptr i8, i8 *%ptr2, i64 524287
+ %val1 = load i8 , i8 *%ptr1
+ %val2 = load i8 , i8 *%ptr2
+ %val3 = load i8 , i8 *%ptr3
+ %val4 = load i8 , i8 *%ptr4
%ext1 = sext i8 %val1 to i32
%ext2 = sext i8 %val2 to i32
%ext3 = sext i8 %val3 to i32
@@ -84,12 +84,12 @@ define void @f4(i16 *%ptr1, i16 *%ptr2) {
; CHECK-DAG: lhy [[REG4:%r[0-5]]], 524286(%r3)
; CHECK: blah [[REG1]], [[REG2]]
; CHECK: br %r14
- %ptr3 = getelementptr i16 *%ptr1, i64 2048
- %ptr4 = getelementptr i16 *%ptr2, i64 262143
- %val1 = load i16 *%ptr1
- %val2 = load i16 *%ptr2
- %val3 = load i16 *%ptr3
- %val4 = load i16 *%ptr4
+ %ptr3 = getelementptr i16, i16 *%ptr1, i64 2048
+ %ptr4 = getelementptr i16, i16 *%ptr2, i64 262143
+ %val1 = load i16 , i16 *%ptr1
+ %val2 = load i16 , i16 *%ptr2
+ %val3 = load i16 , i16 *%ptr3
+ %val4 = load i16 , i16 *%ptr4
%ext1 = sext i16 %val1 to i32
%ext2 = sext i16 %val2 to i32
%ext3 = sext i16 %val3 to i32
@@ -108,12 +108,12 @@ define void @f5(i8 *%ptr1, i8 *%ptr2) {
; CHECK-DAG: llc [[REG4:%r[0-5]]], 524287(%r3)
; CHECK: blah [[REG1]], [[REG2]]
; CHECK: br %r14
- %ptr3 = getelementptr i8 *%ptr1, i64 4096
- %ptr4 = getelementptr i8 *%ptr2, i64 524287
- %val1 = load i8 *%ptr1
- %val2 = load i8 *%ptr2
- %val3 = load i8 *%ptr3
- %val4 = load i8 *%ptr4
+ %ptr3 = getelementptr i8, i8 *%ptr1, i64 4096
+ %ptr4 = getelementptr i8, i8 *%ptr2, i64 524287
+ %val1 = load i8 , i8 *%ptr1
+ %val2 = load i8 , i8 *%ptr2
+ %val3 = load i8 , i8 *%ptr3
+ %val4 = load i8 , i8 *%ptr4
%ext1 = zext i8 %val1 to i32
%ext2 = zext i8 %val2 to i32
%ext3 = zext i8 %val3 to i32
@@ -132,12 +132,12 @@ define void @f6(i16 *%ptr1, i16 *%ptr2) {
; CHECK-DAG: llh [[REG4:%r[0-5]]], 524286(%r3)
; CHECK: blah [[REG1]], [[REG2]]
; CHECK: br %r14
- %ptr3 = getelementptr i16 *%ptr1, i64 2048
- %ptr4 = getelementptr i16 *%ptr2, i64 262143
- %val1 = load i16 *%ptr1
- %val2 = load i16 *%ptr2
- %val3 = load i16 *%ptr3
- %val4 = load i16 *%ptr4
+ %ptr3 = getelementptr i16, i16 *%ptr1, i64 2048
+ %ptr4 = getelementptr i16, i16 *%ptr2, i64 262143
+ %val1 = load i16 , i16 *%ptr1
+ %val2 = load i16 , i16 *%ptr2
+ %val3 = load i16 , i16 *%ptr3
+ %val4 = load i16 , i16 *%ptr4
%ext1 = zext i16 %val1 to i32
%ext2 = zext i16 %val2 to i32
%ext3 = zext i16 %val3 to i32
@@ -161,8 +161,8 @@ define void @f7(i8 *%ptr1, i8 *%ptr2) {
%res2 = extractvalue { i32, i32 } %res, 1
%trunc1 = trunc i32 %res1 to i8
%trunc2 = trunc i32 %res2 to i8
- %ptr3 = getelementptr i8 *%ptr1, i64 4096
- %ptr4 = getelementptr i8 *%ptr2, i64 524287
+ %ptr3 = getelementptr i8, i8 *%ptr1, i64 4096
+ %ptr4 = getelementptr i8, i8 *%ptr2, i64 524287
store i8 %trunc1, i8 *%ptr1
store i8 %trunc2, i8 *%ptr2
store i8 %trunc1, i8 *%ptr3
@@ -184,8 +184,8 @@ define void @f8(i16 *%ptr1, i16 *%ptr2) {
%res2 = extractvalue { i32, i32 } %res, 1
%trunc1 = trunc i32 %res1 to i16
%trunc2 = trunc i32 %res2 to i16
- %ptr3 = getelementptr i16 *%ptr1, i64 2048
- %ptr4 = getelementptr i16 *%ptr2, i64 262143
+ %ptr3 = getelementptr i16, i16 *%ptr1, i64 2048
+ %ptr4 = getelementptr i16, i16 *%ptr2, i64 262143
store i16 %trunc1, i16 *%ptr1
store i16 %trunc2, i16 *%ptr2
store i16 %trunc1, i16 *%ptr3
@@ -713,11 +713,11 @@ define void @f33(i32 *%ptr1, i32 *%ptr2) {
; CHECK: clhf [[REG2]], 0(%r3)
; CHECK: br %r14
%res1 = call i32 asm "stepa $0", "=h"()
- %load1 = load i32 *%ptr1
+ %load1 = load i32 , i32 *%ptr1
%cmp1 = icmp sle i32 %res1, %load1
%sel1 = select i1 %cmp1, i32 0, i32 1
%res2 = call i32 asm "stepb $0, $1", "=h,r"(i32 %sel1)
- %load2 = load i32 *%ptr2
+ %load2 = load i32 , i32 *%ptr2
%cmp2 = icmp ule i32 %res2, %load2
%sel2 = select i1 %cmp2, i32 0, i32 1
store i32 %sel2, i32 *%ptr1
@@ -733,11 +733,11 @@ define void @f34(i32 *%ptr1, i32 *%ptr2) {
; CHECK: cl [[REG2]], 0(%r3)
; CHECK: br %r14
%res1 = call i32 asm "stepa $0", "=r"()
- %load1 = load i32 *%ptr1
+ %load1 = load i32 , i32 *%ptr1
%cmp1 = icmp sle i32 %res1, %load1
%sel1 = select i1 %cmp1, i32 0, i32 1
%res2 = call i32 asm "stepb $0, $1", "=r,r"(i32 %sel1)
- %load2 = load i32 *%ptr2
+ %load2 = load i32 , i32 *%ptr2
%cmp2 = icmp ule i32 %res2, %load2
%sel2 = select i1 %cmp2, i32 0, i32 1
store i32 %sel2, i32 *%ptr1
diff --git a/test/CodeGen/SystemZ/atomic-load-01.ll b/test/CodeGen/SystemZ/atomic-load-01.ll
index f3acd605b012..b2f4ebe6639e 100644
--- a/test/CodeGen/SystemZ/atomic-load-01.ll
+++ b/test/CodeGen/SystemZ/atomic-load-01.ll
@@ -7,6 +7,6 @@ define i8 @f1(i8 *%src) {
; CHECK: bcr 1{{[45]}}, %r0
; CHECK: lb %r2, 0(%r2)
; CHECK: br %r14
- %val = load atomic i8 *%src seq_cst, align 1
+ %val = load atomic i8 , i8 *%src seq_cst, align 1
ret i8 %val
}
diff --git a/test/CodeGen/SystemZ/atomic-load-02.ll b/test/CodeGen/SystemZ/atomic-load-02.ll
index d9bec60f4c1b..b2b60f3d0160 100644
--- a/test/CodeGen/SystemZ/atomic-load-02.ll
+++ b/test/CodeGen/SystemZ/atomic-load-02.ll
@@ -7,6 +7,6 @@ define i16 @f1(i16 *%src) {
; CHECK: bcr 1{{[45]}}, %r0
; CHECK: lh %r2, 0(%r2)
; CHECK: br %r14
- %val = load atomic i16 *%src seq_cst, align 2
+ %val = load atomic i16 , i16 *%src seq_cst, align 2
ret i16 %val
}
diff --git a/test/CodeGen/SystemZ/atomic-load-03.ll b/test/CodeGen/SystemZ/atomic-load-03.ll
index 7e5eb9249a93..d83c430bd0af 100644
--- a/test/CodeGen/SystemZ/atomic-load-03.ll
+++ b/test/CodeGen/SystemZ/atomic-load-03.ll
@@ -7,6 +7,6 @@ define i32 @f1(i32 *%src) {
; CHECK: bcr 1{{[45]}}, %r0
; CHECK: l %r2, 0(%r2)
; CHECK: br %r14
- %val = load atomic i32 *%src seq_cst, align 4
+ %val = load atomic i32 , i32 *%src seq_cst, align 4
ret i32 %val
}
diff --git a/test/CodeGen/SystemZ/atomic-load-04.ll b/test/CodeGen/SystemZ/atomic-load-04.ll
index c7a9a98a425d..dc6b271e00e5 100644
--- a/test/CodeGen/SystemZ/atomic-load-04.ll
+++ b/test/CodeGen/SystemZ/atomic-load-04.ll
@@ -7,6 +7,6 @@ define i64 @f1(i64 *%src) {
; CHECK: bcr 1{{[45]}}, %r0
; CHECK: lg %r2, 0(%r2)
; CHECK: br %r14
- %val = load atomic i64 *%src seq_cst, align 8
+ %val = load atomic i64 , i64 *%src seq_cst, align 8
ret i64 %val
}
diff --git a/test/CodeGen/SystemZ/atomicrmw-add-05.ll b/test/CodeGen/SystemZ/atomicrmw-add-05.ll
index 956c0d9642cd..f722f79bd42f 100644
--- a/test/CodeGen/SystemZ/atomicrmw-add-05.ll
+++ b/test/CodeGen/SystemZ/atomicrmw-add-05.ll
@@ -26,7 +26,7 @@ define i32 @f3(i32 %dummy, i32 *%src, i32 %b) {
; CHECK-LABEL: f3:
; CHECK: laa %r2, %r4, 524284(%r3)
; CHECK: br %r14
- %ptr = getelementptr i32 *%src, i32 131071
+ %ptr = getelementptr i32, i32 *%src, i32 131071
%res = atomicrmw add i32 *%ptr, i32 %b seq_cst
ret i32 %res
}
@@ -37,7 +37,7 @@ define i32 @f4(i32 %dummy, i32 *%src, i32 %b) {
; CHECK: agfi %r3, 524288
; CHECK: laa %r2, %r4, 0(%r3)
; CHECK: br %r14
- %ptr = getelementptr i32 *%src, i32 131072
+ %ptr = getelementptr i32, i32 *%src, i32 131072
%res = atomicrmw add i32 *%ptr, i32 %b seq_cst
ret i32 %res
}
@@ -47,7 +47,7 @@ define i32 @f5(i32 %dummy, i32 *%src, i32 %b) {
; CHECK-LABEL: f5:
; CHECK: laa %r2, %r4, -524288(%r3)
; CHECK: br %r14
- %ptr = getelementptr i32 *%src, i32 -131072
+ %ptr = getelementptr i32, i32 *%src, i32 -131072
%res = atomicrmw add i32 *%ptr, i32 %b seq_cst
ret i32 %res
}
@@ -58,7 +58,7 @@ define i32 @f6(i32 %dummy, i32 *%src, i32 %b) {
; CHECK: agfi %r3, -524292
; CHECK: laa %r2, %r4, 0(%r3)
; CHECK: br %r14
- %ptr = getelementptr i32 *%src, i32 -131073
+ %ptr = getelementptr i32, i32 *%src, i32 -131073
%res = atomicrmw add i32 *%ptr, i32 %b seq_cst
ret i32 %res
}
diff --git a/test/CodeGen/SystemZ/atomicrmw-add-06.ll b/test/CodeGen/SystemZ/atomicrmw-add-06.ll
index f508858d1562..ef77dc018340 100644
--- a/test/CodeGen/SystemZ/atomicrmw-add-06.ll
+++ b/test/CodeGen/SystemZ/atomicrmw-add-06.ll
@@ -26,7 +26,7 @@ define i64 @f3(i64 %dummy, i64 *%src, i64 %b) {
; CHECK-LABEL: f3:
; CHECK: laag %r2, %r4, 524280(%r3)
; CHECK: br %r14
- %ptr = getelementptr i64 *%src, i64 65535
+ %ptr = getelementptr i64, i64 *%src, i64 65535
%res = atomicrmw add i64 *%ptr, i64 %b seq_cst
ret i64 %res
}
@@ -37,7 +37,7 @@ define i64 @f4(i64 %dummy, i64 *%src, i64 %b) {
; CHECK: agfi %r3, 524288
; CHECK: laag %r2, %r4, 0(%r3)
; CHECK: br %r14
- %ptr = getelementptr i64 *%src, i64 65536
+ %ptr = getelementptr i64, i64 *%src, i64 65536
%res = atomicrmw add i64 *%ptr, i64 %b seq_cst
ret i64 %res
}
@@ -47,7 +47,7 @@ define i64 @f5(i64 %dummy, i64 *%src, i64 %b) {
; CHECK-LABEL: f5:
; CHECK: laag %r2, %r4, -524288(%r3)
; CHECK: br %r14
- %ptr = getelementptr i64 *%src, i64 -65536
+ %ptr = getelementptr i64, i64 *%src, i64 -65536
%res = atomicrmw add i64 *%ptr, i64 %b seq_cst
ret i64 %res
}
@@ -58,7 +58,7 @@ define i64 @f6(i64 %dummy, i64 *%src, i64 %b) {
; CHECK: agfi %r3, -524296
; CHECK: laag %r2, %r4, 0(%r3)
; CHECK: br %r14
- %ptr = getelementptr i64 *%src, i64 -65537
+ %ptr = getelementptr i64, i64 *%src, i64 -65537
%res = atomicrmw add i64 *%ptr, i64 %b seq_cst
ret i64 %res
}
diff --git a/test/CodeGen/SystemZ/atomicrmw-and-05.ll b/test/CodeGen/SystemZ/atomicrmw-and-05.ll
index f0b999c60431..b8ccbe2f4e50 100644
--- a/test/CodeGen/SystemZ/atomicrmw-and-05.ll
+++ b/test/CodeGen/SystemZ/atomicrmw-and-05.ll
@@ -26,7 +26,7 @@ define i32 @f3(i32 %dummy, i32 *%src, i32 %b) {
; CHECK-LABEL: f3:
; CHECK: lan %r2, %r4, 524284(%r3)
; CHECK: br %r14
- %ptr = getelementptr i32 *%src, i32 131071
+ %ptr = getelementptr i32, i32 *%src, i32 131071
%res = atomicrmw and i32 *%ptr, i32 %b seq_cst
ret i32 %res
}
@@ -37,7 +37,7 @@ define i32 @f4(i32 %dummy, i32 *%src, i32 %b) {
; CHECK: agfi %r3, 524288
; CHECK: lan %r2, %r4, 0(%r3)
; CHECK: br %r14
- %ptr = getelementptr i32 *%src, i32 131072
+ %ptr = getelementptr i32, i32 *%src, i32 131072
%res = atomicrmw and i32 *%ptr, i32 %b seq_cst
ret i32 %res
}
@@ -47,7 +47,7 @@ define i32 @f5(i32 %dummy, i32 *%src, i32 %b) {
; CHECK-LABEL: f5:
; CHECK: lan %r2, %r4, -524288(%r3)
; CHECK: br %r14
- %ptr = getelementptr i32 *%src, i32 -131072
+ %ptr = getelementptr i32, i32 *%src, i32 -131072
%res = atomicrmw and i32 *%ptr, i32 %b seq_cst
ret i32 %res
}
@@ -58,7 +58,7 @@ define i32 @f6(i32 %dummy, i32 *%src, i32 %b) {
; CHECK: agfi %r3, -524292
; CHECK: lan %r2, %r4, 0(%r3)
; CHECK: br %r14
- %ptr = getelementptr i32 *%src, i32 -131073
+ %ptr = getelementptr i32, i32 *%src, i32 -131073
%res = atomicrmw and i32 *%ptr, i32 %b seq_cst
ret i32 %res
}
diff --git a/test/CodeGen/SystemZ/atomicrmw-and-06.ll b/test/CodeGen/SystemZ/atomicrmw-and-06.ll
index e5b71945d57c..9885cdec1f1f 100644
--- a/test/CodeGen/SystemZ/atomicrmw-and-06.ll
+++ b/test/CodeGen/SystemZ/atomicrmw-and-06.ll
@@ -26,7 +26,7 @@ define i64 @f3(i64 %dummy, i64 *%src, i64 %b) {
; CHECK-LABEL: f3:
; CHECK: lang %r2, %r4, 524280(%r3)
; CHECK: br %r14
- %ptr = getelementptr i64 *%src, i64 65535
+ %ptr = getelementptr i64, i64 *%src, i64 65535
%res = atomicrmw and i64 *%ptr, i64 %b seq_cst
ret i64 %res
}
@@ -37,7 +37,7 @@ define i64 @f4(i64 %dummy, i64 *%src, i64 %b) {
; CHECK: agfi %r3, 524288
; CHECK: lang %r2, %r4, 0(%r3)
; CHECK: br %r14
- %ptr = getelementptr i64 *%src, i64 65536
+ %ptr = getelementptr i64, i64 *%src, i64 65536
%res = atomicrmw and i64 *%ptr, i64 %b seq_cst
ret i64 %res
}
@@ -47,7 +47,7 @@ define i64 @f5(i64 %dummy, i64 *%src, i64 %b) {
; CHECK-LABEL: f5:
; CHECK: lang %r2, %r4, -524288(%r3)
; CHECK: br %r14
- %ptr = getelementptr i64 *%src, i64 -65536
+ %ptr = getelementptr i64, i64 *%src, i64 -65536
%res = atomicrmw and i64 *%ptr, i64 %b seq_cst
ret i64 %res
}
@@ -58,7 +58,7 @@ define i64 @f6(i64 %dummy, i64 *%src, i64 %b) {
; CHECK: agfi %r3, -524296
; CHECK: lang %r2, %r4, 0(%r3)
; CHECK: br %r14
- %ptr = getelementptr i64 *%src, i64 -65537
+ %ptr = getelementptr i64, i64 *%src, i64 -65537
%res = atomicrmw and i64 *%ptr, i64 %b seq_cst
ret i64 %res
}
diff --git a/test/CodeGen/SystemZ/atomicrmw-minmax-03.ll b/test/CodeGen/SystemZ/atomicrmw-minmax-03.ll
index f2152c6f28bc..4ab48e46fc82 100644
--- a/test/CodeGen/SystemZ/atomicrmw-minmax-03.ll
+++ b/test/CodeGen/SystemZ/atomicrmw-minmax-03.ll
@@ -69,7 +69,7 @@ define i32 @f5(i32 %dummy, i32 *%src, i32 %b) {
; CHECK: l %r2, 4092(%r3)
; CHECK: cs %r2, {{%r[0-9]+}}, 4092(%r3)
; CHECK: br %r14
- %ptr = getelementptr i32 *%src, i64 1023
+ %ptr = getelementptr i32, i32 *%src, i64 1023
%res = atomicrmw min i32 *%ptr, i32 %b seq_cst
ret i32 %res
}
@@ -80,7 +80,7 @@ define i32 @f6(i32 %dummy, i32 *%src, i32 %b) {
; CHECK: ly %r2, 4096(%r3)
; CHECK: csy %r2, {{%r[0-9]+}}, 4096(%r3)
; CHECK: br %r14
- %ptr = getelementptr i32 *%src, i64 1024
+ %ptr = getelementptr i32, i32 *%src, i64 1024
%res = atomicrmw min i32 *%ptr, i32 %b seq_cst
ret i32 %res
}
@@ -91,7 +91,7 @@ define i32 @f7(i32 %dummy, i32 *%src, i32 %b) {
; CHECK: ly %r2, 524284(%r3)
; CHECK: csy %r2, {{%r[0-9]+}}, 524284(%r3)
; CHECK: br %r14
- %ptr = getelementptr i32 *%src, i64 131071
+ %ptr = getelementptr i32, i32 *%src, i64 131071
%res = atomicrmw min i32 *%ptr, i32 %b seq_cst
ret i32 %res
}
@@ -103,7 +103,7 @@ define i32 @f8(i32 %dummy, i32 *%src, i32 %b) {
; CHECK: l %r2, 0(%r3)
; CHECK: cs %r2, {{%r[0-9]+}}, 0(%r3)
; CHECK: br %r14
- %ptr = getelementptr i32 *%src, i64 131072
+ %ptr = getelementptr i32, i32 *%src, i64 131072
%res = atomicrmw min i32 *%ptr, i32 %b seq_cst
ret i32 %res
}
@@ -114,7 +114,7 @@ define i32 @f9(i32 %dummy, i32 *%src, i32 %b) {
; CHECK: ly %r2, -4(%r3)
; CHECK: csy %r2, {{%r[0-9]+}}, -4(%r3)
; CHECK: br %r14
- %ptr = getelementptr i32 *%src, i64 -1
+ %ptr = getelementptr i32, i32 *%src, i64 -1
%res = atomicrmw min i32 *%ptr, i32 %b seq_cst
ret i32 %res
}
@@ -125,7 +125,7 @@ define i32 @f10(i32 %dummy, i32 *%src, i32 %b) {
; CHECK: ly %r2, -524288(%r3)
; CHECK: csy %r2, {{%r[0-9]+}}, -524288(%r3)
; CHECK: br %r14
- %ptr = getelementptr i32 *%src, i64 -131072
+ %ptr = getelementptr i32, i32 *%src, i64 -131072
%res = atomicrmw min i32 *%ptr, i32 %b seq_cst
ret i32 %res
}
@@ -137,7 +137,7 @@ define i32 @f11(i32 %dummy, i32 *%src, i32 %b) {
; CHECK: l %r2, 0(%r3)
; CHECK: cs %r2, {{%r[0-9]+}}, 0(%r3)
; CHECK: br %r14
- %ptr = getelementptr i32 *%src, i64 -131073
+ %ptr = getelementptr i32, i32 *%src, i64 -131073
%res = atomicrmw min i32 *%ptr, i32 %b seq_cst
ret i32 %res
}
diff --git a/test/CodeGen/SystemZ/atomicrmw-minmax-04.ll b/test/CodeGen/SystemZ/atomicrmw-minmax-04.ll
index 037eb1aa9367..afd88a3dd42d 100644
--- a/test/CodeGen/SystemZ/atomicrmw-minmax-04.ll
+++ b/test/CodeGen/SystemZ/atomicrmw-minmax-04.ll
@@ -69,7 +69,7 @@ define i64 @f5(i64 %dummy, i64 *%src, i64 %b) {
; CHECK: lg %r2, 524280(%r3)
; CHECK: csg %r2, {{%r[0-9]+}}, 524280(%r3)
; CHECK: br %r14
- %ptr = getelementptr i64 *%src, i64 65535
+ %ptr = getelementptr i64, i64 *%src, i64 65535
%res = atomicrmw min i64 *%ptr, i64 %b seq_cst
ret i64 %res
}
@@ -81,7 +81,7 @@ define i64 @f6(i64 %dummy, i64 *%src, i64 %b) {
; CHECK: lg %r2, 0(%r3)
; CHECK: csg %r2, {{%r[0-9]+}}, 0(%r3)
; CHECK: br %r14
- %ptr = getelementptr i64 *%src, i64 65536
+ %ptr = getelementptr i64, i64 *%src, i64 65536
%res = atomicrmw min i64 *%ptr, i64 %b seq_cst
ret i64 %res
}
@@ -92,7 +92,7 @@ define i64 @f7(i64 %dummy, i64 *%src, i64 %b) {
; CHECK: lg %r2, -524288(%r3)
; CHECK: csg %r2, {{%r[0-9]+}}, -524288(%r3)
; CHECK: br %r14
- %ptr = getelementptr i64 *%src, i64 -65536
+ %ptr = getelementptr i64, i64 *%src, i64 -65536
%res = atomicrmw min i64 *%ptr, i64 %b seq_cst
ret i64 %res
}
@@ -104,7 +104,7 @@ define i64 @f8(i64 %dummy, i64 *%src, i64 %b) {
; CHECK: lg %r2, 0(%r3)
; CHECK: csg %r2, {{%r[0-9]+}}, 0(%r3)
; CHECK: br %r14
- %ptr = getelementptr i64 *%src, i64 -65537
+ %ptr = getelementptr i64, i64 *%src, i64 -65537
%res = atomicrmw min i64 *%ptr, i64 %b seq_cst
ret i64 %res
}
diff --git a/test/CodeGen/SystemZ/atomicrmw-or-05.ll b/test/CodeGen/SystemZ/atomicrmw-or-05.ll
index b38654ca6f07..54b5be431473 100644
--- a/test/CodeGen/SystemZ/atomicrmw-or-05.ll
+++ b/test/CodeGen/SystemZ/atomicrmw-or-05.ll
@@ -26,7 +26,7 @@ define i32 @f3(i32 %dummy, i32 *%src, i32 %b) {
; CHECK-LABEL: f3:
; CHECK: lao %r2, %r4, 524284(%r3)
; CHECK: br %r14
- %ptr = getelementptr i32 *%src, i32 131071
+ %ptr = getelementptr i32, i32 *%src, i32 131071
%res = atomicrmw or i32 *%ptr, i32 %b seq_cst
ret i32 %res
}
@@ -37,7 +37,7 @@ define i32 @f4(i32 %dummy, i32 *%src, i32 %b) {
; CHECK: agfi %r3, 524288
; CHECK: lao %r2, %r4, 0(%r3)
; CHECK: br %r14
- %ptr = getelementptr i32 *%src, i32 131072
+ %ptr = getelementptr i32, i32 *%src, i32 131072
%res = atomicrmw or i32 *%ptr, i32 %b seq_cst
ret i32 %res
}
@@ -47,7 +47,7 @@ define i32 @f5(i32 %dummy, i32 *%src, i32 %b) {
; CHECK-LABEL: f5:
; CHECK: lao %r2, %r4, -524288(%r3)
; CHECK: br %r14
- %ptr = getelementptr i32 *%src, i32 -131072
+ %ptr = getelementptr i32, i32 *%src, i32 -131072
%res = atomicrmw or i32 *%ptr, i32 %b seq_cst
ret i32 %res
}
@@ -58,7 +58,7 @@ define i32 @f6(i32 %dummy, i32 *%src, i32 %b) {
; CHECK: agfi %r3, -524292
; CHECK: lao %r2, %r4, 0(%r3)
; CHECK: br %r14
- %ptr = getelementptr i32 *%src, i32 -131073
+ %ptr = getelementptr i32, i32 *%src, i32 -131073
%res = atomicrmw or i32 *%ptr, i32 %b seq_cst
ret i32 %res
}
diff --git a/test/CodeGen/SystemZ/atomicrmw-or-06.ll b/test/CodeGen/SystemZ/atomicrmw-or-06.ll
index 30874abfe4a2..3f7a05c7324d 100644
--- a/test/CodeGen/SystemZ/atomicrmw-or-06.ll
+++ b/test/CodeGen/SystemZ/atomicrmw-or-06.ll
@@ -26,7 +26,7 @@ define i64 @f3(i64 %dummy, i64 *%src, i64 %b) {
; CHECK-LABEL: f3:
; CHECK: laog %r2, %r4, 524280(%r3)
; CHECK: br %r14
- %ptr = getelementptr i64 *%src, i64 65535
+ %ptr = getelementptr i64, i64 *%src, i64 65535
%res = atomicrmw or i64 *%ptr, i64 %b seq_cst
ret i64 %res
}
@@ -37,7 +37,7 @@ define i64 @f4(i64 %dummy, i64 *%src, i64 %b) {
; CHECK: agfi %r3, 524288
; CHECK: laog %r2, %r4, 0(%r3)
; CHECK: br %r14
- %ptr = getelementptr i64 *%src, i64 65536
+ %ptr = getelementptr i64, i64 *%src, i64 65536
%res = atomicrmw or i64 *%ptr, i64 %b seq_cst
ret i64 %res
}
@@ -47,7 +47,7 @@ define i64 @f5(i64 %dummy, i64 *%src, i64 %b) {
; CHECK-LABEL: f5:
; CHECK: laog %r2, %r4, -524288(%r3)
; CHECK: br %r14
- %ptr = getelementptr i64 *%src, i64 -65536
+ %ptr = getelementptr i64, i64 *%src, i64 -65536
%res = atomicrmw or i64 *%ptr, i64 %b seq_cst
ret i64 %res
}
@@ -58,7 +58,7 @@ define i64 @f6(i64 %dummy, i64 *%src, i64 %b) {
; CHECK: agfi %r3, -524296
; CHECK: laog %r2, %r4, 0(%r3)
; CHECK: br %r14
- %ptr = getelementptr i64 *%src, i64 -65537
+ %ptr = getelementptr i64, i64 *%src, i64 -65537
%res = atomicrmw or i64 *%ptr, i64 %b seq_cst
ret i64 %res
}
diff --git a/test/CodeGen/SystemZ/atomicrmw-sub-05.ll b/test/CodeGen/SystemZ/atomicrmw-sub-05.ll
index 7668f0e2a7ac..e505f373ac73 100644
--- a/test/CodeGen/SystemZ/atomicrmw-sub-05.ll
+++ b/test/CodeGen/SystemZ/atomicrmw-sub-05.ll
@@ -28,7 +28,7 @@ define i32 @f3(i32 %dummy, i32 *%src, i32 %b) {
; CHECK: lcr [[NEG:%r[0-5]]], %r4
; CHECK: laa %r2, [[NEG]], 524284(%r3)
; CHECK: br %r14
- %ptr = getelementptr i32 *%src, i32 131071
+ %ptr = getelementptr i32, i32 *%src, i32 131071
%res = atomicrmw sub i32 *%ptr, i32 %b seq_cst
ret i32 %res
}
@@ -40,7 +40,7 @@ define i32 @f4(i32 %dummy, i32 *%src, i32 %b) {
; CHECK-DAG: agfi %r3, 524288
; CHECK: laa %r2, [[NEG]], 0(%r3)
; CHECK: br %r14
- %ptr = getelementptr i32 *%src, i32 131072
+ %ptr = getelementptr i32, i32 *%src, i32 131072
%res = atomicrmw sub i32 *%ptr, i32 %b seq_cst
ret i32 %res
}
@@ -51,7 +51,7 @@ define i32 @f5(i32 %dummy, i32 *%src, i32 %b) {
; CHECK: lcr [[NEG:%r[0-5]]], %r4
; CHECK: laa %r2, [[NEG]], -524288(%r3)
; CHECK: br %r14
- %ptr = getelementptr i32 *%src, i32 -131072
+ %ptr = getelementptr i32, i32 *%src, i32 -131072
%res = atomicrmw sub i32 *%ptr, i32 %b seq_cst
ret i32 %res
}
@@ -63,7 +63,7 @@ define i32 @f6(i32 %dummy, i32 *%src, i32 %b) {
; CHECK-DAG: agfi %r3, -524292
; CHECK: laa %r2, [[NEG]], 0(%r3)
; CHECK: br %r14
- %ptr = getelementptr i32 *%src, i32 -131073
+ %ptr = getelementptr i32, i32 *%src, i32 -131073
%res = atomicrmw sub i32 *%ptr, i32 %b seq_cst
ret i32 %res
}
diff --git a/test/CodeGen/SystemZ/atomicrmw-sub-06.ll b/test/CodeGen/SystemZ/atomicrmw-sub-06.ll
index 5d11bdf96cde..7330cfa5aef3 100644
--- a/test/CodeGen/SystemZ/atomicrmw-sub-06.ll
+++ b/test/CodeGen/SystemZ/atomicrmw-sub-06.ll
@@ -28,7 +28,7 @@ define i64 @f3(i64 %dummy, i64 *%src, i64 %b) {
; CHECK: lcgr [[NEG:%r[0-5]]], %r4
; CHECK: laag %r2, [[NEG]], 524280(%r3)
; CHECK: br %r14
- %ptr = getelementptr i64 *%src, i64 65535
+ %ptr = getelementptr i64, i64 *%src, i64 65535
%res = atomicrmw sub i64 *%ptr, i64 %b seq_cst
ret i64 %res
}
@@ -40,7 +40,7 @@ define i64 @f4(i64 %dummy, i64 *%src, i64 %b) {
; CHECK-DAG: agfi %r3, 524288
; CHECK: laag %r2, [[NEG]], 0(%r3)
; CHECK: br %r14
- %ptr = getelementptr i64 *%src, i64 65536
+ %ptr = getelementptr i64, i64 *%src, i64 65536
%res = atomicrmw sub i64 *%ptr, i64 %b seq_cst
ret i64 %res
}
@@ -51,7 +51,7 @@ define i64 @f5(i64 %dummy, i64 *%src, i64 %b) {
; CHECK: lcgr [[NEG:%r[0-5]]], %r4
; CHECK: laag %r2, [[NEG]], -524288(%r3)
; CHECK: br %r14
- %ptr = getelementptr i64 *%src, i64 -65536
+ %ptr = getelementptr i64, i64 *%src, i64 -65536
%res = atomicrmw sub i64 *%ptr, i64 %b seq_cst
ret i64 %res
}
@@ -63,7 +63,7 @@ define i64 @f6(i64 %dummy, i64 *%src, i64 %b) {
; CHECK-DAG: agfi %r3, -524296
; CHECK: laag %r2, [[NEG]], 0(%r3)
; CHECK: br %r14
- %ptr = getelementptr i64 *%src, i64 -65537
+ %ptr = getelementptr i64, i64 *%src, i64 -65537
%res = atomicrmw sub i64 *%ptr, i64 %b seq_cst
ret i64 %res
}
diff --git a/test/CodeGen/SystemZ/atomicrmw-xchg-03.ll b/test/CodeGen/SystemZ/atomicrmw-xchg-03.ll
index a602a02a189e..0e49a9f1ede7 100644
--- a/test/CodeGen/SystemZ/atomicrmw-xchg-03.ll
+++ b/test/CodeGen/SystemZ/atomicrmw-xchg-03.ll
@@ -20,7 +20,7 @@ define i32 @f2(i32 %dummy, i32 *%src, i32 %b) {
; CHECK: l %r2, 4092(%r3)
; CHECK: cs %r2, {{%r[0-9]+}}, 4092(%r3)
; CHECK: br %r14
- %ptr = getelementptr i32 *%src, i64 1023
+ %ptr = getelementptr i32, i32 *%src, i64 1023
%res = atomicrmw xchg i32 *%ptr, i32 %b seq_cst
ret i32 %res
}
@@ -31,7 +31,7 @@ define i32 @f3(i32 %dummy, i32 *%src, i32 %b) {
; CHECK: ly %r2, 4096(%r3)
; CHECK: csy %r2, {{%r[0-9]+}}, 4096(%r3)
; CHECK: br %r14
- %ptr = getelementptr i32 *%src, i64 1024
+ %ptr = getelementptr i32, i32 *%src, i64 1024
%res = atomicrmw xchg i32 *%ptr, i32 %b seq_cst
ret i32 %res
}
@@ -42,7 +42,7 @@ define i32 @f4(i32 %dummy, i32 *%src, i32 %b) {
; CHECK: ly %r2, 524284(%r3)
; CHECK: csy %r2, {{%r[0-9]+}}, 524284(%r3)
; CHECK: br %r14
- %ptr = getelementptr i32 *%src, i64 131071
+ %ptr = getelementptr i32, i32 *%src, i64 131071
%res = atomicrmw xchg i32 *%ptr, i32 %b seq_cst
ret i32 %res
}
@@ -54,7 +54,7 @@ define i32 @f5(i32 %dummy, i32 *%src, i32 %b) {
; CHECK: l %r2, 0(%r3)
; CHECK: cs %r2, {{%r[0-9]+}}, 0(%r3)
; CHECK: br %r14
- %ptr = getelementptr i32 *%src, i64 131072
+ %ptr = getelementptr i32, i32 *%src, i64 131072
%res = atomicrmw xchg i32 *%ptr, i32 %b seq_cst
ret i32 %res
}
@@ -65,7 +65,7 @@ define i32 @f6(i32 %dummy, i32 *%src, i32 %b) {
; CHECK: ly %r2, -4(%r3)
; CHECK: csy %r2, {{%r[0-9]+}}, -4(%r3)
; CHECK: br %r14
- %ptr = getelementptr i32 *%src, i64 -1
+ %ptr = getelementptr i32, i32 *%src, i64 -1
%res = atomicrmw xchg i32 *%ptr, i32 %b seq_cst
ret i32 %res
}
@@ -76,7 +76,7 @@ define i32 @f7(i32 %dummy, i32 *%src, i32 %b) {
; CHECK: ly %r2, -524288(%r3)
; CHECK: csy %r2, {{%r[0-9]+}}, -524288(%r3)
; CHECK: br %r14
- %ptr = getelementptr i32 *%src, i64 -131072
+ %ptr = getelementptr i32, i32 *%src, i64 -131072
%res = atomicrmw xchg i32 *%ptr, i32 %b seq_cst
ret i32 %res
}
@@ -88,7 +88,7 @@ define i32 @f8(i32 %dummy, i32 *%src, i32 %b) {
; CHECK: l %r2, 0(%r3)
; CHECK: cs %r2, {{%r[0-9]+}}, 0(%r3)
; CHECK: br %r14
- %ptr = getelementptr i32 *%src, i64 -131073
+ %ptr = getelementptr i32, i32 *%src, i64 -131073
%res = atomicrmw xchg i32 *%ptr, i32 %b seq_cst
ret i32 %res
}
diff --git a/test/CodeGen/SystemZ/atomicrmw-xchg-04.ll b/test/CodeGen/SystemZ/atomicrmw-xchg-04.ll
index 80c0eeb7121b..7afc50e238fc 100644
--- a/test/CodeGen/SystemZ/atomicrmw-xchg-04.ll
+++ b/test/CodeGen/SystemZ/atomicrmw-xchg-04.ll
@@ -20,7 +20,7 @@ define i64 @f2(i64 %dummy, i64 *%src, i64 %b) {
; CHECK: lg %r2, 524280(%r3)
; CHECK: csg %r2, {{%r[0-9]+}}, 524280(%r3)
; CHECK: br %r14
- %ptr = getelementptr i64 *%src, i64 65535
+ %ptr = getelementptr i64, i64 *%src, i64 65535
%res = atomicrmw xchg i64 *%ptr, i64 %b seq_cst
ret i64 %res
}
@@ -32,7 +32,7 @@ define i64 @f3(i64 %dummy, i64 *%src, i64 %b) {
; CHECK: lg %r2, 0(%r3)
; CHECK: csg %r2, {{%r[0-9]+}}, 0(%r3)
; CHECK: br %r14
- %ptr = getelementptr i64 *%src, i64 65536
+ %ptr = getelementptr i64, i64 *%src, i64 65536
%res = atomicrmw xchg i64 *%ptr, i64 %b seq_cst
ret i64 %res
}
@@ -43,7 +43,7 @@ define i64 @f4(i64 %dummy, i64 *%src, i64 %b) {
; CHECK: lg %r2, -524288(%r3)
; CHECK: csg %r2, {{%r[0-9]+}}, -524288(%r3)
; CHECK: br %r14
- %ptr = getelementptr i64 *%src, i64 -65536
+ %ptr = getelementptr i64, i64 *%src, i64 -65536
%res = atomicrmw xchg i64 *%ptr, i64 %b seq_cst
ret i64 %res
}
@@ -55,7 +55,7 @@ define i64 @f5(i64 %dummy, i64 *%src, i64 %b) {
; CHECK: lg %r2, 0(%r3)
; CHECK: csg %r2, {{%r[0-9]+}}, 0(%r3)
; CHECK: br %r14
- %ptr = getelementptr i64 *%src, i64 -65537
+ %ptr = getelementptr i64, i64 *%src, i64 -65537
%res = atomicrmw xchg i64 *%ptr, i64 %b seq_cst
ret i64 %res
}
diff --git a/test/CodeGen/SystemZ/atomicrmw-xor-05.ll b/test/CodeGen/SystemZ/atomicrmw-xor-05.ll
index e9e7d30b3578..e821f7ee8ae4 100644
--- a/test/CodeGen/SystemZ/atomicrmw-xor-05.ll
+++ b/test/CodeGen/SystemZ/atomicrmw-xor-05.ll
@@ -26,7 +26,7 @@ define i32 @f3(i32 %dummy, i32 *%src, i32 %b) {
; CHECK-LABEL: f3:
; CHECK: lax %r2, %r4, 524284(%r3)
; CHECK: br %r14
- %ptr = getelementptr i32 *%src, i32 131071
+ %ptr = getelementptr i32, i32 *%src, i32 131071
%res = atomicrmw xor i32 *%ptr, i32 %b seq_cst
ret i32 %res
}
@@ -37,7 +37,7 @@ define i32 @f4(i32 %dummy, i32 *%src, i32 %b) {
; CHECK: agfi %r3, 524288
; CHECK: lax %r2, %r4, 0(%r3)
; CHECK: br %r14
- %ptr = getelementptr i32 *%src, i32 131072
+ %ptr = getelementptr i32, i32 *%src, i32 131072
%res = atomicrmw xor i32 *%ptr, i32 %b seq_cst
ret i32 %res
}
@@ -47,7 +47,7 @@ define i32 @f5(i32 %dummy, i32 *%src, i32 %b) {
; CHECK-LABEL: f5:
; CHECK: lax %r2, %r4, -524288(%r3)
; CHECK: br %r14
- %ptr = getelementptr i32 *%src, i32 -131072
+ %ptr = getelementptr i32, i32 *%src, i32 -131072
%res = atomicrmw xor i32 *%ptr, i32 %b seq_cst
ret i32 %res
}
@@ -58,7 +58,7 @@ define i32 @f6(i32 %dummy, i32 *%src, i32 %b) {
; CHECK: agfi %r3, -524292
; CHECK: lax %r2, %r4, 0(%r3)
; CHECK: br %r14
- %ptr = getelementptr i32 *%src, i32 -131073
+ %ptr = getelementptr i32, i32 *%src, i32 -131073
%res = atomicrmw xor i32 *%ptr, i32 %b seq_cst
ret i32 %res
}
diff --git a/test/CodeGen/SystemZ/atomicrmw-xor-06.ll b/test/CodeGen/SystemZ/atomicrmw-xor-06.ll
index 0870c6476f61..630ff6a60977 100644
--- a/test/CodeGen/SystemZ/atomicrmw-xor-06.ll
+++ b/test/CodeGen/SystemZ/atomicrmw-xor-06.ll
@@ -26,7 +26,7 @@ define i64 @f3(i64 %dummy, i64 *%src, i64 %b) {
; CHECK-LABEL: f3:
; CHECK: laxg %r2, %r4, 524280(%r3)
; CHECK: br %r14
- %ptr = getelementptr i64 *%src, i64 65535
+ %ptr = getelementptr i64, i64 *%src, i64 65535
%res = atomicrmw xor i64 *%ptr, i64 %b seq_cst
ret i64 %res
}
@@ -37,7 +37,7 @@ define i64 @f4(i64 %dummy, i64 *%src, i64 %b) {
; CHECK: agfi %r3, 524288
; CHECK: laxg %r2, %r4, 0(%r3)
; CHECK: br %r14
- %ptr = getelementptr i64 *%src, i64 65536
+ %ptr = getelementptr i64, i64 *%src, i64 65536
%res = atomicrmw xor i64 *%ptr, i64 %b seq_cst
ret i64 %res
}
@@ -47,7 +47,7 @@ define i64 @f5(i64 %dummy, i64 *%src, i64 %b) {
; CHECK-LABEL: f5:
; CHECK: laxg %r2, %r4, -524288(%r3)
; CHECK: br %r14
- %ptr = getelementptr i64 *%src, i64 -65536
+ %ptr = getelementptr i64, i64 *%src, i64 -65536
%res = atomicrmw xor i64 *%ptr, i64 %b seq_cst
ret i64 %res
}
@@ -58,7 +58,7 @@ define i64 @f6(i64 %dummy, i64 *%src, i64 %b) {
; CHECK: agfi %r3, -524296
; CHECK: laxg %r2, %r4, 0(%r3)
; CHECK: br %r14
- %ptr = getelementptr i64 *%src, i64 -65537
+ %ptr = getelementptr i64, i64 *%src, i64 -65537
%res = atomicrmw xor i64 *%ptr, i64 %b seq_cst
ret i64 %res
}
diff --git a/test/CodeGen/SystemZ/branch-02.ll b/test/CodeGen/SystemZ/branch-02.ll
index 38b5d27049d8..5a30cad66918 100644
--- a/test/CodeGen/SystemZ/branch-02.ll
+++ b/test/CodeGen/SystemZ/branch-02.ll
@@ -12,7 +12,7 @@ define void @f1(i32 *%src, i32 %target) {
; CHECK-NEXT: je .L[[LABEL]]
br label %loop
loop:
- %val = load volatile i32 *%src
+ %val = load volatile i32 , i32 *%src
%cond = icmp eq i32 %target, %val
br i1 %cond, label %loop, label %exit
exit:
@@ -27,7 +27,7 @@ define void @f2(i32 *%src, i32 %target) {
; CHECK-NEXT: jlh .L[[LABEL]]
br label %loop
loop:
- %val = load volatile i32 *%src
+ %val = load volatile i32 , i32 *%src
%cond = icmp ne i32 %target, %val
br i1 %cond, label %loop, label %exit
exit:
@@ -42,7 +42,7 @@ define void @f3(i32 *%src, i32 %target) {
; CHECK-NEXT: jle .L[[LABEL]]
br label %loop
loop:
- %val = load volatile i32 *%src
+ %val = load volatile i32 , i32 *%src
%cond = icmp sle i32 %target, %val
br i1 %cond, label %loop, label %exit
exit:
@@ -57,7 +57,7 @@ define void @f4(i32 *%src, i32 %target) {
; CHECK-NEXT: jl .L[[LABEL]]
br label %loop
loop:
- %val = load volatile i32 *%src
+ %val = load volatile i32 , i32 *%src
%cond = icmp slt i32 %target, %val
br i1 %cond, label %loop, label %exit
exit:
@@ -72,7 +72,7 @@ define void @f5(i32 *%src, i32 %target) {
; CHECK-NEXT: jh .L[[LABEL]]
br label %loop
loop:
- %val = load volatile i32 *%src
+ %val = load volatile i32 , i32 *%src
%cond = icmp sgt i32 %target, %val
br i1 %cond, label %loop, label %exit
exit:
@@ -87,7 +87,7 @@ define void @f6(i32 *%src, i32 %target) {
; CHECK-NEXT: jhe .L[[LABEL]]
br label %loop
loop:
- %val = load volatile i32 *%src
+ %val = load volatile i32 , i32 *%src
%cond = icmp sge i32 %target, %val
br i1 %cond, label %loop, label %exit
exit:
diff --git a/test/CodeGen/SystemZ/branch-03.ll b/test/CodeGen/SystemZ/branch-03.ll
index ef31a9c696ea..a258e850ea3e 100644
--- a/test/CodeGen/SystemZ/branch-03.ll
+++ b/test/CodeGen/SystemZ/branch-03.ll
@@ -10,7 +10,7 @@ define void @f1(i32 *%src, i32 %target) {
; CHECK-NEXT: jle .L[[LABEL]]
br label %loop
loop:
- %val = load volatile i32 *%src
+ %val = load volatile i32 , i32 *%src
%cond = icmp ule i32 %target, %val
br i1 %cond, label %loop, label %exit
exit:
@@ -25,7 +25,7 @@ define void @f2(i32 *%src, i32 %target) {
; CHECK-NEXT: jl .L[[LABEL]]
br label %loop
loop:
- %val = load volatile i32 *%src
+ %val = load volatile i32 , i32 *%src
%cond = icmp ult i32 %target, %val
br i1 %cond, label %loop, label %exit
exit:
@@ -40,7 +40,7 @@ define void @f3(i32 *%src, i32 %target) {
; CHECK-NEXT: jh .L[[LABEL]]
br label %loop
loop:
- %val = load volatile i32 *%src
+ %val = load volatile i32 , i32 *%src
%cond = icmp ugt i32 %target, %val
br i1 %cond, label %loop, label %exit
exit:
@@ -55,7 +55,7 @@ define void @f4(i32 *%src, i32 %target) {
; CHECK-NEXT: jhe .L[[LABEL]]
br label %loop
loop:
- %val = load volatile i32 *%src
+ %val = load volatile i32 , i32 *%src
%cond = icmp uge i32 %target, %val
br i1 %cond, label %loop, label %exit
exit:
diff --git a/test/CodeGen/SystemZ/branch-04.ll b/test/CodeGen/SystemZ/branch-04.ll
index fafb234616f1..8df2ca029f5f 100644
--- a/test/CodeGen/SystemZ/branch-04.ll
+++ b/test/CodeGen/SystemZ/branch-04.ll
@@ -11,7 +11,7 @@ define void @f1(float *%src, float %target) {
; CHECK-NEXT: je .L[[LABEL]]
br label %loop
loop:
- %val = load volatile float *%src
+ %val = load volatile float , float *%src
%cond = fcmp oeq float %target, %val
br i1 %cond, label %loop, label %exit
exit:
@@ -26,7 +26,7 @@ define void @f2(float *%src, float %target) {
; CHECK-NEXT: jlh .L[[LABEL]]
br label %loop
loop:
- %val = load volatile float *%src
+ %val = load volatile float , float *%src
%cond = fcmp one float %target, %val
br i1 %cond, label %loop, label %exit
exit:
@@ -41,7 +41,7 @@ define void @f3(float *%src, float %target) {
; CHECK-NEXT: jle .L[[LABEL]]
br label %loop
loop:
- %val = load volatile float *%src
+ %val = load volatile float , float *%src
%cond = fcmp ole float %target, %val
br i1 %cond, label %loop, label %exit
exit:
@@ -56,7 +56,7 @@ define void @f4(float *%src, float %target) {
; CHECK-NEXT: jl .L[[LABEL]]
br label %loop
loop:
- %val = load volatile float *%src
+ %val = load volatile float , float *%src
%cond = fcmp olt float %target, %val
br i1 %cond, label %loop, label %exit
exit:
@@ -71,7 +71,7 @@ define void @f5(float *%src, float %target) {
; CHECK-NEXT: jh .L[[LABEL]]
br label %loop
loop:
- %val = load volatile float *%src
+ %val = load volatile float , float *%src
%cond = fcmp ogt float %target, %val
br i1 %cond, label %loop, label %exit
exit:
@@ -86,7 +86,7 @@ define void @f6(float *%src, float %target) {
; CHECK-NEXT: jhe .L[[LABEL]]
br label %loop
loop:
- %val = load volatile float *%src
+ %val = load volatile float , float *%src
%cond = fcmp oge float %target, %val
br i1 %cond, label %loop, label %exit
exit:
@@ -101,7 +101,7 @@ define void @f7(float *%src, float %target) {
; CHECK-NEXT: jnlh .L[[LABEL]]
br label %loop
loop:
- %val = load volatile float *%src
+ %val = load volatile float , float *%src
%cond = fcmp ueq float %target, %val
br i1 %cond, label %loop, label %exit
exit:
@@ -116,7 +116,7 @@ define void @f8(float *%src, float %target) {
; CHECK-NEXT: jne .L[[LABEL]]
br label %loop
loop:
- %val = load volatile float *%src
+ %val = load volatile float , float *%src
%cond = fcmp une float %target, %val
br i1 %cond, label %loop, label %exit
exit:
@@ -131,7 +131,7 @@ define void @f9(float *%src, float %target) {
; CHECK-NEXT: jnh .L[[LABEL]]
br label %loop
loop:
- %val = load volatile float *%src
+ %val = load volatile float , float *%src
%cond = fcmp ule float %target, %val
br i1 %cond, label %loop, label %exit
exit:
@@ -146,7 +146,7 @@ define void @f10(float *%src, float %target) {
; CHECK-NEXT: jnhe .L[[LABEL]]
br label %loop
loop:
- %val = load volatile float *%src
+ %val = load volatile float , float *%src
%cond = fcmp ult float %target, %val
br i1 %cond, label %loop, label %exit
exit:
@@ -161,7 +161,7 @@ define void @f11(float *%src, float %target) {
; CHECK-NEXT: jnle .L[[LABEL]]
br label %loop
loop:
- %val = load volatile float *%src
+ %val = load volatile float , float *%src
%cond = fcmp ugt float %target, %val
br i1 %cond, label %loop, label %exit
exit:
@@ -176,7 +176,7 @@ define void @f12(float *%src, float %target) {
; CHECK-NEXT: jnl .L[[LABEL]]
br label %loop
loop:
- %val = load volatile float *%src
+ %val = load volatile float , float *%src
%cond = fcmp uge float %target, %val
br i1 %cond, label %loop, label %exit
exit:
@@ -193,7 +193,7 @@ define void @f13(float *%src, float %target) {
; CHECK-NEXT: jno .L[[LABEL]]
br label %loop
loop:
- %val = load volatile float *%src
+ %val = load volatile float , float *%src
%cond = fcmp ord float %target, %val
br i1 %cond, label %loop, label %exit
exit:
@@ -210,7 +210,7 @@ define void @f14(float *%src, float %target) {
; CHECK-NEXT: jo .L[[LABEL]]
br label %loop
loop:
- %val = load volatile float *%src
+ %val = load volatile float , float *%src
%cond = fcmp uno float %target, %val
br i1 %cond, label %loop, label %exit
exit:
diff --git a/test/CodeGen/SystemZ/branch-06.ll b/test/CodeGen/SystemZ/branch-06.ll
index 2fa23b744afb..4549b1295cf8 100644
--- a/test/CodeGen/SystemZ/branch-06.ll
+++ b/test/CodeGen/SystemZ/branch-06.ll
@@ -100,7 +100,7 @@ define void @f7(i8 *%targetptr) {
br label %loop
loop:
%val = call i32 @foo()
- %byte = load i8 *%targetptr
+ %byte = load i8 , i8 *%targetptr
%target = zext i8 %byte to i32
%cond = icmp eq i32 %val, %target
br i1 %cond, label %loop, label %exit
@@ -118,7 +118,7 @@ define void @f8(i16 *%targetptr) {
br label %loop
loop:
%val = call i32 @foo()
- %half = load i16 *%targetptr
+ %half = load i16 , i16 *%targetptr
%target = zext i16 %half to i32
%cond = icmp eq i32 %val, %target
br i1 %cond, label %loop, label %exit
@@ -136,7 +136,7 @@ define void @f9(i16 *%targetptr) {
br label %loop
loop:
%val = call i32 @foo()
- %half = load i16 *@g1
+ %half = load i16 , i16 *@g1
%target = zext i16 %half to i32
%cond = icmp eq i32 %val, %target
br i1 %cond, label %loop, label %exit
@@ -156,9 +156,9 @@ define void @f10(i8 *%targetptr1) {
br label %loop
loop:
%val = call i32 @foo()
- %targetptr2 = getelementptr i8 *%targetptr1, i64 1
- %byte1 = load i8 *%targetptr1
- %byte2 = load i8 *%targetptr2
+ %targetptr2 = getelementptr i8, i8 *%targetptr1, i64 1
+ %byte1 = load i8 , i8 *%targetptr1
+ %byte2 = load i8 , i8 *%targetptr2
%ext1 = zext i8 %byte1 to i32
%ext2 = zext i8 %byte2 to i32
%cond = icmp ult i32 %ext1, %ext2
@@ -178,9 +178,9 @@ define void @f11(i16 *%targetptr1) {
br label %loop
loop:
%val = call i32 @foo()
- %targetptr2 = getelementptr i16 *%targetptr1, i64 1
- %half1 = load i16 *%targetptr1
- %half2 = load i16 *%targetptr2
+ %targetptr2 = getelementptr i16, i16 *%targetptr1, i64 1
+ %half1 = load i16 , i16 *%targetptr1
+ %half2 = load i16 , i16 *%targetptr2
%ext1 = zext i16 %half1 to i32
%ext2 = zext i16 %half2 to i32
%cond = icmp ult i32 %ext1, %ext2
diff --git a/test/CodeGen/SystemZ/branch-08.ll b/test/CodeGen/SystemZ/branch-08.ll
index 6741d29aec03..0aa48063d071 100644
--- a/test/CodeGen/SystemZ/branch-08.ll
+++ b/test/CodeGen/SystemZ/branch-08.ll
@@ -14,7 +14,7 @@ define i32 @f1(i32 %a, i32 *%bptr) {
; CHECK: .L[[LABEL]]:
; CHECK: brasl %r14, foo@PLT
entry:
- %b = load i32 *%bptr
+ %b = load i32 , i32 *%bptr
%cmp = icmp ult i32 %a, %b
br i1 %cmp, label %callit, label %return
diff --git a/test/CodeGen/SystemZ/bswap-02.ll b/test/CodeGen/SystemZ/bswap-02.ll
index db69ea53dfe1..9c964569dfd5 100644
--- a/test/CodeGen/SystemZ/bswap-02.ll
+++ b/test/CodeGen/SystemZ/bswap-02.ll
@@ -9,7 +9,7 @@ define i32 @f1(i32 *%src) {
; CHECK-LABEL: f1:
; CHECK: lrv %r2, 0(%r2)
; CHECK: br %r14
- %a = load i32 *%src
+ %a = load i32 , i32 *%src
%swapped = call i32 @llvm.bswap.i32(i32 %a)
ret i32 %swapped
}
@@ -19,8 +19,8 @@ define i32 @f2(i32 *%src) {
; CHECK-LABEL: f2:
; CHECK: lrv %r2, 524284(%r2)
; CHECK: br %r14
- %ptr = getelementptr i32 *%src, i64 131071
- %a = load i32 *%ptr
+ %ptr = getelementptr i32, i32 *%src, i64 131071
+ %a = load i32 , i32 *%ptr
%swapped = call i32 @llvm.bswap.i32(i32 %a)
ret i32 %swapped
}
@@ -32,8 +32,8 @@ define i32 @f3(i32 *%src) {
; CHECK: agfi %r2, 524288
; CHECK: lrv %r2, 0(%r2)
; CHECK: br %r14
- %ptr = getelementptr i32 *%src, i64 131072
- %a = load i32 *%ptr
+ %ptr = getelementptr i32, i32 *%src, i64 131072
+ %a = load i32 , i32 *%ptr
%swapped = call i32 @llvm.bswap.i32(i32 %a)
ret i32 %swapped
}
@@ -43,8 +43,8 @@ define i32 @f4(i32 *%src) {
; CHECK-LABEL: f4:
; CHECK: lrv %r2, -4(%r2)
; CHECK: br %r14
- %ptr = getelementptr i32 *%src, i64 -1
- %a = load i32 *%ptr
+ %ptr = getelementptr i32, i32 *%src, i64 -1
+ %a = load i32 , i32 *%ptr
%swapped = call i32 @llvm.bswap.i32(i32 %a)
ret i32 %swapped
}
@@ -54,8 +54,8 @@ define i32 @f5(i32 *%src) {
; CHECK-LABEL: f5:
; CHECK: lrv %r2, -524288(%r2)
; CHECK: br %r14
- %ptr = getelementptr i32 *%src, i64 -131072
- %a = load i32 *%ptr
+ %ptr = getelementptr i32, i32 *%src, i64 -131072
+ %a = load i32 , i32 *%ptr
%swapped = call i32 @llvm.bswap.i32(i32 %a)
ret i32 %swapped
}
@@ -67,8 +67,8 @@ define i32 @f6(i32 *%src) {
; CHECK: agfi %r2, -524292
; CHECK: lrv %r2, 0(%r2)
; CHECK: br %r14
- %ptr = getelementptr i32 *%src, i64 -131073
- %a = load i32 *%ptr
+ %ptr = getelementptr i32, i32 *%src, i64 -131073
+ %a = load i32 , i32 *%ptr
%swapped = call i32 @llvm.bswap.i32(i32 %a)
ret i32 %swapped
}
@@ -81,7 +81,7 @@ define i32 @f7(i64 %src, i64 %index) {
%add1 = add i64 %src, %index
%add2 = add i64 %add1, 524287
%ptr = inttoptr i64 %add2 to i32 *
- %a = load i32 *%ptr
+ %a = load i32 , i32 *%ptr
%swapped = call i32 @llvm.bswap.i32(i32 %a)
ret i32 %swapped
}
@@ -93,7 +93,7 @@ define i32 @f8(i32 *%src) {
; CHECK: l [[REG:%r[0-5]]], 0(%r2)
; CHECK: lrvr %r2, [[REG]]
; CHECK: br %r14
- %a = load volatile i32 *%src
+ %a = load volatile i32 , i32 *%src
%swapped = call i32 @llvm.bswap.i32(i32 %a)
ret i32 %swapped
}
@@ -104,22 +104,22 @@ define void @f9(i32 *%ptr) {
; CHECK-LABEL: f9:
; CHECK: lrv {{%r[0-9]+}}, 16{{[04]}}(%r15)
; CHECK: br %r14
- %val0 = load volatile i32 *%ptr
- %val1 = load volatile i32 *%ptr
- %val2 = load volatile i32 *%ptr
- %val3 = load volatile i32 *%ptr
- %val4 = load volatile i32 *%ptr
- %val5 = load volatile i32 *%ptr
- %val6 = load volatile i32 *%ptr
- %val7 = load volatile i32 *%ptr
- %val8 = load volatile i32 *%ptr
- %val9 = load volatile i32 *%ptr
- %val10 = load volatile i32 *%ptr
- %val11 = load volatile i32 *%ptr
- %val12 = load volatile i32 *%ptr
- %val13 = load volatile i32 *%ptr
- %val14 = load volatile i32 *%ptr
- %val15 = load volatile i32 *%ptr
+ %val0 = load volatile i32 , i32 *%ptr
+ %val1 = load volatile i32 , i32 *%ptr
+ %val2 = load volatile i32 , i32 *%ptr
+ %val3 = load volatile i32 , i32 *%ptr
+ %val4 = load volatile i32 , i32 *%ptr
+ %val5 = load volatile i32 , i32 *%ptr
+ %val6 = load volatile i32 , i32 *%ptr
+ %val7 = load volatile i32 , i32 *%ptr
+ %val8 = load volatile i32 , i32 *%ptr
+ %val9 = load volatile i32 , i32 *%ptr
+ %val10 = load volatile i32 , i32 *%ptr
+ %val11 = load volatile i32 , i32 *%ptr
+ %val12 = load volatile i32 , i32 *%ptr
+ %val13 = load volatile i32 , i32 *%ptr
+ %val14 = load volatile i32 , i32 *%ptr
+ %val15 = load volatile i32 , i32 *%ptr
%swapped0 = call i32 @llvm.bswap.i32(i32 %val0)
%swapped1 = call i32 @llvm.bswap.i32(i32 %val1)
diff --git a/test/CodeGen/SystemZ/bswap-03.ll b/test/CodeGen/SystemZ/bswap-03.ll
index d9e5ad1b52f6..ea62c4f71df6 100644
--- a/test/CodeGen/SystemZ/bswap-03.ll
+++ b/test/CodeGen/SystemZ/bswap-03.ll
@@ -9,7 +9,7 @@ define i64 @f1(i64 *%src) {
; CHECK-LABEL: f1:
; CHECK: lrvg %r2, 0(%r2)
; CHECK: br %r14
- %a = load i64 *%src
+ %a = load i64 , i64 *%src
%swapped = call i64 @llvm.bswap.i64(i64 %a)
ret i64 %swapped
}
@@ -19,8 +19,8 @@ define i64 @f2(i64 *%src) {
; CHECK-LABEL: f2:
; CHECK: lrvg %r2, 524280(%r2)
; CHECK: br %r14
- %ptr = getelementptr i64 *%src, i64 65535
- %a = load i64 *%ptr
+ %ptr = getelementptr i64, i64 *%src, i64 65535
+ %a = load i64 , i64 *%ptr
%swapped = call i64 @llvm.bswap.i64(i64 %a)
ret i64 %swapped
}
@@ -32,8 +32,8 @@ define i64 @f3(i64 *%src) {
; CHECK: agfi %r2, 524288
; CHECK: lrvg %r2, 0(%r2)
; CHECK: br %r14
- %ptr = getelementptr i64 *%src, i64 65536
- %a = load i64 *%ptr
+ %ptr = getelementptr i64, i64 *%src, i64 65536
+ %a = load i64 , i64 *%ptr
%swapped = call i64 @llvm.bswap.i64(i64 %a)
ret i64 %swapped
}
@@ -43,8 +43,8 @@ define i64 @f4(i64 *%src) {
; CHECK-LABEL: f4:
; CHECK: lrvg %r2, -8(%r2)
; CHECK: br %r14
- %ptr = getelementptr i64 *%src, i64 -1
- %a = load i64 *%ptr
+ %ptr = getelementptr i64, i64 *%src, i64 -1
+ %a = load i64 , i64 *%ptr
%swapped = call i64 @llvm.bswap.i64(i64 %a)
ret i64 %swapped
}
@@ -54,8 +54,8 @@ define i64 @f5(i64 *%src) {
; CHECK-LABEL: f5:
; CHECK: lrvg %r2, -524288(%r2)
; CHECK: br %r14
- %ptr = getelementptr i64 *%src, i64 -65536
- %a = load i64 *%ptr
+ %ptr = getelementptr i64, i64 *%src, i64 -65536
+ %a = load i64 , i64 *%ptr
%swapped = call i64 @llvm.bswap.i64(i64 %a)
ret i64 %swapped
}
@@ -67,8 +67,8 @@ define i64 @f6(i64 *%src) {
; CHECK: agfi %r2, -524296
; CHECK: lrvg %r2, 0(%r2)
; CHECK: br %r14
- %ptr = getelementptr i64 *%src, i64 -65537
- %a = load i64 *%ptr
+ %ptr = getelementptr i64, i64 *%src, i64 -65537
+ %a = load i64 , i64 *%ptr
%swapped = call i64 @llvm.bswap.i64(i64 %a)
ret i64 %swapped
}
@@ -81,7 +81,7 @@ define i64 @f7(i64 %src, i64 %index) {
%add1 = add i64 %src, %index
%add2 = add i64 %add1, 524287
%ptr = inttoptr i64 %add2 to i64 *
- %a = load i64 *%ptr
+ %a = load i64 , i64 *%ptr
%swapped = call i64 @llvm.bswap.i64(i64 %a)
ret i64 %swapped
}
@@ -93,7 +93,7 @@ define i64 @f8(i64 *%src) {
; CHECK: lg [[REG:%r[0-5]]], 0(%r2)
; CHECK: lrvgr %r2, [[REG]]
; CHECK: br %r14
- %a = load volatile i64 *%src
+ %a = load volatile i64 , i64 *%src
%swapped = call i64 @llvm.bswap.i64(i64 %a)
ret i64 %swapped
}
@@ -104,22 +104,22 @@ define void @f9(i64 *%ptr) {
; CHECK-LABEL: f9:
; CHECK: lrvg {{%r[0-9]+}}, 160(%r15)
; CHECK: br %r14
- %val0 = load volatile i64 *%ptr
- %val1 = load volatile i64 *%ptr
- %val2 = load volatile i64 *%ptr
- %val3 = load volatile i64 *%ptr
- %val4 = load volatile i64 *%ptr
- %val5 = load volatile i64 *%ptr
- %val6 = load volatile i64 *%ptr
- %val7 = load volatile i64 *%ptr
- %val8 = load volatile i64 *%ptr
- %val9 = load volatile i64 *%ptr
- %val10 = load volatile i64 *%ptr
- %val11 = load volatile i64 *%ptr
- %val12 = load volatile i64 *%ptr
- %val13 = load volatile i64 *%ptr
- %val14 = load volatile i64 *%ptr
- %val15 = load volatile i64 *%ptr
+ %val0 = load volatile i64 , i64 *%ptr
+ %val1 = load volatile i64 , i64 *%ptr
+ %val2 = load volatile i64 , i64 *%ptr
+ %val3 = load volatile i64 , i64 *%ptr
+ %val4 = load volatile i64 , i64 *%ptr
+ %val5 = load volatile i64 , i64 *%ptr
+ %val6 = load volatile i64 , i64 *%ptr
+ %val7 = load volatile i64 , i64 *%ptr
+ %val8 = load volatile i64 , i64 *%ptr
+ %val9 = load volatile i64 , i64 *%ptr
+ %val10 = load volatile i64 , i64 *%ptr
+ %val11 = load volatile i64 , i64 *%ptr
+ %val12 = load volatile i64 , i64 *%ptr
+ %val13 = load volatile i64 , i64 *%ptr
+ %val14 = load volatile i64 , i64 *%ptr
+ %val15 = load volatile i64 , i64 *%ptr
%swapped0 = call i64 @llvm.bswap.i64(i64 %val0)
%swapped1 = call i64 @llvm.bswap.i64(i64 %val1)
diff --git a/test/CodeGen/SystemZ/bswap-04.ll b/test/CodeGen/SystemZ/bswap-04.ll
index 29d5a7b07212..ce4395210f1f 100644
--- a/test/CodeGen/SystemZ/bswap-04.ll
+++ b/test/CodeGen/SystemZ/bswap-04.ll
@@ -19,7 +19,7 @@ define void @f2(i32 *%dst, i32 %a) {
; CHECK-LABEL: f2:
; CHECK: strv %r3, 524284(%r2)
; CHECK: br %r14
- %ptr = getelementptr i32 *%dst, i64 131071
+ %ptr = getelementptr i32, i32 *%dst, i64 131071
%swapped = call i32 @llvm.bswap.i32(i32 %a)
store i32 %swapped, i32 *%ptr
ret void
@@ -32,7 +32,7 @@ define void @f3(i32 *%dst, i32 %a) {
; CHECK: agfi %r2, 524288
; CHECK: strv %r3, 0(%r2)
; CHECK: br %r14
- %ptr = getelementptr i32 *%dst, i64 131072
+ %ptr = getelementptr i32, i32 *%dst, i64 131072
%swapped = call i32 @llvm.bswap.i32(i32 %a)
store i32 %swapped, i32 *%ptr
ret void
@@ -43,7 +43,7 @@ define void @f4(i32 *%dst, i32 %a) {
; CHECK-LABEL: f4:
; CHECK: strv %r3, -4(%r2)
; CHECK: br %r14
- %ptr = getelementptr i32 *%dst, i64 -1
+ %ptr = getelementptr i32, i32 *%dst, i64 -1
%swapped = call i32 @llvm.bswap.i32(i32 %a)
store i32 %swapped, i32 *%ptr
ret void
@@ -54,7 +54,7 @@ define void @f5(i32 *%dst, i32 %a) {
; CHECK-LABEL: f5:
; CHECK: strv %r3, -524288(%r2)
; CHECK: br %r14
- %ptr = getelementptr i32 *%dst, i64 -131072
+ %ptr = getelementptr i32, i32 *%dst, i64 -131072
%swapped = call i32 @llvm.bswap.i32(i32 %a)
store i32 %swapped, i32 *%ptr
ret void
@@ -67,7 +67,7 @@ define void @f6(i32 *%dst, i32 %a) {
; CHECK: agfi %r2, -524292
; CHECK: strv %r3, 0(%r2)
; CHECK: br %r14
- %ptr = getelementptr i32 *%dst, i64 -131073
+ %ptr = getelementptr i32, i32 *%dst, i64 -131073
%swapped = call i32 @llvm.bswap.i32(i32 %a)
store i32 %swapped, i32 *%ptr
ret void
diff --git a/test/CodeGen/SystemZ/bswap-05.ll b/test/CodeGen/SystemZ/bswap-05.ll
index 5c8361e26cea..5f90ef3b9b65 100644
--- a/test/CodeGen/SystemZ/bswap-05.ll
+++ b/test/CodeGen/SystemZ/bswap-05.ll
@@ -19,7 +19,7 @@ define void @f2(i64 *%dst, i64 %a) {
; CHECK-LABEL: f2:
; CHECK: strvg %r3, 524280(%r2)
; CHECK: br %r14
- %ptr = getelementptr i64 *%dst, i64 65535
+ %ptr = getelementptr i64, i64 *%dst, i64 65535
%swapped = call i64 @llvm.bswap.i64(i64 %a)
store i64 %swapped, i64 *%ptr
ret void
@@ -32,7 +32,7 @@ define void @f3(i64 *%dst, i64 %a) {
; CHECK: agfi %r2, 524288
; CHECK: strvg %r3, 0(%r2)
; CHECK: br %r14
- %ptr = getelementptr i64 *%dst, i64 65536
+ %ptr = getelementptr i64, i64 *%dst, i64 65536
%swapped = call i64 @llvm.bswap.i64(i64 %a)
store i64 %swapped, i64 *%ptr
ret void
@@ -43,7 +43,7 @@ define void @f4(i64 *%dst, i64 %a) {
; CHECK-LABEL: f4:
; CHECK: strvg %r3, -8(%r2)
; CHECK: br %r14
- %ptr = getelementptr i64 *%dst, i64 -1
+ %ptr = getelementptr i64, i64 *%dst, i64 -1
%swapped = call i64 @llvm.bswap.i64(i64 %a)
store i64 %swapped, i64 *%ptr
ret void
@@ -54,7 +54,7 @@ define void @f5(i64 *%dst, i64 %a) {
; CHECK-LABEL: f5:
; CHECK: strvg %r3, -524288(%r2)
; CHECK: br %r14
- %ptr = getelementptr i64 *%dst, i64 -65536
+ %ptr = getelementptr i64, i64 *%dst, i64 -65536
%swapped = call i64 @llvm.bswap.i64(i64 %a)
store i64 %swapped, i64 *%ptr
ret void
@@ -67,7 +67,7 @@ define void @f6(i64 *%dst, i64 %a) {
; CHECK: agfi %r2, -524296
; CHECK: strvg %r3, 0(%r2)
; CHECK: br %r14
- %ptr = getelementptr i64 *%dst, i64 -65537
+ %ptr = getelementptr i64, i64 *%dst, i64 -65537
%swapped = call i64 @llvm.bswap.i64(i64 %a)
store i64 %swapped, i64 *%ptr
ret void
diff --git a/test/CodeGen/SystemZ/cmpxchg-03.ll b/test/CodeGen/SystemZ/cmpxchg-03.ll
index c5fab4dc0439..c6e1955a50fd 100644
--- a/test/CodeGen/SystemZ/cmpxchg-03.ll
+++ b/test/CodeGen/SystemZ/cmpxchg-03.ll
@@ -17,7 +17,7 @@ define i32 @f2(i32 %cmp, i32 %swap, i32 *%src) {
; CHECK-LABEL: f2:
; CHECK: cs %r2, %r3, 4092(%r4)
; CHECK: br %r14
- %ptr = getelementptr i32 *%src, i64 1023
+ %ptr = getelementptr i32, i32 *%src, i64 1023
%pair = cmpxchg i32 *%ptr, i32 %cmp, i32 %swap seq_cst seq_cst
%val = extractvalue { i32, i1 } %pair, 0
ret i32 %val
@@ -28,7 +28,7 @@ define i32 @f3(i32 %cmp, i32 %swap, i32 *%src) {
; CHECK-LABEL: f3:
; CHECK: csy %r2, %r3, 4096(%r4)
; CHECK: br %r14
- %ptr = getelementptr i32 *%src, i64 1024
+ %ptr = getelementptr i32, i32 *%src, i64 1024
%pair = cmpxchg i32 *%ptr, i32 %cmp, i32 %swap seq_cst seq_cst
%val = extractvalue { i32, i1 } %pair, 0
ret i32 %val
@@ -39,7 +39,7 @@ define i32 @f4(i32 %cmp, i32 %swap, i32 *%src) {
; CHECK-LABEL: f4:
; CHECK: csy %r2, %r3, 524284(%r4)
; CHECK: br %r14
- %ptr = getelementptr i32 *%src, i64 131071
+ %ptr = getelementptr i32, i32 *%src, i64 131071
%pair = cmpxchg i32 *%ptr, i32 %cmp, i32 %swap seq_cst seq_cst
%val = extractvalue { i32, i1 } %pair, 0
ret i32 %val
@@ -52,7 +52,7 @@ define i32 @f5(i32 %cmp, i32 %swap, i32 *%src) {
; CHECK: agfi %r4, 524288
; CHECK: cs %r2, %r3, 0(%r4)
; CHECK: br %r14
- %ptr = getelementptr i32 *%src, i64 131072
+ %ptr = getelementptr i32, i32 *%src, i64 131072
%pair = cmpxchg i32 *%ptr, i32 %cmp, i32 %swap seq_cst seq_cst
%val = extractvalue { i32, i1 } %pair, 0
ret i32 %val
@@ -63,7 +63,7 @@ define i32 @f6(i32 %cmp, i32 %swap, i32 *%src) {
; CHECK-LABEL: f6:
; CHECK: csy %r2, %r3, -4(%r4)
; CHECK: br %r14
- %ptr = getelementptr i32 *%src, i64 -1
+ %ptr = getelementptr i32, i32 *%src, i64 -1
%pair = cmpxchg i32 *%ptr, i32 %cmp, i32 %swap seq_cst seq_cst
%val = extractvalue { i32, i1 } %pair, 0
ret i32 %val
@@ -74,7 +74,7 @@ define i32 @f7(i32 %cmp, i32 %swap, i32 *%src) {
; CHECK-LABEL: f7:
; CHECK: csy %r2, %r3, -524288(%r4)
; CHECK: br %r14
- %ptr = getelementptr i32 *%src, i64 -131072
+ %ptr = getelementptr i32, i32 *%src, i64 -131072
%pair = cmpxchg i32 *%ptr, i32 %cmp, i32 %swap seq_cst seq_cst
%val = extractvalue { i32, i1 } %pair, 0
ret i32 %val
@@ -87,7 +87,7 @@ define i32 @f8(i32 %cmp, i32 %swap, i32 *%src) {
; CHECK: agfi %r4, -524292
; CHECK: cs %r2, %r3, 0(%r4)
; CHECK: br %r14
- %ptr = getelementptr i32 *%src, i64 -131073
+ %ptr = getelementptr i32, i32 *%src, i64 -131073
%pair = cmpxchg i32 *%ptr, i32 %cmp, i32 %swap seq_cst seq_cst
%val = extractvalue { i32, i1 } %pair, 0
ret i32 %val
diff --git a/test/CodeGen/SystemZ/cmpxchg-04.ll b/test/CodeGen/SystemZ/cmpxchg-04.ll
index ba1493e1853e..b0560876b876 100644
--- a/test/CodeGen/SystemZ/cmpxchg-04.ll
+++ b/test/CodeGen/SystemZ/cmpxchg-04.ll
@@ -17,7 +17,7 @@ define i64 @f2(i64 %cmp, i64 %swap, i64 *%src) {
; CHECK-LABEL: f2:
; CHECK: csg %r2, %r3, 524280(%r4)
; CHECK: br %r14
- %ptr = getelementptr i64 *%src, i64 65535
+ %ptr = getelementptr i64, i64 *%src, i64 65535
%pairval = cmpxchg i64 *%ptr, i64 %cmp, i64 %swap seq_cst seq_cst
%val = extractvalue { i64, i1 } %pairval, 0
ret i64 %val
@@ -30,7 +30,7 @@ define i64 @f3(i64 %cmp, i64 %swap, i64 *%src) {
; CHECK: agfi %r4, 524288
; CHECK: csg %r2, %r3, 0(%r4)
; CHECK: br %r14
- %ptr = getelementptr i64 *%src, i64 65536
+ %ptr = getelementptr i64, i64 *%src, i64 65536
%pairval = cmpxchg i64 *%ptr, i64 %cmp, i64 %swap seq_cst seq_cst
%val = extractvalue { i64, i1 } %pairval, 0
ret i64 %val
@@ -41,7 +41,7 @@ define i64 @f4(i64 %cmp, i64 %swap, i64 *%src) {
; CHECK-LABEL: f4:
; CHECK: csg %r2, %r3, -8(%r4)
; CHECK: br %r14
- %ptr = getelementptr i64 *%src, i64 -1
+ %ptr = getelementptr i64, i64 *%src, i64 -1
%pairval = cmpxchg i64 *%ptr, i64 %cmp, i64 %swap seq_cst seq_cst
%val = extractvalue { i64, i1 } %pairval, 0
ret i64 %val
@@ -52,7 +52,7 @@ define i64 @f5(i64 %cmp, i64 %swap, i64 *%src) {
; CHECK-LABEL: f5:
; CHECK: csg %r2, %r3, -524288(%r4)
; CHECK: br %r14
- %ptr = getelementptr i64 *%src, i64 -65536
+ %ptr = getelementptr i64, i64 *%src, i64 -65536
%pairval = cmpxchg i64 *%ptr, i64 %cmp, i64 %swap seq_cst seq_cst
%val = extractvalue { i64, i1 } %pairval, 0
ret i64 %val
@@ -65,7 +65,7 @@ define i64 @f6(i64 %cmp, i64 %swap, i64 *%src) {
; CHECK: agfi %r4, -524296
; CHECK: csg %r2, %r3, 0(%r4)
; CHECK: br %r14
- %ptr = getelementptr i64 *%src, i64 -65537
+ %ptr = getelementptr i64, i64 *%src, i64 -65537
%pairval = cmpxchg i64 *%ptr, i64 %cmp, i64 %swap seq_cst seq_cst
%val = extractvalue { i64, i1 } %pairval, 0
ret i64 %val
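
The cmpxchg tests combine the same getelementptr conversion with the struct-returning cmpxchg form, whose loaded value is recovered with extractvalue. A self-contained sketch mirroring f2 above (the function name is illustrative):

define i64 @csg_example(i64 %cmp, i64 %swap, i64 *%src) {
  %ptr = getelementptr i64, i64 *%src, i64 65535          ; byte offset 524280, still within the CSG displacement range
  %pair = cmpxchg i64 *%ptr, i64 %cmp, i64 %swap seq_cst seq_cst
  %val = extractvalue { i64, i1 } %pair, 0                ; element 0 is the old value; element 1 is the success bit
  ret i64 %val
}
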
diff --git a/test/CodeGen/SystemZ/cond-load-01.ll b/test/CodeGen/SystemZ/cond-load-01.ll
index 1030226798d1..97d4027126b8 100644
--- a/test/CodeGen/SystemZ/cond-load-01.ll
+++ b/test/CodeGen/SystemZ/cond-load-01.ll
@@ -11,7 +11,7 @@ define i32 @f1(i32 %easy, i32 *%ptr, i32 %limit) {
; CHECK: loche %r2, 0(%r3)
; CHECK: br %r14
%cond = icmp ult i32 %limit, 42
- %other = load i32 *%ptr
+ %other = load i32 , i32 *%ptr
%res = select i1 %cond, i32 %easy, i32 %other
ret i32 %res
}
@@ -23,7 +23,7 @@ define i32 @f2(i32 %easy, i32 *%ptr, i32 %limit) {
; CHECK: locl %r2, 0(%r3)
; CHECK: br %r14
%cond = icmp ult i32 %limit, 42
- %other = load i32 *%ptr
+ %other = load i32 , i32 *%ptr
%res = select i1 %cond, i32 %other, i32 %easy
ret i32 %res
}
@@ -34,9 +34,9 @@ define i32 @f3(i32 %easy, i32 *%base, i32 %limit) {
; CHECK: clfi %r4, 42
; CHECK: loche %r2, 524284(%r3)
; CHECK: br %r14
- %ptr = getelementptr i32 *%base, i64 131071
+ %ptr = getelementptr i32, i32 *%base, i64 131071
%cond = icmp ult i32 %limit, 42
- %other = load i32 *%ptr
+ %other = load i32 , i32 *%ptr
%res = select i1 %cond, i32 %easy, i32 %other
ret i32 %res
}
@@ -48,9 +48,9 @@ define i32 @f4(i32 %easy, i32 *%base, i32 %limit) {
; CHECK: clfi %r4, 42
; CHECK: loche %r2, 0(%r3)
; CHECK: br %r14
- %ptr = getelementptr i32 *%base, i64 131072
+ %ptr = getelementptr i32, i32 *%base, i64 131072
%cond = icmp ult i32 %limit, 42
- %other = load i32 *%ptr
+ %other = load i32 , i32 *%ptr
%res = select i1 %cond, i32 %easy, i32 %other
ret i32 %res
}
@@ -61,9 +61,9 @@ define i32 @f5(i32 %easy, i32 *%base, i32 %limit) {
; CHECK: clfi %r4, 42
; CHECK: loche %r2, -524288(%r3)
; CHECK: br %r14
- %ptr = getelementptr i32 *%base, i64 -131072
+ %ptr = getelementptr i32, i32 *%base, i64 -131072
%cond = icmp ult i32 %limit, 42
- %other = load i32 *%ptr
+ %other = load i32 , i32 *%ptr
%res = select i1 %cond, i32 %easy, i32 %other
ret i32 %res
}
@@ -75,9 +75,9 @@ define i32 @f6(i32 %easy, i32 *%base, i32 %limit) {
; CHECK: clfi %r4, 42
; CHECK: loche %r2, 0(%r3)
; CHECK: br %r14
- %ptr = getelementptr i32 *%base, i64 -131073
+ %ptr = getelementptr i32, i32 *%base, i64 -131073
%cond = icmp ult i32 %limit, 42
- %other = load i32 *%ptr
+ %other = load i32 , i32 *%ptr
%res = select i1 %cond, i32 %easy, i32 %other
ret i32 %res
}
@@ -91,7 +91,7 @@ define i32 @f7(i32 %alt, i32 %limit) {
%ptr = alloca i32
%easy = call i32 @foo(i32 *%ptr)
%cond = icmp ult i32 %limit, 42
- %other = load i32 *%ptr
+ %other = load i32 , i32 *%ptr
%res = select i1 %cond, i32 %easy, i32 %other
ret i32 %res
}
@@ -105,7 +105,7 @@ define i32 @f8(i32 %easy, i32 %limit, i64 %base, i64 %index) {
%add = add i64 %base, %index
%ptr = inttoptr i64 %add to i32 *
%cond = icmp ult i32 %limit, 42
- %other = load i32 *%ptr
+ %other = load i32 , i32 *%ptr
%res = select i1 %cond, i32 %easy, i32 %other
ret i32 %res
}
@@ -121,7 +121,7 @@ entry:
br i1 %cmp, label %load, label %exit
load:
- %other = load i32 *%ptr
+ %other = load i32 , i32 *%ptr
br label %exit
exit:
diff --git a/test/CodeGen/SystemZ/cond-load-02.ll b/test/CodeGen/SystemZ/cond-load-02.ll
index e97f4728bc0b..d0fe65e2fe0e 100644
--- a/test/CodeGen/SystemZ/cond-load-02.ll
+++ b/test/CodeGen/SystemZ/cond-load-02.ll
@@ -11,7 +11,7 @@ define i64 @f1(i64 %easy, i64 *%ptr, i64 %limit) {
; CHECK: locghe %r2, 0(%r3)
; CHECK: br %r14
%cond = icmp ult i64 %limit, 42
- %other = load i64 *%ptr
+ %other = load i64 , i64 *%ptr
%res = select i1 %cond, i64 %easy, i64 %other
ret i64 %res
}
@@ -23,7 +23,7 @@ define i64 @f2(i64 %easy, i64 *%ptr, i64 %limit) {
; CHECK: locgl %r2, 0(%r3)
; CHECK: br %r14
%cond = icmp ult i64 %limit, 42
- %other = load i64 *%ptr
+ %other = load i64 , i64 *%ptr
%res = select i1 %cond, i64 %other, i64 %easy
ret i64 %res
}
@@ -34,9 +34,9 @@ define i64 @f3(i64 %easy, i64 *%base, i64 %limit) {
; CHECK: clgfi %r4, 42
; CHECK: locghe %r2, 524280(%r3)
; CHECK: br %r14
- %ptr = getelementptr i64 *%base, i64 65535
+ %ptr = getelementptr i64, i64 *%base, i64 65535
%cond = icmp ult i64 %limit, 42
- %other = load i64 *%ptr
+ %other = load i64 , i64 *%ptr
%res = select i1 %cond, i64 %easy, i64 %other
ret i64 %res
}
@@ -48,9 +48,9 @@ define i64 @f4(i64 %easy, i64 *%base, i64 %limit) {
; CHECK: clgfi %r4, 42
; CHECK: locghe %r2, 0(%r3)
; CHECK: br %r14
- %ptr = getelementptr i64 *%base, i64 65536
+ %ptr = getelementptr i64, i64 *%base, i64 65536
%cond = icmp ult i64 %limit, 42
- %other = load i64 *%ptr
+ %other = load i64 , i64 *%ptr
%res = select i1 %cond, i64 %easy, i64 %other
ret i64 %res
}
@@ -61,9 +61,9 @@ define i64 @f5(i64 %easy, i64 *%base, i64 %limit) {
; CHECK: clgfi %r4, 42
; CHECK: locghe %r2, -524288(%r3)
; CHECK: br %r14
- %ptr = getelementptr i64 *%base, i64 -65536
+ %ptr = getelementptr i64, i64 *%base, i64 -65536
%cond = icmp ult i64 %limit, 42
- %other = load i64 *%ptr
+ %other = load i64 , i64 *%ptr
%res = select i1 %cond, i64 %easy, i64 %other
ret i64 %res
}
@@ -75,9 +75,9 @@ define i64 @f6(i64 %easy, i64 *%base, i64 %limit) {
; CHECK: clgfi %r4, 42
; CHECK: locghe %r2, 0(%r3)
; CHECK: br %r14
- %ptr = getelementptr i64 *%base, i64 -65537
+ %ptr = getelementptr i64, i64 *%base, i64 -65537
%cond = icmp ult i64 %limit, 42
- %other = load i64 *%ptr
+ %other = load i64 , i64 *%ptr
%res = select i1 %cond, i64 %easy, i64 %other
ret i64 %res
}
@@ -91,7 +91,7 @@ define i64 @f7(i64 %alt, i64 %limit) {
%ptr = alloca i64
%easy = call i64 @foo(i64 *%ptr)
%cond = icmp ult i64 %limit, 42
- %other = load i64 *%ptr
+ %other = load i64 , i64 *%ptr
%res = select i1 %cond, i64 %easy, i64 %other
ret i64 %res
}
@@ -105,7 +105,7 @@ define i64 @f8(i64 %easy, i64 %limit, i64 %base, i64 %index) {
%add = add i64 %base, %index
%ptr = inttoptr i64 %add to i64 *
%cond = icmp ult i64 %limit, 42
- %other = load i64 *%ptr
+ %other = load i64 , i64 *%ptr
%res = select i1 %cond, i64 %easy, i64 %other
ret i64 %res
}
@@ -121,7 +121,7 @@ entry:
br i1 %cmp, label %load, label %exit
load:
- %other = load i64 *%ptr
+ %other = load i64 , i64 *%ptr
br label %exit
exit:
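
The cond-load tests show the other half of the syntax change: load now takes the result type before the pointer operand. The select over a loaded value is what the backend folds into a load-on-condition instruction. A minimal sketch of the i64 variant (illustrative function name, mirroring f1 above):

define i64 @locg_example(i64 %easy, i64 *%ptr, i64 %limit) {
  %cond = icmp ult i64 %limit, 42
  %other = load i64, i64 *%ptr                   ; result type first, then the pointer operand
  %res = select i1 %cond, i64 %easy, i64 %other  ; candidate for LOCGHE, as checked above
  ret i64 %res
}
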
diff --git a/test/CodeGen/SystemZ/cond-store-01.ll b/test/CodeGen/SystemZ/cond-store-01.ll
index 62e9796fa21b..ec7fc4a31fcd 100644
--- a/test/CodeGen/SystemZ/cond-store-01.ll
+++ b/test/CodeGen/SystemZ/cond-store-01.ll
@@ -15,7 +15,7 @@ define void @f1(i8 *%ptr, i8 %alt, i32 %limit) {
; CHECK: [[LABEL]]:
; CHECK: br %r14
%cond = icmp ult i32 %limit, 420
- %orig = load i8 *%ptr
+ %orig = load i8 , i8 *%ptr
%res = select i1 %cond, i8 %orig, i8 %alt
store i8 %res, i8 *%ptr
ret void
@@ -31,7 +31,7 @@ define void @f2(i8 *%ptr, i8 %alt, i32 %limit) {
; CHECK: [[LABEL]]:
; CHECK: br %r14
%cond = icmp ult i32 %limit, 420
- %orig = load i8 *%ptr
+ %orig = load i8 , i8 *%ptr
%res = select i1 %cond, i8 %alt, i8 %orig
store i8 %res, i8 *%ptr
ret void
@@ -48,7 +48,7 @@ define void @f3(i8 *%ptr, i32 %alt, i32 %limit) {
; CHECK: [[LABEL]]:
; CHECK: br %r14
%cond = icmp ult i32 %limit, 420
- %orig = load i8 *%ptr
+ %orig = load i8 , i8 *%ptr
%ext = sext i8 %orig to i32
%res = select i1 %cond, i32 %ext, i32 %alt
%trunc = trunc i32 %res to i8
@@ -66,7 +66,7 @@ define void @f4(i8 *%ptr, i32 %alt, i32 %limit) {
; CHECK: [[LABEL]]:
; CHECK: br %r14
%cond = icmp ult i32 %limit, 420
- %orig = load i8 *%ptr
+ %orig = load i8 , i8 *%ptr
%ext = sext i8 %orig to i32
%res = select i1 %cond, i32 %alt, i32 %ext
%trunc = trunc i32 %res to i8
@@ -85,7 +85,7 @@ define void @f5(i8 *%ptr, i32 %alt, i32 %limit) {
; CHECK: [[LABEL]]:
; CHECK: br %r14
%cond = icmp ult i32 %limit, 420
- %orig = load i8 *%ptr
+ %orig = load i8 , i8 *%ptr
%ext = zext i8 %orig to i32
%res = select i1 %cond, i32 %ext, i32 %alt
%trunc = trunc i32 %res to i8
@@ -103,7 +103,7 @@ define void @f6(i8 *%ptr, i32 %alt, i32 %limit) {
; CHECK: [[LABEL]]:
; CHECK: br %r14
%cond = icmp ult i32 %limit, 420
- %orig = load i8 *%ptr
+ %orig = load i8 , i8 *%ptr
%ext = zext i8 %orig to i32
%res = select i1 %cond, i32 %alt, i32 %ext
%trunc = trunc i32 %res to i8
@@ -122,7 +122,7 @@ define void @f7(i8 *%ptr, i64 %alt, i32 %limit) {
; CHECK: [[LABEL]]:
; CHECK: br %r14
%cond = icmp ult i32 %limit, 420
- %orig = load i8 *%ptr
+ %orig = load i8 , i8 *%ptr
%ext = sext i8 %orig to i64
%res = select i1 %cond, i64 %ext, i64 %alt
%trunc = trunc i64 %res to i8
@@ -140,7 +140,7 @@ define void @f8(i8 *%ptr, i64 %alt, i32 %limit) {
; CHECK: [[LABEL]]:
; CHECK: br %r14
%cond = icmp ult i32 %limit, 420
- %orig = load i8 *%ptr
+ %orig = load i8 , i8 *%ptr
%ext = sext i8 %orig to i64
%res = select i1 %cond, i64 %alt, i64 %ext
%trunc = trunc i64 %res to i8
@@ -159,7 +159,7 @@ define void @f9(i8 *%ptr, i64 %alt, i32 %limit) {
; CHECK: [[LABEL]]:
; CHECK: br %r14
%cond = icmp ult i32 %limit, 420
- %orig = load i8 *%ptr
+ %orig = load i8 , i8 *%ptr
%ext = zext i8 %orig to i64
%res = select i1 %cond, i64 %ext, i64 %alt
%trunc = trunc i64 %res to i8
@@ -177,7 +177,7 @@ define void @f10(i8 *%ptr, i64 %alt, i32 %limit) {
; CHECK: [[LABEL]]:
; CHECK: br %r14
%cond = icmp ult i32 %limit, 420
- %orig = load i8 *%ptr
+ %orig = load i8 , i8 *%ptr
%ext = zext i8 %orig to i64
%res = select i1 %cond, i64 %alt, i64 %ext
%trunc = trunc i64 %res to i8
@@ -194,9 +194,9 @@ define void @f11(i8 *%base, i8 %alt, i32 %limit) {
; CHECK: stc %r3, 4095(%r2)
; CHECK: [[LABEL]]:
; CHECK: br %r14
- %ptr = getelementptr i8 *%base, i64 4095
+ %ptr = getelementptr i8, i8 *%base, i64 4095
%cond = icmp ult i32 %limit, 420
- %orig = load i8 *%ptr
+ %orig = load i8 , i8 *%ptr
%res = select i1 %cond, i8 %orig, i8 %alt
store i8 %res, i8 *%ptr
ret void
@@ -211,9 +211,9 @@ define void @f12(i8 *%base, i8 %alt, i32 %limit) {
; CHECK: stcy %r3, 4096(%r2)
; CHECK: [[LABEL]]:
; CHECK: br %r14
- %ptr = getelementptr i8 *%base, i64 4096
+ %ptr = getelementptr i8, i8 *%base, i64 4096
%cond = icmp ult i32 %limit, 420
- %orig = load i8 *%ptr
+ %orig = load i8 , i8 *%ptr
%res = select i1 %cond, i8 %orig, i8 %alt
store i8 %res, i8 *%ptr
ret void
@@ -228,9 +228,9 @@ define void @f13(i8 *%base, i8 %alt, i32 %limit) {
; CHECK: stcy %r3, 524287(%r2)
; CHECK: [[LABEL]]:
; CHECK: br %r14
- %ptr = getelementptr i8 *%base, i64 524287
+ %ptr = getelementptr i8, i8 *%base, i64 524287
%cond = icmp ult i32 %limit, 420
- %orig = load i8 *%ptr
+ %orig = load i8 , i8 *%ptr
%res = select i1 %cond, i8 %orig, i8 %alt
store i8 %res, i8 *%ptr
ret void
@@ -247,9 +247,9 @@ define void @f14(i8 *%base, i8 %alt, i32 %limit) {
; CHECK: stc %r3, 0(%r2)
; CHECK: [[LABEL]]:
; CHECK: br %r14
- %ptr = getelementptr i8 *%base, i64 524288
+ %ptr = getelementptr i8, i8 *%base, i64 524288
%cond = icmp ult i32 %limit, 420
- %orig = load i8 *%ptr
+ %orig = load i8 , i8 *%ptr
%res = select i1 %cond, i8 %orig, i8 %alt
store i8 %res, i8 *%ptr
ret void
@@ -264,9 +264,9 @@ define void @f15(i8 *%base, i8 %alt, i32 %limit) {
; CHECK: stcy %r3, -524288(%r2)
; CHECK: [[LABEL]]:
; CHECK: br %r14
- %ptr = getelementptr i8 *%base, i64 -524288
+ %ptr = getelementptr i8, i8 *%base, i64 -524288
%cond = icmp ult i32 %limit, 420
- %orig = load i8 *%ptr
+ %orig = load i8 , i8 *%ptr
%res = select i1 %cond, i8 %orig, i8 %alt
store i8 %res, i8 *%ptr
ret void
@@ -283,9 +283,9 @@ define void @f16(i8 *%base, i8 %alt, i32 %limit) {
; CHECK: stc %r3, 0(%r2)
; CHECK: [[LABEL]]:
; CHECK: br %r14
- %ptr = getelementptr i8 *%base, i64 -524289
+ %ptr = getelementptr i8, i8 *%base, i64 -524289
%cond = icmp ult i32 %limit, 420
- %orig = load i8 *%ptr
+ %orig = load i8 , i8 *%ptr
%res = select i1 %cond, i8 %orig, i8 %alt
store i8 %res, i8 *%ptr
ret void
@@ -304,7 +304,7 @@ define void @f17(i64 %base, i64 %index, i8 %alt, i32 %limit) {
%add2 = add i64 %add1, 4096
%ptr = inttoptr i64 %add2 to i8 *
%cond = icmp ult i32 %limit, 420
- %orig = load i8 *%ptr
+ %orig = load i8 , i8 *%ptr
%res = select i1 %cond, i8 %orig, i8 %alt
store i8 %res, i8 *%ptr
ret void
@@ -319,7 +319,7 @@ define void @f18(i8 *%ptr, i8 %alt, i32 %limit) {
; CHECK: stc {{%r[0-5]}}, 0(%r2)
; CHECK: br %r14
%cond = icmp ult i32 %limit, 420
- %orig = load volatile i8 *%ptr
+ %orig = load volatile i8 , i8 *%ptr
%res = select i1 %cond, i8 %orig, i8 %alt
store i8 %res, i8 *%ptr
ret void
@@ -334,7 +334,7 @@ define void @f19(i8 *%ptr, i8 %alt, i32 %limit) {
; CHECK: stc %r3, 0(%r2)
; CHECK: br %r14
%cond = icmp ult i32 %limit, 420
- %orig = load i8 *%ptr
+ %orig = load i8 , i8 *%ptr
%res = select i1 %cond, i8 %orig, i8 %alt
store volatile i8 %res, i8 *%ptr
ret void
@@ -353,7 +353,7 @@ define void @f20(i8 *%ptr, i8 %alt, i32 %limit) {
; CHECK: stc {{%r[0-9]+}}, 0(%r2)
; CHECK: br %r14
%cond = icmp ult i32 %limit, 420
- %orig = load atomic i8 *%ptr unordered, align 1
+ %orig = load atomic i8 , i8 *%ptr unordered, align 1
%res = select i1 %cond, i8 %orig, i8 %alt
store i8 %res, i8 *%ptr
ret void
@@ -369,7 +369,7 @@ define void @f21(i8 *%ptr, i8 %alt, i32 %limit) {
; CHECK: stc %r3, 0(%r2)
; CHECK: br %r14
%cond = icmp ult i32 %limit, 420
- %orig = load i8 *%ptr
+ %orig = load i8 , i8 *%ptr
%res = select i1 %cond, i8 %orig, i8 %alt
store atomic i8 %res, i8 *%ptr unordered, align 1
ret void
@@ -389,7 +389,7 @@ define void @f22(i8 %alt, i32 %limit) {
%ptr = alloca i8
call void @foo(i8 *%ptr)
%cond = icmp ult i32 %limit, 420
- %orig = load i8 *%ptr
+ %orig = load i8 , i8 *%ptr
%res = select i1 %cond, i8 %orig, i8 %alt
store i8 %res, i8 *%ptr
call void @foo(i8 *%ptr)
diff --git a/test/CodeGen/SystemZ/cond-store-02.ll b/test/CodeGen/SystemZ/cond-store-02.ll
index 4fbcdaba5103..22bdfa3c27dc 100644
--- a/test/CodeGen/SystemZ/cond-store-02.ll
+++ b/test/CodeGen/SystemZ/cond-store-02.ll
@@ -15,7 +15,7 @@ define void @f1(i16 *%ptr, i16 %alt, i32 %limit) {
; CHECK: [[LABEL]]:
; CHECK: br %r14
%cond = icmp ult i32 %limit, 420
- %orig = load i16 *%ptr
+ %orig = load i16 , i16 *%ptr
%res = select i1 %cond, i16 %orig, i16 %alt
store i16 %res, i16 *%ptr
ret void
@@ -31,7 +31,7 @@ define void @f2(i16 *%ptr, i16 %alt, i32 %limit) {
; CHECK: [[LABEL]]:
; CHECK: br %r14
%cond = icmp ult i32 %limit, 420
- %orig = load i16 *%ptr
+ %orig = load i16 , i16 *%ptr
%res = select i1 %cond, i16 %alt, i16 %orig
store i16 %res, i16 *%ptr
ret void
@@ -48,7 +48,7 @@ define void @f3(i16 *%ptr, i32 %alt, i32 %limit) {
; CHECK: [[LABEL]]:
; CHECK: br %r14
%cond = icmp ult i32 %limit, 420
- %orig = load i16 *%ptr
+ %orig = load i16 , i16 *%ptr
%ext = sext i16 %orig to i32
%res = select i1 %cond, i32 %ext, i32 %alt
%trunc = trunc i32 %res to i16
@@ -66,7 +66,7 @@ define void @f4(i16 *%ptr, i32 %alt, i32 %limit) {
; CHECK: [[LABEL]]:
; CHECK: br %r14
%cond = icmp ult i32 %limit, 420
- %orig = load i16 *%ptr
+ %orig = load i16 , i16 *%ptr
%ext = sext i16 %orig to i32
%res = select i1 %cond, i32 %alt, i32 %ext
%trunc = trunc i32 %res to i16
@@ -85,7 +85,7 @@ define void @f5(i16 *%ptr, i32 %alt, i32 %limit) {
; CHECK: [[LABEL]]:
; CHECK: br %r14
%cond = icmp ult i32 %limit, 420
- %orig = load i16 *%ptr
+ %orig = load i16 , i16 *%ptr
%ext = zext i16 %orig to i32
%res = select i1 %cond, i32 %ext, i32 %alt
%trunc = trunc i32 %res to i16
@@ -103,7 +103,7 @@ define void @f6(i16 *%ptr, i32 %alt, i32 %limit) {
; CHECK: [[LABEL]]:
; CHECK: br %r14
%cond = icmp ult i32 %limit, 420
- %orig = load i16 *%ptr
+ %orig = load i16 , i16 *%ptr
%ext = zext i16 %orig to i32
%res = select i1 %cond, i32 %alt, i32 %ext
%trunc = trunc i32 %res to i16
@@ -122,7 +122,7 @@ define void @f7(i16 *%ptr, i64 %alt, i32 %limit) {
; CHECK: [[LABEL]]:
; CHECK: br %r14
%cond = icmp ult i32 %limit, 420
- %orig = load i16 *%ptr
+ %orig = load i16 , i16 *%ptr
%ext = sext i16 %orig to i64
%res = select i1 %cond, i64 %ext, i64 %alt
%trunc = trunc i64 %res to i16
@@ -140,7 +140,7 @@ define void @f8(i16 *%ptr, i64 %alt, i32 %limit) {
; CHECK: [[LABEL]]:
; CHECK: br %r14
%cond = icmp ult i32 %limit, 420
- %orig = load i16 *%ptr
+ %orig = load i16 , i16 *%ptr
%ext = sext i16 %orig to i64
%res = select i1 %cond, i64 %alt, i64 %ext
%trunc = trunc i64 %res to i16
@@ -159,7 +159,7 @@ define void @f9(i16 *%ptr, i64 %alt, i32 %limit) {
; CHECK: [[LABEL]]:
; CHECK: br %r14
%cond = icmp ult i32 %limit, 420
- %orig = load i16 *%ptr
+ %orig = load i16 , i16 *%ptr
%ext = zext i16 %orig to i64
%res = select i1 %cond, i64 %ext, i64 %alt
%trunc = trunc i64 %res to i16
@@ -177,7 +177,7 @@ define void @f10(i16 *%ptr, i64 %alt, i32 %limit) {
; CHECK: [[LABEL]]:
; CHECK: br %r14
%cond = icmp ult i32 %limit, 420
- %orig = load i16 *%ptr
+ %orig = load i16 , i16 *%ptr
%ext = zext i16 %orig to i64
%res = select i1 %cond, i64 %alt, i64 %ext
%trunc = trunc i64 %res to i16
@@ -194,9 +194,9 @@ define void @f11(i16 *%base, i16 %alt, i32 %limit) {
; CHECK: sth %r3, 4094(%r2)
; CHECK: [[LABEL]]:
; CHECK: br %r14
- %ptr = getelementptr i16 *%base, i64 2047
+ %ptr = getelementptr i16, i16 *%base, i64 2047
%cond = icmp ult i32 %limit, 420
- %orig = load i16 *%ptr
+ %orig = load i16 , i16 *%ptr
%res = select i1 %cond, i16 %orig, i16 %alt
store i16 %res, i16 *%ptr
ret void
@@ -211,9 +211,9 @@ define void @f12(i16 *%base, i16 %alt, i32 %limit) {
; CHECK: sthy %r3, 4096(%r2)
; CHECK: [[LABEL]]:
; CHECK: br %r14
- %ptr = getelementptr i16 *%base, i64 2048
+ %ptr = getelementptr i16, i16 *%base, i64 2048
%cond = icmp ult i32 %limit, 420
- %orig = load i16 *%ptr
+ %orig = load i16 , i16 *%ptr
%res = select i1 %cond, i16 %orig, i16 %alt
store i16 %res, i16 *%ptr
ret void
@@ -228,9 +228,9 @@ define void @f13(i16 *%base, i16 %alt, i32 %limit) {
; CHECK: sthy %r3, 524286(%r2)
; CHECK: [[LABEL]]:
; CHECK: br %r14
- %ptr = getelementptr i16 *%base, i64 262143
+ %ptr = getelementptr i16, i16 *%base, i64 262143
%cond = icmp ult i32 %limit, 420
- %orig = load i16 *%ptr
+ %orig = load i16 , i16 *%ptr
%res = select i1 %cond, i16 %orig, i16 %alt
store i16 %res, i16 *%ptr
ret void
@@ -247,9 +247,9 @@ define void @f14(i16 *%base, i16 %alt, i32 %limit) {
; CHECK: sth %r3, 0(%r2)
; CHECK: [[LABEL]]:
; CHECK: br %r14
- %ptr = getelementptr i16 *%base, i64 262144
+ %ptr = getelementptr i16, i16 *%base, i64 262144
%cond = icmp ult i32 %limit, 420
- %orig = load i16 *%ptr
+ %orig = load i16 , i16 *%ptr
%res = select i1 %cond, i16 %orig, i16 %alt
store i16 %res, i16 *%ptr
ret void
@@ -264,9 +264,9 @@ define void @f15(i16 *%base, i16 %alt, i32 %limit) {
; CHECK: sthy %r3, -524288(%r2)
; CHECK: [[LABEL]]:
; CHECK: br %r14
- %ptr = getelementptr i16 *%base, i64 -262144
+ %ptr = getelementptr i16, i16 *%base, i64 -262144
%cond = icmp ult i32 %limit, 420
- %orig = load i16 *%ptr
+ %orig = load i16 , i16 *%ptr
%res = select i1 %cond, i16 %orig, i16 %alt
store i16 %res, i16 *%ptr
ret void
@@ -283,9 +283,9 @@ define void @f16(i16 *%base, i16 %alt, i32 %limit) {
; CHECK: sth %r3, 0(%r2)
; CHECK: [[LABEL]]:
; CHECK: br %r14
- %ptr = getelementptr i16 *%base, i64 -262145
+ %ptr = getelementptr i16, i16 *%base, i64 -262145
%cond = icmp ult i32 %limit, 420
- %orig = load i16 *%ptr
+ %orig = load i16 , i16 *%ptr
%res = select i1 %cond, i16 %orig, i16 %alt
store i16 %res, i16 *%ptr
ret void
@@ -304,7 +304,7 @@ define void @f17(i64 %base, i64 %index, i16 %alt, i32 %limit) {
%add2 = add i64 %add1, 4096
%ptr = inttoptr i64 %add2 to i16 *
%cond = icmp ult i32 %limit, 420
- %orig = load i16 *%ptr
+ %orig = load i16 , i16 *%ptr
%res = select i1 %cond, i16 %orig, i16 %alt
store i16 %res, i16 *%ptr
ret void
@@ -319,7 +319,7 @@ define void @f18(i16 *%ptr, i16 %alt, i32 %limit) {
; CHECK: sth {{%r[0-5]}}, 0(%r2)
; CHECK: br %r14
%cond = icmp ult i32 %limit, 420
- %orig = load volatile i16 *%ptr
+ %orig = load volatile i16 , i16 *%ptr
%res = select i1 %cond, i16 %orig, i16 %alt
store i16 %res, i16 *%ptr
ret void
@@ -334,7 +334,7 @@ define void @f19(i16 *%ptr, i16 %alt, i32 %limit) {
; CHECK: sth %r3, 0(%r2)
; CHECK: br %r14
%cond = icmp ult i32 %limit, 420
- %orig = load i16 *%ptr
+ %orig = load i16 , i16 *%ptr
%res = select i1 %cond, i16 %orig, i16 %alt
store volatile i16 %res, i16 *%ptr
ret void
@@ -353,7 +353,7 @@ define void @f20(i16 *%ptr, i16 %alt, i32 %limit) {
; CHECK: sth {{%r[0-9]+}}, 0(%r2)
; CHECK: br %r14
%cond = icmp ult i32 %limit, 420
- %orig = load atomic i16 *%ptr unordered, align 2
+ %orig = load atomic i16 , i16 *%ptr unordered, align 2
%res = select i1 %cond, i16 %orig, i16 %alt
store i16 %res, i16 *%ptr
ret void
@@ -369,7 +369,7 @@ define void @f21(i16 *%ptr, i16 %alt, i32 %limit) {
; CHECK: sth %r3, 0(%r2)
; CHECK: br %r14
%cond = icmp ult i32 %limit, 420
- %orig = load i16 *%ptr
+ %orig = load i16 , i16 *%ptr
%res = select i1 %cond, i16 %orig, i16 %alt
store atomic i16 %res, i16 *%ptr unordered, align 2
ret void
@@ -389,7 +389,7 @@ define void @f22(i16 %alt, i32 %limit) {
%ptr = alloca i16
call void @foo(i16 *%ptr)
%cond = icmp ult i32 %limit, 420
- %orig = load i16 *%ptr
+ %orig = load i16 , i16 *%ptr
%res = select i1 %cond, i16 %orig, i16 %alt
store i16 %res, i16 *%ptr
call void @foo(i16 *%ptr)
diff --git a/test/CodeGen/SystemZ/cond-store-03.ll b/test/CodeGen/SystemZ/cond-store-03.ll
index 4b22555d0d60..7207164a6314 100644
--- a/test/CodeGen/SystemZ/cond-store-03.ll
+++ b/test/CodeGen/SystemZ/cond-store-03.ll
@@ -14,7 +14,7 @@ define void @f1(i32 *%ptr, i32 %alt, i32 %limit) {
; CHECK: [[LABEL]]:
; CHECK: br %r14
%cond = icmp ult i32 %limit, 420
- %orig = load i32 *%ptr
+ %orig = load i32 , i32 *%ptr
%res = select i1 %cond, i32 %orig, i32 %alt
store i32 %res, i32 *%ptr
ret void
@@ -30,7 +30,7 @@ define void @f2(i32 *%ptr, i32 %alt, i32 %limit) {
; CHECK: [[LABEL]]:
; CHECK: br %r14
%cond = icmp ult i32 %limit, 420
- %orig = load i32 *%ptr
+ %orig = load i32 , i32 *%ptr
%res = select i1 %cond, i32 %alt, i32 %orig
store i32 %res, i32 *%ptr
ret void
@@ -47,7 +47,7 @@ define void @f3(i32 *%ptr, i64 %alt, i32 %limit) {
; CHECK: [[LABEL]]:
; CHECK: br %r14
%cond = icmp ult i32 %limit, 420
- %orig = load i32 *%ptr
+ %orig = load i32 , i32 *%ptr
%ext = sext i32 %orig to i64
%res = select i1 %cond, i64 %ext, i64 %alt
%trunc = trunc i64 %res to i32
@@ -65,7 +65,7 @@ define void @f4(i32 *%ptr, i64 %alt, i32 %limit) {
; CHECK: [[LABEL]]:
; CHECK: br %r14
%cond = icmp ult i32 %limit, 420
- %orig = load i32 *%ptr
+ %orig = load i32 , i32 *%ptr
%ext = sext i32 %orig to i64
%res = select i1 %cond, i64 %alt, i64 %ext
%trunc = trunc i64 %res to i32
@@ -84,7 +84,7 @@ define void @f5(i32 *%ptr, i64 %alt, i32 %limit) {
; CHECK: [[LABEL]]:
; CHECK: br %r14
%cond = icmp ult i32 %limit, 420
- %orig = load i32 *%ptr
+ %orig = load i32 , i32 *%ptr
%ext = zext i32 %orig to i64
%res = select i1 %cond, i64 %ext, i64 %alt
%trunc = trunc i64 %res to i32
@@ -102,7 +102,7 @@ define void @f6(i32 *%ptr, i64 %alt, i32 %limit) {
; CHECK: [[LABEL]]:
; CHECK: br %r14
%cond = icmp ult i32 %limit, 420
- %orig = load i32 *%ptr
+ %orig = load i32 , i32 *%ptr
%ext = zext i32 %orig to i64
%res = select i1 %cond, i64 %alt, i64 %ext
%trunc = trunc i64 %res to i32
@@ -119,9 +119,9 @@ define void @f7(i32 *%base, i32 %alt, i32 %limit) {
; CHECK: st %r3, 4092(%r2)
; CHECK: [[LABEL]]:
; CHECK: br %r14
- %ptr = getelementptr i32 *%base, i64 1023
+ %ptr = getelementptr i32, i32 *%base, i64 1023
%cond = icmp ult i32 %limit, 420
- %orig = load i32 *%ptr
+ %orig = load i32 , i32 *%ptr
%res = select i1 %cond, i32 %orig, i32 %alt
store i32 %res, i32 *%ptr
ret void
@@ -136,9 +136,9 @@ define void @f8(i32 *%base, i32 %alt, i32 %limit) {
; CHECK: sty %r3, 4096(%r2)
; CHECK: [[LABEL]]:
; CHECK: br %r14
- %ptr = getelementptr i32 *%base, i64 1024
+ %ptr = getelementptr i32, i32 *%base, i64 1024
%cond = icmp ult i32 %limit, 420
- %orig = load i32 *%ptr
+ %orig = load i32 , i32 *%ptr
%res = select i1 %cond, i32 %orig, i32 %alt
store i32 %res, i32 *%ptr
ret void
@@ -153,9 +153,9 @@ define void @f9(i32 *%base, i32 %alt, i32 %limit) {
; CHECK: sty %r3, 524284(%r2)
; CHECK: [[LABEL]]:
; CHECK: br %r14
- %ptr = getelementptr i32 *%base, i64 131071
+ %ptr = getelementptr i32, i32 *%base, i64 131071
%cond = icmp ult i32 %limit, 420
- %orig = load i32 *%ptr
+ %orig = load i32 , i32 *%ptr
%res = select i1 %cond, i32 %orig, i32 %alt
store i32 %res, i32 *%ptr
ret void
@@ -172,9 +172,9 @@ define void @f10(i32 *%base, i32 %alt, i32 %limit) {
; CHECK: st %r3, 0(%r2)
; CHECK: [[LABEL]]:
; CHECK: br %r14
- %ptr = getelementptr i32 *%base, i64 131072
+ %ptr = getelementptr i32, i32 *%base, i64 131072
%cond = icmp ult i32 %limit, 420
- %orig = load i32 *%ptr
+ %orig = load i32 , i32 *%ptr
%res = select i1 %cond, i32 %orig, i32 %alt
store i32 %res, i32 *%ptr
ret void
@@ -189,9 +189,9 @@ define void @f11(i32 *%base, i32 %alt, i32 %limit) {
; CHECK: sty %r3, -524288(%r2)
; CHECK: [[LABEL]]:
; CHECK: br %r14
- %ptr = getelementptr i32 *%base, i64 -131072
+ %ptr = getelementptr i32, i32 *%base, i64 -131072
%cond = icmp ult i32 %limit, 420
- %orig = load i32 *%ptr
+ %orig = load i32 , i32 *%ptr
%res = select i1 %cond, i32 %orig, i32 %alt
store i32 %res, i32 *%ptr
ret void
@@ -208,9 +208,9 @@ define void @f12(i32 *%base, i32 %alt, i32 %limit) {
; CHECK: st %r3, 0(%r2)
; CHECK: [[LABEL]]:
; CHECK: br %r14
- %ptr = getelementptr i32 *%base, i64 -131073
+ %ptr = getelementptr i32, i32 *%base, i64 -131073
%cond = icmp ult i32 %limit, 420
- %orig = load i32 *%ptr
+ %orig = load i32 , i32 *%ptr
%res = select i1 %cond, i32 %orig, i32 %alt
store i32 %res, i32 *%ptr
ret void
@@ -229,7 +229,7 @@ define void @f13(i64 %base, i64 %index, i32 %alt, i32 %limit) {
%add2 = add i64 %add1, 4096
%ptr = inttoptr i64 %add2 to i32 *
%cond = icmp ult i32 %limit, 420
- %orig = load i32 *%ptr
+ %orig = load i32 , i32 *%ptr
%res = select i1 %cond, i32 %orig, i32 %alt
store i32 %res, i32 *%ptr
ret void
@@ -244,7 +244,7 @@ define void @f14(i32 *%ptr, i32 %alt, i32 %limit) {
; CHECK: st {{%r[0-5]}}, 0(%r2)
; CHECK: br %r14
%cond = icmp ult i32 %limit, 420
- %orig = load volatile i32 *%ptr
+ %orig = load volatile i32 , i32 *%ptr
%res = select i1 %cond, i32 %orig, i32 %alt
store i32 %res, i32 *%ptr
ret void
@@ -259,7 +259,7 @@ define void @f15(i32 *%ptr, i32 %alt, i32 %limit) {
; CHECK: st %r3, 0(%r2)
; CHECK: br %r14
%cond = icmp ult i32 %limit, 420
- %orig = load i32 *%ptr
+ %orig = load i32 , i32 *%ptr
%res = select i1 %cond, i32 %orig, i32 %alt
store volatile i32 %res, i32 *%ptr
ret void
@@ -278,7 +278,7 @@ define void @f16(i32 *%ptr, i32 %alt, i32 %limit) {
; CHECK: st {{%r[0-5]}}, 0(%r2)
; CHECK: br %r14
%cond = icmp ult i32 %limit, 420
- %orig = load atomic i32 *%ptr unordered, align 4
+ %orig = load atomic i32 , i32 *%ptr unordered, align 4
%res = select i1 %cond, i32 %orig, i32 %alt
store i32 %res, i32 *%ptr
ret void
@@ -294,7 +294,7 @@ define void @f17(i32 *%ptr, i32 %alt, i32 %limit) {
; CHECK: st %r3, 0(%r2)
; CHECK: br %r14
%cond = icmp ult i32 %limit, 420
- %orig = load i32 *%ptr
+ %orig = load i32 , i32 *%ptr
%res = select i1 %cond, i32 %orig, i32 %alt
store atomic i32 %res, i32 *%ptr unordered, align 4
ret void
@@ -314,7 +314,7 @@ define void @f18(i32 %alt, i32 %limit) {
%ptr = alloca i32
call void @foo(i32 *%ptr)
%cond = icmp ult i32 %limit, 420
- %orig = load i32 *%ptr
+ %orig = load i32 , i32 *%ptr
%res = select i1 %cond, i32 %orig, i32 %alt
store i32 %res, i32 *%ptr
call void @foo(i32 *%ptr)
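
The cond-store files repeat the load/select/store-to-the-same-pointer shape across i8, i16, i32 and i64. The load conversions are purely syntactic; the volatile and atomic variants in each file are there to check that the conditional-store rewrite is suppressed when it would change memory semantics. A sketch of the basic i32 shape (illustrative name, mirroring f1 above):

define void @cond_store_example(i32 *%ptr, i32 %alt, i32 %limit) {
  %cond = icmp ult i32 %limit, 420
  %orig = load i32, i32 *%ptr              ; plain, non-volatile load
  %res = select i1 %cond, i32 %orig, i32 %alt
  store i32 %res, i32 *%ptr                ; same pointer, so the store may be made conditional
  ret void
}
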
diff --git a/test/CodeGen/SystemZ/cond-store-04.ll b/test/CodeGen/SystemZ/cond-store-04.ll
index 346b51a17d78..7e25bb5c14a0 100644
--- a/test/CodeGen/SystemZ/cond-store-04.ll
+++ b/test/CodeGen/SystemZ/cond-store-04.ll
@@ -14,7 +14,7 @@ define void @f1(i64 *%ptr, i64 %alt, i32 %limit) {
; CHECK: [[LABEL]]:
; CHECK: br %r14
%cond = icmp ult i32 %limit, 420
- %orig = load i64 *%ptr
+ %orig = load i64 , i64 *%ptr
%res = select i1 %cond, i64 %orig, i64 %alt
store i64 %res, i64 *%ptr
ret void
@@ -30,7 +30,7 @@ define void @f2(i64 *%ptr, i64 %alt, i32 %limit) {
; CHECK: [[LABEL]]:
; CHECK: br %r14
%cond = icmp ult i32 %limit, 420
- %orig = load i64 *%ptr
+ %orig = load i64 , i64 *%ptr
%res = select i1 %cond, i64 %alt, i64 %orig
store i64 %res, i64 *%ptr
ret void
@@ -45,9 +45,9 @@ define void @f3(i64 *%base, i64 %alt, i32 %limit) {
; CHECK: stg %r3, 524280(%r2)
; CHECK: [[LABEL]]:
; CHECK: br %r14
- %ptr = getelementptr i64 *%base, i64 65535
+ %ptr = getelementptr i64, i64 *%base, i64 65535
%cond = icmp ult i32 %limit, 420
- %orig = load i64 *%ptr
+ %orig = load i64 , i64 *%ptr
%res = select i1 %cond, i64 %orig, i64 %alt
store i64 %res, i64 *%ptr
ret void
@@ -64,9 +64,9 @@ define void @f4(i64 *%base, i64 %alt, i32 %limit) {
; CHECK: stg %r3, 0(%r2)
; CHECK: [[LABEL]]:
; CHECK: br %r14
- %ptr = getelementptr i64 *%base, i64 65536
+ %ptr = getelementptr i64, i64 *%base, i64 65536
%cond = icmp ult i32 %limit, 420
- %orig = load i64 *%ptr
+ %orig = load i64 , i64 *%ptr
%res = select i1 %cond, i64 %orig, i64 %alt
store i64 %res, i64 *%ptr
ret void
@@ -81,9 +81,9 @@ define void @f5(i64 *%base, i64 %alt, i32 %limit) {
; CHECK: stg %r3, -524288(%r2)
; CHECK: [[LABEL]]:
; CHECK: br %r14
- %ptr = getelementptr i64 *%base, i64 -65536
+ %ptr = getelementptr i64, i64 *%base, i64 -65536
%cond = icmp ult i32 %limit, 420
- %orig = load i64 *%ptr
+ %orig = load i64 , i64 *%ptr
%res = select i1 %cond, i64 %orig, i64 %alt
store i64 %res, i64 *%ptr
ret void
@@ -100,9 +100,9 @@ define void @f6(i64 *%base, i64 %alt, i32 %limit) {
; CHECK: stg %r3, 0(%r2)
; CHECK: [[LABEL]]:
; CHECK: br %r14
- %ptr = getelementptr i64 *%base, i64 -65537
+ %ptr = getelementptr i64, i64 *%base, i64 -65537
%cond = icmp ult i32 %limit, 420
- %orig = load i64 *%ptr
+ %orig = load i64 , i64 *%ptr
%res = select i1 %cond, i64 %orig, i64 %alt
store i64 %res, i64 *%ptr
ret void
@@ -121,7 +121,7 @@ define void @f7(i64 %base, i64 %index, i64 %alt, i32 %limit) {
%add2 = add i64 %add1, 524287
%ptr = inttoptr i64 %add2 to i64 *
%cond = icmp ult i32 %limit, 420
- %orig = load i64 *%ptr
+ %orig = load i64 , i64 *%ptr
%res = select i1 %cond, i64 %orig, i64 %alt
store i64 %res, i64 *%ptr
ret void
@@ -136,7 +136,7 @@ define void @f8(i64 *%ptr, i64 %alt, i32 %limit) {
; CHECK: stg {{%r[0-5]}}, 0(%r2)
; CHECK: br %r14
%cond = icmp ult i32 %limit, 420
- %orig = load volatile i64 *%ptr
+ %orig = load volatile i64 , i64 *%ptr
%res = select i1 %cond, i64 %orig, i64 %alt
store i64 %res, i64 *%ptr
ret void
@@ -151,7 +151,7 @@ define void @f9(i64 *%ptr, i64 %alt, i32 %limit) {
; CHECK: stg %r3, 0(%r2)
; CHECK: br %r14
%cond = icmp ult i32 %limit, 420
- %orig = load i64 *%ptr
+ %orig = load i64 , i64 *%ptr
%res = select i1 %cond, i64 %orig, i64 %alt
store volatile i64 %res, i64 *%ptr
ret void
@@ -170,7 +170,7 @@ define void @f10(i64 *%ptr, i64 %alt, i32 %limit) {
; CHECK: stg {{%r[0-5]}}, 0(%r2)
; CHECK: br %r14
%cond = icmp ult i32 %limit, 420
- %orig = load atomic i64 *%ptr unordered, align 8
+ %orig = load atomic i64 , i64 *%ptr unordered, align 8
%res = select i1 %cond, i64 %orig, i64 %alt
store i64 %res, i64 *%ptr
ret void
@@ -186,7 +186,7 @@ define void @f11(i64 *%ptr, i64 %alt, i32 %limit) {
; CHECK: stg %r3, 0(%r2)
; CHECK: br %r14
%cond = icmp ult i32 %limit, 420
- %orig = load i64 *%ptr
+ %orig = load i64 , i64 *%ptr
%res = select i1 %cond, i64 %orig, i64 %alt
store atomic i64 %res, i64 *%ptr unordered, align 8
ret void
@@ -206,7 +206,7 @@ define void @f12(i64 %alt, i32 %limit) {
%ptr = alloca i64
call void @foo(i64 *%ptr)
%cond = icmp ult i32 %limit, 420
- %orig = load i64 *%ptr
+ %orig = load i64 , i64 *%ptr
%res = select i1 %cond, i64 %orig, i64 %alt
store i64 %res, i64 *%ptr
call void @foo(i64 *%ptr)
diff --git a/test/CodeGen/SystemZ/cond-store-05.ll b/test/CodeGen/SystemZ/cond-store-05.ll
index f8056f73c928..0cc068380e07 100644
--- a/test/CodeGen/SystemZ/cond-store-05.ll
+++ b/test/CodeGen/SystemZ/cond-store-05.ll
@@ -14,7 +14,7 @@ define void @f1(float *%ptr, float %alt, i32 %limit) {
; CHECK: [[LABEL]]:
; CHECK: br %r14
%cond = icmp ult i32 %limit, 420
- %orig = load float *%ptr
+ %orig = load float , float *%ptr
%res = select i1 %cond, float %orig, float %alt
store float %res, float *%ptr
ret void
@@ -30,7 +30,7 @@ define void @f2(float *%ptr, float %alt, i32 %limit) {
; CHECK: [[LABEL]]:
; CHECK: br %r14
%cond = icmp ult i32 %limit, 420
- %orig = load float *%ptr
+ %orig = load float , float *%ptr
%res = select i1 %cond, float %alt, float %orig
store float %res, float *%ptr
ret void
@@ -45,9 +45,9 @@ define void @f3(float *%base, float %alt, i32 %limit) {
; CHECK: ste %f0, 4092(%r2)
; CHECK: [[LABEL]]:
; CHECK: br %r14
- %ptr = getelementptr float *%base, i64 1023
+ %ptr = getelementptr float, float *%base, i64 1023
%cond = icmp ult i32 %limit, 420
- %orig = load float *%ptr
+ %orig = load float , float *%ptr
%res = select i1 %cond, float %orig, float %alt
store float %res, float *%ptr
ret void
@@ -62,9 +62,9 @@ define void @f4(float *%base, float %alt, i32 %limit) {
; CHECK: stey %f0, 4096(%r2)
; CHECK: [[LABEL]]:
; CHECK: br %r14
- %ptr = getelementptr float *%base, i64 1024
+ %ptr = getelementptr float, float *%base, i64 1024
%cond = icmp ult i32 %limit, 420
- %orig = load float *%ptr
+ %orig = load float , float *%ptr
%res = select i1 %cond, float %orig, float %alt
store float %res, float *%ptr
ret void
@@ -79,9 +79,9 @@ define void @f5(float *%base, float %alt, i32 %limit) {
; CHECK: stey %f0, 524284(%r2)
; CHECK: [[LABEL]]:
; CHECK: br %r14
- %ptr = getelementptr float *%base, i64 131071
+ %ptr = getelementptr float, float *%base, i64 131071
%cond = icmp ult i32 %limit, 420
- %orig = load float *%ptr
+ %orig = load float , float *%ptr
%res = select i1 %cond, float %orig, float %alt
store float %res, float *%ptr
ret void
@@ -98,9 +98,9 @@ define void @f6(float *%base, float %alt, i32 %limit) {
; CHECK: ste %f0, 0(%r2)
; CHECK: [[LABEL]]:
; CHECK: br %r14
- %ptr = getelementptr float *%base, i64 131072
+ %ptr = getelementptr float, float *%base, i64 131072
%cond = icmp ult i32 %limit, 420
- %orig = load float *%ptr
+ %orig = load float , float *%ptr
%res = select i1 %cond, float %orig, float %alt
store float %res, float *%ptr
ret void
@@ -115,9 +115,9 @@ define void @f7(float *%base, float %alt, i32 %limit) {
; CHECK: stey %f0, -524288(%r2)
; CHECK: [[LABEL]]:
; CHECK: br %r14
- %ptr = getelementptr float *%base, i64 -131072
+ %ptr = getelementptr float, float *%base, i64 -131072
%cond = icmp ult i32 %limit, 420
- %orig = load float *%ptr
+ %orig = load float , float *%ptr
%res = select i1 %cond, float %orig, float %alt
store float %res, float *%ptr
ret void
@@ -134,9 +134,9 @@ define void @f8(float *%base, float %alt, i32 %limit) {
; CHECK: ste %f0, 0(%r2)
; CHECK: [[LABEL]]:
; CHECK: br %r14
- %ptr = getelementptr float *%base, i64 -131073
+ %ptr = getelementptr float, float *%base, i64 -131073
%cond = icmp ult i32 %limit, 420
- %orig = load float *%ptr
+ %orig = load float , float *%ptr
%res = select i1 %cond, float %orig, float %alt
store float %res, float *%ptr
ret void
@@ -155,7 +155,7 @@ define void @f9(i64 %base, i64 %index, float %alt, i32 %limit) {
%add2 = add i64 %add1, 4096
%ptr = inttoptr i64 %add2 to float *
%cond = icmp ult i32 %limit, 420
- %orig = load float *%ptr
+ %orig = load float , float *%ptr
%res = select i1 %cond, float %orig, float %alt
store float %res, float *%ptr
ret void
@@ -170,7 +170,7 @@ define void @f10(float *%ptr, float %alt, i32 %limit) {
; CHECK: ste {{%f[0-5]}}, 0(%r2)
; CHECK: br %r14
%cond = icmp ult i32 %limit, 420
- %orig = load volatile float *%ptr
+ %orig = load volatile float , float *%ptr
%res = select i1 %cond, float %orig, float %alt
store float %res, float *%ptr
ret void
@@ -185,7 +185,7 @@ define void @f11(float *%ptr, float %alt, i32 %limit) {
; CHECK: ste %f0, 0(%r2)
; CHECK: br %r14
%cond = icmp ult i32 %limit, 420
- %orig = load float *%ptr
+ %orig = load float , float *%ptr
%res = select i1 %cond, float %orig, float %alt
store volatile float %res, float *%ptr
ret void
@@ -205,7 +205,7 @@ define void @f12(float %alt, i32 %limit) {
%ptr = alloca float
call void @foo(float *%ptr)
%cond = icmp ult i32 %limit, 420
- %orig = load float *%ptr
+ %orig = load float , float *%ptr
%res = select i1 %cond, float %orig, float %alt
store float %res, float *%ptr
call void @foo(float *%ptr)
diff --git a/test/CodeGen/SystemZ/cond-store-06.ll b/test/CodeGen/SystemZ/cond-store-06.ll
index 66681958d474..01948b811504 100644
--- a/test/CodeGen/SystemZ/cond-store-06.ll
+++ b/test/CodeGen/SystemZ/cond-store-06.ll
@@ -14,7 +14,7 @@ define void @f1(double *%ptr, double %alt, i32 %limit) {
; CHECK: [[LABEL]]:
; CHECK: br %r14
%cond = icmp ult i32 %limit, 420
- %orig = load double *%ptr
+ %orig = load double , double *%ptr
%res = select i1 %cond, double %orig, double %alt
store double %res, double *%ptr
ret void
@@ -30,7 +30,7 @@ define void @f2(double *%ptr, double %alt, i32 %limit) {
; CHECK: [[LABEL]]:
; CHECK: br %r14
%cond = icmp ult i32 %limit, 420
- %orig = load double *%ptr
+ %orig = load double , double *%ptr
%res = select i1 %cond, double %alt, double %orig
store double %res, double *%ptr
ret void
@@ -45,9 +45,9 @@ define void @f3(double *%base, double %alt, i32 %limit) {
; CHECK: std %f0, 4088(%r2)
; CHECK: [[LABEL]]:
; CHECK: br %r14
- %ptr = getelementptr double *%base, i64 511
+ %ptr = getelementptr double, double *%base, i64 511
%cond = icmp ult i32 %limit, 420
- %orig = load double *%ptr
+ %orig = load double , double *%ptr
%res = select i1 %cond, double %orig, double %alt
store double %res, double *%ptr
ret void
@@ -62,9 +62,9 @@ define void @f4(double *%base, double %alt, i32 %limit) {
; CHECK: stdy %f0, 4096(%r2)
; CHECK: [[LABEL]]:
; CHECK: br %r14
- %ptr = getelementptr double *%base, i64 512
+ %ptr = getelementptr double, double *%base, i64 512
%cond = icmp ult i32 %limit, 420
- %orig = load double *%ptr
+ %orig = load double , double *%ptr
%res = select i1 %cond, double %orig, double %alt
store double %res, double *%ptr
ret void
@@ -79,9 +79,9 @@ define void @f5(double *%base, double %alt, i32 %limit) {
; CHECK: stdy %f0, 524280(%r2)
; CHECK: [[LABEL]]:
; CHECK: br %r14
- %ptr = getelementptr double *%base, i64 65535
+ %ptr = getelementptr double, double *%base, i64 65535
%cond = icmp ult i32 %limit, 420
- %orig = load double *%ptr
+ %orig = load double , double *%ptr
%res = select i1 %cond, double %orig, double %alt
store double %res, double *%ptr
ret void
@@ -98,9 +98,9 @@ define void @f6(double *%base, double %alt, i32 %limit) {
; CHECK: std %f0, 0(%r2)
; CHECK: [[LABEL]]:
; CHECK: br %r14
- %ptr = getelementptr double *%base, i64 65536
+ %ptr = getelementptr double, double *%base, i64 65536
%cond = icmp ult i32 %limit, 420
- %orig = load double *%ptr
+ %orig = load double , double *%ptr
%res = select i1 %cond, double %orig, double %alt
store double %res, double *%ptr
ret void
@@ -115,9 +115,9 @@ define void @f7(double *%base, double %alt, i32 %limit) {
; CHECK: stdy %f0, -524288(%r2)
; CHECK: [[LABEL]]:
; CHECK: br %r14
- %ptr = getelementptr double *%base, i64 -65536
+ %ptr = getelementptr double, double *%base, i64 -65536
%cond = icmp ult i32 %limit, 420
- %orig = load double *%ptr
+ %orig = load double , double *%ptr
%res = select i1 %cond, double %orig, double %alt
store double %res, double *%ptr
ret void
@@ -134,9 +134,9 @@ define void @f8(double *%base, double %alt, i32 %limit) {
; CHECK: std %f0, 0(%r2)
; CHECK: [[LABEL]]:
; CHECK: br %r14
- %ptr = getelementptr double *%base, i64 -65537
+ %ptr = getelementptr double, double *%base, i64 -65537
%cond = icmp ult i32 %limit, 420
- %orig = load double *%ptr
+ %orig = load double , double *%ptr
%res = select i1 %cond, double %orig, double %alt
store double %res, double *%ptr
ret void
@@ -155,7 +155,7 @@ define void @f9(i64 %base, i64 %index, double %alt, i32 %limit) {
%add2 = add i64 %add1, 524287
%ptr = inttoptr i64 %add2 to double *
%cond = icmp ult i32 %limit, 420
- %orig = load double *%ptr
+ %orig = load double , double *%ptr
%res = select i1 %cond, double %orig, double %alt
store double %res, double *%ptr
ret void
@@ -170,7 +170,7 @@ define void @f10(double *%ptr, double %alt, i32 %limit) {
; CHECK: std {{%f[0-5]}}, 0(%r2)
; CHECK: br %r14
%cond = icmp ult i32 %limit, 420
- %orig = load volatile double *%ptr
+ %orig = load volatile double , double *%ptr
%res = select i1 %cond, double %orig, double %alt
store double %res, double *%ptr
ret void
@@ -185,7 +185,7 @@ define void @f11(double *%ptr, double %alt, i32 %limit) {
; CHECK: std %f0, 0(%r2)
; CHECK: br %r14
%cond = icmp ult i32 %limit, 420
- %orig = load double *%ptr
+ %orig = load double , double *%ptr
%res = select i1 %cond, double %orig, double %alt
store volatile double %res, double *%ptr
ret void
@@ -205,7 +205,7 @@ define void @f12(double %alt, i32 %limit) {
%ptr = alloca double
call void @foo(double *%ptr)
%cond = icmp ult i32 %limit, 420
- %orig = load double *%ptr
+ %orig = load double , double *%ptr
%res = select i1 %cond, double %orig, double %alt
store double %res, double *%ptr
call void @foo(double *%ptr)
diff --git a/test/CodeGen/SystemZ/cond-store-07.ll b/test/CodeGen/SystemZ/cond-store-07.ll
index b1df525566a3..35b1303f4024 100644
--- a/test/CodeGen/SystemZ/cond-store-07.ll
+++ b/test/CodeGen/SystemZ/cond-store-07.ll
@@ -11,7 +11,7 @@ define void @f1(i32 *%ptr, i32 %alt, i32 %limit) {
; CHECK: stoche %r3, 0(%r2)
; CHECK: br %r14
%cond = icmp ult i32 %limit, 42
- %orig = load i32 *%ptr
+ %orig = load i32 , i32 *%ptr
%res = select i1 %cond, i32 %orig, i32 %alt
store i32 %res, i32 *%ptr
ret void
@@ -24,7 +24,7 @@ define void @f2(i32 *%ptr, i32 %alt, i32 %limit) {
; CHECK: stocl %r3, 0(%r2)
; CHECK: br %r14
%cond = icmp ult i32 %limit, 42
- %orig = load i32 *%ptr
+ %orig = load i32 , i32 *%ptr
%res = select i1 %cond, i32 %alt, i32 %orig
store i32 %res, i32 *%ptr
ret void
@@ -38,7 +38,7 @@ define void @f3(i32 *%ptr, i64 %alt, i32 %limit) {
; CHECK: stoche %r3, 0(%r2)
; CHECK: br %r14
%cond = icmp ult i32 %limit, 42
- %orig = load i32 *%ptr
+ %orig = load i32 , i32 *%ptr
%ext = sext i32 %orig to i64
%res = select i1 %cond, i64 %ext, i64 %alt
%trunc = trunc i64 %res to i32
@@ -53,7 +53,7 @@ define void @f4(i32 *%ptr, i64 %alt, i32 %limit) {
; CHECK: stocl %r3, 0(%r2)
; CHECK: br %r14
%cond = icmp ult i32 %limit, 42
- %orig = load i32 *%ptr
+ %orig = load i32 , i32 *%ptr
%ext = sext i32 %orig to i64
%res = select i1 %cond, i64 %alt, i64 %ext
%trunc = trunc i64 %res to i32
@@ -69,7 +69,7 @@ define void @f5(i32 *%ptr, i64 %alt, i32 %limit) {
; CHECK: stoche %r3, 0(%r2)
; CHECK: br %r14
%cond = icmp ult i32 %limit, 42
- %orig = load i32 *%ptr
+ %orig = load i32 , i32 *%ptr
%ext = zext i32 %orig to i64
%res = select i1 %cond, i64 %ext, i64 %alt
%trunc = trunc i64 %res to i32
@@ -84,7 +84,7 @@ define void @f6(i32 *%ptr, i64 %alt, i32 %limit) {
; CHECK: stocl %r3, 0(%r2)
; CHECK: br %r14
%cond = icmp ult i32 %limit, 42
- %orig = load i32 *%ptr
+ %orig = load i32 , i32 *%ptr
%ext = zext i32 %orig to i64
%res = select i1 %cond, i64 %alt, i64 %ext
%trunc = trunc i64 %res to i32
@@ -98,9 +98,9 @@ define void @f7(i32 *%base, i32 %alt, i32 %limit) {
; CHECK: clfi %r4, 42
; CHECK: stoche %r3, 524284(%r2)
; CHECK: br %r14
- %ptr = getelementptr i32 *%base, i64 131071
+ %ptr = getelementptr i32, i32 *%base, i64 131071
%cond = icmp ult i32 %limit, 42
- %orig = load i32 *%ptr
+ %orig = load i32 , i32 *%ptr
%res = select i1 %cond, i32 %orig, i32 %alt
store i32 %res, i32 *%ptr
ret void
@@ -113,9 +113,9 @@ define void @f8(i32 *%base, i32 %alt, i32 %limit) {
; CHECK: clfi %r4, 42
; CHECK: stoche %r3, 0(%r2)
; CHECK: br %r14
- %ptr = getelementptr i32 *%base, i64 131072
+ %ptr = getelementptr i32, i32 *%base, i64 131072
%cond = icmp ult i32 %limit, 42
- %orig = load i32 *%ptr
+ %orig = load i32 , i32 *%ptr
%res = select i1 %cond, i32 %orig, i32 %alt
store i32 %res, i32 *%ptr
ret void
@@ -127,9 +127,9 @@ define void @f9(i32 *%base, i32 %alt, i32 %limit) {
; CHECK: clfi %r4, 42
; CHECK: stoche %r3, -524288(%r2)
; CHECK: br %r14
- %ptr = getelementptr i32 *%base, i64 -131072
+ %ptr = getelementptr i32, i32 *%base, i64 -131072
%cond = icmp ult i32 %limit, 42
- %orig = load i32 *%ptr
+ %orig = load i32 , i32 *%ptr
%res = select i1 %cond, i32 %orig, i32 %alt
store i32 %res, i32 *%ptr
ret void
@@ -142,9 +142,9 @@ define void @f10(i32 *%base, i32 %alt, i32 %limit) {
; CHECK: clfi %r4, 42
; CHECK: stoche %r3, 0(%r2)
; CHECK: br %r14
- %ptr = getelementptr i32 *%base, i64 -131073
+ %ptr = getelementptr i32, i32 *%base, i64 -131073
%cond = icmp ult i32 %limit, 42
- %orig = load i32 *%ptr
+ %orig = load i32 , i32 *%ptr
%res = select i1 %cond, i32 %orig, i32 %alt
store i32 %res, i32 *%ptr
ret void
@@ -160,7 +160,7 @@ define void @f11(i32 %alt, i32 %limit) {
%ptr = alloca i32
call void @foo(i32 *%ptr)
%cond = icmp ult i32 %limit, 42
- %orig = load i32 *%ptr
+ %orig = load i32 , i32 *%ptr
%res = select i1 %cond, i32 %orig, i32 %alt
store i32 %res, i32 *%ptr
call void @foo(i32 *%ptr)
diff --git a/test/CodeGen/SystemZ/cond-store-08.ll b/test/CodeGen/SystemZ/cond-store-08.ll
index 56dc7ee7777c..4c2b005968e8 100644
--- a/test/CodeGen/SystemZ/cond-store-08.ll
+++ b/test/CodeGen/SystemZ/cond-store-08.ll
@@ -11,7 +11,7 @@ define void @f1(i64 *%ptr, i64 %alt, i32 %limit) {
; CHECK: stocghe %r3, 0(%r2)
; CHECK: br %r14
%cond = icmp ult i32 %limit, 42
- %orig = load i64 *%ptr
+ %orig = load i64 , i64 *%ptr
%res = select i1 %cond, i64 %orig, i64 %alt
store i64 %res, i64 *%ptr
ret void
@@ -24,7 +24,7 @@ define void @f2(i64 *%ptr, i64 %alt, i32 %limit) {
; CHECK: stocgl %r3, 0(%r2)
; CHECK: br %r14
%cond = icmp ult i32 %limit, 42
- %orig = load i64 *%ptr
+ %orig = load i64 , i64 *%ptr
%res = select i1 %cond, i64 %alt, i64 %orig
store i64 %res, i64 *%ptr
ret void
@@ -36,9 +36,9 @@ define void @f3(i64 *%base, i64 %alt, i32 %limit) {
; CHECK: clfi %r4, 42
; CHECK: stocghe %r3, 524280(%r2)
; CHECK: br %r14
- %ptr = getelementptr i64 *%base, i64 65535
+ %ptr = getelementptr i64, i64 *%base, i64 65535
%cond = icmp ult i32 %limit, 42
- %orig = load i64 *%ptr
+ %orig = load i64 , i64 *%ptr
%res = select i1 %cond, i64 %orig, i64 %alt
store i64 %res, i64 *%ptr
ret void
@@ -51,9 +51,9 @@ define void @f4(i64 *%base, i64 %alt, i32 %limit) {
; CHECK: clfi %r4, 42
; CHECK: stocghe %r3, 0(%r2)
; CHECK: br %r14
- %ptr = getelementptr i64 *%base, i64 65536
+ %ptr = getelementptr i64, i64 *%base, i64 65536
%cond = icmp ult i32 %limit, 42
- %orig = load i64 *%ptr
+ %orig = load i64 , i64 *%ptr
%res = select i1 %cond, i64 %orig, i64 %alt
store i64 %res, i64 *%ptr
ret void
@@ -65,9 +65,9 @@ define void @f5(i64 *%base, i64 %alt, i32 %limit) {
; CHECK: clfi %r4, 42
; CHECK: stocghe %r3, -524288(%r2)
; CHECK: br %r14
- %ptr = getelementptr i64 *%base, i64 -65536
+ %ptr = getelementptr i64, i64 *%base, i64 -65536
%cond = icmp ult i32 %limit, 42
- %orig = load i64 *%ptr
+ %orig = load i64 , i64 *%ptr
%res = select i1 %cond, i64 %orig, i64 %alt
store i64 %res, i64 *%ptr
ret void
@@ -80,9 +80,9 @@ define void @f6(i64 *%base, i64 %alt, i32 %limit) {
; CHECK: clfi %r4, 42
; CHECK: stocghe %r3, 0(%r2)
; CHECK: br %r14
- %ptr = getelementptr i64 *%base, i64 -65537
+ %ptr = getelementptr i64, i64 *%base, i64 -65537
%cond = icmp ult i32 %limit, 42
- %orig = load i64 *%ptr
+ %orig = load i64 , i64 *%ptr
%res = select i1 %cond, i64 %orig, i64 %alt
store i64 %res, i64 *%ptr
ret void
@@ -98,7 +98,7 @@ define void @f7(i64 %alt, i32 %limit) {
%ptr = alloca i64
call void @foo(i64 *%ptr)
%cond = icmp ult i32 %limit, 42
- %orig = load i64 *%ptr
+ %orig = load i64 , i64 *%ptr
%res = select i1 %cond, i64 %orig, i64 %alt
store i64 %res, i64 *%ptr
call void @foo(i64 *%ptr)
diff --git a/test/CodeGen/SystemZ/ctpop-01.ll b/test/CodeGen/SystemZ/ctpop-01.ll
new file mode 100644
index 000000000000..ad80f9f21515
--- /dev/null
+++ b/test/CodeGen/SystemZ/ctpop-01.ll
@@ -0,0 +1,96 @@
+; Test population-count instruction
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s
+
+declare i32 @llvm.ctpop.i32(i32 %a)
+declare i64 @llvm.ctpop.i64(i64 %a)
+
+define i32 @f1(i32 %a) {
+; CHECK-LABEL: f1:
+; CHECK: popcnt %r0, %r2
+; CHECK: sllk %r1, %r0, 16
+; CHECK: ar %r1, %r0
+; CHECK: sllk %r2, %r1, 8
+; CHECK: ar %r2, %r1
+; CHECK: srl %r2, 24
+; CHECK: br %r14
+
+ %popcnt = call i32 @llvm.ctpop.i32(i32 %a)
+ ret i32 %popcnt
+}
+
+define i32 @f2(i32 %a) {
+; CHECK-LABEL: f2:
+; CHECK: llhr %r0, %r2
+; CHECK: popcnt %r0, %r0
+; CHECK: risblg %r2, %r0, 16, 151, 8
+; CHECK: ar %r2, %r0
+; CHECK: srl %r2, 8
+; CHECK: br %r14
+ %and = and i32 %a, 65535
+ %popcnt = call i32 @llvm.ctpop.i32(i32 %and)
+ ret i32 %popcnt
+}
+
+define i32 @f3(i32 %a) {
+; CHECK-LABEL: f3:
+; CHECK: llcr %r0, %r2
+; CHECK: popcnt %r2, %r0
+; CHECK: br %r14
+ %and = and i32 %a, 255
+ %popcnt = call i32 @llvm.ctpop.i32(i32 %and)
+ ret i32 %popcnt
+}
+
+define i64 @f4(i64 %a) {
+; CHECK-LABEL: f4:
+; CHECK: popcnt %r0, %r2
+; CHECK: sllg %r1, %r0, 32
+; CHECK: agr %r1, %r0
+; CHECK: sllg %r0, %r1, 16
+; CHECK: agr %r0, %r1
+; CHECK: sllg %r1, %r0, 8
+; CHECK: agr %r1, %r0
+; CHECK: srlg %r2, %r1, 56
+; CHECK: br %r14
+ %popcnt = call i64 @llvm.ctpop.i64(i64 %a)
+ ret i64 %popcnt
+}
+
+define i64 @f5(i64 %a) {
+; CHECK-LABEL: f5:
+; CHECK: llgfr %r0, %r2
+; CHECK: popcnt %r0, %r0
+; CHECK: sllg %r1, %r0, 16
+; CHECK: algfr %r0, %r1
+; CHECK: sllg %r1, %r0, 8
+; CHECK: algfr %r0, %r1
+; CHECK: srlg %r2, %r0, 24
+ %and = and i64 %a, 4294967295
+ %popcnt = call i64 @llvm.ctpop.i64(i64 %and)
+ ret i64 %popcnt
+}
+
+define i64 @f6(i64 %a) {
+; CHECK-LABEL: f6:
+; CHECK: llghr %r0, %r2
+; CHECK: popcnt %r0, %r0
+; CHECK: risbg %r1, %r0, 48, 183, 8
+; CHECK: agr %r1, %r0
+; CHECK: srlg %r2, %r1, 8
+; CHECK: br %r14
+ %and = and i64 %a, 65535
+ %popcnt = call i64 @llvm.ctpop.i64(i64 %and)
+ ret i64 %popcnt
+}
+
+define i64 @f7(i64 %a) {
+; CHECK-LABEL: f7:
+; CHECK: llgcr %r0, %r2
+; CHECK: popcnt %r2, %r0
+; CHECK: br %r14
+ %and = and i64 %a, 255
+ %popcnt = call i64 @llvm.ctpop.i64(i64 %and)
+ ret i64 %popcnt
+}
+
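
Unlike the files above, ctpop-01.ll is a newly added test rather than a syntax update: it pins -mcpu=z196 and checks that @llvm.ctpop.* lowers to POPCNT plus a shift/add reduction, with the narrow masked cases collapsing to a single POPCNT. A trimmed, standalone module exercising the same intrinsic (only the bare mnemonic is checked here; the full register expectations live in the new file):

; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s

declare i64 @llvm.ctpop.i64(i64)

define i64 @popcnt_byte(i64 %a) {
; CHECK-LABEL: popcnt_byte:
; CHECK: popcnt
; CHECK: br %r14
  %and = and i64 %a, 255                         ; one byte, so no reduction shifts are needed
  %popcnt = call i64 @llvm.ctpop.i64(i64 %and)
  ret i64 %popcnt
}
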
diff --git a/test/CodeGen/SystemZ/fp-abs-01.ll b/test/CodeGen/SystemZ/fp-abs-01.ll
index 0b4067da3d14..3b143d93315b 100644
--- a/test/CodeGen/SystemZ/fp-abs-01.ll
+++ b/test/CodeGen/SystemZ/fp-abs-01.ll
@@ -1,6 +1,7 @@
; Test floating-point absolute.
;
-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
; Test f32.
declare float @llvm.fabs.f32(float %f)
@@ -31,9 +32,9 @@ define void @f3(fp128 *%ptr, fp128 *%ptr2) {
; CHECK: lpxbr
; CHECK: dxbr
; CHECK: br %r14
- %orig = load fp128 *%ptr
+ %orig = load fp128 , fp128 *%ptr
%abs = call fp128 @llvm.fabs.f128(fp128 %orig)
- %op2 = load fp128 *%ptr2
+ %op2 = load fp128 , fp128 *%ptr2
%res = fdiv fp128 %abs, %op2
store fp128 %res, fp128 *%ptr
ret void
diff --git a/test/CodeGen/SystemZ/fp-abs-02.ll b/test/CodeGen/SystemZ/fp-abs-02.ll
index 909c48a06377..e831ddb86fea 100644
--- a/test/CodeGen/SystemZ/fp-abs-02.ll
+++ b/test/CodeGen/SystemZ/fp-abs-02.ll
@@ -1,6 +1,7 @@
; Test negated floating-point absolute.
;
-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
; Test f32.
declare float @llvm.fabs.f32(float %f)
@@ -33,10 +34,10 @@ define void @f3(fp128 *%ptr, fp128 *%ptr2) {
; CHECK: lnxbr
; CHECK: dxbr
; CHECK: br %r14
- %orig = load fp128 *%ptr
+ %orig = load fp128 , fp128 *%ptr
%abs = call fp128 @llvm.fabs.f128(fp128 %orig)
%negabs = fsub fp128 0xL00000000000000008000000000000000, %abs
- %op2 = load fp128 *%ptr2
+ %op2 = load fp128 , fp128 *%ptr2
%res = fdiv fp128 %negabs, %op2
store fp128 %res, fp128 *%ptr
ret void
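
Besides the load conversions, fp-abs-01.ll and fp-abs-02.ll gain explicit -mcpu=z10 and -mcpu=z13 RUN lines, covering both the pre-vector and vector-capable subtargets. The fp128 cases keep the pattern of consuming the absolute value with a division so that LPXBR/LNXBR stays live in the output; a standalone sketch (illustrative function name):

declare fp128 @llvm.fabs.f128(fp128)

define void @fabs_f128_example(fp128 *%ptr, fp128 *%ptr2) {
  %orig = load fp128, fp128 *%ptr
  %abs = call fp128 @llvm.fabs.f128(fp128 %orig)
  %op2 = load fp128, fp128 *%ptr2
  %res = fdiv fp128 %abs, %op2                   ; keeps the absolute-value result live
  store fp128 %res, fp128 *%ptr
  ret void
}
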
diff --git a/test/CodeGen/SystemZ/fp-add-01.ll b/test/CodeGen/SystemZ/fp-add-01.ll
index 28a212801a63..5b0ed0513a37 100644
--- a/test/CodeGen/SystemZ/fp-add-01.ll
+++ b/test/CodeGen/SystemZ/fp-add-01.ll
@@ -18,7 +18,7 @@ define float @f2(float %f1, float *%ptr) {
; CHECK-LABEL: f2:
; CHECK: aeb %f0, 0(%r2)
; CHECK: br %r14
- %f2 = load float *%ptr
+ %f2 = load float , float *%ptr
%res = fadd float %f1, %f2
ret float %res
}
@@ -28,8 +28,8 @@ define float @f3(float %f1, float *%base) {
; CHECK-LABEL: f3:
; CHECK: aeb %f0, 4092(%r2)
; CHECK: br %r14
- %ptr = getelementptr float *%base, i64 1023
- %f2 = load float *%ptr
+ %ptr = getelementptr float, float *%base, i64 1023
+ %f2 = load float , float *%ptr
%res = fadd float %f1, %f2
ret float %res
}
@@ -41,8 +41,8 @@ define float @f4(float %f1, float *%base) {
; CHECK: aghi %r2, 4096
; CHECK: aeb %f0, 0(%r2)
; CHECK: br %r14
- %ptr = getelementptr float *%base, i64 1024
- %f2 = load float *%ptr
+ %ptr = getelementptr float, float *%base, i64 1024
+ %f2 = load float , float *%ptr
%res = fadd float %f1, %f2
ret float %res
}
@@ -53,8 +53,8 @@ define float @f5(float %f1, float *%base) {
; CHECK: aghi %r2, -4
; CHECK: aeb %f0, 0(%r2)
; CHECK: br %r14
- %ptr = getelementptr float *%base, i64 -1
- %f2 = load float *%ptr
+ %ptr = getelementptr float, float *%base, i64 -1
+ %f2 = load float , float *%ptr
%res = fadd float %f1, %f2
ret float %res
}
@@ -65,9 +65,9 @@ define float @f6(float %f1, float *%base, i64 %index) {
; CHECK: sllg %r1, %r3, 2
; CHECK: aeb %f0, 400(%r1,%r2)
; CHECK: br %r14
- %ptr1 = getelementptr float *%base, i64 %index
- %ptr2 = getelementptr float *%ptr1, i64 100
- %f2 = load float *%ptr2
+ %ptr1 = getelementptr float, float *%base, i64 %index
+ %ptr2 = getelementptr float, float *%ptr1, i64 100
+ %f2 = load float , float *%ptr2
%res = fadd float %f1, %f2
ret float %res
}
@@ -78,28 +78,28 @@ define float @f7(float *%ptr0) {
; CHECK: brasl %r14, foo@PLT
; CHECK: aeb %f0, 16{{[04]}}(%r15)
; CHECK: br %r14
- %ptr1 = getelementptr float *%ptr0, i64 2
- %ptr2 = getelementptr float *%ptr0, i64 4
- %ptr3 = getelementptr float *%ptr0, i64 6
- %ptr4 = getelementptr float *%ptr0, i64 8
- %ptr5 = getelementptr float *%ptr0, i64 10
- %ptr6 = getelementptr float *%ptr0, i64 12
- %ptr7 = getelementptr float *%ptr0, i64 14
- %ptr8 = getelementptr float *%ptr0, i64 16
- %ptr9 = getelementptr float *%ptr0, i64 18
- %ptr10 = getelementptr float *%ptr0, i64 20
+ %ptr1 = getelementptr float, float *%ptr0, i64 2
+ %ptr2 = getelementptr float, float *%ptr0, i64 4
+ %ptr3 = getelementptr float, float *%ptr0, i64 6
+ %ptr4 = getelementptr float, float *%ptr0, i64 8
+ %ptr5 = getelementptr float, float *%ptr0, i64 10
+ %ptr6 = getelementptr float, float *%ptr0, i64 12
+ %ptr7 = getelementptr float, float *%ptr0, i64 14
+ %ptr8 = getelementptr float, float *%ptr0, i64 16
+ %ptr9 = getelementptr float, float *%ptr0, i64 18
+ %ptr10 = getelementptr float, float *%ptr0, i64 20
- %val0 = load float *%ptr0
- %val1 = load float *%ptr1
- %val2 = load float *%ptr2
- %val3 = load float *%ptr3
- %val4 = load float *%ptr4
- %val5 = load float *%ptr5
- %val6 = load float *%ptr6
- %val7 = load float *%ptr7
- %val8 = load float *%ptr8
- %val9 = load float *%ptr9
- %val10 = load float *%ptr10
+ %val0 = load float , float *%ptr0
+ %val1 = load float , float *%ptr1
+ %val2 = load float , float *%ptr2
+ %val3 = load float , float *%ptr3
+ %val4 = load float , float *%ptr4
+ %val5 = load float , float *%ptr5
+ %val6 = load float , float *%ptr6
+ %val7 = load float , float *%ptr7
+ %val8 = load float , float *%ptr8
+ %val9 = load float , float *%ptr9
+ %val10 = load float , float *%ptr10
%ret = call float @foo()
diff --git a/test/CodeGen/SystemZ/fp-add-02.ll b/test/CodeGen/SystemZ/fp-add-02.ll
index 067c7474fb43..5be1ad79d453 100644
--- a/test/CodeGen/SystemZ/fp-add-02.ll
+++ b/test/CodeGen/SystemZ/fp-add-02.ll
@@ -1,7 +1,8 @@
; Test 64-bit floating-point addition.
;
-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
-
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \
+; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
declare double @foo()
; Check register addition.
@@ -18,7 +19,7 @@ define double @f2(double %f1, double *%ptr) {
; CHECK-LABEL: f2:
; CHECK: adb %f0, 0(%r2)
; CHECK: br %r14
- %f2 = load double *%ptr
+ %f2 = load double , double *%ptr
%res = fadd double %f1, %f2
ret double %res
}
@@ -28,8 +29,8 @@ define double @f3(double %f1, double *%base) {
; CHECK-LABEL: f3:
; CHECK: adb %f0, 4088(%r2)
; CHECK: br %r14
- %ptr = getelementptr double *%base, i64 511
- %f2 = load double *%ptr
+ %ptr = getelementptr double, double *%base, i64 511
+ %f2 = load double , double *%ptr
%res = fadd double %f1, %f2
ret double %res
}
@@ -41,8 +42,8 @@ define double @f4(double %f1, double *%base) {
; CHECK: aghi %r2, 4096
; CHECK: adb %f0, 0(%r2)
; CHECK: br %r14
- %ptr = getelementptr double *%base, i64 512
- %f2 = load double *%ptr
+ %ptr = getelementptr double, double *%base, i64 512
+ %f2 = load double , double *%ptr
%res = fadd double %f1, %f2
ret double %res
}
@@ -53,8 +54,8 @@ define double @f5(double %f1, double *%base) {
; CHECK: aghi %r2, -8
; CHECK: adb %f0, 0(%r2)
; CHECK: br %r14
- %ptr = getelementptr double *%base, i64 -1
- %f2 = load double *%ptr
+ %ptr = getelementptr double, double *%base, i64 -1
+ %f2 = load double , double *%ptr
%res = fadd double %f1, %f2
ret double %res
}
@@ -65,9 +66,9 @@ define double @f6(double %f1, double *%base, i64 %index) {
; CHECK: sllg %r1, %r3, 3
; CHECK: adb %f0, 800(%r1,%r2)
; CHECK: br %r14
- %ptr1 = getelementptr double *%base, i64 %index
- %ptr2 = getelementptr double *%ptr1, i64 100
- %f2 = load double *%ptr2
+ %ptr1 = getelementptr double, double *%base, i64 %index
+ %ptr2 = getelementptr double, double *%ptr1, i64 100
+ %f2 = load double , double *%ptr2
%res = fadd double %f1, %f2
ret double %res
}
@@ -76,30 +77,30 @@ define double @f6(double %f1, double *%base, i64 %index) {
define double @f7(double *%ptr0) {
; CHECK-LABEL: f7:
; CHECK: brasl %r14, foo@PLT
-; CHECK: adb %f0, 160(%r15)
+; CHECK-SCALAR: adb %f0, 160(%r15)
; CHECK: br %r14
- %ptr1 = getelementptr double *%ptr0, i64 2
- %ptr2 = getelementptr double *%ptr0, i64 4
- %ptr3 = getelementptr double *%ptr0, i64 6
- %ptr4 = getelementptr double *%ptr0, i64 8
- %ptr5 = getelementptr double *%ptr0, i64 10
- %ptr6 = getelementptr double *%ptr0, i64 12
- %ptr7 = getelementptr double *%ptr0, i64 14
- %ptr8 = getelementptr double *%ptr0, i64 16
- %ptr9 = getelementptr double *%ptr0, i64 18
- %ptr10 = getelementptr double *%ptr0, i64 20
+ %ptr1 = getelementptr double, double *%ptr0, i64 2
+ %ptr2 = getelementptr double, double *%ptr0, i64 4
+ %ptr3 = getelementptr double, double *%ptr0, i64 6
+ %ptr4 = getelementptr double, double *%ptr0, i64 8
+ %ptr5 = getelementptr double, double *%ptr0, i64 10
+ %ptr6 = getelementptr double, double *%ptr0, i64 12
+ %ptr7 = getelementptr double, double *%ptr0, i64 14
+ %ptr8 = getelementptr double, double *%ptr0, i64 16
+ %ptr9 = getelementptr double, double *%ptr0, i64 18
+ %ptr10 = getelementptr double, double *%ptr0, i64 20
- %val0 = load double *%ptr0
- %val1 = load double *%ptr1
- %val2 = load double *%ptr2
- %val3 = load double *%ptr3
- %val4 = load double *%ptr4
- %val5 = load double *%ptr5
- %val6 = load double *%ptr6
- %val7 = load double *%ptr7
- %val8 = load double *%ptr8
- %val9 = load double *%ptr9
- %val10 = load double *%ptr10
+ %val0 = load double , double *%ptr0
+ %val1 = load double , double *%ptr1
+ %val2 = load double , double *%ptr2
+ %val3 = load double , double *%ptr3
+ %val4 = load double , double *%ptr4
+ %val5 = load double , double *%ptr5
+ %val6 = load double , double *%ptr6
+ %val7 = load double , double *%ptr7
+ %val8 = load double , double *%ptr8
+ %val9 = load double , double *%ptr9
+ %val10 = load double , double *%ptr10
%ret = call double @foo()
diff --git a/test/CodeGen/SystemZ/fp-add-03.ll b/test/CodeGen/SystemZ/fp-add-03.ll
index cb4042eee472..53342e194edf 100644
--- a/test/CodeGen/SystemZ/fp-add-03.ll
+++ b/test/CodeGen/SystemZ/fp-add-03.ll
@@ -12,7 +12,7 @@ define void @f1(fp128 *%ptr, float %f2) {
; CHECK: std %f1, 0(%r2)
; CHECK: std %f3, 8(%r2)
; CHECK: br %r14
- %f1 = load fp128 *%ptr
+ %f1 = load fp128 , fp128 *%ptr
%f2x = fpext float %f2 to fp128
%sum = fadd fp128 %f1, %f2x
store fp128 %sum, fp128 *%ptr
diff --git a/test/CodeGen/SystemZ/fp-cmp-01.ll b/test/CodeGen/SystemZ/fp-cmp-01.ll
index d7c0cce9c2a5..ed58103e59a5 100644
--- a/test/CodeGen/SystemZ/fp-cmp-01.ll
+++ b/test/CodeGen/SystemZ/fp-cmp-01.ll
@@ -24,7 +24,7 @@ define i64 @f2(i64 %a, i64 %b, float %f1, float *%ptr) {
; CHECK-NEXT: je
; CHECK: lgr %r2, %r3
; CHECK: br %r14
- %f2 = load float *%ptr
+ %f2 = load float , float *%ptr
%cond = fcmp oeq float %f1, %f2
%res = select i1 %cond, i64 %a, i64 %b
ret i64 %res
@@ -37,8 +37,8 @@ define i64 @f3(i64 %a, i64 %b, float %f1, float *%base) {
; CHECK-NEXT: je
; CHECK: lgr %r2, %r3
; CHECK: br %r14
- %ptr = getelementptr float *%base, i64 1023
- %f2 = load float *%ptr
+ %ptr = getelementptr float, float *%base, i64 1023
+ %f2 = load float , float *%ptr
%cond = fcmp oeq float %f1, %f2
%res = select i1 %cond, i64 %a, i64 %b
ret i64 %res
@@ -53,8 +53,8 @@ define i64 @f4(i64 %a, i64 %b, float %f1, float *%base) {
; CHECK-NEXT: je
; CHECK: lgr %r2, %r3
; CHECK: br %r14
- %ptr = getelementptr float *%base, i64 1024
- %f2 = load float *%ptr
+ %ptr = getelementptr float, float *%base, i64 1024
+ %f2 = load float , float *%ptr
%cond = fcmp oeq float %f1, %f2
%res = select i1 %cond, i64 %a, i64 %b
ret i64 %res
@@ -68,8 +68,8 @@ define i64 @f5(i64 %a, i64 %b, float %f1, float *%base) {
; CHECK-NEXT: je
; CHECK: lgr %r2, %r3
; CHECK: br %r14
- %ptr = getelementptr float *%base, i64 -1
- %f2 = load float *%ptr
+ %ptr = getelementptr float, float *%base, i64 -1
+ %f2 = load float , float *%ptr
%cond = fcmp oeq float %f1, %f2
%res = select i1 %cond, i64 %a, i64 %b
ret i64 %res
@@ -83,9 +83,9 @@ define i64 @f6(i64 %a, i64 %b, float %f1, float *%base, i64 %index) {
; CHECK-NEXT: je
; CHECK: lgr %r2, %r3
; CHECK: br %r14
- %ptr1 = getelementptr float *%base, i64 %index
- %ptr2 = getelementptr float *%ptr1, i64 100
- %f2 = load float *%ptr2
+ %ptr1 = getelementptr float, float *%base, i64 %index
+ %ptr2 = getelementptr float, float *%ptr1, i64 100
+ %f2 = load float , float *%ptr2
%cond = fcmp oeq float %f1, %f2
%res = select i1 %cond, i64 %a, i64 %b
ret i64 %res
@@ -97,28 +97,28 @@ define float @f7(float *%ptr0) {
; CHECK: brasl %r14, foo@PLT
; CHECK: ceb {{%f[0-9]+}}, 16{{[04]}}(%r15)
; CHECK: br %r14
- %ptr1 = getelementptr float *%ptr0, i64 2
- %ptr2 = getelementptr float *%ptr0, i64 4
- %ptr3 = getelementptr float *%ptr0, i64 6
- %ptr4 = getelementptr float *%ptr0, i64 8
- %ptr5 = getelementptr float *%ptr0, i64 10
- %ptr6 = getelementptr float *%ptr0, i64 12
- %ptr7 = getelementptr float *%ptr0, i64 14
- %ptr8 = getelementptr float *%ptr0, i64 16
- %ptr9 = getelementptr float *%ptr0, i64 18
- %ptr10 = getelementptr float *%ptr0, i64 20
+ %ptr1 = getelementptr float, float *%ptr0, i64 2
+ %ptr2 = getelementptr float, float *%ptr0, i64 4
+ %ptr3 = getelementptr float, float *%ptr0, i64 6
+ %ptr4 = getelementptr float, float *%ptr0, i64 8
+ %ptr5 = getelementptr float, float *%ptr0, i64 10
+ %ptr6 = getelementptr float, float *%ptr0, i64 12
+ %ptr7 = getelementptr float, float *%ptr0, i64 14
+ %ptr8 = getelementptr float, float *%ptr0, i64 16
+ %ptr9 = getelementptr float, float *%ptr0, i64 18
+ %ptr10 = getelementptr float, float *%ptr0, i64 20
- %val0 = load float *%ptr0
- %val1 = load float *%ptr1
- %val2 = load float *%ptr2
- %val3 = load float *%ptr3
- %val4 = load float *%ptr4
- %val5 = load float *%ptr5
- %val6 = load float *%ptr6
- %val7 = load float *%ptr7
- %val8 = load float *%ptr8
- %val9 = load float *%ptr9
- %val10 = load float *%ptr10
+ %val0 = load float , float *%ptr0
+ %val1 = load float , float *%ptr1
+ %val2 = load float , float *%ptr2
+ %val3 = load float , float *%ptr3
+ %val4 = load float , float *%ptr4
+ %val5 = load float , float *%ptr5
+ %val6 = load float , float *%ptr6
+ %val7 = load float , float *%ptr7
+ %val8 = load float , float *%ptr8
+ %val9 = load float , float *%ptr9
+ %val10 = load float , float *%ptr10
%ret = call float @foo()
@@ -169,7 +169,7 @@ define i64 @f9(i64 %a, i64 %b, float %f2, float *%ptr) {
; CHECK-NEXT: je {{\.L.*}}
; CHECK: lgr %r2, %r3
; CHECK: br %r14
- %f1 = load float *%ptr
+ %f1 = load float , float *%ptr
%cond = fcmp oeq float %f1, %f2
%res = select i1 %cond, i64 %a, i64 %b
ret i64 %res
@@ -182,7 +182,7 @@ define i64 @f10(i64 %a, i64 %b, float %f2, float *%ptr) {
; CHECK-NEXT: jlh {{\.L.*}}
; CHECK: lgr %r2, %r3
; CHECK: br %r14
- %f1 = load float *%ptr
+ %f1 = load float , float *%ptr
%cond = fcmp one float %f1, %f2
%res = select i1 %cond, i64 %a, i64 %b
ret i64 %res
@@ -195,7 +195,7 @@ define i64 @f11(i64 %a, i64 %b, float %f2, float *%ptr) {
; CHECK-NEXT: jh {{\.L.*}}
; CHECK: lgr %r2, %r3
; CHECK: br %r14
- %f1 = load float *%ptr
+ %f1 = load float , float *%ptr
%cond = fcmp olt float %f1, %f2
%res = select i1 %cond, i64 %a, i64 %b
ret i64 %res
@@ -208,7 +208,7 @@ define i64 @f12(i64 %a, i64 %b, float %f2, float *%ptr) {
; CHECK-NEXT: jhe {{\.L.*}}
; CHECK: lgr %r2, %r3
; CHECK: br %r14
- %f1 = load float *%ptr
+ %f1 = load float , float *%ptr
%cond = fcmp ole float %f1, %f2
%res = select i1 %cond, i64 %a, i64 %b
ret i64 %res
@@ -221,7 +221,7 @@ define i64 @f13(i64 %a, i64 %b, float %f2, float *%ptr) {
; CHECK-NEXT: jle {{\.L.*}}
; CHECK: lgr %r2, %r3
; CHECK: br %r14
- %f1 = load float *%ptr
+ %f1 = load float , float *%ptr
%cond = fcmp oge float %f1, %f2
%res = select i1 %cond, i64 %a, i64 %b
ret i64 %res
@@ -234,7 +234,7 @@ define i64 @f14(i64 %a, i64 %b, float %f2, float *%ptr) {
; CHECK-NEXT: jl {{\.L.*}}
; CHECK: lgr %r2, %r3
; CHECK: br %r14
- %f1 = load float *%ptr
+ %f1 = load float , float *%ptr
%cond = fcmp ogt float %f1, %f2
%res = select i1 %cond, i64 %a, i64 %b
ret i64 %res
@@ -247,7 +247,7 @@ define i64 @f15(i64 %a, i64 %b, float %f2, float *%ptr) {
; CHECK-NEXT: jnlh {{\.L.*}}
; CHECK: lgr %r2, %r3
; CHECK: br %r14
- %f1 = load float *%ptr
+ %f1 = load float , float *%ptr
%cond = fcmp ueq float %f1, %f2
%res = select i1 %cond, i64 %a, i64 %b
ret i64 %res
@@ -260,7 +260,7 @@ define i64 @f16(i64 %a, i64 %b, float %f2, float *%ptr) {
; CHECK-NEXT: jne {{\.L.*}}
; CHECK: lgr %r2, %r3
; CHECK: br %r14
- %f1 = load float *%ptr
+ %f1 = load float , float *%ptr
%cond = fcmp une float %f1, %f2
%res = select i1 %cond, i64 %a, i64 %b
ret i64 %res
@@ -273,7 +273,7 @@ define i64 @f17(i64 %a, i64 %b, float %f2, float *%ptr) {
; CHECK-NEXT: jnle {{\.L.*}}
; CHECK: lgr %r2, %r3
; CHECK: br %r14
- %f1 = load float *%ptr
+ %f1 = load float , float *%ptr
%cond = fcmp ult float %f1, %f2
%res = select i1 %cond, i64 %a, i64 %b
ret i64 %res
@@ -286,7 +286,7 @@ define i64 @f18(i64 %a, i64 %b, float %f2, float *%ptr) {
; CHECK-NEXT: jnl {{\.L.*}}
; CHECK: lgr %r2, %r3
; CHECK: br %r14
- %f1 = load float *%ptr
+ %f1 = load float , float *%ptr
%cond = fcmp ule float %f1, %f2
%res = select i1 %cond, i64 %a, i64 %b
ret i64 %res
@@ -299,7 +299,7 @@ define i64 @f19(i64 %a, i64 %b, float %f2, float *%ptr) {
; CHECK-NEXT: jnh {{\.L.*}}
; CHECK: lgr %r2, %r3
; CHECK: br %r14
- %f1 = load float *%ptr
+ %f1 = load float , float *%ptr
%cond = fcmp uge float %f1, %f2
%res = select i1 %cond, i64 %a, i64 %b
ret i64 %res
@@ -312,7 +312,7 @@ define i64 @f20(i64 %a, i64 %b, float %f2, float *%ptr) {
; CHECK-NEXT: jnhe {{\.L.*}}
; CHECK: lgr %r2, %r3
; CHECK: br %r14
- %f1 = load float *%ptr
+ %f1 = load float , float *%ptr
%cond = fcmp ugt float %f1, %f2
%res = select i1 %cond, i64 %a, i64 %b
ret i64 %res
diff --git a/test/CodeGen/SystemZ/fp-cmp-02.ll b/test/CodeGen/SystemZ/fp-cmp-02.ll
index c61f04ed244e..94a256777c75 100644
--- a/test/CodeGen/SystemZ/fp-cmp-02.ll
+++ b/test/CodeGen/SystemZ/fp-cmp-02.ll
@@ -1,7 +1,10 @@
; Test 64-bit floating-point comparison. The tests assume a z10 implementation
; of select, using conditional branches rather than LOCGR.
;
-; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \
+; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 \
+; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-VECTOR %s
declare double @foo()
@@ -9,8 +12,9 @@ declare double @foo()
define i64 @f1(i64 %a, i64 %b, double %f1, double %f2) {
; CHECK-LABEL: f1:
; CHECK: cdbr %f0, %f2
-; CHECK-NEXT: je
-; CHECK: lgr %r2, %r3
+; CHECK-SCALAR-NEXT: je
+; CHECK-SCALAR: lgr %r2, %r3
+; CHECK-VECTOR-NEXT: locgrne %r2, %r3
; CHECK: br %r14
%cond = fcmp oeq double %f1, %f2
%res = select i1 %cond, i64 %a, i64 %b
@@ -21,10 +25,11 @@ define i64 @f1(i64 %a, i64 %b, double %f1, double %f2) {
define i64 @f2(i64 %a, i64 %b, double %f1, double *%ptr) {
; CHECK-LABEL: f2:
; CHECK: cdb %f0, 0(%r4)
-; CHECK-NEXT: je
-; CHECK: lgr %r2, %r3
+; CHECK-SCALAR-NEXT: je
+; CHECK-SCALAR: lgr %r2, %r3
+; CHECK-VECTOR-NEXT: locgrne %r2, %r3
; CHECK: br %r14
- %f2 = load double *%ptr
+ %f2 = load double , double *%ptr
%cond = fcmp oeq double %f1, %f2
%res = select i1 %cond, i64 %a, i64 %b
ret i64 %res
@@ -34,11 +39,12 @@ define i64 @f2(i64 %a, i64 %b, double %f1, double *%ptr) {
define i64 @f3(i64 %a, i64 %b, double %f1, double *%base) {
; CHECK-LABEL: f3:
; CHECK: cdb %f0, 4088(%r4)
-; CHECK-NEXT: je
-; CHECK: lgr %r2, %r3
+; CHECK-SCALAR-NEXT: je
+; CHECK-SCALAR: lgr %r2, %r3
+; CHECK-VECTOR-NEXT: locgrne %r2, %r3
; CHECK: br %r14
- %ptr = getelementptr double *%base, i64 511
- %f2 = load double *%ptr
+ %ptr = getelementptr double, double *%base, i64 511
+ %f2 = load double , double *%ptr
%cond = fcmp oeq double %f1, %f2
%res = select i1 %cond, i64 %a, i64 %b
ret i64 %res
@@ -50,11 +56,12 @@ define i64 @f4(i64 %a, i64 %b, double %f1, double *%base) {
; CHECK-LABEL: f4:
; CHECK: aghi %r4, 4096
; CHECK: cdb %f0, 0(%r4)
-; CHECK-NEXT: je
-; CHECK: lgr %r2, %r3
+; CHECK-SCALAR-NEXT: je
+; CHECK-SCALAR: lgr %r2, %r3
+; CHECK-VECTOR-NEXT: locgrne %r2, %r3
; CHECK: br %r14
- %ptr = getelementptr double *%base, i64 512
- %f2 = load double *%ptr
+ %ptr = getelementptr double, double *%base, i64 512
+ %f2 = load double , double *%ptr
%cond = fcmp oeq double %f1, %f2
%res = select i1 %cond, i64 %a, i64 %b
ret i64 %res
@@ -65,11 +72,12 @@ define i64 @f5(i64 %a, i64 %b, double %f1, double *%base) {
; CHECK-LABEL: f5:
; CHECK: aghi %r4, -8
; CHECK: cdb %f0, 0(%r4)
-; CHECK-NEXT: je
-; CHECK: lgr %r2, %r3
+; CHECK-SCALAR-NEXT: je
+; CHECK-SCALAR: lgr %r2, %r3
+; CHECK-VECTOR-NEXT: locgrne %r2, %r3
; CHECK: br %r14
- %ptr = getelementptr double *%base, i64 -1
- %f2 = load double *%ptr
+ %ptr = getelementptr double, double *%base, i64 -1
+ %f2 = load double , double *%ptr
%cond = fcmp oeq double %f1, %f2
%res = select i1 %cond, i64 %a, i64 %b
ret i64 %res
@@ -80,12 +88,13 @@ define i64 @f6(i64 %a, i64 %b, double %f1, double *%base, i64 %index) {
; CHECK-LABEL: f6:
; CHECK: sllg %r1, %r5, 3
; CHECK: cdb %f0, 800(%r1,%r4)
-; CHECK-NEXT: je
-; CHECK: lgr %r2, %r3
+; CHECK-SCALAR-NEXT: je
+; CHECK-SCALAR: lgr %r2, %r3
+; CHECK-VECTOR-NEXT: locgrne %r2, %r3
; CHECK: br %r14
- %ptr1 = getelementptr double *%base, i64 %index
- %ptr2 = getelementptr double *%ptr1, i64 100
- %f2 = load double *%ptr2
+ %ptr1 = getelementptr double, double *%base, i64 %index
+ %ptr2 = getelementptr double, double *%ptr1, i64 100
+ %f2 = load double , double *%ptr2
%cond = fcmp oeq double %f1, %f2
%res = select i1 %cond, i64 %a, i64 %b
ret i64 %res
@@ -95,30 +104,30 @@ define i64 @f6(i64 %a, i64 %b, double %f1, double *%base, i64 %index) {
define double @f7(double *%ptr0) {
; CHECK-LABEL: f7:
; CHECK: brasl %r14, foo@PLT
-; CHECK: cdb {{%f[0-9]+}}, 160(%r15)
+; CHECK-SCALAR: cdb {{%f[0-9]+}}, 160(%r15)
; CHECK: br %r14
- %ptr1 = getelementptr double *%ptr0, i64 2
- %ptr2 = getelementptr double *%ptr0, i64 4
- %ptr3 = getelementptr double *%ptr0, i64 6
- %ptr4 = getelementptr double *%ptr0, i64 8
- %ptr5 = getelementptr double *%ptr0, i64 10
- %ptr6 = getelementptr double *%ptr0, i64 12
- %ptr7 = getelementptr double *%ptr0, i64 14
- %ptr8 = getelementptr double *%ptr0, i64 16
- %ptr9 = getelementptr double *%ptr0, i64 18
- %ptr10 = getelementptr double *%ptr0, i64 20
+ %ptr1 = getelementptr double, double *%ptr0, i64 2
+ %ptr2 = getelementptr double, double *%ptr0, i64 4
+ %ptr3 = getelementptr double, double *%ptr0, i64 6
+ %ptr4 = getelementptr double, double *%ptr0, i64 8
+ %ptr5 = getelementptr double, double *%ptr0, i64 10
+ %ptr6 = getelementptr double, double *%ptr0, i64 12
+ %ptr7 = getelementptr double, double *%ptr0, i64 14
+ %ptr8 = getelementptr double, double *%ptr0, i64 16
+ %ptr9 = getelementptr double, double *%ptr0, i64 18
+ %ptr10 = getelementptr double, double *%ptr0, i64 20
- %val0 = load double *%ptr0
- %val1 = load double *%ptr1
- %val2 = load double *%ptr2
- %val3 = load double *%ptr3
- %val4 = load double *%ptr4
- %val5 = load double *%ptr5
- %val6 = load double *%ptr6
- %val7 = load double *%ptr7
- %val8 = load double *%ptr8
- %val9 = load double *%ptr9
- %val10 = load double *%ptr10
+ %val0 = load double , double *%ptr0
+ %val1 = load double , double *%ptr1
+ %val2 = load double , double *%ptr2
+ %val3 = load double , double *%ptr3
+ %val4 = load double , double *%ptr4
+ %val5 = load double , double *%ptr5
+ %val6 = load double , double *%ptr6
+ %val7 = load double , double *%ptr7
+ %val8 = load double , double *%ptr8
+ %val9 = load double , double *%ptr9
+ %val10 = load double , double *%ptr10
%ret = call double @foo()
@@ -152,9 +161,12 @@ define double @f7(double *%ptr0) {
; Check comparison with zero.
define i64 @f8(i64 %a, i64 %b, double %f) {
; CHECK-LABEL: f8:
-; CHECK: ltdbr %f0, %f0
-; CHECK-NEXT: je
-; CHECK: lgr %r2, %r3
+; CHECK-SCALAR: ltdbr %f0, %f0
+; CHECK-SCALAR-NEXT: je
+; CHECK-SCALAR: lgr %r2, %r3
+; CHECK-VECTOR: lzdr %f1
+; CHECK-VECTOR-NEXT: cdbr %f0, %f1
+; CHECK-VECTOR-NEXT: locgrne %r2, %r3
; CHECK: br %r14
%cond = fcmp oeq double %f, 0.0
%res = select i1 %cond, i64 %a, i64 %b
@@ -165,10 +177,11 @@ define i64 @f8(i64 %a, i64 %b, double %f) {
define i64 @f9(i64 %a, i64 %b, double %f2, double *%ptr) {
; CHECK-LABEL: f9:
; CHECK: cdb %f0, 0(%r4)
-; CHECK-NEXT: jl {{\.L.*}}
-; CHECK: lgr %r2, %r3
+; CHECK-SCALAR-NEXT: jl
+; CHECK-SCALAR: lgr %r2, %r3
+; CHECK-VECTOR-NEXT: locgrnl %r2, %r3
; CHECK: br %r14
- %f1 = load double *%ptr
+ %f1 = load double , double *%ptr
%cond = fcmp ogt double %f1, %f2
%res = select i1 %cond, i64 %a, i64 %b
ret i64 %res
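
(Editorial aside, not part of the imported patch: where a test now runs under both -mcpu=z10 and -mcpu=z13, assertions common to both targets stay on the plain CHECK prefix, while target-specific sequences move to CHECK-SCALAR or CHECK-VECTOR, as in the fp-cmp-02.ll hunks above. The sketch below restates the pattern from @f1 under a hypothetical function name; it is illustrative only.)

; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \
; RUN:   | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 \
; RUN:   | FileCheck -check-prefix=CHECK -check-prefix=CHECK-VECTOR %s

define i64 @select_eq(i64 %a, i64 %b, double %f1, double %f2) {
; CHECK-LABEL: select_eq:
; CHECK: cdbr %f0, %f2
; CHECK-SCALAR: lgr %r2, %r3
; CHECK-VECTOR: locgrne %r2, %r3
; CHECK: br %r14
  %cond = fcmp oeq double %f1, %f2
  %res = select i1 %cond, i64 %a, i64 %b
  ret i64 %res
}
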
diff --git a/test/CodeGen/SystemZ/fp-cmp-03.ll b/test/CodeGen/SystemZ/fp-cmp-03.ll
index e777d00c9687..862c5e9b65b8 100644
--- a/test/CodeGen/SystemZ/fp-cmp-03.ll
+++ b/test/CodeGen/SystemZ/fp-cmp-03.ll
@@ -14,7 +14,7 @@ define i64 @f1(i64 %a, i64 %b, fp128 *%ptr, float %f2) {
; CHECK: lgr %r2, %r3
; CHECK: br %r14
%f2x = fpext float %f2 to fp128
- %f1 = load fp128 *%ptr
+ %f1 = load fp128 , fp128 *%ptr
%cond = fcmp oeq fp128 %f1, %f2x
%res = select i1 %cond, i64 %a, i64 %b
ret i64 %res
@@ -29,7 +29,7 @@ define i64 @f2(i64 %a, i64 %b, fp128 *%ptr) {
; CHECK-NEXT: je
; CHECK: lgr %r2, %r3
; CHECK: br %r14
- %f = load fp128 *%ptr
+ %f = load fp128 , fp128 *%ptr
%cond = fcmp oeq fp128 %f, 0xL00000000000000000000000000000000
%res = select i1 %cond, i64 %a, i64 %b
ret i64 %res
diff --git a/test/CodeGen/SystemZ/fp-cmp-04.ll b/test/CodeGen/SystemZ/fp-cmp-04.ll
index 1637ccb0791b..05c6dfe7e8e4 100644
--- a/test/CodeGen/SystemZ/fp-cmp-04.ll
+++ b/test/CodeGen/SystemZ/fp-cmp-04.ll
@@ -88,7 +88,7 @@ define float @f5(float %a, float %b, float *%dest) {
; CHECK-NEXT: jnhe .L{{.*}}
; CHECK: br %r14
entry:
- %cur = load float *%dest
+ %cur = load float , float *%dest
%res = fsub float %a, %cur
%cmp = fcmp ult float %res, 0.0
br i1 %cmp, label %exit, label %store
@@ -284,8 +284,8 @@ define void @f14(fp128 *%ptr1, fp128 *%ptr2) {
; CHECK-NEXT: jl .L{{.*}}
; CHECK: br %r14
entry:
- %val1 = load fp128 *%ptr1
- %val2 = load fp128 *%ptr2
+ %val1 = load fp128 , fp128 *%ptr1
+ %val2 = load fp128 , fp128 *%ptr2
%div = fdiv fp128 %val1, %val2
store fp128 %div, fp128 *%ptr1
%mul = fmul fp128 %val1, %val2
diff --git a/test/CodeGen/SystemZ/fp-conv-01.ll b/test/CodeGen/SystemZ/fp-conv-01.ll
index 49ed43bce51c..06740ed4b4a6 100644
--- a/test/CodeGen/SystemZ/fp-conv-01.ll
+++ b/test/CodeGen/SystemZ/fp-conv-01.ll
@@ -1,11 +1,15 @@
; Test floating-point truncations.
;
-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \
+; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 \
+; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-VECTOR %s
; Test f64->f32.
define float @f1(double %d1, double %d2) {
; CHECK-LABEL: f1:
-; CHECK: ledbr %f0, %f2
+; CHECK-SCALAR: ledbr %f0, %f2
+; CHECK-VECTOR: ledbra %f0, 0, %f2, 0
; CHECK: br %r14
%res = fptrunc double %d2 to float
ret float %res
@@ -16,7 +20,7 @@ define float @f2(fp128 *%ptr) {
; CHECK-LABEL: f2:
; CHECK: lexbr %f0, %f0
; CHECK: br %r14
- %val = load fp128 *%ptr
+ %val = load fp128 , fp128 *%ptr
%res = fptrunc fp128 %val to float
ret float %res
}
@@ -29,7 +33,7 @@ define void @f3(float *%dst, fp128 *%ptr, float %d1, float %d2) {
; CHECK: aebr %f1, %f2
; CHECK: ste %f1, 0(%r2)
; CHECK: br %r14
- %val = load fp128 *%ptr
+ %val = load fp128 , fp128 *%ptr
%conv = fptrunc fp128 %val to float
%res = fadd float %conv, %d2
store float %res, float *%dst
@@ -41,7 +45,7 @@ define double @f4(fp128 *%ptr) {
; CHECK-LABEL: f4:
; CHECK: ldxbr %f0, %f0
; CHECK: br %r14
- %val = load fp128 *%ptr
+ %val = load fp128 , fp128 *%ptr
%res = fptrunc fp128 %val to double
ret double %res
}
@@ -50,10 +54,12 @@ define double @f4(fp128 *%ptr) {
define void @f5(double *%dst, fp128 *%ptr, double %d1, double %d2) {
; CHECK-LABEL: f5:
; CHECK: ldxbr %f1, %f1
-; CHECK: adbr %f1, %f2
-; CHECK: std %f1, 0(%r2)
+; CHECK-SCALAR: adbr %f1, %f2
+; CHECK-SCALAR: std %f1, 0(%r2)
+; CHECK-VECTOR: wfadb [[REG:%f[0-9]+]], %f1, %f2
+; CHECK-VECTOR: std [[REG]], 0(%r2)
; CHECK: br %r14
- %val = load fp128 *%ptr
+ %val = load fp128 , fp128 *%ptr
%conv = fptrunc fp128 %val to double
%res = fadd double %conv, %d2
store double %res, double *%dst
diff --git a/test/CodeGen/SystemZ/fp-conv-02.ll b/test/CodeGen/SystemZ/fp-conv-02.ll
index 93fb7c8d4d92..be32bfe7ba9a 100644
--- a/test/CodeGen/SystemZ/fp-conv-02.ll
+++ b/test/CodeGen/SystemZ/fp-conv-02.ll
@@ -1,6 +1,8 @@
; Test extensions of f32 to f64.
;
-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \
+; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
; Check register extension.
define double @f1(float %val) {
@@ -16,7 +18,7 @@ define double @f2(float *%ptr) {
; CHECK-LABEL: f2:
; CHECK: ldeb %f0, 0(%r2)
; CHECK: br %r14
- %val = load float *%ptr
+ %val = load float , float *%ptr
%res = fpext float %val to double
ret double %res
}
@@ -26,8 +28,8 @@ define double @f3(float *%base) {
; CHECK-LABEL: f3:
; CHECK: ldeb %f0, 4092(%r2)
; CHECK: br %r14
- %ptr = getelementptr float *%base, i64 1023
- %val = load float *%ptr
+ %ptr = getelementptr float, float *%base, i64 1023
+ %val = load float , float *%ptr
%res = fpext float %val to double
ret double %res
}
@@ -39,8 +41,8 @@ define double @f4(float *%base) {
; CHECK: aghi %r2, 4096
; CHECK: ldeb %f0, 0(%r2)
; CHECK: br %r14
- %ptr = getelementptr float *%base, i64 1024
- %val = load float *%ptr
+ %ptr = getelementptr float, float *%base, i64 1024
+ %val = load float , float *%ptr
%res = fpext float %val to double
ret double %res
}
@@ -51,8 +53,8 @@ define double @f5(float *%base) {
; CHECK: aghi %r2, -4
; CHECK: ldeb %f0, 0(%r2)
; CHECK: br %r14
- %ptr = getelementptr float *%base, i64 -1
- %val = load float *%ptr
+ %ptr = getelementptr float, float *%base, i64 -1
+ %val = load float , float *%ptr
%res = fpext float %val to double
ret double %res
}
@@ -63,9 +65,9 @@ define double @f6(float *%base, i64 %index) {
; CHECK: sllg %r1, %r3, 2
; CHECK: ldeb %f0, 400(%r1,%r2)
; CHECK: br %r14
- %ptr1 = getelementptr float *%base, i64 %index
- %ptr2 = getelementptr float *%ptr1, i64 100
- %val = load float *%ptr2
+ %ptr1 = getelementptr float, float *%base, i64 %index
+ %ptr2 = getelementptr float, float *%ptr1, i64 100
+ %val = load float , float *%ptr2
%res = fpext float %val to double
ret double %res
}
@@ -74,25 +76,25 @@ define double @f6(float *%base, i64 %index) {
; to use LDEB if possible.
define void @f7(double *%ptr1, float *%ptr2) {
; CHECK-LABEL: f7:
-; CHECK: ldeb {{%f[0-9]+}}, 16{{[04]}}(%r15)
+; CHECK-SCALAR: ldeb {{%f[0-9]+}}, 16{{[04]}}(%r15)
; CHECK: br %r14
- %val0 = load volatile float *%ptr2
- %val1 = load volatile float *%ptr2
- %val2 = load volatile float *%ptr2
- %val3 = load volatile float *%ptr2
- %val4 = load volatile float *%ptr2
- %val5 = load volatile float *%ptr2
- %val6 = load volatile float *%ptr2
- %val7 = load volatile float *%ptr2
- %val8 = load volatile float *%ptr2
- %val9 = load volatile float *%ptr2
- %val10 = load volatile float *%ptr2
- %val11 = load volatile float *%ptr2
- %val12 = load volatile float *%ptr2
- %val13 = load volatile float *%ptr2
- %val14 = load volatile float *%ptr2
- %val15 = load volatile float *%ptr2
- %val16 = load volatile float *%ptr2
+ %val0 = load volatile float , float *%ptr2
+ %val1 = load volatile float , float *%ptr2
+ %val2 = load volatile float , float *%ptr2
+ %val3 = load volatile float , float *%ptr2
+ %val4 = load volatile float , float *%ptr2
+ %val5 = load volatile float , float *%ptr2
+ %val6 = load volatile float , float *%ptr2
+ %val7 = load volatile float , float *%ptr2
+ %val8 = load volatile float , float *%ptr2
+ %val9 = load volatile float , float *%ptr2
+ %val10 = load volatile float , float *%ptr2
+ %val11 = load volatile float , float *%ptr2
+ %val12 = load volatile float , float *%ptr2
+ %val13 = load volatile float , float *%ptr2
+ %val14 = load volatile float , float *%ptr2
+ %val15 = load volatile float , float *%ptr2
+ %val16 = load volatile float , float *%ptr2
%ext0 = fpext float %val0 to double
%ext1 = fpext float %val1 to double
diff --git a/test/CodeGen/SystemZ/fp-conv-03.ll b/test/CodeGen/SystemZ/fp-conv-03.ll
index d42ce6650aaf..bb14e610f720 100644
--- a/test/CodeGen/SystemZ/fp-conv-03.ll
+++ b/test/CodeGen/SystemZ/fp-conv-03.ll
@@ -21,7 +21,7 @@ define void @f2(fp128 *%dst, float *%ptr) {
; CHECK: std %f0, 0(%r2)
; CHECK: std %f2, 8(%r2)
; CHECK: br %r14
- %val = load float *%ptr
+ %val = load float , float *%ptr
%res = fpext float %val to fp128
store fp128 %res, fp128 *%dst
ret void
@@ -34,8 +34,8 @@ define void @f3(fp128 *%dst, float *%base) {
; CHECK: std %f0, 0(%r2)
; CHECK: std %f2, 8(%r2)
; CHECK: br %r14
- %ptr = getelementptr float *%base, i64 1023
- %val = load float *%ptr
+ %ptr = getelementptr float, float *%base, i64 1023
+ %val = load float , float *%ptr
%res = fpext float %val to fp128
store fp128 %res, fp128 *%dst
ret void
@@ -50,8 +50,8 @@ define void @f4(fp128 *%dst, float *%base) {
; CHECK: std %f0, 0(%r2)
; CHECK: std %f2, 8(%r2)
; CHECK: br %r14
- %ptr = getelementptr float *%base, i64 1024
- %val = load float *%ptr
+ %ptr = getelementptr float, float *%base, i64 1024
+ %val = load float , float *%ptr
%res = fpext float %val to fp128
store fp128 %res, fp128 *%dst
ret void
@@ -65,8 +65,8 @@ define void @f5(fp128 *%dst, float *%base) {
; CHECK: std %f0, 0(%r2)
; CHECK: std %f2, 8(%r2)
; CHECK: br %r14
- %ptr = getelementptr float *%base, i64 -1
- %val = load float *%ptr
+ %ptr = getelementptr float, float *%base, i64 -1
+ %val = load float , float *%ptr
%res = fpext float %val to fp128
store fp128 %res, fp128 *%dst
ret void
@@ -80,9 +80,9 @@ define void @f6(fp128 *%dst, float *%base, i64 %index) {
; CHECK: std %f0, 0(%r2)
; CHECK: std %f2, 8(%r2)
; CHECK: br %r14
- %ptr1 = getelementptr float *%base, i64 %index
- %ptr2 = getelementptr float *%ptr1, i64 100
- %val = load float *%ptr2
+ %ptr1 = getelementptr float, float *%base, i64 %index
+ %ptr2 = getelementptr float, float *%ptr1, i64 100
+ %val = load float , float *%ptr2
%res = fpext float %val to fp128
store fp128 %res, fp128 *%dst
ret void
@@ -94,23 +94,23 @@ define void @f7(fp128 *%ptr1, float *%ptr2) {
; CHECK-LABEL: f7:
; CHECK: lxeb {{%f[0-9]+}}, 16{{[04]}}(%r15)
; CHECK: br %r14
- %val0 = load volatile float *%ptr2
- %val1 = load volatile float *%ptr2
- %val2 = load volatile float *%ptr2
- %val3 = load volatile float *%ptr2
- %val4 = load volatile float *%ptr2
- %val5 = load volatile float *%ptr2
- %val6 = load volatile float *%ptr2
- %val7 = load volatile float *%ptr2
- %val8 = load volatile float *%ptr2
- %val9 = load volatile float *%ptr2
- %val10 = load volatile float *%ptr2
- %val11 = load volatile float *%ptr2
- %val12 = load volatile float *%ptr2
- %val13 = load volatile float *%ptr2
- %val14 = load volatile float *%ptr2
- %val15 = load volatile float *%ptr2
- %val16 = load volatile float *%ptr2
+ %val0 = load volatile float , float *%ptr2
+ %val1 = load volatile float , float *%ptr2
+ %val2 = load volatile float , float *%ptr2
+ %val3 = load volatile float , float *%ptr2
+ %val4 = load volatile float , float *%ptr2
+ %val5 = load volatile float , float *%ptr2
+ %val6 = load volatile float , float *%ptr2
+ %val7 = load volatile float , float *%ptr2
+ %val8 = load volatile float , float *%ptr2
+ %val9 = load volatile float , float *%ptr2
+ %val10 = load volatile float , float *%ptr2
+ %val11 = load volatile float , float *%ptr2
+ %val12 = load volatile float , float *%ptr2
+ %val13 = load volatile float , float *%ptr2
+ %val14 = load volatile float , float *%ptr2
+ %val15 = load volatile float , float *%ptr2
+ %val16 = load volatile float , float *%ptr2
%ext0 = fpext float %val0 to fp128
%ext1 = fpext float %val1 to fp128
diff --git a/test/CodeGen/SystemZ/fp-conv-04.ll b/test/CodeGen/SystemZ/fp-conv-04.ll
index 518d6c28d867..cfcb98aaa70a 100644
--- a/test/CodeGen/SystemZ/fp-conv-04.ll
+++ b/test/CodeGen/SystemZ/fp-conv-04.ll
@@ -21,7 +21,7 @@ define void @f2(fp128 *%dst, double *%ptr) {
; CHECK: std %f0, 0(%r2)
; CHECK: std %f2, 8(%r2)
; CHECK: br %r14
- %val = load double *%ptr
+ %val = load double , double *%ptr
%res = fpext double %val to fp128
store fp128 %res, fp128 *%dst
ret void
@@ -34,8 +34,8 @@ define void @f3(fp128 *%dst, double *%base) {
; CHECK: std %f0, 0(%r2)
; CHECK: std %f2, 8(%r2)
; CHECK: br %r14
- %ptr = getelementptr double *%base, i64 511
- %val = load double *%ptr
+ %ptr = getelementptr double, double *%base, i64 511
+ %val = load double , double *%ptr
%res = fpext double %val to fp128
store fp128 %res, fp128 *%dst
ret void
@@ -50,8 +50,8 @@ define void @f4(fp128 *%dst, double *%base) {
; CHECK: std %f0, 0(%r2)
; CHECK: std %f2, 8(%r2)
; CHECK: br %r14
- %ptr = getelementptr double *%base, i64 512
- %val = load double *%ptr
+ %ptr = getelementptr double, double *%base, i64 512
+ %val = load double , double *%ptr
%res = fpext double %val to fp128
store fp128 %res, fp128 *%dst
ret void
@@ -65,8 +65,8 @@ define void @f5(fp128 *%dst, double *%base) {
; CHECK: std %f0, 0(%r2)
; CHECK: std %f2, 8(%r2)
; CHECK: br %r14
- %ptr = getelementptr double *%base, i64 -1
- %val = load double *%ptr
+ %ptr = getelementptr double, double *%base, i64 -1
+ %val = load double , double *%ptr
%res = fpext double %val to fp128
store fp128 %res, fp128 *%dst
ret void
@@ -80,9 +80,9 @@ define void @f6(fp128 *%dst, double *%base, i64 %index) {
; CHECK: std %f0, 0(%r2)
; CHECK: std %f2, 8(%r2)
; CHECK: br %r14
- %ptr1 = getelementptr double *%base, i64 %index
- %ptr2 = getelementptr double *%ptr1, i64 100
- %val = load double *%ptr2
+ %ptr1 = getelementptr double, double *%base, i64 %index
+ %ptr2 = getelementptr double, double *%ptr1, i64 100
+ %val = load double , double *%ptr2
%res = fpext double %val to fp128
store fp128 %res, fp128 *%dst
ret void
@@ -94,23 +94,23 @@ define void @f7(fp128 *%ptr1, double *%ptr2) {
; CHECK-LABEL: f7:
; CHECK: lxdb {{%f[0-9]+}}, 160(%r15)
; CHECK: br %r14
- %val0 = load volatile double *%ptr2
- %val1 = load volatile double *%ptr2
- %val2 = load volatile double *%ptr2
- %val3 = load volatile double *%ptr2
- %val4 = load volatile double *%ptr2
- %val5 = load volatile double *%ptr2
- %val6 = load volatile double *%ptr2
- %val7 = load volatile double *%ptr2
- %val8 = load volatile double *%ptr2
- %val9 = load volatile double *%ptr2
- %val10 = load volatile double *%ptr2
- %val11 = load volatile double *%ptr2
- %val12 = load volatile double *%ptr2
- %val13 = load volatile double *%ptr2
- %val14 = load volatile double *%ptr2
- %val15 = load volatile double *%ptr2
- %val16 = load volatile double *%ptr2
+ %val0 = load volatile double , double *%ptr2
+ %val1 = load volatile double , double *%ptr2
+ %val2 = load volatile double , double *%ptr2
+ %val3 = load volatile double , double *%ptr2
+ %val4 = load volatile double , double *%ptr2
+ %val5 = load volatile double , double *%ptr2
+ %val6 = load volatile double , double *%ptr2
+ %val7 = load volatile double , double *%ptr2
+ %val8 = load volatile double , double *%ptr2
+ %val9 = load volatile double , double *%ptr2
+ %val10 = load volatile double , double *%ptr2
+ %val11 = load volatile double , double *%ptr2
+ %val12 = load volatile double , double *%ptr2
+ %val13 = load volatile double , double *%ptr2
+ %val14 = load volatile double , double *%ptr2
+ %val15 = load volatile double , double *%ptr2
+ %val16 = load volatile double , double *%ptr2
%ext0 = fpext double %val0 to fp128
%ext1 = fpext double %val1 to fp128
diff --git a/test/CodeGen/SystemZ/fp-conv-09.ll b/test/CodeGen/SystemZ/fp-conv-09.ll
index 6aee73644a16..21b809d40e29 100644
--- a/test/CodeGen/SystemZ/fp-conv-09.ll
+++ b/test/CodeGen/SystemZ/fp-conv-09.ll
@@ -27,7 +27,7 @@ define i32 @f3(fp128 *%src) {
; CHECK: ld %f2, 8(%r2)
; CHECK: cfxbr %r2, 5, %f0
; CHECK: br %r14
- %f = load fp128 *%src
+ %f = load fp128 , fp128 *%src
%conv = fptosi fp128 %f to i32
ret i32 %conv
}
diff --git a/test/CodeGen/SystemZ/fp-conv-10.ll b/test/CodeGen/SystemZ/fp-conv-10.ll
index b8155ed067da..cfbe0b939747 100644
--- a/test/CodeGen/SystemZ/fp-conv-10.ll
+++ b/test/CodeGen/SystemZ/fp-conv-10.ll
@@ -39,7 +39,7 @@ define i32 @f3(fp128 *%src) {
; CHECK: cfxbr
; CHECK: xilf
; CHECK: br %r14
- %f = load fp128 *%src
+ %f = load fp128 , fp128 *%src
%conv = fptoui fp128 %f to i32
ret i32 %conv
}
diff --git a/test/CodeGen/SystemZ/fp-conv-11.ll b/test/CodeGen/SystemZ/fp-conv-11.ll
index 46f4cb3a6d89..eb996cb3bb1b 100644
--- a/test/CodeGen/SystemZ/fp-conv-11.ll
+++ b/test/CodeGen/SystemZ/fp-conv-11.ll
@@ -27,7 +27,7 @@ define i64 @f3(fp128 *%src) {
; CHECK: ld %f2, 8(%r2)
; CHECK: cgxbr %r2, 5, %f0
; CHECK: br %r14
- %f = load fp128 *%src
+ %f = load fp128 , fp128 *%src
%conv = fptosi fp128 %f to i64
ret i64 %conv
}
diff --git a/test/CodeGen/SystemZ/fp-conv-12.ll b/test/CodeGen/SystemZ/fp-conv-12.ll
index 770c9407a0af..28a521605a95 100644
--- a/test/CodeGen/SystemZ/fp-conv-12.ll
+++ b/test/CodeGen/SystemZ/fp-conv-12.ll
@@ -38,7 +38,7 @@ define i64 @f3(fp128 *%src) {
; CHECK: cgxbr
; CHECK: xihf
; CHECK: br %r14
- %f = load fp128 *%src
+ %f = load fp128 , fp128 *%src
%conv = fptoui fp128 %f to i64
ret i64 %conv
}
diff --git a/test/CodeGen/SystemZ/fp-conv-14.ll b/test/CodeGen/SystemZ/fp-conv-14.ll
index e926e9bb31f5..e4f0a27022e8 100644
--- a/test/CodeGen/SystemZ/fp-conv-14.ll
+++ b/test/CodeGen/SystemZ/fp-conv-14.ll
@@ -27,7 +27,7 @@ define i32 @f3(fp128 *%src) {
; CHECK-DAG: ld %f2, 8(%r2)
; CHECK: clfxbr %r2, 5, %f0, 0
; CHECK: br %r14
- %f = load fp128 *%src
+ %f = load fp128 , fp128 *%src
%conv = fptoui fp128 %f to i32
ret i32 %conv
}
@@ -57,7 +57,7 @@ define i64 @f6(fp128 *%src) {
; CHECK-DAG: ld %f2, 8(%r2)
; CHECK: clgxbr %r2, 5, %f0, 0
; CHECK: br %r14
- %f = load fp128 *%src
+ %f = load fp128 , fp128 *%src
%conv = fptoui fp128 %f to i64
ret i64 %conv
}
diff --git a/test/CodeGen/SystemZ/fp-copysign-01.ll b/test/CodeGen/SystemZ/fp-copysign-01.ll
index 50177e5f41bf..57ad76fcbb2a 100644
--- a/test/CodeGen/SystemZ/fp-copysign-01.ll
+++ b/test/CodeGen/SystemZ/fp-copysign-01.ll
@@ -35,7 +35,7 @@ define float @f3(float %a, fp128 *%bptr) {
; CHECK: ld [[BLOW:%f[0-7]]], 8(%r2)
; CHECK: cpsdr %f0, %f0, [[BHIGH]]
; CHECK: br %r14
- %bl = load volatile fp128 *%bptr
+ %bl = load volatile fp128 , fp128 *%bptr
%b = fptrunc fp128 %bl to float
%res = call float @copysignf(float %a, float %b) readnone
ret float %res
@@ -69,7 +69,7 @@ define double @f6(double %a, fp128 *%bptr) {
; CHECK: ld [[BLOW:%f[0-7]]], 8(%r2)
; CHECK: cpsdr %f0, %f0, [[BHIGH]]
; CHECK: br %r14
- %bl = load volatile fp128 *%bptr
+ %bl = load volatile fp128 , fp128 *%bptr
%b = fptrunc fp128 %bl to double
%res = call double @copysign(double %a, double %b) readnone
ret double %res
@@ -86,7 +86,7 @@ define void @f7(fp128 *%cptr, fp128 *%aptr, float %bf) {
; CHECK: std [[AHIGH]], 0(%r2)
; CHECK: std [[ALOW]], 8(%r2)
; CHECK: br %r14
- %a = load volatile fp128 *%aptr
+ %a = load volatile fp128 , fp128 *%aptr
%b = fpext float %bf to fp128
%c = call fp128 @copysignl(fp128 %a, fp128 %b) readnone
store fp128 %c, fp128 *%cptr
@@ -102,7 +102,7 @@ define void @f8(fp128 *%cptr, fp128 *%aptr, double %bd) {
; CHECK: std [[AHIGH]], 0(%r2)
; CHECK: std [[ALOW]], 8(%r2)
; CHECK: br %r14
- %a = load volatile fp128 *%aptr
+ %a = load volatile fp128 , fp128 *%aptr
%b = fpext double %bd to fp128
%c = call fp128 @copysignl(fp128 %a, fp128 %b) readnone
store fp128 %c, fp128 *%cptr
@@ -120,8 +120,8 @@ define void @f9(fp128 *%cptr, fp128 *%aptr, fp128 *%bptr) {
; CHECK: std [[AHIGH]], 0(%r2)
; CHECK: std [[ALOW]], 8(%r2)
; CHECK: br %r14
- %a = load volatile fp128 *%aptr
- %b = load volatile fp128 *%bptr
+ %a = load volatile fp128 , fp128 *%aptr
+ %b = load volatile fp128 , fp128 *%bptr
%c = call fp128 @copysignl(fp128 %a, fp128 %b) readnone
store fp128 %c, fp128 *%cptr
ret void
diff --git a/test/CodeGen/SystemZ/fp-div-01.ll b/test/CodeGen/SystemZ/fp-div-01.ll
index 1b99463327b4..0791e8db93f8 100644
--- a/test/CodeGen/SystemZ/fp-div-01.ll
+++ b/test/CodeGen/SystemZ/fp-div-01.ll
@@ -18,7 +18,7 @@ define float @f2(float %f1, float *%ptr) {
; CHECK-LABEL: f2:
; CHECK: deb %f0, 0(%r2)
; CHECK: br %r14
- %f2 = load float *%ptr
+ %f2 = load float , float *%ptr
%res = fdiv float %f1, %f2
ret float %res
}
@@ -28,8 +28,8 @@ define float @f3(float %f1, float *%base) {
; CHECK-LABEL: f3:
; CHECK: deb %f0, 4092(%r2)
; CHECK: br %r14
- %ptr = getelementptr float *%base, i64 1023
- %f2 = load float *%ptr
+ %ptr = getelementptr float, float *%base, i64 1023
+ %f2 = load float , float *%ptr
%res = fdiv float %f1, %f2
ret float %res
}
@@ -41,8 +41,8 @@ define float @f4(float %f1, float *%base) {
; CHECK: aghi %r2, 4096
; CHECK: deb %f0, 0(%r2)
; CHECK: br %r14
- %ptr = getelementptr float *%base, i64 1024
- %f2 = load float *%ptr
+ %ptr = getelementptr float, float *%base, i64 1024
+ %f2 = load float , float *%ptr
%res = fdiv float %f1, %f2
ret float %res
}
@@ -53,8 +53,8 @@ define float @f5(float %f1, float *%base) {
; CHECK: aghi %r2, -4
; CHECK: deb %f0, 0(%r2)
; CHECK: br %r14
- %ptr = getelementptr float *%base, i64 -1
- %f2 = load float *%ptr
+ %ptr = getelementptr float, float *%base, i64 -1
+ %f2 = load float , float *%ptr
%res = fdiv float %f1, %f2
ret float %res
}
@@ -65,9 +65,9 @@ define float @f6(float %f1, float *%base, i64 %index) {
; CHECK: sllg %r1, %r3, 2
; CHECK: deb %f0, 400(%r1,%r2)
; CHECK: br %r14
- %ptr1 = getelementptr float *%base, i64 %index
- %ptr2 = getelementptr float *%ptr1, i64 100
- %f2 = load float *%ptr2
+ %ptr1 = getelementptr float, float *%base, i64 %index
+ %ptr2 = getelementptr float, float *%ptr1, i64 100
+ %f2 = load float , float *%ptr2
%res = fdiv float %f1, %f2
ret float %res
}
@@ -78,28 +78,28 @@ define float @f7(float *%ptr0) {
; CHECK: brasl %r14, foo@PLT
; CHECK: deb %f0, 16{{[04]}}(%r15)
; CHECK: br %r14
- %ptr1 = getelementptr float *%ptr0, i64 2
- %ptr2 = getelementptr float *%ptr0, i64 4
- %ptr3 = getelementptr float *%ptr0, i64 6
- %ptr4 = getelementptr float *%ptr0, i64 8
- %ptr5 = getelementptr float *%ptr0, i64 10
- %ptr6 = getelementptr float *%ptr0, i64 12
- %ptr7 = getelementptr float *%ptr0, i64 14
- %ptr8 = getelementptr float *%ptr0, i64 16
- %ptr9 = getelementptr float *%ptr0, i64 18
- %ptr10 = getelementptr float *%ptr0, i64 20
+ %ptr1 = getelementptr float, float *%ptr0, i64 2
+ %ptr2 = getelementptr float, float *%ptr0, i64 4
+ %ptr3 = getelementptr float, float *%ptr0, i64 6
+ %ptr4 = getelementptr float, float *%ptr0, i64 8
+ %ptr5 = getelementptr float, float *%ptr0, i64 10
+ %ptr6 = getelementptr float, float *%ptr0, i64 12
+ %ptr7 = getelementptr float, float *%ptr0, i64 14
+ %ptr8 = getelementptr float, float *%ptr0, i64 16
+ %ptr9 = getelementptr float, float *%ptr0, i64 18
+ %ptr10 = getelementptr float, float *%ptr0, i64 20
- %val0 = load float *%ptr0
- %val1 = load float *%ptr1
- %val2 = load float *%ptr2
- %val3 = load float *%ptr3
- %val4 = load float *%ptr4
- %val5 = load float *%ptr5
- %val6 = load float *%ptr6
- %val7 = load float *%ptr7
- %val8 = load float *%ptr8
- %val9 = load float *%ptr9
- %val10 = load float *%ptr10
+ %val0 = load float , float *%ptr0
+ %val1 = load float , float *%ptr1
+ %val2 = load float , float *%ptr2
+ %val3 = load float , float *%ptr3
+ %val4 = load float , float *%ptr4
+ %val5 = load float , float *%ptr5
+ %val6 = load float , float *%ptr6
+ %val7 = load float , float *%ptr7
+ %val8 = load float , float *%ptr8
+ %val9 = load float , float *%ptr9
+ %val10 = load float , float *%ptr10
%ret = call float @foo()
diff --git a/test/CodeGen/SystemZ/fp-div-02.ll b/test/CodeGen/SystemZ/fp-div-02.ll
index 513664bd9496..f120e7c923dc 100644
--- a/test/CodeGen/SystemZ/fp-div-02.ll
+++ b/test/CodeGen/SystemZ/fp-div-02.ll
@@ -1,6 +1,8 @@
; Test 64-bit floating-point division.
;
-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \
+; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
declare double @foo()
@@ -18,7 +20,7 @@ define double @f2(double %f1, double *%ptr) {
; CHECK-LABEL: f2:
; CHECK: ddb %f0, 0(%r2)
; CHECK: br %r14
- %f2 = load double *%ptr
+ %f2 = load double , double *%ptr
%res = fdiv double %f1, %f2
ret double %res
}
@@ -28,8 +30,8 @@ define double @f3(double %f1, double *%base) {
; CHECK-LABEL: f3:
; CHECK: ddb %f0, 4088(%r2)
; CHECK: br %r14
- %ptr = getelementptr double *%base, i64 511
- %f2 = load double *%ptr
+ %ptr = getelementptr double, double *%base, i64 511
+ %f2 = load double , double *%ptr
%res = fdiv double %f1, %f2
ret double %res
}
@@ -41,8 +43,8 @@ define double @f4(double %f1, double *%base) {
; CHECK: aghi %r2, 4096
; CHECK: ddb %f0, 0(%r2)
; CHECK: br %r14
- %ptr = getelementptr double *%base, i64 512
- %f2 = load double *%ptr
+ %ptr = getelementptr double, double *%base, i64 512
+ %f2 = load double , double *%ptr
%res = fdiv double %f1, %f2
ret double %res
}
@@ -53,8 +55,8 @@ define double @f5(double %f1, double *%base) {
; CHECK: aghi %r2, -8
; CHECK: ddb %f0, 0(%r2)
; CHECK: br %r14
- %ptr = getelementptr double *%base, i64 -1
- %f2 = load double *%ptr
+ %ptr = getelementptr double, double *%base, i64 -1
+ %f2 = load double , double *%ptr
%res = fdiv double %f1, %f2
ret double %res
}
@@ -65,9 +67,9 @@ define double @f6(double %f1, double *%base, i64 %index) {
; CHECK: sllg %r1, %r3, 3
; CHECK: ddb %f0, 800(%r1,%r2)
; CHECK: br %r14
- %ptr1 = getelementptr double *%base, i64 %index
- %ptr2 = getelementptr double *%ptr1, i64 100
- %f2 = load double *%ptr2
+ %ptr1 = getelementptr double, double *%base, i64 %index
+ %ptr2 = getelementptr double, double *%ptr1, i64 100
+ %f2 = load double , double *%ptr2
%res = fdiv double %f1, %f2
ret double %res
}
@@ -76,30 +78,30 @@ define double @f6(double %f1, double *%base, i64 %index) {
define double @f7(double *%ptr0) {
; CHECK-LABEL: f7:
; CHECK: brasl %r14, foo@PLT
-; CHECK: ddb %f0, 160(%r15)
+; CHECK-SCALAR: ddb %f0, 160(%r15)
; CHECK: br %r14
- %ptr1 = getelementptr double *%ptr0, i64 2
- %ptr2 = getelementptr double *%ptr0, i64 4
- %ptr3 = getelementptr double *%ptr0, i64 6
- %ptr4 = getelementptr double *%ptr0, i64 8
- %ptr5 = getelementptr double *%ptr0, i64 10
- %ptr6 = getelementptr double *%ptr0, i64 12
- %ptr7 = getelementptr double *%ptr0, i64 14
- %ptr8 = getelementptr double *%ptr0, i64 16
- %ptr9 = getelementptr double *%ptr0, i64 18
- %ptr10 = getelementptr double *%ptr0, i64 20
+ %ptr1 = getelementptr double, double *%ptr0, i64 2
+ %ptr2 = getelementptr double, double *%ptr0, i64 4
+ %ptr3 = getelementptr double, double *%ptr0, i64 6
+ %ptr4 = getelementptr double, double *%ptr0, i64 8
+ %ptr5 = getelementptr double, double *%ptr0, i64 10
+ %ptr6 = getelementptr double, double *%ptr0, i64 12
+ %ptr7 = getelementptr double, double *%ptr0, i64 14
+ %ptr8 = getelementptr double, double *%ptr0, i64 16
+ %ptr9 = getelementptr double, double *%ptr0, i64 18
+ %ptr10 = getelementptr double, double *%ptr0, i64 20
- %val0 = load double *%ptr0
- %val1 = load double *%ptr1
- %val2 = load double *%ptr2
- %val3 = load double *%ptr3
- %val4 = load double *%ptr4
- %val5 = load double *%ptr5
- %val6 = load double *%ptr6
- %val7 = load double *%ptr7
- %val8 = load double *%ptr8
- %val9 = load double *%ptr9
- %val10 = load double *%ptr10
+ %val0 = load double , double *%ptr0
+ %val1 = load double , double *%ptr1
+ %val2 = load double , double *%ptr2
+ %val3 = load double , double *%ptr3
+ %val4 = load double , double *%ptr4
+ %val5 = load double , double *%ptr5
+ %val6 = load double , double *%ptr6
+ %val7 = load double , double *%ptr7
+ %val8 = load double , double *%ptr8
+ %val9 = load double , double *%ptr9
+ %val10 = load double , double *%ptr10
%ret = call double @foo()
diff --git a/test/CodeGen/SystemZ/fp-div-03.ll b/test/CodeGen/SystemZ/fp-div-03.ll
index 079b349b4084..f052635a483e 100644
--- a/test/CodeGen/SystemZ/fp-div-03.ll
+++ b/test/CodeGen/SystemZ/fp-div-03.ll
@@ -12,7 +12,7 @@ define void @f1(fp128 *%ptr, float %f2) {
; CHECK: std %f1, 0(%r2)
; CHECK: std %f3, 8(%r2)
; CHECK: br %r14
- %f1 = load fp128 *%ptr
+ %f1 = load fp128 , fp128 *%ptr
%f2x = fpext float %f2 to fp128
%sum = fdiv fp128 %f1, %f2x
store fp128 %sum, fp128 *%ptr
diff --git a/test/CodeGen/SystemZ/fp-move-01.ll b/test/CodeGen/SystemZ/fp-move-01.ll
index d16502f2f7c8..843b1b6a6e64 100644
--- a/test/CodeGen/SystemZ/fp-move-01.ll
+++ b/test/CodeGen/SystemZ/fp-move-01.ll
@@ -1,11 +1,13 @@
; Test moves between FPRs.
;
-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
; Test f32 moves.
define float @f1(float %a, float %b) {
; CHECK-LABEL: f1:
; CHECK: ler %f0, %f2
+; CHECK: br %r14
ret float %b
}
@@ -13,6 +15,7 @@ define float @f1(float %a, float %b) {
define double @f2(double %a, double %b) {
; CHECK-LABEL: f2:
; CHECK: ldr %f0, %f2
+; CHECK: br %r14
ret double %b
}
@@ -22,7 +25,8 @@ define void @f3(fp128 *%x) {
; CHECK-LABEL: f3:
; CHECK: lxr
; CHECK: axbr
- %val = load volatile fp128 *%x
+; CHECK: br %r14
+ %val = load volatile fp128 , fp128 *%x
%sum = fadd fp128 %val, %val
store volatile fp128 %sum, fp128 *%x
store volatile fp128 %val, fp128 *%x
diff --git a/test/CodeGen/SystemZ/fp-move-02.ll b/test/CodeGen/SystemZ/fp-move-02.ll
index 505ee8d37a4e..2bd63f4674d9 100644
--- a/test/CodeGen/SystemZ/fp-move-02.ll
+++ b/test/CodeGen/SystemZ/fp-move-02.ll
@@ -71,7 +71,7 @@ define void @f6(fp128 *%a, i128 *%b) {
; CHECK: stg
; CHECK: stg
; CHECK: br %r14
- %val = load i128 *%b
+ %val = load i128 , i128 *%b
%res = bitcast i128 %val to fp128
store fp128 %res, fp128 *%a
ret void
@@ -102,7 +102,7 @@ define void @f9(fp128 *%a, i128 *%b) {
; CHECK: ld
; CHECK: std
; CHECK: std
- %val = load fp128 *%a
+ %val = load fp128 , fp128 *%a
%res = bitcast fp128 %val to i128
store i128 %res, i128 *%b
ret void
@@ -119,34 +119,34 @@ define void @f10(double %extra) {
; CHECK: %exit
; CHECK: br %r14
entry:
- %double0 = load volatile double *@dptr
+ %double0 = load volatile double , double *@dptr
%biased0 = fadd double %double0, %extra
%int0 = bitcast double %biased0 to i64
- %double1 = load volatile double *@dptr
+ %double1 = load volatile double , double *@dptr
%biased1 = fadd double %double1, %extra
%int1 = bitcast double %biased1 to i64
- %double2 = load volatile double *@dptr
+ %double2 = load volatile double , double *@dptr
%biased2 = fadd double %double2, %extra
%int2 = bitcast double %biased2 to i64
- %double3 = load volatile double *@dptr
+ %double3 = load volatile double , double *@dptr
%biased3 = fadd double %double3, %extra
%int3 = bitcast double %biased3 to i64
- %double4 = load volatile double *@dptr
+ %double4 = load volatile double , double *@dptr
%biased4 = fadd double %double4, %extra
%int4 = bitcast double %biased4 to i64
- %double5 = load volatile double *@dptr
+ %double5 = load volatile double , double *@dptr
%biased5 = fadd double %double5, %extra
%int5 = bitcast double %biased5 to i64
- %double6 = load volatile double *@dptr
+ %double6 = load volatile double , double *@dptr
%biased6 = fadd double %double6, %extra
%int6 = bitcast double %biased6 to i64
- %double7 = load volatile double *@dptr
+ %double7 = load volatile double , double *@dptr
%biased7 = fadd double %double7, %extra
%int7 = bitcast double %biased7 to i64
- %double8 = load volatile double *@dptr
+ %double8 = load volatile double , double *@dptr
%biased8 = fadd double %double8, %extra
%int8 = bitcast double %biased8 to i64
- %double9 = load volatile double *@dptr
+ %double9 = load volatile double , double *@dptr
%biased9 = fadd double %double9, %extra
%int9 = bitcast double %biased9 to i64
br label %loop
@@ -181,34 +181,34 @@ define void @f11(i64 %mask) {
; CHECK: %exit
; CHECK: br %r14
entry:
- %int0 = load volatile i64 *@iptr
+ %int0 = load volatile i64 , i64 *@iptr
%masked0 = and i64 %int0, %mask
%double0 = bitcast i64 %masked0 to double
- %int1 = load volatile i64 *@iptr
+ %int1 = load volatile i64 , i64 *@iptr
%masked1 = and i64 %int1, %mask
%double1 = bitcast i64 %masked1 to double
- %int2 = load volatile i64 *@iptr
+ %int2 = load volatile i64 , i64 *@iptr
%masked2 = and i64 %int2, %mask
%double2 = bitcast i64 %masked2 to double
- %int3 = load volatile i64 *@iptr
+ %int3 = load volatile i64 , i64 *@iptr
%masked3 = and i64 %int3, %mask
%double3 = bitcast i64 %masked3 to double
- %int4 = load volatile i64 *@iptr
+ %int4 = load volatile i64 , i64 *@iptr
%masked4 = and i64 %int4, %mask
%double4 = bitcast i64 %masked4 to double
- %int5 = load volatile i64 *@iptr
+ %int5 = load volatile i64 , i64 *@iptr
%masked5 = and i64 %int5, %mask
%double5 = bitcast i64 %masked5 to double
- %int6 = load volatile i64 *@iptr
+ %int6 = load volatile i64 , i64 *@iptr
%masked6 = and i64 %int6, %mask
%double6 = bitcast i64 %masked6 to double
- %int7 = load volatile i64 *@iptr
+ %int7 = load volatile i64 , i64 *@iptr
%masked7 = and i64 %int7, %mask
%double7 = bitcast i64 %masked7 to double
- %int8 = load volatile i64 *@iptr
+ %int8 = load volatile i64 , i64 *@iptr
%masked8 = and i64 %int8, %mask
%double8 = bitcast i64 %masked8 to double
- %int9 = load volatile i64 *@iptr
+ %int9 = load volatile i64 , i64 *@iptr
%masked9 = and i64 %int9, %mask
%double9 = bitcast i64 %masked9 to double
br label %loop
@@ -275,7 +275,7 @@ loop:
exit:
%unused1 = call i64 @foo()
- %factor = load volatile double *@dptr
+ %factor = load volatile double , double *@dptr
%conv0 = bitcast i64 %add0 to double
%mul0 = fmul double %conv0, %factor
@@ -354,7 +354,7 @@ loop:
exit:
%unused1 = call i64 @foo()
- %bias = load volatile i64 *@iptr
+ %bias = load volatile i64 , i64 *@iptr
%conv0 = bitcast double %mul0 to i64
%add0 = add i64 %conv0, %bias
diff --git a/test/CodeGen/SystemZ/fp-move-03.ll b/test/CodeGen/SystemZ/fp-move-03.ll
index 1273358f65ad..f50e097bef69 100644
--- a/test/CodeGen/SystemZ/fp-move-03.ll
+++ b/test/CodeGen/SystemZ/fp-move-03.ll
@@ -7,7 +7,7 @@ define float @f1(float *%src) {
; CHECK-LABEL: f1:
; CHECK: le %f0, 0(%r2)
; CHECK: br %r14
- %val = load float *%src
+ %val = load float , float *%src
ret float %val
}
@@ -16,8 +16,8 @@ define float @f2(float *%src) {
; CHECK-LABEL: f2:
; CHECK: le %f0, 4092(%r2)
; CHECK: br %r14
- %ptr = getelementptr float *%src, i64 1023
- %val = load float *%ptr
+ %ptr = getelementptr float, float *%src, i64 1023
+ %val = load float , float *%ptr
ret float %val
}
@@ -26,8 +26,8 @@ define float @f3(float *%src) {
; CHECK-LABEL: f3:
; CHECK: ley %f0, 4096(%r2)
; CHECK: br %r14
- %ptr = getelementptr float *%src, i64 1024
- %val = load float *%ptr
+ %ptr = getelementptr float, float *%src, i64 1024
+ %val = load float , float *%ptr
ret float %val
}
@@ -36,8 +36,8 @@ define float @f4(float *%src) {
; CHECK-LABEL: f4:
; CHECK: ley %f0, 524284(%r2)
; CHECK: br %r14
- %ptr = getelementptr float *%src, i64 131071
- %val = load float *%ptr
+ %ptr = getelementptr float, float *%src, i64 131071
+ %val = load float , float *%ptr
ret float %val
}
@@ -48,8 +48,8 @@ define float @f5(float *%src) {
; CHECK: agfi %r2, 524288
; CHECK: le %f0, 0(%r2)
; CHECK: br %r14
- %ptr = getelementptr float *%src, i64 131072
- %val = load float *%ptr
+ %ptr = getelementptr float, float *%src, i64 131072
+ %val = load float , float *%ptr
ret float %val
}
@@ -58,8 +58,8 @@ define float @f6(float *%src) {
; CHECK-LABEL: f6:
; CHECK: ley %f0, -4(%r2)
; CHECK: br %r14
- %ptr = getelementptr float *%src, i64 -1
- %val = load float *%ptr
+ %ptr = getelementptr float, float *%src, i64 -1
+ %val = load float , float *%ptr
ret float %val
}
@@ -68,8 +68,8 @@ define float @f7(float *%src) {
; CHECK-LABEL: f7:
; CHECK: ley %f0, -524288(%r2)
; CHECK: br %r14
- %ptr = getelementptr float *%src, i64 -131072
- %val = load float *%ptr
+ %ptr = getelementptr float, float *%src, i64 -131072
+ %val = load float , float *%ptr
ret float %val
}
@@ -80,8 +80,8 @@ define float @f8(float *%src) {
; CHECK: agfi %r2, -524292
; CHECK: le %f0, 0(%r2)
; CHECK: br %r14
- %ptr = getelementptr float *%src, i64 -131073
- %val = load float *%ptr
+ %ptr = getelementptr float, float *%src, i64 -131073
+ %val = load float , float *%ptr
ret float %val
}
@@ -93,7 +93,7 @@ define float @f9(i64 %src, i64 %index) {
%add1 = add i64 %src, %index
%add2 = add i64 %add1, 4092
%ptr = inttoptr i64 %add2 to float *
- %val = load float *%ptr
+ %val = load float , float *%ptr
ret float %val
}
@@ -105,6 +105,6 @@ define float @f10(i64 %src, i64 %index) {
%add1 = add i64 %src, %index
%add2 = add i64 %add1, 4096
%ptr = inttoptr i64 %add2 to float *
- %val = load float *%ptr
+ %val = load float , float *%ptr
ret float %val
}
diff --git a/test/CodeGen/SystemZ/fp-move-04.ll b/test/CodeGen/SystemZ/fp-move-04.ll
index 1b0278fdee0f..6650419b2c38 100644
--- a/test/CodeGen/SystemZ/fp-move-04.ll
+++ b/test/CodeGen/SystemZ/fp-move-04.ll
@@ -1,13 +1,14 @@
; Test 64-bit floating-point loads.
;
-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
; Test the low end of the LD range.
define double @f1(double *%src) {
; CHECK-LABEL: f1:
; CHECK: ld %f0, 0(%r2)
; CHECK: br %r14
- %val = load double *%src
+ %val = load double , double *%src
ret double %val
}
@@ -16,8 +17,8 @@ define double @f2(double *%src) {
; CHECK-LABEL: f2:
; CHECK: ld %f0, 4088(%r2)
; CHECK: br %r14
- %ptr = getelementptr double *%src, i64 511
- %val = load double *%ptr
+ %ptr = getelementptr double, double *%src, i64 511
+ %val = load double , double *%ptr
ret double %val
}
@@ -26,8 +27,8 @@ define double @f3(double *%src) {
; CHECK-LABEL: f3:
; CHECK: ldy %f0, 4096(%r2)
; CHECK: br %r14
- %ptr = getelementptr double *%src, i64 512
- %val = load double *%ptr
+ %ptr = getelementptr double, double *%src, i64 512
+ %val = load double , double *%ptr
ret double %val
}
@@ -36,8 +37,8 @@ define double @f4(double *%src) {
; CHECK-LABEL: f4:
; CHECK: ldy %f0, 524280(%r2)
; CHECK: br %r14
- %ptr = getelementptr double *%src, i64 65535
- %val = load double *%ptr
+ %ptr = getelementptr double, double *%src, i64 65535
+ %val = load double , double *%ptr
ret double %val
}
@@ -48,8 +49,8 @@ define double @f5(double *%src) {
; CHECK: agfi %r2, 524288
; CHECK: ld %f0, 0(%r2)
; CHECK: br %r14
- %ptr = getelementptr double *%src, i64 65536
- %val = load double *%ptr
+ %ptr = getelementptr double, double *%src, i64 65536
+ %val = load double , double *%ptr
ret double %val
}
@@ -58,8 +59,8 @@ define double @f6(double *%src) {
; CHECK-LABEL: f6:
; CHECK: ldy %f0, -8(%r2)
; CHECK: br %r14
- %ptr = getelementptr double *%src, i64 -1
- %val = load double *%ptr
+ %ptr = getelementptr double, double *%src, i64 -1
+ %val = load double , double *%ptr
ret double %val
}
@@ -68,8 +69,8 @@ define double @f7(double *%src) {
; CHECK-LABEL: f7:
; CHECK: ldy %f0, -524288(%r2)
; CHECK: br %r14
- %ptr = getelementptr double *%src, i64 -65536
- %val = load double *%ptr
+ %ptr = getelementptr double, double *%src, i64 -65536
+ %val = load double , double *%ptr
ret double %val
}
@@ -80,8 +81,8 @@ define double @f8(double *%src) {
; CHECK: agfi %r2, -524296
; CHECK: ld %f0, 0(%r2)
; CHECK: br %r14
- %ptr = getelementptr double *%src, i64 -65537
- %val = load double *%ptr
+ %ptr = getelementptr double, double *%src, i64 -65537
+ %val = load double , double *%ptr
ret double %val
}
@@ -93,7 +94,7 @@ define double @f9(i64 %src, i64 %index) {
%add1 = add i64 %src, %index
%add2 = add i64 %add1, 4095
%ptr = inttoptr i64 %add2 to double *
- %val = load double *%ptr
+ %val = load double , double *%ptr
ret double %val
}
@@ -105,6 +106,6 @@ define double @f10(i64 %src, i64 %index) {
%add1 = add i64 %src, %index
%add2 = add i64 %add1, 4096
%ptr = inttoptr i64 %add2 to double *
- %val = load double *%ptr
+ %val = load double , double *%ptr
ret double %val
}
diff --git a/test/CodeGen/SystemZ/fp-move-05.ll b/test/CodeGen/SystemZ/fp-move-05.ll
index d302a0f9c633..da12af6d68c1 100644
--- a/test/CodeGen/SystemZ/fp-move-05.ll
+++ b/test/CodeGen/SystemZ/fp-move-05.ll
@@ -9,7 +9,7 @@ define double @f1(i64 %src) {
; CHECK: ld %f2, 8(%r2)
; CHECK: br %r14
%ptr = inttoptr i64 %src to fp128 *
- %val = load fp128 *%ptr
+ %val = load fp128 , fp128 *%ptr
%trunc = fptrunc fp128 %val to double
ret double %trunc
}
@@ -22,7 +22,7 @@ define double @f2(i64 %src) {
; CHECK: br %r14
%add = add i64 %src, 4080
%ptr = inttoptr i64 %add to fp128 *
- %val = load fp128 *%ptr
+ %val = load fp128 , fp128 *%ptr
%trunc = fptrunc fp128 %val to double
ret double %trunc
}
@@ -35,7 +35,7 @@ define double @f3(i64 %src) {
; CHECK: br %r14
%add = add i64 %src, 4088
%ptr = inttoptr i64 %add to fp128 *
- %val = load fp128 *%ptr
+ %val = load fp128 , fp128 *%ptr
%trunc = fptrunc fp128 %val to double
ret double %trunc
}
@@ -48,7 +48,7 @@ define double @f4(i64 %src) {
; CHECK: br %r14
%add = add i64 %src, 4096
%ptr = inttoptr i64 %add to fp128 *
- %val = load fp128 *%ptr
+ %val = load fp128 , fp128 *%ptr
%trunc = fptrunc fp128 %val to double
ret double %trunc
}
@@ -61,7 +61,7 @@ define double @f5(i64 %src) {
; CHECK: br %r14
%add = add i64 %src, 524272
%ptr = inttoptr i64 %add to fp128 *
- %val = load fp128 *%ptr
+ %val = load fp128 , fp128 *%ptr
%trunc = fptrunc fp128 %val to double
ret double %trunc
}
@@ -76,7 +76,7 @@ define double @f6(i64 %src) {
; CHECK: br %r14
%add = add i64 %src, 524280
%ptr = inttoptr i64 %add to fp128 *
- %val = load fp128 *%ptr
+ %val = load fp128 , fp128 *%ptr
%trunc = fptrunc fp128 %val to double
ret double %trunc
}
@@ -90,7 +90,7 @@ define double @f7(i64 %src) {
; CHECK: br %r14
%add = add i64 %src, -8
%ptr = inttoptr i64 %add to fp128 *
- %val = load fp128 *%ptr
+ %val = load fp128 , fp128 *%ptr
%trunc = fptrunc fp128 %val to double
ret double %trunc
}
@@ -103,7 +103,7 @@ define double @f8(i64 %src) {
; CHECK: br %r14
%add = add i64 %src, -16
%ptr = inttoptr i64 %add to fp128 *
- %val = load fp128 *%ptr
+ %val = load fp128 , fp128 *%ptr
%trunc = fptrunc fp128 %val to double
ret double %trunc
}
@@ -116,7 +116,7 @@ define double @f9(i64 %src) {
; CHECK: br %r14
%add = add i64 %src, -524288
%ptr = inttoptr i64 %add to fp128 *
- %val = load fp128 *%ptr
+ %val = load fp128 , fp128 *%ptr
%trunc = fptrunc fp128 %val to double
ret double %trunc
}
@@ -131,7 +131,7 @@ define double @f10(i64 %src) {
; CHECK: br %r14
%add = add i64 %src, -524296
%ptr = inttoptr i64 %add to fp128 *
- %val = load fp128 *%ptr
+ %val = load fp128 , fp128 *%ptr
%trunc = fptrunc fp128 %val to double
ret double %trunc
}
@@ -145,7 +145,7 @@ define double @f11(i64 %src, i64 %index) {
%add1 = add i64 %src, %index
%add2 = add i64 %add1, 4088
%ptr = inttoptr i64 %add2 to fp128 *
- %val = load fp128 *%ptr
+ %val = load fp128 , fp128 *%ptr
%trunc = fptrunc fp128 %val to double
ret double %trunc
}
diff --git a/test/CodeGen/SystemZ/fp-move-06.ll b/test/CodeGen/SystemZ/fp-move-06.ll
index da67691729e3..eb0f87f1c572 100644
--- a/test/CodeGen/SystemZ/fp-move-06.ll
+++ b/test/CodeGen/SystemZ/fp-move-06.ll
@@ -16,7 +16,7 @@ define void @f2(float *%src, float %val) {
; CHECK-LABEL: f2:
; CHECK: ste %f0, 4092(%r2)
; CHECK: br %r14
- %ptr = getelementptr float *%src, i64 1023
+ %ptr = getelementptr float, float *%src, i64 1023
store float %val, float *%ptr
ret void
}
@@ -26,7 +26,7 @@ define void @f3(float *%src, float %val) {
; CHECK-LABEL: f3:
; CHECK: stey %f0, 4096(%r2)
; CHECK: br %r14
- %ptr = getelementptr float *%src, i64 1024
+ %ptr = getelementptr float, float *%src, i64 1024
store float %val, float *%ptr
ret void
}
@@ -36,7 +36,7 @@ define void @f4(float *%src, float %val) {
; CHECK-LABEL: f4:
; CHECK: stey %f0, 524284(%r2)
; CHECK: br %r14
- %ptr = getelementptr float *%src, i64 131071
+ %ptr = getelementptr float, float *%src, i64 131071
store float %val, float *%ptr
ret void
}
@@ -48,7 +48,7 @@ define void @f5(float *%src, float %val) {
; CHECK: agfi %r2, 524288
; CHECK: ste %f0, 0(%r2)
; CHECK: br %r14
- %ptr = getelementptr float *%src, i64 131072
+ %ptr = getelementptr float, float *%src, i64 131072
store float %val, float *%ptr
ret void
}
@@ -58,7 +58,7 @@ define void @f6(float *%src, float %val) {
; CHECK-LABEL: f6:
; CHECK: stey %f0, -4(%r2)
; CHECK: br %r14
- %ptr = getelementptr float *%src, i64 -1
+ %ptr = getelementptr float, float *%src, i64 -1
store float %val, float *%ptr
ret void
}
@@ -68,7 +68,7 @@ define void @f7(float *%src, float %val) {
; CHECK-LABEL: f7:
; CHECK: stey %f0, -524288(%r2)
; CHECK: br %r14
- %ptr = getelementptr float *%src, i64 -131072
+ %ptr = getelementptr float, float *%src, i64 -131072
store float %val, float *%ptr
ret void
}
@@ -80,7 +80,7 @@ define void @f8(float *%src, float %val) {
; CHECK: agfi %r2, -524292
; CHECK: ste %f0, 0(%r2)
; CHECK: br %r14
- %ptr = getelementptr float *%src, i64 -131073
+ %ptr = getelementptr float, float *%src, i64 -131073
store float %val, float *%ptr
ret void
}
diff --git a/test/CodeGen/SystemZ/fp-move-07.ll b/test/CodeGen/SystemZ/fp-move-07.ll
index a4f1820d1204..5361002a97e0 100644
--- a/test/CodeGen/SystemZ/fp-move-07.ll
+++ b/test/CodeGen/SystemZ/fp-move-07.ll
@@ -1,6 +1,7 @@
; Test 64-bit floating-point stores.
;
-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
; Test the low end of the STD range.
define void @f1(double *%src, double %val) {
@@ -16,7 +17,7 @@ define void @f2(double *%src, double %val) {
; CHECK-LABEL: f2:
; CHECK: std %f0, 4088(%r2)
; CHECK: br %r14
- %ptr = getelementptr double *%src, i64 511
+ %ptr = getelementptr double, double *%src, i64 511
store double %val, double *%ptr
ret void
}
@@ -26,7 +27,7 @@ define void @f3(double *%src, double %val) {
; CHECK-LABEL: f3:
; CHECK: stdy %f0, 4096(%r2)
; CHECK: br %r14
- %ptr = getelementptr double *%src, i64 512
+ %ptr = getelementptr double, double *%src, i64 512
store double %val, double *%ptr
ret void
}
@@ -36,7 +37,7 @@ define void @f4(double *%src, double %val) {
; CHECK-LABEL: f4:
; CHECK: stdy %f0, 524280(%r2)
; CHECK: br %r14
- %ptr = getelementptr double *%src, i64 65535
+ %ptr = getelementptr double, double *%src, i64 65535
store double %val, double *%ptr
ret void
}
@@ -48,7 +49,7 @@ define void @f5(double *%src, double %val) {
; CHECK: agfi %r2, 524288
; CHECK: std %f0, 0(%r2)
; CHECK: br %r14
- %ptr = getelementptr double *%src, i64 65536
+ %ptr = getelementptr double, double *%src, i64 65536
store double %val, double *%ptr
ret void
}
@@ -58,7 +59,7 @@ define void @f6(double *%src, double %val) {
; CHECK-LABEL: f6:
; CHECK: stdy %f0, -8(%r2)
; CHECK: br %r14
- %ptr = getelementptr double *%src, i64 -1
+ %ptr = getelementptr double, double *%src, i64 -1
store double %val, double *%ptr
ret void
}
@@ -68,7 +69,7 @@ define void @f7(double *%src, double %val) {
; CHECK-LABEL: f7:
; CHECK: stdy %f0, -524288(%r2)
; CHECK: br %r14
- %ptr = getelementptr double *%src, i64 -65536
+ %ptr = getelementptr double, double *%src, i64 -65536
store double %val, double *%ptr
ret void
}
@@ -80,7 +81,7 @@ define void @f8(double *%src, double %val) {
; CHECK: agfi %r2, -524296
; CHECK: std %f0, 0(%r2)
; CHECK: br %r14
- %ptr = getelementptr double *%src, i64 -65537
+ %ptr = getelementptr double, double *%src, i64 -65537
store double %val, double *%ptr
ret void
}
diff --git a/test/CodeGen/SystemZ/fp-move-09.ll b/test/CodeGen/SystemZ/fp-move-09.ll
index 52b2ee2e31ab..5e8dce272c23 100644
--- a/test/CodeGen/SystemZ/fp-move-09.ll
+++ b/test/CodeGen/SystemZ/fp-move-09.ll
@@ -1,4 +1,4 @@
-; Test moves between FPRs and GPRs for z196 and above.
+; Test moves between FPRs and GPRs for z196 and zEC12.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s
@@ -9,7 +9,7 @@ define float @f1(i16 *%ptr) {
; CHECK: oihh [[REG]], 16256
; CHECK: ldgr %f0, [[REG]]
; CHECK: br %r14
- %base = load i16 *%ptr
+ %base = load i16 , i16 *%ptr
%ext = zext i16 %base to i32
%full = or i32 %ext, 1065353216
%res = bitcast i32 %full to float
@@ -38,7 +38,7 @@ define void @f3(float %val, i8 *%ptr, i32 %which) {
; CHECK: br %r14
%int = bitcast float %val to i32
%trunc = trunc i32 %int to i8
- %old = load i8 *%ptr
+ %old = load i8 , i8 *%ptr
%cmp = icmp eq i32 %which, 0
%res = select i1 %cmp, i8 %trunc, i8 %old
store i8 %res, i8 *%ptr
@@ -54,7 +54,7 @@ define void @f4(float %val, i16 *%ptr, i32 %which) {
; CHECK: br %r14
%int = bitcast float %val to i32
%trunc = trunc i32 %int to i16
- %old = load i16 *%ptr
+ %old = load i16 , i16 *%ptr
%cmp = icmp eq i32 %which, 0
%res = select i1 %cmp, i16 %trunc, i16 %old
store i16 %res, i16 *%ptr
diff --git a/test/CodeGen/SystemZ/fp-move-10.ll b/test/CodeGen/SystemZ/fp-move-10.ll
new file mode 100644
index 000000000000..602397d58a8d
--- /dev/null
+++ b/test/CodeGen/SystemZ/fp-move-10.ll
@@ -0,0 +1,61 @@
+; Test moves between FPRs and GPRs for z13 and above.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
+; Check that moves from i32s to floats use a low GR32 and vector operation.
+define float @f1(i16 *%ptr) {
+; CHECK-LABEL: f1:
+; CHECK: llh [[REG:%r[0-5]]], 0(%r2)
+; CHECK: oilh [[REG]], 16256
+; CHECK: vlvgf %v0, [[REG]], 0
+; CHECK: br %r14
+ %base = load i16, i16 *%ptr
+ %ext = zext i16 %base to i32
+ %full = or i32 %ext, 1065353216
+ %res = bitcast i32 %full to float
+ ret float %res
+}
+
+; Check that moves from floats to i32s use a low GR32 and vector operation.
+define void @f2(float %val, i8 *%ptr) {
+; CHECK-LABEL: f2:
+; CHECK: vlgvf [[REG:%r[0-5]]], %v0, 0
+; CHECK: stc [[REG]], 0(%r2)
+; CHECK: br %r14
+ %res = bitcast float %val to i32
+ %trunc = trunc i32 %res to i8
+ store i8 %trunc, i8 *%ptr
+ ret void
+}
+
+; Like f2, but with a conditional store.
+define void @f3(float %val, i8 *%ptr, i32 %which) {
+; CHECK-LABEL: f3:
+; CHECK-DAG: cijlh %r3, 0,
+; CHECK-DAG: vlgvf [[REG:%r[0-5]]], %v0, 0
+; CHECK: stc [[REG]], 0(%r2)
+; CHECK: br %r14
+ %int = bitcast float %val to i32
+ %trunc = trunc i32 %int to i8
+ %old = load i8, i8 *%ptr
+ %cmp = icmp eq i32 %which, 0
+ %res = select i1 %cmp, i8 %trunc, i8 %old
+ store i8 %res, i8 *%ptr
+ ret void
+}
+
+; ...and again with 16-bit memory.
+define void @f4(float %val, i16 *%ptr, i32 %which) {
+; CHECK-LABEL: f4:
+; CHECK-DAG: cijlh %r3, 0,
+; CHECK-DAG: vlgvf [[REG:%r[0-5]]], %v0, 0
+; CHECK: sth [[REG]], 0(%r2)
+; CHECK: br %r14
+ %int = bitcast float %val to i32
+ %trunc = trunc i32 %int to i16
+ %old = load i16, i16 *%ptr
+ %cmp = icmp eq i32 %which, 0
+ %res = select i1 %cmp, i16 %trunc, i16 %old
+ store i16 %res, i16 *%ptr
+ ret void
+}
diff --git a/test/CodeGen/SystemZ/fp-move-11.ll b/test/CodeGen/SystemZ/fp-move-11.ll
new file mode 100644
index 000000000000..ce45019425cb
--- /dev/null
+++ b/test/CodeGen/SystemZ/fp-move-11.ll
@@ -0,0 +1,110 @@
+; Test 32-bit floating-point loads for z13.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
+; Test that we use LDE instead of LE - low end of the LE range.
+define float @f1(float *%src) {
+; CHECK-LABEL: f1:
+; CHECK: lde %f0, 0(%r2)
+; CHECK: br %r14
+ %val = load float, float *%src
+ ret float %val
+}
+
+; Test that we use LDE instead of LE - high end of the LE range.
+define float @f2(float *%src) {
+; CHECK-LABEL: f2:
+; CHECK: lde %f0, 4092(%r2)
+; CHECK: br %r14
+ %ptr = getelementptr float, float *%src, i64 1023
+ %val = load float, float *%ptr
+ ret float %val
+}
+
+; Check the next word up, which should use LEY instead of LDE.
+define float @f3(float *%src) {
+; CHECK-LABEL: f3:
+; CHECK: ley %f0, 4096(%r2)
+; CHECK: br %r14
+ %ptr = getelementptr float, float *%src, i64 1024
+ %val = load float, float *%ptr
+ ret float %val
+}
+
+; Check the high end of the aligned LEY range.
+define float @f4(float *%src) {
+; CHECK-LABEL: f4:
+; CHECK: ley %f0, 524284(%r2)
+; CHECK: br %r14
+ %ptr = getelementptr float, float *%src, i64 131071
+ %val = load float, float *%ptr
+ ret float %val
+}
+
+; Check the next word up, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define float @f5(float *%src) {
+; CHECK-LABEL: f5:
+; CHECK: agfi %r2, 524288
+; CHECK: lde %f0, 0(%r2)
+; CHECK: br %r14
+ %ptr = getelementptr float, float *%src, i64 131072
+ %val = load float, float *%ptr
+ ret float %val
+}
+
+; Check the high end of the negative aligned LEY range.
+define float @f6(float *%src) {
+; CHECK-LABEL: f6:
+; CHECK: ley %f0, -4(%r2)
+; CHECK: br %r14
+ %ptr = getelementptr float, float *%src, i64 -1
+ %val = load float, float *%ptr
+ ret float %val
+}
+
+; Check the low end of the LEY range.
+define float @f7(float *%src) {
+; CHECK-LABEL: f7:
+; CHECK: ley %f0, -524288(%r2)
+; CHECK: br %r14
+ %ptr = getelementptr float, float *%src, i64 -131072
+ %val = load float, float *%ptr
+ ret float %val
+}
+
+; Check the next word down, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define float @f8(float *%src) {
+; CHECK-LABEL: f8:
+; CHECK: agfi %r2, -524292
+; CHECK: lde %f0, 0(%r2)
+; CHECK: br %r14
+ %ptr = getelementptr float, float *%src, i64 -131073
+ %val = load float, float *%ptr
+ ret float %val
+}
+
+; Check that LDE allows an index.
+define float @f9(i64 %src, i64 %index) {
+; CHECK-LABEL: f9:
+; CHECK: lde %f0, 4092({{%r3,%r2|%r2,%r3}})
+; CHECK: br %r14
+ %add1 = add i64 %src, %index
+ %add2 = add i64 %add1, 4092
+ %ptr = inttoptr i64 %add2 to float *
+ %val = load float, float *%ptr
+ ret float %val
+}
+
+; Check that LEY allows an index.
+define float @f10(i64 %src, i64 %index) {
+; CHECK-LABEL: f10:
+; CHECK: ley %f0, 4096({{%r3,%r2|%r2,%r3}})
+; CHECK: br %r14
+ %add1 = add i64 %src, %index
+ %add2 = add i64 %add1, 4096
+ %ptr = inttoptr i64 %add2 to float *
+ %val = load float, float *%ptr
+ ret float %val
+}
diff --git a/test/CodeGen/SystemZ/fp-mul-01.ll b/test/CodeGen/SystemZ/fp-mul-01.ll
index 7562d6bf071b..3b72d25e0b5c 100644
--- a/test/CodeGen/SystemZ/fp-mul-01.ll
+++ b/test/CodeGen/SystemZ/fp-mul-01.ll
@@ -18,7 +18,7 @@ define float @f2(float %f1, float *%ptr) {
; CHECK-LABEL: f2:
; CHECK: meeb %f0, 0(%r2)
; CHECK: br %r14
- %f2 = load float *%ptr
+ %f2 = load float , float *%ptr
%res = fmul float %f1, %f2
ret float %res
}
@@ -28,8 +28,8 @@ define float @f3(float %f1, float *%base) {
; CHECK-LABEL: f3:
; CHECK: meeb %f0, 4092(%r2)
; CHECK: br %r14
- %ptr = getelementptr float *%base, i64 1023
- %f2 = load float *%ptr
+ %ptr = getelementptr float, float *%base, i64 1023
+ %f2 = load float , float *%ptr
%res = fmul float %f1, %f2
ret float %res
}
@@ -41,8 +41,8 @@ define float @f4(float %f1, float *%base) {
; CHECK: aghi %r2, 4096
; CHECK: meeb %f0, 0(%r2)
; CHECK: br %r14
- %ptr = getelementptr float *%base, i64 1024
- %f2 = load float *%ptr
+ %ptr = getelementptr float, float *%base, i64 1024
+ %f2 = load float , float *%ptr
%res = fmul float %f1, %f2
ret float %res
}
@@ -53,8 +53,8 @@ define float @f5(float %f1, float *%base) {
; CHECK: aghi %r2, -4
; CHECK: meeb %f0, 0(%r2)
; CHECK: br %r14
- %ptr = getelementptr float *%base, i64 -1
- %f2 = load float *%ptr
+ %ptr = getelementptr float, float *%base, i64 -1
+ %f2 = load float , float *%ptr
%res = fmul float %f1, %f2
ret float %res
}
@@ -65,9 +65,9 @@ define float @f6(float %f1, float *%base, i64 %index) {
; CHECK: sllg %r1, %r3, 2
; CHECK: meeb %f0, 400(%r1,%r2)
; CHECK: br %r14
- %ptr1 = getelementptr float *%base, i64 %index
- %ptr2 = getelementptr float *%ptr1, i64 100
- %f2 = load float *%ptr2
+ %ptr1 = getelementptr float, float *%base, i64 %index
+ %ptr2 = getelementptr float, float *%ptr1, i64 100
+ %f2 = load float , float *%ptr2
%res = fmul float %f1, %f2
ret float %res
}
@@ -78,28 +78,28 @@ define float @f7(float *%ptr0) {
; CHECK: brasl %r14, foo@PLT
; CHECK: meeb %f0, 16{{[04]}}(%r15)
; CHECK: br %r14
- %ptr1 = getelementptr float *%ptr0, i64 2
- %ptr2 = getelementptr float *%ptr0, i64 4
- %ptr3 = getelementptr float *%ptr0, i64 6
- %ptr4 = getelementptr float *%ptr0, i64 8
- %ptr5 = getelementptr float *%ptr0, i64 10
- %ptr6 = getelementptr float *%ptr0, i64 12
- %ptr7 = getelementptr float *%ptr0, i64 14
- %ptr8 = getelementptr float *%ptr0, i64 16
- %ptr9 = getelementptr float *%ptr0, i64 18
- %ptr10 = getelementptr float *%ptr0, i64 20
+ %ptr1 = getelementptr float, float *%ptr0, i64 2
+ %ptr2 = getelementptr float, float *%ptr0, i64 4
+ %ptr3 = getelementptr float, float *%ptr0, i64 6
+ %ptr4 = getelementptr float, float *%ptr0, i64 8
+ %ptr5 = getelementptr float, float *%ptr0, i64 10
+ %ptr6 = getelementptr float, float *%ptr0, i64 12
+ %ptr7 = getelementptr float, float *%ptr0, i64 14
+ %ptr8 = getelementptr float, float *%ptr0, i64 16
+ %ptr9 = getelementptr float, float *%ptr0, i64 18
+ %ptr10 = getelementptr float, float *%ptr0, i64 20
- %val0 = load float *%ptr0
- %val1 = load float *%ptr1
- %val2 = load float *%ptr2
- %val3 = load float *%ptr3
- %val4 = load float *%ptr4
- %val5 = load float *%ptr5
- %val6 = load float *%ptr6
- %val7 = load float *%ptr7
- %val8 = load float *%ptr8
- %val9 = load float *%ptr9
- %val10 = load float *%ptr10
+ %val0 = load float , float *%ptr0
+ %val1 = load float , float *%ptr1
+ %val2 = load float , float *%ptr2
+ %val3 = load float , float *%ptr3
+ %val4 = load float , float *%ptr4
+ %val5 = load float , float *%ptr5
+ %val6 = load float , float *%ptr6
+ %val7 = load float , float *%ptr7
+ %val8 = load float , float *%ptr8
+ %val9 = load float , float *%ptr9
+ %val10 = load float , float *%ptr10
%ret = call float @foo()
diff --git a/test/CodeGen/SystemZ/fp-mul-02.ll b/test/CodeGen/SystemZ/fp-mul-02.ll
index cf4448fd7dd1..8435c3f5d3a1 100644
--- a/test/CodeGen/SystemZ/fp-mul-02.ll
+++ b/test/CodeGen/SystemZ/fp-mul-02.ll
@@ -20,7 +20,7 @@ define double @f2(float %f1, float *%ptr) {
; CHECK-LABEL: f2:
; CHECK: mdeb %f0, 0(%r2)
; CHECK: br %r14
- %f2 = load float *%ptr
+ %f2 = load float , float *%ptr
%f1x = fpext float %f1 to double
%f2x = fpext float %f2 to double
%res = fmul double %f1x, %f2x
@@ -32,8 +32,8 @@ define double @f3(float %f1, float *%base) {
; CHECK-LABEL: f3:
; CHECK: mdeb %f0, 4092(%r2)
; CHECK: br %r14
- %ptr = getelementptr float *%base, i64 1023
- %f2 = load float *%ptr
+ %ptr = getelementptr float, float *%base, i64 1023
+ %f2 = load float , float *%ptr
%f1x = fpext float %f1 to double
%f2x = fpext float %f2 to double
%res = fmul double %f1x, %f2x
@@ -47,8 +47,8 @@ define double @f4(float %f1, float *%base) {
; CHECK: aghi %r2, 4096
; CHECK: mdeb %f0, 0(%r2)
; CHECK: br %r14
- %ptr = getelementptr float *%base, i64 1024
- %f2 = load float *%ptr
+ %ptr = getelementptr float, float *%base, i64 1024
+ %f2 = load float , float *%ptr
%f1x = fpext float %f1 to double
%f2x = fpext float %f2 to double
%res = fmul double %f1x, %f2x
@@ -61,8 +61,8 @@ define double @f5(float %f1, float *%base) {
; CHECK: aghi %r2, -4
; CHECK: mdeb %f0, 0(%r2)
; CHECK: br %r14
- %ptr = getelementptr float *%base, i64 -1
- %f2 = load float *%ptr
+ %ptr = getelementptr float, float *%base, i64 -1
+ %f2 = load float , float *%ptr
%f1x = fpext float %f1 to double
%f2x = fpext float %f2 to double
%res = fmul double %f1x, %f2x
@@ -75,9 +75,9 @@ define double @f6(float %f1, float *%base, i64 %index) {
; CHECK: sllg %r1, %r3, 2
; CHECK: mdeb %f0, 400(%r1,%r2)
; CHECK: br %r14
- %ptr1 = getelementptr float *%base, i64 %index
- %ptr2 = getelementptr float *%ptr1, i64 100
- %f2 = load float *%ptr2
+ %ptr1 = getelementptr float, float *%base, i64 %index
+ %ptr2 = getelementptr float, float *%ptr1, i64 100
+ %f2 = load float , float *%ptr2
%f1x = fpext float %f1 to double
%f2x = fpext float %f2 to double
%res = fmul double %f1x, %f2x
@@ -90,28 +90,28 @@ define float @f7(float *%ptr0) {
; CHECK: brasl %r14, foo@PLT
; CHECK: mdeb %f0, 16{{[04]}}(%r15)
; CHECK: br %r14
- %ptr1 = getelementptr float *%ptr0, i64 2
- %ptr2 = getelementptr float *%ptr0, i64 4
- %ptr3 = getelementptr float *%ptr0, i64 6
- %ptr4 = getelementptr float *%ptr0, i64 8
- %ptr5 = getelementptr float *%ptr0, i64 10
- %ptr6 = getelementptr float *%ptr0, i64 12
- %ptr7 = getelementptr float *%ptr0, i64 14
- %ptr8 = getelementptr float *%ptr0, i64 16
- %ptr9 = getelementptr float *%ptr0, i64 18
- %ptr10 = getelementptr float *%ptr0, i64 20
-
- %val0 = load float *%ptr0
- %val1 = load float *%ptr1
- %val2 = load float *%ptr2
- %val3 = load float *%ptr3
- %val4 = load float *%ptr4
- %val5 = load float *%ptr5
- %val6 = load float *%ptr6
- %val7 = load float *%ptr7
- %val8 = load float *%ptr8
- %val9 = load float *%ptr9
- %val10 = load float *%ptr10
+ %ptr1 = getelementptr float, float *%ptr0, i64 2
+ %ptr2 = getelementptr float, float *%ptr0, i64 4
+ %ptr3 = getelementptr float, float *%ptr0, i64 6
+ %ptr4 = getelementptr float, float *%ptr0, i64 8
+ %ptr5 = getelementptr float, float *%ptr0, i64 10
+ %ptr6 = getelementptr float, float *%ptr0, i64 12
+ %ptr7 = getelementptr float, float *%ptr0, i64 14
+ %ptr8 = getelementptr float, float *%ptr0, i64 16
+ %ptr9 = getelementptr float, float *%ptr0, i64 18
+ %ptr10 = getelementptr float, float *%ptr0, i64 20
+
+ %val0 = load float , float *%ptr0
+ %val1 = load float , float *%ptr1
+ %val2 = load float , float *%ptr2
+ %val3 = load float , float *%ptr3
+ %val4 = load float , float *%ptr4
+ %val5 = load float , float *%ptr5
+ %val6 = load float , float *%ptr6
+ %val7 = load float , float *%ptr7
+ %val8 = load float , float *%ptr8
+ %val9 = load float , float *%ptr9
+ %val10 = load float , float *%ptr10
%frob0 = fadd float %val0, %val0
%frob1 = fadd float %val1, %val1
diff --git a/test/CodeGen/SystemZ/fp-mul-03.ll b/test/CodeGen/SystemZ/fp-mul-03.ll
index 6d296f07d1f2..0d52121f41c6 100644
--- a/test/CodeGen/SystemZ/fp-mul-03.ll
+++ b/test/CodeGen/SystemZ/fp-mul-03.ll
@@ -1,6 +1,8 @@
; Test multiplication of two f64s, producing an f64 result.
;
-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \
+; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
declare double @foo()
@@ -18,7 +20,7 @@ define double @f2(double %f1, double *%ptr) {
; CHECK-LABEL: f2:
; CHECK: mdb %f0, 0(%r2)
; CHECK: br %r14
- %f2 = load double *%ptr
+ %f2 = load double , double *%ptr
%res = fmul double %f1, %f2
ret double %res
}
@@ -28,8 +30,8 @@ define double @f3(double %f1, double *%base) {
; CHECK-LABEL: f3:
; CHECK: mdb %f0, 4088(%r2)
; CHECK: br %r14
- %ptr = getelementptr double *%base, i64 511
- %f2 = load double *%ptr
+ %ptr = getelementptr double, double *%base, i64 511
+ %f2 = load double , double *%ptr
%res = fmul double %f1, %f2
ret double %res
}
@@ -41,8 +43,8 @@ define double @f4(double %f1, double *%base) {
; CHECK: aghi %r2, 4096
; CHECK: mdb %f0, 0(%r2)
; CHECK: br %r14
- %ptr = getelementptr double *%base, i64 512
- %f2 = load double *%ptr
+ %ptr = getelementptr double, double *%base, i64 512
+ %f2 = load double , double *%ptr
%res = fmul double %f1, %f2
ret double %res
}
@@ -53,8 +55,8 @@ define double @f5(double %f1, double *%base) {
; CHECK: aghi %r2, -8
; CHECK: mdb %f0, 0(%r2)
; CHECK: br %r14
- %ptr = getelementptr double *%base, i64 -1
- %f2 = load double *%ptr
+ %ptr = getelementptr double, double *%base, i64 -1
+ %f2 = load double , double *%ptr
%res = fmul double %f1, %f2
ret double %res
}
@@ -65,9 +67,9 @@ define double @f6(double %f1, double *%base, i64 %index) {
; CHECK: sllg %r1, %r3, 3
; CHECK: mdb %f0, 800(%r1,%r2)
; CHECK: br %r14
- %ptr1 = getelementptr double *%base, i64 %index
- %ptr2 = getelementptr double *%ptr1, i64 100
- %f2 = load double *%ptr2
+ %ptr1 = getelementptr double, double *%base, i64 %index
+ %ptr2 = getelementptr double, double *%ptr1, i64 100
+ %f2 = load double , double *%ptr2
%res = fmul double %f1, %f2
ret double %res
}
@@ -76,30 +78,30 @@ define double @f6(double %f1, double *%base, i64 %index) {
define double @f7(double *%ptr0) {
; CHECK-LABEL: f7:
; CHECK: brasl %r14, foo@PLT
-; CHECK: mdb %f0, 160(%r15)
+; CHECK-SCALAR: mdb %f0, 160(%r15)
; CHECK: br %r14
- %ptr1 = getelementptr double *%ptr0, i64 2
- %ptr2 = getelementptr double *%ptr0, i64 4
- %ptr3 = getelementptr double *%ptr0, i64 6
- %ptr4 = getelementptr double *%ptr0, i64 8
- %ptr5 = getelementptr double *%ptr0, i64 10
- %ptr6 = getelementptr double *%ptr0, i64 12
- %ptr7 = getelementptr double *%ptr0, i64 14
- %ptr8 = getelementptr double *%ptr0, i64 16
- %ptr9 = getelementptr double *%ptr0, i64 18
- %ptr10 = getelementptr double *%ptr0, i64 20
+ %ptr1 = getelementptr double, double *%ptr0, i64 2
+ %ptr2 = getelementptr double, double *%ptr0, i64 4
+ %ptr3 = getelementptr double, double *%ptr0, i64 6
+ %ptr4 = getelementptr double, double *%ptr0, i64 8
+ %ptr5 = getelementptr double, double *%ptr0, i64 10
+ %ptr6 = getelementptr double, double *%ptr0, i64 12
+ %ptr7 = getelementptr double, double *%ptr0, i64 14
+ %ptr8 = getelementptr double, double *%ptr0, i64 16
+ %ptr9 = getelementptr double, double *%ptr0, i64 18
+ %ptr10 = getelementptr double, double *%ptr0, i64 20
- %val0 = load double *%ptr0
- %val1 = load double *%ptr1
- %val2 = load double *%ptr2
- %val3 = load double *%ptr3
- %val4 = load double *%ptr4
- %val5 = load double *%ptr5
- %val6 = load double *%ptr6
- %val7 = load double *%ptr7
- %val8 = load double *%ptr8
- %val9 = load double *%ptr9
- %val10 = load double *%ptr10
+ %val0 = load double , double *%ptr0
+ %val1 = load double , double *%ptr1
+ %val2 = load double , double *%ptr2
+ %val3 = load double , double *%ptr3
+ %val4 = load double , double *%ptr4
+ %val5 = load double , double *%ptr5
+ %val6 = load double , double *%ptr6
+ %val7 = load double , double *%ptr7
+ %val8 = load double , double *%ptr8
+ %val9 = load double , double *%ptr9
+ %val10 = load double , double *%ptr10
%ret = call double @foo()
diff --git a/test/CodeGen/SystemZ/fp-mul-04.ll b/test/CodeGen/SystemZ/fp-mul-04.ll
index 3c4325e6cbbb..4226a3f61dff 100644
--- a/test/CodeGen/SystemZ/fp-mul-04.ll
+++ b/test/CodeGen/SystemZ/fp-mul-04.ll
@@ -27,7 +27,7 @@ define void @f2(double %f1, double *%ptr, fp128 *%dst) {
; CHECK: std %f0, 0(%r3)
; CHECK: std %f2, 8(%r3)
; CHECK: br %r14
- %f2 = load double *%ptr
+ %f2 = load double , double *%ptr
%f1x = fpext double %f1 to fp128
%f2x = fpext double %f2 to fp128
%res = fmul fp128 %f1x, %f2x
@@ -42,8 +42,8 @@ define void @f3(double %f1, double *%base, fp128 *%dst) {
; CHECK: std %f0, 0(%r3)
; CHECK: std %f2, 8(%r3)
; CHECK: br %r14
- %ptr = getelementptr double *%base, i64 511
- %f2 = load double *%ptr
+ %ptr = getelementptr double, double *%base, i64 511
+ %f2 = load double , double *%ptr
%f1x = fpext double %f1 to fp128
%f2x = fpext double %f2 to fp128
%res = fmul fp128 %f1x, %f2x
@@ -60,8 +60,8 @@ define void @f4(double %f1, double *%base, fp128 *%dst) {
; CHECK: std %f0, 0(%r3)
; CHECK: std %f2, 8(%r3)
; CHECK: br %r14
- %ptr = getelementptr double *%base, i64 512
- %f2 = load double *%ptr
+ %ptr = getelementptr double, double *%base, i64 512
+ %f2 = load double , double *%ptr
%f1x = fpext double %f1 to fp128
%f2x = fpext double %f2 to fp128
%res = fmul fp128 %f1x, %f2x
@@ -77,8 +77,8 @@ define void @f5(double %f1, double *%base, fp128 *%dst) {
; CHECK: std %f0, 0(%r3)
; CHECK: std %f2, 8(%r3)
; CHECK: br %r14
- %ptr = getelementptr double *%base, i64 -1
- %f2 = load double *%ptr
+ %ptr = getelementptr double, double *%base, i64 -1
+ %f2 = load double , double *%ptr
%f1x = fpext double %f1 to fp128
%f2x = fpext double %f2 to fp128
%res = fmul fp128 %f1x, %f2x
@@ -94,9 +94,9 @@ define void @f6(double %f1, double *%base, i64 %index, fp128 *%dst) {
; CHECK: std %f0, 0(%r4)
; CHECK: std %f2, 8(%r4)
; CHECK: br %r14
- %ptr1 = getelementptr double *%base, i64 %index
- %ptr2 = getelementptr double *%ptr1, i64 100
- %f2 = load double *%ptr2
+ %ptr1 = getelementptr double, double *%base, i64 %index
+ %ptr2 = getelementptr double, double *%ptr1, i64 100
+ %f2 = load double , double *%ptr2
%f1x = fpext double %f1 to fp128
%f2x = fpext double %f2 to fp128
%res = fmul fp128 %f1x, %f2x
@@ -110,28 +110,28 @@ define double @f7(double *%ptr0) {
; CHECK: brasl %r14, foo@PLT
; CHECK: mxdb %f0, 160(%r15)
; CHECK: br %r14
- %ptr1 = getelementptr double *%ptr0, i64 2
- %ptr2 = getelementptr double *%ptr0, i64 4
- %ptr3 = getelementptr double *%ptr0, i64 6
- %ptr4 = getelementptr double *%ptr0, i64 8
- %ptr5 = getelementptr double *%ptr0, i64 10
- %ptr6 = getelementptr double *%ptr0, i64 12
- %ptr7 = getelementptr double *%ptr0, i64 14
- %ptr8 = getelementptr double *%ptr0, i64 16
- %ptr9 = getelementptr double *%ptr0, i64 18
- %ptr10 = getelementptr double *%ptr0, i64 20
-
- %val0 = load double *%ptr0
- %val1 = load double *%ptr1
- %val2 = load double *%ptr2
- %val3 = load double *%ptr3
- %val4 = load double *%ptr4
- %val5 = load double *%ptr5
- %val6 = load double *%ptr6
- %val7 = load double *%ptr7
- %val8 = load double *%ptr8
- %val9 = load double *%ptr9
- %val10 = load double *%ptr10
+ %ptr1 = getelementptr double, double *%ptr0, i64 2
+ %ptr2 = getelementptr double, double *%ptr0, i64 4
+ %ptr3 = getelementptr double, double *%ptr0, i64 6
+ %ptr4 = getelementptr double, double *%ptr0, i64 8
+ %ptr5 = getelementptr double, double *%ptr0, i64 10
+ %ptr6 = getelementptr double, double *%ptr0, i64 12
+ %ptr7 = getelementptr double, double *%ptr0, i64 14
+ %ptr8 = getelementptr double, double *%ptr0, i64 16
+ %ptr9 = getelementptr double, double *%ptr0, i64 18
+ %ptr10 = getelementptr double, double *%ptr0, i64 20
+
+ %val0 = load double , double *%ptr0
+ %val1 = load double , double *%ptr1
+ %val2 = load double , double *%ptr2
+ %val3 = load double , double *%ptr3
+ %val4 = load double , double *%ptr4
+ %val5 = load double , double *%ptr5
+ %val6 = load double , double *%ptr6
+ %val7 = load double , double *%ptr7
+ %val8 = load double , double *%ptr8
+ %val9 = load double , double *%ptr9
+ %val10 = load double , double *%ptr10
%frob0 = fadd double %val0, %val0
%frob1 = fadd double %val1, %val1
diff --git a/test/CodeGen/SystemZ/fp-mul-05.ll b/test/CodeGen/SystemZ/fp-mul-05.ll
index 0be1fe8b41a0..48d0dcdcaff4 100644
--- a/test/CodeGen/SystemZ/fp-mul-05.ll
+++ b/test/CodeGen/SystemZ/fp-mul-05.ll
@@ -12,7 +12,7 @@ define void @f1(fp128 *%ptr, float %f2) {
; CHECK: std %f1, 0(%r2)
; CHECK: std %f3, 8(%r2)
; CHECK: br %r14
- %f1 = load fp128 *%ptr
+ %f1 = load fp128 , fp128 *%ptr
%f2x = fpext float %f2 to fp128
%diff = fmul fp128 %f1, %f2x
store fp128 %diff, fp128 *%ptr
diff --git a/test/CodeGen/SystemZ/fp-mul-06.ll b/test/CodeGen/SystemZ/fp-mul-06.ll
index 3f631a68b575..896fafecbdaf 100644
--- a/test/CodeGen/SystemZ/fp-mul-06.ll
+++ b/test/CodeGen/SystemZ/fp-mul-06.ll
@@ -16,7 +16,7 @@ define float @f2(float %f1, float *%ptr, float %acc) {
; CHECK: maeb %f2, %f0, 0(%r2)
; CHECK: ler %f0, %f2
; CHECK: br %r14
- %f2 = load float *%ptr
+ %f2 = load float , float *%ptr
%res = call float @llvm.fma.f32 (float %f1, float %f2, float %acc)
ret float %res
}
@@ -26,8 +26,8 @@ define float @f3(float %f1, float *%base, float %acc) {
; CHECK: maeb %f2, %f0, 4092(%r2)
; CHECK: ler %f0, %f2
; CHECK: br %r14
- %ptr = getelementptr float *%base, i64 1023
- %f2 = load float *%ptr
+ %ptr = getelementptr float, float *%base, i64 1023
+ %f2 = load float , float *%ptr
%res = call float @llvm.fma.f32 (float %f1, float %f2, float %acc)
ret float %res
}
@@ -41,8 +41,8 @@ define float @f4(float %f1, float *%base, float %acc) {
; CHECK: maeb %f2, %f0, 0(%r2)
; CHECK: ler %f0, %f2
; CHECK: br %r14
- %ptr = getelementptr float *%base, i64 1024
- %f2 = load float *%ptr
+ %ptr = getelementptr float, float *%base, i64 1024
+ %f2 = load float , float *%ptr
%res = call float @llvm.fma.f32 (float %f1, float %f2, float %acc)
ret float %res
}
@@ -56,8 +56,8 @@ define float @f5(float %f1, float *%base, float %acc) {
; CHECK: maeb %f2, %f0, 0(%r2)
; CHECK: ler %f0, %f2
; CHECK: br %r14
- %ptr = getelementptr float *%base, i64 -1
- %f2 = load float *%ptr
+ %ptr = getelementptr float, float *%base, i64 -1
+ %f2 = load float , float *%ptr
%res = call float @llvm.fma.f32 (float %f1, float %f2, float %acc)
ret float %res
}
@@ -68,8 +68,8 @@ define float @f6(float %f1, float *%base, i64 %index, float %acc) {
; CHECK: maeb %f2, %f0, 0(%r1,%r2)
; CHECK: ler %f0, %f2
; CHECK: br %r14
- %ptr = getelementptr float *%base, i64 %index
- %f2 = load float *%ptr
+ %ptr = getelementptr float, float *%base, i64 %index
+ %f2 = load float , float *%ptr
%res = call float @llvm.fma.f32 (float %f1, float %f2, float %acc)
ret float %res
}
@@ -81,8 +81,8 @@ define float @f7(float %f1, float *%base, i64 %index, float %acc) {
; CHECK: ler %f0, %f2
; CHECK: br %r14
%index2 = add i64 %index, 1023
- %ptr = getelementptr float *%base, i64 %index2
- %f2 = load float *%ptr
+ %ptr = getelementptr float, float *%base, i64 %index2
+ %f2 = load float , float *%ptr
%res = call float @llvm.fma.f32 (float %f1, float %f2, float %acc)
ret float %res
}
@@ -95,8 +95,8 @@ define float @f8(float %f1, float *%base, i64 %index, float %acc) {
; CHECK: ler %f0, %f2
; CHECK: br %r14
%index2 = add i64 %index, 1024
- %ptr = getelementptr float *%base, i64 %index2
- %f2 = load float *%ptr
+ %ptr = getelementptr float, float *%base, i64 %index2
+ %f2 = load float , float *%ptr
%res = call float @llvm.fma.f32 (float %f1, float %f2, float %acc)
ret float %res
}
diff --git a/test/CodeGen/SystemZ/fp-mul-07.ll b/test/CodeGen/SystemZ/fp-mul-07.ll
index e4f590447215..e0b4a5c5d780 100644
--- a/test/CodeGen/SystemZ/fp-mul-07.ll
+++ b/test/CodeGen/SystemZ/fp-mul-07.ll
@@ -1,11 +1,15 @@
-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \
+; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 \
+; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-VECTOR %s
declare double @llvm.fma.f64(double %f1, double %f2, double %f3)
define double @f1(double %f1, double %f2, double %acc) {
; CHECK-LABEL: f1:
-; CHECK: madbr %f4, %f0, %f2
-; CHECK: ldr %f0, %f4
+; CHECK-SCALAR: madbr %f4, %f0, %f2
+; CHECK-SCALAR: ldr %f0, %f4
+; CHECK-VECTOR: wfmadb %f0, %f0, %f2, %f4
; CHECK: br %r14
%res = call double @llvm.fma.f64 (double %f1, double %f2, double %acc)
ret double %res
@@ -16,7 +20,7 @@ define double @f2(double %f1, double *%ptr, double %acc) {
; CHECK: madb %f2, %f0, 0(%r2)
; CHECK: ldr %f0, %f2
; CHECK: br %r14
- %f2 = load double *%ptr
+ %f2 = load double , double *%ptr
%res = call double @llvm.fma.f64 (double %f1, double %f2, double %acc)
ret double %res
}
@@ -26,8 +30,8 @@ define double @f3(double %f1, double *%base, double %acc) {
; CHECK: madb %f2, %f0, 4088(%r2)
; CHECK: ldr %f0, %f2
; CHECK: br %r14
- %ptr = getelementptr double *%base, i64 511
- %f2 = load double *%ptr
+ %ptr = getelementptr double, double *%base, i64 511
+ %f2 = load double , double *%ptr
%res = call double @llvm.fma.f64 (double %f1, double %f2, double %acc)
ret double %res
}
@@ -41,8 +45,8 @@ define double @f4(double %f1, double *%base, double %acc) {
; CHECK: madb %f2, %f0, 0(%r2)
; CHECK: ldr %f0, %f2
; CHECK: br %r14
- %ptr = getelementptr double *%base, i64 512
- %f2 = load double *%ptr
+ %ptr = getelementptr double, double *%base, i64 512
+ %f2 = load double , double *%ptr
%res = call double @llvm.fma.f64 (double %f1, double %f2, double %acc)
ret double %res
}
@@ -56,8 +60,8 @@ define double @f5(double %f1, double *%base, double %acc) {
; CHECK: madb %f2, %f0, 0(%r2)
; CHECK: ldr %f0, %f2
; CHECK: br %r14
- %ptr = getelementptr double *%base, i64 -1
- %f2 = load double *%ptr
+ %ptr = getelementptr double, double *%base, i64 -1
+ %f2 = load double , double *%ptr
%res = call double @llvm.fma.f64 (double %f1, double %f2, double %acc)
ret double %res
}
@@ -68,8 +72,8 @@ define double @f6(double %f1, double *%base, i64 %index, double %acc) {
; CHECK: madb %f2, %f0, 0(%r1,%r2)
; CHECK: ldr %f0, %f2
; CHECK: br %r14
- %ptr = getelementptr double *%base, i64 %index
- %f2 = load double *%ptr
+ %ptr = getelementptr double, double *%base, i64 %index
+ %f2 = load double , double *%ptr
%res = call double @llvm.fma.f64 (double %f1, double %f2, double %acc)
ret double %res
}
@@ -81,8 +85,8 @@ define double @f7(double %f1, double *%base, i64 %index, double %acc) {
; CHECK: ldr %f0, %f2
; CHECK: br %r14
%index2 = add i64 %index, 511
- %ptr = getelementptr double *%base, i64 %index2
- %f2 = load double *%ptr
+ %ptr = getelementptr double, double *%base, i64 %index2
+ %f2 = load double , double *%ptr
%res = call double @llvm.fma.f64 (double %f1, double %f2, double %acc)
ret double %res
}
@@ -95,8 +99,8 @@ define double @f8(double %f1, double *%base, i64 %index, double %acc) {
; CHECK: ldr %f0, %f2
; CHECK: br %r14
%index2 = add i64 %index, 512
- %ptr = getelementptr double *%base, i64 %index2
- %f2 = load double *%ptr
+ %ptr = getelementptr double, double *%base, i64 %index2
+ %f2 = load double , double *%ptr
%res = call double @llvm.fma.f64 (double %f1, double %f2, double %acc)
ret double %res
}
diff --git a/test/CodeGen/SystemZ/fp-mul-08.ll b/test/CodeGen/SystemZ/fp-mul-08.ll
index ab5fcb2cbefd..5e5538bfacc9 100644
--- a/test/CodeGen/SystemZ/fp-mul-08.ll
+++ b/test/CodeGen/SystemZ/fp-mul-08.ll
@@ -17,7 +17,7 @@ define float @f2(float %f1, float *%ptr, float %acc) {
; CHECK: mseb %f2, %f0, 0(%r2)
; CHECK: ler %f0, %f2
; CHECK: br %r14
- %f2 = load float *%ptr
+ %f2 = load float , float *%ptr
%negacc = fsub float -0.0, %acc
%res = call float @llvm.fma.f32 (float %f1, float %f2, float %negacc)
ret float %res
@@ -28,8 +28,8 @@ define float @f3(float %f1, float *%base, float %acc) {
; CHECK: mseb %f2, %f0, 4092(%r2)
; CHECK: ler %f0, %f2
; CHECK: br %r14
- %ptr = getelementptr float *%base, i64 1023
- %f2 = load float *%ptr
+ %ptr = getelementptr float, float *%base, i64 1023
+ %f2 = load float , float *%ptr
%negacc = fsub float -0.0, %acc
%res = call float @llvm.fma.f32 (float %f1, float %f2, float %negacc)
ret float %res
@@ -44,8 +44,8 @@ define float @f4(float %f1, float *%base, float %acc) {
; CHECK: mseb %f2, %f0, 0(%r2)
; CHECK: ler %f0, %f2
; CHECK: br %r14
- %ptr = getelementptr float *%base, i64 1024
- %f2 = load float *%ptr
+ %ptr = getelementptr float, float *%base, i64 1024
+ %f2 = load float , float *%ptr
%negacc = fsub float -0.0, %acc
%res = call float @llvm.fma.f32 (float %f1, float %f2, float %negacc)
ret float %res
@@ -60,8 +60,8 @@ define float @f5(float %f1, float *%base, float %acc) {
; CHECK: mseb %f2, %f0, 0(%r2)
; CHECK: ler %f0, %f2
; CHECK: br %r14
- %ptr = getelementptr float *%base, i64 -1
- %f2 = load float *%ptr
+ %ptr = getelementptr float, float *%base, i64 -1
+ %f2 = load float , float *%ptr
%negacc = fsub float -0.0, %acc
%res = call float @llvm.fma.f32 (float %f1, float %f2, float %negacc)
ret float %res
@@ -73,8 +73,8 @@ define float @f6(float %f1, float *%base, i64 %index, float %acc) {
; CHECK: mseb %f2, %f0, 0(%r1,%r2)
; CHECK: ler %f0, %f2
; CHECK: br %r14
- %ptr = getelementptr float *%base, i64 %index
- %f2 = load float *%ptr
+ %ptr = getelementptr float, float *%base, i64 %index
+ %f2 = load float , float *%ptr
%negacc = fsub float -0.0, %acc
%res = call float @llvm.fma.f32 (float %f1, float %f2, float %negacc)
ret float %res
@@ -87,8 +87,8 @@ define float @f7(float %f1, float *%base, i64 %index, float %acc) {
; CHECK: ler %f0, %f2
; CHECK: br %r14
%index2 = add i64 %index, 1023
- %ptr = getelementptr float *%base, i64 %index2
- %f2 = load float *%ptr
+ %ptr = getelementptr float, float *%base, i64 %index2
+ %f2 = load float , float *%ptr
%negacc = fsub float -0.0, %acc
%res = call float @llvm.fma.f32 (float %f1, float %f2, float %negacc)
ret float %res
@@ -102,8 +102,8 @@ define float @f8(float %f1, float *%base, i64 %index, float %acc) {
; CHECK: ler %f0, %f2
; CHECK: br %r14
%index2 = add i64 %index, 1024
- %ptr = getelementptr float *%base, i64 %index2
- %f2 = load float *%ptr
+ %ptr = getelementptr float, float *%base, i64 %index2
+ %f2 = load float , float *%ptr
%negacc = fsub float -0.0, %acc
%res = call float @llvm.fma.f32 (float %f1, float %f2, float %negacc)
ret float %res
diff --git a/test/CodeGen/SystemZ/fp-mul-09.ll b/test/CodeGen/SystemZ/fp-mul-09.ll
index 7e740968a8c7..927a8064823c 100644
--- a/test/CodeGen/SystemZ/fp-mul-09.ll
+++ b/test/CodeGen/SystemZ/fp-mul-09.ll
@@ -1,11 +1,15 @@
-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \
+; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 \
+; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-VECTOR %s
declare double @llvm.fma.f64(double %f1, double %f2, double %f3)
define double @f1(double %f1, double %f2, double %acc) {
; CHECK-LABEL: f1:
-; CHECK: msdbr %f4, %f0, %f2
-; CHECK: ldr %f0, %f4
+; CHECK-SCALAR: msdbr %f4, %f0, %f2
+; CHECK-SCALAR: ldr %f0, %f4
+; CHECK-VECTOR: wfmsdb %f0, %f0, %f2, %f4
; CHECK: br %r14
%negacc = fsub double -0.0, %acc
%res = call double @llvm.fma.f64 (double %f1, double %f2, double %negacc)
@@ -17,7 +21,7 @@ define double @f2(double %f1, double *%ptr, double %acc) {
; CHECK: msdb %f2, %f0, 0(%r2)
; CHECK: ldr %f0, %f2
; CHECK: br %r14
- %f2 = load double *%ptr
+ %f2 = load double , double *%ptr
%negacc = fsub double -0.0, %acc
%res = call double @llvm.fma.f64 (double %f1, double %f2, double %negacc)
ret double %res
@@ -28,8 +32,8 @@ define double @f3(double %f1, double *%base, double %acc) {
; CHECK: msdb %f2, %f0, 4088(%r2)
; CHECK: ldr %f0, %f2
; CHECK: br %r14
- %ptr = getelementptr double *%base, i64 511
- %f2 = load double *%ptr
+ %ptr = getelementptr double, double *%base, i64 511
+ %f2 = load double , double *%ptr
%negacc = fsub double -0.0, %acc
%res = call double @llvm.fma.f64 (double %f1, double %f2, double %negacc)
ret double %res
@@ -44,8 +48,8 @@ define double @f4(double %f1, double *%base, double %acc) {
; CHECK: msdb %f2, %f0, 0(%r2)
; CHECK: ldr %f0, %f2
; CHECK: br %r14
- %ptr = getelementptr double *%base, i64 512
- %f2 = load double *%ptr
+ %ptr = getelementptr double, double *%base, i64 512
+ %f2 = load double , double *%ptr
%negacc = fsub double -0.0, %acc
%res = call double @llvm.fma.f64 (double %f1, double %f2, double %negacc)
ret double %res
@@ -60,8 +64,8 @@ define double @f5(double %f1, double *%base, double %acc) {
; CHECK: msdb %f2, %f0, 0(%r2)
; CHECK: ldr %f0, %f2
; CHECK: br %r14
- %ptr = getelementptr double *%base, i64 -1
- %f2 = load double *%ptr
+ %ptr = getelementptr double, double *%base, i64 -1
+ %f2 = load double , double *%ptr
%negacc = fsub double -0.0, %acc
%res = call double @llvm.fma.f64 (double %f1, double %f2, double %negacc)
ret double %res
@@ -73,8 +77,8 @@ define double @f6(double %f1, double *%base, i64 %index, double %acc) {
; CHECK: msdb %f2, %f0, 0(%r1,%r2)
; CHECK: ldr %f0, %f2
; CHECK: br %r14
- %ptr = getelementptr double *%base, i64 %index
- %f2 = load double *%ptr
+ %ptr = getelementptr double, double *%base, i64 %index
+ %f2 = load double , double *%ptr
%negacc = fsub double -0.0, %acc
%res = call double @llvm.fma.f64 (double %f1, double %f2, double %negacc)
ret double %res
@@ -87,8 +91,8 @@ define double @f7(double %f1, double *%base, i64 %index, double %acc) {
; CHECK: ldr %f0, %f2
; CHECK: br %r14
%index2 = add i64 %index, 511
- %ptr = getelementptr double *%base, i64 %index2
- %f2 = load double *%ptr
+ %ptr = getelementptr double, double *%base, i64 %index2
+ %f2 = load double , double *%ptr
%negacc = fsub double -0.0, %acc
%res = call double @llvm.fma.f64 (double %f1, double %f2, double %negacc)
ret double %res
@@ -102,8 +106,8 @@ define double @f8(double %f1, double *%base, i64 %index, double %acc) {
; CHECK: ldr %f0, %f2
; CHECK: br %r14
%index2 = add i64 %index, 512
- %ptr = getelementptr double *%base, i64 %index2
- %f2 = load double *%ptr
+ %ptr = getelementptr double, double *%base, i64 %index2
+ %f2 = load double , double *%ptr
%negacc = fsub double -0.0, %acc
%res = call double @llvm.fma.f64 (double %f1, double %f2, double %negacc)
ret double %res
diff --git a/test/CodeGen/SystemZ/fp-neg-01.ll b/test/CodeGen/SystemZ/fp-neg-01.ll
index 1cc6d816fee3..fe2e5f67cf5b 100644
--- a/test/CodeGen/SystemZ/fp-neg-01.ll
+++ b/test/CodeGen/SystemZ/fp-neg-01.ll
@@ -1,6 +1,7 @@
; Test floating-point negation.
;
-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
; Test f32.
define float @f1(float %f) {
@@ -28,10 +29,10 @@ define void @f3(fp128 *%ptr, fp128 *%ptr2) {
; CHECK: lcxbr
; CHECK: dxbr
; CHECK: br %r14
- %orig = load fp128 *%ptr
+ %orig = load fp128 , fp128 *%ptr
%negzero = fpext float -0.0 to fp128
%neg = fsub fp128 0xL00000000000000008000000000000000, %orig
- %op2 = load fp128 *%ptr2
+ %op2 = load fp128 , fp128 *%ptr2
%res = fdiv fp128 %neg, %op2
store fp128 %res, fp128 *%ptr
ret void
diff --git a/test/CodeGen/SystemZ/fp-round-01.ll b/test/CodeGen/SystemZ/fp-round-01.ll
index 565db5ad4f51..bdec02f19c83 100644
--- a/test/CodeGen/SystemZ/fp-round-01.ll
+++ b/test/CodeGen/SystemZ/fp-round-01.ll
@@ -28,7 +28,7 @@ define void @f3(fp128 *%ptr) {
; CHECK-LABEL: f3:
; CHECK: fixbr %f0, 0, %f0
; CHECK: br %r14
- %src = load fp128 *%ptr
+ %src = load fp128 , fp128 *%ptr
%res = call fp128 @llvm.rint.f128(fp128 %src)
store fp128 %res, fp128 *%ptr
ret void
diff --git a/test/CodeGen/SystemZ/fp-round-02.ll b/test/CodeGen/SystemZ/fp-round-02.ll
index d79c9c47050a..428261478dc8 100644
--- a/test/CodeGen/SystemZ/fp-round-02.ll
+++ b/test/CodeGen/SystemZ/fp-round-02.ll
@@ -1,6 +1,9 @@
; Test rounding functions for z196 and above.
;
-; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 \
+; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 \
+; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-VECTOR %s
; Test rint for f32.
declare float @llvm.rint.f32(float %f)
@@ -16,7 +19,8 @@ define float @f1(float %f) {
declare double @llvm.rint.f64(double %f)
define double @f2(double %f) {
; CHECK-LABEL: f2:
-; CHECK: fidbr %f0, 0, %f0
+; CHECK-SCALAR: fidbr %f0, 0, %f0
+; CHECK-VECTOR: fidbra %f0, 0, %f0, 0
; CHECK: br %r14
%res = call double @llvm.rint.f64(double %f)
ret double %res
@@ -28,7 +32,7 @@ define void @f3(fp128 *%ptr) {
; CHECK-LABEL: f3:
; CHECK: fixbr %f0, 0, %f0
; CHECK: br %r14
- %src = load fp128 *%ptr
+ %src = load fp128 , fp128 *%ptr
%res = call fp128 @llvm.rint.f128(fp128 %src)
store fp128 %res, fp128 *%ptr
ret void
@@ -60,7 +64,7 @@ define void @f6(fp128 *%ptr) {
; CHECK-LABEL: f6:
; CHECK: fixbra %f0, 0, %f0, 4
; CHECK: br %r14
- %src = load fp128 *%ptr
+ %src = load fp128 , fp128 *%ptr
%res = call fp128 @llvm.nearbyint.f128(fp128 %src)
store fp128 %res, fp128 *%ptr
ret void
@@ -92,7 +96,7 @@ define void @f9(fp128 *%ptr) {
; CHECK-LABEL: f9:
; CHECK: fixbra %f0, 7, %f0, 4
; CHECK: br %r14
- %src = load fp128 *%ptr
+ %src = load fp128 , fp128 *%ptr
%res = call fp128 @llvm.floor.f128(fp128 %src)
store fp128 %res, fp128 *%ptr
ret void
@@ -124,7 +128,7 @@ define void @f12(fp128 *%ptr) {
; CHECK-LABEL: f12:
; CHECK: fixbra %f0, 6, %f0, 4
; CHECK: br %r14
- %src = load fp128 *%ptr
+ %src = load fp128 , fp128 *%ptr
%res = call fp128 @llvm.ceil.f128(fp128 %src)
store fp128 %res, fp128 *%ptr
ret void
@@ -156,7 +160,7 @@ define void @f15(fp128 *%ptr) {
; CHECK-LABEL: f15:
; CHECK: fixbra %f0, 5, %f0, 4
; CHECK: br %r14
- %src = load fp128 *%ptr
+ %src = load fp128 , fp128 *%ptr
%res = call fp128 @llvm.trunc.f128(fp128 %src)
store fp128 %res, fp128 *%ptr
ret void
@@ -188,7 +192,7 @@ define void @f18(fp128 *%ptr) {
; CHECK-LABEL: f18:
; CHECK: fixbra %f0, 1, %f0, 4
; CHECK: br %r14
- %src = load fp128 *%ptr
+ %src = load fp128 , fp128 *%ptr
%res = call fp128 @llvm.round.f128(fp128 %src)
store fp128 %res, fp128 *%ptr
ret void
diff --git a/test/CodeGen/SystemZ/fp-sqrt-01.ll b/test/CodeGen/SystemZ/fp-sqrt-01.ll
index 7465af456b83..e8bf65bdc981 100644
--- a/test/CodeGen/SystemZ/fp-sqrt-01.ll
+++ b/test/CodeGen/SystemZ/fp-sqrt-01.ll
@@ -19,7 +19,7 @@ define float @f2(float *%ptr) {
; CHECK-LABEL: f2:
; CHECK: sqeb %f0, 0(%r2)
; CHECK: br %r14
- %val = load float *%ptr
+ %val = load float , float *%ptr
%res = call float @llvm.sqrt.f32(float %val)
ret float %res
}
@@ -29,8 +29,8 @@ define float @f3(float *%base) {
; CHECK-LABEL: f3:
; CHECK: sqeb %f0, 4092(%r2)
; CHECK: br %r14
- %ptr = getelementptr float *%base, i64 1023
- %val = load float *%ptr
+ %ptr = getelementptr float, float *%base, i64 1023
+ %val = load float , float *%ptr
%res = call float @llvm.sqrt.f32(float %val)
ret float %res
}
@@ -42,8 +42,8 @@ define float @f4(float *%base) {
; CHECK: aghi %r2, 4096
; CHECK: sqeb %f0, 0(%r2)
; CHECK: br %r14
- %ptr = getelementptr float *%base, i64 1024
- %val = load float *%ptr
+ %ptr = getelementptr float, float *%base, i64 1024
+ %val = load float , float *%ptr
%res = call float @llvm.sqrt.f32(float %val)
ret float %res
}
@@ -54,8 +54,8 @@ define float @f5(float *%base) {
; CHECK: aghi %r2, -4
; CHECK: sqeb %f0, 0(%r2)
; CHECK: br %r14
- %ptr = getelementptr float *%base, i64 -1
- %val = load float *%ptr
+ %ptr = getelementptr float, float *%base, i64 -1
+ %val = load float , float *%ptr
%res = call float @llvm.sqrt.f32(float %val)
ret float %res
}
@@ -66,9 +66,9 @@ define float @f6(float *%base, i64 %index) {
; CHECK: sllg %r1, %r3, 2
; CHECK: sqeb %f0, 400(%r1,%r2)
; CHECK: br %r14
- %ptr1 = getelementptr float *%base, i64 %index
- %ptr2 = getelementptr float *%ptr1, i64 100
- %val = load float *%ptr2
+ %ptr1 = getelementptr float, float *%base, i64 %index
+ %ptr2 = getelementptr float, float *%ptr1, i64 100
+ %val = load float , float *%ptr2
%res = call float @llvm.sqrt.f32(float %val)
ret float %res
}
@@ -79,23 +79,23 @@ define void @f7(float *%ptr) {
; CHECK-LABEL: f7:
; CHECK: sqeb {{%f[0-9]+}}, 16{{[04]}}(%r15)
; CHECK: br %r14
- %val0 = load volatile float *%ptr
- %val1 = load volatile float *%ptr
- %val2 = load volatile float *%ptr
- %val3 = load volatile float *%ptr
- %val4 = load volatile float *%ptr
- %val5 = load volatile float *%ptr
- %val6 = load volatile float *%ptr
- %val7 = load volatile float *%ptr
- %val8 = load volatile float *%ptr
- %val9 = load volatile float *%ptr
- %val10 = load volatile float *%ptr
- %val11 = load volatile float *%ptr
- %val12 = load volatile float *%ptr
- %val13 = load volatile float *%ptr
- %val14 = load volatile float *%ptr
- %val15 = load volatile float *%ptr
- %val16 = load volatile float *%ptr
+ %val0 = load volatile float , float *%ptr
+ %val1 = load volatile float , float *%ptr
+ %val2 = load volatile float , float *%ptr
+ %val3 = load volatile float , float *%ptr
+ %val4 = load volatile float , float *%ptr
+ %val5 = load volatile float , float *%ptr
+ %val6 = load volatile float , float *%ptr
+ %val7 = load volatile float , float *%ptr
+ %val8 = load volatile float , float *%ptr
+ %val9 = load volatile float , float *%ptr
+ %val10 = load volatile float , float *%ptr
+ %val11 = load volatile float , float *%ptr
+ %val12 = load volatile float , float *%ptr
+ %val13 = load volatile float , float *%ptr
+ %val14 = load volatile float , float *%ptr
+ %val15 = load volatile float , float *%ptr
+ %val16 = load volatile float , float *%ptr
%sqrt0 = call float @llvm.sqrt.f32(float %val0)
%sqrt1 = call float @llvm.sqrt.f32(float %val1)
diff --git a/test/CodeGen/SystemZ/fp-sqrt-02.ll b/test/CodeGen/SystemZ/fp-sqrt-02.ll
index 66ffd19d6c31..a162466064e8 100644
--- a/test/CodeGen/SystemZ/fp-sqrt-02.ll
+++ b/test/CodeGen/SystemZ/fp-sqrt-02.ll
@@ -1,6 +1,8 @@
; Test 64-bit square root.
;
-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \
+; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
declare double @llvm.sqrt.f64(double %f)
declare double @sqrt(double)
@@ -19,7 +21,7 @@ define double @f2(double *%ptr) {
; CHECK-LABEL: f2:
; CHECK: sqdb %f0, 0(%r2)
; CHECK: br %r14
- %val = load double *%ptr
+ %val = load double , double *%ptr
%res = call double @llvm.sqrt.f64(double %val)
ret double %res
}
@@ -29,8 +31,8 @@ define double @f3(double *%base) {
; CHECK-LABEL: f3:
; CHECK: sqdb %f0, 4088(%r2)
; CHECK: br %r14
- %ptr = getelementptr double *%base, i64 511
- %val = load double *%ptr
+ %ptr = getelementptr double, double *%base, i64 511
+ %val = load double , double *%ptr
%res = call double @llvm.sqrt.f64(double %val)
ret double %res
}
@@ -42,8 +44,8 @@ define double @f4(double *%base) {
; CHECK: aghi %r2, 4096
; CHECK: sqdb %f0, 0(%r2)
; CHECK: br %r14
- %ptr = getelementptr double *%base, i64 512
- %val = load double *%ptr
+ %ptr = getelementptr double, double *%base, i64 512
+ %val = load double , double *%ptr
%res = call double @llvm.sqrt.f64(double %val)
ret double %res
}
@@ -54,8 +56,8 @@ define double @f5(double *%base) {
; CHECK: aghi %r2, -8
; CHECK: sqdb %f0, 0(%r2)
; CHECK: br %r14
- %ptr = getelementptr double *%base, i64 -1
- %val = load double *%ptr
+ %ptr = getelementptr double, double *%base, i64 -1
+ %val = load double , double *%ptr
%res = call double @llvm.sqrt.f64(double %val)
ret double %res
}
@@ -66,9 +68,9 @@ define double @f6(double *%base, i64 %index) {
; CHECK: sllg %r1, %r3, 3
; CHECK: sqdb %f0, 800(%r1,%r2)
; CHECK: br %r14
- %ptr1 = getelementptr double *%base, i64 %index
- %ptr2 = getelementptr double *%ptr1, i64 100
- %val = load double *%ptr2
+ %ptr1 = getelementptr double, double *%base, i64 %index
+ %ptr2 = getelementptr double, double *%ptr1, i64 100
+ %val = load double , double *%ptr2
%res = call double @llvm.sqrt.f64(double %val)
ret double %res
}
@@ -77,25 +79,25 @@ define double @f6(double *%base, i64 %index) {
; to use SQDB if possible.
define void @f7(double *%ptr) {
; CHECK-LABEL: f7:
-; CHECK: sqdb {{%f[0-9]+}}, 160(%r15)
+; CHECK-SCALAR: sqdb {{%f[0-9]+}}, 160(%r15)
; CHECK: br %r14
- %val0 = load volatile double *%ptr
- %val1 = load volatile double *%ptr
- %val2 = load volatile double *%ptr
- %val3 = load volatile double *%ptr
- %val4 = load volatile double *%ptr
- %val5 = load volatile double *%ptr
- %val6 = load volatile double *%ptr
- %val7 = load volatile double *%ptr
- %val8 = load volatile double *%ptr
- %val9 = load volatile double *%ptr
- %val10 = load volatile double *%ptr
- %val11 = load volatile double *%ptr
- %val12 = load volatile double *%ptr
- %val13 = load volatile double *%ptr
- %val14 = load volatile double *%ptr
- %val15 = load volatile double *%ptr
- %val16 = load volatile double *%ptr
+ %val0 = load volatile double , double *%ptr
+ %val1 = load volatile double , double *%ptr
+ %val2 = load volatile double , double *%ptr
+ %val3 = load volatile double , double *%ptr
+ %val4 = load volatile double , double *%ptr
+ %val5 = load volatile double , double *%ptr
+ %val6 = load volatile double , double *%ptr
+ %val7 = load volatile double , double *%ptr
+ %val8 = load volatile double , double *%ptr
+ %val9 = load volatile double , double *%ptr
+ %val10 = load volatile double , double *%ptr
+ %val11 = load volatile double , double *%ptr
+ %val12 = load volatile double , double *%ptr
+ %val13 = load volatile double , double *%ptr
+ %val14 = load volatile double , double *%ptr
+ %val15 = load volatile double , double *%ptr
+ %val16 = load volatile double , double *%ptr
%sqrt0 = call double @llvm.sqrt.f64(double %val0)
%sqrt1 = call double @llvm.sqrt.f64(double %val1)
diff --git a/test/CodeGen/SystemZ/fp-sqrt-03.ll b/test/CodeGen/SystemZ/fp-sqrt-03.ll
index 71426440aca3..4bc05f100a0a 100644
--- a/test/CodeGen/SystemZ/fp-sqrt-03.ll
+++ b/test/CodeGen/SystemZ/fp-sqrt-03.ll
@@ -13,7 +13,7 @@ define void @f1(fp128 *%ptr) {
; CHECK: std %f0, 0(%r2)
; CHECK: std %f2, 8(%r2)
; CHECK: br %r14
- %orig = load fp128 *%ptr
+ %orig = load fp128 , fp128 *%ptr
%sqrt = call fp128 @llvm.sqrt.f128(fp128 %orig)
store fp128 %sqrt, fp128 *%ptr
ret void
diff --git a/test/CodeGen/SystemZ/fp-sub-01.ll b/test/CodeGen/SystemZ/fp-sub-01.ll
index 76f46f626705..f4185ca3108d 100644
--- a/test/CodeGen/SystemZ/fp-sub-01.ll
+++ b/test/CodeGen/SystemZ/fp-sub-01.ll
@@ -18,7 +18,7 @@ define float @f2(float %f1, float *%ptr) {
; CHECK-LABEL: f2:
; CHECK: seb %f0, 0(%r2)
; CHECK: br %r14
- %f2 = load float *%ptr
+ %f2 = load float , float *%ptr
%res = fsub float %f1, %f2
ret float %res
}
@@ -28,8 +28,8 @@ define float @f3(float %f1, float *%base) {
; CHECK-LABEL: f3:
; CHECK: seb %f0, 4092(%r2)
; CHECK: br %r14
- %ptr = getelementptr float *%base, i64 1023
- %f2 = load float *%ptr
+ %ptr = getelementptr float, float *%base, i64 1023
+ %f2 = load float , float *%ptr
%res = fsub float %f1, %f2
ret float %res
}
@@ -41,8 +41,8 @@ define float @f4(float %f1, float *%base) {
; CHECK: aghi %r2, 4096
; CHECK: seb %f0, 0(%r2)
; CHECK: br %r14
- %ptr = getelementptr float *%base, i64 1024
- %f2 = load float *%ptr
+ %ptr = getelementptr float, float *%base, i64 1024
+ %f2 = load float , float *%ptr
%res = fsub float %f1, %f2
ret float %res
}
@@ -53,8 +53,8 @@ define float @f5(float %f1, float *%base) {
; CHECK: aghi %r2, -4
; CHECK: seb %f0, 0(%r2)
; CHECK: br %r14
- %ptr = getelementptr float *%base, i64 -1
- %f2 = load float *%ptr
+ %ptr = getelementptr float, float *%base, i64 -1
+ %f2 = load float , float *%ptr
%res = fsub float %f1, %f2
ret float %res
}
@@ -65,9 +65,9 @@ define float @f6(float %f1, float *%base, i64 %index) {
; CHECK: sllg %r1, %r3, 2
; CHECK: seb %f0, 400(%r1,%r2)
; CHECK: br %r14
- %ptr1 = getelementptr float *%base, i64 %index
- %ptr2 = getelementptr float *%ptr1, i64 100
- %f2 = load float *%ptr2
+ %ptr1 = getelementptr float, float *%base, i64 %index
+ %ptr2 = getelementptr float, float *%ptr1, i64 100
+ %f2 = load float , float *%ptr2
%res = fsub float %f1, %f2
ret float %res
}
@@ -78,28 +78,28 @@ define float @f7(float *%ptr0) {
; CHECK: brasl %r14, foo@PLT
; CHECK: seb %f0, 16{{[04]}}(%r15)
; CHECK: br %r14
- %ptr1 = getelementptr float *%ptr0, i64 2
- %ptr2 = getelementptr float *%ptr0, i64 4
- %ptr3 = getelementptr float *%ptr0, i64 6
- %ptr4 = getelementptr float *%ptr0, i64 8
- %ptr5 = getelementptr float *%ptr0, i64 10
- %ptr6 = getelementptr float *%ptr0, i64 12
- %ptr7 = getelementptr float *%ptr0, i64 14
- %ptr8 = getelementptr float *%ptr0, i64 16
- %ptr9 = getelementptr float *%ptr0, i64 18
- %ptr10 = getelementptr float *%ptr0, i64 20
+ %ptr1 = getelementptr float, float *%ptr0, i64 2
+ %ptr2 = getelementptr float, float *%ptr0, i64 4
+ %ptr3 = getelementptr float, float *%ptr0, i64 6
+ %ptr4 = getelementptr float, float *%ptr0, i64 8
+ %ptr5 = getelementptr float, float *%ptr0, i64 10
+ %ptr6 = getelementptr float, float *%ptr0, i64 12
+ %ptr7 = getelementptr float, float *%ptr0, i64 14
+ %ptr8 = getelementptr float, float *%ptr0, i64 16
+ %ptr9 = getelementptr float, float *%ptr0, i64 18
+ %ptr10 = getelementptr float, float *%ptr0, i64 20
- %val0 = load float *%ptr0
- %val1 = load float *%ptr1
- %val2 = load float *%ptr2
- %val3 = load float *%ptr3
- %val4 = load float *%ptr4
- %val5 = load float *%ptr5
- %val6 = load float *%ptr6
- %val7 = load float *%ptr7
- %val8 = load float *%ptr8
- %val9 = load float *%ptr9
- %val10 = load float *%ptr10
+ %val0 = load float , float *%ptr0
+ %val1 = load float , float *%ptr1
+ %val2 = load float , float *%ptr2
+ %val3 = load float , float *%ptr3
+ %val4 = load float , float *%ptr4
+ %val5 = load float , float *%ptr5
+ %val6 = load float , float *%ptr6
+ %val7 = load float , float *%ptr7
+ %val8 = load float , float *%ptr8
+ %val9 = load float , float *%ptr9
+ %val10 = load float , float *%ptr10
%ret = call float @foo()
diff --git a/test/CodeGen/SystemZ/fp-sub-02.ll b/test/CodeGen/SystemZ/fp-sub-02.ll
index 99cafed8d08b..143baac23e19 100644
--- a/test/CodeGen/SystemZ/fp-sub-02.ll
+++ b/test/CodeGen/SystemZ/fp-sub-02.ll
@@ -1,6 +1,8 @@
; Test 64-bit floating-point subtraction.
;
-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \
+; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
declare double @foo()
@@ -18,7 +20,7 @@ define double @f2(double %f1, double *%ptr) {
; CHECK-LABEL: f2:
; CHECK: sdb %f0, 0(%r2)
; CHECK: br %r14
- %f2 = load double *%ptr
+ %f2 = load double , double *%ptr
%res = fsub double %f1, %f2
ret double %res
}
@@ -28,8 +30,8 @@ define double @f3(double %f1, double *%base) {
; CHECK-LABEL: f3:
; CHECK: sdb %f0, 4088(%r2)
; CHECK: br %r14
- %ptr = getelementptr double *%base, i64 511
- %f2 = load double *%ptr
+ %ptr = getelementptr double, double *%base, i64 511
+ %f2 = load double , double *%ptr
%res = fsub double %f1, %f2
ret double %res
}
@@ -41,8 +43,8 @@ define double @f4(double %f1, double *%base) {
; CHECK: aghi %r2, 4096
; CHECK: sdb %f0, 0(%r2)
; CHECK: br %r14
- %ptr = getelementptr double *%base, i64 512
- %f2 = load double *%ptr
+ %ptr = getelementptr double, double *%base, i64 512
+ %f2 = load double , double *%ptr
%res = fsub double %f1, %f2
ret double %res
}
@@ -53,8 +55,8 @@ define double @f5(double %f1, double *%base) {
; CHECK: aghi %r2, -8
; CHECK: sdb %f0, 0(%r2)
; CHECK: br %r14
- %ptr = getelementptr double *%base, i64 -1
- %f2 = load double *%ptr
+ %ptr = getelementptr double, double *%base, i64 -1
+ %f2 = load double , double *%ptr
%res = fsub double %f1, %f2
ret double %res
}
@@ -65,9 +67,9 @@ define double @f6(double %f1, double *%base, i64 %index) {
; CHECK: sllg %r1, %r3, 3
; CHECK: sdb %f0, 800(%r1,%r2)
; CHECK: br %r14
- %ptr1 = getelementptr double *%base, i64 %index
- %ptr2 = getelementptr double *%ptr1, i64 100
- %f2 = load double *%ptr2
+ %ptr1 = getelementptr double, double *%base, i64 %index
+ %ptr2 = getelementptr double, double *%ptr1, i64 100
+ %f2 = load double , double *%ptr2
%res = fsub double %f1, %f2
ret double %res
}
@@ -76,30 +78,30 @@ define double @f6(double %f1, double *%base, i64 %index) {
define double @f7(double *%ptr0) {
; CHECK-LABEL: f7:
; CHECK: brasl %r14, foo@PLT
-; CHECK: sdb %f0, 16{{[04]}}(%r15)
+; CHECK-SCALAR: sdb %f0, 16{{[04]}}(%r15)
; CHECK: br %r14
- %ptr1 = getelementptr double *%ptr0, i64 2
- %ptr2 = getelementptr double *%ptr0, i64 4
- %ptr3 = getelementptr double *%ptr0, i64 6
- %ptr4 = getelementptr double *%ptr0, i64 8
- %ptr5 = getelementptr double *%ptr0, i64 10
- %ptr6 = getelementptr double *%ptr0, i64 12
- %ptr7 = getelementptr double *%ptr0, i64 14
- %ptr8 = getelementptr double *%ptr0, i64 16
- %ptr9 = getelementptr double *%ptr0, i64 18
- %ptr10 = getelementptr double *%ptr0, i64 20
+ %ptr1 = getelementptr double, double *%ptr0, i64 2
+ %ptr2 = getelementptr double, double *%ptr0, i64 4
+ %ptr3 = getelementptr double, double *%ptr0, i64 6
+ %ptr4 = getelementptr double, double *%ptr0, i64 8
+ %ptr5 = getelementptr double, double *%ptr0, i64 10
+ %ptr6 = getelementptr double, double *%ptr0, i64 12
+ %ptr7 = getelementptr double, double *%ptr0, i64 14
+ %ptr8 = getelementptr double, double *%ptr0, i64 16
+ %ptr9 = getelementptr double, double *%ptr0, i64 18
+ %ptr10 = getelementptr double, double *%ptr0, i64 20
- %val0 = load double *%ptr0
- %val1 = load double *%ptr1
- %val2 = load double *%ptr2
- %val3 = load double *%ptr3
- %val4 = load double *%ptr4
- %val5 = load double *%ptr5
- %val6 = load double *%ptr6
- %val7 = load double *%ptr7
- %val8 = load double *%ptr8
- %val9 = load double *%ptr9
- %val10 = load double *%ptr10
+ %val0 = load double , double *%ptr0
+ %val1 = load double , double *%ptr1
+ %val2 = load double , double *%ptr2
+ %val3 = load double , double *%ptr3
+ %val4 = load double , double *%ptr4
+ %val5 = load double , double *%ptr5
+ %val6 = load double , double *%ptr6
+ %val7 = load double , double *%ptr7
+ %val8 = load double , double *%ptr8
+ %val9 = load double , double *%ptr9
+ %val10 = load double , double *%ptr10
%ret = call double @foo()
diff --git a/test/CodeGen/SystemZ/fp-sub-03.ll b/test/CodeGen/SystemZ/fp-sub-03.ll
index a1404c4ff0e7..86faafeaaca2 100644
--- a/test/CodeGen/SystemZ/fp-sub-03.ll
+++ b/test/CodeGen/SystemZ/fp-sub-03.ll
@@ -12,7 +12,7 @@ define void @f1(fp128 *%ptr, float %f2) {
; CHECK: std %f1, 0(%r2)
; CHECK: std %f3, 8(%r2)
; CHECK: br %r14
- %f1 = load fp128 *%ptr
+ %f1 = load fp128 , fp128 *%ptr
%f2x = fpext float %f2 to fp128
%sum = fsub fp128 %f1, %f2x
store fp128 %sum, fp128 *%ptr
diff --git a/test/CodeGen/SystemZ/frame-01.ll b/test/CodeGen/SystemZ/frame-01.ll
index f61836ca8552..5afc4f1cef34 100644
--- a/test/CodeGen/SystemZ/frame-01.ll
+++ b/test/CodeGen/SystemZ/frame-01.ll
@@ -31,7 +31,7 @@ define void @f2(i64 %x) {
; CHECK: aghi %r15, 32760
; CHECK: br %r14
%y = alloca [4073 x i64], align 8
- %ptr = getelementptr inbounds [4073 x i64]* %y, i64 0, i64 0
+ %ptr = getelementptr inbounds [4073 x i64], [4073 x i64]* %y, i64 0, i64 0
store volatile i64 %x, i64* %ptr
ret void
}
@@ -46,7 +46,7 @@ define void @f3(i64 %x) {
; CHECK: agfi %r15, 32768
; CHECK: br %r14
%y = alloca [4074 x i64], align 8
- %ptr = getelementptr inbounds [4074 x i64]* %y, i64 0, i64 0
+ %ptr = getelementptr inbounds [4074 x i64], [4074 x i64]* %y, i64 0, i64 0
store volatile i64 %x, i64* %ptr
ret void
}
@@ -61,7 +61,7 @@ define void @f4(i64 %x) {
; CHECK: agfi %r15, 32776
; CHECK: br %r14
%y = alloca [4075 x i64], align 8
- %ptr = getelementptr inbounds [4075 x i64]* %y, i64 0, i64 0
+ %ptr = getelementptr inbounds [4075 x i64], [4075 x i64]* %y, i64 0, i64 0
store volatile i64 %x, i64* %ptr
ret void
}
@@ -75,7 +75,7 @@ define void @f5(i64 %x) {
; CHECK: agfi %r15, 2147483640
; CHECK: br %r14
%y = alloca [268435433 x i64], align 8
- %ptr = getelementptr inbounds [268435433 x i64]* %y, i64 0, i64 0
+ %ptr = getelementptr inbounds [268435433 x i64], [268435433 x i64]* %y, i64 0, i64 0
store volatile i64 %x, i64* %ptr
ret void
}
@@ -90,7 +90,7 @@ define void @f6(i64 %x) {
; CHECK: aghi %r15, 8
; CHECK: br %r14
%y = alloca [268435434 x i64], align 8
- %ptr = getelementptr inbounds [268435434 x i64]* %y, i64 0, i64 0
+ %ptr = getelementptr inbounds [268435434 x i64], [268435434 x i64]* %y, i64 0, i64 0
store volatile i64 %x, i64* %ptr
ret void
}
@@ -106,7 +106,7 @@ define void @f7(i64 %x) {
; CHECK: aghi %r15, 16
; CHECK: br %r14
%y = alloca [268435435 x i64], align 8
- %ptr = getelementptr inbounds [268435435 x i64]* %y, i64 0, i64 0
+ %ptr = getelementptr inbounds [268435435 x i64], [268435435 x i64]* %y, i64 0, i64 0
store volatile i64 %x, i64* %ptr
ret void
}
diff --git a/test/CodeGen/SystemZ/frame-02.ll b/test/CodeGen/SystemZ/frame-02.ll
index 9a7f8eac9eba..a41db77e851a 100644
--- a/test/CodeGen/SystemZ/frame-02.ll
+++ b/test/CodeGen/SystemZ/frame-02.ll
@@ -37,22 +37,22 @@ define void @f1(float *%ptr) {
; CHECK: ld %f15, 160(%r15)
; CHECK: aghi %r15, 224
; CHECK: br %r14
- %l0 = load volatile float *%ptr
- %l1 = load volatile float *%ptr
- %l2 = load volatile float *%ptr
- %l3 = load volatile float *%ptr
- %l4 = load volatile float *%ptr
- %l5 = load volatile float *%ptr
- %l6 = load volatile float *%ptr
- %l7 = load volatile float *%ptr
- %l8 = load volatile float *%ptr
- %l9 = load volatile float *%ptr
- %l10 = load volatile float *%ptr
- %l11 = load volatile float *%ptr
- %l12 = load volatile float *%ptr
- %l13 = load volatile float *%ptr
- %l14 = load volatile float *%ptr
- %l15 = load volatile float *%ptr
+ %l0 = load volatile float , float *%ptr
+ %l1 = load volatile float , float *%ptr
+ %l2 = load volatile float , float *%ptr
+ %l3 = load volatile float , float *%ptr
+ %l4 = load volatile float , float *%ptr
+ %l5 = load volatile float , float *%ptr
+ %l6 = load volatile float , float *%ptr
+ %l7 = load volatile float , float *%ptr
+ %l8 = load volatile float , float *%ptr
+ %l9 = load volatile float , float *%ptr
+ %l10 = load volatile float , float *%ptr
+ %l11 = load volatile float , float *%ptr
+ %l12 = load volatile float , float *%ptr
+ %l13 = load volatile float , float *%ptr
+ %l14 = load volatile float , float *%ptr
+ %l15 = load volatile float , float *%ptr
%add0 = fadd float %l0, %l0
%add1 = fadd float %l1, %add0
%add2 = fadd float %l2, %add1
@@ -119,21 +119,21 @@ define void @f2(float *%ptr) {
; CHECK: ld %f14, 160(%r15)
; CHECK: aghi %r15, 216
; CHECK: br %r14
- %l0 = load volatile float *%ptr
- %l1 = load volatile float *%ptr
- %l2 = load volatile float *%ptr
- %l3 = load volatile float *%ptr
- %l4 = load volatile float *%ptr
- %l5 = load volatile float *%ptr
- %l6 = load volatile float *%ptr
- %l7 = load volatile float *%ptr
- %l8 = load volatile float *%ptr
- %l9 = load volatile float *%ptr
- %l10 = load volatile float *%ptr
- %l11 = load volatile float *%ptr
- %l12 = load volatile float *%ptr
- %l13 = load volatile float *%ptr
- %l14 = load volatile float *%ptr
+ %l0 = load volatile float , float *%ptr
+ %l1 = load volatile float , float *%ptr
+ %l2 = load volatile float , float *%ptr
+ %l3 = load volatile float , float *%ptr
+ %l4 = load volatile float , float *%ptr
+ %l5 = load volatile float , float *%ptr
+ %l6 = load volatile float , float *%ptr
+ %l7 = load volatile float , float *%ptr
+ %l8 = load volatile float , float *%ptr
+ %l9 = load volatile float , float *%ptr
+ %l10 = load volatile float , float *%ptr
+ %l11 = load volatile float , float *%ptr
+ %l12 = load volatile float , float *%ptr
+ %l13 = load volatile float , float *%ptr
+ %l14 = load volatile float , float *%ptr
%add0 = fadd float %l0, %l0
%add1 = fadd float %l1, %add0
%add2 = fadd float %l2, %add1
@@ -185,15 +185,15 @@ define void @f3(float *%ptr) {
; CHECK: ld %f8, 160(%r15)
; CHECK: aghi %r15, 168
; CHECK: br %r14
- %l0 = load volatile float *%ptr
- %l1 = load volatile float *%ptr
- %l2 = load volatile float *%ptr
- %l3 = load volatile float *%ptr
- %l4 = load volatile float *%ptr
- %l5 = load volatile float *%ptr
- %l6 = load volatile float *%ptr
- %l7 = load volatile float *%ptr
- %l8 = load volatile float *%ptr
+ %l0 = load volatile float , float *%ptr
+ %l1 = load volatile float , float *%ptr
+ %l2 = load volatile float , float *%ptr
+ %l3 = load volatile float , float *%ptr
+ %l4 = load volatile float , float *%ptr
+ %l5 = load volatile float , float *%ptr
+ %l6 = load volatile float , float *%ptr
+ %l7 = load volatile float , float *%ptr
+ %l8 = load volatile float , float *%ptr
%add0 = fadd float %l0, %l0
%add1 = fadd float %l1, %add0
%add2 = fadd float %l2, %add1
@@ -229,14 +229,14 @@ define void @f4(float *%ptr) {
; CHECK-NOT: %f14
; CHECK-NOT: %f15
; CHECK: br %r14
- %l0 = load volatile float *%ptr
- %l1 = load volatile float *%ptr
- %l2 = load volatile float *%ptr
- %l3 = load volatile float *%ptr
- %l4 = load volatile float *%ptr
- %l5 = load volatile float *%ptr
- %l6 = load volatile float *%ptr
- %l7 = load volatile float *%ptr
+ %l0 = load volatile float , float *%ptr
+ %l1 = load volatile float , float *%ptr
+ %l2 = load volatile float , float *%ptr
+ %l3 = load volatile float , float *%ptr
+ %l4 = load volatile float , float *%ptr
+ %l5 = load volatile float , float *%ptr
+ %l6 = load volatile float , float *%ptr
+ %l7 = load volatile float , float *%ptr
%add0 = fadd float %l0, %l0
%add1 = fadd float %l1, %add0
%add2 = fadd float %l2, %add1
diff --git a/test/CodeGen/SystemZ/frame-03.ll b/test/CodeGen/SystemZ/frame-03.ll
index db146c7c985d..21b8fdb0d672 100644
--- a/test/CodeGen/SystemZ/frame-03.ll
+++ b/test/CodeGen/SystemZ/frame-03.ll
@@ -2,7 +2,7 @@
; uses a different register class, but the set of saved and restored
; registers should be the same.
;
-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s
; This function should require all FPRs, but no other spill slots.
; We need to save and restore 8 of the 16 FPRs, so the frame size
@@ -39,22 +39,22 @@ define void @f1(double *%ptr) {
; CHECK: ld %f15, 160(%r15)
; CHECK: aghi %r15, 224
; CHECK: br %r14
- %l0 = load volatile double *%ptr
- %l1 = load volatile double *%ptr
- %l2 = load volatile double *%ptr
- %l3 = load volatile double *%ptr
- %l4 = load volatile double *%ptr
- %l5 = load volatile double *%ptr
- %l6 = load volatile double *%ptr
- %l7 = load volatile double *%ptr
- %l8 = load volatile double *%ptr
- %l9 = load volatile double *%ptr
- %l10 = load volatile double *%ptr
- %l11 = load volatile double *%ptr
- %l12 = load volatile double *%ptr
- %l13 = load volatile double *%ptr
- %l14 = load volatile double *%ptr
- %l15 = load volatile double *%ptr
+ %l0 = load volatile double , double *%ptr
+ %l1 = load volatile double , double *%ptr
+ %l2 = load volatile double , double *%ptr
+ %l3 = load volatile double , double *%ptr
+ %l4 = load volatile double , double *%ptr
+ %l5 = load volatile double , double *%ptr
+ %l6 = load volatile double , double *%ptr
+ %l7 = load volatile double , double *%ptr
+ %l8 = load volatile double , double *%ptr
+ %l9 = load volatile double , double *%ptr
+ %l10 = load volatile double , double *%ptr
+ %l11 = load volatile double , double *%ptr
+ %l12 = load volatile double , double *%ptr
+ %l13 = load volatile double , double *%ptr
+ %l14 = load volatile double , double *%ptr
+ %l15 = load volatile double , double *%ptr
%add0 = fadd double %l0, %l0
%add1 = fadd double %l1, %add0
%add2 = fadd double %l2, %add1
@@ -121,21 +121,21 @@ define void @f2(double *%ptr) {
; CHECK: ld %f14, 160(%r15)
; CHECK: aghi %r15, 216
; CHECK: br %r14
- %l0 = load volatile double *%ptr
- %l1 = load volatile double *%ptr
- %l2 = load volatile double *%ptr
- %l3 = load volatile double *%ptr
- %l4 = load volatile double *%ptr
- %l5 = load volatile double *%ptr
- %l6 = load volatile double *%ptr
- %l7 = load volatile double *%ptr
- %l8 = load volatile double *%ptr
- %l9 = load volatile double *%ptr
- %l10 = load volatile double *%ptr
- %l11 = load volatile double *%ptr
- %l12 = load volatile double *%ptr
- %l13 = load volatile double *%ptr
- %l14 = load volatile double *%ptr
+ %l0 = load volatile double , double *%ptr
+ %l1 = load volatile double , double *%ptr
+ %l2 = load volatile double , double *%ptr
+ %l3 = load volatile double , double *%ptr
+ %l4 = load volatile double , double *%ptr
+ %l5 = load volatile double , double *%ptr
+ %l6 = load volatile double , double *%ptr
+ %l7 = load volatile double , double *%ptr
+ %l8 = load volatile double , double *%ptr
+ %l9 = load volatile double , double *%ptr
+ %l10 = load volatile double , double *%ptr
+ %l11 = load volatile double , double *%ptr
+ %l12 = load volatile double , double *%ptr
+ %l13 = load volatile double , double *%ptr
+ %l14 = load volatile double , double *%ptr
%add0 = fadd double %l0, %l0
%add1 = fadd double %l1, %add0
%add2 = fadd double %l2, %add1
@@ -187,15 +187,15 @@ define void @f3(double *%ptr) {
; CHECK: ld %f8, 160(%r15)
; CHECK: aghi %r15, 168
; CHECK: br %r14
- %l0 = load volatile double *%ptr
- %l1 = load volatile double *%ptr
- %l2 = load volatile double *%ptr
- %l3 = load volatile double *%ptr
- %l4 = load volatile double *%ptr
- %l5 = load volatile double *%ptr
- %l6 = load volatile double *%ptr
- %l7 = load volatile double *%ptr
- %l8 = load volatile double *%ptr
+ %l0 = load volatile double , double *%ptr
+ %l1 = load volatile double , double *%ptr
+ %l2 = load volatile double , double *%ptr
+ %l3 = load volatile double , double *%ptr
+ %l4 = load volatile double , double *%ptr
+ %l5 = load volatile double , double *%ptr
+ %l6 = load volatile double , double *%ptr
+ %l7 = load volatile double , double *%ptr
+ %l8 = load volatile double , double *%ptr
%add0 = fadd double %l0, %l0
%add1 = fadd double %l1, %add0
%add2 = fadd double %l2, %add1
@@ -231,14 +231,14 @@ define void @f4(double *%ptr) {
; CHECK-NOT: %f14
; CHECK-NOT: %f15
; CHECK: br %r14
- %l0 = load volatile double *%ptr
- %l1 = load volatile double *%ptr
- %l2 = load volatile double *%ptr
- %l3 = load volatile double *%ptr
- %l4 = load volatile double *%ptr
- %l5 = load volatile double *%ptr
- %l6 = load volatile double *%ptr
- %l7 = load volatile double *%ptr
+ %l0 = load volatile double , double *%ptr
+ %l1 = load volatile double , double *%ptr
+ %l2 = load volatile double , double *%ptr
+ %l3 = load volatile double , double *%ptr
+ %l4 = load volatile double , double *%ptr
+ %l5 = load volatile double , double *%ptr
+ %l6 = load volatile double , double *%ptr
+ %l7 = load volatile double , double *%ptr
%add0 = fadd double %l0, %l0
%add1 = fadd double %l1, %add0
%add2 = fadd double %l2, %add1
diff --git a/test/CodeGen/SystemZ/frame-04.ll b/test/CodeGen/SystemZ/frame-04.ll
index 93c59a3bc15f..602050978d30 100644
--- a/test/CodeGen/SystemZ/frame-04.ll
+++ b/test/CodeGen/SystemZ/frame-04.ll
@@ -38,14 +38,14 @@ define void @f1(fp128 *%ptr) {
; CHECK: ld %f15, 160(%r15)
; CHECK: aghi %r15, 224
; CHECK: br %r14
- %l0 = load volatile fp128 *%ptr
- %l1 = load volatile fp128 *%ptr
- %l4 = load volatile fp128 *%ptr
- %l5 = load volatile fp128 *%ptr
- %l8 = load volatile fp128 *%ptr
- %l9 = load volatile fp128 *%ptr
- %l12 = load volatile fp128 *%ptr
- %l13 = load volatile fp128 *%ptr
+ %l0 = load volatile fp128 , fp128 *%ptr
+ %l1 = load volatile fp128 , fp128 *%ptr
+ %l4 = load volatile fp128 , fp128 *%ptr
+ %l5 = load volatile fp128 , fp128 *%ptr
+ %l8 = load volatile fp128 , fp128 *%ptr
+ %l9 = load volatile fp128 , fp128 *%ptr
+ %l12 = load volatile fp128 , fp128 *%ptr
+ %l13 = load volatile fp128 , fp128 *%ptr
%add0 = fadd fp128 %l0, %l0
%add1 = fadd fp128 %l1, %add0
%add4 = fadd fp128 %l4, %add1
@@ -94,13 +94,13 @@ define void @f2(fp128 *%ptr) {
; CHECK: ld %f14, 160(%r15)
; CHECK: aghi %r15, 208
; CHECK: br %r14
- %l0 = load volatile fp128 *%ptr
- %l1 = load volatile fp128 *%ptr
- %l4 = load volatile fp128 *%ptr
- %l5 = load volatile fp128 *%ptr
- %l8 = load volatile fp128 *%ptr
- %l9 = load volatile fp128 *%ptr
- %l12 = load volatile fp128 *%ptr
+ %l0 = load volatile fp128 , fp128 *%ptr
+ %l1 = load volatile fp128 , fp128 *%ptr
+ %l4 = load volatile fp128 , fp128 *%ptr
+ %l5 = load volatile fp128 , fp128 *%ptr
+ %l8 = load volatile fp128 , fp128 *%ptr
+ %l9 = load volatile fp128 , fp128 *%ptr
+ %l12 = load volatile fp128 , fp128 *%ptr
%add0 = fadd fp128 %l0, %l0
%add1 = fadd fp128 %l1, %add0
%add4 = fadd fp128 %l4, %add1
@@ -139,11 +139,11 @@ define void @f3(fp128 *%ptr) {
; CHECK: ld %f10, 160(%r15)
; CHECK: aghi %r15, 176
; CHECK: br %r14
- %l0 = load volatile fp128 *%ptr
- %l1 = load volatile fp128 *%ptr
- %l4 = load volatile fp128 *%ptr
- %l5 = load volatile fp128 *%ptr
- %l8 = load volatile fp128 *%ptr
+ %l0 = load volatile fp128 , fp128 *%ptr
+ %l1 = load volatile fp128 , fp128 *%ptr
+ %l4 = load volatile fp128 , fp128 *%ptr
+ %l5 = load volatile fp128 , fp128 *%ptr
+ %l8 = load volatile fp128 , fp128 *%ptr
%add0 = fadd fp128 %l0, %l0
%add1 = fadd fp128 %l1, %add0
%add4 = fadd fp128 %l4, %add1
@@ -171,10 +171,10 @@ define void @f4(fp128 *%ptr) {
; CHECK-NOT: %f14
; CHECK-NOT: %f15
; CHECK: br %r14
- %l0 = load volatile fp128 *%ptr
- %l1 = load volatile fp128 *%ptr
- %l4 = load volatile fp128 *%ptr
- %l5 = load volatile fp128 *%ptr
+ %l0 = load volatile fp128 , fp128 *%ptr
+ %l1 = load volatile fp128 , fp128 *%ptr
+ %l4 = load volatile fp128 , fp128 *%ptr
+ %l5 = load volatile fp128 , fp128 *%ptr
%add0 = fadd fp128 %l0, %l0
%add1 = fadd fp128 %l1, %add0
%add4 = fadd fp128 %l4, %add1
diff --git a/test/CodeGen/SystemZ/frame-05.ll b/test/CodeGen/SystemZ/frame-05.ll
index f95284deeb79..93130dcbfbbc 100644
--- a/test/CodeGen/SystemZ/frame-05.ll
+++ b/test/CodeGen/SystemZ/frame-05.ll
@@ -32,20 +32,20 @@ define void @f1(i32 *%ptr) {
; CHECK: st {{.*}}, 4(%r2)
; CHECK: lmg %r6, %r15, 48(%r15)
; CHECK: br %r14
- %l0 = load volatile i32 *%ptr
- %l1 = load volatile i32 *%ptr
- %l3 = load volatile i32 *%ptr
- %l4 = load volatile i32 *%ptr
- %l5 = load volatile i32 *%ptr
- %l6 = load volatile i32 *%ptr
- %l7 = load volatile i32 *%ptr
- %l8 = load volatile i32 *%ptr
- %l9 = load volatile i32 *%ptr
- %l10 = load volatile i32 *%ptr
- %l11 = load volatile i32 *%ptr
- %l12 = load volatile i32 *%ptr
- %l13 = load volatile i32 *%ptr
- %l14 = load volatile i32 *%ptr
+ %l0 = load volatile i32 , i32 *%ptr
+ %l1 = load volatile i32 , i32 *%ptr
+ %l3 = load volatile i32 , i32 *%ptr
+ %l4 = load volatile i32 , i32 *%ptr
+ %l5 = load volatile i32 , i32 *%ptr
+ %l6 = load volatile i32 , i32 *%ptr
+ %l7 = load volatile i32 , i32 *%ptr
+ %l8 = load volatile i32 , i32 *%ptr
+ %l9 = load volatile i32 , i32 *%ptr
+ %l10 = load volatile i32 , i32 *%ptr
+ %l11 = load volatile i32 , i32 *%ptr
+ %l12 = load volatile i32 , i32 *%ptr
+ %l13 = load volatile i32 , i32 *%ptr
+ %l14 = load volatile i32 , i32 *%ptr
%add0 = add i32 %l0, %l0
%add1 = add i32 %l1, %add0
%add3 = add i32 %l3, %add1
@@ -73,7 +73,7 @@ define void @f1(i32 *%ptr) {
store volatile i32 %add11, i32 *%ptr
store volatile i32 %add12, i32 *%ptr
store volatile i32 %add13, i32 *%ptr
- %final = getelementptr i32 *%ptr, i32 1
+ %final = getelementptr i32, i32 *%ptr, i32 1
store volatile i32 %add14, i32 *%final
ret void
}
@@ -100,19 +100,19 @@ define void @f2(i32 *%ptr) {
; CHECK: st {{.*}}, 4(%r2)
; CHECK: lmg %r7, %r15, 56(%r15)
; CHECK: br %r14
- %l0 = load volatile i32 *%ptr
- %l1 = load volatile i32 *%ptr
- %l3 = load volatile i32 *%ptr
- %l4 = load volatile i32 *%ptr
- %l5 = load volatile i32 *%ptr
- %l7 = load volatile i32 *%ptr
- %l8 = load volatile i32 *%ptr
- %l9 = load volatile i32 *%ptr
- %l10 = load volatile i32 *%ptr
- %l11 = load volatile i32 *%ptr
- %l12 = load volatile i32 *%ptr
- %l13 = load volatile i32 *%ptr
- %l14 = load volatile i32 *%ptr
+ %l0 = load volatile i32 , i32 *%ptr
+ %l1 = load volatile i32 , i32 *%ptr
+ %l3 = load volatile i32 , i32 *%ptr
+ %l4 = load volatile i32 , i32 *%ptr
+ %l5 = load volatile i32 , i32 *%ptr
+ %l7 = load volatile i32 , i32 *%ptr
+ %l8 = load volatile i32 , i32 *%ptr
+ %l9 = load volatile i32 , i32 *%ptr
+ %l10 = load volatile i32 , i32 *%ptr
+ %l11 = load volatile i32 , i32 *%ptr
+ %l12 = load volatile i32 , i32 *%ptr
+ %l13 = load volatile i32 , i32 *%ptr
+ %l14 = load volatile i32 , i32 *%ptr
%add0 = add i32 %l0, %l0
%add1 = add i32 %l1, %add0
%add3 = add i32 %l3, %add1
@@ -138,7 +138,7 @@ define void @f2(i32 *%ptr) {
store volatile i32 %add11, i32 *%ptr
store volatile i32 %add12, i32 *%ptr
store volatile i32 %add13, i32 *%ptr
- %final = getelementptr i32 *%ptr, i32 1
+ %final = getelementptr i32, i32 *%ptr, i32 1
store volatile i32 %add14, i32 *%final
ret void
}
@@ -163,12 +163,12 @@ define void @f3(i32 *%ptr) {
; CHECK: st {{.*}}, 4(%r2)
; CHECK: lmg %r14, %r15, 112(%r15)
; CHECK: br %r14
- %l0 = load volatile i32 *%ptr
- %l1 = load volatile i32 *%ptr
- %l3 = load volatile i32 *%ptr
- %l4 = load volatile i32 *%ptr
- %l5 = load volatile i32 *%ptr
- %l14 = load volatile i32 *%ptr
+ %l0 = load volatile i32 , i32 *%ptr
+ %l1 = load volatile i32 , i32 *%ptr
+ %l3 = load volatile i32 , i32 *%ptr
+ %l4 = load volatile i32 , i32 *%ptr
+ %l5 = load volatile i32 , i32 *%ptr
+ %l14 = load volatile i32 , i32 *%ptr
%add0 = add i32 %l0, %l0
%add1 = add i32 %l1, %add0
%add3 = add i32 %l3, %add1
@@ -180,7 +180,7 @@ define void @f3(i32 *%ptr) {
store volatile i32 %add3, i32 *%ptr
store volatile i32 %add4, i32 *%ptr
store volatile i32 %add5, i32 *%ptr
- %final = getelementptr i32 *%ptr, i32 1
+ %final = getelementptr i32, i32 *%ptr, i32 1
store volatile i32 %add14, i32 *%final
ret void
}
@@ -199,11 +199,11 @@ define void @f4(i32 *%ptr) {
; CHECK-NOT: %r12
; CHECK-NOT: %r13
; CHECK: br %r14
- %l0 = load volatile i32 *%ptr
- %l1 = load volatile i32 *%ptr
- %l3 = load volatile i32 *%ptr
- %l4 = load volatile i32 *%ptr
- %l5 = load volatile i32 *%ptr
+ %l0 = load volatile i32 , i32 *%ptr
+ %l1 = load volatile i32 , i32 *%ptr
+ %l3 = load volatile i32 , i32 *%ptr
+ %l4 = load volatile i32 , i32 *%ptr
+ %l5 = load volatile i32 , i32 *%ptr
%add0 = add i32 %l0, %l0
%add1 = add i32 %l1, %add0
%add3 = add i32 %l3, %add1
@@ -213,7 +213,7 @@ define void @f4(i32 *%ptr) {
store volatile i32 %add1, i32 *%ptr
store volatile i32 %add3, i32 *%ptr
store volatile i32 %add4, i32 *%ptr
- %final = getelementptr i32 *%ptr, i32 1
+ %final = getelementptr i32, i32 *%ptr, i32 1
store volatile i32 %add5, i32 *%final
ret void
}
diff --git a/test/CodeGen/SystemZ/frame-06.ll b/test/CodeGen/SystemZ/frame-06.ll
index ad22f10903ad..c2aa8af336a0 100644
--- a/test/CodeGen/SystemZ/frame-06.ll
+++ b/test/CodeGen/SystemZ/frame-06.ll
@@ -29,20 +29,20 @@ define void @f1(i64 *%ptr) {
; CHECK: stg {{.*}}, 8(%r2)
; CHECK: lmg %r6, %r15, 48(%r15)
; CHECK: br %r14
- %l0 = load volatile i64 *%ptr
- %l1 = load volatile i64 *%ptr
- %l3 = load volatile i64 *%ptr
- %l4 = load volatile i64 *%ptr
- %l5 = load volatile i64 *%ptr
- %l6 = load volatile i64 *%ptr
- %l7 = load volatile i64 *%ptr
- %l8 = load volatile i64 *%ptr
- %l9 = load volatile i64 *%ptr
- %l10 = load volatile i64 *%ptr
- %l11 = load volatile i64 *%ptr
- %l12 = load volatile i64 *%ptr
- %l13 = load volatile i64 *%ptr
- %l14 = load volatile i64 *%ptr
+ %l0 = load volatile i64 , i64 *%ptr
+ %l1 = load volatile i64 , i64 *%ptr
+ %l3 = load volatile i64 , i64 *%ptr
+ %l4 = load volatile i64 , i64 *%ptr
+ %l5 = load volatile i64 , i64 *%ptr
+ %l6 = load volatile i64 , i64 *%ptr
+ %l7 = load volatile i64 , i64 *%ptr
+ %l8 = load volatile i64 , i64 *%ptr
+ %l9 = load volatile i64 , i64 *%ptr
+ %l10 = load volatile i64 , i64 *%ptr
+ %l11 = load volatile i64 , i64 *%ptr
+ %l12 = load volatile i64 , i64 *%ptr
+ %l13 = load volatile i64 , i64 *%ptr
+ %l14 = load volatile i64 , i64 *%ptr
%add0 = add i64 %l0, %l0
%add1 = add i64 %l1, %add0
%add3 = add i64 %l3, %add1
@@ -70,7 +70,7 @@ define void @f1(i64 *%ptr) {
store volatile i64 %add11, i64 *%ptr
store volatile i64 %add12, i64 *%ptr
store volatile i64 %add13, i64 *%ptr
- %final = getelementptr i64 *%ptr, i64 1
+ %final = getelementptr i64, i64 *%ptr, i64 1
store volatile i64 %add14, i64 *%final
ret void
}
@@ -97,19 +97,19 @@ define void @f2(i64 *%ptr) {
; CHECK: stg {{.*}}, 8(%r2)
; CHECK: lmg %r7, %r15, 56(%r15)
; CHECK: br %r14
- %l0 = load volatile i64 *%ptr
- %l1 = load volatile i64 *%ptr
- %l3 = load volatile i64 *%ptr
- %l4 = load volatile i64 *%ptr
- %l5 = load volatile i64 *%ptr
- %l7 = load volatile i64 *%ptr
- %l8 = load volatile i64 *%ptr
- %l9 = load volatile i64 *%ptr
- %l10 = load volatile i64 *%ptr
- %l11 = load volatile i64 *%ptr
- %l12 = load volatile i64 *%ptr
- %l13 = load volatile i64 *%ptr
- %l14 = load volatile i64 *%ptr
+ %l0 = load volatile i64 , i64 *%ptr
+ %l1 = load volatile i64 , i64 *%ptr
+ %l3 = load volatile i64 , i64 *%ptr
+ %l4 = load volatile i64 , i64 *%ptr
+ %l5 = load volatile i64 , i64 *%ptr
+ %l7 = load volatile i64 , i64 *%ptr
+ %l8 = load volatile i64 , i64 *%ptr
+ %l9 = load volatile i64 , i64 *%ptr
+ %l10 = load volatile i64 , i64 *%ptr
+ %l11 = load volatile i64 , i64 *%ptr
+ %l12 = load volatile i64 , i64 *%ptr
+ %l13 = load volatile i64 , i64 *%ptr
+ %l14 = load volatile i64 , i64 *%ptr
%add0 = add i64 %l0, %l0
%add1 = add i64 %l1, %add0
%add3 = add i64 %l3, %add1
@@ -135,7 +135,7 @@ define void @f2(i64 *%ptr) {
store volatile i64 %add11, i64 *%ptr
store volatile i64 %add12, i64 *%ptr
store volatile i64 %add13, i64 *%ptr
- %final = getelementptr i64 *%ptr, i64 1
+ %final = getelementptr i64, i64 *%ptr, i64 1
store volatile i64 %add14, i64 *%final
ret void
}
@@ -160,12 +160,12 @@ define void @f3(i64 *%ptr) {
; CHECK: stg {{.*}}, 8(%r2)
; CHECK: lmg %r14, %r15, 112(%r15)
; CHECK: br %r14
- %l0 = load volatile i64 *%ptr
- %l1 = load volatile i64 *%ptr
- %l3 = load volatile i64 *%ptr
- %l4 = load volatile i64 *%ptr
- %l5 = load volatile i64 *%ptr
- %l14 = load volatile i64 *%ptr
+ %l0 = load volatile i64 , i64 *%ptr
+ %l1 = load volatile i64 , i64 *%ptr
+ %l3 = load volatile i64 , i64 *%ptr
+ %l4 = load volatile i64 , i64 *%ptr
+ %l5 = load volatile i64 , i64 *%ptr
+ %l14 = load volatile i64 , i64 *%ptr
%add0 = add i64 %l0, %l0
%add1 = add i64 %l1, %add0
%add3 = add i64 %l3, %add1
@@ -177,7 +177,7 @@ define void @f3(i64 *%ptr) {
store volatile i64 %add3, i64 *%ptr
store volatile i64 %add4, i64 *%ptr
store volatile i64 %add5, i64 *%ptr
- %final = getelementptr i64 *%ptr, i64 1
+ %final = getelementptr i64, i64 *%ptr, i64 1
store volatile i64 %add14, i64 *%final
ret void
}
@@ -196,11 +196,11 @@ define void @f4(i64 *%ptr) {
; CHECK-NOT: %r12
; CHECK-NOT: %r13
; CHECK: br %r14
- %l0 = load volatile i64 *%ptr
- %l1 = load volatile i64 *%ptr
- %l3 = load volatile i64 *%ptr
- %l4 = load volatile i64 *%ptr
- %l5 = load volatile i64 *%ptr
+ %l0 = load volatile i64 , i64 *%ptr
+ %l1 = load volatile i64 , i64 *%ptr
+ %l3 = load volatile i64 , i64 *%ptr
+ %l4 = load volatile i64 , i64 *%ptr
+ %l5 = load volatile i64 , i64 *%ptr
%add0 = add i64 %l0, %l0
%add1 = add i64 %l1, %add0
%add3 = add i64 %l3, %add1
@@ -210,7 +210,7 @@ define void @f4(i64 *%ptr) {
store volatile i64 %add1, i64 *%ptr
store volatile i64 %add3, i64 *%ptr
store volatile i64 %add4, i64 *%ptr
- %final = getelementptr i64 *%ptr, i64 1
+ %final = getelementptr i64, i64 *%ptr, i64 1
store volatile i64 %add5, i64 *%final
ret void
}
diff --git a/test/CodeGen/SystemZ/frame-07.ll b/test/CodeGen/SystemZ/frame-07.ll
index eab313744b94..dd8101429628 100644
--- a/test/CodeGen/SystemZ/frame-07.ll
+++ b/test/CodeGen/SystemZ/frame-07.ll
@@ -1,7 +1,7 @@
; Test the saving and restoring of FPRs in large frames.
;
-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck -check-prefix=CHECK-NOFP %s
-; RUN: llc < %s -mtriple=s390x-linux-gnu -disable-fp-elim | FileCheck -check-prefix=CHECK-FP %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck -check-prefix=CHECK-NOFP %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 -disable-fp-elim | FileCheck -check-prefix=CHECK-FP %s
; Test a frame size that requires some FPRs to be saved and loaded using
; the 20-bit STDY and LDY while others can use the 12-bit STD and LD.
@@ -66,24 +66,24 @@ define void @f1(double *%ptr, i64 %x) {
; CHECK-FP: lmg %r11, %r15, 4216(%r11)
; CHECK-FP: br %r14
%y = alloca [486 x i64], align 8
- %elem = getelementptr inbounds [486 x i64]* %y, i64 0, i64 0
+ %elem = getelementptr inbounds [486 x i64], [486 x i64]* %y, i64 0, i64 0
store volatile i64 %x, i64* %elem
- %l0 = load volatile double *%ptr
- %l1 = load volatile double *%ptr
- %l2 = load volatile double *%ptr
- %l3 = load volatile double *%ptr
- %l4 = load volatile double *%ptr
- %l5 = load volatile double *%ptr
- %l6 = load volatile double *%ptr
- %l7 = load volatile double *%ptr
- %l8 = load volatile double *%ptr
- %l9 = load volatile double *%ptr
- %l10 = load volatile double *%ptr
- %l11 = load volatile double *%ptr
- %l12 = load volatile double *%ptr
- %l13 = load volatile double *%ptr
- %l14 = load volatile double *%ptr
- %l15 = load volatile double *%ptr
+ %l0 = load volatile double , double *%ptr
+ %l1 = load volatile double , double *%ptr
+ %l2 = load volatile double , double *%ptr
+ %l3 = load volatile double , double *%ptr
+ %l4 = load volatile double , double *%ptr
+ %l5 = load volatile double , double *%ptr
+ %l6 = load volatile double , double *%ptr
+ %l7 = load volatile double , double *%ptr
+ %l8 = load volatile double , double *%ptr
+ %l9 = load volatile double , double *%ptr
+ %l10 = load volatile double , double *%ptr
+ %l11 = load volatile double , double *%ptr
+ %l12 = load volatile double , double *%ptr
+ %l13 = load volatile double , double *%ptr
+ %l14 = load volatile double , double *%ptr
+ %l15 = load volatile double , double *%ptr
%add0 = fadd double %l0, %l0
%add1 = fadd double %l1, %add0
%add2 = fadd double %l2, %add1
@@ -195,24 +195,24 @@ define void @f2(double *%ptr, i64 %x) {
; CHECK-FP: lmg %r11, %r15, 524280(%r11)
; CHECK-FP: br %r14
%y = alloca [65510 x i64], align 8
- %elem = getelementptr inbounds [65510 x i64]* %y, i64 0, i64 0
+ %elem = getelementptr inbounds [65510 x i64], [65510 x i64]* %y, i64 0, i64 0
store volatile i64 %x, i64* %elem
- %l0 = load volatile double *%ptr
- %l1 = load volatile double *%ptr
- %l2 = load volatile double *%ptr
- %l3 = load volatile double *%ptr
- %l4 = load volatile double *%ptr
- %l5 = load volatile double *%ptr
- %l6 = load volatile double *%ptr
- %l7 = load volatile double *%ptr
- %l8 = load volatile double *%ptr
- %l9 = load volatile double *%ptr
- %l10 = load volatile double *%ptr
- %l11 = load volatile double *%ptr
- %l12 = load volatile double *%ptr
- %l13 = load volatile double *%ptr
- %l14 = load volatile double *%ptr
- %l15 = load volatile double *%ptr
+ %l0 = load volatile double , double *%ptr
+ %l1 = load volatile double , double *%ptr
+ %l2 = load volatile double , double *%ptr
+ %l3 = load volatile double , double *%ptr
+ %l4 = load volatile double , double *%ptr
+ %l5 = load volatile double , double *%ptr
+ %l6 = load volatile double , double *%ptr
+ %l7 = load volatile double , double *%ptr
+ %l8 = load volatile double , double *%ptr
+ %l9 = load volatile double , double *%ptr
+ %l10 = load volatile double , double *%ptr
+ %l11 = load volatile double , double *%ptr
+ %l12 = load volatile double , double *%ptr
+ %l13 = load volatile double , double *%ptr
+ %l14 = load volatile double , double *%ptr
+ %l15 = load volatile double , double *%ptr
%add0 = fadd double %l0, %l0
%add1 = fadd double %l1, %add0
%add2 = fadd double %l2, %add1
diff --git a/test/CodeGen/SystemZ/frame-08.ll b/test/CodeGen/SystemZ/frame-08.ll
index aa4e3f481da4..99e64108bca1 100644
--- a/test/CodeGen/SystemZ/frame-08.ll
+++ b/test/CodeGen/SystemZ/frame-08.ll
@@ -25,19 +25,19 @@ define void @f1(i32 *%ptr, i64 %x) {
; CHECK-NOT: ag
; CHECK: lmg %r6, %r15, 524280(%r15)
; CHECK: br %r14
- %l0 = load volatile i32 *%ptr
- %l1 = load volatile i32 *%ptr
- %l4 = load volatile i32 *%ptr
- %l5 = load volatile i32 *%ptr
- %l6 = load volatile i32 *%ptr
- %l7 = load volatile i32 *%ptr
- %l8 = load volatile i32 *%ptr
- %l9 = load volatile i32 *%ptr
- %l10 = load volatile i32 *%ptr
- %l11 = load volatile i32 *%ptr
- %l12 = load volatile i32 *%ptr
- %l13 = load volatile i32 *%ptr
- %l14 = load volatile i32 *%ptr
+ %l0 = load volatile i32 , i32 *%ptr
+ %l1 = load volatile i32 , i32 *%ptr
+ %l4 = load volatile i32 , i32 *%ptr
+ %l5 = load volatile i32 , i32 *%ptr
+ %l6 = load volatile i32 , i32 *%ptr
+ %l7 = load volatile i32 , i32 *%ptr
+ %l8 = load volatile i32 , i32 *%ptr
+ %l9 = load volatile i32 , i32 *%ptr
+ %l10 = load volatile i32 , i32 *%ptr
+ %l11 = load volatile i32 , i32 *%ptr
+ %l12 = load volatile i32 , i32 *%ptr
+ %l13 = load volatile i32 , i32 *%ptr
+ %l14 = load volatile i32 , i32 *%ptr
%add0 = add i32 %l0, %l0
%add1 = add i32 %l1, %add0
%add4 = add i32 %l4, %add1
@@ -65,7 +65,7 @@ define void @f1(i32 *%ptr, i64 %x) {
store volatile i32 %add13, i32 *%ptr
store volatile i32 %add14, i32 *%ptr
%y = alloca [65507 x i64], align 8
- %entry = getelementptr inbounds [65507 x i64]* %y, i64 0, i64 0
+ %entry = getelementptr inbounds [65507 x i64], [65507 x i64]* %y, i64 0, i64 0
store volatile i64 %x, i64* %entry
ret void
}
@@ -85,11 +85,11 @@ define void @f2(i32 *%ptr, i64 %x) {
; CHECK-NOT: ag
; CHECK: lmg %r14, %r15, 524280(%r15)
; CHECK: br %r14
- %l0 = load volatile i32 *%ptr
- %l1 = load volatile i32 *%ptr
- %l4 = load volatile i32 *%ptr
- %l5 = load volatile i32 *%ptr
- %l14 = load volatile i32 *%ptr
+ %l0 = load volatile i32 , i32 *%ptr
+ %l1 = load volatile i32 , i32 *%ptr
+ %l4 = load volatile i32 , i32 *%ptr
+ %l5 = load volatile i32 , i32 *%ptr
+ %l14 = load volatile i32 , i32 *%ptr
%add0 = add i32 %l0, %l0
%add1 = add i32 %l1, %add0
%add4 = add i32 %l4, %add1
@@ -101,7 +101,7 @@ define void @f2(i32 *%ptr, i64 %x) {
store volatile i32 %add5, i32 *%ptr
store volatile i32 %add14, i32 *%ptr
%y = alloca [65499 x i64], align 8
- %entry = getelementptr inbounds [65499 x i64]* %y, i64 0, i64 0
+ %entry = getelementptr inbounds [65499 x i64], [65499 x i64]* %y, i64 0, i64 0
store volatile i64 %x, i64* %entry
ret void
}
@@ -128,19 +128,19 @@ define void @f3(i32 *%ptr, i64 %x) {
; CHECK: aghi %r15, 8
; CHECK: lmg %r6, %r15, 524280(%r15)
; CHECK: br %r14
- %l0 = load volatile i32 *%ptr
- %l1 = load volatile i32 *%ptr
- %l4 = load volatile i32 *%ptr
- %l5 = load volatile i32 *%ptr
- %l6 = load volatile i32 *%ptr
- %l7 = load volatile i32 *%ptr
- %l8 = load volatile i32 *%ptr
- %l9 = load volatile i32 *%ptr
- %l10 = load volatile i32 *%ptr
- %l11 = load volatile i32 *%ptr
- %l12 = load volatile i32 *%ptr
- %l13 = load volatile i32 *%ptr
- %l14 = load volatile i32 *%ptr
+ %l0 = load volatile i32 , i32 *%ptr
+ %l1 = load volatile i32 , i32 *%ptr
+ %l4 = load volatile i32 , i32 *%ptr
+ %l5 = load volatile i32 , i32 *%ptr
+ %l6 = load volatile i32 , i32 *%ptr
+ %l7 = load volatile i32 , i32 *%ptr
+ %l8 = load volatile i32 , i32 *%ptr
+ %l9 = load volatile i32 , i32 *%ptr
+ %l10 = load volatile i32 , i32 *%ptr
+ %l11 = load volatile i32 , i32 *%ptr
+ %l12 = load volatile i32 , i32 *%ptr
+ %l13 = load volatile i32 , i32 *%ptr
+ %l14 = load volatile i32 , i32 *%ptr
%add0 = add i32 %l0, %l0
%add1 = add i32 %l1, %add0
%add4 = add i32 %l4, %add1
@@ -168,7 +168,7 @@ define void @f3(i32 *%ptr, i64 %x) {
store volatile i32 %add13, i32 *%ptr
store volatile i32 %add14, i32 *%ptr
%y = alloca [65508 x i64], align 8
- %entry = getelementptr inbounds [65508 x i64]* %y, i64 0, i64 0
+ %entry = getelementptr inbounds [65508 x i64], [65508 x i64]* %y, i64 0, i64 0
store volatile i64 %x, i64* %entry
ret void
}
@@ -187,11 +187,11 @@ define void @f4(i32 *%ptr, i64 %x) {
; CHECK: aghi %r15, 8
; CHECK: lmg %r14, %r15, 524280(%r15)
; CHECK: br %r14
- %l0 = load volatile i32 *%ptr
- %l1 = load volatile i32 *%ptr
- %l4 = load volatile i32 *%ptr
- %l5 = load volatile i32 *%ptr
- %l14 = load volatile i32 *%ptr
+ %l0 = load volatile i32 , i32 *%ptr
+ %l1 = load volatile i32 , i32 *%ptr
+ %l4 = load volatile i32 , i32 *%ptr
+ %l5 = load volatile i32 , i32 *%ptr
+ %l14 = load volatile i32 , i32 *%ptr
%add0 = add i32 %l0, %l0
%add1 = add i32 %l1, %add0
%add4 = add i32 %l4, %add1
@@ -203,7 +203,7 @@ define void @f4(i32 *%ptr, i64 %x) {
store volatile i32 %add5, i32 *%ptr
store volatile i32 %add14, i32 *%ptr
%y = alloca [65500 x i64], align 8
- %entry = getelementptr inbounds [65500 x i64]* %y, i64 0, i64 0
+ %entry = getelementptr inbounds [65500 x i64], [65500 x i64]* %y, i64 0, i64 0
store volatile i64 %x, i64* %entry
ret void
}
@@ -221,11 +221,11 @@ define void @f5(i32 *%ptr, i64 %x) {
; CHECK: aghi %r15, 32760
; CHECK: lmg %r14, %r15, 524280(%r15)
; CHECK: br %r14
- %l0 = load volatile i32 *%ptr
- %l1 = load volatile i32 *%ptr
- %l4 = load volatile i32 *%ptr
- %l5 = load volatile i32 *%ptr
- %l14 = load volatile i32 *%ptr
+ %l0 = load volatile i32 , i32 *%ptr
+ %l1 = load volatile i32 , i32 *%ptr
+ %l4 = load volatile i32 , i32 *%ptr
+ %l5 = load volatile i32 , i32 *%ptr
+ %l14 = load volatile i32 , i32 *%ptr
%add0 = add i32 %l0, %l0
%add1 = add i32 %l1, %add0
%add4 = add i32 %l4, %add1
@@ -237,7 +237,7 @@ define void @f5(i32 *%ptr, i64 %x) {
store volatile i32 %add5, i32 *%ptr
store volatile i32 %add14, i32 *%ptr
%y = alloca [69594 x i64], align 8
- %entry = getelementptr inbounds [69594 x i64]* %y, i64 0, i64 0
+ %entry = getelementptr inbounds [69594 x i64], [69594 x i64]* %y, i64 0, i64 0
store volatile i64 %x, i64* %entry
ret void
}
@@ -255,11 +255,11 @@ define void @f6(i32 *%ptr, i64 %x) {
; CHECK: agfi %r15, 32768
; CHECK: lmg %r14, %r15, 524280(%r15)
; CHECK: br %r14
- %l0 = load volatile i32 *%ptr
- %l1 = load volatile i32 *%ptr
- %l4 = load volatile i32 *%ptr
- %l5 = load volatile i32 *%ptr
- %l14 = load volatile i32 *%ptr
+ %l0 = load volatile i32 , i32 *%ptr
+ %l1 = load volatile i32 , i32 *%ptr
+ %l4 = load volatile i32 , i32 *%ptr
+ %l5 = load volatile i32 , i32 *%ptr
+ %l14 = load volatile i32 , i32 *%ptr
%add0 = add i32 %l0, %l0
%add1 = add i32 %l1, %add0
%add4 = add i32 %l4, %add1
@@ -271,7 +271,7 @@ define void @f6(i32 *%ptr, i64 %x) {
store volatile i32 %add5, i32 *%ptr
store volatile i32 %add14, i32 *%ptr
%y = alloca [69595 x i64], align 8
- %entry = getelementptr inbounds [69595 x i64]* %y, i64 0, i64 0
+ %entry = getelementptr inbounds [69595 x i64], [69595 x i64]* %y, i64 0, i64 0
store volatile i64 %x, i64* %entry
ret void
}
diff --git a/test/CodeGen/SystemZ/frame-09.ll b/test/CodeGen/SystemZ/frame-09.ll
index 8a4f99c343a0..ead944e59f98 100644
--- a/test/CodeGen/SystemZ/frame-09.ll
+++ b/test/CodeGen/SystemZ/frame-09.ll
@@ -64,19 +64,19 @@ define void @f3(i32 *%ptr) {
; CHECK: st {{.*}}, 4(%r2)
; CHECK: lmg %r6, %r15, 48(%r11)
; CHECK: br %r14
- %l0 = load volatile i32 *%ptr
- %l1 = load volatile i32 *%ptr
- %l3 = load volatile i32 *%ptr
- %l4 = load volatile i32 *%ptr
- %l5 = load volatile i32 *%ptr
- %l6 = load volatile i32 *%ptr
- %l7 = load volatile i32 *%ptr
- %l8 = load volatile i32 *%ptr
- %l9 = load volatile i32 *%ptr
- %l10 = load volatile i32 *%ptr
- %l12 = load volatile i32 *%ptr
- %l13 = load volatile i32 *%ptr
- %l14 = load volatile i32 *%ptr
+ %l0 = load volatile i32 , i32 *%ptr
+ %l1 = load volatile i32 , i32 *%ptr
+ %l3 = load volatile i32 , i32 *%ptr
+ %l4 = load volatile i32 , i32 *%ptr
+ %l5 = load volatile i32 , i32 *%ptr
+ %l6 = load volatile i32 , i32 *%ptr
+ %l7 = load volatile i32 , i32 *%ptr
+ %l8 = load volatile i32 , i32 *%ptr
+ %l9 = load volatile i32 , i32 *%ptr
+ %l10 = load volatile i32 , i32 *%ptr
+ %l12 = load volatile i32 , i32 *%ptr
+ %l13 = load volatile i32 , i32 *%ptr
+ %l14 = load volatile i32 , i32 *%ptr
%add0 = add i32 %l0, %l0
%add1 = add i32 %l1, %add0
%add3 = add i32 %l3, %add1
@@ -102,7 +102,7 @@ define void @f3(i32 *%ptr) {
store volatile i32 %add10, i32 *%ptr
store volatile i32 %add12, i32 *%ptr
store volatile i32 %add13, i32 *%ptr
- %final = getelementptr i32 *%ptr, i32 1
+ %final = getelementptr i32, i32 *%ptr, i32 1
store volatile i32 %add14, i32 *%final
ret void
}
@@ -124,7 +124,7 @@ define void @f4(i64 %x) {
; CHECK: lmg %r11, %r15, 524280(%r11)
; CHECK: br %r14
%y = alloca [65502 x i64], align 8
- %ptr = getelementptr inbounds [65502 x i64]* %y, i64 0, i64 0
+ %ptr = getelementptr inbounds [65502 x i64], [65502 x i64]* %y, i64 0, i64 0
store volatile i64 %x, i64* %ptr
ret void
}
@@ -144,7 +144,7 @@ define void @f5(i64 %x) {
; CHECK: lmg %r11, %r15, 524280(%r11)
; CHECK: br %r14
%y = alloca [65503 x i64], align 8
- %ptr = getelementptr inbounds [65503 x i64]* %y, i64 0, i64 0
+ %ptr = getelementptr inbounds [65503 x i64], [65503 x i64]* %y, i64 0, i64 0
store volatile i64 %x, i64* %ptr
ret void
}
diff --git a/test/CodeGen/SystemZ/frame-13.ll b/test/CodeGen/SystemZ/frame-13.ll
index 58dee1da58b5..2afe6d74388b 100644
--- a/test/CodeGen/SystemZ/frame-13.ll
+++ b/test/CodeGen/SystemZ/frame-13.ll
@@ -34,8 +34,8 @@ define void @f1() {
; CHECK-FP: br %r14
%region1 = alloca [978 x i32], align 8
%region2 = alloca [978 x i32], align 8
- %ptr1 = getelementptr inbounds [978 x i32]* %region1, i64 0, i64 1
- %ptr2 = getelementptr inbounds [978 x i32]* %region2, i64 0, i64 1
+ %ptr1 = getelementptr inbounds [978 x i32], [978 x i32]* %region1, i64 0, i64 1
+ %ptr2 = getelementptr inbounds [978 x i32], [978 x i32]* %region2, i64 0, i64 1
store volatile i32 42, i32 *%ptr1
store volatile i32 42, i32 *%ptr2
ret void
@@ -54,8 +54,8 @@ define void @f2() {
; CHECK-FP: br %r14
%region1 = alloca [978 x i32], align 8
%region2 = alloca [978 x i32], align 8
- %ptr1 = getelementptr inbounds [978 x i32]* %region1, i64 0, i64 2
- %ptr2 = getelementptr inbounds [978 x i32]* %region2, i64 0, i64 2
+ %ptr1 = getelementptr inbounds [978 x i32], [978 x i32]* %region1, i64 0, i64 2
+ %ptr2 = getelementptr inbounds [978 x i32], [978 x i32]* %region2, i64 0, i64 2
store volatile i32 42, i32 *%ptr1
store volatile i32 42, i32 *%ptr2
ret void
@@ -74,8 +74,8 @@ define void @f3() {
; CHECK-FP: br %r14
%region1 = alloca [978 x i32], align 8
%region2 = alloca [978 x i32], align 8
- %ptr1 = getelementptr inbounds [978 x i32]* %region1, i64 0, i64 3
- %ptr2 = getelementptr inbounds [978 x i32]* %region2, i64 0, i64 3
+ %ptr1 = getelementptr inbounds [978 x i32], [978 x i32]* %region1, i64 0, i64 3
+ %ptr2 = getelementptr inbounds [978 x i32], [978 x i32]* %region2, i64 0, i64 3
store volatile i32 42, i32 *%ptr1
store volatile i32 42, i32 *%ptr2
ret void
@@ -94,8 +94,8 @@ define void @f4() {
; CHECK-FP: br %r14
%region1 = alloca [2002 x i32], align 8
%region2 = alloca [2002 x i32], align 8
- %ptr1 = getelementptr inbounds [2002 x i32]* %region1, i64 0, i64 1
- %ptr2 = getelementptr inbounds [2002 x i32]* %region2, i64 0, i64 1
+ %ptr1 = getelementptr inbounds [2002 x i32], [2002 x i32]* %region1, i64 0, i64 1
+ %ptr2 = getelementptr inbounds [2002 x i32], [2002 x i32]* %region2, i64 0, i64 1
store volatile i32 42, i32 *%ptr1
store volatile i32 42, i32 *%ptr2
ret void
@@ -114,8 +114,8 @@ define void @f5() {
; CHECK-FP: br %r14
%region1 = alloca [2002 x i32], align 8
%region2 = alloca [2002 x i32], align 8
- %ptr1 = getelementptr inbounds [2002 x i32]* %region1, i64 0, i64 2
- %ptr2 = getelementptr inbounds [2002 x i32]* %region2, i64 0, i64 2
+ %ptr1 = getelementptr inbounds [2002 x i32], [2002 x i32]* %region1, i64 0, i64 2
+ %ptr2 = getelementptr inbounds [2002 x i32], [2002 x i32]* %region2, i64 0, i64 2
store volatile i32 42, i32 *%ptr1
store volatile i32 42, i32 *%ptr2
ret void
@@ -134,8 +134,8 @@ define void @f6() {
; CHECK-FP: br %r14
%region1 = alloca [2002 x i32], align 8
%region2 = alloca [2002 x i32], align 8
- %ptr1 = getelementptr inbounds [2002 x i32]* %region1, i64 0, i64 3
- %ptr2 = getelementptr inbounds [2002 x i32]* %region2, i64 0, i64 3
+ %ptr1 = getelementptr inbounds [2002 x i32], [2002 x i32]* %region1, i64 0, i64 3
+ %ptr2 = getelementptr inbounds [2002 x i32], [2002 x i32]* %region2, i64 0, i64 3
store volatile i32 42, i32 *%ptr1
store volatile i32 42, i32 *%ptr2
ret void
@@ -156,8 +156,8 @@ define void @f7() {
; CHECK-FP: br %r14
%region1 = alloca [2004 x i32], align 8
%region2 = alloca [2004 x i32], align 8
- %ptr1 = getelementptr inbounds [2004 x i32]* %region1, i64 0, i64 1023
- %ptr2 = getelementptr inbounds [2004 x i32]* %region2, i64 0, i64 1023
+ %ptr1 = getelementptr inbounds [2004 x i32], [2004 x i32]* %region1, i64 0, i64 1023
+ %ptr2 = getelementptr inbounds [2004 x i32], [2004 x i32]* %region2, i64 0, i64 1023
store volatile i32 42, i32 *%ptr1
store volatile i32 42, i32 *%ptr2
ret void
@@ -177,8 +177,8 @@ define void @f8() {
; CHECK-FP: br %r14
%region1 = alloca [2006 x i32], align 8
%region2 = alloca [2006 x i32], align 8
- %ptr1 = getelementptr inbounds [2006 x i32]* %region1, i64 0, i64 1023
- %ptr2 = getelementptr inbounds [2006 x i32]* %region2, i64 0, i64 1023
+ %ptr1 = getelementptr inbounds [2006 x i32], [2006 x i32]* %region1, i64 0, i64 1023
+ %ptr2 = getelementptr inbounds [2006 x i32], [2006 x i32]* %region2, i64 0, i64 1023
store volatile i32 42, i32 *%ptr1
store volatile i32 42, i32 *%ptr2
ret void
@@ -198,8 +198,8 @@ define void @f9() {
; CHECK-FP: br %r14
%region1 = alloca [2006 x i32], align 8
%region2 = alloca [2006 x i32], align 8
- %ptr1 = getelementptr inbounds [2006 x i32]* %region1, i64 0, i64 1024
- %ptr2 = getelementptr inbounds [2006 x i32]* %region2, i64 0, i64 1024
+ %ptr1 = getelementptr inbounds [2006 x i32], [2006 x i32]* %region1, i64 0, i64 1024
+ %ptr2 = getelementptr inbounds [2006 x i32], [2006 x i32]* %region2, i64 0, i64 1024
store volatile i32 42, i32 *%ptr1
store volatile i32 42, i32 *%ptr2
ret void
@@ -222,15 +222,15 @@ define void @f10(i32 *%vptr) {
; CHECK-FP: mvhi 0([[REGISTER]]), 42
; CHECK-FP: lg [[REGISTER]], [[OFFSET]](%r11)
; CHECK-FP: br %r14
- %i0 = load volatile i32 *%vptr
- %i1 = load volatile i32 *%vptr
- %i3 = load volatile i32 *%vptr
- %i4 = load volatile i32 *%vptr
- %i5 = load volatile i32 *%vptr
+ %i0 = load volatile i32 , i32 *%vptr
+ %i1 = load volatile i32 , i32 *%vptr
+ %i3 = load volatile i32 , i32 *%vptr
+ %i4 = load volatile i32 , i32 *%vptr
+ %i5 = load volatile i32 , i32 *%vptr
%region1 = alloca [978 x i32], align 8
%region2 = alloca [978 x i32], align 8
- %ptr1 = getelementptr inbounds [978 x i32]* %region1, i64 0, i64 2
- %ptr2 = getelementptr inbounds [978 x i32]* %region2, i64 0, i64 2
+ %ptr1 = getelementptr inbounds [978 x i32], [978 x i32]* %region1, i64 0, i64 2
+ %ptr2 = getelementptr inbounds [978 x i32], [978 x i32]* %region2, i64 0, i64 2
store volatile i32 42, i32 *%ptr1
store volatile i32 42, i32 *%ptr2
store volatile i32 %i0, i32 *%vptr
@@ -254,24 +254,24 @@ define void @f11(i32 *%vptr) {
; CHECK-NOFP: lg [[REGISTER]], [[OFFSET]](%r15)
; CHECK-NOFP: lmg %r6, %r15,
; CHECK-NOFP: br %r14
- %i0 = load volatile i32 *%vptr
- %i1 = load volatile i32 *%vptr
- %i3 = load volatile i32 *%vptr
- %i4 = load volatile i32 *%vptr
- %i5 = load volatile i32 *%vptr
- %i6 = load volatile i32 *%vptr
- %i7 = load volatile i32 *%vptr
- %i8 = load volatile i32 *%vptr
- %i9 = load volatile i32 *%vptr
- %i10 = load volatile i32 *%vptr
- %i11 = load volatile i32 *%vptr
- %i12 = load volatile i32 *%vptr
- %i13 = load volatile i32 *%vptr
- %i14 = load volatile i32 *%vptr
+ %i0 = load volatile i32 , i32 *%vptr
+ %i1 = load volatile i32 , i32 *%vptr
+ %i3 = load volatile i32 , i32 *%vptr
+ %i4 = load volatile i32 , i32 *%vptr
+ %i5 = load volatile i32 , i32 *%vptr
+ %i6 = load volatile i32 , i32 *%vptr
+ %i7 = load volatile i32 , i32 *%vptr
+ %i8 = load volatile i32 , i32 *%vptr
+ %i9 = load volatile i32 , i32 *%vptr
+ %i10 = load volatile i32 , i32 *%vptr
+ %i11 = load volatile i32 , i32 *%vptr
+ %i12 = load volatile i32 , i32 *%vptr
+ %i13 = load volatile i32 , i32 *%vptr
+ %i14 = load volatile i32 , i32 *%vptr
%region1 = alloca [978 x i32], align 8
%region2 = alloca [978 x i32], align 8
- %ptr1 = getelementptr inbounds [978 x i32]* %region1, i64 0, i64 2
- %ptr2 = getelementptr inbounds [978 x i32]* %region2, i64 0, i64 2
+ %ptr1 = getelementptr inbounds [978 x i32], [978 x i32]* %region1, i64 0, i64 2
+ %ptr2 = getelementptr inbounds [978 x i32], [978 x i32]* %region2, i64 0, i64 2
store volatile i32 42, i32 *%ptr1
store volatile i32 42, i32 *%ptr2
store volatile i32 %i0, i32 *%vptr
diff --git a/test/CodeGen/SystemZ/frame-14.ll b/test/CodeGen/SystemZ/frame-14.ll
index 24169cf61f00..3c080a401648 100644
--- a/test/CodeGen/SystemZ/frame-14.ll
+++ b/test/CodeGen/SystemZ/frame-14.ll
@@ -33,8 +33,8 @@ define void @f1() {
; CHECK-FP: br %r14
%region1 = alloca [3912 x i8], align 8
%region2 = alloca [3912 x i8], align 8
- %ptr1 = getelementptr inbounds [3912 x i8]* %region1, i64 0, i64 7
- %ptr2 = getelementptr inbounds [3912 x i8]* %region2, i64 0, i64 7
+ %ptr1 = getelementptr inbounds [3912 x i8], [3912 x i8]* %region1, i64 0, i64 7
+ %ptr2 = getelementptr inbounds [3912 x i8], [3912 x i8]* %region2, i64 0, i64 7
store volatile i8 42, i8 *%ptr1
store volatile i8 42, i8 *%ptr2
ret void
@@ -51,8 +51,8 @@ define void @f2() {
; CHECK-FP: br %r14
%region1 = alloca [3912 x i8], align 8
%region2 = alloca [3912 x i8], align 8
- %ptr1 = getelementptr inbounds [3912 x i8]* %region1, i64 0, i64 8
- %ptr2 = getelementptr inbounds [3912 x i8]* %region2, i64 0, i64 8
+ %ptr1 = getelementptr inbounds [3912 x i8], [3912 x i8]* %region1, i64 0, i64 8
+ %ptr2 = getelementptr inbounds [3912 x i8], [3912 x i8]* %region2, i64 0, i64 8
store volatile i8 42, i8 *%ptr1
store volatile i8 42, i8 *%ptr2
ret void
@@ -72,8 +72,8 @@ define void @f3() {
; CHECK-FP: br %r14
%region1 = alloca [524104 x i8], align 8
%region2 = alloca [524104 x i8], align 8
- %ptr1 = getelementptr inbounds [524104 x i8]* %region1, i64 0, i64 7
- %ptr2 = getelementptr inbounds [524104 x i8]* %region2, i64 0, i64 7
+ %ptr1 = getelementptr inbounds [524104 x i8], [524104 x i8]* %region1, i64 0, i64 7
+ %ptr2 = getelementptr inbounds [524104 x i8], [524104 x i8]* %region2, i64 0, i64 7
store volatile i8 42, i8 *%ptr1
store volatile i8 42, i8 *%ptr2
ret void
@@ -96,8 +96,8 @@ define void @f4() {
; CHECK-FP: br %r14
%region1 = alloca [524104 x i8], align 8
%region2 = alloca [524104 x i8], align 8
- %ptr1 = getelementptr inbounds [524104 x i8]* %region1, i64 0, i64 8
- %ptr2 = getelementptr inbounds [524104 x i8]* %region2, i64 0, i64 8
+ %ptr1 = getelementptr inbounds [524104 x i8], [524104 x i8]* %region1, i64 0, i64 8
+ %ptr2 = getelementptr inbounds [524104 x i8], [524104 x i8]* %region2, i64 0, i64 8
store volatile i8 42, i8 *%ptr1
store volatile i8 42, i8 *%ptr2
ret void
@@ -119,8 +119,8 @@ define void @f5() {
; CHECK-FP: br %r14
%region1 = alloca [524104 x i8], align 8
%region2 = alloca [524104 x i8], align 8
- %ptr1 = getelementptr inbounds [524104 x i8]* %region1, i64 0, i64 4103
- %ptr2 = getelementptr inbounds [524104 x i8]* %region2, i64 0, i64 4103
+ %ptr1 = getelementptr inbounds [524104 x i8], [524104 x i8]* %region1, i64 0, i64 4103
+ %ptr2 = getelementptr inbounds [524104 x i8], [524104 x i8]* %region2, i64 0, i64 4103
store volatile i8 42, i8 *%ptr1
store volatile i8 42, i8 *%ptr2
ret void
@@ -141,8 +141,8 @@ define void @f6() {
; CHECK-FP: br %r14
%region1 = alloca [524104 x i8], align 8
%region2 = alloca [524104 x i8], align 8
- %ptr1 = getelementptr inbounds [524104 x i8]* %region1, i64 0, i64 4104
- %ptr2 = getelementptr inbounds [524104 x i8]* %region2, i64 0, i64 4104
+ %ptr1 = getelementptr inbounds [524104 x i8], [524104 x i8]* %region1, i64 0, i64 4104
+ %ptr2 = getelementptr inbounds [524104 x i8], [524104 x i8]* %region2, i64 0, i64 4104
store volatile i8 42, i8 *%ptr1
store volatile i8 42, i8 *%ptr2
ret void
@@ -166,8 +166,8 @@ define void @f7() {
; CHECK-FP: br %r14
%region1 = alloca [1048400 x i8], align 8
%region2 = alloca [1048400 x i8], align 8
- %ptr1 = getelementptr inbounds [1048400 x i8]* %region1, i64 0, i64 524287
- %ptr2 = getelementptr inbounds [1048400 x i8]* %region2, i64 0, i64 524287
+ %ptr1 = getelementptr inbounds [1048400 x i8], [1048400 x i8]* %region1, i64 0, i64 524287
+ %ptr2 = getelementptr inbounds [1048400 x i8], [1048400 x i8]* %region2, i64 0, i64 524287
store volatile i8 42, i8 *%ptr1
store volatile i8 42, i8 *%ptr2
ret void
@@ -189,8 +189,8 @@ define void @f8() {
; CHECK-FP: br %r14
%region1 = alloca [1048408 x i8], align 8
%region2 = alloca [1048408 x i8], align 8
- %ptr1 = getelementptr inbounds [1048408 x i8]* %region1, i64 0, i64 524287
- %ptr2 = getelementptr inbounds [1048408 x i8]* %region2, i64 0, i64 524287
+ %ptr1 = getelementptr inbounds [1048408 x i8], [1048408 x i8]* %region1, i64 0, i64 524287
+ %ptr2 = getelementptr inbounds [1048408 x i8], [1048408 x i8]* %region2, i64 0, i64 524287
store volatile i8 42, i8 *%ptr1
store volatile i8 42, i8 *%ptr2
ret void
@@ -219,8 +219,8 @@ define void @f9() {
; CHECK-FP: br %r14
%region1 = alloca [1048408 x i8], align 8
%region2 = alloca [1048408 x i8], align 8
- %ptr1 = getelementptr inbounds [1048408 x i8]* %region1, i64 0, i64 524288
- %ptr2 = getelementptr inbounds [1048408 x i8]* %region2, i64 0, i64 524288
+ %ptr1 = getelementptr inbounds [1048408 x i8], [1048408 x i8]* %region1, i64 0, i64 524288
+ %ptr2 = getelementptr inbounds [1048408 x i8], [1048408 x i8]* %region2, i64 0, i64 524288
store volatile i8 42, i8 *%ptr1
store volatile i8 42, i8 *%ptr2
ret void
@@ -245,15 +245,15 @@ define void @f10(i32 *%vptr) {
; CHECK-FP: mvi 0([[REGISTER]]), 42
; CHECK-FP: lg [[REGISTER]], [[OFFSET]](%r11)
; CHECK-FP: br %r14
- %i0 = load volatile i32 *%vptr
- %i1 = load volatile i32 *%vptr
- %i3 = load volatile i32 *%vptr
- %i4 = load volatile i32 *%vptr
- %i5 = load volatile i32 *%vptr
+ %i0 = load volatile i32 , i32 *%vptr
+ %i1 = load volatile i32 , i32 *%vptr
+ %i3 = load volatile i32 , i32 *%vptr
+ %i4 = load volatile i32 , i32 *%vptr
+ %i5 = load volatile i32 , i32 *%vptr
%region1 = alloca [524104 x i8], align 8
%region2 = alloca [524104 x i8], align 8
- %ptr1 = getelementptr inbounds [524104 x i8]* %region1, i64 0, i64 8
- %ptr2 = getelementptr inbounds [524104 x i8]* %region2, i64 0, i64 8
+ %ptr1 = getelementptr inbounds [524104 x i8], [524104 x i8]* %region1, i64 0, i64 8
+ %ptr2 = getelementptr inbounds [524104 x i8], [524104 x i8]* %region2, i64 0, i64 8
store volatile i8 42, i8 *%ptr1
store volatile i8 42, i8 *%ptr2
store volatile i32 %i0, i32 *%vptr
@@ -278,24 +278,24 @@ define void @f11(i32 *%vptr) {
; CHECK-NOFP: lg [[REGISTER]], [[OFFSET]](%r15)
; CHECK-NOFP: lmg %r6, %r15,
; CHECK-NOFP: br %r14
- %i0 = load volatile i32 *%vptr
- %i1 = load volatile i32 *%vptr
- %i3 = load volatile i32 *%vptr
- %i4 = load volatile i32 *%vptr
- %i5 = load volatile i32 *%vptr
- %i6 = load volatile i32 *%vptr
- %i7 = load volatile i32 *%vptr
- %i8 = load volatile i32 *%vptr
- %i9 = load volatile i32 *%vptr
- %i10 = load volatile i32 *%vptr
- %i11 = load volatile i32 *%vptr
- %i12 = load volatile i32 *%vptr
- %i13 = load volatile i32 *%vptr
- %i14 = load volatile i32 *%vptr
+ %i0 = load volatile i32 , i32 *%vptr
+ %i1 = load volatile i32 , i32 *%vptr
+ %i3 = load volatile i32 , i32 *%vptr
+ %i4 = load volatile i32 , i32 *%vptr
+ %i5 = load volatile i32 , i32 *%vptr
+ %i6 = load volatile i32 , i32 *%vptr
+ %i7 = load volatile i32 , i32 *%vptr
+ %i8 = load volatile i32 , i32 *%vptr
+ %i9 = load volatile i32 , i32 *%vptr
+ %i10 = load volatile i32 , i32 *%vptr
+ %i11 = load volatile i32 , i32 *%vptr
+ %i12 = load volatile i32 , i32 *%vptr
+ %i13 = load volatile i32 , i32 *%vptr
+ %i14 = load volatile i32 , i32 *%vptr
%region1 = alloca [524104 x i8], align 8
%region2 = alloca [524104 x i8], align 8
- %ptr1 = getelementptr inbounds [524104 x i8]* %region1, i64 0, i64 8
- %ptr2 = getelementptr inbounds [524104 x i8]* %region2, i64 0, i64 8
+ %ptr1 = getelementptr inbounds [524104 x i8], [524104 x i8]* %region1, i64 0, i64 8
+ %ptr2 = getelementptr inbounds [524104 x i8], [524104 x i8]* %region2, i64 0, i64 8
store volatile i8 42, i8 *%ptr1
store volatile i8 42, i8 *%ptr2
store volatile i32 %i0, i32 *%vptr
diff --git a/test/CodeGen/SystemZ/frame-15.ll b/test/CodeGen/SystemZ/frame-15.ll
index b3c95e73c1af..f81c9dc5c2c4 100644
--- a/test/CodeGen/SystemZ/frame-15.ll
+++ b/test/CodeGen/SystemZ/frame-15.ll
@@ -36,13 +36,13 @@ define void @f1(double *%dst) {
; CHECK-FP: br %r14
%region1 = alloca [978 x float], align 8
%region2 = alloca [978 x float], align 8
- %start1 = getelementptr inbounds [978 x float]* %region1, i64 0, i64 0
- %start2 = getelementptr inbounds [978 x float]* %region2, i64 0, i64 0
+ %start1 = getelementptr inbounds [978 x float], [978 x float]* %region1, i64 0, i64 0
+ %start2 = getelementptr inbounds [978 x float], [978 x float]* %region2, i64 0, i64 0
call void @foo(float *%start1, float *%start2)
- %ptr1 = getelementptr inbounds [978 x float]* %region1, i64 0, i64 1
- %ptr2 = getelementptr inbounds [978 x float]* %region2, i64 0, i64 1
- %float1 = load float *%ptr1
- %float2 = load float *%ptr2
+ %ptr1 = getelementptr inbounds [978 x float], [978 x float]* %region1, i64 0, i64 1
+ %ptr2 = getelementptr inbounds [978 x float], [978 x float]* %region2, i64 0, i64 1
+ %float1 = load float , float *%ptr1
+ %float2 = load float , float *%ptr2
%double1 = fpext float %float1 to double
%double2 = fpext float %float2 to double
store volatile double %double1, double *%dst
@@ -63,13 +63,13 @@ define void @f2(double *%dst) {
; CHECK-FP: br %r14
%region1 = alloca [978 x float], align 8
%region2 = alloca [978 x float], align 8
- %start1 = getelementptr inbounds [978 x float]* %region1, i64 0, i64 0
- %start2 = getelementptr inbounds [978 x float]* %region2, i64 0, i64 0
+ %start1 = getelementptr inbounds [978 x float], [978 x float]* %region1, i64 0, i64 0
+ %start2 = getelementptr inbounds [978 x float], [978 x float]* %region2, i64 0, i64 0
call void @foo(float *%start1, float *%start2)
- %ptr1 = getelementptr inbounds [978 x float]* %region1, i64 0, i64 2
- %ptr2 = getelementptr inbounds [978 x float]* %region2, i64 0, i64 2
- %float1 = load float *%ptr1
- %float2 = load float *%ptr2
+ %ptr1 = getelementptr inbounds [978 x float], [978 x float]* %region1, i64 0, i64 2
+ %ptr2 = getelementptr inbounds [978 x float], [978 x float]* %region2, i64 0, i64 2
+ %float1 = load float , float *%ptr1
+ %float2 = load float , float *%ptr2
%double1 = fpext float %float1 to double
%double2 = fpext float %float2 to double
store volatile double %double1, double *%dst
@@ -90,13 +90,13 @@ define void @f3(double *%dst) {
; CHECK-FP: br %r14
%region1 = alloca [978 x float], align 8
%region2 = alloca [978 x float], align 8
- %start1 = getelementptr inbounds [978 x float]* %region1, i64 0, i64 0
- %start2 = getelementptr inbounds [978 x float]* %region2, i64 0, i64 0
+ %start1 = getelementptr inbounds [978 x float], [978 x float]* %region1, i64 0, i64 0
+ %start2 = getelementptr inbounds [978 x float], [978 x float]* %region2, i64 0, i64 0
call void @foo(float *%start1, float *%start2)
- %ptr1 = getelementptr inbounds [978 x float]* %region1, i64 0, i64 3
- %ptr2 = getelementptr inbounds [978 x float]* %region2, i64 0, i64 3
- %float1 = load float *%ptr1
- %float2 = load float *%ptr2
+ %ptr1 = getelementptr inbounds [978 x float], [978 x float]* %region1, i64 0, i64 3
+ %ptr2 = getelementptr inbounds [978 x float], [978 x float]* %region2, i64 0, i64 3
+ %float1 = load float , float *%ptr1
+ %float2 = load float , float *%ptr2
%double1 = fpext float %float1 to double
%double2 = fpext float %float2 to double
store volatile double %double1, double *%dst
@@ -117,13 +117,13 @@ define void @f4(double *%dst) {
; CHECK-FP: br %r14
%region1 = alloca [2002 x float], align 8
%region2 = alloca [2002 x float], align 8
- %start1 = getelementptr inbounds [2002 x float]* %region1, i64 0, i64 0
- %start2 = getelementptr inbounds [2002 x float]* %region2, i64 0, i64 0
+ %start1 = getelementptr inbounds [2002 x float], [2002 x float]* %region1, i64 0, i64 0
+ %start2 = getelementptr inbounds [2002 x float], [2002 x float]* %region2, i64 0, i64 0
call void @foo(float *%start1, float *%start2)
- %ptr1 = getelementptr inbounds [2002 x float]* %region1, i64 0, i64 1
- %ptr2 = getelementptr inbounds [2002 x float]* %region2, i64 0, i64 1
- %float1 = load float *%ptr1
- %float2 = load float *%ptr2
+ %ptr1 = getelementptr inbounds [2002 x float], [2002 x float]* %region1, i64 0, i64 1
+ %ptr2 = getelementptr inbounds [2002 x float], [2002 x float]* %region2, i64 0, i64 1
+ %float1 = load float , float *%ptr1
+ %float2 = load float , float *%ptr2
%double1 = fpext float %float1 to double
%double2 = fpext float %float2 to double
store volatile double %double1, double *%dst
@@ -144,13 +144,13 @@ define void @f5(double *%dst) {
; CHECK-FP: br %r14
%region1 = alloca [2002 x float], align 8
%region2 = alloca [2002 x float], align 8
- %start1 = getelementptr inbounds [2002 x float]* %region1, i64 0, i64 0
- %start2 = getelementptr inbounds [2002 x float]* %region2, i64 0, i64 0
+ %start1 = getelementptr inbounds [2002 x float], [2002 x float]* %region1, i64 0, i64 0
+ %start2 = getelementptr inbounds [2002 x float], [2002 x float]* %region2, i64 0, i64 0
call void @foo(float *%start1, float *%start2)
- %ptr1 = getelementptr inbounds [2002 x float]* %region1, i64 0, i64 2
- %ptr2 = getelementptr inbounds [2002 x float]* %region2, i64 0, i64 2
- %float1 = load float *%ptr1
- %float2 = load float *%ptr2
+ %ptr1 = getelementptr inbounds [2002 x float], [2002 x float]* %region1, i64 0, i64 2
+ %ptr2 = getelementptr inbounds [2002 x float], [2002 x float]* %region2, i64 0, i64 2
+ %float1 = load float , float *%ptr1
+ %float2 = load float , float *%ptr2
%double1 = fpext float %float1 to double
%double2 = fpext float %float2 to double
store volatile double %double1, double *%dst
@@ -171,13 +171,13 @@ define void @f6(double *%dst) {
; CHECK-FP: br %r14
%region1 = alloca [2002 x float], align 8
%region2 = alloca [2002 x float], align 8
- %start1 = getelementptr inbounds [2002 x float]* %region1, i64 0, i64 0
- %start2 = getelementptr inbounds [2002 x float]* %region2, i64 0, i64 0
+ %start1 = getelementptr inbounds [2002 x float], [2002 x float]* %region1, i64 0, i64 0
+ %start2 = getelementptr inbounds [2002 x float], [2002 x float]* %region2, i64 0, i64 0
call void @foo(float *%start1, float *%start2)
- %ptr1 = getelementptr inbounds [2002 x float]* %region1, i64 0, i64 3
- %ptr2 = getelementptr inbounds [2002 x float]* %region2, i64 0, i64 3
- %float1 = load float *%ptr1
- %float2 = load float *%ptr2
+ %ptr1 = getelementptr inbounds [2002 x float], [2002 x float]* %region1, i64 0, i64 3
+ %ptr2 = getelementptr inbounds [2002 x float], [2002 x float]* %region2, i64 0, i64 3
+ %float1 = load float , float *%ptr1
+ %float2 = load float , float *%ptr2
%double1 = fpext float %float1 to double
%double2 = fpext float %float2 to double
store volatile double %double1, double *%dst
@@ -200,13 +200,13 @@ define void @f7(double *%dst) {
; CHECK-FP: br %r14
%region1 = alloca [2004 x float], align 8
%region2 = alloca [2004 x float], align 8
- %start1 = getelementptr inbounds [2004 x float]* %region1, i64 0, i64 0
- %start2 = getelementptr inbounds [2004 x float]* %region2, i64 0, i64 0
+ %start1 = getelementptr inbounds [2004 x float], [2004 x float]* %region1, i64 0, i64 0
+ %start2 = getelementptr inbounds [2004 x float], [2004 x float]* %region2, i64 0, i64 0
call void @foo(float *%start1, float *%start2)
- %ptr1 = getelementptr inbounds [2004 x float]* %region1, i64 0, i64 1023
- %ptr2 = getelementptr inbounds [2004 x float]* %region2, i64 0, i64 1023
- %float1 = load float *%ptr1
- %float2 = load float *%ptr2
+ %ptr1 = getelementptr inbounds [2004 x float], [2004 x float]* %region1, i64 0, i64 1023
+ %ptr2 = getelementptr inbounds [2004 x float], [2004 x float]* %region2, i64 0, i64 1023
+ %float1 = load float , float *%ptr1
+ %float2 = load float , float *%ptr2
%double1 = fpext float %float1 to double
%double2 = fpext float %float2 to double
store volatile double %double1, double *%dst
@@ -228,13 +228,13 @@ define void @f8(double *%dst) {
; CHECK-FP: br %r14
%region1 = alloca [2006 x float], align 8
%region2 = alloca [2006 x float], align 8
- %start1 = getelementptr inbounds [2006 x float]* %region1, i64 0, i64 0
- %start2 = getelementptr inbounds [2006 x float]* %region2, i64 0, i64 0
+ %start1 = getelementptr inbounds [2006 x float], [2006 x float]* %region1, i64 0, i64 0
+ %start2 = getelementptr inbounds [2006 x float], [2006 x float]* %region2, i64 0, i64 0
call void @foo(float *%start1, float *%start2)
- %ptr1 = getelementptr inbounds [2006 x float]* %region1, i64 0, i64 1023
- %ptr2 = getelementptr inbounds [2006 x float]* %region2, i64 0, i64 1023
- %float1 = load float *%ptr1
- %float2 = load float *%ptr2
+ %ptr1 = getelementptr inbounds [2006 x float], [2006 x float]* %region1, i64 0, i64 1023
+ %ptr2 = getelementptr inbounds [2006 x float], [2006 x float]* %region2, i64 0, i64 1023
+ %float1 = load float , float *%ptr1
+ %float2 = load float , float *%ptr2
%double1 = fpext float %float1 to double
%double2 = fpext float %float2 to double
store volatile double %double1, double *%dst
@@ -257,13 +257,13 @@ define void @f9(double *%dst) {
; CHECK-FP: br %r14
%region1 = alloca [2006 x float], align 8
%region2 = alloca [2006 x float], align 8
- %start1 = getelementptr inbounds [2006 x float]* %region1, i64 0, i64 0
- %start2 = getelementptr inbounds [2006 x float]* %region2, i64 0, i64 0
+ %start1 = getelementptr inbounds [2006 x float], [2006 x float]* %region1, i64 0, i64 0
+ %start2 = getelementptr inbounds [2006 x float], [2006 x float]* %region2, i64 0, i64 0
call void @foo(float *%start1, float *%start2)
- %ptr1 = getelementptr inbounds [2006 x float]* %region1, i64 0, i64 1024
- %ptr2 = getelementptr inbounds [2006 x float]* %region2, i64 0, i64 1024
- %float1 = load float *%ptr1
- %float2 = load float *%ptr2
+ %ptr1 = getelementptr inbounds [2006 x float], [2006 x float]* %region1, i64 0, i64 1024
+ %ptr2 = getelementptr inbounds [2006 x float], [2006 x float]* %region2, i64 0, i64 1024
+ %float1 = load float , float *%ptr1
+ %float2 = load float , float *%ptr2
%double1 = fpext float %float1 to double
%double2 = fpext float %float2 to double
store volatile double %double1, double *%dst
@@ -291,20 +291,20 @@ define void @f10(i32 *%vptr, double *%dst) {
; CHECK-FP: br %r14
%region1 = alloca [978 x float], align 8
%region2 = alloca [978 x float], align 8
- %start1 = getelementptr inbounds [978 x float]* %region1, i64 0, i64 0
- %start2 = getelementptr inbounds [978 x float]* %region2, i64 0, i64 0
+ %start1 = getelementptr inbounds [978 x float], [978 x float]* %region1, i64 0, i64 0
+ %start2 = getelementptr inbounds [978 x float], [978 x float]* %region2, i64 0, i64 0
call void @foo(float *%start1, float *%start2)
- %ptr1 = getelementptr inbounds [978 x float]* %region1, i64 0, i64 2
- %ptr2 = getelementptr inbounds [978 x float]* %region2, i64 0, i64 2
- %i0 = load volatile i32 *%vptr
- %i1 = load volatile i32 *%vptr
- %i2 = load volatile i32 *%vptr
- %i3 = load volatile i32 *%vptr
- %i4 = load volatile i32 *%vptr
- %i5 = load volatile i32 *%vptr
- %i14 = load volatile i32 *%vptr
- %float1 = load float *%ptr1
- %float2 = load float *%ptr2
+ %ptr1 = getelementptr inbounds [978 x float], [978 x float]* %region1, i64 0, i64 2
+ %ptr2 = getelementptr inbounds [978 x float], [978 x float]* %region2, i64 0, i64 2
+ %i0 = load volatile i32 , i32 *%vptr
+ %i1 = load volatile i32 , i32 *%vptr
+ %i2 = load volatile i32 , i32 *%vptr
+ %i3 = load volatile i32 , i32 *%vptr
+ %i4 = load volatile i32 , i32 *%vptr
+ %i5 = load volatile i32 , i32 *%vptr
+ %i14 = load volatile i32 , i32 *%vptr
+ %float1 = load float , float *%ptr1
+ %float2 = load float , float *%ptr2
%double1 = fpext float %float1 to double
%double2 = fpext float %float2 to double
store volatile double %double1, double *%dst
@@ -334,19 +334,19 @@ define void @f11(double *%dst, i64 %index) {
; CHECK-FP: br %r14
%region1 = alloca [978 x float], align 8
%region2 = alloca [978 x float], align 8
- %start1 = getelementptr inbounds [978 x float]* %region1, i64 0, i64 0
- %start2 = getelementptr inbounds [978 x float]* %region2, i64 0, i64 0
+ %start1 = getelementptr inbounds [978 x float], [978 x float]* %region1, i64 0, i64 0
+ %start2 = getelementptr inbounds [978 x float], [978 x float]* %region2, i64 0, i64 0
call void @foo(float *%start1, float *%start2)
- %elem1 = getelementptr inbounds [978 x float]* %region1, i64 0, i64 2
- %elem2 = getelementptr inbounds [978 x float]* %region2, i64 0, i64 2
+ %elem1 = getelementptr inbounds [978 x float], [978 x float]* %region1, i64 0, i64 2
+ %elem2 = getelementptr inbounds [978 x float], [978 x float]* %region2, i64 0, i64 2
%base1 = ptrtoint float *%elem1 to i64
%base2 = ptrtoint float *%elem2 to i64
%addr1 = add i64 %base1, %index
%addr2 = add i64 %base2, %index
%ptr1 = inttoptr i64 %addr1 to float *
%ptr2 = inttoptr i64 %addr2 to float *
- %float1 = load float *%ptr1
- %float2 = load float *%ptr2
+ %float1 = load float , float *%ptr1
+ %float2 = load float , float *%ptr2
%double1 = fpext float %float1 to double
%double2 = fpext float %float2 to double
store volatile double %double1, double *%dst
diff --git a/test/CodeGen/SystemZ/frame-16.ll b/test/CodeGen/SystemZ/frame-16.ll
index f7e2dfa35149..75da04447b3a 100644
--- a/test/CodeGen/SystemZ/frame-16.ll
+++ b/test/CodeGen/SystemZ/frame-16.ll
@@ -33,8 +33,8 @@ define void @f1(i8 %byte) {
; CHECK-FP: br %r14
%region1 = alloca [3912 x i8], align 8
%region2 = alloca [3912 x i8], align 8
- %ptr1 = getelementptr inbounds [3912 x i8]* %region1, i64 0, i64 7
- %ptr2 = getelementptr inbounds [3912 x i8]* %region2, i64 0, i64 7
+ %ptr1 = getelementptr inbounds [3912 x i8], [3912 x i8]* %region1, i64 0, i64 7
+ %ptr2 = getelementptr inbounds [3912 x i8], [3912 x i8]* %region2, i64 0, i64 7
store volatile i8 %byte, i8 *%ptr1
store volatile i8 %byte, i8 *%ptr2
ret void
@@ -51,8 +51,8 @@ define void @f2(i8 %byte) {
; CHECK-FP: br %r14
%region1 = alloca [3912 x i8], align 8
%region2 = alloca [3912 x i8], align 8
- %ptr1 = getelementptr inbounds [3912 x i8]* %region1, i64 0, i64 8
- %ptr2 = getelementptr inbounds [3912 x i8]* %region2, i64 0, i64 8
+ %ptr1 = getelementptr inbounds [3912 x i8], [3912 x i8]* %region1, i64 0, i64 8
+ %ptr2 = getelementptr inbounds [3912 x i8], [3912 x i8]* %region2, i64 0, i64 8
store volatile i8 %byte, i8 *%ptr1
store volatile i8 %byte, i8 *%ptr2
ret void
@@ -72,8 +72,8 @@ define void @f3(i8 %byte) {
; CHECK-FP: br %r14
%region1 = alloca [524104 x i8], align 8
%region2 = alloca [524104 x i8], align 8
- %ptr1 = getelementptr inbounds [524104 x i8]* %region1, i64 0, i64 7
- %ptr2 = getelementptr inbounds [524104 x i8]* %region2, i64 0, i64 7
+ %ptr1 = getelementptr inbounds [524104 x i8], [524104 x i8]* %region1, i64 0, i64 7
+ %ptr2 = getelementptr inbounds [524104 x i8], [524104 x i8]* %region2, i64 0, i64 7
store volatile i8 %byte, i8 *%ptr1
store volatile i8 %byte, i8 *%ptr2
ret void
@@ -94,8 +94,8 @@ define void @f4(i8 %byte) {
; CHECK-FP: br %r14
%region1 = alloca [524104 x i8], align 8
%region2 = alloca [524104 x i8], align 8
- %ptr1 = getelementptr inbounds [524104 x i8]* %region1, i64 0, i64 8
- %ptr2 = getelementptr inbounds [524104 x i8]* %region2, i64 0, i64 8
+ %ptr1 = getelementptr inbounds [524104 x i8], [524104 x i8]* %region1, i64 0, i64 8
+ %ptr2 = getelementptr inbounds [524104 x i8], [524104 x i8]* %region2, i64 0, i64 8
store volatile i8 %byte, i8 *%ptr1
store volatile i8 %byte, i8 *%ptr2
ret void
@@ -115,8 +115,8 @@ define void @f5(i8 %byte) {
; CHECK-FP: br %r14
%region1 = alloca [524104 x i8], align 8
%region2 = alloca [524104 x i8], align 8
- %ptr1 = getelementptr inbounds [524104 x i8]* %region1, i64 0, i64 4103
- %ptr2 = getelementptr inbounds [524104 x i8]* %region2, i64 0, i64 4103
+ %ptr1 = getelementptr inbounds [524104 x i8], [524104 x i8]* %region1, i64 0, i64 4103
+ %ptr2 = getelementptr inbounds [524104 x i8], [524104 x i8]* %region2, i64 0, i64 4103
store volatile i8 %byte, i8 *%ptr1
store volatile i8 %byte, i8 *%ptr2
ret void
@@ -135,8 +135,8 @@ define void @f6(i8 %byte) {
; CHECK-FP: br %r14
%region1 = alloca [524104 x i8], align 8
%region2 = alloca [524104 x i8], align 8
- %ptr1 = getelementptr inbounds [524104 x i8]* %region1, i64 0, i64 4104
- %ptr2 = getelementptr inbounds [524104 x i8]* %region2, i64 0, i64 4104
+ %ptr1 = getelementptr inbounds [524104 x i8], [524104 x i8]* %region1, i64 0, i64 4104
+ %ptr2 = getelementptr inbounds [524104 x i8], [524104 x i8]* %region2, i64 0, i64 4104
store volatile i8 %byte, i8 *%ptr1
store volatile i8 %byte, i8 *%ptr2
ret void
@@ -158,8 +158,8 @@ define void @f7(i8 %byte) {
; CHECK-FP: br %r14
%region1 = alloca [1048400 x i8], align 8
%region2 = alloca [1048400 x i8], align 8
- %ptr1 = getelementptr inbounds [1048400 x i8]* %region1, i64 0, i64 524287
- %ptr2 = getelementptr inbounds [1048400 x i8]* %region2, i64 0, i64 524287
+ %ptr1 = getelementptr inbounds [1048400 x i8], [1048400 x i8]* %region1, i64 0, i64 524287
+ %ptr2 = getelementptr inbounds [1048400 x i8], [1048400 x i8]* %region2, i64 0, i64 524287
store volatile i8 %byte, i8 *%ptr1
store volatile i8 %byte, i8 *%ptr2
ret void
@@ -179,8 +179,8 @@ define void @f8(i8 %byte) {
; CHECK-FP: br %r14
%region1 = alloca [1048408 x i8], align 8
%region2 = alloca [1048408 x i8], align 8
- %ptr1 = getelementptr inbounds [1048408 x i8]* %region1, i64 0, i64 524287
- %ptr2 = getelementptr inbounds [1048408 x i8]* %region2, i64 0, i64 524287
+ %ptr1 = getelementptr inbounds [1048408 x i8], [1048408 x i8]* %region1, i64 0, i64 524287
+ %ptr2 = getelementptr inbounds [1048408 x i8], [1048408 x i8]* %region2, i64 0, i64 524287
store volatile i8 %byte, i8 *%ptr1
store volatile i8 %byte, i8 *%ptr2
ret void
@@ -209,8 +209,8 @@ define void @f9(i8 %byte) {
; CHECK-FP: br %r14
%region1 = alloca [1048408 x i8], align 8
%region2 = alloca [1048408 x i8], align 8
- %ptr1 = getelementptr inbounds [1048408 x i8]* %region1, i64 0, i64 524288
- %ptr2 = getelementptr inbounds [1048408 x i8]* %region2, i64 0, i64 524288
+ %ptr1 = getelementptr inbounds [1048408 x i8], [1048408 x i8]* %region1, i64 0, i64 524288
+ %ptr2 = getelementptr inbounds [1048408 x i8], [1048408 x i8]* %region2, i64 0, i64 524288
store volatile i8 %byte, i8 *%ptr1
store volatile i8 %byte, i8 *%ptr2
ret void
@@ -233,14 +233,14 @@ define void @f10(i32 *%vptr, i8 %byte) {
; CHECK-FP: stc %r3, 0([[REGISTER]],%r11)
; CHECK-FP: lg [[REGISTER]], [[OFFSET]](%r11)
; CHECK-FP: br %r14
- %i0 = load volatile i32 *%vptr
- %i1 = load volatile i32 *%vptr
- %i4 = load volatile i32 *%vptr
- %i5 = load volatile i32 *%vptr
+ %i0 = load volatile i32 , i32 *%vptr
+ %i1 = load volatile i32 , i32 *%vptr
+ %i4 = load volatile i32 , i32 *%vptr
+ %i5 = load volatile i32 , i32 *%vptr
%region1 = alloca [524104 x i8], align 8
%region2 = alloca [524104 x i8], align 8
- %ptr1 = getelementptr inbounds [524104 x i8]* %region1, i64 0, i64 8
- %ptr2 = getelementptr inbounds [524104 x i8]* %region2, i64 0, i64 8
+ %ptr1 = getelementptr inbounds [524104 x i8], [524104 x i8]* %region1, i64 0, i64 8
+ %ptr2 = getelementptr inbounds [524104 x i8], [524104 x i8]* %region2, i64 0, i64 8
store volatile i8 %byte, i8 *%ptr1
store volatile i8 %byte, i8 *%ptr2
store volatile i32 %i0, i32 *%vptr
@@ -272,23 +272,23 @@ define void @f11(i32 *%vptr, i8 %byte) {
; CHECK-FP: lg [[REGISTER]], [[OFFSET]](%r11)
; CHECK-FP: lmg %r6, %r15,
; CHECK-FP: br %r14
- %i0 = load volatile i32 *%vptr
- %i1 = load volatile i32 *%vptr
- %i4 = load volatile i32 *%vptr
- %i5 = load volatile i32 *%vptr
- %i6 = load volatile i32 *%vptr
- %i7 = load volatile i32 *%vptr
- %i8 = load volatile i32 *%vptr
- %i9 = load volatile i32 *%vptr
- %i10 = load volatile i32 *%vptr
- %i11 = load volatile i32 *%vptr
- %i12 = load volatile i32 *%vptr
- %i13 = load volatile i32 *%vptr
- %i14 = load volatile i32 *%vptr
+ %i0 = load volatile i32 , i32 *%vptr
+ %i1 = load volatile i32 , i32 *%vptr
+ %i4 = load volatile i32 , i32 *%vptr
+ %i5 = load volatile i32 , i32 *%vptr
+ %i6 = load volatile i32 , i32 *%vptr
+ %i7 = load volatile i32 , i32 *%vptr
+ %i8 = load volatile i32 , i32 *%vptr
+ %i9 = load volatile i32 , i32 *%vptr
+ %i10 = load volatile i32 , i32 *%vptr
+ %i11 = load volatile i32 , i32 *%vptr
+ %i12 = load volatile i32 , i32 *%vptr
+ %i13 = load volatile i32 , i32 *%vptr
+ %i14 = load volatile i32 , i32 *%vptr
%region1 = alloca [524104 x i8], align 8
%region2 = alloca [524104 x i8], align 8
- %ptr1 = getelementptr inbounds [524104 x i8]* %region1, i64 0, i64 8
- %ptr2 = getelementptr inbounds [524104 x i8]* %region2, i64 0, i64 8
+ %ptr1 = getelementptr inbounds [524104 x i8], [524104 x i8]* %region1, i64 0, i64 8
+ %ptr2 = getelementptr inbounds [524104 x i8], [524104 x i8]* %region2, i64 0, i64 8
store volatile i8 %byte, i8 *%ptr1
store volatile i8 %byte, i8 *%ptr2
store volatile i32 %i0, i32 *%vptr
@@ -323,8 +323,8 @@ define void @f12(i8 %byte, i64 %index) {
%region1 = alloca [524104 x i8], align 8
%region2 = alloca [524104 x i8], align 8
%index1 = add i64 %index, 8
- %ptr1 = getelementptr inbounds [524104 x i8]* %region1, i64 0, i64 %index1
- %ptr2 = getelementptr inbounds [524104 x i8]* %region2, i64 0, i64 %index1
+ %ptr1 = getelementptr inbounds [524104 x i8], [524104 x i8]* %region1, i64 0, i64 %index1
+ %ptr2 = getelementptr inbounds [524104 x i8], [524104 x i8]* %region2, i64 0, i64 %index1
store volatile i8 %byte, i8 *%ptr1
store volatile i8 %byte, i8 *%ptr2
ret void
diff --git a/test/CodeGen/SystemZ/frame-17.ll b/test/CodeGen/SystemZ/frame-17.ll
index 97cf83dfd78e..502e541bafc1 100644
--- a/test/CodeGen/SystemZ/frame-17.ll
+++ b/test/CodeGen/SystemZ/frame-17.ll
@@ -1,6 +1,6 @@
; Test spilling of FPRs.
;
-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s

; We need to save and restore 8 of the 16 FPRs and allocate an additional
; 4-byte spill slot, rounded to 8 bytes. The frame size should be exactly
@@ -31,23 +31,23 @@ define void @f1(float *%ptr) {
; CHECK: ld %f15, 168(%r15)
; CHECK: aghi %r15, 232
; CHECK: br %r14
- %l0 = load volatile float *%ptr
- %l1 = load volatile float *%ptr
- %l2 = load volatile float *%ptr
- %l3 = load volatile float *%ptr
- %l4 = load volatile float *%ptr
- %l5 = load volatile float *%ptr
- %l6 = load volatile float *%ptr
- %l7 = load volatile float *%ptr
- %l8 = load volatile float *%ptr
- %l9 = load volatile float *%ptr
- %l10 = load volatile float *%ptr
- %l11 = load volatile float *%ptr
- %l12 = load volatile float *%ptr
- %l13 = load volatile float *%ptr
- %l14 = load volatile float *%ptr
- %l15 = load volatile float *%ptr
- %lx = load volatile float *%ptr
+ %l0 = load volatile float , float *%ptr
+ %l1 = load volatile float , float *%ptr
+ %l2 = load volatile float , float *%ptr
+ %l3 = load volatile float , float *%ptr
+ %l4 = load volatile float , float *%ptr
+ %l5 = load volatile float , float *%ptr
+ %l6 = load volatile float , float *%ptr
+ %l7 = load volatile float , float *%ptr
+ %l8 = load volatile float , float *%ptr
+ %l9 = load volatile float , float *%ptr
+ %l10 = load volatile float , float *%ptr
+ %l11 = load volatile float , float *%ptr
+ %l12 = load volatile float , float *%ptr
+ %l13 = load volatile float , float *%ptr
+ %l14 = load volatile float , float *%ptr
+ %l15 = load volatile float , float *%ptr
+ %lx = load volatile float , float *%ptr
store volatile float %lx, float *%ptr
store volatile float %l15, float *%ptr
store volatile float %l14, float *%ptr
@@ -92,23 +92,23 @@ define void @f2(double *%ptr) {
; CHECK: ld %f15, 168(%r15)
; CHECK: aghi %r15, 232
; CHECK: br %r14
- %l0 = load volatile double *%ptr
- %l1 = load volatile double *%ptr
- %l2 = load volatile double *%ptr
- %l3 = load volatile double *%ptr
- %l4 = load volatile double *%ptr
- %l5 = load volatile double *%ptr
- %l6 = load volatile double *%ptr
- %l7 = load volatile double *%ptr
- %l8 = load volatile double *%ptr
- %l9 = load volatile double *%ptr
- %l10 = load volatile double *%ptr
- %l11 = load volatile double *%ptr
- %l12 = load volatile double *%ptr
- %l13 = load volatile double *%ptr
- %l14 = load volatile double *%ptr
- %l15 = load volatile double *%ptr
- %lx = load volatile double *%ptr
+ %l0 = load volatile double , double *%ptr
+ %l1 = load volatile double , double *%ptr
+ %l2 = load volatile double , double *%ptr
+ %l3 = load volatile double , double *%ptr
+ %l4 = load volatile double , double *%ptr
+ %l5 = load volatile double , double *%ptr
+ %l6 = load volatile double , double *%ptr
+ %l7 = load volatile double , double *%ptr
+ %l8 = load volatile double , double *%ptr
+ %l9 = load volatile double , double *%ptr
+ %l10 = load volatile double , double *%ptr
+ %l11 = load volatile double , double *%ptr
+ %l12 = load volatile double , double *%ptr
+ %l13 = load volatile double , double *%ptr
+ %l14 = load volatile double , double *%ptr
+ %l15 = load volatile double , double *%ptr
+ %lx = load volatile double , double *%ptr
store volatile double %lx, double *%ptr
store volatile double %l15, double *%ptr
store volatile double %l14, double *%ptr
@@ -155,15 +155,15 @@ define void @f3(fp128 *%ptr) {
; CHECK: ld %f15, 176(%r15)
; CHECK: aghi %r15, 240
; CHECK: br %r14
- %l0 = load volatile fp128 *%ptr
- %l1 = load volatile fp128 *%ptr
- %l4 = load volatile fp128 *%ptr
- %l5 = load volatile fp128 *%ptr
- %l8 = load volatile fp128 *%ptr
- %l9 = load volatile fp128 *%ptr
- %l12 = load volatile fp128 *%ptr
- %l13 = load volatile fp128 *%ptr
- %lx = load volatile fp128 *%ptr
+ %l0 = load volatile fp128 , fp128 *%ptr
+ %l1 = load volatile fp128 , fp128 *%ptr
+ %l4 = load volatile fp128 , fp128 *%ptr
+ %l5 = load volatile fp128 , fp128 *%ptr
+ %l8 = load volatile fp128 , fp128 *%ptr
+ %l9 = load volatile fp128 , fp128 *%ptr
+ %l12 = load volatile fp128 , fp128 *%ptr
+ %l13 = load volatile fp128 , fp128 *%ptr
+ %lx = load volatile fp128 , fp128 *%ptr
store volatile fp128 %lx, fp128 *%ptr
store volatile fp128 %l13, fp128 *%ptr
store volatile fp128 %l12, fp128 *%ptr
diff --git a/test/CodeGen/SystemZ/frame-18.ll b/test/CodeGen/SystemZ/frame-18.ll
index 21dfc1238a13..0f58e437f9fd 100644
--- a/test/CodeGen/SystemZ/frame-18.ll
+++ b/test/CodeGen/SystemZ/frame-18.ll
@@ -16,21 +16,21 @@ define void @f1(i32 *%ptr) {
; CHECK-NOT: 160(%r15)
; CHECK: lmg %r6, %r15, 216(%r15)
; CHECK: br %r14
- %l0 = load volatile i32 *%ptr
- %l1 = load volatile i32 *%ptr
- %l3 = load volatile i32 *%ptr
- %l4 = load volatile i32 *%ptr
- %l5 = load volatile i32 *%ptr
- %l6 = load volatile i32 *%ptr
- %l7 = load volatile i32 *%ptr
- %l8 = load volatile i32 *%ptr
- %l9 = load volatile i32 *%ptr
- %l10 = load volatile i32 *%ptr
- %l11 = load volatile i32 *%ptr
- %l12 = load volatile i32 *%ptr
- %l13 = load volatile i32 *%ptr
- %l14 = load volatile i32 *%ptr
- %lx = load volatile i32 *%ptr
+ %l0 = load volatile i32 , i32 *%ptr
+ %l1 = load volatile i32 , i32 *%ptr
+ %l3 = load volatile i32 , i32 *%ptr
+ %l4 = load volatile i32 , i32 *%ptr
+ %l5 = load volatile i32 , i32 *%ptr
+ %l6 = load volatile i32 , i32 *%ptr
+ %l7 = load volatile i32 , i32 *%ptr
+ %l8 = load volatile i32 , i32 *%ptr
+ %l9 = load volatile i32 , i32 *%ptr
+ %l10 = load volatile i32 , i32 *%ptr
+ %l11 = load volatile i32 , i32 *%ptr
+ %l12 = load volatile i32 , i32 *%ptr
+ %l13 = load volatile i32 , i32 *%ptr
+ %l14 = load volatile i32 , i32 *%ptr
+ %lx = load volatile i32 , i32 *%ptr
store volatile i32 %lx, i32 *%ptr
store volatile i32 %l14, i32 *%ptr
store volatile i32 %l13, i32 *%ptr
@@ -58,21 +58,21 @@ define void @f2(i64 *%ptr) {
; CHECK: lg [[REGISTER]], 160(%r15)
; CHECK: lmg %r6, %r15, 216(%r15)
; CHECK: br %r14
- %l0 = load volatile i64 *%ptr
- %l1 = load volatile i64 *%ptr
- %l3 = load volatile i64 *%ptr
- %l4 = load volatile i64 *%ptr
- %l5 = load volatile i64 *%ptr
- %l6 = load volatile i64 *%ptr
- %l7 = load volatile i64 *%ptr
- %l8 = load volatile i64 *%ptr
- %l9 = load volatile i64 *%ptr
- %l10 = load volatile i64 *%ptr
- %l11 = load volatile i64 *%ptr
- %l12 = load volatile i64 *%ptr
- %l13 = load volatile i64 *%ptr
- %l14 = load volatile i64 *%ptr
- %lx = load volatile i64 *%ptr
+ %l0 = load volatile i64 , i64 *%ptr
+ %l1 = load volatile i64 , i64 *%ptr
+ %l3 = load volatile i64 , i64 *%ptr
+ %l4 = load volatile i64 , i64 *%ptr
+ %l5 = load volatile i64 , i64 *%ptr
+ %l6 = load volatile i64 , i64 *%ptr
+ %l7 = load volatile i64 , i64 *%ptr
+ %l8 = load volatile i64 , i64 *%ptr
+ %l9 = load volatile i64 , i64 *%ptr
+ %l10 = load volatile i64 , i64 *%ptr
+ %l11 = load volatile i64 , i64 *%ptr
+ %l12 = load volatile i64 , i64 *%ptr
+ %l13 = load volatile i64 , i64 *%ptr
+ %l14 = load volatile i64 , i64 *%ptr
+ %lx = load volatile i64 , i64 *%ptr
store volatile i64 %lx, i64 *%ptr
store volatile i64 %l14, i64 *%ptr
store volatile i64 %l13, i64 *%ptr
diff --git a/test/CodeGen/SystemZ/frame-19.ll b/test/CodeGen/SystemZ/frame-19.ll
new file mode 100644
index 000000000000..f6e327c3ae39
--- /dev/null
+++ b/test/CodeGen/SystemZ/frame-19.ll
@@ -0,0 +1,314 @@
+; Test spilling of vector registers.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
+; We need to allocate a 16-byte spill slot and save the 8 call-saved FPRs.
+; The frame size should be exactly 160 + 16 + 8 * 8 = 240.
+define void @f1(<16 x i8> *%ptr) {
+; CHECK-LABEL: f1:
+; CHECK: aghi %r15, -240
+; CHECK-DAG: std %f8,
+; CHECK-DAG: std %f9,
+; CHECK-DAG: std %f10,
+; CHECK-DAG: std %f11,
+; CHECK-DAG: std %f12,
+; CHECK-DAG: std %f13,
+; CHECK-DAG: std %f14,
+; CHECK-DAG: std %f15,
+; CHECK: vst {{%v[0-9]+}}, 160(%r15)
+; CHECK: vl {{%v[0-9]+}}, 160(%r15)
+; CHECK-DAG: ld %f8,
+; CHECK-DAG: ld %f9,
+; CHECK-DAG: ld %f10,
+; CHECK-DAG: ld %f11,
+; CHECK-DAG: ld %f12,
+; CHECK-DAG: ld %f13,
+; CHECK-DAG: ld %f14,
+; CHECK-DAG: ld %f15,
+; CHECK: aghi %r15, 240
+; CHECK: br %r14
+ %v0 = load volatile <16 x i8>, <16 x i8> *%ptr
+ %v1 = load volatile <16 x i8>, <16 x i8> *%ptr
+ %v2 = load volatile <16 x i8>, <16 x i8> *%ptr
+ %v3 = load volatile <16 x i8>, <16 x i8> *%ptr
+ %v4 = load volatile <16 x i8>, <16 x i8> *%ptr
+ %v5 = load volatile <16 x i8>, <16 x i8> *%ptr
+ %v6 = load volatile <16 x i8>, <16 x i8> *%ptr
+ %v7 = load volatile <16 x i8>, <16 x i8> *%ptr
+ %v8 = load volatile <16 x i8>, <16 x i8> *%ptr
+ %v9 = load volatile <16 x i8>, <16 x i8> *%ptr
+ %v10 = load volatile <16 x i8>, <16 x i8> *%ptr
+ %v11 = load volatile <16 x i8>, <16 x i8> *%ptr
+ %v12 = load volatile <16 x i8>, <16 x i8> *%ptr
+ %v13 = load volatile <16 x i8>, <16 x i8> *%ptr
+ %v14 = load volatile <16 x i8>, <16 x i8> *%ptr
+ %v15 = load volatile <16 x i8>, <16 x i8> *%ptr
+ %v16 = load volatile <16 x i8>, <16 x i8> *%ptr
+ %v17 = load volatile <16 x i8>, <16 x i8> *%ptr
+ %v18 = load volatile <16 x i8>, <16 x i8> *%ptr
+ %v19 = load volatile <16 x i8>, <16 x i8> *%ptr
+ %v20 = load volatile <16 x i8>, <16 x i8> *%ptr
+ %v21 = load volatile <16 x i8>, <16 x i8> *%ptr
+ %v22 = load volatile <16 x i8>, <16 x i8> *%ptr
+ %v23 = load volatile <16 x i8>, <16 x i8> *%ptr
+ %v24 = load volatile <16 x i8>, <16 x i8> *%ptr
+ %v25 = load volatile <16 x i8>, <16 x i8> *%ptr
+ %v26 = load volatile <16 x i8>, <16 x i8> *%ptr
+ %v27 = load volatile <16 x i8>, <16 x i8> *%ptr
+ %v28 = load volatile <16 x i8>, <16 x i8> *%ptr
+ %v29 = load volatile <16 x i8>, <16 x i8> *%ptr
+ %v30 = load volatile <16 x i8>, <16 x i8> *%ptr
+ %v31 = load volatile <16 x i8>, <16 x i8> *%ptr
+ %vx = load volatile <16 x i8>, <16 x i8> *%ptr
+ store volatile <16 x i8> %vx, <16 x i8> *%ptr
+ store volatile <16 x i8> %v31, <16 x i8> *%ptr
+ store volatile <16 x i8> %v30, <16 x i8> *%ptr
+ store volatile <16 x i8> %v29, <16 x i8> *%ptr
+ store volatile <16 x i8> %v28, <16 x i8> *%ptr
+ store volatile <16 x i8> %v27, <16 x i8> *%ptr
+ store volatile <16 x i8> %v26, <16 x i8> *%ptr
+ store volatile <16 x i8> %v25, <16 x i8> *%ptr
+ store volatile <16 x i8> %v24, <16 x i8> *%ptr
+ store volatile <16 x i8> %v23, <16 x i8> *%ptr
+ store volatile <16 x i8> %v22, <16 x i8> *%ptr
+ store volatile <16 x i8> %v21, <16 x i8> *%ptr
+ store volatile <16 x i8> %v20, <16 x i8> *%ptr
+ store volatile <16 x i8> %v19, <16 x i8> *%ptr
+ store volatile <16 x i8> %v18, <16 x i8> *%ptr
+ store volatile <16 x i8> %v17, <16 x i8> *%ptr
+ store volatile <16 x i8> %v16, <16 x i8> *%ptr
+ store volatile <16 x i8> %v15, <16 x i8> *%ptr
+ store volatile <16 x i8> %v14, <16 x i8> *%ptr
+ store volatile <16 x i8> %v13, <16 x i8> *%ptr
+ store volatile <16 x i8> %v12, <16 x i8> *%ptr
+ store volatile <16 x i8> %v11, <16 x i8> *%ptr
+ store volatile <16 x i8> %v10, <16 x i8> *%ptr
+ store volatile <16 x i8> %v9, <16 x i8> *%ptr
+ store volatile <16 x i8> %v8, <16 x i8> *%ptr
+ store volatile <16 x i8> %v7, <16 x i8> *%ptr
+ store volatile <16 x i8> %v6, <16 x i8> *%ptr
+ store volatile <16 x i8> %v5, <16 x i8> *%ptr
+ store volatile <16 x i8> %v4, <16 x i8> *%ptr
+ store volatile <16 x i8> %v3, <16 x i8> *%ptr
+ store volatile <16 x i8> %v2, <16 x i8> *%ptr
+ store volatile <16 x i8> %v1, <16 x i8> *%ptr
+ store volatile <16 x i8> %v0, <16 x i8> *%ptr
+ ret void
+}
+
+; Like f1, but no 16-byte slot should be needed.
+define void @f2(<16 x i8> *%ptr) {
+; CHECK-LABEL: f2:
+; CHECK: aghi %r15, -224
+; CHECK-DAG: std %f8,
+; CHECK-DAG: std %f9,
+; CHECK-DAG: std %f10,
+; CHECK-DAG: std %f11,
+; CHECK-DAG: std %f12,
+; CHECK-DAG: std %f13,
+; CHECK-DAG: std %f14,
+; CHECK-DAG: std %f15,
+; CHECK-NOT: vst {{.*}}(%r15)
+; CHECK-NOT: vl {{.*}}(%r15)
+; CHECK-DAG: ld %f8,
+; CHECK-DAG: ld %f9,
+; CHECK-DAG: ld %f10,
+; CHECK-DAG: ld %f11,
+; CHECK-DAG: ld %f12,
+; CHECK-DAG: ld %f13,
+; CHECK-DAG: ld %f14,
+; CHECK-DAG: ld %f15,
+; CHECK: aghi %r15, 224
+; CHECK: br %r14
+ %v0 = load volatile <16 x i8>, <16 x i8> *%ptr
+ %v1 = load volatile <16 x i8>, <16 x i8> *%ptr
+ %v2 = load volatile <16 x i8>, <16 x i8> *%ptr
+ %v3 = load volatile <16 x i8>, <16 x i8> *%ptr
+ %v4 = load volatile <16 x i8>, <16 x i8> *%ptr
+ %v5 = load volatile <16 x i8>, <16 x i8> *%ptr
+ %v6 = load volatile <16 x i8>, <16 x i8> *%ptr
+ %v7 = load volatile <16 x i8>, <16 x i8> *%ptr
+ %v8 = load volatile <16 x i8>, <16 x i8> *%ptr
+ %v9 = load volatile <16 x i8>, <16 x i8> *%ptr
+ %v10 = load volatile <16 x i8>, <16 x i8> *%ptr
+ %v11 = load volatile <16 x i8>, <16 x i8> *%ptr
+ %v12 = load volatile <16 x i8>, <16 x i8> *%ptr
+ %v13 = load volatile <16 x i8>, <16 x i8> *%ptr
+ %v14 = load volatile <16 x i8>, <16 x i8> *%ptr
+ %v15 = load volatile <16 x i8>, <16 x i8> *%ptr
+ %v16 = load volatile <16 x i8>, <16 x i8> *%ptr
+ %v17 = load volatile <16 x i8>, <16 x i8> *%ptr
+ %v18 = load volatile <16 x i8>, <16 x i8> *%ptr
+ %v19 = load volatile <16 x i8>, <16 x i8> *%ptr
+ %v20 = load volatile <16 x i8>, <16 x i8> *%ptr
+ %v21 = load volatile <16 x i8>, <16 x i8> *%ptr
+ %v22 = load volatile <16 x i8>, <16 x i8> *%ptr
+ %v23 = load volatile <16 x i8>, <16 x i8> *%ptr
+ %v24 = load volatile <16 x i8>, <16 x i8> *%ptr
+ %v25 = load volatile <16 x i8>, <16 x i8> *%ptr
+ %v26 = load volatile <16 x i8>, <16 x i8> *%ptr
+ %v27 = load volatile <16 x i8>, <16 x i8> *%ptr
+ %v28 = load volatile <16 x i8>, <16 x i8> *%ptr
+ %v29 = load volatile <16 x i8>, <16 x i8> *%ptr
+ %v30 = load volatile <16 x i8>, <16 x i8> *%ptr
+ %v31 = load volatile <16 x i8>, <16 x i8> *%ptr
+ store volatile <16 x i8> %v31, <16 x i8> *%ptr
+ store volatile <16 x i8> %v30, <16 x i8> *%ptr
+ store volatile <16 x i8> %v29, <16 x i8> *%ptr
+ store volatile <16 x i8> %v28, <16 x i8> *%ptr
+ store volatile <16 x i8> %v27, <16 x i8> *%ptr
+ store volatile <16 x i8> %v26, <16 x i8> *%ptr
+ store volatile <16 x i8> %v25, <16 x i8> *%ptr
+ store volatile <16 x i8> %v24, <16 x i8> *%ptr
+ store volatile <16 x i8> %v23, <16 x i8> *%ptr
+ store volatile <16 x i8> %v22, <16 x i8> *%ptr
+ store volatile <16 x i8> %v21, <16 x i8> *%ptr
+ store volatile <16 x i8> %v20, <16 x i8> *%ptr
+ store volatile <16 x i8> %v19, <16 x i8> *%ptr
+ store volatile <16 x i8> %v18, <16 x i8> *%ptr
+ store volatile <16 x i8> %v17, <16 x i8> *%ptr
+ store volatile <16 x i8> %v16, <16 x i8> *%ptr
+ store volatile <16 x i8> %v15, <16 x i8> *%ptr
+ store volatile <16 x i8> %v14, <16 x i8> *%ptr
+ store volatile <16 x i8> %v13, <16 x i8> *%ptr
+ store volatile <16 x i8> %v12, <16 x i8> *%ptr
+ store volatile <16 x i8> %v11, <16 x i8> *%ptr
+ store volatile <16 x i8> %v10, <16 x i8> *%ptr
+ store volatile <16 x i8> %v9, <16 x i8> *%ptr
+ store volatile <16 x i8> %v8, <16 x i8> *%ptr
+ store volatile <16 x i8> %v7, <16 x i8> *%ptr
+ store volatile <16 x i8> %v6, <16 x i8> *%ptr
+ store volatile <16 x i8> %v5, <16 x i8> *%ptr
+ store volatile <16 x i8> %v4, <16 x i8> *%ptr
+ store volatile <16 x i8> %v3, <16 x i8> *%ptr
+ store volatile <16 x i8> %v2, <16 x i8> *%ptr
+ store volatile <16 x i8> %v1, <16 x i8> *%ptr
+ store volatile <16 x i8> %v0, <16 x i8> *%ptr
+ ret void
+}
+
+; Like f2, but only %f8 should be saved.
+define void @f3(<16 x i8> *%ptr) {
+; CHECK-LABEL: f3:
+; CHECK: aghi %r15, -168
+; CHECK-DAG: std %f8,
+; CHECK-NOT: vst {{.*}}(%r15)
+; CHECK-NOT: vl {{.*}}(%r15)
+; CHECK-NOT: %v9
+; CHECK-NOT: %v10
+; CHECK-NOT: %v11
+; CHECK-NOT: %v12
+; CHECK-NOT: %v13
+; CHECK-NOT: %v14
+; CHECK-NOT: %v15
+; CHECK-DAG: ld %f8,
+; CHECK: aghi %r15, 168
+; CHECK: br %r14
+ %v0 = load volatile <16 x i8>, <16 x i8> *%ptr
+ %v1 = load volatile <16 x i8>, <16 x i8> *%ptr
+ %v2 = load volatile <16 x i8>, <16 x i8> *%ptr
+ %v3 = load volatile <16 x i8>, <16 x i8> *%ptr
+ %v4 = load volatile <16 x i8>, <16 x i8> *%ptr
+ %v5 = load volatile <16 x i8>, <16 x i8> *%ptr
+ %v6 = load volatile <16 x i8>, <16 x i8> *%ptr
+ %v7 = load volatile <16 x i8>, <16 x i8> *%ptr
+ %v8 = load volatile <16 x i8>, <16 x i8> *%ptr
+ %v16 = load volatile <16 x i8>, <16 x i8> *%ptr
+ %v17 = load volatile <16 x i8>, <16 x i8> *%ptr
+ %v18 = load volatile <16 x i8>, <16 x i8> *%ptr
+ %v19 = load volatile <16 x i8>, <16 x i8> *%ptr
+ %v20 = load volatile <16 x i8>, <16 x i8> *%ptr
+ %v21 = load volatile <16 x i8>, <16 x i8> *%ptr
+ %v22 = load volatile <16 x i8>, <16 x i8> *%ptr
+ %v23 = load volatile <16 x i8>, <16 x i8> *%ptr
+ %v24 = load volatile <16 x i8>, <16 x i8> *%ptr
+ %v25 = load volatile <16 x i8>, <16 x i8> *%ptr
+ %v26 = load volatile <16 x i8>, <16 x i8> *%ptr
+ %v27 = load volatile <16 x i8>, <16 x i8> *%ptr
+ %v28 = load volatile <16 x i8>, <16 x i8> *%ptr
+ %v29 = load volatile <16 x i8>, <16 x i8> *%ptr
+ %v30 = load volatile <16 x i8>, <16 x i8> *%ptr
+ %v31 = load volatile <16 x i8>, <16 x i8> *%ptr
+ store volatile <16 x i8> %v31, <16 x i8> *%ptr
+ store volatile <16 x i8> %v30, <16 x i8> *%ptr
+ store volatile <16 x i8> %v29, <16 x i8> *%ptr
+ store volatile <16 x i8> %v28, <16 x i8> *%ptr
+ store volatile <16 x i8> %v27, <16 x i8> *%ptr
+ store volatile <16 x i8> %v26, <16 x i8> *%ptr
+ store volatile <16 x i8> %v25, <16 x i8> *%ptr
+ store volatile <16 x i8> %v24, <16 x i8> *%ptr
+ store volatile <16 x i8> %v23, <16 x i8> *%ptr
+ store volatile <16 x i8> %v22, <16 x i8> *%ptr
+ store volatile <16 x i8> %v21, <16 x i8> *%ptr
+ store volatile <16 x i8> %v20, <16 x i8> *%ptr
+ store volatile <16 x i8> %v19, <16 x i8> *%ptr
+ store volatile <16 x i8> %v18, <16 x i8> *%ptr
+ store volatile <16 x i8> %v17, <16 x i8> *%ptr
+ store volatile <16 x i8> %v16, <16 x i8> *%ptr
+ store volatile <16 x i8> %v8, <16 x i8> *%ptr
+ store volatile <16 x i8> %v7, <16 x i8> *%ptr
+ store volatile <16 x i8> %v6, <16 x i8> *%ptr
+ store volatile <16 x i8> %v5, <16 x i8> *%ptr
+ store volatile <16 x i8> %v4, <16 x i8> *%ptr
+ store volatile <16 x i8> %v3, <16 x i8> *%ptr
+ store volatile <16 x i8> %v2, <16 x i8> *%ptr
+ store volatile <16 x i8> %v1, <16 x i8> *%ptr
+ store volatile <16 x i8> %v0, <16 x i8> *%ptr
+ ret void
+}
+
+; Like f2, but no registers should be saved.
+define void @f4(<16 x i8> *%ptr) {
+; CHECK-LABEL: f4:
+; CHECK-NOT: %r15
+; CHECK: br %r14
+ %v0 = load volatile <16 x i8>, <16 x i8> *%ptr
+ %v1 = load volatile <16 x i8>, <16 x i8> *%ptr
+ %v2 = load volatile <16 x i8>, <16 x i8> *%ptr
+ %v3 = load volatile <16 x i8>, <16 x i8> *%ptr
+ %v4 = load volatile <16 x i8>, <16 x i8> *%ptr
+ %v5 = load volatile <16 x i8>, <16 x i8> *%ptr
+ %v6 = load volatile <16 x i8>, <16 x i8> *%ptr
+ %v7 = load volatile <16 x i8>, <16 x i8> *%ptr
+ %v16 = load volatile <16 x i8>, <16 x i8> *%ptr
+ %v17 = load volatile <16 x i8>, <16 x i8> *%ptr
+ %v18 = load volatile <16 x i8>, <16 x i8> *%ptr
+ %v19 = load volatile <16 x i8>, <16 x i8> *%ptr
+ %v20 = load volatile <16 x i8>, <16 x i8> *%ptr
+ %v21 = load volatile <16 x i8>, <16 x i8> *%ptr
+ %v22 = load volatile <16 x i8>, <16 x i8> *%ptr
+ %v23 = load volatile <16 x i8>, <16 x i8> *%ptr
+ %v24 = load volatile <16 x i8>, <16 x i8> *%ptr
+ %v25 = load volatile <16 x i8>, <16 x i8> *%ptr
+ %v26 = load volatile <16 x i8>, <16 x i8> *%ptr
+ %v27 = load volatile <16 x i8>, <16 x i8> *%ptr
+ %v28 = load volatile <16 x i8>, <16 x i8> *%ptr
+ %v29 = load volatile <16 x i8>, <16 x i8> *%ptr
+ %v30 = load volatile <16 x i8>, <16 x i8> *%ptr
+ %v31 = load volatile <16 x i8>, <16 x i8> *%ptr
+ store volatile <16 x i8> %v31, <16 x i8> *%ptr
+ store volatile <16 x i8> %v30, <16 x i8> *%ptr
+ store volatile <16 x i8> %v29, <16 x i8> *%ptr
+ store volatile <16 x i8> %v28, <16 x i8> *%ptr
+ store volatile <16 x i8> %v27, <16 x i8> *%ptr
+ store volatile <16 x i8> %v26, <16 x i8> *%ptr
+ store volatile <16 x i8> %v25, <16 x i8> *%ptr
+ store volatile <16 x i8> %v24, <16 x i8> *%ptr
+ store volatile <16 x i8> %v23, <16 x i8> *%ptr
+ store volatile <16 x i8> %v22, <16 x i8> *%ptr
+ store volatile <16 x i8> %v21, <16 x i8> *%ptr
+ store volatile <16 x i8> %v20, <16 x i8> *%ptr
+ store volatile <16 x i8> %v19, <16 x i8> *%ptr
+ store volatile <16 x i8> %v18, <16 x i8> *%ptr
+ store volatile <16 x i8> %v17, <16 x i8> *%ptr
+ store volatile <16 x i8> %v16, <16 x i8> *%ptr
+ store volatile <16 x i8> %v7, <16 x i8> *%ptr
+ store volatile <16 x i8> %v6, <16 x i8> *%ptr
+ store volatile <16 x i8> %v5, <16 x i8> *%ptr
+ store volatile <16 x i8> %v4, <16 x i8> *%ptr
+ store volatile <16 x i8> %v3, <16 x i8> *%ptr
+ store volatile <16 x i8> %v2, <16 x i8> *%ptr
+ store volatile <16 x i8> %v1, <16 x i8> *%ptr
+ store volatile <16 x i8> %v0, <16 x i8> *%ptr
+ ret void
+}
diff --git a/test/CodeGen/SystemZ/frame-20.ll b/test/CodeGen/SystemZ/frame-20.ll
new file mode 100644
index 000000000000..8d601c6f6d51
--- /dev/null
+++ b/test/CodeGen/SystemZ/frame-20.ll
@@ -0,0 +1,445 @@
+; Like frame-03.ll, but for z13. In this case we have 16 more registers
+; available.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
+; This function should require all FPRs, but no other spill slots.
+; We need to save and restore 8 of the 16 FPRs, so the frame size
+; should be exactly 160 + 8 * 8 = 224. The CFA offset is 160
+; (the caller-allocated part of the frame) + 224.
+define void @f1(double *%ptr) {
+; CHECK-LABEL: f1:
+; CHECK: aghi %r15, -224
+; CHECK: .cfi_def_cfa_offset 384
+; CHECK: std %f8, 216(%r15)
+; CHECK: std %f9, 208(%r15)
+; CHECK: std %f10, 200(%r15)
+; CHECK: std %f11, 192(%r15)
+; CHECK: std %f12, 184(%r15)
+; CHECK: std %f13, 176(%r15)
+; CHECK: std %f14, 168(%r15)
+; CHECK: std %f15, 160(%r15)
+; CHECK: .cfi_offset %f8, -168
+; CHECK: .cfi_offset %f9, -176
+; CHECK: .cfi_offset %f10, -184
+; CHECK: .cfi_offset %f11, -192
+; CHECK: .cfi_offset %f12, -200
+; CHECK: .cfi_offset %f13, -208
+; CHECK: .cfi_offset %f14, -216
+; CHECK: .cfi_offset %f15, -224
+; CHECK-DAG: ld %f0, 0(%r2)
+; CHECK-DAG: ld %f7, 0(%r2)
+; CHECK-DAG: ld %f8, 0(%r2)
+; CHECK-DAG: ld %f15, 0(%r2)
+; CHECK-DAG: vlrepg %v16, 0(%r2)
+; CHECK-DAG: vlrepg %v23, 0(%r2)
+; CHECK-DAG: vlrepg %v24, 0(%r2)
+; CHECK-DAG: vlrepg %v31, 0(%r2)
+; CHECK: ld %f8, 216(%r15)
+; CHECK: ld %f9, 208(%r15)
+; CHECK: ld %f10, 200(%r15)
+; CHECK: ld %f11, 192(%r15)
+; CHECK: ld %f12, 184(%r15)
+; CHECK: ld %f13, 176(%r15)
+; CHECK: ld %f14, 168(%r15)
+; CHECK: ld %f15, 160(%r15)
+; CHECK: aghi %r15, 224
+; CHECK: br %r14
+ %l0 = load volatile double, double *%ptr
+ %l1 = load volatile double, double *%ptr
+ %l2 = load volatile double, double *%ptr
+ %l3 = load volatile double, double *%ptr
+ %l4 = load volatile double, double *%ptr
+ %l5 = load volatile double, double *%ptr
+ %l6 = load volatile double, double *%ptr
+ %l7 = load volatile double, double *%ptr
+ %l8 = load volatile double, double *%ptr
+ %l9 = load volatile double, double *%ptr
+ %l10 = load volatile double, double *%ptr
+ %l11 = load volatile double, double *%ptr
+ %l12 = load volatile double, double *%ptr
+ %l13 = load volatile double, double *%ptr
+ %l14 = load volatile double, double *%ptr
+ %l15 = load volatile double, double *%ptr
+ %l16 = load volatile double, double *%ptr
+ %l17 = load volatile double, double *%ptr
+ %l18 = load volatile double, double *%ptr
+ %l19 = load volatile double, double *%ptr
+ %l20 = load volatile double, double *%ptr
+ %l21 = load volatile double, double *%ptr
+ %l22 = load volatile double, double *%ptr
+ %l23 = load volatile double, double *%ptr
+ %l24 = load volatile double, double *%ptr
+ %l25 = load volatile double, double *%ptr
+ %l26 = load volatile double, double *%ptr
+ %l27 = load volatile double, double *%ptr
+ %l28 = load volatile double, double *%ptr
+ %l29 = load volatile double, double *%ptr
+ %l30 = load volatile double, double *%ptr
+ %l31 = load volatile double, double *%ptr
+ %acc0 = fsub double %l0, %l0
+ %acc1 = fsub double %l1, %acc0
+ %acc2 = fsub double %l2, %acc1
+ %acc3 = fsub double %l3, %acc2
+ %acc4 = fsub double %l4, %acc3
+ %acc5 = fsub double %l5, %acc4
+ %acc6 = fsub double %l6, %acc5
+ %acc7 = fsub double %l7, %acc6
+ %acc8 = fsub double %l8, %acc7
+ %acc9 = fsub double %l9, %acc8
+ %acc10 = fsub double %l10, %acc9
+ %acc11 = fsub double %l11, %acc10
+ %acc12 = fsub double %l12, %acc11
+ %acc13 = fsub double %l13, %acc12
+ %acc14 = fsub double %l14, %acc13
+ %acc15 = fsub double %l15, %acc14
+ %acc16 = fsub double %l16, %acc15
+ %acc17 = fsub double %l17, %acc16
+ %acc18 = fsub double %l18, %acc17
+ %acc19 = fsub double %l19, %acc18
+ %acc20 = fsub double %l20, %acc19
+ %acc21 = fsub double %l21, %acc20
+ %acc22 = fsub double %l22, %acc21
+ %acc23 = fsub double %l23, %acc22
+ %acc24 = fsub double %l24, %acc23
+ %acc25 = fsub double %l25, %acc24
+ %acc26 = fsub double %l26, %acc25
+ %acc27 = fsub double %l27, %acc26
+ %acc28 = fsub double %l28, %acc27
+ %acc29 = fsub double %l29, %acc28
+ %acc30 = fsub double %l30, %acc29
+ %acc31 = fsub double %l31, %acc30
+ store volatile double %acc0, double *%ptr
+ store volatile double %acc1, double *%ptr
+ store volatile double %acc2, double *%ptr
+ store volatile double %acc3, double *%ptr
+ store volatile double %acc4, double *%ptr
+ store volatile double %acc5, double *%ptr
+ store volatile double %acc6, double *%ptr
+ store volatile double %acc7, double *%ptr
+ store volatile double %acc8, double *%ptr
+ store volatile double %acc9, double *%ptr
+ store volatile double %acc10, double *%ptr
+ store volatile double %acc11, double *%ptr
+ store volatile double %acc12, double *%ptr
+ store volatile double %acc13, double *%ptr
+ store volatile double %acc14, double *%ptr
+ store volatile double %acc15, double *%ptr
+ store volatile double %acc16, double *%ptr
+ store volatile double %acc17, double *%ptr
+ store volatile double %acc18, double *%ptr
+ store volatile double %acc19, double *%ptr
+ store volatile double %acc20, double *%ptr
+ store volatile double %acc21, double *%ptr
+ store volatile double %acc22, double *%ptr
+ store volatile double %acc23, double *%ptr
+ store volatile double %acc24, double *%ptr
+ store volatile double %acc25, double *%ptr
+ store volatile double %acc26, double *%ptr
+ store volatile double %acc27, double *%ptr
+ store volatile double %acc28, double *%ptr
+ store volatile double %acc29, double *%ptr
+ store volatile double %acc30, double *%ptr
+ store volatile double %acc31, double *%ptr
+ ret void
+}
+
+; Like f1, but requires one fewer FPR. We allocate in numerical order,
+; so %f15 is the one that gets dropped.
+define void @f2(double *%ptr) {
+; CHECK-LABEL: f2:
+; CHECK: aghi %r15, -216
+; CHECK: .cfi_def_cfa_offset 376
+; CHECK: std %f8, 208(%r15)
+; CHECK: std %f9, 200(%r15)
+; CHECK: std %f10, 192(%r15)
+; CHECK: std %f11, 184(%r15)
+; CHECK: std %f12, 176(%r15)
+; CHECK: std %f13, 168(%r15)
+; CHECK: std %f14, 160(%r15)
+; CHECK: .cfi_offset %f8, -168
+; CHECK: .cfi_offset %f9, -176
+; CHECK: .cfi_offset %f10, -184
+; CHECK: .cfi_offset %f11, -192
+; CHECK: .cfi_offset %f12, -200
+; CHECK: .cfi_offset %f13, -208
+; CHECK: .cfi_offset %f14, -216
+; CHECK-NOT: %v15
+; CHECK-NOT: %f15
+; CHECK: ld %f8, 208(%r15)
+; CHECK: ld %f9, 200(%r15)
+; CHECK: ld %f10, 192(%r15)
+; CHECK: ld %f11, 184(%r15)
+; CHECK: ld %f12, 176(%r15)
+; CHECK: ld %f13, 168(%r15)
+; CHECK: ld %f14, 160(%r15)
+; CHECK: aghi %r15, 216
+; CHECK: br %r14
+ %l0 = load volatile double, double *%ptr
+ %l1 = load volatile double, double *%ptr
+ %l2 = load volatile double, double *%ptr
+ %l3 = load volatile double, double *%ptr
+ %l4 = load volatile double, double *%ptr
+ %l5 = load volatile double, double *%ptr
+ %l6 = load volatile double, double *%ptr
+ %l7 = load volatile double, double *%ptr
+ %l8 = load volatile double, double *%ptr
+ %l9 = load volatile double, double *%ptr
+ %l10 = load volatile double, double *%ptr
+ %l11 = load volatile double, double *%ptr
+ %l12 = load volatile double, double *%ptr
+ %l13 = load volatile double, double *%ptr
+ %l14 = load volatile double, double *%ptr
+ %l16 = load volatile double, double *%ptr
+ %l17 = load volatile double, double *%ptr
+ %l18 = load volatile double, double *%ptr
+ %l19 = load volatile double, double *%ptr
+ %l20 = load volatile double, double *%ptr
+ %l21 = load volatile double, double *%ptr
+ %l22 = load volatile double, double *%ptr
+ %l23 = load volatile double, double *%ptr
+ %l24 = load volatile double, double *%ptr
+ %l25 = load volatile double, double *%ptr
+ %l26 = load volatile double, double *%ptr
+ %l27 = load volatile double, double *%ptr
+ %l28 = load volatile double, double *%ptr
+ %l29 = load volatile double, double *%ptr
+ %l30 = load volatile double, double *%ptr
+ %l31 = load volatile double, double *%ptr
+ %acc0 = fsub double %l0, %l0
+ %acc1 = fsub double %l1, %acc0
+ %acc2 = fsub double %l2, %acc1
+ %acc3 = fsub double %l3, %acc2
+ %acc4 = fsub double %l4, %acc3
+ %acc5 = fsub double %l5, %acc4
+ %acc6 = fsub double %l6, %acc5
+ %acc7 = fsub double %l7, %acc6
+ %acc8 = fsub double %l8, %acc7
+ %acc9 = fsub double %l9, %acc8
+ %acc10 = fsub double %l10, %acc9
+ %acc11 = fsub double %l11, %acc10
+ %acc12 = fsub double %l12, %acc11
+ %acc13 = fsub double %l13, %acc12
+ %acc14 = fsub double %l14, %acc13
+ %acc16 = fsub double %l16, %acc14
+ %acc17 = fsub double %l17, %acc16
+ %acc18 = fsub double %l18, %acc17
+ %acc19 = fsub double %l19, %acc18
+ %acc20 = fsub double %l20, %acc19
+ %acc21 = fsub double %l21, %acc20
+ %acc22 = fsub double %l22, %acc21
+ %acc23 = fsub double %l23, %acc22
+ %acc24 = fsub double %l24, %acc23
+ %acc25 = fsub double %l25, %acc24
+ %acc26 = fsub double %l26, %acc25
+ %acc27 = fsub double %l27, %acc26
+ %acc28 = fsub double %l28, %acc27
+ %acc29 = fsub double %l29, %acc28
+ %acc30 = fsub double %l30, %acc29
+ %acc31 = fsub double %l31, %acc30
+ store volatile double %acc0, double *%ptr
+ store volatile double %acc1, double *%ptr
+ store volatile double %acc2, double *%ptr
+ store volatile double %acc3, double *%ptr
+ store volatile double %acc4, double *%ptr
+ store volatile double %acc5, double *%ptr
+ store volatile double %acc6, double *%ptr
+ store volatile double %acc7, double *%ptr
+ store volatile double %acc8, double *%ptr
+ store volatile double %acc9, double *%ptr
+ store volatile double %acc10, double *%ptr
+ store volatile double %acc11, double *%ptr
+ store volatile double %acc12, double *%ptr
+ store volatile double %acc13, double *%ptr
+ store volatile double %acc14, double *%ptr
+ store volatile double %acc16, double *%ptr
+ store volatile double %acc17, double *%ptr
+ store volatile double %acc18, double *%ptr
+ store volatile double %acc19, double *%ptr
+ store volatile double %acc20, double *%ptr
+ store volatile double %acc21, double *%ptr
+ store volatile double %acc22, double *%ptr
+ store volatile double %acc23, double *%ptr
+ store volatile double %acc24, double *%ptr
+ store volatile double %acc25, double *%ptr
+ store volatile double %acc26, double *%ptr
+ store volatile double %acc27, double *%ptr
+ store volatile double %acc28, double *%ptr
+ store volatile double %acc29, double *%ptr
+ store volatile double %acc30, double *%ptr
+ store volatile double %acc31, double *%ptr
+ ret void
+}
+
+; Like f1, but should require only one call-saved FPR.
+define void @f3(double *%ptr) {
+; CHECK-LABEL: f3:
+; CHECK: aghi %r15, -168
+; CHECK: .cfi_def_cfa_offset 328
+; CHECK: std %f8, 160(%r15)
+; CHECK: .cfi_offset %f8, -168
+; CHECK-NOT: {{%[fv]9}}
+; CHECK-NOT: {{%[fv]1[0-5]}}
+; CHECK: ld %f8, 160(%r15)
+; CHECK: aghi %r15, 168
+; CHECK: br %r14
+ %l0 = load volatile double, double *%ptr
+ %l1 = load volatile double, double *%ptr
+ %l2 = load volatile double, double *%ptr
+ %l3 = load volatile double, double *%ptr
+ %l4 = load volatile double, double *%ptr
+ %l5 = load volatile double, double *%ptr
+ %l6 = load volatile double, double *%ptr
+ %l7 = load volatile double, double *%ptr
+ %l8 = load volatile double, double *%ptr
+ %l16 = load volatile double, double *%ptr
+ %l17 = load volatile double, double *%ptr
+ %l18 = load volatile double, double *%ptr
+ %l19 = load volatile double, double *%ptr
+ %l20 = load volatile double, double *%ptr
+ %l21 = load volatile double, double *%ptr
+ %l22 = load volatile double, double *%ptr
+ %l23 = load volatile double, double *%ptr
+ %l24 = load volatile double, double *%ptr
+ %l25 = load volatile double, double *%ptr
+ %l26 = load volatile double, double *%ptr
+ %l27 = load volatile double, double *%ptr
+ %l28 = load volatile double, double *%ptr
+ %l29 = load volatile double, double *%ptr
+ %l30 = load volatile double, double *%ptr
+ %l31 = load volatile double, double *%ptr
+ %acc0 = fsub double %l0, %l0
+ %acc1 = fsub double %l1, %acc0
+ %acc2 = fsub double %l2, %acc1
+ %acc3 = fsub double %l3, %acc2
+ %acc4 = fsub double %l4, %acc3
+ %acc5 = fsub double %l5, %acc4
+ %acc6 = fsub double %l6, %acc5
+ %acc7 = fsub double %l7, %acc6
+ %acc8 = fsub double %l8, %acc7
+ %acc16 = fsub double %l16, %acc8
+ %acc17 = fsub double %l17, %acc16
+ %acc18 = fsub double %l18, %acc17
+ %acc19 = fsub double %l19, %acc18
+ %acc20 = fsub double %l20, %acc19
+ %acc21 = fsub double %l21, %acc20
+ %acc22 = fsub double %l22, %acc21
+ %acc23 = fsub double %l23, %acc22
+ %acc24 = fsub double %l24, %acc23
+ %acc25 = fsub double %l25, %acc24
+ %acc26 = fsub double %l26, %acc25
+ %acc27 = fsub double %l27, %acc26
+ %acc28 = fsub double %l28, %acc27
+ %acc29 = fsub double %l29, %acc28
+ %acc30 = fsub double %l30, %acc29
+ %acc31 = fsub double %l31, %acc30
+ store volatile double %acc0, double *%ptr
+ store volatile double %acc1, double *%ptr
+ store volatile double %acc2, double *%ptr
+ store volatile double %acc3, double *%ptr
+ store volatile double %acc4, double *%ptr
+ store volatile double %acc5, double *%ptr
+ store volatile double %acc6, double *%ptr
+ store volatile double %acc7, double *%ptr
+ store volatile double %acc8, double *%ptr
+ store volatile double %acc16, double *%ptr
+ store volatile double %acc17, double *%ptr
+ store volatile double %acc18, double *%ptr
+ store volatile double %acc19, double *%ptr
+ store volatile double %acc20, double *%ptr
+ store volatile double %acc21, double *%ptr
+ store volatile double %acc22, double *%ptr
+ store volatile double %acc23, double *%ptr
+ store volatile double %acc24, double *%ptr
+ store volatile double %acc25, double *%ptr
+ store volatile double %acc26, double *%ptr
+ store volatile double %acc27, double *%ptr
+ store volatile double %acc28, double *%ptr
+ store volatile double %acc29, double *%ptr
+ store volatile double %acc30, double *%ptr
+ store volatile double %acc31, double *%ptr
+ ret void
+}
+
+; This function should use all call-clobbered FPRs and vector registers
+; but no call-saved ones. It shouldn't need to create a frame.
+define void @f4(double *%ptr) {
+; CHECK-LABEL: f4:
+; CHECK-NOT: %r15
+; CHECK-NOT: {{%[fv][89]}}
+; CHECK-NOT: {{%[fv]1[0-5]}}
+; CHECK: br %r14
+ %l0 = load volatile double, double *%ptr
+ %l1 = load volatile double, double *%ptr
+ %l2 = load volatile double, double *%ptr
+ %l3 = load volatile double, double *%ptr
+ %l4 = load volatile double, double *%ptr
+ %l5 = load volatile double, double *%ptr
+ %l6 = load volatile double, double *%ptr
+ %l7 = load volatile double, double *%ptr
+ %l16 = load volatile double, double *%ptr
+ %l17 = load volatile double, double *%ptr
+ %l18 = load volatile double, double *%ptr
+ %l19 = load volatile double, double *%ptr
+ %l20 = load volatile double, double *%ptr
+ %l21 = load volatile double, double *%ptr
+ %l22 = load volatile double, double *%ptr
+ %l23 = load volatile double, double *%ptr
+ %l24 = load volatile double, double *%ptr
+ %l25 = load volatile double, double *%ptr
+ %l26 = load volatile double, double *%ptr
+ %l27 = load volatile double, double *%ptr
+ %l28 = load volatile double, double *%ptr
+ %l29 = load volatile double, double *%ptr
+ %l30 = load volatile double, double *%ptr
+ %l31 = load volatile double, double *%ptr
+ %acc0 = fsub double %l0, %l0
+ %acc1 = fsub double %l1, %acc0
+ %acc2 = fsub double %l2, %acc1
+ %acc3 = fsub double %l3, %acc2
+ %acc4 = fsub double %l4, %acc3
+ %acc5 = fsub double %l5, %acc4
+ %acc6 = fsub double %l6, %acc5
+ %acc7 = fsub double %l7, %acc6
+ %acc16 = fsub double %l16, %acc7
+ %acc17 = fsub double %l17, %acc16
+ %acc18 = fsub double %l18, %acc17
+ %acc19 = fsub double %l19, %acc18
+ %acc20 = fsub double %l20, %acc19
+ %acc21 = fsub double %l21, %acc20
+ %acc22 = fsub double %l22, %acc21
+ %acc23 = fsub double %l23, %acc22
+ %acc24 = fsub double %l24, %acc23
+ %acc25 = fsub double %l25, %acc24
+ %acc26 = fsub double %l26, %acc25
+ %acc27 = fsub double %l27, %acc26
+ %acc28 = fsub double %l28, %acc27
+ %acc29 = fsub double %l29, %acc28
+ %acc30 = fsub double %l30, %acc29
+ %acc31 = fsub double %l31, %acc30
+ store volatile double %acc0, double *%ptr
+ store volatile double %acc1, double *%ptr
+ store volatile double %acc2, double *%ptr
+ store volatile double %acc3, double *%ptr
+ store volatile double %acc4, double *%ptr
+ store volatile double %acc5, double *%ptr
+ store volatile double %acc6, double *%ptr
+ store volatile double %acc7, double *%ptr
+ store volatile double %acc16, double *%ptr
+ store volatile double %acc17, double *%ptr
+ store volatile double %acc18, double *%ptr
+ store volatile double %acc19, double *%ptr
+ store volatile double %acc20, double *%ptr
+ store volatile double %acc21, double *%ptr
+ store volatile double %acc22, double *%ptr
+ store volatile double %acc23, double *%ptr
+ store volatile double %acc24, double *%ptr
+ store volatile double %acc25, double *%ptr
+ store volatile double %acc26, double *%ptr
+ store volatile double %acc27, double *%ptr
+ store volatile double %acc28, double *%ptr
+ store volatile double %acc29, double *%ptr
+ store volatile double %acc30, double *%ptr
+ store volatile double %acc31, double *%ptr
+ ret void
+}
diff --git a/test/CodeGen/SystemZ/htm-intrinsics.ll b/test/CodeGen/SystemZ/htm-intrinsics.ll
new file mode 100644
index 000000000000..6441ef94b406
--- /dev/null
+++ b/test/CodeGen/SystemZ/htm-intrinsics.ll
@@ -0,0 +1,352 @@
+; Test transactional-execution intrinsics.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=zEC12 | FileCheck %s
+
+declare i32 @llvm.s390.tbegin(i8 *, i32)
+declare i32 @llvm.s390.tbegin.nofloat(i8 *, i32)
+declare void @llvm.s390.tbeginc(i8 *, i32)
+declare i32 @llvm.s390.tend()
+declare void @llvm.s390.tabort(i64)
+declare void @llvm.s390.ntstg(i64, i64 *)
+declare i32 @llvm.s390.etnd()
+declare void @llvm.s390.ppa.txassist(i32)
+
+; TBEGIN.
+define void @test_tbegin() {
+; CHECK-LABEL: test_tbegin:
+; CHECK-NOT: stmg
+; CHECK: std %f8,
+; CHECK: std %f9,
+; CHECK: std %f10,
+; CHECK: std %f11,
+; CHECK: std %f12,
+; CHECK: std %f13,
+; CHECK: std %f14,
+; CHECK: std %f15,
+; CHECK: tbegin 0, 65292
+; CHECK: ld %f8,
+; CHECK: ld %f9,
+; CHECK: ld %f10,
+; CHECK: ld %f11,
+; CHECK: ld %f12,
+; CHECK: ld %f13,
+; CHECK: ld %f14,
+; CHECK: ld %f15,
+; CHECK: br %r14
+ call i32 @llvm.s390.tbegin(i8 *null, i32 65292)
+ ret void
+}
+
+; TBEGIN (nofloat).
+define void @test_tbegin_nofloat1() {
+; CHECK-LABEL: test_tbegin_nofloat1:
+; CHECK-NOT: stmg
+; CHECK-NOT: std
+; CHECK: tbegin 0, 65292
+; CHECK: br %r14
+ call i32 @llvm.s390.tbegin.nofloat(i8 *null, i32 65292)
+ ret void
+}
+
+; TBEGIN (nofloat) with integer CC return value.
+define i32 @test_tbegin_nofloat2() {
+; CHECK-LABEL: test_tbegin_nofloat2:
+; CHECK-NOT: stmg
+; CHECK-NOT: std
+; CHECK: tbegin 0, 65292
+; CHECK: ipm %r2
+; CHECK: srl %r2, 28
+; CHECK: br %r14
+ %res = call i32 @llvm.s390.tbegin.nofloat(i8 *null, i32 65292)
+ ret i32 %res
+}
+
+; TBEGIN (nofloat) with implicit CC check.
+define void @test_tbegin_nofloat3(i32 *%ptr) {
+; CHECK-LABEL: test_tbegin_nofloat3:
+; CHECK-NOT: stmg
+; CHECK-NOT: std
+; CHECK: tbegin 0, 65292
+; CHECK: jnh {{\.L*}}
+; CHECK: mvhi 0(%r2), 0
+; CHECK: br %r14
+ %res = call i32 @llvm.s390.tbegin.nofloat(i8 *null, i32 65292)
+ %cmp = icmp eq i32 %res, 2
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ store i32 0, i32* %ptr, align 4
+ br label %if.end
+
+if.end: ; preds = %if.then, %entry
+ ret void
+}
+
+; TBEGIN (nofloat) with dual CC use.
+define i32 @test_tbegin_nofloat4(i32 %pad, i32 *%ptr) {
+; CHECK-LABEL: test_tbegin_nofloat4:
+; CHECK-NOT: stmg
+; CHECK-NOT: std
+; CHECK: tbegin 0, 65292
+; CHECK: ipm %r2
+; CHECK: srl %r2, 28
+; CHECK: cijlh %r2, 2, {{\.L*}}
+; CHECK: mvhi 0(%r3), 0
+; CHECK: br %r14
+ %res = call i32 @llvm.s390.tbegin.nofloat(i8 *null, i32 65292)
+ %cmp = icmp eq i32 %res, 2
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ store i32 0, i32* %ptr, align 4
+ br label %if.end
+
+if.end: ; preds = %if.then, %entry
+ ret i32 %res
+}
+
+; TBEGIN (nofloat) with register.
+define void @test_tbegin_nofloat5(i8 *%ptr) {
+; CHECK-LABEL: test_tbegin_nofloat5:
+; CHECK-NOT: stmg
+; CHECK-NOT: std
+; CHECK: tbegin 0(%r2), 65292
+; CHECK: br %r14
+ call i32 @llvm.s390.tbegin.nofloat(i8 *%ptr, i32 65292)
+ ret void
+}
+
+; TBEGIN (nofloat) with GRSM 0x0f00.
+define void @test_tbegin_nofloat6() {
+; CHECK-LABEL: test_tbegin_nofloat6:
+; CHECK: stmg %r6, %r15,
+; CHECK-NOT: std
+; CHECK: tbegin 0, 3840
+; CHECK: br %r14
+ call i32 @llvm.s390.tbegin.nofloat(i8 *null, i32 3840)
+ ret void
+}
+
+; TBEGIN (nofloat) with GRSM 0xf100.
+define void @test_tbegin_nofloat7() {
+; CHECK-LABEL: test_tbegin_nofloat7:
+; CHECK: stmg %r8, %r15,
+; CHECK-NOT: std
+; CHECK: tbegin 0, 61696
+; CHECK: br %r14
+ call i32 @llvm.s390.tbegin.nofloat(i8 *null, i32 61696)
+ ret void
+}
+
+; TBEGIN (nofloat) with GRSM 0xfe00 -- stack pointer added automatically.
+define void @test_tbegin_nofloat8() {
+; CHECK-LABEL: test_tbegin_nofloat8:
+; CHECK-NOT: stmg
+; CHECK-NOT: std
+; CHECK: tbegin 0, 65280
+; CHECK: br %r14
+ call i32 @llvm.s390.tbegin.nofloat(i8 *null, i32 65024)
+ ret void
+}
+
+; TBEGIN (nofloat) with GRSM 0xfb00 -- no frame pointer needed.
+define void @test_tbegin_nofloat9() {
+; CHECK-LABEL: test_tbegin_nofloat9:
+; CHECK: stmg %r10, %r15,
+; CHECK-NOT: std
+; CHECK: tbegin 0, 64256
+; CHECK: br %r14
+ call i32 @llvm.s390.tbegin.nofloat(i8 *null, i32 64256)
+ ret void
+}
+
+; TBEGIN (nofloat) with GRSM 0xfb00 -- frame pointer added automatically.
+define void @test_tbegin_nofloat10(i64 %n) {
+; CHECK-LABEL: test_tbegin_nofloat10:
+; CHECK: stmg %r11, %r15,
+; CHECK-NOT: std
+; CHECK: tbegin 0, 65280
+; CHECK: br %r14
+ %buf = alloca i8, i64 %n
+ call i32 @llvm.s390.tbegin.nofloat(i8 *null, i32 64256)
+ ret void
+}
+
+; TBEGINC.
+define void @test_tbeginc() {
+; CHECK-LABEL: test_tbeginc:
+; CHECK-NOT: stmg
+; CHECK-NOT: std
+; CHECK: tbeginc 0, 65288
+; CHECK: br %r14
+ call void @llvm.s390.tbeginc(i8 *null, i32 65288)
+ ret void
+}
+
+; TEND with integer CC return value.
+define i32 @test_tend1() {
+; CHECK-LABEL: test_tend1:
+; CHECK: tend
+; CHECK: ipm %r2
+; CHECK: srl %r2, 28
+; CHECK: br %r14
+ %res = call i32 @llvm.s390.tend()
+ ret i32 %res
+}
+
+; TEND with implicit CC check.
+define void @test_tend3(i32 *%ptr) {
+; CHECK-LABEL: test_tend3:
+; CHECK: tend
+; CHECK: je {{\.L*}}
+; CHECK: mvhi 0(%r2), 0
+; CHECK: br %r14
+ %res = call i32 @llvm.s390.tend()
+ %cmp = icmp eq i32 %res, 2
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ store i32 0, i32* %ptr, align 4
+ br label %if.end
+
+if.end: ; preds = %if.then, %entry
+ ret void
+}
+
+; TEND with dual CC use.
+define i32 @test_tend2(i32 %pad, i32 *%ptr) {
+; CHECK-LABEL: test_tend2:
+; CHECK: tend
+; CHECK: ipm %r2
+; CHECK: srl %r2, 28
+; CHECK: cijlh %r2, 2, {{\.L*}}
+; CHECK: mvhi 0(%r3), 0
+; CHECK: br %r14
+ %res = call i32 @llvm.s390.tend()
+ %cmp = icmp eq i32 %res, 2
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ store i32 0, i32* %ptr, align 4
+ br label %if.end
+
+if.end: ; preds = %if.then, %entry
+ ret i32 %res
+}
+
+; TABORT with register only.
+define void @test_tabort1(i64 %val) {
+; CHECK-LABEL: test_tabort1:
+; CHECK: tabort 0(%r2)
+; CHECK: br %r14
+ call void @llvm.s390.tabort(i64 %val)
+ ret void
+}
+
+; TABORT with immediate only.
+define void @test_tabort2(i64 %val) {
+; CHECK-LABEL: test_tabort2:
+; CHECK: tabort 1234
+; CHECK: br %r14
+ call void @llvm.s390.tabort(i64 1234)
+ ret void
+}
+
+; TABORT with register + immediate.
+define void @test_tabort3(i64 %val) {
+; CHECK-LABEL: test_tabort3:
+; CHECK: tabort 1234(%r2)
+; CHECK: br %r14
+ %sum = add i64 %val, 1234
+ call void @llvm.s390.tabort(i64 %sum)
+ ret void
+}
+
+; TABORT with out-of-range immediate.
+define void @test_tabort4(i64 %val) {
+; CHECK-LABEL: test_tabort4:
+; CHECK: tabort 0({{%r[1-5]}})
+; CHECK: br %r14
+ call void @llvm.s390.tabort(i64 4096)
+ ret void
+}
+
+; NTSTG with base pointer only.
+define void @test_ntstg1(i64 *%ptr, i64 %val) {
+; CHECK-LABEL: test_ntstg1:
+; CHECK: ntstg %r3, 0(%r2)
+; CHECK: br %r14
+ call void @llvm.s390.ntstg(i64 %val, i64 *%ptr)
+ ret void
+}
+
+; NTSTG with base and index.
+; Check that NTSTG allows an index once the element index has been scaled.
+define void @test_ntstg2(i64 *%base, i64 %index, i64 %val) {
+; CHECK-LABEL: test_ntstg2:
+; CHECK: sllg [[REG:%r[1-5]]], %r3, 3
+; CHECK: ntstg %r4, 0([[REG]],%r2)
+; CHECK: br %r14
+ %ptr = getelementptr i64, i64 *%base, i64 %index
+ call void @llvm.s390.ntstg(i64 %val, i64 *%ptr)
+ ret void
+}
+
+; NTSTG with the highest in-range displacement.
+define void @test_ntstg3(i64 *%base, i64 %val) {
+; CHECK-LABEL: test_ntstg3:
+; CHECK: ntstg %r3, 524280(%r2)
+; CHECK: br %r14
+ %ptr = getelementptr i64, i64 *%base, i64 65535
+ call void @llvm.s390.ntstg(i64 %val, i64 *%ptr)
+ ret void
+}
+
+; NTSTG with an out-of-range positive displacement.
+define void @test_ntstg4(i64 *%base, i64 %val) {
+; CHECK-LABEL: test_ntstg4:
+; CHECK: ntstg %r3, 0({{%r[1-5]}})
+; CHECK: br %r14
+ %ptr = getelementptr i64, i64 *%base, i64 65536
+ call void @llvm.s390.ntstg(i64 %val, i64 *%ptr)
+ ret void
+}
+
+; NTSTG with the lowest in-range displacement.
+define void @test_ntstg5(i64 *%base, i64 %val) {
+; CHECK-LABEL: test_ntstg5:
+; CHECK: ntstg %r3, -524288(%r2)
+; CHECK: br %r14
+ %ptr = getelementptr i64, i64 *%base, i64 -65536
+ call void @llvm.s390.ntstg(i64 %val, i64 *%ptr)
+ ret void
+}
+
+; NTSTG with an out-of-range negative displacement.
+define void @test_ntstg6(i64 *%base, i64 %val) {
+; CHECK-LABEL: test_ntstg6:
+; CHECK: ntstg %r3, 0({{%r[1-5]}})
+; CHECK: br %r14
+ %ptr = getelementptr i64, i64 *%base, i64 -65537
+ call void @llvm.s390.ntstg(i64 %val, i64 *%ptr)
+ ret void
+}
+
+; ETND.
+define i32 @test_etnd() {
+; CHECK-LABEL: test_etnd:
+; CHECK: etnd %r2
+; CHECK: br %r14
+ %res = call i32 @llvm.s390.etnd()
+ ret i32 %res
+}
+
+; PPA (Transaction-Abort Assist).
+define void @test_ppa_txassist(i32 %val) {
+; CHECK-LABEL: test_ppa_txassist:
+; CHECK: ppa %r2, 0, 1
+; CHECK: br %r14
+ call void @llvm.s390.ppa.txassist(i32 %val)
+ ret void
+}
+
diff --git a/test/CodeGen/SystemZ/insert-01.ll b/test/CodeGen/SystemZ/insert-01.ll
index 0b54e85dc4ed..eb39552f1a2e 100644
--- a/test/CodeGen/SystemZ/insert-01.ll
+++ b/test/CodeGen/SystemZ/insert-01.ll
@@ -9,7 +9,7 @@ define i32 @f1(i32 %orig, i8 *%ptr) {
; CHECK-NOT: ni
; CHECK: ic %r2, 0(%r3)
; CHECK: br %r14
- %val = load i8 *%ptr
+ %val = load i8 , i8 *%ptr
%ptr2 = zext i8 %val to i32
%ptr1 = and i32 %orig, -256
%or = or i32 %ptr1, %ptr2
@@ -22,7 +22,7 @@ define i32 @f2(i32 %orig, i8 *%ptr) {
; CHECK-NOT: ni
; CHECK: ic %r2, 0(%r3)
; CHECK: br %r14
- %val = load i8 *%ptr
+ %val = load i8 , i8 *%ptr
%ptr2 = zext i8 %val to i32
%ptr1 = and i32 %orig, -256
%or = or i32 %ptr2, %ptr1
@@ -36,7 +36,7 @@ define i32 @f3(i32 %orig, i8 *%ptr) {
; CHECK: nill %r2, 65024
; CHECK: ic %r2, 0(%r3)
; CHECK: br %r14
- %val = load i8 *%ptr
+ %val = load i8 , i8 *%ptr
%ptr2 = zext i8 %val to i32
%ptr1 = and i32 %orig, -512
%or = or i32 %ptr1, %ptr2
@@ -49,7 +49,7 @@ define i32 @f4(i32 %orig, i8 *%ptr) {
; CHECK: nill %r2, 65024
; CHECK: ic %r2, 0(%r3)
; CHECK: br %r14
- %val = load i8 *%ptr
+ %val = load i8 , i8 *%ptr
%ptr2 = zext i8 %val to i32
%ptr1 = and i32 %orig, -512
%or = or i32 %ptr2, %ptr1
@@ -62,7 +62,7 @@ define i32 @f5(i32 %orig, i8 *%ptr) {
; CHECK: sll %r2, 8
; CHECK: ic %r2, 0(%r3)
; CHECK: br %r14
- %val = load i8 *%ptr
+ %val = load i8 , i8 *%ptr
%ptr2 = zext i8 %val to i32
%ptr1 = shl i32 %orig, 8
%or = or i32 %ptr1, %ptr2
@@ -75,7 +75,7 @@ define i32 @f6(i32 %orig, i8 *%ptr) {
; CHECK: sll %r2, 8
; CHECK: ic %r2, 0(%r3)
; CHECK: br %r14
- %val = load i8 *%ptr
+ %val = load i8 , i8 *%ptr
%ptr2 = zext i8 %val to i32
%ptr1 = shl i32 %orig, 8
%or = or i32 %ptr2, %ptr1
@@ -88,7 +88,7 @@ define i32 @f7(i32 %orig, i8 *%ptr) {
; CHECK: lhi %r2, 256
; CHECK: ic %r2, 0(%r3)
; CHECK: br %r14
- %val = load i8 *%ptr
+ %val = load i8 , i8 *%ptr
%ptr2 = zext i8 %val to i32
%or = or i32 %ptr2, 256
ret i32 %or
@@ -100,7 +100,7 @@ define i32 @f8(i32 %orig, i8 *%ptr) {
; CHECK: lhi %r2, 256
; CHECK: ic %r2, 0(%r3)
; CHECK: br %r14
- %val = load i8 *%ptr
+ %val = load i8 , i8 *%ptr
%ptr2 = zext i8 %val to i32
%or = or i32 256, %ptr2
ret i32 %or
@@ -111,8 +111,8 @@ define i32 @f9(i32 %orig, i8 *%src) {
; CHECK-LABEL: f9:
; CHECK: ic %r2, 4095(%r3)
; CHECK: br %r14
- %ptr = getelementptr i8 *%src, i64 4095
- %val = load i8 *%ptr
+ %ptr = getelementptr i8, i8 *%src, i64 4095
+ %val = load i8 , i8 *%ptr
%src2 = zext i8 %val to i32
%src1 = and i32 %orig, -256
%or = or i32 %src2, %src1
@@ -124,8 +124,8 @@ define i32 @f10(i32 %orig, i8 *%src) {
; CHECK-LABEL: f10:
; CHECK: icy %r2, 4096(%r3)
; CHECK: br %r14
- %ptr = getelementptr i8 *%src, i64 4096
- %val = load i8 *%ptr
+ %ptr = getelementptr i8, i8 *%src, i64 4096
+ %val = load i8 , i8 *%ptr
%src2 = zext i8 %val to i32
%src1 = and i32 %orig, -256
%or = or i32 %src2, %src1
@@ -137,8 +137,8 @@ define i32 @f11(i32 %orig, i8 *%src) {
; CHECK-LABEL: f11:
; CHECK: icy %r2, 524287(%r3)
; CHECK: br %r14
- %ptr = getelementptr i8 *%src, i64 524287
- %val = load i8 *%ptr
+ %ptr = getelementptr i8, i8 *%src, i64 524287
+ %val = load i8 , i8 *%ptr
%src2 = zext i8 %val to i32
%src1 = and i32 %orig, -256
%or = or i32 %src2, %src1
@@ -152,8 +152,8 @@ define i32 @f12(i32 %orig, i8 *%src) {
; CHECK: agfi %r3, 524288
; CHECK: ic %r2, 0(%r3)
; CHECK: br %r14
- %ptr = getelementptr i8 *%src, i64 524288
- %val = load i8 *%ptr
+ %ptr = getelementptr i8, i8 *%src, i64 524288
+ %val = load i8 , i8 *%ptr
%src2 = zext i8 %val to i32
%src1 = and i32 %orig, -256
%or = or i32 %src2, %src1
@@ -165,8 +165,8 @@ define i32 @f13(i32 %orig, i8 *%src) {
; CHECK-LABEL: f13:
; CHECK: icy %r2, -1(%r3)
; CHECK: br %r14
- %ptr = getelementptr i8 *%src, i64 -1
- %val = load i8 *%ptr
+ %ptr = getelementptr i8, i8 *%src, i64 -1
+ %val = load i8 , i8 *%ptr
%src2 = zext i8 %val to i32
%src1 = and i32 %orig, -256
%or = or i32 %src2, %src1
@@ -178,8 +178,8 @@ define i32 @f14(i32 %orig, i8 *%src) {
; CHECK-LABEL: f14:
; CHECK: icy %r2, -524288(%r3)
; CHECK: br %r14
- %ptr = getelementptr i8 *%src, i64 -524288
- %val = load i8 *%ptr
+ %ptr = getelementptr i8, i8 *%src, i64 -524288
+ %val = load i8 , i8 *%ptr
%src2 = zext i8 %val to i32
%src1 = and i32 %orig, -256
%or = or i32 %src2, %src1
@@ -193,8 +193,8 @@ define i32 @f15(i32 %orig, i8 *%src) {
; CHECK: agfi %r3, -524289
; CHECK: ic %r2, 0(%r3)
; CHECK: br %r14
- %ptr = getelementptr i8 *%src, i64 -524289
- %val = load i8 *%ptr
+ %ptr = getelementptr i8, i8 *%src, i64 -524289
+ %val = load i8 , i8 *%ptr
%src2 = zext i8 %val to i32
%src1 = and i32 %orig, -256
%or = or i32 %src2, %src1
@@ -206,9 +206,9 @@ define i32 @f16(i32 %orig, i8 *%src, i64 %index) {
; CHECK-LABEL: f16:
; CHECK: ic %r2, 4095({{%r4,%r3|%r3,%r4}})
; CHECK: br %r14
- %ptr1 = getelementptr i8 *%src, i64 %index
- %ptr2 = getelementptr i8 *%ptr1, i64 4095
- %val = load i8 *%ptr2
+ %ptr1 = getelementptr i8, i8 *%src, i64 %index
+ %ptr2 = getelementptr i8, i8 *%ptr1, i64 4095
+ %val = load i8 , i8 *%ptr2
%src2 = zext i8 %val to i32
%src1 = and i32 %orig, -256
%or = or i32 %src2, %src1
@@ -220,9 +220,9 @@ define i32 @f17(i32 %orig, i8 *%src, i64 %index) {
; CHECK-LABEL: f17:
; CHECK: icy %r2, 4096({{%r4,%r3|%r3,%r4}})
; CHECK: br %r14
- %ptr1 = getelementptr i8 *%src, i64 %index
- %ptr2 = getelementptr i8 *%ptr1, i64 4096
- %val = load i8 *%ptr2
+ %ptr1 = getelementptr i8, i8 *%src, i64 %index
+ %ptr2 = getelementptr i8, i8 *%ptr1, i64 4096
+ %val = load i8 , i8 *%ptr2
%src2 = zext i8 %val to i32
%src1 = and i32 %orig, -256
%or = or i32 %src2, %src1
diff --git a/test/CodeGen/SystemZ/insert-02.ll b/test/CodeGen/SystemZ/insert-02.ll
index 7a85b0bee4d8..8ecfd1f16efe 100644
--- a/test/CodeGen/SystemZ/insert-02.ll
+++ b/test/CodeGen/SystemZ/insert-02.ll
@@ -9,7 +9,7 @@ define i64 @f1(i64 %orig, i8 *%ptr) {
; CHECK-NOT: ni
; CHECK: ic %r2, 0(%r3)
; CHECK: br %r14
- %val = load i8 *%ptr
+ %val = load i8 , i8 *%ptr
%ptr2 = zext i8 %val to i64
%ptr1 = and i64 %orig, -256
%or = or i64 %ptr1, %ptr2
@@ -22,7 +22,7 @@ define i64 @f2(i64 %orig, i8 *%ptr) {
; CHECK-NOT: ni
; CHECK: ic %r2, 0(%r3)
; CHECK: br %r14
- %val = load i8 *%ptr
+ %val = load i8 , i8 *%ptr
%ptr2 = zext i8 %val to i64
%ptr1 = and i64 %orig, -256
%or = or i64 %ptr2, %ptr1
@@ -36,7 +36,7 @@ define i64 @f3(i64 %orig, i8 *%ptr) {
; CHECK: nill %r2, 65024
; CHECK: ic %r2, 0(%r3)
; CHECK: br %r14
- %val = load i8 *%ptr
+ %val = load i8 , i8 *%ptr
%ptr2 = zext i8 %val to i64
%ptr1 = and i64 %orig, -512
%or = or i64 %ptr1, %ptr2
@@ -49,7 +49,7 @@ define i64 @f4(i64 %orig, i8 *%ptr) {
; CHECK: nill %r2, 65024
; CHECK: ic %r2, 0(%r3)
; CHECK: br %r14
- %val = load i8 *%ptr
+ %val = load i8 , i8 *%ptr
%ptr2 = zext i8 %val to i64
%ptr1 = and i64 %orig, -512
%or = or i64 %ptr2, %ptr1
@@ -62,7 +62,7 @@ define i64 @f5(i64 %orig, i8 *%ptr) {
; CHECK: sllg %r2, %r2, 8
; CHECK: ic %r2, 0(%r3)
; CHECK: br %r14
- %val = load i8 *%ptr
+ %val = load i8 , i8 *%ptr
%ptr2 = zext i8 %val to i64
%ptr1 = shl i64 %orig, 8
%or = or i64 %ptr1, %ptr2
@@ -75,7 +75,7 @@ define i64 @f6(i64 %orig, i8 *%ptr) {
; CHECK: sllg %r2, %r2, 8
; CHECK: ic %r2, 0(%r3)
; CHECK: br %r14
- %val = load i8 *%ptr
+ %val = load i8 , i8 *%ptr
%ptr2 = zext i8 %val to i64
%ptr1 = shl i64 %orig, 8
%or = or i64 %ptr2, %ptr1
@@ -88,7 +88,7 @@ define i64 @f7(i64 %orig, i8 *%ptr) {
; CHECK: lghi %r2, 256
; CHECK: ic %r2, 0(%r3)
; CHECK: br %r14
- %val = load i8 *%ptr
+ %val = load i8 , i8 *%ptr
%ptr2 = zext i8 %val to i64
%or = or i64 %ptr2, 256
ret i64 %or
@@ -100,7 +100,7 @@ define i64 @f8(i64 %orig, i8 *%ptr) {
; CHECK: lghi %r2, 256
; CHECK: ic %r2, 0(%r3)
; CHECK: br %r14
- %val = load i8 *%ptr
+ %val = load i8 , i8 *%ptr
%ptr2 = zext i8 %val to i64
%or = or i64 256, %ptr2
ret i64 %or
@@ -111,8 +111,8 @@ define i64 @f9(i64 %orig, i8 *%src) {
; CHECK-LABEL: f9:
; CHECK: ic %r2, 4095(%r3)
; CHECK: br %r14
- %ptr = getelementptr i8 *%src, i64 4095
- %val = load i8 *%ptr
+ %ptr = getelementptr i8, i8 *%src, i64 4095
+ %val = load i8 , i8 *%ptr
%src2 = zext i8 %val to i64
%src1 = and i64 %orig, -256
%or = or i64 %src2, %src1
@@ -124,8 +124,8 @@ define i64 @f10(i64 %orig, i8 *%src) {
; CHECK-LABEL: f10:
; CHECK: icy %r2, 4096(%r3)
; CHECK: br %r14
- %ptr = getelementptr i8 *%src, i64 4096
- %val = load i8 *%ptr
+ %ptr = getelementptr i8, i8 *%src, i64 4096
+ %val = load i8 , i8 *%ptr
%src2 = zext i8 %val to i64
%src1 = and i64 %orig, -256
%or = or i64 %src2, %src1
@@ -137,8 +137,8 @@ define i64 @f11(i64 %orig, i8 *%src) {
; CHECK-LABEL: f11:
; CHECK: icy %r2, 524287(%r3)
; CHECK: br %r14
- %ptr = getelementptr i8 *%src, i64 524287
- %val = load i8 *%ptr
+ %ptr = getelementptr i8, i8 *%src, i64 524287
+ %val = load i8 , i8 *%ptr
%src2 = zext i8 %val to i64
%src1 = and i64 %orig, -256
%or = or i64 %src2, %src1
@@ -152,8 +152,8 @@ define i64 @f12(i64 %orig, i8 *%src) {
; CHECK: agfi %r3, 524288
; CHECK: ic %r2, 0(%r3)
; CHECK: br %r14
- %ptr = getelementptr i8 *%src, i64 524288
- %val = load i8 *%ptr
+ %ptr = getelementptr i8, i8 *%src, i64 524288
+ %val = load i8 , i8 *%ptr
%src2 = zext i8 %val to i64
%src1 = and i64 %orig, -256
%or = or i64 %src2, %src1
@@ -165,8 +165,8 @@ define i64 @f13(i64 %orig, i8 *%src) {
; CHECK-LABEL: f13:
; CHECK: icy %r2, -1(%r3)
; CHECK: br %r14
- %ptr = getelementptr i8 *%src, i64 -1
- %val = load i8 *%ptr
+ %ptr = getelementptr i8, i8 *%src, i64 -1
+ %val = load i8 , i8 *%ptr
%src2 = zext i8 %val to i64
%src1 = and i64 %orig, -256
%or = or i64 %src2, %src1
@@ -178,8 +178,8 @@ define i64 @f14(i64 %orig, i8 *%src) {
; CHECK-LABEL: f14:
; CHECK: icy %r2, -524288(%r3)
; CHECK: br %r14
- %ptr = getelementptr i8 *%src, i64 -524288
- %val = load i8 *%ptr
+ %ptr = getelementptr i8, i8 *%src, i64 -524288
+ %val = load i8 , i8 *%ptr
%src2 = zext i8 %val to i64
%src1 = and i64 %orig, -256
%or = or i64 %src2, %src1
@@ -193,8 +193,8 @@ define i64 @f15(i64 %orig, i8 *%src) {
; CHECK: agfi %r3, -524289
; CHECK: ic %r2, 0(%r3)
; CHECK: br %r14
- %ptr = getelementptr i8 *%src, i64 -524289
- %val = load i8 *%ptr
+ %ptr = getelementptr i8, i8 *%src, i64 -524289
+ %val = load i8 , i8 *%ptr
%src2 = zext i8 %val to i64
%src1 = and i64 %orig, -256
%or = or i64 %src2, %src1
@@ -206,9 +206,9 @@ define i64 @f16(i64 %orig, i8 *%src, i64 %index) {
; CHECK-LABEL: f16:
; CHECK: ic %r2, 4095({{%r4,%r3|%r3,%r4}})
; CHECK: br %r14
- %ptr1 = getelementptr i8 *%src, i64 %index
- %ptr2 = getelementptr i8 *%ptr1, i64 4095
- %val = load i8 *%ptr2
+ %ptr1 = getelementptr i8, i8 *%src, i64 %index
+ %ptr2 = getelementptr i8, i8 *%ptr1, i64 4095
+ %val = load i8 , i8 *%ptr2
%src2 = zext i8 %val to i64
%src1 = and i64 %orig, -256
%or = or i64 %src2, %src1
@@ -220,9 +220,9 @@ define i64 @f17(i64 %orig, i8 *%src, i64 %index) {
; CHECK-LABEL: f17:
; CHECK: icy %r2, 4096({{%r4,%r3|%r3,%r4}})
; CHECK: br %r14
- %ptr1 = getelementptr i8 *%src, i64 %index
- %ptr2 = getelementptr i8 *%ptr1, i64 4096
- %val = load i8 *%ptr2
+ %ptr1 = getelementptr i8, i8 *%src, i64 %index
+ %ptr2 = getelementptr i8, i8 *%ptr1, i64 4096
+ %val = load i8 , i8 *%ptr2
%src2 = zext i8 %val to i64
%src1 = and i64 %orig, -256
%or = or i64 %src2, %src1
diff --git a/test/CodeGen/SystemZ/insert-06.ll b/test/CodeGen/SystemZ/insert-06.ll
index 81a9c8770708..3243d9f4cb0c 100644
--- a/test/CodeGen/SystemZ/insert-06.ll
+++ b/test/CodeGen/SystemZ/insert-06.ll
@@ -85,7 +85,7 @@ define i64 @f7(i64 %a, i32 *%src) {
; CHECK-NOT: {{%r[23]}}
; CHECK: l %r2, 0(%r3)
; CHECK: br %r14
- %b = load i32 *%src
+ %b = load i32 , i32 *%src
%low = zext i32 %b to i64
%high = and i64 %a, -4294967296
%res = or i64 %high, %low
@@ -98,7 +98,7 @@ define i64 @f8(i64 %a, i8 *%src) {
; CHECK-NOT: {{%r[23]}}
; CHECK: lb %r2, 0(%r3)
; CHECK: br %r14
- %byte = load i8 *%src
+ %byte = load i8 , i8 *%src
%b = sext i8 %byte to i32
%low = zext i32 %b to i64
%high = and i64 %a, -4294967296
@@ -185,7 +185,7 @@ define i64 @f15(i64 %a, i8 *%src) {
; CHECK-NOT: {{%r[23]}}
; CHECK: lb %r2, 0(%r3)
; CHECK: br %r14
- %byte = load i8 *%src
+ %byte = load i8 , i8 *%src
%b = sext i8 %byte to i64
%low = and i64 %b, 4294967295
%high = and i64 %a, -4294967296
diff --git a/test/CodeGen/SystemZ/int-add-01.ll b/test/CodeGen/SystemZ/int-add-01.ll
index 4114686e41e8..f7a3a264913a 100644
--- a/test/CodeGen/SystemZ/int-add-01.ll
+++ b/test/CodeGen/SystemZ/int-add-01.ll
@@ -8,7 +8,7 @@ define i32 @f1(i32 %lhs, i16 *%src) {
; CHECK-LABEL: f1:
; CHECK: ah %r2, 0(%r3)
; CHECK: br %r14
- %half = load i16 *%src
+ %half = load i16 , i16 *%src
%rhs = sext i16 %half to i32
%res = add i32 %lhs, %rhs
ret i32 %res
@@ -19,8 +19,8 @@ define i32 @f2(i32 %lhs, i16 *%src) {
; CHECK-LABEL: f2:
; CHECK: ah %r2, 4094(%r3)
; CHECK: br %r14
- %ptr = getelementptr i16 *%src, i64 2047
- %half = load i16 *%ptr
+ %ptr = getelementptr i16, i16 *%src, i64 2047
+ %half = load i16 , i16 *%ptr
%rhs = sext i16 %half to i32
%res = add i32 %lhs, %rhs
ret i32 %res
@@ -31,8 +31,8 @@ define i32 @f3(i32 %lhs, i16 *%src) {
; CHECK-LABEL: f3:
; CHECK: ahy %r2, 4096(%r3)
; CHECK: br %r14
- %ptr = getelementptr i16 *%src, i64 2048
- %half = load i16 *%ptr
+ %ptr = getelementptr i16, i16 *%src, i64 2048
+ %half = load i16 , i16 *%ptr
%rhs = sext i16 %half to i32
%res = add i32 %lhs, %rhs
ret i32 %res
@@ -43,8 +43,8 @@ define i32 @f4(i32 %lhs, i16 *%src) {
; CHECK-LABEL: f4:
; CHECK: ahy %r2, 524286(%r3)
; CHECK: br %r14
- %ptr = getelementptr i16 *%src, i64 262143
- %half = load i16 *%ptr
+ %ptr = getelementptr i16, i16 *%src, i64 262143
+ %half = load i16 , i16 *%ptr
%rhs = sext i16 %half to i32
%res = add i32 %lhs, %rhs
ret i32 %res
@@ -57,8 +57,8 @@ define i32 @f5(i32 %lhs, i16 *%src) {
; CHECK: agfi %r3, 524288
; CHECK: ah %r2, 0(%r3)
; CHECK: br %r14
- %ptr = getelementptr i16 *%src, i64 262144
- %half = load i16 *%ptr
+ %ptr = getelementptr i16, i16 *%src, i64 262144
+ %half = load i16 , i16 *%ptr
%rhs = sext i16 %half to i32
%res = add i32 %lhs, %rhs
ret i32 %res
@@ -69,8 +69,8 @@ define i32 @f6(i32 %lhs, i16 *%src) {
; CHECK-LABEL: f6:
; CHECK: ahy %r2, -2(%r3)
; CHECK: br %r14
- %ptr = getelementptr i16 *%src, i64 -1
- %half = load i16 *%ptr
+ %ptr = getelementptr i16, i16 *%src, i64 -1
+ %half = load i16 , i16 *%ptr
%rhs = sext i16 %half to i32
%res = add i32 %lhs, %rhs
ret i32 %res
@@ -81,8 +81,8 @@ define i32 @f7(i32 %lhs, i16 *%src) {
; CHECK-LABEL: f7:
; CHECK: ahy %r2, -524288(%r3)
; CHECK: br %r14
- %ptr = getelementptr i16 *%src, i64 -262144
- %half = load i16 *%ptr
+ %ptr = getelementptr i16, i16 *%src, i64 -262144
+ %half = load i16 , i16 *%ptr
%rhs = sext i16 %half to i32
%res = add i32 %lhs, %rhs
ret i32 %res
@@ -95,8 +95,8 @@ define i32 @f8(i32 %lhs, i16 *%src) {
; CHECK: agfi %r3, -524290
; CHECK: ah %r2, 0(%r3)
; CHECK: br %r14
- %ptr = getelementptr i16 *%src, i64 -262145
- %half = load i16 *%ptr
+ %ptr = getelementptr i16, i16 *%src, i64 -262145
+ %half = load i16 , i16 *%ptr
%rhs = sext i16 %half to i32
%res = add i32 %lhs, %rhs
ret i32 %res
@@ -110,7 +110,7 @@ define i32 @f9(i32 %lhs, i64 %src, i64 %index) {
%add1 = add i64 %src, %index
%add2 = add i64 %add1, 4094
%ptr = inttoptr i64 %add2 to i16 *
- %half = load i16 *%ptr
+ %half = load i16 , i16 *%ptr
%rhs = sext i16 %half to i32
%res = add i32 %lhs, %rhs
ret i32 %res
@@ -124,7 +124,7 @@ define i32 @f10(i32 %lhs, i64 %src, i64 %index) {
%add1 = add i64 %src, %index
%add2 = add i64 %add1, 4096
%ptr = inttoptr i64 %add2 to i16 *
- %half = load i16 *%ptr
+ %half = load i16 , i16 *%ptr
%rhs = sext i16 %half to i32
%res = add i32 %lhs, %rhs
ret i32 %res
diff --git a/test/CodeGen/SystemZ/int-add-02.ll b/test/CodeGen/SystemZ/int-add-02.ll
index 4386b5a4d496..01e77de31b5f 100644
--- a/test/CodeGen/SystemZ/int-add-02.ll
+++ b/test/CodeGen/SystemZ/int-add-02.ll
@@ -19,7 +19,7 @@ define i32 @f2(i32 %a, i32 *%src) {
; CHECK-LABEL: f2:
; CHECK: a %r2, 0(%r3)
; CHECK: br %r14
- %b = load i32 *%src
+ %b = load i32 , i32 *%src
%add = add i32 %a, %b
ret i32 %add
}
@@ -29,8 +29,8 @@ define i32 @f3(i32 %a, i32 *%src) {
; CHECK-LABEL: f3:
; CHECK: a %r2, 4092(%r3)
; CHECK: br %r14
- %ptr = getelementptr i32 *%src, i64 1023
- %b = load i32 *%ptr
+ %ptr = getelementptr i32, i32 *%src, i64 1023
+ %b = load i32 , i32 *%ptr
%add = add i32 %a, %b
ret i32 %add
}
@@ -40,8 +40,8 @@ define i32 @f4(i32 %a, i32 *%src) {
; CHECK-LABEL: f4:
; CHECK: ay %r2, 4096(%r3)
; CHECK: br %r14
- %ptr = getelementptr i32 *%src, i64 1024
- %b = load i32 *%ptr
+ %ptr = getelementptr i32, i32 *%src, i64 1024
+ %b = load i32 , i32 *%ptr
%add = add i32 %a, %b
ret i32 %add
}
@@ -51,8 +51,8 @@ define i32 @f5(i32 %a, i32 *%src) {
; CHECK-LABEL: f5:
; CHECK: ay %r2, 524284(%r3)
; CHECK: br %r14
- %ptr = getelementptr i32 *%src, i64 131071
- %b = load i32 *%ptr
+ %ptr = getelementptr i32, i32 *%src, i64 131071
+ %b = load i32 , i32 *%ptr
%add = add i32 %a, %b
ret i32 %add
}
@@ -64,8 +64,8 @@ define i32 @f6(i32 %a, i32 *%src) {
; CHECK: agfi %r3, 524288
; CHECK: a %r2, 0(%r3)
; CHECK: br %r14
- %ptr = getelementptr i32 *%src, i64 131072
- %b = load i32 *%ptr
+ %ptr = getelementptr i32, i32 *%src, i64 131072
+ %b = load i32 , i32 *%ptr
%add = add i32 %a, %b
ret i32 %add
}
@@ -75,8 +75,8 @@ define i32 @f7(i32 %a, i32 *%src) {
; CHECK-LABEL: f7:
; CHECK: ay %r2, -4(%r3)
; CHECK: br %r14
- %ptr = getelementptr i32 *%src, i64 -1
- %b = load i32 *%ptr
+ %ptr = getelementptr i32, i32 *%src, i64 -1
+ %b = load i32 , i32 *%ptr
%add = add i32 %a, %b
ret i32 %add
}
@@ -86,8 +86,8 @@ define i32 @f8(i32 %a, i32 *%src) {
; CHECK-LABEL: f8:
; CHECK: ay %r2, -524288(%r3)
; CHECK: br %r14
- %ptr = getelementptr i32 *%src, i64 -131072
- %b = load i32 *%ptr
+ %ptr = getelementptr i32, i32 *%src, i64 -131072
+ %b = load i32 , i32 *%ptr
%add = add i32 %a, %b
ret i32 %add
}
@@ -99,8 +99,8 @@ define i32 @f9(i32 %a, i32 *%src) {
; CHECK: agfi %r3, -524292
; CHECK: a %r2, 0(%r3)
; CHECK: br %r14
- %ptr = getelementptr i32 *%src, i64 -131073
- %b = load i32 *%ptr
+ %ptr = getelementptr i32, i32 *%src, i64 -131073
+ %b = load i32 , i32 *%ptr
%add = add i32 %a, %b
ret i32 %add
}
@@ -113,7 +113,7 @@ define i32 @f10(i32 %a, i64 %src, i64 %index) {
%add1 = add i64 %src, %index
%add2 = add i64 %add1, 4092
%ptr = inttoptr i64 %add2 to i32 *
- %b = load i32 *%ptr
+ %b = load i32 , i32 *%ptr
%add = add i32 %a, %b
ret i32 %add
}
@@ -126,7 +126,7 @@ define i32 @f11(i32 %a, i64 %src, i64 %index) {
%add1 = add i64 %src, %index
%add2 = add i64 %add1, 4096
%ptr = inttoptr i64 %add2 to i32 *
- %b = load i32 *%ptr
+ %b = load i32 , i32 *%ptr
%add = add i32 %a, %b
ret i32 %add
}
@@ -137,26 +137,26 @@ define i32 @f12(i32 *%ptr0) {
; CHECK: brasl %r14, foo@PLT
; CHECK: a %r2, 16{{[04]}}(%r15)
; CHECK: br %r14
- %ptr1 = getelementptr i32 *%ptr0, i64 2
- %ptr2 = getelementptr i32 *%ptr0, i64 4
- %ptr3 = getelementptr i32 *%ptr0, i64 6
- %ptr4 = getelementptr i32 *%ptr0, i64 8
- %ptr5 = getelementptr i32 *%ptr0, i64 10
- %ptr6 = getelementptr i32 *%ptr0, i64 12
- %ptr7 = getelementptr i32 *%ptr0, i64 14
- %ptr8 = getelementptr i32 *%ptr0, i64 16
- %ptr9 = getelementptr i32 *%ptr0, i64 18
-
- %val0 = load i32 *%ptr0
- %val1 = load i32 *%ptr1
- %val2 = load i32 *%ptr2
- %val3 = load i32 *%ptr3
- %val4 = load i32 *%ptr4
- %val5 = load i32 *%ptr5
- %val6 = load i32 *%ptr6
- %val7 = load i32 *%ptr7
- %val8 = load i32 *%ptr8
- %val9 = load i32 *%ptr9
+ %ptr1 = getelementptr i32, i32 *%ptr0, i64 2
+ %ptr2 = getelementptr i32, i32 *%ptr0, i64 4
+ %ptr3 = getelementptr i32, i32 *%ptr0, i64 6
+ %ptr4 = getelementptr i32, i32 *%ptr0, i64 8
+ %ptr5 = getelementptr i32, i32 *%ptr0, i64 10
+ %ptr6 = getelementptr i32, i32 *%ptr0, i64 12
+ %ptr7 = getelementptr i32, i32 *%ptr0, i64 14
+ %ptr8 = getelementptr i32, i32 *%ptr0, i64 16
+ %ptr9 = getelementptr i32, i32 *%ptr0, i64 18
+
+ %val0 = load i32 , i32 *%ptr0
+ %val1 = load i32 , i32 *%ptr1
+ %val2 = load i32 , i32 *%ptr2
+ %val3 = load i32 , i32 *%ptr3
+ %val4 = load i32 , i32 *%ptr4
+ %val5 = load i32 , i32 *%ptr5
+ %val6 = load i32 , i32 *%ptr6
+ %val7 = load i32 , i32 *%ptr7
+ %val8 = load i32 , i32 *%ptr8
+ %val9 = load i32 , i32 *%ptr9
%ret = call i32 @foo()
diff --git a/test/CodeGen/SystemZ/int-add-03.ll b/test/CodeGen/SystemZ/int-add-03.ll
index 56000a80cd9b..92e3c1656d7f 100644
--- a/test/CodeGen/SystemZ/int-add-03.ll
+++ b/test/CodeGen/SystemZ/int-add-03.ll
@@ -19,7 +19,7 @@ define i64 @f2(i64 %a, i32 *%src) {
; CHECK-LABEL: f2:
; CHECK: agf %r2, 0(%r3)
; CHECK: br %r14
- %b = load i32 *%src
+ %b = load i32 , i32 *%src
%bext = sext i32 %b to i64
%add = add i64 %a, %bext
ret i64 %add
@@ -30,8 +30,8 @@ define i64 @f3(i64 %a, i32 *%src) {
; CHECK-LABEL: f3:
; CHECK: agf %r2, 524284(%r3)
; CHECK: br %r14
- %ptr = getelementptr i32 *%src, i64 131071
- %b = load i32 *%ptr
+ %ptr = getelementptr i32, i32 *%src, i64 131071
+ %b = load i32 , i32 *%ptr
%bext = sext i32 %b to i64
%add = add i64 %a, %bext
ret i64 %add
@@ -44,8 +44,8 @@ define i64 @f4(i64 %a, i32 *%src) {
; CHECK: agfi %r3, 524288
; CHECK: agf %r2, 0(%r3)
; CHECK: br %r14
- %ptr = getelementptr i32 *%src, i64 131072
- %b = load i32 *%ptr
+ %ptr = getelementptr i32, i32 *%src, i64 131072
+ %b = load i32 , i32 *%ptr
%bext = sext i32 %b to i64
%add = add i64 %a, %bext
ret i64 %add
@@ -56,8 +56,8 @@ define i64 @f5(i64 %a, i32 *%src) {
; CHECK-LABEL: f5:
; CHECK: agf %r2, -4(%r3)
; CHECK: br %r14
- %ptr = getelementptr i32 *%src, i64 -1
- %b = load i32 *%ptr
+ %ptr = getelementptr i32, i32 *%src, i64 -1
+ %b = load i32 , i32 *%ptr
%bext = sext i32 %b to i64
%add = add i64 %a, %bext
ret i64 %add
@@ -68,8 +68,8 @@ define i64 @f6(i64 %a, i32 *%src) {
; CHECK-LABEL: f6:
; CHECK: agf %r2, -524288(%r3)
; CHECK: br %r14
- %ptr = getelementptr i32 *%src, i64 -131072
- %b = load i32 *%ptr
+ %ptr = getelementptr i32, i32 *%src, i64 -131072
+ %b = load i32 , i32 *%ptr
%bext = sext i32 %b to i64
%add = add i64 %a, %bext
ret i64 %add
@@ -82,8 +82,8 @@ define i64 @f7(i64 %a, i32 *%src) {
; CHECK: agfi %r3, -524292
; CHECK: agf %r2, 0(%r3)
; CHECK: br %r14
- %ptr = getelementptr i32 *%src, i64 -131073
- %b = load i32 *%ptr
+ %ptr = getelementptr i32, i32 *%src, i64 -131073
+ %b = load i32 , i32 *%ptr
%bext = sext i32 %b to i64
%add = add i64 %a, %bext
ret i64 %add
@@ -97,7 +97,7 @@ define i64 @f8(i64 %a, i64 %src, i64 %index) {
%add1 = add i64 %src, %index
%add2 = add i64 %add1, 524284
%ptr = inttoptr i64 %add2 to i32 *
- %b = load i32 *%ptr
+ %b = load i32 , i32 *%ptr
%bext = sext i32 %b to i64
%add = add i64 %a, %bext
ret i64 %add
@@ -109,26 +109,26 @@ define i64 @f9(i32 *%ptr0) {
; CHECK: brasl %r14, foo@PLT
; CHECK: agf %r2, 16{{[04]}}(%r15)
; CHECK: br %r14
- %ptr1 = getelementptr i32 *%ptr0, i64 2
- %ptr2 = getelementptr i32 *%ptr0, i64 4
- %ptr3 = getelementptr i32 *%ptr0, i64 6
- %ptr4 = getelementptr i32 *%ptr0, i64 8
- %ptr5 = getelementptr i32 *%ptr0, i64 10
- %ptr6 = getelementptr i32 *%ptr0, i64 12
- %ptr7 = getelementptr i32 *%ptr0, i64 14
- %ptr8 = getelementptr i32 *%ptr0, i64 16
- %ptr9 = getelementptr i32 *%ptr0, i64 18
-
- %val0 = load i32 *%ptr0
- %val1 = load i32 *%ptr1
- %val2 = load i32 *%ptr2
- %val3 = load i32 *%ptr3
- %val4 = load i32 *%ptr4
- %val5 = load i32 *%ptr5
- %val6 = load i32 *%ptr6
- %val7 = load i32 *%ptr7
- %val8 = load i32 *%ptr8
- %val9 = load i32 *%ptr9
+ %ptr1 = getelementptr i32, i32 *%ptr0, i64 2
+ %ptr2 = getelementptr i32, i32 *%ptr0, i64 4
+ %ptr3 = getelementptr i32, i32 *%ptr0, i64 6
+ %ptr4 = getelementptr i32, i32 *%ptr0, i64 8
+ %ptr5 = getelementptr i32, i32 *%ptr0, i64 10
+ %ptr6 = getelementptr i32, i32 *%ptr0, i64 12
+ %ptr7 = getelementptr i32, i32 *%ptr0, i64 14
+ %ptr8 = getelementptr i32, i32 *%ptr0, i64 16
+ %ptr9 = getelementptr i32, i32 *%ptr0, i64 18
+
+ %val0 = load i32 , i32 *%ptr0
+ %val1 = load i32 , i32 *%ptr1
+ %val2 = load i32 , i32 *%ptr2
+ %val3 = load i32 , i32 *%ptr3
+ %val4 = load i32 , i32 *%ptr4
+ %val5 = load i32 , i32 *%ptr5
+ %val6 = load i32 , i32 *%ptr6
+ %val7 = load i32 , i32 *%ptr7
+ %val8 = load i32 , i32 *%ptr8
+ %val9 = load i32 , i32 *%ptr9
%frob0 = add i32 %val0, 100
%frob1 = add i32 %val1, 100
diff --git a/test/CodeGen/SystemZ/int-add-04.ll b/test/CodeGen/SystemZ/int-add-04.ll
index 675e36babfa7..6828b615e238 100644
--- a/test/CodeGen/SystemZ/int-add-04.ll
+++ b/test/CodeGen/SystemZ/int-add-04.ll
@@ -19,7 +19,7 @@ define i64 @f2(i64 %a, i32 *%src) {
; CHECK-LABEL: f2:
; CHECK: algf %r2, 0(%r3)
; CHECK: br %r14
- %b = load i32 *%src
+ %b = load i32 , i32 *%src
%bext = zext i32 %b to i64
%add = add i64 %a, %bext
ret i64 %add
@@ -30,8 +30,8 @@ define i64 @f3(i64 %a, i32 *%src) {
; CHECK-LABEL: f3:
; CHECK: algf %r2, 524284(%r3)
; CHECK: br %r14
- %ptr = getelementptr i32 *%src, i64 131071
- %b = load i32 *%ptr
+ %ptr = getelementptr i32, i32 *%src, i64 131071
+ %b = load i32 , i32 *%ptr
%bext = zext i32 %b to i64
%add = add i64 %a, %bext
ret i64 %add
@@ -44,8 +44,8 @@ define i64 @f4(i64 %a, i32 *%src) {
; CHECK: agfi %r3, 524288
; CHECK: algf %r2, 0(%r3)
; CHECK: br %r14
- %ptr = getelementptr i32 *%src, i64 131072
- %b = load i32 *%ptr
+ %ptr = getelementptr i32, i32 *%src, i64 131072
+ %b = load i32 , i32 *%ptr
%bext = zext i32 %b to i64
%add = add i64 %a, %bext
ret i64 %add
@@ -56,8 +56,8 @@ define i64 @f5(i64 %a, i32 *%src) {
; CHECK-LABEL: f5:
; CHECK: algf %r2, -4(%r3)
; CHECK: br %r14
- %ptr = getelementptr i32 *%src, i64 -1
- %b = load i32 *%ptr
+ %ptr = getelementptr i32, i32 *%src, i64 -1
+ %b = load i32 , i32 *%ptr
%bext = zext i32 %b to i64
%add = add i64 %a, %bext
ret i64 %add
@@ -68,8 +68,8 @@ define i64 @f6(i64 %a, i32 *%src) {
; CHECK-LABEL: f6:
; CHECK: algf %r2, -524288(%r3)
; CHECK: br %r14
- %ptr = getelementptr i32 *%src, i64 -131072
- %b = load i32 *%ptr
+ %ptr = getelementptr i32, i32 *%src, i64 -131072
+ %b = load i32 , i32 *%ptr
%bext = zext i32 %b to i64
%add = add i64 %a, %bext
ret i64 %add
@@ -82,8 +82,8 @@ define i64 @f7(i64 %a, i32 *%src) {
; CHECK: agfi %r3, -524292
; CHECK: algf %r2, 0(%r3)
; CHECK: br %r14
- %ptr = getelementptr i32 *%src, i64 -131073
- %b = load i32 *%ptr
+ %ptr = getelementptr i32, i32 *%src, i64 -131073
+ %b = load i32 , i32 *%ptr
%bext = zext i32 %b to i64
%add = add i64 %a, %bext
ret i64 %add
@@ -97,7 +97,7 @@ define i64 @f8(i64 %a, i64 %src, i64 %index) {
%add1 = add i64 %src, %index
%add2 = add i64 %add1, 524284
%ptr = inttoptr i64 %add2 to i32 *
- %b = load i32 *%ptr
+ %b = load i32 , i32 *%ptr
%bext = zext i32 %b to i64
%add = add i64 %a, %bext
ret i64 %add
@@ -109,26 +109,26 @@ define i64 @f9(i32 *%ptr0) {
; CHECK: brasl %r14, foo@PLT
; CHECK: algf %r2, 16{{[04]}}(%r15)
; CHECK: br %r14
- %ptr1 = getelementptr i32 *%ptr0, i64 2
- %ptr2 = getelementptr i32 *%ptr0, i64 4
- %ptr3 = getelementptr i32 *%ptr0, i64 6
- %ptr4 = getelementptr i32 *%ptr0, i64 8
- %ptr5 = getelementptr i32 *%ptr0, i64 10
- %ptr6 = getelementptr i32 *%ptr0, i64 12
- %ptr7 = getelementptr i32 *%ptr0, i64 14
- %ptr8 = getelementptr i32 *%ptr0, i64 16
- %ptr9 = getelementptr i32 *%ptr0, i64 18
-
- %val0 = load i32 *%ptr0
- %val1 = load i32 *%ptr1
- %val2 = load i32 *%ptr2
- %val3 = load i32 *%ptr3
- %val4 = load i32 *%ptr4
- %val5 = load i32 *%ptr5
- %val6 = load i32 *%ptr6
- %val7 = load i32 *%ptr7
- %val8 = load i32 *%ptr8
- %val9 = load i32 *%ptr9
+ %ptr1 = getelementptr i32, i32 *%ptr0, i64 2
+ %ptr2 = getelementptr i32, i32 *%ptr0, i64 4
+ %ptr3 = getelementptr i32, i32 *%ptr0, i64 6
+ %ptr4 = getelementptr i32, i32 *%ptr0, i64 8
+ %ptr5 = getelementptr i32, i32 *%ptr0, i64 10
+ %ptr6 = getelementptr i32, i32 *%ptr0, i64 12
+ %ptr7 = getelementptr i32, i32 *%ptr0, i64 14
+ %ptr8 = getelementptr i32, i32 *%ptr0, i64 16
+ %ptr9 = getelementptr i32, i32 *%ptr0, i64 18
+
+ %val0 = load i32 , i32 *%ptr0
+ %val1 = load i32 , i32 *%ptr1
+ %val2 = load i32 , i32 *%ptr2
+ %val3 = load i32 , i32 *%ptr3
+ %val4 = load i32 , i32 *%ptr4
+ %val5 = load i32 , i32 *%ptr5
+ %val6 = load i32 , i32 *%ptr6
+ %val7 = load i32 , i32 *%ptr7
+ %val8 = load i32 , i32 *%ptr8
+ %val9 = load i32 , i32 *%ptr9
%frob0 = add i32 %val0, 100
%frob1 = add i32 %val1, 100
diff --git a/test/CodeGen/SystemZ/int-add-05.ll b/test/CodeGen/SystemZ/int-add-05.ll
index a05fdd9059c1..f28e305dc407 100644
--- a/test/CodeGen/SystemZ/int-add-05.ll
+++ b/test/CodeGen/SystemZ/int-add-05.ll
@@ -19,7 +19,7 @@ define i64 @f2(i64 %a, i64 *%src) {
; CHECK-LABEL: f2:
; CHECK: ag %r2, 0(%r3)
; CHECK: br %r14
- %b = load i64 *%src
+ %b = load i64 , i64 *%src
%add = add i64 %a, %b
ret i64 %add
}
@@ -29,8 +29,8 @@ define i64 @f3(i64 %a, i64 *%src) {
; CHECK-LABEL: f3:
; CHECK: ag %r2, 524280(%r3)
; CHECK: br %r14
- %ptr = getelementptr i64 *%src, i64 65535
- %b = load i64 *%ptr
+ %ptr = getelementptr i64, i64 *%src, i64 65535
+ %b = load i64 , i64 *%ptr
%add = add i64 %a, %b
ret i64 %add
}
@@ -42,8 +42,8 @@ define i64 @f4(i64 %a, i64 *%src) {
; CHECK: agfi %r3, 524288
; CHECK: ag %r2, 0(%r3)
; CHECK: br %r14
- %ptr = getelementptr i64 *%src, i64 65536
- %b = load i64 *%ptr
+ %ptr = getelementptr i64, i64 *%src, i64 65536
+ %b = load i64 , i64 *%ptr
%add = add i64 %a, %b
ret i64 %add
}
@@ -53,8 +53,8 @@ define i64 @f5(i64 %a, i64 *%src) {
; CHECK-LABEL: f5:
; CHECK: ag %r2, -8(%r3)
; CHECK: br %r14
- %ptr = getelementptr i64 *%src, i64 -1
- %b = load i64 *%ptr
+ %ptr = getelementptr i64, i64 *%src, i64 -1
+ %b = load i64 , i64 *%ptr
%add = add i64 %a, %b
ret i64 %add
}
@@ -64,8 +64,8 @@ define i64 @f6(i64 %a, i64 *%src) {
; CHECK-LABEL: f6:
; CHECK: ag %r2, -524288(%r3)
; CHECK: br %r14
- %ptr = getelementptr i64 *%src, i64 -65536
- %b = load i64 *%ptr
+ %ptr = getelementptr i64, i64 *%src, i64 -65536
+ %b = load i64 , i64 *%ptr
%add = add i64 %a, %b
ret i64 %add
}
@@ -77,8 +77,8 @@ define i64 @f7(i64 %a, i64 *%src) {
; CHECK: agfi %r3, -524296
; CHECK: ag %r2, 0(%r3)
; CHECK: br %r14
- %ptr = getelementptr i64 *%src, i64 -65537
- %b = load i64 *%ptr
+ %ptr = getelementptr i64, i64 *%src, i64 -65537
+ %b = load i64 , i64 *%ptr
%add = add i64 %a, %b
ret i64 %add
}
@@ -91,7 +91,7 @@ define i64 @f8(i64 %a, i64 %src, i64 %index) {
%add1 = add i64 %src, %index
%add2 = add i64 %add1, 524280
%ptr = inttoptr i64 %add2 to i64 *
- %b = load i64 *%ptr
+ %b = load i64 , i64 *%ptr
%add = add i64 %a, %b
ret i64 %add
}
@@ -102,26 +102,26 @@ define i64 @f9(i64 *%ptr0) {
; CHECK: brasl %r14, foo@PLT
; CHECK: ag %r2, 160(%r15)
; CHECK: br %r14
- %ptr1 = getelementptr i64 *%ptr0, i64 2
- %ptr2 = getelementptr i64 *%ptr0, i64 4
- %ptr3 = getelementptr i64 *%ptr0, i64 6
- %ptr4 = getelementptr i64 *%ptr0, i64 8
- %ptr5 = getelementptr i64 *%ptr0, i64 10
- %ptr6 = getelementptr i64 *%ptr0, i64 12
- %ptr7 = getelementptr i64 *%ptr0, i64 14
- %ptr8 = getelementptr i64 *%ptr0, i64 16
- %ptr9 = getelementptr i64 *%ptr0, i64 18
+ %ptr1 = getelementptr i64, i64 *%ptr0, i64 2
+ %ptr2 = getelementptr i64, i64 *%ptr0, i64 4
+ %ptr3 = getelementptr i64, i64 *%ptr0, i64 6
+ %ptr4 = getelementptr i64, i64 *%ptr0, i64 8
+ %ptr5 = getelementptr i64, i64 *%ptr0, i64 10
+ %ptr6 = getelementptr i64, i64 *%ptr0, i64 12
+ %ptr7 = getelementptr i64, i64 *%ptr0, i64 14
+ %ptr8 = getelementptr i64, i64 *%ptr0, i64 16
+ %ptr9 = getelementptr i64, i64 *%ptr0, i64 18
- %val0 = load i64 *%ptr0
- %val1 = load i64 *%ptr1
- %val2 = load i64 *%ptr2
- %val3 = load i64 *%ptr3
- %val4 = load i64 *%ptr4
- %val5 = load i64 *%ptr5
- %val6 = load i64 *%ptr6
- %val7 = load i64 *%ptr7
- %val8 = load i64 *%ptr8
- %val9 = load i64 *%ptr9
+ %val0 = load i64 , i64 *%ptr0
+ %val1 = load i64 , i64 *%ptr1
+ %val2 = load i64 , i64 *%ptr2
+ %val3 = load i64 , i64 *%ptr3
+ %val4 = load i64 , i64 *%ptr4
+ %val5 = load i64 , i64 *%ptr5
+ %val6 = load i64 , i64 *%ptr6
+ %val7 = load i64 , i64 *%ptr7
+ %val8 = load i64 , i64 *%ptr8
+ %val9 = load i64 , i64 *%ptr9
%ret = call i64 @foo()
diff --git a/test/CodeGen/SystemZ/int-add-08.ll b/test/CodeGen/SystemZ/int-add-08.ll
index bcef914ed872..75b85d0888a2 100644
--- a/test/CodeGen/SystemZ/int-add-08.ll
+++ b/test/CodeGen/SystemZ/int-add-08.ll
@@ -11,7 +11,7 @@ define void @f1(i128 *%ptr) {
; CHECK: algr
; CHECK: alcgr
; CHECK: br %r14
- %value = load i128 *%ptr
+ %value = load i128 , i128 *%ptr
%add = add i128 %value, %value
store i128 %add, i128 *%ptr
ret void
@@ -25,8 +25,8 @@ define void @f2(i128 *%aptr, i64 %addr) {
; CHECK: alcg {{%r[0-5]}}, 0(%r3)
; CHECK: br %r14
%bptr = inttoptr i64 %addr to i128 *
- %a = load volatile i128 *%aptr
- %b = load i128 *%bptr
+ %a = load volatile i128 , i128 *%aptr
+ %b = load i128 , i128 *%bptr
%add = add i128 %a, %b
store i128 %add, i128 *%aptr
ret void
@@ -40,8 +40,8 @@ define void @f3(i128 *%aptr, i64 %base) {
; CHECK: br %r14
%addr = add i64 %base, 524272
%bptr = inttoptr i64 %addr to i128 *
- %a = load volatile i128 *%aptr
- %b = load i128 *%bptr
+ %a = load volatile i128 , i128 *%aptr
+ %b = load i128 , i128 *%bptr
%add = add i128 %a, %b
store i128 %add, i128 *%aptr
ret void
@@ -57,8 +57,8 @@ define void @f4(i128 *%aptr, i64 %base) {
; CHECK: br %r14
%addr = add i64 %base, 524280
%bptr = inttoptr i64 %addr to i128 *
- %a = load volatile i128 *%aptr
- %b = load i128 *%bptr
+ %a = load volatile i128 , i128 *%aptr
+ %b = load i128 , i128 *%bptr
%add = add i128 %a, %b
store i128 %add, i128 *%aptr
ret void
@@ -74,8 +74,8 @@ define void @f5(i128 *%aptr, i64 %base) {
; CHECK: br %r14
%addr = add i64 %base, 524288
%bptr = inttoptr i64 %addr to i128 *
- %a = load volatile i128 *%aptr
- %b = load i128 *%bptr
+ %a = load volatile i128 , i128 *%aptr
+ %b = load i128 , i128 *%bptr
%add = add i128 %a, %b
store i128 %add, i128 *%aptr
ret void
@@ -89,8 +89,8 @@ define void @f6(i128 *%aptr, i64 %base) {
; CHECK: br %r14
%addr = add i64 %base, -524288
%bptr = inttoptr i64 %addr to i128 *
- %a = load volatile i128 *%aptr
- %b = load i128 *%bptr
+ %a = load volatile i128 , i128 *%aptr
+ %b = load i128 , i128 *%bptr
%add = add i128 %a, %b
store i128 %add, i128 *%aptr
ret void
@@ -104,8 +104,8 @@ define void @f7(i128 *%aptr, i64 %base) {
; CHECK: br %r14
%addr = add i64 %base, -524296
%bptr = inttoptr i64 %addr to i128 *
- %a = load volatile i128 *%aptr
- %b = load i128 *%bptr
+ %a = load volatile i128 , i128 *%aptr
+ %b = load i128 , i128 *%bptr
%add = add i128 %a, %b
store i128 %add, i128 *%aptr
ret void
@@ -119,20 +119,20 @@ define void @f8(i128 *%ptr0) {
; CHECK: alg {{%r[0-9]+}}, {{[0-9]+}}(%r15)
; CHECK: alcg {{%r[0-9]+}}, {{[0-9]+}}(%r15)
; CHECK: br %r14
- %ptr1 = getelementptr i128 *%ptr0, i128 2
- %ptr2 = getelementptr i128 *%ptr0, i128 4
- %ptr3 = getelementptr i128 *%ptr0, i128 6
- %ptr4 = getelementptr i128 *%ptr0, i128 8
+ %ptr1 = getelementptr i128, i128 *%ptr0, i128 2
+ %ptr2 = getelementptr i128, i128 *%ptr0, i128 4
+ %ptr3 = getelementptr i128, i128 *%ptr0, i128 6
+ %ptr4 = getelementptr i128, i128 *%ptr0, i128 8
- %val0 = load i128 *%ptr0
- %val1 = load i128 *%ptr1
- %val2 = load i128 *%ptr2
- %val3 = load i128 *%ptr3
- %val4 = load i128 *%ptr4
+ %val0 = load i128 , i128 *%ptr0
+ %val1 = load i128 , i128 *%ptr1
+ %val2 = load i128 , i128 *%ptr2
+ %val3 = load i128 , i128 *%ptr3
+ %val4 = load i128 , i128 *%ptr4
%retptr = call i128 *@foo()
- %ret = load i128 *%retptr
+ %ret = load i128 , i128 *%retptr
%add0 = add i128 %ret, %val0
%add1 = add i128 %add0, %val1
%add2 = add i128 %add1, %val2
diff --git a/test/CodeGen/SystemZ/int-add-09.ll b/test/CodeGen/SystemZ/int-add-09.ll
index fd151a7f979a..b7bcdc8b93a8 100644
--- a/test/CodeGen/SystemZ/int-add-09.ll
+++ b/test/CodeGen/SystemZ/int-add-09.ll
@@ -9,7 +9,7 @@ define void @f1(i128 *%aptr) {
; CHECK: algfi {{%r[0-5]}}, 1
; CHECK: alcg
; CHECK: br %r14
- %a = load i128 *%aptr
+ %a = load i128 , i128 *%aptr
%xor = xor i128 %a, 128
%add = add i128 %xor, 1
store i128 %add, i128 *%aptr
@@ -22,7 +22,7 @@ define void @f2(i128 *%aptr) {
; CHECK: algfi {{%r[0-5]}}, 4294967295
; CHECK: alcg
; CHECK: br %r14
- %a = load i128 *%aptr
+ %a = load i128 , i128 *%aptr
%xor = xor i128 %a, 128
%add = add i128 %xor, 4294967295
store i128 %add, i128 *%aptr
@@ -35,7 +35,7 @@ define void @f3(i128 *%aptr) {
; CHECK: algr
; CHECK: alcg
; CHECK: br %r14
- %a = load i128 *%aptr
+ %a = load i128 , i128 *%aptr
%xor = xor i128 %a, 128
%add = add i128 %xor, 4294967296
store i128 %add, i128 *%aptr
@@ -48,7 +48,7 @@ define void @f4(i128 *%aptr) {
; CHECK: algr
; CHECK: alcg
; CHECK: br %r14
- %a = load i128 *%aptr
+ %a = load i128 , i128 *%aptr
%xor = xor i128 %a, 128
%add = add i128 %xor, -1
store i128 %add, i128 *%aptr
diff --git a/test/CodeGen/SystemZ/int-add-10.ll b/test/CodeGen/SystemZ/int-add-10.ll
index 01d0a661ed29..f55788dc8bb6 100644
--- a/test/CodeGen/SystemZ/int-add-10.ll
+++ b/test/CodeGen/SystemZ/int-add-10.ll
@@ -9,7 +9,7 @@ define void @f1(i128 *%aptr, i32 %b) {
; CHECK: algfr {{%r[0-5]}}, %r3
; CHECK: alcg
; CHECK: br %r14
- %a = load i128 *%aptr
+ %a = load i128 , i128 *%aptr
%xor = xor i128 %a, 127
%bext = zext i32 %b to i128
%add = add i128 %xor, %bext
@@ -23,7 +23,7 @@ define void @f2(i128 *%aptr, i64 %b) {
; CHECK: algfr {{%r[0-5]}}, %r3
; CHECK: alcg
; CHECK: br %r14
- %a = load i128 *%aptr
+ %a = load i128 , i128 *%aptr
%xor = xor i128 %a, 127
%trunc = trunc i64 %b to i32
%bext = zext i32 %trunc to i128
@@ -39,7 +39,7 @@ define void @f3(i128 *%aptr, i64 %b) {
; CHECK: algfr {{%r[0-5]}}, %r3
; CHECK: alcg
; CHECK: br %r14
- %a = load i128 *%aptr
+ %a = load i128 , i128 *%aptr
%xor = xor i128 %a, 127
%bext = zext i64 %b to i128
%and = and i128 %bext, 4294967295
@@ -54,9 +54,9 @@ define void @f4(i128 *%aptr, i32 *%bsrc) {
; CHECK: algf {{%r[0-5]}}, 0(%r3)
; CHECK: alcg
; CHECK: br %r14
- %a = load i128 *%aptr
+ %a = load i128 , i128 *%aptr
%xor = xor i128 %a, 127
- %b = load i32 *%bsrc
+ %b = load i32 , i32 *%bsrc
%bext = zext i32 %b to i128
%add = add i128 %xor, %bext
store i128 %add, i128 *%aptr
@@ -69,10 +69,10 @@ define void @f5(i128 *%aptr, i32 *%bsrc) {
; CHECK: algf {{%r[0-5]}}, 524284(%r3)
; CHECK: alcg
; CHECK: br %r14
- %a = load i128 *%aptr
+ %a = load i128 , i128 *%aptr
%xor = xor i128 %a, 127
- %ptr = getelementptr i32 *%bsrc, i64 131071
- %b = load i32 *%ptr
+ %ptr = getelementptr i32, i32 *%bsrc, i64 131071
+ %b = load i32 , i32 *%ptr
%bext = zext i32 %b to i128
%add = add i128 %xor, %bext
store i128 %add, i128 *%aptr
@@ -87,10 +87,10 @@ define void @f6(i128 *%aptr, i32 *%bsrc) {
; CHECK: algf {{%r[0-5]}}, 0(%r3)
; CHECK: alcg
; CHECK: br %r14
- %a = load i128 *%aptr
+ %a = load i128 , i128 *%aptr
%xor = xor i128 %a, 127
- %ptr = getelementptr i32 *%bsrc, i64 131072
- %b = load i32 *%ptr
+ %ptr = getelementptr i32, i32 *%bsrc, i64 131072
+ %b = load i32 , i32 *%ptr
%bext = zext i32 %b to i128
%add = add i128 %xor, %bext
store i128 %add, i128 *%aptr
@@ -103,10 +103,10 @@ define void @f7(i128 *%aptr, i32 *%bsrc) {
; CHECK: algf {{%r[0-5]}}, -4(%r3)
; CHECK: alcg
; CHECK: br %r14
- %a = load i128 *%aptr
+ %a = load i128 , i128 *%aptr
%xor = xor i128 %a, 127
- %ptr = getelementptr i32 *%bsrc, i128 -1
- %b = load i32 *%ptr
+ %ptr = getelementptr i32, i32 *%bsrc, i128 -1
+ %b = load i32 , i32 *%ptr
%bext = zext i32 %b to i128
%add = add i128 %xor, %bext
store i128 %add, i128 *%aptr
@@ -119,10 +119,10 @@ define void @f8(i128 *%aptr, i32 *%bsrc) {
; CHECK: algf {{%r[0-5]}}, -524288(%r3)
; CHECK: alcg
; CHECK: br %r14
- %a = load i128 *%aptr
+ %a = load i128 , i128 *%aptr
%xor = xor i128 %a, 127
- %ptr = getelementptr i32 *%bsrc, i128 -131072
- %b = load i32 *%ptr
+ %ptr = getelementptr i32, i32 *%bsrc, i128 -131072
+ %b = load i32 , i32 *%ptr
%bext = zext i32 %b to i128
%add = add i128 %xor, %bext
store i128 %add, i128 *%aptr
@@ -137,10 +137,10 @@ define void @f9(i128 *%aptr, i32 *%bsrc) {
; CHECK: algf {{%r[0-5]}}, 0(%r3)
; CHECK: alcg
; CHECK: br %r14
- %a = load i128 *%aptr
+ %a = load i128 , i128 *%aptr
%xor = xor i128 %a, 127
- %ptr = getelementptr i32 *%bsrc, i128 -131073
- %b = load i32 *%ptr
+ %ptr = getelementptr i32, i32 *%bsrc, i128 -131073
+ %b = load i32 , i32 *%ptr
%bext = zext i32 %b to i128
%add = add i128 %xor, %bext
store i128 %add, i128 *%aptr
@@ -152,12 +152,12 @@ define void @f10(i128 *%aptr, i64 %src, i64 %index) {
; CHECK-LABEL: f10:
; CHECK: algf {{%r[0-5]}}, 524284({{%r4,%r3|%r3,%r4}})
; CHECK: br %r14
- %a = load i128 *%aptr
+ %a = load i128 , i128 *%aptr
%xor = xor i128 %a, 127
%add1 = add i64 %src, %index
%add2 = add i64 %add1, 524284
%ptr = inttoptr i64 %add2 to i32 *
- %b = load i32 *%ptr
+ %b = load i32 , i32 *%ptr
%bext = zext i32 %b to i128
%add = add i128 %xor, %bext
store i128 %add, i128 *%aptr
diff --git a/test/CodeGen/SystemZ/int-add-11.ll b/test/CodeGen/SystemZ/int-add-11.ll
index 679c206094f3..b93555f5d357 100644
--- a/test/CodeGen/SystemZ/int-add-11.ll
+++ b/test/CodeGen/SystemZ/int-add-11.ll
@@ -8,7 +8,7 @@ define void @f1(i32 *%ptr) {
; CHECK-LABEL: f1:
; CHECK: asi 0(%r2), 1
; CHECK: br %r14
- %val = load i32 *%ptr
+ %val = load i32 , i32 *%ptr
%add = add i32 %val, 127
store i32 %add, i32 *%ptr
ret void
@@ -19,7 +19,7 @@ define void @f2(i32 *%ptr) {
; CHECK-LABEL: f2:
; CHECK: asi 0(%r2), 127
; CHECK: br %r14
- %val = load i32 *%ptr
+ %val = load i32 , i32 *%ptr
%add = add i32 %val, 127
store i32 %add, i32 *%ptr
ret void
@@ -32,7 +32,7 @@ define void @f3(i32 *%ptr) {
; CHECK-NOT: asi
; CHECK: st %r0, 0(%r2)
; CHECK: br %r14
- %val = load i32 *%ptr
+ %val = load i32 , i32 *%ptr
%add = add i32 %val, 128
store i32 %add, i32 *%ptr
ret void
@@ -43,7 +43,7 @@ define void @f4(i32 *%ptr) {
; CHECK-LABEL: f4:
; CHECK: asi 0(%r2), -128
; CHECK: br %r14
- %val = load i32 *%ptr
+ %val = load i32 , i32 *%ptr
%add = add i32 %val, -128
store i32 %add, i32 *%ptr
ret void
@@ -55,7 +55,7 @@ define void @f5(i32 *%ptr) {
; CHECK-NOT: asi
; CHECK: st %r0, 0(%r2)
; CHECK: br %r14
- %val = load i32 *%ptr
+ %val = load i32 , i32 *%ptr
%add = add i32 %val, -129
store i32 %add, i32 *%ptr
ret void
@@ -66,8 +66,8 @@ define void @f6(i32 *%base) {
; CHECK-LABEL: f6:
; CHECK: asi 524284(%r2), 1
; CHECK: br %r14
- %ptr = getelementptr i32 *%base, i64 131071
- %val = load i32 *%ptr
+ %ptr = getelementptr i32, i32 *%base, i64 131071
+ %val = load i32 , i32 *%ptr
%add = add i32 %val, 1
store i32 %add, i32 *%ptr
ret void
@@ -80,8 +80,8 @@ define void @f7(i32 *%base) {
; CHECK: agfi %r2, 524288
; CHECK: asi 0(%r2), 1
; CHECK: br %r14
- %ptr = getelementptr i32 *%base, i64 131072
- %val = load i32 *%ptr
+ %ptr = getelementptr i32, i32 *%base, i64 131072
+ %val = load i32 , i32 *%ptr
%add = add i32 %val, 1
store i32 %add, i32 *%ptr
ret void
@@ -92,8 +92,8 @@ define void @f8(i32 *%base) {
; CHECK-LABEL: f8:
; CHECK: asi -524288(%r2), 1
; CHECK: br %r14
- %ptr = getelementptr i32 *%base, i64 -131072
- %val = load i32 *%ptr
+ %ptr = getelementptr i32, i32 *%base, i64 -131072
+ %val = load i32 , i32 *%ptr
%add = add i32 %val, 1
store i32 %add, i32 *%ptr
ret void
@@ -106,8 +106,8 @@ define void @f9(i32 *%base) {
; CHECK: agfi %r2, -524292
; CHECK: asi 0(%r2), 1
; CHECK: br %r14
- %ptr = getelementptr i32 *%base, i64 -131073
- %val = load i32 *%ptr
+ %ptr = getelementptr i32, i32 *%base, i64 -131073
+ %val = load i32 , i32 *%ptr
%add = add i32 %val, 1
store i32 %add, i32 *%ptr
ret void
@@ -122,7 +122,7 @@ define void @f10(i64 %base, i64 %index) {
%add1 = add i64 %base, %index
%add2 = add i64 %add1, 4
%ptr = inttoptr i64 %add2 to i32 *
- %val = load i32 *%ptr
+ %val = load i32 , i32 *%ptr
%add = add i32 %val, 1
store i32 %add, i32 *%ptr
ret void
@@ -134,22 +134,22 @@ define void @f11(i32 *%ptr, i32 %sel) {
; CHECK: asi {{[0-9]+}}(%r15), 127
; CHECK: br %r14
entry:
- %val0 = load volatile i32 *%ptr
- %val1 = load volatile i32 *%ptr
- %val2 = load volatile i32 *%ptr
- %val3 = load volatile i32 *%ptr
- %val4 = load volatile i32 *%ptr
- %val5 = load volatile i32 *%ptr
- %val6 = load volatile i32 *%ptr
- %val7 = load volatile i32 *%ptr
- %val8 = load volatile i32 *%ptr
- %val9 = load volatile i32 *%ptr
- %val10 = load volatile i32 *%ptr
- %val11 = load volatile i32 *%ptr
- %val12 = load volatile i32 *%ptr
- %val13 = load volatile i32 *%ptr
- %val14 = load volatile i32 *%ptr
- %val15 = load volatile i32 *%ptr
+ %val0 = load volatile i32 , i32 *%ptr
+ %val1 = load volatile i32 , i32 *%ptr
+ %val2 = load volatile i32 , i32 *%ptr
+ %val3 = load volatile i32 , i32 *%ptr
+ %val4 = load volatile i32 , i32 *%ptr
+ %val5 = load volatile i32 , i32 *%ptr
+ %val6 = load volatile i32 , i32 *%ptr
+ %val7 = load volatile i32 , i32 *%ptr
+ %val8 = load volatile i32 , i32 *%ptr
+ %val9 = load volatile i32 , i32 *%ptr
+ %val10 = load volatile i32 , i32 *%ptr
+ %val11 = load volatile i32 , i32 *%ptr
+ %val12 = load volatile i32 , i32 *%ptr
+ %val13 = load volatile i32 , i32 *%ptr
+ %val14 = load volatile i32 , i32 *%ptr
+ %val15 = load volatile i32 , i32 *%ptr
%test = icmp ne i32 %sel, 0
br i1 %test, label %add, label %store
@@ -217,22 +217,22 @@ define void @f12(i32 *%ptr, i32 %sel) {
; CHECK: asi {{[0-9]+}}(%r15), -128
; CHECK: br %r14
entry:
- %val0 = load volatile i32 *%ptr
- %val1 = load volatile i32 *%ptr
- %val2 = load volatile i32 *%ptr
- %val3 = load volatile i32 *%ptr
- %val4 = load volatile i32 *%ptr
- %val5 = load volatile i32 *%ptr
- %val6 = load volatile i32 *%ptr
- %val7 = load volatile i32 *%ptr
- %val8 = load volatile i32 *%ptr
- %val9 = load volatile i32 *%ptr
- %val10 = load volatile i32 *%ptr
- %val11 = load volatile i32 *%ptr
- %val12 = load volatile i32 *%ptr
- %val13 = load volatile i32 *%ptr
- %val14 = load volatile i32 *%ptr
- %val15 = load volatile i32 *%ptr
+ %val0 = load volatile i32 , i32 *%ptr
+ %val1 = load volatile i32 , i32 *%ptr
+ %val2 = load volatile i32 , i32 *%ptr
+ %val3 = load volatile i32 , i32 *%ptr
+ %val4 = load volatile i32 , i32 *%ptr
+ %val5 = load volatile i32 , i32 *%ptr
+ %val6 = load volatile i32 , i32 *%ptr
+ %val7 = load volatile i32 , i32 *%ptr
+ %val8 = load volatile i32 , i32 *%ptr
+ %val9 = load volatile i32 , i32 *%ptr
+ %val10 = load volatile i32 , i32 *%ptr
+ %val11 = load volatile i32 , i32 *%ptr
+ %val12 = load volatile i32 , i32 *%ptr
+ %val13 = load volatile i32 , i32 *%ptr
+ %val14 = load volatile i32 , i32 *%ptr
+ %val15 = load volatile i32 , i32 *%ptr
%test = icmp ne i32 %sel, 0
br i1 %test, label %add, label %store
diff --git a/test/CodeGen/SystemZ/int-add-12.ll b/test/CodeGen/SystemZ/int-add-12.ll
index 741cce19d72c..496650f435c9 100644
--- a/test/CodeGen/SystemZ/int-add-12.ll
+++ b/test/CodeGen/SystemZ/int-add-12.ll
@@ -7,7 +7,7 @@ define void @f1(i64 *%ptr) {
; CHECK-LABEL: f1:
; CHECK: agsi 0(%r2), 1
; CHECK: br %r14
- %val = load i64 *%ptr
+ %val = load i64 , i64 *%ptr
%add = add i64 %val, 127
store i64 %add, i64 *%ptr
ret void
@@ -18,7 +18,7 @@ define void @f2(i64 *%ptr) {
; CHECK-LABEL: f2:
; CHECK: agsi 0(%r2), 127
; CHECK: br %r14
- %val = load i64 *%ptr
+ %val = load i64 , i64 *%ptr
%add = add i64 %val, 127
store i64 %add, i64 *%ptr
ret void
@@ -31,7 +31,7 @@ define void @f3(i64 *%ptr) {
; CHECK-NOT: agsi
; CHECK: stg %r0, 0(%r2)
; CHECK: br %r14
- %val = load i64 *%ptr
+ %val = load i64 , i64 *%ptr
%add = add i64 %val, 128
store i64 %add, i64 *%ptr
ret void
@@ -42,7 +42,7 @@ define void @f4(i64 *%ptr) {
; CHECK-LABEL: f4:
; CHECK: agsi 0(%r2), -128
; CHECK: br %r14
- %val = load i64 *%ptr
+ %val = load i64 , i64 *%ptr
%add = add i64 %val, -128
store i64 %add, i64 *%ptr
ret void
@@ -54,7 +54,7 @@ define void @f5(i64 *%ptr) {
; CHECK-NOT: agsi
; CHECK: stg %r0, 0(%r2)
; CHECK: br %r14
- %val = load i64 *%ptr
+ %val = load i64 , i64 *%ptr
%add = add i64 %val, -129
store i64 %add, i64 *%ptr
ret void
@@ -65,8 +65,8 @@ define void @f6(i64 *%base) {
; CHECK-LABEL: f6:
; CHECK: agsi 524280(%r2), 1
; CHECK: br %r14
- %ptr = getelementptr i64 *%base, i64 65535
- %val = load i64 *%ptr
+ %ptr = getelementptr i64, i64 *%base, i64 65535
+ %val = load i64 , i64 *%ptr
%add = add i64 %val, 1
store i64 %add, i64 *%ptr
ret void
@@ -79,8 +79,8 @@ define void @f7(i64 *%base) {
; CHECK: agfi %r2, 524288
; CHECK: agsi 0(%r2), 1
; CHECK: br %r14
- %ptr = getelementptr i64 *%base, i64 65536
- %val = load i64 *%ptr
+ %ptr = getelementptr i64, i64 *%base, i64 65536
+ %val = load i64 , i64 *%ptr
%add = add i64 %val, 1
store i64 %add, i64 *%ptr
ret void
@@ -91,8 +91,8 @@ define void @f8(i64 *%base) {
; CHECK-LABEL: f8:
; CHECK: agsi -524288(%r2), 1
; CHECK: br %r14
- %ptr = getelementptr i64 *%base, i64 -65536
- %val = load i64 *%ptr
+ %ptr = getelementptr i64, i64 *%base, i64 -65536
+ %val = load i64 , i64 *%ptr
%add = add i64 %val, 1
store i64 %add, i64 *%ptr
ret void
@@ -105,8 +105,8 @@ define void @f9(i64 *%base) {
; CHECK: agfi %r2, -524296
; CHECK: agsi 0(%r2), 1
; CHECK: br %r14
- %ptr = getelementptr i64 *%base, i64 -65537
- %val = load i64 *%ptr
+ %ptr = getelementptr i64, i64 *%base, i64 -65537
+ %val = load i64 , i64 *%ptr
%add = add i64 %val, 1
store i64 %add, i64 *%ptr
ret void
@@ -121,7 +121,7 @@ define void @f10(i64 %base, i64 %index) {
%add1 = add i64 %base, %index
%add2 = add i64 %add1, 8
%ptr = inttoptr i64 %add2 to i64 *
- %val = load i64 *%ptr
+ %val = load i64 , i64 *%ptr
%add = add i64 %val, 1
store i64 %add, i64 *%ptr
ret void
@@ -133,22 +133,22 @@ define void @f11(i64 *%ptr, i32 %sel) {
; CHECK: agsi {{[0-9]+}}(%r15), 127
; CHECK: br %r14
entry:
- %val0 = load volatile i64 *%ptr
- %val1 = load volatile i64 *%ptr
- %val2 = load volatile i64 *%ptr
- %val3 = load volatile i64 *%ptr
- %val4 = load volatile i64 *%ptr
- %val5 = load volatile i64 *%ptr
- %val6 = load volatile i64 *%ptr
- %val7 = load volatile i64 *%ptr
- %val8 = load volatile i64 *%ptr
- %val9 = load volatile i64 *%ptr
- %val10 = load volatile i64 *%ptr
- %val11 = load volatile i64 *%ptr
- %val12 = load volatile i64 *%ptr
- %val13 = load volatile i64 *%ptr
- %val14 = load volatile i64 *%ptr
- %val15 = load volatile i64 *%ptr
+ %val0 = load volatile i64 , i64 *%ptr
+ %val1 = load volatile i64 , i64 *%ptr
+ %val2 = load volatile i64 , i64 *%ptr
+ %val3 = load volatile i64 , i64 *%ptr
+ %val4 = load volatile i64 , i64 *%ptr
+ %val5 = load volatile i64 , i64 *%ptr
+ %val6 = load volatile i64 , i64 *%ptr
+ %val7 = load volatile i64 , i64 *%ptr
+ %val8 = load volatile i64 , i64 *%ptr
+ %val9 = load volatile i64 , i64 *%ptr
+ %val10 = load volatile i64 , i64 *%ptr
+ %val11 = load volatile i64 , i64 *%ptr
+ %val12 = load volatile i64 , i64 *%ptr
+ %val13 = load volatile i64 , i64 *%ptr
+ %val14 = load volatile i64 , i64 *%ptr
+ %val15 = load volatile i64 , i64 *%ptr
%test = icmp ne i32 %sel, 0
br i1 %test, label %add, label %store
@@ -216,22 +216,22 @@ define void @f12(i64 *%ptr, i32 %sel) {
; CHECK: agsi {{[0-9]+}}(%r15), -128
; CHECK: br %r14
entry:
- %val0 = load volatile i64 *%ptr
- %val1 = load volatile i64 *%ptr
- %val2 = load volatile i64 *%ptr
- %val3 = load volatile i64 *%ptr
- %val4 = load volatile i64 *%ptr
- %val5 = load volatile i64 *%ptr
- %val6 = load volatile i64 *%ptr
- %val7 = load volatile i64 *%ptr
- %val8 = load volatile i64 *%ptr
- %val9 = load volatile i64 *%ptr
- %val10 = load volatile i64 *%ptr
- %val11 = load volatile i64 *%ptr
- %val12 = load volatile i64 *%ptr
- %val13 = load volatile i64 *%ptr
- %val14 = load volatile i64 *%ptr
- %val15 = load volatile i64 *%ptr
+ %val0 = load volatile i64 , i64 *%ptr
+ %val1 = load volatile i64 , i64 *%ptr
+ %val2 = load volatile i64 , i64 *%ptr
+ %val3 = load volatile i64 , i64 *%ptr
+ %val4 = load volatile i64 , i64 *%ptr
+ %val5 = load volatile i64 , i64 *%ptr
+ %val6 = load volatile i64 , i64 *%ptr
+ %val7 = load volatile i64 , i64 *%ptr
+ %val8 = load volatile i64 , i64 *%ptr
+ %val9 = load volatile i64 , i64 *%ptr
+ %val10 = load volatile i64 , i64 *%ptr
+ %val11 = load volatile i64 , i64 *%ptr
+ %val12 = load volatile i64 , i64 *%ptr
+ %val13 = load volatile i64 , i64 *%ptr
+ %val14 = load volatile i64 , i64 *%ptr
+ %val15 = load volatile i64 , i64 *%ptr
%test = icmp ne i32 %sel, 0
br i1 %test, label %add, label %store
diff --git a/test/CodeGen/SystemZ/int-cmp-01.ll b/test/CodeGen/SystemZ/int-cmp-01.ll
index 6653b6f706f7..97b697db3bdb 100644
--- a/test/CodeGen/SystemZ/int-cmp-01.ll
+++ b/test/CodeGen/SystemZ/int-cmp-01.ll
@@ -8,7 +8,7 @@ define void @f1(i32 %lhs, i16 *%src, i32 *%dst) {
; CHECK-LABEL: f1:
; CHECK: ch %r2, 0(%r3)
; CHECK: br %r14
- %half = load i16 *%src
+ %half = load i16 , i16 *%src
%rhs = sext i16 %half to i32
%cond = icmp slt i32 %lhs, %rhs
%res = select i1 %cond, i32 100, i32 200
@@ -21,8 +21,8 @@ define void @f2(i32 %lhs, i16 *%src, i32 *%dst) {
; CHECK-LABEL: f2:
; CHECK: ch %r2, 4094(%r3)
; CHECK: br %r14
- %ptr = getelementptr i16 *%src, i64 2047
- %half = load i16 *%ptr
+ %ptr = getelementptr i16, i16 *%src, i64 2047
+ %half = load i16 , i16 *%ptr
%rhs = sext i16 %half to i32
%cond = icmp slt i32 %lhs, %rhs
%res = select i1 %cond, i32 100, i32 200
@@ -35,8 +35,8 @@ define void @f3(i32 %lhs, i16 *%src, i32 *%dst) {
; CHECK-LABEL: f3:
; CHECK: chy %r2, 4096(%r3)
; CHECK: br %r14
- %ptr = getelementptr i16 *%src, i64 2048
- %half = load i16 *%ptr
+ %ptr = getelementptr i16, i16 *%src, i64 2048
+ %half = load i16 , i16 *%ptr
%rhs = sext i16 %half to i32
%cond = icmp slt i32 %lhs, %rhs
%res = select i1 %cond, i32 100, i32 200
@@ -49,8 +49,8 @@ define void @f4(i32 %lhs, i16 *%src, i32 *%dst) {
; CHECK-LABEL: f4:
; CHECK: chy %r2, 524286(%r3)
; CHECK: br %r14
- %ptr = getelementptr i16 *%src, i64 262143
- %half = load i16 *%ptr
+ %ptr = getelementptr i16, i16 *%src, i64 262143
+ %half = load i16 , i16 *%ptr
%rhs = sext i16 %half to i32
%cond = icmp slt i32 %lhs, %rhs
%res = select i1 %cond, i32 100, i32 200
@@ -65,8 +65,8 @@ define void @f5(i32 %lhs, i16 *%src, i32 *%dst) {
; CHECK: agfi %r3, 524288
; CHECK: ch %r2, 0(%r3)
; CHECK: br %r14
- %ptr = getelementptr i16 *%src, i64 262144
- %half = load i16 *%ptr
+ %ptr = getelementptr i16, i16 *%src, i64 262144
+ %half = load i16 , i16 *%ptr
%rhs = sext i16 %half to i32
%cond = icmp slt i32 %lhs, %rhs
%res = select i1 %cond, i32 100, i32 200
@@ -79,8 +79,8 @@ define void @f6(i32 %lhs, i16 *%src, i32 *%dst) {
; CHECK-LABEL: f6:
; CHECK: chy %r2, -2(%r3)
; CHECK: br %r14
- %ptr = getelementptr i16 *%src, i64 -1
- %half = load i16 *%ptr
+ %ptr = getelementptr i16, i16 *%src, i64 -1
+ %half = load i16 , i16 *%ptr
%rhs = sext i16 %half to i32
%cond = icmp slt i32 %lhs, %rhs
%res = select i1 %cond, i32 100, i32 200
@@ -93,8 +93,8 @@ define void @f7(i32 %lhs, i16 *%src, i32 *%dst) {
; CHECK-LABEL: f7:
; CHECK: chy %r2, -524288(%r3)
; CHECK: br %r14
- %ptr = getelementptr i16 *%src, i64 -262144
- %half = load i16 *%ptr
+ %ptr = getelementptr i16, i16 *%src, i64 -262144
+ %half = load i16 , i16 *%ptr
%rhs = sext i16 %half to i32
%cond = icmp slt i32 %lhs, %rhs
%res = select i1 %cond, i32 100, i32 200
@@ -109,8 +109,8 @@ define void @f8(i32 %lhs, i16 *%src, i32 *%dst) {
; CHECK: agfi %r3, -524290
; CHECK: ch %r2, 0(%r3)
; CHECK: br %r14
- %ptr = getelementptr i16 *%src, i64 -262145
- %half = load i16 *%ptr
+ %ptr = getelementptr i16, i16 *%src, i64 -262145
+ %half = load i16 , i16 *%ptr
%rhs = sext i16 %half to i32
%cond = icmp slt i32 %lhs, %rhs
%res = select i1 %cond, i32 100, i32 200
@@ -126,7 +126,7 @@ define void @f9(i32 %lhs, i64 %base, i64 %index, i32 *%dst) {
%add1 = add i64 %base, %index
%add2 = add i64 %add1, 4094
%ptr = inttoptr i64 %add2 to i16 *
- %half = load i16 *%ptr
+ %half = load i16 , i16 *%ptr
%rhs = sext i16 %half to i32
%cond = icmp slt i32 %lhs, %rhs
%res = select i1 %cond, i32 100, i32 200
@@ -142,7 +142,7 @@ define void @f10(i32 %lhs, i64 %base, i64 %index, i32 *%dst) {
%add1 = add i64 %base, %index
%add2 = add i64 %add1, 4096
%ptr = inttoptr i64 %add2 to i16 *
- %half = load i16 *%ptr
+ %half = load i16 , i16 *%ptr
%rhs = sext i16 %half to i32
%cond = icmp slt i32 %lhs, %rhs
%res = select i1 %cond, i32 100, i32 200
@@ -157,7 +157,7 @@ define double @f11(double %a, double %b, i32 %rhs, i16 *%src) {
; CHECK-NEXT: jh {{\.L.*}}
; CHECK: ldr %f0, %f2
; CHECK: br %r14
- %half = load i16 *%src
+ %half = load i16 , i16 *%src
%lhs = sext i16 %half to i32
%cond = icmp slt i32 %lhs, %rhs
%res = select i1 %cond, double %a, double %b
diff --git a/test/CodeGen/SystemZ/int-cmp-02.ll b/test/CodeGen/SystemZ/int-cmp-02.ll
index 4a8a1a9ade33..d5aef0f0f977 100644
--- a/test/CodeGen/SystemZ/int-cmp-02.ll
+++ b/test/CodeGen/SystemZ/int-cmp-02.ll
@@ -22,7 +22,7 @@ define double @f2(double %a, double %b, i32 %i1, i32 *%ptr) {
; CHECK-NEXT: jl
; CHECK: ldr %f0, %f2
; CHECK: br %r14
- %i2 = load i32 *%ptr
+ %i2 = load i32 , i32 *%ptr
%cond = icmp slt i32 %i1, %i2
%res = select i1 %cond, double %a, double %b
ret double %res
@@ -35,8 +35,8 @@ define double @f3(double %a, double %b, i32 %i1, i32 *%base) {
; CHECK-NEXT: jl
; CHECK: ldr %f0, %f2
; CHECK: br %r14
- %ptr = getelementptr i32 *%base, i64 1023
- %i2 = load i32 *%ptr
+ %ptr = getelementptr i32, i32 *%base, i64 1023
+ %i2 = load i32 , i32 *%ptr
%cond = icmp slt i32 %i1, %i2
%res = select i1 %cond, double %a, double %b
ret double %res
@@ -49,8 +49,8 @@ define double @f4(double %a, double %b, i32 %i1, i32 *%base) {
; CHECK-NEXT: jl
; CHECK: ldr %f0, %f2
; CHECK: br %r14
- %ptr = getelementptr i32 *%base, i64 1024
- %i2 = load i32 *%ptr
+ %ptr = getelementptr i32, i32 *%base, i64 1024
+ %i2 = load i32 , i32 *%ptr
%cond = icmp slt i32 %i1, %i2
%res = select i1 %cond, double %a, double %b
ret double %res
@@ -63,8 +63,8 @@ define double @f5(double %a, double %b, i32 %i1, i32 *%base) {
; CHECK-NEXT: jl
; CHECK: ldr %f0, %f2
; CHECK: br %r14
- %ptr = getelementptr i32 *%base, i64 131071
- %i2 = load i32 *%ptr
+ %ptr = getelementptr i32, i32 *%base, i64 131071
+ %i2 = load i32 , i32 *%ptr
%cond = icmp slt i32 %i1, %i2
%res = select i1 %cond, double %a, double %b
ret double %res
@@ -79,8 +79,8 @@ define double @f6(double %a, double %b, i32 %i1, i32 *%base) {
; CHECK-NEXT: jl
; CHECK: ldr %f0, %f2
; CHECK: br %r14
- %ptr = getelementptr i32 *%base, i64 131072
- %i2 = load i32 *%ptr
+ %ptr = getelementptr i32, i32 *%base, i64 131072
+ %i2 = load i32 , i32 *%ptr
%cond = icmp slt i32 %i1, %i2
%res = select i1 %cond, double %a, double %b
ret double %res
@@ -93,8 +93,8 @@ define double @f7(double %a, double %b, i32 %i1, i32 *%base) {
; CHECK-NEXT: jl
; CHECK: ldr %f0, %f2
; CHECK: br %r14
- %ptr = getelementptr i32 *%base, i64 -1
- %i2 = load i32 *%ptr
+ %ptr = getelementptr i32, i32 *%base, i64 -1
+ %i2 = load i32 , i32 *%ptr
%cond = icmp slt i32 %i1, %i2
%res = select i1 %cond, double %a, double %b
ret double %res
@@ -107,8 +107,8 @@ define double @f8(double %a, double %b, i32 %i1, i32 *%base) {
; CHECK-NEXT: jl
; CHECK: ldr %f0, %f2
; CHECK: br %r14
- %ptr = getelementptr i32 *%base, i64 -131072
- %i2 = load i32 *%ptr
+ %ptr = getelementptr i32, i32 *%base, i64 -131072
+ %i2 = load i32 , i32 *%ptr
%cond = icmp slt i32 %i1, %i2
%res = select i1 %cond, double %a, double %b
ret double %res
@@ -123,8 +123,8 @@ define double @f9(double %a, double %b, i32 %i1, i32 *%base) {
; CHECK-NEXT: jl
; CHECK: ldr %f0, %f2
; CHECK: br %r14
- %ptr = getelementptr i32 *%base, i64 -131073
- %i2 = load i32 *%ptr
+ %ptr = getelementptr i32, i32 *%base, i64 -131073
+ %i2 = load i32 , i32 *%ptr
%cond = icmp slt i32 %i1, %i2
%res = select i1 %cond, double %a, double %b
ret double %res
@@ -140,7 +140,7 @@ define double @f10(double %a, double %b, i32 %i1, i64 %base, i64 %index) {
%add1 = add i64 %base, %index
%add2 = add i64 %add1, 4092
%ptr = inttoptr i64 %add2 to i32 *
- %i2 = load i32 *%ptr
+ %i2 = load i32 , i32 *%ptr
%cond = icmp slt i32 %i1, %i2
%res = select i1 %cond, double %a, double %b
ret double %res
@@ -156,7 +156,7 @@ define double @f11(double %a, double %b, i32 %i1, i64 %base, i64 %index) {
%add1 = add i64 %base, %index
%add2 = add i64 %add1, 4096
%ptr = inttoptr i64 %add2 to i32 *
- %i2 = load i32 *%ptr
+ %i2 = load i32 , i32 *%ptr
%cond = icmp slt i32 %i1, %i2
%res = select i1 %cond, double %a, double %b
ret double %res
@@ -189,7 +189,7 @@ define double @f13(double %a, double %b, i32 %i2, i32 *%ptr) {
; CHECK-NEXT: jh {{\.L.*}}
; CHECK: ldr %f0, %f2
; CHECK: br %r14
- %i1 = load i32 *%ptr
+ %i1 = load i32 , i32 *%ptr
%cond = icmp slt i32 %i1, %i2
%res = select i1 %cond, double %a, double %b
ret double %res
diff --git a/test/CodeGen/SystemZ/int-cmp-03.ll b/test/CodeGen/SystemZ/int-cmp-03.ll
index aa654e086dc6..0246666f06fe 100644
--- a/test/CodeGen/SystemZ/int-cmp-03.ll
+++ b/test/CodeGen/SystemZ/int-cmp-03.ll
@@ -20,7 +20,7 @@ define double @f2(double %a, double %b, i32 %i1, i32 *%ptr) {
; CHECK-NEXT: jl
; CHECK: ldr %f0, %f2
; CHECK: br %r14
- %i2 = load i32 *%ptr
+ %i2 = load i32 , i32 *%ptr
%cond = icmp ult i32 %i1, %i2
%res = select i1 %cond, double %a, double %b
ret double %res
@@ -33,8 +33,8 @@ define double @f3(double %a, double %b, i32 %i1, i32 *%base) {
; CHECK-NEXT: jl
; CHECK: ldr %f0, %f2
; CHECK: br %r14
- %ptr = getelementptr i32 *%base, i64 1023
- %i2 = load i32 *%ptr
+ %ptr = getelementptr i32, i32 *%base, i64 1023
+ %i2 = load i32 , i32 *%ptr
%cond = icmp ult i32 %i1, %i2
%res = select i1 %cond, double %a, double %b
ret double %res
@@ -47,8 +47,8 @@ define double @f4(double %a, double %b, i32 %i1, i32 *%base) {
; CHECK-NEXT: jl
; CHECK: ldr %f0, %f2
; CHECK: br %r14
- %ptr = getelementptr i32 *%base, i64 1024
- %i2 = load i32 *%ptr
+ %ptr = getelementptr i32, i32 *%base, i64 1024
+ %i2 = load i32 , i32 *%ptr
%cond = icmp ult i32 %i1, %i2
%res = select i1 %cond, double %a, double %b
ret double %res
@@ -61,8 +61,8 @@ define double @f5(double %a, double %b, i32 %i1, i32 *%base) {
; CHECK-NEXT: jl
; CHECK: ldr %f0, %f2
; CHECK: br %r14
- %ptr = getelementptr i32 *%base, i64 131071
- %i2 = load i32 *%ptr
+ %ptr = getelementptr i32, i32 *%base, i64 131071
+ %i2 = load i32 , i32 *%ptr
%cond = icmp ult i32 %i1, %i2
%res = select i1 %cond, double %a, double %b
ret double %res
@@ -77,8 +77,8 @@ define double @f6(double %a, double %b, i32 %i1, i32 *%base) {
; CHECK-NEXT: jl
; CHECK: ldr %f0, %f2
; CHECK: br %r14
- %ptr = getelementptr i32 *%base, i64 131072
- %i2 = load i32 *%ptr
+ %ptr = getelementptr i32, i32 *%base, i64 131072
+ %i2 = load i32 , i32 *%ptr
%cond = icmp ult i32 %i1, %i2
%res = select i1 %cond, double %a, double %b
ret double %res
@@ -91,8 +91,8 @@ define double @f7(double %a, double %b, i32 %i1, i32 *%base) {
; CHECK-NEXT: jl
; CHECK: ldr %f0, %f2
; CHECK: br %r14
- %ptr = getelementptr i32 *%base, i64 -1
- %i2 = load i32 *%ptr
+ %ptr = getelementptr i32, i32 *%base, i64 -1
+ %i2 = load i32 , i32 *%ptr
%cond = icmp ult i32 %i1, %i2
%res = select i1 %cond, double %a, double %b
ret double %res
@@ -105,8 +105,8 @@ define double @f8(double %a, double %b, i32 %i1, i32 *%base) {
; CHECK-NEXT: jl
; CHECK: ldr %f0, %f2
; CHECK: br %r14
- %ptr = getelementptr i32 *%base, i64 -131072
- %i2 = load i32 *%ptr
+ %ptr = getelementptr i32, i32 *%base, i64 -131072
+ %i2 = load i32 , i32 *%ptr
%cond = icmp ult i32 %i1, %i2
%res = select i1 %cond, double %a, double %b
ret double %res
@@ -121,8 +121,8 @@ define double @f9(double %a, double %b, i32 %i1, i32 *%base) {
; CHECK-NEXT: jl
; CHECK: ldr %f0, %f2
; CHECK: br %r14
- %ptr = getelementptr i32 *%base, i64 -131073
- %i2 = load i32 *%ptr
+ %ptr = getelementptr i32, i32 *%base, i64 -131073
+ %i2 = load i32 , i32 *%ptr
%cond = icmp ult i32 %i1, %i2
%res = select i1 %cond, double %a, double %b
ret double %res
@@ -138,7 +138,7 @@ define double @f10(double %a, double %b, i32 %i1, i64 %base, i64 %index) {
%add1 = add i64 %base, %index
%add2 = add i64 %add1, 4092
%ptr = inttoptr i64 %add2 to i32 *
- %i2 = load i32 *%ptr
+ %i2 = load i32 , i32 *%ptr
%cond = icmp ult i32 %i1, %i2
%res = select i1 %cond, double %a, double %b
ret double %res
@@ -154,7 +154,7 @@ define double @f11(double %a, double %b, i32 %i1, i64 %base, i64 %index) {
%add1 = add i64 %base, %index
%add2 = add i64 %add1, 4096
%ptr = inttoptr i64 %add2 to i32 *
- %i2 = load i32 *%ptr
+ %i2 = load i32 , i32 *%ptr
%cond = icmp ult i32 %i1, %i2
%res = select i1 %cond, double %a, double %b
ret double %res
@@ -167,7 +167,7 @@ define double @f12(double %a, double %b, i32 %i2, i32 *%ptr) {
; CHECK-NEXT: jh {{\.L.*}}
; CHECK: ldr %f0, %f2
; CHECK: br %r14
- %i1 = load i32 *%ptr
+ %i1 = load i32 , i32 *%ptr
%cond = icmp ult i32 %i1, %i2
%res = select i1 %cond, double %a, double %b
ret double %res
diff --git a/test/CodeGen/SystemZ/int-cmp-04.ll b/test/CodeGen/SystemZ/int-cmp-04.ll
index a6606f392923..90f05ea38680 100644
--- a/test/CodeGen/SystemZ/int-cmp-04.ll
+++ b/test/CodeGen/SystemZ/int-cmp-04.ll
@@ -8,7 +8,7 @@ define void @f1(i64 %lhs, i16 *%src, i64 *%dst) {
; CHECK-LABEL: f1:
; CHECK: cgh %r2, 0(%r3)
; CHECK: br %r14
- %half = load i16 *%src
+ %half = load i16 , i16 *%src
%rhs = sext i16 %half to i64
%cond = icmp slt i64 %lhs, %rhs
%res = select i1 %cond, i64 100, i64 200
@@ -21,8 +21,8 @@ define void @f2(i64 %lhs, i16 *%src, i64 *%dst) {
; CHECK-LABEL: f2:
; CHECK: cgh %r2, 524286(%r3)
; CHECK: br %r14
- %ptr = getelementptr i16 *%src, i64 262143
- %half = load i16 *%ptr
+ %ptr = getelementptr i16, i16 *%src, i64 262143
+ %half = load i16 , i16 *%ptr
%rhs = sext i16 %half to i64
%cond = icmp slt i64 %lhs, %rhs
%res = select i1 %cond, i64 100, i64 200
@@ -37,8 +37,8 @@ define void @f3(i64 %lhs, i16 *%src, i64 *%dst) {
; CHECK: agfi %r3, 524288
; CHECK: cgh %r2, 0(%r3)
; CHECK: br %r14
- %ptr = getelementptr i16 *%src, i64 262144
- %half = load i16 *%ptr
+ %ptr = getelementptr i16, i16 *%src, i64 262144
+ %half = load i16 , i16 *%ptr
%rhs = sext i16 %half to i64
%cond = icmp slt i64 %lhs, %rhs
%res = select i1 %cond, i64 100, i64 200
@@ -51,8 +51,8 @@ define void @f4(i64 %lhs, i16 *%src, i64 *%dst) {
; CHECK-LABEL: f4:
; CHECK: cgh %r2, -2(%r3)
; CHECK: br %r14
- %ptr = getelementptr i16 *%src, i64 -1
- %half = load i16 *%ptr
+ %ptr = getelementptr i16, i16 *%src, i64 -1
+ %half = load i16 , i16 *%ptr
%rhs = sext i16 %half to i64
%cond = icmp slt i64 %lhs, %rhs
%res = select i1 %cond, i64 100, i64 200
@@ -65,8 +65,8 @@ define void @f5(i64 %lhs, i16 *%src, i64 *%dst) {
; CHECK-LABEL: f5:
; CHECK: cgh %r2, -524288(%r3)
; CHECK: br %r14
- %ptr = getelementptr i16 *%src, i64 -262144
- %half = load i16 *%ptr
+ %ptr = getelementptr i16, i16 *%src, i64 -262144
+ %half = load i16 , i16 *%ptr
%rhs = sext i16 %half to i64
%cond = icmp slt i64 %lhs, %rhs
%res = select i1 %cond, i64 100, i64 200
@@ -81,8 +81,8 @@ define void @f6(i64 %lhs, i16 *%src, i64 *%dst) {
; CHECK: agfi %r3, -524290
; CHECK: cgh %r2, 0(%r3)
; CHECK: br %r14
- %ptr = getelementptr i16 *%src, i64 -262145
- %half = load i16 *%ptr
+ %ptr = getelementptr i16, i16 *%src, i64 -262145
+ %half = load i16 , i16 *%ptr
%rhs = sext i16 %half to i64
%cond = icmp slt i64 %lhs, %rhs
%res = select i1 %cond, i64 100, i64 200
@@ -98,7 +98,7 @@ define void @f7(i64 %lhs, i64 %base, i64 %index, i64 *%dst) {
%add1 = add i64 %base, %index
%add2 = add i64 %add1, 4096
%ptr = inttoptr i64 %add2 to i16 *
- %half = load i16 *%ptr
+ %half = load i16 , i16 *%ptr
%rhs = sext i16 %half to i64
%cond = icmp slt i64 %lhs, %rhs
%res = select i1 %cond, i64 100, i64 200
@@ -113,7 +113,7 @@ define double @f8(double %a, double %b, i64 %rhs, i16 *%src) {
; CHECK-NEXT: jh {{\.L.*}}
; CHECK: ldr %f0, %f2
; CHECK: br %r14
- %half = load i16 *%src
+ %half = load i16 , i16 *%src
%lhs = sext i16 %half to i64
%cond = icmp slt i64 %lhs, %rhs
%res = select i1 %cond, double %a, double %b
diff --git a/test/CodeGen/SystemZ/int-cmp-05.ll b/test/CodeGen/SystemZ/int-cmp-05.ll
index 0be43a3ef1bf..70640b607bcd 100644
--- a/test/CodeGen/SystemZ/int-cmp-05.ll
+++ b/test/CodeGen/SystemZ/int-cmp-05.ll
@@ -61,7 +61,7 @@ define double @f5(double %a, double %b, i64 %i1, i32 *%ptr) {
; CHECK-NEXT: jl
; CHECK: ldr %f0, %f2
; CHECK: br %r14
- %unext = load i32 *%ptr
+ %unext = load i32 , i32 *%ptr
%i2 = sext i32 %unext to i64
%cond = icmp slt i64 %i1, %i2
%res = select i1 %cond, double %a, double %b
@@ -73,7 +73,7 @@ define double @f6(double %a, double %b, i64 %i1, i32 *%ptr) {
; CHECK-LABEL: f6:
; CHECK-NOT: cgf
; CHECK: br %r14
- %unext = load i32 *%ptr
+ %unext = load i32 , i32 *%ptr
%i2 = sext i32 %unext to i64
%cond = icmp ult i64 %i1, %i2
%res = select i1 %cond, double %a, double %b
@@ -87,7 +87,7 @@ define double @f7(double %a, double %b, i64 %i1, i32 *%ptr) {
; CHECK-NEXT: je
; CHECK: ldr %f0, %f2
; CHECK: br %r14
- %unext = load i32 *%ptr
+ %unext = load i32 , i32 *%ptr
%i2 = sext i32 %unext to i64
%cond = icmp eq i64 %i1, %i2
%res = select i1 %cond, double %a, double %b
@@ -101,7 +101,7 @@ define double @f8(double %a, double %b, i64 %i1, i32 *%ptr) {
; CHECK-NEXT: jlh
; CHECK: ldr %f0, %f2
; CHECK: br %r14
- %unext = load i32 *%ptr
+ %unext = load i32 , i32 *%ptr
%i2 = sext i32 %unext to i64
%cond = icmp ne i64 %i1, %i2
%res = select i1 %cond, double %a, double %b
@@ -115,8 +115,8 @@ define double @f9(double %a, double %b, i64 %i1, i32 *%base) {
; CHECK-NEXT: jl
; CHECK: ldr %f0, %f2
; CHECK: br %r14
- %ptr = getelementptr i32 *%base, i64 131071
- %unext = load i32 *%ptr
+ %ptr = getelementptr i32, i32 *%base, i64 131071
+ %unext = load i32 , i32 *%ptr
%i2 = sext i32 %unext to i64
%cond = icmp slt i64 %i1, %i2
%res = select i1 %cond, double %a, double %b
@@ -132,8 +132,8 @@ define double @f10(double %a, double %b, i64 %i1, i32 *%base) {
; CHECK-NEXT: jl
; CHECK: ldr %f0, %f2
; CHECK: br %r14
- %ptr = getelementptr i32 *%base, i64 131072
- %unext = load i32 *%ptr
+ %ptr = getelementptr i32, i32 *%base, i64 131072
+ %unext = load i32 , i32 *%ptr
%i2 = sext i32 %unext to i64
%cond = icmp slt i64 %i1, %i2
%res = select i1 %cond, double %a, double %b
@@ -147,8 +147,8 @@ define double @f11(double %a, double %b, i64 %i1, i32 *%base) {
; CHECK-NEXT: jl
; CHECK: ldr %f0, %f2
; CHECK: br %r14
- %ptr = getelementptr i32 *%base, i64 -1
- %unext = load i32 *%ptr
+ %ptr = getelementptr i32, i32 *%base, i64 -1
+ %unext = load i32 , i32 *%ptr
%i2 = sext i32 %unext to i64
%cond = icmp slt i64 %i1, %i2
%res = select i1 %cond, double %a, double %b
@@ -162,8 +162,8 @@ define double @f12(double %a, double %b, i64 %i1, i32 *%base) {
; CHECK-NEXT: jl
; CHECK: ldr %f0, %f2
; CHECK: br %r14
- %ptr = getelementptr i32 *%base, i64 -131072
- %unext = load i32 *%ptr
+ %ptr = getelementptr i32, i32 *%base, i64 -131072
+ %unext = load i32 , i32 *%ptr
%i2 = sext i32 %unext to i64
%cond = icmp slt i64 %i1, %i2
%res = select i1 %cond, double %a, double %b
@@ -179,8 +179,8 @@ define double @f13(double %a, double %b, i64 %i1, i32 *%base) {
; CHECK-NEXT: jl
; CHECK: ldr %f0, %f2
; CHECK: br %r14
- %ptr = getelementptr i32 *%base, i64 -131073
- %unext = load i32 *%ptr
+ %ptr = getelementptr i32, i32 *%base, i64 -131073
+ %unext = load i32 , i32 *%ptr
%i2 = sext i32 %unext to i64
%cond = icmp slt i64 %i1, %i2
%res = select i1 %cond, double %a, double %b
@@ -197,7 +197,7 @@ define double @f14(double %a, double %b, i64 %i1, i64 %base, i64 %index) {
%add1 = add i64 %base, %index
%add2 = add i64 %add1, 524284
%ptr = inttoptr i64 %add2 to i32 *
- %unext = load i32 *%ptr
+ %unext = load i32 , i32 *%ptr
%i2 = sext i32 %unext to i64
%cond = icmp slt i64 %i1, %i2
%res = select i1 %cond, double %a, double %b
@@ -210,26 +210,26 @@ define i64 @f15(i32 *%ptr0) {
; CHECK: brasl %r14, foo@PLT
; CHECK: cgf {{%r[0-9]+}}, 16{{[04]}}(%r15)
; CHECK: br %r14
- %ptr1 = getelementptr i32 *%ptr0, i64 2
- %ptr2 = getelementptr i32 *%ptr0, i64 4
- %ptr3 = getelementptr i32 *%ptr0, i64 6
- %ptr4 = getelementptr i32 *%ptr0, i64 8
- %ptr5 = getelementptr i32 *%ptr0, i64 10
- %ptr6 = getelementptr i32 *%ptr0, i64 12
- %ptr7 = getelementptr i32 *%ptr0, i64 14
- %ptr8 = getelementptr i32 *%ptr0, i64 16
- %ptr9 = getelementptr i32 *%ptr0, i64 18
+ %ptr1 = getelementptr i32, i32 *%ptr0, i64 2
+ %ptr2 = getelementptr i32, i32 *%ptr0, i64 4
+ %ptr3 = getelementptr i32, i32 *%ptr0, i64 6
+ %ptr4 = getelementptr i32, i32 *%ptr0, i64 8
+ %ptr5 = getelementptr i32, i32 *%ptr0, i64 10
+ %ptr6 = getelementptr i32, i32 *%ptr0, i64 12
+ %ptr7 = getelementptr i32, i32 *%ptr0, i64 14
+ %ptr8 = getelementptr i32, i32 *%ptr0, i64 16
+ %ptr9 = getelementptr i32, i32 *%ptr0, i64 18
- %val0 = load i32 *%ptr0
- %val1 = load i32 *%ptr1
- %val2 = load i32 *%ptr2
- %val3 = load i32 *%ptr3
- %val4 = load i32 *%ptr4
- %val5 = load i32 *%ptr5
- %val6 = load i32 *%ptr6
- %val7 = load i32 *%ptr7
- %val8 = load i32 *%ptr8
- %val9 = load i32 *%ptr9
+ %val0 = load i32 , i32 *%ptr0
+ %val1 = load i32 , i32 *%ptr1
+ %val2 = load i32 , i32 *%ptr2
+ %val3 = load i32 , i32 *%ptr3
+ %val4 = load i32 , i32 *%ptr4
+ %val5 = load i32 , i32 *%ptr5
+ %val6 = load i32 , i32 *%ptr6
+ %val7 = load i32 , i32 *%ptr7
+ %val8 = load i32 , i32 *%ptr8
+ %val9 = load i32 , i32 *%ptr9
%frob0 = add i32 %val0, 100
%frob1 = add i32 %val1, 100
@@ -311,7 +311,7 @@ define double @f17(double %a, double %b, i64 %i2, i32 *%ptr) {
; CHECK-NEXT: jh {{\.L.*}}
; CHECK: ldr %f0, %f2
; CHECK: br %r14
- %unext = load i32 *%ptr
+ %unext = load i32 , i32 *%ptr
%i1 = sext i32 %unext to i64
%cond = icmp slt i64 %i1, %i2
%res = select i1 %cond, double %a, double %b
diff --git a/test/CodeGen/SystemZ/int-cmp-06.ll b/test/CodeGen/SystemZ/int-cmp-06.ll
index 82007e221766..16c2ade83553 100644
--- a/test/CodeGen/SystemZ/int-cmp-06.ll
+++ b/test/CodeGen/SystemZ/int-cmp-06.ll
@@ -111,7 +111,7 @@ define double @f9(double %a, double %b, i64 %i1, i32 *%ptr) {
; CHECK-NEXT: jl
; CHECK: ldr %f0, %f2
; CHECK: br %r14
- %unext = load i32 *%ptr
+ %unext = load i32 , i32 *%ptr
%i2 = zext i32 %unext to i64
%cond = icmp ult i64 %i1, %i2
%res = select i1 %cond, double %a, double %b
@@ -123,7 +123,7 @@ define double @f10(double %a, double %b, i64 %i1, i32 *%ptr) {
; CHECK-LABEL: f10:
; CHECK-NOT: clgf
; CHECK: br %r14
- %unext = load i32 *%ptr
+ %unext = load i32 , i32 *%ptr
%i2 = zext i32 %unext to i64
%cond = icmp slt i64 %i1, %i2
%res = select i1 %cond, double %a, double %b
@@ -137,7 +137,7 @@ define double @f11(double %a, double %b, i64 %i1, i32 *%ptr) {
; CHECK-NEXT: je
; CHECK: ldr %f0, %f2
; CHECK: br %r14
- %unext = load i32 *%ptr
+ %unext = load i32 , i32 *%ptr
%i2 = zext i32 %unext to i64
%cond = icmp eq i64 %i1, %i2
%res = select i1 %cond, double %a, double %b
@@ -151,7 +151,7 @@ define double @f12(double %a, double %b, i64 %i1, i32 *%ptr) {
; CHECK-NEXT: jlh
; CHECK: ldr %f0, %f2
; CHECK: br %r14
- %unext = load i32 *%ptr
+ %unext = load i32 , i32 *%ptr
%i2 = zext i32 %unext to i64
%cond = icmp ne i64 %i1, %i2
%res = select i1 %cond, double %a, double %b
@@ -165,8 +165,8 @@ define double @f13(double %a, double %b, i64 %i1, i32 *%base) {
; CHECK-NEXT: jl
; CHECK: ldr %f0, %f2
; CHECK: br %r14
- %ptr = getelementptr i32 *%base, i64 131071
- %unext = load i32 *%ptr
+ %ptr = getelementptr i32, i32 *%base, i64 131071
+ %unext = load i32 , i32 *%ptr
%i2 = zext i32 %unext to i64
%cond = icmp ult i64 %i1, %i2
%res = select i1 %cond, double %a, double %b
@@ -182,8 +182,8 @@ define double @f14(double %a, double %b, i64 %i1, i32 *%base) {
; CHECK-NEXT: jl
; CHECK: ldr %f0, %f2
; CHECK: br %r14
- %ptr = getelementptr i32 *%base, i64 131072
- %unext = load i32 *%ptr
+ %ptr = getelementptr i32, i32 *%base, i64 131072
+ %unext = load i32 , i32 *%ptr
%i2 = zext i32 %unext to i64
%cond = icmp ult i64 %i1, %i2
%res = select i1 %cond, double %a, double %b
@@ -197,8 +197,8 @@ define double @f15(double %a, double %b, i64 %i1, i32 *%base) {
; CHECK-NEXT: jl
; CHECK: ldr %f0, %f2
; CHECK: br %r14
- %ptr = getelementptr i32 *%base, i64 -1
- %unext = load i32 *%ptr
+ %ptr = getelementptr i32, i32 *%base, i64 -1
+ %unext = load i32 , i32 *%ptr
%i2 = zext i32 %unext to i64
%cond = icmp ult i64 %i1, %i2
%res = select i1 %cond, double %a, double %b
@@ -212,8 +212,8 @@ define double @f16(double %a, double %b, i64 %i1, i32 *%base) {
; CHECK-NEXT: jl
; CHECK: ldr %f0, %f2
; CHECK: br %r14
- %ptr = getelementptr i32 *%base, i64 -131072
- %unext = load i32 *%ptr
+ %ptr = getelementptr i32, i32 *%base, i64 -131072
+ %unext = load i32 , i32 *%ptr
%i2 = zext i32 %unext to i64
%cond = icmp ult i64 %i1, %i2
%res = select i1 %cond, double %a, double %b
@@ -229,8 +229,8 @@ define double @f17(double %a, double %b, i64 %i1, i32 *%base) {
; CHECK-NEXT: jl
; CHECK: ldr %f0, %f2
; CHECK: br %r14
- %ptr = getelementptr i32 *%base, i64 -131073
- %unext = load i32 *%ptr
+ %ptr = getelementptr i32, i32 *%base, i64 -131073
+ %unext = load i32 , i32 *%ptr
%i2 = zext i32 %unext to i64
%cond = icmp ult i64 %i1, %i2
%res = select i1 %cond, double %a, double %b
@@ -247,7 +247,7 @@ define double @f18(double %a, double %b, i64 %i1, i64 %base, i64 %index) {
%add1 = add i64 %base, %index
%add2 = add i64 %add1, 524284
%ptr = inttoptr i64 %add2 to i32 *
- %unext = load i32 *%ptr
+ %unext = load i32 , i32 *%ptr
%i2 = zext i32 %unext to i64
%cond = icmp ult i64 %i1, %i2
%res = select i1 %cond, double %a, double %b
@@ -260,26 +260,26 @@ define i64 @f19(i32 *%ptr0) {
; CHECK: brasl %r14, foo@PLT
; CHECK: clgf {{%r[0-9]+}}, 16{{[04]}}(%r15)
; CHECK: br %r14
- %ptr1 = getelementptr i32 *%ptr0, i64 2
- %ptr2 = getelementptr i32 *%ptr0, i64 4
- %ptr3 = getelementptr i32 *%ptr0, i64 6
- %ptr4 = getelementptr i32 *%ptr0, i64 8
- %ptr5 = getelementptr i32 *%ptr0, i64 10
- %ptr6 = getelementptr i32 *%ptr0, i64 12
- %ptr7 = getelementptr i32 *%ptr0, i64 14
- %ptr8 = getelementptr i32 *%ptr0, i64 16
- %ptr9 = getelementptr i32 *%ptr0, i64 18
+ %ptr1 = getelementptr i32, i32 *%ptr0, i64 2
+ %ptr2 = getelementptr i32, i32 *%ptr0, i64 4
+ %ptr3 = getelementptr i32, i32 *%ptr0, i64 6
+ %ptr4 = getelementptr i32, i32 *%ptr0, i64 8
+ %ptr5 = getelementptr i32, i32 *%ptr0, i64 10
+ %ptr6 = getelementptr i32, i32 *%ptr0, i64 12
+ %ptr7 = getelementptr i32, i32 *%ptr0, i64 14
+ %ptr8 = getelementptr i32, i32 *%ptr0, i64 16
+ %ptr9 = getelementptr i32, i32 *%ptr0, i64 18
- %val0 = load i32 *%ptr0
- %val1 = load i32 *%ptr1
- %val2 = load i32 *%ptr2
- %val3 = load i32 *%ptr3
- %val4 = load i32 *%ptr4
- %val5 = load i32 *%ptr5
- %val6 = load i32 *%ptr6
- %val7 = load i32 *%ptr7
- %val8 = load i32 *%ptr8
- %val9 = load i32 *%ptr9
+ %val0 = load i32 , i32 *%ptr0
+ %val1 = load i32 , i32 *%ptr1
+ %val2 = load i32 , i32 *%ptr2
+ %val3 = load i32 , i32 *%ptr3
+ %val4 = load i32 , i32 *%ptr4
+ %val5 = load i32 , i32 *%ptr5
+ %val6 = load i32 , i32 *%ptr6
+ %val7 = load i32 , i32 *%ptr7
+ %val8 = load i32 , i32 *%ptr8
+ %val9 = load i32 , i32 *%ptr9
%frob0 = add i32 %val0, 100
%frob1 = add i32 %val1, 100
@@ -374,7 +374,7 @@ define double @f22(double %a, double %b, i64 %i2, i32 *%ptr) {
; CHECK-NEXT: jh {{\.L.*}}
; CHECK: ldr %f0, %f2
; CHECK: br %r14
- %unext = load i32 *%ptr
+ %unext = load i32 , i32 *%ptr
%i1 = zext i32 %unext to i64
%cond = icmp ult i64 %i1, %i2
%res = select i1 %cond, double %a, double %b
diff --git a/test/CodeGen/SystemZ/int-cmp-07.ll b/test/CodeGen/SystemZ/int-cmp-07.ll
index 530d1787a770..0a787c9ea01d 100644
--- a/test/CodeGen/SystemZ/int-cmp-07.ll
+++ b/test/CodeGen/SystemZ/int-cmp-07.ll
@@ -20,7 +20,7 @@ define double @f2(double %a, double %b, i64 %i1, i64 *%ptr) {
; CHECK-NEXT: jl
; CHECK: ldr %f0, %f2
; CHECK: br %r14
- %i2 = load i64 *%ptr
+ %i2 = load i64 , i64 *%ptr
%cond = icmp slt i64 %i1, %i2
%res = select i1 %cond, double %a, double %b
ret double %res
@@ -33,8 +33,8 @@ define double @f3(double %a, double %b, i64 %i1, i64 *%base) {
; CHECK-NEXT: jl
; CHECK: ldr %f0, %f2
; CHECK: br %r14
- %ptr = getelementptr i64 *%base, i64 65535
- %i2 = load i64 *%ptr
+ %ptr = getelementptr i64, i64 *%base, i64 65535
+ %i2 = load i64 , i64 *%ptr
%cond = icmp slt i64 %i1, %i2
%res = select i1 %cond, double %a, double %b
ret double %res
@@ -49,8 +49,8 @@ define double @f4(double %a, double %b, i64 %i1, i64 *%base) {
; CHECK-NEXT: jl
; CHECK: ldr %f0, %f2
; CHECK: br %r14
- %ptr = getelementptr i64 *%base, i64 65536
- %i2 = load i64 *%ptr
+ %ptr = getelementptr i64, i64 *%base, i64 65536
+ %i2 = load i64 , i64 *%ptr
%cond = icmp slt i64 %i1, %i2
%res = select i1 %cond, double %a, double %b
ret double %res
@@ -63,8 +63,8 @@ define double @f5(double %a, double %b, i64 %i1, i64 *%base) {
; CHECK-NEXT: jl
; CHECK: ldr %f0, %f2
; CHECK: br %r14
- %ptr = getelementptr i64 *%base, i64 -1
- %i2 = load i64 *%ptr
+ %ptr = getelementptr i64, i64 *%base, i64 -1
+ %i2 = load i64 , i64 *%ptr
%cond = icmp slt i64 %i1, %i2
%res = select i1 %cond, double %a, double %b
ret double %res
@@ -77,8 +77,8 @@ define double @f6(double %a, double %b, i64 %i1, i64 *%base) {
; CHECK-NEXT: jl
; CHECK: ldr %f0, %f2
; CHECK: br %r14
- %ptr = getelementptr i64 *%base, i64 -65536
- %i2 = load i64 *%ptr
+ %ptr = getelementptr i64, i64 *%base, i64 -65536
+ %i2 = load i64 , i64 *%ptr
%cond = icmp slt i64 %i1, %i2
%res = select i1 %cond, double %a, double %b
ret double %res
@@ -93,8 +93,8 @@ define double @f7(double %a, double %b, i64 %i1, i64 *%base) {
; CHECK-NEXT: jl
; CHECK: ldr %f0, %f2
; CHECK: br %r14
- %ptr = getelementptr i64 *%base, i64 -65537
- %i2 = load i64 *%ptr
+ %ptr = getelementptr i64, i64 *%base, i64 -65537
+ %i2 = load i64 , i64 *%ptr
%cond = icmp slt i64 %i1, %i2
%res = select i1 %cond, double %a, double %b
ret double %res
@@ -110,7 +110,7 @@ define double @f8(double %a, double %b, i64 %i1, i64 %base, i64 %index) {
%add1 = add i64 %base, %index
%add2 = add i64 %add1, 524280
%ptr = inttoptr i64 %add2 to i64 *
- %i2 = load i64 *%ptr
+ %i2 = load i64 , i64 *%ptr
%cond = icmp slt i64 %i1, %i2
%res = select i1 %cond, double %a, double %b
ret double %res
@@ -123,7 +123,7 @@ define double @f9(double %a, double %b, i64 %i2, i64 *%ptr) {
; CHECK-NEXT: jh {{\.L.*}}
; CHECK: ldr %f0, %f2
; CHECK: br %r14
- %i1 = load i64 *%ptr
+ %i1 = load i64 , i64 *%ptr
%cond = icmp slt i64 %i1, %i2
%res = select i1 %cond, double %a, double %b
ret double %res
diff --git a/test/CodeGen/SystemZ/int-cmp-08.ll b/test/CodeGen/SystemZ/int-cmp-08.ll
index ebf158a1144b..384b41b549b9 100644
--- a/test/CodeGen/SystemZ/int-cmp-08.ll
+++ b/test/CodeGen/SystemZ/int-cmp-08.ll
@@ -20,7 +20,7 @@ define double @f2(double %a, double %b, i64 %i1, i64 *%ptr) {
; CHECK-NEXT: jl
; CHECK: ldr %f0, %f2
; CHECK: br %r14
- %i2 = load i64 *%ptr
+ %i2 = load i64 , i64 *%ptr
%cond = icmp ult i64 %i1, %i2
%res = select i1 %cond, double %a, double %b
ret double %res
@@ -33,8 +33,8 @@ define double @f3(double %a, double %b, i64 %i1, i64 *%base) {
; CHECK-NEXT: jl
; CHECK: ldr %f0, %f2
; CHECK: br %r14
- %ptr = getelementptr i64 *%base, i64 65535
- %i2 = load i64 *%ptr
+ %ptr = getelementptr i64, i64 *%base, i64 65535
+ %i2 = load i64 , i64 *%ptr
%cond = icmp ult i64 %i1, %i2
%res = select i1 %cond, double %a, double %b
ret double %res
@@ -49,8 +49,8 @@ define double @f4(double %a, double %b, i64 %i1, i64 *%base) {
; CHECK-NEXT: jl
; CHECK: ldr %f0, %f2
; CHECK: br %r14
- %ptr = getelementptr i64 *%base, i64 65536
- %i2 = load i64 *%ptr
+ %ptr = getelementptr i64, i64 *%base, i64 65536
+ %i2 = load i64 , i64 *%ptr
%cond = icmp ult i64 %i1, %i2
%res = select i1 %cond, double %a, double %b
ret double %res
@@ -63,8 +63,8 @@ define double @f5(double %a, double %b, i64 %i1, i64 *%base) {
; CHECK-NEXT: jl
; CHECK: ldr %f0, %f2
; CHECK: br %r14
- %ptr = getelementptr i64 *%base, i64 -1
- %i2 = load i64 *%ptr
+ %ptr = getelementptr i64, i64 *%base, i64 -1
+ %i2 = load i64 , i64 *%ptr
%cond = icmp ult i64 %i1, %i2
%res = select i1 %cond, double %a, double %b
ret double %res
@@ -77,8 +77,8 @@ define double @f6(double %a, double %b, i64 %i1, i64 *%base) {
; CHECK-NEXT: jl
; CHECK: ldr %f0, %f2
; CHECK: br %r14
- %ptr = getelementptr i64 *%base, i64 -65536
- %i2 = load i64 *%ptr
+ %ptr = getelementptr i64, i64 *%base, i64 -65536
+ %i2 = load i64 , i64 *%ptr
%cond = icmp ult i64 %i1, %i2
%res = select i1 %cond, double %a, double %b
ret double %res
@@ -93,8 +93,8 @@ define double @f7(double %a, double %b, i64 %i1, i64 *%base) {
; CHECK-NEXT: jl
; CHECK: ldr %f0, %f2
; CHECK: br %r14
- %ptr = getelementptr i64 *%base, i64 -65537
- %i2 = load i64 *%ptr
+ %ptr = getelementptr i64, i64 *%base, i64 -65537
+ %i2 = load i64 , i64 *%ptr
%cond = icmp ult i64 %i1, %i2
%res = select i1 %cond, double %a, double %b
ret double %res
@@ -110,7 +110,7 @@ define double @f8(double %a, double %b, i64 %i1, i64 %base, i64 %index) {
%add1 = add i64 %base, %index
%add2 = add i64 %add1, 524280
%ptr = inttoptr i64 %add2 to i64 *
- %i2 = load i64 *%ptr
+ %i2 = load i64 , i64 *%ptr
%cond = icmp ult i64 %i1, %i2
%res = select i1 %cond, double %a, double %b
ret double %res
@@ -123,7 +123,7 @@ define double @f9(double %a, double %b, i64 %i2, i64 *%ptr) {
; CHECK-NEXT: jh {{\.L.*}}
; CHECK: ldr %f0, %f2
; CHECK: br %r14
- %i1 = load i64 *%ptr
+ %i1 = load i64 , i64 *%ptr
%cond = icmp ult i64 %i1, %i2
%res = select i1 %cond, double %a, double %b
ret double %res
diff --git a/test/CodeGen/SystemZ/int-cmp-12.ll b/test/CodeGen/SystemZ/int-cmp-12.ll
index 077b22423e06..d9c6a9fc4efc 100644
--- a/test/CodeGen/SystemZ/int-cmp-12.ll
+++ b/test/CodeGen/SystemZ/int-cmp-12.ll
@@ -49,13 +49,24 @@ define double @f4(double %a, double %b, i64 %i1) {
ret double %res
}
-; Check the next value up, which must use a register comparison.
+; Check the next value up, which can use a shifted comparison.
define double @f5(double %a, double %b, i64 %i1) {
; CHECK-LABEL: f5:
-; CHECK: clgrjl %r2,
+; CHECK: srlg [[REG:%r[0-5]]], %r2, 32
+; CHECK: cgije [[REG]], 0
; CHECK: ldr %f0, %f2
; CHECK: br %r14
%cond = icmp ult i64 %i1, 4294967296
%res = select i1 %cond, double %a, double %b
ret double %res
}
+; Check the next value up, which must use a register comparison.
+define double @f6(double %a, double %b, i64 %i1) {
+; CHECK-LABEL: f6:
+; CHECK: clgrjl %r2,
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+ %cond = icmp ult i64 %i1, 4294967297
+ %res = select i1 %cond, double %a, double %b
+ ret double %res
+}
diff --git a/test/CodeGen/SystemZ/int-cmp-15.ll b/test/CodeGen/SystemZ/int-cmp-15.ll
index 48a068e49e8f..3c1e052bc35f 100644
--- a/test/CodeGen/SystemZ/int-cmp-15.ll
+++ b/test/CodeGen/SystemZ/int-cmp-15.ll
@@ -8,7 +8,7 @@ define double @f1(double %a, double %b, i8 *%ptr) {
; CHECK: cli 0(%r2), 1
; CHECK-NEXT: jh
; CHECK: br %r14
- %val = load i8 *%ptr
+ %val = load i8 , i8 *%ptr
%cond = icmp ugt i8 %val, 1
%res = select i1 %cond, double %a, double %b
ret double %res
@@ -20,7 +20,7 @@ define double @f2(double %a, double %b, i8 *%ptr) {
; CHECK: cli 0(%r2), 254
; CHECK-NEXT: jl
; CHECK: br %r14
- %val = load i8 *%ptr
+ %val = load i8 , i8 *%ptr
%cond = icmp ult i8 %val, 254
%res = select i1 %cond, double %a, double %b
ret double %res
@@ -32,7 +32,7 @@ define double @f3(double %a, double %b, i8 *%ptr) {
; CHECK: cli 0(%r2), 127
; CHECK-NEXT: jh
; CHECK: br %r14
- %val = load i8 *%ptr
+ %val = load i8 , i8 *%ptr
%cond = icmp slt i8 %val, 0
%res = select i1 %cond, double %a, double %b
ret double %res
@@ -44,7 +44,7 @@ define double @f4(double %a, double %b, i8 *%ptr) {
; CHECK: cli 0(%r2), 127
; CHECK-NEXT: jh
; CHECK: br %r14
- %val = load i8 *%ptr
+ %val = load i8 , i8 *%ptr
%cond = icmp sle i8 %val, -1
%res = select i1 %cond, double %a, double %b
ret double %res
@@ -56,7 +56,7 @@ define double @f5(double %a, double %b, i8 *%ptr) {
; CHECK: cli 0(%r2), 128
; CHECK-NEXT: jl
; CHECK: br %r14
- %val = load i8 *%ptr
+ %val = load i8 , i8 *%ptr
%cond = icmp sge i8 %val, 0
%res = select i1 %cond, double %a, double %b
ret double %res
@@ -68,7 +68,7 @@ define double @f6(double %a, double %b, i8 *%ptr) {
; CHECK: cli 0(%r2), 128
; CHECK-NEXT: jl
; CHECK: br %r14
- %val = load i8 *%ptr
+ %val = load i8 , i8 *%ptr
%cond = icmp sgt i8 %val, -1
%res = select i1 %cond, double %a, double %b
ret double %res
@@ -80,7 +80,7 @@ define double @f7(double %a, double %b, i8 *%ptr) {
; CHECK: cli 0(%r2), 128
; CHECK-NEXT: je
; CHECK: br %r14
- %val = load i8 *%ptr
+ %val = load i8 , i8 *%ptr
%cond = icmp eq i8 %val, -128
%res = select i1 %cond, double %a, double %b
ret double %res
@@ -92,7 +92,7 @@ define double @f8(double %a, double %b, i8 *%ptr) {
; CHECK: cli 0(%r2), 0
; CHECK-NEXT: je
; CHECK: br %r14
- %val = load i8 *%ptr
+ %val = load i8 , i8 *%ptr
%cond = icmp eq i8 %val, 0
%res = select i1 %cond, double %a, double %b
ret double %res
@@ -104,7 +104,7 @@ define double @f9(double %a, double %b, i8 *%ptr) {
; CHECK: cli 0(%r2), 127
; CHECK-NEXT: je
; CHECK: br %r14
- %val = load i8 *%ptr
+ %val = load i8 , i8 *%ptr
%cond = icmp eq i8 %val, 127
%res = select i1 %cond, double %a, double %b
ret double %res
@@ -116,7 +116,7 @@ define double @f10(double %a, double %b, i8 *%ptr) {
; CHECK: cli 0(%r2), 255
; CHECK-NEXT: je
; CHECK: br %r14
- %val = load i8 *%ptr
+ %val = load i8 , i8 *%ptr
%cond = icmp eq i8 %val, 255
%res = select i1 %cond, double %a, double %b
ret double %res
@@ -127,8 +127,8 @@ define double @f11(double %a, double %b, i8 *%src) {
; CHECK-LABEL: f11:
; CHECK: cli 4095(%r2), 127
; CHECK: br %r14
- %ptr = getelementptr i8 *%src, i64 4095
- %val = load i8 *%ptr
+ %ptr = getelementptr i8, i8 *%src, i64 4095
+ %val = load i8 , i8 *%ptr
%cond = icmp ult i8 %val, 127
%res = select i1 %cond, double %a, double %b
ret double %res
@@ -139,8 +139,8 @@ define double @f12(double %a, double %b, i8 *%src) {
; CHECK-LABEL: f12:
; CHECK: cliy 4096(%r2), 127
; CHECK: br %r14
- %ptr = getelementptr i8 *%src, i64 4096
- %val = load i8 *%ptr
+ %ptr = getelementptr i8, i8 *%src, i64 4096
+ %val = load i8 , i8 *%ptr
%cond = icmp ult i8 %val, 127
%res = select i1 %cond, double %a, double %b
ret double %res
@@ -151,8 +151,8 @@ define double @f13(double %a, double %b, i8 *%src) {
; CHECK-LABEL: f13:
; CHECK: cliy 524287(%r2), 127
; CHECK: br %r14
- %ptr = getelementptr i8 *%src, i64 524287
- %val = load i8 *%ptr
+ %ptr = getelementptr i8, i8 *%src, i64 524287
+ %val = load i8 , i8 *%ptr
%cond = icmp ult i8 %val, 127
%res = select i1 %cond, double %a, double %b
ret double %res
@@ -165,8 +165,8 @@ define double @f14(double %a, double %b, i8 *%src) {
; CHECK: agfi %r2, 524288
; CHECK: cli 0(%r2), 127
; CHECK: br %r14
- %ptr = getelementptr i8 *%src, i64 524288
- %val = load i8 *%ptr
+ %ptr = getelementptr i8, i8 *%src, i64 524288
+ %val = load i8 , i8 *%ptr
%cond = icmp ult i8 %val, 127
%res = select i1 %cond, double %a, double %b
ret double %res
@@ -177,8 +177,8 @@ define double @f15(double %a, double %b, i8 *%src) {
; CHECK-LABEL: f15:
; CHECK: cliy -1(%r2), 127
; CHECK: br %r14
- %ptr = getelementptr i8 *%src, i64 -1
- %val = load i8 *%ptr
+ %ptr = getelementptr i8, i8 *%src, i64 -1
+ %val = load i8 , i8 *%ptr
%cond = icmp ult i8 %val, 127
%res = select i1 %cond, double %a, double %b
ret double %res
@@ -189,8 +189,8 @@ define double @f16(double %a, double %b, i8 *%src) {
; CHECK-LABEL: f16:
; CHECK: cliy -524288(%r2), 127
; CHECK: br %r14
- %ptr = getelementptr i8 *%src, i64 -524288
- %val = load i8 *%ptr
+ %ptr = getelementptr i8, i8 *%src, i64 -524288
+ %val = load i8 , i8 *%ptr
%cond = icmp ult i8 %val, 127
%res = select i1 %cond, double %a, double %b
ret double %res
@@ -203,8 +203,8 @@ define double @f17(double %a, double %b, i8 *%src) {
; CHECK: agfi %r2, -524289
; CHECK: cli 0(%r2), 127
; CHECK: br %r14
- %ptr = getelementptr i8 *%src, i64 -524289
- %val = load i8 *%ptr
+ %ptr = getelementptr i8, i8 *%src, i64 -524289
+ %val = load i8 , i8 *%ptr
%cond = icmp ult i8 %val, 127
%res = select i1 %cond, double %a, double %b
ret double %res
@@ -219,7 +219,7 @@ define double @f18(double %a, double %b, i64 %base, i64 %index) {
%add1 = add i64 %base, %index
%add2 = add i64 %add1, 4095
%ptr = inttoptr i64 %add2 to i8 *
- %val = load i8 *%ptr
+ %val = load i8 , i8 *%ptr
%cond = icmp ult i8 %val, 127
%res = select i1 %cond, double %a, double %b
ret double %res
@@ -234,7 +234,7 @@ define double @f19(double %a, double %b, i64 %base, i64 %index) {
%add1 = add i64 %base, %index
%add2 = add i64 %add1, 4096
%ptr = inttoptr i64 %add2 to i8 *
- %val = load i8 *%ptr
+ %val = load i8 , i8 *%ptr
%cond = icmp ult i8 %val, 127
%res = select i1 %cond, double %a, double %b
ret double %res
diff --git a/test/CodeGen/SystemZ/int-cmp-16.ll b/test/CodeGen/SystemZ/int-cmp-16.ll
index be206d9c9470..37508b5e740f 100644
--- a/test/CodeGen/SystemZ/int-cmp-16.ll
+++ b/test/CodeGen/SystemZ/int-cmp-16.ll
@@ -9,7 +9,7 @@ define double @f1(double %a, double %b, i8 *%ptr) {
; CHECK: cli 0(%r2), 0
; CHECK-NEXT: je
; CHECK: br %r14
- %val = load i8 *%ptr
+ %val = load i8 , i8 *%ptr
%ext = zext i8 %val to i32
%cond = icmp eq i32 %ext, 0
%res = select i1 %cond, double %a, double %b
@@ -22,7 +22,7 @@ define double @f2(double %a, double %b, i8 *%ptr) {
; CHECK: cli 0(%r2), 255
; CHECK-NEXT: je
; CHECK: br %r14
- %val = load i8 *%ptr
+ %val = load i8 , i8 *%ptr
%ext = zext i8 %val to i32
%cond = icmp eq i32 %ext, 255
%res = select i1 %cond, double %a, double %b
@@ -34,7 +34,7 @@ define double @f3(double %a, double %b, i8 *%ptr) {
; CHECK-LABEL: f3:
; CHECK-NOT: cli
; CHECK: br %r14
- %val = load i8 *%ptr
+ %val = load i8 , i8 *%ptr
%ext = zext i8 %val to i32
%cond = icmp eq i32 %ext, 256
%res = select i1 %cond, double %a, double %b
@@ -47,7 +47,7 @@ define double @f4(double %a, double %b, i8 *%ptr) {
; CHECK-LABEL: f4:
; CHECK-NOT: cli
; CHECK: br %r14
- %val = load i8 *%ptr
+ %val = load i8 , i8 *%ptr
%ext = zext i8 %val to i32
%cond = icmp eq i32 %ext, -1
%res = select i1 %cond, double %a, double %b
@@ -60,7 +60,7 @@ define double @f5(double %a, double %b, i8 *%ptr) {
; CHECK: cli 0(%r2), 0
; CHECK-NEXT: je
; CHECK: br %r14
- %val = load i8 *%ptr
+ %val = load i8 , i8 *%ptr
%ext = sext i8 %val to i32
%cond = icmp eq i32 %ext, 0
%res = select i1 %cond, double %a, double %b
@@ -73,7 +73,7 @@ define double @f6(double %a, double %b, i8 *%ptr) {
; CHECK: cli 0(%r2), 127
; CHECK-NEXT: je
; CHECK: br %r14
- %val = load i8 *%ptr
+ %val = load i8 , i8 *%ptr
%ext = sext i8 %val to i32
%cond = icmp eq i32 %ext, 127
%res = select i1 %cond, double %a, double %b
@@ -86,7 +86,7 @@ define double @f7(double %a, double %b, i8 *%ptr) {
; CHECK-LABEL: f7:
; CHECK-NOT: cli
; CHECK: br %r14
- %val = load i8 *%ptr
+ %val = load i8 , i8 *%ptr
%ext = sext i8 %val to i32
%cond = icmp eq i32 %ext, 128
%res = select i1 %cond, double %a, double %b
@@ -99,7 +99,7 @@ define double @f8(double %a, double %b, i8 *%ptr) {
; CHECK: cli 0(%r2), 255
; CHECK-NEXT: je
; CHECK: br %r14
- %val = load i8 *%ptr
+ %val = load i8 , i8 *%ptr
%ext = sext i8 %val to i32
%cond = icmp eq i32 %ext, -1
%res = select i1 %cond, double %a, double %b
@@ -112,7 +112,7 @@ define double @f9(double %a, double %b, i8 *%ptr) {
; CHECK: cli 0(%r2), 128
; CHECK-NEXT: je
; CHECK: br %r14
- %val = load i8 *%ptr
+ %val = load i8 , i8 *%ptr
%ext = sext i8 %val to i32
%cond = icmp eq i32 %ext, -128
%res = select i1 %cond, double %a, double %b
@@ -125,7 +125,7 @@ define double @f10(double %a, double %b, i8 *%ptr) {
; CHECK-LABEL: f10:
; CHECK-NOT: cli
; CHECK: br %r14
- %val = load i8 *%ptr
+ %val = load i8 , i8 *%ptr
%ext = sext i8 %val to i32
%cond = icmp eq i32 %ext, -129
%res = select i1 %cond, double %a, double %b
diff --git a/test/CodeGen/SystemZ/int-cmp-17.ll b/test/CodeGen/SystemZ/int-cmp-17.ll
index 3df4ecc66803..a22fb604d453 100644
--- a/test/CodeGen/SystemZ/int-cmp-17.ll
+++ b/test/CodeGen/SystemZ/int-cmp-17.ll
@@ -9,7 +9,7 @@ define double @f1(double %a, double %b, i8 *%ptr) {
; CHECK: cli 0(%r2), 0
; CHECK-NEXT: jlh
; CHECK: br %r14
- %val = load i8 *%ptr
+ %val = load i8 , i8 *%ptr
%ext = zext i8 %val to i32
%cond = icmp ne i32 %ext, 0
%res = select i1 %cond, double %a, double %b
@@ -22,7 +22,7 @@ define double @f2(double %a, double %b, i8 *%ptr) {
; CHECK: cli 0(%r2), 255
; CHECK-NEXT: jlh
; CHECK: br %r14
- %val = load i8 *%ptr
+ %val = load i8 , i8 *%ptr
%ext = zext i8 %val to i32
%cond = icmp ne i32 %ext, 255
%res = select i1 %cond, double %a, double %b
@@ -34,7 +34,7 @@ define double @f3(double %a, double %b, i8 *%ptr) {
; CHECK-LABEL: f3:
; CHECK-NOT: cli
; CHECK: br %r14
- %val = load i8 *%ptr
+ %val = load i8 , i8 *%ptr
%ext = zext i8 %val to i32
%cond = icmp ne i32 %ext, 256
%res = select i1 %cond, double %a, double %b
@@ -47,7 +47,7 @@ define double @f4(double %a, double %b, i8 *%ptr) {
; CHECK-LABEL: f4:
; CHECK-NOT: cli
; CHECK: br %r14
- %val = load i8 *%ptr
+ %val = load i8 , i8 *%ptr
%ext = zext i8 %val to i32
%cond = icmp ne i32 %ext, -1
%res = select i1 %cond, double %a, double %b
@@ -60,7 +60,7 @@ define double @f5(double %a, double %b, i8 *%ptr) {
; CHECK: cli 0(%r2), 0
; CHECK-NEXT: jlh
; CHECK: br %r14
- %val = load i8 *%ptr
+ %val = load i8 , i8 *%ptr
%ext = sext i8 %val to i32
%cond = icmp ne i32 %ext, 0
%res = select i1 %cond, double %a, double %b
@@ -73,7 +73,7 @@ define double @f6(double %a, double %b, i8 *%ptr) {
; CHECK: cli 0(%r2), 127
; CHECK-NEXT: jlh
; CHECK: br %r14
- %val = load i8 *%ptr
+ %val = load i8 , i8 *%ptr
%ext = sext i8 %val to i32
%cond = icmp ne i32 %ext, 127
%res = select i1 %cond, double %a, double %b
@@ -86,7 +86,7 @@ define double @f7(double %a, double %b, i8 *%ptr) {
; CHECK-LABEL: f7:
; CHECK-NOT: cli
; CHECK: br %r14
- %val = load i8 *%ptr
+ %val = load i8 , i8 *%ptr
%ext = sext i8 %val to i32
%cond = icmp ne i32 %ext, 128
%res = select i1 %cond, double %a, double %b
@@ -99,7 +99,7 @@ define double @f8(double %a, double %b, i8 *%ptr) {
; CHECK: cli 0(%r2), 255
; CHECK-NEXT: jlh
; CHECK: br %r14
- %val = load i8 *%ptr
+ %val = load i8 , i8 *%ptr
%ext = sext i8 %val to i32
%cond = icmp ne i32 %ext, -1
%res = select i1 %cond, double %a, double %b
@@ -112,7 +112,7 @@ define double @f9(double %a, double %b, i8 *%ptr) {
; CHECK: cli 0(%r2), 128
; CHECK-NEXT: jlh
; CHECK: br %r14
- %val = load i8 *%ptr
+ %val = load i8 , i8 *%ptr
%ext = sext i8 %val to i32
%cond = icmp ne i32 %ext, -128
%res = select i1 %cond, double %a, double %b
@@ -125,7 +125,7 @@ define double @f10(double %a, double %b, i8 *%ptr) {
; CHECK-LABEL: f10:
; CHECK-NOT: cli
; CHECK: br %r14
- %val = load i8 *%ptr
+ %val = load i8 , i8 *%ptr
%ext = sext i8 %val to i32
%cond = icmp ne i32 %ext, -129
%res = select i1 %cond, double %a, double %b
diff --git a/test/CodeGen/SystemZ/int-cmp-18.ll b/test/CodeGen/SystemZ/int-cmp-18.ll
index d03d6ac9a2c7..f4bc5c0e5ce9 100644
--- a/test/CodeGen/SystemZ/int-cmp-18.ll
+++ b/test/CodeGen/SystemZ/int-cmp-18.ll
@@ -9,7 +9,7 @@ define double @f1(double %a, double %b, i8 *%ptr) {
; CHECK: cli 0(%r2), 0
; CHECK-NEXT: je
; CHECK: br %r14
- %val = load i8 *%ptr
+ %val = load i8 , i8 *%ptr
%ext = zext i8 %val to i64
%cond = icmp eq i64 %ext, 0
%res = select i1 %cond, double %a, double %b
@@ -22,7 +22,7 @@ define double @f2(double %a, double %b, i8 *%ptr) {
; CHECK: cli 0(%r2), 255
; CHECK-NEXT: je
; CHECK: br %r14
- %val = load i8 *%ptr
+ %val = load i8 , i8 *%ptr
%ext = zext i8 %val to i64
%cond = icmp eq i64 %ext, 255
%res = select i1 %cond, double %a, double %b
@@ -34,7 +34,7 @@ define double @f3(double %a, double %b, i8 *%ptr) {
; CHECK-LABEL: f3:
; CHECK-NOT: cli
; CHECK: br %r14
- %val = load i8 *%ptr
+ %val = load i8 , i8 *%ptr
%ext = zext i8 %val to i64
%cond = icmp eq i64 %ext, 256
%res = select i1 %cond, double %a, double %b
@@ -47,7 +47,7 @@ define double @f4(double %a, double %b, i8 *%ptr) {
; CHECK-LABEL: f4:
; CHECK-NOT: cli
; CHECK: br %r14
- %val = load i8 *%ptr
+ %val = load i8 , i8 *%ptr
%ext = zext i8 %val to i64
%cond = icmp eq i64 %ext, -1
%res = select i1 %cond, double %a, double %b
@@ -60,7 +60,7 @@ define double @f5(double %a, double %b, i8 *%ptr) {
; CHECK: cli 0(%r2), 0
; CHECK-NEXT: je
; CHECK: br %r14
- %val = load i8 *%ptr
+ %val = load i8 , i8 *%ptr
%ext = sext i8 %val to i64
%cond = icmp eq i64 %ext, 0
%res = select i1 %cond, double %a, double %b
@@ -73,7 +73,7 @@ define double @f6(double %a, double %b, i8 *%ptr) {
; CHECK: cli 0(%r2), 127
; CHECK-NEXT: je
; CHECK: br %r14
- %val = load i8 *%ptr
+ %val = load i8 , i8 *%ptr
%ext = sext i8 %val to i64
%cond = icmp eq i64 %ext, 127
%res = select i1 %cond, double %a, double %b
@@ -86,7 +86,7 @@ define double @f7(double %a, double %b, i8 *%ptr) {
; CHECK-LABEL: f7:
; CHECK-NOT: cli
; CHECK: br %r14
- %val = load i8 *%ptr
+ %val = load i8 , i8 *%ptr
%ext = sext i8 %val to i64
%cond = icmp eq i64 %ext, 128
%res = select i1 %cond, double %a, double %b
@@ -99,7 +99,7 @@ define double @f8(double %a, double %b, i8 *%ptr) {
; CHECK: cli 0(%r2), 255
; CHECK-NEXT: je
; CHECK: br %r14
- %val = load i8 *%ptr
+ %val = load i8 , i8 *%ptr
%ext = sext i8 %val to i64
%cond = icmp eq i64 %ext, -1
%res = select i1 %cond, double %a, double %b
@@ -112,7 +112,7 @@ define double @f9(double %a, double %b, i8 *%ptr) {
; CHECK: cli 0(%r2), 128
; CHECK-NEXT: je
; CHECK: br %r14
- %val = load i8 *%ptr
+ %val = load i8 , i8 *%ptr
%ext = sext i8 %val to i64
%cond = icmp eq i64 %ext, -128
%res = select i1 %cond, double %a, double %b
@@ -125,7 +125,7 @@ define double @f10(double %a, double %b, i8 *%ptr) {
; CHECK-LABEL: f10:
; CHECK-NOT: cli
; CHECK: br %r14
- %val = load i8 *%ptr
+ %val = load i8 , i8 *%ptr
%ext = sext i8 %val to i64
%cond = icmp eq i64 %ext, -129
%res = select i1 %cond, double %a, double %b
diff --git a/test/CodeGen/SystemZ/int-cmp-19.ll b/test/CodeGen/SystemZ/int-cmp-19.ll
index b5f0856b4002..0a23f06a0581 100644
--- a/test/CodeGen/SystemZ/int-cmp-19.ll
+++ b/test/CodeGen/SystemZ/int-cmp-19.ll
@@ -9,7 +9,7 @@ define double @f1(double %a, double %b, i8 *%ptr) {
; CHECK: cli 0(%r2), 0
; CHECK-NEXT: jlh
; CHECK: br %r14
- %val = load i8 *%ptr
+ %val = load i8 , i8 *%ptr
%ext = zext i8 %val to i64
%cond = icmp ne i64 %ext, 0
%res = select i1 %cond, double %a, double %b
@@ -22,7 +22,7 @@ define double @f2(double %a, double %b, i8 *%ptr) {
; CHECK: cli 0(%r2), 255
; CHECK-NEXT: jlh
; CHECK: br %r14
- %val = load i8 *%ptr
+ %val = load i8 , i8 *%ptr
%ext = zext i8 %val to i64
%cond = icmp ne i64 %ext, 255
%res = select i1 %cond, double %a, double %b
@@ -34,7 +34,7 @@ define double @f3(double %a, double %b, i8 *%ptr) {
; CHECK-LABEL: f3:
; CHECK-NOT: cli
; CHECK: br %r14
- %val = load i8 *%ptr
+ %val = load i8 , i8 *%ptr
%ext = zext i8 %val to i64
%cond = icmp ne i64 %ext, 256
%res = select i1 %cond, double %a, double %b
@@ -47,7 +47,7 @@ define double @f4(double %a, double %b, i8 *%ptr) {
; CHECK-LABEL: f4:
; CHECK-NOT: cli
; CHECK: br %r14
- %val = load i8 *%ptr
+ %val = load i8 , i8 *%ptr
%ext = zext i8 %val to i64
%cond = icmp ne i64 %ext, -1
%res = select i1 %cond, double %a, double %b
@@ -60,7 +60,7 @@ define double @f5(double %a, double %b, i8 *%ptr) {
; CHECK: cli 0(%r2), 0
; CHECK-NEXT: jlh
; CHECK: br %r14
- %val = load i8 *%ptr
+ %val = load i8 , i8 *%ptr
%ext = sext i8 %val to i64
%cond = icmp ne i64 %ext, 0
%res = select i1 %cond, double %a, double %b
@@ -73,7 +73,7 @@ define double @f6(double %a, double %b, i8 *%ptr) {
; CHECK: cli 0(%r2), 127
; CHECK-NEXT: jlh
; CHECK: br %r14
- %val = load i8 *%ptr
+ %val = load i8 , i8 *%ptr
%ext = sext i8 %val to i64
%cond = icmp ne i64 %ext, 127
%res = select i1 %cond, double %a, double %b
@@ -86,7 +86,7 @@ define double @f7(double %a, double %b, i8 *%ptr) {
; CHECK-LABEL: f7:
; CHECK-NOT: cli
; CHECK: br %r14
- %val = load i8 *%ptr
+ %val = load i8 , i8 *%ptr
%ext = sext i8 %val to i64
%cond = icmp ne i64 %ext, 128
%res = select i1 %cond, double %a, double %b
@@ -99,7 +99,7 @@ define double @f8(double %a, double %b, i8 *%ptr) {
; CHECK: cli 0(%r2), 255
; CHECK-NEXT: jlh
; CHECK: br %r14
- %val = load i8 *%ptr
+ %val = load i8 , i8 *%ptr
%ext = sext i8 %val to i64
%cond = icmp ne i64 %ext, -1
%res = select i1 %cond, double %a, double %b
@@ -112,7 +112,7 @@ define double @f9(double %a, double %b, i8 *%ptr) {
; CHECK: cli 0(%r2), 128
; CHECK-NEXT: jlh
; CHECK: br %r14
- %val = load i8 *%ptr
+ %val = load i8 , i8 *%ptr
%ext = sext i8 %val to i64
%cond = icmp ne i64 %ext, -128
%res = select i1 %cond, double %a, double %b
@@ -125,7 +125,7 @@ define double @f10(double %a, double %b, i8 *%ptr) {
; CHECK-LABEL: f10:
; CHECK-NOT: cli
; CHECK: br %r14
- %val = load i8 *%ptr
+ %val = load i8 , i8 *%ptr
%ext = sext i8 %val to i64
%cond = icmp ne i64 %ext, -129
%res = select i1 %cond, double %a, double %b
diff --git a/test/CodeGen/SystemZ/int-cmp-20.ll b/test/CodeGen/SystemZ/int-cmp-20.ll
index 98c41cd3a240..2acff55af59c 100644
--- a/test/CodeGen/SystemZ/int-cmp-20.ll
+++ b/test/CodeGen/SystemZ/int-cmp-20.ll
@@ -10,7 +10,7 @@ define double @f1(double %a, double %b, i8 *%ptr) {
; CHECK: cli 0(%r2), 1
; CHECK-NEXT: jh
; CHECK: br %r14
- %val = load i8 *%ptr
+ %val = load i8 , i8 *%ptr
%ext = zext i8 %val to i32
%cond = icmp ugt i32 %ext, 1
%res = select i1 %cond, double %a, double %b
@@ -24,7 +24,7 @@ define double @f2(double %a, double %b, i8 *%ptr) {
; CHECK: cli 0(%r2), 1
; CHECK-NEXT: jh
; CHECK: br %r14
- %val = load i8 *%ptr
+ %val = load i8 , i8 *%ptr
%ext = sext i8 %val to i32
%cond = icmp ugt i32 %ext, 1
%res = select i1 %cond, double %a, double %b
@@ -38,7 +38,7 @@ define double @f3(double %a, double %b, i8 *%ptr) {
; CHECK: cli 0(%r2), 254
; CHECK-NEXT: jl
; CHECK: br %r14
- %val = load i8 *%ptr
+ %val = load i8 , i8 *%ptr
%ext = zext i8 %val to i32
%cond = icmp ult i32 %ext, 254
%res = select i1 %cond, double %a, double %b
@@ -52,7 +52,7 @@ define double @f4(double %a, double %b, i8 *%ptr) {
; CHECK: cli 0(%r2), 254
; CHECK-NEXT: jl
; CHECK: br %r14
- %val = load i8 *%ptr
+ %val = load i8 , i8 *%ptr
%ext = sext i8 %val to i32
%cond = icmp ult i32 %ext, -2
%res = select i1 %cond, double %a, double %b
@@ -65,7 +65,7 @@ define double @f5(double %a, double %b, i8 *%ptr) {
; CHECK-LABEL: f5:
; CHECK-NOT: cli {{.*}}
; CHECK: br %r14
- %val = load i8 *%ptr
+ %val = load i8 , i8 *%ptr
%ext = zext i8 %val to i32
%cond = icmp ult i32 %ext, 256
%res = select i1 %cond, double %a, double %b
@@ -81,7 +81,7 @@ define double @f6(double %a, double %b, i8 *%ptr) {
; CHECK-LABEL: f6:
; CHECK-NOT: cli {{.*}}
; CHECK: br %r14
- %val = load i8 *%ptr
+ %val = load i8 , i8 *%ptr
%ext = sext i8 %val to i32
%cond = icmp ult i32 %ext, 128
%res = select i1 %cond, double %a, double %b
@@ -93,7 +93,7 @@ define double @f7(double %a, double %b, i8 *%ptr) {
; CHECK-LABEL: f7:
; CHECK-NOT: cli {{.*}}
; CHECK: br %r14
- %val = load i8 *%ptr
+ %val = load i8 , i8 *%ptr
%ext = sext i8 %val to i32
%cond = icmp ult i32 %ext, -129
%res = select i1 %cond, double %a, double %b
@@ -107,7 +107,7 @@ define double @f8(double %a, double %b, i8 *%ptr) {
; CHECK: cli 0(%r2), 1
; CHECK-NEXT: jh
; CHECK: br %r14
- %val = load i8 *%ptr
+ %val = load i8 , i8 *%ptr
%ext = zext i8 %val to i32
%cond = icmp sgt i32 %ext, 1
%res = select i1 %cond, double %a, double %b
@@ -120,7 +120,7 @@ define double @f9(double %a, double %b, i8 *%ptr) {
; CHECK-LABEL: f9:
; CHECK-NOT: cli {{.*}}
; CHECK: br %r14
- %val = load i8 *%ptr
+ %val = load i8 , i8 *%ptr
%ext = sext i8 %val to i32
%cond = icmp sgt i32 %ext, 1
%res = select i1 %cond, double %a, double %b
@@ -134,7 +134,7 @@ define double @f10(double %a, double %b, i8 *%ptr) {
; CHECK: cli 0(%r2), 254
; CHECK-NEXT: jl
; CHECK: br %r14
- %val = load i8 *%ptr
+ %val = load i8 , i8 *%ptr
%ext = zext i8 %val to i32
%cond = icmp slt i32 %ext, 254
%res = select i1 %cond, double %a, double %b
@@ -147,7 +147,7 @@ define double @f11(double %a, double %b, i8 *%ptr) {
; CHECK-LABEL: f11:
; CHECK-NOT: cli {{.*}}
; CHECK: br %r14
- %val = load i8 *%ptr
+ %val = load i8 , i8 *%ptr
%ext = sext i8 %val to i32
%cond = icmp slt i32 %ext, -2
%res = select i1 %cond, double %a, double %b
@@ -160,7 +160,7 @@ define double @f12(double %a, double %b, i8 *%ptr) {
; CHECK-LABEL: f12:
; CHECK-NOT: cli {{.*}}
; CHECK: br %r14
- %val = load i8 *%ptr
+ %val = load i8 , i8 *%ptr
%ext = zext i8 %val to i32
%cond = icmp slt i32 %ext, 256
%res = select i1 %cond, double %a, double %b
@@ -173,7 +173,7 @@ define double @f13(double %a, double %b, i8 *%ptr) {
; CHECK: cli 0(%r2), 128
; CHECK-NEXT: jl
; CHECK: br %r14
- %val = load i8 *%ptr
+ %val = load i8 , i8 *%ptr
%ext = sext i8 %val to i32
%cond = icmp sge i32 %ext, 0
%res = select i1 %cond, double %a, double %b
@@ -186,7 +186,7 @@ define double @f14(double %a, double %b, i8 *%ptr) {
; CHECK: cli 0(%r2), 128
; CHECK-NEXT: jl
; CHECK: br %r14
- %val = load i8 *%ptr
+ %val = load i8 , i8 *%ptr
%ext = sext i8 %val to i32
%cond = icmp sgt i32 %ext, -1
%res = select i1 %cond, double %a, double %b
@@ -199,7 +199,7 @@ define double @f15(double %a, double %b, i8 *%ptr) {
; CHECK: cli 0(%r2), 127
; CHECK-NEXT: jh
; CHECK: br %r14
- %val = load i8 *%ptr
+ %val = load i8 , i8 *%ptr
%ext = sext i8 %val to i32
%cond = icmp slt i32 %ext, 0
%res = select i1 %cond, double %a, double %b
@@ -212,7 +212,7 @@ define double @f16(double %a, double %b, i8 *%ptr) {
; CHECK: cli 0(%r2), 127
; CHECK-NEXT: jh
; CHECK: br %r14
- %val = load i8 *%ptr
+ %val = load i8 , i8 *%ptr
%ext = sext i8 %val to i32
%cond = icmp sle i32 %ext, -1
%res = select i1 %cond, double %a, double %b
diff --git a/test/CodeGen/SystemZ/int-cmp-21.ll b/test/CodeGen/SystemZ/int-cmp-21.ll
index ca9225dead92..5be97324f643 100644
--- a/test/CodeGen/SystemZ/int-cmp-21.ll
+++ b/test/CodeGen/SystemZ/int-cmp-21.ll
@@ -10,7 +10,7 @@ define double @f1(double %a, double %b, i8 *%ptr) {
; CHECK: cli 0(%r2), 1
; CHECK-NEXT: jh
; CHECK: br %r14
- %val = load i8 *%ptr
+ %val = load i8 , i8 *%ptr
%ext = zext i8 %val to i64
%cond = icmp ugt i64 %ext, 1
%res = select i1 %cond, double %a, double %b
@@ -24,7 +24,7 @@ define double @f2(double %a, double %b, i8 *%ptr) {
; CHECK: cli 0(%r2), 1
; CHECK-NEXT: jh
; CHECK: br %r14
- %val = load i8 *%ptr
+ %val = load i8 , i8 *%ptr
%ext = sext i8 %val to i64
%cond = icmp ugt i64 %ext, 1
%res = select i1 %cond, double %a, double %b
@@ -38,7 +38,7 @@ define double @f3(double %a, double %b, i8 *%ptr) {
; CHECK: cli 0(%r2), 254
; CHECK-NEXT: jl
; CHECK: br %r14
- %val = load i8 *%ptr
+ %val = load i8 , i8 *%ptr
%ext = zext i8 %val to i64
%cond = icmp ult i64 %ext, 254
%res = select i1 %cond, double %a, double %b
@@ -52,7 +52,7 @@ define double @f4(double %a, double %b, i8 *%ptr) {
; CHECK: cli 0(%r2), 254
; CHECK-NEXT: jl
; CHECK: br %r14
- %val = load i8 *%ptr
+ %val = load i8 , i8 *%ptr
%ext = sext i8 %val to i64
%cond = icmp ult i64 %ext, -2
%res = select i1 %cond, double %a, double %b
@@ -65,7 +65,7 @@ define double @f5(double %a, double %b, i8 *%ptr) {
; CHECK-LABEL: f5:
; CHECK-NOT: cli
; CHECK: br %r14
- %val = load i8 *%ptr
+ %val = load i8 , i8 *%ptr
%ext = zext i8 %val to i64
%cond = icmp ult i64 %ext, 256
%res = select i1 %cond, double %a, double %b
@@ -81,7 +81,7 @@ define double @f6(double %a, double %b, i8 *%ptr) {
; CHECK-LABEL: f6:
; CHECK-NOT: cli
; CHECK: br %r14
- %val = load i8 *%ptr
+ %val = load i8 , i8 *%ptr
%ext = sext i8 %val to i64
%cond = icmp ult i64 %ext, 128
%res = select i1 %cond, double %a, double %b
@@ -93,7 +93,7 @@ define double @f7(double %a, double %b, i8 *%ptr) {
; CHECK-LABEL: f7:
; CHECK-NOT: cli
; CHECK: br %r14
- %val = load i8 *%ptr
+ %val = load i8 , i8 *%ptr
%ext = sext i8 %val to i64
%cond = icmp ult i64 %ext, -129
%res = select i1 %cond, double %a, double %b
@@ -107,7 +107,7 @@ define double @f8(double %a, double %b, i8 *%ptr) {
; CHECK: cli 0(%r2), 1
; CHECK-NEXT: jh
; CHECK: br %r14
- %val = load i8 *%ptr
+ %val = load i8 , i8 *%ptr
%ext = zext i8 %val to i64
%cond = icmp sgt i64 %ext, 1
%res = select i1 %cond, double %a, double %b
@@ -120,7 +120,7 @@ define double @f9(double %a, double %b, i8 *%ptr) {
; CHECK-LABEL: f9:
; CHECK-NOT: cli
; CHECK: br %r14
- %val = load i8 *%ptr
+ %val = load i8 , i8 *%ptr
%ext = sext i8 %val to i64
%cond = icmp sgt i64 %ext, 1
%res = select i1 %cond, double %a, double %b
@@ -134,7 +134,7 @@ define double @f10(double %a, double %b, i8 *%ptr) {
; CHECK: cli 0(%r2), 254
; CHECK-NEXT: jl
; CHECK: br %r14
- %val = load i8 *%ptr
+ %val = load i8 , i8 *%ptr
%ext = zext i8 %val to i64
%cond = icmp slt i64 %ext, 254
%res = select i1 %cond, double %a, double %b
@@ -147,7 +147,7 @@ define double @f11(double %a, double %b, i8 *%ptr) {
; CHECK-LABEL: f11:
; CHECK-NOT: cli
; CHECK: br %r14
- %val = load i8 *%ptr
+ %val = load i8 , i8 *%ptr
%ext = sext i8 %val to i64
%cond = icmp slt i64 %ext, -2
%res = select i1 %cond, double %a, double %b
@@ -160,7 +160,7 @@ define double @f12(double %a, double %b, i8 *%ptr) {
; CHECK-LABEL: f12:
; CHECK-NOT: cli
; CHECK: br %r14
- %val = load i8 *%ptr
+ %val = load i8 , i8 *%ptr
%ext = zext i8 %val to i64
%cond = icmp slt i64 %ext, 256
%res = select i1 %cond, double %a, double %b
@@ -173,7 +173,7 @@ define double @f13(double %a, double %b, i8 *%ptr) {
; CHECK: cli 0(%r2), 128
; CHECK-NEXT: jl
; CHECK: br %r14
- %val = load i8 *%ptr
+ %val = load i8 , i8 *%ptr
%ext = sext i8 %val to i64
%cond = icmp sge i64 %ext, 0
%res = select i1 %cond, double %a, double %b
@@ -186,7 +186,7 @@ define double @f14(double %a, double %b, i8 *%ptr) {
; CHECK: cli 0(%r2), 128
; CHECK-NEXT: jl
; CHECK: br %r14
- %val = load i8 *%ptr
+ %val = load i8 , i8 *%ptr
%ext = sext i8 %val to i64
%cond = icmp sgt i64 %ext, -1
%res = select i1 %cond, double %a, double %b
@@ -199,7 +199,7 @@ define double @f15(double %a, double %b, i8 *%ptr) {
; CHECK: cli 0(%r2), 127
; CHECK-NEXT: jh
; CHECK: br %r14
- %val = load i8 *%ptr
+ %val = load i8 , i8 *%ptr
%ext = sext i8 %val to i64
%cond = icmp slt i64 %ext, 0
%res = select i1 %cond, double %a, double %b
@@ -212,7 +212,7 @@ define double @f16(double %a, double %b, i8 *%ptr) {
; CHECK: cli 0(%r2), 127
; CHECK-NEXT: jh
; CHECK: br %r14
- %val = load i8 *%ptr
+ %val = load i8 , i8 *%ptr
%ext = sext i8 %val to i64
%cond = icmp sle i64 %ext, -1
%res = select i1 %cond, double %a, double %b
diff --git a/test/CodeGen/SystemZ/int-cmp-22.ll b/test/CodeGen/SystemZ/int-cmp-22.ll
index 43daec95b7d8..f29023cf02ae 100644
--- a/test/CodeGen/SystemZ/int-cmp-22.ll
+++ b/test/CodeGen/SystemZ/int-cmp-22.ll
@@ -9,7 +9,7 @@ define double @f1(double %a, double %b, i16 *%ptr) {
; CHECK-NEXT: jl
; CHECK: ldr %f0, %f2
; CHECK: br %r14
- %val = load i16 *%ptr
+ %val = load i16 , i16 *%ptr
%cond = icmp slt i16 %val, 0
%res = select i1 %cond, double %a, double %b
ret double %res
@@ -22,7 +22,7 @@ define double @f2(double %a, double %b, i16 *%ptr) {
; CHECK-NEXT: jle
; CHECK: ldr %f0, %f2
; CHECK: br %r14
- %val = load i16 *%ptr
+ %val = load i16 , i16 *%ptr
%cond = icmp slt i16 %val, 1
%res = select i1 %cond, double %a, double %b
ret double %res
@@ -35,7 +35,7 @@ define double @f3(double %a, double %b, i16 *%ptr) {
; CHECK-NEXT: jl
; CHECK: ldr %f0, %f2
; CHECK: br %r14
- %val = load i16 *%ptr
+ %val = load i16 , i16 *%ptr
%cond = icmp slt i16 %val, 32766
%res = select i1 %cond, double %a, double %b
ret double %res
@@ -48,7 +48,7 @@ define double @f4(double %a, double %b, i16 *%ptr) {
; CHECK-NEXT: jl
; CHECK: ldr %f0, %f2
; CHECK: br %r14
- %val = load i16 *%ptr
+ %val = load i16 , i16 *%ptr
%cond = icmp slt i16 %val, -1
%res = select i1 %cond, double %a, double %b
ret double %res
@@ -61,7 +61,7 @@ define double @f5(double %a, double %b, i16 *%ptr) {
; CHECK-NEXT: jl
; CHECK: ldr %f0, %f2
; CHECK: br %r14
- %val = load i16 *%ptr
+ %val = load i16 , i16 *%ptr
%cond = icmp slt i16 %val, -32766
%res = select i1 %cond, double %a, double %b
ret double %res
@@ -74,8 +74,8 @@ define double @f6(double %a, double %b, i16 %i1, i16 *%base) {
; CHECK-NEXT: jl
; CHECK: ldr %f0, %f2
; CHECK: br %r14
- %ptr = getelementptr i16 *%base, i64 2047
- %val = load i16 *%ptr
+ %ptr = getelementptr i16, i16 *%base, i64 2047
+ %val = load i16 , i16 *%ptr
%cond = icmp slt i16 %val, 0
%res = select i1 %cond, double %a, double %b
ret double %res
@@ -89,8 +89,8 @@ define double @f7(double %a, double %b, i16 *%base) {
; CHECK-NEXT: jl
; CHECK: ldr %f0, %f2
; CHECK: br %r14
- %ptr = getelementptr i16 *%base, i64 2048
- %val = load i16 *%ptr
+ %ptr = getelementptr i16, i16 *%base, i64 2048
+ %val = load i16 , i16 *%ptr
%cond = icmp slt i16 %val, 0
%res = select i1 %cond, double %a, double %b
ret double %res
@@ -104,8 +104,8 @@ define double @f8(double %a, double %b, i16 *%base) {
; CHECK-NEXT: jl
; CHECK: ldr %f0, %f2
; CHECK: br %r14
- %ptr = getelementptr i16 *%base, i64 -1
- %val = load i16 *%ptr
+ %ptr = getelementptr i16, i16 *%base, i64 -1
+ %val = load i16 , i16 *%ptr
%cond = icmp slt i16 %val, 0
%res = select i1 %cond, double %a, double %b
ret double %res
@@ -121,7 +121,7 @@ define double @f9(double %a, double %b, i64 %base, i64 %index) {
; CHECK: br %r14
%add = add i64 %base, %index
%ptr = inttoptr i64 %add to i16 *
- %val = load i16 *%ptr
+ %val = load i16 , i16 *%ptr
%cond = icmp slt i16 %val, 0
%res = select i1 %cond, double %a, double %b
ret double %res
diff --git a/test/CodeGen/SystemZ/int-cmp-23.ll b/test/CodeGen/SystemZ/int-cmp-23.ll
index 99fe74b1c787..df6b62616a79 100644
--- a/test/CodeGen/SystemZ/int-cmp-23.ll
+++ b/test/CodeGen/SystemZ/int-cmp-23.ll
@@ -9,7 +9,7 @@ define double @f1(double %a, double %b, i16 *%ptr) {
; CHECK-NEXT: jh
; CHECK: ldr %f0, %f2
; CHECK: br %r14
- %val = load i16 *%ptr
+ %val = load i16 , i16 *%ptr
%cond = icmp ugt i16 %val, 1
%res = select i1 %cond, double %a, double %b
ret double %res
@@ -22,7 +22,7 @@ define double @f2(double %a, double %b, i16 *%ptr) {
; CHECK-NEXT: jl
; CHECK: ldr %f0, %f2
; CHECK: br %r14
- %val = load i16 *%ptr
+ %val = load i16 , i16 *%ptr
%cond = icmp ult i16 %val, 65534
%res = select i1 %cond, double %a, double %b
ret double %res
@@ -35,8 +35,8 @@ define double @f3(double %a, double %b, i16 %i1, i16 *%base) {
; CHECK-NEXT: jh
; CHECK: ldr %f0, %f2
; CHECK: br %r14
- %ptr = getelementptr i16 *%base, i64 2047
- %val = load i16 *%ptr
+ %ptr = getelementptr i16, i16 *%base, i64 2047
+ %val = load i16 , i16 *%ptr
%cond = icmp ugt i16 %val, 1
%res = select i1 %cond, double %a, double %b
ret double %res
@@ -50,8 +50,8 @@ define double @f4(double %a, double %b, i16 *%base) {
; CHECK-NEXT: jh
; CHECK: ldr %f0, %f2
; CHECK: br %r14
- %ptr = getelementptr i16 *%base, i64 2048
- %val = load i16 *%ptr
+ %ptr = getelementptr i16, i16 *%base, i64 2048
+ %val = load i16 , i16 *%ptr
%cond = icmp ugt i16 %val, 1
%res = select i1 %cond, double %a, double %b
ret double %res
@@ -65,8 +65,8 @@ define double @f5(double %a, double %b, i16 *%base) {
; CHECK-NEXT: jh
; CHECK: ldr %f0, %f2
; CHECK: br %r14
- %ptr = getelementptr i16 *%base, i64 -1
- %val = load i16 *%ptr
+ %ptr = getelementptr i16, i16 *%base, i64 -1
+ %val = load i16 , i16 *%ptr
%cond = icmp ugt i16 %val, 1
%res = select i1 %cond, double %a, double %b
ret double %res
@@ -82,7 +82,7 @@ define double @f6(double %a, double %b, i64 %base, i64 %index) {
; CHECK: br %r14
%add = add i64 %base, %index
%ptr = inttoptr i64 %add to i16 *
- %val = load i16 *%ptr
+ %val = load i16 , i16 *%ptr
%cond = icmp ugt i16 %val, 1
%res = select i1 %cond, double %a, double %b
ret double %res
diff --git a/test/CodeGen/SystemZ/int-cmp-24.ll b/test/CodeGen/SystemZ/int-cmp-24.ll
index 1a8e587b0341..e1141a78ddda 100644
--- a/test/CodeGen/SystemZ/int-cmp-24.ll
+++ b/test/CodeGen/SystemZ/int-cmp-24.ll
@@ -9,7 +9,7 @@ define double @f1(double %a, double %b, i16 *%ptr) {
; CHECK-NEXT: je
; CHECK: ldr %f0, %f2
; CHECK: br %r14
- %val = load i16 *%ptr
+ %val = load i16 , i16 *%ptr
%cond = icmp eq i16 %val, 0
%res = select i1 %cond, double %a, double %b
ret double %res
@@ -22,7 +22,7 @@ define double @f2(double %a, double %b, i16 *%ptr) {
; CHECK-NEXT: je
; CHECK: ldr %f0, %f2
; CHECK: br %r14
- %val = load i16 *%ptr
+ %val = load i16 , i16 *%ptr
%cond = icmp eq i16 %val, 65535
%res = select i1 %cond, double %a, double %b
ret double %res
@@ -35,7 +35,7 @@ define double @f3(double %a, double %b, i16 *%ptr) {
; CHECK-NEXT: je
; CHECK: ldr %f0, %f2
; CHECK: br %r14
- %val = load i16 *%ptr
+ %val = load i16 , i16 *%ptr
%cond = icmp eq i16 %val, -32768
%res = select i1 %cond, double %a, double %b
ret double %res
@@ -48,7 +48,7 @@ define double @f4(double %a, double %b, i16 *%ptr) {
; CHECK-NEXT: je
; CHECK: ldr %f0, %f2
; CHECK: br %r14
- %val = load i16 *%ptr
+ %val = load i16 , i16 *%ptr
%cond = icmp eq i16 %val, 32767
%res = select i1 %cond, double %a, double %b
ret double %res
diff --git a/test/CodeGen/SystemZ/int-cmp-25.ll b/test/CodeGen/SystemZ/int-cmp-25.ll
index 50803df1ba91..268530316506 100644
--- a/test/CodeGen/SystemZ/int-cmp-25.ll
+++ b/test/CodeGen/SystemZ/int-cmp-25.ll
@@ -9,7 +9,7 @@ define double @f1(double %a, double %b, i16 *%ptr) {
; CHECK-NEXT: jlh
; CHECK: ldr %f0, %f2
; CHECK: br %r14
- %val = load i16 *%ptr
+ %val = load i16 , i16 *%ptr
%cond = icmp ne i16 %val, 0
%res = select i1 %cond, double %a, double %b
ret double %res
@@ -22,7 +22,7 @@ define double @f2(double %a, double %b, i16 *%ptr) {
; CHECK-NEXT: jlh
; CHECK: ldr %f0, %f2
; CHECK: br %r14
- %val = load i16 *%ptr
+ %val = load i16 , i16 *%ptr
%cond = icmp ne i16 %val, 65535
%res = select i1 %cond, double %a, double %b
ret double %res
@@ -35,7 +35,7 @@ define double @f3(double %a, double %b, i16 *%ptr) {
; CHECK-NEXT: jlh
; CHECK: ldr %f0, %f2
; CHECK: br %r14
- %val = load i16 *%ptr
+ %val = load i16 , i16 *%ptr
%cond = icmp ne i16 %val, -32768
%res = select i1 %cond, double %a, double %b
ret double %res
@@ -48,7 +48,7 @@ define double @f4(double %a, double %b, i16 *%ptr) {
; CHECK-NEXT: jlh
; CHECK: ldr %f0, %f2
; CHECK: br %r14
- %val = load i16 *%ptr
+ %val = load i16 , i16 *%ptr
%cond = icmp ne i16 %val, 32767
%res = select i1 %cond, double %a, double %b
ret double %res
diff --git a/test/CodeGen/SystemZ/int-cmp-26.ll b/test/CodeGen/SystemZ/int-cmp-26.ll
index 60778654b275..ba93f081e9b9 100644
--- a/test/CodeGen/SystemZ/int-cmp-26.ll
+++ b/test/CodeGen/SystemZ/int-cmp-26.ll
@@ -9,7 +9,7 @@ define double @f1(double %a, double %b, i16 *%ptr) {
; CHECK: clhhsi 0(%r2), 0
; CHECK-NEXT: je
; CHECK: br %r14
- %val = load i16 *%ptr
+ %val = load i16 , i16 *%ptr
%ext = zext i16 %val to i32
%cond = icmp eq i32 %ext, 0
%res = select i1 %cond, double %a, double %b
@@ -22,7 +22,7 @@ define double @f2(double %a, double %b, i16 *%ptr) {
; CHECK: clhhsi 0(%r2), 65535
; CHECK-NEXT: je
; CHECK: br %r14
- %val = load i16 *%ptr
+ %val = load i16 , i16 *%ptr
%ext = zext i16 %val to i32
%cond = icmp eq i32 %ext, 65535
%res = select i1 %cond, double %a, double %b
@@ -34,7 +34,7 @@ define double @f3(double %a, double %b, i16 *%ptr) {
; CHECK-LABEL: f3:
; CHECK-NOT: clhhsi
; CHECK: br %r14
- %val = load i16 *%ptr
+ %val = load i16 , i16 *%ptr
%ext = zext i16 %val to i32
%cond = icmp eq i32 %ext, 65536
%res = select i1 %cond, double %a, double %b
@@ -47,7 +47,7 @@ define double @f4(double %a, double %b, i16 *%ptr) {
; CHECK-LABEL: f4:
; CHECK-NOT: clhhsi
; CHECK: br %r14
- %val = load i16 *%ptr
+ %val = load i16 , i16 *%ptr
%ext = zext i16 %val to i32
%cond = icmp eq i32 %ext, -1
%res = select i1 %cond, double %a, double %b
@@ -60,7 +60,7 @@ define double @f5(double %a, double %b, i16 *%ptr) {
; CHECK: clhhsi 0(%r2), 0
; CHECK-NEXT: je
; CHECK: br %r14
- %val = load i16 *%ptr
+ %val = load i16 , i16 *%ptr
%ext = sext i16 %val to i32
%cond = icmp eq i32 %ext, 0
%res = select i1 %cond, double %a, double %b
@@ -73,7 +73,7 @@ define double @f6(double %a, double %b, i16 *%ptr) {
; CHECK: clhhsi 0(%r2), 32767
; CHECK-NEXT: je
; CHECK: br %r14
- %val = load i16 *%ptr
+ %val = load i16 , i16 *%ptr
%ext = sext i16 %val to i32
%cond = icmp eq i32 %ext, 32767
%res = select i1 %cond, double %a, double %b
@@ -86,7 +86,7 @@ define double @f7(double %a, double %b, i16 *%ptr) {
; CHECK-LABEL: f7:
; CHECK-NOT: clhhsi
; CHECK: br %r14
- %val = load i16 *%ptr
+ %val = load i16 , i16 *%ptr
%ext = sext i16 %val to i32
%cond = icmp eq i32 %ext, 32768
%res = select i1 %cond, double %a, double %b
@@ -99,7 +99,7 @@ define double @f8(double %a, double %b, i16 *%ptr) {
; CHECK: clhhsi 0(%r2), 65535
; CHECK-NEXT: je
; CHECK: br %r14
- %val = load i16 *%ptr
+ %val = load i16 , i16 *%ptr
%ext = sext i16 %val to i32
%cond = icmp eq i32 %ext, -1
%res = select i1 %cond, double %a, double %b
@@ -112,7 +112,7 @@ define double @f9(double %a, double %b, i16 *%ptr) {
; CHECK: clhhsi 0(%r2), 32768
; CHECK-NEXT: je
; CHECK: br %r14
- %val = load i16 *%ptr
+ %val = load i16 , i16 *%ptr
%ext = sext i16 %val to i32
%cond = icmp eq i32 %ext, -32768
%res = select i1 %cond, double %a, double %b
@@ -125,7 +125,7 @@ define double @f10(double %a, double %b, i16 *%ptr) {
; CHECK-LABEL: f10:
; CHECK-NOT: clhhsi
; CHECK: br %r14
- %val = load i16 *%ptr
+ %val = load i16 , i16 *%ptr
%ext = sext i16 %val to i32
%cond = icmp eq i32 %ext, -32769
%res = select i1 %cond, double %a, double %b
diff --git a/test/CodeGen/SystemZ/int-cmp-27.ll b/test/CodeGen/SystemZ/int-cmp-27.ll
index 3102f5c5faa4..9a503c9254a2 100644
--- a/test/CodeGen/SystemZ/int-cmp-27.ll
+++ b/test/CodeGen/SystemZ/int-cmp-27.ll
@@ -9,7 +9,7 @@ define double @f1(double %a, double %b, i16 *%ptr) {
; CHECK: clhhsi 0(%r2), 0
; CHECK-NEXT: jlh
; CHECK: br %r14
- %val = load i16 *%ptr
+ %val = load i16 , i16 *%ptr
%ext = zext i16 %val to i32
%cond = icmp ne i32 %ext, 0
%res = select i1 %cond, double %a, double %b
@@ -22,7 +22,7 @@ define double @f2(double %a, double %b, i16 *%ptr) {
; CHECK: clhhsi 0(%r2), 65535
; CHECK-NEXT: jlh
; CHECK: br %r14
- %val = load i16 *%ptr
+ %val = load i16 , i16 *%ptr
%ext = zext i16 %val to i32
%cond = icmp ne i32 %ext, 65535
%res = select i1 %cond, double %a, double %b
@@ -34,7 +34,7 @@ define double @f3(double %a, double %b, i16 *%ptr) {
; CHECK-LABEL: f3:
; CHECK-NOT: clhhsi
; CHECK: br %r14
- %val = load i16 *%ptr
+ %val = load i16 , i16 *%ptr
%ext = zext i16 %val to i32
%cond = icmp ne i32 %ext, 65536
%res = select i1 %cond, double %a, double %b
@@ -47,7 +47,7 @@ define double @f4(double %a, double %b, i16 *%ptr) {
; CHECK-LABEL: f4:
; CHECK-NOT: clhhsi
; CHECK: br %r14
- %val = load i16 *%ptr
+ %val = load i16 , i16 *%ptr
%ext = zext i16 %val to i32
%cond = icmp ne i32 %ext, -1
%res = select i1 %cond, double %a, double %b
@@ -60,7 +60,7 @@ define double @f5(double %a, double %b, i16 *%ptr) {
; CHECK: clhhsi 0(%r2), 0
; CHECK-NEXT: jlh
; CHECK: br %r14
- %val = load i16 *%ptr
+ %val = load i16 , i16 *%ptr
%ext = sext i16 %val to i32
%cond = icmp ne i32 %ext, 0
%res = select i1 %cond, double %a, double %b
@@ -73,7 +73,7 @@ define double @f6(double %a, double %b, i16 *%ptr) {
; CHECK: clhhsi 0(%r2), 32767
; CHECK-NEXT: jlh
; CHECK: br %r14
- %val = load i16 *%ptr
+ %val = load i16 , i16 *%ptr
%ext = sext i16 %val to i32
%cond = icmp ne i32 %ext, 32767
%res = select i1 %cond, double %a, double %b
@@ -86,7 +86,7 @@ define double @f7(double %a, double %b, i16 *%ptr) {
; CHECK-LABEL: f7:
; CHECK-NOT: clhhsi
; CHECK: br %r14
- %val = load i16 *%ptr
+ %val = load i16 , i16 *%ptr
%ext = sext i16 %val to i32
%cond = icmp ne i32 %ext, 32768
%res = select i1 %cond, double %a, double %b
@@ -99,7 +99,7 @@ define double @f8(double %a, double %b, i16 *%ptr) {
; CHECK: clhhsi 0(%r2), 65535
; CHECK-NEXT: jlh
; CHECK: br %r14
- %val = load i16 *%ptr
+ %val = load i16 , i16 *%ptr
%ext = sext i16 %val to i32
%cond = icmp ne i32 %ext, -1
%res = select i1 %cond, double %a, double %b
@@ -112,7 +112,7 @@ define double @f9(double %a, double %b, i16 *%ptr) {
; CHECK: clhhsi 0(%r2), 32768
; CHECK-NEXT: jlh
; CHECK: br %r14
- %val = load i16 *%ptr
+ %val = load i16 , i16 *%ptr
%ext = sext i16 %val to i32
%cond = icmp ne i32 %ext, -32768
%res = select i1 %cond, double %a, double %b
@@ -125,7 +125,7 @@ define double @f10(double %a, double %b, i16 *%ptr) {
; CHECK-LABEL: f10:
; CHECK-NOT: clhhsi
; CHECK: br %r14
- %val = load i16 *%ptr
+ %val = load i16 , i16 *%ptr
%ext = sext i16 %val to i32
%cond = icmp ne i32 %ext, -32769
%res = select i1 %cond, double %a, double %b
diff --git a/test/CodeGen/SystemZ/int-cmp-28.ll b/test/CodeGen/SystemZ/int-cmp-28.ll
index c3b905974ebc..68f1cd28c62d 100644
--- a/test/CodeGen/SystemZ/int-cmp-28.ll
+++ b/test/CodeGen/SystemZ/int-cmp-28.ll
@@ -9,7 +9,7 @@ define double @f1(double %a, double %b, i16 *%ptr) {
; CHECK: clhhsi 0(%r2), 0
; CHECK-NEXT: je
; CHECK: br %r14
- %val = load i16 *%ptr
+ %val = load i16 , i16 *%ptr
%ext = zext i16 %val to i64
%cond = icmp eq i64 %ext, 0
%res = select i1 %cond, double %a, double %b
@@ -22,7 +22,7 @@ define double @f2(double %a, double %b, i16 *%ptr) {
; CHECK: clhhsi 0(%r2), 65535
; CHECK-NEXT: je
; CHECK: br %r14
- %val = load i16 *%ptr
+ %val = load i16 , i16 *%ptr
%ext = zext i16 %val to i64
%cond = icmp eq i64 %ext, 65535
%res = select i1 %cond, double %a, double %b
@@ -34,7 +34,7 @@ define double @f3(double %a, double %b, i16 *%ptr) {
; CHECK-LABEL: f3:
; CHECK-NOT: clhhsi
; CHECK: br %r14
- %val = load i16 *%ptr
+ %val = load i16 , i16 *%ptr
%ext = zext i16 %val to i64
%cond = icmp eq i64 %ext, 65536
%res = select i1 %cond, double %a, double %b
@@ -47,7 +47,7 @@ define double @f4(double %a, double %b, i16 *%ptr) {
; CHECK-LABEL: f4:
; CHECK-NOT: clhhsi
; CHECK: br %r14
- %val = load i16 *%ptr
+ %val = load i16 , i16 *%ptr
%ext = zext i16 %val to i64
%cond = icmp eq i64 %ext, -1
%res = select i1 %cond, double %a, double %b
@@ -60,7 +60,7 @@ define double @f5(double %a, double %b, i16 *%ptr) {
; CHECK: clhhsi 0(%r2), 0
; CHECK-NEXT: je
; CHECK: br %r14
- %val = load i16 *%ptr
+ %val = load i16 , i16 *%ptr
%ext = sext i16 %val to i64
%cond = icmp eq i64 %ext, 0
%res = select i1 %cond, double %a, double %b
@@ -73,7 +73,7 @@ define double @f6(double %a, double %b, i16 *%ptr) {
; CHECK: clhhsi 0(%r2), 32767
; CHECK-NEXT: je
; CHECK: br %r14
- %val = load i16 *%ptr
+ %val = load i16 , i16 *%ptr
%ext = sext i16 %val to i64
%cond = icmp eq i64 %ext, 32767
%res = select i1 %cond, double %a, double %b
@@ -86,7 +86,7 @@ define double @f7(double %a, double %b, i16 *%ptr) {
; CHECK-LABEL: f7:
; CHECK-NOT: clhhsi
; CHECK: br %r14
- %val = load i16 *%ptr
+ %val = load i16 , i16 *%ptr
%ext = sext i16 %val to i64
%cond = icmp eq i64 %ext, 32768
%res = select i1 %cond, double %a, double %b
@@ -99,7 +99,7 @@ define double @f8(double %a, double %b, i16 *%ptr) {
; CHECK: clhhsi 0(%r2), 65535
; CHECK-NEXT: je
; CHECK: br %r14
- %val = load i16 *%ptr
+ %val = load i16 , i16 *%ptr
%ext = sext i16 %val to i64
%cond = icmp eq i64 %ext, -1
%res = select i1 %cond, double %a, double %b
@@ -112,7 +112,7 @@ define double @f9(double %a, double %b, i16 *%ptr) {
; CHECK: clhhsi 0(%r2), 32768
; CHECK-NEXT: je
; CHECK: br %r14
- %val = load i16 *%ptr
+ %val = load i16 , i16 *%ptr
%ext = sext i16 %val to i64
%cond = icmp eq i64 %ext, -32768
%res = select i1 %cond, double %a, double %b
@@ -125,7 +125,7 @@ define double @f10(double %a, double %b, i16 *%ptr) {
; CHECK-LABEL: f10:
; CHECK-NOT: clhhsi
; CHECK: br %r14
- %val = load i16 *%ptr
+ %val = load i16 , i16 *%ptr
%ext = sext i16 %val to i64
%cond = icmp eq i64 %ext, -32769
%res = select i1 %cond, double %a, double %b
diff --git a/test/CodeGen/SystemZ/int-cmp-29.ll b/test/CodeGen/SystemZ/int-cmp-29.ll
index 1b40d8cfb2ae..4fb2e8577699 100644
--- a/test/CodeGen/SystemZ/int-cmp-29.ll
+++ b/test/CodeGen/SystemZ/int-cmp-29.ll
@@ -9,7 +9,7 @@ define double @f1(double %a, double %b, i16 *%ptr) {
; CHECK: clhhsi 0(%r2), 0
; CHECK-NEXT: jlh
; CHECK: br %r14
- %val = load i16 *%ptr
+ %val = load i16 , i16 *%ptr
%ext = zext i16 %val to i64
%cond = icmp ne i64 %ext, 0
%res = select i1 %cond, double %a, double %b
@@ -22,7 +22,7 @@ define double @f2(double %a, double %b, i16 *%ptr) {
; CHECK: clhhsi 0(%r2), 65535
; CHECK-NEXT: jlh
; CHECK: br %r14
- %val = load i16 *%ptr
+ %val = load i16 , i16 *%ptr
%ext = zext i16 %val to i64
%cond = icmp ne i64 %ext, 65535
%res = select i1 %cond, double %a, double %b
@@ -34,7 +34,7 @@ define double @f3(double %a, double %b, i16 *%ptr) {
; CHECK-LABEL: f3:
; CHECK-NOT: clhhsi
; CHECK: br %r14
- %val = load i16 *%ptr
+ %val = load i16 , i16 *%ptr
%ext = zext i16 %val to i64
%cond = icmp ne i64 %ext, 65536
%res = select i1 %cond, double %a, double %b
@@ -47,7 +47,7 @@ define double @f4(double %a, double %b, i16 *%ptr) {
; CHECK-LABEL: f4:
; CHECK-NOT: clhhsi
; CHECK: br %r14
- %val = load i16 *%ptr
+ %val = load i16 , i16 *%ptr
%ext = zext i16 %val to i64
%cond = icmp ne i64 %ext, -1
%res = select i1 %cond, double %a, double %b
@@ -60,7 +60,7 @@ define double @f5(double %a, double %b, i16 *%ptr) {
; CHECK: clhhsi 0(%r2), 0
; CHECK-NEXT: jlh
; CHECK: br %r14
- %val = load i16 *%ptr
+ %val = load i16 , i16 *%ptr
%ext = sext i16 %val to i64
%cond = icmp ne i64 %ext, 0
%res = select i1 %cond, double %a, double %b
@@ -73,7 +73,7 @@ define double @f6(double %a, double %b, i16 *%ptr) {
; CHECK: clhhsi 0(%r2), 32767
; CHECK-NEXT: jlh
; CHECK: br %r14
- %val = load i16 *%ptr
+ %val = load i16 , i16 *%ptr
%ext = sext i16 %val to i64
%cond = icmp ne i64 %ext, 32767
%res = select i1 %cond, double %a, double %b
@@ -86,7 +86,7 @@ define double @f7(double %a, double %b, i16 *%ptr) {
; CHECK-LABEL: f7:
; CHECK-NOT: clhhsi
; CHECK: br %r14
- %val = load i16 *%ptr
+ %val = load i16 , i16 *%ptr
%ext = sext i16 %val to i64
%cond = icmp ne i64 %ext, 32768
%res = select i1 %cond, double %a, double %b
@@ -99,7 +99,7 @@ define double @f8(double %a, double %b, i16 *%ptr) {
; CHECK: clhhsi 0(%r2), 65535
; CHECK-NEXT: jlh
; CHECK: br %r14
- %val = load i16 *%ptr
+ %val = load i16 , i16 *%ptr
%ext = sext i16 %val to i64
%cond = icmp ne i64 %ext, -1
%res = select i1 %cond, double %a, double %b
@@ -112,7 +112,7 @@ define double @f9(double %a, double %b, i16 *%ptr) {
; CHECK: clhhsi 0(%r2), 32768
; CHECK-NEXT: jlh
; CHECK: br %r14
- %val = load i16 *%ptr
+ %val = load i16 , i16 *%ptr
%ext = sext i16 %val to i64
%cond = icmp ne i64 %ext, -32768
%res = select i1 %cond, double %a, double %b
@@ -125,7 +125,7 @@ define double @f10(double %a, double %b, i16 *%ptr) {
; CHECK-LABEL: f10:
; CHECK-NOT: clhhsi
; CHECK: br %r14
- %val = load i16 *%ptr
+ %val = load i16 , i16 *%ptr
%ext = sext i16 %val to i64
%cond = icmp ne i64 %ext, -32769
%res = select i1 %cond, double %a, double %b
diff --git a/test/CodeGen/SystemZ/int-cmp-30.ll b/test/CodeGen/SystemZ/int-cmp-30.ll
index 6c9498cb3320..043ff484c145 100644
--- a/test/CodeGen/SystemZ/int-cmp-30.ll
+++ b/test/CodeGen/SystemZ/int-cmp-30.ll
@@ -10,7 +10,7 @@ define double @f1(double %a, double %b, i16 *%ptr) {
; CHECK: clhhsi 0(%r2), 1
; CHECK-NEXT: jh
; CHECK: br %r14
- %val = load i16 *%ptr
+ %val = load i16 , i16 *%ptr
%ext = zext i16 %val to i32
%cond = icmp ugt i32 %ext, 1
%res = select i1 %cond, double %a, double %b
@@ -24,7 +24,7 @@ define double @f2(double %a, double %b, i16 *%ptr) {
; CHECK: clhhsi 0(%r2), 1
; CHECK-NEXT: jh
; CHECK: br %r14
- %val = load i16 *%ptr
+ %val = load i16 , i16 *%ptr
%ext = sext i16 %val to i32
%cond = icmp ugt i32 %ext, 1
%res = select i1 %cond, double %a, double %b
@@ -38,7 +38,7 @@ define double @f3(double %a, double %b, i16 *%ptr) {
; CHECK: clhhsi 0(%r2), 65534
; CHECK-NEXT: jl
; CHECK: br %r14
- %val = load i16 *%ptr
+ %val = load i16 , i16 *%ptr
%ext = zext i16 %val to i32
%cond = icmp ult i32 %ext, 65534
%res = select i1 %cond, double %a, double %b
@@ -52,7 +52,7 @@ define double @f4(double %a, double %b, i16 *%ptr) {
; CHECK: clhhsi 0(%r2), 65534
; CHECK-NEXT: jl
; CHECK: br %r14
- %val = load i16 *%ptr
+ %val = load i16 , i16 *%ptr
%ext = sext i16 %val to i32
%cond = icmp ult i32 %ext, -2
%res = select i1 %cond, double %a, double %b
@@ -65,7 +65,7 @@ define double @f5(double %a, double %b, i16 *%ptr) {
; CHECK-LABEL: f5:
; CHECK-NOT: clhhsi
; CHECK: br %r14
- %val = load i16 *%ptr
+ %val = load i16 , i16 *%ptr
%ext = zext i16 %val to i32
%cond = icmp ult i32 %ext, 65536
%res = select i1 %cond, double %a, double %b
@@ -82,7 +82,7 @@ define double @f6(double %a, double %b, i16 *%ptr) {
; CHECK-LABEL: f6:
; CHECK-NOT: clhhsi
; CHECK: br %r14
- %val = load i16 *%ptr
+ %val = load i16 , i16 *%ptr
%ext = sext i16 %val to i32
%cond = icmp ult i32 %ext, 32768
%res = select i1 %cond, double %a, double %b
@@ -94,7 +94,7 @@ define double @f7(double %a, double %b, i16 *%ptr) {
; CHECK-LABEL: f7:
; CHECK-NOT: clhhsi
; CHECK: br %r14
- %val = load i16 *%ptr
+ %val = load i16 , i16 *%ptr
%ext = sext i16 %val to i32
%cond = icmp ult i32 %ext, -32769
%res = select i1 %cond, double %a, double %b
@@ -108,7 +108,7 @@ define double @f8(double %a, double %b, i16 *%ptr) {
; CHECK: clhhsi 0(%r2), 1
; CHECK-NEXT: jh
; CHECK: br %r14
- %val = load i16 *%ptr
+ %val = load i16 , i16 *%ptr
%ext = zext i16 %val to i32
%cond = icmp sgt i32 %ext, 1
%res = select i1 %cond, double %a, double %b
@@ -122,7 +122,7 @@ define double @f9(double %a, double %b, i16 *%ptr) {
; CHECK: chhsi 0(%r2), 1
; CHECK-NEXT: jh
; CHECK: br %r14
- %val = load i16 *%ptr
+ %val = load i16 , i16 *%ptr
%ext = sext i16 %val to i32
%cond = icmp sgt i32 %ext, 1
%res = select i1 %cond, double %a, double %b
@@ -136,7 +136,7 @@ define double @f10(double %a, double %b, i16 *%ptr) {
; CHECK: clhhsi 0(%r2), 65534
; CHECK-NEXT: jl
; CHECK: br %r14
- %val = load i16 *%ptr
+ %val = load i16 , i16 *%ptr
%ext = zext i16 %val to i32
%cond = icmp slt i32 %ext, 65534
%res = select i1 %cond, double %a, double %b
@@ -150,7 +150,7 @@ define double @f11(double %a, double %b, i16 *%ptr) {
; CHECK: chhsi 0(%r2), -2
; CHECK-NEXT: jl
; CHECK: br %r14
- %val = load i16 *%ptr
+ %val = load i16 , i16 *%ptr
%ext = sext i16 %val to i32
%cond = icmp slt i32 %ext, -2
%res = select i1 %cond, double %a, double %b
@@ -163,7 +163,7 @@ define double @f12(double %a, double %b, i16 *%ptr) {
; CHECK-LABEL: f12:
; CHECK-NOT: cli
; CHECK: br %r14
- %val = load i16 *%ptr
+ %val = load i16 , i16 *%ptr
%ext = zext i16 %val to i32
%cond = icmp slt i32 %ext, 65536
%res = select i1 %cond, double %a, double %b
@@ -177,7 +177,7 @@ define double @f13(double %a, double %b, i16 *%ptr) {
; CHECK: chhsi 0(%r2), 32766
; CHECK-NEXT: jl
; CHECK: br %r14
- %val = load i16 *%ptr
+ %val = load i16 , i16 *%ptr
%ext = sext i16 %val to i32
%cond = icmp slt i32 %ext, 32766
%res = select i1 %cond, double %a, double %b
@@ -190,7 +190,7 @@ define double @f14(double %a, double %b, i16 *%ptr) {
; CHECK-LABEL: f14:
; CHECK-NOT: chhsi
; CHECK: br %r14
- %val = load i16 *%ptr
+ %val = load i16 , i16 *%ptr
%ext = sext i16 %val to i32
%cond = icmp slt i32 %ext, 32768
%res = select i1 %cond, double %a, double %b
@@ -204,7 +204,7 @@ define double @f15(double %a, double %b, i16 *%ptr) {
; CHECK: chhsi 0(%r2), -32767
; CHECK-NEXT: jh
; CHECK: br %r14
- %val = load i16 *%ptr
+ %val = load i16 , i16 *%ptr
%ext = sext i16 %val to i32
%cond = icmp sgt i32 %ext, -32767
%res = select i1 %cond, double %a, double %b
@@ -217,7 +217,7 @@ define double @f16(double %a, double %b, i16 *%ptr) {
; CHECK-LABEL: f16:
; CHECK-NOT: chhsi
; CHECK: br %r14
- %val = load i16 *%ptr
+ %val = load i16 , i16 *%ptr
%ext = sext i16 %val to i32
%cond = icmp sgt i32 %ext, -32769
%res = select i1 %cond, double %a, double %b
diff --git a/test/CodeGen/SystemZ/int-cmp-31.ll b/test/CodeGen/SystemZ/int-cmp-31.ll
index 21539f20470e..298b446e7f1d 100644
--- a/test/CodeGen/SystemZ/int-cmp-31.ll
+++ b/test/CodeGen/SystemZ/int-cmp-31.ll
@@ -10,7 +10,7 @@ define double @f1(double %a, double %b, i16 *%ptr) {
; CHECK: clhhsi 0(%r2), 1
; CHECK-NEXT: jh
; CHECK: br %r14
- %val = load i16 *%ptr
+ %val = load i16 , i16 *%ptr
%ext = zext i16 %val to i64
%cond = icmp ugt i64 %ext, 1
%res = select i1 %cond, double %a, double %b
@@ -24,7 +24,7 @@ define double @f2(double %a, double %b, i16 *%ptr) {
; CHECK: clhhsi 0(%r2), 1
; CHECK-NEXT: jh
; CHECK: br %r14
- %val = load i16 *%ptr
+ %val = load i16 , i16 *%ptr
%ext = sext i16 %val to i64
%cond = icmp ugt i64 %ext, 1
%res = select i1 %cond, double %a, double %b
@@ -38,7 +38,7 @@ define double @f3(double %a, double %b, i16 *%ptr) {
; CHECK: clhhsi 0(%r2), 65534
; CHECK-NEXT: jl
; CHECK: br %r14
- %val = load i16 *%ptr
+ %val = load i16 , i16 *%ptr
%ext = zext i16 %val to i64
%cond = icmp ult i64 %ext, 65534
%res = select i1 %cond, double %a, double %b
@@ -52,7 +52,7 @@ define double @f4(double %a, double %b, i16 *%ptr) {
; CHECK: clhhsi 0(%r2), 65534
; CHECK-NEXT: jl
; CHECK: br %r14
- %val = load i16 *%ptr
+ %val = load i16 , i16 *%ptr
%ext = sext i16 %val to i64
%cond = icmp ult i64 %ext, -2
%res = select i1 %cond, double %a, double %b
@@ -65,7 +65,7 @@ define double @f5(double %a, double %b, i16 *%ptr) {
; CHECK-LABEL: f5:
; CHECK-NOT: clhhsi
; CHECK: br %r14
- %val = load i16 *%ptr
+ %val = load i16 , i16 *%ptr
%ext = zext i16 %val to i64
%cond = icmp ult i64 %ext, 65536
%res = select i1 %cond, double %a, double %b
@@ -82,7 +82,7 @@ define double @f6(double %a, double %b, i16 *%ptr) {
; CHECK-LABEL: f6:
; CHECK-NOT: clhhsi
; CHECK: br %r14
- %val = load i16 *%ptr
+ %val = load i16 , i16 *%ptr
%ext = sext i16 %val to i64
%cond = icmp ult i64 %ext, 32768
%res = select i1 %cond, double %a, double %b
@@ -94,7 +94,7 @@ define double @f7(double %a, double %b, i16 *%ptr) {
; CHECK-LABEL: f7:
; CHECK-NOT: clhhsi
; CHECK: br %r14
- %val = load i16 *%ptr
+ %val = load i16 , i16 *%ptr
%ext = sext i16 %val to i64
%cond = icmp ult i64 %ext, -32769
%res = select i1 %cond, double %a, double %b
@@ -108,7 +108,7 @@ define double @f8(double %a, double %b, i16 *%ptr) {
; CHECK: clhhsi 0(%r2), 1
; CHECK-NEXT: jh
; CHECK: br %r14
- %val = load i16 *%ptr
+ %val = load i16 , i16 *%ptr
%ext = zext i16 %val to i64
%cond = icmp sgt i64 %ext, 1
%res = select i1 %cond, double %a, double %b
@@ -122,7 +122,7 @@ define double @f9(double %a, double %b, i16 *%ptr) {
; CHECK: chhsi 0(%r2), 1
; CHECK-NEXT: jh
; CHECK: br %r14
- %val = load i16 *%ptr
+ %val = load i16 , i16 *%ptr
%ext = sext i16 %val to i64
%cond = icmp sgt i64 %ext, 1
%res = select i1 %cond, double %a, double %b
@@ -136,7 +136,7 @@ define double @f10(double %a, double %b, i16 *%ptr) {
; CHECK: clhhsi 0(%r2), 65534
; CHECK-NEXT: jl
; CHECK: br %r14
- %val = load i16 *%ptr
+ %val = load i16 , i16 *%ptr
%ext = zext i16 %val to i64
%cond = icmp slt i64 %ext, 65534
%res = select i1 %cond, double %a, double %b
@@ -150,7 +150,7 @@ define double @f11(double %a, double %b, i16 *%ptr) {
; CHECK: chhsi 0(%r2), -2
; CHECK-NEXT: jl
; CHECK: br %r14
- %val = load i16 *%ptr
+ %val = load i16 , i16 *%ptr
%ext = sext i16 %val to i64
%cond = icmp slt i64 %ext, -2
%res = select i1 %cond, double %a, double %b
@@ -163,7 +163,7 @@ define double @f12(double %a, double %b, i16 *%ptr) {
; CHECK-LABEL: f12:
; CHECK-NOT: cli
; CHECK: br %r14
- %val = load i16 *%ptr
+ %val = load i16 , i16 *%ptr
%ext = zext i16 %val to i64
%cond = icmp slt i64 %ext, 65536
%res = select i1 %cond, double %a, double %b
@@ -177,7 +177,7 @@ define double @f13(double %a, double %b, i16 *%ptr) {
; CHECK: chhsi 0(%r2), 32766
; CHECK-NEXT: jl
; CHECK: br %r14
- %val = load i16 *%ptr
+ %val = load i16 , i16 *%ptr
%ext = sext i16 %val to i64
%cond = icmp slt i64 %ext, 32766
%res = select i1 %cond, double %a, double %b
@@ -190,7 +190,7 @@ define double @f14(double %a, double %b, i16 *%ptr) {
; CHECK-LABEL: f14:
; CHECK-NOT: chhsi
; CHECK: br %r14
- %val = load i16 *%ptr
+ %val = load i16 , i16 *%ptr
%ext = sext i16 %val to i64
%cond = icmp slt i64 %ext, 32768
%res = select i1 %cond, double %a, double %b
@@ -204,7 +204,7 @@ define double @f15(double %a, double %b, i16 *%ptr) {
; CHECK: chhsi 0(%r2), -32767
; CHECK-NEXT: jh
; CHECK: br %r14
- %val = load i16 *%ptr
+ %val = load i16 , i16 *%ptr
%ext = sext i16 %val to i64
%cond = icmp sgt i64 %ext, -32767
%res = select i1 %cond, double %a, double %b
@@ -217,7 +217,7 @@ define double @f16(double %a, double %b, i16 *%ptr) {
; CHECK-LABEL: f16:
; CHECK-NOT: chhsi
; CHECK: br %r14
- %val = load i16 *%ptr
+ %val = load i16 , i16 *%ptr
%ext = sext i16 %val to i64
%cond = icmp sgt i64 %ext, -32769
%res = select i1 %cond, double %a, double %b
diff --git a/test/CodeGen/SystemZ/int-cmp-32.ll b/test/CodeGen/SystemZ/int-cmp-32.ll
index 6596f9f3ad84..da0e2d7562dd 100644
--- a/test/CodeGen/SystemZ/int-cmp-32.ll
+++ b/test/CodeGen/SystemZ/int-cmp-32.ll
@@ -9,7 +9,7 @@ define double @f1(double %a, double %b, i32 *%ptr) {
; CHECK-NEXT: jl
; CHECK: ldr %f0, %f2
; CHECK: br %r14
- %val = load i32 *%ptr
+ %val = load i32 , i32 *%ptr
%cond = icmp slt i32 %val, 0
%res = select i1 %cond, double %a, double %b
ret double %res
@@ -22,7 +22,7 @@ define double @f2(double %a, double %b, i32 *%ptr) {
; CHECK-NEXT: jle
; CHECK: ldr %f0, %f2
; CHECK: br %r14
- %val = load i32 *%ptr
+ %val = load i32 , i32 *%ptr
%cond = icmp slt i32 %val, 1
%res = select i1 %cond, double %a, double %b
ret double %res
@@ -35,7 +35,7 @@ define double @f3(double %a, double %b, i32 *%ptr) {
; CHECK-NEXT: jl
; CHECK: ldr %f0, %f2
; CHECK: br %r14
- %val = load i32 *%ptr
+ %val = load i32 , i32 *%ptr
%cond = icmp slt i32 %val, 32767
%res = select i1 %cond, double %a, double %b
ret double %res
@@ -46,7 +46,7 @@ define double @f4(double %a, double %b, i32 *%ptr) {
; CHECK-LABEL: f4:
; CHECK-NOT: chsi
; CHECK: br %r14
- %val = load i32 *%ptr
+ %val = load i32 , i32 *%ptr
%cond = icmp slt i32 %val, 32768
%res = select i1 %cond, double %a, double %b
ret double %res
@@ -59,7 +59,7 @@ define double @f5(double %a, double %b, i32 *%ptr) {
; CHECK-NEXT: jl
; CHECK: ldr %f0, %f2
; CHECK: br %r14
- %val = load i32 *%ptr
+ %val = load i32 , i32 *%ptr
%cond = icmp slt i32 %val, -1
%res = select i1 %cond, double %a, double %b
ret double %res
@@ -72,7 +72,7 @@ define double @f6(double %a, double %b, i32 *%ptr) {
; CHECK-NEXT: jl
; CHECK: ldr %f0, %f2
; CHECK: br %r14
- %val = load i32 *%ptr
+ %val = load i32 , i32 *%ptr
%cond = icmp slt i32 %val, -32768
%res = select i1 %cond, double %a, double %b
ret double %res
@@ -83,7 +83,7 @@ define double @f7(double %a, double %b, i32 *%ptr) {
; CHECK-LABEL: f7:
; CHECK-NOT: chsi
; CHECK: br %r14
- %val = load i32 *%ptr
+ %val = load i32 , i32 *%ptr
%cond = icmp slt i32 %val, -32769
%res = select i1 %cond, double %a, double %b
ret double %res
@@ -96,7 +96,7 @@ define double @f8(double %a, double %b, i32 *%ptr) {
; CHECK-NEXT: je
; CHECK: ldr %f0, %f2
; CHECK: br %r14
- %val = load i32 *%ptr
+ %val = load i32 , i32 *%ptr
%cond = icmp eq i32 %val, 0
%res = select i1 %cond, double %a, double %b
ret double %res
@@ -109,7 +109,7 @@ define double @f9(double %a, double %b, i32 *%ptr) {
; CHECK-NEXT: je
; CHECK: ldr %f0, %f2
; CHECK: br %r14
- %val = load i32 *%ptr
+ %val = load i32 , i32 *%ptr
%cond = icmp eq i32 %val, 1
%res = select i1 %cond, double %a, double %b
ret double %res
@@ -122,7 +122,7 @@ define double @f10(double %a, double %b, i32 *%ptr) {
; CHECK-NEXT: je
; CHECK: ldr %f0, %f2
; CHECK: br %r14
- %val = load i32 *%ptr
+ %val = load i32 , i32 *%ptr
%cond = icmp eq i32 %val, 32767
%res = select i1 %cond, double %a, double %b
ret double %res
@@ -133,7 +133,7 @@ define double @f11(double %a, double %b, i32 *%ptr) {
; CHECK-LABEL: f11:
; CHECK-NOT: chsi
; CHECK: br %r14
- %val = load i32 *%ptr
+ %val = load i32 , i32 *%ptr
%cond = icmp eq i32 %val, 32768
%res = select i1 %cond, double %a, double %b
ret double %res
@@ -146,7 +146,7 @@ define double @f12(double %a, double %b, i32 *%ptr) {
; CHECK-NEXT: je
; CHECK: ldr %f0, %f2
; CHECK: br %r14
- %val = load i32 *%ptr
+ %val = load i32 , i32 *%ptr
%cond = icmp eq i32 %val, -1
%res = select i1 %cond, double %a, double %b
ret double %res
@@ -159,7 +159,7 @@ define double @f13(double %a, double %b, i32 *%ptr) {
; CHECK-NEXT: je
; CHECK: ldr %f0, %f2
; CHECK: br %r14
- %val = load i32 *%ptr
+ %val = load i32 , i32 *%ptr
%cond = icmp eq i32 %val, -32768
%res = select i1 %cond, double %a, double %b
ret double %res
@@ -170,7 +170,7 @@ define double @f14(double %a, double %b, i32 *%ptr) {
; CHECK-LABEL: f14:
; CHECK-NOT: chsi
; CHECK: br %r14
- %val = load i32 *%ptr
+ %val = load i32 , i32 *%ptr
%cond = icmp eq i32 %val, -32769
%res = select i1 %cond, double %a, double %b
ret double %res
@@ -183,8 +183,8 @@ define double @f15(double %a, double %b, i32 %i1, i32 *%base) {
; CHECK-NEXT: jl
; CHECK: ldr %f0, %f2
; CHECK: br %r14
- %ptr = getelementptr i32 *%base, i64 1023
- %val = load i32 *%ptr
+ %ptr = getelementptr i32, i32 *%base, i64 1023
+ %val = load i32 , i32 *%ptr
%cond = icmp slt i32 %val, 0
%res = select i1 %cond, double %a, double %b
ret double %res
@@ -198,8 +198,8 @@ define double @f16(double %a, double %b, i32 *%base) {
; CHECK-NEXT: jl
; CHECK: ldr %f0, %f2
; CHECK: br %r14
- %ptr = getelementptr i32 *%base, i64 1024
- %val = load i32 *%ptr
+ %ptr = getelementptr i32, i32 *%base, i64 1024
+ %val = load i32 , i32 *%ptr
%cond = icmp slt i32 %val, 0
%res = select i1 %cond, double %a, double %b
ret double %res
@@ -213,8 +213,8 @@ define double @f17(double %a, double %b, i32 *%base) {
; CHECK-NEXT: jl
; CHECK: ldr %f0, %f2
; CHECK: br %r14
- %ptr = getelementptr i32 *%base, i64 -1
- %val = load i32 *%ptr
+ %ptr = getelementptr i32, i32 *%base, i64 -1
+ %val = load i32 , i32 *%ptr
%cond = icmp slt i32 %val, 0
%res = select i1 %cond, double %a, double %b
ret double %res
@@ -230,7 +230,7 @@ define double @f18(double %a, double %b, i64 %base, i64 %index) {
; CHECK: br %r14
%add = add i64 %base, %index
%ptr = inttoptr i64 %add to i32 *
- %val = load i32 *%ptr
+ %val = load i32 , i32 *%ptr
%cond = icmp slt i32 %val, 0
%res = select i1 %cond, double %a, double %b
ret double %res
diff --git a/test/CodeGen/SystemZ/int-cmp-33.ll b/test/CodeGen/SystemZ/int-cmp-33.ll
index e5a653b3815d..94f3e705391e 100644
--- a/test/CodeGen/SystemZ/int-cmp-33.ll
+++ b/test/CodeGen/SystemZ/int-cmp-33.ll
@@ -10,7 +10,7 @@ define double @f1(double %a, double %b, i32 *%ptr) {
; CHECK-NEXT: jh
; CHECK: ldr %f0, %f2
; CHECK: br %r14
- %val = load i32 *%ptr
+ %val = load i32 , i32 *%ptr
%cond = icmp ugt i32 %val, 1
%res = select i1 %cond, double %a, double %b
ret double %res
@@ -23,7 +23,7 @@ define double @f2(double %a, double %b, i32 *%ptr) {
; CHECK-NEXT: jl
; CHECK: ldr %f0, %f2
; CHECK: br %r14
- %val = load i32 *%ptr
+ %val = load i32 , i32 *%ptr
%cond = icmp ult i32 %val, 65535
%res = select i1 %cond, double %a, double %b
ret double %res
@@ -34,7 +34,7 @@ define double @f3(double %a, double %b, i32 *%ptr) {
; CHECK-LABEL: f3:
; CHECK-NOT: clfhsi
; CHECK: br %r14
- %val = load i32 *%ptr
+ %val = load i32 , i32 *%ptr
%cond = icmp ult i32 %val, 65536
%res = select i1 %cond, double %a, double %b
ret double %res
@@ -48,7 +48,7 @@ define double @f4(double %a, double %b, i32 *%ptr) {
; CHECK-NEXT: je
; CHECK: ldr %f0, %f2
; CHECK: br %r14
- %val = load i32 *%ptr
+ %val = load i32 , i32 *%ptr
%cond = icmp eq i32 %val, 32768
%res = select i1 %cond, double %a, double %b
ret double %res
@@ -61,7 +61,7 @@ define double @f5(double %a, double %b, i32 *%ptr) {
; CHECK-NEXT: je
; CHECK: ldr %f0, %f2
; CHECK: br %r14
- %val = load i32 *%ptr
+ %val = load i32 , i32 *%ptr
%cond = icmp eq i32 %val, 65535
%res = select i1 %cond, double %a, double %b
ret double %res
@@ -72,7 +72,7 @@ define double @f6(double %a, double %b, i32 *%ptr) {
; CHECK-LABEL: f6:
; CHECK-NOT: clfhsi
; CHECK: br %r14
- %val = load i32 *%ptr
+ %val = load i32 , i32 *%ptr
%cond = icmp eq i32 %val, 65536
%res = select i1 %cond, double %a, double %b
ret double %res
@@ -85,8 +85,8 @@ define double @f7(double %a, double %b, i32 %i1, i32 *%base) {
; CHECK-NEXT: jh
; CHECK: ldr %f0, %f2
; CHECK: br %r14
- %ptr = getelementptr i32 *%base, i64 1023
- %val = load i32 *%ptr
+ %ptr = getelementptr i32, i32 *%base, i64 1023
+ %val = load i32 , i32 *%ptr
%cond = icmp ugt i32 %val, 1
%res = select i1 %cond, double %a, double %b
ret double %res
@@ -100,8 +100,8 @@ define double @f8(double %a, double %b, i32 *%base) {
; CHECK-NEXT: jh
; CHECK: ldr %f0, %f2
; CHECK: br %r14
- %ptr = getelementptr i32 *%base, i64 1024
- %val = load i32 *%ptr
+ %ptr = getelementptr i32, i32 *%base, i64 1024
+ %val = load i32 , i32 *%ptr
%cond = icmp ugt i32 %val, 1
%res = select i1 %cond, double %a, double %b
ret double %res
@@ -115,8 +115,8 @@ define double @f9(double %a, double %b, i32 *%base) {
; CHECK-NEXT: jh
; CHECK: ldr %f0, %f2
; CHECK: br %r14
- %ptr = getelementptr i32 *%base, i64 -1
- %val = load i32 *%ptr
+ %ptr = getelementptr i32, i32 *%base, i64 -1
+ %val = load i32 , i32 *%ptr
%cond = icmp ugt i32 %val, 1
%res = select i1 %cond, double %a, double %b
ret double %res
@@ -132,7 +132,7 @@ define double @f10(double %a, double %b, i64 %base, i64 %index) {
; CHECK: br %r14
%add = add i64 %base, %index
%ptr = inttoptr i64 %add to i32 *
- %val = load i32 *%ptr
+ %val = load i32 , i32 *%ptr
%cond = icmp ugt i32 %val, 1
%res = select i1 %cond, double %a, double %b
ret double %res
diff --git a/test/CodeGen/SystemZ/int-cmp-34.ll b/test/CodeGen/SystemZ/int-cmp-34.ll
index 8a0219775a4e..114b694a3b09 100644
--- a/test/CodeGen/SystemZ/int-cmp-34.ll
+++ b/test/CodeGen/SystemZ/int-cmp-34.ll
@@ -9,7 +9,7 @@ define double @f1(double %a, double %b, i64 *%ptr) {
; CHECK-NEXT: jl
; CHECK: ldr %f0, %f2
; CHECK: br %r14
- %val = load i64 *%ptr
+ %val = load i64 , i64 *%ptr
%cond = icmp slt i64 %val, 0
%res = select i1 %cond, double %a, double %b
ret double %res
@@ -22,7 +22,7 @@ define double @f2(double %a, double %b, i64 *%ptr) {
; CHECK-NEXT: jle
; CHECK: ldr %f0, %f2
; CHECK: br %r14
- %val = load i64 *%ptr
+ %val = load i64 , i64 *%ptr
%cond = icmp slt i64 %val, 1
%res = select i1 %cond, double %a, double %b
ret double %res
@@ -35,7 +35,7 @@ define double @f3(double %a, double %b, i64 *%ptr) {
; CHECK-NEXT: jl
; CHECK: ldr %f0, %f2
; CHECK: br %r14
- %val = load i64 *%ptr
+ %val = load i64 , i64 *%ptr
%cond = icmp slt i64 %val, 32767
%res = select i1 %cond, double %a, double %b
ret double %res
@@ -46,7 +46,7 @@ define double @f4(double %a, double %b, i64 *%ptr) {
; CHECK-LABEL: f4:
; CHECK-NOT: cghsi
; CHECK: br %r14
- %val = load i64 *%ptr
+ %val = load i64 , i64 *%ptr
%cond = icmp slt i64 %val, 32768
%res = select i1 %cond, double %a, double %b
ret double %res
@@ -59,7 +59,7 @@ define double @f5(double %a, double %b, i64 *%ptr) {
; CHECK-NEXT: jl
; CHECK: ldr %f0, %f2
; CHECK: br %r14
- %val = load i64 *%ptr
+ %val = load i64 , i64 *%ptr
%cond = icmp slt i64 %val, -1
%res = select i1 %cond, double %a, double %b
ret double %res
@@ -72,7 +72,7 @@ define double @f6(double %a, double %b, i64 *%ptr) {
; CHECK-NEXT: jl
; CHECK: ldr %f0, %f2
; CHECK: br %r14
- %val = load i64 *%ptr
+ %val = load i64 , i64 *%ptr
%cond = icmp slt i64 %val, -32768
%res = select i1 %cond, double %a, double %b
ret double %res
@@ -83,7 +83,7 @@ define double @f7(double %a, double %b, i64 *%ptr) {
; CHECK-LABEL: f7:
; CHECK-NOT: cghsi
; CHECK: br %r14
- %val = load i64 *%ptr
+ %val = load i64 , i64 *%ptr
%cond = icmp slt i64 %val, -32769
%res = select i1 %cond, double %a, double %b
ret double %res
@@ -96,7 +96,7 @@ define double @f8(double %a, double %b, i64 *%ptr) {
; CHECK-NEXT: je
; CHECK: ldr %f0, %f2
; CHECK: br %r14
- %val = load i64 *%ptr
+ %val = load i64 , i64 *%ptr
%cond = icmp eq i64 %val, 0
%res = select i1 %cond, double %a, double %b
ret double %res
@@ -109,7 +109,7 @@ define double @f9(double %a, double %b, i64 *%ptr) {
; CHECK-NEXT: je
; CHECK: ldr %f0, %f2
; CHECK: br %r14
- %val = load i64 *%ptr
+ %val = load i64 , i64 *%ptr
%cond = icmp eq i64 %val, 1
%res = select i1 %cond, double %a, double %b
ret double %res
@@ -122,7 +122,7 @@ define double @f10(double %a, double %b, i64 *%ptr) {
; CHECK-NEXT: je
; CHECK: ldr %f0, %f2
; CHECK: br %r14
- %val = load i64 *%ptr
+ %val = load i64 , i64 *%ptr
%cond = icmp eq i64 %val, 32767
%res = select i1 %cond, double %a, double %b
ret double %res
@@ -133,7 +133,7 @@ define double @f11(double %a, double %b, i64 *%ptr) {
; CHECK-LABEL: f11:
; CHECK-NOT: cghsi
; CHECK: br %r14
- %val = load i64 *%ptr
+ %val = load i64 , i64 *%ptr
%cond = icmp eq i64 %val, 32768
%res = select i1 %cond, double %a, double %b
ret double %res
@@ -146,7 +146,7 @@ define double @f12(double %a, double %b, i64 *%ptr) {
; CHECK-NEXT: je
; CHECK: ldr %f0, %f2
; CHECK: br %r14
- %val = load i64 *%ptr
+ %val = load i64 , i64 *%ptr
%cond = icmp eq i64 %val, -1
%res = select i1 %cond, double %a, double %b
ret double %res
@@ -159,7 +159,7 @@ define double @f13(double %a, double %b, i64 *%ptr) {
; CHECK-NEXT: je
; CHECK: ldr %f0, %f2
; CHECK: br %r14
- %val = load i64 *%ptr
+ %val = load i64 , i64 *%ptr
%cond = icmp eq i64 %val, -32768
%res = select i1 %cond, double %a, double %b
ret double %res
@@ -170,7 +170,7 @@ define double @f14(double %a, double %b, i64 *%ptr) {
; CHECK-LABEL: f14:
; CHECK-NOT: cghsi
; CHECK: br %r14
- %val = load i64 *%ptr
+ %val = load i64 , i64 *%ptr
%cond = icmp eq i64 %val, -32769
%res = select i1 %cond, double %a, double %b
ret double %res
@@ -183,8 +183,8 @@ define double @f15(double %a, double %b, i64 %i1, i64 *%base) {
; CHECK-NEXT: jl
; CHECK: ldr %f0, %f2
; CHECK: br %r14
- %ptr = getelementptr i64 *%base, i64 511
- %val = load i64 *%ptr
+ %ptr = getelementptr i64, i64 *%base, i64 511
+ %val = load i64 , i64 *%ptr
%cond = icmp slt i64 %val, 0
%res = select i1 %cond, double %a, double %b
ret double %res
@@ -198,8 +198,8 @@ define double @f16(double %a, double %b, i64 *%base) {
; CHECK-NEXT: jl
; CHECK: ldr %f0, %f2
; CHECK: br %r14
- %ptr = getelementptr i64 *%base, i64 512
- %val = load i64 *%ptr
+ %ptr = getelementptr i64, i64 *%base, i64 512
+ %val = load i64 , i64 *%ptr
%cond = icmp slt i64 %val, 0
%res = select i1 %cond, double %a, double %b
ret double %res
@@ -213,8 +213,8 @@ define double @f17(double %a, double %b, i64 *%base) {
; CHECK-NEXT: jl
; CHECK: ldr %f0, %f2
; CHECK: br %r14
- %ptr = getelementptr i64 *%base, i64 -1
- %val = load i64 *%ptr
+ %ptr = getelementptr i64, i64 *%base, i64 -1
+ %val = load i64 , i64 *%ptr
%cond = icmp slt i64 %val, 0
%res = select i1 %cond, double %a, double %b
ret double %res
@@ -230,7 +230,7 @@ define double @f18(double %a, double %b, i64 %base, i64 %index) {
; CHECK: br %r14
%add = add i64 %base, %index
%ptr = inttoptr i64 %add to i64 *
- %val = load i64 *%ptr
+ %val = load i64 , i64 *%ptr
%cond = icmp slt i64 %val, 0
%res = select i1 %cond, double %a, double %b
ret double %res
diff --git a/test/CodeGen/SystemZ/int-cmp-35.ll b/test/CodeGen/SystemZ/int-cmp-35.ll
index 539248a86a7b..0eaf4fa0a075 100644
--- a/test/CodeGen/SystemZ/int-cmp-35.ll
+++ b/test/CodeGen/SystemZ/int-cmp-35.ll
@@ -10,7 +10,7 @@ define double @f1(double %a, double %b, i64 *%ptr) {
; CHECK-NEXT: jl
; CHECK: ldr %f0, %f2
; CHECK: br %r14
- %val = load i64 *%ptr
+ %val = load i64 , i64 *%ptr
%cond = icmp ult i64 %val, 2
%res = select i1 %cond, double %a, double %b
ret double %res
@@ -23,7 +23,7 @@ define double @f2(double %a, double %b, i64 *%ptr) {
; CHECK-NEXT: jl
; CHECK: ldr %f0, %f2
; CHECK: br %r14
- %val = load i64 *%ptr
+ %val = load i64 , i64 *%ptr
%cond = icmp ult i64 %val, 65535
%res = select i1 %cond, double %a, double %b
ret double %res
@@ -34,7 +34,7 @@ define double @f3(double %a, double %b, i64 *%ptr) {
; CHECK-LABEL: f3:
; CHECK-NOT: clghsi
; CHECK: br %r14
- %val = load i64 *%ptr
+ %val = load i64 , i64 *%ptr
%cond = icmp ult i64 %val, 65536
%res = select i1 %cond, double %a, double %b
ret double %res
@@ -48,7 +48,7 @@ define double @f4(double %a, double %b, i64 *%ptr) {
; CHECK-NEXT: je
; CHECK: ldr %f0, %f2
; CHECK: br %r14
- %val = load i64 *%ptr
+ %val = load i64 , i64 *%ptr
%cond = icmp eq i64 %val, 32768
%res = select i1 %cond, double %a, double %b
ret double %res
@@ -61,7 +61,7 @@ define double @f5(double %a, double %b, i64 *%ptr) {
; CHECK-NEXT: je
; CHECK: ldr %f0, %f2
; CHECK: br %r14
- %val = load i64 *%ptr
+ %val = load i64 , i64 *%ptr
%cond = icmp eq i64 %val, 65535
%res = select i1 %cond, double %a, double %b
ret double %res
@@ -72,7 +72,7 @@ define double @f6(double %a, double %b, i64 *%ptr) {
; CHECK-LABEL: f6:
; CHECK-NOT: clghsi
; CHECK: br %r14
- %val = load i64 *%ptr
+ %val = load i64 , i64 *%ptr
%cond = icmp eq i64 %val, 65536
%res = select i1 %cond, double %a, double %b
ret double %res
@@ -85,8 +85,8 @@ define double @f7(double %a, double %b, i64 %i1, i64 *%base) {
; CHECK-NEXT: jl
; CHECK: ldr %f0, %f2
; CHECK: br %r14
- %ptr = getelementptr i64 *%base, i64 511
- %val = load i64 *%ptr
+ %ptr = getelementptr i64, i64 *%base, i64 511
+ %val = load i64 , i64 *%ptr
%cond = icmp ult i64 %val, 2
%res = select i1 %cond, double %a, double %b
ret double %res
@@ -100,8 +100,8 @@ define double @f8(double %a, double %b, i64 *%base) {
; CHECK-NEXT: jl
; CHECK: ldr %f0, %f2
; CHECK: br %r14
- %ptr = getelementptr i64 *%base, i64 512
- %val = load i64 *%ptr
+ %ptr = getelementptr i64, i64 *%base, i64 512
+ %val = load i64 , i64 *%ptr
%cond = icmp ult i64 %val, 2
%res = select i1 %cond, double %a, double %b
ret double %res
@@ -115,8 +115,8 @@ define double @f9(double %a, double %b, i64 *%base) {
; CHECK-NEXT: jl
; CHECK: ldr %f0, %f2
; CHECK: br %r14
- %ptr = getelementptr i64 *%base, i64 -1
- %val = load i64 *%ptr
+ %ptr = getelementptr i64, i64 *%base, i64 -1
+ %val = load i64 , i64 *%ptr
%cond = icmp ult i64 %val, 2
%res = select i1 %cond, double %a, double %b
ret double %res
@@ -132,7 +132,7 @@ define double @f10(double %a, double %b, i64 %base, i64 %index) {
; CHECK: br %r14
%add = add i64 %base, %index
%ptr = inttoptr i64 %add to i64 *
- %val = load i64 *%ptr
+ %val = load i64 , i64 *%ptr
%cond = icmp ult i64 %val, 2
%res = select i1 %cond, double %a, double %b
ret double %res
diff --git a/test/CodeGen/SystemZ/int-cmp-36.ll b/test/CodeGen/SystemZ/int-cmp-36.ll
index fa2d4bf6c617..113d2c1587e0 100644
--- a/test/CodeGen/SystemZ/int-cmp-36.ll
+++ b/test/CodeGen/SystemZ/int-cmp-36.ll
@@ -13,7 +13,7 @@ define i32 @f1(i32 %src1) {
; CHECK-NEXT: jl
; CHECK: br %r14
entry:
- %val = load i16 *@g
+ %val = load i16 , i16 *@g
%src2 = sext i16 %val to i32
%cond = icmp slt i32 %src1, %src2
br i1 %cond, label %exit, label %mulb
@@ -31,7 +31,7 @@ define i32 @f2(i32 %src1) {
; CHECK-NOT: chrl
; CHECK: br %r14
entry:
- %val = load i16 *@g
+ %val = load i16 , i16 *@g
%src2 = sext i16 %val to i32
%cond = icmp ult i32 %src1, %src2
br i1 %cond, label %exit, label %mulb
@@ -50,7 +50,7 @@ define i32 @f3(i32 %src1) {
; CHECK-NEXT: je
; CHECK: br %r14
entry:
- %val = load i16 *@g
+ %val = load i16 , i16 *@g
%src2 = sext i16 %val to i32
%cond = icmp eq i32 %src1, %src2
br i1 %cond, label %exit, label %mulb
@@ -69,7 +69,7 @@ define i32 @f4(i32 %src1) {
; CHECK-NEXT: jlh
; CHECK: br %r14
entry:
- %val = load i16 *@g
+ %val = load i16 , i16 *@g
%src2 = sext i16 %val to i32
%cond = icmp ne i32 %src1, %src2
br i1 %cond, label %exit, label %mulb
@@ -89,7 +89,7 @@ define i32 @f5(i32 %src1) {
; CHECK-NEXT: jl
; CHECK: br %r14
entry:
- %val = load i16 *@h, align 1
+ %val = load i16 , i16 *@h, align 1
%src2 = sext i16 %val to i32
%cond = icmp slt i32 %src1, %src2
br i1 %cond, label %exit, label %mulb
@@ -108,7 +108,7 @@ define i32 @f6(i32 %src2) {
; CHECK-NEXT: jh {{\.L.*}}
; CHECK: br %r14
entry:
- %val = load i16 *@g
+ %val = load i16 , i16 *@g
%src1 = sext i16 %val to i32
%cond = icmp slt i32 %src1, %src2
br i1 %cond, label %exit, label %mulb
diff --git a/test/CodeGen/SystemZ/int-cmp-37.ll b/test/CodeGen/SystemZ/int-cmp-37.ll
index 8095ed173012..ac5d39f96511 100644
--- a/test/CodeGen/SystemZ/int-cmp-37.ll
+++ b/test/CodeGen/SystemZ/int-cmp-37.ll
@@ -13,7 +13,7 @@ define i32 @f1(i32 %src1) {
; CHECK-NEXT: jl
; CHECK: br %r14
entry:
- %val = load i16 *@g
+ %val = load i16 , i16 *@g
%src2 = zext i16 %val to i32
%cond = icmp ult i32 %src1, %src2
br i1 %cond, label %exit, label %mulb
@@ -31,7 +31,7 @@ define i32 @f2(i32 %src1) {
; CHECK-NOT: clhrl
; CHECK: br %r14
entry:
- %val = load i16 *@g
+ %val = load i16 , i16 *@g
%src2 = zext i16 %val to i32
%cond = icmp slt i32 %src1, %src2
br i1 %cond, label %exit, label %mulb
@@ -50,7 +50,7 @@ define i32 @f3(i32 %src1) {
; CHECK-NEXT: je
; CHECK: br %r14
entry:
- %val = load i16 *@g
+ %val = load i16 , i16 *@g
%src2 = zext i16 %val to i32
%cond = icmp eq i32 %src1, %src2
br i1 %cond, label %exit, label %mulb
@@ -69,7 +69,7 @@ define i32 @f4(i32 %src1) {
; CHECK-NEXT: jlh
; CHECK: br %r14
entry:
- %val = load i16 *@g
+ %val = load i16 , i16 *@g
%src2 = zext i16 %val to i32
%cond = icmp ne i32 %src1, %src2
br i1 %cond, label %exit, label %mulb
@@ -89,7 +89,7 @@ define i32 @f5(i32 %src1) {
; CHECK: clrjl %r2, [[VAL]],
; CHECK: br %r14
entry:
- %val = load i16 *@h, align 1
+ %val = load i16 , i16 *@h, align 1
%src2 = zext i16 %val to i32
%cond = icmp ult i32 %src1, %src2
br i1 %cond, label %exit, label %mulb
@@ -108,7 +108,7 @@ define i32 @f6(i32 %src2) {
; CHECK-NEXT: jh {{\.L.*}}
; CHECK: br %r14
entry:
- %val = load i16 *@g
+ %val = load i16 , i16 *@g
%src1 = zext i16 %val to i32
%cond = icmp ult i32 %src1, %src2
br i1 %cond, label %exit, label %mulb
diff --git a/test/CodeGen/SystemZ/int-cmp-38.ll b/test/CodeGen/SystemZ/int-cmp-38.ll
index 901758378729..0d8913b02861 100644
--- a/test/CodeGen/SystemZ/int-cmp-38.ll
+++ b/test/CodeGen/SystemZ/int-cmp-38.ll
@@ -13,7 +13,7 @@ define i32 @f1(i32 %src1) {
; CHECK-NEXT: jl
; CHECK: br %r14
entry:
- %src2 = load i32 *@g
+ %src2 = load i32 , i32 *@g
%cond = icmp slt i32 %src1, %src2
br i1 %cond, label %exit, label %mulb
mulb:
@@ -31,7 +31,7 @@ define i32 @f2(i32 %src1) {
; CHECK-NEXT: jl
; CHECK: br %r14
entry:
- %src2 = load i32 *@g
+ %src2 = load i32 , i32 *@g
%cond = icmp ult i32 %src1, %src2
br i1 %cond, label %exit, label %mulb
mulb:
@@ -49,7 +49,7 @@ define i32 @f3(i32 %src1) {
; CHECK-NEXT: je
; CHECK: br %r14
entry:
- %src2 = load i32 *@g
+ %src2 = load i32 , i32 *@g
%cond = icmp eq i32 %src1, %src2
br i1 %cond, label %exit, label %mulb
mulb:
@@ -67,7 +67,7 @@ define i32 @f4(i32 %src1) {
; CHECK-NEXT: jlh
; CHECK: br %r14
entry:
- %src2 = load i32 *@g
+ %src2 = load i32 , i32 *@g
%cond = icmp ne i32 %src1, %src2
br i1 %cond, label %exit, label %mulb
mulb:
@@ -86,7 +86,7 @@ define i32 @f5(i32 %src1) {
; CHECK-NEXT: jl
; CHECK: br %r14
entry:
- %src2 = load i32 *@h, align 2
+ %src2 = load i32 , i32 *@h, align 2
%cond = icmp slt i32 %src1, %src2
br i1 %cond, label %exit, label %mulb
mulb:
@@ -105,7 +105,7 @@ define i32 @f6(i32 %src1) {
; CHECK-NEXT: jl
; CHECK: br %r14
entry:
- %src2 = load i32 *@h, align 2
+ %src2 = load i32 , i32 *@h, align 2
%cond = icmp ult i32 %src1, %src2
br i1 %cond, label %exit, label %mulb
mulb:
@@ -123,7 +123,7 @@ define i32 @f7(i32 %src2) {
; CHECK-NEXT: jh {{\.L.*}}
; CHECK: br %r14
entry:
- %src1 = load i32 *@g
+ %src1 = load i32 , i32 *@g
%cond = icmp slt i32 %src1, %src2
br i1 %cond, label %exit, label %mulb
mulb:
diff --git a/test/CodeGen/SystemZ/int-cmp-39.ll b/test/CodeGen/SystemZ/int-cmp-39.ll
index fc9547d4ceb4..5e3abceeca45 100644
--- a/test/CodeGen/SystemZ/int-cmp-39.ll
+++ b/test/CodeGen/SystemZ/int-cmp-39.ll
@@ -13,7 +13,7 @@ define i64 @f1(i64 %src1) {
; CHECK-NEXT: jl
; CHECK: br %r14
entry:
- %val = load i16 *@g
+ %val = load i16 , i16 *@g
%src2 = sext i16 %val to i64
%cond = icmp slt i64 %src1, %src2
br i1 %cond, label %exit, label %mulb
@@ -31,7 +31,7 @@ define i64 @f2(i64 %src1) {
; CHECK-NOT: cghrl
; CHECK: br %r14
entry:
- %val = load i16 *@g
+ %val = load i16 , i16 *@g
%src2 = sext i16 %val to i64
%cond = icmp ult i64 %src1, %src2
br i1 %cond, label %exit, label %mulb
@@ -50,7 +50,7 @@ define i64 @f3(i64 %src1) {
; CHECK-NEXT: je
; CHECK: br %r14
entry:
- %val = load i16 *@g
+ %val = load i16 , i16 *@g
%src2 = sext i16 %val to i64
%cond = icmp eq i64 %src1, %src2
br i1 %cond, label %exit, label %mulb
@@ -69,7 +69,7 @@ define i64 @f4(i64 %src1) {
; CHECK-NEXT: jlh
; CHECK: br %r14
entry:
- %val = load i16 *@g
+ %val = load i16 , i16 *@g
%src2 = sext i16 %val to i64
%cond = icmp ne i64 %src1, %src2
br i1 %cond, label %exit, label %mulb
@@ -89,7 +89,7 @@ define i64 @f5(i64 %src1) {
; CHECK-NEXT: jl
; CHECK: br %r14
entry:
- %val = load i16 *@h, align 1
+ %val = load i16 , i16 *@h, align 1
%src2 = sext i16 %val to i64
%cond = icmp slt i64 %src1, %src2
br i1 %cond, label %exit, label %mulb
@@ -108,7 +108,7 @@ define i64 @f6(i64 %src2) {
; CHECK-NEXT: jh {{\.L.*}}
; CHECK: br %r14
entry:
- %val = load i16 *@g
+ %val = load i16 , i16 *@g
%src1 = sext i16 %val to i64
%cond = icmp slt i64 %src1, %src2
br i1 %cond, label %exit, label %mulb
diff --git a/test/CodeGen/SystemZ/int-cmp-40.ll b/test/CodeGen/SystemZ/int-cmp-40.ll
index 9c532f1cbc6b..92696d71fc48 100644
--- a/test/CodeGen/SystemZ/int-cmp-40.ll
+++ b/test/CodeGen/SystemZ/int-cmp-40.ll
@@ -13,7 +13,7 @@ define i64 @f1(i64 %src1) {
; CHECK-NEXT: jl
; CHECK: br %r14
entry:
- %val = load i16 *@g
+ %val = load i16 , i16 *@g
%src2 = zext i16 %val to i64
%cond = icmp ult i64 %src1, %src2
br i1 %cond, label %exit, label %mulb
@@ -31,7 +31,7 @@ define i64 @f2(i64 %src1) {
; CHECK-NOT: clghrl
; CHECK: br %r14
entry:
- %val = load i16 *@g
+ %val = load i16 , i16 *@g
%src2 = zext i16 %val to i64
%cond = icmp slt i64 %src1, %src2
br i1 %cond, label %exit, label %mulb
@@ -50,7 +50,7 @@ define i64 @f3(i64 %src1) {
; CHECK-NEXT: je
; CHECK: br %r14
entry:
- %val = load i16 *@g
+ %val = load i16 , i16 *@g
%src2 = zext i16 %val to i64
%cond = icmp eq i64 %src1, %src2
br i1 %cond, label %exit, label %mulb
@@ -69,7 +69,7 @@ define i64 @f4(i64 %src1) {
; CHECK-NEXT: jlh
; CHECK: br %r14
entry:
- %val = load i16 *@g
+ %val = load i16 , i16 *@g
%src2 = zext i16 %val to i64
%cond = icmp ne i64 %src1, %src2
br i1 %cond, label %exit, label %mulb
@@ -89,7 +89,7 @@ define i64 @f5(i64 %src1) {
; CHECK: clgrjl %r2, [[VAL]],
; CHECK: br %r14
entry:
- %val = load i16 *@h, align 1
+ %val = load i16 , i16 *@h, align 1
%src2 = zext i16 %val to i64
%cond = icmp ult i64 %src1, %src2
br i1 %cond, label %exit, label %mulb
@@ -108,7 +108,7 @@ define i64 @f6(i64 %src2) {
; CHECK-NEXT: jh {{\.L.*}}
; CHECK: br %r14
entry:
- %val = load i16 *@g
+ %val = load i16 , i16 *@g
%src1 = zext i16 %val to i64
%cond = icmp ult i64 %src1, %src2
br i1 %cond, label %exit, label %mulb
diff --git a/test/CodeGen/SystemZ/int-cmp-41.ll b/test/CodeGen/SystemZ/int-cmp-41.ll
index 77f6e7d76f1c..f4f5b4a0cf16 100644
--- a/test/CodeGen/SystemZ/int-cmp-41.ll
+++ b/test/CodeGen/SystemZ/int-cmp-41.ll
@@ -13,7 +13,7 @@ define i64 @f1(i64 %src1) {
; CHECK-NEXT: jl
; CHECK: br %r14
entry:
- %val = load i32 *@g
+ %val = load i32 , i32 *@g
%src2 = sext i32 %val to i64
%cond = icmp slt i64 %src1, %src2
br i1 %cond, label %exit, label %mulb
@@ -31,7 +31,7 @@ define i64 @f2(i64 %src1) {
; CHECK-NOT: cgfrl
; CHECK: br %r14
entry:
- %val = load i32 *@g
+ %val = load i32 , i32 *@g
%src2 = sext i32 %val to i64
%cond = icmp ult i64 %src1, %src2
br i1 %cond, label %exit, label %mulb
@@ -50,7 +50,7 @@ define i64 @f3(i64 %src1) {
; CHECK-NEXT: je
; CHECK: br %r14
entry:
- %val = load i32 *@g
+ %val = load i32 , i32 *@g
%src2 = sext i32 %val to i64
%cond = icmp eq i64 %src1, %src2
br i1 %cond, label %exit, label %mulb
@@ -69,7 +69,7 @@ define i64 @f4(i64 %src1) {
; CHECK-NEXT: jlh
; CHECK: br %r14
entry:
- %val = load i32 *@g
+ %val = load i32 , i32 *@g
%src2 = sext i32 %val to i64
%cond = icmp ne i64 %src1, %src2
br i1 %cond, label %exit, label %mulb
@@ -89,7 +89,7 @@ define i64 @f5(i64 %src1) {
; CHECK-NEXT: jl
; CHECK: br %r14
entry:
- %val = load i32 *@h, align 2
+ %val = load i32 , i32 *@h, align 2
%src2 = sext i32 %val to i64
%cond = icmp slt i64 %src1, %src2
br i1 %cond, label %exit, label %mulb
@@ -108,7 +108,7 @@ define i64 @f6(i64 %src2) {
; CHECK-NEXT: jh {{\.L.*}}
; CHECK: br %r14
entry:
- %val = load i32 *@g
+ %val = load i32 , i32 *@g
%src1 = sext i32 %val to i64
%cond = icmp slt i64 %src1, %src2
br i1 %cond, label %exit, label %mulb
diff --git a/test/CodeGen/SystemZ/int-cmp-42.ll b/test/CodeGen/SystemZ/int-cmp-42.ll
index 94ef0082c441..ca87b865ad14 100644
--- a/test/CodeGen/SystemZ/int-cmp-42.ll
+++ b/test/CodeGen/SystemZ/int-cmp-42.ll
@@ -13,7 +13,7 @@ define i64 @f1(i64 %src1) {
; CHECK-NEXT: jl
; CHECK: br %r14
entry:
- %val = load i32 *@g
+ %val = load i32 , i32 *@g
%src2 = zext i32 %val to i64
%cond = icmp ult i64 %src1, %src2
br i1 %cond, label %exit, label %mulb
@@ -31,7 +31,7 @@ define i64 @f2(i64 %src1) {
; CHECK-NOT: clgfrl
; CHECK: br %r14
entry:
- %val = load i32 *@g
+ %val = load i32 , i32 *@g
%src2 = zext i32 %val to i64
%cond = icmp slt i64 %src1, %src2
br i1 %cond, label %exit, label %mulb
@@ -50,7 +50,7 @@ define i64 @f3(i64 %src1) {
; CHECK-NEXT: je
; CHECK: br %r14
entry:
- %val = load i32 *@g
+ %val = load i32 , i32 *@g
%src2 = zext i32 %val to i64
%cond = icmp eq i64 %src1, %src2
br i1 %cond, label %exit, label %mulb
@@ -69,7 +69,7 @@ define i64 @f4(i64 %src1) {
; CHECK-NEXT: jlh
; CHECK: br %r14
entry:
- %val = load i32 *@g
+ %val = load i32 , i32 *@g
%src2 = zext i32 %val to i64
%cond = icmp ne i64 %src1, %src2
br i1 %cond, label %exit, label %mulb
@@ -89,7 +89,7 @@ define i64 @f5(i64 %src1) {
; CHECK-NEXT: jl
; CHECK: br %r14
entry:
- %val = load i32 *@h, align 2
+ %val = load i32 , i32 *@h, align 2
%src2 = zext i32 %val to i64
%cond = icmp ult i64 %src1, %src2
br i1 %cond, label %exit, label %mulb
@@ -108,7 +108,7 @@ define i64 @f6(i64 %src2) {
; CHECK-NEXT: jh {{\.L.*}}
; CHECK: br %r14
entry:
- %val = load i32 *@g
+ %val = load i32 , i32 *@g
%src1 = zext i32 %val to i64
%cond = icmp ult i64 %src1, %src2
br i1 %cond, label %exit, label %mulb
diff --git a/test/CodeGen/SystemZ/int-cmp-43.ll b/test/CodeGen/SystemZ/int-cmp-43.ll
index 1a625886dec2..108b041fa377 100644
--- a/test/CodeGen/SystemZ/int-cmp-43.ll
+++ b/test/CodeGen/SystemZ/int-cmp-43.ll
@@ -13,7 +13,7 @@ define i64 @f1(i64 %src1) {
; CHECK-NEXT: jl
; CHECK: br %r14
entry:
- %src2 = load i64 *@g
+ %src2 = load i64 , i64 *@g
%cond = icmp slt i64 %src1, %src2
br i1 %cond, label %exit, label %mulb
mulb:
@@ -31,7 +31,7 @@ define i64 @f2(i64 %src1) {
; CHECK-NEXT: jl
; CHECK: br %r14
entry:
- %src2 = load i64 *@g
+ %src2 = load i64 , i64 *@g
%cond = icmp ult i64 %src1, %src2
br i1 %cond, label %exit, label %mulb
mulb:
@@ -49,7 +49,7 @@ define i64 @f3(i64 %src1) {
; CHECK-NEXT: je
; CHECK: br %r14
entry:
- %src2 = load i64 *@g
+ %src2 = load i64 , i64 *@g
%cond = icmp eq i64 %src1, %src2
br i1 %cond, label %exit, label %mulb
mulb:
@@ -67,7 +67,7 @@ define i64 @f4(i64 %src1) {
; CHECK-NEXT: jlh
; CHECK: br %r14
entry:
- %src2 = load i64 *@g
+ %src2 = load i64 , i64 *@g
%cond = icmp ne i64 %src1, %src2
br i1 %cond, label %exit, label %mulb
mulb:
@@ -86,7 +86,7 @@ define i64 @f5(i64 %src1) {
; CHECK-NEXT: jl
; CHECK: br %r14
entry:
- %src2 = load i64 *@h, align 4
+ %src2 = load i64 , i64 *@h, align 4
%cond = icmp slt i64 %src1, %src2
br i1 %cond, label %exit, label %mulb
mulb:
@@ -104,7 +104,7 @@ define i64 @f6(i64 %src2) {
; CHECK-NEXT: jh {{\.L.*}}
; CHECK: br %r14
entry:
- %src1 = load i64 *@g
+ %src1 = load i64 , i64 *@g
%cond = icmp slt i64 %src1, %src2
br i1 %cond, label %exit, label %mulb
mulb:
diff --git a/test/CodeGen/SystemZ/int-cmp-44.ll b/test/CodeGen/SystemZ/int-cmp-44.ll
index 30c1c4f1ed6c..97d48521254d 100644
--- a/test/CodeGen/SystemZ/int-cmp-44.ll
+++ b/test/CodeGen/SystemZ/int-cmp-44.ll
@@ -127,7 +127,7 @@ define i32 @f7(i32 %a, i32 %b, i32 *%dest) {
; CHECK-NEXT: jne .L{{.*}}
; CHECK: br %r14
entry:
- %cur = load i32 *%dest
+ %cur = load i32 , i32 *%dest
%res = sub i32 %a, %cur
%cmp = icmp ne i32 %res, 0
br i1 %cmp, label %exit, label %store
@@ -147,7 +147,7 @@ define i32 @f8(i32 %a, i32 %b, i32 *%dest) {
; CHECK-NEXT: cijl %r2, 0, .L{{.*}}
; CHECK: br %r14
entry:
- %cur = load i32 *%dest
+ %cur = load i32 , i32 *%dest
%res = sub i32 %a, %cur
%cmp = icmp slt i32 %res, 0
br i1 %cmp, label %exit, label %store
@@ -468,7 +468,7 @@ define void @f24(i32 *%ptr) {
; CHECK-NEXT: cijlh [[REG]], 0, .L{{.*}}
; CHECK: br %r14
entry:
- %val = load i32 *%ptr
+ %val = load i32 , i32 *%ptr
%xor = xor i32 %val, 1
%add = add i32 %xor, 1000000
call void @foo()
@@ -561,7 +561,7 @@ define void @f28(i64 %a, i64 *%dest) {
; CHECK: br %r14
entry:
%ptr = inttoptr i64 %a to i8 *
- %val = load i8 *%ptr
+ %val = load i8 , i8 *%ptr
%xor = xor i8 %val, 15
store i8 %xor, i8 *%ptr
%cmp = icmp eq i64 %a, 0
@@ -584,7 +584,7 @@ define i32 @f29(i64 %base, i64 %index, i32 *%dest) {
entry:
%add = add i64 %base, %index
%ptr = inttoptr i64 %add to i32 *
- %res = load i32 *%ptr
+ %res = load i32 , i32 *%ptr
%cmp = icmp sle i32 %res, 0
br i1 %cmp, label %exit, label %store
@@ -606,7 +606,7 @@ entry:
%add1 = add i64 %base, %index
%add2 = add i64 %add1, 100000
%ptr = inttoptr i64 %add2 to i32 *
- %res = load i32 *%ptr
+ %res = load i32 , i32 *%ptr
%cmp = icmp sle i32 %res, 0
br i1 %cmp, label %exit, label %store
@@ -627,7 +627,7 @@ define i64 @f31(i64 %base, i64 %index, i64 *%dest) {
entry:
%add = add i64 %base, %index
%ptr = inttoptr i64 %add to i64 *
- %res = load i64 *%ptr
+ %res = load i64 , i64 *%ptr
%cmp = icmp sge i64 %res, 0
br i1 %cmp, label %exit, label %store
@@ -648,7 +648,7 @@ define i64 @f32(i64 %base, i64 %index, i64 *%dest) {
entry:
%add = add i64 %base, %index
%ptr = inttoptr i64 %add to i32 *
- %val = load i32 *%ptr
+ %val = load i32 , i32 *%ptr
%res = sext i32 %val to i64
%cmp = icmp sgt i64 %res, 0
br i1 %cmp, label %exit, label %store
@@ -853,7 +853,7 @@ define i32 @f41(i32 %a, i32 %b, i32 *%dest) {
; CHECK-NEXT: jne .L{{.*}}
; CHECK: br %r14
entry:
- %cur = load i32 *%dest
+ %cur = load i32 , i32 *%dest
%res = sub i32 %a, %cur
%cmp = icmp ne i32 %a, %cur
br i1 %cmp, label %exit, label %store
@@ -875,7 +875,7 @@ define i64 @f42(i64 %base, i64 %index, i64 *%dest) {
entry:
%add = add i64 %base, %index
%ptr = inttoptr i64 %add to i32 *
- %val = load i32 *%ptr
+ %val = load i32 , i32 *%ptr
%res = sext i32 %val to i64
%cmp = icmp sgt i32 %val, 0
br i1 %cmp, label %exit, label %store
diff --git a/test/CodeGen/SystemZ/int-cmp-45.ll b/test/CodeGen/SystemZ/int-cmp-45.ll
index c9affa672d54..e5474fa4863d 100644
--- a/test/CodeGen/SystemZ/int-cmp-45.ll
+++ b/test/CodeGen/SystemZ/int-cmp-45.ll
@@ -12,7 +12,7 @@ define i32 @f1(i32 %a, i32 %b, i32 *%cptr) {
; CHECK: br %r14
%add = add i32 %a, 1000000
%cmp = icmp eq i32 %add, 0
- %c = load i32 *%cptr
+ %c = load i32 , i32 *%cptr
%arg = select i1 %cmp, i32 %c, i32 %b
call void asm sideeffect "blah $0", "{r3}"(i32 %arg)
ret i32 %add
@@ -26,7 +26,7 @@ define i32 @f2(i32 %a, i32 %b, i32 *%cptr) {
; CHECK: br %r14
%add = add i32 %a, 1000000
%cmp = icmp eq i32 %add, 0
- %c = load i32 *%cptr
+ %c = load i32 , i32 *%cptr
%newval = select i1 %cmp, i32 %b, i32 %c
store i32 %newval, i32 *%cptr
ret i32 %add
@@ -53,7 +53,7 @@ define i32 @f4(i32 %a, i32 %b, i32 *%cptr) {
; CHECK: br %r14
%add = add i32 %a, 1000000
%cmp = icmp eq i32 %add, 0
- %c = load i32 *%cptr
+ %c = load i32 , i32 *%cptr
%arg = select i1 %cmp, i32 %b, i32 %c
call void asm sideeffect "blah $0", "{r3}"(i32 %arg)
ret i32 %add
@@ -67,7 +67,7 @@ define i32 @f5(i32 %a, i32 %b, i32 *%cptr) {
; CHECK: br %r14
%add = add i32 %a, 1000000
%cmp = icmp eq i32 %add, 0
- %c = load i32 *%cptr
+ %c = load i32 , i32 *%cptr
%newval = select i1 %cmp, i32 %c, i32 %b
store i32 %newval, i32 *%cptr
ret i32 %add
@@ -94,7 +94,7 @@ define i32 @f7(i32 %a, i32 %b, i32 *%cptr) {
; CHECK: br %r14
%add = add i32 %a, 1000000
%cmp = icmp ne i32 %add, 0
- %c = load i32 *%cptr
+ %c = load i32 , i32 *%cptr
%arg = select i1 %cmp, i32 %b, i32 %c
call void asm sideeffect "blah $0", "{r3}"(i32 %arg)
ret i32 %add
@@ -108,7 +108,7 @@ define i32 @f8(i32 %a, i32 %b, i32 *%cptr) {
; CHECK: br %r14
%add = add i32 %a, 1000000
%cmp = icmp ne i32 %add, 0
- %c = load i32 *%cptr
+ %c = load i32 , i32 *%cptr
%newval = select i1 %cmp, i32 %c, i32 %b
store i32 %newval, i32 *%cptr
ret i32 %add
diff --git a/test/CodeGen/SystemZ/int-cmp-47.ll b/test/CodeGen/SystemZ/int-cmp-47.ll
index 038a25b2a6ed..274350d24de1 100644
--- a/test/CodeGen/SystemZ/int-cmp-47.ll
+++ b/test/CodeGen/SystemZ/int-cmp-47.ll
@@ -309,7 +309,8 @@ exit:
define void @f17(i64 %a) {
; CHECK-LABEL: f17:
; CHECK-NOT: tmhh
-; CHECK: llihh {{%r[0-5]}}, 49151
+; CHECK: srlg [[REG:%r[0-5]]], %r2, 48
+; CHECK: cgfi [[REG]], 49151
; CHECK-NOT: tmhh
; CHECK: br %r14
entry:
diff --git a/test/CodeGen/SystemZ/int-cmp-48.ll b/test/CodeGen/SystemZ/int-cmp-48.ll
index d7c6370a2323..e26694753e7c 100644
--- a/test/CodeGen/SystemZ/int-cmp-48.ll
+++ b/test/CodeGen/SystemZ/int-cmp-48.ll
@@ -11,7 +11,7 @@ define void @f1(i8 *%src) {
; CHECK: je {{\.L.*}}
; CHECK: br %r14
entry:
- %byte = load i8 *%src
+ %byte = load i8 , i8 *%src
%and = and i8 %byte, 1
%cmp = icmp eq i8 %and, 0
br i1 %cmp, label %exit, label %store
@@ -34,7 +34,7 @@ define void @f2(i8 *%src) {
; CHECK: je {{\.L.*}}
; CHECK: br %r14
entry:
- %byte = load i8 *%src
+ %byte = load i8 , i8 *%src
store i8 0, i8 *%src
%and = and i8 %byte, 1
%cmp = icmp eq i8 %and, 0
@@ -54,7 +54,7 @@ define double @f3(i8 *%src, double %a, double %b) {
; CHECK: tm 0(%r2), 1
; CHECK: je {{\.L.*}}
; CHECK: br %r14
- %byte = load i8 *%src
+ %byte = load i8 , i8 *%src
%and = and i8 %byte, 1
%cmp = icmp eq i8 %and, 0
%res = select i1 %cmp, double %b, double %a
@@ -68,7 +68,7 @@ define double @f4(i8 *%src, double %a, double %b) {
; CHECK: je {{\.L.*}}
; CHECK: mvi 0(%r2), 0
; CHECK: br %r14
- %byte = load i8 *%src
+ %byte = load i8 , i8 *%src
%and = and i8 %byte, 1
%cmp = icmp eq i8 %and, 0
%res = select i1 %cmp, double %b, double %a
@@ -82,7 +82,7 @@ define double @f5(i8 *%src, double %a, double %b) {
; CHECK: tm 0(%r2), 1
; CHECK: jne {{\.L.*}}
; CHECK: br %r14
- %byte = load i8 *%src
+ %byte = load i8 , i8 *%src
%and = and i8 %byte, 1
%cmp = icmp ne i8 %and, 0
%res = select i1 %cmp, double %b, double %a
@@ -95,7 +95,7 @@ define double @f6(i8 *%src, double %a, double %b) {
; CHECK: tm 0(%r2), 254
; CHECK: jo {{\.L.*}}
; CHECK: br %r14
- %byte = load i8 *%src
+ %byte = load i8 , i8 *%src
%and = and i8 %byte, 254
%cmp = icmp eq i8 %and, 254
%res = select i1 %cmp, double %b, double %a
@@ -108,7 +108,7 @@ define double @f7(i8 *%src, double %a, double %b) {
; CHECK: tm 0(%r2), 254
; CHECK: jno {{\.L.*}}
; CHECK: br %r14
- %byte = load i8 *%src
+ %byte = load i8 , i8 *%src
%and = and i8 %byte, 254
%cmp = icmp ne i8 %and, 254
%res = select i1 %cmp, double %b, double %a
@@ -123,7 +123,7 @@ define double @f8(i8 *%src, double %a, double %b) {
; CHECK: tmll [[REG]], 3
; CHECK: jh {{\.L.*}}
; CHECK: br %r14
- %byte = load i8 *%src
+ %byte = load i8 , i8 *%src
%and = and i8 %byte, 3
%cmp = icmp eq i8 %and, 2
%res = select i1 %cmp, double %b, double %a
@@ -137,7 +137,7 @@ define double @f9(i8 *%src, double %a, double %b) {
; CHECK: tmll [[REG]], 3
; CHECK: jl {{\.L.*}}
; CHECK: br %r14
- %byte = load i8 *%src
+ %byte = load i8 , i8 *%src
%and = and i8 %byte, 3
%cmp = icmp eq i8 %and, 1
%res = select i1 %cmp, double %b, double %a
@@ -150,8 +150,8 @@ define double @f10(i8 *%src, double %a, double %b) {
; CHECK: tm 4095(%r2), 1
; CHECK: je {{\.L.*}}
; CHECK: br %r14
- %ptr = getelementptr i8 *%src, i64 4095
- %byte = load i8 *%ptr
+ %ptr = getelementptr i8, i8 *%src, i64 4095
+ %byte = load i8 , i8 *%ptr
%and = and i8 %byte, 1
%cmp = icmp eq i8 %and, 0
%res = select i1 %cmp, double %b, double %a
@@ -164,8 +164,8 @@ define double @f11(i8 *%src, double %a, double %b) {
; CHECK: tmy 4096(%r2), 1
; CHECK: je {{\.L.*}}
; CHECK: br %r14
- %ptr = getelementptr i8 *%src, i64 4096
- %byte = load i8 *%ptr
+ %ptr = getelementptr i8, i8 *%src, i64 4096
+ %byte = load i8 , i8 *%ptr
%and = and i8 %byte, 1
%cmp = icmp eq i8 %and, 0
%res = select i1 %cmp, double %b, double %a
@@ -178,8 +178,8 @@ define double @f12(i8 *%src, double %a, double %b) {
; CHECK: tmy 524287(%r2), 1
; CHECK: je {{\.L.*}}
; CHECK: br %r14
- %ptr = getelementptr i8 *%src, i64 524287
- %byte = load i8 *%ptr
+ %ptr = getelementptr i8, i8 *%src, i64 524287
+ %byte = load i8 , i8 *%ptr
%and = and i8 %byte, 1
%cmp = icmp eq i8 %and, 0
%res = select i1 %cmp, double %b, double %a
@@ -193,8 +193,8 @@ define double @f13(i8 *%src, double %a, double %b) {
; CHECK: tm 0(%r2), 1
; CHECK: je {{\.L.*}}
; CHECK: br %r14
- %ptr = getelementptr i8 *%src, i64 524288
- %byte = load i8 *%ptr
+ %ptr = getelementptr i8, i8 *%src, i64 524288
+ %byte = load i8 , i8 *%ptr
%and = and i8 %byte, 1
%cmp = icmp eq i8 %and, 0
%res = select i1 %cmp, double %b, double %a
@@ -207,8 +207,8 @@ define double @f14(i8 *%src, double %a, double %b) {
; CHECK: tmy -524288(%r2), 1
; CHECK: je {{\.L.*}}
; CHECK: br %r14
- %ptr = getelementptr i8 *%src, i64 -524288
- %byte = load i8 *%ptr
+ %ptr = getelementptr i8, i8 *%src, i64 -524288
+ %byte = load i8 , i8 *%ptr
%and = and i8 %byte, 1
%cmp = icmp eq i8 %and, 0
%res = select i1 %cmp, double %b, double %a
@@ -222,8 +222,8 @@ define double @f15(i8 *%src, double %a, double %b) {
; CHECK: tm 0(%r2), 1
; CHECK: je {{\.L.*}}
; CHECK: br %r14
- %ptr = getelementptr i8 *%src, i64 -524289
- %byte = load i8 *%ptr
+ %ptr = getelementptr i8, i8 *%src, i64 -524289
+ %byte = load i8 , i8 *%ptr
%and = and i8 %byte, 1
%cmp = icmp eq i8 %and, 0
%res = select i1 %cmp, double %b, double %a
@@ -236,8 +236,8 @@ define double @f16(i8 *%src, i64 %index, double %a, double %b) {
; CHECK: tm 0({{%r[1-5]}}), 1
; CHECK: je {{\.L.*}}
; CHECK: br %r14
- %ptr = getelementptr i8 *%src, i64 %index
- %byte = load i8 *%ptr
+ %ptr = getelementptr i8, i8 *%src, i64 %index
+ %byte = load i8 , i8 *%ptr
%and = and i8 %byte, 1
%cmp = icmp eq i8 %and, 0
%res = select i1 %cmp, double %b, double %a
diff --git a/test/CodeGen/SystemZ/int-cmp-50.ll b/test/CodeGen/SystemZ/int-cmp-50.ll
new file mode 100644
index 000000000000..287ac2c49a78
--- /dev/null
+++ b/test/CodeGen/SystemZ/int-cmp-50.ll
@@ -0,0 +1,30 @@
+; Verify that we do not crash on always-true conditions
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 -O0
+;
+; This test was compiled using clang -O0 from the following source code:
+;
+; int test(unsigned long x)
+; {
+; return x >= 0 && x <= 15;
+; }
+
+define signext i32 @test(i64 %x) {
+entry:
+ %x.addr = alloca i64, align 8
+ store i64 %x, i64* %x.addr, align 8
+ %0 = load i64, i64* %x.addr, align 8
+ %cmp = icmp uge i64 %0, 0
+ br i1 %cmp, label %land.rhs, label %land.end
+
+land.rhs: ; preds = %entry
+ %1 = load i64, i64* %x.addr, align 8
+ %cmp1 = icmp ule i64 %1, 15
+ br label %land.end
+
+land.end: ; preds = %land.rhs, %entry
+ %2 = phi i1 [ false, %entry ], [ %cmp1, %land.rhs ]
+ %land.ext = zext i1 %2 to i32
+ ret i32 %land.ext
+}
+
diff --git a/test/CodeGen/SystemZ/int-const-03.ll b/test/CodeGen/SystemZ/int-const-03.ll
index af1cef2c138a..7352ea32e76a 100644
--- a/test/CodeGen/SystemZ/int-const-03.ll
+++ b/test/CodeGen/SystemZ/int-const-03.ll
@@ -70,7 +70,7 @@ define void @f8(i8 *%src) {
; CHECK-LABEL: f8:
; CHECK: mvi 4095(%r2), 42
; CHECK: br %r14
- %ptr = getelementptr i8 *%src, i64 4095
+ %ptr = getelementptr i8, i8 *%src, i64 4095
store i8 42, i8 *%ptr
ret void
}
@@ -80,7 +80,7 @@ define void @f9(i8 *%src) {
; CHECK-LABEL: f9:
; CHECK: mviy 4096(%r2), 42
; CHECK: br %r14
- %ptr = getelementptr i8 *%src, i64 4096
+ %ptr = getelementptr i8, i8 *%src, i64 4096
store i8 42, i8 *%ptr
ret void
}
@@ -90,7 +90,7 @@ define void @f10(i8 *%src) {
; CHECK-LABEL: f10:
; CHECK: mviy 524287(%r2), 42
; CHECK: br %r14
- %ptr = getelementptr i8 *%src, i64 524287
+ %ptr = getelementptr i8, i8 *%src, i64 524287
store i8 42, i8 *%ptr
ret void
}
@@ -102,7 +102,7 @@ define void @f11(i8 *%src) {
; CHECK: agfi %r2, 524288
; CHECK: mvi 0(%r2), 42
; CHECK: br %r14
- %ptr = getelementptr i8 *%src, i64 524288
+ %ptr = getelementptr i8, i8 *%src, i64 524288
store i8 42, i8 *%ptr
ret void
}
@@ -112,7 +112,7 @@ define void @f12(i8 *%src) {
; CHECK-LABEL: f12:
; CHECK: mviy -1(%r2), 42
; CHECK: br %r14
- %ptr = getelementptr i8 *%src, i64 -1
+ %ptr = getelementptr i8, i8 *%src, i64 -1
store i8 42, i8 *%ptr
ret void
}
@@ -122,7 +122,7 @@ define void @f13(i8 *%src) {
; CHECK-LABEL: f13:
; CHECK: mviy -524288(%r2), 42
; CHECK: br %r14
- %ptr = getelementptr i8 *%src, i64 -524288
+ %ptr = getelementptr i8, i8 *%src, i64 -524288
store i8 42, i8 *%ptr
ret void
}
@@ -134,7 +134,7 @@ define void @f14(i8 *%src) {
; CHECK: agfi %r2, -524289
; CHECK: mvi 0(%r2), 42
; CHECK: br %r14
- %ptr = getelementptr i8 *%src, i64 -524289
+ %ptr = getelementptr i8, i8 *%src, i64 -524289
store i8 42, i8 *%ptr
ret void
}
diff --git a/test/CodeGen/SystemZ/int-const-04.ll b/test/CodeGen/SystemZ/int-const-04.ll
index aced50b5601b..fd943991196e 100644
--- a/test/CodeGen/SystemZ/int-const-04.ll
+++ b/test/CodeGen/SystemZ/int-const-04.ll
@@ -70,7 +70,7 @@ define void @f8(i16 *%a) {
; CHECK-LABEL: f8:
; CHECK: mvhhi 4094(%r2), 42
; CHECK: br %r14
- %ptr = getelementptr i16 *%a, i64 2047
+ %ptr = getelementptr i16, i16 *%a, i64 2047
store i16 42, i16 *%ptr
ret void
}
@@ -82,7 +82,7 @@ define void @f9(i16 *%a) {
; CHECK: lhi [[TMP:%r[0-5]]], 42
; CHECK: sthy [[TMP]], 4096(%r2)
; CHECK: br %r14
- %ptr = getelementptr i16 *%a, i64 2048
+ %ptr = getelementptr i16, i16 *%a, i64 2048
store i16 42, i16 *%ptr
ret void
}
@@ -93,7 +93,7 @@ define void @f10(i16 *%a) {
; CHECK: lhi [[TMP:%r[0-5]]], 42
; CHECK: sthy [[TMP]], -2(%r2)
; CHECK: br %r14
- %ptr = getelementptr i16 *%a, i64 -1
+ %ptr = getelementptr i16, i16 *%a, i64 -1
store i16 42, i16 *%ptr
ret void
}
diff --git a/test/CodeGen/SystemZ/int-const-05.ll b/test/CodeGen/SystemZ/int-const-05.ll
index 98d6851c197d..c7b455168990 100644
--- a/test/CodeGen/SystemZ/int-const-05.ll
+++ b/test/CodeGen/SystemZ/int-const-05.ll
@@ -61,7 +61,7 @@ define void @f7(i32 *%a) {
; CHECK-LABEL: f7:
; CHECK: mvhi 4092(%r2), 42
; CHECK: br %r14
- %ptr = getelementptr i32 *%a, i64 1023
+ %ptr = getelementptr i32, i32 *%a, i64 1023
store i32 42, i32 *%ptr
ret void
}
@@ -72,7 +72,7 @@ define void @f8(i32 *%a) {
; CHECK: lhi [[TMP:%r[0-5]]], 42
; CHECK: sty [[TMP]], 4096(%r2)
; CHECK: br %r14
- %ptr = getelementptr i32 *%a, i64 1024
+ %ptr = getelementptr i32, i32 *%a, i64 1024
store i32 42, i32 *%ptr
ret void
}
@@ -83,7 +83,7 @@ define void @f9(i32 *%a) {
; CHECK: lhi [[TMP:%r[0-5]]], 42
; CHECK: sty [[TMP]], -4(%r2)
; CHECK: br %r14
- %ptr = getelementptr i32 *%a, i64 -1
+ %ptr = getelementptr i32, i32 *%a, i64 -1
store i32 42, i32 *%ptr
ret void
}
diff --git a/test/CodeGen/SystemZ/int-const-06.ll b/test/CodeGen/SystemZ/int-const-06.ll
index cf07c665dde7..fc47a04094b5 100644
--- a/test/CodeGen/SystemZ/int-const-06.ll
+++ b/test/CodeGen/SystemZ/int-const-06.ll
@@ -61,7 +61,7 @@ define void @f7(i64 *%a) {
; CHECK-LABEL: f7:
; CHECK: mvghi 4088(%r2), 42
; CHECK: br %r14
- %ptr = getelementptr i64 *%a, i64 511
+ %ptr = getelementptr i64, i64 *%a, i64 511
store i64 42, i64 *%ptr
ret void
}
@@ -73,7 +73,7 @@ define void @f8(i64 *%a) {
; CHECK: lghi [[TMP:%r[0-5]]], 42
; CHECK: stg [[TMP]], 4096(%r2)
; CHECK: br %r14
- %ptr = getelementptr i64 *%a, i64 512
+ %ptr = getelementptr i64, i64 *%a, i64 512
store i64 42, i64 *%ptr
ret void
}
@@ -84,7 +84,7 @@ define void @f9(i64 *%a) {
; CHECK: lghi [[TMP:%r[0-5]]], 42
; CHECK: stg [[TMP]], -8(%r2)
; CHECK: br %r14
- %ptr = getelementptr i64 *%a, i64 -1
+ %ptr = getelementptr i64, i64 *%a, i64 -1
store i64 42, i64 *%ptr
ret void
}
diff --git a/test/CodeGen/SystemZ/int-conv-01.ll b/test/CodeGen/SystemZ/int-conv-01.ll
index e5c411cdec1d..70ef78a06110 100644
--- a/test/CodeGen/SystemZ/int-conv-01.ll
+++ b/test/CodeGen/SystemZ/int-conv-01.ll
@@ -27,7 +27,7 @@ define i32 @f3(i8 *%src) {
; CHECK-LABEL: f3:
; CHECK: lb %r2, 0(%r2)
; CHECK: br %r14
- %byte = load i8 *%src
+ %byte = load i8 , i8 *%src
%ext = sext i8 %byte to i32
ret i32 %ext
}
@@ -37,8 +37,8 @@ define i32 @f4(i8 *%src) {
; CHECK-LABEL: f4:
; CHECK: lb %r2, 524287(%r2)
; CHECK: br %r14
- %ptr = getelementptr i8 *%src, i64 524287
- %byte = load i8 *%ptr
+ %ptr = getelementptr i8, i8 *%src, i64 524287
+ %byte = load i8 , i8 *%ptr
%ext = sext i8 %byte to i32
ret i32 %ext
}
@@ -50,8 +50,8 @@ define i32 @f5(i8 *%src) {
; CHECK: agfi %r2, 524288
; CHECK: lb %r2, 0(%r2)
; CHECK: br %r14
- %ptr = getelementptr i8 *%src, i64 524288
- %byte = load i8 *%ptr
+ %ptr = getelementptr i8, i8 *%src, i64 524288
+ %byte = load i8 , i8 *%ptr
%ext = sext i8 %byte to i32
ret i32 %ext
}
@@ -61,8 +61,8 @@ define i32 @f6(i8 *%src) {
; CHECK-LABEL: f6:
; CHECK: lb %r2, -1(%r2)
; CHECK: br %r14
- %ptr = getelementptr i8 *%src, i64 -1
- %byte = load i8 *%ptr
+ %ptr = getelementptr i8, i8 *%src, i64 -1
+ %byte = load i8 , i8 *%ptr
%ext = sext i8 %byte to i32
ret i32 %ext
}
@@ -72,8 +72,8 @@ define i32 @f7(i8 *%src) {
; CHECK-LABEL: f7:
; CHECK: lb %r2, -524288(%r2)
; CHECK: br %r14
- %ptr = getelementptr i8 *%src, i64 -524288
- %byte = load i8 *%ptr
+ %ptr = getelementptr i8, i8 *%src, i64 -524288
+ %byte = load i8 , i8 *%ptr
%ext = sext i8 %byte to i32
ret i32 %ext
}
@@ -85,8 +85,8 @@ define i32 @f8(i8 *%src) {
; CHECK: agfi %r2, -524289
; CHECK: lb %r2, 0(%r2)
; CHECK: br %r14
- %ptr = getelementptr i8 *%src, i64 -524289
- %byte = load i8 *%ptr
+ %ptr = getelementptr i8, i8 *%src, i64 -524289
+ %byte = load i8 , i8 *%ptr
%ext = sext i8 %byte to i32
ret i32 %ext
}
@@ -99,7 +99,7 @@ define i32 @f9(i64 %src, i64 %index) {
%add1 = add i64 %src, %index
%add2 = add i64 %add1, 524287
%ptr = inttoptr i64 %add2 to i8 *
- %byte = load i8 *%ptr
+ %byte = load i8 , i8 *%ptr
%ext = sext i8 %byte to i32
ret i32 %ext
}
@@ -110,22 +110,22 @@ define void @f10(i32 *%ptr) {
; CHECK-LABEL: f10:
; CHECK: lb {{%r[0-9]+}}, 16{{[37]}}(%r15)
; CHECK: br %r14
- %val0 = load volatile i32 *%ptr
- %val1 = load volatile i32 *%ptr
- %val2 = load volatile i32 *%ptr
- %val3 = load volatile i32 *%ptr
- %val4 = load volatile i32 *%ptr
- %val5 = load volatile i32 *%ptr
- %val6 = load volatile i32 *%ptr
- %val7 = load volatile i32 *%ptr
- %val8 = load volatile i32 *%ptr
- %val9 = load volatile i32 *%ptr
- %val10 = load volatile i32 *%ptr
- %val11 = load volatile i32 *%ptr
- %val12 = load volatile i32 *%ptr
- %val13 = load volatile i32 *%ptr
- %val14 = load volatile i32 *%ptr
- %val15 = load volatile i32 *%ptr
+ %val0 = load volatile i32 , i32 *%ptr
+ %val1 = load volatile i32 , i32 *%ptr
+ %val2 = load volatile i32 , i32 *%ptr
+ %val3 = load volatile i32 , i32 *%ptr
+ %val4 = load volatile i32 , i32 *%ptr
+ %val5 = load volatile i32 , i32 *%ptr
+ %val6 = load volatile i32 , i32 *%ptr
+ %val7 = load volatile i32 , i32 *%ptr
+ %val8 = load volatile i32 , i32 *%ptr
+ %val9 = load volatile i32 , i32 *%ptr
+ %val10 = load volatile i32 , i32 *%ptr
+ %val11 = load volatile i32 , i32 *%ptr
+ %val12 = load volatile i32 , i32 *%ptr
+ %val13 = load volatile i32 , i32 *%ptr
+ %val14 = load volatile i32 , i32 *%ptr
+ %val15 = load volatile i32 , i32 *%ptr
%trunc0 = trunc i32 %val0 to i8
%trunc1 = trunc i32 %val1 to i8
diff --git a/test/CodeGen/SystemZ/int-conv-02.ll b/test/CodeGen/SystemZ/int-conv-02.ll
index dd7760d08cf5..5b248cce1efc 100644
--- a/test/CodeGen/SystemZ/int-conv-02.ll
+++ b/test/CodeGen/SystemZ/int-conv-02.ll
@@ -37,7 +37,7 @@ define i32 @f4(i8 *%src) {
; CHECK-LABEL: f4:
; CHECK: llc %r2, 0(%r2)
; CHECK: br %r14
- %byte = load i8 *%src
+ %byte = load i8 , i8 *%src
%ext = zext i8 %byte to i32
ret i32 %ext
}
@@ -47,8 +47,8 @@ define i32 @f5(i8 *%src) {
; CHECK-LABEL: f5:
; CHECK: llc %r2, 524287(%r2)
; CHECK: br %r14
- %ptr = getelementptr i8 *%src, i64 524287
- %byte = load i8 *%ptr
+ %ptr = getelementptr i8, i8 *%src, i64 524287
+ %byte = load i8 , i8 *%ptr
%ext = zext i8 %byte to i32
ret i32 %ext
}
@@ -60,8 +60,8 @@ define i32 @f6(i8 *%src) {
; CHECK: agfi %r2, 524288
; CHECK: llc %r2, 0(%r2)
; CHECK: br %r14
- %ptr = getelementptr i8 *%src, i64 524288
- %byte = load i8 *%ptr
+ %ptr = getelementptr i8, i8 *%src, i64 524288
+ %byte = load i8 , i8 *%ptr
%ext = zext i8 %byte to i32
ret i32 %ext
}
@@ -71,8 +71,8 @@ define i32 @f7(i8 *%src) {
; CHECK-LABEL: f7:
; CHECK: llc %r2, -1(%r2)
; CHECK: br %r14
- %ptr = getelementptr i8 *%src, i64 -1
- %byte = load i8 *%ptr
+ %ptr = getelementptr i8, i8 *%src, i64 -1
+ %byte = load i8 , i8 *%ptr
%ext = zext i8 %byte to i32
ret i32 %ext
}
@@ -82,8 +82,8 @@ define i32 @f8(i8 *%src) {
; CHECK-LABEL: f8:
; CHECK: llc %r2, -524288(%r2)
; CHECK: br %r14
- %ptr = getelementptr i8 *%src, i64 -524288
- %byte = load i8 *%ptr
+ %ptr = getelementptr i8, i8 *%src, i64 -524288
+ %byte = load i8 , i8 *%ptr
%ext = zext i8 %byte to i32
ret i32 %ext
}
@@ -95,8 +95,8 @@ define i32 @f9(i8 *%src) {
; CHECK: agfi %r2, -524289
; CHECK: llc %r2, 0(%r2)
; CHECK: br %r14
- %ptr = getelementptr i8 *%src, i64 -524289
- %byte = load i8 *%ptr
+ %ptr = getelementptr i8, i8 *%src, i64 -524289
+ %byte = load i8 , i8 *%ptr
%ext = zext i8 %byte to i32
ret i32 %ext
}
@@ -109,7 +109,7 @@ define i32 @f10(i64 %src, i64 %index) {
%add1 = add i64 %src, %index
%add2 = add i64 %add1, 524287
%ptr = inttoptr i64 %add2 to i8 *
- %byte = load i8 *%ptr
+ %byte = load i8 , i8 *%ptr
%ext = zext i8 %byte to i32
ret i32 %ext
}
@@ -120,22 +120,22 @@ define void @f11(i32 *%ptr) {
; CHECK-LABEL: f11:
; CHECK: llc {{%r[0-9]+}}, 16{{[37]}}(%r15)
; CHECK: br %r14
- %val0 = load volatile i32 *%ptr
- %val1 = load volatile i32 *%ptr
- %val2 = load volatile i32 *%ptr
- %val3 = load volatile i32 *%ptr
- %val4 = load volatile i32 *%ptr
- %val5 = load volatile i32 *%ptr
- %val6 = load volatile i32 *%ptr
- %val7 = load volatile i32 *%ptr
- %val8 = load volatile i32 *%ptr
- %val9 = load volatile i32 *%ptr
- %val10 = load volatile i32 *%ptr
- %val11 = load volatile i32 *%ptr
- %val12 = load volatile i32 *%ptr
- %val13 = load volatile i32 *%ptr
- %val14 = load volatile i32 *%ptr
- %val15 = load volatile i32 *%ptr
+ %val0 = load volatile i32 , i32 *%ptr
+ %val1 = load volatile i32 , i32 *%ptr
+ %val2 = load volatile i32 , i32 *%ptr
+ %val3 = load volatile i32 , i32 *%ptr
+ %val4 = load volatile i32 , i32 *%ptr
+ %val5 = load volatile i32 , i32 *%ptr
+ %val6 = load volatile i32 , i32 *%ptr
+ %val7 = load volatile i32 , i32 *%ptr
+ %val8 = load volatile i32 , i32 *%ptr
+ %val9 = load volatile i32 , i32 *%ptr
+ %val10 = load volatile i32 , i32 *%ptr
+ %val11 = load volatile i32 , i32 *%ptr
+ %val12 = load volatile i32 , i32 *%ptr
+ %val13 = load volatile i32 , i32 *%ptr
+ %val14 = load volatile i32 , i32 *%ptr
+ %val15 = load volatile i32 , i32 *%ptr
%trunc0 = trunc i32 %val0 to i8
%trunc1 = trunc i32 %val1 to i8
diff --git a/test/CodeGen/SystemZ/int-conv-03.ll b/test/CodeGen/SystemZ/int-conv-03.ll
index cad9581296a4..e621bcd69dee 100644
--- a/test/CodeGen/SystemZ/int-conv-03.ll
+++ b/test/CodeGen/SystemZ/int-conv-03.ll
@@ -27,7 +27,7 @@ define i64 @f3(i8 *%src) {
; CHECK-LABEL: f3:
; CHECK: lgb %r2, 0(%r2)
; CHECK: br %r14
- %byte = load i8 *%src
+ %byte = load i8 , i8 *%src
%ext = sext i8 %byte to i64
ret i64 %ext
}
@@ -37,8 +37,8 @@ define i64 @f4(i8 *%src) {
; CHECK-LABEL: f4:
; CHECK: lgb %r2, 524287(%r2)
; CHECK: br %r14
- %ptr = getelementptr i8 *%src, i64 524287
- %byte = load i8 *%ptr
+ %ptr = getelementptr i8, i8 *%src, i64 524287
+ %byte = load i8 , i8 *%ptr
%ext = sext i8 %byte to i64
ret i64 %ext
}
@@ -50,8 +50,8 @@ define i64 @f5(i8 *%src) {
; CHECK: agfi %r2, 524288
; CHECK: lgb %r2, 0(%r2)
; CHECK: br %r14
- %ptr = getelementptr i8 *%src, i64 524288
- %byte = load i8 *%ptr
+ %ptr = getelementptr i8, i8 *%src, i64 524288
+ %byte = load i8 , i8 *%ptr
%ext = sext i8 %byte to i64
ret i64 %ext
}
@@ -61,8 +61,8 @@ define i64 @f6(i8 *%src) {
; CHECK-LABEL: f6:
; CHECK: lgb %r2, -1(%r2)
; CHECK: br %r14
- %ptr = getelementptr i8 *%src, i64 -1
- %byte = load i8 *%ptr
+ %ptr = getelementptr i8, i8 *%src, i64 -1
+ %byte = load i8 , i8 *%ptr
%ext = sext i8 %byte to i64
ret i64 %ext
}
@@ -72,8 +72,8 @@ define i64 @f7(i8 *%src) {
; CHECK-LABEL: f7:
; CHECK: lgb %r2, -524288(%r2)
; CHECK: br %r14
- %ptr = getelementptr i8 *%src, i64 -524288
- %byte = load i8 *%ptr
+ %ptr = getelementptr i8, i8 *%src, i64 -524288
+ %byte = load i8 , i8 *%ptr
%ext = sext i8 %byte to i64
ret i64 %ext
}
@@ -85,8 +85,8 @@ define i64 @f8(i8 *%src) {
; CHECK: agfi %r2, -524289
; CHECK: lgb %r2, 0(%r2)
; CHECK: br %r14
- %ptr = getelementptr i8 *%src, i64 -524289
- %byte = load i8 *%ptr
+ %ptr = getelementptr i8, i8 *%src, i64 -524289
+ %byte = load i8 , i8 *%ptr
%ext = sext i8 %byte to i64
ret i64 %ext
}
@@ -99,7 +99,7 @@ define i64 @f9(i64 %src, i64 %index) {
%add1 = add i64 %src, %index
%add2 = add i64 %add1, 524287
%ptr = inttoptr i64 %add2 to i8 *
- %byte = load i8 *%ptr
+ %byte = load i8 , i8 *%ptr
%ext = sext i8 %byte to i64
ret i64 %ext
}
@@ -110,22 +110,22 @@ define void @f10(i64 *%ptr) {
; CHECK-LABEL: f10:
; CHECK: lgb {{%r[0-9]+}}, 167(%r15)
; CHECK: br %r14
- %val0 = load volatile i64 *%ptr
- %val1 = load volatile i64 *%ptr
- %val2 = load volatile i64 *%ptr
- %val3 = load volatile i64 *%ptr
- %val4 = load volatile i64 *%ptr
- %val5 = load volatile i64 *%ptr
- %val6 = load volatile i64 *%ptr
- %val7 = load volatile i64 *%ptr
- %val8 = load volatile i64 *%ptr
- %val9 = load volatile i64 *%ptr
- %val10 = load volatile i64 *%ptr
- %val11 = load volatile i64 *%ptr
- %val12 = load volatile i64 *%ptr
- %val13 = load volatile i64 *%ptr
- %val14 = load volatile i64 *%ptr
- %val15 = load volatile i64 *%ptr
+ %val0 = load volatile i64 , i64 *%ptr
+ %val1 = load volatile i64 , i64 *%ptr
+ %val2 = load volatile i64 , i64 *%ptr
+ %val3 = load volatile i64 , i64 *%ptr
+ %val4 = load volatile i64 , i64 *%ptr
+ %val5 = load volatile i64 , i64 *%ptr
+ %val6 = load volatile i64 , i64 *%ptr
+ %val7 = load volatile i64 , i64 *%ptr
+ %val8 = load volatile i64 , i64 *%ptr
+ %val9 = load volatile i64 , i64 *%ptr
+ %val10 = load volatile i64 , i64 *%ptr
+ %val11 = load volatile i64 , i64 *%ptr
+ %val12 = load volatile i64 , i64 *%ptr
+ %val13 = load volatile i64 , i64 *%ptr
+ %val14 = load volatile i64 , i64 *%ptr
+ %val15 = load volatile i64 , i64 *%ptr
%trunc0 = trunc i64 %val0 to i8
%trunc1 = trunc i64 %val1 to i8
diff --git a/test/CodeGen/SystemZ/int-conv-04.ll b/test/CodeGen/SystemZ/int-conv-04.ll
index 1c6be7b6e8a4..a0f5d63d2605 100644
--- a/test/CodeGen/SystemZ/int-conv-04.ll
+++ b/test/CodeGen/SystemZ/int-conv-04.ll
@@ -36,7 +36,7 @@ define i64 @f4(i8 *%src) {
; CHECK-LABEL: f4:
; CHECK: llgc %r2, 0(%r2)
; CHECK: br %r14
- %byte = load i8 *%src
+ %byte = load i8 , i8 *%src
%ext = zext i8 %byte to i64
ret i64 %ext
}
@@ -46,8 +46,8 @@ define i64 @f5(i8 *%src) {
; CHECK-LABEL: f5:
; CHECK: llgc %r2, 524287(%r2)
; CHECK: br %r14
- %ptr = getelementptr i8 *%src, i64 524287
- %byte = load i8 *%ptr
+ %ptr = getelementptr i8, i8 *%src, i64 524287
+ %byte = load i8 , i8 *%ptr
%ext = zext i8 %byte to i64
ret i64 %ext
}
@@ -59,8 +59,8 @@ define i64 @f6(i8 *%src) {
; CHECK: agfi %r2, 524288
; CHECK: llgc %r2, 0(%r2)
; CHECK: br %r14
- %ptr = getelementptr i8 *%src, i64 524288
- %byte = load i8 *%ptr
+ %ptr = getelementptr i8, i8 *%src, i64 524288
+ %byte = load i8 , i8 *%ptr
%ext = zext i8 %byte to i64
ret i64 %ext
}
@@ -70,8 +70,8 @@ define i64 @f7(i8 *%src) {
; CHECK-LABEL: f7:
; CHECK: llgc %r2, -1(%r2)
; CHECK: br %r14
- %ptr = getelementptr i8 *%src, i64 -1
- %byte = load i8 *%ptr
+ %ptr = getelementptr i8, i8 *%src, i64 -1
+ %byte = load i8 , i8 *%ptr
%ext = zext i8 %byte to i64
ret i64 %ext
}
@@ -81,8 +81,8 @@ define i64 @f8(i8 *%src) {
; CHECK-LABEL: f8:
; CHECK: llgc %r2, -524288(%r2)
; CHECK: br %r14
- %ptr = getelementptr i8 *%src, i64 -524288
- %byte = load i8 *%ptr
+ %ptr = getelementptr i8, i8 *%src, i64 -524288
+ %byte = load i8 , i8 *%ptr
%ext = zext i8 %byte to i64
ret i64 %ext
}
@@ -94,8 +94,8 @@ define i64 @f9(i8 *%src) {
; CHECK: agfi %r2, -524289
; CHECK: llgc %r2, 0(%r2)
; CHECK: br %r14
- %ptr = getelementptr i8 *%src, i64 -524289
- %byte = load i8 *%ptr
+ %ptr = getelementptr i8, i8 *%src, i64 -524289
+ %byte = load i8 , i8 *%ptr
%ext = zext i8 %byte to i64
ret i64 %ext
}
@@ -108,7 +108,7 @@ define i64 @f10(i64 %src, i64 %index) {
%add1 = add i64 %src, %index
%add2 = add i64 %add1, 524287
%ptr = inttoptr i64 %add2 to i8 *
- %byte = load i8 *%ptr
+ %byte = load i8 , i8 *%ptr
%ext = zext i8 %byte to i64
ret i64 %ext
}
@@ -119,22 +119,22 @@ define void @f11(i64 *%ptr) {
; CHECK-LABEL: f11:
; CHECK: llgc {{%r[0-9]+}}, 167(%r15)
; CHECK: br %r14
- %val0 = load volatile i64 *%ptr
- %val1 = load volatile i64 *%ptr
- %val2 = load volatile i64 *%ptr
- %val3 = load volatile i64 *%ptr
- %val4 = load volatile i64 *%ptr
- %val5 = load volatile i64 *%ptr
- %val6 = load volatile i64 *%ptr
- %val7 = load volatile i64 *%ptr
- %val8 = load volatile i64 *%ptr
- %val9 = load volatile i64 *%ptr
- %val10 = load volatile i64 *%ptr
- %val11 = load volatile i64 *%ptr
- %val12 = load volatile i64 *%ptr
- %val13 = load volatile i64 *%ptr
- %val14 = load volatile i64 *%ptr
- %val15 = load volatile i64 *%ptr
+ %val0 = load volatile i64 , i64 *%ptr
+ %val1 = load volatile i64 , i64 *%ptr
+ %val2 = load volatile i64 , i64 *%ptr
+ %val3 = load volatile i64 , i64 *%ptr
+ %val4 = load volatile i64 , i64 *%ptr
+ %val5 = load volatile i64 , i64 *%ptr
+ %val6 = load volatile i64 , i64 *%ptr
+ %val7 = load volatile i64 , i64 *%ptr
+ %val8 = load volatile i64 , i64 *%ptr
+ %val9 = load volatile i64 , i64 *%ptr
+ %val10 = load volatile i64 , i64 *%ptr
+ %val11 = load volatile i64 , i64 *%ptr
+ %val12 = load volatile i64 , i64 *%ptr
+ %val13 = load volatile i64 , i64 *%ptr
+ %val14 = load volatile i64 , i64 *%ptr
+ %val15 = load volatile i64 , i64 *%ptr
%trunc0 = trunc i64 %val0 to i8
%trunc1 = trunc i64 %val1 to i8
diff --git a/test/CodeGen/SystemZ/int-conv-05.ll b/test/CodeGen/SystemZ/int-conv-05.ll
index 5eade93ac584..55299529c480 100644
--- a/test/CodeGen/SystemZ/int-conv-05.ll
+++ b/test/CodeGen/SystemZ/int-conv-05.ll
@@ -27,7 +27,7 @@ define i32 @f3(i16 *%src) {
; CHECK-LABEL: f3:
; CHECK: lh %r2, 0(%r2)
; CHECK: br %r14
- %half = load i16 *%src
+ %half = load i16 , i16 *%src
%ext = sext i16 %half to i32
ret i32 %ext
}
@@ -37,8 +37,8 @@ define i32 @f4(i16 *%src) {
; CHECK-LABEL: f4:
; CHECK: lh %r2, 4094(%r2)
; CHECK: br %r14
- %ptr = getelementptr i16 *%src, i64 2047
- %half = load i16 *%ptr
+ %ptr = getelementptr i16, i16 *%src, i64 2047
+ %half = load i16 , i16 *%ptr
%ext = sext i16 %half to i32
ret i32 %ext
}
@@ -48,8 +48,8 @@ define i32 @f5(i16 *%src) {
; CHECK-LABEL: f5:
; CHECK: lhy %r2, 4096(%r2)
; CHECK: br %r14
- %ptr = getelementptr i16 *%src, i64 2048
- %half = load i16 *%ptr
+ %ptr = getelementptr i16, i16 *%src, i64 2048
+ %half = load i16 , i16 *%ptr
%ext = sext i16 %half to i32
ret i32 %ext
}
@@ -59,8 +59,8 @@ define i32 @f6(i16 *%src) {
; CHECK-LABEL: f6:
; CHECK: lhy %r2, 524286(%r2)
; CHECK: br %r14
- %ptr = getelementptr i16 *%src, i64 262143
- %half = load i16 *%ptr
+ %ptr = getelementptr i16, i16 *%src, i64 262143
+ %half = load i16 , i16 *%ptr
%ext = sext i16 %half to i32
ret i32 %ext
}
@@ -72,8 +72,8 @@ define i32 @f7(i16 *%src) {
; CHECK: agfi %r2, 524288
; CHECK: lh %r2, 0(%r2)
; CHECK: br %r14
- %ptr = getelementptr i16 *%src, i64 262144
- %half = load i16 *%ptr
+ %ptr = getelementptr i16, i16 *%src, i64 262144
+ %half = load i16 , i16 *%ptr
%ext = sext i16 %half to i32
ret i32 %ext
}
@@ -83,8 +83,8 @@ define i32 @f8(i16 *%src) {
; CHECK-LABEL: f8:
; CHECK: lhy %r2, -2(%r2)
; CHECK: br %r14
- %ptr = getelementptr i16 *%src, i64 -1
- %half = load i16 *%ptr
+ %ptr = getelementptr i16, i16 *%src, i64 -1
+ %half = load i16 , i16 *%ptr
%ext = sext i16 %half to i32
ret i32 %ext
}
@@ -94,8 +94,8 @@ define i32 @f9(i16 *%src) {
; CHECK-LABEL: f9:
; CHECK: lhy %r2, -524288(%r2)
; CHECK: br %r14
- %ptr = getelementptr i16 *%src, i64 -262144
- %half = load i16 *%ptr
+ %ptr = getelementptr i16, i16 *%src, i64 -262144
+ %half = load i16 , i16 *%ptr
%ext = sext i16 %half to i32
ret i32 %ext
}
@@ -107,8 +107,8 @@ define i32 @f10(i16 *%src) {
; CHECK: agfi %r2, -524290
; CHECK: lh %r2, 0(%r2)
; CHECK: br %r14
- %ptr = getelementptr i16 *%src, i64 -262145
- %half = load i16 *%ptr
+ %ptr = getelementptr i16, i16 *%src, i64 -262145
+ %half = load i16 , i16 *%ptr
%ext = sext i16 %half to i32
ret i32 %ext
}
@@ -121,7 +121,7 @@ define i32 @f11(i64 %src, i64 %index) {
%add1 = add i64 %src, %index
%add2 = add i64 %add1, 4094
%ptr = inttoptr i64 %add2 to i16 *
- %half = load i16 *%ptr
+ %half = load i16 , i16 *%ptr
%ext = sext i16 %half to i32
ret i32 %ext
}
@@ -134,7 +134,7 @@ define i32 @f12(i64 %src, i64 %index) {
%add1 = add i64 %src, %index
%add2 = add i64 %add1, 4096
%ptr = inttoptr i64 %add2 to i16 *
- %half = load i16 *%ptr
+ %half = load i16 , i16 *%ptr
%ext = sext i16 %half to i32
ret i32 %ext
}
@@ -145,22 +145,22 @@ define void @f13(i32 *%ptr) {
; CHECK-LABEL: f13:
; CHECK: lh {{%r[0-9]+}}, 16{{[26]}}(%r15)
; CHECK: br %r14
- %val0 = load volatile i32 *%ptr
- %val1 = load volatile i32 *%ptr
- %val2 = load volatile i32 *%ptr
- %val3 = load volatile i32 *%ptr
- %val4 = load volatile i32 *%ptr
- %val5 = load volatile i32 *%ptr
- %val6 = load volatile i32 *%ptr
- %val7 = load volatile i32 *%ptr
- %val8 = load volatile i32 *%ptr
- %val9 = load volatile i32 *%ptr
- %val10 = load volatile i32 *%ptr
- %val11 = load volatile i32 *%ptr
- %val12 = load volatile i32 *%ptr
- %val13 = load volatile i32 *%ptr
- %val14 = load volatile i32 *%ptr
- %val15 = load volatile i32 *%ptr
+ %val0 = load volatile i32 , i32 *%ptr
+ %val1 = load volatile i32 , i32 *%ptr
+ %val2 = load volatile i32 , i32 *%ptr
+ %val3 = load volatile i32 , i32 *%ptr
+ %val4 = load volatile i32 , i32 *%ptr
+ %val5 = load volatile i32 , i32 *%ptr
+ %val6 = load volatile i32 , i32 *%ptr
+ %val7 = load volatile i32 , i32 *%ptr
+ %val8 = load volatile i32 , i32 *%ptr
+ %val9 = load volatile i32 , i32 *%ptr
+ %val10 = load volatile i32 , i32 *%ptr
+ %val11 = load volatile i32 , i32 *%ptr
+ %val12 = load volatile i32 , i32 *%ptr
+ %val13 = load volatile i32 , i32 *%ptr
+ %val14 = load volatile i32 , i32 *%ptr
+ %val15 = load volatile i32 , i32 *%ptr
%trunc0 = trunc i32 %val0 to i16
%trunc1 = trunc i32 %val1 to i16
diff --git a/test/CodeGen/SystemZ/int-conv-06.ll b/test/CodeGen/SystemZ/int-conv-06.ll
index 33860d12270f..99ff84efbe76 100644
--- a/test/CodeGen/SystemZ/int-conv-06.ll
+++ b/test/CodeGen/SystemZ/int-conv-06.ll
@@ -37,7 +37,7 @@ define i32 @f4(i16 *%src) {
; CHECK-LABEL: f4:
; CHECK: llh %r2, 0(%r2)
; CHECK: br %r14
- %half = load i16 *%src
+ %half = load i16 , i16 *%src
%ext = zext i16 %half to i32
ret i32 %ext
}
@@ -47,8 +47,8 @@ define i32 @f5(i16 *%src) {
; CHECK-LABEL: f5:
; CHECK: llh %r2, 524286(%r2)
; CHECK: br %r14
- %ptr = getelementptr i16 *%src, i64 262143
- %half = load i16 *%ptr
+ %ptr = getelementptr i16, i16 *%src, i64 262143
+ %half = load i16 , i16 *%ptr
%ext = zext i16 %half to i32
ret i32 %ext
}
@@ -60,8 +60,8 @@ define i32 @f6(i16 *%src) {
; CHECK: agfi %r2, 524288
; CHECK: llh %r2, 0(%r2)
; CHECK: br %r14
- %ptr = getelementptr i16 *%src, i64 262144
- %half = load i16 *%ptr
+ %ptr = getelementptr i16, i16 *%src, i64 262144
+ %half = load i16 , i16 *%ptr
%ext = zext i16 %half to i32
ret i32 %ext
}
@@ -71,8 +71,8 @@ define i32 @f7(i16 *%src) {
; CHECK-LABEL: f7:
; CHECK: llh %r2, -2(%r2)
; CHECK: br %r14
- %ptr = getelementptr i16 *%src, i64 -1
- %half = load i16 *%ptr
+ %ptr = getelementptr i16, i16 *%src, i64 -1
+ %half = load i16 , i16 *%ptr
%ext = zext i16 %half to i32
ret i32 %ext
}
@@ -82,8 +82,8 @@ define i32 @f8(i16 *%src) {
; CHECK-LABEL: f8:
; CHECK: llh %r2, -524288(%r2)
; CHECK: br %r14
- %ptr = getelementptr i16 *%src, i64 -262144
- %half = load i16 *%ptr
+ %ptr = getelementptr i16, i16 *%src, i64 -262144
+ %half = load i16 , i16 *%ptr
%ext = zext i16 %half to i32
ret i32 %ext
}
@@ -95,8 +95,8 @@ define i32 @f9(i16 *%src) {
; CHECK: agfi %r2, -524290
; CHECK: llh %r2, 0(%r2)
; CHECK: br %r14
- %ptr = getelementptr i16 *%src, i64 -262145
- %half = load i16 *%ptr
+ %ptr = getelementptr i16, i16 *%src, i64 -262145
+ %half = load i16 , i16 *%ptr
%ext = zext i16 %half to i32
ret i32 %ext
}
@@ -109,7 +109,7 @@ define i32 @f10(i64 %src, i64 %index) {
%add1 = add i64 %src, %index
%add2 = add i64 %add1, 524287
%ptr = inttoptr i64 %add2 to i16 *
- %half = load i16 *%ptr
+ %half = load i16 , i16 *%ptr
%ext = zext i16 %half to i32
ret i32 %ext
}
@@ -120,22 +120,22 @@ define void @f11(i32 *%ptr) {
; CHECK-LABEL: f11:
; CHECK: llh {{%r[0-9]+}}, 16{{[26]}}(%r15)
; CHECK: br %r14
- %val0 = load volatile i32 *%ptr
- %val1 = load volatile i32 *%ptr
- %val2 = load volatile i32 *%ptr
- %val3 = load volatile i32 *%ptr
- %val4 = load volatile i32 *%ptr
- %val5 = load volatile i32 *%ptr
- %val6 = load volatile i32 *%ptr
- %val7 = load volatile i32 *%ptr
- %val8 = load volatile i32 *%ptr
- %val9 = load volatile i32 *%ptr
- %val10 = load volatile i32 *%ptr
- %val11 = load volatile i32 *%ptr
- %val12 = load volatile i32 *%ptr
- %val13 = load volatile i32 *%ptr
- %val14 = load volatile i32 *%ptr
- %val15 = load volatile i32 *%ptr
+ %val0 = load volatile i32 , i32 *%ptr
+ %val1 = load volatile i32 , i32 *%ptr
+ %val2 = load volatile i32 , i32 *%ptr
+ %val3 = load volatile i32 , i32 *%ptr
+ %val4 = load volatile i32 , i32 *%ptr
+ %val5 = load volatile i32 , i32 *%ptr
+ %val6 = load volatile i32 , i32 *%ptr
+ %val7 = load volatile i32 , i32 *%ptr
+ %val8 = load volatile i32 , i32 *%ptr
+ %val9 = load volatile i32 , i32 *%ptr
+ %val10 = load volatile i32 , i32 *%ptr
+ %val11 = load volatile i32 , i32 *%ptr
+ %val12 = load volatile i32 , i32 *%ptr
+ %val13 = load volatile i32 , i32 *%ptr
+ %val14 = load volatile i32 , i32 *%ptr
+ %val15 = load volatile i32 , i32 *%ptr
%trunc0 = trunc i32 %val0 to i16
%trunc1 = trunc i32 %val1 to i16
diff --git a/test/CodeGen/SystemZ/int-conv-07.ll b/test/CodeGen/SystemZ/int-conv-07.ll
index 4b78c773d1ea..5e989e4737e8 100644
--- a/test/CodeGen/SystemZ/int-conv-07.ll
+++ b/test/CodeGen/SystemZ/int-conv-07.ll
@@ -27,7 +27,7 @@ define i64 @f3(i16 *%src) {
; CHECK-LABEL: f3:
; CHECK: lgh %r2, 0(%r2)
; CHECK: br %r14
- %half = load i16 *%src
+ %half = load i16 , i16 *%src
%ext = sext i16 %half to i64
ret i64 %ext
}
@@ -37,8 +37,8 @@ define i64 @f4(i16 *%src) {
; CHECK-LABEL: f4:
; CHECK: lgh %r2, 524286(%r2)
; CHECK: br %r14
- %ptr = getelementptr i16 *%src, i64 262143
- %half = load i16 *%ptr
+ %ptr = getelementptr i16, i16 *%src, i64 262143
+ %half = load i16 , i16 *%ptr
%ext = sext i16 %half to i64
ret i64 %ext
}
@@ -50,8 +50,8 @@ define i64 @f5(i16 *%src) {
; CHECK: agfi %r2, 524288
; CHECK: lgh %r2, 0(%r2)
; CHECK: br %r14
- %ptr = getelementptr i16 *%src, i64 262144
- %half = load i16 *%ptr
+ %ptr = getelementptr i16, i16 *%src, i64 262144
+ %half = load i16 , i16 *%ptr
%ext = sext i16 %half to i64
ret i64 %ext
}
@@ -61,8 +61,8 @@ define i64 @f6(i16 *%src) {
; CHECK-LABEL: f6:
; CHECK: lgh %r2, -2(%r2)
; CHECK: br %r14
- %ptr = getelementptr i16 *%src, i64 -1
- %half = load i16 *%ptr
+ %ptr = getelementptr i16, i16 *%src, i64 -1
+ %half = load i16 , i16 *%ptr
%ext = sext i16 %half to i64
ret i64 %ext
}
@@ -72,8 +72,8 @@ define i64 @f7(i16 *%src) {
; CHECK-LABEL: f7:
; CHECK: lgh %r2, -524288(%r2)
; CHECK: br %r14
- %ptr = getelementptr i16 *%src, i64 -262144
- %half = load i16 *%ptr
+ %ptr = getelementptr i16, i16 *%src, i64 -262144
+ %half = load i16 , i16 *%ptr
%ext = sext i16 %half to i64
ret i64 %ext
}
@@ -85,8 +85,8 @@ define i64 @f8(i16 *%src) {
; CHECK: agfi %r2, -524290
; CHECK: lgh %r2, 0(%r2)
; CHECK: br %r14
- %ptr = getelementptr i16 *%src, i64 -262145
- %half = load i16 *%ptr
+ %ptr = getelementptr i16, i16 *%src, i64 -262145
+ %half = load i16 , i16 *%ptr
%ext = sext i16 %half to i64
ret i64 %ext
}
@@ -99,7 +99,7 @@ define i64 @f9(i64 %src, i64 %index) {
%add1 = add i64 %src, %index
%add2 = add i64 %add1, 524287
%ptr = inttoptr i64 %add2 to i16 *
- %half = load i16 *%ptr
+ %half = load i16 , i16 *%ptr
%ext = sext i16 %half to i64
ret i64 %ext
}
@@ -110,22 +110,22 @@ define void @f10(i64 *%ptr) {
; CHECK-LABEL: f10:
; CHECK: lgh {{%r[0-9]+}}, 166(%r15)
; CHECK: br %r14
- %val0 = load volatile i64 *%ptr
- %val1 = load volatile i64 *%ptr
- %val2 = load volatile i64 *%ptr
- %val3 = load volatile i64 *%ptr
- %val4 = load volatile i64 *%ptr
- %val5 = load volatile i64 *%ptr
- %val6 = load volatile i64 *%ptr
- %val7 = load volatile i64 *%ptr
- %val8 = load volatile i64 *%ptr
- %val9 = load volatile i64 *%ptr
- %val10 = load volatile i64 *%ptr
- %val11 = load volatile i64 *%ptr
- %val12 = load volatile i64 *%ptr
- %val13 = load volatile i64 *%ptr
- %val14 = load volatile i64 *%ptr
- %val15 = load volatile i64 *%ptr
+ %val0 = load volatile i64 , i64 *%ptr
+ %val1 = load volatile i64 , i64 *%ptr
+ %val2 = load volatile i64 , i64 *%ptr
+ %val3 = load volatile i64 , i64 *%ptr
+ %val4 = load volatile i64 , i64 *%ptr
+ %val5 = load volatile i64 , i64 *%ptr
+ %val6 = load volatile i64 , i64 *%ptr
+ %val7 = load volatile i64 , i64 *%ptr
+ %val8 = load volatile i64 , i64 *%ptr
+ %val9 = load volatile i64 , i64 *%ptr
+ %val10 = load volatile i64 , i64 *%ptr
+ %val11 = load volatile i64 , i64 *%ptr
+ %val12 = load volatile i64 , i64 *%ptr
+ %val13 = load volatile i64 , i64 *%ptr
+ %val14 = load volatile i64 , i64 *%ptr
+ %val15 = load volatile i64 , i64 *%ptr
%trunc0 = trunc i64 %val0 to i16
%trunc1 = trunc i64 %val1 to i16
diff --git a/test/CodeGen/SystemZ/int-conv-08.ll b/test/CodeGen/SystemZ/int-conv-08.ll
index 6b6cb672fb9a..8524dfebe277 100644
--- a/test/CodeGen/SystemZ/int-conv-08.ll
+++ b/test/CodeGen/SystemZ/int-conv-08.ll
@@ -36,7 +36,7 @@ define i64 @f4(i16 *%src) {
; CHECK-LABEL: f4:
; CHECK: llgh %r2, 0(%r2)
; CHECK: br %r14
- %half = load i16 *%src
+ %half = load i16 , i16 *%src
%ext = zext i16 %half to i64
ret i64 %ext
}
@@ -46,8 +46,8 @@ define i64 @f5(i16 *%src) {
; CHECK-LABEL: f5:
; CHECK: llgh %r2, 524286(%r2)
; CHECK: br %r14
- %ptr = getelementptr i16 *%src, i64 262143
- %half = load i16 *%ptr
+ %ptr = getelementptr i16, i16 *%src, i64 262143
+ %half = load i16 , i16 *%ptr
%ext = zext i16 %half to i64
ret i64 %ext
}
@@ -59,8 +59,8 @@ define i64 @f6(i16 *%src) {
; CHECK: agfi %r2, 524288
; CHECK: llgh %r2, 0(%r2)
; CHECK: br %r14
- %ptr = getelementptr i16 *%src, i64 262144
- %half = load i16 *%ptr
+ %ptr = getelementptr i16, i16 *%src, i64 262144
+ %half = load i16 , i16 *%ptr
%ext = zext i16 %half to i64
ret i64 %ext
}
@@ -70,8 +70,8 @@ define i64 @f7(i16 *%src) {
; CHECK-LABEL: f7:
; CHECK: llgh %r2, -2(%r2)
; CHECK: br %r14
- %ptr = getelementptr i16 *%src, i64 -1
- %half = load i16 *%ptr
+ %ptr = getelementptr i16, i16 *%src, i64 -1
+ %half = load i16 , i16 *%ptr
%ext = zext i16 %half to i64
ret i64 %ext
}
@@ -81,8 +81,8 @@ define i64 @f8(i16 *%src) {
; CHECK-LABEL: f8:
; CHECK: llgh %r2, -524288(%r2)
; CHECK: br %r14
- %ptr = getelementptr i16 *%src, i64 -262144
- %half = load i16 *%ptr
+ %ptr = getelementptr i16, i16 *%src, i64 -262144
+ %half = load i16 , i16 *%ptr
%ext = zext i16 %half to i64
ret i64 %ext
}
@@ -94,8 +94,8 @@ define i64 @f9(i16 *%src) {
; CHECK: agfi %r2, -524290
; CHECK: llgh %r2, 0(%r2)
; CHECK: br %r14
- %ptr = getelementptr i16 *%src, i64 -262145
- %half = load i16 *%ptr
+ %ptr = getelementptr i16, i16 *%src, i64 -262145
+ %half = load i16 , i16 *%ptr
%ext = zext i16 %half to i64
ret i64 %ext
}
@@ -108,7 +108,7 @@ define i64 @f10(i64 %src, i64 %index) {
%add1 = add i64 %src, %index
%add2 = add i64 %add1, 524287
%ptr = inttoptr i64 %add2 to i16 *
- %half = load i16 *%ptr
+ %half = load i16 , i16 *%ptr
%ext = zext i16 %half to i64
ret i64 %ext
}
@@ -119,22 +119,22 @@ define void @f11(i64 *%ptr) {
; CHECK-LABEL: f11:
; CHECK: llgh {{%r[0-9]+}}, 166(%r15)
; CHECK: br %r14
- %val0 = load volatile i64 *%ptr
- %val1 = load volatile i64 *%ptr
- %val2 = load volatile i64 *%ptr
- %val3 = load volatile i64 *%ptr
- %val4 = load volatile i64 *%ptr
- %val5 = load volatile i64 *%ptr
- %val6 = load volatile i64 *%ptr
- %val7 = load volatile i64 *%ptr
- %val8 = load volatile i64 *%ptr
- %val9 = load volatile i64 *%ptr
- %val10 = load volatile i64 *%ptr
- %val11 = load volatile i64 *%ptr
- %val12 = load volatile i64 *%ptr
- %val13 = load volatile i64 *%ptr
- %val14 = load volatile i64 *%ptr
- %val15 = load volatile i64 *%ptr
+ %val0 = load volatile i64 , i64 *%ptr
+ %val1 = load volatile i64 , i64 *%ptr
+ %val2 = load volatile i64 , i64 *%ptr
+ %val3 = load volatile i64 , i64 *%ptr
+ %val4 = load volatile i64 , i64 *%ptr
+ %val5 = load volatile i64 , i64 *%ptr
+ %val6 = load volatile i64 , i64 *%ptr
+ %val7 = load volatile i64 , i64 *%ptr
+ %val8 = load volatile i64 , i64 *%ptr
+ %val9 = load volatile i64 , i64 *%ptr
+ %val10 = load volatile i64 , i64 *%ptr
+ %val11 = load volatile i64 , i64 *%ptr
+ %val12 = load volatile i64 , i64 *%ptr
+ %val13 = load volatile i64 , i64 *%ptr
+ %val14 = load volatile i64 , i64 *%ptr
+ %val15 = load volatile i64 , i64 *%ptr
%trunc0 = trunc i64 %val0 to i16
%trunc1 = trunc i64 %val1 to i16
diff --git a/test/CodeGen/SystemZ/int-conv-09.ll b/test/CodeGen/SystemZ/int-conv-09.ll
index b9c508917d4d..ffd20491030a 100644
--- a/test/CodeGen/SystemZ/int-conv-09.ll
+++ b/test/CodeGen/SystemZ/int-conv-09.ll
@@ -26,7 +26,7 @@ define i64 @f3(i32 *%src) {
; CHECK-LABEL: f3:
; CHECK: lgf %r2, 0(%r2)
; CHECK: br %r14
- %word = load i32 *%src
+ %word = load i32 , i32 *%src
%ext = sext i32 %word to i64
ret i64 %ext
}
@@ -36,8 +36,8 @@ define i64 @f4(i32 *%src) {
; CHECK-LABEL: f4:
; CHECK: lgf %r2, 524284(%r2)
; CHECK: br %r14
- %ptr = getelementptr i32 *%src, i64 131071
- %word = load i32 *%ptr
+ %ptr = getelementptr i32, i32 *%src, i64 131071
+ %word = load i32 , i32 *%ptr
%ext = sext i32 %word to i64
ret i64 %ext
}
@@ -49,8 +49,8 @@ define i64 @f5(i32 *%src) {
; CHECK: agfi %r2, 524288
; CHECK: lgf %r2, 0(%r2)
; CHECK: br %r14
- %ptr = getelementptr i32 *%src, i64 131072
- %word = load i32 *%ptr
+ %ptr = getelementptr i32, i32 *%src, i64 131072
+ %word = load i32 , i32 *%ptr
%ext = sext i32 %word to i64
ret i64 %ext
}
@@ -60,8 +60,8 @@ define i64 @f6(i32 *%src) {
; CHECK-LABEL: f6:
; CHECK: lgf %r2, -4(%r2)
; CHECK: br %r14
- %ptr = getelementptr i32 *%src, i64 -1
- %word = load i32 *%ptr
+ %ptr = getelementptr i32, i32 *%src, i64 -1
+ %word = load i32 , i32 *%ptr
%ext = sext i32 %word to i64
ret i64 %ext
}
@@ -71,8 +71,8 @@ define i64 @f7(i32 *%src) {
; CHECK-LABEL: f7:
; CHECK: lgf %r2, -524288(%r2)
; CHECK: br %r14
- %ptr = getelementptr i32 *%src, i64 -131072
- %word = load i32 *%ptr
+ %ptr = getelementptr i32, i32 *%src, i64 -131072
+ %word = load i32 , i32 *%ptr
%ext = sext i32 %word to i64
ret i64 %ext
}
@@ -84,8 +84,8 @@ define i64 @f8(i32 *%src) {
; CHECK: agfi %r2, -524292
; CHECK: lgf %r2, 0(%r2)
; CHECK: br %r14
- %ptr = getelementptr i32 *%src, i64 -131073
- %word = load i32 *%ptr
+ %ptr = getelementptr i32, i32 *%src, i64 -131073
+ %word = load i32 , i32 *%ptr
%ext = sext i32 %word to i64
ret i64 %ext
}
@@ -98,7 +98,7 @@ define i64 @f9(i64 %src, i64 %index) {
%add1 = add i64 %src, %index
%add2 = add i64 %add1, 524287
%ptr = inttoptr i64 %add2 to i32 *
- %word = load i32 *%ptr
+ %word = load i32 , i32 *%ptr
%ext = sext i32 %word to i64
ret i64 %ext
}
diff --git a/test/CodeGen/SystemZ/int-conv-10.ll b/test/CodeGen/SystemZ/int-conv-10.ll
index 781c74c7fa23..a5e74061017e 100644
--- a/test/CodeGen/SystemZ/int-conv-10.ll
+++ b/test/CodeGen/SystemZ/int-conv-10.ll
@@ -35,7 +35,7 @@ define i64 @f4(i32 *%src) {
; CHECK-LABEL: f4:
; CHECK: llgf %r2, 0(%r2)
; CHECK: br %r14
- %word = load i32 *%src
+ %word = load i32 , i32 *%src
%ext = zext i32 %word to i64
ret i64 %ext
}
@@ -45,8 +45,8 @@ define i64 @f5(i32 *%src) {
; CHECK-LABEL: f5:
; CHECK: llgf %r2, 524284(%r2)
; CHECK: br %r14
- %ptr = getelementptr i32 *%src, i64 131071
- %word = load i32 *%ptr
+ %ptr = getelementptr i32, i32 *%src, i64 131071
+ %word = load i32 , i32 *%ptr
%ext = zext i32 %word to i64
ret i64 %ext
}
@@ -58,8 +58,8 @@ define i64 @f6(i32 *%src) {
; CHECK: agfi %r2, 524288
; CHECK: llgf %r2, 0(%r2)
; CHECK: br %r14
- %ptr = getelementptr i32 *%src, i64 131072
- %word = load i32 *%ptr
+ %ptr = getelementptr i32, i32 *%src, i64 131072
+ %word = load i32 , i32 *%ptr
%ext = zext i32 %word to i64
ret i64 %ext
}
@@ -69,8 +69,8 @@ define i64 @f7(i32 *%src) {
; CHECK-LABEL: f7:
; CHECK: llgf %r2, -4(%r2)
; CHECK: br %r14
- %ptr = getelementptr i32 *%src, i64 -1
- %word = load i32 *%ptr
+ %ptr = getelementptr i32, i32 *%src, i64 -1
+ %word = load i32 , i32 *%ptr
%ext = zext i32 %word to i64
ret i64 %ext
}
@@ -80,8 +80,8 @@ define i64 @f8(i32 *%src) {
; CHECK-LABEL: f8:
; CHECK: llgf %r2, -524288(%r2)
; CHECK: br %r14
- %ptr = getelementptr i32 *%src, i64 -131072
- %word = load i32 *%ptr
+ %ptr = getelementptr i32, i32 *%src, i64 -131072
+ %word = load i32 , i32 *%ptr
%ext = zext i32 %word to i64
ret i64 %ext
}
@@ -93,8 +93,8 @@ define i64 @f9(i32 *%src) {
; CHECK: agfi %r2, -524292
; CHECK: llgf %r2, 0(%r2)
; CHECK: br %r14
- %ptr = getelementptr i32 *%src, i64 -131073
- %word = load i32 *%ptr
+ %ptr = getelementptr i32, i32 *%src, i64 -131073
+ %word = load i32 , i32 *%ptr
%ext = zext i32 %word to i64
ret i64 %ext
}
@@ -107,7 +107,7 @@ define i64 @f10(i64 %src, i64 %index) {
%add1 = add i64 %src, %index
%add2 = add i64 %add1, 524287
%ptr = inttoptr i64 %add2 to i32 *
- %word = load i32 *%ptr
+ %word = load i32 , i32 *%ptr
%ext = zext i32 %word to i64
ret i64 %ext
}
diff --git a/test/CodeGen/SystemZ/int-conv-11.ll b/test/CodeGen/SystemZ/int-conv-11.ll
index 30769621bf82..cfa0870cd5d2 100644
--- a/test/CodeGen/SystemZ/int-conv-11.ll
+++ b/test/CodeGen/SystemZ/int-conv-11.ll
@@ -8,38 +8,38 @@ define void @f1(i32 *%ptr) {
; CHECK-LABEL: f1:
; CHECK: llc{{h?}} {{%r[0-9]+}}, 16{{[37]}}(%r15)
; CHECK: br %r14
- %val0 = load volatile i32 *%ptr
- %val1 = load volatile i32 *%ptr
- %val2 = load volatile i32 *%ptr
- %val3 = load volatile i32 *%ptr
- %val4 = load volatile i32 *%ptr
- %val5 = load volatile i32 *%ptr
- %val6 = load volatile i32 *%ptr
- %val7 = load volatile i32 *%ptr
- %val8 = load volatile i32 *%ptr
- %val9 = load volatile i32 *%ptr
- %val10 = load volatile i32 *%ptr
- %val11 = load volatile i32 *%ptr
- %val12 = load volatile i32 *%ptr
- %val13 = load volatile i32 *%ptr
- %val14 = load volatile i32 *%ptr
- %val15 = load volatile i32 *%ptr
- %val16 = load volatile i32 *%ptr
- %val17 = load volatile i32 *%ptr
- %val18 = load volatile i32 *%ptr
- %val19 = load volatile i32 *%ptr
- %val20 = load volatile i32 *%ptr
- %val21 = load volatile i32 *%ptr
- %val22 = load volatile i32 *%ptr
- %val23 = load volatile i32 *%ptr
- %val24 = load volatile i32 *%ptr
- %val25 = load volatile i32 *%ptr
- %val26 = load volatile i32 *%ptr
- %val27 = load volatile i32 *%ptr
- %val28 = load volatile i32 *%ptr
- %val29 = load volatile i32 *%ptr
- %val30 = load volatile i32 *%ptr
- %val31 = load volatile i32 *%ptr
+ %val0 = load volatile i32 , i32 *%ptr
+ %val1 = load volatile i32 , i32 *%ptr
+ %val2 = load volatile i32 , i32 *%ptr
+ %val3 = load volatile i32 , i32 *%ptr
+ %val4 = load volatile i32 , i32 *%ptr
+ %val5 = load volatile i32 , i32 *%ptr
+ %val6 = load volatile i32 , i32 *%ptr
+ %val7 = load volatile i32 , i32 *%ptr
+ %val8 = load volatile i32 , i32 *%ptr
+ %val9 = load volatile i32 , i32 *%ptr
+ %val10 = load volatile i32 , i32 *%ptr
+ %val11 = load volatile i32 , i32 *%ptr
+ %val12 = load volatile i32 , i32 *%ptr
+ %val13 = load volatile i32 , i32 *%ptr
+ %val14 = load volatile i32 , i32 *%ptr
+ %val15 = load volatile i32 , i32 *%ptr
+ %val16 = load volatile i32 , i32 *%ptr
+ %val17 = load volatile i32 , i32 *%ptr
+ %val18 = load volatile i32 , i32 *%ptr
+ %val19 = load volatile i32 , i32 *%ptr
+ %val20 = load volatile i32 , i32 *%ptr
+ %val21 = load volatile i32 , i32 *%ptr
+ %val22 = load volatile i32 , i32 *%ptr
+ %val23 = load volatile i32 , i32 *%ptr
+ %val24 = load volatile i32 , i32 *%ptr
+ %val25 = load volatile i32 , i32 *%ptr
+ %val26 = load volatile i32 , i32 *%ptr
+ %val27 = load volatile i32 , i32 *%ptr
+ %val28 = load volatile i32 , i32 *%ptr
+ %val29 = load volatile i32 , i32 *%ptr
+ %val30 = load volatile i32 , i32 *%ptr
+ %val31 = load volatile i32 , i32 *%ptr
%trunc0 = trunc i32 %val0 to i8
%trunc1 = trunc i32 %val1 to i8
@@ -181,38 +181,38 @@ define void @f2(i32 *%ptr) {
; CHECK-LABEL: f2:
; CHECK: llh{{h?}} {{%r[0-9]+}}, 16{{[26]}}(%r15)
; CHECK: br %r14
- %val0 = load volatile i32 *%ptr
- %val1 = load volatile i32 *%ptr
- %val2 = load volatile i32 *%ptr
- %val3 = load volatile i32 *%ptr
- %val4 = load volatile i32 *%ptr
- %val5 = load volatile i32 *%ptr
- %val6 = load volatile i32 *%ptr
- %val7 = load volatile i32 *%ptr
- %val8 = load volatile i32 *%ptr
- %val9 = load volatile i32 *%ptr
- %val10 = load volatile i32 *%ptr
- %val11 = load volatile i32 *%ptr
- %val12 = load volatile i32 *%ptr
- %val13 = load volatile i32 *%ptr
- %val14 = load volatile i32 *%ptr
- %val15 = load volatile i32 *%ptr
- %val16 = load volatile i32 *%ptr
- %val17 = load volatile i32 *%ptr
- %val18 = load volatile i32 *%ptr
- %val19 = load volatile i32 *%ptr
- %val20 = load volatile i32 *%ptr
- %val21 = load volatile i32 *%ptr
- %val22 = load volatile i32 *%ptr
- %val23 = load volatile i32 *%ptr
- %val24 = load volatile i32 *%ptr
- %val25 = load volatile i32 *%ptr
- %val26 = load volatile i32 *%ptr
- %val27 = load volatile i32 *%ptr
- %val28 = load volatile i32 *%ptr
- %val29 = load volatile i32 *%ptr
- %val30 = load volatile i32 *%ptr
- %val31 = load volatile i32 *%ptr
+ %val0 = load volatile i32 , i32 *%ptr
+ %val1 = load volatile i32 , i32 *%ptr
+ %val2 = load volatile i32 , i32 *%ptr
+ %val3 = load volatile i32 , i32 *%ptr
+ %val4 = load volatile i32 , i32 *%ptr
+ %val5 = load volatile i32 , i32 *%ptr
+ %val6 = load volatile i32 , i32 *%ptr
+ %val7 = load volatile i32 , i32 *%ptr
+ %val8 = load volatile i32 , i32 *%ptr
+ %val9 = load volatile i32 , i32 *%ptr
+ %val10 = load volatile i32 , i32 *%ptr
+ %val11 = load volatile i32 , i32 *%ptr
+ %val12 = load volatile i32 , i32 *%ptr
+ %val13 = load volatile i32 , i32 *%ptr
+ %val14 = load volatile i32 , i32 *%ptr
+ %val15 = load volatile i32 , i32 *%ptr
+ %val16 = load volatile i32 , i32 *%ptr
+ %val17 = load volatile i32 , i32 *%ptr
+ %val18 = load volatile i32 , i32 *%ptr
+ %val19 = load volatile i32 , i32 *%ptr
+ %val20 = load volatile i32 , i32 *%ptr
+ %val21 = load volatile i32 , i32 *%ptr
+ %val22 = load volatile i32 , i32 *%ptr
+ %val23 = load volatile i32 , i32 *%ptr
+ %val24 = load volatile i32 , i32 *%ptr
+ %val25 = load volatile i32 , i32 *%ptr
+ %val26 = load volatile i32 , i32 *%ptr
+ %val27 = load volatile i32 , i32 *%ptr
+ %val28 = load volatile i32 , i32 *%ptr
+ %val29 = load volatile i32 , i32 *%ptr
+ %val30 = load volatile i32 , i32 *%ptr
+ %val31 = load volatile i32 , i32 *%ptr
%trunc0 = trunc i32 %val0 to i16
%trunc1 = trunc i32 %val1 to i16
diff --git a/test/CodeGen/SystemZ/int-div-01.ll b/test/CodeGen/SystemZ/int-div-01.ll
index 2c21186e3369..1442109dc23c 100644
--- a/test/CodeGen/SystemZ/int-div-01.ll
+++ b/test/CodeGen/SystemZ/int-div-01.ll
@@ -69,7 +69,7 @@ define i32 @f5(i32 %dummy, i32 *%src, i32 %b) {
; CHECK-NOT: dsgfr
; CHECK: or %r2, %r3
; CHECK: br %r14
- %a = load i32 *%src
+ %a = load i32 , i32 *%src
%div = sdiv i32 %a, %b
%rem = srem i32 %a, %b
%or = or i32 %rem, %div
@@ -83,7 +83,7 @@ define void @f6(i32 *%dest, i32 %a, i32 *%src) {
; CHECK: dsgf %r0, 0(%r4)
; CHECK: st %r1, 0(%r2)
; CHECK: br %r14
- %b = load i32 *%src
+ %b = load i32 , i32 *%src
%div = sdiv i32 %a, %b
store i32 %div, i32 *%dest
ret void
@@ -96,7 +96,7 @@ define void @f7(i32 *%dest, i32 %a, i32 *%src) {
; CHECK: dsgf %r0, 0(%r4)
; CHECK: st %r0, 0(%r2)
; CHECK: br %r14
- %b = load i32 *%src
+ %b = load i32 , i32 *%src
%rem = srem i32 %a, %b
store i32 %rem, i32 *%dest
ret void
@@ -112,7 +112,7 @@ define i32 @f8(i32 %dummy, i32 %a, i32 *%src) {
; CHECK-NOT: {{dsgf|dsgfr}}
; CHECK: or %r2, %r3
; CHECK: br %r14
- %b = load i32 *%src
+ %b = load i32 , i32 *%src
%div = sdiv i32 %a, %b
%rem = srem i32 %a, %b
%or = or i32 %rem, %div
@@ -124,8 +124,8 @@ define i32 @f9(i32 %dummy, i32 %a, i32 *%src) {
; CHECK-LABEL: f9:
; CHECK: dsgf %r2, 524284(%r4)
; CHECK: br %r14
- %ptr = getelementptr i32 *%src, i64 131071
- %b = load i32 *%ptr
+ %ptr = getelementptr i32, i32 *%src, i64 131071
+ %b = load i32 , i32 *%ptr
%rem = srem i32 %a, %b
ret i32 %rem
}
@@ -137,8 +137,8 @@ define i32 @f10(i32 %dummy, i32 %a, i32 *%src) {
; CHECK: agfi %r4, 524288
; CHECK: dsgf %r2, 0(%r4)
; CHECK: br %r14
- %ptr = getelementptr i32 *%src, i64 131072
- %b = load i32 *%ptr
+ %ptr = getelementptr i32, i32 *%src, i64 131072
+ %b = load i32 , i32 *%ptr
%rem = srem i32 %a, %b
ret i32 %rem
}
@@ -148,8 +148,8 @@ define i32 @f11(i32 %dummy, i32 %a, i32 *%src) {
; CHECK-LABEL: f11:
; CHECK: dsgf %r2, -4(%r4)
; CHECK: br %r14
- %ptr = getelementptr i32 *%src, i64 -1
- %b = load i32 *%ptr
+ %ptr = getelementptr i32, i32 *%src, i64 -1
+ %b = load i32 , i32 *%ptr
%rem = srem i32 %a, %b
ret i32 %rem
}
@@ -159,8 +159,8 @@ define i32 @f12(i32 %dummy, i32 %a, i32 *%src) {
; CHECK-LABEL: f12:
; CHECK: dsgf %r2, -524288(%r4)
; CHECK: br %r14
- %ptr = getelementptr i32 *%src, i64 -131072
- %b = load i32 *%ptr
+ %ptr = getelementptr i32, i32 *%src, i64 -131072
+ %b = load i32 , i32 *%ptr
%rem = srem i32 %a, %b
ret i32 %rem
}
@@ -172,8 +172,8 @@ define i32 @f13(i32 %dummy, i32 %a, i32 *%src) {
; CHECK: agfi %r4, -524292
; CHECK: dsgf %r2, 0(%r4)
; CHECK: br %r14
- %ptr = getelementptr i32 *%src, i64 -131073
- %b = load i32 *%ptr
+ %ptr = getelementptr i32, i32 *%src, i64 -131073
+ %b = load i32 , i32 *%ptr
%rem = srem i32 %a, %b
ret i32 %rem
}
@@ -186,7 +186,7 @@ define i32 @f14(i32 %dummy, i32 %a, i64 %src, i64 %index) {
%add1 = add i64 %src, %index
%add2 = add i64 %add1, 524287
%ptr = inttoptr i64 %add2 to i32 *
- %b = load i32 *%ptr
+ %b = load i32 , i32 *%ptr
%rem = srem i32 %a, %b
ret i32 %rem
}
@@ -200,7 +200,7 @@ define void @f15(i32 *%dest, i32 *%src) {
; CHECK: lgfr %r1, %r2
; CHECK: dsgfr %r0, [[B]]
; CHECK: br %r14
- %b = load i32 *%src
+ %b = load i32 , i32 *%src
%a = call i32 @foo()
%div = sdiv i32 %a, %b
store i32 %div, i32 *%dest
@@ -213,26 +213,26 @@ define i32 @f16(i32 *%ptr0) {
; CHECK: brasl %r14, foo@PLT
; CHECK: dsgf {{%r[0-9]+}}, 16{{[04]}}(%r15)
; CHECK: br %r14
- %ptr1 = getelementptr i32 *%ptr0, i64 2
- %ptr2 = getelementptr i32 *%ptr0, i64 4
- %ptr3 = getelementptr i32 *%ptr0, i64 6
- %ptr4 = getelementptr i32 *%ptr0, i64 8
- %ptr5 = getelementptr i32 *%ptr0, i64 10
- %ptr6 = getelementptr i32 *%ptr0, i64 12
- %ptr7 = getelementptr i32 *%ptr0, i64 14
- %ptr8 = getelementptr i32 *%ptr0, i64 16
- %ptr9 = getelementptr i32 *%ptr0, i64 18
-
- %val0 = load i32 *%ptr0
- %val1 = load i32 *%ptr1
- %val2 = load i32 *%ptr2
- %val3 = load i32 *%ptr3
- %val4 = load i32 *%ptr4
- %val5 = load i32 *%ptr5
- %val6 = load i32 *%ptr6
- %val7 = load i32 *%ptr7
- %val8 = load i32 *%ptr8
- %val9 = load i32 *%ptr9
+ %ptr1 = getelementptr i32, i32 *%ptr0, i64 2
+ %ptr2 = getelementptr i32, i32 *%ptr0, i64 4
+ %ptr3 = getelementptr i32, i32 *%ptr0, i64 6
+ %ptr4 = getelementptr i32, i32 *%ptr0, i64 8
+ %ptr5 = getelementptr i32, i32 *%ptr0, i64 10
+ %ptr6 = getelementptr i32, i32 *%ptr0, i64 12
+ %ptr7 = getelementptr i32, i32 *%ptr0, i64 14
+ %ptr8 = getelementptr i32, i32 *%ptr0, i64 16
+ %ptr9 = getelementptr i32, i32 *%ptr0, i64 18
+
+ %val0 = load i32 , i32 *%ptr0
+ %val1 = load i32 , i32 *%ptr1
+ %val2 = load i32 , i32 *%ptr2
+ %val3 = load i32 , i32 *%ptr3
+ %val4 = load i32 , i32 *%ptr4
+ %val5 = load i32 , i32 *%ptr5
+ %val6 = load i32 , i32 *%ptr6
+ %val7 = load i32 , i32 *%ptr7
+ %val8 = load i32 , i32 *%ptr8
+ %val9 = load i32 , i32 *%ptr9
%ret = call i32 @foo()
diff --git a/test/CodeGen/SystemZ/int-div-02.ll b/test/CodeGen/SystemZ/int-div-02.ll
index f3287a56c6cd..1a4b4d95c936 100644
--- a/test/CodeGen/SystemZ/int-div-02.ll
+++ b/test/CodeGen/SystemZ/int-div-02.ll
@@ -57,7 +57,7 @@ define void @f4(i32 %dummy, i32 %a, i32 *%src, i32 *%dest) {
; CHECK: dl %r2, 0(%r4)
; CHECK: st %r3, 0(%r5)
; CHECK: br %r14
- %b = load i32 *%src
+ %b = load i32 , i32 *%src
%div = udiv i32 %a, %b
store i32 %div, i32 *%dest
ret void
@@ -72,7 +72,7 @@ define void @f5(i32 %dummy, i32 %a, i32 *%src, i32 *%dest) {
; CHECK: dl %r2, 0(%r4)
; CHECK: st %r2, 0(%r5)
; CHECK: br %r14
- %b = load i32 *%src
+ %b = load i32 , i32 *%src
%rem = urem i32 %a, %b
store i32 %rem, i32 *%dest
ret void
@@ -88,7 +88,7 @@ define i32 @f6(i32 %dummy, i32 %a, i32 *%src) {
; CHECK-NOT: {{dl|dlr}}
; CHECK: or %r2, %r3
; CHECK: br %r14
- %b = load i32 *%src
+ %b = load i32 , i32 *%src
%div = udiv i32 %a, %b
%rem = urem i32 %a, %b
%or = or i32 %rem, %div
@@ -100,8 +100,8 @@ define i32 @f7(i32 %dummy, i32 %a, i32 *%src) {
; CHECK-LABEL: f7:
; CHECK: dl %r2, 524284(%r4)
; CHECK: br %r14
- %ptr = getelementptr i32 *%src, i64 131071
- %b = load i32 *%ptr
+ %ptr = getelementptr i32, i32 *%src, i64 131071
+ %b = load i32 , i32 *%ptr
%rem = urem i32 %a, %b
ret i32 %rem
}
@@ -113,8 +113,8 @@ define i32 @f8(i32 %dummy, i32 %a, i32 *%src) {
; CHECK: agfi %r4, 524288
; CHECK: dl %r2, 0(%r4)
; CHECK: br %r14
- %ptr = getelementptr i32 *%src, i64 131072
- %b = load i32 *%ptr
+ %ptr = getelementptr i32, i32 *%src, i64 131072
+ %b = load i32 , i32 *%ptr
%rem = urem i32 %a, %b
ret i32 %rem
}
@@ -124,8 +124,8 @@ define i32 @f9(i32 %dummy, i32 %a, i32 *%src) {
; CHECK-LABEL: f9:
; CHECK: dl %r2, -4(%r4)
; CHECK: br %r14
- %ptr = getelementptr i32 *%src, i64 -1
- %b = load i32 *%ptr
+ %ptr = getelementptr i32, i32 *%src, i64 -1
+ %b = load i32 , i32 *%ptr
%rem = urem i32 %a, %b
ret i32 %rem
}
@@ -135,8 +135,8 @@ define i32 @f10(i32 %dummy, i32 %a, i32 *%src) {
; CHECK-LABEL: f10:
; CHECK: dl %r2, -524288(%r4)
; CHECK: br %r14
- %ptr = getelementptr i32 *%src, i64 -131072
- %b = load i32 *%ptr
+ %ptr = getelementptr i32, i32 *%src, i64 -131072
+ %b = load i32 , i32 *%ptr
%rem = urem i32 %a, %b
ret i32 %rem
}
@@ -148,8 +148,8 @@ define i32 @f11(i32 %dummy, i32 %a, i32 *%src) {
; CHECK: agfi %r4, -524292
; CHECK: dl %r2, 0(%r4)
; CHECK: br %r14
- %ptr = getelementptr i32 *%src, i64 -131073
- %b = load i32 *%ptr
+ %ptr = getelementptr i32, i32 *%src, i64 -131073
+ %b = load i32 , i32 *%ptr
%rem = urem i32 %a, %b
ret i32 %rem
}
@@ -162,7 +162,7 @@ define i32 @f12(i32 %dummy, i32 %a, i64 %src, i64 %index) {
%add1 = add i64 %src, %index
%add2 = add i64 %add1, 524287
%ptr = inttoptr i64 %add2 to i32 *
- %b = load i32 *%ptr
+ %b = load i32 , i32 *%ptr
%rem = urem i32 %a, %b
ret i32 %rem
}
@@ -173,26 +173,26 @@ define i32 @f13(i32 *%ptr0) {
; CHECK: brasl %r14, foo@PLT
; CHECK: dl {{%r[0-9]+}}, 16{{[04]}}(%r15)
; CHECK: br %r14
- %ptr1 = getelementptr i32 *%ptr0, i64 2
- %ptr2 = getelementptr i32 *%ptr0, i64 4
- %ptr3 = getelementptr i32 *%ptr0, i64 6
- %ptr4 = getelementptr i32 *%ptr0, i64 8
- %ptr5 = getelementptr i32 *%ptr0, i64 10
- %ptr6 = getelementptr i32 *%ptr0, i64 12
- %ptr7 = getelementptr i32 *%ptr0, i64 14
- %ptr8 = getelementptr i32 *%ptr0, i64 16
- %ptr9 = getelementptr i32 *%ptr0, i64 18
-
- %val0 = load i32 *%ptr0
- %val1 = load i32 *%ptr1
- %val2 = load i32 *%ptr2
- %val3 = load i32 *%ptr3
- %val4 = load i32 *%ptr4
- %val5 = load i32 *%ptr5
- %val6 = load i32 *%ptr6
- %val7 = load i32 *%ptr7
- %val8 = load i32 *%ptr8
- %val9 = load i32 *%ptr9
+ %ptr1 = getelementptr i32, i32 *%ptr0, i64 2
+ %ptr2 = getelementptr i32, i32 *%ptr0, i64 4
+ %ptr3 = getelementptr i32, i32 *%ptr0, i64 6
+ %ptr4 = getelementptr i32, i32 *%ptr0, i64 8
+ %ptr5 = getelementptr i32, i32 *%ptr0, i64 10
+ %ptr6 = getelementptr i32, i32 *%ptr0, i64 12
+ %ptr7 = getelementptr i32, i32 *%ptr0, i64 14
+ %ptr8 = getelementptr i32, i32 *%ptr0, i64 16
+ %ptr9 = getelementptr i32, i32 *%ptr0, i64 18
+
+ %val0 = load i32 , i32 *%ptr0
+ %val1 = load i32 , i32 *%ptr1
+ %val2 = load i32 , i32 *%ptr2
+ %val3 = load i32 , i32 *%ptr3
+ %val4 = load i32 , i32 *%ptr4
+ %val5 = load i32 , i32 *%ptr5
+ %val6 = load i32 , i32 *%ptr6
+ %val7 = load i32 , i32 *%ptr7
+ %val8 = load i32 , i32 *%ptr8
+ %val9 = load i32 , i32 *%ptr9
%ret = call i32 @foo()
diff --git a/test/CodeGen/SystemZ/int-div-03.ll b/test/CodeGen/SystemZ/int-div-03.ll
index 7c0409018f16..37a7c4f748c9 100644
--- a/test/CodeGen/SystemZ/int-div-03.ll
+++ b/test/CodeGen/SystemZ/int-div-03.ll
@@ -75,7 +75,7 @@ define void @f6(i64 %dummy, i64 %a, i32 *%src, i64 *%dest) {
; CHECK: dsgf %r2, 0(%r4)
; CHECK: stg %r3, 0(%r5)
; CHECK: br %r14
- %b = load i32 *%src
+ %b = load i32 , i32 *%src
%bext = sext i32 %b to i64
%div = sdiv i64 %a, %bext
store i64 %div, i64 *%dest
@@ -89,7 +89,7 @@ define void @f7(i64 %dummy, i64 %a, i32 *%src, i64 *%dest) {
; CHECK: dsgf %r2, 0(%r4)
; CHECK: stg %r2, 0(%r5)
; CHECK: br %r14
- %b = load i32 *%src
+ %b = load i32 , i32 *%src
%bext = sext i32 %b to i64
%rem = srem i64 %a, %bext
store i64 %rem, i64 *%dest
@@ -104,7 +104,7 @@ define i64 @f8(i64 %dummy, i64 %a, i32 *%src) {
; CHECK-NOT: {{dsgf|dsgfr}}
; CHECK: ogr %r2, %r3
; CHECK: br %r14
- %b = load i32 *%src
+ %b = load i32 , i32 *%src
%bext = sext i32 %b to i64
%div = sdiv i64 %a, %bext
%rem = srem i64 %a, %bext
@@ -117,8 +117,8 @@ define i64 @f9(i64 %dummy, i64 %a, i32 *%src) {
; CHECK-LABEL: f9:
; CHECK: dsgf %r2, 524284(%r4)
; CHECK: br %r14
- %ptr = getelementptr i32 *%src, i64 131071
- %b = load i32 *%ptr
+ %ptr = getelementptr i32, i32 *%src, i64 131071
+ %b = load i32 , i32 *%ptr
%bext = sext i32 %b to i64
%rem = srem i64 %a, %bext
ret i64 %rem
@@ -131,8 +131,8 @@ define i64 @f10(i64 %dummy, i64 %a, i32 *%src) {
; CHECK: agfi %r4, 524288
; CHECK: dsgf %r2, 0(%r4)
; CHECK: br %r14
- %ptr = getelementptr i32 *%src, i64 131072
- %b = load i32 *%ptr
+ %ptr = getelementptr i32, i32 *%src, i64 131072
+ %b = load i32 , i32 *%ptr
%bext = sext i32 %b to i64
%rem = srem i64 %a, %bext
ret i64 %rem
@@ -143,8 +143,8 @@ define i64 @f11(i64 %dummy, i64 %a, i32 *%src) {
; CHECK-LABEL: f11:
; CHECK: dsgf %r2, -4(%r4)
; CHECK: br %r14
- %ptr = getelementptr i32 *%src, i64 -1
- %b = load i32 *%ptr
+ %ptr = getelementptr i32, i32 *%src, i64 -1
+ %b = load i32 , i32 *%ptr
%bext = sext i32 %b to i64
%rem = srem i64 %a, %bext
ret i64 %rem
@@ -155,8 +155,8 @@ define i64 @f12(i64 %dummy, i64 %a, i32 *%src) {
; CHECK-LABEL: f12:
; CHECK: dsgf %r2, -524288(%r4)
; CHECK: br %r14
- %ptr = getelementptr i32 *%src, i64 -131072
- %b = load i32 *%ptr
+ %ptr = getelementptr i32, i32 *%src, i64 -131072
+ %b = load i32 , i32 *%ptr
%bext = sext i32 %b to i64
%rem = srem i64 %a, %bext
ret i64 %rem
@@ -169,8 +169,8 @@ define i64 @f13(i64 %dummy, i64 %a, i32 *%src) {
; CHECK: agfi %r4, -524292
; CHECK: dsgf %r2, 0(%r4)
; CHECK: br %r14
- %ptr = getelementptr i32 *%src, i64 -131073
- %b = load i32 *%ptr
+ %ptr = getelementptr i32, i32 *%src, i64 -131073
+ %b = load i32 , i32 *%ptr
%bext = sext i32 %b to i64
%rem = srem i64 %a, %bext
ret i64 %rem
@@ -184,7 +184,7 @@ define i64 @f14(i64 %dummy, i64 %a, i64 %src, i64 %index) {
%add1 = add i64 %src, %index
%add2 = add i64 %add1, 524287
%ptr = inttoptr i64 %add2 to i32 *
- %b = load i32 *%ptr
+ %b = load i32 , i32 *%ptr
%bext = sext i32 %b to i64
%rem = srem i64 %a, %bext
ret i64 %rem
@@ -199,7 +199,7 @@ define void @f15(i64 *%dest, i32 *%src) {
; CHECK: lgr %r1, %r2
; CHECK: dsgfr %r0, [[B]]
; CHECK: br %r14
- %b = load i32 *%src
+ %b = load i32 , i32 *%src
%a = call i64 @foo()
%ext = sext i32 %b to i64
%div = sdiv i64 %a, %ext
diff --git a/test/CodeGen/SystemZ/int-div-04.ll b/test/CodeGen/SystemZ/int-div-04.ll
index 87f1e105f6a4..e8c6f3e03c6a 100644
--- a/test/CodeGen/SystemZ/int-div-04.ll
+++ b/test/CodeGen/SystemZ/int-div-04.ll
@@ -49,7 +49,7 @@ define void @f4(i64 %dummy, i64 %a, i64 *%src, i64 *%dest) {
; CHECK: dsg %r2, 0(%r4)
; CHECK: stg %r3, 0(%r5)
; CHECK: br %r14
- %b = load i64 *%src
+ %b = load i64 , i64 *%src
%div = sdiv i64 %a, %b
store i64 %div, i64 *%dest
ret void
@@ -62,7 +62,7 @@ define void @f5(i64 %dummy, i64 %a, i64 *%src, i64 *%dest) {
; CHECK: dsg %r2, 0(%r4)
; CHECK: stg %r2, 0(%r5)
; CHECK: br %r14
- %b = load i64 *%src
+ %b = load i64 , i64 *%src
%rem = srem i64 %a, %b
store i64 %rem, i64 *%dest
ret void
@@ -76,7 +76,7 @@ define i64 @f6(i64 %dummy, i64 %a, i64 *%src) {
; CHECK-NOT: {{dsg|dsgr}}
; CHECK: ogr %r2, %r3
; CHECK: br %r14
- %b = load i64 *%src
+ %b = load i64 , i64 *%src
%div = sdiv i64 %a, %b
%rem = srem i64 %a, %b
%or = or i64 %rem, %div
@@ -88,8 +88,8 @@ define i64 @f7(i64 %dummy, i64 %a, i64 *%src) {
; CHECK-LABEL: f7:
; CHECK: dsg %r2, 524280(%r4)
; CHECK: br %r14
- %ptr = getelementptr i64 *%src, i64 65535
- %b = load i64 *%ptr
+ %ptr = getelementptr i64, i64 *%src, i64 65535
+ %b = load i64 , i64 *%ptr
%rem = srem i64 %a, %b
ret i64 %rem
}
@@ -101,8 +101,8 @@ define i64 @f8(i64 %dummy, i64 %a, i64 *%src) {
; CHECK: agfi %r4, 524288
; CHECK: dsg %r2, 0(%r4)
; CHECK: br %r14
- %ptr = getelementptr i64 *%src, i64 65536
- %b = load i64 *%ptr
+ %ptr = getelementptr i64, i64 *%src, i64 65536
+ %b = load i64 , i64 *%ptr
%rem = srem i64 %a, %b
ret i64 %rem
}
@@ -112,8 +112,8 @@ define i64 @f9(i64 %dummy, i64 %a, i64 *%src) {
; CHECK-LABEL: f9:
; CHECK: dsg %r2, -8(%r4)
; CHECK: br %r14
- %ptr = getelementptr i64 *%src, i64 -1
- %b = load i64 *%ptr
+ %ptr = getelementptr i64, i64 *%src, i64 -1
+ %b = load i64 , i64 *%ptr
%rem = srem i64 %a, %b
ret i64 %rem
}
@@ -123,8 +123,8 @@ define i64 @f10(i64 %dummy, i64 %a, i64 *%src) {
; CHECK-LABEL: f10:
; CHECK: dsg %r2, -524288(%r4)
; CHECK: br %r14
- %ptr = getelementptr i64 *%src, i64 -65536
- %b = load i64 *%ptr
+ %ptr = getelementptr i64, i64 *%src, i64 -65536
+ %b = load i64 , i64 *%ptr
%rem = srem i64 %a, %b
ret i64 %rem
}
@@ -136,8 +136,8 @@ define i64 @f11(i64 %dummy, i64 %a, i64 *%src) {
; CHECK: agfi %r4, -524296
; CHECK: dsg %r2, 0(%r4)
; CHECK: br %r14
- %ptr = getelementptr i64 *%src, i64 -65537
- %b = load i64 *%ptr
+ %ptr = getelementptr i64, i64 *%src, i64 -65537
+ %b = load i64 , i64 *%ptr
%rem = srem i64 %a, %b
ret i64 %rem
}
@@ -150,7 +150,7 @@ define i64 @f12(i64 %dummy, i64 %a, i64 %src, i64 %index) {
%add1 = add i64 %src, %index
%add2 = add i64 %add1, 524287
%ptr = inttoptr i64 %add2 to i64 *
- %b = load i64 *%ptr
+ %b = load i64 , i64 *%ptr
%rem = srem i64 %a, %b
ret i64 %rem
}
@@ -161,28 +161,28 @@ define i64 @f13(i64 *%ptr0) {
; CHECK: brasl %r14, foo@PLT
; CHECK: dsg {{%r[0-9]+}}, 160(%r15)
; CHECK: br %r14
- %ptr1 = getelementptr i64 *%ptr0, i64 2
- %ptr2 = getelementptr i64 *%ptr0, i64 4
- %ptr3 = getelementptr i64 *%ptr0, i64 6
- %ptr4 = getelementptr i64 *%ptr0, i64 8
- %ptr5 = getelementptr i64 *%ptr0, i64 10
- %ptr6 = getelementptr i64 *%ptr0, i64 12
- %ptr7 = getelementptr i64 *%ptr0, i64 14
- %ptr8 = getelementptr i64 *%ptr0, i64 16
- %ptr9 = getelementptr i64 *%ptr0, i64 18
- %ptr10 = getelementptr i64 *%ptr0, i64 20
-
- %val0 = load i64 *%ptr0
- %val1 = load i64 *%ptr1
- %val2 = load i64 *%ptr2
- %val3 = load i64 *%ptr3
- %val4 = load i64 *%ptr4
- %val5 = load i64 *%ptr5
- %val6 = load i64 *%ptr6
- %val7 = load i64 *%ptr7
- %val8 = load i64 *%ptr8
- %val9 = load i64 *%ptr9
- %val10 = load i64 *%ptr10
+ %ptr1 = getelementptr i64, i64 *%ptr0, i64 2
+ %ptr2 = getelementptr i64, i64 *%ptr0, i64 4
+ %ptr3 = getelementptr i64, i64 *%ptr0, i64 6
+ %ptr4 = getelementptr i64, i64 *%ptr0, i64 8
+ %ptr5 = getelementptr i64, i64 *%ptr0, i64 10
+ %ptr6 = getelementptr i64, i64 *%ptr0, i64 12
+ %ptr7 = getelementptr i64, i64 *%ptr0, i64 14
+ %ptr8 = getelementptr i64, i64 *%ptr0, i64 16
+ %ptr9 = getelementptr i64, i64 *%ptr0, i64 18
+ %ptr10 = getelementptr i64, i64 *%ptr0, i64 20
+
+ %val0 = load i64 , i64 *%ptr0
+ %val1 = load i64 , i64 *%ptr1
+ %val2 = load i64 , i64 *%ptr2
+ %val3 = load i64 , i64 *%ptr3
+ %val4 = load i64 , i64 *%ptr4
+ %val5 = load i64 , i64 *%ptr5
+ %val6 = load i64 , i64 *%ptr6
+ %val7 = load i64 , i64 *%ptr7
+ %val8 = load i64 , i64 *%ptr8
+ %val9 = load i64 , i64 *%ptr9
+ %val10 = load i64 , i64 *%ptr10
%ret = call i64 @foo()
diff --git a/test/CodeGen/SystemZ/int-div-05.ll b/test/CodeGen/SystemZ/int-div-05.ll
index 817983005a9d..f80a139238ea 100644
--- a/test/CodeGen/SystemZ/int-div-05.ll
+++ b/test/CodeGen/SystemZ/int-div-05.ll
@@ -57,7 +57,7 @@ define void @f4(i64 %dummy, i64 %a, i64 *%src, i64 *%dest) {
; CHECK: dlg %r2, 0(%r4)
; CHECK: stg %r3, 0(%r5)
; CHECK: br %r14
- %b = load i64 *%src
+ %b = load i64 , i64 *%src
%div = udiv i64 %a, %b
store i64 %div, i64 *%dest
ret void
@@ -72,7 +72,7 @@ define void @f5(i64 %dummy, i64 %a, i64 *%src, i64 *%dest) {
; CHECK: dlg %r2, 0(%r4)
; CHECK: stg %r2, 0(%r5)
; CHECK: br %r14
- %b = load i64 *%src
+ %b = load i64 , i64 *%src
%rem = urem i64 %a, %b
store i64 %rem, i64 *%dest
ret void
@@ -88,7 +88,7 @@ define i64 @f6(i64 %dummy, i64 %a, i64 *%src) {
; CHECK-NOT: {{dlg|dlgr}}
; CHECK: ogr %r2, %r3
; CHECK: br %r14
- %b = load i64 *%src
+ %b = load i64 , i64 *%src
%div = udiv i64 %a, %b
%rem = urem i64 %a, %b
%or = or i64 %rem, %div
@@ -100,8 +100,8 @@ define i64 @f7(i64 %dummy, i64 %a, i64 *%src) {
; CHECK-LABEL: f7:
; CHECK: dlg %r2, 524280(%r4)
; CHECK: br %r14
- %ptr = getelementptr i64 *%src, i64 65535
- %b = load i64 *%ptr
+ %ptr = getelementptr i64, i64 *%src, i64 65535
+ %b = load i64 , i64 *%ptr
%rem = urem i64 %a, %b
ret i64 %rem
}
@@ -113,8 +113,8 @@ define i64 @f8(i64 %dummy, i64 %a, i64 *%src) {
; CHECK: agfi %r4, 524288
; CHECK: dlg %r2, 0(%r4)
; CHECK: br %r14
- %ptr = getelementptr i64 *%src, i64 65536
- %b = load i64 *%ptr
+ %ptr = getelementptr i64, i64 *%src, i64 65536
+ %b = load i64 , i64 *%ptr
%rem = urem i64 %a, %b
ret i64 %rem
}
@@ -124,8 +124,8 @@ define i64 @f9(i64 %dummy, i64 %a, i64 *%src) {
; CHECK-LABEL: f9:
; CHECK: dlg %r2, -8(%r4)
; CHECK: br %r14
- %ptr = getelementptr i64 *%src, i64 -1
- %b = load i64 *%ptr
+ %ptr = getelementptr i64, i64 *%src, i64 -1
+ %b = load i64 , i64 *%ptr
%rem = urem i64 %a, %b
ret i64 %rem
}
@@ -135,8 +135,8 @@ define i64 @f10(i64 %dummy, i64 %a, i64 *%src) {
; CHECK-LABEL: f10:
; CHECK: dlg %r2, -524288(%r4)
; CHECK: br %r14
- %ptr = getelementptr i64 *%src, i64 -65536
- %b = load i64 *%ptr
+ %ptr = getelementptr i64, i64 *%src, i64 -65536
+ %b = load i64 , i64 *%ptr
%rem = urem i64 %a, %b
ret i64 %rem
}
@@ -148,8 +148,8 @@ define i64 @f11(i64 %dummy, i64 %a, i64 *%src) {
; CHECK: agfi %r4, -524296
; CHECK: dlg %r2, 0(%r4)
; CHECK: br %r14
- %ptr = getelementptr i64 *%src, i64 -65537
- %b = load i64 *%ptr
+ %ptr = getelementptr i64, i64 *%src, i64 -65537
+ %b = load i64 , i64 *%ptr
%rem = urem i64 %a, %b
ret i64 %rem
}
@@ -162,7 +162,7 @@ define i64 @f12(i64 %dummy, i64 %a, i64 %src, i64 %index) {
%add1 = add i64 %src, %index
%add2 = add i64 %add1, 524287
%ptr = inttoptr i64 %add2 to i64 *
- %b = load i64 *%ptr
+ %b = load i64 , i64 *%ptr
%rem = urem i64 %a, %b
ret i64 %rem
}
@@ -173,28 +173,28 @@ define i64 @f13(i64 *%ptr0) {
; CHECK: brasl %r14, foo@PLT
; CHECK: dlg {{%r[0-9]+}}, 160(%r15)
; CHECK: br %r14
- %ptr1 = getelementptr i64 *%ptr0, i64 2
- %ptr2 = getelementptr i64 *%ptr0, i64 4
- %ptr3 = getelementptr i64 *%ptr0, i64 6
- %ptr4 = getelementptr i64 *%ptr0, i64 8
- %ptr5 = getelementptr i64 *%ptr0, i64 10
- %ptr6 = getelementptr i64 *%ptr0, i64 12
- %ptr7 = getelementptr i64 *%ptr0, i64 14
- %ptr8 = getelementptr i64 *%ptr0, i64 16
- %ptr9 = getelementptr i64 *%ptr0, i64 18
- %ptr10 = getelementptr i64 *%ptr0, i64 20
-
- %val0 = load i64 *%ptr0
- %val1 = load i64 *%ptr1
- %val2 = load i64 *%ptr2
- %val3 = load i64 *%ptr3
- %val4 = load i64 *%ptr4
- %val5 = load i64 *%ptr5
- %val6 = load i64 *%ptr6
- %val7 = load i64 *%ptr7
- %val8 = load i64 *%ptr8
- %val9 = load i64 *%ptr9
- %val10 = load i64 *%ptr10
+ %ptr1 = getelementptr i64, i64 *%ptr0, i64 2
+ %ptr2 = getelementptr i64, i64 *%ptr0, i64 4
+ %ptr3 = getelementptr i64, i64 *%ptr0, i64 6
+ %ptr4 = getelementptr i64, i64 *%ptr0, i64 8
+ %ptr5 = getelementptr i64, i64 *%ptr0, i64 10
+ %ptr6 = getelementptr i64, i64 *%ptr0, i64 12
+ %ptr7 = getelementptr i64, i64 *%ptr0, i64 14
+ %ptr8 = getelementptr i64, i64 *%ptr0, i64 16
+ %ptr9 = getelementptr i64, i64 *%ptr0, i64 18
+ %ptr10 = getelementptr i64, i64 *%ptr0, i64 20
+
+ %val0 = load i64 , i64 *%ptr0
+ %val1 = load i64 , i64 *%ptr1
+ %val2 = load i64 , i64 *%ptr2
+ %val3 = load i64 , i64 *%ptr3
+ %val4 = load i64 , i64 *%ptr4
+ %val5 = load i64 , i64 *%ptr5
+ %val6 = load i64 , i64 *%ptr6
+ %val7 = load i64 , i64 *%ptr7
+ %val8 = load i64 , i64 *%ptr8
+ %val9 = load i64 , i64 *%ptr9
+ %val10 = load i64 , i64 *%ptr10
%ret = call i64 @foo()
diff --git a/test/CodeGen/SystemZ/int-move-02.ll b/test/CodeGen/SystemZ/int-move-02.ll
index 5fc0843290f9..7ec0f418502a 100644
--- a/test/CodeGen/SystemZ/int-move-02.ll
+++ b/test/CodeGen/SystemZ/int-move-02.ll
@@ -7,7 +7,7 @@ define i32 @f1(i32 *%src) {
; CHECK-LABEL: f1:
; CHECK: l %r2, 0(%r2)
; CHECK: br %r14
- %val = load i32 *%src
+ %val = load i32 , i32 *%src
ret i32 %val
}
@@ -16,8 +16,8 @@ define i32 @f2(i32 *%src) {
; CHECK-LABEL: f2:
; CHECK: l %r2, 4092(%r2)
; CHECK: br %r14
- %ptr = getelementptr i32 *%src, i64 1023
- %val = load i32 *%ptr
+ %ptr = getelementptr i32, i32 *%src, i64 1023
+ %val = load i32 , i32 *%ptr
ret i32 %val
}
@@ -26,8 +26,8 @@ define i32 @f3(i32 *%src) {
; CHECK-LABEL: f3:
; CHECK: ly %r2, 4096(%r2)
; CHECK: br %r14
- %ptr = getelementptr i32 *%src, i64 1024
- %val = load i32 *%ptr
+ %ptr = getelementptr i32, i32 *%src, i64 1024
+ %val = load i32 , i32 *%ptr
ret i32 %val
}
@@ -36,8 +36,8 @@ define i32 @f4(i32 *%src) {
; CHECK-LABEL: f4:
; CHECK: ly %r2, 524284(%r2)
; CHECK: br %r14
- %ptr = getelementptr i32 *%src, i64 131071
- %val = load i32 *%ptr
+ %ptr = getelementptr i32, i32 *%src, i64 131071
+ %val = load i32 , i32 *%ptr
ret i32 %val
}
@@ -48,8 +48,8 @@ define i32 @f5(i32 *%src) {
; CHECK: agfi %r2, 524288
; CHECK: l %r2, 0(%r2)
; CHECK: br %r14
- %ptr = getelementptr i32 *%src, i64 131072
- %val = load i32 *%ptr
+ %ptr = getelementptr i32, i32 *%src, i64 131072
+ %val = load i32 , i32 *%ptr
ret i32 %val
}
@@ -58,8 +58,8 @@ define i32 @f6(i32 *%src) {
; CHECK-LABEL: f6:
; CHECK: ly %r2, -4(%r2)
; CHECK: br %r14
- %ptr = getelementptr i32 *%src, i64 -1
- %val = load i32 *%ptr
+ %ptr = getelementptr i32, i32 *%src, i64 -1
+ %val = load i32 , i32 *%ptr
ret i32 %val
}
@@ -68,8 +68,8 @@ define i32 @f7(i32 *%src) {
; CHECK-LABEL: f7:
; CHECK: ly %r2, -524288(%r2)
; CHECK: br %r14
- %ptr = getelementptr i32 *%src, i64 -131072
- %val = load i32 *%ptr
+ %ptr = getelementptr i32, i32 *%src, i64 -131072
+ %val = load i32 , i32 *%ptr
ret i32 %val
}
@@ -80,8 +80,8 @@ define i32 @f8(i32 *%src) {
; CHECK: agfi %r2, -524292
; CHECK: l %r2, 0(%r2)
; CHECK: br %r14
- %ptr = getelementptr i32 *%src, i64 -131073
- %val = load i32 *%ptr
+ %ptr = getelementptr i32, i32 *%src, i64 -131073
+ %val = load i32 , i32 *%ptr
ret i32 %val
}
@@ -93,7 +93,7 @@ define i32 @f9(i64 %src, i64 %index) {
%add1 = add i64 %src, %index
%add2 = add i64 %add1, 4095
%ptr = inttoptr i64 %add2 to i32 *
- %val = load i32 *%ptr
+ %val = load i32 , i32 *%ptr
ret i32 %val
}
@@ -105,6 +105,6 @@ define i32 @f10(i64 %src, i64 %index) {
%add1 = add i64 %src, %index
%add2 = add i64 %add1, 4096
%ptr = inttoptr i64 %add2 to i32 *
- %val = load i32 *%ptr
+ %val = load i32 , i32 *%ptr
ret i32 %val
}
diff --git a/test/CodeGen/SystemZ/int-move-03.ll b/test/CodeGen/SystemZ/int-move-03.ll
index 2894512e8eea..60eb0042ca87 100644
--- a/test/CodeGen/SystemZ/int-move-03.ll
+++ b/test/CodeGen/SystemZ/int-move-03.ll
@@ -7,7 +7,7 @@ define i64 @f1(i64 *%src) {
; CHECK-LABEL: f1:
; CHECK: lg %r2, 0(%r2)
; CHECK: br %r14
- %val = load i64 *%src
+ %val = load i64 , i64 *%src
ret i64 %val
}
@@ -16,8 +16,8 @@ define i64 @f2(i64 *%src) {
; CHECK-LABEL: f2:
; CHECK: lg %r2, 524280(%r2)
; CHECK: br %r14
- %ptr = getelementptr i64 *%src, i64 65535
- %val = load i64 *%ptr
+ %ptr = getelementptr i64, i64 *%src, i64 65535
+ %val = load i64 , i64 *%ptr
ret i64 %val
}
@@ -28,8 +28,8 @@ define i64 @f3(i64 *%src) {
; CHECK: agfi %r2, 524288
; CHECK: lg %r2, 0(%r2)
; CHECK: br %r14
- %ptr = getelementptr i64 *%src, i64 65536
- %val = load i64 *%ptr
+ %ptr = getelementptr i64, i64 *%src, i64 65536
+ %val = load i64 , i64 *%ptr
ret i64 %val
}
@@ -38,8 +38,8 @@ define i64 @f4(i64 *%src) {
; CHECK-LABEL: f4:
; CHECK: lg %r2, -8(%r2)
; CHECK: br %r14
- %ptr = getelementptr i64 *%src, i64 -1
- %val = load i64 *%ptr
+ %ptr = getelementptr i64, i64 *%src, i64 -1
+ %val = load i64 , i64 *%ptr
ret i64 %val
}
@@ -48,8 +48,8 @@ define i64 @f5(i64 *%src) {
; CHECK-LABEL: f5:
; CHECK: lg %r2, -524288(%r2)
; CHECK: br %r14
- %ptr = getelementptr i64 *%src, i64 -65536
- %val = load i64 *%ptr
+ %ptr = getelementptr i64, i64 *%src, i64 -65536
+ %val = load i64 , i64 *%ptr
ret i64 %val
}
@@ -60,8 +60,8 @@ define i64 @f6(i64 *%src) {
; CHECK: agfi %r2, -524296
; CHECK: lg %r2, 0(%r2)
; CHECK: br %r14
- %ptr = getelementptr i64 *%src, i64 -65537
- %val = load i64 *%ptr
+ %ptr = getelementptr i64, i64 *%src, i64 -65537
+ %val = load i64 , i64 *%ptr
ret i64 %val
}
@@ -73,6 +73,6 @@ define i64 @f7(i64 %src, i64 %index) {
%add1 = add i64 %src, %index
%add2 = add i64 %add1, 524287
%ptr = inttoptr i64 %add2 to i64 *
- %val = load i64 *%ptr
+ %val = load i64 , i64 *%ptr
ret i64 %val
}
diff --git a/test/CodeGen/SystemZ/int-move-04.ll b/test/CodeGen/SystemZ/int-move-04.ll
index d97ed2f54a4b..cb7d86428b7e 100644
--- a/test/CodeGen/SystemZ/int-move-04.ll
+++ b/test/CodeGen/SystemZ/int-move-04.ll
@@ -36,7 +36,7 @@ define void @f4(i8 *%dst, i8 %val) {
; CHECK-LABEL: f4:
; CHECK: stc %r3, 4095(%r2)
; CHECK: br %r14
- %ptr = getelementptr i8 *%dst, i64 4095
+ %ptr = getelementptr i8, i8 *%dst, i64 4095
store i8 %val, i8 *%ptr
ret void
}
@@ -46,7 +46,7 @@ define void @f5(i8 *%dst, i8 %val) {
; CHECK-LABEL: f5:
; CHECK: stcy %r3, 4096(%r2)
; CHECK: br %r14
- %ptr = getelementptr i8 *%dst, i64 4096
+ %ptr = getelementptr i8, i8 *%dst, i64 4096
store i8 %val, i8 *%ptr
ret void
}
@@ -56,7 +56,7 @@ define void @f6(i8 *%dst, i8 %val) {
; CHECK-LABEL: f6:
; CHECK: stcy %r3, 524287(%r2)
; CHECK: br %r14
- %ptr = getelementptr i8 *%dst, i64 524287
+ %ptr = getelementptr i8, i8 *%dst, i64 524287
store i8 %val, i8 *%ptr
ret void
}
@@ -68,7 +68,7 @@ define void @f7(i8 *%dst, i8 %val) {
; CHECK: agfi %r2, 524288
; CHECK: stc %r3, 0(%r2)
; CHECK: br %r14
- %ptr = getelementptr i8 *%dst, i64 524288
+ %ptr = getelementptr i8, i8 *%dst, i64 524288
store i8 %val, i8 *%ptr
ret void
}
@@ -78,7 +78,7 @@ define void @f8(i8 *%dst, i8 %val) {
; CHECK-LABEL: f8:
; CHECK: stcy %r3, -1(%r2)
; CHECK: br %r14
- %ptr = getelementptr i8 *%dst, i64 -1
+ %ptr = getelementptr i8, i8 *%dst, i64 -1
store i8 %val, i8 *%ptr
ret void
}
@@ -88,7 +88,7 @@ define void @f9(i8 *%dst, i8 %val) {
; CHECK-LABEL: f9:
; CHECK: stcy %r3, -524288(%r2)
; CHECK: br %r14
- %ptr = getelementptr i8 *%dst, i64 -524288
+ %ptr = getelementptr i8, i8 *%dst, i64 -524288
store i8 %val, i8 *%ptr
ret void
}
@@ -100,7 +100,7 @@ define void @f10(i8 *%dst, i8 %val) {
; CHECK: agfi %r2, -524289
; CHECK: stc %r3, 0(%r2)
; CHECK: br %r14
- %ptr = getelementptr i8 *%dst, i64 -524289
+ %ptr = getelementptr i8, i8 *%dst, i64 -524289
store i8 %val, i8 *%ptr
ret void
}
diff --git a/test/CodeGen/SystemZ/int-move-05.ll b/test/CodeGen/SystemZ/int-move-05.ll
index c21b88aa7baa..2bfe03447c84 100644
--- a/test/CodeGen/SystemZ/int-move-05.ll
+++ b/test/CodeGen/SystemZ/int-move-05.ll
@@ -36,7 +36,7 @@ define void @f4(i16 *%dst, i16 %val) {
; CHECK-LABEL: f4:
; CHECK: sth %r3, 4094(%r2)
; CHECK: br %r14
- %ptr = getelementptr i16 *%dst, i64 2047
+ %ptr = getelementptr i16, i16 *%dst, i64 2047
store i16 %val, i16 *%ptr
ret void
}
@@ -46,7 +46,7 @@ define void @f5(i16 *%dst, i16 %val) {
; CHECK-LABEL: f5:
; CHECK: sthy %r3, 4096(%r2)
; CHECK: br %r14
- %ptr = getelementptr i16 *%dst, i64 2048
+ %ptr = getelementptr i16, i16 *%dst, i64 2048
store i16 %val, i16 *%ptr
ret void
}
@@ -56,7 +56,7 @@ define void @f6(i16 *%dst, i16 %val) {
; CHECK-LABEL: f6:
; CHECK: sthy %r3, 524286(%r2)
; CHECK: br %r14
- %ptr = getelementptr i16 *%dst, i64 262143
+ %ptr = getelementptr i16, i16 *%dst, i64 262143
store i16 %val, i16 *%ptr
ret void
}
@@ -68,7 +68,7 @@ define void @f7(i16 *%dst, i16 %val) {
; CHECK: agfi %r2, 524288
; CHECK: sth %r3, 0(%r2)
; CHECK: br %r14
- %ptr = getelementptr i16 *%dst, i64 262144
+ %ptr = getelementptr i16, i16 *%dst, i64 262144
store i16 %val, i16 *%ptr
ret void
}
@@ -78,7 +78,7 @@ define void @f8(i16 *%dst, i16 %val) {
; CHECK-LABEL: f8:
; CHECK: sthy %r3, -2(%r2)
; CHECK: br %r14
- %ptr = getelementptr i16 *%dst, i64 -1
+ %ptr = getelementptr i16, i16 *%dst, i64 -1
store i16 %val, i16 *%ptr
ret void
}
@@ -88,7 +88,7 @@ define void @f9(i16 *%dst, i16 %val) {
; CHECK-LABEL: f9:
; CHECK: sthy %r3, -524288(%r2)
; CHECK: br %r14
- %ptr = getelementptr i16 *%dst, i64 -262144
+ %ptr = getelementptr i16, i16 *%dst, i64 -262144
store i16 %val, i16 *%ptr
ret void
}
@@ -100,7 +100,7 @@ define void @f10(i16 *%dst, i16 %val) {
; CHECK: agfi %r2, -524290
; CHECK: sth %r3, 0(%r2)
; CHECK: br %r14
- %ptr = getelementptr i16 *%dst, i64 -262145
+ %ptr = getelementptr i16, i16 *%dst, i64 -262145
store i16 %val, i16 *%ptr
ret void
}
diff --git a/test/CodeGen/SystemZ/int-move-06.ll b/test/CodeGen/SystemZ/int-move-06.ll
index b8c6f53e15d8..f078ec62341f 100644
--- a/test/CodeGen/SystemZ/int-move-06.ll
+++ b/test/CodeGen/SystemZ/int-move-06.ll
@@ -23,7 +23,7 @@ define void @f3(i32 *%dst, i32 %val) {
; CHECK-LABEL: f3:
; CHECK: st %r3, 4092(%r2)
; CHECK: br %r14
- %ptr = getelementptr i32 *%dst, i64 1023
+ %ptr = getelementptr i32, i32 *%dst, i64 1023
store i32 %val, i32 *%ptr
ret void
}
@@ -33,7 +33,7 @@ define void @f4(i32 *%dst, i32 %val) {
; CHECK-LABEL: f4:
; CHECK: sty %r3, 4096(%r2)
; CHECK: br %r14
- %ptr = getelementptr i32 *%dst, i64 1024
+ %ptr = getelementptr i32, i32 *%dst, i64 1024
store i32 %val, i32 *%ptr
ret void
}
@@ -43,7 +43,7 @@ define void @f5(i32 *%dst, i32 %val) {
; CHECK-LABEL: f5:
; CHECK: sty %r3, 524284(%r2)
; CHECK: br %r14
- %ptr = getelementptr i32 *%dst, i64 131071
+ %ptr = getelementptr i32, i32 *%dst, i64 131071
store i32 %val, i32 *%ptr
ret void
}
@@ -55,7 +55,7 @@ define void @f6(i32 *%dst, i32 %val) {
; CHECK: agfi %r2, 524288
; CHECK: st %r3, 0(%r2)
; CHECK: br %r14
- %ptr = getelementptr i32 *%dst, i64 131072
+ %ptr = getelementptr i32, i32 *%dst, i64 131072
store i32 %val, i32 *%ptr
ret void
}
@@ -65,7 +65,7 @@ define void @f7(i32 *%dst, i32 %val) {
; CHECK-LABEL: f7:
; CHECK: sty %r3, -4(%r2)
; CHECK: br %r14
- %ptr = getelementptr i32 *%dst, i64 -1
+ %ptr = getelementptr i32, i32 *%dst, i64 -1
store i32 %val, i32 *%ptr
ret void
}
@@ -75,7 +75,7 @@ define void @f8(i32 *%dst, i32 %val) {
; CHECK-LABEL: f8:
; CHECK: sty %r3, -524288(%r2)
; CHECK: br %r14
- %ptr = getelementptr i32 *%dst, i64 -131072
+ %ptr = getelementptr i32, i32 *%dst, i64 -131072
store i32 %val, i32 *%ptr
ret void
}
@@ -87,7 +87,7 @@ define void @f9(i32 *%dst, i32 %val) {
; CHECK: agfi %r2, -524292
; CHECK: st %r3, 0(%r2)
; CHECK: br %r14
- %ptr = getelementptr i32 *%dst, i64 -131073
+ %ptr = getelementptr i32, i32 *%dst, i64 -131073
store i32 %val, i32 *%ptr
ret void
}
diff --git a/test/CodeGen/SystemZ/int-move-07.ll b/test/CodeGen/SystemZ/int-move-07.ll
index 5cac1e5b1a2e..77713adeebe3 100644
--- a/test/CodeGen/SystemZ/int-move-07.ll
+++ b/test/CodeGen/SystemZ/int-move-07.ll
@@ -16,7 +16,7 @@ define void @f2(i64 *%dst, i64 %val) {
; CHECK-LABEL: f2:
; CHECK: stg %r3, 524280(%r2)
; CHECK: br %r14
- %ptr = getelementptr i64 *%dst, i64 65535
+ %ptr = getelementptr i64, i64 *%dst, i64 65535
store i64 %val, i64 *%ptr
ret void
}
@@ -28,7 +28,7 @@ define void @f3(i64 *%dst, i64 %val) {
; CHECK: agfi %r2, 524288
; CHECK: stg %r3, 0(%r2)
; CHECK: br %r14
- %ptr = getelementptr i64 *%dst, i64 65536
+ %ptr = getelementptr i64, i64 *%dst, i64 65536
store i64 %val, i64 *%ptr
ret void
}
@@ -38,7 +38,7 @@ define void @f4(i64 *%dst, i64 %val) {
; CHECK-LABEL: f4:
; CHECK: stg %r3, -8(%r2)
; CHECK: br %r14
- %ptr = getelementptr i64 *%dst, i64 -1
+ %ptr = getelementptr i64, i64 *%dst, i64 -1
store i64 %val, i64 *%ptr
ret void
}
@@ -48,7 +48,7 @@ define void @f5(i64 *%dst, i64 %val) {
; CHECK-LABEL: f5:
; CHECK: stg %r3, -524288(%r2)
; CHECK: br %r14
- %ptr = getelementptr i64 *%dst, i64 -65536
+ %ptr = getelementptr i64, i64 *%dst, i64 -65536
store i64 %val, i64 *%ptr
ret void
}
@@ -60,7 +60,7 @@ define void @f6(i64 *%dst, i64 %val) {
; CHECK: agfi %r2, -524296
; CHECK: stg %r3, 0(%r2)
; CHECK: br %r14
- %ptr = getelementptr i64 *%dst, i64 -65537
+ %ptr = getelementptr i64, i64 *%dst, i64 -65537
store i64 %val, i64 *%ptr
ret void
}
diff --git a/test/CodeGen/SystemZ/int-move-08.ll b/test/CodeGen/SystemZ/int-move-08.ll
index 56fcbc6d802b..d28d298013ff 100644
--- a/test/CodeGen/SystemZ/int-move-08.ll
+++ b/test/CodeGen/SystemZ/int-move-08.ll
@@ -18,7 +18,7 @@ define i32 @f1() {
; CHECK-LABEL: f1:
; CHECK: lhrl %r2, gsrc16
; CHECK: br %r14
- %val = load i16 *@gsrc16
+ %val = load i16 , i16 *@gsrc16
%ext = sext i16 %val to i32
ret i32 %ext
}
@@ -28,7 +28,7 @@ define i32 @f2() {
; CHECK-LABEL: f2:
; CHECK: llhrl %r2, gsrc16
; CHECK: br %r14
- %val = load i16 *@gsrc16
+ %val = load i16 , i16 *@gsrc16
%ext = zext i16 %val to i32
ret i32 %ext
}
@@ -49,7 +49,7 @@ define void @f4() {
; CHECK: lrl %r0, gsrc32
; CHECK: strl %r0, gdst32
; CHECK: br %r14
- %val = load i32 *@gsrc32
+ %val = load i32 , i32 *@gsrc32
store i32 %val, i32 *@gdst32
ret void
}
@@ -60,7 +60,7 @@ define i32 @f5() {
; CHECK: lgrl [[REG:%r[0-5]]], gsrc16u
; CHECK: lh %r2, 0([[REG]])
; CHECK: br %r14
- %val = load i16 *@gsrc16u, align 1
+ %val = load i16 , i16 *@gsrc16u, align 1
%ext = sext i16 %val to i32
ret i32 %ext
}
@@ -71,7 +71,7 @@ define i32 @f6() {
; CHECK: lgrl [[REG:%r[0-5]]], gsrc16u
; CHECK: llh %r2, 0([[REG]])
; CHECK: br %r14
- %val = load i16 *@gsrc16u, align 1
+ %val = load i16 , i16 *@gsrc16u, align 1
%ext = zext i16 %val to i32
ret i32 %ext
}
@@ -95,7 +95,7 @@ define void @f8() {
; CHECK: larl [[REG:%r[0-5]]], gdst32u
; CHECK: st [[VAL]], 0([[REG]])
; CHECK: br %r14
- %val = load i32 *@gsrc32u, align 2
+ %val = load i32 , i32 *@gsrc32u, align 2
store i32 %val, i32 *@gdst32u, align 2
ret void
}
@@ -109,9 +109,9 @@ define void @f9() {
; CHECK: srl [[VAL]], 1
; CHECK: stc [[VAL]], 1([[REG]])
; CHECK: br %r14
- %ptr1 = getelementptr [2 x i8] *@garray8, i64 0, i64 0
- %ptr2 = getelementptr [2 x i8] *@garray8, i64 0, i64 1
- %val = load i8 *%ptr1
+ %ptr1 = getelementptr [2 x i8], [2 x i8] *@garray8, i64 0, i64 0
+ %ptr2 = getelementptr [2 x i8], [2 x i8] *@garray8, i64 0, i64 1
+ %val = load i8 , i8 *%ptr1
%shr = lshr i8 %val, 1
store i8 %shr, i8 *%ptr2
ret void
@@ -125,9 +125,9 @@ define void @f10() {
; CHECK: srl [[VAL]], 1
; CHECK: sthrl [[VAL]], garray16+2
; CHECK: br %r14
- %ptr1 = getelementptr [2 x i16] *@garray16, i64 0, i64 0
- %ptr2 = getelementptr [2 x i16] *@garray16, i64 0, i64 1
- %val = load i16 *%ptr1
+ %ptr1 = getelementptr [2 x i16], [2 x i16] *@garray16, i64 0, i64 0
+ %ptr2 = getelementptr [2 x i16], [2 x i16] *@garray16, i64 0, i64 1
+ %val = load i16 , i16 *%ptr1
%shr = lshr i16 %val, 1
store i16 %shr, i16 *%ptr2
ret void
diff --git a/test/CodeGen/SystemZ/int-move-09.ll b/test/CodeGen/SystemZ/int-move-09.ll
index b5c9cb13d288..6476842dd12b 100644
--- a/test/CodeGen/SystemZ/int-move-09.ll
+++ b/test/CodeGen/SystemZ/int-move-09.ll
@@ -20,7 +20,7 @@ define i64 @f1() {
; CHECK-LABEL: f1:
; CHECK: lghrl %r2, gsrc16
; CHECK: br %r14
- %val = load i16 *@gsrc16
+ %val = load i16 , i16 *@gsrc16
%ext = sext i16 %val to i64
ret i64 %ext
}
@@ -30,7 +30,7 @@ define i64 @f2() {
; CHECK-LABEL: f2:
; CHECK: llghrl %r2, gsrc16
; CHECK: br %r14
- %val = load i16 *@gsrc16
+ %val = load i16 , i16 *@gsrc16
%ext = zext i16 %val to i64
ret i64 %ext
}
@@ -40,7 +40,7 @@ define i64 @f3() {
; CHECK-LABEL: f3:
; CHECK: lgfrl %r2, gsrc32
; CHECK: br %r14
- %val = load i32 *@gsrc32
+ %val = load i32 , i32 *@gsrc32
%ext = sext i32 %val to i64
ret i64 %ext
}
@@ -50,7 +50,7 @@ define i64 @f4() {
; CHECK-LABEL: f4:
; CHECK: llgfrl %r2, gsrc32
; CHECK: br %r14
- %val = load i32 *@gsrc32
+ %val = load i32 , i32 *@gsrc32
%ext = zext i32 %val to i64
ret i64 %ext
}
@@ -81,7 +81,7 @@ define void @f7() {
; CHECK: lgrl %r0, gsrc64
; CHECK: stgrl %r0, gdst64
; CHECK: br %r14
- %val = load i64 *@gsrc64
+ %val = load i64 , i64 *@gsrc64
store i64 %val, i64 *@gdst64
ret void
}
@@ -92,7 +92,7 @@ define i64 @f8() {
; CHECK: lgrl [[REG:%r[0-5]]], gsrc16u@GOT
; CHECK: lgh %r2, 0([[REG]])
; CHECK: br %r14
- %val = load i16 *@gsrc16u, align 1
+ %val = load i16 , i16 *@gsrc16u, align 1
%ext = sext i16 %val to i64
ret i64 %ext
}
@@ -103,7 +103,7 @@ define i64 @f9() {
; CHECK: lgrl [[REG:%r[0-5]]], gsrc16u@GOT
; CHECK: llgh %r2, 0([[REG]])
; CHECK: br %r14
- %val = load i16 *@gsrc16u, align 1
+ %val = load i16 , i16 *@gsrc16u, align 1
%ext = zext i16 %val to i64
ret i64 %ext
}
@@ -114,7 +114,7 @@ define i64 @f10() {
; CHECK: larl [[REG:%r[0-5]]], gsrc32u
; CHECK: lgf %r2, 0([[REG]])
; CHECK: br %r14
- %val = load i32 *@gsrc32u, align 2
+ %val = load i32 , i32 *@gsrc32u, align 2
%ext = sext i32 %val to i64
ret i64 %ext
}
@@ -125,7 +125,7 @@ define i64 @f11() {
; CHECK: larl [[REG:%r[0-5]]], gsrc32u
; CHECK: llgf %r2, 0([[REG]])
; CHECK: br %r14
- %val = load i32 *@gsrc32u, align 2
+ %val = load i32 , i32 *@gsrc32u, align 2
%ext = zext i32 %val to i64
ret i64 %ext
}
@@ -160,7 +160,7 @@ define void @f14() {
; CHECK: larl [[REG:%r[0-5]]], gdst64u
; CHECK: stg [[VAL]], 0([[REG]])
; CHECK: br %r14
- %val = load i64 *@gsrc64u, align 4
+ %val = load i64 , i64 *@gsrc64u, align 4
store i64 %val, i64 *@gdst64u, align 4
ret void
}
diff --git a/test/CodeGen/SystemZ/int-mul-01.ll b/test/CodeGen/SystemZ/int-mul-01.ll
index d5f7155f8c48..b0adc1874b9f 100644
--- a/test/CodeGen/SystemZ/int-mul-01.ll
+++ b/test/CodeGen/SystemZ/int-mul-01.ll
@@ -8,7 +8,7 @@ define i32 @f1(i32 %lhs, i16 *%src) {
; CHECK-LABEL: f1:
; CHECK: mh %r2, 0(%r3)
; CHECK: br %r14
- %half = load i16 *%src
+ %half = load i16 , i16 *%src
%rhs = sext i16 %half to i32
%res = mul i32 %lhs, %rhs
ret i32 %res
@@ -19,8 +19,8 @@ define i32 @f2(i32 %lhs, i16 *%src) {
; CHECK-LABEL: f2:
; CHECK: mh %r2, 4094(%r3)
; CHECK: br %r14
- %ptr = getelementptr i16 *%src, i64 2047
- %half = load i16 *%ptr
+ %ptr = getelementptr i16, i16 *%src, i64 2047
+ %half = load i16 , i16 *%ptr
%rhs = sext i16 %half to i32
%res = mul i32 %lhs, %rhs
ret i32 %res
@@ -31,8 +31,8 @@ define i32 @f3(i32 %lhs, i16 *%src) {
; CHECK-LABEL: f3:
; CHECK: mhy %r2, 4096(%r3)
; CHECK: br %r14
- %ptr = getelementptr i16 *%src, i64 2048
- %half = load i16 *%ptr
+ %ptr = getelementptr i16, i16 *%src, i64 2048
+ %half = load i16 , i16 *%ptr
%rhs = sext i16 %half to i32
%res = mul i32 %lhs, %rhs
ret i32 %res
@@ -43,8 +43,8 @@ define i32 @f4(i32 %lhs, i16 *%src) {
; CHECK-LABEL: f4:
; CHECK: mhy %r2, 524286(%r3)
; CHECK: br %r14
- %ptr = getelementptr i16 *%src, i64 262143
- %half = load i16 *%ptr
+ %ptr = getelementptr i16, i16 *%src, i64 262143
+ %half = load i16 , i16 *%ptr
%rhs = sext i16 %half to i32
%res = mul i32 %lhs, %rhs
ret i32 %res
@@ -57,8 +57,8 @@ define i32 @f5(i32 %lhs, i16 *%src) {
; CHECK: agfi %r3, 524288
; CHECK: mh %r2, 0(%r3)
; CHECK: br %r14
- %ptr = getelementptr i16 *%src, i64 262144
- %half = load i16 *%ptr
+ %ptr = getelementptr i16, i16 *%src, i64 262144
+ %half = load i16 , i16 *%ptr
%rhs = sext i16 %half to i32
%res = mul i32 %lhs, %rhs
ret i32 %res
@@ -69,8 +69,8 @@ define i32 @f6(i32 %lhs, i16 *%src) {
; CHECK-LABEL: f6:
; CHECK: mhy %r2, -2(%r3)
; CHECK: br %r14
- %ptr = getelementptr i16 *%src, i64 -1
- %half = load i16 *%ptr
+ %ptr = getelementptr i16, i16 *%src, i64 -1
+ %half = load i16 , i16 *%ptr
%rhs = sext i16 %half to i32
%res = mul i32 %lhs, %rhs
ret i32 %res
@@ -81,8 +81,8 @@ define i32 @f7(i32 %lhs, i16 *%src) {
; CHECK-LABEL: f7:
; CHECK: mhy %r2, -524288(%r3)
; CHECK: br %r14
- %ptr = getelementptr i16 *%src, i64 -262144
- %half = load i16 *%ptr
+ %ptr = getelementptr i16, i16 *%src, i64 -262144
+ %half = load i16 , i16 *%ptr
%rhs = sext i16 %half to i32
%res = mul i32 %lhs, %rhs
ret i32 %res
@@ -95,8 +95,8 @@ define i32 @f8(i32 %lhs, i16 *%src) {
; CHECK: agfi %r3, -524290
; CHECK: mh %r2, 0(%r3)
; CHECK: br %r14
- %ptr = getelementptr i16 *%src, i64 -262145
- %half = load i16 *%ptr
+ %ptr = getelementptr i16, i16 *%src, i64 -262145
+ %half = load i16 , i16 *%ptr
%rhs = sext i16 %half to i32
%res = mul i32 %lhs, %rhs
ret i32 %res
@@ -110,7 +110,7 @@ define i32 @f9(i32 %lhs, i64 %src, i64 %index) {
%add1 = add i64 %src, %index
%add2 = add i64 %add1, 4094
%ptr = inttoptr i64 %add2 to i16 *
- %half = load i16 *%ptr
+ %half = load i16 , i16 *%ptr
%rhs = sext i16 %half to i32
%res = mul i32 %lhs, %rhs
ret i32 %res
@@ -124,7 +124,7 @@ define i32 @f10(i32 %lhs, i64 %src, i64 %index) {
%add1 = add i64 %src, %index
%add2 = add i64 %add1, 4096
%ptr = inttoptr i64 %add2 to i16 *
- %half = load i16 *%ptr
+ %half = load i16 , i16 *%ptr
%rhs = sext i16 %half to i32
%res = mul i32 %lhs, %rhs
ret i32 %res
diff --git a/test/CodeGen/SystemZ/int-mul-02.ll b/test/CodeGen/SystemZ/int-mul-02.ll
index d002a7f2f9bd..265674f6c667 100644
--- a/test/CodeGen/SystemZ/int-mul-02.ll
+++ b/test/CodeGen/SystemZ/int-mul-02.ll
@@ -18,7 +18,7 @@ define i32 @f2(i32 %a, i32 *%src) {
; CHECK-LABEL: f2:
; CHECK: ms %r2, 0(%r3)
; CHECK: br %r14
- %b = load i32 *%src
+ %b = load i32 , i32 *%src
%mul = mul i32 %a, %b
ret i32 %mul
}
@@ -28,8 +28,8 @@ define i32 @f3(i32 %a, i32 *%src) {
; CHECK-LABEL: f3:
; CHECK: ms %r2, 4092(%r3)
; CHECK: br %r14
- %ptr = getelementptr i32 *%src, i64 1023
- %b = load i32 *%ptr
+ %ptr = getelementptr i32, i32 *%src, i64 1023
+ %b = load i32 , i32 *%ptr
%mul = mul i32 %a, %b
ret i32 %mul
}
@@ -39,8 +39,8 @@ define i32 @f4(i32 %a, i32 *%src) {
; CHECK-LABEL: f4:
; CHECK: msy %r2, 4096(%r3)
; CHECK: br %r14
- %ptr = getelementptr i32 *%src, i64 1024
- %b = load i32 *%ptr
+ %ptr = getelementptr i32, i32 *%src, i64 1024
+ %b = load i32 , i32 *%ptr
%mul = mul i32 %a, %b
ret i32 %mul
}
@@ -50,8 +50,8 @@ define i32 @f5(i32 %a, i32 *%src) {
; CHECK-LABEL: f5:
; CHECK: msy %r2, 524284(%r3)
; CHECK: br %r14
- %ptr = getelementptr i32 *%src, i64 131071
- %b = load i32 *%ptr
+ %ptr = getelementptr i32, i32 *%src, i64 131071
+ %b = load i32 , i32 *%ptr
%mul = mul i32 %a, %b
ret i32 %mul
}
@@ -63,8 +63,8 @@ define i32 @f6(i32 %a, i32 *%src) {
; CHECK: agfi %r3, 524288
; CHECK: ms %r2, 0(%r3)
; CHECK: br %r14
- %ptr = getelementptr i32 *%src, i64 131072
- %b = load i32 *%ptr
+ %ptr = getelementptr i32, i32 *%src, i64 131072
+ %b = load i32 , i32 *%ptr
%mul = mul i32 %a, %b
ret i32 %mul
}
@@ -74,8 +74,8 @@ define i32 @f7(i32 %a, i32 *%src) {
; CHECK-LABEL: f7:
; CHECK: msy %r2, -4(%r3)
; CHECK: br %r14
- %ptr = getelementptr i32 *%src, i64 -1
- %b = load i32 *%ptr
+ %ptr = getelementptr i32, i32 *%src, i64 -1
+ %b = load i32 , i32 *%ptr
%mul = mul i32 %a, %b
ret i32 %mul
}
@@ -85,8 +85,8 @@ define i32 @f8(i32 %a, i32 *%src) {
; CHECK-LABEL: f8:
; CHECK: msy %r2, -524288(%r3)
; CHECK: br %r14
- %ptr = getelementptr i32 *%src, i64 -131072
- %b = load i32 *%ptr
+ %ptr = getelementptr i32, i32 *%src, i64 -131072
+ %b = load i32 , i32 *%ptr
%mul = mul i32 %a, %b
ret i32 %mul
}
@@ -98,8 +98,8 @@ define i32 @f9(i32 %a, i32 *%src) {
; CHECK: agfi %r3, -524292
; CHECK: ms %r2, 0(%r3)
; CHECK: br %r14
- %ptr = getelementptr i32 *%src, i64 -131073
- %b = load i32 *%ptr
+ %ptr = getelementptr i32, i32 *%src, i64 -131073
+ %b = load i32 , i32 *%ptr
%mul = mul i32 %a, %b
ret i32 %mul
}
@@ -112,7 +112,7 @@ define i32 @f10(i32 %a, i64 %src, i64 %index) {
%add1 = add i64 %src, %index
%add2 = add i64 %add1, 4092
%ptr = inttoptr i64 %add2 to i32 *
- %b = load i32 *%ptr
+ %b = load i32 , i32 *%ptr
%mul = mul i32 %a, %b
ret i32 %mul
}
@@ -125,7 +125,7 @@ define i32 @f11(i32 %a, i64 %src, i64 %index) {
%add1 = add i64 %src, %index
%add2 = add i64 %add1, 4096
%ptr = inttoptr i64 %add2 to i32 *
- %b = load i32 *%ptr
+ %b = load i32 , i32 *%ptr
%mul = mul i32 %a, %b
ret i32 %mul
}
@@ -136,26 +136,26 @@ define i32 @f12(i32 *%ptr0) {
; CHECK: brasl %r14, foo@PLT
; CHECK: ms %r2, 16{{[04]}}(%r15)
; CHECK: br %r14
- %ptr1 = getelementptr i32 *%ptr0, i64 2
- %ptr2 = getelementptr i32 *%ptr0, i64 4
- %ptr3 = getelementptr i32 *%ptr0, i64 6
- %ptr4 = getelementptr i32 *%ptr0, i64 8
- %ptr5 = getelementptr i32 *%ptr0, i64 10
- %ptr6 = getelementptr i32 *%ptr0, i64 12
- %ptr7 = getelementptr i32 *%ptr0, i64 14
- %ptr8 = getelementptr i32 *%ptr0, i64 16
- %ptr9 = getelementptr i32 *%ptr0, i64 18
-
- %val0 = load i32 *%ptr0
- %val1 = load i32 *%ptr1
- %val2 = load i32 *%ptr2
- %val3 = load i32 *%ptr3
- %val4 = load i32 *%ptr4
- %val5 = load i32 *%ptr5
- %val6 = load i32 *%ptr6
- %val7 = load i32 *%ptr7
- %val8 = load i32 *%ptr8
- %val9 = load i32 *%ptr9
+ %ptr1 = getelementptr i32, i32 *%ptr0, i64 2
+ %ptr2 = getelementptr i32, i32 *%ptr0, i64 4
+ %ptr3 = getelementptr i32, i32 *%ptr0, i64 6
+ %ptr4 = getelementptr i32, i32 *%ptr0, i64 8
+ %ptr5 = getelementptr i32, i32 *%ptr0, i64 10
+ %ptr6 = getelementptr i32, i32 *%ptr0, i64 12
+ %ptr7 = getelementptr i32, i32 *%ptr0, i64 14
+ %ptr8 = getelementptr i32, i32 *%ptr0, i64 16
+ %ptr9 = getelementptr i32, i32 *%ptr0, i64 18
+
+ %val0 = load i32 , i32 *%ptr0
+ %val1 = load i32 , i32 *%ptr1
+ %val2 = load i32 , i32 *%ptr2
+ %val3 = load i32 , i32 *%ptr3
+ %val4 = load i32 , i32 *%ptr4
+ %val5 = load i32 , i32 *%ptr5
+ %val6 = load i32 , i32 *%ptr6
+ %val7 = load i32 , i32 *%ptr7
+ %val8 = load i32 , i32 *%ptr8
+ %val9 = load i32 , i32 *%ptr9
%ret = call i32 @foo()
diff --git a/test/CodeGen/SystemZ/int-mul-03.ll b/test/CodeGen/SystemZ/int-mul-03.ll
index df18050d0242..c4d16cefc1c2 100644
--- a/test/CodeGen/SystemZ/int-mul-03.ll
+++ b/test/CodeGen/SystemZ/int-mul-03.ll
@@ -19,7 +19,7 @@ define i64 @f2(i64 %a, i32 *%src) {
; CHECK-LABEL: f2:
; CHECK: msgf %r2, 0(%r3)
; CHECK: br %r14
- %b = load i32 *%src
+ %b = load i32 , i32 *%src
%bext = sext i32 %b to i64
%mul = mul i64 %a, %bext
ret i64 %mul
@@ -30,8 +30,8 @@ define i64 @f3(i64 %a, i32 *%src) {
; CHECK-LABEL: f3:
; CHECK: msgf %r2, 524284(%r3)
; CHECK: br %r14
- %ptr = getelementptr i32 *%src, i64 131071
- %b = load i32 *%ptr
+ %ptr = getelementptr i32, i32 *%src, i64 131071
+ %b = load i32 , i32 *%ptr
%bext = sext i32 %b to i64
%mul = mul i64 %a, %bext
ret i64 %mul
@@ -44,8 +44,8 @@ define i64 @f4(i64 %a, i32 *%src) {
; CHECK: agfi %r3, 524288
; CHECK: msgf %r2, 0(%r3)
; CHECK: br %r14
- %ptr = getelementptr i32 *%src, i64 131072
- %b = load i32 *%ptr
+ %ptr = getelementptr i32, i32 *%src, i64 131072
+ %b = load i32 , i32 *%ptr
%bext = sext i32 %b to i64
%mul = mul i64 %a, %bext
ret i64 %mul
@@ -56,8 +56,8 @@ define i64 @f5(i64 %a, i32 *%src) {
; CHECK-LABEL: f5:
; CHECK: msgf %r2, -4(%r3)
; CHECK: br %r14
- %ptr = getelementptr i32 *%src, i64 -1
- %b = load i32 *%ptr
+ %ptr = getelementptr i32, i32 *%src, i64 -1
+ %b = load i32 , i32 *%ptr
%bext = sext i32 %b to i64
%mul = mul i64 %a, %bext
ret i64 %mul
@@ -68,8 +68,8 @@ define i64 @f6(i64 %a, i32 *%src) {
; CHECK-LABEL: f6:
; CHECK: msgf %r2, -524288(%r3)
; CHECK: br %r14
- %ptr = getelementptr i32 *%src, i64 -131072
- %b = load i32 *%ptr
+ %ptr = getelementptr i32, i32 *%src, i64 -131072
+ %b = load i32 , i32 *%ptr
%bext = sext i32 %b to i64
%mul = mul i64 %a, %bext
ret i64 %mul
@@ -82,8 +82,8 @@ define i64 @f7(i64 %a, i32 *%src) {
; CHECK: agfi %r3, -524292
; CHECK: msgf %r2, 0(%r3)
; CHECK: br %r14
- %ptr = getelementptr i32 *%src, i64 -131073
- %b = load i32 *%ptr
+ %ptr = getelementptr i32, i32 *%src, i64 -131073
+ %b = load i32 , i32 *%ptr
%bext = sext i32 %b to i64
%mul = mul i64 %a, %bext
ret i64 %mul
@@ -97,7 +97,7 @@ define i64 @f8(i64 %a, i64 %src, i64 %index) {
%add1 = add i64 %src, %index
%add2 = add i64 %add1, 524284
%ptr = inttoptr i64 %add2 to i32 *
- %b = load i32 *%ptr
+ %b = load i32 , i32 *%ptr
%bext = sext i32 %b to i64
%mul = mul i64 %a, %bext
ret i64 %mul
@@ -109,26 +109,26 @@ define i64 @f9(i32 *%ptr0) {
; CHECK: brasl %r14, foo@PLT
; CHECK: msgf %r2, 16{{[04]}}(%r15)
; CHECK: br %r14
- %ptr1 = getelementptr i32 *%ptr0, i64 2
- %ptr2 = getelementptr i32 *%ptr0, i64 4
- %ptr3 = getelementptr i32 *%ptr0, i64 6
- %ptr4 = getelementptr i32 *%ptr0, i64 8
- %ptr5 = getelementptr i32 *%ptr0, i64 10
- %ptr6 = getelementptr i32 *%ptr0, i64 12
- %ptr7 = getelementptr i32 *%ptr0, i64 14
- %ptr8 = getelementptr i32 *%ptr0, i64 16
- %ptr9 = getelementptr i32 *%ptr0, i64 18
-
- %val0 = load i32 *%ptr0
- %val1 = load i32 *%ptr1
- %val2 = load i32 *%ptr2
- %val3 = load i32 *%ptr3
- %val4 = load i32 *%ptr4
- %val5 = load i32 *%ptr5
- %val6 = load i32 *%ptr6
- %val7 = load i32 *%ptr7
- %val8 = load i32 *%ptr8
- %val9 = load i32 *%ptr9
+ %ptr1 = getelementptr i32, i32 *%ptr0, i64 2
+ %ptr2 = getelementptr i32, i32 *%ptr0, i64 4
+ %ptr3 = getelementptr i32, i32 *%ptr0, i64 6
+ %ptr4 = getelementptr i32, i32 *%ptr0, i64 8
+ %ptr5 = getelementptr i32, i32 *%ptr0, i64 10
+ %ptr6 = getelementptr i32, i32 *%ptr0, i64 12
+ %ptr7 = getelementptr i32, i32 *%ptr0, i64 14
+ %ptr8 = getelementptr i32, i32 *%ptr0, i64 16
+ %ptr9 = getelementptr i32, i32 *%ptr0, i64 18
+
+ %val0 = load i32 , i32 *%ptr0
+ %val1 = load i32 , i32 *%ptr1
+ %val2 = load i32 , i32 *%ptr2
+ %val3 = load i32 , i32 *%ptr3
+ %val4 = load i32 , i32 *%ptr4
+ %val5 = load i32 , i32 *%ptr5
+ %val6 = load i32 , i32 *%ptr6
+ %val7 = load i32 , i32 *%ptr7
+ %val8 = load i32 , i32 *%ptr8
+ %val9 = load i32 , i32 *%ptr9
%frob0 = add i32 %val0, 100
%frob1 = add i32 %val1, 100
diff --git a/test/CodeGen/SystemZ/int-mul-04.ll b/test/CodeGen/SystemZ/int-mul-04.ll
index 183a9a748c37..1ec466174bc7 100644
--- a/test/CodeGen/SystemZ/int-mul-04.ll
+++ b/test/CodeGen/SystemZ/int-mul-04.ll
@@ -18,7 +18,7 @@ define i64 @f2(i64 %a, i64 *%src) {
; CHECK-LABEL: f2:
; CHECK: msg %r2, 0(%r3)
; CHECK: br %r14
- %b = load i64 *%src
+ %b = load i64 , i64 *%src
%mul = mul i64 %a, %b
ret i64 %mul
}
@@ -28,8 +28,8 @@ define i64 @f3(i64 %a, i64 *%src) {
; CHECK-LABEL: f3:
; CHECK: msg %r2, 524280(%r3)
; CHECK: br %r14
- %ptr = getelementptr i64 *%src, i64 65535
- %b = load i64 *%ptr
+ %ptr = getelementptr i64, i64 *%src, i64 65535
+ %b = load i64 , i64 *%ptr
%mul = mul i64 %a, %b
ret i64 %mul
}
@@ -41,8 +41,8 @@ define i64 @f4(i64 %a, i64 *%src) {
; CHECK: agfi %r3, 524288
; CHECK: msg %r2, 0(%r3)
; CHECK: br %r14
- %ptr = getelementptr i64 *%src, i64 65536
- %b = load i64 *%ptr
+ %ptr = getelementptr i64, i64 *%src, i64 65536
+ %b = load i64 , i64 *%ptr
%mul = mul i64 %a, %b
ret i64 %mul
}
@@ -52,8 +52,8 @@ define i64 @f5(i64 %a, i64 *%src) {
; CHECK-LABEL: f5:
; CHECK: msg %r2, -8(%r3)
; CHECK: br %r14
- %ptr = getelementptr i64 *%src, i64 -1
- %b = load i64 *%ptr
+ %ptr = getelementptr i64, i64 *%src, i64 -1
+ %b = load i64 , i64 *%ptr
%mul = mul i64 %a, %b
ret i64 %mul
}
@@ -63,8 +63,8 @@ define i64 @f6(i64 %a, i64 *%src) {
; CHECK-LABEL: f6:
; CHECK: msg %r2, -524288(%r3)
; CHECK: br %r14
- %ptr = getelementptr i64 *%src, i64 -65536
- %b = load i64 *%ptr
+ %ptr = getelementptr i64, i64 *%src, i64 -65536
+ %b = load i64 , i64 *%ptr
%mul = mul i64 %a, %b
ret i64 %mul
}
@@ -76,8 +76,8 @@ define i64 @f7(i64 %a, i64 *%src) {
; CHECK: agfi %r3, -524296
; CHECK: msg %r2, 0(%r3)
; CHECK: br %r14
- %ptr = getelementptr i64 *%src, i64 -65537
- %b = load i64 *%ptr
+ %ptr = getelementptr i64, i64 *%src, i64 -65537
+ %b = load i64 , i64 *%ptr
%mul = mul i64 %a, %b
ret i64 %mul
}
@@ -90,7 +90,7 @@ define i64 @f8(i64 %a, i64 %src, i64 %index) {
%add1 = add i64 %src, %index
%add2 = add i64 %add1, 524280
%ptr = inttoptr i64 %add2 to i64 *
- %b = load i64 *%ptr
+ %b = load i64 , i64 *%ptr
%mul = mul i64 %a, %b
ret i64 %mul
}
@@ -101,26 +101,26 @@ define i64 @f9(i64 *%ptr0) {
; CHECK: brasl %r14, foo@PLT
; CHECK: msg %r2, 160(%r15)
; CHECK: br %r14
- %ptr1 = getelementptr i64 *%ptr0, i64 2
- %ptr2 = getelementptr i64 *%ptr0, i64 4
- %ptr3 = getelementptr i64 *%ptr0, i64 6
- %ptr4 = getelementptr i64 *%ptr0, i64 8
- %ptr5 = getelementptr i64 *%ptr0, i64 10
- %ptr6 = getelementptr i64 *%ptr0, i64 12
- %ptr7 = getelementptr i64 *%ptr0, i64 14
- %ptr8 = getelementptr i64 *%ptr0, i64 16
- %ptr9 = getelementptr i64 *%ptr0, i64 18
+ %ptr1 = getelementptr i64, i64 *%ptr0, i64 2
+ %ptr2 = getelementptr i64, i64 *%ptr0, i64 4
+ %ptr3 = getelementptr i64, i64 *%ptr0, i64 6
+ %ptr4 = getelementptr i64, i64 *%ptr0, i64 8
+ %ptr5 = getelementptr i64, i64 *%ptr0, i64 10
+ %ptr6 = getelementptr i64, i64 *%ptr0, i64 12
+ %ptr7 = getelementptr i64, i64 *%ptr0, i64 14
+ %ptr8 = getelementptr i64, i64 *%ptr0, i64 16
+ %ptr9 = getelementptr i64, i64 *%ptr0, i64 18
- %val0 = load i64 *%ptr0
- %val1 = load i64 *%ptr1
- %val2 = load i64 *%ptr2
- %val3 = load i64 *%ptr3
- %val4 = load i64 *%ptr4
- %val5 = load i64 *%ptr5
- %val6 = load i64 *%ptr6
- %val7 = load i64 *%ptr7
- %val8 = load i64 *%ptr8
- %val9 = load i64 *%ptr9
+ %val0 = load i64 , i64 *%ptr0
+ %val1 = load i64 , i64 *%ptr1
+ %val2 = load i64 , i64 *%ptr2
+ %val3 = load i64 , i64 *%ptr3
+ %val4 = load i64 , i64 *%ptr4
+ %val5 = load i64 , i64 *%ptr5
+ %val6 = load i64 , i64 *%ptr6
+ %val7 = load i64 , i64 *%ptr7
+ %val8 = load i64 , i64 *%ptr8
+ %val9 = load i64 , i64 *%ptr9
%ret = call i64 @foo()
diff --git a/test/CodeGen/SystemZ/int-mul-08.ll b/test/CodeGen/SystemZ/int-mul-08.ll
index 90b26a4f3dde..c43089677ff5 100644
--- a/test/CodeGen/SystemZ/int-mul-08.ll
+++ b/test/CodeGen/SystemZ/int-mul-08.ll
@@ -88,7 +88,7 @@ define i64 @f6(i64 %dummy, i64 %a, i64 *%src) {
; CHECK-NOT: {{%r[234]}}
; CHECK: mlg %r2, 0(%r4)
; CHECK: br %r14
- %b = load i64 *%src
+ %b = load i64 , i64 *%src
%ax = zext i64 %a to i128
%bx = zext i64 %b to i128
%mulx = mul i128 %ax, %bx
@@ -102,8 +102,8 @@ define i64 @f7(i64 %dummy, i64 %a, i64 *%src) {
; CHECK-LABEL: f7:
; CHECK: mlg %r2, 524280(%r4)
; CHECK: br %r14
- %ptr = getelementptr i64 *%src, i64 65535
- %b = load i64 *%ptr
+ %ptr = getelementptr i64, i64 *%src, i64 65535
+ %b = load i64 , i64 *%ptr
%ax = zext i64 %a to i128
%bx = zext i64 %b to i128
%mulx = mul i128 %ax, %bx
@@ -119,8 +119,8 @@ define i64 @f8(i64 %dummy, i64 %a, i64 *%src) {
; CHECK: agfi %r4, 524288
; CHECK: mlg %r2, 0(%r4)
; CHECK: br %r14
- %ptr = getelementptr i64 *%src, i64 65536
- %b = load i64 *%ptr
+ %ptr = getelementptr i64, i64 *%src, i64 65536
+ %b = load i64 , i64 *%ptr
%ax = zext i64 %a to i128
%bx = zext i64 %b to i128
%mulx = mul i128 %ax, %bx
@@ -134,8 +134,8 @@ define i64 @f9(i64 %dummy, i64 %a, i64 *%src) {
; CHECK-LABEL: f9:
; CHECK: mlg %r2, -8(%r4)
; CHECK: br %r14
- %ptr = getelementptr i64 *%src, i64 -1
- %b = load i64 *%ptr
+ %ptr = getelementptr i64, i64 *%src, i64 -1
+ %b = load i64 , i64 *%ptr
%ax = zext i64 %a to i128
%bx = zext i64 %b to i128
%mulx = mul i128 %ax, %bx
@@ -149,8 +149,8 @@ define i64 @f10(i64 %dummy, i64 %a, i64 *%src) {
; CHECK-LABEL: f10:
; CHECK: mlg %r2, -524288(%r4)
; CHECK: br %r14
- %ptr = getelementptr i64 *%src, i64 -65536
- %b = load i64 *%ptr
+ %ptr = getelementptr i64, i64 *%src, i64 -65536
+ %b = load i64 , i64 *%ptr
%ax = zext i64 %a to i128
%bx = zext i64 %b to i128
%mulx = mul i128 %ax, %bx
@@ -166,8 +166,8 @@ define i64 @f11(i64 *%dest, i64 %a, i64 *%src) {
; CHECK: agfi %r4, -524296
; CHECK: mlg %r2, 0(%r4)
; CHECK: br %r14
- %ptr = getelementptr i64 *%src, i64 -65537
- %b = load i64 *%ptr
+ %ptr = getelementptr i64, i64 *%src, i64 -65537
+ %b = load i64 , i64 *%ptr
%ax = zext i64 %a to i128
%bx = zext i64 %b to i128
%mulx = mul i128 %ax, %bx
@@ -184,7 +184,7 @@ define i64 @f12(i64 *%dest, i64 %a, i64 %src, i64 %index) {
%add1 = add i64 %src, %index
%add2 = add i64 %add1, 524287
%ptr = inttoptr i64 %add2 to i64 *
- %b = load i64 *%ptr
+ %b = load i64 , i64 *%ptr
%ax = zext i64 %a to i128
%bx = zext i64 %b to i128
%mulx = mul i128 %ax, %bx
@@ -199,26 +199,26 @@ define i64 @f13(i64 *%ptr0) {
; CHECK: brasl %r14, foo@PLT
; CHECK: mlg {{%r[0-9]+}}, 160(%r15)
; CHECK: br %r14
- %ptr1 = getelementptr i64 *%ptr0, i64 2
- %ptr2 = getelementptr i64 *%ptr0, i64 4
- %ptr3 = getelementptr i64 *%ptr0, i64 6
- %ptr4 = getelementptr i64 *%ptr0, i64 8
- %ptr5 = getelementptr i64 *%ptr0, i64 10
- %ptr6 = getelementptr i64 *%ptr0, i64 12
- %ptr7 = getelementptr i64 *%ptr0, i64 14
- %ptr8 = getelementptr i64 *%ptr0, i64 16
- %ptr9 = getelementptr i64 *%ptr0, i64 18
+ %ptr1 = getelementptr i64, i64 *%ptr0, i64 2
+ %ptr2 = getelementptr i64, i64 *%ptr0, i64 4
+ %ptr3 = getelementptr i64, i64 *%ptr0, i64 6
+ %ptr4 = getelementptr i64, i64 *%ptr0, i64 8
+ %ptr5 = getelementptr i64, i64 *%ptr0, i64 10
+ %ptr6 = getelementptr i64, i64 *%ptr0, i64 12
+ %ptr7 = getelementptr i64, i64 *%ptr0, i64 14
+ %ptr8 = getelementptr i64, i64 *%ptr0, i64 16
+ %ptr9 = getelementptr i64, i64 *%ptr0, i64 18
- %val0 = load i64 *%ptr0
- %val1 = load i64 *%ptr1
- %val2 = load i64 *%ptr2
- %val3 = load i64 *%ptr3
- %val4 = load i64 *%ptr4
- %val5 = load i64 *%ptr5
- %val6 = load i64 *%ptr6
- %val7 = load i64 *%ptr7
- %val8 = load i64 *%ptr8
- %val9 = load i64 *%ptr9
+ %val0 = load i64 , i64 *%ptr0
+ %val1 = load i64 , i64 *%ptr1
+ %val2 = load i64 , i64 *%ptr2
+ %val3 = load i64 , i64 *%ptr3
+ %val4 = load i64 , i64 *%ptr4
+ %val5 = load i64 , i64 *%ptr5
+ %val6 = load i64 , i64 *%ptr6
+ %val7 = load i64 , i64 *%ptr7
+ %val8 = load i64 , i64 *%ptr8
+ %val9 = load i64 , i64 *%ptr9
%ret = call i64 @foo()
diff --git a/test/CodeGen/SystemZ/int-sub-01.ll b/test/CodeGen/SystemZ/int-sub-01.ll
index 8d1e56ddcaba..c04a619298da 100644
--- a/test/CodeGen/SystemZ/int-sub-01.ll
+++ b/test/CodeGen/SystemZ/int-sub-01.ll
@@ -19,7 +19,7 @@ define i32 @f2(i32 %a, i32 *%src) {
; CHECK-LABEL: f2:
; CHECK: s %r2, 0(%r3)
; CHECK: br %r14
- %b = load i32 *%src
+ %b = load i32 , i32 *%src
%sub = sub i32 %a, %b
ret i32 %sub
}
@@ -29,8 +29,8 @@ define i32 @f3(i32 %a, i32 *%src) {
; CHECK-LABEL: f3:
; CHECK: s %r2, 4092(%r3)
; CHECK: br %r14
- %ptr = getelementptr i32 *%src, i64 1023
- %b = load i32 *%ptr
+ %ptr = getelementptr i32, i32 *%src, i64 1023
+ %b = load i32 , i32 *%ptr
%sub = sub i32 %a, %b
ret i32 %sub
}
@@ -40,8 +40,8 @@ define i32 @f4(i32 %a, i32 *%src) {
; CHECK-LABEL: f4:
; CHECK: sy %r2, 4096(%r3)
; CHECK: br %r14
- %ptr = getelementptr i32 *%src, i64 1024
- %b = load i32 *%ptr
+ %ptr = getelementptr i32, i32 *%src, i64 1024
+ %b = load i32 , i32 *%ptr
%sub = sub i32 %a, %b
ret i32 %sub
}
@@ -51,8 +51,8 @@ define i32 @f5(i32 %a, i32 *%src) {
; CHECK-LABEL: f5:
; CHECK: sy %r2, 524284(%r3)
; CHECK: br %r14
- %ptr = getelementptr i32 *%src, i64 131071
- %b = load i32 *%ptr
+ %ptr = getelementptr i32, i32 *%src, i64 131071
+ %b = load i32 , i32 *%ptr
%sub = sub i32 %a, %b
ret i32 %sub
}
@@ -64,8 +64,8 @@ define i32 @f6(i32 %a, i32 *%src) {
; CHECK: agfi %r3, 524288
; CHECK: s %r2, 0(%r3)
; CHECK: br %r14
- %ptr = getelementptr i32 *%src, i64 131072
- %b = load i32 *%ptr
+ %ptr = getelementptr i32, i32 *%src, i64 131072
+ %b = load i32 , i32 *%ptr
%sub = sub i32 %a, %b
ret i32 %sub
}
@@ -75,8 +75,8 @@ define i32 @f7(i32 %a, i32 *%src) {
; CHECK-LABEL: f7:
; CHECK: sy %r2, -4(%r3)
; CHECK: br %r14
- %ptr = getelementptr i32 *%src, i64 -1
- %b = load i32 *%ptr
+ %ptr = getelementptr i32, i32 *%src, i64 -1
+ %b = load i32 , i32 *%ptr
%sub = sub i32 %a, %b
ret i32 %sub
}
@@ -86,8 +86,8 @@ define i32 @f8(i32 %a, i32 *%src) {
; CHECK-LABEL: f8:
; CHECK: sy %r2, -524288(%r3)
; CHECK: br %r14
- %ptr = getelementptr i32 *%src, i64 -131072
- %b = load i32 *%ptr
+ %ptr = getelementptr i32, i32 *%src, i64 -131072
+ %b = load i32 , i32 *%ptr
%sub = sub i32 %a, %b
ret i32 %sub
}
@@ -99,8 +99,8 @@ define i32 @f9(i32 %a, i32 *%src) {
; CHECK: agfi %r3, -524292
; CHECK: s %r2, 0(%r3)
; CHECK: br %r14
- %ptr = getelementptr i32 *%src, i64 -131073
- %b = load i32 *%ptr
+ %ptr = getelementptr i32, i32 *%src, i64 -131073
+ %b = load i32 , i32 *%ptr
%sub = sub i32 %a, %b
ret i32 %sub
}
@@ -113,7 +113,7 @@ define i32 @f10(i32 %a, i64 %src, i64 %index) {
%add1 = add i64 %src, %index
%add2 = add i64 %add1, 4092
%ptr = inttoptr i64 %add2 to i32 *
- %b = load i32 *%ptr
+ %b = load i32 , i32 *%ptr
%sub = sub i32 %a, %b
ret i32 %sub
}
@@ -126,7 +126,7 @@ define i32 @f11(i32 %a, i64 %src, i64 %index) {
%add1 = add i64 %src, %index
%add2 = add i64 %add1, 4096
%ptr = inttoptr i64 %add2 to i32 *
- %b = load i32 *%ptr
+ %b = load i32 , i32 *%ptr
%sub = sub i32 %a, %b
ret i32 %sub
}
@@ -137,26 +137,26 @@ define i32 @f12(i32 *%ptr0) {
; CHECK: brasl %r14, foo@PLT
; CHECK: s %r2, 16{{[04]}}(%r15)
; CHECK: br %r14
- %ptr1 = getelementptr i32 *%ptr0, i64 2
- %ptr2 = getelementptr i32 *%ptr0, i64 4
- %ptr3 = getelementptr i32 *%ptr0, i64 6
- %ptr4 = getelementptr i32 *%ptr0, i64 8
- %ptr5 = getelementptr i32 *%ptr0, i64 10
- %ptr6 = getelementptr i32 *%ptr0, i64 12
- %ptr7 = getelementptr i32 *%ptr0, i64 14
- %ptr8 = getelementptr i32 *%ptr0, i64 16
- %ptr9 = getelementptr i32 *%ptr0, i64 18
-
- %val0 = load i32 *%ptr0
- %val1 = load i32 *%ptr1
- %val2 = load i32 *%ptr2
- %val3 = load i32 *%ptr3
- %val4 = load i32 *%ptr4
- %val5 = load i32 *%ptr5
- %val6 = load i32 *%ptr6
- %val7 = load i32 *%ptr7
- %val8 = load i32 *%ptr8
- %val9 = load i32 *%ptr9
+ %ptr1 = getelementptr i32, i32 *%ptr0, i64 2
+ %ptr2 = getelementptr i32, i32 *%ptr0, i64 4
+ %ptr3 = getelementptr i32, i32 *%ptr0, i64 6
+ %ptr4 = getelementptr i32, i32 *%ptr0, i64 8
+ %ptr5 = getelementptr i32, i32 *%ptr0, i64 10
+ %ptr6 = getelementptr i32, i32 *%ptr0, i64 12
+ %ptr7 = getelementptr i32, i32 *%ptr0, i64 14
+ %ptr8 = getelementptr i32, i32 *%ptr0, i64 16
+ %ptr9 = getelementptr i32, i32 *%ptr0, i64 18
+
+ %val0 = load i32 , i32 *%ptr0
+ %val1 = load i32 , i32 *%ptr1
+ %val2 = load i32 , i32 *%ptr2
+ %val3 = load i32 , i32 *%ptr3
+ %val4 = load i32 , i32 *%ptr4
+ %val5 = load i32 , i32 *%ptr5
+ %val6 = load i32 , i32 *%ptr6
+ %val7 = load i32 , i32 *%ptr7
+ %val8 = load i32 , i32 *%ptr8
+ %val9 = load i32 , i32 *%ptr9
%ret = call i32 @foo()
diff --git a/test/CodeGen/SystemZ/int-sub-02.ll b/test/CodeGen/SystemZ/int-sub-02.ll
index a1c5ec50ee9c..23be240b0c5b 100644
--- a/test/CodeGen/SystemZ/int-sub-02.ll
+++ b/test/CodeGen/SystemZ/int-sub-02.ll
@@ -19,7 +19,7 @@ define i64 @f2(i64 %a, i32 *%src) {
; CHECK-LABEL: f2:
; CHECK: sgf %r2, 0(%r3)
; CHECK: br %r14
- %b = load i32 *%src
+ %b = load i32 , i32 *%src
%bext = sext i32 %b to i64
%sub = sub i64 %a, %bext
ret i64 %sub
@@ -30,8 +30,8 @@ define i64 @f3(i64 %a, i32 *%src) {
; CHECK-LABEL: f3:
; CHECK: sgf %r2, 524284(%r3)
; CHECK: br %r14
- %ptr = getelementptr i32 *%src, i64 131071
- %b = load i32 *%ptr
+ %ptr = getelementptr i32, i32 *%src, i64 131071
+ %b = load i32 , i32 *%ptr
%bext = sext i32 %b to i64
%sub = sub i64 %a, %bext
ret i64 %sub
@@ -44,8 +44,8 @@ define i64 @f4(i64 %a, i32 *%src) {
; CHECK: agfi %r3, 524288
; CHECK: sgf %r2, 0(%r3)
; CHECK: br %r14
- %ptr = getelementptr i32 *%src, i64 131072
- %b = load i32 *%ptr
+ %ptr = getelementptr i32, i32 *%src, i64 131072
+ %b = load i32 , i32 *%ptr
%bext = sext i32 %b to i64
%sub = sub i64 %a, %bext
ret i64 %sub
@@ -56,8 +56,8 @@ define i64 @f5(i64 %a, i32 *%src) {
; CHECK-LABEL: f5:
; CHECK: sgf %r2, -4(%r3)
; CHECK: br %r14
- %ptr = getelementptr i32 *%src, i64 -1
- %b = load i32 *%ptr
+ %ptr = getelementptr i32, i32 *%src, i64 -1
+ %b = load i32 , i32 *%ptr
%bext = sext i32 %b to i64
%sub = sub i64 %a, %bext
ret i64 %sub
@@ -68,8 +68,8 @@ define i64 @f6(i64 %a, i32 *%src) {
; CHECK-LABEL: f6:
; CHECK: sgf %r2, -524288(%r3)
; CHECK: br %r14
- %ptr = getelementptr i32 *%src, i64 -131072
- %b = load i32 *%ptr
+ %ptr = getelementptr i32, i32 *%src, i64 -131072
+ %b = load i32 , i32 *%ptr
%bext = sext i32 %b to i64
%sub = sub i64 %a, %bext
ret i64 %sub
@@ -82,8 +82,8 @@ define i64 @f7(i64 %a, i32 *%src) {
; CHECK: agfi %r3, -524292
; CHECK: sgf %r2, 0(%r3)
; CHECK: br %r14
- %ptr = getelementptr i32 *%src, i64 -131073
- %b = load i32 *%ptr
+ %ptr = getelementptr i32, i32 *%src, i64 -131073
+ %b = load i32 , i32 *%ptr
%bext = sext i32 %b to i64
%sub = sub i64 %a, %bext
ret i64 %sub
@@ -97,7 +97,7 @@ define i64 @f8(i64 %a, i64 %src, i64 %index) {
%add1 = add i64 %src, %index
%add2 = add i64 %add1, 524284
%ptr = inttoptr i64 %add2 to i32 *
- %b = load i32 *%ptr
+ %b = load i32 , i32 *%ptr
%bext = sext i32 %b to i64
%sub = sub i64 %a, %bext
ret i64 %sub
@@ -109,26 +109,26 @@ define i64 @f9(i32 *%ptr0) {
; CHECK: brasl %r14, foo@PLT
; CHECK: sgf %r2, 160(%r15)
; CHECK: br %r14
- %ptr1 = getelementptr i32 *%ptr0, i64 2
- %ptr2 = getelementptr i32 *%ptr0, i64 4
- %ptr3 = getelementptr i32 *%ptr0, i64 6
- %ptr4 = getelementptr i32 *%ptr0, i64 8
- %ptr5 = getelementptr i32 *%ptr0, i64 10
- %ptr6 = getelementptr i32 *%ptr0, i64 12
- %ptr7 = getelementptr i32 *%ptr0, i64 14
- %ptr8 = getelementptr i32 *%ptr0, i64 16
- %ptr9 = getelementptr i32 *%ptr0, i64 18
-
- %val0 = load i32 *%ptr0
- %val1 = load i32 *%ptr1
- %val2 = load i32 *%ptr2
- %val3 = load i32 *%ptr3
- %val4 = load i32 *%ptr4
- %val5 = load i32 *%ptr5
- %val6 = load i32 *%ptr6
- %val7 = load i32 *%ptr7
- %val8 = load i32 *%ptr8
- %val9 = load i32 *%ptr9
+ %ptr1 = getelementptr i32, i32 *%ptr0, i64 2
+ %ptr2 = getelementptr i32, i32 *%ptr0, i64 4
+ %ptr3 = getelementptr i32, i32 *%ptr0, i64 6
+ %ptr4 = getelementptr i32, i32 *%ptr0, i64 8
+ %ptr5 = getelementptr i32, i32 *%ptr0, i64 10
+ %ptr6 = getelementptr i32, i32 *%ptr0, i64 12
+ %ptr7 = getelementptr i32, i32 *%ptr0, i64 14
+ %ptr8 = getelementptr i32, i32 *%ptr0, i64 16
+ %ptr9 = getelementptr i32, i32 *%ptr0, i64 18
+
+ %val0 = load i32 , i32 *%ptr0
+ %val1 = load i32 , i32 *%ptr1
+ %val2 = load i32 , i32 *%ptr2
+ %val3 = load i32 , i32 *%ptr3
+ %val4 = load i32 , i32 *%ptr4
+ %val5 = load i32 , i32 *%ptr5
+ %val6 = load i32 , i32 *%ptr6
+ %val7 = load i32 , i32 *%ptr7
+ %val8 = load i32 , i32 *%ptr8
+ %val9 = load i32 , i32 *%ptr9
%frob0 = add i32 %val0, 100
%frob1 = add i32 %val1, 100
diff --git a/test/CodeGen/SystemZ/int-sub-03.ll b/test/CodeGen/SystemZ/int-sub-03.ll
index 44edd84bda4f..9d5100644102 100644
--- a/test/CodeGen/SystemZ/int-sub-03.ll
+++ b/test/CodeGen/SystemZ/int-sub-03.ll
@@ -19,7 +19,7 @@ define i64 @f2(i64 %a, i32 *%src) {
; CHECK-LABEL: f2:
; CHECK: slgf %r2, 0(%r3)
; CHECK: br %r14
- %b = load i32 *%src
+ %b = load i32 , i32 *%src
%bext = zext i32 %b to i64
%sub = sub i64 %a, %bext
ret i64 %sub
@@ -30,8 +30,8 @@ define i64 @f3(i64 %a, i32 *%src) {
; CHECK-LABEL: f3:
; CHECK: slgf %r2, 524284(%r3)
; CHECK: br %r14
- %ptr = getelementptr i32 *%src, i64 131071
- %b = load i32 *%ptr
+ %ptr = getelementptr i32, i32 *%src, i64 131071
+ %b = load i32 , i32 *%ptr
%bext = zext i32 %b to i64
%sub = sub i64 %a, %bext
ret i64 %sub
@@ -44,8 +44,8 @@ define i64 @f4(i64 %a, i32 *%src) {
; CHECK: agfi %r3, 524288
; CHECK: slgf %r2, 0(%r3)
; CHECK: br %r14
- %ptr = getelementptr i32 *%src, i64 131072
- %b = load i32 *%ptr
+ %ptr = getelementptr i32, i32 *%src, i64 131072
+ %b = load i32 , i32 *%ptr
%bext = zext i32 %b to i64
%sub = sub i64 %a, %bext
ret i64 %sub
@@ -56,8 +56,8 @@ define i64 @f5(i64 %a, i32 *%src) {
; CHECK-LABEL: f5:
; CHECK: slgf %r2, -4(%r3)
; CHECK: br %r14
- %ptr = getelementptr i32 *%src, i64 -1
- %b = load i32 *%ptr
+ %ptr = getelementptr i32, i32 *%src, i64 -1
+ %b = load i32 , i32 *%ptr
%bext = zext i32 %b to i64
%sub = sub i64 %a, %bext
ret i64 %sub
@@ -68,8 +68,8 @@ define i64 @f6(i64 %a, i32 *%src) {
; CHECK-LABEL: f6:
; CHECK: slgf %r2, -524288(%r3)
; CHECK: br %r14
- %ptr = getelementptr i32 *%src, i64 -131072
- %b = load i32 *%ptr
+ %ptr = getelementptr i32, i32 *%src, i64 -131072
+ %b = load i32 , i32 *%ptr
%bext = zext i32 %b to i64
%sub = sub i64 %a, %bext
ret i64 %sub
@@ -82,8 +82,8 @@ define i64 @f7(i64 %a, i32 *%src) {
; CHECK: agfi %r3, -524292
; CHECK: slgf %r2, 0(%r3)
; CHECK: br %r14
- %ptr = getelementptr i32 *%src, i64 -131073
- %b = load i32 *%ptr
+ %ptr = getelementptr i32, i32 *%src, i64 -131073
+ %b = load i32 , i32 *%ptr
%bext = zext i32 %b to i64
%sub = sub i64 %a, %bext
ret i64 %sub
@@ -97,7 +97,7 @@ define i64 @f8(i64 %a, i64 %src, i64 %index) {
%add1 = add i64 %src, %index
%add2 = add i64 %add1, 524284
%ptr = inttoptr i64 %add2 to i32 *
- %b = load i32 *%ptr
+ %b = load i32 , i32 *%ptr
%bext = zext i32 %b to i64
%sub = sub i64 %a, %bext
ret i64 %sub
@@ -109,26 +109,26 @@ define i64 @f9(i32 *%ptr0) {
; CHECK: brasl %r14, foo@PLT
; CHECK: slgf %r2, 16{{[04]}}(%r15)
; CHECK: br %r14
- %ptr1 = getelementptr i32 *%ptr0, i64 2
- %ptr2 = getelementptr i32 *%ptr0, i64 4
- %ptr3 = getelementptr i32 *%ptr0, i64 6
- %ptr4 = getelementptr i32 *%ptr0, i64 8
- %ptr5 = getelementptr i32 *%ptr0, i64 10
- %ptr6 = getelementptr i32 *%ptr0, i64 12
- %ptr7 = getelementptr i32 *%ptr0, i64 14
- %ptr8 = getelementptr i32 *%ptr0, i64 16
- %ptr9 = getelementptr i32 *%ptr0, i64 18
-
- %val0 = load i32 *%ptr0
- %val1 = load i32 *%ptr1
- %val2 = load i32 *%ptr2
- %val3 = load i32 *%ptr3
- %val4 = load i32 *%ptr4
- %val5 = load i32 *%ptr5
- %val6 = load i32 *%ptr6
- %val7 = load i32 *%ptr7
- %val8 = load i32 *%ptr8
- %val9 = load i32 *%ptr9
+ %ptr1 = getelementptr i32, i32 *%ptr0, i64 2
+ %ptr2 = getelementptr i32, i32 *%ptr0, i64 4
+ %ptr3 = getelementptr i32, i32 *%ptr0, i64 6
+ %ptr4 = getelementptr i32, i32 *%ptr0, i64 8
+ %ptr5 = getelementptr i32, i32 *%ptr0, i64 10
+ %ptr6 = getelementptr i32, i32 *%ptr0, i64 12
+ %ptr7 = getelementptr i32, i32 *%ptr0, i64 14
+ %ptr8 = getelementptr i32, i32 *%ptr0, i64 16
+ %ptr9 = getelementptr i32, i32 *%ptr0, i64 18
+
+ %val0 = load i32 , i32 *%ptr0
+ %val1 = load i32 , i32 *%ptr1
+ %val2 = load i32 , i32 *%ptr2
+ %val3 = load i32 , i32 *%ptr3
+ %val4 = load i32 , i32 *%ptr4
+ %val5 = load i32 , i32 *%ptr5
+ %val6 = load i32 , i32 *%ptr6
+ %val7 = load i32 , i32 *%ptr7
+ %val8 = load i32 , i32 *%ptr8
+ %val9 = load i32 , i32 *%ptr9
%frob0 = add i32 %val0, 100
%frob1 = add i32 %val1, 100
diff --git a/test/CodeGen/SystemZ/int-sub-04.ll b/test/CodeGen/SystemZ/int-sub-04.ll
index 85104536c5d7..ec2944d12709 100644
--- a/test/CodeGen/SystemZ/int-sub-04.ll
+++ b/test/CodeGen/SystemZ/int-sub-04.ll
@@ -19,7 +19,7 @@ define i64 @f2(i64 %a, i64 *%src) {
; CHECK-LABEL: f2:
; CHECK: sg %r2, 0(%r3)
; CHECK: br %r14
- %b = load i64 *%src
+ %b = load i64 , i64 *%src
%sub = sub i64 %a, %b
ret i64 %sub
}
@@ -29,8 +29,8 @@ define i64 @f3(i64 %a, i64 *%src) {
; CHECK-LABEL: f3:
; CHECK: sg %r2, 524280(%r3)
; CHECK: br %r14
- %ptr = getelementptr i64 *%src, i64 65535
- %b = load i64 *%ptr
+ %ptr = getelementptr i64, i64 *%src, i64 65535
+ %b = load i64 , i64 *%ptr
%sub = sub i64 %a, %b
ret i64 %sub
}
@@ -42,8 +42,8 @@ define i64 @f4(i64 %a, i64 *%src) {
; CHECK: agfi %r3, 524288
; CHECK: sg %r2, 0(%r3)
; CHECK: br %r14
- %ptr = getelementptr i64 *%src, i64 65536
- %b = load i64 *%ptr
+ %ptr = getelementptr i64, i64 *%src, i64 65536
+ %b = load i64 , i64 *%ptr
%sub = sub i64 %a, %b
ret i64 %sub
}
@@ -53,8 +53,8 @@ define i64 @f5(i64 %a, i64 *%src) {
; CHECK-LABEL: f5:
; CHECK: sg %r2, -8(%r3)
; CHECK: br %r14
- %ptr = getelementptr i64 *%src, i64 -1
- %b = load i64 *%ptr
+ %ptr = getelementptr i64, i64 *%src, i64 -1
+ %b = load i64 , i64 *%ptr
%sub = sub i64 %a, %b
ret i64 %sub
}
@@ -64,8 +64,8 @@ define i64 @f6(i64 %a, i64 *%src) {
; CHECK-LABEL: f6:
; CHECK: sg %r2, -524288(%r3)
; CHECK: br %r14
- %ptr = getelementptr i64 *%src, i64 -65536
- %b = load i64 *%ptr
+ %ptr = getelementptr i64, i64 *%src, i64 -65536
+ %b = load i64 , i64 *%ptr
%sub = sub i64 %a, %b
ret i64 %sub
}
@@ -77,8 +77,8 @@ define i64 @f7(i64 %a, i64 *%src) {
; CHECK: agfi %r3, -524296
; CHECK: sg %r2, 0(%r3)
; CHECK: br %r14
- %ptr = getelementptr i64 *%src, i64 -65537
- %b = load i64 *%ptr
+ %ptr = getelementptr i64, i64 *%src, i64 -65537
+ %b = load i64 , i64 *%ptr
%sub = sub i64 %a, %b
ret i64 %sub
}
@@ -91,7 +91,7 @@ define i64 @f8(i64 %a, i64 %src, i64 %index) {
%add1 = add i64 %src, %index
%add2 = add i64 %add1, 524280
%ptr = inttoptr i64 %add2 to i64 *
- %b = load i64 *%ptr
+ %b = load i64 , i64 *%ptr
%sub = sub i64 %a, %b
ret i64 %sub
}
@@ -102,26 +102,26 @@ define i64 @f9(i64 *%ptr0) {
; CHECK: brasl %r14, foo@PLT
; CHECK: sg %r2, 160(%r15)
; CHECK: br %r14
- %ptr1 = getelementptr i64 *%ptr0, i64 2
- %ptr2 = getelementptr i64 *%ptr0, i64 4
- %ptr3 = getelementptr i64 *%ptr0, i64 6
- %ptr4 = getelementptr i64 *%ptr0, i64 8
- %ptr5 = getelementptr i64 *%ptr0, i64 10
- %ptr6 = getelementptr i64 *%ptr0, i64 12
- %ptr7 = getelementptr i64 *%ptr0, i64 14
- %ptr8 = getelementptr i64 *%ptr0, i64 16
- %ptr9 = getelementptr i64 *%ptr0, i64 18
+ %ptr1 = getelementptr i64, i64 *%ptr0, i64 2
+ %ptr2 = getelementptr i64, i64 *%ptr0, i64 4
+ %ptr3 = getelementptr i64, i64 *%ptr0, i64 6
+ %ptr4 = getelementptr i64, i64 *%ptr0, i64 8
+ %ptr5 = getelementptr i64, i64 *%ptr0, i64 10
+ %ptr6 = getelementptr i64, i64 *%ptr0, i64 12
+ %ptr7 = getelementptr i64, i64 *%ptr0, i64 14
+ %ptr8 = getelementptr i64, i64 *%ptr0, i64 16
+ %ptr9 = getelementptr i64, i64 *%ptr0, i64 18
- %val0 = load i64 *%ptr0
- %val1 = load i64 *%ptr1
- %val2 = load i64 *%ptr2
- %val3 = load i64 *%ptr3
- %val4 = load i64 *%ptr4
- %val5 = load i64 *%ptr5
- %val6 = load i64 *%ptr6
- %val7 = load i64 *%ptr7
- %val8 = load i64 *%ptr8
- %val9 = load i64 *%ptr9
+ %val0 = load i64 , i64 *%ptr0
+ %val1 = load i64 , i64 *%ptr1
+ %val2 = load i64 , i64 *%ptr2
+ %val3 = load i64 , i64 *%ptr3
+ %val4 = load i64 , i64 *%ptr4
+ %val5 = load i64 , i64 *%ptr5
+ %val6 = load i64 , i64 *%ptr6
+ %val7 = load i64 , i64 *%ptr7
+ %val8 = load i64 , i64 *%ptr8
+ %val9 = load i64 , i64 *%ptr9
%ret = call i64 @foo()
diff --git a/test/CodeGen/SystemZ/int-sub-05.ll b/test/CodeGen/SystemZ/int-sub-05.ll
index 85ea14cd15f3..9775298a7a2e 100644
--- a/test/CodeGen/SystemZ/int-sub-05.ll
+++ b/test/CodeGen/SystemZ/int-sub-05.ll
@@ -11,7 +11,7 @@ define void @f1(i128 *%ptr, i64 %high, i64 %low) {
; CHECK: slgr {{%r[0-5]}}, %r4
; CHECK: slbgr {{%r[0-5]}}, %r3
; CHECK: br %r14
- %a = load i128 *%ptr
+ %a = load i128 , i128 *%ptr
%highx = zext i64 %high to i128
%lowx = zext i64 %low to i128
%bhigh = shl i128 %highx, 64
@@ -28,9 +28,9 @@ define void @f2(i64 %addr) {
; CHECK: slbg {{%r[0-5]}}, 0(%r2)
; CHECK: br %r14
%bptr = inttoptr i64 %addr to i128 *
- %aptr = getelementptr i128 *%bptr, i64 -8
- %a = load i128 *%aptr
- %b = load i128 *%bptr
+ %aptr = getelementptr i128, i128 *%bptr, i64 -8
+ %a = load i128 , i128 *%aptr
+ %b = load i128 , i128 *%bptr
%sub = sub i128 %a, %b
store i128 %sub, i128 *%aptr
ret void
@@ -44,9 +44,9 @@ define void @f3(i64 %base) {
; CHECK: br %r14
%addr = add i64 %base, 524272
%bptr = inttoptr i64 %addr to i128 *
- %aptr = getelementptr i128 *%bptr, i64 -8
- %a = load i128 *%aptr
- %b = load i128 *%bptr
+ %aptr = getelementptr i128, i128 *%bptr, i64 -8
+ %a = load i128 , i128 *%aptr
+ %b = load i128 , i128 *%bptr
%sub = sub i128 %a, %b
store i128 %sub, i128 *%aptr
ret void
@@ -62,9 +62,9 @@ define void @f4(i64 %base) {
; CHECK: br %r14
%addr = add i64 %base, 524280
%bptr = inttoptr i64 %addr to i128 *
- %aptr = getelementptr i128 *%bptr, i64 -8
- %a = load i128 *%aptr
- %b = load i128 *%bptr
+ %aptr = getelementptr i128, i128 *%bptr, i64 -8
+ %a = load i128 , i128 *%aptr
+ %b = load i128 , i128 *%bptr
%sub = sub i128 %a, %b
store i128 %sub, i128 *%aptr
ret void
@@ -80,9 +80,9 @@ define void @f5(i64 %base) {
; CHECK: br %r14
%addr = add i64 %base, 524288
%bptr = inttoptr i64 %addr to i128 *
- %aptr = getelementptr i128 *%bptr, i64 -8
- %a = load i128 *%aptr
- %b = load i128 *%bptr
+ %aptr = getelementptr i128, i128 *%bptr, i64 -8
+ %a = load i128 , i128 *%aptr
+ %b = load i128 , i128 *%bptr
%sub = sub i128 %a, %b
store i128 %sub, i128 *%aptr
ret void
@@ -96,9 +96,9 @@ define void @f6(i64 %base) {
; CHECK: br %r14
%addr = add i64 %base, -524288
%bptr = inttoptr i64 %addr to i128 *
- %aptr = getelementptr i128 *%bptr, i64 -8
- %a = load i128 *%aptr
- %b = load i128 *%bptr
+ %aptr = getelementptr i128, i128 *%bptr, i64 -8
+ %a = load i128 , i128 *%aptr
+ %b = load i128 , i128 *%bptr
%sub = sub i128 %a, %b
store i128 %sub, i128 *%aptr
ret void
@@ -112,9 +112,9 @@ define void @f7(i64 %base) {
; CHECK: br %r14
%addr = add i64 %base, -524296
%bptr = inttoptr i64 %addr to i128 *
- %aptr = getelementptr i128 *%bptr, i64 -8
- %a = load i128 *%aptr
- %b = load i128 *%bptr
+ %aptr = getelementptr i128, i128 *%bptr, i64 -8
+ %a = load i128 , i128 *%aptr
+ %b = load i128 , i128 *%bptr
%sub = sub i128 %a, %b
store i128 %sub, i128 *%aptr
ret void
@@ -128,20 +128,20 @@ define void @f8(i128 *%ptr0) {
; CHECK: slg {{%r[0-9]+}}, {{[0-9]+}}(%r15)
; CHECK: slbg {{%r[0-9]+}}, {{[0-9]+}}(%r15)
; CHECK: br %r14
- %ptr1 = getelementptr i128 *%ptr0, i128 2
- %ptr2 = getelementptr i128 *%ptr0, i128 4
- %ptr3 = getelementptr i128 *%ptr0, i128 6
- %ptr4 = getelementptr i128 *%ptr0, i128 8
+ %ptr1 = getelementptr i128, i128 *%ptr0, i128 2
+ %ptr2 = getelementptr i128, i128 *%ptr0, i128 4
+ %ptr3 = getelementptr i128, i128 *%ptr0, i128 6
+ %ptr4 = getelementptr i128, i128 *%ptr0, i128 8
- %val0 = load i128 *%ptr0
- %val1 = load i128 *%ptr1
- %val2 = load i128 *%ptr2
- %val3 = load i128 *%ptr3
- %val4 = load i128 *%ptr4
+ %val0 = load i128 , i128 *%ptr0
+ %val1 = load i128 , i128 *%ptr1
+ %val2 = load i128 , i128 *%ptr2
+ %val3 = load i128 , i128 *%ptr3
+ %val4 = load i128 , i128 *%ptr4
%retptr = call i128 *@foo()
- %ret = load i128 *%retptr
+ %ret = load i128 , i128 *%retptr
%sub0 = sub i128 %ret, %val0
%sub1 = sub i128 %sub0, %val1
%sub2 = sub i128 %sub1, %val2
diff --git a/test/CodeGen/SystemZ/int-sub-06.ll b/test/CodeGen/SystemZ/int-sub-06.ll
index 395d584b23de..c26383e9df03 100644
--- a/test/CodeGen/SystemZ/int-sub-06.ll
+++ b/test/CodeGen/SystemZ/int-sub-06.ll
@@ -9,7 +9,7 @@ define void @f1(i128 *%aptr, i32 %b) {
; CHECK: slgfr {{%r[0-5]}}, %r3
; CHECK: slbgr
; CHECK: br %r14
- %a = load i128 *%aptr
+ %a = load i128 , i128 *%aptr
%xor = xor i128 %a, 127
%bext = zext i32 %b to i128
%sub = sub i128 %xor, %bext
@@ -23,7 +23,7 @@ define void @f2(i128 *%aptr, i64 %b) {
; CHECK: slgfr {{%r[0-5]}}, %r3
; CHECK: slbgr
; CHECK: br %r14
- %a = load i128 *%aptr
+ %a = load i128 , i128 *%aptr
%xor = xor i128 %a, 127
%trunc = trunc i64 %b to i32
%bext = zext i32 %trunc to i128
@@ -39,7 +39,7 @@ define void @f3(i128 *%aptr, i64 %b) {
; CHECK: slgfr {{%r[0-5]}}, %r3
; CHECK: slbgr
; CHECK: br %r14
- %a = load i128 *%aptr
+ %a = load i128 , i128 *%aptr
%xor = xor i128 %a, 127
%bext = zext i64 %b to i128
%and = and i128 %bext, 4294967295
@@ -54,9 +54,9 @@ define void @f4(i128 *%aptr, i32 *%bsrc) {
; CHECK: slgf {{%r[0-5]}}, 0(%r3)
; CHECK: slbgr
; CHECK: br %r14
- %a = load i128 *%aptr
+ %a = load i128 , i128 *%aptr
%xor = xor i128 %a, 127
- %b = load i32 *%bsrc
+ %b = load i32 , i32 *%bsrc
%bext = zext i32 %b to i128
%sub = sub i128 %xor, %bext
store i128 %sub, i128 *%aptr
@@ -69,10 +69,10 @@ define void @f5(i128 *%aptr, i32 *%bsrc) {
; CHECK: slgf {{%r[0-5]}}, 524284(%r3)
; CHECK: slbgr
; CHECK: br %r14
- %a = load i128 *%aptr
+ %a = load i128 , i128 *%aptr
%xor = xor i128 %a, 127
- %ptr = getelementptr i32 *%bsrc, i64 131071
- %b = load i32 *%ptr
+ %ptr = getelementptr i32, i32 *%bsrc, i64 131071
+ %b = load i32 , i32 *%ptr
%bext = zext i32 %b to i128
%sub = sub i128 %xor, %bext
store i128 %sub, i128 *%aptr
@@ -87,10 +87,10 @@ define void @f6(i128 *%aptr, i32 *%bsrc) {
; CHECK: slgf {{%r[0-5]}}, 0(%r3)
; CHECK: slbgr
; CHECK: br %r14
- %a = load i128 *%aptr
+ %a = load i128 , i128 *%aptr
%xor = xor i128 %a, 127
- %ptr = getelementptr i32 *%bsrc, i64 131072
- %b = load i32 *%ptr
+ %ptr = getelementptr i32, i32 *%bsrc, i64 131072
+ %b = load i32 , i32 *%ptr
%bext = zext i32 %b to i128
%sub = sub i128 %xor, %bext
store i128 %sub, i128 *%aptr
@@ -103,10 +103,10 @@ define void @f7(i128 *%aptr, i32 *%bsrc) {
; CHECK: slgf {{%r[0-5]}}, -4(%r3)
; CHECK: slbgr
; CHECK: br %r14
- %a = load i128 *%aptr
+ %a = load i128 , i128 *%aptr
%xor = xor i128 %a, 127
- %ptr = getelementptr i32 *%bsrc, i128 -1
- %b = load i32 *%ptr
+ %ptr = getelementptr i32, i32 *%bsrc, i128 -1
+ %b = load i32 , i32 *%ptr
%bext = zext i32 %b to i128
%sub = sub i128 %xor, %bext
store i128 %sub, i128 *%aptr
@@ -119,10 +119,10 @@ define void @f8(i128 *%aptr, i32 *%bsrc) {
; CHECK: slgf {{%r[0-5]}}, -524288(%r3)
; CHECK: slbgr
; CHECK: br %r14
- %a = load i128 *%aptr
+ %a = load i128 , i128 *%aptr
%xor = xor i128 %a, 127
- %ptr = getelementptr i32 *%bsrc, i128 -131072
- %b = load i32 *%ptr
+ %ptr = getelementptr i32, i32 *%bsrc, i128 -131072
+ %b = load i32 , i32 *%ptr
%bext = zext i32 %b to i128
%sub = sub i128 %xor, %bext
store i128 %sub, i128 *%aptr
@@ -137,10 +137,10 @@ define void @f9(i128 *%aptr, i32 *%bsrc) {
; CHECK: slgf {{%r[0-5]}}, 0(%r3)
; CHECK: slbgr
; CHECK: br %r14
- %a = load i128 *%aptr
+ %a = load i128 , i128 *%aptr
%xor = xor i128 %a, 127
- %ptr = getelementptr i32 *%bsrc, i128 -131073
- %b = load i32 *%ptr
+ %ptr = getelementptr i32, i32 *%bsrc, i128 -131073
+ %b = load i32 , i32 *%ptr
%bext = zext i32 %b to i128
%sub = sub i128 %xor, %bext
store i128 %sub, i128 *%aptr
@@ -152,12 +152,12 @@ define void @f10(i128 *%aptr, i64 %src, i64 %index) {
; CHECK-LABEL: f10:
; CHECK: slgf {{%r[0-5]}}, 524284({{%r4,%r3|%r3,%r4}})
; CHECK: br %r14
- %a = load i128 *%aptr
+ %a = load i128 , i128 *%aptr
%xor = xor i128 %a, 127
%add1 = add i64 %src, %index
%add2 = add i64 %add1, 524284
%ptr = inttoptr i64 %add2 to i32 *
- %b = load i32 *%ptr
+ %b = load i32 , i32 *%ptr
%bext = zext i32 %b to i128
%sub = sub i128 %xor, %bext
store i128 %sub, i128 *%aptr
diff --git a/test/CodeGen/SystemZ/int-sub-07.ll b/test/CodeGen/SystemZ/int-sub-07.ll
index 5c1f42c1cc96..1d54fd6714cf 100644
--- a/test/CodeGen/SystemZ/int-sub-07.ll
+++ b/test/CodeGen/SystemZ/int-sub-07.ll
@@ -8,7 +8,7 @@ define i32 @f1(i32 %lhs, i16 *%src) {
; CHECK-LABEL: f1:
; CHECK: sh %r2, 0(%r3)
; CHECK: br %r14
- %half = load i16 *%src
+ %half = load i16 , i16 *%src
%rhs = sext i16 %half to i32
%res = sub i32 %lhs, %rhs
ret i32 %res
@@ -19,8 +19,8 @@ define i32 @f2(i32 %lhs, i16 *%src) {
; CHECK-LABEL: f2:
; CHECK: sh %r2, 4094(%r3)
; CHECK: br %r14
- %ptr = getelementptr i16 *%src, i64 2047
- %half = load i16 *%ptr
+ %ptr = getelementptr i16, i16 *%src, i64 2047
+ %half = load i16 , i16 *%ptr
%rhs = sext i16 %half to i32
%res = sub i32 %lhs, %rhs
ret i32 %res
@@ -31,8 +31,8 @@ define i32 @f3(i32 %lhs, i16 *%src) {
; CHECK-LABEL: f3:
; CHECK: shy %r2, 4096(%r3)
; CHECK: br %r14
- %ptr = getelementptr i16 *%src, i64 2048
- %half = load i16 *%ptr
+ %ptr = getelementptr i16, i16 *%src, i64 2048
+ %half = load i16 , i16 *%ptr
%rhs = sext i16 %half to i32
%res = sub i32 %lhs, %rhs
ret i32 %res
@@ -43,8 +43,8 @@ define i32 @f4(i32 %lhs, i16 *%src) {
; CHECK-LABEL: f4:
; CHECK: shy %r2, 524286(%r3)
; CHECK: br %r14
- %ptr = getelementptr i16 *%src, i64 262143
- %half = load i16 *%ptr
+ %ptr = getelementptr i16, i16 *%src, i64 262143
+ %half = load i16 , i16 *%ptr
%rhs = sext i16 %half to i32
%res = sub i32 %lhs, %rhs
ret i32 %res
@@ -57,8 +57,8 @@ define i32 @f5(i32 %lhs, i16 *%src) {
; CHECK: agfi %r3, 524288
; CHECK: sh %r2, 0(%r3)
; CHECK: br %r14
- %ptr = getelementptr i16 *%src, i64 262144
- %half = load i16 *%ptr
+ %ptr = getelementptr i16, i16 *%src, i64 262144
+ %half = load i16 , i16 *%ptr
%rhs = sext i16 %half to i32
%res = sub i32 %lhs, %rhs
ret i32 %res
@@ -69,8 +69,8 @@ define i32 @f6(i32 %lhs, i16 *%src) {
; CHECK-LABEL: f6:
; CHECK: shy %r2, -2(%r3)
; CHECK: br %r14
- %ptr = getelementptr i16 *%src, i64 -1
- %half = load i16 *%ptr
+ %ptr = getelementptr i16, i16 *%src, i64 -1
+ %half = load i16 , i16 *%ptr
%rhs = sext i16 %half to i32
%res = sub i32 %lhs, %rhs
ret i32 %res
@@ -81,8 +81,8 @@ define i32 @f7(i32 %lhs, i16 *%src) {
; CHECK-LABEL: f7:
; CHECK: shy %r2, -524288(%r3)
; CHECK: br %r14
- %ptr = getelementptr i16 *%src, i64 -262144
- %half = load i16 *%ptr
+ %ptr = getelementptr i16, i16 *%src, i64 -262144
+ %half = load i16 , i16 *%ptr
%rhs = sext i16 %half to i32
%res = sub i32 %lhs, %rhs
ret i32 %res
@@ -95,8 +95,8 @@ define i32 @f8(i32 %lhs, i16 *%src) {
; CHECK: agfi %r3, -524290
; CHECK: sh %r2, 0(%r3)
; CHECK: br %r14
- %ptr = getelementptr i16 *%src, i64 -262145
- %half = load i16 *%ptr
+ %ptr = getelementptr i16, i16 *%src, i64 -262145
+ %half = load i16 , i16 *%ptr
%rhs = sext i16 %half to i32
%res = sub i32 %lhs, %rhs
ret i32 %res
@@ -110,7 +110,7 @@ define i32 @f9(i32 %lhs, i64 %src, i64 %index) {
%sub1 = add i64 %src, %index
%sub2 = add i64 %sub1, 4094
%ptr = inttoptr i64 %sub2 to i16 *
- %half = load i16 *%ptr
+ %half = load i16 , i16 *%ptr
%rhs = sext i16 %half to i32
%res = sub i32 %lhs, %rhs
ret i32 %res
@@ -124,7 +124,7 @@ define i32 @f10(i32 %lhs, i64 %src, i64 %index) {
%sub1 = add i64 %src, %index
%sub2 = add i64 %sub1, 4096
%ptr = inttoptr i64 %sub2 to i16 *
- %half = load i16 *%ptr
+ %half = load i16 , i16 *%ptr
%rhs = sext i16 %half to i32
%res = sub i32 %lhs, %rhs
ret i32 %res
diff --git a/test/CodeGen/SystemZ/loop-01.ll b/test/CodeGen/SystemZ/loop-01.ll
index 580080173563..b51c96d52e3a 100644
--- a/test/CodeGen/SystemZ/loop-01.ll
+++ b/test/CodeGen/SystemZ/loop-01.ll
@@ -14,7 +14,7 @@ entry:
loop:
%index = phi i64 [ 0, %entry ], [ %next, %loop ]
- %ptr = getelementptr i32 *%dest, i64 %index
+ %ptr = getelementptr i32, i32 *%dest, i64 %index
store i32 %a, i32 *%ptr
%next = add i64 %index, 1
%cmp = icmp ne i64 %next, 100
@@ -37,7 +37,7 @@ entry:
loop:
%count = phi i32 [ 0, %entry ], [ %next, %loop.next ]
%next = add i32 %count, 1
- %val = load volatile i32 *%src
+ %val = load volatile i32 , i32 *%src
%cmp = icmp eq i32 %val, 0
br i1 %cmp, label %loop.next, label %loop.store
@@ -67,7 +67,7 @@ entry:
loop:
%count = phi i64 [ 0, %entry ], [ %next, %loop.next ]
%next = add i64 %count, 1
- %val = load volatile i64 *%src
+ %val = load volatile i64 , i64 *%src
%cmp = icmp eq i64 %val, 0
br i1 %cmp, label %loop.next, label %loop.store
@@ -100,7 +100,7 @@ entry:
loop:
%left = phi i64 [ %count, %entry ], [ %next, %loop.next ]
store volatile i64 %left, i64 *%dest2
- %val = load volatile i32 *%src
+ %val = load volatile i32 , i32 *%src
%cmp = icmp eq i32 %val, 0
br i1 %cmp, label %loop.next, label %loop.store
diff --git a/test/CodeGen/SystemZ/memchr-02.ll b/test/CodeGen/SystemZ/memchr-02.ll
index 8986627a6057..71b2cf02b352 100644
--- a/test/CodeGen/SystemZ/memchr-02.ll
+++ b/test/CodeGen/SystemZ/memchr-02.ll
@@ -29,7 +29,7 @@ define i8 *@f2(i8 *%src, i8 *%charptr, i64 %len) {
; CHECK-NOT: %r0
; CHECK: srst %r2, [[RES1]]
; CHECK: br %r14
- %char = load volatile i8 *%charptr
+ %char = load volatile i8 , i8 *%charptr
%charext = zext i8 %char to i32
%res1 = call i8 *@memchr(i8 *%src, i32 %charext, i64 %len)
%res2 = call i8 *@memchr(i8 *%res1, i32 %charext, i64 %len)
@@ -48,7 +48,7 @@ define i8 *@f3(i8 *%src, i8 *%charptr, i64 %len) {
; CHECK: lr %r0, [[CHAR]]
; CHECK: srst %r2, [[RES1]]
; CHECK: br %r14
- %char = load volatile i8 *%charptr
+ %char = load volatile i8 , i8 *%charptr
%charext = zext i8 %char to i32
%res1 = call i8 *@memchr(i8 *%src, i32 %charext, i64 %len)
call void asm sideeffect "blah $0", "{r0}" (i32 0)
diff --git a/test/CodeGen/SystemZ/memcpy-01.ll b/test/CodeGen/SystemZ/memcpy-01.ll
index b53ec5452e25..1d7b28e940b9 100644
--- a/test/CodeGen/SystemZ/memcpy-01.ll
+++ b/test/CodeGen/SystemZ/memcpy-01.ll
@@ -126,8 +126,8 @@ define void @f11(i8 *%srcbase, i8 *%destbase) {
; CHECK: mvc 512(256,[[NEWDEST]]), 0([[NEWSRC]])
; CHECK: mvc 768(255,[[NEWDEST]]), 256([[NEWSRC]])
; CHECK: br %r14
- %dest = getelementptr i8 *%srcbase, i64 4000
- %src = getelementptr i8* %destbase, i64 3500
+ %dest = getelementptr i8, i8 *%srcbase, i64 4000
+ %src = getelementptr i8, i8* %destbase, i64 3500
call void @llvm.memcpy.p0i8.p0i8.i64(i8 *%dest, i8 *%src, i64 1279, i32 1,
i1 false)
ret void
@@ -146,8 +146,8 @@ define void @f12() {
; CHECK: brasl %r14, foo@PLT
; CHECK: br %r14
%arr = alloca [6000 x i8]
- %dest = getelementptr [6000 x i8] *%arr, i64 0, i64 3900
- %src = getelementptr [6000 x i8] *%arr, i64 0, i64 1924
+ %dest = getelementptr [6000 x i8], [6000 x i8] *%arr, i64 0, i64 3900
+ %src = getelementptr [6000 x i8], [6000 x i8] *%arr, i64 0, i64 1924
call void @foo(i8 *%dest, i8 *%src)
call void @llvm.memcpy.p0i8.p0i8.i64(i8 *%dest, i8 *%src, i64 1279, i32 1,
i1 false)
@@ -168,8 +168,8 @@ define void @f13() {
; CHECK: brasl %r14, foo@PLT
; CHECK: br %r14
%arr = alloca [6000 x i8]
- %dest = getelementptr [6000 x i8] *%arr, i64 0, i64 24
- %src = getelementptr [6000 x i8] *%arr, i64 0, i64 3650
+ %dest = getelementptr [6000 x i8], [6000 x i8] *%arr, i64 0, i64 24
+ %src = getelementptr [6000 x i8], [6000 x i8] *%arr, i64 0, i64 3650
call void @foo(i8 *%dest, i8 *%src)
call void @llvm.memcpy.p0i8.p0i8.i64(i8 *%dest, i8 *%src, i64 1279, i32 1,
i1 false)
@@ -225,8 +225,8 @@ define void @f16() {
; CHECK: brasl %r14, foo@PLT
; CHECK: br %r14
%arr = alloca [3200 x i8]
- %dest = getelementptr [3200 x i8] *%arr, i64 0, i64 1600
- %src = getelementptr [3200 x i8] *%arr, i64 0, i64 0
+ %dest = getelementptr [3200 x i8], [3200 x i8] *%arr, i64 0, i64 1600
+ %src = getelementptr [3200 x i8], [3200 x i8] *%arr, i64 0, i64 0
call void @foo(i8 *%dest, i8 *%src)
call void @llvm.memcpy.p0i8.p0i8.i64(i8 *%dest, i8 *%src, i64 1537, i32 1,
i1 false)
diff --git a/test/CodeGen/SystemZ/memcpy-02.ll b/test/CodeGen/SystemZ/memcpy-02.ll
index 776cfee50be9..df44502a8098 100644
--- a/test/CodeGen/SystemZ/memcpy-02.ll
+++ b/test/CodeGen/SystemZ/memcpy-02.ll
@@ -16,8 +16,8 @@ define void @f1(i8 *%ptr1) {
; CHECK-LABEL: f1:
; CHECK: mvc 1(1,%r2), 0(%r2)
; CHECK: br %r14
- %ptr2 = getelementptr i8 *%ptr1, i64 1
- %val = load i8 *%ptr1
+ %ptr2 = getelementptr i8, i8 *%ptr1, i64 1
+ %val = load i8 , i8 *%ptr1
store i8 %val, i8 *%ptr2
ret void
}
@@ -27,8 +27,8 @@ define void @f2(i8 *%ptr1) {
; CHECK-LABEL: f2:
; CHECK: mvc 1(1,%r2), 0(%r2)
; CHECK: br %r14
- %ptr2 = getelementptr i8 *%ptr1, i64 1
- %val = load i8 *%ptr1
+ %ptr2 = getelementptr i8, i8 *%ptr1, i64 1
+ %val = load i8 , i8 *%ptr1
%ext = zext i8 %val to i32
%trunc = trunc i32 %ext to i8
store i8 %trunc, i8 *%ptr2
@@ -40,8 +40,8 @@ define void @f3(i8 *%ptr1) {
; CHECK-LABEL: f3:
; CHECK: mvc 1(1,%r2), 0(%r2)
; CHECK: br %r14
- %ptr2 = getelementptr i8 *%ptr1, i64 1
- %val = load i8 *%ptr1
+ %ptr2 = getelementptr i8, i8 *%ptr1, i64 1
+ %val = load i8 , i8 *%ptr1
%ext = zext i8 %val to i64
%trunc = trunc i64 %ext to i8
store i8 %trunc, i8 *%ptr2
@@ -53,8 +53,8 @@ define void @f4(i8 *%ptr1) {
; CHECK-LABEL: f4:
; CHECK: mvc 1(1,%r2), 0(%r2)
; CHECK: br %r14
- %ptr2 = getelementptr i8 *%ptr1, i64 1
- %val = load i8 *%ptr1
+ %ptr2 = getelementptr i8, i8 *%ptr1, i64 1
+ %val = load i8 , i8 *%ptr1
%ext = sext i8 %val to i32
%trunc = trunc i32 %ext to i8
store i8 %trunc, i8 *%ptr2
@@ -66,8 +66,8 @@ define void @f5(i8 *%ptr1) {
; CHECK-LABEL: f5:
; CHECK: mvc 1(1,%r2), 0(%r2)
; CHECK: br %r14
- %ptr2 = getelementptr i8 *%ptr1, i64 1
- %val = load i8 *%ptr1
+ %ptr2 = getelementptr i8, i8 *%ptr1, i64 1
+ %val = load i8 , i8 *%ptr1
%ext = sext i8 %val to i64
%trunc = trunc i64 %ext to i8
store i8 %trunc, i8 *%ptr2
@@ -79,8 +79,8 @@ define void @f6(i16 *%ptr1) {
; CHECK-LABEL: f6:
; CHECK: mvc 2(2,%r2), 0(%r2)
; CHECK: br %r14
- %ptr2 = getelementptr i16 *%ptr1, i64 1
- %val = load i16 *%ptr1
+ %ptr2 = getelementptr i16, i16 *%ptr1, i64 1
+ %val = load i16 , i16 *%ptr1
store i16 %val, i16 *%ptr2
ret void
}
@@ -90,8 +90,8 @@ define void @f7(i16 *%ptr1) {
; CHECK-LABEL: f7:
; CHECK: mvc 2(2,%r2), 0(%r2)
; CHECK: br %r14
- %ptr2 = getelementptr i16 *%ptr1, i64 1
- %val = load i16 *%ptr1
+ %ptr2 = getelementptr i16, i16 *%ptr1, i64 1
+ %val = load i16 , i16 *%ptr1
%ext = zext i16 %val to i32
%trunc = trunc i32 %ext to i16
store i16 %trunc, i16 *%ptr2
@@ -103,8 +103,8 @@ define void @f8(i16 *%ptr1) {
; CHECK-LABEL: f8:
; CHECK: mvc 2(2,%r2), 0(%r2)
; CHECK: br %r14
- %ptr2 = getelementptr i16 *%ptr1, i64 1
- %val = load i16 *%ptr1
+ %ptr2 = getelementptr i16, i16 *%ptr1, i64 1
+ %val = load i16 , i16 *%ptr1
%ext = zext i16 %val to i64
%trunc = trunc i64 %ext to i16
store i16 %trunc, i16 *%ptr2
@@ -116,8 +116,8 @@ define void @f9(i16 *%ptr1) {
; CHECK-LABEL: f9:
; CHECK: mvc 2(2,%r2), 0(%r2)
; CHECK: br %r14
- %ptr2 = getelementptr i16 *%ptr1, i64 1
- %val = load i16 *%ptr1
+ %ptr2 = getelementptr i16, i16 *%ptr1, i64 1
+ %val = load i16 , i16 *%ptr1
%ext = sext i16 %val to i32
%trunc = trunc i32 %ext to i16
store i16 %trunc, i16 *%ptr2
@@ -129,8 +129,8 @@ define void @f10(i16 *%ptr1) {
; CHECK-LABEL: f10:
; CHECK: mvc 2(2,%r2), 0(%r2)
; CHECK: br %r14
- %ptr2 = getelementptr i16 *%ptr1, i64 1
- %val = load i16 *%ptr1
+ %ptr2 = getelementptr i16, i16 *%ptr1, i64 1
+ %val = load i16 , i16 *%ptr1
%ext = sext i16 %val to i64
%trunc = trunc i64 %ext to i16
store i16 %trunc, i16 *%ptr2
@@ -142,8 +142,8 @@ define void @f11(i32 *%ptr1) {
; CHECK-LABEL: f11:
; CHECK: mvc 4(4,%r2), 0(%r2)
; CHECK: br %r14
- %ptr2 = getelementptr i32 *%ptr1, i64 1
- %val = load i32 *%ptr1
+ %ptr2 = getelementptr i32, i32 *%ptr1, i64 1
+ %val = load i32 , i32 *%ptr1
store i32 %val, i32 *%ptr2
ret void
}
@@ -153,8 +153,8 @@ define void @f12(i32 *%ptr1) {
; CHECK-LABEL: f12:
; CHECK: mvc 4(4,%r2), 0(%r2)
; CHECK: br %r14
- %ptr2 = getelementptr i32 *%ptr1, i64 1
- %val = load i32 *%ptr1
+ %ptr2 = getelementptr i32, i32 *%ptr1, i64 1
+ %val = load i32 , i32 *%ptr1
%ext = zext i32 %val to i64
%trunc = trunc i64 %ext to i32
store i32 %trunc, i32 *%ptr2
@@ -166,8 +166,8 @@ define void @f13(i32 *%ptr1) {
; CHECK-LABEL: f13:
; CHECK: mvc 4(4,%r2), 0(%r2)
; CHECK: br %r14
- %ptr2 = getelementptr i32 *%ptr1, i64 1
- %val = load i32 *%ptr1
+ %ptr2 = getelementptr i32, i32 *%ptr1, i64 1
+ %val = load i32 , i32 *%ptr1
%ext = sext i32 %val to i64
%trunc = trunc i64 %ext to i32
store i32 %trunc, i32 *%ptr2
@@ -179,8 +179,8 @@ define void @f14(i64 *%ptr1) {
; CHECK-LABEL: f14:
; CHECK: mvc 8(8,%r2), 0(%r2)
; CHECK: br %r14
- %ptr2 = getelementptr i64 *%ptr1, i64 1
- %val = load i64 *%ptr1
+ %ptr2 = getelementptr i64, i64 *%ptr1, i64 1
+ %val = load i64 , i64 *%ptr1
store i64 %val, i64 *%ptr2
ret void
}
@@ -190,8 +190,8 @@ define void @f15(float *%ptr1) {
; CHECK-LABEL: f15:
; CHECK: mvc 4(4,%r2), 0(%r2)
; CHECK: br %r14
- %ptr2 = getelementptr float *%ptr1, i64 1
- %val = load float *%ptr1
+ %ptr2 = getelementptr float, float *%ptr1, i64 1
+ %val = load float , float *%ptr1
store float %val, float *%ptr2
ret void
}
@@ -201,8 +201,8 @@ define void @f16(double *%ptr1) {
; CHECK-LABEL: f16:
; CHECK: mvc 8(8,%r2), 0(%r2)
; CHECK: br %r14
- %ptr2 = getelementptr double *%ptr1, i64 1
- %val = load double *%ptr1
+ %ptr2 = getelementptr double, double *%ptr1, i64 1
+ %val = load double , double *%ptr1
store double %val, double *%ptr2
ret void
}
@@ -212,8 +212,8 @@ define void @f17(fp128 *%ptr1) {
; CHECK-LABEL: f17:
; CHECK: mvc 16(16,%r2), 0(%r2)
; CHECK: br %r14
- %ptr2 = getelementptr fp128 *%ptr1, i64 1
- %val = load fp128 *%ptr1
+ %ptr2 = getelementptr fp128, fp128 *%ptr1, i64 1
+ %val = load fp128 , fp128 *%ptr1
store fp128 %val, fp128 *%ptr2
ret void
}
@@ -223,8 +223,8 @@ define void @f18(i64 *%ptr1) {
; CHECK-LABEL: f18:
; CHECK-NOT: mvc
; CHECK: br %r14
- %ptr2 = getelementptr i64 *%ptr1, i64 1
- %val = load volatile i64 *%ptr1
+ %ptr2 = getelementptr i64, i64 *%ptr1, i64 1
+ %val = load volatile i64 , i64 *%ptr1
store i64 %val, i64 *%ptr2
ret void
}
@@ -234,8 +234,8 @@ define void @f19(i64 *%ptr1) {
; CHECK-LABEL: f19:
; CHECK-NOT: mvc
; CHECK: br %r14
- %ptr2 = getelementptr i64 *%ptr1, i64 1
- %val = load i64 *%ptr1
+ %ptr2 = getelementptr i64, i64 *%ptr1, i64 1
+ %val = load i64 , i64 *%ptr1
store volatile i64 %val, i64 *%ptr2
ret void
}
@@ -247,7 +247,7 @@ define void @f20(i64 *%ptr1, i64 *%ptr2) {
; CHECK-LABEL: f20:
; CHECK-NOT: mvc
; CHECK: br %r14
- %val = load i64 *%ptr1
+ %val = load i64 , i64 *%ptr1
store i64 %val, i64 *%ptr2
ret void
}
@@ -257,7 +257,7 @@ define void @f21(i64 *%ptr1, i64 *%ptr2) {
; CHECK-LABEL: f21:
; CHECK-NOT: mvc
; CHECK: br %r14
- %val = load i64 *%ptr1, align 2
+ %val = load i64 , i64 *%ptr1, align 2
store i64 %val, i64 *%ptr2, align 2
ret void
}
@@ -270,7 +270,7 @@ define void @f22(i64 %base) {
%add = add i64 %base, 1
%ptr1 = inttoptr i64 %base to i64 *
%ptr2 = inttoptr i64 %add to i64 *
- %val = load i64 *%ptr1, align 1
+ %val = load i64 , i64 *%ptr1, align 1
store i64 %val, i64 *%ptr2, align 1
ret void
}
@@ -282,7 +282,7 @@ define void @f23(i8 *%ptr) {
; CHECK-DAG: larl [[DST:%r[0-5]]], g1dst
; CHECK: mvc 0(1,[[DST]]), 0([[SRC]])
; CHECK: br %r14
- %val = load i8 *@g1src
+ %val = load i8 , i8 *@g1src
store i8 %val, i8 *@g1dst
ret void
}
@@ -293,7 +293,7 @@ define void @f24(i16 *%ptr) {
; CHECK: lhrl [[REG:%r[0-5]]], g2src
; CHECK: sthrl [[REG]], g2dst
; CHECK: br %r14
- %val = load i16 *@g2src
+ %val = load i16 , i16 *@g2src
store i16 %val, i16 *@g2dst
ret void
}
@@ -304,7 +304,7 @@ define void @f25(i32 *%ptr) {
; CHECK: lrl [[REG:%r[0-5]]], g3
; CHECK: st [[REG]], 0(%r2)
; CHECK: br %r14
- %val = load i32 *@g3
+ %val = load i32 , i32 *@g3
store i32 %val, i32 *%ptr
ret void
}
@@ -315,7 +315,7 @@ define void @f26(i32 *%ptr) {
; CHECK: l [[REG:%r[0-5]]], 0(%r2)
; CHECK: strl [[REG]], g3
; CHECK: br %r14
- %val = load i32 *%ptr
+ %val = load i32 , i32 *%ptr
store i32 %val, i32 *@g3
ret void
}
@@ -326,7 +326,7 @@ define void @f27(i64 *%ptr) {
; CHECK: lgrl [[REG:%r[0-5]]], g4
; CHECK: stg [[REG]], 0(%r2)
; CHECK: br %r14
- %val = load i64 *@g4
+ %val = load i64 , i64 *@g4
store i64 %val, i64 *%ptr
ret void
}
@@ -337,7 +337,7 @@ define void @f28(i64 *%ptr) {
; CHECK: lg [[REG:%r[0-5]]], 0(%r2)
; CHECK: stgrl [[REG]], g4
; CHECK: br %r14
- %val = load i64 *%ptr
+ %val = load i64 , i64 *%ptr
store i64 %val, i64 *@g4
ret void
}
@@ -349,7 +349,7 @@ define void @f29(fp128 *%ptr) {
; CHECK-DAG: larl [[DST:%r[0-5]]], g5dst
; CHECK: mvc 0(16,[[DST]]), 0([[SRC]])
; CHECK: br %r14
- %val = load fp128 *@g5src, align 16
+ %val = load fp128 , fp128 *@g5src, align 16
store fp128 %val, fp128 *@g5dst, align 16
ret void
}
@@ -359,8 +359,8 @@ define void @f30(i64 *%ptr1) {
; CHECK-LABEL: f30:
; CHECK: mvc 8(8,%r2), 0(%r2)
; CHECK: br %r14
- %ptr2 = getelementptr i64 *%ptr1, i64 1
- %val = load i64 *%ptr1, align 1
+ %ptr2 = getelementptr i64, i64 *%ptr1, i64 1
+ %val = load i64 , i64 *%ptr1, align 1
store i64 %val, i64 *%ptr2, align 1
ret void
}
@@ -370,7 +370,7 @@ define void @f31(i64 *%ptr1, i64 *%ptr2) {
; CHECK-LABEL: f31:
; CHECK: mvc 0(8,%r3), 0(%r2)
; CHECK: br %r14
- %val = load i64 *%ptr1, align 2, !tbaa !1
+ %val = load i64 , i64 *%ptr1, align 2, !tbaa !1
store i64 %val, i64 *%ptr2, align 2, !tbaa !2
ret void
}
@@ -380,7 +380,7 @@ define void @f32(i64 *%ptr1, i64 *%ptr2) {
; CHECK-LABEL: f32:
; CHECK-NOT: mvc
; CHECK: br %r14
- %val = load i64 *%ptr1, align 2, !tbaa !1
+ %val = load i64 , i64 *%ptr1, align 2, !tbaa !1
store i64 %val, i64 *%ptr2, align 2, !tbaa !1
ret void
}
diff --git a/test/CodeGen/SystemZ/or-01.ll b/test/CodeGen/SystemZ/or-01.ll
index 23946d320678..ce556ef85a1b 100644
--- a/test/CodeGen/SystemZ/or-01.ll
+++ b/test/CodeGen/SystemZ/or-01.ll
@@ -19,7 +19,7 @@ define i32 @f2(i32 %a, i32 *%src) {
; CHECK-LABEL: f2:
; CHECK: o %r2, 0(%r3)
; CHECK: br %r14
- %b = load i32 *%src
+ %b = load i32 , i32 *%src
%or = or i32 %a, %b
ret i32 %or
}
@@ -29,8 +29,8 @@ define i32 @f3(i32 %a, i32 *%src) {
; CHECK-LABEL: f3:
; CHECK: o %r2, 4092(%r3)
; CHECK: br %r14
- %ptr = getelementptr i32 *%src, i64 1023
- %b = load i32 *%ptr
+ %ptr = getelementptr i32, i32 *%src, i64 1023
+ %b = load i32 , i32 *%ptr
%or = or i32 %a, %b
ret i32 %or
}
@@ -40,8 +40,8 @@ define i32 @f4(i32 %a, i32 *%src) {
; CHECK-LABEL: f4:
; CHECK: oy %r2, 4096(%r3)
; CHECK: br %r14
- %ptr = getelementptr i32 *%src, i64 1024
- %b = load i32 *%ptr
+ %ptr = getelementptr i32, i32 *%src, i64 1024
+ %b = load i32 , i32 *%ptr
%or = or i32 %a, %b
ret i32 %or
}
@@ -51,8 +51,8 @@ define i32 @f5(i32 %a, i32 *%src) {
; CHECK-LABEL: f5:
; CHECK: oy %r2, 524284(%r3)
; CHECK: br %r14
- %ptr = getelementptr i32 *%src, i64 131071
- %b = load i32 *%ptr
+ %ptr = getelementptr i32, i32 *%src, i64 131071
+ %b = load i32 , i32 *%ptr
%or = or i32 %a, %b
ret i32 %or
}
@@ -64,8 +64,8 @@ define i32 @f6(i32 %a, i32 *%src) {
; CHECK: agfi %r3, 524288
; CHECK: o %r2, 0(%r3)
; CHECK: br %r14
- %ptr = getelementptr i32 *%src, i64 131072
- %b = load i32 *%ptr
+ %ptr = getelementptr i32, i32 *%src, i64 131072
+ %b = load i32 , i32 *%ptr
%or = or i32 %a, %b
ret i32 %or
}
@@ -75,8 +75,8 @@ define i32 @f7(i32 %a, i32 *%src) {
; CHECK-LABEL: f7:
; CHECK: oy %r2, -4(%r3)
; CHECK: br %r14
- %ptr = getelementptr i32 *%src, i64 -1
- %b = load i32 *%ptr
+ %ptr = getelementptr i32, i32 *%src, i64 -1
+ %b = load i32 , i32 *%ptr
%or = or i32 %a, %b
ret i32 %or
}
@@ -86,8 +86,8 @@ define i32 @f8(i32 %a, i32 *%src) {
; CHECK-LABEL: f8:
; CHECK: oy %r2, -524288(%r3)
; CHECK: br %r14
- %ptr = getelementptr i32 *%src, i64 -131072
- %b = load i32 *%ptr
+ %ptr = getelementptr i32, i32 *%src, i64 -131072
+ %b = load i32 , i32 *%ptr
%or = or i32 %a, %b
ret i32 %or
}
@@ -99,8 +99,8 @@ define i32 @f9(i32 %a, i32 *%src) {
; CHECK: agfi %r3, -524292
; CHECK: o %r2, 0(%r3)
; CHECK: br %r14
- %ptr = getelementptr i32 *%src, i64 -131073
- %b = load i32 *%ptr
+ %ptr = getelementptr i32, i32 *%src, i64 -131073
+ %b = load i32 , i32 *%ptr
%or = or i32 %a, %b
ret i32 %or
}
@@ -113,7 +113,7 @@ define i32 @f10(i32 %a, i64 %src, i64 %index) {
%add1 = add i64 %src, %index
%add2 = add i64 %add1, 4092
%ptr = inttoptr i64 %add2 to i32 *
- %b = load i32 *%ptr
+ %b = load i32 , i32 *%ptr
%or = or i32 %a, %b
ret i32 %or
}
@@ -126,7 +126,7 @@ define i32 @f11(i32 %a, i64 %src, i64 %index) {
%add1 = add i64 %src, %index
%add2 = add i64 %add1, 4096
%ptr = inttoptr i64 %add2 to i32 *
- %b = load i32 *%ptr
+ %b = load i32 , i32 *%ptr
%or = or i32 %a, %b
ret i32 %or
}
@@ -137,26 +137,26 @@ define i32 @f12(i32 *%ptr0) {
; CHECK: brasl %r14, foo@PLT
; CHECK: o %r2, 16{{[04]}}(%r15)
; CHECK: br %r14
- %ptr1 = getelementptr i32 *%ptr0, i64 2
- %ptr2 = getelementptr i32 *%ptr0, i64 4
- %ptr3 = getelementptr i32 *%ptr0, i64 6
- %ptr4 = getelementptr i32 *%ptr0, i64 8
- %ptr5 = getelementptr i32 *%ptr0, i64 10
- %ptr6 = getelementptr i32 *%ptr0, i64 12
- %ptr7 = getelementptr i32 *%ptr0, i64 14
- %ptr8 = getelementptr i32 *%ptr0, i64 16
- %ptr9 = getelementptr i32 *%ptr0, i64 18
-
- %val0 = load i32 *%ptr0
- %val1 = load i32 *%ptr1
- %val2 = load i32 *%ptr2
- %val3 = load i32 *%ptr3
- %val4 = load i32 *%ptr4
- %val5 = load i32 *%ptr5
- %val6 = load i32 *%ptr6
- %val7 = load i32 *%ptr7
- %val8 = load i32 *%ptr8
- %val9 = load i32 *%ptr9
+ %ptr1 = getelementptr i32, i32 *%ptr0, i64 2
+ %ptr2 = getelementptr i32, i32 *%ptr0, i64 4
+ %ptr3 = getelementptr i32, i32 *%ptr0, i64 6
+ %ptr4 = getelementptr i32, i32 *%ptr0, i64 8
+ %ptr5 = getelementptr i32, i32 *%ptr0, i64 10
+ %ptr6 = getelementptr i32, i32 *%ptr0, i64 12
+ %ptr7 = getelementptr i32, i32 *%ptr0, i64 14
+ %ptr8 = getelementptr i32, i32 *%ptr0, i64 16
+ %ptr9 = getelementptr i32, i32 *%ptr0, i64 18
+
+ %val0 = load i32 , i32 *%ptr0
+ %val1 = load i32 , i32 *%ptr1
+ %val2 = load i32 , i32 *%ptr2
+ %val3 = load i32 , i32 *%ptr3
+ %val4 = load i32 , i32 *%ptr4
+ %val5 = load i32 , i32 *%ptr5
+ %val6 = load i32 , i32 *%ptr6
+ %val7 = load i32 , i32 *%ptr7
+ %val8 = load i32 , i32 *%ptr8
+ %val9 = load i32 , i32 *%ptr9
%ret = call i32 @foo()
diff --git a/test/CodeGen/SystemZ/or-03.ll b/test/CodeGen/SystemZ/or-03.ll
index 5fdbdfd1ed1f..f29953796b06 100644
--- a/test/CodeGen/SystemZ/or-03.ll
+++ b/test/CodeGen/SystemZ/or-03.ll
@@ -19,7 +19,7 @@ define i64 @f2(i64 %a, i64 *%src) {
; CHECK-LABEL: f2:
; CHECK: og %r2, 0(%r3)
; CHECK: br %r14
- %b = load i64 *%src
+ %b = load i64 , i64 *%src
%or = or i64 %a, %b
ret i64 %or
}
@@ -29,8 +29,8 @@ define i64 @f3(i64 %a, i64 *%src) {
; CHECK-LABEL: f3:
; CHECK: og %r2, 524280(%r3)
; CHECK: br %r14
- %ptr = getelementptr i64 *%src, i64 65535
- %b = load i64 *%ptr
+ %ptr = getelementptr i64, i64 *%src, i64 65535
+ %b = load i64 , i64 *%ptr
%or = or i64 %a, %b
ret i64 %or
}
@@ -42,8 +42,8 @@ define i64 @f4(i64 %a, i64 *%src) {
; CHECK: agfi %r3, 524288
; CHECK: og %r2, 0(%r3)
; CHECK: br %r14
- %ptr = getelementptr i64 *%src, i64 65536
- %b = load i64 *%ptr
+ %ptr = getelementptr i64, i64 *%src, i64 65536
+ %b = load i64 , i64 *%ptr
%or = or i64 %a, %b
ret i64 %or
}
@@ -53,8 +53,8 @@ define i64 @f5(i64 %a, i64 *%src) {
; CHECK-LABEL: f5:
; CHECK: og %r2, -8(%r3)
; CHECK: br %r14
- %ptr = getelementptr i64 *%src, i64 -1
- %b = load i64 *%ptr
+ %ptr = getelementptr i64, i64 *%src, i64 -1
+ %b = load i64 , i64 *%ptr
%or = or i64 %a, %b
ret i64 %or
}
@@ -64,8 +64,8 @@ define i64 @f6(i64 %a, i64 *%src) {
; CHECK-LABEL: f6:
; CHECK: og %r2, -524288(%r3)
; CHECK: br %r14
- %ptr = getelementptr i64 *%src, i64 -65536
- %b = load i64 *%ptr
+ %ptr = getelementptr i64, i64 *%src, i64 -65536
+ %b = load i64 , i64 *%ptr
%or = or i64 %a, %b
ret i64 %or
}
@@ -77,8 +77,8 @@ define i64 @f7(i64 %a, i64 *%src) {
; CHECK: agfi %r3, -524296
; CHECK: og %r2, 0(%r3)
; CHECK: br %r14
- %ptr = getelementptr i64 *%src, i64 -65537
- %b = load i64 *%ptr
+ %ptr = getelementptr i64, i64 *%src, i64 -65537
+ %b = load i64 , i64 *%ptr
%or = or i64 %a, %b
ret i64 %or
}
@@ -91,7 +91,7 @@ define i64 @f8(i64 %a, i64 %src, i64 %index) {
%add1 = add i64 %src, %index
%add2 = add i64 %add1, 524280
%ptr = inttoptr i64 %add2 to i64 *
- %b = load i64 *%ptr
+ %b = load i64 , i64 *%ptr
%or = or i64 %a, %b
ret i64 %or
}
@@ -102,26 +102,26 @@ define i64 @f9(i64 *%ptr0) {
; CHECK: brasl %r14, foo@PLT
; CHECK: og %r2, 160(%r15)
; CHECK: br %r14
- %ptr1 = getelementptr i64 *%ptr0, i64 2
- %ptr2 = getelementptr i64 *%ptr0, i64 4
- %ptr3 = getelementptr i64 *%ptr0, i64 6
- %ptr4 = getelementptr i64 *%ptr0, i64 8
- %ptr5 = getelementptr i64 *%ptr0, i64 10
- %ptr6 = getelementptr i64 *%ptr0, i64 12
- %ptr7 = getelementptr i64 *%ptr0, i64 14
- %ptr8 = getelementptr i64 *%ptr0, i64 16
- %ptr9 = getelementptr i64 *%ptr0, i64 18
+ %ptr1 = getelementptr i64, i64 *%ptr0, i64 2
+ %ptr2 = getelementptr i64, i64 *%ptr0, i64 4
+ %ptr3 = getelementptr i64, i64 *%ptr0, i64 6
+ %ptr4 = getelementptr i64, i64 *%ptr0, i64 8
+ %ptr5 = getelementptr i64, i64 *%ptr0, i64 10
+ %ptr6 = getelementptr i64, i64 *%ptr0, i64 12
+ %ptr7 = getelementptr i64, i64 *%ptr0, i64 14
+ %ptr8 = getelementptr i64, i64 *%ptr0, i64 16
+ %ptr9 = getelementptr i64, i64 *%ptr0, i64 18
- %val0 = load i64 *%ptr0
- %val1 = load i64 *%ptr1
- %val2 = load i64 *%ptr2
- %val3 = load i64 *%ptr3
- %val4 = load i64 *%ptr4
- %val5 = load i64 *%ptr5
- %val6 = load i64 *%ptr6
- %val7 = load i64 *%ptr7
- %val8 = load i64 *%ptr8
- %val9 = load i64 *%ptr9
+ %val0 = load i64 , i64 *%ptr0
+ %val1 = load i64 , i64 *%ptr1
+ %val2 = load i64 , i64 *%ptr2
+ %val3 = load i64 , i64 *%ptr3
+ %val4 = load i64 , i64 *%ptr4
+ %val5 = load i64 , i64 *%ptr5
+ %val6 = load i64 , i64 *%ptr6
+ %val7 = load i64 , i64 *%ptr7
+ %val8 = load i64 , i64 *%ptr8
+ %val9 = load i64 , i64 *%ptr9
%ret = call i64 @foo()
diff --git a/test/CodeGen/SystemZ/or-05.ll b/test/CodeGen/SystemZ/or-05.ll
index d90589128674..3fb70d94b37d 100644
--- a/test/CodeGen/SystemZ/or-05.ll
+++ b/test/CodeGen/SystemZ/or-05.ll
@@ -7,7 +7,7 @@ define void @f1(i8 *%ptr) {
; CHECK-LABEL: f1:
; CHECK: oi 0(%r2), 1
; CHECK: br %r14
- %val = load i8 *%ptr
+ %val = load i8 , i8 *%ptr
%or = or i8 %val, -255
store i8 %or, i8 *%ptr
ret void
@@ -18,7 +18,7 @@ define void @f2(i8 *%ptr) {
; CHECK-LABEL: f2:
; CHECK: oi 0(%r2), 254
; CHECK: br %r14
- %val = load i8 *%ptr
+ %val = load i8 , i8 *%ptr
%or = or i8 %val, -2
store i8 %or, i8 *%ptr
ret void
@@ -29,7 +29,7 @@ define void @f3(i8 *%ptr) {
; CHECK-LABEL: f3:
; CHECK: oi 0(%r2), 1
; CHECK: br %r14
- %val = load i8 *%ptr
+ %val = load i8 , i8 *%ptr
%or = or i8 %val, 1
store i8 %or, i8 *%ptr
ret void
@@ -40,7 +40,7 @@ define void @f4(i8 *%ptr) {
; CHECK-LABEL: f4:
; CHECK: oi 0(%r2), 254
; CHECK: br %r14
- %val = load i8 *%ptr
+ %val = load i8 , i8 *%ptr
%or = or i8 %val, 254
store i8 %or, i8 *%ptr
ret void
@@ -51,8 +51,8 @@ define void @f5(i8 *%src) {
; CHECK-LABEL: f5:
; CHECK: oi 4095(%r2), 127
; CHECK: br %r14
- %ptr = getelementptr i8 *%src, i64 4095
- %val = load i8 *%ptr
+ %ptr = getelementptr i8, i8 *%src, i64 4095
+ %val = load i8 , i8 *%ptr
%or = or i8 %val, 127
store i8 %or, i8 *%ptr
ret void
@@ -63,8 +63,8 @@ define void @f6(i8 *%src) {
; CHECK-LABEL: f6:
; CHECK: oiy 4096(%r2), 127
; CHECK: br %r14
- %ptr = getelementptr i8 *%src, i64 4096
- %val = load i8 *%ptr
+ %ptr = getelementptr i8, i8 *%src, i64 4096
+ %val = load i8 , i8 *%ptr
%or = or i8 %val, 127
store i8 %or, i8 *%ptr
ret void
@@ -75,8 +75,8 @@ define void @f7(i8 *%src) {
; CHECK-LABEL: f7:
; CHECK: oiy 524287(%r2), 127
; CHECK: br %r14
- %ptr = getelementptr i8 *%src, i64 524287
- %val = load i8 *%ptr
+ %ptr = getelementptr i8, i8 *%src, i64 524287
+ %val = load i8 , i8 *%ptr
%or = or i8 %val, 127
store i8 %or, i8 *%ptr
ret void
@@ -89,8 +89,8 @@ define void @f8(i8 *%src) {
; CHECK: agfi %r2, 524288
; CHECK: oi 0(%r2), 127
; CHECK: br %r14
- %ptr = getelementptr i8 *%src, i64 524288
- %val = load i8 *%ptr
+ %ptr = getelementptr i8, i8 *%src, i64 524288
+ %val = load i8 , i8 *%ptr
%or = or i8 %val, 127
store i8 %or, i8 *%ptr
ret void
@@ -101,8 +101,8 @@ define void @f9(i8 *%src) {
; CHECK-LABEL: f9:
; CHECK: oiy -1(%r2), 127
; CHECK: br %r14
- %ptr = getelementptr i8 *%src, i64 -1
- %val = load i8 *%ptr
+ %ptr = getelementptr i8, i8 *%src, i64 -1
+ %val = load i8 , i8 *%ptr
%or = or i8 %val, 127
store i8 %or, i8 *%ptr
ret void
@@ -113,8 +113,8 @@ define void @f10(i8 *%src) {
; CHECK-LABEL: f10:
; CHECK: oiy -524288(%r2), 127
; CHECK: br %r14
- %ptr = getelementptr i8 *%src, i64 -524288
- %val = load i8 *%ptr
+ %ptr = getelementptr i8, i8 *%src, i64 -524288
+ %val = load i8 , i8 *%ptr
%or = or i8 %val, 127
store i8 %or, i8 *%ptr
ret void
@@ -127,8 +127,8 @@ define void @f11(i8 *%src) {
; CHECK: agfi %r2, -524289
; CHECK: oi 0(%r2), 127
; CHECK: br %r14
- %ptr = getelementptr i8 *%src, i64 -524289
- %val = load i8 *%ptr
+ %ptr = getelementptr i8, i8 *%src, i64 -524289
+ %val = load i8 , i8 *%ptr
%or = or i8 %val, 127
store i8 %or, i8 *%ptr
ret void
@@ -143,7 +143,7 @@ define void @f12(i64 %src, i64 %index) {
%add1 = add i64 %src, %index
%add2 = add i64 %add1, 4095
%ptr = inttoptr i64 %add2 to i8 *
- %val = load i8 *%ptr
+ %val = load i8 , i8 *%ptr
%or = or i8 %val, 127
store i8 %or, i8 *%ptr
ret void
@@ -158,7 +158,7 @@ define void @f13(i64 %src, i64 %index) {
%add1 = add i64 %src, %index
%add2 = add i64 %add1, 4096
%ptr = inttoptr i64 %add2 to i8 *
- %val = load i8 *%ptr
+ %val = load i8 , i8 *%ptr
%or = or i8 %val, 127
store i8 %or, i8 *%ptr
ret void
diff --git a/test/CodeGen/SystemZ/or-06.ll b/test/CodeGen/SystemZ/or-06.ll
index 0a865d350942..6f441f44b96a 100644
--- a/test/CodeGen/SystemZ/or-06.ll
+++ b/test/CodeGen/SystemZ/or-06.ll
@@ -8,7 +8,7 @@ define void @f1(i8 *%ptr) {
; CHECK-LABEL: f1:
; CHECK: oi 0(%r2), 254
; CHECK: br %r14
- %val = load i8 *%ptr
+ %val = load i8 , i8 *%ptr
%ext = zext i8 %val to i32
%or = or i32 %ext, -2
%trunc = trunc i32 %or to i8
@@ -21,7 +21,7 @@ define void @f2(i8 *%ptr) {
; CHECK-LABEL: f2:
; CHECK: oi 0(%r2), 254
; CHECK: br %r14
- %val = load i8 *%ptr
+ %val = load i8 , i8 *%ptr
%ext = zext i8 %val to i64
%or = or i64 %ext, -2
%trunc = trunc i64 %or to i8
@@ -34,7 +34,7 @@ define void @f3(i8 *%ptr) {
; CHECK-LABEL: f3:
; CHECK: oi 0(%r2), 254
; CHECK: br %r14
- %val = load i8 *%ptr
+ %val = load i8 , i8 *%ptr
%ext = zext i8 %val to i32
%or = or i32 %ext, 254
%trunc = trunc i32 %or to i8
@@ -47,7 +47,7 @@ define void @f4(i8 *%ptr) {
; CHECK-LABEL: f4:
; CHECK: oi 0(%r2), 254
; CHECK: br %r14
- %val = load i8 *%ptr
+ %val = load i8 , i8 *%ptr
%ext = zext i8 %val to i64
%or = or i64 %ext, 254
%trunc = trunc i64 %or to i8
@@ -60,7 +60,7 @@ define void @f5(i8 *%ptr) {
; CHECK-LABEL: f5:
; CHECK: oi 0(%r2), 254
; CHECK: br %r14
- %val = load i8 *%ptr
+ %val = load i8 , i8 *%ptr
%ext = sext i8 %val to i32
%or = or i32 %ext, -2
%trunc = trunc i32 %or to i8
@@ -73,7 +73,7 @@ define void @f6(i8 *%ptr) {
; CHECK-LABEL: f6:
; CHECK: oi 0(%r2), 254
; CHECK: br %r14
- %val = load i8 *%ptr
+ %val = load i8 , i8 *%ptr
%ext = sext i8 %val to i64
%or = or i64 %ext, -2
%trunc = trunc i64 %or to i8
@@ -86,7 +86,7 @@ define void @f7(i8 *%ptr) {
; CHECK-LABEL: f7:
; CHECK: oi 0(%r2), 254
; CHECK: br %r14
- %val = load i8 *%ptr
+ %val = load i8 , i8 *%ptr
%ext = sext i8 %val to i32
%or = or i32 %ext, 254
%trunc = trunc i32 %or to i8
@@ -99,7 +99,7 @@ define void @f8(i8 *%ptr) {
; CHECK-LABEL: f8:
; CHECK: oi 0(%r2), 254
; CHECK: br %r14
- %val = load i8 *%ptr
+ %val = load i8 , i8 *%ptr
%ext = sext i8 %val to i64
%or = or i64 %ext, 254
%trunc = trunc i64 %or to i8
diff --git a/test/CodeGen/SystemZ/or-08.ll b/test/CodeGen/SystemZ/or-08.ll
index 8f5bf3170bed..a9921b11e227 100644
--- a/test/CodeGen/SystemZ/or-08.ll
+++ b/test/CodeGen/SystemZ/or-08.ll
@@ -7,9 +7,9 @@ define void @f1(i8 *%ptr1) {
; CHECK-LABEL: f1:
; CHECK: oc 1(1,%r2), 0(%r2)
; CHECK: br %r14
- %ptr2 = getelementptr i8 *%ptr1, i64 1
- %val = load i8 *%ptr1
- %old = load i8 *%ptr2
+ %ptr2 = getelementptr i8, i8 *%ptr1, i64 1
+ %val = load i8 , i8 *%ptr1
+ %old = load i8 , i8 *%ptr2
%or = or i8 %val, %old
store i8 %or, i8 *%ptr2
ret void
@@ -20,9 +20,9 @@ define void @f2(i16 *%ptr1) {
; CHECK-LABEL: f2:
; CHECK: oc 2(2,%r2), 0(%r2)
; CHECK: br %r14
- %ptr2 = getelementptr i16 *%ptr1, i64 1
- %val = load i16 *%ptr1
- %old = load i16 *%ptr2
+ %ptr2 = getelementptr i16, i16 *%ptr1, i64 1
+ %val = load i16 , i16 *%ptr1
+ %old = load i16 , i16 *%ptr2
%or = or i16 %val, %old
store i16 %or, i16 *%ptr2
ret void
@@ -33,9 +33,9 @@ define void @f3(i32 *%ptr1) {
; CHECK-LABEL: f3:
; CHECK: oc 4(4,%r2), 0(%r2)
; CHECK: br %r14
- %ptr2 = getelementptr i32 *%ptr1, i64 1
- %val = load i32 *%ptr1
- %old = load i32 *%ptr2
+ %ptr2 = getelementptr i32, i32 *%ptr1, i64 1
+ %val = load i32 , i32 *%ptr1
+ %old = load i32 , i32 *%ptr2
%or = or i32 %old, %val
store i32 %or, i32 *%ptr2
ret void
@@ -46,9 +46,9 @@ define void @f4(i64 *%ptr1) {
; CHECK-LABEL: f4:
; CHECK: oc 8(8,%r2), 0(%r2)
; CHECK: br %r14
- %ptr2 = getelementptr i64 *%ptr1, i64 1
- %val = load i64 *%ptr1
- %old = load i64 *%ptr2
+ %ptr2 = getelementptr i64, i64 *%ptr1, i64 1
+ %val = load i64 , i64 *%ptr1
+ %old = load i64 , i64 *%ptr2
%or = or i64 %old, %val
store i64 %or, i64 *%ptr2
ret void
diff --git a/test/CodeGen/SystemZ/prefetch-01.ll b/test/CodeGen/SystemZ/prefetch-01.ll
index bb7fea99ca7c..814738022269 100644
--- a/test/CodeGen/SystemZ/prefetch-01.ll
+++ b/test/CodeGen/SystemZ/prefetch-01.ll
@@ -48,7 +48,7 @@ define void @f5(i8 *%base, i64 %index) {
; CHECK: pfd 2, -524288({{%r2,%r3|%r3,%r2}})
; CHECK: br %r14
%add = add i64 %index, -524288
- %ptr = getelementptr i8 *%base, i64 %add
+ %ptr = getelementptr i8, i8 *%base, i64 %add
call void @llvm.prefetch(i8 *%ptr, i32 1, i32 0, i32 1)
ret void
}
@@ -59,7 +59,7 @@ define void @f6(i8 *%base, i64 %index) {
; CHECK: pfd 2, 524287({{%r2,%r3|%r3,%r2}})
; CHECK: br %r14
%add = add i64 %index, 524287
- %ptr = getelementptr i8 *%base, i64 %add
+ %ptr = getelementptr i8, i8 *%base, i64 %add
call void @llvm.prefetch(i8 *%ptr, i32 1, i32 0, i32 1)
ret void
}
@@ -71,7 +71,7 @@ define void @f7(i8 *%base, i64 %index) {
; CHECK: pfd 2,
; CHECK: br %r14
%add = add i64 %index, 524288
- %ptr = getelementptr i8 *%base, i64 %add
+ %ptr = getelementptr i8, i8 *%base, i64 %add
call void @llvm.prefetch(i8 *%ptr, i32 1, i32 0, i32 1)
ret void
}
@@ -81,7 +81,7 @@ define void @f8() {
; CHECK-LABEL: f8:
; CHECK: pfdrl 2, g
; CHECK: br %r14
- %ptr = getelementptr [4096 x i8] *@g, i64 0, i64 0
+ %ptr = getelementptr [4096 x i8], [4096 x i8] *@g, i64 0, i64 0
call void @llvm.prefetch(i8 *%ptr, i32 1, i32 0, i32 1)
ret void
}
diff --git a/test/CodeGen/SystemZ/risbg-03.ll b/test/CodeGen/SystemZ/risbg-03.ll
new file mode 100644
index 000000000000..c3c08ad17961
--- /dev/null
+++ b/test/CodeGen/SystemZ/risbg-03.ll
@@ -0,0 +1,30 @@
+; Test use of RISBG vs RISBGN on zEC12.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=zEC12 | FileCheck %s
+
+; On zEC12, we generally prefer RISBGN.
+define i64 @f1(i64 %a, i64 %b) {
+; CHECK-LABEL: f1:
+; CHECK: risbgn %r2, %r3, 60, 62, 0
+; CHECK: br %r14
+ %anda = and i64 %a, -15
+ %andb = and i64 %b, 14
+ %or = or i64 %anda, %andb
+ ret i64 %or
+}
+
+; But we may fall back to RISBG if we can use the condition code.
+define i64 @f2(i64 %a, i64 %b, i32* %c) {
+; CHECK-LABEL: f2:
+; CHECK: risbg %r2, %r3, 60, 62, 0
+; CHECK-NEXT: ipm
+; CHECK: br %r14
+ %anda = and i64 %a, -15
+ %andb = and i64 %b, 14
+ %or = or i64 %anda, %andb
+ %cmp = icmp sgt i64 %or, 0
+ %conv = zext i1 %cmp to i32
+ store i32 %conv, i32* %c, align 4
+ ret i64 %or
+}
+
diff --git a/test/CodeGen/SystemZ/serialize-01.ll b/test/CodeGen/SystemZ/serialize-01.ll
index 7801fac8d472..4a245172465f 100644
--- a/test/CodeGen/SystemZ/serialize-01.ll
+++ b/test/CodeGen/SystemZ/serialize-01.ll
@@ -16,6 +16,6 @@ define i32 @f1(i32 *%src) {
; CHECK-FAST: bcr 14, %r0
; CHECK-FAST: l %r2, 0(%r2)
; CHECK-FAST: br %r14
- %val = load volatile i32 *%src
+ %val = load volatile i32 , i32 *%src
ret i32 %val
}
diff --git a/test/CodeGen/SystemZ/shift-01.ll b/test/CodeGen/SystemZ/shift-01.ll
index 5dab36b379c4..3e838f56ae3f 100644
--- a/test/CodeGen/SystemZ/shift-01.ll
+++ b/test/CodeGen/SystemZ/shift-01.ll
@@ -108,7 +108,7 @@ define i32 @f11(i32 %a, i32 *%ptr) {
; CHECK: l %r1, 0(%r3)
; CHECK: sll %r2, 0(%r1)
; CHECK: br %r14
- %amt = load i32 *%ptr
+ %amt = load i32 , i32 *%ptr
%shift = shl i32 %a, %amt
ret i32 %shift
}
diff --git a/test/CodeGen/SystemZ/shift-02.ll b/test/CodeGen/SystemZ/shift-02.ll
index 27e73cd3a1f8..43576dbddeb1 100644
--- a/test/CodeGen/SystemZ/shift-02.ll
+++ b/test/CodeGen/SystemZ/shift-02.ll
@@ -108,7 +108,7 @@ define i32 @f11(i32 %a, i32 *%ptr) {
; CHECK: l %r1, 0(%r3)
; CHECK: srl %r2, 0(%r1)
; CHECK: br %r14
- %amt = load i32 *%ptr
+ %amt = load i32 , i32 *%ptr
%shift = lshr i32 %a, %amt
ret i32 %shift
}
diff --git a/test/CodeGen/SystemZ/shift-03.ll b/test/CodeGen/SystemZ/shift-03.ll
index c45ae48b4071..6803ff5ae311 100644
--- a/test/CodeGen/SystemZ/shift-03.ll
+++ b/test/CodeGen/SystemZ/shift-03.ll
@@ -108,7 +108,7 @@ define i32 @f11(i32 %a, i32 *%ptr) {
; CHECK: l %r1, 0(%r3)
; CHECK: sra %r2, 0(%r1)
; CHECK: br %r14
- %amt = load i32 *%ptr
+ %amt = load i32 , i32 *%ptr
%shift = ashr i32 %a, %amt
ret i32 %shift
}
diff --git a/test/CodeGen/SystemZ/shift-04.ll b/test/CodeGen/SystemZ/shift-04.ll
index de2d74f27fa3..2a32872a69ce 100644
--- a/test/CodeGen/SystemZ/shift-04.ll
+++ b/test/CodeGen/SystemZ/shift-04.ll
@@ -180,7 +180,7 @@ define i32 @f14(i32 %a, i32 *%ptr) {
; CHECK: l %r1, 0(%r3)
; CHECK: rll %r2, %r2, 0(%r1)
; CHECK: br %r14
- %amt = load i32 *%ptr
+ %amt = load i32 , i32 *%ptr
%amtb = sub i32 32, %amt
%parta = shl i32 %a, %amt
%partb = lshr i32 %a, %amtb
diff --git a/test/CodeGen/SystemZ/shift-05.ll b/test/CodeGen/SystemZ/shift-05.ll
index 833b2fbae1e5..240be3f9df1f 100644
--- a/test/CodeGen/SystemZ/shift-05.ll
+++ b/test/CodeGen/SystemZ/shift-05.ll
@@ -143,7 +143,7 @@ define i64 @f14(i64 %a, i64 *%ptr) {
; CHECK: l %r1, 4(%r3)
; CHECK: sllg %r2, %r2, 0(%r1)
; CHECK: br %r14
- %amt = load i64 *%ptr
+ %amt = load i64 , i64 *%ptr
%shift = shl i64 %a, %amt
ret i64 %shift
}
diff --git a/test/CodeGen/SystemZ/shift-06.ll b/test/CodeGen/SystemZ/shift-06.ll
index 74cae1213a3e..d9b9f473fe7c 100644
--- a/test/CodeGen/SystemZ/shift-06.ll
+++ b/test/CodeGen/SystemZ/shift-06.ll
@@ -143,7 +143,7 @@ define i64 @f14(i64 %a, i64 *%ptr) {
; CHECK: l %r1, 4(%r3)
; CHECK: srlg %r2, %r2, 0(%r1)
; CHECK: br %r14
- %amt = load i64 *%ptr
+ %amt = load i64 , i64 *%ptr
%shift = lshr i64 %a, %amt
ret i64 %shift
}
diff --git a/test/CodeGen/SystemZ/shift-07.ll b/test/CodeGen/SystemZ/shift-07.ll
index 712849df8ad1..161628864e1b 100644
--- a/test/CodeGen/SystemZ/shift-07.ll
+++ b/test/CodeGen/SystemZ/shift-07.ll
@@ -143,7 +143,7 @@ define i64 @f14(i64 %a, i64 *%ptr) {
; CHECK: l %r1, 4(%r3)
; CHECK: srag %r2, %r2, 0(%r1)
; CHECK: br %r14
- %amt = load i64 *%ptr
+ %amt = load i64 , i64 *%ptr
%shift = ashr i64 %a, %amt
ret i64 %shift
}
diff --git a/test/CodeGen/SystemZ/shift-08.ll b/test/CodeGen/SystemZ/shift-08.ll
index 47283b50221c..0db53c92246d 100644
--- a/test/CodeGen/SystemZ/shift-08.ll
+++ b/test/CodeGen/SystemZ/shift-08.ll
@@ -181,7 +181,7 @@ define i64 @f14(i64 %a, i64 *%ptr) {
; CHECK: l %r1, 4(%r3)
; CHECK: rllg %r2, %r2, 0(%r1)
; CHECK: br %r14
- %amt = load i64 *%ptr
+ %amt = load i64 , i64 *%ptr
%amtb = sub i64 64, %amt
%parta = shl i64 %a, %amt
%partb = lshr i64 %a, %amtb
diff --git a/test/CodeGen/SystemZ/spill-01.ll b/test/CodeGen/SystemZ/spill-01.ll
index c1f780c55d3c..a59c06f192b6 100644
--- a/test/CodeGen/SystemZ/spill-01.ll
+++ b/test/CodeGen/SystemZ/spill-01.ll
@@ -37,20 +37,20 @@ define void @f1(i32 *%ptr0) {
; CHECK-NOT: %r15
; CHECK: lmg
; CHECK: br %r14
- %ptr1 = getelementptr i32 *%ptr0, i32 2
- %ptr2 = getelementptr i32 *%ptr0, i32 4
- %ptr3 = getelementptr i32 *%ptr0, i32 6
- %ptr4 = getelementptr i32 *%ptr0, i32 8
- %ptr5 = getelementptr i32 *%ptr0, i32 10
- %ptr6 = getelementptr i32 *%ptr0, i32 12
-
- %val0 = load i32 *%ptr0
- %val1 = load i32 *%ptr1
- %val2 = load i32 *%ptr2
- %val3 = load i32 *%ptr3
- %val4 = load i32 *%ptr4
- %val5 = load i32 *%ptr5
- %val6 = load i32 *%ptr6
+ %ptr1 = getelementptr i32, i32 *%ptr0, i32 2
+ %ptr2 = getelementptr i32, i32 *%ptr0, i32 4
+ %ptr3 = getelementptr i32, i32 *%ptr0, i32 6
+ %ptr4 = getelementptr i32, i32 *%ptr0, i32 8
+ %ptr5 = getelementptr i32, i32 *%ptr0, i32 10
+ %ptr6 = getelementptr i32, i32 *%ptr0, i32 12
+
+ %val0 = load i32 , i32 *%ptr0
+ %val1 = load i32 , i32 *%ptr1
+ %val2 = load i32 , i32 *%ptr2
+ %val3 = load i32 , i32 *%ptr3
+ %val4 = load i32 , i32 *%ptr4
+ %val5 = load i32 , i32 *%ptr5
+ %val6 = load i32 , i32 *%ptr6
call void @foo()
@@ -73,24 +73,24 @@ define void @f2(i32 *%ptr0) {
; CHECK: brasl %r14, foo@PLT
; CHECK: mvc [[OFFSET2]](4,{{%r[0-9]+}}), [[OFFSET1]](%r15)
; CHECK: br %r14
- %ptr1 = getelementptr i32 *%ptr0, i64 2
- %ptr2 = getelementptr i32 *%ptr0, i64 4
- %ptr3 = getelementptr i32 *%ptr0, i64 6
- %ptr4 = getelementptr i32 *%ptr0, i64 8
- %ptr5 = getelementptr i32 *%ptr0, i64 10
- %ptr6 = getelementptr i32 *%ptr0, i64 12
- %ptr7 = getelementptr i32 *%ptr0, i64 14
- %ptr8 = getelementptr i32 *%ptr0, i64 16
-
- %val0 = load i32 *%ptr0
- %val1 = load i32 *%ptr1
- %val2 = load i32 *%ptr2
- %val3 = load i32 *%ptr3
- %val4 = load i32 *%ptr4
- %val5 = load i32 *%ptr5
- %val6 = load i32 *%ptr6
- %val7 = load i32 *%ptr7
- %val8 = load i32 *%ptr8
+ %ptr1 = getelementptr i32, i32 *%ptr0, i64 2
+ %ptr2 = getelementptr i32, i32 *%ptr0, i64 4
+ %ptr3 = getelementptr i32, i32 *%ptr0, i64 6
+ %ptr4 = getelementptr i32, i32 *%ptr0, i64 8
+ %ptr5 = getelementptr i32, i32 *%ptr0, i64 10
+ %ptr6 = getelementptr i32, i32 *%ptr0, i64 12
+ %ptr7 = getelementptr i32, i32 *%ptr0, i64 14
+ %ptr8 = getelementptr i32, i32 *%ptr0, i64 16
+
+ %val0 = load i32 , i32 *%ptr0
+ %val1 = load i32 , i32 *%ptr1
+ %val2 = load i32 , i32 *%ptr2
+ %val3 = load i32 , i32 *%ptr3
+ %val4 = load i32 , i32 *%ptr4
+ %val5 = load i32 , i32 *%ptr5
+ %val6 = load i32 , i32 *%ptr6
+ %val7 = load i32 , i32 *%ptr7
+ %val8 = load i32 , i32 *%ptr8
call void @foo()
@@ -115,24 +115,24 @@ define void @f3(i64 *%ptr0) {
; CHECK: brasl %r14, foo@PLT
; CHECK: mvc [[OFFSET]](8,{{%r[0-9]+}}), 160(%r15)
; CHECK: br %r14
- %ptr1 = getelementptr i64 *%ptr0, i64 2
- %ptr2 = getelementptr i64 *%ptr0, i64 4
- %ptr3 = getelementptr i64 *%ptr0, i64 6
- %ptr4 = getelementptr i64 *%ptr0, i64 8
- %ptr5 = getelementptr i64 *%ptr0, i64 10
- %ptr6 = getelementptr i64 *%ptr0, i64 12
- %ptr7 = getelementptr i64 *%ptr0, i64 14
- %ptr8 = getelementptr i64 *%ptr0, i64 16
-
- %val0 = load i64 *%ptr0
- %val1 = load i64 *%ptr1
- %val2 = load i64 *%ptr2
- %val3 = load i64 *%ptr3
- %val4 = load i64 *%ptr4
- %val5 = load i64 *%ptr5
- %val6 = load i64 *%ptr6
- %val7 = load i64 *%ptr7
- %val8 = load i64 *%ptr8
+ %ptr1 = getelementptr i64, i64 *%ptr0, i64 2
+ %ptr2 = getelementptr i64, i64 *%ptr0, i64 4
+ %ptr3 = getelementptr i64, i64 *%ptr0, i64 6
+ %ptr4 = getelementptr i64, i64 *%ptr0, i64 8
+ %ptr5 = getelementptr i64, i64 *%ptr0, i64 10
+ %ptr6 = getelementptr i64, i64 *%ptr0, i64 12
+ %ptr7 = getelementptr i64, i64 *%ptr0, i64 14
+ %ptr8 = getelementptr i64, i64 *%ptr0, i64 16
+
+ %val0 = load i64 , i64 *%ptr0
+ %val1 = load i64 , i64 *%ptr1
+ %val2 = load i64 , i64 *%ptr2
+ %val3 = load i64 , i64 *%ptr3
+ %val4 = load i64 , i64 *%ptr4
+ %val5 = load i64 , i64 *%ptr5
+ %val6 = load i64 , i64 *%ptr6
+ %val7 = load i64 , i64 *%ptr7
+ %val8 = load i64 , i64 *%ptr8
call void @foo()
@@ -160,26 +160,26 @@ define void @f4(float *%ptr0) {
; CHECK: brasl %r14, foo@PLT
; CHECK: mvc [[OFFSET2]](4,{{%r[0-9]+}}), [[OFFSET1]](%r15)
; CHECK: br %r14
- %ptr1 = getelementptr float *%ptr0, i64 2
- %ptr2 = getelementptr float *%ptr0, i64 4
- %ptr3 = getelementptr float *%ptr0, i64 6
- %ptr4 = getelementptr float *%ptr0, i64 8
- %ptr5 = getelementptr float *%ptr0, i64 10
- %ptr6 = getelementptr float *%ptr0, i64 12
- %ptr7 = getelementptr float *%ptr0, i64 14
- %ptr8 = getelementptr float *%ptr0, i64 16
- %ptr9 = getelementptr float *%ptr0, i64 18
-
- %val0 = load float *%ptr0
- %val1 = load float *%ptr1
- %val2 = load float *%ptr2
- %val3 = load float *%ptr3
- %val4 = load float *%ptr4
- %val5 = load float *%ptr5
- %val6 = load float *%ptr6
- %val7 = load float *%ptr7
- %val8 = load float *%ptr8
- %val9 = load float *%ptr9
+ %ptr1 = getelementptr float, float *%ptr0, i64 2
+ %ptr2 = getelementptr float, float *%ptr0, i64 4
+ %ptr3 = getelementptr float, float *%ptr0, i64 6
+ %ptr4 = getelementptr float, float *%ptr0, i64 8
+ %ptr5 = getelementptr float, float *%ptr0, i64 10
+ %ptr6 = getelementptr float, float *%ptr0, i64 12
+ %ptr7 = getelementptr float, float *%ptr0, i64 14
+ %ptr8 = getelementptr float, float *%ptr0, i64 16
+ %ptr9 = getelementptr float, float *%ptr0, i64 18
+
+ %val0 = load float , float *%ptr0
+ %val1 = load float , float *%ptr1
+ %val2 = load float , float *%ptr2
+ %val3 = load float , float *%ptr3
+ %val4 = load float , float *%ptr4
+ %val5 = load float , float *%ptr5
+ %val6 = load float , float *%ptr6
+ %val7 = load float , float *%ptr7
+ %val8 = load float , float *%ptr8
+ %val9 = load float , float *%ptr9
call void @foo()
@@ -204,26 +204,26 @@ define void @f5(double *%ptr0) {
; CHECK: brasl %r14, foo@PLT
; CHECK: mvc [[OFFSET]](8,{{%r[0-9]+}}), 160(%r15)
; CHECK: br %r14
- %ptr1 = getelementptr double *%ptr0, i64 2
- %ptr2 = getelementptr double *%ptr0, i64 4
- %ptr3 = getelementptr double *%ptr0, i64 6
- %ptr4 = getelementptr double *%ptr0, i64 8
- %ptr5 = getelementptr double *%ptr0, i64 10
- %ptr6 = getelementptr double *%ptr0, i64 12
- %ptr7 = getelementptr double *%ptr0, i64 14
- %ptr8 = getelementptr double *%ptr0, i64 16
- %ptr9 = getelementptr double *%ptr0, i64 18
-
- %val0 = load double *%ptr0
- %val1 = load double *%ptr1
- %val2 = load double *%ptr2
- %val3 = load double *%ptr3
- %val4 = load double *%ptr4
- %val5 = load double *%ptr5
- %val6 = load double *%ptr6
- %val7 = load double *%ptr7
- %val8 = load double *%ptr8
- %val9 = load double *%ptr9
+ %ptr1 = getelementptr double, double *%ptr0, i64 2
+ %ptr2 = getelementptr double, double *%ptr0, i64 4
+ %ptr3 = getelementptr double, double *%ptr0, i64 6
+ %ptr4 = getelementptr double, double *%ptr0, i64 8
+ %ptr5 = getelementptr double, double *%ptr0, i64 10
+ %ptr6 = getelementptr double, double *%ptr0, i64 12
+ %ptr7 = getelementptr double, double *%ptr0, i64 14
+ %ptr8 = getelementptr double, double *%ptr0, i64 16
+ %ptr9 = getelementptr double, double *%ptr0, i64 18
+
+ %val0 = load double , double *%ptr0
+ %val1 = load double , double *%ptr1
+ %val2 = load double , double *%ptr2
+ %val3 = load double , double *%ptr3
+ %val4 = load double , double *%ptr4
+ %val5 = load double , double *%ptr5
+ %val6 = load double , double *%ptr6
+ %val7 = load double , double *%ptr7
+ %val8 = load double , double *%ptr8
+ %val9 = load double , double *%ptr9
call void @foo()
@@ -246,24 +246,24 @@ define void @f6(i32 *%ptr0) {
; CHECK-LABEL: f6:
; CHECK-NOT: mvc
; CHECK: br %r14
- %ptr1 = getelementptr i32 *%ptr0, i64 2
- %ptr2 = getelementptr i32 *%ptr0, i64 4
- %ptr3 = getelementptr i32 *%ptr0, i64 6
- %ptr4 = getelementptr i32 *%ptr0, i64 8
- %ptr5 = getelementptr i32 *%ptr0, i64 10
- %ptr6 = getelementptr i32 *%ptr0, i64 12
- %ptr7 = getelementptr i32 *%ptr0, i64 14
- %ptr8 = getelementptr i32 *%ptr0, i64 16
-
- %val0 = load atomic i32 *%ptr0 unordered, align 4
- %val1 = load atomic i32 *%ptr1 unordered, align 4
- %val2 = load atomic i32 *%ptr2 unordered, align 4
- %val3 = load atomic i32 *%ptr3 unordered, align 4
- %val4 = load atomic i32 *%ptr4 unordered, align 4
- %val5 = load atomic i32 *%ptr5 unordered, align 4
- %val6 = load atomic i32 *%ptr6 unordered, align 4
- %val7 = load atomic i32 *%ptr7 unordered, align 4
- %val8 = load atomic i32 *%ptr8 unordered, align 4
+ %ptr1 = getelementptr i32, i32 *%ptr0, i64 2
+ %ptr2 = getelementptr i32, i32 *%ptr0, i64 4
+ %ptr3 = getelementptr i32, i32 *%ptr0, i64 6
+ %ptr4 = getelementptr i32, i32 *%ptr0, i64 8
+ %ptr5 = getelementptr i32, i32 *%ptr0, i64 10
+ %ptr6 = getelementptr i32, i32 *%ptr0, i64 12
+ %ptr7 = getelementptr i32, i32 *%ptr0, i64 14
+ %ptr8 = getelementptr i32, i32 *%ptr0, i64 16
+
+ %val0 = load atomic i32 , i32 *%ptr0 unordered, align 4
+ %val1 = load atomic i32 , i32 *%ptr1 unordered, align 4
+ %val2 = load atomic i32 , i32 *%ptr2 unordered, align 4
+ %val3 = load atomic i32 , i32 *%ptr3 unordered, align 4
+ %val4 = load atomic i32 , i32 *%ptr4 unordered, align 4
+ %val5 = load atomic i32 , i32 *%ptr5 unordered, align 4
+ %val6 = load atomic i32 , i32 *%ptr6 unordered, align 4
+ %val7 = load atomic i32 , i32 *%ptr7 unordered, align 4
+ %val8 = load atomic i32 , i32 *%ptr8 unordered, align 4
call void @foo()
@@ -285,24 +285,24 @@ define void @f7(i32 *%ptr0) {
; CHECK-LABEL: f7:
; CHECK-NOT: mvc
; CHECK: br %r14
- %ptr1 = getelementptr i32 *%ptr0, i64 2
- %ptr2 = getelementptr i32 *%ptr0, i64 4
- %ptr3 = getelementptr i32 *%ptr0, i64 6
- %ptr4 = getelementptr i32 *%ptr0, i64 8
- %ptr5 = getelementptr i32 *%ptr0, i64 10
- %ptr6 = getelementptr i32 *%ptr0, i64 12
- %ptr7 = getelementptr i32 *%ptr0, i64 14
- %ptr8 = getelementptr i32 *%ptr0, i64 16
-
- %val0 = load volatile i32 *%ptr0
- %val1 = load volatile i32 *%ptr1
- %val2 = load volatile i32 *%ptr2
- %val3 = load volatile i32 *%ptr3
- %val4 = load volatile i32 *%ptr4
- %val5 = load volatile i32 *%ptr5
- %val6 = load volatile i32 *%ptr6
- %val7 = load volatile i32 *%ptr7
- %val8 = load volatile i32 *%ptr8
+ %ptr1 = getelementptr i32, i32 *%ptr0, i64 2
+ %ptr2 = getelementptr i32, i32 *%ptr0, i64 4
+ %ptr3 = getelementptr i32, i32 *%ptr0, i64 6
+ %ptr4 = getelementptr i32, i32 *%ptr0, i64 8
+ %ptr5 = getelementptr i32, i32 *%ptr0, i64 10
+ %ptr6 = getelementptr i32, i32 *%ptr0, i64 12
+ %ptr7 = getelementptr i32, i32 *%ptr0, i64 14
+ %ptr8 = getelementptr i32, i32 *%ptr0, i64 16
+
+ %val0 = load volatile i32 , i32 *%ptr0
+ %val1 = load volatile i32 , i32 *%ptr1
+ %val2 = load volatile i32 , i32 *%ptr2
+ %val3 = load volatile i32 , i32 *%ptr3
+ %val4 = load volatile i32 , i32 *%ptr4
+ %val5 = load volatile i32 , i32 *%ptr5
+ %val6 = load volatile i32 , i32 *%ptr6
+ %val7 = load volatile i32 , i32 *%ptr7
+ %val8 = load volatile i32 , i32 *%ptr8
call void @foo()
@@ -324,16 +324,16 @@ define void @f8() {
; CHECK-LABEL: f8:
; CHECK-NOT: mvc
; CHECK: br %r14
- %val0 = load i32 *@g0
- %val1 = load i32 *@g1
- %val2 = load i32 *@g2
- %val3 = load i32 *@g3
- %val4 = load i32 *@g4
- %val5 = load i32 *@g5
- %val6 = load i32 *@g6
- %val7 = load i32 *@g7
- %val8 = load i32 *@g8
- %val9 = load i32 *@g9
+ %val0 = load i32 , i32 *@g0
+ %val1 = load i32 , i32 *@g1
+ %val2 = load i32 , i32 *@g2
+ %val3 = load i32 , i32 *@g3
+ %val4 = load i32 , i32 *@g4
+ %val5 = load i32 , i32 *@g5
+ %val6 = load i32 , i32 *@g6
+ %val7 = load i32 , i32 *@g7
+ %val8 = load i32 , i32 *@g8
+ %val9 = load i32 , i32 *@g9
call void @foo()
@@ -356,16 +356,16 @@ define void @f9() {
; CHECK-LABEL: f9:
; CHECK-NOT: mvc
; CHECK: br %r14
- %val0 = load i64 *@h0
- %val1 = load i64 *@h1
- %val2 = load i64 *@h2
- %val3 = load i64 *@h3
- %val4 = load i64 *@h4
- %val5 = load i64 *@h5
- %val6 = load i64 *@h6
- %val7 = load i64 *@h7
- %val8 = load i64 *@h8
- %val9 = load i64 *@h9
+ %val0 = load i64 , i64 *@h0
+ %val1 = load i64 , i64 *@h1
+ %val2 = load i64 , i64 *@h2
+ %val3 = load i64 , i64 *@h3
+ %val4 = load i64 , i64 *@h4
+ %val5 = load i64 , i64 *@h5
+ %val6 = load i64 , i64 *@h6
+ %val7 = load i64 , i64 *@h7
+ %val8 = load i64 , i64 *@h8
+ %val9 = load i64 , i64 *@h9
call void @foo()
@@ -400,16 +400,16 @@ define void @f10() {
; CHECK: stgrl [[REG]], h8
; CHECK: br %r14
entry:
- %val8 = load volatile i64 *@h8
- %val0 = load volatile i64 *@h0
- %val1 = load volatile i64 *@h1
- %val2 = load volatile i64 *@h2
- %val3 = load volatile i64 *@h3
- %val4 = load volatile i64 *@h4
- %val5 = load volatile i64 *@h5
- %val6 = load volatile i64 *@h6
- %val7 = load volatile i64 *@h7
- %val9 = load volatile i64 *@h9
+ %val8 = load volatile i64 , i64 *@h8
+ %val0 = load volatile i64 , i64 *@h0
+ %val1 = load volatile i64 , i64 *@h1
+ %val2 = load volatile i64 , i64 *@h2
+ %val3 = load volatile i64 , i64 *@h3
+ %val4 = load volatile i64 , i64 *@h4
+ %val5 = load volatile i64 , i64 *@h5
+ %val6 = load volatile i64 , i64 *@h6
+ %val7 = load volatile i64 , i64 *@h7
+ %val9 = load volatile i64 , i64 *@h9
call void @foo()
@@ -422,7 +422,7 @@ entry:
store volatile i64 %val6, i64 *@h6
store volatile i64 %val7, i64 *@h7
- %check = load volatile i64 *@h0
+ %check = load volatile i64 , i64 *@h0
%cond = icmp eq i64 %check, 0
br i1 %cond, label %skip, label %fallthru
@@ -464,17 +464,17 @@ define void @f11() {
; CHECK-NOT: mvc [[OFFSET:[0-9]+]](8,%r15), [[OFFSET]](%r15)
; CHECK: br %r14
entry:
- %val0 = load volatile i64 *@h0
- %val1 = load volatile i64 *@h1
- %val2 = load volatile i64 *@h2
- %val3 = load volatile i64 *@h3
- %val4 = load volatile i64 *@h4
- %val5 = load volatile i64 *@h5
- %val6 = load volatile i64 *@h6
- %val7 = load volatile i64 *@h7
-
- %altval0 = load volatile i64 *@h0
- %altval1 = load volatile i64 *@h1
+ %val0 = load volatile i64 , i64 *@h0
+ %val1 = load volatile i64 , i64 *@h1
+ %val2 = load volatile i64 , i64 *@h2
+ %val3 = load volatile i64 , i64 *@h3
+ %val4 = load volatile i64 , i64 *@h4
+ %val5 = load volatile i64 , i64 *@h5
+ %val6 = load volatile i64 , i64 *@h6
+ %val7 = load volatile i64 , i64 *@h7
+
+ %altval0 = load volatile i64 , i64 *@h0
+ %altval1 = load volatile i64 , i64 *@h1
call void @foo()
@@ -487,7 +487,7 @@ entry:
store volatile i64 %val6, i64 *@h6
store volatile i64 %val7, i64 *@h7
- %check = load volatile i64 *@h0
+ %check = load volatile i64 , i64 *@h0
%cond = icmp eq i64 %check, 0
br i1 %cond, label %a1, label %b1
diff --git a/test/CodeGen/SystemZ/strcpy-01.ll b/test/CodeGen/SystemZ/strcpy-01.ll
index 29bab629ecf8..d6d0edf494b1 100644
--- a/test/CodeGen/SystemZ/strcpy-01.ll
+++ b/test/CodeGen/SystemZ/strcpy-01.ll
@@ -43,7 +43,7 @@ define i32 @f3(i32 %dummy, i8 *%dest, i8 *%src, i32 *%resptr, i32 *%storeptr) {
; CHECK-NEXT: jo [[LABEL]]
; CHECK: mvhi 0(%r6), 0
; CHECK: br %r14
- %res = load i32 *%resptr
+ %res = load i32 , i32 *%resptr
%unused = call i8 *@strcpy(i8 *%dest, i8 *%src)
store i32 0, i32 *%storeptr
ret i32 %res
diff --git a/test/CodeGen/SystemZ/tail-call-mem-intrinsics.ll b/test/CodeGen/SystemZ/tail-call-mem-intrinsics.ll
new file mode 100644
index 000000000000..65cc394f8a98
--- /dev/null
+++ b/test/CodeGen/SystemZ/tail-call-mem-intrinsics.ll
@@ -0,0 +1,31 @@
+; RUN: llc -march=systemz < %s | FileCheck %s
+
+; CHECK-LABEL: tail_memcpy:
+; CHECK: jg memcpy
+define void @tail_memcpy(i8* nocapture %p, i8* nocapture readonly %q, i32 %n) #0 {
+entry:
+ tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %p, i8* %q, i32 %n, i32 1, i1 false)
+ ret void
+}
+
+; CHECK-LABEL: tail_memmove:
+; CHECK: jg memmove
+define void @tail_memmove(i8* nocapture %p, i8* nocapture readonly %q, i32 %n) #0 {
+entry:
+ tail call void @llvm.memmove.p0i8.p0i8.i32(i8* %p, i8* %q, i32 %n, i32 1, i1 false)
+ ret void
+}
+
+; CHECK-LABEL: tail_memset:
+; CHECK: jg memset
+define void @tail_memset(i8* nocapture %p, i8 %c, i32 %n) #0 {
+entry:
+ tail call void @llvm.memset.p0i8.i32(i8* %p, i8 %c, i32 %n, i32 1, i1 false)
+ ret void
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i32, i1) #0
+declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i32, i1) #0
+declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) #0
+
+attributes #0 = { nounwind }
diff --git a/test/CodeGen/SystemZ/tls-01.ll b/test/CodeGen/SystemZ/tls-01.ll
index 16bc8f6e500f..da7176c0599f 100644
--- a/test/CodeGen/SystemZ/tls-01.ll
+++ b/test/CodeGen/SystemZ/tls-01.ll
@@ -1,7 +1,7 @@
-; Test initial-exec TLS accesses.
+; Test local-exec TLS accesses.
;
-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-MAIN
-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-CP
+; RUN: llc < %s -mcpu=z10 -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-MAIN
+; RUN: llc < %s -mcpu=z10 -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-CP
@x = thread_local global i32 0
diff --git a/test/CodeGen/SystemZ/tls-02.ll b/test/CodeGen/SystemZ/tls-02.ll
new file mode 100644
index 000000000000..15918d08a936
--- /dev/null
+++ b/test/CodeGen/SystemZ/tls-02.ll
@@ -0,0 +1,18 @@
+; Test initial-exec TLS accesses.
+;
+; RUN: llc < %s -mcpu=z10 -mtriple=s390x-linux-gnu -relocation-model=pic | FileCheck %s -check-prefix=CHECK-MAIN
+
+@x = thread_local(initialexec) global i32 0
+
+; The offset must be loaded from the GOT. This TLS access model does
+; not use literal pool constants.
+define i32 *@foo() {
+; CHECK-MAIN-LABEL: foo:
+; CHECK-MAIN: ear [[HIGH:%r[0-5]]], %a0
+; CHECK-MAIN: sllg %r2, [[HIGH]], 32
+; CHECK-MAIN: ear %r2, %a1
+; CHECK-MAIN: larl %r1, x@INDNTPOFF
+; CHECK-MAIN: ag %r2, 0(%r1)
+; CHECK-MAIN: br %r14
+ ret i32 *@x
+}
diff --git a/test/CodeGen/SystemZ/tls-03.ll b/test/CodeGen/SystemZ/tls-03.ll
new file mode 100644
index 000000000000..c9f7bd632904
--- /dev/null
+++ b/test/CodeGen/SystemZ/tls-03.ll
@@ -0,0 +1,23 @@
+; Test general-dynamic TLS accesses.
+;
+; RUN: llc < %s -mcpu=z10 -mtriple=s390x-linux-gnu -relocation-model=pic | FileCheck %s -check-prefix=CHECK-MAIN
+; RUN: llc < %s -mcpu=z10 -mtriple=s390x-linux-gnu -relocation-model=pic | FileCheck %s -check-prefix=CHECK-CP
+
+@x = thread_local global i32 0
+
+; Call __tls_get_offset to retrieve the symbol's TLS offset.
+define i32 *@foo() {
+; CHECK-CP: .LCP{{.*}}:
+; CHECK-CP: .quad x@TLSGD
+;
+; CHECK-MAIN-LABEL: foo:
+; CHECK-MAIN-DAG: larl %r12, _GLOBAL_OFFSET_TABLE_
+; CHECK-MAIN-DAG: lgrl %r2, .LCP{{.*}}
+; CHECK-MAIN: brasl %r14, __tls_get_offset@PLT:tls_gdcall:x
+; CHECK-MAIN: ear [[HIGH:%r[0-5]]], %a0
+; CHECK-MAIN: sllg [[TP:%r[0-5]]], [[HIGH]], 32
+; CHECK-MAIN: ear [[TP]], %a1
+; CHECK-MAIN: agr %r2, [[TP]]
+; CHECK-MAIN: br %r14
+ ret i32 *@x
+}
diff --git a/test/CodeGen/SystemZ/tls-04.ll b/test/CodeGen/SystemZ/tls-04.ll
new file mode 100644
index 000000000000..dcb210a71272
--- /dev/null
+++ b/test/CodeGen/SystemZ/tls-04.ll
@@ -0,0 +1,28 @@
+; Test local-dynamic TLS accesses.
+;
+; RUN: llc < %s -mcpu=z10 -mtriple=s390x-linux-gnu -relocation-model=pic | FileCheck %s -check-prefix=CHECK-MAIN
+; RUN: llc < %s -mcpu=z10 -mtriple=s390x-linux-gnu -relocation-model=pic | FileCheck %s -check-prefix=CHECK-CP
+
+@x = thread_local(localdynamic) global i32 0
+
+; Call __tls_get_offset to retrieve the module's TLS base offset.
+; Add the per-symbol offset and the thread pointer.
+define i32 *@foo() {
+; CHECK-CP: .LCP{{.*}}_0:
+; CHECK-CP: .quad x@TLSLDM
+; CHECK-CP: .LCP{{.*}}_1:
+; CHECK-CP: .quad x@DTPOFF
+;
+; CHECK-MAIN-LABEL: foo:
+; CHECK-MAIN-DAG: larl %r12, _GLOBAL_OFFSET_TABLE_
+; CHECK-MAIN-DAG: lgrl %r2, .LCP{{.*}}_0
+; CHECK-MAIN: brasl %r14, __tls_get_offset@PLT:tls_ldcall:x
+; CHECK-MAIN: larl %r1, .LCP{{.*}}_1
+; CHECK-MAIN: ag %r2, 0(%r1)
+; CHECK-MAIN: ear [[HIGH:%r[0-5]]], %a0
+; CHECK-MAIN: sllg [[TP:%r[0-5]]], [[HIGH]], 32
+; CHECK-MAIN: ear [[TP]], %a1
+; CHECK-MAIN: agr %r2, [[TP]]
+; CHECK-MAIN: br %r14
+ ret i32 *@x
+}
diff --git a/test/CodeGen/SystemZ/tls-05.ll b/test/CodeGen/SystemZ/tls-05.ll
new file mode 100644
index 000000000000..502d6d45852c
--- /dev/null
+++ b/test/CodeGen/SystemZ/tls-05.ll
@@ -0,0 +1,15 @@
+; Test general-dynamic TLS access optimizations.
+;
+; If we access the same TLS variable twice, there should only be
+; a single call to __tls_get_offset.
+;
+; RUN: llc < %s -mcpu=z10 -mtriple=s390x-linux-gnu -relocation-model=pic | grep "__tls_get_offset" | count 1
+
+@x = thread_local global i32 0
+
+define i32 @foo() {
+ %val = load i32, i32* @x
+ %inc = add nsw i32 %val, 1
+ store i32 %inc, i32* @x
+ ret i32 %val
+}
diff --git a/test/CodeGen/SystemZ/tls-06.ll b/test/CodeGen/SystemZ/tls-06.ll
new file mode 100644
index 000000000000..8f1796df7291
--- /dev/null
+++ b/test/CodeGen/SystemZ/tls-06.ll
@@ -0,0 +1,17 @@
+; Test general-dynamic TLS access optimizations.
+;
+; If we access two different TLS variables, we need two calls to
+; __tls_get_offset, but should load _GLOBAL_OFFSET_TABLE only once.
+;
+; RUN: llc < %s -mcpu=z10 -mtriple=s390x-linux-gnu -relocation-model=pic | grep "__tls_get_offset" | count 2
+; RUN: llc < %s -mcpu=z10 -mtriple=s390x-linux-gnu -relocation-model=pic | grep "_GLOBAL_OFFSET_TABLE_" | count 1
+
+@x = thread_local global i32 0
+@y = thread_local global i32 0
+
+define i32 @foo() {
+ %valx = load i32, i32* @x
+ %valy = load i32, i32* @y
+ %add = add nsw i32 %valx, %valy
+ ret i32 %add
+}
diff --git a/test/CodeGen/SystemZ/tls-07.ll b/test/CodeGen/SystemZ/tls-07.ll
new file mode 100644
index 000000000000..be66c093bd42
--- /dev/null
+++ b/test/CodeGen/SystemZ/tls-07.ll
@@ -0,0 +1,16 @@
+; Test local-dynamic TLS access optimizations.
+;
+; If we access two different local-dynamic TLS variables, we only
+; need a single call to __tls_get_offset.
+;
+; RUN: llc < %s -mcpu=z10 -mtriple=s390x-linux-gnu -relocation-model=pic | grep "__tls_get_offset" | count 1
+
+@x = thread_local(localdynamic) global i32 0
+@y = thread_local(localdynamic) global i32 0
+
+define i32 @foo() {
+ %valx = load i32, i32* @x
+ %valy = load i32, i32* @y
+ %add = add nsw i32 %valx, %valy
+ ret i32 %add
+}
diff --git a/test/CodeGen/SystemZ/unaligned-01.ll b/test/CodeGen/SystemZ/unaligned-01.ll
index 526a068100ef..94cad0e1743a 100644
--- a/test/CodeGen/SystemZ/unaligned-01.ll
+++ b/test/CodeGen/SystemZ/unaligned-01.ll
@@ -12,9 +12,9 @@ define void @f1(i8 *%ptr) {
; CHECK: iilf [[REG:%r[0-5]]], 66051
; CHECK: st [[REG]], 0(%r2)
; CHECK: br %r14
- %off1 = getelementptr i8 *%ptr, i64 1
- %off2 = getelementptr i8 *%ptr, i64 2
- %off3 = getelementptr i8 *%ptr, i64 3
+ %off1 = getelementptr i8, i8 *%ptr, i64 1
+ %off2 = getelementptr i8, i8 *%ptr, i64 2
+ %off3 = getelementptr i8, i8 *%ptr, i64 3
store i8 0, i8 *%ptr
store i8 1, i8 *%off1
store i8 2, i8 *%off2
@@ -28,7 +28,7 @@ define i16 @f2(i16 *%src, i16 *%dst) {
; CHECK: lh %r2, 0(%r2)
; CHECK: sth %r2, 0(%r3)
; CHECK: br %r14
- %val = load i16 *%src, align 1
+ %val = load i16 , i16 *%src, align 1
store i16 %val, i16 *%dst, align 1
ret i16 %val
}
@@ -40,8 +40,8 @@ define i32 @f3(i32 *%src1, i32 *%src2, i32 *%dst) {
; CHECK: s %r2, 0(%r3)
; CHECK: st %r2, 0(%r4)
; CHECK: br %r14
- %val1 = load i32 *%src1, align 1
- %val2 = load i32 *%src2, align 2
+ %val1 = load i32 , i32 *%src1, align 1
+ %val2 = load i32 , i32 *%src2, align 2
%sub = sub i32 %val1, %val2
store i32 %sub, i32 *%dst, align 1
ret i32 %sub
@@ -54,8 +54,8 @@ define i64 @f4(i64 *%src1, i64 *%src2, i64 *%dst) {
; CHECK: sg %r2, 0(%r3)
; CHECK: stg %r2, 0(%r4)
; CHECK: br %r14
- %val1 = load i64 *%src1, align 1
- %val2 = load i64 *%src2, align 2
+ %val1 = load i64 , i64 *%src1, align 1
+ %val2 = load i64 , i64 *%src2, align 2
%sub = sub i64 %val1, %val2
store i64 %sub, i64 *%dst, align 4
ret i64 %sub
diff --git a/test/CodeGen/SystemZ/vec-abi-align.ll b/test/CodeGen/SystemZ/vec-abi-align.ll
new file mode 100644
index 000000000000..01b97a8583eb
--- /dev/null
+++ b/test/CodeGen/SystemZ/vec-abi-align.ll
@@ -0,0 +1,49 @@
+; Verify that we use the vector ABI datalayout if and only if
+; the vector facility is present.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | \
+; RUN: FileCheck -check-prefix=CHECK-NOVECTOR %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=generic | \
+; RUN: FileCheck -check-prefix=CHECK-NOVECTOR %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | \
+; RUN: FileCheck -check-prefix=CHECK-NOVECTOR %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | \
+; RUN: FileCheck -check-prefix=CHECK-NOVECTOR %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=zEC12 | \
+; RUN: FileCheck -check-prefix=CHECK-NOVECTOR %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | \
+; RUN: FileCheck -check-prefix=CHECK-VECTOR %s
+
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mattr=vector | \
+; RUN: FileCheck -check-prefix=CHECK-VECTOR %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mattr=+vector | \
+; RUN: FileCheck -check-prefix=CHECK-VECTOR %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mattr=-vector,vector | \
+; RUN: FileCheck -check-prefix=CHECK-VECTOR %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mattr=-vector,+vector | \
+; RUN: FileCheck -check-prefix=CHECK-VECTOR %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mattr=-vector | \
+; RUN: FileCheck -check-prefix=CHECK-NOVECTOR %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mattr=vector,-vector | \
+; RUN: FileCheck -check-prefix=CHECK-NOVECTOR %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mattr=+vector,-vector | \
+; RUN: FileCheck -check-prefix=CHECK-NOVECTOR %s
+
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 -mattr=-vector | \
+; RUN: FileCheck -check-prefix=CHECK-NOVECTOR %s
+
+%struct.S = type { i8, <2 x i64> }
+
+define void @test(%struct.S* %s) nounwind {
+; CHECK-VECTOR-LABEL: @test
+; CHECK-VECTOR: vl %v0, 8(%r2)
+; CHECK-NOVECTOR-LABEL: @test
+; CHECK-NOVECTOR-DAG: agsi 16(%r2), 1
+; CHECK-NOVECTOR-DAG: agsi 24(%r2), 1
+ %ptr = getelementptr %struct.S, %struct.S* %s, i64 0, i32 1
+ %vec = load <2 x i64>, <2 x i64>* %ptr
+ %add = add <2 x i64> %vec, <i64 1, i64 1>
+ store <2 x i64> %add, <2 x i64>* %ptr
+ ret void
+}
+
diff --git a/test/CodeGen/SystemZ/vec-abs-01.ll b/test/CodeGen/SystemZ/vec-abs-01.ll
new file mode 100644
index 000000000000..aec3b9314f19
--- /dev/null
+++ b/test/CodeGen/SystemZ/vec-abs-01.ll
@@ -0,0 +1,146 @@
+; Test v16i8 absolute.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
+; Test with slt.
+define <16 x i8> @f1(<16 x i8> %val) {
+; CHECK-LABEL: f1:
+; CHECK: vlpb %v24, %v24
+; CHECK: br %r14
+ %cmp = icmp slt <16 x i8> %val, zeroinitializer
+ %neg = sub <16 x i8> zeroinitializer, %val
+ %ret = select <16 x i1> %cmp, <16 x i8> %neg, <16 x i8> %val
+ ret <16 x i8> %ret
+}
+
+; Test with sle.
+define <16 x i8> @f2(<16 x i8> %val) {
+; CHECK-LABEL: f2:
+; CHECK: vlpb %v24, %v24
+; CHECK: br %r14
+ %cmp = icmp sle <16 x i8> %val, zeroinitializer
+ %neg = sub <16 x i8> zeroinitializer, %val
+ %ret = select <16 x i1> %cmp, <16 x i8> %neg, <16 x i8> %val
+ ret <16 x i8> %ret
+}
+
+; Test with sgt.
+define <16 x i8> @f3(<16 x i8> %val) {
+; CHECK-LABEL: f3:
+; CHECK: vlpb %v24, %v24
+; CHECK: br %r14
+ %cmp = icmp sgt <16 x i8> %val, zeroinitializer
+ %neg = sub <16 x i8> zeroinitializer, %val
+ %ret = select <16 x i1> %cmp, <16 x i8> %val, <16 x i8> %neg
+ ret <16 x i8> %ret
+}
+
+; Test with sge.
+define <16 x i8> @f4(<16 x i8> %val) {
+; CHECK-LABEL: f4:
+; CHECK: vlpb %v24, %v24
+; CHECK: br %r14
+ %cmp = icmp sge <16 x i8> %val, zeroinitializer
+ %neg = sub <16 x i8> zeroinitializer, %val
+ %ret = select <16 x i1> %cmp, <16 x i8> %val, <16 x i8> %neg
+ ret <16 x i8> %ret
+}
+
+; Test that negative absolute uses VLPB too. There is no vector equivalent
+; of LOAD NEGATIVE.
+define <16 x i8> @f5(<16 x i8> %val) {
+; CHECK-LABEL: f5:
+; CHECK: vlpb [[REG:%v[0-9]+]], %v24
+; CHECK: vlcb %v24, [[REG]]
+; CHECK: br %r14
+ %cmp = icmp slt <16 x i8> %val, zeroinitializer
+ %neg = sub <16 x i8> zeroinitializer, %val
+ %abs = select <16 x i1> %cmp, <16 x i8> %neg, <16 x i8> %val
+ %ret = sub <16 x i8> zeroinitializer, %abs
+ ret <16 x i8> %ret
+}
+
+; Try another form of negative absolute (slt version).
+define <16 x i8> @f6(<16 x i8> %val) {
+; CHECK-LABEL: f6:
+; CHECK: vlpb [[REG:%v[0-9]+]], %v24
+; CHECK: vlcb %v24, [[REG]]
+; CHECK: br %r14
+ %cmp = icmp slt <16 x i8> %val, zeroinitializer
+ %neg = sub <16 x i8> zeroinitializer, %val
+ %ret = select <16 x i1> %cmp, <16 x i8> %val, <16 x i8> %neg
+ ret <16 x i8> %ret
+}
+
+; Test with sle.
+define <16 x i8> @f7(<16 x i8> %val) {
+; CHECK-LABEL: f7:
+; CHECK: vlpb [[REG:%v[0-9]+]], %v24
+; CHECK: vlcb %v24, [[REG]]
+; CHECK: br %r14
+ %cmp = icmp sle <16 x i8> %val, zeroinitializer
+ %neg = sub <16 x i8> zeroinitializer, %val
+ %ret = select <16 x i1> %cmp, <16 x i8> %val, <16 x i8> %neg
+ ret <16 x i8> %ret
+}
+
+; Test with sgt.
+define <16 x i8> @f8(<16 x i8> %val) {
+; CHECK-LABEL: f8:
+; CHECK: vlpb [[REG:%v[0-9]+]], %v24
+; CHECK: vlcb %v24, [[REG]]
+; CHECK: br %r14
+ %cmp = icmp sgt <16 x i8> %val, zeroinitializer
+ %neg = sub <16 x i8> zeroinitializer, %val
+ %ret = select <16 x i1> %cmp, <16 x i8> %neg, <16 x i8> %val
+ ret <16 x i8> %ret
+}
+
+; Test with sge.
+define <16 x i8> @f9(<16 x i8> %val) {
+; CHECK-LABEL: f9:
+; CHECK: vlpb [[REG:%v[0-9]+]], %v24
+; CHECK: vlcb %v24, [[REG]]
+; CHECK: br %r14
+ %cmp = icmp sge <16 x i8> %val, zeroinitializer
+ %neg = sub <16 x i8> zeroinitializer, %val
+ %ret = select <16 x i1> %cmp, <16 x i8> %neg, <16 x i8> %val
+ ret <16 x i8> %ret
+}
+
+; Test with an SRA-based boolean vector.
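+; With %shr = %val >>s 7, each lane of %shr is all-ones when the matching
+; lane of %val is negative and all-zeros otherwise, so
+; (%shr & -%val) | (~%shr & %val) is just |%val|, which is what VLPB
+; computes directly.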
+define <16 x i8> @f10(<16 x i8> %val) {
+; CHECK-LABEL: f10:
+; CHECK: vlpb %v24, %v24
+; CHECK: br %r14
+ %shr = ashr <16 x i8> %val,
+ <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,
+ i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
+ %neg = sub <16 x i8> zeroinitializer, %val
+ %and1 = and <16 x i8> %shr, %neg
+ %not = xor <16 x i8> %shr,
+ <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1,
+ i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+ %and2 = and <16 x i8> %not, %val
+ %ret = or <16 x i8> %and1, %and2
+ ret <16 x i8> %ret
+}
+
+; ...and again in reverse
+define <16 x i8> @f11(<16 x i8> %val) {
+; CHECK-LABEL: f11:
+; CHECK: vlpb [[REG:%v[0-9]+]], %v24
+; CHECK: vlcb %v24, [[REG]]
+; CHECK: br %r14
+ %shr = ashr <16 x i8> %val,
+ <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,
+ i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
+ %and1 = and <16 x i8> %shr, %val
+ %not = xor <16 x i8> %shr,
+ <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1,
+ i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+ %neg = sub <16 x i8> zeroinitializer, %val
+ %and2 = and <16 x i8> %not, %neg
+ %ret = or <16 x i8> %and1, %and2
+ ret <16 x i8> %ret
+}
diff --git a/test/CodeGen/SystemZ/vec-abs-02.ll b/test/CodeGen/SystemZ/vec-abs-02.ll
new file mode 100644
index 000000000000..c5af619f0ba6
--- /dev/null
+++ b/test/CodeGen/SystemZ/vec-abs-02.ll
@@ -0,0 +1,142 @@
+; Test v8i16 absolute.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
+; Test with slt.
+define <8 x i16> @f1(<8 x i16> %val) {
+; CHECK-LABEL: f1:
+; CHECK: vlph %v24, %v24
+; CHECK: br %r14
+ %cmp = icmp slt <8 x i16> %val, zeroinitializer
+ %neg = sub <8 x i16> zeroinitializer, %val
+ %ret = select <8 x i1> %cmp, <8 x i16> %neg, <8 x i16> %val
+ ret <8 x i16> %ret
+}
+
+; Test with sle.
+define <8 x i16> @f2(<8 x i16> %val) {
+; CHECK-LABEL: f2:
+; CHECK: vlph %v24, %v24
+; CHECK: br %r14
+ %cmp = icmp sle <8 x i16> %val, zeroinitializer
+ %neg = sub <8 x i16> zeroinitializer, %val
+ %ret = select <8 x i1> %cmp, <8 x i16> %neg, <8 x i16> %val
+ ret <8 x i16> %ret
+}
+
+; Test with sgt.
+define <8 x i16> @f3(<8 x i16> %val) {
+; CHECK-LABEL: f3:
+; CHECK: vlph %v24, %v24
+; CHECK: br %r14
+ %cmp = icmp sgt <8 x i16> %val, zeroinitializer
+ %neg = sub <8 x i16> zeroinitializer, %val
+ %ret = select <8 x i1> %cmp, <8 x i16> %val, <8 x i16> %neg
+ ret <8 x i16> %ret
+}
+
+; Test with sge.
+define <8 x i16> @f4(<8 x i16> %val) {
+; CHECK-LABEL: f4:
+; CHECK: vlph %v24, %v24
+; CHECK: br %r14
+ %cmp = icmp sge <8 x i16> %val, zeroinitializer
+ %neg = sub <8 x i16> zeroinitializer, %val
+ %ret = select <8 x i1> %cmp, <8 x i16> %val, <8 x i16> %neg
+ ret <8 x i16> %ret
+}
+
+; Test that negative absolute uses VLPH too. There is no vector equivalent
+; of LOAD NEGATIVE.
+define <8 x i16> @f5(<8 x i16> %val) {
+; CHECK-LABEL: f5:
+; CHECK: vlph [[REG:%v[0-9]+]], %v24
+; CHECK: vlch %v24, [[REG]]
+; CHECK: br %r14
+ %cmp = icmp slt <8 x i16> %val, zeroinitializer
+ %neg = sub <8 x i16> zeroinitializer, %val
+ %abs = select <8 x i1> %cmp, <8 x i16> %neg, <8 x i16> %val
+ %ret = sub <8 x i16> zeroinitializer, %abs
+ ret <8 x i16> %ret
+}
+
+; Try another form of negative absolute (slt version).
+define <8 x i16> @f6(<8 x i16> %val) {
+; CHECK-LABEL: f6:
+; CHECK: vlph [[REG:%v[0-9]+]], %v24
+; CHECK: vlch %v24, [[REG]]
+; CHECK: br %r14
+ %cmp = icmp slt <8 x i16> %val, zeroinitializer
+ %neg = sub <8 x i16> zeroinitializer, %val
+ %ret = select <8 x i1> %cmp, <8 x i16> %val, <8 x i16> %neg
+ ret <8 x i16> %ret
+}
+
+; Test with sle.
+define <8 x i16> @f7(<8 x i16> %val) {
+; CHECK-LABEL: f7:
+; CHECK: vlph [[REG:%v[0-9]+]], %v24
+; CHECK: vlch %v24, [[REG]]
+; CHECK: br %r14
+ %cmp = icmp sle <8 x i16> %val, zeroinitializer
+ %neg = sub <8 x i16> zeroinitializer, %val
+ %ret = select <8 x i1> %cmp, <8 x i16> %val, <8 x i16> %neg
+ ret <8 x i16> %ret
+}
+
+; Test with sgt.
+define <8 x i16> @f8(<8 x i16> %val) {
+; CHECK-LABEL: f8:
+; CHECK: vlph [[REG:%v[0-9]+]], %v24
+; CHECK: vlch %v24, [[REG]]
+; CHECK: br %r14
+ %cmp = icmp sgt <8 x i16> %val, zeroinitializer
+ %neg = sub <8 x i16> zeroinitializer, %val
+ %ret = select <8 x i1> %cmp, <8 x i16> %neg, <8 x i16> %val
+ ret <8 x i16> %ret
+}
+
+; Test with sge.
+define <8 x i16> @f9(<8 x i16> %val) {
+; CHECK-LABEL: f9:
+; CHECK: vlph [[REG:%v[0-9]+]], %v24
+; CHECK: vlch %v24, [[REG]]
+; CHECK: br %r14
+ %cmp = icmp sge <8 x i16> %val, zeroinitializer
+ %neg = sub <8 x i16> zeroinitializer, %val
+ %ret = select <8 x i1> %cmp, <8 x i16> %neg, <8 x i16> %val
+ ret <8 x i16> %ret
+}
+
+; Test with an SRA-based boolean vector.
+define <8 x i16> @f10(<8 x i16> %val) {
+; CHECK-LABEL: f10:
+; CHECK: vlph %v24, %v24
+; CHECK: br %r14
+ %shr = ashr <8 x i16> %val,
+ <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+ %neg = sub <8 x i16> zeroinitializer, %val
+ %and1 = and <8 x i16> %shr, %neg
+ %not = xor <8 x i16> %shr,
+ <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+ %and2 = and <8 x i16> %not, %val
+ %ret = or <8 x i16> %and1, %and2
+ ret <8 x i16> %ret
+}
+
+; ...and again in reverse
+define <8 x i16> @f11(<8 x i16> %val) {
+; CHECK-LABEL: f11:
+; CHECK: vlph [[REG:%v[0-9]+]], %v24
+; CHECK: vlch %v24, [[REG]]
+; CHECK: br %r14
+ %shr = ashr <8 x i16> %val,
+ <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+ %and1 = and <8 x i16> %shr, %val
+ %not = xor <8 x i16> %shr,
+ <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+ %neg = sub <8 x i16> zeroinitializer, %val
+ %and2 = and <8 x i16> %not, %neg
+ %ret = or <8 x i16> %and1, %and2
+ ret <8 x i16> %ret
+}
diff --git a/test/CodeGen/SystemZ/vec-abs-03.ll b/test/CodeGen/SystemZ/vec-abs-03.ll
new file mode 100644
index 000000000000..cb17a8895e1a
--- /dev/null
+++ b/test/CodeGen/SystemZ/vec-abs-03.ll
@@ -0,0 +1,138 @@
+; Test v4i32 absolute.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
+; Test with slt.
+define <4 x i32> @f1(<4 x i32> %val) {
+; CHECK-LABEL: f1:
+; CHECK: vlpf %v24, %v24
+; CHECK: br %r14
+ %cmp = icmp slt <4 x i32> %val, zeroinitializer
+ %neg = sub <4 x i32> zeroinitializer, %val
+ %ret = select <4 x i1> %cmp, <4 x i32> %neg, <4 x i32> %val
+ ret <4 x i32> %ret
+}
+
+; Test with sle.
+define <4 x i32> @f2(<4 x i32> %val) {
+; CHECK-LABEL: f2:
+; CHECK: vlpf %v24, %v24
+; CHECK: br %r14
+ %cmp = icmp sle <4 x i32> %val, zeroinitializer
+ %neg = sub <4 x i32> zeroinitializer, %val
+ %ret = select <4 x i1> %cmp, <4 x i32> %neg, <4 x i32> %val
+ ret <4 x i32> %ret
+}
+
+; Test with sgt.
+define <4 x i32> @f3(<4 x i32> %val) {
+; CHECK-LABEL: f3:
+; CHECK: vlpf %v24, %v24
+; CHECK: br %r14
+ %cmp = icmp sgt <4 x i32> %val, zeroinitializer
+ %neg = sub <4 x i32> zeroinitializer, %val
+ %ret = select <4 x i1> %cmp, <4 x i32> %val, <4 x i32> %neg
+ ret <4 x i32> %ret
+}
+
+; Test with sge.
+define <4 x i32> @f4(<4 x i32> %val) {
+; CHECK-LABEL: f4:
+; CHECK: vlpf %v24, %v24
+; CHECK: br %r14
+ %cmp = icmp sge <4 x i32> %val, zeroinitializer
+ %neg = sub <4 x i32> zeroinitializer, %val
+ %ret = select <4 x i1> %cmp, <4 x i32> %val, <4 x i32> %neg
+ ret <4 x i32> %ret
+}
+
+; Test that negative absolute uses VLPF too. There is no vector equivalent
+; of LOAD NEGATIVE.
+define <4 x i32> @f5(<4 x i32> %val) {
+; CHECK-LABEL: f5:
+; CHECK: vlpf [[REG:%v[0-9]+]], %v24
+; CHECK: vlcf %v24, [[REG]]
+; CHECK: br %r14
+ %cmp = icmp slt <4 x i32> %val, zeroinitializer
+ %neg = sub <4 x i32> zeroinitializer, %val
+ %abs = select <4 x i1> %cmp, <4 x i32> %neg, <4 x i32> %val
+ %ret = sub <4 x i32> zeroinitializer, %abs
+ ret <4 x i32> %ret
+}
+
+; Try another form of negative absolute (slt version).
+define <4 x i32> @f6(<4 x i32> %val) {
+; CHECK-LABEL: f6:
+; CHECK: vlpf [[REG:%v[0-9]+]], %v24
+; CHECK: vlcf %v24, [[REG]]
+; CHECK: br %r14
+ %cmp = icmp slt <4 x i32> %val, zeroinitializer
+ %neg = sub <4 x i32> zeroinitializer, %val
+ %ret = select <4 x i1> %cmp, <4 x i32> %val, <4 x i32> %neg
+ ret <4 x i32> %ret
+}
+
+; Test with sle.
+define <4 x i32> @f7(<4 x i32> %val) {
+; CHECK-LABEL: f7:
+; CHECK: vlpf [[REG:%v[0-9]+]], %v24
+; CHECK: vlcf %v24, [[REG]]
+; CHECK: br %r14
+ %cmp = icmp sle <4 x i32> %val, zeroinitializer
+ %neg = sub <4 x i32> zeroinitializer, %val
+ %ret = select <4 x i1> %cmp, <4 x i32> %val, <4 x i32> %neg
+ ret <4 x i32> %ret
+}
+
+; Test with sgt.
+define <4 x i32> @f8(<4 x i32> %val) {
+; CHECK-LABEL: f8:
+; CHECK: vlpf [[REG:%v[0-9]+]], %v24
+; CHECK: vlcf %v24, [[REG]]
+; CHECK: br %r14
+ %cmp = icmp sgt <4 x i32> %val, zeroinitializer
+ %neg = sub <4 x i32> zeroinitializer, %val
+ %ret = select <4 x i1> %cmp, <4 x i32> %neg, <4 x i32> %val
+ ret <4 x i32> %ret
+}
+
+; Test with sge.
+define <4 x i32> @f9(<4 x i32> %val) {
+; CHECK-LABEL: f9:
+; CHECK: vlpf [[REG:%v[0-9]+]], %v24
+; CHECK: vlcf %v24, [[REG]]
+; CHECK: br %r14
+ %cmp = icmp sge <4 x i32> %val, zeroinitializer
+ %neg = sub <4 x i32> zeroinitializer, %val
+ %ret = select <4 x i1> %cmp, <4 x i32> %neg, <4 x i32> %val
+ ret <4 x i32> %ret
+}
+
+; Test with an SRA-based boolean vector.
+define <4 x i32> @f10(<4 x i32> %val) {
+; CHECK-LABEL: f10:
+; CHECK: vlpf %v24, %v24
+; CHECK: br %r14
+ %shr = ashr <4 x i32> %val, <i32 31, i32 31, i32 31, i32 31>
+ %neg = sub <4 x i32> zeroinitializer, %val
+ %and1 = and <4 x i32> %shr, %neg
+ %not = xor <4 x i32> %shr, <i32 -1, i32 -1, i32 -1, i32 -1>
+ %and2 = and <4 x i32> %not, %val
+ %ret = or <4 x i32> %and1, %and2
+ ret <4 x i32> %ret
+}
+
+; ...and again in reverse
+define <4 x i32> @f11(<4 x i32> %val) {
+; CHECK-LABEL: f11:
+; CHECK: vlpf [[REG:%v[0-9]+]], %v24
+; CHECK: vlcf %v24, [[REG]]
+; CHECK: br %r14
+ %shr = ashr <4 x i32> %val, <i32 31, i32 31, i32 31, i32 31>
+ %and1 = and <4 x i32> %shr, %val
+ %not = xor <4 x i32> %shr, <i32 -1, i32 -1, i32 -1, i32 -1>
+ %neg = sub <4 x i32> zeroinitializer, %val
+ %and2 = and <4 x i32> %not, %neg
+ %ret = or <4 x i32> %and1, %and2
+ ret <4 x i32> %ret
+}
diff --git a/test/CodeGen/SystemZ/vec-abs-04.ll b/test/CodeGen/SystemZ/vec-abs-04.ll
new file mode 100644
index 000000000000..31c489b00b35
--- /dev/null
+++ b/test/CodeGen/SystemZ/vec-abs-04.ll
@@ -0,0 +1,138 @@
+; Test v2i64 absolute.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
+; Test with slt.
+define <2 x i64> @f1(<2 x i64> %val) {
+; CHECK-LABEL: f1:
+; CHECK: vlpg %v24, %v24
+; CHECK: br %r14
+ %cmp = icmp slt <2 x i64> %val, zeroinitializer
+ %neg = sub <2 x i64> zeroinitializer, %val
+ %ret = select <2 x i1> %cmp, <2 x i64> %neg, <2 x i64> %val
+ ret <2 x i64> %ret
+}
+
+; Test with sle.
+define <2 x i64> @f2(<2 x i64> %val) {
+; CHECK-LABEL: f2:
+; CHECK: vlpg %v24, %v24
+; CHECK: br %r14
+ %cmp = icmp sle <2 x i64> %val, zeroinitializer
+ %neg = sub <2 x i64> zeroinitializer, %val
+ %ret = select <2 x i1> %cmp, <2 x i64> %neg, <2 x i64> %val
+ ret <2 x i64> %ret
+}
+
+; Test with sgt.
+define <2 x i64> @f3(<2 x i64> %val) {
+; CHECK-LABEL: f3:
+; CHECK: vlpg %v24, %v24
+; CHECK: br %r14
+ %cmp = icmp sgt <2 x i64> %val, zeroinitializer
+ %neg = sub <2 x i64> zeroinitializer, %val
+ %ret = select <2 x i1> %cmp, <2 x i64> %val, <2 x i64> %neg
+ ret <2 x i64> %ret
+}
+
+; Test with sge.
+define <2 x i64> @f4(<2 x i64> %val) {
+; CHECK-LABEL: f4:
+; CHECK: vlpg %v24, %v24
+; CHECK: br %r14
+ %cmp = icmp sge <2 x i64> %val, zeroinitializer
+ %neg = sub <2 x i64> zeroinitializer, %val
+ %ret = select <2 x i1> %cmp, <2 x i64> %val, <2 x i64> %neg
+ ret <2 x i64> %ret
+}
+
+; Test that negative absolute uses VLPG too. There is no vector equivalent
+; of LOAD NEGATIVE.
+define <2 x i64> @f5(<2 x i64> %val) {
+; CHECK-LABEL: f5:
+; CHECK: vlpg [[REG:%v[0-9]+]], %v24
+; CHECK: vlcg %v24, [[REG]]
+; CHECK: br %r14
+ %cmp = icmp slt <2 x i64> %val, zeroinitializer
+ %neg = sub <2 x i64> zeroinitializer, %val
+ %abs = select <2 x i1> %cmp, <2 x i64> %neg, <2 x i64> %val
+ %ret = sub <2 x i64> zeroinitializer, %abs
+ ret <2 x i64> %ret
+}
+
+; Try another form of negative absolute (slt version).
+define <2 x i64> @f6(<2 x i64> %val) {
+; CHECK-LABEL: f6:
+; CHECK: vlpg [[REG:%v[0-9]+]], %v24
+; CHECK: vlcg %v24, [[REG]]
+; CHECK: br %r14
+ %cmp = icmp slt <2 x i64> %val, zeroinitializer
+ %neg = sub <2 x i64> zeroinitializer, %val
+ %ret = select <2 x i1> %cmp, <2 x i64> %val, <2 x i64> %neg
+ ret <2 x i64> %ret
+}
+
+; Test with sle.
+define <2 x i64> @f7(<2 x i64> %val) {
+; CHECK-LABEL: f7:
+; CHECK: vlpg [[REG:%v[0-9]+]], %v24
+; CHECK: vlcg %v24, [[REG]]
+; CHECK: br %r14
+ %cmp = icmp sle <2 x i64> %val, zeroinitializer
+ %neg = sub <2 x i64> zeroinitializer, %val
+ %ret = select <2 x i1> %cmp, <2 x i64> %val, <2 x i64> %neg
+ ret <2 x i64> %ret
+}
+
+; Test with sgt.
+define <2 x i64> @f8(<2 x i64> %val) {
+; CHECK-LABEL: f8:
+; CHECK: vlpg [[REG:%v[0-9]+]], %v24
+; CHECK: vlcg %v24, [[REG]]
+; CHECK: br %r14
+ %cmp = icmp sgt <2 x i64> %val, zeroinitializer
+ %neg = sub <2 x i64> zeroinitializer, %val
+ %ret = select <2 x i1> %cmp, <2 x i64> %neg, <2 x i64> %val
+ ret <2 x i64> %ret
+}
+
+; Test with sge.
+define <2 x i64> @f9(<2 x i64> %val) {
+; CHECK-LABEL: f9:
+; CHECK: vlpg [[REG:%v[0-9]+]], %v24
+; CHECK: vlcg %v24, [[REG]]
+; CHECK: br %r14
+ %cmp = icmp sge <2 x i64> %val, zeroinitializer
+ %neg = sub <2 x i64> zeroinitializer, %val
+ %ret = select <2 x i1> %cmp, <2 x i64> %neg, <2 x i64> %val
+ ret <2 x i64> %ret
+}
+
+; Test with an SRA-based boolean vector.
+define <2 x i64> @f10(<2 x i64> %val) {
+; CHECK-LABEL: f10:
+; CHECK: vlpg %v24, %v24
+; CHECK: br %r14
+ %shr = ashr <2 x i64> %val, <i64 63, i64 63>
+ %neg = sub <2 x i64> zeroinitializer, %val
+ %and1 = and <2 x i64> %shr, %neg
+ %not = xor <2 x i64> %shr, <i64 -1, i64 -1>
+ %and2 = and <2 x i64> %not, %val
+ %ret = or <2 x i64> %and1, %and2
+ ret <2 x i64> %ret
+}
+
+; ...and again in reverse
+define <2 x i64> @f11(<2 x i64> %val) {
+; CHECK-LABEL: f11:
+; CHECK: vlpg [[REG:%v[0-9]+]], %v24
+; CHECK: vlcg %v24, [[REG]]
+; CHECK: br %r14
+ %shr = ashr <2 x i64> %val, <i64 63, i64 63>
+ %and1 = and <2 x i64> %shr, %val
+ %not = xor <2 x i64> %shr, <i64 -1, i64 -1>
+ %neg = sub <2 x i64> zeroinitializer, %val
+ %and2 = and <2 x i64> %not, %neg
+ %ret = or <2 x i64> %and1, %and2
+ ret <2 x i64> %ret
+}
diff --git a/test/CodeGen/SystemZ/vec-abs-05.ll b/test/CodeGen/SystemZ/vec-abs-05.ll
new file mode 100644
index 000000000000..63210f87b94e
--- /dev/null
+++ b/test/CodeGen/SystemZ/vec-abs-05.ll
@@ -0,0 +1,46 @@
+; Test f64 and v2f64 absolute.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
+declare double @llvm.fabs.f64(double)
+declare <2 x double> @llvm.fabs.v2f64(<2 x double>)
+
+; Test a plain absolute.
+define <2 x double> @f1(<2 x double> %val) {
+; CHECK-LABEL: f1:
+; CHECK: vflpdb %v24, %v24
+; CHECK: br %r14
+ %ret = call <2 x double> @llvm.fabs.v2f64(<2 x double> %val)
+ ret <2 x double> %ret
+}
+
+; Test a negative absolute.
+define <2 x double> @f2(<2 x double> %val) {
+; CHECK-LABEL: f2:
+; CHECK: vflndb %v24, %v24
+; CHECK: br %r14
+ %abs = call <2 x double> @llvm.fabs.v2f64(<2 x double> %val)
+ %ret = fsub <2 x double> <double -0.0, double -0.0>, %abs
+ ret <2 x double> %ret
+}
+
+; Test an f64 absolute that uses vector registers.
+define double @f3(<2 x double> %val) {
+; CHECK-LABEL: f3:
+; CHECK: wflpdb %f0, %v24
+; CHECK: br %r14
+ %scalar = extractelement <2 x double> %val, i32 0
+ %ret = call double @llvm.fabs.f64(double %scalar)
+ ret double %ret
+}
+
+; Test an f64 negative absolute that uses vector registers.
+define double @f4(<2 x double> %val) {
+; CHECK-LABEL: f4:
+; CHECK: wflndb %f0, %v24
+; CHECK: br %r14
+ %scalar = extractelement <2 x double> %val, i32 0
+ %abs = call double @llvm.fabs.f64(double %scalar)
+ %ret = fsub double -0.0, %abs
+ ret double %ret
+}
diff --git a/test/CodeGen/SystemZ/vec-add-01.ll b/test/CodeGen/SystemZ/vec-add-01.ll
new file mode 100644
index 000000000000..317034377671
--- /dev/null
+++ b/test/CodeGen/SystemZ/vec-add-01.ll
@@ -0,0 +1,60 @@
+; Test vector addition.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
+; Test a v16i8 addition.
+define <16 x i8> @f1(<16 x i8> %dummy, <16 x i8> %val1, <16 x i8> %val2) {
+; CHECK-LABEL: f1:
+; CHECK: vab %v24, %v26, %v28
+; CHECK: br %r14
+ %ret = add <16 x i8> %val1, %val2
+ ret <16 x i8> %ret
+}
+
+; Test a v8i16 addition.
+define <8 x i16> @f2(<8 x i16> %dummy, <8 x i16> %val1, <8 x i16> %val2) {
+; CHECK-LABEL: f2:
+; CHECK: vah %v24, %v26, %v28
+; CHECK: br %r14
+ %ret = add <8 x i16> %val1, %val2
+ ret <8 x i16> %ret
+}
+
+; Test a v4i32 addition.
+define <4 x i32> @f3(<4 x i32> %dummy, <4 x i32> %val1, <4 x i32> %val2) {
+; CHECK-LABEL: f3:
+; CHECK: vaf %v24, %v26, %v28
+; CHECK: br %r14
+ %ret = add <4 x i32> %val1, %val2
+ ret <4 x i32> %ret
+}
+
+; Test a v2i64 addition.
+define <2 x i64> @f4(<2 x i64> %dummy, <2 x i64> %val1, <2 x i64> %val2) {
+; CHECK-LABEL: f4:
+; CHECK: vag %v24, %v26, %v28
+; CHECK: br %r14
+ %ret = add <2 x i64> %val1, %val2
+ ret <2 x i64> %ret
+}
+
+; Test a v2f64 addition.
+define <2 x double> @f5(<2 x double> %dummy, <2 x double> %val1,
+ <2 x double> %val2) {
+; CHECK-LABEL: f5:
+; CHECK: vfadb %v24, %v26, %v28
+; CHECK: br %r14
+ %ret = fadd <2 x double> %val1, %val2
+ ret <2 x double> %ret
+}
+
+; Test an f64 addition that uses vector registers.
+define double @f6(<2 x double> %val1, <2 x double> %val2) {
+; CHECK-LABEL: f6:
+; CHECK: wfadb %f0, %v24, %v26
+; CHECK: br %r14
+ %scalar1 = extractelement <2 x double> %val1, i32 0
+ %scalar2 = extractelement <2 x double> %val2, i32 0
+ %ret = fadd double %scalar1, %scalar2
+ ret double %ret
+}
diff --git a/test/CodeGen/SystemZ/vec-and-01.ll b/test/CodeGen/SystemZ/vec-and-01.ll
new file mode 100644
index 000000000000..d467de69cea2
--- /dev/null
+++ b/test/CodeGen/SystemZ/vec-and-01.ll
@@ -0,0 +1,39 @@
+; Test vector AND.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
+; Test a v16i8 AND.
+define <16 x i8> @f1(<16 x i8> %dummy, <16 x i8> %val1, <16 x i8> %val2) {
+; CHECK-LABEL: f1:
+; CHECK: vn %v24, %v26, %v28
+; CHECK: br %r14
+ %ret = and <16 x i8> %val1, %val2
+ ret <16 x i8> %ret
+}
+
+; Test a v8i16 AND.
+define <8 x i16> @f2(<8 x i16> %dummy, <8 x i16> %val1, <8 x i16> %val2) {
+; CHECK-LABEL: f2:
+; CHECK: vn %v24, %v26, %v28
+; CHECK: br %r14
+ %ret = and <8 x i16> %val1, %val2
+ ret <8 x i16> %ret
+}
+
+; Test a v4i32 AND.
+define <4 x i32> @f3(<4 x i32> %dummy, <4 x i32> %val1, <4 x i32> %val2) {
+; CHECK-LABEL: f3:
+; CHECK: vn %v24, %v26, %v28
+; CHECK: br %r14
+ %ret = and <4 x i32> %val1, %val2
+ ret <4 x i32> %ret
+}
+
+; Test a v2i64 AND.
+define <2 x i64> @f4(<2 x i64> %dummy, <2 x i64> %val1, <2 x i64> %val2) {
+; CHECK-LABEL: f4:
+; CHECK: vn %v24, %v26, %v28
+; CHECK: br %r14
+ %ret = and <2 x i64> %val1, %val2
+ ret <2 x i64> %ret
+}
diff --git a/test/CodeGen/SystemZ/vec-and-02.ll b/test/CodeGen/SystemZ/vec-and-02.ll
new file mode 100644
index 000000000000..30bc92416892
--- /dev/null
+++ b/test/CodeGen/SystemZ/vec-and-02.ll
@@ -0,0 +1,91 @@
+; Test vector AND-NOT.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
+; Test a v16i8 AND-NOT.
+define <16 x i8> @f1(<16 x i8> %dummy, <16 x i8> %val1, <16 x i8> %val2) {
+; CHECK-LABEL: f1:
+; CHECK: vnc %v24, %v26, %v28
+; CHECK: br %r14
+ %not = xor <16 x i8> %val2, <i8 -1, i8 -1, i8 -1, i8 -1,
+ i8 -1, i8 -1, i8 -1, i8 -1,
+ i8 -1, i8 -1, i8 -1, i8 -1,
+ i8 -1, i8 -1, i8 -1, i8 -1>
+ %ret = and <16 x i8> %val1, %not
+ ret <16 x i8> %ret
+}
+
+; ...and again with the reverse.
+define <16 x i8> @f2(<16 x i8> %dummy, <16 x i8> %val1, <16 x i8> %val2) {
+; CHECK-LABEL: f2:
+; CHECK: vnc %v24, %v28, %v26
+; CHECK: br %r14
+ %not = xor <16 x i8> %val1, <i8 -1, i8 -1, i8 -1, i8 -1,
+ i8 -1, i8 -1, i8 -1, i8 -1,
+ i8 -1, i8 -1, i8 -1, i8 -1,
+ i8 -1, i8 -1, i8 -1, i8 -1>
+ %ret = and <16 x i8> %not, %val2
+ ret <16 x i8> %ret
+}
+
+; Test a v8i16 AND-NOT.
+define <8 x i16> @f3(<8 x i16> %dummy, <8 x i16> %val1, <8 x i16> %val2) {
+; CHECK-LABEL: f3:
+; CHECK: vnc %v24, %v26, %v28
+; CHECK: br %r14
+ %not = xor <8 x i16> %val2, <i16 -1, i16 -1, i16 -1, i16 -1,
+ i16 -1, i16 -1, i16 -1, i16 -1>
+ %ret = and <8 x i16> %val1, %not
+ ret <8 x i16> %ret
+}
+
+; ...and again with the reverse.
+define <8 x i16> @f4(<8 x i16> %dummy, <8 x i16> %val1, <8 x i16> %val2) {
+; CHECK-LABEL: f4:
+; CHECK: vnc %v24, %v28, %v26
+; CHECK: br %r14
+ %not = xor <8 x i16> %val1, <i16 -1, i16 -1, i16 -1, i16 -1,
+ i16 -1, i16 -1, i16 -1, i16 -1>
+ %ret = and <8 x i16> %not, %val2
+ ret <8 x i16> %ret
+}
+
+; Test a v4i32 AND-NOT.
+define <4 x i32> @f5(<4 x i32> %dummy, <4 x i32> %val1, <4 x i32> %val2) {
+; CHECK-LABEL: f5:
+; CHECK: vnc %v24, %v26, %v28
+; CHECK: br %r14
+ %not = xor <4 x i32> %val2, <i32 -1, i32 -1, i32 -1, i32 -1>
+ %ret = and <4 x i32> %val1, %not
+ ret <4 x i32> %ret
+}
+
+; ...and again with the reverse.
+define <4 x i32> @f6(<4 x i32> %dummy, <4 x i32> %val1, <4 x i32> %val2) {
+; CHECK-LABEL: f6:
+; CHECK: vnc %v24, %v28, %v26
+; CHECK: br %r14
+ %not = xor <4 x i32> %val1, <i32 -1, i32 -1, i32 -1, i32 -1>
+ %ret = and <4 x i32> %not, %val2
+ ret <4 x i32> %ret
+}
+
+; Test a v2i64 AND-NOT.
+define <2 x i64> @f7(<2 x i64> %dummy, <2 x i64> %val1, <2 x i64> %val2) {
+; CHECK-LABEL: f7:
+; CHECK: vnc %v24, %v26, %v28
+; CHECK: br %r14
+ %not = xor <2 x i64> %val2, <i64 -1, i64 -1>
+ %ret = and <2 x i64> %val1, %not
+ ret <2 x i64> %ret
+}
+
+; ...and again with the reverse.
+define <2 x i64> @f8(<2 x i64> %dummy, <2 x i64> %val1, <2 x i64> %val2) {
+; CHECK-LABEL: f8:
+; CHECK: vnc %v24, %v28, %v26
+; CHECK: br %r14
+ %not = xor <2 x i64> %val1, <i64 -1, i64 -1>
+ %ret = and <2 x i64> %not, %val2
+ ret <2 x i64> %ret
+}
diff --git a/test/CodeGen/SystemZ/vec-and-03.ll b/test/CodeGen/SystemZ/vec-and-03.ll
new file mode 100644
index 000000000000..c73d570fb7b0
--- /dev/null
+++ b/test/CodeGen/SystemZ/vec-and-03.ll
@@ -0,0 +1,113 @@
+; Test vector zero extensions, which need to be implemented as ANDs.
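+; The expected masks follow from the instruction encodings: VREPI* replicates
+; its immediate (here 1) into every element, and each bit of a VGBM immediate
+; turns the corresponding result byte into 0xff.  Hence 21845 = 0x5555 keeps
+; the low byte of each halfword, 4369 = 0x1111 and 13107 = 0x3333 keep the
+; low 1 and 2 bytes of each word, and 257, 771 and 3855 = 0x0101, 0x0303 and
+; 0x0f0f keep the low 1, 2 and 4 bytes of each doubleword.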
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
+; Test a v16i1->v16i8 extension.
+define <16 x i8> @f1(<16 x i8> %val) {
+; CHECK-LABEL: f1:
+; CHECK: vrepib [[REG:%v[0-9]+]], 1
+; CHECK: vn %v24, %v24, [[REG]]
+; CHECK: br %r14
+ %trunc = trunc <16 x i8> %val to <16 x i1>
+ %ret = zext <16 x i1> %trunc to <16 x i8>
+ ret <16 x i8> %ret
+}
+
+; Test a v8i1->v8i16 extension.
+define <8 x i16> @f2(<8 x i16> %val) {
+; CHECK-LABEL: f2:
+; CHECK: vrepih [[REG:%v[0-9]+]], 1
+; CHECK: vn %v24, %v24, [[REG]]
+; CHECK: br %r14
+ %trunc = trunc <8 x i16> %val to <8 x i1>
+ %ret = zext <8 x i1> %trunc to <8 x i16>
+ ret <8 x i16> %ret
+}
+
+; Test a v8i8->v8i16 extension.
+define <8 x i16> @f3(<8 x i16> %val) {
+; CHECK-LABEL: f3:
+; CHECK: vgbm [[REG:%v[0-9]+]], 21845
+; CHECK: vn %v24, %v24, [[REG]]
+; CHECK: br %r14
+ %trunc = trunc <8 x i16> %val to <8 x i8>
+ %ret = zext <8 x i8> %trunc to <8 x i16>
+ ret <8 x i16> %ret
+}
+
+; Test a v4i1->v4i32 extension.
+define <4 x i32> @f4(<4 x i32> %val) {
+; CHECK-LABEL: f4:
+; CHECK: vrepif [[REG:%v[0-9]+]], 1
+; CHECK: vn %v24, %v24, [[REG]]
+; CHECK: br %r14
+ %trunc = trunc <4 x i32> %val to <4 x i1>
+ %ret = zext <4 x i1> %trunc to <4 x i32>
+ ret <4 x i32> %ret
+}
+
+; Test a v4i8->v4i32 extension.
+define <4 x i32> @f5(<4 x i32> %val) {
+; CHECK-LABEL: f5:
+; CHECK: vgbm [[REG:%v[0-9]+]], 4369
+; CHECK: vn %v24, %v24, [[REG]]
+; CHECK: br %r14
+ %trunc = trunc <4 x i32> %val to <4 x i8>
+ %ret = zext <4 x i8> %trunc to <4 x i32>
+ ret <4 x i32> %ret
+}
+
+; Test a v4i16->v4i32 extension.
+define <4 x i32> @f6(<4 x i32> %val) {
+; CHECK-LABEL: f6:
+; CHECK: vgbm [[REG:%v[0-9]+]], 13107
+; CHECK: vn %v24, %v24, [[REG]]
+; CHECK: br %r14
+ %trunc = trunc <4 x i32> %val to <4 x i16>
+ %ret = zext <4 x i16> %trunc to <4 x i32>
+ ret <4 x i32> %ret
+}
+
+; Test a v2i1->v2i64 extension.
+define <2 x i64> @f7(<2 x i64> %val) {
+; CHECK-LABEL: f7:
+; CHECK: vrepig [[REG:%v[0-9]+]], 1
+; CHECK: vn %v24, %v24, [[REG]]
+; CHECK: br %r14
+ %trunc = trunc <2 x i64> %val to <2 x i1>
+ %ret = zext <2 x i1> %trunc to <2 x i64>
+ ret <2 x i64> %ret
+}
+
+; Test a v2i8->v2i64 extension.
+define <2 x i64> @f8(<2 x i64> %val) {
+; CHECK-LABEL: f8:
+; CHECK: vgbm [[REG:%v[0-9]+]], 257
+; CHECK: vn %v24, %v24, [[REG]]
+; CHECK: br %r14
+ %trunc = trunc <2 x i64> %val to <2 x i8>
+ %ret = zext <2 x i8> %trunc to <2 x i64>
+ ret <2 x i64> %ret
+}
+
+; Test a v2i16->v2i64 extension.
+define <2 x i64> @f9(<2 x i64> %val) {
+; CHECK-LABEL: f9:
+; CHECK: vgbm [[REG:%v[0-9]+]], 771
+; CHECK: vn %v24, %v24, [[REG]]
+; CHECK: br %r14
+ %trunc = trunc <2 x i64> %val to <2 x i16>
+ %ret = zext <2 x i16> %trunc to <2 x i64>
+ ret <2 x i64> %ret
+}
+
+; Test a v2i32->v2i64 extension.
+define <2 x i64> @f10(<2 x i64> %val) {
+; CHECK-LABEL: f10:
+; CHECK: vgbm [[REG:%v[0-9]+]], 3855
+; CHECK: vn %v24, %v24, [[REG]]
+; CHECK: br %r14
+ %trunc = trunc <2 x i64> %val to <2 x i32>
+ %ret = zext <2 x i32> %trunc to <2 x i64>
+ ret <2 x i64> %ret
+}
diff --git a/test/CodeGen/SystemZ/vec-args-01.ll b/test/CodeGen/SystemZ/vec-args-01.ll
new file mode 100644
index 000000000000..e07ab7447b2a
--- /dev/null
+++ b/test/CodeGen/SystemZ/vec-args-01.ll
@@ -0,0 +1,48 @@
+; Test the handling of named vector arguments.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s -check-prefix=CHECK-VEC
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s -check-prefix=CHECK-STACK
+
+; This routine has 6 integer arguments, which fill up r2-r6 and
+; the stack slot at offset 160, and 10 vector arguments, which
+; fill up v24-v31 and the two double-wide stack slots at 168
+; and 184.
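+; The frame size the checks below expect follows from that layout: 160 bytes
+; of register save area, 8 bytes for the sixth integer argument at offset
+; 160, and 16 bytes each for the ninth and tenth vector arguments at 168 and
+; 184, giving 160 + 8 + 16 + 16 = 200 and hence the "aghi %r15, -200".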
+declare void @bar(i64, i64, i64, i64, i64, i64,
+ <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>,
+ <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>,
+ <4 x i32>, <4 x i32>)
+
+define void @foo() {
+; CHECK-VEC-LABEL: foo:
+; CHECK-VEC-DAG: vrepif %v24, 1
+; CHECK-VEC-DAG: vrepif %v26, 2
+; CHECK-VEC-DAG: vrepif %v28, 3
+; CHECK-VEC-DAG: vrepif %v30, 4
+; CHECK-VEC-DAG: vrepif %v25, 5
+; CHECK-VEC-DAG: vrepif %v27, 6
+; CHECK-VEC-DAG: vrepif %v29, 7
+; CHECK-VEC-DAG: vrepif %v31, 8
+; CHECK-VEC: brasl %r14, bar@PLT
+;
+; CHECK-STACK-LABEL: foo:
+; CHECK-STACK: aghi %r15, -200
+; CHECK-STACK-DAG: mvghi 160(%r15), 6
+; CHECK-STACK-DAG: vrepif [[REG1:%v[0-9]+]], 9
+; CHECK-STACK-DAG: vst [[REG1]], 168(%r15)
+; CHECK-STACK-DAG: vrepif [[REG2:%v[0-9]+]], 10
+; CHECK-STACK-DAG: vst [[REG2]], 184(%r15)
+; CHECK-STACK: brasl %r14, bar@PLT
+
+ call void @bar (i64 1, i64 2, i64 3, i64 4, i64 5, i64 6,
+ <4 x i32> <i32 1, i32 1, i32 1, i32 1>,
+ <4 x i32> <i32 2, i32 2, i32 2, i32 2>,
+ <4 x i32> <i32 3, i32 3, i32 3, i32 3>,
+ <4 x i32> <i32 4, i32 4, i32 4, i32 4>,
+ <4 x i32> <i32 5, i32 5, i32 5, i32 5>,
+ <4 x i32> <i32 6, i32 6, i32 6, i32 6>,
+ <4 x i32> <i32 7, i32 7, i32 7, i32 7>,
+ <4 x i32> <i32 8, i32 8, i32 8, i32 8>,
+ <4 x i32> <i32 9, i32 9, i32 9, i32 9>,
+ <4 x i32> <i32 10, i32 10, i32 10, i32 10>)
+ ret void
+}
diff --git a/test/CodeGen/SystemZ/vec-args-02.ll b/test/CodeGen/SystemZ/vec-args-02.ll
new file mode 100644
index 000000000000..b6081598326a
--- /dev/null
+++ b/test/CodeGen/SystemZ/vec-args-02.ll
@@ -0,0 +1,31 @@
+; Test the handling of unnamed vector arguments.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s -check-prefix=CHECK-VEC
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s -check-prefix=CHECK-STACK
+
+; This routine is called with two named vector arguments (passed
+; in %v24 and %v26) and two unnamed vector arguments (passed
+; in the double-wide stack slots at 160 and 176).
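+; The expected frame is the 160-byte register save area plus the two 16-byte
+; slots for the unnamed vectors at 160 and 176, i.e. 160 + 16 + 16 = 192,
+; matching the "aghi %r15, -192".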
+declare void @bar(<4 x i32>, <4 x i32>, ...)
+
+define void @foo() {
+; CHECK-VEC-LABEL: foo:
+; CHECK-VEC-DAG: vrepif %v24, 1
+; CHECK-VEC-DAG: vrepif %v26, 2
+; CHECK-VEC: brasl %r14, bar@PLT
+;
+; CHECK-STACK-LABEL: foo:
+; CHECK-STACK: aghi %r15, -192
+; CHECK-STACK-DAG: vrepif [[REG1:%v[0-9]+]], 3
+; CHECK-STACK-DAG: vst [[REG1]], 160(%r15)
+; CHECK-STACK-DAG: vrepif [[REG2:%v[0-9]+]], 4
+; CHECK-STACK-DAG: vst [[REG2]], 176(%r15)
+; CHECK-STACK: brasl %r14, bar@PLT
+
+ call void (<4 x i32>, <4 x i32>, ...) @bar
+ (<4 x i32> <i32 1, i32 1, i32 1, i32 1>,
+ <4 x i32> <i32 2, i32 2, i32 2, i32 2>,
+ <4 x i32> <i32 3, i32 3, i32 3, i32 3>,
+ <4 x i32> <i32 4, i32 4, i32 4, i32 4>)
+ ret void
+}
diff --git a/test/CodeGen/SystemZ/vec-args-03.ll b/test/CodeGen/SystemZ/vec-args-03.ll
new file mode 100644
index 000000000000..c47d8461021a
--- /dev/null
+++ b/test/CodeGen/SystemZ/vec-args-03.ll
@@ -0,0 +1,30 @@
+; Test the handling of incoming vector arguments.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
+; This routine has 10 vector arguments, which fill up %v24-%v31 and
+; the two double-wide stack slots at 160 and 176.
+define <4 x i32> @foo(<4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3, <4 x i32> %v4,
+ <4 x i32> %v5, <4 x i32> %v6, <4 x i32> %v7, <4 x i32> %v8,
+ <4 x i32> %v9, <4 x i32> %v10) {
+; CHECK-LABEL: foo:
+; CHECK: vl [[REG1:%v[0-9]+]], 176(%r15)
+; CHECK: vsf %v24, %v26, [[REG1]]
+; CHECK: br %r14
+ %y = sub <4 x i32> %v2, %v10
+ ret <4 x i32> %y
+}
+
+; This routine has 10 vector arguments, which fill up %v24-%v31 and
+; the two single-wide stack slots at 160 and 168.
+define <4 x i8> @bar(<4 x i8> %v1, <4 x i8> %v2, <4 x i8> %v3, <4 x i8> %v4,
+ <4 x i8> %v5, <4 x i8> %v6, <4 x i8> %v7, <4 x i8> %v8,
+ <4 x i8> %v9, <4 x i8> %v10) {
+; CHECK-LABEL: bar:
+; CHECK: vlrepg [[REG1:%v[0-9]+]], 168(%r15)
+; CHECK: vsb %v24, %v26, [[REG1]]
+; CHECK: br %r14
+ %y = sub <4 x i8> %v2, %v10
+ ret <4 x i8> %y
+}
+
diff --git a/test/CodeGen/SystemZ/vec-args-04.ll b/test/CodeGen/SystemZ/vec-args-04.ll
new file mode 100644
index 000000000000..3a25404934e2
--- /dev/null
+++ b/test/CodeGen/SystemZ/vec-args-04.ll
@@ -0,0 +1,50 @@
+; Test the handling of named short vector arguments.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s -check-prefix=CHECK-VEC
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s -check-prefix=CHECK-STACK
+
+; This routine has 12 vector arguments, which fill up %v24-%v31
+; and the four single-wide stack slots starting at 160.
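+; The stacked short vectors appear to be passed left-aligned in 8-byte slots
+; that are built up in GPRs, so the constants the checks expect are just the
+; repeated byte patterns: 2304 = 0x0900 for <1 x i8> <i8 9>, 2570 = 0x0a0a
+; for the pair of 10s, 185273099 = 0x0b0b0b0b for the four 11s, and
+; 202116108 = 0x0c0c0c0c, combined via LLIHF and OILF to fill all eight
+; bytes with the eight 12s.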
+declare void @bar(<1 x i8>, <2 x i8>, <4 x i8>, <8 x i8>,
+ <1 x i8>, <2 x i8>, <4 x i8>, <8 x i8>,
+ <1 x i8>, <2 x i8>, <4 x i8>, <8 x i8>)
+
+define void @foo() {
+; CHECK-VEC-LABEL: foo:
+; CHECK-VEC-DAG: vrepib %v24, 1
+; CHECK-VEC-DAG: vrepib %v26, 2
+; CHECK-VEC-DAG: vrepib %v28, 3
+; CHECK-VEC-DAG: vrepib %v30, 4
+; CHECK-VEC-DAG: vrepib %v25, 5
+; CHECK-VEC-DAG: vrepib %v27, 6
+; CHECK-VEC-DAG: vrepib %v29, 7
+; CHECK-VEC-DAG: vrepib %v31, 8
+; CHECK-VEC: brasl %r14, bar@PLT
+;
+; CHECK-STACK-LABEL: foo:
+; CHECK-STACK: aghi %r15, -192
+; CHECK-STACK-DAG: llihh [[REG1:%r[0-9]+]], 2304
+; CHECK-STACK-DAG: stg [[REG1]], 160(%r15)
+; CHECK-STACK-DAG: llihh [[REG2:%r[0-9]+]], 2570
+; CHECK-STACK-DAG: stg [[REG2]], 168(%r15)
+; CHECK-STACK-DAG: llihf [[REG3:%r[0-9]+]], 185273099
+; CHECK-STACK-DAG: stg [[REG3]], 176(%r15)
+; CHECK-STACK-DAG: llihf [[REG4:%r[0-9]+]], 202116108
+; CHECK-STACK-DAG: oilf [[REG4]], 202116108
+; CHECK-STACK-DAG: stg [[REG4]], 184(%r15)
+; CHECK-STACK: brasl %r14, bar@PLT
+
+ call void @bar (<1 x i8> <i8 1>,
+ <2 x i8> <i8 2, i8 2>,
+ <4 x i8> <i8 3, i8 3, i8 3, i8 3>,
+ <8 x i8> <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>,
+ <1 x i8> <i8 5>,
+ <2 x i8> <i8 6, i8 6>,
+ <4 x i8> <i8 7, i8 7, i8 7, i8 7>,
+ <8 x i8> <i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8>,
+ <1 x i8> <i8 9>,
+ <2 x i8> <i8 10, i8 10>,
+ <4 x i8> <i8 11, i8 11, i8 11, i8 11>,
+ <8 x i8> <i8 12, i8 12, i8 12, i8 12, i8 12, i8 12, i8 12, i8 12>)
+ ret void
+}
diff --git a/test/CodeGen/SystemZ/vec-args-05.ll b/test/CodeGen/SystemZ/vec-args-05.ll
new file mode 100644
index 000000000000..cd1448b8611e
--- /dev/null
+++ b/test/CodeGen/SystemZ/vec-args-05.ll
@@ -0,0 +1,32 @@
+; Test the handling of unnamed short vector arguments.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s -check-prefix=CHECK-VEC
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s -check-prefix=CHECK-STACK
+
+; This routine is called with two named vector arguments (passed
+; in %v24 and %v26) and two unnamed vector arguments (passed
+; in the single-wide stack slots at 160 and 168).
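+; The unnamed <4 x i8> values also appear to be passed left-aligned in
+; 8-byte stack slots, so the checks expect 50529027 = 0x03030303 (four 3s)
+; and 67372036 = 0x04040404 (four 4s) loaded into the high halves of the
+; GPRs before being stored.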
+declare void @bar(<4 x i8>, <4 x i8>, ...)
+
+define void @foo() {
+; CHECK-VEC-LABEL: foo:
+; CHECK-VEC-DAG: vrepib %v24, 1
+; CHECK-VEC-DAG: vrepib %v26, 2
+; CHECK-VEC: brasl %r14, bar@PLT
+;
+; CHECK-STACK-LABEL: foo:
+; CHECK-STACK: aghi %r15, -176
+; CHECK-STACK-DAG: llihf [[REG1:%r[0-9]+]], 50529027
+; CHECK-STACK-DAG: stg [[REG1]], 160(%r15)
+; CHECK-STACK-DAG: llihf [[REG2:%r[0-9]+]], 67372036
+; CHECK-STACK-DAG: stg [[REG2]], 168(%r15)
+; CHECK-STACK: brasl %r14, bar@PLT
+
+ call void (<4 x i8>, <4 x i8>, ...) @bar
+ (<4 x i8> <i8 1, i8 1, i8 1, i8 1>,
+ <4 x i8> <i8 2, i8 2, i8 2, i8 2>,
+ <4 x i8> <i8 3, i8 3, i8 3, i8 3>,
+ <4 x i8> <i8 4, i8 4, i8 4, i8 4>)
+ ret void
+}
+
diff --git a/test/CodeGen/SystemZ/vec-args-error-01.ll b/test/CodeGen/SystemZ/vec-args-error-01.ll
new file mode 100644
index 000000000000..e2f537949595
--- /dev/null
+++ b/test/CodeGen/SystemZ/vec-args-error-01.ll
@@ -0,0 +1,9 @@
+; Verify that we detect unsupported single-element vector types.
+
+; RUN: not llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 2>&1 | FileCheck %s
+
+define void @foo(<1 x i128>) {
+ ret void
+}
+
+; CHECK: LLVM ERROR: Unsupported vector argument or return type
diff --git a/test/CodeGen/SystemZ/vec-args-error-02.ll b/test/CodeGen/SystemZ/vec-args-error-02.ll
new file mode 100644
index 000000000000..a5ae1102a748
--- /dev/null
+++ b/test/CodeGen/SystemZ/vec-args-error-02.ll
@@ -0,0 +1,9 @@
+; Verify that we detect unsupported single-element vector types.
+
+; RUN: not llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 2>&1 | FileCheck %s
+
+define <1 x i128> @foo() {
+ ret <1 x i128><i128 0>
+}
+
+; CHECK: LLVM ERROR: Unsupported vector argument or return type
diff --git a/test/CodeGen/SystemZ/vec-args-error-03.ll b/test/CodeGen/SystemZ/vec-args-error-03.ll
new file mode 100644
index 000000000000..14698aae43bc
--- /dev/null
+++ b/test/CodeGen/SystemZ/vec-args-error-03.ll
@@ -0,0 +1,12 @@
+; Verify that we detect unsupported single-element vector types.
+
+; RUN: not llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 2>&1 | FileCheck %s
+
+declare void @bar(<1 x i128>)
+
+define void @foo() {
+ call void @bar (<1 x i128> <i128 0>)
+ ret void
+}
+
+; CHECK: LLVM ERROR: Unsupported vector argument or return type
diff --git a/test/CodeGen/SystemZ/vec-args-error-04.ll b/test/CodeGen/SystemZ/vec-args-error-04.ll
new file mode 100644
index 000000000000..a54ee90022c8
--- /dev/null
+++ b/test/CodeGen/SystemZ/vec-args-error-04.ll
@@ -0,0 +1,12 @@
+; Verify that we detect unsupported single-element vector types.
+
+; RUN: not llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 2>&1 | FileCheck %s
+
+declare <1 x i128> @bar()
+
+define void @foo() {
+ %res = call <1 x i128> @bar ()
+ ret void
+}
+
+; CHECK: LLVM ERROR: Unsupported vector argument or return type
diff --git a/test/CodeGen/SystemZ/vec-args-error-05.ll b/test/CodeGen/SystemZ/vec-args-error-05.ll
new file mode 100644
index 000000000000..067deb1c88b8
--- /dev/null
+++ b/test/CodeGen/SystemZ/vec-args-error-05.ll
@@ -0,0 +1,9 @@
+; Verify that we detect unsupported single-element vector types.
+
+; RUN: not llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 2>&1 | FileCheck %s
+
+define void @foo(<1 x fp128>) {
+ ret void
+}
+
+; CHECK: LLVM ERROR: Unsupported vector argument or return type
diff --git a/test/CodeGen/SystemZ/vec-args-error-06.ll b/test/CodeGen/SystemZ/vec-args-error-06.ll
new file mode 100644
index 000000000000..a9184d735750
--- /dev/null
+++ b/test/CodeGen/SystemZ/vec-args-error-06.ll
@@ -0,0 +1,9 @@
+; Verify that we detect unsupported single-element vector types.
+
+; RUN: not llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 2>&1 | FileCheck %s
+
+define <1 x fp128> @foo() {
+ ret <1 x fp128><fp128 0xL00000000000000000000000000000000>
+}
+
+; CHECK: LLVM ERROR: Unsupported vector argument or return type
diff --git a/test/CodeGen/SystemZ/vec-args-error-07.ll b/test/CodeGen/SystemZ/vec-args-error-07.ll
new file mode 100644
index 000000000000..4e9140093915
--- /dev/null
+++ b/test/CodeGen/SystemZ/vec-args-error-07.ll
@@ -0,0 +1,12 @@
+; Verify that we detect unsupported single-element vector types.
+
+; RUN: not llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 2>&1 | FileCheck %s
+
+declare void @bar(<1 x fp128>)
+
+define void @foo() {
+ call void @bar (<1 x fp128> <fp128 0xL00000000000000000000000000000000>)
+ ret void
+}
+
+; CHECK: LLVM ERROR: Unsupported vector argument or return type
diff --git a/test/CodeGen/SystemZ/vec-args-error-08.ll b/test/CodeGen/SystemZ/vec-args-error-08.ll
new file mode 100644
index 000000000000..7b16b9f46e39
--- /dev/null
+++ b/test/CodeGen/SystemZ/vec-args-error-08.ll
@@ -0,0 +1,12 @@
+; Verify that we detect unsupported single-element vector types.
+
+; RUN: not llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 2>&1 | FileCheck %s
+
+declare <1 x fp128> @bar()
+
+define void @foo() {
+ %res = call <1 x fp128> @bar ()
+ ret void
+}
+
+; CHECK: LLVM ERROR: Unsupported vector argument or return type
diff --git a/test/CodeGen/SystemZ/vec-cmp-01.ll b/test/CodeGen/SystemZ/vec-cmp-01.ll
new file mode 100644
index 000000000000..a7546db8d7f1
--- /dev/null
+++ b/test/CodeGen/SystemZ/vec-cmp-01.ll
@@ -0,0 +1,228 @@
+; Test v16i8 comparisons.
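+; Only "compare equal" (VCEQ) and signed/unsigned "compare high" (VCH/VCHL)
+; are emitted directly; as the checks below suggest, ne/sge/sle/uge/ule are
+; obtained by complementing one of those with VNO (a NOR of a value with
+; itself), and slt/ult simply swap the operands of the "high" compare.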
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
+; Test eq.
+define <16 x i8> @f1(<16 x i8> %dummy, <16 x i8> %val1, <16 x i8> %val2) {
+; CHECK-LABEL: f1:
+; CHECK: vceqb %v24, %v26, %v28
+; CHECK-NEXT: br %r14
+ %cmp = icmp eq <16 x i8> %val1, %val2
+ %ret = sext <16 x i1> %cmp to <16 x i8>
+ ret <16 x i8> %ret
+}
+
+; Test ne.
+define <16 x i8> @f2(<16 x i8> %dummy, <16 x i8> %val1, <16 x i8> %val2) {
+; CHECK-LABEL: f2:
+; CHECK: vceqb [[REG:%v[0-9]+]], %v26, %v28
+; CHECK-NEXT: vno %v24, [[REG]], [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = icmp ne <16 x i8> %val1, %val2
+ %ret = sext <16 x i1> %cmp to <16 x i8>
+ ret <16 x i8> %ret
+}
+
+; Test sgt.
+define <16 x i8> @f3(<16 x i8> %dummy, <16 x i8> %val1, <16 x i8> %val2) {
+; CHECK-LABEL: f3:
+; CHECK: vchb %v24, %v26, %v28
+; CHECK-NEXT: br %r14
+ %cmp = icmp sgt <16 x i8> %val1, %val2
+ %ret = sext <16 x i1> %cmp to <16 x i8>
+ ret <16 x i8> %ret
+}
+
+; Test sge.
+define <16 x i8> @f4(<16 x i8> %dummy, <16 x i8> %val1, <16 x i8> %val2) {
+; CHECK-LABEL: f4:
+; CHECK: vchb [[REG:%v[0-9]+]], %v28, %v26
+; CHECK-NEXT: vno %v24, [[REG]], [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = icmp sge <16 x i8> %val1, %val2
+ %ret = sext <16 x i1> %cmp to <16 x i8>
+ ret <16 x i8> %ret
+}
+
+; Test sle.
+define <16 x i8> @f5(<16 x i8> %dummy, <16 x i8> %val1, <16 x i8> %val2) {
+; CHECK-LABEL: f5:
+; CHECK: vchb [[REG:%v[0-9]+]], %v26, %v28
+; CHECK-NEXT: vno %v24, [[REG]], [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = icmp sle <16 x i8> %val1, %val2
+ %ret = sext <16 x i1> %cmp to <16 x i8>
+ ret <16 x i8> %ret
+}
+
+; Test slt.
+define <16 x i8> @f6(<16 x i8> %dummy, <16 x i8> %val1, <16 x i8> %val2) {
+; CHECK-LABEL: f6:
+; CHECK: vchb %v24, %v28, %v26
+; CHECK-NEXT: br %r14
+ %cmp = icmp slt <16 x i8> %val1, %val2
+ %ret = sext <16 x i1> %cmp to <16 x i8>
+ ret <16 x i8> %ret
+}
+
+; Test ugt.
+define <16 x i8> @f7(<16 x i8> %dummy, <16 x i8> %val1, <16 x i8> %val2) {
+; CHECK-LABEL: f7:
+; CHECK: vchlb %v24, %v26, %v28
+; CHECK-NEXT: br %r14
+ %cmp = icmp ugt <16 x i8> %val1, %val2
+ %ret = sext <16 x i1> %cmp to <16 x i8>
+ ret <16 x i8> %ret
+}
+
+; Test uge.
+define <16 x i8> @f8(<16 x i8> %dummy, <16 x i8> %val1, <16 x i8> %val2) {
+; CHECK-LABEL: f8:
+; CHECK: vchlb [[REG:%v[0-9]+]], %v28, %v26
+; CHECK-NEXT: vno %v24, [[REG]], [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = icmp uge <16 x i8> %val1, %val2
+ %ret = sext <16 x i1> %cmp to <16 x i8>
+ ret <16 x i8> %ret
+}
+
+; Test ule.
+define <16 x i8> @f9(<16 x i8> %dummy, <16 x i8> %val1, <16 x i8> %val2) {
+; CHECK-LABEL: f9:
+; CHECK: vchlb [[REG:%v[0-9]+]], %v26, %v28
+; CHECK-NEXT: vno %v24, [[REG]], [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = icmp ule <16 x i8> %val1, %val2
+ %ret = sext <16 x i1> %cmp to <16 x i8>
+ ret <16 x i8> %ret
+}
+
+; Test ult.
+define <16 x i8> @f10(<16 x i8> %dummy, <16 x i8> %val1, <16 x i8> %val2) {
+; CHECK-LABEL: f10:
+; CHECK: vchlb %v24, %v28, %v26
+; CHECK-NEXT: br %r14
+ %cmp = icmp ult <16 x i8> %val1, %val2
+ %ret = sext <16 x i1> %cmp to <16 x i8>
+ ret <16 x i8> %ret
+}
+
+; Test eq selects.
+define <16 x i8> @f11(<16 x i8> %val1, <16 x i8> %val2,
+ <16 x i8> %val3, <16 x i8> %val4) {
+; CHECK-LABEL: f11:
+; CHECK: vceqb [[REG:%v[0-9]+]], %v24, %v26
+; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = icmp eq <16 x i8> %val1, %val2
+ %ret = select <16 x i1> %cmp, <16 x i8> %val3, <16 x i8> %val4
+ ret <16 x i8> %ret
+}
+
+; Test ne selects.
+define <16 x i8> @f12(<16 x i8> %val1, <16 x i8> %val2,
+ <16 x i8> %val3, <16 x i8> %val4) {
+; CHECK-LABEL: f12:
+; CHECK: vceqb [[REG:%v[0-9]+]], %v24, %v26
+; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = icmp ne <16 x i8> %val1, %val2
+ %ret = select <16 x i1> %cmp, <16 x i8> %val3, <16 x i8> %val4
+ ret <16 x i8> %ret
+}
+
+; Test sgt selects.
+define <16 x i8> @f13(<16 x i8> %val1, <16 x i8> %val2,
+ <16 x i8> %val3, <16 x i8> %val4) {
+; CHECK-LABEL: f13:
+; CHECK: vchb [[REG:%v[0-9]+]], %v24, %v26
+; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = icmp sgt <16 x i8> %val1, %val2
+ %ret = select <16 x i1> %cmp, <16 x i8> %val3, <16 x i8> %val4
+ ret <16 x i8> %ret
+}
+
+; Test sge selects.
+define <16 x i8> @f14(<16 x i8> %val1, <16 x i8> %val2,
+ <16 x i8> %val3, <16 x i8> %val4) {
+; CHECK-LABEL: f14:
+; CHECK: vchb [[REG:%v[0-9]+]], %v26, %v24
+; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = icmp sge <16 x i8> %val1, %val2
+ %ret = select <16 x i1> %cmp, <16 x i8> %val3, <16 x i8> %val4
+ ret <16 x i8> %ret
+}
+
+; Test sle selects.
+define <16 x i8> @f15(<16 x i8> %val1, <16 x i8> %val2,
+ <16 x i8> %val3, <16 x i8> %val4) {
+; CHECK-LABEL: f15:
+; CHECK: vchb [[REG:%v[0-9]+]], %v24, %v26
+; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = icmp sle <16 x i8> %val1, %val2
+ %ret = select <16 x i1> %cmp, <16 x i8> %val3, <16 x i8> %val4
+ ret <16 x i8> %ret
+}
+
+; Test slt selects.
+define <16 x i8> @f16(<16 x i8> %val1, <16 x i8> %val2,
+ <16 x i8> %val3, <16 x i8> %val4) {
+; CHECK-LABEL: f16:
+; CHECK: vchb [[REG:%v[0-9]+]], %v26, %v24
+; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = icmp slt <16 x i8> %val1, %val2
+ %ret = select <16 x i1> %cmp, <16 x i8> %val3, <16 x i8> %val4
+ ret <16 x i8> %ret
+}
+
+; Test ugt selects.
+define <16 x i8> @f17(<16 x i8> %val1, <16 x i8> %val2,
+ <16 x i8> %val3, <16 x i8> %val4) {
+; CHECK-LABEL: f17:
+; CHECK: vchlb [[REG:%v[0-9]+]], %v24, %v26
+; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = icmp ugt <16 x i8> %val1, %val2
+ %ret = select <16 x i1> %cmp, <16 x i8> %val3, <16 x i8> %val4
+ ret <16 x i8> %ret
+}
+
+; Test uge selects.
+define <16 x i8> @f18(<16 x i8> %val1, <16 x i8> %val2,
+ <16 x i8> %val3, <16 x i8> %val4) {
+; CHECK-LABEL: f18:
+; CHECK: vchlb [[REG:%v[0-9]+]], %v26, %v24
+; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = icmp uge <16 x i8> %val1, %val2
+ %ret = select <16 x i1> %cmp, <16 x i8> %val3, <16 x i8> %val4
+ ret <16 x i8> %ret
+}
+
+; Test ule selects.
+define <16 x i8> @f19(<16 x i8> %val1, <16 x i8> %val2,
+ <16 x i8> %val3, <16 x i8> %val4) {
+; CHECK-LABEL: f19:
+; CHECK: vchlb [[REG:%v[0-9]+]], %v24, %v26
+; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = icmp ule <16 x i8> %val1, %val2
+ %ret = select <16 x i1> %cmp, <16 x i8> %val3, <16 x i8> %val4
+ ret <16 x i8> %ret
+}
+
+; Test ult selects.
+define <16 x i8> @f20(<16 x i8> %val1, <16 x i8> %val2,
+ <16 x i8> %val3, <16 x i8> %val4) {
+; CHECK-LABEL: f20:
+; CHECK: vchlb [[REG:%v[0-9]+]], %v26, %v24
+; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = icmp ult <16 x i8> %val1, %val2
+ %ret = select <16 x i1> %cmp, <16 x i8> %val3, <16 x i8> %val4
+ ret <16 x i8> %ret
+}
diff --git a/test/CodeGen/SystemZ/vec-cmp-02.ll b/test/CodeGen/SystemZ/vec-cmp-02.ll
new file mode 100644
index 000000000000..78fb46c01c08
--- /dev/null
+++ b/test/CodeGen/SystemZ/vec-cmp-02.ll
@@ -0,0 +1,228 @@
+; Test v8i16 comparisons.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
+; Test eq.
+define <8 x i16> @f1(<8 x i16> %dummy, <8 x i16> %val1, <8 x i16> %val2) {
+; CHECK-LABEL: f1:
+; CHECK: vceqh %v24, %v26, %v28
+; CHECK-NEXT: br %r14
+ %cmp = icmp eq <8 x i16> %val1, %val2
+ %ret = sext <8 x i1> %cmp to <8 x i16>
+ ret <8 x i16> %ret
+}
+
+; Test ne.
+define <8 x i16> @f2(<8 x i16> %dummy, <8 x i16> %val1, <8 x i16> %val2) {
+; CHECK-LABEL: f2:
+; CHECK: vceqh [[REG:%v[0-9]+]], %v26, %v28
+; CHECK-NEXT: vno %v24, [[REG]], [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = icmp ne <8 x i16> %val1, %val2
+ %ret = sext <8 x i1> %cmp to <8 x i16>
+ ret <8 x i16> %ret
+}
+
+; Test sgt.
+define <8 x i16> @f3(<8 x i16> %dummy, <8 x i16> %val1, <8 x i16> %val2) {
+; CHECK-LABEL: f3:
+; CHECK: vchh %v24, %v26, %v28
+; CHECK-NEXT: br %r14
+ %cmp = icmp sgt <8 x i16> %val1, %val2
+ %ret = sext <8 x i1> %cmp to <8 x i16>
+ ret <8 x i16> %ret
+}
+
+; Test sge.
+define <8 x i16> @f4(<8 x i16> %dummy, <8 x i16> %val1, <8 x i16> %val2) {
+; CHECK-LABEL: f4:
+; CHECK: vchh [[REG:%v[0-9]+]], %v28, %v26
+; CHECK-NEXT: vno %v24, [[REG]], [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = icmp sge <8 x i16> %val1, %val2
+ %ret = sext <8 x i1> %cmp to <8 x i16>
+ ret <8 x i16> %ret
+}
+
+; Test sle.
+define <8 x i16> @f5(<8 x i16> %dummy, <8 x i16> %val1, <8 x i16> %val2) {
+; CHECK-LABEL: f5:
+; CHECK: vchh [[REG:%v[0-9]+]], %v26, %v28
+; CHECK-NEXT: vno %v24, [[REG]], [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = icmp sle <8 x i16> %val1, %val2
+ %ret = sext <8 x i1> %cmp to <8 x i16>
+ ret <8 x i16> %ret
+}
+
+; Test slt.
+define <8 x i16> @f6(<8 x i16> %dummy, <8 x i16> %val1, <8 x i16> %val2) {
+; CHECK-LABEL: f6:
+; CHECK: vchh %v24, %v28, %v26
+; CHECK-NEXT: br %r14
+ %cmp = icmp slt <8 x i16> %val1, %val2
+ %ret = sext <8 x i1> %cmp to <8 x i16>
+ ret <8 x i16> %ret
+}
+
+; Test ugt.
+define <8 x i16> @f7(<8 x i16> %dummy, <8 x i16> %val1, <8 x i16> %val2) {
+; CHECK-LABEL: f7:
+; CHECK: vchlh %v24, %v26, %v28
+; CHECK-NEXT: br %r14
+ %cmp = icmp ugt <8 x i16> %val1, %val2
+ %ret = sext <8 x i1> %cmp to <8 x i16>
+ ret <8 x i16> %ret
+}
+
+; Test uge.
+define <8 x i16> @f8(<8 x i16> %dummy, <8 x i16> %val1, <8 x i16> %val2) {
+; CHECK-LABEL: f8:
+; CHECK: vchlh [[REG:%v[0-9]+]], %v28, %v26
+; CHECK-NEXT: vno %v24, [[REG]], [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = icmp uge <8 x i16> %val1, %val2
+ %ret = sext <8 x i1> %cmp to <8 x i16>
+ ret <8 x i16> %ret
+}
+
+; Test ule.
+define <8 x i16> @f9(<8 x i16> %dummy, <8 x i16> %val1, <8 x i16> %val2) {
+; CHECK-LABEL: f9:
+; CHECK: vchlh [[REG:%v[0-9]+]], %v26, %v28
+; CHECK-NEXT: vno %v24, [[REG]], [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = icmp ule <8 x i16> %val1, %val2
+ %ret = sext <8 x i1> %cmp to <8 x i16>
+ ret <8 x i16> %ret
+}
+
+; Test ult.
+define <8 x i16> @f10(<8 x i16> %dummy, <8 x i16> %val1, <8 x i16> %val2) {
+; CHECK-LABEL: f10:
+; CHECK: vchlh %v24, %v28, %v26
+; CHECK-NEXT: br %r14
+ %cmp = icmp ult <8 x i16> %val1, %val2
+ %ret = sext <8 x i1> %cmp to <8 x i16>
+ ret <8 x i16> %ret
+}
+
+; Test eq selects.
+define <8 x i16> @f11(<8 x i16> %val1, <8 x i16> %val2,
+ <8 x i16> %val3, <8 x i16> %val4) {
+; CHECK-LABEL: f11:
+; CHECK: vceqh [[REG:%v[0-9]+]], %v24, %v26
+; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = icmp eq <8 x i16> %val1, %val2
+ %ret = select <8 x i1> %cmp, <8 x i16> %val3, <8 x i16> %val4
+ ret <8 x i16> %ret
+}
+
+; Test ne selects.
+define <8 x i16> @f12(<8 x i16> %val1, <8 x i16> %val2,
+ <8 x i16> %val3, <8 x i16> %val4) {
+; CHECK-LABEL: f12:
+; CHECK: vceqh [[REG:%v[0-9]+]], %v24, %v26
+; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = icmp ne <8 x i16> %val1, %val2
+ %ret = select <8 x i1> %cmp, <8 x i16> %val3, <8 x i16> %val4
+ ret <8 x i16> %ret
+}
+
+; Test sgt selects.
+define <8 x i16> @f13(<8 x i16> %val1, <8 x i16> %val2,
+ <8 x i16> %val3, <8 x i16> %val4) {
+; CHECK-LABEL: f13:
+; CHECK: vchh [[REG:%v[0-9]+]], %v24, %v26
+; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = icmp sgt <8 x i16> %val1, %val2
+ %ret = select <8 x i1> %cmp, <8 x i16> %val3, <8 x i16> %val4
+ ret <8 x i16> %ret
+}
+
+; Test sge selects.
+define <8 x i16> @f14(<8 x i16> %val1, <8 x i16> %val2,
+ <8 x i16> %val3, <8 x i16> %val4) {
+; CHECK-LABEL: f14:
+; CHECK: vchh [[REG:%v[0-9]+]], %v26, %v24
+; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = icmp sge <8 x i16> %val1, %val2
+ %ret = select <8 x i1> %cmp, <8 x i16> %val3, <8 x i16> %val4
+ ret <8 x i16> %ret
+}
+
+; Test sle selects.
+define <8 x i16> @f15(<8 x i16> %val1, <8 x i16> %val2,
+ <8 x i16> %val3, <8 x i16> %val4) {
+; CHECK-LABEL: f15:
+; CHECK: vchh [[REG:%v[0-9]+]], %v24, %v26
+; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = icmp sle <8 x i16> %val1, %val2
+ %ret = select <8 x i1> %cmp, <8 x i16> %val3, <8 x i16> %val4
+ ret <8 x i16> %ret
+}
+
+; Test slt selects.
+define <8 x i16> @f16(<8 x i16> %val1, <8 x i16> %val2,
+ <8 x i16> %val3, <8 x i16> %val4) {
+; CHECK-LABEL: f16:
+; CHECK: vchh [[REG:%v[0-9]+]], %v26, %v24
+; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = icmp slt <8 x i16> %val1, %val2
+ %ret = select <8 x i1> %cmp, <8 x i16> %val3, <8 x i16> %val4
+ ret <8 x i16> %ret
+}
+
+; Test ugt selects.
+define <8 x i16> @f17(<8 x i16> %val1, <8 x i16> %val2,
+ <8 x i16> %val3, <8 x i16> %val4) {
+; CHECK-LABEL: f17:
+; CHECK: vchlh [[REG:%v[0-9]+]], %v24, %v26
+; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = icmp ugt <8 x i16> %val1, %val2
+ %ret = select <8 x i1> %cmp, <8 x i16> %val3, <8 x i16> %val4
+ ret <8 x i16> %ret
+}
+
+; Test uge selects.
+define <8 x i16> @f18(<8 x i16> %val1, <8 x i16> %val2,
+ <8 x i16> %val3, <8 x i16> %val4) {
+; CHECK-LABEL: f18:
+; CHECK: vchlh [[REG:%v[0-9]+]], %v26, %v24
+; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = icmp uge <8 x i16> %val1, %val2
+ %ret = select <8 x i1> %cmp, <8 x i16> %val3, <8 x i16> %val4
+ ret <8 x i16> %ret
+}
+
+; Test ule selects.
+define <8 x i16> @f19(<8 x i16> %val1, <8 x i16> %val2,
+ <8 x i16> %val3, <8 x i16> %val4) {
+; CHECK-LABEL: f19:
+; CHECK: vchlh [[REG:%v[0-9]+]], %v24, %v26
+; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = icmp ule <8 x i16> %val1, %val2
+ %ret = select <8 x i1> %cmp, <8 x i16> %val3, <8 x i16> %val4
+ ret <8 x i16> %ret
+}
+
+; Test ult selects.
+define <8 x i16> @f20(<8 x i16> %val1, <8 x i16> %val2,
+ <8 x i16> %val3, <8 x i16> %val4) {
+; CHECK-LABEL: f20:
+; CHECK: vchlh [[REG:%v[0-9]+]], %v26, %v24
+; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = icmp ult <8 x i16> %val1, %val2
+ %ret = select <8 x i1> %cmp, <8 x i16> %val3, <8 x i16> %val4
+ ret <8 x i16> %ret
+}
diff --git a/test/CodeGen/SystemZ/vec-cmp-03.ll b/test/CodeGen/SystemZ/vec-cmp-03.ll
new file mode 100644
index 000000000000..4b070acc935b
--- /dev/null
+++ b/test/CodeGen/SystemZ/vec-cmp-03.ll
@@ -0,0 +1,228 @@
+; Test v4i32 comparisons.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
+; Test eq.
+define <4 x i32> @f1(<4 x i32> %dummy, <4 x i32> %val1, <4 x i32> %val2) {
+; CHECK-LABEL: f1:
+; CHECK: vceqf %v24, %v26, %v28
+; CHECK-NEXT: br %r14
+ %cmp = icmp eq <4 x i32> %val1, %val2
+ %ret = sext <4 x i1> %cmp to <4 x i32>
+ ret <4 x i32> %ret
+}
+
+; Test ne.
+define <4 x i32> @f2(<4 x i32> %dummy, <4 x i32> %val1, <4 x i32> %val2) {
+; CHECK-LABEL: f2:
+; CHECK: vceqf [[REG:%v[0-9]+]], %v26, %v28
+; CHECK-NEXT: vno %v24, [[REG]], [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = icmp ne <4 x i32> %val1, %val2
+ %ret = sext <4 x i1> %cmp to <4 x i32>
+ ret <4 x i32> %ret
+}
+
+; Test sgt.
+define <4 x i32> @f3(<4 x i32> %dummy, <4 x i32> %val1, <4 x i32> %val2) {
+; CHECK-LABEL: f3:
+; CHECK: vchf %v24, %v26, %v28
+; CHECK-NEXT: br %r14
+ %cmp = icmp sgt <4 x i32> %val1, %val2
+ %ret = sext <4 x i1> %cmp to <4 x i32>
+ ret <4 x i32> %ret
+}
+
+; Test sge.
+define <4 x i32> @f4(<4 x i32> %dummy, <4 x i32> %val1, <4 x i32> %val2) {
+; CHECK-LABEL: f4:
+; CHECK: vchf [[REG:%v[0-9]+]], %v28, %v26
+; CHECK-NEXT: vno %v24, [[REG]], [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = icmp sge <4 x i32> %val1, %val2
+ %ret = sext <4 x i1> %cmp to <4 x i32>
+ ret <4 x i32> %ret
+}
+
+; Test sle.
+define <4 x i32> @f5(<4 x i32> %dummy, <4 x i32> %val1, <4 x i32> %val2) {
+; CHECK-LABEL: f5:
+; CHECK: vchf [[REG:%v[0-9]+]], %v26, %v28
+; CHECK-NEXT: vno %v24, [[REG]], [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = icmp sle <4 x i32> %val1, %val2
+ %ret = sext <4 x i1> %cmp to <4 x i32>
+ ret <4 x i32> %ret
+}
+
+; Test slt.
+define <4 x i32> @f6(<4 x i32> %dummy, <4 x i32> %val1, <4 x i32> %val2) {
+; CHECK-LABEL: f6:
+; CHECK: vchf %v24, %v28, %v26
+; CHECK-NEXT: br %r14
+ %cmp = icmp slt <4 x i32> %val1, %val2
+ %ret = sext <4 x i1> %cmp to <4 x i32>
+ ret <4 x i32> %ret
+}
+
+; Test ugt.
+define <4 x i32> @f7(<4 x i32> %dummy, <4 x i32> %val1, <4 x i32> %val2) {
+; CHECK-LABEL: f7:
+; CHECK: vchlf %v24, %v26, %v28
+; CHECK-NEXT: br %r14
+ %cmp = icmp ugt <4 x i32> %val1, %val2
+ %ret = sext <4 x i1> %cmp to <4 x i32>
+ ret <4 x i32> %ret
+}
+
+; Test uge.
+define <4 x i32> @f8(<4 x i32> %dummy, <4 x i32> %val1, <4 x i32> %val2) {
+; CHECK-LABEL: f8:
+; CHECK: vchlf [[REG:%v[0-9]+]], %v28, %v26
+; CHECK-NEXT: vno %v24, [[REG]], [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = icmp uge <4 x i32> %val1, %val2
+ %ret = sext <4 x i1> %cmp to <4 x i32>
+ ret <4 x i32> %ret
+}
+
+; Test ule.
+define <4 x i32> @f9(<4 x i32> %dummy, <4 x i32> %val1, <4 x i32> %val2) {
+; CHECK-LABEL: f9:
+; CHECK: vchlf [[REG:%v[0-9]+]], %v26, %v28
+; CHECK-NEXT: vno %v24, [[REG]], [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = icmp ule <4 x i32> %val1, %val2
+ %ret = sext <4 x i1> %cmp to <4 x i32>
+ ret <4 x i32> %ret
+}
+
+; Test ult.
+define <4 x i32> @f10(<4 x i32> %dummy, <4 x i32> %val1, <4 x i32> %val2) {
+; CHECK-LABEL: f10:
+; CHECK: vchlf %v24, %v28, %v26
+; CHECK-NEXT: br %r14
+ %cmp = icmp ult <4 x i32> %val1, %val2
+ %ret = sext <4 x i1> %cmp to <4 x i32>
+ ret <4 x i32> %ret
+}
+
+; Test eq selects.
+define <4 x i32> @f11(<4 x i32> %val1, <4 x i32> %val2,
+ <4 x i32> %val3, <4 x i32> %val4) {
+; CHECK-LABEL: f11:
+; CHECK: vceqf [[REG:%v[0-9]+]], %v24, %v26
+; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = icmp eq <4 x i32> %val1, %val2
+ %ret = select <4 x i1> %cmp, <4 x i32> %val3, <4 x i32> %val4
+ ret <4 x i32> %ret
+}
+
+; Test ne selects.
+define <4 x i32> @f12(<4 x i32> %val1, <4 x i32> %val2,
+ <4 x i32> %val3, <4 x i32> %val4) {
+; CHECK-LABEL: f12:
+; CHECK: vceqf [[REG:%v[0-9]+]], %v24, %v26
+; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = icmp ne <4 x i32> %val1, %val2
+ %ret = select <4 x i1> %cmp, <4 x i32> %val3, <4 x i32> %val4
+ ret <4 x i32> %ret
+}
+
+; Test sgt selects.
+define <4 x i32> @f13(<4 x i32> %val1, <4 x i32> %val2,
+ <4 x i32> %val3, <4 x i32> %val4) {
+; CHECK-LABEL: f13:
+; CHECK: vchf [[REG:%v[0-9]+]], %v24, %v26
+; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = icmp sgt <4 x i32> %val1, %val2
+ %ret = select <4 x i1> %cmp, <4 x i32> %val3, <4 x i32> %val4
+ ret <4 x i32> %ret
+}
+
+; Test sge selects.
+define <4 x i32> @f14(<4 x i32> %val1, <4 x i32> %val2,
+ <4 x i32> %val3, <4 x i32> %val4) {
+; CHECK-LABEL: f14:
+; CHECK: vchf [[REG:%v[0-9]+]], %v26, %v24
+; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = icmp sge <4 x i32> %val1, %val2
+ %ret = select <4 x i1> %cmp, <4 x i32> %val3, <4 x i32> %val4
+ ret <4 x i32> %ret
+}
+
+; Test sle selects.
+define <4 x i32> @f15(<4 x i32> %val1, <4 x i32> %val2,
+ <4 x i32> %val3, <4 x i32> %val4) {
+; CHECK-LABEL: f15:
+; CHECK: vchf [[REG:%v[0-9]+]], %v24, %v26
+; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = icmp sle <4 x i32> %val1, %val2
+ %ret = select <4 x i1> %cmp, <4 x i32> %val3, <4 x i32> %val4
+ ret <4 x i32> %ret
+}
+
+; Test slt selects.
+define <4 x i32> @f16(<4 x i32> %val1, <4 x i32> %val2,
+ <4 x i32> %val3, <4 x i32> %val4) {
+; CHECK-LABEL: f16:
+; CHECK: vchf [[REG:%v[0-9]+]], %v26, %v24
+; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = icmp slt <4 x i32> %val1, %val2
+ %ret = select <4 x i1> %cmp, <4 x i32> %val3, <4 x i32> %val4
+ ret <4 x i32> %ret
+}
+
+; Test ugt selects.
+define <4 x i32> @f17(<4 x i32> %val1, <4 x i32> %val2,
+ <4 x i32> %val3, <4 x i32> %val4) {
+; CHECK-LABEL: f17:
+; CHECK: vchlf [[REG:%v[0-9]+]], %v24, %v26
+; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = icmp ugt <4 x i32> %val1, %val2
+ %ret = select <4 x i1> %cmp, <4 x i32> %val3, <4 x i32> %val4
+ ret <4 x i32> %ret
+}
+
+; Test uge selects.
+define <4 x i32> @f18(<4 x i32> %val1, <4 x i32> %val2,
+ <4 x i32> %val3, <4 x i32> %val4) {
+; CHECK-LABEL: f18:
+; CHECK: vchlf [[REG:%v[0-9]+]], %v26, %v24
+; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = icmp uge <4 x i32> %val1, %val2
+ %ret = select <4 x i1> %cmp, <4 x i32> %val3, <4 x i32> %val4
+ ret <4 x i32> %ret
+}
+
+; Test ule selects.
+define <4 x i32> @f19(<4 x i32> %val1, <4 x i32> %val2,
+ <4 x i32> %val3, <4 x i32> %val4) {
+; CHECK-LABEL: f19:
+; CHECK: vchlf [[REG:%v[0-9]+]], %v24, %v26
+; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = icmp ule <4 x i32> %val1, %val2
+ %ret = select <4 x i1> %cmp, <4 x i32> %val3, <4 x i32> %val4
+ ret <4 x i32> %ret
+}
+
+; Test ult selects.
+define <4 x i32> @f20(<4 x i32> %val1, <4 x i32> %val2,
+ <4 x i32> %val3, <4 x i32> %val4) {
+; CHECK-LABEL: f20:
+; CHECK: vchlf [[REG:%v[0-9]+]], %v26, %v24
+; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = icmp ult <4 x i32> %val1, %val2
+ %ret = select <4 x i1> %cmp, <4 x i32> %val3, <4 x i32> %val4
+ ret <4 x i32> %ret
+}
diff --git a/test/CodeGen/SystemZ/vec-cmp-04.ll b/test/CodeGen/SystemZ/vec-cmp-04.ll
new file mode 100644
index 000000000000..5cecaa7251b7
--- /dev/null
+++ b/test/CodeGen/SystemZ/vec-cmp-04.ll
@@ -0,0 +1,228 @@
+; Test v2i64 comparisons.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
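+; As the CHECK lines below show, the backend is expected to use only the
+; equal (vceqg) and greater-than (vchg/vchlg) compares; ne, sge, sle, uge
+; and ule are matched as the same compares with swapped operands and/or a
+; complemented (vno) result.
+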
+; Test eq.
+define <2 x i64> @f1(<2 x i64> %dummy, <2 x i64> %val1, <2 x i64> %val2) {
+; CHECK-LABEL: f1:
+; CHECK: vceqg %v24, %v26, %v28
+; CHECK-NEXT: br %r14
+ %cmp = icmp eq <2 x i64> %val1, %val2
+ %ret = sext <2 x i1> %cmp to <2 x i64>
+ ret <2 x i64> %ret
+}
+
+; Test ne.
+define <2 x i64> @f2(<2 x i64> %dummy, <2 x i64> %val1, <2 x i64> %val2) {
+; CHECK-LABEL: f2:
+; CHECK: vceqg [[REG:%v[0-9]+]], %v26, %v28
+; CHECK-NEXT: vno %v24, [[REG]], [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = icmp ne <2 x i64> %val1, %val2
+ %ret = sext <2 x i1> %cmp to <2 x i64>
+ ret <2 x i64> %ret
+}
+
+; Test sgt.
+define <2 x i64> @f3(<2 x i64> %dummy, <2 x i64> %val1, <2 x i64> %val2) {
+; CHECK-LABEL: f3:
+; CHECK: vchg %v24, %v26, %v28
+; CHECK-NEXT: br %r14
+ %cmp = icmp sgt <2 x i64> %val1, %val2
+ %ret = sext <2 x i1> %cmp to <2 x i64>
+ ret <2 x i64> %ret
+}
+
+; Test sge.
+define <2 x i64> @f4(<2 x i64> %dummy, <2 x i64> %val1, <2 x i64> %val2) {
+; CHECK-LABEL: f4:
+; CHECK: vchg [[REG:%v[0-9]+]], %v28, %v26
+; CHECK-NEXT: vno %v24, [[REG]], [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = icmp sge <2 x i64> %val1, %val2
+ %ret = sext <2 x i1> %cmp to <2 x i64>
+ ret <2 x i64> %ret
+}
+
+; Test sle.
+define <2 x i64> @f5(<2 x i64> %dummy, <2 x i64> %val1, <2 x i64> %val2) {
+; CHECK-LABEL: f5:
+; CHECK: vchg [[REG:%v[0-9]+]], %v26, %v28
+; CHECK-NEXT: vno %v24, [[REG]], [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = icmp sle <2 x i64> %val1, %val2
+ %ret = sext <2 x i1> %cmp to <2 x i64>
+ ret <2 x i64> %ret
+}
+
+; Test slt.
+define <2 x i64> @f6(<2 x i64> %dummy, <2 x i64> %val1, <2 x i64> %val2) {
+; CHECK-LABEL: f6:
+; CHECK: vchg %v24, %v28, %v26
+; CHECK-NEXT: br %r14
+ %cmp = icmp slt <2 x i64> %val1, %val2
+ %ret = sext <2 x i1> %cmp to <2 x i64>
+ ret <2 x i64> %ret
+}
+
+; Test ugt.
+define <2 x i64> @f7(<2 x i64> %dummy, <2 x i64> %val1, <2 x i64> %val2) {
+; CHECK-LABEL: f7:
+; CHECK: vchlg %v24, %v26, %v28
+; CHECK-NEXT: br %r14
+ %cmp = icmp ugt <2 x i64> %val1, %val2
+ %ret = sext <2 x i1> %cmp to <2 x i64>
+ ret <2 x i64> %ret
+}
+
+; Test uge.
+define <2 x i64> @f8(<2 x i64> %dummy, <2 x i64> %val1, <2 x i64> %val2) {
+; CHECK-LABEL: f8:
+; CHECK: vchlg [[REG:%v[0-9]+]], %v28, %v26
+; CHECK-NEXT: vno %v24, [[REG]], [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = icmp uge <2 x i64> %val1, %val2
+ %ret = sext <2 x i1> %cmp to <2 x i64>
+ ret <2 x i64> %ret
+}
+
+; Test ule.
+define <2 x i64> @f9(<2 x i64> %dummy, <2 x i64> %val1, <2 x i64> %val2) {
+; CHECK-LABEL: f9:
+; CHECK: vchlg [[REG:%v[0-9]+]], %v26, %v28
+; CHECK-NEXT: vno %v24, [[REG]], [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = icmp ule <2 x i64> %val1, %val2
+ %ret = sext <2 x i1> %cmp to <2 x i64>
+ ret <2 x i64> %ret
+}
+
+; Test ult.
+define <2 x i64> @f10(<2 x i64> %dummy, <2 x i64> %val1, <2 x i64> %val2) {
+; CHECK-LABEL: f10:
+; CHECK: vchlg %v24, %v28, %v26
+; CHECK-NEXT: br %r14
+ %cmp = icmp ult <2 x i64> %val1, %val2
+ %ret = sext <2 x i1> %cmp to <2 x i64>
+ ret <2 x i64> %ret
+}
+
+; Test eq selects.
+define <2 x i64> @f11(<2 x i64> %val1, <2 x i64> %val2,
+ <2 x i64> %val3, <2 x i64> %val4) {
+; CHECK-LABEL: f11:
+; CHECK: vceqg [[REG:%v[0-9]+]], %v24, %v26
+; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = icmp eq <2 x i64> %val1, %val2
+ %ret = select <2 x i1> %cmp, <2 x i64> %val3, <2 x i64> %val4
+ ret <2 x i64> %ret
+}
+
+; Test ne selects.
+define <2 x i64> @f12(<2 x i64> %val1, <2 x i64> %val2,
+ <2 x i64> %val3, <2 x i64> %val4) {
+; CHECK-LABEL: f12:
+; CHECK: vceqg [[REG:%v[0-9]+]], %v24, %v26
+; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = icmp ne <2 x i64> %val1, %val2
+ %ret = select <2 x i1> %cmp, <2 x i64> %val3, <2 x i64> %val4
+ ret <2 x i64> %ret
+}
+
+; Test sgt selects.
+define <2 x i64> @f13(<2 x i64> %val1, <2 x i64> %val2,
+ <2 x i64> %val3, <2 x i64> %val4) {
+; CHECK-LABEL: f13:
+; CHECK: vchg [[REG:%v[0-9]+]], %v24, %v26
+; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = icmp sgt <2 x i64> %val1, %val2
+ %ret = select <2 x i1> %cmp, <2 x i64> %val3, <2 x i64> %val4
+ ret <2 x i64> %ret
+}
+
+; Test sge selects.
+define <2 x i64> @f14(<2 x i64> %val1, <2 x i64> %val2,
+ <2 x i64> %val3, <2 x i64> %val4) {
+; CHECK-LABEL: f14:
+; CHECK: vchg [[REG:%v[0-9]+]], %v26, %v24
+; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = icmp sge <2 x i64> %val1, %val2
+ %ret = select <2 x i1> %cmp, <2 x i64> %val3, <2 x i64> %val4
+ ret <2 x i64> %ret
+}
+
+; Test sle selects.
+define <2 x i64> @f15(<2 x i64> %val1, <2 x i64> %val2,
+ <2 x i64> %val3, <2 x i64> %val4) {
+; CHECK-LABEL: f15:
+; CHECK: vchg [[REG:%v[0-9]+]], %v24, %v26
+; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = icmp sle <2 x i64> %val1, %val2
+ %ret = select <2 x i1> %cmp, <2 x i64> %val3, <2 x i64> %val4
+ ret <2 x i64> %ret
+}
+
+; Test slt selects.
+define <2 x i64> @f16(<2 x i64> %val1, <2 x i64> %val2,
+ <2 x i64> %val3, <2 x i64> %val4) {
+; CHECK-LABEL: f16:
+; CHECK: vchg [[REG:%v[0-9]+]], %v26, %v24
+; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = icmp slt <2 x i64> %val1, %val2
+ %ret = select <2 x i1> %cmp, <2 x i64> %val3, <2 x i64> %val4
+ ret <2 x i64> %ret
+}
+
+; Test ugt selects.
+define <2 x i64> @f17(<2 x i64> %val1, <2 x i64> %val2,
+ <2 x i64> %val3, <2 x i64> %val4) {
+; CHECK-LABEL: f17:
+; CHECK: vchlg [[REG:%v[0-9]+]], %v24, %v26
+; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = icmp ugt <2 x i64> %val1, %val2
+ %ret = select <2 x i1> %cmp, <2 x i64> %val3, <2 x i64> %val4
+ ret <2 x i64> %ret
+}
+
+; Test uge selects.
+define <2 x i64> @f18(<2 x i64> %val1, <2 x i64> %val2,
+ <2 x i64> %val3, <2 x i64> %val4) {
+; CHECK-LABEL: f18:
+; CHECK: vchlg [[REG:%v[0-9]+]], %v26, %v24
+; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = icmp uge <2 x i64> %val1, %val2
+ %ret = select <2 x i1> %cmp, <2 x i64> %val3, <2 x i64> %val4
+ ret <2 x i64> %ret
+}
+
+; Test ule selects.
+define <2 x i64> @f19(<2 x i64> %val1, <2 x i64> %val2,
+ <2 x i64> %val3, <2 x i64> %val4) {
+; CHECK-LABEL: f19:
+; CHECK: vchlg [[REG:%v[0-9]+]], %v24, %v26
+; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = icmp ule <2 x i64> %val1, %val2
+ %ret = select <2 x i1> %cmp, <2 x i64> %val3, <2 x i64> %val4
+ ret <2 x i64> %ret
+}
+
+; Test ult selects.
+define <2 x i64> @f20(<2 x i64> %val1, <2 x i64> %val2,
+ <2 x i64> %val3, <2 x i64> %val4) {
+; CHECK-LABEL: f20:
+; CHECK: vchlg [[REG:%v[0-9]+]], %v26, %v24
+; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = icmp ult <2 x i64> %val1, %val2
+ %ret = select <2 x i1> %cmp, <2 x i64> %val3, <2 x i64> %val4
+ ret <2 x i64> %ret
+}
diff --git a/test/CodeGen/SystemZ/vec-cmp-05.ll b/test/CodeGen/SystemZ/vec-cmp-05.ll
new file mode 100644
index 000000000000..74e990960972
--- /dev/null
+++ b/test/CodeGen/SystemZ/vec-cmp-05.ll
@@ -0,0 +1,472 @@
+; Test v4f32 comparisons.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
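+; Each test below expects the v4f32 inputs to be split into high and low
+; halves (vmrhf/vmrlf), widened to v2f64 (vldeb), compared as doubles
+; (vfcedb/vfchdb/vfchedb) and packed back into a single v4i32 mask with
+; vpkg.
+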
+; Test oeq.
+define <4 x i32> @f1(<4 x float> %val1, <4 x float> %val2) {
+; CHECK-LABEL: f1:
+; CHECK-DAG: vmrhf [[HIGH0E:%v[0-9]+]], %v24, %v24
+; CHECK-DAG: vmrlf [[LOW0E:%v[0-9]+]], %v24, %v24
+; CHECK-DAG: vmrhf [[HIGH1E:%v[0-9]+]], %v26, %v26
+; CHECK-DAG: vmrlf [[LOW1E:%v[0-9]+]], %v26, %v26
+; CHECK-DAG: vldeb [[HIGH0D:%v[0-9]+]], [[HIGH0E]]
+; CHECK-DAG: vldeb [[HIGH1D:%v[0-9]+]], [[HIGH1E]]
+; CHECK-DAG: vldeb [[LOW0D:%v[0-9]+]], [[LOW0E]]
+; CHECK-DAG: vldeb [[LOW1D:%v[0-9]+]], [[LOW1E]]
+; CHECK-DAG: vfcedb [[HIGHRES:%v[0-9]+]], [[HIGH0D]], [[HIGH1D]]
+; CHECK-DAG: vfcedb [[LOWRES:%v[0-9]+]], [[LOW0D]], [[LOW1D]]
+; CHECK: vpkg %v24, [[HIGHRES]], [[LOWRES]]
+; CHECK-NEXT: br %r14
+ %cmp = fcmp oeq <4 x float> %val1, %val2
+ %ret = sext <4 x i1> %cmp to <4 x i32>
+ ret <4 x i32> %ret
+}
+
+; Test one.
+define <4 x i32> @f2(<4 x float> %val1, <4 x float> %val2) {
+; CHECK-LABEL: f2:
+; CHECK-DAG: vmrhf [[HIGH0E:%v[0-9]+]], %v24, %v24
+; CHECK-DAG: vmrlf [[LOW0E:%v[0-9]+]], %v24, %v24
+; CHECK-DAG: vmrhf [[HIGH1E:%v[0-9]+]], %v26, %v26
+; CHECK-DAG: vmrlf [[LOW1E:%v[0-9]+]], %v26, %v26
+; CHECK-DAG: vldeb [[HIGH0D:%v[0-9]+]], [[HIGH0E]]
+; CHECK-DAG: vldeb [[HIGH1D:%v[0-9]+]], [[HIGH1E]]
+; CHECK-DAG: vldeb [[LOW0D:%v[0-9]+]], [[LOW0E]]
+; CHECK-DAG: vldeb [[LOW1D:%v[0-9]+]], [[LOW1E]]
+; CHECK-DAG: vfchdb [[HIGHRES0:%v[0-9]+]], [[HIGH0D]], [[HIGH1D]]
+; CHECK-DAG: vfchdb [[LOWRES0:%v[0-9]+]], [[LOW0D]], [[LOW1D]]
+; CHECK-DAG: vfchdb [[HIGHRES1:%v[0-9]+]], [[HIGH1D]], [[HIGH0D]]
+; CHECK-DAG: vfchdb [[LOWRES1:%v[0-9]+]], [[LOW1D]], [[LOW0D]]
+; CHECK-DAG: vpkg [[RES0:%v[0-9]+]], [[HIGHRES0]], [[LOWRES0]]
+; CHECK-DAG: vpkg [[RES1:%v[0-9]+]], [[HIGHRES1]], [[LOWRES1]]
+; CHECK: vo %v24, [[RES1]], [[RES0]]
+; CHECK-NEXT: br %r14
+ %cmp = fcmp one <4 x float> %val1, %val2
+ %ret = sext <4 x i1> %cmp to <4 x i32>
+ ret <4 x i32> %ret
+}
+
+; Test ogt.
+define <4 x i32> @f3(<4 x float> %val1, <4 x float> %val2) {
+; CHECK-LABEL: f3:
+; CHECK-DAG: vmrhf [[HIGH0E:%v[0-9]+]], %v24, %v24
+; CHECK-DAG: vmrlf [[LOW0E:%v[0-9]+]], %v24, %v24
+; CHECK-DAG: vmrhf [[HIGH1E:%v[0-9]+]], %v26, %v26
+; CHECK-DAG: vmrlf [[LOW1E:%v[0-9]+]], %v26, %v26
+; CHECK-DAG: vldeb [[HIGH0D:%v[0-9]+]], [[HIGH0E]]
+; CHECK-DAG: vldeb [[HIGH1D:%v[0-9]+]], [[HIGH1E]]
+; CHECK-DAG: vldeb [[LOW0D:%v[0-9]+]], [[LOW0E]]
+; CHECK-DAG: vldeb [[LOW1D:%v[0-9]+]], [[LOW1E]]
+; CHECK-DAG: vfchdb [[HIGHRES:%v[0-9]+]], [[HIGH0D]], [[HIGH1D]]
+; CHECK-DAG: vfchdb [[LOWRES:%v[0-9]+]], [[LOW0D]], [[LOW1D]]
+; CHECK: vpkg %v24, [[HIGHRES]], [[LOWRES]]
+; CHECK-NEXT: br %r14
+ %cmp = fcmp ogt <4 x float> %val1, %val2
+ %ret = sext <4 x i1> %cmp to <4 x i32>
+ ret <4 x i32> %ret
+}
+
+; Test oge.
+define <4 x i32> @f4(<4 x float> %val1, <4 x float> %val2) {
+; CHECK-LABEL: f4:
+; CHECK-DAG: vmrhf [[HIGH0E:%v[0-9]+]], %v24, %v24
+; CHECK-DAG: vmrlf [[LOW0E:%v[0-9]+]], %v24, %v24
+; CHECK-DAG: vmrhf [[HIGH1E:%v[0-9]+]], %v26, %v26
+; CHECK-DAG: vmrlf [[LOW1E:%v[0-9]+]], %v26, %v26
+; CHECK-DAG: vldeb [[HIGH0D:%v[0-9]+]], [[HIGH0E]]
+; CHECK-DAG: vldeb [[HIGH1D:%v[0-9]+]], [[HIGH1E]]
+; CHECK-DAG: vldeb [[LOW0D:%v[0-9]+]], [[LOW0E]]
+; CHECK-DAG: vldeb [[LOW1D:%v[0-9]+]], [[LOW1E]]
+; CHECK-DAG: vfchedb [[HIGHRES:%v[0-9]+]], [[HIGH0D]], [[HIGH1D]]
+; CHECK-DAG: vfchedb [[LOWRES:%v[0-9]+]], [[LOW0D]], [[LOW1D]]
+; CHECK: vpkg %v24, [[HIGHRES]], [[LOWRES]]
+; CHECK-NEXT: br %r14
+ %cmp = fcmp oge <4 x float> %val1, %val2
+ %ret = sext <4 x i1> %cmp to <4 x i32>
+ ret <4 x i32> %ret
+}
+
+; Test ole.
+define <4 x i32> @f5(<4 x float> %val1, <4 x float> %val2) {
+; CHECK-LABEL: f5:
+; CHECK-DAG: vmrhf [[HIGH0E:%v[0-9]+]], %v24, %v24
+; CHECK-DAG: vmrlf [[LOW0E:%v[0-9]+]], %v24, %v24
+; CHECK-DAG: vmrhf [[HIGH1E:%v[0-9]+]], %v26, %v26
+; CHECK-DAG: vmrlf [[LOW1E:%v[0-9]+]], %v26, %v26
+; CHECK-DAG: vldeb [[HIGH0D:%v[0-9]+]], [[HIGH0E]]
+; CHECK-DAG: vldeb [[HIGH1D:%v[0-9]+]], [[HIGH1E]]
+; CHECK-DAG: vldeb [[LOW0D:%v[0-9]+]], [[LOW0E]]
+; CHECK-DAG: vldeb [[LOW1D:%v[0-9]+]], [[LOW1E]]
+; CHECK-DAG: vfchedb [[HIGHRES:%v[0-9]+]], [[HIGH1D]], [[HIGH0D]]
+; CHECK-DAG: vfchedb [[LOWRES:%v[0-9]+]], [[LOW1D]], [[LOW0D]]
+; CHECK: vpkg %v24, [[HIGHRES]], [[LOWRES]]
+; CHECK-NEXT: br %r14
+ %cmp = fcmp ole <4 x float> %val1, %val2
+ %ret = sext <4 x i1> %cmp to <4 x i32>
+ ret <4 x i32> %ret
+}
+
+; Test olt.
+define <4 x i32> @f6(<4 x float> %val1, <4 x float> %val2) {
+; CHECK-LABEL: f6:
+; CHECK-DAG: vmrhf [[HIGH0E:%v[0-9]+]], %v24, %v24
+; CHECK-DAG: vmrlf [[LOW0E:%v[0-9]+]], %v24, %v24
+; CHECK-DAG: vmrhf [[HIGH1E:%v[0-9]+]], %v26, %v26
+; CHECK-DAG: vmrlf [[LOW1E:%v[0-9]+]], %v26, %v26
+; CHECK-DAG: vldeb [[HIGH0D:%v[0-9]+]], [[HIGH0E]]
+; CHECK-DAG: vldeb [[HIGH1D:%v[0-9]+]], [[HIGH1E]]
+; CHECK-DAG: vldeb [[LOW0D:%v[0-9]+]], [[LOW0E]]
+; CHECK-DAG: vldeb [[LOW1D:%v[0-9]+]], [[LOW1E]]
+; CHECK-DAG: vfchdb [[HIGHRES:%v[0-9]+]], [[HIGH1D]], [[HIGH0D]]
+; CHECK-DAG: vfchdb [[LOWRES:%v[0-9]+]], [[LOW1D]], [[LOW0D]]
+; CHECK: vpkg %v24, [[HIGHRES]], [[LOWRES]]
+; CHECK-NEXT: br %r14
+ %cmp = fcmp olt <4 x float> %val1, %val2
+ %ret = sext <4 x i1> %cmp to <4 x i32>
+ ret <4 x i32> %ret
+}
+
+; Test ueq.
+define <4 x i32> @f7(<4 x float> %val1, <4 x float> %val2) {
+; CHECK-LABEL: f7:
+; CHECK-DAG: vmrhf [[HIGH0E:%v[0-9]+]], %v24, %v24
+; CHECK-DAG: vmrlf [[LOW0E:%v[0-9]+]], %v24, %v24
+; CHECK-DAG: vmrhf [[HIGH1E:%v[0-9]+]], %v26, %v26
+; CHECK-DAG: vmrlf [[LOW1E:%v[0-9]+]], %v26, %v26
+; CHECK-DAG: vldeb [[HIGH0D:%v[0-9]+]], [[HIGH0E]]
+; CHECK-DAG: vldeb [[HIGH1D:%v[0-9]+]], [[HIGH1E]]
+; CHECK-DAG: vldeb [[LOW0D:%v[0-9]+]], [[LOW0E]]
+; CHECK-DAG: vldeb [[LOW1D:%v[0-9]+]], [[LOW1E]]
+; CHECK-DAG: vfchdb [[HIGHRES0:%v[0-9]+]], [[HIGH0D]], [[HIGH1D]]
+; CHECK-DAG: vfchdb [[LOWRES0:%v[0-9]+]], [[LOW0D]], [[LOW1D]]
+; CHECK-DAG: vfchdb [[HIGHRES1:%v[0-9]+]], [[HIGH1D]], [[HIGH0D]]
+; CHECK-DAG: vfchdb [[LOWRES1:%v[0-9]+]], [[LOW1D]], [[LOW0D]]
+; CHECK-DAG: vpkg [[RES0:%v[0-9]+]], [[HIGHRES0]], [[LOWRES0]]
+; CHECK-DAG: vpkg [[RES1:%v[0-9]+]], [[HIGHRES1]], [[LOWRES1]]
+; CHECK: vno %v24, [[RES1]], [[RES0]]
+; CHECK-NEXT: br %r14
+ %cmp = fcmp ueq <4 x float> %val1, %val2
+ %ret = sext <4 x i1> %cmp to <4 x i32>
+ ret <4 x i32> %ret
+}
+
+; Test une.
+define <4 x i32> @f8(<4 x float> %val1, <4 x float> %val2) {
+; CHECK-LABEL: f8:
+; CHECK-DAG: vmrhf [[HIGH0E:%v[0-9]+]], %v24, %v24
+; CHECK-DAG: vmrlf [[LOW0E:%v[0-9]+]], %v24, %v24
+; CHECK-DAG: vmrhf [[HIGH1E:%v[0-9]+]], %v26, %v26
+; CHECK-DAG: vmrlf [[LOW1E:%v[0-9]+]], %v26, %v26
+; CHECK-DAG: vldeb [[HIGH0D:%v[0-9]+]], [[HIGH0E]]
+; CHECK-DAG: vldeb [[HIGH1D:%v[0-9]+]], [[HIGH1E]]
+; CHECK-DAG: vldeb [[LOW0D:%v[0-9]+]], [[LOW0E]]
+; CHECK-DAG: vldeb [[LOW1D:%v[0-9]+]], [[LOW1E]]
+; CHECK-DAG: vfcedb [[HIGHRES:%v[0-9]+]], [[HIGH0D]], [[HIGH1D]]
+; CHECK-DAG: vfcedb [[LOWRES:%v[0-9]+]], [[LOW0D]], [[LOW1D]]
+; CHECK: vpkg [[RES:%v[0-9]+]], [[HIGHRES]], [[LOWRES]]
+; CHECK-NEXT: vno %v24, [[RES]], [[RES]]
+; CHECK-NEXT: br %r14
+ %cmp = fcmp une <4 x float> %val1, %val2
+ %ret = sext <4 x i1> %cmp to <4 x i32>
+ ret <4 x i32> %ret
+}
+
+; Test ugt.
+define <4 x i32> @f9(<4 x float> %val1, <4 x float> %val2) {
+; CHECK-LABEL: f9:
+; CHECK-DAG: vmrhf [[HIGH0E:%v[0-9]+]], %v24, %v24
+; CHECK-DAG: vmrlf [[LOW0E:%v[0-9]+]], %v24, %v24
+; CHECK-DAG: vmrhf [[HIGH1E:%v[0-9]+]], %v26, %v26
+; CHECK-DAG: vmrlf [[LOW1E:%v[0-9]+]], %v26, %v26
+; CHECK-DAG: vldeb [[HIGH0D:%v[0-9]+]], [[HIGH0E]]
+; CHECK-DAG: vldeb [[HIGH1D:%v[0-9]+]], [[HIGH1E]]
+; CHECK-DAG: vldeb [[LOW0D:%v[0-9]+]], [[LOW0E]]
+; CHECK-DAG: vldeb [[LOW1D:%v[0-9]+]], [[LOW1E]]
+; CHECK-DAG: vfchedb [[HIGHRES:%v[0-9]+]], [[HIGH1D]], [[HIGH0D]]
+; CHECK-DAG: vfchedb [[LOWRES:%v[0-9]+]], [[LOW1D]], [[LOW0D]]
+; CHECK: vpkg [[RES:%v[0-9]+]], [[HIGHRES]], [[LOWRES]]
+; CHECK-NEXT: vno %v24, [[RES]], [[RES]]
+; CHECK-NEXT: br %r14
+ %cmp = fcmp ugt <4 x float> %val1, %val2
+ %ret = sext <4 x i1> %cmp to <4 x i32>
+ ret <4 x i32> %ret
+}
+
+; Test uge.
+define <4 x i32> @f10(<4 x float> %val1, <4 x float> %val2) {
+; CHECK-LABEL: f10:
+; CHECK-DAG: vmrhf [[HIGH0E:%v[0-9]+]], %v24, %v24
+; CHECK-DAG: vmrlf [[LOW0E:%v[0-9]+]], %v24, %v24
+; CHECK-DAG: vmrhf [[HIGH1E:%v[0-9]+]], %v26, %v26
+; CHECK-DAG: vmrlf [[LOW1E:%v[0-9]+]], %v26, %v26
+; CHECK-DAG: vldeb [[HIGH0D:%v[0-9]+]], [[HIGH0E]]
+; CHECK-DAG: vldeb [[HIGH1D:%v[0-9]+]], [[HIGH1E]]
+; CHECK-DAG: vldeb [[LOW0D:%v[0-9]+]], [[LOW0E]]
+; CHECK-DAG: vldeb [[LOW1D:%v[0-9]+]], [[LOW1E]]
+; CHECK-DAG: vfchdb [[HIGHRES:%v[0-9]+]], [[HIGH1D]], [[HIGH0D]]
+; CHECK-DAG: vfchdb [[LOWRES:%v[0-9]+]], [[LOW1D]], [[LOW0D]]
+; CHECK: vpkg [[RES:%v[0-9]+]], [[HIGHRES]], [[LOWRES]]
+; CHECK-NEXT: vno %v24, [[RES]], [[RES]]
+; CHECK-NEXT: br %r14
+ %cmp = fcmp uge <4 x float> %val1, %val2
+ %ret = sext <4 x i1> %cmp to <4 x i32>
+ ret <4 x i32> %ret
+}
+
+; Test ule.
+define <4 x i32> @f11(<4 x float> %val1, <4 x float> %val2) {
+; CHECK-LABEL: f11:
+; CHECK-DAG: vmrhf [[HIGH0E:%v[0-9]+]], %v24, %v24
+; CHECK-DAG: vmrlf [[LOW0E:%v[0-9]+]], %v24, %v24
+; CHECK-DAG: vmrhf [[HIGH1E:%v[0-9]+]], %v26, %v26
+; CHECK-DAG: vmrlf [[LOW1E:%v[0-9]+]], %v26, %v26
+; CHECK-DAG: vldeb [[HIGH0D:%v[0-9]+]], [[HIGH0E]]
+; CHECK-DAG: vldeb [[HIGH1D:%v[0-9]+]], [[HIGH1E]]
+; CHECK-DAG: vldeb [[LOW0D:%v[0-9]+]], [[LOW0E]]
+; CHECK-DAG: vldeb [[LOW1D:%v[0-9]+]], [[LOW1E]]
+; CHECK-DAG: vfchdb [[HIGHRES:%v[0-9]+]], [[HIGH0D]], [[HIGH1D]]
+; CHECK-DAG: vfchdb [[LOWRES:%v[0-9]+]], [[LOW0D]], [[LOW1D]]
+; CHECK: vpkg [[RES:%v[0-9]+]], [[HIGHRES]], [[LOWRES]]
+; CHECK-NEXT: vno %v24, [[RES]], [[RES]]
+; CHECK-NEXT: br %r14
+ %cmp = fcmp ule <4 x float> %val1, %val2
+ %ret = sext <4 x i1> %cmp to <4 x i32>
+ ret <4 x i32> %ret
+}
+
+; Test ult.
+define <4 x i32> @f12(<4 x float> %val1, <4 x float> %val2) {
+; CHECK-LABEL: f12:
+; CHECK-DAG: vmrhf [[HIGH0E:%v[0-9]+]], %v24, %v24
+; CHECK-DAG: vmrlf [[LOW0E:%v[0-9]+]], %v24, %v24
+; CHECK-DAG: vmrhf [[HIGH1E:%v[0-9]+]], %v26, %v26
+; CHECK-DAG: vmrlf [[LOW1E:%v[0-9]+]], %v26, %v26
+; CHECK-DAG: vldeb [[HIGH0D:%v[0-9]+]], [[HIGH0E]]
+; CHECK-DAG: vldeb [[HIGH1D:%v[0-9]+]], [[HIGH1E]]
+; CHECK-DAG: vldeb [[LOW0D:%v[0-9]+]], [[LOW0E]]
+; CHECK-DAG: vldeb [[LOW1D:%v[0-9]+]], [[LOW1E]]
+; CHECK-DAG: vfchedb [[HIGHRES:%v[0-9]+]], [[HIGH0D]], [[HIGH1D]]
+; CHECK-DAG: vfchedb [[LOWRES:%v[0-9]+]], [[LOW0D]], [[LOW1D]]
+; CHECK: vpkg [[RES:%v[0-9]+]], [[HIGHRES]], [[LOWRES]]
+; CHECK-NEXT: vno %v24, [[RES]], [[RES]]
+; CHECK-NEXT: br %r14
+ %cmp = fcmp ult <4 x float> %val1, %val2
+ %ret = sext <4 x i1> %cmp to <4 x i32>
+ ret <4 x i32> %ret
+}
+
+; Test ord.
+define <4 x i32> @f13(<4 x float> %val1, <4 x float> %val2) {
+; CHECK-LABEL: f13:
+; CHECK-DAG: vmrhf [[HIGH0E:%v[0-9]+]], %v24, %v24
+; CHECK-DAG: vmrlf [[LOW0E:%v[0-9]+]], %v24, %v24
+; CHECK-DAG: vmrhf [[HIGH1E:%v[0-9]+]], %v26, %v26
+; CHECK-DAG: vmrlf [[LOW1E:%v[0-9]+]], %v26, %v26
+; CHECK-DAG: vldeb [[HIGH0D:%v[0-9]+]], [[HIGH0E]]
+; CHECK-DAG: vldeb [[HIGH1D:%v[0-9]+]], [[HIGH1E]]
+; CHECK-DAG: vldeb [[LOW0D:%v[0-9]+]], [[LOW0E]]
+; CHECK-DAG: vldeb [[LOW1D:%v[0-9]+]], [[LOW1E]]
+; CHECK-DAG: vfchedb [[HIGHRES0:%v[0-9]+]], [[HIGH0D]], [[HIGH1D]]
+; CHECK-DAG: vfchedb [[LOWRES0:%v[0-9]+]], [[LOW0D]], [[LOW1D]]
+; CHECK-DAG: vfchdb [[HIGHRES1:%v[0-9]+]], [[HIGH1D]], [[HIGH0D]]
+; CHECK-DAG: vfchdb [[LOWRES1:%v[0-9]+]], [[LOW1D]], [[LOW0D]]
+; CHECK-DAG: vpkg [[RES0:%v[0-9]+]], [[HIGHRES0]], [[LOWRES0]]
+; CHECK-DAG: vpkg [[RES1:%v[0-9]+]], [[HIGHRES1]], [[LOWRES1]]
+; CHECK: vo %v24, [[RES1]], [[RES0]]
+; CHECK-NEXT: br %r14
+ %cmp = fcmp ord <4 x float> %val1, %val2
+ %ret = sext <4 x i1> %cmp to <4 x i32>
+ ret <4 x i32> %ret
+}
+
+; Test uno.
+define <4 x i32> @f14(<4 x float> %val1, <4 x float> %val2) {
+; CHECK-LABEL: f14:
+; CHECK-DAG: vmrhf [[HIGH0E:%v[0-9]+]], %v24, %v24
+; CHECK-DAG: vmrlf [[LOW0E:%v[0-9]+]], %v24, %v24
+; CHECK-DAG: vmrhf [[HIGH1E:%v[0-9]+]], %v26, %v26
+; CHECK-DAG: vmrlf [[LOW1E:%v[0-9]+]], %v26, %v26
+; CHECK-DAG: vldeb [[HIGH0D:%v[0-9]+]], [[HIGH0E]]
+; CHECK-DAG: vldeb [[HIGH1D:%v[0-9]+]], [[HIGH1E]]
+; CHECK-DAG: vldeb [[LOW0D:%v[0-9]+]], [[LOW0E]]
+; CHECK-DAG: vldeb [[LOW1D:%v[0-9]+]], [[LOW1E]]
+; CHECK-DAG: vfchedb [[HIGHRES0:%v[0-9]+]], [[HIGH0D]], [[HIGH1D]]
+; CHECK-DAG: vfchedb [[LOWRES0:%v[0-9]+]], [[LOW0D]], [[LOW1D]]
+; CHECK-DAG: vfchdb [[HIGHRES1:%v[0-9]+]], [[HIGH1D]], [[HIGH0D]]
+; CHECK-DAG: vfchdb [[LOWRES1:%v[0-9]+]], [[LOW1D]], [[LOW0D]]
+; CHECK-DAG: vpkg [[RES0:%v[0-9]+]], [[HIGHRES0]], [[LOWRES0]]
+; CHECK-DAG: vpkg [[RES1:%v[0-9]+]], [[HIGHRES1]], [[LOWRES1]]
+; CHECK: vno %v24, [[RES1]], [[RES0]]
+; CHECK-NEXT: br %r14
+ %cmp = fcmp uno <4 x float> %val1, %val2
+ %ret = sext <4 x i1> %cmp to <4 x i32>
+ ret <4 x i32> %ret
+}
+
+; Test oeq selects.
+define <4 x float> @f15(<4 x float> %val1, <4 x float> %val2,
+ <4 x float> %val3, <4 x float> %val4) {
+; CHECK-LABEL: f15:
+; CHECK: vpkg [[REG:%v[0-9]+]],
+; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = fcmp oeq <4 x float> %val1, %val2
+ %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4
+ ret <4 x float> %ret
+}
+
+; Test one selects.
+define <4 x float> @f16(<4 x float> %val1, <4 x float> %val2,
+ <4 x float> %val3, <4 x float> %val4) {
+; CHECK-LABEL: f16:
+; CHECK: vo [[REG:%v[0-9]+]],
+; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = fcmp one <4 x float> %val1, %val2
+ %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4
+ ret <4 x float> %ret
+}
+
+; Test ogt selects.
+define <4 x float> @f17(<4 x float> %val1, <4 x float> %val2,
+ <4 x float> %val3, <4 x float> %val4) {
+; CHECK-LABEL: f17:
+; CHECK: vpkg [[REG:%v[0-9]+]],
+; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = fcmp ogt <4 x float> %val1, %val2
+ %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4
+ ret <4 x float> %ret
+}
+
+; Test oge selects.
+define <4 x float> @f18(<4 x float> %val1, <4 x float> %val2,
+ <4 x float> %val3, <4 x float> %val4) {
+; CHECK-LABEL: f18:
+; CHECK: vpkg [[REG:%v[0-9]+]],
+; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = fcmp oge <4 x float> %val1, %val2
+ %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4
+ ret <4 x float> %ret
+}
+
+; Test ole selects.
+define <4 x float> @f19(<4 x float> %val1, <4 x float> %val2,
+ <4 x float> %val3, <4 x float> %val4) {
+; CHECK-LABEL: f19:
+; CHECK: vpkg [[REG:%v[0-9]+]],
+; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = fcmp ole <4 x float> %val1, %val2
+ %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4
+ ret <4 x float> %ret
+}
+
+; Test olt selects.
+define <4 x float> @f20(<4 x float> %val1, <4 x float> %val2,
+ <4 x float> %val3, <4 x float> %val4) {
+; CHECK-LABEL: f20:
+; CHECK: vpkg [[REG:%v[0-9]+]],
+; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = fcmp olt <4 x float> %val1, %val2
+ %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4
+ ret <4 x float> %ret
+}
+
+; Test ueq selects.
+define <4 x float> @f21(<4 x float> %val1, <4 x float> %val2,
+ <4 x float> %val3, <4 x float> %val4) {
+; CHECK-LABEL: f21:
+; CHECK: vo [[REG:%v[0-9]+]],
+; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = fcmp ueq <4 x float> %val1, %val2
+ %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4
+ ret <4 x float> %ret
+}
+
+; Test une selects.
+define <4 x float> @f22(<4 x float> %val1, <4 x float> %val2,
+ <4 x float> %val3, <4 x float> %val4) {
+; CHECK-LABEL: f22:
+; CHECK: vpkg [[REG:%v[0-9]+]],
+; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = fcmp une <4 x float> %val1, %val2
+ %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4
+ ret <4 x float> %ret
+}
+
+; Test ugt selects.
+define <4 x float> @f23(<4 x float> %val1, <4 x float> %val2,
+ <4 x float> %val3, <4 x float> %val4) {
+; CHECK-LABEL: f23:
+; CHECK: vpkg [[REG:%v[0-9]+]],
+; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = fcmp ugt <4 x float> %val1, %val2
+ %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4
+ ret <4 x float> %ret
+}
+
+; Test uge selects.
+define <4 x float> @f24(<4 x float> %val1, <4 x float> %val2,
+ <4 x float> %val3, <4 x float> %val4) {
+; CHECK-LABEL: f24:
+; CHECK: vpkg [[REG:%v[0-9]+]],
+; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = fcmp uge <4 x float> %val1, %val2
+ %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4
+ ret <4 x float> %ret
+}
+
+; Test ule selects.
+define <4 x float> @f25(<4 x float> %val1, <4 x float> %val2,
+ <4 x float> %val3, <4 x float> %val4) {
+; CHECK-LABEL: f25:
+; CHECK: vpkg [[REG:%v[0-9]+]],
+; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = fcmp ule <4 x float> %val1, %val2
+ %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4
+ ret <4 x float> %ret
+}
+
+; Test ult selects.
+define <4 x float> @f26(<4 x float> %val1, <4 x float> %val2,
+ <4 x float> %val3, <4 x float> %val4) {
+; CHECK-LABEL: f26:
+; CHECK: vpkg [[REG:%v[0-9]+]],
+; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = fcmp ult <4 x float> %val1, %val2
+ %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4
+ ret <4 x float> %ret
+}
+
+; Test ord selects.
+define <4 x float> @f27(<4 x float> %val1, <4 x float> %val2,
+ <4 x float> %val3, <4 x float> %val4) {
+; CHECK-LABEL: f27:
+; CHECK: vo [[REG:%v[0-9]+]],
+; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = fcmp ord <4 x float> %val1, %val2
+ %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4
+ ret <4 x float> %ret
+}
+
+; Test uno selects.
+define <4 x float> @f28(<4 x float> %val1, <4 x float> %val2,
+ <4 x float> %val3, <4 x float> %val4) {
+; CHECK-LABEL: f28:
+; CHECK: vo [[REG:%v[0-9]+]],
+; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = fcmp uno <4 x float> %val1, %val2
+ %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4
+ ret <4 x float> %ret
+}
diff --git a/test/CodeGen/SystemZ/vec-cmp-06.ll b/test/CodeGen/SystemZ/vec-cmp-06.ll
new file mode 100644
index 000000000000..eef57555b482
--- /dev/null
+++ b/test/CodeGen/SystemZ/vec-cmp-06.ll
@@ -0,0 +1,349 @@
+; Test f64 and v2f64 comparisons.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
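+; The CHECK lines expect oeq, ogt and oge (and their operand-swapped olt
+; and ole variants) to map directly to vfcedb, vfchdb and vfchedb; one,
+; ueq, ord and uno combine two compares with vo or vno, and the remaining
+; unordered predicates complement a single ordered compare with vno.
+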
+; Test oeq.
+define <2 x i64> @f1(<2 x i64> %dummy, <2 x double> %val1, <2 x double> %val2) {
+; CHECK-LABEL: f1:
+; CHECK: vfcedb %v24, %v26, %v28
+; CHECK-NEXT: br %r14
+ %cmp = fcmp oeq <2 x double> %val1, %val2
+ %ret = sext <2 x i1> %cmp to <2 x i64>
+ ret <2 x i64> %ret
+}
+
+; Test one.
+define <2 x i64> @f2(<2 x i64> %dummy, <2 x double> %val1, <2 x double> %val2) {
+; CHECK-LABEL: f2:
+; CHECK-DAG: vfchdb [[REG1:%v[0-9]+]], %v28, %v26
+; CHECK-DAG: vfchdb [[REG2:%v[0-9]+]], %v26, %v28
+; CHECK: vo %v24, [[REG1]], [[REG2]]
+; CHECK-NEXT: br %r14
+ %cmp = fcmp one <2 x double> %val1, %val2
+ %ret = sext <2 x i1> %cmp to <2 x i64>
+ ret <2 x i64> %ret
+}
+
+; Test ogt.
+define <2 x i64> @f3(<2 x i64> %dummy, <2 x double> %val1, <2 x double> %val2) {
+; CHECK-LABEL: f3:
+; CHECK: vfchdb %v24, %v26, %v28
+; CHECK-NEXT: br %r14
+ %cmp = fcmp ogt <2 x double> %val1, %val2
+ %ret = sext <2 x i1> %cmp to <2 x i64>
+ ret <2 x i64> %ret
+}
+
+; Test oge.
+define <2 x i64> @f4(<2 x i64> %dummy, <2 x double> %val1, <2 x double> %val2) {
+; CHECK-LABEL: f4:
+; CHECK: vfchedb %v24, %v26, %v28
+; CHECK-NEXT: br %r14
+ %cmp = fcmp oge <2 x double> %val1, %val2
+ %ret = sext <2 x i1> %cmp to <2 x i64>
+ ret <2 x i64> %ret
+}
+
+; Test ole.
+define <2 x i64> @f5(<2 x i64> %dummy, <2 x double> %val1, <2 x double> %val2) {
+; CHECK-LABEL: f5:
+; CHECK: vfchedb %v24, %v28, %v26
+; CHECK-NEXT: br %r14
+ %cmp = fcmp ole <2 x double> %val1, %val2
+ %ret = sext <2 x i1> %cmp to <2 x i64>
+ ret <2 x i64> %ret
+}
+
+; Test olt.
+define <2 x i64> @f6(<2 x i64> %dummy, <2 x double> %val1, <2 x double> %val2) {
+; CHECK-LABEL: f6:
+; CHECK: vfchdb %v24, %v28, %v26
+; CHECK-NEXT: br %r14
+ %cmp = fcmp olt <2 x double> %val1, %val2
+ %ret = sext <2 x i1> %cmp to <2 x i64>
+ ret <2 x i64> %ret
+}
+
+; Test ueq.
+define <2 x i64> @f7(<2 x i64> %dummy, <2 x double> %val1, <2 x double> %val2) {
+; CHECK-LABEL: f7:
+; CHECK-DAG: vfchdb [[REG1:%v[0-9]+]], %v28, %v26
+; CHECK-DAG: vfchdb [[REG2:%v[0-9]+]], %v26, %v28
+; CHECK: vno %v24, [[REG1]], [[REG2]]
+; CHECK-NEXT: br %r14
+ %cmp = fcmp ueq <2 x double> %val1, %val2
+ %ret = sext <2 x i1> %cmp to <2 x i64>
+ ret <2 x i64> %ret
+}
+
+; Test une.
+define <2 x i64> @f8(<2 x i64> %dummy, <2 x double> %val1, <2 x double> %val2) {
+; CHECK-LABEL: f8:
+; CHECK: vfcedb [[REG:%v[0-9]+]], %v26, %v28
+; CHECK-NEXT: vno %v24, [[REG]], [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = fcmp une <2 x double> %val1, %val2
+ %ret = sext <2 x i1> %cmp to <2 x i64>
+ ret <2 x i64> %ret
+}
+
+; Test ugt.
+define <2 x i64> @f9(<2 x i64> %dummy, <2 x double> %val1, <2 x double> %val2) {
+; CHECK-LABEL: f9:
+; CHECK: vfchedb [[REG:%v[0-9]+]], %v28, %v26
+; CHECK-NEXT: vno %v24, [[REG]], [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = fcmp ugt <2 x double> %val1, %val2
+ %ret = sext <2 x i1> %cmp to <2 x i64>
+ ret <2 x i64> %ret
+}
+
+; Test uge.
+define <2 x i64> @f10(<2 x i64> %dummy, <2 x double> %val1,
+ <2 x double> %val2) {
+; CHECK-LABEL: f10:
+; CHECK: vfchdb [[REG:%v[0-9]+]], %v28, %v26
+; CHECK-NEXT: vno %v24, [[REG]], [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = fcmp uge <2 x double> %val1, %val2
+ %ret = sext <2 x i1> %cmp to <2 x i64>
+ ret <2 x i64> %ret
+}
+
+; Test ule.
+define <2 x i64> @f11(<2 x i64> %dummy, <2 x double> %val1,
+ <2 x double> %val2) {
+; CHECK-LABEL: f11:
+; CHECK: vfchdb [[REG:%v[0-9]+]], %v26, %v28
+; CHECK-NEXT: vno %v24, [[REG]], [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = fcmp ule <2 x double> %val1, %val2
+ %ret = sext <2 x i1> %cmp to <2 x i64>
+ ret <2 x i64> %ret
+}
+
+; Test ult.
+define <2 x i64> @f12(<2 x i64> %dummy, <2 x double> %val1,
+ <2 x double> %val2) {
+; CHECK-LABEL: f12:
+; CHECK: vfchedb [[REG:%v[0-9]+]], %v26, %v28
+; CHECK-NEXT: vno %v24, [[REG]], [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = fcmp ult <2 x double> %val1, %val2
+ %ret = sext <2 x i1> %cmp to <2 x i64>
+ ret <2 x i64> %ret
+}
+
+; Test ord.
+define <2 x i64> @f13(<2 x i64> %dummy, <2 x double> %val1,
+ <2 x double> %val2) {
+; CHECK-LABEL: f13:
+; CHECK-DAG: vfchdb [[REG1:%v[0-9]+]], %v28, %v26
+; CHECK-DAG: vfchedb [[REG2:%v[0-9]+]], %v26, %v28
+; CHECK: vo %v24, [[REG1]], [[REG2]]
+; CHECK-NEXT: br %r14
+ %cmp = fcmp ord <2 x double> %val1, %val2
+ %ret = sext <2 x i1> %cmp to <2 x i64>
+ ret <2 x i64> %ret
+}
+
+; Test uno.
+define <2 x i64> @f14(<2 x i64> %dummy, <2 x double> %val1,
+ <2 x double> %val2) {
+; CHECK-LABEL: f14:
+; CHECK-DAG: vfchdb [[REG1:%v[0-9]+]], %v28, %v26
+; CHECK-DAG: vfchedb [[REG2:%v[0-9]+]], %v26, %v28
+; CHECK: vno %v24, [[REG1]], [[REG2]]
+; CHECK-NEXT: br %r14
+ %cmp = fcmp uno <2 x double> %val1, %val2
+ %ret = sext <2 x i1> %cmp to <2 x i64>
+ ret <2 x i64> %ret
+}
+
+; Test oeq selects.
+define <2 x double> @f15(<2 x double> %val1, <2 x double> %val2,
+ <2 x double> %val3, <2 x double> %val4) {
+; CHECK-LABEL: f15:
+; CHECK: vfcedb [[REG:%v[0-9]+]], %v24, %v26
+; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = fcmp oeq <2 x double> %val1, %val2
+ %ret = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4
+ ret <2 x double> %ret
+}
+
+; Test one selects.
+define <2 x double> @f16(<2 x double> %val1, <2 x double> %val2,
+ <2 x double> %val3, <2 x double> %val4) {
+; CHECK-LABEL: f16:
+; CHECK-DAG: vfchdb [[REG1:%v[0-9]+]], %v26, %v24
+; CHECK-DAG: vfchdb [[REG2:%v[0-9]+]], %v24, %v26
+; CHECK: vo [[REG:%v[0-9]+]], [[REG1]], [[REG2]]
+; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = fcmp one <2 x double> %val1, %val2
+ %ret = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4
+ ret <2 x double> %ret
+}
+
+; Test ogt selects.
+define <2 x double> @f17(<2 x double> %val1, <2 x double> %val2,
+ <2 x double> %val3, <2 x double> %val4) {
+; CHECK-LABEL: f17:
+; CHECK: vfchdb [[REG:%v[0-9]+]], %v24, %v26
+; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = fcmp ogt <2 x double> %val1, %val2
+ %ret = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4
+ ret <2 x double> %ret
+}
+
+; Test oge selects.
+define <2 x double> @f18(<2 x double> %val1, <2 x double> %val2,
+ <2 x double> %val3, <2 x double> %val4) {
+; CHECK-LABEL: f18:
+; CHECK: vfchedb [[REG:%v[0-9]+]], %v24, %v26
+; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = fcmp oge <2 x double> %val1, %val2
+ %ret = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4
+ ret <2 x double> %ret
+}
+
+; Test ole selects.
+define <2 x double> @f19(<2 x double> %val1, <2 x double> %val2,
+ <2 x double> %val3, <2 x double> %val4) {
+; CHECK-LABEL: f19:
+; CHECK: vfchedb [[REG:%v[0-9]+]], %v26, %v24
+; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = fcmp ole <2 x double> %val1, %val2
+ %ret = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4
+ ret <2 x double> %ret
+}
+
+; Test olt selects.
+define <2 x double> @f20(<2 x double> %val1, <2 x double> %val2,
+ <2 x double> %val3, <2 x double> %val4) {
+; CHECK-LABEL: f20:
+; CHECK: vfchdb [[REG:%v[0-9]+]], %v26, %v24
+; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = fcmp olt <2 x double> %val1, %val2
+ %ret = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4
+ ret <2 x double> %ret
+}
+
+; Test ueq selects.
+define <2 x double> @f21(<2 x double> %val1, <2 x double> %val2,
+ <2 x double> %val3, <2 x double> %val4) {
+; CHECK-LABEL: f21:
+; CHECK-DAG: vfchdb [[REG1:%v[0-9]+]], %v26, %v24
+; CHECK-DAG: vfchdb [[REG2:%v[0-9]+]], %v24, %v26
+; CHECK: vo [[REG:%v[0-9]+]], [[REG1]], [[REG2]]
+; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = fcmp ueq <2 x double> %val1, %val2
+ %ret = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4
+ ret <2 x double> %ret
+}
+
+; Test une selects.
+define <2 x double> @f22(<2 x double> %val1, <2 x double> %val2,
+ <2 x double> %val3, <2 x double> %val4) {
+; CHECK-LABEL: f22:
+; CHECK: vfcedb [[REG:%v[0-9]+]], %v24, %v26
+; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = fcmp une <2 x double> %val1, %val2
+ %ret = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4
+ ret <2 x double> %ret
+}
+
+; Test ugt selects.
+define <2 x double> @f23(<2 x double> %val1, <2 x double> %val2,
+ <2 x double> %val3, <2 x double> %val4) {
+; CHECK-LABEL: f23:
+; CHECK: vfchedb [[REG:%v[0-9]+]], %v26, %v24
+; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = fcmp ugt <2 x double> %val1, %val2
+ %ret = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4
+ ret <2 x double> %ret
+}
+
+; Test uge selects.
+define <2 x double> @f24(<2 x double> %val1, <2 x double> %val2,
+ <2 x double> %val3, <2 x double> %val4) {
+; CHECK-LABEL: f24:
+; CHECK: vfchdb [[REG:%v[0-9]+]], %v26, %v24
+; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = fcmp uge <2 x double> %val1, %val2
+ %ret = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4
+ ret <2 x double> %ret
+}
+
+; Test ule selects.
+define <2 x double> @f25(<2 x double> %val1, <2 x double> %val2,
+ <2 x double> %val3, <2 x double> %val4) {
+; CHECK-LABEL: f25:
+; CHECK: vfchdb [[REG:%v[0-9]+]], %v24, %v26
+; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = fcmp ule <2 x double> %val1, %val2
+ %ret = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4
+ ret <2 x double> %ret
+}
+
+; Test ult selects.
+define <2 x double> @f26(<2 x double> %val1, <2 x double> %val2,
+ <2 x double> %val3, <2 x double> %val4) {
+; CHECK-LABEL: f26:
+; CHECK: vfchedb [[REG:%v[0-9]+]], %v24, %v26
+; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = fcmp ult <2 x double> %val1, %val2
+ %ret = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4
+ ret <2 x double> %ret
+}
+
+; Test ord selects.
+define <2 x double> @f27(<2 x double> %val1, <2 x double> %val2,
+ <2 x double> %val3, <2 x double> %val4) {
+; CHECK-LABEL: f27:
+; CHECK-DAG: vfchdb [[REG1:%v[0-9]+]], %v26, %v24
+; CHECK-DAG: vfchedb [[REG2:%v[0-9]+]], %v24, %v26
+; CHECK: vo [[REG:%v[0-9]+]], [[REG1]], [[REG2]]
+; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = fcmp ord <2 x double> %val1, %val2
+ %ret = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4
+ ret <2 x double> %ret
+}
+
+; Test uno selects.
+define <2 x double> @f28(<2 x double> %val1, <2 x double> %val2,
+ <2 x double> %val3, <2 x double> %val4) {
+; CHECK-LABEL: f28:
+; CHECK-DAG: vfchdb [[REG1:%v[0-9]+]], %v26, %v24
+; CHECK-DAG: vfchedb [[REG2:%v[0-9]+]], %v24, %v26
+; CHECK: vo [[REG:%v[0-9]+]], [[REG1]], [[REG2]]
+; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]]
+; CHECK-NEXT: br %r14
+ %cmp = fcmp uno <2 x double> %val1, %val2
+ %ret = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4
+ ret <2 x double> %ret
+}
+
+; Test an f64 comparison that uses vector registers.
+define i64 @f29(i64 %a, i64 %b, double %f1, <2 x double> %vec) {
+; CHECK-LABEL: f29:
+; CHECK: wfcdb %f0, %v24
+; CHECK-NEXT: locgrne %r2, %r3
+; CHECK: br %r14
+ %f2 = extractelement <2 x double> %vec, i32 0
+ %cond = fcmp oeq double %f1, %f2
+ %res = select i1 %cond, i64 %a, i64 %b
+ ret i64 %res
+}
diff --git a/test/CodeGen/SystemZ/vec-combine-01.ll b/test/CodeGen/SystemZ/vec-combine-01.ll
new file mode 100644
index 000000000000..a35934421726
--- /dev/null
+++ b/test/CodeGen/SystemZ/vec-combine-01.ll
@@ -0,0 +1,155 @@
+; Test various target-specific DAG combiner patterns.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
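+; The CHECK-NOT lines verify that intermediate shuffles (vpk, vrepg, vmr,
+; vup) are folded away and that the wanted lanes are read or stored
+; directly from the source vectors.
+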
+; Check that an extraction followed by a truncation is effectively treated
+; as a bitcast.
+define void @f1(<4 x i32> %v1, <4 x i32> %v2, i8 *%ptr1, i8 *%ptr2) {
+; CHECK-LABEL: f1:
+; CHECK: vaf [[REG:%v[0-9]+]], %v24, %v26
+; CHECK-DAG: vsteb [[REG]], 0(%r2), 3
+; CHECK-DAG: vsteb [[REG]], 0(%r3), 15
+; CHECK: br %r14
+ %add = add <4 x i32> %v1, %v2
+ %elem1 = extractelement <4 x i32> %add, i32 0
+ %elem2 = extractelement <4 x i32> %add, i32 3
+ %trunc1 = trunc i32 %elem1 to i8
+ %trunc2 = trunc i32 %elem2 to i8
+ store i8 %trunc1, i8 *%ptr1
+ store i8 %trunc2, i8 *%ptr2
+ ret void
+}
+
+; Test a case where a pack-type shuffle can be eliminated.
+define i16 @f2(<4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) {
+; CHECK-LABEL: f2:
+; CHECK-NOT: vpk
+; CHECK-DAG: vaf [[REG1:%v[0-9]+]], %v24, %v26
+; CHECK-DAG: vaf [[REG2:%v[0-9]+]], %v26, %v28
+; CHECK-DAG: vlgvh {{%r[0-5]}}, [[REG1]], 3
+; CHECK-DAG: vlgvh {{%r[0-5]}}, [[REG2]], 7
+; CHECK: br %r14
+ %add1 = add <4 x i32> %v1, %v2
+ %add2 = add <4 x i32> %v2, %v3
+ %shuffle = shufflevector <4 x i32> %add1, <4 x i32> %add2,
+ <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+ %bitcast = bitcast <4 x i32> %shuffle to <8 x i16>
+ %elem1 = extractelement <8 x i16> %bitcast, i32 1
+ %elem2 = extractelement <8 x i16> %bitcast, i32 7
+ %res = add i16 %elem1, %elem2
+ ret i16 %res
+}
+
+; ...and again in a case where there's also a splat and a bitcast.
+define i16 @f3(<4 x i32> %v1, <4 x i32> %v2, <2 x i64> %v3) {
+; CHECK-LABEL: f3:
+; CHECK-NOT: vrepg
+; CHECK-NOT: vpk
+; CHECK-DAG: vaf [[REG:%v[0-9]+]], %v24, %v26
+; CHECK-DAG: vlgvh {{%r[0-5]}}, [[REG]], 6
+; CHECK-DAG: vlgvh {{%r[0-5]}}, %v28, 3
+; CHECK: br %r14
+ %add = add <4 x i32> %v1, %v2
+ %splat = shufflevector <2 x i64> %v3, <2 x i64> undef,
+ <2 x i32> <i32 0, i32 0>
+ %splatcast = bitcast <2 x i64> %splat to <4 x i32>
+ %shuffle = shufflevector <4 x i32> %add, <4 x i32> %splatcast,
+ <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+ %bitcast = bitcast <4 x i32> %shuffle to <8 x i16>
+ %elem1 = extractelement <8 x i16> %bitcast, i32 2
+ %elem2 = extractelement <8 x i16> %bitcast, i32 7
+ %res = add i16 %elem1, %elem2
+ ret i16 %res
+}
+
+; ...and again with a merge low instead of a pack.
+define i16 @f4(<4 x i32> %v1, <4 x i32> %v2, <2 x i64> %v3) {
+; CHECK-LABEL: f4:
+; CHECK-NOT: vrepg
+; CHECK-NOT: vmr
+; CHECK-DAG: vaf [[REG:%v[0-9]+]], %v24, %v26
+; CHECK-DAG: vlgvh {{%r[0-5]}}, [[REG]], 6
+; CHECK-DAG: vlgvh {{%r[0-5]}}, %v28, 3
+; CHECK: br %r14
+ %add = add <4 x i32> %v1, %v2
+ %splat = shufflevector <2 x i64> %v3, <2 x i64> undef,
+ <2 x i32> <i32 0, i32 0>
+ %splatcast = bitcast <2 x i64> %splat to <4 x i32>
+ %shuffle = shufflevector <4 x i32> %add, <4 x i32> %splatcast,
+ <4 x i32> <i32 2, i32 6, i32 3, i32 7>
+ %bitcast = bitcast <4 x i32> %shuffle to <8 x i16>
+ %elem1 = extractelement <8 x i16> %bitcast, i32 4
+ %elem2 = extractelement <8 x i16> %bitcast, i32 7
+ %res = add i16 %elem1, %elem2
+ ret i16 %res
+}
+
+; ...and again with a merge high.
+define i16 @f5(<4 x i32> %v1, <4 x i32> %v2, <2 x i64> %v3) {
+; CHECK-LABEL: f5:
+; CHECK-NOT: vrepg
+; CHECK-NOT: vmr
+; CHECK-DAG: vaf [[REG:%v[0-9]+]], %v24, %v26
+; CHECK-DAG: vlgvh {{%r[0-5]}}, [[REG]], 2
+; CHECK-DAG: vlgvh {{%r[0-5]}}, %v28, 3
+; CHECK: br %r14
+ %add = add <4 x i32> %v1, %v2
+ %splat = shufflevector <2 x i64> %v3, <2 x i64> undef,
+ <2 x i32> <i32 0, i32 0>
+ %splatcast = bitcast <2 x i64> %splat to <4 x i32>
+ %shuffle = shufflevector <4 x i32> %add, <4 x i32> %splatcast,
+ <4 x i32> <i32 0, i32 4, i32 1, i32 5>
+ %bitcast = bitcast <4 x i32> %shuffle to <8 x i16>
+ %elem1 = extractelement <8 x i16> %bitcast, i32 4
+ %elem2 = extractelement <8 x i16> %bitcast, i32 7
+ %res = add i16 %elem1, %elem2
+ ret i16 %res
+}
+
+; Test a case where an unpack high can be eliminated from the usual
+; load-extend sequence.
+define void @f6(<8 x i8> *%ptr1, i8 *%ptr2, i8 *%ptr3, i8 *%ptr4) {
+; CHECK-LABEL: f6:
+; CHECK: vlrepg [[REG:%v[0-9]+]], 0(%r2)
+; CHECK-NOT: vup
+; CHECK-DAG: vsteb [[REG]], 0(%r3), 1
+; CHECK-DAG: vsteb [[REG]], 0(%r4), 2
+; CHECK-DAG: vsteb [[REG]], 0(%r5), 7
+; CHECK: br %r14
+ %vec = load <8 x i8>, <8 x i8> *%ptr1
+ %ext = sext <8 x i8> %vec to <8 x i16>
+ %elem1 = extractelement <8 x i16> %ext, i32 1
+ %elem2 = extractelement <8 x i16> %ext, i32 2
+ %elem3 = extractelement <8 x i16> %ext, i32 7
+ %trunc1 = trunc i16 %elem1 to i8
+ %trunc2 = trunc i16 %elem2 to i8
+ %trunc3 = trunc i16 %elem3 to i8
+ store i8 %trunc1, i8 *%ptr2
+ store i8 %trunc2, i8 *%ptr3
+ store i8 %trunc3, i8 *%ptr4
+ ret void
+}
+
+; ...and again with a bitcast in between.
+define void @f7(<4 x i8> *%ptr1, i8 *%ptr2, i8 *%ptr3, i8 *%ptr4) {
+; CHECK-LABEL: f7:
+; CHECK: vlrepf [[REG:%v[0-9]+]], 0(%r2)
+; CHECK-NOT: vup
+; CHECK-DAG: vsteb [[REG]], 0(%r3), 0
+; CHECK-DAG: vsteb [[REG]], 0(%r4), 1
+; CHECK-DAG: vsteb [[REG]], 0(%r5), 3
+; CHECK: br %r14
+ %vec = load <4 x i8>, <4 x i8> *%ptr1
+ %ext = sext <4 x i8> %vec to <4 x i32>
+ %bitcast = bitcast <4 x i32> %ext to <8 x i16>
+ %elem1 = extractelement <8 x i16> %bitcast, i32 1
+ %elem2 = extractelement <8 x i16> %bitcast, i32 3
+ %elem3 = extractelement <8 x i16> %bitcast, i32 7
+ %trunc1 = trunc i16 %elem1 to i8
+ %trunc2 = trunc i16 %elem2 to i8
+ %trunc3 = trunc i16 %elem3 to i8
+ store i8 %trunc1, i8 *%ptr2
+ store i8 %trunc2, i8 *%ptr3
+ store i8 %trunc3, i8 *%ptr4
+ ret void
+}
diff --git a/test/CodeGen/SystemZ/vec-combine-02.ll b/test/CodeGen/SystemZ/vec-combine-02.ll
new file mode 100644
index 000000000000..db0bf849017b
--- /dev/null
+++ b/test/CodeGen/SystemZ/vec-combine-02.ll
@@ -0,0 +1,433 @@
+; Test various representations of pack-like operations.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
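+; Each function below spells the same pack operation differently in IR; the
+; CHECK lines expect all of them to be recognized and emitted as plain vpk*
+; instructions (or, in f5, as direct vsteb stores) rather than as the
+; literal sequence of extracts, bitcasts and shuffles written here.
+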
+; One way of writing a <4 x i32> -> <8 x i16> pack.
+define <8 x i16> @f1(<4 x i32> %val0, <4 x i32> %val1) {
+; CHECK-LABEL: f1:
+; CHECK: vpkf %v24, %v24, %v26
+; CHECK: br %r14
+ %elem0 = extractelement <4 x i32> %val0, i32 0
+ %elem1 = extractelement <4 x i32> %val0, i32 1
+ %elem2 = extractelement <4 x i32> %val0, i32 2
+ %elem3 = extractelement <4 x i32> %val0, i32 3
+ %elem4 = extractelement <4 x i32> %val1, i32 0
+ %elem5 = extractelement <4 x i32> %val1, i32 1
+ %elem6 = extractelement <4 x i32> %val1, i32 2
+ %elem7 = extractelement <4 x i32> %val1, i32 3
+ %hboth0 = bitcast i32 %elem0 to <2 x i16>
+ %hboth1 = bitcast i32 %elem1 to <2 x i16>
+ %hboth2 = bitcast i32 %elem2 to <2 x i16>
+ %hboth3 = bitcast i32 %elem3 to <2 x i16>
+ %hboth4 = bitcast i32 %elem4 to <2 x i16>
+ %hboth5 = bitcast i32 %elem5 to <2 x i16>
+ %hboth6 = bitcast i32 %elem6 to <2 x i16>
+ %hboth7 = bitcast i32 %elem7 to <2 x i16>
+ %hlow0 = shufflevector <2 x i16> %hboth0, <2 x i16> %hboth1,
+ <2 x i32> <i32 1, i32 3>
+ %hlow1 = shufflevector <2 x i16> %hboth2, <2 x i16> %hboth3,
+ <2 x i32> <i32 1, i32 3>
+ %hlow2 = shufflevector <2 x i16> %hboth4, <2 x i16> %hboth5,
+ <2 x i32> <i32 1, i32 3>
+ %hlow3 = shufflevector <2 x i16> %hboth6, <2 x i16> %hboth7,
+ <2 x i32> <i32 1, i32 3>
+ %join0 = shufflevector <2 x i16> %hlow0, <2 x i16> %hlow1,
+ <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %join1 = shufflevector <2 x i16> %hlow2, <2 x i16> %hlow3,
+ <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %ret = shufflevector <4 x i16> %join0, <4 x i16> %join1,
+ <8 x i32> <i32 0, i32 1, i32 2, i32 3,
+ i32 4, i32 5, i32 6, i32 7>
+ ret <8 x i16> %ret
+}
+
+; A different way of writing a <4 x i32> -> <8 x i16> pack.
+define <8 x i16> @f2(<4 x i32> %val0, <4 x i32> %val1) {
+; CHECK-LABEL: f2:
+; CHECK: vpkf %v24, %v24, %v26
+; CHECK: br %r14
+ %elem0 = extractelement <4 x i32> %val0, i32 0
+ %elem1 = extractelement <4 x i32> %val0, i32 1
+ %elem2 = extractelement <4 x i32> %val0, i32 2
+ %elem3 = extractelement <4 x i32> %val0, i32 3
+ %elem4 = extractelement <4 x i32> %val1, i32 0
+ %elem5 = extractelement <4 x i32> %val1, i32 1
+ %elem6 = extractelement <4 x i32> %val1, i32 2
+ %elem7 = extractelement <4 x i32> %val1, i32 3
+ %wvec0 = insertelement <4 x i32> undef, i32 %elem0, i32 0
+ %wvec1 = insertelement <4 x i32> undef, i32 %elem1, i32 0
+ %wvec2 = insertelement <4 x i32> undef, i32 %elem2, i32 0
+ %wvec3 = insertelement <4 x i32> undef, i32 %elem3, i32 0
+ %wvec4 = insertelement <4 x i32> undef, i32 %elem4, i32 0
+ %wvec5 = insertelement <4 x i32> undef, i32 %elem5, i32 0
+ %wvec6 = insertelement <4 x i32> undef, i32 %elem6, i32 0
+ %wvec7 = insertelement <4 x i32> undef, i32 %elem7, i32 0
+ %hvec0 = bitcast <4 x i32> %wvec0 to <8 x i16>
+ %hvec1 = bitcast <4 x i32> %wvec1 to <8 x i16>
+ %hvec2 = bitcast <4 x i32> %wvec2 to <8 x i16>
+ %hvec3 = bitcast <4 x i32> %wvec3 to <8 x i16>
+ %hvec4 = bitcast <4 x i32> %wvec4 to <8 x i16>
+ %hvec5 = bitcast <4 x i32> %wvec5 to <8 x i16>
+ %hvec6 = bitcast <4 x i32> %wvec6 to <8 x i16>
+ %hvec7 = bitcast <4 x i32> %wvec7 to <8 x i16>
+ %hlow0 = shufflevector <8 x i16> %hvec0, <8 x i16> %hvec1,
+ <8 x i32> <i32 1, i32 9, i32 undef, i32 undef,
+ i32 undef, i32 undef, i32 undef, i32 undef>
+ %hlow1 = shufflevector <8 x i16> %hvec2, <8 x i16> %hvec3,
+ <8 x i32> <i32 1, i32 9, i32 undef, i32 undef,
+ i32 undef, i32 undef, i32 undef, i32 undef>
+ %hlow2 = shufflevector <8 x i16> %hvec4, <8 x i16> %hvec5,
+ <8 x i32> <i32 1, i32 9, i32 undef, i32 undef,
+ i32 undef, i32 undef, i32 undef, i32 undef>
+ %hlow3 = shufflevector <8 x i16> %hvec6, <8 x i16> %hvec7,
+ <8 x i32> <i32 1, i32 9, i32 undef, i32 undef,
+ i32 undef, i32 undef, i32 undef, i32 undef>
+ %join0 = shufflevector <8 x i16> %hlow0, <8 x i16> %hlow1,
+ <8 x i32> <i32 0, i32 1, i32 8, i32 9,
+ i32 undef, i32 undef, i32 undef, i32 undef>
+ %join1 = shufflevector <8 x i16> %hlow2, <8 x i16> %hlow3,
+ <8 x i32> <i32 0, i32 1, i32 8, i32 9,
+ i32 undef, i32 undef, i32 undef, i32 undef>
+ %ret = shufflevector <8 x i16> %join0, <8 x i16> %join1,
+ <8 x i32> <i32 0, i32 1, i32 2, i32 3,
+ i32 8, i32 9, i32 10, i32 11>
+ ret <8 x i16> %ret
+}
+
+; A direct pack operation.
+define <8 x i16> @f3(<4 x i32> %val0, <4 x i32> %val1) {
+; CHECK-LABEL: f3:
+; CHECK: vpkf %v24, %v24, %v26
+; CHECK: br %r14
+ %bitcast0 = bitcast <4 x i32> %val0 to <8 x i16>
+ %bitcast1 = bitcast <4 x i32> %val1 to <8 x i16>
+ %ret = shufflevector <8 x i16> %bitcast0, <8 x i16> %bitcast1,
+ <8 x i32> <i32 1, i32 3, i32 5, i32 7,
+ i32 9, i32 11, i32 13, i32 15>
+ ret <8 x i16> %ret
+}
+
+; One way of writing a <4 x i32> -> <16 x i8> pack. It doesn't matter
+; whether the first pack is VPKF or VPKH since the even bytes of the
+; result are discarded.
+define <16 x i8> @f4(<4 x i32> %val0, <4 x i32> %val1,
+ <4 x i32> %val2, <4 x i32> %val3) {
+; CHECK-LABEL: f4:
+; CHECK-DAG: vpk{{[hf]}} [[REG1:%v[0-9]+]], %v24, %v26
+; CHECK-DAG: vpk{{[hf]}} [[REG2:%v[0-9]+]], %v28, %v30
+; CHECK: vpkh %v24, [[REG1]], [[REG2]]
+; CHECK: br %r14
+ %bitcast0 = bitcast <4 x i32> %val0 to <8 x i16>
+ %bitcast1 = bitcast <4 x i32> %val1 to <8 x i16>
+ %bitcast2 = bitcast <4 x i32> %val2 to <8 x i16>
+ %bitcast3 = bitcast <4 x i32> %val3 to <8 x i16>
+ %join0 = shufflevector <8 x i16> %bitcast0, <8 x i16> %bitcast1,
+ <8 x i32> <i32 1, i32 3, i32 5, i32 7,
+ i32 9, i32 11, i32 13, i32 15>
+ %join1 = shufflevector <8 x i16> %bitcast2, <8 x i16> %bitcast3,
+ <8 x i32> <i32 1, i32 3, i32 5, i32 7,
+ i32 9, i32 11, i32 13, i32 15>
+ %bitcast4 = bitcast <8 x i16> %join0 to <16 x i8>
+ %bitcast5 = bitcast <8 x i16> %join1 to <16 x i8>
+ %ret = shufflevector <16 x i8> %bitcast4, <16 x i8> %bitcast5,
+ <16 x i32> <i32 1, i32 3, i32 5, i32 7,
+ i32 9, i32 11, i32 13, i32 15,
+ i32 17, i32 19, i32 21, i32 23,
+ i32 25, i32 27, i32 29, i32 31>
+ ret <16 x i8> %ret
+}
+
+; Check the same operation, but with elements being extracted from the result.
+define void @f5(<4 x i32> %val0, <4 x i32> %val1,
+ <4 x i32> %val2, <4 x i32> %val3,
+ i8 *%base) {
+; CHECK-LABEL: f5:
+; CHECK-DAG: vsteb %v24, 0(%r2), 11
+; CHECK-DAG: vsteb %v26, 1(%r2), 15
+; CHECK-DAG: vsteb %v28, 2(%r2), 3
+; CHECK-DAG: vsteb %v30, 3(%r2), 7
+; CHECK: br %r14
+ %bitcast0 = bitcast <4 x i32> %val0 to <8 x i16>
+ %bitcast1 = bitcast <4 x i32> %val1 to <8 x i16>
+ %bitcast2 = bitcast <4 x i32> %val2 to <8 x i16>
+ %bitcast3 = bitcast <4 x i32> %val3 to <8 x i16>
+ %join0 = shufflevector <8 x i16> %bitcast0, <8 x i16> %bitcast1,
+ <8 x i32> <i32 1, i32 3, i32 5, i32 7,
+ i32 9, i32 11, i32 13, i32 15>
+ %join1 = shufflevector <8 x i16> %bitcast2, <8 x i16> %bitcast3,
+ <8 x i32> <i32 1, i32 3, i32 5, i32 7,
+ i32 9, i32 11, i32 13, i32 15>
+ %bitcast4 = bitcast <8 x i16> %join0 to <16 x i8>
+ %bitcast5 = bitcast <8 x i16> %join1 to <16 x i8>
+ %vec = shufflevector <16 x i8> %bitcast4, <16 x i8> %bitcast5,
+ <16 x i32> <i32 1, i32 3, i32 5, i32 7,
+ i32 9, i32 11, i32 13, i32 15,
+ i32 17, i32 19, i32 21, i32 23,
+ i32 25, i32 27, i32 29, i32 31>
+
+ %ptr0 = getelementptr i8, i8 *%base, i64 0
+ %ptr1 = getelementptr i8, i8 *%base, i64 1
+ %ptr2 = getelementptr i8, i8 *%base, i64 2
+ %ptr3 = getelementptr i8, i8 *%base, i64 3
+
+ %byte0 = extractelement <16 x i8> %vec, i32 2
+ %byte1 = extractelement <16 x i8> %vec, i32 7
+ %byte2 = extractelement <16 x i8> %vec, i32 8
+ %byte3 = extractelement <16 x i8> %vec, i32 13
+
+ store i8 %byte0, i8 *%ptr0
+ store i8 %byte1, i8 *%ptr1
+ store i8 %byte2, i8 *%ptr2
+ store i8 %byte3, i8 *%ptr3
+
+ ret void
+}
+
+; A different way of writing a <4 x i32> -> <16 x i8> pack.
+define <16 x i8> @f6(<4 x i32> %val0, <4 x i32> %val1,
+ <4 x i32> %val2, <4 x i32> %val3) {
+; CHECK-LABEL: f6:
+; CHECK-DAG: vpk{{[hf]}} [[REG1:%v[0-9]+]], %v24, %v26
+; CHECK-DAG: vpk{{[hf]}} [[REG2:%v[0-9]+]], %v28, %v30
+; CHECK: vpkh %v24, [[REG1]], [[REG2]]
+; CHECK: br %r14
+ %elem0 = extractelement <4 x i32> %val0, i32 0
+ %elem1 = extractelement <4 x i32> %val0, i32 1
+ %elem2 = extractelement <4 x i32> %val0, i32 2
+ %elem3 = extractelement <4 x i32> %val0, i32 3
+ %elem4 = extractelement <4 x i32> %val1, i32 0
+ %elem5 = extractelement <4 x i32> %val1, i32 1
+ %elem6 = extractelement <4 x i32> %val1, i32 2
+ %elem7 = extractelement <4 x i32> %val1, i32 3
+ %elem8 = extractelement <4 x i32> %val2, i32 0
+ %elem9 = extractelement <4 x i32> %val2, i32 1
+ %elem10 = extractelement <4 x i32> %val2, i32 2
+ %elem11 = extractelement <4 x i32> %val2, i32 3
+ %elem12 = extractelement <4 x i32> %val3, i32 0
+ %elem13 = extractelement <4 x i32> %val3, i32 1
+ %elem14 = extractelement <4 x i32> %val3, i32 2
+ %elem15 = extractelement <4 x i32> %val3, i32 3
+ %bitcast0 = bitcast i32 %elem0 to <2 x i16>
+ %bitcast1 = bitcast i32 %elem1 to <2 x i16>
+ %bitcast2 = bitcast i32 %elem2 to <2 x i16>
+ %bitcast3 = bitcast i32 %elem3 to <2 x i16>
+ %bitcast4 = bitcast i32 %elem4 to <2 x i16>
+ %bitcast5 = bitcast i32 %elem5 to <2 x i16>
+ %bitcast6 = bitcast i32 %elem6 to <2 x i16>
+ %bitcast7 = bitcast i32 %elem7 to <2 x i16>
+ %bitcast8 = bitcast i32 %elem8 to <2 x i16>
+ %bitcast9 = bitcast i32 %elem9 to <2 x i16>
+ %bitcast10 = bitcast i32 %elem10 to <2 x i16>
+ %bitcast11 = bitcast i32 %elem11 to <2 x i16>
+ %bitcast12 = bitcast i32 %elem12 to <2 x i16>
+ %bitcast13 = bitcast i32 %elem13 to <2 x i16>
+ %bitcast14 = bitcast i32 %elem14 to <2 x i16>
+ %bitcast15 = bitcast i32 %elem15 to <2 x i16>
+ %low0 = shufflevector <2 x i16> %bitcast0, <2 x i16> %bitcast1,
+ <2 x i32> <i32 1, i32 3>
+ %low1 = shufflevector <2 x i16> %bitcast2, <2 x i16> %bitcast3,
+ <2 x i32> <i32 1, i32 3>
+ %low2 = shufflevector <2 x i16> %bitcast4, <2 x i16> %bitcast5,
+ <2 x i32> <i32 1, i32 3>
+ %low3 = shufflevector <2 x i16> %bitcast6, <2 x i16> %bitcast7,
+ <2 x i32> <i32 1, i32 3>
+ %low4 = shufflevector <2 x i16> %bitcast8, <2 x i16> %bitcast9,
+ <2 x i32> <i32 1, i32 3>
+ %low5 = shufflevector <2 x i16> %bitcast10, <2 x i16> %bitcast11,
+ <2 x i32> <i32 1, i32 3>
+ %low6 = shufflevector <2 x i16> %bitcast12, <2 x i16> %bitcast13,
+ <2 x i32> <i32 1, i32 3>
+ %low7 = shufflevector <2 x i16> %bitcast14, <2 x i16> %bitcast15,
+ <2 x i32> <i32 1, i32 3>
+ %bytes0 = bitcast <2 x i16> %low0 to <4 x i8>
+ %bytes1 = bitcast <2 x i16> %low1 to <4 x i8>
+ %bytes2 = bitcast <2 x i16> %low2 to <4 x i8>
+ %bytes3 = bitcast <2 x i16> %low3 to <4 x i8>
+ %bytes4 = bitcast <2 x i16> %low4 to <4 x i8>
+ %bytes5 = bitcast <2 x i16> %low5 to <4 x i8>
+ %bytes6 = bitcast <2 x i16> %low6 to <4 x i8>
+ %bytes7 = bitcast <2 x i16> %low7 to <4 x i8>
+ %blow0 = shufflevector <4 x i8> %bytes0, <4 x i8> %bytes1,
+ <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+ %blow1 = shufflevector <4 x i8> %bytes2, <4 x i8> %bytes3,
+ <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+ %blow2 = shufflevector <4 x i8> %bytes4, <4 x i8> %bytes5,
+ <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+ %blow3 = shufflevector <4 x i8> %bytes6, <4 x i8> %bytes7,
+ <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+ %join0 = shufflevector <4 x i8> %blow0, <4 x i8> %blow1,
+ <8 x i32> <i32 0, i32 1, i32 2, i32 3,
+ i32 4, i32 5, i32 6, i32 7>
+ %join1 = shufflevector <4 x i8> %blow2, <4 x i8> %blow3,
+ <8 x i32> <i32 0, i32 1, i32 2, i32 3,
+ i32 4, i32 5, i32 6, i32 7>
+ %ret = shufflevector <8 x i8> %join0, <8 x i8> %join1,
+ <16 x i32> <i32 0, i32 1, i32 2, i32 3,
+ i32 4, i32 5, i32 6, i32 7,
+ i32 8, i32 9, i32 10, i32 11,
+ i32 12, i32 13, i32 14, i32 15>
+ ret <16 x i8> %ret
+}
+
+; One way of writing a <2 x i64> -> <16 x i8> pack.
+define <16 x i8> @f7(<2 x i64> %val0, <2 x i64> %val1,
+ <2 x i64> %val2, <2 x i64> %val3,
+ <2 x i64> %val4, <2 x i64> %val5,
+ <2 x i64> %val6, <2 x i64> %val7) {
+; CHECK-LABEL: f7:
+; CHECK-DAG: vpk{{[hfg]}} [[REG1:%v[0-9]+]], %v24, %v26
+; CHECK-DAG: vpk{{[hfg]}} [[REG2:%v[0-9]+]], %v28, %v30
+; CHECK-DAG: vpk{{[hfg]}} [[REG3:%v[0-9]+]], %v25, %v27
+; CHECK-DAG: vpk{{[hfg]}} [[REG4:%v[0-9]+]], %v29, %v31
+; CHECK-DAG: vpk{{[hf]}} [[REG5:%v[0-9]+]], [[REG1]], [[REG2]]
+; CHECK-DAG: vpk{{[hf]}} [[REG6:%v[0-9]+]], [[REG3]], [[REG4]]
+; CHECK: vpkh %v24, [[REG5]], [[REG6]]
+; CHECK: br %r14
+ %elem0 = extractelement <2 x i64> %val0, i32 0
+ %elem1 = extractelement <2 x i64> %val0, i32 1
+ %elem2 = extractelement <2 x i64> %val1, i32 0
+ %elem3 = extractelement <2 x i64> %val1, i32 1
+ %elem4 = extractelement <2 x i64> %val2, i32 0
+ %elem5 = extractelement <2 x i64> %val2, i32 1
+ %elem6 = extractelement <2 x i64> %val3, i32 0
+ %elem7 = extractelement <2 x i64> %val3, i32 1
+ %elem8 = extractelement <2 x i64> %val4, i32 0
+ %elem9 = extractelement <2 x i64> %val4, i32 1
+ %elem10 = extractelement <2 x i64> %val5, i32 0
+ %elem11 = extractelement <2 x i64> %val5, i32 1
+ %elem12 = extractelement <2 x i64> %val6, i32 0
+ %elem13 = extractelement <2 x i64> %val6, i32 1
+ %elem14 = extractelement <2 x i64> %val7, i32 0
+ %elem15 = extractelement <2 x i64> %val7, i32 1
+ %bitcast0 = bitcast i64 %elem0 to <2 x i32>
+ %bitcast1 = bitcast i64 %elem1 to <2 x i32>
+ %bitcast2 = bitcast i64 %elem2 to <2 x i32>
+ %bitcast3 = bitcast i64 %elem3 to <2 x i32>
+ %bitcast4 = bitcast i64 %elem4 to <2 x i32>
+ %bitcast5 = bitcast i64 %elem5 to <2 x i32>
+ %bitcast6 = bitcast i64 %elem6 to <2 x i32>
+ %bitcast7 = bitcast i64 %elem7 to <2 x i32>
+ %bitcast8 = bitcast i64 %elem8 to <2 x i32>
+ %bitcast9 = bitcast i64 %elem9 to <2 x i32>
+ %bitcast10 = bitcast i64 %elem10 to <2 x i32>
+ %bitcast11 = bitcast i64 %elem11 to <2 x i32>
+ %bitcast12 = bitcast i64 %elem12 to <2 x i32>
+ %bitcast13 = bitcast i64 %elem13 to <2 x i32>
+ %bitcast14 = bitcast i64 %elem14 to <2 x i32>
+ %bitcast15 = bitcast i64 %elem15 to <2 x i32>
+ %low0 = shufflevector <2 x i32> %bitcast0, <2 x i32> %bitcast1,
+ <2 x i32> <i32 1, i32 3>
+ %low1 = shufflevector <2 x i32> %bitcast2, <2 x i32> %bitcast3,
+ <2 x i32> <i32 1, i32 3>
+ %low2 = shufflevector <2 x i32> %bitcast4, <2 x i32> %bitcast5,
+ <2 x i32> <i32 1, i32 3>
+ %low3 = shufflevector <2 x i32> %bitcast6, <2 x i32> %bitcast7,
+ <2 x i32> <i32 1, i32 3>
+ %low4 = shufflevector <2 x i32> %bitcast8, <2 x i32> %bitcast9,
+ <2 x i32> <i32 1, i32 3>
+ %low5 = shufflevector <2 x i32> %bitcast10, <2 x i32> %bitcast11,
+ <2 x i32> <i32 1, i32 3>
+ %low6 = shufflevector <2 x i32> %bitcast12, <2 x i32> %bitcast13,
+ <2 x i32> <i32 1, i32 3>
+ %low7 = shufflevector <2 x i32> %bitcast14, <2 x i32> %bitcast15,
+ <2 x i32> <i32 1, i32 3>
+ %half0 = bitcast <2 x i32> %low0 to <4 x i16>
+ %half1 = bitcast <2 x i32> %low1 to <4 x i16>
+ %half2 = bitcast <2 x i32> %low2 to <4 x i16>
+ %half3 = bitcast <2 x i32> %low3 to <4 x i16>
+ %half4 = bitcast <2 x i32> %low4 to <4 x i16>
+ %half5 = bitcast <2 x i32> %low5 to <4 x i16>
+ %half6 = bitcast <2 x i32> %low6 to <4 x i16>
+ %half7 = bitcast <2 x i32> %low7 to <4 x i16>
+ %hlow0 = shufflevector <4 x i16> %half0, <4 x i16> %half1,
+ <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+ %hlow1 = shufflevector <4 x i16> %half2, <4 x i16> %half3,
+ <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+ %hlow2 = shufflevector <4 x i16> %half4, <4 x i16> %half5,
+ <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+ %hlow3 = shufflevector <4 x i16> %half6, <4 x i16> %half7,
+ <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+ %bytes0 = bitcast <4 x i16> %hlow0 to <8 x i8>
+ %bytes1 = bitcast <4 x i16> %hlow1 to <8 x i8>
+ %bytes2 = bitcast <4 x i16> %hlow2 to <8 x i8>
+ %bytes3 = bitcast <4 x i16> %hlow3 to <8 x i8>
+ %join0 = shufflevector <8 x i8> %bytes0, <8 x i8> %bytes1,
+ <8 x i32> <i32 1, i32 3, i32 5, i32 7,
+ i32 9, i32 11, i32 13, i32 15>
+ %join1 = shufflevector <8 x i8> %bytes2, <8 x i8> %bytes3,
+ <8 x i32> <i32 1, i32 3, i32 5, i32 7,
+ i32 9, i32 11, i32 13, i32 15>
+ %ret = shufflevector <8 x i8> %join0, <8 x i8> %join1,
+ <16 x i32> <i32 0, i32 1, i32 2, i32 3,
+ i32 4, i32 5, i32 6, i32 7,
+ i32 8, i32 9, i32 10, i32 11,
+ i32 12, i32 13, i32 14, i32 15>
+ ret <16 x i8> %ret
+}
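+
+; A sketch of the expected lowering, inferred from the CHECK-DAG lines above:
+; VPK packs the rightmost (low-order) half of each element of its two inputs,
+; so truncating i64 lanes down to i8 naturally becomes a three-level tree of
+; packs (vpkg, then vpkf, then vpkh).  The loose regexes presumably allow
+; other pack orders, since only the low byte of each lane survives anyway.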
+
+; Test a <2 x i64> -> <4 x f32> pack in which only individual elements are
+; needed.
+define float @f8(i64 %scalar0, i64 %scalar1, i64 %scalar2, i64 %scalar3) {
+; CHECK-LABEL: f8:
+; CHECK-NOT: vperm
+; CHECK-NOT: vpk
+; CHECK-NOT: vmrh
+; CHECK: aebr {{%f[0-7]}},
+; CHECK: aebr {{%f[0-7]}},
+; CHECK: meebr %f0,
+; CHECK: br %r14
+ %vec0 = insertelement <2 x i64> undef, i64 %scalar0, i32 0
+ %vec1 = insertelement <2 x i64> undef, i64 %scalar1, i32 0
+ %vec2 = insertelement <2 x i64> undef, i64 %scalar2, i32 0
+ %vec3 = insertelement <2 x i64> undef, i64 %scalar3, i32 0
+ %join0 = shufflevector <2 x i64> %vec0, <2 x i64> %vec1,
+ <2 x i32> <i32 0, i32 2>
+ %join1 = shufflevector <2 x i64> %vec2, <2 x i64> %vec3,
+ <2 x i32> <i32 0, i32 2>
+ %bitcast0 = bitcast <2 x i64> %join0 to <4 x float>
+ %bitcast1 = bitcast <2 x i64> %join1 to <4 x float>
+ %pack = shufflevector <4 x float> %bitcast0, <4 x float> %bitcast1,
+ <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+ %elt0 = extractelement <4 x float> %pack, i32 0
+ %elt1 = extractelement <4 x float> %pack, i32 1
+ %elt2 = extractelement <4 x float> %pack, i32 2
+ %elt3 = extractelement <4 x float> %pack, i32 3
+ %add0 = fadd float %elt0, %elt2
+ %add1 = fadd float %elt1, %elt3
+ %ret = fmul float %add0, %add1
+ ret float %ret
+}
+
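+; In f8 and f9 the packed vector is only ever read one lane at a time and the
+; lanes are combined with scalar arithmetic, so no vector permute or pack
+; instruction should be needed at all; the CHECK-NOT lines together with the
+; scalar add/multiply checks encode that expectation.
+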
+; Test a <2 x f64> -> <4 x i32> pack in which only individual elements are
+; needed.
+define i32 @f9(double %scalar0, double %scalar1, double %scalar2,
+ double %scalar3) {
+; CHECK-LABEL: f9:
+; CHECK-NOT: vperm
+; CHECK-NOT: vpk
+; CHECK-NOT: vmrh
+; CHECK: ar {{%r[0-5]}},
+; CHECK: ar {{%r[0-5]}},
+; CHECK: or %r2,
+; CHECK: br %r14
+ %vec0 = insertelement <2 x double> undef, double %scalar0, i32 0
+ %vec1 = insertelement <2 x double> undef, double %scalar1, i32 0
+ %vec2 = insertelement <2 x double> undef, double %scalar2, i32 0
+ %vec3 = insertelement <2 x double> undef, double %scalar3, i32 0
+ %join0 = shufflevector <2 x double> %vec0, <2 x double> %vec1,
+ <2 x i32> <i32 0, i32 2>
+ %join1 = shufflevector <2 x double> %vec2, <2 x double> %vec3,
+ <2 x i32> <i32 0, i32 2>
+ %bitcast0 = bitcast <2 x double> %join0 to <4 x i32>
+ %bitcast1 = bitcast <2 x double> %join1 to <4 x i32>
+ %pack = shufflevector <4 x i32> %bitcast0, <4 x i32> %bitcast1,
+ <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+ %elt0 = extractelement <4 x i32> %pack, i32 0
+ %elt1 = extractelement <4 x i32> %pack, i32 1
+ %elt2 = extractelement <4 x i32> %pack, i32 2
+ %elt3 = extractelement <4 x i32> %pack, i32 3
+ %add0 = add i32 %elt0, %elt2
+ %add1 = add i32 %elt1, %elt3
+ %ret = or i32 %add0, %add1
+ ret i32 %ret
+}
diff --git a/test/CodeGen/SystemZ/vec-const-01.ll b/test/CodeGen/SystemZ/vec-const-01.ll
new file mode 100644
index 000000000000..4cdcbf7c2dcb
--- /dev/null
+++ b/test/CodeGen/SystemZ/vec-const-01.ll
@@ -0,0 +1,103 @@
+; Test vector byte masks, v16i8 version.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
+; Test an all-zeros vector.
+define <16 x i8> @f1() {
+; CHECK-LABEL: f1:
+; CHECK: vgbm %v24, 0
+; CHECK: br %r14
+ ret <16 x i8> zeroinitializer
+}
+
+; Test an all-ones vector.
+define <16 x i8> @f2() {
+; CHECK-LABEL: f2:
+; CHECK: vgbm %v24, 65535
+; CHECK: br %r14
+ ret <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1,
+ i8 -1, i8 -1, i8 -1, i8 -1,
+ i8 -1, i8 -1, i8 -1, i8 -1,
+ i8 -1, i8 -1, i8 -1, i8 -1>
+}
+
+; Test a mixed vector (mask 0x8c75).
+define <16 x i8> @f3() {
+; CHECK-LABEL: f3:
+; CHECK: vgbm %v24, 35957
+; CHECK: br %r14
+ ret <16 x i8> <i8 -1, i8 0, i8 0, i8 0,
+ i8 -1, i8 -1, i8 0, i8 0,
+ i8 0, i8 -1, i8 -1, i8 -1,
+ i8 0, i8 -1, i8 0, i8 -1>
+}
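+
+; For reference: VGBM expands its 16-bit immediate so that bit i (counting
+; from the most-significant bit) selects whether byte i of the result is 0xff
+; or 0x00.  The byte pattern in f3 therefore corresponds to the bits
+; 1000 1100 0111 0101 = 0x8c75 = 35957, matching the expected immediate.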
+
+; Test that undefs are treated as zero.
+define <16 x i8> @f4() {
+; CHECK-LABEL: f4:
+; CHECK: vgbm %v24, 35957
+; CHECK: br %r14
+ ret <16 x i8> <i8 -1, i8 undef, i8 undef, i8 undef,
+ i8 -1, i8 -1, i8 undef, i8 undef,
+ i8 undef, i8 -1, i8 -1, i8 -1,
+ i8 undef, i8 -1, i8 undef, i8 -1>
+}
+
+; Test that we don't use VGBM if one of the bytes is not 0 or 0xff.
+define <16 x i8> @f5() {
+; CHECK-LABEL: f5:
+; CHECK-NOT: vgbm
+; CHECK: br %r14
+ ret <16 x i8> <i8 -1, i8 0, i8 0, i8 0,
+ i8 -1, i8 -1, i8 0, i8 1,
+ i8 0, i8 -1, i8 -1, i8 -1,
+ i8 0, i8 -1, i8 0, i8 -1>
+}
+
+; Test an all-zeros v2i8 that gets promoted to v16i8.
+define <2 x i8> @f6() {
+; CHECK-LABEL: f6:
+; CHECK: vgbm %v24, 0
+; CHECK: br %r14
+ ret <2 x i8> zeroinitializer
+}
+
+; Test a mixed v2i8 that gets promoted to v16i8 (mask 0x8000).
+define <2 x i8> @f7() {
+; CHECK-LABEL: f7:
+; CHECK: vgbm %v24, 32768
+; CHECK: br %r14
+ ret <2 x i8> <i8 255, i8 0>
+}
+
+; Test an all-zeros v4i8 that gets promoted to v16i8.
+define <4 x i8> @f8() {
+; CHECK-LABEL: f8:
+; CHECK: vgbm %v24, 0
+; CHECK: br %r14
+ ret <4 x i8> zeroinitializer
+}
+
+; Test a mixed v4i8 that gets promoted to v16i8 (mask 0x9000).
+define <4 x i8> @f9() {
+; CHECK-LABEL: f9:
+; CHECK: vgbm %v24, 36864
+; CHECK: br %r14
+ ret <4 x i8> <i8 255, i8 0, i8 0, i8 255>
+}
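+
+; The v2i8/v4i8/v8i8 promotion tests assume, as the expected immediates
+; (0x8000, 0x9000, 0xe500) indicate, that a short vector is widened by
+; placing its elements in the leftmost bytes of the v16i8, with the remaining
+; mask bits being zero.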
+
+; Test an all-zeros v8i8 that gets promoted to v16i8.
+define <8 x i8> @f10() {
+; CHECK-LABEL: f10:
+; CHECK: vgbm %v24, 0
+; CHECK: br %r14
+ ret <8 x i8> zeroinitializer
+}
+
+; Test a mixed v8i8 that gets promoted to v16i8 (mask 0xE500).
+define <8 x i8> @f11() {
+; CHECK-LABEL: f11:
+; CHECK: vgbm %v24, 58624
+; CHECK: br %r14
+ ret <8 x i8> <i8 255, i8 255, i8 255, i8 0, i8 0, i8 255, i8 0, i8 255>
+}
diff --git a/test/CodeGen/SystemZ/vec-const-02.ll b/test/CodeGen/SystemZ/vec-const-02.ll
new file mode 100644
index 000000000000..73a89d4a841b
--- /dev/null
+++ b/test/CodeGen/SystemZ/vec-const-02.ll
@@ -0,0 +1,79 @@
+; Test vector byte masks, v8i16 version.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
+; Test an all-zeros vector.
+define <8 x i16> @f1() {
+; CHECK-LABEL: f1:
+; CHECK: vgbm %v24, 0
+; CHECK: br %r14
+ ret <8 x i16> zeroinitializer
+}
+
+; Test an all-ones vector.
+define <8 x i16> @f2() {
+; CHECK-LABEL: f2:
+; CHECK: vgbm %v24, 65535
+; CHECK: br %r14
+ ret <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1,
+ i16 -1, i16 -1, i16 -1, i16 -1>
+}
+
+; Test a mixed vector (mask 0x8c76).
+define <8 x i16> @f3() {
+; CHECK-LABEL: f3:
+; CHECK: vgbm %v24, 35958
+; CHECK: br %r14
+ ret <8 x i16> <i16 65280, i16 0, i16 65535, i16 0,
+ i16 255, i16 65535, i16 255, i16 65280>
+}
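+
+; With wider element types the same byte-mask logic applies per byte, so each
+; i16 element contributes two bits of the VGBM immediate: 0xff00 -> 10,
+; 0xffff -> 11, 0x00ff -> 01 and 0 -> 00, giving 1000 1100 0111 0110 = 0x8c76
+; here.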
+
+; Test that undefs are treated as zero.
+define <8 x i16> @f4() {
+; CHECK-LABEL: f4:
+; CHECK: vgbm %v24, 35958
+; CHECK: br %r14
+ ret <8 x i16> <i16 65280, i16 undef, i16 65535, i16 undef,
+ i16 255, i16 65535, i16 255, i16 65280>
+}
+
+; Test that we don't use VGBM if one of the bytes is not 0 or 0xff.
+define <8 x i16> @f5() {
+; CHECK-LABEL: f5:
+; CHECK-NOT: vgbm
+; CHECK: br %r14
+ ret <8 x i16> <i16 65280, i16 0, i16 65535, i16 0,
+ i16 255, i16 65535, i16 256, i16 65280>
+}
+
+; Test an all-zeros v2i16 that gets promoted to v8i16.
+define <2 x i16> @f6() {
+; CHECK-LABEL: f6:
+; CHECK: vgbm %v24, 0
+; CHECK: br %r14
+ ret <2 x i16> zeroinitializer
+}
+
+; Test a mixed v2i16 that gets promoted to v8i16 (mask 0xc000).
+define <2 x i16> @f7() {
+; CHECK-LABEL: f7:
+; CHECK: vgbm %v24, 49152
+; CHECK: br %r14
+ ret <2 x i16> <i16 65535, i16 0>
+}
+
+; Test an all-zeros v4i16 that gets promoted to v8i16.
+define <4 x i16> @f8() {
+; CHECK-LABEL: f8:
+; CHECK: vgbm %v24, 0
+; CHECK: br %r14
+ ret <4 x i16> zeroinitializer
+}
+
+; Test a mixed v4i16 that gets promoted to v8i16 (mask 0x7200).
+define <4 x i16> @f9() {
+; CHECK-LABEL: f9:
+; CHECK: vgbm %v24, 29184
+; CHECK: br %r14
+ ret <4 x i16> <i16 255, i16 65535, i16 0, i16 65280>
+}
diff --git a/test/CodeGen/SystemZ/vec-const-03.ll b/test/CodeGen/SystemZ/vec-const-03.ll
new file mode 100644
index 000000000000..adc1105229e6
--- /dev/null
+++ b/test/CodeGen/SystemZ/vec-const-03.ll
@@ -0,0 +1,59 @@
+; Test vector byte masks, v4i32 version.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
+; Test an all-zeros vector.
+define <4 x i32> @f1() {
+; CHECK-LABEL: f1:
+; CHECK: vgbm %v24, 0
+; CHECK: br %r14
+ ret <4 x i32> zeroinitializer
+}
+
+; Test an all-ones vector.
+define <4 x i32> @f2() {
+; CHECK-LABEL: f2:
+; CHECK: vgbm %v24, 65535
+; CHECK: br %r14
+ ret <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>
+}
+
+; Test a mixed vector (mask 0x8c76).
+define <4 x i32> @f3() {
+; CHECK-LABEL: f3:
+; CHECK: vgbm %v24, 35958
+; CHECK: br %r14
+ ret <4 x i32> <i32 4278190080, i32 4294901760, i32 16777215, i32 16776960>
+}
+
+; Test that undefs are treated as zero (mask 0x8076).
+define <4 x i32> @f4() {
+; CHECK-LABEL: f4:
+; CHECK: vgbm %v24, 32886
+; CHECK: br %r14
+ ret <4 x i32> <i32 4278190080, i32 undef, i32 16777215, i32 16776960>
+}
+
+; Test that we don't use VGBM if one of the bytes is not 0 or 0xff.
+define <4 x i32> @f5() {
+; CHECK-LABEL: f5:
+; CHECK-NOT: vgbm
+; CHECK: br %r14
+ ret <4 x i32> <i32 4278190080, i32 1, i32 16777215, i32 16776960>
+}
+
+; Test an all-zeros v2i32 that gets promoted to v4i32.
+define <2 x i32> @f6() {
+; CHECK-LABEL: f6:
+; CHECK: vgbm %v24, 0
+; CHECK: br %r14
+ ret <2 x i32> zeroinitializer
+}
+
+; Test a mixed v2i32 that gets promoted to v4i32 (mask 0xae00).
+define <2 x i32> @f7() {
+; CHECK-LABEL: f7:
+; CHECK: vgbm %v24, 44544
+; CHECK: br %r14
+ ret <2 x i32> <i32 4278255360, i32 -256>
+}
diff --git a/test/CodeGen/SystemZ/vec-const-04.ll b/test/CodeGen/SystemZ/vec-const-04.ll
new file mode 100644
index 000000000000..1c2fb414d25f
--- /dev/null
+++ b/test/CodeGen/SystemZ/vec-const-04.ll
@@ -0,0 +1,43 @@
+; Test vector byte masks, v2i64 version.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
+; Test an all-zeros vector.
+define <2 x i64> @f1() {
+; CHECK-LABEL: f1:
+; CHECK: vgbm %v24, 0
+; CHECK: br %r14
+ ret <2 x i64> zeroinitializer
+}
+
+; Test an all-ones vector.
+define <2 x i64> @f2() {
+; CHECK-LABEL: f2:
+; CHECK: vgbm %v24, 65535
+; CHECK: br %r14
+ ret <2 x i64> <i64 -1, i64 -1>
+}
+
+; Test a mixed vector (mask 0x8c76).
+define <2 x i64> @f3() {
+; CHECK-LABEL: f3:
+; CHECK: vgbm %v24, 35958
+; CHECK: br %r14
+ ret <2 x i64> <i64 18374686483966525440, i64 72057589759737600>
+}
+
+; Test that undefs are treated as zero (mask 0x8c00).
+define <2 x i64> @f4() {
+; CHECK-LABEL: f4:
+; CHECK: vgbm %v24, 35840
+; CHECK: br %r14
+ ret <2 x i64> <i64 18374686483966525440, i64 undef>
+}
+
+; Test that we don't use VGBM if one of the bytes is not 0 or 0xff.
+define <2 x i64> @f5() {
+; CHECK-LABEL: f5:
+; CHECK-NOT: vgbm
+; CHECK: br %r14
+ ret <2 x i64> <i64 18374686483966525441, i64 72057589759737600>
+}
diff --git a/test/CodeGen/SystemZ/vec-const-05.ll b/test/CodeGen/SystemZ/vec-const-05.ll
new file mode 100644
index 000000000000..55f3cdd59027
--- /dev/null
+++ b/test/CodeGen/SystemZ/vec-const-05.ll
@@ -0,0 +1,63 @@
+; Test vector byte masks, v4f32 version.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
+; Test an all-zeros vector.
+define <4 x float> @f1() {
+; CHECK-LABEL: f1:
+; CHECK: vgbm %v24, 0
+; CHECK: br %r14
+ ret <4 x float> zeroinitializer
+}
+
+; Test an all-ones vector.
+define <4 x float> @f2() {
+; CHECK-LABEL: f2:
+; CHECK: vgbm %v24, 65535
+; CHECK: br %r14
+ ret <4 x float> <float 0xffffffffe0000000, float 0xffffffffe0000000,
+ float 0xffffffffe0000000, float 0xffffffffe0000000>
+}
+
+; Test a mixed vector (mask 0xc731).
+define <4 x float> @f3() {
+; CHECK-LABEL: f3:
+; CHECK: vgbm %v24, 50993
+; CHECK: br %r14
+ ret <4 x float> <float 0xffffe00000000000, float 0x381fffffe0000000,
+ float 0x379fffe000000000, float 0x371fe00000000000>
+}
+
+; Test that undefs are treated as zero (mask 0xc031).
+define <4 x float> @f4() {
+; CHECK-LABEL: f4:
+; CHECK: vgbm %v24, 49201
+; CHECK: br %r14
+ ret <4 x float> <float 0xffffe00000000000, float undef,
+ float 0x379fffe000000000, float 0x371fe00000000000>
+}
+
+; Test that we don't use VGBM if one of the bytes is not 0 or 0xff.
+define <4 x float> @f5() {
+; CHECK-LABEL: f5:
+; CHECK-NOT: vgbm
+; CHECK: br %r14
+ ret <4 x float> <float 0xffffe00000000000, float 0x381fffffc0000000,
+ float 0x379fffe000000000, float 0x371fe00000000000>
+}
+
+; Test an all-zeros v2f32 that gets promoted to v4f32.
+define <2 x float> @f6() {
+; CHECK-LABEL: f6:
+; CHECK: vgbm %v24, 0
+; CHECK: br %r14
+ ret <2 x float> zeroinitializer
+}
+
+; Test a mixed v2f32 that gets promoted to v4f32 (mask 0xc700).
+define <2 x float> @f7() {
+; CHECK-LABEL: f7:
+; CHECK: vgbm %v24, 50944
+; CHECK: br %r14
+ ret <2 x float> <float 0xffffe00000000000, float 0x381fffffe0000000>
+}
diff --git a/test/CodeGen/SystemZ/vec-const-06.ll b/test/CodeGen/SystemZ/vec-const-06.ll
new file mode 100644
index 000000000000..be53a0581ec2
--- /dev/null
+++ b/test/CodeGen/SystemZ/vec-const-06.ll
@@ -0,0 +1,43 @@
+; Test vector byte masks, v2f64 version.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
+; Test an all-zeros vector.
+define <2 x double> @f1() {
+; CHECK-LABEL: f1:
+; CHECK: vgbm %v24, 0
+; CHECK: br %r14
+ ret <2 x double> zeroinitializer
+}
+
+; Test an all-ones vector.
+define <2 x double> @f2() {
+; CHECK-LABEL: f2:
+; CHECK: vgbm %v24, 65535
+; CHECK: br %r14
+ ret <2 x double> <double 0xffffffffffffffff, double 0xffffffffffffffff>
+}
+
+; Test a mixed vector (mask 0x8c76).
+define <2 x double> @f3() {
+; CHECK-LABEL: f3:
+; CHECK: vgbm %v24, 35958
+; CHECK: br %r14
+ ret <2 x double> <double 0xff000000ffff0000, double 0x00ffffff00ffff00>
+}
+
+; Test that undefs are treated as zero (mask 0x8c00).
+define <2 x double> @f4() {
+; CHECK-LABEL: f4:
+; CHECK: vgbm %v24, 35840
+; CHECK: br %r14
+ ret <2 x double> <double 0xff000000ffff0000, double undef>
+}
+
+; Test that we don't use VGBM if one of the bytes is not 0 or 0xff.
+define <2 x double> @f5() {
+; CHECK-LABEL: f5:
+; CHECK-NOT: vgbm
+; CHECK: br %r14
+ ret <2 x double> <double 0xfe000000ffff0000, double 0x00ffffff00ffff00>
+}
diff --git a/test/CodeGen/SystemZ/vec-const-07.ll b/test/CodeGen/SystemZ/vec-const-07.ll
new file mode 100644
index 000000000000..6fcf95b69218
--- /dev/null
+++ b/test/CodeGen/SystemZ/vec-const-07.ll
@@ -0,0 +1,229 @@
+; Test vector replicates, v16i8 version.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
+; Test a byte-granularity replicate with the lowest useful value.
+define <16 x i8> @f1() {
+; CHECK-LABEL: f1:
+; CHECK: vrepib %v24, 1
+; CHECK: br %r14
+ ret <16 x i8> <i8 1, i8 1, i8 1, i8 1,
+ i8 1, i8 1, i8 1, i8 1,
+ i8 1, i8 1, i8 1, i8 1,
+ i8 1, i8 1, i8 1, i8 1>
+}
+
+; Test a byte-granularity replicate with an arbitrary value.
+define <16 x i8> @f2() {
+; CHECK-LABEL: f2:
+; CHECK: vrepib %v24, -55
+; CHECK: br %r14
+ ret <16 x i8> <i8 201, i8 201, i8 201, i8 201,
+ i8 201, i8 201, i8 201, i8 201,
+ i8 201, i8 201, i8 201, i8 201,
+ i8 201, i8 201, i8 201, i8 201>
+}
+
+; Test a byte-granularity replicate with the highest useful value.
+define <16 x i8> @f3() {
+; CHECK-LABEL: f3:
+; CHECK: vrepib %v24, -2
+; CHECK: br %r14
+ ret <16 x i8> <i8 254, i8 254, i8 254, i8 254,
+ i8 254, i8 254, i8 254, i8 254,
+ i8 254, i8 254, i8 254, i8 254,
+ i8 254, i8 254, i8 254, i8 254>
+}
+
+; Test a halfword-granularity replicate with the lowest useful value.
+define <16 x i8> @f4() {
+; CHECK-LABEL: f4:
+; CHECK: vrepih %v24, 1
+; CHECK: br %r14
+ ret <16 x i8> <i8 0, i8 1, i8 0, i8 1,
+ i8 0, i8 1, i8 0, i8 1,
+ i8 0, i8 1, i8 0, i8 1,
+ i8 0, i8 1, i8 0, i8 1>
+}
+
+; Test a halfword-granularity replicate with an arbitrary value.
+define <16 x i8> @f5() {
+; CHECK-LABEL: f5:
+; CHECK: vrepih %v24, 25650
+; CHECK: br %r14
+ ret <16 x i8> <i8 100, i8 50, i8 100, i8 50,
+ i8 100, i8 50, i8 100, i8 50,
+ i8 100, i8 50, i8 100, i8 50,
+ i8 100, i8 50, i8 100, i8 50>
+}
+
+; Test a halfword-granularity replicate with the highest useful value.
+define <16 x i8> @f6() {
+; CHECK-LABEL: f6:
+; CHECK: vrepih %v24, -2
+; CHECK: br %r14
+ ret <16 x i8> <i8 255, i8 254, i8 255, i8 254,
+ i8 255, i8 254, i8 255, i8 254,
+ i8 255, i8 254, i8 255, i8 254,
+ i8 255, i8 254, i8 255, i8 254>
+}
+
+; Test a word-granularity replicate with the lowest useful positive value.
+define <16 x i8> @f7() {
+; CHECK-LABEL: f7:
+; CHECK: vrepif %v24, 1
+; CHECK: br %r14
+ ret <16 x i8> <i8 0, i8 0, i8 0, i8 1,
+ i8 0, i8 0, i8 0, i8 1,
+ i8 0, i8 0, i8 0, i8 1,
+ i8 0, i8 0, i8 0, i8 1>
+}
+
+; Test a word-granularity replicate with the highest in-range value.
+define <16 x i8> @f8() {
+; CHECK-LABEL: f8:
+; CHECK: vrepif %v24, 32767
+; CHECK: br %r14
+ ret <16 x i8> <i8 0, i8 0, i8 127, i8 255,
+ i8 0, i8 0, i8 127, i8 255,
+ i8 0, i8 0, i8 127, i8 255,
+ i8 0, i8 0, i8 127, i8 255>
+}
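+
+; The VREPI instructions take a 16-bit signed immediate that is sign-extended
+; to the element size and replicated, which is why 32767 is the highest and
+; -32768 the lowest word/doubleword value these tests expect to be
+; materialized this way; values outside that range (f9, f11, f15, f17) must
+; not use VREPIF/VREPIG.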
+
+; Test a word-granularity replicate with the next highest value.
+; This cannot use VREPIF.
+define <16 x i8> @f9() {
+; CHECK-LABEL: f9:
+; CHECK-NOT: vrepif
+; CHECK: br %r14
+ ret <16 x i8> <i8 0, i8 0, i8 128, i8 0,
+ i8 0, i8 0, i8 128, i8 0,
+ i8 0, i8 0, i8 128, i8 0,
+ i8 0, i8 0, i8 128, i8 0>
+}
+
+; Test a word-granularity replicate with the lowest in-range value.
+define <16 x i8> @f10() {
+; CHECK-LABEL: f10:
+; CHECK: vrepif %v24, -32768
+; CHECK: br %r14
+ ret <16 x i8> <i8 255, i8 255, i8 128, i8 0,
+ i8 255, i8 255, i8 128, i8 0,
+ i8 255, i8 255, i8 128, i8 0,
+ i8 255, i8 255, i8 128, i8 0>
+}
+
+; Test a word-granularity replicate with the next lowest value.
+; This cannot use VREPIF.
+define <16 x i8> @f11() {
+; CHECK-LABEL: f11:
+; CHECK-NOT: vrepif
+; CHECK: br %r14
+ ret <16 x i8> <i8 255, i8 255, i8 127, i8 255,
+ i8 255, i8 255, i8 127, i8 255,
+ i8 255, i8 255, i8 127, i8 255,
+ i8 255, i8 255, i8 127, i8 255>
+}
+
+; Test a word-granularity replicate with the highest useful negative value.
+define <16 x i8> @f12() {
+; CHECK-LABEL: f12:
+; CHECK: vrepif %v24, -2
+; CHECK: br %r14
+ ret <16 x i8> <i8 255, i8 255, i8 255, i8 254,
+ i8 255, i8 255, i8 255, i8 254,
+ i8 255, i8 255, i8 255, i8 254,
+ i8 255, i8 255, i8 255, i8 254>
+}
+
+; Test a doubleword-granularity replicate with the lowest useful positive
+; value.
+define <16 x i8> @f13() {
+; CHECK-LABEL: f13:
+; CHECK: vrepig %v24, 1
+; CHECK: br %r14
+ ret <16 x i8> <i8 0, i8 0, i8 0, i8 0,
+ i8 0, i8 0, i8 0, i8 1,
+ i8 0, i8 0, i8 0, i8 0,
+ i8 0, i8 0, i8 0, i8 1>
+}
+
+; Test a doubleword-granularity replicate with the highest in-range value.
+define <16 x i8> @f14() {
+; CHECK-LABEL: f14:
+; CHECK: vrepig %v24, 32767
+; CHECK: br %r14
+ ret <16 x i8> <i8 0, i8 0, i8 0, i8 0,
+ i8 0, i8 0, i8 127, i8 255,
+ i8 0, i8 0, i8 0, i8 0,
+ i8 0, i8 0, i8 127, i8 255>
+}
+
+; Test a doubleword-granularity replicate with the next highest value.
+; This cannot use VREPIG.
+define <16 x i8> @f15() {
+; CHECK-LABEL: f15:
+; CHECK-NOT: vrepig
+; CHECK: br %r14
+ ret <16 x i8> <i8 0, i8 0, i8 0, i8 0,
+ i8 0, i8 0, i8 128, i8 0,
+ i8 0, i8 0, i8 0, i8 0,
+ i8 0, i8 0, i8 128, i8 0>
+}
+
+; Test a doubleword-granularity replicate with the lowest in-range value.
+define <16 x i8> @f16() {
+; CHECK-LABEL: f16:
+; CHECK: vrepig %v24, -32768
+; CHECK: br %r14
+ ret <16 x i8> <i8 255, i8 255, i8 255, i8 255,
+ i8 255, i8 255, i8 128, i8 0,
+ i8 255, i8 255, i8 255, i8 255,
+ i8 255, i8 255, i8 128, i8 0>
+}
+
+; Test a doubleword-granularity replicate with the next lowest value.
+; This cannot use VREPIG.
+define <16 x i8> @f17() {
+; CHECK-LABEL: f17:
+; CHECK-NOT: vrepig
+; CHECK: br %r14
+ ret <16 x i8> <i8 255, i8 255, i8 255, i8 255,
+ i8 255, i8 255, i8 127, i8 255,
+ i8 255, i8 255, i8 255, i8 255,
+ i8 255, i8 255, i8 127, i8 255>
+}
+
+; Test a doubleword-granularity replicate with the highest useful negative
+; value.
+define <16 x i8> @f18() {
+; CHECK-LABEL: f18:
+; CHECK: vrepig %v24, -2
+; CHECK: br %r14
+ ret <16 x i8> <i8 255, i8 255, i8 255, i8 255,
+ i8 255, i8 255, i8 255, i8 254,
+ i8 255, i8 255, i8 255, i8 255,
+ i8 255, i8 255, i8 255, i8 254>
+}
+
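+; Undefined lanes leave the constant materializer free to choose any value,
+; so f19 and f20 below check that undefs are chosen to fit the same single
+; VREPIG as f14 and f18 rather than forcing a more expensive sequence.
+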
+; Repeat f14 with undefs optimistically treated as 0.
+define <16 x i8> @f19() {
+; CHECK-LABEL: f19:
+; CHECK: vrepig %v24, 32767
+; CHECK: br %r14
+ ret <16 x i8> <i8 0, i8 undef, i8 0, i8 0,
+ i8 0, i8 0, i8 127, i8 255,
+ i8 undef, i8 0, i8 undef, i8 0,
+ i8 0, i8 0, i8 127, i8 255>
+}
+
+; Repeat f18 with undefs optimistically treated as -1.
+define <16 x i8> @f20() {
+; CHECK-LABEL: f20:
+; CHECK: vrepig %v24, -2
+; CHECK: br %r14
+ ret <16 x i8> <i8 undef, i8 255, i8 255, i8 255,
+ i8 255, i8 255, i8 undef, i8 254,
+ i8 255, i8 255, i8 255, i8 undef,
+ i8 255, i8 undef, i8 255, i8 254>
+}
diff --git a/test/CodeGen/SystemZ/vec-const-08.ll b/test/CodeGen/SystemZ/vec-const-08.ll
new file mode 100644
index 000000000000..5ab6947e548e
--- /dev/null
+++ b/test/CodeGen/SystemZ/vec-const-08.ll
@@ -0,0 +1,189 @@
+; Test vector replicates, v8i16 version.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
+; Test a byte-granularity replicate with the lowest useful value.
+define <8 x i16> @f1() {
+; CHECK-LABEL: f1:
+; CHECK: vrepib %v24, 1
+; CHECK: br %r14
+ ret <8 x i16> <i16 257, i16 257, i16 257, i16 257,
+ i16 257, i16 257, i16 257, i16 257>
+}
+
+; Test a byte-granularity replicate with an arbitrary value.
+define <8 x i16> @f2() {
+; CHECK-LABEL: f2:
+; CHECK: vrepib %v24, -55
+; CHECK: br %r14
+ ret <8 x i16> <i16 51657, i16 51657, i16 51657, i16 51657,
+ i16 51657, i16 51657, i16 51657, i16 51657>
+}
+
+; Test a byte-granularity replicate with the highest useful value.
+define <8 x i16> @f3() {
+; CHECK-LABEL: f3:
+; CHECK: vrepib %v24, -2
+; CHECK: br %r14
+ ret <8 x i16> <i16 -258, i16 -258, i16 -258, i16 -258,
+ i16 -258, i16 -258, i16 -258, i16 -258>
+}
+
+; Test a halfword-granularity replicate with the lowest useful value.
+define <8 x i16> @f4() {
+; CHECK-LABEL: f4:
+; CHECK: vrepih %v24, 1
+; CHECK: br %r14
+ ret <8 x i16> <i16 1, i16 1, i16 1, i16 1,
+ i16 1, i16 1, i16 1, i16 1>
+}
+
+; Test a halfword-granularity replicate with an arbitrary value.
+define <8 x i16> @f5() {
+; CHECK-LABEL: f5:
+; CHECK: vrepih %v24, 25650
+; CHECK: br %r14
+ ret <8 x i16> <i16 25650, i16 25650, i16 25650, i16 25650,
+ i16 25650, i16 25650, i16 25650, i16 25650>
+}
+
+; Test a halfword-granularity replicate with the highest useful value.
+define <8 x i16> @f6() {
+; CHECK-LABEL: f6:
+; CHECK: vrepih %v24, -2
+; CHECK: br %r14
+ ret <8 x i16> <i16 65534, i16 65534, i16 65534, i16 65534,
+ i16 65534, i16 65534, i16 65534, i16 65534>
+}
+
+; Test a word-granularity replicate with the lowest useful positive value.
+define <8 x i16> @f7() {
+; CHECK-LABEL: f7:
+; CHECK: vrepif %v24, 1
+; CHECK: br %r14
+ ret <8 x i16> <i16 0, i16 1, i16 0, i16 1,
+ i16 0, i16 1, i16 0, i16 1>
+}
+
+; Test a word-granularity replicate with the highest in-range value.
+define <8 x i16> @f8() {
+; CHECK-LABEL: f8:
+; CHECK: vrepif %v24, 32767
+; CHECK: br %r14
+ ret <8 x i16> <i16 0, i16 32767, i16 0, i16 32767,
+ i16 0, i16 32767, i16 0, i16 32767>
+}
+
+; Test a word-granularity replicate with the next highest value.
+; This cannot use VREPIF.
+define <8 x i16> @f9() {
+; CHECK-LABEL: f9:
+; CHECK-NOT: vrepif
+; CHECK: br %r14
+ ret <8 x i16> <i16 0, i16 32768, i16 0, i16 32768,
+ i16 0, i16 32768, i16 0, i16 32768>
+}
+
+; Test a word-granularity replicate with the lowest in-range value.
+define <8 x i16> @f10() {
+; CHECK-LABEL: f10:
+; CHECK: vrepif %v24, -32768
+; CHECK: br %r14
+ ret <8 x i16> <i16 -1, i16 -32768, i16 -1, i16 -32768,
+ i16 -1, i16 -32768, i16 -1, i16 -32768>
+}
+
+; Test a word-granularity replicate with the next lowest value.
+; This cannot use VREPIF.
+define <8 x i16> @f11() {
+; CHECK-LABEL: f11:
+; CHECK-NOT: vrepif
+; CHECK: br %r14
+ ret <8 x i16> <i16 -1, i16 -32769, i16 -1, i16 -32769,
+ i16 -1, i16 -32769, i16 -1, i16 -32769>
+}
+
+; Test a word-granularity replicate with the highest useful negative value.
+define <8 x i16> @f12() {
+; CHECK-LABEL: f12:
+; CHECK: vrepif %v24, -2
+; CHECK: br %r14
+ ret <8 x i16> <i16 -1, i16 -2, i16 -1, i16 -2,
+ i16 -1, i16 -2, i16 -1, i16 -2>
+}
+
+; Test a doubleword-granularity replicate with the lowest useful positive
+; value.
+define <8 x i16> @f13() {
+; CHECK-LABEL: f13:
+; CHECK: vrepig %v24, 1
+; CHECK: br %r14
+ ret <8 x i16> <i16 0, i16 0, i16 0, i16 1,
+ i16 0, i16 0, i16 0, i16 1>
+}
+
+; Test a doubleword-granularity replicate with the highest in-range value.
+define <8 x i16> @f14() {
+; CHECK-LABEL: f14:
+; CHECK: vrepig %v24, 32767
+; CHECK: br %r14
+ ret <8 x i16> <i16 0, i16 0, i16 0, i16 32767,
+ i16 0, i16 0, i16 0, i16 32767>
+}
+
+; Test a doubleword-granularity replicate with the next highest value.
+; This cannot use VREPIG.
+define <8 x i16> @f15() {
+; CHECK-LABEL: f15:
+; CHECK-NOT: vrepig
+; CHECK: br %r14
+ ret <8 x i16> <i16 0, i16 0, i16 0, i16 32768,
+ i16 0, i16 0, i16 0, i16 32768>
+}
+
+; Test a doubleword-granularity replicate with the lowest in-range value.
+define <8 x i16> @f16() {
+; CHECK-LABEL: f16:
+; CHECK: vrepig %v24, -32768
+; CHECK: br %r14
+ ret <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -32768,
+ i16 -1, i16 -1, i16 -1, i16 -32768>
+}
+
+; Test a doubleword-granularity replicate with the next lowest value.
+; This cannot use VREPIG.
+define <8 x i16> @f17() {
+; CHECK-LABEL: f17:
+; CHECK-NOT: vrepig
+; CHECK: br %r14
+ ret <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -32769,
+ i16 -1, i16 -1, i16 -1, i16 -32769>
+}
+
+; Test a doubleword-granularity replicate with the highest useful negative
+; value.
+define <8 x i16> @f18() {
+; CHECK-LABEL: f18:
+; CHECK: vrepig %v24, -2
+; CHECK: br %r14
+ ret <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -2,
+ i16 -1, i16 -1, i16 -1, i16 -2>
+}
+
+; Repeat f14 with undefs optimistically treated as 0.
+define <8 x i16> @f19() {
+; CHECK-LABEL: f19:
+; CHECK: vrepig %v24, 32767
+; CHECK: br %r14
+ ret <8 x i16> <i16 0, i16 undef, i16 0, i16 32767,
+ i16 undef, i16 0, i16 undef, i16 32767>
+}
+
+; Repeat f18 with undefs optimistically treated as -1.
+define <8 x i16> @f20() {
+; CHECK-LABEL: f20:
+; CHECK: vrepig %v24, -2
+; CHECK: br %r14
+ ret <8 x i16> <i16 -1, i16 -1, i16 undef, i16 -2,
+ i16 undef, i16 undef, i16 -1, i16 -2>
+}
diff --git a/test/CodeGen/SystemZ/vec-const-09.ll b/test/CodeGen/SystemZ/vec-const-09.ll
new file mode 100644
index 000000000000..2cbe92594525
--- /dev/null
+++ b/test/CodeGen/SystemZ/vec-const-09.ll
@@ -0,0 +1,169 @@
+; Test vector replicates, v4i32 version.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
+; Test a byte-granularity replicate with the lowest useful value.
+define <4 x i32> @f1() {
+; CHECK-LABEL: f1:
+; CHECK: vrepib %v24, 1
+; CHECK: br %r14
+ ret <4 x i32> <i32 16843009, i32 16843009, i32 16843009, i32 16843009>
+}
+
+; Test a byte-granularity replicate with an arbitrary value.
+define <4 x i32> @f2() {
+; CHECK-LABEL: f2:
+; CHECK: vrepib %v24, -55
+; CHECK: br %r14
+ ret <4 x i32> <i32 3385444809, i32 3385444809, i32 3385444809, i32 3385444809>
+}
+
+; Test a byte-granularity replicate with the highest useful value.
+define <4 x i32> @f3() {
+; CHECK-LABEL: f3:
+; CHECK: vrepib %v24, -2
+; CHECK: br %r14
+ ret <4 x i32> <i32 4278124286, i32 4278124286, i32 4278124286, i32 4278124286>
+}
+
+; Test a halfword-granularity replicate with the lowest useful value.
+define <4 x i32> @f4() {
+; CHECK-LABEL: f4:
+; CHECK: vrepih %v24, 1
+; CHECK: br %r14
+ ret <4 x i32> <i32 65537, i32 65537, i32 65537, i32 65537>
+}
+
+; Test a halfword-granularity replicate with an arbitrary value.
+define <4 x i32> @f5() {
+; CHECK-LABEL: f5:
+; CHECK: vrepih %v24, 25650
+; CHECK: br %r14
+ ret <4 x i32> <i32 1681024050, i32 1681024050, i32 1681024050, i32 1681024050>
+}
+
+; Test a halfword-granularity replicate with the highest useful value.
+define <4 x i32> @f6() {
+; CHECK-LABEL: f6:
+; CHECK: vrepih %v24, -2
+; CHECK: br %r14
+ ret <4 x i32> <i32 -65538, i32 -65538, i32 -65538, i32 -65538>
+}
+
+; Test a word-granularity replicate with the lowest useful positive value.
+define <4 x i32> @f7() {
+; CHECK-LABEL: f7:
+; CHECK: vrepif %v24, 1
+; CHECK: br %r14
+ ret <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+}
+
+; Test a word-granularity replicate with the highest in-range value.
+define <4 x i32> @f8() {
+; CHECK-LABEL: f8:
+; CHECK: vrepif %v24, 32767
+; CHECK: br %r14
+ ret <4 x i32> <i32 32767, i32 32767, i32 32767, i32 32767>
+}
+
+; Test a word-granularity replicate with the next highest value.
+; This cannot use VREPIF.
+define <4 x i32> @f9() {
+; CHECK-LABEL: f9:
+; CHECK-NOT: vrepif
+; CHECK: br %r14
+ ret <4 x i32> <i32 32768, i32 32768, i32 32768, i32 32768>
+}
+
+; Test a word-granularity replicate with the lowest in-range value.
+define <4 x i32> @f10() {
+; CHECK-LABEL: f10:
+; CHECK: vrepif %v24, -32768
+; CHECK: br %r14
+ ret <4 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768>
+}
+
+; Test a word-granularity replicate with the next lowest value.
+; This cannot use VREPIF.
+define <4 x i32> @f11() {
+; CHECK-LABEL: f11:
+; CHECK-NOT: vrepif
+; CHECK: br %r14
+ ret <4 x i32> <i32 -32769, i32 -32769, i32 -32769, i32 -32769>
+}
+
+; Test a word-granularity replicate with the highest useful negative value.
+define <4 x i32> @f12() {
+; CHECK-LABEL: f12:
+; CHECK: vrepif %v24, -2
+; CHECK: br %r14
+ ret <4 x i32> <i32 -2, i32 -2, i32 -2, i32 -2>
+}
+
+; Test a doubleword-granularity replicate with the lowest useful positive
+; value.
+define <4 x i32> @f13() {
+; CHECK-LABEL: f13:
+; CHECK: vrepig %v24, 1
+; CHECK: br %r14
+ ret <4 x i32> <i32 0, i32 1, i32 0, i32 1>
+}
+
+; Test a doubleword-granularity replicate with the highest in-range value.
+define <4 x i32> @f14() {
+; CHECK-LABEL: f14:
+; CHECK: vrepig %v24, 32767
+; CHECK: br %r14
+ ret <4 x i32> <i32 0, i32 32767, i32 0, i32 32767>
+}
+
+; Test a doubleword-granularity replicate with the next highest value.
+; This cannot use VREPIG.
+define <4 x i32> @f15() {
+; CHECK-LABEL: f15:
+; CHECK-NOT: vrepig
+; CHECK: br %r14
+ ret <4 x i32> <i32 0, i32 32768, i32 0, i32 32768>
+}
+
+; Test a doubleword-granularity replicate with the lowest in-range value.
+define <4 x i32> @f16() {
+; CHECK-LABEL: f16:
+; CHECK: vrepig %v24, -32768
+; CHECK: br %r14
+ ret <4 x i32> <i32 -1, i32 -32768, i32 -1, i32 -32768>
+}
+
+; Test a doubleword-granularity replicate with the next lowest value.
+; This cannot use VREPIG.
+define <4 x i32> @f17() {
+; CHECK-LABEL: f17:
+; CHECK-NOT: vrepig
+; CHECK: br %r14
+ ret <4 x i32> <i32 -1, i32 -32769, i32 -1, i32 -32769>
+}
+
+; Test a doubleword-granularity replicate with the highest useful negative
+; value.
+define <4 x i32> @f18() {
+; CHECK-LABEL: f18:
+; CHECK: vrepig %v24, -2
+; CHECK: br %r14
+ ret <4 x i32> <i32 -1, i32 -2, i32 -1, i32 -2>
+}
+
+; Repeat f14 with undefs optimistically treated as 0, 32767.
+define <4 x i32> @f19() {
+; CHECK-LABEL: f19:
+; CHECK: vrepig %v24, 32767
+; CHECK: br %r14
+ ret <4 x i32> <i32 undef, i32 undef, i32 0, i32 32767>
+}
+
+; Repeat f18 with undefs optimistically treated as -2, -1.
+define <4 x i32> @f20() {
+; CHECK-LABEL: f20:
+; CHECK: vrepig %v24, -2
+; CHECK: br %r14
+ ret <4 x i32> <i32 -1, i32 undef, i32 undef, i32 -2>
+}
diff --git a/test/CodeGen/SystemZ/vec-const-10.ll b/test/CodeGen/SystemZ/vec-const-10.ll
new file mode 100644
index 000000000000..0613b69a2777
--- /dev/null
+++ b/test/CodeGen/SystemZ/vec-const-10.ll
@@ -0,0 +1,169 @@
+; Test vector replicates, v2i64 version.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
+; Test a byte-granularity replicate with the lowest useful value.
+define <2 x i64> @f1() {
+; CHECK-LABEL: f1:
+; CHECK: vrepib %v24, 1
+; CHECK: br %r14
+ ret <2 x i64> <i64 72340172838076673, i64 72340172838076673>
+}
+
+; Test a byte-granularity replicate with an arbitrary value.
+define <2 x i64> @f2() {
+; CHECK-LABEL: f2:
+; CHECK: vrepib %v24, -55
+; CHECK: br %r14
+ ret <2 x i64> <i64 -3906369333256140343, i64 -3906369333256140343>
+}
+
+; Test a byte-granularity replicate with the highest useful value.
+define <2 x i64> @f3() {
+; CHECK-LABEL: f3:
+; CHECK: vrepib %v24, -2
+; CHECK: br %r14
+ ret <2 x i64> <i64 -72340172838076674, i64 -72340172838076674>
+}
+
+; Test a halfword-granularity replicate with the lowest useful value.
+define <2 x i64> @f4() {
+; CHECK-LABEL: f4:
+; CHECK: vrepih %v24, 1
+; CHECK: br %r14
+ ret <2 x i64> <i64 281479271743489, i64 281479271743489>
+}
+
+; Test a halfword-granularity replicate with an arbitrary value.
+define <2 x i64> @f5() {
+; CHECK-LABEL: f5:
+; CHECK: vrepih %v24, 25650
+; CHECK: br %r14
+ ret <2 x i64> <i64 7219943320220492850, i64 7219943320220492850>
+}
+
+; Test a halfword-granularity replicate with the highest useful value.
+define <2 x i64> @f6() {
+; CHECK-LABEL: f6:
+; CHECK: vrepih %v24, -2
+; CHECK: br %r14
+ ret <2 x i64> <i64 -281479271743490, i64 -281479271743490>
+}
+
+; Test a word-granularity replicate with the lowest useful positive value.
+define <2 x i64> @f7() {
+; CHECK-LABEL: f7:
+; CHECK: vrepif %v24, 1
+; CHECK: br %r14
+ ret <2 x i64> <i64 4294967297, i64 4294967297>
+}
+
+; Test a word-granularity replicate with the highest in-range value.
+define <2 x i64> @f8() {
+; CHECK-LABEL: f8:
+; CHECK: vrepif %v24, 32767
+; CHECK: br %r14
+ ret <2 x i64> <i64 140733193420799, i64 140733193420799>
+}
+
+; Test a word-granularity replicate with the next highest value.
+; This cannot use VREPIF.
+define <2 x i64> @f9() {
+; CHECK-LABEL: f9:
+; CHECK-NOT: vrepif
+; CHECK: br %r14
+ ret <2 x i64> <i64 140737488388096, i64 140737488388096>
+}
+
+; Test a word-granularity replicate with the lowest in-range value.
+define <2 x i64> @f10() {
+; CHECK-LABEL: f10:
+; CHECK: vrepif %v24, -32768
+; CHECK: br %r14
+ ret <2 x i64> <i64 -140733193420800, i64 -140733193420800>
+}
+
+; Test a word-granularity replicate with the next lowest value.
+; This cannot use VREPIF.
+define <2 x i64> @f11() {
+; CHECK-LABEL: f11:
+; CHECK-NOT: vrepif
+; CHECK: br %r14
+ ret <2 x i64> <i64 -140737488388097, i64 -140737488388097>
+}
+
+; Test a word-granularity replicate with the highest useful negative value.
+define <2 x i64> @f12() {
+; CHECK-LABEL: f12:
+; CHECK: vrepif %v24, -2
+; CHECK: br %r14
+ ret <2 x i64> <i64 -4294967298, i64 -4294967298>
+}
+
+; Test a doubleword-granularity replicate with the lowest useful positive
+; value.
+define <2 x i64> @f13() {
+; CHECK-LABEL: f13:
+; CHECK: vrepig %v24, 1
+; CHECK: br %r14
+ ret <2 x i64> <i64 1, i64 1>
+}
+
+; Test a doubleword-granularity replicate with the highest in-range value.
+define <2 x i64> @f14() {
+; CHECK-LABEL: f14:
+; CHECK: vrepig %v24, 32767
+; CHECK: br %r14
+ ret <2 x i64> <i64 32767, i64 32767>
+}
+
+; Test a doubleword-granularity replicate with the next highest value.
+; This cannot use VREPIG.
+define <2 x i64> @f15() {
+; CHECK-LABEL: f15:
+; CHECK-NOT: vrepig
+; CHECK: br %r14
+ ret <2 x i64> <i64 32768, i64 32768>
+}
+
+; Test a doubleword-granularity replicate with the lowest in-range value.
+define <2 x i64> @f16() {
+; CHECK-LABEL: f16:
+; CHECK: vrepig %v24, -32768
+; CHECK: br %r14
+ ret <2 x i64> <i64 -32768, i64 -32768>
+}
+
+; Test a doubleword-granularity replicate with the next lowest value.
+; This cannot use VREPIG.
+define <2 x i64> @f17() {
+; CHECK-LABEL: f17:
+; CHECK-NOT: vrepig
+; CHECK: br %r14
+ ret <2 x i64> <i64 -32769, i64 -32769>
+}
+
+; Test a doubleword-granularity replicate with the highest useful negative
+; value.
+define <2 x i64> @f18() {
+; CHECK-LABEL: f18:
+; CHECK: vrepig %v24, -2
+; CHECK: br %r14
+ ret <2 x i64> <i64 -2, i64 -2>
+}
+
+; Repeat f14 with undefs optimistically treated as 32767.
+define <2 x i64> @f19() {
+; CHECK-LABEL: f19:
+; CHECK: vrepig %v24, 32767
+; CHECK: br %r14
+ ret <2 x i64> <i64 undef, i64 32767>
+}
+
+; Repeat f18 with undefs optimistically treated as -2.
+define <2 x i64> @f20() {
+; CHECK-LABEL: f20:
+; CHECK: vrepig %v24, -2
+; CHECK: br %r14
+ ret <2 x i64> <i64 undef, i64 -2>
+}
diff --git a/test/CodeGen/SystemZ/vec-const-11.ll b/test/CodeGen/SystemZ/vec-const-11.ll
new file mode 100644
index 000000000000..0c69b8803b2f
--- /dev/null
+++ b/test/CodeGen/SystemZ/vec-const-11.ll
@@ -0,0 +1,189 @@
+; Test vector replicates, v4f32 version.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
+; Test a byte-granularity replicate with the lowest useful value.
+define <4 x float> @f1() {
+; CHECK-LABEL: f1:
+; CHECK: vrepib %v24, 1
+; CHECK: br %r14
+ ret <4 x float> <float 0x3820202020000000, float 0x3820202020000000,
+ float 0x3820202020000000, float 0x3820202020000000>
+}
+
+; Test a byte-granularity replicate with an arbitrary value.
+define <4 x float> @f2() {
+; CHECK-LABEL: f2:
+; CHECK: vrepib %v24, -55
+; CHECK: br %r14
+ ret <4 x float> <float 0xc139393920000000, float 0xc139393920000000,
+ float 0xc139393920000000, float 0xc139393920000000>
+}
+
+; Test a byte-granularity replicate with the highest useful value.
+define <4 x float> @f3() {
+; CHECK-LABEL: f3:
+; CHECK: vrepib %v24, -2
+; CHECK: br %r14
+ ret <4 x float> <float 0xc7dfdfdfc0000000, float 0xc7dfdfdfc0000000,
+ float 0xc7dfdfdfc0000000, float 0xc7dfdfdfc0000000>
+}
+
+; Test a halfword-granularity replicate with the lowest useful value.
+define <4 x float> @f4() {
+; CHECK-LABEL: f4:
+; CHECK: vrepih %v24, 1
+; CHECK: br %r14
+ ret <4 x float> <float 0x37a0001000000000, float 0x37a0001000000000,
+ float 0x37a0001000000000, float 0x37a0001000000000>
+}
+
+; Test a halfword-granularity replicate with an arbitrary value.
+define <4 x float> @f5() {
+; CHECK-LABEL: f5:
+; CHECK: vrepih %v24, 25650
+; CHECK: br %r14
+ ret <4 x float> <float 0x44864c8640000000, float 0x44864c8640000000,
+ float 0x44864c8640000000, float 0x44864c8640000000>
+}
+
+; Test a halfword-granularity replicate with the highest useful value.
+define <4 x float> @f6() {
+; CHECK-LABEL: f6:
+; CHECK: vrepih %v24, -2
+; CHECK: br %r14
+ ret <4 x float> <float 0xffffdfffc0000000, float 0xffffdfffc0000000,
+ float 0xffffdfffc0000000, float 0xffffdfffc0000000>
+}
+
+; Test a word-granularity replicate with the lowest useful positive value.
+define <4 x float> @f7() {
+; CHECK-LABEL: f7:
+; CHECK: vrepif %v24, 1
+; CHECK: br %r14
+ ret <4 x float> <float 0x36a0000000000000, float 0x36a0000000000000,
+ float 0x36a0000000000000, float 0x36a0000000000000>
+}
+
+; Test a word-granularity replicate with the highest in-range value.
+define <4 x float> @f8() {
+; CHECK-LABEL: f8:
+; CHECK: vrepif %v24, 32767
+; CHECK: br %r14
+ ret <4 x float> <float 0x378fffc000000000, float 0x378fffc000000000,
+ float 0x378fffc000000000, float 0x378fffc000000000>
+}
+
+; Test a word-granularity replicate with the next highest value.
+; This cannot use VREPIF.
+define <4 x float> @f9() {
+; CHECK-LABEL: f9:
+; CHECK-NOT: vrepif
+; CHECK: br %r14
+ ret <4 x float> <float 0x3790000000000000, float 0x3790000000000000,
+ float 0x3790000000000000, float 0x3790000000000000>
+}
+
+; Test a word-granularity replicate with the lowest in-range value.
+define <4 x float> @f10() {
+; CHECK-LABEL: f10:
+; CHECK: vrepif %v24, -32768
+; CHECK: br %r14
+ ret <4 x float> <float 0xfffff00000000000, float 0xfffff00000000000,
+ float 0xfffff00000000000, float 0xfffff00000000000>
+}
+
+; Test a word-granularity replicate with the next lowest value.
+; This cannot use VREPIF.
+define <4 x float> @f11() {
+; CHECK-LABEL: f11:
+; CHECK-NOT: vrepif
+; CHECK: br %r14
+ ret <4 x float> <float 0xffffefffe0000000, float 0xffffefffe0000000,
+ float 0xffffefffe0000000, float 0xffffefffe0000000>
+}
+
+; Test a word-granularity replicate with the highest useful negative value.
+define <4 x float> @f12() {
+; CHECK-LABEL: f12:
+; CHECK: vrepif %v24, -2
+; CHECK: br %r14
+ ret <4 x float> <float 0xffffffffc0000000, float 0xffffffffc0000000,
+ float 0xffffffffc0000000, float 0xffffffffc0000000>
+}
+
+; Test a doubleword-granularity replicate with the lowest useful positive
+; value.
+define <4 x float> @f13() {
+; CHECK-LABEL: f13:
+; CHECK: vrepig %v24, 1
+; CHECK: br %r14
+ ret <4 x float> <float 0.0, float 0x36a0000000000000,
+ float 0.0, float 0x36a0000000000000>
+}
+
+; Test a doubleword-granularity replicate with the highest in-range value.
+define <4 x float> @f14() {
+; CHECK-LABEL: f14:
+; CHECK: vrepig %v24, 32767
+; CHECK: br %r14
+ ret <4 x float> <float 0.0, float 0x378fffc000000000,
+ float 0.0, float 0x378fffc000000000>
+}
+
+; Test a doubleword-granularity replicate with the next highest value.
+; This cannot use VREPIG.
+define <4 x float> @f15() {
+; CHECK-LABEL: f15:
+; CHECK-NOT: vrepig
+; CHECK: br %r14
+ ret <4 x float> <float 0.0, float 0x3790000000000000,
+ float 0.0, float 0x3790000000000000>
+}
+
+; Test a doubleword-granularity replicate with the lowest in-range value.
+define <4 x float> @f16() {
+; CHECK-LABEL: f16:
+; CHECK: vrepig %v24, -32768
+; CHECK: br %r14
+ ret <4 x float> <float 0xffffffffe0000000, float 0xfffff00000000000,
+ float 0xffffffffe0000000, float 0xfffff00000000000>
+}
+
+; Test a doubleword-granularity replicate with the next lowest value.
+; This cannot use VREPIG.
+define <4 x float> @f17() {
+; CHECK-LABEL: f17:
+; CHECK-NOT: vrepig
+; CHECK: br %r14
+ ret <4 x float> <float 0xffffffffe0000000, float 0xffffefffe0000000,
+ float 0xffffffffe0000000, float 0xffffefffe0000000>
+}
+
+; Test a doubleword-granularity replicate with the highest useful negative
+; value.
+define <4 x float> @f18() {
+; CHECK-LABEL: f18:
+; CHECK: vrepig %v24, -2
+; CHECK: br %r14
+ ret <4 x float> <float 0xffffffffe0000000, float 0xffffffffc0000000,
+ float 0xffffffffe0000000, float 0xffffffffc0000000>
+}
+
+; Repeat f14 with undefs optimistically treated as 0, 32767.
+define <4 x float> @f19() {
+; CHECK-LABEL: f19:
+; CHECK: vrepig %v24, 32767
+; CHECK: br %r14
+ ret <4 x float> <float undef, float undef,
+ float 0.0, float 0x378fffc000000000>
+}
+
+; Repeat f18 with undefs optimistically treated as -2, -1.
+define <4 x float> @f20() {
+; CHECK-LABEL: f20:
+; CHECK: vrepig %v24, -2
+; CHECK: br %r14
+ ret <4 x float> <float 0xffffffffe0000000, float undef,
+ float undef, float 0xffffffffc0000000>
+}
diff --git a/test/CodeGen/SystemZ/vec-const-12.ll b/test/CodeGen/SystemZ/vec-const-12.ll
new file mode 100644
index 000000000000..ca66a3d173eb
--- /dev/null
+++ b/test/CodeGen/SystemZ/vec-const-12.ll
@@ -0,0 +1,169 @@
+; Test vector replicates, v2f64 version.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
+; Test a byte-granularity replicate with the lowest useful value.
+define <2 x double> @f1() {
+; CHECK-LABEL: f1:
+; CHECK: vrepib %v24, 1
+; CHECK: br %r14
+ ret <2 x double> <double 0x0101010101010101, double 0x0101010101010101>
+}
+
+; Test a byte-granularity replicate with an arbitrary value.
+define <2 x double> @f2() {
+; CHECK-LABEL: f2:
+; CHECK: vrepib %v24, -55
+; CHECK: br %r14
+ ret <2 x double> <double 0xc9c9c9c9c9c9c9c9, double 0xc9c9c9c9c9c9c9c9>
+}
+
+; Test a byte-granularity replicate with the highest useful value.
+define <2 x double> @f3() {
+; CHECK-LABEL: f3:
+; CHECK: vrepib %v24, -2
+; CHECK: br %r14
+ ret <2 x double> <double 0xfefefefefefefefe, double 0xfefefefefefefefe>
+}
+
+; Test a halfword-granularity replicate with the lowest useful value.
+define <2 x double> @f4() {
+; CHECK-LABEL: f4:
+; CHECK: vrepih %v24, 1
+; CHECK: br %r14
+ ret <2 x double> <double 0x0001000100010001, double 0x0001000100010001>
+}
+
+; Test a halfword-granularity replicate with an arbitrary value.
+define <2 x double> @f5() {
+; CHECK-LABEL: f5:
+; CHECK: vrepih %v24, 25650
+; CHECK: br %r14
+ ret <2 x double> <double 0x6432643264326432, double 0x6432643264326432>
+}
+
+; Test a halfword-granularity replicate with the highest useful value.
+define <2 x double> @f6() {
+; CHECK-LABEL: f6:
+; CHECK: vrepih %v24, -2
+; CHECK: br %r14
+ ret <2 x double> <double 0xfffefffefffefffe, double 0xfffefffefffefffe>
+}
+
+; Test a word-granularity replicate with the lowest useful positive value.
+define <2 x double> @f7() {
+; CHECK-LABEL: f7:
+; CHECK: vrepif %v24, 1
+; CHECK: br %r14
+ ret <2 x double> <double 0x0000000100000001, double 0x0000000100000001>
+}
+
+; Test a word-granularity replicate with the highest in-range value.
+define <2 x double> @f8() {
+; CHECK-LABEL: f8:
+; CHECK: vrepif %v24, 32767
+; CHECK: br %r14
+ ret <2 x double> <double 0x00007fff00007fff, double 0x00007fff00007fff>
+}
+
+; Test a word-granularity replicate with the next highest value.
+; This cannot use VREPIF.
+define <2 x double> @f9() {
+; CHECK-LABEL: f9:
+; CHECK-NOT: vrepif
+; CHECK: br %r14
+ ret <2 x double> <double 0x0000800000008000, double 0x0000800000008000>
+}
+
+; Test a word-granularity replicate with the lowest in-range value.
+define <2 x double> @f10() {
+; CHECK-LABEL: f10:
+; CHECK: vrepif %v24, -32768
+; CHECK: br %r14
+ ret <2 x double> <double 0xffff8000ffff8000, double 0xffff8000ffff8000>
+}
+
+; Test a word-granularity replicate with the next lowest value.
+; This cannot use VREPIF.
+define <2 x double> @f11() {
+; CHECK-LABEL: f11:
+; CHECK-NOT: vrepif
+; CHECK: br %r14
+ ret <2 x double> <double 0xffff7fffffff7fff, double 0xffff7fffffff7fff>
+}
+
+; Test a word-granularity replicate with the highest useful negative value.
+define <2 x double> @f12() {
+; CHECK-LABEL: f12:
+; CHECK: vrepif %v24, -2
+; CHECK: br %r14
+ ret <2 x double> <double 0xfffffffefffffffe, double 0xfffffffefffffffe>
+}
+
+; Test a doubleword-granularity replicate with the lowest useful positive
+; value.
+define <2 x double> @f13() {
+; CHECK-LABEL: f13:
+; CHECK: vrepig %v24, 1
+; CHECK: br %r14
+ ret <2 x double> <double 0x0000000000000001, double 0x0000000000000001>
+}
+
+; Test a doubleword-granularity replicate with the highest in-range value.
+define <2 x double> @f14() {
+; CHECK-LABEL: f14:
+; CHECK: vrepig %v24, 32767
+; CHECK: br %r14
+ ret <2 x double> <double 0x0000000000007fff, double 0x0000000000007fff>
+}
+
+; Test a doubleword-granularity replicate with the next highest value.
+; This cannot use VREPIG.
+define <2 x double> @f15() {
+; CHECK-LABEL: f15:
+; CHECK-NOT: vrepig
+; CHECK: br %r14
+ ret <2 x double> <double 0x0000000000008000, double 0x0000000000008000>
+}
+
+; Test a doubleword-granularity replicate with the lowest in-range value.
+define <2 x double> @f16() {
+; CHECK-LABEL: f16:
+; CHECK: vrepig %v24, -32768
+; CHECK: br %r14
+ ret <2 x double> <double 0xffffffffffff8000, double 0xffffffffffff8000>
+}
+
+; Test a doubleword-granularity replicate with the next lowest value.
+; This cannot use VREPIG.
+define <2 x double> @f17() {
+; CHECK-LABEL: f17:
+; CHECK-NOT: vrepig
+; CHECK: br %r14
+ ret <2 x double> <double 0xffffffffffff7fff, double 0xffffffffffff7fff>
+}
+
+; Test a doubleword-granularity replicate with the highest useful negative
+; value.
+define <2 x double> @f18() {
+; CHECK-LABEL: f18:
+; CHECK: vrepig %v24, -2
+; CHECK: br %r14
+ ret <2 x double> <double 0xfffffffffffffffe, double 0xfffffffffffffffe>
+}
+
+; Repeat f14 with undefs optimistically treated as 32767.
+define <2 x double> @f19() {
+; CHECK-LABEL: f19:
+; CHECK: vrepig %v24, 32767
+; CHECK: br %r14
+ ret <2 x double> <double undef, double 0x0000000000007fff>
+}
+
+; Repeat f18 with undefs optimistically treated as -2.
+define <2 x double> @f20() {
+; CHECK-LABEL: f20:
+; CHECK: vrepig %v24, -2
+; CHECK: br %r14
+ ret <2 x double> <double undef, double 0xfffffffffffffffe>
+}
diff --git a/test/CodeGen/SystemZ/vec-const-13.ll b/test/CodeGen/SystemZ/vec-const-13.ll
new file mode 100644
index 000000000000..2cc425252c21
--- /dev/null
+++ b/test/CodeGen/SystemZ/vec-const-13.ll
@@ -0,0 +1,193 @@
+; Test vector replicates that use VECTOR GENERATE MASK, v16i8 version.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
+; Test a word-granularity replicate with the lowest value that cannot use
+; VREPIF.
+define <16 x i8> @f1() {
+; CHECK-LABEL: f1:
+; CHECK: vgmf %v24, 16, 16
+; CHECK: br %r14
+ ret <16 x i8> <i8 0, i8 0, i8 128, i8 0,
+ i8 0, i8 0, i8 128, i8 0,
+ i8 0, i8 0, i8 128, i8 0,
+ i8 0, i8 0, i8 128, i8 0>
+}
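+
+; Rough VGM semantics, for reading the checks in this file: VGM sets, within
+; every element, the bits from position I2 through position I3 (counting from
+; the most-significant bit), wrapping around when I2 > I3.  "vgmf 16, 16"
+; therefore sets only bit 16 of each word, i.e. 0x00008000, which is the
+; 0, 0, 128, 0 byte pattern returned by f1.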
+
+; Test a word-granularity replicate that has the lower 17 bits set.
+define <16 x i8> @f2() {
+; CHECK-LABEL: f2:
+; CHECK: vgmf %v24, 15, 31
+; CHECK: br %r14
+ ret <16 x i8> <i8 0, i8 1, i8 255, i8 255,
+ i8 0, i8 1, i8 255, i8 255,
+ i8 0, i8 1, i8 255, i8 255,
+ i8 0, i8 1, i8 255, i8 255>
+}
+
+; Test a word-granularity replicate that has the upper 15 bits set.
+define <16 x i8> @f3() {
+; CHECK-LABEL: f3:
+; CHECK: vgmf %v24, 0, 14
+; CHECK: br %r14
+ ret <16 x i8> <i8 255, i8 254, i8 0, i8 0,
+ i8 255, i8 254, i8 0, i8 0,
+ i8 255, i8 254, i8 0, i8 0,
+ i8 255, i8 254, i8 0, i8 0>
+}
+
+; Test a word-granularity replicate that has middle bits set.
+define <16 x i8> @f4() {
+; CHECK-LABEL: f4:
+; CHECK: vgmf %v24, 12, 17
+; CHECK: br %r14
+ ret <16 x i8> <i8 0, i8 15, i8 192, i8 0,
+ i8 0, i8 15, i8 192, i8 0,
+ i8 0, i8 15, i8 192, i8 0,
+ i8 0, i8 15, i8 192, i8 0>
+}
+
+; Test a word-granularity replicate with a wrap-around mask.
+define <16 x i8> @f5() {
+; CHECK-LABEL: f5:
+; CHECK: vgmf %v24, 17, 15
+; CHECK: br %r14
+ ret <16 x i8> <i8 255, i8 255, i8 127, i8 255,
+ i8 255, i8 255, i8 127, i8 255,
+ i8 255, i8 255, i8 127, i8 255,
+ i8 255, i8 255, i8 127, i8 255>
+}
+
+; Test a doubleword-granularity replicate with the lowest value that cannot
+; use VREPIG.
+define <16 x i8> @f6() {
+; CHECK-LABEL: f6:
+; CHECK: vgmg %v24, 48, 48
+; CHECK: br %r14
+ ret <16 x i8> <i8 0, i8 0, i8 0, i8 0,
+ i8 0, i8 0, i8 128, i8 0,
+ i8 0, i8 0, i8 0, i8 0,
+ i8 0, i8 0, i8 128, i8 0>
+}
+
+; Test a doubleword-granularity replicate that has the lower 22 bits set.
+define <16 x i8> @f7() {
+; CHECK-LABEL: f7:
+; CHECK: vgmg %v24, 42, 63
+; CHECK: br %r14
+ ret <16 x i8> <i8 0, i8 0, i8 0, i8 0,
+ i8 0, i8 63, i8 255, i8 255,
+ i8 0, i8 0, i8 0, i8 0,
+ i8 0, i8 63, i8 255, i8 255>
+}
+
+; Test a doubleword-granularity replicate that has the upper 45 bits set.
+define <16 x i8> @f8() {
+; CHECK-LABEL: f8:
+; CHECK: vgmg %v24, 0, 44
+; CHECK: br %r14
+ ret <16 x i8> <i8 255, i8 255, i8 255, i8 255,
+ i8 255, i8 248, i8 0, i8 0,
+ i8 255, i8 255, i8 255, i8 255,
+ i8 255, i8 248, i8 0, i8 0>
+}
+
+; Test a doubleword-granularity replicate that has middle bits set.
+define <16 x i8> @f9() {
+; CHECK-LABEL: f9:
+; CHECK: vgmg %v24, 31, 42
+; CHECK: br %r14
+ ret <16 x i8> <i8 0, i8 0, i8 0, i8 1,
+ i8 255, i8 224, i8 0, i8 0,
+ i8 0, i8 0, i8 0, i8 1,
+ i8 255, i8 224, i8 0, i8 0>
+}
+
+; Test a doubleword-granularity replicate with a wrap-around mask.
+define <16 x i8> @f10() {
+; CHECK-LABEL: f10:
+; CHECK: vgmg %v24, 18, 0
+; CHECK: br %r14
+ ret <16 x i8> <i8 128, i8 0, i8 63, i8 255,
+ i8 255, i8 255, i8 255, i8 255,
+ i8 128, i8 0, i8 63, i8 255,
+ i8 255, i8 255, i8 255, i8 255>
+}
+
+; Retest f1 with arbitrary undefs instead of 0s.
+define <16 x i8> @f11() {
+; CHECK-LABEL: f11:
+; CHECK: vgmf %v24, 16, 16
+; CHECK: br %r14
+ ret <16 x i8> <i8 0, i8 undef, i8 128, i8 0,
+ i8 0, i8 0, i8 128, i8 undef,
+ i8 undef, i8 0, i8 128, i8 0,
+ i8 undef, i8 undef, i8 128, i8 0>
+}
+
+; Try a case where we want consistent undefs to be treated as 0.
+define <16 x i8> @f12() {
+; CHECK-LABEL: f12:
+; CHECK: vgmf %v24, 15, 23
+; CHECK: br %r14
+ ret <16 x i8> <i8 undef, i8 1, i8 255, i8 0,
+ i8 undef, i8 1, i8 255, i8 0,
+ i8 undef, i8 1, i8 255, i8 0,
+ i8 undef, i8 1, i8 255, i8 0>
+}
+
+; ...and again with the lower bits of the replicated constant.
+define <16 x i8> @f13() {
+; CHECK-LABEL: f13:
+; CHECK: vgmf %v24, 15, 22
+; CHECK: br %r14
+ ret <16 x i8> <i8 0, i8 1, i8 254, i8 undef,
+ i8 0, i8 1, i8 254, i8 undef,
+ i8 0, i8 1, i8 254, i8 undef,
+ i8 0, i8 1, i8 254, i8 undef>
+}
+
+; Try a case where we want consistent undefs to be treated as -1.
+define <16 x i8> @f14() {
+; CHECK-LABEL: f14:
+; CHECK: vgmf %v24, 28, 8
+; CHECK: br %r14
+ ret <16 x i8> <i8 undef, i8 128, i8 0, i8 15,
+ i8 undef, i8 128, i8 0, i8 15,
+ i8 undef, i8 128, i8 0, i8 15,
+ i8 undef, i8 128, i8 0, i8 15>
+}
+
+; ...and again with the lower bits of the replicated constant.
+define <16 x i8> @f15() {
+; CHECK-LABEL: f15:
+; CHECK: vgmf %v24, 18, 3
+; CHECK: br %r14
+ ret <16 x i8> <i8 240, i8 0, i8 63, i8 undef,
+ i8 240, i8 0, i8 63, i8 undef,
+ i8 240, i8 0, i8 63, i8 undef,
+ i8 240, i8 0, i8 63, i8 undef>
+}
+
+; Repeat f9 with arbitrary undefs.
+define <16 x i8> @f16() {
+; CHECK-LABEL: f16:
+; CHECK: vgmg %v24, 31, 42
+; CHECK: br %r14
+ ret <16 x i8> <i8 undef, i8 0, i8 undef, i8 1,
+ i8 255, i8 undef, i8 0, i8 0,
+ i8 0, i8 0, i8 0, i8 1,
+ i8 undef, i8 224, i8 undef, i8 undef>
+}
+
+; Try a case where we want some consistent undefs to be treated as 0
+; and some to be treated as 255.
+define <16 x i8> @f17() {
+; CHECK-LABEL: f17:
+; CHECK: vgmg %v24, 23, 35
+; CHECK: br %r14
+ ret <16 x i8> <i8 0, i8 undef, i8 1, i8 undef,
+ i8 240, i8 undef, i8 0, i8 0,
+ i8 0, i8 undef, i8 1, i8 undef,
+ i8 240, i8 undef, i8 0, i8 0>
+}
diff --git a/test/CodeGen/SystemZ/vec-const-14.ll b/test/CodeGen/SystemZ/vec-const-14.ll
new file mode 100644
index 000000000000..0e3f124dbf6a
--- /dev/null
+++ b/test/CodeGen/SystemZ/vec-const-14.ll
@@ -0,0 +1,113 @@
+; Test vector replicates that use VECTOR GENERATE MASK, v8i16 version.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
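+; As the cases below illustrate, the two immediates in each VGM check give the
+; first and last bit positions of a run of ones within each element, counting
+; from the most significant bit as bit 0; when the start index exceeds the end
+; index the run wraps around, and the mask is replicated into every word
+; (VGMF) or doubleword (VGMG) element.
+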
+; Test a word-granularity replicate with the lowest value that cannot use
+; VREPIF.
+define <8 x i16> @f1() {
+; CHECK-LABEL: f1:
+; CHECK: vgmf %v24, 16, 16
+; CHECK: br %r14
+ ret <8 x i16> <i16 0, i16 32768, i16 0, i16 32768,
+ i16 0, i16 32768, i16 0, i16 32768>
+}
+
+; Test a word-granularity replicate that has the lower 17 bits set.
+define <8 x i16> @f2() {
+; CHECK-LABEL: f2:
+; CHECK: vgmf %v24, 15, 31
+; CHECK: br %r14
+ ret <8 x i16> <i16 1, i16 -1, i16 1, i16 -1,
+ i16 1, i16 -1, i16 1, i16 -1>
+}
+
+; Test a word-granularity replicate that has the upper 15 bits set.
+define <8 x i16> @f3() {
+; CHECK-LABEL: f3:
+; CHECK: vgmf %v24, 0, 14
+; CHECK: br %r14
+ ret <8 x i16> <i16 -2, i16 0, i16 -2, i16 0,
+ i16 -2, i16 0, i16 -2, i16 0>
+}
+
+; Test a word-granularity replicate that has middle bits set.
+define <8 x i16> @f4() {
+; CHECK-LABEL: f4:
+; CHECK: vgmf %v24, 12, 17
+; CHECK: br %r14
+ ret <8 x i16> <i16 15, i16 49152, i16 15, i16 49152,
+ i16 15, i16 49152, i16 15, i16 49152>
+}
+
+; Test a word-granularity replicate with a wrap-around mask.
+define <8 x i16> @f5() {
+; CHECK-LABEL: f5:
+; CHECK: vgmf %v24, 17, 15
+; CHECK: br %r14
+ ret <8 x i16> <i16 -1, i16 32767, i16 -1, i16 32767,
+ i16 -1, i16 32767, i16 -1, i16 32767>
+}
+
+; Test a doubleword-granularity replicate with the lowest value that cannot
+; use VREPIG.
+define <8 x i16> @f6() {
+; CHECK-LABEL: f6:
+; CHECK: vgmg %v24, 48, 48
+; CHECK: br %r14
+ ret <8 x i16> <i16 0, i16 0, i16 0, i16 32768,
+ i16 0, i16 0, i16 0, i16 32768>
+}
+
+; Test a doubleword-granularity replicate that has the lower 22 bits set.
+define <8 x i16> @f7() {
+; CHECK-LABEL: f7:
+; CHECK: vgmg %v24, 42, 63
+; CHECK: br %r14
+ ret <8 x i16> <i16 0, i16 0, i16 63, i16 -1,
+ i16 0, i16 0, i16 63, i16 -1>
+}
+
+; Test a doubleword-granularity replicate that has the upper 45 bits set.
+define <8 x i16> @f8() {
+; CHECK-LABEL: f8:
+; CHECK: vgmg %v24, 0, 44
+; CHECK: br %r14
+ ret <8 x i16> <i16 -1, i16 -1, i16 -8, i16 0,
+ i16 -1, i16 -1, i16 -8, i16 0>
+}
+
+; Test a doubleword-granularity replicate that has middle bits set.
+define <8 x i16> @f9() {
+; CHECK-LABEL: f9:
+; CHECK: vgmg %v24, 31, 42
+; CHECK: br %r14
+ ret <8 x i16> <i16 0, i16 1, i16 -32, i16 0,
+ i16 0, i16 1, i16 -32, i16 0>
+}
+
+; Test a doubleword-granularity replicate with a wrap-around mask.
+define <8 x i16> @f10() {
+; CHECK-LABEL: f10:
+; CHECK: vgmg %v24, 18, 0
+; CHECK: br %r14
+ ret <8 x i16> <i16 32768, i16 16383, i16 -1, i16 -1,
+ i16 32768, i16 16383, i16 -1, i16 -1>
+}
+
+; Retest f1 with arbitrary undefs instead of 0s.
+define <8 x i16> @f11() {
+; CHECK-LABEL: f11:
+; CHECK: vgmf %v24, 16, 16
+; CHECK: br %r14
+ ret <8 x i16> <i16 undef, i16 32768, i16 0, i16 32768,
+ i16 0, i16 32768, i16 undef, i16 32768>
+}
+
+; ...likewise f9.
+define <8 x i16> @f12() {
+; CHECK-LABEL: f12:
+; CHECK: vgmg %v24, 31, 42
+; CHECK: br %r14
+ ret <8 x i16> <i16 undef, i16 1, i16 -32, i16 0,
+ i16 0, i16 1, i16 -32, i16 undef>
+}
diff --git a/test/CodeGen/SystemZ/vec-const-15.ll b/test/CodeGen/SystemZ/vec-const-15.ll
new file mode 100644
index 000000000000..cec445efe893
--- /dev/null
+++ b/test/CodeGen/SystemZ/vec-const-15.ll
@@ -0,0 +1,85 @@
+; Test vector replicates that use VECTOR GENERATE MASK, v4i32 version.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
+; Test a word-granularity replicate with the lowest value that cannot use
+; VREPIF.
+define <4 x i32> @f1() {
+; CHECK-LABEL: f1:
+; CHECK: vgmf %v24, 16, 16
+; CHECK: br %r14
+ ret <4 x i32> <i32 32768, i32 32768, i32 32768, i32 32768>
+}
+
+; Test a word-granularity replicate that has the lower 17 bits set.
+define <4 x i32> @f2() {
+; CHECK-LABEL: f2:
+; CHECK: vgmf %v24, 15, 31
+; CHECK: br %r14
+ ret <4 x i32> <i32 131071, i32 131071, i32 131071, i32 131071>
+}
+
+; Test a word-granularity replicate that has the upper 15 bits set.
+define <4 x i32> @f3() {
+; CHECK-LABEL: f3:
+; CHECK: vgmf %v24, 0, 14
+; CHECK: br %r14
+ ret <4 x i32> <i32 -131072, i32 -131072, i32 -131072, i32 -131072>
+}
+
+; Test a word-granularity replicate that has middle bits set.
+define <4 x i32> @f4() {
+; CHECK-LABEL: f4:
+; CHECK: vgmf %v24, 12, 17
+; CHECK: br %r14
+ ret <4 x i32> <i32 1032192, i32 1032192, i32 1032192, i32 1032192>
+}
+
+; Test a word-granularity replicate with a wrap-around mask.
+define <4 x i32> @f5() {
+; CHECK-LABEL: f5:
+; CHECK: vgmf %v24, 17, 15
+; CHECK: br %r14
+ ret <4 x i32> <i32 -32769, i32 -32769, i32 -32769, i32 -32769>
+}
+
+; Test a doubleword-granularity replicate with the lowest value that cannot
+; use VREPIG.
+define <4 x i32> @f6() {
+; CHECK-LABEL: f6:
+; CHECK: vgmg %v24, 48, 48
+; CHECK: br %r14
+ ret <4 x i32> <i32 0, i32 32768, i32 0, i32 32768>
+}
+
+; Test a doubleword-granularity replicate that has the lower 22 bits set.
+define <4 x i32> @f7() {
+; CHECK-LABEL: f7:
+; CHECK: vgmg %v24, 42, 63
+; CHECK: br %r14
+ ret <4 x i32> <i32 0, i32 4194303, i32 0, i32 4194303>
+}
+
+; Test a doubleword-granularity replicate that has the upper 45 bits set.
+define <4 x i32> @f8() {
+; CHECK-LABEL: f8:
+; CHECK: vgmg %v24, 0, 44
+; CHECK: br %r14
+ ret <4 x i32> <i32 -1, i32 -524288, i32 -1, i32 -524288>
+}
+
+; Test a doubleword-granularity replicate that has middle bits set.
+define <4 x i32> @f9() {
+; CHECK-LABEL: f9:
+; CHECK: vgmg %v24, 31, 42
+; CHECK: br %r14
+ ret <4 x i32> <i32 1, i32 -2097152, i32 1, i32 -2097152>
+}
+
+; Test a doubleword-granularity replicate with a wrap-around mask.
+define <4 x i32> @f10() {
+; CHECK-LABEL: f10:
+; CHECK: vgmg %v24, 18, 0
+; CHECK: br %r14
+ ret <4 x i32> <i32 -2147467265, i32 -1, i32 -2147467265, i32 -1>
+}
diff --git a/test/CodeGen/SystemZ/vec-const-16.ll b/test/CodeGen/SystemZ/vec-const-16.ll
new file mode 100644
index 000000000000..1ab7de2761cf
--- /dev/null
+++ b/test/CodeGen/SystemZ/vec-const-16.ll
@@ -0,0 +1,85 @@
+; Test vector replicates that use VECTOR GENERATE MASK, v2i64 version.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
+; Test a word-granularity replicate with the lowest value that cannot use
+; VREPIF.
+define <2 x i64> @f1() {
+; CHECK-LABEL: f1:
+; CHECK: vgmf %v24, 16, 16
+; CHECK: br %r14
+ ret <2 x i64> <i64 140737488388096, i64 140737488388096>
+}
+
+; Test a word-granularity replicate that has the lower 17 bits set.
+define <2 x i64> @f2() {
+; CHECK-LABEL: f2:
+; CHECK: vgmf %v24, 15, 31
+; CHECK: br %r14
+ ret <2 x i64> <i64 562945658585087, i64 562945658585087>
+}
+
+; Test a word-granularity replicate that has the upper 15 bits set.
+define <2 x i64> @f3() {
+; CHECK-LABEL: f3:
+; CHECK: vgmf %v24, 0, 14
+; CHECK: br %r14
+ ret <2 x i64> <i64 -562945658585088, i64 -562945658585088>
+}
+
+; Test a word-granularity replicate that has middle bits set.
+define <2 x i64> @f4() {
+; CHECK-LABEL: f4:
+; CHECK: vgmf %v24, 12, 17
+; CHECK: br %r14
+ ret <2 x i64> <i64 4433230884225024, i64 4433230884225024>
+}
+
+; Test a word-granularity replicate with a wrap-around mask.
+define <2 x i64> @f5() {
+; CHECK-LABEL: f5:
+; CHECK: vgmf %v24, 17, 15
+; CHECK: br %r14
+ ret <2 x i64> <i64 -140737488388097, i64 -140737488388097>
+}
+
+; Test a doubleword-granularity replicate with the lowest value that cannot
+; use VREPIG.
+define <2 x i64> @f6() {
+; CHECK-LABEL: f6:
+; CHECK: vgmg %v24, 48, 48
+; CHECK: br %r14
+ ret <2 x i64> <i64 32768, i64 32768>
+}
+
+; Test a doubleword-granularity replicate that has the lower 22 bits set.
+define <2 x i64> @f7() {
+; CHECK-LABEL: f7:
+; CHECK: vgmg %v24, 42, 63
+; CHECK: br %r14
+ ret <2 x i64> <i64 4194303, i64 4194303>
+}
+
+; Test a doubleword-granularity replicate that has the upper 45 bits set.
+define <2 x i64> @f8() {
+; CHECK-LABEL: f8:
+; CHECK: vgmg %v24, 0, 44
+; CHECK: br %r14
+ ret <2 x i64> <i64 -524288, i64 -524288>
+}
+
+; Test a doubleword-granularity replicate that has middle bits set.
+define <2 x i64> @f9() {
+; CHECK-LABEL: f9:
+; CHECK: vgmg %v24, 31, 42
+; CHECK: br %r14
+ ret <2 x i64> <i64 8587837440, i64 8587837440>
+}
+
+; Test a doubleword-granularity replicate with a wrap-around mask.
+define <2 x i64> @f10() {
+; CHECK-LABEL: f10:
+; CHECK: vgmg %v24, 18, 0
+; CHECK: br %r14
+ ret <2 x i64> <i64 -9223301668110598145, i64 -9223301668110598145>
+}
diff --git a/test/CodeGen/SystemZ/vec-const-17.ll b/test/CodeGen/SystemZ/vec-const-17.ll
new file mode 100644
index 000000000000..1306eab556e9
--- /dev/null
+++ b/test/CodeGen/SystemZ/vec-const-17.ll
@@ -0,0 +1,95 @@
+; Test vector replicates that use VECTOR GENERATE MASK, v4f32 version.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
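+; The float constants below use LLVM's hexadecimal form, i.e. the 64-bit
+; double whose value converts exactly to the intended 32-bit value, so the
+; word masks being checked for are not visible directly in the literals.
+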
+; Test a word-granularity replicate with the lowest value that cannot use
+; VREPIF.
+define <4 x float> @f1() {
+; CHECK-LABEL: f1:
+; CHECK: vgmf %v24, 16, 16
+; CHECK: br %r14
+ ret <4 x float> <float 0x3790000000000000, float 0x3790000000000000,
+ float 0x3790000000000000, float 0x3790000000000000>
+}
+
+; Test a word-granularity replicate that has the lower 17 bits set.
+define <4 x float> @f2() {
+; CHECK-LABEL: f2:
+; CHECK: vgmf %v24, 15, 31
+; CHECK: br %r14
+ ret <4 x float> <float 0x37affff000000000, float 0x37affff000000000,
+ float 0x37affff000000000, float 0x37affff000000000>
+}
+
+; Test a word-granularity replicate that has the upper 15 bits set.
+define <4 x float> @f3() {
+; CHECK-LABEL: f3:
+; CHECK: vgmf %v24, 0, 14
+; CHECK: br %r14
+ ret <4 x float> <float 0xffffc00000000000, float 0xffffc00000000000,
+ float 0xffffc00000000000, float 0xffffc00000000000>
+}
+
+; Test a word-granularity replicate that has middle bits set.
+define <4 x float> @f4() {
+; CHECK-LABEL: f4:
+; CHECK: vgmf %v24, 2, 8
+; CHECK: br %r14
+ ret <4 x float> <float 0x3ff0000000000000, float 0x3ff0000000000000,
+ float 0x3ff0000000000000, float 0x3ff0000000000000>
+}
+
+; Test a word-granularity replicate with a wrap-around mask.
+define <4 x float> @f5() {
+; CHECK-LABEL: f5:
+; CHECK: vgmf %v24, 9, 1
+; CHECK: br %r14
+ ret <4 x float> <float 0xc00fffffe0000000, float 0xc00fffffe0000000,
+ float 0xc00fffffe0000000, float 0xc00fffffe0000000>
+}
+
+; Test a doubleword-granularity replicate with the lowest value that cannot
+; use VREPIG.
+define <4 x float> @f6() {
+; CHECK-LABEL: f6:
+; CHECK: vgmg %v24, 48, 48
+; CHECK: br %r14
+ ret <4 x float> <float 0.0, float 0x3790000000000000,
+ float 0.0, float 0x3790000000000000>
+}
+
+; Test a doubleword-granularity replicate that has the lower 22 bits set.
+define <4 x float> @f7() {
+; CHECK-LABEL: f7:
+; CHECK: vgmg %v24, 42, 63
+; CHECK: br %r14
+ ret <4 x float> <float 0.0, float 0x37ffffff80000000,
+ float 0.0, float 0x37ffffff80000000>
+}
+
+; Test a doubleword-granularity replicate that has the upper 45 bits set.
+define <4 x float> @f8() {
+; CHECK-LABEL: f8:
+; CHECK: vgmg %v24, 0, 44
+; CHECK: br %r14
+ ret <4 x float> <float 0xffffffffe0000000, float 0xffff000000000000,
+ float 0xffffffffe0000000, float 0xffff000000000000>
+}
+
+; Test a doubleword-granularity replicate that has middle bits set.
+define <4 x float> @f9() {
+; CHECK-LABEL: f9:
+; CHECK: vgmg %v24, 34, 41
+; CHECK: br %r14
+ ret <4 x float> <float 0.0, float 0x3ff8000000000000,
+ float 0.0, float 0x3ff8000000000000>
+}
+
+; Test a doubleword-granularity replicate with a wrap-around mask.
+define <4 x float> @f10() {
+; CHECK-LABEL: f10:
+; CHECK: vgmg %v24, 32, 0
+; CHECK: br %r14
+ ret <4 x float> <float 0x8000000000000000, float 0xffffffffe0000000,
+ float 0x8000000000000000, float 0xffffffffe0000000>
+}
diff --git a/test/CodeGen/SystemZ/vec-const-18.ll b/test/CodeGen/SystemZ/vec-const-18.ll
new file mode 100644
index 000000000000..c6c20c2a0037
--- /dev/null
+++ b/test/CodeGen/SystemZ/vec-const-18.ll
@@ -0,0 +1,85 @@
+; Test vector replicates that use VECTOR GENERATE MASK, v2f64 version.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
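+; The double constants below are written as hexadecimal bit patterns, so each
+; literal spells out the doubleword mask that the corresponding VGM
+; instruction is expected to generate.
+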
+; Test a word-granularity replicate with the lowest value that cannot use
+; VREPIF.
+define <2 x double> @f1() {
+; CHECK-LABEL: f1:
+; CHECK: vgmf %v24, 16, 16
+; CHECK: br %r14
+ ret <2 x double> <double 0x0000800000008000, double 0x0000800000008000>
+}
+
+; Test a word-granularity replicate that has the lower 17 bits set.
+define <2 x double> @f2() {
+; CHECK-LABEL: f2:
+; CHECK: vgmf %v24, 15, 31
+; CHECK: br %r14
+ ret <2 x double> <double 0x0001ffff0001ffff, double 0x0001ffff0001ffff>
+}
+
+; Test a word-granularity replicate that has the upper 15 bits set.
+define <2 x double> @f3() {
+; CHECK-LABEL: f3:
+; CHECK: vgmf %v24, 0, 14
+; CHECK: br %r14
+ ret <2 x double> <double 0xfffe0000fffe0000, double 0xfffe0000fffe0000>
+}
+
+; Test a word-granularity replicate that has middle bits set.
+define <2 x double> @f4() {
+; CHECK-LABEL: f4:
+; CHECK: vgmf %v24, 2, 11
+; CHECK: br %r14
+ ret <2 x double> <double 0x3ff000003ff00000, double 0x3ff000003ff00000>
+}
+
+; Test a word-granularity replicate with a wrap-around mask.
+define <2 x double> @f5() {
+; CHECK-LABEL: f5:
+; CHECK: vgmf %v24, 17, 15
+; CHECK: br %r14
+ ret <2 x double> <double 0xffff7fffffff7fff, double 0xffff7fffffff7fff>
+}
+
+; Test a doubleword-granularity replicate with the lowest value that cannot
+; use VREPIG.
+define <2 x double> @f6() {
+; CHECK-LABEL: f6:
+; CHECK: vgmg %v24, 48, 48
+; CHECK: br %r14
+ ret <2 x double> <double 0x0000000000008000, double 0x0000000000008000>
+}
+
+; Test a doubleword-granularity replicate that has the lower 22 bits set.
+define <2 x double> @f7() {
+; CHECK-LABEL: f7:
+; CHECK: vgmg %v24, 42, 63
+; CHECK: br %r14
+ ret <2 x double> <double 0x00000000003fffff, double 0x00000000003fffff>
+}
+
+; Test a doubleword-granularity replicate that has the upper 45 bits set.
+define <2 x double> @f8() {
+; CHECK-LABEL: f8:
+; CHECK: vgmg %v24, 0, 44
+; CHECK: br %r14
+ ret <2 x double> <double 0xfffffffffff80000, double 0xfffffffffff80000>
+}
+
+; Test a doubleword-granularity replicate that has middle bits set.
+define <2 x double> @f9() {
+; CHECK-LABEL: f9:
+; CHECK: vgmg %v24, 2, 11
+; CHECK: br %r14
+ ret <2 x double> <double 0x3ff0000000000000, double 0x3ff0000000000000>
+}
+
+; Test a doubleword-granularity replicate with a wrap-around mask.
+define <2 x double> @f10() {
+; CHECK-LABEL: f10:
+; CHECK: vgmg %v24, 10, 0
+; CHECK: br %r14
+ ret <2 x double> <double 0x803fffffffffffff, double 0x803fffffffffffff>
+}
diff --git a/test/CodeGen/SystemZ/vec-conv-01.ll b/test/CodeGen/SystemZ/vec-conv-01.ll
new file mode 100644
index 000000000000..cbf42c0f533e
--- /dev/null
+++ b/test/CodeGen/SystemZ/vec-conv-01.ll
@@ -0,0 +1,95 @@
+; Test conversions between integer and float elements.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
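+; The trailing ", 0, 5" on the fp-to-int checks presumably selects the
+; round-toward-zero rounding mode, matching the truncating semantics of
+; fptosi/fptoui, while the int-to-fp checks leave both modifier fields at 0.
+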
+; Test conversion of f64s to signed i64s.
+define <2 x i64> @f1(<2 x double> %doubles) {
+; CHECK-LABEL: f1:
+; CHECK: vcgdb %v24, %v24, 0, 5
+; CHECK: br %r14
+ %dwords = fptosi <2 x double> %doubles to <2 x i64>
+ ret <2 x i64> %dwords
+}
+
+; Test conversion of f64s to unsigned i64s.
+define <2 x i64> @f2(<2 x double> %doubles) {
+; CHECK-LABEL: f2:
+; CHECK: vclgdb %v24, %v24, 0, 5
+; CHECK: br %r14
+ %dwords = fptoui <2 x double> %doubles to <2 x i64>
+ ret <2 x i64> %dwords
+}
+
+; Test conversion of signed i64s to f64s.
+define <2 x double> @f3(<2 x i64> %dwords) {
+; CHECK-LABEL: f3:
+; CHECK: vcdgb %v24, %v24, 0, 0
+; CHECK: br %r14
+ %doubles = sitofp <2 x i64> %dwords to <2 x double>
+ ret <2 x double> %doubles
+}
+
+; Test conversion of unsigned i64s to f64s.
+define <2 x double> @f4(<2 x i64> %dwords) {
+; CHECK-LABEL: f4:
+; CHECK: vcdlgb %v24, %v24, 0, 0
+; CHECK: br %r14
+ %doubles = uitofp <2 x i64> %dwords to <2 x double>
+ ret <2 x double> %doubles
+}
+
+; Test conversion of f64s to signed i32s, which must compile.
+define void @f5(<2 x double> %doubles, <2 x i32> *%ptr) {
+ %words = fptosi <2 x double> %doubles to <2 x i32>
+ store <2 x i32> %words, <2 x i32> *%ptr
+ ret void
+}
+
+; Test conversion of f64s to unsigned i32s, which must compile.
+define void @f6(<2 x double> %doubles, <2 x i32> *%ptr) {
+ %words = fptoui <2 x double> %doubles to <2 x i32>
+ store <2 x i32> %words, <2 x i32> *%ptr
+ ret void
+}
+
+; Test conversion of signed i32s to f64s, which must compile.
+define <2 x double> @f7(<2 x i32> *%ptr) {
+ %words = load <2 x i32>, <2 x i32> *%ptr
+ %doubles = sitofp <2 x i32> %words to <2 x double>
+ ret <2 x double> %doubles
+}
+
+; Test conversion of unsigned i32s to f64s, which must compile.
+define <2 x double> @f8(<2 x i32> *%ptr) {
+ %words = load <2 x i32>, <2 x i32> *%ptr
+ %doubles = uitofp <2 x i32> %words to <2 x double>
+ ret <2 x double> %doubles
+}
+
+; Test conversion of f32s to signed i64s, which must compile.
+define <2 x i64> @f9(<2 x float> *%ptr) {
+ %floats = load <2 x float>, <2 x float> *%ptr
+ %dwords = fptosi <2 x float> %floats to <2 x i64>
+ ret <2 x i64> %dwords
+}
+
+; Test conversion of f32s to unsigned i64s, which must compile.
+define <2 x i64> @f10(<2 x float> *%ptr) {
+ %floats = load <2 x float>, <2 x float> *%ptr
+ %dwords = fptoui <2 x float> %floats to <2 x i64>
+ ret <2 x i64> %dwords
+}
+
+; Test conversion of signed i64s to f32, which must compile.
+define void @f11(<2 x i64> %dwords, <2 x float> *%ptr) {
+ %floats = sitofp <2 x i64> %dwords to <2 x float>
+ store <2 x float> %floats, <2 x float> *%ptr
+ ret void
+}
+
+; Test conversion of unsigned i64s to f32, which must compile.
+define void @f12(<2 x i64> %dwords, <2 x float> *%ptr) {
+ %floats = uitofp <2 x i64> %dwords to <2 x float>
+ store <2 x float> %floats, <2 x float> *%ptr
+ ret void
+}
diff --git a/test/CodeGen/SystemZ/vec-conv-02.ll b/test/CodeGen/SystemZ/vec-conv-02.ll
new file mode 100644
index 000000000000..ab84389f3c8e
--- /dev/null
+++ b/test/CodeGen/SystemZ/vec-conv-02.ll
@@ -0,0 +1,33 @@
+; Test conversions between different-sized float elements.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
+; Test cases where both elements of a v2f64 are converted to f32s.
+define void @f1(<2 x double> %val, <2 x float> *%ptr) {
+; CHECK-LABEL: f1:
+; CHECK: vledb {{%v[0-9]+}}, %v24, 0, 0
+; CHECK: br %r14
+ %res = fptrunc <2 x double> %val to <2 x float>
+ store <2 x float> %res, <2 x float> *%ptr
+ ret void
+}
+
+; Test conversion of an f64 in a vector register to an f32.
+define float @f2(<2 x double> %vec) {
+; CHECK-LABEL: f2:
+; CHECK: wledb %f0, %v24
+; CHECK: br %r14
+ %scalar = extractelement <2 x double> %vec, i32 0
+ %ret = fptrunc double %scalar to float
+ ret float %ret
+}
+
+; Test conversion of an f32 in a vector register to an f64.
+define double @f3(<4 x float> %vec) {
+; CHECK-LABEL: f3:
+; CHECK: wldeb %f0, %v24
+; CHECK: br %r14
+ %scalar = extractelement <4 x float> %vec, i32 0
+ %ret = fpext float %scalar to double
+ ret double %ret
+}
diff --git a/test/CodeGen/SystemZ/vec-ctlz-01.ll b/test/CodeGen/SystemZ/vec-ctlz-01.ll
new file mode 100644
index 000000000000..f6502202ef58
--- /dev/null
+++ b/test/CodeGen/SystemZ/vec-ctlz-01.ll
@@ -0,0 +1,81 @@
+; Test vector count leading zeros
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
+declare <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %src, i1 %is_zero_undef)
+declare <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %src, i1 %is_zero_undef)
+declare <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %src, i1 %is_zero_undef)
+declare <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %src, i1 %is_zero_undef)
+
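+; The i1 flag (whether a zero input is undefined) makes no difference here:
+; both variants of each element size are expected to select the same VCLZ*
+; instruction, as the paired functions below show.
+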
+define <16 x i8> @f1(<16 x i8> %a) {
+; CHECK-LABEL: f1:
+; CHECK: vclzb %v24, %v24
+; CHECK: br %r14
+
+ %res = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 false)
+ ret <16 x i8> %res
+}
+
+define <16 x i8> @f2(<16 x i8> %a) {
+; CHECK-LABEL: f2:
+; CHECK: vclzb %v24, %v24
+; CHECK: br %r14
+
+ %res = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 true)
+ ret <16 x i8> %res
+}
+
+define <8 x i16> @f3(<8 x i16> %a) {
+; CHECK-LABEL: f3:
+; CHECK: vclzh %v24, %v24
+; CHECK: br %r14
+
+ %res = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 false)
+ ret <8 x i16> %res
+}
+
+define <8 x i16> @f4(<8 x i16> %a) {
+; CHECK-LABEL: f4:
+; CHECK: vclzh %v24, %v24
+; CHECK: br %r14
+
+ %res = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 true)
+ ret <8 x i16> %res
+}
+
+define <4 x i32> @f5(<4 x i32> %a) {
+; CHECK-LABEL: f5:
+; CHECK: vclzf %v24, %v24
+; CHECK: br %r14
+
+ %res = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 false)
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @f6(<4 x i32> %a) {
+; CHECK-LABEL: f6:
+; CHECK: vclzf %v24, %v24
+; CHECK: br %r14
+
+ %res = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 true)
+ ret <4 x i32> %res
+}
+
+define <2 x i64> @f7(<2 x i64> %a) {
+; CHECK-LABEL: f7:
+; CHECK: vclzg %v24, %v24
+; CHECK: br %r14
+
+ %res = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 false)
+ ret <2 x i64> %res
+}
+
+define <2 x i64> @f8(<2 x i64> %a) {
+; CHECK-LABEL: f8:
+; CHECK: vclzg %v24, %v24
+; CHECK: br %r14
+
+ %res = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 true)
+ ret <2 x i64> %res
+}
+
diff --git a/test/CodeGen/SystemZ/vec-ctpop-01.ll b/test/CodeGen/SystemZ/vec-ctpop-01.ll
new file mode 100644
index 000000000000..0056af73a2e1
--- /dev/null
+++ b/test/CodeGen/SystemZ/vec-ctpop-01.ll
@@ -0,0 +1,53 @@
+; Test vector population-count instruction
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
+declare <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %a)
+declare <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %a)
+declare <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %a)
+declare <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %a)
+
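+; VPOPCT here counts bits per byte (the trailing 0 operand), so the wider
+; element types are assembled from byte counts: halfwords by shifting and
+; adding adjacent bytes, words and doublewords by summing the byte counts
+; with VSUMB and VSUMGF against a zero vector, as the checks below show.
+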
+define <16 x i8> @f1(<16 x i8> %a) {
+; CHECK-LABEL: f1:
+; CHECK: vpopct %v24, %v24, 0
+; CHECK: br %r14
+
+ %popcnt = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %a)
+ ret <16 x i8> %popcnt
+}
+
+define <8 x i16> @f2(<8 x i16> %a) {
+; CHECK-LABEL: f2:
+; CHECK: vpopct [[T1:%v[0-9]+]], %v24, 0
+; CHECK: veslh [[T2:%v[0-9]+]], [[T1]], 8
+; CHECK: vah [[T3:%v[0-9]+]], [[T1]], [[T2]]
+; CHECK: vesrlh %v24, [[T3]], 8
+; CHECK: br %r14
+
+ %popcnt = call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %a)
+ ret <8 x i16> %popcnt
+}
+
+define <4 x i32> @f3(<4 x i32> %a) {
+; CHECK-LABEL: f3:
+; CHECK: vpopct [[T1:%v[0-9]+]], %v24, 0
+; CHECK: vgbm [[T2:%v[0-9]+]], 0
+; CHECK: vsumb %v24, [[T1]], [[T2]]
+; CHECK: br %r14
+
+ %popcnt = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %a)
+ ret <4 x i32> %popcnt
+}
+
+define <2 x i64> @f4(<2 x i64> %a) {
+; CHECK-LABEL: f4:
+; CHECK: vpopct [[T1:%v[0-9]+]], %v24, 0
+; CHECK: vgbm [[T2:%v[0-9]+]], 0
+; CHECK: vsumb [[T3:%v[0-9]+]], [[T1]], [[T2]]
+; CHECK: vsumgf %v24, [[T3]], [[T2]]
+; CHECK: br %r14
+
+ %popcnt = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %a)
+ ret <2 x i64> %popcnt
+}
+
diff --git a/test/CodeGen/SystemZ/vec-cttz-01.ll b/test/CodeGen/SystemZ/vec-cttz-01.ll
new file mode 100644
index 000000000000..00a0d21b42fe
--- /dev/null
+++ b/test/CodeGen/SystemZ/vec-cttz-01.ll
@@ -0,0 +1,81 @@
+; Test vector count trailing zeros
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
+declare <16 x i8> @llvm.cttz.v16i8(<16 x i8> %src, i1 %is_zero_undef)
+declare <8 x i16> @llvm.cttz.v8i16(<8 x i16> %src, i1 %is_zero_undef)
+declare <4 x i32> @llvm.cttz.v4i32(<4 x i32> %src, i1 %is_zero_undef)
+declare <2 x i64> @llvm.cttz.v2i64(<2 x i64> %src, i1 %is_zero_undef)
+
+define <16 x i8> @f1(<16 x i8> %a) {
+; CHECK-LABEL: f1:
+; CHECK: vctzb %v24, %v24
+; CHECK: br %r14
+
+ %res = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 false)
+ ret <16 x i8> %res
+}
+
+define <16 x i8> @f2(<16 x i8> %a) {
+; CHECK-LABEL: f2:
+; CHECK: vctzb %v24, %v24
+; CHECK: br %r14
+
+ %res = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 true)
+ ret <16 x i8> %res
+}
+
+define <8 x i16> @f3(<8 x i16> %a) {
+; CHECK-LABEL: f3:
+; CHECK: vctzh %v24, %v24
+; CHECK: br %r14
+
+ %res = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 false)
+ ret <8 x i16> %res
+}
+
+define <8 x i16> @f4(<8 x i16> %a) {
+; CHECK-LABEL: f4:
+; CHECK: vctzh %v24, %v24
+; CHECK: br %r14
+
+ %res = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 true)
+ ret <8 x i16> %res
+}
+
+define <4 x i32> @f5(<4 x i32> %a) {
+; CHECK-LABEL: f5:
+; CHECK: vctzf %v24, %v24
+; CHECK: br %r14
+
+ %res = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 false)
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @f6(<4 x i32> %a) {
+; CHECK-LABEL: f6:
+; CHECK: vctzf %v24, %v24
+; CHECK: br %r14
+
+ %res = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 true)
+ ret <4 x i32> %res
+}
+
+define <2 x i64> @f7(<2 x i64> %a) {
+; CHECK-LABEL: f7:
+; CHECK: vctzg %v24, %v24
+; CHECK: br %r14
+
+ %res = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 false)
+ ret <2 x i64> %res
+}
+
+define <2 x i64> @f8(<2 x i64> %a) {
+; CHECK-LABEL: f8:
+; CHECK: vctzg %v24, %v24
+; CHECK: br %r14
+
+ %res = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 true)
+ ret <2 x i64> %res
+}
+
diff --git a/test/CodeGen/SystemZ/vec-div-01.ll b/test/CodeGen/SystemZ/vec-div-01.ll
new file mode 100644
index 000000000000..506d40861d35
--- /dev/null
+++ b/test/CodeGen/SystemZ/vec-div-01.ll
@@ -0,0 +1,83 @@
+; Test vector division. There is no native integer support for this,
+; so the integer cases are really a test of the operation legalization code.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
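+; The integer cases are expected to expand element by element: each quotient
+; is computed in general registers and reinserted with VLVGB/VLVGH/VLVGF,
+; which is what the CHECK-DAG lines track; only the floating-point cases map
+; to a single VFDDB/WFDDB.
+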
+; Test a v16i8 division.
+define <16 x i8> @f1(<16 x i8> %dummy, <16 x i8> %val1, <16 x i8> %val2) {
+; CHECK-LABEL: f1:
+; CHECK: vlvgp [[REG:%v[0-9]+]],
+; CHECK-DAG: vlvgb [[REG]], {{%r[0-5]}}, 0
+; CHECK-DAG: vlvgb [[REG]], {{%r[0-5]}}, 1
+; CHECK-DAG: vlvgb [[REG]], {{%r[0-5]}}, 2
+; CHECK-DAG: vlvgb [[REG]], {{%r[0-5]}}, 3
+; CHECK-DAG: vlvgb [[REG]], {{%r[0-5]}}, 4
+; CHECK-DAG: vlvgb [[REG]], {{%r[0-5]}}, 5
+; CHECK-DAG: vlvgb [[REG]], {{%r[0-5]}}, 6
+; CHECK-DAG: vlvgb [[REG]], {{%r[0-5]}}, 8
+; CHECK-DAG: vlvgb [[REG]], {{%r[0-5]}}, 9
+; CHECK-DAG: vlvgb [[REG]], {{%r[0-5]}}, 10
+; CHECK-DAG: vlvgb [[REG]], {{%r[0-5]}}, 11
+; CHECK-DAG: vlvgb [[REG]], {{%r[0-5]}}, 12
+; CHECK-DAG: vlvgb [[REG]], {{%r[0-5]}}, 13
+; CHECK-DAG: vlvgb [[REG]], {{%r[0-5]}}, 14
+; CHECK: br %r14
+ %ret = sdiv <16 x i8> %val1, %val2
+ ret <16 x i8> %ret
+}
+
+; Test a v8i16 division.
+define <8 x i16> @f2(<8 x i16> %dummy, <8 x i16> %val1, <8 x i16> %val2) {
+; CHECK-LABEL: f2:
+; CHECK: vlvgp [[REG:%v[0-9]+]],
+; CHECK-DAG: vlvgh [[REG]], {{%r[0-5]}}, 0
+; CHECK-DAG: vlvgh [[REG]], {{%r[0-5]}}, 1
+; CHECK-DAG: vlvgh [[REG]], {{%r[0-5]}}, 2
+; CHECK-DAG: vlvgh [[REG]], {{%r[0-5]}}, 4
+; CHECK-DAG: vlvgh [[REG]], {{%r[0-5]}}, 5
+; CHECK-DAG: vlvgh [[REG]], {{%r[0-5]}}, 6
+; CHECK: br %r14
+ %ret = sdiv <8 x i16> %val1, %val2
+ ret <8 x i16> %ret
+}
+
+; Test a v4i32 division.
+define <4 x i32> @f3(<4 x i32> %dummy, <4 x i32> %val1, <4 x i32> %val2) {
+; CHECK-LABEL: f3:
+; CHECK: vlvgp [[REG:%v[0-9]+]],
+; CHECK-DAG: vlvgf [[REG]], {{%r[0-5]}}, 0
+; CHECK-DAG: vlvgf [[REG]], {{%r[0-5]}}, 2
+; CHECK: br %r14
+ %ret = sdiv <4 x i32> %val1, %val2
+ ret <4 x i32> %ret
+}
+
+; Test a v2i64 division.
+define <2 x i64> @f4(<2 x i64> %dummy, <2 x i64> %val1, <2 x i64> %val2) {
+; CHECK-LABEL: f4:
+; CHECK: vlvgp %v24,
+; CHECK: br %r14
+ %ret = sdiv <2 x i64> %val1, %val2
+ ret <2 x i64> %ret
+}
+
+; Test a v2f64 division.
+define <2 x double> @f5(<2 x double> %dummy, <2 x double> %val1,
+ <2 x double> %val2) {
+; CHECK-LABEL: f5:
+; CHECK: vfddb %v24, %v26, %v28
+; CHECK: br %r14
+ %ret = fdiv <2 x double> %val1, %val2
+ ret <2 x double> %ret
+}
+
+; Test an f64 division that uses vector registers.
+define double @f6(<2 x double> %val1, <2 x double> %val2) {
+; CHECK-LABEL: f6:
+; CHECK: wfddb %f0, %v24, %v26
+; CHECK: br %r14
+ %scalar1 = extractelement <2 x double> %val1, i32 0
+ %scalar2 = extractelement <2 x double> %val2, i32 0
+ %ret = fdiv double %scalar1, %scalar2
+ ret double %ret
+}
diff --git a/test/CodeGen/SystemZ/vec-extract-01.ll b/test/CodeGen/SystemZ/vec-extract-01.ll
new file mode 100644
index 000000000000..549392ffd64d
--- /dev/null
+++ b/test/CodeGen/SystemZ/vec-extract-01.ll
@@ -0,0 +1,13 @@
+; Verify ReplaceExtractVectorEltOfLoadWithNarrowedLoad fixes
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
+; Test a memory copy of a v2i32 (via the constant pool).
+define void @f1(<2 x i32> *%dest) {
+; CHECK-LABEL: f1:
+; CHECK: lgrl [[REG:%r[0-5]]], {{[._A-Za-z0-9]}}
+; CHECK: stg [[REG]], 0(%r2)
+; CHECK: br %r14
+ store <2 x i32> <i32 1000000, i32 99999>, <2 x i32> *%dest
+ ret void
+}
diff --git a/test/CodeGen/SystemZ/vec-extract-02.ll b/test/CodeGen/SystemZ/vec-extract-02.ll
new file mode 100644
index 000000000000..c91e852fcf45
--- /dev/null
+++ b/test/CodeGen/SystemZ/vec-extract-02.ll
@@ -0,0 +1,15 @@
+; Verify ReplaceExtractVectorEltOfLoadWithNarrowedLoad fixes
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
+; Test a case where a vector extraction can be simplified to a scalar load.
+; The index must be extended from i32 to i64.
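+; The RISBG in the check appears to perform both the extension and the
+; scaling of the index by the 4-byte element size (the rotate amount of 2).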
+define i32 @f1(<4 x i32> *%ptr, i32 %index) {
+; CHECK-LABEL: f1:
+; CHECK: risbg {{%r[0-5]}}, %r3, 30, 189, 2
+; CHECK: l %r2,
+; CHECK: br %r14
+ %vec = load <4 x i32>, <4 x i32> *%ptr
+ %res = extractelement <4 x i32> %vec, i32 %index
+ ret i32 %res
+}
diff --git a/test/CodeGen/SystemZ/vec-intrinsics.ll b/test/CodeGen/SystemZ/vec-intrinsics.ll
new file mode 100644
index 000000000000..55527787da4c
--- /dev/null
+++ b/test/CodeGen/SystemZ/vec-intrinsics.ll
@@ -0,0 +1,3335 @@
+; Test vector intrinsics.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
+declare i32 @llvm.s390.lcbb(i8 *, i32)
+declare <16 x i8> @llvm.s390.vlbb(i8 *, i32)
+declare <16 x i8> @llvm.s390.vll(i32, i8 *)
+declare <2 x i64> @llvm.s390.vpdi(<2 x i64>, <2 x i64>, i32)
+declare <16 x i8> @llvm.s390.vperm(<16 x i8>, <16 x i8>, <16 x i8>)
+declare <16 x i8> @llvm.s390.vpksh(<8 x i16>, <8 x i16>)
+declare <8 x i16> @llvm.s390.vpksf(<4 x i32>, <4 x i32>)
+declare <4 x i32> @llvm.s390.vpksg(<2 x i64>, <2 x i64>)
+declare {<16 x i8>, i32} @llvm.s390.vpkshs(<8 x i16>, <8 x i16>)
+declare {<8 x i16>, i32} @llvm.s390.vpksfs(<4 x i32>, <4 x i32>)
+declare {<4 x i32>, i32} @llvm.s390.vpksgs(<2 x i64>, <2 x i64>)
+declare <16 x i8> @llvm.s390.vpklsh(<8 x i16>, <8 x i16>)
+declare <8 x i16> @llvm.s390.vpklsf(<4 x i32>, <4 x i32>)
+declare <4 x i32> @llvm.s390.vpklsg(<2 x i64>, <2 x i64>)
+declare {<16 x i8>, i32} @llvm.s390.vpklshs(<8 x i16>, <8 x i16>)
+declare {<8 x i16>, i32} @llvm.s390.vpklsfs(<4 x i32>, <4 x i32>)
+declare {<4 x i32>, i32} @llvm.s390.vpklsgs(<2 x i64>, <2 x i64>)
+declare void @llvm.s390.vstl(<16 x i8>, i32, i8 *)
+declare <8 x i16> @llvm.s390.vuphb(<16 x i8>)
+declare <4 x i32> @llvm.s390.vuphh(<8 x i16>)
+declare <2 x i64> @llvm.s390.vuphf(<4 x i32>)
+declare <8 x i16> @llvm.s390.vuplhb(<16 x i8>)
+declare <4 x i32> @llvm.s390.vuplhh(<8 x i16>)
+declare <2 x i64> @llvm.s390.vuplhf(<4 x i32>)
+declare <8 x i16> @llvm.s390.vuplb(<16 x i8>)
+declare <4 x i32> @llvm.s390.vuplhw(<8 x i16>)
+declare <2 x i64> @llvm.s390.vuplf(<4 x i32>)
+declare <8 x i16> @llvm.s390.vupllb(<16 x i8>)
+declare <4 x i32> @llvm.s390.vupllh(<8 x i16>)
+declare <2 x i64> @llvm.s390.vupllf(<4 x i32>)
+declare <16 x i8> @llvm.s390.vaccb(<16 x i8>, <16 x i8>)
+declare <8 x i16> @llvm.s390.vacch(<8 x i16>, <8 x i16>)
+declare <4 x i32> @llvm.s390.vaccf(<4 x i32>, <4 x i32>)
+declare <2 x i64> @llvm.s390.vaccg(<2 x i64>, <2 x i64>)
+declare <16 x i8> @llvm.s390.vaq(<16 x i8>, <16 x i8>)
+declare <16 x i8> @llvm.s390.vacq(<16 x i8>, <16 x i8>, <16 x i8>)
+declare <16 x i8> @llvm.s390.vaccq(<16 x i8>, <16 x i8>)
+declare <16 x i8> @llvm.s390.vacccq(<16 x i8>, <16 x i8>, <16 x i8>)
+declare <16 x i8> @llvm.s390.vavgb(<16 x i8>, <16 x i8>)
+declare <8 x i16> @llvm.s390.vavgh(<8 x i16>, <8 x i16>)
+declare <4 x i32> @llvm.s390.vavgf(<4 x i32>, <4 x i32>)
+declare <2 x i64> @llvm.s390.vavgg(<2 x i64>, <2 x i64>)
+declare <16 x i8> @llvm.s390.vavglb(<16 x i8>, <16 x i8>)
+declare <8 x i16> @llvm.s390.vavglh(<8 x i16>, <8 x i16>)
+declare <4 x i32> @llvm.s390.vavglf(<4 x i32>, <4 x i32>)
+declare <2 x i64> @llvm.s390.vavglg(<2 x i64>, <2 x i64>)
+declare <4 x i32> @llvm.s390.vcksm(<4 x i32>, <4 x i32>)
+declare <8 x i16> @llvm.s390.vgfmb(<16 x i8>, <16 x i8>)
+declare <4 x i32> @llvm.s390.vgfmh(<8 x i16>, <8 x i16>)
+declare <2 x i64> @llvm.s390.vgfmf(<4 x i32>, <4 x i32>)
+declare <16 x i8> @llvm.s390.vgfmg(<2 x i64>, <2 x i64>)
+declare <8 x i16> @llvm.s390.vgfmab(<16 x i8>, <16 x i8>, <8 x i16>)
+declare <4 x i32> @llvm.s390.vgfmah(<8 x i16>, <8 x i16>, <4 x i32>)
+declare <2 x i64> @llvm.s390.vgfmaf(<4 x i32>, <4 x i32>, <2 x i64>)
+declare <16 x i8> @llvm.s390.vgfmag(<2 x i64>, <2 x i64>, <16 x i8>)
+declare <16 x i8> @llvm.s390.vmahb(<16 x i8>, <16 x i8>, <16 x i8>)
+declare <8 x i16> @llvm.s390.vmahh(<8 x i16>, <8 x i16>, <8 x i16>)
+declare <4 x i32> @llvm.s390.vmahf(<4 x i32>, <4 x i32>, <4 x i32>)
+declare <16 x i8> @llvm.s390.vmalhb(<16 x i8>, <16 x i8>, <16 x i8>)
+declare <8 x i16> @llvm.s390.vmalhh(<8 x i16>, <8 x i16>, <8 x i16>)
+declare <4 x i32> @llvm.s390.vmalhf(<4 x i32>, <4 x i32>, <4 x i32>)
+declare <8 x i16> @llvm.s390.vmaeb(<16 x i8>, <16 x i8>, <8 x i16>)
+declare <4 x i32> @llvm.s390.vmaeh(<8 x i16>, <8 x i16>, <4 x i32>)
+declare <2 x i64> @llvm.s390.vmaef(<4 x i32>, <4 x i32>, <2 x i64>)
+declare <8 x i16> @llvm.s390.vmaleb(<16 x i8>, <16 x i8>, <8 x i16>)
+declare <4 x i32> @llvm.s390.vmaleh(<8 x i16>, <8 x i16>, <4 x i32>)
+declare <2 x i64> @llvm.s390.vmalef(<4 x i32>, <4 x i32>, <2 x i64>)
+declare <8 x i16> @llvm.s390.vmaob(<16 x i8>, <16 x i8>, <8 x i16>)
+declare <4 x i32> @llvm.s390.vmaoh(<8 x i16>, <8 x i16>, <4 x i32>)
+declare <2 x i64> @llvm.s390.vmaof(<4 x i32>, <4 x i32>, <2 x i64>)
+declare <8 x i16> @llvm.s390.vmalob(<16 x i8>, <16 x i8>, <8 x i16>)
+declare <4 x i32> @llvm.s390.vmaloh(<8 x i16>, <8 x i16>, <4 x i32>)
+declare <2 x i64> @llvm.s390.vmalof(<4 x i32>, <4 x i32>, <2 x i64>)
+declare <16 x i8> @llvm.s390.vmhb(<16 x i8>, <16 x i8>)
+declare <8 x i16> @llvm.s390.vmhh(<8 x i16>, <8 x i16>)
+declare <4 x i32> @llvm.s390.vmhf(<4 x i32>, <4 x i32>)
+declare <16 x i8> @llvm.s390.vmlhb(<16 x i8>, <16 x i8>)
+declare <8 x i16> @llvm.s390.vmlhh(<8 x i16>, <8 x i16>)
+declare <4 x i32> @llvm.s390.vmlhf(<4 x i32>, <4 x i32>)
+declare <8 x i16> @llvm.s390.vmeb(<16 x i8>, <16 x i8>)
+declare <4 x i32> @llvm.s390.vmeh(<8 x i16>, <8 x i16>)
+declare <2 x i64> @llvm.s390.vmef(<4 x i32>, <4 x i32>)
+declare <8 x i16> @llvm.s390.vmleb(<16 x i8>, <16 x i8>)
+declare <4 x i32> @llvm.s390.vmleh(<8 x i16>, <8 x i16>)
+declare <2 x i64> @llvm.s390.vmlef(<4 x i32>, <4 x i32>)
+declare <8 x i16> @llvm.s390.vmob(<16 x i8>, <16 x i8>)
+declare <4 x i32> @llvm.s390.vmoh(<8 x i16>, <8 x i16>)
+declare <2 x i64> @llvm.s390.vmof(<4 x i32>, <4 x i32>)
+declare <8 x i16> @llvm.s390.vmlob(<16 x i8>, <16 x i8>)
+declare <4 x i32> @llvm.s390.vmloh(<8 x i16>, <8 x i16>)
+declare <2 x i64> @llvm.s390.vmlof(<4 x i32>, <4 x i32>)
+declare <16 x i8> @llvm.s390.verllvb(<16 x i8>, <16 x i8>)
+declare <8 x i16> @llvm.s390.verllvh(<8 x i16>, <8 x i16>)
+declare <4 x i32> @llvm.s390.verllvf(<4 x i32>, <4 x i32>)
+declare <2 x i64> @llvm.s390.verllvg(<2 x i64>, <2 x i64>)
+declare <16 x i8> @llvm.s390.verllb(<16 x i8>, i32)
+declare <8 x i16> @llvm.s390.verllh(<8 x i16>, i32)
+declare <4 x i32> @llvm.s390.verllf(<4 x i32>, i32)
+declare <2 x i64> @llvm.s390.verllg(<2 x i64>, i32)
+declare <16 x i8> @llvm.s390.verimb(<16 x i8>, <16 x i8>, <16 x i8>, i32)
+declare <8 x i16> @llvm.s390.verimh(<8 x i16>, <8 x i16>, <8 x i16>, i32)
+declare <4 x i32> @llvm.s390.verimf(<4 x i32>, <4 x i32>, <4 x i32>, i32)
+declare <2 x i64> @llvm.s390.verimg(<2 x i64>, <2 x i64>, <2 x i64>, i32)
+declare <16 x i8> @llvm.s390.vsl(<16 x i8>, <16 x i8>)
+declare <16 x i8> @llvm.s390.vslb(<16 x i8>, <16 x i8>)
+declare <16 x i8> @llvm.s390.vsra(<16 x i8>, <16 x i8>)
+declare <16 x i8> @llvm.s390.vsrab(<16 x i8>, <16 x i8>)
+declare <16 x i8> @llvm.s390.vsrl(<16 x i8>, <16 x i8>)
+declare <16 x i8> @llvm.s390.vsrlb(<16 x i8>, <16 x i8>)
+declare <16 x i8> @llvm.s390.vsldb(<16 x i8>, <16 x i8>, i32)
+declare <16 x i8> @llvm.s390.vscbib(<16 x i8>, <16 x i8>)
+declare <8 x i16> @llvm.s390.vscbih(<8 x i16>, <8 x i16>)
+declare <4 x i32> @llvm.s390.vscbif(<4 x i32>, <4 x i32>)
+declare <2 x i64> @llvm.s390.vscbig(<2 x i64>, <2 x i64>)
+declare <16 x i8> @llvm.s390.vsq(<16 x i8>, <16 x i8>)
+declare <16 x i8> @llvm.s390.vsbiq(<16 x i8>, <16 x i8>, <16 x i8>)
+declare <16 x i8> @llvm.s390.vscbiq(<16 x i8>, <16 x i8>)
+declare <16 x i8> @llvm.s390.vsbcbiq(<16 x i8>, <16 x i8>, <16 x i8>)
+declare <4 x i32> @llvm.s390.vsumb(<16 x i8>, <16 x i8>)
+declare <4 x i32> @llvm.s390.vsumh(<8 x i16>, <8 x i16>)
+declare <2 x i64> @llvm.s390.vsumgh(<8 x i16>, <8 x i16>)
+declare <2 x i64> @llvm.s390.vsumgf(<4 x i32>, <4 x i32>)
+declare <16 x i8> @llvm.s390.vsumqf(<4 x i32>, <4 x i32>)
+declare <16 x i8> @llvm.s390.vsumqg(<2 x i64>, <2 x i64>)
+declare i32 @llvm.s390.vtm(<16 x i8>, <16 x i8>)
+declare {<16 x i8>, i32} @llvm.s390.vceqbs(<16 x i8>, <16 x i8>)
+declare {<8 x i16>, i32} @llvm.s390.vceqhs(<8 x i16>, <8 x i16>)
+declare {<4 x i32>, i32} @llvm.s390.vceqfs(<4 x i32>, <4 x i32>)
+declare {<2 x i64>, i32} @llvm.s390.vceqgs(<2 x i64>, <2 x i64>)
+declare {<16 x i8>, i32} @llvm.s390.vchbs(<16 x i8>, <16 x i8>)
+declare {<8 x i16>, i32} @llvm.s390.vchhs(<8 x i16>, <8 x i16>)
+declare {<4 x i32>, i32} @llvm.s390.vchfs(<4 x i32>, <4 x i32>)
+declare {<2 x i64>, i32} @llvm.s390.vchgs(<2 x i64>, <2 x i64>)
+declare {<16 x i8>, i32} @llvm.s390.vchlbs(<16 x i8>, <16 x i8>)
+declare {<8 x i16>, i32} @llvm.s390.vchlhs(<8 x i16>, <8 x i16>)
+declare {<4 x i32>, i32} @llvm.s390.vchlfs(<4 x i32>, <4 x i32>)
+declare {<2 x i64>, i32} @llvm.s390.vchlgs(<2 x i64>, <2 x i64>)
+declare <16 x i8> @llvm.s390.vfaeb(<16 x i8>, <16 x i8>, i32)
+declare <8 x i16> @llvm.s390.vfaeh(<8 x i16>, <8 x i16>, i32)
+declare <4 x i32> @llvm.s390.vfaef(<4 x i32>, <4 x i32>, i32)
+declare {<16 x i8>, i32} @llvm.s390.vfaebs(<16 x i8>, <16 x i8>, i32)
+declare {<8 x i16>, i32} @llvm.s390.vfaehs(<8 x i16>, <8 x i16>, i32)
+declare {<4 x i32>, i32} @llvm.s390.vfaefs(<4 x i32>, <4 x i32>, i32)
+declare <16 x i8> @llvm.s390.vfaezb(<16 x i8>, <16 x i8>, i32)
+declare <8 x i16> @llvm.s390.vfaezh(<8 x i16>, <8 x i16>, i32)
+declare <4 x i32> @llvm.s390.vfaezf(<4 x i32>, <4 x i32>, i32)
+declare {<16 x i8>, i32} @llvm.s390.vfaezbs(<16 x i8>, <16 x i8>, i32)
+declare {<8 x i16>, i32} @llvm.s390.vfaezhs(<8 x i16>, <8 x i16>, i32)
+declare {<4 x i32>, i32} @llvm.s390.vfaezfs(<4 x i32>, <4 x i32>, i32)
+declare <16 x i8> @llvm.s390.vfeeb(<16 x i8>, <16 x i8>)
+declare <8 x i16> @llvm.s390.vfeeh(<8 x i16>, <8 x i16>)
+declare <4 x i32> @llvm.s390.vfeef(<4 x i32>, <4 x i32>)
+declare {<16 x i8>, i32} @llvm.s390.vfeebs(<16 x i8>, <16 x i8>)
+declare {<8 x i16>, i32} @llvm.s390.vfeehs(<8 x i16>, <8 x i16>)
+declare {<4 x i32>, i32} @llvm.s390.vfeefs(<4 x i32>, <4 x i32>)
+declare <16 x i8> @llvm.s390.vfeezb(<16 x i8>, <16 x i8>)
+declare <8 x i16> @llvm.s390.vfeezh(<8 x i16>, <8 x i16>)
+declare <4 x i32> @llvm.s390.vfeezf(<4 x i32>, <4 x i32>)
+declare {<16 x i8>, i32} @llvm.s390.vfeezbs(<16 x i8>, <16 x i8>)
+declare {<8 x i16>, i32} @llvm.s390.vfeezhs(<8 x i16>, <8 x i16>)
+declare {<4 x i32>, i32} @llvm.s390.vfeezfs(<4 x i32>, <4 x i32>)
+declare <16 x i8> @llvm.s390.vfeneb(<16 x i8>, <16 x i8>)
+declare <8 x i16> @llvm.s390.vfeneh(<8 x i16>, <8 x i16>)
+declare <4 x i32> @llvm.s390.vfenef(<4 x i32>, <4 x i32>)
+declare {<16 x i8>, i32} @llvm.s390.vfenebs(<16 x i8>, <16 x i8>)
+declare {<8 x i16>, i32} @llvm.s390.vfenehs(<8 x i16>, <8 x i16>)
+declare {<4 x i32>, i32} @llvm.s390.vfenefs(<4 x i32>, <4 x i32>)
+declare <16 x i8> @llvm.s390.vfenezb(<16 x i8>, <16 x i8>)
+declare <8 x i16> @llvm.s390.vfenezh(<8 x i16>, <8 x i16>)
+declare <4 x i32> @llvm.s390.vfenezf(<4 x i32>, <4 x i32>)
+declare {<16 x i8>, i32} @llvm.s390.vfenezbs(<16 x i8>, <16 x i8>)
+declare {<8 x i16>, i32} @llvm.s390.vfenezhs(<8 x i16>, <8 x i16>)
+declare {<4 x i32>, i32} @llvm.s390.vfenezfs(<4 x i32>, <4 x i32>)
+declare <16 x i8> @llvm.s390.vistrb(<16 x i8>)
+declare <8 x i16> @llvm.s390.vistrh(<8 x i16>)
+declare <4 x i32> @llvm.s390.vistrf(<4 x i32>)
+declare {<16 x i8>, i32} @llvm.s390.vistrbs(<16 x i8>)
+declare {<8 x i16>, i32} @llvm.s390.vistrhs(<8 x i16>)
+declare {<4 x i32>, i32} @llvm.s390.vistrfs(<4 x i32>)
+declare <16 x i8> @llvm.s390.vstrcb(<16 x i8>, <16 x i8>, <16 x i8>, i32)
+declare <8 x i16> @llvm.s390.vstrch(<8 x i16>, <8 x i16>, <8 x i16>, i32)
+declare <4 x i32> @llvm.s390.vstrcf(<4 x i32>, <4 x i32>, <4 x i32>, i32)
+declare {<16 x i8>, i32} @llvm.s390.vstrcbs(<16 x i8>, <16 x i8>, <16 x i8>,
+ i32)
+declare {<8 x i16>, i32} @llvm.s390.vstrchs(<8 x i16>, <8 x i16>, <8 x i16>,
+ i32)
+declare {<4 x i32>, i32} @llvm.s390.vstrcfs(<4 x i32>, <4 x i32>, <4 x i32>,
+ i32)
+declare <16 x i8> @llvm.s390.vstrczb(<16 x i8>, <16 x i8>, <16 x i8>, i32)
+declare <8 x i16> @llvm.s390.vstrczh(<8 x i16>, <8 x i16>, <8 x i16>, i32)
+declare <4 x i32> @llvm.s390.vstrczf(<4 x i32>, <4 x i32>, <4 x i32>, i32)
+declare {<16 x i8>, i32} @llvm.s390.vstrczbs(<16 x i8>, <16 x i8>, <16 x i8>,
+ i32)
+declare {<8 x i16>, i32} @llvm.s390.vstrczhs(<8 x i16>, <8 x i16>, <8 x i16>,
+ i32)
+declare {<4 x i32>, i32} @llvm.s390.vstrczfs(<4 x i32>, <4 x i32>, <4 x i32>,
+ i32)
+declare {<2 x i64>, i32} @llvm.s390.vfcedbs(<2 x double>, <2 x double>)
+declare {<2 x i64>, i32} @llvm.s390.vfchdbs(<2 x double>, <2 x double>)
+declare {<2 x i64>, i32} @llvm.s390.vfchedbs(<2 x double>, <2 x double>)
+declare {<2 x i64>, i32} @llvm.s390.vftcidb(<2 x double>, i32)
+declare <2 x double> @llvm.s390.vfidb(<2 x double>, i32, i32)
+
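+; Two patterns recur in the checks below: the out-of-range addressing cases
+; use an offset of 4096, which no longer fits the 12-bit unsigned displacement
+; field and so must be added into a base register first, and the
+; "ipm; srl ..., 28" sequence extracts the condition code produced by the
+; CC-setting intrinsics into the low two bits of a general register.
+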
+; LCBB with the lowest M3 operand.
+define i32 @test_lcbb1(i8 *%ptr) {
+; CHECK-LABEL: test_lcbb1:
+; CHECK: lcbb %r2, 0(%r2), 0
+; CHECK: br %r14
+ %res = call i32 @llvm.s390.lcbb(i8 *%ptr, i32 0)
+ ret i32 %res
+}
+
+; LCBB with the highest M3 operand.
+define i32 @test_lcbb2(i8 *%ptr) {
+; CHECK-LABEL: test_lcbb2:
+; CHECK: lcbb %r2, 0(%r2), 15
+; CHECK: br %r14
+ %res = call i32 @llvm.s390.lcbb(i8 *%ptr, i32 15)
+ ret i32 %res
+}
+
+; LCBB with a displacement and index.
+define i32 @test_lcbb3(i8 *%base, i64 %index) {
+; CHECK-LABEL: test_lcbb3:
+; CHECK: lcbb %r2, 4095({{%r2,%r3|%r3,%r2}}), 4
+; CHECK: br %r14
+ %add = add i64 %index, 4095
+ %ptr = getelementptr i8, i8 *%base, i64 %add
+ %res = call i32 @llvm.s390.lcbb(i8 *%ptr, i32 4)
+ ret i32 %res
+}
+
+; LCBB with an out-of-range displacement.
+define i32 @test_lcbb4(i8 *%base) {
+; CHECK-LABEL: test_lcbb4:
+; CHECK: lcbb %r2, 0({{%r[1-5]}}), 5
+; CHECK: br %r14
+ %ptr = getelementptr i8, i8 *%base, i64 4096
+ %res = call i32 @llvm.s390.lcbb(i8 *%ptr, i32 5)
+ ret i32 %res
+}
+
+; VLBB with the lowest M3 operand.
+define <16 x i8> @test_vlbb1(i8 *%ptr) {
+; CHECK-LABEL: test_vlbb1:
+; CHECK: vlbb %v24, 0(%r2), 0
+; CHECK: br %r14
+ %res = call <16 x i8> @llvm.s390.vlbb(i8 *%ptr, i32 0)
+ ret <16 x i8> %res
+}
+
+; VLBB with the highest M3 operand.
+define <16 x i8> @test_vlbb2(i8 *%ptr) {
+; CHECK-LABEL: test_vlbb2:
+; CHECK: vlbb %v24, 0(%r2), 15
+; CHECK: br %r14
+ %res = call <16 x i8> @llvm.s390.vlbb(i8 *%ptr, i32 15)
+ ret <16 x i8> %res
+}
+
+; VLBB with a displacement and index.
+define <16 x i8> @test_vlbb3(i8 *%base, i64 %index) {
+; CHECK-LABEL: test_vlbb3:
+; CHECK: vlbb %v24, 4095({{%r2,%r3|%r3,%r2}}), 4
+; CHECK: br %r14
+ %add = add i64 %index, 4095
+ %ptr = getelementptr i8, i8 *%base, i64 %add
+ %res = call <16 x i8> @llvm.s390.vlbb(i8 *%ptr, i32 4)
+ ret <16 x i8> %res
+}
+
+; VLBB with an out-of-range displacement.
+define <16 x i8> @test_vlbb4(i8 *%base) {
+; CHECK-LABEL: test_vlbb4:
+; CHECK: vlbb %v24, 0({{%r[1-5]}}), 5
+; CHECK: br %r14
+ %ptr = getelementptr i8, i8 *%base, i64 4096
+ %res = call <16 x i8> @llvm.s390.vlbb(i8 *%ptr, i32 5)
+ ret <16 x i8> %res
+}
+
+; VLL with the lowest in-range displacement.
+define <16 x i8> @test_vll1(i8 *%ptr, i32 %length) {
+; CHECK-LABEL: test_vll1:
+; CHECK: vll %v24, %r3, 0(%r2)
+; CHECK: br %r14
+ %res = call <16 x i8> @llvm.s390.vll(i32 %length, i8 *%ptr)
+ ret <16 x i8> %res
+}
+
+; VLL with the highest in-range displacement.
+define <16 x i8> @test_vll2(i8 *%base, i32 %length) {
+; CHECK-LABEL: test_vll2:
+; CHECK: vll %v24, %r3, 4095(%r2)
+; CHECK: br %r14
+ %ptr = getelementptr i8, i8 *%base, i64 4095
+ %res = call <16 x i8> @llvm.s390.vll(i32 %length, i8 *%ptr)
+ ret <16 x i8> %res
+}
+
+; VLL with an out-of-range displacement.
+define <16 x i8> @test_vll3(i8 *%base, i32 %length) {
+; CHECK-LABEL: test_vll3:
+; CHECK: vll %v24, %r3, 0({{%r[1-5]}})
+; CHECK: br %r14
+ %ptr = getelementptr i8, i8 *%base, i64 4096
+ %res = call <16 x i8> @llvm.s390.vll(i32 %length, i8 *%ptr)
+ ret <16 x i8> %res
+}
+
+; Check that VLL doesn't allow an index.
+define <16 x i8> @test_vll4(i8 *%base, i64 %index, i32 %length) {
+; CHECK-LABEL: test_vll4:
+; CHECK: vll %v24, %r4, 0({{%r[1-5]}})
+; CHECK: br %r14
+ %ptr = getelementptr i8, i8 *%base, i64 %index
+ %res = call <16 x i8> @llvm.s390.vll(i32 %length, i8 *%ptr)
+ ret <16 x i8> %res
+}
+
+; VPDI taking element 0 from each half.
+define <2 x i64> @test_vpdi1(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: test_vpdi1:
+; CHECK: vpdi %v24, %v24, %v26, 0
+; CHECK: br %r14
+ %res = call <2 x i64> @llvm.s390.vpdi(<2 x i64> %a, <2 x i64> %b, i32 0)
+ ret <2 x i64> %res
+}
+
+; VPDI taking element 1 from each half.
+define <2 x i64> @test_vpdi2(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: test_vpdi2:
+; CHECK: vpdi %v24, %v24, %v26, 10
+; CHECK: br %r14
+ %res = call <2 x i64> @llvm.s390.vpdi(<2 x i64> %a, <2 x i64> %b, i32 10)
+ ret <2 x i64> %res
+}
+
+; VPERM.
+define <16 x i8> @test_vperm(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) {
+; CHECK-LABEL: test_vperm:
+; CHECK: vperm %v24, %v24, %v26, %v28
+; CHECK: br %r14
+ %res = call <16 x i8> @llvm.s390.vperm(<16 x i8> %a, <16 x i8> %b,
+ <16 x i8> %c)
+ ret <16 x i8> %res
+}
+
+; VPKSH.
+define <16 x i8> @test_vpksh(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: test_vpksh:
+; CHECK: vpksh %v24, %v24, %v26
+; CHECK: br %r14
+ %res = call <16 x i8> @llvm.s390.vpksh(<8 x i16> %a, <8 x i16> %b)
+ ret <16 x i8> %res
+}
+
+; VPKSF.
+define <8 x i16> @test_vpksf(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_vpksf:
+; CHECK: vpksf %v24, %v24, %v26
+; CHECK: br %r14
+ %res = call <8 x i16> @llvm.s390.vpksf(<4 x i32> %a, <4 x i32> %b)
+ ret <8 x i16> %res
+}
+
+; VPKSG.
+define <4 x i32> @test_vpksg(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: test_vpksg:
+; CHECK: vpksg %v24, %v24, %v26
+; CHECK: br %r14
+ %res = call <4 x i32> @llvm.s390.vpksg(<2 x i64> %a, <2 x i64> %b)
+ ret <4 x i32> %res
+}
+
+; VPKSHS with no processing of the result.
+define <16 x i8> @test_vpkshs(<8 x i16> %a, <8 x i16> %b, i32 *%ccptr) {
+; CHECK-LABEL: test_vpkshs:
+; CHECK: vpkshs %v24, %v24, %v26
+; CHECK: ipm [[REG:%r[0-5]]]
+; CHECK: srl [[REG]], 28
+; CHECK: st [[REG]], 0(%r2)
+; CHECK: br %r14
+ %call = call {<16 x i8>, i32} @llvm.s390.vpkshs(<8 x i16> %a, <8 x i16> %b)
+ %res = extractvalue {<16 x i8>, i32} %call, 0
+ %cc = extractvalue {<16 x i8>, i32} %call, 1
+ store i32 %cc, i32 *%ccptr
+ ret <16 x i8> %res
+}
+
+; VPKSHS, storing to %ptr if all values were saturated.
+define <16 x i8> @test_vpkshs_all_store(<8 x i16> %a, <8 x i16> %b, i32 *%ptr) {
+; CHECK-LABEL: test_vpkshs_all_store:
+; CHECK: vpkshs %v24, %v24, %v26
+; CHECK-NEXT: {{jno|jle}} {{\.L*}}
+; CHECK: mvhi 0(%r2), 0
+; CHECK: br %r14
+ %call = call {<16 x i8>, i32} @llvm.s390.vpkshs(<8 x i16> %a, <8 x i16> %b)
+ %res = extractvalue {<16 x i8>, i32} %call, 0
+ %cc = extractvalue {<16 x i8>, i32} %call, 1
+ %cmp = icmp uge i32 %cc, 3
+ br i1 %cmp, label %store, label %exit
+
+store:
+ store i32 0, i32 *%ptr
+ br label %exit
+
+exit:
+ ret <16 x i8> %res
+}
+
+; VPKSFS with no processing of the result.
+define <8 x i16> @test_vpksfs(<4 x i32> %a, <4 x i32> %b, i32 *%ccptr) {
+; CHECK-LABEL: test_vpksfs:
+; CHECK: vpksfs %v24, %v24, %v26
+; CHECK: ipm [[REG:%r[0-5]]]
+; CHECK: srl [[REG]], 28
+; CHECK: st [[REG]], 0(%r2)
+; CHECK: br %r14
+ %call = call {<8 x i16>, i32} @llvm.s390.vpksfs(<4 x i32> %a, <4 x i32> %b)
+ %res = extractvalue {<8 x i16>, i32} %call, 0
+ %cc = extractvalue {<8 x i16>, i32} %call, 1
+ store i32 %cc, i32 *%ccptr
+ ret <8 x i16> %res
+}
+
+; VPKSFS, storing to %ptr if any values were saturated.
+define <8 x i16> @test_vpksfs_any_store(<4 x i32> %a, <4 x i32> %b, i32 *%ptr) {
+; CHECK-LABEL: test_vpksfs_any_store:
+; CHECK: vpksfs %v24, %v24, %v26
+; CHECK-NEXT: {{jhe|je}} {{\.L*}}
+; CHECK: mvhi 0(%r2), 0
+; CHECK: br %r14
+ %call = call {<8 x i16>, i32} @llvm.s390.vpksfs(<4 x i32> %a, <4 x i32> %b)
+ %res = extractvalue {<8 x i16>, i32} %call, 0
+ %cc = extractvalue {<8 x i16>, i32} %call, 1
+ %cmp = icmp ugt i32 %cc, 0
+ br i1 %cmp, label %store, label %exit
+
+store:
+ store i32 0, i32 *%ptr
+ br label %exit
+
+exit:
+ ret <8 x i16> %res
+}
+
+; VPKSGS with no processing of the result.
+define <4 x i32> @test_vpksgs(<2 x i64> %a, <2 x i64> %b, i32 *%ccptr) {
+; CHECK-LABEL: test_vpksgs:
+; CHECK: vpksgs %v24, %v24, %v26
+; CHECK: ipm [[REG:%r[0-5]]]
+; CHECK: srl [[REG]], 28
+; CHECK: st [[REG]], 0(%r2)
+; CHECK: br %r14
+ %call = call {<4 x i32>, i32} @llvm.s390.vpksgs(<2 x i64> %a, <2 x i64> %b)
+ %res = extractvalue {<4 x i32>, i32} %call, 0
+ %cc = extractvalue {<4 x i32>, i32} %call, 1
+ store i32 %cc, i32 *%ccptr
+ ret <4 x i32> %res
+}
+
+; VPKSGS, storing to %ptr if no elements were saturated
+define <4 x i32> @test_vpksgs_none_store(<2 x i64> %a, <2 x i64> %b,
+ i32 *%ptr) {
+; CHECK-LABEL: test_vpksgs_none_store:
+; CHECK: vpksgs %v24, %v24, %v26
+; CHECK-NEXT: {{jnhe|jne}} {{\.L*}}
+; CHECK: mvhi 0(%r2), 0
+; CHECK: br %r14
+ %call = call {<4 x i32>, i32} @llvm.s390.vpksgs(<2 x i64> %a, <2 x i64> %b)
+ %res = extractvalue {<4 x i32>, i32} %call, 0
+ %cc = extractvalue {<4 x i32>, i32} %call, 1
+ %cmp = icmp sle i32 %cc, 0
+ br i1 %cmp, label %store, label %exit
+
+store:
+ store i32 0, i32 *%ptr
+ br label %exit
+
+exit:
+ ret <4 x i32> %res
+}
+
+; VPKLSH.
+define <16 x i8> @test_vpklsh(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: test_vpklsh:
+; CHECK: vpklsh %v24, %v24, %v26
+; CHECK: br %r14
+ %res = call <16 x i8> @llvm.s390.vpklsh(<8 x i16> %a, <8 x i16> %b)
+ ret <16 x i8> %res
+}
+
+; VPKLSF.
+define <8 x i16> @test_vpklsf(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_vpklsf:
+; CHECK: vpklsf %v24, %v24, %v26
+; CHECK: br %r14
+ %res = call <8 x i16> @llvm.s390.vpklsf(<4 x i32> %a, <4 x i32> %b)
+ ret <8 x i16> %res
+}
+
+; VPKLSG.
+define <4 x i32> @test_vpklsg(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: test_vpklsg:
+; CHECK: vpklsg %v24, %v24, %v26
+; CHECK: br %r14
+ %res = call <4 x i32> @llvm.s390.vpklsg(<2 x i64> %a, <2 x i64> %b)
+ ret <4 x i32> %res
+}
+
+; VPKLSHS with no processing of the result.
+define <16 x i8> @test_vpklshs(<8 x i16> %a, <8 x i16> %b, i32 *%ccptr) {
+; CHECK-LABEL: test_vpklshs:
+; CHECK: vpklshs %v24, %v24, %v26
+; CHECK: ipm [[REG:%r[0-5]]]
+; CHECK: srl [[REG]], 28
+; CHECK: st [[REG]], 0(%r2)
+; CHECK: br %r14
+ %call = call {<16 x i8>, i32} @llvm.s390.vpklshs(<8 x i16> %a, <8 x i16> %b)
+ %res = extractvalue {<16 x i8>, i32} %call, 0
+ %cc = extractvalue {<16 x i8>, i32} %call, 1
+ store i32 %cc, i32 *%ccptr
+ ret <16 x i8> %res
+}
+
+; VPKLSHS, storing to %ptr if all values were saturated.
+define <16 x i8> @test_vpklshs_all_store(<8 x i16> %a, <8 x i16> %b,
+ i32 *%ptr) {
+; CHECK-LABEL: test_vpklshs_all_store:
+; CHECK: vpklshs %v24, %v24, %v26
+; CHECK-NEXT: {{jno|jle}} {{\.L*}}
+; CHECK: mvhi 0(%r2), 0
+; CHECK: br %r14
+ %call = call {<16 x i8>, i32} @llvm.s390.vpklshs(<8 x i16> %a, <8 x i16> %b)
+ %res = extractvalue {<16 x i8>, i32} %call, 0
+ %cc = extractvalue {<16 x i8>, i32} %call, 1
+ %cmp = icmp eq i32 %cc, 3
+ br i1 %cmp, label %store, label %exit
+
+store:
+ store i32 0, i32 *%ptr
+ br label %exit
+
+exit:
+ ret <16 x i8> %res
+}
+
+; VPKLSFS with no processing of the result.
+define <8 x i16> @test_vpklsfs(<4 x i32> %a, <4 x i32> %b, i32 *%ccptr) {
+; CHECK-LABEL: test_vpklsfs:
+; CHECK: vpklsfs %v24, %v24, %v26
+; CHECK: ipm [[REG:%r[0-5]]]
+; CHECK: srl [[REG]], 28
+; CHECK: st [[REG]], 0(%r2)
+; CHECK: br %r14
+ %call = call {<8 x i16>, i32} @llvm.s390.vpklsfs(<4 x i32> %a, <4 x i32> %b)
+ %res = extractvalue {<8 x i16>, i32} %call, 0
+ %cc = extractvalue {<8 x i16>, i32} %call, 1
+ store i32 %cc, i32 *%ccptr
+ ret <8 x i16> %res
+}
+
+; VPKLSFS, storing to %ptr if any values were saturated.
+define <8 x i16> @test_vpklsfs_any_store(<4 x i32> %a, <4 x i32> %b,
+ i32 *%ptr) {
+; CHECK-LABEL: test_vpklsfs_any_store:
+; CHECK: vpklsfs %v24, %v24, %v26
+; CHECK-NEXT: {{jhe|je}} {{\.L*}}
+; CHECK: mvhi 0(%r2), 0
+; CHECK: br %r14
+ %call = call {<8 x i16>, i32} @llvm.s390.vpklsfs(<4 x i32> %a, <4 x i32> %b)
+ %res = extractvalue {<8 x i16>, i32} %call, 0
+ %cc = extractvalue {<8 x i16>, i32} %call, 1
+ %cmp = icmp ne i32 %cc, 0
+ br i1 %cmp, label %store, label %exit
+
+store:
+ store i32 0, i32 *%ptr
+ br label %exit
+
+exit:
+ ret <8 x i16> %res
+}
+
+; VPKLSGS with no processing of the result.
+define <4 x i32> @test_vpklsgs(<2 x i64> %a, <2 x i64> %b, i32 *%ccptr) {
+; CHECK-LABEL: test_vpklsgs:
+; CHECK: vpklsgs %v24, %v24, %v26
+; CHECK: ipm [[REG:%r[0-5]]]
+; CHECK: srl [[REG]], 28
+; CHECK: st [[REG]], 0(%r2)
+; CHECK: br %r14
+ %call = call {<4 x i32>, i32} @llvm.s390.vpklsgs(<2 x i64> %a, <2 x i64> %b)
+ %res = extractvalue {<4 x i32>, i32} %call, 0
+ %cc = extractvalue {<4 x i32>, i32} %call, 1
+ store i32 %cc, i32 *%ccptr
+ ret <4 x i32> %res
+}
+
+; VPKLSGS, storing to %ptr if no elements were saturated
+define <4 x i32> @test_vpklsgs_none_store(<2 x i64> %a, <2 x i64> %b,
+ i32 *%ptr) {
+; CHECK-LABEL: test_vpklsgs_none_store:
+; CHECK: vpklsgs %v24, %v24, %v26
+; CHECK-NEXT: {{jnhe|jne}} {{\.L*}}
+; CHECK: mvhi 0(%r2), 0
+; CHECK: br %r14
+ %call = call {<4 x i32>, i32} @llvm.s390.vpklsgs(<2 x i64> %a, <2 x i64> %b)
+ %res = extractvalue {<4 x i32>, i32} %call, 0
+ %cc = extractvalue {<4 x i32>, i32} %call, 1
+ %cmp = icmp eq i32 %cc, 0
+ br i1 %cmp, label %store, label %exit
+
+store:
+ store i32 0, i32 *%ptr
+ br label %exit
+
+exit:
+ ret <4 x i32> %res
+}
+
+; VSTL with the lowest in-range displacement.
+define void @test_vstl1(<16 x i8> %vec, i8 *%ptr, i32 %length) {
+; CHECK-LABEL: test_vstl1:
+; CHECK: vstl %v24, %r3, 0(%r2)
+; CHECK: br %r14
+ call void @llvm.s390.vstl(<16 x i8> %vec, i32 %length, i8 *%ptr)
+ ret void
+}
+
+; VSTL with the highest in-range displacement.
+define void @test_vstl2(<16 x i8> %vec, i8 *%base, i32 %length) {
+; CHECK-LABEL: test_vstl2:
+; CHECK: vstl %v24, %r3, 4095(%r2)
+; CHECK: br %r14
+ %ptr = getelementptr i8, i8 *%base, i64 4095
+ call void @llvm.s390.vstl(<16 x i8> %vec, i32 %length, i8 *%ptr)
+ ret void
+}
+
+; VSTL with an out-of-range displacement.
+define void @test_vstl3(<16 x i8> %vec, i8 *%base, i32 %length) {
+; CHECK-LABEL: test_vstl3:
+; CHECK: vstl %v24, %r3, 0({{%r[1-5]}})
+; CHECK: br %r14
+ %ptr = getelementptr i8, i8 *%base, i64 4096
+ call void @llvm.s390.vstl(<16 x i8> %vec, i32 %length, i8 *%ptr)
+ ret void
+}
+
+; Check that VSTL doesn't allow an index.
+define void @test_vstl4(<16 x i8> %vec, i8 *%base, i64 %index, i32 %length) {
+; CHECK-LABEL: test_vstl4:
+; CHECK: vstl %v24, %r4, 0({{%r[1-5]}})
+; CHECK: br %r14
+ %ptr = getelementptr i8, i8 *%base, i64 %index
+ call void @llvm.s390.vstl(<16 x i8> %vec, i32 %length, i8 *%ptr)
+ ret void
+}
+
+; VUPHB.
+define <8 x i16> @test_vuphb(<16 x i8> %a) {
+; CHECK-LABEL: test_vuphb:
+; CHECK: vuphb %v24, %v24
+; CHECK: br %r14
+ %res = call <8 x i16> @llvm.s390.vuphb(<16 x i8> %a)
+ ret <8 x i16> %res
+}
+
+; VUPHH.
+define <4 x i32> @test_vuphh(<8 x i16> %a) {
+; CHECK-LABEL: test_vuphh:
+; CHECK: vuphh %v24, %v24
+; CHECK: br %r14
+ %res = call <4 x i32> @llvm.s390.vuphh(<8 x i16> %a)
+ ret <4 x i32> %res
+}
+
+; VUPHF.
+define <2 x i64> @test_vuphf(<4 x i32> %a) {
+; CHECK-LABEL: test_vuphf:
+; CHECK: vuphf %v24, %v24
+; CHECK: br %r14
+ %res = call <2 x i64> @llvm.s390.vuphf(<4 x i32> %a)
+ ret <2 x i64> %res
+}
+
+; VUPLHB.
+define <8 x i16> @test_vuplhb(<16 x i8> %a) {
+; CHECK-LABEL: test_vuplhb:
+; CHECK: vuplhb %v24, %v24
+; CHECK: br %r14
+ %res = call <8 x i16> @llvm.s390.vuplhb(<16 x i8> %a)
+ ret <8 x i16> %res
+}
+
+; VUPLHH.
+define <4 x i32> @test_vuplhh(<8 x i16> %a) {
+; CHECK-LABEL: test_vuplhh:
+; CHECK: vuplhh %v24, %v24
+; CHECK: br %r14
+ %res = call <4 x i32> @llvm.s390.vuplhh(<8 x i16> %a)
+ ret <4 x i32> %res
+}
+
+; VUPLHF.
+define <2 x i64> @test_vuplhf(<4 x i32> %a) {
+; CHECK-LABEL: test_vuplhf:
+; CHECK: vuplhf %v24, %v24
+; CHECK: br %r14
+ %res = call <2 x i64> @llvm.s390.vuplhf(<4 x i32> %a)
+ ret <2 x i64> %res
+}
+
+; VUPLB.
+define <8 x i16> @test_vuplb(<16 x i8> %a) {
+; CHECK-LABEL: test_vuplb:
+; CHECK: vuplb %v24, %v24
+; CHECK: br %r14
+ %res = call <8 x i16> @llvm.s390.vuplb(<16 x i8> %a)
+ ret <8 x i16> %res
+}
+
+; VUPLHW.
+define <4 x i32> @test_vuplhw(<8 x i16> %a) {
+; CHECK-LABEL: test_vuplhw:
+; CHECK: vuplhw %v24, %v24
+; CHECK: br %r14
+ %res = call <4 x i32> @llvm.s390.vuplhw(<8 x i16> %a)
+ ret <4 x i32> %res
+}
+
+; VUPLF.
+define <2 x i64> @test_vuplf(<4 x i32> %a) {
+; CHECK-LABEL: test_vuplf:
+; CHECK: vuplf %v24, %v24
+; CHECK: br %r14
+ %res = call <2 x i64> @llvm.s390.vuplf(<4 x i32> %a)
+ ret <2 x i64> %res
+}
+
+; VUPLLB.
+define <8 x i16> @test_vupllb(<16 x i8> %a) {
+; CHECK-LABEL: test_vupllb:
+; CHECK: vupllb %v24, %v24
+; CHECK: br %r14
+ %res = call <8 x i16> @llvm.s390.vupllb(<16 x i8> %a)
+ ret <8 x i16> %res
+}
+
+; VUPLLH.
+define <4 x i32> @test_vupllh(<8 x i16> %a) {
+; CHECK-LABEL: test_vupllh:
+; CHECK: vupllh %v24, %v24
+; CHECK: br %r14
+ %res = call <4 x i32> @llvm.s390.vupllh(<8 x i16> %a)
+ ret <4 x i32> %res
+}
+
+; VUPLLF.
+define <2 x i64> @test_vupllf(<4 x i32> %a) {
+; CHECK-LABEL: test_vupllf:
+; CHECK: vupllf %v24, %v24
+; CHECK: br %r14
+ %res = call <2 x i64> @llvm.s390.vupllf(<4 x i32> %a)
+ ret <2 x i64> %res
+}
+
+; VACCB.
+define <16 x i8> @test_vaccb(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: test_vaccb:
+; CHECK: vaccb %v24, %v24, %v26
+; CHECK: br %r14
+ %res = call <16 x i8> @llvm.s390.vaccb(<16 x i8> %a, <16 x i8> %b)
+ ret <16 x i8> %res
+}
+
+; VACCH.
+define <8 x i16> @test_vacch(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: test_vacch:
+; CHECK: vacch %v24, %v24, %v26
+; CHECK: br %r14
+ %res = call <8 x i16> @llvm.s390.vacch(<8 x i16> %a, <8 x i16> %b)
+ ret <8 x i16> %res
+}
+
+; VACCF.
+define <4 x i32> @test_vaccf(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_vaccf:
+; CHECK: vaccf %v24, %v24, %v26
+; CHECK: br %r14
+ %res = call <4 x i32> @llvm.s390.vaccf(<4 x i32> %a, <4 x i32> %b)
+ ret <4 x i32> %res
+}
+
+; VACCG.
+define <2 x i64> @test_vaccg(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: test_vaccg:
+; CHECK: vaccg %v24, %v24, %v26
+; CHECK: br %r14
+ %res = call <2 x i64> @llvm.s390.vaccg(<2 x i64> %a, <2 x i64> %b)
+ ret <2 x i64> %res
+}
+
+; VAQ.
+define <16 x i8> @test_vaq(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: test_vaq:
+; CHECK: vaq %v24, %v24, %v26
+; CHECK: br %r14
+ %res = call <16 x i8> @llvm.s390.vaq(<16 x i8> %a, <16 x i8> %b)
+ ret <16 x i8> %res
+}
+
+; VACQ.
+define <16 x i8> @test_vacq(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) {
+; CHECK-LABEL: test_vacq:
+; CHECK: vacq %v24, %v24, %v26, %v28
+; CHECK: br %r14
+ %res = call <16 x i8> @llvm.s390.vacq(<16 x i8> %a, <16 x i8> %b,
+ <16 x i8> %c)
+ ret <16 x i8> %res
+}
+
+; VACCQ.
+define <16 x i8> @test_vaccq(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: test_vaccq:
+; CHECK: vaccq %v24, %v24, %v26
+; CHECK: br %r14
+ %res = call <16 x i8> @llvm.s390.vaccq(<16 x i8> %a, <16 x i8> %b)
+ ret <16 x i8> %res
+}
+
+; VACCCQ.
+define <16 x i8> @test_vacccq(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) {
+; CHECK-LABEL: test_vacccq:
+; CHECK: vacccq %v24, %v24, %v26, %v28
+; CHECK: br %r14
+ %res = call <16 x i8> @llvm.s390.vacccq(<16 x i8> %a, <16 x i8> %b,
+ <16 x i8> %c)
+ ret <16 x i8> %res
+}
+
+; VAVGB.
+define <16 x i8> @test_vavgb(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: test_vavgb:
+; CHECK: vavgb %v24, %v24, %v26
+; CHECK: br %r14
+ %res = call <16 x i8> @llvm.s390.vavgb(<16 x i8> %a, <16 x i8> %b)
+ ret <16 x i8> %res
+}
+
+; VAVGH.
+define <8 x i16> @test_vavgh(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: test_vavgh:
+; CHECK: vavgh %v24, %v24, %v26
+; CHECK: br %r14
+ %res = call <8 x i16> @llvm.s390.vavgh(<8 x i16> %a, <8 x i16> %b)
+ ret <8 x i16> %res
+}
+
+; VAVGF.
+define <4 x i32> @test_vavgf(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_vavgf:
+; CHECK: vavgf %v24, %v24, %v26
+; CHECK: br %r14
+ %res = call <4 x i32> @llvm.s390.vavgf(<4 x i32> %a, <4 x i32> %b)
+ ret <4 x i32> %res
+}
+
+; VAVGG.
+define <2 x i64> @test_vavgg(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: test_vavgg:
+; CHECK: vavgg %v24, %v24, %v26
+; CHECK: br %r14
+ %res = call <2 x i64> @llvm.s390.vavgg(<2 x i64> %a, <2 x i64> %b)
+ ret <2 x i64> %res
+}
+
+; VAVGLB.
+define <16 x i8> @test_vavglb(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: test_vavglb:
+; CHECK: vavglb %v24, %v24, %v26
+; CHECK: br %r14
+ %res = call <16 x i8> @llvm.s390.vavglb(<16 x i8> %a, <16 x i8> %b)
+ ret <16 x i8> %res
+}
+
+; VAVGLH.
+define <8 x i16> @test_vavglh(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: test_vavglh:
+; CHECK: vavglh %v24, %v24, %v26
+; CHECK: br %r14
+ %res = call <8 x i16> @llvm.s390.vavglh(<8 x i16> %a, <8 x i16> %b)
+ ret <8 x i16> %res
+}
+
+; VAVGLF.
+define <4 x i32> @test_vavglf(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_vavglf:
+; CHECK: vavglf %v24, %v24, %v26
+; CHECK: br %r14
+ %res = call <4 x i32> @llvm.s390.vavglf(<4 x i32> %a, <4 x i32> %b)
+ ret <4 x i32> %res
+}
+
+; VAVGLG.
+define <2 x i64> @test_vavglg(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: test_vavglg:
+; CHECK: vavglg %v24, %v24, %v26
+; CHECK: br %r14
+ %res = call <2 x i64> @llvm.s390.vavglg(<2 x i64> %a, <2 x i64> %b)
+ ret <2 x i64> %res
+}
+
+; VCKSM.
+define <4 x i32> @test_vcksm(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_vcksm:
+; CHECK: vcksm %v24, %v24, %v26
+; CHECK: br %r14
+ %res = call <4 x i32> @llvm.s390.vcksm(<4 x i32> %a, <4 x i32> %b)
+ ret <4 x i32> %res
+}
+
+; VGFMB.
+define <8 x i16> @test_vgfmb(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: test_vgfmb:
+; CHECK: vgfmb %v24, %v24, %v26
+; CHECK: br %r14
+ %res = call <8 x i16> @llvm.s390.vgfmb(<16 x i8> %a, <16 x i8> %b)
+ ret <8 x i16> %res
+}
+
+; VGFMH.
+define <4 x i32> @test_vgfmh(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: test_vgfmh:
+; CHECK: vgfmh %v24, %v24, %v26
+; CHECK: br %r14
+ %res = call <4 x i32> @llvm.s390.vgfmh(<8 x i16> %a, <8 x i16> %b)
+ ret <4 x i32> %res
+}
+
+; VGFMF.
+define <2 x i64> @test_vgfmf(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_vgfmf:
+; CHECK: vgfmf %v24, %v24, %v26
+; CHECK: br %r14
+ %res = call <2 x i64> @llvm.s390.vgfmf(<4 x i32> %a, <4 x i32> %b)
+ ret <2 x i64> %res
+}
+
+; VGFMG.
+define <16 x i8> @test_vgfmg(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: test_vgfmg:
+; CHECK: vgfmg %v24, %v24, %v26
+; CHECK: br %r14
+ %res = call <16 x i8> @llvm.s390.vgfmg(<2 x i64> %a, <2 x i64> %b)
+ ret <16 x i8> %res
+}
+
+; VGFMAB.
+define <8 x i16> @test_vgfmab(<16 x i8> %a, <16 x i8> %b, <8 x i16> %c) {
+; CHECK-LABEL: test_vgfmab:
+; CHECK: vgfmab %v24, %v24, %v26, %v28
+; CHECK: br %r14
+ %res = call <8 x i16> @llvm.s390.vgfmab(<16 x i8> %a, <16 x i8> %b,
+ <8 x i16> %c)
+ ret <8 x i16> %res
+}
+
+; VGFMAH.
+define <4 x i32> @test_vgfmah(<8 x i16> %a, <8 x i16> %b, <4 x i32> %c) {
+; CHECK-LABEL: test_vgfmah:
+; CHECK: vgfmah %v24, %v24, %v26, %v28
+; CHECK: br %r14
+ %res = call <4 x i32> @llvm.s390.vgfmah(<8 x i16> %a, <8 x i16> %b,
+ <4 x i32> %c)
+ ret <4 x i32> %res
+}
+
+; VGFMAF.
+define <2 x i64> @test_vgfmaf(<4 x i32> %a, <4 x i32> %b, <2 x i64> %c) {
+; CHECK-LABEL: test_vgfmaf:
+; CHECK: vgfmaf %v24, %v24, %v26, %v28
+; CHECK: br %r14
+ %res = call <2 x i64> @llvm.s390.vgfmaf(<4 x i32> %a, <4 x i32> %b,
+ <2 x i64> %c)
+ ret <2 x i64> %res
+}
+
+; VGFMAG.
+define <16 x i8> @test_vgfmag(<2 x i64> %a, <2 x i64> %b, <16 x i8> %c) {
+; CHECK-LABEL: test_vgfmag:
+; CHECK: vgfmag %v24, %v24, %v26, %v28
+; CHECK: br %r14
+ %res = call <16 x i8> @llvm.s390.vgfmag(<2 x i64> %a, <2 x i64> %b,
+ <16 x i8> %c)
+ ret <16 x i8> %res
+}
+
+; VMAHB.
+define <16 x i8> @test_vmahb(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) {
+; CHECK-LABEL: test_vmahb:
+; CHECK: vmahb %v24, %v24, %v26, %v28
+; CHECK: br %r14
+ %res = call <16 x i8> @llvm.s390.vmahb(<16 x i8> %a, <16 x i8> %b,
+ <16 x i8> %c)
+ ret <16 x i8> %res
+}
+
+; VMAHH.
+define <8 x i16> @test_vmahh(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) {
+; CHECK-LABEL: test_vmahh:
+; CHECK: vmahh %v24, %v24, %v26, %v28
+; CHECK: br %r14
+ %res = call <8 x i16> @llvm.s390.vmahh(<8 x i16> %a, <8 x i16> %b,
+ <8 x i16> %c)
+ ret <8 x i16> %res
+}
+
+; VMAHF.
+define <4 x i32> @test_vmahf(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
+; CHECK-LABEL: test_vmahf:
+; CHECK: vmahf %v24, %v24, %v26, %v28
+; CHECK: br %r14
+ %res = call <4 x i32> @llvm.s390.vmahf(<4 x i32> %a, <4 x i32> %b,
+ <4 x i32> %c)
+ ret <4 x i32> %res
+}
+
+; VMALHB.
+define <16 x i8> @test_vmalhb(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) {
+; CHECK-LABEL: test_vmalhb:
+; CHECK: vmalhb %v24, %v24, %v26, %v28
+; CHECK: br %r14
+ %res = call <16 x i8> @llvm.s390.vmalhb(<16 x i8> %a, <16 x i8> %b,
+ <16 x i8> %c)
+ ret <16 x i8> %res
+}
+
+; VMALHH.
+define <8 x i16> @test_vmalhh(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) {
+; CHECK-LABEL: test_vmalhh:
+; CHECK: vmalhh %v24, %v24, %v26, %v28
+; CHECK: br %r14
+ %res = call <8 x i16> @llvm.s390.vmalhh(<8 x i16> %a, <8 x i16> %b,
+ <8 x i16> %c)
+ ret <8 x i16> %res
+}
+
+; VMALHF.
+define <4 x i32> @test_vmalhf(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
+; CHECK-LABEL: test_vmalhf:
+; CHECK: vmalhf %v24, %v24, %v26, %v28
+; CHECK: br %r14
+ %res = call <4 x i32> @llvm.s390.vmalhf(<4 x i32> %a, <4 x i32> %b,
+ <4 x i32> %c)
+ ret <4 x i32> %res
+}
+
+; VMAEB.
+define <8 x i16> @test_vmaeb(<16 x i8> %a, <16 x i8> %b, <8 x i16> %c) {
+; CHECK-LABEL: test_vmaeb:
+; CHECK: vmaeb %v24, %v24, %v26, %v28
+; CHECK: br %r14
+ %res = call <8 x i16> @llvm.s390.vmaeb(<16 x i8> %a, <16 x i8> %b,
+ <8 x i16> %c)
+ ret <8 x i16> %res
+}
+
+; VMAEH.
+define <4 x i32> @test_vmaeh(<8 x i16> %a, <8 x i16> %b, <4 x i32> %c) {
+; CHECK-LABEL: test_vmaeh:
+; CHECK: vmaeh %v24, %v24, %v26, %v28
+; CHECK: br %r14
+ %res = call <4 x i32> @llvm.s390.vmaeh(<8 x i16> %a, <8 x i16> %b,
+ <4 x i32> %c)
+ ret <4 x i32> %res
+}
+
+; VMAEF.
+define <2 x i64> @test_vmaef(<4 x i32> %a, <4 x i32> %b, <2 x i64> %c) {
+; CHECK-LABEL: test_vmaef:
+; CHECK: vmaef %v24, %v24, %v26, %v28
+; CHECK: br %r14
+ %res = call <2 x i64> @llvm.s390.vmaef(<4 x i32> %a, <4 x i32> %b,
+ <2 x i64> %c)
+ ret <2 x i64> %res
+}
+
+; VMALEB.
+define <8 x i16> @test_vmaleb(<16 x i8> %a, <16 x i8> %b, <8 x i16> %c) {
+; CHECK-LABEL: test_vmaleb:
+; CHECK: vmaleb %v24, %v24, %v26, %v28
+; CHECK: br %r14
+ %res = call <8 x i16> @llvm.s390.vmaleb(<16 x i8> %a, <16 x i8> %b,
+ <8 x i16> %c)
+ ret <8 x i16> %res
+}
+
+; VMALEH.
+define <4 x i32> @test_vmaleh(<8 x i16> %a, <8 x i16> %b, <4 x i32> %c) {
+; CHECK-LABEL: test_vmaleh:
+; CHECK: vmaleh %v24, %v24, %v26, %v28
+; CHECK: br %r14
+ %res = call <4 x i32> @llvm.s390.vmaleh(<8 x i16> %a, <8 x i16> %b,
+ <4 x i32> %c)
+ ret <4 x i32> %res
+}
+
+; VMALEF.
+define <2 x i64> @test_vmalef(<4 x i32> %a, <4 x i32> %b, <2 x i64> %c) {
+; CHECK-LABEL: test_vmalef:
+; CHECK: vmalef %v24, %v24, %v26, %v28
+; CHECK: br %r14
+ %res = call <2 x i64> @llvm.s390.vmalef(<4 x i32> %a, <4 x i32> %b,
+ <2 x i64> %c)
+ ret <2 x i64> %res
+}
+
+; VMAOB.
+define <8 x i16> @test_vmaob(<16 x i8> %a, <16 x i8> %b, <8 x i16> %c) {
+; CHECK-LABEL: test_vmaob:
+; CHECK: vmaob %v24, %v24, %v26, %v28
+; CHECK: br %r14
+ %res = call <8 x i16> @llvm.s390.vmaob(<16 x i8> %a, <16 x i8> %b,
+ <8 x i16> %c)
+ ret <8 x i16> %res
+}
+
+; VMAOH.
+define <4 x i32> @test_vmaoh(<8 x i16> %a, <8 x i16> %b, <4 x i32> %c) {
+; CHECK-LABEL: test_vmaoh:
+; CHECK: vmaoh %v24, %v24, %v26, %v28
+; CHECK: br %r14
+ %res = call <4 x i32> @llvm.s390.vmaoh(<8 x i16> %a, <8 x i16> %b,
+ <4 x i32> %c)
+ ret <4 x i32> %res
+}
+
+; VMAOF.
+define <2 x i64> @test_vmaof(<4 x i32> %a, <4 x i32> %b, <2 x i64> %c) {
+; CHECK-LABEL: test_vmaof:
+; CHECK: vmaof %v24, %v24, %v26, %v28
+; CHECK: br %r14
+ %res = call <2 x i64> @llvm.s390.vmaof(<4 x i32> %a, <4 x i32> %b,
+ <2 x i64> %c)
+ ret <2 x i64> %res
+}
+
+; VMALOB.
+define <8 x i16> @test_vmalob(<16 x i8> %a, <16 x i8> %b, <8 x i16> %c) {
+; CHECK-LABEL: test_vmalob:
+; CHECK: vmalob %v24, %v24, %v26, %v28
+; CHECK: br %r14
+ %res = call <8 x i16> @llvm.s390.vmalob(<16 x i8> %a, <16 x i8> %b,
+ <8 x i16> %c)
+ ret <8 x i16> %res
+}
+
+; VMALOH.
+define <4 x i32> @test_vmaloh(<8 x i16> %a, <8 x i16> %b, <4 x i32> %c) {
+; CHECK-LABEL: test_vmaloh:
+; CHECK: vmaloh %v24, %v24, %v26, %v28
+; CHECK: br %r14
+ %res = call <4 x i32> @llvm.s390.vmaloh(<8 x i16> %a, <8 x i16> %b,
+ <4 x i32> %c)
+ ret <4 x i32> %res
+}
+
+; VMALOF.
+define <2 x i64> @test_vmalof(<4 x i32> %a, <4 x i32> %b, <2 x i64> %c) {
+; CHECK-LABEL: test_vmalof:
+; CHECK: vmalof %v24, %v24, %v26, %v28
+; CHECK: br %r14
+ %res = call <2 x i64> @llvm.s390.vmalof(<4 x i32> %a, <4 x i32> %b,
+ <2 x i64> %c)
+ ret <2 x i64> %res
+}
+
+; VMHB.
+define <16 x i8> @test_vmhb(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: test_vmhb:
+; CHECK: vmhb %v24, %v24, %v26
+; CHECK: br %r14
+ %res = call <16 x i8> @llvm.s390.vmhb(<16 x i8> %a, <16 x i8> %b)
+ ret <16 x i8> %res
+}
+
+; VMHH.
+define <8 x i16> @test_vmhh(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: test_vmhh:
+; CHECK: vmhh %v24, %v24, %v26
+; CHECK: br %r14
+ %res = call <8 x i16> @llvm.s390.vmhh(<8 x i16> %a, <8 x i16> %b)
+ ret <8 x i16> %res
+}
+
+; VMHF.
+define <4 x i32> @test_vmhf(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_vmhf:
+; CHECK: vmhf %v24, %v24, %v26
+; CHECK: br %r14
+ %res = call <4 x i32> @llvm.s390.vmhf(<4 x i32> %a, <4 x i32> %b)
+ ret <4 x i32> %res
+}
+
+; VMLHB.
+define <16 x i8> @test_vmlhb(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: test_vmlhb:
+; CHECK: vmlhb %v24, %v24, %v26
+; CHECK: br %r14
+ %res = call <16 x i8> @llvm.s390.vmlhb(<16 x i8> %a, <16 x i8> %b)
+ ret <16 x i8> %res
+}
+
+; VMLHH.
+define <8 x i16> @test_vmlhh(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: test_vmlhh:
+; CHECK: vmlhh %v24, %v24, %v26
+; CHECK: br %r14
+ %res = call <8 x i16> @llvm.s390.vmlhh(<8 x i16> %a, <8 x i16> %b)
+ ret <8 x i16> %res
+}
+
+; VMLHF.
+define <4 x i32> @test_vmlhf(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_vmlhf:
+; CHECK: vmlhf %v24, %v24, %v26
+; CHECK: br %r14
+ %res = call <4 x i32> @llvm.s390.vmlhf(<4 x i32> %a, <4 x i32> %b)
+ ret <4 x i32> %res
+}
+
+; VMEB.
+define <8 x i16> @test_vmeb(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: test_vmeb:
+; CHECK: vmeb %v24, %v24, %v26
+; CHECK: br %r14
+ %res = call <8 x i16> @llvm.s390.vmeb(<16 x i8> %a, <16 x i8> %b)
+ ret <8 x i16> %res
+}
+
+; VMEH.
+define <4 x i32> @test_vmeh(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: test_vmeh:
+; CHECK: vmeh %v24, %v24, %v26
+; CHECK: br %r14
+ %res = call <4 x i32> @llvm.s390.vmeh(<8 x i16> %a, <8 x i16> %b)
+ ret <4 x i32> %res
+}
+
+; VMEF.
+define <2 x i64> @test_vmef(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_vmef:
+; CHECK: vmef %v24, %v24, %v26
+; CHECK: br %r14
+ %res = call <2 x i64> @llvm.s390.vmef(<4 x i32> %a, <4 x i32> %b)
+ ret <2 x i64> %res
+}
+
+; VMLEB.
+define <8 x i16> @test_vmleb(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: test_vmleb:
+; CHECK: vmleb %v24, %v24, %v26
+; CHECK: br %r14
+ %res = call <8 x i16> @llvm.s390.vmleb(<16 x i8> %a, <16 x i8> %b)
+ ret <8 x i16> %res
+}
+
+; VMLEH.
+define <4 x i32> @test_vmleh(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: test_vmleh:
+; CHECK: vmleh %v24, %v24, %v26
+; CHECK: br %r14
+ %res = call <4 x i32> @llvm.s390.vmleh(<8 x i16> %a, <8 x i16> %b)
+ ret <4 x i32> %res
+}
+
+; VMLEF.
+define <2 x i64> @test_vmlef(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_vmlef:
+; CHECK: vmlef %v24, %v24, %v26
+; CHECK: br %r14
+ %res = call <2 x i64> @llvm.s390.vmlef(<4 x i32> %a, <4 x i32> %b)
+ ret <2 x i64> %res
+}
+
+; VMOB.
+define <8 x i16> @test_vmob(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: test_vmob:
+; CHECK: vmob %v24, %v24, %v26
+; CHECK: br %r14
+ %res = call <8 x i16> @llvm.s390.vmob(<16 x i8> %a, <16 x i8> %b)
+ ret <8 x i16> %res
+}
+
+; VMOH.
+define <4 x i32> @test_vmoh(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: test_vmoh:
+; CHECK: vmoh %v24, %v24, %v26
+; CHECK: br %r14
+ %res = call <4 x i32> @llvm.s390.vmoh(<8 x i16> %a, <8 x i16> %b)
+ ret <4 x i32> %res
+}
+
+; VMOF.
+define <2 x i64> @test_vmof(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_vmof:
+; CHECK: vmof %v24, %v24, %v26
+; CHECK: br %r14
+ %res = call <2 x i64> @llvm.s390.vmof(<4 x i32> %a, <4 x i32> %b)
+ ret <2 x i64> %res
+}
+
+; VMLOB.
+define <8 x i16> @test_vmlob(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: test_vmlob:
+; CHECK: vmlob %v24, %v24, %v26
+; CHECK: br %r14
+ %res = call <8 x i16> @llvm.s390.vmlob(<16 x i8> %a, <16 x i8> %b)
+ ret <8 x i16> %res
+}
+
+; VMLOH.
+define <4 x i32> @test_vmloh(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: test_vmloh:
+; CHECK: vmloh %v24, %v24, %v26
+; CHECK: br %r14
+ %res = call <4 x i32> @llvm.s390.vmloh(<8 x i16> %a, <8 x i16> %b)
+ ret <4 x i32> %res
+}
+
+; VMLOF.
+define <2 x i64> @test_vmlof(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_vmlof:
+; CHECK: vmlof %v24, %v24, %v26
+; CHECK: br %r14
+ %res = call <2 x i64> @llvm.s390.vmlof(<4 x i32> %a, <4 x i32> %b)
+ ret <2 x i64> %res
+}
+
+; VERLLVB.
+define <16 x i8> @test_verllvb(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: test_verllvb:
+; CHECK: verllvb %v24, %v24, %v26
+; CHECK: br %r14
+ %res = call <16 x i8> @llvm.s390.verllvb(<16 x i8> %a, <16 x i8> %b)
+ ret <16 x i8> %res
+}
+
+; VERLLVH.
+define <8 x i16> @test_verllvh(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: test_verllvh:
+; CHECK: verllvh %v24, %v24, %v26
+; CHECK: br %r14
+ %res = call <8 x i16> @llvm.s390.verllvh(<8 x i16> %a, <8 x i16> %b)
+ ret <8 x i16> %res
+}
+
+; VERLLVF.
+define <4 x i32> @test_verllvf(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_verllvf:
+; CHECK: verllvf %v24, %v24, %v26
+; CHECK: br %r14
+ %res = call <4 x i32> @llvm.s390.verllvf(<4 x i32> %a, <4 x i32> %b)
+ ret <4 x i32> %res
+}
+
+; VERLLVG.
+define <2 x i64> @test_verllvg(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: test_verllvg:
+; CHECK: verllvg %v24, %v24, %v26
+; CHECK: br %r14
+ %res = call <2 x i64> @llvm.s390.verllvg(<2 x i64> %a, <2 x i64> %b)
+ ret <2 x i64> %res
+}
+
+; VERLLB.
+define <16 x i8> @test_verllb(<16 x i8> %a, i32 %b) {
+; CHECK-LABEL: test_verllb:
+; CHECK: verllb %v24, %v24, 0(%r2)
+; CHECK: br %r14
+ %res = call <16 x i8> @llvm.s390.verllb(<16 x i8> %a, i32 %b)
+ ret <16 x i8> %res
+}
+
+; VERLLH.
+define <8 x i16> @test_verllh(<8 x i16> %a, i32 %b) {
+; CHECK-LABEL: test_verllh:
+; CHECK: verllh %v24, %v24, 0(%r2)
+; CHECK: br %r14
+ %res = call <8 x i16> @llvm.s390.verllh(<8 x i16> %a, i32 %b)
+ ret <8 x i16> %res
+}
+
+; VERLLF.
+define <4 x i32> @test_verllf(<4 x i32> %a, i32 %b) {
+; CHECK-LABEL: test_verllf:
+; CHECK: verllf %v24, %v24, 0(%r2)
+; CHECK: br %r14
+ %res = call <4 x i32> @llvm.s390.verllf(<4 x i32> %a, i32 %b)
+ ret <4 x i32> %res
+}
+
+; VERLLG.
+define <2 x i64> @test_verllg(<2 x i64> %a, i32 %b) {
+; CHECK-LABEL: test_verllg:
+; CHECK: verllg %v24, %v24, 0(%r2)
+; CHECK: br %r14
+ %res = call <2 x i64> @llvm.s390.verllg(<2 x i64> %a, i32 %b)
+ ret <2 x i64> %res
+}
+
+; VERLLB with the smallest count.
+define <16 x i8> @test_verllb_1(<16 x i8> %a) {
+; CHECK-LABEL: test_verllb_1:
+; CHECK: verllb %v24, %v24, 1
+; CHECK: br %r14
+ %res = call <16 x i8> @llvm.s390.verllb(<16 x i8> %a, i32 1)
+ ret <16 x i8> %res
+}
+
+; VERLLB with the largest count.
+define <16 x i8> @test_verllb_4095(<16 x i8> %a) {
+; CHECK-LABEL: test_verllb_4095:
+; CHECK: verllb %v24, %v24, 4095
+; CHECK: br %r14
+ %res = call <16 x i8> @llvm.s390.verllb(<16 x i8> %a, i32 4095)
+ ret <16 x i8> %res
+}
+
+; VERLLB with the largest count + 1.
+define <16 x i8> @test_verllb_4096(<16 x i8> %a) {
+; CHECK-LABEL: test_verllb_4096:
+; CHECK: lhi [[REG:%r[1-5]]], 4096
+; CHECK: verllb %v24, %v24, 0([[REG]])
+; CHECK: br %r14
+ %res = call <16 x i8> @llvm.s390.verllb(<16 x i8> %a, i32 4096)
+ ret <16 x i8> %res
+}
+
+; VERIMB.
+define <16 x i8> @test_verimb(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) {
+; CHECK-LABEL: test_verimb:
+; CHECK: verimb %v24, %v26, %v28, 1
+; CHECK: br %r14
+ %res = call <16 x i8> @llvm.s390.verimb(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, i32 1)
+ ret <16 x i8> %res
+}
+
+; VERIMH.
+define <8 x i16> @test_verimh(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) {
+; CHECK-LABEL: test_verimh:
+; CHECK: verimh %v24, %v26, %v28, 1
+; CHECK: br %r14
+ %res = call <8 x i16> @llvm.s390.verimh(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c, i32 1)
+ ret <8 x i16> %res
+}
+
+; VERIMF.
+define <4 x i32> @test_verimf(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
+; CHECK-LABEL: test_verimf:
+; CHECK: verimf %v24, %v26, %v28, 1
+; CHECK: br %r14
+ %res = call <4 x i32> @llvm.s390.verimf(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, i32 1)
+ ret <4 x i32> %res
+}
+
+; VERIMG.
+define <2 x i64> @test_verimg(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) {
+; CHECK-LABEL: test_verimg:
+; CHECK: verimg %v24, %v26, %v28, 1
+; CHECK: br %r14
+ %res = call <2 x i64> @llvm.s390.verimg(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c, i32 1)
+ ret <2 x i64> %res
+}
+
+; VERIMB with a different mask.
+define <16 x i8> @test_verimb_254(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) {
+; CHECK-LABEL: test_verimb_254:
+; CHECK: verimb %v24, %v26, %v28, 254
+; CHECK: br %r14
+ %res = call <16 x i8> @llvm.s390.verimb(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, i32 254)
+ ret <16 x i8> %res
+}
+
+; VSL.
+define <16 x i8> @test_vsl(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: test_vsl:
+; CHECK: vsl %v24, %v24, %v26
+; CHECK: br %r14
+ %res = call <16 x i8> @llvm.s390.vsl(<16 x i8> %a, <16 x i8> %b)
+ ret <16 x i8> %res
+}
+
+; VSLB.
+define <16 x i8> @test_vslb(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: test_vslb:
+; CHECK: vslb %v24, %v24, %v26
+; CHECK: br %r14
+ %res = call <16 x i8> @llvm.s390.vslb(<16 x i8> %a, <16 x i8> %b)
+ ret <16 x i8> %res
+}
+
+; VSRA.
+define <16 x i8> @test_vsra(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: test_vsra:
+; CHECK: vsra %v24, %v24, %v26
+; CHECK: br %r14
+ %res = call <16 x i8> @llvm.s390.vsra(<16 x i8> %a, <16 x i8> %b)
+ ret <16 x i8> %res
+}
+
+; VSRAB.
+define <16 x i8> @test_vsrab(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: test_vsrab:
+; CHECK: vsrab %v24, %v24, %v26
+; CHECK: br %r14
+ %res = call <16 x i8> @llvm.s390.vsrab(<16 x i8> %a, <16 x i8> %b)
+ ret <16 x i8> %res
+}
+
+; VSRL.
+define <16 x i8> @test_vsrl(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: test_vsrl:
+; CHECK: vsrl %v24, %v24, %v26
+; CHECK: br %r14
+ %res = call <16 x i8> @llvm.s390.vsrl(<16 x i8> %a, <16 x i8> %b)
+ ret <16 x i8> %res
+}
+
+; VSRLB.
+define <16 x i8> @test_vsrlb(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: test_vsrlb:
+; CHECK: vsrlb %v24, %v24, %v26
+; CHECK: br %r14
+ %res = call <16 x i8> @llvm.s390.vsrlb(<16 x i8> %a, <16 x i8> %b)
+ ret <16 x i8> %res
+}
+
+; VSLDB with the minimum useful value.
+define <16 x i8> @test_vsldb_1(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: test_vsldb_1:
+; CHECK: vsldb %v24, %v24, %v26, 1
+; CHECK: br %r14
+ %res = call <16 x i8> @llvm.s390.vsldb(<16 x i8> %a, <16 x i8> %b, i32 1)
+ ret <16 x i8> %res
+}
+
+; VSLDB with the maximum value.
+define <16 x i8> @test_vsldb_15(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: test_vsldb_15:
+; CHECK: vsldb %v24, %v24, %v26, 15
+; CHECK: br %r14
+ %res = call <16 x i8> @llvm.s390.vsldb(<16 x i8> %a, <16 x i8> %b, i32 15)
+ ret <16 x i8> %res
+}
+
+; VSCBIB.
+define <16 x i8> @test_vscbib(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: test_vscbib:
+; CHECK: vscbib %v24, %v24, %v26
+; CHECK: br %r14
+ %res = call <16 x i8> @llvm.s390.vscbib(<16 x i8> %a, <16 x i8> %b)
+ ret <16 x i8> %res
+}
+
+; VSCBIH.
+define <8 x i16> @test_vscbih(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: test_vscbih:
+; CHECK: vscbih %v24, %v24, %v26
+; CHECK: br %r14
+ %res = call <8 x i16> @llvm.s390.vscbih(<8 x i16> %a, <8 x i16> %b)
+ ret <8 x i16> %res
+}
+
+; VSCBIF.
+define <4 x i32> @test_vscbif(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_vscbif:
+; CHECK: vscbif %v24, %v24, %v26
+; CHECK: br %r14
+ %res = call <4 x i32> @llvm.s390.vscbif(<4 x i32> %a, <4 x i32> %b)
+ ret <4 x i32> %res
+}
+
+; VSCBIG.
+define <2 x i64> @test_vscbig(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: test_vscbig:
+; CHECK: vscbig %v24, %v24, %v26
+; CHECK: br %r14
+ %res = call <2 x i64> @llvm.s390.vscbig(<2 x i64> %a, <2 x i64> %b)
+ ret <2 x i64> %res
+}
+
+; VSQ.
+define <16 x i8> @test_vsq(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: test_vsq:
+; CHECK: vsq %v24, %v24, %v26
+; CHECK: br %r14
+ %res = call <16 x i8> @llvm.s390.vsq(<16 x i8> %a, <16 x i8> %b)
+ ret <16 x i8> %res
+}
+
+; VSBIQ.
+define <16 x i8> @test_vsbiq(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) {
+; CHECK-LABEL: test_vsbiq:
+; CHECK: vsbiq %v24, %v24, %v26, %v28
+; CHECK: br %r14
+ %res = call <16 x i8> @llvm.s390.vsbiq(<16 x i8> %a, <16 x i8> %b,
+ <16 x i8> %c)
+ ret <16 x i8> %res
+}
+
+; VSCBIQ.
+define <16 x i8> @test_vscbiq(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: test_vscbiq:
+; CHECK: vscbiq %v24, %v24, %v26
+; CHECK: br %r14
+ %res = call <16 x i8> @llvm.s390.vscbiq(<16 x i8> %a, <16 x i8> %b)
+ ret <16 x i8> %res
+}
+
+; VSBCBIQ.
+define <16 x i8> @test_vsbcbiq(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) {
+; CHECK-LABEL: test_vsbcbiq:
+; CHECK: vsbcbiq %v24, %v24, %v26, %v28
+; CHECK: br %r14
+ %res = call <16 x i8> @llvm.s390.vsbcbiq(<16 x i8> %a, <16 x i8> %b,
+ <16 x i8> %c)
+ ret <16 x i8> %res
+}
+
+; VSUMB.
+define <4 x i32> @test_vsumb(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: test_vsumb:
+; CHECK: vsumb %v24, %v24, %v26
+; CHECK: br %r14
+ %res = call <4 x i32> @llvm.s390.vsumb(<16 x i8> %a, <16 x i8> %b)
+ ret <4 x i32> %res
+}
+
+; VSUMH.
+define <4 x i32> @test_vsumh(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: test_vsumh:
+; CHECK: vsumh %v24, %v24, %v26
+; CHECK: br %r14
+ %res = call <4 x i32> @llvm.s390.vsumh(<8 x i16> %a, <8 x i16> %b)
+ ret <4 x i32> %res
+}
+
+; VSUMGH.
+define <2 x i64> @test_vsumgh(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: test_vsumgh:
+; CHECK: vsumgh %v24, %v24, %v26
+; CHECK: br %r14
+ %res = call <2 x i64> @llvm.s390.vsumgh(<8 x i16> %a, <8 x i16> %b)
+ ret <2 x i64> %res
+}
+
+; VSUMGF.
+define <2 x i64> @test_vsumgf(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_vsumgf:
+; CHECK: vsumgf %v24, %v24, %v26
+; CHECK: br %r14
+ %res = call <2 x i64> @llvm.s390.vsumgf(<4 x i32> %a, <4 x i32> %b)
+ ret <2 x i64> %res
+}
+
+; VSUMQF.
+define <16 x i8> @test_vsumqf(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_vsumqf:
+; CHECK: vsumqf %v24, %v24, %v26
+; CHECK: br %r14
+ %res = call <16 x i8> @llvm.s390.vsumqf(<4 x i32> %a, <4 x i32> %b)
+ ret <16 x i8> %res
+}
+
+; VSUMQG.
+define <16 x i8> @test_vsumqg(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: test_vsumqg:
+; CHECK: vsumqg %v24, %v24, %v26
+; CHECK: br %r14
+ %res = call <16 x i8> @llvm.s390.vsumqg(<2 x i64> %a, <2 x i64> %b)
+ ret <16 x i8> %res
+}
+
+; VTM with no processing of the result.
+define i32 @test_vtm(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: test_vtm:
+; CHECK: vtm %v24, %v26
+; CHECK: ipm %r2
+; CHECK: srl %r2, 28
+; CHECK: br %r14
+ %res = call i32 @llvm.s390.vtm(<16 x i8> %a, <16 x i8> %b)
+ ret i32 %res
+}
+
+; VTM, storing to %ptr if all bits are set.
+define void @test_vtm_all_store(<16 x i8> %a, <16 x i8> %b, i32 *%ptr) {
+; CHECK-LABEL: test_vtm_all_store:
+; CHECK-NOT: %r
+; CHECK: vtm %v24, %v26
+; CHECK-NEXT: {{jno|jle}} {{\.L*}}
+; CHECK: mvhi 0(%r2), 0
+; CHECK: br %r14
+ %res = call i32 @llvm.s390.vtm(<16 x i8> %a, <16 x i8> %b)
+ %cmp = icmp sge i32 %res, 3
+ br i1 %cmp, label %store, label %exit
+
+store:
+ store i32 0, i32 *%ptr
+ br label %exit
+
+exit:
+ ret void
+}
+
+; VCEQBS with no processing of the result.
+define i32 @test_vceqbs(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: test_vceqbs:
+; CHECK: vceqbs {{%v[0-9]+}}, %v24, %v26
+; CHECK: ipm %r2
+; CHECK: srl %r2, 28
+; CHECK: br %r14
+ %call = call {<16 x i8>, i32} @llvm.s390.vceqbs(<16 x i8> %a, <16 x i8> %b)
+ %res = extractvalue {<16 x i8>, i32} %call, 1
+ ret i32 %res
+}
+
+; VCEQBS, returning 1 if any elements are equal (CC != 3).
+define i32 @test_vceqbs_any_bool(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: test_vceqbs_any_bool:
+; CHECK: vceqbs {{%v[0-9]+}}, %v24, %v26
+; CHECK: ipm %r2
+; CHECK: afi %r2, -536870912
+; CHECK: srl %r2, 31
+; CHECK: br %r14
+ %call = call {<16 x i8>, i32} @llvm.s390.vceqbs(<16 x i8> %a, <16 x i8> %b)
+ %res = extractvalue {<16 x i8>, i32} %call, 1
+ %cmp = icmp ne i32 %res, 3
+ %ext = zext i1 %cmp to i32
+ ret i32 %ext
+}
+
+; VCEQBS, storing to %ptr if any elements are equal.
+define <16 x i8> @test_vceqbs_any_store(<16 x i8> %a, <16 x i8> %b, i32 *%ptr) {
+; CHECK-LABEL: test_vceqbs_any_store:
+; CHECK-NOT: %r
+; CHECK: vceqbs %v24, %v24, %v26
+; CHECK-NEXT: {{jo|jnle}} {{\.L*}}
+; CHECK: mvhi 0(%r2), 0
+; CHECK: br %r14
+ %call = call {<16 x i8>, i32} @llvm.s390.vceqbs(<16 x i8> %a, <16 x i8> %b)
+ %res = extractvalue {<16 x i8>, i32} %call, 0
+ %cc = extractvalue {<16 x i8>, i32} %call, 1
+ %cmp = icmp ule i32 %cc, 2
+ br i1 %cmp, label %store, label %exit
+
+store:
+ store i32 0, i32 *%ptr
+ br label %exit
+
+exit:
+ ret <16 x i8> %res
+}
+
+; VCEQHS with no processing of the result.
+define i32 @test_vceqhs(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: test_vceqhs:
+; CHECK: vceqhs {{%v[0-9]+}}, %v24, %v26
+; CHECK: ipm %r2
+; CHECK: srl %r2, 28
+; CHECK: br %r14
+ %call = call {<8 x i16>, i32} @llvm.s390.vceqhs(<8 x i16> %a, <8 x i16> %b)
+ %res = extractvalue {<8 x i16>, i32} %call, 1
+ ret i32 %res
+}
+
+; VCEQHS, returning 1 if not all elements are equal.
+define i32 @test_vceqhs_notall_bool(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: test_vceqhs_notall_bool:
+; CHECK: vceqhs {{%v[0-9]+}}, %v24, %v26
+; CHECK: ipm [[REG:%r[0-5]]]
+; CHECK: risblg %r2, [[REG]], 31, 159, 36
+; CHECK: br %r14
+ %call = call {<8 x i16>, i32} @llvm.s390.vceqhs(<8 x i16> %a, <8 x i16> %b)
+ %res = extractvalue {<8 x i16>, i32} %call, 1
+ %cmp = icmp sge i32 %res, 1
+ %ext = zext i1 %cmp to i32
+ ret i32 %ext
+}
+
+; VCEQHS, storing to %ptr if not all elements are equal.
+define <8 x i16> @test_vceqhs_notall_store(<8 x i16> %a, <8 x i16> %b,
+ i32 *%ptr) {
+; CHECK-LABEL: test_vceqhs_notall_store:
+; CHECK-NOT: %r
+; CHECK: vceqhs %v24, %v24, %v26
+; CHECK-NEXT: {{jhe|je}} {{\.L*}}
+; CHECK: mvhi 0(%r2), 0
+; CHECK: br %r14
+ %call = call {<8 x i16>, i32} @llvm.s390.vceqhs(<8 x i16> %a, <8 x i16> %b)
+ %res = extractvalue {<8 x i16>, i32} %call, 0
+ %cc = extractvalue {<8 x i16>, i32} %call, 1
+ %cmp = icmp ugt i32 %cc, 0
+ br i1 %cmp, label %store, label %exit
+
+store:
+ store i32 0, i32 *%ptr
+ br label %exit
+
+exit:
+ ret <8 x i16> %res
+}
+
+; VCEQFS with no processing of the result.
+define i32 @test_vceqfs(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_vceqfs:
+; CHECK: vceqfs {{%v[0-9]+}}, %v24, %v26
+; CHECK: ipm %r2
+; CHECK: srl %r2, 28
+; CHECK: br %r14
+ %call = call {<4 x i32>, i32} @llvm.s390.vceqfs(<4 x i32> %a, <4 x i32> %b)
+ %res = extractvalue {<4 x i32>, i32} %call, 1
+ ret i32 %res
+}
+
+; VCEQFS, returning 1 if no elements are equal.
+define i32 @test_vceqfs_none_bool(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_vceqfs_none_bool:
+; CHECK: vceqfs {{%v[0-9]+}}, %v24, %v26
+; CHECK: ipm [[REG:%r[0-5]]]
+; CHECK: risblg %r2, [[REG]], 31, 159, 35
+; CHECK: br %r14
+ %call = call {<4 x i32>, i32} @llvm.s390.vceqfs(<4 x i32> %a, <4 x i32> %b)
+ %res = extractvalue {<4 x i32>, i32} %call, 1
+ %cmp = icmp eq i32 %res, 3
+ %ext = zext i1 %cmp to i32
+ ret i32 %ext
+}
+
+; VCEQFS, storing to %ptr if no elements are equal.
+define <4 x i32> @test_vceqfs_none_store(<4 x i32> %a, <4 x i32> %b,
+ i32 *%ptr) {
+; CHECK-LABEL: test_vceqfs_none_store:
+; CHECK-NOT: %r
+; CHECK: vceqfs %v24, %v24, %v26
+; CHECK-NEXT: {{jno|jle}} {{\.L*}}
+; CHECK: mvhi 0(%r2), 0
+; CHECK: br %r14
+ %call = call {<4 x i32>, i32} @llvm.s390.vceqfs(<4 x i32> %a, <4 x i32> %b)
+ %res = extractvalue {<4 x i32>, i32} %call, 0
+ %cc = extractvalue {<4 x i32>, i32} %call, 1
+ %cmp = icmp uge i32 %cc, 3
+ br i1 %cmp, label %store, label %exit
+
+store:
+ store i32 0, i32 *%ptr
+ br label %exit
+
+exit:
+ ret <4 x i32> %res
+}
+
+; VCEQGS with no processing of the result.
+define i32 @test_vceqgs(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: test_vceqgs:
+; CHECK: vceqgs {{%v[0-9]+}}, %v24, %v26
+; CHECK: ipm %r2
+; CHECK: srl %r2, 28
+; CHECK: br %r14
+ %call = call {<2 x i64>, i32} @llvm.s390.vceqgs(<2 x i64> %a, <2 x i64> %b)
+ %res = extractvalue {<2 x i64>, i32} %call, 1
+ ret i32 %res
+}
+
+; VCEQGS, returning 1 if all elements are equal (CC == 0).
+define i32 @test_vceqgs_all_bool(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: test_vceqgs_all_bool:
+; CHECK: vceqgs {{%v[0-9]+}}, %v24, %v26
+; CHECK: ipm %r2
+; CHECK: afi %r2, -268435456
+; CHECK: srl %r2, 31
+; CHECK: br %r14
+ %call = call {<2 x i64>, i32} @llvm.s390.vceqgs(<2 x i64> %a, <2 x i64> %b)
+ %res = extractvalue {<2 x i64>, i32} %call, 1
+ %cmp = icmp ult i32 %res, 1
+ %ext = zext i1 %cmp to i32
+ ret i32 %ext
+}
+
+; VCEQGS, storing to %ptr if all elements are equal.
+define <2 x i64> @test_vceqgs_all_store(<2 x i64> %a, <2 x i64> %b, i32 *%ptr) {
+; CHECK-LABEL: test_vceqgs_all_store:
+; CHECK-NOT: %r
+; CHECK: vceqgs %v24, %v24, %v26
+; CHECK-NEXT: {{jnhe|jne}} {{\.L*}}
+; CHECK: mvhi 0(%r2), 0
+; CHECK: br %r14
+ %call = call {<2 x i64>, i32} @llvm.s390.vceqgs(<2 x i64> %a, <2 x i64> %b)
+ %res = extractvalue {<2 x i64>, i32} %call, 0
+ %cc = extractvalue {<2 x i64>, i32} %call, 1
+ %cmp = icmp sle i32 %cc, 0
+ br i1 %cmp, label %store, label %exit
+
+store:
+ store i32 0, i32 *%ptr
+ br label %exit
+
+exit:
+ ret <2 x i64> %res
+}
+
+; VCHBS with no processing of the result.
+define i32 @test_vchbs(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: test_vchbs:
+; CHECK: vchbs {{%v[0-9]+}}, %v24, %v26
+; CHECK: ipm %r2
+; CHECK: srl %r2, 28
+; CHECK: br %r14
+ %call = call {<16 x i8>, i32} @llvm.s390.vchbs(<16 x i8> %a, <16 x i8> %b)
+ %res = extractvalue {<16 x i8>, i32} %call, 1
+ ret i32 %res
+}
+
+; VCHBS, returning 1 if any elements are higher (CC != 3).
+define i32 @test_vchbs_any_bool(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: test_vchbs_any_bool:
+; CHECK: vchbs {{%v[0-9]+}}, %v24, %v26
+; CHECK: ipm %r2
+; CHECK: afi %r2, -536870912
+; CHECK: srl %r2, 31
+; CHECK: br %r14
+ %call = call {<16 x i8>, i32} @llvm.s390.vchbs(<16 x i8> %a, <16 x i8> %b)
+ %res = extractvalue {<16 x i8>, i32} %call, 1
+ %cmp = icmp ne i32 %res, 3
+ %ext = zext i1 %cmp to i32
+ ret i32 %ext
+}
+
+; VCHBS, storing to %ptr if any elements are higher.
+define <16 x i8> @test_vchbs_any_store(<16 x i8> %a, <16 x i8> %b, i32 *%ptr) {
+; CHECK-LABEL: test_vchbs_any_store:
+; CHECK-NOT: %r
+; CHECK: vchbs %v24, %v24, %v26
+; CHECK-NEXT: {{jo|jnle}} {{\.L*}}
+; CHECK: mvhi 0(%r2), 0
+; CHECK: br %r14
+ %call = call {<16 x i8>, i32} @llvm.s390.vchbs(<16 x i8> %a, <16 x i8> %b)
+ %res = extractvalue {<16 x i8>, i32} %call, 0
+ %cc = extractvalue {<16 x i8>, i32} %call, 1
+ %cmp = icmp ule i32 %cc, 2
+ br i1 %cmp, label %store, label %exit
+
+store:
+ store i32 0, i32 *%ptr
+ br label %exit
+
+exit:
+ ret <16 x i8> %res
+}
+
+; VCHHS with no processing of the result.
+define i32 @test_vchhs(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: test_vchhs:
+; CHECK: vchhs {{%v[0-9]+}}, %v24, %v26
+; CHECK: ipm %r2
+; CHECK: srl %r2, 28
+; CHECK: br %r14
+ %call = call {<8 x i16>, i32} @llvm.s390.vchhs(<8 x i16> %a, <8 x i16> %b)
+ %res = extractvalue {<8 x i16>, i32} %call, 1
+ ret i32 %res
+}
+
+; VCHHS, returning 1 if not all elements are higher.
+define i32 @test_vchhs_notall_bool(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: test_vchhs_notall_bool:
+; CHECK: vchhs {{%v[0-9]+}}, %v24, %v26
+; CHECK: ipm [[REG:%r[0-5]]]
+; CHECK: risblg %r2, [[REG]], 31, 159, 36
+; CHECK: br %r14
+ %call = call {<8 x i16>, i32} @llvm.s390.vchhs(<8 x i16> %a, <8 x i16> %b)
+ %res = extractvalue {<8 x i16>, i32} %call, 1
+ %cmp = icmp sge i32 %res, 1
+ %ext = zext i1 %cmp to i32
+ ret i32 %ext
+}
+
+; VCHHS, storing to %ptr if not all elements are higher.
+define <8 x i16> @test_vchhs_notall_store(<8 x i16> %a, <8 x i16> %b,
+ i32 *%ptr) {
+; CHECK-LABEL: test_vchhs_notall_store:
+; CHECK-NOT: %r
+; CHECK: vchhs %v24, %v24, %v26
+; CHECK-NEXT: {{jhe|je}} {{\.L*}}
+; CHECK: mvhi 0(%r2), 0
+; CHECK: br %r14
+ %call = call {<8 x i16>, i32} @llvm.s390.vchhs(<8 x i16> %a, <8 x i16> %b)
+ %res = extractvalue {<8 x i16>, i32} %call, 0
+ %cc = extractvalue {<8 x i16>, i32} %call, 1
+ %cmp = icmp ugt i32 %cc, 0
+ br i1 %cmp, label %store, label %exit
+
+store:
+ store i32 0, i32 *%ptr
+ br label %exit
+
+exit:
+ ret <8 x i16> %res
+}
+
+; VCHFS with no processing of the result.
+define i32 @test_vchfs(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_vchfs:
+; CHECK: vchfs {{%v[0-9]+}}, %v24, %v26
+; CHECK: ipm %r2
+; CHECK: srl %r2, 28
+; CHECK: br %r14
+ %call = call {<4 x i32>, i32} @llvm.s390.vchfs(<4 x i32> %a, <4 x i32> %b)
+ %res = extractvalue {<4 x i32>, i32} %call, 1
+ ret i32 %res
+}
+
+; VCHFS, returning 1 if no elements are higher.
+define i32 @test_vchfs_none_bool(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_vchfs_none_bool:
+; CHECK: vchfs {{%v[0-9]+}}, %v24, %v26
+; CHECK: ipm [[REG:%r[0-5]]]
+; CHECK: risblg %r2, [[REG]], 31, 159, 35
+; CHECK: br %r14
+ %call = call {<4 x i32>, i32} @llvm.s390.vchfs(<4 x i32> %a, <4 x i32> %b)
+ %res = extractvalue {<4 x i32>, i32} %call, 1
+ %cmp = icmp eq i32 %res, 3
+ %ext = zext i1 %cmp to i32
+ ret i32 %ext
+}
+
+; VCHFS, storing to %ptr if no elements are higher.
+define <4 x i32> @test_vchfs_none_store(<4 x i32> %a, <4 x i32> %b, i32 *%ptr) {
+; CHECK-LABEL: test_vchfs_none_store:
+; CHECK-NOT: %r
+; CHECK: vchfs %v24, %v24, %v26
+; CHECK-NEXT: {{jno|jle}} {{\.L*}}
+; CHECK: mvhi 0(%r2), 0
+; CHECK: br %r14
+ %call = call {<4 x i32>, i32} @llvm.s390.vchfs(<4 x i32> %a, <4 x i32> %b)
+ %res = extractvalue {<4 x i32>, i32} %call, 0
+ %cc = extractvalue {<4 x i32>, i32} %call, 1
+ %cmp = icmp uge i32 %cc, 3
+ br i1 %cmp, label %store, label %exit
+
+store:
+ store i32 0, i32 *%ptr
+ br label %exit
+
+exit:
+ ret <4 x i32> %res
+}
+
+; VCHGS with no processing of the result.
+define i32 @test_vchgs(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: test_vchgs:
+; CHECK: vchgs {{%v[0-9]+}}, %v24, %v26
+; CHECK: ipm %r2
+; CHECK: srl %r2, 28
+; CHECK: br %r14
+ %call = call {<2 x i64>, i32} @llvm.s390.vchgs(<2 x i64> %a, <2 x i64> %b)
+ %res = extractvalue {<2 x i64>, i32} %call, 1
+ ret i32 %res
+}
+
+; VCHGS, returning 1 if all elements are higher (CC == 0).
+define i32 @test_vchgs_all_bool(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: test_vchgs_all_bool:
+; CHECK: vchgs {{%v[0-9]+}}, %v24, %v26
+; CHECK: ipm %r2
+; CHECK: afi %r2, -268435456
+; CHECK: srl %r2, 31
+; CHECK: br %r14
+ %call = call {<2 x i64>, i32} @llvm.s390.vchgs(<2 x i64> %a, <2 x i64> %b)
+ %res = extractvalue {<2 x i64>, i32} %call, 1
+ %cmp = icmp ult i32 %res, 1
+ %ext = zext i1 %cmp to i32
+ ret i32 %ext
+}
+
+; VCHGS, storing to %ptr if all elements are higher.
+define <2 x i64> @test_vchgs_all_store(<2 x i64> %a, <2 x i64> %b, i32 *%ptr) {
+; CHECK-LABEL: test_vchgs_all_store:
+; CHECK-NOT: %r
+; CHECK: vchgs %v24, %v24, %v26
+; CHECK-NEXT: {{jnhe|jne}} {{\.L*}}
+; CHECK: mvhi 0(%r2), 0
+; CHECK: br %r14
+ %call = call {<2 x i64>, i32} @llvm.s390.vchgs(<2 x i64> %a, <2 x i64> %b)
+ %res = extractvalue {<2 x i64>, i32} %call, 0
+ %cc = extractvalue {<2 x i64>, i32} %call, 1
+ %cmp = icmp sle i32 %cc, 0
+ br i1 %cmp, label %store, label %exit
+
+store:
+ store i32 0, i32 *%ptr
+ br label %exit
+
+exit:
+ ret <2 x i64> %res
+}
+
+; VCHLBS with no processing of the result.
+define i32 @test_vchlbs(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: test_vchlbs:
+; CHECK: vchlbs {{%v[0-9]+}}, %v24, %v26
+; CHECK: ipm %r2
+; CHECK: srl %r2, 28
+; CHECK: br %r14
+ %call = call {<16 x i8>, i32} @llvm.s390.vchlbs(<16 x i8> %a, <16 x i8> %b)
+ %res = extractvalue {<16 x i8>, i32} %call, 1
+ ret i32 %res
+}
+
+; VCHLBS, returning 1 if any elements are higher (CC != 3).
+define i32 @test_vchlbs_any_bool(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: test_vchlbs_any_bool:
+; CHECK: vchlbs {{%v[0-9]+}}, %v24, %v26
+; CHECK: ipm %r2
+; CHECK: afi %r2, -536870912
+; CHECK: srl %r2, 31
+; CHECK: br %r14
+ %call = call {<16 x i8>, i32} @llvm.s390.vchlbs(<16 x i8> %a, <16 x i8> %b)
+ %res = extractvalue {<16 x i8>, i32} %call, 1
+ %cmp = icmp ne i32 %res, 3
+ %ext = zext i1 %cmp to i32
+ ret i32 %ext
+}
+
+; VCHLBS, storing to %ptr if any elements are higher.
+define <16 x i8> @test_vchlbs_any_store(<16 x i8> %a, <16 x i8> %b, i32 *%ptr) {
+; CHECK-LABEL: test_vchlbs_any_store:
+; CHECK-NOT: %r
+; CHECK: vchlbs %v24, %v24, %v26
+; CHECK-NEXT: {{jo|jnle}} {{\.L*}}
+; CHECK: mvhi 0(%r2), 0
+; CHECK: br %r14
+ %call = call {<16 x i8>, i32} @llvm.s390.vchlbs(<16 x i8> %a, <16 x i8> %b)
+ %res = extractvalue {<16 x i8>, i32} %call, 0
+ %cc = extractvalue {<16 x i8>, i32} %call, 1
+ %cmp = icmp sle i32 %cc, 2
+ br i1 %cmp, label %store, label %exit
+
+store:
+ store i32 0, i32 *%ptr
+ br label %exit
+
+exit:
+ ret <16 x i8> %res
+}
+
+; VCHLHS with no processing of the result.
+define i32 @test_vchlhs(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: test_vchlhs:
+; CHECK: vchlhs {{%v[0-9]+}}, %v24, %v26
+; CHECK: ipm %r2
+; CHECK: srl %r2, 28
+; CHECK: br %r14
+ %call = call {<8 x i16>, i32} @llvm.s390.vchlhs(<8 x i16> %a, <8 x i16> %b)
+ %res = extractvalue {<8 x i16>, i32} %call, 1
+ ret i32 %res
+}
+
+; VCHLHS, returning 1 if not all elements are higher.
+define i32 @test_vchlhs_notall_bool(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: test_vchlhs_notall_bool:
+; CHECK: vchlhs {{%v[0-9]+}}, %v24, %v26
+; CHECK: ipm [[REG:%r[0-5]]]
+; CHECK: risblg %r2, [[REG]], 31, 159, 36
+; CHECK: br %r14
+ %call = call {<8 x i16>, i32} @llvm.s390.vchlhs(<8 x i16> %a, <8 x i16> %b)
+ %res = extractvalue {<8 x i16>, i32} %call, 1
+ %cmp = icmp uge i32 %res, 1
+ %ext = zext i1 %cmp to i32
+ ret i32 %ext
+}
+
+; VCHLHS, storing to %ptr if not all elements are higher.
+define <8 x i16> @test_vchlhs_notall_store(<8 x i16> %a, <8 x i16> %b,
+ i32 *%ptr) {
+; CHECK-LABEL: test_vchlhs_notall_store:
+; CHECK-NOT: %r
+; CHECK: vchlhs %v24, %v24, %v26
+; CHECK-NEXT: {{jhe|je}} {{\.L*}}
+; CHECK: mvhi 0(%r2), 0
+; CHECK: br %r14
+ %call = call {<8 x i16>, i32} @llvm.s390.vchlhs(<8 x i16> %a, <8 x i16> %b)
+ %res = extractvalue {<8 x i16>, i32} %call, 0
+ %cc = extractvalue {<8 x i16>, i32} %call, 1
+ %cmp = icmp sgt i32 %cc, 0
+ br i1 %cmp, label %store, label %exit
+
+store:
+ store i32 0, i32 *%ptr
+ br label %exit
+
+exit:
+ ret <8 x i16> %res
+}
+
+; VCHLFS with no processing of the result.
+define i32 @test_vchlfs(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_vchlfs:
+; CHECK: vchlfs {{%v[0-9]+}}, %v24, %v26
+; CHECK: ipm %r2
+; CHECK: srl %r2, 28
+; CHECK: br %r14
+ %call = call {<4 x i32>, i32} @llvm.s390.vchlfs(<4 x i32> %a, <4 x i32> %b)
+ %res = extractvalue {<4 x i32>, i32} %call, 1
+ ret i32 %res
+}
+
+; VCHLFS, returning 1 if no elements are higher.
+define i32 @test_vchlfs_none_bool(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_vchlfs_none_bool:
+; CHECK: vchlfs {{%v[0-9]+}}, %v24, %v26
+; CHECK: ipm [[REG:%r[0-5]]]
+; CHECK: risblg %r2, [[REG]], 31, 159, 35
+; CHECK: br %r14
+ %call = call {<4 x i32>, i32} @llvm.s390.vchlfs(<4 x i32> %a, <4 x i32> %b)
+ %res = extractvalue {<4 x i32>, i32} %call, 1
+ %cmp = icmp eq i32 %res, 3
+ %ext = zext i1 %cmp to i32
+ ret i32 %ext
+}
+
+; VCHLFS, storing to %ptr if no elements are higher.
+define <4 x i32> @test_vchlfs_none_store(<4 x i32> %a, <4 x i32> %b,
+ i32 *%ptr) {
+; CHECK-LABEL: test_vchlfs_none_store:
+; CHECK-NOT: %r
+; CHECK: vchlfs %v24, %v24, %v26
+; CHECK-NEXT: {{jno|jle}} {{\.L*}}
+; CHECK: mvhi 0(%r2), 0
+; CHECK: br %r14
+ %call = call {<4 x i32>, i32} @llvm.s390.vchlfs(<4 x i32> %a, <4 x i32> %b)
+ %res = extractvalue {<4 x i32>, i32} %call, 0
+ %cc = extractvalue {<4 x i32>, i32} %call, 1
+ %cmp = icmp sge i32 %cc, 3
+ br i1 %cmp, label %store, label %exit
+
+store:
+ store i32 0, i32 *%ptr
+ br label %exit
+
+exit:
+ ret <4 x i32> %res
+}
+
+; VCHLGS with no processing of the result.
+define i32 @test_vchlgs(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: test_vchlgs:
+; CHECK: vchlgs {{%v[0-9]+}}, %v24, %v26
+; CHECK: ipm %r2
+; CHECK: srl %r2, 28
+; CHECK: br %r14
+ %call = call {<2 x i64>, i32} @llvm.s390.vchlgs(<2 x i64> %a, <2 x i64> %b)
+ %res = extractvalue {<2 x i64>, i32} %call, 1
+ ret i32 %res
+}
+
+; VCHLGS, returning 1 if all elements are higher (CC == 0).
+define i32 @test_vchlgs_all_bool(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: test_vchlgs_all_bool:
+; CHECK: vchlgs {{%v[0-9]+}}, %v24, %v26
+; CHECK: ipm %r2
+; CHECK: afi %r2, -268435456
+; CHECK: srl %r2, 31
+; CHECK: br %r14
+ %call = call {<2 x i64>, i32} @llvm.s390.vchlgs(<2 x i64> %a, <2 x i64> %b)
+ %res = extractvalue {<2 x i64>, i32} %call, 1
+ %cmp = icmp slt i32 %res, 1
+ %ext = zext i1 %cmp to i32
+ ret i32 %ext
+}
+
+; VCHLGS, storing to %ptr if all elements are higher.
+define <2 x i64> @test_vchlgs_all_store(<2 x i64> %a, <2 x i64> %b, i32 *%ptr) {
+; CHECK-LABEL: test_vchlgs_all_store:
+; CHECK-NOT: %r
+; CHECK: vchlgs %v24, %v24, %v26
+; CHECK-NEXT: {{jnhe|jne}} {{\.L*}}
+; CHECK: mvhi 0(%r2), 0
+; CHECK: br %r14
+ %call = call {<2 x i64>, i32} @llvm.s390.vchlgs(<2 x i64> %a, <2 x i64> %b)
+ %res = extractvalue {<2 x i64>, i32} %call, 0
+ %cc = extractvalue {<2 x i64>, i32} %call, 1
+ %cmp = icmp ule i32 %cc, 0
+ br i1 %cmp, label %store, label %exit
+
+store:
+ store i32 0, i32 *%ptr
+ br label %exit
+
+exit:
+ ret <2 x i64> %res
+}
+
+; VFAEB with !IN !RT.
+define <16 x i8> @test_vfaeb_0(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: test_vfaeb_0:
+; CHECK: vfaeb %v24, %v24, %v26, 0
+; CHECK: br %r14
+ %res = call <16 x i8> @llvm.s390.vfaeb(<16 x i8> %a, <16 x i8> %b, i32 0)
+ ret <16 x i8> %res
+}
+
+; VFAEB with !IN RT.
+define <16 x i8> @test_vfaeb_4(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: test_vfaeb_4:
+; CHECK: vfaeb %v24, %v24, %v26, 4
+; CHECK: br %r14
+ %res = call <16 x i8> @llvm.s390.vfaeb(<16 x i8> %a, <16 x i8> %b, i32 4)
+ ret <16 x i8> %res
+}
+
+; VFAEB with IN !RT.
+define <16 x i8> @test_vfaeb_8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: test_vfaeb_8:
+; CHECK: vfaeb %v24, %v24, %v26, 8
+; CHECK: br %r14
+ %res = call <16 x i8> @llvm.s390.vfaeb(<16 x i8> %a, <16 x i8> %b, i32 8)
+ ret <16 x i8> %res
+}
+
+; VFAEB with IN RT.
+define <16 x i8> @test_vfaeb_12(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: test_vfaeb_12:
+; CHECK: vfaeb %v24, %v24, %v26, 12
+; CHECK: br %r14
+ %res = call <16 x i8> @llvm.s390.vfaeb(<16 x i8> %a, <16 x i8> %b, i32 12)
+ ret <16 x i8> %res
+}
+
+; VFAEB with CS -- should be ignored.
+define <16 x i8> @test_vfaeb_1(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: test_vfaeb_1:
+; CHECK: vfaeb %v24, %v24, %v26, 0
+; CHECK: br %r14
+ %res = call <16 x i8> @llvm.s390.vfaeb(<16 x i8> %a, <16 x i8> %b, i32 1)
+ ret <16 x i8> %res
+}
+
+; VFAEH.
+define <8 x i16> @test_vfaeh(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: test_vfaeh:
+; CHECK: vfaeh %v24, %v24, %v26, 4
+; CHECK: br %r14
+ %res = call <8 x i16> @llvm.s390.vfaeh(<8 x i16> %a, <8 x i16> %b, i32 4)
+ ret <8 x i16> %res
+}
+
+; VFAEF.
+define <4 x i32> @test_vfaef(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_vfaef:
+; CHECK: vfaef %v24, %v24, %v26, 8
+; CHECK: br %r14
+ %res = call <4 x i32> @llvm.s390.vfaef(<4 x i32> %a, <4 x i32> %b, i32 8)
+ ret <4 x i32> %res
+}
+
+; VFAEBS.
+define <16 x i8> @test_vfaebs(<16 x i8> %a, <16 x i8> %b, i32 *%ccptr) {
+; CHECK-LABEL: test_vfaebs:
+; CHECK: vfaebs %v24, %v24, %v26, 0
+; CHECK: ipm [[REG:%r[0-5]]]
+; CHECK: srl [[REG]], 28
+; CHECK: st [[REG]], 0(%r2)
+; CHECK: br %r14
+ %call = call {<16 x i8>, i32} @llvm.s390.vfaebs(<16 x i8> %a, <16 x i8> %b,
+ i32 0)
+ %res = extractvalue {<16 x i8>, i32} %call, 0
+ %cc = extractvalue {<16 x i8>, i32} %call, 1
+ store i32 %cc, i32 *%ccptr
+ ret <16 x i8> %res
+}
+
+; VFAEHS.
+define <8 x i16> @test_vfaehs(<8 x i16> %a, <8 x i16> %b, i32 *%ccptr) {
+; CHECK-LABEL: test_vfaehs:
+; CHECK: vfaehs %v24, %v24, %v26, 4
+; CHECK: ipm [[REG:%r[0-5]]]
+; CHECK: srl [[REG]], 28
+; CHECK: st [[REG]], 0(%r2)
+; CHECK: br %r14
+ %call = call {<8 x i16>, i32} @llvm.s390.vfaehs(<8 x i16> %a, <8 x i16> %b,
+ i32 4)
+ %res = extractvalue {<8 x i16>, i32} %call, 0
+ %cc = extractvalue {<8 x i16>, i32} %call, 1
+ store i32 %cc, i32 *%ccptr
+ ret <8 x i16> %res
+}
+
+; VFAEFS.
+define <4 x i32> @test_vfaefs(<4 x i32> %a, <4 x i32> %b, i32 *%ccptr) {
+; CHECK-LABEL: test_vfaefs:
+; CHECK: vfaefs %v24, %v24, %v26, 8
+; CHECK: ipm [[REG:%r[0-5]]]
+; CHECK: srl [[REG]], 28
+; CHECK: st [[REG]], 0(%r2)
+; CHECK: br %r14
+ %call = call {<4 x i32>, i32} @llvm.s390.vfaefs(<4 x i32> %a, <4 x i32> %b,
+ i32 8)
+ %res = extractvalue {<4 x i32>, i32} %call, 0
+ %cc = extractvalue {<4 x i32>, i32} %call, 1
+ store i32 %cc, i32 *%ccptr
+ ret <4 x i32> %res
+}
+
+; VFAEZB with !IN !RT.
+define <16 x i8> @test_vfaezb_0(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: test_vfaezb_0:
+; CHECK: vfaezb %v24, %v24, %v26, 0
+; CHECK: br %r14
+ %res = call <16 x i8> @llvm.s390.vfaezb(<16 x i8> %a, <16 x i8> %b, i32 0)
+ ret <16 x i8> %res
+}
+
+; VFAEZB with !IN RT.
+define <16 x i8> @test_vfaezb_4(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: test_vfaezb_4:
+; CHECK: vfaezb %v24, %v24, %v26, 4
+; CHECK: br %r14
+ %res = call <16 x i8> @llvm.s390.vfaezb(<16 x i8> %a, <16 x i8> %b, i32 4)
+ ret <16 x i8> %res
+}
+
+; VFAEZB with IN !RT.
+define <16 x i8> @test_vfaezb_8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: test_vfaezb_8:
+; CHECK: vfaezb %v24, %v24, %v26, 8
+; CHECK: br %r14
+ %res = call <16 x i8> @llvm.s390.vfaezb(<16 x i8> %a, <16 x i8> %b, i32 8)
+ ret <16 x i8> %res
+}
+
+; VFAEZB with IN RT.
+define <16 x i8> @test_vfaezb_12(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: test_vfaezb_12:
+; CHECK: vfaezb %v24, %v24, %v26, 12
+; CHECK: br %r14
+ %res = call <16 x i8> @llvm.s390.vfaezb(<16 x i8> %a, <16 x i8> %b, i32 12)
+ ret <16 x i8> %res
+}
+
+; VFAEZB with CS -- should be ignored.
+define <16 x i8> @test_vfaezb_1(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: test_vfaezb_1:
+; CHECK: vfaezb %v24, %v24, %v26, 0
+; CHECK: br %r14
+ %res = call <16 x i8> @llvm.s390.vfaezb(<16 x i8> %a, <16 x i8> %b, i32 1)
+ ret <16 x i8> %res
+}
+
+; VFAEZH.
+define <8 x i16> @test_vfaezh(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: test_vfaezh:
+; CHECK: vfaezh %v24, %v24, %v26, 4
+; CHECK: br %r14
+ %res = call <8 x i16> @llvm.s390.vfaezh(<8 x i16> %a, <8 x i16> %b, i32 4)
+ ret <8 x i16> %res
+}
+
+; VFAEZF.
+define <4 x i32> @test_vfaezf(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_vfaezf:
+; CHECK: vfaezf %v24, %v24, %v26, 8
+; CHECK: br %r14
+ %res = call <4 x i32> @llvm.s390.vfaezf(<4 x i32> %a, <4 x i32> %b, i32 8)
+ ret <4 x i32> %res
+}
+
+; VFAEZBS.
+define <16 x i8> @test_vfaezbs(<16 x i8> %a, <16 x i8> %b, i32 *%ccptr) {
+; CHECK-LABEL: test_vfaezbs:
+; CHECK: vfaezbs %v24, %v24, %v26, 0
+; CHECK: ipm [[REG:%r[0-5]]]
+; CHECK: srl [[REG]], 28
+; CHECK: st [[REG]], 0(%r2)
+; CHECK: br %r14
+ %call = call {<16 x i8>, i32} @llvm.s390.vfaezbs(<16 x i8> %a, <16 x i8> %b,
+ i32 0)
+ %res = extractvalue {<16 x i8>, i32} %call, 0
+ %cc = extractvalue {<16 x i8>, i32} %call, 1
+ store i32 %cc, i32 *%ccptr
+ ret <16 x i8> %res
+}
+
+; VFAEZHS.
+define <8 x i16> @test_vfaezhs(<8 x i16> %a, <8 x i16> %b, i32 *%ccptr) {
+; CHECK-LABEL: test_vfaezhs:
+; CHECK: vfaezhs %v24, %v24, %v26, 4
+; CHECK: ipm [[REG:%r[0-5]]]
+; CHECK: srl [[REG]], 28
+; CHECK: st [[REG]], 0(%r2)
+; CHECK: br %r14
+ %call = call {<8 x i16>, i32} @llvm.s390.vfaezhs(<8 x i16> %a, <8 x i16> %b,
+ i32 4)
+ %res = extractvalue {<8 x i16>, i32} %call, 0
+ %cc = extractvalue {<8 x i16>, i32} %call, 1
+ store i32 %cc, i32 *%ccptr
+ ret <8 x i16> %res
+}
+
+; VFAEZFS.
+define <4 x i32> @test_vfaezfs(<4 x i32> %a, <4 x i32> %b, i32 *%ccptr) {
+; CHECK-LABEL: test_vfaezfs:
+; CHECK: vfaezfs %v24, %v24, %v26, 8
+; CHECK: ipm [[REG:%r[0-5]]]
+; CHECK: srl [[REG]], 28
+; CHECK: st [[REG]], 0(%r2)
+; CHECK: br %r14
+ %call = call {<4 x i32>, i32} @llvm.s390.vfaezfs(<4 x i32> %a, <4 x i32> %b,
+ i32 8)
+ %res = extractvalue {<4 x i32>, i32} %call, 0
+ %cc = extractvalue {<4 x i32>, i32} %call, 1
+ store i32 %cc, i32 *%ccptr
+ ret <4 x i32> %res
+}
+
+; VFEEB.
+define <16 x i8> @test_vfeeb_0(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: test_vfeeb_0:
+; CHECK: vfeeb %v24, %v24, %v26
+; CHECK: br %r14
+ %res = call <16 x i8> @llvm.s390.vfeeb(<16 x i8> %a, <16 x i8> %b)
+ ret <16 x i8> %res
+}
+
+; VFEEH.
+define <8 x i16> @test_vfeeh(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: test_vfeeh:
+; CHECK: vfeeh %v24, %v24, %v26
+; CHECK: br %r14
+ %res = call <8 x i16> @llvm.s390.vfeeh(<8 x i16> %a, <8 x i16> %b)
+ ret <8 x i16> %res
+}
+
+; VFEEF.
+define <4 x i32> @test_vfeef(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_vfeef:
+; CHECK: vfeef %v24, %v24, %v26
+; CHECK: br %r14
+ %res = call <4 x i32> @llvm.s390.vfeef(<4 x i32> %a, <4 x i32> %b)
+ ret <4 x i32> %res
+}
+
+; VFEEBS.
+define <16 x i8> @test_vfeebs(<16 x i8> %a, <16 x i8> %b, i32 *%ccptr) {
+; CHECK-LABEL: test_vfeebs:
+; CHECK: vfeebs %v24, %v24, %v26
+; CHECK: ipm [[REG:%r[0-5]]]
+; CHECK: srl [[REG]], 28
+; CHECK: st [[REG]], 0(%r2)
+; CHECK: br %r14
+ %call = call {<16 x i8>, i32} @llvm.s390.vfeebs(<16 x i8> %a, <16 x i8> %b)
+ %res = extractvalue {<16 x i8>, i32} %call, 0
+ %cc = extractvalue {<16 x i8>, i32} %call, 1
+ store i32 %cc, i32 *%ccptr
+ ret <16 x i8> %res
+}
+
+; VFEEHS.
+define <8 x i16> @test_vfeehs(<8 x i16> %a, <8 x i16> %b, i32 *%ccptr) {
+; CHECK-LABEL: test_vfeehs:
+; CHECK: vfeehs %v24, %v24, %v26
+; CHECK: ipm [[REG:%r[0-5]]]
+; CHECK: srl [[REG]], 28
+; CHECK: st [[REG]], 0(%r2)
+; CHECK: br %r14
+ %call = call {<8 x i16>, i32} @llvm.s390.vfeehs(<8 x i16> %a, <8 x i16> %b)
+ %res = extractvalue {<8 x i16>, i32} %call, 0
+ %cc = extractvalue {<8 x i16>, i32} %call, 1
+ store i32 %cc, i32 *%ccptr
+ ret <8 x i16> %res
+}
+
+; VFEEFS.
+define <4 x i32> @test_vfeefs(<4 x i32> %a, <4 x i32> %b, i32 *%ccptr) {
+; CHECK-LABEL: test_vfeefs:
+; CHECK: vfeefs %v24, %v24, %v26
+; CHECK: ipm [[REG:%r[0-5]]]
+; CHECK: srl [[REG]], 28
+; CHECK: st [[REG]], 0(%r2)
+; CHECK: br %r14
+ %call = call {<4 x i32>, i32} @llvm.s390.vfeefs(<4 x i32> %a, <4 x i32> %b)
+ %res = extractvalue {<4 x i32>, i32} %call, 0
+ %cc = extractvalue {<4 x i32>, i32} %call, 1
+ store i32 %cc, i32 *%ccptr
+ ret <4 x i32> %res
+}
+
+; VFEEZB.
+define <16 x i8> @test_vfeezb(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: test_vfeezb:
+; CHECK: vfeezb %v24, %v24, %v26
+; CHECK: br %r14
+ %res = call <16 x i8> @llvm.s390.vfeezb(<16 x i8> %a, <16 x i8> %b)
+ ret <16 x i8> %res
+}
+
+; VFEEZH.
+define <8 x i16> @test_vfeezh(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: test_vfeezh:
+; CHECK: vfeezh %v24, %v24, %v26
+; CHECK: br %r14
+ %res = call <8 x i16> @llvm.s390.vfeezh(<8 x i16> %a, <8 x i16> %b)
+ ret <8 x i16> %res
+}
+
+; VFEEZF.
+define <4 x i32> @test_vfeezf(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_vfeezf:
+; CHECK: vfeezf %v24, %v24, %v26
+; CHECK: br %r14
+ %res = call <4 x i32> @llvm.s390.vfeezf(<4 x i32> %a, <4 x i32> %b)
+ ret <4 x i32> %res
+}
+
+; VFEEZBS.
+define <16 x i8> @test_vfeezbs(<16 x i8> %a, <16 x i8> %b, i32 *%ccptr) {
+; CHECK-LABEL: test_vfeezbs:
+; CHECK: vfeezbs %v24, %v24, %v26
+; CHECK: ipm [[REG:%r[0-5]]]
+; CHECK: srl [[REG]], 28
+; CHECK: st [[REG]], 0(%r2)
+; CHECK: br %r14
+ %call = call {<16 x i8>, i32} @llvm.s390.vfeezbs(<16 x i8> %a, <16 x i8> %b)
+ %res = extractvalue {<16 x i8>, i32} %call, 0
+ %cc = extractvalue {<16 x i8>, i32} %call, 1
+ store i32 %cc, i32 *%ccptr
+ ret <16 x i8> %res
+}
+
+; VFEEZHS.
+define <8 x i16> @test_vfeezhs(<8 x i16> %a, <8 x i16> %b, i32 *%ccptr) {
+; CHECK-LABEL: test_vfeezhs:
+; CHECK: vfeezhs %v24, %v24, %v26
+; CHECK: ipm [[REG:%r[0-5]]]
+; CHECK: srl [[REG]], 28
+; CHECK: st [[REG]], 0(%r2)
+; CHECK: br %r14
+ %call = call {<8 x i16>, i32} @llvm.s390.vfeezhs(<8 x i16> %a, <8 x i16> %b)
+ %res = extractvalue {<8 x i16>, i32} %call, 0
+ %cc = extractvalue {<8 x i16>, i32} %call, 1
+ store i32 %cc, i32 *%ccptr
+ ret <8 x i16> %res
+}
+
+; VFEEZFS.
+define <4 x i32> @test_vfeezfs(<4 x i32> %a, <4 x i32> %b, i32 *%ccptr) {
+; CHECK-LABEL: test_vfeezfs:
+; CHECK: vfeezfs %v24, %v24, %v26
+; CHECK: ipm [[REG:%r[0-5]]]
+; CHECK: srl [[REG]], 28
+; CHECK: st [[REG]], 0(%r2)
+; CHECK: br %r14
+ %call = call {<4 x i32>, i32} @llvm.s390.vfeezfs(<4 x i32> %a, <4 x i32> %b)
+ %res = extractvalue {<4 x i32>, i32} %call, 0
+ %cc = extractvalue {<4 x i32>, i32} %call, 1
+ store i32 %cc, i32 *%ccptr
+ ret <4 x i32> %res
+}
+
+; VFENEB.
+define <16 x i8> @test_vfeneb_0(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: test_vfeneb_0:
+; CHECK: vfeneb %v24, %v24, %v26
+; CHECK: br %r14
+ %res = call <16 x i8> @llvm.s390.vfeneb(<16 x i8> %a, <16 x i8> %b)
+ ret <16 x i8> %res
+}
+
+; VFENEH.
+define <8 x i16> @test_vfeneh(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: test_vfeneh:
+; CHECK: vfeneh %v24, %v24, %v26
+; CHECK: br %r14
+ %res = call <8 x i16> @llvm.s390.vfeneh(<8 x i16> %a, <8 x i16> %b)
+ ret <8 x i16> %res
+}
+
+; VFENEF.
+define <4 x i32> @test_vfenef(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_vfenef:
+; CHECK: vfenef %v24, %v24, %v26
+; CHECK: br %r14
+ %res = call <4 x i32> @llvm.s390.vfenef(<4 x i32> %a, <4 x i32> %b)
+ ret <4 x i32> %res
+}
+
+; VFENEBS.
+define <16 x i8> @test_vfenebs(<16 x i8> %a, <16 x i8> %b, i32 *%ccptr) {
+; CHECK-LABEL: test_vfenebs:
+; CHECK: vfenebs %v24, %v24, %v26
+; CHECK: ipm [[REG:%r[0-5]]]
+; CHECK: srl [[REG]], 28
+; CHECK: st [[REG]], 0(%r2)
+; CHECK: br %r14
+ %call = call {<16 x i8>, i32} @llvm.s390.vfenebs(<16 x i8> %a, <16 x i8> %b)
+ %res = extractvalue {<16 x i8>, i32} %call, 0
+ %cc = extractvalue {<16 x i8>, i32} %call, 1
+ store i32 %cc, i32 *%ccptr
+ ret <16 x i8> %res
+}
+
+; VFENEHS.
+define <8 x i16> @test_vfenehs(<8 x i16> %a, <8 x i16> %b, i32 *%ccptr) {
+; CHECK-LABEL: test_vfenehs:
+; CHECK: vfenehs %v24, %v24, %v26
+; CHECK: ipm [[REG:%r[0-5]]]
+; CHECK: srl [[REG]], 28
+; CHECK: st [[REG]], 0(%r2)
+; CHECK: br %r14
+ %call = call {<8 x i16>, i32} @llvm.s390.vfenehs(<8 x i16> %a, <8 x i16> %b)
+ %res = extractvalue {<8 x i16>, i32} %call, 0
+ %cc = extractvalue {<8 x i16>, i32} %call, 1
+ store i32 %cc, i32 *%ccptr
+ ret <8 x i16> %res
+}
+
+; VFENEFS.
+define <4 x i32> @test_vfenefs(<4 x i32> %a, <4 x i32> %b, i32 *%ccptr) {
+; CHECK-LABEL: test_vfenefs:
+; CHECK: vfenefs %v24, %v24, %v26
+; CHECK: ipm [[REG:%r[0-5]]]
+; CHECK: srl [[REG]], 28
+; CHECK: st [[REG]], 0(%r2)
+; CHECK: br %r14
+ %call = call {<4 x i32>, i32} @llvm.s390.vfenefs(<4 x i32> %a, <4 x i32> %b)
+ %res = extractvalue {<4 x i32>, i32} %call, 0
+ %cc = extractvalue {<4 x i32>, i32} %call, 1
+ store i32 %cc, i32 *%ccptr
+ ret <4 x i32> %res
+}
+
+; VFENEZB.
+define <16 x i8> @test_vfenezb(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: test_vfenezb:
+; CHECK: vfenezb %v24, %v24, %v26
+; CHECK: br %r14
+ %res = call <16 x i8> @llvm.s390.vfenezb(<16 x i8> %a, <16 x i8> %b)
+ ret <16 x i8> %res
+}
+
+; VFENEZH.
+define <8 x i16> @test_vfenezh(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: test_vfenezh:
+; CHECK: vfenezh %v24, %v24, %v26
+; CHECK: br %r14
+ %res = call <8 x i16> @llvm.s390.vfenezh(<8 x i16> %a, <8 x i16> %b)
+ ret <8 x i16> %res
+}
+
+; VFENEZF.
+define <4 x i32> @test_vfenezf(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_vfenezf:
+; CHECK: vfenezf %v24, %v24, %v26
+; CHECK: br %r14
+ %res = call <4 x i32> @llvm.s390.vfenezf(<4 x i32> %a, <4 x i32> %b)
+ ret <4 x i32> %res
+}
+
+; VFENEZBS.
+define <16 x i8> @test_vfenezbs(<16 x i8> %a, <16 x i8> %b, i32 *%ccptr) {
+; CHECK-LABEL: test_vfenezbs:
+; CHECK: vfenezbs %v24, %v24, %v26
+; CHECK: ipm [[REG:%r[0-5]]]
+; CHECK: srl [[REG]], 28
+; CHECK: st [[REG]], 0(%r2)
+; CHECK: br %r14
+ %call = call {<16 x i8>, i32} @llvm.s390.vfenezbs(<16 x i8> %a, <16 x i8> %b)
+ %res = extractvalue {<16 x i8>, i32} %call, 0
+ %cc = extractvalue {<16 x i8>, i32} %call, 1
+ store i32 %cc, i32 *%ccptr
+ ret <16 x i8> %res
+}
+
+; VFENEZHS.
+define <8 x i16> @test_vfenezhs(<8 x i16> %a, <8 x i16> %b, i32 *%ccptr) {
+; CHECK-LABEL: test_vfenezhs:
+; CHECK: vfenezhs %v24, %v24, %v26
+; CHECK: ipm [[REG:%r[0-5]]]
+; CHECK: srl [[REG]], 28
+; CHECK: st [[REG]], 0(%r2)
+; CHECK: br %r14
+ %call = call {<8 x i16>, i32} @llvm.s390.vfenezhs(<8 x i16> %a, <8 x i16> %b)
+ %res = extractvalue {<8 x i16>, i32} %call, 0
+ %cc = extractvalue {<8 x i16>, i32} %call, 1
+ store i32 %cc, i32 *%ccptr
+ ret <8 x i16> %res
+}
+
+; VFENEZFS.
+define <4 x i32> @test_vfenezfs(<4 x i32> %a, <4 x i32> %b, i32 *%ccptr) {
+; CHECK-LABEL: test_vfenezfs:
+; CHECK: vfenezfs %v24, %v24, %v26
+; CHECK: ipm [[REG:%r[0-5]]]
+; CHECK: srl [[REG]], 28
+; CHECK: st [[REG]], 0(%r2)
+; CHECK: br %r14
+ %call = call {<4 x i32>, i32} @llvm.s390.vfenezfs(<4 x i32> %a, <4 x i32> %b)
+ %res = extractvalue {<4 x i32>, i32} %call, 0
+ %cc = extractvalue {<4 x i32>, i32} %call, 1
+ store i32 %cc, i32 *%ccptr
+ ret <4 x i32> %res
+}
+
+; VISTRB.
+define <16 x i8> @test_vistrb(<16 x i8> %a) {
+; CHECK-LABEL: test_vistrb:
+; CHECK: vistrb %v24, %v24
+; CHECK: br %r14
+ %res = call <16 x i8> @llvm.s390.vistrb(<16 x i8> %a)
+ ret <16 x i8> %res
+}
+
+; VISTRH.
+define <8 x i16> @test_vistrh(<8 x i16> %a) {
+; CHECK-LABEL: test_vistrh:
+; CHECK: vistrh %v24, %v24
+; CHECK: br %r14
+ %res = call <8 x i16> @llvm.s390.vistrh(<8 x i16> %a)
+ ret <8 x i16> %res
+}
+
+; VISTRF.
+define <4 x i32> @test_vistrf(<4 x i32> %a) {
+; CHECK-LABEL: test_vistrf:
+; CHECK: vistrf %v24, %v24
+; CHECK: br %r14
+ %res = call <4 x i32> @llvm.s390.vistrf(<4 x i32> %a)
+ ret <4 x i32> %res
+}
+
+; VISTRBS.
+define <16 x i8> @test_vistrbs(<16 x i8> %a, i32 *%ccptr) {
+; CHECK-LABEL: test_vistrbs:
+; CHECK: vistrbs %v24, %v24
+; CHECK: ipm [[REG:%r[0-5]]]
+; CHECK: srl [[REG]], 28
+; CHECK: st [[REG]], 0(%r2)
+; CHECK: br %r14
+ %call = call {<16 x i8>, i32} @llvm.s390.vistrbs(<16 x i8> %a)
+ %res = extractvalue {<16 x i8>, i32} %call, 0
+ %cc = extractvalue {<16 x i8>, i32} %call, 1
+ store i32 %cc, i32 *%ccptr
+ ret <16 x i8> %res
+}
+
+; VISTRHS.
+define <8 x i16> @test_vistrhs(<8 x i16> %a, i32 *%ccptr) {
+; CHECK-LABEL: test_vistrhs:
+; CHECK: vistrhs %v24, %v24
+; CHECK: ipm [[REG:%r[0-5]]]
+; CHECK: srl [[REG]], 28
+; CHECK: st [[REG]], 0(%r2)
+; CHECK: br %r14
+ %call = call {<8 x i16>, i32} @llvm.s390.vistrhs(<8 x i16> %a)
+ %res = extractvalue {<8 x i16>, i32} %call, 0
+ %cc = extractvalue {<8 x i16>, i32} %call, 1
+ store i32 %cc, i32 *%ccptr
+ ret <8 x i16> %res
+}
+
+; VISTRFS.
+define <4 x i32> @test_vistrfs(<4 x i32> %a, i32 *%ccptr) {
+; CHECK-LABEL: test_vistrfs:
+; CHECK: vistrfs %v24, %v24
+; CHECK: ipm [[REG:%r[0-5]]]
+; CHECK: srl [[REG]], 28
+; CHECK: st [[REG]], 0(%r2)
+; CHECK: br %r14
+ %call = call {<4 x i32>, i32} @llvm.s390.vistrfs(<4 x i32> %a)
+ %res = extractvalue {<4 x i32>, i32} %call, 0
+ %cc = extractvalue {<4 x i32>, i32} %call, 1
+ store i32 %cc, i32 *%ccptr
+ ret <4 x i32> %res
+}
+
+; VSTRCB with !IN !RT.
+define <16 x i8> @test_vstrcb_0(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) {
+; CHECK-LABEL: test_vstrcb_0:
+; CHECK: vstrcb %v24, %v24, %v26, %v28, 0
+; CHECK: br %r14
+ %res = call <16 x i8> @llvm.s390.vstrcb(<16 x i8> %a, <16 x i8> %b,
+ <16 x i8> %c, i32 0)
+ ret <16 x i8> %res
+}
+
+; VSTRCB with !IN RT.
+define <16 x i8> @test_vstrcb_4(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) {
+; CHECK-LABEL: test_vstrcb_4:
+; CHECK: vstrcb %v24, %v24, %v26, %v28, 4
+; CHECK: br %r14
+ %res = call <16 x i8> @llvm.s390.vstrcb(<16 x i8> %a, <16 x i8> %b,
+ <16 x i8> %c, i32 4)
+ ret <16 x i8> %res
+}
+
+; VSTRCB with IN !RT.
+define <16 x i8> @test_vstrcb_8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) {
+; CHECK-LABEL: test_vstrcb_8:
+; CHECK: vstrcb %v24, %v24, %v26, %v28, 8
+; CHECK: br %r14
+ %res = call <16 x i8> @llvm.s390.vstrcb(<16 x i8> %a, <16 x i8> %b,
+ <16 x i8> %c, i32 8)
+ ret <16 x i8> %res
+}
+
+; VSTRCB with IN RT.
+define <16 x i8> @test_vstrcb_12(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) {
+; CHECK-LABEL: test_vstrcb_12:
+; CHECK: vstrcb %v24, %v24, %v26, %v28, 12
+; CHECK: br %r14
+ %res = call <16 x i8> @llvm.s390.vstrcb(<16 x i8> %a, <16 x i8> %b,
+ <16 x i8> %c, i32 12)
+ ret <16 x i8> %res
+}
+
+; VSTRCB with CS -- should be ignored.
+define <16 x i8> @test_vstrcb_1(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) {
+; CHECK-LABEL: test_vstrcb_1:
+; CHECK: vstrcb %v24, %v24, %v26, %v28, 0
+; CHECK: br %r14
+ %res = call <16 x i8> @llvm.s390.vstrcb(<16 x i8> %a, <16 x i8> %b,
+ <16 x i8> %c, i32 1)
+ ret <16 x i8> %res
+}
+
+; VSTRCH.
+define <8 x i16> @test_vstrch(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) {
+; CHECK-LABEL: test_vstrch:
+; CHECK: vstrch %v24, %v24, %v26, %v28, 4
+; CHECK: br %r14
+ %res = call <8 x i16> @llvm.s390.vstrch(<8 x i16> %a, <8 x i16> %b,
+ <8 x i16> %c, i32 4)
+ ret <8 x i16> %res
+}
+
+; VSTRCF.
+define <4 x i32> @test_vstrcf(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
+; CHECK-LABEL: test_vstrcf:
+; CHECK: vstrcf %v24, %v24, %v26, %v28, 8
+; CHECK: br %r14
+ %res = call <4 x i32> @llvm.s390.vstrcf(<4 x i32> %a, <4 x i32> %b,
+ <4 x i32> %c, i32 8)
+ ret <4 x i32> %res
+}
+
+; VSTRCBS.
+define <16 x i8> @test_vstrcbs(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c,
+ i32 *%ccptr) {
+; CHECK-LABEL: test_vstrcbs:
+; CHECK: vstrcbs %v24, %v24, %v26, %v28, 0
+; CHECK: ipm [[REG:%r[0-5]]]
+; CHECK: srl [[REG]], 28
+; CHECK: st [[REG]], 0(%r2)
+; CHECK: br %r14
+ %call = call {<16 x i8>, i32} @llvm.s390.vstrcbs(<16 x i8> %a, <16 x i8> %b,
+ <16 x i8> %c, i32 0)
+ %res = extractvalue {<16 x i8>, i32} %call, 0
+ %cc = extractvalue {<16 x i8>, i32} %call, 1
+ store i32 %cc, i32 *%ccptr
+ ret <16 x i8> %res
+}
+
+; VSTRCHS.
+define <8 x i16> @test_vstrchs(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c,
+ i32 *%ccptr) {
+; CHECK-LABEL: test_vstrchs:
+; CHECK: vstrchs %v24, %v24, %v26, %v28, 4
+; CHECK: ipm [[REG:%r[0-5]]]
+; CHECK: srl [[REG]], 28
+; CHECK: st [[REG]], 0(%r2)
+; CHECK: br %r14
+ %call = call {<8 x i16>, i32} @llvm.s390.vstrchs(<8 x i16> %a, <8 x i16> %b,
+ <8 x i16> %c, i32 4)
+ %res = extractvalue {<8 x i16>, i32} %call, 0
+ %cc = extractvalue {<8 x i16>, i32} %call, 1
+ store i32 %cc, i32 *%ccptr
+ ret <8 x i16> %res
+}
+
+; VSTRCFS.
+define <4 x i32> @test_vstrcfs(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c,
+ i32 *%ccptr) {
+; CHECK-LABEL: test_vstrcfs:
+; CHECK: vstrcfs %v24, %v24, %v26, %v28, 8
+; CHECK: ipm [[REG:%r[0-5]]]
+; CHECK: srl [[REG]], 28
+; CHECK: st [[REG]], 0(%r2)
+; CHECK: br %r14
+ %call = call {<4 x i32>, i32} @llvm.s390.vstrcfs(<4 x i32> %a, <4 x i32> %b,
+ <4 x i32> %c, i32 8)
+ %res = extractvalue {<4 x i32>, i32} %call, 0
+ %cc = extractvalue {<4 x i32>, i32} %call, 1
+ store i32 %cc, i32 *%ccptr
+ ret <4 x i32> %res
+}
+
+; VSTRCZB with !IN !RT.
+define <16 x i8> @test_vstrczb_0(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) {
+; CHECK-LABEL: test_vstrczb_0:
+; CHECK: vstrczb %v24, %v24, %v26, %v28, 0
+; CHECK: br %r14
+ %res = call <16 x i8> @llvm.s390.vstrczb(<16 x i8> %a, <16 x i8> %b,
+ <16 x i8> %c, i32 0)
+ ret <16 x i8> %res
+}
+
+; VSTRCZB with !IN RT.
+define <16 x i8> @test_vstrczb_4(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) {
+; CHECK-LABEL: test_vstrczb_4:
+; CHECK: vstrczb %v24, %v24, %v26, %v28, 4
+; CHECK: br %r14
+ %res = call <16 x i8> @llvm.s390.vstrczb(<16 x i8> %a, <16 x i8> %b,
+ <16 x i8> %c, i32 4)
+ ret <16 x i8> %res
+}
+
+; VSTRCZB with IN !RT.
+define <16 x i8> @test_vstrczb_8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) {
+; CHECK-LABEL: test_vstrczb_8:
+; CHECK: vstrczb %v24, %v24, %v26, %v28, 8
+; CHECK: br %r14
+ %res = call <16 x i8> @llvm.s390.vstrczb(<16 x i8> %a, <16 x i8> %b,
+ <16 x i8> %c, i32 8)
+ ret <16 x i8> %res
+}
+
+; VSTRCZB with IN RT.
+define <16 x i8> @test_vstrczb_12(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) {
+; CHECK-LABEL: test_vstrczb_12:
+; CHECK: vstrczb %v24, %v24, %v26, %v28, 12
+; CHECK: br %r14
+ %res = call <16 x i8> @llvm.s390.vstrczb(<16 x i8> %a, <16 x i8> %b,
+ <16 x i8> %c, i32 12)
+ ret <16 x i8> %res
+}
+
+; VSTRCZB with CS -- should be ignored.
+define <16 x i8> @test_vstrczb_1(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) {
+; CHECK-LABEL: test_vstrczb_1:
+; CHECK: vstrczb %v24, %v24, %v26, %v28, 0
+; CHECK: br %r14
+ %res = call <16 x i8> @llvm.s390.vstrczb(<16 x i8> %a, <16 x i8> %b,
+ <16 x i8> %c, i32 1)
+ ret <16 x i8> %res
+}
+
+; VSTRCZH.
+define <8 x i16> @test_vstrczh(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) {
+; CHECK-LABEL: test_vstrczh:
+; CHECK: vstrczh %v24, %v24, %v26, %v28, 4
+; CHECK: br %r14
+ %res = call <8 x i16> @llvm.s390.vstrczh(<8 x i16> %a, <8 x i16> %b,
+ <8 x i16> %c, i32 4)
+ ret <8 x i16> %res
+}
+
+; VSTRCZF.
+define <4 x i32> @test_vstrczf(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
+; CHECK-LABEL: test_vstrczf:
+; CHECK: vstrczf %v24, %v24, %v26, %v28, 8
+; CHECK: br %r14
+ %res = call <4 x i32> @llvm.s390.vstrczf(<4 x i32> %a, <4 x i32> %b,
+ <4 x i32> %c, i32 8)
+ ret <4 x i32> %res
+}
+
+; VSTRCZBS.
+define <16 x i8> @test_vstrczbs(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c,
+ i32 *%ccptr) {
+; CHECK-LABEL: test_vstrczbs:
+; CHECK: vstrczbs %v24, %v24, %v26, %v28, 0
+; CHECK: ipm [[REG:%r[0-5]]]
+; CHECK: srl [[REG]], 28
+; CHECK: st [[REG]], 0(%r2)
+; CHECK: br %r14
+ %call = call {<16 x i8>, i32} @llvm.s390.vstrczbs(<16 x i8> %a, <16 x i8> %b,
+ <16 x i8> %c, i32 0)
+ %res = extractvalue {<16 x i8>, i32} %call, 0
+ %cc = extractvalue {<16 x i8>, i32} %call, 1
+ store i32 %cc, i32 *%ccptr
+ ret <16 x i8> %res
+}
+
+; VSTRCZHS.
+define <8 x i16> @test_vstrczhs(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c,
+ i32 *%ccptr) {
+; CHECK-LABEL: test_vstrczhs:
+; CHECK: vstrczhs %v24, %v24, %v26, %v28, 4
+; CHECK: ipm [[REG:%r[0-5]]]
+; CHECK: srl [[REG]], 28
+; CHECK: st [[REG]], 0(%r2)
+; CHECK: br %r14
+ %call = call {<8 x i16>, i32} @llvm.s390.vstrczhs(<8 x i16> %a, <8 x i16> %b,
+ <8 x i16> %c, i32 4)
+ %res = extractvalue {<8 x i16>, i32} %call, 0
+ %cc = extractvalue {<8 x i16>, i32} %call, 1
+ store i32 %cc, i32 *%ccptr
+ ret <8 x i16> %res
+}
+
+; VSTRCZFS.
+define <4 x i32> @test_vstrczfs(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c,
+ i32 *%ccptr) {
+; CHECK-LABEL: test_vstrczfs:
+; CHECK: vstrczfs %v24, %v24, %v26, %v28, 8
+; CHECK: ipm [[REG:%r[0-5]]]
+; CHECK: srl [[REG]], 28
+; CHECK: st [[REG]], 0(%r2)
+; CHECK: br %r14
+ %call = call {<4 x i32>, i32} @llvm.s390.vstrczfs(<4 x i32> %a, <4 x i32> %b,
+ <4 x i32> %c, i32 8)
+ %res = extractvalue {<4 x i32>, i32} %call, 0
+ %cc = extractvalue {<4 x i32>, i32} %call, 1
+ store i32 %cc, i32 *%ccptr
+ ret <4 x i32> %res
+}
+
+; VFCEDBS with no processing of the result.
+define i32 @test_vfcedbs(<2 x double> %a, <2 x double> %b) {
+; CHECK-LABEL: test_vfcedbs:
+; CHECK: vfcedbs {{%v[0-9]+}}, %v24, %v26
+; CHECK: ipm %r2
+; CHECK: srl %r2, 28
+; CHECK: br %r14
+ %call = call {<2 x i64>, i32} @llvm.s390.vfcedbs(<2 x double> %a,
+ <2 x double> %b)
+ %res = extractvalue {<2 x i64>, i32} %call, 1
+ ret i32 %res
+}
+
+; VFCEDBS, returning 1 if any elements are equal (CC != 3).
+define i32 @test_vfcedbs_any_bool(<2 x double> %a, <2 x double> %b) {
+; CHECK-LABEL: test_vfcedbs_any_bool:
+; CHECK: vfcedbs {{%v[0-9]+}}, %v24, %v26
+; CHECK: ipm %r2
+; CHECK: afi %r2, -536870912
+; CHECK: srl %r2, 31
+; CHECK: br %r14
+ %call = call {<2 x i64>, i32} @llvm.s390.vfcedbs(<2 x double> %a,
+ <2 x double> %b)
+ %res = extractvalue {<2 x i64>, i32} %call, 1
+ %cmp = icmp ne i32 %res, 3
+ %ext = zext i1 %cmp to i32
+ ret i32 %ext
+}
+
+; VFCEDBS, storing to %ptr if any elements are equal.
+define <2 x i64> @test_vfcedbs_any_store(<2 x double> %a, <2 x double> %b,
+ i32 *%ptr) {
+; CHECK-LABEL: test_vfcedbs_any_store:
+; CHECK-NOT: %r
+; CHECK: vfcedbs %v24, %v24, %v26
+; CHECK-NEXT: {{jo|jnle}} {{\.L*}}
+; CHECK: mvhi 0(%r2), 0
+; CHECK: br %r14
+ %call = call {<2 x i64>, i32} @llvm.s390.vfcedbs(<2 x double> %a,
+ <2 x double> %b)
+ %res = extractvalue {<2 x i64>, i32} %call, 0
+ %cc = extractvalue {<2 x i64>, i32} %call, 1
+ %cmp = icmp ule i32 %cc, 2
+ br i1 %cmp, label %store, label %exit
+
+store:
+ store i32 0, i32 *%ptr
+ br label %exit
+
+exit:
+ ret <2 x i64> %res
+}
+
+; VFCHDBS with no processing of the result.
+define i32 @test_vfchdbs(<2 x double> %a, <2 x double> %b) {
+; CHECK-LABEL: test_vfchdbs:
+; CHECK: vfchdbs {{%v[0-9]+}}, %v24, %v26
+; CHECK: ipm %r2
+; CHECK: srl %r2, 28
+; CHECK: br %r14
+ %call = call {<2 x i64>, i32} @llvm.s390.vfchdbs(<2 x double> %a,
+ <2 x double> %b)
+ %res = extractvalue {<2 x i64>, i32} %call, 1
+ ret i32 %res
+}
+
+; VFCHDBS, returning 1 if not all elements are higher.
+define i32 @test_vfchdbs_notall_bool(<2 x double> %a, <2 x double> %b) {
+; CHECK-LABEL: test_vfchdbs_notall_bool:
+; CHECK: vfchdbs {{%v[0-9]+}}, %v24, %v26
+; CHECK: ipm [[REG:%r[0-5]]]
+; CHECK: risblg %r2, [[REG]], 31, 159, 36
+; CHECK: br %r14
+ %call = call {<2 x i64>, i32} @llvm.s390.vfchdbs(<2 x double> %a,
+ <2 x double> %b)
+ %res = extractvalue {<2 x i64>, i32} %call, 1
+ %cmp = icmp sge i32 %res, 1
+ %ext = zext i1 %cmp to i32
+ ret i32 %ext
+}
+
+; VFCHDBS, storing to %ptr if not all elements are higher.
+define <2 x i64> @test_vfchdbs_notall_store(<2 x double> %a, <2 x double> %b,
+ i32 *%ptr) {
+; CHECK-LABEL: test_vfchdbs_notall_store:
+; CHECK-NOT: %r
+; CHECK: vfchdbs %v24, %v24, %v26
+; CHECK-NEXT: {{jhe|je}} {{\.L*}}
+; CHECK: mvhi 0(%r2), 0
+; CHECK: br %r14
+ %call = call {<2 x i64>, i32} @llvm.s390.vfchdbs(<2 x double> %a,
+ <2 x double> %b)
+ %res = extractvalue {<2 x i64>, i32} %call, 0
+ %cc = extractvalue {<2 x i64>, i32} %call, 1
+ %cmp = icmp ugt i32 %cc, 0
+ br i1 %cmp, label %store, label %exit
+
+store:
+ store i32 0, i32 *%ptr
+ br label %exit
+
+exit:
+ ret <2 x i64> %res
+}
+
+; VFCHEDBS with no processing of the result.
+define i32 @test_vfchedbs(<2 x double> %a, <2 x double> %b) {
+; CHECK-LABEL: test_vfchedbs:
+; CHECK: vfchedbs {{%v[0-9]+}}, %v24, %v26
+; CHECK: ipm %r2
+; CHECK: srl %r2, 28
+; CHECK: br %r14
+ %call = call {<2 x i64>, i32} @llvm.s390.vfchedbs(<2 x double> %a,
+ <2 x double> %b)
+ %res = extractvalue {<2 x i64>, i32} %call, 1
+ ret i32 %res
+}
+
+; VFCHEDBS, returning 1 if neither element is higher or equal.
+define i32 @test_vfchedbs_none_bool(<2 x double> %a, <2 x double> %b) {
+; CHECK-LABEL: test_vfchedbs_none_bool:
+; CHECK: vfchedbs {{%v[0-9]+}}, %v24, %v26
+; CHECK: ipm [[REG:%r[0-5]]]
+; CHECK: risblg %r2, [[REG]], 31, 159, 35
+; CHECK: br %r14
+ %call = call {<2 x i64>, i32} @llvm.s390.vfchedbs(<2 x double> %a,
+ <2 x double> %b)
+ %res = extractvalue {<2 x i64>, i32} %call, 1
+ %cmp = icmp eq i32 %res, 3
+ %ext = zext i1 %cmp to i32
+ ret i32 %ext
+}
+
+; VFCHEDBS, storing to %ptr if neither element is higher or equal.
+define <2 x i64> @test_vfchedbs_none_store(<2 x double> %a, <2 x double> %b,
+ i32 *%ptr) {
+; CHECK-LABEL: test_vfchedbs_none_store:
+; CHECK-NOT: %r
+; CHECK: vfchedbs %v24, %v24, %v26
+; CHECK-NEXT: {{jno|jle}} {{\.L*}}
+; CHECK: mvhi 0(%r2), 0
+; CHECK: br %r14
+ %call = call {<2 x i64>, i32} @llvm.s390.vfchedbs(<2 x double> %a,
+ <2 x double> %b)
+ %res = extractvalue {<2 x i64>, i32} %call, 0
+ %cc = extractvalue {<2 x i64>, i32} %call, 1
+ %cmp = icmp uge i32 %cc, 3
+ br i1 %cmp, label %store, label %exit
+
+store:
+ store i32 0, i32 *%ptr
+ br label %exit
+
+exit:
+ ret <2 x i64> %res
+}
+
+; VFTCIDB with the lowest useful class selector and no processing of the result.
+define i32 @test_vftcidb(<2 x double> %a) {
+; CHECK-LABEL: test_vftcidb:
+; CHECK: vftcidb {{%v[0-9]+}}, %v24, 1
+; CHECK: ipm %r2
+; CHECK: srl %r2, 28
+; CHECK: br %r14
+ %call = call {<2 x i64>, i32} @llvm.s390.vftcidb(<2 x double> %a, i32 1)
+ %res = extractvalue {<2 x i64>, i32} %call, 1
+ ret i32 %res
+}
+
+; VFTCIDB with the highest useful class selector, returning 1 if all elements
+; have the right class (CC == 0).
+define i32 @test_vftcidb_all_bool(<2 x double> %a) {
+; CHECK-LABEL: test_vftcidb_all_bool:
+; CHECK: vftcidb {{%v[0-9]+}}, %v24, 4094
+; CHECK: afi %r2, -268435456
+; CHECK: srl %r2, 31
+; CHECK: br %r14
+ %call = call {<2 x i64>, i32} @llvm.s390.vftcidb(<2 x double> %a, i32 4094)
+ %res = extractvalue {<2 x i64>, i32} %call, 1
+ %cmp = icmp eq i32 %res, 0
+ %ext = zext i1 %cmp to i32
+ ret i32 %ext
+}
+
+; VFIDB with a rounding mode not usable via standard intrinsics.
+define <2 x double> @test_vfidb_0_4(<2 x double> %a) {
+; CHECK-LABEL: test_vfidb_0_4:
+; CHECK: vfidb %v24, %v24, 0, 4
+; CHECK: br %r14
+ %res = call <2 x double> @llvm.s390.vfidb(<2 x double> %a, i32 0, i32 4)
+ ret <2 x double> %res
+}
+
+; VFIDB with IEEE-inexact exception suppressed.
+define <2 x double> @test_vfidb_4_0(<2 x double> %a) {
+; CHECK-LABEL: test_vfidb_4_0:
+; CHECK: vfidb %v24, %v24, 4, 0
+; CHECK: br %r14
+ %res = call <2 x double> @llvm.s390.vfidb(<2 x double> %a, i32 4, i32 0)
+ ret <2 x double> %res
+}
+
diff --git a/test/CodeGen/SystemZ/vec-log-01.ll b/test/CodeGen/SystemZ/vec-log-01.ll
new file mode 100644
index 000000000000..f9b7402f08e7
--- /dev/null
+++ b/test/CodeGen/SystemZ/vec-log-01.ll
@@ -0,0 +1,15 @@
+; Test v2f64 logarithm.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
+declare <2 x double> @llvm.log.v2f64(<2 x double>)
+
+define <2 x double> @f1(<2 x double> %val) {
+; CHECK-LABEL: f1:
+; CHECK: brasl %r14, log@PLT
+; CHECK: brasl %r14, log@PLT
+; CHECK: vmrhg %v24,
+; CHECK: br %r14
+ %ret = call <2 x double> @llvm.log.v2f64(<2 x double> %val)
+ ret <2 x double> %ret
+}
diff --git a/test/CodeGen/SystemZ/vec-max-01.ll b/test/CodeGen/SystemZ/vec-max-01.ll
new file mode 100644
index 000000000000..ca6f08aa493f
--- /dev/null
+++ b/test/CodeGen/SystemZ/vec-max-01.ll
@@ -0,0 +1,83 @@
+; Test v16i8 maximum.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
+; Test with slt.
+define <16 x i8> @f1(<16 x i8> %val1, <16 x i8> %val2) {
+; CHECK-LABEL: f1:
+; CHECK: vmxb %v24, {{%v24, %v26|%v26, %v24}}
+; CHECK: br %r14
+ %cmp = icmp slt <16 x i8> %val1, %val2
+ %ret = select <16 x i1> %cmp, <16 x i8> %val2, <16 x i8> %val1
+ ret <16 x i8> %ret
+}
+
+; Test with sle.
+define <16 x i8> @f2(<16 x i8> %val1, <16 x i8> %val2) {
+; CHECK-LABEL: f2:
+; CHECK: vmxb %v24, {{%v24, %v26|%v26, %v24}}
+; CHECK: br %r14
+ %cmp = icmp sle <16 x i8> %val1, %val2
+ %ret = select <16 x i1> %cmp, <16 x i8> %val2, <16 x i8> %val1
+ ret <16 x i8> %ret
+}
+
+; Test with sgt.
+define <16 x i8> @f3(<16 x i8> %val1, <16 x i8> %val2) {
+; CHECK-LABEL: f3:
+; CHECK: vmxb %v24, {{%v24, %v26|%v26, %v24}}
+; CHECK: br %r14
+ %cmp = icmp sgt <16 x i8> %val1, %val2
+ %ret = select <16 x i1> %cmp, <16 x i8> %val1, <16 x i8> %val2
+ ret <16 x i8> %ret
+}
+
+; Test with sge.
+define <16 x i8> @f4(<16 x i8> %val1, <16 x i8> %val2) {
+; CHECK-LABEL: f4:
+; CHECK: vmxb %v24, {{%v24, %v26|%v26, %v24}}
+; CHECK: br %r14
+ %cmp = icmp sge <16 x i8> %val1, %val2
+ %ret = select <16 x i1> %cmp, <16 x i8> %val1, <16 x i8> %val2
+ ret <16 x i8> %ret
+}
+
+; Test with ult.
+define <16 x i8> @f5(<16 x i8> %val1, <16 x i8> %val2) {
+; CHECK-LABEL: f5:
+; CHECK: vmxlb %v24, {{%v24, %v26|%v26, %v24}}
+; CHECK: br %r14
+ %cmp = icmp ult <16 x i8> %val1, %val2
+ %ret = select <16 x i1> %cmp, <16 x i8> %val2, <16 x i8> %val1
+ ret <16 x i8> %ret
+}
+
+; Test with ule.
+define <16 x i8> @f6(<16 x i8> %val1, <16 x i8> %val2) {
+; CHECK-LABEL: f6:
+; CHECK: vmxlb %v24, {{%v24, %v26|%v26, %v24}}
+; CHECK: br %r14
+ %cmp = icmp ule <16 x i8> %val1, %val2
+ %ret = select <16 x i1> %cmp, <16 x i8> %val2, <16 x i8> %val1
+ ret <16 x i8> %ret
+}
+
+; Test with ugt.
+define <16 x i8> @f7(<16 x i8> %val1, <16 x i8> %val2) {
+; CHECK-LABEL: f7:
+; CHECK: vmxlb %v24, {{%v24, %v26|%v26, %v24}}
+; CHECK: br %r14
+ %cmp = icmp ugt <16 x i8> %val1, %val2
+ %ret = select <16 x i1> %cmp, <16 x i8> %val1, <16 x i8> %val2
+ ret <16 x i8> %ret
+}
+
+; Test with uge.
+define <16 x i8> @f8(<16 x i8> %val1, <16 x i8> %val2) {
+; CHECK-LABEL: f8:
+; CHECK: vmxlb %v24, {{%v24, %v26|%v26, %v24}}
+; CHECK: br %r14
+ %cmp = icmp uge <16 x i8> %val1, %val2
+ %ret = select <16 x i1> %cmp, <16 x i8> %val1, <16 x i8> %val2
+ ret <16 x i8> %ret
+}
diff --git a/test/CodeGen/SystemZ/vec-max-02.ll b/test/CodeGen/SystemZ/vec-max-02.ll
new file mode 100644
index 000000000000..2c61603b6f3b
--- /dev/null
+++ b/test/CodeGen/SystemZ/vec-max-02.ll
@@ -0,0 +1,83 @@
+; Test v8i16 maximum.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
+; Test with slt.
+define <8 x i16> @f1(<8 x i16> %val1, <8 x i16> %val2) {
+; CHECK-LABEL: f1:
+; CHECK: vmxh %v24, {{%v24, %v26|%v26, %v24}}
+; CHECK: br %r14
+ %cmp = icmp slt <8 x i16> %val1, %val2
+ %ret = select <8 x i1> %cmp, <8 x i16> %val2, <8 x i16> %val1
+ ret <8 x i16> %ret
+}
+
+; Test with sle.
+define <8 x i16> @f2(<8 x i16> %val1, <8 x i16> %val2) {
+; CHECK-LABEL: f2:
+; CHECK: vmxh %v24, {{%v24, %v26|%v26, %v24}}
+; CHECK: br %r14
+ %cmp = icmp sle <8 x i16> %val1, %val2
+ %ret = select <8 x i1> %cmp, <8 x i16> %val2, <8 x i16> %val1
+ ret <8 x i16> %ret
+}
+
+; Test with sgt.
+define <8 x i16> @f3(<8 x i16> %val1, <8 x i16> %val2) {
+; CHECK-LABEL: f3:
+; CHECK: vmxh %v24, {{%v24, %v26|%v26, %v24}}
+; CHECK: br %r14
+ %cmp = icmp sgt <8 x i16> %val1, %val2
+ %ret = select <8 x i1> %cmp, <8 x i16> %val1, <8 x i16> %val2
+ ret <8 x i16> %ret
+}
+
+; Test with sge.
+define <8 x i16> @f4(<8 x i16> %val1, <8 x i16> %val2) {
+; CHECK-LABEL: f4:
+; CHECK: vmxh %v24, {{%v24, %v26|%v26, %v24}}
+; CHECK: br %r14
+ %cmp = icmp sge <8 x i16> %val1, %val2
+ %ret = select <8 x i1> %cmp, <8 x i16> %val1, <8 x i16> %val2
+ ret <8 x i16> %ret
+}
+
+; Test with ult.
+define <8 x i16> @f5(<8 x i16> %val1, <8 x i16> %val2) {
+; CHECK-LABEL: f5:
+; CHECK: vmxlh %v24, {{%v24, %v26|%v26, %v24}}
+; CHECK: br %r14
+ %cmp = icmp ult <8 x i16> %val1, %val2
+ %ret = select <8 x i1> %cmp, <8 x i16> %val2, <8 x i16> %val1
+ ret <8 x i16> %ret
+}
+
+; Test with ule.
+define <8 x i16> @f6(<8 x i16> %val1, <8 x i16> %val2) {
+; CHECK-LABEL: f6:
+; CHECK: vmxlh %v24, {{%v24, %v26|%v26, %v24}}
+; CHECK: br %r14
+ %cmp = icmp ule <8 x i16> %val1, %val2
+ %ret = select <8 x i1> %cmp, <8 x i16> %val2, <8 x i16> %val1
+ ret <8 x i16> %ret
+}
+
+; Test with ugt.
+define <8 x i16> @f7(<8 x i16> %val1, <8 x i16> %val2) {
+; CHECK-LABEL: f7:
+; CHECK: vmxlh %v24, {{%v24, %v26|%v26, %v24}}
+; CHECK: br %r14
+ %cmp = icmp ugt <8 x i16> %val1, %val2
+ %ret = select <8 x i1> %cmp, <8 x i16> %val1, <8 x i16> %val2
+ ret <8 x i16> %ret
+}
+
+; Test with uge.
+define <8 x i16> @f8(<8 x i16> %val1, <8 x i16> %val2) {
+; CHECK-LABEL: f8:
+; CHECK: vmxlh %v24, {{%v24, %v26|%v26, %v24}}
+; CHECK: br %r14
+ %cmp = icmp uge <8 x i16> %val1, %val2
+ %ret = select <8 x i1> %cmp, <8 x i16> %val1, <8 x i16> %val2
+ ret <8 x i16> %ret
+}
diff --git a/test/CodeGen/SystemZ/vec-max-03.ll b/test/CodeGen/SystemZ/vec-max-03.ll
new file mode 100644
index 000000000000..a43879483997
--- /dev/null
+++ b/test/CodeGen/SystemZ/vec-max-03.ll
@@ -0,0 +1,83 @@
+; Test v4i32 maximum.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
+; Test with slt.
+define <4 x i32> @f1(<4 x i32> %val1, <4 x i32> %val2) {
+; CHECK-LABEL: f1:
+; CHECK: vmxf %v24, {{%v24, %v26|%v26, %v24}}
+; CHECK: br %r14
+ %cmp = icmp slt <4 x i32> %val1, %val2
+ %ret = select <4 x i1> %cmp, <4 x i32> %val2, <4 x i32> %val1
+ ret <4 x i32> %ret
+}
+
+; Test with sle.
+define <4 x i32> @f2(<4 x i32> %val1, <4 x i32> %val2) {
+; CHECK-LABEL: f2:
+; CHECK: vmxf %v24, {{%v24, %v26|%v26, %v24}}
+; CHECK: br %r14
+ %cmp = icmp sle <4 x i32> %val1, %val2
+ %ret = select <4 x i1> %cmp, <4 x i32> %val2, <4 x i32> %val1
+ ret <4 x i32> %ret
+}
+
+; Test with sgt.
+define <4 x i32> @f3(<4 x i32> %val1, <4 x i32> %val2) {
+; CHECK-LABEL: f3:
+; CHECK: vmxf %v24, {{%v24, %v26|%v26, %v24}}
+; CHECK: br %r14
+ %cmp = icmp sgt <4 x i32> %val1, %val2
+ %ret = select <4 x i1> %cmp, <4 x i32> %val1, <4 x i32> %val2
+ ret <4 x i32> %ret
+}
+
+; Test with sge.
+define <4 x i32> @f4(<4 x i32> %val1, <4 x i32> %val2) {
+; CHECK-LABEL: f4:
+; CHECK: vmxf %v24, {{%v24, %v26|%v26, %v24}}
+; CHECK: br %r14
+ %cmp = icmp sge <4 x i32> %val1, %val2
+ %ret = select <4 x i1> %cmp, <4 x i32> %val1, <4 x i32> %val2
+ ret <4 x i32> %ret
+}
+
+; Test with ult.
+define <4 x i32> @f5(<4 x i32> %val1, <4 x i32> %val2) {
+; CHECK-LABEL: f5:
+; CHECK: vmxlf %v24, {{%v24, %v26|%v26, %v24}}
+; CHECK: br %r14
+ %cmp = icmp ult <4 x i32> %val1, %val2
+ %ret = select <4 x i1> %cmp, <4 x i32> %val2, <4 x i32> %val1
+ ret <4 x i32> %ret
+}
+
+; Test with ule.
+define <4 x i32> @f6(<4 x i32> %val1, <4 x i32> %val2) {
+; CHECK-LABEL: f6:
+; CHECK: vmxlf %v24, {{%v24, %v26|%v26, %v24}}
+; CHECK: br %r14
+ %cmp = icmp ule <4 x i32> %val1, %val2
+ %ret = select <4 x i1> %cmp, <4 x i32> %val2, <4 x i32> %val1
+ ret <4 x i32> %ret
+}
+
+; Test with ugt.
+define <4 x i32> @f7(<4 x i32> %val1, <4 x i32> %val2) {
+; CHECK-LABEL: f7:
+; CHECK: vmxlf %v24, {{%v24, %v26|%v26, %v24}}
+; CHECK: br %r14
+ %cmp = icmp ugt <4 x i32> %val1, %val2
+ %ret = select <4 x i1> %cmp, <4 x i32> %val1, <4 x i32> %val2
+ ret <4 x i32> %ret
+}
+
+; Test with uge.
+define <4 x i32> @f8(<4 x i32> %val1, <4 x i32> %val2) {
+; CHECK-LABEL: f8:
+; CHECK: vmxlf %v24, {{%v24, %v26|%v26, %v24}}
+; CHECK: br %r14
+ %cmp = icmp uge <4 x i32> %val1, %val2
+ %ret = select <4 x i1> %cmp, <4 x i32> %val1, <4 x i32> %val2
+ ret <4 x i32> %ret
+}
diff --git a/test/CodeGen/SystemZ/vec-max-04.ll b/test/CodeGen/SystemZ/vec-max-04.ll
new file mode 100644
index 000000000000..ab7c62391277
--- /dev/null
+++ b/test/CodeGen/SystemZ/vec-max-04.ll
@@ -0,0 +1,83 @@
+; Test v2i64 maximum.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
+; Test with slt.
+define <2 x i64> @f1(<2 x i64> %val1, <2 x i64> %val2) {
+; CHECK-LABEL: f1:
+; CHECK: vmxg %v24, {{%v24, %v26|%v26, %v24}}
+; CHECK: br %r14
+ %cmp = icmp slt <2 x i64> %val1, %val2
+ %ret = select <2 x i1> %cmp, <2 x i64> %val2, <2 x i64> %val1
+ ret <2 x i64> %ret
+}
+
+; Test with sle.
+define <2 x i64> @f2(<2 x i64> %val1, <2 x i64> %val2) {
+; CHECK-LABEL: f2:
+; CHECK: vmxg %v24, {{%v24, %v26|%v26, %v24}}
+; CHECK: br %r14
+ %cmp = icmp sle <2 x i64> %val1, %val2
+ %ret = select <2 x i1> %cmp, <2 x i64> %val2, <2 x i64> %val1
+ ret <2 x i64> %ret
+}
+
+; Test with sgt.
+define <2 x i64> @f3(<2 x i64> %val1, <2 x i64> %val2) {
+; CHECK-LABEL: f3:
+; CHECK: vmxg %v24, {{%v24, %v26|%v26, %v24}}
+; CHECK: br %r14
+ %cmp = icmp sgt <2 x i64> %val1, %val2
+ %ret = select <2 x i1> %cmp, <2 x i64> %val1, <2 x i64> %val2
+ ret <2 x i64> %ret
+}
+
+; Test with sge.
+define <2 x i64> @f4(<2 x i64> %val1, <2 x i64> %val2) {
+; CHECK-LABEL: f4:
+; CHECK: vmxg %v24, {{%v24, %v26|%v26, %v24}}
+; CHECK: br %r14
+ %cmp = icmp sge <2 x i64> %val1, %val2
+ %ret = select <2 x i1> %cmp, <2 x i64> %val1, <2 x i64> %val2
+ ret <2 x i64> %ret
+}
+
+; Test with ult.
+define <2 x i64> @f5(<2 x i64> %val1, <2 x i64> %val2) {
+; CHECK-LABEL: f5:
+; CHECK: vmxlg %v24, {{%v24, %v26|%v26, %v24}}
+; CHECK: br %r14
+ %cmp = icmp ult <2 x i64> %val1, %val2
+ %ret = select <2 x i1> %cmp, <2 x i64> %val2, <2 x i64> %val1
+ ret <2 x i64> %ret
+}
+
+; Test with ule.
+define <2 x i64> @f6(<2 x i64> %val1, <2 x i64> %val2) {
+; CHECK-LABEL: f6:
+; CHECK: vmxlg %v24, {{%v24, %v26|%v26, %v24}}
+; CHECK: br %r14
+ %cmp = icmp ule <2 x i64> %val1, %val2
+ %ret = select <2 x i1> %cmp, <2 x i64> %val2, <2 x i64> %val1
+ ret <2 x i64> %ret
+}
+
+; Test with ugt.
+define <2 x i64> @f7(<2 x i64> %val1, <2 x i64> %val2) {
+; CHECK-LABEL: f7:
+; CHECK: vmxlg %v24, {{%v24, %v26|%v26, %v24}}
+; CHECK: br %r14
+ %cmp = icmp ugt <2 x i64> %val1, %val2
+ %ret = select <2 x i1> %cmp, <2 x i64> %val1, <2 x i64> %val2
+ ret <2 x i64> %ret
+}
+
+; Test with uge.
+define <2 x i64> @f8(<2 x i64> %val1, <2 x i64> %val2) {
+; CHECK-LABEL: f8:
+; CHECK: vmxlg %v24, {{%v24, %v26|%v26, %v24}}
+; CHECK: br %r14
+ %cmp = icmp uge <2 x i64> %val1, %val2
+ %ret = select <2 x i1> %cmp, <2 x i64> %val1, <2 x i64> %val2
+ ret <2 x i64> %ret
+}
diff --git a/test/CodeGen/SystemZ/vec-min-01.ll b/test/CodeGen/SystemZ/vec-min-01.ll
new file mode 100644
index 000000000000..255dc57e1134
--- /dev/null
+++ b/test/CodeGen/SystemZ/vec-min-01.ll
@@ -0,0 +1,83 @@
+; Test v16i8 minimum.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
+; Test with slt.
+define <16 x i8> @f1(<16 x i8> %val1, <16 x i8> %val2) {
+; CHECK-LABEL: f1:
+; CHECK: vmnb %v24, {{%v24, %v26|%v26, %v24}}
+; CHECK: br %r14
+ %cmp = icmp slt <16 x i8> %val2, %val1
+ %ret = select <16 x i1> %cmp, <16 x i8> %val2, <16 x i8> %val1
+ ret <16 x i8> %ret
+}
+
+; Test with sle.
+define <16 x i8> @f2(<16 x i8> %val1, <16 x i8> %val2) {
+; CHECK-LABEL: f2:
+; CHECK: vmnb %v24, {{%v24, %v26|%v26, %v24}}
+; CHECK: br %r14
+ %cmp = icmp sle <16 x i8> %val2, %val1
+ %ret = select <16 x i1> %cmp, <16 x i8> %val2, <16 x i8> %val1
+ ret <16 x i8> %ret
+}
+
+; Test with sgt.
+define <16 x i8> @f3(<16 x i8> %val1, <16 x i8> %val2) {
+; CHECK-LABEL: f3:
+; CHECK: vmnb %v24, {{%v24, %v26|%v26, %v24}}
+; CHECK: br %r14
+ %cmp = icmp sgt <16 x i8> %val2, %val1
+ %ret = select <16 x i1> %cmp, <16 x i8> %val1, <16 x i8> %val2
+ ret <16 x i8> %ret
+}
+
+; Test with sge.
+define <16 x i8> @f4(<16 x i8> %val1, <16 x i8> %val2) {
+; CHECK-LABEL: f4:
+; CHECK: vmnb %v24, {{%v24, %v26|%v26, %v24}}
+; CHECK: br %r14
+ %cmp = icmp sge <16 x i8> %val2, %val1
+ %ret = select <16 x i1> %cmp, <16 x i8> %val1, <16 x i8> %val2
+ ret <16 x i8> %ret
+}
+
+; Test with ult.
+define <16 x i8> @f5(<16 x i8> %val1, <16 x i8> %val2) {
+; CHECK-LABEL: f5:
+; CHECK: vmnlb %v24, {{%v24, %v26|%v26, %v24}}
+; CHECK: br %r14
+ %cmp = icmp ult <16 x i8> %val2, %val1
+ %ret = select <16 x i1> %cmp, <16 x i8> %val2, <16 x i8> %val1
+ ret <16 x i8> %ret
+}
+
+; Test with ule.
+define <16 x i8> @f6(<16 x i8> %val1, <16 x i8> %val2) {
+; CHECK-LABEL: f6:
+; CHECK: vmnlb %v24, {{%v24, %v26|%v26, %v24}}
+; CHECK: br %r14
+ %cmp = icmp ule <16 x i8> %val2, %val1
+ %ret = select <16 x i1> %cmp, <16 x i8> %val2, <16 x i8> %val1
+ ret <16 x i8> %ret
+}
+
+; Test with ugt.
+define <16 x i8> @f7(<16 x i8> %val1, <16 x i8> %val2) {
+; CHECK-LABEL: f7:
+; CHECK: vmnlb %v24, {{%v24, %v26|%v26, %v24}}
+; CHECK: br %r14
+ %cmp = icmp ugt <16 x i8> %val2, %val1
+ %ret = select <16 x i1> %cmp, <16 x i8> %val1, <16 x i8> %val2
+ ret <16 x i8> %ret
+}
+
+; Test with uge.
+define <16 x i8> @f8(<16 x i8> %val1, <16 x i8> %val2) {
+; CHECK-LABEL: f8:
+; CHECK: vmnlb %v24, {{%v24, %v26|%v26, %v24}}
+; CHECK: br %r14
+ %cmp = icmp uge <16 x i8> %val2, %val1
+ %ret = select <16 x i1> %cmp, <16 x i8> %val1, <16 x i8> %val2
+ ret <16 x i8> %ret
+}
diff --git a/test/CodeGen/SystemZ/vec-min-02.ll b/test/CodeGen/SystemZ/vec-min-02.ll
new file mode 100644
index 000000000000..cad8a61506c8
--- /dev/null
+++ b/test/CodeGen/SystemZ/vec-min-02.ll
@@ -0,0 +1,83 @@
+; Test v8i16 minimum.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
+; Test with slt.
+define <8 x i16> @f1(<8 x i16> %val1, <8 x i16> %val2) {
+; CHECK-LABEL: f1:
+; CHECK: vmnh %v24, {{%v24, %v26|%v26, %v24}}
+; CHECK: br %r14
+ %cmp = icmp slt <8 x i16> %val2, %val1
+ %ret = select <8 x i1> %cmp, <8 x i16> %val2, <8 x i16> %val1
+ ret <8 x i16> %ret
+}
+
+; Test with sle.
+define <8 x i16> @f2(<8 x i16> %val1, <8 x i16> %val2) {
+; CHECK-LABEL: f2:
+; CHECK: vmnh %v24, {{%v24, %v26|%v26, %v24}}
+; CHECK: br %r14
+ %cmp = icmp sle <8 x i16> %val2, %val1
+ %ret = select <8 x i1> %cmp, <8 x i16> %val2, <8 x i16> %val1
+ ret <8 x i16> %ret
+}
+
+; Test with sgt.
+define <8 x i16> @f3(<8 x i16> %val1, <8 x i16> %val2) {
+; CHECK-LABEL: f3:
+; CHECK: vmnh %v24, {{%v24, %v26|%v26, %v24}}
+; CHECK: br %r14
+ %cmp = icmp sgt <8 x i16> %val2, %val1
+ %ret = select <8 x i1> %cmp, <8 x i16> %val1, <8 x i16> %val2
+ ret <8 x i16> %ret
+}
+
+; Test with sge.
+define <8 x i16> @f4(<8 x i16> %val1, <8 x i16> %val2) {
+; CHECK-LABEL: f4:
+; CHECK: vmnh %v24, {{%v24, %v26|%v26, %v24}}
+; CHECK: br %r14
+ %cmp = icmp sge <8 x i16> %val2, %val1
+ %ret = select <8 x i1> %cmp, <8 x i16> %val1, <8 x i16> %val2
+ ret <8 x i16> %ret
+}
+
+; Test with ult.
+define <8 x i16> @f5(<8 x i16> %val1, <8 x i16> %val2) {
+; CHECK-LABEL: f5:
+; CHECK: vmnlh %v24, {{%v24, %v26|%v26, %v24}}
+; CHECK: br %r14
+ %cmp = icmp ult <8 x i16> %val2, %val1
+ %ret = select <8 x i1> %cmp, <8 x i16> %val2, <8 x i16> %val1
+ ret <8 x i16> %ret
+}
+
+; Test with ule.
+define <8 x i16> @f6(<8 x i16> %val1, <8 x i16> %val2) {
+; CHECK-LABEL: f6:
+; CHECK: vmnlh %v24, {{%v24, %v26|%v26, %v24}}
+; CHECK: br %r14
+ %cmp = icmp ule <8 x i16> %val2, %val1
+ %ret = select <8 x i1> %cmp, <8 x i16> %val2, <8 x i16> %val1
+ ret <8 x i16> %ret
+}
+
+; Test with ugt.
+define <8 x i16> @f7(<8 x i16> %val1, <8 x i16> %val2) {
+; CHECK-LABEL: f7:
+; CHECK: vmnlh %v24, {{%v24, %v26|%v26, %v24}}
+; CHECK: br %r14
+ %cmp = icmp ugt <8 x i16> %val2, %val1
+ %ret = select <8 x i1> %cmp, <8 x i16> %val1, <8 x i16> %val2
+ ret <8 x i16> %ret
+}
+
+; Test with uge.
+define <8 x i16> @f8(<8 x i16> %val1, <8 x i16> %val2) {
+; CHECK-LABEL: f8:
+; CHECK: vmnlh %v24, {{%v24, %v26|%v26, %v24}}
+; CHECK: br %r14
+ %cmp = icmp uge <8 x i16> %val2, %val1
+ %ret = select <8 x i1> %cmp, <8 x i16> %val1, <8 x i16> %val2
+ ret <8 x i16> %ret
+}
diff --git a/test/CodeGen/SystemZ/vec-min-03.ll b/test/CodeGen/SystemZ/vec-min-03.ll
new file mode 100644
index 000000000000..febac50aa462
--- /dev/null
+++ b/test/CodeGen/SystemZ/vec-min-03.ll
@@ -0,0 +1,83 @@
+; Test v4i32 minimum.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
+; Test with slt.
+define <4 x i32> @f1(<4 x i32> %val1, <4 x i32> %val2) {
+; CHECK-LABEL: f1:
+; CHECK: vmnf %v24, {{%v24, %v26|%v26, %v24}}
+; CHECK: br %r14
+ %cmp = icmp slt <4 x i32> %val2, %val1
+ %ret = select <4 x i1> %cmp, <4 x i32> %val2, <4 x i32> %val1
+ ret <4 x i32> %ret
+}
+
+; Test with sle.
+define <4 x i32> @f2(<4 x i32> %val1, <4 x i32> %val2) {
+; CHECK-LABEL: f2:
+; CHECK: vmnf %v24, {{%v24, %v26|%v26, %v24}}
+; CHECK: br %r14
+ %cmp = icmp sle <4 x i32> %val2, %val1
+ %ret = select <4 x i1> %cmp, <4 x i32> %val2, <4 x i32> %val1
+ ret <4 x i32> %ret
+}
+
+; Test with sgt.
+define <4 x i32> @f3(<4 x i32> %val1, <4 x i32> %val2) {
+; CHECK-LABEL: f3:
+; CHECK: vmnf %v24, {{%v24, %v26|%v26, %v24}}
+; CHECK: br %r14
+ %cmp = icmp sgt <4 x i32> %val2, %val1
+ %ret = select <4 x i1> %cmp, <4 x i32> %val1, <4 x i32> %val2
+ ret <4 x i32> %ret
+}
+
+; Test with sge.
+define <4 x i32> @f4(<4 x i32> %val1, <4 x i32> %val2) {
+; CHECK-LABEL: f4:
+; CHECK: vmnf %v24, {{%v24, %v26|%v26, %v24}}
+; CHECK: br %r14
+ %cmp = icmp sge <4 x i32> %val2, %val1
+ %ret = select <4 x i1> %cmp, <4 x i32> %val1, <4 x i32> %val2
+ ret <4 x i32> %ret
+}
+
+; Test with ult.
+define <4 x i32> @f5(<4 x i32> %val1, <4 x i32> %val2) {
+; CHECK-LABEL: f5:
+; CHECK: vmnlf %v24, {{%v24, %v26|%v26, %v24}}
+; CHECK: br %r14
+ %cmp = icmp ult <4 x i32> %val2, %val1
+ %ret = select <4 x i1> %cmp, <4 x i32> %val2, <4 x i32> %val1
+ ret <4 x i32> %ret
+}
+
+; Test with ule.
+define <4 x i32> @f6(<4 x i32> %val1, <4 x i32> %val2) {
+; CHECK-LABEL: f6:
+; CHECK: vmnlf %v24, {{%v24, %v26|%v26, %v24}}
+; CHECK: br %r14
+ %cmp = icmp ule <4 x i32> %val2, %val1
+ %ret = select <4 x i1> %cmp, <4 x i32> %val2, <4 x i32> %val1
+ ret <4 x i32> %ret
+}
+
+; Test with ugt.
+define <4 x i32> @f7(<4 x i32> %val1, <4 x i32> %val2) {
+; CHECK-LABEL: f7:
+; CHECK: vmnlf %v24, {{%v24, %v26|%v26, %v24}}
+; CHECK: br %r14
+ %cmp = icmp ugt <4 x i32> %val2, %val1
+ %ret = select <4 x i1> %cmp, <4 x i32> %val1, <4 x i32> %val2
+ ret <4 x i32> %ret
+}
+
+; Test with uge.
+define <4 x i32> @f8(<4 x i32> %val1, <4 x i32> %val2) {
+; CHECK-LABEL: f8:
+; CHECK: vmnlf %v24, {{%v24, %v26|%v26, %v24}}
+; CHECK: br %r14
+ %cmp = icmp uge <4 x i32> %val2, %val1
+ %ret = select <4 x i1> %cmp, <4 x i32> %val1, <4 x i32> %val2
+ ret <4 x i32> %ret
+}
diff --git a/test/CodeGen/SystemZ/vec-min-04.ll b/test/CodeGen/SystemZ/vec-min-04.ll
new file mode 100644
index 000000000000..765ce1956b5d
--- /dev/null
+++ b/test/CodeGen/SystemZ/vec-min-04.ll
@@ -0,0 +1,83 @@
+; Test v2i64 minimum.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
+; Test with slt.
+define <2 x i64> @f1(<2 x i64> %val1, <2 x i64> %val2) {
+; CHECK-LABEL: f1:
+; CHECK: vmng %v24, {{%v24, %v26|%v26, %v24}}
+; CHECK: br %r14
+ %cmp = icmp slt <2 x i64> %val2, %val1
+ %ret = select <2 x i1> %cmp, <2 x i64> %val2, <2 x i64> %val1
+ ret <2 x i64> %ret
+}
+
+; Test with sle.
+define <2 x i64> @f2(<2 x i64> %val1, <2 x i64> %val2) {
+; CHECK-LABEL: f2:
+; CHECK: vmng %v24, {{%v24, %v26|%v26, %v24}}
+; CHECK: br %r14
+ %cmp = icmp sle <2 x i64> %val2, %val1
+ %ret = select <2 x i1> %cmp, <2 x i64> %val2, <2 x i64> %val1
+ ret <2 x i64> %ret
+}
+
+; Test with sgt.
+define <2 x i64> @f3(<2 x i64> %val1, <2 x i64> %val2) {
+; CHECK-LABEL: f3:
+; CHECK: vmng %v24, {{%v24, %v26|%v26, %v24}}
+; CHECK: br %r14
+ %cmp = icmp sgt <2 x i64> %val2, %val1
+ %ret = select <2 x i1> %cmp, <2 x i64> %val1, <2 x i64> %val2
+ ret <2 x i64> %ret
+}
+
+; Test with sge.
+define <2 x i64> @f4(<2 x i64> %val1, <2 x i64> %val2) {
+; CHECK-LABEL: f4:
+; CHECK: vmng %v24, {{%v24, %v26|%v26, %v24}}
+; CHECK: br %r14
+ %cmp = icmp sge <2 x i64> %val2, %val1
+ %ret = select <2 x i1> %cmp, <2 x i64> %val1, <2 x i64> %val2
+ ret <2 x i64> %ret
+}
+
+; Test with ult.
+define <2 x i64> @f5(<2 x i64> %val1, <2 x i64> %val2) {
+; CHECK-LABEL: f5:
+; CHECK: vmnlg %v24, {{%v24, %v26|%v26, %v24}}
+; CHECK: br %r14
+ %cmp = icmp ult <2 x i64> %val2, %val1
+ %ret = select <2 x i1> %cmp, <2 x i64> %val2, <2 x i64> %val1
+ ret <2 x i64> %ret
+}
+
+; Test with ule.
+define <2 x i64> @f6(<2 x i64> %val1, <2 x i64> %val2) {
+; CHECK-LABEL: f6:
+; CHECK: vmnlg %v24, {{%v24, %v26|%v26, %v24}}
+; CHECK: br %r14
+ %cmp = icmp ule <2 x i64> %val2, %val1
+ %ret = select <2 x i1> %cmp, <2 x i64> %val2, <2 x i64> %val1
+ ret <2 x i64> %ret
+}
+
+; Test with ugt.
+define <2 x i64> @f7(<2 x i64> %val1, <2 x i64> %val2) {
+; CHECK-LABEL: f7:
+; CHECK: vmnlg %v24, {{%v24, %v26|%v26, %v24}}
+; CHECK: br %r14
+ %cmp = icmp ugt <2 x i64> %val2, %val1
+ %ret = select <2 x i1> %cmp, <2 x i64> %val1, <2 x i64> %val2
+ ret <2 x i64> %ret
+}
+
+; Test with uge.
+define <2 x i64> @f8(<2 x i64> %val1, <2 x i64> %val2) {
+; CHECK-LABEL: f8:
+; CHECK: vmnlg %v24, {{%v24, %v26|%v26, %v24}}
+; CHECK: br %r14
+ %cmp = icmp uge <2 x i64> %val2, %val1
+ %ret = select <2 x i1> %cmp, <2 x i64> %val1, <2 x i64> %val2
+ ret <2 x i64> %ret
+}
diff --git a/test/CodeGen/SystemZ/vec-move-01.ll b/test/CodeGen/SystemZ/vec-move-01.ll
new file mode 100644
index 000000000000..3ef98b7eda03
--- /dev/null
+++ b/test/CodeGen/SystemZ/vec-move-01.ll
@@ -0,0 +1,107 @@
+; Test vector register moves.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
+; Test v16i8 moves.
+define <16 x i8> @f1(<16 x i8> %val1, <16 x i8> %val2) {
+; CHECK-LABEL: f1:
+; CHECK: vlr %v24, %v26
+; CHECK: br %r14
+ ret <16 x i8> %val2
+}
+
+; Test v8i16 moves.
+define <8 x i16> @f2(<8 x i16> %val1, <8 x i16> %val2) {
+; CHECK-LABEL: f2:
+; CHECK: vlr %v24, %v26
+; CHECK: br %r14
+ ret <8 x i16> %val2
+}
+
+; Test v4i32 moves.
+define <4 x i32> @f3(<4 x i32> %val1, <4 x i32> %val2) {
+; CHECK-LABEL: f3:
+; CHECK: vlr %v24, %v26
+; CHECK: br %r14
+ ret <4 x i32> %val2
+}
+
+; Test v2i64 moves.
+define <2 x i64> @f4(<2 x i64> %val1, <2 x i64> %val2) {
+; CHECK-LABEL: f4:
+; CHECK: vlr %v24, %v26
+; CHECK: br %r14
+ ret <2 x i64> %val2
+}
+
+; Test v4f32 moves.
+define <4 x float> @f5(<4 x float> %val1, <4 x float> %val2) {
+; CHECK-LABEL: f5:
+; CHECK: vlr %v24, %v26
+; CHECK: br %r14
+ ret <4 x float> %val2
+}
+
+; Test v2f64 moves.
+define <2 x double> @f6(<2 x double> %val1, <2 x double> %val2) {
+; CHECK-LABEL: f6:
+; CHECK: vlr %v24, %v26
+; CHECK: br %r14
+ ret <2 x double> %val2
+}
+
+; Test v2i8 moves.
+define <2 x i8> @f7(<2 x i8> %val1, <2 x i8> %val2) {
+; CHECK-LABEL: f7:
+; CHECK: vlr %v24, %v26
+; CHECK: br %r14
+ ret <2 x i8> %val2
+}
+
+; Test v4i8 moves.
+define <4 x i8> @f8(<4 x i8> %val1, <4 x i8> %val2) {
+; CHECK-LABEL: f8:
+; CHECK: vlr %v24, %v26
+; CHECK: br %r14
+ ret <4 x i8> %val2
+}
+
+; Test v8i8 moves.
+define <8 x i8> @f9(<8 x i8> %val1, <8 x i8> %val2) {
+; CHECK-LABEL: f9:
+; CHECK: vlr %v24, %v26
+; CHECK: br %r14
+ ret <8 x i8> %val2
+}
+
+; Test v2i16 moves.
+define <2 x i16> @f10(<2 x i16> %val1, <2 x i16> %val2) {
+; CHECK-LABEL: f10:
+; CHECK: vlr %v24, %v26
+; CHECK: br %r14
+ ret <2 x i16> %val2
+}
+
+; Test v4i16 moves.
+define <4 x i16> @f11(<4 x i16> %val1, <4 x i16> %val2) {
+; CHECK-LABEL: f11:
+; CHECK: vlr %v24, %v26
+; CHECK: br %r14
+ ret <4 x i16> %val2
+}
+
+; Test v2i32 moves.
+define <2 x i32> @f12(<2 x i32> %val1, <2 x i32> %val2) {
+; CHECK-LABEL: f12:
+; CHECK: vlr %v24, %v26
+; CHECK: br %r14
+ ret <2 x i32> %val2
+}
+
+; Test v2f32 moves.
+define <2 x float> @f13(<2 x float> %val1, <2 x float> %val2) {
+; CHECK-LABEL: f13:
+; CHECK: vlr %v24, %v26
+; CHECK: br %r14
+ ret <2 x float> %val2
+}
diff --git a/test/CodeGen/SystemZ/vec-move-02.ll b/test/CodeGen/SystemZ/vec-move-02.ll
new file mode 100644
index 000000000000..dcaf0acccb2f
--- /dev/null
+++ b/test/CodeGen/SystemZ/vec-move-02.ll
@@ -0,0 +1,174 @@
+; Test vector loads.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
+; Test v16i8 loads.
+define <16 x i8> @f1(<16 x i8> *%ptr) {
+; CHECK-LABEL: f1:
+; CHECK: vl %v24, 0(%r2)
+; CHECK: br %r14
+ %ret = load <16 x i8>, <16 x i8> *%ptr
+ ret <16 x i8> %ret
+}
+
+; Test v8i16 loads.
+define <8 x i16> @f2(<8 x i16> *%ptr) {
+; CHECK-LABEL: f2:
+; CHECK: vl %v24, 0(%r2)
+; CHECK: br %r14
+ %ret = load <8 x i16>, <8 x i16> *%ptr
+ ret <8 x i16> %ret
+}
+
+; Test v4i32 loads.
+define <4 x i32> @f3(<4 x i32> *%ptr) {
+; CHECK-LABEL: f3:
+; CHECK: vl %v24, 0(%r2)
+; CHECK: br %r14
+ %ret = load <4 x i32>, <4 x i32> *%ptr
+ ret <4 x i32> %ret
+}
+
+; Test v2i64 loads.
+define <2 x i64> @f4(<2 x i64> *%ptr) {
+; CHECK-LABEL: f4:
+; CHECK: vl %v24, 0(%r2)
+; CHECK: br %r14
+ %ret = load <2 x i64>, <2 x i64> *%ptr
+ ret <2 x i64> %ret
+}
+
+; Test v4f32 loads.
+define <4 x float> @f5(<4 x float> *%ptr) {
+; CHECK-LABEL: f5:
+; CHECK: vl %v24, 0(%r2)
+; CHECK: br %r14
+ %ret = load <4 x float>, <4 x float> *%ptr
+ ret <4 x float> %ret
+}
+
+; Test v2f64 loads.
+define <2 x double> @f6(<2 x double> *%ptr) {
+; CHECK-LABEL: f6:
+; CHECK: vl %v24, 0(%r2)
+; CHECK: br %r14
+ %ret = load <2 x double>, <2 x double> *%ptr
+ ret <2 x double> %ret
+}
+
+; Test the highest aligned in-range offset.
+define <16 x i8> @f7(<16 x i8> *%base) {
+; CHECK-LABEL: f7:
+; CHECK: vl %v24, 4080(%r2)
+; CHECK: br %r14
+ %ptr = getelementptr <16 x i8>, <16 x i8> *%base, i64 255
+ %ret = load <16 x i8>, <16 x i8> *%ptr
+ ret <16 x i8> %ret
+}
+
+; Test the highest unaligned in-range offset.
+define <16 x i8> @f8(i8 *%base) {
+; CHECK-LABEL: f8:
+; CHECK: vl %v24, 4095(%r2)
+; CHECK: br %r14
+ %addr = getelementptr i8, i8 *%base, i64 4095
+ %ptr = bitcast i8 *%addr to <16 x i8> *
+ %ret = load <16 x i8>, <16 x i8> *%ptr, align 1
+ ret <16 x i8> %ret
+}
+
+; Test the next offset up, which requires separate address logic.
+define <16 x i8> @f9(<16 x i8> *%base) {
+; CHECK-LABEL: f9:
+; CHECK: aghi %r2, 4096
+; CHECK: vl %v24, 0(%r2)
+; CHECK: br %r14
+ %ptr = getelementptr <16 x i8>, <16 x i8> *%base, i64 256
+ %ret = load <16 x i8>, <16 x i8> *%ptr
+ ret <16 x i8> %ret
+}
+
+; Test negative offsets, which also require separate address logic.
+define <16 x i8> @f10(<16 x i8> *%base) {
+; CHECK-LABEL: f10:
+; CHECK: aghi %r2, -16
+; CHECK: vl %v24, 0(%r2)
+; CHECK: br %r14
+ %ptr = getelementptr <16 x i8>, <16 x i8> *%base, i64 -1
+ %ret = load <16 x i8>, <16 x i8> *%ptr
+ ret <16 x i8> %ret
+}
+
+; Check that indexes are allowed.
+define <16 x i8> @f11(i8 *%base, i64 %index) {
+; CHECK-LABEL: f11:
+; CHECK: vl %v24, 0(%r3,%r2)
+; CHECK: br %r14
+ %addr = getelementptr i8, i8 *%base, i64 %index
+ %ptr = bitcast i8 *%addr to <16 x i8> *
+ %ret = load <16 x i8>, <16 x i8> *%ptr, align 1
+ ret <16 x i8> %ret
+}
+
+; Test v2i8 loads.
+define <2 x i8> @f12(<2 x i8> *%ptr) {
+; CHECK-LABEL: f12:
+; CHECK: vlreph %v24, 0(%r2)
+; CHECK: br %r14
+ %ret = load <2 x i8>, <2 x i8> *%ptr
+ ret <2 x i8> %ret
+}
+
+; Test v4i8 loads.
+define <4 x i8> @f13(<4 x i8> *%ptr) {
+; CHECK-LABEL: f13:
+; CHECK: vlrepf %v24, 0(%r2)
+; CHECK: br %r14
+ %ret = load <4 x i8>, <4 x i8> *%ptr
+ ret <4 x i8> %ret
+}
+
+; Test v8i8 loads.
+define <8 x i8> @f14(<8 x i8> *%ptr) {
+; CHECK-LABEL: f14:
+; CHECK: vlrepg %v24, 0(%r2)
+; CHECK: br %r14
+ %ret = load <8 x i8>, <8 x i8> *%ptr
+ ret <8 x i8> %ret
+}
+
+; Test v2i16 loads.
+define <2 x i16> @f15(<2 x i16> *%ptr) {
+; CHECK-LABEL: f15:
+; CHECK: vlrepf %v24, 0(%r2)
+; CHECK: br %r14
+ %ret = load <2 x i16>, <2 x i16> *%ptr
+ ret <2 x i16> %ret
+}
+
+; Test v4i16 loads.
+define <4 x i16> @f16(<4 x i16> *%ptr) {
+; CHECK-LABEL: f16:
+; CHECK: vlrepg %v24, 0(%r2)
+; CHECK: br %r14
+ %ret = load <4 x i16>, <4 x i16> *%ptr
+ ret <4 x i16> %ret
+}
+
+; Test v2i32 loads.
+define <2 x i32> @f17(<2 x i32> *%ptr) {
+; CHECK-LABEL: f17:
+; CHECK: vlrepg %v24, 0(%r2)
+; CHECK: br %r14
+ %ret = load <2 x i32>, <2 x i32> *%ptr
+ ret <2 x i32> %ret
+}
+
+; Test v2f32 loads.
+define <2 x float> @f18(<2 x float> *%ptr) {
+; CHECK-LABEL: f18:
+; CHECK: vlrepg %v24, 0(%r2)
+; CHECK: br %r14
+ %ret = load <2 x float>, <2 x float> *%ptr
+ ret <2 x float> %ret
+}
diff --git a/test/CodeGen/SystemZ/vec-move-03.ll b/test/CodeGen/SystemZ/vec-move-03.ll
new file mode 100644
index 000000000000..f40e2cb2bf28
--- /dev/null
+++ b/test/CodeGen/SystemZ/vec-move-03.ll
@@ -0,0 +1,174 @@
+; Test vector stores.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
+; Test v16i8 stores.
+define void @f1(<16 x i8> %val, <16 x i8> *%ptr) {
+; CHECK-LABEL: f1:
+; CHECK: vst %v24, 0(%r2)
+; CHECK: br %r14
+ store <16 x i8> %val, <16 x i8> *%ptr
+ ret void
+}
+
+; Test v8i16 stores.
+define void @f2(<8 x i16> %val, <8 x i16> *%ptr) {
+; CHECK-LABEL: f2:
+; CHECK: vst %v24, 0(%r2)
+; CHECK: br %r14
+ store <8 x i16> %val, <8 x i16> *%ptr
+ ret void
+}
+
+; Test v4i32 stores.
+define void @f3(<4 x i32> %val, <4 x i32> *%ptr) {
+; CHECK-LABEL: f3:
+; CHECK: vst %v24, 0(%r2)
+; CHECK: br %r14
+ store <4 x i32> %val, <4 x i32> *%ptr
+ ret void
+}
+
+; Test v2i64 stores.
+define void @f4(<2 x i64> %val, <2 x i64> *%ptr) {
+; CHECK-LABEL: f4:
+; CHECK: vst %v24, 0(%r2)
+; CHECK: br %r14
+ store <2 x i64> %val, <2 x i64> *%ptr
+ ret void
+}
+
+; Test v4f32 stores.
+define void @f5(<4 x float> %val, <4 x float> *%ptr) {
+; CHECK-LABEL: f5:
+; CHECK: vst %v24, 0(%r2)
+; CHECK: br %r14
+ store <4 x float> %val, <4 x float> *%ptr
+ ret void
+}
+
+; Test v2f64 stores.
+define void @f6(<2 x double> %val, <2 x double> *%ptr) {
+; CHECK-LABEL: f6:
+; CHECK: vst %v24, 0(%r2)
+; CHECK: br %r14
+ store <2 x double> %val, <2 x double> *%ptr
+ ret void
+}
+
+; Test the highest aligned in-range offset.
+define void @f7(<16 x i8> %val, <16 x i8> *%base) {
+; CHECK-LABEL: f7:
+; CHECK: vst %v24, 4080(%r2)
+; CHECK: br %r14
+ %ptr = getelementptr <16 x i8>, <16 x i8> *%base, i64 255
+ store <16 x i8> %val, <16 x i8> *%ptr
+ ret void
+}
+
+; Test the highest unaligned in-range offset.
+define void @f8(<16 x i8> %val, i8 *%base) {
+; CHECK-LABEL: f8:
+; CHECK: vst %v24, 4095(%r2)
+; CHECK: br %r14
+ %addr = getelementptr i8, i8 *%base, i64 4095
+ %ptr = bitcast i8 *%addr to <16 x i8> *
+ store <16 x i8> %val, <16 x i8> *%ptr, align 1
+ ret void
+}
+
+; Test the next offset up, which requires separate address logic.
+define void @f9(<16 x i8> %val, <16 x i8> *%base) {
+; CHECK-LABEL: f9:
+; CHECK: aghi %r2, 4096
+; CHECK: vst %v24, 0(%r2)
+; CHECK: br %r14
+ %ptr = getelementptr <16 x i8>, <16 x i8> *%base, i64 256
+ store <16 x i8> %val, <16 x i8> *%ptr
+ ret void
+}
+
+; Test negative offsets, which also require separate address logic.
+define void @f10(<16 x i8> %val, <16 x i8> *%base) {
+; CHECK-LABEL: f10:
+; CHECK: aghi %r2, -16
+; CHECK: vst %v24, 0(%r2)
+; CHECK: br %r14
+ %ptr = getelementptr <16 x i8>, <16 x i8> *%base, i64 -1
+ store <16 x i8> %val, <16 x i8> *%ptr
+ ret void
+}
+
+; Check that indexes are allowed.
+define void @f11(<16 x i8> %val, i8 *%base, i64 %index) {
+; CHECK-LABEL: f11:
+; CHECK: vst %v24, 0(%r3,%r2)
+; CHECK: br %r14
+ %addr = getelementptr i8, i8 *%base, i64 %index
+ %ptr = bitcast i8 *%addr to <16 x i8> *
+ store <16 x i8> %val, <16 x i8> *%ptr, align 1
+ ret void
+}
+
+; Test v2i8 stores.
+define void @f12(<2 x i8> %val, <2 x i8> *%ptr) {
+; CHECK-LABEL: f12:
+; CHECK: vsteh %v24, 0(%r2), 0
+; CHECK: br %r14
+ store <2 x i8> %val, <2 x i8> *%ptr
+ ret void
+}
+
+; Test v4i8 stores.
+define void @f13(<4 x i8> %val, <4 x i8> *%ptr) {
+; CHECK-LABEL: f13:
+; CHECK: vstef %v24, 0(%r2)
+; CHECK: br %r14
+ store <4 x i8> %val, <4 x i8> *%ptr
+ ret void
+}
+
+; Test v8i8 stores.
+define void @f14(<8 x i8> %val, <8 x i8> *%ptr) {
+; CHECK-LABEL: f14:
+; CHECK: vsteg %v24, 0(%r2)
+; CHECK: br %r14
+ store <8 x i8> %val, <8 x i8> *%ptr
+ ret void
+}
+
+; Test v2i16 stores.
+define void @f15(<2 x i16> %val, <2 x i16> *%ptr) {
+; CHECK-LABEL: f15:
+; CHECK: vstef %v24, 0(%r2), 0
+; CHECK: br %r14
+ store <2 x i16> %val, <2 x i16> *%ptr
+ ret void
+}
+
+; Test v4i16 stores.
+define void @f16(<4 x i16> %val, <4 x i16> *%ptr) {
+; CHECK-LABEL: f16:
+; CHECK: vsteg %v24, 0(%r2)
+; CHECK: br %r14
+ store <4 x i16> %val, <4 x i16> *%ptr
+ ret void
+}
+
+; Test v2i32 stores.
+define void @f17(<2 x i32> %val, <2 x i32> *%ptr) {
+; CHECK-LABEL: f17:
+; CHECK: vsteg %v24, 0(%r2), 0
+; CHECK: br %r14
+ store <2 x i32> %val, <2 x i32> *%ptr
+ ret void
+}
+
+; Test v2f32 stores.
+define void @f18(<2 x float> %val, <2 x float> *%ptr) {
+; CHECK-LABEL: f18:
+; CHECK: vsteg %v24, 0(%r2), 0
+; CHECK: br %r14
+ store <2 x float> %val, <2 x float> *%ptr
+ ret void
+}
diff --git a/test/CodeGen/SystemZ/vec-move-04.ll b/test/CodeGen/SystemZ/vec-move-04.ll
new file mode 100644
index 000000000000..27c9e5f71f40
--- /dev/null
+++ b/test/CodeGen/SystemZ/vec-move-04.ll
@@ -0,0 +1,179 @@
+; Test vector insertion of register variables.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
+; Test v16i8 insertion into the first element.
+define <16 x i8> @f1(<16 x i8> %val, i8 %element) {
+; CHECK-LABEL: f1:
+; CHECK: vlvgb %v24, %r2, 0
+; CHECK: br %r14
+ %ret = insertelement <16 x i8> %val, i8 %element, i32 0
+ ret <16 x i8> %ret
+}
+
+; Test v16i8 insertion into the last element.
+define <16 x i8> @f2(<16 x i8> %val, i8 %element) {
+; CHECK-LABEL: f2:
+; CHECK: vlvgb %v24, %r2, 15
+; CHECK: br %r14
+ %ret = insertelement <16 x i8> %val, i8 %element, i32 15
+ ret <16 x i8> %ret
+}
+
+; Test v16i8 insertion into a variable element.
+define <16 x i8> @f3(<16 x i8> %val, i8 %element, i32 %index) {
+; CHECK-LABEL: f3:
+; CHECK: vlvgb %v24, %r2, 0(%r3)
+; CHECK: br %r14
+ %ret = insertelement <16 x i8> %val, i8 %element, i32 %index
+ ret <16 x i8> %ret
+}
+
+; Test v8i16 insertion into the first element.
+define <8 x i16> @f4(<8 x i16> %val, i16 %element) {
+; CHECK-LABEL: f4:
+; CHECK: vlvgh %v24, %r2, 0
+; CHECK: br %r14
+ %ret = insertelement <8 x i16> %val, i16 %element, i32 0
+ ret <8 x i16> %ret
+}
+
+; Test v8i16 insertion into the last element.
+define <8 x i16> @f5(<8 x i16> %val, i16 %element) {
+; CHECK-LABEL: f5:
+; CHECK: vlvgh %v24, %r2, 7
+; CHECK: br %r14
+ %ret = insertelement <8 x i16> %val, i16 %element, i32 7
+ ret <8 x i16> %ret
+}
+
+; Test v8i16 insertion into a variable element.
+define <8 x i16> @f6(<8 x i16> %val, i16 %element, i32 %index) {
+; CHECK-LABEL: f6:
+; CHECK: vlvgh %v24, %r2, 0(%r3)
+; CHECK: br %r14
+ %ret = insertelement <8 x i16> %val, i16 %element, i32 %index
+ ret <8 x i16> %ret
+}
+
+; Test v4i32 insertion into the first element.
+define <4 x i32> @f7(<4 x i32> %val, i32 %element) {
+; CHECK-LABEL: f7:
+; CHECK: vlvgf %v24, %r2, 0
+; CHECK: br %r14
+ %ret = insertelement <4 x i32> %val, i32 %element, i32 0
+ ret <4 x i32> %ret
+}
+
+; Test v4i32 insertion into the last element.
+define <4 x i32> @f8(<4 x i32> %val, i32 %element) {
+; CHECK-LABEL: f8:
+; CHECK: vlvgf %v24, %r2, 3
+; CHECK: br %r14
+ %ret = insertelement <4 x i32> %val, i32 %element, i32 3
+ ret <4 x i32> %ret
+}
+
+; Test v4i32 insertion into a variable element.
+define <4 x i32> @f9(<4 x i32> %val, i32 %element, i32 %index) {
+; CHECK-LABEL: f9:
+; CHECK: vlvgf %v24, %r2, 0(%r3)
+; CHECK: br %r14
+ %ret = insertelement <4 x i32> %val, i32 %element, i32 %index
+ ret <4 x i32> %ret
+}
+
+; Test v2i64 insertion into the first element.
+define <2 x i64> @f10(<2 x i64> %val, i64 %element) {
+; CHECK-LABEL: f10:
+; CHECK: vlvgg %v24, %r2, 0
+; CHECK: br %r14
+ %ret = insertelement <2 x i64> %val, i64 %element, i32 0
+ ret <2 x i64> %ret
+}
+
+; Test v2i64 insertion into the last element.
+define <2 x i64> @f11(<2 x i64> %val, i64 %element) {
+; CHECK-LABEL: f11:
+; CHECK: vlvgg %v24, %r2, 1
+; CHECK: br %r14
+ %ret = insertelement <2 x i64> %val, i64 %element, i32 1
+ ret <2 x i64> %ret
+}
+
+; Test v2i64 insertion into a variable element.
+define <2 x i64> @f12(<2 x i64> %val, i64 %element, i32 %index) {
+; CHECK-LABEL: f12:
+; CHECK: vlvgg %v24, %r2, 0(%r3)
+; CHECK: br %r14
+ %ret = insertelement <2 x i64> %val, i64 %element, i32 %index
+ ret <2 x i64> %ret
+}
+
+; Test v4f32 insertion into the first element.
+define <4 x float> @f13(<4 x float> %val, float %element) {
+; CHECK-LABEL: f13:
+; CHECK: vlgvf [[REG:%r[0-5]]], %v0, 0
+; CHECK: vlvgf %v24, [[REG]], 0
+; CHECK: br %r14
+ %ret = insertelement <4 x float> %val, float %element, i32 0
+ ret <4 x float> %ret
+}
+
+; Test v4f32 insertion into the last element.
+define <4 x float> @f14(<4 x float> %val, float %element) {
+; CHECK-LABEL: f14:
+; CHECK: vlgvf [[REG:%r[0-5]]], %v0, 0
+; CHECK: vlvgf %v24, [[REG]], 3
+; CHECK: br %r14
+ %ret = insertelement <4 x float> %val, float %element, i32 3
+ ret <4 x float> %ret
+}
+
+; Test v4f32 insertion into a variable element.
+define <4 x float> @f15(<4 x float> %val, float %element, i32 %index) {
+; CHECK-LABEL: f15:
+; CHECK: vlgvf [[REG:%r[0-5]]], %v0, 0
+; CHECK: vlvgf %v24, [[REG]], 0(%r2)
+; CHECK: br %r14
+ %ret = insertelement <4 x float> %val, float %element, i32 %index
+ ret <4 x float> %ret
+}
+
+; Test v2f64 insertion into the first element.
+define <2 x double> @f16(<2 x double> %val, double %element) {
+; CHECK-LABEL: f16:
+; CHECK: vpdi %v24, %v0, %v24, 1
+; CHECK: br %r14
+ %ret = insertelement <2 x double> %val, double %element, i32 0
+ ret <2 x double> %ret
+}
+
+; Test v2f64 insertion into the last element.
+define <2 x double> @f17(<2 x double> %val, double %element) {
+; CHECK-LABEL: f17:
+; CHECK: vpdi %v24, %v24, %v0, 0
+; CHECK: br %r14
+ %ret = insertelement <2 x double> %val, double %element, i32 1
+ ret <2 x double> %ret
+}
+
+; Test v2f64 insertion into a variable element.
+define <2 x double> @f18(<2 x double> %val, double %element, i32 %index) {
+; CHECK-LABEL: f18:
+; CHECK: lgdr [[REG:%r[0-5]]], %f0
+; CHECK: vlvgg %v24, [[REG]], 0(%r2)
+; CHECK: br %r14
+ %ret = insertelement <2 x double> %val, double %element, i32 %index
+ ret <2 x double> %ret
+}
+
+; Test v16i8 insertion into a variable element plus one.
+define <16 x i8> @f19(<16 x i8> %val, i8 %element, i32 %index) {
+; CHECK-LABEL: f19:
+; CHECK: vlvgb %v24, %r2, 1(%r3)
+; CHECK: br %r14
+ %add = add i32 %index, 1
+ %ret = insertelement <16 x i8> %val, i8 %element, i32 %add
+ ret <16 x i8> %ret
+}
diff --git a/test/CodeGen/SystemZ/vec-move-05.ll b/test/CodeGen/SystemZ/vec-move-05.ll
new file mode 100644
index 000000000000..99871196d685
--- /dev/null
+++ b/test/CodeGen/SystemZ/vec-move-05.ll
@@ -0,0 +1,249 @@
+; Test vector extraction.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
+; Test v16i8 extraction of the first element.
+define i8 @f1(<16 x i8> %val) {
+; CHECK-LABEL: f1:
+; CHECK: vlgvb %r2, %v24, 0
+; CHECK: br %r14
+ %ret = extractelement <16 x i8> %val, i32 0
+ ret i8 %ret
+}
+
+; Test v16i8 extraction of the last element.
+define i8 @f2(<16 x i8> %val) {
+; CHECK-LABEL: f2:
+; CHECK: vlgvb %r2, %v24, 15
+; CHECK: br %r14
+ %ret = extractelement <16 x i8> %val, i32 15
+ ret i8 %ret
+}
+
+; Test v16i8 extraction of an absurd element number. This must compile
+; but we don't care what it does.
+define i8 @f3(<16 x i8> %val) {
+; CHECK-LABEL: f3:
+; CHECK-NOT: vlgvb %r2, %v24, 100000
+; CHECK: br %r14
+ %ret = extractelement <16 x i8> %val, i32 100000
+ ret i8 %ret
+}
+
+; Test v16i8 extraction of a variable element.
+define i8 @f4(<16 x i8> %val, i32 %index) {
+; CHECK-LABEL: f4:
+; CHECK: vlgvb %r2, %v24, 0(%r2)
+; CHECK: br %r14
+ %ret = extractelement <16 x i8> %val, i32 %index
+ ret i8 %ret
+}
+
+; Test v8i16 extraction of the first element.
+define i16 @f5(<8 x i16> %val) {
+; CHECK-LABEL: f5:
+; CHECK: vlgvh %r2, %v24, 0
+; CHECK: br %r14
+ %ret = extractelement <8 x i16> %val, i32 0
+ ret i16 %ret
+}
+
+; Test v8i16 extraction of the last element.
+define i16 @f6(<8 x i16> %val) {
+; CHECK-LABEL: f6:
+; CHECK: vlgvh %r2, %v24, 7
+; CHECK: br %r14
+ %ret = extractelement <8 x i16> %val, i32 7
+ ret i16 %ret
+}
+
+; Test v8i16 extraction of an absurd element number. This must compile
+; but we don't care what it does.
+define i16 @f7(<8 x i16> %val) {
+; CHECK-LABEL: f7:
+; CHECK-NOT: vlgvh %r2, %v24, 100000
+; CHECK: br %r14
+ %ret = extractelement <8 x i16> %val, i32 100000
+ ret i16 %ret
+}
+
+; Test v8i16 extraction of a variable element.
+define i16 @f8(<8 x i16> %val, i32 %index) {
+; CHECK-LABEL: f8:
+; CHECK: vlgvh %r2, %v24, 0(%r2)
+; CHECK: br %r14
+ %ret = extractelement <8 x i16> %val, i32 %index
+ ret i16 %ret
+}
+
+; Test v4i32 extraction of the first element.
+define i32 @f9(<4 x i32> %val) {
+; CHECK-LABEL: f9:
+; CHECK: vlgvf %r2, %v24, 0
+; CHECK: br %r14
+ %ret = extractelement <4 x i32> %val, i32 0
+ ret i32 %ret
+}
+
+; Test v4i32 extraction of the last element.
+define i32 @f10(<4 x i32> %val) {
+; CHECK-LABEL: f10:
+; CHECK: vlgvf %r2, %v24, 3
+; CHECK: br %r14
+ %ret = extractelement <4 x i32> %val, i32 3
+ ret i32 %ret
+}
+
+; Test v4i32 extraction of an absurd element number. This must compile
+; but we don't care what it does.
+define i32 @f11(<4 x i32> %val) {
+; CHECK-LABEL: f11:
+; CHECK-NOT: vlgvf %r2, %v24, 100000
+; CHECK: br %r14
+ %ret = extractelement <4 x i32> %val, i32 100000
+ ret i32 %ret
+}
+
+; Test v4i32 extraction of a variable element.
+define i32 @f12(<4 x i32> %val, i32 %index) {
+; CHECK-LABEL: f12:
+; CHECK: vlgvf %r2, %v24, 0(%r2)
+; CHECK: br %r14
+ %ret = extractelement <4 x i32> %val, i32 %index
+ ret i32 %ret
+}
+
+; Test v2i64 extraction of the first element.
+define i64 @f13(<2 x i64> %val) {
+; CHECK-LABEL: f13:
+; CHECK: vlgvg %r2, %v24, 0
+; CHECK: br %r14
+ %ret = extractelement <2 x i64> %val, i32 0
+ ret i64 %ret
+}
+
+; Test v2i64 extraction of the last element.
+define i64 @f14(<2 x i64> %val) {
+; CHECK-LABEL: f14:
+; CHECK: vlgvg %r2, %v24, 1
+; CHECK: br %r14
+ %ret = extractelement <2 x i64> %val, i32 1
+ ret i64 %ret
+}
+
+; Test v2i64 extraction of an absurd element number. This must compile
+; but we don't care what it does.
+define i64 @f15(<2 x i64> %val) {
+; CHECK-LABEL: f15:
+; CHECK-NOT: vlgvg %r2, %v24, 100000
+; CHECK: br %r14
+ %ret = extractelement <2 x i64> %val, i32 100000
+ ret i64 %ret
+}
+
+; Test v2i64 extraction of a variable element.
+define i64 @f16(<2 x i64> %val, i32 %index) {
+; CHECK-LABEL: f16:
+; CHECK: vlgvg %r2, %v24, 0(%r2)
+; CHECK: br %r14
+ %ret = extractelement <2 x i64> %val, i32 %index
+ ret i64 %ret
+}
+
+; Test v4f32 extraction of element 0.
+define float @f17(<4 x float> %val) {
+; CHECK-LABEL: f17:
+; CHECK: vlr %v0, %v24
+; CHECK: br %r14
+ %ret = extractelement <4 x float> %val, i32 0
+ ret float %ret
+}
+
+; Test v4f32 extraction of element 1.
+define float @f18(<4 x float> %val) {
+; CHECK-LABEL: f18:
+; CHECK: vrepf %v0, %v24, 1
+; CHECK: br %r14
+ %ret = extractelement <4 x float> %val, i32 1
+ ret float %ret
+}
+
+; Test v4f32 extraction of element 2.
+define float @f19(<4 x float> %val) {
+; CHECK-LABEL: f19:
+; CHECK: vrepf %v0, %v24, 2
+; CHECK: br %r14
+ %ret = extractelement <4 x float> %val, i32 2
+ ret float %ret
+}
+
+; Test v4f32 extraction of element 3.
+define float @f20(<4 x float> %val) {
+; CHECK-LABEL: f20:
+; CHECK: vrepf %v0, %v24, 3
+; CHECK: br %r14
+ %ret = extractelement <4 x float> %val, i32 3
+ ret float %ret
+}
+
+; Test v4f32 extraction of an absurd element number. This must compile
+; but we don't care what it does.
+define float @f21(<4 x float> %val) {
+ %ret = extractelement <4 x float> %val, i32 100000
+ ret float %ret
+}
+
+; Test v4f32 extraction of a variable element.
+define float @f22(<4 x float> %val, i32 %index) {
+; CHECK-LABEL: f22:
+; CHECK: vlgvf [[REG:%r[0-5]]], %v24, 0(%r2)
+; CHECK: vlvgf %v0, [[REG]], 0
+; CHECK: br %r14
+ %ret = extractelement <4 x float> %val, i32 %index
+ ret float %ret
+}
+
+; Test v2f64 extraction of the first element.
+define double @f23(<2 x double> %val) {
+; CHECK-LABEL: f23:
+; CHECK: vlr %v0, %v24
+; CHECK: br %r14
+ %ret = extractelement <2 x double> %val, i32 0
+ ret double %ret
+}
+
+; Test v2f64 extraction of the last element.
+define double @f24(<2 x double> %val) {
+; CHECK-LABEL: f24:
+; CHECK: vrepg %v0, %v24, 1
+; CHECK: br %r14
+ %ret = extractelement <2 x double> %val, i32 1
+ ret double %ret
+}
+
+; Test v2f64 extraction of an absurd element number. This must compile
+; but we don't care what it does.
+define double @f25(<2 x double> %val) {
+ %ret = extractelement <2 x double> %val, i32 100000
+ ret double %ret
+}
+
+; Test v2f64 extraction of a variable element.
+define double @f26(<2 x double> %val, i32 %index) {
+; CHECK-LABEL: f26:
+; CHECK: vlgvg [[REG:%r[0-5]]], %v24, 0(%r2)
+; CHECK: ldgr %f0, [[REG]]
+; CHECK: br %r14
+ %ret = extractelement <2 x double> %val, i32 %index
+ ret double %ret
+}
+
+; Test v16i8 extraction of a variable element with an offset.
+define i8 @f27(<16 x i8> %val, i32 %index) {
+; CHECK-LABEL: f27:
+; CHECK: vlgvb %r2, %v24, 1(%r2)
+; CHECK: br %r14
+ %add = add i32 %index, 1
+ %ret = extractelement <16 x i8> %val, i32 %add
+ ret i8 %ret
+}
diff --git a/test/CodeGen/SystemZ/vec-move-06.ll b/test/CodeGen/SystemZ/vec-move-06.ll
new file mode 100644
index 000000000000..de3960cad956
--- /dev/null
+++ b/test/CodeGen/SystemZ/vec-move-06.ll
@@ -0,0 +1,13 @@
+; Test vector builds using VLVGP.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
+; Test the basic v2i64 usage.
+define <2 x i64> @f1(i64 %a, i64 %b) {
+; CHECK-LABEL: f1:
+; CHECK: vlvgp %v24, %r2, %r3
+; CHECK: br %r14
+ %veca = insertelement <2 x i64> undef, i64 %a, i32 0
+ %vecb = insertelement <2 x i64> %veca, i64 %b, i32 1
+ ret <2 x i64> %vecb
+}
diff --git a/test/CodeGen/SystemZ/vec-move-07.ll b/test/CodeGen/SystemZ/vec-move-07.ll
new file mode 100644
index 000000000000..b0d06f782dee
--- /dev/null
+++ b/test/CodeGen/SystemZ/vec-move-07.ll
@@ -0,0 +1,57 @@
+; Test scalar_to_vector expansion.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
+; Test v16i8.
+define <16 x i8> @f1(i8 %val) {
+; CHECK-LABEL: f1:
+; CHECK: vlvgb %v24, %r2, 0
+; CHECK: br %r14
+ %ret = insertelement <16 x i8> undef, i8 %val, i32 0
+ ret <16 x i8> %ret
+}
+
+; Test v8i16.
+define <8 x i16> @f2(i16 %val) {
+; CHECK-LABEL: f2:
+; CHECK: vlvgh %v24, %r2, 0
+; CHECK: br %r14
+ %ret = insertelement <8 x i16> undef, i16 %val, i32 0
+ ret <8 x i16> %ret
+}
+
+; Test v4i32.
+define <4 x i32> @f3(i32 %val) {
+; CHECK-LABEL: f3:
+; CHECK: vlvgf %v24, %r2, 0
+; CHECK: br %r14
+ %ret = insertelement <4 x i32> undef, i32 %val, i32 0
+ ret <4 x i32> %ret
+}
+
+; Test v2i64. Here we load %val into both halves.
+define <2 x i64> @f4(i64 %val) {
+; CHECK-LABEL: f4:
+; CHECK: vlvgp %v24, %r2, %r2
+; CHECK: br %r14
+ %ret = insertelement <2 x i64> undef, i64 %val, i32 0
+ ret <2 x i64> %ret
+}
+
+; Test v4f32, which is just a move.
+define <4 x float> @f5(float %val) {
+; CHECK-LABEL: f5:
+; CHECK: vlr %v24, %v0
+; CHECK: br %r14
+ %ret = insertelement <4 x float> undef, float %val, i32 0
+ ret <4 x float> %ret
+}
+
+; Likewise v2f64.
+define <2 x double> @f6(double %val) {
+; CHECK-LABEL: f6:
+; CHECK: vlr %v24, %v0
+; CHECK: br %r14
+ %ret = insertelement <2 x double> undef, double %val, i32 0
+ ret <2 x double> %ret
+}
diff --git a/test/CodeGen/SystemZ/vec-move-08.ll b/test/CodeGen/SystemZ/vec-move-08.ll
new file mode 100644
index 000000000000..5396a1edec6a
--- /dev/null
+++ b/test/CodeGen/SystemZ/vec-move-08.ll
@@ -0,0 +1,444 @@
+; Test vector insertion of memory values.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
+; Test v16i8 insertion into the first element.
+define <16 x i8> @f1(<16 x i8> %val, i8 *%ptr) {
+; CHECK-LABEL: f1:
+; CHECK: vleb %v24, 0(%r2), 0
+; CHECK: br %r14
+ %element = load i8, i8 *%ptr
+ %ret = insertelement <16 x i8> %val, i8 %element, i32 0
+ ret <16 x i8> %ret
+}
+
+; Test v16i8 insertion into the last element.
+define <16 x i8> @f2(<16 x i8> %val, i8 *%ptr) {
+; CHECK-LABEL: f2:
+; CHECK: vleb %v24, 0(%r2), 15
+; CHECK: br %r14
+ %element = load i8, i8 *%ptr
+ %ret = insertelement <16 x i8> %val, i8 %element, i32 15
+ ret <16 x i8> %ret
+}
+
+; Test v16i8 insertion with the highest in-range offset.
+define <16 x i8> @f3(<16 x i8> %val, i8 *%base) {
+; CHECK-LABEL: f3:
+; CHECK: vleb %v24, 4095(%r2), 10
+; CHECK: br %r14
+ %ptr = getelementptr i8, i8 *%base, i32 4095
+ %element = load i8, i8 *%ptr
+ %ret = insertelement <16 x i8> %val, i8 %element, i32 10
+ ret <16 x i8> %ret
+}
+
+; Test v16i8 insertion with the first out-of-range offset.
+define <16 x i8> @f4(<16 x i8> %val, i8 *%base) {
+; CHECK-LABEL: f4:
+; CHECK: aghi %r2, 4096
+; CHECK: vleb %v24, 0(%r2), 5
+; CHECK: br %r14
+ %ptr = getelementptr i8, i8 *%base, i32 4096
+ %element = load i8, i8 *%ptr
+ %ret = insertelement <16 x i8> %val, i8 %element, i32 5
+ ret <16 x i8> %ret
+}
+
+; Test v16i8 insertion into a variable element.
+define <16 x i8> @f5(<16 x i8> %val, i8 *%ptr, i32 %index) {
+; CHECK-LABEL: f5:
+; CHECK-NOT: vleb
+; CHECK: br %r14
+ %element = load i8, i8 *%ptr
+ %ret = insertelement <16 x i8> %val, i8 %element, i32 %index
+ ret <16 x i8> %ret
+}
+
+; Test v8i16 insertion into the first element.
+define <8 x i16> @f6(<8 x i16> %val, i16 *%ptr) {
+; CHECK-LABEL: f6:
+; CHECK: vleh %v24, 0(%r2), 0
+; CHECK: br %r14
+ %element = load i16, i16 *%ptr
+ %ret = insertelement <8 x i16> %val, i16 %element, i32 0
+ ret <8 x i16> %ret
+}
+
+; Test v8i16 insertion into the last element.
+define <8 x i16> @f7(<8 x i16> %val, i16 *%ptr) {
+; CHECK-LABEL: f7:
+; CHECK: vleh %v24, 0(%r2), 7
+; CHECK: br %r14
+ %element = load i16, i16 *%ptr
+ %ret = insertelement <8 x i16> %val, i16 %element, i32 7
+ ret <8 x i16> %ret
+}
+
+; Test v8i16 insertion with the highest in-range offset.
+define <8 x i16> @f8(<8 x i16> %val, i16 *%base) {
+; CHECK-LABEL: f8:
+; CHECK: vleh %v24, 4094(%r2), 5
+; CHECK: br %r14
+ %ptr = getelementptr i16, i16 *%base, i32 2047
+ %element = load i16, i16 *%ptr
+ %ret = insertelement <8 x i16> %val, i16 %element, i32 5
+ ret <8 x i16> %ret
+}
+
+; Test v8i16 insertion with the first out-of-range offset.
+define <8 x i16> @f9(<8 x i16> %val, i16 *%base) {
+; CHECK-LABEL: f9:
+; CHECK: aghi %r2, 4096
+; CHECK: vleh %v24, 0(%r2), 1
+; CHECK: br %r14
+ %ptr = getelementptr i16, i16 *%base, i32 2048
+ %element = load i16, i16 *%ptr
+ %ret = insertelement <8 x i16> %val, i16 %element, i32 1
+ ret <8 x i16> %ret
+}
+
+; Test v8i16 insertion into a variable element.
+define <8 x i16> @f10(<8 x i16> %val, i16 *%ptr, i32 %index) {
+; CHECK-LABEL: f10:
+; CHECK-NOT: vleh
+; CHECK: br %r14
+ %element = load i16, i16 *%ptr
+ %ret = insertelement <8 x i16> %val, i16 %element, i32 %index
+ ret <8 x i16> %ret
+}
+
+; Test v4i32 insertion into the first element.
+define <4 x i32> @f11(<4 x i32> %val, i32 *%ptr) {
+; CHECK-LABEL: f11:
+; CHECK: vlef %v24, 0(%r2), 0
+; CHECK: br %r14
+ %element = load i32, i32 *%ptr
+ %ret = insertelement <4 x i32> %val, i32 %element, i32 0
+ ret <4 x i32> %ret
+}
+
+; Test v4i32 insertion into the last element.
+define <4 x i32> @f12(<4 x i32> %val, i32 *%ptr) {
+; CHECK-LABEL: f12:
+; CHECK: vlef %v24, 0(%r2), 3
+; CHECK: br %r14
+ %element = load i32, i32 *%ptr
+ %ret = insertelement <4 x i32> %val, i32 %element, i32 3
+ ret <4 x i32> %ret
+}
+
+; Test v4i32 insertion with the highest in-range offset.
+define <4 x i32> @f13(<4 x i32> %val, i32 *%base) {
+; CHECK-LABEL: f13:
+; CHECK: vlef %v24, 4092(%r2), 2
+; CHECK: br %r14
+ %ptr = getelementptr i32, i32 *%base, i32 1023
+ %element = load i32, i32 *%ptr
+ %ret = insertelement <4 x i32> %val, i32 %element, i32 2
+ ret <4 x i32> %ret
+}
+
+; Test v4i32 insertion with the first out-of-range offset.
+define <4 x i32> @f14(<4 x i32> %val, i32 *%base) {
+; CHECK-LABEL: f14:
+; CHECK: aghi %r2, 4096
+; CHECK: vlef %v24, 0(%r2), 1
+; CHECK: br %r14
+ %ptr = getelementptr i32, i32 *%base, i32 1024
+ %element = load i32, i32 *%ptr
+ %ret = insertelement <4 x i32> %val, i32 %element, i32 1
+ ret <4 x i32> %ret
+}
+
+; Test v4i32 insertion into a variable element.
+define <4 x i32> @f15(<4 x i32> %val, i32 *%ptr, i32 %index) {
+; CHECK-LABEL: f15:
+; CHECK-NOT: vlef
+; CHECK: br %r14
+ %element = load i32, i32 *%ptr
+ %ret = insertelement <4 x i32> %val, i32 %element, i32 %index
+ ret <4 x i32> %ret
+}
+
+; Test v2i64 insertion into the first element.
+define <2 x i64> @f16(<2 x i64> %val, i64 *%ptr) {
+; CHECK-LABEL: f16:
+; CHECK: vleg %v24, 0(%r2), 0
+; CHECK: br %r14
+ %element = load i64, i64 *%ptr
+ %ret = insertelement <2 x i64> %val, i64 %element, i32 0
+ ret <2 x i64> %ret
+}
+
+; Test v2i64 insertion into the last element.
+define <2 x i64> @f17(<2 x i64> %val, i64 *%ptr) {
+; CHECK-LABEL: f17:
+; CHECK: vleg %v24, 0(%r2), 1
+; CHECK: br %r14
+ %element = load i64, i64 *%ptr
+ %ret = insertelement <2 x i64> %val, i64 %element, i32 1
+ ret <2 x i64> %ret
+}
+
+; Test v2i64 insertion with the highest in-range offset.
+define <2 x i64> @f18(<2 x i64> %val, i64 *%base) {
+; CHECK-LABEL: f18:
+; CHECK: vleg %v24, 4088(%r2), 1
+; CHECK: br %r14
+ %ptr = getelementptr i64, i64 *%base, i32 511
+ %element = load i64, i64 *%ptr
+ %ret = insertelement <2 x i64> %val, i64 %element, i32 1
+ ret <2 x i64> %ret
+}
+
+; Test v2i64 insertion with the first out-of-range offset.
+define <2 x i64> @f19(<2 x i64> %val, i64 *%base) {
+; CHECK-LABEL: f19:
+; CHECK: aghi %r2, 4096
+; CHECK: vleg %v24, 0(%r2), 0
+; CHECK: br %r14
+ %ptr = getelementptr i64, i64 *%base, i32 512
+ %element = load i64, i64 *%ptr
+ %ret = insertelement <2 x i64> %val, i64 %element, i32 0
+ ret <2 x i64> %ret
+}
+
+; Test v2i64 insertion into a variable element.
+define <2 x i64> @f20(<2 x i64> %val, i64 *%ptr, i32 %index) {
+; CHECK-LABEL: f20:
+; CHECK-NOT: vleg
+; CHECK: br %r14
+ %element = load i64, i64 *%ptr
+ %ret = insertelement <2 x i64> %val, i64 %element, i32 %index
+ ret <2 x i64> %ret
+}
+
+; Test v4f32 insertion into the first element.
+define <4 x float> @f21(<4 x float> %val, float *%ptr) {
+; CHECK-LABEL: f21:
+; CHECK: vlef %v24, 0(%r2), 0
+; CHECK: br %r14
+ %element = load float, float *%ptr
+ %ret = insertelement <4 x float> %val, float %element, i32 0
+ ret <4 x float> %ret
+}
+
+; Test v4f32 insertion into the last element.
+define <4 x float> @f22(<4 x float> %val, float *%ptr) {
+; CHECK-LABEL: f22:
+; CHECK: vlef %v24, 0(%r2), 3
+; CHECK: br %r14
+ %element = load float, float *%ptr
+ %ret = insertelement <4 x float> %val, float %element, i32 3
+ ret <4 x float> %ret
+}
+
+; Test v4f32 insertion with the highest in-range offset.
+define <4 x float> @f23(<4 x float> %val, float *%base) {
+; CHECK-LABEL: f23:
+; CHECK: vlef %v24, 4092(%r2), 2
+; CHECK: br %r14
+ %ptr = getelementptr float, float *%base, i32 1023
+ %element = load float, float *%ptr
+ %ret = insertelement <4 x float> %val, float %element, i32 2
+ ret <4 x float> %ret
+}
+
+; Test v4f32 insertion with the first out-of-range offset.
+define <4 x float> @f24(<4 x float> %val, float *%base) {
+; CHECK-LABEL: f24:
+; CHECK: aghi %r2, 4096
+; CHECK: vlef %v24, 0(%r2), 1
+; CHECK: br %r14
+ %ptr = getelementptr float, float *%base, i32 1024
+ %element = load float, float *%ptr
+ %ret = insertelement <4 x float> %val, float %element, i32 1
+ ret <4 x float> %ret
+}
+
+; Test v4f32 insertion into a variable element.
+define <4 x float> @f25(<4 x float> %val, float *%ptr, i32 %index) {
+; CHECK-LABEL: f25:
+; CHECK-NOT: vlef
+; CHECK: br %r14
+ %element = load float, float *%ptr
+ %ret = insertelement <4 x float> %val, float %element, i32 %index
+ ret <4 x float> %ret
+}
+
+; Test v2f64 insertion into the first element.
+define <2 x double> @f26(<2 x double> %val, double *%ptr) {
+; CHECK-LABEL: f26:
+; CHECK: vleg %v24, 0(%r2), 0
+; CHECK: br %r14
+ %element = load double, double *%ptr
+ %ret = insertelement <2 x double> %val, double %element, i32 0
+ ret <2 x double> %ret
+}
+
+; Test v2f64 insertion into the last element.
+define <2 x double> @f27(<2 x double> %val, double *%ptr) {
+; CHECK-LABEL: f27:
+; CHECK: vleg %v24, 0(%r2), 1
+; CHECK: br %r14
+ %element = load double, double *%ptr
+ %ret = insertelement <2 x double> %val, double %element, i32 1
+ ret <2 x double> %ret
+}
+
+; Test v2f64 insertion with the highest in-range offset.
+define <2 x double> @f28(<2 x double> %val, double *%base) {
+; CHECK-LABEL: f28:
+; CHECK: vleg %v24, 4088(%r2), 1
+; CHECK: br %r14
+ %ptr = getelementptr double, double *%base, i32 511
+ %element = load double, double *%ptr
+ %ret = insertelement <2 x double> %val, double %element, i32 1
+ ret <2 x double> %ret
+}
+
+; Test v2f64 insertion with the first out-of-range offset.
+define <2 x double> @f29(<2 x double> %val, double *%base) {
+; CHECK-LABEL: f29:
+; CHECK: aghi %r2, 4096
+; CHECK: vleg %v24, 0(%r2), 0
+; CHECK: br %r14
+ %ptr = getelementptr double, double *%base, i32 512
+ %element = load double, double *%ptr
+ %ret = insertelement <2 x double> %val, double %element, i32 0
+ ret <2 x double> %ret
+}
+
+; Test v2f64 insertion into a variable element.
+define <2 x double> @f30(<2 x double> %val, double *%ptr, i32 %index) {
+; CHECK-LABEL: f30:
+; CHECK-NOT: vleg
+; CHECK: br %r14
+ %element = load double, double *%ptr
+ %ret = insertelement <2 x double> %val, double %element, i32 %index
+ ret <2 x double> %ret
+}
+
+; Test a v4i32 gather of the first element.
+define <4 x i32> @f31(<4 x i32> %val, <4 x i32> %index, i64 %base) {
+; CHECK-LABEL: f31:
+; CHECK: vgef %v24, 0(%v26,%r2), 0
+; CHECK: br %r14
+ %elem = extractelement <4 x i32> %index, i32 0
+ %ext = zext i32 %elem to i64
+ %add = add i64 %base, %ext
+ %ptr = inttoptr i64 %add to i32 *
+ %element = load i32, i32 *%ptr
+ %ret = insertelement <4 x i32> %val, i32 %element, i32 0
+ ret <4 x i32> %ret
+}
+
+; Test a v4i32 gather of the last element.
+define <4 x i32> @f32(<4 x i32> %val, <4 x i32> %index, i64 %base) {
+; CHECK-LABEL: f32:
+; CHECK: vgef %v24, 0(%v26,%r2), 3
+; CHECK: br %r14
+ %elem = extractelement <4 x i32> %index, i32 3
+ %ext = zext i32 %elem to i64
+ %add = add i64 %base, %ext
+ %ptr = inttoptr i64 %add to i32 *
+ %element = load i32, i32 *%ptr
+ %ret = insertelement <4 x i32> %val, i32 %element, i32 3
+ ret <4 x i32> %ret
+}
+
+; Test a v4i32 gather with the highest in-range offset.
+define <4 x i32> @f33(<4 x i32> %val, <4 x i32> %index, i64 %base) {
+; CHECK-LABEL: f33:
+; CHECK: vgef %v24, 4095(%v26,%r2), 1
+; CHECK: br %r14
+ %elem = extractelement <4 x i32> %index, i32 1
+ %ext = zext i32 %elem to i64
+ %add1 = add i64 %base, %ext
+ %add2 = add i64 %add1, 4095
+ %ptr = inttoptr i64 %add2 to i32 *
+ %element = load i32, i32 *%ptr
+ %ret = insertelement <4 x i32> %val, i32 %element, i32 1
+ ret <4 x i32> %ret
+}
+
+; Test a v2i64 gather of the first element.
+define <2 x i64> @f34(<2 x i64> %val, <2 x i64> %index, i64 %base) {
+; CHECK-LABEL: f34:
+; CHECK: vgeg %v24, 0(%v26,%r2), 0
+; CHECK: br %r14
+ %elem = extractelement <2 x i64> %index, i32 0
+ %add = add i64 %base, %elem
+ %ptr = inttoptr i64 %add to i64 *
+ %element = load i64, i64 *%ptr
+ %ret = insertelement <2 x i64> %val, i64 %element, i32 0
+ ret <2 x i64> %ret
+}
+
+; Test a v2i64 gather of the last element.
+define <2 x i64> @f35(<2 x i64> %val, <2 x i64> %index, i64 %base) {
+; CHECK-LABEL: f35:
+; CHECK: vgeg %v24, 0(%v26,%r2), 1
+; CHECK: br %r14
+ %elem = extractelement <2 x i64> %index, i32 1
+ %add = add i64 %base, %elem
+ %ptr = inttoptr i64 %add to i64 *
+ %element = load i64, i64 *%ptr
+ %ret = insertelement <2 x i64> %val, i64 %element, i32 1
+ ret <2 x i64> %ret
+}
+
+; Test a v4f32 gather of the first element.
+define <4 x float> @f36(<4 x float> %val, <4 x i32> %index, i64 %base) {
+; CHECK-LABEL: f36:
+; CHECK: vgef %v24, 0(%v26,%r2), 0
+; CHECK: br %r14
+ %elem = extractelement <4 x i32> %index, i32 0
+ %ext = zext i32 %elem to i64
+ %add = add i64 %base, %ext
+ %ptr = inttoptr i64 %add to float *
+ %element = load float, float *%ptr
+ %ret = insertelement <4 x float> %val, float %element, i32 0
+ ret <4 x float> %ret
+}
+
+; Test a v4f32 gather of the last element.
+define <4 x float> @f37(<4 x float> %val, <4 x i32> %index, i64 %base) {
+; CHECK-LABEL: f37:
+; CHECK: vgef %v24, 0(%v26,%r2), 3
+; CHECK: br %r14
+ %elem = extractelement <4 x i32> %index, i32 3
+ %ext = zext i32 %elem to i64
+ %add = add i64 %base, %ext
+ %ptr = inttoptr i64 %add to float *
+ %element = load float, float *%ptr
+ %ret = insertelement <4 x float> %val, float %element, i32 3
+ ret <4 x float> %ret
+}
+
+; Test a v2f64 gather of the first element.
+define <2 x double> @f38(<2 x double> %val, <2 x i64> %index, i64 %base) {
+; CHECK-LABEL: f38:
+; CHECK: vgeg %v24, 0(%v26,%r2), 0
+; CHECK: br %r14
+ %elem = extractelement <2 x i64> %index, i32 0
+ %add = add i64 %base, %elem
+ %ptr = inttoptr i64 %add to double *
+ %element = load double, double *%ptr
+ %ret = insertelement <2 x double> %val, double %element, i32 0
+ ret <2 x double> %ret
+}
+
+; Test a v2f64 gather of the last element.
+define <2 x double> @f39(<2 x double> %val, <2 x i64> %index, i64 %base) {
+; CHECK-LABEL: f39:
+; CHECK: vgeg %v24, 0(%v26,%r2), 1
+; CHECK: br %r14
+ %elem = extractelement <2 x i64> %index, i32 1
+ %add = add i64 %base, %elem
+ %ptr = inttoptr i64 %add to double *
+ %element = load double, double *%ptr
+ %ret = insertelement <2 x double> %val, double %element, i32 1
+ ret <2 x double> %ret
+}
diff --git a/test/CodeGen/SystemZ/vec-move-09.ll b/test/CodeGen/SystemZ/vec-move-09.ll
new file mode 100644
index 000000000000..5a53a2d6a198
--- /dev/null
+++ b/test/CodeGen/SystemZ/vec-move-09.ll
@@ -0,0 +1,291 @@
+; Test vector insertion of constants.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
+; Test v16i8 insertion into the first element.
+define <16 x i8> @f1(<16 x i8> %val) {
+; CHECK-LABEL: f1:
+; CHECK: vleib %v24, 0, 0
+; CHECK: br %r14
+ %ret = insertelement <16 x i8> %val, i8 0, i32 0
+ ret <16 x i8> %ret
+}
+
+; Test v16i8 insertion into the last element.
+define <16 x i8> @f2(<16 x i8> %val) {
+; CHECK-LABEL: f2:
+; CHECK: vleib %v24, 100, 15
+; CHECK: br %r14
+ %ret = insertelement <16 x i8> %val, i8 100, i32 15
+ ret <16 x i8> %ret
+}
+
+; Test v16i8 insertion with the maximum signed value.
+define <16 x i8> @f3(<16 x i8> %val) {
+; CHECK-LABEL: f3:
+; CHECK: vleib %v24, 127, 10
+; CHECK: br %r14
+ %ret = insertelement <16 x i8> %val, i8 127, i32 10
+ ret <16 x i8> %ret
+}
+
+; Test v16i8 insertion with the minimum signed value.
+define <16 x i8> @f4(<16 x i8> %val) {
+; CHECK-LABEL: f4:
+; CHECK: vleib %v24, -128, 11
+; CHECK: br %r14
+ %ret = insertelement <16 x i8> %val, i8 128, i32 11
+ ret <16 x i8> %ret
+}
+
+; Test v16i8 insertion with the maximum unsigned value.
+define <16 x i8> @f5(<16 x i8> %val) {
+; CHECK-LABEL: f5:
+; CHECK: vleib %v24, -1, 12
+; CHECK: br %r14
+ %ret = insertelement <16 x i8> %val, i8 255, i32 12
+ ret <16 x i8> %ret
+}
+
+; Test v16i8 insertion into a variable element.
+define <16 x i8> @f6(<16 x i8> %val, i32 %index) {
+; CHECK-LABEL: f6:
+; CHECK-NOT: vleib
+; CHECK: br %r14
+ %ret = insertelement <16 x i8> %val, i8 0, i32 %index
+ ret <16 x i8> %ret
+}
+
+; Test v8i16 insertion into the first element.
+define <8 x i16> @f7(<8 x i16> %val) {
+; CHECK-LABEL: f7:
+; CHECK: vleih %v24, 0, 0
+; CHECK: br %r14
+ %ret = insertelement <8 x i16> %val, i16 0, i32 0
+ ret <8 x i16> %ret
+}
+
+; Test v8i16 insertion into the last element.
+define <8 x i16> @f8(<8 x i16> %val) {
+; CHECK-LABEL: f8:
+; CHECK: vleih %v24, 0, 7
+; CHECK: br %r14
+ %ret = insertelement <8 x i16> %val, i16 0, i32 7
+ ret <8 x i16> %ret
+}
+
+; Test v8i16 insertion with the maximum signed value.
+define <8 x i16> @f9(<8 x i16> %val) {
+; CHECK-LABEL: f9:
+; CHECK: vleih %v24, 32767, 4
+; CHECK: br %r14
+ %ret = insertelement <8 x i16> %val, i16 32767, i32 4
+ ret <8 x i16> %ret
+}
+
+; Test v8i16 insertion with the minimum signed value.
+define <8 x i16> @f10(<8 x i16> %val) {
+; CHECK-LABEL: f10:
+; CHECK: vleih %v24, -32768, 5
+; CHECK: br %r14
+ %ret = insertelement <8 x i16> %val, i16 32768, i32 5
+ ret <8 x i16> %ret
+}
+
+; Test v8i16 insertion with the maximum unsigned value.
+define <8 x i16> @f11(<8 x i16> %val) {
+; CHECK-LABEL: f11:
+; CHECK: vleih %v24, -1, 6
+; CHECK: br %r14
+ %ret = insertelement <8 x i16> %val, i16 65535, i32 6
+ ret <8 x i16> %ret
+}
+
+; Test v8i16 insertion into a variable element.
+define <8 x i16> @f12(<8 x i16> %val, i32 %index) {
+; CHECK-LABEL: f12:
+; CHECK-NOT: vleih
+; CHECK: br %r14
+ %ret = insertelement <8 x i16> %val, i16 0, i32 %index
+ ret <8 x i16> %ret
+}
+
+; Test v4i32 insertion into the first element.
+define <4 x i32> @f13(<4 x i32> %val) {
+; CHECK-LABEL: f13:
+; CHECK: vleif %v24, 0, 0
+; CHECK: br %r14
+ %ret = insertelement <4 x i32> %val, i32 0, i32 0
+ ret <4 x i32> %ret
+}
+
+; Test v4i32 insertion into the last element.
+define <4 x i32> @f14(<4 x i32> %val) {
+; CHECK-LABEL: f14:
+; CHECK: vleif %v24, 0, 3
+; CHECK: br %r14
+ %ret = insertelement <4 x i32> %val, i32 0, i32 3
+ ret <4 x i32> %ret
+}
+
+; Test v4i32 insertion with the maximum value allowed by VLEIF.
+define <4 x i32> @f15(<4 x i32> %val) {
+; CHECK-LABEL: f15:
+; CHECK: vleif %v24, 32767, 1
+; CHECK: br %r14
+ %ret = insertelement <4 x i32> %val, i32 32767, i32 1
+ ret <4 x i32> %ret
+}
+
+; Test v4i32 insertion with the next value up.
+define <4 x i32> @f16(<4 x i32> %val) {
+; CHECK-LABEL: f16:
+; CHECK-NOT: vleif
+; CHECK: br %r14
+ %ret = insertelement <4 x i32> %val, i32 32768, i32 1
+ ret <4 x i32> %ret
+}
+
+; Test v4i32 insertion with the minimum value allowed by VLEIF.
+define <4 x i32> @f17(<4 x i32> %val) {
+; CHECK-LABEL: f17:
+; CHECK: vleif %v24, -32768, 2
+; CHECK: br %r14
+ %ret = insertelement <4 x i32> %val, i32 -32768, i32 2
+ ret <4 x i32> %ret
+}
+
+; Test v4i32 insertion with the next value down.
+define <4 x i32> @f18(<4 x i32> %val) {
+; CHECK-LABEL: f18:
+; CHECK-NOT: vleif
+; CHECK: br %r14
+ %ret = insertelement <4 x i32> %val, i32 -32769, i32 2
+ ret <4 x i32> %ret
+}
+
+; Test v4i32 insertion into a variable element.
+define <4 x i32> @f19(<4 x i32> %val, i32 %index) {
+; CHECK-LABEL: f19:
+; CHECK-NOT: vleif
+; CHECK: br %r14
+ %ret = insertelement <4 x i32> %val, i32 0, i32 %index
+ ret <4 x i32> %ret
+}
+
+; Test v2i64 insertion into the first element.
+define <2 x i64> @f20(<2 x i64> %val) {
+; CHECK-LABEL: f20:
+; CHECK: vleig %v24, 0, 0
+; CHECK: br %r14
+ %ret = insertelement <2 x i64> %val, i64 0, i32 0
+ ret <2 x i64> %ret
+}
+
+; Test v2i64 insertion into the last element.
+define <2 x i64> @f21(<2 x i64> %val) {
+; CHECK-LABEL: f21:
+; CHECK: vleig %v24, 0, 1
+; CHECK: br %r14
+ %ret = insertelement <2 x i64> %val, i64 0, i32 1
+ ret <2 x i64> %ret
+}
+
+; Test v2i64 insertion with the maximum value allowed by VLEIG.
+define <2 x i64> @f22(<2 x i64> %val) {
+; CHECK-LABEL: f22:
+; CHECK: vleig %v24, 32767, 1
+; CHECK: br %r14
+ %ret = insertelement <2 x i64> %val, i64 32767, i32 1
+ ret <2 x i64> %ret
+}
+
+; Test v2i64 insertion with the next value up.
+define <2 x i64> @f23(<2 x i64> %val) {
+; CHECK-LABEL: f23:
+; CHECK-NOT: vleig
+; CHECK: br %r14
+ %ret = insertelement <2 x i64> %val, i64 32768, i32 1
+ ret <2 x i64> %ret
+}
+
+; Test v2i64 insertion with the minimum value allowed by VLEIG.
+define <2 x i64> @f24(<2 x i64> %val) {
+; CHECK-LABEL: f24:
+; CHECK: vleig %v24, -32768, 0
+; CHECK: br %r14
+ %ret = insertelement <2 x i64> %val, i64 -32768, i32 0
+ ret <2 x i64> %ret
+}
+
+; Test v2i64 insertion with the next value down.
+define <2 x i64> @f25(<2 x i64> %val) {
+; CHECK-LABEL: f25:
+; CHECK-NOT: vleig
+; CHECK: br %r14
+ %ret = insertelement <2 x i64> %val, i64 -32769, i32 0
+ ret <2 x i64> %ret
+}
+
+; Test v2i64 insertion into a variable element.
+define <2 x i64> @f26(<2 x i64> %val, i32 %index) {
+; CHECK-LABEL: f26:
+; CHECK-NOT: vleig
+; CHECK: br %r14
+ %ret = insertelement <2 x i64> %val, i64 0, i32 %index
+ ret <2 x i64> %ret
+}
+
+; Test v4f32 insertion of 0 into the first element.
+define <4 x float> @f27(<4 x float> %val) {
+; CHECK-LABEL: f27:
+; CHECK: vleif %v24, 0, 0
+; CHECK: br %r14
+ %ret = insertelement <4 x float> %val, float 0.0, i32 0
+ ret <4 x float> %ret
+}
+
+; Test v4f32 insertion of 0 into the last element.
+define <4 x float> @f28(<4 x float> %val) {
+; CHECK-LABEL: f28:
+; CHECK: vleif %v24, 0, 3
+; CHECK: br %r14
+ %ret = insertelement <4 x float> %val, float 0.0, i32 3
+ ret <4 x float> %ret
+}
+
+; Test v4f32 insertion of a nonzero value.
+define <4 x float> @f29(<4 x float> %val) {
+; CHECK-LABEL: f29:
+; CHECK-NOT: vleif
+; CHECK: br %r14
+ %ret = insertelement <4 x float> %val, float 1.0, i32 1
+ ret <4 x float> %ret
+}
+
+; Test v2f64 insertion of 0 into the first element.
+define <2 x double> @f30(<2 x double> %val) {
+; CHECK-LABEL: f30:
+; CHECK: vleig %v24, 0, 0
+; CHECK: br %r14
+ %ret = insertelement <2 x double> %val, double 0.0, i32 0
+ ret <2 x double> %ret
+}
+
+; Test v2f64 insertion of 0 into the last element.
+define <2 x double> @f31(<2 x double> %val) {
+; CHECK-LABEL: f31:
+; CHECK: vleig %v24, 0, 1
+; CHECK: br %r14
+ %ret = insertelement <2 x double> %val, double 0.0, i32 1
+ ret <2 x double> %ret
+}
+
+; Test v2f64 insertion of a nonzero value.
+define <2 x double> @f32(<2 x double> %val) {
+; CHECK-LABEL: f32:
+; CHECK-NOT: vleig
+; CHECK: br %r14
+ %ret = insertelement <2 x double> %val, double 1.0, i32 1
+ ret <2 x double> %ret
+}
diff --git a/test/CodeGen/SystemZ/vec-move-10.ll b/test/CodeGen/SystemZ/vec-move-10.ll
new file mode 100644
index 000000000000..894d0c2b41fa
--- /dev/null
+++ b/test/CodeGen/SystemZ/vec-move-10.ll
@@ -0,0 +1,499 @@
+; Test vector extraction to memory.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
+; Test v16i8 extraction from the first element.
+define void @f1(<16 x i8> %val, i8 *%ptr) {
+; CHECK-LABEL: f1:
+; CHECK: vsteb %v24, 0(%r2), 0
+; CHECK: br %r14
+ %element = extractelement <16 x i8> %val, i32 0
+ store i8 %element, i8 *%ptr
+ ret void
+}
+
+; Test v16i8 extraction from the last element.
+define void @f2(<16 x i8> %val, i8 *%ptr) {
+; CHECK-LABEL: f2:
+; CHECK: vsteb %v24, 0(%r2), 15
+; CHECK: br %r14
+ %element = extractelement <16 x i8> %val, i32 15
+ store i8 %element, i8 *%ptr
+ ret void
+}
+
+; Test v16i8 extraction of an invalid element. This must compile,
+; but we don't care what it does.
+define void @f3(<16 x i8> %val, i8 *%ptr) {
+; CHECK-LABEL: f3:
+; CHECK-NOT: vsteb %v24, 0(%r2), 16
+; CHECK: br %r14
+ %element = extractelement <16 x i8> %val, i32 16
+ store i8 %element, i8 *%ptr
+ ret void
+}
+
+; Test v16i8 extraction with the highest in-range offset.
+define void @f4(<16 x i8> %val, i8 *%base) {
+; CHECK-LABEL: f4:
+; CHECK: vsteb %v24, 4095(%r2), 10
+; CHECK: br %r14
+ %ptr = getelementptr i8, i8 *%base, i32 4095
+ %element = extractelement <16 x i8> %val, i32 10
+ store i8 %element, i8 *%ptr
+ ret void
+}
+
+; Test v16i8 extraction with the first out-of-range offset.
+define void @f5(<16 x i8> %val, i8 *%base) {
+; CHECK-LABEL: f5:
+; CHECK: aghi %r2, 4096
+; CHECK: vsteb %v24, 0(%r2), 5
+; CHECK: br %r14
+ %ptr = getelementptr i8, i8 *%base, i32 4096
+ %element = extractelement <16 x i8> %val, i32 5
+ store i8 %element, i8 *%ptr
+ ret void
+}
+
+; Test v16i8 extraction from a variable element.
+define void @f6(<16 x i8> %val, i8 *%ptr, i32 %index) {
+; CHECK-LABEL: f6:
+; CHECK-NOT: vsteb
+; CHECK: br %r14
+ %element = extractelement <16 x i8> %val, i32 %index
+ store i8 %element, i8 *%ptr
+ ret void
+}
+
+; Test v8i16 extraction from the first element.
+define void @f7(<8 x i16> %val, i16 *%ptr) {
+; CHECK-LABEL: f7:
+; CHECK: vsteh %v24, 0(%r2), 0
+; CHECK: br %r14
+ %element = extractelement <8 x i16> %val, i32 0
+ store i16 %element, i16 *%ptr
+ ret void
+}
+
+; Test v8i16 extraction from the last element.
+define void @f8(<8 x i16> %val, i16 *%ptr) {
+; CHECK-LABEL: f8:
+; CHECK: vsteh %v24, 0(%r2), 7
+; CHECK: br %r14
+ %element = extractelement <8 x i16> %val, i32 7
+ store i16 %element, i16 *%ptr
+ ret void
+}
+
+; Test v8i16 extraction of an invalid element. This must compile,
+; but we don't care what it does.
+define void @f9(<8 x i16> %val, i16 *%ptr) {
+; CHECK-LABEL: f9:
+; CHECK-NOT: vsteh %v24, 0(%r2), 8
+; CHECK: br %r14
+ %element = extractelement <8 x i16> %val, i32 8
+ store i16 %element, i16 *%ptr
+ ret void
+}
+
+; Test v8i16 extraction with the highest in-range offset.
+define void @f10(<8 x i16> %val, i16 *%base) {
+; CHECK-LABEL: f10:
+; CHECK: vsteh %v24, 4094(%r2), 5
+; CHECK: br %r14
+ %ptr = getelementptr i16, i16 *%base, i32 2047
+ %element = extractelement <8 x i16> %val, i32 5
+ store i16 %element, i16 *%ptr
+ ret void
+}
+
+; Test v8i16 extraction with the first out-of-range offset.
+define void @f11(<8 x i16> %val, i16 *%base) {
+; CHECK-LABEL: f11:
+; CHECK: aghi %r2, 4096
+; CHECK: vsteh %v24, 0(%r2), 1
+; CHECK: br %r14
+ %ptr = getelementptr i16, i16 *%base, i32 2048
+ %element = extractelement <8 x i16> %val, i32 1
+ store i16 %element, i16 *%ptr
+ ret void
+}
+
+; Test v8i16 extraction from a variable element.
+define void @f12(<8 x i16> %val, i16 *%ptr, i32 %index) {
+; CHECK-LABEL: f12:
+; CHECK-NOT: vsteh
+; CHECK: br %r14
+ %element = extractelement <8 x i16> %val, i32 %index
+ store i16 %element, i16 *%ptr
+ ret void
+}
+
+; Test v4i32 extraction from the first element.
+define void @f13(<4 x i32> %val, i32 *%ptr) {
+; CHECK-LABEL: f13:
+; CHECK: vstef %v24, 0(%r2), 0
+; CHECK: br %r14
+ %element = extractelement <4 x i32> %val, i32 0
+ store i32 %element, i32 *%ptr
+ ret void
+}
+
+; Test v4i32 extraction from the last element.
+define void @f14(<4 x i32> %val, i32 *%ptr) {
+; CHECK-LABEL: f14:
+; CHECK: vstef %v24, 0(%r2), 3
+; CHECK: br %r14
+ %element = extractelement <4 x i32> %val, i32 3
+ store i32 %element, i32 *%ptr
+ ret void
+}
+
+; Test v4i32 extraction of an invalid element. This must compile,
+; but we don't care what it does.
+define void @f15(<4 x i32> %val, i32 *%ptr) {
+; CHECK-LABEL: f15:
+; CHECK-NOT: vstef %v24, 0(%r2), 4
+; CHECK: br %r14
+ %element = extractelement <4 x i32> %val, i32 4
+ store i32 %element, i32 *%ptr
+ ret void
+}
+
+; Test v4i32 extraction with the highest in-range offset.
+define void @f16(<4 x i32> %val, i32 *%base) {
+; CHECK-LABEL: f16:
+; CHECK: vstef %v24, 4092(%r2), 2
+; CHECK: br %r14
+ %ptr = getelementptr i32, i32 *%base, i32 1023
+ %element = extractelement <4 x i32> %val, i32 2
+ store i32 %element, i32 *%ptr
+ ret void
+}
+
+; Test v4i32 extraction with the first out-of-range offset.
+define void @f17(<4 x i32> %val, i32 *%base) {
+; CHECK-LABEL: f17:
+; CHECK: aghi %r2, 4096
+; CHECK: vstef %v24, 0(%r2), 1
+; CHECK: br %r14
+ %ptr = getelementptr i32, i32 *%base, i32 1024
+ %element = extractelement <4 x i32> %val, i32 1
+ store i32 %element, i32 *%ptr
+ ret void
+}
+
+; Test v4i32 extraction from a variable element.
+define void @f18(<4 x i32> %val, i32 *%ptr, i32 %index) {
+; CHECK-LABEL: f18:
+; CHECK-NOT: vstef
+; CHECK: br %r14
+ %element = extractelement <4 x i32> %val, i32 %index
+ store i32 %element, i32 *%ptr
+ ret void
+}
+
+; Test v2i64 extraction from the first element.
+define void @f19(<2 x i64> %val, i64 *%ptr) {
+; CHECK-LABEL: f19:
+; CHECK: vsteg %v24, 0(%r2), 0
+; CHECK: br %r14
+ %element = extractelement <2 x i64> %val, i32 0
+ store i64 %element, i64 *%ptr
+ ret void
+}
+
+; Test v2i64 extraction from the last element.
+define void @f20(<2 x i64> %val, i64 *%ptr) {
+; CHECK-LABEL: f20:
+; CHECK: vsteg %v24, 0(%r2), 1
+; CHECK: br %r14
+ %element = extractelement <2 x i64> %val, i32 1
+ store i64 %element, i64 *%ptr
+ ret void
+}
+
+; Test v2i64 extraction of an invalid element. This must compile,
+; but we don't care what it does.
+define void @f21(<2 x i64> %val, i64 *%ptr) {
+; CHECK-LABEL: f21:
+; CHECK-NOT: vsteg %v24, 0(%r2), 2
+; CHECK: br %r14
+ %element = extractelement <2 x i64> %val, i32 2
+ store i64 %element, i64 *%ptr
+ ret void
+}
+
+; Test v2i64 extraction with the highest in-range offset.
+define void @f22(<2 x i64> %val, i64 *%base) {
+; CHECK-LABEL: f22:
+; CHECK: vsteg %v24, 4088(%r2), 1
+; CHECK: br %r14
+ %ptr = getelementptr i64, i64 *%base, i32 511
+ %element = extractelement <2 x i64> %val, i32 1
+ store i64 %element, i64 *%ptr
+ ret void
+}
+
+; Test v2i64 extraction with the first out-of-range offset.
+define void @f23(<2 x i64> %val, i64 *%base) {
+; CHECK-LABEL: f23:
+; CHECK: aghi %r2, 4096
+; CHECK: vsteg %v24, 0(%r2), 0
+; CHECK: br %r14
+ %ptr = getelementptr i64, i64 *%base, i32 512
+ %element = extractelement <2 x i64> %val, i32 0
+ store i64 %element, i64 *%ptr
+ ret void
+}
+
+; Test v2i64 extraction from a variable element.
+define void @f24(<2 x i64> %val, i64 *%ptr, i32 %index) {
+; CHECK-LABEL: f24:
+; CHECK-NOT: vsteg
+; CHECK: br %r14
+ %element = extractelement <2 x i64> %val, i32 %index
+ store i64 %element, i64 *%ptr
+ ret void
+}
+
+; Test v4f32 extraction from the first element.
+define void @f25(<4 x float> %val, float *%ptr) {
+; CHECK-LABEL: f25:
+; CHECK: vstef %v24, 0(%r2), 0
+; CHECK: br %r14
+ %element = extractelement <4 x float> %val, i32 0
+ store float %element, float *%ptr
+ ret void
+}
+
+; Test v4f32 extraction from the last element.
+define void @f26(<4 x float> %val, float *%ptr) {
+; CHECK-LABEL: f26:
+; CHECK: vstef %v24, 0(%r2), 3
+; CHECK: br %r14
+ %element = extractelement <4 x float> %val, i32 3
+ store float %element, float *%ptr
+ ret void
+}
+
+; Test v4f32 extraction of an invalid element. This must compile,
+; but we don't care what it does.
+define void @f27(<4 x float> %val, float *%ptr) {
+; CHECK-LABEL: f27:
+; CHECK-NOT: vstef %v24, 0(%r2), 4
+; CHECK: br %r14
+ %element = extractelement <4 x float> %val, i32 4
+ store float %element, float *%ptr
+ ret void
+}
+
+; Test v4f32 extraction with the highest in-range offset.
+define void @f28(<4 x float> %val, float *%base) {
+; CHECK-LABEL: f28:
+; CHECK: vstef %v24, 4092(%r2), 2
+; CHECK: br %r14
+ %ptr = getelementptr float, float *%base, i32 1023
+ %element = extractelement <4 x float> %val, i32 2
+ store float %element, float *%ptr
+ ret void
+}
+
+; Test v4f32 extraction with the first out-of-range offset.
+define void @f29(<4 x float> %val, float *%base) {
+; CHECK-LABEL: f29:
+; CHECK: aghi %r2, 4096
+; CHECK: vstef %v24, 0(%r2), 1
+; CHECK: br %r14
+ %ptr = getelementptr float, float *%base, i32 1024
+ %element = extractelement <4 x float> %val, i32 1
+ store float %element, float *%ptr
+ ret void
+}
+
+; Test v4f32 extraction from a variable element.
+define void @f30(<4 x float> %val, float *%ptr, i32 %index) {
+; CHECK-LABEL: f30:
+; CHECK-NOT: vstef
+; CHECK: br %r14
+ %element = extractelement <4 x float> %val, i32 %index
+ store float %element, float *%ptr
+ ret void
+}
+
+; Test v2f64 extraction from the first element.
+define void @f32(<2 x double> %val, double *%ptr) {
+; CHECK-LABEL: f32:
+; CHECK: vsteg %v24, 0(%r2), 0
+; CHECK: br %r14
+ %element = extractelement <2 x double> %val, i32 0
+ store double %element, double *%ptr
+ ret void
+}
+
+; Test v2f64 extraction from the last element.
+define void @f33(<2 x double> %val, double *%ptr) {
+; CHECK-LABEL: f33:
+; CHECK: vsteg %v24, 0(%r2), 1
+; CHECK: br %r14
+ %element = extractelement <2 x double> %val, i32 1
+ store double %element, double *%ptr
+ ret void
+}
+
+; Test v2f64 extraction with the highest in-range offset.
+define void @f34(<2 x double> %val, double *%base) {
+; CHECK-LABEL: f34:
+; CHECK: vsteg %v24, 4088(%r2), 1
+; CHECK: br %r14
+ %ptr = getelementptr double, double *%base, i32 511
+ %element = extractelement <2 x double> %val, i32 1
+ store double %element, double *%ptr
+ ret void
+}
+
+; Test v2f64 extraction with the first out-of-range offset.
+define void @f35(<2 x double> %val, double *%base) {
+; CHECK-LABEL: f35:
+; CHECK: aghi %r2, 4096
+; CHECK: vsteg %v24, 0(%r2), 0
+; CHECK: br %r14
+ %ptr = getelementptr double, double *%base, i32 512
+ %element = extractelement <2 x double> %val, i32 0
+ store double %element, double *%ptr
+ ret void
+}
+
+; Test v2f64 extraction from a variable element.
+define void @f36(<2 x double> %val, double *%ptr, i32 %index) {
+; CHECK-LABEL: f36:
+; CHECK-NOT: vsteg
+; CHECK: br %r14
+ %element = extractelement <2 x double> %val, i32 %index
+ store double %element, double *%ptr
+ ret void
+}
+
+; Test a v4i32 scatter of the first element.
+define void @f37(<4 x i32> %val, <4 x i32> %index, i64 %base) {
+; CHECK-LABEL: f37:
+; CHECK: vscef %v24, 0(%v26,%r2), 0
+; CHECK: br %r14
+ %elem = extractelement <4 x i32> %index, i32 0
+ %ext = zext i32 %elem to i64
+ %add = add i64 %base, %ext
+ %ptr = inttoptr i64 %add to i32 *
+ %element = extractelement <4 x i32> %val, i32 0
+ store i32 %element, i32 *%ptr
+ ret void
+}
+
+; Test a v4i32 scatter of the last element.
+define void @f38(<4 x i32> %val, <4 x i32> %index, i64 %base) {
+; CHECK-LABEL: f38:
+; CHECK: vscef %v24, 0(%v26,%r2), 3
+; CHECK: br %r14
+ %elem = extractelement <4 x i32> %index, i32 3
+ %ext = zext i32 %elem to i64
+ %add = add i64 %base, %ext
+ %ptr = inttoptr i64 %add to i32 *
+ %element = extractelement <4 x i32> %val, i32 3
+ store i32 %element, i32 *%ptr
+ ret void
+}
+
+; Test a v4i32 scatter with the highest in-range offset.
+define void @f39(<4 x i32> %val, <4 x i32> %index, i64 %base) {
+; CHECK-LABEL: f39:
+; CHECK: vscef %v24, 4095(%v26,%r2), 1
+; CHECK: br %r14
+ %elem = extractelement <4 x i32> %index, i32 1
+ %ext = zext i32 %elem to i64
+ %add1 = add i64 %base, %ext
+ %add2 = add i64 %add1, 4095
+ %ptr = inttoptr i64 %add2 to i32 *
+ %element = extractelement <4 x i32> %val, i32 1
+ store i32 %element, i32 *%ptr
+ ret void
+}
+
+; Test a v2i64 scatter of the first element.
+define void @f40(<2 x i64> %val, <2 x i64> %index, i64 %base) {
+; CHECK-LABEL: f40:
+; CHECK: vsceg %v24, 0(%v26,%r2), 0
+; CHECK: br %r14
+ %elem = extractelement <2 x i64> %index, i32 0
+ %add = add i64 %base, %elem
+ %ptr = inttoptr i64 %add to i64 *
+ %element = extractelement <2 x i64> %val, i32 0
+ store i64 %element, i64 *%ptr
+ ret void
+}
+
+; Test a v2i64 scatter of the last element.
+define void @f41(<2 x i64> %val, <2 x i64> %index, i64 %base) {
+; CHECK-LABEL: f41:
+; CHECK: vsceg %v24, 0(%v26,%r2), 1
+; CHECK: br %r14
+ %elem = extractelement <2 x i64> %index, i32 1
+ %add = add i64 %base, %elem
+ %ptr = inttoptr i64 %add to i64 *
+ %element = extractelement <2 x i64> %val, i32 1
+ store i64 %element, i64 *%ptr
+ ret void
+}
+
+; Test a v4f32 scatter of the first element.
+define void @f42(<4 x float> %val, <4 x i32> %index, i64 %base) {
+; CHECK-LABEL: f42:
+; CHECK: vscef %v24, 0(%v26,%r2), 0
+; CHECK: br %r14
+ %elem = extractelement <4 x i32> %index, i32 0
+ %ext = zext i32 %elem to i64
+ %add = add i64 %base, %ext
+ %ptr = inttoptr i64 %add to float *
+ %element = extractelement <4 x float> %val, i32 0
+ store float %element, float *%ptr
+ ret void
+}
+
+; Test a v4f32 scatter of the last element.
+define void @f43(<4 x float> %val, <4 x i32> %index, i64 %base) {
+; CHECK-LABEL: f43:
+; CHECK: vscef %v24, 0(%v26,%r2), 3
+; CHECK: br %r14
+ %elem = extractelement <4 x i32> %index, i32 3
+ %ext = zext i32 %elem to i64
+ %add = add i64 %base, %ext
+ %ptr = inttoptr i64 %add to float *
+ %element = extractelement <4 x float> %val, i32 3
+ store float %element, float *%ptr
+ ret void
+}
+
+; Test a v2f64 scatter of the first element.
+define void @f44(<2 x double> %val, <2 x i64> %index, i64 %base) {
+; CHECK-LABEL: f44:
+; CHECK: vsceg %v24, 0(%v26,%r2), 0
+; CHECK: br %r14
+ %elem = extractelement <2 x i64> %index, i32 0
+ %add = add i64 %base, %elem
+ %ptr = inttoptr i64 %add to double *
+ %element = extractelement <2 x double> %val, i32 0
+ store double %element, double *%ptr
+ ret void
+}
+
+; Test a v2f64 scatter of the last element.
+define void @f45(<2 x double> %val, <2 x i64> %index, i64 %base) {
+; CHECK-LABEL: f45:
+; CHECK: vsceg %v24, 0(%v26,%r2), 1
+; CHECK: br %r14
+ %elem = extractelement <2 x i64> %index, i32 1
+ %add = add i64 %base, %elem
+ %ptr = inttoptr i64 %add to double *
+ %element = extractelement <2 x double> %val, i32 1
+ store double %element, double *%ptr
+ ret void
+}
diff --git a/test/CodeGen/SystemZ/vec-move-11.ll b/test/CodeGen/SystemZ/vec-move-11.ll
new file mode 100644
index 000000000000..fd9c3d3559f0
--- /dev/null
+++ b/test/CodeGen/SystemZ/vec-move-11.ll
@@ -0,0 +1,111 @@
+; Test insertions of register values at a nonzero index of an undef vector.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
+; Test v16i8 insertion into an undef, with an arbitrary index.
+define <16 x i8> @f1(i8 %val) {
+; CHECK-LABEL: f1:
+; CHECK: vlvgb %v24, %r2, 12
+; CHECK-NEXT: br %r14
+ %ret = insertelement <16 x i8> undef, i8 %val, i32 12
+ ret <16 x i8> %ret
+}
+
+; Test v16i8 insertion into an undef, with the first good index for VLVGP.
+define <16 x i8> @f2(i8 %val) {
+; CHECK-LABEL: f2:
+; CHECK: vlvgp %v24, %r2, %r2
+; CHECK-NEXT: br %r14
+ %ret = insertelement <16 x i8> undef, i8 %val, i32 7
+ ret <16 x i8> %ret
+}
+
+; Test v16i8 insertion into an undef, with the second good index for VLVGP.
+define <16 x i8> @f3(i8 %val) {
+; CHECK-LABEL: f3:
+; CHECK: vlvgp %v24, %r2, %r2
+; CHECK-NEXT: br %r14
+ %ret = insertelement <16 x i8> undef, i8 %val, i32 15
+ ret <16 x i8> %ret
+}
+
+; Test v8i16 insertion into an undef, with an arbitrary index.
+define <8 x i16> @f4(i16 %val) {
+; CHECK-LABEL: f4:
+; CHECK: vlvgh %v24, %r2, 5
+; CHECK-NEXT: br %r14
+ %ret = insertelement <8 x i16> undef, i16 %val, i32 5
+ ret <8 x i16> %ret
+}
+
+; Test v8i16 insertion into an undef, with the first good index for VLVGP.
+define <8 x i16> @f5(i16 %val) {
+; CHECK-LABEL: f5:
+; CHECK: vlvgp %v24, %r2, %r2
+; CHECK-NEXT: br %r14
+ %ret = insertelement <8 x i16> undef, i16 %val, i32 3
+ ret <8 x i16> %ret
+}
+
+; Test v8i16 insertion into an undef, with the second good index for VLVGP.
+define <8 x i16> @f6(i16 %val) {
+; CHECK-LABEL: f6:
+; CHECK: vlvgp %v24, %r2, %r2
+; CHECK-NEXT: br %r14
+ %ret = insertelement <8 x i16> undef, i16 %val, i32 7
+ ret <8 x i16> %ret
+}
+
+; Test v4i32 insertion into an undef, with an arbitrary index.
+define <4 x i32> @f7(i32 %val) {
+; CHECK-LABEL: f7:
+; CHECK: vlvgf %v24, %r2, 2
+; CHECK-NEXT: br %r14
+ %ret = insertelement <4 x i32> undef, i32 %val, i32 2
+ ret <4 x i32> %ret
+}
+
+; Test v4i32 insertion into an undef, with the first good index for VLVGP.
+define <4 x i32> @f8(i32 %val) {
+; CHECK-LABEL: f8:
+; CHECK: vlvgp %v24, %r2, %r2
+; CHECK-NEXT: br %r14
+ %ret = insertelement <4 x i32> undef, i32 %val, i32 1
+ ret <4 x i32> %ret
+}
+
+; Test v4i32 insertion into an undef, with the second good index for VLVGP.
+define <4 x i32> @f9(i32 %val) {
+; CHECK-LABEL: f9:
+; CHECK: vlvgp %v24, %r2, %r2
+; CHECK-NEXT: br %r14
+ %ret = insertelement <4 x i32> undef, i32 %val, i32 3
+ ret <4 x i32> %ret
+}
+
+; Test v2i64 insertion into an undef.
+define <2 x i64> @f10(i64 %val) {
+; CHECK-LABEL: f10:
+; CHECK: vlvgp %v24, %r2, %r2
+; CHECK-NEXT: br %r14
+ %ret = insertelement <2 x i64> undef, i64 %val, i32 1
+ ret <2 x i64> %ret
+}
+
+; Test v4f32 insertion into an undef.
+define <4 x float> @f11(float %val) {
+; CHECK-LABEL: f11:
+; CHECK: vrepf %v24, %v0, 0
+; CHECK: br %r14
+ %ret = insertelement <4 x float> undef, float %val, i32 2
+ ret <4 x float> %ret
+}
+
+; Test v2f64 insertion into an undef.
+define <2 x double> @f12(double %val) {
+; CHECK-LABEL: f12:
+; CHECK: vrepg %v24, %v0, 0
+; CHECK: br %r14
+ %ret = insertelement <2 x double> undef, double %val, i32 1
+ ret <2 x double> %ret
+}
diff --git a/test/CodeGen/SystemZ/vec-move-12.ll b/test/CodeGen/SystemZ/vec-move-12.ll
new file mode 100644
index 000000000000..bc8ff97f8057
--- /dev/null
+++ b/test/CodeGen/SystemZ/vec-move-12.ll
@@ -0,0 +1,123 @@
+; Test insertions of memory values at a nonzero index of an undef vector.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
+; Test v16i8 insertion into an undef, with an arbitrary index.
+define <16 x i8> @f1(i8 *%ptr) {
+; CHECK-LABEL: f1:
+; CHECK: vlrepb %v24, 0(%r2)
+; CHECK-NEXT: br %r14
+ %val = load i8, i8 *%ptr
+ %ret = insertelement <16 x i8> undef, i8 %val, i32 12
+ ret <16 x i8> %ret
+}
+
+; Test v16i8 insertion into an undef, with the first good index for VLVGP.
+define <16 x i8> @f2(i8 *%ptr) {
+; CHECK-LABEL: f2:
+; CHECK: {{vlrepb|vllezb}} %v24, 0(%r2)
+; CHECK-NEXT: br %r14
+ %val = load i8, i8 *%ptr
+ %ret = insertelement <16 x i8> undef, i8 %val, i32 7
+ ret <16 x i8> %ret
+}
+
+; Test v16i8 insertion into an undef, with the second good index for VLVGP.
+define <16 x i8> @f3(i8 *%ptr) {
+; CHECK-LABEL: f3:
+; CHECK: vlrepb %v24, 0(%r2)
+; CHECK-NEXT: br %r14
+ %val = load i8, i8 *%ptr
+ %ret = insertelement <16 x i8> undef, i8 %val, i32 15
+ ret <16 x i8> %ret
+}
+
+; Test v8i16 insertion into an undef, with an arbitrary index.
+define <8 x i16> @f4(i16 *%ptr) {
+; CHECK-LABEL: f4:
+; CHECK: vlreph %v24, 0(%r2)
+; CHECK-NEXT: br %r14
+ %val = load i16, i16 *%ptr
+ %ret = insertelement <8 x i16> undef, i16 %val, i32 5
+ ret <8 x i16> %ret
+}
+
+; Test v8i16 insertion into an undef, with the first good index for VLVGP.
+define <8 x i16> @f5(i16 *%ptr) {
+; CHECK-LABEL: f5:
+; CHECK: {{vlreph|vllezh}} %v24, 0(%r2)
+; CHECK-NEXT: br %r14
+ %val = load i16, i16 *%ptr
+ %ret = insertelement <8 x i16> undef, i16 %val, i32 3
+ ret <8 x i16> %ret
+}
+
+; Test v8i16 insertion into an undef, with the second good index for VLVGP.
+define <8 x i16> @f6(i16 *%ptr) {
+; CHECK-LABEL: f6:
+; CHECK: vlreph %v24, 0(%r2)
+; CHECK-NEXT: br %r14
+ %val = load i16, i16 *%ptr
+ %ret = insertelement <8 x i16> undef, i16 %val, i32 7
+ ret <8 x i16> %ret
+}
+
+; Test v4i32 insertion into an undef, with an arbitrary index.
+define <4 x i32> @f7(i32 *%ptr) {
+; CHECK-LABEL: f7:
+; CHECK: vlrepf %v24, 0(%r2)
+; CHECK-NEXT: br %r14
+ %val = load i32, i32 *%ptr
+ %ret = insertelement <4 x i32> undef, i32 %val, i32 2
+ ret <4 x i32> %ret
+}
+
+; Test v4i32 insertion into an undef, with the first good index for VLVGP.
+define <4 x i32> @f8(i32 *%ptr) {
+; CHECK-LABEL: f8:
+; CHECK: {{vlrepf|vllezf}} %v24, 0(%r2)
+; CHECK-NEXT: br %r14
+ %val = load i32, i32 *%ptr
+ %ret = insertelement <4 x i32> undef, i32 %val, i32 1
+ ret <4 x i32> %ret
+}
+
+; Test v4i32 insertion into an undef, with the second good index for VLVGP.
+define <4 x i32> @f9(i32 *%ptr) {
+; CHECK-LABEL: f9:
+; CHECK: vlrepf %v24, 0(%r2)
+; CHECK-NEXT: br %r14
+ %val = load i32, i32 *%ptr
+ %ret = insertelement <4 x i32> undef, i32 %val, i32 3
+ ret <4 x i32> %ret
+}
+
+; Test v2i64 insertion into an undef.
+define <2 x i64> @f10(i64 *%ptr) {
+; CHECK-LABEL: f10:
+; CHECK: vlrepg %v24, 0(%r2)
+; CHECK-NEXT: br %r14
+ %val = load i64, i64 *%ptr
+ %ret = insertelement <2 x i64> undef, i64 %val, i32 1
+ ret <2 x i64> %ret
+}
+
+; Test v4f32 insertion into an undef.
+define <4 x float> @f11(float *%ptr) {
+; CHECK-LABEL: f11:
+; CHECK: vlrepf %v24, 0(%r2)
+; CHECK: br %r14
+ %val = load float, float *%ptr
+ %ret = insertelement <4 x float> undef, float %val, i32 2
+ ret <4 x float> %ret
+}
+
+; Test v2f64 insertion into an undef.
+define <2 x double> @f12(double *%ptr) {
+; CHECK-LABEL: f12:
+; CHECK: vlrepg %v24, 0(%r2)
+; CHECK: br %r14
+ %val = load double, double *%ptr
+ %ret = insertelement <2 x double> undef, double %val, i32 1
+ ret <2 x double> %ret
+}
diff --git a/test/CodeGen/SystemZ/vec-move-13.ll b/test/CodeGen/SystemZ/vec-move-13.ll
new file mode 100644
index 000000000000..165c3498702f
--- /dev/null
+++ b/test/CodeGen/SystemZ/vec-move-13.ll
@@ -0,0 +1,69 @@
+; Test insertions of register values into 0.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
+; Test v16i8 insertion into 0.
+define <16 x i8> @f1(i8 %val1, i8 %val2) {
+; CHECK-LABEL: f1:
+; CHECK: vgbm %v24, 0
+; CHECK-DAG: vlvgb %v24, %r2, 2
+; CHECK-DAG: vlvgb %v24, %r3, 12
+; CHECK: br %r14
+ %vec1 = insertelement <16 x i8> zeroinitializer, i8 %val1, i32 2
+ %vec2 = insertelement <16 x i8> %vec1, i8 %val2, i32 12
+ ret <16 x i8> %vec2
+}
+
+; Test v8i16 insertion into 0.
+define <8 x i16> @f2(i16 %val1, i16 %val2) {
+; CHECK-LABEL: f2:
+; CHECK: vgbm %v24, 0
+; CHECK-DAG: vlvgh %v24, %r2, 3
+; CHECK-DAG: vlvgh %v24, %r3, 5
+; CHECK: br %r14
+ %vec1 = insertelement <8 x i16> zeroinitializer, i16 %val1, i32 3
+ %vec2 = insertelement <8 x i16> %vec1, i16 %val2, i32 5
+ ret <8 x i16> %vec2
+}
+
+; Test v4i32 insertion into 0.
+define <4 x i32> @f3(i32 %val) {
+; CHECK-LABEL: f3:
+; CHECK: vgbm %v24, 0
+; CHECK: vlvgf %v24, %r2, 3
+; CHECK: br %r14
+ %ret = insertelement <4 x i32> zeroinitializer, i32 %val, i32 3
+ ret <4 x i32> %ret
+}
+
+; Test v2i64 insertion into 0.
+define <2 x i64> @f4(i64 %val) {
+; CHECK-LABEL: f4:
+; CHECK: lghi [[REG:%r[0-5]]], 0
+; CHECK: vlvgp %v24, [[REG]], %r2
+; CHECK: br %r14
+ %ret = insertelement <2 x i64> zeroinitializer, i64 %val, i32 1
+ ret <2 x i64> %ret
+}
+
+; Test v4f32 insertion into 0.
+define <4 x float> @f5(float %val) {
+; CHECK-LABEL: f5:
+; CHECK-DAG: vuplhf [[REG:%v[0-9]+]], %v0
+; CHECK-DAG: vgbm [[ZERO:%v[0-9]+]], 0
+; CHECK: vmrhg %v24, [[ZERO]], [[REG]]
+; CHECK: br %r14
+ %ret = insertelement <4 x float> zeroinitializer, float %val, i32 3
+ ret <4 x float> %ret
+}
+
+; Test v2f64 insertion into 0.
+define <2 x double> @f6(double %val) {
+; CHECK-LABEL: f6:
+; CHECK: vgbm [[REG:%v[0-9]+]], 0
+; CHECK: vmrhg %v24, [[REG]], %v0
+; CHECK: br %r14
+ %ret = insertelement <2 x double> zeroinitializer, double %val, i32 1
+ ret <2 x double> %ret
+}
+
diff --git a/test/CodeGen/SystemZ/vec-move-14.ll b/test/CodeGen/SystemZ/vec-move-14.ll
new file mode 100644
index 000000000000..e41eb9da0346
--- /dev/null
+++ b/test/CodeGen/SystemZ/vec-move-14.ll
@@ -0,0 +1,96 @@
+; Test insertions of memory values into 0.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
+; Test VLLEZB.
+define <16 x i8> @f1(i8 *%ptr) {
+; CHECK-LABEL: f1:
+; CHECK: vllezb %v24, 0(%r2)
+; CHECK: br %r14
+ %val = load i8, i8 *%ptr
+ %ret = insertelement <16 x i8> zeroinitializer, i8 %val, i32 7
+ ret <16 x i8> %ret
+}
+
+; Test VLLEZB with the highest in-range offset.
+define <16 x i8> @f2(i8 *%base) {
+; CHECK-LABEL: f2:
+; CHECK: vllezb %v24, 4095(%r2)
+; CHECK: br %r14
+ %ptr = getelementptr i8, i8 *%base, i64 4095
+ %val = load i8, i8 *%ptr
+ %ret = insertelement <16 x i8> zeroinitializer, i8 %val, i32 7
+ ret <16 x i8> %ret
+}
+
+; Test VLLEZB with the next highest offset.
+define <16 x i8> @f3(i8 *%base) {
+; CHECK-LABEL: f3:
+; CHECK-NOT: vllezb %v24, 4096(%r2)
+; CHECK: br %r14
+ %ptr = getelementptr i8, i8 *%base, i64 4096
+ %val = load i8, i8 *%ptr
+ %ret = insertelement <16 x i8> zeroinitializer, i8 %val, i32 7
+ ret <16 x i8> %ret
+}
+
+; Test that VLLEZB allows an index.
+define <16 x i8> @f4(i8 *%base, i64 %index) {
+; CHECK-LABEL: f4:
+; CHECK: vllezb %v24, 0({{%r2,%r3|%r3,%r2}})
+; CHECK: br %r14
+ %ptr = getelementptr i8, i8 *%base, i64 %index
+ %val = load i8, i8 *%ptr
+ %ret = insertelement <16 x i8> zeroinitializer, i8 %val, i32 7
+ ret <16 x i8> %ret
+}
+
+; Test VLLEZH.
+define <8 x i16> @f5(i16 *%ptr) {
+; CHECK-LABEL: f5:
+; CHECK: vllezh %v24, 0(%r2)
+; CHECK: br %r14
+ %val = load i16, i16 *%ptr
+ %ret = insertelement <8 x i16> zeroinitializer, i16 %val, i32 3
+ ret <8 x i16> %ret
+}
+
+; Test VLLEZF.
+define <4 x i32> @f6(i32 *%ptr) {
+; CHECK-LABEL: f6:
+; CHECK: vllezf %v24, 0(%r2)
+; CHECK: br %r14
+ %val = load i32, i32 *%ptr
+ %ret = insertelement <4 x i32> zeroinitializer, i32 %val, i32 1
+ ret <4 x i32> %ret
+}
+
+; Test VLLEZG.
+define <2 x i64> @f7(i64 *%ptr) {
+; CHECK-LABEL: f7:
+; CHECK: vllezg %v24, 0(%r2)
+; CHECK: br %r14
+ %val = load i64, i64 *%ptr
+ %ret = insertelement <2 x i64> zeroinitializer, i64 %val, i32 0
+ ret <2 x i64> %ret
+}
+
+; Test VLLEZF with a float.
+define <4 x float> @f8(float *%ptr) {
+; CHECK-LABEL: f8:
+; CHECK: vllezf %v24, 0(%r2)
+; CHECK: br %r14
+ %val = load float, float *%ptr
+ %ret = insertelement <4 x float> zeroinitializer, float %val, i32 1
+ ret <4 x float> %ret
+}
+
+; Test VLLEZG with a double.
+define <2 x double> @f9(double *%ptr) {
+; CHECK-LABEL: f9:
+; CHECK: vllezg %v24, 0(%r2)
+; CHECK: br %r14
+ %val = load double, double *%ptr
+ %ret = insertelement <2 x double> zeroinitializer, double %val, i32 0
+ ret <2 x double> %ret
+}
diff --git a/test/CodeGen/SystemZ/vec-move-15.ll b/test/CodeGen/SystemZ/vec-move-15.ll
new file mode 100644
index 000000000000..503627c163c6
--- /dev/null
+++ b/test/CodeGen/SystemZ/vec-move-15.ll
@@ -0,0 +1,105 @@
+; Test vector sign-extending loads.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
+; Test a v16i1->v16i8 extension.
+define <16 x i8> @f1(<16 x i1> *%ptr) {
+; No expected output, but must compile.
+ %val = load <16 x i1>, <16 x i1> *%ptr
+ %ret = sext <16 x i1> %val to <16 x i8>
+ ret <16 x i8> %ret
+}
+
+; Test a v8i1->v8i16 extension.
+define <8 x i16> @f2(<8 x i1> *%ptr) {
+; No expected output, but must compile.
+ %val = load <8 x i1>, <8 x i1> *%ptr
+ %ret = sext <8 x i1> %val to <8 x i16>
+ ret <8 x i16> %ret
+}
+
+; Test a v8i8->v8i16 extension.
+define <8 x i16> @f3(<8 x i8> *%ptr) {
+; CHECK-LABEL: f3:
+; CHECK: vlrepg [[REG1:%v[0-9]+]], 0(%r2)
+; CHECK: vuphb %v24, [[REG1]]
+; CHECK: br %r14
+ %val = load <8 x i8>, <8 x i8> *%ptr
+ %ret = sext <8 x i8> %val to <8 x i16>
+ ret <8 x i16> %ret
+}
+
+; Test a v4i1->v4i32 extension.
+define <4 x i32> @f4(<4 x i1> *%ptr) {
+; No expected output, but must compile.
+ %val = load <4 x i1>, <4 x i1> *%ptr
+ %ret = sext <4 x i1> %val to <4 x i32>
+ ret <4 x i32> %ret
+}
+
+; Test a v4i8->v4i32 extension.
+define <4 x i32> @f5(<4 x i8> *%ptr) {
+; CHECK-LABEL: f5:
+; CHECK: vlrepf [[REG1:%v[0-9]+]], 0(%r2)
+; CHECK: vuphb [[REG2:%v[0-9]+]], [[REG1]]
+; CHECK: vuphh %v24, [[REG2]]
+; CHECK: br %r14
+ %val = load <4 x i8>, <4 x i8> *%ptr
+ %ret = sext <4 x i8> %val to <4 x i32>
+ ret <4 x i32> %ret
+}
+
+; Test a v4i16->v4i32 extension.
+define <4 x i32> @f6(<4 x i16> *%ptr) {
+; CHECK-LABEL: f6:
+; CHECK: vlrepg [[REG1:%v[0-9]+]], 0(%r2)
+; CHECK: vuphh %v24, [[REG1]]
+; CHECK: br %r14
+ %val = load <4 x i16>, <4 x i16> *%ptr
+ %ret = sext <4 x i16> %val to <4 x i32>
+ ret <4 x i32> %ret
+}
+
+; Test a v2i1->v2i64 extension.
+define <2 x i64> @f7(<2 x i1> *%ptr) {
+; No expected output, but must compile.
+ %val = load <2 x i1>, <2 x i1> *%ptr
+ %ret = sext <2 x i1> %val to <2 x i64>
+ ret <2 x i64> %ret
+}
+
+; Test a v2i8->v2i64 extension.
+define <2 x i64> @f8(<2 x i8> *%ptr) {
+; CHECK-LABEL: f8:
+; CHECK: vlreph [[REG1:%v[0-9]+]], 0(%r2)
+; CHECK: vuphb [[REG2:%v[0-9]+]], [[REG1]]
+; CHECK: vuphh [[REG3:%v[0-9]+]], [[REG2]]
+; CHECK: vuphf %v24, [[REG3]]
+; CHECK: br %r14
+ %val = load <2 x i8>, <2 x i8> *%ptr
+ %ret = sext <2 x i8> %val to <2 x i64>
+ ret <2 x i64> %ret
+}
+
+; Test a v2i16->v2i64 extension.
+define <2 x i64> @f9(<2 x i16> *%ptr) {
+; CHECK-LABEL: f9:
+; CHECK: vlrepf [[REG1:%v[0-9]+]], 0(%r2)
+; CHECK: vuphh [[REG2:%v[0-9]+]], [[REG1]]
+; CHECK: vuphf %v24, [[REG2]]
+; CHECK: br %r14
+ %val = load <2 x i16>, <2 x i16> *%ptr
+ %ret = sext <2 x i16> %val to <2 x i64>
+ ret <2 x i64> %ret
+}
+
+; Test a v2i32->v2i64 extension.
+define <2 x i64> @f10(<2 x i32> *%ptr) {
+; CHECK-LABEL: f10:
+; CHECK: vlrepg [[REG1:%v[0-9]+]], 0(%r2)
+; CHECK: vuphf %v24, [[REG1]]
+; CHECK: br %r14
+ %val = load <2 x i32>, <2 x i32> *%ptr
+ %ret = sext <2 x i32> %val to <2 x i64>
+ ret <2 x i64> %ret
+}
diff --git a/test/CodeGen/SystemZ/vec-move-16.ll b/test/CodeGen/SystemZ/vec-move-16.ll
new file mode 100644
index 000000000000..cd2577396800
--- /dev/null
+++ b/test/CodeGen/SystemZ/vec-move-16.ll
@@ -0,0 +1,105 @@
+; Test vector zero-extending loads.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
+; Test a v16i1->v16i8 extension.
+define <16 x i8> @f1(<16 x i1> *%ptr) {
+; No expected output, but must compile.
+ %val = load <16 x i1>, <16 x i1> *%ptr
+ %ret = zext <16 x i1> %val to <16 x i8>
+ ret <16 x i8> %ret
+}
+
+; Test a v8i1->v8i16 extension.
+define <8 x i16> @f2(<8 x i1> *%ptr) {
+; No expected output, but must compile.
+ %val = load <8 x i1>, <8 x i1> *%ptr
+ %ret = zext <8 x i1> %val to <8 x i16>
+ ret <8 x i16> %ret
+}
+
+; Test a v8i8->v8i16 extension.
+define <8 x i16> @f3(<8 x i8> *%ptr) {
+; CHECK-LABEL: f3:
+; CHECK: vlrepg [[REG1:%v[0-9]+]], 0(%r2)
+; CHECK: vuplhb %v24, [[REG1]]
+; CHECK: br %r14
+ %val = load <8 x i8>, <8 x i8> *%ptr
+ %ret = zext <8 x i8> %val to <8 x i16>
+ ret <8 x i16> %ret
+}
+
+; Test a v4i1->v4i32 extension.
+define <4 x i32> @f4(<4 x i1> *%ptr) {
+; No expected output, but must compile.
+ %val = load <4 x i1>, <4 x i1> *%ptr
+ %ret = zext <4 x i1> %val to <4 x i32>
+ ret <4 x i32> %ret
+}
+
+; Test a v4i8->v4i32 extension.
+define <4 x i32> @f5(<4 x i8> *%ptr) {
+; CHECK-LABEL: f5:
+; CHECK: vlrepf [[REG1:%v[0-9]+]], 0(%r2)
+; CHECK: vuplhb [[REG2:%v[0-9]+]], [[REG1]]
+; CHECK: vuplhh %v24, [[REG2]]
+; CHECK: br %r14
+ %val = load <4 x i8>, <4 x i8> *%ptr
+ %ret = zext <4 x i8> %val to <4 x i32>
+ ret <4 x i32> %ret
+}
+
+; Test a v4i16->v4i32 extension.
+define <4 x i32> @f6(<4 x i16> *%ptr) {
+; CHECK-LABEL: f6:
+; CHECK: vlrepg [[REG1:%v[0-9]+]], 0(%r2)
+; CHECK: vuplhh %v24, [[REG1]]
+; CHECK: br %r14
+ %val = load <4 x i16>, <4 x i16> *%ptr
+ %ret = zext <4 x i16> %val to <4 x i32>
+ ret <4 x i32> %ret
+}
+
+; Test a v2i1->v2i64 extension.
+define <2 x i64> @f7(<2 x i1> *%ptr) {
+; No expected output, but must compile.
+ %val = load <2 x i1>, <2 x i1> *%ptr
+ %ret = zext <2 x i1> %val to <2 x i64>
+ ret <2 x i64> %ret
+}
+
+; Test a v2i8->v2i64 extension.
+define <2 x i64> @f8(<2 x i8> *%ptr) {
+; CHECK-LABEL: f8:
+; CHECK: vlreph [[REG1:%v[0-9]+]], 0(%r2)
+; CHECK: vuplhb [[REG2:%v[0-9]+]], [[REG1]]
+; CHECK: vuplhh [[REG3:%v[0-9]+]], [[REG2]]
+; CHECK: vuplhf %v24, [[REG3]]
+; CHECK: br %r14
+ %val = load <2 x i8>, <2 x i8> *%ptr
+ %ret = zext <2 x i8> %val to <2 x i64>
+ ret <2 x i64> %ret
+}
+
+; Test a v2i16->v2i64 extension.
+define <2 x i64> @f9(<2 x i16> *%ptr) {
+; CHECK-LABEL: f9:
+; CHECK: vlrepf [[REG1:%v[0-9]+]], 0(%r2)
+; CHECK: vuplhh [[REG2:%v[0-9]+]], [[REG1]]
+; CHECK: vuplhf %v24, [[REG2]]
+; CHECK: br %r14
+ %val = load <2 x i16>, <2 x i16> *%ptr
+ %ret = zext <2 x i16> %val to <2 x i64>
+ ret <2 x i64> %ret
+}
+
+; Test a v2i32->v2i64 extension.
+define <2 x i64> @f10(<2 x i32> *%ptr) {
+; CHECK-LABEL: f10:
+; CHECK: vlrepg [[REG1:%v[0-9]+]], 0(%r2)
+; CHECK: vuplhf %v24, [[REG1]]
+; CHECK: br %r14
+ %val = load <2 x i32>, <2 x i32> *%ptr
+ %ret = zext <2 x i32> %val to <2 x i64>
+ ret <2 x i64> %ret
+}
diff --git a/test/CodeGen/SystemZ/vec-move-17.ll b/test/CodeGen/SystemZ/vec-move-17.ll
new file mode 100644
index 000000000000..e7fc06c9260c
--- /dev/null
+++ b/test/CodeGen/SystemZ/vec-move-17.ll
@@ -0,0 +1,104 @@
+; Test vector truncating stores.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
+; Test a v16i8->v16i1 truncation.
+define void @f1(<16 x i8> %val, <16 x i1> *%ptr) {
+; No expected output, but must compile.
+ %trunc = trunc <16 x i8> %val to <16 x i1>
+ store <16 x i1> %trunc, <16 x i1> *%ptr
+ ret void
+}
+
+; Test a v8i16->v8i1 truncation.
+define void @f2(<8 x i16> %val, <8 x i1> *%ptr) {
+; No expected output, but must compile.
+ %trunc = trunc <8 x i16> %val to <8 x i1>
+ store <8 x i1> %trunc, <8 x i1> *%ptr
+ ret void
+}
+
+; Test a v8i16->v8i8 truncation.
+define void @f3(<8 x i16> %val, <8 x i8> *%ptr) {
+; CHECK-LABEL: f3:
+; CHECK: vpkh [[REG1:%v[0-9]+]], %v24, %v24
+; CHECK: vsteg [[REG1]], 0(%r2)
+; CHECK: br %r14
+ %trunc = trunc <8 x i16> %val to <8 x i8>
+ store <8 x i8> %trunc, <8 x i8> *%ptr
+ ret void
+}
+
+; Test a v4i32->v4i1 truncation.
+define void @f4(<4 x i32> %val, <4 x i1> *%ptr) {
+; No expected output, but must compile.
+ %trunc = trunc <4 x i32> %val to <4 x i1>
+ store <4 x i1> %trunc, <4 x i1> *%ptr
+ ret void
+}
+
+; Test a v4i32->v4i8 truncation. At the moment we use a VPERM rather than
+; a chain of packs.
+define void @f5(<4 x i32> %val, <4 x i8> *%ptr) {
+; CHECK-LABEL: f5:
+; CHECK: vperm [[REG:%v[0-9]+]],
+; CHECK: vstef [[REG]], 0(%r2)
+; CHECK: br %r14
+ %trunc = trunc <4 x i32> %val to <4 x i8>
+ store <4 x i8> %trunc, <4 x i8> *%ptr
+ ret void
+}
+
+; Test a v4i32->v4i16 truncation.
+define void @f6(<4 x i32> %val, <4 x i16> *%ptr) {
+; CHECK-LABEL: f6:
+; CHECK: vpkf [[REG1:%v[0-9]+]], %v24, %v24
+; CHECK: vsteg [[REG1]], 0(%r2)
+; CHECK: br %r14
+ %trunc = trunc <4 x i32> %val to <4 x i16>
+ store <4 x i16> %trunc, <4 x i16> *%ptr
+ ret void
+}
+
+; Test a v2i64->v2i1 truncation.
+define void @f7(<2 x i64> %val, <2 x i1> *%ptr) {
+; No expected output, but must compile.
+ %trunc = trunc <2 x i64> %val to <2 x i1>
+ store <2 x i1> %trunc, <2 x i1> *%ptr
+ ret void
+}
+
+; Test a v2i64->v2i8 truncation. At the moment we use a VPERM rather than
+; a chain of packs.
+define void @f8(<2 x i64> %val, <2 x i8> *%ptr) {
+; CHECK-LABEL: f8:
+; CHECK: vperm [[REG:%v[0-9]+]],
+; CHECK: vsteh [[REG]], 0(%r2)
+; CHECK: br %r14
+ %trunc = trunc <2 x i64> %val to <2 x i8>
+ store <2 x i8> %trunc, <2 x i8> *%ptr
+ ret void
+}
+
+; Test a v2i64->v2i16 truncation. At the moment we use a VPERM rather than
+; a chain of packs.
+define void @f9(<2 x i64> %val, <2 x i16> *%ptr) {
+; CHECK-LABEL: f9:
+; CHECK: vperm [[REG:%v[0-9]+]],
+; CHECK: vstef [[REG]], 0(%r2)
+; CHECK: br %r14
+ %trunc = trunc <2 x i64> %val to <2 x i16>
+ store <2 x i16> %trunc, <2 x i16> *%ptr
+ ret void
+}
+
+; Test a v2i64->v2i32 truncation.
+define void @f10(<2 x i64> %val, <2 x i32> *%ptr) {
+; CHECK-LABEL: f10:
+; CHECK: vpkg [[REG1:%v[0-9]+]], %v24, %v24
+; CHECK: vsteg [[REG1]], 0(%r2)
+; CHECK: br %r14
+ %trunc = trunc <2 x i64> %val to <2 x i32>
+ store <2 x i32> %trunc, <2 x i32> *%ptr
+ ret void
+}
diff --git a/test/CodeGen/SystemZ/vec-mul-01.ll b/test/CodeGen/SystemZ/vec-mul-01.ll
new file mode 100644
index 000000000000..5ecc30d4427a
--- /dev/null
+++ b/test/CodeGen/SystemZ/vec-mul-01.ll
@@ -0,0 +1,60 @@
+; Test vector multiplication.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
+; Test a v16i8 multiplication.
+define <16 x i8> @f1(<16 x i8> %dummy, <16 x i8> %val1, <16 x i8> %val2) {
+; CHECK-LABEL: f1:
+; CHECK: vmlb %v24, %v26, %v28
+; CHECK: br %r14
+ %ret = mul <16 x i8> %val1, %val2
+ ret <16 x i8> %ret
+}
+
+; Test a v8i16 multiplication.
+define <8 x i16> @f2(<8 x i16> %dummy, <8 x i16> %val1, <8 x i16> %val2) {
+; CHECK-LABEL: f2:
+; CHECK: vmlhw %v24, %v26, %v28
+; CHECK: br %r14
+ %ret = mul <8 x i16> %val1, %val2
+ ret <8 x i16> %ret
+}
+
+; Test a v4i32 multiplication.
+define <4 x i32> @f3(<4 x i32> %dummy, <4 x i32> %val1, <4 x i32> %val2) {
+; CHECK-LABEL: f3:
+; CHECK: vmlf %v24, %v26, %v28
+; CHECK: br %r14
+ %ret = mul <4 x i32> %val1, %val2
+ ret <4 x i32> %ret
+}
+
+; Test a v2i64 multiplication. There's no vector equivalent.
+define <2 x i64> @f4(<2 x i64> %dummy, <2 x i64> %val1, <2 x i64> %val2) {
+; CHECK-LABEL: f4:
+; CHECK-NOT: vmlg
+; CHECK: br %r14
+ %ret = mul <2 x i64> %val1, %val2
+ ret <2 x i64> %ret
+}
+
+; Test a v2f64 multiplication.
+define <2 x double> @f5(<2 x double> %dummy, <2 x double> %val1,
+ <2 x double> %val2) {
+; CHECK-LABEL: f5:
+; CHECK: vfmdb %v24, %v26, %v28
+; CHECK: br %r14
+ %ret = fmul <2 x double> %val1, %val2
+ ret <2 x double> %ret
+}
+
+; Test an f64 multiplication that uses vector registers.
+define double @f6(<2 x double> %val1, <2 x double> %val2) {
+; CHECK-LABEL: f6:
+; CHECK: wfmdb %f0, %v24, %v26
+; CHECK: br %r14
+ %scalar1 = extractelement <2 x double> %val1, i32 0
+ %scalar2 = extractelement <2 x double> %val2, i32 0
+ %ret = fmul double %scalar1, %scalar2
+ ret double %ret
+}
diff --git a/test/CodeGen/SystemZ/vec-mul-02.ll b/test/CodeGen/SystemZ/vec-mul-02.ll
new file mode 100644
index 000000000000..11a651e49975
--- /dev/null
+++ b/test/CodeGen/SystemZ/vec-mul-02.ll
@@ -0,0 +1,63 @@
+; Test vector multiply-and-add.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
+declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>)
+
+; Test a v16i8 multiply-and-add.
+define <16 x i8> @f1(<16 x i8> %dummy, <16 x i8> %val1, <16 x i8> %val2,
+ <16 x i8> %val3) {
+; CHECK-LABEL: f1:
+; CHECK: vmalb %v24, %v26, %v28, %v30
+; CHECK: br %r14
+ %mul = mul <16 x i8> %val1, %val2
+ %ret = add <16 x i8> %mul, %val3
+ ret <16 x i8> %ret
+}
+
+; Test a v8i16 multiply-and-add.
+define <8 x i16> @f2(<8 x i16> %dummy, <8 x i16> %val1, <8 x i16> %val2,
+ <8 x i16> %val3) {
+; CHECK-LABEL: f2:
+; CHECK: vmalhw %v24, %v26, %v28, %v30
+; CHECK: br %r14
+ %mul = mul <8 x i16> %val1, %val2
+ %ret = add <8 x i16> %mul, %val3
+ ret <8 x i16> %ret
+}
+
+; Test a v4i32 multiply-and-add.
+define <4 x i32> @f3(<4 x i32> %dummy, <4 x i32> %val1, <4 x i32> %val2,
+ <4 x i32> %val3) {
+; CHECK-LABEL: f3:
+; CHECK: vmalf %v24, %v26, %v28, %v30
+; CHECK: br %r14
+ %mul = mul <4 x i32> %val1, %val2
+ %ret = add <4 x i32> %mul, %val3
+ ret <4 x i32> %ret
+}
+
+; Test a v2f64 multiply-and-add.
+define <2 x double> @f4(<2 x double> %dummy, <2 x double> %val1,
+ <2 x double> %val2, <2 x double> %val3) {
+; CHECK-LABEL: f4:
+; CHECK: vfmadb %v24, %v26, %v28, %v30
+; CHECK: br %r14
+ %ret = call <2 x double> @llvm.fma.v2f64 (<2 x double> %val1,
+ <2 x double> %val2,
+ <2 x double> %val3)
+ ret <2 x double> %ret
+}
+
+; Test a v2f64 multiply-and-subtract.
+define <2 x double> @f5(<2 x double> %dummy, <2 x double> %val1,
+ <2 x double> %val2, <2 x double> %val3) {
+; CHECK-LABEL: f5:
+; CHECK: vfmsdb %v24, %v26, %v28, %v30
+; CHECK: br %r14
+ %negval3 = fsub <2 x double> <double -0.0, double -0.0>, %val3
+ %ret = call <2 x double> @llvm.fma.v2f64 (<2 x double> %val1,
+ <2 x double> %val2,
+ <2 x double> %negval3)
+ ret <2 x double> %ret
+}
diff --git a/test/CodeGen/SystemZ/vec-neg-01.ll b/test/CodeGen/SystemZ/vec-neg-01.ll
new file mode 100644
index 000000000000..b1389ce4d6d0
--- /dev/null
+++ b/test/CodeGen/SystemZ/vec-neg-01.ll
@@ -0,0 +1,58 @@
+; Test vector negation.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
+; Test a v16i8 negation.
+define <16 x i8> @f1(<16 x i8> %dummy, <16 x i8> %val) {
+; CHECK-LABEL: f1:
+; CHECK: vlcb %v24, %v26
+; CHECK: br %r14
+ %ret = sub <16 x i8> zeroinitializer, %val
+ ret <16 x i8> %ret
+}
+
+; Test a v8i16 negation.
+define <8 x i16> @f2(<8 x i16> %dummy, <8 x i16> %val) {
+; CHECK-LABEL: f2:
+; CHECK: vlch %v24, %v26
+; CHECK: br %r14
+ %ret = sub <8 x i16> zeroinitializer, %val
+ ret <8 x i16> %ret
+}
+
+; Test a v4i32 negation.
+define <4 x i32> @f3(<4 x i32> %dummy, <4 x i32> %val) {
+; CHECK-LABEL: f3:
+; CHECK: vlcf %v24, %v26
+; CHECK: br %r14
+ %ret = sub <4 x i32> zeroinitializer, %val
+ ret <4 x i32> %ret
+}
+
+; Test a v2i64 negation.
+define <2 x i64> @f4(<2 x i64> %dummy, <2 x i64> %val) {
+; CHECK-LABEL: f4:
+; CHECK: vlcg %v24, %v26
+; CHECK: br %r14
+ %ret = sub <2 x i64> zeroinitializer, %val
+ ret <2 x i64> %ret
+}
+
+; Test a v2f64 negation.
+define <2 x double> @f5(<2 x double> %dummy, <2 x double> %val) {
+; CHECK-LABEL: f5:
+; CHECK: vflcdb %v24, %v26
+; CHECK: br %r14
+ %ret = fsub <2 x double> <double -0.0, double -0.0>, %val
+ ret <2 x double> %ret
+}
+
+; Test an f64 negation that uses vector registers.
+define double @f6(<2 x double> %val) {
+; CHECK-LABEL: f6:
+; CHECK: wflcdb %f0, %v24
+; CHECK: br %r14
+ %scalar = extractelement <2 x double> %val, i32 0
+ %ret = fsub double -0.0, %scalar
+ ret double %ret
+}
diff --git a/test/CodeGen/SystemZ/vec-or-01.ll b/test/CodeGen/SystemZ/vec-or-01.ll
new file mode 100644
index 000000000000..789150ad2d1b
--- /dev/null
+++ b/test/CodeGen/SystemZ/vec-or-01.ll
@@ -0,0 +1,39 @@
+; Test vector OR.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
+; Test a v16i8 OR.
+define <16 x i8> @f1(<16 x i8> %dummy, <16 x i8> %val1, <16 x i8> %val2) {
+; CHECK-LABEL: f1:
+; CHECK: vo %v24, %v26, %v28
+; CHECK: br %r14
+ %ret = or <16 x i8> %val1, %val2
+ ret <16 x i8> %ret
+}
+
+; Test a v8i16 OR.
+define <8 x i16> @f2(<8 x i16> %dummy, <8 x i16> %val1, <8 x i16> %val2) {
+; CHECK-LABEL: f2:
+; CHECK: vo %v24, %v26, %v28
+; CHECK: br %r14
+ %ret = or <8 x i16> %val1, %val2
+ ret <8 x i16> %ret
+}
+
+; Test a v4i32 OR.
+define <4 x i32> @f3(<4 x i32> %dummy, <4 x i32> %val1, <4 x i32> %val2) {
+; CHECK-LABEL: f3:
+; CHECK: vo %v24, %v26, %v28
+; CHECK: br %r14
+ %ret = or <4 x i32> %val1, %val2
+ ret <4 x i32> %ret
+}
+
+; Test a v2i64 OR.
+define <2 x i64> @f4(<2 x i64> %dummy, <2 x i64> %val1, <2 x i64> %val2) {
+; CHECK-LABEL: f4:
+; CHECK: vo %v24, %v26, %v28
+; CHECK: br %r14
+ %ret = or <2 x i64> %val1, %val2
+ ret <2 x i64> %ret
+}
diff --git a/test/CodeGen/SystemZ/vec-or-02.ll b/test/CodeGen/SystemZ/vec-or-02.ll
new file mode 100644
index 000000000000..eeb86e36ff00
--- /dev/null
+++ b/test/CodeGen/SystemZ/vec-or-02.ll
@@ -0,0 +1,107 @@
+; Test vector (or (and X, Z), (and Y, (not Z))) patterns.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
+; Test v16i8.
+define <16 x i8> @f1(<16 x i8> %val1, <16 x i8> %val2, <16 x i8> %val3) {
+; CHECK-LABEL: f1:
+; CHECK: vsel %v24, %v24, %v26, %v28
+; CHECK: br %r14
+ %not = xor <16 x i8> %val3, <i8 -1, i8 -1, i8 -1, i8 -1,
+ i8 -1, i8 -1, i8 -1, i8 -1,
+ i8 -1, i8 -1, i8 -1, i8 -1,
+ i8 -1, i8 -1, i8 -1, i8 -1>
+ %and1 = and <16 x i8> %val1, %val3
+ %and2 = and <16 x i8> %val2, %not
+ %ret = or <16 x i8> %and1, %and2
+ ret <16 x i8> %ret
+}
+
+; ...and again with the XOR applied to the other operand of the AND.
+define <16 x i8> @f2(<16 x i8> %val1, <16 x i8> %val2, <16 x i8> %val3) {
+; CHECK-LABEL: f2:
+; CHECK: vsel %v24, %v26, %v24, %v28
+; CHECK: br %r14
+ %not = xor <16 x i8> %val3, <i8 -1, i8 -1, i8 -1, i8 -1,
+ i8 -1, i8 -1, i8 -1, i8 -1,
+ i8 -1, i8 -1, i8 -1, i8 -1,
+ i8 -1, i8 -1, i8 -1, i8 -1>
+ %and1 = and <16 x i8> %val1, %not
+ %and2 = and <16 x i8> %val2, %val3
+ %ret = or <16 x i8> %and1, %and2
+ ret <16 x i8> %ret
+}
+
+; Test v8i16.
+define <8 x i16> @f3(<8 x i16> %val1, <8 x i16> %val2, <8 x i16> %val3) {
+; CHECK-LABEL: f3:
+; CHECK: vsel %v24, %v24, %v26, %v28
+; CHECK: br %r14
+ %not = xor <8 x i16> %val3, <i16 -1, i16 -1, i16 -1, i16 -1,
+ i16 -1, i16 -1, i16 -1, i16 -1>
+ %and1 = and <8 x i16> %val1, %val3
+ %and2 = and <8 x i16> %val2, %not
+ %ret = or <8 x i16> %and1, %and2
+ ret <8 x i16> %ret
+}
+
+; ...and again with the XOR applied to the other operand of the AND.
+define <8 x i16> @f4(<8 x i16> %val1, <8 x i16> %val2, <8 x i16> %val3) {
+; CHECK-LABEL: f4:
+; CHECK: vsel %v24, %v26, %v24, %v28
+; CHECK: br %r14
+ %not = xor <8 x i16> %val3, <i16 -1, i16 -1, i16 -1, i16 -1,
+ i16 -1, i16 -1, i16 -1, i16 -1>
+ %and1 = and <8 x i16> %val1, %not
+ %and2 = and <8 x i16> %val2, %val3
+ %ret = or <8 x i16> %and1, %and2
+ ret <8 x i16> %ret
+}
+
+; Test v4i32.
+define <4 x i32> @f5(<4 x i32> %val1, <4 x i32> %val2, <4 x i32> %val3) {
+; CHECK-LABEL: f5:
+; CHECK: vsel %v24, %v24, %v26, %v28
+; CHECK: br %r14
+ %not = xor <4 x i32> %val3, <i32 -1, i32 -1, i32 -1, i32 -1>
+ %and1 = and <4 x i32> %val1, %val3
+ %and2 = and <4 x i32> %val2, %not
+ %ret = or <4 x i32> %and1, %and2
+ ret <4 x i32> %ret
+}
+
+; ...and again with the XOR applied to the other operand of the AND.
+define <4 x i32> @f6(<4 x i32> %val1, <4 x i32> %val2, <4 x i32> %val3) {
+; CHECK-LABEL: f6:
+; CHECK: vsel %v24, %v26, %v24, %v28
+; CHECK: br %r14
+ %not = xor <4 x i32> %val3, <i32 -1, i32 -1, i32 -1, i32 -1>
+ %and1 = and <4 x i32> %val1, %not
+ %and2 = and <4 x i32> %val2, %val3
+ %ret = or <4 x i32> %and1, %and2
+ ret <4 x i32> %ret
+}
+
+; Test v2i64.
+define <2 x i64> @f7(<2 x i64> %val1, <2 x i64> %val2, <2 x i64> %val3) {
+; CHECK-LABEL: f7:
+; CHECK: vsel %v24, %v24, %v26, %v28
+; CHECK: br %r14
+ %not = xor <2 x i64> %val3, <i64 -1, i64 -1>
+ %and1 = and <2 x i64> %val1, %val3
+ %and2 = and <2 x i64> %val2, %not
+ %ret = or <2 x i64> %and1, %and2
+ ret <2 x i64> %ret
+}
+
+; ...and again with the XOR applied to the other operand of the AND.
+define <2 x i64> @f8(<2 x i64> %val1, <2 x i64> %val2, <2 x i64> %val3) {
+; CHECK-LABEL: f8:
+; CHECK: vsel %v24, %v26, %v24, %v28
+; CHECK: br %r14
+ %not = xor <2 x i64> %val3, <i64 -1, i64 -1>
+ %and1 = and <2 x i64> %val1, %not
+ %and2 = and <2 x i64> %val2, %val3
+ %ret = or <2 x i64> %and1, %and2
+ ret <2 x i64> %ret
+}
diff --git a/test/CodeGen/SystemZ/vec-perm-01.ll b/test/CodeGen/SystemZ/vec-perm-01.ll
new file mode 100644
index 000000000000..4beec05eaece
--- /dev/null
+++ b/test/CodeGen/SystemZ/vec-perm-01.ll
@@ -0,0 +1,175 @@
+; Test vector splat.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
+; Test v16i8 splat of the first element.
+define <16 x i8> @f1(<16 x i8> %val) {
+; CHECK-LABEL: f1:
+; CHECK: vrepb %v24, %v24, 0
+; CHECK: br %r14
+ %ret = shufflevector <16 x i8> %val, <16 x i8> undef,
+ <16 x i32> zeroinitializer
+ ret <16 x i8> %ret
+}
+
+; Test v16i8 splat of the last element.
+define <16 x i8> @f2(<16 x i8> %val) {
+; CHECK-LABEL: f2:
+; CHECK: vrepb %v24, %v24, 15
+; CHECK: br %r14
+ %ret = shufflevector <16 x i8> %val, <16 x i8> undef,
+ <16 x i32> <i32 15, i32 15, i32 15, i32 15,
+ i32 15, i32 15, i32 15, i32 15,
+ i32 15, i32 15, i32 15, i32 15,
+ i32 15, i32 15, i32 15, i32 15>
+ ret <16 x i8> %ret
+}
+
+; Test v16i8 splat of an arbitrary element, using the second operand of
+; the shufflevector.
+define <16 x i8> @f3(<16 x i8> %val) {
+; CHECK-LABEL: f3:
+; CHECK: vrepb %v24, %v24, 4
+; CHECK: br %r14
+ %ret = shufflevector <16 x i8> undef, <16 x i8> %val,
+ <16 x i32> <i32 20, i32 20, i32 20, i32 20,
+ i32 20, i32 20, i32 20, i32 20,
+ i32 20, i32 20, i32 20, i32 20,
+ i32 20, i32 20, i32 20, i32 20>
+ ret <16 x i8> %ret
+}
+
+; Test v8i16 splat of the first element.
+define <8 x i16> @f4(<8 x i16> %val) {
+; CHECK-LABEL: f4:
+; CHECK: vreph %v24, %v24, 0
+; CHECK: br %r14
+ %ret = shufflevector <8 x i16> %val, <8 x i16> undef,
+ <8 x i32> zeroinitializer
+ ret <8 x i16> %ret
+}
+
+; Test v8i16 splat of the last element.
+define <8 x i16> @f5(<8 x i16> %val) {
+; CHECK-LABEL: f5:
+; CHECK: vreph %v24, %v24, 7
+; CHECK: br %r14
+ %ret = shufflevector <8 x i16> %val, <8 x i16> undef,
+ <8 x i32> <i32 7, i32 7, i32 7, i32 7,
+ i32 7, i32 7, i32 7, i32 7>
+ ret <8 x i16> %ret
+}
+
+; Test v8i16 splat of an arbitrary element, using the second operand of
+; the shufflevector.
+define <8 x i16> @f6(<8 x i16> %val) {
+; CHECK-LABEL: f6:
+; CHECK: vreph %v24, %v24, 2
+; CHECK: br %r14
+ %ret = shufflevector <8 x i16> undef, <8 x i16> %val,
+ <8 x i32> <i32 10, i32 10, i32 10, i32 10,
+ i32 10, i32 10, i32 10, i32 10>
+ ret <8 x i16> %ret
+}
+
+; Test v4i32 splat of the first element.
+define <4 x i32> @f7(<4 x i32> %val) {
+; CHECK-LABEL: f7:
+; CHECK: vrepf %v24, %v24, 0
+; CHECK: br %r14
+ %ret = shufflevector <4 x i32> %val, <4 x i32> undef,
+ <4 x i32> zeroinitializer
+ ret <4 x i32> %ret
+}
+
+; Test v4i32 splat of the last element.
+define <4 x i32> @f8(<4 x i32> %val) {
+; CHECK-LABEL: f8:
+; CHECK: vrepf %v24, %v24, 3
+; CHECK: br %r14
+ %ret = shufflevector <4 x i32> %val, <4 x i32> undef,
+ <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+ ret <4 x i32> %ret
+}
+
+; Test v4i32 splat of an arbitrary element, using the second operand of
+; the shufflevector.
+define <4 x i32> @f9(<4 x i32> %val) {
+; CHECK-LABEL: f9:
+; CHECK: vrepf %v24, %v24, 1
+; CHECK: br %r14
+ %ret = shufflevector <4 x i32> undef, <4 x i32> %val,
+ <4 x i32> <i32 5, i32 5, i32 5, i32 5>
+ ret <4 x i32> %ret
+}
+
+; Test v2i64 splat of the first element.
+define <2 x i64> @f10(<2 x i64> %val) {
+; CHECK-LABEL: f10:
+; CHECK: vrepg %v24, %v24, 0
+; CHECK: br %r14
+ %ret = shufflevector <2 x i64> %val, <2 x i64> undef,
+ <2 x i32> zeroinitializer
+ ret <2 x i64> %ret
+}
+
+; Test v2i64 splat of the last element.
+define <2 x i64> @f11(<2 x i64> %val) {
+; CHECK-LABEL: f11:
+; CHECK: vrepg %v24, %v24, 1
+; CHECK: br %r14
+ %ret = shufflevector <2 x i64> %val, <2 x i64> undef,
+ <2 x i32> <i32 1, i32 1>
+ ret <2 x i64> %ret
+}
+
+; Test v4f32 splat of the first element.
+define <4 x float> @f12(<4 x float> %val) {
+; CHECK-LABEL: f12:
+; CHECK: vrepf %v24, %v24, 0
+; CHECK: br %r14
+ %ret = shufflevector <4 x float> %val, <4 x float> undef,
+ <4 x i32> zeroinitializer
+ ret <4 x float> %ret
+}
+
+; Test v4f32 splat of the last element.
+define <4 x float> @f13(<4 x float> %val) {
+; CHECK-LABEL: f13:
+; CHECK: vrepf %v24, %v24, 3
+; CHECK: br %r14
+ %ret = shufflevector <4 x float> %val, <4 x float> undef,
+ <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+ ret <4 x float> %ret
+}
+
+; Test v4f32 splat of an arbitrary element, using the second operand of
+; the shufflevector.
+define <4 x float> @f14(<4 x float> %val) {
+; CHECK-LABEL: f14:
+; CHECK: vrepf %v24, %v24, 1
+; CHECK: br %r14
+ %ret = shufflevector <4 x float> undef, <4 x float> %val,
+ <4 x i32> <i32 5, i32 5, i32 5, i32 5>
+ ret <4 x float> %ret
+}
+
+; Test v2f64 splat of the first element.
+define <2 x double> @f15(<2 x double> %val) {
+; CHECK-LABEL: f15:
+; CHECK: vrepg %v24, %v24, 0
+; CHECK: br %r14
+ %ret = shufflevector <2 x double> %val, <2 x double> undef,
+ <2 x i32> zeroinitializer
+ ret <2 x double> %ret
+}
+
+; Test v2f64 splat of the last element.
+define <2 x double> @f16(<2 x double> %val) {
+; CHECK-LABEL: f16:
+; CHECK: vrepg %v24, %v24, 1
+; CHECK: br %r14
+ %ret = shufflevector <2 x double> %val, <2 x double> undef,
+ <2 x i32> <i32 1, i32 1>
+ ret <2 x double> %ret
+}
diff --git a/test/CodeGen/SystemZ/vec-perm-02.ll b/test/CodeGen/SystemZ/vec-perm-02.ll
new file mode 100644
index 000000000000..e5c6df8e955a
--- /dev/null
+++ b/test/CodeGen/SystemZ/vec-perm-02.ll
@@ -0,0 +1,200 @@
+; Test replications of a scalar register value, represented as splats.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
+; Test v16i8 splat of the first element.
+define <16 x i8> @f1(i8 %scalar) {
+; CHECK-LABEL: f1:
+; CHECK: vlvgp [[REG:%v[0-9]+]], %r2, %r2
+; CHECK: vrepb %v24, [[REG]], 7
+; CHECK: br %r14
+ %val = insertelement <16 x i8> undef, i8 %scalar, i32 0
+ %ret = shufflevector <16 x i8> %val, <16 x i8> undef,
+ <16 x i32> zeroinitializer
+ ret <16 x i8> %ret
+}
+
+; Test v16i8 splat of the last element.
+define <16 x i8> @f2(i8 %scalar) {
+; CHECK-LABEL: f2:
+; CHECK: vlvgp [[REG:%v[0-9]+]], %r2, %r2
+; CHECK: vrepb %v24, [[REG]], 7
+; CHECK: br %r14
+ %val = insertelement <16 x i8> undef, i8 %scalar, i32 15
+ %ret = shufflevector <16 x i8> %val, <16 x i8> undef,
+ <16 x i32> <i32 15, i32 15, i32 15, i32 15,
+ i32 15, i32 15, i32 15, i32 15,
+ i32 15, i32 15, i32 15, i32 15,
+ i32 15, i32 15, i32 15, i32 15>
+ ret <16 x i8> %ret
+}
+
+; Test v16i8 splat of an arbitrary element, using the second operand of
+; the shufflevector.
+define <16 x i8> @f3(i8 %scalar) {
+; CHECK-LABEL: f3:
+; CHECK: vlvgp [[REG:%v[0-9]+]], %r2, %r2
+; CHECK: vrepb %v24, [[REG]], 7
+; CHECK: br %r14
+ %val = insertelement <16 x i8> undef, i8 %scalar, i32 4
+ %ret = shufflevector <16 x i8> undef, <16 x i8> %val,
+ <16 x i32> <i32 20, i32 20, i32 20, i32 20,
+ i32 20, i32 20, i32 20, i32 20,
+ i32 20, i32 20, i32 20, i32 20,
+ i32 20, i32 20, i32 20, i32 20>
+ ret <16 x i8> %ret
+}
+
+; Test v8i16 splat of the first element.
+define <8 x i16> @f4(i16 %scalar) {
+; CHECK-LABEL: f4:
+; CHECK: vlvgp [[REG:%v[0-9]+]], %r2, %r2
+; CHECK: vreph %v24, [[REG]], 3
+; CHECK: br %r14
+ %val = insertelement <8 x i16> undef, i16 %scalar, i32 0
+ %ret = shufflevector <8 x i16> %val, <8 x i16> undef,
+ <8 x i32> zeroinitializer
+ ret <8 x i16> %ret
+}
+
+; Test v8i16 splat of the last element.
+define <8 x i16> @f5(i16 %scalar) {
+; CHECK-LABEL: f5:
+; CHECK: vlvgp [[REG:%v[0-9]+]], %r2, %r2
+; CHECK: vreph %v24, [[REG]], 3
+; CHECK: br %r14
+ %val = insertelement <8 x i16> undef, i16 %scalar, i32 7
+ %ret = shufflevector <8 x i16> %val, <8 x i16> undef,
+ <8 x i32> <i32 7, i32 7, i32 7, i32 7,
+ i32 7, i32 7, i32 7, i32 7>
+ ret <8 x i16> %ret
+}
+
+; Test v8i16 splat of an arbitrary element, using the second operand of
+; the shufflevector.
+define <8 x i16> @f6(i16 %scalar) {
+; CHECK-LABEL: f6:
+; CHECK: vlvgp [[REG:%v[0-9]+]], %r2, %r2
+; CHECK: vreph %v24, [[REG]], 3
+; CHECK: br %r14
+ %val = insertelement <8 x i16> undef, i16 %scalar, i32 2
+ %ret = shufflevector <8 x i16> undef, <8 x i16> %val,
+ <8 x i32> <i32 10, i32 10, i32 10, i32 10,
+ i32 10, i32 10, i32 10, i32 10>
+ ret <8 x i16> %ret
+}
+
+; Test v4i32 splat of the first element.
+define <4 x i32> @f7(i32 %scalar) {
+; CHECK-LABEL: f7:
+; CHECK: vlvgp [[REG:%v[0-9]+]], %r2, %r2
+; CHECK: vrepf %v24, [[REG]], 1
+; CHECK: br %r14
+ %val = insertelement <4 x i32> undef, i32 %scalar, i32 0
+ %ret = shufflevector <4 x i32> %val, <4 x i32> undef,
+ <4 x i32> zeroinitializer
+ ret <4 x i32> %ret
+}
+
+; Test v4i32 splat of the last element.
+define <4 x i32> @f8(i32 %scalar) {
+; CHECK-LABEL: f8:
+; CHECK: vlvgp [[REG:%v[0-9]+]], %r2, %r2
+; CHECK: vrepf %v24, [[REG]], 1
+; CHECK: br %r14
+ %val = insertelement <4 x i32> undef, i32 %scalar, i32 3
+ %ret = shufflevector <4 x i32> %val, <4 x i32> undef,
+ <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+ ret <4 x i32> %ret
+}
+
+; Test v4i32 splat of an arbitrary element, using the second operand of
+; the shufflevector.
+define <4 x i32> @f9(i32 %scalar) {
+; CHECK-LABEL: f9:
+; CHECK: vlvgp [[REG:%v[0-9]+]], %r2, %r2
+; CHECK: vrepf %v24, [[REG]], 1
+; CHECK: br %r14
+ %val = insertelement <4 x i32> undef, i32 %scalar, i32 1
+ %ret = shufflevector <4 x i32> undef, <4 x i32> %val,
+ <4 x i32> <i32 5, i32 5, i32 5, i32 5>
+ ret <4 x i32> %ret
+}
+
+; Test v2i64 splat of the first element.
+define <2 x i64> @f10(i64 %scalar) {
+; CHECK-LABEL: f10:
+; CHECK: vlvgp %v24, %r2, %r2
+; CHECK: br %r14
+ %val = insertelement <2 x i64> undef, i64 %scalar, i32 0
+ %ret = shufflevector <2 x i64> %val, <2 x i64> undef,
+ <2 x i32> zeroinitializer
+ ret <2 x i64> %ret
+}
+
+; Test v2i64 splat of the last element.
+define <2 x i64> @f11(i64 %scalar) {
+; CHECK-LABEL: f11:
+; CHECK: vlvgp %v24, %r2, %r2
+; CHECK: br %r14
+ %val = insertelement <2 x i64> undef, i64 %scalar, i32 1
+ %ret = shufflevector <2 x i64> %val, <2 x i64> undef,
+ <2 x i32> <i32 1, i32 1>
+ ret <2 x i64> %ret
+}
+
+; Test v4f32 splat of the first element.
+define <4 x float> @f12(float %scalar) {
+; CHECK-LABEL: f12:
+; CHECK: vrepf %v24, %v0, 0
+; CHECK: br %r14
+ %val = insertelement <4 x float> undef, float %scalar, i32 0
+ %ret = shufflevector <4 x float> %val, <4 x float> undef,
+ <4 x i32> zeroinitializer
+ ret <4 x float> %ret
+}
+
+; Test v4f32 splat of the last element.
+define <4 x float> @f13(float %scalar) {
+; CHECK-LABEL: f13:
+; CHECK: vrepf %v24, %v0, 0
+; CHECK: br %r14
+ %val = insertelement <4 x float> undef, float %scalar, i32 3
+ %ret = shufflevector <4 x float> %val, <4 x float> undef,
+ <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+ ret <4 x float> %ret
+}
+
+; Test v4f32 splat of an arbitrary element, using the second operand of
+; the shufflevector.
+define <4 x float> @f14(float %scalar) {
+; CHECK-LABEL: f14:
+; CHECK: vrepf %v24, %v0, 0
+; CHECK: br %r14
+ %val = insertelement <4 x float> undef, float %scalar, i32 1
+ %ret = shufflevector <4 x float> undef, <4 x float> %val,
+ <4 x i32> <i32 5, i32 5, i32 5, i32 5>
+ ret <4 x float> %ret
+}
+
+; Test v2f64 splat of the first element.
+define <2 x double> @f15(double %scalar) {
+; CHECK-LABEL: f15:
+; CHECK: vrepg %v24, %v0, 0
+; CHECK: br %r14
+ %val = insertelement <2 x double> undef, double %scalar, i32 0
+ %ret = shufflevector <2 x double> %val, <2 x double> undef,
+ <2 x i32> zeroinitializer
+ ret <2 x double> %ret
+}
+
+; Test v2f64 splat of the last element.
+define <2 x double> @f16(double %scalar) {
+; CHECK-LABEL: f16:
+; CHECK: vrepg %v24, %v0, 0
+; CHECK: br %r14
+ %val = insertelement <2 x double> undef, double %scalar, i32 1
+ %ret = shufflevector <2 x double> %val, <2 x double> undef,
+ <2 x i32> <i32 1, i32 1>
+ ret <2 x double> %ret
+}
diff --git a/test/CodeGen/SystemZ/vec-perm-03.ll b/test/CodeGen/SystemZ/vec-perm-03.ll
new file mode 100644
index 000000000000..663815549c33
--- /dev/null
+++ b/test/CodeGen/SystemZ/vec-perm-03.ll
@@ -0,0 +1,251 @@
+; Test replications of a scalar memory value, represented as splats.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
+; Test a v16i8 replicating load with no offset.
+define <16 x i8> @f1(i8 *%ptr) {
+; CHECK-LABEL: f1:
+; CHECK: vlrepb %v24, 0(%r2)
+; CHECK: br %r14
+ %scalar = load i8, i8 *%ptr
+ %val = insertelement <16 x i8> undef, i8 %scalar, i32 0
+ %ret = shufflevector <16 x i8> %val, <16 x i8> undef,
+ <16 x i32> zeroinitializer
+ ret <16 x i8> %ret
+}
+
+; Test a v16i8 replicating load with the maximum in-range offset.
+define <16 x i8> @f2(i8 *%base) {
+; CHECK-LABEL: f2:
+; CHECK: vlrepb %v24, 4095(%r2)
+; CHECK: br %r14
+ %ptr = getelementptr i8, i8 *%base, i64 4095
+ %scalar = load i8, i8 *%ptr
+ %val = insertelement <16 x i8> undef, i8 %scalar, i32 0
+ %ret = shufflevector <16 x i8> %val, <16 x i8> undef,
+ <16 x i32> zeroinitializer
+ ret <16 x i8> %ret
+}
+
+; Test a v16i8 replicating load with the first out-of-range offset.
+define <16 x i8> @f3(i8 *%base) {
+; CHECK-LABEL: f3:
+; CHECK: aghi %r2, 4096
+; CHECK: vlrepb %v24, 0(%r2)
+; CHECK: br %r14
+ %ptr = getelementptr i8, i8 *%base, i64 4096
+ %scalar = load i8, i8 *%ptr
+ %val = insertelement <16 x i8> undef, i8 %scalar, i32 0
+ %ret = shufflevector <16 x i8> %val, <16 x i8> undef,
+ <16 x i32> zeroinitializer
+ ret <16 x i8> %ret
+}
+
+; Test a v8i16 replicating load with no offset.
+define <8 x i16> @f4(i16 *%ptr) {
+; CHECK-LABEL: f4:
+; CHECK: vlreph %v24, 0(%r2)
+; CHECK: br %r14
+ %scalar = load i16, i16 *%ptr
+ %val = insertelement <8 x i16> undef, i16 %scalar, i32 0
+ %ret = shufflevector <8 x i16> %val, <8 x i16> undef,
+ <8 x i32> zeroinitializer
+ ret <8 x i16> %ret
+}
+
+; Test a v8i16 replicating load with the maximum in-range offset.
+define <8 x i16> @f5(i16 *%base) {
+; CHECK-LABEL: f5:
+; CHECK: vlreph %v24, 4094(%r2)
+; CHECK: br %r14
+ %ptr = getelementptr i16, i16 *%base, i64 2047
+ %scalar = load i16, i16 *%ptr
+ %val = insertelement <8 x i16> undef, i16 %scalar, i32 0
+ %ret = shufflevector <8 x i16> %val, <8 x i16> undef,
+ <8 x i32> zeroinitializer
+ ret <8 x i16> %ret
+}
+
+; Test a v8i16 replicating load with the first out-of-range offset.
+define <8 x i16> @f6(i16 *%base) {
+; CHECK-LABEL: f6:
+; CHECK: aghi %r2, 4096
+; CHECK: vlreph %v24, 0(%r2)
+; CHECK: br %r14
+ %ptr = getelementptr i16, i16 *%base, i64 2048
+ %scalar = load i16, i16 *%ptr
+ %val = insertelement <8 x i16> undef, i16 %scalar, i32 0
+ %ret = shufflevector <8 x i16> %val, <8 x i16> undef,
+ <8 x i32> zeroinitializer
+ ret <8 x i16> %ret
+}
+
+; Test a v4i32 replicating load with no offset.
+define <4 x i32> @f7(i32 *%ptr) {
+; CHECK-LABEL: f7:
+; CHECK: vlrepf %v24, 0(%r2)
+; CHECK: br %r14
+ %scalar = load i32, i32 *%ptr
+ %val = insertelement <4 x i32> undef, i32 %scalar, i32 0
+ %ret = shufflevector <4 x i32> %val, <4 x i32> undef,
+ <4 x i32> zeroinitializer
+ ret <4 x i32> %ret
+}
+
+; Test a v4i32 replicating load with the maximum in-range offset.
+define <4 x i32> @f8(i32 *%base) {
+; CHECK-LABEL: f8:
+; CHECK: vlrepf %v24, 4092(%r2)
+; CHECK: br %r14
+ %ptr = getelementptr i32, i32 *%base, i64 1023
+ %scalar = load i32, i32 *%ptr
+ %val = insertelement <4 x i32> undef, i32 %scalar, i32 0
+ %ret = shufflevector <4 x i32> %val, <4 x i32> undef,
+ <4 x i32> zeroinitializer
+ ret <4 x i32> %ret
+}
+
+; Test a v4i32 replicating load with the first out-of-range offset.
+define <4 x i32> @f9(i32 *%base) {
+; CHECK-LABEL: f9:
+; CHECK: aghi %r2, 4096
+; CHECK: vlrepf %v24, 0(%r2)
+; CHECK: br %r14
+ %ptr = getelementptr i32, i32 *%base, i64 1024
+ %scalar = load i32, i32 *%ptr
+ %val = insertelement <4 x i32> undef, i32 %scalar, i32 0
+ %ret = shufflevector <4 x i32> %val, <4 x i32> undef,
+ <4 x i32> zeroinitializer
+ ret <4 x i32> %ret
+}
+
+; Test a v2i64 replicating load with no offset.
+define <2 x i64> @f10(i64 *%ptr) {
+; CHECK-LABEL: f10:
+; CHECK: vlrepg %v24, 0(%r2)
+; CHECK: br %r14
+ %scalar = load i64, i64 *%ptr
+ %val = insertelement <2 x i64> undef, i64 %scalar, i32 0
+ %ret = shufflevector <2 x i64> %val, <2 x i64> undef,
+ <2 x i32> zeroinitializer
+ ret <2 x i64> %ret
+}
+
+; Test a v2i64 replicating load with the maximum in-range offset.
+define <2 x i64> @f11(i64 *%base) {
+; CHECK-LABEL: f11:
+; CHECK: vlrepg %v24, 4088(%r2)
+; CHECK: br %r14
+ %ptr = getelementptr i64, i64 *%base, i32 511
+ %scalar = load i64, i64 *%ptr
+ %val = insertelement <2 x i64> undef, i64 %scalar, i32 0
+ %ret = shufflevector <2 x i64> %val, <2 x i64> undef,
+ <2 x i32> zeroinitializer
+ ret <2 x i64> %ret
+}
+
+; Test a v2i64 replicating load with the first out-of-range offset.
+define <2 x i64> @f12(i64 *%base) {
+; CHECK-LABEL: f12:
+; CHECK: aghi %r2, 4096
+; CHECK: vlrepg %v24, 0(%r2)
+; CHECK: br %r14
+ %ptr = getelementptr i64, i64 *%base, i32 512
+ %scalar = load i64, i64 *%ptr
+ %val = insertelement <2 x i64> undef, i64 %scalar, i32 0
+ %ret = shufflevector <2 x i64> %val, <2 x i64> undef,
+ <2 x i32> zeroinitializer
+ ret <2 x i64> %ret
+}
+
+; Test a v4f32 replicating load with no offset.
+define <4 x float> @f13(float *%ptr) {
+; CHECK-LABEL: f13:
+; CHECK: vlrepf %v24, 0(%r2)
+; CHECK: br %r14
+ %scalar = load float, float *%ptr
+ %val = insertelement <4 x float> undef, float %scalar, i32 0
+ %ret = shufflevector <4 x float> %val, <4 x float> undef,
+ <4 x i32> zeroinitializer
+ ret <4 x float> %ret
+}
+
+; Test a v4f32 replicating load with the maximum in-range offset.
+define <4 x float> @f14(float *%base) {
+; CHECK-LABEL: f14:
+; CHECK: vlrepf %v24, 4092(%r2)
+; CHECK: br %r14
+ %ptr = getelementptr float, float *%base, i64 1023
+ %scalar = load float, float *%ptr
+ %val = insertelement <4 x float> undef, float %scalar, i32 0
+ %ret = shufflevector <4 x float> %val, <4 x float> undef,
+ <4 x i32> zeroinitializer
+ ret <4 x float> %ret
+}
+
+; Test a v4f32 replicating load with the first out-of-range offset.
+define <4 x float> @f15(float *%base) {
+; CHECK-LABEL: f15:
+; CHECK: aghi %r2, 4096
+; CHECK: vlrepf %v24, 0(%r2)
+; CHECK: br %r14
+ %ptr = getelementptr float, float *%base, i64 1024
+ %scalar = load float, float *%ptr
+ %val = insertelement <4 x float> undef, float %scalar, i32 0
+ %ret = shufflevector <4 x float> %val, <4 x float> undef,
+ <4 x i32> zeroinitializer
+ ret <4 x float> %ret
+}
+
+; Test a v2f64 replicating load with no offset.
+define <2 x double> @f16(double *%ptr) {
+; CHECK-LABEL: f16:
+; CHECK: vlrepg %v24, 0(%r2)
+; CHECK: br %r14
+ %scalar = load double, double *%ptr
+ %val = insertelement <2 x double> undef, double %scalar, i32 0
+ %ret = shufflevector <2 x double> %val, <2 x double> undef,
+ <2 x i32> zeroinitializer
+ ret <2 x double> %ret
+}
+
+; Test a v2f64 replicating load with the maximum in-range offset.
+define <2 x double> @f17(double *%base) {
+; CHECK-LABEL: f17:
+; CHECK: vlrepg %v24, 4088(%r2)
+; CHECK: br %r14
+ %ptr = getelementptr double, double *%base, i32 511
+ %scalar = load double, double *%ptr
+ %val = insertelement <2 x double> undef, double %scalar, i32 0
+ %ret = shufflevector <2 x double> %val, <2 x double> undef,
+ <2 x i32> zeroinitializer
+ ret <2 x double> %ret
+}
+
+; Test a v2f64 replicating load with the first out-of-range offset.
+define <2 x double> @f18(double *%base) {
+; CHECK-LABEL: f18:
+; CHECK: aghi %r2, 4096
+; CHECK: vlrepg %v24, 0(%r2)
+; CHECK: br %r14
+ %ptr = getelementptr double, double *%base, i32 512
+ %scalar = load double, double *%ptr
+ %val = insertelement <2 x double> undef, double %scalar, i32 0
+ %ret = shufflevector <2 x double> %val, <2 x double> undef,
+ <2 x i32> zeroinitializer
+ ret <2 x double> %ret
+}
+
+; Test a v16i8 replicating load with an index.
+define <16 x i8> @f19(i8 *%base, i64 %index) {
+; CHECK-LABEL: f19:
+; CHECK: vlrepb %v24, 1023(%r3,%r2)
+; CHECK: br %r14
+ %ptr1 = getelementptr i8, i8 *%base, i64 %index
+ %ptr = getelementptr i8, i8 *%ptr1, i64 1023
+ %scalar = load i8, i8 *%ptr
+ %val = insertelement <16 x i8> undef, i8 %scalar, i32 0
+ %ret = shufflevector <16 x i8> %val, <16 x i8> undef,
+ <16 x i32> zeroinitializer
+ ret <16 x i8> %ret
+}
diff --git a/test/CodeGen/SystemZ/vec-perm-04.ll b/test/CodeGen/SystemZ/vec-perm-04.ll
new file mode 100644
index 000000000000..0df6f4fbb012
--- /dev/null
+++ b/test/CodeGen/SystemZ/vec-perm-04.ll
@@ -0,0 +1,200 @@
+; Test vector merge high.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
+; Test a canonical v16i8 merge high.
+define <16 x i8> @f1(<16 x i8> %val1, <16 x i8> %val2) {
+; CHECK-LABEL: f1:
+; CHECK: vmrhb %v24, %v24, %v26
+; CHECK: br %r14
+ %ret = shufflevector <16 x i8> %val1, <16 x i8> %val2,
+ <16 x i32> <i32 0, i32 16, i32 1, i32 17,
+ i32 2, i32 18, i32 3, i32 19,
+ i32 4, i32 20, i32 5, i32 21,
+ i32 6, i32 22, i32 7, i32 23>
+ ret <16 x i8> %ret
+}
+
+; Test a reversed v16i8 merge high.
+define <16 x i8> @f2(<16 x i8> %val1, <16 x i8> %val2) {
+; CHECK-LABEL: f2:
+; CHECK: vmrhb %v24, %v26, %v24
+; CHECK: br %r14
+ %ret = shufflevector <16 x i8> %val1, <16 x i8> %val2,
+ <16 x i32> <i32 16, i32 0, i32 17, i32 1,
+ i32 18, i32 2, i32 19, i32 3,
+ i32 20, i32 4, i32 21, i32 5,
+ i32 22, i32 6, i32 23, i32 7>
+ ret <16 x i8> %ret
+}
+
+; Test a v16i8 merge high with only the first operand being used.
+define <16 x i8> @f3(<16 x i8> %val1, <16 x i8> %val2) {
+; CHECK-LABEL: f3:
+; CHECK: vmrhb %v24, %v24, %v24
+; CHECK: br %r14
+ %ret = shufflevector <16 x i8> %val1, <16 x i8> %val2,
+ <16 x i32> <i32 0, i32 0, i32 1, i32 1,
+ i32 2, i32 2, i32 3, i32 3,
+ i32 4, i32 4, i32 5, i32 5,
+ i32 6, i32 6, i32 7, i32 7>
+ ret <16 x i8> %ret
+}
+
+; Test a v16i8 merge high with only the second operand being used.
+; This is converted into @f3 by target-independent code.
+define <16 x i8> @f4(<16 x i8> %val1, <16 x i8> %val2) {
+; CHECK-LABEL: f4:
+; CHECK: vmrhb %v24, %v26, %v26
+; CHECK: br %r14
+ %ret = shufflevector <16 x i8> %val1, <16 x i8> %val2,
+ <16 x i32> <i32 16, i32 16, i32 17, i32 17,
+ i32 18, i32 18, i32 19, i32 19,
+ i32 20, i32 20, i32 21, i32 21,
+ i32 22, i32 22, i32 23, i32 23>
+ ret <16 x i8> %ret
+}
+
+; Test a v16i8 merge with both operands being the same. This too is
+; converted into @f3 by target-independent code.
+define <16 x i8> @f5(<16 x i8> %val) {
+; CHECK-LABEL: f5:
+; CHECK: vmrhb %v24, %v24, %v24
+; CHECK: br %r14
+ %ret = shufflevector <16 x i8> %val, <16 x i8> %val,
+ <16 x i32> <i32 0, i32 16, i32 17, i32 17,
+ i32 18, i32 2, i32 3, i32 3,
+ i32 20, i32 20, i32 5, i32 5,
+ i32 6, i32 22, i32 23, i32 7>
+ ret <16 x i8> %ret
+}
+
+; Test a v16i8 merge in which some of the indices are "don't care".
+define <16 x i8> @f6(<16 x i8> %val1, <16 x i8> %val2) {
+; CHECK-LABEL: f6:
+; CHECK: vmrhb %v24, %v24, %v26
+; CHECK: br %r14
+ %ret = shufflevector <16 x i8> %val1, <16 x i8> %val2,
+ <16 x i32> <i32 0, i32 undef, i32 1, i32 17,
+ i32 undef, i32 18, i32 undef, i32 undef,
+ i32 undef, i32 20, i32 5, i32 21,
+ i32 undef, i32 22, i32 7, i32 undef>
+ ret <16 x i8> %ret
+}
+
+; Test a v16i8 merge in which one of the operands is undefined and where
+; indices for that operand are "don't care". Target-independent code
+; converts the indices themselves into "undef"s.
+define <16 x i8> @f7(<16 x i8> %val) {
+; CHECK-LABEL: f7:
+; CHECK: vmrhb %v24, %v24, %v24
+; CHECK: br %r14
+ %ret = shufflevector <16 x i8> undef, <16 x i8> %val,
+ <16 x i32> <i32 11, i32 16, i32 17, i32 5,
+ i32 18, i32 10, i32 19, i32 19,
+ i32 20, i32 20, i32 21, i32 3,
+ i32 2, i32 22, i32 9, i32 23>
+ ret <16 x i8> %ret
+}
+
+; Test a canonical v8i16 merge high.
+define <8 x i16> @f8(<8 x i16> %val1, <8 x i16> %val2) {
+; CHECK-LABEL: f8:
+; CHECK: vmrhh %v24, %v24, %v26
+; CHECK: br %r14
+ %ret = shufflevector <8 x i16> %val1, <8 x i16> %val2,
+ <8 x i32> <i32 0, i32 8, i32 1, i32 9,
+ i32 2, i32 10, i32 3, i32 11>
+ ret <8 x i16> %ret
+}
+
+; Test a reversed v8i16 merge high.
+define <8 x i16> @f9(<8 x i16> %val1, <8 x i16> %val2) {
+; CHECK-LABEL: f9:
+; CHECK: vmrhh %v24, %v26, %v24
+; CHECK: br %r14
+ %ret = shufflevector <8 x i16> %val1, <8 x i16> %val2,
+ <8 x i32> <i32 8, i32 0, i32 9, i32 1,
+ i32 10, i32 2, i32 11, i32 3>
+ ret <8 x i16> %ret
+}
+
+; Test a canonical v4i32 merge high.
+define <4 x i32> @f10(<4 x i32> %val1, <4 x i32> %val2) {
+; CHECK-LABEL: f10:
+; CHECK: vmrhf %v24, %v24, %v26
+; CHECK: br %r14
+ %ret = shufflevector <4 x i32> %val1, <4 x i32> %val2,
+ <4 x i32> <i32 0, i32 4, i32 1, i32 5>
+ ret <4 x i32> %ret
+}
+
+; Test a reversed v4i32 merge high.
+define <4 x i32> @f11(<4 x i32> %val1, <4 x i32> %val2) {
+; CHECK-LABEL: f11:
+; CHECK: vmrhf %v24, %v26, %v24
+; CHECK: br %r14
+ %ret = shufflevector <4 x i32> %val1, <4 x i32> %val2,
+ <4 x i32> <i32 4, i32 0, i32 5, i32 1>
+ ret <4 x i32> %ret
+}
+
+; Test a canonical v2i64 merge high.
+define <2 x i64> @f12(<2 x i64> %val1, <2 x i64> %val2) {
+; CHECK-LABEL: f12:
+; CHECK: vmrhg %v24, %v24, %v26
+; CHECK: br %r14
+ %ret = shufflevector <2 x i64> %val1, <2 x i64> %val2,
+ <2 x i32> <i32 0, i32 2>
+ ret <2 x i64> %ret
+}
+
+; Test a reversed v2i64 merge high.
+define <2 x i64> @f13(<2 x i64> %val1, <2 x i64> %val2) {
+; CHECK-LABEL: f13:
+; CHECK: vmrhg %v24, %v26, %v24
+; CHECK: br %r14
+ %ret = shufflevector <2 x i64> %val1, <2 x i64> %val2,
+ <2 x i32> <i32 2, i32 0>
+ ret <2 x i64> %ret
+}
+
+; Test a canonical v4f32 merge high.
+define <4 x float> @f14(<4 x float> %val1, <4 x float> %val2) {
+; CHECK-LABEL: f14:
+; CHECK: vmrhf %v24, %v24, %v26
+; CHECK: br %r14
+ %ret = shufflevector <4 x float> %val1, <4 x float> %val2,
+ <4 x i32> <i32 0, i32 4, i32 1, i32 5>
+ ret <4 x float> %ret
+}
+
+; Test a reversed v4f32 merge high.
+define <4 x float> @f15(<4 x float> %val1, <4 x float> %val2) {
+; CHECK-LABEL: f15:
+; CHECK: vmrhf %v24, %v26, %v24
+; CHECK: br %r14
+ %ret = shufflevector <4 x float> %val1, <4 x float> %val2,
+ <4 x i32> <i32 4, i32 0, i32 5, i32 1>
+ ret <4 x float> %ret
+}
+
+; Test a canonical v2f64 merge high.
+define <2 x double> @f16(<2 x double> %val1, <2 x double> %val2) {
+; CHECK-LABEL: f16:
+; CHECK: vmrhg %v24, %v24, %v26
+; CHECK: br %r14
+ %ret = shufflevector <2 x double> %val1, <2 x double> %val2,
+ <2 x i32> <i32 0, i32 2>
+ ret <2 x double> %ret
+}
+
+; Test a reversed v2f64 merge high.
+define <2 x double> @f17(<2 x double> %val1, <2 x double> %val2) {
+; CHECK-LABEL: f17:
+; CHECK: vmrhg %v24, %v26, %v24
+; CHECK: br %r14
+ %ret = shufflevector <2 x double> %val1, <2 x double> %val2,
+ <2 x i32> <i32 2, i32 0>
+ ret <2 x double> %ret
+}
diff --git a/test/CodeGen/SystemZ/vec-perm-05.ll b/test/CodeGen/SystemZ/vec-perm-05.ll
new file mode 100644
index 000000000000..b585cefbf845
--- /dev/null
+++ b/test/CodeGen/SystemZ/vec-perm-05.ll
@@ -0,0 +1,200 @@
+; Test vector merge low.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
+; Test a canonical v16i8 merge low.
+define <16 x i8> @f1(<16 x i8> %val1, <16 x i8> %val2) {
+; CHECK-LABEL: f1:
+; CHECK: vmrlb %v24, %v24, %v26
+; CHECK: br %r14
+ %ret = shufflevector <16 x i8> %val1, <16 x i8> %val2,
+ <16 x i32> <i32 8, i32 24, i32 9, i32 25,
+ i32 10, i32 26, i32 11, i32 27,
+ i32 12, i32 28, i32 13, i32 29,
+ i32 14, i32 30, i32 15, i32 31>
+ ret <16 x i8> %ret
+}
+
+; Test a reversed v16i8 merge low.
+define <16 x i8> @f2(<16 x i8> %val1, <16 x i8> %val2) {
+; CHECK-LABEL: f2:
+; CHECK: vmrlb %v24, %v26, %v24
+; CHECK: br %r14
+ %ret = shufflevector <16 x i8> %val1, <16 x i8> %val2,
+ <16 x i32> <i32 24, i32 8, i32 25, i32 9,
+ i32 26, i32 10, i32 27, i32 11,
+ i32 28, i32 12, i32 29, i32 13,
+ i32 30, i32 14, i32 31, i32 15>
+ ret <16 x i8> %ret
+}
+
+; Test a v16i8 merge low with only the first operand being used.
+define <16 x i8> @f3(<16 x i8> %val1, <16 x i8> %val2) {
+; CHECK-LABEL: f3:
+; CHECK: vmrlb %v24, %v24, %v24
+; CHECK: br %r14
+ %ret = shufflevector <16 x i8> %val1, <16 x i8> %val2,
+ <16 x i32> <i32 8, i32 8, i32 9, i32 9,
+ i32 10, i32 10, i32 11, i32 11,
+ i32 12, i32 12, i32 13, i32 13,
+ i32 14, i32 14, i32 15, i32 15>
+ ret <16 x i8> %ret
+}
+
+; Test a v16i8 merge low with only the second operand being used.
+; This is converted into @f3 by target-independent code.
+define <16 x i8> @f4(<16 x i8> %val1, <16 x i8> %val2) {
+; CHECK-LABEL: f4:
+; CHECK: vmrlb %v24, %v26, %v26
+; CHECK: br %r14
+ %ret = shufflevector <16 x i8> %val1, <16 x i8> %val2,
+ <16 x i32> <i32 24, i32 24, i32 25, i32 25,
+ i32 26, i32 26, i32 27, i32 27,
+ i32 28, i32 28, i32 29, i32 29,
+ i32 30, i32 30, i32 31, i32 31>
+ ret <16 x i8> %ret
+}
+
+; Test a v16i8 merge with both operands being the same. This too is
+; converted into @f3 by target-independent code.
+define <16 x i8> @f5(<16 x i8> %val) {
+; CHECK-LABEL: f5:
+; CHECK: vmrlb %v24, %v24, %v24
+; CHECK: br %r14
+ %ret = shufflevector <16 x i8> %val, <16 x i8> %val,
+ <16 x i32> <i32 8, i32 24, i32 25, i32 25,
+ i32 26, i32 10, i32 11, i32 11,
+ i32 28, i32 28, i32 13, i32 13,
+ i32 14, i32 30, i32 31, i32 15>
+ ret <16 x i8> %ret
+}
+
+; Test a v16i8 merge in which some of the indices are "don't care".
+define <16 x i8> @f6(<16 x i8> %val1, <16 x i8> %val2) {
+; CHECK-LABEL: f6:
+; CHECK: vmrlb %v24, %v24, %v26
+; CHECK: br %r14
+ %ret = shufflevector <16 x i8> %val1, <16 x i8> %val2,
+ <16 x i32> <i32 8, i32 undef, i32 9, i32 25,
+ i32 undef, i32 26, i32 undef, i32 undef,
+ i32 undef, i32 28, i32 13, i32 29,
+ i32 undef, i32 30, i32 15, i32 undef>
+ ret <16 x i8> %ret
+}
+
+; Test a v16i8 merge in which one of the operands is undefined and where
+; indices for that operand are "don't care". Target-independent code
+; converts the indices themselves into "undef"s.
+define <16 x i8> @f7(<16 x i8> %val) {
+; CHECK-LABEL: f7:
+; CHECK: vmrlb %v24, %v24, %v24
+; CHECK: br %r14
+ %ret = shufflevector <16 x i8> undef, <16 x i8> %val,
+ <16 x i32> <i32 11, i32 24, i32 25, i32 5,
+ i32 26, i32 10, i32 27, i32 27,
+ i32 28, i32 28, i32 29, i32 3,
+ i32 2, i32 30, i32 9, i32 31>
+ ret <16 x i8> %ret
+}
+
+; Test a canonical v8i16 merge low.
+define <8 x i16> @f8(<8 x i16> %val1, <8 x i16> %val2) {
+; CHECK-LABEL: f8:
+; CHECK: vmrlh %v24, %v24, %v26
+; CHECK: br %r14
+ %ret = shufflevector <8 x i16> %val1, <8 x i16> %val2,
+ <8 x i32> <i32 4, i32 12, i32 5, i32 13,
+ i32 6, i32 14, i32 7, i32 15>
+ ret <8 x i16> %ret
+}
+
+; Test a reversed v8i16 merge low.
+define <8 x i16> @f9(<8 x i16> %val1, <8 x i16> %val2) {
+; CHECK-LABEL: f9:
+; CHECK: vmrlh %v24, %v26, %v24
+; CHECK: br %r14
+ %ret = shufflevector <8 x i16> %val1, <8 x i16> %val2,
+ <8 x i32> <i32 12, i32 4, i32 13, i32 5,
+ i32 14, i32 6, i32 15, i32 7>
+ ret <8 x i16> %ret
+}
+
+; Test a canonical v4i32 merge low.
+define <4 x i32> @f10(<4 x i32> %val1, <4 x i32> %val2) {
+; CHECK-LABEL: f10:
+; CHECK: vmrlf %v24, %v24, %v26
+; CHECK: br %r14
+ %ret = shufflevector <4 x i32> %val1, <4 x i32> %val2,
+ <4 x i32> <i32 2, i32 6, i32 3, i32 7>
+ ret <4 x i32> %ret
+}
+
+; Test a reversed v4i32 merge low.
+define <4 x i32> @f11(<4 x i32> %val1, <4 x i32> %val2) {
+; CHECK-LABEL: f11:
+; CHECK: vmrlf %v24, %v26, %v24
+; CHECK: br %r14
+ %ret = shufflevector <4 x i32> %val1, <4 x i32> %val2,
+ <4 x i32> <i32 6, i32 2, i32 7, i32 3>
+ ret <4 x i32> %ret
+}
+
+; Test a canonical v2i64 merge low.
+define <2 x i64> @f12(<2 x i64> %val1, <2 x i64> %val2) {
+; CHECK-LABEL: f12:
+; CHECK: vmrlg %v24, %v24, %v26
+; CHECK: br %r14
+ %ret = shufflevector <2 x i64> %val1, <2 x i64> %val2,
+ <2 x i32> <i32 1, i32 3>
+ ret <2 x i64> %ret
+}
+
+; Test a reversed v2i64 merge low.
+define <2 x i64> @f13(<2 x i64> %val1, <2 x i64> %val2) {
+; CHECK-LABEL: f13:
+; CHECK: vmrlg %v24, %v26, %v24
+; CHECK: br %r14
+ %ret = shufflevector <2 x i64> %val1, <2 x i64> %val2,
+ <2 x i32> <i32 3, i32 1>
+ ret <2 x i64> %ret
+}
+
+; Test a canonical v4f32 merge low.
+define <4 x float> @f14(<4 x float> %val1, <4 x float> %val2) {
+; CHECK-LABEL: f14:
+; CHECK: vmrlf %v24, %v24, %v26
+; CHECK: br %r14
+ %ret = shufflevector <4 x float> %val1, <4 x float> %val2,
+ <4 x i32> <i32 2, i32 6, i32 3, i32 7>
+ ret <4 x float> %ret
+}
+
+; Test a reversed v4f32 merge low.
+define <4 x float> @f15(<4 x float> %val1, <4 x float> %val2) {
+; CHECK-LABEL: f15:
+; CHECK: vmrlf %v24, %v26, %v24
+; CHECK: br %r14
+ %ret = shufflevector <4 x float> %val1, <4 x float> %val2,
+ <4 x i32> <i32 6, i32 2, i32 7, i32 3>
+ ret <4 x float> %ret
+}
+
+; Test a canonical v2f64 merge low.
+define <2 x double> @f16(<2 x double> %val1, <2 x double> %val2) {
+; CHECK-LABEL: f16:
+; CHECK: vmrlg %v24, %v24, %v26
+; CHECK: br %r14
+ %ret = shufflevector <2 x double> %val1, <2 x double> %val2,
+ <2 x i32> <i32 1, i32 3>
+ ret <2 x double> %ret
+}
+
+; Test a reversed v2f64 merge low.
+define <2 x double> @f17(<2 x double> %val1, <2 x double> %val2) {
+; CHECK-LABEL: f17:
+; CHECK: vmrlg %v24, %v26, %v24
+; CHECK: br %r14
+ %ret = shufflevector <2 x double> %val1, <2 x double> %val2,
+ <2 x i32> <i32 3, i32 1>
+ ret <2 x double> %ret
+}
diff --git a/test/CodeGen/SystemZ/vec-perm-06.ll b/test/CodeGen/SystemZ/vec-perm-06.ll
new file mode 100644
index 000000000000..835276a36725
--- /dev/null
+++ b/test/CodeGen/SystemZ/vec-perm-06.ll
@@ -0,0 +1,160 @@
+; Test vector pack.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
+; Test a canonical v16i8 pack.
+define <16 x i8> @f1(<16 x i8> %val1, <16 x i8> %val2) {
+; CHECK-LABEL: f1:
+; CHECK: vpkh %v24, %v24, %v26
+; CHECK: br %r14
+ %ret = shufflevector <16 x i8> %val1, <16 x i8> %val2,
+ <16 x i32> <i32 1, i32 3, i32 5, i32 7,
+ i32 9, i32 11, i32 13, i32 15,
+ i32 17, i32 19, i32 21, i32 23,
+ i32 25, i32 27, i32 29, i32 31>
+ ret <16 x i8> %ret
+}
+
+; Test a reversed v16i8 pack.
+define <16 x i8> @f2(<16 x i8> %val1, <16 x i8> %val2) {
+; CHECK-LABEL: f2:
+; CHECK: vpkh %v24, %v26, %v24
+; CHECK: br %r14
+ %ret = shufflevector <16 x i8> %val1, <16 x i8> %val2,
+ <16 x i32> <i32 17, i32 19, i32 21, i32 23,
+ i32 25, i32 27, i32 29, i32 31,
+ i32 1, i32 3, i32 5, i32 7,
+ i32 9, i32 11, i32 13, i32 15>
+ ret <16 x i8> %ret
+}
+
+; Test a v16i8 pack with only the first operand being used.
+define <16 x i8> @f3(<16 x i8> %val1, <16 x i8> %val2) {
+; CHECK-LABEL: f3:
+; CHECK: vpkh %v24, %v24, %v24
+; CHECK: br %r14
+ %ret = shufflevector <16 x i8> %val1, <16 x i8> %val2,
+ <16 x i32> <i32 1, i32 3, i32 5, i32 7,
+ i32 9, i32 11, i32 13, i32 15,
+ i32 1, i32 3, i32 5, i32 7,
+ i32 9, i32 11, i32 13, i32 15>
+ ret <16 x i8> %ret
+}
+
+; Test a v16i8 pack with only the second operand being used.
+; This is converted into @f3 by target-independent code.
+define <16 x i8> @f4(<16 x i8> %val1, <16 x i8> %val2) {
+; CHECK-LABEL: f4:
+; CHECK: vpkh %v24, %v26, %v26
+; CHECK: br %r14
+ %ret = shufflevector <16 x i8> %val1, <16 x i8> %val2,
+ <16 x i32> <i32 17, i32 19, i32 21, i32 23,
+ i32 25, i32 27, i32 29, i32 31,
+ i32 17, i32 19, i32 21, i32 23,
+ i32 25, i32 27, i32 29, i32 31>
+ ret <16 x i8> %ret
+}
+
+; Test a v16i8 pack with both operands being the same. This too is
+; converted into @f3 by target-independent code.
+define <16 x i8> @f5(<16 x i8> %val) {
+; CHECK-LABEL: f5:
+; CHECK: vpkh %v24, %v24, %v24
+; CHECK: br %r14
+ %ret = shufflevector <16 x i8> %val, <16 x i8> %val,
+ <16 x i32> <i32 1, i32 3, i32 5, i32 7,
+ i32 9, i32 11, i32 13, i32 15,
+ i32 17, i32 19, i32 21, i32 23,
+ i32 25, i32 27, i32 29, i32 31>
+ ret <16 x i8> %ret
+}
+
+; Test a v16i8 pack in which some of the indices are "don't care".
+define <16 x i8> @f6(<16 x i8> %val1, <16 x i8> %val2) {
+; CHECK-LABEL: f6:
+; CHECK: vpkh %v24, %v24, %v26
+; CHECK: br %r14
+ %ret = shufflevector <16 x i8> %val1, <16 x i8> %val2,
+ <16 x i32> <i32 1, i32 undef, i32 5, i32 7,
+ i32 undef, i32 11, i32 undef, i32 undef,
+ i32 undef, i32 19, i32 21, i32 23,
+ i32 undef, i32 27, i32 29, i32 undef>
+ ret <16 x i8> %ret
+}
+
+; Test a v16i8 pack in which one of the operands is undefined and where
+; indices for that operand are "don't care". Target-independent code
+; converts the indices themselves into "undef"s.
+define <16 x i8> @f7(<16 x i8> %val) {
+; CHECK-LABEL: f7:
+; CHECK: vpkh %v24, %v24, %v24
+; CHECK: br %r14
+ %ret = shufflevector <16 x i8> undef, <16 x i8> %val,
+ <16 x i32> <i32 7, i32 1, i32 9, i32 15,
+ i32 15, i32 3, i32 5, i32 1,
+ i32 17, i32 19, i32 21, i32 23,
+ i32 25, i32 27, i32 29, i32 31>
+ ret <16 x i8> %ret
+}
+
+; Test a canonical v8i16 pack.
+define <8 x i16> @f8(<8 x i16> %val1, <8 x i16> %val2) {
+; CHECK-LABEL: f8:
+; CHECK: vpkf %v24, %v24, %v26
+; CHECK: br %r14
+ %ret = shufflevector <8 x i16> %val1, <8 x i16> %val2,
+ <8 x i32> <i32 1, i32 3, i32 5, i32 7,
+ i32 9, i32 11, i32 13, i32 15>
+ ret <8 x i16> %ret
+}
+
+; Test a reversed v8i16 pack.
+define <8 x i16> @f9(<8 x i16> %val1, <8 x i16> %val2) {
+; CHECK-LABEL: f9:
+; CHECK: vpkf %v24, %v26, %v24
+; CHECK: br %r14
+ %ret = shufflevector <8 x i16> %val1, <8 x i16> %val2,
+ <8 x i32> <i32 9, i32 11, i32 13, i32 15,
+ i32 1, i32 3, i32 5, i32 7>
+ ret <8 x i16> %ret
+}
+
+; Test a canonical v4i32 pack.
+define <4 x i32> @f10(<4 x i32> %val1, <4 x i32> %val2) {
+; CHECK-LABEL: f10:
+; CHECK: vpkg %v24, %v24, %v26
+; CHECK: br %r14
+ %ret = shufflevector <4 x i32> %val1, <4 x i32> %val2,
+ <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+ ret <4 x i32> %ret
+}
+
+; Test a reversed v4i32 pack.
+define <4 x i32> @f11(<4 x i32> %val1, <4 x i32> %val2) {
+; CHECK-LABEL: f11:
+; CHECK: vpkg %v24, %v26, %v24
+; CHECK: br %r14
+ %ret = shufflevector <4 x i32> %val1, <4 x i32> %val2,
+ <4 x i32> <i32 5, i32 7, i32 1, i32 3>
+ ret <4 x i32> %ret
+}
+
+; Test a canonical v4f32 pack.
+define <4 x float> @f12(<4 x float> %val1, <4 x float> %val2) {
+; CHECK-LABEL: f12:
+; CHECK: vpkg %v24, %v24, %v26
+; CHECK: br %r14
+ %ret = shufflevector <4 x float> %val1, <4 x float> %val2,
+ <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+ ret <4 x float> %ret
+}
+
+; Test a reversed v4f32 pack.
+define <4 x float> @f13(<4 x float> %val1, <4 x float> %val2) {
+; CHECK-LABEL: f13:
+; CHECK: vpkg %v24, %v26, %v24
+; CHECK: br %r14
+ %ret = shufflevector <4 x float> %val1, <4 x float> %val2,
+ <4 x i32> <i32 5, i32 7, i32 1, i32 3>
+ ret <4 x float> %ret
+}
diff --git a/test/CodeGen/SystemZ/vec-perm-07.ll b/test/CodeGen/SystemZ/vec-perm-07.ll
new file mode 100644
index 000000000000..9a370af2c0e7
--- /dev/null
+++ b/test/CodeGen/SystemZ/vec-perm-07.ll
@@ -0,0 +1,145 @@
+; Test vector shift left double immediate.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
+; Test a v16i8 shift with the lowest useful shift amount.
+define <16 x i8> @f1(<16 x i8> %val1, <16 x i8> %val2) {
+; CHECK-LABEL: f1:
+; CHECK: vsldb %v24, %v24, %v26, 1
+; CHECK: br %r14
+ %ret = shufflevector <16 x i8> %val1, <16 x i8> %val2,
+ <16 x i32> <i32 1, i32 2, i32 3, i32 4,
+ i32 5, i32 6, i32 7, i32 8,
+ i32 9, i32 10, i32 11, i32 12,
+ i32 13, i32 14, i32 15, i32 16>
+ ret <16 x i8> %ret
+}
+
+; Test a v16i8 shift with the highest shift amount.
+define <16 x i8> @f2(<16 x i8> %val1, <16 x i8> %val2) {
+; CHECK-LABEL: f2:
+; CHECK: vsldb %v24, %v24, %v26, 15
+; CHECK: br %r14
+ %ret = shufflevector <16 x i8> %val1, <16 x i8> %val2,
+ <16 x i32> <i32 15, i32 16, i32 17, i32 18,
+ i32 19, i32 20, i32 21, i32 22,
+ i32 23, i32 24, i32 25, i32 26,
+ i32 27, i32 28, i32 29, i32 30>
+ ret <16 x i8> %ret
+}
+
+; Test a v16i8 shift in which the operands need to be reversed.
+define <16 x i8> @f3(<16 x i8> %val1, <16 x i8> %val2) {
+; CHECK-LABEL: f3:
+; CHECK: vsldb %v24, %v26, %v24, 4
+; CHECK: br %r14
+ %ret = shufflevector <16 x i8> %val1, <16 x i8> %val2,
+ <16 x i32> <i32 20, i32 21, i32 22, i32 23,
+ i32 24, i32 25, i32 26, i32 27,
+ i32 28, i32 29, i32 30, i32 31,
+ i32 0, i32 1, i32 2, i32 3>
+ ret <16 x i8> %ret
+}
+
+; Test a v16i8 shift in which the operands need to be duplicated.
+define <16 x i8> @f4(<16 x i8> %val) {
+; CHECK-LABEL: f4:
+; CHECK: vsldb %v24, %v24, %v24, 7
+; CHECK: br %r14
+ %ret = shufflevector <16 x i8> %val, <16 x i8> undef,
+ <16 x i32> <i32 7, i32 8, i32 9, i32 10,
+ i32 11, i32 12, i32 13, i32 14,
+ i32 15, i32 0, i32 1, i32 2,
+ i32 3, i32 4, i32 5, i32 6>
+ ret <16 x i8> %ret
+}
+
+; Test a v16i8 shift in which some of the indices are undefs.
+define <16 x i8> @f5(<16 x i8> %val1, <16 x i8> %val2) {
+; CHECK-LABEL: f5:
+; CHECK: vsldb %v24, %v24, %v26, 11
+; CHECK: br %r14
+ %ret = shufflevector <16 x i8> %val1, <16 x i8> %val2,
+ <16 x i32> <i32 undef, i32 undef, i32 undef, i32 undef,
+ i32 15, i32 16, i32 undef, i32 18,
+ i32 19, i32 20, i32 21, i32 22,
+ i32 23, i32 24, i32 25, i32 26>
+ ret <16 x i8> %ret
+}
+
+; ...and again with reversed operands.
+define <16 x i8> @f6(<16 x i8> %val1, <16 x i8> %val2) {
+; CHECK-LABEL: f6:
+; CHECK: vsldb %v24, %v26, %v24, 13
+; CHECK: br %r14
+ %ret = shufflevector <16 x i8> %val1, <16 x i8> %val2,
+ <16 x i32> <i32 undef, i32 undef, i32 31, i32 0,
+ i32 1, i32 2, i32 3, i32 4,
+ i32 5, i32 6, i32 7, i32 8,
+ i32 9, i32 10, i32 11, i32 12>
+ ret <16 x i8> %ret
+}
+
+; Test a v8i16 shift with the lowest useful shift amount.
+define <8 x i16> @f7(<8 x i16> %val1, <8 x i16> %val2) {
+; CHECK-LABEL: f7:
+; CHECK: vsldb %v24, %v24, %v26, 2
+; CHECK: br %r14
+ %ret = shufflevector <8 x i16> %val1, <8 x i16> %val2,
+ <8 x i32> <i32 1, i32 2, i32 3, i32 4,
+ i32 5, i32 6, i32 7, i32 8>
+ ret <8 x i16> %ret
+}
+
+; Test a v8i16 shift with the highest useful shift amount.
+define <8 x i16> @f8(<8 x i16> %val1, <8 x i16> %val2) {
+; CHECK-LABEL: f8:
+; CHECK: vsldb %v24, %v24, %v26, 14
+; CHECK: br %r14
+ %ret = shufflevector <8 x i16> %val1, <8 x i16> %val2,
+ <8 x i32> <i32 7, i32 8, i32 9, i32 10,
+ i32 11, i32 12, i32 13, i32 14>
+ ret <8 x i16> %ret
+}
+
+; Test a v4i32 shift with the lowest useful shift amount.
+define <4 x i32> @f9(<4 x i32> %val1, <4 x i32> %val2) {
+; CHECK-LABEL: f9:
+; CHECK: vsldb %v24, %v24, %v26, 4
+; CHECK: br %r14
+ %ret = shufflevector <4 x i32> %val1, <4 x i32> %val2,
+ <4 x i32> <i32 1, i32 2, i32 3, i32 4>
+ ret <4 x i32> %ret
+}
+
+; Test a v4i32 shift with the highest useful shift amount.
+define <4 x i32> @f10(<4 x i32> %val1, <4 x i32> %val2) {
+; CHECK-LABEL: f10:
+; CHECK: vsldb %v24, %v24, %v26, 12
+; CHECK: br %r14
+ %ret = shufflevector <4 x i32> %val1, <4 x i32> %val2,
+ <4 x i32> <i32 3, i32 4, i32 5, i32 6>
+ ret <4 x i32> %ret
+}
+
+; Test a v4f32 shift with the lowest useful shift amount.
+define <4 x float> @f12(<4 x float> %val1, <4 x float> %val2) {
+; CHECK-LABEL: f12:
+; CHECK: vsldb %v24, %v24, %v26, 4
+; CHECK: br %r14
+ %ret = shufflevector <4 x float> %val1, <4 x float> %val2,
+ <4 x i32> <i32 1, i32 2, i32 3, i32 4>
+ ret <4 x float> %ret
+}
+
+; Test a v4f32 shift with the highest useful shift amount.
+define <4 x float> @f13(<4 x float> %val1, <4 x float> %val2) {
+; CHECK-LABEL: f13:
+; CHECK: vsldb %v24, %v24, %v26, 12
+; CHECK: br %r14
+ %ret = shufflevector <4 x float> %val1, <4 x float> %val2,
+ <4 x i32> <i32 3, i32 4, i32 5, i32 6>
+ ret <4 x float> %ret
+}
+
+; We use VPDI for v2i64 shuffles.
diff --git a/test/CodeGen/SystemZ/vec-perm-08.ll b/test/CodeGen/SystemZ/vec-perm-08.ll
new file mode 100644
index 000000000000..a18ca7b73975
--- /dev/null
+++ b/test/CodeGen/SystemZ/vec-perm-08.ll
@@ -0,0 +1,170 @@
+; Test vector permutes using VPDI.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
+; Test a high1/low2 permute for v16i8.
+define <16 x i8> @f1(<16 x i8> %val1, <16 x i8> %val2) {
+; CHECK-LABEL: f1:
+; CHECK: vpdi %v24, %v24, %v26, 1
+; CHECK: br %r14
+ %ret = shufflevector <16 x i8> %val1, <16 x i8> %val2,
+ <16 x i32> <i32 0, i32 1, i32 2, i32 3,
+ i32 4, i32 5, i32 6, i32 7,
+ i32 24, i32 25, i32 26, i32 27,
+ i32 28, i32 29, i32 30, i32 31>
+ ret <16 x i8> %ret
+}
+
+; Test a low2/high1 permute for v16i8.
+define <16 x i8> @f2(<16 x i8> %val1, <16 x i8> %val2) {
+; CHECK-LABEL: f2:
+; CHECK: vpdi %v24, %v26, %v24, 4
+; CHECK: br %r14
+ %ret = shufflevector <16 x i8> %val1, <16 x i8> %val2,
+ <16 x i32> <i32 24, i32 25, i32 26, i32 27,
+ i32 28, i32 29, i32 30, i32 31,
+ i32 0, i32 1, i32 2, i32 3,
+ i32 4, i32 5, i32 6, i32 7>
+ ret <16 x i8> %ret
+}
+
+; Test a low1/high2 permute for v16i8.
+define <16 x i8> @f3(<16 x i8> %val1, <16 x i8> %val2) {
+; CHECK-LABEL: f3:
+; CHECK: vpdi %v24, %v24, %v26, 4
+; CHECK: br %r14
+ %ret = shufflevector <16 x i8> %val1, <16 x i8> %val2,
+ <16 x i32> <i32 8, i32 9, i32 10, i32 undef,
+ i32 12, i32 undef, i32 14, i32 15,
+ i32 16, i32 17, i32 undef, i32 19,
+ i32 20, i32 21, i32 22, i32 undef>
+ ret <16 x i8> %ret
+}
+
+; Test a high2/low1 permute for v16i8.
+define <16 x i8> @f4(<16 x i8> %val1, <16 x i8> %val2) {
+; CHECK-LABEL: f4:
+; CHECK: vpdi %v24, %v26, %v24, 1
+; CHECK: br %r14
+ %ret = shufflevector <16 x i8> %val1, <16 x i8> %val2,
+ <16 x i32> <i32 16, i32 17, i32 18, i32 19,
+ i32 20, i32 21, i32 22, i32 23,
+ i32 8, i32 9, i32 10, i32 11,
+ i32 12, i32 13, i32 14, i32 15>
+ ret <16 x i8> %ret
+}
+
+; Test reversing two doublewords in a v16i8.
+define <16 x i8> @f5(<16 x i8> %val) {
+; CHECK-LABEL: f5:
+; CHECK: vpdi %v24, %v24, %v24, 4
+; CHECK: br %r14
+ %ret = shufflevector <16 x i8> %val, <16 x i8> undef,
+ <16 x i32> <i32 8, i32 9, i32 10, i32 11,
+ i32 12, i32 13, i32 14, i32 15,
+ i32 0, i32 1, i32 2, i32 3,
+ i32 4, i32 5, i32 6, i32 7>
+ ret <16 x i8> %ret
+}
+
+; Test a high1/low2 permute for v8i16.
+define <8 x i16> @f6(<8 x i16> %val1, <8 x i16> %val2) {
+; CHECK-LABEL: f6:
+; CHECK: vpdi %v24, %v24, %v26, 1
+; CHECK: br %r14
+ %ret = shufflevector <8 x i16> %val1, <8 x i16> %val2,
+ <8 x i32> <i32 0, i32 1, i32 2, i32 3,
+ i32 12, i32 13, i32 14, i32 15>
+ ret <8 x i16> %ret
+}
+
+; Test a low2/high1 permute for v8i16.
+define <8 x i16> @f7(<8 x i16> %val1, <8 x i16> %val2) {
+; CHECK-LABEL: f7:
+; CHECK: vpdi %v24, %v26, %v24, 4
+; CHECK: br %r14
+ %ret = shufflevector <8 x i16> %val1, <8 x i16> %val2,
+ <8 x i32> <i32 12, i32 13, i32 14, i32 15,
+ i32 0, i32 1, i32 2, i32 3>
+ ret <8 x i16> %ret
+}
+
+; Test a high1/low2 permute for v4i32.
+define <4 x i32> @f8(<4 x i32> %val1, <4 x i32> %val2) {
+; CHECK-LABEL: f8:
+; CHECK: vpdi %v24, %v24, %v26, 1
+; CHECK: br %r14
+ %ret = shufflevector <4 x i32> %val1, <4 x i32> %val2,
+ <4 x i32> <i32 0, i32 1, i32 6, i32 7>
+ ret <4 x i32> %ret
+}
+
+; Test a low2/high1 permute for v4i32.
+define <4 x i32> @f9(<4 x i32> %val1, <4 x i32> %val2) {
+; CHECK-LABEL: f9:
+; CHECK: vpdi %v24, %v26, %v24, 4
+; CHECK: br %r14
+ %ret = shufflevector <4 x i32> %val1, <4 x i32> %val2,
+ <4 x i32> <i32 6, i32 7, i32 0, i32 1>
+ ret <4 x i32> %ret
+}
+
+; Test a high1/low2 permute for v2i64.
+define <2 x i64> @f10(<2 x i64> %val1, <2 x i64> %val2) {
+; CHECK-LABEL: f10:
+; CHECK: vpdi %v24, %v24, %v26, 1
+; CHECK: br %r14
+ %ret = shufflevector <2 x i64> %val1, <2 x i64> %val2,
+ <2 x i32> <i32 0, i32 3>
+ ret <2 x i64> %ret
+}
+
+; Test a low2/high1 permute for v2i64.
+define <2 x i64> @f11(<2 x i64> %val1, <2 x i64> %val2) {
+; CHECK-LABEL: f11:
+; CHECK: vpdi %v24, %v26, %v24, 4
+; CHECK: br %r14
+ %ret = shufflevector <2 x i64> %val1, <2 x i64> %val2,
+ <2 x i32> <i32 3, i32 0>
+ ret <2 x i64> %ret
+}
+
+; Test a high1/low2 permute for v4f32.
+define <4 x float> @f12(<4 x float> %val1, <4 x float> %val2) {
+; CHECK-LABEL: f12:
+; CHECK: vpdi %v24, %v24, %v26, 1
+; CHECK: br %r14
+ %ret = shufflevector <4 x float> %val1, <4 x float> %val2,
+ <4 x i32> <i32 0, i32 1, i32 6, i32 7>
+ ret <4 x float> %ret
+}
+
+; Test a low2/high1 permute for v4f32.
+define <4 x float> @f13(<4 x float> %val1, <4 x float> %val2) {
+; CHECK-LABEL: f13:
+; CHECK: vpdi %v24, %v26, %v24, 4
+; CHECK: br %r14
+ %ret = shufflevector <4 x float> %val1, <4 x float> %val2,
+ <4 x i32> <i32 6, i32 7, i32 0, i32 1>
+ ret <4 x float> %ret
+}
+
+; Test a high1/low2 permute for v2f64.
+define <2 x double> @f14(<2 x double> %val1, <2 x double> %val2) {
+; CHECK-LABEL: f14:
+; CHECK: vpdi %v24, %v24, %v26, 1
+; CHECK: br %r14
+ %ret = shufflevector <2 x double> %val1, <2 x double> %val2,
+ <2 x i32> <i32 0, i32 3>
+ ret <2 x double> %ret
+}
+
+; Test a low2/high1 permute for v2f64.
+define <2 x double> @f15(<2 x double> %val1, <2 x double> %val2) {
+; CHECK-LABEL: f15:
+; CHECK: vpdi %v24, %v26, %v24, 4
+; CHECK: br %r14
+ %ret = shufflevector <2 x double> %val1, <2 x double> %val2,
+ <2 x i32> <i32 3, i32 0>
+ ret <2 x double> %ret
+}
diff --git a/test/CodeGen/SystemZ/vec-perm-09.ll b/test/CodeGen/SystemZ/vec-perm-09.ll
new file mode 100644
index 000000000000..9c9632cf0305
--- /dev/null
+++ b/test/CodeGen/SystemZ/vec-perm-09.ll
@@ -0,0 +1,38 @@
+; Test general vector permute of a v16i8.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | \
+; RUN: FileCheck -check-prefix=CHECK-CODE %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | \
+; RUN: FileCheck -check-prefix=CHECK-VECTOR %s
+
+define <16 x i8> @f1(<16 x i8> %val1, <16 x i8> %val2) {
+; CHECK-CODE-LABEL: f1:
+; CHECK-CODE: larl [[REG:%r[0-5]]],
+; CHECK-CODE: vl [[MASK:%v[0-9]+]], 0([[REG]])
+; CHECK-CODE: vperm %v24, %v24, %v26, [[MASK]]
+; CHECK-CODE: br %r14
+;
+; CHECK-VECTOR: .byte 1
+; CHECK-VECTOR-NEXT: .byte 19
+; CHECK-VECTOR-NEXT: .byte 6
+; CHECK-VECTOR-NEXT: .byte 5
+; CHECK-VECTOR-NEXT: .byte 20
+; CHECK-VECTOR-NEXT: .byte 22
+; CHECK-VECTOR-NEXT: .byte 1
+; CHECK-VECTOR-NEXT: .byte 1
+; CHECK-VECTOR-NEXT: .byte 25
+; CHECK-VECTOR-NEXT: .byte 29
+; CHECK-VECTOR-NEXT: .byte 11
+; Any byte would be OK here
+; CHECK-VECTOR-NEXT: .space 1
+; CHECK-VECTOR-NEXT: .byte 31
+; CHECK-VECTOR-NEXT: .byte 4
+; CHECK-VECTOR-NEXT: .byte 15
+; CHECK-VECTOR-NEXT: .byte 19
+ %ret = shufflevector <16 x i8> %val1, <16 x i8> %val2,
+ <16 x i32> <i32 1, i32 19, i32 6, i32 5,
+ i32 20, i32 22, i32 1, i32 1,
+ i32 25, i32 29, i32 11, i32 undef,
+ i32 31, i32 4, i32 15, i32 19>
+ ret <16 x i8> %ret
+}
diff --git a/test/CodeGen/SystemZ/vec-perm-10.ll b/test/CodeGen/SystemZ/vec-perm-10.ll
new file mode 100644
index 000000000000..382e6dc4c3fb
--- /dev/null
+++ b/test/CodeGen/SystemZ/vec-perm-10.ll
@@ -0,0 +1,36 @@
+; Test general vector permute of a v8i16.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | \
+; RUN: FileCheck -check-prefix=CHECK-CODE %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | \
+; RUN: FileCheck -check-prefix=CHECK-VECTOR %s
+
+define <8 x i16> @f1(<8 x i16> %val1, <8 x i16> %val2) {
+; CHECK-CODE-LABEL: f1:
+; CHECK-CODE: larl [[REG:%r[0-5]]],
+; CHECK-CODE: vl [[MASK:%v[0-9]+]], 0([[REG]])
+; CHECK-CODE: vperm %v24, %v26, %v24, [[MASK]]
+; CHECK-CODE: br %r14
+;
+; CHECK-VECTOR: .byte 0
+; CHECK-VECTOR-NEXT: .byte 1
+; CHECK-VECTOR-NEXT: .byte 26
+; CHECK-VECTOR-NEXT: .byte 27
+; Any 2 bytes would be OK here
+; CHECK-VECTOR-NEXT: .space 1
+; CHECK-VECTOR-NEXT: .space 1
+; CHECK-VECTOR-NEXT: .byte 28
+; CHECK-VECTOR-NEXT: .byte 29
+; CHECK-VECTOR-NEXT: .byte 6
+; CHECK-VECTOR-NEXT: .byte 7
+; CHECK-VECTOR-NEXT: .byte 14
+; CHECK-VECTOR-NEXT: .byte 15
+; CHECK-VECTOR-NEXT: .byte 8
+; CHECK-VECTOR-NEXT: .byte 9
+; CHECK-VECTOR-NEXT: .byte 16
+; CHECK-VECTOR-NEXT: .byte 17
+ %ret = shufflevector <8 x i16> %val1, <8 x i16> %val2,
+ <8 x i32> <i32 8, i32 5, i32 undef, i32 6,
+ i32 11, i32 15, i32 12, i32 0>
+ ret <8 x i16> %ret
+}
diff --git a/test/CodeGen/SystemZ/vec-perm-11.ll b/test/CodeGen/SystemZ/vec-perm-11.ll
new file mode 100644
index 000000000000..c9e29880fe07
--- /dev/null
+++ b/test/CodeGen/SystemZ/vec-perm-11.ll
@@ -0,0 +1,35 @@
+; Test general vector permute of a v4i32.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | \
+; RUN: FileCheck -check-prefix=CHECK-CODE %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | \
+; RUN: FileCheck -check-prefix=CHECK-VECTOR %s
+
+define <4 x i32> @f1(<4 x i32> %val1, <4 x i32> %val2) {
+; CHECK-CODE-LABEL: f1:
+; CHECK-CODE: larl [[REG:%r[0-5]]],
+; CHECK-CODE: vl [[MASK:%v[0-9]+]], 0([[REG]])
+; CHECK-CODE: vperm %v24, %v26, %v24, [[MASK]]
+; CHECK-CODE: br %r14
+;
+; CHECK-VECTOR: .byte 4
+; CHECK-VECTOR-NEXT: .byte 5
+; CHECK-VECTOR-NEXT: .byte 6
+; CHECK-VECTOR-NEXT: .byte 7
+; CHECK-VECTOR-NEXT: .byte 20
+; CHECK-VECTOR-NEXT: .byte 21
+; CHECK-VECTOR-NEXT: .byte 22
+; CHECK-VECTOR-NEXT: .byte 23
+; Any 4 bytes would be OK here
+; CHECK-VECTOR-NEXT: .space 1
+; CHECK-VECTOR-NEXT: .space 1
+; CHECK-VECTOR-NEXT: .space 1
+; CHECK-VECTOR-NEXT: .space 1
+; CHECK-VECTOR-NEXT: .byte 12
+; CHECK-VECTOR-NEXT: .byte 13
+; CHECK-VECTOR-NEXT: .byte 14
+; CHECK-VECTOR-NEXT: .byte 15
+ %ret = shufflevector <4 x i32> %val1, <4 x i32> %val2,
+ <4 x i32> <i32 5, i32 1, i32 undef, i32 7>
+ ret <4 x i32> %ret
+}
diff --git a/test/CodeGen/SystemZ/vec-round-01.ll b/test/CodeGen/SystemZ/vec-round-01.ll
new file mode 100644
index 000000000000..82718276bb08
--- /dev/null
+++ b/test/CodeGen/SystemZ/vec-round-01.ll
@@ -0,0 +1,118 @@
+; Test v2f64 rounding.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
+declare double @llvm.rint.f64(double)
+declare double @llvm.nearbyint.f64(double)
+declare double @llvm.floor.f64(double)
+declare double @llvm.ceil.f64(double)
+declare double @llvm.trunc.f64(double)
+declare double @llvm.round.f64(double)
+declare <2 x double> @llvm.rint.v2f64(<2 x double>)
+declare <2 x double> @llvm.nearbyint.v2f64(<2 x double>)
+declare <2 x double> @llvm.floor.v2f64(<2 x double>)
+declare <2 x double> @llvm.ceil.v2f64(<2 x double>)
+declare <2 x double> @llvm.trunc.v2f64(<2 x double>)
+declare <2 x double> @llvm.round.v2f64(<2 x double>)
+
+define <2 x double> @f1(<2 x double> %val) {
+; CHECK-LABEL: f1:
+; CHECK: vfidb %v24, %v24, 0, 0
+; CHECK: br %r14
+ %res = call <2 x double> @llvm.rint.v2f64(<2 x double> %val)
+ ret <2 x double> %res
+}
+
+define <2 x double> @f2(<2 x double> %val) {
+; CHECK-LABEL: f2:
+; CHECK: vfidb %v24, %v24, 4, 0
+; CHECK: br %r14
+ %res = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %val)
+ ret <2 x double> %res
+}
+
+define <2 x double> @f3(<2 x double> %val) {
+; CHECK-LABEL: f3:
+; CHECK: vfidb %v24, %v24, 4, 7
+; CHECK: br %r14
+ %res = call <2 x double> @llvm.floor.v2f64(<2 x double> %val)
+ ret <2 x double> %res
+}
+
+define <2 x double> @f4(<2 x double> %val) {
+; CHECK-LABEL: f4:
+; CHECK: vfidb %v24, %v24, 4, 6
+; CHECK: br %r14
+ %res = call <2 x double> @llvm.ceil.v2f64(<2 x double> %val)
+ ret <2 x double> %res
+}
+
+define <2 x double> @f5(<2 x double> %val) {
+; CHECK-LABEL: f5:
+; CHECK: vfidb %v24, %v24, 4, 5
+; CHECK: br %r14
+ %res = call <2 x double> @llvm.trunc.v2f64(<2 x double> %val)
+ ret <2 x double> %res
+}
+
+define <2 x double> @f6(<2 x double> %val) {
+; CHECK-LABEL: f6:
+; CHECK: vfidb %v24, %v24, 4, 1
+; CHECK: br %r14
+ %res = call <2 x double> @llvm.round.v2f64(<2 x double> %val)
+ ret <2 x double> %res
+}
+
+define double @f7(<2 x double> %val) {
+; CHECK-LABEL: f7:
+; CHECK: wfidb %f0, %v24, 0, 0
+; CHECK: br %r14
+ %scalar = extractelement <2 x double> %val, i32 0
+ %res = call double @llvm.rint.f64(double %scalar)
+ ret double %res
+}
+
+define double @f8(<2 x double> %val) {
+; CHECK-LABEL: f8:
+; CHECK: wfidb %f0, %v24, 4, 0
+; CHECK: br %r14
+ %scalar = extractelement <2 x double> %val, i32 0
+ %res = call double @llvm.nearbyint.f64(double %scalar)
+ ret double %res
+}
+
+define double @f9(<2 x double> %val) {
+; CHECK-LABEL: f9:
+; CHECK: wfidb %f0, %v24, 4, 7
+; CHECK: br %r14
+ %scalar = extractelement <2 x double> %val, i32 0
+ %res = call double @llvm.floor.f64(double %scalar)
+ ret double %res
+}
+
+define double @f10(<2 x double> %val) {
+; CHECK-LABEL: f10:
+; CHECK: wfidb %f0, %v24, 4, 6
+; CHECK: br %r14
+ %scalar = extractelement <2 x double> %val, i32 0
+ %res = call double @llvm.ceil.f64(double %scalar)
+ ret double %res
+}
+
+define double @f11(<2 x double> %val) {
+; CHECK-LABEL: f11:
+; CHECK: wfidb %f0, %v24, 4, 5
+; CHECK: br %r14
+ %scalar = extractelement <2 x double> %val, i32 0
+ %res = call double @llvm.trunc.f64(double %scalar)
+ ret double %res
+}
+
+define double @f12(<2 x double> %val) {
+; CHECK-LABEL: f12:
+; CHECK: wfidb %f0, %v24, 4, 1
+; CHECK: br %r14
+ %scalar = extractelement <2 x double> %val, i32 0
+ %res = call double @llvm.round.f64(double %scalar)
+ ret double %res
+}
diff --git a/test/CodeGen/SystemZ/vec-shift-01.ll b/test/CodeGen/SystemZ/vec-shift-01.ll
new file mode 100644
index 000000000000..be8605b182c9
--- /dev/null
+++ b/test/CodeGen/SystemZ/vec-shift-01.ll
@@ -0,0 +1,39 @@
+; Test vector shift left with vector shift amount.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
+; Test a v16i8 shift.
+define <16 x i8> @f1(<16 x i8> %dummy, <16 x i8> %val1, <16 x i8> %val2) {
+; CHECK-LABEL: f1:
+; CHECK: veslvb %v24, %v26, %v28
+; CHECK: br %r14
+ %ret = shl <16 x i8> %val1, %val2
+ ret <16 x i8> %ret
+}
+
+; Test a v8i16 shift.
+define <8 x i16> @f2(<8 x i16> %dummy, <8 x i16> %val1, <8 x i16> %val2) {
+; CHECK-LABEL: f2:
+; CHECK: veslvh %v24, %v26, %v28
+; CHECK: br %r14
+ %ret = shl <8 x i16> %val1, %val2
+ ret <8 x i16> %ret
+}
+
+; Test a v4i32 shift.
+define <4 x i32> @f3(<4 x i32> %dummy, <4 x i32> %val1, <4 x i32> %val2) {
+; CHECK-LABEL: f3:
+; CHECK: veslvf %v24, %v26, %v28
+; CHECK: br %r14
+ %ret = shl <4 x i32> %val1, %val2
+ ret <4 x i32> %ret
+}
+
+; Test a v2i64 shift.
+define <2 x i64> @f4(<2 x i64> %dummy, <2 x i64> %val1, <2 x i64> %val2) {
+; CHECK-LABEL: f4:
+; CHECK: veslvg %v24, %v26, %v28
+; CHECK: br %r14
+ %ret = shl <2 x i64> %val1, %val2
+ ret <2 x i64> %ret
+}
diff --git a/test/CodeGen/SystemZ/vec-shift-02.ll b/test/CodeGen/SystemZ/vec-shift-02.ll
new file mode 100644
index 000000000000..2825872e023d
--- /dev/null
+++ b/test/CodeGen/SystemZ/vec-shift-02.ll
@@ -0,0 +1,39 @@
+; Test vector arithmetic shift right with vector shift amount.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
+; Test a v16i8 shift.
+define <16 x i8> @f1(<16 x i8> %dummy, <16 x i8> %val1, <16 x i8> %val2) {
+; CHECK-LABEL: f1:
+; CHECK: vesravb %v24, %v26, %v28
+; CHECK: br %r14
+ %ret = ashr <16 x i8> %val1, %val2
+ ret <16 x i8> %ret
+}
+
+; Test a v8i16 shift.
+define <8 x i16> @f2(<8 x i16> %dummy, <8 x i16> %val1, <8 x i16> %val2) {
+; CHECK-LABEL: f2:
+; CHECK: vesravh %v24, %v26, %v28
+; CHECK: br %r14
+ %ret = ashr <8 x i16> %val1, %val2
+ ret <8 x i16> %ret
+}
+
+; Test a v4i32 shift.
+define <4 x i32> @f3(<4 x i32> %dummy, <4 x i32> %val1, <4 x i32> %val2) {
+; CHECK-LABEL: f3:
+; CHECK: vesravf %v24, %v26, %v28
+; CHECK: br %r14
+ %ret = ashr <4 x i32> %val1, %val2
+ ret <4 x i32> %ret
+}
+
+; Test a v2i64 shift.
+define <2 x i64> @f4(<2 x i64> %dummy, <2 x i64> %val1, <2 x i64> %val2) {
+; CHECK-LABEL: f4:
+; CHECK: vesravg %v24, %v26, %v28
+; CHECK: br %r14
+ %ret = ashr <2 x i64> %val1, %val2
+ ret <2 x i64> %ret
+}
diff --git a/test/CodeGen/SystemZ/vec-shift-03.ll b/test/CodeGen/SystemZ/vec-shift-03.ll
new file mode 100644
index 000000000000..c923d8b5d452
--- /dev/null
+++ b/test/CodeGen/SystemZ/vec-shift-03.ll
@@ -0,0 +1,39 @@
+; Test vector logical shift right with vector shift amount.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
+; Test a v16i8 shift.
+define <16 x i8> @f1(<16 x i8> %dummy, <16 x i8> %val1, <16 x i8> %val2) {
+; CHECK-LABEL: f1:
+; CHECK: vesrlvb %v24, %v26, %v28
+; CHECK: br %r14
+ %ret = lshr <16 x i8> %val1, %val2
+ ret <16 x i8> %ret
+}
+
+; Test a v8i16 shift.
+define <8 x i16> @f2(<8 x i16> %dummy, <8 x i16> %val1, <8 x i16> %val2) {
+; CHECK-LABEL: f2:
+; CHECK: vesrlvh %v24, %v26, %v28
+; CHECK: br %r14
+ %ret = lshr <8 x i16> %val1, %val2
+ ret <8 x i16> %ret
+}
+
+; Test a v4i32 shift.
+define <4 x i32> @f3(<4 x i32> %dummy, <4 x i32> %val1, <4 x i32> %val2) {
+; CHECK-LABEL: f3:
+; CHECK: vesrlvf %v24, %v26, %v28
+; CHECK: br %r14
+ %ret = lshr <4 x i32> %val1, %val2
+ ret <4 x i32> %ret
+}
+
+; Test a v2i64 shift.
+define <2 x i64> @f4(<2 x i64> %dummy, <2 x i64> %val1, <2 x i64> %val2) {
+; CHECK-LABEL: f4:
+; CHECK: vesrlvg %v24, %v26, %v28
+; CHECK: br %r14
+ %ret = lshr <2 x i64> %val1, %val2
+ ret <2 x i64> %ret
+}
diff --git a/test/CodeGen/SystemZ/vec-shift-04.ll b/test/CodeGen/SystemZ/vec-shift-04.ll
new file mode 100644
index 000000000000..6fd12897bf5a
--- /dev/null
+++ b/test/CodeGen/SystemZ/vec-shift-04.ll
@@ -0,0 +1,134 @@
+; Test vector shift left with scalar shift amount.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
+; Test a v16i8 shift by a variable.
+define <16 x i8> @f1(<16 x i8> %dummy, <16 x i8> %val1, i32 %shift) {
+; CHECK-LABEL: f1:
+; CHECK: veslb %v24, %v26, 0(%r2)
+; CHECK: br %r14
+ %truncshift = trunc i32 %shift to i8
+ %shiftvec = insertelement <16 x i8> undef, i8 %truncshift, i32 0
+ %val2 = shufflevector <16 x i8> %shiftvec, <16 x i8> undef,
+ <16 x i32> zeroinitializer
+ %ret = shl <16 x i8> %val1, %val2
+ ret <16 x i8> %ret
+}
+
+; Test a v16i8 shift by the lowest useful constant.
+define <16 x i8> @f2(<16 x i8> %dummy, <16 x i8> %val) {
+; CHECK-LABEL: f2:
+; CHECK: veslb %v24, %v26, 1
+; CHECK: br %r14
+ %ret = shl <16 x i8> %val, <i8 1, i8 1, i8 1, i8 1,
+ i8 1, i8 1, i8 1, i8 1,
+ i8 1, i8 1, i8 1, i8 1,
+ i8 1, i8 1, i8 1, i8 1>
+ ret <16 x i8> %ret
+}
+
+; Test a v16i8 shift by the highest useful constant.
+define <16 x i8> @f3(<16 x i8> %dummy, <16 x i8> %val) {
+; CHECK-LABEL: f3:
+; CHECK: veslb %v24, %v26, 7
+; CHECK: br %r14
+ %ret = shl <16 x i8> %val, <i8 7, i8 7, i8 7, i8 7,
+ i8 7, i8 7, i8 7, i8 7,
+ i8 7, i8 7, i8 7, i8 7,
+ i8 7, i8 7, i8 7, i8 7>
+ ret <16 x i8> %ret
+}
+
+; Test a v8i16 shift by a variable.
+define <8 x i16> @f4(<8 x i16> %dummy, <8 x i16> %val1, i32 %shift) {
+; CHECK-LABEL: f4:
+; CHECK: veslh %v24, %v26, 0(%r2)
+; CHECK: br %r14
+ %truncshift = trunc i32 %shift to i16
+ %shiftvec = insertelement <8 x i16> undef, i16 %truncshift, i32 0
+ %val2 = shufflevector <8 x i16> %shiftvec, <8 x i16> undef,
+ <8 x i32> zeroinitializer
+ %ret = shl <8 x i16> %val1, %val2
+ ret <8 x i16> %ret
+}
+
+; Test a v8i16 shift by the lowest useful constant.
+define <8 x i16> @f5(<8 x i16> %dummy, <8 x i16> %val) {
+; CHECK-LABEL: f5:
+; CHECK: veslh %v24, %v26, 1
+; CHECK: br %r14
+ %ret = shl <8 x i16> %val, <i16 1, i16 1, i16 1, i16 1,
+ i16 1, i16 1, i16 1, i16 1>
+ ret <8 x i16> %ret
+}
+
+; Test a v8i16 shift by the highest useful constant.
+define <8 x i16> @f6(<8 x i16> %dummy, <8 x i16> %val) {
+; CHECK-LABEL: f6:
+; CHECK: veslh %v24, %v26, 15
+; CHECK: br %r14
+ %ret = shl <8 x i16> %val, <i16 15, i16 15, i16 15, i16 15,
+ i16 15, i16 15, i16 15, i16 15>
+ ret <8 x i16> %ret
+}
+
+; Test a v4i32 shift by a variable.
+define <4 x i32> @f7(<4 x i32> %dummy, <4 x i32> %val1, i32 %shift) {
+; CHECK-LABEL: f7:
+; CHECK: veslf %v24, %v26, 0(%r2)
+; CHECK: br %r14
+ %shiftvec = insertelement <4 x i32> undef, i32 %shift, i32 0
+ %val2 = shufflevector <4 x i32> %shiftvec, <4 x i32> undef,
+ <4 x i32> zeroinitializer
+ %ret = shl <4 x i32> %val1, %val2
+ ret <4 x i32> %ret
+}
+
+; Test a v4i32 shift by the lowest useful constant.
+define <4 x i32> @f8(<4 x i32> %dummy, <4 x i32> %val) {
+; CHECK-LABEL: f8:
+; CHECK: veslf %v24, %v26, 1
+; CHECK: br %r14
+ %ret = shl <4 x i32> %val, <i32 1, i32 1, i32 1, i32 1>
+ ret <4 x i32> %ret
+}
+
+; Test a v4i32 shift by the highest useful constant.
+define <4 x i32> @f9(<4 x i32> %dummy, <4 x i32> %val) {
+; CHECK-LABEL: f9:
+; CHECK: veslf %v24, %v26, 31
+; CHECK: br %r14
+ %ret = shl <4 x i32> %val, <i32 31, i32 31, i32 31, i32 31>
+ ret <4 x i32> %ret
+}
+
+; Test a v2i64 shift by a variable.
+define <2 x i64> @f10(<2 x i64> %dummy, <2 x i64> %val1, i32 %shift) {
+; CHECK-LABEL: f10:
+; CHECK: veslg %v24, %v26, 0(%r2)
+; CHECK: br %r14
+ %extshift = sext i32 %shift to i64
+ %shiftvec = insertelement <2 x i64> undef, i64 %extshift, i32 0
+ %val2 = shufflevector <2 x i64> %shiftvec, <2 x i64> undef,
+ <2 x i32> zeroinitializer
+ %ret = shl <2 x i64> %val1, %val2
+ ret <2 x i64> %ret
+}
+
+; Test a v2i64 shift by the lowest useful constant.
+define <2 x i64> @f11(<2 x i64> %dummy, <2 x i64> %val) {
+; CHECK-LABEL: f11:
+; CHECK: veslg %v24, %v26, 1
+; CHECK: br %r14
+ %ret = shl <2 x i64> %val, <i64 1, i64 1>
+ ret <2 x i64> %ret
+}
+
+; Test a v2i64 shift by the highest useful constant.
+define <2 x i64> @f12(<2 x i64> %dummy, <2 x i64> %val) {
+; CHECK-LABEL: f12:
+; CHECK: veslg %v24, %v26, 63
+; CHECK: br %r14
+ %ret = shl <2 x i64> %val, <i64 63, i64 63>
+ ret <2 x i64> %ret
+}
diff --git a/test/CodeGen/SystemZ/vec-shift-05.ll b/test/CodeGen/SystemZ/vec-shift-05.ll
new file mode 100644
index 000000000000..22ce46b2d0d6
--- /dev/null
+++ b/test/CodeGen/SystemZ/vec-shift-05.ll
@@ -0,0 +1,134 @@
+; Test vector arithmetic shift right with scalar shift amount.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
+; Test a v16i8 shift by a variable.
+define <16 x i8> @f1(<16 x i8> %dummy, <16 x i8> %val1, i32 %shift) {
+; CHECK-LABEL: f1:
+; CHECK: vesrab %v24, %v26, 0(%r2)
+; CHECK: br %r14
+ %truncshift = trunc i32 %shift to i8
+ %shiftvec = insertelement <16 x i8> undef, i8 %truncshift, i32 0
+ %val2 = shufflevector <16 x i8> %shiftvec, <16 x i8> undef,
+ <16 x i32> zeroinitializer
+ %ret = ashr <16 x i8> %val1, %val2
+ ret <16 x i8> %ret
+}
+
+; Test a v16i8 shift by the lowest useful constant.
+define <16 x i8> @f2(<16 x i8> %dummy, <16 x i8> %val) {
+; CHECK-LABEL: f2:
+; CHECK: vesrab %v24, %v26, 1
+; CHECK: br %r14
+ %ret = ashr <16 x i8> %val, <i8 1, i8 1, i8 1, i8 1,
+ i8 1, i8 1, i8 1, i8 1,
+ i8 1, i8 1, i8 1, i8 1,
+ i8 1, i8 1, i8 1, i8 1>
+ ret <16 x i8> %ret
+}
+
+; Test a v16i8 shift by the highest useful constant.
+define <16 x i8> @f3(<16 x i8> %dummy, <16 x i8> %val) {
+; CHECK-LABEL: f3:
+; CHECK: vesrab %v24, %v26, 7
+; CHECK: br %r14
+ %ret = ashr <16 x i8> %val, <i8 7, i8 7, i8 7, i8 7,
+ i8 7, i8 7, i8 7, i8 7,
+ i8 7, i8 7, i8 7, i8 7,
+ i8 7, i8 7, i8 7, i8 7>
+ ret <16 x i8> %ret
+}
+
+; Test a v8i16 shift by a variable.
+define <8 x i16> @f4(<8 x i16> %dummy, <8 x i16> %val1, i32 %shift) {
+; CHECK-LABEL: f4:
+; CHECK: vesrah %v24, %v26, 0(%r2)
+; CHECK: br %r14
+ %truncshift = trunc i32 %shift to i16
+ %shiftvec = insertelement <8 x i16> undef, i16 %truncshift, i32 0
+ %val2 = shufflevector <8 x i16> %shiftvec, <8 x i16> undef,
+ <8 x i32> zeroinitializer
+ %ret = ashr <8 x i16> %val1, %val2
+ ret <8 x i16> %ret
+}
+
+; Test a v8i16 shift by the lowest useful constant.
+define <8 x i16> @f5(<8 x i16> %dummy, <8 x i16> %val) {
+; CHECK-LABEL: f5:
+; CHECK: vesrah %v24, %v26, 1
+; CHECK: br %r14
+ %ret = ashr <8 x i16> %val, <i16 1, i16 1, i16 1, i16 1,
+ i16 1, i16 1, i16 1, i16 1>
+ ret <8 x i16> %ret
+}
+
+; Test a v8i16 shift by the highest useful constant.
+define <8 x i16> @f6(<8 x i16> %dummy, <8 x i16> %val) {
+; CHECK-LABEL: f6:
+; CHECK: vesrah %v24, %v26, 15
+; CHECK: br %r14
+ %ret = ashr <8 x i16> %val, <i16 15, i16 15, i16 15, i16 15,
+ i16 15, i16 15, i16 15, i16 15>
+ ret <8 x i16> %ret
+}
+
+; Test a v4i32 shift by a variable.
+define <4 x i32> @f7(<4 x i32> %dummy, <4 x i32> %val1, i32 %shift) {
+; CHECK-LABEL: f7:
+; CHECK: vesraf %v24, %v26, 0(%r2)
+; CHECK: br %r14
+ %shiftvec = insertelement <4 x i32> undef, i32 %shift, i32 0
+ %val2 = shufflevector <4 x i32> %shiftvec, <4 x i32> undef,
+ <4 x i32> zeroinitializer
+ %ret = ashr <4 x i32> %val1, %val2
+ ret <4 x i32> %ret
+}
+
+; Test a v4i32 shift by the lowest useful constant.
+define <4 x i32> @f8(<4 x i32> %dummy, <4 x i32> %val) {
+; CHECK-LABEL: f8:
+; CHECK: vesraf %v24, %v26, 1
+; CHECK: br %r14
+ %ret = ashr <4 x i32> %val, <i32 1, i32 1, i32 1, i32 1>
+ ret <4 x i32> %ret
+}
+
+; Test a v4i32 shift by the highest useful constant.
+define <4 x i32> @f9(<4 x i32> %dummy, <4 x i32> %val) {
+; CHECK-LABEL: f9:
+; CHECK: vesraf %v24, %v26, 31
+; CHECK: br %r14
+ %ret = ashr <4 x i32> %val, <i32 31, i32 31, i32 31, i32 31>
+ ret <4 x i32> %ret
+}
+
+; Test a v2i64 shift by a variable.
+define <2 x i64> @f10(<2 x i64> %dummy, <2 x i64> %val1, i32 %shift) {
+; CHECK-LABEL: f10:
+; CHECK: vesrag %v24, %v26, 0(%r2)
+; CHECK: br %r14
+ %extshift = sext i32 %shift to i64
+ %shiftvec = insertelement <2 x i64> undef, i64 %extshift, i32 0
+ %val2 = shufflevector <2 x i64> %shiftvec, <2 x i64> undef,
+ <2 x i32> zeroinitializer
+ %ret = ashr <2 x i64> %val1, %val2
+ ret <2 x i64> %ret
+}
+
+; Test a v2i64 shift by the lowest useful constant.
+define <2 x i64> @f11(<2 x i64> %dummy, <2 x i64> %val) {
+; CHECK-LABEL: f11:
+; CHECK: vesrag %v24, %v26, 1
+; CHECK: br %r14
+ %ret = ashr <2 x i64> %val, <i64 1, i64 1>
+ ret <2 x i64> %ret
+}
+
+; Test a v2i64 shift by the highest useful constant.
+define <2 x i64> @f12(<2 x i64> %dummy, <2 x i64> %val) {
+; CHECK-LABEL: f12:
+; CHECK: vesrag %v24, %v26, 63
+; CHECK: br %r14
+ %ret = ashr <2 x i64> %val, <i64 63, i64 63>
+ ret <2 x i64> %ret
+}
diff --git a/test/CodeGen/SystemZ/vec-shift-06.ll b/test/CodeGen/SystemZ/vec-shift-06.ll
new file mode 100644
index 000000000000..8a5bb0a9a55a
--- /dev/null
+++ b/test/CodeGen/SystemZ/vec-shift-06.ll
@@ -0,0 +1,134 @@
+; Test vector logical shift right with scalar shift amount.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
+; Test a v16i8 shift by a variable.
+define <16 x i8> @f1(<16 x i8> %dummy, <16 x i8> %val1, i32 %shift) {
+; CHECK-LABEL: f1:
+; CHECK: vesrlb %v24, %v26, 0(%r2)
+; CHECK: br %r14
+ %truncshift = trunc i32 %shift to i8
+ %shiftvec = insertelement <16 x i8> undef, i8 %truncshift, i32 0
+ %val2 = shufflevector <16 x i8> %shiftvec, <16 x i8> undef,
+ <16 x i32> zeroinitializer
+ %ret = lshr <16 x i8> %val1, %val2
+ ret <16 x i8> %ret
+}
+
+; Test a v16i8 shift by the lowest useful constant.
+define <16 x i8> @f2(<16 x i8> %dummy, <16 x i8> %val) {
+; CHECK-LABEL: f2:
+; CHECK: vesrlb %v24, %v26, 1
+; CHECK: br %r14
+ %ret = lshr <16 x i8> %val, <i8 1, i8 1, i8 1, i8 1,
+ i8 1, i8 1, i8 1, i8 1,
+ i8 1, i8 1, i8 1, i8 1,
+ i8 1, i8 1, i8 1, i8 1>
+ ret <16 x i8> %ret
+}
+
+; Test a v16i8 shift by the highest useful constant.
+define <16 x i8> @f3(<16 x i8> %dummy, <16 x i8> %val) {
+; CHECK-LABEL: f3:
+; CHECK: vesrlb %v24, %v26, 7
+; CHECK: br %r14
+ %ret = lshr <16 x i8> %val, <i8 7, i8 7, i8 7, i8 7,
+ i8 7, i8 7, i8 7, i8 7,
+ i8 7, i8 7, i8 7, i8 7,
+ i8 7, i8 7, i8 7, i8 7>
+ ret <16 x i8> %ret
+}
+
+; Test a v8i16 shift by a variable.
+define <8 x i16> @f4(<8 x i16> %dummy, <8 x i16> %val1, i32 %shift) {
+; CHECK-LABEL: f4:
+; CHECK: vesrlh %v24, %v26, 0(%r2)
+; CHECK: br %r14
+ %truncshift = trunc i32 %shift to i16
+ %shiftvec = insertelement <8 x i16> undef, i16 %truncshift, i32 0
+ %val2 = shufflevector <8 x i16> %shiftvec, <8 x i16> undef,
+ <8 x i32> zeroinitializer
+ %ret = lshr <8 x i16> %val1, %val2
+ ret <8 x i16> %ret
+}
+
+; Test a v8i16 shift by the lowest useful constant.
+define <8 x i16> @f5(<8 x i16> %dummy, <8 x i16> %val) {
+; CHECK-LABEL: f5:
+; CHECK: vesrlh %v24, %v26, 1
+; CHECK: br %r14
+ %ret = lshr <8 x i16> %val, <i16 1, i16 1, i16 1, i16 1,
+ i16 1, i16 1, i16 1, i16 1>
+ ret <8 x i16> %ret
+}
+
+; Test a v8i16 shift by the highest useful constant.
+define <8 x i16> @f6(<8 x i16> %dummy, <8 x i16> %val) {
+; CHECK-LABEL: f6:
+; CHECK: vesrlh %v24, %v26, 15
+; CHECK: br %r14
+ %ret = lshr <8 x i16> %val, <i16 15, i16 15, i16 15, i16 15,
+ i16 15, i16 15, i16 15, i16 15>
+ ret <8 x i16> %ret
+}
+
+; Test a v4i32 shift by a variable.
+define <4 x i32> @f7(<4 x i32> %dummy, <4 x i32> %val1, i32 %shift) {
+; CHECK-LABEL: f7:
+; CHECK: vesrlf %v24, %v26, 0(%r2)
+; CHECK: br %r14
+ %shiftvec = insertelement <4 x i32> undef, i32 %shift, i32 0
+ %val2 = shufflevector <4 x i32> %shiftvec, <4 x i32> undef,
+ <4 x i32> zeroinitializer
+ %ret = lshr <4 x i32> %val1, %val2
+ ret <4 x i32> %ret
+}
+
+; Test a v4i32 shift by the lowest useful constant.
+define <4 x i32> @f8(<4 x i32> %dummy, <4 x i32> %val) {
+; CHECK-LABEL: f8:
+; CHECK: vesrlf %v24, %v26, 1
+; CHECK: br %r14
+ %ret = lshr <4 x i32> %val, <i32 1, i32 1, i32 1, i32 1>
+ ret <4 x i32> %ret
+}
+
+; Test a v4i32 shift by the highest useful constant.
+define <4 x i32> @f9(<4 x i32> %dummy, <4 x i32> %val) {
+; CHECK-LABEL: f9:
+; CHECK: vesrlf %v24, %v26, 31
+; CHECK: br %r14
+ %ret = lshr <4 x i32> %val, <i32 31, i32 31, i32 31, i32 31>
+ ret <4 x i32> %ret
+}
+
+; Test a v2i64 shift by a variable.
+define <2 x i64> @f10(<2 x i64> %dummy, <2 x i64> %val1, i32 %shift) {
+; CHECK-LABEL: f10:
+; CHECK: vesrlg %v24, %v26, 0(%r2)
+; CHECK: br %r14
+ %extshift = sext i32 %shift to i64
+ %shiftvec = insertelement <2 x i64> undef, i64 %extshift, i32 0
+ %val2 = shufflevector <2 x i64> %shiftvec, <2 x i64> undef,
+ <2 x i32> zeroinitializer
+ %ret = lshr <2 x i64> %val1, %val2
+ ret <2 x i64> %ret
+}
+
+; Test a v2i64 shift by the lowest useful constant.
+define <2 x i64> @f11(<2 x i64> %dummy, <2 x i64> %val) {
+; CHECK-LABEL: f11:
+; CHECK: vesrlg %v24, %v26, 1
+; CHECK: br %r14
+ %ret = lshr <2 x i64> %val, <i64 1, i64 1>
+ ret <2 x i64> %ret
+}
+
+; Test a v2i64 shift by the highest useful constant.
+define <2 x i64> @f12(<2 x i64> %dummy, <2 x i64> %val) {
+; CHECK-LABEL: f12:
+; CHECK: vesrlg %v24, %v26, 63
+; CHECK: br %r14
+ %ret = lshr <2 x i64> %val, <i64 63, i64 63>
+ ret <2 x i64> %ret
+}
diff --git a/test/CodeGen/SystemZ/vec-shift-07.ll b/test/CodeGen/SystemZ/vec-shift-07.ll
new file mode 100644
index 000000000000..f229c5e25a46
--- /dev/null
+++ b/test/CodeGen/SystemZ/vec-shift-07.ll
@@ -0,0 +1,182 @@
+; Test vector sign extensions.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
+; Test a v16i1->v16i8 extension.
+define <16 x i8> @f1(<16 x i8> %val) {
+; CHECK-LABEL: f1:
+; CHECK: veslb [[REG:%v[0-9]+]], %v24, 7
+; CHECK: vesrab %v24, [[REG]], 7
+; CHECK: br %r14
+ %trunc = trunc <16 x i8> %val to <16 x i1>
+ %ret = sext <16 x i1> %trunc to <16 x i8>
+ ret <16 x i8> %ret
+}
+
+; Test a v8i1->v8i16 extension.
+define <8 x i16> @f2(<8 x i16> %val) {
+; CHECK-LABEL: f2:
+; CHECK: veslh [[REG:%v[0-9]+]], %v24, 15
+; CHECK: vesrah %v24, [[REG]], 15
+; CHECK: br %r14
+ %trunc = trunc <8 x i16> %val to <8 x i1>
+ %ret = sext <8 x i1> %trunc to <8 x i16>
+ ret <8 x i16> %ret
+}
+
+; Test a v8i8->v8i16 extension.
+define <8 x i16> @f3(<8 x i16> %val) {
+; CHECK-LABEL: f3:
+; CHECK: veslh [[REG:%v[0-9]+]], %v24, 8
+; CHECK: vesrah %v24, [[REG]], 8
+; CHECK: br %r14
+ %trunc = trunc <8 x i16> %val to <8 x i8>
+ %ret = sext <8 x i8> %trunc to <8 x i16>
+ ret <8 x i16> %ret
+}
+
+; Test a v4i1->v4i32 extension.
+define <4 x i32> @f4(<4 x i32> %val) {
+; CHECK-LABEL: f4:
+; CHECK: veslf [[REG:%v[0-9]+]], %v24, 31
+; CHECK: vesraf %v24, [[REG]], 31
+; CHECK: br %r14
+ %trunc = trunc <4 x i32> %val to <4 x i1>
+ %ret = sext <4 x i1> %trunc to <4 x i32>
+ ret <4 x i32> %ret
+}
+
+; Test a v4i8->v4i32 extension.
+define <4 x i32> @f5(<4 x i32> %val) {
+; CHECK-LABEL: f5:
+; CHECK: veslf [[REG:%v[0-9]+]], %v24, 24
+; CHECK: vesraf %v24, [[REG]], 24
+; CHECK: br %r14
+ %trunc = trunc <4 x i32> %val to <4 x i8>
+ %ret = sext <4 x i8> %trunc to <4 x i32>
+ ret <4 x i32> %ret
+}
+
+; Test a v4i16->v4i32 extension.
+define <4 x i32> @f6(<4 x i32> %val) {
+; CHECK-LABEL: f6:
+; CHECK: veslf [[REG:%v[0-9]+]], %v24, 16
+; CHECK: vesraf %v24, [[REG]], 16
+; CHECK: br %r14
+ %trunc = trunc <4 x i32> %val to <4 x i16>
+ %ret = sext <4 x i16> %trunc to <4 x i32>
+ ret <4 x i32> %ret
+}
+
+; Test a v2i1->v2i64 extension.
+define <2 x i64> @f7(<2 x i64> %val) {
+; CHECK-LABEL: f7:
+; CHECK: veslg [[REG:%v[0-9]+]], %v24, 63
+; CHECK: vesrag %v24, [[REG]], 63
+; CHECK: br %r14
+ %trunc = trunc <2 x i64> %val to <2 x i1>
+ %ret = sext <2 x i1> %trunc to <2 x i64>
+ ret <2 x i64> %ret
+}
+
+; Test a v2i8->v2i64 extension.
+define <2 x i64> @f8(<2 x i64> %val) {
+; CHECK-LABEL: f8:
+; CHECK: vsegb %v24, %v24
+; CHECK: br %r14
+ %trunc = trunc <2 x i64> %val to <2 x i8>
+ %ret = sext <2 x i8> %trunc to <2 x i64>
+ ret <2 x i64> %ret
+}
+
+; Test a v2i16->v2i64 extension.
+define <2 x i64> @f9(<2 x i64> %val) {
+; CHECK-LABEL: f9:
+; CHECK: vsegh %v24, %v24
+; CHECK: br %r14
+ %trunc = trunc <2 x i64> %val to <2 x i16>
+ %ret = sext <2 x i16> %trunc to <2 x i64>
+ ret <2 x i64> %ret
+}
+
+; Test a v2i32->v2i64 extension.
+define <2 x i64> @f10(<2 x i64> %val) {
+; CHECK-LABEL: f10:
+; CHECK: vsegf %v24, %v24
+; CHECK: br %r14
+ %trunc = trunc <2 x i64> %val to <2 x i32>
+ %ret = sext <2 x i32> %trunc to <2 x i64>
+ ret <2 x i64> %ret
+}
+
+; Test an alternative v2i8->v2i64 extension.
+define <2 x i64> @f11(<2 x i64> %val) {
+; CHECK-LABEL: f11:
+; CHECK: vsegb %v24, %v24
+; CHECK: br %r14
+ %shl = shl <2 x i64> %val, <i64 56, i64 56>
+ %ret = ashr <2 x i64> %shl, <i64 56, i64 56>
+ ret <2 x i64> %ret
+}
+
+; Test an alternative v2i16->v2i64 extension.
+define <2 x i64> @f12(<2 x i64> %val) {
+; CHECK-LABEL: f12:
+; CHECK: vsegh %v24, %v24
+; CHECK: br %r14
+ %shl = shl <2 x i64> %val, <i64 48, i64 48>
+ %ret = ashr <2 x i64> %shl, <i64 48, i64 48>
+ ret <2 x i64> %ret
+}
+
+; Test an alternative v2i32->v2i64 extension.
+define <2 x i64> @f13(<2 x i64> %val) {
+; CHECK-LABEL: f13:
+; CHECK: vsegf %v24, %v24
+; CHECK: br %r14
+ %shl = shl <2 x i64> %val, <i64 32, i64 32>
+ %ret = ashr <2 x i64> %shl, <i64 32, i64 32>
+ ret <2 x i64> %ret
+}
+
+; Test an extraction-based v2i8->v2i64 extension.
+define <2 x i64> @f14(<16 x i8> %val) {
+; CHECK-LABEL: f14:
+; CHECK: vsegb %v24, %v24
+; CHECK: br %r14
+ %elt0 = extractelement <16 x i8> %val, i32 7
+ %elt1 = extractelement <16 x i8> %val, i32 15
+ %ext0 = sext i8 %elt0 to i64
+ %ext1 = sext i8 %elt1 to i64
+ %vec0 = insertelement <2 x i64> undef, i64 %ext0, i32 0
+ %vec1 = insertelement <2 x i64> %vec0, i64 %ext1, i32 1
+ ret <2 x i64> %vec1
+}
+
+; Test an extraction-based v2i16->v2i64 extension.
+define <2 x i64> @f15(<16 x i16> %val) {
+; CHECK-LABEL: f15:
+; CHECK: vsegh %v24, %v24
+; CHECK: br %r14
+ %elt0 = extractelement <16 x i16> %val, i32 3
+ %elt1 = extractelement <16 x i16> %val, i32 7
+ %ext0 = sext i16 %elt0 to i64
+ %ext1 = sext i16 %elt1 to i64
+ %vec0 = insertelement <2 x i64> undef, i64 %ext0, i32 0
+ %vec1 = insertelement <2 x i64> %vec0, i64 %ext1, i32 1
+ ret <2 x i64> %vec1
+}
+
+; Test an extraction-based v2i32->v2i64 extension.
+define <2 x i64> @f16(<16 x i32> %val) {
+; CHECK-LABEL: f16:
+; CHECK: vsegf %v24, %v24
+; CHECK: br %r14
+ %elt0 = extractelement <16 x i32> %val, i32 1
+ %elt1 = extractelement <16 x i32> %val, i32 3
+ %ext0 = sext i32 %elt0 to i64
+ %ext1 = sext i32 %elt1 to i64
+ %vec0 = insertelement <2 x i64> undef, i64 %ext0, i32 0
+ %vec1 = insertelement <2 x i64> %vec0, i64 %ext1, i32 1
+ ret <2 x i64> %vec1
+}
diff --git a/test/CodeGen/SystemZ/vec-sqrt-01.ll b/test/CodeGen/SystemZ/vec-sqrt-01.ll
new file mode 100644
index 000000000000..5c3ffb3b0643
--- /dev/null
+++ b/test/CodeGen/SystemZ/vec-sqrt-01.ll
@@ -0,0 +1,23 @@
+; Test f64 and v2f64 square root.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
+declare double @llvm.sqrt.f64(double)
+declare <2 x double> @llvm.sqrt.v2f64(<2 x double>)
+
+define <2 x double> @f1(<2 x double> %val) {
+; CHECK-LABEL: f1:
+; CHECK: vfsqdb %v24, %v24
+; CHECK: br %r14
+ %ret = call <2 x double> @llvm.sqrt.v2f64(<2 x double> %val)
+ ret <2 x double> %ret
+}
+
+define double @f2(<2 x double> %val) {
+; CHECK-LABEL: f2:
+; CHECK: wfsqdb %f0, %v24
+; CHECK: br %r14
+ %scalar = extractelement <2 x double> %val, i32 0
+ %ret = call double @llvm.sqrt.f64(double %scalar)
+ ret double %ret
+}
diff --git a/test/CodeGen/SystemZ/vec-sub-01.ll b/test/CodeGen/SystemZ/vec-sub-01.ll
new file mode 100644
index 000000000000..4afad8bef659
--- /dev/null
+++ b/test/CodeGen/SystemZ/vec-sub-01.ll
@@ -0,0 +1,148 @@
+; Test vector subtraction.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
+; Test a v16i8 subtraction.
+define <16 x i8> @f1(<16 x i8> %dummy, <16 x i8> %val1, <16 x i8> %val2) {
+; CHECK-LABEL: f1:
+; CHECK: vsb %v24, %v26, %v28
+; CHECK: br %r14
+ %ret = sub <16 x i8> %val1, %val2
+ ret <16 x i8> %ret
+}
+
+; Test a v8i16 subtraction.
+define <8 x i16> @f2(<8 x i16> %dummy, <8 x i16> %val1, <8 x i16> %val2) {
+; CHECK-LABEL: f2:
+; CHECK: vsh %v24, %v26, %v28
+; CHECK: br %r14
+ %ret = sub <8 x i16> %val1, %val2
+ ret <8 x i16> %ret
+}
+
+; Test a v4i32 subtraction.
+define <4 x i32> @f3(<4 x i32> %dummy, <4 x i32> %val1, <4 x i32> %val2) {
+; CHECK-LABEL: f3:
+; CHECK: vsf %v24, %v26, %v28
+; CHECK: br %r14
+ %ret = sub <4 x i32> %val1, %val2
+ ret <4 x i32> %ret
+}
+
+; Test a v2i64 subtraction.
+define <2 x i64> @f4(<2 x i64> %dummy, <2 x i64> %val1, <2 x i64> %val2) {
+; CHECK-LABEL: f4:
+; CHECK: vsg %v24, %v26, %v28
+; CHECK: br %r14
+ %ret = sub <2 x i64> %val1, %val2
+ ret <2 x i64> %ret
+}
+
+; Test a v4f32 subtraction, as an example of an operation that needs to be
+; scalarized and reassembled. At present there's an unnecessary move that
+; could be avoided with smarter ordering. It also isn't important whether
+; the VREPFs use the result of the VLRs or use %v24 and %v26 directly.
+define <4 x float> @f5(<4 x float> %val1, <4 x float> %val2) {
+; CHECK-LABEL: f5:
+; CHECK-DAG: vlr %v[[A1:[0-5]]], %v24
+; CHECK-DAG: vlr %v[[A2:[0-5]]], %v26
+; CHECK-DAG: vrepf %v[[B1:[0-5]]], %v[[A1]], 1
+; CHECK-DAG: vrepf %v[[B2:[0-5]]], %v[[A2]], 1
+; CHECK-DAG: vrepf %v[[C1:[0-5]]], %v[[A1]], 2
+; CHECK-DAG: vrepf %v[[C2:[0-5]]], %v[[A2]], 2
+; CHECK-DAG: vrepf %v[[D1:[0-5]]], %v[[A1]], 3
+; CHECK-DAG: vrepf %v[[D2:[0-5]]], %v[[A2]], 3
+; CHECK-DAG: ler %f[[A1copy:[0-5]]], %f[[A1]]
+; CHECK-DAG: sebr %f[[A1copy]], %f[[A2]]
+; CHECK-DAG: sebr %f[[B1]], %f[[B2]]
+; CHECK-DAG: sebr %f[[C1]], %f[[C2]]
+; CHECK-DAG: sebr %f[[D1]], %f[[D2]]
+; CHECK-DAG: vmrhf [[HIGH:%v[0-9]+]], %v[[A1copy]], %v[[B1]]
+; CHECK-DAG: vmrhf [[LOW:%v[0-9]+]], %v[[C1]], %v[[D1]]
+; CHECK: vmrhg %v24, [[HIGH]], [[LOW]]
+; CHECK: br %r14
+ %ret = fsub <4 x float> %val1, %val2
+ ret <4 x float> %ret
+}
+
+; Test a v2f64 subtraction.
+define <2 x double> @f6(<2 x double> %dummy, <2 x double> %val1,
+ <2 x double> %val2) {
+; CHECK-LABEL: f6:
+; CHECK: vfsdb %v24, %v26, %v28
+; CHECK: br %r14
+ %ret = fsub <2 x double> %val1, %val2
+ ret <2 x double> %ret
+}
+
+; Test an f64 subtraction that uses vector registers.
+define double @f7(<2 x double> %val1, <2 x double> %val2) {
+; CHECK-LABEL: f7:
+; CHECK: wfsdb %f0, %v24, %v26
+; CHECK: br %r14
+ %scalar1 = extractelement <2 x double> %val1, i32 0
+ %scalar2 = extractelement <2 x double> %val2, i32 0
+ %ret = fsub double %scalar1, %scalar2
+ ret double %ret
+}
+
+; Test a v2i8 subtraction, which gets promoted to v16i8.
+define <2 x i8> @f8(<2 x i8> %dummy, <2 x i8> %val1, <2 x i8> %val2) {
+; CHECK-LABEL: f8:
+; CHECK: vsb %v24, %v26, %v28
+; CHECK: br %r14
+ %ret = sub <2 x i8> %val1, %val2
+ ret <2 x i8> %ret
+}
+
+; Test a v4i8 subtraction, which gets promoted to v16i8.
+define <4 x i8> @f9(<4 x i8> %dummy, <4 x i8> %val1, <4 x i8> %val2) {
+; CHECK-LABEL: f9:
+; CHECK: vsb %v24, %v26, %v28
+; CHECK: br %r14
+ %ret = sub <4 x i8> %val1, %val2
+ ret <4 x i8> %ret
+}
+
+; Test a v8i8 subtraction, which gets promoted to v16i8.
+define <8 x i8> @f10(<8 x i8> %dummy, <8 x i8> %val1, <8 x i8> %val2) {
+; CHECK-LABEL: f10:
+; CHECK: vsb %v24, %v26, %v28
+; CHECK: br %r14
+ %ret = sub <8 x i8> %val1, %val2
+ ret <8 x i8> %ret
+}
+
+; Test a v2i16 subtraction, which gets promoted to v8i16.
+define <2 x i16> @f11(<2 x i16> %dummy, <2 x i16> %val1, <2 x i16> %val2) {
+; CHECK-LABEL: f11:
+; CHECK: vsh %v24, %v26, %v28
+; CHECK: br %r14
+ %ret = sub <2 x i16> %val1, %val2
+ ret <2 x i16> %ret
+}
+
+; Test a v4i16 subtraction, which gets promoted to v8i16.
+define <4 x i16> @f12(<4 x i16> %dummy, <4 x i16> %val1, <4 x i16> %val2) {
+; CHECK-LABEL: f12:
+; CHECK: vsh %v24, %v26, %v28
+; CHECK: br %r14
+ %ret = sub <4 x i16> %val1, %val2
+ ret <4 x i16> %ret
+}
+
+; Test a v2i32 subtraction, which gets promoted to v4i32.
+define <2 x i32> @f13(<2 x i32> %dummy, <2 x i32> %val1, <2 x i32> %val2) {
+; CHECK-LABEL: f13:
+; CHECK: vsf %v24, %v26, %v28
+; CHECK: br %r14
+ %ret = sub <2 x i32> %val1, %val2
+ ret <2 x i32> %ret
+}
+
+; Test a v2f32 subtraction, which gets promoted to v4f32.
+define <2 x float> @f14(<2 x float> %val1, <2 x float> %val2) {
+; No particular output expected, but must compile.
+ %ret = fsub <2 x float> %val1, %val2
+ ret <2 x float> %ret
+}
diff --git a/test/CodeGen/SystemZ/vec-xor-01.ll b/test/CodeGen/SystemZ/vec-xor-01.ll
new file mode 100644
index 000000000000..063b768117c1
--- /dev/null
+++ b/test/CodeGen/SystemZ/vec-xor-01.ll
@@ -0,0 +1,39 @@
+; Test vector XOR.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
+; Test a v16i8 XOR.
+define <16 x i8> @f1(<16 x i8> %dummy, <16 x i8> %val1, <16 x i8> %val2) {
+; CHECK-LABEL: f1:
+; CHECK: vx %v24, %v26, %v28
+; CHECK: br %r14
+ %ret = xor <16 x i8> %val1, %val2
+ ret <16 x i8> %ret
+}
+
+; Test a v8i16 XOR.
+define <8 x i16> @f2(<8 x i16> %dummy, <8 x i16> %val1, <8 x i16> %val2) {
+; CHECK-LABEL: f2:
+; CHECK: vx %v24, %v26, %v28
+; CHECK: br %r14
+ %ret = xor <8 x i16> %val1, %val2
+ ret <8 x i16> %ret
+}
+
+; Test a v4i32 XOR.
+define <4 x i32> @f3(<4 x i32> %dummy, <4 x i32> %val1, <4 x i32> %val2) {
+; CHECK-LABEL: f3:
+; CHECK: vx %v24, %v26, %v28
+; CHECK: br %r14
+ %ret = xor <4 x i32> %val1, %val2
+ ret <4 x i32> %ret
+}
+
+; Test a v2i64 XOR.
+define <2 x i64> @f4(<2 x i64> %dummy, <2 x i64> %val1, <2 x i64> %val2) {
+; CHECK-LABEL: f4:
+; CHECK: vx %v24, %v26, %v28
+; CHECK: br %r14
+ %ret = xor <2 x i64> %val1, %val2
+ ret <2 x i64> %ret
+}
diff --git a/test/CodeGen/SystemZ/xor-01.ll b/test/CodeGen/SystemZ/xor-01.ll
index 185d6bb0a754..e0aaffbb257e 100644
--- a/test/CodeGen/SystemZ/xor-01.ll
+++ b/test/CodeGen/SystemZ/xor-01.ll
@@ -19,7 +19,7 @@ define i32 @f2(i32 %a, i32 *%src) {
; CHECK-LABEL: f2:
; CHECK: x %r2, 0(%r3)
; CHECK: br %r14
- %b = load i32 *%src
+ %b = load i32 , i32 *%src
%xor = xor i32 %a, %b
ret i32 %xor
}
@@ -29,8 +29,8 @@ define i32 @f3(i32 %a, i32 *%src) {
; CHECK-LABEL: f3:
; CHECK: x %r2, 4092(%r3)
; CHECK: br %r14
- %ptr = getelementptr i32 *%src, i64 1023
- %b = load i32 *%ptr
+ %ptr = getelementptr i32, i32 *%src, i64 1023
+ %b = load i32 , i32 *%ptr
%xor = xor i32 %a, %b
ret i32 %xor
}
@@ -40,8 +40,8 @@ define i32 @f4(i32 %a, i32 *%src) {
; CHECK-LABEL: f4:
; CHECK: xy %r2, 4096(%r3)
; CHECK: br %r14
- %ptr = getelementptr i32 *%src, i64 1024
- %b = load i32 *%ptr
+ %ptr = getelementptr i32, i32 *%src, i64 1024
+ %b = load i32 , i32 *%ptr
%xor = xor i32 %a, %b
ret i32 %xor
}
@@ -51,8 +51,8 @@ define i32 @f5(i32 %a, i32 *%src) {
; CHECK-LABEL: f5:
; CHECK: xy %r2, 524284(%r3)
; CHECK: br %r14
- %ptr = getelementptr i32 *%src, i64 131071
- %b = load i32 *%ptr
+ %ptr = getelementptr i32, i32 *%src, i64 131071
+ %b = load i32 , i32 *%ptr
%xor = xor i32 %a, %b
ret i32 %xor
}
@@ -64,8 +64,8 @@ define i32 @f6(i32 %a, i32 *%src) {
; CHECK: agfi %r3, 524288
; CHECK: x %r2, 0(%r3)
; CHECK: br %r14
- %ptr = getelementptr i32 *%src, i64 131072
- %b = load i32 *%ptr
+ %ptr = getelementptr i32, i32 *%src, i64 131072
+ %b = load i32 , i32 *%ptr
%xor = xor i32 %a, %b
ret i32 %xor
}
@@ -75,8 +75,8 @@ define i32 @f7(i32 %a, i32 *%src) {
; CHECK-LABEL: f7:
; CHECK: xy %r2, -4(%r3)
; CHECK: br %r14
- %ptr = getelementptr i32 *%src, i64 -1
- %b = load i32 *%ptr
+ %ptr = getelementptr i32, i32 *%src, i64 -1
+ %b = load i32 , i32 *%ptr
%xor = xor i32 %a, %b
ret i32 %xor
}
@@ -86,8 +86,8 @@ define i32 @f8(i32 %a, i32 *%src) {
; CHECK-LABEL: f8:
; CHECK: xy %r2, -524288(%r3)
; CHECK: br %r14
- %ptr = getelementptr i32 *%src, i64 -131072
- %b = load i32 *%ptr
+ %ptr = getelementptr i32, i32 *%src, i64 -131072
+ %b = load i32 , i32 *%ptr
%xor = xor i32 %a, %b
ret i32 %xor
}
@@ -99,8 +99,8 @@ define i32 @f9(i32 %a, i32 *%src) {
; CHECK: agfi %r3, -524292
; CHECK: x %r2, 0(%r3)
; CHECK: br %r14
- %ptr = getelementptr i32 *%src, i64 -131073
- %b = load i32 *%ptr
+ %ptr = getelementptr i32, i32 *%src, i64 -131073
+ %b = load i32 , i32 *%ptr
%xor = xor i32 %a, %b
ret i32 %xor
}
@@ -113,7 +113,7 @@ define i32 @f10(i32 %a, i64 %src, i64 %index) {
%add1 = add i64 %src, %index
%add2 = add i64 %add1, 4092
%ptr = inttoptr i64 %add2 to i32 *
- %b = load i32 *%ptr
+ %b = load i32 , i32 *%ptr
%xor = xor i32 %a, %b
ret i32 %xor
}
@@ -126,7 +126,7 @@ define i32 @f11(i32 %a, i64 %src, i64 %index) {
%add1 = add i64 %src, %index
%add2 = add i64 %add1, 4096
%ptr = inttoptr i64 %add2 to i32 *
- %b = load i32 *%ptr
+ %b = load i32 , i32 *%ptr
%xor = xor i32 %a, %b
ret i32 %xor
}
@@ -137,26 +137,26 @@ define i32 @f12(i32 *%ptr0) {
; CHECK: brasl %r14, foo@PLT
; CHECK: x %r2, 16{{[04]}}(%r15)
; CHECK: br %r14
- %ptr1 = getelementptr i32 *%ptr0, i64 2
- %ptr2 = getelementptr i32 *%ptr0, i64 4
- %ptr3 = getelementptr i32 *%ptr0, i64 6
- %ptr4 = getelementptr i32 *%ptr0, i64 8
- %ptr5 = getelementptr i32 *%ptr0, i64 10
- %ptr6 = getelementptr i32 *%ptr0, i64 12
- %ptr7 = getelementptr i32 *%ptr0, i64 14
- %ptr8 = getelementptr i32 *%ptr0, i64 16
- %ptr9 = getelementptr i32 *%ptr0, i64 18
-
- %val0 = load i32 *%ptr0
- %val1 = load i32 *%ptr1
- %val2 = load i32 *%ptr2
- %val3 = load i32 *%ptr3
- %val4 = load i32 *%ptr4
- %val5 = load i32 *%ptr5
- %val6 = load i32 *%ptr6
- %val7 = load i32 *%ptr7
- %val8 = load i32 *%ptr8
- %val9 = load i32 *%ptr9
+ %ptr1 = getelementptr i32, i32 *%ptr0, i64 2
+ %ptr2 = getelementptr i32, i32 *%ptr0, i64 4
+ %ptr3 = getelementptr i32, i32 *%ptr0, i64 6
+ %ptr4 = getelementptr i32, i32 *%ptr0, i64 8
+ %ptr5 = getelementptr i32, i32 *%ptr0, i64 10
+ %ptr6 = getelementptr i32, i32 *%ptr0, i64 12
+ %ptr7 = getelementptr i32, i32 *%ptr0, i64 14
+ %ptr8 = getelementptr i32, i32 *%ptr0, i64 16
+ %ptr9 = getelementptr i32, i32 *%ptr0, i64 18
+
+ %val0 = load i32 , i32 *%ptr0
+ %val1 = load i32 , i32 *%ptr1
+ %val2 = load i32 , i32 *%ptr2
+ %val3 = load i32 , i32 *%ptr3
+ %val4 = load i32 , i32 *%ptr4
+ %val5 = load i32 , i32 *%ptr5
+ %val6 = load i32 , i32 *%ptr6
+ %val7 = load i32 , i32 *%ptr7
+ %val8 = load i32 , i32 *%ptr8
+ %val9 = load i32 , i32 *%ptr9
%ret = call i32 @foo()
diff --git a/test/CodeGen/SystemZ/xor-03.ll b/test/CodeGen/SystemZ/xor-03.ll
index ab7f2584b60d..36fb1df45a84 100644
--- a/test/CodeGen/SystemZ/xor-03.ll
+++ b/test/CodeGen/SystemZ/xor-03.ll
@@ -19,7 +19,7 @@ define i64 @f2(i64 %a, i64 *%src) {
; CHECK-LABEL: f2:
; CHECK: xg %r2, 0(%r3)
; CHECK: br %r14
- %b = load i64 *%src
+ %b = load i64 , i64 *%src
%xor = xor i64 %a, %b
ret i64 %xor
}
@@ -29,8 +29,8 @@ define i64 @f3(i64 %a, i64 *%src) {
; CHECK-LABEL: f3:
; CHECK: xg %r2, 524280(%r3)
; CHECK: br %r14
- %ptr = getelementptr i64 *%src, i64 65535
- %b = load i64 *%ptr
+ %ptr = getelementptr i64, i64 *%src, i64 65535
+ %b = load i64 , i64 *%ptr
%xor = xor i64 %a, %b
ret i64 %xor
}
@@ -42,8 +42,8 @@ define i64 @f4(i64 %a, i64 *%src) {
; CHECK: agfi %r3, 524288
; CHECK: xg %r2, 0(%r3)
; CHECK: br %r14
- %ptr = getelementptr i64 *%src, i64 65536
- %b = load i64 *%ptr
+ %ptr = getelementptr i64, i64 *%src, i64 65536
+ %b = load i64 , i64 *%ptr
%xor = xor i64 %a, %b
ret i64 %xor
}
@@ -53,8 +53,8 @@ define i64 @f5(i64 %a, i64 *%src) {
; CHECK-LABEL: f5:
; CHECK: xg %r2, -8(%r3)
; CHECK: br %r14
- %ptr = getelementptr i64 *%src, i64 -1
- %b = load i64 *%ptr
+ %ptr = getelementptr i64, i64 *%src, i64 -1
+ %b = load i64 , i64 *%ptr
%xor = xor i64 %a, %b
ret i64 %xor
}
@@ -64,8 +64,8 @@ define i64 @f6(i64 %a, i64 *%src) {
; CHECK-LABEL: f6:
; CHECK: xg %r2, -524288(%r3)
; CHECK: br %r14
- %ptr = getelementptr i64 *%src, i64 -65536
- %b = load i64 *%ptr
+ %ptr = getelementptr i64, i64 *%src, i64 -65536
+ %b = load i64 , i64 *%ptr
%xor = xor i64 %a, %b
ret i64 %xor
}
@@ -77,8 +77,8 @@ define i64 @f7(i64 %a, i64 *%src) {
; CHECK: agfi %r3, -524296
; CHECK: xg %r2, 0(%r3)
; CHECK: br %r14
- %ptr = getelementptr i64 *%src, i64 -65537
- %b = load i64 *%ptr
+ %ptr = getelementptr i64, i64 *%src, i64 -65537
+ %b = load i64 , i64 *%ptr
%xor = xor i64 %a, %b
ret i64 %xor
}
@@ -91,7 +91,7 @@ define i64 @f8(i64 %a, i64 %src, i64 %index) {
%add1 = add i64 %src, %index
%add2 = add i64 %add1, 524280
%ptr = inttoptr i64 %add2 to i64 *
- %b = load i64 *%ptr
+ %b = load i64 , i64 *%ptr
%xor = xor i64 %a, %b
ret i64 %xor
}
@@ -102,26 +102,26 @@ define i64 @f9(i64 *%ptr0) {
; CHECK: brasl %r14, foo@PLT
; CHECK: xg %r2, 160(%r15)
; CHECK: br %r14
- %ptr1 = getelementptr i64 *%ptr0, i64 2
- %ptr2 = getelementptr i64 *%ptr0, i64 4
- %ptr3 = getelementptr i64 *%ptr0, i64 6
- %ptr4 = getelementptr i64 *%ptr0, i64 8
- %ptr5 = getelementptr i64 *%ptr0, i64 10
- %ptr6 = getelementptr i64 *%ptr0, i64 12
- %ptr7 = getelementptr i64 *%ptr0, i64 14
- %ptr8 = getelementptr i64 *%ptr0, i64 16
- %ptr9 = getelementptr i64 *%ptr0, i64 18
+ %ptr1 = getelementptr i64, i64 *%ptr0, i64 2
+ %ptr2 = getelementptr i64, i64 *%ptr0, i64 4
+ %ptr3 = getelementptr i64, i64 *%ptr0, i64 6
+ %ptr4 = getelementptr i64, i64 *%ptr0, i64 8
+ %ptr5 = getelementptr i64, i64 *%ptr0, i64 10
+ %ptr6 = getelementptr i64, i64 *%ptr0, i64 12
+ %ptr7 = getelementptr i64, i64 *%ptr0, i64 14
+ %ptr8 = getelementptr i64, i64 *%ptr0, i64 16
+ %ptr9 = getelementptr i64, i64 *%ptr0, i64 18

- %val0 = load i64 *%ptr0
- %val1 = load i64 *%ptr1
- %val2 = load i64 *%ptr2
- %val3 = load i64 *%ptr3
- %val4 = load i64 *%ptr4
- %val5 = load i64 *%ptr5
- %val6 = load i64 *%ptr6
- %val7 = load i64 *%ptr7
- %val8 = load i64 *%ptr8
- %val9 = load i64 *%ptr9
+ %val0 = load i64 , i64 *%ptr0
+ %val1 = load i64 , i64 *%ptr1
+ %val2 = load i64 , i64 *%ptr2
+ %val3 = load i64 , i64 *%ptr3
+ %val4 = load i64 , i64 *%ptr4
+ %val5 = load i64 , i64 *%ptr5
+ %val6 = load i64 , i64 *%ptr6
+ %val7 = load i64 , i64 *%ptr7
+ %val8 = load i64 , i64 *%ptr8
+ %val9 = load i64 , i64 *%ptr9
%ret = call i64 @foo()
diff --git a/test/CodeGen/SystemZ/xor-05.ll b/test/CodeGen/SystemZ/xor-05.ll
index fbd5660ad058..7b79c7f544d0 100644
--- a/test/CodeGen/SystemZ/xor-05.ll
+++ b/test/CodeGen/SystemZ/xor-05.ll
@@ -7,7 +7,7 @@ define void @f1(i8 *%ptr) {
; CHECK-LABEL: f1:
; CHECK: xi 0(%r2), 1
; CHECK: br %r14
- %val = load i8 *%ptr
+ %val = load i8 , i8 *%ptr
%xor = xor i8 %val, -255
store i8 %xor, i8 *%ptr
ret void
@@ -18,7 +18,7 @@ define void @f2(i8 *%ptr) {
; CHECK-LABEL: f2:
; CHECK: xi 0(%r2), 254
; CHECK: br %r14
- %val = load i8 *%ptr
+ %val = load i8 , i8 *%ptr
%xor = xor i8 %val, -2
store i8 %xor, i8 *%ptr
ret void
@@ -29,7 +29,7 @@ define void @f3(i8 *%ptr) {
; CHECK-LABEL: f3:
; CHECK: xi 0(%r2), 1
; CHECK: br %r14
- %val = load i8 *%ptr
+ %val = load i8 , i8 *%ptr
%xor = xor i8 %val, 1
store i8 %xor, i8 *%ptr
ret void
@@ -40,7 +40,7 @@ define void @f4(i8 *%ptr) {
; CHECK-LABEL: f4:
; CHECK: xi 0(%r2), 254
; CHECK: br %r14
- %val = load i8 *%ptr
+ %val = load i8 , i8 *%ptr
%xor = xor i8 %val, 254
store i8 %xor, i8 *%ptr
ret void
@@ -51,8 +51,8 @@ define void @f5(i8 *%src) {
; CHECK-LABEL: f5:
; CHECK: xi 4095(%r2), 127
; CHECK: br %r14
- %ptr = getelementptr i8 *%src, i64 4095
- %val = load i8 *%ptr
+ %ptr = getelementptr i8, i8 *%src, i64 4095
+ %val = load i8 , i8 *%ptr
%xor = xor i8 %val, 127
store i8 %xor, i8 *%ptr
ret void
@@ -63,8 +63,8 @@ define void @f6(i8 *%src) {
; CHECK-LABEL: f6:
; CHECK: xiy 4096(%r2), 127
; CHECK: br %r14
- %ptr = getelementptr i8 *%src, i64 4096
- %val = load i8 *%ptr
+ %ptr = getelementptr i8, i8 *%src, i64 4096
+ %val = load i8 , i8 *%ptr
%xor = xor i8 %val, 127
store i8 %xor, i8 *%ptr
ret void
@@ -75,8 +75,8 @@ define void @f7(i8 *%src) {
; CHECK-LABEL: f7:
; CHECK: xiy 524287(%r2), 127
; CHECK: br %r14
- %ptr = getelementptr i8 *%src, i64 524287
- %val = load i8 *%ptr
+ %ptr = getelementptr i8, i8 *%src, i64 524287
+ %val = load i8 , i8 *%ptr
%xor = xor i8 %val, 127
store i8 %xor, i8 *%ptr
ret void
@@ -89,8 +89,8 @@ define void @f8(i8 *%src) {
; CHECK: agfi %r2, 524288
; CHECK: xi 0(%r2), 127
; CHECK: br %r14
- %ptr = getelementptr i8 *%src, i64 524288
- %val = load i8 *%ptr
+ %ptr = getelementptr i8, i8 *%src, i64 524288
+ %val = load i8 , i8 *%ptr
%xor = xor i8 %val, 127
store i8 %xor, i8 *%ptr
ret void
@@ -101,8 +101,8 @@ define void @f9(i8 *%src) {
; CHECK-LABEL: f9:
; CHECK: xiy -1(%r2), 127
; CHECK: br %r14
- %ptr = getelementptr i8 *%src, i64 -1
- %val = load i8 *%ptr
+ %ptr = getelementptr i8, i8 *%src, i64 -1
+ %val = load i8 , i8 *%ptr
%xor = xor i8 %val, 127
store i8 %xor, i8 *%ptr
ret void
@@ -113,8 +113,8 @@ define void @f10(i8 *%src) {
; CHECK-LABEL: f10:
; CHECK: xiy -524288(%r2), 127
; CHECK: br %r14
- %ptr = getelementptr i8 *%src, i64 -524288
- %val = load i8 *%ptr
+ %ptr = getelementptr i8, i8 *%src, i64 -524288
+ %val = load i8 , i8 *%ptr
%xor = xor i8 %val, 127
store i8 %xor, i8 *%ptr
ret void
@@ -127,8 +127,8 @@ define void @f11(i8 *%src) {
; CHECK: agfi %r2, -524289
; CHECK: xi 0(%r2), 127
; CHECK: br %r14
- %ptr = getelementptr i8 *%src, i64 -524289
- %val = load i8 *%ptr
+ %ptr = getelementptr i8, i8 *%src, i64 -524289
+ %val = load i8 , i8 *%ptr
%xor = xor i8 %val, 127
store i8 %xor, i8 *%ptr
ret void
@@ -143,7 +143,7 @@ define void @f12(i64 %src, i64 %index) {
%add1 = add i64 %src, %index
%add2 = add i64 %add1, 4095
%ptr = inttoptr i64 %add2 to i8 *
- %val = load i8 *%ptr
+ %val = load i8 , i8 *%ptr
%xor = xor i8 %val, 127
store i8 %xor, i8 *%ptr
ret void
@@ -158,7 +158,7 @@ define void @f13(i64 %src, i64 %index) {
%add1 = add i64 %src, %index
%add2 = add i64 %add1, 4096
%ptr = inttoptr i64 %add2 to i8 *
- %val = load i8 *%ptr
+ %val = load i8 , i8 *%ptr
%xor = xor i8 %val, 127
store i8 %xor, i8 *%ptr
ret void
diff --git a/test/CodeGen/SystemZ/xor-06.ll b/test/CodeGen/SystemZ/xor-06.ll
index f39c0fec4e40..40db3cb21a3e 100644
--- a/test/CodeGen/SystemZ/xor-06.ll
+++ b/test/CodeGen/SystemZ/xor-06.ll
@@ -8,7 +8,7 @@ define void @f1(i8 *%ptr) {
; CHECK-LABEL: f1:
; CHECK: xi 0(%r2), 254
; CHECK: br %r14
- %val = load i8 *%ptr
+ %val = load i8 , i8 *%ptr
%ext = zext i8 %val to i32
%xor = xor i32 %ext, -2
%trunc = trunc i32 %xor to i8
@@ -21,7 +21,7 @@ define void @f2(i8 *%ptr) {
; CHECK-LABEL: f2:
; CHECK: xi 0(%r2), 254
; CHECK: br %r14
- %val = load i8 *%ptr
+ %val = load i8 , i8 *%ptr
%ext = zext i8 %val to i64
%xor = xor i64 %ext, -2
%trunc = trunc i64 %xor to i8
@@ -34,7 +34,7 @@ define void @f3(i8 *%ptr) {
; CHECK-LABEL: f3:
; CHECK: xi 0(%r2), 254
; CHECK: br %r14
- %val = load i8 *%ptr
+ %val = load i8 , i8 *%ptr
%ext = zext i8 %val to i32
%xor = xor i32 %ext, 254
%trunc = trunc i32 %xor to i8
@@ -47,7 +47,7 @@ define void @f4(i8 *%ptr) {
; CHECK-LABEL: f4:
; CHECK: xi 0(%r2), 254
; CHECK: br %r14
- %val = load i8 *%ptr
+ %val = load i8 , i8 *%ptr
%ext = zext i8 %val to i64
%xor = xor i64 %ext, 254
%trunc = trunc i64 %xor to i8
@@ -60,7 +60,7 @@ define void @f5(i8 *%ptr) {
; CHECK-LABEL: f5:
; CHECK: xi 0(%r2), 254
; CHECK: br %r14
- %val = load i8 *%ptr
+ %val = load i8 , i8 *%ptr
%ext = sext i8 %val to i32
%xor = xor i32 %ext, -2
%trunc = trunc i32 %xor to i8
@@ -73,7 +73,7 @@ define void @f6(i8 *%ptr) {
; CHECK-LABEL: f6:
; CHECK: xi 0(%r2), 254
; CHECK: br %r14
- %val = load i8 *%ptr
+ %val = load i8 , i8 *%ptr
%ext = sext i8 %val to i64
%xor = xor i64 %ext, -2
%trunc = trunc i64 %xor to i8
@@ -86,7 +86,7 @@ define void @f7(i8 *%ptr) {
; CHECK-LABEL: f7:
; CHECK: xi 0(%r2), 254
; CHECK: br %r14
- %val = load i8 *%ptr
+ %val = load i8 , i8 *%ptr
%ext = sext i8 %val to i32
%xor = xor i32 %ext, 254
%trunc = trunc i32 %xor to i8
@@ -99,7 +99,7 @@ define void @f8(i8 *%ptr) {
; CHECK-LABEL: f8:
; CHECK: xi 0(%r2), 254
; CHECK: br %r14
- %val = load i8 *%ptr
+ %val = load i8 , i8 *%ptr
%ext = sext i8 %val to i64
%xor = xor i64 %ext, 254
%trunc = trunc i64 %xor to i8
diff --git a/test/CodeGen/SystemZ/xor-08.ll b/test/CodeGen/SystemZ/xor-08.ll
index 8cba41e742ce..9988a4cb45c2 100644
--- a/test/CodeGen/SystemZ/xor-08.ll
+++ b/test/CodeGen/SystemZ/xor-08.ll
@@ -7,9 +7,9 @@ define void @f1(i8 *%ptr1) {
; CHECK-LABEL: f1:
; CHECK: xc 1(1,%r2), 0(%r2)
; CHECK: br %r14
- %ptr2 = getelementptr i8 *%ptr1, i64 1
- %val = load i8 *%ptr1
- %old = load i8 *%ptr2
+ %ptr2 = getelementptr i8, i8 *%ptr1, i64 1
+ %val = load i8 , i8 *%ptr1
+ %old = load i8 , i8 *%ptr2
%xor = xor i8 %val, %old
store i8 %xor, i8 *%ptr2
ret void
@@ -20,9 +20,9 @@ define void @f2(i16 *%ptr1) {
; CHECK-LABEL: f2:
; CHECK: xc 2(2,%r2), 0(%r2)
; CHECK: br %r14
- %ptr2 = getelementptr i16 *%ptr1, i64 1
- %val = load i16 *%ptr1
- %old = load i16 *%ptr2
+ %ptr2 = getelementptr i16, i16 *%ptr1, i64 1
+ %val = load i16 , i16 *%ptr1
+ %old = load i16 , i16 *%ptr2
%xor = xor i16 %val, %old
store i16 %xor, i16 *%ptr2
ret void
@@ -33,9 +33,9 @@ define void @f3(i32 *%ptr1) {
; CHECK-LABEL: f3:
; CHECK: xc 4(4,%r2), 0(%r2)
; CHECK: br %r14
- %ptr2 = getelementptr i32 *%ptr1, i64 1
- %val = load i32 *%ptr1
- %old = load i32 *%ptr2
+ %ptr2 = getelementptr i32, i32 *%ptr1, i64 1
+ %val = load i32 , i32 *%ptr1
+ %old = load i32 , i32 *%ptr2
%xor = xor i32 %old, %val
store i32 %xor, i32 *%ptr2
ret void
@@ -46,9 +46,9 @@ define void @f4(i64 *%ptr1) {
; CHECK-LABEL: f4:
; CHECK: xc 8(8,%r2), 0(%r2)
; CHECK: br %r14
- %ptr2 = getelementptr i64 *%ptr1, i64 1
- %val = load i64 *%ptr1
- %old = load i64 *%ptr2
+ %ptr2 = getelementptr i64, i64 *%ptr1, i64 1
+ %val = load i64 , i64 *%ptr1
+ %old = load i64 , i64 *%ptr2
%xor = xor i64 %old, %val
store i64 %xor, i64 *%ptr2
ret void
diff --git a/test/CodeGen/Thumb/2007-01-31-RegInfoAssert.ll b/test/CodeGen/Thumb/2007-01-31-RegInfoAssert.ll
index 1e61b235a2bb..2d2ac9c566a9 100644
--- a/test/CodeGen/Thumb/2007-01-31-RegInfoAssert.ll
+++ b/test/CodeGen/Thumb/2007-01-31-RegInfoAssert.ll
@@ -6,10 +6,10 @@
define void @f1() {
%D = alloca %struct.rtx_def, align 1
%tmp1 = bitcast %struct.rtx_def* %D to i32*
- %tmp7 = load i32* %tmp1
+ %tmp7 = load i32, i32* %tmp1
%tmp14 = lshr i32 %tmp7, 1
%tmp1415 = and i32 %tmp14, 1
- call void (i32, ...)* @printf( i32 undef, i32 0, i32 %tmp1415 )
+ call void (i32, ...) @printf( i32 undef, i32 0, i32 %tmp1415 )
ret void
}
diff --git a/test/CodeGen/Thumb/2007-05-05-InvalidPushPop.ll b/test/CodeGen/Thumb/2007-05-05-InvalidPushPop.ll
index 929c472d1ef6..079ab879afbf 100644
--- a/test/CodeGen/Thumb/2007-05-05-InvalidPushPop.ll
+++ b/test/CodeGen/Thumb/2007-05-05-InvalidPushPop.ll
@@ -9,7 +9,7 @@ target triple = "thumb-unknown-linux-gnueabi"
define i8* @f(i8* %a) {
entry:
- %tmp1 = load i32* @i.1882 ; <i32> [#uses=1]
+ %tmp1 = load i32, i32* @i.1882 ; <i32> [#uses=1]
%tmp2 = add i32 %tmp1, 1 ; <i32> [#uses=2]
store i32 %tmp2, i32* @i.1882
%tmp34 = inttoptr i32 %tmp2 to i8* ; <i8*> [#uses=1]
@@ -21,16 +21,16 @@ entry:
%t = alloca i32, align 4 ; <i32*> [#uses=4]
%ret = alloca i32, align 4 ; <i32*> [#uses=3]
%tmp1 = call i32 @pthread_create( i32* %t, %struct.pthread_attr_t* null, i8* (i8*)* @f, i8* null ) ; <i32> [#uses=0]
- %tmp2 = load i32* %t ; <i32> [#uses=1]
+ %tmp2 = load i32, i32* %t ; <i32> [#uses=1]
%ret3 = bitcast i32* %ret to i8** ; <i8**> [#uses=2]
%tmp4 = call i32 @pthread_join( i32 %tmp2, i8** %ret3 ) ; <i32> [#uses=0]
- %tmp5 = load i32* %ret ; <i32> [#uses=1]
- %tmp7 = call i32 (i8*, ...)* @printf( i8* getelementptr ([14 x i8]* @.str, i32 0, i32 0), i32 %tmp5 ) ; <i32> [#uses=0]
+ %tmp5 = load i32, i32* %ret ; <i32> [#uses=1]
+ %tmp7 = call i32 (i8*, ...) @printf( i8* getelementptr ([14 x i8], [14 x i8]* @.str, i32 0, i32 0), i32 %tmp5 ) ; <i32> [#uses=0]
%tmp8 = call i32 @pthread_create( i32* %t, %struct.pthread_attr_t* null, i8* (i8*)* @f, i8* null ) ; <i32> [#uses=0]
- %tmp9 = load i32* %t ; <i32> [#uses=1]
+ %tmp9 = load i32, i32* %t ; <i32> [#uses=1]
%tmp11 = call i32 @pthread_join( i32 %tmp9, i8** %ret3 ) ; <i32> [#uses=0]
- %tmp12 = load i32* %ret ; <i32> [#uses=1]
- %tmp14 = call i32 (i8*, ...)* @printf( i8* getelementptr ([14 x i8]* @.str1, i32 0, i32 0), i32 %tmp12 ) ; <i32> [#uses=0]
+ %tmp12 = load i32, i32* %ret ; <i32> [#uses=1]
+ %tmp14 = call i32 (i8*, ...) @printf( i8* getelementptr ([14 x i8], [14 x i8]* @.str1, i32 0, i32 0), i32 %tmp12 ) ; <i32> [#uses=0]
ret i32 0
}
diff --git a/test/CodeGen/Thumb/2009-07-20-TwoAddrBug.ll b/test/CodeGen/Thumb/2009-07-20-TwoAddrBug.ll
index d4651a1f3fa7..fd300322f23f 100644
--- a/test/CodeGen/Thumb/2009-07-20-TwoAddrBug.ll
+++ b/test/CodeGen/Thumb/2009-07-20-TwoAddrBug.ll
@@ -4,7 +4,7 @@
define i64 @millisecs() nounwind {
entry:
- %0 = load i64* @Time.2535, align 4 ; <i64> [#uses=2]
+ %0 = load i64, i64* @Time.2535, align 4 ; <i64> [#uses=2]
%1 = add i64 %0, 1 ; <i64> [#uses=1]
store i64 %1, i64* @Time.2535, align 4
ret i64 %0
diff --git a/test/CodeGen/Thumb/2009-08-12-ConstIslandAssert.ll b/test/CodeGen/Thumb/2009-08-12-ConstIslandAssert.ll
index 5b420fc74503..7036dd19b4bc 100644
--- a/test/CodeGen/Thumb/2009-08-12-ConstIslandAssert.ll
+++ b/test/CodeGen/Thumb/2009-08-12-ConstIslandAssert.ll
@@ -4,697 +4,697 @@
define void @BF_encrypt(i32* nocapture %data, %struct.BF_KEY* nocapture %key, i32 %encrypt) nounwind {
entry:
- %0 = getelementptr %struct.BF_KEY* %key, i32 0, i32 0, i32 0; <i32*> [#uses=2]
- %1 = load i32* %data, align 4 ; <i32> [#uses=2]
- %2 = load i32* undef, align 4 ; <i32> [#uses=2]
+ %0 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 0, i32 0; <i32*> [#uses=2]
+ %1 = load i32, i32* %data, align 4 ; <i32> [#uses=2]
+ %2 = load i32, i32* undef, align 4 ; <i32> [#uses=2]
br i1 undef, label %bb1, label %bb
bb: ; preds = %entry
- %3 = load i32* %0, align 4 ; <i32> [#uses=1]
+ %3 = load i32, i32* %0, align 4 ; <i32> [#uses=1]
%4 = xor i32 %3, %1 ; <i32> [#uses=4]
- %5 = load i32* null, align 4 ; <i32> [#uses=1]
+ %5 = load i32, i32* null, align 4 ; <i32> [#uses=1]
%6 = lshr i32 %4, 24 ; <i32> [#uses=1]
- %7 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %6; <i32*> [#uses=1]
- %8 = load i32* %7, align 4 ; <i32> [#uses=1]
+ %7 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %6; <i32*> [#uses=1]
+ %8 = load i32, i32* %7, align 4 ; <i32> [#uses=1]
%9 = lshr i32 %4, 16 ; <i32> [#uses=1]
%10 = or i32 %9, 256 ; <i32> [#uses=1]
%11 = and i32 %10, 511 ; <i32> [#uses=1]
- %12 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %11; <i32*> [#uses=1]
- %13 = load i32* %12, align 4 ; <i32> [#uses=1]
+ %12 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %11; <i32*> [#uses=1]
+ %13 = load i32, i32* %12, align 4 ; <i32> [#uses=1]
%14 = add i32 %13, %8 ; <i32> [#uses=1]
- %15 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 undef; <i32*> [#uses=1]
- %16 = load i32* %15, align 4 ; <i32> [#uses=1]
+ %15 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 undef; <i32*> [#uses=1]
+ %16 = load i32, i32* %15, align 4 ; <i32> [#uses=1]
%17 = xor i32 %14, %16 ; <i32> [#uses=1]
%18 = or i32 %4, 768 ; <i32> [#uses=1]
%19 = and i32 %18, 1023 ; <i32> [#uses=1]
- %20 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %19; <i32*> [#uses=1]
- %21 = load i32* %20, align 4 ; <i32> [#uses=1]
+ %20 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %19; <i32*> [#uses=1]
+ %21 = load i32, i32* %20, align 4 ; <i32> [#uses=1]
%22 = add i32 %17, %21 ; <i32> [#uses=1]
%23 = xor i32 %5, %2 ; <i32> [#uses=1]
%24 = xor i32 %23, %22 ; <i32> [#uses=5]
- %25 = getelementptr %struct.BF_KEY* %key, i32 0, i32 0, i32 2; <i32*> [#uses=1]
- %26 = load i32* %25, align 4 ; <i32> [#uses=1]
+ %25 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 0, i32 2; <i32*> [#uses=1]
+ %26 = load i32, i32* %25, align 4 ; <i32> [#uses=1]
%27 = lshr i32 %24, 24 ; <i32> [#uses=1]
- %28 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %27; <i32*> [#uses=1]
- %29 = load i32* %28, align 4 ; <i32> [#uses=1]
+ %28 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %27; <i32*> [#uses=1]
+ %29 = load i32, i32* %28, align 4 ; <i32> [#uses=1]
%30 = lshr i32 %24, 16 ; <i32> [#uses=1]
%31 = or i32 %30, 256 ; <i32> [#uses=1]
%32 = and i32 %31, 511 ; <i32> [#uses=1]
- %33 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %32; <i32*> [#uses=1]
- %34 = load i32* %33, align 4 ; <i32> [#uses=1]
+ %33 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %32; <i32*> [#uses=1]
+ %34 = load i32, i32* %33, align 4 ; <i32> [#uses=1]
%35 = add i32 %34, %29 ; <i32> [#uses=1]
%36 = lshr i32 %24, 8 ; <i32> [#uses=1]
%37 = or i32 %36, 512 ; <i32> [#uses=1]
%38 = and i32 %37, 767 ; <i32> [#uses=1]
- %39 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %38; <i32*> [#uses=1]
- %40 = load i32* %39, align 4 ; <i32> [#uses=1]
+ %39 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %38; <i32*> [#uses=1]
+ %40 = load i32, i32* %39, align 4 ; <i32> [#uses=1]
%41 = xor i32 %35, %40 ; <i32> [#uses=1]
%42 = or i32 %24, 768 ; <i32> [#uses=1]
%43 = and i32 %42, 1023 ; <i32> [#uses=1]
- %44 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %43; <i32*> [#uses=1]
- %45 = load i32* %44, align 4 ; <i32> [#uses=1]
+ %44 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %43; <i32*> [#uses=1]
+ %45 = load i32, i32* %44, align 4 ; <i32> [#uses=1]
%46 = add i32 %41, %45 ; <i32> [#uses=1]
%47 = xor i32 %26, %4 ; <i32> [#uses=1]
%48 = xor i32 %47, %46 ; <i32> [#uses=5]
- %49 = getelementptr %struct.BF_KEY* %key, i32 0, i32 0, i32 3; <i32*> [#uses=1]
- %50 = load i32* %49, align 4 ; <i32> [#uses=1]
+ %49 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 0, i32 3; <i32*> [#uses=1]
+ %50 = load i32, i32* %49, align 4 ; <i32> [#uses=1]
%51 = lshr i32 %48, 24 ; <i32> [#uses=1]
- %52 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %51; <i32*> [#uses=1]
- %53 = load i32* %52, align 4 ; <i32> [#uses=1]
+ %52 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %51; <i32*> [#uses=1]
+ %53 = load i32, i32* %52, align 4 ; <i32> [#uses=1]
%54 = lshr i32 %48, 16 ; <i32> [#uses=1]
%55 = or i32 %54, 256 ; <i32> [#uses=1]
%56 = and i32 %55, 511 ; <i32> [#uses=1]
- %57 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %56; <i32*> [#uses=1]
- %58 = load i32* %57, align 4 ; <i32> [#uses=1]
+ %57 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %56; <i32*> [#uses=1]
+ %58 = load i32, i32* %57, align 4 ; <i32> [#uses=1]
%59 = add i32 %58, %53 ; <i32> [#uses=1]
%60 = lshr i32 %48, 8 ; <i32> [#uses=1]
%61 = or i32 %60, 512 ; <i32> [#uses=1]
%62 = and i32 %61, 767 ; <i32> [#uses=1]
- %63 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %62; <i32*> [#uses=1]
- %64 = load i32* %63, align 4 ; <i32> [#uses=1]
+ %63 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %62; <i32*> [#uses=1]
+ %64 = load i32, i32* %63, align 4 ; <i32> [#uses=1]
%65 = xor i32 %59, %64 ; <i32> [#uses=1]
%66 = or i32 %48, 768 ; <i32> [#uses=1]
%67 = and i32 %66, 1023 ; <i32> [#uses=1]
- %68 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %67; <i32*> [#uses=1]
- %69 = load i32* %68, align 4 ; <i32> [#uses=1]
+ %68 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %67; <i32*> [#uses=1]
+ %69 = load i32, i32* %68, align 4 ; <i32> [#uses=1]
%70 = add i32 %65, %69 ; <i32> [#uses=1]
%71 = xor i32 %50, %24 ; <i32> [#uses=1]
%72 = xor i32 %71, %70 ; <i32> [#uses=5]
- %73 = load i32* null, align 4 ; <i32> [#uses=1]
+ %73 = load i32, i32* null, align 4 ; <i32> [#uses=1]
%74 = lshr i32 %72, 24 ; <i32> [#uses=1]
- %75 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %74; <i32*> [#uses=1]
- %76 = load i32* %75, align 4 ; <i32> [#uses=1]
+ %75 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %74; <i32*> [#uses=1]
+ %76 = load i32, i32* %75, align 4 ; <i32> [#uses=1]
%77 = lshr i32 %72, 16 ; <i32> [#uses=1]
%78 = or i32 %77, 256 ; <i32> [#uses=1]
%79 = and i32 %78, 511 ; <i32> [#uses=1]
- %80 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %79; <i32*> [#uses=1]
- %81 = load i32* %80, align 4 ; <i32> [#uses=1]
+ %80 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %79; <i32*> [#uses=1]
+ %81 = load i32, i32* %80, align 4 ; <i32> [#uses=1]
%82 = add i32 %81, %76 ; <i32> [#uses=1]
%83 = lshr i32 %72, 8 ; <i32> [#uses=1]
%84 = or i32 %83, 512 ; <i32> [#uses=1]
%85 = and i32 %84, 767 ; <i32> [#uses=1]
- %86 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %85; <i32*> [#uses=1]
- %87 = load i32* %86, align 4 ; <i32> [#uses=1]
+ %86 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %85; <i32*> [#uses=1]
+ %87 = load i32, i32* %86, align 4 ; <i32> [#uses=1]
%88 = xor i32 %82, %87 ; <i32> [#uses=1]
%89 = or i32 %72, 768 ; <i32> [#uses=1]
%90 = and i32 %89, 1023 ; <i32> [#uses=1]
- %91 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %90; <i32*> [#uses=1]
- %92 = load i32* %91, align 4 ; <i32> [#uses=1]
+ %91 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %90; <i32*> [#uses=1]
+ %92 = load i32, i32* %91, align 4 ; <i32> [#uses=1]
%93 = add i32 %88, %92 ; <i32> [#uses=1]
%94 = xor i32 %73, %48 ; <i32> [#uses=1]
%95 = xor i32 %94, %93 ; <i32> [#uses=5]
- %96 = load i32* undef, align 4 ; <i32> [#uses=1]
+ %96 = load i32, i32* undef, align 4 ; <i32> [#uses=1]
%97 = lshr i32 %95, 24 ; <i32> [#uses=1]
- %98 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %97; <i32*> [#uses=1]
- %99 = load i32* %98, align 4 ; <i32> [#uses=1]
+ %98 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %97; <i32*> [#uses=1]
+ %99 = load i32, i32* %98, align 4 ; <i32> [#uses=1]
%100 = lshr i32 %95, 16 ; <i32> [#uses=1]
%101 = or i32 %100, 256 ; <i32> [#uses=1]
%102 = and i32 %101, 511 ; <i32> [#uses=1]
- %103 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %102; <i32*> [#uses=1]
- %104 = load i32* %103, align 4 ; <i32> [#uses=1]
+ %103 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %102; <i32*> [#uses=1]
+ %104 = load i32, i32* %103, align 4 ; <i32> [#uses=1]
%105 = add i32 %104, %99 ; <i32> [#uses=1]
%106 = lshr i32 %95, 8 ; <i32> [#uses=1]
%107 = or i32 %106, 512 ; <i32> [#uses=1]
%108 = and i32 %107, 767 ; <i32> [#uses=1]
- %109 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %108; <i32*> [#uses=1]
- %110 = load i32* %109, align 4 ; <i32> [#uses=1]
+ %109 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %108; <i32*> [#uses=1]
+ %110 = load i32, i32* %109, align 4 ; <i32> [#uses=1]
%111 = xor i32 %105, %110 ; <i32> [#uses=1]
%112 = or i32 %95, 768 ; <i32> [#uses=1]
%113 = and i32 %112, 1023 ; <i32> [#uses=1]
- %114 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %113; <i32*> [#uses=1]
- %115 = load i32* %114, align 4 ; <i32> [#uses=1]
+ %114 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %113; <i32*> [#uses=1]
+ %115 = load i32, i32* %114, align 4 ; <i32> [#uses=1]
%116 = add i32 %111, %115 ; <i32> [#uses=1]
%117 = xor i32 %96, %72 ; <i32> [#uses=1]
%118 = xor i32 %117, %116 ; <i32> [#uses=5]
- %119 = getelementptr %struct.BF_KEY* %key, i32 0, i32 0, i32 6; <i32*> [#uses=1]
- %120 = load i32* %119, align 4 ; <i32> [#uses=1]
+ %119 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 0, i32 6; <i32*> [#uses=1]
+ %120 = load i32, i32* %119, align 4 ; <i32> [#uses=1]
%121 = lshr i32 %118, 24 ; <i32> [#uses=1]
- %122 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %121; <i32*> [#uses=1]
- %123 = load i32* %122, align 4 ; <i32> [#uses=1]
+ %122 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %121; <i32*> [#uses=1]
+ %123 = load i32, i32* %122, align 4 ; <i32> [#uses=1]
%124 = lshr i32 %118, 16 ; <i32> [#uses=1]
%125 = or i32 %124, 256 ; <i32> [#uses=1]
%126 = and i32 %125, 511 ; <i32> [#uses=1]
- %127 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %126; <i32*> [#uses=1]
- %128 = load i32* %127, align 4 ; <i32> [#uses=1]
+ %127 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %126; <i32*> [#uses=1]
+ %128 = load i32, i32* %127, align 4 ; <i32> [#uses=1]
%129 = add i32 %128, %123 ; <i32> [#uses=1]
%130 = lshr i32 %118, 8 ; <i32> [#uses=1]
%131 = or i32 %130, 512 ; <i32> [#uses=1]
%132 = and i32 %131, 767 ; <i32> [#uses=1]
- %133 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %132; <i32*> [#uses=1]
- %134 = load i32* %133, align 4 ; <i32> [#uses=1]
+ %133 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %132; <i32*> [#uses=1]
+ %134 = load i32, i32* %133, align 4 ; <i32> [#uses=1]
%135 = xor i32 %129, %134 ; <i32> [#uses=1]
%136 = or i32 %118, 768 ; <i32> [#uses=1]
%137 = and i32 %136, 1023 ; <i32> [#uses=1]
- %138 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %137; <i32*> [#uses=1]
- %139 = load i32* %138, align 4 ; <i32> [#uses=1]
+ %138 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %137; <i32*> [#uses=1]
+ %139 = load i32, i32* %138, align 4 ; <i32> [#uses=1]
%140 = add i32 %135, %139 ; <i32> [#uses=1]
%141 = xor i32 %120, %95 ; <i32> [#uses=1]
%142 = xor i32 %141, %140 ; <i32> [#uses=5]
- %143 = getelementptr %struct.BF_KEY* %key, i32 0, i32 0, i32 7; <i32*> [#uses=1]
- %144 = load i32* %143, align 4 ; <i32> [#uses=1]
+ %143 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 0, i32 7; <i32*> [#uses=1]
+ %144 = load i32, i32* %143, align 4 ; <i32> [#uses=1]
%145 = lshr i32 %142, 24 ; <i32> [#uses=1]
- %146 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %145; <i32*> [#uses=1]
- %147 = load i32* %146, align 4 ; <i32> [#uses=1]
+ %146 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %145; <i32*> [#uses=1]
+ %147 = load i32, i32* %146, align 4 ; <i32> [#uses=1]
%148 = lshr i32 %142, 16 ; <i32> [#uses=1]
%149 = or i32 %148, 256 ; <i32> [#uses=1]
%150 = and i32 %149, 511 ; <i32> [#uses=1]
- %151 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %150; <i32*> [#uses=1]
- %152 = load i32* %151, align 4 ; <i32> [#uses=1]
+ %151 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %150; <i32*> [#uses=1]
+ %152 = load i32, i32* %151, align 4 ; <i32> [#uses=1]
%153 = add i32 %152, %147 ; <i32> [#uses=1]
%154 = lshr i32 %142, 8 ; <i32> [#uses=1]
%155 = or i32 %154, 512 ; <i32> [#uses=1]
%156 = and i32 %155, 767 ; <i32> [#uses=1]
- %157 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %156; <i32*> [#uses=1]
- %158 = load i32* %157, align 4 ; <i32> [#uses=1]
+ %157 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %156; <i32*> [#uses=1]
+ %158 = load i32, i32* %157, align 4 ; <i32> [#uses=1]
%159 = xor i32 %153, %158 ; <i32> [#uses=1]
%160 = or i32 %142, 768 ; <i32> [#uses=1]
%161 = and i32 %160, 1023 ; <i32> [#uses=1]
- %162 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %161; <i32*> [#uses=1]
- %163 = load i32* %162, align 4 ; <i32> [#uses=1]
+ %162 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %161; <i32*> [#uses=1]
+ %163 = load i32, i32* %162, align 4 ; <i32> [#uses=1]
%164 = add i32 %159, %163 ; <i32> [#uses=1]
%165 = xor i32 %144, %118 ; <i32> [#uses=1]
%166 = xor i32 %165, %164 ; <i32> [#uses=5]
- %167 = load i32* undef, align 4 ; <i32> [#uses=1]
+ %167 = load i32, i32* undef, align 4 ; <i32> [#uses=1]
%168 = lshr i32 %166, 24 ; <i32> [#uses=1]
- %169 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %168; <i32*> [#uses=1]
- %170 = load i32* %169, align 4 ; <i32> [#uses=1]
+ %169 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %168; <i32*> [#uses=1]
+ %170 = load i32, i32* %169, align 4 ; <i32> [#uses=1]
%171 = lshr i32 %166, 16 ; <i32> [#uses=1]
%172 = or i32 %171, 256 ; <i32> [#uses=1]
%173 = and i32 %172, 511 ; <i32> [#uses=1]
- %174 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %173; <i32*> [#uses=1]
- %175 = load i32* %174, align 4 ; <i32> [#uses=1]
+ %174 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %173; <i32*> [#uses=1]
+ %175 = load i32, i32* %174, align 4 ; <i32> [#uses=1]
%176 = add i32 %175, %170 ; <i32> [#uses=1]
%177 = lshr i32 %166, 8 ; <i32> [#uses=1]
%178 = or i32 %177, 512 ; <i32> [#uses=1]
%179 = and i32 %178, 767 ; <i32> [#uses=1]
- %180 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %179; <i32*> [#uses=1]
- %181 = load i32* %180, align 4 ; <i32> [#uses=1]
+ %180 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %179; <i32*> [#uses=1]
+ %181 = load i32, i32* %180, align 4 ; <i32> [#uses=1]
%182 = xor i32 %176, %181 ; <i32> [#uses=1]
%183 = or i32 %166, 768 ; <i32> [#uses=1]
%184 = and i32 %183, 1023 ; <i32> [#uses=1]
- %185 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %184; <i32*> [#uses=1]
- %186 = load i32* %185, align 4 ; <i32> [#uses=1]
+ %185 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %184; <i32*> [#uses=1]
+ %186 = load i32, i32* %185, align 4 ; <i32> [#uses=1]
%187 = add i32 %182, %186 ; <i32> [#uses=1]
%188 = xor i32 %167, %142 ; <i32> [#uses=1]
%189 = xor i32 %188, %187 ; <i32> [#uses=5]
- %190 = getelementptr %struct.BF_KEY* %key, i32 0, i32 0, i32 9; <i32*> [#uses=1]
- %191 = load i32* %190, align 4 ; <i32> [#uses=1]
+ %190 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 0, i32 9; <i32*> [#uses=1]
+ %191 = load i32, i32* %190, align 4 ; <i32> [#uses=1]
%192 = lshr i32 %189, 24 ; <i32> [#uses=1]
- %193 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %192; <i32*> [#uses=1]
- %194 = load i32* %193, align 4 ; <i32> [#uses=1]
+ %193 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %192; <i32*> [#uses=1]
+ %194 = load i32, i32* %193, align 4 ; <i32> [#uses=1]
%195 = lshr i32 %189, 16 ; <i32> [#uses=1]
%196 = or i32 %195, 256 ; <i32> [#uses=1]
%197 = and i32 %196, 511 ; <i32> [#uses=1]
- %198 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %197; <i32*> [#uses=1]
- %199 = load i32* %198, align 4 ; <i32> [#uses=1]
+ %198 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %197; <i32*> [#uses=1]
+ %199 = load i32, i32* %198, align 4 ; <i32> [#uses=1]
%200 = add i32 %199, %194 ; <i32> [#uses=1]
%201 = lshr i32 %189, 8 ; <i32> [#uses=1]
%202 = or i32 %201, 512 ; <i32> [#uses=1]
%203 = and i32 %202, 767 ; <i32> [#uses=1]
- %204 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %203; <i32*> [#uses=1]
- %205 = load i32* %204, align 4 ; <i32> [#uses=1]
+ %204 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %203; <i32*> [#uses=1]
+ %205 = load i32, i32* %204, align 4 ; <i32> [#uses=1]
%206 = xor i32 %200, %205 ; <i32> [#uses=1]
%207 = or i32 %189, 768 ; <i32> [#uses=1]
%208 = and i32 %207, 1023 ; <i32> [#uses=1]
- %209 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %208; <i32*> [#uses=1]
- %210 = load i32* %209, align 4 ; <i32> [#uses=1]
+ %209 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %208; <i32*> [#uses=1]
+ %210 = load i32, i32* %209, align 4 ; <i32> [#uses=1]
%211 = add i32 %206, %210 ; <i32> [#uses=1]
%212 = xor i32 %191, %166 ; <i32> [#uses=1]
%213 = xor i32 %212, %211 ; <i32> [#uses=5]
- %214 = getelementptr %struct.BF_KEY* %key, i32 0, i32 0, i32 10; <i32*> [#uses=1]
- %215 = load i32* %214, align 4 ; <i32> [#uses=1]
+ %214 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 0, i32 10; <i32*> [#uses=1]
+ %215 = load i32, i32* %214, align 4 ; <i32> [#uses=1]
%216 = lshr i32 %213, 24 ; <i32> [#uses=1]
- %217 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %216; <i32*> [#uses=1]
- %218 = load i32* %217, align 4 ; <i32> [#uses=1]
+ %217 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %216; <i32*> [#uses=1]
+ %218 = load i32, i32* %217, align 4 ; <i32> [#uses=1]
%219 = lshr i32 %213, 16 ; <i32> [#uses=1]
%220 = or i32 %219, 256 ; <i32> [#uses=1]
%221 = and i32 %220, 511 ; <i32> [#uses=1]
- %222 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %221; <i32*> [#uses=1]
- %223 = load i32* %222, align 4 ; <i32> [#uses=1]
+ %222 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %221; <i32*> [#uses=1]
+ %223 = load i32, i32* %222, align 4 ; <i32> [#uses=1]
%224 = add i32 %223, %218 ; <i32> [#uses=1]
%225 = lshr i32 %213, 8 ; <i32> [#uses=1]
%226 = or i32 %225, 512 ; <i32> [#uses=1]
%227 = and i32 %226, 767 ; <i32> [#uses=1]
- %228 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %227; <i32*> [#uses=1]
- %229 = load i32* %228, align 4 ; <i32> [#uses=1]
+ %228 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %227; <i32*> [#uses=1]
+ %229 = load i32, i32* %228, align 4 ; <i32> [#uses=1]
%230 = xor i32 %224, %229 ; <i32> [#uses=1]
%231 = or i32 %213, 768 ; <i32> [#uses=1]
%232 = and i32 %231, 1023 ; <i32> [#uses=1]
- %233 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %232; <i32*> [#uses=1]
- %234 = load i32* %233, align 4 ; <i32> [#uses=1]
+ %233 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %232; <i32*> [#uses=1]
+ %234 = load i32, i32* %233, align 4 ; <i32> [#uses=1]
%235 = add i32 %230, %234 ; <i32> [#uses=1]
%236 = xor i32 %215, %189 ; <i32> [#uses=1]
%237 = xor i32 %236, %235 ; <i32> [#uses=5]
- %238 = getelementptr %struct.BF_KEY* %key, i32 0, i32 0, i32 11; <i32*> [#uses=1]
- %239 = load i32* %238, align 4 ; <i32> [#uses=1]
+ %238 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 0, i32 11; <i32*> [#uses=1]
+ %239 = load i32, i32* %238, align 4 ; <i32> [#uses=1]
%240 = lshr i32 %237, 24 ; <i32> [#uses=1]
- %241 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %240; <i32*> [#uses=1]
- %242 = load i32* %241, align 4 ; <i32> [#uses=1]
+ %241 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %240; <i32*> [#uses=1]
+ %242 = load i32, i32* %241, align 4 ; <i32> [#uses=1]
%243 = lshr i32 %237, 16 ; <i32> [#uses=1]
%244 = or i32 %243, 256 ; <i32> [#uses=1]
%245 = and i32 %244, 511 ; <i32> [#uses=1]
- %246 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %245; <i32*> [#uses=1]
- %247 = load i32* %246, align 4 ; <i32> [#uses=1]
+ %246 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %245; <i32*> [#uses=1]
+ %247 = load i32, i32* %246, align 4 ; <i32> [#uses=1]
%248 = add i32 %247, %242 ; <i32> [#uses=1]
%249 = lshr i32 %237, 8 ; <i32> [#uses=1]
%250 = or i32 %249, 512 ; <i32> [#uses=1]
%251 = and i32 %250, 767 ; <i32> [#uses=1]
- %252 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %251; <i32*> [#uses=1]
- %253 = load i32* %252, align 4 ; <i32> [#uses=1]
+ %252 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %251; <i32*> [#uses=1]
+ %253 = load i32, i32* %252, align 4 ; <i32> [#uses=1]
%254 = xor i32 %248, %253 ; <i32> [#uses=1]
%255 = or i32 %237, 768 ; <i32> [#uses=1]
%256 = and i32 %255, 1023 ; <i32> [#uses=1]
- %257 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %256; <i32*> [#uses=1]
- %258 = load i32* %257, align 4 ; <i32> [#uses=1]
+ %257 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %256; <i32*> [#uses=1]
+ %258 = load i32, i32* %257, align 4 ; <i32> [#uses=1]
%259 = add i32 %254, %258 ; <i32> [#uses=1]
%260 = xor i32 %239, %213 ; <i32> [#uses=1]
%261 = xor i32 %260, %259 ; <i32> [#uses=5]
- %262 = load i32* undef, align 4 ; <i32> [#uses=1]
+ %262 = load i32, i32* undef, align 4 ; <i32> [#uses=1]
%263 = lshr i32 %261, 24 ; <i32> [#uses=1]
- %264 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %263; <i32*> [#uses=1]
- %265 = load i32* %264, align 4 ; <i32> [#uses=1]
+ %264 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %263; <i32*> [#uses=1]
+ %265 = load i32, i32* %264, align 4 ; <i32> [#uses=1]
%266 = lshr i32 %261, 16 ; <i32> [#uses=1]
%267 = or i32 %266, 256 ; <i32> [#uses=1]
%268 = and i32 %267, 511 ; <i32> [#uses=1]
- %269 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %268; <i32*> [#uses=1]
- %270 = load i32* %269, align 4 ; <i32> [#uses=1]
+ %269 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %268; <i32*> [#uses=1]
+ %270 = load i32, i32* %269, align 4 ; <i32> [#uses=1]
%271 = add i32 %270, %265 ; <i32> [#uses=1]
%272 = lshr i32 %261, 8 ; <i32> [#uses=1]
%273 = or i32 %272, 512 ; <i32> [#uses=1]
%274 = and i32 %273, 767 ; <i32> [#uses=1]
- %275 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %274; <i32*> [#uses=1]
- %276 = load i32* %275, align 4 ; <i32> [#uses=1]
+ %275 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %274; <i32*> [#uses=1]
+ %276 = load i32, i32* %275, align 4 ; <i32> [#uses=1]
%277 = xor i32 %271, %276 ; <i32> [#uses=1]
%278 = or i32 %261, 768 ; <i32> [#uses=1]
%279 = and i32 %278, 1023 ; <i32> [#uses=1]
- %280 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %279; <i32*> [#uses=1]
- %281 = load i32* %280, align 4 ; <i32> [#uses=1]
+ %280 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %279; <i32*> [#uses=1]
+ %281 = load i32, i32* %280, align 4 ; <i32> [#uses=1]
%282 = add i32 %277, %281 ; <i32> [#uses=1]
%283 = xor i32 %262, %237 ; <i32> [#uses=1]
%284 = xor i32 %283, %282 ; <i32> [#uses=4]
- %285 = load i32* null, align 4 ; <i32> [#uses=1]
+ %285 = load i32, i32* null, align 4 ; <i32> [#uses=1]
%286 = lshr i32 %284, 24 ; <i32> [#uses=1]
- %287 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %286; <i32*> [#uses=1]
- %288 = load i32* %287, align 4 ; <i32> [#uses=1]
+ %287 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %286; <i32*> [#uses=1]
+ %288 = load i32, i32* %287, align 4 ; <i32> [#uses=1]
%289 = lshr i32 %284, 16 ; <i32> [#uses=1]
%290 = or i32 %289, 256 ; <i32> [#uses=1]
%291 = and i32 %290, 511 ; <i32> [#uses=1]
- %292 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %291; <i32*> [#uses=1]
- %293 = load i32* %292, align 4 ; <i32> [#uses=1]
+ %292 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %291; <i32*> [#uses=1]
+ %293 = load i32, i32* %292, align 4 ; <i32> [#uses=1]
%294 = add i32 %293, %288 ; <i32> [#uses=1]
%295 = lshr i32 %284, 8 ; <i32> [#uses=1]
%296 = or i32 %295, 512 ; <i32> [#uses=1]
%297 = and i32 %296, 767 ; <i32> [#uses=1]
- %298 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %297; <i32*> [#uses=1]
- %299 = load i32* %298, align 4 ; <i32> [#uses=1]
+ %298 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %297; <i32*> [#uses=1]
+ %299 = load i32, i32* %298, align 4 ; <i32> [#uses=1]
%300 = xor i32 %294, %299 ; <i32> [#uses=1]
%301 = or i32 %284, 768 ; <i32> [#uses=1]
%302 = and i32 %301, 1023 ; <i32> [#uses=1]
- %303 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %302; <i32*> [#uses=1]
- %304 = load i32* %303, align 4 ; <i32> [#uses=1]
+ %303 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %302; <i32*> [#uses=1]
+ %304 = load i32, i32* %303, align 4 ; <i32> [#uses=1]
%305 = add i32 %300, %304 ; <i32> [#uses=1]
%306 = xor i32 %285, %261 ; <i32> [#uses=1]
%307 = xor i32 %306, %305 ; <i32> [#uses=1]
- %308 = getelementptr %struct.BF_KEY* %key, i32 0, i32 0, i32 15; <i32*> [#uses=1]
- %309 = load i32* %308, align 4 ; <i32> [#uses=1]
- %310 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 0; <i32*> [#uses=1]
- %311 = load i32* %310, align 4 ; <i32> [#uses=1]
+ %308 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 0, i32 15; <i32*> [#uses=1]
+ %309 = load i32, i32* %308, align 4 ; <i32> [#uses=1]
+ %310 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 0; <i32*> [#uses=1]
+ %311 = load i32, i32* %310, align 4 ; <i32> [#uses=1]
%312 = or i32 0, 256 ; <i32> [#uses=1]
%313 = and i32 %312, 511 ; <i32> [#uses=1]
- %314 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %313; <i32*> [#uses=1]
- %315 = load i32* %314, align 4 ; <i32> [#uses=1]
+ %314 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %313; <i32*> [#uses=1]
+ %315 = load i32, i32* %314, align 4 ; <i32> [#uses=1]
%316 = add i32 %315, %311 ; <i32> [#uses=1]
%317 = or i32 0, 512 ; <i32> [#uses=1]
%318 = and i32 %317, 767 ; <i32> [#uses=1]
- %319 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %318; <i32*> [#uses=1]
- %320 = load i32* %319, align 4 ; <i32> [#uses=1]
+ %319 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %318; <i32*> [#uses=1]
+ %320 = load i32, i32* %319, align 4 ; <i32> [#uses=1]
%321 = xor i32 %316, %320 ; <i32> [#uses=1]
%322 = or i32 0, 768 ; <i32> [#uses=1]
%323 = and i32 %322, 1023 ; <i32> [#uses=1]
- %324 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %323; <i32*> [#uses=1]
- %325 = load i32* %324, align 4 ; <i32> [#uses=1]
+ %324 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %323; <i32*> [#uses=1]
+ %325 = load i32, i32* %324, align 4 ; <i32> [#uses=1]
%326 = add i32 %321, %325 ; <i32> [#uses=1]
%327 = xor i32 %309, %307 ; <i32> [#uses=1]
%328 = xor i32 %327, %326 ; <i32> [#uses=5]
- %329 = getelementptr %struct.BF_KEY* %key, i32 0, i32 0, i32 17; <i32*> [#uses=1]
+ %329 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 0, i32 17; <i32*> [#uses=1]
br label %bb2
bb1: ; preds = %entry
- %330 = load i32* null, align 4 ; <i32> [#uses=1]
+ %330 = load i32, i32* null, align 4 ; <i32> [#uses=1]
%331 = xor i32 %330, %1 ; <i32> [#uses=4]
- %332 = load i32* null, align 4 ; <i32> [#uses=1]
+ %332 = load i32, i32* null, align 4 ; <i32> [#uses=1]
%333 = lshr i32 %331, 24 ; <i32> [#uses=1]
- %334 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %333; <i32*> [#uses=1]
- %335 = load i32* %334, align 4 ; <i32> [#uses=1]
- %336 = load i32* null, align 4 ; <i32> [#uses=1]
+ %334 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %333; <i32*> [#uses=1]
+ %335 = load i32, i32* %334, align 4 ; <i32> [#uses=1]
+ %336 = load i32, i32* null, align 4 ; <i32> [#uses=1]
%337 = add i32 %336, %335 ; <i32> [#uses=1]
%338 = lshr i32 %331, 8 ; <i32> [#uses=1]
%339 = or i32 %338, 512 ; <i32> [#uses=1]
%340 = and i32 %339, 767 ; <i32> [#uses=1]
- %341 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %340; <i32*> [#uses=1]
- %342 = load i32* %341, align 4 ; <i32> [#uses=1]
+ %341 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %340; <i32*> [#uses=1]
+ %342 = load i32, i32* %341, align 4 ; <i32> [#uses=1]
%343 = xor i32 %337, %342 ; <i32> [#uses=1]
%344 = or i32 %331, 768 ; <i32> [#uses=1]
%345 = and i32 %344, 1023 ; <i32> [#uses=1]
- %346 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %345; <i32*> [#uses=1]
- %347 = load i32* %346, align 4 ; <i32> [#uses=1]
+ %346 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %345; <i32*> [#uses=1]
+ %347 = load i32, i32* %346, align 4 ; <i32> [#uses=1]
%348 = add i32 %343, %347 ; <i32> [#uses=1]
%349 = xor i32 %332, %2 ; <i32> [#uses=1]
%350 = xor i32 %349, %348 ; <i32> [#uses=5]
- %351 = getelementptr %struct.BF_KEY* %key, i32 0, i32 0, i32 15; <i32*> [#uses=1]
- %352 = load i32* %351, align 4 ; <i32> [#uses=1]
+ %351 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 0, i32 15; <i32*> [#uses=1]
+ %352 = load i32, i32* %351, align 4 ; <i32> [#uses=1]
%353 = lshr i32 %350, 24 ; <i32> [#uses=1]
- %354 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %353; <i32*> [#uses=1]
- %355 = load i32* %354, align 4 ; <i32> [#uses=1]
+ %354 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %353; <i32*> [#uses=1]
+ %355 = load i32, i32* %354, align 4 ; <i32> [#uses=1]
%356 = lshr i32 %350, 16 ; <i32> [#uses=1]
%357 = or i32 %356, 256 ; <i32> [#uses=1]
%358 = and i32 %357, 511 ; <i32> [#uses=1]
- %359 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %358; <i32*> [#uses=1]
- %360 = load i32* %359, align 4 ; <i32> [#uses=1]
+ %359 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %358; <i32*> [#uses=1]
+ %360 = load i32, i32* %359, align 4 ; <i32> [#uses=1]
%361 = add i32 %360, %355 ; <i32> [#uses=1]
%362 = lshr i32 %350, 8 ; <i32> [#uses=1]
%363 = or i32 %362, 512 ; <i32> [#uses=1]
%364 = and i32 %363, 767 ; <i32> [#uses=1]
- %365 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %364; <i32*> [#uses=1]
- %366 = load i32* %365, align 4 ; <i32> [#uses=1]
+ %365 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %364; <i32*> [#uses=1]
+ %366 = load i32, i32* %365, align 4 ; <i32> [#uses=1]
%367 = xor i32 %361, %366 ; <i32> [#uses=1]
%368 = or i32 %350, 768 ; <i32> [#uses=1]
%369 = and i32 %368, 1023 ; <i32> [#uses=1]
- %370 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %369; <i32*> [#uses=1]
- %371 = load i32* %370, align 4 ; <i32> [#uses=1]
+ %370 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %369; <i32*> [#uses=1]
+ %371 = load i32, i32* %370, align 4 ; <i32> [#uses=1]
%372 = add i32 %367, %371 ; <i32> [#uses=1]
%373 = xor i32 %352, %331 ; <i32> [#uses=1]
%374 = xor i32 %373, %372 ; <i32> [#uses=5]
- %375 = getelementptr %struct.BF_KEY* %key, i32 0, i32 0, i32 14; <i32*> [#uses=1]
- %376 = load i32* %375, align 4 ; <i32> [#uses=1]
+ %375 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 0, i32 14; <i32*> [#uses=1]
+ %376 = load i32, i32* %375, align 4 ; <i32> [#uses=1]
%377 = lshr i32 %374, 24 ; <i32> [#uses=1]
- %378 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %377; <i32*> [#uses=1]
- %379 = load i32* %378, align 4 ; <i32> [#uses=1]
+ %378 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %377; <i32*> [#uses=1]
+ %379 = load i32, i32* %378, align 4 ; <i32> [#uses=1]
%380 = lshr i32 %374, 16 ; <i32> [#uses=1]
%381 = or i32 %380, 256 ; <i32> [#uses=1]
%382 = and i32 %381, 511 ; <i32> [#uses=1]
- %383 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %382; <i32*> [#uses=1]
- %384 = load i32* %383, align 4 ; <i32> [#uses=1]
+ %383 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %382; <i32*> [#uses=1]
+ %384 = load i32, i32* %383, align 4 ; <i32> [#uses=1]
%385 = add i32 %384, %379 ; <i32> [#uses=1]
%386 = lshr i32 %374, 8 ; <i32> [#uses=1]
%387 = or i32 %386, 512 ; <i32> [#uses=1]
%388 = and i32 %387, 767 ; <i32> [#uses=1]
- %389 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %388; <i32*> [#uses=1]
- %390 = load i32* %389, align 4 ; <i32> [#uses=1]
+ %389 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %388; <i32*> [#uses=1]
+ %390 = load i32, i32* %389, align 4 ; <i32> [#uses=1]
%391 = xor i32 %385, %390 ; <i32> [#uses=1]
%392 = or i32 %374, 768 ; <i32> [#uses=1]
%393 = and i32 %392, 1023 ; <i32> [#uses=1]
- %394 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %393; <i32*> [#uses=1]
- %395 = load i32* %394, align 4 ; <i32> [#uses=1]
+ %394 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %393; <i32*> [#uses=1]
+ %395 = load i32, i32* %394, align 4 ; <i32> [#uses=1]
%396 = add i32 %391, %395 ; <i32> [#uses=1]
%397 = xor i32 %376, %350 ; <i32> [#uses=1]
%398 = xor i32 %397, %396 ; <i32> [#uses=5]
- %399 = getelementptr %struct.BF_KEY* %key, i32 0, i32 0, i32 13; <i32*> [#uses=1]
- %400 = load i32* %399, align 4 ; <i32> [#uses=1]
+ %399 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 0, i32 13; <i32*> [#uses=1]
+ %400 = load i32, i32* %399, align 4 ; <i32> [#uses=1]
%401 = lshr i32 %398, 24 ; <i32> [#uses=1]
- %402 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %401; <i32*> [#uses=1]
- %403 = load i32* %402, align 4 ; <i32> [#uses=1]
+ %402 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %401; <i32*> [#uses=1]
+ %403 = load i32, i32* %402, align 4 ; <i32> [#uses=1]
%404 = lshr i32 %398, 16 ; <i32> [#uses=1]
%405 = or i32 %404, 256 ; <i32> [#uses=1]
%406 = and i32 %405, 511 ; <i32> [#uses=1]
- %407 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %406; <i32*> [#uses=1]
- %408 = load i32* %407, align 4 ; <i32> [#uses=1]
+ %407 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %406; <i32*> [#uses=1]
+ %408 = load i32, i32* %407, align 4 ; <i32> [#uses=1]
%409 = add i32 %408, %403 ; <i32> [#uses=1]
%410 = lshr i32 %398, 8 ; <i32> [#uses=1]
%411 = or i32 %410, 512 ; <i32> [#uses=1]
%412 = and i32 %411, 767 ; <i32> [#uses=1]
- %413 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %412; <i32*> [#uses=1]
- %414 = load i32* %413, align 4 ; <i32> [#uses=1]
+ %413 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %412; <i32*> [#uses=1]
+ %414 = load i32, i32* %413, align 4 ; <i32> [#uses=1]
%415 = xor i32 %409, %414 ; <i32> [#uses=1]
%416 = or i32 %398, 768 ; <i32> [#uses=1]
%417 = and i32 %416, 1023 ; <i32> [#uses=1]
- %418 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %417; <i32*> [#uses=1]
- %419 = load i32* %418, align 4 ; <i32> [#uses=1]
+ %418 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %417; <i32*> [#uses=1]
+ %419 = load i32, i32* %418, align 4 ; <i32> [#uses=1]
%420 = add i32 %415, %419 ; <i32> [#uses=1]
%421 = xor i32 %400, %374 ; <i32> [#uses=1]
%422 = xor i32 %421, %420 ; <i32> [#uses=5]
- %423 = getelementptr %struct.BF_KEY* %key, i32 0, i32 0, i32 12; <i32*> [#uses=1]
- %424 = load i32* %423, align 4 ; <i32> [#uses=1]
+ %423 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 0, i32 12; <i32*> [#uses=1]
+ %424 = load i32, i32* %423, align 4 ; <i32> [#uses=1]
%425 = lshr i32 %422, 24 ; <i32> [#uses=1]
- %426 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %425; <i32*> [#uses=1]
- %427 = load i32* %426, align 4 ; <i32> [#uses=1]
+ %426 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %425; <i32*> [#uses=1]
+ %427 = load i32, i32* %426, align 4 ; <i32> [#uses=1]
%428 = lshr i32 %422, 16 ; <i32> [#uses=1]
%429 = or i32 %428, 256 ; <i32> [#uses=1]
%430 = and i32 %429, 511 ; <i32> [#uses=1]
- %431 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %430; <i32*> [#uses=1]
- %432 = load i32* %431, align 4 ; <i32> [#uses=1]
+ %431 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %430; <i32*> [#uses=1]
+ %432 = load i32, i32* %431, align 4 ; <i32> [#uses=1]
%433 = add i32 %432, %427 ; <i32> [#uses=1]
%434 = lshr i32 %422, 8 ; <i32> [#uses=1]
%435 = or i32 %434, 512 ; <i32> [#uses=1]
%436 = and i32 %435, 767 ; <i32> [#uses=1]
- %437 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %436; <i32*> [#uses=1]
- %438 = load i32* %437, align 4 ; <i32> [#uses=1]
+ %437 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %436; <i32*> [#uses=1]
+ %438 = load i32, i32* %437, align 4 ; <i32> [#uses=1]
%439 = xor i32 %433, %438 ; <i32> [#uses=1]
%440 = or i32 %422, 768 ; <i32> [#uses=1]
%441 = and i32 %440, 1023 ; <i32> [#uses=1]
- %442 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %441; <i32*> [#uses=1]
- %443 = load i32* %442, align 4 ; <i32> [#uses=1]
+ %442 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %441; <i32*> [#uses=1]
+ %443 = load i32, i32* %442, align 4 ; <i32> [#uses=1]
%444 = add i32 %439, %443 ; <i32> [#uses=1]
%445 = xor i32 %424, %398 ; <i32> [#uses=1]
%446 = xor i32 %445, %444 ; <i32> [#uses=5]
- %447 = load i32* undef, align 4 ; <i32> [#uses=1]
+ %447 = load i32, i32* undef, align 4 ; <i32> [#uses=1]
%448 = lshr i32 %446, 24 ; <i32> [#uses=1]
- %449 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %448; <i32*> [#uses=1]
- %450 = load i32* %449, align 4 ; <i32> [#uses=1]
+ %449 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %448; <i32*> [#uses=1]
+ %450 = load i32, i32* %449, align 4 ; <i32> [#uses=1]
%451 = lshr i32 %446, 16 ; <i32> [#uses=1]
%452 = or i32 %451, 256 ; <i32> [#uses=1]
%453 = and i32 %452, 511 ; <i32> [#uses=1]
- %454 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %453; <i32*> [#uses=1]
- %455 = load i32* %454, align 4 ; <i32> [#uses=1]
+ %454 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %453; <i32*> [#uses=1]
+ %455 = load i32, i32* %454, align 4 ; <i32> [#uses=1]
%456 = add i32 %455, %450 ; <i32> [#uses=1]
%457 = lshr i32 %446, 8 ; <i32> [#uses=1]
%458 = or i32 %457, 512 ; <i32> [#uses=1]
%459 = and i32 %458, 767 ; <i32> [#uses=1]
- %460 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %459; <i32*> [#uses=1]
- %461 = load i32* %460, align 4 ; <i32> [#uses=1]
+ %460 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %459; <i32*> [#uses=1]
+ %461 = load i32, i32* %460, align 4 ; <i32> [#uses=1]
%462 = xor i32 %456, %461 ; <i32> [#uses=1]
%463 = or i32 %446, 768 ; <i32> [#uses=1]
%464 = and i32 %463, 1023 ; <i32> [#uses=1]
- %465 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %464; <i32*> [#uses=1]
- %466 = load i32* %465, align 4 ; <i32> [#uses=1]
+ %465 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %464; <i32*> [#uses=1]
+ %466 = load i32, i32* %465, align 4 ; <i32> [#uses=1]
%467 = add i32 %462, %466 ; <i32> [#uses=1]
%468 = xor i32 %447, %422 ; <i32> [#uses=1]
%469 = xor i32 %468, %467 ; <i32> [#uses=5]
- %470 = getelementptr %struct.BF_KEY* %key, i32 0, i32 0, i32 10; <i32*> [#uses=1]
- %471 = load i32* %470, align 4 ; <i32> [#uses=1]
+ %470 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 0, i32 10; <i32*> [#uses=1]
+ %471 = load i32, i32* %470, align 4 ; <i32> [#uses=1]
%472 = lshr i32 %469, 24 ; <i32> [#uses=1]
- %473 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %472; <i32*> [#uses=1]
- %474 = load i32* %473, align 4 ; <i32> [#uses=1]
+ %473 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %472; <i32*> [#uses=1]
+ %474 = load i32, i32* %473, align 4 ; <i32> [#uses=1]
%475 = lshr i32 %469, 16 ; <i32> [#uses=1]
%476 = or i32 %475, 256 ; <i32> [#uses=1]
%477 = and i32 %476, 511 ; <i32> [#uses=1]
- %478 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %477; <i32*> [#uses=1]
- %479 = load i32* %478, align 4 ; <i32> [#uses=1]
+ %478 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %477; <i32*> [#uses=1]
+ %479 = load i32, i32* %478, align 4 ; <i32> [#uses=1]
%480 = add i32 %479, %474 ; <i32> [#uses=1]
%481 = lshr i32 %469, 8 ; <i32> [#uses=1]
%482 = or i32 %481, 512 ; <i32> [#uses=1]
%483 = and i32 %482, 767 ; <i32> [#uses=1]
- %484 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %483; <i32*> [#uses=1]
- %485 = load i32* %484, align 4 ; <i32> [#uses=1]
+ %484 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %483; <i32*> [#uses=1]
+ %485 = load i32, i32* %484, align 4 ; <i32> [#uses=1]
%486 = xor i32 %480, %485 ; <i32> [#uses=1]
%487 = or i32 %469, 768 ; <i32> [#uses=1]
%488 = and i32 %487, 1023 ; <i32> [#uses=1]
- %489 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %488; <i32*> [#uses=1]
- %490 = load i32* %489, align 4 ; <i32> [#uses=1]
+ %489 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %488; <i32*> [#uses=1]
+ %490 = load i32, i32* %489, align 4 ; <i32> [#uses=1]
%491 = add i32 %486, %490 ; <i32> [#uses=1]
%492 = xor i32 %471, %446 ; <i32> [#uses=1]
%493 = xor i32 %492, %491 ; <i32> [#uses=5]
- %494 = getelementptr %struct.BF_KEY* %key, i32 0, i32 0, i32 9; <i32*> [#uses=1]
- %495 = load i32* %494, align 4 ; <i32> [#uses=1]
+ %494 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 0, i32 9; <i32*> [#uses=1]
+ %495 = load i32, i32* %494, align 4 ; <i32> [#uses=1]
%496 = lshr i32 %493, 24 ; <i32> [#uses=1]
- %497 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %496; <i32*> [#uses=1]
- %498 = load i32* %497, align 4 ; <i32> [#uses=1]
+ %497 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %496; <i32*> [#uses=1]
+ %498 = load i32, i32* %497, align 4 ; <i32> [#uses=1]
%499 = lshr i32 %493, 16 ; <i32> [#uses=1]
%500 = or i32 %499, 256 ; <i32> [#uses=1]
%501 = and i32 %500, 511 ; <i32> [#uses=1]
- %502 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %501; <i32*> [#uses=1]
- %503 = load i32* %502, align 4 ; <i32> [#uses=1]
+ %502 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %501; <i32*> [#uses=1]
+ %503 = load i32, i32* %502, align 4 ; <i32> [#uses=1]
%504 = add i32 %503, %498 ; <i32> [#uses=1]
%505 = lshr i32 %493, 8 ; <i32> [#uses=1]
%506 = or i32 %505, 512 ; <i32> [#uses=1]
%507 = and i32 %506, 767 ; <i32> [#uses=1]
- %508 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %507; <i32*> [#uses=1]
- %509 = load i32* %508, align 4 ; <i32> [#uses=1]
+ %508 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %507; <i32*> [#uses=1]
+ %509 = load i32, i32* %508, align 4 ; <i32> [#uses=1]
%510 = xor i32 %504, %509 ; <i32> [#uses=1]
%511 = or i32 %493, 768 ; <i32> [#uses=1]
%512 = and i32 %511, 1023 ; <i32> [#uses=1]
- %513 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %512; <i32*> [#uses=1]
- %514 = load i32* %513, align 4 ; <i32> [#uses=1]
+ %513 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %512; <i32*> [#uses=1]
+ %514 = load i32, i32* %513, align 4 ; <i32> [#uses=1]
%515 = add i32 %510, %514 ; <i32> [#uses=1]
%516 = xor i32 %495, %469 ; <i32> [#uses=1]
%517 = xor i32 %516, %515 ; <i32> [#uses=5]
- %518 = getelementptr %struct.BF_KEY* %key, i32 0, i32 0, i32 8; <i32*> [#uses=1]
- %519 = load i32* %518, align 4 ; <i32> [#uses=1]
+ %518 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 0, i32 8; <i32*> [#uses=1]
+ %519 = load i32, i32* %518, align 4 ; <i32> [#uses=1]
%520 = lshr i32 %517, 24 ; <i32> [#uses=1]
- %521 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %520; <i32*> [#uses=1]
- %522 = load i32* %521, align 4 ; <i32> [#uses=1]
+ %521 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %520; <i32*> [#uses=1]
+ %522 = load i32, i32* %521, align 4 ; <i32> [#uses=1]
%523 = lshr i32 %517, 16 ; <i32> [#uses=1]
%524 = or i32 %523, 256 ; <i32> [#uses=1]
%525 = and i32 %524, 511 ; <i32> [#uses=1]
- %526 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %525; <i32*> [#uses=1]
- %527 = load i32* %526, align 4 ; <i32> [#uses=1]
+ %526 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %525; <i32*> [#uses=1]
+ %527 = load i32, i32* %526, align 4 ; <i32> [#uses=1]
%528 = add i32 %527, %522 ; <i32> [#uses=1]
%529 = lshr i32 %517, 8 ; <i32> [#uses=1]
%530 = or i32 %529, 512 ; <i32> [#uses=1]
%531 = and i32 %530, 767 ; <i32> [#uses=1]
- %532 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %531; <i32*> [#uses=1]
- %533 = load i32* %532, align 4 ; <i32> [#uses=1]
+ %532 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %531; <i32*> [#uses=1]
+ %533 = load i32, i32* %532, align 4 ; <i32> [#uses=1]
%534 = xor i32 %528, %533 ; <i32> [#uses=1]
%535 = or i32 %517, 768 ; <i32> [#uses=1]
%536 = and i32 %535, 1023 ; <i32> [#uses=1]
- %537 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %536; <i32*> [#uses=1]
- %538 = load i32* %537, align 4 ; <i32> [#uses=1]
+ %537 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %536; <i32*> [#uses=1]
+ %538 = load i32, i32* %537, align 4 ; <i32> [#uses=1]
%539 = add i32 %534, %538 ; <i32> [#uses=1]
%540 = xor i32 %519, %493 ; <i32> [#uses=1]
%541 = xor i32 %540, %539 ; <i32> [#uses=5]
- %542 = load i32* undef, align 4 ; <i32> [#uses=1]
+ %542 = load i32, i32* undef, align 4 ; <i32> [#uses=1]
%543 = lshr i32 %541, 24 ; <i32> [#uses=1]
- %544 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %543; <i32*> [#uses=1]
- %545 = load i32* %544, align 4 ; <i32> [#uses=1]
+ %544 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %543; <i32*> [#uses=1]
+ %545 = load i32, i32* %544, align 4 ; <i32> [#uses=1]
%546 = lshr i32 %541, 16 ; <i32> [#uses=1]
%547 = or i32 %546, 256 ; <i32> [#uses=1]
%548 = and i32 %547, 511 ; <i32> [#uses=1]
- %549 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %548; <i32*> [#uses=1]
- %550 = load i32* %549, align 4 ; <i32> [#uses=1]
+ %549 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %548; <i32*> [#uses=1]
+ %550 = load i32, i32* %549, align 4 ; <i32> [#uses=1]
%551 = add i32 %550, %545 ; <i32> [#uses=1]
%552 = lshr i32 %541, 8 ; <i32> [#uses=1]
%553 = or i32 %552, 512 ; <i32> [#uses=1]
%554 = and i32 %553, 767 ; <i32> [#uses=1]
- %555 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %554; <i32*> [#uses=1]
- %556 = load i32* %555, align 4 ; <i32> [#uses=1]
+ %555 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %554; <i32*> [#uses=1]
+ %556 = load i32, i32* %555, align 4 ; <i32> [#uses=1]
%557 = xor i32 %551, %556 ; <i32> [#uses=1]
%558 = or i32 %541, 768 ; <i32> [#uses=1]
%559 = and i32 %558, 1023 ; <i32> [#uses=1]
- %560 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %559; <i32*> [#uses=1]
- %561 = load i32* %560, align 4 ; <i32> [#uses=1]
+ %560 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %559; <i32*> [#uses=1]
+ %561 = load i32, i32* %560, align 4 ; <i32> [#uses=1]
%562 = add i32 %557, %561 ; <i32> [#uses=1]
%563 = xor i32 %542, %517 ; <i32> [#uses=1]
%564 = xor i32 %563, %562 ; <i32> [#uses=5]
- %565 = getelementptr %struct.BF_KEY* %key, i32 0, i32 0, i32 6; <i32*> [#uses=1]
- %566 = load i32* %565, align 4 ; <i32> [#uses=1]
+ %565 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 0, i32 6; <i32*> [#uses=1]
+ %566 = load i32, i32* %565, align 4 ; <i32> [#uses=1]
%567 = lshr i32 %564, 24 ; <i32> [#uses=1]
- %568 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %567; <i32*> [#uses=1]
- %569 = load i32* %568, align 4 ; <i32> [#uses=1]
+ %568 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %567; <i32*> [#uses=1]
+ %569 = load i32, i32* %568, align 4 ; <i32> [#uses=1]
%570 = lshr i32 %564, 16 ; <i32> [#uses=1]
%571 = or i32 %570, 256 ; <i32> [#uses=1]
%572 = and i32 %571, 511 ; <i32> [#uses=1]
- %573 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %572; <i32*> [#uses=1]
- %574 = load i32* %573, align 4 ; <i32> [#uses=1]
+ %573 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %572; <i32*> [#uses=1]
+ %574 = load i32, i32* %573, align 4 ; <i32> [#uses=1]
%575 = add i32 %574, %569 ; <i32> [#uses=1]
%576 = lshr i32 %564, 8 ; <i32> [#uses=1]
%577 = or i32 %576, 512 ; <i32> [#uses=1]
%578 = and i32 %577, 767 ; <i32> [#uses=1]
- %579 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %578; <i32*> [#uses=1]
- %580 = load i32* %579, align 4 ; <i32> [#uses=1]
+ %579 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %578; <i32*> [#uses=1]
+ %580 = load i32, i32* %579, align 4 ; <i32> [#uses=1]
%581 = xor i32 %575, %580 ; <i32> [#uses=1]
%582 = or i32 %564, 768 ; <i32> [#uses=1]
%583 = and i32 %582, 1023 ; <i32> [#uses=1]
- %584 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %583; <i32*> [#uses=1]
- %585 = load i32* %584, align 4 ; <i32> [#uses=1]
+ %584 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %583; <i32*> [#uses=1]
+ %585 = load i32, i32* %584, align 4 ; <i32> [#uses=1]
%586 = add i32 %581, %585 ; <i32> [#uses=1]
%587 = xor i32 %566, %541 ; <i32> [#uses=1]
%588 = xor i32 %587, %586 ; <i32> [#uses=5]
- %589 = getelementptr %struct.BF_KEY* %key, i32 0, i32 0, i32 5; <i32*> [#uses=1]
- %590 = load i32* %589, align 4 ; <i32> [#uses=1]
+ %589 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 0, i32 5; <i32*> [#uses=1]
+ %590 = load i32, i32* %589, align 4 ; <i32> [#uses=1]
%591 = lshr i32 %588, 24 ; <i32> [#uses=1]
- %592 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %591; <i32*> [#uses=1]
- %593 = load i32* %592, align 4 ; <i32> [#uses=1]
+ %592 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %591; <i32*> [#uses=1]
+ %593 = load i32, i32* %592, align 4 ; <i32> [#uses=1]
%594 = lshr i32 %588, 16 ; <i32> [#uses=1]
%595 = or i32 %594, 256 ; <i32> [#uses=1]
%596 = and i32 %595, 511 ; <i32> [#uses=1]
- %597 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %596; <i32*> [#uses=1]
- %598 = load i32* %597, align 4 ; <i32> [#uses=1]
+ %597 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %596; <i32*> [#uses=1]
+ %598 = load i32, i32* %597, align 4 ; <i32> [#uses=1]
%599 = add i32 %598, %593 ; <i32> [#uses=1]
%600 = lshr i32 %588, 8 ; <i32> [#uses=1]
%601 = or i32 %600, 512 ; <i32> [#uses=1]
%602 = and i32 %601, 767 ; <i32> [#uses=1]
- %603 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %602; <i32*> [#uses=1]
- %604 = load i32* %603, align 4 ; <i32> [#uses=1]
+ %603 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %602; <i32*> [#uses=1]
+ %604 = load i32, i32* %603, align 4 ; <i32> [#uses=1]
%605 = xor i32 %599, %604 ; <i32> [#uses=1]
%606 = or i32 %588, 768 ; <i32> [#uses=1]
%607 = and i32 %606, 1023 ; <i32> [#uses=1]
- %608 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %607; <i32*> [#uses=1]
- %609 = load i32* %608, align 4 ; <i32> [#uses=1]
+ %608 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %607; <i32*> [#uses=1]
+ %609 = load i32, i32* %608, align 4 ; <i32> [#uses=1]
%610 = add i32 %605, %609 ; <i32> [#uses=1]
%611 = xor i32 %590, %564 ; <i32> [#uses=1]
%612 = xor i32 %611, %610 ; <i32> [#uses=5]
- %613 = getelementptr %struct.BF_KEY* %key, i32 0, i32 0, i32 4; <i32*> [#uses=1]
- %614 = load i32* %613, align 4 ; <i32> [#uses=1]
+ %613 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 0, i32 4; <i32*> [#uses=1]
+ %614 = load i32, i32* %613, align 4 ; <i32> [#uses=1]
%615 = lshr i32 %612, 24 ; <i32> [#uses=1]
- %616 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %615; <i32*> [#uses=1]
- %617 = load i32* %616, align 4 ; <i32> [#uses=1]
+ %616 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %615; <i32*> [#uses=1]
+ %617 = load i32, i32* %616, align 4 ; <i32> [#uses=1]
%618 = lshr i32 %612, 16 ; <i32> [#uses=1]
%619 = or i32 %618, 256 ; <i32> [#uses=1]
%620 = and i32 %619, 511 ; <i32> [#uses=1]
- %621 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %620; <i32*> [#uses=1]
- %622 = load i32* %621, align 4 ; <i32> [#uses=1]
+ %621 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %620; <i32*> [#uses=1]
+ %622 = load i32, i32* %621, align 4 ; <i32> [#uses=1]
%623 = add i32 %622, %617 ; <i32> [#uses=1]
%624 = lshr i32 %612, 8 ; <i32> [#uses=1]
%625 = or i32 %624, 512 ; <i32> [#uses=1]
%626 = and i32 %625, 767 ; <i32> [#uses=1]
- %627 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %626; <i32*> [#uses=1]
- %628 = load i32* %627, align 4 ; <i32> [#uses=1]
+ %627 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %626; <i32*> [#uses=1]
+ %628 = load i32, i32* %627, align 4 ; <i32> [#uses=1]
%629 = xor i32 %623, %628 ; <i32> [#uses=1]
%630 = or i32 %612, 768 ; <i32> [#uses=1]
%631 = and i32 %630, 1023 ; <i32> [#uses=1]
- %632 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %631; <i32*> [#uses=1]
- %633 = load i32* %632, align 4 ; <i32> [#uses=1]
+ %632 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %631; <i32*> [#uses=1]
+ %633 = load i32, i32* %632, align 4 ; <i32> [#uses=1]
%634 = add i32 %629, %633 ; <i32> [#uses=1]
%635 = xor i32 %614, %588 ; <i32> [#uses=1]
%636 = xor i32 %635, %634 ; <i32> [#uses=5]
- %637 = getelementptr %struct.BF_KEY* %key, i32 0, i32 0, i32 3; <i32*> [#uses=1]
- %638 = load i32* %637, align 4 ; <i32> [#uses=1]
+ %637 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 0, i32 3; <i32*> [#uses=1]
+ %638 = load i32, i32* %637, align 4 ; <i32> [#uses=1]
%639 = lshr i32 %636, 24 ; <i32> [#uses=1]
- %640 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %639; <i32*> [#uses=1]
- %641 = load i32* %640, align 4 ; <i32> [#uses=1]
+ %640 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %639; <i32*> [#uses=1]
+ %641 = load i32, i32* %640, align 4 ; <i32> [#uses=1]
%642 = lshr i32 %636, 16 ; <i32> [#uses=1]
%643 = or i32 %642, 256 ; <i32> [#uses=1]
%644 = and i32 %643, 511 ; <i32> [#uses=1]
- %645 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %644; <i32*> [#uses=1]
- %646 = load i32* %645, align 4 ; <i32> [#uses=1]
+ %645 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %644; <i32*> [#uses=1]
+ %646 = load i32, i32* %645, align 4 ; <i32> [#uses=1]
%647 = add i32 %646, %641 ; <i32> [#uses=1]
%648 = lshr i32 %636, 8 ; <i32> [#uses=1]
%649 = or i32 %648, 512 ; <i32> [#uses=1]
%650 = and i32 %649, 767 ; <i32> [#uses=1]
- %651 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %650; <i32*> [#uses=1]
- %652 = load i32* %651, align 4 ; <i32> [#uses=1]
+ %651 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %650; <i32*> [#uses=1]
+ %652 = load i32, i32* %651, align 4 ; <i32> [#uses=1]
%653 = xor i32 %647, %652 ; <i32> [#uses=1]
%654 = or i32 %636, 768 ; <i32> [#uses=1]
%655 = and i32 %654, 1023 ; <i32> [#uses=1]
- %656 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %655; <i32*> [#uses=1]
- %657 = load i32* %656, align 4 ; <i32> [#uses=1]
+ %656 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %655; <i32*> [#uses=1]
+ %657 = load i32, i32* %656, align 4 ; <i32> [#uses=1]
%658 = add i32 %653, %657 ; <i32> [#uses=1]
%659 = xor i32 %638, %612 ; <i32> [#uses=1]
%660 = xor i32 %659, %658 ; <i32> [#uses=5]
- %661 = load i32* undef, align 4 ; <i32> [#uses=1]
+ %661 = load i32, i32* undef, align 4 ; <i32> [#uses=1]
%662 = lshr i32 %660, 24 ; <i32> [#uses=1]
- %663 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %662; <i32*> [#uses=1]
- %664 = load i32* %663, align 4 ; <i32> [#uses=1]
+ %663 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %662; <i32*> [#uses=1]
+ %664 = load i32, i32* %663, align 4 ; <i32> [#uses=1]
%665 = lshr i32 %660, 16 ; <i32> [#uses=1]
%666 = or i32 %665, 256 ; <i32> [#uses=1]
%667 = and i32 %666, 511 ; <i32> [#uses=1]
- %668 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %667; <i32*> [#uses=1]
- %669 = load i32* %668, align 4 ; <i32> [#uses=1]
+ %668 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %667; <i32*> [#uses=1]
+ %669 = load i32, i32* %668, align 4 ; <i32> [#uses=1]
%670 = add i32 %669, %664 ; <i32> [#uses=1]
%671 = lshr i32 %660, 8 ; <i32> [#uses=1]
%672 = or i32 %671, 512 ; <i32> [#uses=1]
%673 = and i32 %672, 767 ; <i32> [#uses=1]
- %674 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %673; <i32*> [#uses=1]
- %675 = load i32* %674, align 4 ; <i32> [#uses=1]
+ %674 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %673; <i32*> [#uses=1]
+ %675 = load i32, i32* %674, align 4 ; <i32> [#uses=1]
%676 = xor i32 %670, %675 ; <i32> [#uses=1]
%677 = or i32 %660, 768 ; <i32> [#uses=1]
%678 = and i32 %677, 1023 ; <i32> [#uses=1]
- %679 = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %678; <i32*> [#uses=1]
- %680 = load i32* %679, align 4 ; <i32> [#uses=1]
+ %679 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %678; <i32*> [#uses=1]
+ %680 = load i32, i32* %679, align 4 ; <i32> [#uses=1]
%681 = add i32 %676, %680 ; <i32> [#uses=1]
%682 = xor i32 %661, %636 ; <i32> [#uses=1]
%683 = xor i32 %682, %681 ; <i32> [#uses=5]
- %684 = getelementptr %struct.BF_KEY* %key, i32 0, i32 0, i32 1; <i32*> [#uses=1]
+ %684 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 0, i32 1; <i32*> [#uses=1]
br label %bb2
bb2: ; preds = %bb1, %bb
@@ -712,21 +712,21 @@ bb2: ; preds = %bb1, %bb
%.pn15 = lshr i32 %.pn15.in, 24 ; <i32> [#uses=1]
%.pn14 = and i32 %.pn14.in, 511 ; <i32> [#uses=1]
%.pn13.in = or i32 %.pn13.in.in, 512 ; <i32> [#uses=1]
- %.pn11.in = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %.pn15; <i32*> [#uses=1]
- %.pn12.in = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %.pn14; <i32*> [#uses=1]
+ %.pn11.in = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %.pn15; <i32*> [#uses=1]
+ %.pn12.in = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %.pn14; <i32*> [#uses=1]
%.pn13 = and i32 %.pn13.in, 767 ; <i32> [#uses=1]
%.pn10.in = or i32 %.pn10.in.in, 768 ; <i32> [#uses=1]
- %.pn11 = load i32* %.pn11.in ; <i32> [#uses=1]
- %.pn12 = load i32* %.pn12.in ; <i32> [#uses=1]
- %.pn9.in = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %.pn13; <i32*> [#uses=1]
+ %.pn11 = load i32, i32* %.pn11.in ; <i32> [#uses=1]
+ %.pn12 = load i32, i32* %.pn12.in ; <i32> [#uses=1]
+ %.pn9.in = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %.pn13; <i32*> [#uses=1]
%.pn10 = and i32 %.pn10.in, 1023 ; <i32> [#uses=1]
%.pn8 = add i32 %.pn12, %.pn11 ; <i32> [#uses=1]
- %.pn9 = load i32* %.pn9.in ; <i32> [#uses=1]
- %.pn7.in = getelementptr %struct.BF_KEY* %key, i32 0, i32 1, i32 %.pn10; <i32*> [#uses=1]
+ %.pn9 = load i32, i32* %.pn9.in ; <i32> [#uses=1]
+ %.pn7.in = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %.pn10; <i32*> [#uses=1]
%.pn6 = xor i32 %.pn8, %.pn9 ; <i32> [#uses=1]
- %.pn7 = load i32* %.pn7.in ; <i32> [#uses=1]
- %.pn4 = load i32* %.pn4.in ; <i32> [#uses=1]
- %.pn2 = load i32* %.pn2.in ; <i32> [#uses=1]
+ %.pn7 = load i32, i32* %.pn7.in ; <i32> [#uses=1]
+ %.pn4 = load i32, i32* %.pn4.in ; <i32> [#uses=1]
+ %.pn2 = load i32, i32* %.pn2.in ; <i32> [#uses=1]
%.pn = add i32 %.pn6, %.pn7 ; <i32> [#uses=1]
%r.0 = xor i32 %.pn2, %.pn3 ; <i32> [#uses=1]
%.pn1 = xor i32 %.pn, %.pn5 ; <i32> [#uses=1]
diff --git a/test/CodeGen/Thumb/2009-08-12-RegInfoAssert.ll b/test/CodeGen/Thumb/2009-08-12-RegInfoAssert.ll
index 041306db9f0d..983e0f5cfda4 100644
--- a/test/CodeGen/Thumb/2009-08-12-RegInfoAssert.ll
+++ b/test/CodeGen/Thumb/2009-08-12-RegInfoAssert.ll
@@ -11,19 +11,19 @@ define i8* @vorbis_comment_query(%struct.vorbis_comment* nocapture %vc, i8* %tag
entry:
%0 = alloca i8, i32 undef, align 4 ; <i8*> [#uses=2]
%1 = call i8* @__strcpy_chk(i8* %0, i8* %tag, i32 -1) nounwind; <i8*> [#uses=0]
- %2 = call i8* @__strcat_chk(i8* %0, i8* getelementptr ([2 x i8]* @.str16, i32 0, i32 0), i32 -1) nounwind; <i8*> [#uses=0]
- %3 = getelementptr %struct.vorbis_comment* %vc, i32 0, i32 0; <i8***> [#uses=1]
+ %2 = call i8* @__strcat_chk(i8* %0, i8* getelementptr ([2 x i8], [2 x i8]* @.str16, i32 0, i32 0), i32 -1) nounwind; <i8*> [#uses=0]
+ %3 = getelementptr %struct.vorbis_comment, %struct.vorbis_comment* %vc, i32 0, i32 0; <i8***> [#uses=1]
br label %bb11
bb6: ; preds = %bb11
- %4 = load i8*** %3, align 4 ; <i8**> [#uses=1]
- %scevgep = getelementptr i8** %4, i32 %8 ; <i8**> [#uses=1]
- %5 = load i8** %scevgep, align 4 ; <i8*> [#uses=1]
+ %4 = load i8**, i8*** %3, align 4 ; <i8**> [#uses=1]
+ %scevgep = getelementptr i8*, i8** %4, i32 %8 ; <i8**> [#uses=1]
+ %5 = load i8*, i8** %scevgep, align 4 ; <i8*> [#uses=1]
br label %bb3.i
bb3.i: ; preds = %bb3.i, %bb6
- %scevgep7.i = getelementptr i8* %5, i32 0 ; <i8*> [#uses=1]
- %6 = load i8* %scevgep7.i, align 1 ; <i8> [#uses=0]
+ %scevgep7.i = getelementptr i8, i8* %5, i32 0 ; <i8*> [#uses=1]
+ %6 = load i8, i8* %scevgep7.i, align 1 ; <i8> [#uses=0]
br i1 undef, label %bb3.i, label %bb10
bb10: ; preds = %bb3.i
diff --git a/test/CodeGen/Thumb/2009-08-20-ISelBug.ll b/test/CodeGen/Thumb/2009-08-20-ISelBug.ll
index 414b76d750b9..86d702395726 100644
--- a/test/CodeGen/Thumb/2009-08-20-ISelBug.ll
+++ b/test/CodeGen/Thumb/2009-08-20-ISelBug.ll
@@ -18,15 +18,15 @@ entry:
br i1 %0, label %bb13, label %bb1
bb1: ; preds = %entry
- %1 = getelementptr inbounds %struct.asl_file_t* %s, i32 0, i32 11 ; <%struct.FILE**> [#uses=2]
- %2 = load %struct.FILE** %1, align 4 ; <%struct.FILE*> [#uses=2]
+ %1 = getelementptr inbounds %struct.asl_file_t, %struct.asl_file_t* %s, i32 0, i32 11 ; <%struct.FILE**> [#uses=2]
+ %2 = load %struct.FILE*, %struct.FILE** %1, align 4 ; <%struct.FILE*> [#uses=2]
%3 = icmp eq %struct.FILE* %2, null ; <i1> [#uses=1]
br i1 %3, label %bb13, label %bb3
bb3: ; preds = %bb1
%4 = add nsw i64 %off, 8 ; <i64> [#uses=1]
- %5 = getelementptr inbounds %struct.asl_file_t* %s, i32 0, i32 10 ; <i32*> [#uses=1]
- %6 = load i32* %5, align 4 ; <i32> [#uses=1]
+ %5 = getelementptr inbounds %struct.asl_file_t, %struct.asl_file_t* %s, i32 0, i32 10 ; <i32*> [#uses=1]
+ %6 = load i32, i32* %5, align 4 ; <i32> [#uses=1]
%7 = zext i32 %6 to i64 ; <i64> [#uses=1]
%8 = icmp sgt i64 %4, %7 ; <i1> [#uses=1]
br i1 %8, label %bb13, label %bb5
@@ -38,7 +38,7 @@ bb5: ; preds = %bb3
bb7: ; preds = %bb5
store i64 0, i64* %val, align 4
- %11 = load %struct.FILE** %1, align 4 ; <%struct.FILE*> [#uses=1]
+ %11 = load %struct.FILE*, %struct.FILE** %1, align 4 ; <%struct.FILE*> [#uses=1]
%val8 = bitcast i64* %val to i8* ; <i8*> [#uses=1]
%12 = call i32 @fread(i8* noalias %val8, i32 8, i32 1, %struct.FILE* noalias %11) nounwind ; <i32> [#uses=1]
%13 = icmp eq i32 %12, 1 ; <i1> [#uses=1]
@@ -49,7 +49,7 @@ bb10: ; preds = %bb7
br i1 %14, label %bb13, label %bb11
bb11: ; preds = %bb10
- %15 = load i64* %val, align 4 ; <i64> [#uses=1]
+ %15 = load i64, i64* %val, align 4 ; <i64> [#uses=1]
%16 = call i64 @asl_core_ntohq(i64 %15) nounwind ; <i64> [#uses=1]
store i64 %16, i64* %out, align 4
ret i32 0
diff --git a/test/CodeGen/Thumb/2009-12-17-pre-regalloc-taildup.ll b/test/CodeGen/Thumb/2009-12-17-pre-regalloc-taildup.ll
index 132d9acf6745..e7684177e31c 100644
--- a/test/CodeGen/Thumb/2009-12-17-pre-regalloc-taildup.ll
+++ b/test/CodeGen/Thumb/2009-12-17-pre-regalloc-taildup.ll
@@ -12,17 +12,17 @@ target triple = "thumbv7-apple-darwin10"
define i32 @interpret_threaded(i8* nocapture %opcodes) nounwind readonly optsize {
entry:
- %0 = load i8* %opcodes, align 1 ; <i8> [#uses=1]
+ %0 = load i8, i8* %opcodes, align 1 ; <i8> [#uses=1]
%1 = zext i8 %0 to i32 ; <i32> [#uses=1]
- %2 = getelementptr inbounds [5 x i8*]* @codetable.2928, i32 0, i32 %1 ; <i8**> [#uses=1]
+ %2 = getelementptr inbounds [5 x i8*], [5 x i8*]* @codetable.2928, i32 0, i32 %1 ; <i8**> [#uses=1]
br label %bb
bb: ; preds = %bb.backedge, %entry
%indvar = phi i32 [ %phitmp, %bb.backedge ], [ 1, %entry ] ; <i32> [#uses=2]
%gotovar.22.0.in = phi i8** [ %gotovar.22.0.in.be, %bb.backedge ], [ %2, %entry ] ; <i8**> [#uses=1]
%result.0 = phi i32 [ %result.0.be, %bb.backedge ], [ 0, %entry ] ; <i32> [#uses=6]
- %opcodes_addr.0 = getelementptr i8* %opcodes, i32 %indvar ; <i8*> [#uses=4]
- %gotovar.22.0 = load i8** %gotovar.22.0.in, align 4 ; <i8*> [#uses=1]
+ %opcodes_addr.0 = getelementptr i8, i8* %opcodes, i32 %indvar ; <i8*> [#uses=4]
+ %gotovar.22.0 = load i8*, i8** %gotovar.22.0.in, align 4 ; <i8*> [#uses=1]
indirectbr i8* %gotovar.22.0, [label %RETURN, label %INCREMENT, label %DECREMENT, label %DOUBLE, label %SWAPWORD]
RETURN: ; preds = %bb
@@ -30,9 +30,9 @@ RETURN: ; preds = %bb
INCREMENT: ; preds = %bb
%3 = add nsw i32 %result.0, 1 ; <i32> [#uses=1]
- %4 = load i8* %opcodes_addr.0, align 1 ; <i8> [#uses=1]
+ %4 = load i8, i8* %opcodes_addr.0, align 1 ; <i8> [#uses=1]
%5 = zext i8 %4 to i32 ; <i32> [#uses=1]
- %6 = getelementptr inbounds [5 x i8*]* @codetable.2928, i32 0, i32 %5 ; <i8**> [#uses=1]
+ %6 = getelementptr inbounds [5 x i8*], [5 x i8*]* @codetable.2928, i32 0, i32 %5 ; <i8**> [#uses=1]
br label %bb.backedge
bb.backedge: ; preds = %SWAPWORD, %DOUBLE, %DECREMENT, %INCREMENT
@@ -43,24 +43,24 @@ bb.backedge: ; preds = %SWAPWORD, %DOUBLE,
DECREMENT: ; preds = %bb
%7 = add i32 %result.0, -1 ; <i32> [#uses=1]
- %8 = load i8* %opcodes_addr.0, align 1 ; <i8> [#uses=1]
+ %8 = load i8, i8* %opcodes_addr.0, align 1 ; <i8> [#uses=1]
%9 = zext i8 %8 to i32 ; <i32> [#uses=1]
- %10 = getelementptr inbounds [5 x i8*]* @codetable.2928, i32 0, i32 %9 ; <i8**> [#uses=1]
+ %10 = getelementptr inbounds [5 x i8*], [5 x i8*]* @codetable.2928, i32 0, i32 %9 ; <i8**> [#uses=1]
br label %bb.backedge
DOUBLE: ; preds = %bb
%11 = shl i32 %result.0, 1 ; <i32> [#uses=1]
- %12 = load i8* %opcodes_addr.0, align 1 ; <i8> [#uses=1]
+ %12 = load i8, i8* %opcodes_addr.0, align 1 ; <i8> [#uses=1]
%13 = zext i8 %12 to i32 ; <i32> [#uses=1]
- %14 = getelementptr inbounds [5 x i8*]* @codetable.2928, i32 0, i32 %13 ; <i8**> [#uses=1]
+ %14 = getelementptr inbounds [5 x i8*], [5 x i8*]* @codetable.2928, i32 0, i32 %13 ; <i8**> [#uses=1]
br label %bb.backedge
SWAPWORD: ; preds = %bb
%15 = shl i32 %result.0, 16 ; <i32> [#uses=1]
%16 = ashr i32 %result.0, 16 ; <i32> [#uses=1]
%17 = or i32 %15, %16 ; <i32> [#uses=1]
- %18 = load i8* %opcodes_addr.0, align 1 ; <i8> [#uses=1]
+ %18 = load i8, i8* %opcodes_addr.0, align 1 ; <i8> [#uses=1]
%19 = zext i8 %18 to i32 ; <i32> [#uses=1]
- %20 = getelementptr inbounds [5 x i8*]* @codetable.2928, i32 0, i32 %19 ; <i8**> [#uses=1]
+ %20 = getelementptr inbounds [5 x i8*], [5 x i8*]* @codetable.2928, i32 0, i32 %19 ; <i8**> [#uses=1]
br label %bb.backedge
}
diff --git a/test/CodeGen/Thumb/2010-07-15-debugOrdering.ll b/test/CodeGen/Thumb/2010-07-15-debugOrdering.ll
index 622f55d994f6..0fd1a9e1e232 100644
--- a/test/CodeGen/Thumb/2010-07-15-debugOrdering.ll
+++ b/test/CodeGen/Thumb/2010-07-15-debugOrdering.ll
@@ -25,11 +25,11 @@ define void @_Z19getClosestDiagonal3ii(%0* noalias sret, i32, i32) nounwind {
%storemerge = phi double [ -1.000000e+00, %4 ], [ 1.000000e+00, %3 ], [ 1.000000e+00, %3 ] ; <double> [#uses=1]
%v_6 = icmp slt i32 %1, 2 ; <i1> [#uses=1]
%storemerge1 = select i1 %v_6, double 1.000000e+00, double -1.000000e+00 ; <double> [#uses=3]
- call void @llvm.dbg.value(metadata double %storemerge, i64 0, metadata !91, metadata !{!"0x102"}), !dbg !0
+ call void @llvm.dbg.value(metadata double %storemerge, i64 0, metadata !91, metadata !DIExpression()), !dbg !0
%v_7 = icmp eq i32 %2, 1, !dbg !92 ; <i1> [#uses=1]
%storemerge2 = select i1 %v_7, double 1.000000e+00, double -1.000000e+00 ; <double> [#uses=3]
- %v_8 = getelementptr inbounds %0* %0, i32 0, i32 0, i32 0 ; <double*> [#uses=1]
- %v_10 = getelementptr inbounds %0* %0, i32 0, i32 0, i32 2 ; <double*> [#uses=1]
+ %v_8 = getelementptr inbounds %0, %0* %0, i32 0, i32 0, i32 0 ; <double*> [#uses=1]
+ %v_10 = getelementptr inbounds %0, %0* %0, i32 0, i32 0, i32 2 ; <double*> [#uses=1]
%v_11 = fmul double %storemerge1, %storemerge1, !dbg !93 ; <double> [#uses=1]
%v_15 = tail call double @sqrt(double %v_11) nounwind readonly, !dbg !93 ; <double> [#uses=1]
%v_16 = fdiv double 1.000000e+00, %v_15, !dbg !93 ; <double> [#uses=3]
@@ -48,108 +48,108 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnon
!llvm.dbg.cu = !{!5}
!llvm.module.flags = !{!104}
-!0 = !MDLocation(line: 46, scope: !1)
-!1 = !{!"0xb\0044\000\000", !101, !2} ; [ DW_TAG_lexical_block ]
-!2 = !{!"0xb\0044\000\000", !101, !3} ; [ DW_TAG_lexical_block ]
-!3 = !{!"0x2e\00getClosestDiagonal3\00getClosestDiagonal3\00_Z19getClosestDiagonal3ii\0044\000\001\000\006\000\000\000", !101, null, !6, null, null, null, null, null} ; [ DW_TAG_subprogram ]
-!4 = !{!"0x29", !101} ; [ DW_TAG_file_type ]
-!5 = !{!"0x11\004\004.2.1 (Based on Apple Inc. build 5658) (LLVM build 00)\001\00\000\00\000", !101, !102, !102, !103, null, null} ; [ DW_TAG_compile_unit ]
-!6 = !{!"0x15\00\000\000\000\000\000\000", !101, !4, null, !7, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!0 = !DILocation(line: 46, scope: !1)
+!1 = distinct !DILexicalBlock(line: 44, column: 0, file: !101, scope: !2)
+!2 = distinct !DILexicalBlock(line: 44, column: 0, file: !101, scope: !3)
+!3 = !DISubprogram(name: "getClosestDiagonal3", linkageName: "_Z19getClosestDiagonal3ii", line: 44, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, file: !101, scope: null, type: !6)
+!4 = !DIFile(filename: "ggEdgeDiscrepancy.cc", directory: "/Volumes/Home/grosbaj/sources/llvm-externals/speccpu2000/benchspec/CINT2000/252.eon/src")
+!5 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "4.2.1 (Based on Apple Inc. build 5658) (LLVM build 00)", isOptimized: true, emissionKind: 0, file: !101, enums: !102, retainedTypes: !102, subprograms: !103)
+!6 = !DISubroutineType(types: !7)
!7 = !{!8, !22, !22}
-!8 = !{!"0x13\00ggVector3\0066\00192\0032\000\000\000", !99, null, null, !10, null, null, null} ; [ DW_TAG_structure_type ] [ggVector3] [line 66, size 192, align 32, offset 0] [def] [from ]
-!9 = !{!"0x29", !"ggVector3.h", !"/Volumes/Home/grosbaj/sources/llvm-externals/speccpu2000/benchspec/CINT2000/252.eon/src", !5} ; [ DW_TAG_file_type ]
-!99 = !{!"ggVector3.h", !"/Volumes/Home/grosbaj/sources/llvm-externals/speccpu2000/benchspec/CINT2000/252.eon/src"}
+!8 = !DICompositeType(tag: DW_TAG_structure_type, name: "ggVector3", line: 66, size: 192, align: 32, file: !99, elements: !10)
+!9 = !DIFile(filename: "ggVector3.h", directory: "/Volumes/Home/grosbaj/sources/llvm-externals/speccpu2000/benchspec/CINT2000/252.eon/src")
+!99 = !DIFile(filename: "ggVector3.h", directory: "/Volumes/Home/grosbaj/sources/llvm-externals/speccpu2000/benchspec/CINT2000/252.eon/src")
!10 = !{!11, !16, !23, !26, !29, !30, !35, !36, !37, !41, !42, !43, !46, !47, !48, !52, !53, !54, !57, !60, !63, !66, !70, !71, !74, !75, !76, !77, !78, !81, !82, !83, !84, !85, !88, !89, !90}
-!11 = !{!"0xd\00e\00160\00192\0032\000\000", !99, !8, !12} ; [ DW_TAG_member ]
-!12 = !{!"0x1\00\000\00192\0032\000\000", !101, !4, !13, !14, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 192, align 32, offset 0] [from double]
-!13 = !{!"0x24\00double\000\0064\0032\000\000\004", !101, !4} ; [ DW_TAG_base_type ]
+!11 = !DIDerivedType(tag: DW_TAG_member, name: "e", line: 160, size: 192, align: 32, file: !99, scope: !8, baseType: !12)
+!12 = !DICompositeType(tag: DW_TAG_array_type, size: 192, align: 32, file: !101, scope: !4, baseType: !13, elements: !14)
+!13 = !DIBasicType(tag: DW_TAG_base_type, name: "double", size: 64, align: 32, encoding: DW_ATE_float)
!14 = !{!15}
-!15 = !{!"0x21\000\003"} ; [ DW_TAG_subrange_type ]
-!16 = !{!"0x2e\00ggVector3\00ggVector3\00\0072\000\000\000\006\000\000\000", !9, !8, !17, null, null, null, null, null} ; [ DW_TAG_subprogram ]
-!17 = !{!"0x15\00\000\000\000\000\000\000", !101, !4, null, !18, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!15 = !DISubrange(count: 3)
+!16 = !DISubprogram(name: "ggVector3", line: 72, isLocal: false, isDefinition: false, virtualIndex: 6, isOptimized: false, file: !9, scope: !8, type: !17)
+!17 = !DISubroutineType(types: !18)
!18 = !{null, !19, !20}
-!19 = !{!"0xf\00\000\0032\0032\000\0064", !101, !4, !8} ; [ DW_TAG_pointer_type ]
-!20 = !{!"0x16\00ggBoolean\00478\000\000\000\000", !100, null, !22} ; [ DW_TAG_typedef ]
-!21 = !{!"0x29", !"math.h", !"/Developer/Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS4.2.Internal.sdk/usr/include/architecture/arm", !5} ; [ DW_TAG_file_type ]
-!100 = !{!"math.h", !"/Developer/Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS4.2.Internal.sdk/usr/include/architecture/arm"}
-!22 = !{!"0x24\00int\000\0032\0032\000\000\005", !101, !4} ; [ DW_TAG_base_type ]
-!23 = !{!"0x2e\00ggVector3\00ggVector3\00\0073\000\000\000\006\000\000\000", !9, !8, !24, null, null, null, null, null} ; [ DW_TAG_subprogram ]
-!24 = !{!"0x15\00\000\000\000\000\000\000", !101, !4, null, !25, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!19 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 32, align: 32, flags: DIFlagArtificial, file: !101, scope: !4, baseType: !8)
+!20 = !DIDerivedType(tag: DW_TAG_typedef, name: "ggBoolean", line: 478, file: !100, baseType: !22)
+!21 = !DIFile(filename: "math.h", directory: "/Developer/Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS4.2.Internal.sdk/usr/include/architecture/arm")
+!100 = !DIFile(filename: "math.h", directory: "/Developer/Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS4.2.Internal.sdk/usr/include/architecture/arm")
+!22 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!23 = !DISubprogram(name: "ggVector3", line: 73, isLocal: false, isDefinition: false, virtualIndex: 6, isOptimized: false, file: !9, scope: !8, type: !24)
+!24 = !DISubroutineType(types: !25)
!25 = !{null, !19}
-!26 = !{!"0x2e\00ggVector3\00ggVector3\00\0074\000\000\000\006\000\000\000", !9, !8, !27, null, null, null, null, null} ; [ DW_TAG_subprogram ]
-!27 = !{!"0x15\00\000\000\000\000\000\000", !101, !4, null, !28, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!26 = !DISubprogram(name: "ggVector3", line: 74, isLocal: false, isDefinition: false, virtualIndex: 6, isOptimized: false, file: !9, scope: !8, type: !27)
+!27 = !DISubroutineType(types: !28)
!28 = !{null, !19, !13, !13, !13}
-!29 = !{!"0x2e\00Set\00Set\00_ZN9ggVector33SetEddd\0081\000\000\000\006\000\000\000", !9, !8, !27, null, null, null, null, null} ; [ DW_TAG_subprogram ]
-!30 = !{!"0x2e\00x\00x\00_ZNK9ggVector31xEv\0082\000\000\000\006\000\000\000", !9, !8, !31, null, null, null, null, null} ; [ DW_TAG_subprogram ]
-!31 = !{!"0x15\00\000\000\000\000\000\000", !101, !4, null, !32, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!29 = !DISubprogram(name: "Set", linkageName: "_ZN9ggVector33SetEddd", line: 81, isLocal: false, isDefinition: false, virtualIndex: 6, isOptimized: false, file: !9, scope: !8, type: !27)
+!30 = !DISubprogram(name: "x", linkageName: "_ZNK9ggVector31xEv", line: 82, isLocal: false, isDefinition: false, virtualIndex: 6, isOptimized: false, file: !9, scope: !8, type: !31)
+!31 = !DISubroutineType(types: !32)
!32 = !{!13, !33}
-!33 = !{!"0xf\00\000\0032\0032\000\0064", !101, !4, !34} ; [ DW_TAG_pointer_type ]
-!34 = !{!"0x26\00\000\00192\0032\000\000", !101, !4, !8} ; [ DW_TAG_const_type ]
-!35 = !{!"0x2e\00y\00y\00_ZNK9ggVector31yEv\0083\000\000\000\006\000\000\000", !9, !8, !31, null, null, null, null, null} ; [ DW_TAG_subprogram ]
-!36 = !{!"0x2e\00z\00z\00_ZNK9ggVector31zEv\0084\000\000\000\006\000\000\000", !9, !8, !31, null, null, null, null, null} ; [ DW_TAG_subprogram ]
-!37 = !{!"0x2e\00x\00x\00_ZN9ggVector31xEv\0085\000\001\000\006\000\000\000", !9, !8, !38, null, null, null, null, null} ; [ DW_TAG_subprogram ]
-!38 = !{!"0x15\00\000\000\000\000\000\000", !101, !4, null, !39, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!33 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 32, align: 32, flags: DIFlagArtificial, file: !101, scope: !4, baseType: !34)
+!34 = !DIDerivedType(tag: DW_TAG_const_type, size: 192, align: 32, file: !101, scope: !4, baseType: !8)
+!35 = !DISubprogram(name: "y", linkageName: "_ZNK9ggVector31yEv", line: 83, isLocal: false, isDefinition: false, virtualIndex: 6, isOptimized: false, file: !9, scope: !8, type: !31)
+!36 = !DISubprogram(name: "z", linkageName: "_ZNK9ggVector31zEv", line: 84, isLocal: false, isDefinition: false, virtualIndex: 6, isOptimized: false, file: !9, scope: !8, type: !31)
+!37 = !DISubprogram(name: "x", linkageName: "_ZN9ggVector31xEv", line: 85, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, file: !9, scope: !8, type: !38)
+!38 = !DISubroutineType(types: !39)
!39 = !{!40, !19}
-!40 = !{!"0x10\00double\000\0032\0032\000\000", !101, !4, !13} ; [ DW_TAG_reference_type ]
-!41 = !{!"0x2e\00y\00y\00_ZN9ggVector31yEv\0086\000\001\000\006\000\000\000", !9, !8, !38, null, null, null, null, null} ; [ DW_TAG_subprogram ]
-!42 = !{!"0x2e\00z\00z\00_ZN9ggVector31zEv\0087\000\001\000\006\000\000\000", !9, !8, !38, null, null, null, null, null} ; [ DW_TAG_subprogram ]
-!43 = !{!"0x2e\00SetX\00SetX\00_ZN9ggVector34SetXEd\0088\000\000\000\006\000\000\000", !9, !8, !44, null, null, null, null, null} ; [ DW_TAG_subprogram ]
-!44 = !{!"0x15\00\000\000\000\000\000\000", !101, !4, null, !45, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!40 = !DIDerivedType(tag: DW_TAG_reference_type, name: "double", size: 32, align: 32, file: !101, scope: !4, baseType: !13)
+!41 = !DISubprogram(name: "y", linkageName: "_ZN9ggVector31yEv", line: 86, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, file: !9, scope: !8, type: !38)
+!42 = !DISubprogram(name: "z", linkageName: "_ZN9ggVector31zEv", line: 87, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, file: !9, scope: !8, type: !38)
+!43 = !DISubprogram(name: "SetX", linkageName: "_ZN9ggVector34SetXEd", line: 88, isLocal: false, isDefinition: false, virtualIndex: 6, isOptimized: false, file: !9, scope: !8, type: !44)
+!44 = !DISubroutineType(types: !45)
!45 = !{null, !19, !13}
-!46 = !{!"0x2e\00SetY\00SetY\00_ZN9ggVector34SetYEd\0089\000\000\000\006\000\000\000", !9, !8, !44, null, null, null, null, null} ; [ DW_TAG_subprogram ]
-!47 = !{!"0x2e\00SetZ\00SetZ\00_ZN9ggVector34SetZEd\0090\000\000\000\006\000\000\000", !9, !8, !44, null, null, null, null, null} ; [ DW_TAG_subprogram ]
-!48 = !{!"0x2e\00ggVector3\00ggVector3\00\0092\000\000\000\006\000\000\000", !9, !8, !49, null, null, null, null, null} ; [ DW_TAG_subprogram ]
-!49 = !{!"0x15\00\000\000\000\000\000\000", !101, !4, null, !50, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!46 = !DISubprogram(name: "SetY", linkageName: "_ZN9ggVector34SetYEd", line: 89, isLocal: false, isDefinition: false, virtualIndex: 6, isOptimized: false, file: !9, scope: !8, type: !44)
+!47 = !DISubprogram(name: "SetZ", linkageName: "_ZN9ggVector34SetZEd", line: 90, isLocal: false, isDefinition: false, virtualIndex: 6, isOptimized: false, file: !9, scope: !8, type: !44)
+!48 = !DISubprogram(name: "ggVector3", line: 92, isLocal: false, isDefinition: false, virtualIndex: 6, isOptimized: false, file: !9, scope: !8, type: !49)
+!49 = !DISubroutineType(types: !50)
!50 = !{null, !19, !51}
-!51 = !{!"0x10\00\000\0032\0032\000\000", !101, !4, !34} ; [ DW_TAG_reference_type ]
-!52 = !{!"0x2e\00tolerance\00tolerance\00_ZNK9ggVector39toleranceEv\00100\000\000\000\006\000\000\000", !9, !8, !31, null, null, null, null, null} ; [ DW_TAG_subprogram ]
-!53 = !{!"0x2e\00tolerance\00tolerance\00_ZN9ggVector39toleranceEv\00101\000\000\000\006\000\000\000", !9, !8, !38, null, null, null, null, null} ; [ DW_TAG_subprogram ]
-!54 = !{!"0x2e\00operator+\00operator+\00_ZNK9ggVector3psEv\00107\000\000\000\006\000\000\000", !9, !8, !55, null, null, null, null, null} ; [ DW_TAG_subprogram ]
-!55 = !{!"0x15\00\000\000\000\000\000\000", !101, !4, null, !56, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!51 = !DIDerivedType(tag: DW_TAG_reference_type, size: 32, align: 32, file: !101, scope: !4, baseType: !34)
+!52 = !DISubprogram(name: "tolerance", linkageName: "_ZNK9ggVector39toleranceEv", line: 100, isLocal: false, isDefinition: false, virtualIndex: 6, isOptimized: false, file: !9, scope: !8, type: !31)
+!53 = !DISubprogram(name: "tolerance", linkageName: "_ZN9ggVector39toleranceEv", line: 101, isLocal: false, isDefinition: false, virtualIndex: 6, isOptimized: false, file: !9, scope: !8, type: !38)
+!54 = !DISubprogram(name: "operator+", linkageName: "_ZNK9ggVector3psEv", line: 107, isLocal: false, isDefinition: false, virtualIndex: 6, isOptimized: false, file: !9, scope: !8, type: !55)
+!55 = !DISubroutineType(types: !56)
!56 = !{!51, !33}
-!57 = !{!"0x2e\00operator-\00operator-\00_ZNK9ggVector3ngEv\00108\000\000\000\006\000\000\000", !9, !8, !58, null, null, null, null, null} ; [ DW_TAG_subprogram ]
-!58 = !{!"0x15\00\000\000\000\000\000\000", !101, !4, null, !59, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!57 = !DISubprogram(name: "operator-", linkageName: "_ZNK9ggVector3ngEv", line: 108, isLocal: false, isDefinition: false, virtualIndex: 6, isOptimized: false, file: !9, scope: !8, type: !58)
+!58 = !DISubroutineType(types: !59)
!59 = !{!8, !33}
-!60 = !{!"0x2e\00operator[]\00operator[]\00_ZNK9ggVector3ixEi\00290\000\000\000\006\000\000\000", !9, !8, !61, null, null, null, null, null} ; [ DW_TAG_subprogram ]
-!61 = !{!"0x15\00\000\000\000\000\000\000", !101, !4, null, !62, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!60 = !DISubprogram(name: "operator[]", linkageName: "_ZNK9ggVector3ixEi", line: 290, isLocal: false, isDefinition: false, virtualIndex: 6, isOptimized: false, file: !9, scope: !8, type: !61)
+!61 = !DISubroutineType(types: !62)
!62 = !{!13, !33, !22}
-!63 = !{!"0x2e\00operator[]\00operator[]\00_ZN9ggVector3ixEi\00278\000\000\000\006\000\000\000", !9, !8, !64, null, null, null, null, null} ; [ DW_TAG_subprogram ]
-!64 = !{!"0x15\00\000\000\000\000\000\000", !101, !4, null, !65, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!63 = !DISubprogram(name: "operator[]", linkageName: "_ZN9ggVector3ixEi", line: 278, isLocal: false, isDefinition: false, virtualIndex: 6, isOptimized: false, file: !9, scope: !8, type: !64)
+!64 = !DISubroutineType(types: !65)
!65 = !{!40, !19, !22}
-!66 = !{!"0x2e\00operator+=\00operator+=\00_ZN9ggVector3pLERKS_\00303\000\000\000\006\000\000\000", !9, !8, !67, null, null, null, null, null} ; [ DW_TAG_subprogram ]
-!67 = !{!"0x15\00\000\000\000\000\000\000", !101, !4, null, !68, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!66 = !DISubprogram(name: "operator+=", linkageName: "_ZN9ggVector3pLERKS_", line: 303, isLocal: false, isDefinition: false, virtualIndex: 6, isOptimized: false, file: !9, scope: !8, type: !67)
+!67 = !DISubroutineType(types: !68)
!68 = !{!69, !19, !51}
-!69 = !{!"0x10\00ggVector3\000\0032\0032\000\000", !101, !4, !8} ; [ DW_TAG_reference_type ]
-!70 = !{!"0x2e\00operator-=\00operator-=\00_ZN9ggVector3mIERKS_\00310\000\000\000\006\000\000\000", !9, !8, !67, null, null, null, null, null} ; [ DW_TAG_subprogram ]
-!71 = !{!"0x2e\00operator*=\00operator*=\00_ZN9ggVector3mLEd\00317\000\000\000\006\000\000\000", !9, !8, !72, null, null, null, null, null} ; [ DW_TAG_subprogram ]
-!72 = !{!"0x15\00\000\000\000\000\000\000", !101, !4, null, !73, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!69 = !DIDerivedType(tag: DW_TAG_reference_type, name: "ggVector3", size: 32, align: 32, file: !101, scope: !4, baseType: !8)
+!70 = !DISubprogram(name: "operator-=", linkageName: "_ZN9ggVector3mIERKS_", line: 310, isLocal: false, isDefinition: false, virtualIndex: 6, isOptimized: false, file: !9, scope: !8, type: !67)
+!71 = !DISubprogram(name: "operator*=", linkageName: "_ZN9ggVector3mLEd", line: 317, isLocal: false, isDefinition: false, virtualIndex: 6, isOptimized: false, file: !9, scope: !8, type: !72)
+!72 = !DISubroutineType(types: !73)
!73 = !{!69, !19, !13}
-!74 = !{!"0x2e\00operator/=\00operator/=\00_ZN9ggVector3dVEd\00324\000\000\000\006\000\000\000", !9, !8, !72, null, null, null, null, null} ; [ DW_TAG_subprogram ]
-!75 = !{!"0x2e\00length\00length\00_ZNK9ggVector36lengthEv\00121\000\000\000\006\000\000\000", !9, !8, !31, null, null, null, null, null} ; [ DW_TAG_subprogram ]
-!76 = !{!"0x2e\00squaredLength\00squaredLength\00_ZNK9ggVector313squaredLengthEv\00122\000\000\000\006\000\000\000", !9, !8, !31, null, null, null, null, null} ; [ DW_TAG_subprogram ]
-!77 = !{!"0x2e\00MakeUnitVector\00MakeUnitVector\00_ZN9ggVector314MakeUnitVectorEv\00217\000\001\000\006\000\000\000", !9, !8, !24, null, null, null, null, null} ; [ DW_TAG_subprogram ]
-!78 = !{!"0x2e\00Perturb\00Perturb\00_ZNK9ggVector37PerturbEdd\00126\000\000\000\006\000\000\000", !9, !8, !79, null, null, null, null, null} ; [ DW_TAG_subprogram ]
-!79 = !{!"0x15\00\000\000\000\000\000\000", !101, !4, null, !80, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!74 = !DISubprogram(name: "operator/=", linkageName: "_ZN9ggVector3dVEd", line: 324, isLocal: false, isDefinition: false, virtualIndex: 6, isOptimized: false, file: !9, scope: !8, type: !72)
+!75 = !DISubprogram(name: "length", linkageName: "_ZNK9ggVector36lengthEv", line: 121, isLocal: false, isDefinition: false, virtualIndex: 6, isOptimized: false, file: !9, scope: !8, type: !31)
+!76 = !DISubprogram(name: "squaredLength", linkageName: "_ZNK9ggVector313squaredLengthEv", line: 122, isLocal: false, isDefinition: false, virtualIndex: 6, isOptimized: false, file: !9, scope: !8, type: !31)
+!77 = !DISubprogram(name: "MakeUnitVector", linkageName: "_ZN9ggVector314MakeUnitVectorEv", line: 217, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, file: !9, scope: !8, type: !24)
+!78 = !DISubprogram(name: "Perturb", linkageName: "_ZNK9ggVector37PerturbEdd", line: 126, isLocal: false, isDefinition: false, virtualIndex: 6, isOptimized: false, file: !9, scope: !8, type: !79)
+!79 = !DISubroutineType(types: !80)
!80 = !{!8, !33, !13, !13}
-!81 = !{!"0x2e\00maxComponent\00maxComponent\00_ZNK9ggVector312maxComponentEv\00128\000\000\000\006\000\000\000", !9, !8, !31, null, null, null, null, null} ; [ DW_TAG_subprogram ]
-!82 = !{!"0x2e\00minComponent\00minComponent\00_ZNK9ggVector312minComponentEv\00129\000\000\000\006\000\000\000", !9, !8, !31, null, null, null, null, null} ; [ DW_TAG_subprogram ]
-!83 = !{!"0x2e\00maxAbsComponent\00maxAbsComponent\00_ZNK9ggVector315maxAbsComponentEv\00131\000\000\000\006\000\000\000", !9, !8, !31, null, null, null, null, null} ; [ DW_TAG_subprogram ]
-!84 = !{!"0x2e\00minAbsComponent\00minAbsComponent\00_ZNK9ggVector315minAbsComponentEv\00132\000\000\000\006\000\000\000", !9, !8, !31, null, null, null, null, null} ; [ DW_TAG_subprogram ]
-!85 = !{!"0x2e\00indexOfMinComponent\00indexOfMinComponent\00_ZNK9ggVector319indexOfMinComponentEv\00133\000\000\000\006\000\000\000", !9, !8, !86, null, null, null, null, null} ; [ DW_TAG_subprogram ]
-!86 = !{!"0x15\00\000\000\000\000\000\000", !101, !4, null, !87, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!81 = !DISubprogram(name: "maxComponent", linkageName: "_ZNK9ggVector312maxComponentEv", line: 128, isLocal: false, isDefinition: false, virtualIndex: 6, isOptimized: false, file: !9, scope: !8, type: !31)
+!82 = !DISubprogram(name: "minComponent", linkageName: "_ZNK9ggVector312minComponentEv", line: 129, isLocal: false, isDefinition: false, virtualIndex: 6, isOptimized: false, file: !9, scope: !8, type: !31)
+!83 = !DISubprogram(name: "maxAbsComponent", linkageName: "_ZNK9ggVector315maxAbsComponentEv", line: 131, isLocal: false, isDefinition: false, virtualIndex: 6, isOptimized: false, file: !9, scope: !8, type: !31)
+!84 = !DISubprogram(name: "minAbsComponent", linkageName: "_ZNK9ggVector315minAbsComponentEv", line: 132, isLocal: false, isDefinition: false, virtualIndex: 6, isOptimized: false, file: !9, scope: !8, type: !31)
+!85 = !DISubprogram(name: "indexOfMinComponent", linkageName: "_ZNK9ggVector319indexOfMinComponentEv", line: 133, isLocal: false, isDefinition: false, virtualIndex: 6, isOptimized: false, file: !9, scope: !8, type: !86)
+!86 = !DISubroutineType(types: !87)
!87 = !{!22, !33}
-!88 = !{!"0x2e\00indexOfMinAbsComponent\00indexOfMinAbsComponent\00_ZNK9ggVector322indexOfMinAbsComponentEv\00137\000\000\000\006\000\000\000", !9, !8, !86, null, null, null, null, null} ; [ DW_TAG_subprogram ]
-!89 = !{!"0x2e\00indexOfMaxComponent\00indexOfMaxComponent\00_ZNK9ggVector319indexOfMaxComponentEv\00146\000\000\000\006\000\000\000", !9, !8, !86, null, null, null, null, null} ; [ DW_TAG_subprogram ]
-!90 = !{!"0x2e\00indexOfMaxAbsComponent\00indexOfMaxAbsComponent\00_ZNK9ggVector322indexOfMaxAbsComponentEv\00150\000\000\000\006\000\000\000", !9, !8, !86, null, null, null, null, null} ; [ DW_TAG_subprogram ]
-!91 = !{!"0x100\00vx\0046\000", !1, !4, !13} ; [ DW_TAG_auto_variable ]
-!92 = !MDLocation(line: 48, scope: !1)
-!93 = !MDLocation(line: 218, scope: !94, inlinedAt: !96)
-!94 = !{!"0xb\00217\000\000", !101, !95} ; [ DW_TAG_lexical_block ]
-!95 = !{!"0xb\00217\000\000", !101, !77} ; [ DW_TAG_lexical_block ]
-!96 = !MDLocation(line: 51, scope: !1)
-!97 = !MDLocation(line: 227, scope: !94, inlinedAt: !96)
-!98 = !MDLocation(line: 52, scope: !1)
-!101 = !{!"ggEdgeDiscrepancy.cc", !"/Volumes/Home/grosbaj/sources/llvm-externals/speccpu2000/benchspec/CINT2000/252.eon/src"}
-!102 = !{i32 0}
+!88 = !DISubprogram(name: "indexOfMinAbsComponent", linkageName: "_ZNK9ggVector322indexOfMinAbsComponentEv", line: 137, isLocal: false, isDefinition: false, virtualIndex: 6, isOptimized: false, file: !9, scope: !8, type: !86)
+!89 = !DISubprogram(name: "indexOfMaxComponent", linkageName: "_ZNK9ggVector319indexOfMaxComponentEv", line: 146, isLocal: false, isDefinition: false, virtualIndex: 6, isOptimized: false, file: !9, scope: !8, type: !86)
+!90 = !DISubprogram(name: "indexOfMaxAbsComponent", linkageName: "_ZNK9ggVector322indexOfMaxAbsComponentEv", line: 150, isLocal: false, isDefinition: false, virtualIndex: 6, isOptimized: false, file: !9, scope: !8, type: !86)
+!91 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "vx", line: 46, scope: !1, file: !4, type: !13)
+!92 = !DILocation(line: 48, scope: !1)
+!93 = !DILocation(line: 218, scope: !94, inlinedAt: !96)
+!94 = distinct !DILexicalBlock(line: 217, column: 0, file: !101, scope: !95)
+!95 = distinct !DILexicalBlock(line: 217, column: 0, file: !101, scope: !77)
+!96 = !DILocation(line: 51, scope: !1)
+!97 = !DILocation(line: 227, scope: !94, inlinedAt: !96)
+!98 = !DILocation(line: 52, scope: !1)
+!101 = !DIFile(filename: "ggEdgeDiscrepancy.cc", directory: "/Volumes/Home/grosbaj/sources/llvm-externals/speccpu2000/benchspec/CINT2000/252.eon/src")
+!102 = !{}
!103 = !{!3, !77}
-!104 = !{i32 1, !"Debug Info Version", i32 2}
+!104 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/CodeGen/Thumb/2011-05-11-DAGLegalizer.ll b/test/CodeGen/Thumb/2011-05-11-DAGLegalizer.ll
index ed55bb5dcf89..d8e165145bd6 100644
--- a/test/CodeGen/Thumb/2011-05-11-DAGLegalizer.ll
+++ b/test/CodeGen/Thumb/2011-05-11-DAGLegalizer.ll
@@ -23,32 +23,32 @@ entry:
br label %do.body
do.body: ; preds = %entry
- %tmp = load i8** @kkkkkk, align 4
- %tmp1 = load %struct.MMMMMMMMMMMM** %aidData.addr
- %eph = getelementptr inbounds %struct.MMMMMMMMMMMM* %tmp1, i32 0, i32 0
- %arrayidx = getelementptr inbounds [4 x %struct.RRRRRRRR]* %eph, i32 0, i32 0
+ %tmp = load i8*, i8** @kkkkkk, align 4
+ %tmp1 = load %struct.MMMMMMMMMMMM*, %struct.MMMMMMMMMMMM** %aidData.addr
+ %eph = getelementptr inbounds %struct.MMMMMMMMMMMM, %struct.MMMMMMMMMMMM* %tmp1, i32 0, i32 0
+ %arrayidx = getelementptr inbounds [4 x %struct.RRRRRRRR], [4 x %struct.RRRRRRRR]* %eph, i32 0, i32 0
%tmp2 = bitcast %struct.RRRRRRRR* %agg.tmp to i8*
%tmp3 = bitcast %struct.RRRRRRRR* %arrayidx to i8*
call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp2, i8* %tmp3, i32 312, i32 4, i1 false)
- %tmp5 = load %struct.MMMMMMMMMMMM** %aidData.addr
- %eph6 = getelementptr inbounds %struct.MMMMMMMMMMMM* %tmp5, i32 0, i32 0
- %arrayidx7 = getelementptr inbounds [4 x %struct.RRRRRRRR]* %eph6, i32 0, i32 1
+ %tmp5 = load %struct.MMMMMMMMMMMM*, %struct.MMMMMMMMMMMM** %aidData.addr
+ %eph6 = getelementptr inbounds %struct.MMMMMMMMMMMM, %struct.MMMMMMMMMMMM* %tmp5, i32 0, i32 0
+ %arrayidx7 = getelementptr inbounds [4 x %struct.RRRRRRRR], [4 x %struct.RRRRRRRR]* %eph6, i32 0, i32 1
%tmp8 = bitcast %struct.RRRRRRRR* %agg.tmp4 to i8*
%tmp9 = bitcast %struct.RRRRRRRR* %arrayidx7 to i8*
call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp8, i8* %tmp9, i32 312, i32 4, i1 false)
- %tmp11 = load %struct.MMMMMMMMMMMM** %aidData.addr
- %eph12 = getelementptr inbounds %struct.MMMMMMMMMMMM* %tmp11, i32 0, i32 0
- %arrayidx13 = getelementptr inbounds [4 x %struct.RRRRRRRR]* %eph12, i32 0, i32 2
+ %tmp11 = load %struct.MMMMMMMMMMMM*, %struct.MMMMMMMMMMMM** %aidData.addr
+ %eph12 = getelementptr inbounds %struct.MMMMMMMMMMMM, %struct.MMMMMMMMMMMM* %tmp11, i32 0, i32 0
+ %arrayidx13 = getelementptr inbounds [4 x %struct.RRRRRRRR], [4 x %struct.RRRRRRRR]* %eph12, i32 0, i32 2
%tmp14 = bitcast %struct.RRRRRRRR* %agg.tmp10 to i8*
%tmp15 = bitcast %struct.RRRRRRRR* %arrayidx13 to i8*
call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp14, i8* %tmp15, i32 312, i32 4, i1 false)
- %tmp17 = load %struct.MMMMMMMMMMMM** %aidData.addr
- %eph18 = getelementptr inbounds %struct.MMMMMMMMMMMM* %tmp17, i32 0, i32 0
- %arrayidx19 = getelementptr inbounds [4 x %struct.RRRRRRRR]* %eph18, i32 0, i32 3
+ %tmp17 = load %struct.MMMMMMMMMMMM*, %struct.MMMMMMMMMMMM** %aidData.addr
+ %eph18 = getelementptr inbounds %struct.MMMMMMMMMMMM, %struct.MMMMMMMMMMMM* %tmp17, i32 0, i32 0
+ %arrayidx19 = getelementptr inbounds [4 x %struct.RRRRRRRR], [4 x %struct.RRRRRRRR]* %eph18, i32 0, i32 3
%tmp20 = bitcast %struct.RRRRRRRR* %agg.tmp16 to i8*
%tmp21 = bitcast %struct.RRRRRRRR* %arrayidx19 to i8*
call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp20, i8* %tmp21, i32 312, i32 4, i1 false)
- call void (i8*, i32, i8*, i8*, ...)* @CLLoggingLog(i8* %tmp, i32 2, i8* getelementptr inbounds ([62 x i8]* @__PRETTY_FUNCTION__._ZN12CLGll, i32 0, i32 0), i8* getelementptr inbounds ([75 x i8]* @.str, i32 0, i32 0), %struct.RRRRRRRR* byval %agg.tmp, %struct.RRRRRRRR* byval %agg.tmp4, %struct.RRRRRRRR* byval %agg.tmp10, %struct.RRRRRRRR* byval %agg.tmp16)
+ call void (i8*, i32, i8*, i8*, ...) @CLLoggingLog(i8* %tmp, i32 2, i8* getelementptr inbounds ([62 x i8], [62 x i8]* @__PRETTY_FUNCTION__._ZN12CLGll, i32 0, i32 0), i8* getelementptr inbounds ([75 x i8], [75 x i8]* @.str, i32 0, i32 0), %struct.RRRRRRRR* byval %agg.tmp, %struct.RRRRRRRR* byval %agg.tmp4, %struct.RRRRRRRR* byval %agg.tmp10, %struct.RRRRRRRR* byval %agg.tmp16)
br label %do.end
do.end: ; preds = %do.body
diff --git a/test/CodeGen/Thumb/2011-06-16-NoGPRs.ll b/test/CodeGen/Thumb/2011-06-16-NoGPRs.ll
index d39a76085af2..accb82c18e70 100644
--- a/test/CodeGen/Thumb/2011-06-16-NoGPRs.ll
+++ b/test/CodeGen/Thumb/2011-06-16-NoGPRs.ll
@@ -16,7 +16,7 @@ declare i8* @f2(i8*, i8*, ...)
define internal void @f(i8* %self, i8* %_cmd, %0* %inObjects, %0* %inIndexes) optsize ssp {
entry:
- %call14 = tail call i8* (i8*, i8*, ...)* (i8*, i8*)* @f1(i8* undef, i8* %_cmd) optsize
+ %call14 = tail call i8* (i8*, i8*, ...)* (i8*, i8*) @f1(i8* undef, i8* %_cmd) optsize
%0 = bitcast i8* (i8*, i8*, ...)* %call14 to void (i8*, i8*, %0*, %0*)*
tail call void %0(i8* %self, i8* %_cmd, %0* %inObjects, %0* %inIndexes) optsize
tail call void bitcast (i8* (i8*, i8*, ...)* @f2 to void (i8*, i8*, i32, %0*, %0*)*)(i8* %self, i8* undef, i32 2, %0* %inIndexes, %0* undef) optsize
diff --git a/test/CodeGen/Thumb/2011-EpilogueBug.ll b/test/CodeGen/Thumb/2011-EpilogueBug.ll
index 16789e66cc18..9657dc7e0abc 100644
--- a/test/CodeGen/Thumb/2011-EpilogueBug.ll
+++ b/test/CodeGen/Thumb/2011-EpilogueBug.ll
@@ -6,7 +6,7 @@
define void @t1(%struct.state* %v) {
; CHECK: push {r4
- %tmp6 = load i32* null
+ %tmp6 = load i32, i32* null
%tmp8 = alloca float, i32 %tmp6
store i32 1, i32* null
br label %return
diff --git a/test/CodeGen/Thumb/2014-06-10-thumb1-ldst-opt-bug.ll b/test/CodeGen/Thumb/2014-06-10-thumb1-ldst-opt-bug.ll
index cfa1159bda2c..d5c8db76de86 100644
--- a/test/CodeGen/Thumb/2014-06-10-thumb1-ldst-opt-bug.ll
+++ b/test/CodeGen/Thumb/2014-06-10-thumb1-ldst-opt-bug.ll
@@ -7,9 +7,9 @@ entry:
; CHECK: ldm
; CHECK-NEXT: subs
; CHECK-NEXT: bl
- %0 = load i32* %A, align 4
- %arrayidx1 = getelementptr inbounds i32* %A, i32 1
- %1 = load i32* %arrayidx1, align 4
+ %0 = load i32, i32* %A, align 4
+ %arrayidx1 = getelementptr inbounds i32, i32* %A, i32 1
+ %1 = load i32, i32* %arrayidx1, align 4
tail call void @bar(i32* %A, i32 %0, i32 %1) #2
ret void
}
diff --git a/test/CodeGen/Thumb/PR17309.ll b/test/CodeGen/Thumb/PR17309.ll
index b7b08e941898..f1033e7d7418 100644
--- a/test/CodeGen/Thumb/PR17309.ll
+++ b/test/CodeGen/Thumb/PR17309.ll
@@ -10,7 +10,7 @@
define void @pass_C() #0 {
entry:
%c = alloca %struct.C, align 1
- %0 = getelementptr inbounds %struct.C* %c, i32 0, i32 0, i32 0
+ %0 = getelementptr inbounds %struct.C, %struct.C* %c, i32 0, i32 0, i32 0
call void @llvm.lifetime.start(i64 1000, i8* %0) #1
call void @use_C(%struct.C* byval %c) #3
call void @llvm.lifetime.end(i64 1000, i8* %0) #1
diff --git a/test/CodeGen/Thumb/asmprinter-bug.ll b/test/CodeGen/Thumb/asmprinter-bug.ll
index 18e11baf444c..e12fcb161a4a 100644
--- a/test/CodeGen/Thumb/asmprinter-bug.ll
+++ b/test/CodeGen/Thumb/asmprinter-bug.ll
@@ -15,14 +15,14 @@
define void @adpcm_coder(i16* nocapture %indata, i8* nocapture %outdata, i32 %len, %struct.adpcm_state* nocapture %state) nounwind {
entry:
- %0 = getelementptr %struct.adpcm_state* %state, i32 0, i32 0 ; <i16*> [#uses=2]
- %1 = load i16* %0, align 2 ; <i16> [#uses=1]
+ %0 = getelementptr %struct.adpcm_state, %struct.adpcm_state* %state, i32 0, i32 0 ; <i16*> [#uses=2]
+ %1 = load i16, i16* %0, align 2 ; <i16> [#uses=1]
%2 = sext i16 %1 to i32 ; <i32> [#uses=2]
- %3 = getelementptr %struct.adpcm_state* %state, i32 0, i32 1 ; <i8*> [#uses=2]
- %4 = load i8* %3, align 2 ; <i8> [#uses=1]
+ %3 = getelementptr %struct.adpcm_state, %struct.adpcm_state* %state, i32 0, i32 1 ; <i8*> [#uses=2]
+ %4 = load i8, i8* %3, align 2 ; <i8> [#uses=1]
%5 = sext i8 %4 to i32 ; <i32> [#uses=3]
- %6 = getelementptr [89 x i32]* @stepsizeTable, i32 0, i32 %5 ; <i32*> [#uses=1]
- %7 = load i32* %6, align 4 ; <i32> [#uses=1]
+ %6 = getelementptr [89 x i32], [89 x i32]* @stepsizeTable, i32 0, i32 %5 ; <i32*> [#uses=1]
+ %7 = load i32, i32* %6, align 4 ; <i32> [#uses=1]
%8 = icmp sgt i32 %len, 0 ; <i1> [#uses=1]
br i1 %8, label %bb, label %bb27
@@ -34,8 +34,8 @@ bb: ; preds = %bb25, %entry
%index.033 = phi i32 [ %5, %entry ], [ %index.2, %bb25 ] ; <i32> [#uses=1]
%valpred.132 = phi i32 [ %2, %entry ], [ %valpred.2, %bb25 ] ; <i32> [#uses=2]
%step.031 = phi i32 [ %7, %entry ], [ %36, %bb25 ] ; <i32> [#uses=5]
- %inp.038 = getelementptr i16* %indata, i32 %indvar ; <i16*> [#uses=1]
- %9 = load i16* %inp.038, align 2 ; <i16> [#uses=1]
+ %inp.038 = getelementptr i16, i16* %indata, i32 %indvar ; <i16*> [#uses=1]
+ %9 = load i16, i16* %inp.038, align 2 ; <i16> [#uses=1]
%10 = sext i16 %9 to i32 ; <i32> [#uses=1]
%11 = sub i32 %10, %valpred.132 ; <i32> [#uses=3]
%12 = icmp slt i32 %11, 0 ; <i1> [#uses=1]
@@ -79,15 +79,15 @@ bb18: ; preds = %bb17, %bb16, %bb
%delta.1 = or i32 %21, %iftmp.1.0 ; <i32> [#uses=1]
%delta.2 = or i32 %delta.1, %25 ; <i32> [#uses=1]
%29 = xor i32 %delta.2, 1 ; <i32> [#uses=3]
- %30 = getelementptr [16 x i32]* @indexTable, i32 0, i32 %29 ; <i32*> [#uses=1]
- %31 = load i32* %30, align 4 ; <i32> [#uses=1]
+ %30 = getelementptr [16 x i32], [16 x i32]* @indexTable, i32 0, i32 %29 ; <i32*> [#uses=1]
+ %31 = load i32, i32* %30, align 4 ; <i32> [#uses=1]
%32 = add i32 %31, %index.033 ; <i32> [#uses=2]
%33 = icmp slt i32 %32, 0 ; <i1> [#uses=1]
%index.1 = select i1 %33, i32 0, i32 %32 ; <i32> [#uses=2]
%34 = icmp sgt i32 %index.1, 88 ; <i1> [#uses=1]
%index.2 = select i1 %34, i32 88, i32 %index.1 ; <i32> [#uses=3]
- %35 = getelementptr [89 x i32]* @stepsizeTable, i32 0, i32 %index.2 ; <i32*> [#uses=1]
- %36 = load i32* %35, align 4 ; <i32> [#uses=1]
+ %35 = getelementptr [89 x i32], [89 x i32]* @stepsizeTable, i32 0, i32 %index.2 ; <i32*> [#uses=1]
+ %36 = load i32, i32* %35, align 4 ; <i32> [#uses=1]
%37 = icmp eq i32 %bufferstep.035, 0 ; <i1> [#uses=1]
br i1 %37, label %bb24, label %bb23
@@ -102,7 +102,7 @@ bb24: ; preds = %bb18
%42 = trunc i32 %outputbuffer.134 to i8 ; <i8> [#uses=1]
%43 = or i8 %41, %42 ; <i8> [#uses=1]
store i8 %43, i8* %outp.136, align 1
- %44 = getelementptr i8* %outp.136, i32 1 ; <i8*> [#uses=1]
+ %44 = getelementptr i8, i8* %outp.136, i32 1 ; <i8*> [#uses=1]
br label %bb25
bb25: ; preds = %bb24, %bb23
@@ -140,14 +140,14 @@ bb29: ; preds = %bb28, %bb27
define void @adpcm_decoder(i8* nocapture %indata, i16* nocapture %outdata, i32 %len, %struct.adpcm_state* nocapture %state) nounwind {
entry:
- %0 = getelementptr %struct.adpcm_state* %state, i32 0, i32 0 ; <i16*> [#uses=2]
- %1 = load i16* %0, align 2 ; <i16> [#uses=1]
+ %0 = getelementptr %struct.adpcm_state, %struct.adpcm_state* %state, i32 0, i32 0 ; <i16*> [#uses=2]
+ %1 = load i16, i16* %0, align 2 ; <i16> [#uses=1]
%2 = sext i16 %1 to i32 ; <i32> [#uses=2]
- %3 = getelementptr %struct.adpcm_state* %state, i32 0, i32 1 ; <i8*> [#uses=2]
- %4 = load i8* %3, align 2 ; <i8> [#uses=1]
+ %3 = getelementptr %struct.adpcm_state, %struct.adpcm_state* %state, i32 0, i32 1 ; <i8*> [#uses=2]
+ %4 = load i8, i8* %3, align 2 ; <i8> [#uses=1]
%5 = sext i8 %4 to i32 ; <i32> [#uses=3]
- %6 = getelementptr [89 x i32]* @stepsizeTable, i32 0, i32 %5 ; <i32*> [#uses=1]
- %7 = load i32* %6, align 4 ; <i32> [#uses=1]
+ %6 = getelementptr [89 x i32], [89 x i32]* @stepsizeTable, i32 0, i32 %5 ; <i32*> [#uses=1]
+ %7 = load i32, i32* %6, align 4 ; <i32> [#uses=1]
%8 = icmp sgt i32 %len, 0 ; <i1> [#uses=1]
br i1 %8, label %bb, label %bb22
@@ -159,14 +159,14 @@ bb: ; preds = %bb20, %entry
%index.026 = phi i32 [ %5, %entry ], [ %index.2, %bb20 ] ; <i32> [#uses=1]
%valpred.125 = phi i32 [ %2, %entry ], [ %valpred.2, %bb20 ] ; <i32> [#uses=1]
%step.024 = phi i32 [ %7, %entry ], [ %35, %bb20 ] ; <i32> [#uses=4]
- %outp.030 = getelementptr i16* %outdata, i32 %indvar ; <i16*> [#uses=1]
+ %outp.030 = getelementptr i16, i16* %outdata, i32 %indvar ; <i16*> [#uses=1]
%9 = icmp eq i32 %bufferstep.028, 0 ; <i1> [#uses=1]
br i1 %9, label %bb2, label %bb3
bb2: ; preds = %bb
- %10 = load i8* %inp.131, align 1 ; <i8> [#uses=1]
+ %10 = load i8, i8* %inp.131, align 1 ; <i8> [#uses=1]
%11 = sext i8 %10 to i32 ; <i32> [#uses=2]
- %12 = getelementptr i8* %inp.131, i32 1 ; <i8*> [#uses=1]
+ %12 = getelementptr i8, i8* %inp.131, i32 1 ; <i8*> [#uses=1]
%13 = ashr i32 %11, 4 ; <i32> [#uses=1]
br label %bb3
@@ -176,8 +176,8 @@ bb3: ; preds = %bb2, %bb
%inp.0 = phi i8* [ %12, %bb2 ], [ %inp.131, %bb ] ; <i8*> [#uses=1]
%delta.0 = and i32 %delta.0.in, 15 ; <i32> [#uses=1]
%tmp = xor i32 %bufferstep.028, 1 ; <i32> [#uses=1]
- %14 = getelementptr [16 x i32]* @indexTable, i32 0, i32 %delta.0 ; <i32*> [#uses=1]
- %15 = load i32* %14, align 4 ; <i32> [#uses=1]
+ %14 = getelementptr [16 x i32], [16 x i32]* @indexTable, i32 0, i32 %delta.0 ; <i32*> [#uses=1]
+ %15 = load i32, i32* %14, align 4 ; <i32> [#uses=1]
%16 = add i32 %15, %index.026 ; <i32> [#uses=2]
%17 = icmp slt i32 %16, 0 ; <i1> [#uses=1]
%index.1 = select i1 %17, i32 0, i32 %16 ; <i32> [#uses=2]
@@ -227,8 +227,8 @@ bb19: ; preds = %bb18
bb20: ; preds = %bb19, %bb18, %bb13
%valpred.2 = phi i32 [ -32768, %bb19 ], [ 32767, %bb13 ], [ %valpred.0, %bb18 ] ; <i32> [#uses=3]
- %34 = getelementptr [89 x i32]* @stepsizeTable, i32 0, i32 %index.2 ; <i32*> [#uses=1]
- %35 = load i32* %34, align 4 ; <i32> [#uses=1]
+ %34 = getelementptr [89 x i32], [89 x i32]* @stepsizeTable, i32 0, i32 %index.2 ; <i32*> [#uses=1]
+ %35 = load i32, i32* %34, align 4 ; <i32> [#uses=1]
%36 = trunc i32 %valpred.2 to i16 ; <i16> [#uses=1]
store i16 %36, i16* %outp.030, align 2
%indvar.next = add i32 %indvar, 1 ; <i32> [#uses=2]
@@ -250,12 +250,12 @@ entry:
br label %bb
bb: ; preds = %bb3, %entry
- %0 = tail call i32 (...)* @read(i32 0, i8* getelementptr ([500 x i8]* @abuf, i32 0, i32 0), i32 500) nounwind ; <i32> [#uses=4]
+ %0 = tail call i32 (...) @read(i32 0, i8* getelementptr ([500 x i8], [500 x i8]* @abuf, i32 0, i32 0), i32 500) nounwind ; <i32> [#uses=4]
%1 = icmp slt i32 %0, 0 ; <i1> [#uses=1]
br i1 %1, label %bb1, label %bb2
bb1: ; preds = %bb
- tail call void @perror(i8* getelementptr ([11 x i8]* @.str, i32 0, i32 0)) nounwind
+ tail call void @perror(i8* getelementptr ([11 x i8], [11 x i8]* @.str, i32 0, i32 0)) nounwind
ret i32 1
bb2: ; preds = %bb
@@ -264,18 +264,18 @@ bb2: ; preds = %bb
bb3: ; preds = %bb2
%3 = shl i32 %0, 1 ; <i32> [#uses=1]
- tail call void @adpcm_decoder(i8* getelementptr ([500 x i8]* @abuf, i32 0, i32 0), i16* getelementptr ([1000 x i16]* @sbuf, i32 0, i32 0), i32 %3, %struct.adpcm_state* @state) nounwind
+ tail call void @adpcm_decoder(i8* getelementptr ([500 x i8], [500 x i8]* @abuf, i32 0, i32 0), i16* getelementptr ([1000 x i16], [1000 x i16]* @sbuf, i32 0, i32 0), i32 %3, %struct.adpcm_state* @state) nounwind
%4 = shl i32 %0, 2 ; <i32> [#uses=1]
- %5 = tail call i32 (...)* @write(i32 1, i16* getelementptr ([1000 x i16]* @sbuf, i32 0, i32 0), i32 %4) nounwind ; <i32> [#uses=0]
+ %5 = tail call i32 (...) @write(i32 1, i16* getelementptr ([1000 x i16], [1000 x i16]* @sbuf, i32 0, i32 0), i32 %4) nounwind ; <i32> [#uses=0]
br label %bb
bb4: ; preds = %bb2
- %6 = load %struct.FILE** @__stderrp, align 4 ; <%struct.FILE*> [#uses=1]
- %7 = load i16* getelementptr (%struct.adpcm_state* @state, i32 0, i32 0), align 4 ; <i16> [#uses=1]
+ %6 = load %struct.FILE*, %struct.FILE** @__stderrp, align 4 ; <%struct.FILE*> [#uses=1]
+ %7 = load i16, i16* getelementptr (%struct.adpcm_state, %struct.adpcm_state* @state, i32 0, i32 0), align 4 ; <i16> [#uses=1]
%8 = sext i16 %7 to i32 ; <i32> [#uses=1]
- %9 = load i8* getelementptr (%struct.adpcm_state* @state, i32 0, i32 1), align 2 ; <i8> [#uses=1]
+ %9 = load i8, i8* getelementptr (%struct.adpcm_state, %struct.adpcm_state* @state, i32 0, i32 1), align 2 ; <i8> [#uses=1]
%10 = sext i8 %9 to i32 ; <i32> [#uses=1]
- %11 = tail call i32 (%struct.FILE*, i8*, ...)* @fprintf(%struct.FILE* %6, i8* getelementptr ([28 x i8]* @.str1, i32 0, i32 0), i32 %8, i32 %10) nounwind ; <i32> [#uses=0]
+ %11 = tail call i32 (%struct.FILE*, i8*, ...) @fprintf(%struct.FILE* %6, i8* getelementptr ([28 x i8], [28 x i8]* @.str1, i32 0, i32 0), i32 %8, i32 %10) nounwind ; <i32> [#uses=0]
ret i32 0
}
diff --git a/test/CodeGen/Thumb/cortex-m0-unaligned-access.ll b/test/CodeGen/Thumb/cortex-m0-unaligned-access.ll
index c4403fe7efe6..cba1ca68569f 100644
--- a/test/CodeGen/Thumb/cortex-m0-unaligned-access.ll
+++ b/test/CodeGen/Thumb/cortex-m0-unaligned-access.ll
@@ -8,6 +8,6 @@ define i32 @split_load(i32* %p) nounwind {
; V7M-LABEL: split_load
; V7M-NOT: ldrh
; V7M: bx lr
- %val = load i32* %p, align 2
+ %val = load i32, i32* %p, align 2
ret i32 %val
}
diff --git a/test/CodeGen/Thumb/dyn-stackalloc.ll b/test/CodeGen/Thumb/dyn-stackalloc.ll
index 309d80217c11..c94c904e4cd2 100644
--- a/test/CodeGen/Thumb/dyn-stackalloc.ll
+++ b/test/CodeGen/Thumb/dyn-stackalloc.ll
@@ -12,7 +12,7 @@ define void @t1(%struct.state* %v) {
; CHECK: mov r[[R1:[0-9]+]], sp
; CHECK: subs r[[R2:[0-9]+]], r[[R1]], r[[R0]]
; CHECK: mov sp, r[[R2]]
- %tmp6 = load i32* null
+ %tmp6 = load i32, i32* null
%tmp8 = alloca float, i32 %tmp6
store i32 1, i32* null
br i1 false, label %bb123.preheader, label %return
@@ -22,8 +22,8 @@ bb123.preheader:
bb43:
call fastcc void @f1( float* %tmp8, float* null, i32 0 )
- %tmp70 = load i32* null
- %tmp85 = getelementptr float* %tmp8, i32 0
+ %tmp70 = load i32, i32* null
+ %tmp85 = getelementptr float, float* %tmp8, i32 0
call fastcc void @f2( float* null, float* null, float* %tmp85, i32 %tmp70 )
ret void
@@ -60,8 +60,8 @@ define void @t2(%struct.comment* %vc, i8* %tag, i8* %contents) {
%tmp6 = alloca i8, i32 %tmp5
%tmp9 = call i8* @strcpy( i8* %tmp6, i8* %tag )
%tmp6.len = call i32 @strlen( i8* %tmp6 )
- %tmp6.indexed = getelementptr i8* %tmp6, i32 %tmp6.len
- call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp6.indexed, i8* getelementptr inbounds ([2 x i8]* @str215, i32 0, i32 0), i32 2, i32 1, i1 false)
+ %tmp6.indexed = getelementptr i8, i8* %tmp6, i32 %tmp6.len
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp6.indexed, i8* getelementptr inbounds ([2 x i8], [2 x i8]* @str215, i32 0, i32 0), i32 2, i32 1, i1 false)
%tmp15 = call i8* @strcat( i8* %tmp6, i8* %contents )
call fastcc void @comment_add( %struct.comment* %vc, i8* %tmp6 )
ret void
diff --git a/test/CodeGen/Thumb/large-stack.ll b/test/CodeGen/Thumb/large-stack.ll
index 269bdd938d0a..0d534589ae0a 100644
--- a/test/CodeGen/Thumb/large-stack.ll
+++ b/test/CodeGen/Thumb/large-stack.ll
@@ -56,7 +56,7 @@ define i32 @test3() {
%tmp = alloca i32, align 4
%a = alloca [805306369 x i8], align 16
store i32 0, i32* %tmp
- %tmp1 = load i32* %tmp
+ %tmp1 = load i32, i32* %tmp
ret i32 %tmp1
}
diff --git a/test/CodeGen/Thumb/ldm-merge-call.ll b/test/CodeGen/Thumb/ldm-merge-call.ll
index febc96b07ce6..eb7852934726 100644
--- a/test/CodeGen/Thumb/ldm-merge-call.ll
+++ b/test/CodeGen/Thumb/ldm-merge-call.ll
@@ -8,9 +8,9 @@ entry:
; CHECK-LABEL: foo:
; CHECK: ldm r[[BASE:[0-9]]]!,
; CHECK-NEXT: mov r[[BASE]],
- %0 = load i32* %A, align 4
- %arrayidx1 = getelementptr inbounds i32* %A, i32 1
- %1 = load i32* %arrayidx1, align 4
+ %0 = load i32, i32* %A, align 4
+ %arrayidx1 = getelementptr inbounds i32, i32* %A, i32 1
+ %1 = load i32, i32* %arrayidx1, align 4
%call = tail call i32 @bar(i32 %0, i32 %1, i32 %0, i32 %1) #2
%call2 = tail call i32 @bar(i32 %0, i32 %1, i32 %0, i32 %1) #2
ret void
diff --git a/test/CodeGen/Thumb/ldm-merge-struct.ll b/test/CodeGen/Thumb/ldm-merge-struct.ll
index 2f732e00bf74..9815a9e505f1 100644
--- a/test/CodeGen/Thumb/ldm-merge-struct.ll
+++ b/test/CodeGen/Thumb/ldm-merge-struct.ll
@@ -11,8 +11,8 @@ entry:
; CHECK-LABEL: f:
; CHECK: ldm r[[BASE:[0-9]]],
; CHECK-NEXT-NOT: subs r[[BASE]]
- %0 = load i32* getelementptr inbounds (%struct.S* @s, i32 0, i32 0), align 4
- %1 = load i32* getelementptr inbounds (%struct.S* @s, i32 0, i32 1), align 4
+ %0 = load i32, i32* getelementptr inbounds (%struct.S, %struct.S* @s, i32 0, i32 0), align 4
+ %1 = load i32, i32* getelementptr inbounds (%struct.S, %struct.S* @s, i32 0, i32 1), align 4
%cmp = icmp sgt i32 %0, %1
%2 = sub i32 0, %1
%cond.p = select i1 %cmp, i32 %1, i32 %2
diff --git a/test/CodeGen/Thumb/ldm-stm-base-materialization.ll b/test/CodeGen/Thumb/ldm-stm-base-materialization.ll
index 6382c25b60fe..916e5ea299a3 100644
--- a/test/CodeGen/Thumb/ldm-stm-base-materialization.ll
+++ b/test/CodeGen/Thumb/ldm-stm-base-materialization.ll
@@ -15,11 +15,11 @@ entry:
; CHECK-NEXT: ldm r[[NLB]],
; CHECK: adds r[[NSB:[0-9]]], r[[SB]], #4
; CHECK-NEXT: stm r[[NSB]]
- %0 = load i32** @a, align 4
- %arrayidx = getelementptr inbounds i32* %0, i32 1
+ %0 = load i32*, i32** @a, align 4
+ %arrayidx = getelementptr inbounds i32, i32* %0, i32 1
%1 = bitcast i32* %arrayidx to i8*
- %2 = load i32** @b, align 4
- %arrayidx1 = getelementptr inbounds i32* %2, i32 1
+ %2 = load i32*, i32** @b, align 4
+ %arrayidx1 = getelementptr inbounds i32, i32* %2, i32 1
%3 = bitcast i32* %arrayidx1 to i8*
tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %1, i8* %3, i32 24, i32 4, i1 false)
ret void
diff --git a/test/CodeGen/Thumb/ldr_ext.ll b/test/CodeGen/Thumb/ldr_ext.ll
index 2d25af35b513..90194aecec97 100644
--- a/test/CodeGen/Thumb/ldr_ext.ll
+++ b/test/CodeGen/Thumb/ldr_ext.ll
@@ -7,7 +7,7 @@ define i32 @test1(i8* %t1) nounwind {
; V5: ldrb
; V6: ldrb
- %tmp.u = load i8* %t1
+ %tmp.u = load i8, i8* %t1
%tmp1.s = zext i8 %tmp.u to i32
ret i32 %tmp1.s
}
@@ -16,7 +16,7 @@ define i32 @test2(i16* %t1) nounwind {
; V5: ldrh
; V6: ldrh
- %tmp.u = load i16* %t1
+ %tmp.u = load i16, i16* %t1
%tmp1.s = zext i16 %tmp.u to i32
ret i32 %tmp1.s
}
@@ -28,7 +28,7 @@ define i32 @test3(i8* %t0) nounwind {
; V6: ldrb
; V6: sxtb
- %tmp.s = load i8* %t0
+ %tmp.s = load i8, i8* %t0
%tmp1.s = sext i8 %tmp.s to i32
ret i32 %tmp1.s
}
@@ -40,7 +40,7 @@ define i32 @test4(i16* %t0) nounwind {
; V6: ldrh
; V6: sxth
- %tmp.s = load i16* %t0
+ %tmp.s = load i16, i16* %t0
%tmp1.s = sext i16 %tmp.s to i32
ret i32 %tmp1.s
}
@@ -51,7 +51,7 @@ define i32 @test5() nounwind {
; V6: movs r0, #0
; V6: ldrsh
- %tmp.s = load i16* null
+ %tmp.s = load i16, i16* null
%tmp1.s = sext i16 %tmp.s to i32
ret i32 %tmp1.s
}
diff --git a/test/CodeGen/Thumb/ldr_frame.ll b/test/CodeGen/Thumb/ldr_frame.ll
index 0e879d7379a8..fdcf3b7678e5 100644
--- a/test/CodeGen/Thumb/ldr_frame.ll
+++ b/test/CodeGen/Thumb/ldr_frame.ll
@@ -4,8 +4,8 @@ define i32 @f1() {
; CHECK-LABEL: f1:
; CHECK: ldr r0
%buf = alloca [32 x i32], align 4
- %tmp = getelementptr [32 x i32]* %buf, i32 0, i32 0
- %tmp1 = load i32* %tmp
+ %tmp = getelementptr [32 x i32], [32 x i32]* %buf, i32 0, i32 0
+ %tmp1 = load i32, i32* %tmp
ret i32 %tmp1
}
@@ -14,8 +14,8 @@ define i32 @f2() {
; CHECK: mov r0
; CHECK: ldrb
%buf = alloca [32 x i8], align 4
- %tmp = getelementptr [32 x i8]* %buf, i32 0, i32 0
- %tmp1 = load i8* %tmp
+ %tmp = getelementptr [32 x i8], [32 x i8]* %buf, i32 0, i32 0
+ %tmp1 = load i8, i8* %tmp
%tmp2 = zext i8 %tmp1 to i32
ret i32 %tmp2
}
@@ -24,8 +24,8 @@ define i32 @f3() {
; CHECK-LABEL: f3:
; CHECK: ldr r0
%buf = alloca [32 x i32], align 4
- %tmp = getelementptr [32 x i32]* %buf, i32 0, i32 32
- %tmp1 = load i32* %tmp
+ %tmp = getelementptr [32 x i32], [32 x i32]* %buf, i32 0, i32 32
+ %tmp1 = load i32, i32* %tmp
ret i32 %tmp1
}
@@ -34,8 +34,8 @@ define i32 @f4() {
; CHECK: mov r0
; CHECK: ldrb
%buf = alloca [32 x i8], align 4
- %tmp = getelementptr [32 x i8]* %buf, i32 0, i32 2
- %tmp1 = load i8* %tmp
+ %tmp = getelementptr [32 x i8], [32 x i8]* %buf, i32 0, i32 2
+ %tmp1 = load i8, i8* %tmp
%tmp2 = zext i8 %tmp1 to i32
ret i32 %tmp2
}
diff --git a/test/CodeGen/Thumb/long.ll b/test/CodeGen/Thumb/long.ll
index 2449e5ad6777..33f63892ec3f 100644
--- a/test/CodeGen/Thumb/long.ll
+++ b/test/CodeGen/Thumb/long.ll
@@ -65,7 +65,7 @@ entry:
define i64 @f10() {
entry:
%a = alloca i64, align 8 ; <i64*> [#uses=1]
- %retval = load i64* %a ; <i64> [#uses=1]
+ %retval = load i64, i64* %a ; <i64> [#uses=1]
ret i64 %retval
}
diff --git a/test/CodeGen/Thumb/segmented-stacks.ll b/test/CodeGen/Thumb/segmented-stacks.ll
index d6e25c7792e8..09f5db852bf4 100644
--- a/test/CodeGen/Thumb/segmented-stacks.ll
+++ b/test/CodeGen/Thumb/segmented-stacks.ll
@@ -55,7 +55,7 @@ define void @test_basic() #0 {
}
define i32 @test_nested(i32 * nest %closure, i32 %other) #0 {
- %addend = load i32 * %closure
+ %addend = load i32 , i32 * %closure
%result = add i32 %other, %addend
%mem = alloca i32, i32 10
call void @dummy_use (i32* %mem, i32 10)
diff --git a/test/CodeGen/Thumb/stack-access.ll b/test/CodeGen/Thumb/stack-access.ll
new file mode 100644
index 000000000000..fded4104207c
--- /dev/null
+++ b/test/CodeGen/Thumb/stack-access.ll
@@ -0,0 +1,127 @@
+; RUN: llc -mtriple=thumb-eabi < %s -o - | FileCheck %s
+
+; Check that stack addresses are generated using a single ADD
+define void @test1(i8** %p) {
+ %x = alloca i8, align 1
+ %y = alloca i8, align 1
+ %z = alloca i8, align 1
+; CHECK: add r1, sp, #8
+; CHECK: str r1, [r0]
+ store i8* %x, i8** %p, align 4
+; CHECK: add r1, sp, #4
+; CHECK: str r1, [r0]
+ store i8* %y, i8** %p, align 4
+; CHECK: mov r1, sp
+; CHECK: str r1, [r0]
+ store i8* %z, i8** %p, align 4
+ ret void
+}
+
+; Stack offsets larger than 1020 still need two ADDs
+define void @test2([1024 x i8]** %p) {
+ %arr1 = alloca [1024 x i8], align 1
+ %arr2 = alloca [1024 x i8], align 1
+; CHECK: add r1, sp, #1020
+; CHECK: adds r1, #4
+; CHECK: str r1, [r0]
+ store [1024 x i8]* %arr1, [1024 x i8]** %p, align 4
+; CHECK: mov r1, sp
+; CHECK: str r1, [r0]
+ store [1024 x i8]* %arr2, [1024 x i8]** %p, align 4
+ ret void
+}
+
+; If possible stack-based ldrb/ldrh are widened to use SP-based addressing
+define i32 @test3() #0 {
+ %x = alloca i8, align 1
+ %y = alloca i8, align 1
+; CHECK: ldr r0, [sp]
+ %1 = load i8, i8* %x, align 1
+; CHECK: ldr r1, [sp, #4]
+ %2 = load i8, i8* %y, align 1
+ %3 = add nsw i8 %1, %2
+ %4 = zext i8 %3 to i32
+ ret i32 %4
+}
+
+define i32 @test4() #0 {
+ %x = alloca i16, align 2
+ %y = alloca i16, align 2
+; CHECK: ldr r0, [sp]
+ %1 = load i16, i16* %x, align 2
+; CHECK: ldr r1, [sp, #4]
+ %2 = load i16, i16* %y, align 2
+ %3 = add nsw i16 %1, %2
+ %4 = zext i16 %3 to i32
+ ret i32 %4
+}
+
+; Don't widen if the value needs to be zero-extended
+define zeroext i8 @test5() {
+ %x = alloca i8, align 1
+; CHECK: mov r0, sp
+; CHECK: ldrb r0, [r0]
+ %1 = load i8, i8* %x, align 1
+ ret i8 %1
+}
+
+define zeroext i16 @test6() {
+ %x = alloca i16, align 2
+; CHECK: mov r0, sp
+; CHECK: ldrh r0, [r0]
+ %1 = load i16, i16* %x, align 2
+ ret i16 %1
+}
+
+; Accessing the bottom of a large array shouldn't require materializing a base
+define void @test7() {
+ %arr = alloca [200 x i32], align 4
+
+ ; CHECK: movs [[REG:r[0-9]+]], #1
+ ; CHECK: str [[REG]], [sp, #4]
+ %arrayidx = getelementptr inbounds [200 x i32], [200 x i32]* %arr, i32 0, i32 1
+ store i32 1, i32* %arrayidx, align 4
+
+ ; CHECK: str [[REG]], [sp, #16]
+ %arrayidx1 = getelementptr inbounds [200 x i32], [200 x i32]* %arr, i32 0, i32 4
+ store i32 1, i32* %arrayidx1, align 4
+
+ ret void
+}
+
+; Check that loads/stores with out-of-range offsets are handled correctly
+define void @test8() {
+ %arr3 = alloca [224 x i32], align 4
+ %arr2 = alloca [224 x i32], align 4
+ %arr1 = alloca [224 x i32], align 4
+
+; CHECK: movs [[REG:r[0-9]+]], #1
+; CHECK: str [[REG]], [sp]
+ %arr1idx1 = getelementptr inbounds [224 x i32], [224 x i32]* %arr1, i32 0, i32 0
+ store i32 1, i32* %arr1idx1, align 4
+
+; Offset in range for sp-based store, but not for non-sp-based store
+; CHECK: str [[REG]], [sp, #128]
+ %arr1idx2 = getelementptr inbounds [224 x i32], [224 x i32]* %arr1, i32 0, i32 32
+ store i32 1, i32* %arr1idx2, align 4
+
+; CHECK: str [[REG]], [sp, #896]
+ %arr2idx1 = getelementptr inbounds [224 x i32], [224 x i32]* %arr2, i32 0, i32 0
+ store i32 1, i32* %arr2idx1, align 4
+
+; %arr2 is in range, but this element of it is not
+; CHECK: str [[REG]], [{{r[0-9]+}}]
+ %arr2idx2 = getelementptr inbounds [224 x i32], [224 x i32]* %arr2, i32 0, i32 32
+ store i32 1, i32* %arr2idx2, align 4
+
+; %arr3 is not in range
+; CHECK: str [[REG]], [{{r[0-9]+}}]
+ %arr3idx1 = getelementptr inbounds [224 x i32], [224 x i32]* %arr3, i32 0, i32 0
+ store i32 1, i32* %arr3idx1, align 4
+
+; CHECK: str [[REG]], [{{r[0-9]+}}]
+ %arr3idx2 = getelementptr inbounds [224 x i32], [224 x i32]* %arr3, i32 0, i32 32
+ store i32 1, i32* %arr3idx2, align 4
+
+ ret void
+}
diff --git a/test/CodeGen/Thumb/stack_guard_remat.ll b/test/CodeGen/Thumb/stack_guard_remat.ll
index e949cc181f15..41edef5a58e6 100644
--- a/test/CodeGen/Thumb/stack_guard_remat.ll
+++ b/test/CodeGen/Thumb/stack_guard_remat.ll
@@ -28,7 +28,7 @@ define i32 @test_stack_guard_remat() #0 {
%a1 = alloca [256 x i32], align 4
%1 = bitcast [256 x i32]* %a1 to i8*
call void @llvm.lifetime.start(i64 1024, i8* %1)
- %2 = getelementptr inbounds [256 x i32]* %a1, i32 0, i32 0
+ %2 = getelementptr inbounds [256 x i32], [256 x i32]* %a1, i32 0, i32 0
call void @foo3(i32* %2) #3
call void asm sideeffect "foo2", "~{r0},~{r1},~{r2},~{r3},~{r4},~{r5},~{r6},~{r7},~{r8},~{r9},~{r10},~{r11},~{r12},~{sp},~{lr}"()
call void @llvm.lifetime.end(i64 1024, i8* %1)
diff --git a/test/CodeGen/Thumb/stm-merge.ll b/test/CodeGen/Thumb/stm-merge.ll
index 76e71f4da652..5492ad8f5121 100644
--- a/test/CodeGen/Thumb/stm-merge.ll
+++ b/test/CodeGen/Thumb/stm-merge.ll
@@ -7,17 +7,18 @@ target triple = "thumbv6m--linux-gnueabi"
@e = internal unnamed_addr global i32* null, align 4
; Function Attrs: nounwind optsize
-define void @fn1() #0 {
+define void @fn1(i32 %x, i32 %y, i32 %z) #0 {
entry:
; CHECK-LABEL: fn1:
; CHECK: stm r[[BASE:[0-9]]]!, {{.*}}
; CHECK-NOT: {{.*}} r[[BASE]]
-; CHECK: ldr r[[BASE]], {{.*}}
%g = alloca i32, align 4
%h = alloca i32, align 4
- store i32 1, i32* %g, align 4
- store i32 0, i32* %h, align 4
- %.pr = load i32* @d, align 4
+ %i = alloca i32, align 4
+ store i32 %x, i32* %i, align 4
+ store i32 %y, i32* %h, align 4
+ store i32 %z, i32* %g, align 4
+ %.pr = load i32, i32* @d, align 4
%cmp11 = icmp slt i32 %.pr, 1
br i1 %cmp11, label %for.inc.lr.ph, label %for.body5
diff --git a/test/CodeGen/Thumb/thumb-ldm.ll b/test/CodeGen/Thumb/thumb-ldm.ll
index 7e9560eec8d5..8c49669943ce 100644
--- a/test/CodeGen/Thumb/thumb-ldm.ll
+++ b/test/CodeGen/Thumb/thumb-ldm.ll
@@ -7,8 +7,8 @@ define i32 @t1() {
; CHECK: push {r7, lr}
; CHECK: ldm
; CHECK: pop {r7, pc}
- %tmp = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 0) ; <i32> [#uses=1]
- %tmp3 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 1) ; <i32> [#uses=1]
+ %tmp = load i32, i32* getelementptr ([0 x i32], [0 x i32]* @X, i32 0, i32 0) ; <i32> [#uses=1]
+ %tmp3 = load i32, i32* getelementptr ([0 x i32], [0 x i32]* @X, i32 0, i32 1) ; <i32> [#uses=1]
%tmp4 = call i32 @f1( i32 %tmp, i32 %tmp3 ) ; <i32> [#uses=1]
ret i32 %tmp4
}
@@ -18,9 +18,9 @@ define i32 @t2() {
; CHECK: push {r7, lr}
; CHECK: ldm
; CHECK: pop {r7, pc}
- %tmp = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 2) ; <i32> [#uses=1]
- %tmp3 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 3) ; <i32> [#uses=1]
- %tmp5 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 4) ; <i32> [#uses=1]
+ %tmp = load i32, i32* getelementptr ([0 x i32], [0 x i32]* @X, i32 0, i32 2) ; <i32> [#uses=1]
+ %tmp3 = load i32, i32* getelementptr ([0 x i32], [0 x i32]* @X, i32 0, i32 3) ; <i32> [#uses=1]
+ %tmp5 = load i32, i32* getelementptr ([0 x i32], [0 x i32]* @X, i32 0, i32 4) ; <i32> [#uses=1]
%tmp6 = call i32 @f2( i32 %tmp, i32 %tmp3, i32 %tmp5 ) ; <i32> [#uses=1]
ret i32 %tmp6
}
@@ -30,9 +30,9 @@ define i32 @t3() {
; CHECK: push {r7, lr}
; CHECK: ldm
; CHECK: pop {r7, pc}
- %tmp = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 1) ; <i32> [#uses=1]
- %tmp3 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 2) ; <i32> [#uses=1]
- %tmp5 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 3) ; <i32> [#uses=1]
+ %tmp = load i32, i32* getelementptr ([0 x i32], [0 x i32]* @X, i32 0, i32 1) ; <i32> [#uses=1]
+ %tmp3 = load i32, i32* getelementptr ([0 x i32], [0 x i32]* @X, i32 0, i32 2) ; <i32> [#uses=1]
+ %tmp5 = load i32, i32* getelementptr ([0 x i32], [0 x i32]* @X, i32 0, i32 3) ; <i32> [#uses=1]
%tmp6 = call i32 @f2( i32 %tmp, i32 %tmp3, i32 %tmp5 ) ; <i32> [#uses=1]
ret i32 %tmp6
}
diff --git a/test/CodeGen/Thumb/vargs.ll b/test/CodeGen/Thumb/vargs.ll
index 4078b01ba30c..1c7b631741b7 100644
--- a/test/CodeGen/Thumb/vargs.ll
+++ b/test/CodeGen/Thumb/vargs.ll
@@ -6,6 +6,10 @@
define void @f(i32 %a, ...) {
entry:
+; Check that space is reserved above the pushed lr for variadic argument
+; registers to be stored in.
+; CHECK: sub sp, #[[IMM:[0-9]+]]
+; CHECK: push
%va = alloca i8*, align 4 ; <i8**> [#uses=4]
%va.upgrd.1 = bitcast i8** %va to i8* ; <i8*> [#uses=1]
call void @llvm.va_start( i8* %va.upgrd.1 )
@@ -13,8 +17,8 @@ entry:
bb: ; preds = %bb, %entry
%a_addr.0 = phi i32 [ %a, %entry ], [ %tmp5, %bb ] ; <i32> [#uses=2]
- %tmp = load volatile i8** %va ; <i8*> [#uses=2]
- %tmp2 = getelementptr i8* %tmp, i32 4 ; <i8*> [#uses=1]
+ %tmp = load volatile i8*, i8** %va ; <i8*> [#uses=2]
+ %tmp2 = getelementptr i8, i8* %tmp, i32 4 ; <i8*> [#uses=1]
store volatile i8* %tmp2, i8** %va
%tmp5 = add i32 %a_addr.0, -1 ; <i32> [#uses=1]
%tmp.upgrd.2 = icmp eq i32 %a_addr.0, 1 ; <i1> [#uses=1]
@@ -22,11 +26,18 @@ bb: ; preds = %bb, %entry
bb7: ; preds = %bb
%tmp3 = bitcast i8* %tmp to i32* ; <i32*> [#uses=1]
- %tmp.upgrd.3 = load i32* %tmp3 ; <i32> [#uses=1]
- %tmp10 = call i32 (i8*, ...)* @printf( i8* getelementptr ([4 x i8]* @str, i32 0, i64 0), i32 %tmp.upgrd.3 ) ; <i32> [#uses=0]
+ %tmp.upgrd.3 = load i32, i32* %tmp3 ; <i32> [#uses=1]
+ %tmp10 = call i32 (i8*, ...) @printf( i8* getelementptr ([4 x i8], [4 x i8]* @str, i32 0, i64 0), i32 %tmp.upgrd.3 ) ; <i32> [#uses=0]
%va.upgrd.4 = bitcast i8** %va to i8* ; <i8*> [#uses=1]
call void @llvm.va_end( i8* %va.upgrd.4 )
ret void
+
+; The return sequence should pop the lr to r3, recover the stack space used to
+; store variadic argument registers, then return via r3. Possibly there is a pop
+; before this, but only if the function happened to use callee-saved registers.
+; CHECK: pop {r3}
+; CHECK: add sp, #[[IMM]]
+; CHECK: bx r3
}
declare void @llvm.va_start(i8*)
@@ -34,8 +45,3 @@ declare void @llvm.va_start(i8*)
declare i32 @printf(i8*, ...)
declare void @llvm.va_end(i8*)
-
-; CHECK: pop
-; CHECK: pop
-; CHECK-NOT: pop
-
diff --git a/test/CodeGen/Thumb2/2009-07-17-CrossRegClassCopy.ll b/test/CodeGen/Thumb2/2009-07-17-CrossRegClassCopy.ll
index 76ffe2a18f19..f76c8ff4d0f8 100644
--- a/test/CodeGen/Thumb2/2009-07-17-CrossRegClassCopy.ll
+++ b/test/CodeGen/Thumb2/2009-07-17-CrossRegClassCopy.ll
@@ -17,8 +17,8 @@ bb1: ; preds = %entry
bb.i: ; preds = %bb.i, %bb1
%indvar.i = phi i32 [ 0, %bb1 ], [ %2, %bb.i ] ; <i32> [#uses=3]
%tmp39 = add i32 %indvar.i, %tmp38 ; <i32> [#uses=1]
- %p_addr.0.i = getelementptr i8* undef, i32 %tmp39 ; <i8*> [#uses=1]
- %0 = load i8* %p_addr.0.i, align 1 ; <i8> [#uses=1]
+ %p_addr.0.i = getelementptr i8, i8* undef, i32 %tmp39 ; <i8*> [#uses=1]
+ %0 = load i8, i8* %p_addr.0.i, align 1 ; <i8> [#uses=1]
%1 = icmp slt i8 %0, 0 ; <i1> [#uses=1]
%2 = add i32 %indvar.i, 1 ; <i32> [#uses=1]
br i1 %1, label %bb.i, label %read_uleb128.exit
@@ -26,7 +26,7 @@ bb.i: ; preds = %bb.i, %bb1
read_uleb128.exit: ; preds = %bb.i
%.sum40 = add i32 %indvar.i, undef ; <i32> [#uses=1]
%.sum31 = add i32 %.sum40, 2 ; <i32> [#uses=1]
- %scevgep.i = getelementptr %struct.dwarf_cie* %cie, i32 0, i32 3, i32 %.sum31 ; <i8*> [#uses=1]
+ %scevgep.i = getelementptr %struct.dwarf_cie, %struct.dwarf_cie* %cie, i32 0, i32 3, i32 %.sum31 ; <i8*> [#uses=1]
%3 = call i8* @read_sleb128(i8* %scevgep.i, i32* undef) ; <i8*> [#uses=0]
unreachable
diff --git a/test/CodeGen/Thumb2/2009-07-21-ISelBug.ll b/test/CodeGen/Thumb2/2009-07-21-ISelBug.ll
index 4abeca930c1f..e363a343f0be 100644
--- a/test/CodeGen/Thumb2/2009-07-21-ISelBug.ll
+++ b/test/CodeGen/Thumb2/2009-07-21-ISelBug.ll
@@ -7,24 +7,24 @@ define i32 @t(i32, ...) nounwind {
entry:
; CHECK-LABEL: t:
; CHECK: add r7, sp, #12
- %1 = load i8** undef, align 4 ; <i8*> [#uses=3]
- %2 = getelementptr i8* %1, i32 4 ; <i8*> [#uses=1]
- %3 = getelementptr i8* %1, i32 8 ; <i8*> [#uses=1]
+ %1 = load i8*, i8** undef, align 4 ; <i8*> [#uses=3]
+ %2 = getelementptr i8, i8* %1, i32 4 ; <i8*> [#uses=1]
+ %3 = getelementptr i8, i8* %1, i32 8 ; <i8*> [#uses=1]
%4 = bitcast i8* %2 to i32* ; <i32*> [#uses=1]
- %5 = load i32* %4, align 4 ; <i32> [#uses=1]
+ %5 = load i32, i32* %4, align 4 ; <i32> [#uses=1]
%6 = trunc i32 %5 to i8 ; <i8> [#uses=1]
- %7 = getelementptr i8* %1, i32 12 ; <i8*> [#uses=1]
+ %7 = getelementptr i8, i8* %1, i32 12 ; <i8*> [#uses=1]
%8 = bitcast i8* %3 to i32* ; <i32*> [#uses=1]
- %9 = load i32* %8, align 4 ; <i32> [#uses=1]
+ %9 = load i32, i32* %8, align 4 ; <i32> [#uses=1]
%10 = trunc i32 %9 to i16 ; <i16> [#uses=1]
%11 = bitcast i8* %7 to i32* ; <i32*> [#uses=1]
- %12 = load i32* %11, align 4 ; <i32> [#uses=1]
+ %12 = load i32, i32* %11, align 4 ; <i32> [#uses=1]
%13 = trunc i32 %12 to i16 ; <i16> [#uses=1]
- %14 = load i32* undef, align 4 ; <i32> [#uses=2]
+ %14 = load i32, i32* undef, align 4 ; <i32> [#uses=2]
%15 = sext i8 %6 to i32 ; <i32> [#uses=2]
%16 = sext i16 %10 to i32 ; <i32> [#uses=2]
%17 = sext i16 %13 to i32 ; <i32> [#uses=2]
- %18 = call i32 (i8*, ...)* @printf(i8* getelementptr ([36 x i8]* @"\01LC", i32 0, i32 0), i32 -128, i32 0, i32 %15, i32 %16, i32 %17, i32 0, i32 %14) nounwind ; <i32> [#uses=0]
+ %18 = call i32 (i8*, ...) @printf(i8* getelementptr ([36 x i8], [36 x i8]* @"\01LC", i32 0, i32 0), i32 -128, i32 0, i32 %15, i32 %16, i32 %17, i32 0, i32 %14) nounwind ; <i32> [#uses=0]
%19 = add i32 0, %15 ; <i32> [#uses=1]
%20 = add i32 %19, %16 ; <i32> [#uses=1]
%21 = add i32 %20, %14 ; <i32> [#uses=1]
diff --git a/test/CodeGen/Thumb2/2009-07-30-PEICrash.ll b/test/CodeGen/Thumb2/2009-07-30-PEICrash.ll
index 3e0761898925..77d2991160d9 100644
--- a/test/CodeGen/Thumb2/2009-07-30-PEICrash.ll
+++ b/test/CodeGen/Thumb2/2009-07-30-PEICrash.ll
@@ -31,35 +31,35 @@
define void @jpeg_idct_float(%struct.jpeg_decompress_struct* nocapture %cinfo, %struct.jpeg_component_info* nocapture %compptr, i16* nocapture %coef_block, i8** nocapture %output_buf, i32 %output_col) nounwind {
entry:
%workspace = alloca [64 x float], align 4 ; <[64 x float]*> [#uses=11]
- %0 = load i8** undef, align 4 ; <i8*> [#uses=5]
+ %0 = load i8*, i8** undef, align 4 ; <i8*> [#uses=5]
br label %bb
bb: ; preds = %bb, %entry
%indvar = phi i32 [ 0, %entry ], [ %indvar.next, %bb ] ; <i32> [#uses=11]
%tmp39 = add i32 %indvar, 8 ; <i32> [#uses=0]
%tmp41 = add i32 %indvar, 16 ; <i32> [#uses=2]
- %scevgep42 = getelementptr [64 x float]* %workspace, i32 0, i32 %tmp41 ; <float*> [#uses=1]
+ %scevgep42 = getelementptr [64 x float], [64 x float]* %workspace, i32 0, i32 %tmp41 ; <float*> [#uses=1]
%tmp43 = add i32 %indvar, 24 ; <i32> [#uses=1]
- %scevgep44 = getelementptr [64 x float]* %workspace, i32 0, i32 %tmp43 ; <float*> [#uses=1]
+ %scevgep44 = getelementptr [64 x float], [64 x float]* %workspace, i32 0, i32 %tmp43 ; <float*> [#uses=1]
%tmp45 = add i32 %indvar, 32 ; <i32> [#uses=1]
- %scevgep46 = getelementptr [64 x float]* %workspace, i32 0, i32 %tmp45 ; <float*> [#uses=1]
+ %scevgep46 = getelementptr [64 x float], [64 x float]* %workspace, i32 0, i32 %tmp45 ; <float*> [#uses=1]
%tmp47 = add i32 %indvar, 40 ; <i32> [#uses=1]
- %scevgep48 = getelementptr [64 x float]* %workspace, i32 0, i32 %tmp47 ; <float*> [#uses=1]
+ %scevgep48 = getelementptr [64 x float], [64 x float]* %workspace, i32 0, i32 %tmp47 ; <float*> [#uses=1]
%tmp49 = add i32 %indvar, 48 ; <i32> [#uses=1]
- %scevgep50 = getelementptr [64 x float]* %workspace, i32 0, i32 %tmp49 ; <float*> [#uses=1]
+ %scevgep50 = getelementptr [64 x float], [64 x float]* %workspace, i32 0, i32 %tmp49 ; <float*> [#uses=1]
%tmp51 = add i32 %indvar, 56 ; <i32> [#uses=1]
- %scevgep52 = getelementptr [64 x float]* %workspace, i32 0, i32 %tmp51 ; <float*> [#uses=1]
- %wsptr.119 = getelementptr [64 x float]* %workspace, i32 0, i32 %indvar ; <float*> [#uses=1]
+ %scevgep52 = getelementptr [64 x float], [64 x float]* %workspace, i32 0, i32 %tmp51 ; <float*> [#uses=1]
+ %wsptr.119 = getelementptr [64 x float], [64 x float]* %workspace, i32 0, i32 %indvar ; <float*> [#uses=1]
%tmp54 = shl i32 %indvar, 2 ; <i32> [#uses=1]
- %scevgep76 = getelementptr i8* undef, i32 %tmp54 ; <i8*> [#uses=1]
+ %scevgep76 = getelementptr i8, i8* undef, i32 %tmp54 ; <i8*> [#uses=1]
%quantptr.118 = bitcast i8* %scevgep76 to float* ; <float*> [#uses=1]
- %scevgep79 = getelementptr i16* %coef_block, i32 %tmp41 ; <i16*> [#uses=0]
- %inptr.117 = getelementptr i16* %coef_block, i32 %indvar ; <i16*> [#uses=1]
- %1 = load i16* null, align 2 ; <i16> [#uses=1]
- %2 = load i16* undef, align 2 ; <i16> [#uses=1]
- %3 = load i16* %inptr.117, align 2 ; <i16> [#uses=1]
+ %scevgep79 = getelementptr i16, i16* %coef_block, i32 %tmp41 ; <i16*> [#uses=0]
+ %inptr.117 = getelementptr i16, i16* %coef_block, i32 %indvar ; <i16*> [#uses=1]
+ %1 = load i16, i16* null, align 2 ; <i16> [#uses=1]
+ %2 = load i16, i16* undef, align 2 ; <i16> [#uses=1]
+ %3 = load i16, i16* %inptr.117, align 2 ; <i16> [#uses=1]
%4 = sitofp i16 %3 to float ; <float> [#uses=1]
- %5 = load float* %quantptr.118, align 4 ; <float> [#uses=1]
+ %5 = load float, float* %quantptr.118, align 4 ; <float> [#uses=1]
%6 = fmul float %4, %5 ; <float> [#uses=1]
%7 = fsub float %6, undef ; <float> [#uses=2]
%8 = fmul float undef, 0x3FF6A09E60000000 ; <float> [#uses=1]
@@ -70,7 +70,7 @@ bb: ; preds = %bb, %entry
%13 = sitofp i16 %1 to float ; <float> [#uses=1]
%14 = fmul float %13, undef ; <float> [#uses=2]
%15 = sitofp i16 %2 to float ; <float> [#uses=1]
- %16 = load float* undef, align 4 ; <float> [#uses=1]
+ %16 = load float, float* undef, align 4 ; <float> [#uses=1]
%17 = fmul float %15, %16 ; <float> [#uses=1]
%18 = fadd float %14, undef ; <float> [#uses=2]
%19 = fsub float %14, undef ; <float> [#uses=2]
@@ -114,22 +114,22 @@ bb6: ; preds = %bb
bb8: ; preds = %bb8, %bb6
%ctr.116 = phi i32 [ 0, %bb6 ], [ %88, %bb8 ] ; <i32> [#uses=3]
- %scevgep = getelementptr i8** %output_buf, i32 %ctr.116 ; <i8**> [#uses=1]
+ %scevgep = getelementptr i8*, i8** %output_buf, i32 %ctr.116 ; <i8**> [#uses=1]
%tmp = shl i32 %ctr.116, 3 ; <i32> [#uses=5]
%tmp2392 = or i32 %tmp, 4 ; <i32> [#uses=1]
- %scevgep24 = getelementptr [64 x float]* %workspace, i32 0, i32 %tmp2392 ; <float*> [#uses=1]
+ %scevgep24 = getelementptr [64 x float], [64 x float]* %workspace, i32 0, i32 %tmp2392 ; <float*> [#uses=1]
%tmp2591 = or i32 %tmp, 2 ; <i32> [#uses=1]
- %scevgep26 = getelementptr [64 x float]* %workspace, i32 0, i32 %tmp2591 ; <float*> [#uses=1]
+ %scevgep26 = getelementptr [64 x float], [64 x float]* %workspace, i32 0, i32 %tmp2591 ; <float*> [#uses=1]
%tmp2790 = or i32 %tmp, 6 ; <i32> [#uses=1]
- %scevgep28 = getelementptr [64 x float]* %workspace, i32 0, i32 %tmp2790 ; <float*> [#uses=1]
+ %scevgep28 = getelementptr [64 x float], [64 x float]* %workspace, i32 0, i32 %tmp2790 ; <float*> [#uses=1]
%tmp3586 = or i32 %tmp, 7 ; <i32> [#uses=0]
- %wsptr.215 = getelementptr [64 x float]* %workspace, i32 0, i32 %tmp ; <float*> [#uses=1]
- %40 = load i8** %scevgep, align 4 ; <i8*> [#uses=4]
- %41 = load float* %wsptr.215, align 4 ; <float> [#uses=1]
- %42 = load float* %scevgep24, align 4 ; <float> [#uses=1]
+ %wsptr.215 = getelementptr [64 x float], [64 x float]* %workspace, i32 0, i32 %tmp ; <float*> [#uses=1]
+ %40 = load i8*, i8** %scevgep, align 4 ; <i8*> [#uses=4]
+ %41 = load float, float* %wsptr.215, align 4 ; <float> [#uses=1]
+ %42 = load float, float* %scevgep24, align 4 ; <float> [#uses=1]
%43 = fadd float %41, %42 ; <float> [#uses=1]
- %44 = load float* %scevgep26, align 4 ; <float> [#uses=1]
- %45 = load float* %scevgep28, align 4 ; <float> [#uses=1]
+ %44 = load float, float* %scevgep26, align 4 ; <float> [#uses=1]
+ %45 = load float, float* %scevgep28, align 4 ; <float> [#uses=1]
%46 = fadd float %44, %45 ; <float> [#uses=1]
%47 = fsub float %43, %46 ; <float> [#uses=2]
%48 = fsub float undef, 0.000000e+00 ; <float> [#uses=1]
@@ -139,13 +139,13 @@ bb8: ; preds = %bb8, %bb6
%52 = lshr i32 %51, 3 ; <i32> [#uses=1]
%53 = and i32 %52, 1023 ; <i32> [#uses=1]
%.sum14 = add i32 %53, 128 ; <i32> [#uses=1]
- %54 = getelementptr i8* %0, i32 %.sum14 ; <i8*> [#uses=1]
- %55 = load i8* %54, align 1 ; <i8> [#uses=1]
+ %54 = getelementptr i8, i8* %0, i32 %.sum14 ; <i8*> [#uses=1]
+ %55 = load i8, i8* %54, align 1 ; <i8> [#uses=1]
store i8 %55, i8* null, align 1
- %56 = getelementptr i8* %40, i32 %.sum10 ; <i8*> [#uses=1]
+ %56 = getelementptr i8, i8* %40, i32 %.sum10 ; <i8*> [#uses=1]
store i8 0, i8* %56, align 1
- %57 = load i8* null, align 1 ; <i8> [#uses=1]
- %58 = getelementptr i8* %40, i32 %.sum8 ; <i8*> [#uses=1]
+ %57 = load i8, i8* null, align 1 ; <i8> [#uses=1]
+ %58 = getelementptr i8, i8* %40, i32 %.sum8 ; <i8*> [#uses=1]
store i8 %57, i8* %58, align 1
%59 = fadd float undef, %48 ; <float> [#uses=1]
%60 = fptosi float %59 to i32 ; <i32> [#uses=1]
@@ -153,17 +153,17 @@ bb8: ; preds = %bb8, %bb6
%62 = lshr i32 %61, 3 ; <i32> [#uses=1]
%63 = and i32 %62, 1023 ; <i32> [#uses=1]
%.sum7 = add i32 %63, 128 ; <i32> [#uses=1]
- %64 = getelementptr i8* %0, i32 %.sum7 ; <i8*> [#uses=1]
- %65 = load i8* %64, align 1 ; <i8> [#uses=1]
- %66 = getelementptr i8* %40, i32 %.sum6 ; <i8*> [#uses=1]
+ %64 = getelementptr i8, i8* %0, i32 %.sum7 ; <i8*> [#uses=1]
+ %65 = load i8, i8* %64, align 1 ; <i8> [#uses=1]
+ %66 = getelementptr i8, i8* %40, i32 %.sum6 ; <i8*> [#uses=1]
store i8 %65, i8* %66, align 1
%67 = fptosi float undef to i32 ; <i32> [#uses=1]
%68 = add i32 %67, 4 ; <i32> [#uses=1]
%69 = lshr i32 %68, 3 ; <i32> [#uses=1]
%70 = and i32 %69, 1023 ; <i32> [#uses=1]
%.sum5 = add i32 %70, 128 ; <i32> [#uses=1]
- %71 = getelementptr i8* %0, i32 %.sum5 ; <i8*> [#uses=1]
- %72 = load i8* %71, align 1 ; <i8> [#uses=1]
+ %71 = getelementptr i8, i8* %0, i32 %.sum5 ; <i8*> [#uses=1]
+ %72 = load i8, i8* %71, align 1 ; <i8> [#uses=1]
store i8 %72, i8* undef, align 1
%73 = fadd float %47, undef ; <float> [#uses=1]
%74 = fptosi float %73 to i32 ; <i32> [#uses=1]
@@ -171,8 +171,8 @@ bb8: ; preds = %bb8, %bb6
%76 = lshr i32 %75, 3 ; <i32> [#uses=1]
%77 = and i32 %76, 1023 ; <i32> [#uses=1]
%.sum3 = add i32 %77, 128 ; <i32> [#uses=1]
- %78 = getelementptr i8* %0, i32 %.sum3 ; <i8*> [#uses=1]
- %79 = load i8* %78, align 1 ; <i8> [#uses=1]
+ %78 = getelementptr i8, i8* %0, i32 %.sum3 ; <i8*> [#uses=1]
+ %79 = load i8, i8* %78, align 1 ; <i8> [#uses=1]
store i8 %79, i8* undef, align 1
%80 = fsub float %47, undef ; <float> [#uses=1]
%81 = fptosi float %80 to i32 ; <i32> [#uses=1]
@@ -180,9 +180,9 @@ bb8: ; preds = %bb8, %bb6
%83 = lshr i32 %82, 3 ; <i32> [#uses=1]
%84 = and i32 %83, 1023 ; <i32> [#uses=1]
%.sum1 = add i32 %84, 128 ; <i32> [#uses=1]
- %85 = getelementptr i8* %0, i32 %.sum1 ; <i8*> [#uses=1]
- %86 = load i8* %85, align 1 ; <i8> [#uses=1]
- %87 = getelementptr i8* %40, i32 %.sum ; <i8*> [#uses=1]
+ %85 = getelementptr i8, i8* %0, i32 %.sum1 ; <i8*> [#uses=1]
+ %86 = load i8, i8* %85, align 1 ; <i8> [#uses=1]
+ %87 = getelementptr i8, i8* %40, i32 %.sum ; <i8*> [#uses=1]
store i8 %86, i8* %87, align 1
%88 = add i32 %ctr.116, 1 ; <i32> [#uses=2]
%exitcond = icmp eq i32 %88, 8 ; <i1> [#uses=1]
diff --git a/test/CodeGen/Thumb2/2009-08-01-WrongLDRBOpc.ll b/test/CodeGen/Thumb2/2009-08-01-WrongLDRBOpc.ll
index 09e0ed1ead63..4a99e280e7ab 100644
--- a/test/CodeGen/Thumb2/2009-08-01-WrongLDRBOpc.ll
+++ b/test/CodeGen/Thumb2/2009-08-01-WrongLDRBOpc.ll
@@ -22,7 +22,7 @@ bb4.preheader.i: ; preds = %entry
br i1 undef, label %tbl.exit, label %bb.i.preheader
bb.i.preheader: ; preds = %bb4.preheader.i
- %line3.i.i.i = getelementptr [200 x i8]* %line.i.i.i, i32 0, i32 0 ; <i8*> [#uses=1]
+ %line3.i.i.i = getelementptr [200 x i8], [200 x i8]* %line.i.i.i, i32 0, i32 0 ; <i8*> [#uses=1]
br label %bb.i
bb.i: ; preds = %bb4.backedge.i, %bb.i.preheader
@@ -38,7 +38,7 @@ bb2.i184.i.i: ; preds = %bb.i183.i.i, %bb3.i
br i1 undef, label %bb5.i185.i.i, label %bb35.preheader.i.i.i
bb35.preheader.i.i.i: ; preds = %bb2.i184.i.i
- %0 = load i8* %line3.i.i.i, align 1 ; <i8> [#uses=1]
+ %0 = load i8, i8* %line3.i.i.i, align 1 ; <i8> [#uses=1]
%1 = icmp eq i8 %0, 59 ; <i1> [#uses=1]
br i1 %1, label %bb36.i.i.i, label %bb9.i186.i.i
@@ -53,16 +53,16 @@ bb36.i.i.i: ; preds = %bb35.preheader.i.i.i
bb.i171.i.i: ; preds = %bb3.i176.i.i, %bb36.i.i.i, %bb5.i185.i.i
%2 = phi i32 [ %4, %bb3.i176.i.i ], [ 0, %bb36.i.i.i ], [ 0, %bb5.i185.i.i ] ; <i32> [#uses=6]
- %scevgep16.i.i.i = getelementptr [20 x i32]* @sep, i32 0, i32 %2 ; <i32*> [#uses=1]
- %scevgep18.i.i.i = getelementptr [20 x [10 x i8]]* @cll, i32 0, i32 %2, i32 0 ; <i8*> [#uses=0]
+ %scevgep16.i.i.i = getelementptr [20 x i32], [20 x i32]* @sep, i32 0, i32 %2 ; <i32*> [#uses=1]
+ %scevgep18.i.i.i = getelementptr [20 x [10 x i8]], [20 x [10 x i8]]* @cll, i32 0, i32 %2, i32 0 ; <i8*> [#uses=0]
store i32 -1, i32* %scevgep16.i.i.i, align 4
br label %bb1.i175.i.i
bb1.i175.i.i: ; preds = %bb1.i175.i.i, %bb.i171.i.i
%i.03.i172.i.i = phi i32 [ 0, %bb.i171.i.i ], [ %3, %bb1.i175.i.i ] ; <i32> [#uses=4]
- %scevgep11.i.i.i = getelementptr [100 x [20 x i32]]* @lefline, i32 0, i32 %i.03.i172.i.i, i32 %2 ; <i32*> [#uses=1]
- %scevgep12.i.i.i = getelementptr [100 x [20 x [4 x i8]]]* @vsize, i32 0, i32 %i.03.i172.i.i, i32 %2, i32 0 ; <i8*> [#uses=1]
- %scevgep13.i.i.i = getelementptr [100 x [20 x [4 x i8]]]* @csize, i32 0, i32 %i.03.i172.i.i, i32 %2, i32 0 ; <i8*> [#uses=0]
+ %scevgep11.i.i.i = getelementptr [100 x [20 x i32]], [100 x [20 x i32]]* @lefline, i32 0, i32 %i.03.i172.i.i, i32 %2 ; <i32*> [#uses=1]
+ %scevgep12.i.i.i = getelementptr [100 x [20 x [4 x i8]]], [100 x [20 x [4 x i8]]]* @vsize, i32 0, i32 %i.03.i172.i.i, i32 %2, i32 0 ; <i8*> [#uses=1]
+ %scevgep13.i.i.i = getelementptr [100 x [20 x [4 x i8]]], [100 x [20 x [4 x i8]]]* @csize, i32 0, i32 %i.03.i172.i.i, i32 %2, i32 0 ; <i8*> [#uses=0]
store i8 0, i8* %scevgep12.i.i.i, align 1
store i32 0, i32* %scevgep11.i.i.i, align 4
store i32 108, i32* undef, align 4
diff --git a/test/CodeGen/Thumb2/2009-08-02-CoalescerBug.ll b/test/CodeGen/Thumb2/2009-08-02-CoalescerBug.ll
index 0b5610327107..55b0921779d4 100644
--- a/test/CodeGen/Thumb2/2009-08-02-CoalescerBug.ll
+++ b/test/CodeGen/Thumb2/2009-08-02-CoalescerBug.ll
@@ -26,15 +26,15 @@ declare void @_ZN10xalanc_1_814FormatterToXML17writeParentTagEndEv(%"struct.xala
define void @_ZN10xalanc_1_814FormatterToXML5cdataEPKtj(%"struct.xalanc_1_8::FormatterToXML"* %this, i16* %ch, i32 %length) {
entry:
- %0 = getelementptr %"struct.xalanc_1_8::FormatterToXML"* %this, i32 0, i32 13 ; <i8*> [#uses=1]
+ %0 = getelementptr %"struct.xalanc_1_8::FormatterToXML", %"struct.xalanc_1_8::FormatterToXML"* %this, i32 0, i32 13 ; <i8*> [#uses=1]
br i1 undef, label %bb4, label %bb
bb: ; preds = %entry
store i8 0, i8* %0, align 1
- %1 = getelementptr %"struct.xalanc_1_8::FormatterToXML"* %this, i32 0, i32 0, i32 0, i32 0 ; <i32 (...)***> [#uses=1]
- %2 = load i32 (...)*** %1, align 4 ; <i32 (...)**> [#uses=1]
- %3 = getelementptr i32 (...)** %2, i32 11 ; <i32 (...)**> [#uses=1]
- %4 = load i32 (...)** %3, align 4 ; <i32 (...)*> [#uses=1]
+ %1 = getelementptr %"struct.xalanc_1_8::FormatterToXML", %"struct.xalanc_1_8::FormatterToXML"* %this, i32 0, i32 0, i32 0, i32 0 ; <i32 (...)***> [#uses=1]
+ %2 = load i32 (...)**, i32 (...)*** %1, align 4 ; <i32 (...)**> [#uses=1]
+ %3 = getelementptr i32 (...)*, i32 (...)** %2, i32 11 ; <i32 (...)**> [#uses=1]
+ %4 = load i32 (...)*, i32 (...)** %3, align 4 ; <i32 (...)*> [#uses=1]
%5 = bitcast i32 (...)* %4 to void (%"struct.xalanc_1_8::FormatterToXML"*, i16*, i32)* ; <void (%"struct.xalanc_1_8::FormatterToXML"*, i16*, i32)*> [#uses=1]
tail call void %5(%"struct.xalanc_1_8::FormatterToXML"* %this, i16* %ch, i32 %length)
ret void
diff --git a/test/CodeGen/Thumb2/2009-08-04-CoalescerAssert.ll b/test/CodeGen/Thumb2/2009-08-04-CoalescerAssert.ll
index acff2615cbb3..b75a14b0a674 100644
--- a/test/CodeGen/Thumb2/2009-08-04-CoalescerAssert.ll
+++ b/test/CodeGen/Thumb2/2009-08-04-CoalescerAssert.ll
@@ -17,7 +17,7 @@ bb1: ; preds = %entry
bb2: ; preds = %bb1
%0 = call i8* @llvm.frameaddress(i32 0) ; <i8*> [#uses=1]
- %1 = call i32 (%struct.FILE*, i8*, ...)* @fprintf(%struct.FILE* noalias undef, i8* noalias getelementptr ([30 x i8]* @.str2, i32 0, i32 0), i8* %0, i8* null) nounwind ; <i32> [#uses=0]
+ %1 = call i32 (%struct.FILE*, i8*, ...) @fprintf(%struct.FILE* noalias undef, i8* noalias getelementptr ([30 x i8], [30 x i8]* @.str2, i32 0, i32 0), i8* %0, i8* null) nounwind ; <i32> [#uses=0]
unreachable
bb9: ; preds = %bb1
diff --git a/test/CodeGen/Thumb2/2009-08-04-CoalescerBug.ll b/test/CodeGen/Thumb2/2009-08-04-CoalescerBug.ll
index 28ac28bbc550..ccec979bf8cf 100644
--- a/test/CodeGen/Thumb2/2009-08-04-CoalescerBug.ll
+++ b/test/CodeGen/Thumb2/2009-08-04-CoalescerBug.ll
@@ -44,8 +44,8 @@ entry:
bb5: ; preds = %bb5, %entry
%.pn = phi %struct.rec* [ %y.0, %bb5 ], [ undef, %entry ] ; <%struct.rec*> [#uses=1]
- %y.0.in = getelementptr %struct.rec* %.pn, i32 0, i32 0, i32 0, i32 1, i32 0 ; <%struct.rec**> [#uses=1]
- %y.0 = load %struct.rec** %y.0.in ; <%struct.rec*> [#uses=2]
+ %y.0.in = getelementptr %struct.rec, %struct.rec* %.pn, i32 0, i32 0, i32 0, i32 1, i32 0 ; <%struct.rec**> [#uses=1]
+ %y.0 = load %struct.rec*, %struct.rec** %y.0.in ; <%struct.rec*> [#uses=2]
br i1 undef, label %bb5, label %bb6
bb6: ; preds = %bb5
@@ -62,33 +62,33 @@ bb.i1: ; preds = %FontHalfXHeight.exit
br label %FontSize.exit
FontSize.exit: ; preds = %bb.i1, %FontHalfXHeight.exit
- %1 = load i32* undef, align 4 ; <i32> [#uses=1]
+ %1 = load i32, i32* undef, align 4 ; <i32> [#uses=1]
%2 = icmp ult i32 0, undef ; <i1> [#uses=1]
br i1 %2, label %bb.i5, label %FontName.exit
bb.i5: ; preds = %FontSize.exit
- call void (i32, i32, i8*, i32, %struct.FILE_POS*, ...)* @Error(i32 1, i32 2, i8* getelementptr ([20 x i8]* @.str24239, i32 0, i32 0), i32 0, %struct.FILE_POS* bitcast (%4* @no_file_pos to %struct.FILE_POS*), i8* getelementptr ([10 x i8]* @.str81872, i32 0, i32 0)) nounwind
+ call void (i32, i32, i8*, i32, %struct.FILE_POS*, ...) @Error(i32 1, i32 2, i8* getelementptr ([20 x i8], [20 x i8]* @.str24239, i32 0, i32 0), i32 0, %struct.FILE_POS* bitcast (%4* @no_file_pos to %struct.FILE_POS*), i8* getelementptr ([10 x i8], [10 x i8]* @.str81872, i32 0, i32 0)) nounwind
br label %FontName.exit
FontName.exit: ; preds = %bb.i5, %FontSize.exit
- %3 = call i32 (%struct.FILE*, i8*, ...)* @fprintf(%struct.FILE* undef, i8* getelementptr ([8 x i8]* @.str1822946, i32 0, i32 0), i32 %1, i8* undef) nounwind ; <i32> [#uses=0]
- %4 = call i32 @"\01_fwrite"(i8* getelementptr ([11 x i8]* @.str1842948, i32 0, i32 0), i32 1, i32 10, i8* undef) nounwind ; <i32> [#uses=0]
+ %3 = call i32 (%struct.FILE*, i8*, ...) @fprintf(%struct.FILE* undef, i8* getelementptr ([8 x i8], [8 x i8]* @.str1822946, i32 0, i32 0), i32 %1, i8* undef) nounwind ; <i32> [#uses=0]
+ %4 = call i32 @"\01_fwrite"(i8* getelementptr ([11 x i8], [11 x i8]* @.str1842948, i32 0, i32 0), i32 1, i32 10, i8* undef) nounwind ; <i32> [#uses=0]
%5 = sub i32 %colmark, undef ; <i32> [#uses=1]
%6 = sub i32 %rowmark, undef ; <i32> [#uses=1]
- %7 = load %struct.FILE** @out_fp, align 4 ; <%struct.FILE*> [#uses=1]
- %8 = call i32 (%struct.FILE*, i8*, ...)* @fprintf(%struct.FILE* %7, i8* getelementptr ([17 x i8]* @.str212784, i32 0, i32 0), i32 %5, i32 %6) nounwind ; <i32> [#uses=0]
+ %7 = load %struct.FILE*, %struct.FILE** @out_fp, align 4 ; <%struct.FILE*> [#uses=1]
+ %8 = call i32 (%struct.FILE*, i8*, ...) @fprintf(%struct.FILE* %7, i8* getelementptr ([17 x i8], [17 x i8]* @.str212784, i32 0, i32 0), i32 %5, i32 %6) nounwind ; <i32> [#uses=0]
store i32 0, i32* @cpexists, align 4
- %9 = getelementptr %struct.rec* %y.0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 1 ; <i32*> [#uses=1]
- %10 = load i32* %9, align 4 ; <i32> [#uses=1]
+ %9 = getelementptr %struct.rec, %struct.rec* %y.0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 1 ; <i32*> [#uses=1]
+ %10 = load i32, i32* %9, align 4 ; <i32> [#uses=1]
%11 = sub i32 0, %10 ; <i32> [#uses=1]
- %12 = load %struct.FILE** @out_fp, align 4 ; <%struct.FILE*> [#uses=1]
- %13 = call i32 (%struct.FILE*, i8*, ...)* @fprintf(%struct.FILE* %12, i8* getelementptr ([17 x i8]* @.str212784, i32 0, i32 0), i32 undef, i32 %11) nounwind ; <i32> [#uses=0]
+ %12 = load %struct.FILE*, %struct.FILE** @out_fp, align 4 ; <%struct.FILE*> [#uses=1]
+ %13 = call i32 (%struct.FILE*, i8*, ...) @fprintf(%struct.FILE* %12, i8* getelementptr ([17 x i8], [17 x i8]* @.str212784, i32 0, i32 0), i32 undef, i32 %11) nounwind ; <i32> [#uses=0]
store i32 0, i32* @cpexists, align 4
br label %bb100.outer.outer
bb100.outer.outer: ; preds = %bb79.critedge, %bb1.i3, %FontName.exit
%x_addr.0.ph.ph = phi %struct.rec* [ %x, %FontName.exit ], [ null, %bb79.critedge ], [ null, %bb1.i3 ] ; <%struct.rec*> [#uses=1]
- %14 = getelementptr %struct.rec* %x_addr.0.ph.ph, i32 0, i32 0, i32 1, i32 0 ; <%struct.FILE_POS*> [#uses=0]
+ %14 = getelementptr %struct.rec, %struct.rec* %x_addr.0.ph.ph, i32 0, i32 0, i32 1, i32 0 ; <%struct.FILE_POS*> [#uses=0]
br label %bb100.outer
bb.i80: ; preds = %bb3.i85
@@ -116,7 +116,7 @@ bb.i47: ; preds = %bb3.i52
br i1 undef, label %bb2.i51, label %bb2.i.i15.critedge
bb2.i51: ; preds = %bb.i47, %StringBeginsWith.exit88, %bb.i80
- %15 = load i8* undef, align 1 ; <i8> [#uses=0]
+ %15 = load i8, i8* undef, align 1 ; <i8> [#uses=0]
br i1 false, label %StringBeginsWith.exit55thread-split, label %bb3.i52
bb3.i52: ; preds = %bb2.i51
diff --git a/test/CodeGen/Thumb2/2009-08-04-ScavengerAssert.ll b/test/CodeGen/Thumb2/2009-08-04-ScavengerAssert.ll
index 88accf8063ed..89f47d9d26f9 100644
--- a/test/CodeGen/Thumb2/2009-08-04-ScavengerAssert.ll
+++ b/test/CodeGen/Thumb2/2009-08-04-ScavengerAssert.ll
@@ -76,8 +76,8 @@ declare i8* @fgets(i8*, i32, %struct.FILE* nocapture) nounwind
define void @PS_PrintGraphicInclude(%struct.rec* %x, i32 %colmark, i32 %rowmark) nounwind {
entry:
%buff = alloca [512 x i8], align 4 ; <[512 x i8]*> [#uses=5]
- %0 = getelementptr %struct.rec* %x, i32 0, i32 0, i32 1, i32 0, i32 0 ; <i8*> [#uses=2]
- %1 = load i8* %0, align 4 ; <i8> [#uses=1]
+ %0 = getelementptr %struct.rec, %struct.rec* %x, i32 0, i32 0, i32 1, i32 0, i32 0 ; <i8*> [#uses=2]
+ %1 = load i8, i8* %0, align 4 ; <i8> [#uses=1]
%2 = add i8 %1, -94 ; <i8> [#uses=1]
%3 = icmp ugt i8 %2, 1 ; <i1> [#uses=1]
br i1 %3, label %bb, label %bb1
@@ -86,28 +86,28 @@ bb: ; preds = %entry
br label %bb1
bb1: ; preds = %bb, %entry
- %4 = getelementptr %struct.rec* %x, i32 0, i32 0, i32 2 ; <%struct.SECOND_UNION*> [#uses=1]
+ %4 = getelementptr %struct.rec, %struct.rec* %x, i32 0, i32 0, i32 2 ; <%struct.SECOND_UNION*> [#uses=1]
%5 = bitcast %struct.SECOND_UNION* %4 to %5* ; <%5*> [#uses=1]
- %6 = getelementptr %5* %5, i32 0, i32 1 ; <i8*> [#uses=1]
- %7 = load i8* %6, align 1 ; <i8> [#uses=1]
+ %6 = getelementptr %5, %5* %5, i32 0, i32 1 ; <i8*> [#uses=1]
+ %7 = load i8, i8* %6, align 1 ; <i8> [#uses=1]
%8 = icmp eq i8 %7, 0 ; <i1> [#uses=1]
br i1 %8, label %bb2, label %bb3
bb2: ; preds = %bb1
- call void (i32, i32, i8*, i32, %struct.FILE_POS*, ...)* @Error(i32 1, i32 2, i8* getelementptr ([20 x i8]* @.str24239, i32 0, i32 0), i32 0, %struct.FILE_POS* bitcast (%4* @no_file_pos to %struct.FILE_POS*), i8* getelementptr ([40 x i8]* @.str1802944, i32 0, i32 0)) nounwind
+ call void (i32, i32, i8*, i32, %struct.FILE_POS*, ...) @Error(i32 1, i32 2, i8* getelementptr ([20 x i8], [20 x i8]* @.str24239, i32 0, i32 0), i32 0, %struct.FILE_POS* bitcast (%4* @no_file_pos to %struct.FILE_POS*), i8* getelementptr ([40 x i8], [40 x i8]* @.str1802944, i32 0, i32 0)) nounwind
br label %bb3
bb3: ; preds = %bb2, %bb1
- %9 = load %struct.rec** undef, align 4 ; <%struct.rec*> [#uses=0]
+ %9 = load %struct.rec*, %struct.rec** undef, align 4 ; <%struct.rec*> [#uses=0]
br label %bb5
bb5: ; preds = %bb5, %bb3
- %y.0 = load %struct.rec** null ; <%struct.rec*> [#uses=2]
+ %y.0 = load %struct.rec*, %struct.rec** null ; <%struct.rec*> [#uses=2]
br i1 false, label %bb5, label %bb6
bb6: ; preds = %bb5
- %10 = load i8* %0, align 4 ; <i8> [#uses=1]
- %11 = getelementptr %struct.rec* %y.0, i32 0, i32 0, i32 1, i32 0 ; <%struct.FILE_POS*> [#uses=1]
+ %10 = load i8, i8* %0, align 4 ; <i8> [#uses=1]
+ %11 = getelementptr %struct.rec, %struct.rec* %y.0, i32 0, i32 0, i32 1, i32 0 ; <%struct.FILE_POS*> [#uses=1]
%12 = call %struct.FILE* @OpenIncGraphicFile(i8* undef, i8 zeroext %10, %struct.rec** null, %struct.FILE_POS* %11, i32* undef) nounwind ; <%struct.FILE*> [#uses=4]
br i1 false, label %bb7, label %bb8
@@ -116,7 +116,7 @@ bb7: ; preds = %bb6
bb8: ; preds = %bb6
%13 = and i32 undef, 4095 ; <i32> [#uses=2]
- %14 = load i32* @currentfont, align 4 ; <i32> [#uses=0]
+ %14 = load i32, i32* @currentfont, align 4 ; <i32> [#uses=0]
br i1 false, label %bb10, label %bb9
bb9: ; preds = %bb8
@@ -124,8 +124,8 @@ bb9: ; preds = %bb8
br i1 %15, label %bb.i, label %FontHalfXHeight.exit
bb.i: ; preds = %bb9
- call void (i32, i32, i8*, i32, %struct.FILE_POS*, ...)* @Error(i32 1, i32 2, i8* getelementptr ([20 x i8]* @.str24239, i32 0, i32 0), i32 0, %struct.FILE_POS* bitcast (%4* @no_file_pos to %struct.FILE_POS*), i8* getelementptr ([17 x i8]* @.str111875, i32 0, i32 0)) nounwind
- %.pre186 = load i32* @currentfont, align 4 ; <i32> [#uses=1]
+ call void (i32, i32, i8*, i32, %struct.FILE_POS*, ...) @Error(i32 1, i32 2, i8* getelementptr ([20 x i8], [20 x i8]* @.str24239, i32 0, i32 0), i32 0, %struct.FILE_POS* bitcast (%4* @no_file_pos to %struct.FILE_POS*), i8* getelementptr ([17 x i8], [17 x i8]* @.str111875, i32 0, i32 0)) nounwind
+ %.pre186 = load i32, i32* @currentfont, align 4 ; <i32> [#uses=1]
br label %FontHalfXHeight.exit
FontHalfXHeight.exit: ; preds = %bb.i, %bb9
@@ -139,55 +139,55 @@ bb1.i: ; preds = %bb.i1, %FontHalfXHeight.exit
br i1 undef, label %bb2.i, label %FontSize.exit
bb2.i: ; preds = %bb1.i
- call void (i32, i32, i8*, i32, %struct.FILE_POS*, ...)* @Error(i32 37, i32 61, i8* getelementptr ([30 x i8]* @.str101874, i32 0, i32 0), i32 1, %struct.FILE_POS* null) nounwind
+ call void (i32, i32, i8*, i32, %struct.FILE_POS*, ...) @Error(i32 37, i32 61, i8* getelementptr ([30 x i8], [30 x i8]* @.str101874, i32 0, i32 0), i32 1, %struct.FILE_POS* null) nounwind
unreachable
FontSize.exit: ; preds = %bb1.i
- %17 = getelementptr %struct.FONT_INFO* undef, i32 %16, i32 5 ; <%struct.rec**> [#uses=0]
- %18 = load i32* undef, align 4 ; <i32> [#uses=1]
- %19 = load i32* @currentfont, align 4 ; <i32> [#uses=2]
- %20 = load i32* @font_count, align 4 ; <i32> [#uses=1]
+ %17 = getelementptr %struct.FONT_INFO, %struct.FONT_INFO* undef, i32 %16, i32 5 ; <%struct.rec**> [#uses=0]
+ %18 = load i32, i32* undef, align 4 ; <i32> [#uses=1]
+ %19 = load i32, i32* @currentfont, align 4 ; <i32> [#uses=2]
+ %20 = load i32, i32* @font_count, align 4 ; <i32> [#uses=1]
%21 = icmp ult i32 %20, %19 ; <i1> [#uses=1]
br i1 %21, label %bb.i5, label %FontName.exit
bb.i5: ; preds = %FontSize.exit
- call void (i32, i32, i8*, i32, %struct.FILE_POS*, ...)* @Error(i32 1, i32 2, i8* getelementptr ([20 x i8]* @.str24239, i32 0, i32 0), i32 0, %struct.FILE_POS* bitcast (%4* @no_file_pos to %struct.FILE_POS*), i8* getelementptr ([10 x i8]* @.str81872, i32 0, i32 0)) nounwind
+ call void (i32, i32, i8*, i32, %struct.FILE_POS*, ...) @Error(i32 1, i32 2, i8* getelementptr ([20 x i8], [20 x i8]* @.str24239, i32 0, i32 0), i32 0, %struct.FILE_POS* bitcast (%4* @no_file_pos to %struct.FILE_POS*), i8* getelementptr ([10 x i8], [10 x i8]* @.str81872, i32 0, i32 0)) nounwind
br label %FontName.exit
FontName.exit: ; preds = %bb.i5, %FontSize.exit
%22 = phi %struct.FONT_INFO* [ undef, %bb.i5 ], [ undef, %FontSize.exit ] ; <%struct.FONT_INFO*> [#uses=1]
- %23 = getelementptr %struct.FONT_INFO* %22, i32 %19, i32 5 ; <%struct.rec**> [#uses=0]
- %24 = call i32 (%struct.FILE*, i8*, ...)* @fprintf(%struct.FILE* undef, i8* getelementptr ([8 x i8]* @.str1822946, i32 0, i32 0), i32 %18, i8* null) nounwind ; <i32> [#uses=0]
+ %23 = getelementptr %struct.FONT_INFO, %struct.FONT_INFO* %22, i32 %19, i32 5 ; <%struct.rec**> [#uses=0]
+ %24 = call i32 (%struct.FILE*, i8*, ...) @fprintf(%struct.FILE* undef, i8* getelementptr ([8 x i8], [8 x i8]* @.str1822946, i32 0, i32 0), i32 %18, i8* null) nounwind ; <i32> [#uses=0]
br label %bb10
bb10: ; preds = %FontName.exit, %bb8
- %25 = call i32 @"\01_fwrite"(i8* getelementptr ([11 x i8]* @.str1842948, i32 0, i32 0), i32 1, i32 10, i8* undef) nounwind ; <i32> [#uses=0]
+ %25 = call i32 @"\01_fwrite"(i8* getelementptr ([11 x i8], [11 x i8]* @.str1842948, i32 0, i32 0), i32 1, i32 10, i8* undef) nounwind ; <i32> [#uses=0]
%26 = sub i32 %rowmark, undef ; <i32> [#uses=1]
- %27 = load %struct.FILE** @out_fp, align 4 ; <%struct.FILE*> [#uses=1]
- %28 = call i32 (%struct.FILE*, i8*, ...)* @fprintf(%struct.FILE* %27, i8* getelementptr ([17 x i8]* @.str212784, i32 0, i32 0), i32 undef, i32 %26) nounwind ; <i32> [#uses=0]
+ %27 = load %struct.FILE*, %struct.FILE** @out_fp, align 4 ; <%struct.FILE*> [#uses=1]
+ %28 = call i32 (%struct.FILE*, i8*, ...) @fprintf(%struct.FILE* %27, i8* getelementptr ([17 x i8], [17 x i8]* @.str212784, i32 0, i32 0), i32 undef, i32 %26) nounwind ; <i32> [#uses=0]
store i32 0, i32* @cpexists, align 4
- %29 = call i32 (%struct.FILE*, i8*, ...)* @fprintf(%struct.FILE* undef, i8* getelementptr ([17 x i8]* @.str192782, i32 0, i32 0), double 2.000000e+01, double 2.000000e+01) nounwind ; <i32> [#uses=0]
- %30 = getelementptr %struct.rec* %y.0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0 ; <i32*> [#uses=1]
- %31 = load i32* %30, align 4 ; <i32> [#uses=1]
+ %29 = call i32 (%struct.FILE*, i8*, ...) @fprintf(%struct.FILE* undef, i8* getelementptr ([17 x i8], [17 x i8]* @.str192782, i32 0, i32 0), double 2.000000e+01, double 2.000000e+01) nounwind ; <i32> [#uses=0]
+ %30 = getelementptr %struct.rec, %struct.rec* %y.0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0 ; <i32*> [#uses=1]
+ %31 = load i32, i32* %30, align 4 ; <i32> [#uses=1]
%32 = sub i32 0, %31 ; <i32> [#uses=1]
- %33 = load i32* undef, align 4 ; <i32> [#uses=1]
+ %33 = load i32, i32* undef, align 4 ; <i32> [#uses=1]
%34 = sub i32 0, %33 ; <i32> [#uses=1]
- %35 = load %struct.FILE** @out_fp, align 4 ; <%struct.FILE*> [#uses=1]
- %36 = call i32 (%struct.FILE*, i8*, ...)* @fprintf(%struct.FILE* %35, i8* getelementptr ([17 x i8]* @.str212784, i32 0, i32 0), i32 %32, i32 %34) nounwind ; <i32> [#uses=0]
+ %35 = load %struct.FILE*, %struct.FILE** @out_fp, align 4 ; <%struct.FILE*> [#uses=1]
+ %36 = call i32 (%struct.FILE*, i8*, ...) @fprintf(%struct.FILE* %35, i8* getelementptr ([17 x i8], [17 x i8]* @.str212784, i32 0, i32 0), i32 %32, i32 %34) nounwind ; <i32> [#uses=0]
store i32 0, i32* @cpexists, align 4
- %37 = load %struct.rec** null, align 4 ; <%struct.rec*> [#uses=1]
- %38 = getelementptr %struct.rec* %37, i32 0, i32 0, i32 4 ; <%struct.FOURTH_UNION*> [#uses=1]
- %39 = call i32 (%struct.FILE*, i8*, ...)* @fprintf(%struct.FILE* undef, i8* getelementptr ([23 x i8]* @.str1852949, i32 0, i32 0), %struct.FOURTH_UNION* %38) nounwind ; <i32> [#uses=0]
- %buff14 = getelementptr [512 x i8]* %buff, i32 0, i32 0 ; <i8*> [#uses=5]
+ %37 = load %struct.rec*, %struct.rec** null, align 4 ; <%struct.rec*> [#uses=1]
+ %38 = getelementptr %struct.rec, %struct.rec* %37, i32 0, i32 0, i32 4 ; <%struct.FOURTH_UNION*> [#uses=1]
+ %39 = call i32 (%struct.FILE*, i8*, ...) @fprintf(%struct.FILE* undef, i8* getelementptr ([23 x i8], [23 x i8]* @.str1852949, i32 0, i32 0), %struct.FOURTH_UNION* %38) nounwind ; <i32> [#uses=0]
+ %buff14 = getelementptr [512 x i8], [512 x i8]* %buff, i32 0, i32 0 ; <i8*> [#uses=5]
%40 = call i8* @fgets(i8* %buff14, i32 512, %struct.FILE* %12) nounwind ; <i8*> [#uses=0]
%iftmp.506.0 = select i1 undef, i32 2, i32 0 ; <i32> [#uses=1]
- %41 = getelementptr [512 x i8]* %buff, i32 0, i32 26 ; <i8*> [#uses=1]
+ %41 = getelementptr [512 x i8], [512 x i8]* %buff, i32 0, i32 26 ; <i8*> [#uses=1]
br label %bb100.outer.outer
bb100.outer.outer: ; preds = %bb83, %bb10
%state.0.ph.ph = phi i32 [ %iftmp.506.0, %bb10 ], [ undef, %bb83 ] ; <i32> [#uses=1]
%x_addr.0.ph.ph = phi %struct.rec* [ %x, %bb10 ], [ %71, %bb83 ] ; <%struct.rec*> [#uses=1]
- %42 = getelementptr %struct.rec* %x_addr.0.ph.ph, i32 0, i32 0, i32 1, i32 0 ; <%struct.FILE_POS*> [#uses=0]
+ %42 = getelementptr %struct.rec, %struct.rec* %x_addr.0.ph.ph, i32 0, i32 0, i32 1, i32 0 ; <%struct.FILE_POS*> [#uses=0]
br label %bb100.outer
bb.i80: ; preds = %bb3.i85
@@ -197,19 +197,19 @@ bb.i80: ; preds = %bb3.i85
bb2.i84: ; preds = %bb100.outer, %bb.i80
%indvar.i81 = phi i32 [ %indvar.next.i79, %bb.i80 ], [ 0, %bb100.outer ] ; <i32> [#uses=3]
- %pp.0.i82 = getelementptr [27 x i8]* @.str141878, i32 0, i32 %indvar.i81 ; <i8*> [#uses=2]
- %sp.0.i83 = getelementptr [512 x i8]* %buff, i32 0, i32 %indvar.i81 ; <i8*> [#uses=1]
- %44 = load i8* %sp.0.i83, align 1 ; <i8> [#uses=2]
+ %pp.0.i82 = getelementptr [27 x i8], [27 x i8]* @.str141878, i32 0, i32 %indvar.i81 ; <i8*> [#uses=2]
+ %sp.0.i83 = getelementptr [512 x i8], [512 x i8]* %buff, i32 0, i32 %indvar.i81 ; <i8*> [#uses=1]
+ %44 = load i8, i8* %sp.0.i83, align 1 ; <i8> [#uses=2]
%45 = icmp eq i8 %44, 0 ; <i1> [#uses=1]
br i1 %45, label %StringBeginsWith.exit88thread-split, label %bb3.i85
bb3.i85: ; preds = %bb2.i84
- %46 = load i8* %pp.0.i82, align 1 ; <i8> [#uses=3]
+ %46 = load i8, i8* %pp.0.i82, align 1 ; <i8> [#uses=3]
%47 = icmp eq i8 %46, 0 ; <i1> [#uses=1]
br i1 %47, label %StringBeginsWith.exit88, label %bb.i80
StringBeginsWith.exit88thread-split: ; preds = %bb2.i84
- %.pr = load i8* %pp.0.i82 ; <i8> [#uses=1]
+ %.pr = load i8, i8* %pp.0.i82 ; <i8> [#uses=1]
br label %StringBeginsWith.exit88
StringBeginsWith.exit88: ; preds = %StringBeginsWith.exit88thread-split, %bb3.i85
@@ -224,17 +224,17 @@ bb2.i75: ; preds = %bb2.i.i68
br label %bb3.i77
bb3.i77: ; preds = %bb2.i75, %StringBeginsWith.exit88
- %sp.0.i76 = getelementptr [512 x i8]* %buff, i32 0, i32 undef ; <i8*> [#uses=1]
- %49 = load i8* %sp.0.i76, align 1 ; <i8> [#uses=1]
+ %sp.0.i76 = getelementptr [512 x i8], [512 x i8]* %buff, i32 0, i32 undef ; <i8*> [#uses=1]
+ %49 = load i8, i8* %sp.0.i76, align 1 ; <i8> [#uses=1]
%50 = icmp eq i8 %49, 0 ; <i1> [#uses=1]
br i1 %50, label %bb24, label %bb2.i.i68
bb24: ; preds = %bb3.i77
%51 = call %struct.rec* @MakeWord(i32 11, i8* %41, %struct.FILE_POS* bitcast (%4* @no_file_pos to %struct.FILE_POS*)) nounwind ; <%struct.rec*> [#uses=0]
- %52 = load i8* getelementptr ([150 x i8]* @zz_lengths, i32 0, i32 0), align 4 ; <i8> [#uses=1]
+ %52 = load i8, i8* getelementptr ([150 x i8], [150 x i8]* @zz_lengths, i32 0, i32 0), align 4 ; <i8> [#uses=1]
%53 = zext i8 %52 to i32 ; <i32> [#uses=2]
- %54 = getelementptr [524 x %struct.rec*]* @zz_free, i32 0, i32 %53 ; <%struct.rec**> [#uses=2]
- %55 = load %struct.rec** %54, align 4 ; <%struct.rec*> [#uses=3]
+ %54 = getelementptr [524 x %struct.rec*], [524 x %struct.rec*]* @zz_free, i32 0, i32 %53 ; <%struct.rec**> [#uses=2]
+ %55 = load %struct.rec*, %struct.rec** %54, align 4 ; <%struct.rec*> [#uses=3]
%56 = icmp eq %struct.rec* %55, null ; <i1> [#uses=1]
br i1 %56, label %bb27, label %bb28
@@ -245,7 +245,7 @@ bb.i56: ; preds = %bb27
br i1 undef, label %bb1.i58, label %bb2.i60
bb1.i58: ; preds = %bb.i56
- call void (i32, i32, i8*, i32, %struct.FILE_POS*, ...)* @Error(i32 31, i32 1, i8* getelementptr ([32 x i8]* @.str1575, i32 0, i32 0), i32 1, %struct.FILE_POS* bitcast (%4* @no_file_pos to %struct.FILE_POS*)) nounwind
+ call void (i32, i32, i8*, i32, %struct.FILE_POS*, ...) @Error(i32 31, i32 1, i8* getelementptr ([32 x i8], [32 x i8]* @.str1575, i32 0, i32 0), i32 1, %struct.FILE_POS* bitcast (%4* @no_file_pos to %struct.FILE_POS*)) nounwind
br label %bb2.i60
bb2.i60: ; preds = %bb1.i58, %bb.i56
@@ -255,14 +255,14 @@ bb2.i60: ; preds = %bb1.i58, %bb.i56
GetMemory.exit62: ; preds = %bb2.i60, %bb27
%57 = phi i8** [ %.pre1.i59, %bb2.i60 ], [ undef, %bb27 ] ; <i8**> [#uses=1]
- %58 = getelementptr i8** %57, i32 %53 ; <i8**> [#uses=1]
+ %58 = getelementptr i8*, i8** %57, i32 %53 ; <i8**> [#uses=1]
store i8** %58, i8*** @next_free.4772, align 4
store %struct.rec* undef, %struct.rec** @zz_hold, align 4
br label %bb29
bb28: ; preds = %bb24
store %struct.rec* %55, %struct.rec** @zz_hold, align 4
- %59 = load %struct.rec** null, align 4 ; <%struct.rec*> [#uses=1]
+ %59 = load %struct.rec*, %struct.rec** null, align 4 ; <%struct.rec*> [#uses=1]
store %struct.rec* %59, %struct.rec** %54, align 4
br label %bb29
@@ -280,7 +280,7 @@ bb35: ; preds = %bb31, %bb29
br i1 undef, label %bb41, label %bb37
bb37: ; preds = %bb35
- %60 = load %struct.rec** null, align 4 ; <%struct.rec*> [#uses=1]
+ %60 = load %struct.rec*, %struct.rec** null, align 4 ; <%struct.rec*> [#uses=1]
store %struct.rec* %60, %struct.rec** undef
store %struct.rec* undef, %struct.rec** null
store %struct.rec* %.pre184, %struct.rec** null, align 4
@@ -297,12 +297,12 @@ bb.i47: ; preds = %bb3.i52
br i1 %63, label %bb2.i51, label %bb2.i41
bb2.i51: ; preds = %bb.i47, %bb2.i.i68, %StringBeginsWith.exit88, %bb.i80
- %pp.0.i49 = getelementptr [17 x i8]* @.str1872951, i32 0, i32 0 ; <i8*> [#uses=1]
- %64 = load i8* null, align 1 ; <i8> [#uses=1]
+ %pp.0.i49 = getelementptr [17 x i8], [17 x i8]* @.str1872951, i32 0, i32 0 ; <i8*> [#uses=1]
+ %64 = load i8, i8* null, align 1 ; <i8> [#uses=1]
br i1 false, label %StringBeginsWith.exit55thread-split, label %bb3.i52
bb3.i52: ; preds = %bb2.i51
- %65 = load i8* %pp.0.i49, align 1 ; <i8> [#uses=1]
+ %65 = load i8, i8* %pp.0.i49, align 1 ; <i8> [#uses=1]
br i1 false, label %StringBeginsWith.exit55, label %bb.i47
StringBeginsWith.exit55thread-split: ; preds = %bb2.i51
@@ -318,11 +318,11 @@ bb2.i41: ; preds = %bb2.i41, %bb49, %StringBeginsWith.exit55, %bb.i47
br i1 false, label %bb2.i41, label %bb2.i.i15
bb2.i.i15: ; preds = %bb2.i41
- %pp.0.i.i13 = getelementptr [6 x i8]* @.str742838, i32 0, i32 0 ; <i8*> [#uses=1]
+ %pp.0.i.i13 = getelementptr [6 x i8], [6 x i8]* @.str742838, i32 0, i32 0 ; <i8*> [#uses=1]
br i1 false, label %StringBeginsWith.exitthread-split.i18, label %bb3.i.i16
bb3.i.i16: ; preds = %bb2.i.i15
- %66 = load i8* %pp.0.i.i13, align 1 ; <i8> [#uses=1]
+ %66 = load i8, i8* %pp.0.i.i13, align 1 ; <i8> [#uses=1]
br label %StringBeginsWith.exit.i20
StringBeginsWith.exitthread-split.i18: ; preds = %bb2.i.i15
@@ -335,9 +335,9 @@ StringBeginsWith.exit.i20: ; preds = %StringBeginsWith.exitthread-split.i18, %b
bb2.i6.i26: ; preds = %bb2.i6.i26, %StringBeginsWith.exit.i20
%indvar.i3.i23 = phi i32 [ %indvar.next.i1.i21, %bb2.i6.i26 ], [ 0, %StringBeginsWith.exit.i20 ] ; <i32> [#uses=3]
- %sp.0.i5.i25 = getelementptr [512 x i8]* %buff, i32 0, i32 %indvar.i3.i23 ; <i8*> [#uses=0]
- %pp.0.i4.i24 = getelementptr [10 x i8]* @.str752839, i32 0, i32 %indvar.i3.i23 ; <i8*> [#uses=1]
- %68 = load i8* %pp.0.i4.i24, align 1 ; <i8> [#uses=0]
+ %sp.0.i5.i25 = getelementptr [512 x i8], [512 x i8]* %buff, i32 0, i32 %indvar.i3.i23 ; <i8*> [#uses=0]
+ %pp.0.i4.i24 = getelementptr [10 x i8], [10 x i8]* @.str752839, i32 0, i32 %indvar.i3.i23 ; <i8*> [#uses=1]
+ %68 = load i8, i8* %pp.0.i4.i24, align 1 ; <i8> [#uses=0]
%indvar.next.i1.i21 = add i32 %indvar.i3.i23, 1 ; <i32> [#uses=1]
br i1 undef, label %bb2.i6.i26, label %bb55
@@ -368,10 +368,10 @@ StringBeginsWith.exit: ; preds = %StringBeginsWith.exitthread-split, %bb3.i
bb66: ; preds = %StringBeginsWith.exit
%71 = call %struct.rec* @MakeWord(i32 11, i8* undef, %struct.FILE_POS* bitcast (%4* @no_file_pos to %struct.FILE_POS*)) nounwind ; <%struct.rec*> [#uses=4]
- %72 = load i8* getelementptr ([150 x i8]* @zz_lengths, i32 0, i32 0), align 4 ; <i8> [#uses=1]
+ %72 = load i8, i8* getelementptr ([150 x i8], [150 x i8]* @zz_lengths, i32 0, i32 0), align 4 ; <i8> [#uses=1]
%73 = zext i8 %72 to i32 ; <i32> [#uses=2]
- %74 = getelementptr [524 x %struct.rec*]* @zz_free, i32 0, i32 %73 ; <%struct.rec**> [#uses=2]
- %75 = load %struct.rec** %74, align 4 ; <%struct.rec*> [#uses=3]
+ %74 = getelementptr [524 x %struct.rec*], [524 x %struct.rec*]* @zz_free, i32 0, i32 %73 ; <%struct.rec**> [#uses=2]
+ %75 = load %struct.rec*, %struct.rec** %74, align 4 ; <%struct.rec*> [#uses=3]
%76 = icmp eq %struct.rec* %75, null ; <i1> [#uses=1]
br i1 %76, label %bb69, label %bb70
@@ -385,43 +385,43 @@ bb.i2: ; preds = %bb69
br i1 undef, label %bb1.i3, label %bb2.i4
bb1.i3: ; preds = %bb.i2
- call void (i32, i32, i8*, i32, %struct.FILE_POS*, ...)* @Error(i32 31, i32 1, i8* getelementptr ([32 x i8]* @.str1575, i32 0, i32 0), i32 1, %struct.FILE_POS* bitcast (%4* @no_file_pos to %struct.FILE_POS*)) nounwind
+ call void (i32, i32, i8*, i32, %struct.FILE_POS*, ...) @Error(i32 31, i32 1, i8* getelementptr ([32 x i8], [32 x i8]* @.str1575, i32 0, i32 0), i32 1, %struct.FILE_POS* bitcast (%4* @no_file_pos to %struct.FILE_POS*)) nounwind
br label %bb2.i4
bb2.i4: ; preds = %bb1.i3, %bb.i2
%.pre1.i = phi i8** [ undef, %bb1.i3 ], [ %78, %bb.i2 ] ; <i8**> [#uses=1]
%79 = phi i8** [ undef, %bb1.i3 ], [ %78, %bb.i2 ] ; <i8**> [#uses=1]
- %80 = getelementptr i8** %79, i32 1020 ; <i8**> [#uses=1]
+ %80 = getelementptr i8*, i8** %79, i32 1020 ; <i8**> [#uses=1]
store i8** %80, i8*** @top_free.4773, align 4
br label %GetMemory.exit
GetMemory.exit: ; preds = %bb2.i4, %bb69
%81 = phi i8** [ %.pre1.i, %bb2.i4 ], [ undef, %bb69 ] ; <i8**> [#uses=2]
%82 = bitcast i8** %81 to %struct.rec* ; <%struct.rec*> [#uses=3]
- %83 = getelementptr i8** %81, i32 %73 ; <i8**> [#uses=1]
+ %83 = getelementptr i8*, i8** %81, i32 %73 ; <i8**> [#uses=1]
store i8** %83, i8*** @next_free.4772, align 4
store %struct.rec* %82, %struct.rec** @zz_hold, align 4
br label %bb71
bb70: ; preds = %bb66
- %84 = load %struct.rec** null, align 4 ; <%struct.rec*> [#uses=1]
+ %84 = load %struct.rec*, %struct.rec** null, align 4 ; <%struct.rec*> [#uses=1]
store %struct.rec* %84, %struct.rec** %74, align 4
br label %bb71
bb71: ; preds = %bb70, %GetMemory.exit
%.pre185 = phi %struct.rec* [ %75, %bb70 ], [ %82, %GetMemory.exit ] ; <%struct.rec*> [#uses=8]
%85 = phi %struct.rec* [ %75, %bb70 ], [ %82, %GetMemory.exit ] ; <%struct.rec*> [#uses=1]
- %86 = getelementptr %struct.rec* %85, i32 0, i32 0, i32 1, i32 0, i32 0 ; <i8*> [#uses=0]
- %87 = getelementptr %struct.rec* %.pre185, i32 0, i32 0, i32 0, i32 1, i32 1 ; <%struct.rec**> [#uses=0]
- %88 = getelementptr %struct.rec* %.pre185, i32 0, i32 0, i32 0, i32 1, i32 0 ; <%struct.rec**> [#uses=1]
+ %86 = getelementptr %struct.rec, %struct.rec* %85, i32 0, i32 0, i32 1, i32 0, i32 0 ; <i8*> [#uses=0]
+ %87 = getelementptr %struct.rec, %struct.rec* %.pre185, i32 0, i32 0, i32 0, i32 1, i32 1 ; <%struct.rec**> [#uses=0]
+ %88 = getelementptr %struct.rec, %struct.rec* %.pre185, i32 0, i32 0, i32 0, i32 1, i32 0 ; <%struct.rec**> [#uses=1]
store %struct.rec* %.pre185, %struct.rec** @xx_link, align 4
store %struct.rec* %.pre185, %struct.rec** @zz_res, align 4
- %89 = load %struct.rec** @needs, align 4 ; <%struct.rec*> [#uses=2]
+ %89 = load %struct.rec*, %struct.rec** @needs, align 4 ; <%struct.rec*> [#uses=2]
store %struct.rec* %89, %struct.rec** @zz_hold, align 4
br i1 false, label %bb77, label %bb73
bb73: ; preds = %bb71
- %90 = getelementptr %struct.rec* %89, i32 0, i32 0, i32 0, i32 0, i32 0 ; <%struct.rec**> [#uses=1]
+ %90 = getelementptr %struct.rec, %struct.rec* %89, i32 0, i32 0, i32 0, i32 0, i32 0 ; <%struct.rec**> [#uses=1]
store %struct.rec* null, %struct.rec** @zz_tmp, align 4
store %struct.rec* %.pre185, %struct.rec** %90
store %struct.rec* %.pre185, %struct.rec** undef, align 4
@@ -433,11 +433,11 @@ bb77: ; preds = %bb73, %bb71
br i1 undef, label %bb83, label %bb79
bb79: ; preds = %bb77
- %91 = getelementptr %struct.rec* %71, i32 0, i32 0, i32 0, i32 1, i32 0 ; <%struct.rec**> [#uses=1]
+ %91 = getelementptr %struct.rec, %struct.rec* %71, i32 0, i32 0, i32 0, i32 1, i32 0 ; <%struct.rec**> [#uses=1]
store %struct.rec* null, %struct.rec** @zz_tmp, align 4
- %92 = load %struct.rec** %88, align 4 ; <%struct.rec*> [#uses=1]
+ %92 = load %struct.rec*, %struct.rec** %88, align 4 ; <%struct.rec*> [#uses=1]
store %struct.rec* %92, %struct.rec** %91
- %93 = getelementptr %struct.rec* undef, i32 0, i32 0, i32 0, i32 1, i32 1 ; <%struct.rec**> [#uses=1]
+ %93 = getelementptr %struct.rec, %struct.rec* undef, i32 0, i32 0, i32 0, i32 1, i32 1 ; <%struct.rec**> [#uses=1]
store %struct.rec* %71, %struct.rec** %93, align 4
store %struct.rec* %.pre185, %struct.rec** undef, align 4
br label %bb83
@@ -467,11 +467,11 @@ bb2.i6.i: ; preds = %bb.i2.i, %StringBeginsWith.exit.i, %bb.i.i
br i1 undef, label %strip_out.exitthread-split, label %bb3.i7.i
bb3.i7.i: ; preds = %bb2.i6.i
- %94 = load i8* undef, align 1 ; <i8> [#uses=1]
+ %94 = load i8, i8* undef, align 1 ; <i8> [#uses=1]
br i1 undef, label %strip_out.exit, label %bb.i2.i
strip_out.exitthread-split: ; preds = %bb2.i6.i
- %.pr100 = load i8* undef ; <i8> [#uses=1]
+ %.pr100 = load i8, i8* undef ; <i8> [#uses=1]
br label %strip_out.exit
strip_out.exit: ; preds = %strip_out.exitthread-split, %bb3.i7.i
@@ -497,12 +497,12 @@ bb101.split: ; preds = %bb100.outer
br i1 %97, label %bb103, label %bb102
bb102: ; preds = %bb101.split
- %98 = call i32 @remove(i8* getelementptr ([9 x i8]* @.str19294, i32 0, i32 0)) nounwind ; <i32> [#uses=0]
+ %98 = call i32 @remove(i8* getelementptr ([9 x i8], [9 x i8]* @.str19294, i32 0, i32 0)) nounwind ; <i32> [#uses=0]
unreachable
bb103: ; preds = %bb101.split
- %99 = load %struct.FILE** @out_fp, align 4 ; <%struct.FILE*> [#uses=1]
- %100 = call i32 (%struct.FILE*, i8*, ...)* @fprintf(%struct.FILE* %99, i8* getelementptr ([26 x i8]* @.str1932957, i32 0, i32 0)) nounwind ; <i32> [#uses=0]
+ %99 = load %struct.FILE*, %struct.FILE** @out_fp, align 4 ; <%struct.FILE*> [#uses=1]
+ %100 = call i32 (%struct.FILE*, i8*, ...) @fprintf(%struct.FILE* %99, i8* getelementptr ([26 x i8], [26 x i8]* @.str1932957, i32 0, i32 0)) nounwind ; <i32> [#uses=0]
store i32 0, i32* @wordcount, align 4
ret void
}
diff --git a/test/CodeGen/Thumb2/2009-08-04-SubregLoweringBug3.ll b/test/CodeGen/Thumb2/2009-08-04-SubregLoweringBug3.ll
index ad32dc9d0a07..66ed876f98e3 100644
--- a/test/CodeGen/Thumb2/2009-08-04-SubregLoweringBug3.ll
+++ b/test/CodeGen/Thumb2/2009-08-04-SubregLoweringBug3.ll
@@ -17,7 +17,7 @@ bb5: ; preds = %bb5, %bb3.preheader
br i1 undef, label %bb11, label %bb5
bb11: ; preds = %bb5
- %0 = load i32* undef, align 4 ; <i32> [#uses=1]
+ %0 = load i32, i32* undef, align 4 ; <i32> [#uses=1]
%1 = xor i32 %0, 123459876 ; <i32> [#uses=1]
%2 = sdiv i32 %1, 127773 ; <i32> [#uses=1]
%3 = mul i32 %2, 2836 ; <i32> [#uses=1]
diff --git a/test/CodeGen/Thumb2/2009-08-06-SpDecBug.ll b/test/CodeGen/Thumb2/2009-08-06-SpDecBug.ll
index 59c236732118..a451321838e7 100644
--- a/test/CodeGen/Thumb2/2009-08-06-SpDecBug.ll
+++ b/test/CodeGen/Thumb2/2009-08-06-SpDecBug.ll
@@ -7,8 +7,8 @@ entry:
; CHECK-LABEL: __gcov_execlp:
; CHECK: sub sp, #8
; CHECK: push
-; CHECK: add r7, sp, #4
-; CHECK: sub.w r4, r7, #4
+; CHECK: add r7, sp, #8
+; CHECK: sub.w r4, r7, #8
; CHECK: mov sp, r4
; CHECK-NOT: mov sp, r7
; CHECK: add sp, #8
diff --git a/test/CodeGen/Thumb2/2009-08-07-NeonFPBug.ll b/test/CodeGen/Thumb2/2009-08-07-NeonFPBug.ll
index f3baeb74e2cb..5480868d7a66 100644
--- a/test/CodeGen/Thumb2/2009-08-07-NeonFPBug.ll
+++ b/test/CodeGen/Thumb2/2009-08-07-NeonFPBug.ll
@@ -33,12 +33,12 @@ entry:
br label %bb
bb: ; preds = %bb, %entry
- %0 = load float* undef, align 4 ; <float> [#uses=1]
+ %0 = load float, float* undef, align 4 ; <float> [#uses=1]
%1 = fmul float undef, %0 ; <float> [#uses=2]
%tmp73 = add i32 0, 224 ; <i32> [#uses=1]
- %scevgep74 = getelementptr i8* null, i32 %tmp73 ; <i8*> [#uses=1]
+ %scevgep74 = getelementptr i8, i8* null, i32 %tmp73 ; <i8*> [#uses=1]
%scevgep7475 = bitcast i8* %scevgep74 to float* ; <float*> [#uses=1]
- %2 = load float* null, align 4 ; <float> [#uses=1]
+ %2 = load float, float* null, align 4 ; <float> [#uses=1]
%3 = fmul float 0.000000e+00, %2 ; <float> [#uses=2]
%4 = fadd float %1, %3 ; <float> [#uses=1]
%5 = fsub float %1, %3 ; <float> [#uses=2]
@@ -51,7 +51,7 @@ bb: ; preds = %bb, %entry
%12 = sitofp i16 undef to float ; <float> [#uses=1]
%13 = fmul float %12, 0.000000e+00 ; <float> [#uses=2]
%14 = sitofp i16 undef to float ; <float> [#uses=1]
- %15 = load float* %scevgep7475, align 4 ; <float> [#uses=1]
+ %15 = load float, float* %scevgep7475, align 4 ; <float> [#uses=1]
%16 = fmul float %14, %15 ; <float> [#uses=2]
%17 = fadd float undef, undef ; <float> [#uses=2]
%18 = fadd float %13, %16 ; <float> [#uses=2]
diff --git a/test/CodeGen/Thumb2/2009-08-10-ISelBug.ll b/test/CodeGen/Thumb2/2009-08-10-ISelBug.ll
index 974ce50d6d44..2bbed1beae15 100644
--- a/test/CodeGen/Thumb2/2009-08-10-ISelBug.ll
+++ b/test/CodeGen/Thumb2/2009-08-10-ISelBug.ll
@@ -3,11 +3,11 @@
define float @t1(i32 %v0) nounwind {
entry:
store i32 undef, i32* undef, align 4
- %0 = load [4 x i8]** undef, align 4 ; <[4 x i8]*> [#uses=1]
- %1 = load i8* undef, align 1 ; <i8> [#uses=1]
+ %0 = load [4 x i8]*, [4 x i8]** undef, align 4 ; <[4 x i8]*> [#uses=1]
+ %1 = load i8, i8* undef, align 1 ; <i8> [#uses=1]
%2 = zext i8 %1 to i32 ; <i32> [#uses=1]
- %3 = getelementptr [4 x i8]* %0, i32 %v0, i32 0 ; <i8*> [#uses=1]
- %4 = load i8* %3, align 1 ; <i8> [#uses=1]
+ %3 = getelementptr [4 x i8], [4 x i8]* %0, i32 %v0, i32 0 ; <i8*> [#uses=1]
+ %4 = load i8, i8* %3, align 1 ; <i8> [#uses=1]
%5 = zext i8 %4 to i32 ; <i32> [#uses=1]
%6 = sub i32 %5, %2 ; <i32> [#uses=1]
%7 = sitofp i32 %6 to float ; <float> [#uses=1]
diff --git a/test/CodeGen/Thumb2/2009-08-21-PostRAKill4.ll b/test/CodeGen/Thumb2/2009-08-21-PostRAKill4.ll
index 5cfc68d09408..04dcb9d03ad5 100644
--- a/test/CodeGen/Thumb2/2009-08-21-PostRAKill4.ll
+++ b/test/CodeGen/Thumb2/2009-08-21-PostRAKill4.ll
@@ -18,9 +18,9 @@ declare i32 @printf(i8* nocapture, ...) nounwind
define i32 @main() nounwind {
entry:
- %0 = tail call i32 (i8*, ...)* @printf(i8* getelementptr ([31 x i8]* @.str1, i32 0, i32 0), i32 1, i32 1, i32 1, i32 1, i32 1, i32 1) nounwind ; <i32> [#uses=0]
- %1 = tail call i32 (i8*, ...)* @printf(i8* getelementptr ([31 x i8]* @.str1, i32 0, i32 0), i32 -128, i32 116, i32 116, i32 -3852, i32 -31232, i32 -1708916736) nounwind ; <i32> [#uses=0]
- %2 = tail call i32 (i32, ...)* @getUnknown(i32 undef, i32 116, i32 116, i32 -3852, i32 -31232, i32 30556, i32 -1708916736) nounwind ; <i32> [#uses=1]
- %3 = tail call i32 (i8*, ...)* @printf(i8* getelementptr ([4 x i8]* @.str2, i32 0, i32 0), i32 %2) nounwind ; <i32> [#uses=0]
+ %0 = tail call i32 (i8*, ...) @printf(i8* getelementptr ([31 x i8], [31 x i8]* @.str1, i32 0, i32 0), i32 1, i32 1, i32 1, i32 1, i32 1, i32 1) nounwind ; <i32> [#uses=0]
+ %1 = tail call i32 (i8*, ...) @printf(i8* getelementptr ([31 x i8], [31 x i8]* @.str1, i32 0, i32 0), i32 -128, i32 116, i32 116, i32 -3852, i32 -31232, i32 -1708916736) nounwind ; <i32> [#uses=0]
+ %2 = tail call i32 (i32, ...) @getUnknown(i32 undef, i32 116, i32 116, i32 -3852, i32 -31232, i32 30556, i32 -1708916736) nounwind ; <i32> [#uses=1]
+ %3 = tail call i32 (i8*, ...) @printf(i8* getelementptr ([4 x i8], [4 x i8]* @.str2, i32 0, i32 0), i32 %2) nounwind ; <i32> [#uses=0]
ret i32 0
}
diff --git a/test/CodeGen/Thumb2/2009-09-01-PostRAProlog.ll b/test/CodeGen/Thumb2/2009-09-01-PostRAProlog.ll
index 06a152d56e4d..84f69f4b6e0b 100644
--- a/test/CodeGen/Thumb2/2009-09-01-PostRAProlog.ll
+++ b/test/CodeGen/Thumb2/2009-09-01-PostRAProlog.ll
@@ -37,25 +37,25 @@ declare i32 @getchar() nounwind
define internal i32 @transpose() nounwind readonly {
; CHECK: push
entry:
- %0 = load i32* getelementptr inbounds ([128 x i32]* @columns, i32 0, i32 1), align 4 ; <i32> [#uses=1]
+ %0 = load i32, i32* getelementptr inbounds ([128 x i32], [128 x i32]* @columns, i32 0, i32 1), align 4 ; <i32> [#uses=1]
%1 = shl i32 %0, 7 ; <i32> [#uses=1]
- %2 = load i32* getelementptr inbounds ([128 x i32]* @columns, i32 0, i32 2), align 4 ; <i32> [#uses=1]
+ %2 = load i32, i32* getelementptr inbounds ([128 x i32], [128 x i32]* @columns, i32 0, i32 2), align 4 ; <i32> [#uses=1]
%3 = or i32 %1, %2 ; <i32> [#uses=1]
%4 = shl i32 %3, 7 ; <i32> [#uses=1]
- %5 = load i32* getelementptr inbounds ([128 x i32]* @columns, i32 0, i32 3), align 4 ; <i32> [#uses=1]
+ %5 = load i32, i32* getelementptr inbounds ([128 x i32], [128 x i32]* @columns, i32 0, i32 3), align 4 ; <i32> [#uses=1]
%6 = or i32 %4, %5 ; <i32> [#uses=3]
- %7 = load i32* getelementptr inbounds ([128 x i32]* @columns, i32 0, i32 7), align 4 ; <i32> [#uses=1]
+ %7 = load i32, i32* getelementptr inbounds ([128 x i32], [128 x i32]* @columns, i32 0, i32 7), align 4 ; <i32> [#uses=1]
%8 = shl i32 %7, 7 ; <i32> [#uses=1]
- %9 = load i32* getelementptr inbounds ([128 x i32]* @columns, i32 0, i32 6), align 4 ; <i32> [#uses=1]
+ %9 = load i32, i32* getelementptr inbounds ([128 x i32], [128 x i32]* @columns, i32 0, i32 6), align 4 ; <i32> [#uses=1]
%10 = or i32 %8, %9 ; <i32> [#uses=1]
%11 = shl i32 %10, 7 ; <i32> [#uses=1]
- %12 = load i32* getelementptr inbounds ([128 x i32]* @columns, i32 0, i32 5), align 4 ; <i32> [#uses=1]
+ %12 = load i32, i32* getelementptr inbounds ([128 x i32], [128 x i32]* @columns, i32 0, i32 5), align 4 ; <i32> [#uses=1]
%13 = or i32 %11, %12 ; <i32> [#uses=3]
%14 = icmp ugt i32 %6, %13 ; <i1> [#uses=2]
%.pn2.in.i = select i1 %14, i32 %6, i32 %13 ; <i32> [#uses=1]
%.pn1.in.i = select i1 %14, i32 %13, i32 %6 ; <i32> [#uses=1]
%.pn2.i = shl i32 %.pn2.in.i, 7 ; <i32> [#uses=1]
- %.pn3.i = load i32* getelementptr inbounds ([128 x i32]* @columns, i32 0, i32 4) ; <i32> [#uses=1]
+ %.pn3.i = load i32, i32* getelementptr inbounds ([128 x i32], [128 x i32]* @columns, i32 0, i32 4) ; <i32> [#uses=1]
%.pn.in.in.i = or i32 %.pn2.i, %.pn3.i ; <i32> [#uses=1]
%.pn.in.i = zext i32 %.pn.in.in.i to i64 ; <i64> [#uses=1]
%.pn.i = shl i64 %.pn.in.i, 21 ; <i64> [#uses=1]
@@ -67,19 +67,19 @@ entry:
%18 = trunc i64 %17 to i32 ; <i32> [#uses=1]
%19 = urem i32 %16, 179 ; <i32> [#uses=1]
%20 = or i32 %19, 131072 ; <i32> [#uses=1]
- %21 = load i32** @ht, align 4 ; <i32*> [#uses=1]
+ %21 = load i32*, i32** @ht, align 4 ; <i32*> [#uses=1]
br label %bb5
bb: ; preds = %bb5
- %22 = getelementptr inbounds i32* %21, i32 %x.0 ; <i32*> [#uses=1]
- %23 = load i32* %22, align 4 ; <i32> [#uses=1]
+ %22 = getelementptr inbounds i32, i32* %21, i32 %x.0 ; <i32*> [#uses=1]
+ %23 = load i32, i32* %22, align 4 ; <i32> [#uses=1]
%24 = icmp eq i32 %23, %16 ; <i1> [#uses=1]
br i1 %24, label %bb1, label %bb2
bb1: ; preds = %bb
- %25 = load i8** @he, align 4 ; <i8*> [#uses=1]
- %26 = getelementptr inbounds i8* %25, i32 %x.0 ; <i8*> [#uses=1]
- %27 = load i8* %26, align 1 ; <i8> [#uses=1]
+ %25 = load i8*, i8** @he, align 4 ; <i8*> [#uses=1]
+ %26 = getelementptr inbounds i8, i8* %25, i32 %x.0 ; <i8*> [#uses=1]
+ %27 = load i8, i8* %26, align 1 ; <i8> [#uses=1]
%28 = sext i8 %27 to i32 ; <i32> [#uses=1]
ret i32 %28
diff --git a/test/CodeGen/Thumb2/2009-09-28-ITBlockBug.ll b/test/CodeGen/Thumb2/2009-09-28-ITBlockBug.ll
index a9a2478e4034..8fdff02f9c1b 100644
--- a/test/CodeGen/Thumb2/2009-09-28-ITBlockBug.ll
+++ b/test/CodeGen/Thumb2/2009-09-28-ITBlockBug.ll
@@ -25,8 +25,8 @@ if.then366: ; preds = %lor.end, %lor.end
unreachable
if.end371: ; preds = %lor.end
- %arrayidx56.2.i = getelementptr [4 x %struct.pix_pos]* %pix_a.i294, i32 0, i32 2 ; <%struct.pix_pos*> [#uses=1]
- %arrayidx56.3.i = getelementptr [4 x %struct.pix_pos]* %pix_a.i294, i32 0, i32 3 ; <%struct.pix_pos*> [#uses=1]
+ %arrayidx56.2.i = getelementptr [4 x %struct.pix_pos], [4 x %struct.pix_pos]* %pix_a.i294, i32 0, i32 2 ; <%struct.pix_pos*> [#uses=1]
+ %arrayidx56.3.i = getelementptr [4 x %struct.pix_pos], [4 x %struct.pix_pos]* %pix_a.i294, i32 0, i32 3 ; <%struct.pix_pos*> [#uses=1]
br i1 undef, label %for.body1857, label %for.end4557
for.body1857: ; preds = %if.end371
@@ -44,13 +44,13 @@ for.body1940: ; preds = %for.cond1933
br i1 undef, label %if.then1992, label %if.else2003
if.then1992: ; preds = %for.body1940
- %tmp14.i302 = load i32* undef ; <i32> [#uses=4]
+ %tmp14.i302 = load i32, i32* undef ; <i32> [#uses=4]
%add.i307452 = or i32 %shl1959, 1 ; <i32> [#uses=1]
%sub.i308 = add i32 %shl, -1 ; <i32> [#uses=4]
call void undef(i32 %tmp14.i302, i32 %sub.i308, i32 %shl1959, i32 0, %struct.pix_pos* undef) nounwind
- %tmp49.i309 = load void (i32, i32, i32, i32, %struct.pix_pos*)** @getNeighbour ; <void (i32, i32, i32, i32, %struct.pix_pos*)*> [#uses=1]
+ %tmp49.i309 = load void (i32, i32, i32, i32, %struct.pix_pos*)*, void (i32, i32, i32, i32, %struct.pix_pos*)** @getNeighbour ; <void (i32, i32, i32, i32, %struct.pix_pos*)*> [#uses=1]
call void %tmp49.i309(i32 %tmp14.i302, i32 %sub.i308, i32 %add.i307452, i32 0, %struct.pix_pos* null) nounwind
- %tmp49.1.i = load void (i32, i32, i32, i32, %struct.pix_pos*)** @getNeighbour ; <void (i32, i32, i32, i32, %struct.pix_pos*)*> [#uses=1]
+ %tmp49.1.i = load void (i32, i32, i32, i32, %struct.pix_pos*)*, void (i32, i32, i32, i32, %struct.pix_pos*)** @getNeighbour ; <void (i32, i32, i32, i32, %struct.pix_pos*)*> [#uses=1]
call void %tmp49.1.i(i32 %tmp14.i302, i32 %sub.i308, i32 undef, i32 0, %struct.pix_pos* %arrayidx56.2.i) nounwind
call void undef(i32 %tmp14.i302, i32 %sub.i308, i32 undef, i32 0, %struct.pix_pos* %arrayidx56.3.i) nounwind
unreachable
@@ -101,12 +101,12 @@ for.inc3040: ; preds = %for.inc3040, %for.c
if.then3689: ; preds = %for.cond2882.preheader
%add3695 = add nsw i32 %mul3693, %shl1959 ; <i32> [#uses=1]
%mul3697 = shl i32 %add3695, 2 ; <i32> [#uses=2]
- %arrayidx3705 = getelementptr inbounds i16* undef, i32 1 ; <i16*> [#uses=1]
- %tmp3706 = load i16* %arrayidx3705 ; <i16> [#uses=1]
+ %arrayidx3705 = getelementptr inbounds i16, i16* undef, i32 1 ; <i16*> [#uses=1]
+ %tmp3706 = load i16, i16* %arrayidx3705 ; <i16> [#uses=1]
%conv3707 = sext i16 %tmp3706 to i32 ; <i32> [#uses=1]
%add3708 = add nsw i32 %conv3707, %mul3697 ; <i32> [#uses=1]
- %arrayidx3724 = getelementptr inbounds i16* null, i32 1 ; <i16*> [#uses=1]
- %tmp3725 = load i16* %arrayidx3724 ; <i16> [#uses=1]
+ %arrayidx3724 = getelementptr inbounds i16, i16* null, i32 1 ; <i16*> [#uses=1]
+ %tmp3725 = load i16, i16* %arrayidx3724 ; <i16> [#uses=1]
%conv3726 = sext i16 %tmp3725 to i32 ; <i32> [#uses=1]
%add3727 = add nsw i32 %conv3726, %mul3697 ; <i32> [#uses=1]
br label %if.end3770
@@ -115,11 +115,11 @@ if.else3728: ; preds = %for.cond2882.prehea
%mul3733 = add i32 %shl1959, 1073741816 ; <i32> [#uses=1]
%add3735 = add nsw i32 %mul3733, %mul3693 ; <i32> [#uses=1]
%mul3737 = shl i32 %add3735, 2 ; <i32> [#uses=2]
- %tmp3746 = load i16* undef ; <i16> [#uses=1]
+ %tmp3746 = load i16, i16* undef ; <i16> [#uses=1]
%conv3747 = sext i16 %tmp3746 to i32 ; <i32> [#uses=1]
%add3748 = add nsw i32 %conv3747, %mul3737 ; <i32> [#uses=1]
- %arrayidx3765 = getelementptr inbounds i16* null, i32 1 ; <i16*> [#uses=1]
- %tmp3766 = load i16* %arrayidx3765 ; <i16> [#uses=1]
+ %arrayidx3765 = getelementptr inbounds i16, i16* null, i32 1 ; <i16*> [#uses=1]
+ %tmp3766 = load i16, i16* %arrayidx3765 ; <i16> [#uses=1]
%conv3767 = sext i16 %tmp3766 to i32 ; <i32> [#uses=1]
%add3768 = add nsw i32 %conv3767, %mul3737 ; <i32> [#uses=1]
br label %if.end3770
diff --git a/test/CodeGen/Thumb2/2009-11-11-ScavengerAssert.ll b/test/CodeGen/Thumb2/2009-11-11-ScavengerAssert.ll
index 956263b4fe2d..e283cb3434cc 100644
--- a/test/CodeGen/Thumb2/2009-11-11-ScavengerAssert.ll
+++ b/test/CodeGen/Thumb2/2009-11-11-ScavengerAssert.ll
@@ -7,7 +7,7 @@ declare void @Perl_mg_set(%struct.SV*) nounwind
define %struct.OP* @Perl_pp_complement() nounwind {
entry:
- %0 = load %struct.SV** null, align 4 ; <%struct.SV*> [#uses=2]
+ %0 = load %struct.SV*, %struct.SV** null, align 4 ; <%struct.SV*> [#uses=2]
br i1 undef, label %bb21, label %bb5
bb5: ; preds = %entry
@@ -17,15 +17,15 @@ bb6: ; preds = %bb5
br i1 undef, label %bb8, label %bb7
bb7: ; preds = %bb6
- %1 = getelementptr inbounds %struct.SV* %0, i32 0, i32 0 ; <i8**> [#uses=1]
- %2 = load i8** %1, align 4 ; <i8*> [#uses=1]
- %3 = getelementptr inbounds i8* %2, i32 12 ; <i8*> [#uses=1]
+ %1 = getelementptr inbounds %struct.SV, %struct.SV* %0, i32 0, i32 0 ; <i8**> [#uses=1]
+ %2 = load i8*, i8** %1, align 4 ; <i8*> [#uses=1]
+ %3 = getelementptr inbounds i8, i8* %2, i32 12 ; <i8*> [#uses=1]
%4 = bitcast i8* %3 to i32* ; <i32*> [#uses=1]
- %5 = load i32* %4, align 4 ; <i32> [#uses=1]
+ %5 = load i32, i32* %4, align 4 ; <i32> [#uses=1]
%storemerge5 = xor i32 %5, -1 ; <i32> [#uses=1]
call void @Perl_sv_setiv(%struct.SV* undef, i32 %storemerge5) nounwind
- %6 = getelementptr inbounds %struct.SV* undef, i32 0, i32 2 ; <i32*> [#uses=1]
- %7 = load i32* %6, align 4 ; <i32> [#uses=1]
+ %6 = getelementptr inbounds %struct.SV, %struct.SV* undef, i32 0, i32 2 ; <i32*> [#uses=1]
+ %7 = load i32, i32* %6, align 4 ; <i32> [#uses=1]
%8 = and i32 %7, 16384 ; <i32> [#uses=1]
%9 = icmp eq i32 %8, 0 ; <i1> [#uses=1]
br i1 %9, label %bb12, label %bb11
@@ -53,8 +53,8 @@ bb1.i: ; preds = %bb13
br label %Perl_sv_setuv.exit
Perl_sv_setuv.exit: ; preds = %bb1.i, %bb.i
- %11 = getelementptr inbounds %struct.SV* undef, i32 0, i32 2 ; <i32*> [#uses=1]
- %12 = load i32* %11, align 4 ; <i32> [#uses=1]
+ %11 = getelementptr inbounds %struct.SV, %struct.SV* undef, i32 0, i32 2 ; <i32*> [#uses=1]
+ %12 = load i32, i32* %11, align 4 ; <i32> [#uses=1]
%13 = and i32 %12, 16384 ; <i32> [#uses=1]
%14 = icmp eq i32 %13, 0 ; <i1> [#uses=1]
br i1 %14, label %bb20, label %bb19
diff --git a/test/CodeGen/Thumb2/2009-12-01-LoopIVUsers.ll b/test/CodeGen/Thumb2/2009-12-01-LoopIVUsers.ll
index 89b7148f5ecd..e59e84d49ecf 100644
--- a/test/CodeGen/Thumb2/2009-12-01-LoopIVUsers.ll
+++ b/test/CodeGen/Thumb2/2009-12-01-LoopIVUsers.ll
@@ -1,6 +1,8 @@
; RUN: opt < %s -O3 | \
; RUN: llc -mtriple=thumbv7-apple-darwin10 -mattr=+neon | FileCheck %s
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32"
+
define void @fred(i32 %three_by_three, i8* %in, double %dt1, i32 %x_size, i32 %y_size, i8* %bp) nounwind {
entry:
; -- The loop following the load should only use a single add-literation
@@ -43,16 +45,16 @@ entry:
store i32 %x_size, i32* %x_size_addr
store i32 %y_size, i32* %y_size_addr
store i8* %bp, i8** %bp_addr
- %0 = load i8** %in_addr, align 4 ; <i8*> [#uses=1]
+ %0 = load i8*, i8** %in_addr, align 4 ; <i8*> [#uses=1]
store i8* %0, i8** %out, align 4
- %1 = call i32 (...)* @foo() nounwind ; <i32> [#uses=1]
+ %1 = call i32 (...) @foo() nounwind ; <i32> [#uses=1]
store i32 %1, i32* %i, align 4
- %2 = load i32* %three_by_three_addr, align 4 ; <i32> [#uses=1]
+ %2 = load i32, i32* %three_by_three_addr, align 4 ; <i32> [#uses=1]
%3 = icmp eq i32 %2, 0 ; <i1> [#uses=1]
br i1 %3, label %bb, label %bb2
bb: ; preds = %entry
- %4 = load float* %dt_addr, align 4 ; <float> [#uses=1]
+ %4 = load float, float* %dt_addr, align 4 ; <float> [#uses=1]
%5 = fpext float %4 to double ; <double> [#uses=1]
%6 = fmul double %5, 1.500000e+00 ; <double> [#uses=1]
%7 = fptosi double %6 to i32 ; <i32> [#uses=1]
@@ -65,54 +67,54 @@ bb2: ; preds = %entry
br label %bb3
bb3: ; preds = %bb2, %bb
- %9 = load i32* %mask_size, align 4 ; <i32> [#uses=1]
+ %9 = load i32, i32* %mask_size, align 4 ; <i32> [#uses=1]
%10 = mul i32 %9, 2 ; <i32> [#uses=1]
%11 = add nsw i32 %10, 1 ; <i32> [#uses=1]
store i32 %11, i32* %n_max, align 4
- %12 = load i32* %x_size_addr, align 4 ; <i32> [#uses=1]
- %13 = load i32* %n_max, align 4 ; <i32> [#uses=1]
+ %12 = load i32, i32* %x_size_addr, align 4 ; <i32> [#uses=1]
+ %13 = load i32, i32* %n_max, align 4 ; <i32> [#uses=1]
%14 = sub i32 %12, %13 ; <i32> [#uses=1]
store i32 %14, i32* %increment, align 4
- %15 = load i32* %n_max, align 4 ; <i32> [#uses=1]
- %16 = load i32* %n_max, align 4 ; <i32> [#uses=1]
+ %15 = load i32, i32* %n_max, align 4 ; <i32> [#uses=1]
+ %16 = load i32, i32* %n_max, align 4 ; <i32> [#uses=1]
%17 = mul i32 %15, %16 ; <i32> [#uses=1]
%18 = call noalias i8* @malloc(i32 %17) nounwind ; <i8*> [#uses=1]
store i8* %18, i8** %dp, align 4
- %19 = load i8** %dp, align 4 ; <i8*> [#uses=1]
+ %19 = load i8*, i8** %dp, align 4 ; <i8*> [#uses=1]
store i8* %19, i8** %dpt, align 4
- %20 = load float* %dt_addr, align 4 ; <float> [#uses=1]
- %21 = load float* %dt_addr, align 4 ; <float> [#uses=1]
+ %20 = load float, float* %dt_addr, align 4 ; <float> [#uses=1]
+ %21 = load float, float* %dt_addr, align 4 ; <float> [#uses=1]
%22 = fmul float %20, %21 ; <float> [#uses=1]
%23 = fsub float -0.000000e+00, %22 ; <float> [#uses=1]
store float %23, float* %temp, align 4
- %24 = load i32* %mask_size, align 4 ; <i32> [#uses=1]
+ %24 = load i32, i32* %mask_size, align 4 ; <i32> [#uses=1]
%25 = sub i32 0, %24 ; <i32> [#uses=1]
store i32 %25, i32* %j, align 4
br label %bb5
bb4: ; preds = %bb5
- %26 = load i32* %j, align 4 ; <i32> [#uses=1]
- %27 = load i32* %j, align 4 ; <i32> [#uses=1]
+ %26 = load i32, i32* %j, align 4 ; <i32> [#uses=1]
+ %27 = load i32, i32* %j, align 4 ; <i32> [#uses=1]
%28 = mul i32 %26, %27 ; <i32> [#uses=1]
%29 = sitofp i32 %28 to double ; <double> [#uses=1]
%30 = fmul double %29, 1.234000e+00 ; <double> [#uses=1]
%31 = fptosi double %30 to i32 ; <i32> [#uses=1]
store i32 %31, i32* %x, align 4
- %32 = load i32* %x, align 4 ; <i32> [#uses=1]
+ %32 = load i32, i32* %x, align 4 ; <i32> [#uses=1]
%33 = trunc i32 %32 to i8 ; <i8> [#uses=1]
- %34 = load i8** %dpt, align 4 ; <i8*> [#uses=1]
+ %34 = load i8*, i8** %dpt, align 4 ; <i8*> [#uses=1]
store i8 %33, i8* %34, align 1
- %35 = load i8** %dpt, align 4 ; <i8*> [#uses=1]
- %36 = getelementptr inbounds i8* %35, i64 1 ; <i8*> [#uses=1]
+ %35 = load i8*, i8** %dpt, align 4 ; <i8*> [#uses=1]
+ %36 = getelementptr inbounds i8, i8* %35, i64 1 ; <i8*> [#uses=1]
store i8* %36, i8** %dpt, align 4
- %37 = load i32* %j, align 4 ; <i32> [#uses=1]
+ %37 = load i32, i32* %j, align 4 ; <i32> [#uses=1]
%38 = add nsw i32 %37, 1 ; <i32> [#uses=1]
store i32 %38, i32* %j, align 4
br label %bb5
bb5: ; preds = %bb4, %bb3
- %39 = load i32* %j, align 4 ; <i32> [#uses=1]
- %40 = load i32* %mask_size, align 4 ; <i32> [#uses=1]
+ %39 = load i32, i32* %j, align 4 ; <i32> [#uses=1]
+ %40 = load i32, i32* %mask_size, align 4 ; <i32> [#uses=1]
%41 = icmp sle i32 %39, %40 ; <i1> [#uses=1]
br i1 %41, label %bb4, label %bb6
diff --git a/test/CodeGen/Thumb2/2010-01-06-TailDuplicateLabels.ll b/test/CodeGen/Thumb2/2010-01-06-TailDuplicateLabels.ll
index 348e9d3f20a7..56206ef77336 100644
--- a/test/CodeGen/Thumb2/2010-01-06-TailDuplicateLabels.ll
+++ b/test/CodeGen/Thumb2/2010-01-06-TailDuplicateLabels.ll
@@ -19,8 +19,8 @@ target triple = "thumbv7-apple-darwin10"
define %"struct.WTF::TCMalloc_ThreadCache"* @_ZN3WTF20TCMalloc_ThreadCache22CreateCacheIfNecessaryEv() nounwind {
entry:
- %0 = tail call i32 @pthread_mutex_lock(%struct.PlatformMutex* getelementptr inbounds (%struct.SpinLock* @_ZN3WTFL13pageheap_lockE, i32 0, i32 0)) nounwind
- %.b24 = load i1* @_ZN3WTFL10tsd_initedE.b, align 4 ; <i1> [#uses=1]
+ %0 = tail call i32 @pthread_mutex_lock(%struct.PlatformMutex* getelementptr inbounds (%struct.SpinLock, %struct.SpinLock* @_ZN3WTFL13pageheap_lockE, i32 0, i32 0)) nounwind
+ %.b24 = load i1, i1* @_ZN3WTFL10tsd_initedE.b, align 4 ; <i1> [#uses=1]
br i1 %.b24, label %bb5, label %bb6
bb5: ; preds = %entry
@@ -32,19 +32,19 @@ bb6: ; preds = %bb5, %entry
br label %bb11
bb7: ; preds = %bb11
- %2 = getelementptr inbounds %"struct.WTF::TCMalloc_ThreadCache"* %h.0, i32 0, i32 1
- %3 = load %struct._opaque_pthread_t** %2, align 4
+ %2 = getelementptr inbounds %"struct.WTF::TCMalloc_ThreadCache", %"struct.WTF::TCMalloc_ThreadCache"* %h.0, i32 0, i32 1
+ %3 = load %struct._opaque_pthread_t*, %struct._opaque_pthread_t** %2, align 4
%4 = tail call i32 @pthread_equal(%struct._opaque_pthread_t* %3, %struct._opaque_pthread_t* %me.0) nounwind
%5 = icmp eq i32 %4, 0
br i1 %5, label %bb10, label %bb14
bb10: ; preds = %bb7
- %6 = getelementptr inbounds %"struct.WTF::TCMalloc_ThreadCache"* %h.0, i32 0, i32 6
+ %6 = getelementptr inbounds %"struct.WTF::TCMalloc_ThreadCache", %"struct.WTF::TCMalloc_ThreadCache"* %h.0, i32 0, i32 6
br label %bb11
bb11: ; preds = %bb10, %bb6
%h.0.in = phi %"struct.WTF::TCMalloc_ThreadCache"** [ @_ZN3WTFL12thread_heapsE, %bb6 ], [ %6, %bb10 ] ; <%"struct.WTF::TCMalloc_ThreadCache"**> [#uses=1]
- %h.0 = load %"struct.WTF::TCMalloc_ThreadCache"** %h.0.in, align 4 ; <%"struct.WTF::TCMalloc_ThreadCache"*> [#uses=4]
+ %h.0 = load %"struct.WTF::TCMalloc_ThreadCache"*, %"struct.WTF::TCMalloc_ThreadCache"** %h.0.in, align 4 ; <%"struct.WTF::TCMalloc_ThreadCache"*> [#uses=4]
%7 = icmp eq %"struct.WTF::TCMalloc_ThreadCache"* %h.0, null
br i1 %7, label %bb13, label %bb7
@@ -54,19 +54,19 @@ bb13: ; preds = %bb11
bb14: ; preds = %bb13, %bb7
%heap.1 = phi %"struct.WTF::TCMalloc_ThreadCache"* [ %8, %bb13 ], [ %h.0, %bb7 ] ; <%"struct.WTF::TCMalloc_ThreadCache"*> [#uses=4]
- %9 = tail call i32 @pthread_mutex_unlock(%struct.PlatformMutex* getelementptr inbounds (%struct.SpinLock* @_ZN3WTFL13pageheap_lockE, i32 0, i32 0)) nounwind
- %10 = getelementptr inbounds %"struct.WTF::TCMalloc_ThreadCache"* %heap.1, i32 0, i32 2
- %11 = load i8* %10, align 4
+ %9 = tail call i32 @pthread_mutex_unlock(%struct.PlatformMutex* getelementptr inbounds (%struct.SpinLock, %struct.SpinLock* @_ZN3WTFL13pageheap_lockE, i32 0, i32 0)) nounwind
+ %10 = getelementptr inbounds %"struct.WTF::TCMalloc_ThreadCache", %"struct.WTF::TCMalloc_ThreadCache"* %heap.1, i32 0, i32 2
+ %11 = load i8, i8* %10, align 4
%toBool15not = icmp eq i8 %11, 0 ; <i1> [#uses=1]
br i1 %toBool15not, label %bb19, label %bb22
bb19: ; preds = %bb14
- %.b = load i1* @_ZN3WTFL10tsd_initedE.b, align 4 ; <i1> [#uses=1]
+ %.b = load i1, i1* @_ZN3WTFL10tsd_initedE.b, align 4 ; <i1> [#uses=1]
br i1 %.b, label %bb21, label %bb22
bb21: ; preds = %bb19
store i8 1, i8* %10, align 4
- %12 = load i32* @_ZN3WTFL8heap_keyE, align 4
+ %12 = load i32, i32* @_ZN3WTFL8heap_keyE, align 4
%13 = bitcast %"struct.WTF::TCMalloc_ThreadCache"* %heap.1 to i8*
%14 = tail call i32 @pthread_setspecific(i32 %12, i8* %13) nounwind
ret %"struct.WTF::TCMalloc_ThreadCache"* %heap.1
diff --git a/test/CodeGen/Thumb2/2010-01-19-RemovePredicates.ll b/test/CodeGen/Thumb2/2010-01-19-RemovePredicates.ll
index 771a4f813634..a20d36ba5ed3 100644
--- a/test/CodeGen/Thumb2/2010-01-19-RemovePredicates.ll
+++ b/test/CodeGen/Thumb2/2010-01-19-RemovePredicates.ll
@@ -47,7 +47,7 @@ bb35: ; preds = %bb5
bb46: ; preds = %bb26, %bb10
%1 = bitcast double* %value to i16* ; <i16*> [#uses=1]
- %v47 = getelementptr inbounds [6 x i16]* %v, i32 0, i32 0 ; <i16*> [#uses=1]
+ %v47 = getelementptr inbounds [6 x i16], [6 x i16]* %v, i32 0, i32 0 ; <i16*> [#uses=1]
call void @etoe53(i16* %v47, i16* %1) nounwind
ret void
}
diff --git a/test/CodeGen/Thumb2/2010-03-08-addi12-ccout.ll b/test/CodeGen/Thumb2/2010-03-08-addi12-ccout.ll
index 7ce3c2586677..7d19d15f2a30 100644
--- a/test/CodeGen/Thumb2/2010-03-08-addi12-ccout.ll
+++ b/test/CodeGen/Thumb2/2010-03-08-addi12-ccout.ll
@@ -70,7 +70,7 @@ bb163: ; preds = %bb162, %bb161
unreachable
bb224: ; preds = %bb162
- %0 = call i32 @SyFopen(i8* undef, i8* getelementptr inbounds ([2 x i8]* @.str41196, i32 0, i32 0)) nounwind ; <i32> [#uses=2]
+ %0 = call i32 @SyFopen(i8* undef, i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str41196, i32 0, i32 0)) nounwind ; <i32> [#uses=2]
br i1 false, label %bb297, label %bb300
bb297: ; preds = %bb224
@@ -138,8 +138,8 @@ bb345: ; preds = %bb345, %bb339
%4 = phi i8 [ %5, %bb345 ], [ undef, %bb339 ] ; <i8> [#uses=0]
%indvar670 = phi i32 [ %tmp673, %bb345 ], [ 0, %bb339 ] ; <i32> [#uses=1]
%tmp673 = add i32 %indvar670, 1 ; <i32> [#uses=2]
- %scevgep674 = getelementptr [256 x i8]* %last, i32 0, i32 %tmp673 ; <i8*> [#uses=1]
- %5 = load i8* %scevgep674, align 1 ; <i8> [#uses=1]
+ %scevgep674 = getelementptr [256 x i8], [256 x i8]* %last, i32 0, i32 %tmp673 ; <i8*> [#uses=1]
+ %5 = load i8, i8* %scevgep674, align 1 ; <i8> [#uses=1]
br i1 undef, label %bb347, label %bb345
bb347: ; preds = %bb345
@@ -166,8 +166,8 @@ bb362: ; preds = %bb361
bb366: ; preds = %bb366, %bb360
%indvar662 = phi i32 [ %tmp665, %bb366 ], [ 0, %bb360 ] ; <i32> [#uses=1]
%tmp665 = add i32 %indvar662, 1 ; <i32> [#uses=2]
- %scevgep666 = getelementptr [256 x i8]* %last2, i32 0, i32 %tmp665 ; <i8*> [#uses=1]
- %6 = load i8* %scevgep666, align 1 ; <i8> [#uses=0]
+ %scevgep666 = getelementptr [256 x i8], [256 x i8]* %last2, i32 0, i32 %tmp665 ; <i8*> [#uses=1]
+ %6 = load i8, i8* %scevgep666, align 1 ; <i8> [#uses=0]
br i1 false, label %bb368, label %bb366
bb368: ; preds = %bb366
@@ -189,7 +189,7 @@ bb376: ; preds = %bb375
ret void
bb383: ; preds = %bb373
- %10 = load i8* undef, align 1 ; <i8> [#uses=1]
+ %10 = load i8, i8* undef, align 1 ; <i8> [#uses=1]
%cond1 = icmp eq i8 %10, 46 ; <i1> [#uses=1]
br i1 %cond1, label %bb373, label %bb388
@@ -203,7 +203,7 @@ bb390: ; preds = %isdigit1498.exit83,
bb391: ; preds = %bb390, %bb388
%indvar724 = phi i32 [ %indvar.next725, %bb390 ], [ 0, %bb388 ] ; <i32> [#uses=2]
- %11 = load i8* undef, align 1 ; <i8> [#uses=0]
+ %11 = load i8, i8* undef, align 1 ; <i8> [#uses=0]
br i1 false, label %bb395, label %bb392
bb392: ; preds = %bb391
@@ -217,7 +217,7 @@ bb394: ; preds = %isdigit1498.exit87
bb395: ; preds = %bb394, %isdigit1498.exit83, %bb391
%storemerge14.sum = add i32 %indvar724, undef ; <i32> [#uses=1]
- %p.26 = getelementptr [256 x i8]* %line, i32 0, i32 %storemerge14.sum ; <i8*> [#uses=1]
+ %p.26 = getelementptr [256 x i8], [256 x i8]* %line, i32 0, i32 %storemerge14.sum ; <i8*> [#uses=1]
br i1 undef, label %bb400, label %isdigit1498.exit87
isdigit1498.exit87: ; preds = %bb395
@@ -227,7 +227,7 @@ bb400: ; preds = %isdigit1498.exit87,
br i1 undef, label %bb402, label %bb403
bb402: ; preds = %bb400
- %12 = getelementptr inbounds i8* %p.26, i32 undef ; <i8*> [#uses=1]
+ %12 = getelementptr inbounds i8, i8* %p.26, i32 undef ; <i8*> [#uses=1]
br label %bb403
bb403: ; preds = %bb402, %bb400
diff --git a/test/CodeGen/Thumb2/2010-03-15-AsmCCClobber.ll b/test/CodeGen/Thumb2/2010-03-15-AsmCCClobber.ll
index fcf1bae796b0..d3a44957a2eb 100644
--- a/test/CodeGen/Thumb2/2010-03-15-AsmCCClobber.ll
+++ b/test/CodeGen/Thumb2/2010-03-15-AsmCCClobber.ll
@@ -20,23 +20,23 @@ target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-
; CHECK: InlineAsm Start
define void @test(%s1* %this, i32 %format, i32 %w, i32 %h, i32 %levels, i32* %s, i8* %data, i32* nocapture %rowbytes, void (i8*, i8*)* %release, i8* %info) nounwind {
entry:
- %tmp1 = getelementptr inbounds %s1* %this, i32 0, i32 0, i32 0, i32 1, i32 0, i32 0
+ %tmp1 = getelementptr inbounds %s1, %s1* %this, i32 0, i32 0, i32 0, i32 1, i32 0, i32 0
store volatile i32 1, i32* %tmp1, align 4
- %tmp12 = getelementptr inbounds %s1* %this, i32 0, i32 1
+ %tmp12 = getelementptr inbounds %s1, %s1* %this, i32 0, i32 1
store i32 %levels, i32* %tmp12, align 4
- %tmp13 = getelementptr inbounds %s1* %this, i32 0, i32 3
+ %tmp13 = getelementptr inbounds %s1, %s1* %this, i32 0, i32 3
store i8* %data, i8** %tmp13, align 4
- %tmp14 = getelementptr inbounds %s1* %this, i32 0, i32 4
+ %tmp14 = getelementptr inbounds %s1, %s1* %this, i32 0, i32 4
store void (i8*, i8*)* %release, void (i8*, i8*)** %tmp14, align 4
- %tmp15 = getelementptr inbounds %s1* %this, i32 0, i32 5
+ %tmp15 = getelementptr inbounds %s1, %s1* %this, i32 0, i32 5
store i8* %info, i8** %tmp15, align 4
- %tmp16 = getelementptr inbounds %s1* %this, i32 0, i32 6
+ %tmp16 = getelementptr inbounds %s1, %s1* %this, i32 0, i32 6
store i32* null, i32** %tmp16, align 4
- %tmp17 = getelementptr inbounds %s1* %this, i32 0, i32 7
+ %tmp17 = getelementptr inbounds %s1, %s1* %this, i32 0, i32 7
store i32* null, i32** %tmp17, align 4
- %tmp19 = getelementptr inbounds %s1* %this, i32 0, i32 10
+ %tmp19 = getelementptr inbounds %s1, %s1* %this, i32 0, i32 10
store i64 0, i64* %tmp19, align 4
- %tmp20 = getelementptr inbounds %s1* %this, i32 0, i32 0
+ %tmp20 = getelementptr inbounds %s1, %s1* %this, i32 0, i32 0
tail call void @f1(%s3* %tmp20, i32* %s) nounwind
%tmp21 = shl i32 %format, 6
%tmp22 = tail call zeroext i8 @f2(i32 %format) nounwind
@@ -45,16 +45,16 @@ entry:
%flags.0 = or i32 %tmp23, %tmp21
%tmp24 = shl i32 %flags.0, 16
%asmtmp.i.i.i = tail call %0 asm sideeffect "\0A0:\09ldrex $1, [$2]\0A\09orr $1, $1, $3\0A\09strex $0, $1, [$2]\0A\09cmp $0, #0\0A\09bne 0b", "=&r,=&r,r,r,~{memory},~{cc}"(i32* %tmp1, i32 %tmp24) nounwind
- %tmp25 = getelementptr inbounds %s1* %this, i32 0, i32 2, i32 0, i32 0
+ %tmp25 = getelementptr inbounds %s1, %s1* %this, i32 0, i32 2, i32 0, i32 0
store volatile i32 1, i32* %tmp25, align 4
%tmp26 = icmp eq i32 %levels, 0
br i1 %tmp26, label %return, label %bb4
bb4:
%l.09 = phi i32 [ %tmp28, %bb4 ], [ 0, %entry ]
- %scevgep = getelementptr %s1* %this, i32 0, i32 11, i32 %l.09
- %scevgep10 = getelementptr i32* %rowbytes, i32 %l.09
- %tmp27 = load i32* %scevgep10, align 4
+ %scevgep = getelementptr %s1, %s1* %this, i32 0, i32 11, i32 %l.09
+ %scevgep10 = getelementptr i32, i32* %rowbytes, i32 %l.09
+ %tmp27 = load i32, i32* %scevgep10, align 4
store i32 %tmp27, i32* %scevgep, align 4
%tmp28 = add i32 %l.09, 1
%exitcond = icmp eq i32 %tmp28, %levels
diff --git a/test/CodeGen/Thumb2/2010-06-14-NEONCoalescer.ll b/test/CodeGen/Thumb2/2010-06-14-NEONCoalescer.ll
index 06762bad854f..24a995a11538 100644
--- a/test/CodeGen/Thumb2/2010-06-14-NEONCoalescer.ll
+++ b/test/CodeGen/Thumb2/2010-06-14-NEONCoalescer.ll
@@ -32,10 +32,10 @@ entry:
%tmp7 = extractelement <2 x double> %5, i32 0 ; <double> [#uses=1]
%tmp5 = extractelement <2 x double> %5, i32 1 ; <double> [#uses=1]
; CHECK: printf
- %7 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([7 x i8]* @.str, i32 0, i32 0), double %tmp7, double %tmp5) nounwind ; <i32> [#uses=0]
+ %7 = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([7 x i8], [7 x i8]* @.str, i32 0, i32 0), double %tmp7, double %tmp5) nounwind ; <i32> [#uses=0]
%tmp3 = extractelement <2 x double> %6, i32 0 ; <double> [#uses=1]
%tmp1 = extractelement <2 x double> %6, i32 1 ; <double> [#uses=1]
- %8 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([7 x i8]* @.str, i32 0, i32 0), double %tmp3, double %tmp1) nounwind ; <i32> [#uses=0]
+ %8 = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([7 x i8], [7 x i8]* @.str, i32 0, i32 0), double %tmp3, double %tmp1) nounwind ; <i32> [#uses=0]
ret i32 0
}
diff --git a/test/CodeGen/Thumb2/2010-06-19-ITBlockCrash.ll b/test/CodeGen/Thumb2/2010-06-19-ITBlockCrash.ll
index 501f763bda28..eba2e584ddc0 100644
--- a/test/CodeGen/Thumb2/2010-06-19-ITBlockCrash.ll
+++ b/test/CodeGen/Thumb2/2010-06-19-ITBlockCrash.ll
@@ -18,7 +18,7 @@ bb1.i.i11: ; preds = %bb11
%1 = tail call arm_apcscc i32 @__maskrune(i32 %0, i32 32768) nounwind ; <i32> [#uses=1]
%2 = icmp ne i32 %1, 0 ; <i1> [#uses=1]
%3 = zext i1 %2 to i32 ; <i32> [#uses=1]
- %.pre = load i8* undef, align 1 ; <i8> [#uses=1]
+ %.pre = load i8, i8* undef, align 1 ; <i8> [#uses=1]
br label %isupper144.exit12
isupper144.exit12: ; preds = %bb1.i.i11, %bb.i.i10
diff --git a/test/CodeGen/Thumb2/2010-06-21-TailMergeBug.ll b/test/CodeGen/Thumb2/2010-06-21-TailMergeBug.ll
index f3046e1fcb82..34569e9116f2 100644
--- a/test/CodeGen/Thumb2/2010-06-21-TailMergeBug.ll
+++ b/test/CodeGen/Thumb2/2010-06-21-TailMergeBug.ll
@@ -86,17 +86,17 @@ bb10: ; preds = %bb9
bb11: ; preds = %bb10, %bb9
%p.0 = phi i8* [ undef, %bb10 ], [ %p.1, %bb9 ] ; <i8*> [#uses=1]
- %0 = load %struct.FILE** @finput, align 4 ; <%struct.FILE*> [#uses=1]
+ %0 = load %struct.FILE*, %struct.FILE** @finput, align 4 ; <%struct.FILE*> [#uses=1]
%1 = tail call i32 @getc(%struct.FILE* %0) nounwind ; <i32> [#uses=0]
br label %bb12
bb12: ; preds = %bb11, %bb.i.i
- %p.1 = phi i8* [ %p.0, %bb11 ], [ getelementptr inbounds ([1025 x i8]* @token_buffer, i32 0, i32 0), %bb.i.i ] ; <i8*> [#uses=2]
+ %p.1 = phi i8* [ %p.0, %bb11 ], [ getelementptr inbounds ([1025 x i8], [1025 x i8]* @token_buffer, i32 0, i32 0), %bb.i.i ] ; <i8*> [#uses=2]
%2 = icmp ult i32 undef, 128 ; <i1> [#uses=1]
br i1 %2, label %bb.i.i2, label %bb1.i.i3
bb.i.i2: ; preds = %bb12
- %3 = load i32* null, align 4 ; <i32> [#uses=1]
+ %3 = load i32, i32* null, align 4 ; <i32> [#uses=1]
%4 = lshr i32 %3, 8 ; <i32> [#uses=1]
%.lobit.i1 = and i32 %4, 1 ; <i32> [#uses=1]
%.not = icmp ne i32 %.lobit.i1, 0 ; <i1> [#uses=1]
diff --git a/test/CodeGen/Thumb2/2010-08-10-VarSizedAllocaBug.ll b/test/CodeGen/Thumb2/2010-08-10-VarSizedAllocaBug.ll
index 547950fb17f0..3b14d22ddbff 100644
--- a/test/CodeGen/Thumb2/2010-08-10-VarSizedAllocaBug.ll
+++ b/test/CodeGen/Thumb2/2010-08-10-VarSizedAllocaBug.ll
@@ -14,9 +14,9 @@ entry:
bb: ; preds = %entry
%1 = alloca [1000 x i8], align 4 ; <[1000 x i8]*> [#uses=1]
- %.sub = getelementptr inbounds [1000 x i8]* %1, i32 0, i32 0 ; <i8*> [#uses=2]
- %2 = call i32 (i8*, i32, i32, i8*, ...)* @__sprintf_chk(i8* %.sub, i32 0, i32 1000, i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i32 %i) nounwind ; <i32> [#uses=0]
- %3 = load i8* %.sub, align 4 ; <i8> [#uses=1]
+ %.sub = getelementptr inbounds [1000 x i8], [1000 x i8]* %1, i32 0, i32 0 ; <i8*> [#uses=2]
+ %2 = call i32 (i8*, i32, i32, i8*, ...) @__sprintf_chk(i8* %.sub, i32 0, i32 1000, i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i32 %i) nounwind ; <i32> [#uses=0]
+ %3 = load i8, i8* %.sub, align 4 ; <i8> [#uses=1]
%4 = sext i8 %3 to i32 ; <i32> [#uses=1]
ret i32 %4
@@ -52,7 +52,7 @@ bb2: ; preds = %bb
; CHECK-NOT: mov sp, r7
; CHECK-NOT: sub sp, #12
; CHECK: pop
- %4 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i32 %2) nounwind ; <i32> [#uses=0]
+ %4 = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i32 %2) nounwind ; <i32> [#uses=0]
ret i32 0
}
diff --git a/test/CodeGen/Thumb2/2011-06-07-TwoAddrEarlyClobber.ll b/test/CodeGen/Thumb2/2011-06-07-TwoAddrEarlyClobber.ll
index 240df83252cc..b9bfdcbec4e4 100644
--- a/test/CodeGen/Thumb2/2011-06-07-TwoAddrEarlyClobber.ll
+++ b/test/CodeGen/Thumb2/2011-06-07-TwoAddrEarlyClobber.ll
@@ -3,13 +3,13 @@
%struct.op = type { %struct.op*, %struct.op*, %struct.op* ()*, i32, i16, i16, i8, i8 }
; CHECK: Perl_ck_sort
-; CHECK: ldreq
-; CHECK: moveq [[REGISTER:(r[0-9]+)|(lr)]]
-; CHECK: streq {{(r[0-9])|(lr)}}, {{\[}}[[REGISTER]]{{\]}}, #24
+; CHECK: ldr
+; CHECK: mov [[REGISTER:(r[0-9]+)|(lr)]]
+; CHECK: str {{(r[0-9])|(lr)}}, {{\[}}[[REGISTER]]{{\]}}, #24
define void @Perl_ck_sort() nounwind optsize {
entry:
- %tmp27 = load %struct.op** undef, align 4
+ %tmp27 = load %struct.op*, %struct.op** undef, align 4
switch i16 undef, label %if.end151 [
i16 178, label %if.then60
i16 177, label %if.then60
@@ -19,14 +19,14 @@ if.then60: ; preds = %if.then40
br i1 undef, label %if.then67, label %if.end95
if.then67: ; preds = %if.then60
- %op_next71 = getelementptr inbounds %struct.op* %tmp27, i32 0, i32 0
+ %op_next71 = getelementptr inbounds %struct.op, %struct.op* %tmp27, i32 0, i32 0
store %struct.op* %tmp27, %struct.op** %op_next71, align 4
- %0 = getelementptr inbounds %struct.op* %tmp27, i32 1, i32 0
+ %0 = getelementptr inbounds %struct.op, %struct.op* %tmp27, i32 1, i32 0
br label %if.end95
if.end95: ; preds = %if.else92, %if.then67
%.pre-phi = phi %struct.op** [ undef, %if.then60 ], [ %0, %if.then67 ]
- %tmp98 = load %struct.op** %.pre-phi, align 4
+ %tmp98 = load %struct.op*, %struct.op** %.pre-phi, align 4
br label %if.end151
if.end151: ; preds = %if.end100, %if.end, %entry
diff --git a/test/CodeGen/Thumb2/2011-12-16-T2SizeReduceAssert.ll b/test/CodeGen/Thumb2/2011-12-16-T2SizeReduceAssert.ll
index ea8d233e79f1..7c8802ddad67 100644
--- a/test/CodeGen/Thumb2/2011-12-16-T2SizeReduceAssert.ll
+++ b/test/CodeGen/Thumb2/2011-12-16-T2SizeReduceAssert.ll
@@ -8,15 +8,15 @@ entry:
br label %bb3
bb: ; preds = %bb3
- %Scan.0.idx7.val = load i8** undef, align 4
- %.idx = getelementptr i8* %Scan.0.idx7.val, i32 4
+ %Scan.0.idx7.val = load i8*, i8** undef, align 4
+ %.idx = getelementptr i8, i8* %Scan.0.idx7.val, i32 4
%0 = bitcast i8* %.idx to i8**
- %.idx.val = load i8** %0, align 4
+ %.idx.val = load i8*, i8** %0, align 4
%1 = icmp eq i8* %.idx.val, %Key
br i1 %1, label %bb5, label %bb2
bb2: ; preds = %bb
- %Scan.0.idx8.val = load %struct.LIST_NODE.0.16** undef, align 4
+ %Scan.0.idx8.val = load %struct.LIST_NODE.0.16*, %struct.LIST_NODE.0.16** undef, align 4
br label %bb3
bb3: ; preds = %bb2, %entry
@@ -34,7 +34,7 @@ entry:
br i1 undef, label %for.body, label %for.end
for.body: ; preds = %for.body, %entry
- %0 = load double* null, align 8
+ %0 = load double, double* null, align 8
%cmp2.6 = fcmp ogt double %0, 0.000000e+00
%idx.1.6 = select i1 %cmp2.6, i32 undef, i32 0
%idx.1.7 = select i1 undef, i32 undef, i32 %idx.1.6
diff --git a/test/CodeGen/Thumb2/2012-01-13-CBNZBug.ll b/test/CodeGen/Thumb2/2012-01-13-CBNZBug.ll
index 500871519234..9121044be4ff 100644
--- a/test/CodeGen/Thumb2/2012-01-13-CBNZBug.ll
+++ b/test/CodeGen/Thumb2/2012-01-13-CBNZBug.ll
@@ -22,16 +22,16 @@ tailrecurse: ; preds = %if.then10, %entry
br i1 %cmp, label %if.end11, label %if.end
if.end: ; preds = %tailrecurse
- %string = getelementptr inbounds %struct.Dict_node_struct* %dn.tr, i32 0, i32 0
- %0 = load i8** %string, align 4
+ %string = getelementptr inbounds %struct.Dict_node_struct, %struct.Dict_node_struct* %dn.tr, i32 0, i32 0
+ %0 = load i8*, i8** %string, align 4
br label %while.cond.i
while.cond.i: ; preds = %while.body.i, %if.end
%1 = phi i8* [ %s, %if.end ], [ %incdec.ptr.i, %while.body.i ]
%storemerge.i = phi i8* [ %0, %if.end ], [ %incdec.ptr6.i, %while.body.i ]
- %2 = load i8* %1, align 1
+ %2 = load i8, i8* %1, align 1
%cmp.i = icmp eq i8 %2, 0
- %.pre.i = load i8* %storemerge.i, align 1
+ %.pre.i = load i8, i8* %storemerge.i, align 1
br i1 %cmp.i, label %lor.lhs.false.i, label %land.end.i
land.end.i: ; preds = %while.cond.i
@@ -39,8 +39,8 @@ land.end.i: ; preds = %while.cond.i
br i1 %cmp4.i, label %while.body.i, label %while.end.i
while.body.i: ; preds = %land.end.i
- %incdec.ptr.i = getelementptr inbounds i8* %1, i32 1
- %incdec.ptr6.i = getelementptr inbounds i8* %storemerge.i, i32 1
+ %incdec.ptr.i = getelementptr inbounds i8, i8* %1, i32 1
+ %incdec.ptr6.i = getelementptr inbounds i8, i8* %storemerge.i, i32 1
br label %while.cond.i
while.end.i: ; preds = %land.end.i
@@ -68,8 +68,8 @@ if.end3: ; preds = %dict_match.exit, %l
; CHECK: cmp
; CHECK-NOT: cbnz
%storemerge1.i3 = phi i32 [ %sub.i, %dict_match.exit ], [ 0, %lor.lhs.false.i ], [ 0, %while.end.i ]
- %right = getelementptr inbounds %struct.Dict_node_struct* %dn.tr, i32 0, i32 4
- %4 = load %struct.Dict_node_struct** %right, align 4
+ %right = getelementptr inbounds %struct.Dict_node_struct, %struct.Dict_node_struct* %dn.tr, i32 0, i32 4
+ %4 = load %struct.Dict_node_struct*, %struct.Dict_node_struct** %right, align 4
tail call fastcc void @rdictionary_lookup(%struct.Dict_node_struct* %4, i8* %s)
%cmp4 = icmp eq i32 %storemerge1.i3, 0
br i1 %cmp4, label %if.then5, label %if.end8
@@ -79,8 +79,8 @@ if.then5: ; preds = %if.end3
%5 = bitcast i8* %call6 to %struct.Dict_node_struct*
%6 = bitcast %struct.Dict_node_struct* %dn.tr to i8*
tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %call6, i8* %6, i32 16, i32 4, i1 false)
- %7 = load %struct.Dict_node_struct** @lookup_list, align 4
- %right7 = getelementptr inbounds i8* %call6, i32 16
+ %7 = load %struct.Dict_node_struct*, %struct.Dict_node_struct** @lookup_list, align 4
+ %right7 = getelementptr inbounds i8, i8* %call6, i32 16
%8 = bitcast i8* %right7 to %struct.Dict_node_struct**
store %struct.Dict_node_struct* %7, %struct.Dict_node_struct** %8, align 4
store %struct.Dict_node_struct* %5, %struct.Dict_node_struct** @lookup_list, align 4
@@ -91,8 +91,8 @@ if.end8: ; preds = %if.end3
br i1 %cmp9, label %if.then10, label %if.end11
if.then10: ; preds = %if.end8, %if.then5, %dict_match.exit
- %left = getelementptr inbounds %struct.Dict_node_struct* %dn.tr, i32 0, i32 3
- %9 = load %struct.Dict_node_struct** %left, align 4
+ %left = getelementptr inbounds %struct.Dict_node_struct, %struct.Dict_node_struct* %dn.tr, i32 0, i32 3
+ %9 = load %struct.Dict_node_struct*, %struct.Dict_node_struct** %left, align 4
br label %tailrecurse
if.end11: ; preds = %if.end8, %tailrecurse
diff --git a/test/CodeGen/Thumb2/2013-02-19-tail-call-register-hint.ll b/test/CodeGen/Thumb2/2013-02-19-tail-call-register-hint.ll
index e905cb9114c2..c9d3f3dd2847 100644
--- a/test/CodeGen/Thumb2/2013-02-19-tail-call-register-hint.ll
+++ b/test/CodeGen/Thumb2/2013-02-19-tail-call-register-hint.ll
@@ -12,19 +12,19 @@
%struct.foo = type { i32, [40 x i8] }
define hidden void @func(i8* %Data) nounwind ssp {
- %1 = getelementptr inbounds i8* %Data, i32 12
+ %1 = getelementptr inbounds i8, i8* %Data, i32 12
%2 = bitcast i8* %1 to %"myclass"*
tail call void @abc(%"myclass"* %2) nounwind
tail call void @def(%"myclass"* %2) nounwind
- %3 = getelementptr inbounds i8* %Data, i32 8
+ %3 = getelementptr inbounds i8, i8* %Data, i32 8
%4 = bitcast i8* %3 to i8**
- %5 = load i8** %4, align 4
+ %5 = load i8*, i8** %4, align 4
tail call void @ghi(i8* %5) nounwind
%6 = bitcast i8* %Data to void (i8*)**
- %7 = load void (i8*)** %6, align 4
- %8 = getelementptr inbounds i8* %Data, i32 4
+ %7 = load void (i8*)*, void (i8*)** %6, align 4
+ %8 = getelementptr inbounds i8, i8* %Data, i32 4
%9 = bitcast i8* %8 to i8**
- %10 = load i8** %9, align 4
+ %10 = load i8*, i8** %9, align 4
%11 = icmp eq i8* %Data, null
br i1 %11, label %14, label %12
diff --git a/test/CodeGen/Thumb2/2013-03-02-vduplane-nonconstant-source-index.ll b/test/CodeGen/Thumb2/2013-03-02-vduplane-nonconstant-source-index.ll
index 937ecc0d6679..5936b7803002 100644
--- a/test/CodeGen/Thumb2/2013-03-02-vduplane-nonconstant-source-index.ll
+++ b/test/CodeGen/Thumb2/2013-03-02-vduplane-nonconstant-source-index.ll
@@ -1,9 +1,9 @@
; RUN: llc < %s -mtriple=thumbv7-apple-ios | FileCheck %s
define void @bar(<4 x i32>* %p, i32 %lane, <4 x i32> %phitmp) nounwind {
-; CHECK: vst1.64 {d{{[0-9]+}}, d{{[0-9]+}}}, [r[[SOURCE:[0-9]+]]:128]
-; CHECK: add.w r[[ADDR:[0-9]+]], r[[SOURCE]], {{r[0-9]+}}, lsl #2
-; CHECK: vld1.32 {[[DREG:d[0-9]+]][], [[DREG2:d[0-9]+]][]}, [r[[ADDR]]:32]
+; CHECK: lsls r[[ADDR:[0-9]+]], r[[ADDR]], #2
+; CHECK: vst1.64 {d{{[0-9]+}}, d{{[0-9]+}}}, [r[[SOURCE:[0-9]+]]:128], r[[ADDR]]
+; CHECK: vld1.32 {[[DREG:d[0-9]+]][], [[DREG2:d[0-9]+]][]}, [r[[SOURCE]]:32]
; CHECK: vst1.32 {[[DREG]], [[DREG2]]}, [r0]
%val = extractelement <4 x i32> %phitmp, i32 %lane
%r1 = insertelement <4 x i32> undef, i32 %val, i32 1
diff --git a/test/CodeGen/Thumb2/aligned-constants.ll b/test/CodeGen/Thumb2/aligned-constants.ll
index 16b3a193c9d7..13cca113452c 100644
--- a/test/CodeGen/Thumb2/aligned-constants.ll
+++ b/test/CodeGen/Thumb2/aligned-constants.ll
@@ -16,10 +16,10 @@ target triple = "thumbv7-apple-ios"
; CHECK: .long 1123477881
define void @func(float* nocapture %x, double* nocapture %y) nounwind ssp {
entry:
- %0 = load float* %x, align 4
+ %0 = load float, float* %x, align 4
%add = fadd float %0, 0x405EDD2F20000000
store float %add, float* %x, align 4
- %1 = load double* %y, align 4
+ %1 = load double, double* %y, align 4
%add1 = fadd double %1, 2.234560e+02
store double %add1, double* %y, align 4
ret void
diff --git a/test/CodeGen/Thumb2/aligned-spill.ll b/test/CodeGen/Thumb2/aligned-spill.ll
index 4ef294bdf5ff..59f546b8e8e6 100644
--- a/test/CodeGen/Thumb2/aligned-spill.ll
+++ b/test/CodeGen/Thumb2/aligned-spill.ll
@@ -13,7 +13,7 @@ target triple = "thumbv7-apple-ios"
; CHECK: mov sp, r4
define void @f(double* nocapture %p) nounwind ssp {
entry:
- %0 = load double* %p, align 4
+ %0 = load double, double* %p, align 4
tail call void asm sideeffect "", "~{d8},~{d9},~{d10},~{d11},~{d12},~{d13},~{d14},~{d15}"() nounwind
tail call void @g() nounwind
store double %0, double* %p, align 4
diff --git a/test/CodeGen/Thumb2/bfi.ll b/test/CodeGen/Thumb2/bfi.ll
index 4f056d571c4c..337f46a22748 100644
--- a/test/CodeGen/Thumb2/bfi.ll
+++ b/test/CodeGen/Thumb2/bfi.ll
@@ -9,7 +9,7 @@ entry:
; CHECK: f1
; CHECK: movs r2, #10
; CHECK: bfi r1, r2, #22, #4
- %0 = load i32* bitcast (%struct.F* @X to i32*), align 4 ; <i32> [#uses=1]
+ %0 = load i32, i32* bitcast (%struct.F* @X to i32*), align 4 ; <i32> [#uses=1]
%1 = and i32 %0, -62914561 ; <i32> [#uses=1]
%2 = or i32 %1, 41943040 ; <i32> [#uses=1]
store i32 %2, i32* bitcast (%struct.F* @X to i32*), align 4
diff --git a/test/CodeGen/Thumb2/cbnz.ll b/test/CodeGen/Thumb2/cbnz.ll
new file mode 100644
index 000000000000..5c0bb5bfe1cd
--- /dev/null
+++ b/test/CodeGen/Thumb2/cbnz.ll
@@ -0,0 +1,54 @@
+; RUN: llc -mtriple thumbv7-unknown-linux -o - %s | FileCheck %s
+
+declare void @x()
+declare void @y()
+
+define void @f(i32 %x, i32 %y) {
+ ; CHECK-LABEL: f:
+ ; CHECK: cbnz
+ %p = icmp eq i32 %x, 0
+ br i1 %p, label %t, label %f
+
+t:
+ call void @x()
+ call void @x()
+ call void @x()
+ call void @x()
+ call void @x()
+ call void @x()
+ call void @x()
+ call void @x()
+ call void @x()
+ call void @x()
+ call void @x()
+ call void @x()
+ call void @x()
+ call void @x()
+ call void @x()
+ call void @x()
+ ; CHECK: cbnz
+ %q = icmp eq i32 %y, 0
+ br i1 %q, label %t2, label %f
+
+t2:
+ call void @x()
+ call void @x()
+ call void @x()
+ call void @x()
+ call void @x()
+ call void @x()
+ call void @x()
+ call void @x()
+ call void @x()
+ call void @x()
+ call void @x()
+ call void @x()
+ call void @x()
+ call void @x()
+ call void @x()
+ br label %f
+
+f:
+ call void @y()
+ ret void
+}
diff --git a/test/CodeGen/Thumb2/constant-islands-new-island-padding.ll b/test/CodeGen/Thumb2/constant-islands-new-island-padding.ll
index 991b043f0bdc..c2a2c068ca48 100644
--- a/test/CodeGen/Thumb2/constant-islands-new-island-padding.ll
+++ b/test/CodeGen/Thumb2/constant-islands-new-island-padding.ll
@@ -15,22 +15,22 @@ declare i32 @llvm.arm.space(i32, i32)
define i32 @testpadding(i32 %a) {
entry:
- %0 = load i32* @g0, align 4
+ %0 = load i32, i32* @g0, align 4
%add = add nsw i32 %0, 12
store i32 %add, i32* @g0, align 4
- %1 = load double* @d0, align 8
+ %1 = load double, double* @d0, align 8
%add1 = fadd double %1, 0x3FF3C0B8ED46EACB
store double %add1, double* @d0, align 8
%tmpcall11 = call i32 @llvm.arm.space(i32 28, i32 undef)
call void @foo20(i32 191)
- %2 = load float* @f0, align 4
+ %2 = load float, float* @f0, align 4
%add2 = fadd float %2, 0x3FF3C0BDC0000000
store float %add2, float* @f0, align 4
br label %do.body
do.body: ; preds = %do.body, %entry
tail call void @foo20(i32 19)
- %3 = load i32* @g1, align 4
+ %3 = load i32, i32* @g1, align 4
%tobool = icmp eq i32 %3, 0
br i1 %tobool, label %do.end, label %do.body
diff --git a/test/CodeGen/Thumb2/constant-islands.ll b/test/CodeGen/Thumb2/constant-islands.ll
index 255b709edb73..a64d72e86efb 100644
--- a/test/CodeGen/Thumb2/constant-islands.ll
+++ b/test/CodeGen/Thumb2/constant-islands.ll
@@ -262,170 +262,170 @@ entry:
store %class.btDynamicsWorld* %ownerWorld, %class.btDynamicsWorld** %ownerWorld.addr, align 4
store %class.btVector3* %positionOffset, %class.btVector3** %positionOffset.addr, align 4
store float %scale, float* %scale.addr, align 4
- %this1 = load %class.RagDoll** %this.addr
+ %this1 = load %class.RagDoll*, %class.RagDoll** %this.addr
store %class.RagDoll* %this1, %class.RagDoll** %retval
%0 = bitcast %class.RagDoll* %this1 to i8***
- store i8** getelementptr inbounds ([4 x i8*]* @_ZTV7RagDoll, i64 0, i64 2), i8*** %0
- %m_ownerWorld = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 1
- %1 = load %class.btDynamicsWorld** %ownerWorld.addr, align 4
+ store i8** getelementptr inbounds ([4 x i8*], [4 x i8*]* @_ZTV7RagDoll, i64 0, i64 2), i8*** %0
+ %m_ownerWorld = getelementptr inbounds %class.RagDoll, %class.RagDoll* %this1, i32 0, i32 1
+ %1 = load %class.btDynamicsWorld*, %class.btDynamicsWorld** %ownerWorld.addr, align 4
store %class.btDynamicsWorld* %1, %class.btDynamicsWorld** %m_ownerWorld, align 4
%call = call i8* @_ZN13btConvexShapenwEm(i32 56)
%2 = bitcast i8* %call to %class.btCapsuleShape*
- %3 = load float* %scale.addr, align 4
+ %3 = load float, float* %scale.addr, align 4
%mul = fmul float 0x3FC3333340000000, %3
- %4 = load float* %scale.addr, align 4
+ %4 = load float, float* %scale.addr, align 4
%mul2 = fmul float 0x3FC99999A0000000, %4
%call3 = invoke %class.btCapsuleShape* @_ZN14btCapsuleShapeC1Eff(%class.btCapsuleShape* %2, float %mul, float %mul2)
to label %invoke.cont unwind label %lpad
invoke.cont: ; preds = %entry
%5 = bitcast %class.btCapsuleShape* %2 to %class.btCollisionShape*
- %m_shapes = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 2
- %arrayidx = getelementptr inbounds [11 x %class.btCollisionShape*]* %m_shapes, i32 0, i32 0
+ %m_shapes = getelementptr inbounds %class.RagDoll, %class.RagDoll* %this1, i32 0, i32 2
+ %arrayidx = getelementptr inbounds [11 x %class.btCollisionShape*], [11 x %class.btCollisionShape*]* %m_shapes, i32 0, i32 0
store %class.btCollisionShape* %5, %class.btCollisionShape** %arrayidx, align 4
%call5 = call i8* @_ZN13btConvexShapenwEm(i32 56)
%6 = bitcast i8* %call5 to %class.btCapsuleShape*
- %7 = load float* %scale.addr, align 4
+ %7 = load float, float* %scale.addr, align 4
%mul6 = fmul float 0x3FC3333340000000, %7
- %8 = load float* %scale.addr, align 4
+ %8 = load float, float* %scale.addr, align 4
%mul7 = fmul float 0x3FD1EB8520000000, %8
%call10 = invoke %class.btCapsuleShape* @_ZN14btCapsuleShapeC1Eff(%class.btCapsuleShape* %6, float %mul6, float %mul7)
to label %invoke.cont9 unwind label %lpad8
invoke.cont9: ; preds = %invoke.cont
%9 = bitcast %class.btCapsuleShape* %6 to %class.btCollisionShape*
- %m_shapes12 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 2
- %arrayidx13 = getelementptr inbounds [11 x %class.btCollisionShape*]* %m_shapes12, i32 0, i32 1
+ %m_shapes12 = getelementptr inbounds %class.RagDoll, %class.RagDoll* %this1, i32 0, i32 2
+ %arrayidx13 = getelementptr inbounds [11 x %class.btCollisionShape*], [11 x %class.btCollisionShape*]* %m_shapes12, i32 0, i32 1
store %class.btCollisionShape* %9, %class.btCollisionShape** %arrayidx13, align 4
%call14 = call i8* @_ZN13btConvexShapenwEm(i32 56)
%10 = bitcast i8* %call14 to %class.btCapsuleShape*
- %11 = load float* %scale.addr, align 4
+ %11 = load float, float* %scale.addr, align 4
%mul15 = fmul float 0x3FB99999A0000000, %11
- %12 = load float* %scale.addr, align 4
+ %12 = load float, float* %scale.addr, align 4
%mul16 = fmul float 0x3FA99999A0000000, %12
%call19 = invoke %class.btCapsuleShape* @_ZN14btCapsuleShapeC1Eff(%class.btCapsuleShape* %10, float %mul15, float %mul16)
to label %invoke.cont18 unwind label %lpad17
invoke.cont18: ; preds = %invoke.cont9
%13 = bitcast %class.btCapsuleShape* %10 to %class.btCollisionShape*
- %m_shapes21 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 2
- %arrayidx22 = getelementptr inbounds [11 x %class.btCollisionShape*]* %m_shapes21, i32 0, i32 2
+ %m_shapes21 = getelementptr inbounds %class.RagDoll, %class.RagDoll* %this1, i32 0, i32 2
+ %arrayidx22 = getelementptr inbounds [11 x %class.btCollisionShape*], [11 x %class.btCollisionShape*]* %m_shapes21, i32 0, i32 2
store %class.btCollisionShape* %13, %class.btCollisionShape** %arrayidx22, align 4
%call23 = call i8* @_ZN13btConvexShapenwEm(i32 56)
%14 = bitcast i8* %call23 to %class.btCapsuleShape*
- %15 = load float* %scale.addr, align 4
+ %15 = load float, float* %scale.addr, align 4
%mul24 = fmul float 0x3FB1EB8520000000, %15
- %16 = load float* %scale.addr, align 4
+ %16 = load float, float* %scale.addr, align 4
%mul25 = fmul float 0x3FDCCCCCC0000000, %16
%call28 = invoke %class.btCapsuleShape* @_ZN14btCapsuleShapeC1Eff(%class.btCapsuleShape* %14, float %mul24, float %mul25)
to label %invoke.cont27 unwind label %lpad26
invoke.cont27: ; preds = %invoke.cont18
%17 = bitcast %class.btCapsuleShape* %14 to %class.btCollisionShape*
- %m_shapes30 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 2
- %arrayidx31 = getelementptr inbounds [11 x %class.btCollisionShape*]* %m_shapes30, i32 0, i32 3
+ %m_shapes30 = getelementptr inbounds %class.RagDoll, %class.RagDoll* %this1, i32 0, i32 2
+ %arrayidx31 = getelementptr inbounds [11 x %class.btCollisionShape*], [11 x %class.btCollisionShape*]* %m_shapes30, i32 0, i32 3
store %class.btCollisionShape* %17, %class.btCollisionShape** %arrayidx31, align 4
%call32 = call i8* @_ZN13btConvexShapenwEm(i32 56)
%18 = bitcast i8* %call32 to %class.btCapsuleShape*
- %19 = load float* %scale.addr, align 4
+ %19 = load float, float* %scale.addr, align 4
%mul33 = fmul float 0x3FA99999A0000000, %19
- %20 = load float* %scale.addr, align 4
+ %20 = load float, float* %scale.addr, align 4
%mul34 = fmul float 0x3FD7AE1480000000, %20
%call37 = invoke %class.btCapsuleShape* @_ZN14btCapsuleShapeC1Eff(%class.btCapsuleShape* %18, float %mul33, float %mul34)
to label %invoke.cont36 unwind label %lpad35
invoke.cont36: ; preds = %invoke.cont27
%21 = bitcast %class.btCapsuleShape* %18 to %class.btCollisionShape*
- %m_shapes39 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 2
- %arrayidx40 = getelementptr inbounds [11 x %class.btCollisionShape*]* %m_shapes39, i32 0, i32 4
+ %m_shapes39 = getelementptr inbounds %class.RagDoll, %class.RagDoll* %this1, i32 0, i32 2
+ %arrayidx40 = getelementptr inbounds [11 x %class.btCollisionShape*], [11 x %class.btCollisionShape*]* %m_shapes39, i32 0, i32 4
store %class.btCollisionShape* %21, %class.btCollisionShape** %arrayidx40, align 4
%call41 = call i8* @_ZN13btConvexShapenwEm(i32 56)
%22 = bitcast i8* %call41 to %class.btCapsuleShape*
- %23 = load float* %scale.addr, align 4
+ %23 = load float, float* %scale.addr, align 4
%mul42 = fmul float 0x3FB1EB8520000000, %23
- %24 = load float* %scale.addr, align 4
+ %24 = load float, float* %scale.addr, align 4
%mul43 = fmul float 0x3FDCCCCCC0000000, %24
%call46 = invoke %class.btCapsuleShape* @_ZN14btCapsuleShapeC1Eff(%class.btCapsuleShape* %22, float %mul42, float %mul43)
to label %invoke.cont45 unwind label %lpad44
invoke.cont45: ; preds = %invoke.cont36
%25 = bitcast %class.btCapsuleShape* %22 to %class.btCollisionShape*
- %m_shapes48 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 2
- %arrayidx49 = getelementptr inbounds [11 x %class.btCollisionShape*]* %m_shapes48, i32 0, i32 5
+ %m_shapes48 = getelementptr inbounds %class.RagDoll, %class.RagDoll* %this1, i32 0, i32 2
+ %arrayidx49 = getelementptr inbounds [11 x %class.btCollisionShape*], [11 x %class.btCollisionShape*]* %m_shapes48, i32 0, i32 5
store %class.btCollisionShape* %25, %class.btCollisionShape** %arrayidx49, align 4
%call50 = call i8* @_ZN13btConvexShapenwEm(i32 56)
%26 = bitcast i8* %call50 to %class.btCapsuleShape*
- %27 = load float* %scale.addr, align 4
+ %27 = load float, float* %scale.addr, align 4
%mul51 = fmul float 0x3FA99999A0000000, %27
- %28 = load float* %scale.addr, align 4
+ %28 = load float, float* %scale.addr, align 4
%mul52 = fmul float 0x3FD7AE1480000000, %28
%call55 = invoke %class.btCapsuleShape* @_ZN14btCapsuleShapeC1Eff(%class.btCapsuleShape* %26, float %mul51, float %mul52)
to label %invoke.cont54 unwind label %lpad53
invoke.cont54: ; preds = %invoke.cont45
%29 = bitcast %class.btCapsuleShape* %26 to %class.btCollisionShape*
- %m_shapes57 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 2
- %arrayidx58 = getelementptr inbounds [11 x %class.btCollisionShape*]* %m_shapes57, i32 0, i32 6
+ %m_shapes57 = getelementptr inbounds %class.RagDoll, %class.RagDoll* %this1, i32 0, i32 2
+ %arrayidx58 = getelementptr inbounds [11 x %class.btCollisionShape*], [11 x %class.btCollisionShape*]* %m_shapes57, i32 0, i32 6
store %class.btCollisionShape* %29, %class.btCollisionShape** %arrayidx58, align 4
%call59 = call i8* @_ZN13btConvexShapenwEm(i32 56)
%30 = bitcast i8* %call59 to %class.btCapsuleShape*
- %31 = load float* %scale.addr, align 4
+ %31 = load float, float* %scale.addr, align 4
%mul60 = fmul float 0x3FA99999A0000000, %31
- %32 = load float* %scale.addr, align 4
+ %32 = load float, float* %scale.addr, align 4
%mul61 = fmul float 0x3FD51EB860000000, %32
%call64 = invoke %class.btCapsuleShape* @_ZN14btCapsuleShapeC1Eff(%class.btCapsuleShape* %30, float %mul60, float %mul61)
to label %invoke.cont63 unwind label %lpad62
invoke.cont63: ; preds = %invoke.cont54
%33 = bitcast %class.btCapsuleShape* %30 to %class.btCollisionShape*
- %m_shapes66 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 2
- %arrayidx67 = getelementptr inbounds [11 x %class.btCollisionShape*]* %m_shapes66, i32 0, i32 7
+ %m_shapes66 = getelementptr inbounds %class.RagDoll, %class.RagDoll* %this1, i32 0, i32 2
+ %arrayidx67 = getelementptr inbounds [11 x %class.btCollisionShape*], [11 x %class.btCollisionShape*]* %m_shapes66, i32 0, i32 7
store %class.btCollisionShape* %33, %class.btCollisionShape** %arrayidx67, align 4
%call68 = call i8* @_ZN13btConvexShapenwEm(i32 56)
%34 = bitcast i8* %call68 to %class.btCapsuleShape*
- %35 = load float* %scale.addr, align 4
+ %35 = load float, float* %scale.addr, align 4
%mul69 = fmul float 0x3FA47AE140000000, %35
- %36 = load float* %scale.addr, align 4
+ %36 = load float, float* %scale.addr, align 4
%mul70 = fmul float 2.500000e-01, %36
%call73 = invoke %class.btCapsuleShape* @_ZN14btCapsuleShapeC1Eff(%class.btCapsuleShape* %34, float %mul69, float %mul70)
to label %invoke.cont72 unwind label %lpad71
invoke.cont72: ; preds = %invoke.cont63
%37 = bitcast %class.btCapsuleShape* %34 to %class.btCollisionShape*
- %m_shapes75 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 2
- %arrayidx76 = getelementptr inbounds [11 x %class.btCollisionShape*]* %m_shapes75, i32 0, i32 8
+ %m_shapes75 = getelementptr inbounds %class.RagDoll, %class.RagDoll* %this1, i32 0, i32 2
+ %arrayidx76 = getelementptr inbounds [11 x %class.btCollisionShape*], [11 x %class.btCollisionShape*]* %m_shapes75, i32 0, i32 8
store %class.btCollisionShape* %37, %class.btCollisionShape** %arrayidx76, align 4
%call77 = call i8* @_ZN13btConvexShapenwEm(i32 56)
%38 = bitcast i8* %call77 to %class.btCapsuleShape*
- %39 = load float* %scale.addr, align 4
+ %39 = load float, float* %scale.addr, align 4
%mul78 = fmul float 0x3FA99999A0000000, %39
- %40 = load float* %scale.addr, align 4
+ %40 = load float, float* %scale.addr, align 4
%mul79 = fmul float 0x3FD51EB860000000, %40
%call82 = invoke %class.btCapsuleShape* @_ZN14btCapsuleShapeC1Eff(%class.btCapsuleShape* %38, float %mul78, float %mul79)
to label %invoke.cont81 unwind label %lpad80
invoke.cont81: ; preds = %invoke.cont72
%41 = bitcast %class.btCapsuleShape* %38 to %class.btCollisionShape*
- %m_shapes84 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 2
- %arrayidx85 = getelementptr inbounds [11 x %class.btCollisionShape*]* %m_shapes84, i32 0, i32 9
+ %m_shapes84 = getelementptr inbounds %class.RagDoll, %class.RagDoll* %this1, i32 0, i32 2
+ %arrayidx85 = getelementptr inbounds [11 x %class.btCollisionShape*], [11 x %class.btCollisionShape*]* %m_shapes84, i32 0, i32 9
store %class.btCollisionShape* %41, %class.btCollisionShape** %arrayidx85, align 4
%call86 = call i8* @_ZN13btConvexShapenwEm(i32 56)
%42 = bitcast i8* %call86 to %class.btCapsuleShape*
- %43 = load float* %scale.addr, align 4
+ %43 = load float, float* %scale.addr, align 4
%mul87 = fmul float 0x3FA47AE140000000, %43
- %44 = load float* %scale.addr, align 4
+ %44 = load float, float* %scale.addr, align 4
%mul88 = fmul float 2.500000e-01, %44
%call91 = invoke %class.btCapsuleShape* @_ZN14btCapsuleShapeC1Eff(%class.btCapsuleShape* %42, float %mul87, float %mul88)
to label %invoke.cont90 unwind label %lpad89
invoke.cont90: ; preds = %invoke.cont81
%45 = bitcast %class.btCapsuleShape* %42 to %class.btCollisionShape*
- %m_shapes93 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 2
- %arrayidx94 = getelementptr inbounds [11 x %class.btCollisionShape*]* %m_shapes93, i32 0, i32 10
+ %m_shapes93 = getelementptr inbounds %class.RagDoll, %class.RagDoll* %this1, i32 0, i32 2
+ %arrayidx94 = getelementptr inbounds [11 x %class.btCollisionShape*], [11 x %class.btCollisionShape*]* %m_shapes93, i32 0, i32 10
store %class.btCollisionShape* %45, %class.btCollisionShape** %arrayidx94, align 4
%call95 = call %class.btTransform* @_ZN11btTransformC1Ev(%class.btTransform* %offset)
call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %offset)
- %46 = load %class.btVector3** %positionOffset.addr, align 4
+ %46 = load %class.btVector3*, %class.btVector3** %positionOffset.addr, align 4
call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %offset, %class.btVector3* %46)
%call96 = call %class.btTransform* @_ZN11btTransformC1Ev(%class.btTransform* %transform)
call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %transform)
@@ -436,12 +436,12 @@ invoke.cont90: ; preds = %invoke.cont81
call void @_ZmlRKfRK9btVector3(%class.btVector3* sret %ref.tmp, float* %scale.addr, %class.btVector3* %ref.tmp97)
call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %transform, %class.btVector3* %ref.tmp)
call void @_ZNK11btTransformmlERKS_(%class.btTransform* sret %ref.tmp102, %class.btTransform* %offset, %class.btTransform* %transform)
- %m_shapes103 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 2
- %arrayidx104 = getelementptr inbounds [11 x %class.btCollisionShape*]* %m_shapes103, i32 0, i32 0
- %47 = load %class.btCollisionShape** %arrayidx104, align 4
+ %m_shapes103 = getelementptr inbounds %class.RagDoll, %class.RagDoll* %this1, i32 0, i32 2
+ %arrayidx104 = getelementptr inbounds [11 x %class.btCollisionShape*], [11 x %class.btCollisionShape*]* %m_shapes103, i32 0, i32 0
+ %47 = load %class.btCollisionShape*, %class.btCollisionShape** %arrayidx104, align 4
%call105 = call %class.btRigidBody* @_ZN7RagDoll20localCreateRigidBodyEfRK11btTransformP16btCollisionShape(%class.RagDoll* %this1, float 1.000000e+00, %class.btTransform* %ref.tmp102, %class.btCollisionShape* %47)
- %m_bodies = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
- %arrayidx106 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies, i32 0, i32 0
+ %m_bodies = getelementptr inbounds %class.RagDoll, %class.RagDoll* %this1, i32 0, i32 3
+ %arrayidx106 = getelementptr inbounds [11 x %class.btRigidBody*], [11 x %class.btRigidBody*]* %m_bodies, i32 0, i32 0
store %class.btRigidBody* %call105, %class.btRigidBody** %arrayidx106, align 4
call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %transform)
store float 0.000000e+00, float* %ref.tmp109, align 4
@@ -451,12 +451,12 @@ invoke.cont90: ; preds = %invoke.cont81
call void @_ZmlRKfRK9btVector3(%class.btVector3* sret %ref.tmp107, float* %scale.addr, %class.btVector3* %ref.tmp108)
call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %transform, %class.btVector3* %ref.tmp107)
call void @_ZNK11btTransformmlERKS_(%class.btTransform* sret %ref.tmp113, %class.btTransform* %offset, %class.btTransform* %transform)
- %m_shapes114 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 2
- %arrayidx115 = getelementptr inbounds [11 x %class.btCollisionShape*]* %m_shapes114, i32 0, i32 1
- %48 = load %class.btCollisionShape** %arrayidx115, align 4
+ %m_shapes114 = getelementptr inbounds %class.RagDoll, %class.RagDoll* %this1, i32 0, i32 2
+ %arrayidx115 = getelementptr inbounds [11 x %class.btCollisionShape*], [11 x %class.btCollisionShape*]* %m_shapes114, i32 0, i32 1
+ %48 = load %class.btCollisionShape*, %class.btCollisionShape** %arrayidx115, align 4
%call116 = call %class.btRigidBody* @_ZN7RagDoll20localCreateRigidBodyEfRK11btTransformP16btCollisionShape(%class.RagDoll* %this1, float 1.000000e+00, %class.btTransform* %ref.tmp113, %class.btCollisionShape* %48)
- %m_bodies117 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
- %arrayidx118 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies117, i32 0, i32 1
+ %m_bodies117 = getelementptr inbounds %class.RagDoll, %class.RagDoll* %this1, i32 0, i32 3
+ %arrayidx118 = getelementptr inbounds [11 x %class.btRigidBody*], [11 x %class.btRigidBody*]* %m_bodies117, i32 0, i32 1
store %class.btRigidBody* %call116, %class.btRigidBody** %arrayidx118, align 4
call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %transform)
store float 0.000000e+00, float* %ref.tmp121, align 4
@@ -466,12 +466,12 @@ invoke.cont90: ; preds = %invoke.cont81
call void @_ZmlRKfRK9btVector3(%class.btVector3* sret %ref.tmp119, float* %scale.addr, %class.btVector3* %ref.tmp120)
call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %transform, %class.btVector3* %ref.tmp119)
call void @_ZNK11btTransformmlERKS_(%class.btTransform* sret %ref.tmp125, %class.btTransform* %offset, %class.btTransform* %transform)
- %m_shapes126 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 2
- %arrayidx127 = getelementptr inbounds [11 x %class.btCollisionShape*]* %m_shapes126, i32 0, i32 2
- %49 = load %class.btCollisionShape** %arrayidx127, align 4
+ %m_shapes126 = getelementptr inbounds %class.RagDoll, %class.RagDoll* %this1, i32 0, i32 2
+ %arrayidx127 = getelementptr inbounds [11 x %class.btCollisionShape*], [11 x %class.btCollisionShape*]* %m_shapes126, i32 0, i32 2
+ %49 = load %class.btCollisionShape*, %class.btCollisionShape** %arrayidx127, align 4
%call128 = call %class.btRigidBody* @_ZN7RagDoll20localCreateRigidBodyEfRK11btTransformP16btCollisionShape(%class.RagDoll* %this1, float 1.000000e+00, %class.btTransform* %ref.tmp125, %class.btCollisionShape* %49)
- %m_bodies129 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
- %arrayidx130 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies129, i32 0, i32 2
+ %m_bodies129 = getelementptr inbounds %class.RagDoll, %class.RagDoll* %this1, i32 0, i32 3
+ %arrayidx130 = getelementptr inbounds [11 x %class.btRigidBody*], [11 x %class.btRigidBody*]* %m_bodies129, i32 0, i32 2
store %class.btRigidBody* %call128, %class.btRigidBody** %arrayidx130, align 4
call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %transform)
store float 0xBFC70A3D80000000, float* %ref.tmp133, align 4
@@ -481,12 +481,12 @@ invoke.cont90: ; preds = %invoke.cont81
call void @_ZmlRKfRK9btVector3(%class.btVector3* sret %ref.tmp131, float* %scale.addr, %class.btVector3* %ref.tmp132)
call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %transform, %class.btVector3* %ref.tmp131)
call void @_ZNK11btTransformmlERKS_(%class.btTransform* sret %ref.tmp137, %class.btTransform* %offset, %class.btTransform* %transform)
- %m_shapes138 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 2
- %arrayidx139 = getelementptr inbounds [11 x %class.btCollisionShape*]* %m_shapes138, i32 0, i32 3
- %50 = load %class.btCollisionShape** %arrayidx139, align 4
+ %m_shapes138 = getelementptr inbounds %class.RagDoll, %class.RagDoll* %this1, i32 0, i32 2
+ %arrayidx139 = getelementptr inbounds [11 x %class.btCollisionShape*], [11 x %class.btCollisionShape*]* %m_shapes138, i32 0, i32 3
+ %50 = load %class.btCollisionShape*, %class.btCollisionShape** %arrayidx139, align 4
%call140 = call %class.btRigidBody* @_ZN7RagDoll20localCreateRigidBodyEfRK11btTransformP16btCollisionShape(%class.RagDoll* %this1, float 1.000000e+00, %class.btTransform* %ref.tmp137, %class.btCollisionShape* %50)
- %m_bodies141 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
- %arrayidx142 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies141, i32 0, i32 3
+ %m_bodies141 = getelementptr inbounds %class.RagDoll, %class.RagDoll* %this1, i32 0, i32 3
+ %arrayidx142 = getelementptr inbounds [11 x %class.btRigidBody*], [11 x %class.btRigidBody*]* %m_bodies141, i32 0, i32 3
store %class.btRigidBody* %call140, %class.btRigidBody** %arrayidx142, align 4
call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %transform)
store float 0xBFC70A3D80000000, float* %ref.tmp145, align 4
@@ -496,12 +496,12 @@ invoke.cont90: ; preds = %invoke.cont81
call void @_ZmlRKfRK9btVector3(%class.btVector3* sret %ref.tmp143, float* %scale.addr, %class.btVector3* %ref.tmp144)
call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %transform, %class.btVector3* %ref.tmp143)
call void @_ZNK11btTransformmlERKS_(%class.btTransform* sret %ref.tmp149, %class.btTransform* %offset, %class.btTransform* %transform)
- %m_shapes150 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 2
- %arrayidx151 = getelementptr inbounds [11 x %class.btCollisionShape*]* %m_shapes150, i32 0, i32 4
- %51 = load %class.btCollisionShape** %arrayidx151, align 4
+ %m_shapes150 = getelementptr inbounds %class.RagDoll, %class.RagDoll* %this1, i32 0, i32 2
+ %arrayidx151 = getelementptr inbounds [11 x %class.btCollisionShape*], [11 x %class.btCollisionShape*]* %m_shapes150, i32 0, i32 4
+ %51 = load %class.btCollisionShape*, %class.btCollisionShape** %arrayidx151, align 4
%call152 = call %class.btRigidBody* @_ZN7RagDoll20localCreateRigidBodyEfRK11btTransformP16btCollisionShape(%class.RagDoll* %this1, float 1.000000e+00, %class.btTransform* %ref.tmp149, %class.btCollisionShape* %51)
- %m_bodies153 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
- %arrayidx154 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies153, i32 0, i32 4
+ %m_bodies153 = getelementptr inbounds %class.RagDoll, %class.RagDoll* %this1, i32 0, i32 3
+ %arrayidx154 = getelementptr inbounds [11 x %class.btRigidBody*], [11 x %class.btRigidBody*]* %m_bodies153, i32 0, i32 4
store %class.btRigidBody* %call152, %class.btRigidBody** %arrayidx154, align 4
call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %transform)
store float 0x3FC70A3D80000000, float* %ref.tmp157, align 4
@@ -511,12 +511,12 @@ invoke.cont90: ; preds = %invoke.cont81
call void @_ZmlRKfRK9btVector3(%class.btVector3* sret %ref.tmp155, float* %scale.addr, %class.btVector3* %ref.tmp156)
call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %transform, %class.btVector3* %ref.tmp155)
call void @_ZNK11btTransformmlERKS_(%class.btTransform* sret %ref.tmp161, %class.btTransform* %offset, %class.btTransform* %transform)
- %m_shapes162 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 2
- %arrayidx163 = getelementptr inbounds [11 x %class.btCollisionShape*]* %m_shapes162, i32 0, i32 5
- %52 = load %class.btCollisionShape** %arrayidx163, align 4
+ %m_shapes162 = getelementptr inbounds %class.RagDoll, %class.RagDoll* %this1, i32 0, i32 2
+ %arrayidx163 = getelementptr inbounds [11 x %class.btCollisionShape*], [11 x %class.btCollisionShape*]* %m_shapes162, i32 0, i32 5
+ %52 = load %class.btCollisionShape*, %class.btCollisionShape** %arrayidx163, align 4
%call164 = call %class.btRigidBody* @_ZN7RagDoll20localCreateRigidBodyEfRK11btTransformP16btCollisionShape(%class.RagDoll* %this1, float 1.000000e+00, %class.btTransform* %ref.tmp161, %class.btCollisionShape* %52)
- %m_bodies165 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
- %arrayidx166 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies165, i32 0, i32 5
+ %m_bodies165 = getelementptr inbounds %class.RagDoll, %class.RagDoll* %this1, i32 0, i32 3
+ %arrayidx166 = getelementptr inbounds [11 x %class.btRigidBody*], [11 x %class.btRigidBody*]* %m_bodies165, i32 0, i32 5
store %class.btRigidBody* %call164, %class.btRigidBody** %arrayidx166, align 4
call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %transform)
store float 0x3FC70A3D80000000, float* %ref.tmp169, align 4
@@ -526,12 +526,12 @@ invoke.cont90: ; preds = %invoke.cont81
call void @_ZmlRKfRK9btVector3(%class.btVector3* sret %ref.tmp167, float* %scale.addr, %class.btVector3* %ref.tmp168)
call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %transform, %class.btVector3* %ref.tmp167)
call void @_ZNK11btTransformmlERKS_(%class.btTransform* sret %ref.tmp173, %class.btTransform* %offset, %class.btTransform* %transform)
- %m_shapes174 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 2
- %arrayidx175 = getelementptr inbounds [11 x %class.btCollisionShape*]* %m_shapes174, i32 0, i32 6
- %53 = load %class.btCollisionShape** %arrayidx175, align 4
+ %m_shapes174 = getelementptr inbounds %class.RagDoll, %class.RagDoll* %this1, i32 0, i32 2
+ %arrayidx175 = getelementptr inbounds [11 x %class.btCollisionShape*], [11 x %class.btCollisionShape*]* %m_shapes174, i32 0, i32 6
+ %53 = load %class.btCollisionShape*, %class.btCollisionShape** %arrayidx175, align 4
%call176 = call %class.btRigidBody* @_ZN7RagDoll20localCreateRigidBodyEfRK11btTransformP16btCollisionShape(%class.RagDoll* %this1, float 1.000000e+00, %class.btTransform* %ref.tmp173, %class.btCollisionShape* %53)
- %m_bodies177 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
- %arrayidx178 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies177, i32 0, i32 6
+ %m_bodies177 = getelementptr inbounds %class.RagDoll, %class.RagDoll* %this1, i32 0, i32 3
+ %arrayidx178 = getelementptr inbounds [11 x %class.btRigidBody*], [11 x %class.btRigidBody*]* %m_bodies177, i32 0, i32 6
store %class.btRigidBody* %call176, %class.btRigidBody** %arrayidx178, align 4
call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %transform)
store float 0xBFD6666660000000, float* %ref.tmp181, align 4
@@ -543,12 +543,12 @@ invoke.cont90: ; preds = %invoke.cont81
%call185 = call %class.btMatrix3x3* @_ZN11btTransform8getBasisEv(%class.btTransform* %transform)
call void @_ZN11btMatrix3x311setEulerZYXEfff(%class.btMatrix3x3* %call185, float 0.000000e+00, float 0.000000e+00, float 0x3FF921FB60000000)
call void @_ZNK11btTransformmlERKS_(%class.btTransform* sret %ref.tmp186, %class.btTransform* %offset, %class.btTransform* %transform)
- %m_shapes187 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 2
- %arrayidx188 = getelementptr inbounds [11 x %class.btCollisionShape*]* %m_shapes187, i32 0, i32 7
- %54 = load %class.btCollisionShape** %arrayidx188, align 4
+ %m_shapes187 = getelementptr inbounds %class.RagDoll, %class.RagDoll* %this1, i32 0, i32 2
+ %arrayidx188 = getelementptr inbounds [11 x %class.btCollisionShape*], [11 x %class.btCollisionShape*]* %m_shapes187, i32 0, i32 7
+ %54 = load %class.btCollisionShape*, %class.btCollisionShape** %arrayidx188, align 4
%call189 = call %class.btRigidBody* @_ZN7RagDoll20localCreateRigidBodyEfRK11btTransformP16btCollisionShape(%class.RagDoll* %this1, float 1.000000e+00, %class.btTransform* %ref.tmp186, %class.btCollisionShape* %54)
- %m_bodies190 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
- %arrayidx191 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies190, i32 0, i32 7
+ %m_bodies190 = getelementptr inbounds %class.RagDoll, %class.RagDoll* %this1, i32 0, i32 3
+ %arrayidx191 = getelementptr inbounds [11 x %class.btRigidBody*], [11 x %class.btRigidBody*]* %m_bodies190, i32 0, i32 7
store %class.btRigidBody* %call189, %class.btRigidBody** %arrayidx191, align 4
call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %transform)
store float 0xBFE6666660000000, float* %ref.tmp194, align 4
@@ -560,12 +560,12 @@ invoke.cont90: ; preds = %invoke.cont81
%call198 = call %class.btMatrix3x3* @_ZN11btTransform8getBasisEv(%class.btTransform* %transform)
call void @_ZN11btMatrix3x311setEulerZYXEfff(%class.btMatrix3x3* %call198, float 0.000000e+00, float 0.000000e+00, float 0x3FF921FB60000000)
call void @_ZNK11btTransformmlERKS_(%class.btTransform* sret %ref.tmp199, %class.btTransform* %offset, %class.btTransform* %transform)
- %m_shapes200 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 2
- %arrayidx201 = getelementptr inbounds [11 x %class.btCollisionShape*]* %m_shapes200, i32 0, i32 8
- %55 = load %class.btCollisionShape** %arrayidx201, align 4
+ %m_shapes200 = getelementptr inbounds %class.RagDoll, %class.RagDoll* %this1, i32 0, i32 2
+ %arrayidx201 = getelementptr inbounds [11 x %class.btCollisionShape*], [11 x %class.btCollisionShape*]* %m_shapes200, i32 0, i32 8
+ %55 = load %class.btCollisionShape*, %class.btCollisionShape** %arrayidx201, align 4
%call202 = call %class.btRigidBody* @_ZN7RagDoll20localCreateRigidBodyEfRK11btTransformP16btCollisionShape(%class.RagDoll* %this1, float 1.000000e+00, %class.btTransform* %ref.tmp199, %class.btCollisionShape* %55)
- %m_bodies203 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
- %arrayidx204 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies203, i32 0, i32 8
+ %m_bodies203 = getelementptr inbounds %class.RagDoll, %class.RagDoll* %this1, i32 0, i32 3
+ %arrayidx204 = getelementptr inbounds [11 x %class.btRigidBody*], [11 x %class.btRigidBody*]* %m_bodies203, i32 0, i32 8
store %class.btRigidBody* %call202, %class.btRigidBody** %arrayidx204, align 4
call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %transform)
store float 0x3FD6666660000000, float* %ref.tmp207, align 4
@@ -577,12 +577,12 @@ invoke.cont90: ; preds = %invoke.cont81
%call211 = call %class.btMatrix3x3* @_ZN11btTransform8getBasisEv(%class.btTransform* %transform)
call void @_ZN11btMatrix3x311setEulerZYXEfff(%class.btMatrix3x3* %call211, float 0.000000e+00, float 0.000000e+00, float 0xBFF921FB60000000)
call void @_ZNK11btTransformmlERKS_(%class.btTransform* sret %ref.tmp212, %class.btTransform* %offset, %class.btTransform* %transform)
- %m_shapes213 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 2
- %arrayidx214 = getelementptr inbounds [11 x %class.btCollisionShape*]* %m_shapes213, i32 0, i32 9
- %56 = load %class.btCollisionShape** %arrayidx214, align 4
+ %m_shapes213 = getelementptr inbounds %class.RagDoll, %class.RagDoll* %this1, i32 0, i32 2
+ %arrayidx214 = getelementptr inbounds [11 x %class.btCollisionShape*], [11 x %class.btCollisionShape*]* %m_shapes213, i32 0, i32 9
+ %56 = load %class.btCollisionShape*, %class.btCollisionShape** %arrayidx214, align 4
%call215 = call %class.btRigidBody* @_ZN7RagDoll20localCreateRigidBodyEfRK11btTransformP16btCollisionShape(%class.RagDoll* %this1, float 1.000000e+00, %class.btTransform* %ref.tmp212, %class.btCollisionShape* %56)
- %m_bodies216 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
- %arrayidx217 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies216, i32 0, i32 9
+ %m_bodies216 = getelementptr inbounds %class.RagDoll, %class.RagDoll* %this1, i32 0, i32 3
+ %arrayidx217 = getelementptr inbounds [11 x %class.btRigidBody*], [11 x %class.btRigidBody*]* %m_bodies216, i32 0, i32 9
store %class.btRigidBody* %call215, %class.btRigidBody** %arrayidx217, align 4
call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %transform)
store float 0x3FE6666660000000, float* %ref.tmp220, align 4
@@ -594,42 +594,42 @@ invoke.cont90: ; preds = %invoke.cont81
%call224 = call %class.btMatrix3x3* @_ZN11btTransform8getBasisEv(%class.btTransform* %transform)
call void @_ZN11btMatrix3x311setEulerZYXEfff(%class.btMatrix3x3* %call224, float 0.000000e+00, float 0.000000e+00, float 0xBFF921FB60000000)
call void @_ZNK11btTransformmlERKS_(%class.btTransform* sret %ref.tmp225, %class.btTransform* %offset, %class.btTransform* %transform)
- %m_shapes226 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 2
- %arrayidx227 = getelementptr inbounds [11 x %class.btCollisionShape*]* %m_shapes226, i32 0, i32 10
- %57 = load %class.btCollisionShape** %arrayidx227, align 4
+ %m_shapes226 = getelementptr inbounds %class.RagDoll, %class.RagDoll* %this1, i32 0, i32 2
+ %arrayidx227 = getelementptr inbounds [11 x %class.btCollisionShape*], [11 x %class.btCollisionShape*]* %m_shapes226, i32 0, i32 10
+ %57 = load %class.btCollisionShape*, %class.btCollisionShape** %arrayidx227, align 4
%call228 = call %class.btRigidBody* @_ZN7RagDoll20localCreateRigidBodyEfRK11btTransformP16btCollisionShape(%class.RagDoll* %this1, float 1.000000e+00, %class.btTransform* %ref.tmp225, %class.btCollisionShape* %57)
- %m_bodies229 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
- %arrayidx230 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies229, i32 0, i32 10
+ %m_bodies229 = getelementptr inbounds %class.RagDoll, %class.RagDoll* %this1, i32 0, i32 3
+ %arrayidx230 = getelementptr inbounds [11 x %class.btRigidBody*], [11 x %class.btRigidBody*]* %m_bodies229, i32 0, i32 10
store %class.btRigidBody* %call228, %class.btRigidBody** %arrayidx230, align 4
store i32 0, i32* %i, align 4
br label %for.cond
for.cond: ; preds = %for.inc, %invoke.cont90
- %58 = load i32* %i, align 4
+ %58 = load i32, i32* %i, align 4
%cmp = icmp slt i32 %58, 11
br i1 %cmp, label %for.body, label %for.end
for.body: ; preds = %for.cond
- %59 = load i32* %i, align 4
- %m_bodies231 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
- %arrayidx232 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies231, i32 0, i32 %59
- %60 = load %class.btRigidBody** %arrayidx232, align 4
+ %59 = load i32, i32* %i, align 4
+ %m_bodies231 = getelementptr inbounds %class.RagDoll, %class.RagDoll* %this1, i32 0, i32 3
+ %arrayidx232 = getelementptr inbounds [11 x %class.btRigidBody*], [11 x %class.btRigidBody*]* %m_bodies231, i32 0, i32 %59
+ %60 = load %class.btRigidBody*, %class.btRigidBody** %arrayidx232, align 4
call void @_ZN11btRigidBody10setDampingEff(%class.btRigidBody* %60, float 0x3FA99999A0000000, float 0x3FEB333340000000)
- %61 = load i32* %i, align 4
- %m_bodies233 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
- %arrayidx234 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies233, i32 0, i32 %61
- %62 = load %class.btRigidBody** %arrayidx234, align 4
+ %61 = load i32, i32* %i, align 4
+ %m_bodies233 = getelementptr inbounds %class.RagDoll, %class.RagDoll* %this1, i32 0, i32 3
+ %arrayidx234 = getelementptr inbounds [11 x %class.btRigidBody*], [11 x %class.btRigidBody*]* %m_bodies233, i32 0, i32 %61
+ %62 = load %class.btRigidBody*, %class.btRigidBody** %arrayidx234, align 4
%63 = bitcast %class.btRigidBody* %62 to %class.btCollisionObject*
call void @_ZN17btCollisionObject19setDeactivationTimeEf(%class.btCollisionObject* %63, float 0x3FE99999A0000000)
- %64 = load i32* %i, align 4
- %m_bodies235 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
- %arrayidx236 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies235, i32 0, i32 %64
- %65 = load %class.btRigidBody** %arrayidx236, align 4
+ %64 = load i32, i32* %i, align 4
+ %m_bodies235 = getelementptr inbounds %class.RagDoll, %class.RagDoll* %this1, i32 0, i32 3
+ %arrayidx236 = getelementptr inbounds [11 x %class.btRigidBody*], [11 x %class.btRigidBody*]* %m_bodies235, i32 0, i32 %64
+ %65 = load %class.btRigidBody*, %class.btRigidBody** %arrayidx236, align 4
call void @_ZN11btRigidBody21setSleepingThresholdsEff(%class.btRigidBody* %65, float 0x3FF99999A0000000, float 2.500000e+00)
br label %for.inc
for.inc: ; preds = %for.body
- %66 = load i32* %i, align 4
+ %66 = load i32, i32* %i, align 4
%inc = add nsw i32 %66, 1
store i32 %inc, i32* %i, align 4
br label %for.cond
@@ -800,33 +800,33 @@ for.end: ; preds = %for.cond
call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %localB, %class.btVector3* %ref.tmp247)
%call253 = call noalias i8* @_Znwm(i32 780)
%100 = bitcast i8* %call253 to %class.btHingeConstraint*
- %m_bodies254 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
- %arrayidx255 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies254, i32 0, i32 0
- %101 = load %class.btRigidBody** %arrayidx255, align 4
- %m_bodies256 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
- %arrayidx257 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies256, i32 0, i32 1
- %102 = load %class.btRigidBody** %arrayidx257, align 4
+ %m_bodies254 = getelementptr inbounds %class.RagDoll, %class.RagDoll* %this1, i32 0, i32 3
+ %arrayidx255 = getelementptr inbounds [11 x %class.btRigidBody*], [11 x %class.btRigidBody*]* %m_bodies254, i32 0, i32 0
+ %101 = load %class.btRigidBody*, %class.btRigidBody** %arrayidx255, align 4
+ %m_bodies256 = getelementptr inbounds %class.RagDoll, %class.RagDoll* %this1, i32 0, i32 3
+ %arrayidx257 = getelementptr inbounds [11 x %class.btRigidBody*], [11 x %class.btRigidBody*]* %m_bodies256, i32 0, i32 1
+ %102 = load %class.btRigidBody*, %class.btRigidBody** %arrayidx257, align 4
%call260 = invoke %class.btHingeConstraint* @_ZN17btHingeConstraintC1ER11btRigidBodyS1_RK11btTransformS4_b(%class.btHingeConstraint* %100, %class.btRigidBody* %101, %class.btRigidBody* %102, %class.btTransform* %localA, %class.btTransform* %localB, i1 zeroext false)
to label %invoke.cont259 unwind label %lpad258
invoke.cont259: ; preds = %for.end
store %class.btHingeConstraint* %100, %class.btHingeConstraint** %hingeC, align 4
- %103 = load %class.btHingeConstraint** %hingeC, align 4
+ %103 = load %class.btHingeConstraint*, %class.btHingeConstraint** %hingeC, align 4
call void @_ZN17btHingeConstraint8setLimitEfffff(%class.btHingeConstraint* %103, float 0xBFE921FB60000000, float 0x3FF921FB60000000, float 0x3FECCCCCC0000000, float 0x3FD3333340000000, float 1.000000e+00)
- %104 = load %class.btHingeConstraint** %hingeC, align 4
+ %104 = load %class.btHingeConstraint*, %class.btHingeConstraint** %hingeC, align 4
%105 = bitcast %class.btHingeConstraint* %104 to %class.btTypedConstraint*
- %m_joints = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 4
- %arrayidx261 = getelementptr inbounds [10 x %class.btTypedConstraint*]* %m_joints, i32 0, i32 0
+ %m_joints = getelementptr inbounds %class.RagDoll, %class.RagDoll* %this1, i32 0, i32 4
+ %arrayidx261 = getelementptr inbounds [10 x %class.btTypedConstraint*], [10 x %class.btTypedConstraint*]* %m_joints, i32 0, i32 0
store %class.btTypedConstraint* %105, %class.btTypedConstraint** %arrayidx261, align 4
- %m_ownerWorld262 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 1
- %106 = load %class.btDynamicsWorld** %m_ownerWorld262, align 4
+ %m_ownerWorld262 = getelementptr inbounds %class.RagDoll, %class.RagDoll* %this1, i32 0, i32 1
+ %106 = load %class.btDynamicsWorld*, %class.btDynamicsWorld** %m_ownerWorld262, align 4
%107 = bitcast %class.btDynamicsWorld* %106 to void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)***
- %vtable = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)*** %107
- %vfn = getelementptr inbounds void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vtable, i64 10
- %108 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vfn
- %m_joints263 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 4
- %arrayidx264 = getelementptr inbounds [10 x %class.btTypedConstraint*]* %m_joints263, i32 0, i32 0
- %109 = load %class.btTypedConstraint** %arrayidx264, align 4
+ %vtable = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)**, void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)*** %107
+ %vfn = getelementptr inbounds void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)*, void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vtable, i64 10
+ %108 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)*, void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vfn
+ %m_joints263 = getelementptr inbounds %class.RagDoll, %class.RagDoll* %this1, i32 0, i32 4
+ %arrayidx264 = getelementptr inbounds [10 x %class.btTypedConstraint*], [10 x %class.btTypedConstraint*]* %m_joints263, i32 0, i32 0
+ %109 = load %class.btTypedConstraint*, %class.btTypedConstraint** %arrayidx264, align 4
call void %108(%class.btDynamicsWorld* %106, %class.btTypedConstraint* %109, i1 zeroext true)
call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %localA)
call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %localB)
@@ -848,33 +848,33 @@ invoke.cont259: ; preds = %for.end
call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %localB, %class.btVector3* %ref.tmp273)
%call279 = call noalias i8* @_Znwm(i32 628)
%110 = bitcast i8* %call279 to %class.btConeTwistConstraint*
- %m_bodies280 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
- %arrayidx281 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies280, i32 0, i32 1
- %111 = load %class.btRigidBody** %arrayidx281, align 4
- %m_bodies282 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
- %arrayidx283 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies282, i32 0, i32 2
- %112 = load %class.btRigidBody** %arrayidx283, align 4
+ %m_bodies280 = getelementptr inbounds %class.RagDoll, %class.RagDoll* %this1, i32 0, i32 3
+ %arrayidx281 = getelementptr inbounds [11 x %class.btRigidBody*], [11 x %class.btRigidBody*]* %m_bodies280, i32 0, i32 1
+ %111 = load %class.btRigidBody*, %class.btRigidBody** %arrayidx281, align 4
+ %m_bodies282 = getelementptr inbounds %class.RagDoll, %class.RagDoll* %this1, i32 0, i32 3
+ %arrayidx283 = getelementptr inbounds [11 x %class.btRigidBody*], [11 x %class.btRigidBody*]* %m_bodies282, i32 0, i32 2
+ %112 = load %class.btRigidBody*, %class.btRigidBody** %arrayidx283, align 4
%call286 = invoke %class.btConeTwistConstraint* @_ZN21btConeTwistConstraintC1ER11btRigidBodyS1_RK11btTransformS4_(%class.btConeTwistConstraint* %110, %class.btRigidBody* %111, %class.btRigidBody* %112, %class.btTransform* %localA, %class.btTransform* %localB)
to label %invoke.cont285 unwind label %lpad284
invoke.cont285: ; preds = %invoke.cont259
store %class.btConeTwistConstraint* %110, %class.btConeTwistConstraint** %coneC, align 4
- %113 = load %class.btConeTwistConstraint** %coneC, align 4
+ %113 = load %class.btConeTwistConstraint*, %class.btConeTwistConstraint** %coneC, align 4
call void @_ZN21btConeTwistConstraint8setLimitEffffff(%class.btConeTwistConstraint* %113, float 0x3FE921FB60000000, float 0x3FE921FB60000000, float 0x3FF921FB60000000, float 1.000000e+00, float 0x3FD3333340000000, float 1.000000e+00)
- %114 = load %class.btConeTwistConstraint** %coneC, align 4
+ %114 = load %class.btConeTwistConstraint*, %class.btConeTwistConstraint** %coneC, align 4
%115 = bitcast %class.btConeTwistConstraint* %114 to %class.btTypedConstraint*
- %m_joints287 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 4
- %arrayidx288 = getelementptr inbounds [10 x %class.btTypedConstraint*]* %m_joints287, i32 0, i32 1
+ %m_joints287 = getelementptr inbounds %class.RagDoll, %class.RagDoll* %this1, i32 0, i32 4
+ %arrayidx288 = getelementptr inbounds [10 x %class.btTypedConstraint*], [10 x %class.btTypedConstraint*]* %m_joints287, i32 0, i32 1
store %class.btTypedConstraint* %115, %class.btTypedConstraint** %arrayidx288, align 4
- %m_ownerWorld289 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 1
- %116 = load %class.btDynamicsWorld** %m_ownerWorld289, align 4
+ %m_ownerWorld289 = getelementptr inbounds %class.RagDoll, %class.RagDoll* %this1, i32 0, i32 1
+ %116 = load %class.btDynamicsWorld*, %class.btDynamicsWorld** %m_ownerWorld289, align 4
%117 = bitcast %class.btDynamicsWorld* %116 to void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)***
- %vtable290 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)*** %117
- %vfn291 = getelementptr inbounds void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vtable290, i64 10
- %118 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vfn291
- %m_joints292 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 4
- %arrayidx293 = getelementptr inbounds [10 x %class.btTypedConstraint*]* %m_joints292, i32 0, i32 1
- %119 = load %class.btTypedConstraint** %arrayidx293, align 4
+ %vtable290 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)**, void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)*** %117
+ %vfn291 = getelementptr inbounds void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)*, void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vtable290, i64 10
+ %118 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)*, void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vfn291
+ %m_joints292 = getelementptr inbounds %class.RagDoll, %class.RagDoll* %this1, i32 0, i32 4
+ %arrayidx293 = getelementptr inbounds [10 x %class.btTypedConstraint*], [10 x %class.btTypedConstraint*]* %m_joints292, i32 0, i32 1
+ %119 = load %class.btTypedConstraint*, %class.btTypedConstraint** %arrayidx293, align 4
call void %118(%class.btDynamicsWorld* %116, %class.btTypedConstraint* %119, i1 zeroext true)
call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %localA)
call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %localB)
@@ -896,33 +896,33 @@ invoke.cont285: ; preds = %invoke.cont259
call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %localB, %class.btVector3* %ref.tmp302)
%call308 = call noalias i8* @_Znwm(i32 628)
%120 = bitcast i8* %call308 to %class.btConeTwistConstraint*
- %m_bodies309 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
- %arrayidx310 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies309, i32 0, i32 0
- %121 = load %class.btRigidBody** %arrayidx310, align 4
- %m_bodies311 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
- %arrayidx312 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies311, i32 0, i32 3
- %122 = load %class.btRigidBody** %arrayidx312, align 4
+ %m_bodies309 = getelementptr inbounds %class.RagDoll, %class.RagDoll* %this1, i32 0, i32 3
+ %arrayidx310 = getelementptr inbounds [11 x %class.btRigidBody*], [11 x %class.btRigidBody*]* %m_bodies309, i32 0, i32 0
+ %121 = load %class.btRigidBody*, %class.btRigidBody** %arrayidx310, align 4
+ %m_bodies311 = getelementptr inbounds %class.RagDoll, %class.RagDoll* %this1, i32 0, i32 3
+ %arrayidx312 = getelementptr inbounds [11 x %class.btRigidBody*], [11 x %class.btRigidBody*]* %m_bodies311, i32 0, i32 3
+ %122 = load %class.btRigidBody*, %class.btRigidBody** %arrayidx312, align 4
%call315 = invoke %class.btConeTwistConstraint* @_ZN21btConeTwistConstraintC1ER11btRigidBodyS1_RK11btTransformS4_(%class.btConeTwistConstraint* %120, %class.btRigidBody* %121, %class.btRigidBody* %122, %class.btTransform* %localA, %class.btTransform* %localB)
to label %invoke.cont314 unwind label %lpad313
invoke.cont314: ; preds = %invoke.cont285
store %class.btConeTwistConstraint* %120, %class.btConeTwistConstraint** %coneC, align 4
- %123 = load %class.btConeTwistConstraint** %coneC, align 4
+ %123 = load %class.btConeTwistConstraint*, %class.btConeTwistConstraint** %coneC, align 4
call void @_ZN21btConeTwistConstraint8setLimitEffffff(%class.btConeTwistConstraint* %123, float 0x3FE921FB60000000, float 0x3FE921FB60000000, float 0.000000e+00, float 1.000000e+00, float 0x3FD3333340000000, float 1.000000e+00)
- %124 = load %class.btConeTwistConstraint** %coneC, align 4
+ %124 = load %class.btConeTwistConstraint*, %class.btConeTwistConstraint** %coneC, align 4
%125 = bitcast %class.btConeTwistConstraint* %124 to %class.btTypedConstraint*
- %m_joints316 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 4
- %arrayidx317 = getelementptr inbounds [10 x %class.btTypedConstraint*]* %m_joints316, i32 0, i32 2
+ %m_joints316 = getelementptr inbounds %class.RagDoll, %class.RagDoll* %this1, i32 0, i32 4
+ %arrayidx317 = getelementptr inbounds [10 x %class.btTypedConstraint*], [10 x %class.btTypedConstraint*]* %m_joints316, i32 0, i32 2
store %class.btTypedConstraint* %125, %class.btTypedConstraint** %arrayidx317, align 4
- %m_ownerWorld318 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 1
- %126 = load %class.btDynamicsWorld** %m_ownerWorld318, align 4
+ %m_ownerWorld318 = getelementptr inbounds %class.RagDoll, %class.RagDoll* %this1, i32 0, i32 1
+ %126 = load %class.btDynamicsWorld*, %class.btDynamicsWorld** %m_ownerWorld318, align 4
%127 = bitcast %class.btDynamicsWorld* %126 to void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)***
- %vtable319 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)*** %127
- %vfn320 = getelementptr inbounds void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vtable319, i64 10
- %128 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vfn320
- %m_joints321 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 4
- %arrayidx322 = getelementptr inbounds [10 x %class.btTypedConstraint*]* %m_joints321, i32 0, i32 2
- %129 = load %class.btTypedConstraint** %arrayidx322, align 4
+ %vtable319 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)**, void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)*** %127
+ %vfn320 = getelementptr inbounds void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)*, void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vtable319, i64 10
+ %128 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)*, void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vfn320
+ %m_joints321 = getelementptr inbounds %class.RagDoll, %class.RagDoll* %this1, i32 0, i32 4
+ %arrayidx322 = getelementptr inbounds [10 x %class.btTypedConstraint*], [10 x %class.btTypedConstraint*]* %m_joints321, i32 0, i32 2
+ %129 = load %class.btTypedConstraint*, %class.btTypedConstraint** %arrayidx322, align 4
call void %128(%class.btDynamicsWorld* %126, %class.btTypedConstraint* %129, i1 zeroext true)
call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %localA)
call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %localB)
@@ -944,33 +944,33 @@ invoke.cont314: ; preds = %invoke.cont285
call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %localB, %class.btVector3* %ref.tmp331)
%call337 = call noalias i8* @_Znwm(i32 780)
%130 = bitcast i8* %call337 to %class.btHingeConstraint*
- %m_bodies338 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
- %arrayidx339 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies338, i32 0, i32 3
- %131 = load %class.btRigidBody** %arrayidx339, align 4
- %m_bodies340 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
- %arrayidx341 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies340, i32 0, i32 4
- %132 = load %class.btRigidBody** %arrayidx341, align 4
+ %m_bodies338 = getelementptr inbounds %class.RagDoll, %class.RagDoll* %this1, i32 0, i32 3
+ %arrayidx339 = getelementptr inbounds [11 x %class.btRigidBody*], [11 x %class.btRigidBody*]* %m_bodies338, i32 0, i32 3
+ %131 = load %class.btRigidBody*, %class.btRigidBody** %arrayidx339, align 4
+ %m_bodies340 = getelementptr inbounds %class.RagDoll, %class.RagDoll* %this1, i32 0, i32 3
+ %arrayidx341 = getelementptr inbounds [11 x %class.btRigidBody*], [11 x %class.btRigidBody*]* %m_bodies340, i32 0, i32 4
+ %132 = load %class.btRigidBody*, %class.btRigidBody** %arrayidx341, align 4
%call344 = invoke %class.btHingeConstraint* @_ZN17btHingeConstraintC1ER11btRigidBodyS1_RK11btTransformS4_b(%class.btHingeConstraint* %130, %class.btRigidBody* %131, %class.btRigidBody* %132, %class.btTransform* %localA, %class.btTransform* %localB, i1 zeroext false)
to label %invoke.cont343 unwind label %lpad342
invoke.cont343: ; preds = %invoke.cont314
store %class.btHingeConstraint* %130, %class.btHingeConstraint** %hingeC, align 4
- %133 = load %class.btHingeConstraint** %hingeC, align 4
+ %133 = load %class.btHingeConstraint*, %class.btHingeConstraint** %hingeC, align 4
call void @_ZN17btHingeConstraint8setLimitEfffff(%class.btHingeConstraint* %133, float 0.000000e+00, float 0x3FF921FB60000000, float 0x3FECCCCCC0000000, float 0x3FD3333340000000, float 1.000000e+00)
- %134 = load %class.btHingeConstraint** %hingeC, align 4
+ %134 = load %class.btHingeConstraint*, %class.btHingeConstraint** %hingeC, align 4
%135 = bitcast %class.btHingeConstraint* %134 to %class.btTypedConstraint*
- %m_joints345 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 4
- %arrayidx346 = getelementptr inbounds [10 x %class.btTypedConstraint*]* %m_joints345, i32 0, i32 3
+ %m_joints345 = getelementptr inbounds %class.RagDoll, %class.RagDoll* %this1, i32 0, i32 4
+ %arrayidx346 = getelementptr inbounds [10 x %class.btTypedConstraint*], [10 x %class.btTypedConstraint*]* %m_joints345, i32 0, i32 3
store %class.btTypedConstraint* %135, %class.btTypedConstraint** %arrayidx346, align 4
- %m_ownerWorld347 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 1
- %136 = load %class.btDynamicsWorld** %m_ownerWorld347, align 4
+ %m_ownerWorld347 = getelementptr inbounds %class.RagDoll, %class.RagDoll* %this1, i32 0, i32 1
+ %136 = load %class.btDynamicsWorld*, %class.btDynamicsWorld** %m_ownerWorld347, align 4
%137 = bitcast %class.btDynamicsWorld* %136 to void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)***
- %vtable348 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)*** %137
- %vfn349 = getelementptr inbounds void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vtable348, i64 10
- %138 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vfn349
- %m_joints350 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 4
- %arrayidx351 = getelementptr inbounds [10 x %class.btTypedConstraint*]* %m_joints350, i32 0, i32 3
- %139 = load %class.btTypedConstraint** %arrayidx351, align 4
+ %vtable348 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)**, void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)*** %137
+ %vfn349 = getelementptr inbounds void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)*, void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vtable348, i64 10
+ %138 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)*, void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vfn349
+ %m_joints350 = getelementptr inbounds %class.RagDoll, %class.RagDoll* %this1, i32 0, i32 4
+ %arrayidx351 = getelementptr inbounds [10 x %class.btTypedConstraint*], [10 x %class.btTypedConstraint*]* %m_joints350, i32 0, i32 3
+ %139 = load %class.btTypedConstraint*, %class.btTypedConstraint** %arrayidx351, align 4
call void %138(%class.btDynamicsWorld* %136, %class.btTypedConstraint* %139, i1 zeroext true)
call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %localA)
call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %localB)
@@ -992,33 +992,33 @@ invoke.cont343: ; preds = %invoke.cont314
call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %localB, %class.btVector3* %ref.tmp360)
%call366 = call noalias i8* @_Znwm(i32 628)
%140 = bitcast i8* %call366 to %class.btConeTwistConstraint*
- %m_bodies367 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
- %arrayidx368 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies367, i32 0, i32 0
- %141 = load %class.btRigidBody** %arrayidx368, align 4
- %m_bodies369 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
- %arrayidx370 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies369, i32 0, i32 5
- %142 = load %class.btRigidBody** %arrayidx370, align 4
+ %m_bodies367 = getelementptr inbounds %class.RagDoll, %class.RagDoll* %this1, i32 0, i32 3
+ %arrayidx368 = getelementptr inbounds [11 x %class.btRigidBody*], [11 x %class.btRigidBody*]* %m_bodies367, i32 0, i32 0
+ %141 = load %class.btRigidBody*, %class.btRigidBody** %arrayidx368, align 4
+ %m_bodies369 = getelementptr inbounds %class.RagDoll, %class.RagDoll* %this1, i32 0, i32 3
+ %arrayidx370 = getelementptr inbounds [11 x %class.btRigidBody*], [11 x %class.btRigidBody*]* %m_bodies369, i32 0, i32 5
+ %142 = load %class.btRigidBody*, %class.btRigidBody** %arrayidx370, align 4
%call373 = invoke %class.btConeTwistConstraint* @_ZN21btConeTwistConstraintC1ER11btRigidBodyS1_RK11btTransformS4_(%class.btConeTwistConstraint* %140, %class.btRigidBody* %141, %class.btRigidBody* %142, %class.btTransform* %localA, %class.btTransform* %localB)
to label %invoke.cont372 unwind label %lpad371
invoke.cont372: ; preds = %invoke.cont343
store %class.btConeTwistConstraint* %140, %class.btConeTwistConstraint** %coneC, align 4
- %143 = load %class.btConeTwistConstraint** %coneC, align 4
+ %143 = load %class.btConeTwistConstraint*, %class.btConeTwistConstraint** %coneC, align 4
call void @_ZN21btConeTwistConstraint8setLimitEffffff(%class.btConeTwistConstraint* %143, float 0x3FE921FB60000000, float 0x3FE921FB60000000, float 0.000000e+00, float 1.000000e+00, float 0x3FD3333340000000, float 1.000000e+00)
- %144 = load %class.btConeTwistConstraint** %coneC, align 4
+ %144 = load %class.btConeTwistConstraint*, %class.btConeTwistConstraint** %coneC, align 4
%145 = bitcast %class.btConeTwistConstraint* %144 to %class.btTypedConstraint*
- %m_joints374 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 4
- %arrayidx375 = getelementptr inbounds [10 x %class.btTypedConstraint*]* %m_joints374, i32 0, i32 4
+ %m_joints374 = getelementptr inbounds %class.RagDoll, %class.RagDoll* %this1, i32 0, i32 4
+ %arrayidx375 = getelementptr inbounds [10 x %class.btTypedConstraint*], [10 x %class.btTypedConstraint*]* %m_joints374, i32 0, i32 4
store %class.btTypedConstraint* %145, %class.btTypedConstraint** %arrayidx375, align 4
- %m_ownerWorld376 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 1
- %146 = load %class.btDynamicsWorld** %m_ownerWorld376, align 4
+ %m_ownerWorld376 = getelementptr inbounds %class.RagDoll, %class.RagDoll* %this1, i32 0, i32 1
+ %146 = load %class.btDynamicsWorld*, %class.btDynamicsWorld** %m_ownerWorld376, align 4
%147 = bitcast %class.btDynamicsWorld* %146 to void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)***
- %vtable377 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)*** %147
- %vfn378 = getelementptr inbounds void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vtable377, i64 10
- %148 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vfn378
- %m_joints379 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 4
- %arrayidx380 = getelementptr inbounds [10 x %class.btTypedConstraint*]* %m_joints379, i32 0, i32 4
- %149 = load %class.btTypedConstraint** %arrayidx380, align 4
+ %vtable377 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)**, void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)*** %147
+ %vfn378 = getelementptr inbounds void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)*, void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vtable377, i64 10
+ %148 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)*, void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vfn378
+ %m_joints379 = getelementptr inbounds %class.RagDoll, %class.RagDoll* %this1, i32 0, i32 4
+ %arrayidx380 = getelementptr inbounds [10 x %class.btTypedConstraint*], [10 x %class.btTypedConstraint*]* %m_joints379, i32 0, i32 4
+ %149 = load %class.btTypedConstraint*, %class.btTypedConstraint** %arrayidx380, align 4
call void %148(%class.btDynamicsWorld* %146, %class.btTypedConstraint* %149, i1 zeroext true)
call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %localA)
call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %localB)
@@ -1040,33 +1040,33 @@ invoke.cont372: ; preds = %invoke.cont343
call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %localB, %class.btVector3* %ref.tmp389)
%call395 = call noalias i8* @_Znwm(i32 780)
%150 = bitcast i8* %call395 to %class.btHingeConstraint*
- %m_bodies396 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
- %arrayidx397 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies396, i32 0, i32 5
- %151 = load %class.btRigidBody** %arrayidx397, align 4
- %m_bodies398 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
- %arrayidx399 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies398, i32 0, i32 6
- %152 = load %class.btRigidBody** %arrayidx399, align 4
+ %m_bodies396 = getelementptr inbounds %class.RagDoll, %class.RagDoll* %this1, i32 0, i32 3
+ %arrayidx397 = getelementptr inbounds [11 x %class.btRigidBody*], [11 x %class.btRigidBody*]* %m_bodies396, i32 0, i32 5
+ %151 = load %class.btRigidBody*, %class.btRigidBody** %arrayidx397, align 4
+ %m_bodies398 = getelementptr inbounds %class.RagDoll, %class.RagDoll* %this1, i32 0, i32 3
+ %arrayidx399 = getelementptr inbounds [11 x %class.btRigidBody*], [11 x %class.btRigidBody*]* %m_bodies398, i32 0, i32 6
+ %152 = load %class.btRigidBody*, %class.btRigidBody** %arrayidx399, align 4
%call402 = invoke %class.btHingeConstraint* @_ZN17btHingeConstraintC1ER11btRigidBodyS1_RK11btTransformS4_b(%class.btHingeConstraint* %150, %class.btRigidBody* %151, %class.btRigidBody* %152, %class.btTransform* %localA, %class.btTransform* %localB, i1 zeroext false)
to label %invoke.cont401 unwind label %lpad400
invoke.cont401: ; preds = %invoke.cont372
store %class.btHingeConstraint* %150, %class.btHingeConstraint** %hingeC, align 4
- %153 = load %class.btHingeConstraint** %hingeC, align 4
+ %153 = load %class.btHingeConstraint*, %class.btHingeConstraint** %hingeC, align 4
call void @_ZN17btHingeConstraint8setLimitEfffff(%class.btHingeConstraint* %153, float 0.000000e+00, float 0x3FF921FB60000000, float 0x3FECCCCCC0000000, float 0x3FD3333340000000, float 1.000000e+00)
- %154 = load %class.btHingeConstraint** %hingeC, align 4
+ %154 = load %class.btHingeConstraint*, %class.btHingeConstraint** %hingeC, align 4
%155 = bitcast %class.btHingeConstraint* %154 to %class.btTypedConstraint*
- %m_joints403 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 4
- %arrayidx404 = getelementptr inbounds [10 x %class.btTypedConstraint*]* %m_joints403, i32 0, i32 5
+ %m_joints403 = getelementptr inbounds %class.RagDoll, %class.RagDoll* %this1, i32 0, i32 4
+ %arrayidx404 = getelementptr inbounds [10 x %class.btTypedConstraint*], [10 x %class.btTypedConstraint*]* %m_joints403, i32 0, i32 5
store %class.btTypedConstraint* %155, %class.btTypedConstraint** %arrayidx404, align 4
- %m_ownerWorld405 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 1
- %156 = load %class.btDynamicsWorld** %m_ownerWorld405, align 4
+ %m_ownerWorld405 = getelementptr inbounds %class.RagDoll, %class.RagDoll* %this1, i32 0, i32 1
+ %156 = load %class.btDynamicsWorld*, %class.btDynamicsWorld** %m_ownerWorld405, align 4
%157 = bitcast %class.btDynamicsWorld* %156 to void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)***
- %vtable406 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)*** %157
- %vfn407 = getelementptr inbounds void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vtable406, i64 10
- %158 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vfn407
- %m_joints408 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 4
- %arrayidx409 = getelementptr inbounds [10 x %class.btTypedConstraint*]* %m_joints408, i32 0, i32 5
- %159 = load %class.btTypedConstraint** %arrayidx409, align 4
+ %vtable406 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)**, void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)*** %157
+ %vfn407 = getelementptr inbounds void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)*, void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vtable406, i64 10
+ %158 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)*, void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vfn407
+ %m_joints408 = getelementptr inbounds %class.RagDoll, %class.RagDoll* %this1, i32 0, i32 4
+ %arrayidx409 = getelementptr inbounds [10 x %class.btTypedConstraint*], [10 x %class.btTypedConstraint*]* %m_joints408, i32 0, i32 5
+ %159 = load %class.btTypedConstraint*, %class.btTypedConstraint** %arrayidx409, align 4
call void %158(%class.btDynamicsWorld* %156, %class.btTypedConstraint* %159, i1 zeroext true)
call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %localA)
call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %localB)
@@ -1088,33 +1088,33 @@ invoke.cont401: ; preds = %invoke.cont372
call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %localB, %class.btVector3* %ref.tmp418)
%call424 = call noalias i8* @_Znwm(i32 628)
%160 = bitcast i8* %call424 to %class.btConeTwistConstraint*
- %m_bodies425 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
- %arrayidx426 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies425, i32 0, i32 1
- %161 = load %class.btRigidBody** %arrayidx426, align 4
- %m_bodies427 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
- %arrayidx428 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies427, i32 0, i32 7
- %162 = load %class.btRigidBody** %arrayidx428, align 4
+ %m_bodies425 = getelementptr inbounds %class.RagDoll, %class.RagDoll* %this1, i32 0, i32 3
+ %arrayidx426 = getelementptr inbounds [11 x %class.btRigidBody*], [11 x %class.btRigidBody*]* %m_bodies425, i32 0, i32 1
+ %161 = load %class.btRigidBody*, %class.btRigidBody** %arrayidx426, align 4
+ %m_bodies427 = getelementptr inbounds %class.RagDoll, %class.RagDoll* %this1, i32 0, i32 3
+ %arrayidx428 = getelementptr inbounds [11 x %class.btRigidBody*], [11 x %class.btRigidBody*]* %m_bodies427, i32 0, i32 7
+ %162 = load %class.btRigidBody*, %class.btRigidBody** %arrayidx428, align 4
%call431 = invoke %class.btConeTwistConstraint* @_ZN21btConeTwistConstraintC1ER11btRigidBodyS1_RK11btTransformS4_(%class.btConeTwistConstraint* %160, %class.btRigidBody* %161, %class.btRigidBody* %162, %class.btTransform* %localA, %class.btTransform* %localB)
to label %invoke.cont430 unwind label %lpad429
invoke.cont430: ; preds = %invoke.cont401
store %class.btConeTwistConstraint* %160, %class.btConeTwistConstraint** %coneC, align 4
- %163 = load %class.btConeTwistConstraint** %coneC, align 4
+ %163 = load %class.btConeTwistConstraint*, %class.btConeTwistConstraint** %coneC, align 4
call void @_ZN21btConeTwistConstraint8setLimitEffffff(%class.btConeTwistConstraint* %163, float 0x3FF921FB60000000, float 0x3FF921FB60000000, float 0.000000e+00, float 1.000000e+00, float 0x3FD3333340000000, float 1.000000e+00)
- %164 = load %class.btConeTwistConstraint** %coneC, align 4
+ %164 = load %class.btConeTwistConstraint*, %class.btConeTwistConstraint** %coneC, align 4
%165 = bitcast %class.btConeTwistConstraint* %164 to %class.btTypedConstraint*
- %m_joints432 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 4
- %arrayidx433 = getelementptr inbounds [10 x %class.btTypedConstraint*]* %m_joints432, i32 0, i32 6
+ %m_joints432 = getelementptr inbounds %class.RagDoll, %class.RagDoll* %this1, i32 0, i32 4
+ %arrayidx433 = getelementptr inbounds [10 x %class.btTypedConstraint*], [10 x %class.btTypedConstraint*]* %m_joints432, i32 0, i32 6
store %class.btTypedConstraint* %165, %class.btTypedConstraint** %arrayidx433, align 4
- %m_ownerWorld434 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 1
- %166 = load %class.btDynamicsWorld** %m_ownerWorld434, align 4
+ %m_ownerWorld434 = getelementptr inbounds %class.RagDoll, %class.RagDoll* %this1, i32 0, i32 1
+ %166 = load %class.btDynamicsWorld*, %class.btDynamicsWorld** %m_ownerWorld434, align 4
%167 = bitcast %class.btDynamicsWorld* %166 to void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)***
- %vtable435 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)*** %167
- %vfn436 = getelementptr inbounds void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vtable435, i64 10
- %168 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vfn436
- %m_joints437 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 4
- %arrayidx438 = getelementptr inbounds [10 x %class.btTypedConstraint*]* %m_joints437, i32 0, i32 6
- %169 = load %class.btTypedConstraint** %arrayidx438, align 4
+ %vtable435 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)**, void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)*** %167
+ %vfn436 = getelementptr inbounds void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)*, void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vtable435, i64 10
+ %168 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)*, void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vfn436
+ %m_joints437 = getelementptr inbounds %class.RagDoll, %class.RagDoll* %this1, i32 0, i32 4
+ %arrayidx438 = getelementptr inbounds [10 x %class.btTypedConstraint*], [10 x %class.btTypedConstraint*]* %m_joints437, i32 0, i32 6
+ %169 = load %class.btTypedConstraint*, %class.btTypedConstraint** %arrayidx438, align 4
call void %168(%class.btDynamicsWorld* %166, %class.btTypedConstraint* %169, i1 zeroext true)
call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %localA)
call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %localB)
@@ -1136,33 +1136,33 @@ invoke.cont430: ; preds = %invoke.cont401
call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %localB, %class.btVector3* %ref.tmp447)
%call453 = call noalias i8* @_Znwm(i32 780)
%170 = bitcast i8* %call453 to %class.btHingeConstraint*
- %m_bodies454 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
- %arrayidx455 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies454, i32 0, i32 7
- %171 = load %class.btRigidBody** %arrayidx455, align 4
- %m_bodies456 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
- %arrayidx457 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies456, i32 0, i32 8
- %172 = load %class.btRigidBody** %arrayidx457, align 4
+ %m_bodies454 = getelementptr inbounds %class.RagDoll, %class.RagDoll* %this1, i32 0, i32 3
+ %arrayidx455 = getelementptr inbounds [11 x %class.btRigidBody*], [11 x %class.btRigidBody*]* %m_bodies454, i32 0, i32 7
+ %171 = load %class.btRigidBody*, %class.btRigidBody** %arrayidx455, align 4
+ %m_bodies456 = getelementptr inbounds %class.RagDoll, %class.RagDoll* %this1, i32 0, i32 3
+ %arrayidx457 = getelementptr inbounds [11 x %class.btRigidBody*], [11 x %class.btRigidBody*]* %m_bodies456, i32 0, i32 8
+ %172 = load %class.btRigidBody*, %class.btRigidBody** %arrayidx457, align 4
%call460 = invoke %class.btHingeConstraint* @_ZN17btHingeConstraintC1ER11btRigidBodyS1_RK11btTransformS4_b(%class.btHingeConstraint* %170, %class.btRigidBody* %171, %class.btRigidBody* %172, %class.btTransform* %localA, %class.btTransform* %localB, i1 zeroext false)
to label %invoke.cont459 unwind label %lpad458
invoke.cont459: ; preds = %invoke.cont430
store %class.btHingeConstraint* %170, %class.btHingeConstraint** %hingeC, align 4
- %173 = load %class.btHingeConstraint** %hingeC, align 4
+ %173 = load %class.btHingeConstraint*, %class.btHingeConstraint** %hingeC, align 4
call void @_ZN17btHingeConstraint8setLimitEfffff(%class.btHingeConstraint* %173, float 0xBFF921FB60000000, float 0.000000e+00, float 0x3FECCCCCC0000000, float 0x3FD3333340000000, float 1.000000e+00)
- %174 = load %class.btHingeConstraint** %hingeC, align 4
+ %174 = load %class.btHingeConstraint*, %class.btHingeConstraint** %hingeC, align 4
%175 = bitcast %class.btHingeConstraint* %174 to %class.btTypedConstraint*
- %m_joints461 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 4
- %arrayidx462 = getelementptr inbounds [10 x %class.btTypedConstraint*]* %m_joints461, i32 0, i32 7
+ %m_joints461 = getelementptr inbounds %class.RagDoll, %class.RagDoll* %this1, i32 0, i32 4
+ %arrayidx462 = getelementptr inbounds [10 x %class.btTypedConstraint*], [10 x %class.btTypedConstraint*]* %m_joints461, i32 0, i32 7
store %class.btTypedConstraint* %175, %class.btTypedConstraint** %arrayidx462, align 4
- %m_ownerWorld463 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 1
- %176 = load %class.btDynamicsWorld** %m_ownerWorld463, align 4
+ %m_ownerWorld463 = getelementptr inbounds %class.RagDoll, %class.RagDoll* %this1, i32 0, i32 1
+ %176 = load %class.btDynamicsWorld*, %class.btDynamicsWorld** %m_ownerWorld463, align 4
%177 = bitcast %class.btDynamicsWorld* %176 to void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)***
- %vtable464 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)*** %177
- %vfn465 = getelementptr inbounds void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vtable464, i64 10
- %178 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vfn465
- %m_joints466 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 4
- %arrayidx467 = getelementptr inbounds [10 x %class.btTypedConstraint*]* %m_joints466, i32 0, i32 7
- %179 = load %class.btTypedConstraint** %arrayidx467, align 4
+ %vtable464 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)**, void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)*** %177
+ %vfn465 = getelementptr inbounds void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)*, void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vtable464, i64 10
+ %178 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)*, void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vfn465
+ %m_joints466 = getelementptr inbounds %class.RagDoll, %class.RagDoll* %this1, i32 0, i32 4
+ %arrayidx467 = getelementptr inbounds [10 x %class.btTypedConstraint*], [10 x %class.btTypedConstraint*]* %m_joints466, i32 0, i32 7
+ %179 = load %class.btTypedConstraint*, %class.btTypedConstraint** %arrayidx467, align 4
call void %178(%class.btDynamicsWorld* %176, %class.btTypedConstraint* %179, i1 zeroext true)
call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %localA)
call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %localB)
@@ -1184,33 +1184,33 @@ invoke.cont459: ; preds = %invoke.cont430
call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %localB, %class.btVector3* %ref.tmp476)
%call482 = call noalias i8* @_Znwm(i32 628)
%180 = bitcast i8* %call482 to %class.btConeTwistConstraint*
- %m_bodies483 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
- %arrayidx484 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies483, i32 0, i32 1
- %181 = load %class.btRigidBody** %arrayidx484, align 4
- %m_bodies485 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
- %arrayidx486 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies485, i32 0, i32 9
- %182 = load %class.btRigidBody** %arrayidx486, align 4
+ %m_bodies483 = getelementptr inbounds %class.RagDoll, %class.RagDoll* %this1, i32 0, i32 3
+ %arrayidx484 = getelementptr inbounds [11 x %class.btRigidBody*], [11 x %class.btRigidBody*]* %m_bodies483, i32 0, i32 1
+ %181 = load %class.btRigidBody*, %class.btRigidBody** %arrayidx484, align 4
+ %m_bodies485 = getelementptr inbounds %class.RagDoll, %class.RagDoll* %this1, i32 0, i32 3
+ %arrayidx486 = getelementptr inbounds [11 x %class.btRigidBody*], [11 x %class.btRigidBody*]* %m_bodies485, i32 0, i32 9
+ %182 = load %class.btRigidBody*, %class.btRigidBody** %arrayidx486, align 4
%call489 = invoke %class.btConeTwistConstraint* @_ZN21btConeTwistConstraintC1ER11btRigidBodyS1_RK11btTransformS4_(%class.btConeTwistConstraint* %180, %class.btRigidBody* %181, %class.btRigidBody* %182, %class.btTransform* %localA, %class.btTransform* %localB)
to label %invoke.cont488 unwind label %lpad487
invoke.cont488: ; preds = %invoke.cont459
store %class.btConeTwistConstraint* %180, %class.btConeTwistConstraint** %coneC, align 4
- %183 = load %class.btConeTwistConstraint** %coneC, align 4
+ %183 = load %class.btConeTwistConstraint*, %class.btConeTwistConstraint** %coneC, align 4
call void @_ZN21btConeTwistConstraint8setLimitEffffff(%class.btConeTwistConstraint* %183, float 0x3FF921FB60000000, float 0x3FF921FB60000000, float 0.000000e+00, float 1.000000e+00, float 0x3FD3333340000000, float 1.000000e+00)
- %184 = load %class.btConeTwistConstraint** %coneC, align 4
+ %184 = load %class.btConeTwistConstraint*, %class.btConeTwistConstraint** %coneC, align 4
%185 = bitcast %class.btConeTwistConstraint* %184 to %class.btTypedConstraint*
- %m_joints490 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 4
- %arrayidx491 = getelementptr inbounds [10 x %class.btTypedConstraint*]* %m_joints490, i32 0, i32 8
+ %m_joints490 = getelementptr inbounds %class.RagDoll, %class.RagDoll* %this1, i32 0, i32 4
+ %arrayidx491 = getelementptr inbounds [10 x %class.btTypedConstraint*], [10 x %class.btTypedConstraint*]* %m_joints490, i32 0, i32 8
store %class.btTypedConstraint* %185, %class.btTypedConstraint** %arrayidx491, align 4
- %m_ownerWorld492 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 1
- %186 = load %class.btDynamicsWorld** %m_ownerWorld492, align 4
+ %m_ownerWorld492 = getelementptr inbounds %class.RagDoll, %class.RagDoll* %this1, i32 0, i32 1
+ %186 = load %class.btDynamicsWorld*, %class.btDynamicsWorld** %m_ownerWorld492, align 4
%187 = bitcast %class.btDynamicsWorld* %186 to void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)***
- %vtable493 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)*** %187
- %vfn494 = getelementptr inbounds void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vtable493, i64 10
- %188 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vfn494
- %m_joints495 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 4
- %arrayidx496 = getelementptr inbounds [10 x %class.btTypedConstraint*]* %m_joints495, i32 0, i32 8
- %189 = load %class.btTypedConstraint** %arrayidx496, align 4
+ %vtable493 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)**, void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)*** %187
+ %vfn494 = getelementptr inbounds void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)*, void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vtable493, i64 10
+ %188 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)*, void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vfn494
+ %m_joints495 = getelementptr inbounds %class.RagDoll, %class.RagDoll* %this1, i32 0, i32 4
+ %arrayidx496 = getelementptr inbounds [10 x %class.btTypedConstraint*], [10 x %class.btTypedConstraint*]* %m_joints495, i32 0, i32 8
+ %189 = load %class.btTypedConstraint*, %class.btTypedConstraint** %arrayidx496, align 4
call void %188(%class.btDynamicsWorld* %186, %class.btTypedConstraint* %189, i1 zeroext true)
call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %localA)
call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %localB)
@@ -1232,35 +1232,35 @@ invoke.cont488: ; preds = %invoke.cont459
call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %localB, %class.btVector3* %ref.tmp505)
%call511 = call noalias i8* @_Znwm(i32 780)
%190 = bitcast i8* %call511 to %class.btHingeConstraint*
- %m_bodies512 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
- %arrayidx513 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies512, i32 0, i32 9
- %191 = load %class.btRigidBody** %arrayidx513, align 4
- %m_bodies514 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
- %arrayidx515 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies514, i32 0, i32 10
- %192 = load %class.btRigidBody** %arrayidx515, align 4
+ %m_bodies512 = getelementptr inbounds %class.RagDoll, %class.RagDoll* %this1, i32 0, i32 3
+ %arrayidx513 = getelementptr inbounds [11 x %class.btRigidBody*], [11 x %class.btRigidBody*]* %m_bodies512, i32 0, i32 9
+ %191 = load %class.btRigidBody*, %class.btRigidBody** %arrayidx513, align 4
+ %m_bodies514 = getelementptr inbounds %class.RagDoll, %class.RagDoll* %this1, i32 0, i32 3
+ %arrayidx515 = getelementptr inbounds [11 x %class.btRigidBody*], [11 x %class.btRigidBody*]* %m_bodies514, i32 0, i32 10
+ %192 = load %class.btRigidBody*, %class.btRigidBody** %arrayidx515, align 4
%call518 = invoke %class.btHingeConstraint* @_ZN17btHingeConstraintC1ER11btRigidBodyS1_RK11btTransformS4_b(%class.btHingeConstraint* %190, %class.btRigidBody* %191, %class.btRigidBody* %192, %class.btTransform* %localA, %class.btTransform* %localB, i1 zeroext false)
to label %invoke.cont517 unwind label %lpad516
invoke.cont517: ; preds = %invoke.cont488
store %class.btHingeConstraint* %190, %class.btHingeConstraint** %hingeC, align 4
- %193 = load %class.btHingeConstraint** %hingeC, align 4
+ %193 = load %class.btHingeConstraint*, %class.btHingeConstraint** %hingeC, align 4
call void @_ZN17btHingeConstraint8setLimitEfffff(%class.btHingeConstraint* %193, float 0xBFF921FB60000000, float 0.000000e+00, float 0x3FECCCCCC0000000, float 0x3FD3333340000000, float 1.000000e+00)
- %194 = load %class.btHingeConstraint** %hingeC, align 4
+ %194 = load %class.btHingeConstraint*, %class.btHingeConstraint** %hingeC, align 4
%195 = bitcast %class.btHingeConstraint* %194 to %class.btTypedConstraint*
- %m_joints519 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 4
- %arrayidx520 = getelementptr inbounds [10 x %class.btTypedConstraint*]* %m_joints519, i32 0, i32 9
+ %m_joints519 = getelementptr inbounds %class.RagDoll, %class.RagDoll* %this1, i32 0, i32 4
+ %arrayidx520 = getelementptr inbounds [10 x %class.btTypedConstraint*], [10 x %class.btTypedConstraint*]* %m_joints519, i32 0, i32 9
store %class.btTypedConstraint* %195, %class.btTypedConstraint** %arrayidx520, align 4
- %m_ownerWorld521 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 1
- %196 = load %class.btDynamicsWorld** %m_ownerWorld521, align 4
+ %m_ownerWorld521 = getelementptr inbounds %class.RagDoll, %class.RagDoll* %this1, i32 0, i32 1
+ %196 = load %class.btDynamicsWorld*, %class.btDynamicsWorld** %m_ownerWorld521, align 4
%197 = bitcast %class.btDynamicsWorld* %196 to void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)***
- %vtable522 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)*** %197
- %vfn523 = getelementptr inbounds void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vtable522, i64 10
- %198 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vfn523
- %m_joints524 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 4
- %arrayidx525 = getelementptr inbounds [10 x %class.btTypedConstraint*]* %m_joints524, i32 0, i32 9
- %199 = load %class.btTypedConstraint** %arrayidx525, align 4
+ %vtable522 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)**, void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)*** %197
+ %vfn523 = getelementptr inbounds void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)*, void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vtable522, i64 10
+ %198 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)*, void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vfn523
+ %m_joints524 = getelementptr inbounds %class.RagDoll, %class.RagDoll* %this1, i32 0, i32 4
+ %arrayidx525 = getelementptr inbounds [10 x %class.btTypedConstraint*], [10 x %class.btTypedConstraint*]* %m_joints524, i32 0, i32 9
+ %199 = load %class.btTypedConstraint*, %class.btTypedConstraint** %arrayidx525, align 4
call void %198(%class.btDynamicsWorld* %196, %class.btTypedConstraint* %199, i1 zeroext true)
- %200 = load %class.RagDoll** %retval
+ %200 = load %class.RagDoll*, %class.RagDoll** %retval
ret %class.RagDoll* %200
lpad258: ; preds = %for.end
@@ -1364,8 +1364,8 @@ lpad516: ; preds = %invoke.cont488
br label %eh.resume
eh.resume: ; preds = %lpad516, %lpad487, %lpad458, %lpad429, %lpad400, %lpad371, %lpad342, %lpad313, %lpad284, %lpad258, %invoke.cont92, %invoke.cont83, %invoke.cont74, %invoke.cont65, %invoke.cont56, %invoke.cont47, %invoke.cont38, %invoke.cont29, %invoke.cont20, %invoke.cont11, %invoke.cont4
- %exn = load i8** %exn.slot
- %sel = load i32* %ehselector.slot
+ %exn = load i8*, i8** %exn.slot
+ %sel = load i32, i32* %ehselector.slot
%lpad.val = insertvalue { i8*, i32 } undef, i8* %exn, 0
%lpad.val526 = insertvalue { i8*, i32 } %lpad.val, i32 %sel, 1
resume { i8*, i32 } %lpad.val526
diff --git a/test/CodeGen/Thumb2/crash.ll b/test/CodeGen/Thumb2/crash.ll
index 6ce0b82b94d7..893a45d8f722 100644
--- a/test/CodeGen/Thumb2/crash.ll
+++ b/test/CodeGen/Thumb2/crash.ll
@@ -7,13 +7,13 @@ target triple = "thumbv7-apple-darwin10"
define arm_apcscc void @NEON_vst4q_u32(i32* nocapture %sp0, i32* nocapture %sp1, i32* nocapture %sp2, i32* nocapture %sp3, i32* %dp) nounwind {
entry:
%0 = bitcast i32* %sp0 to <4 x i32>* ; <<4 x i32>*> [#uses=1]
- %1 = load <4 x i32>* %0, align 16 ; <<4 x i32>> [#uses=1]
+ %1 = load <4 x i32>, <4 x i32>* %0, align 16 ; <<4 x i32>> [#uses=1]
%2 = bitcast i32* %sp1 to <4 x i32>* ; <<4 x i32>*> [#uses=1]
- %3 = load <4 x i32>* %2, align 16 ; <<4 x i32>> [#uses=1]
+ %3 = load <4 x i32>, <4 x i32>* %2, align 16 ; <<4 x i32>> [#uses=1]
%4 = bitcast i32* %sp2 to <4 x i32>* ; <<4 x i32>*> [#uses=1]
- %5 = load <4 x i32>* %4, align 16 ; <<4 x i32>> [#uses=1]
+ %5 = load <4 x i32>, <4 x i32>* %4, align 16 ; <<4 x i32>> [#uses=1]
%6 = bitcast i32* %sp3 to <4 x i32>* ; <<4 x i32>*> [#uses=1]
- %7 = load <4 x i32>* %6, align 16 ; <<4 x i32>> [#uses=1]
+ %7 = load <4 x i32>, <4 x i32>* %6, align 16 ; <<4 x i32>> [#uses=1]
%8 = bitcast i32* %dp to i8* ; <i8*> [#uses=1]
tail call void @llvm.arm.neon.vst4.v4i32(i8* %8, <4 x i32> %1, <4 x i32> %3, <4 x i32> %5, <4 x i32> %7, i32 1)
ret void
@@ -32,8 +32,8 @@ bb.nph:
bb: ; preds = %bb, %bb.nph
%0 = phi i32 [ 0, %bb.nph ], [ %1, %bb ] ; <i32> [#uses=4]
- %scevgep = getelementptr [16 x i32]* @sbuf, i32 0, i32 %0 ; <i32*> [#uses=1]
- %scevgep5 = getelementptr [16 x i32]* @dbuf, i32 0, i32 %0 ; <i32*> [#uses=1]
+ %scevgep = getelementptr [16 x i32], [16 x i32]* @sbuf, i32 0, i32 %0 ; <i32*> [#uses=1]
+ %scevgep5 = getelementptr [16 x i32], [16 x i32]* @dbuf, i32 0, i32 %0 ; <i32*> [#uses=1]
store i32 %0, i32* %scevgep, align 4
store i32 -1, i32* %scevgep5, align 4
%1 = add nsw i32 %0, 1 ; <i32> [#uses=2]
@@ -41,10 +41,10 @@ bb: ; preds = %bb, %bb.nph
br i1 %exitcond, label %bb2, label %bb
bb2: ; preds = %bb
- %2 = load <4 x i32>* bitcast ([16 x i32]* @sbuf to <4 x i32>*), align 16 ; <<4 x i32>> [#uses=1]
- %3 = load <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32]* @sbuf, i32 0, i32 4) to <4 x i32>*), align 16 ; <<4 x i32>> [#uses=1]
- %4 = load <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32]* @sbuf, i32 0, i32 8) to <4 x i32>*), align 16 ; <<4 x i32>> [#uses=1]
- %5 = load <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32]* @sbuf, i32 0, i32 12) to <4 x i32>*), align 16 ; <<4 x i32>> [#uses=1]
+ %2 = load <4 x i32>, <4 x i32>* bitcast ([16 x i32]* @sbuf to <4 x i32>*), align 16 ; <<4 x i32>> [#uses=1]
+ %3 = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @sbuf, i32 0, i32 4) to <4 x i32>*), align 16 ; <<4 x i32>> [#uses=1]
+ %4 = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @sbuf, i32 0, i32 8) to <4 x i32>*), align 16 ; <<4 x i32>> [#uses=1]
+ %5 = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @sbuf, i32 0, i32 12) to <4 x i32>*), align 16 ; <<4 x i32>> [#uses=1]
tail call void @llvm.arm.neon.vst4.v4i32(i8* bitcast ([16 x i32]* @dbuf to i8*), <4 x i32> %2, <4 x i32> %3, <4 x i32> %4, <4 x i32> %5, i32 1) nounwind
ret i32 0
}
@@ -70,8 +70,8 @@ declare void @llvm.arm.neon.vst1.v4f32(i8*, <4 x float>, i32) nounwind
%class = type { i8*, %class*, i32 }
define void @f11101911(%class* %this, i32 %num) ssp align 2 {
entry:
- %p1 = getelementptr inbounds %class* %this, i32 0, i32 1
- %p2 = getelementptr inbounds %class* %this, i32 0, i32 2
+ %p1 = getelementptr inbounds %class, %class* %this, i32 0, i32 1
+ %p2 = getelementptr inbounds %class, %class* %this, i32 0, i32 2
tail call void asm sideeffect "", "~{r1},~{r3},~{r5},~{r11},~{r13}"() nounwind
store %class* %this, %class** %p1, align 4
store i32 %num, i32* %p2, align 4
diff --git a/test/CodeGen/Thumb2/cross-rc-coalescing-2.ll b/test/CodeGen/Thumb2/cross-rc-coalescing-2.ll
index 88c7f0f17ab9..ecb63b18b622 100644
--- a/test/CodeGen/Thumb2/cross-rc-coalescing-2.ll
+++ b/test/CodeGen/Thumb2/cross-rc-coalescing-2.ll
@@ -26,27 +26,27 @@ bb8: ; preds = %bb8, %bb7
; CHECK-NOT: vmov.f32
; CHECK: blt
%tmp54 = add i32 0, %tmp53 ; <i32> [#uses=0]
- %fi.1 = getelementptr float* %fz, i32 undef ; <float*> [#uses=2]
+ %fi.1 = getelementptr float, float* %fz, i32 undef ; <float*> [#uses=2]
%tmp80 = add i32 0, %tmp79 ; <i32> [#uses=1]
- %scevgep81 = getelementptr float* %fz, i32 %tmp80 ; <float*> [#uses=1]
- %2 = load float* undef, align 4 ; <float> [#uses=1]
+ %scevgep81 = getelementptr float, float* %fz, i32 %tmp80 ; <float*> [#uses=1]
+ %2 = load float, float* undef, align 4 ; <float> [#uses=1]
%3 = fmul float %2, %1 ; <float> [#uses=1]
- %4 = load float* null, align 4 ; <float> [#uses=2]
+ %4 = load float, float* null, align 4 ; <float> [#uses=2]
%5 = fmul float %4, %0 ; <float> [#uses=1]
%6 = fsub float %3, %5 ; <float> [#uses=1]
%7 = fmul float %4, %1 ; <float> [#uses=1]
%8 = fadd float undef, %7 ; <float> [#uses=2]
- %9 = load float* %fi.1, align 4 ; <float> [#uses=2]
+ %9 = load float, float* %fi.1, align 4 ; <float> [#uses=2]
%10 = fsub float %9, %8 ; <float> [#uses=1]
%11 = fadd float %9, %8 ; <float> [#uses=1]
%12 = fsub float 0.000000e+00, %6 ; <float> [#uses=1]
%13 = fsub float 0.000000e+00, undef ; <float> [#uses=2]
%14 = fmul float undef, %0 ; <float> [#uses=1]
%15 = fadd float %14, undef ; <float> [#uses=2]
- %16 = load float* %scevgep81, align 4 ; <float> [#uses=2]
+ %16 = load float, float* %scevgep81, align 4 ; <float> [#uses=2]
%17 = fsub float %16, %15 ; <float> [#uses=1]
%18 = fadd float %16, %15 ; <float> [#uses=2]
- %19 = load float* undef, align 4 ; <float> [#uses=2]
+ %19 = load float, float* undef, align 4 ; <float> [#uses=2]
%20 = fsub float %19, %13 ; <float> [#uses=2]
%21 = fadd float %19, %13 ; <float> [#uses=1]
%22 = fmul float %s1.02, %18 ; <float> [#uses=1]
diff --git a/test/CodeGen/Thumb2/div.ll b/test/CodeGen/Thumb2/div.ll
index b273a8903265..80a842dc07fd 100644
--- a/test/CodeGen/Thumb2/div.ll
+++ b/test/CodeGen/Thumb2/div.ll
@@ -4,6 +4,10 @@
; RUN: | FileCheck %s -check-prefix=CHECK-THUMBV7M
; RUN: llc -mtriple=thumb-apple-darwin -mcpu=swift %s -o - \
; RUN: | FileCheck %s -check-prefix=CHECK-HWDIV
+; RUN: llc -mtriple=thumb-apple-darwin -mcpu=cortex-r4 %s -o - \
+; RUN: | FileCheck %s -check-prefix=CHECK-HWDIV
+; RUN: llc -mtriple=thumb-apple-darwin -mcpu=cortex-r4f %s -o - \
+; RUN: | FileCheck %s -check-prefix=CHECK-HWDIV
; RUN: llc -mtriple=thumb-apple-darwin -mcpu=cortex-r5 %s -o - \
; RUN: | FileCheck %s -check-prefix=CHECK-HWDIV
diff --git a/test/CodeGen/Thumb2/float-ops.ll b/test/CodeGen/Thumb2/float-ops.ll
index d383065cd53e..7ec08f866655 100644
--- a/test/CodeGen/Thumb2/float-ops.ll
+++ b/test/CodeGen/Thumb2/float-ops.ll
@@ -102,16 +102,16 @@ entry:
; CHECK-LABEL: load_f:
; NONE: ldr r0, [r0]
; HARD: vldr s0, [r0]
- %0 = load float* %a, align 4
+ %0 = load float, float* %a, align 4
ret float %0
}
define double @load_d(double* %a) {
entry:
; CHECK-LABEL: load_d:
-; NONE: ldm.w r0, {r0, r1}
+; NONE: ldm r0, {r0, r1}
; HARD: vldr d0, [r0]
- %0 = load double* %a, align 8
+ %0 = load double, double* %a, align 8
ret double %0
}
diff --git a/test/CodeGen/Thumb2/frameless2.ll b/test/CodeGen/Thumb2/frameless2.ll
index c5d32390266b..374335421a54 100644
--- a/test/CodeGen/Thumb2/frameless2.ll
+++ b/test/CodeGen/Thumb2/frameless2.ll
@@ -5,8 +5,8 @@
define void @vorbis_encode_noisebias_setup(i8* nocapture %vi.0.7.val, double %s, i32 %block, i32* nocapture %suppress, %struct.noise3* nocapture %in, %struct.noiseguard* nocapture %guard, double %userbias) nounwind {
entry:
- %0 = getelementptr %struct.noiseguard* %guard, i32 %block, i32 2; <i32*> [#uses=1]
- %1 = load i32* %0, align 4 ; <i32> [#uses=1]
+ %0 = getelementptr %struct.noiseguard, %struct.noiseguard* %guard, i32 %block, i32 2; <i32*> [#uses=1]
+ %1 = load i32, i32* %0, align 4 ; <i32> [#uses=1]
store i32 %1, i32* undef, align 4
unreachable
}
diff --git a/test/CodeGen/Thumb2/ifcvt-compare.ll b/test/CodeGen/Thumb2/ifcvt-compare.ll
new file mode 100644
index 000000000000..8af139a5ef6e
--- /dev/null
+++ b/test/CodeGen/Thumb2/ifcvt-compare.ll
@@ -0,0 +1,47 @@
+; RUN: llc -mtriple=thumbv7-unknown-linux %s -o - | FileCheck %s
+
+declare void @x()
+
+define void @f0(i32 %x) optsize {
+ ; CHECK-LABEL: f0:
+ ; CHECK: cbnz
+ %p = icmp eq i32 %x, 0
+ br i1 %p, label %t, label %f
+
+t:
+ call void @x()
+ br label %f
+
+f:
+ ret void
+}
+
+define void @f1(i32 %x) optsize {
+ ; CHECK-LABEL: f1:
+ ; CHECK: cmp r0, #1
+ ; CHECK: it eq
+ %p = icmp eq i32 %x, 1
+ br i1 %p, label %t, label %f
+
+t:
+ call void @x()
+ br label %f
+
+f:
+ ret void
+}
+
+define void @f2(i32 %x) {
+ ; CHECK-LABEL: f2:
+ ; CHECK: cmp r0, #0
+ ; CHECK: it eq
+ %p = icmp eq i32 %x, 0
+ br i1 %p, label %t, label %f
+
+t:
+ call void @x()
+ br label %f
+
+f:
+ ret void
+}
diff --git a/test/CodeGen/Thumb2/ifcvt-neon.ll b/test/CodeGen/Thumb2/ifcvt-neon.ll
index 501b0b6a007c..83c0b601aba9 100644
--- a/test/CodeGen/Thumb2/ifcvt-neon.ll
+++ b/test/CodeGen/Thumb2/ifcvt-neon.ll
@@ -7,14 +7,14 @@
define float @t(i32 %c) nounwind {
entry:
%0 = icmp sgt i32 %c, 1 ; <i1> [#uses=1]
- %1 = load float* @a, align 4 ; <float> [#uses=2]
- %2 = load float* @b, align 4 ; <float> [#uses=2]
+ %1 = load float, float* @a, align 4 ; <float> [#uses=2]
+ %2 = load float, float* @b, align 4 ; <float> [#uses=2]
br i1 %0, label %bb, label %bb1
bb: ; preds = %entry
-; CHECK: ite lt
-; CHECK: vsublt.f32
-; CHECK-NEXT: vaddge.f32
+; CHECK: vsub.f32
+; CHECK-NEXT: vadd.f32
+; CHECK: it gt
%3 = fadd float %1, %2 ; <float> [#uses=1]
br label %bb2
diff --git a/test/CodeGen/Thumb2/inflate-regs.ll b/test/CodeGen/Thumb2/inflate-regs.ll
index d8a558c97e27..4814db281bfe 100644
--- a/test/CodeGen/Thumb2/inflate-regs.ll
+++ b/test/CodeGen/Thumb2/inflate-regs.ll
@@ -14,7 +14,7 @@ target triple = "thumbv7-apple-ios"
; CHECK: vstr s
define void @local_split(float* nocapture %p) nounwind ssp {
entry:
- %x = load float* %p, align 4
+ %x = load float, float* %p, align 4
%a = fadd float %x, 1.0
tail call void asm sideeffect "", "~{d0},~{d1},~{d2},~{d3},~{d4},~{d5},~{d6},~{d7},~{d8},~{d9},~{d10},~{d11},~{d12},~{d13},~{d14},~{d15}"() nounwind
store float %a, float* %p, align 4
@@ -33,7 +33,7 @@ entry:
; CHECK: vstr s
define void @global_split(float* nocapture %p1, float* nocapture %p2) nounwind ssp {
entry:
- %0 = load float* %p1, align 4
+ %0 = load float, float* %p1, align 4
%add = fadd float %0, 1.000000e+00
tail call void asm sideeffect "", "~{d0},~{d1},~{d2},~{d3},~{d4},~{d5},~{d6},~{d7},~{d8},~{d9},~{d10},~{d11},~{d12},~{d13},~{d14},~{d15}"() nounwind
%cmp = fcmp ogt float %add, 0.000000e+00
diff --git a/test/CodeGen/Thumb2/large-call.ll b/test/CodeGen/Thumb2/large-call.ll
index 1b4d4625dd05..f6a5a60ba3c2 100644
--- a/test/CodeGen/Thumb2/large-call.ll
+++ b/test/CodeGen/Thumb2/large-call.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -mcpu=cortex-a8 | FileCheck %s
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort=1 -mcpu=cortex-a8 | FileCheck %s
target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
target triple = "thumbv7-apple-ios0.0.0"
@@ -21,8 +21,8 @@ define i32 @main() ssp {
entry:
%d = alloca double, align 8
store double 1.000000e+00, double* %d, align 8
- %0 = load double* %d, align 8
- call void (i8*, i8*, i8*, ...)* @variadic(i8* null, i8* null, i8* null, i32 1, double 1.234800e+03, double 2.363450e+03, double %0, i32 1, double 1.234560e+03, double 2.345670e+03, double 4.6334563e+03, double 2.423440e+03, double 4.234330e+03, double 2.965430e+03, i32 1, double 4.669300e+03, double 2.927500e+03, double 4.663100e+03, double 2.921000e+03, double 4.663100e+03, double 2.345100e+03, i32 1, double 3.663100e+03, double 2.905100e+03, double 4.669300e+03, double 2.898600e+03, double 4.676900e+03, double 2.898600e+03, i32 1, double 4.684600e+03, double 2.898600e+03, double 1.234800e+03, double 2.905100e+03, double 1.234800e+03, double 2.345100e+03, i32 1, double 7.719700e+03, double 2.920500e+03, double 4.713500e+03, double 2.927000e+03, double 4.705800e+03, double 2.927000e+03, i32 1, double 8.698200e+03, double 2.927000e+03, double 4.692000e+03, double 2.920500e+03, double 4.692000e+03, double 2.912500e+03, i32 1, double 4.692000e+03, double 2.945600e+03, double 4.698200e+03, double 2.898100e+03, double 4.705800e+03, double 2.898100e+03, i32 1, double 4.713500e+03, double 2.898100e+03, double 4.719700e+03, double 2.945600e+03, double 4.719700e+03, double 2.912500e+03, i32 1, double 4.749200e+03, double 2.920100e+03, double 4.743000e+03, double 2.926600e+03, double 4.735300e+03, double 2.926600e+03, i32 1, double 4.727700e+03, double 2.926600e+03, double 4.721500e+03, double 2.920100e+03, double 4.721500e+03, double 2.912100e+03, i32 1, double 4.721500e+03, double 2.945100e+03, double 4.727700e+03, double 2.897700e+03, double 4.735300e+03, double 2.897700e+03, i32 1, double 4.743000e+03, double 2.897700e+03, double 4.749200e+03, double 2.945100e+03, double 4.749200e+03, double 2.912100e+03, i32 1, double 4.778200e+03, double 2.920100e+03, double 4.772000e+03, double 2.926600e+03, double 4.764300e+03, double 2.926600e+03, i32 1, double 4.756700e+03, double 2.926600e+03, double 4.750500e+03, double 2.920100e+03, double 4.750500e+03, double 2.912100e+03, i32 1, double 4.750500e+03, double 2.945100e+03, double 4.756700e+03, double 2.897700e+03, double 4.764300e+03, double 2.897700e+03, i32 1, double 4.772000e+03, double 2.897700e+03, double 4.778200e+03, double 2.945100e+03, double 4.778200e+03, double 2.912100e+03, i32 1, double 4.801900e+03, double 2.942100e+03, double 4.795700e+03, double 2.948500e+03, double 4.788100e+03, double 2.948500e+03, i32 1, double 4.780500e+03, double 2.948500e+03, double 4.774300e+03, double 2.942100e+03, double 4.774300e+03, double 2.934100e+03, i32 1, double 4.774300e+03, double 2.926100e+03, double 4.780500e+03, double 2.919600e+03, double 4.788100e+03, double 2.919600e+03, i32 1, double 4.795700e+03, double 2.919600e+03, double 4.801900e+03, double 2.926100e+03, double 4.801900e+03, double 2.934100e+03, i32 1, double 4.801500e+03, double 2.972500e+03, double 4.795300e+03, double 2.978900e+03, double 4.787700e+03, double 2.978900e+03, i32 1, double 4.780000e+03, double 2.978900e+03, double 4.773800e+03, double 2.972500e+03, double 4.773800e+03, double 2.964500e+03, i32 1, double 4.773800e+03, double 2.956500e+03, double 4.780000e+03, double 2.950000e+03, double 4.787700e+03, double 2.950000e+03, i32 1, double 4.795300e+03, double 2.950000e+03, double 4.801500e+03, double 2.956500e+03, double 4.801500e+03, double 2.964500e+03, i32 1, double 4.802400e+03, double 3.010200e+03, double 4.796200e+03, double 3.016600e+03, double 4.788500e+03, double 3.016600e+03, i32 1, double 4.780900e+03, double 3.016600e+03, double 4.774700e+03, double 3.010200e+03, 
double 4.774700e+03, double 3.002200e+03, i32 1, double 4.774700e+03, double 2.994200e+03, double 4.780900e+03, double 2.987700e+03, double 4.788500e+03, double 2.987700e+03, i32 1, double 4.796200e+03, double 2.987700e+03, double 4.802400e+03, double 2.994200e+03, double 4.802400e+03, double 3.002200e+03, i32 1, double 4.802400e+03, double 3.039400e+03, double 4.796200e+03, double 3.455800e+03, double 4.788500e+03, double 3.455800e+03, i32 1, double 4.780900e+03, double 3.455800e+03, double 4.774700e+03, double 3.039400e+03, double 4.774700e+03, double 3.031400e+03, i32 1, double 4.774700e+03, double 3.023400e+03, double 4.780900e+03, double 3.016900e+03, double 4.788500e+03, double 3.016900e+03, i32 1, double 4.796200e+03, double 3.016900e+03, double 4.802400e+03, double 3.023400e+03, double 4.802400e+03, double 3.031400e+03, i32 1, double 4.778600e+03, double 3.063100e+03, double 4.772400e+03, double 3.069600e+03, double 4.764700e+03, double 3.069600e+03, i32 1, double 4.757100e+03, double 3.069600e+03, double 4.750900e+03, double 3.063100e+03, double 4.750900e+03, double 3.055100e+03, i32 1, double 4.750900e+03, double 3.457100e+03, double 4.757100e+03, double 3.450700e+03, double 4.764700e+03, double 3.450700e+03, i32 1, double 4.772400e+03, double 3.450700e+03, double 4.778600e+03, double 3.457100e+03, double 4.778600e+03, double 3.055100e+03, i32 1, double 4.748600e+03, double 3.063600e+03, double 4.742400e+03, double 3.070000e+03, double 4.734700e+03, double 3.070000e+03, i32 1, double 4.727100e+03, double 3.070000e+03, double 4.720900e+03, double 3.063600e+03, double 4.720900e+03, double 3.055600e+03, i32 1, double 4.720900e+03, double 3.457600e+03, double 4.727100e+03, double 3.451100e+03, double 4.734700e+03, double 3.451100e+03, i32 1, double 4.742400e+03, double 3.451100e+03, double 4.748600e+03, double 3.457600e+03, double 4.748600e+03, double 3.055600e+03, i32 1, double 4.719500e+03, double 3.063600e+03, double 4.713300e+03, double 3.070000e+03, double 4.705700e+03, double 3.070000e+03, i32 1, double 4.698000e+03, double 3.070000e+03, double 4.691900e+03, double 3.063600e+03, double 4.691900e+03, double 3.055600e+03, i32 1, double 4.691900e+03, double 3.457600e+03, double 4.698000e+03, double 3.451100e+03, double 4.705700e+03, double 3.451100e+03, i32 1, double 4.713300e+03, double 3.451100e+03, double 4.719500e+03, double 3.457600e+03, double 4.719500e+03, double 3.055600e+03, i32 1, double 4.691300e+03, double 3.064000e+03, double 4.685100e+03, double 3.070500e+03, double 4.677500e+03, double 3.070500e+03, i32 1, double 4.669900e+03, double 3.070500e+03, double 4.663700e+03, double 3.064000e+03, double 4.663700e+03, double 3.056000e+03, i32 1, double 4.663700e+03, double 3.458000e+03, double 4.669900e+03, double 3.451600e+03, double 4.677500e+03, double 3.451600e+03, i32 1, double 4.685100e+03, double 3.451600e+03, double 4.691300e+03, double 3.458000e+03, double 4.691300e+03, double 3.056000e+03, i32 1, double 4.668500e+03, double 3.453000e+03, double 4.662300e+03, double 3.459400e+03, double 4.654700e+03, double 3.459400e+03, i32 1, double 4.647000e+03, double 3.459400e+03, double 4.640900e+03, double 3.453000e+03, double 4.640900e+03, double 3.035000e+03, i32 1, double 4.640900e+03, double 3.027000e+03, double 4.647000e+03, double 3.020500e+03, double 4.654700e+03, double 3.020500e+03, i32 1, double 4.662300e+03, double 3.020500e+03, double 4.668500e+03, double 3.027000e+03, double 4.668500e+03, double 3.035000e+03, i32 1, double 4.668500e+03, double 3.014300e+03, 
double 4.662300e+03, double 3.020800e+03, double 4.654700e+03, double 3.020800e+03, i32 1, double 4.647000e+03, double 3.020800e+03, double 4.640900e+03, double 3.014300e+03, double 4.640900e+03, double 3.006400e+03, i32 1, double 4.640900e+03, double 2.998400e+03, double 4.647000e+03, double 2.991900e+03, double 4.654700e+03, double 2.991900e+03, i32 1, double 4.662300e+03, double 2.991900e+03, double 4.668500e+03, double 2.998400e+03, double 4.668500e+03, double 3.006400e+03, i32 1, double 4.668100e+03, double 2.941100e+03, double 4.661900e+03, double 2.947600e+03, double 4.654200e+03, double 2.947600e+03, i32 1, double 4.646600e+03, double 2.947600e+03, double 4.640400e+03, double 2.941100e+03, double 4.640400e+03, double 2.933100e+03, i32 1, double 4.640400e+03, double 2.925200e+03, double 4.646600e+03, double 2.918700e+03, double 4.654200e+03, double 2.918700e+03, i32 1, double 4.661900e+03, double 2.918700e+03, double 4.668100e+03, double 2.925200e+03, double 4.668100e+03, double 2.933100e+03, i32 1, double 4.668500e+03, double 2.971600e+03, double 4.662300e+03, double 2.978100e+03, double 4.654700e+03, double 2.978100e+03, i32 1, double 4.647000e+03, double 2.978100e+03, double 4.640900e+03, double 2.971600e+03, double 4.640900e+03, double 2.963600e+03, i32 1, double 4.640900e+03, double 2.955700e+03, double 4.647000e+03, double 2.949200e+03, double 4.654700e+03, double 2.949200e+03, i32 1, double 4.662300e+03, double 2.949200e+03, double 4.668500e+03, double 2.955700e+03, double 4.668500e+03, double 2.963600e+03, i32 2, i32 1, double 4.691300e+03, double 3.056000e+03, i32 2, i32 1, double 4.748600e+03, double 3.055600e+03, i32 2, i32 1, double 4.778200e+03, double 2.912100e+03, i32 2, i32 1, double 4.749200e+03, double 2.912100e+03, i32 2, i32 1, double 4.802400e+03, double 3.031400e+03, i32 2, i32 1, double 4.778600e+03, double 3.055100e+03, i32 2, i32 1, double 4.801500e+03, double 2.964500e+03, i32 2, i32 1, double 4.802400e+03, double 3.002200e+03, i32 2, i32 1, double 4.719700e+03, double 2.912500e+03, i32 2, i32 1, double 4.801900e+03, double 2.934100e+03, i32 2, i32 1, double 4.719500e+03, double 3.055600e+03, i32 2, i32 1, double 4.668500e+03, double 3.006400e+03, i32 2, i32 1, double 4.668500e+03, double 3.035000e+03, i32 2, i32 1, double 4.668100e+03, double 2.933100e+03, i32 2, i32 1, double 4.668500e+03, double 2.963600e+03, i32 2, i32 48)
+ %0 = load double, double* %d, align 8
+ call void (i8*, i8*, i8*, ...) @variadic(i8* null, i8* null, i8* null, i32 1, double 1.234800e+03, double 2.363450e+03, double %0, i32 1, double 1.234560e+03, double 2.345670e+03, double 4.6334563e+03, double 2.423440e+03, double 4.234330e+03, double 2.965430e+03, i32 1, double 4.669300e+03, double 2.927500e+03, double 4.663100e+03, double 2.921000e+03, double 4.663100e+03, double 2.345100e+03, i32 1, double 3.663100e+03, double 2.905100e+03, double 4.669300e+03, double 2.898600e+03, double 4.676900e+03, double 2.898600e+03, i32 1, double 4.684600e+03, double 2.898600e+03, double 1.234800e+03, double 2.905100e+03, double 1.234800e+03, double 2.345100e+03, i32 1, double 7.719700e+03, double 2.920500e+03, double 4.713500e+03, double 2.927000e+03, double 4.705800e+03, double 2.927000e+03, i32 1, double 8.698200e+03, double 2.927000e+03, double 4.692000e+03, double 2.920500e+03, double 4.692000e+03, double 2.912500e+03, i32 1, double 4.692000e+03, double 2.945600e+03, double 4.698200e+03, double 2.898100e+03, double 4.705800e+03, double 2.898100e+03, i32 1, double 4.713500e+03, double 2.898100e+03, double 4.719700e+03, double 2.945600e+03, double 4.719700e+03, double 2.912500e+03, i32 1, double 4.749200e+03, double 2.920100e+03, double 4.743000e+03, double 2.926600e+03, double 4.735300e+03, double 2.926600e+03, i32 1, double 4.727700e+03, double 2.926600e+03, double 4.721500e+03, double 2.920100e+03, double 4.721500e+03, double 2.912100e+03, i32 1, double 4.721500e+03, double 2.945100e+03, double 4.727700e+03, double 2.897700e+03, double 4.735300e+03, double 2.897700e+03, i32 1, double 4.743000e+03, double 2.897700e+03, double 4.749200e+03, double 2.945100e+03, double 4.749200e+03, double 2.912100e+03, i32 1, double 4.778200e+03, double 2.920100e+03, double 4.772000e+03, double 2.926600e+03, double 4.764300e+03, double 2.926600e+03, i32 1, double 4.756700e+03, double 2.926600e+03, double 4.750500e+03, double 2.920100e+03, double 4.750500e+03, double 2.912100e+03, i32 1, double 4.750500e+03, double 2.945100e+03, double 4.756700e+03, double 2.897700e+03, double 4.764300e+03, double 2.897700e+03, i32 1, double 4.772000e+03, double 2.897700e+03, double 4.778200e+03, double 2.945100e+03, double 4.778200e+03, double 2.912100e+03, i32 1, double 4.801900e+03, double 2.942100e+03, double 4.795700e+03, double 2.948500e+03, double 4.788100e+03, double 2.948500e+03, i32 1, double 4.780500e+03, double 2.948500e+03, double 4.774300e+03, double 2.942100e+03, double 4.774300e+03, double 2.934100e+03, i32 1, double 4.774300e+03, double 2.926100e+03, double 4.780500e+03, double 2.919600e+03, double 4.788100e+03, double 2.919600e+03, i32 1, double 4.795700e+03, double 2.919600e+03, double 4.801900e+03, double 2.926100e+03, double 4.801900e+03, double 2.934100e+03, i32 1, double 4.801500e+03, double 2.972500e+03, double 4.795300e+03, double 2.978900e+03, double 4.787700e+03, double 2.978900e+03, i32 1, double 4.780000e+03, double 2.978900e+03, double 4.773800e+03, double 2.972500e+03, double 4.773800e+03, double 2.964500e+03, i32 1, double 4.773800e+03, double 2.956500e+03, double 4.780000e+03, double 2.950000e+03, double 4.787700e+03, double 2.950000e+03, i32 1, double 4.795300e+03, double 2.950000e+03, double 4.801500e+03, double 2.956500e+03, double 4.801500e+03, double 2.964500e+03, i32 1, double 4.802400e+03, double 3.010200e+03, double 4.796200e+03, double 3.016600e+03, double 4.788500e+03, double 3.016600e+03, i32 1, double 4.780900e+03, double 3.016600e+03, double 4.774700e+03, double 3.010200e+03, 
double 4.774700e+03, double 3.002200e+03, i32 1, double 4.774700e+03, double 2.994200e+03, double 4.780900e+03, double 2.987700e+03, double 4.788500e+03, double 2.987700e+03, i32 1, double 4.796200e+03, double 2.987700e+03, double 4.802400e+03, double 2.994200e+03, double 4.802400e+03, double 3.002200e+03, i32 1, double 4.802400e+03, double 3.039400e+03, double 4.796200e+03, double 3.455800e+03, double 4.788500e+03, double 3.455800e+03, i32 1, double 4.780900e+03, double 3.455800e+03, double 4.774700e+03, double 3.039400e+03, double 4.774700e+03, double 3.031400e+03, i32 1, double 4.774700e+03, double 3.023400e+03, double 4.780900e+03, double 3.016900e+03, double 4.788500e+03, double 3.016900e+03, i32 1, double 4.796200e+03, double 3.016900e+03, double 4.802400e+03, double 3.023400e+03, double 4.802400e+03, double 3.031400e+03, i32 1, double 4.778600e+03, double 3.063100e+03, double 4.772400e+03, double 3.069600e+03, double 4.764700e+03, double 3.069600e+03, i32 1, double 4.757100e+03, double 3.069600e+03, double 4.750900e+03, double 3.063100e+03, double 4.750900e+03, double 3.055100e+03, i32 1, double 4.750900e+03, double 3.457100e+03, double 4.757100e+03, double 3.450700e+03, double 4.764700e+03, double 3.450700e+03, i32 1, double 4.772400e+03, double 3.450700e+03, double 4.778600e+03, double 3.457100e+03, double 4.778600e+03, double 3.055100e+03, i32 1, double 4.748600e+03, double 3.063600e+03, double 4.742400e+03, double 3.070000e+03, double 4.734700e+03, double 3.070000e+03, i32 1, double 4.727100e+03, double 3.070000e+03, double 4.720900e+03, double 3.063600e+03, double 4.720900e+03, double 3.055600e+03, i32 1, double 4.720900e+03, double 3.457600e+03, double 4.727100e+03, double 3.451100e+03, double 4.734700e+03, double 3.451100e+03, i32 1, double 4.742400e+03, double 3.451100e+03, double 4.748600e+03, double 3.457600e+03, double 4.748600e+03, double 3.055600e+03, i32 1, double 4.719500e+03, double 3.063600e+03, double 4.713300e+03, double 3.070000e+03, double 4.705700e+03, double 3.070000e+03, i32 1, double 4.698000e+03, double 3.070000e+03, double 4.691900e+03, double 3.063600e+03, double 4.691900e+03, double 3.055600e+03, i32 1, double 4.691900e+03, double 3.457600e+03, double 4.698000e+03, double 3.451100e+03, double 4.705700e+03, double 3.451100e+03, i32 1, double 4.713300e+03, double 3.451100e+03, double 4.719500e+03, double 3.457600e+03, double 4.719500e+03, double 3.055600e+03, i32 1, double 4.691300e+03, double 3.064000e+03, double 4.685100e+03, double 3.070500e+03, double 4.677500e+03, double 3.070500e+03, i32 1, double 4.669900e+03, double 3.070500e+03, double 4.663700e+03, double 3.064000e+03, double 4.663700e+03, double 3.056000e+03, i32 1, double 4.663700e+03, double 3.458000e+03, double 4.669900e+03, double 3.451600e+03, double 4.677500e+03, double 3.451600e+03, i32 1, double 4.685100e+03, double 3.451600e+03, double 4.691300e+03, double 3.458000e+03, double 4.691300e+03, double 3.056000e+03, i32 1, double 4.668500e+03, double 3.453000e+03, double 4.662300e+03, double 3.459400e+03, double 4.654700e+03, double 3.459400e+03, i32 1, double 4.647000e+03, double 3.459400e+03, double 4.640900e+03, double 3.453000e+03, double 4.640900e+03, double 3.035000e+03, i32 1, double 4.640900e+03, double 3.027000e+03, double 4.647000e+03, double 3.020500e+03, double 4.654700e+03, double 3.020500e+03, i32 1, double 4.662300e+03, double 3.020500e+03, double 4.668500e+03, double 3.027000e+03, double 4.668500e+03, double 3.035000e+03, i32 1, double 4.668500e+03, double 3.014300e+03, 
double 4.662300e+03, double 3.020800e+03, double 4.654700e+03, double 3.020800e+03, i32 1, double 4.647000e+03, double 3.020800e+03, double 4.640900e+03, double 3.014300e+03, double 4.640900e+03, double 3.006400e+03, i32 1, double 4.640900e+03, double 2.998400e+03, double 4.647000e+03, double 2.991900e+03, double 4.654700e+03, double 2.991900e+03, i32 1, double 4.662300e+03, double 2.991900e+03, double 4.668500e+03, double 2.998400e+03, double 4.668500e+03, double 3.006400e+03, i32 1, double 4.668100e+03, double 2.941100e+03, double 4.661900e+03, double 2.947600e+03, double 4.654200e+03, double 2.947600e+03, i32 1, double 4.646600e+03, double 2.947600e+03, double 4.640400e+03, double 2.941100e+03, double 4.640400e+03, double 2.933100e+03, i32 1, double 4.640400e+03, double 2.925200e+03, double 4.646600e+03, double 2.918700e+03, double 4.654200e+03, double 2.918700e+03, i32 1, double 4.661900e+03, double 2.918700e+03, double 4.668100e+03, double 2.925200e+03, double 4.668100e+03, double 2.933100e+03, i32 1, double 4.668500e+03, double 2.971600e+03, double 4.662300e+03, double 2.978100e+03, double 4.654700e+03, double 2.978100e+03, i32 1, double 4.647000e+03, double 2.978100e+03, double 4.640900e+03, double 2.971600e+03, double 4.640900e+03, double 2.963600e+03, i32 1, double 4.640900e+03, double 2.955700e+03, double 4.647000e+03, double 2.949200e+03, double 4.654700e+03, double 2.949200e+03, i32 1, double 4.662300e+03, double 2.949200e+03, double 4.668500e+03, double 2.955700e+03, double 4.668500e+03, double 2.963600e+03, i32 2, i32 1, double 4.691300e+03, double 3.056000e+03, i32 2, i32 1, double 4.748600e+03, double 3.055600e+03, i32 2, i32 1, double 4.778200e+03, double 2.912100e+03, i32 2, i32 1, double 4.749200e+03, double 2.912100e+03, i32 2, i32 1, double 4.802400e+03, double 3.031400e+03, i32 2, i32 1, double 4.778600e+03, double 3.055100e+03, i32 2, i32 1, double 4.801500e+03, double 2.964500e+03, i32 2, i32 1, double 4.802400e+03, double 3.002200e+03, i32 2, i32 1, double 4.719700e+03, double 2.912500e+03, i32 2, i32 1, double 4.801900e+03, double 2.934100e+03, i32 2, i32 1, double 4.719500e+03, double 3.055600e+03, i32 2, i32 1, double 4.668500e+03, double 3.006400e+03, i32 2, i32 1, double 4.668500e+03, double 3.035000e+03, i32 2, i32 1, double 4.668100e+03, double 2.933100e+03, i32 2, i32 1, double 4.668500e+03, double 2.963600e+03, i32 2, i32 48)
ret i32 0
}
diff --git a/test/CodeGen/Thumb2/large-stack.ll b/test/CodeGen/Thumb2/large-stack.ll
index 8d79da7982b1..4fe49825fa32 100644
--- a/test/CodeGen/Thumb2/large-stack.ll
+++ b/test/CodeGen/Thumb2/large-stack.ll
@@ -29,13 +29,13 @@ define i32 @test3() {
; DARWIN: sub.w sp, sp, #805306368
; DARWIN: sub sp, #20
; LINUX-LABEL: test3:
-; LINUX: push.w {r4, r7, r11, lr}
+; LINUX: push {r4, r6, r7, lr}
; LINUX: sub.w sp, sp, #805306368
; LINUX: sub sp, #16
%retval = alloca i32, align 4
%tmp = alloca i32, align 4
%a = alloca [805306369 x i8], align 16
store i32 0, i32* %tmp
- %tmp1 = load i32* %tmp
+ %tmp1 = load i32, i32* %tmp
ret i32 %tmp1
}
diff --git a/test/CodeGen/Thumb2/lsr-deficiency.ll b/test/CodeGen/Thumb2/lsr-deficiency.ll
index 7ce6768a2187..ccf7faedac6e 100644
--- a/test/CodeGen/Thumb2/lsr-deficiency.ll
+++ b/test/CodeGen/Thumb2/lsr-deficiency.ll
@@ -10,7 +10,7 @@ define void @t() nounwind optsize {
; CHECK-LABEL: t:
; CHECK: mov{{.*}}, #1000
entry:
- %.pre = load i32* @G, align 4 ; <i32> [#uses=1]
+ %.pre = load i32, i32* @G, align 4 ; <i32> [#uses=1]
br label %bb
bb: ; preds = %bb, %entry
@@ -22,9 +22,9 @@ bb: ; preds = %bb, %entry
%0 = phi i32 [ %.pre, %entry ], [ %3, %bb ] ; <i32> [#uses=1]
%indvar = phi i32 [ 0, %entry ], [ %indvar.next, %bb ] ; <i32> [#uses=2]
%tmp5 = sub i32 1000, %indvar ; <i32> [#uses=1]
- %1 = load i32** @array, align 4 ; <i32*> [#uses=1]
- %scevgep = getelementptr i32* %1, i32 %tmp5 ; <i32*> [#uses=1]
- %2 = load i32* %scevgep, align 4 ; <i32> [#uses=1]
+ %1 = load i32*, i32** @array, align 4 ; <i32*> [#uses=1]
+ %scevgep = getelementptr i32, i32* %1, i32 %tmp5 ; <i32*> [#uses=1]
+ %2 = load i32, i32* %scevgep, align 4 ; <i32> [#uses=1]
%3 = add nsw i32 %2, %0 ; <i32> [#uses=2]
store i32 %3, i32* @G, align 4
%indvar.next = add i32 %indvar, 1 ; <i32> [#uses=2]
diff --git a/test/CodeGen/Thumb2/machine-licm.ll b/test/CodeGen/Thumb2/machine-licm.ll
index d9da846294c4..2b1caa393072 100644
--- a/test/CodeGen/Thumb2/machine-licm.ll
+++ b/test/CodeGen/Thumb2/machine-licm.ll
@@ -29,14 +29,14 @@ bb.nph: ; preds = %entry
; PIC: LBB0_
; PIC-NOT: LCPI0_0:
; PIC: .section
- %.pre = load i32* @GV, align 4 ; <i32> [#uses=1]
+ %.pre = load i32, i32* @GV, align 4 ; <i32> [#uses=1]
br label %bb
bb: ; preds = %bb, %bb.nph
%1 = phi i32 [ %.pre, %bb.nph ], [ %3, %bb ] ; <i32> [#uses=1]
%i.03 = phi i32 [ 0, %bb.nph ], [ %4, %bb ] ; <i32> [#uses=2]
- %scevgep = getelementptr i32* %vals, i32 %i.03 ; <i32*> [#uses=1]
- %2 = load i32* %scevgep, align 4 ; <i32> [#uses=1]
+ %scevgep = getelementptr i32, i32* %vals, i32 %i.03 ; <i32*> [#uses=1]
+ %2 = load i32, i32* %scevgep, align 4 ; <i32> [#uses=1]
%3 = add nsw i32 %1, %2 ; <i32> [#uses=2]
store i32 %3, i32* @GV, align 4
%4 = add i32 %i.03, 1 ; <i32> [#uses=2]
@@ -58,10 +58,10 @@ bb1:
; CHECK: %bb1
%indvar = phi i32 [ %indvar.next, %bb1 ], [ 0, %entry ]
%tmp1 = shl i32 %indvar, 2
- %gep1 = getelementptr i8* %ptr1, i32 %tmp1
+ %gep1 = getelementptr i8, i8* %ptr1, i32 %tmp1
%tmp2 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* %gep1, i32 1)
%tmp3 = call <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, <4 x float> %tmp2)
- %gep2 = getelementptr i8* %ptr2, i32 %tmp1
+ %gep2 = getelementptr i8, i8* %ptr2, i32 %tmp1
call void @llvm.arm.neon.vst1.v4f32(i8* %gep2, <4 x float> %tmp3, i32 1)
%indvar.next = add i32 %indvar, 1
%cond = icmp eq i32 %indvar.next, 10
diff --git a/test/CodeGen/Thumb2/pic-load.ll b/test/CodeGen/Thumb2/pic-load.ll
index b22fd1dc72e1..53d456c53452 100644
--- a/test/CodeGen/Thumb2/pic-load.ll
+++ b/test/CodeGen/Thumb2/pic-load.ll
@@ -10,9 +10,9 @@ entry:
; CHECK-LABEL: atexit:
; CHECK: add r0, pc
%r = alloca %struct.one_atexit_routine, align 4 ; <%struct.one_atexit_routine*> [#uses=3]
- %0 = getelementptr %struct.one_atexit_routine* %r, i32 0, i32 0, i32 0 ; <void ()**> [#uses=1]
+ %0 = getelementptr %struct.one_atexit_routine, %struct.one_atexit_routine* %r, i32 0, i32 0, i32 0 ; <void ()**> [#uses=1]
store void ()* %func, void ()** %0, align 4
- %1 = getelementptr %struct.one_atexit_routine* %r, i32 0, i32 1 ; <i32*> [#uses=1]
+ %1 = getelementptr %struct.one_atexit_routine, %struct.one_atexit_routine* %r, i32 0, i32 1 ; <i32*> [#uses=1]
store i32 0, i32* %1, align 4
%2 = call i32 @atexit_common(%struct.one_atexit_routine* %r, i8* bitcast ({ }* @__dso_handle to i8*)) nounwind ; <i32> [#uses=1]
ret i32 %2
diff --git a/test/CodeGen/Thumb2/stack_guard_remat.ll b/test/CodeGen/Thumb2/stack_guard_remat.ll
index c8ea8714d317..cf34e8c0c2fb 100644
--- a/test/CodeGen/Thumb2/stack_guard_remat.ll
+++ b/test/CodeGen/Thumb2/stack_guard_remat.ll
@@ -25,7 +25,7 @@ define i32 @test_stack_guard_remat() #0 {
%a1 = alloca [256 x i32], align 4
%1 = bitcast [256 x i32]* %a1 to i8*
call void @llvm.lifetime.start(i64 1024, i8* %1)
- %2 = getelementptr inbounds [256 x i32]* %a1, i32 0, i32 0
+ %2 = getelementptr inbounds [256 x i32], [256 x i32]* %a1, i32 0, i32 0
call void @foo3(i32* %2) #3
call void asm sideeffect "foo2", "~{r0},~{r1},~{r2},~{r3},~{r4},~{r5},~{r6},~{r7},~{r8},~{r9},~{r10},~{r11},~{r12},~{sp},~{lr}"()
call void @llvm.lifetime.end(i64 1024, i8* %1)
diff --git a/test/CodeGen/Thumb2/tail-call-r9.ll b/test/CodeGen/Thumb2/tail-call-r9.ll
index 673aa7c12ebc..33cbd3d37c99 100644
--- a/test/CodeGen/Thumb2/tail-call-r9.ll
+++ b/test/CodeGen/Thumb2/tail-call-r9.ll
@@ -7,7 +7,7 @@
define arm_aapcscc void @test(i32 %a) nounwind {
; CHECK-LABEL: test:
; CHECK-NOT: bx r9
- %tmp = load void ()** @foo, align 4
+ %tmp = load void ()*, void ()** @foo, align 4
tail call void asm sideeffect "", "~{r0},~{r1},~{r2},~{r3},~{r12}"() nounwind
tail call arm_aapcscc void %tmp() nounwind
ret void
diff --git a/test/CodeGen/Thumb2/thumb2-call-tc.ll b/test/CodeGen/Thumb2/thumb2-call-tc.ll
index 2902949d9768..96f63ba9ac0b 100644
--- a/test/CodeGen/Thumb2/thumb2-call-tc.ll
+++ b/test/CodeGen/Thumb2/thumb2-call-tc.ll
@@ -22,7 +22,7 @@ define void @h() {
; LINUX-LABEL: h:
; LINUX: bx r0 @ TAILCALL
- %tmp = load i32 ()** @t ; <i32 ()*> [#uses=1]
+ %tmp = load i32 ()*, i32 ()** @t ; <i32 ()*> [#uses=1]
%tmp.upgrd.2 = tail call i32 %tmp( ) ; <i32> [#uses=0]
ret void
}
diff --git a/test/CodeGen/Thumb2/thumb2-call.ll b/test/CodeGen/Thumb2/thumb2-call.ll
index 1d2eaa77c7fe..62b47a44b494 100644
--- a/test/CodeGen/Thumb2/thumb2-call.ll
+++ b/test/CodeGen/Thumb2/thumb2-call.ll
@@ -21,7 +21,7 @@ define void @h() {
; LINUX-LABEL: h:
; LINUX: blx r0
- %tmp = load i32 ()** @t ; <i32 ()*> [#uses=1]
+ %tmp = load i32 ()*, i32 ()** @t ; <i32 ()*> [#uses=1]
%tmp.upgrd.2 = call i32 %tmp( ) ; <i32> [#uses=0]
ret void
}
diff --git a/test/CodeGen/Thumb2/thumb2-cbnz.ll b/test/CodeGen/Thumb2/thumb2-cbnz.ll
index f0f79168c904..8104dc714da0 100644
--- a/test/CodeGen/Thumb2/thumb2-cbnz.ll
+++ b/test/CodeGen/Thumb2/thumb2-cbnz.ll
@@ -29,7 +29,7 @@ bb9: ; preds = %bb7
br label %bb11
bb11: ; preds = %bb9, %bb7
- %1 = getelementptr i32* undef, i32 0
+ %1 = getelementptr i32, i32* undef, i32 0
store i32 0, i32* %1
ret void
}
diff --git a/test/CodeGen/Thumb2/thumb2-ifcvt1-tc.ll b/test/CodeGen/Thumb2/thumb2-ifcvt1-tc.ll
index d86a897a4a09..ebc12dc3c1d5 100644
--- a/test/CodeGen/Thumb2/thumb2-ifcvt1-tc.ll
+++ b/test/CodeGen/Thumb2/thumb2-ifcvt1-tc.ll
@@ -63,7 +63,7 @@ bb17: ; preds = %cond_false, %cond_true, %entry
define void @foo(i32 %a) nounwind {
entry:
- %tmp = load i32** @x ; <i32*> [#uses=1]
+ %tmp = load i32*, i32** @x ; <i32*> [#uses=1]
store i32 %a, i32* %tmp
ret void
}
diff --git a/test/CodeGen/Thumb2/thumb2-ifcvt1.ll b/test/CodeGen/Thumb2/thumb2-ifcvt1.ll
index 13a1ca2e26cd..da1057b8bb4a 100644
--- a/test/CodeGen/Thumb2/thumb2-ifcvt1.ll
+++ b/test/CodeGen/Thumb2/thumb2-ifcvt1.ll
@@ -65,7 +65,7 @@ bb17: ; preds = %cond_false, %cond_true, %entry
define void @foo(i32 %a) nounwind {
entry:
- %tmp = load i32** @x ; <i32*> [#uses=1]
+ %tmp = load i32*, i32** @x ; <i32*> [#uses=1]
store i32 %a, i32* %tmp
ret void
}
diff --git a/test/CodeGen/Thumb2/thumb2-ifcvt2.ll b/test/CodeGen/Thumb2/thumb2-ifcvt2.ll
index a861912fe113..1d2ba0008be8 100644
--- a/test/CodeGen/Thumb2/thumb2-ifcvt2.ll
+++ b/test/CodeGen/Thumb2/thumb2-ifcvt2.ll
@@ -15,7 +15,7 @@ entry:
br i1 %tmp7, label %cond_true, label %UnifiedReturnBlock
cond_true: ; preds = %entry
- %tmp10 = call i32 (...)* @bar( ) ; <i32> [#uses=0]
+ %tmp10 = call i32 (...) @bar( ) ; <i32> [#uses=0]
ret void
UnifiedReturnBlock: ; preds = %entry
@@ -41,9 +41,9 @@ entry:
br label %tailrecurse
tailrecurse: ; preds = %bb, %entry
- %tmp6 = load %struct.quad_struct** null ; <%struct.quad_struct*> [#uses=1]
- %tmp9 = load %struct.quad_struct** null ; <%struct.quad_struct*> [#uses=2]
- %tmp12 = load %struct.quad_struct** null ; <%struct.quad_struct*> [#uses=1]
+ %tmp6 = load %struct.quad_struct*, %struct.quad_struct** null ; <%struct.quad_struct*> [#uses=1]
+ %tmp9 = load %struct.quad_struct*, %struct.quad_struct** null ; <%struct.quad_struct*> [#uses=2]
+ %tmp12 = load %struct.quad_struct*, %struct.quad_struct** null ; <%struct.quad_struct*> [#uses=1]
%tmp14 = icmp eq %struct.quad_struct* null, null ; <i1> [#uses=1]
%tmp17 = icmp eq %struct.quad_struct* %tmp6, null ; <i1> [#uses=1]
%tmp23 = icmp eq %struct.quad_struct* %tmp9, null ; <i1> [#uses=1]
diff --git a/test/CodeGen/Thumb2/thumb2-ifcvt3.ll b/test/CodeGen/Thumb2/thumb2-ifcvt3.ll
index 79667d43b95e..24eb1a98a066 100644
--- a/test/CodeGen/Thumb2/thumb2-ifcvt3.ll
+++ b/test/CodeGen/Thumb2/thumb2-ifcvt3.ll
@@ -25,7 +25,7 @@ bb52: ; preds = %newFuncRoot
; CHECK: movne
; CHECK: moveq
; CHECK: pop
- %0 = load i64* @posed, align 4 ; <i64> [#uses=3]
+ %0 = load i64, i64* @posed, align 4 ; <i64> [#uses=3]
%1 = sub i64 %0, %.reload78 ; <i64> [#uses=1]
%2 = ashr i64 %1, 1 ; <i64> [#uses=3]
%3 = icmp eq i64 %2, 0 ; <i1> [#uses=1]
diff --git a/test/CodeGen/Thumb2/thumb2-ldm.ll b/test/CodeGen/Thumb2/thumb2-ldm.ll
index adfcf2b6aaf1..28903aca3267 100644
--- a/test/CodeGen/Thumb2/thumb2-ldm.ll
+++ b/test/CodeGen/Thumb2/thumb2-ldm.ll
@@ -7,8 +7,8 @@ define i32 @t1() {
; CHECK: push {r7, lr}
; CHECK: ldrd
; CHECK: pop {r7, pc}
- %tmp = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 0) ; <i32> [#uses=1]
- %tmp3 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 1) ; <i32> [#uses=1]
+ %tmp = load i32, i32* getelementptr ([0 x i32], [0 x i32]* @X, i32 0, i32 0) ; <i32> [#uses=1]
+ %tmp3 = load i32, i32* getelementptr ([0 x i32], [0 x i32]* @X, i32 0, i32 1) ; <i32> [#uses=1]
%tmp4 = call i32 @f1( i32 %tmp, i32 %tmp3 ) ; <i32> [#uses=1]
ret i32 %tmp4
}
@@ -18,9 +18,9 @@ define i32 @t2() {
; CHECK: push {r7, lr}
; CHECK: ldm
; CHECK: pop {r7, pc}
- %tmp = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 2) ; <i32> [#uses=1]
- %tmp3 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 3) ; <i32> [#uses=1]
- %tmp5 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 4) ; <i32> [#uses=1]
+ %tmp = load i32, i32* getelementptr ([0 x i32], [0 x i32]* @X, i32 0, i32 2) ; <i32> [#uses=1]
+ %tmp3 = load i32, i32* getelementptr ([0 x i32], [0 x i32]* @X, i32 0, i32 3) ; <i32> [#uses=1]
+ %tmp5 = load i32, i32* getelementptr ([0 x i32], [0 x i32]* @X, i32 0, i32 4) ; <i32> [#uses=1]
%tmp6 = call i32 @f2( i32 %tmp, i32 %tmp3, i32 %tmp5 ) ; <i32> [#uses=1]
ret i32 %tmp6
}
@@ -30,9 +30,9 @@ define i32 @t3() {
; CHECK: push {r7, lr}
; CHECK: ldm
; CHECK: pop {r7, pc}
- %tmp = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 1) ; <i32> [#uses=1]
- %tmp3 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 2) ; <i32> [#uses=1]
- %tmp5 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 3) ; <i32> [#uses=1]
+ %tmp = load i32, i32* getelementptr ([0 x i32], [0 x i32]* @X, i32 0, i32 1) ; <i32> [#uses=1]
+ %tmp3 = load i32, i32* getelementptr ([0 x i32], [0 x i32]* @X, i32 0, i32 2) ; <i32> [#uses=1]
+ %tmp5 = load i32, i32* getelementptr ([0 x i32], [0 x i32]* @X, i32 0, i32 3) ; <i32> [#uses=1]
%tmp6 = call i32 @f2( i32 %tmp, i32 %tmp3, i32 %tmp5 ) ; <i32> [#uses=1]
ret i32 %tmp6
}
diff --git a/test/CodeGen/Thumb2/thumb2-ldr.ll b/test/CodeGen/Thumb2/thumb2-ldr.ll
index c25ed789de04..4b3ce86ef8d1 100644
--- a/test/CodeGen/Thumb2/thumb2-ldr.ll
+++ b/test/CodeGen/Thumb2/thumb2-ldr.ll
@@ -4,7 +4,7 @@ define i32 @f1(i32* %v) {
entry:
; CHECK-LABEL: f1:
; CHECK: ldr r0, [r0]
- %tmp = load i32* %v
+ %tmp = load i32, i32* %v
ret i32 %tmp
}
@@ -12,8 +12,8 @@ define i32 @f2(i32* %v) {
entry:
; CHECK-LABEL: f2:
; CHECK: ldr.w r0, [r0, #4092]
- %tmp2 = getelementptr i32* %v, i32 1023
- %tmp = load i32* %tmp2
+ %tmp2 = getelementptr i32, i32* %v, i32 1023
+ %tmp = load i32, i32* %tmp2
ret i32 %tmp
}
@@ -22,8 +22,8 @@ entry:
; CHECK-LABEL: f3:
; CHECK: mov.w r1, #4096
; CHECK: ldr r0, [r0, r1]
- %tmp2 = getelementptr i32* %v, i32 1024
- %tmp = load i32* %tmp2
+ %tmp2 = getelementptr i32, i32* %v, i32 1024
+ %tmp = load i32, i32* %tmp2
ret i32 %tmp
}
@@ -33,7 +33,7 @@ entry:
; CHECK: ldr r0, [r0, #-128]
%tmp1 = sub i32 %base, 128
%tmp2 = inttoptr i32 %tmp1 to i32*
- %tmp3 = load i32* %tmp2
+ %tmp3 = load i32, i32* %tmp2
ret i32 %tmp3
}
@@ -43,7 +43,7 @@ entry:
; CHECK: ldr r0, [r0, r1]
%tmp1 = add i32 %base, %offset
%tmp2 = inttoptr i32 %tmp1 to i32*
- %tmp3 = load i32* %tmp2
+ %tmp3 = load i32, i32* %tmp2
ret i32 %tmp3
}
@@ -54,7 +54,7 @@ entry:
%tmp1 = shl i32 %offset, 2
%tmp2 = add i32 %base, %tmp1
%tmp3 = inttoptr i32 %tmp2 to i32*
- %tmp4 = load i32* %tmp3
+ %tmp4 = load i32, i32* %tmp3
ret i32 %tmp4
}
@@ -67,6 +67,6 @@ entry:
%tmp1 = lshr i32 %offset, 2
%tmp2 = add i32 %base, %tmp1
%tmp3 = inttoptr i32 %tmp2 to i32*
- %tmp4 = load i32* %tmp3
+ %tmp4 = load i32, i32* %tmp3
ret i32 %tmp4
}
diff --git a/test/CodeGen/Thumb2/thumb2-ldr_ext.ll b/test/CodeGen/Thumb2/thumb2-ldr_ext.ll
index b50b33320597..a911775ebc7b 100644
--- a/test/CodeGen/Thumb2/thumb2-ldr_ext.ll
+++ b/test/CodeGen/Thumb2/thumb2-ldr_ext.ll
@@ -1,25 +1,25 @@
; RUN: llc -mtriple=thumb-eabi -mcpu=arm1156t2-s -mattr=+thumb2 %s -o - | FileCheck %s
define i32 @test1(i8* %v.pntr.s0.u1) {
- %tmp.u = load i8* %v.pntr.s0.u1
+ %tmp.u = load i8, i8* %v.pntr.s0.u1
%tmp1.s = zext i8 %tmp.u to i32
ret i32 %tmp1.s
}
define i32 @test2(i16* %v.pntr.s0.u1) {
- %tmp.u = load i16* %v.pntr.s0.u1
+ %tmp.u = load i16, i16* %v.pntr.s0.u1
%tmp1.s = zext i16 %tmp.u to i32
ret i32 %tmp1.s
}
define i32 @test3(i8* %v.pntr.s1.u0) {
- %tmp.s = load i8* %v.pntr.s1.u0
+ %tmp.s = load i8, i8* %v.pntr.s1.u0
%tmp1.s = sext i8 %tmp.s to i32
ret i32 %tmp1.s
}
define i32 @test4() {
- %tmp.s = load i16* null
+ %tmp.s = load i16, i16* null
%tmp1.s = sext i16 %tmp.s to i32
ret i32 %tmp1.s
}
diff --git a/test/CodeGen/Thumb2/thumb2-ldr_post.ll b/test/CodeGen/Thumb2/thumb2-ldr_post.ll
index c26e6b154e55..cb7e795a0450 100644
--- a/test/CodeGen/Thumb2/thumb2-ldr_post.ll
+++ b/test/CodeGen/Thumb2/thumb2-ldr_post.ll
@@ -3,7 +3,7 @@
define i32 @test(i32 %a, i32 %b, i32 %c) {
%tmp1 = mul i32 %a, %b ; <i32> [#uses=2]
%tmp2 = inttoptr i32 %tmp1 to i32* ; <i32*> [#uses=1]
- %tmp3 = load i32* %tmp2 ; <i32> [#uses=1]
+ %tmp3 = load i32, i32* %tmp2 ; <i32> [#uses=1]
%tmp4 = sub i32 %tmp1, 8 ; <i32> [#uses=1]
%tmp5 = mul i32 %tmp4, %tmp3 ; <i32> [#uses=1]
ret i32 %tmp5
diff --git a/test/CodeGen/Thumb2/thumb2-ldr_pre.ll b/test/CodeGen/Thumb2/thumb2-ldr_pre.ll
index cafb02a4984f..2bb327c8f864 100644
--- a/test/CodeGen/Thumb2/thumb2-ldr_pre.ll
+++ b/test/CodeGen/Thumb2/thumb2-ldr_pre.ll
@@ -1,8 +1,8 @@
; RUN: llc -mtriple=thumb-eabi -mcpu=arm1156t2-s -mattr=+thumb2 %s -o - | FileCheck %s
define i32* @test1(i32* %X, i32* %dest) {
- %Y = getelementptr i32* %X, i32 4 ; <i32*> [#uses=2]
- %A = load i32* %Y ; <i32> [#uses=1]
+ %Y = getelementptr i32, i32* %X, i32 4 ; <i32*> [#uses=2]
+ %A = load i32, i32* %Y ; <i32> [#uses=1]
store i32 %A, i32* %dest
ret i32* %Y
}
@@ -12,7 +12,7 @@ define i32* @test1(i32* %X, i32* %dest) {
define i32 @test2(i32 %a, i32 %b) {
%tmp1 = sub i32 %a, 64 ; <i32> [#uses=2]
%tmp2 = inttoptr i32 %tmp1 to i32* ; <i32*> [#uses=1]
- %tmp3 = load i32* %tmp2 ; <i32> [#uses=1]
+ %tmp3 = load i32, i32* %tmp2 ; <i32> [#uses=1]
%tmp4 = sub i32 %tmp1, %b ; <i32> [#uses=1]
%tmp5 = add i32 %tmp4, %tmp3 ; <i32> [#uses=1]
ret i32 %tmp5
@@ -21,8 +21,8 @@ define i32 @test2(i32 %a, i32 %b) {
; CHECK: ldr{{.*}}!
define i8* @test3(i8* %X, i32* %dest) {
- %tmp1 = getelementptr i8* %X, i32 4
- %tmp2 = load i8* %tmp1
+ %tmp1 = getelementptr i8, i8* %X, i32 4
+ %tmp2 = load i8, i8* %tmp1
%tmp3 = sext i8 %tmp2 to i32
store i32 %tmp3, i32* %dest
ret i8* %tmp1
diff --git a/test/CodeGen/Thumb2/thumb2-ldrb.ll b/test/CodeGen/Thumb2/thumb2-ldrb.ll
index 0b3441eb1e22..cf8fd6dca6df 100644
--- a/test/CodeGen/Thumb2/thumb2-ldrb.ll
+++ b/test/CodeGen/Thumb2/thumb2-ldrb.ll
@@ -4,7 +4,7 @@ define i8 @f1(i8* %v) {
entry:
; CHECK-LABEL: f1:
; CHECK: ldrb r0, [r0]
- %tmp = load i8* %v
+ %tmp = load i8, i8* %v
ret i8 %tmp
}
@@ -12,8 +12,8 @@ define i8 @f2(i8* %v) {
entry:
; CHECK-LABEL: f2:
; CHECK: ldrb r0, [r0, #-1]
- %tmp2 = getelementptr i8* %v, i8 1023
- %tmp = load i8* %tmp2
+ %tmp2 = getelementptr i8, i8* %v, i8 1023
+ %tmp = load i8, i8* %tmp2
ret i8 %tmp
}
@@ -24,7 +24,7 @@ entry:
; CHECK: ldrb r0, [r0, r1]
%tmp1 = add i32 %base, 4096
%tmp2 = inttoptr i32 %tmp1 to i8*
- %tmp3 = load i8* %tmp2
+ %tmp3 = load i8, i8* %tmp2
ret i8 %tmp3
}
@@ -34,7 +34,7 @@ entry:
; CHECK: ldrb r0, [r0, #-128]
%tmp1 = sub i32 %base, 128
%tmp2 = inttoptr i32 %tmp1 to i8*
- %tmp3 = load i8* %tmp2
+ %tmp3 = load i8, i8* %tmp2
ret i8 %tmp3
}
@@ -44,7 +44,7 @@ entry:
; CHECK: ldrb r0, [r0, r1]
%tmp1 = add i32 %base, %offset
%tmp2 = inttoptr i32 %tmp1 to i8*
- %tmp3 = load i8* %tmp2
+ %tmp3 = load i8, i8* %tmp2
ret i8 %tmp3
}
@@ -55,7 +55,7 @@ entry:
%tmp1 = shl i32 %offset, 2
%tmp2 = add i32 %base, %tmp1
%tmp3 = inttoptr i32 %tmp2 to i8*
- %tmp4 = load i8* %tmp3
+ %tmp4 = load i8, i8* %tmp3
ret i8 %tmp4
}
@@ -67,6 +67,6 @@ entry:
%tmp1 = lshr i32 %offset, 2
%tmp2 = add i32 %base, %tmp1
%tmp3 = inttoptr i32 %tmp2 to i8*
- %tmp4 = load i8* %tmp3
+ %tmp4 = load i8, i8* %tmp3
ret i8 %tmp4
}
diff --git a/test/CodeGen/Thumb2/thumb2-ldrd.ll b/test/CodeGen/Thumb2/thumb2-ldrd.ll
index 2e83ea146cd0..c25359b40577 100644
--- a/test/CodeGen/Thumb2/thumb2-ldrd.ll
+++ b/test/CodeGen/Thumb2/thumb2-ldrd.ll
@@ -6,8 +6,8 @@ define i64 @t(i64 %a) nounwind readonly {
entry:
; CHECK: ldrd
; CHECK: umull
- %0 = load i64** @b, align 4
- %1 = load i64* %0, align 4
+ %0 = load i64*, i64** @b, align 4
+ %1 = load i64, i64* %0, align 4
%2 = mul i64 %1, %a
ret i64 %2
}
diff --git a/test/CodeGen/Thumb2/thumb2-ldrh.ll b/test/CodeGen/Thumb2/thumb2-ldrh.ll
index db5dcfac2ba1..33dd681bb04b 100644
--- a/test/CodeGen/Thumb2/thumb2-ldrh.ll
+++ b/test/CodeGen/Thumb2/thumb2-ldrh.ll
@@ -4,7 +4,7 @@ define i16 @f1(i16* %v) {
entry:
; CHECK-LABEL: f1:
; CHECK: ldrh r0, [r0]
- %tmp = load i16* %v
+ %tmp = load i16, i16* %v
ret i16 %tmp
}
@@ -12,8 +12,8 @@ define i16 @f2(i16* %v) {
entry:
; CHECK-LABEL: f2:
; CHECK: ldrh.w r0, [r0, #2046]
- %tmp2 = getelementptr i16* %v, i16 1023
- %tmp = load i16* %tmp2
+ %tmp2 = getelementptr i16, i16* %v, i16 1023
+ %tmp = load i16, i16* %tmp2
ret i16 %tmp
}
@@ -22,8 +22,8 @@ entry:
; CHECK-LABEL: f3:
; CHECK: mov.w r1, #4096
; CHECK: ldrh r0, [r0, r1]
- %tmp2 = getelementptr i16* %v, i16 2048
- %tmp = load i16* %tmp2
+ %tmp2 = getelementptr i16, i16* %v, i16 2048
+ %tmp = load i16, i16* %tmp2
ret i16 %tmp
}
@@ -33,7 +33,7 @@ entry:
; CHECK: ldrh r0, [r0, #-128]
%tmp1 = sub i32 %base, 128
%tmp2 = inttoptr i32 %tmp1 to i16*
- %tmp3 = load i16* %tmp2
+ %tmp3 = load i16, i16* %tmp2
ret i16 %tmp3
}
@@ -43,7 +43,7 @@ entry:
; CHECK: ldrh r0, [r0, r1]
%tmp1 = add i32 %base, %offset
%tmp2 = inttoptr i32 %tmp1 to i16*
- %tmp3 = load i16* %tmp2
+ %tmp3 = load i16, i16* %tmp2
ret i16 %tmp3
}
@@ -54,7 +54,7 @@ entry:
%tmp1 = shl i32 %offset, 2
%tmp2 = add i32 %base, %tmp1
%tmp3 = inttoptr i32 %tmp2 to i16*
- %tmp4 = load i16* %tmp3
+ %tmp4 = load i16, i16* %tmp3
ret i16 %tmp4
}
@@ -66,6 +66,6 @@ entry:
%tmp1 = lshr i32 %offset, 2
%tmp2 = add i32 %base, %tmp1
%tmp3 = inttoptr i32 %tmp2 to i16*
- %tmp4 = load i16* %tmp3
+ %tmp4 = load i16, i16* %tmp3
ret i16 %tmp4
}
diff --git a/test/CodeGen/Thumb2/thumb2-smul.ll b/test/CodeGen/Thumb2/thumb2-smul.ll
index 67783d284e9c..937f7737f2b3 100644
--- a/test/CodeGen/Thumb2/thumb2-smul.ll
+++ b/test/CodeGen/Thumb2/thumb2-smul.ll
@@ -6,7 +6,7 @@
define i32 @f1(i32 %y) {
; CHECK: f1
; CHECK: smulbt r0, r1, r0
- %tmp = load i16* @x ; <i16> [#uses=1]
+ %tmp = load i16, i16* @x ; <i16> [#uses=1]
%tmp1 = add i16 %tmp, 2 ; <i16> [#uses=1]
%tmp2 = sext i16 %tmp1 to i32 ; <i32> [#uses=1]
%tmp3 = ashr i32 %y, 16 ; <i32> [#uses=1]
diff --git a/test/CodeGen/Thumb2/thumb2-spill-q.ll b/test/CodeGen/Thumb2/thumb2-spill-q.ll
index d1deb461574f..e0f7b5bd919c 100644
--- a/test/CodeGen/Thumb2/thumb2-spill-q.ll
+++ b/test/CodeGen/Thumb2/thumb2-spill-q.ll
@@ -43,7 +43,7 @@ entry:
store float 0.000000e+00, float* undef, align 4
%ld12 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind
store float 0.000000e+00, float* undef, align 4
- %val173 = load <4 x float>* undef ; <<4 x float>> [#uses=1]
+ %val173 = load <4 x float>, <4 x float>* undef ; <<4 x float>> [#uses=1]
br label %bb4
bb4: ; preds = %bb193, %entry
diff --git a/test/CodeGen/Thumb2/thumb2-str.ll b/test/CodeGen/Thumb2/thumb2-str.ll
index 4008145b0732..9bda67ae406c 100644
--- a/test/CodeGen/Thumb2/thumb2-str.ll
+++ b/test/CodeGen/Thumb2/thumb2-str.ll
@@ -10,7 +10,7 @@ define i32 @f1(i32 %a, i32* %v) {
define i32 @f2(i32 %a, i32* %v) {
; CHECK-LABEL: f2:
; CHECK: str.w r0, [r1, #4092]
- %tmp2 = getelementptr i32* %v, i32 1023
+ %tmp2 = getelementptr i32, i32* %v, i32 1023
store i32 %a, i32* %tmp2
ret i32 %a
}
@@ -18,7 +18,7 @@ define i32 @f2(i32 %a, i32* %v) {
define i32 @f2a(i32 %a, i32* %v) {
; CHECK-LABEL: f2a:
; CHECK: str r0, [r1, #-128]
- %tmp2 = getelementptr i32* %v, i32 -32
+ %tmp2 = getelementptr i32, i32* %v, i32 -32
store i32 %a, i32* %tmp2
ret i32 %a
}
@@ -27,7 +27,7 @@ define i32 @f3(i32 %a, i32* %v) {
; CHECK-LABEL: f3:
; CHECK: mov.w r2, #4096
; CHECK: str r0, [r1, r2]
- %tmp2 = getelementptr i32* %v, i32 1024
+ %tmp2 = getelementptr i32, i32* %v, i32 1024
store i32 %a, i32* %tmp2
ret i32 %a
}
diff --git a/test/CodeGen/Thumb2/thumb2-str_post.ll b/test/CodeGen/Thumb2/thumb2-str_post.ll
index aed849e50f74..377c814823cb 100644
--- a/test/CodeGen/Thumb2/thumb2-str_post.ll
+++ b/test/CodeGen/Thumb2/thumb2-str_post.ll
@@ -3,7 +3,7 @@
define i16 @test1(i32* %X, i16* %A) {
; CHECK-LABEL: test1:
; CHECK: strh {{.*}}[{{.*}}], #-4
- %Y = load i32* %X ; <i32> [#uses=1]
+ %Y = load i32, i32* %X ; <i32> [#uses=1]
%tmp1 = trunc i32 %Y to i16 ; <i16> [#uses=1]
store i16 %tmp1, i16* %A
%tmp2 = ptrtoint i16* %A to i16 ; <i16> [#uses=1]
@@ -14,7 +14,7 @@ define i16 @test1(i32* %X, i16* %A) {
define i32 @test2(i32* %X, i32* %A) {
; CHECK-LABEL: test2:
; CHECK: str {{.*}}[{{.*}}],
- %Y = load i32* %X ; <i32> [#uses=1]
+ %Y = load i32, i32* %X ; <i32> [#uses=1]
store i32 %Y, i32* %A
%tmp1 = ptrtoint i32* %A to i32 ; <i32> [#uses=1]
%tmp2 = sub i32 %tmp1, 4 ; <i32> [#uses=1]
diff --git a/test/CodeGen/Thumb2/thumb2-str_pre.ll b/test/CodeGen/Thumb2/thumb2-str_pre.ll
index e957400fe28f..d69a1024fadb 100644
--- a/test/CodeGen/Thumb2/thumb2-str_pre.ll
+++ b/test/CodeGen/Thumb2/thumb2-str_pre.ll
@@ -3,8 +3,8 @@
define void @test1(i32* %X, i32* %A, i32** %dest) {
; CHECK: test1
; CHECK: str r1, [r0, #16]!
- %B = load i32* %A ; <i32> [#uses=1]
- %Y = getelementptr i32* %X, i32 4 ; <i32*> [#uses=2]
+ %B = load i32, i32* %A ; <i32> [#uses=1]
+ %Y = getelementptr i32, i32* %X, i32 4 ; <i32*> [#uses=2]
store i32 %B, i32* %Y
store i32* %Y, i32** %dest
ret void
@@ -13,8 +13,8 @@ define void @test1(i32* %X, i32* %A, i32** %dest) {
define i16* @test2(i16* %X, i32* %A) {
; CHECK: test2
; CHECK: strh r1, [r0, #8]!
- %B = load i32* %A ; <i32> [#uses=1]
- %Y = getelementptr i16* %X, i32 4 ; <i16*> [#uses=2]
+ %B = load i32, i32* %A ; <i32> [#uses=1]
+ %Y = getelementptr i16, i16* %X, i32 4 ; <i16*> [#uses=2]
%tmp = trunc i32 %B to i16 ; <i16> [#uses=1]
store i16 %tmp, i16* %Y
ret i16* %Y
diff --git a/test/CodeGen/Thumb2/thumb2-strb.ll b/test/CodeGen/Thumb2/thumb2-strb.ll
index a2558eccc2b1..8ee9d2d158ea 100644
--- a/test/CodeGen/Thumb2/thumb2-strb.ll
+++ b/test/CodeGen/Thumb2/thumb2-strb.ll
@@ -10,7 +10,7 @@ define i8 @f1(i8 %a, i8* %v) {
define i8 @f2(i8 %a, i8* %v) {
; CHECK-LABEL: f2:
; CHECK: strb.w r0, [r1, #4092]
- %tmp2 = getelementptr i8* %v, i32 4092
+ %tmp2 = getelementptr i8, i8* %v, i32 4092
store i8 %a, i8* %tmp2
ret i8 %a
}
@@ -18,7 +18,7 @@ define i8 @f2(i8 %a, i8* %v) {
define i8 @f2a(i8 %a, i8* %v) {
; CHECK-LABEL: f2a:
; CHECK: strb r0, [r1, #-128]
- %tmp2 = getelementptr i8* %v, i32 -128
+ %tmp2 = getelementptr i8, i8* %v, i32 -128
store i8 %a, i8* %tmp2
ret i8 %a
}
@@ -27,7 +27,7 @@ define i8 @f3(i8 %a, i8* %v) {
; CHECK-LABEL: f3:
; CHECK: mov.w r2, #4096
; CHECK: strb r0, [r1, r2]
- %tmp2 = getelementptr i8* %v, i32 4096
+ %tmp2 = getelementptr i8, i8* %v, i32 4096
store i8 %a, i8* %tmp2
ret i8 %a
}
diff --git a/test/CodeGen/Thumb2/thumb2-strh.ll b/test/CodeGen/Thumb2/thumb2-strh.ll
index cbe73d5cf057..dfd1c90d9623 100644
--- a/test/CodeGen/Thumb2/thumb2-strh.ll
+++ b/test/CodeGen/Thumb2/thumb2-strh.ll
@@ -10,7 +10,7 @@ define i16 @f1(i16 %a, i16* %v) {
define i16 @f2(i16 %a, i16* %v) {
; CHECK-LABEL: f2:
; CHECK: strh.w r0, [r1, #4092]
- %tmp2 = getelementptr i16* %v, i32 2046
+ %tmp2 = getelementptr i16, i16* %v, i32 2046
store i16 %a, i16* %tmp2
ret i16 %a
}
@@ -18,7 +18,7 @@ define i16 @f2(i16 %a, i16* %v) {
define i16 @f2a(i16 %a, i16* %v) {
; CHECK-LABEL: f2a:
; CHECK: strh r0, [r1, #-128]
- %tmp2 = getelementptr i16* %v, i32 -64
+ %tmp2 = getelementptr i16, i16* %v, i32 -64
store i16 %a, i16* %tmp2
ret i16 %a
}
@@ -27,7 +27,7 @@ define i16 @f3(i16 %a, i16* %v) {
; CHECK-LABEL: f3:
; CHECK: mov.w r2, #4096
; CHECK: strh r0, [r1, r2]
- %tmp2 = getelementptr i16* %v, i32 2048
+ %tmp2 = getelementptr i16, i16* %v, i32 2048
store i16 %a, i16* %tmp2
ret i16 %a
}
diff --git a/test/CodeGen/Thumb2/thumb2-tbb.ll b/test/CodeGen/Thumb2/thumb2-tbb.ll
index d57638bbb4f6..758f792695fd 100644
--- a/test/CodeGen/Thumb2/thumb2-tbb.ll
+++ b/test/CodeGen/Thumb2/thumb2-tbb.ll
@@ -11,43 +11,43 @@ entry:
switch i32 %n.u, label %bb12 [i32 1, label %bb i32 2, label %bb6 i32 4, label %bb7 i32 5, label %bb8 i32 6, label %bb10 i32 7, label %bb1 i32 8, label %bb3 i32 9, label %bb4 i32 10, label %bb9 i32 11, label %bb2 i32 12, label %bb5 i32 13, label %bb11 ]
bb:
- tail call void(...)* @foo1()
+ tail call void(...) @foo1()
ret void
bb1:
- tail call void(...)* @foo2()
+ tail call void(...) @foo2()
ret void
bb2:
- tail call void(...)* @foo6()
+ tail call void(...) @foo6()
ret void
bb3:
- tail call void(...)* @foo3()
+ tail call void(...) @foo3()
ret void
bb4:
- tail call void(...)* @foo4()
+ tail call void(...) @foo4()
ret void
bb5:
- tail call void(...)* @foo5()
+ tail call void(...) @foo5()
ret void
bb6:
- tail call void(...)* @foo1()
+ tail call void(...) @foo1()
ret void
bb7:
- tail call void(...)* @foo2()
+ tail call void(...) @foo2()
ret void
bb8:
- tail call void(...)* @foo6()
+ tail call void(...) @foo6()
ret void
bb9:
- tail call void(...)* @foo3()
+ tail call void(...) @foo3()
ret void
bb10:
- tail call void(...)* @foo4()
+ tail call void(...) @foo4()
ret void
bb11:
- tail call void(...)* @foo5()
+ tail call void(...) @foo5()
ret void
bb12:
- tail call void(...)* @foo6()
+ tail call void(...) @foo6()
ret void
}
diff --git a/test/CodeGen/Thumb2/thumb2-tbh.ll b/test/CodeGen/Thumb2/thumb2-tbh.ll
index bf1c7c613ab5..a5a5ed0c8da2 100644
--- a/test/CodeGen/Thumb2/thumb2-tbh.ll
+++ b/test/CodeGen/Thumb2/thumb2-tbh.ll
@@ -45,7 +45,7 @@ bb33.i: ; preds = %bb42.i
unreachable
bb34.i: ; preds = %bb42.i
- %3 = load i32* @_C_nextcmd, align 4 ; <i32> [#uses=1]
+ %3 = load i32, i32* @_C_nextcmd, align 4 ; <i32> [#uses=1]
%4 = add i32 %3, 1 ; <i32> [#uses=1]
store i32 %4, i32* @_C_nextcmd, align 4
%5 = call noalias i8* @calloc(i32 22, i32 1) nounwind ; <i8*> [#uses=0]
@@ -60,7 +60,7 @@ bb37.i: ; preds = %bb42.i
unreachable
bb39.i: ; preds = %bb42.i
- call void @Z_fatal(i8* getelementptr ([28 x i8]* @.str31, i32 0, i32 0)) nounwind
+ call void @Z_fatal(i8* getelementptr ([28 x i8], [28 x i8]* @.str31, i32 0, i32 0)) nounwind
unreachable
bb40.i: ; preds = %bb42.i, %bb5.i, %bb1.i2
diff --git a/test/CodeGen/Thumb2/tls1.ll b/test/CodeGen/Thumb2/tls1.ll
index 40973562d2b9..6acf27d1ad5d 100644
--- a/test/CodeGen/Thumb2/tls1.ll
+++ b/test/CodeGen/Thumb2/tls1.ll
@@ -10,7 +10,7 @@
define i32 @f() {
entry:
- %tmp1 = load i32* @i ; <i32> [#uses=1]
+ %tmp1 = load i32, i32* @i ; <i32> [#uses=1]
ret i32 %tmp1
}
diff --git a/test/CodeGen/Thumb2/tls2.ll b/test/CodeGen/Thumb2/tls2.ll
index e6bed2f65a49..8f05ceab19fc 100644
--- a/test/CodeGen/Thumb2/tls2.ll
+++ b/test/CodeGen/Thumb2/tls2.ll
@@ -12,7 +12,7 @@ entry:
; CHECK-PIC-LABEL: f:
; CHECK-PIC: bl __tls_get_addr(PLT)
- %tmp1 = load i32* @i ; <i32> [#uses=1]
+ %tmp1 = load i32, i32* @i ; <i32> [#uses=1]
ret i32 %tmp1
}
diff --git a/test/CodeGen/Thumb2/tpsoft.ll b/test/CodeGen/Thumb2/tpsoft.ll
index 6ab8bf01761b..de358d98d49c 100644
--- a/test/CodeGen/Thumb2/tpsoft.ll
+++ b/test/CodeGen/Thumb2/tpsoft.ll
@@ -16,7 +16,7 @@
define arm_aapcs_vfpcc i32 @main() nounwind {
entry:
- %0 = load i32* @i, align 4
+ %0 = load i32, i32* @i, align 4
switch i32 %0, label %bb2 [
i32 12, label %bb
i32 13, label %bb1
@@ -32,13 +32,13 @@ bb: ; preds = %entry
; ELFOBJ: Section {
; ELFOBJ: Name: .text
; ELFOBJ-LE: SectionData (
-;;; BL __aeabi_read_tp is ---------+
-;;; V
-; ELFOBJ-LE-NEXT: 0000: 2DE90048 0E487844 0168FFF7 FEFF4058
+;;; BL __aeabi_read_tp is ---+
+;;; V
+; ELFOBJ-LE-NEXT: 0000: 80B50E48 78440168 FFF7FEFF 40580D28
; ELFOBJ-BE: SectionData (
-;;; BL __aeabi_read_tp is ---------+
-;;; V
-; ELFOBJ-BE-NEXT: 0000: E92D4800 480E4478 6801F7FF FFFE5840
+;;; BL __aeabi_read_tp is ---+
+;;; V
+; ELFOBJ-BE-NEXT: 0000: B580480E 44786801 F7FFFFFE 5840280D
bb1: ; preds = %entry
diff --git a/test/CodeGen/Thumb2/v8_IT_2.ll b/test/CodeGen/Thumb2/v8_IT_2.ll
index 170b4135b536..9a3f263c5259 100644
--- a/test/CodeGen/Thumb2/v8_IT_2.ll
+++ b/test/CodeGen/Thumb2/v8_IT_2.ll
@@ -16,9 +16,9 @@ entry:
br label %tailrecurse
tailrecurse: ; preds = %bb, %entry
- %tmp6 = load %struct.quad_struct** null ; <%struct.quad_struct*> [#uses=1]
- %tmp9 = load %struct.quad_struct** null ; <%struct.quad_struct*> [#uses=2]
- %tmp12 = load %struct.quad_struct** null ; <%struct.quad_struct*> [#uses=1]
+ %tmp6 = load %struct.quad_struct*, %struct.quad_struct** null ; <%struct.quad_struct*> [#uses=1]
+ %tmp9 = load %struct.quad_struct*, %struct.quad_struct** null ; <%struct.quad_struct*> [#uses=2]
+ %tmp12 = load %struct.quad_struct*, %struct.quad_struct** null ; <%struct.quad_struct*> [#uses=1]
%tmp14 = icmp eq %struct.quad_struct* null, null ; <i1> [#uses=1]
%tmp17 = icmp eq %struct.quad_struct* %tmp6, null ; <i1> [#uses=1]
%tmp23 = icmp eq %struct.quad_struct* %tmp9, null ; <i1> [#uses=1]
diff --git a/test/CodeGen/Thumb2/v8_IT_3.ll b/test/CodeGen/Thumb2/v8_IT_3.ll
index a028deebc8e8..3ccee5fbb8ca 100644
--- a/test/CodeGen/Thumb2/v8_IT_3.ll
+++ b/test/CodeGen/Thumb2/v8_IT_3.ll
@@ -21,17 +21,17 @@ entry:
%block_count = alloca i32, align 4
%index_cache = alloca i32, align 4
store i32 0, i32* %index_cache, align 4
- %tmp = load i32* @G, align 4
+ %tmp = load i32, i32* @G, align 4
%tmp1 = call i32 @bar(i32 0, i32 0, i32 %tmp) nounwind
switch i32 %tmp1, label %bb8 [
- i32 0, label %bb
+ i32 1, label %bb
i32 536870913, label %bb4
i32 536870914, label %bb6
]
bb:
- %tmp2 = load i32* @G, align 4
- %tmp4 = icmp eq i32 %tmp2, 0
+ %tmp2 = load i32, i32* @G, align 4
+ %tmp4 = icmp eq i32 %tmp2, 1
br i1 %tmp4, label %bb1, label %bb8
bb1:
@@ -41,14 +41,14 @@ bb1:
; CHECK-NEXT: it eq
; CHECK-NEXT: cmpeq
; CHECK: %bb1
- %tmp5 = load i32* %block_size, align 4
- %tmp6 = load i32* %block_count, align 4
+ %tmp5 = load i32, i32* %block_size, align 4
+ %tmp6 = load i32, i32* %block_count, align 4
%tmp7 = call %struct.FF* @Get() nounwind
store %struct.FF* %tmp7, %struct.FF** @FuncPtr, align 4
%tmp10 = zext i32 %tmp6 to i64
%tmp11 = zext i32 %tmp5 to i64
%tmp12 = mul nsw i64 %tmp10, %tmp11
- %tmp13 = call i32 @foo(i8* getelementptr inbounds ([6 x i8]* @.str1, i32 0, i32 0), i64 %tmp12, i32 %tmp5) nounwind
+ %tmp13 = call i32 @foo(i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str1, i32 0, i32 0), i64 %tmp12, i32 %tmp5) nounwind
br label %bb8
bb4:
diff --git a/test/CodeGen/Thumb2/v8_IT_5.ll b/test/CodeGen/Thumb2/v8_IT_5.ll
index 2da75ad21436..78b80d7dcdef 100644
--- a/test/CodeGen/Thumb2/v8_IT_5.ll
+++ b/test/CodeGen/Thumb2/v8_IT_5.ll
@@ -3,8 +3,7 @@
; CHECK: it ne
; CHECK-NEXT: cmpne
; CHECK-NEXT: bne [[JUMPTARGET:.LBB[0-9]+_[0-9]+]]
-; CHECK: cmp
-; CHECK-NEXT: beq
+; CHECK: cbz
; CHECK-NEXT: %if.else163
; CHECK-NEXT: mov.w
; CHECK-NEXT: b
diff --git a/test/CodeGen/WinEH/cppeh-alloca-sink.ll b/test/CodeGen/WinEH/cppeh-alloca-sink.ll
new file mode 100644
index 000000000000..d50237fa78a7
--- /dev/null
+++ b/test/CodeGen/WinEH/cppeh-alloca-sink.ll
@@ -0,0 +1,180 @@
+; RUN: opt -mtriple=x86_64-pc-windows-msvc -winehprepare -S -o - < %s | FileCheck %s
+
+; This test describes two difficult cases in sinking allocas into child frames.
+; We don't currently do this optimization, but we'll need to tweak these tests
+; when we do.
+
+; This test is based on the following code:
+;
+; // In this case we can sink the alloca from the parent into the catch because
+; // the lifetime is limited to the catch.
+; extern "C" void may_throw();
+; extern "C" void sink_alloca_to_catch() {
+; try {
+; may_throw();
+; } catch (int) {
+; volatile int only_used_in_catch = 42;
+; }
+; }
+;
+; // In this case we cannot. The variable should live as long as the parent
+; // frame lives.
+; extern "C" void use_catch_var(int *);
+; extern "C" void dont_sink_alloca_to_catch(int n) {
+; int live_in_out_catch = 0;
+; while (n > 0) {
+; try {
+; may_throw();
+; } catch (int) {
+; use_catch_var(&live_in_out_catch);
+; }
+; n--;
+; }
+; }
+
+target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-windows-msvc"
+
+declare void @may_throw() #1
+declare i32 @__CxxFrameHandler3(...)
+declare i32 @llvm.eh.typeid.for(i8*) #2
+declare void @llvm.eh.begincatch(i8* nocapture, i8* nocapture) #3
+declare void @llvm.eh.endcatch() #3
+
+%rtti.TypeDescriptor2 = type { i8**, i8*, [3 x i8] }
+%eh.CatchHandlerType = type { i32, i8* }
+
+$"\01??_R0H@8" = comdat any
+
+@"\01??_7type_info@@6B@" = external constant i8*
+@"\01??_R0H@8" = linkonce_odr global %rtti.TypeDescriptor2 { i8** @"\01??_7type_info@@6B@", i8* null, [3 x i8] c".H\00" }, comdat
+@llvm.eh.handlertype.H.0 = private unnamed_addr constant %eh.CatchHandlerType { i32 0, i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i8*) }, section "llvm.metadata"
+
+; Function Attrs: uwtable
+define void @sink_alloca_to_catch() #0 {
+entry:
+ %0 = alloca i32
+ %only_used_in_catch = alloca i32, align 4
+ invoke void @may_throw()
+ to label %try.cont unwind label %lpad
+
+lpad: ; preds = %entry
+ %1 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+ catch %eh.CatchHandlerType* @llvm.eh.handlertype.H.0
+ %2 = extractvalue { i8*, i32 } %1, 1
+ %3 = tail call i32 @llvm.eh.typeid.for(i8* bitcast (%eh.CatchHandlerType* @llvm.eh.handlertype.H.0 to i8*)) #3
+ %matches = icmp eq i32 %2, %3
+ br i1 %matches, label %catch, label %eh.resume
+
+catch: ; preds = %lpad
+ %4 = extractvalue { i8*, i32 } %1, 0
+ call void @llvm.eh.begincatch(i8* %4, i8* null) #3
+ store volatile i32 42, i32* %only_used_in_catch, align 4
+ tail call void @llvm.eh.endcatch() #3
+ br label %try.cont
+
+try.cont: ; preds = %entry, %catch
+ ret void
+
+eh.resume: ; preds = %lpad
+ resume { i8*, i32 } %1
+}
+
+; CHECK-LABEL: define void @sink_alloca_to_catch()
+; CHECK: call void (...) @llvm.frameescape(i32* %only_used_in_catch)
+
+declare void @use_catch_var(i32*) #1
+
+; Function Attrs: uwtable
+define void @dont_sink_alloca_to_catch(i32 %n) #0 {
+entry:
+ %0 = alloca i32
+ %n.addr = alloca i32, align 4
+ %live_in_out_catch = alloca i32, align 4
+ %exn.slot = alloca i8*
+ %ehselector.slot = alloca i32
+ store i32 %n, i32* %n.addr, align 4
+ br label %while.cond
+
+while.cond: ; preds = %try.cont, %entry
+ %1 = load i32, i32* %n.addr, align 4
+ %cmp = icmp sgt i32 %1, 0
+ br i1 %cmp, label %while.body, label %while.end
+
+while.body: ; preds = %while.cond
+ invoke void @may_throw()
+ to label %invoke.cont unwind label %lpad
+
+invoke.cont: ; preds = %while.body
+ br label %try.cont
+
+lpad: ; preds = %while.body
+ %2 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+ catch i8* bitcast (%eh.CatchHandlerType* @llvm.eh.handlertype.H.0 to i8*)
+ %3 = extractvalue { i8*, i32 } %2, 0
+ store i8* %3, i8** %exn.slot
+ %4 = extractvalue { i8*, i32 } %2, 1
+ store i32 %4, i32* %ehselector.slot
+ br label %catch.dispatch
+
+catch.dispatch: ; preds = %lpad
+ %sel = load i32, i32* %ehselector.slot
+ %5 = call i32 @llvm.eh.typeid.for(i8* bitcast (%eh.CatchHandlerType* @llvm.eh.handlertype.H.0 to i8*)) #3
+ %matches = icmp eq i32 %sel, %5
+ br i1 %matches, label %catch, label %eh.resume
+
+catch: ; preds = %catch.dispatch
+ %exn = load i8*, i8** %exn.slot
+ call void @llvm.eh.begincatch(i8* %exn, i8* null) #3
+ invoke void @use_catch_var(i32* %live_in_out_catch)
+ to label %invoke.cont2 unwind label %lpad1
+
+invoke.cont2: ; preds = %catch
+ call void @llvm.eh.endcatch() #3
+ br label %try.cont
+
+try.cont: ; preds = %invoke.cont2, %invoke.cont
+ %6 = load i32, i32* %0
+ %7 = load i32, i32* %n.addr, align 4
+ %dec = add nsw i32 %7, -1
+ store i32 %dec, i32* %n.addr, align 4
+ br label %while.cond
+
+lpad1: ; preds = %catch
+ %8 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+ cleanup
+ %9 = extractvalue { i8*, i32 } %8, 0
+ store i8* %9, i8** %exn.slot
+ %10 = extractvalue { i8*, i32 } %8, 1
+ store i32 %10, i32* %ehselector.slot
+ call void @llvm.eh.endcatch() #3
+ br label %eh.resume
+
+while.end: ; preds = %while.cond
+ ret void
+
+eh.resume: ; preds = %lpad1, %catch.dispatch
+ %exn3 = load i8*, i8** %exn.slot
+ %sel4 = load i32, i32* %ehselector.slot
+ %lpad.val = insertvalue { i8*, i32 } undef, i8* %exn3, 0
+ %lpad.val5 = insertvalue { i8*, i32 } %lpad.val, i32 %sel4, 1
+ resume { i8*, i32 } %lpad.val5
+}
+
+; CHECK-LABEL: define void @dont_sink_alloca_to_catch(i32 %n)
+; CHECK: call void (...) @llvm.frameescape(i32* %live_in_out_catch)
+
+; CHECK-LABEL: define internal i8* @sink_alloca_to_catch.catch(i8*, i8*)
+; CHECK: %only_used_in_catch.i8 = call i8* @llvm.framerecover({{.*}}, i32 0)
+; CHECK: %only_used_in_catch = bitcast
+
+; CHECK-LABEL: define internal i8* @dont_sink_alloca_to_catch.catch(i8*, i8*)
+; CHECK: %live_in_out_catch.i8 = call i8* @llvm.framerecover({{.*}}, i32 0)
+; CHECK: %live_in_out_catch = bitcast
+
+
+
+attributes #0 = { uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { nounwind readnone }
+attributes #3 = { nounwind }
diff --git a/test/CodeGen/WinEH/cppeh-catch-all.ll b/test/CodeGen/WinEH/cppeh-catch-all.ll
new file mode 100644
index 000000000000..a6c94d400797
--- /dev/null
+++ b/test/CodeGen/WinEH/cppeh-catch-all.ll
@@ -0,0 +1,97 @@
+; RUN: opt -mtriple=x86_64-pc-windows-msvc -winehprepare -S -o - < %s | FileCheck %s
+
+; This test is based on the following code:
+;
+; void test()
+; {
+; try {
+; may_throw();
+; } catch (...) {
+; handle_exception();
+; }
+; }
+;
+; Parts of the IR have been hand-edited to simplify the test case.
+; The full IR will be restored when Windows C++ EH support is complete.
+
+; ModuleID = 'catch-all.cpp'
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-windows-msvc"
+
+; The function entry in this case remains unchanged.
+; CHECK: define void @_Z4testv()
+; CHECK: entry:
+; CHECK: invoke void @_Z9may_throwv()
+; CHECK: to label %invoke.cont unwind label %[[LPAD_LABEL:lpad[0-9]*]]
+
+; Function Attrs: uwtable
+define void @_Z4testv() #0 {
+entry:
+ %exn.slot = alloca i8*
+ %ehselector.slot = alloca i32
+ invoke void @_Z9may_throwv()
+ to label %invoke.cont unwind label %lpad
+
+invoke.cont: ; preds = %entry
+ br label %try.cont
+
+; CHECK: [[LPAD_LABEL]]:{{[ ]+}}; preds = %entry
+; CHECK: landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+; CHECK-NEXT: catch i8* null
+; CHECK-NEXT: [[RECOVER:\%.+]] = call i8* (...) @llvm.eh.actions(i32 1, i8* null, i32 -1, i8* (i8*, i8*)* @_Z4testv.catch)
+; CHECK-NEXT: indirectbr i8* [[RECOVER]], [label %try.cont]
+
+lpad: ; preds = %entry
+ %tmp = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+ catch i8* null
+ %tmp1 = extractvalue { i8*, i32 } %tmp, 0
+ store i8* %tmp1, i8** %exn.slot
+ %tmp2 = extractvalue { i8*, i32 } %tmp, 1
+ store i32 %tmp2, i32* %ehselector.slot
+ br label %catch
+
+; CHECK-NOT: catch:
+
+catch: ; preds = %lpad
+ %exn = load i8*, i8** %exn.slot
+ call void @llvm.eh.begincatch(i8* %exn, i8* null) #2
+ call void @_Z16handle_exceptionv()
+ br label %invoke.cont2
+
+; CHECK-NOT: invoke.cont2:
+
+invoke.cont2: ; preds = %catch
+ call void @llvm.eh.endcatch()
+ br label %try.cont
+
+try.cont: ; preds = %invoke.cont2, %invoke.cont
+ ret void
+
+; CHECK: }
+}
+
+; CHECK: define internal i8* @_Z4testv.catch(i8*, i8*)
+; CHECK: entry:
+; CHECK: call void @_Z16handle_exceptionv()
+; CHECK: ret i8* blockaddress(@_Z4testv, %try.cont)
+; CHECK: }
+
+declare void @_Z9may_throwv() #1
+
+declare i32 @__CxxFrameHandler3(...)
+
+declare void @llvm.eh.begincatch(i8*, i8*)
+
+declare void @_Z16handle_exceptionv() #1
+
+declare void @llvm.eh.endcatch()
+
+attributes #0 = { uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { noinline noreturn nounwind }
+attributes #3 = { nounwind }
+attributes #4 = { noreturn nounwind }
+
+!llvm.ident = !{!0}
+
+!0 = !{!"clang version 3.7.0 (trunk 226027)"}
diff --git a/test/CodeGen/WinEH/cppeh-catch-and-throw.ll b/test/CodeGen/WinEH/cppeh-catch-and-throw.ll
new file mode 100644
index 000000000000..c60a339f6ba2
--- /dev/null
+++ b/test/CodeGen/WinEH/cppeh-catch-and-throw.ll
@@ -0,0 +1,143 @@
+; RUN: opt -mtriple=x86_64-pc-windows-msvc -winehprepare -S -o - < %s | FileCheck %s
+
+; This test is based on the following code:
+;
+; class Obj {
+; public:
+; ~Obj();
+; };
+;
+; void test(void)
+; {
+; try {
+; Obj o;
+; throw 1;
+; } catch (...) {
+; throw;
+; }
+; }
+
+; ModuleID = 'cppeh-catch-and-throw.cpp'
+target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-windows-msvc"
+
+%rtti.TypeDescriptor2 = type { i8**, i8*, [3 x i8] }
+%eh.CatchableType = type { i32, i32, i32, i32, i32, i32, i32 }
+%eh.CatchableTypeArray.1 = type { i32, [1 x i32] }
+%eh.ThrowInfo = type { i32, i32, i32, i32 }
+%class.Obj = type { i8 }
+
+$"\01??_R0H@8" = comdat any
+
+$"_CT??_R0H@84" = comdat any
+
+$_CTA1H = comdat any
+
+$_TI1H = comdat any
+
+@"\01??_7type_info@@6B@" = external constant i8*
+@"\01??_R0H@8" = linkonce_odr global %rtti.TypeDescriptor2 { i8** @"\01??_7type_info@@6B@", i8* null, [3 x i8] c".H\00" }, comdat
+@__ImageBase = external constant i8
+@"_CT??_R0H@84" = linkonce_odr unnamed_addr constant %eh.CatchableType { i32 1, i32 trunc (i64 sub nuw nsw (i64 ptrtoint (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i64), i64 ptrtoint (i8* @__ImageBase to i64)) to i32), i32 0, i32 -1, i32 0, i32 4, i32 0 }, section ".xdata", comdat
+@_CTA1H = linkonce_odr unnamed_addr constant %eh.CatchableTypeArray.1 { i32 1, [1 x i32] [i32 trunc (i64 sub nuw nsw (i64 ptrtoint (%eh.CatchableType* @"_CT??_R0H@84" to i64), i64 ptrtoint (i8* @__ImageBase to i64)) to i32)] }, section ".xdata", comdat
+@_TI1H = linkonce_odr unnamed_addr constant %eh.ThrowInfo { i32 0, i32 0, i32 0, i32 trunc (i64 sub nuw nsw (i64 ptrtoint (%eh.CatchableTypeArray.1* @_CTA1H to i64), i64 ptrtoint (i8* @__ImageBase to i64)) to i32) }, section ".xdata", comdat
+
+; This is just a minimal check to verify that the function was handled by WinEHPrepare.
+; CHECK: define void @"\01?test@@YAXXZ"()
+; CHECK: entry:
+; CHECK: call void (...) @llvm.frameescape
+; CHECK: invoke void @_CxxThrowException
+; CHECK: }
+
+; Function Attrs: uwtable
+define void @"\01?test@@YAXXZ"() #0 {
+entry:
+ %o = alloca %class.Obj, align 1
+ %tmp = alloca i32, align 4
+ %exn.slot = alloca i8*
+ %ehselector.slot = alloca i32
+ store i32 1, i32* %tmp
+ %0 = bitcast i32* %tmp to i8*
+ invoke void @_CxxThrowException(i8* %0, %eh.ThrowInfo* @_TI1H) #3
+ to label %unreachable unwind label %lpad
+
+lpad: ; preds = %entry
+ %1 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+ catch i8* null
+ %2 = extractvalue { i8*, i32 } %1, 0
+ store i8* %2, i8** %exn.slot
+ %3 = extractvalue { i8*, i32 } %1, 1
+ store i32 %3, i32* %ehselector.slot
+ call void @"\01??1Obj@@QEAA@XZ"(%class.Obj* %o) #2
+ br label %catch
+
+catch: ; preds = %lpad
+ %exn = load i8*, i8** %exn.slot
+ call void @llvm.eh.begincatch(i8* %exn, i8* null) #2
+ invoke void @_CxxThrowException(i8* null, %eh.ThrowInfo* null) #3
+ to label %unreachable unwind label %lpad1
+
+lpad1: ; preds = %catch
+ %4 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+ cleanup
+ %5 = extractvalue { i8*, i32 } %4, 0
+ store i8* %5, i8** %exn.slot
+ %6 = extractvalue { i8*, i32 } %4, 1
+ store i32 %6, i32* %ehselector.slot
+ call void @llvm.eh.endcatch() #2
+ br label %eh.resume
+
+try.cont: ; No predecessors!
+ ret void
+
+eh.resume: ; preds = %lpad1
+ %exn2 = load i8*, i8** %exn.slot
+ %sel = load i32, i32* %ehselector.slot
+ %lpad.val = insertvalue { i8*, i32 } undef, i8* %exn2, 0
+ %lpad.val3 = insertvalue { i8*, i32 } %lpad.val, i32 %sel, 1
+ resume { i8*, i32 } %lpad.val3
+
+unreachable: ; preds = %catch, %entry
+ unreachable
+}
+
+; Verify that we inserted a stub invoke into the outlined cleanup handler.
+;
+; CHECK-LABEL: define internal void @"\01?test@@YAXXZ.cleanup"(i8*, i8*)
+; CHECK: entry:
+; CHECK: call i8* @llvm.framerecover
+; CHECK: call void @"\01??1Obj@@QEAA@XZ"
+; CHECK: invoke void @llvm.donothing()
+; CHECK: to label %[[SPLIT_LABEL:.+]] unwind label %[[LPAD_LABEL:.+]]
+;
+; CHECK: [[SPLIT_LABEL]]
+;
+; CHECK: [[LPAD_LABEL]]
+; CHECK: landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+; CHECK: cleanup
+; CHECK: unreachable
+; CHECK: }
+
+declare void @_CxxThrowException(i8*, %eh.ThrowInfo*)
+
+declare i32 @__CxxFrameHandler3(...)
+
+; Function Attrs: nounwind
+declare void @"\01??1Obj@@QEAA@XZ"(%class.Obj*) #1
+
+; Function Attrs: nounwind
+declare void @llvm.eh.begincatch(i8* nocapture, i8* nocapture) #2
+
+; Function Attrs: nounwind
+declare void @llvm.eh.endcatch() #2
+
+attributes #0 = { uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { nounwind }
+attributes #3 = { noreturn }
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{i32 1, !"PIC Level", i32 2}
+!1 = !{!"clang version 3.7.0 (trunk 235214) (llvm/trunk 235213)"}
diff --git a/test/CodeGen/WinEH/cppeh-catch-scalar.ll b/test/CodeGen/WinEH/cppeh-catch-scalar.ll
new file mode 100644
index 000000000000..4faef82a75fc
--- /dev/null
+++ b/test/CodeGen/WinEH/cppeh-catch-scalar.ll
@@ -0,0 +1,126 @@
+; RUN: opt -mtriple=x86_64-pc-windows-msvc -winehprepare -S -o - < %s | FileCheck %s
+
+; This test is based on the following code:
+;
+; void test()
+; {
+; try {
+; may_throw();
+; } catch (int i) {
+; handle_int(i);
+; }
+; }
+;
+; Parts of the IR have been hand-edited to simplify the test case.
+; The full IR will be restored when Windows C++ EH support is complete.
+
+; ModuleID = 'cppeh-catch-scalar.cpp'
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-windows-msvc"
+
+@_ZTIi = external constant i8*
+
+; The function entry will be rewritten like this.
+; CHECK: define void @_Z4testv()
+; CHECK: entry:
+; CHECK: [[I_PTR:\%.+]] = alloca i32, align 4
+; CHECK: call void (...) @llvm.frameescape(i32* [[I_PTR]])
+; CHECK: invoke void @_Z9may_throwv()
+; CHECK: to label %invoke.cont unwind label %[[LPAD_LABEL:lpad[0-9]*]]
+
+; Function Attrs: uwtable
+define void @_Z4testv() #0 {
+entry:
+ %exn.slot = alloca i8*
+ %ehselector.slot = alloca i32
+ %i = alloca i32, align 4
+ invoke void @_Z9may_throwv()
+ to label %invoke.cont unwind label %lpad
+
+invoke.cont: ; preds = %entry
+ br label %try.cont
+
+; CHECK: [[LPAD_LABEL]]:{{[ ]+}}; preds = %entry
+; CHECK: landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+; CHECK-NEXT: catch i8* bitcast (i8** @_ZTIi to i8*)
+; CHECK-NEXT: [[RECOVER:\%.+]] = call i8* (...) @llvm.eh.actions(i32 1, i8* bitcast (i8** @_ZTIi to i8*), i32 0, i8* (i8*, i8*)* @_Z4testv.catch)
+; CHECK-NEXT: indirectbr i8* [[RECOVER]], [label %try.cont]
+
+lpad: ; preds = %entry
+ %tmp = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+ catch i8* bitcast (i8** @_ZTIi to i8*)
+ %tmp1 = extractvalue { i8*, i32 } %tmp, 0
+ store i8* %tmp1, i8** %exn.slot
+ %tmp2 = extractvalue { i8*, i32 } %tmp, 1
+ store i32 %tmp2, i32* %ehselector.slot
+ br label %catch.dispatch
+
+; CHECK-NOT: catch-dispatch:
+
+catch.dispatch: ; preds = %lpad
+ %sel = load i32, i32* %ehselector.slot
+ %tmp3 = call i32 @llvm.eh.typeid.for(i8* bitcast (i8** @_ZTIi to i8*)) #3
+ %matches = icmp eq i32 %sel, %tmp3
+ br i1 %matches, label %catch, label %eh.resume
+
+; CHECK-NOT: catch:
+
+catch: ; preds = %catch.dispatch
+ %exn11 = load i8*, i8** %exn.slot
+ %i.i8 = bitcast i32* %i to i8*
+ call void @llvm.eh.begincatch(i8* %exn11, i8* %i.i8) #3
+ %tmp7 = load i32, i32* %i, align 4
+ call void @_Z10handle_inti(i32 %tmp7)
+ br label %invoke.cont2
+
+; CHECK-NOT: invoke.cont2:
+
+invoke.cont2: ; preds = %catch
+ call void @llvm.eh.endcatch() #3
+ br label %try.cont
+
+try.cont: ; preds = %invoke.cont2, %invoke.cont
+ ret void
+
+; CHECK-NOT: eh.resume:
+
+eh.resume: ; preds = %catch.dispatch
+ %exn3 = load i8*, i8** %exn.slot
+ %sel4 = load i32, i32* %ehselector.slot
+ %lpad.val = insertvalue { i8*, i32 } undef, i8* %exn3, 0
+ %lpad.val5 = insertvalue { i8*, i32 } %lpad.val, i32 %sel4, 1
+ resume { i8*, i32 } %lpad.val5
+
+; CHECK: }
+}
+
+; CHECK: define internal i8* @_Z4testv.catch(i8*, i8*)
+; CHECK: entry:
+; CHECK: [[RECOVER_I:\%.+]] = call i8* @llvm.framerecover(i8* bitcast (void ()* @_Z4testv to i8*), i8* %1, i32 0)
+; CHECK: [[I_PTR1:\%.+]] = bitcast i8* [[RECOVER_I]] to i32*
+; CHECK: [[TMP:\%.+]] = load i32, i32* [[I_PTR1]], align 4
+; CHECK: call void @_Z10handle_inti(i32 [[TMP]])
+; CHECK: ret i8* blockaddress(@_Z4testv, %try.cont)
+; CHECK: }
+
+declare void @_Z9may_throwv() #1
+
+declare i32 @__CxxFrameHandler3(...)
+
+; Function Attrs: nounwind readnone
+declare i32 @llvm.eh.typeid.for(i8*) #2
+
+declare void @llvm.eh.begincatch(i8*, i8*)
+
+declare void @llvm.eh.endcatch()
+
+declare void @_Z10handle_inti(i32) #1
+
+attributes #0 = { uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { nounwind readnone }
+attributes #3 = { nounwind }
+
+!llvm.ident = !{!0}
+
+!0 = !{!"clang version 3.7.0 (trunk 227474) (llvm/trunk 227508)"}
diff --git a/test/CodeGen/WinEH/cppeh-catch-unwind.ll b/test/CodeGen/WinEH/cppeh-catch-unwind.ll
new file mode 100644
index 000000000000..0fd735be57a1
--- /dev/null
+++ b/test/CodeGen/WinEH/cppeh-catch-unwind.ll
@@ -0,0 +1,240 @@
+; RUN: opt -mtriple=x86_64-pc-windows-msvc -winehprepare -S -o - < %s | FileCheck %s
+
+; This test was generated from the following source:
+;
+; void test() {
+; try {
+; SomeClass obj;
+; may_throw();
+; try {
+; may_throw();
+; } catch (int) {
+; handle_exception();
+; }
+; } catch (int) {
+; handle_exception();
+; }
+; }
+;
+; The code above was compiled with the -O2 option.
+
+; ModuleID = 'catch-unwind.cpp'
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-windows-msvc"
+
+%rtti.TypeDescriptor2 = type { i8**, i8*, [3 x i8] }
+%class.SomeClass = type { i8 }
+
+$"\01??_R0H@8" = comdat any
+
+@"\01??_7type_info@@6B@" = external constant i8*
+@"\01??_R0H@8" = linkonce_odr global %rtti.TypeDescriptor2 { i8** @"\01??_7type_info@@6B@", i8* null, [3 x i8] c".H\00" }, comdat
+
+
+; CHECK-LABEL: define void @"\01?test@@YAXXZ"() #0 {
+; CHECK: entry:
+; CHECK: [[OBJ_PTR:\%.+]] = alloca %class.SomeClass
+; CHECK: [[TMP0:\%.+]] = alloca i32, align 4
+; CHECK: [[TMP1:\%.+]] = alloca i32, align 4
+; CHECK: call void (...) @llvm.frameescape(i32* [[TMP1]], %class.SomeClass* [[OBJ_PTR]], i32* [[TMP0]])
+; CHECK: %call = invoke %class.SomeClass* @"\01??0SomeClass@@QEAA@XZ"(%class.SomeClass* %obj)
+; CHECK: to label %invoke.cont unwind label %[[LPAD_LABEL:lpad[0-9]*]]
+
+; Function Attrs: uwtable
+define void @"\01?test@@YAXXZ"() #0 {
+entry:
+ %obj = alloca %class.SomeClass, align 1
+ %0 = alloca i32, align 4
+ %1 = alloca i32, align 4
+ %call = invoke %class.SomeClass* @"\01??0SomeClass@@QEAA@XZ"(%class.SomeClass* %obj)
+ to label %invoke.cont unwind label %lpad
+
+; CHECK: invoke.cont:
+; CHECK: invoke void @"\01?may_throw@@YAXXZ"()
+; CHECK: to label %invoke.cont2 unwind label %[[LPAD1_LABEL:lpad[0-9]*]]
+
+invoke.cont: ; preds = %entry
+ invoke void @"\01?may_throw@@YAXXZ"()
+ to label %invoke.cont2 unwind label %lpad1
+
+; CHECK: invoke.cont2:
+; CHECK: invoke void @"\01?may_throw@@YAXXZ"()
+; CHECK: to label %try.cont unwind label %[[LPAD3_LABEL:lpad[0-9]*]]
+
+invoke.cont2: ; preds = %invoke.cont
+ invoke void @"\01?may_throw@@YAXXZ"()
+ to label %try.cont unwind label %lpad3
+
+; CHECK: [[LPAD_LABEL]]:{{[ ]+}}; preds = %entry
+; CHECK: [[LPAD_VAL:\%.+]] = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+; CHECK-NEXT: catch i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i8*)
+; CHECK-NEXT: [[RECOVER:\%.+]] = call i8* (...) @llvm.eh.actions(i32 1, i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i8*), i32 0, i8* (i8*, i8*)* @"\01?test@@YAXXZ.catch")
+; CHECK-NEXT: indirectbr i8* [[RECOVER]], [label %try.cont15]
+
+lpad: ; preds = %entry
+ %2 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+ catch i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i8*)
+ %3 = extractvalue { i8*, i32 } %2, 0
+ %4 = extractvalue { i8*, i32 } %2, 1
+ br label %catch.dispatch7
+
+; CHECK: [[LPAD1_LABEL]]:{{[ ]+}}; preds = %invoke.cont
+; CHECK: [[LPAD1_VAL:\%.+]] = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+; CHECK-NEXT: cleanup
+; CHECK-NEXT: catch i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i8*)
+; CHECK-NEXT: [[RECOVER1:\%.+]] = call i8* (...) @llvm.eh.actions(i32 0, void (i8*, i8*)* @"\01?test@@YAXXZ.cleanup", i32 1, i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i8*), i32 0, i8* (i8*, i8*)* @"\01?test@@YAXXZ.catch")
+; CHECK-NEXT: indirectbr i8* [[RECOVER1]], [label %try.cont15]
+
+lpad1: ; preds = %invoke.cont
+ %5 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+ cleanup
+ catch i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i8*)
+ %6 = extractvalue { i8*, i32 } %5, 0
+ %7 = extractvalue { i8*, i32 } %5, 1
+ br label %ehcleanup
+
+; CHECK: [[LPAD3_LABEL]]:{{[ ]+}}; preds = %invoke.cont2
+; CHECK: [[LPAD3_VAL:\%.+]] = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+; CHECK-NEXT: cleanup
+; CHECK-NEXT: catch i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i8*)
+; CHECK-NEXT: [[RECOVER3:\%.+]] = call i8* (...) @llvm.eh.actions(i32 1, i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i8*), i32 2, i8* (i8*, i8*)* @"\01?test@@YAXXZ.catch.1", i32 0, void (i8*, i8*)* @"\01?test@@YAXXZ.cleanup")
+; CHECK-NEXT: indirectbr i8* [[RECOVER3]], [label %try.cont, label %try.cont15]
+
+lpad3: ; preds = %invoke.cont2
+ %8 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+ cleanup
+ catch i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i8*)
+ %9 = extractvalue { i8*, i32 } %8, 0
+ %10 = extractvalue { i8*, i32 } %8, 1
+ %11 = call i32 @llvm.eh.typeid.for(i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i8*)) #3
+ %matches = icmp eq i32 %10, %11
+ br i1 %matches, label %catch, label %ehcleanup
+
+; CHECK-NOT: catch:
+catch: ; preds = %lpad3
+ %12 = bitcast i32* %0 to i8*
+ call void @llvm.eh.begincatch(i8* %9, i8* %12) #3
+ invoke void @"\01?handle_exception@@YAXXZ"()
+ to label %invoke.cont6 unwind label %lpad5
+
+; CHECK-NOT: invoke.cont6:
+invoke.cont6: ; preds = %catch
+ call void @llvm.eh.endcatch() #3
+ br label %try.cont
+
+try.cont: ; preds = %invoke.cont2, %invoke.cont6
+ call void @"\01??1SomeClass@@QEAA@XZ"(%class.SomeClass* %obj) #3
+ br label %try.cont15
+
+; CHECK-NOT: lpad5:
+lpad5: ; preds = %catch
+ %13 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+ cleanup
+ catch i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i8*)
+ %14 = extractvalue { i8*, i32 } %13, 0
+ %15 = extractvalue { i8*, i32 } %13, 1
+ call void @llvm.eh.endcatch() #3
+ br label %ehcleanup
+
+; CHECK-NOT: ehcleanup
+ehcleanup: ; preds = %lpad5, %lpad3, %lpad1
+ %exn.slot.0 = phi i8* [ %14, %lpad5 ], [ %9, %lpad3 ], [ %6, %lpad1 ]
+ %ehselector.slot.0 = phi i32 [ %15, %lpad5 ], [ %10, %lpad3 ], [ %7, %lpad1 ]
+ call void @"\01??1SomeClass@@QEAA@XZ"(%class.SomeClass* %obj) #3
+ br label %catch.dispatch7
+
+; CHECK-NOT: catch.dispatch7:
+catch.dispatch7: ; preds = %ehcleanup, %lpad
+ %exn.slot.1 = phi i8* [ %exn.slot.0, %ehcleanup ], [ %3, %lpad ]
+ %ehselector.slot.1 = phi i32 [ %ehselector.slot.0, %ehcleanup ], [ %4, %lpad ]
+ %16 = call i32 @llvm.eh.typeid.for(i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i8*)) #3
+ %matches9 = icmp eq i32 %ehselector.slot.1, %16
+ br i1 %matches9, label %catch10, label %eh.resume
+
+; CHECK-NOT: catch10:
+catch10: ; preds = %catch.dispatch7
+ %17 = bitcast i32* %1 to i8*
+ call void @llvm.eh.begincatch(i8* %exn.slot.1, i8* %17) #3
+ call void @"\01?handle_exception@@YAXXZ"()
+ br label %invoke.cont13
+
+; CHECK-NOT: invoke.cont13:
+invoke.cont13: ; preds = %catch10
+ call void @llvm.eh.endcatch() #3
+ br label %try.cont15
+
+try.cont15: ; preds = %invoke.cont13, %try.cont
+ ret void
+
+; CHECK-NOT: eh.resume
+eh.resume: ; preds = %catch.dispatch7
+ %lpad.val = insertvalue { i8*, i32 } undef, i8* %exn.slot.1, 0
+ %lpad.val18 = insertvalue { i8*, i32 } %lpad.val, i32 %ehselector.slot.1, 1
+ resume { i8*, i32 } %lpad.val18
+
+; CHECK: }
+}
+
+; CHECK-LABEL: define internal i8* @"\01?test@@YAXXZ.catch"(i8*, i8*)
+; CHECK: entry:
+; CHECK: [[RECOVER_TMP1:\%.+]] = call i8* @llvm.framerecover(i8* bitcast (void ()* @"\01?test@@YAXXZ" to i8*), i8* %1, i32 0)
+; CHECK: [[TMP1_PTR:\%.+]] = bitcast i8* [[RECOVER_TMP1]] to i32*
+; CHECK: call void @"\01?handle_exception@@YAXXZ"()
+; CHECK: ret i8* blockaddress(@"\01?test@@YAXXZ", %try.cont15)
+; CHECK: }
+
+; CHECK-LABEL: define internal void @"\01?test@@YAXXZ.cleanup"(i8*, i8*)
+; CHECK: entry:
+; CHECK: [[RECOVER_OBJ:\%.+]] = call i8* @llvm.framerecover(i8* bitcast (void ()* @"\01?test@@YAXXZ" to i8*), i8* %1, i32 1)
+; CHECK: [[OBJ_PTR:\%.+]] = bitcast i8* %obj.i8 to %class.SomeClass*
+; CHECK: call void @"\01??1SomeClass@@QEAA@XZ"(%class.SomeClass* [[OBJ_PTR]])
+; CHECK: ret void
+; CHECK: }
+
+; CHECK-LABEL: define internal i8* @"\01?test@@YAXXZ.catch.1"(i8*, i8*)
+; CHECK: entry:
+; CHECK: [[RECOVER_TMP0:\%.+]] = call i8* @llvm.framerecover(i8* bitcast (void ()* @"\01?test@@YAXXZ" to i8*), i8* %1, i32 2)
+; CHECK: [[TMP0_PTR:\%.+]] = bitcast i8* [[RECOVER_TMP0]] to i32*
+; CHECK: invoke void @"\01?handle_exception@@YAXXZ"()
+; CHECK: to label %invoke.cont6 unwind label %[[LPAD5_LABEL:lpad[0-9]+]]
+;
+; CHECK: invoke.cont6: ; preds = %entry
+; CHECK: ret i8* blockaddress(@"\01?test@@YAXXZ", %try.cont)
+;
+; CHECK: [[LPAD5_LABEL]]:{{[ ]+}}; preds = %entry
+; CHECK: [[LPAD5_VAL:\%.+]] = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+; CHECK: cleanup
+; CHECK: catch i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i8*)
+; CHECK: }
+
+declare %class.SomeClass* @"\01??0SomeClass@@QEAA@XZ"(%class.SomeClass* returned) #1
+
+declare i32 @__CxxFrameHandler3(...)
+
+declare void @"\01?may_throw@@YAXXZ"() #1
+
+; Function Attrs: nounwind readnone
+declare i32 @llvm.eh.typeid.for(i8*) #2
+
+; Function Attrs: nounwind
+declare void @llvm.eh.begincatch(i8* nocapture, i8* nocapture) #3
+
+declare void @"\01?handle_exception@@YAXXZ"() #1
+
+; Function Attrs: nounwind
+declare void @llvm.eh.endcatch() #3
+
+; Function Attrs: nounwind
+declare void @"\01??1SomeClass@@QEAA@XZ"(%class.SomeClass*) #4
+
+attributes #0 = { uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { nounwind readnone }
+attributes #3 = { nounwind }
+attributes #4 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{i32 1, !"PIC Level", i32 2}
+!1 = !{!"clang version 3.7.0 (trunk 232069) (llvm/trunk 232070)"}
diff --git a/test/CodeGen/WinEH/cppeh-cleanup-invoke.ll b/test/CodeGen/WinEH/cppeh-cleanup-invoke.ll
new file mode 100644
index 000000000000..5a570431510f
--- /dev/null
+++ b/test/CodeGen/WinEH/cppeh-cleanup-invoke.ll
@@ -0,0 +1,91 @@
+; RUN: opt -winehprepare -S < %s | FileCheck %s
+
+target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-windows-msvc"
+
+; Modified based on this code:
+; struct HasDtor {
+; ~HasDtor();
+; };
+; extern "C" void may_throw();
+; int main() {
+; try {
+; HasDtor o;
+; may_throw();
+; } catch (int) {
+; }
+; }
+
+%rtti.TypeDescriptor2 = type { i8**, i8*, [3 x i8] }
+%eh.CatchHandlerType = type { i32, i8* }
+%struct.HasDtor = type { i8 }
+
+$"\01??_R0H@8" = comdat any
+
+@"\01??_7type_info@@6B@" = external constant i8*
+@"\01??_R0H@8" = linkonce_odr global %rtti.TypeDescriptor2 { i8** @"\01??_7type_info@@6B@", i8* null, [3 x i8] c".H\00" }, comdat
+@llvm.eh.handlertype.H.0 = private unnamed_addr constant %eh.CatchHandlerType { i32 0, i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i8*) }, section "llvm.metadata"
+
+define i32 @main() {
+entry:
+ %o = alloca %struct.HasDtor, align 1
+ invoke void @may_throw()
+ to label %invoke.cont2 unwind label %lpad1
+
+invoke.cont2: ; preds = %invoke.cont
+ call void @"\01??1HasDtor@@QEAA@XZ"(%struct.HasDtor* %o)
+ br label %try.cont
+
+lpad: ; preds = %entry
+ %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+ catch %eh.CatchHandlerType* @llvm.eh.handlertype.H.0
+ %1 = extractvalue { i8*, i32 } %0, 0
+ %2 = extractvalue { i8*, i32 } %0, 1
+ br label %catch.dispatch
+
+lpad1: ; preds = %invoke.cont
+ %3 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+ cleanup
+ catch %eh.CatchHandlerType* @llvm.eh.handlertype.H.0
+ %4 = extractvalue { i8*, i32 } %3, 0
+ %5 = extractvalue { i8*, i32 } %3, 1
+ invoke void @"\01??1HasDtor@@QEAA@XZ"(%struct.HasDtor* %o)
+ to label %catch.dispatch unwind label %lpad
+
+catch.dispatch: ; preds = %lpad1, %lpad
+ %exn.slot.0 = phi i8* [ %4, %lpad1 ], [ %1, %lpad ]
+ %ehselector.slot.0 = phi i32 [ %5, %lpad1 ], [ %2, %lpad ]
+ %6 = call i32 @llvm.eh.typeid.for(i8* bitcast (%eh.CatchHandlerType* @llvm.eh.handlertype.H.0 to i8*))
+ %matches = icmp eq i32 %ehselector.slot.0, %6
+ br i1 %matches, label %catch, label %eh.resume
+
+catch: ; preds = %catch.dispatch
+ call void @llvm.eh.begincatch(i8* %exn.slot.0, i8* null)
+ call void @llvm.eh.endcatch()
+ br label %try.cont
+
+try.cont: ; preds = %catch, %invoke.cont2
+ ret i32 0
+
+eh.resume: ; preds = %catch.dispatch
+ %lpad.val = insertvalue { i8*, i32 } undef, i8* %exn.slot.0, 0
+ %lpad.val5 = insertvalue { i8*, i32 } %lpad.val, i32 %ehselector.slot.0, 1
+ resume { i8*, i32 } %lpad.val5
+}
+
+; CHECK-LABEL: define i32 @main()
+; CHECK: @llvm.eh.actions(i32 0, void (i8*, i8*)* @main.cleanup, i32 1, i8* bitcast (%eh.CatchHandlerType* @llvm.eh.handlertype.H.0 to i8*), i32 -1, i8* (i8*, i8*)* @main.catch)
+
+; CHECK-LABEL: define internal void @main.cleanup(i8*, i8*)
+; CHECK: call void @"\01??1HasDtor@@QEAA@XZ"(%struct.HasDtor* %{{.*}})
+; CHECK: ret void
+
+declare void @may_throw()
+
+declare i32 @__CxxFrameHandler3(...)
+
+declare void @"\01??1HasDtor@@QEAA@XZ"(%struct.HasDtor*)
+
+declare i32 @llvm.eh.typeid.for(i8*)
+declare void @llvm.eh.begincatch(i8* nocapture, i8* nocapture)
+declare void @llvm.eh.endcatch()
diff --git a/test/CodeGen/WinEH/cppeh-demote-liveout.ll b/test/CodeGen/WinEH/cppeh-demote-liveout.ll
new file mode 100644
index 000000000000..48d9b39ca64a
--- /dev/null
+++ b/test/CodeGen/WinEH/cppeh-demote-liveout.ll
@@ -0,0 +1,72 @@
+; RUN: opt -mtriple=x86_64-pc-windows-msvc -winehprepare -S < %s | FileCheck %s
+
+; Notionally based on this C++ source:
+; int liveout_catch(int p) {
+; int val = p + 1;
+; try {
+; might_throw();
+; } catch (int) {
+; val++;
+; }
+; return val;
+; }
+
+declare void @llvm.eh.begincatch(i8*, i8*)
+declare void @llvm.eh.endcatch()
+declare void @might_throw()
+declare i32 @__CxxFrameHandler3(...)
+declare i32 @llvm.eh.typeid.for(i8*)
+
+@typeinfo.int = external global i32
+
+define i32 @liveout_catch(i32 %p) {
+entry:
+ %val.entry = add i32 %p, 1
+ invoke void @might_throw()
+ to label %ret unwind label %lpad
+
+lpad:
+ %ehvals = landingpad { i8*, i32 } personality i32 (...)* @__CxxFrameHandler3
+ cleanup
+ catch i32* @typeinfo.int
+ %ehptr = extractvalue { i8*, i32 } %ehvals, 0
+ %sel = extractvalue { i8*, i32 } %ehvals, 1
+ %int_sel = call i32 @llvm.eh.typeid.for(i8* bitcast (i32* @typeinfo.int to i8*))
+ %match = icmp eq i32 %sel, %int_sel
+ br i1 %match, label %catchit, label %resume
+
+catchit:
+ call void @llvm.eh.begincatch(i8* %ehptr, i8* null)
+ %val.lpad = add i32 %val.entry, 1
+ call void @llvm.eh.endcatch()
+ br label %ret
+
+ret:
+ %rv = phi i32 [%val.entry, %entry], [%val.lpad, %catchit]
+ ret i32 %rv
+
+resume:
+ resume {i8*, i32} %ehvals
+}
+
+; CHECK-LABEL: define i32 @liveout_catch(i32 %p)
+; CHECK: %val.entry = add i32 %p, 1
+; CHECK-NEXT: store i32 %val.entry, i32* %val.entry.reg2mem
+; CHECK: invoke void @might_throw()
+;
+; CHECK: landingpad
+; CHECK: indirectbr i8* {{.*}}, [label %catchit.split]
+;
+; CHECK: catchit.split:
+; CHECK: load i32, i32* %val.lpad.reg2mem
+; CHECK: br label %ret
+;
+; CHECK: ret:
+; CHECK: %rv = phi i32 [ {{.*}}, %entry ], [ {{.*}}, %catchit.split ]
+; CHECK: ret i32
+
+; CHECK-LABEL: define internal i8* @liveout_catch.catch(i8*, i8*)
+; CHECK: %[[val:[^ ]*]] = load i32, i32*
+; CHECK-NEXT: %[[val_lpad:[^ ]*]] = add i32 %[[val]], 1
+; CHECK-NEXT: store i32 %[[val_lpad]], i32*
+; CHECK: ret i8* blockaddress(@liveout_catch, %catchit.split)
diff --git a/test/CodeGen/WinEH/cppeh-frame-vars.ll b/test/CodeGen/WinEH/cppeh-frame-vars.ll
new file mode 100644
index 000000000000..eeda4319a6e6
--- /dev/null
+++ b/test/CodeGen/WinEH/cppeh-frame-vars.ll
@@ -0,0 +1,272 @@
+; RUN: opt -mtriple=x86_64-pc-windows-msvc -winehprepare -S -o - < %s | FileCheck %s
+
+; This test is based on the following code:
+;
+; struct SomeData {
+; int a;
+; int b;
+; };
+;
+; void may_throw();
+; void does_not_throw(int i);
+; void dump(int *, int, SomeData&);
+;
+; void test() {
+; int NumExceptions = 0;
+; int ExceptionVal[10];
+; SomeData Data = { 0, 0 };
+;
+; for (int i = 0; i < 10; ++i) {
+; try {
+; may_throw();
+; Data.a += i;
+; }
+; catch (int e) {
+; ExceptionVal[NumExceptions] = e;
+; ++NumExceptions;
+; if (e == i)
+; Data.b += e;
+; else
+; Data.a += e;
+; }
+; does_not_throw(NumExceptions);
+; }
+; dump(ExceptionVal, NumExceptions, Data);
+; }
+
+; ModuleID = 'cppeh-frame-vars.cpp'
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-windows-msvc"
+
+%rtti.TypeDescriptor2 = type { i8**, i8*, [3 x i8] }
+%struct.SomeData = type { i32, i32 }
+
+$"\01??_R0H@8" = comdat any
+
+@"\01??_7type_info@@6B@" = external constant i8*
+@"\01??_R0H@8" = linkonce_odr global %rtti.TypeDescriptor2 { i8** @"\01??_7type_info@@6B@", i8* null, [3 x i8] c".H\00" }, comdat
+
+; The function entry should be rewritten like this.
+; CHECK: define void @"\01?test@@YAXXZ"()
+; CHECK: entry:
+; CHECK: [[NUMEXCEPTIONS_PTR:\%.+]] = alloca i32, align 4
+; CHECK: [[EXCEPTIONVAL_PTR:\%.+]] = alloca [10 x i32], align 16
+; CHECK: [[DATA_PTR:\%.+]] = alloca %struct.SomeData, align 4
+; CHECK: [[I_PTR:\%.+]] = alloca i32, align 4
+; CHECK: [[E_PTR:\%.+]] = alloca i32, align 4
+; CHECK: store i32 0, i32* [[NUMEXCEPTIONS_PTR]], align 4
+; CHECK: [[TMP:\%.+]] = bitcast %struct.SomeData* [[DATA_PTR]] to i8*
+; CHECK: call void @llvm.memset(i8* [[TMP]], i8 0, i64 8, i32 4, i1 false)
+; CHECK: store i32 0, i32* [[I_PTR]], align 4
+; CHECK: call void (...) @llvm.frameescape(i32* [[E_PTR]], i32* [[NUMEXCEPTIONS_PTR]], [10 x i32]* [[EXCEPTIONVAL_PTR]], i32* [[I_PTR]], %struct.SomeData* [[DATA_PTR]])
+; CHECK: br label %for.cond
+
+; Function Attrs: uwtable
+define void @"\01?test@@YAXXZ"() #0 {
+entry:
+ %NumExceptions = alloca i32, align 4
+ %ExceptionVal = alloca [10 x i32], align 16
+ %Data = alloca %struct.SomeData, align 4
+ %i = alloca i32, align 4
+ %exn.slot = alloca i8*
+ %ehselector.slot = alloca i32
+ %e = alloca i32, align 4
+ store i32 0, i32* %NumExceptions, align 4
+ %tmp = bitcast %struct.SomeData* %Data to i8*
+ call void @llvm.memset(i8* %tmp, i8 0, i64 8, i32 4, i1 false)
+ store i32 0, i32* %i, align 4
+ br label %for.cond
+
+for.cond: ; preds = %for.inc, %entry
+ %tmp1 = load i32, i32* %i, align 4
+ %cmp = icmp slt i32 %tmp1, 10
+ br i1 %cmp, label %for.body, label %for.end
+
+; CHECK: for.body:
+; CHECK: invoke void @"\01?may_throw@@YAXXZ"()
+; CHECK: to label %invoke.cont unwind label %[[LPAD_LABEL:lpad[0-9]*]]
+
+for.body: ; preds = %for.cond
+ invoke void @"\01?may_throw@@YAXXZ"()
+ to label %invoke.cont unwind label %lpad
+
+invoke.cont: ; preds = %for.body
+ %tmp2 = load i32, i32* %i, align 4
+ %a = getelementptr inbounds %struct.SomeData, %struct.SomeData* %Data, i32 0, i32 0
+ %tmp3 = load i32, i32* %a, align 4
+ %add = add nsw i32 %tmp3, %tmp2
+ store i32 %add, i32* %a, align 4
+ br label %try.cont
+
+; CHECK: [[LPAD_LABEL]]:{{[ ]+}}; preds = %for.body
+; CHECK: landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+; CHECK-NEXT: catch i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i8*)
+; CHECK-NEXT: [[RECOVER:\%.+]] = call i8* (...) @llvm.eh.actions(i32 1, i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i8*), i32 0, i8* (i8*, i8*)* @"\01?test@@YAXXZ.catch")
+; CHECK-NEXT: indirectbr i8* [[RECOVER]], [label %try.cont]
+
+lpad: ; preds = %for.body
+ %tmp4 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+ catch i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i8*)
+ %tmp5 = extractvalue { i8*, i32 } %tmp4, 0
+ store i8* %tmp5, i8** %exn.slot
+ %tmp6 = extractvalue { i8*, i32 } %tmp4, 1
+ store i32 %tmp6, i32* %ehselector.slot
+ br label %catch.dispatch
+
+; CHECK-NOT: catch.dispatch:
+
+catch.dispatch: ; preds = %lpad
+ %sel = load i32, i32* %ehselector.slot
+ %tmp7 = call i32 @llvm.eh.typeid.for(i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i8*)) #1
+ %matches = icmp eq i32 %sel, %tmp7
+ br i1 %matches, label %catch, label %eh.resume
+
+; CHECK-NOT: catch:
+
+catch: ; preds = %catch.dispatch
+ %exn = load i8*, i8** %exn.slot
+ %e.i8 = bitcast i32* %e to i8*
+ call void @llvm.eh.begincatch(i8* %exn, i8* %e.i8) #1
+ %tmp11 = load i32, i32* %e, align 4
+ %tmp12 = load i32, i32* %NumExceptions, align 4
+ %idxprom = sext i32 %tmp12 to i64
+ %arrayidx = getelementptr inbounds [10 x i32], [10 x i32]* %ExceptionVal, i32 0, i64 %idxprom
+ store i32 %tmp11, i32* %arrayidx, align 4
+ %tmp13 = load i32, i32* %NumExceptions, align 4
+ %inc = add nsw i32 %tmp13, 1
+ store i32 %inc, i32* %NumExceptions, align 4
+ %tmp14 = load i32, i32* %e, align 4
+ %tmp15 = load i32, i32* %i, align 4
+ %cmp1 = icmp eq i32 %tmp14, %tmp15
+ br i1 %cmp1, label %if.then, label %if.else
+
+; CHECK-NOT: if.then:
+
+if.then: ; preds = %catch
+ %tmp16 = load i32, i32* %e, align 4
+ %b = getelementptr inbounds %struct.SomeData, %struct.SomeData* %Data, i32 0, i32 1
+ %tmp17 = load i32, i32* %b, align 4
+ %add2 = add nsw i32 %tmp17, %tmp16
+ store i32 %add2, i32* %b, align 4
+ br label %if.end
+
+; CHECK-NOT: if.else:
+
+if.else: ; preds = %catch
+ %tmp18 = load i32, i32* %e, align 4
+ %a3 = getelementptr inbounds %struct.SomeData, %struct.SomeData* %Data, i32 0, i32 0
+ %tmp19 = load i32, i32* %a3, align 4
+ %add4 = add nsw i32 %tmp19, %tmp18
+ store i32 %add4, i32* %a3, align 4
+ br label %if.end
+
+; CHECK-NOT: if.end:
+
+if.end: ; preds = %if.else, %if.then
+ call void @llvm.eh.endcatch() #1
+ br label %try.cont
+
+try.cont: ; preds = %if.end, %invoke.cont
+ %tmp20 = load i32, i32* %NumExceptions, align 4
+ call void @"\01?does_not_throw@@YAXH@Z"(i32 %tmp20)
+ br label %for.inc
+
+for.inc: ; preds = %try.cont
+ %tmp21 = load i32, i32* %i, align 4
+ %inc5 = add nsw i32 %tmp21, 1
+ store i32 %inc5, i32* %i, align 4
+ br label %for.cond
+
+for.end: ; preds = %for.cond
+ %tmp22 = load i32, i32* %NumExceptions, align 4
+ %arraydecay = getelementptr inbounds [10 x i32], [10 x i32]* %ExceptionVal, i32 0, i32 0
+ call void @"\01?dump@@YAXPEAHHAEAUSomeData@@@Z"(i32* %arraydecay, i32 %tmp22, %struct.SomeData* dereferenceable(8) %Data)
+ ret void
+
+; CHECK-NOT: eh.resume:
+
+eh.resume: ; preds = %catch.dispatch
+ %exn6 = load i8*, i8** %exn.slot
+ %sel7 = load i32, i32* %ehselector.slot
+ %lpad.val = insertvalue { i8*, i32 } undef, i8* %exn6, 0
+ %lpad.val8 = insertvalue { i8*, i32 } %lpad.val, i32 %sel7, 1
+ resume { i8*, i32 } %lpad.val8
+
+; CHECK: }
+}
+
+; The following catch handler should be outlined.
+; CHECK-LABEL: define internal i8* @"\01?test@@YAXXZ.catch"(i8*, i8*)
+; CHECK: entry:
+; CHECK: [[RECOVER_E:\%.+]] = call i8* @llvm.framerecover(i8* bitcast (void ()* @"\01?test@@YAXXZ" to i8*), i8* %1, i32 0)
+; CHECK: [[E_PTR1:\%.+]] = bitcast i8* [[RECOVER_E]] to i32*
+; CHECK: [[RECOVER_NUMEXCEPTIONS:\%.+]] = call i8* @llvm.framerecover(i8* bitcast (void ()* @"\01?test@@YAXXZ" to i8*), i8* %1, i32 1)
+; CHECK: [[NUMEXCEPTIONS_PTR1:\%.+]] = bitcast i8* [[RECOVER_NUMEXCEPTIONS]] to i32*
+; CHECK: [[RECOVER_EXCEPTIONVAL:\%.+]] = call i8* @llvm.framerecover(i8* bitcast (void ()* @"\01?test@@YAXXZ" to i8*), i8* %1, i32 2)
+; CHECK: [[EXCEPTIONVAL_PTR1:\%.+]] = bitcast i8* [[RECOVER_EXCEPTIONVAL]] to [10 x i32]*
+; CHECK: [[RECOVER_I:\%.+]] = call i8* @llvm.framerecover(i8* bitcast (void ()* @"\01?test@@YAXXZ" to i8*), i8* %1, i32 3)
+; CHECK: [[I_PTR1:\%.+]] = bitcast i8* [[RECOVER_I]] to i32*
+; CHECK: [[RECOVER_DATA:\%.+]] = call i8* @llvm.framerecover(i8* bitcast (void ()* @"\01?test@@YAXXZ" to i8*), i8* %1, i32 4)
+; CHECK: [[DATA_PTR1:\%.+]] = bitcast i8* [[RECOVER_DATA]] to %struct.SomeData*
+; CHECK: [[TMP:\%.+]] = load i32, i32* [[E_PTR1]], align 4
+; CHECK: [[TMP1:\%.+]] = load i32, i32* [[NUMEXCEPTIONS_PTR]], align 4
+; CHECK: [[IDXPROM:\%.+]] = sext i32 [[TMP1]] to i64
+; CHECK: [[ARRAYIDX:\%.+]] = getelementptr inbounds [10 x i32], [10 x i32]* [[EXCEPTIONVAL_PTR1]], i32 0, i64 [[IDXPROM]]
+; CHECK: store i32 [[TMP]], i32* [[ARRAYIDX]], align 4
+; CHECK: [[TMP2:\%.+]] = load i32, i32* [[NUMEXCEPTIONS_PTR1]], align 4
+; CHECK: [[INC:\%.+]] = add nsw i32 [[TMP2]], 1
+; CHECK: store i32 [[INC]], i32* [[NUMEXCEPTIONS_PTR]], align 4
+; CHECK: [[TMP3:\%.+]] = load i32, i32* [[E_PTR1]], align 4
+; CHECK: [[TMP4:\%.+]] = load i32, i32* [[I_PTR1]], align 4
+; CHECK: [[CMP:\%.+]] = icmp eq i32 [[TMP3]], [[TMP4]]
+; CHECK: br i1 [[CMP]], label %if.then, label %if.else
+;
+; CHECK: if.then: ; preds = %entry
+; CHECK: [[TMP5:\%.+]] = load i32, i32* [[E_PTR1]], align 4
+; CHECK: [[B_PTR:\%.+]] = getelementptr inbounds %struct.SomeData, %struct.SomeData* [[DATA_PTR1]], i32 0, i32 1
+; CHECK: [[TMP6:\%.+]] = load i32, i32* [[B_PTR]], align 4
+; CHECK: %add2 = add nsw i32 [[TMP6]], [[TMP5]]
+; CHECK: store i32 [[ADD:\%.+]], i32* [[B_PTR]], align 4
+; CHECK: br label %if.end
+;
+; CHECK: if.else: ; preds = %entry
+; CHECK: [[TMP7:\%.+]] = load i32, i32* %e, align 4
+; CHECK: [[A3:\%.+]] = getelementptr inbounds %struct.SomeData, %struct.SomeData* %Data, i32 0, i32 0
+; CHECK: [[TMP8:\%.+]] = load i32, i32* %a3, align 4
+; CHECK: [[ADD1:\%.+]] = add nsw i32 [[TMP8]], [[TMP7]]
+; CHECK: store i32 [[ADD1]], i32* [[A3]], align 4
+; CHECK: br label %if.end
+;
+; CHECK: if.end: ; preds = %if.else, %if.then
+; CHECK: ret i8* blockaddress(@"\01?test@@YAXXZ", %try.cont)
+; CHECK: }
+
+
+; Function Attrs: nounwind
+declare void @llvm.memset(i8* nocapture, i8, i64, i32, i1) #1
+
+declare void @"\01?may_throw@@YAXXZ"() #2
+
+declare i32 @__CxxFrameHandler3(...)
+
+; Function Attrs: nounwind readnone
+declare i32 @llvm.eh.typeid.for(i8*) #3
+
+declare void @llvm.eh.begincatch(i8*, i8*)
+
+declare void @llvm.eh.endcatch()
+
+declare void @"\01?does_not_throw@@YAXH@Z"(i32) #2
+
+declare void @"\01?dump@@YAXPEAHHAEAUSomeData@@@Z"(i32*, i32, %struct.SomeData* dereferenceable(8)) #2
+
+attributes #0 = { uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind }
+attributes #2 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #3 = { nounwind readnone }
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{i32 1, !"PIC Level", i32 2}
+!1 = !{!"clang version 3.7.0 (trunk 228868)"}
diff --git a/test/CodeGen/WinEH/cppeh-inalloca.ll b/test/CodeGen/WinEH/cppeh-inalloca.ll
new file mode 100644
index 000000000000..13471b8661a3
--- /dev/null
+++ b/test/CodeGen/WinEH/cppeh-inalloca.ll
@@ -0,0 +1,194 @@
+; RUN: opt -mtriple=x86_64-pc-windows-msvc -winehprepare -S -o - < %s | FileCheck %s
+
+; This test is built from the following code:
+; struct A {
+; A(int a);
+; A(const A &o);
+; ~A();
+; int a;
+; };
+;
+; void may_throw();
+;
+; int test(A a) {
+; try {
+; may_throw();
+; }
+; catch (int e) {
+; return a.a + e;
+; }
+; return 0;
+; }
+;
+; The test was built for a 32-bit Windows target and then the reference to
+; the inalloca instruction was manually sunk into the landingpad.
+
+; ModuleID = 'cppeh-inalloca.cpp'
+
+%rtti.TypeDescriptor2 = type { i8**, i8*, [3 x i8] }
+%struct.A = type { i32 }
+
+$"\01??_R0H@8" = comdat any
+
+@"\01??_7type_info@@6B@" = external constant i8*
+@"\01??_R0H@8" = linkonce_odr global %rtti.TypeDescriptor2 { i8** @"\01??_7type_info@@6B@", i8* null, [3 x i8] c".H\00" }, comdat
+
+; The function entry should be rewritten like this.
+; CHECK: define i32 @"\01?test@@YAHUA@@@Z"(<{ %struct.A }>* inalloca)
+; CHECK: entry:
+; CHECK: [[TMP_REGMEM:\%.+]] = alloca <{ %struct.A }>*
+; CHECK: store <{ %struct.A }>* %0, <{ %struct.A }>** [[TMP_REGMEM]]
+; CHECK: [[RETVAL:\%.+]] = alloca i32, align 4
+; CHECK: [[E_PTR:\%.+]] = alloca i32, align 4
+; CHECK: [[CLEANUP_SLOT:\%.+]] = alloca i32
+; CHECK: call void (...) @llvm.frameescape(i32* %e, <{ %struct.A }>** [[TMP_REGMEM]], i32* [[RETVAL]], i32* [[CLEANUP_SLOT]])
+; CHECK: invoke void @"\01?may_throw@@YAXXZ"()
+; CHECK: to label %invoke.cont unwind label %[[LPAD_LABEL:lpad[0-9]*]]
+
+define i32 @"\01?test@@YAHUA@@@Z"(<{ %struct.A }>* inalloca) #0 {
+entry:
+ %retval = alloca i32, align 4
+ %exn.slot = alloca i8*
+ %ehselector.slot = alloca i32
+ %e = alloca i32, align 4
+ %cleanup.dest.slot = alloca i32
+ invoke void @"\01?may_throw@@YAXXZ"()
+ to label %invoke.cont unwind label %lpad
+
+invoke.cont: ; preds = %entry
+ br label %try.cont
+
+; CHECK: [[LPAD_LABEL]]:{{[ ]+}}; preds = %entry
+; CHECK: landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+; CHECK-NEXT: cleanup
+; CHECK-NEXT: catch i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i8*)
+; CHECK-NEXT: [[RECOVER:\%recover.*]] = call i8* (...) @llvm.eh.actions(i32 1, i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i8*), i32 0, i8* (i8*, i8*)* @"\01?test@@YAHUA@@@Z.catch", i32 0, void (i8*, i8*)* @"\01?test@@YAHUA@@@Z.cleanup")
+; CHECK-NEXT: indirectbr i8* [[RECOVER]], [label %cleanup]
+
+lpad: ; preds = %entry
+ %1 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+ cleanup
+ catch i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i8*)
+ %2 = extractvalue { i8*, i32 } %1, 0
+ store i8* %2, i8** %exn.slot
+ %3 = extractvalue { i8*, i32 } %1, 1
+ store i32 %3, i32* %ehselector.slot
+ br label %catch.dispatch
+
+; CHECK-NOT: catch.dispatch:
+
+catch.dispatch: ; preds = %lpad
+ %sel = load i32, i32* %ehselector.slot
+ %4 = call i32 @llvm.eh.typeid.for(i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i8*)) #3
+ %matches = icmp eq i32 %sel, %4
+ br i1 %matches, label %catch, label %ehcleanup
+
+; CHECK-NOT: catch:
+
+catch: ; preds = %catch.dispatch
+ %exn = load i8*, i8** %exn.slot
+ %e.i8 = bitcast i32* %e to i8*
+ call void @llvm.eh.begincatch(i8* %exn, i8* %e.i8) #3
+ %a = getelementptr inbounds <{ %struct.A }>, <{ %struct.A }>* %0, i32 0, i32 0
+ %a1 = getelementptr inbounds %struct.A, %struct.A* %a, i32 0, i32 0
+ %tmp8 = load i32, i32* %a1, align 4
+ %tmp9 = load i32, i32* %e, align 4
+ %add = add nsw i32 %tmp8, %tmp9
+ store i32 %add, i32* %retval
+ store i32 1, i32* %cleanup.dest.slot
+ call void @llvm.eh.endcatch() #3
+ br label %cleanup
+
+try.cont: ; preds = %invoke.cont
+ store i32 0, i32* %retval
+ store i32 1, i32* %cleanup.dest.slot
+ br label %cleanup
+
+; The cleanup block should be re-written like this.
+; CHECK: cleanup:{{[ ]+}}; preds = %[[LPAD_LABEL]], %try.cont
+; CHECK: %a2 = getelementptr inbounds <{ %struct.A }>, <{ %struct.A }>* %0, i32 0, i32 0
+; CHECK: call x86_thiscallcc void @"\01??1A@@QAE@XZ"(%struct.A* %a2)
+; CHECK: [[TMP1:\%.+]] = load i32, i32* [[RETVAL]]
+; CHECK: ret i32 [[TMP1]]
+
+cleanup: ; preds = %try.cont, %catch
+ %a2 = getelementptr inbounds <{ %struct.A }>, <{ %struct.A }>* %0, i32 0, i32 0
+ call x86_thiscallcc void @"\01??1A@@QAE@XZ"(%struct.A* %a2) #3
+ %tmp10 = load i32, i32* %retval
+ ret i32 %tmp10
+
+; CHECK-NOT: ehcleanup:
+
+ehcleanup: ; preds = %catch.dispatch
+ %a3 = getelementptr inbounds <{ %struct.A }>, <{ %struct.A }>* %0, i32 0, i32 0
+ call x86_thiscallcc void @"\01??1A@@QAE@XZ"(%struct.A* %a3) #3
+ br label %eh.resume
+
+; CHECK-NOT: eh.resume:
+
+eh.resume: ; preds = %ehcleanup
+ %exn2 = load i8*, i8** %exn.slot
+ %sel3 = load i32, i32* %ehselector.slot
+ %lpad.val = insertvalue { i8*, i32 } undef, i8* %exn2, 0
+ %lpad.val4 = insertvalue { i8*, i32 } %lpad.val, i32 %sel3, 1
+ resume { i8*, i32 } %lpad.val4
+
+; CHECK: }
+}
+
+; The following catch handler should be outlined.
+; CHECK: define internal i8* @"\01?test@@YAHUA@@@Z.catch"(i8*, i8*)
+; CHECK: entry:
+; CHECK: [[RECOVER_E:\%.+]] = call i8* @llvm.framerecover(i8* bitcast (i32 (<{ %struct.A }>*)* @"\01?test@@YAHUA@@@Z" to i8*), i8* %1, i32 0)
+; CHECK: [[E_PTR:\%.+]] = bitcast i8* [[RECOVER_E]] to i32*
+; CHECK: [[RECOVER_EH_TEMP:\%.+]] = call i8* @llvm.framerecover(i8* bitcast (i32 (<{ %struct.A }>*)* @"\01?test@@YAHUA@@@Z" to i8*), i8* %1, i32 1)
+; CHECK: [[EH_TEMP:\%.+]] = bitcast i8* [[RECOVER_EH_TEMP]] to <{ %struct.A }>**
+; CHECK: [[RECOVER_RETVAL:\%.+]] = call i8* @llvm.framerecover(i8* bitcast (i32 (<{ %struct.A }>*)* @"\01?test@@YAHUA@@@Z" to i8*), i8* %1, i32 2)
+; CHECK: [[RETVAL1:\%.+]] = bitcast i8* [[RECOVER_RETVAL]] to i32*
+; CHECK: [[RECOVER_CLEANUPSLOT:\%.+]] = call i8* @llvm.framerecover(i8* bitcast (i32 (<{ %struct.A }>*)* @"\01?test@@YAHUA@@@Z" to i8*), i8* %1, i32 3)
+; CHECK: [[CLEANUPSLOT1:\%.+]] = bitcast i8* [[RECOVER_CLEANUPSLOT]] to i32*
+; CHECK: [[E_I8PTR:\%.+]] = bitcast i32* [[E_PTR]] to i8*
+; CHECK: [[TMP_RELOAD:\%.+]] = load <{ %struct.A }>*, <{ %struct.A }>** [[EH_TEMP]]
+; CHECK: [[RECOVER_A:\%.+]] = getelementptr inbounds <{ %struct.A }>, <{ %struct.A }>* [[TMP_RELOAD]], i32 0, i32 0
+; CHECK: [[A1:\%.+]] = getelementptr inbounds %struct.A, %struct.A* [[RECOVER_A]], i32 0, i32 0
+; CHECK: [[TMP2:\%.+]] = load i32, i32* [[A1]], align 4
+; CHECK: [[TMP3:\%.+]] = load i32, i32* [[E_PTR]], align 4
+; CHECK: [[ADD:\%.+]] = add nsw i32 [[TMP2]], [[TMP3]]
+; CHECK: store i32 [[ADD]], i32* [[RETVAL1]]
+; CHECK: store i32 1, i32* [[CLEANUPSLOT1]]
+; CHECK: ret i8* blockaddress(@"\01?test@@YAHUA@@@Z", %cleanup)
+; CHECK: }
+
+; The following cleanup handler should be outlined.
+; CHECK: define internal void @"\01?test@@YAHUA@@@Z.cleanup"(i8*, i8*)
+; CHECK: entry:
+; CHECK: [[RECOVER_EH_TEMP1:\%.+]] = call i8* @llvm.framerecover(i8* bitcast (i32 (<{ %struct.A }>*)* @"\01?test@@YAHUA@@@Z" to i8*), i8* %1, i32 1)
+; CHECK: [[EH_TEMP1:\%.+]] = bitcast i8* [[RECOVER_EH_TEMP]] to <{ %struct.A }>**
+; CHECK: [[TMP_RELOAD1:\%.+]] = load <{ %struct.A }>*, <{ %struct.A }>** [[EH_TEMP1]]
+; CHECK: [[A3:\%.+]] = getelementptr inbounds <{ %struct.A }>, <{ %struct.A }>* [[TMP_RELOAD1]], i32 0, i32 0
+; CHECK: call x86_thiscallcc void @"\01??1A@@QAE@XZ"(%struct.A* [[A3]])
+; CHECK: ret void
+; CHECK: }
+
+declare void @"\01?may_throw@@YAXXZ"() #0
+
+declare i32 @__CxxFrameHandler3(...)
+
+; Function Attrs: nounwind readnone
+declare i32 @llvm.eh.typeid.for(i8*) #1
+
+declare void @llvm.eh.begincatch(i8*, i8*)
+
+declare void @llvm.eh.endcatch()
+
+; Function Attrs: nounwind
+declare x86_thiscallcc void @"\01??1A@@QAE@XZ"(%struct.A*) #2
+
+attributes #0 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone }
+attributes #2 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #3 = { nounwind }
+
+!llvm.ident = !{!0}
+
+!0 = !{!"clang version 3.7.0 (trunk 228868)"}
diff --git a/test/CodeGen/WinEH/cppeh-min-unwind.ll b/test/CodeGen/WinEH/cppeh-min-unwind.ll
new file mode 100644
index 000000000000..3fffa47a081b
--- /dev/null
+++ b/test/CodeGen/WinEH/cppeh-min-unwind.ll
@@ -0,0 +1,99 @@
+; RUN: opt -mtriple=x86_64-pc-windows-msvc -winehprepare -S -o - < %s | FileCheck %s
+
+; This test was generated from the following source:
+;
+; class SomeClass {
+; public:
+; SomeClass();
+; ~SomeClass();
+; };
+;
+; void test() {
+; SomeClass obj;
+; may_throw();
+; }
+
+
+; ModuleID = 'min-unwind.cpp'
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-windows-msvc"
+
+%class.SomeClass = type { [28 x i32] }
+
+; The function entry should be rewritten like this.
+; CHECK: define void @_Z4testv()
+; CHECK: entry:
+; CHECK: [[OBJ_PTR:\%.+]] = alloca %class.SomeClass, align 4
+; CHECK: call void @_ZN9SomeClassC1Ev(%class.SomeClass* [[OBJ_PTR]])
+; CHECK: call void (...) @llvm.frameescape(%class.SomeClass* [[OBJ_PTR]])
+; CHECK: invoke void @_Z9may_throwv()
+; CHECK: to label %invoke.cont unwind label %[[LPAD_LABEL:lpad[0-9]*]]
+
+; Function Attrs: uwtable
+define void @_Z4testv() #0 {
+entry:
+ %obj = alloca %class.SomeClass, align 4
+ %exn.slot = alloca i8*
+ %ehselector.slot = alloca i32
+ call void @_ZN9SomeClassC1Ev(%class.SomeClass* %obj)
+ invoke void @_Z9may_throwv()
+ to label %invoke.cont unwind label %lpad
+
+invoke.cont: ; preds = %entry
+ call void @_ZN9SomeClassD1Ev(%class.SomeClass* %obj)
+ ret void
+
+; CHECK: [[LPAD_LABEL]]:{{[ ]+}}; preds = %entry
+; CHECK: landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+; CHECK-NEXT: cleanup
+; CHECK-NEXT: [[RECOVER:\%.+]] = call i8* (...) @llvm.eh.actions(i32 0, void (i8*, i8*)* @_Z4testv.cleanup)
+; CHECK-NEXT: indirectbr i8* [[RECOVER]], []
+
+lpad: ; preds = %entry
+ %tmp = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+ cleanup
+ %tmp1 = extractvalue { i8*, i32 } %tmp, 0
+ store i8* %tmp1, i8** %exn.slot
+ %tmp2 = extractvalue { i8*, i32 } %tmp, 1
+ store i32 %tmp2, i32* %ehselector.slot
+ call void @_ZN9SomeClassD1Ev(%class.SomeClass* %obj)
+ br label %eh.resume
+
+; CHECK-NOT: eh.resume:
+
+eh.resume: ; preds = %lpad
+ %exn = load i8*, i8** %exn.slot
+ %sel = load i32, i32* %ehselector.slot
+ %lpad.val = insertvalue { i8*, i32 } undef, i8* %exn, 0
+ %lpad.val2 = insertvalue { i8*, i32 } %lpad.val, i32 %sel, 1
+ resume { i8*, i32 } %lpad.val2
+
+; CHECK: }
+}
+
+; This cleanup handler should be outlined.
+; CHECK: define internal void @_Z4testv.cleanup(i8*, i8*)
+; CHECK: entry:
+; CHECK: [[RECOVER_OBJ:\%.+]] = call i8* @llvm.framerecover(i8* bitcast (void ()* @_Z4testv to i8*), i8* %1, i32 0)
+; CHECK: [[OBJ_PTR1:\%.+]] = bitcast i8* [[RECOVER_OBJ]] to %class.SomeClass*
+; CHECK: call void @_ZN9SomeClassD1Ev(%class.SomeClass* [[OBJ_PTR1]])
+; CHECK: ret void
+; CHECK: }
+
+declare void @_ZN9SomeClassC1Ev(%class.SomeClass*) #1
+
+declare void @_Z9may_throwv() #1
+
+declare i32 @__CxxFrameHandler3(...)
+
+declare void @_ZN9SomeClassD1Ev(%class.SomeClass*) #1
+
+attributes #0 = { uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { noinline noreturn nounwind }
+attributes #3 = { noreturn nounwind }
+attributes #4 = { nounwind }
+
+!llvm.ident = !{!0}
+
+!0 = !{!"clang version 3.7.0 (trunk 226027)"}
diff --git a/test/CodeGen/WinEH/cppeh-mixed-catch-and-cleanup.ll b/test/CodeGen/WinEH/cppeh-mixed-catch-and-cleanup.ll
new file mode 100644
index 000000000000..52f613276d54
--- /dev/null
+++ b/test/CodeGen/WinEH/cppeh-mixed-catch-and-cleanup.ll
@@ -0,0 +1,106 @@
+; RUN: opt -mtriple=x86_64-pc-windows-msvc -winehprepare -S -o - < %s | FileCheck %s
+
+; This test is based on the following code:
+;
+; void test()
+; {
+; try {
+; Obj o;
+; may_throw();
+; } catch (...) {
+; }
+; }
+;
+; The purpose of this test is to verify that we create separate catch and
+; cleanup handlers. When compiling for the C++11 standard, this isn't
+; strictly necessary, since calling the destructor from the catch handler
+; would be logically equivalent to calling it from a cleanup handler.
+; However, if the -std=c++98 option is used, an exception in the cleanup
+; code should terminate the process (the MSVCRT runtime will do that), but
+; if the destructor is called from the catch handler, it wouldn't terminate
+; the process.
+
+
+; ModuleID = 'cppeh-mixed-catch-and-cleanup.cpp'
+target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-windows-msvc"
+
+%class.Obj = type { i8 }
+
+; This just verifies that the function was processed by WinEHPrepare.
+;
+; CHECK-LABEL: define void @"\01?test@@YAXXZ"()
+; CHECK: entry:
+; CHECK: call void (...) @llvm.frameescape
+; CHECK: }
+
+; Function Attrs: nounwind uwtable
+define void @"\01?test@@YAXXZ"() #0 {
+entry:
+ %o = alloca %class.Obj, align 1
+ %exn.slot = alloca i8*
+ %ehselector.slot = alloca i32
+ invoke void @"\01?may_throw@@YAXXZ"()
+ to label %invoke.cont unwind label %lpad
+
+invoke.cont: ; preds = %entry
+ call void @"\01??1Obj@@QEAA@XZ"(%class.Obj* %o) #3
+ br label %try.cont
+
+lpad: ; preds = %entry
+ %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+ catch i8* null
+ %1 = extractvalue { i8*, i32 } %0, 0
+ store i8* %1, i8** %exn.slot
+ %2 = extractvalue { i8*, i32 } %0, 1
+ store i32 %2, i32* %ehselector.slot
+ call void @"\01??1Obj@@QEAA@XZ"(%class.Obj* %o) #3
+ %exn = load i8*, i8** %exn.slot
+ call void @llvm.eh.begincatch(i8* %exn, i8* null) #3
+ call void @llvm.eh.endcatch() #3
+ br label %try.cont
+
+try.cont: ; preds = %catch, %invoke.cont
+ ret void
+}
+
+; Verify that a cleanup handler was created and that it calls ~Obj().
+; CHECK-LABEL: define internal void @"\01?test@@YAXXZ.cleanup"(i8*, i8*)
+; CHECK: entry:
+; CHECK: @llvm.framerecover(i8* bitcast (void ()* @"\01?test@@YAXXZ" to i8*), i8* %1, i32 0)
+; CHECK: call void @"\01??1Obj@@QEAA@XZ"
+; CHECK: ret void
+; CHECK: }
+
+; Verify that a catch handler was created and that it does not call ~Obj().
+; CHECK-LABEL: define internal i8* @"\01?test@@YAXXZ.catch"(i8*, i8*)
+; CHECK: entry:
+; CHECK-NOT: call void @"\01??1Obj@@QEAA@XZ"
+; CHECK: ret i8* blockaddress(@"\01?test@@YAXXZ", %try.cont)
+; CHECK: }
+
+
+
+declare void @"\01?may_throw@@YAXXZ"() #1
+
+declare i32 @__CxxFrameHandler3(...)
+
+; Function Attrs: nounwind
+declare void @"\01??1Obj@@QEAA@XZ"(%class.Obj*) #2
+
+; Function Attrs: nounwind
+declare void @llvm.eh.begincatch(i8* nocapture, i8* nocapture) #3
+
+; Function Attrs: nounwind
+declare void @llvm.eh.endcatch() #3
+
+attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #3 = { nounwind }
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{i32 1, !"PIC Level", i32 2}
+!1 = !{!"clang version 3.7.0 (trunk 235779) (llvm/trunk 235769)"}
diff --git a/test/CodeGen/WinEH/cppeh-multi-catch.ll b/test/CodeGen/WinEH/cppeh-multi-catch.ll
new file mode 100644
index 000000000000..28340c60ad1e
--- /dev/null
+++ b/test/CodeGen/WinEH/cppeh-multi-catch.ll
@@ -0,0 +1,226 @@
+; RUN: opt -mtriple=x86_64-pc-windows-msvc -winehprepare -S -o - < %s | FileCheck %s
+
+; This test is based on the following code:
+;
+; void test()
+; {
+; try {
+; may_throw();
+; } catch (int i) {
+; handle_int(i);
+; } catch (long long ll) {
+; handle_long_long(ll);
+; } catch (SomeClass &obj) {
+; handle_obj(&obj);
+; } catch (...) {
+; handle_exception();
+; }
+; }
+;
+; The catch handlers were edited to insert 'ret void' after the endcatch call.
+
+; ModuleID = 'catch-with-type.cpp'
+target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-windows-msvc"
+
+%rtti.TypeDescriptor2 = type { i8**, i8*, [3 x i8] }
+%eh.HandlerMapEntry = type { i32, i32 }
+%rtti.TypeDescriptor3 = type { i8**, i8*, [4 x i8] }
+%rtti.TypeDescriptor15 = type { i8**, i8*, [16 x i8] }
+%class.SomeClass = type { i8 }
+
+$"\01??_R0H@8" = comdat any
+
+$"\01??_R0_J@8" = comdat any
+
+$"\01??_R0?AVSomeClass@@@8" = comdat any
+
+@"\01??_7type_info@@6B@" = external constant i8*
+@"\01??_R0H@8" = linkonce_odr global %rtti.TypeDescriptor2 { i8** @"\01??_7type_info@@6B@", i8* null, [3 x i8] c".H\00" }, comdat
+@__ImageBase = external constant i8
+@llvm.eh.handlermapentry.H = private unnamed_addr constant %eh.HandlerMapEntry { i32 0, i32 trunc (i64 sub nuw nsw (i64 ptrtoint (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i64), i64 ptrtoint (i8* @__ImageBase to i64)) to i32) }, section "llvm.metadata"
+@"\01??_R0_J@8" = linkonce_odr global %rtti.TypeDescriptor3 { i8** @"\01??_7type_info@@6B@", i8* null, [4 x i8] c"._J\00" }, comdat
+@llvm.eh.handlermapentry._J = private unnamed_addr constant %eh.HandlerMapEntry { i32 0, i32 trunc (i64 sub nuw nsw (i64 ptrtoint (%rtti.TypeDescriptor3* @"\01??_R0_J@8" to i64), i64 ptrtoint (i8* @__ImageBase to i64)) to i32) }, section "llvm.metadata"
+@"\01??_R0?AVSomeClass@@@8" = linkonce_odr global %rtti.TypeDescriptor15 { i8** @"\01??_7type_info@@6B@", i8* null, [16 x i8] c".?AVSomeClass@@\00" }, comdat
+@"llvm.eh.handlermapentry.reference.?AVSomeClass@@" = private unnamed_addr constant %eh.HandlerMapEntry { i32 8, i32 trunc (i64 sub nuw nsw (i64 ptrtoint (%rtti.TypeDescriptor15* @"\01??_R0?AVSomeClass@@@8" to i64), i64 ptrtoint (i8* @__ImageBase to i64)) to i32) }, section "llvm.metadata"
+
+
+; CHECK: define void @"\01?test@@YAXXZ"() #0 {
+; CHECK: entry:
+; CHECK: [[OBJ_PTR:\%.+]] = alloca %class.SomeClass*, align 8
+; CHECK: [[LL_PTR:\%.+]] = alloca i64, align 8
+; CHECK: [[I_PTR:\%.+]] = alloca i32, align 4
+; CHECK: call void (...) @llvm.frameescape(i32* [[I_PTR]], i64* [[LL_PTR]], %class.SomeClass** [[OBJ_PTR]])
+; CHECK: invoke void @"\01?may_throw@@YAXXZ"()
+; CHECK: to label %invoke.cont unwind label %[[LPAD_LABEL:lpad[0-9]*]]
+
+; Function Attrs: uwtable
+define void @"\01?test@@YAXXZ"() #0 {
+entry:
+ %exn.slot = alloca i8*
+ %ehselector.slot = alloca i32
+ %obj = alloca %class.SomeClass*, align 8
+ %ll = alloca i64, align 8
+ %i = alloca i32, align 4
+ invoke void @"\01?may_throw@@YAXXZ"()
+ to label %invoke.cont unwind label %lpad
+
+invoke.cont: ; preds = %entry
+ br label %try.cont
+
+; CHECK: [[LPAD_LABEL]]:{{[ ]+}}; preds = %entry
+; CHECK: landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+; CHECK-NEXT: catch %eh.HandlerMapEntry* @llvm.eh.handlermapentry.H
+; CHECK-NEXT: catch %eh.HandlerMapEntry* @llvm.eh.handlermapentry._J
+; CHECK-NEXT: catch %eh.HandlerMapEntry* @"llvm.eh.handlermapentry.reference.?AVSomeClass@@"
+; CHECK-NEXT: catch i8* null
+; CHECK-NEXT: [[RECOVER:\%.+]] = call i8* (...) @llvm.eh.actions(
+; CHECK-SAME: i32 1, i8* bitcast (%eh.HandlerMapEntry* @llvm.eh.handlermapentry.H to i8*), i32 0, i8* (i8*, i8*)* @"\01?test@@YAXXZ.catch",
+; CHECK-SAME: i32 1, i8* bitcast (%eh.HandlerMapEntry* @llvm.eh.handlermapentry._J to i8*), i32 1, i8* (i8*, i8*)* @"\01?test@@YAXXZ.catch.1",
+; CHECK-SAME: i32 1, i8* bitcast (%eh.HandlerMapEntry* @"llvm.eh.handlermapentry.reference.?AVSomeClass@@" to i8*), i32 2, i8* (i8*, i8*)* @"\01?test@@YAXXZ.catch.2",
+; CHECK-SAME: i32 1, i8* null, i32 -1, i8* (i8*, i8*)* @"\01?test@@YAXXZ.catch.3")
+; CHECK-NEXT: indirectbr i8* [[RECOVER]], [label %ret]
+
+lpad: ; preds = %entry
+ %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+ catch %eh.HandlerMapEntry* @llvm.eh.handlermapentry.H
+ catch %eh.HandlerMapEntry* @llvm.eh.handlermapentry._J
+ catch %eh.HandlerMapEntry* @"llvm.eh.handlermapentry.reference.?AVSomeClass@@"
+ catch i8* null
+ %1 = extractvalue { i8*, i32 } %0, 0
+ store i8* %1, i8** %exn.slot
+ %2 = extractvalue { i8*, i32 } %0, 1
+ store i32 %2, i32* %ehselector.slot
+ br label %catch.dispatch
+
+; CHECK-NOT: catch.dispatch:
+catch.dispatch: ; preds = %lpad
+ %sel = load i32, i32* %ehselector.slot
+ %3 = call i32 @llvm.eh.typeid.for(i8* bitcast (%eh.HandlerMapEntry* @llvm.eh.handlermapentry.H to i8*)) #3
+ %matches = icmp eq i32 %sel, %3
+ br i1 %matches, label %catch14, label %catch.fallthrough
+
+ret:
+ ret void
+
+; CHECK-NOT: catch14:
+; CHECK: ret:
+; CHECK-NEXT: ret void
+catch14: ; preds = %catch.dispatch
+ %exn15 = load i8*, i8** %exn.slot
+ %4 = bitcast i32* %i to i8*
+ call void @llvm.eh.begincatch(i8* %exn15, i8* %4) #3
+ %5 = load i32, i32* %i, align 4
+ call void @"\01?handle_int@@YAXH@Z"(i32 %5)
+ call void @llvm.eh.endcatch() #3
+ br label %ret
+
+try.cont: ; preds = %invoke.cont
+ br label %ret
+
+; CHECK-NOT: catch.fallthrough:
+catch.fallthrough: ; preds = %catch.dispatch
+ %6 = call i32 @llvm.eh.typeid.for(i8* bitcast (%eh.HandlerMapEntry* @llvm.eh.handlermapentry._J to i8*)) #3
+ %matches1 = icmp eq i32 %sel, %6
+ br i1 %matches1, label %catch10, label %catch.fallthrough2
+
+; CHECK-NOT: catch10:
+catch10: ; preds = %catch.fallthrough
+ %exn11 = load i8*, i8** %exn.slot
+ %7 = bitcast i64* %ll to i8*
+ call void @llvm.eh.begincatch(i8* %exn11, i8* %7) #3
+ %8 = load i64, i64* %ll, align 8
+ call void @"\01?handle_long_long@@YAX_J@Z"(i64 %8)
+ call void @llvm.eh.endcatch() #3
+ br label %ret
+
+; CHECK-NOT: catch.fallthrough2:
+catch.fallthrough2: ; preds = %catch.fallthrough
+ %9 = call i32 @llvm.eh.typeid.for(i8* bitcast (%eh.HandlerMapEntry* @"llvm.eh.handlermapentry.reference.?AVSomeClass@@" to i8*)) #3
+ %matches3 = icmp eq i32 %sel, %9
+ br i1 %matches3, label %catch6, label %catch
+
+; CHECK-NOT: catch6:
+catch6: ; preds = %catch.fallthrough2
+ %exn7 = load i8*, i8** %exn.slot
+ %10 = bitcast %class.SomeClass** %obj to i8*
+ call void @llvm.eh.begincatch(i8* %exn7, i8* %10) #3
+ %11 = load %class.SomeClass*, %class.SomeClass** %obj, align 8
+ call void @"\01?handle_obj@@YAXPEAVSomeClass@@@Z"(%class.SomeClass* %11)
+ call void @llvm.eh.endcatch() #3
+ br label %ret
+
+; CHECK-NOT: catch:
+catch: ; preds = %catch.fallthrough2
+ %exn = load i8*, i8** %exn.slot
+ call void @llvm.eh.begincatch(i8* %exn, i8* null) #3
+ call void @"\01?handle_exception@@YAXXZ"() call void @llvm.eh.endcatch() #3
+ br label %ret
+; CHECK: }
+}
+
+; CHECK-LABEL: define internal i8* @"\01?test@@YAXXZ.catch"(i8*, i8*)
+; CHECK: entry:
+; CHECK: [[RECOVER_I:\%.+]] = call i8* @llvm.framerecover(i8* bitcast (void ()* @"\01?test@@YAXXZ" to i8*), i8* %1, i32 0)
+; CHECK: [[I_PTR:\%.+]] = bitcast i8* [[RECOVER_I]] to i32*
+; CHECK: [[TMP1:\%.+]] = load i32, i32* [[I_PTR]], align 4
+; CHECK: call void @"\01?handle_int@@YAXH@Z"(i32 [[TMP1]])
+; CHECK: ret i8* blockaddress(@"\01?test@@YAXXZ", %ret)
+; CHECK: }
+
+; CHECK-LABEL: define internal i8* @"\01?test@@YAXXZ.catch.1"(i8*, i8*)
+; CHECK: entry:
+; CHECK: [[RECOVER_LL:\%.+]] = call i8* @llvm.framerecover(i8* bitcast (void ()* @"\01?test@@YAXXZ" to i8*), i8* %1, i32 1)
+; CHECK: [[LL_PTR:\%.+]] = bitcast i8* [[RECOVER_LL]] to i64*
+; CHECK: [[TMP2:\%.+]] = load i64, i64* [[LL_PTR]], align 8
+; CHECK: call void @"\01?handle_long_long@@YAX_J@Z"(i64 [[TMP2]])
+; CHECK: ret i8* blockaddress(@"\01?test@@YAXXZ", %ret)
+; CHECK: }
+
+; CHECK-LABEL: define internal i8* @"\01?test@@YAXXZ.catch.2"(i8*, i8*)
+; CHECK: entry:
+; CHECK: [[RECOVER_OBJ:\%.+]] = call i8* @llvm.framerecover(i8* bitcast (void ()* @"\01?test@@YAXXZ" to i8*), i8* %1, i32 2)
+; CHECK: [[OBJ_PTR:\%.+]] = bitcast i8* [[RECOVER_OBJ]] to %class.SomeClass**
+; CHECK: [[TMP3:\%.+]] = load %class.SomeClass*, %class.SomeClass** [[OBJ_PTR]], align 8
+; CHECK: call void @"\01?handle_obj@@YAXPEAVSomeClass@@@Z"(%class.SomeClass* [[TMP3]])
+; CHECK: ret i8* blockaddress(@"\01?test@@YAXXZ", %ret)
+; CHECK: }
+
+; CHECK-LABEL: define internal i8* @"\01?test@@YAXXZ.catch.3"(i8*, i8*)
+; CHECK: entry:
+; CHECK: call void @"\01?handle_exception@@YAXXZ"()
+; CHECK: ret i8* blockaddress(@"\01?test@@YAXXZ", %ret)
+; CHECK: }
+
+
+declare void @"\01?may_throw@@YAXXZ"() #1
+
+declare i32 @__CxxFrameHandler3(...)
+
+; Function Attrs: nounwind readnone
+declare i32 @llvm.eh.typeid.for(i8*) #2
+
+; Function Attrs: nounwind
+declare void @llvm.eh.begincatch(i8* nocapture, i8* nocapture) #3
+
+declare void @"\01?handle_exception@@YAXXZ"() #1
+
+; Function Attrs: nounwind
+declare void @llvm.eh.endcatch() #3
+
+declare void @"\01?handle_obj@@YAXPEAVSomeClass@@@Z"(%class.SomeClass*) #1
+
+declare void @"\01?handle_long_long@@YAX_J@Z"(i64) #1
+
+declare void @"\01?handle_int@@YAXH@Z"(i32) #1
+
+attributes #0 = { uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { nounwind readnone }
+attributes #3 = { nounwind }
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{i32 1, !"PIC Level", i32 2}
+!1 = !{!"clang version 3.7.0 (trunk 233155) (llvm/trunk 233153)"}
diff --git a/test/CodeGen/WinEH/cppeh-nested-1.ll b/test/CodeGen/WinEH/cppeh-nested-1.ll
new file mode 100644
index 000000000000..2b13510c5745
--- /dev/null
+++ b/test/CodeGen/WinEH/cppeh-nested-1.ll
@@ -0,0 +1,194 @@
+; RUN: opt -mtriple=x86_64-pc-windows-msvc -winehprepare -S -o - < %s | FileCheck %s
+
+; This test is based on the following code:
+;
+;void test()
+;{
+; try {
+; try {
+; may_throw();
+; } catch (int i) {
+; handle_int(i);
+; }
+; } catch (float f) {
+; handle_float(f);
+; }
+; done();
+;}
+
+; ModuleID = 'cppeh-nested-1.cpp'
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-windows-msvc"
+
+%rtti.TypeDescriptor2 = type { i8**, i8*, [3 x i8] }
+
+$"\01??_R0M@8" = comdat any
+
+$"\01??_R0H@8" = comdat any
+
+@"\01??_7type_info@@6B@" = external constant i8*
+@"\01??_R0M@8" = linkonce_odr global %rtti.TypeDescriptor2 { i8** @"\01??_7type_info@@6B@", i8* null, [3 x i8] c".M\00" }, comdat
+@"\01??_R0H@8" = linkonce_odr global %rtti.TypeDescriptor2 { i8** @"\01??_7type_info@@6B@", i8* null, [3 x i8] c".H\00" }, comdat
+
+; CHECK: define void @"\01?test@@YAXXZ"()
+; CHECK: entry:
+; CHECK: %i = alloca i32, align 4
+; CHECK: %f = alloca float, align 4
+; CHECK: call void (...) @llvm.frameescape(float* %f, i32* %i)
+; CHECK: invoke void @"\01?may_throw@@YAXXZ"()
+; CHECK: to label %invoke.cont unwind label %[[LPAD_LABEL:lpad[0-9]*]]
+
+; Function Attrs: uwtable
+define void @"\01?test@@YAXXZ"() #0 {
+entry:
+ %exn.slot = alloca i8*
+ %ehselector.slot = alloca i32
+ %i = alloca i32, align 4
+ %f = alloca float, align 4
+ invoke void @"\01?may_throw@@YAXXZ"()
+ to label %invoke.cont unwind label %lpad
+
+invoke.cont: ; preds = %entry
+ br label %try.cont
+
+; CHECK: [[LPAD_LABEL]]:
+; CHECK: landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+; CHECK: catch i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i8*)
+; CHECK: catch i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0M@8" to i8*)
+; CHECK: [[RECOVER:\%.+]] = call i8* (...) @llvm.eh.actions(i32 1, i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i8*), i32 1, i8* (i8*, i8*)* @"\01?test@@YAXXZ.catch.1", i32 1, i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0M@8" to i8*), i32 0, i8* (i8*, i8*)* @"\01?test@@YAXXZ.catch")
+; CHECK: indirectbr i8* [[RECOVER]], [label %try.cont, label %try.cont10]
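+; Each catch in the action list above appears to be encoded as a
+; (1, type descriptor, frameescape slot index, outlined handler) tuple: the int
+; handler recovers its catch object from slot 1 (%i) and the float handler from
+; slot 0 (%f), matching the llvm.frameescape order checked in the entry block.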
+
+lpad: ; preds = %entry
+ %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+ catch i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i8*)
+ catch i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0M@8" to i8*)
+ %1 = extractvalue { i8*, i32 } %0, 0
+ store i8* %1, i8** %exn.slot
+ %2 = extractvalue { i8*, i32 } %0, 1
+ store i32 %2, i32* %ehselector.slot
+ br label %catch.dispatch
+
+; CHECK-NOT: catch.dispatch:
+catch.dispatch: ; preds = %lpad
+ %sel = load i32, i32* %ehselector.slot
+ %3 = call i32 @llvm.eh.typeid.for(i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i8*)) #3
+ %matches = icmp eq i32 %sel, %3
+ br i1 %matches, label %catch, label %catch.dispatch3
+
+; CHECK-NOT: catch:
+catch: ; preds = %catch.dispatch
+ %exn = load i8*, i8** %exn.slot
+ %4 = bitcast i32* %i to i8*
+ call void @llvm.eh.begincatch(i8* %exn, i8* %4) #3
+ %5 = load i32, i32* %i, align 4
+ invoke void @"\01?handle_int@@YAXH@Z"(i32 %5)
+ to label %invoke.cont2 unwind label %lpad1
+
+; CHECK-NOT: invoke.cont2:
+invoke.cont2: ; preds = %catch
+ call void @llvm.eh.endcatch() #3
+ br label %try.cont
+
+try.cont: ; preds = %invoke.cont2, %invoke.cont
+ br label %try.cont10
+
+; CHECK-NOT: lpad1:
+lpad1: ; preds = %catch
+ %6 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+ catch i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0M@8" to i8*)
+ %7 = extractvalue { i8*, i32 } %6, 0
+ store i8* %7, i8** %exn.slot
+ %8 = extractvalue { i8*, i32 } %6, 1
+ store i32 %8, i32* %ehselector.slot
+ call void @llvm.eh.endcatch() #3
+ br label %catch.dispatch3
+
+; CHECK-NOT: catch.dispatch3:
+catch.dispatch3: ; preds = %lpad1, %catch.dispatch
+ %sel4 = load i32, i32* %ehselector.slot
+ %9 = call i32 @llvm.eh.typeid.for(i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0M@8" to i8*)) #3
+ %matches5 = icmp eq i32 %sel4, %9
+ br i1 %matches5, label %catch6, label %eh.resume
+
+; CHECK-NOT: catch6:
+catch6: ; preds = %catch.dispatch3
+ %exn7 = load i8*, i8** %exn.slot
+ %10 = bitcast float* %f to i8*
+ call void @llvm.eh.begincatch(i8* %exn7, i8* %10) #3
+ %11 = load float, float* %f, align 4
+ call void @"\01?handle_float@@YAXM@Z"(float %11)
+ call void @llvm.eh.endcatch() #3
+ br label %try.cont10
+
+try.cont10: ; preds = %catch6, %try.cont
+ call void @"\01?done@@YAXXZ"()
+ ret void
+
+; CHECK-NOT: eh.resume:
+eh.resume: ; preds = %catch.dispatch3
+ %exn11 = load i8*, i8** %exn.slot
+ %sel12 = load i32, i32* %ehselector.slot
+ %lpad.val = insertvalue { i8*, i32 } undef, i8* %exn11, 0
+ %lpad.val13 = insertvalue { i8*, i32 } %lpad.val, i32 %sel12, 1
+ resume { i8*, i32 } %lpad.val13
+; CHECK: }
+}
+
+; CHECK: define internal i8* @"\01?test@@YAXXZ.catch"(i8*, i8*)
+; CHECK: entry:
+; CHECK: [[RECOVER_F1:\%.+]] = call i8* @llvm.framerecover(i8* bitcast (void ()* @"\01?test@@YAXXZ" to i8*), i8* %1, i32 0)
+; CHECK: [[F_PTR1:\%.+]] = bitcast i8* [[RECOVER_F1]] to float*
+; CHECK: [[TMP2:\%.+]] = load float, float* [[F_PTR1]], align 4
+; CHECK: call void @"\01?handle_float@@YAXM@Z"(float [[TMP2]])
+; CHECK: ret i8* blockaddress(@"\01?test@@YAXXZ", %try.cont10)
+; CHECK: }
+
+; CHECK: define internal i8* @"\01?test@@YAXXZ.catch.1"(i8*, i8*)
+; CHECK: entry:
+; CHECK: [[RECOVER_I:\%.+]] = call i8* @llvm.framerecover(i8* bitcast (void ()* @"\01?test@@YAXXZ" to i8*), i8* %1, i32 1)
+; CHECK: [[I_PTR:\%.+]] = bitcast i8* [[RECOVER_I]] to i32*
+; CHECK: [[TMP1:\%.+]] = load i32, i32* [[I_PTR]], align 4
+; CHECK: invoke void @"\01?handle_int@@YAXH@Z"(i32 [[TMP1]])
+; CHECK: to label %invoke.cont2 unwind label %[[LPAD1_LABEL:lpad[0-9]*]]
+;
+; CHECK: invoke.cont2:
+; CHECK: ret i8* blockaddress(@"\01?test@@YAXXZ", %try.cont)
+;
+; CHECK: [[LPAD1_LABEL]]:{{[ ]+}}; preds = %entry
+; CHECK: [[LPAD1_VAL:\%.+]] = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+; CHECK: catch i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0M@8" to i8*)
+; CHECK: [[RECOVER1:\%.+]] = call i8* (...) @llvm.eh.actions(i32 1, i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0M@8" to i8*), i32 0, i8* (i8*, i8*)* @"\01?test@@YAXXZ.catch")
+; CHECK: indirectbr i8* [[RECOVER1]], []
+;
+; CHECK: }
+
+
+declare void @"\01?may_throw@@YAXXZ"() #1
+
+declare i32 @__CxxFrameHandler3(...)
+
+; Function Attrs: nounwind readnone
+declare i32 @llvm.eh.typeid.for(i8*) #2
+
+; Function Attrs: nounwind
+declare void @llvm.eh.begincatch(i8* nocapture, i8* nocapture) #3
+
+declare void @"\01?handle_int@@YAXH@Z"(i32) #1
+
+; Function Attrs: nounwind
+declare void @llvm.eh.endcatch() #3
+
+declare void @"\01?handle_float@@YAXM@Z"(float) #1
+
+declare void @"\01?done@@YAXXZ"() #1
+
+attributes #0 = { uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { nounwind readnone }
+attributes #3 = { nounwind }
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{i32 1, !"PIC Level", i32 2}
+!1 = !{!"clang version 3.7.0 (trunk 232069) (llvm/trunk 232070)"}
diff --git a/test/CodeGen/WinEH/cppeh-nested-2.ll b/test/CodeGen/WinEH/cppeh-nested-2.ll
new file mode 100644
index 000000000000..f12f3dbed085
--- /dev/null
+++ b/test/CodeGen/WinEH/cppeh-nested-2.ll
@@ -0,0 +1,324 @@
+; RUN: opt -mtriple=x86_64-pc-windows-msvc -winehprepare -S -o - < %s | FileCheck %s
+
+; This test is based on the following code:
+;
+; class Inner {
+; public:
+; Inner();
+; ~Inner();
+; };
+; class Outer {
+; public:
+; Outer();
+; ~Outer();
+; };
+; void test() {
+; try {
+; Outer outer;
+; try {
+; Inner inner;
+; may_throw();
+; } catch (int i) {
+; handle_int(i);
+; }
+; } catch (float f) {
+; handle_float(f);
+; }
+; done();
+; }
+
+; ModuleID = 'nested-2.cpp'
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-windows-msvc"
+
+%class.Outer = type { i8 }
+%class.Inner = type { i8 }
+
+@_ZTIf = external constant i8*
+@_ZTIi = external constant i8*
+
+; The function entry should be rewritten like this.
+; CHECK: define void @_Z4testv()
+; CHECK: entry:
+; CHECK: %outer = alloca %class.Outer, align 1
+; CHECK: %inner = alloca %class.Inner, align 1
+; CHECK: %i = alloca i32, align 4
+; CHECK: %f = alloca float, align 4
+; CHECK: call void (...) @llvm.frameescape(float* %f, i32* %i, %class.Outer* %outer, %class.Inner* %inner)
+; CHECK: invoke void @_ZN5OuterC1Ev(%class.Outer* %outer)
+; CHECK: to label %invoke.cont unwind label %[[LPAD_LABEL:lpad[0-9]*]]
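+; The llvm.frameescape argument order determines the indices used by
+; llvm.framerecover in the outlined handlers below: %f = 0, %i = 1,
+; %outer = 2, %inner = 3.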
+
+; Function Attrs: uwtable
+define void @_Z4testv() #0 {
+entry:
+ %outer = alloca %class.Outer, align 1
+ %exn.slot = alloca i8*
+ %ehselector.slot = alloca i32
+ %inner = alloca %class.Inner, align 1
+ %i = alloca i32, align 4
+ %f = alloca float, align 4
+ invoke void @_ZN5OuterC1Ev(%class.Outer* %outer)
+ to label %invoke.cont unwind label %lpad
+
+; CHECK: invoke.cont:
+; CHECK: invoke void @_ZN5InnerC1Ev(%class.Inner* %inner)
+; CHECK: to label %invoke.cont2 unwind label %[[LPAD1_LABEL:lpad[0-9]*]]
+
+invoke.cont: ; preds = %entry
+ invoke void @_ZN5InnerC1Ev(%class.Inner* %inner)
+ to label %invoke.cont2 unwind label %lpad1
+
+; CHECK: invoke.cont2:
+; CHECK: invoke void @_Z9may_throwv()
+; CHECK: to label %invoke.cont4 unwind label %[[LPAD3_LABEL:lpad[0-9]*]]
+
+invoke.cont2: ; preds = %invoke.cont
+ invoke void @_Z9may_throwv()
+ to label %invoke.cont4 unwind label %lpad3
+
+; CHECK: invoke.cont4:
+; CHECK: invoke void @_ZN5InnerD1Ev(%class.Inner* %inner)
+; CHECK: to label %invoke.cont5 unwind label %[[LPAD1_LABEL]]
+
+invoke.cont4: ; preds = %invoke.cont2
+ invoke void @_ZN5InnerD1Ev(%class.Inner* %inner)
+ to label %invoke.cont5 unwind label %lpad1
+
+; CHECK: invoke.cont5:
+; CHECK: br label %try.cont
+
+invoke.cont5: ; preds = %invoke.cont4
+ br label %try.cont
+
+; CHECK: [[LPAD_LABEL]]:
+; CHECK: landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+; CHECK-NEXT: catch i8* bitcast (i8** @_ZTIf to i8*)
+; CHECK-NEXT: [[RECOVER:\%.+]] = call i8* (...) @llvm.eh.actions(i32 1, i8* bitcast (i8** @_ZTIf to i8*), i32 0, i8* (i8*, i8*)* @_Z4testv.catch)
+; CHECK-NEXT: indirectbr i8* [[RECOVER]], [label %try.cont19]
+
+lpad: ; preds = %try.cont, %entry
+ %tmp = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+ catch i8* bitcast (i8** @_ZTIf to i8*)
+ %tmp1 = extractvalue { i8*, i32 } %tmp, 0
+ store i8* %tmp1, i8** %exn.slot
+ %tmp2 = extractvalue { i8*, i32 } %tmp, 1
+ store i32 %tmp2, i32* %ehselector.slot
+ br label %catch.dispatch11
+
+; CHECK: [[LPAD1_LABEL]]:
+; CHECK: landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+; CHECK-NEXT: cleanup
+; CHECK-NEXT: catch i8* bitcast (i8** @_ZTIi to i8*)
+; CHECK-NEXT: catch i8* bitcast (i8** @_ZTIf to i8*)
+; CHECK-NEXT: [[RECOVER1:\%.+]] = call i8* (...) @llvm.eh.actions(
+; CHECK-SAME: i32 1, i8* bitcast (i8** @_ZTIi to i8*), i32 1, i8* (i8*, i8*)* @_Z4testv.catch.1,
+; CHECK-SAME: i32 0, void (i8*, i8*)* @_Z4testv.cleanup,
+; CHECK-SAME: i32 1, i8* bitcast (i8** @_ZTIf to i8*), i32 0, i8* (i8*, i8*)* @_Z4testv.catch)
+; CHECK-NEXT: indirectbr i8* [[RECOVER1]], [label %try.cont, label %try.cont19]
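+; The action list for this landing pad appears to run from the innermost
+; handler outward: the int catch handler, then the cleanup that runs the Outer
+; destructor, then the outer float catch handler.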
+
+lpad1: ; preds = %invoke.cont4, %invoke.cont
+ %tmp3 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+ cleanup
+ catch i8* bitcast (i8** @_ZTIi to i8*)
+ catch i8* bitcast (i8** @_ZTIf to i8*)
+ %tmp4 = extractvalue { i8*, i32 } %tmp3, 0
+ store i8* %tmp4, i8** %exn.slot
+ %tmp5 = extractvalue { i8*, i32 } %tmp3, 1
+ store i32 %tmp5, i32* %ehselector.slot
+ br label %catch.dispatch
+
+; CHECK: [[LPAD3_LABEL]]:
+; CHECK: landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+; CHECK-NEXT: cleanup
+; CHECK-NEXT: catch i8* bitcast (i8** @_ZTIi to i8*)
+; CHECK-NEXT: catch i8* bitcast (i8** @_ZTIf to i8*)
+; CHECK-NEXT: [[RECOVER3:\%.+]] = call i8* (...) @llvm.eh.actions(
+; CHECK-SAME: i32 0, void (i8*, i8*)* @_Z4testv.cleanup.2,
+; CHECK-SAME: i32 1, i8* bitcast (i8** @_ZTIi to i8*), i32 1, i8* (i8*, i8*)* @_Z4testv.catch.1,
+; CHECK-SAME: i32 0, void (i8*, i8*)* @_Z4testv.cleanup,
+; CHECK-SAME: i32 1, i8* bitcast (i8** @_ZTIf to i8*), i32 0, i8* (i8*, i8*)* @_Z4testv.catch)
+; CHECK-NEXT: indirectbr i8* [[RECOVER3]], [label %try.cont, label %try.cont19]
+
+lpad3: ; preds = %invoke.cont2
+ %tmp6 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+ cleanup
+ catch i8* bitcast (i8** @_ZTIi to i8*)
+ catch i8* bitcast (i8** @_ZTIf to i8*)
+ %tmp7 = extractvalue { i8*, i32 } %tmp6, 0
+ store i8* %tmp7, i8** %exn.slot
+ %tmp8 = extractvalue { i8*, i32 } %tmp6, 1
+ store i32 %tmp8, i32* %ehselector.slot
+ call void @_ZN5InnerD1Ev(%class.Inner* %inner)
+ br label %catch.dispatch
+
+; CHECK-NOT: catch.dispatch:
+
+catch.dispatch: ; preds = %lpad3, %lpad1
+ %sel = load i32, i32* %ehselector.slot
+ %tmp9 = call i32 @llvm.eh.typeid.for(i8* bitcast (i8** @_ZTIi to i8*)) #4
+ %matches = icmp eq i32 %sel, %tmp9
+ br i1 %matches, label %catch, label %ehcleanup
+
+; CHECK-NOT: catch:
+
+catch: ; preds = %catch.dispatch
+ %exn = load i8*, i8** %exn.slot
+ %i.i8 = bitcast i32* %i to i8*
+ call void @llvm.eh.begincatch(i8* %exn, i8* %i.i8) #4
+ %tmp13 = load i32, i32* %i, align 4
+ invoke void @_Z10handle_inti(i32 %tmp13)
+ to label %invoke.cont8 unwind label %lpad7
+
+; CHECK-NOT: invoke.cont8:
+
+invoke.cont8: ; preds = %catch
+ call void @llvm.eh.endcatch() #4
+ br label %try.cont
+
+; CHECK: try.cont:
+; CHECK: invoke void @_ZN5OuterD1Ev(%class.Outer* %outer)
+; CHECK: to label %invoke.cont9 unwind label %[[LPAD_LABEL]]
+
+try.cont: ; preds = %invoke.cont8, %invoke.cont5
+ invoke void @_ZN5OuterD1Ev(%class.Outer* %outer)
+ to label %invoke.cont9 unwind label %lpad
+
+invoke.cont9: ; preds = %try.cont
+ br label %try.cont19
+
+; CHECK-NOT: lpad7:
+
+lpad7: ; preds = %catch
+ %tmp14 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+ cleanup
+ catch i8* bitcast (i8** @_ZTIf to i8*)
+ %tmp15 = extractvalue { i8*, i32 } %tmp14, 0
+ store i8* %tmp15, i8** %exn.slot
+ %tmp16 = extractvalue { i8*, i32 } %tmp14, 1
+ store i32 %tmp16, i32* %ehselector.slot
+ call void @llvm.eh.endcatch() #4
+ br label %ehcleanup
+
+; CHECK-NOT: ehcleanup: ; preds = %lpad7, %catch.dispatch
+
+ehcleanup: ; preds = %lpad7, %catch.dispatch
+ call void @_ZN5OuterD1Ev(%class.Outer* %outer)
+ br label %catch.dispatch11
+
+; CHECK-NOT: catch.dispatch11:
+
+catch.dispatch11: ; preds = %ehcleanup, %lpad
+ %sel12 = load i32, i32* %ehselector.slot
+ %tmp17 = call i32 @llvm.eh.typeid.for(i8* bitcast (i8** @_ZTIf to i8*)) #4
+ %matches13 = icmp eq i32 %sel12, %tmp17
+ br i1 %matches13, label %catch14, label %eh.resume
+
+; CHECK-NOT: catch14:
+
+catch14: ; preds = %catch.dispatch11
+ %exn15 = load i8*, i8** %exn.slot
+ %f.i8 = bitcast float* %f to i8*
+ call void @llvm.eh.begincatch(i8* %exn15, i8* %f.i8) #4
+ %tmp21 = load float, float* %f, align 4
+ call void @_Z12handle_floatf(float %tmp21)
+ call void @llvm.eh.endcatch() #4
+ br label %try.cont19
+
+try.cont19: ; preds = %catch14, %invoke.cont9
+ call void @_Z4donev()
+ ret void
+
+; CHECK-NOT: eh.resume:
+
+eh.resume: ; preds = %catch.dispatch11
+ %exn20 = load i8*, i8** %exn.slot
+ %sel21 = load i32, i32* %ehselector.slot
+ %lpad.val = insertvalue { i8*, i32 } undef, i8* %exn20, 0
+ %lpad.val22 = insertvalue { i8*, i32 } %lpad.val, i32 %sel21, 1
+ resume { i8*, i32 } %lpad.val22
+
+; CHECK: }
+}
+
+; This catch handler should be outlined.
+; CHECK: define internal i8* @_Z4testv.catch(i8*, i8*)
+; CHECK: entry:
+; CHECK: [[RECOVER_F:\%.+]] = call i8* @llvm.framerecover(i8* bitcast (void ()* @_Z4testv to i8*), i8* %1, i32 0)
+; CHECK: [[F_PTR:\%.+]] = bitcast i8* [[RECOVER_F]] to float*
+; CHECK: [[TMP:\%.+]] = load float, float* [[F_PTR]], align 4
+; CHECK: call void @_Z12handle_floatf(float [[TMP]])
+; CHECK: ret i8* blockaddress(@_Z4testv, %try.cont19)
+; CHECK: }
+
+; This catch handler should be outlined.
+; CHECK: define internal i8* @_Z4testv.catch.1(i8*, i8*)
+; CHECK: entry:
+; CHECK: [[RECOVER_I:\%.+]] = call i8* @llvm.framerecover(i8* bitcast (void ()* @_Z4testv to i8*), i8* %1, i32 1)
+; CHECK: [[I_PTR:\%.+]] = bitcast i8* [[RECOVER_I]] to i32*
+; CHECK: [[TMP1:\%.+]] = load i32, i32* [[I_PTR]], align 4
+; CHECK: invoke void @_Z10handle_inti(i32 [[TMP1]])
+; CHECK: to label %invoke.cont8 unwind label %[[LPAD7_LABEL:lpad[0-9]*]]
+;
+; CHECK: invoke.cont8: ; preds = %entry
+; CHECK: ret i8* blockaddress(@_Z4testv, %try.cont)
+;
+; CHECK: [[LPAD7_LABEL]]:{{[ ]+}}; preds = %entry
+; CHECK: [[LPAD7_VAL:\%.+]] = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+; FIXME: The nested handler body isn't being populated yet.
+; CHECK: }
+
+; This cleanup handler should be outlined.
+; CHECK: define internal void @_Z4testv.cleanup(i8*, i8*)
+; CHECK: entry:
+; CHECK: [[RECOVER_OUTER:\%.+]] = call i8* @llvm.framerecover(i8* bitcast (void ()* @_Z4testv to i8*), i8* %1, i32 2)
+; CHECK: [[OUTER_PTR:\%.+]] = bitcast i8* [[RECOVER_OUTER]] to %class.Outer*
+; CHECK: call void @_ZN5OuterD1Ev(%class.Outer* [[OUTER_PTR]])
+; CHECK: ret void
+; CHECK: }
+
+; This cleanup handler should be outlined.
+; CHECK: define internal void @_Z4testv.cleanup.2(i8*, i8*)
+; CHECK: entry:
+; CHECK: [[RECOVER_INNER:\%.+]] = call i8* @llvm.framerecover(i8* bitcast (void ()* @_Z4testv to i8*), i8* %1, i32 3)
+; CHECK: [[INNER_PTR:\%.+]] = bitcast i8* [[RECOVER_INNER]] to %class.Inner*
+; CHECK: call void @_ZN5InnerD1Ev(%class.Inner* [[INNER_PTR]])
+; CHECK: ret void
+; CHECK: }
+
+
+
+declare void @_ZN5OuterC1Ev(%class.Outer*) #1
+
+declare i32 @__CxxFrameHandler3(...)
+
+declare void @_ZN5InnerC1Ev(%class.Inner*) #1
+
+declare void @_Z9may_throwv() #1
+
+declare void @_ZN5InnerD1Ev(%class.Inner*) #1
+
+declare void @llvm.eh.begincatch(i8*, i8*)
+
+; Function Attrs: nounwind readnone
+declare i32 @llvm.eh.typeid.for(i8*) #3
+
+declare void @_Z10handle_inti(i32) #1
+
+declare void @llvm.eh.endcatch()
+
+declare void @_ZN5OuterD1Ev(%class.Outer*) #1
+
+declare void @_Z12handle_floatf(float) #1
+
+declare void @_Z4donev() #1
+
+attributes #0 = { uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { noinline noreturn nounwind }
+attributes #3 = { nounwind readnone }
+attributes #4 = { nounwind }
+attributes #5 = { noreturn nounwind }
+
+!llvm.ident = !{!0}
+
+!0 = !{!"clang version 3.7.0 (trunk 226027)"}
diff --git a/test/CodeGen/WinEH/cppeh-nested-3.ll b/test/CodeGen/WinEH/cppeh-nested-3.ll
new file mode 100644
index 000000000000..c96abcc6e81c
--- /dev/null
+++ b/test/CodeGen/WinEH/cppeh-nested-3.ll
@@ -0,0 +1,260 @@
+; RUN: opt -mtriple=x86_64-pc-windows-msvc -winehprepare -S -o - < %s | FileCheck %s
+
+; This test is based on the following code:
+;
+;void test()
+;{
+; try {
+; try {
+; may_throw();
+; } catch (int i) {
+; try {
+; may_throw();
+; }
+; catch (int j) {
+; i = j;
+; }
+; handle_int(i);
+; }
+; } catch (float f) {
+; handle_float(f);
+; }
+; done();
+;}
+
+; ModuleID = 'cppeh-nested-3.cpp'
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-windows-msvc"
+
+%rtti.TypeDescriptor2 = type { i8**, i8*, [3 x i8] }
+
+$"\01??_R0M@8" = comdat any
+
+$"\01??_R0H@8" = comdat any
+
+@"\01??_7type_info@@6B@" = external constant i8*
+@"\01??_R0M@8" = linkonce_odr global %rtti.TypeDescriptor2 { i8** @"\01??_7type_info@@6B@", i8* null, [3 x i8] c".M\00" }, comdat
+@"\01??_R0H@8" = linkonce_odr global %rtti.TypeDescriptor2 { i8** @"\01??_7type_info@@6B@", i8* null, [3 x i8] c".H\00" }, comdat
+
+; CHECK: define void @"\01?test@@YAXXZ"()
+; CHECK: entry:
+; CHECK: %i = alloca i32, align 4
+; CHECK: %j = alloca i32, align 4
+; CHECK: %f = alloca float, align 4
+; CHECK: call void (...) @llvm.frameescape(i32* %j, i32* %i, float* %f)
+; CHECK: invoke void @"\01?may_throw@@YAXXZ"()
+; CHECK: to label %invoke.cont unwind label %[[LPAD_LABEL:lpad[0-9]*]]
+
+; Function Attrs: uwtable
+define void @"\01?test@@YAXXZ"() #0 {
+entry:
+ %exn.slot = alloca i8*
+ %ehselector.slot = alloca i32
+ %i = alloca i32, align 4
+ %j = alloca i32, align 4
+ %f = alloca float, align 4
+ invoke void @"\01?may_throw@@YAXXZ"()
+ to label %invoke.cont unwind label %lpad
+
+invoke.cont: ; preds = %entry
+ br label %try.cont10
+
+; CHECK: [[LPAD_LABEL]]:
+; CHECK: landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+; CHECK: catch i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i8*)
+; CHECK: catch i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0M@8" to i8*)
+; CHECK: [[RECOVER:\%.+]] = call i8* (...) @llvm.eh.actions(i32 1, i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i8*), i32 1, i8* (i8*, i8*)* @"\01?test@@YAXXZ.catch.2", i32 1, i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0M@8" to i8*), i32 2, i8* (i8*, i8*)* @"\01?test@@YAXXZ.catch.1")
+; CHECK: indirectbr i8* [[RECOVER]], [label %try.cont10, label %try.cont19]
+
+lpad: ; preds = %entry
+ %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+ catch i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i8*)
+ catch i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0M@8" to i8*)
+ %1 = extractvalue { i8*, i32 } %0, 0
+ store i8* %1, i8** %exn.slot
+ %2 = extractvalue { i8*, i32 } %0, 1
+ store i32 %2, i32* %ehselector.slot
+ br label %catch.dispatch
+
+; CHECK-NOT: catch.dispatch:
+catch.dispatch: ; preds = %lpad
+ %sel = load i32, i32* %ehselector.slot
+ %3 = call i32 @llvm.eh.typeid.for(i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i8*)) #3
+ %matches = icmp eq i32 %sel, %3
+ br i1 %matches, label %catch, label %catch.dispatch11
+
+; CHECK-NOT: catch:
+catch: ; preds = %catch.dispatch
+ %exn = load i8*, i8** %exn.slot
+ %4 = bitcast i32* %i to i8*
+ call void @llvm.eh.begincatch(i8* %exn, i8* %4) #3
+ invoke void @"\01?may_throw@@YAXXZ"()
+ to label %invoke.cont2 unwind label %lpad1
+
+; CHECK-NOT: invoke.cont2:
+invoke.cont2: ; preds = %catch
+ br label %try.cont
+
+; CHECK-NOT: lpad1:
+lpad1: ; preds = %catch
+ %5 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+ catch i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i8*)
+ catch i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0M@8" to i8*)
+ %6 = extractvalue { i8*, i32 } %5, 0
+ store i8* %6, i8** %exn.slot
+ %7 = extractvalue { i8*, i32 } %5, 1
+ store i32 %7, i32* %ehselector.slot
+ br label %catch.dispatch3
+
+; CHECK-NOT: catch.dispatch3:
+catch.dispatch3: ; preds = %lpad1
+ %sel4 = load i32, i32* %ehselector.slot
+ %8 = call i32 @llvm.eh.typeid.for(i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i8*)) #3
+ %matches5 = icmp eq i32 %sel4, %8
+ br i1 %matches5, label %catch6, label %catch.dispatch11
+
+; CHECK-NOT: catch6:
+catch6: ; preds = %catch.dispatch3
+ %exn7 = load i8*, i8** %exn.slot
+ %9 = bitcast i32* %j to i8*
+ call void @llvm.eh.begincatch(i8* %exn7, i8* %9) #3
+ %10 = load i32, i32* %j, align 4
+ store i32 %10, i32* %i, align 4
+ call void @llvm.eh.endcatch() #3
+ br label %try.cont
+
+; CHECK-NOT: try.cont:
+try.cont: ; preds = %catch6, %invoke.cont2
+ %11 = load i32, i32* %i, align 4
+ invoke void @"\01?handle_int@@YAXH@Z"(i32 %11)
+ to label %invoke.cont9 unwind label %lpad8
+
+; CHECK-NOT: invoke.cont9:
+invoke.cont9: ; preds = %try.cont
+ call void @llvm.eh.endcatch() #3
+ br label %try.cont10
+
+try.cont10: ; preds = %invoke.cont9, %invoke.cont
+ br label %try.cont19
+
+; CHECK-NOT: lpad8:
+lpad8: ; preds = %try.cont
+ %12 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+ catch i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0M@8" to i8*)
+ %13 = extractvalue { i8*, i32 } %12, 0
+ store i8* %13, i8** %exn.slot
+ %14 = extractvalue { i8*, i32 } %12, 1
+ store i32 %14, i32* %ehselector.slot
+ call void @llvm.eh.endcatch() #3
+ br label %catch.dispatch11
+
+; CHECK-NOT: catch.dispatch11:
+catch.dispatch11: ; preds = %lpad8, %catch.dispatch3, %catch.dispatch
+ %sel12 = load i32, i32* %ehselector.slot
+ %15 = call i32 @llvm.eh.typeid.for(i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0M@8" to i8*)) #3
+ %matches13 = icmp eq i32 %sel12, %15
+ br i1 %matches13, label %catch14, label %eh.resume
+
+; CHECK-NOT: catch14:
+catch14: ; preds = %catch.dispatch11
+ %exn15 = load i8*, i8** %exn.slot
+ %16 = bitcast float* %f to i8*
+ call void @llvm.eh.begincatch(i8* %exn15, i8* %16) #3
+ %17 = load float, float* %f, align 4
+ call void @"\01?handle_float@@YAXM@Z"(float %17)
+ call void @llvm.eh.endcatch() #3
+ br label %try.cont19
+
+try.cont19: ; preds = %catch14, %try.cont10
+ call void @"\01?done@@YAXXZ"()
+ ret void
+
+; CHECK-NOT: eh.resume:
+eh.resume: ; preds = %catch.dispatch11
+ %exn20 = load i8*, i8** %exn.slot
+ %sel21 = load i32, i32* %ehselector.slot
+ %lpad.val = insertvalue { i8*, i32 } undef, i8* %exn20, 0
+ %lpad.val22 = insertvalue { i8*, i32 } %lpad.val, i32 %sel21, 1
+ resume { i8*, i32 } %lpad.val22
+; CHECK: }
+}
+
+; CHECK: define internal i8* @"\01?test@@YAXXZ.catch"(i8*, i8*)
+; CHECK: entry:
+; CHECK: [[RECOVER_J:\%.+]] = call i8* @llvm.framerecover(i8* bitcast (void ()* @"\01?test@@YAXXZ" to i8*), i8* %1, i32 0)
+; CHECK: [[J_PTR:\%.+]] = bitcast i8* [[RECOVER_J]] to i32*
+; CHECK: [[RECOVER_I1:\%.+]] = call i8* @llvm.framerecover(i8* bitcast (void ()* @"\01?test@@YAXXZ" to i8*), i8* %1, i32 1)
+; CHECK: [[I_PTR1:\%.+]] = bitcast i8* [[RECOVER_I1]] to i32*
+; CHECK: [[TMP3:\%.+]] = load i32, i32* [[J_PTR]], align 4
+; CHECK: store i32 [[TMP3]], i32* [[I_PTR1]]
+; CHECK: ret i8* blockaddress(@"\01?test@@YAXXZ.catch.2", %invoke.cont2)
+; CHECK: }
+
+; CHECK: define internal i8* @"\01?test@@YAXXZ.catch.1"(i8*, i8*)
+; CHECK: entry:
+; CHECK: [[RECOVER_F:\%.+]] = call i8* @llvm.framerecover(i8* bitcast (void ()* @"\01?test@@YAXXZ" to i8*), i8* %1, i32 2)
+; CHECK: [[F_PTR:\%.+]] = bitcast i8* [[RECOVER_F]] to float*
+; CHECK: [[TMP2:\%.+]] = load float, float* [[F_PTR]], align 4
+; CHECK: call void @"\01?handle_float@@YAXM@Z"(float [[TMP2]])
+; CHECK: ret i8* blockaddress(@"\01?test@@YAXXZ", %try.cont19)
+; CHECK: }
+
+; CHECK: define internal i8* @"\01?test@@YAXXZ.catch.2"(i8*, i8*)
+; CHECK: entry:
+; CHECK: [[RECOVER_I:\%.+]] = call i8* @llvm.framerecover(i8* bitcast (void ()* @"\01?test@@YAXXZ" to i8*), i8* %1, i32 1)
+; CHECK: [[I_PTR:\%.+]] = bitcast i8* [[RECOVER_I]] to i32*
+; CHECK: invoke void @"\01?may_throw@@YAXXZ"()
+; CHECK: to label %invoke.cont2 unwind label %[[LPAD1_LABEL:lpad[0-9]*]]
+;
+; CHECK: invoke.cont2: ; preds = %[[LPAD1_LABEL]], %entry
+; CHECK: [[TMP1:\%.+]] = load i32, i32* [[I_PTR]], align 4
+; CHECK: invoke void @"\01?handle_int@@YAXH@Z"(i32 [[TMP1]])
+; CHECK: to label %invoke.cont9 unwind label %[[LPAD8_LABEL:lpad[0-9]*]]
+;
+; CHECK: [[LPAD1_LABEL]]:{{[ ]+}}; preds = %entry
+; CHECK: [[LPAD1_VAL:\%.+]] = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+; CHECK: catch i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i8*)
+; CHECK: catch i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0M@8" to i8*)
+; CHECK: [[RECOVER1:\%.+]] = call i8* (...) @llvm.eh.actions(i32 1, i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i8*), i32 0, i8* (i8*, i8*)* @"\01?test@@YAXXZ.catch", i32 1, i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0M@8" to i8*), i32 2, i8* (i8*, i8*)* @"\01?test@@YAXXZ.catch.1")
+; CHECK: indirectbr i8* [[RECOVER1]], [label %invoke.cont2]
+;
+; CHECK: invoke.cont9:
+; CHECK: ret i8* blockaddress(@"\01?test@@YAXXZ", %try.cont10)
+;
+; CHECK: [[LPAD8_LABEL]]:{{[ ]+}}; preds = %invoke.cont2
+; CHECK: [[LPAD8_VAL:\%.+]] = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+; CHECK: catch i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0M@8" to i8*)
+; CHECK: [[RECOVER2:\%.+]] = call i8* (...) @llvm.eh.actions(i32 1, i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0M@8" to i8*), i32 2, i8* (i8*, i8*)* @"\01?test@@YAXXZ.catch.1")
+; CHECK: indirectbr i8* [[RECOVER2]], []
+;
+; CHECK: }
+
+declare void @"\01?may_throw@@YAXXZ"() #1
+
+declare i32 @__CxxFrameHandler3(...)
+
+; Function Attrs: nounwind readnone
+declare i32 @llvm.eh.typeid.for(i8*) #2
+
+; Function Attrs: nounwind
+declare void @llvm.eh.begincatch(i8* nocapture, i8* nocapture) #3
+
+; Function Attrs: nounwind
+declare void @llvm.eh.endcatch() #3
+
+declare void @"\01?handle_int@@YAXH@Z"(i32) #1
+
+declare void @"\01?handle_float@@YAXM@Z"(float) #1
+
+declare void @"\01?done@@YAXXZ"() #1
+
+attributes #0 = { uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { nounwind readnone }
+attributes #3 = { nounwind }
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{i32 1, !"PIC Level", i32 2}
+!1 = !{!"clang version 3.7.0 (trunk 232069) (llvm/trunk 232070)"}
diff --git a/test/CodeGen/WinEH/cppeh-nested-rethrow.ll b/test/CodeGen/WinEH/cppeh-nested-rethrow.ll
new file mode 100644
index 000000000000..60b404113345
--- /dev/null
+++ b/test/CodeGen/WinEH/cppeh-nested-rethrow.ll
@@ -0,0 +1,212 @@
+; RUN: opt -mtriple=x86_64-pc-windows-msvc -winehprepare -S -o - < %s | FileCheck %s
+
+; This test was generated from the following code.
+;
+; void test1() {
+; try {
+; try {
+; throw 1;
+; } catch(...) { throw; }
+; } catch (...) { }
+; }
+; void test2() {
+; try {
+; throw 1;
+; } catch(...) {
+; try {
+; throw;
+; } catch (...) {}
+; }
+; }
+;
+; These two functions result in functionally equivalent code, but the last
+; catch block contains a call to llvm.eh.endcatch that tripped up processing
+; during development.
+;
+; The main purpose of this test is to verify that we can correctly
+; handle the case of nested landing pads that return directly to a block in
+; the parent function.
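+; If that works, the outlined handlers should return blockaddress constants
+; that point back into the parent functions rather than placeholder values,
+; which is what the CHECK-NOT lines on the outlined handlers below verify.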
+
+; ModuleID = 'cppeh-nested-rethrow.cpp'
+target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-windows-msvc"
+
+%rtti.TypeDescriptor2 = type { i8**, i8*, [3 x i8] }
+%eh.CatchableType = type { i32, i32, i32, i32, i32, i32, i32 }
+%eh.CatchableTypeArray.1 = type { i32, [1 x i32] }
+%eh.ThrowInfo = type { i32, i32, i32, i32 }
+
+$"\01??_R0H@8" = comdat any
+
+$"_CT??_R0H@84" = comdat any
+
+$_CTA1H = comdat any
+
+$_TI1H = comdat any
+
+@"\01??_7type_info@@6B@" = external constant i8*
+@"\01??_R0H@8" = linkonce_odr global %rtti.TypeDescriptor2 { i8** @"\01??_7type_info@@6B@", i8* null, [3 x i8] c".H\00" }, comdat
+@__ImageBase = external constant i8
+@"_CT??_R0H@84" = linkonce_odr unnamed_addr constant %eh.CatchableType { i32 1, i32 trunc (i64 sub nuw nsw (i64 ptrtoint (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i64), i64 ptrtoint (i8* @__ImageBase to i64)) to i32), i32 0, i32 -1, i32 0, i32 4, i32 0 }, section ".xdata", comdat
+@_CTA1H = linkonce_odr unnamed_addr constant %eh.CatchableTypeArray.1 { i32 1, [1 x i32] [i32 trunc (i64 sub nuw nsw (i64 ptrtoint (%eh.CatchableType* @"_CT??_R0H@84" to i64), i64 ptrtoint (i8* @__ImageBase to i64)) to i32)] }, section ".xdata", comdat
+@_TI1H = linkonce_odr unnamed_addr constant %eh.ThrowInfo { i32 0, i32 0, i32 0, i32 trunc (i64 sub nuw nsw (i64 ptrtoint (%eh.CatchableTypeArray.1* @_CTA1H to i64), i64 ptrtoint (i8* @__ImageBase to i64)) to i32) }, section ".xdata", comdat
+
+; CHECK-LABEL: define void @"\01?test1@@YAXXZ"()
+; CHECK: entry:
+; CHECK: call void (...) @llvm.frameescape
+
+; Function Attrs: nounwind uwtable
+define void @"\01?test1@@YAXXZ"() #0 {
+entry:
+ %tmp = alloca i32, align 4
+ %exn.slot = alloca i8*
+ %ehselector.slot = alloca i32
+ store i32 1, i32* %tmp
+ %0 = bitcast i32* %tmp to i8*
+ invoke void @_CxxThrowException(i8* %0, %eh.ThrowInfo* @_TI1H) #2
+ to label %unreachable unwind label %lpad
+
+lpad: ; preds = %entry
+ %1 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+ catch i8* null
+ %2 = extractvalue { i8*, i32 } %1, 0
+ store i8* %2, i8** %exn.slot
+ %3 = extractvalue { i8*, i32 } %1, 1
+ store i32 %3, i32* %ehselector.slot
+ br label %catch
+
+catch: ; preds = %lpad
+ %exn = load i8*, i8** %exn.slot
+ call void @llvm.eh.begincatch(i8* %exn, i8* null) #1
+ invoke void @_CxxThrowException(i8* null, %eh.ThrowInfo* null) #2
+ to label %unreachable unwind label %lpad1
+
+lpad1: ; preds = %catch
+ %4 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+ catch i8* null
+ %5 = extractvalue { i8*, i32 } %4, 0
+ store i8* %5, i8** %exn.slot
+ %6 = extractvalue { i8*, i32 } %4, 1
+ store i32 %6, i32* %ehselector.slot
+ br label %catch2
+
+catch2: ; preds = %lpad1
+ %exn3 = load i8*, i8** %exn.slot
+ call void @llvm.eh.begincatch(i8* %exn3, i8* null) #1
+ call void @llvm.eh.endcatch() #1
+ br label %try.cont.4
+
+; This block should not be eliminated.
+; CHECK: try.cont.4:
+try.cont.4: ; preds = %catch2, %try.cont
+ ret void
+
+try.cont: ; No predecessors!
+ br label %try.cont.4
+
+unreachable: ; preds = %catch, %entry
+ unreachable
+; CHECK: }
+}
+
+declare void @_CxxThrowException(i8*, %eh.ThrowInfo*)
+
+declare i32 @__CxxFrameHandler3(...)
+
+; Function Attrs: nounwind
+declare void @llvm.eh.begincatch(i8* nocapture, i8* nocapture) #1
+
+; Function Attrs: nounwind
+declare void @llvm.eh.endcatch() #1
+
+; CHECK-LABEL: define void @"\01?test2@@YAXXZ"()
+; CHECK: entry:
+; CHECK: call void (...) @llvm.frameescape
+
+; Function Attrs: nounwind uwtable
+define void @"\01?test2@@YAXXZ"() #0 {
+entry:
+ %tmp = alloca i32, align 4
+ %exn.slot = alloca i8*
+ %ehselector.slot = alloca i32
+ store i32 1, i32* %tmp
+ %0 = bitcast i32* %tmp to i8*
+ invoke void @_CxxThrowException(i8* %0, %eh.ThrowInfo* @_TI1H) #2
+ to label %unreachable unwind label %lpad
+
+lpad: ; preds = %entry
+ %1 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+ catch i8* null
+ %2 = extractvalue { i8*, i32 } %1, 0
+ store i8* %2, i8** %exn.slot
+ %3 = extractvalue { i8*, i32 } %1, 1
+ store i32 %3, i32* %ehselector.slot
+ br label %catch
+
+catch: ; preds = %lpad
+ %exn = load i8*, i8** %exn.slot
+ call void @llvm.eh.begincatch(i8* %exn, i8* null) #1
+ invoke void @_CxxThrowException(i8* null, %eh.ThrowInfo* null) #2
+ to label %unreachable unwind label %lpad1
+
+lpad1: ; preds = %catch
+ %4 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+ catch i8* null
+ %5 = extractvalue { i8*, i32 } %4, 0
+ store i8* %5, i8** %exn.slot
+ %6 = extractvalue { i8*, i32 } %4, 1
+ store i32 %6, i32* %ehselector.slot
+ br label %catch2
+
+catch2: ; preds = %lpad1
+ %exn3 = load i8*, i8** %exn.slot
+ call void @llvm.eh.begincatch(i8* %exn3, i8* null) #1
+ call void @llvm.eh.endcatch() #1
+ br label %try.cont
+
+; This block should not be eliminated.
+; CHECK: try.cont:
+; The endcatch call should be eliminated.
+; CHECK-NOT: call void @llvm.eh.endcatch()
+try.cont: ; preds = %catch2
+ call void @llvm.eh.endcatch() #1
+ br label %try.cont.4
+
+try.cont.4: ; preds = %try.cont
+ ret void
+
+unreachable: ; preds = %catch, %entry
+ unreachable
+; CHECK: }
+}
+
+; The outlined test1.catch handler should return to a valid block address.
+; CHECK-LABEL: define internal i8* @"\01?test1@@YAXXZ.catch"(i8*, i8*)
+; CHECK-NOT: ret i8* inttoptr (i32 1 to i8*)
+; CHECK: }
+
+; The outlined test1.catch.1 handler should not contain a return instruction.
+; CHECK-LABEL: define internal i8* @"\01?test1@@YAXXZ.catch.1"(i8*, i8*)
+; CHECK-NOT: ret
+; CHECK: }
+
+; The outlined test2.catch handler should return to a valid block address.
+; CHECK-LABEL: define internal i8* @"\01?test2@@YAXXZ.catch"(i8*, i8*)
+; CHECK-NOT: ret i8* inttoptr (i32 1 to i8*)
+; CHECK: }
+
+; The outlined test2.catch.2 handler should not contain a return instruction.
+; CHECK-LABEL: define internal i8* @"\01?test2@@YAXXZ.catch.2"(i8*, i8*)
+; CHECK-NOT: ret
+; CHECK: }
+
+
+attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind }
+attributes #2 = { noreturn }
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{i32 1, !"PIC Level", i32 2}
+!1 = !{!"clang version 3.7.0 (trunk 236059)"}
diff --git a/test/CodeGen/WinEH/cppeh-nonalloca-frame-values.ll b/test/CodeGen/WinEH/cppeh-nonalloca-frame-values.ll
new file mode 100644
index 000000000000..15f6bfb4680d
--- /dev/null
+++ b/test/CodeGen/WinEH/cppeh-nonalloca-frame-values.ll
@@ -0,0 +1,278 @@
+; RUN: opt -mtriple=x86_64-pc-windows-msvc -winehprepare -S -o - < %s | FileCheck %s
+
+; This test is based on the following code:
+;
+; struct SomeData {
+; int a;
+; int b;
+; };
+;
+; void may_throw();
+; void does_not_throw(int i);
+; void dump(int *, int, SomeData&);
+;
+; void test() {
+; int NumExceptions = 0;
+; int ExceptionVal[10];
+; SomeData Data = { 0, 0 };
+;
+; for (int i = 0; i < 10; ++i) {
+; try {
+; may_throw();
+; Data.a += i;
+; }
+; catch (int e) {
+; ExceptionVal[NumExceptions] = e;
+; ++NumExceptions;
+; if (e == i)
+; Data.b += e;
+; else
+; Data.a += e;
+; }
+; does_not_throw(NumExceptions);
+; }
+; dump(ExceptionVal, NumExceptions, Data);
+; }
+;
+; Unlike the cppeh-frame-vars.ll test, this test was generated using -O2
+; optimization, which results in non-alloca values being used in the
+; catch handler.
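+; To make those values recoverable from the outlined handler, the preparation
+; pass is expected to demote them to stack slots (the *.reg2mem allocas in the
+; CHECK lines below) that llvm.frameescape and llvm.framerecover can address.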
+
+; ModuleID = 'cppeh-frame-vars.cpp'
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-windows-msvc"
+
+%rtti.TypeDescriptor2 = type { i8**, i8*, [3 x i8] }
+%struct.SomeData = type { i32, i32 }
+
+$"\01??_R0H@8" = comdat any
+
+@"\01??_7type_info@@6B@" = external constant i8*
+@"\01??_R0H@8" = linkonce_odr global %rtti.TypeDescriptor2 { i8** @"\01??_7type_info@@6B@", i8* null, [3 x i8] c".H\00" }, comdat
+
+; The function entry should be rewritten like this.
+; CHECK: define void @"\01?test@@YAXXZ"()
+; CHECK: entry:
+; CHECK: [[NUMEXCEPTIONS_REGMEM:\%.+]] = alloca i32
+; CHECK: [[I_REGMEM:\%.+]] = alloca i32
+; CHECK: [[B_REGMEM:\%.+]] = alloca i32*
+; CHECK: [[A_REGMEM:\%.+]] = alloca i32*
+; CHECK: [[E_PTR:\%.+]] = alloca i32, align 4
+; CHECK: [[EXCEPTIONVAL:\%.+]] = alloca [10 x i32], align 16
+; CHECK: [[DATA_PTR:\%.+]] = alloca i64, align 8
+; CHECK: [[TMPCAST:\%.+]] = bitcast i64* [[DATA_PTR]] to %struct.SomeData*
+; CHECK: [[TMP:\%.+]] = bitcast [10 x i32]* [[EXCEPTIONVAL]] to i8*
+; CHECK: call void @llvm.lifetime.start(i64 40, i8* [[TMP]])
+; CHECK: store i64 0, i64* [[DATA_PTR]], align 8
+; CHECK: [[A_PTR:\%.+]] = bitcast i64* [[DATA_PTR]] to i32*
+; CHECK: store i32* [[A_PTR]], i32** [[A_REGMEM]]
+; CHECK: [[B_PTR:\%.+]] = getelementptr inbounds %struct.SomeData, %struct.SomeData* [[TMPCAST]], i64 0, i32 1
+; CHECK: store i32* [[B_PTR]], i32** [[B_REGMEM]]
+; CHECK: call void (...) @llvm.frameescape(i32* %e, i32* %NumExceptions.020.reg2mem, [10 x i32]* [[EXCEPTIONVAL]], i32* %inc.reg2mem, i32* [[I_REGMEM]], i32** [[A_REGMEM]], i32** [[B_REGMEM]])
+; CHECK: br label %for.body
+
+; Function Attrs: uwtable
+define void @"\01?test@@YAXXZ"() #0 {
+entry:
+ %e = alloca i32, align 4
+ %ExceptionVal = alloca [10 x i32], align 16
+ %Data = alloca i64, align 8
+ %tmpcast = bitcast i64* %Data to %struct.SomeData*
+ %0 = bitcast [10 x i32]* %ExceptionVal to i8*
+ call void @llvm.lifetime.start(i64 40, i8* %0) #1
+ store i64 0, i64* %Data, align 8
+ %a = bitcast i64* %Data to i32*
+ %b = getelementptr inbounds %struct.SomeData, %struct.SomeData* %tmpcast, i64 0, i32 1
+ br label %for.body
+
+; CHECK: for.body:
+; CHECK: [[NUMEXCEPTIONS_PHI:\%.*]] = phi i32 [ 0, %entry ], [ {{\%NumExceptions.*}}, %try.cont ]
+; CHECK: [[I_PHI:\%.*]] = phi i32 [ 0, %entry ], [ {{\%inc.*}}, %try.cont ]
+; CHECK: store i32 [[I_PHI]], i32* [[I_REGMEM]]
+; CHECK: store i32 [[NUMEXCEPTIONS_PHI]], i32* [[NUMEXCEPTIONS_REGMEM]]
+; CHECK: invoke void @"\01?may_throw@@YAXXZ"()
+for.body: ; preds = %entry, %try.cont
+ %NumExceptions.020 = phi i32 [ 0, %entry ], [ %NumExceptions.1, %try.cont ]
+ %i.019 = phi i32 [ 0, %entry ], [ %inc5, %try.cont ]
+ invoke void @"\01?may_throw@@YAXXZ"()
+ to label %invoke.cont unwind label %lpad
+
+; CHECK: invoke.cont: ; preds = %for.body
+; CHECK: [[A_RELOAD:\%.+]] = load i32*, i32** [[A_REGMEM]]
+; CHECK: [[TMP1:\%.+]] = load i32, i32* [[A_RELOAD]], align 8
+; CHECK: [[I_RELOAD:\%.+]] = load i32, i32* [[I_REGMEM]]
+; CHECK: [[ADD:\%.+]] = add nsw i32 [[TMP1]], [[I_RELOAD]]
+; CHECK: [[A_RELOAD1:\%.+]] = load i32*, i32** [[A_REGMEM]]
+; CHECK: [[NUMEXCEPTIONS_RELOAD:\%.+]] = load i32, i32* [[NUMEXCEPTIONS_REGMEM]]
+; CHECK: br label %try.cont
+invoke.cont: ; preds = %for.body
+ %1 = load i32, i32* %a, align 8, !tbaa !2
+ %add = add nsw i32 %1, %i.019
+ store i32 %add, i32* %a, align 8, !tbaa !2
+ br label %try.cont
+
+; CHECK: [[LPAD_LABEL:lpad[0-9]*]]:{{[ ]+}}; preds = %for.body
+; CHECK: landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+; CHECK-NEXT: catch i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i8*)
+; CHECK-NEXT: [[RECOVER:\%.+]] = call i8* (...) @llvm.eh.actions(i32 1, i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i8*), i32 0, i8* (i8*, i8*)* @"\01?test@@YAXXZ.catch")
+; CHECK-NEXT: indirectbr i8* [[RECOVER]], [label %[[SPLIT_RECOVER_BB:.*]]]
+
+lpad: ; preds = %for.body
+ %2 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+ catch i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i8*)
+ %3 = extractvalue { i8*, i32 } %2, 1
+ %4 = tail call i32 @llvm.eh.typeid.for(i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i8*)) #1
+ %matches = icmp eq i32 %3, %4
+ br i1 %matches, label %catch, label %eh.resume
+
+; CHECK-NOT: catch:
+
+catch: ; preds = %lpad
+ %5 = extractvalue { i8*, i32 } %2, 0
+ %e.i8 = bitcast i32* %e to i8*
+ call void @llvm.eh.begincatch(i8* %5, i8* %e.i8) #1
+ %tmp8 = load i32, i32* %e, align 4, !tbaa !7
+ %idxprom = sext i32 %NumExceptions.020 to i64
+ %arrayidx = getelementptr inbounds [10 x i32], [10 x i32]* %ExceptionVal, i64 0, i64 %idxprom
+ store i32 %tmp8, i32* %arrayidx, align 4, !tbaa !7
+ %inc = add nsw i32 %NumExceptions.020, 1
+ %cmp1 = icmp eq i32 %tmp8, %i.019
+ br i1 %cmp1, label %if.then, label %if.else
+
+if.then: ; preds = %catch
+ %tmp9 = load i32, i32* %b, align 4, !tbaa !8
+ %add2 = add nsw i32 %tmp9, %i.019
+ store i32 %add2, i32* %b, align 4, !tbaa !8
+ br label %if.end
+
+; CHECK-NOT: if.else:
+
+if.else: ; preds = %catch
+ %tmp10 = load i32, i32* %a, align 8, !tbaa !2
+ %add4 = add nsw i32 %tmp10, %tmp8
+ store i32 %add4, i32* %a, align 8, !tbaa !2
+ br label %if.end
+
+; CHECK-NOT: if.end:
+; CHECK: [[SPLIT_RECOVER_BB]]:
+; CHECK: [[INC_RELOAD:\%.*]] = load i32, i32*
+; CHECK: br label %try.cont
+
+if.end: ; preds = %if.else, %if.then
+ tail call void @llvm.eh.endcatch() #1
+ br label %try.cont
+
+; CHECK: try.cont:{{[ ]+}}; preds = %[[SPLIT_RECOVER_BB]], %invoke.cont
+; CHECK: [[NUMEXCEPTIONS_PHI:\%.*]] = phi i32 [ [[NUMEXCEPTIONS_RELOAD]], %invoke.cont ], [ [[INC_RELOAD]], %[[SPLIT_RECOVER_BB]] ]
+; CHECK: tail call void @"\01?does_not_throw@@YAXH@Z"(i32 [[NUMEXCEPTIONS_PHI]])
+; CHECK: [[I_RELOAD:\%.+]] = load i32, i32* [[I_REGMEM]]
+; CHECK: [[INC:\%.+]] = add nuw nsw i32 [[I_RELOAD]], 1
+; CHECK: [[CMP:\%.+]] = icmp slt i32 [[INC]], 10
+; CHECK: br i1 [[CMP]], label %for.body, label %for.end
+
+try.cont: ; preds = %if.end, %invoke.cont
+ %NumExceptions.1 = phi i32 [ %NumExceptions.020, %invoke.cont ], [ %inc, %if.end ]
+ tail call void @"\01?does_not_throw@@YAXH@Z"(i32 %NumExceptions.1)
+ %inc5 = add nuw nsw i32 %i.019, 1
+ %cmp = icmp slt i32 %inc5, 10
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %try.cont
+ %NumExceptions.1.lcssa = phi i32 [ %NumExceptions.1, %try.cont ]
+ %arraydecay = getelementptr inbounds [10 x i32], [10 x i32]* %ExceptionVal, i64 0, i64 0
+ call void @"\01?dump@@YAXPEAHHAEAUSomeData@@@Z"(i32* %arraydecay, i32 %NumExceptions.1.lcssa, %struct.SomeData* dereferenceable(8) %tmpcast)
+ call void @llvm.lifetime.end(i64 40, i8* %0) #1
+ ret void
+
+eh.resume: ; preds = %lpad
+ %.lcssa = phi { i8*, i32 } [ %2, %lpad ]
+ resume { i8*, i32 } %.lcssa
+}
+
+; The following catch handler should be outlined.
+; CHECK: define internal i8* @"\01?test@@YAXXZ.catch"(i8*, i8*)
+; CHECK: entry:
+; CHECK: [[RECOVER_E:\%.+]] = call i8* @llvm.framerecover(i8* bitcast (void ()* @"\01?test@@YAXXZ" to i8*), i8* %1, i32 0)
+; CHECK: [[E_PTR:\%.+]] = bitcast i8* [[RECOVER_E]] to i32*
+; CHECK: [[RECOVER_NUMEXCEPTIONS:\%.+]] = call i8* @llvm.framerecover(i8* bitcast (void ()* @"\01?test@@YAXXZ" to i8*), i8* %1, i32 1)
+; CHECK: [[NUMEXCEPTIONS_REGMEM:\%.+]] = bitcast i8* [[RECOVER_NUMEXCEPTIONS]] to i32*
+; CHECK: [[RECOVER_EXCEPTIONVAL:\%.+]] = call i8* @llvm.framerecover(i8* bitcast (void ()* @"\01?test@@YAXXZ" to i8*), i8* %1, i32 2)
+; CHECK: [[EXCEPTIONVAL:\%.+]] = bitcast i8* [[RECOVER_EXCEPTIONVAL]] to [10 x i32]*
+; CHECK: [[RECOVER_INC:\%.+]] = call i8* @llvm.framerecover(i8* bitcast (void ()* @"\01?test@@YAXXZ" to i8*), i8* %1, i32 3)
+; CHECK: [[INC_REGMEM:\%.+]] = bitcast i8* [[RECOVER_INC]] to i32*
+; CHECK: [[RECOVER_I:\%.+]] = call i8* @llvm.framerecover(i8* bitcast (void ()* @"\01?test@@YAXXZ" to i8*), i8* %1, i32 4)
+; CHECK: [[I_REGMEM:\%.+]] = bitcast i8* [[RECOVER_I]] to i32*
+; CHECK: [[RECOVER_A:\%.+]] = call i8* @llvm.framerecover(i8* bitcast (void ()* @"\01?test@@YAXXZ" to i8*), i8* %1, i32 5)
+; CHECK: [[A_REGMEM:\%.+]] = bitcast i8* [[RECOVER_A]] to i32**
+; CHECK: [[RECOVER_B:\%.+]] = call i8* @llvm.framerecover(i8* bitcast (void ()* @"\01?test@@YAXXZ" to i8*), i8* %1, i32 6)
+; CHECK: [[B_REGMEM:\%.+]] = bitcast i8* [[RECOVER_B]] to i32**
+; CHECK: [[E_I8PTR:\%.+]] = bitcast i32* [[E_PTR]] to i8*
+; CHECK: [[TMP:\%.+]] = load i32, i32* [[E_PTR]], align 4
+; CHECK: [[NUMEXCEPTIONS_RELOAD:\%.+]] = load i32, i32* [[NUMEXCEPTIONS_REGMEM]]
+; CHECK: [[IDXPROM:\%.+]] = sext i32 [[NUMEXCEPTIONS_RELOAD]] to i64
+; CHECK: [[ARRAYIDX:\%.+]] = getelementptr inbounds [10 x i32], [10 x i32]* [[EXCEPTIONVAL]], i64 0, i64 [[IDXPROM]]
+; CHECK: store i32 [[TMP]], i32* [[ARRAYIDX]], align 4
+; CHECK: [[NUMEXCEPTIONS_RELOAD:\%.+]] = load i32, i32* [[NUMEXCEPTIONS_REGMEM]]
+; CHECK: [[INC:\%.+]] = add nsw i32 [[NUMEXCEPTIONS_RELOAD]], 1
+; CHECK: [[CMP:\%.+]] = icmp eq i32 [[TMP]], [[I_RELOAD]]
+; CHECK: br i1 [[CMP]], label %if.then, label %if.else
+;
+; CHECK: if.then:{{[ ]+}}; preds = %entry
+; CHECK: [[B_RELOAD:\%.+]] = load i32*, i32** [[B_REGMEM]]
+; CHECK: [[TMP1:\%.+]] = load i32, i32* [[B_RELOAD]], align 4
+; CHECK: [[I_RELOAD:\%.+]] = load i32, i32* [[I_REGMEM]]
+; CHECK: [[ADD:\%.+]] = add nsw i32 [[TMP1]], [[I_RELOAD]]
+; CHECK: [[B_RELOAD:\%.+]] = load i32*, i32** [[B_REGMEM]]
+; CHECK: store i32 [[ADD]], i32* [[B_RELOAD]], align 4
+; CHECK: br label %if.end
+;
+; CHECK: if.else:{{[ ]+}}; preds = %entry
+; CHECK: [[A_RELOAD:\%.+]] = load i32*, i32** [[A_REGMEM]]
+; CHECK: [[TMP2:\%.+]] = load i32, i32* [[A_RELOAD]], align 8
+; CHECK: [[ADD2:\%.+]] = add nsw i32 [[TMP2]], [[TMP]]
+; CHECK: [[A_RELOAD:\%.+]] = load i32*, i32** [[A_REGMEM]]
+; CHECK: store i32 [[ADD2]], i32* [[A_RELOAD]], align 8
+; CHECK: br label %if.end
+;
+; CHECK: if.end:{{[ ]+}}; preds = %if.else, %if.then
+; CHECK: ret i8* blockaddress(@"\01?test@@YAXXZ", %[[SPLIT_RECOVER_BB]])
+; CHECK: }
+
+; Function Attrs: nounwind
+declare void @llvm.lifetime.start(i64, i8* nocapture) #1
+
+declare void @"\01?may_throw@@YAXXZ"() #2
+
+declare i32 @__CxxFrameHandler3(...)
+
+; Function Attrs: nounwind readnone
+declare i32 @llvm.eh.typeid.for(i8*) #3
+
+declare void @llvm.eh.begincatch(i8*, i8*)
+
+declare void @llvm.eh.endcatch()
+
+declare void @"\01?does_not_throw@@YAXH@Z"(i32) #2
+
+declare void @"\01?dump@@YAXPEAHHAEAUSomeData@@@Z"(i32*, i32, %struct.SomeData* dereferenceable(8)) #2
+
+; Function Attrs: nounwind
+declare void @llvm.lifetime.end(i64, i8* nocapture) #1
+
+attributes #0 = { uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind }
+attributes #2 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #3 = { nounwind readnone }
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{i32 1, !"PIC Level", i32 2}
+!1 = !{!"clang version 3.7.0 (trunk 228868)"}
+!2 = !{!3, !4, i64 0}
+!3 = !{!"?AUSomeData@@", !4, i64 0, !4, i64 4}
+!4 = !{!"int", !5, i64 0}
+!5 = !{!"omnipotent char", !6, i64 0}
+!6 = !{!"Simple C/C++ TBAA"}
+!7 = !{!4, !4, i64 0}
+!8 = !{!3, !4, i64 4}
diff --git a/test/CodeGen/WinEH/cppeh-prepared-catch-all.ll b/test/CodeGen/WinEH/cppeh-prepared-catch-all.ll
new file mode 100644
index 000000000000..f395d64c7b5e
--- /dev/null
+++ b/test/CodeGen/WinEH/cppeh-prepared-catch-all.ll
@@ -0,0 +1,47 @@
+; RUN: llc < %s | FileCheck %s
+
+target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-windows-msvc"
+
+; This test case is equivalent to:
+; extern "C" void may_throw();
+; extern "C" void test_catch_all() {
+; try {
+; may_throw();
+; } catch (...) {
+; }
+; }
+
+declare void @may_throw() #1
+declare i32 @__CxxFrameHandler3(...)
+declare void @llvm.eh.begincatch(i8* nocapture, i8* nocapture) #2
+declare void @llvm.eh.endcatch() #2
+
+; Function Attrs: nounwind uwtable
+define void @test_catch_all() #0 {
+entry:
+ invoke void @may_throw()
+ to label %try.cont unwind label %lpad
+
+lpad: ; preds = %entry
+ %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+ catch i8* null
+ %1 = extractvalue { i8*, i32 } %0, 0
+ tail call void @llvm.eh.begincatch(i8* %1, i8* null) #2
+ tail call void @llvm.eh.endcatch() #2
+ br label %try.cont
+
+try.cont: ; preds = %entry, %lpad
+ ret void
+}
+
+; CHECK-LABEL: $handlerMap$0$test_catch_all:
+; CHECK: .long {{[0-9]+}}
+; CHECK: .long 0
+; CHECK: .long 0
+; CHECK: .long test_catch_all.catch@IMGREL
+; CHECK: .long .Ltest_catch_all.catch$parent_frame_offset
+
+attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { nounwind }
diff --git a/test/CodeGen/WinEH/cppeh-prepared-catch-reordered.ll b/test/CodeGen/WinEH/cppeh-prepared-catch-reordered.ll
new file mode 100644
index 000000000000..6383ca7f1883
--- /dev/null
+++ b/test/CodeGen/WinEH/cppeh-prepared-catch-reordered.ll
@@ -0,0 +1,164 @@
+; RUN: llc < %s | FileCheck %s
+
+; Verify that we get the right frame escape label when the catch comes after the
+; parent function.
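+; The handler map entry is expected to keep using the frame escape label from
+; @main (.Lmain$frame_escape_0, checked below) rather than one derived from
+; main.catch, even though main.catch is emitted after main.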
+
+; This test case is equivalent to:
+; int main() {
+; try {
+; throw 42;
+; } catch (int e) {
+; printf("e: %d\n", e);
+; }
+; }
+
+target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-windows-msvc"
+
+%rtti.TypeDescriptor2 = type { i8**, i8*, [3 x i8] }
+%eh.CatchableType = type { i32, i32, i32, i32, i32, i32, i32 }
+%eh.CatchableTypeArray.1 = type { i32, [1 x i32] }
+%eh.ThrowInfo = type { i32, i32, i32, i32 }
+%eh.CatchHandlerType = type { i32, i8* }
+
+$"\01??_R0H@8" = comdat any
+
+$"_CT??_R0H@84" = comdat any
+
+$_CTA1H = comdat any
+
+$_TI1H = comdat any
+
+$"\01??_C@_06PNOAJMHG@e?3?5?$CFd?6?$AA@" = comdat any
+
+@"\01??_7type_info@@6B@" = external constant i8*
+@"\01??_R0H@8" = linkonce_odr global %rtti.TypeDescriptor2 { i8** @"\01??_7type_info@@6B@", i8* null, [3 x i8] c".H\00" }, comdat
+@__ImageBase = external constant i8
+@"_CT??_R0H@84" = linkonce_odr unnamed_addr constant %eh.CatchableType { i32 1, i32 trunc (i64 sub nuw nsw (i64 ptrtoint (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i64), i64 ptrtoint (i8* @__ImageBase to i64)) to i32), i32 0, i32 -1, i32 0, i32 4, i32 0 }, section ".xdata", comdat
+@_CTA1H = linkonce_odr unnamed_addr constant %eh.CatchableTypeArray.1 { i32 1, [1 x i32] [i32 trunc (i64 sub nuw nsw (i64 ptrtoint (%eh.CatchableType* @"_CT??_R0H@84" to i64), i64 ptrtoint (i8* @__ImageBase to i64)) to i32)] }, section ".xdata", comdat
+@_TI1H = linkonce_odr unnamed_addr constant %eh.ThrowInfo { i32 0, i32 0, i32 0, i32 trunc (i64 sub nuw nsw (i64 ptrtoint (%eh.CatchableTypeArray.1* @_CTA1H to i64), i64 ptrtoint (i8* @__ImageBase to i64)) to i32) }, section ".xdata", comdat
+@llvm.eh.handlertype.H.0 = private unnamed_addr constant %eh.CatchHandlerType { i32 0, i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i8*) }, section "llvm.metadata"
+@"\01??_C@_06PNOAJMHG@e?3?5?$CFd?6?$AA@" = linkonce_odr unnamed_addr constant [7 x i8] c"e: %d\0A\00", comdat, align 1
+
+declare void @_CxxThrowException(i8*, %eh.ThrowInfo*)
+
+; Function Attrs: uwtable
+define i32 @main() #1 {
+entry:
+ %tmp.i = alloca i32, align 4
+ %e = alloca i32, align 4
+ %0 = bitcast i32* %tmp.i to i8*
+ store i32 42, i32* %tmp.i, align 4, !tbaa !2
+ call void (...) @llvm.frameescape(i32* %e)
+ invoke void @_CxxThrowException(i8* %0, %eh.ThrowInfo* @_TI1H) #6
+ to label %.noexc unwind label %lpad1
+
+.noexc: ; preds = %entry
+ unreachable
+
+lpad1: ; preds = %entry
+ %1 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+ catch %eh.CatchHandlerType* @llvm.eh.handlertype.H.0
+ %recover = call i8* (...) @llvm.eh.actions(i32 1, i8* bitcast (%eh.CatchHandlerType* @llvm.eh.handlertype.H.0 to i8*), i32 0, i8* (i8*, i8*)* @main.catch)
+ indirectbr i8* %recover, [label %try.cont.split]
+
+try.cont.split: ; preds = %lpad1
+ ret i32 0
+}
+
+; CHECK-LABEL: main:
+; CHECK: .seh_handlerdata
+; CHECK: .long ($cppxdata$main)@IMGREL
+
+declare i32 @__CxxFrameHandler3(...)
+
+; Function Attrs: nounwind readnone
+declare i32 @llvm.eh.typeid.for(i8*) #2
+
+; Function Attrs: nounwind
+declare void @llvm.eh.begincatch(i8* nocapture, i8* nocapture) #3
+
+; Function Attrs: nounwind
+declare i32 @printf(i8* nocapture readonly, ...) #4
+
+; Function Attrs: nounwind
+declare void @llvm.eh.endcatch() #3
+
+; Function Attrs: nounwind
+declare void @llvm.lifetime.start(i64, i8* nocapture) #3
+
+; Function Attrs: nounwind
+declare i8* @llvm.eh.actions(...) #3
+
+define internal i8* @main.catch(i8*, i8*) #5 {
+entry:
+ %e.i8 = call i8* @llvm.framerecover(i8* bitcast (i32 ()* @main to i8*), i8* %1, i32 0)
+ %e = bitcast i8* %e.i8 to i32*
+ %2 = bitcast i32* %e to i8*
+ %3 = load i32, i32* %e, align 4, !tbaa !2
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([7 x i8], [7 x i8]* @"\01??_C@_06PNOAJMHG@e?3?5?$CFd?6?$AA@", i64 0, i64 0), i32 %3)
+ invoke void @llvm.donothing()
+ to label %entry.split unwind label %stub
+
+entry.split: ; preds = %entry
+ ret i8* blockaddress(@main, %try.cont.split)
+
+stub: ; preds = %entry
+ %4 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+ cleanup
+ %recover = call i8* (...) @llvm.eh.actions()
+ unreachable
+}
+
+; CHECK-LABEL: main.catch:
+; CHECK: .seh_handlerdata
+; CHECK: .long ($cppxdata$main)@IMGREL
+
+; CHECK-NEXT: $cppxdata$main:
+; CHECK-NEXT: .long 429065506
+; CHECK-NEXT: .long 2
+; CHECK-NEXT: .long ($stateUnwindMap$main)@IMGREL
+; CHECK-NEXT: .long 1
+; CHECK-NEXT: .long ($tryMap$main)@IMGREL
+; CHECK-NEXT: .long 3
+; CHECK-NEXT: .long ($ip2state$main)@IMGREL
+; CHECK-NEXT: .long 40
+; CHECK-NEXT: .long 0
+; CHECK-NEXT: .long 1
+
+; Make sure we get the right frame escape label.
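+; Each handler map entry holds, in order: the catch adjectives, the type
+; descriptor, the catch object's frame offset, the handler's address, and the
+; handler's parent frame offset.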
+
+; CHECK: $handlerMap$0$main:
+; CHECK-NEXT: .long 0
+; CHECK-NEXT: .long "??_R0H@8"@IMGREL
+; CHECK-NEXT: .long .Lmain$frame_escape_0
+; CHECK-NEXT: .long main.catch@IMGREL
+; CHECK-NEXT: .long .Lmain.catch$parent_frame_offset
+
+; Function Attrs: nounwind readnone
+declare void @llvm.donothing() #2
+
+; Function Attrs: nounwind
+declare void @llvm.frameescape(...) #3
+
+; Function Attrs: nounwind readnone
+declare i8* @llvm.framerecover(i8*, i8*, i32) #2
+
+attributes #0 = { noreturn uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "unsafe-fp-math"="false" "use-soft-float"="false" "wineh-parent"="main" }
+attributes #2 = { nounwind readnone }
+attributes #3 = { nounwind }
+attributes #4 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #5 = { "wineh-parent"="main" }
+attributes #6 = { noreturn }
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{i32 1, !"PIC Level", i32 2}
+!1 = !{!"clang version 3.7.0 "}
+!2 = !{!3, !3, i64 0}
+!3 = !{!"int", !4, i64 0}
+!4 = !{!"omnipotent char", !5, i64 0}
+!5 = !{!"Simple C/C++ TBAA"}
+
diff --git a/test/CodeGen/WinEH/cppeh-prepared-catch.ll b/test/CodeGen/WinEH/cppeh-prepared-catch.ll
new file mode 100644
index 000000000000..e7aaca86a882
--- /dev/null
+++ b/test/CodeGen/WinEH/cppeh-prepared-catch.ll
@@ -0,0 +1,210 @@
+; RUN: llc < %s | FileCheck %s
+
+target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-windows-msvc"
+
+; This test case is equivalent to:
+; void f() {
+; try {
+; try {
+; may_throw();
+; } catch (int &) {
+; may_throw();
+; }
+; may_throw();
+; } catch (double) {
+; }
+; }
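+; The inner catch (int &) handler is outlined as "?f@@YAXXZ.catch" and the
+; outer catch (double) handler as "?f@@YAXXZ.catch1"; both are defined below,
+; ahead of f() itself.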
+
+
+%rtti.TypeDescriptor2 = type { i8**, i8*, [3 x i8] }
+%eh.CatchHandlerType = type { i32, i8* }
+
+$"\01??_R0N@8" = comdat any
+
+$"\01??_R0H@8" = comdat any
+
+@"\01??_7type_info@@6B@" = external constant i8*
+@"\01??_R0N@8" = linkonce_odr global %rtti.TypeDescriptor2 { i8** @"\01??_7type_info@@6B@", i8* null, [3 x i8] c".N\00" }, comdat
+@llvm.eh.handlertype.N.0 = private unnamed_addr constant %eh.CatchHandlerType { i32 0, i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0N@8" to i8*) }, section "llvm.metadata"
+@"\01??_R0H@8" = linkonce_odr global %rtti.TypeDescriptor2 { i8** @"\01??_7type_info@@6B@", i8* null, [3 x i8] c".H\00" }, comdat
+@llvm.eh.handlertype.H.8 = private unnamed_addr constant %eh.CatchHandlerType { i32 8, i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i8*) }, section "llvm.metadata"
+
+define internal i8* @"\01?f@@YAXXZ.catch"(i8*, i8*) #4 {
+entry:
+ %.i8 = call i8* @llvm.framerecover(i8* bitcast (void ()* @"\01?f@@YAXXZ" to i8*), i8* %1, i32 0)
+ %bc2 = bitcast i8* %.i8 to i32**
+ %bc3 = bitcast i32** %bc2 to i8*
+ invoke void @"\01?may_throw@@YAXXZ"()
+ to label %invoke.cont2 unwind label %lpad1
+
+invoke.cont2: ; preds = %entry
+ ret i8* blockaddress(@"\01?f@@YAXXZ", %try.cont)
+
+lpad1: ; preds = %entry
+ %lp4 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+ cleanup
+ catch %eh.CatchHandlerType* @llvm.eh.handlertype.N.0
+ %recover = call i8* (...) @llvm.eh.actions(i32 1, i8* bitcast (%eh.CatchHandlerType* @llvm.eh.handlertype.N.0 to i8*), i32 1, i8* (i8*, i8*)* @"\01?f@@YAXXZ.catch1")
+ indirectbr i8* %recover, [label %invoke.cont2]
+}
+
+; CHECK-LABEL: "?f@@YAXXZ.catch":
+; No code should be generated for the indirectbr.
+; CHECK-NOT: jmpq *
+; CHECK: .seh_handlerdata
+; CHECK: .long ("$cppxdata$?f@@YAXXZ")@IMGREL
+
+
+define internal i8* @"\01?f@@YAXXZ.catch1"(i8*, i8*) #4 {
+entry:
+ %.i8 = call i8* @llvm.framerecover(i8* bitcast (void ()* @"\01?f@@YAXXZ" to i8*), i8* %1, i32 1)
+ %2 = bitcast i8* %.i8 to double*
+ %3 = bitcast double* %2 to i8*
+ invoke void (...) @llvm.donothing()
+ to label %done unwind label %lpad
+
+done:
+ ret i8* blockaddress(@"\01?f@@YAXXZ", %try.cont8)
+
+lpad: ; preds = %entry
+ %4 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+ cleanup
+ %recover = call i8* (...) @llvm.eh.actions()
+ unreachable
+}
+
+; CHECK-LABEL: "?f@@YAXXZ.catch1":
+; No code should be generated for the indirectbr.
+; CHECK-NOT: jmpq *
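+; The parent frame pointer passed in %rdx is spilled to the slot named by the
+; $parent_frame_offset symbol, which the handler map entries reference.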
+; CHECK: ".L?f@@YAXXZ.catch1$parent_frame_offset" = 16
+; CHECK: movq %rdx, 16(%rsp)
+; CHECK: .seh_handlerdata
+; CHECK: .long ("$cppxdata$?f@@YAXXZ")@IMGREL
+
+define void @"\01?f@@YAXXZ"() #0 {
+entry:
+ %exn.slot = alloca i8*
+ %ehselector.slot = alloca i32
+ %0 = alloca i32*, align 8
+ %1 = alloca double, align 8
+ call void (...) @llvm.frameescape(i32** %0, double* %1)
+ invoke void @"\01?may_throw@@YAXXZ"()
+ to label %invoke.cont unwind label %lpad2
+
+invoke.cont: ; preds = %entry
+ br label %try.cont
+
+lpad2: ; preds = %entry
+ %2 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+ catch %eh.CatchHandlerType* @llvm.eh.handlertype.H.8
+ catch %eh.CatchHandlerType* @llvm.eh.handlertype.N.0
+ %recover = call i8* (...) @llvm.eh.actions(i32 1, i8* bitcast (%eh.CatchHandlerType* @llvm.eh.handlertype.H.8 to i8*), i32 0, i8* (i8*, i8*)* @"\01?f@@YAXXZ.catch", i32 1, i8* bitcast (%eh.CatchHandlerType* @llvm.eh.handlertype.N.0 to i8*), i32 1, i8* (i8*, i8*)* @"\01?f@@YAXXZ.catch1")
+ indirectbr i8* %recover, [label %try.cont, label %try.cont8]
+
+try.cont: ; preds = %lpad2, %invoke.cont
+ invoke void @"\01?may_throw@@YAXXZ"()
+ to label %try.cont8 unwind label %lpad1
+
+lpad1:
+ %3 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+ catch %eh.CatchHandlerType* @llvm.eh.handlertype.N.0
+ %recover2 = call i8* (...) @llvm.eh.actions(i32 1, i8* bitcast (%eh.CatchHandlerType* @llvm.eh.handlertype.N.0 to i8*), i32 1, i8* (i8*, i8*)* @"\01?f@@YAXXZ.catch1")
+ indirectbr i8* %recover2, [label %try.cont8]
+
+try.cont8: ; preds = %lpad2, %try.cont
+ ret void
+}
+
+; CHECK-LABEL: "?f@@YAXXZ":
+; No code should be generated for the indirectbr.
+; CHECK-NOT: jmpq *
+; CHECK: .seh_handlerdata
+; CHECK-NEXT: .long ("$cppxdata$?f@@YAXXZ")@IMGREL
+; CHECK-NEXT:"$cppxdata$?f@@YAXXZ":
+; CHECK-NEXT: .long 429065506
+; CHECK-NEXT: .long 4
+; CHECK-NEXT: .long ("$stateUnwindMap$?f@@YAXXZ")@IMGREL
+; CHECK-NEXT: .long 2
+; CHECK-NEXT: .long ("$tryMap$?f@@YAXXZ")@IMGREL
+; CHECK-NEXT: .long 6
+; CHECK-NEXT: .long ("$ip2state$?f@@YAXXZ")@IMGREL
+; CHECK-NEXT: .long 32
+; CHECK-NEXT: .long 0
+; CHECK-NEXT: .long 1
+; CHECK-NEXT:"$stateUnwindMap$?f@@YAXXZ":
+; CHECK-NEXT: .long -1
+; CHECK-NEXT: .long 0
+; CHECK-NEXT: .long 0
+; CHECK-NEXT: .long 0
+; CHECK-NEXT: .long 0
+; CHECK-NEXT: .long 0
+; CHECK-NEXT: .long -1
+; CHECK-NEXT: .long 0
+; CHECK-NEXT:"$tryMap$?f@@YAXXZ":
+; CHECK-NEXT: .long 1
+; CHECK-NEXT: .long 1
+; CHECK-NEXT: .long 2
+; CHECK-NEXT: .long 1
+; CHECK-NEXT: .long ("$handlerMap$0$?f@@YAXXZ")@IMGREL
+; CHECK-NEXT: .long 0
+; CHECK-NEXT: .long 2
+; CHECK-NEXT: .long 3
+; CHECK-NEXT: .long 1
+; CHECK-NEXT: .long ("$handlerMap$1$?f@@YAXXZ")@IMGREL
+; CHECK-NEXT:"$handlerMap$0$?f@@YAXXZ":
+; CHECK-NEXT: .long 8
+; CHECK-NEXT: .long "??_R0H@8"@IMGREL
+; CHECK-NEXT: .long ".L?f@@YAXXZ$frame_escape_0"
+; CHECK-NEXT: .long "?f@@YAXXZ.catch"@IMGREL
+; CHECK-NEXT: .long ".L?f@@YAXXZ.catch$parent_frame_offset"
+; CHECK-NEXT:"$handlerMap$1$?f@@YAXXZ":
+; CHECK-NEXT: .long 0
+; CHECK-NEXT: .long "??_R0N@8"@IMGREL
+; CHECK-NEXT: .long ".L?f@@YAXXZ$frame_escape_1"
+; CHECK-NEXT: .long "?f@@YAXXZ.catch1"@IMGREL
+; CHECK-NEXT: .long ".L?f@@YAXXZ.catch1$parent_frame_offset"
+; CHECK-NEXT:"$ip2state$?f@@YAXXZ":
+; CHECK-NEXT: .long .Lfunc_begin0@IMGREL
+; CHECK-NEXT: .long 2
+; CHECK-NEXT: .long .Ltmp0@IMGREL
+; CHECK-NEXT: .long 0
+; CHECK-NEXT: .long .Lfunc_begin1@IMGREL
+; CHECK-NEXT: .long 3
+; CHECK-NEXT: .long .Lfunc_begin2@IMGREL
+; CHECK-NEXT: .long -1
+; CHECK-NEXT: .long .Ltmp13@IMGREL
+; CHECK-NEXT: .long 1
+; CHECK-NEXT: .long .Ltmp16@IMGREL
+; CHECK-NEXT: .long 0
+
+
+declare void @"\01?may_throw@@YAXXZ"() #1
+
+declare i32 @__CxxFrameHandler3(...)
+
+; Function Attrs: nounwind readnone
+declare i32 @llvm.eh.typeid.for(i8*) #2
+
+; Function Attrs: nounwind
+declare void @llvm.eh.begincatch(i8* nocapture, i8* nocapture) #3
+
+; Function Attrs: nounwind
+declare void @llvm.eh.endcatch() #3
+
+; Function Attrs: nounwind
+declare i8* @llvm.eh.actions(...) #3
+
+; Function Attrs: nounwind
+declare void @llvm.frameescape(...) #3
+
+; Function Attrs: nounwind readnone
+declare i8* @llvm.framerecover(i8*, i8*, i32) #2
+
+declare void @llvm.donothing(...)
+
+attributes #0 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" "wineh-parent"="?f@@YAXXZ" }
+attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { nounwind readnone }
+attributes #3 = { nounwind }
+attributes #4 = { "wineh-parent"="?f@@YAXXZ" }
diff --git a/test/CodeGen/WinEH/cppeh-prepared-cleanups.ll b/test/CodeGen/WinEH/cppeh-prepared-cleanups.ll
new file mode 100644
index 000000000000..876cb53baba1
--- /dev/null
+++ b/test/CodeGen/WinEH/cppeh-prepared-cleanups.ll
@@ -0,0 +1,243 @@
+; RUN: llc < %s | FileCheck %s
+
+target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-windows-msvc"
+
+%rtti.TypeDescriptor2 = type { i8**, i8*, [3 x i8] }
+%eh.CatchableType = type { i32, i32, i32, i32, i32, i32, i32 }
+%eh.CatchableTypeArray.1 = type { i32, [1 x i32] }
+%eh.ThrowInfo = type { i32, i32, i32, i32 }
+%struct.S = type { i8 }
+
+$"\01??_DS@@QEAA@XZ" = comdat any
+
+$"\01??_R0H@8" = comdat any
+
+$"_CT??_R0H@84" = comdat any
+
+$_CTA1H = comdat any
+
+$_TI1H = comdat any
+
+@"\01??_7type_info@@6B@" = external constant i8*
+@"\01??_R0H@8" = linkonce_odr global %rtti.TypeDescriptor2 { i8** @"\01??_7type_info@@6B@", i8* null, [3 x i8] c".H\00" }, comdat
+@__ImageBase = external constant i8
+@"_CT??_R0H@84" = linkonce_odr unnamed_addr constant %eh.CatchableType { i32 1, i32 trunc (i64 sub nuw nsw (i64 ptrtoint (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i64), i64 ptrtoint (i8* @__ImageBase to i64)) to i32), i32 0, i32 -1, i32 0, i32 4, i32 0 }, section ".xdata", comdat
+@_CTA1H = linkonce_odr unnamed_addr constant %eh.CatchableTypeArray.1 { i32 1, [1 x i32] [i32 trunc (i64 sub nuw nsw (i64 ptrtoint (%eh.CatchableType* @"_CT??_R0H@84" to i64), i64 ptrtoint (i8* @__ImageBase to i64)) to i32)] }, section ".xdata", comdat
+@_TI1H = linkonce_odr unnamed_addr constant %eh.ThrowInfo { i32 0, i32 0, i32 0, i32 trunc (i64 sub nuw nsw (i64 ptrtoint (%eh.CatchableTypeArray.1* @_CTA1H to i64), i64 ptrtoint (i8* @__ImageBase to i64)) to i32) }, section ".xdata", comdat
+
+
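+; Both tests below have landing pads that only run cleanups (destructor calls),
+; so the emitted xdata contains state unwind map entries but no try map.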
+; CHECK-LABEL: "?test1@@YAXXZ":
+; CHECK: .seh_handlerdata
+; CHECK-NEXT: .long ("$cppxdata$?test1@@YAXXZ")@IMGREL
+; CHECK-NEXT:"$cppxdata$?test1@@YAXXZ":
+; CHECK-NEXT: .long 429065506
+; CHECK-NEXT: .long 1
+; CHECK-NEXT: .long ("$stateUnwindMap$?test1@@YAXXZ")@IMGREL
+; CHECK-NEXT: .long 0
+; CHECK-NEXT: .long 0
+; CHECK-NEXT: .long 2
+; CHECK-NEXT: .long ("$ip2state$?test1@@YAXXZ")@IMGREL
+; CHECK-NEXT: .long 32
+; CHECK-NEXT: .long 0
+; CHECK-NEXT: .long 1
+; CHECK-NEXT:"$stateUnwindMap$?test1@@YAXXZ":
+; CHECK-NEXT: .long -1
+; CHECK-NEXT: .long "?test1@@YAXXZ.cleanup"@IMGREL
+; CHECK-NEXT:"$ip2state$?test1@@YAXXZ":
+; CHECK-NEXT: .long .Lfunc_begin0@IMGREL
+; CHECK-NEXT: .long -1
+; CHECK-NEXT: .long .Ltmp0@IMGREL
+; CHECK-NEXT: .long 0
+
+define void @"\01?test1@@YAXXZ"() #0 {
+entry:
+ %unwindhelp = alloca i64
+ %tmp = alloca i32, align 4
+ %exn.slot = alloca i8*
+ %ehselector.slot = alloca i32
+ store i32 0, i32* %tmp
+ %0 = bitcast i32* %tmp to i8*
+ call void (...) @llvm.frameescape()
+ store volatile i64 -2, i64* %unwindhelp
+ %1 = bitcast i64* %unwindhelp to i8*
+ call void @llvm.eh.unwindhelp(i8* %1)
+ invoke void @_CxxThrowException(i8* %0, %eh.ThrowInfo* @_TI1H) #8
+ to label %unreachable unwind label %lpad1
+
+lpad1: ; preds = %entry
+ %2 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+ cleanup
+ %recover = call i8* (...) @llvm.eh.actions(i32 0, void (i8*, i8*)* @"\01?test1@@YAXXZ.cleanup")
+ indirectbr i8* %recover, []
+
+unreachable: ; preds = %entry
+ unreachable
+}
+
+declare void @_CxxThrowException(i8*, %eh.ThrowInfo*)
+
+declare i32 @__CxxFrameHandler3(...)
+
+; Function Attrs: nounwind
+define linkonce_odr void @"\01??_DS@@QEAA@XZ"(%struct.S* %this) unnamed_addr #1 comdat align 2 {
+entry:
+ %this.addr = alloca %struct.S*, align 8
+ store %struct.S* %this, %struct.S** %this.addr, align 8
+ %this1 = load %struct.S*, %struct.S** %this.addr
+ call void @"\01??1S@@QEAA@XZ"(%struct.S* %this1) #4
+ ret void
+}
+
+; CHECK-LABEL: "?test2@@YAX_N@Z":
+; CHECK: .seh_handlerdata
+; CHECK-NEXT: .long ("$cppxdata$?test2@@YAX_N@Z")@IMGREL
+; CHECK-NEXT:"$cppxdata$?test2@@YAX_N@Z":
+; CHECK-NEXT: .long 429065506
+; CHECK-NEXT: .long 2
+; CHECK-NEXT: .long ("$stateUnwindMap$?test2@@YAX_N@Z")@IMGREL
+; CHECK-NEXT: .long 0
+; CHECK-NEXT: .long 0
+; CHECK-NEXT: .long 4
+; CHECK-NEXT: .long ("$ip2state$?test2@@YAX_N@Z")@IMGREL
+; CHECK-NEXT: .long 40
+; CHECK-NEXT: .long 0
+; CHECK-NEXT: .long 1
+; CHECK-NEXT:"$stateUnwindMap$?test2@@YAX_N@Z":
+; CHECK-NEXT: .long -1
+; CHECK-NEXT: .long "?test2@@YAX_N@Z.cleanup"@IMGREL
+; CHECK-NEXT: .long 0
+; CHECK-NEXT: .long "?test2@@YAX_N@Z.cleanup1"@IMGREL
+; CHECK-NEXT:"$ip2state$?test2@@YAX_N@Z":
+; CHECK-NEXT: .long .Lfunc_begin1@IMGREL
+; CHECK-NEXT: .long -1
+; CHECK-NEXT: .long .Ltmp7@IMGREL
+; CHECK-NEXT: .long 0
+; CHECK-NEXT: .long .Ltmp9@IMGREL
+; CHECK-NEXT: .long 1
+; CHECK-NEXT: .long .Ltmp12@IMGREL
+; CHECK-NEXT: .long 0
+
+define void @"\01?test2@@YAX_N@Z"(i1 zeroext %b) #2 {
+ %b.addr = alloca i8, align 1
+ %s = alloca %struct.S, align 1
+ %exn.slot = alloca i8*
+ %ehselector.slot = alloca i32
+ %s1 = alloca %struct.S, align 1
+ %frombool = zext i1 %b to i8
+ store i8 %frombool, i8* %b.addr, align 1
+ call void (...) @llvm.frameescape(%struct.S* %s, %struct.S* %s1)
+ call void @"\01?may_throw@@YAXXZ"()
+ invoke void @"\01?may_throw@@YAXXZ"()
+ to label %invoke.cont unwind label %lpad1
+
+invoke.cont: ; preds = %entry
+ %1 = load i8, i8* %b.addr, align 1
+ %tobool = trunc i8 %1 to i1
+ br i1 %tobool, label %if.then, label %if.else
+
+if.then: ; preds = %invoke.cont
+ invoke void @"\01?may_throw@@YAXXZ"()
+ to label %invoke.cont3 unwind label %lpad3
+
+invoke.cont3: ; preds = %if.then
+ call void @"\01??_DS@@QEAA@XZ"(%struct.S* %s1) #4
+ br label %if.end
+
+lpad1: ; preds = %entry, %if.end
+ %2 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+ cleanup
+ %recover = call i8* (...) @llvm.eh.actions(i32 0, void (i8*, i8*)* @"\01?test2@@YAX_N@Z.cleanup")
+ indirectbr i8* %recover, []
+
+lpad3: ; preds = %if.then
+ %3 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+ cleanup
+ %recover4 = call i8* (...) @llvm.eh.actions(i32 0, void (i8*, i8*)* @"\01?test2@@YAX_N@Z.cleanup1", i32 0, void (i8*, i8*)* @"\01?test2@@YAX_N@Z.cleanup")
+ indirectbr i8* %recover4, []
+
+if.else: ; preds = %invoke.cont
+ call void @"\01?dont_throw@@YAXXZ"() #4
+ br label %if.end
+
+if.end: ; preds = %if.else, %invoke.cont3
+ invoke void @"\01?may_throw@@YAXXZ"()
+ to label %invoke.cont4 unwind label %lpad1
+
+invoke.cont4: ; preds = %if.end
+ call void @"\01??_DS@@QEAA@XZ"(%struct.S* %s) #4
+ ret void
+}
+
+declare void @"\01?may_throw@@YAXXZ"() #3
+
+; Function Attrs: nounwind
+declare void @"\01?dont_throw@@YAXXZ"() #1
+
+; Function Attrs: nounwind
+declare void @"\01??1S@@QEAA@XZ"(%struct.S*) #1
+
+; Function Attrs: nounwind
+declare i8* @llvm.eh.actions(...) #4
+
+define internal void @"\01?test1@@YAXXZ.cleanup"(i8*, i8*) #5 {
+entry:
+ %s = alloca %struct.S, align 1
+ call void @"\01??_DS@@QEAA@XZ"(%struct.S* %s) #4
+ ret void
+}
+
+; Function Attrs: nounwind
+declare void @llvm.frameescape(...) #4
+
+; Function Attrs: nounwind readnone
+declare i8* @llvm.framerecover(i8*, i8*, i32) #6
+
+; Function Attrs: nounwind
+declare void @llvm.eh.unwindhelp(i8*) #4
+
+define internal void @"\01?test2@@YAX_N@Z.cleanup"(i8*, i8*) #7 {
+entry:
+ %s.i8 = call i8* @llvm.framerecover(i8* bitcast (void (i1)* @"\01?test2@@YAX_N@Z" to i8*), i8* %1, i32 0)
+ %s = bitcast i8* %s.i8 to %struct.S*
+ call void @"\01??_DS@@QEAA@XZ"(%struct.S* %s) #4
+ invoke void @llvm.donothing()
+ to label %entry.split unwind label %stub
+
+entry.split: ; preds = %entry
+ ret void
+
+stub: ; preds = %entry
+ %2 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+ cleanup
+ unreachable
+}
+
+define internal void @"\01?test2@@YAX_N@Z.cleanup1"(i8*, i8*) #7 {
+entry:
+ %s1.i8 = call i8* @llvm.framerecover(i8* bitcast (void (i1)* @"\01?test2@@YAX_N@Z" to i8*), i8* %1, i32 1)
+ %s1 = bitcast i8* %s1.i8 to %struct.S*
+ call void @"\01??_DS@@QEAA@XZ"(%struct.S* %s1) #4
+ invoke void @llvm.donothing()
+ to label %entry.split unwind label %stub
+
+entry.split: ; preds = %entry
+ ret void
+
+stub: ; preds = %entry
+ %2 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+ cleanup
+ unreachable
+}
+
+declare void @llvm.donothing()
+
+attributes #0 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" "wineh-parent"="?test1@@YAXXZ" }
+attributes #1 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" "wineh-parent"="?test2@@YAX_N@Z" }
+attributes #3 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #4 = { nounwind }
+attributes #5 = { "wineh-parent"="?test1@@YAXXZ" }
+attributes #6 = { nounwind readnone }
+attributes #7 = { "wineh-parent"="?test2@@YAX_N@Z" }
+attributes #8 = { noreturn }
diff --git a/test/CodeGen/WinEH/cppeh-shared-empty-catch.ll b/test/CodeGen/WinEH/cppeh-shared-empty-catch.ll
new file mode 100644
index 000000000000..dd99a092b201
--- /dev/null
+++ b/test/CodeGen/WinEH/cppeh-shared-empty-catch.ll
@@ -0,0 +1,110 @@
+; RUN: opt -mtriple=x86_64-pc-windows-msvc -winehprepare -S -o - < %s | FileCheck %s
+
+; This test is based on the following source, built with -O2
+;
+; void f() {
+; try {
+; g();
+; try {
+; throw;
+; } catch (int) {
+; }
+; } catch (...) {
+; }
+; }
+;
+
+; ModuleID = '<stdin>'
+target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-windows-msvc"
+
+%rtti.TypeDescriptor2 = type { i8**, i8*, [3 x i8] }
+%eh.CatchHandlerType = type { i32, i8* }
+%eh.ThrowInfo = type { i32, i32, i32, i32 }
+
+$"\01??_R0H@8" = comdat any
+
+@"\01??_7type_info@@6B@" = external constant i8*
+@"\01??_R0H@8" = linkonce_odr global %rtti.TypeDescriptor2 { i8** @"\01??_7type_info@@6B@", i8* null, [3 x i8] c".H\00" }, comdat
+@llvm.eh.handlertype.H.0 = private unnamed_addr constant %eh.CatchHandlerType { i32 0, i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i8*) }, section "llvm.metadata"
+
+; CHECK-LABEL: define void @"\01?f@@YAXXZ"()
+; CHECK: entry:
+; CHECK: call void (...) @llvm.frameescape()
+; CHECK: invoke void @"\01?g@@YAXXZ"()
+
+; Function Attrs: nounwind
+define void @"\01?f@@YAXXZ"() #0 {
+entry:
+ invoke void @"\01?g@@YAXXZ"()
+ to label %invoke.cont unwind label %lpad
+
+; CHECK-LABEL: invoke.cont:
+; CHECK: invoke void @_CxxThrowException(i8* null, %eh.ThrowInfo* null)
+; CHECK: to label %unreachable unwind label %[[LPAD1_LABEL:lpad[0-9]+]]
+
+invoke.cont: ; preds = %entry
+ invoke void @_CxxThrowException(i8* null, %eh.ThrowInfo* null) #4
+ to label %unreachable unwind label %lpad1
+
+lpad: ; preds = %entry
+ %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+ catch i8* null
+ %1 = extractvalue { i8*, i32 } %0, 0
+ br label %catch2
+
+; Note: Even though this landing pad has two catch clauses, it only has one action because both
+; handlers do the same thing.
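+; Both clauses route to the shared handler "\01?f@@YAXXZ.catch", so the action
+; list below contains a single entry with a null (catch-all) type.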
+; CHECK: [[LPAD1_LABEL]]:
+; CHECK: landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+; CHECK-NEXT: catch %eh.CatchHandlerType* @llvm.eh.handlertype.H.0
+; CHECK-NEXT: catch i8* null
+; CHECK-NEXT: [[RECOVER:\%.+]] = call i8* (...) @llvm.eh.actions(i32 1, i8* null, i32 -1, i8* (i8*, i8*)* @"\01?f@@YAXXZ.catch")
+; CHECK-NEXT: indirectbr i8* [[RECOVER]], [label %try.cont4]
+
+lpad1: ; preds = %invoke.cont
+ %2 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+ catch %eh.CatchHandlerType* @llvm.eh.handlertype.H.0
+ catch i8* null
+ %3 = extractvalue { i8*, i32 } %2, 0
+ br label %catch2
+
+catch2: ; preds = %lpad1, %lpad
+ %exn.slot.0 = phi i8* [ %3, %lpad1 ], [ %1, %lpad ]
+ tail call void @llvm.eh.begincatch(i8* %exn.slot.0, i8* null) #3
+ tail call void @llvm.eh.endcatch() #3
+ br label %try.cont4
+
+try.cont4: ; preds = %catch, %catch2
+ ret void
+
+unreachable: ; preds = %invoke.cont
+ unreachable
+
+; CHECK: }
+}
+
+declare void @"\01?g@@YAXXZ"() #1
+
+declare i32 @__CxxFrameHandler3(...)
+
+declare void @_CxxThrowException(i8*, %eh.ThrowInfo*)
+
+; Function Attrs: nounwind readnone
+declare i32 @llvm.eh.typeid.for(i8*) #2
+
+; Function Attrs: nounwind
+declare void @llvm.eh.begincatch(i8* nocapture, i8* nocapture) #3
+
+; Function Attrs: nounwind
+declare void @llvm.eh.endcatch() #3
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { nounwind readnone }
+attributes #3 = { nounwind }
+attributes #4 = { noreturn }
+
+!llvm.ident = !{!0}
+
+!0 = !{!"clang version 3.7.0 (trunk 235112) (llvm/trunk 235121)"}
diff --git a/test/CodeGen/WinEH/cppeh-similar-catch-blocks.ll b/test/CodeGen/WinEH/cppeh-similar-catch-blocks.ll
new file mode 100644
index 000000000000..81ee4542062d
--- /dev/null
+++ b/test/CodeGen/WinEH/cppeh-similar-catch-blocks.ll
@@ -0,0 +1,394 @@
+; RUN: opt -mtriple=x86_64-pc-windows-msvc -winehprepare -S -o - < %s | FileCheck %s
+
+; This test is based on the following code:
+;
+; int main(void) {
+; try {
+; try {
+; throw 'a';
+; } catch (char c) {
+; printf("%c\n", c);
+; }
+; throw 1;
+; } catch(int x) {
+; printf("%d\n", x);
+; } catch(...) {
+; printf("...\n");
+; }
+; try {
+; try {
+; throw 'b';
+; } catch (char c) {
+; printf("%c\n", c);
+; }
+; throw 2;
+; } catch(int x) {
+; printf("%d\n", x);
+; } catch (char c) {
+; printf("%c\n", c);
+; } catch(...) {
+; printf("...\n");
+; }
+; return 0;
+; }
+
+; This test is just checking for failures in processing the IR.
+; Extensive handler matching is not required.
+
+; ModuleID = 'cppeh-similar-catch-blocks.cpp'
+target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-windows-msvc"
+
+%rtti.TypeDescriptor2 = type { i8**, i8*, [3 x i8] }
+%eh.CatchHandlerType = type { i32, i8* }
+%eh.CatchableType = type { i32, i32, i32, i32, i32, i32, i32 }
+%eh.CatchableTypeArray.1 = type { i32, [1 x i32] }
+%eh.ThrowInfo = type { i32, i32, i32, i32 }
+
+$"\01??_R0H@8" = comdat any
+
+$"\01??_R0D@8" = comdat any
+
+$"_CT??_R0D@81" = comdat any
+
+$_CTA1D = comdat any
+
+$_TI1D = comdat any
+
+$"\01??_C@_03PJCJOCBM@?$CFc?6?$AA@" = comdat any
+
+$"_CT??_R0H@84" = comdat any
+
+$_CTA1H = comdat any
+
+$_TI1H = comdat any
+
+$"\01??_C@_04MPPNMCOK@?4?4?4?6?$AA@" = comdat any
+
+$"\01??_C@_03PMGGPEJJ@?$CFd?6?$AA@" = comdat any
+
+@"\01??_7type_info@@6B@" = external constant i8*
+@"\01??_R0H@8" = linkonce_odr global %rtti.TypeDescriptor2 { i8** @"\01??_7type_info@@6B@", i8* null, [3 x i8] c".H\00" }, comdat
+@llvm.eh.handlertype.H.0 = private unnamed_addr constant %eh.CatchHandlerType { i32 0, i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i8*) }, section "llvm.metadata"
+@"\01??_R0D@8" = linkonce_odr global %rtti.TypeDescriptor2 { i8** @"\01??_7type_info@@6B@", i8* null, [3 x i8] c".D\00" }, comdat
+@llvm.eh.handlertype.D.0 = private unnamed_addr constant %eh.CatchHandlerType { i32 0, i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0D@8" to i8*) }, section "llvm.metadata"
+@__ImageBase = external constant i8
+@"_CT??_R0D@81" = linkonce_odr unnamed_addr constant %eh.CatchableType { i32 1, i32 trunc (i64 sub nuw nsw (i64 ptrtoint (%rtti.TypeDescriptor2* @"\01??_R0D@8" to i64), i64 ptrtoint (i8* @__ImageBase to i64)) to i32), i32 0, i32 -1, i32 0, i32 1, i32 0 }, section ".xdata", comdat
+@_CTA1D = linkonce_odr unnamed_addr constant %eh.CatchableTypeArray.1 { i32 1, [1 x i32] [i32 trunc (i64 sub nuw nsw (i64 ptrtoint (%eh.CatchableType* @"_CT??_R0D@81" to i64), i64 ptrtoint (i8* @__ImageBase to i64)) to i32)] }, section ".xdata", comdat
+@_TI1D = linkonce_odr unnamed_addr constant %eh.ThrowInfo { i32 0, i32 0, i32 0, i32 trunc (i64 sub nuw nsw (i64 ptrtoint (%eh.CatchableTypeArray.1* @_CTA1D to i64), i64 ptrtoint (i8* @__ImageBase to i64)) to i32) }, section ".xdata", comdat
+@"\01??_C@_03PJCJOCBM@?$CFc?6?$AA@" = linkonce_odr unnamed_addr constant [4 x i8] c"%c\0A\00", comdat, align 1
+@"_CT??_R0H@84" = linkonce_odr unnamed_addr constant %eh.CatchableType { i32 1, i32 trunc (i64 sub nuw nsw (i64 ptrtoint (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i64), i64 ptrtoint (i8* @__ImageBase to i64)) to i32), i32 0, i32 -1, i32 0, i32 4, i32 0 }, section ".xdata", comdat
+@_CTA1H = linkonce_odr unnamed_addr constant %eh.CatchableTypeArray.1 { i32 1, [1 x i32] [i32 trunc (i64 sub nuw nsw (i64 ptrtoint (%eh.CatchableType* @"_CT??_R0H@84" to i64), i64 ptrtoint (i8* @__ImageBase to i64)) to i32)] }, section ".xdata", comdat
+@_TI1H = linkonce_odr unnamed_addr constant %eh.ThrowInfo { i32 0, i32 0, i32 0, i32 trunc (i64 sub nuw nsw (i64 ptrtoint (%eh.CatchableTypeArray.1* @_CTA1H to i64), i64 ptrtoint (i8* @__ImageBase to i64)) to i32) }, section ".xdata", comdat
+@"\01??_C@_04MPPNMCOK@?4?4?4?6?$AA@" = linkonce_odr unnamed_addr constant [5 x i8] c"...\0A\00", comdat, align 1
+@"\01??_C@_03PMGGPEJJ@?$CFd?6?$AA@" = linkonce_odr unnamed_addr constant [4 x i8] c"%d\0A\00", comdat, align 1
+
+; This is just a minimal check to verify that main was handled by WinEHPrepare.
+; CHECK: define i32 @main()
+; CHECK: entry:
+; CHECK: call void (...) @llvm.frameescape(i32* [[X_PTR:\%.+]], i32* [[X2_PTR:\%.+]], i8* [[C2_PTR:\%.+]], i8* [[C3_PTR:\%.+]], i8* [[C_PTR:\%.+]])
+; CHECK: invoke void @_CxxThrowException
+; CHECK: }
+
+; Function Attrs: uwtable
+define i32 @main() #0 {
+entry:
+ %retval = alloca i32, align 4
+ %tmp = alloca i8, align 1
+ %exn.slot = alloca i8*
+ %ehselector.slot = alloca i32
+ %c = alloca i8, align 1
+ %tmp3 = alloca i32, align 4
+ %x = alloca i32, align 4
+ %tmp20 = alloca i8, align 1
+ %c28 = alloca i8, align 1
+ %tmp34 = alloca i32, align 4
+ %c48 = alloca i8, align 1
+ %x56 = alloca i32, align 4
+ store i32 0, i32* %retval
+ store i8 97, i8* %tmp
+ invoke void @_CxxThrowException(i8* %tmp, %eh.ThrowInfo* @_TI1D) #4
+ to label %unreachable unwind label %lpad
+
+lpad: ; preds = %entry
+ %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+ catch %eh.CatchHandlerType* @llvm.eh.handlertype.D.0
+ catch %eh.CatchHandlerType* @llvm.eh.handlertype.H.0
+ catch i8* null
+ %1 = extractvalue { i8*, i32 } %0, 0
+ store i8* %1, i8** %exn.slot
+ %2 = extractvalue { i8*, i32 } %0, 1
+ store i32 %2, i32* %ehselector.slot
+ br label %catch.dispatch
+
+catch.dispatch: ; preds = %lpad
+ %sel = load i32, i32* %ehselector.slot
+ %3 = call i32 @llvm.eh.typeid.for(i8* bitcast (%eh.CatchHandlerType* @llvm.eh.handlertype.D.0 to i8*)) #2
+ %matches = icmp eq i32 %sel, %3
+ br i1 %matches, label %catch, label %catch.dispatch5
+
+catch: ; preds = %catch.dispatch
+ %exn = load i8*, i8** %exn.slot
+ call void @llvm.eh.begincatch(i8* %exn, i8* %c) #2
+ %4 = load i8, i8* %c, align 1
+ %conv = sext i8 %4 to i32
+ %call = invoke i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @"\01??_C@_03PJCJOCBM@?$CFc?6?$AA@", i32 0, i32 0), i32 %conv)
+ to label %invoke.cont unwind label %lpad2
+
+invoke.cont: ; preds = %catch
+ call void @llvm.eh.endcatch() #2
+ br label %try.cont
+
+try.cont: ; preds = %invoke.cont
+ store i32 1, i32* %tmp3
+ %5 = bitcast i32* %tmp3 to i8*
+ invoke void @_CxxThrowException(i8* %5, %eh.ThrowInfo* @_TI1H) #4
+ to label %unreachable unwind label %lpad4
+
+lpad2: ; preds = %catch
+ %6 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+ catch %eh.CatchHandlerType* @llvm.eh.handlertype.H.0
+ catch i8* null
+ %7 = extractvalue { i8*, i32 } %6, 0
+ store i8* %7, i8** %exn.slot
+ %8 = extractvalue { i8*, i32 } %6, 1
+ store i32 %8, i32* %ehselector.slot
+ call void @llvm.eh.endcatch() #2
+ br label %catch.dispatch5
+
+lpad4: ; preds = %try.cont
+ %9 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+ catch %eh.CatchHandlerType* @llvm.eh.handlertype.H.0
+ catch i8* null
+ %10 = extractvalue { i8*, i32 } %9, 0
+ store i8* %10, i8** %exn.slot
+ %11 = extractvalue { i8*, i32 } %9, 1
+ store i32 %11, i32* %ehselector.slot
+ br label %catch.dispatch5
+
+catch.dispatch5: ; preds = %lpad4, %lpad2, %catch.dispatch
+ %sel6 = load i32, i32* %ehselector.slot
+ %12 = call i32 @llvm.eh.typeid.for(i8* bitcast (%eh.CatchHandlerType* @llvm.eh.handlertype.H.0 to i8*)) #2
+ %matches7 = icmp eq i32 %sel6, %12
+ br i1 %matches7, label %catch13, label %catch8
+
+catch13: ; preds = %catch.dispatch5
+ %exn14 = load i8*, i8** %exn.slot
+ %13 = bitcast i32* %x to i8*
+ call void @llvm.eh.begincatch(i8* %exn14, i8* %13) #2
+ %14 = load i32, i32* %x, align 4
+ %call18 = invoke i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @"\01??_C@_03PMGGPEJJ@?$CFd?6?$AA@", i32 0, i32 0), i32 %14)
+ to label %invoke.cont17 unwind label %lpad16
+
+invoke.cont17: ; preds = %catch13
+ call void @llvm.eh.endcatch() #2
+ br label %try.cont19
+
+try.cont19: ; preds = %invoke.cont17, %invoke.cont11
+ store i8 98, i8* %tmp20
+ invoke void @_CxxThrowException(i8* %tmp20, %eh.ThrowInfo* @_TI1D) #4
+ to label %unreachable unwind label %lpad21
+
+catch8: ; preds = %catch.dispatch5
+ %exn9 = load i8*, i8** %exn.slot
+ call void @llvm.eh.begincatch(i8* %exn9, i8* null) #2
+ %call12 = invoke i32 (i8*, ...) @printf(i8* getelementptr inbounds ([5 x i8], [5 x i8]* @"\01??_C@_04MPPNMCOK@?4?4?4?6?$AA@", i32 0, i32 0))
+ to label %invoke.cont11 unwind label %lpad10
+
+invoke.cont11: ; preds = %catch8
+ call void @llvm.eh.endcatch() #2
+ br label %try.cont19
+
+lpad10: ; preds = %catch8
+ %15 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+ cleanup
+ %16 = extractvalue { i8*, i32 } %15, 0
+ store i8* %16, i8** %exn.slot
+ %17 = extractvalue { i8*, i32 } %15, 1
+ store i32 %17, i32* %ehselector.slot
+ call void @llvm.eh.endcatch() #2
+ br label %eh.resume
+
+lpad16: ; preds = %catch13
+ %18 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+ cleanup
+ %19 = extractvalue { i8*, i32 } %18, 0
+ store i8* %19, i8** %exn.slot
+ %20 = extractvalue { i8*, i32 } %18, 1
+ store i32 %20, i32* %ehselector.slot
+ call void @llvm.eh.endcatch() #2
+ br label %eh.resume
+
+lpad21: ; preds = %try.cont19
+ %21 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+ catch i8* bitcast (%eh.CatchHandlerType* @llvm.eh.handlertype.D.0 to i8*)
+ catch i8* bitcast (%eh.CatchHandlerType* @llvm.eh.handlertype.H.0 to i8*)
+ catch i8* null
+ %22 = extractvalue { i8*, i32 } %21, 0
+ store i8* %22, i8** %exn.slot
+ %23 = extractvalue { i8*, i32 } %21, 1
+ store i32 %23, i32* %ehselector.slot
+ br label %catch.dispatch22
+
+catch.dispatch22: ; preds = %lpad21
+ %sel23 = load i32, i32* %ehselector.slot
+ %24 = call i32 @llvm.eh.typeid.for(i8* bitcast (%eh.CatchHandlerType* @llvm.eh.handlertype.D.0 to i8*)) #2
+ %matches24 = icmp eq i32 %sel23, %24
+ br i1 %matches24, label %catch25, label %catch.dispatch36
+
+catch25: ; preds = %catch.dispatch22
+ %exn26 = load i8*, i8** %exn.slot
+ call void @llvm.eh.begincatch(i8* %exn26, i8* %c28) #2
+ %25 = load i8, i8* %c28, align 1
+ %conv29 = sext i8 %25 to i32
+ %call32 = invoke i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @"\01??_C@_03PJCJOCBM@?$CFc?6?$AA@", i32 0, i32 0), i32 %conv29)
+ to label %invoke.cont31 unwind label %lpad30
+
+invoke.cont31: ; preds = %catch25
+ call void @llvm.eh.endcatch() #2
+ br label %try.cont33
+
+try.cont33: ; preds = %invoke.cont31
+ store i32 2, i32* %tmp34
+ %26 = bitcast i32* %tmp34 to i8*
+ invoke void @_CxxThrowException(i8* %26, %eh.ThrowInfo* @_TI1H) #4
+ to label %unreachable unwind label %lpad35
+
+lpad30: ; preds = %catch25
+ %27 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+ catch i8* bitcast (%eh.CatchHandlerType* @llvm.eh.handlertype.H.0 to i8*)
+ catch i8* bitcast (%eh.CatchHandlerType* @llvm.eh.handlertype.D.0 to i8*)
+ catch i8* null
+ %28 = extractvalue { i8*, i32 } %27, 0
+ store i8* %28, i8** %exn.slot
+ %29 = extractvalue { i8*, i32 } %27, 1
+ store i32 %29, i32* %ehselector.slot
+ call void @llvm.eh.endcatch() #2
+ br label %catch.dispatch36
+
+lpad35: ; preds = %try.cont33
+ %30 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+ catch i8* bitcast (%eh.CatchHandlerType* @llvm.eh.handlertype.H.0 to i8*)
+ catch i8* bitcast (%eh.CatchHandlerType* @llvm.eh.handlertype.D.0 to i8*)
+ catch i8* null
+ %31 = extractvalue { i8*, i32 } %30, 0
+ store i8* %31, i8** %exn.slot
+ %32 = extractvalue { i8*, i32 } %30, 1
+ store i32 %32, i32* %ehselector.slot
+ br label %catch.dispatch36
+
+catch.dispatch36: ; preds = %lpad35, %lpad30, %catch.dispatch22
+ %sel37 = load i32, i32* %ehselector.slot
+ %33 = call i32 @llvm.eh.typeid.for(i8* bitcast (%eh.CatchHandlerType* @llvm.eh.handlertype.H.0 to i8*)) #2
+ %matches38 = icmp eq i32 %sel37, %33
+ br i1 %matches38, label %catch53, label %catch.fallthrough
+
+catch53: ; preds = %catch.dispatch36
+ %exn54 = load i8*, i8** %exn.slot
+ %34 = bitcast i32* %x56 to i8*
+ call void @llvm.eh.begincatch(i8* %exn54, i8* %34) #2
+ %35 = load i32, i32* %x56, align 4
+ %call59 = invoke i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @"\01??_C@_03PMGGPEJJ@?$CFd?6?$AA@", i32 0, i32 0), i32 %35)
+ to label %invoke.cont58 unwind label %lpad57
+
+invoke.cont58: ; preds = %catch53
+ call void @llvm.eh.endcatch() #2
+ br label %try.cont60
+
+try.cont60: ; preds = %invoke.cont58, %invoke.cont51, %invoke.cont43
+ ret i32 0
+
+catch.fallthrough: ; preds = %catch.dispatch36
+ %36 = call i32 @llvm.eh.typeid.for(i8* bitcast (%eh.CatchHandlerType* @llvm.eh.handlertype.D.0 to i8*)) #2
+ %matches39 = icmp eq i32 %sel37, %36
+ br i1 %matches39, label %catch45, label %catch40
+
+catch45: ; preds = %catch.fallthrough
+ %exn46 = load i8*, i8** %exn.slot
+ call void @llvm.eh.begincatch(i8* %exn46, i8* %c48) #2
+ %37 = load i8, i8* %c48, align 1
+ %conv49 = sext i8 %37 to i32
+ %call52 = invoke i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @"\01??_C@_03PJCJOCBM@?$CFc?6?$AA@", i32 0, i32 0), i32 %conv49)
+ to label %invoke.cont51 unwind label %lpad50
+
+invoke.cont51: ; preds = %catch45
+ call void @llvm.eh.endcatch() #2
+ br label %try.cont60
+
+catch40: ; preds = %catch.fallthrough
+ %exn41 = load i8*, i8** %exn.slot
+ call void @llvm.eh.begincatch(i8* %exn41, i8* null) #2
+ %call44 = invoke i32 (i8*, ...) @printf(i8* getelementptr inbounds ([5 x i8], [5 x i8]* @"\01??_C@_04MPPNMCOK@?4?4?4?6?$AA@", i32 0, i32 0))
+ to label %invoke.cont43 unwind label %lpad42
+
+invoke.cont43: ; preds = %catch40
+ call void @llvm.eh.endcatch() #2
+ br label %try.cont60
+
+lpad42: ; preds = %catch40
+ %38 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+ cleanup
+ %39 = extractvalue { i8*, i32 } %38, 0
+ store i8* %39, i8** %exn.slot
+ %40 = extractvalue { i8*, i32 } %38, 1
+ store i32 %40, i32* %ehselector.slot
+ call void @llvm.eh.endcatch() #2
+ br label %eh.resume
+
+lpad50: ; preds = %catch45
+ %41 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+ cleanup
+ %42 = extractvalue { i8*, i32 } %41, 0
+ store i8* %42, i8** %exn.slot
+ %43 = extractvalue { i8*, i32 } %41, 1
+ store i32 %43, i32* %ehselector.slot
+ call void @llvm.eh.endcatch() #2
+ br label %eh.resume
+
+lpad57: ; preds = %catch53
+ %44 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+ cleanup
+ %45 = extractvalue { i8*, i32 } %44, 0
+ store i8* %45, i8** %exn.slot
+ %46 = extractvalue { i8*, i32 } %44, 1
+ store i32 %46, i32* %ehselector.slot
+ call void @llvm.eh.endcatch() #2
+ br label %eh.resume
+
+eh.resume: ; preds = %lpad57, %lpad50, %lpad42, %lpad16, %lpad10
+ %exn61 = load i8*, i8** %exn.slot
+ %sel62 = load i32, i32* %ehselector.slot
+ %lpad.val = insertvalue { i8*, i32 } undef, i8* %exn61, 0
+ %lpad.val63 = insertvalue { i8*, i32 } %lpad.val, i32 %sel62, 1
+ resume { i8*, i32 } %lpad.val63
+
+unreachable: ; preds = %try.cont33, %try.cont19, %try.cont, %entry
+ unreachable
+}
+
+declare void @_CxxThrowException(i8*, %eh.ThrowInfo*)
+
+declare i32 @__CxxFrameHandler3(...)
+
+; Function Attrs: nounwind readnone
+declare i32 @llvm.eh.typeid.for(i8*) #1
+
+; Function Attrs: nounwind
+declare void @llvm.eh.begincatch(i8* nocapture, i8* nocapture) #2
+
+declare i32 @printf(i8*, ...) #3
+
+; Function Attrs: nounwind
+declare void @llvm.eh.endcatch() #2
+
+attributes #0 = { uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone }
+attributes #2 = { nounwind }
+attributes #3 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #4 = { noreturn }
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{i32 1, !"PIC Level", i32 2}
+!1 = !{!"clang version 3.7.0 (trunk 235214) (llvm/trunk 235213)"}
diff --git a/test/CodeGen/WinEH/cppeh-state-calc-1.ll b/test/CodeGen/WinEH/cppeh-state-calc-1.ll
new file mode 100644
index 000000000000..3549b1d51dee
--- /dev/null
+++ b/test/CodeGen/WinEH/cppeh-state-calc-1.ll
@@ -0,0 +1,289 @@
+; RUN: llc < %s | FileCheck %s
+
+; This test was generated from the following code.
+;
+; void test() {
+; try {
+; try {
+; try {
+; two();
+; throw 2;
+; } catch (int x) {
+; catch_two();
+; }
+; a();
+; throw 'a';
+; } catch (char c) {
+; catch_a();
+; }
+; one();
+; throw 1;
+; } catch(int x) {
+; catch_one();
+; } catch(...) {
+; catch_all();
+; }
+; }
+;
+; The function calls before the throws were declared as 'noexcept' and are
+; just here to make blocks easier to identify in the IR.
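+; The CHECK lines at the end of the file verify the computed EH state numbers
+; and the emitted ip2state table.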
+
+; ModuleID = '<stdin>'
+target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-windows-msvc"
+
+%rtti.TypeDescriptor2 = type { i8**, i8*, [3 x i8] }
+%eh.CatchHandlerType = type { i32, i8* }
+%eh.CatchableType = type { i32, i32, i32, i32, i32, i32, i32 }
+%eh.CatchableTypeArray.1 = type { i32, [1 x i32] }
+%eh.ThrowInfo = type { i32, i32, i32, i32 }
+
+$"\01??_R0H@8" = comdat any
+
+$"\01??_R0D@8" = comdat any
+
+$"_CT??_R0H@84" = comdat any
+
+$_CTA1H = comdat any
+
+$_TI1H = comdat any
+
+$"_CT??_R0D@81" = comdat any
+
+$_CTA1D = comdat any
+
+$_TI1D = comdat any
+
+@"\01??_7type_info@@6B@" = external constant i8*
+@"\01??_R0H@8" = linkonce_odr global %rtti.TypeDescriptor2 { i8** @"\01??_7type_info@@6B@", i8* null, [3 x i8] c".H\00" }, comdat
+@llvm.eh.handlertype.H.0 = private unnamed_addr constant %eh.CatchHandlerType { i32 0, i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i8*) }, section "llvm.metadata"
+@"\01??_R0D@8" = linkonce_odr global %rtti.TypeDescriptor2 { i8** @"\01??_7type_info@@6B@", i8* null, [3 x i8] c".D\00" }, comdat
+@llvm.eh.handlertype.D.0 = private unnamed_addr constant %eh.CatchHandlerType { i32 0, i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0D@8" to i8*) }, section "llvm.metadata"
+@__ImageBase = external constant i8
+@"_CT??_R0H@84" = linkonce_odr unnamed_addr constant %eh.CatchableType { i32 1, i32 trunc (i64 sub nuw nsw (i64 ptrtoint (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i64), i64 ptrtoint (i8* @__ImageBase to i64)) to i32), i32 0, i32 -1, i32 0, i32 4, i32 0 }, section ".xdata", comdat
+@_CTA1H = linkonce_odr unnamed_addr constant %eh.CatchableTypeArray.1 { i32 1, [1 x i32] [i32 trunc (i64 sub nuw nsw (i64 ptrtoint (%eh.CatchableType* @"_CT??_R0H@84" to i64), i64 ptrtoint (i8* @__ImageBase to i64)) to i32)] }, section ".xdata", comdat
+@_TI1H = linkonce_odr unnamed_addr constant %eh.ThrowInfo { i32 0, i32 0, i32 0, i32 trunc (i64 sub nuw nsw (i64 ptrtoint (%eh.CatchableTypeArray.1* @_CTA1H to i64), i64 ptrtoint (i8* @__ImageBase to i64)) to i32) }, section ".xdata", comdat
+@"_CT??_R0D@81" = linkonce_odr unnamed_addr constant %eh.CatchableType { i32 1, i32 trunc (i64 sub nuw nsw (i64 ptrtoint (%rtti.TypeDescriptor2* @"\01??_R0D@8" to i64), i64 ptrtoint (i8* @__ImageBase to i64)) to i32), i32 0, i32 -1, i32 0, i32 1, i32 0 }, section ".xdata", comdat
+@_CTA1D = linkonce_odr unnamed_addr constant %eh.CatchableTypeArray.1 { i32 1, [1 x i32] [i32 trunc (i64 sub nuw nsw (i64 ptrtoint (%eh.CatchableType* @"_CT??_R0D@81" to i64), i64 ptrtoint (i8* @__ImageBase to i64)) to i32)] }, section ".xdata", comdat
+@_TI1D = linkonce_odr unnamed_addr constant %eh.ThrowInfo { i32 0, i32 0, i32 0, i32 trunc (i64 sub nuw nsw (i64 ptrtoint (%eh.CatchableTypeArray.1* @_CTA1D to i64), i64 ptrtoint (i8* @__ImageBase to i64)) to i32) }, section ".xdata", comdat
+
+; Function Attrs: nounwind uwtable
+define void @"\01?test@@YAXXZ"() #0 {
+entry:
+ %tmp = alloca i32, align 4
+ %x = alloca i32, align 4
+ %tmp2 = alloca i8, align 1
+ %c = alloca i8, align 1
+ %tmp11 = alloca i32, align 4
+ %x21 = alloca i32, align 4
+ call void @"\01?two@@YAXXZ"() #3
+ store i32 2, i32* %tmp
+ %0 = bitcast i32* %tmp to i8*
+ call void (...) @llvm.frameescape(i32* %x, i8* %c, i32* %x21)
+ invoke void @_CxxThrowException(i8* %0, %eh.ThrowInfo* @_TI1H) #5
+ to label %unreachable unwind label %lpad
+
+lpad: ; preds = %entry
+ %1 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+ catch i8* bitcast (%eh.CatchHandlerType* @llvm.eh.handlertype.H.0 to i8*)
+ catch %eh.CatchHandlerType* @llvm.eh.handlertype.D.0
+ catch %eh.CatchHandlerType* @llvm.eh.handlertype.H.0
+ catch i8* null
+ %recover = call i8* (...) @llvm.eh.actions(i32 1, i8* bitcast (%eh.CatchHandlerType* @llvm.eh.handlertype.H.0 to i8*), i32 0, i8* (i8*, i8*)* @"\01?test@@YAXXZ.catch", i32 1, i8* bitcast (%eh.CatchHandlerType* @llvm.eh.handlertype.D.0 to i8*), i32 1, i8* (i8*, i8*)* @"\01?test@@YAXXZ.catch1", i32 1, i8* bitcast (%eh.CatchHandlerType* @llvm.eh.handlertype.H.0 to i8*), i32 2, i8* (i8*, i8*)* @"\01?test@@YAXXZ.catch2", i32 1, i8* null, i32 -1, i8* (i8*, i8*)* @"\01?test@@YAXXZ.catch3")
+ indirectbr i8* %recover, [label %try.cont, label %try.cont10, label %try.cont22]
+
+try.cont: ; preds = %lpad
+ call void @"\01?a@@YAXXZ"() #3
+ store i8 97, i8* %tmp2
+ invoke void @_CxxThrowException(i8* %tmp2, %eh.ThrowInfo* @_TI1D) #5
+ to label %unreachable unwind label %lpad3
+
+lpad3: ; preds = %try.cont
+ %2 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+ catch %eh.CatchHandlerType* @llvm.eh.handlertype.D.0
+ catch %eh.CatchHandlerType* @llvm.eh.handlertype.H.0
+ catch i8* null
+ %recover1 = call i8* (...) @llvm.eh.actions(i32 1, i8* bitcast (%eh.CatchHandlerType* @llvm.eh.handlertype.D.0 to i8*), i32 1, i8* (i8*, i8*)* @"\01?test@@YAXXZ.catch1", i32 1, i8* bitcast (%eh.CatchHandlerType* @llvm.eh.handlertype.H.0 to i8*), i32 2, i8* (i8*, i8*)* @"\01?test@@YAXXZ.catch2", i32 1, i8* null, i32 -1, i8* (i8*, i8*)* @"\01?test@@YAXXZ.catch3")
+ indirectbr i8* %recover1, [label %try.cont10, label %try.cont22]
+
+try.cont10: ; preds = %lpad3, %lpad
+ call void @"\01?one@@YAXXZ"() #3
+ store i32 1, i32* %tmp11
+ %3 = bitcast i32* %tmp11 to i8*
+ invoke void @_CxxThrowException(i8* %3, %eh.ThrowInfo* @_TI1H) #5
+ to label %unreachable unwind label %lpad12
+
+lpad12: ; preds = %try.cont10
+ %4 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+ catch %eh.CatchHandlerType* @llvm.eh.handlertype.H.0
+ catch i8* null
+ %recover2 = call i8* (...) @llvm.eh.actions(i32 1, i8* bitcast (%eh.CatchHandlerType* @llvm.eh.handlertype.H.0 to i8*), i32 2, i8* (i8*, i8*)* @"\01?test@@YAXXZ.catch2", i32 1, i8* null, i32 -1, i8* (i8*, i8*)* @"\01?test@@YAXXZ.catch3")
+ indirectbr i8* %recover2, [label %try.cont22]
+
+try.cont22: ; preds = %lpad12, %lpad3, %lpad
+ ret void
+
+unreachable: ; preds = %try.cont10, %try.cont, %entry
+ unreachable
+}
+
+; Function Attrs: nounwind
+declare void @"\01?two@@YAXXZ"() #1
+
+declare void @_CxxThrowException(i8*, %eh.ThrowInfo*)
+
+declare i32 @__CxxFrameHandler3(...)
+
+; Function Attrs: nounwind readnone
+declare i32 @llvm.eh.typeid.for(i8*) #2
+
+; Function Attrs: nounwind
+declare void @llvm.eh.begincatch(i8* nocapture, i8* nocapture) #3
+
+; Function Attrs: nounwind
+declare void @"\01?catch_two@@YAXXZ"() #1
+
+; Function Attrs: nounwind
+declare void @llvm.eh.endcatch() #3
+
+; Function Attrs: nounwind
+declare void @"\01?a@@YAXXZ"() #1
+
+; Function Attrs: nounwind
+declare void @"\01?catch_a@@YAXXZ"() #1
+
+; Function Attrs: nounwind
+declare void @"\01?one@@YAXXZ"() #1
+
+; Function Attrs: nounwind
+declare void @"\01?catch_all@@YAXXZ"() #1
+
+; Function Attrs: nounwind
+declare void @"\01?catch_one@@YAXXZ"() #1
+
+; Function Attrs: nounwind
+declare i8* @llvm.eh.actions(...) #3
+
+define internal i8* @"\01?test@@YAXXZ.catch"(i8*, i8*) #4 {
+entry:
+ %x.i8 = call i8* @llvm.framerecover(i8* bitcast (void ()* @"\01?test@@YAXXZ" to i8*), i8* %1, i32 0)
+ %x = bitcast i8* %x.i8 to i32*
+ %2 = bitcast i32* %x to i8*
+ call void @"\01?catch_two@@YAXXZ"() #3
+ invoke void @llvm.donothing()
+ to label %entry.split unwind label %stub
+
+entry.split: ; preds = %entry
+ ret i8* blockaddress(@"\01?test@@YAXXZ", %try.cont)
+
+stub: ; preds = %entry
+ %3 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+ cleanup
+ %recover = call i8* (...) @llvm.eh.actions()
+ unreachable
+}
+
+; Function Attrs: nounwind readnone
+declare void @llvm.donothing() #2
+
+define internal i8* @"\01?test@@YAXXZ.catch1"(i8*, i8*) #4 {
+entry:
+ call void @"\01?catch_a@@YAXXZ"() #3
+ invoke void @llvm.donothing()
+ to label %entry.split unwind label %stub
+
+entry.split: ; preds = %entry
+ ret i8* blockaddress(@"\01?test@@YAXXZ", %try.cont10)
+
+stub: ; preds = %entry
+ %2 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+ cleanup
+ %recover = call i8* (...) @llvm.eh.actions()
+ unreachable
+}
+
+define internal i8* @"\01?test@@YAXXZ.catch2"(i8*, i8*) #4 {
+entry:
+ %x21.i8 = call i8* @llvm.framerecover(i8* bitcast (void ()* @"\01?test@@YAXXZ" to i8*), i8* %1, i32 2)
+ %x21 = bitcast i8* %x21.i8 to i32*
+ %2 = bitcast i32* %x21 to i8*
+ call void @"\01?catch_one@@YAXXZ"() #3
+ invoke void @llvm.donothing()
+ to label %entry.split unwind label %stub
+
+entry.split: ; preds = %entry
+ ret i8* blockaddress(@"\01?test@@YAXXZ", %try.cont22)
+
+stub: ; preds = %entry
+ %3 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+ cleanup
+ %recover = call i8* (...) @llvm.eh.actions()
+ unreachable
+}
+
+define internal i8* @"\01?test@@YAXXZ.catch3"(i8*, i8*) #4 {
+entry:
+ call void @"\01?catch_all@@YAXXZ"() #3
+ invoke void @llvm.donothing()
+ to label %entry.split unwind label %stub
+
+entry.split: ; preds = %entry
+ ret i8* blockaddress(@"\01?test@@YAXXZ", %try.cont22)
+
+stub: ; preds = %entry
+ %2 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*)
+ cleanup
+ %recover = call i8* (...) @llvm.eh.actions()
+ unreachable
+}
+
+; Function Attrs: nounwind
+declare void @llvm.frameescape(...) #3
+
+; Function Attrs: nounwind readnone
+declare i8* @llvm.framerecover(i8*, i8*, i32) #2
+
+attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" "wineh-parent"="?test@@YAXXZ" }
+attributes #1 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { nounwind readnone }
+attributes #3 = { nounwind }
+attributes #4 = { "wineh-parent"="?test@@YAXXZ" }
+attributes #5 = { noreturn }
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{i32 1, !"PIC Level", i32 2}
+!1 = !{!"clang version 3.7.0 (trunk 236059)"}
+
+; CHECK-LABEL: "$cppxdata$?test@@YAXXZ":
+; CHECK-NEXT: .long 429065506
+; CHECK-NEXT: .long
+; CHECK-NEXT: .long ("$stateUnwindMap$?test@@YAXXZ")@IMGREL
+; CHECK-NEXT: .long
+; CHECK-NEXT: .long ("$tryMap$?test@@YAXXZ")@IMGREL
+; CHECK-NEXT: .long
+; CHECK-NEXT: .long ("$ip2state$?test@@YAXXZ")@IMGREL
+; CHECK-NEXT: .long 40
+; CHECK-NEXT: .long 0
+; CHECK-NEXT: .long 1
+; CHECK: "$stateUnwindMap$?test@@YAXXZ":
+; CHECK: "$tryMap$?test@@YAXXZ":
+; CHECK: "$handlerMap$0$?test@@YAXXZ":
+; CHECK: "$ip2state$?test@@YAXXZ":
+; CHECK-NEXT: .long .Lfunc_begin0@IMGREL
+; CHECK-NEXT: .long -1
+; CHECK-NEXT: .long .Ltmp0@IMGREL
+; CHECK-NEXT: .long 2
+; CHECK-NEXT: .long .Ltmp3@IMGREL
+; CHECK-NEXT: .long 1
+; CHECK-NEXT: .long .Ltmp6@IMGREL
+; CHECK-NEXT: .long 0
+; CHECK-NEXT: .long .Lfunc_begin1@IMGREL
+; CHECK-NEXT: .long 3
+; CHECK-NEXT: .long .Lfunc_begin2@IMGREL
+; CHECK-NEXT: .long 4
+; CHECK-NEXT: .long .Lfunc_begin3@IMGREL
+; CHECK-NEXT: .long 5
+; CHECK-NEXT: .long .Lfunc_begin4@IMGREL
+; CHECK-NEXT: .long 6
diff --git a/test/CodeGen/WinEH/lit.local.cfg b/test/CodeGen/WinEH/lit.local.cfg
new file mode 100644
index 000000000000..67905d7e06e6
--- /dev/null
+++ b/test/CodeGen/WinEH/lit.local.cfg
@@ -0,0 +1,12 @@
+# FIXME: For now, override suffixes to exclude any .s tests, because some of the
+# buildbots have a stray misched-copy.s output file lying around that causes
+# failures. See misched-copy.s, where we try to clean up that file.
+#
+# It should be possible to remove this override once all the bots have cycled
+# cleanly.
+config.suffixes = ['.ll', '.test', '.txt']
+
+# FIXME: Add Windows on ARM support to these tests.
+if 'X86' not in config.root.targets:
+ config.unsupported = True
+
diff --git a/test/CodeGen/WinEH/seh-catch-all.ll b/test/CodeGen/WinEH/seh-catch-all.ll
new file mode 100644
index 000000000000..c2a652b80990
--- /dev/null
+++ b/test/CodeGen/WinEH/seh-catch-all.ll
@@ -0,0 +1,59 @@
+; RUN: opt -S -winehprepare < %s | FileCheck %s
+
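+; Check that WinEHPrepare turns an SEH catch-all (__except) landing pad into a
+; call to llvm.eh.actions with a single null-type (catch-all) entry.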
+target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-windows-msvc"
+
+@str.__except = internal unnamed_addr constant [9 x i8] c"__except\00", align 1
+
+; Function Attrs: uwtable
+
+declare i32 @puts(i8*)
+
+define void @may_crash() {
+entry:
+ store volatile i32 42, i32* null, align 4
+ ret void
+}
+
+declare i32 @__C_specific_handler(...)
+
+; Function Attrs: nounwind readnone
+declare i8* @llvm.frameaddress(i32)
+
+; Function Attrs: uwtable
+define void @seh_catch_all() {
+entry:
+ %exn.slot = alloca i8*
+ %ehselector.slot = alloca i32
+ invoke void @may_crash()
+ to label %invoke.cont unwind label %lpad
+
+invoke.cont: ; preds = %entry
+ br label %__try.cont
+
+lpad: ; preds = %entry
+ %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__C_specific_handler to i8*)
+ catch i8* null
+ %1 = extractvalue { i8*, i32 } %0, 0
+ store i8* %1, i8** %exn.slot
+ %2 = extractvalue { i8*, i32 } %0, 1
+ store i32 %2, i32* %ehselector.slot
+ br label %__except
+
+__except: ; preds = %lpad
+ %call = call i32 @puts(i8* getelementptr inbounds ([9 x i8], [9 x i8]* @str.__except, i32 0, i32 0))
+ br label %__try.cont
+
+__try.cont: ; preds = %__except, %invoke.cont
+ ret void
+}
+
+; CHECK-LABEL: define void @seh_catch_all()
+; CHECK: landingpad
+; CHECK-NEXT: catch i8* null
+; CHECK-NEXT: call i8* (...) @llvm.eh.actions(i32 1, i8* null, i32 -1, i8* blockaddress(@seh_catch_all, %lpad.split))
+; CHECK-NEXT: indirectbr
+;
+; CHECK: lpad.split:
+; CHECK-NOT: extractvalue
+; CHECK: call i32 @puts
diff --git a/test/CodeGen/WinEH/seh-inlined-finally.ll b/test/CodeGen/WinEH/seh-inlined-finally.ll
new file mode 100644
index 000000000000..d2080cff79d4
--- /dev/null
+++ b/test/CodeGen/WinEH/seh-inlined-finally.ll
@@ -0,0 +1,83 @@
+; RUN: opt -S -winehprepare < %s | FileCheck %s
+
+; Check that things work when the mid-level optimizer inlines the finally
+; block.
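+;
+; For reference, a plausible C source for @call_may_crash_locked below, with
+; the __finally body already inlined into both the normal and exceptional
+; paths (an illustrative sketch added for this document, not taken from the
+; original test; assumes <windows.h> for CRITICAL_SECTION):
+;
+; extern "C" void may_crash();
+; extern "C" int call_may_crash_locked() {
+;   CRITICAL_SECTION cs;
+;   EnterCriticalSection(&cs);
+;   __try {
+;     may_crash();
+;   } __finally {
+;     LeaveCriticalSection(&cs);
+;   }
+;   return 42;
+; }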
+
+target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-windows-msvc"
+
+%struct._RTL_CRITICAL_SECTION = type { %struct._RTL_CRITICAL_SECTION_DEBUG*, i32, i32, i8*, i8*, i64 }
+%struct._RTL_CRITICAL_SECTION_DEBUG = type { i16, i16, %struct._RTL_CRITICAL_SECTION*, %struct._LIST_ENTRY, i32, i32, i32, i16, i16 }
+%struct._LIST_ENTRY = type { %struct._LIST_ENTRY*, %struct._LIST_ENTRY* }
+
+declare i32 @puts(i8*)
+declare void @may_crash()
+declare i32 @__C_specific_handler(...)
+declare i8* @llvm.framerecover(i8*, i8*, i32) #1
+declare i8* @llvm.frameaddress(i32)
+declare void @llvm.frameescape(...)
+declare dllimport void @EnterCriticalSection(%struct._RTL_CRITICAL_SECTION*)
+declare dllimport void @LeaveCriticalSection(%struct._RTL_CRITICAL_SECTION*)
+
+define void @use_finally() {
+entry:
+ invoke void @may_crash()
+ to label %invoke.cont unwind label %lpad
+
+invoke.cont: ; preds = %entry
+ %call.i = tail call i32 @puts(i8* null)
+ ret void
+
+lpad: ; preds = %entry
+ %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__C_specific_handler to i8*)
+ cleanup
+ %call.i2 = tail call i32 @puts(i8* null)
+ resume { i8*, i32 } %0
+}
+
+; CHECK-LABEL: define void @use_finally()
+; CHECK: invoke void @may_crash()
+;
+; CHECK: landingpad
+; CHECK-NEXT: cleanup
+; CHECK-NEXT: call i8* (...) @llvm.eh.actions(i32 0, void (i8*, i8*)* @use_finally.cleanup)
+; CHECK-NEXT: indirectbr i8* %recover, []
+
+; Function Attrs: nounwind uwtable
+define i32 @call_may_crash_locked() {
+entry:
+ %p = alloca %struct._RTL_CRITICAL_SECTION, align 8
+ call void (...) @llvm.frameescape(%struct._RTL_CRITICAL_SECTION* %p)
+ call void @EnterCriticalSection(%struct._RTL_CRITICAL_SECTION* %p)
+ invoke void @may_crash()
+ to label %invoke.cont unwind label %lpad
+
+invoke.cont: ; preds = %entry
+ %tmp2 = call i8* @llvm.frameaddress(i32 0)
+ %tmp3 = call i8* @llvm.framerecover(i8* bitcast (i32 ()* @call_may_crash_locked to i8*), i8* %tmp2, i32 0) #2
+ %tmp6 = bitcast i8* %tmp3 to %struct._RTL_CRITICAL_SECTION*
+ call void @LeaveCriticalSection(%struct._RTL_CRITICAL_SECTION* %tmp6)
+ ret i32 42
+
+lpad: ; preds = %entry
+ %tmp7 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__C_specific_handler to i8*)
+ cleanup
+ %tmp8 = call i8* @llvm.frameaddress(i32 0)
+ %tmp9 = call i8* @llvm.framerecover(i8* bitcast (i32 ()* @call_may_crash_locked to i8*), i8* %tmp8, i32 0)
+ %tmp12 = bitcast i8* %tmp9 to %struct._RTL_CRITICAL_SECTION*
+ call void @LeaveCriticalSection(%struct._RTL_CRITICAL_SECTION* %tmp12)
+ resume { i8*, i32 } %tmp7
+}
+
+; CHECK-LABEL: define i32 @call_may_crash_locked()
+; CHECK: invoke void @may_crash()
+;
+; CHECK: landingpad
+; CHECK-NEXT: cleanup
+; CHECK-NEXT: call i8* (...) @llvm.eh.actions(i32 0, void (i8*, i8*)* @call_may_crash_locked.cleanup)
+; CHECK-NEXT: indirectbr i8* %recover, []
+
+; CHECK-LABEL: define internal void @call_may_crash_locked.cleanup(i8*, i8*)
+; CHECK: %tmp9 = call i8* @llvm.framerecover(i8* bitcast (i32 ()* @call_may_crash_locked to i8*), i8* %1, i32 0)
+; CHECK: %tmp12 = bitcast i8* %tmp9 to %struct._RTL_CRITICAL_SECTION*
+; CHECK: call void @LeaveCriticalSection(%struct._RTL_CRITICAL_SECTION* %tmp12)
diff --git a/test/CodeGen/WinEH/seh-outlined-finally.ll b/test/CodeGen/WinEH/seh-outlined-finally.ll
new file mode 100644
index 000000000000..19558b705308
--- /dev/null
+++ b/test/CodeGen/WinEH/seh-outlined-finally.ll
@@ -0,0 +1,155 @@
+; RUN: opt -S -winehprepare -mtriple=x86_64-windows-msvc < %s | FileCheck %s
+
+; Test case based on this code:
+;
+; extern "C" int _abnormal_termination();
+; #pragma intrinsic(_abnormal_termination)
+; extern "C" int printf(const char *, ...);
+; extern "C" void may_crash() {
+; *(volatile int *)0 = 42;
+; }
+; int main() {
+; int myres = 0;
+; __try {
+; __try {
+; may_crash();
+; } __finally {
+; printf("inner finally %d\n", _abnormal_termination());
+; may_crash();
+; }
+; } __finally {
+; printf("outer finally %d\n", _abnormal_termination());
+; }
+; }
+;
+; Note that if the inner finally crashes, the outer finally still runs. There
+; is nothing like a std::terminate call in this situation.
+
+target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-windows-msvc"
+
+@str_outer_finally = linkonce_odr unnamed_addr constant [18 x i8] c"outer finally %d\0A\00", align 1
+@str_inner_finally = linkonce_odr unnamed_addr constant [18 x i8] c"inner finally %d\0A\00", align 1
+
+; Function Attrs: nounwind uwtable
+define void @may_crash() #0 {
+entry:
+ store volatile i32 42, i32* null, align 4
+ ret void
+}
+
+; Function Attrs: uwtable
+define i32 @main() #1 {
+entry:
+ %myres = alloca i32, align 4
+ %exn.slot = alloca i8*
+ %ehselector.slot = alloca i32
+ store i32 0, i32* %myres, align 4
+ invoke void @may_crash() #4
+ to label %invoke.cont unwind label %lpad
+
+invoke.cont: ; preds = %entry
+ %0 = call i8* @llvm.frameaddress(i32 0)
+ invoke void @"\01?fin$1@0@main@@"(i1 zeroext false, i8* %0) #4
+ to label %invoke.cont2 unwind label %lpad1
+
+invoke.cont2: ; preds = %invoke.cont
+ %1 = call i8* @llvm.frameaddress(i32 0)
+ call void @"\01?fin$0@0@main@@"(i1 zeroext false, i8* %1)
+ ret i32 0
+
+lpad: ; preds = %entry
+ %2 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__C_specific_handler to i8*)
+ cleanup
+ %3 = extractvalue { i8*, i32 } %2, 0
+ store i8* %3, i8** %exn.slot
+ %4 = extractvalue { i8*, i32 } %2, 1
+ store i32 %4, i32* %ehselector.slot
+ %5 = call i8* @llvm.frameaddress(i32 0)
+ invoke void @"\01?fin$1@0@main@@"(i1 zeroext true, i8* %5) #4
+ to label %invoke.cont3 unwind label %lpad1
+
+lpad1: ; preds = %lpad, %invoke.cont
+ %6 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__C_specific_handler to i8*)
+ cleanup
+ %7 = extractvalue { i8*, i32 } %6, 0
+ store i8* %7, i8** %exn.slot
+ %8 = extractvalue { i8*, i32 } %6, 1
+ store i32 %8, i32* %ehselector.slot
+ br label %ehcleanup
+
+invoke.cont3: ; preds = %lpad
+ br label %ehcleanup
+
+ehcleanup: ; preds = %invoke.cont3, %lpad1
+ %9 = call i8* @llvm.frameaddress(i32 0)
+ call void @"\01?fin$0@0@main@@"(i1 zeroext true, i8* %9)
+ br label %eh.resume
+
+eh.resume: ; preds = %ehcleanup
+ %exn = load i8*, i8** %exn.slot
+ %sel = load i32, i32* %ehselector.slot
+ %lpad.val = insertvalue { i8*, i32 } undef, i8* %exn, 0
+ %lpad.val4 = insertvalue { i8*, i32 } %lpad.val, i32 %sel, 1
+ resume { i8*, i32 } %lpad.val4
+}
+
+; CHECK-NOT: define internal void @
+
+; CHECK-LABEL: define i32 @main()
+; CHECK: invoke void @may_crash()
+;
+; CHECK: landingpad { i8*, i32 }
+; CHECK-NEXT: cleanup
+; CHECK-NEXT: call i8* (...) @llvm.eh.actions(i32 0, void (i1, i8*)* @"\01?fin$1@0@main@@", i32 0, void (i1, i8*)* @"\01?fin$0@0@main@@")
+; CHECK-NEXT: indirectbr
+;
+; CHECK: landingpad { i8*, i32 }
+; CHECK-NEXT: cleanup
+; CHECK-NEXT: call i8* (...) @llvm.eh.actions(i32 0, void (i1, i8*)* @"\01?fin$0@0@main@@")
+; CHECK-NEXT: indirectbr
+
+; There should not be any *new* cleanup helpers, just the existing ones.
+; CHECK-NOT: define internal void @
+; CHECK: define internal void @"\01?fin$0@0@main@@"(i1 zeroext %abnormal_termination, i8* %frame_pointer)
+; CHECK-NOT: define internal void @
+; CHECK: define internal void @"\01?fin$1@0@main@@"(i1 zeroext %abnormal_termination, i8* %frame_pointer)
+; CHECK-NOT: define internal void @
+
+define internal void @"\01?fin$0@0@main@@"(i1 zeroext %abnormal_termination, i8* %frame_pointer) #2 {
+entry:
+ %frame_pointer.addr = alloca i8*, align 8
+ %abnormal_termination.addr = alloca i8, align 1
+ store i8* %frame_pointer, i8** %frame_pointer.addr, align 8
+ %frombool = zext i1 %abnormal_termination to i8
+ store i8 %frombool, i8* %abnormal_termination.addr, align 1
+ %0 = zext i1 %abnormal_termination to i32
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([18 x i8], [18 x i8]* @str_outer_finally, i32 0, i32 0), i32 %0)
+ ret void
+}
+
+declare i32 @printf(i8*, ...) #2
+
+define internal void @"\01?fin$1@0@main@@"(i1 zeroext %abnormal_termination, i8* %frame_pointer) #2 {
+entry:
+ %frame_pointer.addr = alloca i8*, align 8
+ %abnormal_termination.addr = alloca i8, align 1
+ store i8* %frame_pointer, i8** %frame_pointer.addr, align 8
+ %frombool = zext i1 %abnormal_termination to i8
+ store i8 %frombool, i8* %abnormal_termination.addr, align 1
+ %0 = zext i1 %abnormal_termination to i32
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([18 x i8], [18 x i8]* @str_inner_finally, i32 0, i32 0), i32 %0)
+ call void @may_crash()
+ ret void
+}
+
+declare i32 @__C_specific_handler(...)
+
+; Function Attrs: nounwind readnone
+declare i8* @llvm.frameaddress(i32) #3
+
+attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #3 = { nounwind readnone }
+attributes #4 = { noinline }
diff --git a/test/CodeGen/WinEH/seh-prepared-basic.ll b/test/CodeGen/WinEH/seh-prepared-basic.ll
new file mode 100644
index 000000000000..880bb3c33a8d
--- /dev/null
+++ b/test/CodeGen/WinEH/seh-prepared-basic.ll
@@ -0,0 +1,83 @@
+; RUN: llc < %s | FileCheck %s
+
+; Test case based on this code:
+; extern "C" unsigned long _exception_code();
+; extern "C" int filt(unsigned long);
+; extern "C" void g();
+; extern "C" void do_except() {
+; __try {
+; g();
+; } __except(filt(_exception_code())) {
+; }
+; }
+
+target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-windows-msvc"
+
+; Function Attrs: uwtable
+define void @do_except() #0 {
+entry:
+ call void (...) @llvm.frameescape()
+ invoke void @g() #5
+ to label %__try.cont unwind label %lpad1
+
+lpad1: ; preds = %entry
+ %ehvals = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__C_specific_handler to i8*)
+ catch i8* bitcast (i32 (i8*, i8*)* @"\01?filt$0@0@do_except@@" to i8*)
+ %recover = call i8* (...) @llvm.eh.actions(i32 1, i8* bitcast (i32 (i8*, i8*)* @"\01?filt$0@0@do_except@@" to i8*), i32 -1, i8* blockaddress(@do_except, %__try.cont))
+ indirectbr i8* %recover, [label %__try.cont]
+
+__try.cont: ; preds = %lpad1, %entry
+ ret void
+}
+
+; CHECK-LABEL: do_except:
+; CHECK: .seh_handler __C_specific_handler
+; CHECK-NOT: jmpq *
+; CHECK: .seh_handlerdata
+; CHECK-NEXT: .long 1
+; CHECK-NEXT: .long .Ltmp{{.*}}
+; CHECK-NEXT: .long .Ltmp{{.*}}
+; CHECK-NEXT: .long "?filt$0@0@do_except@@"@IMGREL
+; CHECK-NEXT: .long .Ltmp{{.*}}@IMGREL
+
+; Function Attrs: noinline nounwind
+define internal i32 @"\01?filt$0@0@do_except@@"(i8* nocapture readonly %exception_pointers, i8* nocapture readnone %frame_pointer) #1 {
+entry:
+ %0 = bitcast i8* %exception_pointers to i32**
+ %1 = load i32*, i32** %0, align 8
+ %2 = load i32, i32* %1, align 4
+ %call = tail call i32 @filt(i32 %2) #4
+ ret i32 %call
+}
+
+declare i32 @filt(i32) #2
+
+declare void @g() #2
+
+declare i32 @__C_specific_handler(...)
+
+; Function Attrs: nounwind readnone
+declare i32 @llvm.eh.typeid.for(i8*) #3
+
+; Function Attrs: nounwind
+declare i8* @llvm.eh.actions(...) #4
+
+; Function Attrs: nounwind
+declare void @llvm.frameescape(...) #4
+
+; Function Attrs: nounwind readnone
+declare i8* @llvm.framerecover(i8*, i8*, i32) #3
+
+attributes #0 = { uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "unsafe-fp-math"="false" "use-soft-float"="false" "wineh-parent"="do_except" }
+attributes #1 = { noinline nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #3 = { nounwind readnone }
+attributes #4 = { nounwind }
+attributes #5 = { noinline }
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{i32 1, !"PIC Level", i32 2}
+!1 = !{!"clang version 3.7.0 "}
diff --git a/test/CodeGen/WinEH/seh-resume-phi.ll b/test/CodeGen/WinEH/seh-resume-phi.ll
new file mode 100644
index 000000000000..256dd852d287
--- /dev/null
+++ b/test/CodeGen/WinEH/seh-resume-phi.ll
@@ -0,0 +1,66 @@
+; RUN: opt -S -winehprepare < %s | FileCheck %s
+
+target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-windows-msvc"
+
+declare void @might_crash(i8* %ehptr)
+declare i32 @filt()
+declare void @cleanup()
+declare i32 @__C_specific_handler(...)
+declare i32 @llvm.eh.typeid.for(i8*)
+
+define void @resume_phi() {
+entry:
+ invoke void @might_crash(i8* null)
+ to label %return unwind label %lpad1
+
+lpad1:
+ %ehvals1 = landingpad { i8*, i32 } personality i32 (...)* @__C_specific_handler
+ catch i32 ()* @filt
+ %ehptr1 = extractvalue { i8*, i32 } %ehvals1, 0
+ %ehsel1 = extractvalue { i8*, i32 } %ehvals1, 1
+ %filt_sel = tail call i32 @llvm.eh.typeid.for(i8* bitcast (i32 ()* @filt to i8*))
+ %matches = icmp eq i32 %ehsel1, %filt_sel
+ br i1 %matches, label %__except, label %eh.resume
+
+__except:
+ invoke void @might_crash(i8* %ehptr1)
+ to label %return unwind label %lpad2
+
+lpad2:
+ %ehvals2 = landingpad { i8*, i32 } personality i32 (...)* @__C_specific_handler
+ cleanup
+ %ehptr2 = extractvalue { i8*, i32 } %ehvals2, 0
+ %ehsel2 = extractvalue { i8*, i32 } %ehvals2, 1
+ call void @cleanup()
+ br label %eh.resume
+
+return:
+ ret void
+
+eh.resume:
+ %ehptr.phi = phi i8* [ %ehptr1, %lpad1 ], [ %ehptr2, %lpad2 ]
+ %ehsel.phi = phi i32 [ %ehsel1, %lpad1 ], [ %ehsel2, %lpad2 ]
+ %ehval.phi1 = insertvalue { i8*, i32 } undef, i8* %ehptr.phi, 0
+ %ehval.phi2 = insertvalue { i8*, i32 } %ehval.phi1, i32 %ehsel.phi, 1
+ resume { i8*, i32 } %ehval.phi2
+}
+
+; CHECK-LABEL: define void @resume_phi()
+; CHECK: invoke void @might_crash(i8* null)
+; CHECK: landingpad { i8*, i32 }
+; CHECK-NEXT: catch i32 ()* @filt
+; CHECK-NEXT: call i8* (...) @llvm.eh.actions(
+; CHECK-SAME: i32 1, i8* bitcast (i32 ()* @filt to i8*), i32 -1, i8* blockaddress(@resume_phi, %__except))
+; CHECK-NEXT: indirectbr {{.*}} [label %__except]
+;
+; CHECK: __except:
+; CHECK: call i32 @llvm.eh.exceptioncode()
+; CHECK: invoke void @might_crash(i8* %{{.*}})
+; CHECK: landingpad { i8*, i32 }
+; CHECK-NEXT: cleanup
+; CHECK-NEXT: call i8* (...) @llvm.eh.actions(i32 0, void (i8*, i8*)* @resume_phi.cleanup)
+; CHECK-NEXT: indirectbr {{.*}} []
+
+; CHECK-LABEL: define internal void @resume_phi.cleanup(i8*, i8*)
+; CHECK: call void @cleanup()
diff --git a/test/CodeGen/WinEH/seh-simple.ll b/test/CodeGen/WinEH/seh-simple.ll
new file mode 100644
index 000000000000..9a451874d587
--- /dev/null
+++ b/test/CodeGen/WinEH/seh-simple.ll
@@ -0,0 +1,201 @@
+; RUN: opt -S -winehprepare -mtriple=x86_64-windows-msvc < %s \
+; RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=X64
+
+; This test should also pass in 32-bit using _except_handler3.
+; RUN: sed -e 's/__C_specific_handler/_except_handler3/' %s \
+; RUN: | opt -S -winehprepare -mtriple=i686-windows-msvc \
+; RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=X86
+
+declare void @cleanup()
+declare i32 @filt()
+declare void @might_crash()
+declare i32 @__C_specific_handler(...)
+declare i32 @llvm.eh.typeid.for(i8*)
+
+define i32 @simple_except_store() {
+entry:
+ %retval = alloca i32
+ store i32 0, i32* %retval
+ invoke void @might_crash()
+ to label %return unwind label %lpad
+
+lpad:
+ %ehvals = landingpad { i8*, i32 } personality i32 (...)* @__C_specific_handler
+ catch i32 ()* @filt
+ %sel = extractvalue { i8*, i32 } %ehvals, 1
+ %filt_sel = tail call i32 @llvm.eh.typeid.for(i8* bitcast (i32 ()* @filt to i8*))
+ %matches = icmp eq i32 %sel, %filt_sel
+ br i1 %matches, label %__except, label %eh.resume
+
+__except:
+ store i32 1, i32* %retval
+ br label %return
+
+return:
+ %r = load i32, i32* %retval
+ ret i32 %r
+
+eh.resume:
+ resume { i8*, i32 } %ehvals
+}
+
+; CHECK-LABEL: define i32 @simple_except_store()
+; CHECK: landingpad { i8*, i32 }
+; CHECK-NEXT: catch i32 ()* @filt
+; CHECK-NEXT: call i8* (...) @llvm.eh.actions(i32 1, i8* bitcast (i32 ()* @filt to i8*), i32 -1, i8* blockaddress(@simple_except_store, %__except))
+; CHECK-NEXT: indirectbr {{.*}} [label %__except]
+
+define i32 @catch_all() {
+entry:
+ %retval = alloca i32
+ store i32 0, i32* %retval
+ invoke void @might_crash()
+ to label %return unwind label %lpad
+
+lpad:
+ %ehvals = landingpad { i8*, i32 } personality i32 (...)* @__C_specific_handler
+ catch i8* null
+ store i32 1, i32* %retval
+ br label %return
+
+return:
+ %r = load i32, i32* %retval
+ ret i32 %r
+}
+
+; CHECK-LABEL: define i32 @catch_all()
+; CHECK: landingpad { i8*, i32 }
+; CHECK-NEXT: catch i8* null
+; CHECK-NEXT: call i8* (...) @llvm.eh.actions(i32 1, i8* null, i32 -1, i8* blockaddress(@catch_all, %lpad.split))
+; CHECK-NEXT: indirectbr {{.*}} [label %lpad.split]
+;
+; CHECK: lpad.split:
+; CHECK: store i32 1, i32* %retval
+
+
+define i32 @except_phi() {
+entry:
+ invoke void @might_crash()
+ to label %return unwind label %lpad
+
+lpad:
+ %ehvals = landingpad { i8*, i32 } personality i32 (...)* @__C_specific_handler
+ catch i32 ()* @filt
+ %sel = extractvalue { i8*, i32 } %ehvals, 1
+ %filt_sel = tail call i32 @llvm.eh.typeid.for(i8* bitcast (i32 ()* @filt to i8*))
+ %matches = icmp eq i32 %sel, %filt_sel
+ br i1 %matches, label %return, label %eh.resume
+
+return:
+ %r = phi i32 [0, %entry], [1, %lpad]
+ ret i32 %r
+
+eh.resume:
+ resume { i8*, i32 } %ehvals
+}
+
+; CHECK-LABEL: define i32 @except_phi()
+; CHECK: landingpad { i8*, i32 }
+; CHECK-NEXT: catch i32 ()* @filt
+; CHECK-NEXT: call i8* (...) @llvm.eh.actions(i32 1, i8* bitcast (i32 ()* @filt to i8*), i32 -1, i8* blockaddress(@except_phi, %lpad.return_crit_edge))
+; CHECK-NEXT: indirectbr {{.*}} [label %lpad.return_crit_edge]
+;
+; CHECK: lpad.return_crit_edge:
+; CHECK: br label %return
+;
+; CHECK: return:
+; CHECK-NEXT: %r = phi i32 [ 0, %entry ], [ 1, %lpad.return_crit_edge ]
+; CHECK-NEXT: ret i32 %r
+
+define i32 @lpad_phi() {
+entry:
+ invoke void @might_crash()
+ to label %cont unwind label %lpad
+
+cont:
+ invoke void @might_crash()
+ to label %return unwind label %lpad
+
+lpad:
+ %ncalls.1 = phi i32 [ 0, %entry ], [ 1, %cont ]
+ %ehvals = landingpad { i8*, i32 } personality i32 (...)* @__C_specific_handler
+ catch i32 ()* @filt
+ %sel = extractvalue { i8*, i32 } %ehvals, 1
+ %filt_sel = tail call i32 @llvm.eh.typeid.for(i8* bitcast (i32 ()* @filt to i8*))
+ %matches = icmp eq i32 %sel, %filt_sel
+ br i1 %matches, label %return, label %eh.resume
+
+return:
+ %r = phi i32 [2, %cont], [%ncalls.1, %lpad]
+ ret i32 %r
+
+eh.resume:
+ resume { i8*, i32 } %ehvals
+}
+
+; CHECK-LABEL: define i32 @lpad_phi()
+; CHECK: alloca i32
+; CHECK: store i32 0, i32*
+; CHECK: invoke void @might_crash()
+; CHECK: store i32 1, i32*
+; CHECK: invoke void @might_crash()
+; CHECK: landingpad { i8*, i32 }
+; CHECK-NEXT: cleanup
+; CHECK-NEXT: catch i32 ()* @filt
+; CHECK-NEXT: call i8* (...) @llvm.eh.actions(i32 0, void ({{.*}})* @lpad_phi.cleanup, i32 1, i8* bitcast (i32 ()* @filt to i8*), i32 -1, i8* blockaddress(@lpad_phi, %lpad.return_crit_edge))
+; CHECK-NEXT: indirectbr {{.*}} [label %lpad.return_crit_edge]
+;
+; CHECK: lpad.return_crit_edge:
+; CHECK: load i32, i32*
+; CHECK: br label %return
+;
+; CHECK: return:
+; CHECK-NEXT: %r = phi i32 [ 2, %cont ], [ %{{.*}}, %lpad.return_crit_edge ]
+; CHECK-NEXT: ret i32 %r
+
+define i32 @cleanup_and_except() {
+entry:
+ invoke void @might_crash()
+ to label %return unwind label %lpad
+
+lpad:
+ %ehvals = landingpad { i8*, i32 } personality i32 (...)* @__C_specific_handler
+ cleanup
+ catch i32 ()* @filt
+ call void @cleanup()
+ %sel = extractvalue { i8*, i32 } %ehvals, 1
+ %filt_sel = tail call i32 @llvm.eh.typeid.for(i8* bitcast (i32 ()* @filt to i8*))
+ %matches = icmp eq i32 %sel, %filt_sel
+ br i1 %matches, label %return, label %eh.resume
+
+return:
+ %r = phi i32 [0, %entry], [1, %lpad]
+ ret i32 %r
+
+eh.resume:
+ resume { i8*, i32 } %ehvals
+}
+
+; CHECK-LABEL: define i32 @cleanup_and_except()
+; CHECK: landingpad { i8*, i32 }
+; CHECK-NEXT: cleanup
+; CHECK-NEXT: catch i32 ()* @filt
+; CHECK-NEXT: call i8* (...) @llvm.eh.actions(
+; CHECK: i32 0, void ({{.*}})* @cleanup_and_except.cleanup,
+; CHECK: i32 1, i8* bitcast (i32 ()* @filt to i8*), i32 -1, i8* blockaddress(@cleanup_and_except, %lpad.return_crit_edge))
+; CHECK-NEXT: indirectbr {{.*}} [label %lpad.return_crit_edge]
+;
+; CHECK: lpad.return_crit_edge:
+; CHECK: br label %return
+;
+; CHECK: return:
+; CHECK-NEXT: %r = phi i32 [ 0, %entry ], [ 1, %lpad.return_crit_edge ]
+; CHECK-NEXT: ret i32 %r
+
+; FIXME: This cleanup is an artifact of bad demotion.
+; X64-LABEL: define internal void @lpad_phi.cleanup(i8*, i8*)
+; X86-LABEL: define internal void @lpad_phi.cleanup()
+; X86: call i8* @llvm.frameaddress(i32 1)
+; CHECK: call i8* @llvm.framerecover({{.*}})
+; CHECK: load i32
+; CHECK: store i32 %{{.*}}, i32*
diff --git a/test/CodeGen/X86/2005-01-17-CycleInDAG.ll b/test/CodeGen/X86/2005-01-17-CycleInDAG.ll
index fe6674da041f..48236cd0c8fe 100644
--- a/test/CodeGen/X86/2005-01-17-CycleInDAG.ll
+++ b/test/CodeGen/X86/2005-01-17-CycleInDAG.ll
@@ -8,9 +8,9 @@
@GLOBAL = external global i32 ; <i32*> [#uses=1]
define i32 @test(i32* %P1, i32* %P2, i32* %P3) nounwind {
- %L = load i32* @GLOBAL ; <i32> [#uses=1]
+ %L = load i32, i32* @GLOBAL ; <i32> [#uses=1]
store i32 12, i32* %P2
- %Y = load i32* %P3 ; <i32> [#uses=1]
+ %Y = load i32, i32* %P3 ; <i32> [#uses=1]
%Z = sub i32 %Y, %L ; <i32> [#uses=1]
ret i32 %Z
}
diff --git a/test/CodeGen/X86/2005-02-14-IllegalAssembler.ll b/test/CodeGen/X86/2005-02-14-IllegalAssembler.ll
index 30a6ac6fbdf1..a05fc840922f 100644
--- a/test/CodeGen/X86/2005-02-14-IllegalAssembler.ll
+++ b/test/CodeGen/X86/2005-02-14-IllegalAssembler.ll
@@ -1,5 +1,5 @@
; RUN: llc < %s -march=x86 | not grep 18446744073709551612
@A = external global i32 ; <i32*> [#uses=1]
-@Y = global i32* getelementptr (i32* @A, i32 -1) ; <i32**> [#uses=0]
+@Y = global i32* getelementptr (i32, i32* @A, i32 -1) ; <i32**> [#uses=0]
diff --git a/test/CodeGen/X86/2006-01-19-ISelFoldingBug.ll b/test/CodeGen/X86/2006-01-19-ISelFoldingBug.ll
index 1b3fc382e890..f6b5b2c103fe 100644
--- a/test/CodeGen/X86/2006-01-19-ISelFoldingBug.ll
+++ b/test/CodeGen/X86/2006-01-19-ISelFoldingBug.ll
@@ -7,7 +7,7 @@
@A = external global i32 ; <i32*> [#uses=2]
define i32 @test5(i32 %B, i8 %C) {
- %tmp.1 = load i32* @A ; <i32> [#uses=1]
+ %tmp.1 = load i32, i32* @A ; <i32> [#uses=1]
%shift.upgrd.1 = zext i8 %C to i32 ; <i32> [#uses=1]
%tmp.2 = shl i32 %tmp.1, %shift.upgrd.1 ; <i32> [#uses=1]
%tmp.3 = sub i8 32, %C ; <i8> [#uses=1]
diff --git a/test/CodeGen/X86/2006-04-27-ISelFoldingBug.ll b/test/CodeGen/X86/2006-04-27-ISelFoldingBug.ll
index fb1262a37295..9f44bc348e37 100644
--- a/test/CodeGen/X86/2006-04-27-ISelFoldingBug.ll
+++ b/test/CodeGen/X86/2006-04-27-ISelFoldingBug.ll
@@ -13,14 +13,14 @@ label.0.no_exit.1_crit_edge.exitStub: ; preds = %label.0
codeRepl5.exitStub: ; preds = %label.0
ret i1 false
label.0: ; preds = %newFuncRoot
- %tmp.35 = load i32* @last ; <i32> [#uses=1]
+ %tmp.35 = load i32, i32* @last ; <i32> [#uses=1]
%inc.1 = add i32 %tmp.35, 1 ; <i32> [#uses=2]
store i32 %inc.1, i32* @last
- %tmp.36 = load i8** @block ; <i8*> [#uses=1]
- %tmp.38 = getelementptr i8* %tmp.36, i32 %inc.1 ; <i8*> [#uses=1]
+ %tmp.36 = load i8*, i8** @block ; <i8*> [#uses=1]
+ %tmp.38 = getelementptr i8, i8* %tmp.36, i32 %inc.1 ; <i8*> [#uses=1]
%tmp.40 = trunc i32 %tmp.21.reload to i8 ; <i8> [#uses=1]
store i8 %tmp.40, i8* %tmp.38
- %tmp.910 = load i32* @last ; <i32> [#uses=1]
+ %tmp.910 = load i32, i32* @last ; <i32> [#uses=1]
%tmp.1111 = icmp slt i32 %tmp.910, %tmp.8 ; <i1> [#uses=1]
%tmp.1412 = icmp ne i32 %tmp.21.reload, 257 ; <i1> [#uses=1]
%tmp.1613 = and i1 %tmp.1111, %tmp.1412 ; <i1> [#uses=1]
diff --git a/test/CodeGen/X86/2006-05-01-SchedCausingSpills.ll b/test/CodeGen/X86/2006-05-01-SchedCausingSpills.ll
index faa3e21a934d..583877e66582 100644
--- a/test/CodeGen/X86/2006-05-01-SchedCausingSpills.ll
+++ b/test/CodeGen/X86/2006-05-01-SchedCausingSpills.ll
@@ -5,10 +5,10 @@
define i32 @foo(<4 x float>* %a, <4 x float>* %b, <4 x float>* %c, <4 x float>* %d) {
- %tmp44 = load <4 x float>* %a ; <<4 x float>> [#uses=9]
- %tmp46 = load <4 x float>* %b ; <<4 x float>> [#uses=1]
- %tmp48 = load <4 x float>* %c ; <<4 x float>> [#uses=1]
- %tmp50 = load <4 x float>* %d ; <<4 x float>> [#uses=1]
+ %tmp44 = load <4 x float>, <4 x float>* %a ; <<4 x float>> [#uses=9]
+ %tmp46 = load <4 x float>, <4 x float>* %b ; <<4 x float>> [#uses=1]
+ %tmp48 = load <4 x float>, <4 x float>* %c ; <<4 x float>> [#uses=1]
+ %tmp50 = load <4 x float>, <4 x float>* %d ; <<4 x float>> [#uses=1]
%tmp51 = bitcast <4 x float> %tmp44 to <4 x i32> ; <<4 x i32>> [#uses=1]
%tmp = shufflevector <4 x i32> %tmp51, <4 x i32> undef, <4 x i32> < i32 3, i32 3, i32 3, i32 3 > ; <<4 x i32>> [#uses=2]
%tmp52 = bitcast <4 x i32> %tmp to <4 x float> ; <<4 x float>> [#uses=1]
diff --git a/test/CodeGen/X86/2006-05-02-InstrSched1.ll b/test/CodeGen/X86/2006-05-02-InstrSched1.ll
index 69266dc4e44b..46c5e88955f4 100644
--- a/test/CodeGen/X86/2006-05-02-InstrSched1.ll
+++ b/test/CodeGen/X86/2006-05-02-InstrSched1.ll
@@ -11,14 +11,14 @@
define i32 @compare(i8* %a, i8* %b) nounwind {
%tmp = bitcast i8* %a to i32* ; <i32*> [#uses=1]
%tmp1 = bitcast i8* %b to i32* ; <i32*> [#uses=1]
- %tmp.upgrd.1 = load i32* @size20 ; <i32> [#uses=1]
- %tmp.upgrd.2 = load i8** @in5 ; <i8*> [#uses=2]
- %tmp3 = load i32* %tmp1 ; <i32> [#uses=1]
+ %tmp.upgrd.1 = load i32, i32* @size20 ; <i32> [#uses=1]
+ %tmp.upgrd.2 = load i8*, i8** @in5 ; <i8*> [#uses=2]
+ %tmp3 = load i32, i32* %tmp1 ; <i32> [#uses=1]
%gep.upgrd.3 = zext i32 %tmp3 to i64 ; <i64> [#uses=1]
- %tmp4 = getelementptr i8* %tmp.upgrd.2, i64 %gep.upgrd.3 ; <i8*> [#uses=2]
- %tmp7 = load i32* %tmp ; <i32> [#uses=1]
+ %tmp4 = getelementptr i8, i8* %tmp.upgrd.2, i64 %gep.upgrd.3 ; <i8*> [#uses=2]
+ %tmp7 = load i32, i32* %tmp ; <i32> [#uses=1]
%gep.upgrd.4 = zext i32 %tmp7 to i64 ; <i64> [#uses=1]
- %tmp8 = getelementptr i8* %tmp.upgrd.2, i64 %gep.upgrd.4 ; <i8*> [#uses=2]
+ %tmp8 = getelementptr i8, i8* %tmp.upgrd.2, i64 %gep.upgrd.4 ; <i8*> [#uses=2]
%tmp.upgrd.5 = tail call i32 @memcmp( i8* %tmp8, i8* %tmp4, i32 %tmp.upgrd.1 ) ; <i32> [#uses=1]
ret i32 %tmp.upgrd.5
}
diff --git a/test/CodeGen/X86/2006-05-02-InstrSched2.ll b/test/CodeGen/X86/2006-05-02-InstrSched2.ll
index 222b7a0b41fd..3281c68e9334 100644
--- a/test/CodeGen/X86/2006-05-02-InstrSched2.ll
+++ b/test/CodeGen/X86/2006-05-02-InstrSched2.ll
@@ -12,13 +12,13 @@ cond_true456.i: ; preds = %cond_true456.i, %newFuncRoot
%__s441.2.4.i = phi i8* [ %tmp451.i.upgrd.1, %cond_true456.i ], [ %tmp435.i, %newFuncRoot ] ; <i8*> [#uses=2]
%__h.2.4.i = phi i32 [ %tmp449.i, %cond_true456.i ], [ 0, %newFuncRoot ] ; <i32> [#uses=1]
%tmp446.i = mul i32 %__h.2.4.i, 5 ; <i32> [#uses=1]
- %tmp.i = load i8* %__s441.2.4.i ; <i8> [#uses=1]
+ %tmp.i = load i8, i8* %__s441.2.4.i ; <i8> [#uses=1]
%tmp448.i = sext i8 %tmp.i to i32 ; <i32> [#uses=1]
%tmp449.i = add i32 %tmp448.i, %tmp446.i ; <i32> [#uses=2]
%tmp450.i = ptrtoint i8* %__s441.2.4.i to i32 ; <i32> [#uses=1]
%tmp451.i = add i32 %tmp450.i, 1 ; <i32> [#uses=1]
%tmp451.i.upgrd.1 = inttoptr i32 %tmp451.i to i8* ; <i8*> [#uses=2]
- %tmp45435.i = load i8* %tmp451.i.upgrd.1 ; <i8> [#uses=1]
+ %tmp45435.i = load i8, i8* %tmp451.i.upgrd.1 ; <i8> [#uses=1]
%tmp45536.i = icmp eq i8 %tmp45435.i, 0 ; <i1> [#uses=1]
br i1 %tmp45536.i, label %bb459.i.exitStub, label %cond_true456.i
}
diff --git a/test/CodeGen/X86/2006-05-08-CoalesceSubRegClass.ll b/test/CodeGen/X86/2006-05-08-CoalesceSubRegClass.ll
index 8421483ecb55..b70d375bf51a 100644
--- a/test/CodeGen/X86/2006-05-08-CoalesceSubRegClass.ll
+++ b/test/CodeGen/X86/2006-05-08-CoalesceSubRegClass.ll
@@ -9,13 +9,13 @@
define void @test(i32 %A) {
%A.upgrd.1 = trunc i32 %A to i8 ; <i8> [#uses=1]
- %tmp2 = load i32* @B ; <i32> [#uses=1]
+ %tmp2 = load i32, i32* @B ; <i32> [#uses=1]
%tmp3 = and i8 %A.upgrd.1, 16 ; <i8> [#uses=1]
%shift.upgrd.2 = zext i8 %tmp3 to i32 ; <i32> [#uses=1]
%tmp4 = shl i32 %tmp2, %shift.upgrd.2 ; <i32> [#uses=1]
store i32 %tmp4, i32* @B
%tmp6 = lshr i32 %A, 3 ; <i32> [#uses=1]
- %tmp = load i16** @C ; <i16*> [#uses=1]
+ %tmp = load i16*, i16** @C ; <i16*> [#uses=1]
%tmp8 = ptrtoint i16* %tmp to i32 ; <i32> [#uses=1]
%tmp9 = add i32 %tmp8, %tmp6 ; <i32> [#uses=1]
%tmp9.upgrd.3 = inttoptr i32 %tmp9 to i16* ; <i16*> [#uses=1]
diff --git a/test/CodeGen/X86/2006-05-08-InstrSched.ll b/test/CodeGen/X86/2006-05-08-InstrSched.ll
index 3419d01fa083..cd46ecfef525 100644
--- a/test/CodeGen/X86/2006-05-08-InstrSched.ll
+++ b/test/CodeGen/X86/2006-05-08-InstrSched.ll
@@ -5,13 +5,13 @@
@C = external global i32 ; <i32*> [#uses=2]
define void @test() {
- %tmp = load i16** @A ; <i16*> [#uses=1]
- %tmp1 = getelementptr i16* %tmp, i32 1 ; <i16*> [#uses=1]
- %tmp.upgrd.1 = load i16* %tmp1 ; <i16> [#uses=1]
+ %tmp = load i16*, i16** @A ; <i16*> [#uses=1]
+ %tmp1 = getelementptr i16, i16* %tmp, i32 1 ; <i16*> [#uses=1]
+ %tmp.upgrd.1 = load i16, i16* %tmp1 ; <i16> [#uses=1]
%tmp3 = zext i16 %tmp.upgrd.1 to i32 ; <i32> [#uses=1]
- %tmp.upgrd.2 = load i32* @B ; <i32> [#uses=1]
+ %tmp.upgrd.2 = load i32, i32* @B ; <i32> [#uses=1]
%tmp4 = and i32 %tmp.upgrd.2, 16 ; <i32> [#uses=1]
- %tmp5 = load i32* @C ; <i32> [#uses=1]
+ %tmp5 = load i32, i32* @C ; <i32> [#uses=1]
%tmp6 = trunc i32 %tmp4 to i8 ; <i8> [#uses=2]
%shift.upgrd.3 = zext i8 %tmp6 to i32 ; <i32> [#uses=1]
%tmp7 = shl i32 %tmp5, %shift.upgrd.3 ; <i32> [#uses=1]
diff --git a/test/CodeGen/X86/2006-05-11-InstrSched.ll b/test/CodeGen/X86/2006-05-11-InstrSched.ll
index 3d0946698500..b1deb2c5f567 100644
--- a/test/CodeGen/X86/2006-05-11-InstrSched.ll
+++ b/test/CodeGen/X86/2006-05-11-InstrSched.ll
@@ -13,21 +13,21 @@ cond_true: ; preds = %cond_true, %entry
%tmp. = shl i32 %indvar, 2 ; <i32> [#uses=1]
%tmp.10 = add nsw i32 %tmp., 1 ; <i32> [#uses=2]
%tmp31 = add nsw i32 %tmp.10, -1 ; <i32> [#uses=4]
- %tmp32 = getelementptr i32* %mpp, i32 %tmp31 ; <i32*> [#uses=1]
+ %tmp32 = getelementptr i32, i32* %mpp, i32 %tmp31 ; <i32*> [#uses=1]
%tmp34 = bitcast i32* %tmp32 to <16 x i8>* ; <i8*> [#uses=1]
- %tmp = load <16 x i8>* %tmp34, align 1
- %tmp42 = getelementptr i32* %tpmm, i32 %tmp31 ; <i32*> [#uses=1]
+ %tmp = load <16 x i8>, <16 x i8>* %tmp34, align 1
+ %tmp42 = getelementptr i32, i32* %tpmm, i32 %tmp31 ; <i32*> [#uses=1]
%tmp42.upgrd.1 = bitcast i32* %tmp42 to <4 x i32>* ; <<4 x i32>*> [#uses=1]
- %tmp46 = load <4 x i32>* %tmp42.upgrd.1 ; <<4 x i32>> [#uses=1]
+ %tmp46 = load <4 x i32>, <4 x i32>* %tmp42.upgrd.1 ; <<4 x i32>> [#uses=1]
%tmp54 = bitcast <16 x i8> %tmp to <4 x i32> ; <<4 x i32>> [#uses=1]
%tmp55 = add <4 x i32> %tmp54, %tmp46 ; <<4 x i32>> [#uses=2]
%tmp55.upgrd.2 = bitcast <4 x i32> %tmp55 to <2 x i64> ; <<2 x i64>> [#uses=1]
- %tmp62 = getelementptr i32* %ip, i32 %tmp31 ; <i32*> [#uses=1]
+ %tmp62 = getelementptr i32, i32* %ip, i32 %tmp31 ; <i32*> [#uses=1]
%tmp65 = bitcast i32* %tmp62 to <16 x i8>* ; <i8*> [#uses=1]
- %tmp66 = load <16 x i8>* %tmp65, align 1
- %tmp73 = getelementptr i32* %tpim, i32 %tmp31 ; <i32*> [#uses=1]
+ %tmp66 = load <16 x i8>, <16 x i8>* %tmp65, align 1
+ %tmp73 = getelementptr i32, i32* %tpim, i32 %tmp31 ; <i32*> [#uses=1]
%tmp73.upgrd.3 = bitcast i32* %tmp73 to <4 x i32>* ; <<4 x i32>*> [#uses=1]
- %tmp77 = load <4 x i32>* %tmp73.upgrd.3 ; <<4 x i32>> [#uses=1]
+ %tmp77 = load <4 x i32>, <4 x i32>* %tmp73.upgrd.3 ; <<4 x i32>> [#uses=1]
%tmp87 = bitcast <16 x i8> %tmp66 to <4 x i32> ; <<4 x i32>> [#uses=1]
%tmp88 = add <4 x i32> %tmp87, %tmp77 ; <<4 x i32>> [#uses=2]
%tmp88.upgrd.4 = bitcast <4 x i32> %tmp88 to <2 x i64> ; <<2 x i64>> [#uses=1]
@@ -37,7 +37,7 @@ cond_true: ; preds = %cond_true, %entry
%tmp111 = and <2 x i64> %tmp110, %tmp55.upgrd.2 ; <<2 x i64>> [#uses=1]
%tmp121 = and <2 x i64> %tmp99.upgrd.5, %tmp88.upgrd.4 ; <<2 x i64>> [#uses=1]
%tmp131 = or <2 x i64> %tmp121, %tmp111 ; <<2 x i64>> [#uses=1]
- %tmp137 = getelementptr i32* %mc, i32 %tmp.10 ; <i32*> [#uses=1]
+ %tmp137 = getelementptr i32, i32* %mc, i32 %tmp.10 ; <i32*> [#uses=1]
%tmp137.upgrd.7 = bitcast i32* %tmp137 to <2 x i64>* ; <<2 x i64>*> [#uses=1]
store <2 x i64> %tmp131, <2 x i64>* %tmp137.upgrd.7
%tmp147 = add nsw i32 %tmp.10, 8 ; <i32> [#uses=1]
diff --git a/test/CodeGen/X86/2006-05-22-FPSetEQ.ll b/test/CodeGen/X86/2006-05-22-FPSetEQ.ll
index 6c5a4fb3bd5c..3be77f5c3099 100644
--- a/test/CodeGen/X86/2006-05-22-FPSetEQ.ll
+++ b/test/CodeGen/X86/2006-05-22-FPSetEQ.ll
@@ -1,7 +1,10 @@
-; RUN: llc < %s -march=x86 -mattr=-sse | grep setnp
-; RUN: llc < %s -march=x86 -mattr=-sse -enable-unsafe-fp-math -enable-no-nans-fp-math | \
-; RUN: not grep setnp
+; RUN: llc < %s -march=x86 -mattr=-sse | FileCheck %s -check-prefix=WITHNANS
+; RUN: llc < %s -march=x86 -mattr=-sse -enable-unsafe-fp-math -enable-no-nans-fp-math | FileCheck %s -check-prefix=NONANS
+; WITHNANS-LABEL: test:
+; WITHNANS: setnp
+; NONANS-LABEL: test:
+; NONANS-NOT: setnp
define i32 @test(float %f) {
%tmp = fcmp oeq float %f, 0.000000e+00 ; <i1> [#uses=1]
%tmp.upgrd.1 = zext i1 %tmp to i32 ; <i32> [#uses=1]
diff --git a/test/CodeGen/X86/2006-05-25-CycleInDAG.ll b/test/CodeGen/X86/2006-05-25-CycleInDAG.ll
index 0288278d626e..6ff879760ea0 100644
--- a/test/CodeGen/X86/2006-05-25-CycleInDAG.ll
+++ b/test/CodeGen/X86/2006-05-25-CycleInDAG.ll
@@ -6,7 +6,7 @@ cond_true12: ; preds = %0
ret i32 0
cond_next33: ; preds = %0
%tmp44.i = call double @foo( double 0.000000e+00, i32 32 ) ; <double> [#uses=1]
- %tmp61.i = load i8* null ; <i8> [#uses=1]
+ %tmp61.i = load i8, i8* null ; <i8> [#uses=1]
%tmp61.i.upgrd.1 = zext i8 %tmp61.i to i32 ; <i32> [#uses=1]
%tmp58.i = or i32 0, %tmp61.i.upgrd.1 ; <i32> [#uses=1]
%tmp62.i = or i32 %tmp58.i, 0 ; <i32> [#uses=1]
diff --git a/test/CodeGen/X86/2006-07-20-InlineAsm.ll b/test/CodeGen/X86/2006-07-20-InlineAsm.ll
index 1facf15b9f40..795e898df347 100644
--- a/test/CodeGen/X86/2006-07-20-InlineAsm.ll
+++ b/test/CodeGen/X86/2006-07-20-InlineAsm.ll
@@ -8,7 +8,7 @@ entry:
%X_addr = alloca i32 ; <i32*> [#uses=3]
store i32 %X, i32* %X_addr
call void asm sideeffect "xchg{l} {$0,$1|$1,$0}", "=*m,=*r,m,1,~{dirflag},~{fpsr},~{flags}"( i32* @G, i32* %X_addr, i32* @G, i32 %X )
- %tmp1 = load i32* %X_addr ; <i32> [#uses=1]
+ %tmp1 = load i32, i32* %X_addr ; <i32> [#uses=1]
ret i32 %tmp1
}
@@ -17,7 +17,7 @@ entry:
%X_addr = alloca i32 ; <i32*> [#uses=3]
store i32 %X, i32* %X_addr
call void asm sideeffect "xchg{l} {$0,$1|$1,$0}", "=*m,=*r,1,~{dirflag},~{fpsr},~{flags}"( i32* @G, i32* %X_addr, i32 %X )
- %tmp1 = load i32* %X_addr ; <i32> [#uses=1]
+ %tmp1 = load i32, i32* %X_addr ; <i32> [#uses=1]
ret i32 %tmp1
}
diff --git a/test/CodeGen/X86/2006-08-07-CycleInDAG.ll b/test/CodeGen/X86/2006-08-07-CycleInDAG.ll
index aea707ee8fe4..397bc26dbecd 100644
--- a/test/CodeGen/X86/2006-08-07-CycleInDAG.ll
+++ b/test/CodeGen/X86/2006-08-07-CycleInDAG.ll
@@ -8,10 +8,10 @@ cond_true.i: ; preds = %0
ret i32 0
ilog2.exit: ; preds = %0
- %tmp24.i = load i32* null ; <i32> [#uses=1]
+ %tmp24.i = load i32, i32* null ; <i32> [#uses=1]
%tmp13.i12.i = tail call double @ldexp( double 0.000000e+00, i32 0 ) ; <double> [#uses=1]
%tmp13.i13.i = fptrunc double %tmp13.i12.i to float ; <float> [#uses=1]
- %tmp11.s = load i32* null ; <i32> [#uses=1]
+ %tmp11.s = load i32, i32* null ; <i32> [#uses=1]
%tmp11.i = bitcast i32 %tmp11.s to i32 ; <i32> [#uses=1]
%n.i = bitcast i32 %tmp24.i to i32 ; <i32> [#uses=1]
%tmp13.i7 = mul i32 %tmp11.i, %n.i ; <i32> [#uses=1]
diff --git a/test/CodeGen/X86/2006-08-16-CycleInDAG.ll b/test/CodeGen/X86/2006-08-16-CycleInDAG.ll
index 5fee326d530d..2c44adf6829c 100644
--- a/test/CodeGen/X86/2006-08-16-CycleInDAG.ll
+++ b/test/CodeGen/X86/2006-08-16-CycleInDAG.ll
@@ -6,14 +6,14 @@
%struct.u = type { [1 x i64] }
define void @test() {
- %tmp = load i32* null ; <i32> [#uses=1]
+ %tmp = load i32, i32* null ; <i32> [#uses=1]
%tmp8 = call i32 @hash_rtx( ) ; <i32> [#uses=1]
%tmp11 = urem i32 %tmp8, %tmp ; <i32> [#uses=1]
br i1 false, label %cond_next, label %return
cond_next: ; preds = %0
%gep.upgrd.1 = zext i32 %tmp11 to i64 ; <i64> [#uses=1]
- %tmp17 = getelementptr %struct.expr** null, i64 %gep.upgrd.1 ; <%struct.expr**> [#uses=0]
+ %tmp17 = getelementptr %struct.expr*, %struct.expr** null, i64 %gep.upgrd.1 ; <%struct.expr**> [#uses=0]
ret void
return: ; preds = %0
diff --git a/test/CodeGen/X86/2006-09-01-CycleInDAG.ll b/test/CodeGen/X86/2006-09-01-CycleInDAG.ll
index 1e890bbc02e5..a7a10afaae1d 100644
--- a/test/CodeGen/X86/2006-09-01-CycleInDAG.ll
+++ b/test/CodeGen/X86/2006-09-01-CycleInDAG.ll
@@ -111,21 +111,21 @@ bb3502.exitStub: ; preds = %cond_true3632
ret void
cond_true3632: ; preds = %newFuncRoot
- %tmp3378 = load i32* %tmp3629 ; <i32> [#uses=1]
+ %tmp3378 = load i32, i32* %tmp3629 ; <i32> [#uses=1]
%tmp3379 = add i32 %tmp3378, -1 ; <i32> [#uses=1]
- %tmp3381 = getelementptr %struct.varray_head_tag* %stack3023.6, i32 0, i32 4 ; <%struct.varray_data*> [#uses=1]
+ %tmp3381 = getelementptr %struct.varray_head_tag, %struct.varray_head_tag* %stack3023.6, i32 0, i32 4 ; <%struct.varray_data*> [#uses=1]
%tmp3382 = bitcast %struct.varray_data* %tmp3381 to [1 x i32]* ; <[1 x i32]*> [#uses=1]
%gep.upgrd.1 = zext i32 %tmp3379 to i64 ; <i64> [#uses=1]
- %tmp3383 = getelementptr [1 x i32]* %tmp3382, i32 0, i64 %gep.upgrd.1 ; <i32*> [#uses=1]
- %tmp3384 = load i32* %tmp3383 ; <i32> [#uses=1]
- %tmp3387 = load i32* %tmp3629 ; <i32> [#uses=1]
+ %tmp3383 = getelementptr [1 x i32], [1 x i32]* %tmp3382, i32 0, i64 %gep.upgrd.1 ; <i32*> [#uses=1]
+ %tmp3384 = load i32, i32* %tmp3383 ; <i32> [#uses=1]
+ %tmp3387 = load i32, i32* %tmp3629 ; <i32> [#uses=1]
%tmp3388 = add i32 %tmp3387, -1 ; <i32> [#uses=1]
store i32 %tmp3388, i32* %tmp3629
- %tmp3391 = load %struct.varray_head_tag** @basic_block_info ; <%struct.varray_head_tag*> [#uses=1]
- %tmp3393 = getelementptr %struct.varray_head_tag* %tmp3391, i32 0, i32 4 ; <%struct.varray_data*> [#uses=1]
+ %tmp3391 = load %struct.varray_head_tag*, %struct.varray_head_tag** @basic_block_info ; <%struct.varray_head_tag*> [#uses=1]
+ %tmp3393 = getelementptr %struct.varray_head_tag, %struct.varray_head_tag* %tmp3391, i32 0, i32 4 ; <%struct.varray_data*> [#uses=1]
%tmp3394 = bitcast %struct.varray_data* %tmp3393 to [1 x %struct.basic_block_def*]* ; <[1 x %struct.basic_block_def*]*> [#uses=1]
- %tmp3395 = getelementptr [1 x %struct.basic_block_def*]* %tmp3394, i32 0, i32 %tmp3384 ; <%struct.basic_block_def**> [#uses=1]
- %tmp3396 = load %struct.basic_block_def** %tmp3395 ; <%struct.basic_block_def*> [#uses=1]
- %tmp3397 = getelementptr %struct.basic_block_def* %tmp3396, i32 0, i32 3 ; <%struct.VEC_edge**> [#uses=1]
+ %tmp3395 = getelementptr [1 x %struct.basic_block_def*], [1 x %struct.basic_block_def*]* %tmp3394, i32 0, i32 %tmp3384 ; <%struct.basic_block_def**> [#uses=1]
+ %tmp3396 = load %struct.basic_block_def*, %struct.basic_block_def** %tmp3395 ; <%struct.basic_block_def*> [#uses=1]
+ %tmp3397 = getelementptr %struct.basic_block_def, %struct.basic_block_def* %tmp3396, i32 0, i32 3 ; <%struct.VEC_edge**> [#uses=1]
br label %bb3502.exitStub
}
diff --git a/test/CodeGen/X86/2006-10-07-ScalarSSEMiscompile.ll b/test/CodeGen/X86/2006-10-07-ScalarSSEMiscompile.ll
deleted file mode 100644
index d09d06147696..000000000000
--- a/test/CodeGen/X86/2006-10-07-ScalarSSEMiscompile.ll
+++ /dev/null
@@ -1,15 +0,0 @@
-; RUN: llc < %s -march=x86 -mattr=sse | grep movaps
-; Test that the load is NOT folded into the intrinsic, which would zero the top
-; elts of the loaded vector.
-
-target datalayout = "e-p:32:32"
-target triple = "i686-apple-darwin8.7.2"
-
-define <4 x float> @test(<4 x float> %A, <4 x float>* %B) nounwind {
- %BV = load <4 x float>* %B ; <<4 x float>> [#uses=1]
- %tmp28 = tail call <4 x float> @llvm.x86.sse.sub.ss( <4 x float> %A, <4 x float> %BV ) ; <<4 x float>> [#uses=1]
- ret <4 x float> %tmp28
-}
-
-declare <4 x float> @llvm.x86.sse.sub.ss(<4 x float>, <4 x float>)
-
diff --git a/test/CodeGen/X86/2006-10-09-CycleInDAG.ll b/test/CodeGen/X86/2006-10-09-CycleInDAG.ll
index fbb14ee16151..e2c84ea569e6 100644
--- a/test/CodeGen/X86/2006-10-09-CycleInDAG.ll
+++ b/test/CodeGen/X86/2006-10-09-CycleInDAG.ll
@@ -1,9 +1,9 @@
; RUN: llc < %s -march=x86
define void @_ZN13QFSFileEngine4readEPcx() {
- %tmp201 = load i32* null ; <i32> [#uses=1]
+ %tmp201 = load i32, i32* null ; <i32> [#uses=1]
%tmp201.upgrd.1 = sext i32 %tmp201 to i64 ; <i64> [#uses=1]
- %tmp202 = load i64* null ; <i64> [#uses=1]
+ %tmp202 = load i64, i64* null ; <i64> [#uses=1]
%tmp203 = add i64 %tmp201.upgrd.1, %tmp202 ; <i64> [#uses=1]
store i64 %tmp203, i64* null
ret void
diff --git a/test/CodeGen/X86/2006-10-10-FindModifiedNodeSlotBug.ll b/test/CodeGen/X86/2006-10-10-FindModifiedNodeSlotBug.ll
index b1f04518acaa..435582587856 100644
--- a/test/CodeGen/X86/2006-10-10-FindModifiedNodeSlotBug.ll
+++ b/test/CodeGen/X86/2006-10-10-FindModifiedNodeSlotBug.ll
@@ -4,14 +4,14 @@
@tree_code_type = external global [0 x i32] ; <[0 x i32]*> [#uses=1]
define void @copy_if_shared_r() {
- %tmp = load i32* null ; <i32> [#uses=1]
+ %tmp = load i32, i32* null ; <i32> [#uses=1]
%tmp56 = and i32 %tmp, 255 ; <i32> [#uses=1]
%gep.upgrd.1 = zext i32 %tmp56 to i64 ; <i64> [#uses=1]
- %tmp8 = getelementptr [0 x i32]* @tree_code_type, i32 0, i64 %gep.upgrd.1 ; <i32*> [#uses=1]
- %tmp9 = load i32* %tmp8 ; <i32> [#uses=1]
+ %tmp8 = getelementptr [0 x i32], [0 x i32]* @tree_code_type, i32 0, i64 %gep.upgrd.1 ; <i32*> [#uses=1]
+ %tmp9 = load i32, i32* %tmp8 ; <i32> [#uses=1]
%tmp10 = add i32 %tmp9, -1 ; <i32> [#uses=1]
%tmp.upgrd.2 = icmp ugt i32 %tmp10, 2 ; <i1> [#uses=1]
- %tmp14 = load i32* null ; <i32> [#uses=1]
+ %tmp14 = load i32, i32* null ; <i32> [#uses=1]
%tmp15 = lshr i32 %tmp14, 31 ; <i32> [#uses=1]
%tmp15.upgrd.3 = trunc i32 %tmp15 to i8 ; <i8> [#uses=1]
%tmp16 = icmp ne i8 %tmp15.upgrd.3, 0 ; <i1> [#uses=1]
diff --git a/test/CodeGen/X86/2006-10-12-CycleInDAG.ll b/test/CodeGen/X86/2006-10-12-CycleInDAG.ll
index 3b987ac79f94..7a32ef7801d9 100644
--- a/test/CodeGen/X86/2006-10-12-CycleInDAG.ll
+++ b/test/CodeGen/X86/2006-10-12-CycleInDAG.ll
@@ -29,10 +29,10 @@ bb441: ; preds = %cond_next330
ret void
cond_next472: ; preds = %cond_next330
- %tmp490 = load %struct.tree_node** null ; <%struct.tree_node*> [#uses=1]
- %tmp492 = getelementptr %struct.tree_node* %tmp490, i32 0, i32 0, i32 0, i32 3 ; <i8*> [#uses=1]
+ %tmp490 = load %struct.tree_node*, %struct.tree_node** null ; <%struct.tree_node*> [#uses=1]
+ %tmp492 = getelementptr %struct.tree_node, %struct.tree_node* %tmp490, i32 0, i32 0, i32 0, i32 3 ; <i8*> [#uses=1]
%tmp492.upgrd.1 = bitcast i8* %tmp492 to i32* ; <i32*> [#uses=1]
- %tmp493 = load i32* %tmp492.upgrd.1 ; <i32> [#uses=1]
+ %tmp493 = load i32, i32* %tmp492.upgrd.1 ; <i32> [#uses=1]
%tmp495 = trunc i32 %tmp493 to i8 ; <i8> [#uses=1]
%tmp496 = icmp eq i8 %tmp495, 11 ; <i1> [#uses=1]
%tmp496.upgrd.2 = zext i1 %tmp496 to i8 ; <i8> [#uses=1]
diff --git a/test/CodeGen/X86/2006-10-13-CycleInDAG.ll b/test/CodeGen/X86/2006-10-13-CycleInDAG.ll
index 6ed2e7bb5751..c45469d4e3ee 100644
--- a/test/CodeGen/X86/2006-10-13-CycleInDAG.ll
+++ b/test/CodeGen/X86/2006-10-13-CycleInDAG.ll
@@ -3,9 +3,9 @@
define void @test() {
bb.i:
- %tmp.i660 = load <4 x float>* null ; <<4 x float>> [#uses=1]
- call void (i32, ...)* @printf( i32 0, i8* getelementptr ([18 x i8]* @str, i32 0, i64 0), double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, double 0.000000e+00 )
- %tmp152.i = load <4 x i32>* null ; <<4 x i32>> [#uses=1]
+ %tmp.i660 = load <4 x float>, <4 x float>* null ; <<4 x float>> [#uses=1]
+ call void (i32, ...) @printf( i32 0, i8* getelementptr ([18 x i8], [18 x i8]* @str, i32 0, i64 0), double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, double 0.000000e+00 )
+ %tmp152.i = load <4 x i32>, <4 x i32>* null ; <<4 x i32>> [#uses=1]
%tmp156.i = bitcast <4 x i32> %tmp152.i to <4 x i32> ; <<4 x i32>> [#uses=1]
%tmp175.i = bitcast <4 x float> %tmp.i660 to <4 x i32> ; <<4 x i32>> [#uses=1]
%tmp176.i = xor <4 x i32> %tmp156.i, < i32 -1, i32 -1, i32 -1, i32 -1 > ; <<4 x i32>> [#uses=1]
diff --git a/test/CodeGen/X86/2006-10-19-SwitchUnnecessaryBranching.ll b/test/CodeGen/X86/2006-10-19-SwitchUnnecessaryBranching.ll
index d583e5964dc2..dd670648daf6 100644
--- a/test/CodeGen/X86/2006-10-19-SwitchUnnecessaryBranching.ll
+++ b/test/CodeGen/X86/2006-10-19-SwitchUnnecessaryBranching.ll
@@ -15,11 +15,11 @@ entry:
]
bb: ; preds = %entry
- %tmp1 = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([14 x i8]* @str, i32 0, i64 0) ) ; <i32> [#uses=0]
+ %tmp1 = tail call i32 (i8*, ...) @printf( i8* getelementptr ([14 x i8], [14 x i8]* @str, i32 0, i64 0) ) ; <i32> [#uses=0]
ret i32 0
bb2: ; preds = %entry
- %tmp4 = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([13 x i8]* @str.upgrd.1, i32 0, i64 0) ) ; <i32> [#uses=0]
+ %tmp4 = tail call i32 (i8*, ...) @printf( i8* getelementptr ([13 x i8], [13 x i8]* @str.upgrd.1, i32 0, i64 0) ) ; <i32> [#uses=0]
ret i32 0
UnifiedReturnBlock: ; preds = %entry
diff --git a/test/CodeGen/X86/2006-11-12-CSRetCC.ll b/test/CodeGen/X86/2006-11-12-CSRetCC.ll
index d7af1c3fdc71..b6a8fc0bb2f8 100644
--- a/test/CodeGen/X86/2006-11-12-CSRetCC.ll
+++ b/test/CodeGen/X86/2006-11-12-CSRetCC.ll
@@ -18,43 +18,43 @@ entry:
%z = alloca { double, double }, align 16 ; <{ double, double }*> [#uses=4]
%"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
store double 0x400921FB54442D18, double* %pi
- %tmp.upgrd.1 = load double* %pi ; <double> [#uses=1]
- %real = getelementptr { double, double }* %tmp1, i64 0, i32 0 ; <double*> [#uses=1]
+ %tmp.upgrd.1 = load double, double* %pi ; <double> [#uses=1]
+ %real = getelementptr { double, double }, { double, double }* %tmp1, i64 0, i32 0 ; <double*> [#uses=1]
store double 0.000000e+00, double* %real
- %real3 = getelementptr { double, double }* %tmp1, i64 0, i32 1 ; <double*> [#uses=1]
+ %real3 = getelementptr { double, double }, { double, double }* %tmp1, i64 0, i32 1 ; <double*> [#uses=1]
store double %tmp.upgrd.1, double* %real3
- %tmp.upgrd.2 = getelementptr { double, double }* %tmp, i64 0, i32 0 ; <double*> [#uses=1]
- %tmp4 = getelementptr { double, double }* %tmp1, i64 0, i32 0 ; <double*> [#uses=1]
- %tmp5 = load double* %tmp4 ; <double> [#uses=1]
+ %tmp.upgrd.2 = getelementptr { double, double }, { double, double }* %tmp, i64 0, i32 0 ; <double*> [#uses=1]
+ %tmp4 = getelementptr { double, double }, { double, double }* %tmp1, i64 0, i32 0 ; <double*> [#uses=1]
+ %tmp5 = load double, double* %tmp4 ; <double> [#uses=1]
store double %tmp5, double* %tmp.upgrd.2
- %tmp6 = getelementptr { double, double }* %tmp, i64 0, i32 1 ; <double*> [#uses=1]
- %tmp7 = getelementptr { double, double }* %tmp1, i64 0, i32 1 ; <double*> [#uses=1]
- %tmp8 = load double* %tmp7 ; <double> [#uses=1]
+ %tmp6 = getelementptr { double, double }, { double, double }* %tmp, i64 0, i32 1 ; <double*> [#uses=1]
+ %tmp7 = getelementptr { double, double }, { double, double }* %tmp1, i64 0, i32 1 ; <double*> [#uses=1]
+ %tmp8 = load double, double* %tmp7 ; <double> [#uses=1]
store double %tmp8, double* %tmp6
%tmp.upgrd.3 = bitcast { double, double }* %tmp to { i64, i64 }* ; <{ i64, i64 }*> [#uses=1]
- %tmp.upgrd.4 = getelementptr { i64, i64 }* %tmp.upgrd.3, i64 0, i32 0 ; <i64*> [#uses=1]
- %tmp.upgrd.5 = load i64* %tmp.upgrd.4 ; <i64> [#uses=1]
+ %tmp.upgrd.4 = getelementptr { i64, i64 }, { i64, i64 }* %tmp.upgrd.3, i64 0, i32 0 ; <i64*> [#uses=1]
+ %tmp.upgrd.5 = load i64, i64* %tmp.upgrd.4 ; <i64> [#uses=1]
%tmp9 = bitcast { double, double }* %tmp to { i64, i64 }* ; <{ i64, i64 }*> [#uses=1]
- %tmp10 = getelementptr { i64, i64 }* %tmp9, i64 0, i32 1 ; <i64*> [#uses=1]
- %tmp11 = load i64* %tmp10 ; <i64> [#uses=1]
+ %tmp10 = getelementptr { i64, i64 }, { i64, i64 }* %tmp9, i64 0, i32 1 ; <i64*> [#uses=1]
+ %tmp11 = load i64, i64* %tmp10 ; <i64> [#uses=1]
call void @cexp( { double, double }* sret %tmp2, i64 %tmp.upgrd.5, i64 %tmp11 )
- %tmp12 = getelementptr { double, double }* %z, i64 0, i32 0 ; <double*> [#uses=1]
- %tmp13 = getelementptr { double, double }* %tmp2, i64 0, i32 0 ; <double*> [#uses=1]
- %tmp14 = load double* %tmp13 ; <double> [#uses=1]
+ %tmp12 = getelementptr { double, double }, { double, double }* %z, i64 0, i32 0 ; <double*> [#uses=1]
+ %tmp13 = getelementptr { double, double }, { double, double }* %tmp2, i64 0, i32 0 ; <double*> [#uses=1]
+ %tmp14 = load double, double* %tmp13 ; <double> [#uses=1]
store double %tmp14, double* %tmp12
- %tmp15 = getelementptr { double, double }* %z, i64 0, i32 1 ; <double*> [#uses=1]
- %tmp16 = getelementptr { double, double }* %tmp2, i64 0, i32 1 ; <double*> [#uses=1]
- %tmp17 = load double* %tmp16 ; <double> [#uses=1]
+ %tmp15 = getelementptr { double, double }, { double, double }* %z, i64 0, i32 1 ; <double*> [#uses=1]
+ %tmp16 = getelementptr { double, double }, { double, double }* %tmp2, i64 0, i32 1 ; <double*> [#uses=1]
+ %tmp17 = load double, double* %tmp16 ; <double> [#uses=1]
store double %tmp17, double* %tmp15
- %tmp18 = getelementptr { double, double }* %z, i64 0, i32 1 ; <double*> [#uses=1]
- %tmp19 = load double* %tmp18 ; <double> [#uses=1]
- %tmp20 = getelementptr { double, double }* %z, i64 0, i32 0 ; <double*> [#uses=1]
- %tmp21 = load double* %tmp20 ; <double> [#uses=1]
- %tmp.upgrd.6 = getelementptr [9 x i8]* @str, i32 0, i64 0 ; <i8*> [#uses=1]
- %tmp.upgrd.7 = call i32 (i8*, ...)* @printf( i8* %tmp.upgrd.6, double %tmp21, double %tmp19 ) ; <i32> [#uses=0]
+ %tmp18 = getelementptr { double, double }, { double, double }* %z, i64 0, i32 1 ; <double*> [#uses=1]
+ %tmp19 = load double, double* %tmp18 ; <double> [#uses=1]
+ %tmp20 = getelementptr { double, double }, { double, double }* %z, i64 0, i32 0 ; <double*> [#uses=1]
+ %tmp21 = load double, double* %tmp20 ; <double> [#uses=1]
+ %tmp.upgrd.6 = getelementptr [9 x i8], [9 x i8]* @str, i32 0, i64 0 ; <i8*> [#uses=1]
+ %tmp.upgrd.7 = call i32 (i8*, ...) @printf( i8* %tmp.upgrd.6, double %tmp21, double %tmp19 ) ; <i32> [#uses=0]
br label %finish
finish:
- %retval.upgrd.8 = load i32* %retval ; <i32> [#uses=1]
+ %retval.upgrd.8 = load i32, i32* %retval ; <i32> [#uses=1]
ret i32 %retval.upgrd.8
}
diff --git a/test/CodeGen/X86/2006-11-17-IllegalMove.ll b/test/CodeGen/X86/2006-11-17-IllegalMove.ll
index 783d9f94caeb..c0bd6f728422 100644
--- a/test/CodeGen/X86/2006-11-17-IllegalMove.ll
+++ b/test/CodeGen/X86/2006-11-17-IllegalMove.ll
@@ -5,7 +5,7 @@
define void @handle_vector_size_attribute() nounwind {
entry:
- %tmp69 = load i32* null ; <i32> [#uses=1]
+ %tmp69 = load i32, i32* null ; <i32> [#uses=1]
switch i32 %tmp69, label %bb84 [
i32 2, label %bb77
i32 1, label %bb77
@@ -13,7 +13,7 @@ entry:
bb77: ; preds = %entry, %entry
%tmp99 = udiv i64 0, 0 ; <i64> [#uses=1]
- %tmp = load i8* null ; <i8> [#uses=1]
+ %tmp = load i8, i8* null ; <i8> [#uses=1]
%tmp114 = icmp eq i64 0, 0 ; <i1> [#uses=1]
br label %cond_true115
@@ -21,7 +21,7 @@ bb84: ; preds = %entry
ret void
cond_true115: ; preds = %bb77
- %tmp118 = load i8* null ; <i8> [#uses=1]
+ %tmp118 = load i8, i8* null ; <i8> [#uses=1]
br label %cond_true120
cond_true120: ; preds = %cond_true115
diff --git a/test/CodeGen/X86/2006-12-16-InlineAsmCrash.ll b/test/CodeGen/X86/2006-12-16-InlineAsmCrash.ll
index 50a244b9e05b..080de1fb553e 100644
--- a/test/CodeGen/X86/2006-12-16-InlineAsmCrash.ll
+++ b/test/CodeGen/X86/2006-12-16-InlineAsmCrash.ll
@@ -20,8 +20,8 @@ target triple = "i686-pc-linux-gnu"
%"struct.QString::Data" = type { %struct.QBasicAtomic, i32, i32, i16*, i8, i8, [1 x i16] }
define i1 @_ZNK12QImageWriter8canWriteEv() {
- %tmp62 = load %struct.QImageWriterPrivate** null ; <%struct.QImageWriterPrivate*> [#uses=1]
- %tmp = getelementptr %struct.QImageWriterPrivate* %tmp62, i32 0, i32 9 ; <%struct.QString*> [#uses=1]
+ %tmp62 = load %struct.QImageWriterPrivate*, %struct.QImageWriterPrivate** null ; <%struct.QImageWriterPrivate*> [#uses=1]
+ %tmp = getelementptr %struct.QImageWriterPrivate, %struct.QImageWriterPrivate* %tmp62, i32 0, i32 9 ; <%struct.QString*> [#uses=1]
%tmp75 = call %struct.QString* @_ZN7QStringaSERKS_( %struct.QString* %tmp, %struct.QString* null ) ; <%struct.QString*> [#uses=0]
call void asm sideeffect "lock\0Adecl $0\0Asetne 1", "=*m"( i32* null )
ret i1 false
diff --git a/test/CodeGen/X86/2006-12-19-IntelSyntax.ll b/test/CodeGen/X86/2006-12-19-IntelSyntax.ll
index f81b303e3b80..2c3c5c99c1b2 100644
--- a/test/CodeGen/X86/2006-12-19-IntelSyntax.ll
+++ b/test/CodeGen/X86/2006-12-19-IntelSyntax.ll
@@ -21,55 +21,55 @@ entry:
]
bb: ; preds = %entry
- call void (...)* @foo1( )
+ call void (...) @foo1( )
ret void
bb1: ; preds = %entry
- call void (...)* @foo2( )
+ call void (...) @foo2( )
ret void
bb2: ; preds = %entry
- call void (...)* @foo6( )
+ call void (...) @foo6( )
ret void
bb3: ; preds = %entry
- call void (...)* @foo3( )
+ call void (...) @foo3( )
ret void
bb4: ; preds = %entry
- call void (...)* @foo4( )
+ call void (...) @foo4( )
ret void
bb5: ; preds = %entry
- call void (...)* @foo5( )
+ call void (...) @foo5( )
ret void
bb6: ; preds = %entry
- call void (...)* @foo1( )
+ call void (...) @foo1( )
ret void
bb7: ; preds = %entry
- call void (...)* @foo2( )
+ call void (...) @foo2( )
ret void
bb8: ; preds = %entry
- call void (...)* @foo6( )
+ call void (...) @foo6( )
ret void
bb9: ; preds = %entry
- call void (...)* @foo3( )
+ call void (...) @foo3( )
ret void
bb10: ; preds = %entry
- call void (...)* @foo4( )
+ call void (...) @foo4( )
ret void
bb11: ; preds = %entry
- call void (...)* @foo5( )
+ call void (...) @foo5( )
ret void
bb12: ; preds = %entry
- call void (...)* @foo6( )
+ call void (...) @foo6( )
ret void
}
diff --git a/test/CodeGen/X86/2007-01-08-X86-64-Pointer.ll b/test/CodeGen/X86/2007-01-08-X86-64-Pointer.ll
index 3458550aa103..67c4bcd5fb71 100644
--- a/test/CodeGen/X86/2007-01-08-X86-64-Pointer.ll
+++ b/test/CodeGen/X86/2007-01-08-X86-64-Pointer.ll
@@ -10,7 +10,7 @@ entry:
bb:
%i = phi i64 [ 0, %entry ], [ %k, %bb ]
- %j = getelementptr double* %y, i64 %i
+ %j = getelementptr double, double* %y, i64 %i
store double 0.000000e+00, double* %j
%k = add i64 %i, 1
%n = icmp eq i64 %k, 0
diff --git a/test/CodeGen/X86/2007-01-13-StackPtrIndex.ll b/test/CodeGen/X86/2007-01-13-StackPtrIndex.ll
index 04d4b8ee57eb..f83eea179d6a 100644
--- a/test/CodeGen/X86/2007-01-13-StackPtrIndex.ll
+++ b/test/CodeGen/X86/2007-01-13-StackPtrIndex.ll
@@ -8,12 +8,12 @@ target datalayout = "e-p:64:64"
define void @foo(i32* %a0, i32* %a1, i32* %a2, i32* %a3, i32* %a4, i32* %a5) {
b:
- %r = load i32* %a0
- %r2 = load i32* %a1
- %r4 = load i32* %a2
- %r6 = load i32* %a3
- %r8 = load i32* %a4
- %r14 = load i32* %a5
+ %r = load i32, i32* %a0
+ %r2 = load i32, i32* %a1
+ %r4 = load i32, i32* %a2
+ %r6 = load i32, i32* %a3
+ %r8 = load i32, i32* %a4
+ %r14 = load i32, i32* %a5
%rx = sext i32 %r2 to i64
%r9 = sext i32 %r to i64
%r11 = add i64 %rx, 0
@@ -31,13 +31,13 @@ b:
%r24 = shl i32 %r23a, 0
%r25 = add i32 %r24, 0
%ras2 = alloca i8, i32 %r25, align 16
- %r28 = getelementptr i8* %ras2, i32 0
+ %r28 = getelementptr i8, i8* %ras2, i32 0
%r38 = shl i64 %r12, 0
%s2013 = add i64 %r38, 0
- %c22012 = getelementptr i8* %ras2, i64 %s2013
+ %c22012 = getelementptr i8, i8* %ras2, i64 %s2013
%r42 = shl i64 %r12, 0
%s2011 = add i64 %r42, 16
- %c22010 = getelementptr i8* %ras2, i64 %s2011
+ %c22010 = getelementptr i8, i8* %ras2, i64 %s2011
%r50 = add i64 %r16, 0
%r51 = icmp slt i64 %r50, 0
%r50sh = shl i64 %r50, 0
@@ -45,7 +45,7 @@ b:
%r54 = select i1 %r51, i64 0, i64 %r50j
%r56 = mul i64 %r54, %r12
%r28s = add i64 %r56, 16
- %c2 = getelementptr i8* %ras2, i64 %r28s
+ %c2 = getelementptr i8, i8* %ras2, i64 %r28s
%r60 = sub i32 %r2, %r
%r61 = icmp slt i32 %r60, 0
br i1 %r61, label %a29b, label %b63
@@ -132,14 +132,14 @@ b341:
%d753 = bitcast i64 %w1874 to i64
%r343 = add i64 %s661, 0
%r346 = add i64 %r343, 0
- %r347 = getelementptr float* bitcast ([128 x i64]* @i6000 to float*), i64 %r346
- %r348 = load float* %r347
+ %r347 = getelementptr float, float* bitcast ([128 x i64]* @i6000 to float*), i64 %r346
+ %r348 = load float, float* %r347
%r352 = add i64 %r343, 0
- %r353 = getelementptr float* bitcast ([128 x i64]* @i6000 to float*), i64 %r352
- %r354 = load float* %r353
- %r362 = load float* bitcast ([128 x i64]* @i6000 to float*)
+ %r353 = getelementptr float, float* bitcast ([128 x i64]* @i6000 to float*), i64 %r352
+ %r354 = load float, float* %r353
+ %r362 = load float, float* bitcast ([128 x i64]* @i6000 to float*)
%r363 = fadd float 0.000000e+00, %r362
- %r370 = load float* bitcast ([128 x i64]* @i6000 to float*)
+ %r370 = load float, float* bitcast ([128 x i64]* @i6000 to float*)
%r376 = icmp slt i64 %r16, 0
br i1 %r376, label %b377, label %a35b
b377:
@@ -183,9 +183,9 @@ b535:
%s923 = phi i64 [ 0, %b514 ], [ %r799, %b712 ]
%s933 = phi i64 [ %r533, %b514 ], [ %r795, %b712 ]
%r538 = add i64 %w1855, 0
- %r539 = getelementptr float* bitcast ([128 x i64]* @i6000 to float*), i64 %r538
- %r540 = load float* %r539
- %r551 = load float* bitcast ([128 x i64]* @i6000 to float*)
+ %r539 = getelementptr float, float* bitcast ([128 x i64]* @i6000 to float*), i64 %r538
+ %r540 = load float, float* %r539
+ %r551 = load float, float* bitcast ([128 x i64]* @i6000 to float*)
%r562 = sub i64 %s933, 0
%r564 = icmp slt i64 %r512, 0
br i1 %r564, label %b565, label %a45b
@@ -212,23 +212,23 @@ b712:
a45b714:
%r717 = add i64 %e944, 0
%r720 = add i64 %r717, 0
- %r721 = getelementptr float* bitcast ([128 x i64]* @i6000 to float*), i64 %r720
- %r722 = load float* %r721
+ %r721 = getelementptr float, float* bitcast ([128 x i64]* @i6000 to float*), i64 %r720
+ %r722 = load float, float* %r721
%r726 = add i64 %r717, 0
- %r727 = getelementptr float* bitcast ([128 x i64]* @i6000 to float*), i64 %r726
- %r728 = load float* %r727
+ %r727 = getelementptr float, float* bitcast ([128 x i64]* @i6000 to float*), i64 %r726
+ %r728 = load float, float* %r727
%r732 = add i64 %r717, 0
- %r733 = getelementptr float* bitcast ([128 x i64]* @i6000 to float*), i64 %r732
- %r734 = load float* %r733
+ %r733 = getelementptr float, float* bitcast ([128 x i64]* @i6000 to float*), i64 %r732
+ %r734 = load float, float* %r733
%r738 = add i64 %r717, 0
- %r739 = getelementptr float* bitcast ([128 x i64]* @i6000 to float*), i64 %r738
- %r740 = load float* %r739
+ %r739 = getelementptr float, float* bitcast ([128 x i64]* @i6000 to float*), i64 %r738
+ %r740 = load float, float* %r739
%r744 = add i64 %r717, 0
- %r745 = getelementptr float* bitcast ([128 x i64]* @i6000 to float*), i64 %r744
- %r746 = load float* %r745
+ %r745 = getelementptr float, float* bitcast ([128 x i64]* @i6000 to float*), i64 %r744
+ %r746 = load float, float* %r745
%r750 = add i64 %r717, 0
- %r751 = getelementptr float* bitcast ([128 x i64]* @i6000 to float*), i64 %r750
- %r752 = load float* %r751
+ %r751 = getelementptr float, float* bitcast ([128 x i64]* @i6000 to float*), i64 %r750
+ %r752 = load float, float* %r751
%r753 = fadd float %r752, %r746
%r754 = fadd float %r728, %r722
%r755 = fadd float %r734, %r754
@@ -236,11 +236,11 @@ a45b714:
%r757 = fadd float %r753, %r756
%r759 = fadd float %r757, %r540
%r770 = add i64 %r717, 0
- %r771 = getelementptr float* bitcast ([128 x i64]* @i6000 to float*), i64 %r770
- %r772 = load float* %r771
+ %r771 = getelementptr float, float* bitcast ([128 x i64]* @i6000 to float*), i64 %r770
+ %r772 = load float, float* %r771
%r776 = add i64 %r717, 0
- %r777 = getelementptr float* bitcast ([128 x i64]* @i6000 to float*), i64 %r776
- %r778 = load float* %r777
+ %r777 = getelementptr float, float* bitcast ([128 x i64]* @i6000 to float*), i64 %r776
+ %r778 = load float, float* %r777
%r781 = fadd float %r363, %r772
%r782 = fadd float %r781, %r778
%r783 = fadd float %r551, %r782
@@ -253,7 +253,7 @@ b820:
%r844 = add i64 %r16, 0
%r846 = sext i32 %r60 to i64
%r847 = add i64 %r846, 0
- %r851 = load float* bitcast ([128 x i64]* @i6000 to float*)
+ %r851 = load float, float* bitcast ([128 x i64]* @i6000 to float*)
%r856 = sub i64 %rx, 0
br label %b858
b858:
@@ -265,11 +265,11 @@ b858:
%s1173 = add i64 %b1902, 0
%r859 = add i64 %r856, 0
%r862 = add i64 %w1891, 0
- %r863 = getelementptr float* bitcast ([128 x i64]* @i6000 to float*), i64 %r862
- %r864 = load float* %r863
+ %r863 = getelementptr float, float* bitcast ([128 x i64]* @i6000 to float*), i64 %r862
+ %r864 = load float, float* %r863
%r868 = add i64 %w1891, 0
- %r869 = getelementptr float* bitcast ([128 x i64]* @i6000 to float*), i64 %r868
- %r870 = load float* %r869
+ %r869 = getelementptr float, float* bitcast ([128 x i64]* @i6000 to float*), i64 %r868
+ %r870 = load float, float* %r869
%r873 = sub i64 %r859, 0
%r876 = sub i64 %s1173, 0
%r878 = icmp slt i64 %r847, 0
@@ -301,14 +301,14 @@ a53b1019:
%r1022 = add i64 %r876, 0
%r1024 = bitcast i8* %c2 to float*
%r1025 = add i64 %r1022, 0
- %r1026 = getelementptr float* %r1024, i64 %r1025
- %r1027 = load float* %r1026
+ %r1026 = getelementptr float, float* %r1024, i64 %r1025
+ %r1027 = load float, float* %r1026
%r1032 = add i64 %r873, 0
%r1033 = add i64 %r1032, 0
- %r1034 = getelementptr float* %r1024, i64 %r1033
- %r1035 = load float* %r1034
+ %r1034 = getelementptr float, float* %r1024, i64 %r1033
+ %r1035 = load float, float* %r1034
%r1037 = bitcast i8* %c22010 to float*
- %r1040 = getelementptr float* %r1037, i64 %r1025
+ %r1040 = getelementptr float, float* %r1037, i64 %r1025
%r1044 = fadd float %r864, %r1035
%r1046 = fadd float %r870, %r1027
%r1047 = fadd float %r1044, %r1046
@@ -335,11 +335,11 @@ b1117:
%d1353 = bitcast i64 %w1915 to i64
%r1120 = add i64 %s661, 0
%r1121 = add i64 %r1120, 0
- %r1122 = getelementptr float* bitcast ([128 x i64]* @i6000 to float*), i64 %r1121
- %r1123 = load float* %r1122
+ %r1122 = getelementptr float, float* bitcast ([128 x i64]* @i6000 to float*), i64 %r1121
+ %r1123 = load float, float* %r1122
%r1132 = bitcast i8* %c22012 to float*
- %r1134 = getelementptr float* %r1132, i64 %w1915
- %r1135 = load float* %r1134
+ %r1134 = getelementptr float, float* %r1132, i64 %w1915
+ %r1135 = load float, float* %r1134
%r1136 = fadd float %r1123, %r1135
%r1138 = icmp slt i64 %r1114, 0
br i1 %r1138, label %b1139, label %a63b
@@ -409,8 +409,8 @@ b1342:
%r1352 = add i64 %s1523, 0
%r1355 = sub i64 %r1352, 0
%r1370 = add i64 %d1533, 0
- %r1371 = getelementptr float* bitcast ([128 x i64]* @i6000 to float*), i64 %r1370
- %r1372 = load float* %r1371
+ %r1371 = getelementptr float, float* bitcast ([128 x i64]* @i6000 to float*), i64 %r1370
+ %r1372 = load float, float* %r1371
br label %a74b
a74b:
%w1958 = phi i64 [ 0, %b1342 ], [ %v1959, %a74b ]
@@ -440,13 +440,13 @@ a97b:
%r1614 = mul i64 %r1613, 0
%r1622 = add i64 %r1614, 0
%r1754 = bitcast i8* %r28 to float*
- %r1756 = getelementptr float* %r1754, i64 %w1970
- %r1757 = load float* %r1756
+ %r1756 = getelementptr float, float* %r1754, i64 %w1970
+ %r1757 = load float, float* %r1756
%r1761 = add i64 %r1622, 0
- %r1762 = getelementptr float* bitcast ([128 x i64]* @i6000 to float*), i64 %r1761
- %r1763 = load float* %r1762
+ %r1762 = getelementptr float, float* bitcast ([128 x i64]* @i6000 to float*), i64 %r1761
+ %r1763 = load float, float* %r1762
%r1767 = add i64 %r1622, 0
- %r1768 = getelementptr float* bitcast ([128 x i64]* @i6000 to float*), i64 %r1767
+ %r1768 = getelementptr float, float* bitcast ([128 x i64]* @i6000 to float*), i64 %r1767
%r1772 = fadd float %r1763, 0.000000e+00
%r1773 = fadd float %r1772, 0.000000e+00
%r1809 = fadd float %r1757, 0.000000e+00
diff --git a/test/CodeGen/X86/2007-02-04-OrAddrMode.ll b/test/CodeGen/X86/2007-02-04-OrAddrMode.ll
index cea4d9d272fc..f05175259c80 100644
--- a/test/CodeGen/X86/2007-02-04-OrAddrMode.ll
+++ b/test/CodeGen/X86/2007-02-04-OrAddrMode.ll
@@ -7,10 +7,10 @@ define i32 @test(float ** %tmp2, i32 %tmp12) nounwind {
; CHECK: orl $1, %{{.*}}
; CHECK: ret
- %tmp3 = load float** %tmp2
+ %tmp3 = load float*, float** %tmp2
%tmp132 = shl i32 %tmp12, 2 ; <i32> [#uses=1]
%tmp4 = bitcast float* %tmp3 to i8* ; <i8*> [#uses=1]
- %ctg2 = getelementptr i8* %tmp4, i32 %tmp132 ; <i8*> [#uses=1]
+ %ctg2 = getelementptr i8, i8* %tmp4, i32 %tmp132 ; <i8*> [#uses=1]
%tmp6 = ptrtoint i8* %ctg2 to i32 ; <i32> [#uses=1]
%tmp14 = or i32 %tmp6, 1 ; <i32> [#uses=1]
ret i32 %tmp14
diff --git a/test/CodeGen/X86/2007-02-16-BranchFold.ll b/test/CodeGen/X86/2007-02-16-BranchFold.ll
index 6bf5631b4e34..22e0a4e806f9 100644
--- a/test/CodeGen/X86/2007-02-16-BranchFold.ll
+++ b/test/CodeGen/X86/2007-02-16-BranchFold.ll
@@ -54,15 +54,15 @@ bb28.i.i938.exitStub: ; preds = %LeafBlock3
ret i16 4
bb.i9.i.i932.ce: ; preds = %newFuncRoot
- %tmp1.i3.i.i930 = getelementptr %struct.list* %l_addr.01.0.i2.i.i929, i32 0, i32 0 ; <i8**> [#uses=1]
- %tmp2.i4.i.i931 = load i8** %tmp1.i3.i.i930 ; <i8*> [#uses=1]
+ %tmp1.i3.i.i930 = getelementptr %struct.list, %struct.list* %l_addr.01.0.i2.i.i929, i32 0, i32 0 ; <i8**> [#uses=1]
+ %tmp2.i4.i.i931 = load i8*, i8** %tmp1.i3.i.i930 ; <i8*> [#uses=1]
%tmp66.i62.i = bitcast i8* %tmp2.i4.i.i931 to %struct.operator* ; <%struct.operator*> [#uses=7]
- %tmp1.i6.i = getelementptr %struct.operator* %tmp66.i62.i, i32 0, i32 2 ; <i32*> [#uses=1]
- %tmp2.i7.i = load i32* %tmp1.i6.i ; <i32> [#uses=1]
- %tmp3.i8.i = load %struct.FILE** @outfile ; <%struct.FILE*> [#uses=1]
- %tmp5.i9.i = call i32 (%struct.FILE*, i8*, ...)* @fprintf( %struct.FILE* %tmp3.i8.i, i8* getelementptr ([11 x i8]* @str1, i32 0, i32 0), i32 %tmp2.i7.i ) ; <i32> [#uses=0]
- %tmp7.i10.i = getelementptr %struct.operator* %tmp66.i62.i, i32 0, i32 5 ; <i32*> [#uses=1]
- %tmp8.i11.i = load i32* %tmp7.i10.i ; <i32> [#uses=7]
+ %tmp1.i6.i = getelementptr %struct.operator, %struct.operator* %tmp66.i62.i, i32 0, i32 2 ; <i32*> [#uses=1]
+ %tmp2.i7.i = load i32, i32* %tmp1.i6.i ; <i32> [#uses=1]
+ %tmp3.i8.i = load %struct.FILE*, %struct.FILE** @outfile ; <%struct.FILE*> [#uses=1]
+ %tmp5.i9.i = call i32 (%struct.FILE*, i8*, ...) @fprintf( %struct.FILE* %tmp3.i8.i, i8* getelementptr ([11 x i8], [11 x i8]* @str1, i32 0, i32 0), i32 %tmp2.i7.i ) ; <i32> [#uses=0]
+ %tmp7.i10.i = getelementptr %struct.operator, %struct.operator* %tmp66.i62.i, i32 0, i32 5 ; <i32*> [#uses=1]
+ %tmp8.i11.i = load i32, i32* %tmp7.i10.i ; <i32> [#uses=7]
br label %NodeBlock5
NodeBlock5: ; preds = %bb.i9.i.i932.ce
diff --git a/test/CodeGen/X86/2007-02-19-LiveIntervalAssert.ll b/test/CodeGen/X86/2007-02-19-LiveIntervalAssert.ll
index 954c95d69611..a9b85b94cd41 100644
--- a/test/CodeGen/X86/2007-02-19-LiveIntervalAssert.ll
+++ b/test/CodeGen/X86/2007-02-19-LiveIntervalAssert.ll
@@ -6,9 +6,9 @@
@stderr = external global %struct._IO_FILE*
define void @__eprintf(i8* %string, i8* %expression, i32 %line, i8* %filename) {
- %tmp = load %struct._IO_FILE** @stderr
- %tmp5 = tail call i32 (%struct._IO_FILE*, i8*, ...)* @fprintf( %struct._IO_FILE* %tmp, i8* %string, i8* %expression, i32 %line, i8* %filename )
- %tmp6 = load %struct._IO_FILE** @stderr
+ %tmp = load %struct._IO_FILE*, %struct._IO_FILE** @stderr
+ %tmp5 = tail call i32 (%struct._IO_FILE*, i8*, ...) @fprintf( %struct._IO_FILE* %tmp, i8* %string, i8* %expression, i32 %line, i8* %filename )
+ %tmp6 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr
%tmp7 = tail call i32 @fflush( %struct._IO_FILE* %tmp6 )
tail call void @abort( )
unreachable
diff --git a/test/CodeGen/X86/2007-03-01-SpillerCrash.ll b/test/CodeGen/X86/2007-03-01-SpillerCrash.ll
index 112d1ab65e7b..dbbb611dc75a 100644
--- a/test/CodeGen/X86/2007-03-01-SpillerCrash.ll
+++ b/test/CodeGen/X86/2007-03-01-SpillerCrash.ll
@@ -4,7 +4,7 @@
define void @test() nounwind {
test.exit:
fmul <4 x float> zeroinitializer, zeroinitializer ; <<4 x float>>:0 [#uses=4]
- load <4 x float>* null ; <<4 x float>>:1 [#uses=1]
+ load <4 x float>, <4 x float>* null ; <<4 x float>>:1 [#uses=1]
shufflevector <4 x float> %1, <4 x float> undef, <4 x i32> < i32 3, i32 3, i32 3, i32 3 > ; <<4 x float>>:2 [#uses=1]
fmul <4 x float> %0, %2 ; <<4 x float>>:3 [#uses=1]
fsub <4 x float> zeroinitializer, %3 ; <<4 x float>>:4 [#uses=1]
diff --git a/test/CodeGen/X86/2007-03-15-GEP-Idx-Sink.ll b/test/CodeGen/X86/2007-03-15-GEP-Idx-Sink.ll
index 4d7c3a185a8b..f159bcdee134 100644
--- a/test/CodeGen/X86/2007-03-15-GEP-Idx-Sink.ll
+++ b/test/CodeGen/X86/2007-03-15-GEP-Idx-Sink.ll
@@ -25,50 +25,50 @@ bb.preheader: ; preds = %entry
bb: ; preds = %bb, %bb.preheader
%i.073.0 = phi i32 [ 0, %bb.preheader ], [ %indvar.next, %bb ] ; <i32> [#uses=3]
%p_addr.076.0.rec = mul i32 %i.073.0, 9 ; <i32> [#uses=9]
- %p_addr.076.0 = getelementptr i8* %p, i32 %p_addr.076.0.rec ; <i8*> [#uses=1]
- %tmp2 = getelementptr i8** %buf, i32 %i.073.0 ; <i8**> [#uses=1]
- %tmp3 = load i8** %tmp2 ; <i8*> [#uses=8]
- %tmp5 = getelementptr i8* %tmp3, i32 %col ; <i8*> [#uses=1]
- %tmp7 = load i8* %p_addr.076.0 ; <i8> [#uses=1]
+ %p_addr.076.0 = getelementptr i8, i8* %p, i32 %p_addr.076.0.rec ; <i8*> [#uses=1]
+ %tmp2 = getelementptr i8*, i8** %buf, i32 %i.073.0 ; <i8**> [#uses=1]
+ %tmp3 = load i8*, i8** %tmp2 ; <i8*> [#uses=8]
+ %tmp5 = getelementptr i8, i8* %tmp3, i32 %col ; <i8*> [#uses=1]
+ %tmp7 = load i8, i8* %p_addr.076.0 ; <i8> [#uses=1]
store i8 %tmp7, i8* %tmp5
%p_addr.076.0.sum93 = add i32 %p_addr.076.0.rec, 1 ; <i32> [#uses=1]
- %tmp11 = getelementptr i8* %p, i32 %p_addr.076.0.sum93 ; <i8*> [#uses=1]
- %tmp13 = load i8* %tmp11 ; <i8> [#uses=1]
- %tmp15 = getelementptr i8* %tmp3, i32 %tmp5.sum72 ; <i8*> [#uses=1]
+ %tmp11 = getelementptr i8, i8* %p, i32 %p_addr.076.0.sum93 ; <i8*> [#uses=1]
+ %tmp13 = load i8, i8* %tmp11 ; <i8> [#uses=1]
+ %tmp15 = getelementptr i8, i8* %tmp3, i32 %tmp5.sum72 ; <i8*> [#uses=1]
store i8 %tmp13, i8* %tmp15
%p_addr.076.0.sum92 = add i32 %p_addr.076.0.rec, 2 ; <i32> [#uses=1]
- %tmp17 = getelementptr i8* %p, i32 %p_addr.076.0.sum92 ; <i8*> [#uses=1]
- %tmp19 = load i8* %tmp17 ; <i8> [#uses=1]
- %tmp21 = getelementptr i8* %tmp3, i32 %tmp5.sum71 ; <i8*> [#uses=1]
+ %tmp17 = getelementptr i8, i8* %p, i32 %p_addr.076.0.sum92 ; <i8*> [#uses=1]
+ %tmp19 = load i8, i8* %tmp17 ; <i8> [#uses=1]
+ %tmp21 = getelementptr i8, i8* %tmp3, i32 %tmp5.sum71 ; <i8*> [#uses=1]
store i8 %tmp19, i8* %tmp21
%p_addr.076.0.sum91 = add i32 %p_addr.076.0.rec, 3 ; <i32> [#uses=1]
- %tmp23 = getelementptr i8* %p, i32 %p_addr.076.0.sum91 ; <i8*> [#uses=1]
- %tmp25 = load i8* %tmp23 ; <i8> [#uses=1]
- %tmp27 = getelementptr i8* %tmp3, i32 %tmp5.sum70 ; <i8*> [#uses=1]
+ %tmp23 = getelementptr i8, i8* %p, i32 %p_addr.076.0.sum91 ; <i8*> [#uses=1]
+ %tmp25 = load i8, i8* %tmp23 ; <i8> [#uses=1]
+ %tmp27 = getelementptr i8, i8* %tmp3, i32 %tmp5.sum70 ; <i8*> [#uses=1]
store i8 %tmp25, i8* %tmp27
%p_addr.076.0.sum90 = add i32 %p_addr.076.0.rec, 4 ; <i32> [#uses=1]
- %tmp29 = getelementptr i8* %p, i32 %p_addr.076.0.sum90 ; <i8*> [#uses=1]
- %tmp31 = load i8* %tmp29 ; <i8> [#uses=1]
- %tmp33 = getelementptr i8* %tmp3, i32 %tmp5.sum69 ; <i8*> [#uses=2]
+ %tmp29 = getelementptr i8, i8* %p, i32 %p_addr.076.0.sum90 ; <i8*> [#uses=1]
+ %tmp31 = load i8, i8* %tmp29 ; <i8> [#uses=1]
+ %tmp33 = getelementptr i8, i8* %tmp3, i32 %tmp5.sum69 ; <i8*> [#uses=2]
store i8 %tmp31, i8* %tmp33
%p_addr.076.0.sum89 = add i32 %p_addr.076.0.rec, 5 ; <i32> [#uses=1]
- %tmp35 = getelementptr i8* %p, i32 %p_addr.076.0.sum89 ; <i8*> [#uses=1]
- %tmp37 = load i8* %tmp35 ; <i8> [#uses=1]
- %tmp39 = getelementptr i8* %tmp3, i32 %tmp5.sum68 ; <i8*> [#uses=1]
+ %tmp35 = getelementptr i8, i8* %p, i32 %p_addr.076.0.sum89 ; <i8*> [#uses=1]
+ %tmp37 = load i8, i8* %tmp35 ; <i8> [#uses=1]
+ %tmp39 = getelementptr i8, i8* %tmp3, i32 %tmp5.sum68 ; <i8*> [#uses=1]
store i8 %tmp37, i8* %tmp39
%p_addr.076.0.sum88 = add i32 %p_addr.076.0.rec, 6 ; <i32> [#uses=1]
- %tmp41 = getelementptr i8* %p, i32 %p_addr.076.0.sum88 ; <i8*> [#uses=1]
- %tmp43 = load i8* %tmp41 ; <i8> [#uses=1]
+ %tmp41 = getelementptr i8, i8* %p, i32 %p_addr.076.0.sum88 ; <i8*> [#uses=1]
+ %tmp43 = load i8, i8* %tmp41 ; <i8> [#uses=1]
store i8 %tmp43, i8* %tmp33
%p_addr.076.0.sum87 = add i32 %p_addr.076.0.rec, 7 ; <i32> [#uses=1]
- %tmp47 = getelementptr i8* %p, i32 %p_addr.076.0.sum87 ; <i8*> [#uses=1]
- %tmp49 = load i8* %tmp47 ; <i8> [#uses=1]
- %tmp51 = getelementptr i8* %tmp3, i32 %tmp5.sum66 ; <i8*> [#uses=1]
+ %tmp47 = getelementptr i8, i8* %p, i32 %p_addr.076.0.sum87 ; <i8*> [#uses=1]
+ %tmp49 = load i8, i8* %tmp47 ; <i8> [#uses=1]
+ %tmp51 = getelementptr i8, i8* %tmp3, i32 %tmp5.sum66 ; <i8*> [#uses=1]
store i8 %tmp49, i8* %tmp51
%p_addr.076.0.sum = add i32 %p_addr.076.0.rec, 8 ; <i32> [#uses=1]
- %tmp53 = getelementptr i8* %p, i32 %p_addr.076.0.sum ; <i8*> [#uses=1]
- %tmp55 = load i8* %tmp53 ; <i8> [#uses=1]
- %tmp57 = getelementptr i8* %tmp3, i32 %tmp5.sum ; <i8*> [#uses=1]
+ %tmp53 = getelementptr i8, i8* %p, i32 %p_addr.076.0.sum ; <i8*> [#uses=1]
+ %tmp55 = load i8, i8* %tmp53 ; <i8> [#uses=1]
+ %tmp57 = getelementptr i8, i8* %tmp3, i32 %tmp5.sum ; <i8*> [#uses=1]
store i8 %tmp55, i8* %tmp57
%indvar.next = add i32 %i.073.0, 1 ; <i32> [#uses=2]
icmp eq i32 %indvar.next, %size ; <i1>:1 [#uses=1]
diff --git a/test/CodeGen/X86/2007-03-16-InlineAsm.ll b/test/CodeGen/X86/2007-03-16-InlineAsm.ll
index 3bd6d590efc1..61746814f9a0 100644
--- a/test/CodeGen/X86/2007-03-16-InlineAsm.ll
+++ b/test/CodeGen/X86/2007-03-16-InlineAsm.ll
@@ -11,16 +11,16 @@ entry:
%ret = alloca i32, align 4 ; <i32*> [#uses=2]
store i32 %A, i32* %A_addr
store i32 %B, i32* %B_addr
- %tmp1 = load i32* %A_addr ; <i32> [#uses=1]
+ %tmp1 = load i32, i32* %A_addr ; <i32> [#uses=1]
%tmp2 = call i32 asm "roll $1,$0", "=r,I,0,~{dirflag},~{fpsr},~{flags},~{cc}"( i32 7, i32 %tmp1 ) ; <i32> [#uses=1]
store i32 %tmp2, i32* %ret
- %tmp3 = load i32* %ret ; <i32> [#uses=1]
+ %tmp3 = load i32, i32* %ret ; <i32> [#uses=1]
store i32 %tmp3, i32* %tmp
- %tmp4 = load i32* %tmp ; <i32> [#uses=1]
+ %tmp4 = load i32, i32* %tmp ; <i32> [#uses=1]
store i32 %tmp4, i32* %retval
br label %return
return: ; preds = %entry
- %retval5 = load i32* %retval ; <i32> [#uses=1]
+ %retval5 = load i32, i32* %retval ; <i32> [#uses=1]
ret i32 %retval5
}
diff --git a/test/CodeGen/X86/2007-03-26-CoalescerBug.ll b/test/CodeGen/X86/2007-03-26-CoalescerBug.ll
index 9676f143bca6..9a3d4cb3bee7 100644
--- a/test/CodeGen/X86/2007-03-26-CoalescerBug.ll
+++ b/test/CodeGen/X86/2007-03-26-CoalescerBug.ll
@@ -4,7 +4,7 @@
define void @foo(...) {
bb1:
- %t43 = load i64* getelementptr ([339 x i64]* @data, i32 0, i64 212), align 4
+ %t43 = load i64, i64* getelementptr ([339 x i64], [339 x i64]* @data, i32 0, i64 212), align 4
br i1 false, label %bb80, label %bb6
bb6:
br i1 false, label %bb38, label %bb265
diff --git a/test/CodeGen/X86/2007-04-08-InlineAsmCrash.ll b/test/CodeGen/X86/2007-04-08-InlineAsmCrash.ll
index 9f09e88664c6..176b566fe0ae 100644
--- a/test/CodeGen/X86/2007-04-08-InlineAsmCrash.ll
+++ b/test/CodeGen/X86/2007-04-08-InlineAsmCrash.ll
@@ -9,7 +9,7 @@ target triple = "x86_64-unknown-linux-gnu"
define fastcc i32 @bc_divide(%struct.bc_struct* %n1, %struct.bc_struct* %n2, %struct.bc_struct** %quot, i32 %scale) nounwind {
entry:
- %tmp7.i46 = tail call i64 asm sideeffect ".byte 0x0f,0x31", "={dx},=*{ax},~{dirflag},~{fpsr},~{flags}"( i64* getelementptr (%struct.CycleCount* @_programStartTime, i32 0, i32 1) ) ; <i64> [#uses=0]
+ %tmp7.i46 = tail call i64 asm sideeffect ".byte 0x0f,0x31", "={dx},=*{ax},~{dirflag},~{fpsr},~{flags}"( i64* getelementptr (%struct.CycleCount, %struct.CycleCount* @_programStartTime, i32 0, i32 1) ) ; <i64> [#uses=0]
%tmp221 = sdiv i32 10, 0 ; <i32> [#uses=1]
tail call fastcc void @_one_mult( i8* null, i32 0, i32 %tmp221, i8* null )
ret i32 0
diff --git a/test/CodeGen/X86/2007-04-17-LiveIntervalAssert.ll b/test/CodeGen/X86/2007-04-17-LiveIntervalAssert.ll
index 4604f46c533f..31c6b532d8c7 100644
--- a/test/CodeGen/X86/2007-04-17-LiveIntervalAssert.ll
+++ b/test/CodeGen/X86/2007-04-17-LiveIntervalAssert.ll
@@ -19,8 +19,8 @@ cond_true: ; preds = %bb.preheader
bb32: ; preds = %bb32, %cond_true
%i.2115.0 = phi i32 [ 0, %cond_true ], [ %indvar.next127, %bb32 ] ; <i32> [#uses=1]
%c.2112.0 = phi i32 [ 0, %cond_true ], [ %tmp49, %bb32 ] ; <i32> [#uses=1]
- %tmp43 = getelementptr %struct.partition_def* %part, i32 0, i32 1, i32 %c.2112.0, i32 1 ; <%struct.partition_elem**> [#uses=1]
- %tmp44 = load %struct.partition_elem** %tmp43 ; <%struct.partition_elem*> [#uses=1]
+ %tmp43 = getelementptr %struct.partition_def, %struct.partition_def* %part, i32 0, i32 1, i32 %c.2112.0, i32 1 ; <%struct.partition_elem**> [#uses=1]
+ %tmp44 = load %struct.partition_elem*, %struct.partition_elem** %tmp43 ; <%struct.partition_elem*> [#uses=1]
%tmp4445 = ptrtoint %struct.partition_elem* %tmp44 to i32 ; <i32> [#uses=1]
%tmp48 = sub i32 %tmp4445, 0 ; <i32> [#uses=1]
%tmp49 = sdiv i32 %tmp48, 12 ; <i32> [#uses=1]
diff --git a/test/CodeGen/X86/2007-04-25-MMX-PADDQ.ll b/test/CodeGen/X86/2007-04-25-MMX-PADDQ.ll
deleted file mode 100644
index 11c0bf957983..000000000000
--- a/test/CodeGen/X86/2007-04-25-MMX-PADDQ.ll
+++ /dev/null
@@ -1,64 +0,0 @@
-; RUN: llc < %s -o - -march=x86 -mattr=+mmx | FileCheck %s
-; There are no MMX instructions here. We use add+adcl for the adds.
-
-define <1 x i64> @unsigned_add3(<1 x i64>* %a, <1 x i64>* %b, i32 %count) nounwind {
-entry:
- %tmp2942 = icmp eq i32 %count, 0 ; <i1> [#uses=1]
- br i1 %tmp2942, label %bb31, label %bb26
-
-bb26: ; preds = %bb26, %entry
-
-; CHECK: addl
-; CHECK: adcl
-
- %i.037.0 = phi i32 [ 0, %entry ], [ %tmp25, %bb26 ] ; <i32> [#uses=3]
- %sum.035.0 = phi <1 x i64> [ zeroinitializer, %entry ], [ %tmp22, %bb26 ] ; <<1 x i64>> [#uses=1]
- %tmp13 = getelementptr <1 x i64>* %b, i32 %i.037.0 ; <<1 x i64>*> [#uses=1]
- %tmp14 = load <1 x i64>* %tmp13 ; <<1 x i64>> [#uses=1]
- %tmp18 = getelementptr <1 x i64>* %a, i32 %i.037.0 ; <<1 x i64>*> [#uses=1]
- %tmp19 = load <1 x i64>* %tmp18 ; <<1 x i64>> [#uses=1]
- %tmp21 = add <1 x i64> %tmp19, %tmp14 ; <<1 x i64>> [#uses=1]
- %tmp22 = add <1 x i64> %tmp21, %sum.035.0 ; <<1 x i64>> [#uses=2]
- %tmp25 = add i32 %i.037.0, 1 ; <i32> [#uses=2]
- %tmp29 = icmp ult i32 %tmp25, %count ; <i1> [#uses=1]
- br i1 %tmp29, label %bb26, label %bb31
-
-bb31: ; preds = %bb26, %entry
- %sum.035.1 = phi <1 x i64> [ zeroinitializer, %entry ], [ %tmp22, %bb26 ] ; <<1 x i64>> [#uses=1]
- ret <1 x i64> %sum.035.1
-}
-
-
-; This is the original test converted to use MMX intrinsics.
-
-define <1 x i64> @unsigned_add3a(x86_mmx* %a, x86_mmx* %b, i32 %count) nounwind {
-entry:
- %tmp2943 = bitcast <1 x i64><i64 0> to x86_mmx
- %tmp2942 = icmp eq i32 %count, 0 ; <i1> [#uses=1]
- br i1 %tmp2942, label %bb31, label %bb26
-
-bb26: ; preds = %bb26, %entry
-
-; CHECK: movq ({{.*}},8), %mm
-; CHECK: paddq ({{.*}},8), %mm
-; CHECK: paddq %mm{{[0-7]}}, %mm
-
- %i.037.0 = phi i32 [ 0, %entry ], [ %tmp25, %bb26 ] ; <i32> [#uses=3]
- %sum.035.0 = phi x86_mmx [ %tmp2943, %entry ], [ %tmp22, %bb26 ] ; <x86_mmx> [#uses=1]
- %tmp13 = getelementptr x86_mmx* %b, i32 %i.037.0 ; <x86_mmx*> [#uses=1]
- %tmp14 = load x86_mmx* %tmp13 ; <x86_mmx> [#uses=1]
- %tmp18 = getelementptr x86_mmx* %a, i32 %i.037.0 ; <x86_mmx*> [#uses=1]
- %tmp19 = load x86_mmx* %tmp18 ; <x86_mmx> [#uses=1]
- %tmp21 = call x86_mmx @llvm.x86.mmx.padd.q (x86_mmx %tmp19, x86_mmx %tmp14) ; <x86_mmx> [#uses=1]
- %tmp22 = call x86_mmx @llvm.x86.mmx.padd.q (x86_mmx %tmp21, x86_mmx %sum.035.0) ; <x86_mmx> [#uses=2]
- %tmp25 = add i32 %i.037.0, 1 ; <i32> [#uses=2]
- %tmp29 = icmp ult i32 %tmp25, %count ; <i1> [#uses=1]
- br i1 %tmp29, label %bb26, label %bb31
-
-bb31: ; preds = %bb26, %entry
- %sum.035.1 = phi x86_mmx [ %tmp2943, %entry ], [ %tmp22, %bb26 ] ; <x86_mmx> [#uses=1]
- %t = bitcast x86_mmx %sum.035.1 to <1 x i64>
- ret <1 x i64> %t
-}
-
-declare x86_mmx @llvm.x86.mmx.padd.q(x86_mmx, x86_mmx)
diff --git a/test/CodeGen/X86/2007-05-05-VecCastExpand.ll b/test/CodeGen/X86/2007-05-05-VecCastExpand.ll
index e58b1932197d..0edf1398295d 100644
--- a/test/CodeGen/X86/2007-05-05-VecCastExpand.ll
+++ b/test/CodeGen/X86/2007-05-05-VecCastExpand.ll
@@ -5,9 +5,9 @@
define void @test() {
bb.i:
- %tmp.i660 = load <4 x float>* null ; <<4 x float>> [#uses=1]
- call void (i32, ...)* @printf( i32 0, i8* getelementptr ([18 x i8]* @str, i32 0, i64 0), double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, double 0.000000e+00 )
- %tmp152.i = load <4 x i32>* null ; <<4 x i32>> [#uses=1]
+ %tmp.i660 = load <4 x float>, <4 x float>* null ; <<4 x float>> [#uses=1]
+ call void (i32, ...) @printf( i32 0, i8* getelementptr ([18 x i8], [18 x i8]* @str, i32 0, i64 0), double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, double 0.000000e+00 )
+ %tmp152.i = load <4 x i32>, <4 x i32>* null ; <<4 x i32>> [#uses=1]
%tmp156.i = bitcast <4 x i32> %tmp152.i to <4 x i32> ; <<4 x i32>> [#uses=1]
%tmp175.i = bitcast <4 x float> %tmp.i660 to <4 x i32> ; <<4 x i32>> [#uses=1]
%tmp176.i = xor <4 x i32> %tmp156.i, < i32 -1, i32 -1, i32 -1, i32 -1 > ; <<4 x i32>> [#uses=1]
diff --git a/test/CodeGen/X86/2007-05-14-LiveIntervalAssert.ll b/test/CodeGen/X86/2007-05-14-LiveIntervalAssert.ll
index ecc5835405d7..9ce5f5ac63a1 100644
--- a/test/CodeGen/X86/2007-05-14-LiveIntervalAssert.ll
+++ b/test/CodeGen/X86/2007-05-14-LiveIntervalAssert.ll
@@ -19,7 +19,7 @@ cond_true109: ; preds = %entry
cond_next164: ; preds = %cond_true109
%tmp176 = call signext i16 @GetParamDesc( %struct.XDesc* null, i32 1701999219, i32 1413830740, %struct.XDesc* null )
- call void (i64, i8*, ...)* @r_raise( i64 0, i8* null )
+ call void (i64, i8*, ...) @r_raise( i64 0, i8* null )
unreachable
cond_true239: ; preds = %cond_true109
diff --git a/test/CodeGen/X86/2007-06-04-X86-64-CtorAsmBugs.ll b/test/CodeGen/X86/2007-06-04-X86-64-CtorAsmBugs.ll
index 321e11651b60..1291dc9e6edc 100644
--- a/test/CodeGen/X86/2007-06-04-X86-64-CtorAsmBugs.ll
+++ b/test/CodeGen/X86/2007-06-04-X86-64-CtorAsmBugs.ll
@@ -12,7 +12,7 @@ entry:
bb.i: ; preds = %bb.i, %entry
%i.1.i1.0 = phi i32 [ 0, %entry ], [ %indvar.next, %bb.i ] ; <i32> [#uses=2]
%tmp1012.i = sext i32 %i.1.i1.0 to i64 ; <i64> [#uses=1]
- %tmp13.i = getelementptr %struct.A* @_ZN1A1aE, i32 0, i32 0, i64 %tmp1012.i ; <i8*> [#uses=1]
+ %tmp13.i = getelementptr %struct.A, %struct.A* @_ZN1A1aE, i32 0, i32 0, i64 %tmp1012.i ; <i8*> [#uses=1]
store i8 0, i8* %tmp13.i
%indvar.next = add i32 %i.1.i1.0, 1 ; <i32> [#uses=2]
%exitcond = icmp eq i32 %indvar.next, 1024 ; <i1> [#uses=1]
diff --git a/test/CodeGen/X86/2007-06-15-IntToMMX.ll b/test/CodeGen/X86/2007-06-15-IntToMMX.ll
deleted file mode 100644
index 5612d9eb282c..000000000000
--- a/test/CodeGen/X86/2007-06-15-IntToMMX.ll
+++ /dev/null
@@ -1,19 +0,0 @@
-; RUN: llc < %s -march=x86-64 -mattr=+mmx | FileCheck %s
-
-; CHECK: paddusw
-
-@R = external global x86_mmx ; <x86_mmx*> [#uses=1]
-
-define void @foo(<1 x i64> %A, <1 x i64> %B) {
-entry:
- %tmp2 = bitcast <1 x i64> %A to x86_mmx
- %tmp3 = bitcast <1 x i64> %B to x86_mmx
- %tmp7 = tail call x86_mmx @llvm.x86.mmx.paddus.w( x86_mmx %tmp2, x86_mmx %tmp3 ) ; <x86_mmx> [#uses=1]
- store x86_mmx %tmp7, x86_mmx* @R
- tail call void @llvm.x86.mmx.emms( )
- ret void
-}
-
-declare x86_mmx @llvm.x86.mmx.paddus.w(x86_mmx, x86_mmx)
-
-declare void @llvm.x86.mmx.emms()
diff --git a/test/CodeGen/X86/2007-06-29-VecFPConstantCSEBug.ll b/test/CodeGen/X86/2007-06-29-VecFPConstantCSEBug.ll
index dc11eec9c17f..87edab77ac19 100644
--- a/test/CodeGen/X86/2007-06-29-VecFPConstantCSEBug.ll
+++ b/test/CodeGen/X86/2007-06-29-VecFPConstantCSEBug.ll
@@ -1,10 +1,10 @@
; RUN: llc < %s -march=x86 -mattr=+sse2
define void @test(<4 x float>* %arg) {
- %tmp89 = getelementptr <4 x float>* %arg, i64 3
+ %tmp89 = getelementptr <4 x float>, <4 x float>* %arg, i64 3
%tmp1144 = fsub <4 x float> < float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00 >, zeroinitializer
store <4 x float> %tmp1144, <4 x float>* null
- %tmp1149 = load <4 x float>* %tmp89
+ %tmp1149 = load <4 x float>, <4 x float>* %tmp89
%tmp1150 = fsub <4 x float> < float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00 >, %tmp1149
store <4 x float> %tmp1150, <4 x float>* %tmp89
ret void
diff --git a/test/CodeGen/X86/2007-07-10-StackerAssert.ll b/test/CodeGen/X86/2007-07-10-StackerAssert.ll
index d611677942c2..c8660f797e2c 100644
--- a/test/CodeGen/X86/2007-07-10-StackerAssert.ll
+++ b/test/CodeGen/X86/2007-07-10-StackerAssert.ll
@@ -22,15 +22,15 @@ cond_true354: ; preds = %bb164
ret i32 0
bb383: ; preds = %bb164
- %tmp408 = load float* null ; <float> [#uses=2]
+ %tmp408 = load float, float* null ; <float> [#uses=2]
br i1 false, label %cond_true425, label %cond_next443
cond_true425: ; preds = %bb383
- %tmp430 = load float* null ; <float> [#uses=1]
+ %tmp430 = load float, float* null ; <float> [#uses=1]
%tmp432 = fsub float %tmp430, %tmp408 ; <float> [#uses=1]
%tmp432433 = fpext float %tmp432 to double ; <double> [#uses=1]
%tmp434435 = fpext float %tmp408 to double ; <double> [#uses=1]
- call void (i8*, ...)* @PR_LogPrint( i8* getelementptr ([56 x i8]* @.str97, i32 0, i32 0), double 0.000000e+00, double %tmp434435, double %tmp432433 )
+ call void (i8*, ...) @PR_LogPrint( i8* getelementptr ([56 x i8], [56 x i8]* @.str97, i32 0, i32 0), double 0.000000e+00, double %tmp434435, double %tmp432433 )
ret i32 0
cond_next443: ; preds = %bb383
diff --git a/test/CodeGen/X86/2007-07-18-Vector-Extract.ll b/test/CodeGen/X86/2007-07-18-Vector-Extract.ll
index 6288c4a892c3..63ed4601a04a 100644
--- a/test/CodeGen/X86/2007-07-18-Vector-Extract.ll
+++ b/test/CodeGen/X86/2007-07-18-Vector-Extract.ll
@@ -4,14 +4,14 @@
; CHECK: movq 8([[A0]]), %rax
define i64 @foo_0(<2 x i64>* %val) {
entry:
- %val12 = getelementptr <2 x i64>* %val, i32 0, i32 0 ; <i64*> [#uses=1]
- %tmp7 = load i64* %val12 ; <i64> [#uses=1]
+ %val12 = getelementptr <2 x i64>, <2 x i64>* %val, i32 0, i32 0 ; <i64*> [#uses=1]
+ %tmp7 = load i64, i64* %val12 ; <i64> [#uses=1]
ret i64 %tmp7
}
define i64 @foo_1(<2 x i64>* %val) {
entry:
- %tmp2.gep = getelementptr <2 x i64>* %val, i32 0, i32 1 ; <i64*> [#uses=1]
- %tmp4 = load i64* %tmp2.gep ; <i64> [#uses=1]
+ %tmp2.gep = getelementptr <2 x i64>, <2 x i64>* %val, i32 0, i32 1 ; <i64*> [#uses=1]
+ %tmp4 = load i64, i64* %tmp2.gep ; <i64> [#uses=1]
ret i64 %tmp4
}
diff --git a/test/CodeGen/X86/2007-08-09-IllegalX86-64Asm.ll b/test/CodeGen/X86/2007-08-09-IllegalX86-64Asm.ll
index 7768f36efae5..3bf8225abd0a 100644
--- a/test/CodeGen/X86/2007-08-09-IllegalX86-64Asm.ll
+++ b/test/CodeGen/X86/2007-08-09-IllegalX86-64Asm.ll
@@ -46,14 +46,14 @@ _ubyte_convert2_to_ctypes.exit: ; preds = %entry
]
bb4: ; preds = %_ubyte_convert2_to_ctypes.exit, %cond_next.i
- %tmp5 = load i8*** @PyArray_API, align 8 ; <i8**> [#uses=1]
- %tmp6 = getelementptr i8** %tmp5, i64 2 ; <i8**> [#uses=1]
- %tmp7 = load i8** %tmp6 ; <i8*> [#uses=1]
+ %tmp5 = load i8**, i8*** @PyArray_API, align 8 ; <i8**> [#uses=1]
+ %tmp6 = getelementptr i8*, i8** %tmp5, i64 2 ; <i8**> [#uses=1]
+ %tmp7 = load i8*, i8** %tmp6 ; <i8*> [#uses=1]
%tmp78 = bitcast i8* %tmp7 to %struct._typeobject* ; <%struct._typeobject*> [#uses=1]
- %tmp9 = getelementptr %struct._typeobject* %tmp78, i32 0, i32 12 ; <%struct.PyNumberMethods**> [#uses=1]
- %tmp10 = load %struct.PyNumberMethods** %tmp9 ; <%struct.PyNumberMethods*> [#uses=1]
- %tmp11 = getelementptr %struct.PyNumberMethods* %tmp10, i32 0, i32 5 ; <%struct.PyObject* (%struct.PyObject*, %struct.PyObject*)**> [#uses=1]
- %tmp12 = load %struct.PyObject* (%struct.PyObject*, %struct.PyObject*)** %tmp11 ; <%struct.PyObject* (%struct.PyObject*, %struct.PyObject*)*> [#uses=1]
+ %tmp9 = getelementptr %struct._typeobject, %struct._typeobject* %tmp78, i32 0, i32 12 ; <%struct.PyNumberMethods**> [#uses=1]
+ %tmp10 = load %struct.PyNumberMethods*, %struct.PyNumberMethods** %tmp9 ; <%struct.PyNumberMethods*> [#uses=1]
+ %tmp11 = getelementptr %struct.PyNumberMethods, %struct.PyNumberMethods* %tmp10, i32 0, i32 5 ; <%struct.PyObject* (%struct.PyObject*, %struct.PyObject*)**> [#uses=1]
+ %tmp12 = load %struct.PyObject* (%struct.PyObject*, %struct.PyObject*)*, %struct.PyObject* (%struct.PyObject*, %struct.PyObject*)** %tmp11 ; <%struct.PyObject* (%struct.PyObject*, %struct.PyObject*)*> [#uses=1]
%tmp15 = call %struct.PyObject* %tmp12( %struct.PyObject* %a, %struct.PyObject* %b ) ; <%struct.PyObject*> [#uses=1]
ret %struct.PyObject* %tmp15
@@ -63,38 +63,38 @@ bb17: ; preds = %_ubyte_convert2_to_ctypes.exit, %cond_next.i
br i1 %tmp19, label %cond_next, label %UnifiedReturnBlock
cond_next: ; preds = %bb17
- %tmp22 = load i8*** @PyArray_API, align 8 ; <i8**> [#uses=1]
- %tmp23 = getelementptr i8** %tmp22, i64 10 ; <i8**> [#uses=1]
- %tmp24 = load i8** %tmp23 ; <i8*> [#uses=1]
+ %tmp22 = load i8**, i8*** @PyArray_API, align 8 ; <i8**> [#uses=1]
+ %tmp23 = getelementptr i8*, i8** %tmp22, i64 10 ; <i8**> [#uses=1]
+ %tmp24 = load i8*, i8** %tmp23 ; <i8*> [#uses=1]
%tmp2425 = bitcast i8* %tmp24 to %struct._typeobject* ; <%struct._typeobject*> [#uses=1]
- %tmp26 = getelementptr %struct._typeobject* %tmp2425, i32 0, i32 12 ; <%struct.PyNumberMethods**> [#uses=1]
- %tmp27 = load %struct.PyNumberMethods** %tmp26 ; <%struct.PyNumberMethods*> [#uses=1]
- %tmp28 = getelementptr %struct.PyNumberMethods* %tmp27, i32 0, i32 5 ; <%struct.PyObject* (%struct.PyObject*, %struct.PyObject*)**> [#uses=1]
- %tmp29 = load %struct.PyObject* (%struct.PyObject*, %struct.PyObject*)** %tmp28 ; <%struct.PyObject* (%struct.PyObject*, %struct.PyObject*)*> [#uses=1]
+ %tmp26 = getelementptr %struct._typeobject, %struct._typeobject* %tmp2425, i32 0, i32 12 ; <%struct.PyNumberMethods**> [#uses=1]
+ %tmp27 = load %struct.PyNumberMethods*, %struct.PyNumberMethods** %tmp26 ; <%struct.PyNumberMethods*> [#uses=1]
+ %tmp28 = getelementptr %struct.PyNumberMethods, %struct.PyNumberMethods* %tmp27, i32 0, i32 5 ; <%struct.PyObject* (%struct.PyObject*, %struct.PyObject*)**> [#uses=1]
+ %tmp29 = load %struct.PyObject* (%struct.PyObject*, %struct.PyObject*)*, %struct.PyObject* (%struct.PyObject*, %struct.PyObject*)** %tmp28 ; <%struct.PyObject* (%struct.PyObject*, %struct.PyObject*)*> [#uses=1]
%tmp32 = call %struct.PyObject* %tmp29( %struct.PyObject* %a, %struct.PyObject* %b ) ; <%struct.PyObject*> [#uses=1]
ret %struct.PyObject* %tmp32
bb35: ; preds = %_ubyte_convert2_to_ctypes.exit, %cond_next.i
- %tmp36 = load i8*** @PyUFunc_API, align 8 ; <i8**> [#uses=1]
- %tmp37 = getelementptr i8** %tmp36, i64 27 ; <i8**> [#uses=1]
- %tmp38 = load i8** %tmp37 ; <i8*> [#uses=1]
+ %tmp36 = load i8**, i8*** @PyUFunc_API, align 8 ; <i8**> [#uses=1]
+ %tmp37 = getelementptr i8*, i8** %tmp36, i64 27 ; <i8**> [#uses=1]
+ %tmp38 = load i8*, i8** %tmp37 ; <i8*> [#uses=1]
%tmp3839 = bitcast i8* %tmp38 to void ()* ; <void ()*> [#uses=1]
call void %tmp3839( )
- %tmp40 = load i8* %arg2, align 1 ; <i8> [#uses=4]
+ %tmp40 = load i8, i8* %arg2, align 1 ; <i8> [#uses=4]
%tmp1.i = icmp eq i8 %tmp40, 0 ; <i1> [#uses=2]
br i1 %tmp1.i, label %cond_true.i, label %cond_false.i
cond_true.i: ; preds = %bb35
%tmp3.i196 = call i32 @feraiseexcept( i32 4 ) ; <i32> [#uses=0]
- %tmp46207 = load i8* %arg2, align 1 ; <i8> [#uses=3]
- %tmp48208 = load i8* %arg1, align 1 ; <i8> [#uses=2]
+ %tmp46207 = load i8, i8* %arg2, align 1 ; <i8> [#uses=3]
+ %tmp48208 = load i8, i8* %arg1, align 1 ; <i8> [#uses=2]
%tmp1.i197210 = icmp eq i8 %tmp48208, 0 ; <i1> [#uses=1]
%tmp4.i212 = icmp eq i8 %tmp46207, 0 ; <i1> [#uses=1]
%tmp7.i198213 = or i1 %tmp1.i197210, %tmp4.i212 ; <i1> [#uses=1]
br i1 %tmp7.i198213, label %cond_true.i200, label %cond_next17.i
cond_false.i: ; preds = %bb35
- %tmp42 = load i8* %arg1, align 1 ; <i8> [#uses=3]
+ %tmp42 = load i8, i8* %arg1, align 1 ; <i8> [#uses=3]
%tmp7.i = udiv i8 %tmp42, %tmp40 ; <i8> [#uses=2]
%tmp1.i197 = icmp eq i8 %tmp42, 0 ; <i1> [#uses=1]
%tmp7.i198 = or i1 %tmp1.i197, %tmp1.i ; <i1> [#uses=1]
@@ -120,31 +120,31 @@ cond_next17.i: ; preds = %cond_false.i, %cond_true.i
ubyte_ctype_remainder.exit: ; preds = %cond_next17.i, %cond_true14.i, %cond_true.i200
%out2.0 = phi i8 [ %tmp20.i, %cond_next17.i ], [ 0, %cond_true14.i ], [ 0, %cond_true.i200 ] ; <i8> [#uses=1]
%out.2 = phi i8 [ %out.1, %cond_next17.i ], [ %out.0, %cond_true14.i ], [ %out.0, %cond_true.i200 ] ; <i8> [#uses=1]
- %tmp52 = load i8*** @PyUFunc_API, align 8 ; <i8**> [#uses=1]
- %tmp53 = getelementptr i8** %tmp52, i64 28 ; <i8**> [#uses=1]
- %tmp54 = load i8** %tmp53 ; <i8*> [#uses=1]
+ %tmp52 = load i8**, i8*** @PyUFunc_API, align 8 ; <i8**> [#uses=1]
+ %tmp53 = getelementptr i8*, i8** %tmp52, i64 28 ; <i8**> [#uses=1]
+ %tmp54 = load i8*, i8** %tmp53 ; <i8*> [#uses=1]
%tmp5455 = bitcast i8* %tmp54 to i32 ()* ; <i32 ()*> [#uses=1]
%tmp56 = call i32 %tmp5455( ) ; <i32> [#uses=2]
%tmp58 = icmp eq i32 %tmp56, 0 ; <i1> [#uses=1]
br i1 %tmp58, label %cond_next89, label %cond_true61
cond_true61: ; preds = %ubyte_ctype_remainder.exit
- %tmp62 = load i8*** @PyUFunc_API, align 8 ; <i8**> [#uses=1]
- %tmp63 = getelementptr i8** %tmp62, i64 25 ; <i8**> [#uses=1]
- %tmp64 = load i8** %tmp63 ; <i8*> [#uses=1]
+ %tmp62 = load i8**, i8*** @PyUFunc_API, align 8 ; <i8**> [#uses=1]
+ %tmp63 = getelementptr i8*, i8** %tmp62, i64 25 ; <i8**> [#uses=1]
+ %tmp64 = load i8*, i8** %tmp63 ; <i8*> [#uses=1]
%tmp6465 = bitcast i8* %tmp64 to i32 (i8*, i32*, i32*, %struct.PyObject**)* ; <i32 (i8*, i32*, i32*, %struct.PyObject**)*> [#uses=1]
- %tmp67 = call i32 %tmp6465( i8* getelementptr ([14 x i8]* @.str5, i32 0, i64 0), i32* %bufsize, i32* %errmask, %struct.PyObject** %errobj ) ; <i32> [#uses=1]
+ %tmp67 = call i32 %tmp6465( i8* getelementptr ([14 x i8], [14 x i8]* @.str5, i32 0, i64 0), i32* %bufsize, i32* %errmask, %struct.PyObject** %errobj ) ; <i32> [#uses=1]
%tmp68 = icmp slt i32 %tmp67, 0 ; <i1> [#uses=1]
br i1 %tmp68, label %UnifiedReturnBlock, label %cond_next73
cond_next73: ; preds = %cond_true61
store i32 1, i32* %first, align 4
- %tmp74 = load i8*** @PyUFunc_API, align 8 ; <i8**> [#uses=1]
- %tmp75 = getelementptr i8** %tmp74, i64 29 ; <i8**> [#uses=1]
- %tmp76 = load i8** %tmp75 ; <i8*> [#uses=1]
+ %tmp74 = load i8**, i8*** @PyUFunc_API, align 8 ; <i8**> [#uses=1]
+ %tmp75 = getelementptr i8*, i8** %tmp74, i64 29 ; <i8**> [#uses=1]
+ %tmp76 = load i8*, i8** %tmp75 ; <i8*> [#uses=1]
%tmp7677 = bitcast i8* %tmp76 to i32 (i32, %struct.PyObject*, i32, i32*)* ; <i32 (i32, %struct.PyObject*, i32, i32*)*> [#uses=1]
- %tmp79 = load %struct.PyObject** %errobj, align 8 ; <%struct.PyObject*> [#uses=1]
- %tmp80 = load i32* %errmask, align 4 ; <i32> [#uses=1]
+ %tmp79 = load %struct.PyObject*, %struct.PyObject** %errobj, align 8 ; <%struct.PyObject*> [#uses=1]
+ %tmp80 = load i32, i32* %errmask, align 4 ; <i32> [#uses=1]
%tmp82 = call i32 %tmp7677( i32 %tmp80, %struct.PyObject* %tmp79, i32 %tmp56, i32* %first ) ; <i32> [#uses=1]
%tmp83 = icmp eq i32 %tmp82, 0 ; <i1> [#uses=1]
br i1 %tmp83, label %cond_next89, label %UnifiedReturnBlock
@@ -155,70 +155,70 @@ cond_next89: ; preds = %cond_next73, %ubyte_ctype_remainder.exit
br i1 %tmp92, label %UnifiedReturnBlock, label %cond_next97
cond_next97: ; preds = %cond_next89
- %tmp98 = load i8*** @PyArray_API, align 8 ; <i8**> [#uses=1]
- %tmp99 = getelementptr i8** %tmp98, i64 25 ; <i8**> [#uses=1]
- %tmp100 = load i8** %tmp99 ; <i8*> [#uses=1]
+ %tmp98 = load i8**, i8*** @PyArray_API, align 8 ; <i8**> [#uses=1]
+ %tmp99 = getelementptr i8*, i8** %tmp98, i64 25 ; <i8**> [#uses=1]
+ %tmp100 = load i8*, i8** %tmp99 ; <i8*> [#uses=1]
%tmp100101 = bitcast i8* %tmp100 to %struct._typeobject* ; <%struct._typeobject*> [#uses=2]
- %tmp102 = getelementptr %struct._typeobject* %tmp100101, i32 0, i32 38 ; <%struct.PyObject* (%struct._typeobject*, i64)**> [#uses=1]
- %tmp103 = load %struct.PyObject* (%struct._typeobject*, i64)** %tmp102 ; <%struct.PyObject* (%struct._typeobject*, i64)*> [#uses=1]
+ %tmp102 = getelementptr %struct._typeobject, %struct._typeobject* %tmp100101, i32 0, i32 38 ; <%struct.PyObject* (%struct._typeobject*, i64)**> [#uses=1]
+ %tmp103 = load %struct.PyObject* (%struct._typeobject*, i64)*, %struct.PyObject* (%struct._typeobject*, i64)** %tmp102 ; <%struct.PyObject* (%struct._typeobject*, i64)*> [#uses=1]
%tmp108 = call %struct.PyObject* %tmp103( %struct._typeobject* %tmp100101, i64 0 ) ; <%struct.PyObject*> [#uses=3]
%tmp110 = icmp eq %struct.PyObject* %tmp108, null ; <i1> [#uses=1]
br i1 %tmp110, label %cond_true113, label %cond_next135
cond_true113: ; preds = %cond_next97
- %tmp115 = getelementptr %struct.PyObject* %tmp90, i32 0, i32 0 ; <i64*> [#uses=2]
- %tmp116 = load i64* %tmp115 ; <i64> [#uses=1]
+ %tmp115 = getelementptr %struct.PyObject, %struct.PyObject* %tmp90, i32 0, i32 0 ; <i64*> [#uses=2]
+ %tmp116 = load i64, i64* %tmp115 ; <i64> [#uses=1]
%tmp117 = add i64 %tmp116, -1 ; <i64> [#uses=2]
store i64 %tmp117, i64* %tmp115
%tmp123 = icmp eq i64 %tmp117, 0 ; <i1> [#uses=1]
br i1 %tmp123, label %cond_true126, label %UnifiedReturnBlock
cond_true126: ; preds = %cond_true113
- %tmp128 = getelementptr %struct.PyObject* %tmp90, i32 0, i32 1 ; <%struct._typeobject**> [#uses=1]
- %tmp129 = load %struct._typeobject** %tmp128 ; <%struct._typeobject*> [#uses=1]
- %tmp130 = getelementptr %struct._typeobject* %tmp129, i32 0, i32 6 ; <void (%struct.PyObject*)**> [#uses=1]
- %tmp131 = load void (%struct.PyObject*)** %tmp130 ; <void (%struct.PyObject*)*> [#uses=1]
+ %tmp128 = getelementptr %struct.PyObject, %struct.PyObject* %tmp90, i32 0, i32 1 ; <%struct._typeobject**> [#uses=1]
+ %tmp129 = load %struct._typeobject*, %struct._typeobject** %tmp128 ; <%struct._typeobject*> [#uses=1]
+ %tmp130 = getelementptr %struct._typeobject, %struct._typeobject* %tmp129, i32 0, i32 6 ; <void (%struct.PyObject*)**> [#uses=1]
+ %tmp131 = load void (%struct.PyObject*)*, void (%struct.PyObject*)** %tmp130 ; <void (%struct.PyObject*)*> [#uses=1]
call void %tmp131( %struct.PyObject* %tmp90 )
ret %struct.PyObject* null
cond_next135: ; preds = %cond_next97
%tmp136137 = bitcast %struct.PyObject* %tmp108 to %struct.PyBoolScalarObject* ; <%struct.PyBoolScalarObject*> [#uses=1]
- %tmp139 = getelementptr %struct.PyBoolScalarObject* %tmp136137, i32 0, i32 2 ; <i8*> [#uses=1]
+ %tmp139 = getelementptr %struct.PyBoolScalarObject, %struct.PyBoolScalarObject* %tmp136137, i32 0, i32 2 ; <i8*> [#uses=1]
store i8 %out.2, i8* %tmp139
%tmp140141 = bitcast %struct.PyObject* %tmp90 to %struct.PyTupleObject* ; <%struct.PyTupleObject*> [#uses=2]
- %tmp143 = getelementptr %struct.PyTupleObject* %tmp140141, i32 0, i32 3, i64 0 ; <%struct.PyObject**> [#uses=1]
+ %tmp143 = getelementptr %struct.PyTupleObject, %struct.PyTupleObject* %tmp140141, i32 0, i32 3, i64 0 ; <%struct.PyObject**> [#uses=1]
store %struct.PyObject* %tmp108, %struct.PyObject** %tmp143
- %tmp145 = load i8*** @PyArray_API, align 8 ; <i8**> [#uses=1]
- %tmp146 = getelementptr i8** %tmp145, i64 25 ; <i8**> [#uses=1]
- %tmp147 = load i8** %tmp146 ; <i8*> [#uses=1]
+ %tmp145 = load i8**, i8*** @PyArray_API, align 8 ; <i8**> [#uses=1]
+ %tmp146 = getelementptr i8*, i8** %tmp145, i64 25 ; <i8**> [#uses=1]
+ %tmp147 = load i8*, i8** %tmp146 ; <i8*> [#uses=1]
%tmp147148 = bitcast i8* %tmp147 to %struct._typeobject* ; <%struct._typeobject*> [#uses=2]
- %tmp149 = getelementptr %struct._typeobject* %tmp147148, i32 0, i32 38 ; <%struct.PyObject* (%struct._typeobject*, i64)**> [#uses=1]
- %tmp150 = load %struct.PyObject* (%struct._typeobject*, i64)** %tmp149 ; <%struct.PyObject* (%struct._typeobject*, i64)*> [#uses=1]
+ %tmp149 = getelementptr %struct._typeobject, %struct._typeobject* %tmp147148, i32 0, i32 38 ; <%struct.PyObject* (%struct._typeobject*, i64)**> [#uses=1]
+ %tmp150 = load %struct.PyObject* (%struct._typeobject*, i64)*, %struct.PyObject* (%struct._typeobject*, i64)** %tmp149 ; <%struct.PyObject* (%struct._typeobject*, i64)*> [#uses=1]
%tmp155 = call %struct.PyObject* %tmp150( %struct._typeobject* %tmp147148, i64 0 ) ; <%struct.PyObject*> [#uses=3]
%tmp157 = icmp eq %struct.PyObject* %tmp155, null ; <i1> [#uses=1]
br i1 %tmp157, label %cond_true160, label %cond_next182
cond_true160: ; preds = %cond_next135
- %tmp162 = getelementptr %struct.PyObject* %tmp90, i32 0, i32 0 ; <i64*> [#uses=2]
- %tmp163 = load i64* %tmp162 ; <i64> [#uses=1]
+ %tmp162 = getelementptr %struct.PyObject, %struct.PyObject* %tmp90, i32 0, i32 0 ; <i64*> [#uses=2]
+ %tmp163 = load i64, i64* %tmp162 ; <i64> [#uses=1]
%tmp164 = add i64 %tmp163, -1 ; <i64> [#uses=2]
store i64 %tmp164, i64* %tmp162
%tmp170 = icmp eq i64 %tmp164, 0 ; <i1> [#uses=1]
br i1 %tmp170, label %cond_true173, label %UnifiedReturnBlock
cond_true173: ; preds = %cond_true160
- %tmp175 = getelementptr %struct.PyObject* %tmp90, i32 0, i32 1 ; <%struct._typeobject**> [#uses=1]
- %tmp176 = load %struct._typeobject** %tmp175 ; <%struct._typeobject*> [#uses=1]
- %tmp177 = getelementptr %struct._typeobject* %tmp176, i32 0, i32 6 ; <void (%struct.PyObject*)**> [#uses=1]
- %tmp178 = load void (%struct.PyObject*)** %tmp177 ; <void (%struct.PyObject*)*> [#uses=1]
+ %tmp175 = getelementptr %struct.PyObject, %struct.PyObject* %tmp90, i32 0, i32 1 ; <%struct._typeobject**> [#uses=1]
+ %tmp176 = load %struct._typeobject*, %struct._typeobject** %tmp175 ; <%struct._typeobject*> [#uses=1]
+ %tmp177 = getelementptr %struct._typeobject, %struct._typeobject* %tmp176, i32 0, i32 6 ; <void (%struct.PyObject*)**> [#uses=1]
+ %tmp178 = load void (%struct.PyObject*)*, void (%struct.PyObject*)** %tmp177 ; <void (%struct.PyObject*)*> [#uses=1]
call void %tmp178( %struct.PyObject* %tmp90 )
ret %struct.PyObject* null
cond_next182: ; preds = %cond_next135
%tmp183184 = bitcast %struct.PyObject* %tmp155 to %struct.PyBoolScalarObject* ; <%struct.PyBoolScalarObject*> [#uses=1]
- %tmp186 = getelementptr %struct.PyBoolScalarObject* %tmp183184, i32 0, i32 2 ; <i8*> [#uses=1]
+ %tmp186 = getelementptr %struct.PyBoolScalarObject, %struct.PyBoolScalarObject* %tmp183184, i32 0, i32 2 ; <i8*> [#uses=1]
store i8 %out2.0, i8* %tmp186
- %tmp190 = getelementptr %struct.PyTupleObject* %tmp140141, i32 0, i32 3, i64 1 ; <%struct.PyObject**> [#uses=1]
+ %tmp190 = getelementptr %struct.PyTupleObject, %struct.PyTupleObject* %tmp140141, i32 0, i32 3, i64 1 ; <%struct.PyObject**> [#uses=1]
store %struct.PyObject* %tmp155, %struct.PyObject** %tmp190
ret %struct.PyObject* %tmp90
diff --git a/test/CodeGen/X86/2007-08-13-AppendingLinkage.ll b/test/CodeGen/X86/2007-08-13-AppendingLinkage.ll
index c90a85f16949..e08a5c493b5c 100644
--- a/test/CodeGen/X86/2007-08-13-AppendingLinkage.ll
+++ b/test/CodeGen/X86/2007-08-13-AppendingLinkage.ll
@@ -7,6 +7,6 @@ zeroinitializer
define %hlvm_programs_element* @hlvm_get_programs() {
entry:
- ret %hlvm_programs_element* getelementptr([1 x %hlvm_programs_element]*
+ ret %hlvm_programs_element* getelementptr([1 x %hlvm_programs_element], [1 x %hlvm_programs_element]*
@hlvm_programs, i32 0, i32 0)
}
diff --git a/test/CodeGen/X86/2007-09-05-InvalidAsm.ll b/test/CodeGen/X86/2007-09-05-InvalidAsm.ll
index e81534b0110b..eb715125b17d 100644
--- a/test/CodeGen/X86/2007-09-05-InvalidAsm.ll
+++ b/test/CodeGen/X86/2007-09-05-InvalidAsm.ll
@@ -13,33 +13,33 @@ entry:
cond_true: ; preds = %entry
%tmp1415 = shl i16 %param, 3 ; <i16> [#uses=1]
- %tmp17 = getelementptr %struct.AGenericCall* %this, i32 0, i32 1 ; <%struct.ComponentParameters**> [#uses=1]
- %tmp18 = load %struct.ComponentParameters** %tmp17, align 8 ; <%struct.ComponentParameters*> [#uses=1]
+ %tmp17 = getelementptr %struct.AGenericCall, %struct.AGenericCall* %this, i32 0, i32 1 ; <%struct.ComponentParameters**> [#uses=1]
+ %tmp18 = load %struct.ComponentParameters*, %struct.ComponentParameters** %tmp17, align 8 ; <%struct.ComponentParameters*> [#uses=1]
%tmp1920 = bitcast %struct.ComponentParameters* %tmp18 to i8* ; <i8*> [#uses=1]
%tmp212223 = sext i16 %tmp1415 to i64 ; <i64> [#uses=1]
- %tmp24 = getelementptr i8* %tmp1920, i64 %tmp212223 ; <i8*> [#uses=1]
+ %tmp24 = getelementptr i8, i8* %tmp1920, i64 %tmp212223 ; <i8*> [#uses=1]
%tmp2425 = bitcast i8* %tmp24 to i64* ; <i64*> [#uses=1]
- %tmp28 = load i64* %tmp2425, align 8 ; <i64> [#uses=1]
+ %tmp28 = load i64, i64* %tmp2425, align 8 ; <i64> [#uses=1]
%tmp2829 = inttoptr i64 %tmp28 to i32* ; <i32*> [#uses=1]
- %tmp31 = getelementptr %struct.AGenericCall* %this, i32 0, i32 2 ; <i32**> [#uses=1]
+ %tmp31 = getelementptr %struct.AGenericCall, %struct.AGenericCall* %this, i32 0, i32 2 ; <i32**> [#uses=1]
store i32* %tmp2829, i32** %tmp31, align 8
br label %cond_next
cond_next: ; preds = %cond_true, %entry
%tmp4243 = shl i16 %param, 3 ; <i16> [#uses=1]
- %tmp46 = getelementptr %struct.AGenericCall* %this, i32 0, i32 1 ; <%struct.ComponentParameters**> [#uses=1]
- %tmp47 = load %struct.ComponentParameters** %tmp46, align 8 ; <%struct.ComponentParameters*> [#uses=1]
+ %tmp46 = getelementptr %struct.AGenericCall, %struct.AGenericCall* %this, i32 0, i32 1 ; <%struct.ComponentParameters**> [#uses=1]
+ %tmp47 = load %struct.ComponentParameters*, %struct.ComponentParameters** %tmp46, align 8 ; <%struct.ComponentParameters*> [#uses=1]
%tmp4849 = bitcast %struct.ComponentParameters* %tmp47 to i8* ; <i8*> [#uses=1]
%tmp505152 = sext i16 %tmp4243 to i64 ; <i64> [#uses=1]
- %tmp53 = getelementptr i8* %tmp4849, i64 %tmp505152 ; <i8*> [#uses=1]
+ %tmp53 = getelementptr i8, i8* %tmp4849, i64 %tmp505152 ; <i8*> [#uses=1]
%tmp5354 = bitcast i8* %tmp53 to i64* ; <i64*> [#uses=1]
- %tmp58 = load i64* %tmp5354, align 8 ; <i64> [#uses=1]
+ %tmp58 = load i64, i64* %tmp5354, align 8 ; <i64> [#uses=1]
%tmp59 = icmp eq i64 %tmp58, 0 ; <i1> [#uses=1]
br i1 %tmp59, label %UnifiedReturnBlock, label %cond_true63
cond_true63: ; preds = %cond_next
- %tmp65 = getelementptr %struct.AGenericCall* %this, i32 0, i32 0 ; <%struct.AGenericManager**> [#uses=1]
- %tmp66 = load %struct.AGenericManager** %tmp65, align 8 ; <%struct.AGenericManager*> [#uses=1]
+ %tmp65 = getelementptr %struct.AGenericCall, %struct.AGenericCall* %this, i32 0, i32 0 ; <%struct.AGenericManager**> [#uses=1]
+ %tmp66 = load %struct.AGenericManager*, %struct.AGenericManager** %tmp65, align 8 ; <%struct.AGenericManager*> [#uses=1]
%tmp69 = tail call i32 @_ZN15AGenericManager24DefaultComponentInstanceERP23ComponentInstanceRecord( %struct.AGenericManager* %tmp66, %struct.ComponentInstanceRecord** %instance ) ; <i32> [#uses=1]
ret i32 %tmp69
diff --git a/test/CodeGen/X86/2007-10-04-AvoidEFLAGSCopy.ll b/test/CodeGen/X86/2007-10-04-AvoidEFLAGSCopy.ll
index 6fc8ec907eac..7eb018ce525f 100644
--- a/test/CodeGen/X86/2007-10-04-AvoidEFLAGSCopy.ll
+++ b/test/CodeGen/X86/2007-10-04-AvoidEFLAGSCopy.ll
@@ -5,7 +5,7 @@
define fastcc void @sample_3d_linear(%struct.gl_texture_object* %tObj, %struct.gl_texture_image* %img, float %s, float %t, float %r, i8* %red, i8* %green, i8* %blue, i8* %alpha) {
entry:
- %tmp15 = load i32* null, align 4 ; <i32> [#uses=1]
+ %tmp15 = load i32, i32* null, align 4 ; <i32> [#uses=1]
%tmp16 = icmp eq i32 %tmp15, 10497 ; <i1> [#uses=1]
%tmp2152 = call float @floorf( float 0.000000e+00 ) ; <float> [#uses=0]
br i1 %tmp16, label %cond_true, label %cond_false
diff --git a/test/CodeGen/X86/2007-10-12-CoalesceExtSubReg.ll b/test/CodeGen/X86/2007-10-12-CoalesceExtSubReg.ll
index d3a47aefb7d0..c535392ffdfc 100644
--- a/test/CodeGen/X86/2007-10-12-CoalesceExtSubReg.ll
+++ b/test/CodeGen/X86/2007-10-12-CoalesceExtSubReg.ll
@@ -7,15 +7,15 @@ entry:
cond_next127: ; preds = %cond_next391, %entry
%v.1 = phi i32 [ undef, %entry ], [ %tmp411, %cond_next391 ] ; <i32> [#uses=1]
%tmp149 = mul i32 0, %v.1 ; <i32> [#uses=0]
- %tmpss = load i32* %ss, align 4 ; <i32> [#uses=1]
- %tmpbp = load i32* %bp, align 4 ; <i32> [#uses=2]
+ %tmpss = load i32, i32* %ss, align 4 ; <i32> [#uses=1]
+ %tmpbp = load i32, i32* %bp, align 4 ; <i32> [#uses=2]
%tmp254 = and i32 %tmpss, 15 ; <i32> [#uses=1]
%tmp256 = and i32 %tmpbp, 15 ; <i32> [#uses=2]
br label %cond_next391
cond_next391: ; preds = %cond_next127
- %tmp393 = load i32* %ss, align 4 ; <i32> [#uses=1]
- %tmp395 = load i32* %bp, align 4 ; <i32> [#uses=2]
+ %tmp393 = load i32, i32* %ss, align 4 ; <i32> [#uses=1]
+ %tmp395 = load i32, i32* %bp, align 4 ; <i32> [#uses=2]
%tmp396 = shl i32 %tmp393, %tmp395 ; <i32> [#uses=2]
%tmp398 = sub i32 32, %tmp256 ; <i32> [#uses=2]
%tmp399 = lshr i32 %tmp396, %tmp398 ; <i32> [#uses=1]
diff --git a/test/CodeGen/X86/2007-10-12-SpillerUnfold1.ll b/test/CodeGen/X86/2007-10-12-SpillerUnfold1.ll
index ea1bbc464693..c4d5cb970c3f 100644
--- a/test/CodeGen/X86/2007-10-12-SpillerUnfold1.ll
+++ b/test/CodeGen/X86/2007-10-12-SpillerUnfold1.ll
@@ -6,20 +6,20 @@ entry:
bb171.preheader: ; preds = %entry
%tmp176 = fadd float 0.000000e+00, 1.000000e+00 ; <float> [#uses=2]
- %gi.1 = getelementptr float* %fz, i32 0 ; <float*> [#uses=2]
- %tmp240 = load float* %gi.1, align 4 ; <float> [#uses=1]
+ %gi.1 = getelementptr float, float* %fz, i32 0 ; <float*> [#uses=2]
+ %tmp240 = load float, float* %gi.1, align 4 ; <float> [#uses=1]
%tmp242 = fsub float %tmp240, 0.000000e+00 ; <float> [#uses=2]
- %tmp251 = getelementptr float* %fz, i32 0 ; <float*> [#uses=1]
- %tmp252 = load float* %tmp251, align 4 ; <float> [#uses=1]
- %tmp258 = getelementptr float* %fz, i32 0 ; <float*> [#uses=2]
- %tmp259 = load float* %tmp258, align 4 ; <float> [#uses=2]
+ %tmp251 = getelementptr float, float* %fz, i32 0 ; <float*> [#uses=1]
+ %tmp252 = load float, float* %tmp251, align 4 ; <float> [#uses=1]
+ %tmp258 = getelementptr float, float* %fz, i32 0 ; <float*> [#uses=2]
+ %tmp259 = load float, float* %tmp258, align 4 ; <float> [#uses=2]
%tmp261 = fmul float %tmp259, %tmp176 ; <float> [#uses=1]
%tmp262 = fsub float 0.000000e+00, %tmp261 ; <float> [#uses=2]
%tmp269 = fmul float %tmp252, %tmp176 ; <float> [#uses=1]
%tmp276 = fmul float %tmp259, 0.000000e+00 ; <float> [#uses=1]
%tmp277 = fadd float %tmp269, %tmp276 ; <float> [#uses=2]
- %tmp281 = getelementptr float* %fz, i32 0 ; <float*> [#uses=1]
- %tmp282 = load float* %tmp281, align 4 ; <float> [#uses=2]
+ %tmp281 = getelementptr float, float* %fz, i32 0 ; <float*> [#uses=1]
+ %tmp282 = load float, float* %tmp281, align 4 ; <float> [#uses=2]
%tmp284 = fsub float %tmp282, %tmp277 ; <float> [#uses=1]
%tmp291 = fadd float %tmp282, %tmp277 ; <float> [#uses=1]
%tmp298 = fsub float 0.000000e+00, %tmp262 ; <float> [#uses=1]
diff --git a/test/CodeGen/X86/2007-10-12-SpillerUnfold2.ll b/test/CodeGen/X86/2007-10-12-SpillerUnfold2.ll
index 1ec9c70d570c..4b1c1d77f7a0 100644
--- a/test/CodeGen/X86/2007-10-12-SpillerUnfold2.ll
+++ b/test/CodeGen/X86/2007-10-12-SpillerUnfold2.ll
@@ -22,13 +22,13 @@ cond_next245: ; preds = %cond_next127
br i1 false, label %cond_true267, label %cond_next391
cond_true267: ; preds = %cond_next245
- %tmp269 = load i8** %byteptr, align 4 ; <i8*> [#uses=3]
- %tmp270 = load i8* %tmp269, align 1 ; <i8> [#uses=1]
+ %tmp269 = load i8*, i8** %byteptr, align 4 ; <i8*> [#uses=3]
+ %tmp270 = load i8, i8* %tmp269, align 1 ; <i8> [#uses=1]
%tmp270271 = zext i8 %tmp270 to i32 ; <i32> [#uses=1]
- %tmp272 = getelementptr i8* %tmp269, i32 1 ; <i8*> [#uses=2]
+ %tmp272 = getelementptr i8, i8* %tmp269, i32 1 ; <i8*> [#uses=2]
store i8* %tmp272, i8** %byteptr, align 4
- %tmp276 = load i8* %tmp272, align 1 ; <i8> [#uses=1]
- %tmp278 = getelementptr i8* %tmp269, i32 2 ; <i8*> [#uses=1]
+ %tmp276 = load i8, i8* %tmp272, align 1 ; <i8> [#uses=1]
+ %tmp278 = getelementptr i8, i8* %tmp269, i32 2 ; <i8*> [#uses=1]
store i8* %tmp278, i8** %byteptr, align 4
%tmp286 = icmp eq i32 %tmp270271, %markerPrefix ; <i1> [#uses=1]
%cond = icmp eq i8 %tmp276, 0 ; <i1> [#uses=1]
@@ -42,7 +42,7 @@ cond_next327: ; preds = %cond_true267
br i1 false, label %cond_true343, label %cond_next391
cond_true343: ; preds = %cond_next327
- %tmp345 = load i8** %byteptr, align 4 ; <i8*> [#uses=1]
+ %tmp345 = load i8*, i8** %byteptr, align 4 ; <i8*> [#uses=1]
store i8* null, i8** %byteptr, align 4
store i8* %tmp345, i8** %byteptr, align 4
br label %cond_next391
diff --git a/test/CodeGen/X86/2007-10-14-CoalescerCrash.ll b/test/CodeGen/X86/2007-10-14-CoalescerCrash.ll
index 8a55935cc1f8..941925987fd4 100644
--- a/test/CodeGen/X86/2007-10-14-CoalescerCrash.ll
+++ b/test/CodeGen/X86/2007-10-14-CoalescerCrash.ll
@@ -10,8 +10,8 @@ bb: ; preds = %bb31, %entry
br i1 false, label %bb6, label %bb31
bb6: ; preds = %bb
- %tmp10 = load i64* null, align 8 ; <i64> [#uses=1]
- %tmp16 = load i64* null, align 8 ; <i64> [#uses=1]
+ %tmp10 = load i64, i64* null, align 8 ; <i64> [#uses=1]
+ %tmp16 = load i64, i64* null, align 8 ; <i64> [#uses=1]
br i1 false, label %bb23, label %bb31
bb23: ; preds = %bb6
diff --git a/test/CodeGen/X86/2007-10-15-CoalescerCrash.ll b/test/CodeGen/X86/2007-10-15-CoalescerCrash.ll
index 2b56b4ea7129..c6eb6f0f0d7a 100644
--- a/test/CodeGen/X86/2007-10-15-CoalescerCrash.ll
+++ b/test/CodeGen/X86/2007-10-15-CoalescerCrash.ll
@@ -362,7 +362,7 @@ bb1159: ; preds = %cond_next1150
cond_true1169: ; preds = %bb1159
%tmp11741175 = trunc i64 %lsum.11225.0 to i32 ; <i32> [#uses=1]
- %tmp1178 = tail call i32 (%struct._IO_FILE* , i8* , ...)* @fprintf( %struct._IO_FILE* noalias %file , i8* getelementptr ([49 x i8]* @.str32, i32 0, i64 0) , i32 %tmp11741175, i32 0 ) ; <i32> [#uses=0]
+ %tmp1178 = tail call i32 (%struct._IO_FILE* , i8* , ...) @fprintf( %struct._IO_FILE* noalias %file , i8* getelementptr ([49 x i8], [49 x i8]* @.str32, i32 0, i64 0) , i32 %tmp11741175, i32 0 ) ; <i32> [#uses=0]
ret void
UnifiedReturnBlock: ; preds = %bb1159
diff --git a/test/CodeGen/X86/2007-10-19-SpillerUnfold.ll b/test/CodeGen/X86/2007-10-19-SpillerUnfold.ll
index 82052b13e18d..30e1f575caf0 100644
--- a/test/CodeGen/X86/2007-10-19-SpillerUnfold.ll
+++ b/test/CodeGen/X86/2007-10-19-SpillerUnfold.ll
@@ -25,7 +25,7 @@ cond_next127: ; preds = %cond_true, %bb
%tmp154155156 = sext i16 %tmp154155 to i32 ; <i32> [#uses=1]
%tmp158 = xor i32 %tmp154155156, %tmp153 ; <i32> [#uses=1]
%tmp160 = or i32 %tmp158, %cnt.0 ; <i32> [#uses=1]
- %tmp171 = load i32* %bitptr, align 4 ; <i32> [#uses=1]
+ %tmp171 = load i32, i32* %bitptr, align 4 ; <i32> [#uses=1]
%tmp180181 = sext i16 0 to i32 ; <i32> [#uses=3]
%tmp183 = add i32 %tmp160, 1 ; <i32> [#uses=1]
br i1 false, label %cond_true188, label %cond_next245
@@ -54,7 +54,7 @@ cond_next327: ; preds = %cond_true267
br i1 false, label %cond_true343, label %cond_next385
cond_true343: ; preds = %cond_next327
- %tmp345 = load i8** %byteptr, align 4 ; <i8*> [#uses=1]
+ %tmp345 = load i8*, i8** %byteptr, align 4 ; <i8*> [#uses=1]
store i8* null, i8** %byteptr, align 4
br i1 false, label %cond_next385, label %cond_true352
@@ -69,8 +69,8 @@ cond_next385: ; preds = %cond_true352, %cond_true343, %cond_next327
br label %cond_next391
cond_next391: ; preds = %cond_next385, %cond_next245
- %tmp393 = load i32* %source, align 4 ; <i32> [#uses=1]
- %tmp395 = load i32* %bitptr, align 4 ; <i32> [#uses=2]
+ %tmp393 = load i32, i32* %source, align 4 ; <i32> [#uses=1]
+ %tmp395 = load i32, i32* %bitptr, align 4 ; <i32> [#uses=2]
%tmp396 = shl i32 %tmp393, %tmp395 ; <i32> [#uses=1]
%tmp398 = sub i32 32, %tmp256 ; <i32> [#uses=1]
%tmp405 = lshr i32 %tmp396, 31 ; <i32> [#uses=1]
diff --git a/test/CodeGen/X86/2007-10-29-ExtendSetCC.ll b/test/CodeGen/X86/2007-10-29-ExtendSetCC.ll
index 573a2177b74e..9f57df87fe48 100644
--- a/test/CodeGen/X86/2007-10-29-ExtendSetCC.ll
+++ b/test/CodeGen/X86/2007-10-29-ExtendSetCC.ll
@@ -2,7 +2,7 @@
define signext i16 @t() {
entry:
- %tmp180 = load i16* null, align 2 ; <i16> [#uses=3]
+ %tmp180 = load i16, i16* null, align 2 ; <i16> [#uses=3]
%tmp180181 = sext i16 %tmp180 to i32 ; <i32> [#uses=1]
%tmp185 = icmp slt i16 %tmp180, 0 ; <i1> [#uses=1]
br i1 %tmp185, label %cond_true188, label %cond_next245
diff --git a/test/CodeGen/X86/2007-10-30-LSRCrash.ll b/test/CodeGen/X86/2007-10-30-LSRCrash.ll
index 42db98b44750..d945d57fad7c 100644
--- a/test/CodeGen/X86/2007-10-30-LSRCrash.ll
+++ b/test/CodeGen/X86/2007-10-30-LSRCrash.ll
@@ -19,7 +19,7 @@ bb76.split: ; preds = %bb69.outer.split.split, %bb69.us208
bb69.outer: ; preds = %bb76.split, %bb98.preheader
%from.0.reg2mem.0.ph.rec = phi i32 [ %tmp75.rec, %bb76.split ], [ 0, %bb98.preheader ] ; <i32> [#uses=1]
%tmp75.rec = add i32 %from.0.reg2mem.0.ph.rec, 1 ; <i32> [#uses=2]
- %tmp75 = getelementptr i8* null, i32 %tmp75.rec ; <i8*> [#uses=6]
+ %tmp75 = getelementptr i8, i8* null, i32 %tmp75.rec ; <i8*> [#uses=6]
br i1 false, label %bb69.us208, label %bb69.outer.split.split
bb69.us208: ; preds = %bb69.outer
diff --git a/test/CodeGen/X86/2007-10-31-extractelement-i64.ll b/test/CodeGen/X86/2007-10-31-extractelement-i64.ll
index 1b8e67dcc9b3..3d52b6cf7b3e 100644
--- a/test/CodeGen/X86/2007-10-31-extractelement-i64.ll
+++ b/test/CodeGen/X86/2007-10-31-extractelement-i64.ll
@@ -9,16 +9,16 @@ entry:
%retval = alloca <1 x i64>, align 8 ; <<1 x i64>*> [#uses=3]
%"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
store <2 x i64> %__A, <2 x i64>* %__A_addr
- %tmp = load <2 x i64>* %__A_addr, align 16 ; <<2 x i64>> [#uses=1]
+ %tmp = load <2 x i64>, <2 x i64>* %__A_addr, align 16 ; <<2 x i64>> [#uses=1]
%tmp1 = bitcast <2 x i64> %tmp to <2 x i64> ; <<2 x i64>> [#uses=1]
%tmp2 = extractelement <2 x i64> %tmp1, i32 0 ; <i64> [#uses=1]
%tmp3 = bitcast i64 %tmp2 to <1 x i64> ; <<1 x i64>> [#uses=1]
store <1 x i64> %tmp3, <1 x i64>* %retval, align 8
- %tmp4 = load <1 x i64>* %retval, align 8 ; <<1 x i64>> [#uses=0]
+ %tmp4 = load <1 x i64>, <1 x i64>* %retval, align 8 ; <<1 x i64>> [#uses=0]
br label %return
return: ; preds = %entry
- %retval5 = load <1 x i64>* %retval ; <<1 x i64>> [#uses=1]
+ %retval5 = load <1 x i64>, <1 x i64>* %retval ; <<1 x i64>> [#uses=1]
ret <1 x i64> %retval5
}
@@ -28,16 +28,16 @@ entry:
%retval = alloca <1 x i64>, align 8 ; <<1 x i64>*> [#uses=3]
%"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
store <2 x i64> %__A, <2 x i64>* %__A_addr
- %tmp = load <2 x i64>* %__A_addr, align 16 ; <<2 x i64>> [#uses=1]
+ %tmp = load <2 x i64>, <2 x i64>* %__A_addr, align 16 ; <<2 x i64>> [#uses=1]
%tmp1 = bitcast <2 x i64> %tmp to <2 x i64> ; <<2 x i64>> [#uses=1]
%tmp2 = extractelement <2 x i64> %tmp1, i32 1 ; <i64> [#uses=1]
%tmp3 = bitcast i64 %tmp2 to <1 x i64> ; <<1 x i64>> [#uses=1]
store <1 x i64> %tmp3, <1 x i64>* %retval, align 8
- %tmp4 = load <1 x i64>* %retval, align 8 ; <<1 x i64>> [#uses=0]
+ %tmp4 = load <1 x i64>, <1 x i64>* %retval, align 8 ; <<1 x i64>> [#uses=0]
br label %return
return: ; preds = %entry
- %retval5 = load <1 x i64>* %retval ; <<1 x i64>> [#uses=1]
+ %retval5 = load <1 x i64>, <1 x i64>* %retval ; <<1 x i64>> [#uses=1]
ret <1 x i64> %retval5
}
@@ -48,16 +48,16 @@ entry:
%tmp = alloca i64, align 8 ; <i64*> [#uses=2]
%"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
store <2 x i64> %__A, <2 x i64>* %__A_addr
- %tmp1 = load <2 x i64>* %__A_addr, align 16 ; <<2 x i64>> [#uses=1]
+ %tmp1 = load <2 x i64>, <2 x i64>* %__A_addr, align 16 ; <<2 x i64>> [#uses=1]
%tmp2 = bitcast <2 x i64> %tmp1 to <2 x i64> ; <<2 x i64>> [#uses=1]
%tmp3 = extractelement <2 x i64> %tmp2, i32 0 ; <i64> [#uses=1]
store i64 %tmp3, i64* %tmp, align 8
- %tmp4 = load i64* %tmp, align 8 ; <i64> [#uses=1]
+ %tmp4 = load i64, i64* %tmp, align 8 ; <i64> [#uses=1]
store i64 %tmp4, i64* %retval, align 8
br label %return
return: ; preds = %entry
- %retval5 = load i64* %retval ; <i64> [#uses=1]
+ %retval5 = load i64, i64* %retval ; <i64> [#uses=1]
ret i64 %retval5
}
@@ -68,15 +68,15 @@ entry:
%tmp = alloca i64, align 8 ; <i64*> [#uses=2]
%"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
store <2 x i64> %__A, <2 x i64>* %__A_addr
- %tmp1 = load <2 x i64>* %__A_addr, align 16 ; <<2 x i64>> [#uses=1]
+ %tmp1 = load <2 x i64>, <2 x i64>* %__A_addr, align 16 ; <<2 x i64>> [#uses=1]
%tmp2 = bitcast <2 x i64> %tmp1 to <2 x i64> ; <<2 x i64>> [#uses=1]
%tmp3 = extractelement <2 x i64> %tmp2, i32 1 ; <i64> [#uses=1]
store i64 %tmp3, i64* %tmp, align 8
- %tmp4 = load i64* %tmp, align 8 ; <i64> [#uses=1]
+ %tmp4 = load i64, i64* %tmp, align 8 ; <i64> [#uses=1]
store i64 %tmp4, i64* %retval, align 8
br label %return
return: ; preds = %entry
- %retval5 = load i64* %retval ; <i64> [#uses=1]
+ %retval5 = load i64, i64* %retval ; <i64> [#uses=1]
ret i64 %retval5
}
diff --git a/test/CodeGen/X86/2007-11-04-LiveIntervalCrash.ll b/test/CodeGen/X86/2007-11-04-LiveIntervalCrash.ll
index 404561848b71..a20fb47d7b10 100644
--- a/test/CodeGen/X86/2007-11-04-LiveIntervalCrash.ll
+++ b/test/CodeGen/X86/2007-11-04-LiveIntervalCrash.ll
@@ -12,7 +12,7 @@ entry:
bb: ; preds = %bb, %entry
%name8.0.reg2mem.0.rec = phi i64 [ %indvar.next, %bb ], [ 0, %entry ] ; <i64> [#uses=1]
%hash.0.reg2mem.0 = phi i64 [ %tmp27, %bb ], [ 0, %entry ] ; <i64> [#uses=1]
- %tmp13 = load i8* null, align 1 ; <i8> [#uses=1]
+ %tmp13 = load i8, i8* null, align 1 ; <i8> [#uses=1]
%tmp1314 = zext i8 %tmp13 to i64 ; <i64> [#uses=1]
%tmp25 = lshr i64 %tmp1314, 4 ; <i64> [#uses=1]
%tmp22 = add i64 %tmp25, %hash.0.reg2mem.0 ; <i64> [#uses=1]
@@ -30,7 +30,7 @@ bb37: ; preds = %bb37.loopexit, %entry
%hash.0.reg2mem.1 = phi i32 [ %phitmp, %bb37.loopexit ], [ 0, %entry ] ; <i32> [#uses=1]
store i32 %hash.0.reg2mem.1, i32* null, align 8
%tmp75 = tail call i32 null( %struct.dentry* %dir, %struct.qstr* %name ) ; <i32> [#uses=0]
- %tmp84 = tail call i32 (...)* @d_lookup( %struct.dentry* %dir, %struct.qstr* %name ) ; <i32> [#uses=0]
+ %tmp84 = tail call i32 (...) @d_lookup( %struct.dentry* %dir, %struct.qstr* %name ) ; <i32> [#uses=0]
ret %struct.dentry* null
}
diff --git a/test/CodeGen/X86/2007-11-04-rip-immediate-constant.ll b/test/CodeGen/X86/2007-11-04-rip-immediate-constant.ll
index d1699d557113..4618e4b68980 100644
--- a/test/CodeGen/X86/2007-11-04-rip-immediate-constant.ll
+++ b/test/CodeGen/X86/2007-11-04-rip-immediate-constant.ll
@@ -8,6 +8,6 @@ target triple = "x86_64-pc-linux"
define i32 @unknown_bootoption() {
entry:
- tail call void asm sideeffect "foo ${0:c}\0A", "i,~{dirflag},~{fpsr},~{flags}"( i8* getelementptr ([12 x i8]* @str, i32 0, i64 0) )
+ tail call void asm sideeffect "foo ${0:c}\0A", "i,~{dirflag},~{fpsr},~{flags}"( i8* getelementptr ([12 x i8], [12 x i8]* @str, i32 0, i64 0) )
ret i32 undef
}
diff --git a/test/CodeGen/X86/2007-11-06-InstrSched.ll b/test/CodeGen/X86/2007-11-06-InstrSched.ll
index 838a0c35646f..d88b45f68390 100644
--- a/test/CodeGen/X86/2007-11-06-InstrSched.ll
+++ b/test/CodeGen/X86/2007-11-06-InstrSched.ll
@@ -8,11 +8,11 @@ entry:
bb18: ; preds = %bb18, %entry
%i.0.reg2mem.0 = phi i32 [ 0, %entry ], [ %tmp17, %bb18 ] ; <i32> [#uses=3]
%res.0.reg2mem.0 = phi float [ 0.000000e+00, %entry ], [ %tmp14, %bb18 ] ; <float> [#uses=1]
- %tmp3 = getelementptr i32* %x, i32 %i.0.reg2mem.0 ; <i32*> [#uses=1]
- %tmp4 = load i32* %tmp3, align 4 ; <i32> [#uses=1]
+ %tmp3 = getelementptr i32, i32* %x, i32 %i.0.reg2mem.0 ; <i32*> [#uses=1]
+ %tmp4 = load i32, i32* %tmp3, align 4 ; <i32> [#uses=1]
%tmp45 = sitofp i32 %tmp4 to float ; <float> [#uses=1]
- %tmp8 = getelementptr float* %y, i32 %i.0.reg2mem.0 ; <float*> [#uses=1]
- %tmp9 = load float* %tmp8, align 4 ; <float> [#uses=1]
+ %tmp8 = getelementptr float, float* %y, i32 %i.0.reg2mem.0 ; <float*> [#uses=1]
+ %tmp9 = load float, float* %tmp8, align 4 ; <float> [#uses=1]
%tmp11 = fmul float %tmp9, %tmp45 ; <float> [#uses=1]
%tmp14 = fadd float %tmp11, %res.0.reg2mem.0 ; <float> [#uses=2]
%tmp17 = add i32 %i.0.reg2mem.0, 1 ; <i32> [#uses=2]
diff --git a/test/CodeGen/X86/2007-11-07-MulBy4.ll b/test/CodeGen/X86/2007-11-07-MulBy4.ll
index d5b630b59d9f..06e0a779be1b 100644
--- a/test/CodeGen/X86/2007-11-07-MulBy4.ll
+++ b/test/CodeGen/X86/2007-11-07-MulBy4.ll
@@ -7,7 +7,7 @@
define fastcc i32 @foo(i16* %eptr, i8* %ecode, %struct.foo_data* %md, i32 %ims) {
entry:
- %tmp36 = load i32* null, align 4 ; <i32> [#uses=1]
+ %tmp36 = load i32, i32* null, align 4 ; <i32> [#uses=1]
%tmp37 = icmp ult i32 0, %tmp36 ; <i1> [#uses=1]
br i1 %tmp37, label %cond_next79, label %cond_true
@@ -15,7 +15,7 @@ cond_true: ; preds = %entry
ret i32 0
cond_next79: ; preds = %entry
- %tmp85 = load i32* null, align 4 ; <i32> [#uses=1]
+ %tmp85 = load i32, i32* null, align 4 ; <i32> [#uses=1]
%tmp86 = icmp ult i32 0, %tmp85 ; <i1> [#uses=1]
br i1 %tmp86, label %cond_next130, label %cond_true89
diff --git a/test/CodeGen/X86/2007-12-16-BURRSchedCrash.ll b/test/CodeGen/X86/2007-12-16-BURRSchedCrash.ll
index 455de91d30ab..3404fe61fc12 100644
--- a/test/CodeGen/X86/2007-12-16-BURRSchedCrash.ll
+++ b/test/CodeGen/X86/2007-12-16-BURRSchedCrash.ll
@@ -13,14 +13,14 @@ bb848: ; preds = %entry
ret void
bb1271: ; preds = %bb898
- %tmp1272 = getelementptr %struct.c34007g__pkg__parent* %x8, i32 0, i32 0 ; <i32**> [#uses=1]
+ %tmp1272 = getelementptr %struct.c34007g__pkg__parent, %struct.c34007g__pkg__parent* %x8, i32 0, i32 0 ; <i32**> [#uses=1]
%x82167 = bitcast %struct.c34007g__pkg__parent* %x8 to i64* ; <i64*> [#uses=1]
br i1 true, label %bb4668, label %bb848
bb4668: ; preds = %bb4648
- %tmp5464 = load i64* %x82167, align 8 ; <i64> [#uses=1]
+ %tmp5464 = load i64, i64* %x82167, align 8 ; <i64> [#uses=1]
%tmp5467 = icmp ne i64 0, %tmp5464 ; <i1> [#uses=1]
- %tmp5470 = load i32** %tmp1272, align 8 ; <i32*> [#uses=1]
+ %tmp5470 = load i32*, i32** %tmp1272, align 8 ; <i32*> [#uses=1]
%tmp5471 = icmp eq i32* %tmp5470, null ; <i1> [#uses=1]
call fastcc void @c34007g__pkg__create.311( %struct.c34007g__pkg__parent* null, i32 7, i32 9, i32 2, i32 4, i32 1 )
%tmp5475 = or i1 %tmp5471, %tmp5467 ; <i1> [#uses=1]
diff --git a/test/CodeGen/X86/2007-12-18-LoadCSEBug.ll b/test/CodeGen/X86/2007-12-18-LoadCSEBug.ll
index 99df20da2510..26d18273dd47 100644
--- a/test/CodeGen/X86/2007-12-18-LoadCSEBug.ll
+++ b/test/CodeGen/X86/2007-12-18-LoadCSEBug.ll
@@ -7,14 +7,14 @@
define void @_ada_c34007g() {
entry:
%x8 = alloca %struct.c34007g__pkg__parent, align 8 ; <%struct.c34007g__pkg__parent*> [#uses=2]
- %tmp1272 = getelementptr %struct.c34007g__pkg__parent* %x8, i32 0, i32 0 ; <i32**> [#uses=1]
+ %tmp1272 = getelementptr %struct.c34007g__pkg__parent, %struct.c34007g__pkg__parent* %x8, i32 0, i32 0 ; <i32**> [#uses=1]
%x82167 = bitcast %struct.c34007g__pkg__parent* %x8 to i64* ; <i64*> [#uses=1]
br i1 true, label %bb4668, label %bb848
bb4668: ; preds = %bb4648
- %tmp5464 = load i64* %x82167, align 8 ; <i64> [#uses=1]
+ %tmp5464 = load i64, i64* %x82167, align 8 ; <i64> [#uses=1]
%tmp5467 = icmp ne i64 0, %tmp5464 ; <i1> [#uses=1]
- %tmp5470 = load i32** %tmp1272, align 8 ; <i32*> [#uses=1]
+ %tmp5470 = load i32*, i32** %tmp1272, align 8 ; <i32*> [#uses=1]
%tmp5471 = icmp eq i32* %tmp5470, null ; <i1> [#uses=1]
%tmp5475 = or i1 %tmp5471, %tmp5467 ; <i1> [#uses=1]
%tmp5497 = or i1 %tmp5475, false ; <i1> [#uses=1]
diff --git a/test/CodeGen/X86/2008-01-08-SchedulerCrash.ll b/test/CodeGen/X86/2008-01-08-SchedulerCrash.ll
index 9b9b781cfa2e..7da85d3a9a1d 100644
--- a/test/CodeGen/X86/2008-01-08-SchedulerCrash.ll
+++ b/test/CodeGen/X86/2008-01-08-SchedulerCrash.ll
@@ -20,14 +20,14 @@ bb917: ; preds = %entry
bb951: ; preds = %bb986, %entry
%tmp955 = sdiv i32 %offset, 2 ; <i32> [#uses=3]
- %tmp961 = getelementptr %struct.indexentry* null, i32 %tmp955, i32 0 ; <i32*> [#uses=1]
+ %tmp961 = getelementptr %struct.indexentry, %struct.indexentry* null, i32 %tmp955, i32 0 ; <i32*> [#uses=1]
br i1 %cond, label %bb986, label %bb967
bb967: ; preds = %bb951
ret i32 0
bb986: ; preds = %bb951
- %tmp993 = load i32* %tmp961, align 4 ; <i32> [#uses=1]
+ %tmp993 = load i32, i32* %tmp961, align 4 ; <i32> [#uses=1]
%tmp995 = icmp ugt i32 %tmp993, %tmp910 ; <i1> [#uses=2]
%tmp1002 = add i32 %tmp955, 1 ; <i32> [#uses=1]
%low.0 = select i1 %tmp995, i32 0, i32 %tmp1002 ; <i32> [#uses=1]
diff --git a/test/CodeGen/X86/2008-01-16-FPStackifierAssert.ll b/test/CodeGen/X86/2008-01-16-FPStackifierAssert.ll
index 9584b718fea0..6e98f9cb219a 100644
--- a/test/CodeGen/X86/2008-01-16-FPStackifierAssert.ll
+++ b/test/CodeGen/X86/2008-01-16-FPStackifierAssert.ll
@@ -2,29 +2,29 @@
define void @SolveCubic(double %a, double %b, double %c, double %d, i32* %solutions, double* %x) {
entry:
- %tmp71 = load x86_fp80* null, align 16 ; <x86_fp80> [#uses=1]
+ %tmp71 = load x86_fp80, x86_fp80* null, align 16 ; <x86_fp80> [#uses=1]
%tmp72 = fdiv x86_fp80 %tmp71, 0xKC000C000000000000000 ; <x86_fp80> [#uses=1]
%tmp73 = fadd x86_fp80 0xK00000000000000000000, %tmp72 ; <x86_fp80> [#uses=1]
%tmp7374 = fptrunc x86_fp80 %tmp73 to double ; <double> [#uses=1]
store double %tmp7374, double* null, align 8
- %tmp81 = load double* null, align 8 ; <double> [#uses=1]
+ %tmp81 = load double, double* null, align 8 ; <double> [#uses=1]
%tmp82 = fadd double %tmp81, 0x401921FB54442D18 ; <double> [#uses=1]
%tmp83 = fdiv double %tmp82, 3.000000e+00 ; <double> [#uses=1]
%tmp84 = call double @cos( double %tmp83 ) ; <double> [#uses=1]
%tmp85 = fmul double 0.000000e+00, %tmp84 ; <double> [#uses=1]
%tmp8586 = fpext double %tmp85 to x86_fp80 ; <x86_fp80> [#uses=1]
- %tmp87 = load x86_fp80* null, align 16 ; <x86_fp80> [#uses=1]
+ %tmp87 = load x86_fp80, x86_fp80* null, align 16 ; <x86_fp80> [#uses=1]
%tmp88 = fdiv x86_fp80 %tmp87, 0xKC000C000000000000000 ; <x86_fp80> [#uses=1]
%tmp89 = fadd x86_fp80 %tmp8586, %tmp88 ; <x86_fp80> [#uses=1]
%tmp8990 = fptrunc x86_fp80 %tmp89 to double ; <double> [#uses=1]
store double %tmp8990, double* null, align 8
- %tmp97 = load double* null, align 8 ; <double> [#uses=1]
+ %tmp97 = load double, double* null, align 8 ; <double> [#uses=1]
%tmp98 = fadd double %tmp97, 0x402921FB54442D18 ; <double> [#uses=1]
%tmp99 = fdiv double %tmp98, 3.000000e+00 ; <double> [#uses=1]
%tmp100 = call double @cos( double %tmp99 ) ; <double> [#uses=1]
%tmp101 = fmul double 0.000000e+00, %tmp100 ; <double> [#uses=1]
%tmp101102 = fpext double %tmp101 to x86_fp80 ; <x86_fp80> [#uses=1]
- %tmp103 = load x86_fp80* null, align 16 ; <x86_fp80> [#uses=1]
+ %tmp103 = load x86_fp80, x86_fp80* null, align 16 ; <x86_fp80> [#uses=1]
%tmp104 = fdiv x86_fp80 %tmp103, 0xKC000C000000000000000 ; <x86_fp80> [#uses=1]
%tmp105 = fadd x86_fp80 %tmp101102, %tmp104 ; <x86_fp80> [#uses=1]
%tmp105106 = fptrunc x86_fp80 %tmp105 to double ; <double> [#uses=1]
diff --git a/test/CodeGen/X86/2008-01-16-InvalidDAGCombineXform.ll b/test/CodeGen/X86/2008-01-16-InvalidDAGCombineXform.ll
index e91f52ef0569..ffc5a1fb6d41 100644
--- a/test/CodeGen/X86/2008-01-16-InvalidDAGCombineXform.ll
+++ b/test/CodeGen/X86/2008-01-16-InvalidDAGCombineXform.ll
@@ -4,27 +4,27 @@
define void @localize_local_bb19_bb(%struct.node_t** %cur_node) {
newFuncRoot:
- %tmp1 = load %struct.node_t** %cur_node, align 4 ; <%struct.node_t*> [#uses=1]
- %tmp2 = getelementptr %struct.node_t* %tmp1, i32 0, i32 4 ; <double**> [#uses=1]
- %tmp3 = load double** %tmp2, align 4 ; <double*> [#uses=1]
- %tmp4 = load %struct.node_t** %cur_node, align 4 ; <%struct.node_t*> [#uses=1]
- %tmp5 = getelementptr %struct.node_t* %tmp4, i32 0, i32 4 ; <double**> [#uses=1]
+ %tmp1 = load %struct.node_t*, %struct.node_t** %cur_node, align 4 ; <%struct.node_t*> [#uses=1]
+ %tmp2 = getelementptr %struct.node_t, %struct.node_t* %tmp1, i32 0, i32 4 ; <double**> [#uses=1]
+ %tmp3 = load double*, double** %tmp2, align 4 ; <double*> [#uses=1]
+ %tmp4 = load %struct.node_t*, %struct.node_t** %cur_node, align 4 ; <%struct.node_t*> [#uses=1]
+ %tmp5 = getelementptr %struct.node_t, %struct.node_t* %tmp4, i32 0, i32 4 ; <double**> [#uses=1]
store double* %tmp3, double** %tmp5, align 4
- %tmp6 = load %struct.node_t** %cur_node, align 4 ; <%struct.node_t*> [#uses=1]
- %tmp7 = getelementptr %struct.node_t* %tmp6, i32 0, i32 3 ; <double***> [#uses=1]
- %tmp8 = load double*** %tmp7, align 4 ; <double**> [#uses=1]
- %tmp9 = load %struct.node_t** %cur_node, align 4 ; <%struct.node_t*> [#uses=1]
- %tmp10 = getelementptr %struct.node_t* %tmp9, i32 0, i32 3 ; <double***> [#uses=1]
+ %tmp6 = load %struct.node_t*, %struct.node_t** %cur_node, align 4 ; <%struct.node_t*> [#uses=1]
+ %tmp7 = getelementptr %struct.node_t, %struct.node_t* %tmp6, i32 0, i32 3 ; <double***> [#uses=1]
+ %tmp8 = load double**, double*** %tmp7, align 4 ; <double**> [#uses=1]
+ %tmp9 = load %struct.node_t*, %struct.node_t** %cur_node, align 4 ; <%struct.node_t*> [#uses=1]
+ %tmp10 = getelementptr %struct.node_t, %struct.node_t* %tmp9, i32 0, i32 3 ; <double***> [#uses=1]
store double** %tmp8, double*** %tmp10, align 4
- %tmp11 = load %struct.node_t** %cur_node, align 4 ; <%struct.node_t*> [#uses=1]
- %tmp12 = getelementptr %struct.node_t* %tmp11, i32 0, i32 0 ; <double**> [#uses=1]
- %tmp13 = load double** %tmp12, align 4 ; <double*> [#uses=1]
- %tmp14 = load %struct.node_t** %cur_node, align 4 ; <%struct.node_t*> [#uses=1]
- %tmp15 = getelementptr %struct.node_t* %tmp14, i32 0, i32 0 ; <double**> [#uses=1]
+ %tmp11 = load %struct.node_t*, %struct.node_t** %cur_node, align 4 ; <%struct.node_t*> [#uses=1]
+ %tmp12 = getelementptr %struct.node_t, %struct.node_t* %tmp11, i32 0, i32 0 ; <double**> [#uses=1]
+ %tmp13 = load double*, double** %tmp12, align 4 ; <double*> [#uses=1]
+ %tmp14 = load %struct.node_t*, %struct.node_t** %cur_node, align 4 ; <%struct.node_t*> [#uses=1]
+ %tmp15 = getelementptr %struct.node_t, %struct.node_t* %tmp14, i32 0, i32 0 ; <double**> [#uses=1]
store double* %tmp13, double** %tmp15, align 4
- %tmp16 = load %struct.node_t** %cur_node, align 4 ; <%struct.node_t*> [#uses=1]
- %tmp17 = getelementptr %struct.node_t* %tmp16, i32 0, i32 1 ; <%struct.node_t**> [#uses=1]
- %tmp18 = load %struct.node_t** %tmp17, align 4 ; <%struct.node_t*> [#uses=1]
+ %tmp16 = load %struct.node_t*, %struct.node_t** %cur_node, align 4 ; <%struct.node_t*> [#uses=1]
+ %tmp17 = getelementptr %struct.node_t, %struct.node_t* %tmp16, i32 0, i32 1 ; <%struct.node_t**> [#uses=1]
+ %tmp18 = load %struct.node_t*, %struct.node_t** %tmp17, align 4 ; <%struct.node_t*> [#uses=1]
store %struct.node_t* %tmp18, %struct.node_t** %cur_node, align 4
ret void
}
diff --git a/test/CodeGen/X86/2008-02-05-ISelCrash.ll b/test/CodeGen/X86/2008-02-05-ISelCrash.ll
index 443a32de3b42..ce233a9a554a 100644
--- a/test/CodeGen/X86/2008-02-05-ISelCrash.ll
+++ b/test/CodeGen/X86/2008-02-05-ISelCrash.ll
@@ -5,7 +5,7 @@
define fastcc i32 @ab(i32 %alpha, i32 %beta) nounwind {
entry:
- %tmp1 = load i64* @nodes, align 8 ; <i64> [#uses=1]
+ %tmp1 = load i64, i64* @nodes, align 8 ; <i64> [#uses=1]
%tmp2 = add i64 %tmp1, 1 ; <i64> [#uses=1]
store i64 %tmp2, i64* @nodes, align 8
ret i32 0
diff --git a/test/CodeGen/X86/2008-02-06-LoadFoldingBug.ll b/test/CodeGen/X86/2008-02-06-LoadFoldingBug.ll
index 35857b7e01e6..56b1c7836e12 100644
--- a/test/CodeGen/X86/2008-02-06-LoadFoldingBug.ll
+++ b/test/CodeGen/X86/2008-02-06-LoadFoldingBug.ll
@@ -6,14 +6,14 @@ entry:
%memtmp = alloca { double, double }, align 8 ; <{ double, double }*> [#uses=3]
%tmp4 = fsub double -0.000000e+00, %z.1 ; <double> [#uses=1]
call void @casinh( { double, double }* sret %memtmp, double %tmp4, double %z.0 ) nounwind
- %tmp19 = getelementptr { double, double }* %memtmp, i32 0, i32 0 ; <double*> [#uses=1]
- %tmp20 = load double* %tmp19, align 8 ; <double> [#uses=1]
- %tmp22 = getelementptr { double, double }* %memtmp, i32 0, i32 1 ; <double*> [#uses=1]
- %tmp23 = load double* %tmp22, align 8 ; <double> [#uses=1]
+ %tmp19 = getelementptr { double, double }, { double, double }* %memtmp, i32 0, i32 0 ; <double*> [#uses=1]
+ %tmp20 = load double, double* %tmp19, align 8 ; <double> [#uses=1]
+ %tmp22 = getelementptr { double, double }, { double, double }* %memtmp, i32 0, i32 1 ; <double*> [#uses=1]
+ %tmp23 = load double, double* %tmp22, align 8 ; <double> [#uses=1]
%tmp32 = fsub double -0.000000e+00, %tmp20 ; <double> [#uses=1]
- %tmp37 = getelementptr { double, double }* %agg.result, i32 0, i32 0 ; <double*> [#uses=1]
+ %tmp37 = getelementptr { double, double }, { double, double }* %agg.result, i32 0, i32 0 ; <double*> [#uses=1]
store double %tmp23, double* %tmp37, align 8
- %tmp40 = getelementptr { double, double }* %agg.result, i32 0, i32 1 ; <double*> [#uses=1]
+ %tmp40 = getelementptr { double, double }, { double, double }* %agg.result, i32 0, i32 1 ; <double*> [#uses=1]
store double %tmp32, double* %tmp40, align 8
ret void
}
diff --git a/test/CodeGen/X86/2008-02-18-TailMergingBug.ll b/test/CodeGen/X86/2008-02-18-TailMergingBug.ll
index 363a6008a00d..ef69bd01cb96 100644
--- a/test/CodeGen/X86/2008-02-18-TailMergingBug.ll
+++ b/test/CodeGen/X86/2008-02-18-TailMergingBug.ll
@@ -6,13 +6,13 @@
define void @minmax(float* %result) nounwind optsize {
entry:
- %tmp2 = load float* %result, align 4 ; <float> [#uses=6]
- %tmp4 = getelementptr float* %result, i32 2 ; <float*> [#uses=5]
- %tmp5 = load float* %tmp4, align 4 ; <float> [#uses=10]
- %tmp7 = getelementptr float* %result, i32 4 ; <float*> [#uses=5]
- %tmp8 = load float* %tmp7, align 4 ; <float> [#uses=8]
- %tmp10 = getelementptr float* %result, i32 6 ; <float*> [#uses=3]
- %tmp11 = load float* %tmp10, align 4 ; <float> [#uses=8]
+ %tmp2 = load float, float* %result, align 4 ; <float> [#uses=6]
+ %tmp4 = getelementptr float, float* %result, i32 2 ; <float*> [#uses=5]
+ %tmp5 = load float, float* %tmp4, align 4 ; <float> [#uses=10]
+ %tmp7 = getelementptr float, float* %result, i32 4 ; <float*> [#uses=5]
+ %tmp8 = load float, float* %tmp7, align 4 ; <float> [#uses=8]
+ %tmp10 = getelementptr float, float* %result, i32 6 ; <float*> [#uses=3]
+ %tmp11 = load float, float* %tmp10, align 4 ; <float> [#uses=8]
%tmp12 = fcmp olt float %tmp8, %tmp11 ; <i1> [#uses=5]
br i1 %tmp12, label %bb, label %bb21
@@ -59,7 +59,7 @@ bb103: ; preds = %bb80, %bb72
bb111: ; preds = %bb103, %bb80, %bb72, %bb50, %bb40, %bb26
%iftmp.0.0.in = phi float* [ %tmp10, %bb103 ], [ %result, %bb26 ], [ %result, %bb40 ], [ %result, %bb50 ], [ %tmp4.mux, %bb80 ], [ %tmp4.mux787, %bb72 ] ; <float*> [#uses=1]
- %iftmp.0.0 = load float* %iftmp.0.0.in ; <float> [#uses=1]
+ %iftmp.0.0 = load float, float* %iftmp.0.0.in ; <float> [#uses=1]
%tmp125 = fcmp ogt float %tmp8, %tmp11 ; <i1> [#uses=5]
br i1 %tmp125, label %bb128, label %bb136
@@ -106,15 +106,15 @@ bb218: ; preds = %bb195, %bb187
bb226: ; preds = %bb218, %bb195, %bb187, %bb165, %bb155, %bb141
%iftmp.7.0.in = phi float* [ %tmp10, %bb218 ], [ %result, %bb141 ], [ %result, %bb155 ], [ %result, %bb165 ], [ %tmp4.mux789, %bb195 ], [ %tmp4.mux791, %bb187 ] ; <float*> [#uses=1]
- %iftmp.7.0 = load float* %iftmp.7.0.in ; <float> [#uses=1]
- %tmp229 = getelementptr float* %result, i32 1 ; <float*> [#uses=7]
- %tmp230 = load float* %tmp229, align 4 ; <float> [#uses=6]
- %tmp232 = getelementptr float* %result, i32 3 ; <float*> [#uses=5]
- %tmp233 = load float* %tmp232, align 4 ; <float> [#uses=10]
- %tmp235 = getelementptr float* %result, i32 5 ; <float*> [#uses=5]
- %tmp236 = load float* %tmp235, align 4 ; <float> [#uses=8]
- %tmp238 = getelementptr float* %result, i32 7 ; <float*> [#uses=3]
- %tmp239 = load float* %tmp238, align 4 ; <float> [#uses=8]
+ %iftmp.7.0 = load float, float* %iftmp.7.0.in ; <float> [#uses=1]
+ %tmp229 = getelementptr float, float* %result, i32 1 ; <float*> [#uses=7]
+ %tmp230 = load float, float* %tmp229, align 4 ; <float> [#uses=6]
+ %tmp232 = getelementptr float, float* %result, i32 3 ; <float*> [#uses=5]
+ %tmp233 = load float, float* %tmp232, align 4 ; <float> [#uses=10]
+ %tmp235 = getelementptr float, float* %result, i32 5 ; <float*> [#uses=5]
+ %tmp236 = load float, float* %tmp235, align 4 ; <float> [#uses=8]
+ %tmp238 = getelementptr float, float* %result, i32 7 ; <float*> [#uses=3]
+ %tmp239 = load float, float* %tmp238, align 4 ; <float> [#uses=8]
%tmp240 = fcmp olt float %tmp236, %tmp239 ; <i1> [#uses=5]
br i1 %tmp240, label %bb243, label %bb251
@@ -161,7 +161,7 @@ bb333: ; preds = %bb310, %bb302
bb341: ; preds = %bb333, %bb310, %bb302, %bb280, %bb270, %bb256
%iftmp.14.0.in = phi float* [ %tmp238, %bb333 ], [ %tmp229, %bb280 ], [ %tmp229, %bb270 ], [ %tmp229, %bb256 ], [ %tmp232.mux, %bb310 ], [ %tmp232.mux794, %bb302 ] ; <float*> [#uses=1]
- %iftmp.14.0 = load float* %iftmp.14.0.in ; <float> [#uses=1]
+ %iftmp.14.0 = load float, float* %iftmp.14.0.in ; <float> [#uses=1]
%tmp355 = fcmp ogt float %tmp236, %tmp239 ; <i1> [#uses=5]
br i1 %tmp355, label %bb358, label %bb366
@@ -208,12 +208,12 @@ bb448: ; preds = %bb425, %bb417
bb456: ; preds = %bb448, %bb425, %bb417, %bb395, %bb385, %bb371
%iftmp.21.0.in = phi float* [ %tmp238, %bb448 ], [ %tmp229, %bb395 ], [ %tmp229, %bb385 ], [ %tmp229, %bb371 ], [ %tmp232.mux796, %bb425 ], [ %tmp232.mux798, %bb417 ] ; <float*> [#uses=1]
- %iftmp.21.0 = load float* %iftmp.21.0.in ; <float> [#uses=1]
+ %iftmp.21.0 = load float, float* %iftmp.21.0.in ; <float> [#uses=1]
%tmp458459 = fpext float %iftmp.21.0 to double ; <double> [#uses=1]
%tmp460461 = fpext float %iftmp.7.0 to double ; <double> [#uses=1]
%tmp462463 = fpext float %iftmp.14.0 to double ; <double> [#uses=1]
%tmp464465 = fpext float %iftmp.0.0 to double ; <double> [#uses=1]
- %tmp467 = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([48 x i8]* @.str, i32 0, i32 0), double %tmp464465, double %tmp462463, double %tmp460461, double %tmp458459 ) nounwind ; <i32> [#uses=0]
+ %tmp467 = tail call i32 (i8*, ...) @printf( i8* getelementptr ([48 x i8], [48 x i8]* @.str, i32 0, i32 0), double %tmp464465, double %tmp462463, double %tmp460461, double %tmp458459 ) nounwind ; <i32> [#uses=0]
ret void
}
diff --git a/test/CodeGen/X86/2008-02-20-InlineAsmClobber.ll b/test/CodeGen/X86/2008-02-20-InlineAsmClobber.ll
index 319e884139ae..b3f303f8fd85 100644
--- a/test/CodeGen/X86/2008-02-20-InlineAsmClobber.ll
+++ b/test/CodeGen/X86/2008-02-20-InlineAsmClobber.ll
@@ -8,7 +8,7 @@ target triple = "i386-apple-darwin8"
define void @test() nounwind {
entry:
- %tmp = load i32* @pixels, align 4 ; <i32> [#uses=1]
+ %tmp = load i32, i32* @pixels, align 4 ; <i32> [#uses=1]
%tmp1 = tail call i32 asm sideeffect "a: $0 $1", "=r,0,~{dirflag},~{fpsr},~{flags},~{ax}"( i32 %tmp ) nounwind ; <i32> [#uses=1]
store i32 %tmp1, i32* @pixels, align 4
ret void
@@ -20,7 +20,7 @@ entry:
define void @test2(i16* %block, i8* %pixels, i32 %line_size) nounwind {
entry:
- %tmp1 = getelementptr i16* %block, i32 64 ; <i16*> [#uses=1]
+ %tmp1 = getelementptr i16, i16* %block, i32 64 ; <i16*> [#uses=1]
%tmp3 = tail call i8* asm sideeffect "b: $0 $1 $2", "=r,r,0,~{dirflag},~{fpsr},~{flags},~{ax}"( i16* %tmp1, i8* %pixels ) nounwind ; <i8*> [#uses=0]
ret void
}
diff --git a/test/CodeGen/X86/2008-02-22-LocalRegAllocBug.ll b/test/CodeGen/X86/2008-02-22-LocalRegAllocBug.ll
index 9185a3671184..75f88b0e3811 100644
--- a/test/CodeGen/X86/2008-02-22-LocalRegAllocBug.ll
+++ b/test/CodeGen/X86/2008-02-22-LocalRegAllocBug.ll
@@ -13,39 +13,39 @@ entry:
store i8* %src, i8** %src_addr
store i32 %dst_stride, i32* %dst_stride_addr
store i32 %src_stride, i32* %src_stride_addr
- %tmp = load i8** %dst_addr, align 4 ; <i8*> [#uses=1]
- %tmp1 = getelementptr i8* %tmp, i32 0 ; <i8*> [#uses=1]
+ %tmp = load i8*, i8** %dst_addr, align 4 ; <i8*> [#uses=1]
+ %tmp1 = getelementptr i8, i8* %tmp, i32 0 ; <i8*> [#uses=1]
%tmp12 = bitcast i8* %tmp1 to i32* ; <i32*> [#uses=1]
- %tmp3 = load i8** %dst_addr, align 4 ; <i8*> [#uses=1]
- %tmp4 = load i32* %dst_stride_addr, align 4 ; <i32> [#uses=1]
- %tmp5 = getelementptr i8* %tmp3, i32 %tmp4 ; <i8*> [#uses=1]
+ %tmp3 = load i8*, i8** %dst_addr, align 4 ; <i8*> [#uses=1]
+ %tmp4 = load i32, i32* %dst_stride_addr, align 4 ; <i32> [#uses=1]
+ %tmp5 = getelementptr i8, i8* %tmp3, i32 %tmp4 ; <i8*> [#uses=1]
%tmp56 = bitcast i8* %tmp5 to i32* ; <i32*> [#uses=1]
- %tmp7 = load i32* %dst_stride_addr, align 4 ; <i32> [#uses=1]
+ %tmp7 = load i32, i32* %dst_stride_addr, align 4 ; <i32> [#uses=1]
%tmp8 = mul i32 %tmp7, 2 ; <i32> [#uses=1]
- %tmp9 = load i8** %dst_addr, align 4 ; <i8*> [#uses=1]
- %tmp10 = getelementptr i8* %tmp9, i32 %tmp8 ; <i8*> [#uses=1]
+ %tmp9 = load i8*, i8** %dst_addr, align 4 ; <i8*> [#uses=1]
+ %tmp10 = getelementptr i8, i8* %tmp9, i32 %tmp8 ; <i8*> [#uses=1]
%tmp1011 = bitcast i8* %tmp10 to i32* ; <i32*> [#uses=1]
- %tmp13 = load i32* %dst_stride_addr, align 4 ; <i32> [#uses=1]
+ %tmp13 = load i32, i32* %dst_stride_addr, align 4 ; <i32> [#uses=1]
%tmp14 = mul i32 %tmp13, 3 ; <i32> [#uses=1]
- %tmp15 = load i8** %dst_addr, align 4 ; <i8*> [#uses=1]
- %tmp16 = getelementptr i8* %tmp15, i32 %tmp14 ; <i8*> [#uses=1]
+ %tmp15 = load i8*, i8** %dst_addr, align 4 ; <i8*> [#uses=1]
+ %tmp16 = getelementptr i8, i8* %tmp15, i32 %tmp14 ; <i8*> [#uses=1]
%tmp1617 = bitcast i8* %tmp16 to i32* ; <i32*> [#uses=1]
- %tmp18 = load i8** %src_addr, align 4 ; <i8*> [#uses=1]
- %tmp19 = getelementptr i8* %tmp18, i32 0 ; <i8*> [#uses=1]
+ %tmp18 = load i8*, i8** %src_addr, align 4 ; <i8*> [#uses=1]
+ %tmp19 = getelementptr i8, i8* %tmp18, i32 0 ; <i8*> [#uses=1]
%tmp1920 = bitcast i8* %tmp19 to i32* ; <i32*> [#uses=1]
- %tmp21 = load i8** %src_addr, align 4 ; <i8*> [#uses=1]
- %tmp22 = load i32* %src_stride_addr, align 4 ; <i32> [#uses=1]
- %tmp23 = getelementptr i8* %tmp21, i32 %tmp22 ; <i8*> [#uses=1]
+ %tmp21 = load i8*, i8** %src_addr, align 4 ; <i8*> [#uses=1]
+ %tmp22 = load i32, i32* %src_stride_addr, align 4 ; <i32> [#uses=1]
+ %tmp23 = getelementptr i8, i8* %tmp21, i32 %tmp22 ; <i8*> [#uses=1]
%tmp2324 = bitcast i8* %tmp23 to i32* ; <i32*> [#uses=1]
- %tmp25 = load i32* %src_stride_addr, align 4 ; <i32> [#uses=1]
+ %tmp25 = load i32, i32* %src_stride_addr, align 4 ; <i32> [#uses=1]
%tmp26 = mul i32 %tmp25, 2 ; <i32> [#uses=1]
- %tmp27 = load i8** %src_addr, align 4 ; <i8*> [#uses=1]
- %tmp28 = getelementptr i8* %tmp27, i32 %tmp26 ; <i8*> [#uses=1]
+ %tmp27 = load i8*, i8** %src_addr, align 4 ; <i8*> [#uses=1]
+ %tmp28 = getelementptr i8, i8* %tmp27, i32 %tmp26 ; <i8*> [#uses=1]
%tmp2829 = bitcast i8* %tmp28 to i32* ; <i32*> [#uses=1]
- %tmp30 = load i32* %src_stride_addr, align 4 ; <i32> [#uses=1]
+ %tmp30 = load i32, i32* %src_stride_addr, align 4 ; <i32> [#uses=1]
%tmp31 = mul i32 %tmp30, 3 ; <i32> [#uses=1]
- %tmp32 = load i8** %src_addr, align 4 ; <i8*> [#uses=1]
- %tmp33 = getelementptr i8* %tmp32, i32 %tmp31 ; <i8*> [#uses=1]
+ %tmp32 = load i8*, i8** %src_addr, align 4 ; <i8*> [#uses=1]
+ %tmp33 = getelementptr i8, i8* %tmp32, i32 %tmp31 ; <i8*> [#uses=1]
%tmp3334 = bitcast i8* %tmp33 to i32* ; <i32*> [#uses=1]
call void asm sideeffect "movd $4, %mm0 \0A\09movd $5, %mm1 \0A\09movd $6, %mm2 \0A\09movd $7, %mm3 \0A\09punpcklbw %mm1, %mm0 \0A\09punpcklbw %mm3, %mm2 \0A\09movq %mm0, %mm1 \0A\09punpcklwd %mm2, %mm0 \0A\09punpckhwd %mm2, %mm1 \0A\09movd %mm0, $0 \0A\09punpckhdq %mm0, %mm0 \0A\09movd %mm0, $1 \0A\09movd %mm1, $2 \0A\09punpckhdq %mm1, %mm1 \0A\09movd %mm1, $3 \0A\09", "=*m,=*m,=*m,=*m,*m,*m,*m,*m,~{dirflag},~{fpsr},~{flags}"( i32* %tmp12, i32* %tmp56, i32* %tmp1011, i32* %tmp1617, i32* %tmp1920, i32* %tmp2324, i32* %tmp2829, i32* %tmp3334 ) nounwind
br label %return
diff --git a/test/CodeGen/X86/2008-02-25-InlineAsmBug.ll b/test/CodeGen/X86/2008-02-25-InlineAsmBug.ll
index 1d31859f46cc..1251e3fda8c7 100644
--- a/test/CodeGen/X86/2008-02-25-InlineAsmBug.ll
+++ b/test/CodeGen/X86/2008-02-25-InlineAsmBug.ll
@@ -3,9 +3,9 @@
define void @h264_h_loop_filter_luma_mmx2(i8* %pix, i32 %stride, i32 %alpha, i32 %beta, i8* %tc0) nounwind {
entry:
- %tmp164 = getelementptr [16 x i32]* null, i32 0, i32 11 ; <i32*> [#uses=1]
- %tmp169 = getelementptr [16 x i32]* null, i32 0, i32 13 ; <i32*> [#uses=1]
- %tmp174 = getelementptr [16 x i32]* null, i32 0, i32 15 ; <i32*> [#uses=1]
+ %tmp164 = getelementptr [16 x i32], [16 x i32]* null, i32 0, i32 11 ; <i32*> [#uses=1]
+ %tmp169 = getelementptr [16 x i32], [16 x i32]* null, i32 0, i32 13 ; <i32*> [#uses=1]
+ %tmp174 = getelementptr [16 x i32], [16 x i32]* null, i32 0, i32 15 ; <i32*> [#uses=1]
%tmp154.sum317 = add i32 0, %stride ; <i32> [#uses=1]
%tmp154.sum315 = mul i32 %stride, 6 ; <i32> [#uses=1]
%tmp154.sum = mul i32 %stride, 7 ; <i32> [#uses=1]
@@ -14,16 +14,16 @@ entry:
bb32: ; preds = %entry
%pix_addr.0327.sum340 = add i32 %pix_addr.0327.rec, 0 ; <i32> [#uses=1]
- %tmp154 = getelementptr i8* %pix, i32 %pix_addr.0327.sum340 ; <i8*> [#uses=1]
+ %tmp154 = getelementptr i8, i8* %pix, i32 %pix_addr.0327.sum340 ; <i8*> [#uses=1]
%tmp177178 = bitcast i8* %tmp154 to i32* ; <i32*> [#uses=1]
%pix_addr.0327.sum339 = add i32 %pix_addr.0327.rec, %tmp154.sum317 ; <i32> [#uses=1]
- %tmp181 = getelementptr i8* %pix, i32 %pix_addr.0327.sum339 ; <i8*> [#uses=1]
+ %tmp181 = getelementptr i8, i8* %pix, i32 %pix_addr.0327.sum339 ; <i8*> [#uses=1]
%tmp181182 = bitcast i8* %tmp181 to i32* ; <i32*> [#uses=1]
%pix_addr.0327.sum338 = add i32 %pix_addr.0327.rec, %tmp154.sum315 ; <i32> [#uses=1]
- %tmp186 = getelementptr i8* %pix, i32 %pix_addr.0327.sum338 ; <i8*> [#uses=1]
+ %tmp186 = getelementptr i8, i8* %pix, i32 %pix_addr.0327.sum338 ; <i8*> [#uses=1]
%tmp186187 = bitcast i8* %tmp186 to i32* ; <i32*> [#uses=1]
%pix_addr.0327.sum337 = add i32 %pix_addr.0327.rec, %tmp154.sum ; <i32> [#uses=1]
- %tmp191 = getelementptr i8* %pix, i32 %pix_addr.0327.sum337 ; <i8*> [#uses=1]
+ %tmp191 = getelementptr i8, i8* %pix, i32 %pix_addr.0327.sum337 ; <i8*> [#uses=1]
%tmp191192 = bitcast i8* %tmp191 to i32* ; <i32*> [#uses=1]
call void asm sideeffect "movd $4, %mm0 \0A\09movd $5, %mm1 \0A\09movd $6, %mm2 \0A\09movd $7, %mm3 \0A\09punpcklbw %mm1, %mm0 \0A\09punpcklbw %mm3, %mm2 \0A\09movq %mm0, %mm1 \0A\09punpcklwd %mm2, %mm0 \0A\09punpckhwd %mm2, %mm1 \0A\09movd %mm0, $0 \0A\09punpckhdq %mm0, %mm0 \0A\09movd %mm0, $1 \0A\09movd %mm1, $2 \0A\09punpckhdq %mm1, %mm1 \0A\09movd %mm1, $3 \0A\09", "=*m,=*m,=*m,=*m,*m,*m,*m,*m,~{dirflag},~{fpsr},~{flags}"( i32* null, i32* %tmp164, i32* %tmp169, i32* %tmp174, i32* %tmp177178, i32* %tmp181182, i32* %tmp186187, i32* %tmp191192 ) nounwind
unreachable
diff --git a/test/CodeGen/X86/2008-02-25-X86-64-CoalescerBug.ll b/test/CodeGen/X86/2008-02-25-X86-64-CoalescerBug.ll
index fd9c35e58b29..382fbed9b88e 100644
--- a/test/CodeGen/X86/2008-02-25-X86-64-CoalescerBug.ll
+++ b/test/CodeGen/X86/2008-02-25-X86-64-CoalescerBug.ll
@@ -11,10 +11,10 @@ entry:
bb53: ; preds = %entry
%tmp55 = call %struct.YY** @AA( i64 1, %struct.XX* %uen ) ; <%struct.YY**> [#uses=3]
- %tmp2728128 = load %struct.XX** null ; <%struct.XX*> [#uses=1]
- %tmp61 = load %struct.YY** %tmp55, align 8 ; <%struct.YY*> [#uses=1]
- %tmp62 = getelementptr %struct.YY* %tmp61, i32 0, i32 0 ; <i64*> [#uses=1]
- %tmp63 = load i64* %tmp62, align 8 ; <i64> [#uses=1]
+ %tmp2728128 = load %struct.XX*, %struct.XX** null ; <%struct.XX*> [#uses=1]
+ %tmp61 = load %struct.YY*, %struct.YY** %tmp55, align 8 ; <%struct.YY*> [#uses=1]
+ %tmp62 = getelementptr %struct.YY, %struct.YY* %tmp61, i32 0, i32 0 ; <i64*> [#uses=1]
+ %tmp63 = load i64, i64* %tmp62, align 8 ; <i64> [#uses=1]
%tmp6566 = zext i16 %tmp45 to i64 ; <i64> [#uses=1]
%tmp67 = shl i64 %tmp6566, 1 ; <i64> [#uses=1]
call void @BB( %struct.YY** %tmp55, i64 %tmp67, i8 signext 0, %struct.XX* %uen )
@@ -30,7 +30,7 @@ bb70: ; preds = %bb119, %bb70.preheader
%tmp.135 = trunc i64 %tmp63 to i32 ; <i32> [#uses=1]
%tmp136 = shl i32 %indvar133, 1 ; <i32> [#uses=1]
%DD = add i32 %tmp136, %tmp.135 ; <i32> [#uses=1]
- %tmp73 = load %struct.ZZ*** %tmp72, align 8 ; <%struct.ZZ**> [#uses=0]
+ %tmp73 = load %struct.ZZ**, %struct.ZZ*** %tmp72, align 8 ; <%struct.ZZ**> [#uses=0]
br i1 false, label %bb119, label %bb77
bb77: ; preds = %bb70
diff --git a/test/CodeGen/X86/2008-02-27-DeadSlotElimBug.ll b/test/CodeGen/X86/2008-02-27-DeadSlotElimBug.ll
index ad7950ccd8e3..857e6237d14f 100644
--- a/test/CodeGen/X86/2008-02-27-DeadSlotElimBug.ll
+++ b/test/CodeGen/X86/2008-02-27-DeadSlotElimBug.ll
@@ -13,8 +13,8 @@ entry:
bb24: ; preds = %bb24, %entry
%tmp9.0.reg2mem.0.rec = phi i32 [ %indvar.next, %bb24 ], [ 0, %entry ] ; <i32> [#uses=3]
- %tmp3.i.i = getelementptr %struct.CompAtom* %tmp1819, i32 %tmp9.0.reg2mem.0.rec, i32 0, i32 1 ; <double*> [#uses=0]
- %tmp5.i.i = getelementptr %struct.CompAtom* %tmp1819, i32 %tmp9.0.reg2mem.0.rec, i32 0, i32 2 ; <double*> [#uses=1]
+ %tmp3.i.i = getelementptr %struct.CompAtom, %struct.CompAtom* %tmp1819, i32 %tmp9.0.reg2mem.0.rec, i32 0, i32 1 ; <double*> [#uses=0]
+ %tmp5.i.i = getelementptr %struct.CompAtom, %struct.CompAtom* %tmp1819, i32 %tmp9.0.reg2mem.0.rec, i32 0, i32 2 ; <double*> [#uses=1]
store double -9.999900e+04, double* %tmp5.i.i, align 4
%indvar.next = add i32 %tmp9.0.reg2mem.0.rec, 1 ; <i32> [#uses=2]
%exitcond = icmp eq i32 %indvar.next, %n ; <i1> [#uses=1]
@@ -33,8 +33,8 @@ bb35: ; preds = %bb24, %entry
%tmp55 = srem i32 %i, 3 ; <i32> [#uses=1]
%tmp56 = add i32 %tmp55, -1 ; <i32> [#uses=1]
%tmp5657 = sitofp i32 %tmp56 to double ; <double> [#uses=1]
- %tmp15.i49 = getelementptr %struct.Lattice* %this, i32 0, i32 0, i32 0 ; <double*> [#uses=1]
- %tmp16.i50 = load double* %tmp15.i49, align 4 ; <double> [#uses=1]
+ %tmp15.i49 = getelementptr %struct.Lattice, %struct.Lattice* %this, i32 0, i32 0, i32 0 ; <double*> [#uses=1]
+ %tmp16.i50 = load double, double* %tmp15.i49, align 4 ; <double> [#uses=1]
%tmp17.i = fmul double %tmp5657, %tmp16.i50 ; <double> [#uses=1]
%tmp20.i39 = fadd double %tmp17.i, %tmp17.i63 ; <double> [#uses=1]
%tmp20.i23 = fadd double %tmp20.i39, %tmp17.i76 ; <double> [#uses=1]
@@ -45,13 +45,13 @@ bb58.preheader: ; preds = %bb35
br label %bb58
bb58: ; preds = %bb58, %bb58.preheader
- %tmp20.i7 = getelementptr %struct.CompAtom* %d, i32 0, i32 2 ; <i32*> [#uses=2]
- %tmp25.i = getelementptr %struct.CompAtom* %tmp1819, i32 0, i32 2 ; <i32*> [#uses=2]
- %tmp74.i = load i32* %tmp20.i7, align 1 ; <i32> [#uses=1]
+ %tmp20.i7 = getelementptr %struct.CompAtom, %struct.CompAtom* %d, i32 0, i32 2 ; <i32*> [#uses=2]
+ %tmp25.i = getelementptr %struct.CompAtom, %struct.CompAtom* %tmp1819, i32 0, i32 2 ; <i32*> [#uses=2]
+ %tmp74.i = load i32, i32* %tmp20.i7, align 1 ; <i32> [#uses=1]
%tmp82.i = and i32 %tmp74.i, 134217728 ; <i32> [#uses=1]
%tmp85.i = or i32 0, %tmp82.i ; <i32> [#uses=1]
store i32 %tmp85.i, i32* %tmp25.i, align 1
- %tmp88.i = load i32* %tmp20.i7, align 1 ; <i32> [#uses=1]
+ %tmp88.i = load i32, i32* %tmp20.i7, align 1 ; <i32> [#uses=1]
%tmp95.i = and i32 %tmp88.i, -268435456 ; <i32> [#uses=1]
%tmp97.i = or i32 0, %tmp95.i ; <i32> [#uses=1]
store i32 %tmp97.i, i32* %tmp25.i, align 1
diff --git a/test/CodeGen/X86/2008-03-07-APIntBug.ll b/test/CodeGen/X86/2008-03-07-APIntBug.ll
index 84e4827d0416..409bcd51a13b 100644
--- a/test/CodeGen/X86/2008-03-07-APIntBug.ll
+++ b/test/CodeGen/X86/2008-03-07-APIntBug.ll
@@ -18,73 +18,73 @@ newFuncRoot:
bb1233.exitStub: ; preds = %bb1163
ret void
bb1163: ; preds = %newFuncRoot
- %tmp1164 = load %struct.rec** %s, align 4 ; <%struct.rec*> [#uses=1]
- %tmp1165 = getelementptr %struct.rec* %tmp1164, i32 0, i32 0 ; <%struct.head_type*> [#uses=1]
+ %tmp1164 = load %struct.rec*, %struct.rec** %s, align 4 ; <%struct.rec*> [#uses=1]
+ %tmp1165 = getelementptr %struct.rec, %struct.rec* %tmp1164, i32 0, i32 0 ; <%struct.head_type*> [#uses=1]
%tmp11651166 = bitcast %struct.head_type* %tmp1165 to %struct.symbol_type* ; <%struct.symbol_type*> [#uses=1]
- %tmp1167 = getelementptr %struct.symbol_type* %tmp11651166, i32 0, i32 3 ; <%struct.rec**> [#uses=1]
- %tmp1168 = load %struct.rec** %tmp1167, align 1 ; <%struct.rec*> [#uses=2]
- %tmp1169 = load %struct.rec** %s, align 4 ; <%struct.rec*> [#uses=1]
- %tmp1170 = getelementptr %struct.rec* %tmp1169, i32 0, i32 0 ; <%struct.head_type*> [#uses=1]
+ %tmp1167 = getelementptr %struct.symbol_type, %struct.symbol_type* %tmp11651166, i32 0, i32 3 ; <%struct.rec**> [#uses=1]
+ %tmp1168 = load %struct.rec*, %struct.rec** %tmp1167, align 1 ; <%struct.rec*> [#uses=2]
+ %tmp1169 = load %struct.rec*, %struct.rec** %s, align 4 ; <%struct.rec*> [#uses=1]
+ %tmp1170 = getelementptr %struct.rec, %struct.rec* %tmp1169, i32 0, i32 0 ; <%struct.head_type*> [#uses=1]
%tmp11701171 = bitcast %struct.head_type* %tmp1170 to %struct.symbol_type* ; <%struct.symbol_type*> [#uses=1]
- %tmp1172 = getelementptr %struct.symbol_type* %tmp11701171, i32 0, i32 3 ; <%struct.rec**> [#uses=1]
- %tmp1173 = load %struct.rec** %tmp1172, align 1 ; <%struct.rec*> [#uses=2]
- %tmp1174 = getelementptr %struct.rec* %tmp1173, i32 0, i32 0 ; <%struct.head_type*> [#uses=1]
+ %tmp1172 = getelementptr %struct.symbol_type, %struct.symbol_type* %tmp11701171, i32 0, i32 3 ; <%struct.rec**> [#uses=1]
+ %tmp1173 = load %struct.rec*, %struct.rec** %tmp1172, align 1 ; <%struct.rec*> [#uses=2]
+ %tmp1174 = getelementptr %struct.rec, %struct.rec* %tmp1173, i32 0, i32 0 ; <%struct.head_type*> [#uses=1]
%tmp11741175 = bitcast %struct.head_type* %tmp1174 to %struct.word_type* ; <%struct.word_type*> [#uses=1]
- %tmp1176 = getelementptr %struct.word_type* %tmp11741175, i32 0, i32 2 ; <%struct.SECOND_UNION*> [#uses=1]
- %tmp1177 = getelementptr %struct.SECOND_UNION* %tmp1176, i32 0, i32 0 ; <{ i16, i8, i8 }*> [#uses=1]
+ %tmp1176 = getelementptr %struct.word_type, %struct.word_type* %tmp11741175, i32 0, i32 2 ; <%struct.SECOND_UNION*> [#uses=1]
+ %tmp1177 = getelementptr %struct.SECOND_UNION, %struct.SECOND_UNION* %tmp1176, i32 0, i32 0 ; <{ i16, i8, i8 }*> [#uses=1]
%tmp11771178 = bitcast { i16, i8, i8 }* %tmp1177 to <{ i8, i8, i8, i8 }>* ; <<{ i8, i8, i8, i8 }>*> [#uses=1]
- %tmp1179 = getelementptr <{ i8, i8, i8, i8 }>* %tmp11771178, i32 0, i32 2 ; <i8*> [#uses=2]
+ %tmp1179 = getelementptr <{ i8, i8, i8, i8 }>, <{ i8, i8, i8, i8 }>* %tmp11771178, i32 0, i32 2 ; <i8*> [#uses=2]
%mask1180 = and i8 1, 1 ; <i8> [#uses=2]
- %tmp1181 = load i8* %tmp1179, align 1 ; <i8> [#uses=1]
+ %tmp1181 = load i8, i8* %tmp1179, align 1 ; <i8> [#uses=1]
%tmp1182 = shl i8 %mask1180, 7 ; <i8> [#uses=1]
%tmp1183 = and i8 %tmp1181, 127 ; <i8> [#uses=1]
%tmp1184 = or i8 %tmp1183, %tmp1182 ; <i8> [#uses=1]
store i8 %tmp1184, i8* %tmp1179, align 1
%mask1185 = and i8 %mask1180, 1 ; <i8> [#uses=0]
- %tmp1186 = getelementptr %struct.rec* %tmp1173, i32 0, i32 0 ; <%struct.head_type*> [#uses=1]
+ %tmp1186 = getelementptr %struct.rec, %struct.rec* %tmp1173, i32 0, i32 0 ; <%struct.head_type*> [#uses=1]
%tmp11861187 = bitcast %struct.head_type* %tmp1186 to %struct.word_type* ; <%struct.word_type*> [#uses=1]
- %tmp1188 = getelementptr %struct.word_type* %tmp11861187, i32 0, i32 2 ; <%struct.SECOND_UNION*> [#uses=1]
- %tmp1189 = getelementptr %struct.SECOND_UNION* %tmp1188, i32 0, i32 0 ; <{ i16, i8, i8 }*> [#uses=1]
+ %tmp1188 = getelementptr %struct.word_type, %struct.word_type* %tmp11861187, i32 0, i32 2 ; <%struct.SECOND_UNION*> [#uses=1]
+ %tmp1189 = getelementptr %struct.SECOND_UNION, %struct.SECOND_UNION* %tmp1188, i32 0, i32 0 ; <{ i16, i8, i8 }*> [#uses=1]
%tmp11891190 = bitcast { i16, i8, i8 }* %tmp1189 to <{ i8, i8, i8, i8 }>* ; <<{ i8, i8, i8, i8 }>*> [#uses=1]
- %tmp1191 = getelementptr <{ i8, i8, i8, i8 }>* %tmp11891190, i32 0, i32 2 ; <i8*> [#uses=1]
- %tmp1192 = load i8* %tmp1191, align 1 ; <i8> [#uses=1]
+ %tmp1191 = getelementptr <{ i8, i8, i8, i8 }>, <{ i8, i8, i8, i8 }>* %tmp11891190, i32 0, i32 2 ; <i8*> [#uses=1]
+ %tmp1192 = load i8, i8* %tmp1191, align 1 ; <i8> [#uses=1]
%tmp1193 = lshr i8 %tmp1192, 7 ; <i8> [#uses=1]
%mask1194 = and i8 %tmp1193, 1 ; <i8> [#uses=2]
%mask1195 = and i8 %mask1194, 1 ; <i8> [#uses=0]
- %tmp1196 = getelementptr %struct.rec* %tmp1168, i32 0, i32 0 ; <%struct.head_type*> [#uses=1]
+ %tmp1196 = getelementptr %struct.rec, %struct.rec* %tmp1168, i32 0, i32 0 ; <%struct.head_type*> [#uses=1]
%tmp11961197 = bitcast %struct.head_type* %tmp1196 to %struct.word_type* ; <%struct.word_type*> [#uses=1]
- %tmp1198 = getelementptr %struct.word_type* %tmp11961197, i32 0, i32 2 ; <%struct.SECOND_UNION*> [#uses=1]
- %tmp1199 = getelementptr %struct.SECOND_UNION* %tmp1198, i32 0, i32 0 ; <{ i16, i8, i8 }*> [#uses=1]
+ %tmp1198 = getelementptr %struct.word_type, %struct.word_type* %tmp11961197, i32 0, i32 2 ; <%struct.SECOND_UNION*> [#uses=1]
+ %tmp1199 = getelementptr %struct.SECOND_UNION, %struct.SECOND_UNION* %tmp1198, i32 0, i32 0 ; <{ i16, i8, i8 }*> [#uses=1]
%tmp11991200 = bitcast { i16, i8, i8 }* %tmp1199 to <{ i8, i8, i8, i8 }>* ; <<{ i8, i8, i8, i8 }>*> [#uses=1]
- %tmp1201 = getelementptr <{ i8, i8, i8, i8 }>* %tmp11991200, i32 0, i32 1 ; <i8*> [#uses=2]
+ %tmp1201 = getelementptr <{ i8, i8, i8, i8 }>, <{ i8, i8, i8, i8 }>* %tmp11991200, i32 0, i32 1 ; <i8*> [#uses=2]
%mask1202 = and i8 %mask1194, 1 ; <i8> [#uses=2]
- %tmp1203 = load i8* %tmp1201, align 1 ; <i8> [#uses=1]
+ %tmp1203 = load i8, i8* %tmp1201, align 1 ; <i8> [#uses=1]
%tmp1204 = shl i8 %mask1202, 1 ; <i8> [#uses=1]
%tmp1205 = and i8 %tmp1204, 2 ; <i8> [#uses=1]
%tmp1206 = and i8 %tmp1203, -3 ; <i8> [#uses=1]
%tmp1207 = or i8 %tmp1206, %tmp1205 ; <i8> [#uses=1]
store i8 %tmp1207, i8* %tmp1201, align 1
%mask1208 = and i8 %mask1202, 1 ; <i8> [#uses=0]
- %tmp1209 = getelementptr %struct.rec* %tmp1168, i32 0, i32 0 ; <%struct.head_type*> [#uses=1]
+ %tmp1209 = getelementptr %struct.rec, %struct.rec* %tmp1168, i32 0, i32 0 ; <%struct.head_type*> [#uses=1]
%tmp12091210 = bitcast %struct.head_type* %tmp1209 to %struct.word_type* ; <%struct.word_type*> [#uses=1]
- %tmp1211 = getelementptr %struct.word_type* %tmp12091210, i32 0, i32 2 ; <%struct.SECOND_UNION*> [#uses=1]
- %tmp1212 = getelementptr %struct.SECOND_UNION* %tmp1211, i32 0, i32 0 ; <{ i16, i8, i8 }*> [#uses=1]
+ %tmp1211 = getelementptr %struct.word_type, %struct.word_type* %tmp12091210, i32 0, i32 2 ; <%struct.SECOND_UNION*> [#uses=1]
+ %tmp1212 = getelementptr %struct.SECOND_UNION, %struct.SECOND_UNION* %tmp1211, i32 0, i32 0 ; <{ i16, i8, i8 }*> [#uses=1]
%tmp12121213 = bitcast { i16, i8, i8 }* %tmp1212 to <{ i8, i8, i8, i8 }>* ; <<{ i8, i8, i8, i8 }>*> [#uses=1]
- %tmp1214 = getelementptr <{ i8, i8, i8, i8 }>* %tmp12121213, i32 0, i32 1 ; <i8*> [#uses=1]
- %tmp1215 = load i8* %tmp1214, align 1 ; <i8> [#uses=1]
+ %tmp1214 = getelementptr <{ i8, i8, i8, i8 }>, <{ i8, i8, i8, i8 }>* %tmp12121213, i32 0, i32 1 ; <i8*> [#uses=1]
+ %tmp1215 = load i8, i8* %tmp1214, align 1 ; <i8> [#uses=1]
%tmp1216 = shl i8 %tmp1215, 6 ; <i8> [#uses=1]
%tmp1217 = lshr i8 %tmp1216, 7 ; <i8> [#uses=1]
%mask1218 = and i8 %tmp1217, 1 ; <i8> [#uses=2]
%mask1219 = and i8 %mask1218, 1 ; <i8> [#uses=0]
- %tmp1220 = load %struct.rec** %s, align 4 ; <%struct.rec*> [#uses=1]
- %tmp1221 = getelementptr %struct.rec* %tmp1220, i32 0, i32 0 ; <%struct.head_type*> [#uses=1]
+ %tmp1220 = load %struct.rec*, %struct.rec** %s, align 4 ; <%struct.rec*> [#uses=1]
+ %tmp1221 = getelementptr %struct.rec, %struct.rec* %tmp1220, i32 0, i32 0 ; <%struct.head_type*> [#uses=1]
%tmp12211222 = bitcast %struct.head_type* %tmp1221 to %struct.word_type* ; <%struct.word_type*> [#uses=1]
- %tmp1223 = getelementptr %struct.word_type* %tmp12211222, i32 0, i32 2 ; <%struct.SECOND_UNION*> [#uses=1]
- %tmp1224 = getelementptr %struct.SECOND_UNION* %tmp1223, i32 0, i32 0 ; <{ i16, i8, i8 }*> [#uses=1]
+ %tmp1223 = getelementptr %struct.word_type, %struct.word_type* %tmp12211222, i32 0, i32 2 ; <%struct.SECOND_UNION*> [#uses=1]
+ %tmp1224 = getelementptr %struct.SECOND_UNION, %struct.SECOND_UNION* %tmp1223, i32 0, i32 0 ; <{ i16, i8, i8 }*> [#uses=1]
%tmp12241225 = bitcast { i16, i8, i8 }* %tmp1224 to <{ i8, i8, i8, i8 }>* ; <<{ i8, i8, i8, i8 }>*> [#uses=1]
- %tmp1226 = getelementptr <{ i8, i8, i8, i8 }>* %tmp12241225, i32 0, i32 1 ; <i8*> [#uses=2]
+ %tmp1226 = getelementptr <{ i8, i8, i8, i8 }>, <{ i8, i8, i8, i8 }>* %tmp12241225, i32 0, i32 1 ; <i8*> [#uses=2]
%mask1227 = and i8 %mask1218, 1 ; <i8> [#uses=2]
- %tmp1228 = load i8* %tmp1226, align 1 ; <i8> [#uses=1]
+ %tmp1228 = load i8, i8* %tmp1226, align 1 ; <i8> [#uses=1]
%tmp1229 = and i8 %mask1227, 1 ; <i8> [#uses=1]
%tmp1230 = and i8 %tmp1228, -2 ; <i8> [#uses=1]
%tmp1231 = or i8 %tmp1230, %tmp1229 ; <i8> [#uses=1]
diff --git a/test/CodeGen/X86/2008-03-10-RegAllocInfLoop.ll b/test/CodeGen/X86/2008-03-10-RegAllocInfLoop.ll
index 40aafb4c54d5..9fb325c1223b 100644
--- a/test/CodeGen/X86/2008-03-10-RegAllocInfLoop.ll
+++ b/test/CodeGen/X86/2008-03-10-RegAllocInfLoop.ll
@@ -5,7 +5,7 @@ declare fastcc i8* @w_addchar(i8*, i32*, i32*, i8 signext ) nounwind
define x86_stdcallcc i32 @parse_backslash(i8** inreg %word, i32* inreg %word_length, i32* inreg %max_length) nounwind {
entry:
- %tmp6 = load i8* null, align 1 ; <i8> [#uses=1]
+ %tmp6 = load i8, i8* null, align 1 ; <i8> [#uses=1]
br label %bb13
bb13: ; preds = %entry
%tmp26 = call fastcc i8* @w_addchar( i8* null, i32* %word_length, i32* %max_length, i8 signext %tmp6 ) nounwind ; <i8*> [#uses=1]
diff --git a/test/CodeGen/X86/2008-03-12-ThreadLocalAlias.ll b/test/CodeGen/X86/2008-03-12-ThreadLocalAlias.ll
index a0106d7798d5..a9e3f33ec618 100644
--- a/test/CodeGen/X86/2008-03-12-ThreadLocalAlias.ll
+++ b/test/CodeGen/X86/2008-03-12-ThreadLocalAlias.ll
@@ -16,12 +16,12 @@ define i32 @foo() {
entry:
%retval = alloca i32 ; <i32*> [#uses=1]
%"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
- %tmp = load %struct.__res_state** @__libc_resp, align 4 ; <%struct.__res_state*> [#uses=1]
- %tmp1 = getelementptr %struct.__res_state* %tmp, i32 0, i32 0 ; <i32*> [#uses=1]
+ %tmp = load %struct.__res_state*, %struct.__res_state** @__libc_resp, align 4 ; <%struct.__res_state*> [#uses=1]
+ %tmp1 = getelementptr %struct.__res_state, %struct.__res_state* %tmp, i32 0, i32 0 ; <i32*> [#uses=1]
store i32 0, i32* %tmp1, align 4
br label %return
return: ; preds = %entry
- %retval2 = load i32* %retval ; <i32> [#uses=1]
+ %retval2 = load i32, i32* %retval ; <i32> [#uses=1]
ret i32 %retval2
}
@@ -31,11 +31,11 @@ define i32 @bar() {
entry:
%retval = alloca i32 ; <i32*> [#uses=1]
%"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
- %tmp = load %struct.__res_state** @__libc_resp, align 4 ; <%struct.__res_state*> [#uses=1]
- %tmp1 = getelementptr %struct.__res_state* %tmp, i32 0, i32 0 ; <i32*> [#uses=1]
+ %tmp = load %struct.__res_state*, %struct.__res_state** @__libc_resp, align 4 ; <%struct.__res_state*> [#uses=1]
+ %tmp1 = getelementptr %struct.__res_state, %struct.__res_state* %tmp, i32 0, i32 0 ; <i32*> [#uses=1]
store i32 1, i32* %tmp1, align 4
br label %return
return: ; preds = %entry
- %retval2 = load i32* %retval ; <i32> [#uses=1]
+ %retval2 = load i32, i32* %retval ; <i32> [#uses=1]
ret i32 %retval2
}
diff --git a/test/CodeGen/X86/2008-03-14-SpillerCrash.ll b/test/CodeGen/X86/2008-03-14-SpillerCrash.ll
index 6b374a7f6f08..d60d0c2fb0bc 100644
--- a/test/CodeGen/X86/2008-03-14-SpillerCrash.ll
+++ b/test/CodeGen/X86/2008-03-14-SpillerCrash.ll
@@ -10,8 +10,8 @@
define i64 @____wcstoll_l_internal(i32* %nptr, i32** %endptr, i32 %base, i32 %group, %struct.__locale_struct* %loc) nounwind {
entry:
- %tmp27 = load i32* null, align 4 ; <i32> [#uses=1]
- %tmp83 = getelementptr i32* %nptr, i32 1 ; <i32*> [#uses=1]
+ %tmp27 = load i32, i32* null, align 4 ; <i32> [#uses=1]
+ %tmp83 = getelementptr i32, i32* %nptr, i32 1 ; <i32*> [#uses=1]
%tmp233 = add i32 0, -48 ; <i32> [#uses=1]
br label %bb271.us
bb271.us: ; preds = %entry
@@ -31,8 +31,8 @@ bb374.outer: ; preds = %bb311.split, %bb271.us
%tmp373.reg2mem.0.ph = add i64 %tmp370371552.pn, %tmp369551.pn ; <i64> [#uses=1]
br label %bb374.us
bb374.us: ; preds = %bb314.us, %bb374.outer
- %tmp376.us = getelementptr i32* %s.5.ph, i32 0 ; <i32*> [#uses=3]
- %tmp378.us = load i32* %tmp376.us, align 4 ; <i32> [#uses=2]
+ %tmp376.us = getelementptr i32, i32* %s.5.ph, i32 0 ; <i32*> [#uses=3]
+ %tmp378.us = load i32, i32* %tmp376.us, align 4 ; <i32> [#uses=2]
%tmp302.us = icmp eq i32* %tmp376.us, %tmp83 ; <i1> [#uses=1]
%bothcond484.us = or i1 false, %tmp302.us ; <i1> [#uses=1]
br i1 %bothcond484.us, label %bb383, label %bb305.us
diff --git a/test/CodeGen/X86/2008-03-23-DarwinAsmComments.ll b/test/CodeGen/X86/2008-03-23-DarwinAsmComments.ll
index 5ca7e3ed3dbf..3e55390de9f1 100644
--- a/test/CodeGen/X86/2008-03-23-DarwinAsmComments.ll
+++ b/test/CodeGen/X86/2008-03-23-DarwinAsmComments.ll
@@ -12,33 +12,33 @@ entry:
cond_true: ; preds = %entry
%tmp1415 = shl i16 %param, 3 ; <i16> [#uses=1]
- %tmp17 = getelementptr %struct.AGenericCall* %this, i32 0, i32 1 ; <%struct.ComponentParameters**> [#uses=1]
- %tmp18 = load %struct.ComponentParameters** %tmp17, align 8 ; <%struct.ComponentParameters*> [#uses=1]
+ %tmp17 = getelementptr %struct.AGenericCall, %struct.AGenericCall* %this, i32 0, i32 1 ; <%struct.ComponentParameters**> [#uses=1]
+ %tmp18 = load %struct.ComponentParameters*, %struct.ComponentParameters** %tmp17, align 8 ; <%struct.ComponentParameters*> [#uses=1]
%tmp1920 = bitcast %struct.ComponentParameters* %tmp18 to i8* ; <i8*> [#uses=1]
%tmp212223 = sext i16 %tmp1415 to i64 ; <i64> [#uses=1]
- %tmp24 = getelementptr i8* %tmp1920, i64 %tmp212223 ; <i8*> [#uses=1]
+ %tmp24 = getelementptr i8, i8* %tmp1920, i64 %tmp212223 ; <i8*> [#uses=1]
%tmp2425 = bitcast i8* %tmp24 to i64* ; <i64*> [#uses=1]
- %tmp28 = load i64* %tmp2425, align 8 ; <i64> [#uses=1]
+ %tmp28 = load i64, i64* %tmp2425, align 8 ; <i64> [#uses=1]
%tmp2829 = inttoptr i64 %tmp28 to i32* ; <i32*> [#uses=1]
- %tmp31 = getelementptr %struct.AGenericCall* %this, i32 0, i32 2 ; <i32**> [#uses=1]
+ %tmp31 = getelementptr %struct.AGenericCall, %struct.AGenericCall* %this, i32 0, i32 2 ; <i32**> [#uses=1]
store i32* %tmp2829, i32** %tmp31, align 8
br label %cond_next
cond_next: ; preds = %cond_true, %entry
%tmp4243 = shl i16 %param, 3 ; <i16> [#uses=1]
- %tmp46 = getelementptr %struct.AGenericCall* %this, i32 0, i32 1 ; <%struct.ComponentParameters**> [#uses=1]
- %tmp47 = load %struct.ComponentParameters** %tmp46, align 8 ; <%struct.ComponentParameters*> [#uses=1]
+ %tmp46 = getelementptr %struct.AGenericCall, %struct.AGenericCall* %this, i32 0, i32 1 ; <%struct.ComponentParameters**> [#uses=1]
+ %tmp47 = load %struct.ComponentParameters*, %struct.ComponentParameters** %tmp46, align 8 ; <%struct.ComponentParameters*> [#uses=1]
%tmp4849 = bitcast %struct.ComponentParameters* %tmp47 to i8* ; <i8*> [#uses=1]
%tmp505152 = sext i16 %tmp4243 to i64 ; <i64> [#uses=1]
- %tmp53 = getelementptr i8* %tmp4849, i64 %tmp505152 ; <i8*> [#uses=1]
+ %tmp53 = getelementptr i8, i8* %tmp4849, i64 %tmp505152 ; <i8*> [#uses=1]
%tmp5354 = bitcast i8* %tmp53 to i64* ; <i64*> [#uses=1]
- %tmp58 = load i64* %tmp5354, align 8 ; <i64> [#uses=1]
+ %tmp58 = load i64, i64* %tmp5354, align 8 ; <i64> [#uses=1]
%tmp59 = icmp eq i64 %tmp58, 0 ; <i1> [#uses=1]
br i1 %tmp59, label %UnifiedReturnBlock, label %cond_true63
cond_true63: ; preds = %cond_next
- %tmp65 = getelementptr %struct.AGenericCall* %this, i32 0, i32 0 ; <%struct.AGenericManager**> [#uses=1]
- %tmp66 = load %struct.AGenericManager** %tmp65, align 8 ; <%struct.AGenericManager*> [#uses=1]
+ %tmp65 = getelementptr %struct.AGenericCall, %struct.AGenericCall* %this, i32 0, i32 0 ; <%struct.AGenericManager**> [#uses=1]
+ %tmp66 = load %struct.AGenericManager*, %struct.AGenericManager** %tmp65, align 8 ; <%struct.AGenericManager*> [#uses=1]
%tmp69 = tail call i32 @_ZN15AGenericManager24DefaultComponentInstanceERP23ComponentInstanceRecord( %struct.AGenericManager* %tmp66, %struct.ComponentInstanceRecord** %instance ) ; <i32> [#uses=1]
ret i32 %tmp69
diff --git a/test/CodeGen/X86/2008-03-31-SpillerFoldingBug.ll b/test/CodeGen/X86/2008-03-31-SpillerFoldingBug.ll
index 305968ac3778..3cc3b83a3405 100644
--- a/test/CodeGen/X86/2008-03-31-SpillerFoldingBug.ll
+++ b/test/CodeGen/X86/2008-03-31-SpillerFoldingBug.ll
@@ -15,21 +15,21 @@
define void @_GLOBAL__I__ZN5Pooma5pinfoE() nounwind {
entry:
- store i32 (...)** getelementptr ([10 x i32 (...)*]* @_ZTVSt19basic_ostringstreamIcSt11char_traitsIcESaIcEE, i32 0, i32 8), i32 (...)*** null, align 4
+ store i32 (...)** getelementptr ([10 x i32 (...)*], [10 x i32 (...)*]* @_ZTVSt19basic_ostringstreamIcSt11char_traitsIcESaIcEE, i32 0, i32 8), i32 (...)*** null, align 4
%tmp96.i.i142.i = call i8* @_Znwm( i32 180 ) nounwind ; <i8*> [#uses=2]
call void @_ZNSt8ios_baseC2Ev( %"struct.std::ios_base"* null ) nounwind
- store i32 (...)** getelementptr ([4 x i32 (...)*]* @_ZTVSt9basic_iosIcSt11char_traitsIcEE, i32 0, i32 2), i32 (...)*** null, align 4
+ store i32 (...)** getelementptr ([4 x i32 (...)*], [4 x i32 (...)*]* @_ZTVSt9basic_iosIcSt11char_traitsIcEE, i32 0, i32 2), i32 (...)*** null, align 4
store i32 (...)** null, i32 (...)*** null, align 4
- %ctg2242.i.i163.i = getelementptr i8* %tmp96.i.i142.i, i32 0 ; <i8*> [#uses=1]
- %tmp150.i.i164.i = load i8** getelementptr ([4 x i8*]* @_ZTTSt19basic_ostringstreamIcSt11char_traitsIcESaIcEE, i32 0, i64 2), align 4 ; <i8*> [#uses=1]
+ %ctg2242.i.i163.i = getelementptr i8, i8* %tmp96.i.i142.i, i32 0 ; <i8*> [#uses=1]
+ %tmp150.i.i164.i = load i8*, i8** getelementptr ([4 x i8*], [4 x i8*]* @_ZTTSt19basic_ostringstreamIcSt11char_traitsIcESaIcEE, i32 0, i64 2), align 4 ; <i8*> [#uses=1]
%tmp150151.i.i165.i = bitcast i8* %tmp150.i.i164.i to i32 (...)** ; <i32 (...)**> [#uses=1]
%tmp153.i.i166.i = bitcast i8* %ctg2242.i.i163.i to i32 (...)*** ; <i32 (...)***> [#uses=1]
store i32 (...)** %tmp150151.i.i165.i, i32 (...)*** %tmp153.i.i166.i, align 4
%tmp159.i.i167.i = bitcast i8* %tmp96.i.i142.i to i32 (...)*** ; <i32 (...)***> [#uses=1]
- store i32 (...)** getelementptr ([10 x i32 (...)*]* @_ZTVSt19basic_ostringstreamIcSt11char_traitsIcESaIcEE, i32 0, i32 3), i32 (...)*** %tmp159.i.i167.i, align 4
- store i32 (...)** getelementptr ([16 x i32 (...)*]* @_ZTVSt15basic_streambufIcSt11char_traitsIcEE, i32 0, i32 2), i32 (...)*** null, align 4
+ store i32 (...)** getelementptr ([10 x i32 (...)*], [10 x i32 (...)*]* @_ZTVSt19basic_ostringstreamIcSt11char_traitsIcESaIcEE, i32 0, i32 3), i32 (...)*** %tmp159.i.i167.i, align 4
+ store i32 (...)** getelementptr ([16 x i32 (...)*], [16 x i32 (...)*]* @_ZTVSt15basic_streambufIcSt11char_traitsIcEE, i32 0, i32 2), i32 (...)*** null, align 4
call void @_ZNSt6localeC1Ev( %"struct.std::locale"* null ) nounwind
- store i32 (...)** getelementptr ([16 x i32 (...)*]* @_ZTVSt15basic_stringbufIcSt11char_traitsIcESaIcEE, i32 0, i32 2), i32 (...)*** null, align 4
+ store i32 (...)** getelementptr ([16 x i32 (...)*], [16 x i32 (...)*]* @_ZTVSt15basic_stringbufIcSt11char_traitsIcESaIcEE, i32 0, i32 2), i32 (...)*** null, align 4
unreachable
}
diff --git a/test/CodeGen/X86/2008-04-09-BranchFolding.ll b/test/CodeGen/X86/2008-04-09-BranchFolding.ll
index f4b2d719ae14..f21a6f37f4b5 100644
--- a/test/CodeGen/X86/2008-04-09-BranchFolding.ll
+++ b/test/CodeGen/X86/2008-04-09-BranchFolding.ll
@@ -16,7 +16,7 @@ bb140: ; preds = %entry
bb17.i: ; preds = %bb140
ret %struct.tree_node* null
bb143: ; preds = %entry
- %tmp8.i43 = load %struct.tree_node** null, align 4 ; <%struct.tree_node*> [#uses=1]
+ %tmp8.i43 = load %struct.tree_node*, %struct.tree_node** null, align 4 ; <%struct.tree_node*> [#uses=1]
br i1 %tmp3.i40, label %bb160, label %bb9.i48
bb9.i48: ; preds = %bb143
ret %struct.tree_node* null
@@ -39,7 +39,7 @@ bb226.i: ; preds = %bb73.i
bb273.i: ; preds = %bb226.i
ret %struct.tree_node* null
bb260: ; preds = %bb226.i
- tail call void (i8*, i32, ...)* @pedwarn_with_file_and_line( i8* %file.0, i32 %line.0, i8* null ) nounwind
+ tail call void (i8*, i32, ...) @pedwarn_with_file_and_line( i8* %file.0, i32 %line.0, i8* null ) nounwind
ret %struct.tree_node* null
bb344: ; preds = %bb174
ret %struct.tree_node* null
diff --git a/test/CodeGen/X86/2008-04-15-LiveVariableBug.ll b/test/CodeGen/X86/2008-04-15-LiveVariableBug.ll
index 0742371dc9ba..b52659134c1b 100644
--- a/test/CodeGen/X86/2008-04-15-LiveVariableBug.ll
+++ b/test/CodeGen/X86/2008-04-15-LiveVariableBug.ll
@@ -40,10 +40,10 @@
define void @"-[AA BB:optionIndex:delegate:CC:contextInfo:]"(%struct.AA* %self, %struct._message_ref_t* %_cmd, %struct.NSError* %inError, i64 %inOptionIndex, %struct.NSObject* %inDelegate, %struct.objc_selector* %inDidRecoverSelector, i8* %inContextInfo) {
entry:
- %tmp105 = load %struct.NSArray** null, align 8 ; <%struct.NSArray*> [#uses=1]
- %tmp107 = load %struct.NSObject** null, align 8 ; <%struct.NSObject*> [#uses=1]
+ %tmp105 = load %struct.NSArray*, %struct.NSArray** null, align 8 ; <%struct.NSArray*> [#uses=1]
+ %tmp107 = load %struct.NSObject*, %struct.NSObject** null, align 8 ; <%struct.NSObject*> [#uses=1]
call void null( %struct.NSObject* %tmp107, %struct._message_ref_t* @"\01L_OBJC_MESSAGE_REF_228", %struct.NSArray* %tmp105, i8 signext 0 )
- %tmp111 = call %struct.NSObject* (%struct.NSObject*, %struct.objc_selector*, ...)* @objc_msgSend( %struct.NSObject* null, %struct.objc_selector* null, i32 0, i8* null ) ; <%struct.NSObject*> [#uses=0]
+ %tmp111 = call %struct.NSObject* (%struct.NSObject*, %struct.objc_selector*, ...) @objc_msgSend( %struct.NSObject* null, %struct.objc_selector* null, i32 0, i8* null ) ; <%struct.NSObject*> [#uses=0]
ret void
}
diff --git a/test/CodeGen/X86/2008-04-16-CoalescerBug.ll b/test/CodeGen/X86/2008-04-16-CoalescerBug.ll
index 3ccc0fe16340..1488034f2eb9 100644
--- a/test/CodeGen/X86/2008-04-16-CoalescerBug.ll
+++ b/test/CodeGen/X86/2008-04-16-CoalescerBug.ll
@@ -22,7 +22,7 @@ bb94.us: ; preds = %bb71.us, %bb53.us
store i16 %tmp113.us, i16* null, align 2
br label %bb53.us
bb71.us: ; preds = %bb53.us
- %tmp80.us = load i8* null, align 1 ; <i8> [#uses=1]
+ %tmp80.us = load i8, i8* null, align 1 ; <i8> [#uses=1]
%tmp8081.us = zext i8 %tmp80.us to i32 ; <i32> [#uses=1]
%tmp87.us = mul i32 %tmp8081.us, 0 ; <i32> [#uses=1]
%tmp92.us = add i32 0, %tmp87.us ; <i32> [#uses=1]
diff --git a/test/CodeGen/X86/2008-04-16-ReMatBug.ll b/test/CodeGen/X86/2008-04-16-ReMatBug.ll
index 3a1de11ea21b..9cae76f658d8 100644
--- a/test/CodeGen/X86/2008-04-16-ReMatBug.ll
+++ b/test/CodeGen/X86/2008-04-16-ReMatBug.ll
@@ -15,7 +15,7 @@ bb: ; preds = %entry
bb28: ; preds = %entry
br i1 false, label %bb37, label %done
bb37: ; preds = %bb28
- %tmp46 = getelementptr %struct.GENV_t* %tmp12, i32 0, i32 10 ; <i16*> [#uses=1]
+ %tmp46 = getelementptr %struct.GENV_t, %struct.GENV_t* %tmp12, i32 0, i32 10 ; <i16*> [#uses=1]
store i16 0, i16* %tmp46, align 4
br i1 false, label %bb74, label %bb92
bb74: ; preds = %bb37
diff --git a/test/CodeGen/X86/2008-04-17-CoalescerBug.ll b/test/CodeGen/X86/2008-04-17-CoalescerBug.ll
index f244793e7a95..d1cfb447a2c3 100644
--- a/test/CodeGen/X86/2008-04-17-CoalescerBug.ll
+++ b/test/CodeGen/X86/2008-04-17-CoalescerBug.ll
@@ -33,16 +33,16 @@ bb161.i: ; preds = %bb142.i
bb182.i: ; preds = %bb142.i
ret void
bb3261: ; preds = %bb7834, %bb161.i
- %tmp3263 = load i32* null, align 4 ; <i32> [#uses=1]
+ %tmp3263 = load i32, i32* null, align 4 ; <i32> [#uses=1]
%tmp3264 = icmp eq i32 %tmp3263, 37 ; <i1> [#uses=1]
br i1 %tmp3264, label %bb3306, label %bb3267
bb3267: ; preds = %bb3261
ret void
bb3306: ; preds = %bb3261
- %tmp3310 = invoke %struct.wxStringBase* @_ZN12wxStringBaseaSEPKw( %struct.wxStringBase* null, i32* getelementptr ([5 x i32]* @.str89, i32 0, i32 0) )
+ %tmp3310 = invoke %struct.wxStringBase* @_ZN12wxStringBaseaSEPKw( %struct.wxStringBase* null, i32* getelementptr ([5 x i32], [5 x i32]* @.str89, i32 0, i32 0) )
to label %bb3314 unwind label %lpad ; <%struct.wxStringBase*> [#uses=0]
bb3314: ; preds = %bb3306
- %tmp3316 = load i32* null, align 4 ; <i32> [#uses=1]
+ %tmp3316 = load i32, i32* null, align 4 ; <i32> [#uses=1]
switch i32 %tmp3316, label %bb7595 [
i32 0, label %bb7819
i32 37, label %bb7806
@@ -108,7 +108,7 @@ bb278.i8617: ; preds = %bb182.i8560
%timeOnly50.0.i8622 = add i32 0, 0 ; <i32> [#uses=1]
br i1 %foo, label %bb440.i8663, label %bb448.i8694
bb440.i8663: ; preds = %bb278.i8617
- invoke void @_Z10wxOnAssertPKwiPKcS0_S0_( i32* getelementptr ([27 x i32]* @.str, i32 0, i32 0), i32 1717, i8* getelementptr ([6 x i8]* @_ZZNK10wxDateTime5GetTmERKNS_8TimeZoneEE12__FUNCTION__, i32 0, i32 0), i32* getelementptr ([29 x i32]* @.str33, i32 0, i32 0), i32* getelementptr ([14 x i32]* @.str4, i32 0, i32 0) )
+ invoke void @_Z10wxOnAssertPKwiPKcS0_S0_( i32* getelementptr ([27 x i32], [27 x i32]* @.str, i32 0, i32 0), i32 1717, i8* getelementptr ([6 x i8], [6 x i8]* @_ZZNK10wxDateTime5GetTmERKNS_8TimeZoneEE12__FUNCTION__, i32 0, i32 0), i32* getelementptr ([29 x i32], [29 x i32]* @.str33, i32 0, i32 0), i32* getelementptr ([14 x i32], [14 x i32]* @.str4, i32 0, i32 0) )
to label %bb448.i8694 unwind label %lpad
bb448.i8694: ; preds = %bb440.i8663, %bb278.i8617
%tmp477.i8669 = srem i32 %timeOnly50.0.i8622, 1000 ; <i32> [#uses=1]
@@ -117,13 +117,13 @@ bb448.i8694: ; preds = %bb440.i8663, %bb278.i8617
invcont5814: ; preds = %bb448.i8694, %bb265.i8606
%tmp812.0.0 = phi i16 [ %tmp477478.i8670, %bb448.i8694 ], [ %tmp273274.i8595, %bb265.i8606 ] ; <i16> [#uses=1]
%tmp58165817 = zext i16 %tmp812.0.0 to i32 ; <i32> [#uses=1]
- invoke void (%struct.wxString*, i32*, ...)* @_ZN8wxString6FormatEPKwz( %struct.wxString* noalias sret null, i32* null, i32 %tmp58165817 )
+ invoke void (%struct.wxString*, i32*, ...) @_ZN8wxString6FormatEPKwz( %struct.wxString* noalias sret null, i32* null, i32 %tmp58165817 )
to label %invcont5831 unwind label %lpad
invcont5831: ; preds = %invcont5814
%tmp5862 = invoke zeroext i8 @_ZN12wxStringBase10ConcatSelfEmPKwm( %struct.wxStringBase* null, i32 0, i32* null, i32 0 )
to label %bb7834 unwind label %lpad8185 ; <i8> [#uses=0]
bb5968: ; preds = %bb3314
- invoke void (%struct.wxString*, i32*, ...)* @_ZN8wxString6FormatEPKwz( %struct.wxString* noalias sret null, i32* null, i32 0 )
+ invoke void (%struct.wxString*, i32*, ...) @_ZN8wxString6FormatEPKwz( %struct.wxString* noalias sret null, i32* null, i32 0 )
to label %invcont5981 unwind label %lpad
invcont5981: ; preds = %bb5968
ret void
diff --git a/test/CodeGen/X86/2008-04-24-pblendw-fold-crash.ll b/test/CodeGen/X86/2008-04-24-pblendw-fold-crash.ll
index 86bce8e977ac..06f7907fec9e 100644
--- a/test/CodeGen/X86/2008-04-24-pblendw-fold-crash.ll
+++ b/test/CodeGen/X86/2008-04-24-pblendw-fold-crash.ll
@@ -6,7 +6,7 @@ target triple = "i386-apple-darwin8"
define i32 @main() nounwind {
entry:
- %tmp122 = load <2 x i64>* null, align 16 ; <<2 x i64>> [#uses=1]
+ %tmp122 = load <2 x i64>, <2 x i64>* null, align 16 ; <<2 x i64>> [#uses=1]
%tmp126 = bitcast <2 x i64> %tmp122 to <8 x i16> ; <<8 x i16>> [#uses=1]
%tmp129 = call <8 x i16> @llvm.x86.sse41.pblendw( <8 x i16> zeroinitializer, <8 x i16> %tmp126, i32 2 ) nounwind ; <<8 x i16>> [#uses=0]
ret i32 0
diff --git a/test/CodeGen/X86/2008-04-28-CoalescerBug.ll b/test/CodeGen/X86/2008-04-28-CoalescerBug.ll
index 7c04206de72f..06bbd74e8bde 100644
--- a/test/CodeGen/X86/2008-04-28-CoalescerBug.ll
+++ b/test/CodeGen/X86/2008-04-28-CoalescerBug.ll
@@ -33,7 +33,7 @@ bb13101: ; preds = %bb13088
bb13107: ; preds = %bb13101, %bb13088
%iftmp.684.0 = phi i32 [ 0, %bb13101 ], [ 65535, %bb13088 ] ; <i32> [#uses=2]
- %tmp13111 = load i64* null, align 8 ; <i64> [#uses=3]
+ %tmp13111 = load i64, i64* null, align 8 ; <i64> [#uses=3]
%tmp13116 = lshr i64 %tmp13111, 16 ; <i64> [#uses=1]
%tmp1311613117 = trunc i64 %tmp13116 to i32 ; <i32> [#uses=1]
%tmp13118 = and i32 %tmp1311613117, 65535 ; <i32> [#uses=1]
diff --git a/test/CodeGen/X86/2008-05-09-PHIElimBug.ll b/test/CodeGen/X86/2008-05-09-PHIElimBug.ll
index cea0076076d6..8ed1b2a759b8 100644
--- a/test/CodeGen/X86/2008-05-09-PHIElimBug.ll
+++ b/test/CodeGen/X86/2008-05-09-PHIElimBug.ll
@@ -14,7 +14,7 @@ bb23821: ; preds = %entry
br i1 false, label %bb23830, label %bb23827
bb23827: ; preds = %bb23821
- %tmp23829 = getelementptr %struct.V* null, i32 0, i32 42 ; <i32*> [#uses=0]
+ %tmp23829 = getelementptr %struct.V, %struct.V* null, i32 0, i32 42 ; <i32*> [#uses=0]
br label %bb23830
bb23830: ; preds = %bb23827, %bb23821, %bb23816.preheader
diff --git a/test/CodeGen/X86/2008-05-09-ShuffleLoweringBug.ll b/test/CodeGen/X86/2008-05-09-ShuffleLoweringBug.ll
index 5ceb5464d2b0..0e4ef1c3260a 100644
--- a/test/CodeGen/X86/2008-05-09-ShuffleLoweringBug.ll
+++ b/test/CodeGen/X86/2008-05-09-ShuffleLoweringBug.ll
@@ -1,7 +1,7 @@
; RUN: llc < %s -march=x86 -mattr=+sse2
define fastcc void @glgVectorFloatConversion() nounwind {
- %tmp12745 = load <4 x float>* null, align 16 ; <<4 x float>> [#uses=1]
+ %tmp12745 = load <4 x float>, <4 x float>* null, align 16 ; <<4 x float>> [#uses=1]
%tmp12773 = insertelement <4 x float> %tmp12745, float 1.000000e+00, i32 1 ; <<4 x float>> [#uses=1]
%tmp12774 = insertelement <4 x float> %tmp12773, float 0.000000e+00, i32 2 ; <<4 x float>> [#uses=1]
%tmp12775 = insertelement <4 x float> %tmp12774, float 1.000000e+00, i32 3 ; <<4 x float>> [#uses=1]
diff --git a/test/CodeGen/X86/2008-05-12-tailmerge-5.ll b/test/CodeGen/X86/2008-05-12-tailmerge-5.ll
index 4852e89c4d99..0669a3267180 100644
--- a/test/CodeGen/X86/2008-05-12-tailmerge-5.ll
+++ b/test/CodeGen/X86/2008-05-12-tailmerge-5.ll
@@ -17,36 +17,36 @@ entry:
%d_addr = alloca i8 ; <i8*> [#uses=2]
%"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
%tmp = bitcast %struct.BoundaryAlignment* %str_addr to { i64, i64 }* ; <{ i64, i64 }*> [#uses=1]
- %tmp1 = getelementptr { i64, i64 }* %tmp, i32 0, i32 0 ; <i64*> [#uses=1]
+ %tmp1 = getelementptr { i64, i64 }, { i64, i64 }* %tmp, i32 0, i32 0 ; <i64*> [#uses=1]
store i64 %str.0, i64* %tmp1
%tmp2 = bitcast %struct.BoundaryAlignment* %str_addr to { i64, i64 }* ; <{ i64, i64 }*> [#uses=1]
- %tmp3 = getelementptr { i64, i64 }* %tmp2, i32 0, i32 1 ; <i64*> [#uses=1]
+ %tmp3 = getelementptr { i64, i64 }, { i64, i64 }* %tmp2, i32 0, i32 1 ; <i64*> [#uses=1]
%bc = bitcast i64* %tmp3 to i8* ; <i8*> [#uses=2]
%byte = trunc i64 %str.1 to i8 ; <i8> [#uses=1]
store i8 %byte, i8* %bc
%shft = lshr i64 %str.1, 8 ; <i64> [#uses=2]
- %Loc = getelementptr i8* %bc, i32 1 ; <i8*> [#uses=2]
+ %Loc = getelementptr i8, i8* %bc, i32 1 ; <i8*> [#uses=2]
%byte4 = trunc i64 %shft to i8 ; <i8> [#uses=1]
store i8 %byte4, i8* %Loc
%shft5 = lshr i64 %shft, 8 ; <i64> [#uses=2]
- %Loc6 = getelementptr i8* %Loc, i32 1 ; <i8*> [#uses=2]
+ %Loc6 = getelementptr i8, i8* %Loc, i32 1 ; <i8*> [#uses=2]
%byte7 = trunc i64 %shft5 to i8 ; <i8> [#uses=1]
store i8 %byte7, i8* %Loc6
%shft8 = lshr i64 %shft5, 8 ; <i64> [#uses=2]
- %Loc9 = getelementptr i8* %Loc6, i32 1 ; <i8*> [#uses=2]
+ %Loc9 = getelementptr i8, i8* %Loc6, i32 1 ; <i8*> [#uses=2]
%byte10 = trunc i64 %shft8 to i8 ; <i8> [#uses=1]
store i8 %byte10, i8* %Loc9
%shft11 = lshr i64 %shft8, 8 ; <i64> [#uses=0]
- %Loc12 = getelementptr i8* %Loc9, i32 1 ; <i8*> [#uses=0]
+ %Loc12 = getelementptr i8, i8* %Loc9, i32 1 ; <i8*> [#uses=0]
store i16 %s, i16* %s_addr
store i32 %j, i32* %j_addr
store i8 %c, i8* %c_addr
store i16 %t, i16* %t_addr
store i16 %u, i16* %u_addr
store i8 %d, i8* %d_addr
- %tmp13 = getelementptr %struct.BoundaryAlignment* %str_addr, i32 0, i32 0 ; <[3 x i8]*> [#uses=1]
+ %tmp13 = getelementptr %struct.BoundaryAlignment, %struct.BoundaryAlignment* %str_addr, i32 0, i32 0 ; <[3 x i8]*> [#uses=1]
%tmp1314 = bitcast [3 x i8]* %tmp13 to i32* ; <i32*> [#uses=1]
- %tmp15 = load i32* %tmp1314, align 4 ; <i32> [#uses=1]
+ %tmp15 = load i32, i32* %tmp1314, align 4 ; <i32> [#uses=1]
%tmp16 = shl i32 %tmp15, 14 ; <i32> [#uses=1]
%tmp17 = ashr i32 %tmp16, 23 ; <i32> [#uses=1]
%tmp1718 = trunc i32 %tmp17 to i16 ; <i16> [#uses=1]
@@ -57,32 +57,32 @@ entry:
%sextl21 = shl i16 %sextr, 7 ; <i16> [#uses=1]
%sextr22 = ashr i16 %sextl21, 7 ; <i16> [#uses=1]
%sextr2223 = sext i16 %sextr22 to i32 ; <i32> [#uses=1]
- %tmp24 = load i32* %j_addr, align 4 ; <i32> [#uses=1]
+ %tmp24 = load i32, i32* %j_addr, align 4 ; <i32> [#uses=1]
%tmp25 = icmp ne i32 %sextr2223, %tmp24 ; <i1> [#uses=1]
%tmp2526 = zext i1 %tmp25 to i8 ; <i8> [#uses=1]
%toBool = icmp ne i8 %tmp2526, 0 ; <i1> [#uses=1]
br i1 %toBool, label %bb, label %bb27
bb: ; preds = %entry
- call void (...)* @abort( ) noreturn nounwind
+ call void (...) @abort( ) noreturn nounwind
unreachable
bb27: ; preds = %entry
- %tmp28 = getelementptr %struct.BoundaryAlignment* %str_addr, i32 0, i32 1 ; <i8*> [#uses=1]
- %tmp29 = load i8* %tmp28, align 4 ; <i8> [#uses=1]
- %tmp30 = load i8* %c_addr, align 1 ; <i8> [#uses=1]
+ %tmp28 = getelementptr %struct.BoundaryAlignment, %struct.BoundaryAlignment* %str_addr, i32 0, i32 1 ; <i8*> [#uses=1]
+ %tmp29 = load i8, i8* %tmp28, align 4 ; <i8> [#uses=1]
+ %tmp30 = load i8, i8* %c_addr, align 1 ; <i8> [#uses=1]
%tmp31 = icmp ne i8 %tmp29, %tmp30 ; <i1> [#uses=1]
%tmp3132 = zext i1 %tmp31 to i8 ; <i8> [#uses=1]
%toBool33 = icmp ne i8 %tmp3132, 0 ; <i1> [#uses=1]
br i1 %toBool33, label %bb34, label %bb35
bb34: ; preds = %bb27
- call void (...)* @abort( ) noreturn nounwind
+ call void (...) @abort( ) noreturn nounwind
unreachable
bb35: ; preds = %bb27
- %tmp36 = getelementptr %struct.BoundaryAlignment* %str_addr, i32 0, i32 2 ; <i16*> [#uses=1]
- %tmp37 = load i16* %tmp36, align 4 ; <i16> [#uses=1]
+ %tmp36 = getelementptr %struct.BoundaryAlignment, %struct.BoundaryAlignment* %str_addr, i32 0, i32 2 ; <i16*> [#uses=1]
+ %tmp37 = load i16, i16* %tmp36, align 4 ; <i16> [#uses=1]
%tmp38 = shl i16 %tmp37, 7 ; <i16> [#uses=1]
%tmp39 = ashr i16 %tmp38, 7 ; <i16> [#uses=1]
%sextl40 = shl i16 %tmp39, 7 ; <i16> [#uses=1]
@@ -91,19 +91,19 @@ bb35: ; preds = %bb27
%sextr43 = ashr i16 %sextl42, 7 ; <i16> [#uses=0]
%sextl44 = shl i16 %sextr41, 7 ; <i16> [#uses=1]
%sextr45 = ashr i16 %sextl44, 7 ; <i16> [#uses=1]
- %tmp46 = load i16* %t_addr, align 2 ; <i16> [#uses=1]
+ %tmp46 = load i16, i16* %t_addr, align 2 ; <i16> [#uses=1]
%tmp47 = icmp ne i16 %sextr45, %tmp46 ; <i1> [#uses=1]
%tmp4748 = zext i1 %tmp47 to i8 ; <i8> [#uses=1]
%toBool49 = icmp ne i8 %tmp4748, 0 ; <i1> [#uses=1]
br i1 %toBool49, label %bb50, label %bb51
bb50: ; preds = %bb35
- call void (...)* @abort( ) noreturn nounwind
+ call void (...) @abort( ) noreturn nounwind
unreachable
bb51: ; preds = %bb35
- %tmp52 = getelementptr %struct.BoundaryAlignment* %str_addr, i32 0, i32 3 ; <i16*> [#uses=1]
- %tmp53 = load i16* %tmp52, align 4 ; <i16> [#uses=1]
+ %tmp52 = getelementptr %struct.BoundaryAlignment, %struct.BoundaryAlignment* %str_addr, i32 0, i32 3 ; <i16*> [#uses=1]
+ %tmp53 = load i16, i16* %tmp52, align 4 ; <i16> [#uses=1]
%tmp54 = shl i16 %tmp53, 7 ; <i16> [#uses=1]
%tmp55 = ashr i16 %tmp54, 7 ; <i16> [#uses=1]
%sextl56 = shl i16 %tmp55, 7 ; <i16> [#uses=1]
@@ -112,27 +112,27 @@ bb51: ; preds = %bb35
%sextr59 = ashr i16 %sextl58, 7 ; <i16> [#uses=0]
%sextl60 = shl i16 %sextr57, 7 ; <i16> [#uses=1]
%sextr61 = ashr i16 %sextl60, 7 ; <i16> [#uses=1]
- %tmp62 = load i16* %u_addr, align 2 ; <i16> [#uses=1]
+ %tmp62 = load i16, i16* %u_addr, align 2 ; <i16> [#uses=1]
%tmp63 = icmp ne i16 %sextr61, %tmp62 ; <i1> [#uses=1]
%tmp6364 = zext i1 %tmp63 to i8 ; <i8> [#uses=1]
%toBool65 = icmp ne i8 %tmp6364, 0 ; <i1> [#uses=1]
br i1 %toBool65, label %bb66, label %bb67
bb66: ; preds = %bb51
- call void (...)* @abort( ) noreturn nounwind
+ call void (...) @abort( ) noreturn nounwind
unreachable
bb67: ; preds = %bb51
- %tmp68 = getelementptr %struct.BoundaryAlignment* %str_addr, i32 0, i32 4 ; <i8*> [#uses=1]
- %tmp69 = load i8* %tmp68, align 4 ; <i8> [#uses=1]
- %tmp70 = load i8* %d_addr, align 1 ; <i8> [#uses=1]
+ %tmp68 = getelementptr %struct.BoundaryAlignment, %struct.BoundaryAlignment* %str_addr, i32 0, i32 4 ; <i8*> [#uses=1]
+ %tmp69 = load i8, i8* %tmp68, align 4 ; <i8> [#uses=1]
+ %tmp70 = load i8, i8* %d_addr, align 1 ; <i8> [#uses=1]
%tmp71 = icmp ne i8 %tmp69, %tmp70 ; <i1> [#uses=1]
%tmp7172 = zext i1 %tmp71 to i8 ; <i8> [#uses=1]
%toBool73 = icmp ne i8 %tmp7172, 0 ; <i1> [#uses=1]
br i1 %toBool73, label %bb74, label %bb75
bb74: ; preds = %bb67
- call void (...)* @abort( ) noreturn nounwind
+ call void (...) @abort( ) noreturn nounwind
unreachable
bb75: ; preds = %bb67
diff --git a/test/CodeGen/X86/2008-05-21-CoalescerBug.ll b/test/CodeGen/X86/2008-05-21-CoalescerBug.ll
index ac167b009a8d..c6709a86d85b 100644
--- a/test/CodeGen/X86/2008-05-21-CoalescerBug.ll
+++ b/test/CodeGen/X86/2008-05-21-CoalescerBug.ll
@@ -74,7 +74,7 @@ entry:
br label %bb497
bb483: ; preds = %bb497
- %tmp496 = load %struct.tree_node** null, align 4 ; <%struct.tree_node*> [#uses=1]
+ %tmp496 = load %struct.tree_node*, %struct.tree_node** null, align 4 ; <%struct.tree_node*> [#uses=1]
br label %bb497
bb497: ; preds = %bb483, %entry
@@ -87,9 +87,9 @@ bb502: ; preds = %bb497
br i1 %foo, label %bb507, label %bb841
bb507: ; preds = %bb502
- %tmp517 = getelementptr %struct.tree_node* %last.0, i32 0, i32 0 ; <%struct.tree_function_decl*> [#uses=1]
+ %tmp517 = getelementptr %struct.tree_node, %struct.tree_node* %last.0, i32 0, i32 0 ; <%struct.tree_function_decl*> [#uses=1]
%tmp517518 = bitcast %struct.tree_function_decl* %tmp517 to %struct.tree_common* ; <%struct.tree_common*> [#uses=1]
- %tmp519 = getelementptr %struct.tree_common* %tmp517518, i32 0, i32 0 ; <%struct.tree_node**> [#uses=1]
+ %tmp519 = getelementptr %struct.tree_common, %struct.tree_common* %tmp517518, i32 0, i32 0 ; <%struct.tree_node**> [#uses=1]
store %struct.tree_node* null, %struct.tree_node** %tmp519, align 4
br label %bb841
diff --git a/test/CodeGen/X86/2008-05-22-FoldUnalignedLoad.ll b/test/CodeGen/X86/2008-05-22-FoldUnalignedLoad.ll
index da56ce7ab583..a91a422f55d1 100644
--- a/test/CodeGen/X86/2008-05-22-FoldUnalignedLoad.ll
+++ b/test/CodeGen/X86/2008-05-22-FoldUnalignedLoad.ll
@@ -2,7 +2,7 @@
define void @a(<4 x float>* %x) nounwind {
entry:
- %tmp2 = load <4 x float>* %x, align 1
+ %tmp2 = load <4 x float>, <4 x float>* %x, align 1
%inv = call <4 x float> @llvm.x86.sse.rcp.ps(<4 x float> %tmp2)
store <4 x float> %inv, <4 x float>* %x, align 1
ret void
diff --git a/test/CodeGen/X86/2008-05-28-LocalRegAllocBug.ll b/test/CodeGen/X86/2008-05-28-LocalRegAllocBug.ll
index 4e73b5aa1cdb..fc7ddf0bc67a 100644
--- a/test/CodeGen/X86/2008-05-28-LocalRegAllocBug.ll
+++ b/test/CodeGen/X86/2008-05-28-LocalRegAllocBug.ll
@@ -24,7 +24,7 @@ lpad243: ; preds = %bb37
%exn = landingpad {i8*, i32} personality i32 (...)* @__gxx_personality_v0
cleanup
%eh_ptr244 = extractvalue { i8*, i32 } %exn, 0
- store i32 (...)** getelementptr ([5 x i32 (...)*]* @_ZTVN10Evaluation10GridOutputILi3EEE, i32 0, i32 2), i32 (...)*** null, align 8
+ store i32 (...)** getelementptr ([5 x i32 (...)*], [5 x i32 (...)*]* @_ZTVN10Evaluation10GridOutputILi3EEE, i32 0, i32 2), i32 (...)*** null, align 8
%tmp133 = call i8* @__cxa_begin_catch( i8* %eh_ptr244 ) nounwind ; <i8*> [#uses=0]
unreachable
}
diff --git a/test/CodeGen/X86/2008-06-13-NotVolatileLoadStore.ll b/test/CodeGen/X86/2008-06-13-NotVolatileLoadStore.ll
index a6234d377df3..422d68e7ff49 100644
--- a/test/CodeGen/X86/2008-06-13-NotVolatileLoadStore.ll
+++ b/test/CodeGen/X86/2008-06-13-NotVolatileLoadStore.ll
@@ -13,9 +13,9 @@ define i16 @f(i64 %x) {
%b = bitcast i64 %x to double ; <double> [#uses=1]
store double %b, double* @atomic
store double 0.000000e+00, double* @atomic2
- %l = load i32* @ioport ; <i32> [#uses=1]
+ %l = load i32, i32* @ioport ; <i32> [#uses=1]
%t = trunc i32 %l to i16 ; <i16> [#uses=1]
- %l2 = load i32* @ioport2 ; <i32> [#uses=1]
+ %l2 = load i32, i32* @ioport2 ; <i32> [#uses=1]
%tmp = lshr i32 %l2, 16 ; <i32> [#uses=1]
%t2 = trunc i32 %tmp to i16 ; <i16> [#uses=1]
%f = add i16 %t, %t2 ; <i16> [#uses=1]
diff --git a/test/CodeGen/X86/2008-06-13-VolatileLoadStore.ll b/test/CodeGen/X86/2008-06-13-VolatileLoadStore.ll
index 037559edaf57..5a05ec13f35b 100644
--- a/test/CodeGen/X86/2008-06-13-VolatileLoadStore.ll
+++ b/test/CodeGen/X86/2008-06-13-VolatileLoadStore.ll
@@ -12,9 +12,9 @@ define i16 @f(i64 %x, double %y) {
store volatile double 0.000000e+00, double* @atomic2 ; one processor operation only
%b2 = bitcast double %y to i64 ; <i64> [#uses=1]
store volatile i64 %b2, i64* @anything ; may transform to store of double
- %l = load volatile i32* @ioport ; must not narrow
+ %l = load volatile i32, i32* @ioport ; must not narrow
%t = trunc i32 %l to i16 ; <i16> [#uses=1]
- %l2 = load volatile i32* @ioport ; must not narrow
+ %l2 = load volatile i32, i32* @ioport ; must not narrow
%tmp = lshr i32 %l2, 16 ; <i32> [#uses=1]
%t2 = trunc i32 %tmp to i16 ; <i16> [#uses=1]
%f = add i16 %t, %t2 ; <i16> [#uses=1]
diff --git a/test/CodeGen/X86/2008-06-16-SubregsBug.ll b/test/CodeGen/X86/2008-06-16-SubregsBug.ll
index 4d4819ab05d5..cdd1b0bfe607 100644
--- a/test/CodeGen/X86/2008-06-16-SubregsBug.ll
+++ b/test/CodeGen/X86/2008-06-16-SubregsBug.ll
@@ -1,7 +1,7 @@
; RUN: llc < %s -mtriple=i386-apple-darwin | grep mov | count 4
define i16 @test(i16* %tmp179) nounwind {
- %tmp180 = load i16* %tmp179, align 2 ; <i16> [#uses=2]
+ %tmp180 = load i16, i16* %tmp179, align 2 ; <i16> [#uses=2]
%tmp184 = and i16 %tmp180, -1024 ; <i16> [#uses=1]
%tmp186 = icmp eq i16 %tmp184, -32768 ; <i1> [#uses=1]
br i1 %tmp186, label %bb189, label %bb288
diff --git a/test/CodeGen/X86/2008-07-07-DanglingDeadInsts.ll b/test/CodeGen/X86/2008-07-07-DanglingDeadInsts.ll
index 46341fc87103..c92768c91d79 100644
--- a/test/CodeGen/X86/2008-07-07-DanglingDeadInsts.ll
+++ b/test/CodeGen/X86/2008-07-07-DanglingDeadInsts.ll
@@ -27,8 +27,8 @@ bb39: ; preds = %bb39, %bb40.preheader
br i1 false, label %bb39, label %bb49.outer
bb49.outer: ; preds = %bb39, %bb40.preheader
- getelementptr %struct.res_state* %state, i32 0, i32 3 ; <i32*>:1 [#uses=0]
- getelementptr %struct.res_state* %state, i32 0, i32 7 ; <i32*>:2 [#uses=0]
+ getelementptr %struct.res_state, %struct.res_state* %state, i32 0, i32 3 ; <i32*>:1 [#uses=0]
+ getelementptr %struct.res_state, %struct.res_state* %state, i32 0, i32 7 ; <i32*>:2 [#uses=0]
%base10.1 = select i1 false, float* null, float* null ; <float*> [#uses=1]
br label %bb74
@@ -43,7 +43,7 @@ bb71: ; preds = %bb74, %bb69
bb73: ; preds = %bb71
%.rec = add i32 %base10.2.ph.rec, 1 ; <i32> [#uses=2]
- getelementptr float* %base10.1, i32 %.rec ; <float*>:4 [#uses=1]
+ getelementptr float, float* %base10.1, i32 %.rec ; <float*>:4 [#uses=1]
br label %bb74
bb74: ; preds = %bb73, %bb71, %bb49.outer
@@ -63,7 +63,7 @@ entry:
br i1 false, label %bb17.preheader, label %bb30
bb17.preheader: ; preds = %entry
- load i32* null, align 4 ; <i32>:0 [#uses=0]
+ load i32, i32* null, align 4 ; <i32>:0 [#uses=0]
br label %bb16
bb16: ; preds = %bb16, %bb17.preheader
@@ -89,7 +89,7 @@ entry:
define i32 @vorbis_comment_query_count(%struct.vorbis_comment* %vc, i8* %tag) nounwind {
entry:
%strlen = call i32 @strlen( i8* null ) ; <i32> [#uses=1]
- %endptr = getelementptr i8* null, i32 %strlen ; <i8*> [#uses=0]
+ %endptr = getelementptr i8, i8* null, i32 %strlen ; <i8*> [#uses=0]
unreachable
}
diff --git a/test/CodeGen/X86/2008-07-16-CoalescerCrash.ll b/test/CodeGen/X86/2008-07-16-CoalescerCrash.ll
index f56604b75bd7..a1b9d9d5ab93 100644
--- a/test/CodeGen/X86/2008-07-16-CoalescerCrash.ll
+++ b/test/CodeGen/X86/2008-07-16-CoalescerCrash.ll
@@ -26,7 +26,7 @@ bb31: ; preds = %bb6
br label %bb33
bb33: ; preds = %bb31, %bb
- tail call void (%struct.SV*, i8*, ...)* @Perl_sv_catpvf( %struct.SV* %dsv, i8* getelementptr ([8 x i8]* @"\01LC25", i32 0, i64 0), i64 %0 ) nounwind
+ tail call void (%struct.SV*, i8*, ...) @Perl_sv_catpvf( %struct.SV* %dsv, i8* getelementptr ([8 x i8], [8 x i8]* @"\01LC25", i32 0, i64 0), i64 %0 ) nounwind
unreachable
bb40: ; preds = %entry
diff --git a/test/CodeGen/X86/2008-07-19-movups-spills.ll b/test/CodeGen/X86/2008-07-19-movups-spills.ll
index cd86ee188949..45ea69943e87 100644
--- a/test/CodeGen/X86/2008-07-19-movups-spills.ll
+++ b/test/CodeGen/X86/2008-07-19-movups-spills.ll
@@ -75,38 +75,38 @@ define void @test1() {
; CHECK: movups
; CHECK: movups
; CHECK-NOT: movups
- load <4 x float>* @0, align 1 ; <<4 x float>>:1 [#uses=2]
- load <4 x float>* @1, align 1 ; <<4 x float>>:2 [#uses=3]
- load <4 x float>* @2, align 1 ; <<4 x float>>:3 [#uses=4]
- load <4 x float>* @3, align 1 ; <<4 x float>>:4 [#uses=5]
- load <4 x float>* @4, align 1 ; <<4 x float>>:5 [#uses=6]
- load <4 x float>* @5, align 1 ; <<4 x float>>:6 [#uses=7]
- load <4 x float>* @6, align 1 ; <<4 x float>>:7 [#uses=8]
- load <4 x float>* @7, align 1 ; <<4 x float>>:8 [#uses=9]
- load <4 x float>* @8, align 1 ; <<4 x float>>:9 [#uses=10]
- load <4 x float>* @9, align 1 ; <<4 x float>>:10 [#uses=11]
- load <4 x float>* @10, align 1 ; <<4 x float>>:11 [#uses=12]
- load <4 x float>* @11, align 1 ; <<4 x float>>:12 [#uses=13]
- load <4 x float>* @12, align 1 ; <<4 x float>>:13 [#uses=14]
- load <4 x float>* @13, align 1 ; <<4 x float>>:14 [#uses=15]
- load <4 x float>* @14, align 1 ; <<4 x float>>:15 [#uses=16]
- load <4 x float>* @15, align 1 ; <<4 x float>>:16 [#uses=17]
- load <4 x float>* @16, align 1 ; <<4 x float>>:17 [#uses=18]
- load <4 x float>* @17, align 1 ; <<4 x float>>:18 [#uses=19]
- load <4 x float>* @18, align 1 ; <<4 x float>>:19 [#uses=20]
- load <4 x float>* @19, align 1 ; <<4 x float>>:20 [#uses=21]
- load <4 x float>* @20, align 1 ; <<4 x float>>:21 [#uses=22]
- load <4 x float>* @21, align 1 ; <<4 x float>>:22 [#uses=23]
- load <4 x float>* @22, align 1 ; <<4 x float>>:23 [#uses=24]
- load <4 x float>* @23, align 1 ; <<4 x float>>:24 [#uses=25]
- load <4 x float>* @24, align 1 ; <<4 x float>>:25 [#uses=26]
- load <4 x float>* @25, align 1 ; <<4 x float>>:26 [#uses=27]
- load <4 x float>* @26, align 1 ; <<4 x float>>:27 [#uses=28]
- load <4 x float>* @27, align 1 ; <<4 x float>>:28 [#uses=29]
- load <4 x float>* @28, align 1 ; <<4 x float>>:29 [#uses=30]
- load <4 x float>* @29, align 1 ; <<4 x float>>:30 [#uses=31]
- load <4 x float>* @30, align 1 ; <<4 x float>>:31 [#uses=32]
- load <4 x float>* @31, align 1 ; <<4 x float>>:32 [#uses=33]
+ load <4 x float>, <4 x float>* @0, align 1 ; <<4 x float>>:1 [#uses=2]
+ load <4 x float>, <4 x float>* @1, align 1 ; <<4 x float>>:2 [#uses=3]
+ load <4 x float>, <4 x float>* @2, align 1 ; <<4 x float>>:3 [#uses=4]
+ load <4 x float>, <4 x float>* @3, align 1 ; <<4 x float>>:4 [#uses=5]
+ load <4 x float>, <4 x float>* @4, align 1 ; <<4 x float>>:5 [#uses=6]
+ load <4 x float>, <4 x float>* @5, align 1 ; <<4 x float>>:6 [#uses=7]
+ load <4 x float>, <4 x float>* @6, align 1 ; <<4 x float>>:7 [#uses=8]
+ load <4 x float>, <4 x float>* @7, align 1 ; <<4 x float>>:8 [#uses=9]
+ load <4 x float>, <4 x float>* @8, align 1 ; <<4 x float>>:9 [#uses=10]
+ load <4 x float>, <4 x float>* @9, align 1 ; <<4 x float>>:10 [#uses=11]
+ load <4 x float>, <4 x float>* @10, align 1 ; <<4 x float>>:11 [#uses=12]
+ load <4 x float>, <4 x float>* @11, align 1 ; <<4 x float>>:12 [#uses=13]
+ load <4 x float>, <4 x float>* @12, align 1 ; <<4 x float>>:13 [#uses=14]
+ load <4 x float>, <4 x float>* @13, align 1 ; <<4 x float>>:14 [#uses=15]
+ load <4 x float>, <4 x float>* @14, align 1 ; <<4 x float>>:15 [#uses=16]
+ load <4 x float>, <4 x float>* @15, align 1 ; <<4 x float>>:16 [#uses=17]
+ load <4 x float>, <4 x float>* @16, align 1 ; <<4 x float>>:17 [#uses=18]
+ load <4 x float>, <4 x float>* @17, align 1 ; <<4 x float>>:18 [#uses=19]
+ load <4 x float>, <4 x float>* @18, align 1 ; <<4 x float>>:19 [#uses=20]
+ load <4 x float>, <4 x float>* @19, align 1 ; <<4 x float>>:20 [#uses=21]
+ load <4 x float>, <4 x float>* @20, align 1 ; <<4 x float>>:21 [#uses=22]
+ load <4 x float>, <4 x float>* @21, align 1 ; <<4 x float>>:22 [#uses=23]
+ load <4 x float>, <4 x float>* @22, align 1 ; <<4 x float>>:23 [#uses=24]
+ load <4 x float>, <4 x float>* @23, align 1 ; <<4 x float>>:24 [#uses=25]
+ load <4 x float>, <4 x float>* @24, align 1 ; <<4 x float>>:25 [#uses=26]
+ load <4 x float>, <4 x float>* @25, align 1 ; <<4 x float>>:26 [#uses=27]
+ load <4 x float>, <4 x float>* @26, align 1 ; <<4 x float>>:27 [#uses=28]
+ load <4 x float>, <4 x float>* @27, align 1 ; <<4 x float>>:28 [#uses=29]
+ load <4 x float>, <4 x float>* @28, align 1 ; <<4 x float>>:29 [#uses=30]
+ load <4 x float>, <4 x float>* @29, align 1 ; <<4 x float>>:30 [#uses=31]
+ load <4 x float>, <4 x float>* @30, align 1 ; <<4 x float>>:31 [#uses=32]
+ load <4 x float>, <4 x float>* @31, align 1 ; <<4 x float>>:32 [#uses=33]
fmul <4 x float> %1, %1 ; <<4 x float>>:33 [#uses=1]
fmul <4 x float> %33, %2 ; <<4 x float>>:34 [#uses=1]
fmul <4 x float> %34, %3 ; <<4 x float>>:35 [#uses=1]
@@ -708,38 +708,38 @@ define void @test2() "no-realign-stack" {
; CHECK: movups
; CHECK: movups
; CHECK-NOT: movups
- load <4 x float>* @0, align 1
- load <4 x float>* @1, align 1
- load <4 x float>* @2, align 1
- load <4 x float>* @3, align 1
- load <4 x float>* @4, align 1
- load <4 x float>* @5, align 1
- load <4 x float>* @6, align 1
- load <4 x float>* @7, align 1
- load <4 x float>* @8, align 1
- load <4 x float>* @9, align 1
- load <4 x float>* @10, align 1
- load <4 x float>* @11, align 1
- load <4 x float>* @12, align 1
- load <4 x float>* @13, align 1
- load <4 x float>* @14, align 1
- load <4 x float>* @15, align 1
- load <4 x float>* @16, align 1
- load <4 x float>* @17, align 1
- load <4 x float>* @18, align 1
- load <4 x float>* @19, align 1
- load <4 x float>* @20, align 1
- load <4 x float>* @21, align 1
- load <4 x float>* @22, align 1
- load <4 x float>* @23, align 1
- load <4 x float>* @24, align 1
- load <4 x float>* @25, align 1
- load <4 x float>* @26, align 1
- load <4 x float>* @27, align 1
- load <4 x float>* @28, align 1
- load <4 x float>* @29, align 1
- load <4 x float>* @30, align 1
- load <4 x float>* @31, align 1
+ load <4 x float>, <4 x float>* @0, align 1
+ load <4 x float>, <4 x float>* @1, align 1
+ load <4 x float>, <4 x float>* @2, align 1
+ load <4 x float>, <4 x float>* @3, align 1
+ load <4 x float>, <4 x float>* @4, align 1
+ load <4 x float>, <4 x float>* @5, align 1
+ load <4 x float>, <4 x float>* @6, align 1
+ load <4 x float>, <4 x float>* @7, align 1
+ load <4 x float>, <4 x float>* @8, align 1
+ load <4 x float>, <4 x float>* @9, align 1
+ load <4 x float>, <4 x float>* @10, align 1
+ load <4 x float>, <4 x float>* @11, align 1
+ load <4 x float>, <4 x float>* @12, align 1
+ load <4 x float>, <4 x float>* @13, align 1
+ load <4 x float>, <4 x float>* @14, align 1
+ load <4 x float>, <4 x float>* @15, align 1
+ load <4 x float>, <4 x float>* @16, align 1
+ load <4 x float>, <4 x float>* @17, align 1
+ load <4 x float>, <4 x float>* @18, align 1
+ load <4 x float>, <4 x float>* @19, align 1
+ load <4 x float>, <4 x float>* @20, align 1
+ load <4 x float>, <4 x float>* @21, align 1
+ load <4 x float>, <4 x float>* @22, align 1
+ load <4 x float>, <4 x float>* @23, align 1
+ load <4 x float>, <4 x float>* @24, align 1
+ load <4 x float>, <4 x float>* @25, align 1
+ load <4 x float>, <4 x float>* @26, align 1
+ load <4 x float>, <4 x float>* @27, align 1
+ load <4 x float>, <4 x float>* @28, align 1
+ load <4 x float>, <4 x float>* @29, align 1
+ load <4 x float>, <4 x float>* @30, align 1
+ load <4 x float>, <4 x float>* @31, align 1
fmul <4 x float> %1, %1
fmul <4 x float> %33, %2
fmul <4 x float> %34, %3
diff --git a/test/CodeGen/X86/2008-07-22-CombinerCrash.ll b/test/CodeGen/X86/2008-07-22-CombinerCrash.ll
index 0f6714579bcc..35bb5f054282 100644
--- a/test/CodeGen/X86/2008-07-22-CombinerCrash.ll
+++ b/test/CodeGen/X86/2008-07-22-CombinerCrash.ll
@@ -7,7 +7,7 @@ external global <4 x i16> ; <<4 x i16>*>:1 [#uses=1]
declare void @abort()
define void @t() nounwind {
- load i16* @0 ; <i16>:1 [#uses=1]
+ load i16, i16* @0 ; <i16>:1 [#uses=1]
zext i16 %1 to i64 ; <i64>:2 [#uses=1]
bitcast i64 %2 to <4 x i16> ; <<4 x i16>>:3 [#uses=1]
shufflevector <4 x i16> %3, <4 x i16> undef, <4 x i32> zeroinitializer ; <<4 x i16>>:4 [#uses=1]
diff --git a/test/CodeGen/X86/2008-08-06-CmpStride.ll b/test/CodeGen/X86/2008-08-06-CmpStride.ll
index bdac8fd48422..a030fbeed513 100644
--- a/test/CodeGen/X86/2008-08-06-CmpStride.ll
+++ b/test/CodeGen/X86/2008-08-06-CmpStride.ll
@@ -13,7 +13,7 @@ forbody:
%sub14 = sub i32 1027, %i.0 ; <i32> [#uses=1]
%mul15 = mul i32 %sub14, 10 ; <i32> [#uses=1]
%add166 = or i32 %mul15, 1 ; <i32> [#uses=1] *
- call i32 (i8*, ...)* @printf( i8* noalias getelementptr ([4 x i8]* @.str, i32 0, i32 0), i32 %add166 ) nounwind
+ call i32 (i8*, ...) @printf( i8* noalias getelementptr ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i32 %add166 ) nounwind
%inc = add i32 %i.0, 1 ; <i32> [#uses=3]
%cmp = icmp ne i32 %inc, 1027 ; <i1> [#uses=1]
br i1 %cmp, label %forbody, label %afterfor
diff --git a/test/CodeGen/X86/2008-08-06-RewriterBug.ll b/test/CodeGen/X86/2008-08-06-RewriterBug.ll
index 4428035cc827..f9c5467713fc 100644
--- a/test/CodeGen/X86/2008-08-06-RewriterBug.ll
+++ b/test/CodeGen/X86/2008-08-06-RewriterBug.ll
@@ -4,14 +4,14 @@
@data = external global [400 x i64] ; <[400 x i64]*> [#uses=5]
define void @foo(double* noalias, double* noalias) {
- load i64* getelementptr ([400 x i64]* @data, i32 0, i64 200), align 4 ; <i64>:3 [#uses=1]
- load i64* getelementptr ([400 x i64]* @data, i32 0, i64 199), align 4 ; <i64>:4 [#uses=1]
- load i64* getelementptr ([400 x i64]* @data, i32 0, i64 198), align 4 ; <i64>:5 [#uses=2]
- load i64* getelementptr ([400 x i64]* @data, i32 0, i64 197), align 4 ; <i64>:6 [#uses=1]
+ load i64, i64* getelementptr ([400 x i64], [400 x i64]* @data, i32 0, i64 200), align 4 ; <i64>:3 [#uses=1]
+ load i64, i64* getelementptr ([400 x i64], [400 x i64]* @data, i32 0, i64 199), align 4 ; <i64>:4 [#uses=1]
+ load i64, i64* getelementptr ([400 x i64], [400 x i64]* @data, i32 0, i64 198), align 4 ; <i64>:5 [#uses=2]
+ load i64, i64* getelementptr ([400 x i64], [400 x i64]* @data, i32 0, i64 197), align 4 ; <i64>:6 [#uses=1]
br i1 false, label %28, label %7
; <label>:7 ; preds = %2
- load double** getelementptr (double** bitcast ([400 x i64]* @data to double**), i64 180), align 8 ; <double*>:8 [#uses=1]
+ load double*, double** getelementptr (double*, double** bitcast ([400 x i64]* @data to double**), i64 180), align 8 ; <double*>:8 [#uses=1]
bitcast double* %8 to double* ; <double*>:9 [#uses=1]
ptrtoint double* %9 to i64 ; <i64>:10 [#uses=1]
mul i64 %4, %3 ; <i64>:11 [#uses=1]
diff --git a/test/CodeGen/X86/2008-08-31-EH_RETURN64.ll b/test/CodeGen/X86/2008-08-31-EH_RETURN64.ll
index 51064f1d2173..291090263a2f 100644
--- a/test/CodeGen/X86/2008-08-31-EH_RETURN64.ll
+++ b/test/CodeGen/X86/2008-08-31-EH_RETURN64.ll
@@ -29,7 +29,7 @@ declare void @llvm.eh.unwind.init()
; CHECK: _Unwind_Resume_or_Rethrow
define i32 @_Unwind_Resume_or_Rethrow() nounwind uwtable ssp {
entry:
- %0 = load i32* @b, align 4
+ %0 = load i32, i32* @b, align 4
%tobool = icmp eq i32 %0, 0
br i1 %tobool, label %if.end, label %if.then
@@ -37,7 +37,7 @@ if.then: ; preds = %entry
ret i32 0
if.end: ; preds = %entry
- %call = tail call i32 (...)* @_Unwind_ForcedUnwind_Phase2() nounwind
+ %call = tail call i32 (...) @_Unwind_ForcedUnwind_Phase2() nounwind
store i32 %call, i32* @a, align 4
%tobool1 = icmp eq i32 %call, 0
br i1 %tobool1, label %cond.end, label %cond.true
diff --git a/test/CodeGen/X86/2008-09-09-LinearScanBug.ll b/test/CodeGen/X86/2008-09-09-LinearScanBug.ll
index b3312d9464d1..9a1a3ddeae72 100644
--- a/test/CodeGen/X86/2008-09-09-LinearScanBug.ll
+++ b/test/CodeGen/X86/2008-09-09-LinearScanBug.ll
@@ -5,7 +5,7 @@
define i32 @func_125(i32 %p_126, i32 %p_128, i32 %p_129) nounwind {
entry:
- %tmp2.i = load i32* @g_3 ; <i32> [#uses=2]
+ %tmp2.i = load i32, i32* @g_3 ; <i32> [#uses=2]
%conv = trunc i32 %tmp2.i to i16 ; <i16> [#uses=3]
br label %forcond1.preheader.i.i7
@@ -58,7 +58,7 @@ ifend.i: ; preds = %lor_rhs.i
safe_mod_int16_t_s_s.exit: ; preds = %ifend.i, %lor_rhs.i, %func_106.exit27
%call31 = phi i16 [ %conv8.i, %ifend.i ], [ %conv, %func_106.exit27 ], [ %conv, %lor_rhs.i ] ; <i16> [#uses=1]
%conv4 = sext i16 %call31 to i32 ; <i32> [#uses=1]
- %call5 = tail call i32 (...)* @func_104( i32 %conv4 ) ; <i32> [#uses=0]
+ %call5 = tail call i32 (...) @func_104( i32 %conv4 ) ; <i32> [#uses=0]
ret i32 undef
}
diff --git a/test/CodeGen/X86/2008-09-11-CoalescerBug.ll b/test/CodeGen/X86/2008-09-11-CoalescerBug.ll
index 108f24307ea9..8c46bb3ec8b7 100644
--- a/test/CodeGen/X86/2008-09-11-CoalescerBug.ll
+++ b/test/CodeGen/X86/2008-09-11-CoalescerBug.ll
@@ -6,18 +6,18 @@
define i32 @func_3(i32 %p_5) nounwind {
entry:
%0 = srem i32 1, 0 ; <i32> [#uses=2]
- %1 = load i16* @g_15, align 2 ; <i16> [#uses=1]
+ %1 = load i16, i16* @g_15, align 2 ; <i16> [#uses=1]
%2 = zext i16 %1 to i32 ; <i32> [#uses=1]
%3 = and i32 %2, 1 ; <i32> [#uses=1]
- %4 = tail call i32 (...)* @rshift_u_s( i32 1 ) nounwind ; <i32> [#uses=1]
+ %4 = tail call i32 (...) @rshift_u_s( i32 1 ) nounwind ; <i32> [#uses=1]
%5 = icmp slt i32 %4, 2 ; <i1> [#uses=1]
%6 = zext i1 %5 to i32 ; <i32> [#uses=1]
%7 = icmp sge i32 %3, %6 ; <i1> [#uses=1]
%8 = zext i1 %7 to i32 ; <i32> [#uses=1]
- %9 = load i16* @g_15, align 2 ; <i16> [#uses=1]
+ %9 = load i16, i16* @g_15, align 2 ; <i16> [#uses=1]
%10 = icmp eq i16 %9, 0 ; <i1> [#uses=1]
%11 = zext i1 %10 to i32 ; <i32> [#uses=1]
- %12 = tail call i32 (...)* @func_20( i32 1 ) nounwind ; <i32> [#uses=1]
+ %12 = tail call i32 (...) @func_20( i32 1 ) nounwind ; <i32> [#uses=1]
%13 = icmp sge i32 %11, %12 ; <i1> [#uses=1]
%14 = zext i1 %13 to i32 ; <i32> [#uses=1]
%15 = sub i32 %8, %14 ; <i32> [#uses=1]
@@ -27,7 +27,7 @@ entry:
%or.cond = or i1 false, %18 ; <i1> [#uses=1]
%19 = select i1 %or.cond, i32 0, i32 %0 ; <i32> [#uses=1]
%.0 = lshr i32 %17, %19 ; <i32> [#uses=1]
- %20 = tail call i32 (...)* @func_7( i32 %.0 ) nounwind ; <i32> [#uses=0]
+ %20 = tail call i32 (...) @func_7( i32 %.0 ) nounwind ; <i32> [#uses=0]
ret i32 undef
}
diff --git a/test/CodeGen/X86/2008-09-11-CoalescerBug2.ll b/test/CodeGen/X86/2008-09-11-CoalescerBug2.ll
index 59d1c7f77abf..757dff4230fc 100644
--- a/test/CodeGen/X86/2008-09-11-CoalescerBug2.ll
+++ b/test/CodeGen/X86/2008-09-11-CoalescerBug2.ll
@@ -18,11 +18,11 @@ entry:
; SOURCE-SCHED: subl
; SOURCE-SCHED: testb
; SOURCE-SCHED: jne
- %0 = load i32* @g_5, align 4 ; <i32> [#uses=1]
+ %0 = load i32, i32* @g_5, align 4 ; <i32> [#uses=1]
%1 = ashr i32 %0, 1 ; <i32> [#uses=1]
%2 = icmp sgt i32 %1, 1 ; <i1> [#uses=1]
%3 = zext i1 %2 to i32 ; <i32> [#uses=1]
- %4 = load i32* @g_73, align 4 ; <i32> [#uses=1]
+ %4 = load i32, i32* @g_73, align 4 ; <i32> [#uses=1]
%5 = zext i16 %p_46 to i64 ; <i64> [#uses=1]
%6 = sub i64 0, %5 ; <i64> [#uses=1]
%7 = trunc i64 %6 to i8 ; <i8> [#uses=2]
@@ -38,7 +38,7 @@ bb12: ; preds = %bb11, %entry
%.014.in = phi i8 [ %10, %bb11 ], [ %7, %entry ] ; <i8> [#uses=1]
%11 = icmp ne i8 %.014.in, 0 ; <i1> [#uses=1]
%12 = zext i1 %11 to i32 ; <i32> [#uses=1]
- %13 = tail call i32 (...)* @func_48( i32 %12, i32 %3, i32 0 ) nounwind ; <i32> [#uses=0]
+ %13 = tail call i32 (...) @func_48( i32 %12, i32 %3, i32 0 ) nounwind ; <i32> [#uses=0]
ret i32 undef
}
diff --git a/test/CodeGen/X86/2008-09-17-inline-asm-1.ll b/test/CodeGen/X86/2008-09-17-inline-asm-1.ll
index 4b2774b64b7b..3edd72bdba90 100644
--- a/test/CodeGen/X86/2008-09-17-inline-asm-1.ll
+++ b/test/CodeGen/X86/2008-09-17-inline-asm-1.ll
@@ -19,7 +19,7 @@ target triple = "i386-apple-darwin8"
define i32 @aci(i32* %pw) nounwind {
entry:
- %0 = load i32* @x, align 4
+ %0 = load i32, i32* @x, align 4
%asmtmp = tail call { i32, i32 } asm "movl $0, %eax\0A\090:\0A\09test %eax, %eax\0A\09je 1f\0A\09movl %eax, $2\0A\09incl $2\0A\09lock\0A\09cmpxchgl $2, $0\0A\09jne 0b\0A\091:", "=*m,=&{ax},=&r,*m,~{dirflag},~{fpsr},~{flags},~{memory},~{cc}"(i32* %pw, i32* %pw) nounwind
%asmtmp2 = tail call { i32, i32 } asm "movl $0, %edx\0A\090:\0A\09test %edx, %edx\0A\09je 1f\0A\09movl %edx, $2\0A\09incl $2\0A\09lock\0A\09cmpxchgl $2, $0\0A\09jne 0b\0A\091:", "=*m,=&{dx},=&r,*m,~{dirflag},~{fpsr},~{flags},~{memory},~{cc}"(i32* %pw, i32* %pw) nounwind
%asmresult2 = extractvalue { i32, i32 } %asmtmp, 0
diff --git a/test/CodeGen/X86/2008-09-18-inline-asm-2.ll b/test/CodeGen/X86/2008-09-18-inline-asm-2.ll
index f4a43a1e978a..0058d979a2fa 100644
--- a/test/CodeGen/X86/2008-09-18-inline-asm-2.ll
+++ b/test/CodeGen/X86/2008-09-18-inline-asm-2.ll
@@ -32,12 +32,12 @@ target triple = "i386-apple-darwin8"
define i32 @get(%struct.foo* %c, i8* %state) nounwind {
entry:
- %0 = getelementptr %struct.foo* %c, i32 0, i32 0 ; <i32*> [#uses=2]
- %1 = getelementptr %struct.foo* %c, i32 0, i32 1 ; <i32*> [#uses=2]
- %2 = getelementptr %struct.foo* %c, i32 0, i32 2 ; <i8**> [#uses=2]
- %3 = load i32* %0, align 4 ; <i32> [#uses=1]
- %4 = load i32* %1, align 4 ; <i32> [#uses=1]
- %5 = load i8* %state, align 1 ; <i8> [#uses=1]
+ %0 = getelementptr %struct.foo, %struct.foo* %c, i32 0, i32 0 ; <i32*> [#uses=2]
+ %1 = getelementptr %struct.foo, %struct.foo* %c, i32 0, i32 1 ; <i32*> [#uses=2]
+ %2 = getelementptr %struct.foo, %struct.foo* %c, i32 0, i32 2 ; <i8**> [#uses=2]
+ %3 = load i32, i32* %0, align 4 ; <i32> [#uses=1]
+ %4 = load i32, i32* %1, align 4 ; <i32> [#uses=1]
+ %5 = load i8, i8* %state, align 1 ; <i8> [#uses=1]
%asmtmp = tail call { i32, i32, i32, i32 } asm sideeffect "#1st=$0 $1 2nd=$1 $2 3rd=$2 $4 5th=$4 $3=4th 1$0 1%eXx 5$4 5%eXx 6th=$5", "=&r,=r,=r,=*m,=&q,=*imr,1,2,*m,5,~{dirflag},~{fpsr},~{flags},~{cx}"(i8** %2, i8* %state, i32 %3, i32 %4, i8** %2, i8 %5) nounwind ; <{ i32, i32, i32, i32 }> [#uses=3]
%asmresult = extractvalue { i32, i32, i32, i32 } %asmtmp, 0 ; <i32> [#uses=1]
%asmresult1 = extractvalue { i32, i32, i32, i32 } %asmtmp, 1 ; <i32> [#uses=1]
diff --git a/test/CodeGen/X86/2008-09-19-RegAllocBug.ll b/test/CodeGen/X86/2008-09-19-RegAllocBug.ll
index a8f2912a70af..83a1fac7f3ab 100644
--- a/test/CodeGen/X86/2008-09-19-RegAllocBug.ll
+++ b/test/CodeGen/X86/2008-09-19-RegAllocBug.ll
@@ -5,7 +5,7 @@
define i32 @func_4() nounwind {
entry:
- %0 = load i32* @g_3, align 4 ; <i32> [#uses=2]
+ %0 = load i32, i32* @g_3, align 4 ; <i32> [#uses=2]
%1 = trunc i32 %0 to i8 ; <i8> [#uses=1]
%2 = sub i8 1, %1 ; <i8> [#uses=1]
%3 = sext i8 %2 to i32 ; <i32> [#uses=1]
diff --git a/test/CodeGen/X86/2008-09-29-ReMatBug.ll b/test/CodeGen/X86/2008-09-29-ReMatBug.ll
index c36cf39fb341..754fd8f0ab64 100644
--- a/test/CodeGen/X86/2008-09-29-ReMatBug.ll
+++ b/test/CodeGen/X86/2008-09-29-ReMatBug.ll
@@ -5,7 +5,7 @@
%struct.XCStringList = type { i32, %struct._XCStringListNode* }
%struct._XCStringListNode = type { [3 x i8], [0 x i8], i8 }
%struct.__builtin_CFString = type { i32*, i32, i8*, i32 }
-internal constant %struct.__builtin_CFString { i32* getelementptr ([0 x i32]* @__CFConstantStringClassReference, i32 0, i32 0), i32 1992, i8* getelementptr ([3 x i8]* @"\01LC", i32 0, i32 0), i32 2 } ; <%struct.__builtin_CFString*>:0 [#uses=1]
+internal constant %struct.__builtin_CFString { i32* getelementptr ([0 x i32], [0 x i32]* @__CFConstantStringClassReference, i32 0, i32 0), i32 1992, i8* getelementptr ([3 x i8], [3 x i8]* @"\01LC", i32 0, i32 0), i32 2 } ; <%struct.__builtin_CFString*>:0 [#uses=1]
@__CFConstantStringClassReference = external global [0 x i32] ; <[0 x i32]*> [#uses=1]
@"\01LC" = internal constant [3 x i8] c"NO\00" ; <[3 x i8]*> [#uses=1]
@"\01LC1" = internal constant [1 x i8] zeroinitializer ; <[1 x i8]*> [#uses=1]
@@ -13,13 +13,13 @@ internal constant %struct.__builtin_CFString { i32* getelementptr ([0 x i32]* @_
define %struct.NSString* @"-[XCStringList stringRepresentation]"(%struct.XCStringList* %self, %struct..0objc_selector* %_cmd) nounwind {
entry:
- %0 = load i32* null, align 4 ; <i32> [#uses=1]
+ %0 = load i32, i32* null, align 4 ; <i32> [#uses=1]
%1 = and i32 %0, 16777215 ; <i32> [#uses=1]
%2 = icmp eq i32 %1, 0 ; <i1> [#uses=1]
br i1 %2, label %bb44, label %bb4
bb4: ; preds = %entry
- %3 = load %struct._XCStringListNode** null, align 4 ; <%struct._XCStringListNode*> [#uses=2]
+ %3 = load %struct._XCStringListNode*, %struct._XCStringListNode** null, align 4 ; <%struct._XCStringListNode*> [#uses=2]
%4 = icmp eq %struct._XCStringListNode* %3, null ; <i1> [#uses=1]
%5 = bitcast %struct._XCStringListNode* %3 to i32* ; <i32*> [#uses=1]
br label %bb37.outer
@@ -35,11 +35,11 @@ bb19: ; preds = %bb37, %bb6
br i1 %7, label %bb25.split, label %bb37
bb25.split: ; preds = %bb19
- call void @foo(i8* getelementptr ([1 x i8]* @"\01LC1", i32 0, i32 0)) nounwind nounwind
+ call void @foo(i8* getelementptr ([1 x i8], [1 x i8]* @"\01LC1", i32 0, i32 0)) nounwind nounwind
br label %bb35.outer
bb34: ; preds = %bb35, %bb35, %bb35, %bb35
- %8 = getelementptr i8* %bufptr.0.lcssa, i32 %totalLength.0.ph ; <i8*> [#uses=1]
+ %8 = getelementptr i8, i8* %bufptr.0.lcssa, i32 %totalLength.0.ph ; <i8*> [#uses=1]
store i8 92, i8* %8, align 1
br label %bb35.outer
@@ -48,7 +48,7 @@ bb35.outer: ; preds = %bb34, %bb25.split
br label %bb35
bb35: ; preds = %bb35, %bb35.outer
- %9 = load i8* null, align 1 ; <i8> [#uses=1]
+ %9 = load i8, i8* null, align 1 ; <i8> [#uses=1]
switch i8 %9, label %bb35 [
i8 0, label %bb37.outer
i8 32, label %bb34
@@ -63,7 +63,7 @@ bb37.outer: ; preds = %bb35, %bb4
br i1 %4, label %bb39.split, label %bb37
bb37: ; preds = %bb37.outer, %bb19
- %10 = load i32* %5, align 4 ; <i32> [#uses=1]
+ %10 = load i32, i32* %5, align 4 ; <i32> [#uses=1]
br i1 false, label %bb6, label %bb19
bb39.split: ; preds = %bb37.outer
diff --git a/test/CodeGen/X86/2008-09-29-VolatileBug.ll b/test/CodeGen/X86/2008-09-29-VolatileBug.ll
index f35245bb2af7..6ee8cf2f5e33 100644
--- a/test/CodeGen/X86/2008-09-29-VolatileBug.ll
+++ b/test/CodeGen/X86/2008-09-29-VolatileBug.ll
@@ -6,7 +6,7 @@
define i32 @main() nounwind {
entry:
- %0 = load volatile i32* @g_407, align 4 ; <i32> [#uses=1]
+ %0 = load volatile i32, i32* @g_407, align 4 ; <i32> [#uses=1]
%1 = trunc i32 %0 to i8 ; <i8> [#uses=1]
%2 = tail call i32 @func_45(i8 zeroext %1) nounwind ; <i32> [#uses=0]
ret i32 0
diff --git a/test/CodeGen/X86/2008-10-06-MMXISelBug.ll b/test/CodeGen/X86/2008-10-06-MMXISelBug.ll
deleted file mode 100644
index 7f7b1a436d24..000000000000
--- a/test/CodeGen/X86/2008-10-06-MMXISelBug.ll
+++ /dev/null
@@ -1,12 +0,0 @@
-; RUN: llc < %s -march=x86 -mattr=+mmx,+sse2
-; PR2850
-
-@tmp_V2i = common global <2 x i32> zeroinitializer ; <<2 x i32>*> [#uses=2]
-
-define void @f0() nounwind {
-entry:
- %0 = load <2 x i32>* @tmp_V2i, align 8 ; <<2 x i32>> [#uses=1]
- %1 = shufflevector <2 x i32> %0, <2 x i32> undef, <2 x i32> zeroinitializer ; <<2 x i32>> [#uses=1]
- store <2 x i32> %1, <2 x i32>* @tmp_V2i, align 8
- ret void
-}
diff --git a/test/CodeGen/X86/2008-10-06-x87ld-nan-2.ll b/test/CodeGen/X86/2008-10-06-x87ld-nan-2.ll
index bd48105f129a..34c9857b00ea 100644
--- a/test/CodeGen/X86/2008-10-06-x87ld-nan-2.ll
+++ b/test/CodeGen/X86/2008-10-06-x87ld-nan-2.ll
@@ -10,7 +10,7 @@ declare x86_stdcallcc void @_D3nan5printFeZv(x86_fp80 %f)
define i32 @main() {
entry_nan.main:
- %tmp = load x86_fp80* @_D3nan4rvale ; <x86_fp80> [#uses=1]
+ %tmp = load x86_fp80, x86_fp80* @_D3nan4rvale ; <x86_fp80> [#uses=1]
call x86_stdcallcc void @_D3nan5printFeZv(x86_fp80 %tmp)
call x86_stdcallcc void @_D3nan5printFeZv(x86_fp80 0xK7FFF8001234000000000)
call x86_stdcallcc void @_D3nan5printFeZv(x86_fp80 0xK7FFFC001234000000400)
diff --git a/test/CodeGen/X86/2008-10-07-SSEISelBug.ll b/test/CodeGen/X86/2008-10-07-SSEISelBug.ll
index bc5761288c9b..26e802ac05f9 100644
--- a/test/CodeGen/X86/2008-10-07-SSEISelBug.ll
+++ b/test/CodeGen/X86/2008-10-07-SSEISelBug.ll
@@ -6,17 +6,17 @@ entry:
%w.addr = alloca float ; <float*> [#uses=2]
%.compoundliteral = alloca <4 x float> ; <<4 x float>*> [#uses=2]
store float %w, float* %w.addr
- %tmp = load float* %w.addr ; <float> [#uses=1]
+ %tmp = load float, float* %w.addr ; <float> [#uses=1]
%0 = insertelement <4 x float> undef, float %tmp, i32 0 ; <<4 x float>> [#uses=1]
%1 = insertelement <4 x float> %0, float 0.000000e+00, i32 1 ; <<4 x float>> [#uses=1]
%2 = insertelement <4 x float> %1, float 0.000000e+00, i32 2 ; <<4 x float>> [#uses=1]
%3 = insertelement <4 x float> %2, float 0.000000e+00, i32 3 ; <<4 x float>> [#uses=1]
store <4 x float> %3, <4 x float>* %.compoundliteral
- %tmp1 = load <4 x float>* %.compoundliteral ; <<4 x float>> [#uses=1]
+ %tmp1 = load <4 x float>, <4 x float>* %.compoundliteral ; <<4 x float>> [#uses=1]
store <4 x float> %tmp1, <4 x float>* %retval
br label %return
return: ; preds = %entry
- %4 = load <4 x float>* %retval ; <<4 x float>> [#uses=1]
+ %4 = load <4 x float>, <4 x float>* %retval ; <<4 x float>> [#uses=1]
ret <4 x float> %4
}
diff --git a/test/CodeGen/X86/2008-10-11-CallCrash.ll b/test/CodeGen/X86/2008-10-11-CallCrash.ll
index efc6125cfc2d..9ad7ab268ad2 100644
--- a/test/CodeGen/X86/2008-10-11-CallCrash.ll
+++ b/test/CodeGen/X86/2008-10-11-CallCrash.ll
@@ -6,13 +6,13 @@ target triple = "i386-apple-darwin7"
define i32 @func_45(i64 %p_46, i32 %p_48) nounwind {
entry:
- %0 = tail call i32 (...)* @lshift_s_u(i64 %p_46, i64 0) nounwind ; <i32> [#uses=0]
- %1 = load i32* @g_385, align 4 ; <i32> [#uses=1]
+ %0 = tail call i32 (...) @lshift_s_u(i64 %p_46, i64 0) nounwind ; <i32> [#uses=0]
+ %1 = load i32, i32* @g_385, align 4 ; <i32> [#uses=1]
%2 = shl i32 %1, 1 ; <i32> [#uses=1]
%3 = and i32 %2, 32 ; <i32> [#uses=1]
- %4 = tail call i32 (...)* @func_87(i32 undef, i32 %p_48, i32 1) nounwind ; <i32> [#uses=1]
+ %4 = tail call i32 (...) @func_87(i32 undef, i32 %p_48, i32 1) nounwind ; <i32> [#uses=1]
%5 = add i32 %3, %4 ; <i32> [#uses=1]
- %6 = tail call i32 (...)* @div_rhs(i32 %5) nounwind ; <i32> [#uses=0]
+ %6 = tail call i32 (...) @div_rhs(i32 %5) nounwind ; <i32> [#uses=0]
ret i32 undef
}
diff --git a/test/CodeGen/X86/2008-10-13-CoalescerBug.ll b/test/CodeGen/X86/2008-10-13-CoalescerBug.ll
index 4d3f8c2071b5..c285ae4fdd28 100644
--- a/test/CodeGen/X86/2008-10-13-CoalescerBug.ll
+++ b/test/CodeGen/X86/2008-10-13-CoalescerBug.ll
@@ -3,7 +3,7 @@
define i32 @func_77(i8 zeroext %p_79) nounwind {
entry:
- %0 = tail call i32 (...)* @func_43(i32 1) nounwind ; <i32> [#uses=1]
+ %0 = tail call i32 (...) @func_43(i32 1) nounwind ; <i32> [#uses=1]
%1 = icmp eq i32 %0, 0 ; <i1> [#uses=1]
br i1 %1, label %bb3, label %bb
@@ -14,7 +14,7 @@ bb3: ; preds = %bb, %entry
%p_79_addr.0 = phi i8 [ 0, %bb ], [ %p_79, %entry ] ; <i8> [#uses=1]
%2 = zext i8 %p_79_addr.0 to i32 ; <i32> [#uses=2]
%3 = zext i1 false to i32 ; <i32> [#uses=2]
- %4 = tail call i32 (...)* @rshift_u_s(i32 1) nounwind ; <i32> [#uses=0]
+ %4 = tail call i32 (...) @rshift_u_s(i32 1) nounwind ; <i32> [#uses=0]
%5 = lshr i32 %2, %2 ; <i32> [#uses=3]
%6 = icmp eq i32 0, 0 ; <i1> [#uses=1]
br i1 %6, label %bb6, label %bb9
diff --git a/test/CodeGen/X86/2008-10-16-VecUnaryOp.ll b/test/CodeGen/X86/2008-10-16-VecUnaryOp.ll
index de4c1e70b8d8..ac6fa0dc9b26 100644
--- a/test/CodeGen/X86/2008-10-16-VecUnaryOp.ll
+++ b/test/CodeGen/X86/2008-10-16-VecUnaryOp.ll
@@ -1,7 +1,7 @@
; RUN: llc < %s -march=x86 -mattr=+sse2
; PR2762
define void @foo(<4 x i32>* %p, <4 x double>* %q) {
- %n = load <4 x i32>* %p
+ %n = load <4 x i32>, <4 x i32>* %p
%z = sitofp <4 x i32> %n to <4 x double>
store <4 x double> %z, <4 x double>* %q
ret void
diff --git a/test/CodeGen/X86/2008-10-27-CoalescerBug.ll b/test/CodeGen/X86/2008-10-27-CoalescerBug.ll
index 0310a5dcb565..b1dcd031265b 100644
--- a/test/CodeGen/X86/2008-10-27-CoalescerBug.ll
+++ b/test/CodeGen/X86/2008-10-27-CoalescerBug.ll
@@ -20,7 +20,7 @@ bb: ; preds = %bb, %entry
; CHECK: movsd %xmm0, 16(%esp)
; CHECK: %bb3
bb3: ; preds = %bb30.loopexit, %bb25, %bb3
- %2 = load i32* null, align 4 ; <i32> [#uses=1]
+ %2 = load i32, i32* null, align 4 ; <i32> [#uses=1]
%3 = mul i32 %2, 0 ; <i32> [#uses=1]
%4 = icmp slt i32 0, %3 ; <i1> [#uses=1]
br i1 %4, label %bb18, label %bb3
diff --git a/test/CodeGen/X86/2008-11-06-testb.ll b/test/CodeGen/X86/2008-11-06-testb.ll
index e7caa7a10670..c8fad0635828 100644
--- a/test/CodeGen/X86/2008-11-06-testb.ll
+++ b/test/CodeGen/X86/2008-11-06-testb.ll
@@ -9,16 +9,16 @@ target triple = "i386-apple-darwin9.5"
define i32 @foo(%struct.x* %p) nounwind {
entry:
- %0 = getelementptr %struct.x* %p, i32 0, i32 0 ; <i8*> [#uses=1]
+ %0 = getelementptr %struct.x, %struct.x* %p, i32 0, i32 0 ; <i8*> [#uses=1]
store i8 55, i8* %0, align 1
%1 = bitcast %struct.x* %p to i32* ; <i32*> [#uses=1]
- %2 = load i32* %1, align 1 ; <i32> [#uses=1]
+ %2 = load i32, i32* %1, align 1 ; <i32> [#uses=1]
%3 = and i32 %2, 512 ; <i32> [#uses=1]
%4 = icmp eq i32 %3, 0 ; <i1> [#uses=1]
br i1 %4, label %bb5, label %bb
bb: ; preds = %entry
- %5 = tail call i32 (...)* @xx() nounwind ; <i32> [#uses=1]
+ %5 = tail call i32 (...) @xx() nounwind ; <i32> [#uses=1]
ret i32 %5
bb5: ; preds = %entry
diff --git a/test/CodeGen/X86/2008-11-29-ULT-Sign.ll b/test/CodeGen/X86/2008-11-29-ULT-Sign.ll
index 6dca141639e4..03442d631ac7 100644
--- a/test/CodeGen/X86/2008-11-29-ULT-Sign.ll
+++ b/test/CodeGen/X86/2008-11-29-ULT-Sign.ll
@@ -8,7 +8,7 @@ entry:
br i1 %cmp, label %if.end, label %if.then
if.then: ; preds = %entry
- %call = call i32 (...)* @b() ; <i32> [#uses=0]
+ %call = call i32 (...) @b() ; <i32> [#uses=0]
br label %if.end
if.end: ; preds = %if.then, %entry
diff --git a/test/CodeGen/X86/2008-12-01-SpillerAssert.ll b/test/CodeGen/X86/2008-12-01-SpillerAssert.ll
index d96d806388c9..cf292e3c0c52 100644
--- a/test/CodeGen/X86/2008-12-01-SpillerAssert.ll
+++ b/test/CodeGen/X86/2008-12-01-SpillerAssert.ll
@@ -10,6 +10,6 @@ declare i32 @printk(i8*, ...)
define void @display_cacheinfo(%struct.cpuinfo_x86* %c) nounwind section ".cpuinit.text" {
entry:
%asmtmp = tail call { i32, i32, i32, i32 } asm "cpuid", "={ax},={bx},={cx},={dx},0,2,~{dirflag},~{fpsr},~{flags}"(i32 -2147483643, i32 0) nounwind ; <{ i32, i32, i32, i32 }> [#uses=0]
- %0 = tail call i32 (i8*, ...)* @printk(i8* getelementptr ([70 x i8]* @.str10, i32 0, i64 0), i32 0, i32 0, i32 0, i32 0) nounwind ; <i32> [#uses=0]
+ %0 = tail call i32 (i8*, ...) @printk(i8* getelementptr ([70 x i8], [70 x i8]* @.str10, i32 0, i64 0), i32 0, i32 0, i32 0, i32 0) nounwind ; <i32> [#uses=0]
unreachable
}
diff --git a/test/CodeGen/X86/2008-12-01-loop-iv-used-outside-loop.ll b/test/CodeGen/X86/2008-12-01-loop-iv-used-outside-loop.ll
index 1f8bd45da14d..840b8ba0f8ce 100644
--- a/test/CodeGen/X86/2008-12-01-loop-iv-used-outside-loop.ll
+++ b/test/CodeGen/X86/2008-12-01-loop-iv-used-outside-loop.ll
@@ -15,8 +15,8 @@ bb: ; preds = %bb1, %bb1
bb1: ; preds = %bb, %entry
%P.0.rec = phi i32 [ 0, %entry ], [ %indvar.next, %bb ] ; <i32> [#uses=3]
- %P.0 = getelementptr i8* %Q, i32 %P.0.rec ; <i8*> [#uses=2]
- %0 = load i8* %P.0, align 1 ; <i8> [#uses=1]
+ %P.0 = getelementptr i8, i8* %Q, i32 %P.0.rec ; <i8*> [#uses=2]
+ %0 = load i8, i8* %P.0, align 1 ; <i8> [#uses=1]
switch i8 %0, label %bb3 [
i8 12, label %bb
i8 42, label %bb
@@ -24,7 +24,7 @@ bb1: ; preds = %bb, %entry
bb3: ; preds = %bb1
%P.0.sum = add i32 %P.0.rec, 2 ; <i32> [#uses=1]
- %1 = getelementptr i8* %Q, i32 %P.0.sum ; <i8*> [#uses=1]
+ %1 = getelementptr i8, i8* %Q, i32 %P.0.sum ; <i8*> [#uses=1]
store i8 4, i8* %1, align 1
ret i8* %P.0
}
diff --git a/test/CodeGen/X86/2008-12-02-IllegalResultType.ll b/test/CodeGen/X86/2008-12-02-IllegalResultType.ll
index 4b72cb919ffa..c828879e6b99 100644
--- a/test/CodeGen/X86/2008-12-02-IllegalResultType.ll
+++ b/test/CodeGen/X86/2008-12-02-IllegalResultType.ll
@@ -7,7 +7,7 @@ target triple = "i386-pc-linux-gnu"
define i32 @func_73(i32 %p_74) nounwind {
entry:
- %0 = load i32* @g_7, align 4 ; <i32> [#uses=1]
+ %0 = load i32, i32* @g_7, align 4 ; <i32> [#uses=1]
%1 = or i8 0, 118 ; <i8> [#uses=1]
%2 = zext i8 %1 to i64 ; <i64> [#uses=1]
%3 = icmp ne i32 %0, 0 ; <i1> [#uses=1]
diff --git a/test/CodeGen/X86/2008-12-02-dagcombine-1.ll b/test/CodeGen/X86/2008-12-02-dagcombine-1.ll
index fe5bff3e3459..004adc08091e 100644
--- a/test/CodeGen/X86/2008-12-02-dagcombine-1.ll
+++ b/test/CodeGen/X86/2008-12-02-dagcombine-1.ll
@@ -11,7 +11,7 @@ entry:
%4 = ptrtoint i8* %L to i32
%5 = add i32 %4, %3
%6 = add i32 %5, %1 ; <i32> [#uses=1]
- %7 = getelementptr i8* %a, i32 %6 ; <i8*> [#uses=1]
+ %7 = getelementptr i8, i8* %a, i32 %6 ; <i8*> [#uses=1]
br label %return
return: ; preds = %bb3
diff --git a/test/CodeGen/X86/2008-12-02-dagcombine-2.ll b/test/CodeGen/X86/2008-12-02-dagcombine-2.ll
index 4cb1b42693b9..6622bc26692b 100644
--- a/test/CodeGen/X86/2008-12-02-dagcombine-2.ll
+++ b/test/CodeGen/X86/2008-12-02-dagcombine-2.ll
@@ -9,7 +9,7 @@ entry:
%2 = sub i32 %1, %0
%3 = ptrtoint i8* %L to i32
%4 = sub i32 %2, %3 ; <i32> [#uses=1]
- %5 = getelementptr i8* %a, i32 %4 ; <i8*> [#uses=1]
+ %5 = getelementptr i8, i8* %a, i32 %4 ; <i8*> [#uses=1]
br label %return
return: ; preds = %bb3
diff --git a/test/CodeGen/X86/2008-12-19-EarlyClobberBug.ll b/test/CodeGen/X86/2008-12-19-EarlyClobberBug.ll
index 435adbbebfad..6bb29fde8454 100644
--- a/test/CodeGen/X86/2008-12-19-EarlyClobberBug.ll
+++ b/test/CodeGen/X86/2008-12-19-EarlyClobberBug.ll
@@ -16,7 +16,7 @@ entry:
%1 = trunc i64 %u to i32 ; <i32> [#uses=4]
%2 = lshr i64 %u, 32 ; <i64> [#uses=1]
%3 = trunc i64 %2 to i32 ; <i32> [#uses=2]
- %4 = tail call i32 (i8*, ...)* @printf(i8* getelementptr ([7 x i8]* @"\01LC", i32 0, i32 0), i32 %1) nounwind ; <i32> [#uses=0]
+ %4 = tail call i32 (i8*, ...) @printf(i8* getelementptr ([7 x i8], [7 x i8]* @"\01LC", i32 0, i32 0), i32 %1) nounwind ; <i32> [#uses=0]
%5 = icmp ult i32 %1, %0 ; <i1> [#uses=1]
br i1 %5, label %bb2, label %bb
diff --git a/test/CodeGen/X86/2008-12-23-crazy-address.ll b/test/CodeGen/X86/2008-12-23-crazy-address.ll
index 0e95c9e34e1c..b80f4731f8ab 100644
--- a/test/CodeGen/X86/2008-12-23-crazy-address.ll
+++ b/test/CodeGen/X86/2008-12-23-crazy-address.ll
@@ -7,7 +7,7 @@ entry:
%Y = alloca i32
call void @frob(i32* %Y) nounwind
%Y3 = bitcast i32* %Y to i8*
- %ctg2 = getelementptr i8* %Y3, i32 ptrtoint ([0 x i32]* @X to i32)
+ %ctg2 = getelementptr i8, i8* %Y3, i32 ptrtoint ([0 x i32]* @X to i32)
%0 = ptrtoint i8* %ctg2 to i32
call void @borf(i32 %0) nounwind
ret void
@@ -16,13 +16,13 @@ entry:
define void @bar(i32 %i) nounwind {
entry:
%Y = alloca [10 x i32]
- %0 = getelementptr [10 x i32]* %Y, i32 0, i32 0
+ %0 = getelementptr [10 x i32], [10 x i32]* %Y, i32 0, i32 0
call void @frob(i32* %0) nounwind
- %1 = getelementptr [0 x i32]* @X, i32 0, i32 %i
- %2 = getelementptr [10 x i32]* %Y, i32 0, i32 0
+ %1 = getelementptr [0 x i32], [0 x i32]* @X, i32 0, i32 %i
+ %2 = getelementptr [10 x i32], [10 x i32]* %Y, i32 0, i32 0
%3 = ptrtoint i32* %2 to i32
%4 = bitcast i32* %1 to i8*
- %ctg2 = getelementptr i8* %4, i32 %3
+ %ctg2 = getelementptr i8, i8* %4, i32 %3
%5 = ptrtoint i8* %ctg2 to i32
call void @borf(i32 %5) nounwind
ret void
diff --git a/test/CodeGen/X86/2009-01-16-SchedulerBug.ll b/test/CodeGen/X86/2009-01-16-SchedulerBug.ll
index 99bef6ce3fc9..ac6d0a983e44 100644
--- a/test/CodeGen/X86/2009-01-16-SchedulerBug.ll
+++ b/test/CodeGen/X86/2009-01-16-SchedulerBug.ll
@@ -10,12 +10,12 @@ declare { i32, i1 } @llvm.sadd.with.overflow.i32(i32, i32) nounwind
define fastcc %XXV* @bar(%CF* %call_frame, %XXV** %exception) nounwind {
prologue:
- %param_x = load %XXV** null ; <%XXV*> [#uses=1]
+ %param_x = load %XXV*, %XXV** null ; <%XXV*> [#uses=1]
%unique_1.i = ptrtoint %XXV* %param_x to i1 ; <i1> [#uses=1]
br i1 %unique_1.i, label %NextVerify42, label %FailedVerify
NextVerify42: ; preds = %prologue
- %param_y = load %XXV** null ; <%XXV*> [#uses=1]
+ %param_y = load %XXV*, %XXV** null ; <%XXV*> [#uses=1]
%unique_1.i58 = ptrtoint %XXV* %param_y to i1 ; <i1> [#uses=1]
br i1 %unique_1.i58, label %function_setup.cont, label %FailedVerify
diff --git a/test/CodeGen/X86/2009-01-18-ConstantExprCrash.ll b/test/CodeGen/X86/2009-01-18-ConstantExprCrash.ll
index f895336491e2..9a4e6eeae189 100644
--- a/test/CodeGen/X86/2009-01-18-ConstantExprCrash.ll
+++ b/test/CodeGen/X86/2009-01-18-ConstantExprCrash.ll
@@ -25,7 +25,7 @@ bb4.i.i70: ; preds = %bb4.i.i70, %bb.i51
br i1 false, label %_ZN11xercesc_2_59XMLString9stringLenEPKt.exit.i73, label %bb4.i.i70
_ZN11xercesc_2_59XMLString9stringLenEPKt.exit.i73: ; preds = %bb4.i.i70
- %0 = load i16* getelementptr ([7 x i16]* @_ZN11xercesc_2_5L17gIdeographicCharsE, i32 0, i32 add (i32 ashr (i32 sub (i32 ptrtoint (i16* getelementptr ([7 x i16]* @_ZN11xercesc_2_5L17gIdeographicCharsE, i32 0, i32 4) to i32), i32 ptrtoint ([7 x i16]* @_ZN11xercesc_2_5L17gIdeographicCharsE to i32)), i32 1), i32 1)), align 4 ; <i16> [#uses=0]
+ %0 = load i16, i16* getelementptr ([7 x i16], [7 x i16]* @_ZN11xercesc_2_5L17gIdeographicCharsE, i32 0, i32 add (i32 ashr (i32 sub (i32 ptrtoint (i16* getelementptr ([7 x i16], [7 x i16]* @_ZN11xercesc_2_5L17gIdeographicCharsE, i32 0, i32 4) to i32), i32 ptrtoint ([7 x i16]* @_ZN11xercesc_2_5L17gIdeographicCharsE to i32)), i32 1), i32 1)), align 4 ; <i16> [#uses=0]
br label %bb4.i5.i141
bb4.i5.i141: ; preds = %bb4.i5.i141, %_ZN11xercesc_2_59XMLString9stringLenEPKt.exit.i73
diff --git a/test/CodeGen/X86/2009-01-25-NoSSE.ll b/test/CodeGen/X86/2009-01-25-NoSSE.ll
index 8406c4a2cc83..c655f2c374ac 100644
--- a/test/CodeGen/X86/2009-01-25-NoSSE.ll
+++ b/test/CodeGen/X86/2009-01-25-NoSSE.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86-64 -mattr=-sse,-sse2 | not grep xmm
+; RUN: llc < %s -march=x86-64 -mattr=-sse,-sse2 | FileCheck %s
; PR3402
target datalayout =
"e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
@@ -6,6 +6,8 @@ target triple = "x86_64-unknown-linux-gnu"
%struct.ktermios = type { i32, i32, i32, i32, i8, [19 x i8], i32, i32 }
+; CHECK-NOT: xmm
+; CHECK-NOT: ymm
define void @foo() nounwind {
entry:
%termios = alloca %struct.ktermios, align 8
diff --git a/test/CodeGen/X86/2009-01-31-BigShift2.ll b/test/CodeGen/X86/2009-01-31-BigShift2.ll
index b478f27a95b9..90d14e7b707d 100644
--- a/test/CodeGen/X86/2009-01-31-BigShift2.ll
+++ b/test/CodeGen/X86/2009-01-31-BigShift2.ll
@@ -2,7 +2,7 @@
; PR3449
define void @test(<8 x double>* %P, i64* %Q) nounwind {
- %A = load <8 x double>* %P ; <<8 x double>> [#uses=1]
+ %A = load <8 x double>, <8 x double>* %P ; <<8 x double>> [#uses=1]
%B = bitcast <8 x double> %A to i512 ; <i512> [#uses=1]
%C = lshr i512 %B, 448 ; <i512> [#uses=1]
%D = trunc i512 %C to i64 ; <i64> [#uses=1]
diff --git a/test/CodeGen/X86/2009-02-01-LargeMask.ll b/test/CodeGen/X86/2009-02-01-LargeMask.ll
index c4042e6c9c68..e91208d5b3a1 100644
--- a/test/CodeGen/X86/2009-02-01-LargeMask.ll
+++ b/test/CodeGen/X86/2009-02-01-LargeMask.ll
@@ -19,7 +19,7 @@ entry:
bb.i49.i72: ; preds = %bb.i49.i72, %entry
%UNP.i1482.0 = phi i288 [ %.ins659, %bb.i49.i72 ], [ undef, %entry ] ; <i288> [#uses=1]
- %0 = load i32* null, align 4 ; <i32> [#uses=1]
+ %0 = load i32, i32* null, align 4 ; <i32> [#uses=1]
%1 = xor i32 %0, 17834 ; <i32> [#uses=1]
%2 = zext i32 %1 to i288 ; <i288> [#uses=1]
%3 = shl i288 %2, 160 ; <i288> [#uses=1]
diff --git a/test/CodeGen/X86/2009-02-03-AnalyzedTwice.ll b/test/CodeGen/X86/2009-02-03-AnalyzedTwice.ll
index e75af13a600b..592a7e33b196 100644
--- a/test/CodeGen/X86/2009-02-03-AnalyzedTwice.ll
+++ b/test/CodeGen/X86/2009-02-03-AnalyzedTwice.ll
@@ -13,7 +13,7 @@ bb: ; preds = %entry
unreachable
bb1: ; preds = %entry
- %0 = load i32* @g_3, align 4 ; <i32> [#uses=2]
+ %0 = load i32, i32* @g_3, align 4 ; <i32> [#uses=2]
%1 = sext i32 %0 to i64 ; <i64> [#uses=1]
%2 = or i64 %1, %p_66 ; <i64> [#uses=1]
%3 = shl i64 %2, 0 ; <i64> [#uses=1]
diff --git a/test/CodeGen/X86/2009-02-04-sext-i64-gep.ll b/test/CodeGen/X86/2009-02-04-sext-i64-gep.ll
index 4880f626d5da..04dfd846ad4a 100644
--- a/test/CodeGen/X86/2009-02-04-sext-i64-gep.ll
+++ b/test/CodeGen/X86/2009-02-04-sext-i64-gep.ll
@@ -5,5 +5,5 @@
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
target triple = "i386-pc-linux-gnu"
@p = common global [10 x i32] zeroinitializer, align 4 ; <[10 x i32]*>
-@g = global [1 x i32*] [ i32* bitcast (i8* getelementptr (i8* bitcast
+@g = global [1 x i32*] [ i32* bitcast (i8* getelementptr (i8, i8* bitcast
([10 x i32]* @p to i8*), i64 17179869092) to i32*) ], align 4
diff --git a/test/CodeGen/X86/2009-02-09-ivs-different-sizes.ll b/test/CodeGen/X86/2009-02-09-ivs-different-sizes.ll
index 36cc53545103..040bcff1bb25 100644
--- a/test/CodeGen/X86/2009-02-09-ivs-different-sizes.ll
+++ b/test/CodeGen/X86/2009-02-09-ivs-different-sizes.ll
@@ -21,7 +21,7 @@ bb56: ; preds = %bb62, %bb54
%indvar202 = trunc i64 %p_addr.0.pn.rec to i32 ; <i32>[#uses=1]
%frac_bits.0 = mul i32 %indvar202, %shift.0 ; <i32>[#uses=1]
%p_addr.6.rec = add i64 %p_addr.0.pn.rec, 1 ; <i64>[#uses=2]
- %p_addr.6 = getelementptr i8* null, i64 %p_addr.6.rec ; <i8*>[#uses=1]
+ %p_addr.6 = getelementptr i8, i8* null, i64 %p_addr.6.rec ; <i8*>[#uses=1]
br i1 false, label %bb66, label %bb62
bb62: ; preds = %bb56
diff --git a/test/CodeGen/X86/2009-02-11-codegenprepare-reuse.ll b/test/CodeGen/X86/2009-02-11-codegenprepare-reuse.ll
index 1284b0d1b7b2..19c2dfd4ea87 100644
--- a/test/CodeGen/X86/2009-02-11-codegenprepare-reuse.ll
+++ b/test/CodeGen/X86/2009-02-11-codegenprepare-reuse.ll
@@ -6,10 +6,10 @@ target triple = "i386-apple-darwin9.6"
define i32 @alac_decode_frame() nounwind {
entry:
- %tmp2 = load i8** null ; <i8*> [#uses=2]
- %tmp34 = getelementptr i8* %tmp2, i32 4 ; <i8*> [#uses=2]
+ %tmp2 = load i8*, i8** null ; <i8*> [#uses=2]
+ %tmp34 = getelementptr i8, i8* %tmp2, i32 4 ; <i8*> [#uses=2]
%tmp5.i424 = bitcast i8* %tmp34 to i8** ; <i8**> [#uses=2]
- %tmp15.i = getelementptr i8* %tmp2, i32 12 ; <i8*> [#uses=1]
+ %tmp15.i = getelementptr i8, i8* %tmp2, i32 12 ; <i8*> [#uses=1]
%0 = bitcast i8* %tmp15.i to i32* ; <i32*> [#uses=1]
br i1 false, label %if.then43, label %if.end47
@@ -17,9 +17,9 @@ if.then43: ; preds = %entry
ret i32 0
if.end47: ; preds = %entry
- %tmp5.i590 = load i8** %tmp5.i424 ; <i8*> [#uses=0]
+ %tmp5.i590 = load i8*, i8** %tmp5.i424 ; <i8*> [#uses=0]
store i32 19, i32* %0
- %tmp6.i569 = load i8** %tmp5.i424 ; <i8*> [#uses=0]
+ %tmp6.i569 = load i8*, i8** %tmp5.i424 ; <i8*> [#uses=0]
%1 = call i32 asm "bswap $0", "=r,0,~{dirflag},~{fpsr},~{flags}"(i32 0) nounwind ; <i32> [#uses=0]
br i1 false, label %bb.nph, label %if.then63
diff --git a/test/CodeGen/X86/2009-02-12-DebugInfoVLA.ll b/test/CodeGen/X86/2009-02-12-DebugInfoVLA.ll
index 6c8e3b5a8fdc..6c177e5b5f5a 100644
--- a/test/CodeGen/X86/2009-02-12-DebugInfoVLA.ll
+++ b/test/CodeGen/X86/2009-02-12-DebugInfoVLA.ll
@@ -1,9 +1,19 @@
; RUN: llc < %s
-; RUN: llc < %s -march=x86-64
+; RUN: llc < %s -march=x86-64 -verify-machineinstrs | FileCheck %s
; PR3538
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i386-apple-darwin9"
define signext i8 @foo(i8* %s1) nounwind ssp {
+
+; Make sure we generate:
+; movq -40(%rbp), %rsp
+; Instead of:
+; movq -40(%rbp), %rax
+; movq %rax, %rsp
+
+; CHECK-LABEL: @foo
+; CHECK: movq -40(%rbp), %rsp
+
entry:
%s1_addr = alloca i8* ; <i8**> [#uses=2]
%retval = alloca i32 ; <i32*> [#uses=2]
@@ -14,46 +24,46 @@ entry:
%2 = alloca i64 ; <i64*> [#uses=1]
%3 = alloca i64 ; <i64*> [#uses=6]
%"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
- call void @llvm.dbg.declare(metadata i8** %s1_addr, metadata !0, metadata !{!"0x102"}), !dbg !7
+ call void @llvm.dbg.declare(metadata i8** %s1_addr, metadata !0, metadata !DIExpression()), !dbg !7
store i8* %s1, i8** %s1_addr
- call void @llvm.dbg.declare(metadata [0 x i8]** %str.0, metadata !8, metadata !{!"0x102"}), !dbg !7
+ call void @llvm.dbg.declare(metadata [0 x i8]** %str.0, metadata !8, metadata !DIExpression()), !dbg !7
%4 = call i8* @llvm.stacksave(), !dbg !7 ; <i8*> [#uses=1]
store i8* %4, i8** %saved_stack.1, align 8, !dbg !7
- %5 = load i8** %s1_addr, align 8, !dbg !13 ; <i8*> [#uses=1]
+ %5 = load i8*, i8** %s1_addr, align 8, !dbg !13 ; <i8*> [#uses=1]
%6 = call i64 @strlen(i8* %5) nounwind readonly, !dbg !13 ; <i64> [#uses=1]
%7 = add i64 %6, 1, !dbg !13 ; <i64> [#uses=1]
store i64 %7, i64* %3, align 8, !dbg !13
- %8 = load i64* %3, align 8, !dbg !13 ; <i64> [#uses=1]
+ %8 = load i64, i64* %3, align 8, !dbg !13 ; <i64> [#uses=1]
%9 = sub nsw i64 %8, 1, !dbg !13 ; <i64> [#uses=0]
- %10 = load i64* %3, align 8, !dbg !13 ; <i64> [#uses=1]
+ %10 = load i64, i64* %3, align 8, !dbg !13 ; <i64> [#uses=1]
%11 = mul i64 %10, 8, !dbg !13 ; <i64> [#uses=0]
- %12 = load i64* %3, align 8, !dbg !13 ; <i64> [#uses=1]
+ %12 = load i64, i64* %3, align 8, !dbg !13 ; <i64> [#uses=1]
store i64 %12, i64* %2, align 8, !dbg !13
- %13 = load i64* %3, align 8, !dbg !13 ; <i64> [#uses=1]
+ %13 = load i64, i64* %3, align 8, !dbg !13 ; <i64> [#uses=1]
%14 = mul i64 %13, 8, !dbg !13 ; <i64> [#uses=0]
- %15 = load i64* %3, align 8, !dbg !13 ; <i64> [#uses=1]
+ %15 = load i64, i64* %3, align 8, !dbg !13 ; <i64> [#uses=1]
store i64 %15, i64* %1, align 8, !dbg !13
- %16 = load i64* %1, align 8, !dbg !13 ; <i64> [#uses=1]
+ %16 = load i64, i64* %1, align 8, !dbg !13 ; <i64> [#uses=1]
%17 = trunc i64 %16 to i32, !dbg !13 ; <i32> [#uses=1]
%18 = alloca i8, i32 %17, !dbg !13 ; <i8*> [#uses=1]
%19 = bitcast i8* %18 to [0 x i8]*, !dbg !13 ; <[0 x i8]*> [#uses=1]
store [0 x i8]* %19, [0 x i8]** %str.0, align 8, !dbg !13
- %20 = load [0 x i8]** %str.0, align 8, !dbg !15 ; <[0 x i8]*> [#uses=1]
- %21 = getelementptr inbounds [0 x i8]* %20, i64 0, i64 0, !dbg !15 ; <i8*> [#uses=1]
+ %20 = load [0 x i8]*, [0 x i8]** %str.0, align 8, !dbg !15 ; <[0 x i8]*> [#uses=1]
+ %21 = getelementptr inbounds [0 x i8], [0 x i8]* %20, i64 0, i64 0, !dbg !15 ; <i8*> [#uses=1]
store i8 0, i8* %21, align 1, !dbg !15
- %22 = load [0 x i8]** %str.0, align 8, !dbg !16 ; <[0 x i8]*> [#uses=1]
- %23 = getelementptr inbounds [0 x i8]* %22, i64 0, i64 0, !dbg !16 ; <i8*> [#uses=1]
- %24 = load i8* %23, align 1, !dbg !16 ; <i8> [#uses=1]
+ %22 = load [0 x i8]*, [0 x i8]** %str.0, align 8, !dbg !16 ; <[0 x i8]*> [#uses=1]
+ %23 = getelementptr inbounds [0 x i8], [0 x i8]* %22, i64 0, i64 0, !dbg !16 ; <i8*> [#uses=1]
+ %24 = load i8, i8* %23, align 1, !dbg !16 ; <i8> [#uses=1]
%25 = sext i8 %24 to i32, !dbg !16 ; <i32> [#uses=1]
store i32 %25, i32* %0, align 4, !dbg !16
- %26 = load i8** %saved_stack.1, align 8, !dbg !16 ; <i8*> [#uses=1]
+ %26 = load i8*, i8** %saved_stack.1, align 8, !dbg !16 ; <i8*> [#uses=1]
call void @llvm.stackrestore(i8* %26), !dbg !16
- %27 = load i32* %0, align 4, !dbg !16 ; <i32> [#uses=1]
+ %27 = load i32, i32* %0, align 4, !dbg !16 ; <i32> [#uses=1]
store i32 %27, i32* %retval, align 4, !dbg !16
br label %return, !dbg !16
return: ; preds = %entry
- %retval1 = load i32* %retval, !dbg !16 ; <i32> [#uses=1]
+ %retval1 = load i32, i32* %retval, !dbg !16 ; <i32> [#uses=1]
%retval12 = trunc i32 %retval1 to i8, !dbg !16 ; <i8> [#uses=1]
ret i8 %retval12, !dbg !16
}
@@ -66,22 +76,22 @@ declare i64 @strlen(i8*) nounwind readonly
declare void @llvm.stackrestore(i8*) nounwind
-!0 = !{!"0x101\00s1\002\000", !1, !2, !6} ; [ DW_TAG_arg_variable ]
-!1 = !{!"0x2e\00foo\00foo\00foo\002\000\001\000\006\000\000\000", i32 0, !2, !3, null, null, null, null, null} ; [ DW_TAG_subprogram ]
-!2 = !{!"0x11\001\004.2.1 (Based on Apple Inc. build 5658) (LLVM build)\001\00\000\00\000", !17, !18, !18, null, null, null} ; [ DW_TAG_compile_unit ]
-!3 = !{!"0x15\00\000\000\000\000\000\000", null, !2, null, !4, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!0 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "s1", line: 2, arg: 0, scope: !1, file: !2, type: !6)
+!1 = !DISubprogram(name: "foo", linkageName: "foo", line: 2, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, scope: !2, type: !3)
+!2 = !DICompileUnit(language: DW_LANG_C89, producer: "4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", isOptimized: true, emissionKind: 0, file: !17, enums: !18, retainedTypes: !18)
+!3 = !DISubroutineType(types: !4)
!4 = !{!5, !6}
-!5 = !{!"0x24\00char\000\008\008\000\000\006", null, !2} ; [ DW_TAG_base_type ]
-!6 = !{!"0xf\00\000\0064\0064\000\000", null, !2, !5} ; [ DW_TAG_pointer_type ]
-!7 = !MDLocation(line: 2, scope: !1)
-!8 = !{!"0x100\00str.0\003\000", !1, !2, !9} ; [ DW_TAG_auto_variable ]
-!9 = !{!"0xf\00\000\0064\0064\000\0064", null, !2, !10} ; [ DW_TAG_pointer_type ]
-!10 = !{!"0x1\00\000\008\008\000\000", null, !2, !5, !11, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 8, align 8, offset 0] [from char]
+!5 = !DIBasicType(tag: DW_TAG_base_type, name: "char", size: 8, align: 8, encoding: DW_ATE_signed_char)
+!6 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, scope: !2, baseType: !5)
+!7 = !DILocation(line: 2, scope: !1)
+!8 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "str.0", line: 3, scope: !1, file: !2, type: !9)
+!9 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, flags: DIFlagArtificial, scope: !2, baseType: !10)
+!10 = !DICompositeType(tag: DW_TAG_array_type, size: 8, align: 8, scope: !2, baseType: !5, elements: !11)
!11 = !{!12}
-!12 = !{!"0x21\000\001"} ; [ DW_TAG_subrange_type ]
-!13 = !MDLocation(line: 3, scope: !14)
-!14 = !{!"0xb\000\000\000", !17, !1} ; [ DW_TAG_lexical_block ]
-!15 = !MDLocation(line: 4, scope: !14)
-!16 = !MDLocation(line: 5, scope: !14)
-!17 = !{!"vla.c", !"/tmp/"}
+!12 = !DISubrange(count: 1)
+!13 = !DILocation(line: 3, scope: !14)
+!14 = distinct !DILexicalBlock(line: 0, column: 0, file: !17, scope: !1)
+!15 = !DILocation(line: 4, scope: !14)
+!16 = !DILocation(line: 5, scope: !14)
+!17 = !DIFile(filename: "vla.c", directory: "/tmp/")
!18 = !{i32 0}
diff --git a/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll b/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll
index e046b966921f..172a00a7c86f 100644
--- a/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll
+++ b/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll
@@ -1,5 +1,5 @@
; REQUIRES: asserts
-; RUN: llc < %s -march=x86-64 -mattr=+sse3,+sse4.1 -mcpu=penryn -stats 2>&1 | grep "7 machine-licm"
+; RUN: llc < %s -march=x86-64 -mattr=+sse3,+sse4.1 -mcpu=penryn -stats 2>&1 | grep "9 machine-licm"
; RUN: llc < %s -march=x86-64 -mattr=+sse3,+sse4.1 -mcpu=penryn | FileCheck %s
; rdar://6627786
; rdar://7792037
@@ -21,9 +21,9 @@ bb4: ; preds = %bb.i, %bb26, %bb4, %entry
; CHECK: xorl
; CHECK: movq
- %0 = call i32 (...)* @xxGetOffsetForCode(i32 undef) nounwind ; <i32> [#uses=0]
+ %0 = call i32 (...) @xxGetOffsetForCode(i32 undef) nounwind ; <i32> [#uses=0]
%ins = or i64 %p, 2097152 ; <i64> [#uses=1]
- %1 = call i32 (...)* @xxCalculateMidType(%struct.Key* %desc, i32 0) nounwind ; <i32> [#uses=1]
+ %1 = call i32 (...) @xxCalculateMidType(%struct.Key* %desc, i32 0) nounwind ; <i32> [#uses=1]
%cond = icmp eq i32 %1, 1 ; <i1> [#uses=1]
br i1 %cond, label %bb26, label %bb4
@@ -33,17 +33,17 @@ bb26: ; preds = %bb4
br i1 %cond.i, label %bb.i, label %bb4
bb.i: ; preds = %bb26
- %3 = load i32* null, align 4 ; <i32> [#uses=1]
+ %3 = load i32, i32* null, align 4 ; <i32> [#uses=1]
%4 = uitofp i32 %3 to float ; <float> [#uses=1]
%.sum13.i = add i64 0, 4 ; <i64> [#uses=1]
- %5 = getelementptr i8* null, i64 %.sum13.i ; <i8*> [#uses=1]
+ %5 = getelementptr i8, i8* null, i64 %.sum13.i ; <i8*> [#uses=1]
%6 = bitcast i8* %5 to i32* ; <i32*> [#uses=1]
- %7 = load i32* %6, align 4 ; <i32> [#uses=1]
+ %7 = load i32, i32* %6, align 4 ; <i32> [#uses=1]
%8 = uitofp i32 %7 to float ; <float> [#uses=1]
%.sum.i = add i64 0, 8 ; <i64> [#uses=1]
- %9 = getelementptr i8* null, i64 %.sum.i ; <i8*> [#uses=1]
+ %9 = getelementptr i8, i8* null, i64 %.sum.i ; <i8*> [#uses=1]
%10 = bitcast i8* %9 to i32* ; <i32*> [#uses=1]
- %11 = load i32* %10, align 4 ; <i32> [#uses=1]
+ %11 = load i32, i32* %10, align 4 ; <i32> [#uses=1]
%12 = uitofp i32 %11 to float ; <float> [#uses=1]
%13 = insertelement <4 x float> undef, float %4, i32 0 ; <<4 x float>> [#uses=1]
%14 = insertelement <4 x float> %13, float %8, i32 1 ; <<4 x float>> [#uses=1]
diff --git a/test/CodeGen/X86/2009-03-03-BTHang.ll b/test/CodeGen/X86/2009-03-03-BTHang.ll
index bb9592577435..d6d24cda295f 100644
--- a/test/CodeGen/X86/2009-03-03-BTHang.ll
+++ b/test/CodeGen/X86/2009-03-03-BTHang.ll
@@ -9,23 +9,23 @@ entry:
%0 = ptrtoint i8** %h to i32 ; <i32> [#uses=2]
%1 = and i32 %0, -4096 ; <i32> [#uses=1]
%2 = inttoptr i32 %1 to %struct.HandleBlock* ; <%struct.HandleBlock*> [#uses=3]
- %3 = getelementptr %struct.HandleBlock* %2, i32 0, i32 0, i32 0 ; <i32*> [#uses=1]
- %4 = load i32* %3, align 4096 ; <i32> [#uses=1]
+ %3 = getelementptr %struct.HandleBlock, %struct.HandleBlock* %2, i32 0, i32 0, i32 0 ; <i32*> [#uses=1]
+ %4 = load i32, i32* %3, align 4096 ; <i32> [#uses=1]
%5 = icmp eq i32 %4, 1751280747 ; <i1> [#uses=1]
br i1 %5, label %bb, label %bb1
bb: ; preds = %entry
- %6 = getelementptr %struct.HandleBlock* %2, i32 0, i32 1 ; <[990 x i8*]*> [#uses=1]
+ %6 = getelementptr %struct.HandleBlock, %struct.HandleBlock* %2, i32 0, i32 1 ; <[990 x i8*]*> [#uses=1]
%7 = ptrtoint [990 x i8*]* %6 to i32 ; <i32> [#uses=1]
%8 = sub i32 %0, %7 ; <i32> [#uses=2]
%9 = lshr i32 %8, 2 ; <i32> [#uses=1]
%10 = ashr i32 %8, 7 ; <i32> [#uses=1]
%11 = and i32 %10, 134217727 ; <i32> [#uses=1]
- %12 = getelementptr %struct.HandleBlock* %2, i32 0, i32 0, i32 %11 ; <i32*> [#uses=1]
+ %12 = getelementptr %struct.HandleBlock, %struct.HandleBlock* %2, i32 0, i32 0, i32 %11 ; <i32*> [#uses=1]
%not.i = and i32 %9, 31 ; <i32> [#uses=1]
%13 = xor i32 %not.i, 31 ; <i32> [#uses=1]
%14 = shl i32 1, %13 ; <i32> [#uses=1]
- %15 = load i32* %12, align 4 ; <i32> [#uses=1]
+ %15 = load i32, i32* %12, align 4 ; <i32> [#uses=1]
%16 = and i32 %15, %14 ; <i32> [#uses=1]
%17 = icmp eq i32 %16, 0 ; <i1> [#uses=1]
%tmp = zext i1 %17 to i8 ; <i8> [#uses=1]
diff --git a/test/CodeGen/X86/2009-03-05-burr-list-crash.ll b/test/CodeGen/X86/2009-03-05-burr-list-crash.ll
index 411a0c92830a..853bb16aa327 100644
--- a/test/CodeGen/X86/2009-03-05-burr-list-crash.ll
+++ b/test/CodeGen/X86/2009-03-05-burr-list-crash.ll
@@ -15,12 +15,12 @@ define fastcc i8* @1(i8*) nounwind {
; <label>:3 ; preds = %1
%4 = call i64 @strlen(i8* %0) nounwind readonly ; <i64> [#uses=1]
%5 = trunc i64 %4 to i32 ; <i32> [#uses=2]
- %6 = load i32* @0, align 4 ; <i32> [#uses=1]
+ %6 = load i32, i32* @0, align 4 ; <i32> [#uses=1]
%7 = sub i32 %5, %6 ; <i32> [#uses=2]
%8 = sext i32 %5 to i64 ; <i64> [#uses=1]
%9 = sext i32 %7 to i64 ; <i64> [#uses=1]
%10 = sub i64 %8, %9 ; <i64> [#uses=1]
- %11 = getelementptr i8* %0, i64 %10 ; <i8*> [#uses=1]
+ %11 = getelementptr i8, i8* %0, i64 %10 ; <i8*> [#uses=1]
%12 = icmp sgt i32 %7, 0 ; <i1> [#uses=1]
br i1 %12, label %13, label %14
diff --git a/test/CodeGen/X86/2009-03-09-APIntCrash.ll b/test/CodeGen/X86/2009-03-09-APIntCrash.ll
index 896c9686cc4e..3bff7dc76561 100644
--- a/test/CodeGen/X86/2009-03-09-APIntCrash.ll
+++ b/test/CodeGen/X86/2009-03-09-APIntCrash.ll
@@ -7,7 +7,7 @@ entry:
br i1 false, label %if.then, label %return
if.then: ; preds = %entry
- %srcval18 = load i128* null, align 8 ; <i128> [#uses=1]
+ %srcval18 = load i128, i128* null, align 8 ; <i128> [#uses=1]
%tmp15 = lshr i128 %srcval18, 64 ; <i128> [#uses=1]
%tmp9 = mul i128 %tmp15, 18446744073709551616000 ; <i128> [#uses=1]
br label %return
diff --git a/test/CodeGen/X86/2009-03-10-CoalescerBug.ll b/test/CodeGen/X86/2009-03-10-CoalescerBug.ll
index 90dff8878a78..38dd2fa0b108 100644
--- a/test/CodeGen/X86/2009-03-10-CoalescerBug.ll
+++ b/test/CodeGen/X86/2009-03-10-CoalescerBug.ll
@@ -8,7 +8,7 @@
define i32 @pnoutrefresh(%struct.WINDOW* %win, i32 %pminrow, i32 %pmincol, i32 %sminrow, i32 %smincol, i32 %smaxrow, i32 %smaxcol) nounwind optsize ssp {
entry:
- %0 = load i16* null, align 4 ; <i16> [#uses=2]
+ %0 = load i16, i16* null, align 4 ; <i16> [#uses=2]
%1 = icmp sgt i16 0, %0 ; <i1> [#uses=1]
br i1 %1, label %bb12, label %bb13
diff --git a/test/CodeGen/X86/2009-03-23-LinearScanBug.ll b/test/CodeGen/X86/2009-03-23-LinearScanBug.ll
index 06dfdc0c767f..584e4e63e4a9 100644
--- a/test/CodeGen/X86/2009-03-23-LinearScanBug.ll
+++ b/test/CodeGen/X86/2009-03-23-LinearScanBug.ll
@@ -2,9 +2,9 @@
define fastcc void @optimize_bit_field() nounwind {
bb4:
- %a = load i32* null ; <i32> [#uses=1]
- %s = load i32* getelementptr (i32* null, i32 1) ; <i32> [#uses=1]
- %z = load i32* getelementptr (i32* null, i32 2) ; <i32> [#uses=1]
+ %a = load i32, i32* null ; <i32> [#uses=1]
+ %s = load i32, i32* getelementptr (i32, i32* null, i32 1) ; <i32> [#uses=1]
+ %z = load i32, i32* getelementptr (i32, i32* null, i32 2) ; <i32> [#uses=1]
%r = bitcast i32 0 to i32 ; <i32> [#uses=1]
%q = trunc i32 %z to i8 ; <i8> [#uses=1]
%b = icmp eq i8 0, %q ; <i1> [#uses=1]
@@ -16,7 +16,7 @@ bb72: ; preds = %bb4
bb73: ; preds = %bb72, %bb4
%y = phi i32 [ %f, %bb72 ], [ %s, %bb4 ] ; <i32> [#uses=1]
- store i32 %y, i32* getelementptr (i32* null, i32 3)
+ store i32 %y, i32* getelementptr (i32, i32* null, i32 3)
unreachable
}
diff --git a/test/CodeGen/X86/2009-03-23-MultiUseSched.ll b/test/CodeGen/X86/2009-03-23-MultiUseSched.ll
index bbc1d341d4e0..276d52366ae8 100644
--- a/test/CodeGen/X86/2009-03-23-MultiUseSched.ll
+++ b/test/CodeGen/X86/2009-03-23-MultiUseSched.ll
@@ -10,30 +10,30 @@
@X = external global i64 ; <i64*> [#uses=25]
define fastcc i64 @foo() nounwind {
- %tmp = load volatile i64* @X ; <i64> [#uses=7]
- %tmp1 = load volatile i64* @X ; <i64> [#uses=5]
- %tmp2 = load volatile i64* @X ; <i64> [#uses=3]
- %tmp3 = load volatile i64* @X ; <i64> [#uses=1]
- %tmp4 = load volatile i64* @X ; <i64> [#uses=5]
- %tmp5 = load volatile i64* @X ; <i64> [#uses=3]
- %tmp6 = load volatile i64* @X ; <i64> [#uses=2]
- %tmp7 = load volatile i64* @X ; <i64> [#uses=1]
- %tmp8 = load volatile i64* @X ; <i64> [#uses=1]
- %tmp9 = load volatile i64* @X ; <i64> [#uses=1]
- %tmp10 = load volatile i64* @X ; <i64> [#uses=1]
- %tmp11 = load volatile i64* @X ; <i64> [#uses=1]
- %tmp12 = load volatile i64* @X ; <i64> [#uses=1]
- %tmp13 = load volatile i64* @X ; <i64> [#uses=1]
- %tmp14 = load volatile i64* @X ; <i64> [#uses=1]
- %tmp15 = load volatile i64* @X ; <i64> [#uses=1]
- %tmp16 = load volatile i64* @X ; <i64> [#uses=1]
- %tmp17 = load volatile i64* @X ; <i64> [#uses=1]
- %tmp18 = load volatile i64* @X ; <i64> [#uses=1]
- %tmp19 = load volatile i64* @X ; <i64> [#uses=1]
- %tmp20 = load volatile i64* @X ; <i64> [#uses=1]
- %tmp21 = load volatile i64* @X ; <i64> [#uses=1]
- %tmp22 = load volatile i64* @X ; <i64> [#uses=1]
- %tmp23 = load volatile i64* @X ; <i64> [#uses=1]
+ %tmp = load volatile i64, i64* @X ; <i64> [#uses=7]
+ %tmp1 = load volatile i64, i64* @X ; <i64> [#uses=5]
+ %tmp2 = load volatile i64, i64* @X ; <i64> [#uses=3]
+ %tmp3 = load volatile i64, i64* @X ; <i64> [#uses=1]
+ %tmp4 = load volatile i64, i64* @X ; <i64> [#uses=5]
+ %tmp5 = load volatile i64, i64* @X ; <i64> [#uses=3]
+ %tmp6 = load volatile i64, i64* @X ; <i64> [#uses=2]
+ %tmp7 = load volatile i64, i64* @X ; <i64> [#uses=1]
+ %tmp8 = load volatile i64, i64* @X ; <i64> [#uses=1]
+ %tmp9 = load volatile i64, i64* @X ; <i64> [#uses=1]
+ %tmp10 = load volatile i64, i64* @X ; <i64> [#uses=1]
+ %tmp11 = load volatile i64, i64* @X ; <i64> [#uses=1]
+ %tmp12 = load volatile i64, i64* @X ; <i64> [#uses=1]
+ %tmp13 = load volatile i64, i64* @X ; <i64> [#uses=1]
+ %tmp14 = load volatile i64, i64* @X ; <i64> [#uses=1]
+ %tmp15 = load volatile i64, i64* @X ; <i64> [#uses=1]
+ %tmp16 = load volatile i64, i64* @X ; <i64> [#uses=1]
+ %tmp17 = load volatile i64, i64* @X ; <i64> [#uses=1]
+ %tmp18 = load volatile i64, i64* @X ; <i64> [#uses=1]
+ %tmp19 = load volatile i64, i64* @X ; <i64> [#uses=1]
+ %tmp20 = load volatile i64, i64* @X ; <i64> [#uses=1]
+ %tmp21 = load volatile i64, i64* @X ; <i64> [#uses=1]
+ %tmp22 = load volatile i64, i64* @X ; <i64> [#uses=1]
+ %tmp23 = load volatile i64, i64* @X ; <i64> [#uses=1]
%tmp24 = call i64 @llvm.bswap.i64(i64 %tmp8) ; <i64> [#uses=1]
%tmp25 = add i64 %tmp6, %tmp5 ; <i64> [#uses=1]
%tmp26 = add i64 %tmp25, %tmp4 ; <i64> [#uses=1]
diff --git a/test/CodeGen/X86/2009-03-25-TestBug.ll b/test/CodeGen/X86/2009-03-25-TestBug.ll
index cc1d73da05c5..367a6d2a3b84 100644
--- a/test/CodeGen/X86/2009-03-25-TestBug.ll
+++ b/test/CodeGen/X86/2009-03-25-TestBug.ll
@@ -9,17 +9,17 @@
define void @func(i32* %b) nounwind {
bb1579.i.i: ; preds = %bb1514.i.i, %bb191.i.i
- %tmp176 = load i32* %b, align 4
+ %tmp176 = load i32, i32* %b, align 4
%tmp177 = and i32 %tmp176, 2
%tmp178 = icmp eq i32 %tmp177, 0
br i1 %tmp178, label %hello, label %world
hello:
- %h = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([7 x i8]* @hello, i32 0, i32 0))
+ %h = tail call i32 (i8*, ...) @printf( i8* getelementptr ([7 x i8], [7 x i8]* @hello, i32 0, i32 0))
ret void
world:
- %w = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([7 x i8]* @world, i32 0, i32 0))
+ %w = tail call i32 (i8*, ...) @printf( i8* getelementptr ([7 x i8], [7 x i8]* @world, i32 0, i32 0))
ret void
}
diff --git a/test/CodeGen/X86/2009-04-12-picrel.ll b/test/CodeGen/X86/2009-04-12-picrel.ll
index f1942801c7af..037dee95717f 100644
--- a/test/CodeGen/X86/2009-04-12-picrel.ll
+++ b/test/CodeGen/X86/2009-04-12-picrel.ll
@@ -7,7 +7,7 @@
define void @off01(i64 %i) nounwind {
entry:
%.sum = add i64 %i, 16
- %0 = getelementptr [131072 x i32]* @dst, i64 0, i64 %.sum
+ %0 = getelementptr [131072 x i32], [131072 x i32]* @dst, i64 0, i64 %.sum
store i32* %0, i32** @ptr, align 8
ret void
}
diff --git a/test/CodeGen/X86/2009-04-13-2AddrAssert.ll b/test/CodeGen/X86/2009-04-13-2AddrAssert.ll
index 4362ba437541..a3607c6815ac 100644
--- a/test/CodeGen/X86/2009-04-13-2AddrAssert.ll
+++ b/test/CodeGen/X86/2009-04-13-2AddrAssert.ll
@@ -7,7 +7,7 @@ target triple = "x86_64-undermydesk-freebsd8.0"
define i32 @main(i32 %argc, i8** nocapture %argv) nounwind {
entry:
- %call = tail call i32 (...)* @getpid() ; <i32> [#uses=1]
+ %call = tail call i32 (...) @getpid() ; <i32> [#uses=1]
%conv = trunc i32 %call to i16 ; <i16> [#uses=1]
%0 = tail call i16 asm "xchgb ${0:h}, ${0:b}","=Q,0,~{dirflag},~{fpsr},~{flags}"(i16 %conv) nounwind ; <i16> [#uses=0]
ret i32 undef
diff --git a/test/CodeGen/X86/2009-04-14-IllegalRegs.ll b/test/CodeGen/X86/2009-04-14-IllegalRegs.ll
index bed863e405a8..8055ea880795 100644
--- a/test/CodeGen/X86/2009-04-14-IllegalRegs.ll
+++ b/test/CodeGen/X86/2009-04-14-IllegalRegs.ll
@@ -10,25 +10,25 @@ entry:
%xxx = alloca %struct.X ; <%struct.X*> [#uses=6]
%0 = alloca i32 ; <i32*> [#uses=2]
%"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
- %1 = getelementptr %struct.X* %xxx, i32 0, i32 1 ; <[32 x i8]*> [#uses=1]
- %2 = getelementptr [32 x i8]* %1, i32 0, i32 31 ; <i8*> [#uses=1]
+ %1 = getelementptr %struct.X, %struct.X* %xxx, i32 0, i32 1 ; <[32 x i8]*> [#uses=1]
+ %2 = getelementptr [32 x i8], [32 x i8]* %1, i32 0, i32 31 ; <i8*> [#uses=1]
store i8 48, i8* %2, align 1
- %3 = getelementptr %struct.X* %xxx, i32 0, i32 1 ; <[32 x i8]*> [#uses=1]
- %4 = getelementptr [32 x i8]* %3, i32 0, i32 31 ; <i8*> [#uses=1]
- %5 = load i8* %4, align 1 ; <i8> [#uses=1]
- %6 = getelementptr %struct.X* %xxx, i32 0, i32 1 ; <[32 x i8]*> [#uses=1]
- %7 = getelementptr [32 x i8]* %6, i32 0, i32 0 ; <i8*> [#uses=1]
+ %3 = getelementptr %struct.X, %struct.X* %xxx, i32 0, i32 1 ; <[32 x i8]*> [#uses=1]
+ %4 = getelementptr [32 x i8], [32 x i8]* %3, i32 0, i32 31 ; <i8*> [#uses=1]
+ %5 = load i8, i8* %4, align 1 ; <i8> [#uses=1]
+ %6 = getelementptr %struct.X, %struct.X* %xxx, i32 0, i32 1 ; <[32 x i8]*> [#uses=1]
+ %7 = getelementptr [32 x i8], [32 x i8]* %6, i32 0, i32 0 ; <i8*> [#uses=1]
store i8 %5, i8* %7, align 1
- %8 = getelementptr %struct.X* %xxx, i32 0, i32 0 ; <i8*> [#uses=1]
+ %8 = getelementptr %struct.X, %struct.X* %xxx, i32 0, i32 0 ; <i8*> [#uses=1]
store i8 15, i8* %8, align 1
- %9 = call i32 (...)* bitcast (i32 (%struct.X*, %struct.X*)* @f to i32 (...)*)(%struct.X* byval align 4 %xxx, %struct.X* byval align 4 %xxx) nounwind ; <i32> [#uses=1]
+ %9 = call i32 (...) bitcast (i32 (%struct.X*, %struct.X*)* @f to i32 (...)*)(%struct.X* byval align 4 %xxx, %struct.X* byval align 4 %xxx) nounwind ; <i32> [#uses=1]
store i32 %9, i32* %0, align 4
- %10 = load i32* %0, align 4 ; <i32> [#uses=1]
+ %10 = load i32, i32* %0, align 4 ; <i32> [#uses=1]
store i32 %10, i32* %retval, align 4
br label %return
return: ; preds = %entry
- %retval1 = load i32* %retval ; <i32> [#uses=1]
+ %retval1 = load i32, i32* %retval ; <i32> [#uses=1]
ret i32 %retval1
}
diff --git a/test/CodeGen/X86/2009-04-16-SpillerUnfold.ll b/test/CodeGen/X86/2009-04-16-SpillerUnfold.ll
index 679a65d93d09..1d03a1b20a34 100644
--- a/test/CodeGen/X86/2009-04-16-SpillerUnfold.ll
+++ b/test/CodeGen/X86/2009-04-16-SpillerUnfold.ll
@@ -47,7 +47,7 @@ bb349: ; preds = %bb349, %entry
%23 = add i32 0, 12 ; <i32> [#uses=1]
%24 = and i32 %23, 12 ; <i32> [#uses=1]
%25 = zext i32 %24 to i64 ; <i64> [#uses=1]
- %26 = getelementptr [16 x i64]* null, i64 0, i64 %25 ; <i64*> [#uses=0]
+ %26 = getelementptr [16 x i64], [16 x i64]* null, i64 0, i64 %25 ; <i64*> [#uses=0]
%27 = add i64 0, %e.0489 ; <i64> [#uses=1]
%28 = add i64 %27, 0 ; <i64> [#uses=1]
%29 = add i64 %28, 0 ; <i64> [#uses=1]
@@ -67,12 +67,12 @@ bb349: ; preds = %bb349, %entry
%43 = or i32 0, 6 ; <i32> [#uses=1]
%44 = and i32 %43, 14 ; <i32> [#uses=1]
%45 = zext i32 %44 to i64 ; <i64> [#uses=1]
- %46 = getelementptr [16 x i64]* null, i64 0, i64 %45 ; <i64*> [#uses=1]
+ %46 = getelementptr [16 x i64], [16 x i64]* null, i64 0, i64 %45 ; <i64*> [#uses=1]
%not417 = xor i64 %42, -1 ; <i64> [#uses=1]
%47 = and i64 %20, %not417 ; <i64> [#uses=1]
%48 = xor i64 0, %47 ; <i64> [#uses=1]
- %49 = getelementptr [80 x i64]* @K512, i64 0, i64 0 ; <i64*> [#uses=1]
- %50 = load i64* %49, align 8 ; <i64> [#uses=1]
+ %49 = getelementptr [80 x i64], [80 x i64]* @K512, i64 0, i64 0 ; <i64*> [#uses=1]
+ %50 = load i64, i64* %49, align 8 ; <i64> [#uses=1]
%51 = add i64 %48, 0 ; <i64> [#uses=1]
%52 = add i64 %51, 0 ; <i64> [#uses=1]
%53 = add i64 %52, 0 ; <i64> [#uses=1]
@@ -87,13 +87,13 @@ bb349: ; preds = %bb349, %entry
%60 = or i32 0, 7 ; <i32> [#uses=1]
%61 = and i32 %60, 15 ; <i32> [#uses=1]
%62 = zext i32 %61 to i64 ; <i64> [#uses=1]
- %63 = getelementptr [16 x i64]* null, i64 0, i64 %62 ; <i64*> [#uses=2]
- %64 = load i64* null, align 8 ; <i64> [#uses=1]
+ %63 = getelementptr [16 x i64], [16 x i64]* null, i64 0, i64 %62 ; <i64*> [#uses=2]
+ %64 = load i64, i64* null, align 8 ; <i64> [#uses=1]
%65 = lshr i64 %64, 6 ; <i64> [#uses=1]
%66 = xor i64 0, %65 ; <i64> [#uses=1]
%67 = xor i64 %66, 0 ; <i64> [#uses=1]
- %68 = load i64* %46, align 8 ; <i64> [#uses=1]
- %69 = load i64* null, align 8 ; <i64> [#uses=1]
+ %68 = load i64, i64* %46, align 8 ; <i64> [#uses=1]
+ %69 = load i64, i64* null, align 8 ; <i64> [#uses=1]
%70 = add i64 %68, 0 ; <i64> [#uses=1]
%71 = add i64 %70, %67 ; <i64> [#uses=1]
%72 = add i64 %71, %69 ; <i64> [#uses=1]
@@ -105,8 +105,8 @@ bb349: ; preds = %bb349, %entry
%not429 = xor i64 %57, -1 ; <i64> [#uses=1]
%76 = and i64 %33, %not429 ; <i64> [#uses=1]
%77 = xor i64 %75, %76 ; <i64> [#uses=1]
- %78 = getelementptr [80 x i64]* @K512, i64 0, i64 0 ; <i64*> [#uses=1]
- %79 = load i64* %78, align 16 ; <i64> [#uses=1]
+ %78 = getelementptr [80 x i64], [80 x i64]* @K512, i64 0, i64 0 ; <i64*> [#uses=1]
+ %79 = load i64, i64* %78, align 16 ; <i64> [#uses=1]
%80 = add i64 %77, %20 ; <i64> [#uses=1]
%81 = add i64 %80, %72 ; <i64> [#uses=1]
%82 = add i64 %81, %74 ; <i64> [#uses=1]
@@ -119,14 +119,14 @@ bb349: ; preds = %bb349, %entry
%87 = add i64 0, %85 ; <i64> [#uses=1]
%asmtmp435 = call i64 asm "rorq $1,$0", "=r,J,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 8, i64 0) nounwind ; <i64> [#uses=1]
%88 = xor i64 0, %asmtmp435 ; <i64> [#uses=1]
- %89 = load i64* null, align 8 ; <i64> [#uses=3]
+ %89 = load i64, i64* null, align 8 ; <i64> [#uses=3]
%asmtmp436 = call i64 asm "rorq $1,$0", "=r,J,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 19, i64 %89) nounwind ; <i64> [#uses=1]
%asmtmp437 = call i64 asm "rorq $1,$0", "=r,J,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 61, i64 %89) nounwind ; <i64> [#uses=1]
%90 = lshr i64 %89, 6 ; <i64> [#uses=1]
%91 = xor i64 %asmtmp436, %90 ; <i64> [#uses=1]
%92 = xor i64 %91, %asmtmp437 ; <i64> [#uses=1]
- %93 = load i64* %63, align 8 ; <i64> [#uses=1]
- %94 = load i64* null, align 8 ; <i64> [#uses=1]
+ %93 = load i64, i64* %63, align 8 ; <i64> [#uses=1]
+ %94 = load i64, i64* null, align 8 ; <i64> [#uses=1]
%95 = add i64 %93, %88 ; <i64> [#uses=1]
%96 = add i64 %95, %92 ; <i64> [#uses=1]
%97 = add i64 %96, %94 ; <i64> [#uses=2]
diff --git a/test/CodeGen/X86/2009-04-24.ll b/test/CodeGen/X86/2009-04-24.ll
index d104c875760a..7647dcc7febd 100644
--- a/test/CodeGen/X86/2009-04-24.ll
+++ b/test/CodeGen/X86/2009-04-24.ll
@@ -8,6 +8,6 @@
define i32 @f() {
entry:
- %tmp1 = load i32* @i
+ %tmp1 = load i32, i32* @i
ret i32 %tmp1
}
diff --git a/test/CodeGen/X86/2009-04-25-CoalescerBug.ll b/test/CodeGen/X86/2009-04-25-CoalescerBug.ll
index 94d3eb21cecc..c687b6905b7d 100644
--- a/test/CodeGen/X86/2009-04-25-CoalescerBug.ll
+++ b/test/CodeGen/X86/2009-04-25-CoalescerBug.ll
@@ -6,7 +6,7 @@ entry:
br label %while.cond
while.cond: ; preds = %while.cond, %entry
- %tmp15 = load i32* %tmp13 ; <i32> [#uses=2]
+ %tmp15 = load i32, i32* %tmp13 ; <i32> [#uses=2]
%bf.lo = lshr i32 %tmp15, 1 ; <i32> [#uses=1]
%bf.lo.cleared = and i32 %bf.lo, 2147483647 ; <i32> [#uses=1]
%conv = zext i32 %bf.lo.cleared to i64 ; <i64> [#uses=1]
diff --git a/test/CodeGen/X86/2009-04-27-CoalescerAssert.ll b/test/CodeGen/X86/2009-04-27-CoalescerAssert.ll
index 7981a52e740a..a364c89b5f49 100644
--- a/test/CodeGen/X86/2009-04-27-CoalescerAssert.ll
+++ b/test/CodeGen/X86/2009-04-27-CoalescerAssert.ll
@@ -22,24 +22,24 @@
define void @getAffNeighbour(i32 %curr_mb_nr, i32 %xN, i32 %yN, i32 %is_chroma, %struct.PixelPos* %pix) nounwind {
entry:
%Opq.sa.calc = add i32 0, 2 ; <i32> [#uses=2]
- %0 = load %struct.ImageParameters** @img, align 8 ; <%struct.ImageParameters*> [#uses=3]
- %1 = getelementptr %struct.ImageParameters* %0, i64 0, i32 39 ; <%struct.Macroblock**> [#uses=1]
- %2 = load %struct.Macroblock** %1, align 8 ; <%struct.Macroblock*> [#uses=24]
+ %0 = load %struct.ImageParameters*, %struct.ImageParameters** @img, align 8 ; <%struct.ImageParameters*> [#uses=3]
+ %1 = getelementptr %struct.ImageParameters, %struct.ImageParameters* %0, i64 0, i32 39 ; <%struct.Macroblock**> [#uses=1]
+ %2 = load %struct.Macroblock*, %struct.Macroblock** %1, align 8 ; <%struct.Macroblock*> [#uses=24]
%3 = zext i32 %curr_mb_nr to i64 ; <i64> [#uses=24]
%4 = sext i32 %is_chroma to i64 ; <i64> [#uses=8]
br label %meshBB392
entry.fragment: ; preds = %meshBB392
%Opq.sa.calc747 = add i32 %Opq.sa.calc921, 70 ; <i32> [#uses=0]
- %5 = getelementptr %struct.ImageParameters* %0, i64 0, i32 119, i64 %4, i64 0 ; <i32*> [#uses=1]
- %6 = load i32* %5, align 4 ; <i32> [#uses=2]
- %7 = getelementptr %struct.ImageParameters* %0, i64 0, i32 119, i64 %4, i64 1 ; <i32*> [#uses=1]
- %8 = load i32* %7, align 4 ; <i32> [#uses=5]
+ %5 = getelementptr %struct.ImageParameters, %struct.ImageParameters* %0, i64 0, i32 119, i64 %4, i64 0 ; <i32*> [#uses=1]
+ %6 = load i32, i32* %5, align 4 ; <i32> [#uses=2]
+ %7 = getelementptr %struct.ImageParameters, %struct.ImageParameters* %0, i64 0, i32 119, i64 %4, i64 1 ; <i32*> [#uses=1]
+ %8 = load i32, i32* %7, align 4 ; <i32> [#uses=5]
br label %entry.fragment181
entry.fragment181: ; preds = %entry.fragment
%Opq.sa.calc863 = add i32 %Opq.sa.calc921, -50 ; <i32> [#uses=4]
- %9 = getelementptr %struct.PixelPos* %pix, i64 0, i32 0 ; <i32*> [#uses=4]
+ %9 = getelementptr %struct.PixelPos, %struct.PixelPos* %pix, i64 0, i32 0 ; <i32*> [#uses=4]
store i32 0, i32* %9, align 4
%10 = add i32 %8, -1 ; <i32> [#uses=6]
%11 = icmp slt i32 %10, %yN ; <i1> [#uses=1]
@@ -74,8 +74,8 @@ bb4: ; preds = %bb3
bb5: ; preds = %meshBB428
%Opq.sa.calc470 = sub i32 %Opq.sa.calc897, -49 ; <i32> [#uses=1]
- %17 = getelementptr %struct.Macroblock* %2, i64 %3, i32 20 ; <i32*> [#uses=1]
- %18 = load i32* %17, align 4 ; <i32> [#uses=1]
+ %17 = getelementptr %struct.Macroblock, %struct.Macroblock* %2, i64 %3, i32 20 ; <i32*> [#uses=1]
+ %18 = load i32, i32* %17, align 4 ; <i32> [#uses=1]
br label %bb5.fragment
bb5.fragment: ; preds = %bb5
@@ -91,10 +91,10 @@ bb6: ; preds = %bb5.fragment
bb7: ; preds = %bb6
%Opq.sa.calc476 = add i32 %Opq.sa.calc873, -58 ; <i32> [#uses=1]
- %22 = getelementptr %struct.Macroblock* %2, i64 %3, i32 25 ; <i32*> [#uses=1]
- %23 = load i32* %22, align 8 ; <i32> [#uses=1]
+ %22 = getelementptr %struct.Macroblock, %struct.Macroblock* %2, i64 %3, i32 25 ; <i32*> [#uses=1]
+ %23 = load i32, i32* %22, align 8 ; <i32> [#uses=1]
%24 = add i32 %23, 1 ; <i32> [#uses=1]
- %25 = getelementptr %struct.PixelPos* %pix, i64 0, i32 1 ; <i32*> [#uses=1]
+ %25 = getelementptr %struct.PixelPos, %struct.PixelPos* %pix, i64 0, i32 1 ; <i32*> [#uses=1]
br label %meshBB388
bb7.fragment: ; preds = %meshBB388
@@ -102,41 +102,41 @@ bb7.fragment: ; preds = %meshBB388
%Opq.sa.calc707 = add i32 %Opq.sa.calc709, %Opq.sa.calc886 ; <i32> [#uses=1]
%Opq.sa.calc708 = xor i32 %Opq.sa.calc707, 474 ; <i32> [#uses=0]
store i32 %.SV194.phi, i32* %.SV196.phi, align 4
- %26 = getelementptr %struct.Macroblock* %.load17.SV.phi, i64 %.load36.SV.phi, i32 29 ; <i32*> [#uses=1]
- %27 = load i32* %26, align 8 ; <i32> [#uses=2]
+ %26 = getelementptr %struct.Macroblock, %struct.Macroblock* %.load17.SV.phi, i64 %.load36.SV.phi, i32 29 ; <i32*> [#uses=1]
+ %27 = load i32, i32* %26, align 8 ; <i32> [#uses=2]
store i32 %27, i32* %.load67.SV.phi, align 4
br label %bb96
bb8: ; preds = %meshBB348
%Opq.sa.calc479 = sub i32 %Opq.sa.calc805, 141 ; <i32> [#uses=1]
- %28 = getelementptr %struct.Macroblock* %2, i64 %3, i32 22 ; <i32*> [#uses=2]
- %29 = load i32* %28, align 4 ; <i32> [#uses=2]
- %30 = getelementptr %struct.PixelPos* %pix, i64 0, i32 1 ; <i32*> [#uses=2]
+ %28 = getelementptr %struct.Macroblock, %struct.Macroblock* %2, i64 %3, i32 22 ; <i32*> [#uses=2]
+ %29 = load i32, i32* %28, align 4 ; <i32> [#uses=2]
+ %30 = getelementptr %struct.PixelPos, %struct.PixelPos* %pix, i64 0, i32 1 ; <i32*> [#uses=2]
br label %meshBB368
bb8.fragment: ; preds = %meshBB368
%Opq.sa.calc765 = sub i32 %Opq.sa.calc768, -115 ; <i32> [#uses=2]
store i32 %.SV198.phi, i32* %.SV200.phi, align 4
- %31 = getelementptr %struct.Macroblock* %.load16.SV.phi, i64 %.load35.SV.phi, i32 26 ; <i32*> [#uses=2]
- %32 = load i32* %31, align 4 ; <i32> [#uses=4]
+ %31 = getelementptr %struct.Macroblock, %struct.Macroblock* %.load16.SV.phi, i64 %.load35.SV.phi, i32 26 ; <i32*> [#uses=2]
+ %32 = load i32, i32* %31, align 4 ; <i32> [#uses=4]
store i32 %32, i32* %.load66.SV.phi, align 4
- %33 = load i32* %31, align 4 ; <i32> [#uses=1]
+ %33 = load i32, i32* %31, align 4 ; <i32> [#uses=1]
%34 = icmp eq i32 %33, 0 ; <i1> [#uses=1]
br i1 %34, label %bb96, label %bb9
bb9: ; preds = %bb8.fragment
%Opq.sa.calc482 = xor i32 %Opq.sa.calc765, 163 ; <i32> [#uses=0]
- %35 = load %struct.ImageParameters** @img, align 8 ; <%struct.ImageParameters*> [#uses=1]
- %36 = getelementptr %struct.ImageParameters* %35, i64 0, i32 39 ; <%struct.Macroblock**> [#uses=1]
- %37 = load %struct.Macroblock** %36, align 8 ; <%struct.Macroblock*> [#uses=1]
- %38 = load i32* %.SV76.phi, align 4 ; <i32> [#uses=1]
+ %35 = load %struct.ImageParameters*, %struct.ImageParameters** @img, align 8 ; <%struct.ImageParameters*> [#uses=1]
+ %36 = getelementptr %struct.ImageParameters, %struct.ImageParameters* %35, i64 0, i32 39 ; <%struct.Macroblock**> [#uses=1]
+ %37 = load %struct.Macroblock*, %struct.Macroblock** %36, align 8 ; <%struct.Macroblock*> [#uses=1]
+ %38 = load i32, i32* %.SV76.phi, align 4 ; <i32> [#uses=1]
br label %bb9.fragment
bb9.fragment: ; preds = %bb9
%Opq.sa.calc999 = add i32 %Opq.sa.calc765, -44 ; <i32> [#uses=1]
%39 = sext i32 %38 to i64 ; <i64> [#uses=1]
- %40 = getelementptr %struct.Macroblock* %37, i64 %39, i32 20 ; <i32*> [#uses=1]
- %41 = load i32* %40, align 4 ; <i32> [#uses=1]
+ %40 = getelementptr %struct.Macroblock, %struct.Macroblock* %37, i64 %39, i32 20 ; <i32*> [#uses=1]
+ %41 = load i32, i32* %40, align 4 ; <i32> [#uses=1]
%42 = icmp eq i32 %41, 0 ; <i1> [#uses=1]
br i1 %42, label %bb96, label %bb11
@@ -156,42 +156,42 @@ bb13: ; preds = %bb5.fragment
%Opq.sa.calc490 = xor i32 %Opq.sa.calc873, 175 ; <i32> [#uses=1]
%Opq.sa.calc488 = sub i32 %Opq.sa.calc490, %Opq.sa.calc873 ; <i32> [#uses=1]
%Opq.sa.calc489 = sub i32 %Opq.sa.calc488, 133 ; <i32> [#uses=1]
- %46 = getelementptr %struct.Macroblock* %2, i64 %3, i32 25 ; <i32*> [#uses=1]
+ %46 = getelementptr %struct.Macroblock, %struct.Macroblock* %2, i64 %3, i32 25 ; <i32*> [#uses=1]
br label %meshBB360
bb13.fragment: ; preds = %meshBB360
%Opq.sa.calc870 = add i32 %Opq.sa.calc866, -129 ; <i32> [#uses=3]
- %47 = load i32* %.SV208.phi, align 8 ; <i32> [#uses=3]
+ %47 = load i32, i32* %.SV208.phi, align 8 ; <i32> [#uses=3]
br i1 %.load74.SV.phi, label %bb14, label %meshBB412
bb14: ; preds = %bb13.fragment
%Opq.sa.calc493 = add i32 %Opq.sa.calc870, 103 ; <i32> [#uses=1]
- %48 = getelementptr %struct.PixelPos* %pix, i64 0, i32 1 ; <i32*> [#uses=2]
+ %48 = getelementptr %struct.PixelPos, %struct.PixelPos* %pix, i64 0, i32 1 ; <i32*> [#uses=2]
store i32 %47, i32* %48, align 4
- %49 = getelementptr %struct.Macroblock* %2, i64 %3, i32 29 ; <i32*> [#uses=2]
+ %49 = getelementptr %struct.Macroblock, %struct.Macroblock* %2, i64 %3, i32 29 ; <i32*> [#uses=2]
br label %bb14.fragment
bb14.fragment: ; preds = %bb14
%Opq.sa.calc723 = sub i32 %Opq.sa.calc493, 117 ; <i32> [#uses=4]
- %50 = load i32* %49, align 8 ; <i32> [#uses=4]
+ %50 = load i32, i32* %49, align 8 ; <i32> [#uses=4]
store i32 %50, i32* %.SV52.phi1113, align 4
- %51 = load i32* %49, align 8 ; <i32> [#uses=1]
+ %51 = load i32, i32* %49, align 8 ; <i32> [#uses=1]
%52 = icmp eq i32 %51, 0 ; <i1> [#uses=1]
br i1 %52, label %meshBB, label %bb15
bb15: ; preds = %bb14.fragment
%Opq.sa.calc496 = sub i32 %Opq.sa.calc723, -8 ; <i32> [#uses=1]
- %53 = load %struct.ImageParameters** @img, align 8 ; <%struct.ImageParameters*> [#uses=1]
- %54 = getelementptr %struct.ImageParameters* %53, i64 0, i32 39 ; <%struct.Macroblock**> [#uses=1]
- %55 = load %struct.Macroblock** %54, align 8 ; <%struct.Macroblock*> [#uses=1]
- %56 = load i32* %.SV208.phi, align 8 ; <i32> [#uses=1]
+ %53 = load %struct.ImageParameters*, %struct.ImageParameters** @img, align 8 ; <%struct.ImageParameters*> [#uses=1]
+ %54 = getelementptr %struct.ImageParameters, %struct.ImageParameters* %53, i64 0, i32 39 ; <%struct.Macroblock**> [#uses=1]
+ %55 = load %struct.Macroblock*, %struct.Macroblock** %54, align 8 ; <%struct.Macroblock*> [#uses=1]
+ %56 = load i32, i32* %.SV208.phi, align 8 ; <i32> [#uses=1]
br label %meshBB324
bb15.fragment: ; preds = %meshBB324
%Opq.sa.calc925 = xor i32 %Opq.sa.calc750, 215 ; <i32> [#uses=2]
%57 = sext i32 %.SV214.phi to i64 ; <i64> [#uses=1]
- %58 = getelementptr %struct.Macroblock* %.SV212.phi, i64 %57, i32 20 ; <i32*> [#uses=1]
- %59 = load i32* %58, align 4 ; <i32> [#uses=1]
+ %58 = getelementptr %struct.Macroblock, %struct.Macroblock* %.SV212.phi, i64 %57, i32 20 ; <i32*> [#uses=1]
+ %59 = load i32, i32* %58, align 4 ; <i32> [#uses=1]
%60 = icmp eq i32 %59, 0 ; <i1> [#uses=1]
br i1 %60, label %bb16, label %bb96
@@ -209,14 +209,14 @@ bb16.fragment: ; preds = %bb16
bb19: ; preds = %meshBB412
%Opq.sa.calc502 = sub i32 %Opq.sa.calc932, -94 ; <i32> [#uses=0]
%63 = add i32 %.SV87.phi1030, 1 ; <i32> [#uses=1]
- %64 = getelementptr %struct.PixelPos* %pix, i64 0, i32 1 ; <i32*> [#uses=1]
+ %64 = getelementptr %struct.PixelPos, %struct.PixelPos* %pix, i64 0, i32 1 ; <i32*> [#uses=1]
br label %bb19.fragment
bb19.fragment: ; preds = %bb19
%Opq.sa.calc880 = xor i32 %Opq.sa.calc932, 246 ; <i32> [#uses=0]
store i32 %63, i32* %64, align 4
- %65 = getelementptr %struct.Macroblock* %2, i64 %3, i32 29 ; <i32*> [#uses=1]
- %66 = load i32* %65, align 8 ; <i32> [#uses=2]
+ %65 = getelementptr %struct.Macroblock, %struct.Macroblock* %2, i64 %3, i32 29 ; <i32*> [#uses=1]
+ %66 = load i32, i32* %65, align 8 ; <i32> [#uses=2]
store i32 %66, i32* %.SV52.phi1186, align 4
br label %bb96
@@ -227,8 +227,8 @@ bb21: ; preds = %meshBB392
bb23: ; preds = %meshBB360
%Opq.sa.calc509 = xor i32 %Opq.sa.calc866, 70 ; <i32> [#uses=1]
%Opq.sa.calc508 = sub i32 %Opq.sa.calc509, -19 ; <i32> [#uses=0]
- %67 = getelementptr %struct.Macroblock* %2, i64 %3, i32 20 ; <i32*> [#uses=1]
- %68 = load i32* %67, align 4 ; <i32> [#uses=1]
+ %67 = getelementptr %struct.Macroblock, %struct.Macroblock* %2, i64 %3, i32 20 ; <i32*> [#uses=1]
+ %68 = load i32, i32* %67, align 4 ; <i32> [#uses=1]
%69 = icmp eq i32 %68, 0 ; <i1> [#uses=1]
%70 = and i32 %curr_mb_nr, 1 ; <i32> [#uses=1]
%71 = icmp eq i32 %70, 0 ; <i1> [#uses=2]
@@ -236,9 +236,9 @@ bb23: ; preds = %meshBB360
bb23.fragment: ; preds = %bb23
%Opq.sa.calc847 = sub i32 %Opq.sa.calc866, -9 ; <i32> [#uses=2]
- %72 = getelementptr %struct.Macroblock* %2, i64 %3, i32 22 ; <i32*> [#uses=3]
- %73 = load i32* %72, align 4 ; <i32> [#uses=3]
- %74 = getelementptr %struct.PixelPos* %pix, i64 0, i32 1 ; <i32*> [#uses=3]
+ %72 = getelementptr %struct.Macroblock, %struct.Macroblock* %2, i64 %3, i32 22 ; <i32*> [#uses=3]
+ %73 = load i32, i32* %72, align 4 ; <i32> [#uses=3]
+ %74 = getelementptr %struct.PixelPos, %struct.PixelPos* %pix, i64 0, i32 1 ; <i32*> [#uses=3]
store i32 %73, i32* %74, align 4
br label %bb23.fragment182
@@ -246,10 +246,10 @@ bb23.fragment182: ; preds = %bb23.fragment
%Opq.sa.calc744 = xor i32 %Opq.sa.calc847, 152 ; <i32> [#uses=4]
%Opq.sa.calc742 = add i32 %Opq.sa.calc744, %Opq.sa.calc847 ; <i32> [#uses=1]
%Opq.sa.calc743 = add i32 %Opq.sa.calc742, -149 ; <i32> [#uses=2]
- %75 = getelementptr %struct.Macroblock* %2, i64 %3, i32 26 ; <i32*> [#uses=2]
- %76 = load i32* %75, align 4 ; <i32> [#uses=3]
+ %75 = getelementptr %struct.Macroblock, %struct.Macroblock* %2, i64 %3, i32 26 ; <i32*> [#uses=2]
+ %76 = load i32, i32* %75, align 4 ; <i32> [#uses=3]
store i32 %76, i32* %.SV52.phi1113, align 4
- %77 = load i32* %75, align 4 ; <i32> [#uses=1]
+ %77 = load i32, i32* %75, align 4 ; <i32> [#uses=1]
%78 = icmp ne i32 %77, 0 ; <i1> [#uses=2]
br i1 %69, label %meshBB344, label %meshBB432
@@ -264,10 +264,10 @@ bb25: ; preds = %bb24
bb26: ; preds = %bb25
%Opq.sa.calc519 = xor i32 %Opq.sa.calc515, 23 ; <i32> [#uses=2]
%Opq.sa.calc518 = xor i32 %Opq.sa.calc519, 84 ; <i32> [#uses=1]
- %79 = load %struct.ImageParameters** @img, align 8 ; <%struct.ImageParameters*> [#uses=1]
- %80 = getelementptr %struct.ImageParameters* %79, i64 0, i32 39 ; <%struct.Macroblock**> [#uses=1]
- %81 = load %struct.Macroblock** %80, align 8 ; <%struct.Macroblock*> [#uses=1]
- %82 = load i32* %.SV99.phi, align 4 ; <i32> [#uses=1]
+ %79 = load %struct.ImageParameters*, %struct.ImageParameters** @img, align 8 ; <%struct.ImageParameters*> [#uses=1]
+ %80 = getelementptr %struct.ImageParameters, %struct.ImageParameters* %79, i64 0, i32 39 ; <%struct.Macroblock**> [#uses=1]
+ %81 = load %struct.Macroblock*, %struct.Macroblock** %80, align 8 ; <%struct.Macroblock*> [#uses=1]
+ %82 = load i32, i32* %.SV99.phi, align 4 ; <i32> [#uses=1]
br label %meshBB340
bb26.fragment: ; preds = %meshBB340
@@ -275,8 +275,8 @@ bb26.fragment: ; preds = %meshBB340
%Opq.sa.calc916 = add i32 %Opq.sa.calc918, %Opq.sa.calc754 ; <i32> [#uses=1]
%Opq.sa.calc917 = add i32 %Opq.sa.calc916, -237 ; <i32> [#uses=1]
%83 = sext i32 %.SV230.phi to i64 ; <i64> [#uses=1]
- %84 = getelementptr %struct.Macroblock* %.SV228.phi, i64 %83, i32 20 ; <i32*> [#uses=1]
- %85 = load i32* %84, align 4 ; <i32> [#uses=1]
+ %84 = getelementptr %struct.Macroblock, %struct.Macroblock* %.SV228.phi, i64 %83, i32 20 ; <i32*> [#uses=1]
+ %85 = load i32, i32* %84, align 4 ; <i32> [#uses=1]
%86 = icmp eq i32 %85, 0 ; <i1> [#uses=1]
br i1 %86, label %meshBB420, label %meshBB356
@@ -308,17 +308,17 @@ bb32: ; preds = %bb24
bb33: ; preds = %bb32
%Opq.sa.calc534 = sub i32 %Opq.sa.calc512, -75 ; <i32> [#uses=2]
- %92 = load %struct.ImageParameters** @img, align 8 ; <%struct.ImageParameters*> [#uses=1]
- %93 = getelementptr %struct.ImageParameters* %92, i64 0, i32 39 ; <%struct.Macroblock**> [#uses=1]
- %94 = load %struct.Macroblock** %93, align 8 ; <%struct.Macroblock*> [#uses=1]
- %95 = load i32* %.SV99.phi, align 4 ; <i32> [#uses=1]
+ %92 = load %struct.ImageParameters*, %struct.ImageParameters** @img, align 8 ; <%struct.ImageParameters*> [#uses=1]
+ %93 = getelementptr %struct.ImageParameters, %struct.ImageParameters* %92, i64 0, i32 39 ; <%struct.Macroblock**> [#uses=1]
+ %94 = load %struct.Macroblock*, %struct.Macroblock** %93, align 8 ; <%struct.Macroblock*> [#uses=1]
+ %95 = load i32, i32* %.SV99.phi, align 4 ; <i32> [#uses=1]
br label %bb33.fragment
bb33.fragment: ; preds = %bb33
%Opq.sa.calc712 = add i32 %Opq.sa.calc534, -109 ; <i32> [#uses=3]
%96 = sext i32 %95 to i64 ; <i64> [#uses=1]
- %97 = getelementptr %struct.Macroblock* %94, i64 %96, i32 20 ; <i32*> [#uses=1]
- %98 = load i32* %97, align 4 ; <i32> [#uses=1]
+ %97 = getelementptr %struct.Macroblock, %struct.Macroblock* %94, i64 %96, i32 20 ; <i32*> [#uses=1]
+ %98 = load i32, i32* %97, align 4 ; <i32> [#uses=1]
%99 = icmp eq i32 %98, 0 ; <i1> [#uses=1]
br i1 %99, label %bb34, label %meshBB
@@ -372,17 +372,17 @@ bb40: ; preds = %bb39
bb41: ; preds = %meshBB336
%Opq.sa.calc557 = sub i32 %Opq.sa.calc979, 143 ; <i32> [#uses=1]
- %108 = load %struct.ImageParameters** @img, align 8 ; <%struct.ImageParameters*> [#uses=1]
- %109 = getelementptr %struct.ImageParameters* %108, i64 0, i32 39 ; <%struct.Macroblock**> [#uses=1]
- %110 = load %struct.Macroblock** %109, align 8 ; <%struct.Macroblock*> [#uses=1]
- %111 = load i32* %.SV99.phi1128, align 4 ; <i32> [#uses=1]
+ %108 = load %struct.ImageParameters*, %struct.ImageParameters** @img, align 8 ; <%struct.ImageParameters*> [#uses=1]
+ %109 = getelementptr %struct.ImageParameters, %struct.ImageParameters* %108, i64 0, i32 39 ; <%struct.Macroblock**> [#uses=1]
+ %110 = load %struct.Macroblock*, %struct.Macroblock** %109, align 8 ; <%struct.Macroblock*> [#uses=1]
+ %111 = load i32, i32* %.SV99.phi1128, align 4 ; <i32> [#uses=1]
br label %bb41.fragment
bb41.fragment: ; preds = %bb41
%Opq.sa.calc987 = xor i32 %Opq.sa.calc557, 213 ; <i32> [#uses=4]
%112 = sext i32 %111 to i64 ; <i64> [#uses=1]
- %113 = getelementptr %struct.Macroblock* %110, i64 %112, i32 20 ; <i32*> [#uses=1]
- %114 = load i32* %113, align 4 ; <i32> [#uses=1]
+ %113 = getelementptr %struct.Macroblock, %struct.Macroblock* %110, i64 %112, i32 20 ; <i32*> [#uses=1]
+ %114 = load i32, i32* %113, align 4 ; <i32> [#uses=1]
%115 = icmp eq i32 %114, 0 ; <i1> [#uses=1]
br i1 %115, label %bb42, label %bb96
@@ -415,17 +415,17 @@ bb48: ; preds = %bb39
bb49: ; preds = %bb48
%Opq.sa.calc572 = add i32 %Opq.sa.calc798, 84 ; <i32> [#uses=0]
- %122 = load %struct.ImageParameters** @img, align 8 ; <%struct.ImageParameters*> [#uses=1]
- %123 = getelementptr %struct.ImageParameters* %122, i64 0, i32 39 ; <%struct.Macroblock**> [#uses=1]
- %124 = load %struct.Macroblock** %123, align 8 ; <%struct.Macroblock*> [#uses=1]
- %125 = load i32* %.SV99.phi1037, align 4 ; <i32> [#uses=1]
+ %122 = load %struct.ImageParameters*, %struct.ImageParameters** @img, align 8 ; <%struct.ImageParameters*> [#uses=1]
+ %123 = getelementptr %struct.ImageParameters, %struct.ImageParameters* %122, i64 0, i32 39 ; <%struct.Macroblock**> [#uses=1]
+ %124 = load %struct.Macroblock*, %struct.Macroblock** %123, align 8 ; <%struct.Macroblock*> [#uses=1]
+ %125 = load i32, i32* %.SV99.phi1037, align 4 ; <i32> [#uses=1]
br label %bb49.fragment
bb49.fragment: ; preds = %bb49
%Opq.sa.calc860 = sub i32 %Opq.sa.calc569, 114 ; <i32> [#uses=5]
%126 = sext i32 %125 to i64 ; <i64> [#uses=1]
- %127 = getelementptr %struct.Macroblock* %124, i64 %126, i32 20 ; <i32*> [#uses=1]
- %128 = load i32* %127, align 4 ; <i32> [#uses=1]
+ %127 = getelementptr %struct.Macroblock, %struct.Macroblock* %124, i64 %126, i32 20 ; <i32*> [#uses=1]
+ %128 = load i32, i32* %127, align 4 ; <i32> [#uses=1]
%129 = icmp eq i32 %128, 0 ; <i1> [#uses=1]
br i1 %129, label %bb50, label %meshBB380
@@ -484,8 +484,8 @@ bb58: ; preds = %bb56.fragment
bb59: ; preds = %bb58
%Opq.sa.calc599 = add i32 %Opq.sa.calc1002, 151 ; <i32> [#uses=0]
- %141 = getelementptr %struct.Macroblock* %2, i64 %3, i32 20 ; <i32*> [#uses=1]
- %142 = load i32* %141, align 4 ; <i32> [#uses=1]
+ %141 = getelementptr %struct.Macroblock, %struct.Macroblock* %2, i64 %3, i32 20 ; <i32*> [#uses=1]
+ %142 = load i32, i32* %141, align 4 ; <i32> [#uses=1]
br label %bb59.fragment
bb59.fragment: ; preds = %bb59
@@ -501,40 +501,40 @@ bb60: ; preds = %bb59.fragment
bb61: ; preds = %bb60
%Opq.sa.calc605 = xor i32 %Opq.sa.calc731, 57 ; <i32> [#uses=1]
- %146 = getelementptr %struct.Macroblock* %2, i64 %3, i32 23 ; <i32*> [#uses=2]
- %147 = load i32* %146, align 8 ; <i32> [#uses=3]
- %148 = getelementptr %struct.PixelPos* %pix, i64 0, i32 1 ; <i32*> [#uses=3]
+ %146 = getelementptr %struct.Macroblock, %struct.Macroblock* %2, i64 %3, i32 23 ; <i32*> [#uses=2]
+ %147 = load i32, i32* %146, align 8 ; <i32> [#uses=3]
+ %148 = getelementptr %struct.PixelPos, %struct.PixelPos* %pix, i64 0, i32 1 ; <i32*> [#uses=3]
br label %bb61.fragment
bb61.fragment: ; preds = %bb61
%Opq.sa.calc700 = sub i32 %Opq.sa.calc605, 108 ; <i32> [#uses=3]
store i32 %147, i32* %148, align 4
- %149 = getelementptr %struct.Macroblock* %2, i64 %3, i32 27 ; <i32*> [#uses=4]
- %150 = load i32* %149, align 8 ; <i32> [#uses=1]
+ %149 = getelementptr %struct.Macroblock, %struct.Macroblock* %2, i64 %3, i32 27 ; <i32*> [#uses=4]
+ %150 = load i32, i32* %149, align 8 ; <i32> [#uses=1]
%151 = icmp eq i32 %150, 0 ; <i1> [#uses=1]
br i1 %151, label %bb65, label %bb62
bb62: ; preds = %bb61.fragment
%Opq.sa.calc608 = add i32 %Opq.sa.calc700, -94 ; <i32> [#uses=1]
- %152 = load %struct.ImageParameters** @img, align 8 ; <%struct.ImageParameters*> [#uses=2]
- %153 = getelementptr %struct.ImageParameters* %152, i64 0, i32 45 ; <i32*> [#uses=1]
- %154 = load i32* %153, align 4 ; <i32> [#uses=1]
+ %152 = load %struct.ImageParameters*, %struct.ImageParameters** @img, align 8 ; <%struct.ImageParameters*> [#uses=2]
+ %153 = getelementptr %struct.ImageParameters, %struct.ImageParameters* %152, i64 0, i32 45 ; <i32*> [#uses=1]
+ %154 = load i32, i32* %153, align 4 ; <i32> [#uses=1]
%155 = icmp eq i32 %154, 1 ; <i1> [#uses=1]
br i1 %155, label %bb63, label %bb64
bb63: ; preds = %bb62
%Opq.sa.calc611 = add i32 %Opq.sa.calc700, -101 ; <i32> [#uses=2]
- %156 = getelementptr %struct.ImageParameters* %152, i64 0, i32 39 ; <%struct.Macroblock**> [#uses=1]
- %157 = load %struct.Macroblock** %156, align 8 ; <%struct.Macroblock*> [#uses=1]
- %158 = load i32* %146, align 8 ; <i32> [#uses=1]
+ %156 = getelementptr %struct.ImageParameters, %struct.ImageParameters* %152, i64 0, i32 39 ; <%struct.Macroblock**> [#uses=1]
+ %157 = load %struct.Macroblock*, %struct.Macroblock** %156, align 8 ; <%struct.Macroblock*> [#uses=1]
+ %158 = load i32, i32* %146, align 8 ; <i32> [#uses=1]
br label %meshBB452
bb63.fragment: ; preds = %meshBB452
%Opq.sa.calc891 = add i32 %Opq.link.mask823, 18 ; <i32> [#uses=2]
%Opq.sa.calc890 = add i32 %Opq.sa.calc891, -3 ; <i32> [#uses=2]
%159 = sext i32 %.SV266.phi to i64 ; <i64> [#uses=1]
- %160 = getelementptr %struct.Macroblock* %.SV264.phi, i64 %159, i32 20 ; <i32*> [#uses=1]
- %161 = load i32* %160, align 4 ; <i32> [#uses=1]
+ %160 = getelementptr %struct.Macroblock, %struct.Macroblock* %.SV264.phi, i64 %159, i32 20 ; <i32*> [#uses=1]
+ %161 = load i32, i32* %160, align 4 ; <i32> [#uses=1]
%162 = icmp eq i32 %161, 0 ; <i1> [#uses=1]
br i1 %162, label %bb64, label %meshBB456
@@ -562,7 +562,7 @@ bb65: ; preds = %meshBB456, %bb64, %bb61.fragment
%Opq.link.SV618.phi = phi i32 [ %Opq.sa.calc816, %meshBB456 ], [ %Opq.sa.calc700, %bb61.fragment ], [ %Opq.sa.calc614, %bb64 ] ; <i32> [#uses=1]
%Opq.link.mask620 = and i32 %Opq.link.SV618.phi, 40 ; <i32> [#uses=1]
%Opq.sa.calc617 = add i32 %Opq.link.mask620, -35 ; <i32> [#uses=2]
- %164 = load i32* %.SV152.phi1058, align 8 ; <i32> [#uses=1]
+ %164 = load i32, i32* %.SV152.phi1058, align 8 ; <i32> [#uses=1]
br label %meshBB436
bb65.fragment: ; preds = %meshBB436
@@ -573,7 +573,7 @@ bb65.fragment: ; preds = %meshBB436
bb66: ; preds = %bb60
%Opq.sa.calc621 = add i32 %Opq.sa.calc602, -217 ; <i32> [#uses=1]
%165 = add i32 %curr_mb_nr, -1 ; <i32> [#uses=1]
- %166 = getelementptr %struct.PixelPos* %pix, i64 0, i32 1 ; <i32*> [#uses=1]
+ %166 = getelementptr %struct.PixelPos, %struct.PixelPos* %pix, i64 0, i32 1 ; <i32*> [#uses=1]
br label %meshBB420
bb66.fragment: ; preds = %meshBB420
@@ -585,45 +585,45 @@ bb66.fragment: ; preds = %meshBB420
bb68: ; preds = %bb59.fragment
%Opq.sa.calc624 = sub i32 %Opq.sa.calc731, 229 ; <i32> [#uses=3]
- %167 = getelementptr %struct.Macroblock* %2, i64 %3, i32 23 ; <i32*> [#uses=1]
+ %167 = getelementptr %struct.Macroblock, %struct.Macroblock* %2, i64 %3, i32 23 ; <i32*> [#uses=1]
br label %meshBB344
bb68.fragment: ; preds = %meshBB344
%Opq.sa.calc784 = sub i32 %Opq.link.mask722, 3 ; <i32> [#uses=5]
- %168 = load i32* %.SV274.phi, align 8 ; <i32> [#uses=3]
+ %168 = load i32, i32* %.SV274.phi, align 8 ; <i32> [#uses=3]
br i1 %.load144.SV.phi, label %bb69, label %meshBB412
bb69: ; preds = %bb68.fragment
%Opq.sa.calc627 = add i32 %Opq.sa.calc784, 163 ; <i32> [#uses=0]
- %169 = getelementptr %struct.PixelPos* %pix, i64 0, i32 1 ; <i32*> [#uses=2]
+ %169 = getelementptr %struct.PixelPos, %struct.PixelPos* %pix, i64 0, i32 1 ; <i32*> [#uses=2]
store i32 %168, i32* %169, align 4
- %170 = getelementptr %struct.Macroblock* %2, i64 %3, i32 27 ; <i32*> [#uses=2]
+ %170 = getelementptr %struct.Macroblock, %struct.Macroblock* %2, i64 %3, i32 27 ; <i32*> [#uses=2]
br label %bb69.fragment
bb69.fragment: ; preds = %bb69
%Opq.sa.calc996 = sub i32 %Opq.sa.calc784, -9 ; <i32> [#uses=3]
%Opq.sa.calc994 = sub i32 %Opq.sa.calc996, %Opq.sa.calc784 ; <i32> [#uses=1]
%Opq.sa.calc995 = sub i32 %Opq.sa.calc994, 3 ; <i32> [#uses=2]
- %171 = load i32* %170, align 8 ; <i32> [#uses=3]
+ %171 = load i32, i32* %170, align 8 ; <i32> [#uses=3]
store i32 %171, i32* %.SV52.phi1170, align 4
- %172 = load i32* %170, align 8 ; <i32> [#uses=1]
+ %172 = load i32, i32* %170, align 8 ; <i32> [#uses=1]
%173 = icmp eq i32 %172, 0 ; <i1> [#uses=1]
br i1 %173, label %meshBB396, label %meshBB400
bb70: ; preds = %meshBB400
%Opq.sa.calc630 = add i32 %Opq.sa.calc824, -203 ; <i32> [#uses=2]
- %174 = load %struct.ImageParameters** @img, align 8 ; <%struct.ImageParameters*> [#uses=1]
- %175 = getelementptr %struct.ImageParameters* %174, i64 0, i32 39 ; <%struct.Macroblock**> [#uses=1]
- %176 = load %struct.Macroblock** %175, align 8 ; <%struct.Macroblock*> [#uses=1]
- %177 = load i32* %.SV156.phi, align 8 ; <i32> [#uses=1]
+ %174 = load %struct.ImageParameters*, %struct.ImageParameters** @img, align 8 ; <%struct.ImageParameters*> [#uses=1]
+ %175 = getelementptr %struct.ImageParameters, %struct.ImageParameters* %174, i64 0, i32 39 ; <%struct.Macroblock**> [#uses=1]
+ %176 = load %struct.Macroblock*, %struct.Macroblock** %175, align 8 ; <%struct.Macroblock*> [#uses=1]
+ %177 = load i32, i32* %.SV156.phi, align 8 ; <i32> [#uses=1]
br label %meshBB428
bb70.fragment: ; preds = %meshBB428
%Opq.sa.calc739 = xor i32 %Opq.sa.calc897, 213 ; <i32> [#uses=2]
%Opq.sa.calc738 = sub i32 %Opq.sa.calc739, 1 ; <i32> [#uses=2]
%178 = sext i32 %.SV280.phi to i64 ; <i64> [#uses=1]
- %179 = getelementptr %struct.Macroblock* %.SV278.phi, i64 %178, i32 20 ; <i32*> [#uses=1]
- %180 = load i32* %179, align 4 ; <i32> [#uses=1]
+ %179 = getelementptr %struct.Macroblock, %struct.Macroblock* %.SV278.phi, i64 %178, i32 20 ; <i32*> [#uses=1]
+ %180 = load i32, i32* %179, align 4 ; <i32> [#uses=1]
%181 = icmp eq i32 %180, 0 ; <i1> [#uses=1]
br i1 %181, label %meshBB452, label %meshBB356
@@ -641,14 +641,14 @@ bb71.fragment: ; preds = %meshBB352
bb74: ; preds = %meshBB412
%Opq.sa.calc636 = xor i32 %Opq.sa.calc932, 233 ; <i32> [#uses=1]
%184 = add i32 %.SV158.phi1063, 1 ; <i32> [#uses=1]
- %185 = getelementptr %struct.PixelPos* %pix, i64 0, i32 1 ; <i32*> [#uses=1]
+ %185 = getelementptr %struct.PixelPos, %struct.PixelPos* %pix, i64 0, i32 1 ; <i32*> [#uses=1]
br label %bb74.fragment
bb74.fragment: ; preds = %bb74
%Opq.sa.calc1011 = sub i32 %Opq.sa.calc636, -19 ; <i32> [#uses=0]
store i32 %184, i32* %185, align 4
- %186 = getelementptr %struct.Macroblock* %2, i64 %3, i32 27 ; <i32*> [#uses=1]
- %187 = load i32* %186, align 8 ; <i32> [#uses=2]
+ %186 = getelementptr %struct.Macroblock, %struct.Macroblock* %2, i64 %3, i32 27 ; <i32*> [#uses=1]
+ %187 = load i32, i32* %186, align 8 ; <i32> [#uses=2]
store i32 %187, i32* %.SV52.phi1186, align 4
br label %bb96
@@ -660,23 +660,23 @@ bb76: ; preds = %bb58
bb77: ; preds = %bb76
%Opq.sa.calc643 = add i32 %Opq.sa.calc640, 2 ; <i32> [#uses=2]
- %189 = load %struct.ImageParameters** @img, align 8 ; <%struct.ImageParameters*> [#uses=1]
- %190 = getelementptr %struct.ImageParameters* %189, i64 0, i32 45 ; <i32*> [#uses=1]
- %191 = load i32* %190, align 4 ; <i32> [#uses=1]
+ %189 = load %struct.ImageParameters*, %struct.ImageParameters** @img, align 8 ; <%struct.ImageParameters*> [#uses=1]
+ %190 = getelementptr %struct.ImageParameters, %struct.ImageParameters* %189, i64 0, i32 45 ; <i32*> [#uses=1]
+ %191 = load i32, i32* %190, align 4 ; <i32> [#uses=1]
%192 = icmp eq i32 %191, 2 ; <i1> [#uses=1]
br i1 %192, label %meshBB416, label %bb79
bb78: ; preds = %meshBB416
%Opq.sa.calc647 = xor i32 %Opq.sa.calc971, 25 ; <i32> [#uses=2]
%Opq.sa.calc646 = sub i32 %Opq.sa.calc647, 29 ; <i32> [#uses=0]
- %193 = getelementptr %struct.Macroblock* %2, i64 %3, i32 23 ; <i32*> [#uses=1]
- %194 = load i32* %193, align 8 ; <i32> [#uses=1]
+ %193 = getelementptr %struct.Macroblock, %struct.Macroblock* %2, i64 %3, i32 23 ; <i32*> [#uses=1]
+ %194 = load i32, i32* %193, align 8 ; <i32> [#uses=1]
%195 = add i32 %194, 1 ; <i32> [#uses=1]
br label %bb78.fragment
bb78.fragment: ; preds = %bb78
%Opq.sa.calc850 = sub i32 %Opq.sa.calc647, -93 ; <i32> [#uses=0]
- %196 = getelementptr %struct.PixelPos* %pix, i64 0, i32 1 ; <i32*> [#uses=1]
+ %196 = getelementptr %struct.PixelPos, %struct.PixelPos* %pix, i64 0, i32 1 ; <i32*> [#uses=1]
store i32 %195, i32* %196, align 4
store i32 1, i32* %.SV52.phi1200, align 4
%197 = add i32 %yN, -1 ; <i32> [#uses=1]
@@ -691,7 +691,7 @@ bb79: ; preds = %bb77, %bb76
bb81: ; preds = %meshBB456
%Opq.sa.calc655 = add i32 %Opq.sa.calc816, 56 ; <i32> [#uses=0]
- %198 = getelementptr %struct.PixelPos* %pix, i64 0, i32 1 ; <i32*> [#uses=1]
+ %198 = getelementptr %struct.PixelPos, %struct.PixelPos* %pix, i64 0, i32 1 ; <i32*> [#uses=1]
store i32 %curr_mb_nr, i32* %198, align 4
store i32 1, i32* %.SV52.phi1136, align 4
br label %bb98
@@ -702,8 +702,8 @@ bb83: ; preds = %bb56.fragment
bb84: ; preds = %bb83
%Opq.sa.calc661 = xor i32 %Opq.sa.calc658, 22 ; <i32> [#uses=1]
- %199 = getelementptr %struct.Macroblock* %2, i64 %3, i32 20 ; <i32*> [#uses=1]
- %200 = load i32* %199, align 4 ; <i32> [#uses=1]
+ %199 = getelementptr %struct.Macroblock, %struct.Macroblock* %2, i64 %3, i32 20 ; <i32*> [#uses=1]
+ %200 = load i32, i32* %199, align 4 ; <i32> [#uses=1]
br label %meshBB400
bb84.fragment: ; preds = %meshBB400
@@ -722,17 +722,17 @@ bb85: ; preds = %meshBB372
bb86: ; preds = %meshBB336
%Opq.sa.calc670 = sub i32 %Opq.sa.calc979, 35 ; <i32> [#uses=1]
- %204 = getelementptr %struct.Macroblock* %2, i64 %3, i32 24 ; <i32*> [#uses=1]
- %205 = load i32* %204, align 4 ; <i32> [#uses=1]
+ %204 = getelementptr %struct.Macroblock, %struct.Macroblock* %2, i64 %3, i32 24 ; <i32*> [#uses=1]
+ %205 = load i32, i32* %204, align 4 ; <i32> [#uses=1]
%206 = add i32 %205, 1 ; <i32> [#uses=1]
- %207 = getelementptr %struct.PixelPos* %pix, i64 0, i32 1 ; <i32*> [#uses=1]
+ %207 = getelementptr %struct.PixelPos, %struct.PixelPos* %pix, i64 0, i32 1 ; <i32*> [#uses=1]
br label %bb86.fragment
bb86.fragment: ; preds = %bb86
%Opq.sa.calc943 = xor i32 %Opq.sa.calc670, 123 ; <i32> [#uses=2]
store i32 %206, i32* %207, align 4
- %208 = getelementptr %struct.Macroblock* %2, i64 %3, i32 28 ; <i32*> [#uses=1]
- %209 = load i32* %208, align 4 ; <i32> [#uses=2]
+ %208 = getelementptr %struct.Macroblock, %struct.Macroblock* %2, i64 %3, i32 28 ; <i32*> [#uses=1]
+ %209 = load i32, i32* %208, align 4 ; <i32> [#uses=2]
store i32 %209, i32* %.SV52.phi1234, align 4
br label %meshBB424
@@ -744,43 +744,43 @@ bb87: ; preds = %meshBB440
bb89: ; preds = %bb84.fragment
%Opq.sa.calc677 = sub i32 %Opq.sa.calc802, -183 ; <i32> [#uses=1]
- %210 = getelementptr %struct.Macroblock* %2, i64 %3, i32 24 ; <i32*> [#uses=2]
+ %210 = getelementptr %struct.Macroblock, %struct.Macroblock* %2, i64 %3, i32 24 ; <i32*> [#uses=2]
br label %bb89.fragment
bb89.fragment: ; preds = %bb89
%Opq.sa.calc962 = add i32 %Opq.sa.calc677, -188 ; <i32> [#uses=3]
- %211 = load i32* %210, align 4 ; <i32> [#uses=3]
+ %211 = load i32, i32* %210, align 4 ; <i32> [#uses=3]
br i1 %203, label %bb90, label %meshBB408
bb90: ; preds = %bb89.fragment
%Opq.sa.calc680 = xor i32 %Opq.sa.calc962, 92 ; <i32> [#uses=1]
- %212 = getelementptr %struct.PixelPos* %pix, i64 0, i32 1 ; <i32*> [#uses=2]
+ %212 = getelementptr %struct.PixelPos, %struct.PixelPos* %pix, i64 0, i32 1 ; <i32*> [#uses=2]
store i32 %211, i32* %212, align 4
- %213 = getelementptr %struct.Macroblock* %2, i64 %3, i32 28 ; <i32*> [#uses=2]
+ %213 = getelementptr %struct.Macroblock, %struct.Macroblock* %2, i64 %3, i32 28 ; <i32*> [#uses=2]
br label %bb90.fragment
bb90.fragment: ; preds = %bb90
%Opq.sa.calc773 = sub i32 %Opq.sa.calc680, 60 ; <i32> [#uses=3]
%Opq.sa.calc772 = add i32 %Opq.sa.calc773, -25 ; <i32> [#uses=2]
- %214 = load i32* %213, align 4 ; <i32> [#uses=3]
+ %214 = load i32, i32* %213, align 4 ; <i32> [#uses=3]
store i32 %214, i32* %.SV52.phi1190, align 4
- %215 = load i32* %213, align 4 ; <i32> [#uses=1]
+ %215 = load i32, i32* %213, align 4 ; <i32> [#uses=1]
%216 = icmp eq i32 %215, 0 ; <i1> [#uses=1]
br i1 %216, label %meshBB416, label %meshBB368
bb91: ; preds = %meshBB368
%Opq.sa.calc683 = sub i32 %Opq.sa.calc768, -7 ; <i32> [#uses=0]
- %217 = load %struct.ImageParameters** @img, align 8 ; <%struct.ImageParameters*> [#uses=1]
- %218 = getelementptr %struct.ImageParameters* %217, i64 0, i32 39 ; <%struct.Macroblock**> [#uses=1]
- %219 = load %struct.Macroblock** %218, align 8 ; <%struct.Macroblock*> [#uses=1]
- %220 = load i32* %.SV170.phi, align 4 ; <i32> [#uses=1]
+ %217 = load %struct.ImageParameters*, %struct.ImageParameters** @img, align 8 ; <%struct.ImageParameters*> [#uses=1]
+ %218 = getelementptr %struct.ImageParameters, %struct.ImageParameters* %217, i64 0, i32 39 ; <%struct.Macroblock**> [#uses=1]
+ %219 = load %struct.Macroblock*, %struct.Macroblock** %218, align 8 ; <%struct.Macroblock*> [#uses=1]
+ %220 = load i32, i32* %.SV170.phi, align 4 ; <i32> [#uses=1]
br label %bb91.fragment
bb91.fragment: ; preds = %bb91
%Opq.sa.calc853 = xor i32 %Opq.sa.calc768, 8 ; <i32> [#uses=1]
%221 = sext i32 %220 to i64 ; <i64> [#uses=1]
- %222 = getelementptr %struct.Macroblock* %219, i64 %221, i32 20 ; <i32*> [#uses=1]
- %223 = load i32* %222, align 4 ; <i32> [#uses=1]
+ %222 = getelementptr %struct.Macroblock, %struct.Macroblock* %219, i64 %221, i32 20 ; <i32*> [#uses=1]
+ %223 = load i32, i32* %222, align 4 ; <i32> [#uses=1]
%224 = icmp eq i32 %223, 0 ; <i1> [#uses=1]
br i1 %224, label %bb92, label %bb96
@@ -798,14 +798,14 @@ bb92.fragment: ; preds = %bb92
bb95: ; preds = %meshBB408
%Opq.sa.calc689 = xor i32 %Opq.sa.calc912, 207 ; <i32> [#uses=3]
%227 = add i32 %.SV172.phi1074, 1 ; <i32> [#uses=1]
- %228 = getelementptr %struct.PixelPos* %pix, i64 0, i32 1 ; <i32*> [#uses=1]
+ %228 = getelementptr %struct.PixelPos, %struct.PixelPos* %pix, i64 0, i32 1 ; <i32*> [#uses=1]
br label %meshBB384
bb95.fragment: ; preds = %meshBB384
%Opq.sa.calc841 = sub i32 %Opq.sa.calc901, 76 ; <i32> [#uses=0]
store i32 %.SV306.phi, i32* %.SV308.phi, align 4
- %229 = getelementptr %struct.Macroblock* %.load.SV.phi, i64 %.load20.SV.phi, i32 28 ; <i32*> [#uses=1]
- %230 = load i32* %229, align 4 ; <i32> [#uses=2]
+ %229 = getelementptr %struct.Macroblock, %struct.Macroblock* %.load.SV.phi, i64 %.load20.SV.phi, i32 28 ; <i32*> [#uses=1]
+ %230 = load i32, i32* %229, align 4 ; <i32> [#uses=2]
store i32 %230, i32* %.load53.SV.phi, align 4
br label %bb96
@@ -826,13 +826,13 @@ bb97: ; preds = %meshBB424, %meshBB408, %meshBB352, %bb96, %bb21
%.SV70.phi1148 = phi i32 [ %.SV70.phi1195, %meshBB424 ], [ %.SV70.phi1215, %meshBB408 ], [ %.SV70.phi1138, %meshBB352 ], [ %.SV70.phi1085, %bb96 ], [ %.SV70.phi1027, %bb21 ] ; <i32> [#uses=1]
%yM.0.reg2mem.0.SV.phi = phi i32 [ -1, %meshBB424 ], [ -1, %meshBB408 ], [ -1, %meshBB352 ], [ %yM.0.SV.phi, %bb96 ], [ -1, %bb21 ] ; <i32> [#uses=1]
%Opq.sa.calc694 = xor i32 0, 243 ; <i32> [#uses=1]
- %232 = load %struct.ImageParameters** @img, align 8 ; <%struct.ImageParameters*> [#uses=1]
- %233 = getelementptr %struct.ImageParameters* %232, i64 0, i32 45 ; <i32*> [#uses=1]
+ %232 = load %struct.ImageParameters*, %struct.ImageParameters** @img, align 8 ; <%struct.ImageParameters*> [#uses=1]
+ %233 = getelementptr %struct.ImageParameters, %struct.ImageParameters* %232, i64 0, i32 45 ; <i32*> [#uses=1]
br label %bb97.fragment
bb97.fragment: ; preds = %bb97
%Opq.sa.calc928 = xor i32 %Opq.sa.calc694, 128 ; <i32> [#uses=1]
- %234 = load i32* %233, align 4 ; <i32> [#uses=1]
+ %234 = load i32, i32* %233, align 4 ; <i32> [#uses=1]
%235 = icmp eq i32 %234, 0 ; <i1> [#uses=1]
br i1 %235, label %return, label %bb98
@@ -843,25 +843,25 @@ bb98: ; preds = %meshBB444, %meshBB404, %bb97.fragment, %bb81, %bb78.fragment
%yM.0.reg2mem.1.SV.phi1068 = phi i32 [ %yN, %meshBB444 ], [ %yM.0.reg2mem.1.SV.phi1077, %meshBB404 ], [ %yM.0.reg2mem.0.SV.phi, %bb97.fragment ], [ %yN, %bb81 ], [ %197, %bb78.fragment ] ; <i32> [#uses=1]
%Opq.sa.calc695 = xor i32 0, 23 ; <i32> [#uses=2]
%236 = and i32 %.SV70.phi1091, %xN ; <i32> [#uses=1]
- %237 = getelementptr %struct.PixelPos* %pix, i64 0, i32 2 ; <i32*> [#uses=2]
+ %237 = getelementptr %struct.PixelPos, %struct.PixelPos* %pix, i64 0, i32 2 ; <i32*> [#uses=2]
store i32 %236, i32* %237, align 4
%238 = and i32 %yM.0.reg2mem.1.SV.phi1068, %.SV68.phi1092 ; <i32> [#uses=1]
- %239 = getelementptr %struct.PixelPos* %pix, i64 0, i32 3 ; <i32*> [#uses=2]
+ %239 = getelementptr %struct.PixelPos, %struct.PixelPos* %pix, i64 0, i32 3 ; <i32*> [#uses=2]
store i32 %238, i32* %239, align 4
- %240 = getelementptr %struct.PixelPos* %pix, i64 0, i32 5 ; <i32*> [#uses=1]
+ %240 = getelementptr %struct.PixelPos, %struct.PixelPos* %pix, i64 0, i32 5 ; <i32*> [#uses=1]
br label %meshBB376
bb98.fragment: ; preds = %meshBB376
%Opq.sa.calc1008 = sub i32 %Opq.link.mask911, 13 ; <i32> [#uses=1]
- %241 = getelementptr %struct.PixelPos* %pix, i64 0, i32 4 ; <i32*> [#uses=4]
- %242 = getelementptr %struct.PixelPos* %pix, i64 0, i32 1 ; <i32*> [#uses=1]
- %243 = load i32* %242, align 4 ; <i32> [#uses=1]
- %244 = load void (i32, i32*, i32*)** @get_mb_block_pos, align 8 ; <void (i32, i32*, i32*)*> [#uses=1]
+ %241 = getelementptr %struct.PixelPos, %struct.PixelPos* %pix, i64 0, i32 4 ; <i32*> [#uses=4]
+ %242 = getelementptr %struct.PixelPos, %struct.PixelPos* %pix, i64 0, i32 1 ; <i32*> [#uses=1]
+ %243 = load i32, i32* %242, align 4 ; <i32> [#uses=1]
+ %244 = load void (i32, i32*, i32*)*, void (i32, i32*, i32*)** @get_mb_block_pos, align 8 ; <void (i32, i32*, i32*)*> [#uses=1]
tail call void %244(i32 %243, i32* %241, i32* %.SV317.phi) nounwind
- %245 = load i32* %241, align 4 ; <i32> [#uses=1]
- %246 = load %struct.ImageParameters** @img, align 8 ; <%struct.ImageParameters*> [#uses=1]
- %247 = getelementptr %struct.ImageParameters* %246, i64 0, i32 119, i64 %.load39.SV.phi, i64 0 ; <i32*> [#uses=1]
- %248 = load i32* %247, align 4 ; <i32> [#uses=1]
+ %245 = load i32, i32* %241, align 4 ; <i32> [#uses=1]
+ %246 = load %struct.ImageParameters*, %struct.ImageParameters** @img, align 8 ; <%struct.ImageParameters*> [#uses=1]
+ %247 = getelementptr %struct.ImageParameters, %struct.ImageParameters* %246, i64 0, i32 119, i64 %.load39.SV.phi, i64 0 ; <i32*> [#uses=1]
+ %248 = load i32, i32* %247, align 4 ; <i32> [#uses=1]
%249 = mul i32 %248, %245 ; <i32> [#uses=2]
store i32 %249, i32* %241, align 4
br label %bb98.fragment183
@@ -869,15 +869,15 @@ bb98.fragment: ; preds = %meshBB376
bb98.fragment183: ; preds = %bb98.fragment
%Opq.sa.calc777 = sub i32 %Opq.sa.calc1008, -158 ; <i32> [#uses=1]
%Opq.sa.calc776 = sub i32 %Opq.sa.calc777, 46 ; <i32> [#uses=0]
- %250 = load i32* %.SV317.phi, align 4 ; <i32> [#uses=1]
- %251 = load %struct.ImageParameters** @img, align 8 ; <%struct.ImageParameters*> [#uses=1]
- %252 = getelementptr %struct.ImageParameters* %251, i64 0, i32 119, i64 %.load39.SV.phi, i64 1 ; <i32*> [#uses=1]
- %253 = load i32* %252, align 4 ; <i32> [#uses=1]
+ %250 = load i32, i32* %.SV317.phi, align 4 ; <i32> [#uses=1]
+ %251 = load %struct.ImageParameters*, %struct.ImageParameters** @img, align 8 ; <%struct.ImageParameters*> [#uses=1]
+ %252 = getelementptr %struct.ImageParameters, %struct.ImageParameters* %251, i64 0, i32 119, i64 %.load39.SV.phi, i64 1 ; <i32*> [#uses=1]
+ %253 = load i32, i32* %252, align 4 ; <i32> [#uses=1]
%254 = mul i32 %253, %250 ; <i32> [#uses=1]
- %255 = load i32* %.SV313.phi, align 4 ; <i32> [#uses=1]
+ %255 = load i32, i32* %.SV313.phi, align 4 ; <i32> [#uses=1]
%256 = add i32 %255, %249 ; <i32> [#uses=1]
store i32 %256, i32* %241, align 4
- %257 = load i32* %.SV315.phi, align 4 ; <i32> [#uses=1]
+ %257 = load i32, i32* %.SV315.phi, align 4 ; <i32> [#uses=1]
%258 = add i32 %257, %254 ; <i32> [#uses=1]
store i32 %258, i32* %.SV317.phi, align 4
ret void
diff --git a/test/CodeGen/X86/2009-04-29-IndirectDestOperands.ll b/test/CodeGen/X86/2009-04-29-IndirectDestOperands.ll
index a6ed74ba2ee9..5ddb5cae2966 100644
--- a/test/CodeGen/X86/2009-04-29-IndirectDestOperands.ll
+++ b/test/CodeGen/X86/2009-04-29-IndirectDestOperands.ll
@@ -4,18 +4,18 @@ target triple = "i386-apple-darwin9.0"
define void @cpuid(i32* %data) nounwind {
entry:
- %arrayidx = getelementptr i32* %data, i32 1 ; <i32*> [#uses=1]
- %arrayidx2 = getelementptr i32* %data, i32 2 ; <i32*> [#uses=1]
- %arrayidx4 = getelementptr i32* %data, i32 3 ; <i32*> [#uses=1]
- %arrayidx6 = getelementptr i32* %data, i32 4 ; <i32*> [#uses=1]
- %arrayidx8 = getelementptr i32* %data, i32 5 ; <i32*> [#uses=1]
- %tmp9 = load i32* %arrayidx8 ; <i32> [#uses=1]
- %arrayidx11 = getelementptr i32* %data, i32 6 ; <i32*> [#uses=1]
- %tmp12 = load i32* %arrayidx11 ; <i32> [#uses=1]
- %arrayidx14 = getelementptr i32* %data, i32 7 ; <i32*> [#uses=1]
- %tmp15 = load i32* %arrayidx14 ; <i32> [#uses=1]
- %arrayidx17 = getelementptr i32* %data, i32 8 ; <i32*> [#uses=1]
- %tmp18 = load i32* %arrayidx17 ; <i32> [#uses=1]
+ %arrayidx = getelementptr i32, i32* %data, i32 1 ; <i32*> [#uses=1]
+ %arrayidx2 = getelementptr i32, i32* %data, i32 2 ; <i32*> [#uses=1]
+ %arrayidx4 = getelementptr i32, i32* %data, i32 3 ; <i32*> [#uses=1]
+ %arrayidx6 = getelementptr i32, i32* %data, i32 4 ; <i32*> [#uses=1]
+ %arrayidx8 = getelementptr i32, i32* %data, i32 5 ; <i32*> [#uses=1]
+ %tmp9 = load i32, i32* %arrayidx8 ; <i32> [#uses=1]
+ %arrayidx11 = getelementptr i32, i32* %data, i32 6 ; <i32*> [#uses=1]
+ %tmp12 = load i32, i32* %arrayidx11 ; <i32> [#uses=1]
+ %arrayidx14 = getelementptr i32, i32* %data, i32 7 ; <i32*> [#uses=1]
+ %tmp15 = load i32, i32* %arrayidx14 ; <i32> [#uses=1]
+ %arrayidx17 = getelementptr i32, i32* %data, i32 8 ; <i32*> [#uses=1]
+ %tmp18 = load i32, i32* %arrayidx17 ; <i32> [#uses=1]
%0 = call i32 asm "cpuid", "={ax},=*{bx},=*{cx},=*{dx},{ax},{bx},{cx},{dx},~{dirflag},~{fpsr},~{flags}"(i32* %arrayidx2, i32* %arrayidx4, i32* %arrayidx6, i32 %tmp9, i32 %tmp12, i32 %tmp15, i32 %tmp18) nounwind ; <i32> [#uses=1]
store i32 %0, i32* %arrayidx
ret void
diff --git a/test/CodeGen/X86/2009-04-29-LinearScanBug.ll b/test/CodeGen/X86/2009-04-29-LinearScanBug.ll
index 2fbf7aa5ed1a..b4d202c168d6 100644
--- a/test/CodeGen/X86/2009-04-29-LinearScanBug.ll
+++ b/test/CodeGen/X86/2009-04-29-LinearScanBug.ll
@@ -105,17 +105,17 @@
define fastcc i32 @pf_state_compare_ext_gwy(%struct.pf_state_key* nocapture %a, %struct.pf_state_key* nocapture %b) nounwind optsize ssp {
entry:
%0 = zext i8 0 to i32 ; <i32> [#uses=2]
- %1 = load i8* null, align 1 ; <i8> [#uses=2]
+ %1 = load i8, i8* null, align 1 ; <i8> [#uses=2]
%2 = zext i8 %1 to i32 ; <i32> [#uses=1]
%3 = sub i32 %0, %2 ; <i32> [#uses=1]
%4 = icmp eq i8 0, %1 ; <i1> [#uses=1]
br i1 %4, label %bb1, label %bb79
bb1: ; preds = %entry
- %5 = load i8* null, align 4 ; <i8> [#uses=2]
+ %5 = load i8, i8* null, align 4 ; <i8> [#uses=2]
%6 = zext i8 %5 to i32 ; <i32> [#uses=2]
- %7 = getelementptr %struct.pf_state_key* %b, i32 0, i32 3 ; <i8*> [#uses=1]
- %8 = load i8* %7, align 4 ; <i8> [#uses=2]
+ %7 = getelementptr %struct.pf_state_key, %struct.pf_state_key* %b, i32 0, i32 3 ; <i8*> [#uses=1]
+ %8 = load i8, i8* %7, align 4 ; <i8> [#uses=2]
%9 = zext i8 %8 to i32 ; <i32> [#uses=1]
%10 = sub i32 %6, %9 ; <i32> [#uses=1]
%11 = icmp eq i8 %5, %8 ; <i1> [#uses=1]
@@ -132,32 +132,32 @@ bb3: ; preds = %bb1
]
bb4: ; preds = %bb3, %bb3
- %12 = load i16* null, align 4 ; <i16> [#uses=1]
+ %12 = load i16, i16* null, align 4 ; <i16> [#uses=1]
%13 = zext i16 %12 to i32 ; <i32> [#uses=1]
%14 = sub i32 0, %13 ; <i32> [#uses=1]
br i1 false, label %bb23, label %bb79
bb6: ; preds = %bb3
- %15 = load i16* null, align 4 ; <i16> [#uses=1]
+ %15 = load i16, i16* null, align 4 ; <i16> [#uses=1]
%16 = zext i16 %15 to i32 ; <i32> [#uses=1]
%17 = sub i32 0, %16 ; <i32> [#uses=1]
ret i32 %17
bb10: ; preds = %bb3
- %18 = load i8* null, align 1 ; <i8> [#uses=2]
+ %18 = load i8, i8* null, align 1 ; <i8> [#uses=2]
%19 = zext i8 %18 to i32 ; <i32> [#uses=1]
%20 = sub i32 0, %19 ; <i32> [#uses=1]
%21 = icmp eq i8 0, %18 ; <i1> [#uses=1]
br i1 %21, label %bb12, label %bb79
bb12: ; preds = %bb10
- %22 = load i16* null, align 4 ; <i16> [#uses=1]
+ %22 = load i16, i16* null, align 4 ; <i16> [#uses=1]
%23 = zext i16 %22 to i32 ; <i32> [#uses=1]
%24 = sub i32 0, %23 ; <i32> [#uses=1]
ret i32 %24
bb17: ; preds = %bb3
- %25 = load i8* null, align 1 ; <i8> [#uses=2]
+ %25 = load i8, i8* null, align 1 ; <i8> [#uses=2]
%26 = icmp eq i8 %25, 1 ; <i1> [#uses=1]
br i1 %26, label %bb18, label %bb23
@@ -166,16 +166,16 @@ bb18: ; preds = %bb17
br i1 %27, label %bb19, label %bb23
bb19: ; preds = %bb18
- %28 = load i16* null, align 4 ; <i16> [#uses=1]
+ %28 = load i16, i16* null, align 4 ; <i16> [#uses=1]
%29 = zext i16 %28 to i32 ; <i32> [#uses=1]
%30 = sub i32 0, %29 ; <i32> [#uses=1]
br i1 false, label %bb23, label %bb79
bb21: ; preds = %bb3
- %31 = getelementptr %struct.pf_state_key* %a, i32 0, i32 1, i32 1, i32 0 ; <i32*> [#uses=1]
- %32 = load i32* %31, align 4 ; <i32> [#uses=2]
- %33 = getelementptr %struct.pf_state_key* %b, i32 0, i32 1, i32 1, i32 0 ; <i32*> [#uses=1]
- %34 = load i32* %33, align 4 ; <i32> [#uses=2]
+ %31 = getelementptr %struct.pf_state_key, %struct.pf_state_key* %a, i32 0, i32 1, i32 1, i32 0 ; <i32*> [#uses=1]
+ %32 = load i32, i32* %31, align 4 ; <i32> [#uses=2]
+ %33 = getelementptr %struct.pf_state_key, %struct.pf_state_key* %b, i32 0, i32 1, i32 1, i32 0 ; <i32*> [#uses=1]
+ %34 = load i32, i32* %33, align 4 ; <i32> [#uses=2]
%35 = sub i32 %32, %34 ; <i32> [#uses=1]
%36 = icmp eq i32 %32, %34 ; <i1> [#uses=1]
br i1 %36, label %bb23, label %bb79
@@ -188,11 +188,11 @@ bb24: ; preds = %bb23
ret i32 1
bb70: ; preds = %bb23
- %37 = load i32 (%struct.pf_app_state*, %struct.pf_app_state*)** null, align 4 ; <i32 (%struct.pf_app_state*, %struct.pf_app_state*)*> [#uses=3]
+ %37 = load i32 (%struct.pf_app_state*, %struct.pf_app_state*)*, i32 (%struct.pf_app_state*, %struct.pf_app_state*)** null, align 4 ; <i32 (%struct.pf_app_state*, %struct.pf_app_state*)*> [#uses=3]
br i1 false, label %bb78, label %bb73
bb73: ; preds = %bb70
- %38 = load i32 (%struct.pf_app_state*, %struct.pf_app_state*)** null, align 4 ; <i32 (%struct.pf_app_state*, %struct.pf_app_state*)*> [#uses=2]
+ %38 = load i32 (%struct.pf_app_state*, %struct.pf_app_state*)*, i32 (%struct.pf_app_state*, %struct.pf_app_state*)** null, align 4 ; <i32 (%struct.pf_app_state*, %struct.pf_app_state*)*> [#uses=2]
%39 = icmp eq i32 (%struct.pf_app_state*, %struct.pf_app_state*)* %38, null ; <i1> [#uses=1]
br i1 %39, label %bb78, label %bb74
diff --git a/test/CodeGen/X86/2009-04-29-RegAllocAssert.ll b/test/CodeGen/X86/2009-04-29-RegAllocAssert.ll
index e803d6b56369..c291fede98eb 100644
--- a/test/CodeGen/X86/2009-04-29-RegAllocAssert.ll
+++ b/test/CodeGen/X86/2009-04-29-RegAllocAssert.ll
@@ -71,7 +71,7 @@
define fastcc void @dropCell(%struct.MemPage* nocapture %pPage, i32 %idx, i32 %sz) nounwind ssp {
entry:
- %0 = load i8** null, align 8 ; <i8*> [#uses=4]
+ %0 = load i8*, i8** null, align 8 ; <i8*> [#uses=4]
%1 = or i32 0, 0 ; <i32> [#uses=1]
%2 = icmp slt i32 %sz, 4 ; <i1> [#uses=1]
%size_addr.0.i = select i1 %2, i32 4, i32 %sz ; <i32> [#uses=1]
@@ -83,18 +83,18 @@ bb3.i: ; preds = %bb3.i, %entry
br i1 %or.cond.i, label %bb5.i, label %bb3.i
bb5.i: ; preds = %bb3.i
- %4 = getelementptr i8* %0, i64 0 ; <i8*> [#uses=1]
+ %4 = getelementptr i8, i8* %0, i64 0 ; <i8*> [#uses=1]
store i8 0, i8* %4, align 1
- %5 = getelementptr i8* %0, i64 0 ; <i8*> [#uses=1]
+ %5 = getelementptr i8, i8* %0, i64 0 ; <i8*> [#uses=1]
store i8 0, i8* %5, align 1
%6 = add i32 %1, 2 ; <i32> [#uses=1]
%7 = zext i32 %6 to i64 ; <i64> [#uses=2]
- %8 = getelementptr i8* %0, i64 %7 ; <i8*> [#uses=1]
+ %8 = getelementptr i8, i8* %0, i64 %7 ; <i8*> [#uses=1]
%9 = lshr i32 %size_addr.0.i, 8 ; <i32> [#uses=1]
%10 = trunc i32 %9 to i8 ; <i8> [#uses=1]
store i8 %10, i8* %8, align 1
%.sum31.i = add i64 %7, 1 ; <i64> [#uses=1]
- %11 = getelementptr i8* %0, i64 %.sum31.i ; <i8*> [#uses=1]
+ %11 = getelementptr i8, i8* %0, i64 %.sum31.i ; <i8*> [#uses=1]
store i8 0, i8* %11, align 1
br label %bb11.outer.i
@@ -111,7 +111,7 @@ bb: ; preds = %bb12.i
br label %bb2
bb2: ; preds = %bb, %bb12.i
- %14 = getelementptr %struct.MemPage* %pPage, i64 0, i32 1 ; <i8*> [#uses=1]
+ %14 = getelementptr %struct.MemPage, %struct.MemPage* %pPage, i64 0, i32 1 ; <i8*> [#uses=1]
store i8 1, i8* %14, align 1
ret void
}
diff --git a/test/CodeGen/X86/2009-04-scale.ll b/test/CodeGen/X86/2009-04-scale.ll
index e4c756cfdd44..1fc5f2b234fe 100644
--- a/test/CodeGen/X86/2009-04-scale.ll
+++ b/test/CodeGen/X86/2009-04-scale.ll
@@ -8,13 +8,13 @@
define void @test() {
entry:
- %0 = load i32* null, align 4 ; <i32> [#uses=1]
+ %0 = load i32, i32* null, align 4 ; <i32> [#uses=1]
%1 = lshr i32 %0, 8 ; <i32> [#uses=1]
%2 = and i32 %1, 255 ; <i32> [#uses=1]
- %3 = getelementptr %struct.array* null, i32 0, i32 3 ; <[256 x %struct.pair]*> [#uses=1]
- %4 = getelementptr [256 x %struct.pair]* %3, i32 0, i32 %2 ; <%struct.pair*> [#uses=1]
- %5 = getelementptr %struct.pair* %4, i32 0, i32 1 ; <i64*> [#uses=1]
- %6 = load i64* %5, align 4 ; <i64> [#uses=1]
+ %3 = getelementptr %struct.array, %struct.array* null, i32 0, i32 3 ; <[256 x %struct.pair]*> [#uses=1]
+ %4 = getelementptr [256 x %struct.pair], [256 x %struct.pair]* %3, i32 0, i32 %2 ; <%struct.pair*> [#uses=1]
+ %5 = getelementptr %struct.pair, %struct.pair* %4, i32 0, i32 1 ; <i64*> [#uses=1]
+ %6 = load i64, i64* %5, align 4 ; <i64> [#uses=1]
%7 = xor i64 0, %6 ; <i64> [#uses=1]
%8 = xor i64 %7, 0 ; <i64> [#uses=1]
%9 = xor i64 %8, 0 ; <i64> [#uses=1]
diff --git a/test/CodeGen/X86/2009-05-08-InlineAsmIOffset.ll b/test/CodeGen/X86/2009-05-08-InlineAsmIOffset.ll
index fa240f64c300..dd073f09fd73 100644
--- a/test/CodeGen/X86/2009-05-08-InlineAsmIOffset.ll
+++ b/test/CodeGen/X86/2009-05-08-InlineAsmIOffset.ll
@@ -12,7 +12,7 @@ target triple = "i386-apple-darwin9.6"
define void @x() nounwind {
entry:
- tail call void asm sideeffect "1: $0", "i,~{dirflag},~{fpsr},~{flags}"(i32* getelementptr (%struct.pv_cpu_ops* @pv_cpu_ops, i32 0, i32 1, i32 1)) nounwind
+ tail call void asm sideeffect "1: $0", "i,~{dirflag},~{fpsr},~{flags}"(i32* getelementptr (%struct.pv_cpu_ops, %struct.pv_cpu_ops* @pv_cpu_ops, i32 0, i32 1, i32 1)) nounwind
tail call void asm sideeffect "2: $0", "i,~{dirflag},~{fpsr},~{flags}"(i32* @G) nounwind
ret void
}
diff --git a/test/CodeGen/X86/2009-05-11-tailmerge-crash.ll b/test/CodeGen/X86/2009-05-11-tailmerge-crash.ll
index c2cd89c33ee8..e9d15583e562 100644
--- a/test/CodeGen/X86/2009-05-11-tailmerge-crash.ll
+++ b/test/CodeGen/X86/2009-05-11-tailmerge-crash.ll
@@ -12,7 +12,7 @@ entry:
br label %bb
bb: ; preds = %bb.i, %bb, %entry
- %2 = load volatile i32* @g_9, align 4 ; <i32> [#uses=2]
+ %2 = load volatile i32, i32* @g_9, align 4 ; <i32> [#uses=2]
%3 = icmp sgt i32 %2, 1 ; <i1> [#uses=1]
%4 = and i1 %3, %1 ; <i1> [#uses=1]
br i1 %4, label %bb.i, label %bb
diff --git a/test/CodeGen/X86/2009-05-19-SingleElementExtractElement.ll b/test/CodeGen/X86/2009-05-19-SingleElementExtractElement.ll
index 6e062fb25089..89cd24d7dcfe 100644
--- a/test/CodeGen/X86/2009-05-19-SingleElementExtractElement.ll
+++ b/test/CodeGen/X86/2009-05-19-SingleElementExtractElement.ll
@@ -7,7 +7,7 @@ entry:
%tmp5.i = extractelement <1 x i64> %a, i32 0
%tmp11 = bitcast i64 %tmp5.i to <1 x i64>
%tmp8 = extractelement <1 x i64> %tmp11, i32 0
- %call6 = call i32 (i64)* @foo(i64 %tmp8)
+ %call6 = call i32 (i64) @foo(i64 %tmp8)
ret i32 undef
}
diff --git a/test/CodeGen/X86/2009-05-28-DAGCombineCrash.ll b/test/CodeGen/X86/2009-05-28-DAGCombineCrash.ll
index 1d1462075492..019d5dfb1fea 100644
--- a/test/CodeGen/X86/2009-05-28-DAGCombineCrash.ll
+++ b/test/CodeGen/X86/2009-05-28-DAGCombineCrash.ll
@@ -5,7 +5,7 @@ entry:
br label %bb14
bb14: ; preds = %bb
- %srcval16 = load i448* %P, align 8 ; <i448> [#uses=1]
+ %srcval16 = load i448, i448* %P, align 8 ; <i448> [#uses=1]
%tmp = zext i32 undef to i448 ; <i448> [#uses=1]
%tmp15 = shl i448 %tmp, 288 ; <i448> [#uses=1]
%mask = and i448 %srcval16, -2135987035423586845985235064014169866455883682256196619149693890381755748887481053010428711403521 ; <i448> [#uses=1]
diff --git a/test/CodeGen/X86/2009-05-30-ISelBug.ll b/test/CodeGen/X86/2009-05-30-ISelBug.ll
index fe04272082c9..e01fe9f89308 100644
--- a/test/CodeGen/X86/2009-05-30-ISelBug.ll
+++ b/test/CodeGen/X86/2009-05-30-ISelBug.ll
@@ -13,14 +13,14 @@ bb35.i.backedge.exitStub: ; preds = %bb54.i
bb54.i: ; preds = %newFuncRoot
%1 = zext i32 %.reload51 to i64 ; <i64> [#uses=1]
- %2 = getelementptr i32* %0, i64 %1 ; <i32*> [#uses=1]
- %3 = load i32* %2, align 4 ; <i32> [#uses=2]
+ %2 = getelementptr i32, i32* %0, i64 %1 ; <i32*> [#uses=1]
+ %3 = load i32, i32* %2, align 4 ; <i32> [#uses=2]
%4 = lshr i32 %3, 8 ; <i32> [#uses=1]
%5 = and i32 %3, 255 ; <i32> [#uses=1]
%6 = add i32 %5, 4 ; <i32> [#uses=1]
%7 = zext i32 %4 to i64 ; <i64> [#uses=1]
- %8 = getelementptr i32* %0, i64 %7 ; <i32*> [#uses=1]
- %9 = load i32* %8, align 4 ; <i32> [#uses=2]
+ %8 = getelementptr i32, i32* %0, i64 %7 ; <i32*> [#uses=1]
+ %9 = load i32, i32* %8, align 4 ; <i32> [#uses=2]
%10 = and i32 %9, 255 ; <i32> [#uses=1]
%11 = lshr i32 %9, 8 ; <i32> [#uses=1]
%12 = add i32 %c_nblock_used.2.i, 5 ; <i32> [#uses=1]
diff --git a/test/CodeGen/X86/2009-06-02-RewriterBug.ll b/test/CodeGen/X86/2009-06-02-RewriterBug.ll
index 779f9857de7f..6ce7af632ee1 100644
--- a/test/CodeGen/X86/2009-06-02-RewriterBug.ll
+++ b/test/CodeGen/X86/2009-06-02-RewriterBug.ll
@@ -11,14 +11,14 @@ bb.nph: ; preds = %entry
while.body: ; preds = %for.end, %bb.nph
%indvar2787 = phi i64 [ 0, %bb.nph ], [ %indvar.next2788, %for.end ] ; <i64> [#uses=2]
%tmp2791 = mul i64 %indvar2787, 44 ; <i64> [#uses=0]
- %ctg22996 = getelementptr i8* %in, i64 0 ; <i8*> [#uses=1]
+ %ctg22996 = getelementptr i8, i8* %in, i64 0 ; <i8*> [#uses=1]
%conv = zext i32 undef to i64 ; <i64> [#uses=1]
%conv11 = zext i32 undef to i64 ; <i64> [#uses=1]
- %tmp18 = load i32* undef ; <i32> [#uses=1]
+ %tmp18 = load i32, i32* undef ; <i32> [#uses=1]
%conv19 = zext i32 %tmp18 to i64 ; <i64> [#uses=1]
- %tmp30 = load i32* undef ; <i32> [#uses=1]
+ %tmp30 = load i32, i32* undef ; <i32> [#uses=1]
%conv31 = zext i32 %tmp30 to i64 ; <i64> [#uses=4]
- %ptrincdec3065 = load i8* null ; <i8> [#uses=1]
+ %ptrincdec3065 = load i8, i8* null ; <i8> [#uses=1]
%conv442709 = zext i8 %ptrincdec3065 to i64 ; <i64> [#uses=1]
%shl45 = shl i64 %conv442709, 16 ; <i64> [#uses=1]
%conv632707 = zext i8 undef to i64 ; <i64> [#uses=1]
@@ -68,10 +68,10 @@ while.body: ; preds = %for.end, %bb.nph
%add479 = add i64 %add473, %add441 ; <i64> [#uses=3]
%conv4932682 = zext i8 undef to i64 ; <i64> [#uses=1]
%shl494 = shl i64 %conv4932682, 16 ; <i64> [#uses=1]
- %ptrincdec4903012 = load i8* null ; <i8> [#uses=1]
+ %ptrincdec4903012 = load i8, i8* null ; <i8> [#uses=1]
%conv5032681 = zext i8 %ptrincdec4903012 to i64 ; <i64> [#uses=1]
%shl504 = shl i64 %conv5032681, 8 ; <i64> [#uses=1]
- %ptrincdec5003009 = load i8* null ; <i8> [#uses=1]
+ %ptrincdec5003009 = load i8, i8* null ; <i8> [#uses=1]
%conv5132680 = zext i8 %ptrincdec5003009 to i64 ; <i64> [#uses=1]
%or495 = or i64 %shl494, 0 ; <i64> [#uses=1]
%or505 = or i64 %or495, %conv5132680 ; <i64> [#uses=1]
@@ -91,10 +91,10 @@ while.body: ; preds = %for.end, %bb.nph
%xor575 = xor i64 %xor568, %or561 ; <i64> [#uses=1]
%add587 = add i64 %xor575, 0 ; <i64> [#uses=1]
%add593 = add i64 %add587, %add555 ; <i64> [#uses=1]
- %ptrincdec6043000 = load i8* null ; <i8> [#uses=1]
+ %ptrincdec6043000 = load i8, i8* null ; <i8> [#uses=1]
%conv6172676 = zext i8 %ptrincdec6043000 to i64 ; <i64> [#uses=1]
%shl618 = shl i64 %conv6172676, 8 ; <i64> [#uses=1]
- %ptrincdec6142997 = load i8* %ctg22996 ; <i8> [#uses=1]
+ %ptrincdec6142997 = load i8, i8* %ctg22996 ; <i8> [#uses=1]
%conv6272675 = zext i8 %ptrincdec6142997 to i64 ; <i64> [#uses=1]
%or619 = or i64 0, %conv6272675 ; <i64> [#uses=1]
%or628 = or i64 %or619, %shl618 ; <i64> [#uses=1]
@@ -106,7 +106,7 @@ while.body: ; preds = %for.end, %bb.nph
%xor700 = xor i64 0, %and699 ; <i64> [#uses=1]
%add701 = add i64 0, %xor700 ; <i64> [#uses=1]
%add707 = add i64 %add701, %add669 ; <i64> [#uses=4]
- %ptrincdec6242994 = load i8* null ; <i8> [#uses=1]
+ %ptrincdec6242994 = load i8, i8* null ; <i8> [#uses=1]
%conv7122673 = zext i8 %ptrincdec6242994 to i64 ; <i64> [#uses=1]
%shl713 = shl i64 %conv7122673, 24 ; <i64> [#uses=1]
%conv7412670 = zext i8 undef to i64 ; <i64> [#uses=1]
@@ -132,7 +132,7 @@ while.body: ; preds = %for.end, %bb.nph
%add821 = add i64 %add815, %add783 ; <i64> [#uses=1]
%add1160 = add i64 0, %add707 ; <i64> [#uses=0]
%add1157 = add i64 undef, undef ; <i64> [#uses=0]
- %ptrincdec11742940 = load i8* null ; <i8> [#uses=1]
+ %ptrincdec11742940 = load i8, i8* null ; <i8> [#uses=1]
%conv11872651 = zext i8 %ptrincdec11742940 to i64 ; <i64> [#uses=1]
%shl1188 = shl i64 %conv11872651, 8 ; <i64> [#uses=1]
%or1198 = or i64 0, %shl1188 ; <i64> [#uses=1]
@@ -164,26 +164,26 @@ entry:
br i1 undef, label %while.end, label %bb.nph
bb.nph: ; preds = %entry
- %arrayidx5 = getelementptr i32* %arr, i64 1 ; <i32*> [#uses=1]
- %arrayidx9 = getelementptr i32* %arr, i64 2 ; <i32*> [#uses=2]
- %arrayidx13 = getelementptr i32* %arr, i64 3 ; <i32*> [#uses=2]
- %arrayidx25 = getelementptr i32* %arr, i64 6 ; <i32*> [#uses=1]
- %arrayidx29 = getelementptr i32* %arr, i64 7 ; <i32*> [#uses=1]
+ %arrayidx5 = getelementptr i32, i32* %arr, i64 1 ; <i32*> [#uses=1]
+ %arrayidx9 = getelementptr i32, i32* %arr, i64 2 ; <i32*> [#uses=2]
+ %arrayidx13 = getelementptr i32, i32* %arr, i64 3 ; <i32*> [#uses=2]
+ %arrayidx25 = getelementptr i32, i32* %arr, i64 6 ; <i32*> [#uses=1]
+ %arrayidx29 = getelementptr i32, i32* %arr, i64 7 ; <i32*> [#uses=1]
br label %while.body
while.body: ; preds = %for.end, %bb.nph
- %tmp3 = load i32* %arr ; <i32> [#uses=2]
+ %tmp3 = load i32, i32* %arr ; <i32> [#uses=2]
%conv = zext i32 %tmp3 to i64 ; <i64> [#uses=1]
- %tmp10 = load i32* %arrayidx9 ; <i32> [#uses=1]
+ %tmp10 = load i32, i32* %arrayidx9 ; <i32> [#uses=1]
%conv11 = zext i32 %tmp10 to i64 ; <i64> [#uses=1]
- %tmp14 = load i32* %arrayidx13 ; <i32> [#uses=3]
+ %tmp14 = load i32, i32* %arrayidx13 ; <i32> [#uses=3]
%conv15 = zext i32 %tmp14 to i64 ; <i64> [#uses=2]
- %tmp18 = load i32* undef ; <i32> [#uses=2]
+ %tmp18 = load i32, i32* undef ; <i32> [#uses=2]
%conv19 = zext i32 %tmp18 to i64 ; <i64> [#uses=1]
%conv23 = zext i32 undef to i64 ; <i64> [#uses=1]
- %tmp26 = load i32* %arrayidx25 ; <i32> [#uses=1]
+ %tmp26 = load i32, i32* %arrayidx25 ; <i32> [#uses=1]
%conv27 = zext i32 %tmp26 to i64 ; <i64> [#uses=1]
- %tmp30 = load i32* %arrayidx29 ; <i32> [#uses=2]
+ %tmp30 = load i32, i32* %arrayidx29 ; <i32> [#uses=2]
%conv31 = zext i32 %tmp30 to i64 ; <i64> [#uses=5]
%shl72 = shl i64 %conv31, 26 ; <i64> [#uses=1]
%shr = lshr i64 %conv31, 6 ; <i64> [#uses=1]
@@ -203,7 +203,7 @@ while.body: ; preds = %for.end, %bb.nph
%add137 = add i64 %add131, %add99 ; <i64> [#uses=5]
%conv1422700 = zext i8 undef to i64 ; <i64> [#uses=1]
%shl143 = shl i64 %conv1422700, 24 ; <i64> [#uses=1]
- %ptrincdec1393051 = load i8* undef ; <i8> [#uses=1]
+ %ptrincdec1393051 = load i8, i8* undef ; <i8> [#uses=1]
%conv1512699 = zext i8 %ptrincdec1393051 to i64 ; <i64> [#uses=1]
%shl152 = shl i64 %conv1512699, 16 ; <i64> [#uses=1]
%conv1712697 = zext i8 undef to i64 ; <i64> [#uses=1]
@@ -283,7 +283,7 @@ for.body: ; preds = %for.cond
%add1427 = add i64 %add1392, %d.0 ; <i64> [#uses=1]
%add1424 = add i64 %xor1412, 0 ; <i64> [#uses=1]
%add1430 = add i64 %add1424, %add1392 ; <i64> [#uses=5]
- %tmp1438 = load i32* undef ; <i32> [#uses=1]
+ %tmp1438 = load i32, i32* undef ; <i32> [#uses=1]
%conv1439 = zext i32 %tmp1438 to i64 ; <i64> [#uses=4]
%shl1441 = shl i64 %conv1439, 25 ; <i64> [#uses=1]
%shr1444 = lshr i64 %conv1439, 7 ; <i64> [#uses=1]
@@ -302,13 +302,13 @@ for.body: ; preds = %for.cond
%shr1479 = lshr i64 %conv1464, 10 ; <i64> [#uses=1]
%xor1477 = xor i64 %or1476, %shr1479 ; <i64> [#uses=1]
%xor1480 = xor i64 %xor1477, %or1470 ; <i64> [#uses=1]
- %tmp1499 = load i32* null ; <i32> [#uses=1]
+ %tmp1499 = load i32, i32* null ; <i32> [#uses=1]
%conv1500 = zext i32 %tmp1499 to i64 ; <i64> [#uses=1]
%add1491 = add i64 %conv1500, 0 ; <i64> [#uses=1]
%add1501 = add i64 %add1491, %xor1455 ; <i64> [#uses=1]
%add1502 = add i64 %add1501, %xor1480 ; <i64> [#uses=1]
%conv1504 = and i64 %add1502, 4294967295 ; <i64> [#uses=1]
- %tmp1541 = load i32* undef ; <i32> [#uses=1]
+ %tmp1541 = load i32, i32* undef ; <i32> [#uses=1]
%conv1542 = zext i32 %tmp1541 to i64 ; <i64> [#uses=1]
%add1527 = add i64 %conv1542, %g.0 ; <i64> [#uses=1]
%add1536 = add i64 %add1527, 0 ; <i64> [#uses=1]
@@ -327,7 +327,7 @@ for.body: ; preds = %for.cond
%add1576 = add i64 %xor1564, 0 ; <i64> [#uses=1]
%add1582 = add i64 %add1576, %add1544 ; <i64> [#uses=3]
store i32 undef, i32* undef
- %tmp1693 = load i32* undef ; <i32> [#uses=1]
+ %tmp1693 = load i32, i32* undef ; <i32> [#uses=1]
%conv1694 = zext i32 %tmp1693 to i64 ; <i64> [#uses=1]
%add1679 = add i64 %conv1694, %f.0 ; <i64> [#uses=1]
%add1688 = add i64 %add1679, 0 ; <i64> [#uses=1]
diff --git a/test/CodeGen/X86/2009-06-03-Win64DisableRedZone.ll b/test/CodeGen/X86/2009-06-03-Win64DisableRedZone.ll
index 98b1e0ed2f42..8d387136da9c 100644
--- a/test/CodeGen/X86/2009-06-03-Win64DisableRedZone.ll
+++ b/test/CodeGen/X86/2009-06-03-Win64DisableRedZone.ll
@@ -1,7 +1,8 @@
; RUN: llc -mtriple=x86_64-mingw32 < %s | FileCheck %s
+; RUN: llc -mtriple=x86_64-linux < %s | FileCheck %s
; CHECK-NOT: -{{[1-9][0-9]*}}(%rsp)
-define x86_fp80 @a(i64 %x) nounwind readnone {
+define x86_64_win64cc x86_fp80 @a(i64 %x) nounwind readnone {
entry:
%conv = sitofp i64 %x to x86_fp80 ; <x86_fp80> [#uses=1]
ret x86_fp80 %conv
diff --git a/test/CodeGen/X86/2009-06-04-VirtualLiveIn.ll b/test/CodeGen/X86/2009-06-04-VirtualLiveIn.ll
index 3dcc0d42e759..29795492d89c 100644
--- a/test/CodeGen/X86/2009-06-04-VirtualLiveIn.ll
+++ b/test/CodeGen/X86/2009-06-04-VirtualLiveIn.ll
@@ -17,7 +17,7 @@
define fastcc void @MinSize(%struct.rec* %x) nounwind {
entry:
- %tmp13 = load i8* undef, align 4 ; <i8> [#uses=3]
+ %tmp13 = load i8, i8* undef, align 4 ; <i8> [#uses=3]
%tmp14 = zext i8 %tmp13 to i32 ; <i32> [#uses=2]
switch i32 %tmp14, label %bb1109 [
i32 42, label %bb246
@@ -34,7 +34,7 @@ bb249: ; preds = %bb246
br i1 %tmp3240, label %bb974, label %bb269
bb269:
- %tmp3424 = getelementptr %struct.rec* %x, i32 0, i32 0, i32 0, i32 0, i32 1 ; <%struct.rec**> [#uses=0]
+ %tmp3424 = getelementptr %struct.rec, %struct.rec* %x, i32 0, i32 0, i32 0, i32 0, i32 1 ; <%struct.rec**> [#uses=0]
unreachable
bb974:
diff --git a/test/CodeGen/X86/2009-06-05-ScalarToVectorByteMMX.ll b/test/CodeGen/X86/2009-06-05-ScalarToVectorByteMMX.ll
deleted file mode 100644
index 3061dc2c8275..000000000000
--- a/test/CodeGen/X86/2009-06-05-ScalarToVectorByteMMX.ll
+++ /dev/null
@@ -1,9 +0,0 @@
-; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu -mattr=+mmx,+sse2 | FileCheck %s
-
-; CHECK-NOT: movl
-
-define <8 x i8> @a(i8 zeroext %x) nounwind {
- %r = insertelement <8 x i8> undef, i8 %x, i32 0
- ret <8 x i8> %r
-}
-
diff --git a/test/CodeGen/X86/2009-06-05-VZextByteShort.ll b/test/CodeGen/X86/2009-06-05-VZextByteShort.ll
index ffbe02c71356..9c7eb6d633db 100644
--- a/test/CodeGen/X86/2009-06-05-VZextByteShort.ll
+++ b/test/CodeGen/X86/2009-06-05-VZextByteShort.ll
@@ -6,7 +6,7 @@ define <4 x i16> @a(i32* %x1) nounwind {
; CHECK-NEXT: movd %[[R]], %xmm0
; CHECK-NEXT: retl
- %x2 = load i32* %x1
+ %x2 = load i32, i32* %x1
%x3 = lshr i32 %x2, 1
%x = trunc i32 %x3 to i16
%r = insertelement <4 x i16> zeroinitializer, i16 %x, i32 0
@@ -20,7 +20,7 @@ define <8 x i16> @b(i32* %x1) nounwind {
; CHECK-NEXT: movd %e[[R]]x, %xmm0
; CHECK-NEXT: retl
- %x2 = load i32* %x1
+ %x2 = load i32, i32* %x1
%x3 = lshr i32 %x2, 1
%x = trunc i32 %x3 to i16
%r = insertelement <8 x i16> zeroinitializer, i16 %x, i32 0
@@ -34,7 +34,7 @@ define <8 x i8> @c(i32* %x1) nounwind {
; CHECK-NEXT: movd %e[[R]]x, %xmm0
; CHECK-NEXT: retl
- %x2 = load i32* %x1
+ %x2 = load i32, i32* %x1
%x3 = lshr i32 %x2, 1
%x = trunc i32 %x3 to i8
%r = insertelement <8 x i8> zeroinitializer, i8 %x, i32 0
@@ -48,7 +48,7 @@ define <16 x i8> @d(i32* %x1) nounwind {
; CHECK-NEXT: movd %e[[R]]x, %xmm0
; CHECK-NEXT: retl
- %x2 = load i32* %x1
+ %x2 = load i32, i32* %x1
%x3 = lshr i32 %x2, 1
%x = trunc i32 %x3 to i8
%r = insertelement <16 x i8> zeroinitializer, i8 %x, i32 0
diff --git a/test/CodeGen/X86/2009-06-07-ExpandMMXBitcast.ll b/test/CodeGen/X86/2009-06-07-ExpandMMXBitcast.ll
deleted file mode 100644
index 66caedfc0692..000000000000
--- a/test/CodeGen/X86/2009-06-07-ExpandMMXBitcast.ll
+++ /dev/null
@@ -1,10 +0,0 @@
-; RUN: llc < %s -mtriple=x86_64-linux -mcpu=corei7 -mattr=+mmx | grep movd | count 2
-
-define i64 @a(i32 %a, i32 %b) nounwind readnone {
-entry:
- %0 = insertelement <2 x i32> undef, i32 %a, i32 0 ; <<2 x i32>> [#uses=1]
- %1 = insertelement <2 x i32> %0, i32 %b, i32 1 ; <<2 x i32>> [#uses=1]
- %conv = bitcast <2 x i32> %1 to i64 ; <i64> [#uses=1]
- ret i64 %conv
-}
-
diff --git a/test/CodeGen/X86/2009-06-18-movlp-shuffle-register.ll b/test/CodeGen/X86/2009-06-18-movlp-shuffle-register.ll
index 8ea70b428007..4c4552da16a5 100644
--- a/test/CodeGen/X86/2009-06-18-movlp-shuffle-register.ll
+++ b/test/CodeGen/X86/2009-06-18-movlp-shuffle-register.ll
@@ -3,7 +3,7 @@
define <4 x float> @f4523(<4 x float> %a,<4 x float> %b) nounwind {
entry:
-; CHECK: shufps $-28, %xmm
+; CHECK: shufps $228, %xmm
%shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 4,i32
5,i32 2,i32 3>
ret <4 x float> %shuffle
diff --git a/test/CodeGen/X86/2009-07-15-CoalescerBug.ll b/test/CodeGen/X86/2009-07-15-CoalescerBug.ll
index eabaf775edef..beb57054dea2 100644
--- a/test/CodeGen/X86/2009-07-15-CoalescerBug.ll
+++ b/test/CodeGen/X86/2009-07-15-CoalescerBug.ll
@@ -237,7 +237,7 @@ bb1545: ; preds = %bb1544
br i1 undef, label %bb1563, label %bb1558
bb1558: ; preds = %bb1545
- %0 = load %struct.SV** undef ; <%struct.SV*> [#uses=1]
+ %0 = load %struct.SV*, %struct.SV** undef ; <%struct.SV*> [#uses=1]
%1 = bitcast %struct.SV* %0 to %struct.GV* ; <%struct.GV*> [#uses=5]
br i1 undef, label %bb1563, label %bb1559
diff --git a/test/CodeGen/X86/2009-07-20-CoalescerBug.ll b/test/CodeGen/X86/2009-07-20-CoalescerBug.ll
index e99edd60bd5e..1fa8da31b756 100644
--- a/test/CodeGen/X86/2009-07-20-CoalescerBug.ll
+++ b/test/CodeGen/X86/2009-07-20-CoalescerBug.ll
@@ -9,7 +9,7 @@ entry:
br i1 undef, label %return, label %if.end
if.end: ; preds = %entry
- %tmp35 = getelementptr %struct.re_pattern_buffer* %bufp, i64 0, i32 3 ; <i64*> [#uses=1]
+ %tmp35 = getelementptr %struct.re_pattern_buffer, %struct.re_pattern_buffer* %bufp, i64 0, i32 3 ; <i64*> [#uses=1]
store i64 %syntax, i64* %tmp35
store i32 undef, i32* undef
br i1 undef, label %if.then66, label %if.end102
@@ -80,7 +80,7 @@ while.cond979.preheader: ; preds = %if.then842, %land.lhs.true838, %cond.end834
%startoffset.0.ph = phi i32 [ 0, %cond.end834 ], [ 0, %land.lhs.true838 ], [ %conv851, %if.then842 ] ; <i32> [#uses=2]
%laststart.7.ph = phi i8* [ %laststart.2, %cond.end834 ], [ %laststart.2, %land.lhs.true838 ], [ %laststart.2, %if.then842 ] ; <i8*> [#uses=3]
%b.4.ph = phi i8* [ %b.1, %cond.end834 ], [ %b.1, %land.lhs.true838 ], [ %b.1, %if.then842 ] ; <i8*> [#uses=3]
- %ctg29688 = getelementptr i8* %b.4.ph, i64 6 ; <i8*> [#uses=1]
+ %ctg29688 = getelementptr i8, i8* %b.4.ph, i64 6 ; <i8*> [#uses=1]
br label %while.cond979
while.cond979: ; preds = %if.end1006, %while.cond979.preheader
@@ -102,21 +102,21 @@ while.end1088: ; preds = %while.cond979
if.then1091: ; preds = %while.end1088
store i8 undef, i8* undef
%idx.ext1132.pre = zext i32 %startoffset.0.ph to i64 ; <i64> [#uses=1]
- %add.ptr1133.pre = getelementptr i8* %laststart.7.ph, i64 %idx.ext1132.pre ; <i8*> [#uses=1]
+ %add.ptr1133.pre = getelementptr i8, i8* %laststart.7.ph, i64 %idx.ext1132.pre ; <i8*> [#uses=1]
%sub.ptr.lhs.cast1135.pre = ptrtoint i8* %add.ptr1133.pre to i64 ; <i64> [#uses=1]
br label %if.end1126
if.else1101: ; preds = %while.end1088
%cond1109 = select i1 undef, i32 18, i32 14 ; <i32> [#uses=1]
%idx.ext1112 = zext i32 %startoffset.0.ph to i64 ; <i64> [#uses=1]
- %add.ptr1113 = getelementptr i8* %laststart.7.ph, i64 %idx.ext1112 ; <i8*> [#uses=2]
+ %add.ptr1113 = getelementptr i8, i8* %laststart.7.ph, i64 %idx.ext1112 ; <i8*> [#uses=2]
%sub.ptr.rhs.cast1121 = ptrtoint i8* %add.ptr1113 to i64 ; <i64> [#uses=1]
call fastcc void @insert_op1(i32 %cond1109, i8* %add.ptr1113, i32 undef, i8* %b.4.ph) ssp
br label %if.end1126
if.end1126: ; preds = %if.else1101, %if.then1091
%sub.ptr.lhs.cast1135.pre-phi = phi i64 [ %sub.ptr.rhs.cast1121, %if.else1101 ], [ %sub.ptr.lhs.cast1135.pre, %if.then1091 ] ; <i64> [#uses=1]
- %add.ptr1128 = getelementptr i8* %b.4.ph, i64 3 ; <i8*> [#uses=1]
+ %add.ptr1128 = getelementptr i8, i8* %b.4.ph, i64 3 ; <i8*> [#uses=1]
%sub.ptr.rhs.cast1136 = ptrtoint i8* %add.ptr1128 to i64 ; <i64> [#uses=1]
%sub.ptr.sub1137 = sub i64 %sub.ptr.lhs.cast1135.pre-phi, %sub.ptr.rhs.cast1136 ; <i64> [#uses=1]
%sub.ptr.sub11378527 = trunc i64 %sub.ptr.sub1137 to i32 ; <i32> [#uses=1]
diff --git a/test/CodeGen/X86/2009-07-20-DAGCombineBug.ll b/test/CodeGen/X86/2009-07-20-DAGCombineBug.ll
index e83b3a7db592..045e89e15856 100644
--- a/test/CodeGen/X86/2009-07-20-DAGCombineBug.ll
+++ b/test/CodeGen/X86/2009-07-20-DAGCombineBug.ll
@@ -5,7 +5,7 @@
define fastcc i32 @bsGetUInt32() nounwind ssp {
entry:
- %bsBuff.promoted44 = load i32* @bsBuff ; <i32> [#uses=1]
+ %bsBuff.promoted44 = load i32, i32* @bsBuff ; <i32> [#uses=1]
%0 = add i32 0, -8 ; <i32> [#uses=1]
%1 = lshr i32 %bsBuff.promoted44, %0 ; <i32> [#uses=1]
%2 = shl i32 %1, 8 ; <i32> [#uses=1]
diff --git a/test/CodeGen/X86/2009-08-02-mmx-scalar-to-vector.ll b/test/CodeGen/X86/2009-08-02-mmx-scalar-to-vector.ll
deleted file mode 100644
index 288eef4f6991..000000000000
--- a/test/CodeGen/X86/2009-08-02-mmx-scalar-to-vector.ll
+++ /dev/null
@@ -1,12 +0,0 @@
-; RUN: llc < %s -march=x86-64
-; PR4669
-declare x86_mmx @llvm.x86.mmx.pslli.q(x86_mmx, i32)
-
-define <1 x i64> @test(i64 %t) {
-entry:
- %t1 = insertelement <1 x i64> undef, i64 %t, i32 0
- %t0 = bitcast <1 x i64> %t1 to x86_mmx
- %t2 = tail call x86_mmx @llvm.x86.mmx.pslli.q(x86_mmx %t0, i32 48)
- %t3 = bitcast x86_mmx %t2 to <1 x i64>
- ret <1 x i64> %t3
-}
diff --git a/test/CodeGen/X86/2009-08-06-branchfolder-crash.ll b/test/CodeGen/X86/2009-08-06-branchfolder-crash.ll
index 2080c0ae2e0f..a70861dcf849 100644
--- a/test/CodeGen/X86/2009-08-06-branchfolder-crash.ll
+++ b/test/CodeGen/X86/2009-08-06-branchfolder-crash.ll
@@ -43,12 +43,12 @@ entry:
br i1 %tobool, label %lor.lhs.false, label %if.then
lor.lhs.false: ; preds = %entry
- %tmp1 = load i8* @g_3 ; <i8> [#uses=1]
+ %tmp1 = load i8, i8* @g_3 ; <i8> [#uses=1]
%tobool3 = icmp eq i8 %tmp1, 0 ; <i1> [#uses=1]
br i1 %tobool3, label %return, label %if.then
if.then: ; preds = %lor.lhs.false, %entry
- %tmp4 = load i8* @g_3 ; <i8> [#uses=1]
+ %tmp4 = load i8, i8* @g_3 ; <i8> [#uses=1]
%conv5 = sext i8 %tmp4 to i32 ; <i32> [#uses=1]
ret i32 %conv5
@@ -93,12 +93,12 @@ entry:
br i1 %tobool, label %lor.lhs.false, label %if.then
lor.lhs.false: ; preds = %entry
- %tmp1 = load i8* @g_3 ; <i8> [#uses=1]
+ %tmp1 = load i8, i8* @g_3 ; <i8> [#uses=1]
%tobool3 = icmp eq i8 %tmp1, 0 ; <i1> [#uses=1]
br i1 %tobool3, label %return, label %if.then
if.then: ; preds = %lor.lhs.false, %entry
- %tmp4 = load i8* @g_3 ; <i8> [#uses=1]
+ %tmp4 = load i8, i8* @g_3 ; <i8> [#uses=1]
%conv5 = sext i8 %tmp4 to i32 ; <i32> [#uses=1]
ret i32 %conv5
diff --git a/test/CodeGen/X86/2009-08-06-inlineasm.ll b/test/CodeGen/X86/2009-08-06-inlineasm.ll
index f9b5f9e0b1fd..f548bc093026 100644
--- a/test/CodeGen/X86/2009-08-06-inlineasm.ll
+++ b/test/CodeGen/X86/2009-08-06-inlineasm.ll
@@ -19,12 +19,12 @@ if.then28: ; preds = %entry
if.end78: ; preds = %if.then28, %entry
%level.1 = phi i32 [ %tmp, %if.then28 ], [ 0, %entry ] ; <i32> [#uses=1]
- %add.ptr1 = getelementptr [64 x i16]* null, i32 0, i32 %qscale ; <i16*> [#uses=1]
- %add.ptr2 = getelementptr [64 x i16]* null, i32 1, i32 %qscale ; <i16*> [#uses=1]
- %add.ptr3 = getelementptr [64 x i16]* null, i32 2, i32 %qscale ; <i16*> [#uses=1]
- %add.ptr4 = getelementptr [64 x i16]* null, i32 3, i32 %qscale ; <i16*> [#uses=1]
- %add.ptr5 = getelementptr [64 x i16]* null, i32 4, i32 %qscale ; <i16*> [#uses=1]
- %add.ptr6 = getelementptr [64 x i16]* null, i32 5, i32 %qscale ; <i16*> [#uses=1]
+ %add.ptr1 = getelementptr [64 x i16], [64 x i16]* null, i32 0, i32 %qscale ; <i16*> [#uses=1]
+ %add.ptr2 = getelementptr [64 x i16], [64 x i16]* null, i32 1, i32 %qscale ; <i16*> [#uses=1]
+ %add.ptr3 = getelementptr [64 x i16], [64 x i16]* null, i32 2, i32 %qscale ; <i16*> [#uses=1]
+ %add.ptr4 = getelementptr [64 x i16], [64 x i16]* null, i32 3, i32 %qscale ; <i16*> [#uses=1]
+ %add.ptr5 = getelementptr [64 x i16], [64 x i16]* null, i32 4, i32 %qscale ; <i16*> [#uses=1]
+ %add.ptr6 = getelementptr [64 x i16], [64 x i16]* null, i32 5, i32 %qscale ; <i16*> [#uses=1]
%tmp1 = call i32 asm sideeffect "nop", "={ax},r,r,r,r,r,0,~{dirflag},~{fpsr},~{flags}"(i16* %add.ptr6, i16* %add.ptr5, i16* %add.ptr4, i16* %add.ptr3, i16* %add.ptr2, i16* %add.ptr1) nounwind ; <i32> [#uses=0]
ret i32 %level.1
}
diff --git a/test/CodeGen/X86/2009-08-14-Win64MemoryIndirectArg.ll b/test/CodeGen/X86/2009-08-14-Win64MemoryIndirectArg.ll
index bf668e304b28..f24c3f8171bf 100644
--- a/test/CodeGen/X86/2009-08-14-Win64MemoryIndirectArg.ll
+++ b/test/CodeGen/X86/2009-08-14-Win64MemoryIndirectArg.ll
@@ -12,14 +12,14 @@ indexCheckBlock:
primitiveTextureFetchBlock: ; preds = %indexCheckBlock
%pointerArithmeticTmp = bitcast %0* %shaderExecutionStatePtr to i8* ; <i8*> [#uses=1]
- %pointerArithmeticTmp1 = getelementptr i8* %pointerArithmeticTmp, i64 1808 ; <i8*> [#uses=1]
+ %pointerArithmeticTmp1 = getelementptr i8, i8* %pointerArithmeticTmp, i64 1808 ; <i8*> [#uses=1]
%pointerArithmeticTmp2 = bitcast i8* %pointerArithmeticTmp1 to %1** ; <%1**> [#uses=1]
- %primitivePtr = load %1** %pointerArithmeticTmp2 ; <%1*> [#uses=1]
+ %primitivePtr = load %1*, %1** %pointerArithmeticTmp2 ; <%1*> [#uses=1]
%pointerArithmeticTmp3 = bitcast %1* %primitivePtr to i8* ; <i8*> [#uses=1]
- %pointerArithmeticTmp4 = getelementptr i8* %pointerArithmeticTmp3, i64 19408 ; <i8*> [#uses=1]
+ %pointerArithmeticTmp4 = getelementptr i8, i8* %pointerArithmeticTmp3, i64 19408 ; <i8*> [#uses=1]
%pointerArithmeticTmp5 = bitcast i8* %pointerArithmeticTmp4 to %1** ; <%1**> [#uses=1]
- %primitiveTexturePtr = getelementptr %1** %pointerArithmeticTmp5, i32 %index ; <%1**> [#uses=1]
- %primitiveTexturePtr6 = load %1** %primitiveTexturePtr ; <%1*> [#uses=2]
+ %primitiveTexturePtr = getelementptr %1*, %1** %pointerArithmeticTmp5, i32 %index ; <%1**> [#uses=1]
+ %primitiveTexturePtr6 = load %1*, %1** %primitiveTexturePtr ; <%1*> [#uses=2]
br label %textureCheckBlock
textureCheckBlock: ; preds = %primitiveTextureFetchBlock
@@ -29,9 +29,9 @@ textureCheckBlock: ; preds = %primitiveTextureFetchBlock
rhoCalculateBlock: ; preds = %textureCheckBlock
%pointerArithmeticTmp7 = bitcast %1* %primitiveTexturePtr6 to i8* ; <i8*> [#uses=1]
- %pointerArithmeticTmp8 = getelementptr i8* %pointerArithmeticTmp7, i64 640 ; <i8*> [#uses=1]
+ %pointerArithmeticTmp8 = getelementptr i8, i8* %pointerArithmeticTmp7, i64 640 ; <i8*> [#uses=1]
%pointerArithmeticTmp9 = bitcast i8* %pointerArithmeticTmp8 to <4 x float>* ; <<4 x float>*> [#uses=1]
- %dimensionsPtr = load <4 x float>* %pointerArithmeticTmp9, align 1 ; <<4 x float>> [#uses=2]
+ %dimensionsPtr = load <4 x float>, <4 x float>* %pointerArithmeticTmp9, align 1 ; <<4 x float>> [#uses=2]
%texDiffDX = fsub <4 x float> %texCoordDX, %texCoord ; <<4 x float>> [#uses=1]
%texDiffDY = fsub <4 x float> %texCoordDY, %texCoord ; <<4 x float>> [#uses=1]
%ddx = fmul <4 x float> %texDiffDX, %dimensionsPtr ; <<4 x float>> [#uses=2]
diff --git a/test/CodeGen/X86/2009-08-19-LoadNarrowingMiscompile.ll b/test/CodeGen/X86/2009-08-19-LoadNarrowingMiscompile.ll
index 5f6cf3b9e0bb..5926ab4b5c72 100644
--- a/test/CodeGen/X86/2009-08-19-LoadNarrowingMiscompile.ll
+++ b/test/CodeGen/X86/2009-08-19-LoadNarrowingMiscompile.ll
@@ -5,7 +5,7 @@
define void @c() nounwind {
; CHECK: movl a+8, %eax
- %srcval1 = load i96* @a, align 4
+ %srcval1 = load i96, i96* @a, align 4
%sroa.store.elt2 = lshr i96 %srcval1, 64
%tmp = trunc i96 %sroa.store.elt2 to i64
; CHECK: movl %eax, b
diff --git a/test/CodeGen/X86/2009-08-23-SubRegReuseUndo.ll b/test/CodeGen/X86/2009-08-23-SubRegReuseUndo.ll
index 410a42a42878..45e770f8121b 100644
--- a/test/CodeGen/X86/2009-08-23-SubRegReuseUndo.ll
+++ b/test/CodeGen/X86/2009-08-23-SubRegReuseUndo.ll
@@ -41,19 +41,19 @@ bb3: ; preds = %bb2, %bb
br i1 undef, label %bb5, label %bb4
bb4: ; preds = %bb3
- %17 = load volatile i32* @uint8, align 4 ; <i32> [#uses=0]
+ %17 = load volatile i32, i32* @uint8, align 4 ; <i32> [#uses=0]
br label %bb5
bb5: ; preds = %bb4, %bb3
- %18 = load volatile i32* @uint8, align 4 ; <i32> [#uses=0]
+ %18 = load volatile i32, i32* @uint8, align 4 ; <i32> [#uses=0]
%19 = sext i8 undef to i16 ; <i16> [#uses=1]
%20 = tail call i32 @func_24(i16 zeroext %19, i8 signext 1) nounwind; <i32> [#uses=0]
br i1 undef, label %return, label %bb6.preheader
bb6.preheader: ; preds = %bb5
%21 = sext i8 %p_52 to i32 ; <i32> [#uses=1]
- %22 = load volatile i32* @uint8, align 4 ; <i32> [#uses=0]
- %23 = tail call i32 (...)* @safefuncts(i32 %21, i32 1) nounwind; <i32> [#uses=0]
+ %22 = load volatile i32, i32* @uint8, align 4 ; <i32> [#uses=0]
+ %23 = tail call i32 (...) @safefuncts(i32 %21, i32 1) nounwind; <i32> [#uses=0]
unreachable
return: ; preds = %bb5
diff --git a/test/CodeGen/X86/2009-09-10-LoadFoldingBug.ll b/test/CodeGen/X86/2009-09-10-LoadFoldingBug.ll
index 5483b73ecc8c..2ec49f486c99 100644
--- a/test/CodeGen/X86/2009-09-10-LoadFoldingBug.ll
+++ b/test/CodeGen/X86/2009-09-10-LoadFoldingBug.ll
@@ -25,8 +25,8 @@ invcont: ; preds = %entry
to label %invcont1 unwind label %lpad ; <i8> [#uses=0]
invcont1: ; preds = %invcont
- %6 = getelementptr inbounds %struct.ComplexType* %2, i64 0, i32 0 ; <i32*> [#uses=1]
- %7 = load i32* %6, align 4 ; <i32> [#uses=1]
+ %6 = getelementptr inbounds %struct.ComplexType, %struct.ComplexType* %2, i64 0, i32 0 ; <i32*> [#uses=1]
+ %7 = load i32, i32* %6, align 4 ; <i32> [#uses=1]
invoke void @booleanAndDataReply(i32 %7, i32 undef, i32 %requestID, i32 undef, i64 undef, i32 undef)
to label %invcont2 unwind label %lpad
diff --git a/test/CodeGen/X86/2009-09-10-SpillComments.ll b/test/CodeGen/X86/2009-09-10-SpillComments.ll
index adac20336048..78ce1cefcd43 100644
--- a/test/CodeGen/X86/2009-09-10-SpillComments.ll
+++ b/test/CodeGen/X86/2009-09-10-SpillComments.ll
@@ -20,18 +20,18 @@ entry:
br i1 %tmp2, label %UnifiedReturnBlock, label %cond_next
cond_next: ; preds = %entry
- %tmp6 = getelementptr %struct.rtx_def* %x, i32 0, i32 0 ; <i16*> [#uses=1]
- %tmp7 = load i16* %tmp6 ; <i16> [#uses=2]
+ %tmp6 = getelementptr %struct.rtx_def, %struct.rtx_def* %x, i32 0, i32 0 ; <i16*> [#uses=1]
+ %tmp7 = load i16, i16* %tmp6 ; <i16> [#uses=2]
%tmp78 = zext i16 %tmp7 to i32 ; <i32> [#uses=2]
%tmp10 = icmp eq i16 %tmp7, 54 ; <i1> [#uses=1]
br i1 %tmp10, label %cond_true13, label %cond_next32
cond_true13: ; preds = %cond_next
- %tmp15 = getelementptr %struct.rtx_def* %x, i32 0, i32 3 ; <[1 x %struct..0anon]*> [#uses=1]
+ %tmp15 = getelementptr %struct.rtx_def, %struct.rtx_def* %x, i32 0, i32 3 ; <[1 x %struct..0anon]*> [#uses=1]
%tmp1718 = bitcast [1 x %struct..0anon]* %tmp15 to %struct.rtx_def** ; <%struct.rtx_def**> [#uses=1]
- %tmp19 = load %struct.rtx_def** %tmp1718 ; <%struct.rtx_def*> [#uses=1]
- %tmp20 = getelementptr %struct.rtx_def* %tmp19, i32 0, i32 0 ; <i16*> [#uses=1]
- %tmp21 = load i16* %tmp20 ; <i16> [#uses=1]
+ %tmp19 = load %struct.rtx_def*, %struct.rtx_def** %tmp1718 ; <%struct.rtx_def*> [#uses=1]
+ %tmp20 = getelementptr %struct.rtx_def, %struct.rtx_def* %tmp19, i32 0, i32 0 ; <i16*> [#uses=1]
+ %tmp21 = load i16, i16* %tmp20 ; <i16> [#uses=1]
%tmp22 = icmp eq i16 %tmp21, 57 ; <i1> [#uses=1]
br i1 %tmp22, label %cond_true25, label %cond_next32
@@ -40,10 +40,10 @@ cond_true25: ; preds = %cond_true13
ret %struct.rtx_def* %tmp29
cond_next32: ; preds = %cond_true13, %cond_next
- %tmp34 = getelementptr [116 x i8*]* @rtx_format, i32 0, i32 %tmp78 ; <i8**> [#uses=1]
- %tmp35 = load i8** %tmp34, align 4 ; <i8*> [#uses=1]
- %tmp37 = getelementptr [117 x i32]* @rtx_length, i32 0, i32 %tmp78 ; <i32*> [#uses=1]
- %tmp38 = load i32* %tmp37, align 4 ; <i32> [#uses=1]
+ %tmp34 = getelementptr [116 x i8*], [116 x i8*]* @rtx_format, i32 0, i32 %tmp78 ; <i8**> [#uses=1]
+ %tmp35 = load i8*, i8** %tmp34, align 4 ; <i8*> [#uses=1]
+ %tmp37 = getelementptr [117 x i32], [117 x i32]* @rtx_length, i32 0, i32 %tmp78 ; <i32*> [#uses=1]
+ %tmp38 = load i32, i32* %tmp37, align 4 ; <i32> [#uses=1]
%i.011 = add i32 %tmp38, -1 ; <i32> [#uses=2]
%tmp12513 = icmp sgt i32 %i.011, -1 ; <i1> [#uses=1]
br i1 %tmp12513, label %bb, label %UnifiedReturnBlock
@@ -51,48 +51,48 @@ cond_next32: ; preds = %cond_true13, %cond_next
bb: ; preds = %bb123, %cond_next32
%indvar = phi i32 [ %indvar.next26, %bb123 ], [ 0, %cond_next32 ] ; <i32> [#uses=2]
%i.01.0 = sub i32 %i.011, %indvar ; <i32> [#uses=5]
- %tmp42 = getelementptr i8* %tmp35, i32 %i.01.0 ; <i8*> [#uses=2]
- %tmp43 = load i8* %tmp42 ; <i8> [#uses=1]
+ %tmp42 = getelementptr i8, i8* %tmp35, i32 %i.01.0 ; <i8*> [#uses=2]
+ %tmp43 = load i8, i8* %tmp42 ; <i8> [#uses=1]
switch i8 %tmp43, label %bb123 [
i8 101, label %cond_true47
i8 69, label %bb105.preheader
]
cond_true47: ; preds = %bb
- %tmp52 = getelementptr %struct.rtx_def* %x, i32 0, i32 3, i32 %i.01.0 ; <%struct..0anon*> [#uses=1]
+ %tmp52 = getelementptr %struct.rtx_def, %struct.rtx_def* %x, i32 0, i32 3, i32 %i.01.0 ; <%struct..0anon*> [#uses=1]
%tmp5354 = bitcast %struct..0anon* %tmp52 to %struct.rtx_def** ; <%struct.rtx_def**> [#uses=1]
- %tmp55 = load %struct.rtx_def** %tmp5354 ; <%struct.rtx_def*> [#uses=1]
+ %tmp55 = load %struct.rtx_def*, %struct.rtx_def** %tmp5354 ; <%struct.rtx_def*> [#uses=1]
%tmp58 = tail call %struct.rtx_def* @walk_fixup_memory_subreg( %struct.rtx_def* %tmp55, %struct.rtx_def* %insn ) nounwind ; <%struct.rtx_def*> [#uses=1]
- %tmp62 = getelementptr %struct.rtx_def* %x, i32 0, i32 3, i32 %i.01.0, i32 0 ; <i32*> [#uses=1]
+ %tmp62 = getelementptr %struct.rtx_def, %struct.rtx_def* %x, i32 0, i32 3, i32 %i.01.0, i32 0 ; <i32*> [#uses=1]
%tmp58.c = ptrtoint %struct.rtx_def* %tmp58 to i32 ; <i32> [#uses=1]
store i32 %tmp58.c, i32* %tmp62
- %tmp6816 = load i8* %tmp42 ; <i8> [#uses=1]
+ %tmp6816 = load i8, i8* %tmp42 ; <i8> [#uses=1]
%tmp6917 = icmp eq i8 %tmp6816, 69 ; <i1> [#uses=1]
br i1 %tmp6917, label %bb105.preheader, label %bb123
bb105.preheader: ; preds = %cond_true47, %bb
- %tmp11020 = getelementptr %struct.rtx_def* %x, i32 0, i32 3, i32 %i.01.0 ; <%struct..0anon*> [#uses=1]
+ %tmp11020 = getelementptr %struct.rtx_def, %struct.rtx_def* %x, i32 0, i32 3, i32 %i.01.0 ; <%struct..0anon*> [#uses=1]
%tmp11111221 = bitcast %struct..0anon* %tmp11020 to %struct.rtvec_def** ; <%struct.rtvec_def**> [#uses=3]
- %tmp11322 = load %struct.rtvec_def** %tmp11111221 ; <%struct.rtvec_def*> [#uses=1]
- %tmp11423 = getelementptr %struct.rtvec_def* %tmp11322, i32 0, i32 0 ; <i32*> [#uses=1]
- %tmp11524 = load i32* %tmp11423 ; <i32> [#uses=1]
+ %tmp11322 = load %struct.rtvec_def*, %struct.rtvec_def** %tmp11111221 ; <%struct.rtvec_def*> [#uses=1]
+ %tmp11423 = getelementptr %struct.rtvec_def, %struct.rtvec_def* %tmp11322, i32 0, i32 0 ; <i32*> [#uses=1]
+ %tmp11524 = load i32, i32* %tmp11423 ; <i32> [#uses=1]
%tmp11625 = icmp eq i32 %tmp11524, 0 ; <i1> [#uses=1]
br i1 %tmp11625, label %bb123, label %bb73
bb73: ; preds = %bb73, %bb105.preheader
%j.019 = phi i32 [ %tmp104, %bb73 ], [ 0, %bb105.preheader ] ; <i32> [#uses=3]
- %tmp81 = load %struct.rtvec_def** %tmp11111221 ; <%struct.rtvec_def*> [#uses=2]
- %tmp92 = getelementptr %struct.rtvec_def* %tmp81, i32 0, i32 1, i32 %j.019 ; <%struct..0anon*> [#uses=1]
+ %tmp81 = load %struct.rtvec_def*, %struct.rtvec_def** %tmp11111221 ; <%struct.rtvec_def*> [#uses=2]
+ %tmp92 = getelementptr %struct.rtvec_def, %struct.rtvec_def* %tmp81, i32 0, i32 1, i32 %j.019 ; <%struct..0anon*> [#uses=1]
%tmp9394 = bitcast %struct..0anon* %tmp92 to %struct.rtx_def** ; <%struct.rtx_def**> [#uses=1]
- %tmp95 = load %struct.rtx_def** %tmp9394 ; <%struct.rtx_def*> [#uses=1]
+ %tmp95 = load %struct.rtx_def*, %struct.rtx_def** %tmp9394 ; <%struct.rtx_def*> [#uses=1]
%tmp98 = tail call %struct.rtx_def* @walk_fixup_memory_subreg( %struct.rtx_def* %tmp95, %struct.rtx_def* %insn ) nounwind ; <%struct.rtx_def*> [#uses=1]
- %tmp101 = getelementptr %struct.rtvec_def* %tmp81, i32 0, i32 1, i32 %j.019, i32 0 ; <i32*> [#uses=1]
+ %tmp101 = getelementptr %struct.rtvec_def, %struct.rtvec_def* %tmp81, i32 0, i32 1, i32 %j.019, i32 0 ; <i32*> [#uses=1]
%tmp98.c = ptrtoint %struct.rtx_def* %tmp98 to i32 ; <i32> [#uses=1]
store i32 %tmp98.c, i32* %tmp101
%tmp104 = add i32 %j.019, 1 ; <i32> [#uses=2]
- %tmp113 = load %struct.rtvec_def** %tmp11111221 ; <%struct.rtvec_def*> [#uses=1]
- %tmp114 = getelementptr %struct.rtvec_def* %tmp113, i32 0, i32 0 ; <i32*> [#uses=1]
- %tmp115 = load i32* %tmp114 ; <i32> [#uses=1]
+ %tmp113 = load %struct.rtvec_def*, %struct.rtvec_def** %tmp11111221 ; <%struct.rtvec_def*> [#uses=1]
+ %tmp114 = getelementptr %struct.rtvec_def, %struct.rtvec_def* %tmp113, i32 0, i32 0 ; <i32*> [#uses=1]
+ %tmp115 = load i32, i32* %tmp114 ; <i32> [#uses=1]
%tmp116 = icmp ult i32 %tmp104, %tmp115 ; <i1> [#uses=1]
br i1 %tmp116, label %bb73, label %bb123
diff --git a/test/CodeGen/X86/2009-09-16-CoalescerBug.ll b/test/CodeGen/X86/2009-09-16-CoalescerBug.ll
index 18b5a179c9ef..a18a30ad5cd8 100644
--- a/test/CodeGen/X86/2009-09-16-CoalescerBug.ll
+++ b/test/CodeGen/X86/2009-09-16-CoalescerBug.ll
@@ -32,7 +32,7 @@ lor.lhs.false: ; preds = %for.body
br i1 %cmp16, label %for.end41, label %for.cond17.preheader
for.cond17.preheader: ; preds = %lor.lhs.false
- %tmp24 = load i32* @boot_cpu_id ; <i32> [#uses=1]
+ %tmp24 = load i32, i32* @boot_cpu_id ; <i32> [#uses=1]
%shr26 = ashr i32 %tmp24, %and ; <i32> [#uses=1]
br label %for.body20
diff --git a/test/CodeGen/X86/2009-09-21-NoSpillLoopCount.ll b/test/CodeGen/X86/2009-09-21-NoSpillLoopCount.ll
index 0268d817c70d..840b3171f689 100644
--- a/test/CodeGen/X86/2009-09-21-NoSpillLoopCount.ll
+++ b/test/CodeGen/X86/2009-09-21-NoSpillLoopCount.ll
@@ -12,12 +12,12 @@ bb: ; preds = %bb, %entry
%i.03 = phi i32 [ 0, %entry ], [ %indvar.next, %bb ] ; <i32> [#uses=3]
%sum.04 = phi i32 [ 0, %entry ], [ %10, %bb ] ; <i32> [#uses=1]
%1 = mul i32 %i.03, %As ; <i32> [#uses=1]
- %2 = getelementptr i16* %A, i32 %1 ; <i16*> [#uses=1]
- %3 = load i16* %2, align 2 ; <i16> [#uses=1]
+ %2 = getelementptr i16, i16* %A, i32 %1 ; <i16*> [#uses=1]
+ %3 = load i16, i16* %2, align 2 ; <i16> [#uses=1]
%4 = sext i16 %3 to i32 ; <i32> [#uses=1]
%5 = mul i32 %i.03, %Bs ; <i32> [#uses=1]
- %6 = getelementptr i16* %B, i32 %5 ; <i16*> [#uses=1]
- %7 = load i16* %6, align 2 ; <i16> [#uses=1]
+ %6 = getelementptr i16, i16* %B, i32 %5 ; <i16*> [#uses=1]
+ %7 = load i16, i16* %6, align 2 ; <i16> [#uses=1]
%8 = sext i16 %7 to i32 ; <i32> [#uses=1]
%9 = mul i32 %8, %4 ; <i32> [#uses=1]
%10 = add i32 %9, %sum.04 ; <i32> [#uses=2]
diff --git a/test/CodeGen/X86/2009-09-22-CoalescerBug.ll b/test/CodeGen/X86/2009-09-22-CoalescerBug.ll
index 33f35f881e85..e469a6004323 100644
--- a/test/CodeGen/X86/2009-09-22-CoalescerBug.ll
+++ b/test/CodeGen/X86/2009-09-22-CoalescerBug.ll
@@ -54,7 +54,7 @@ bb9: ; preds = %quantum_new_qureg.e
unreachable
bb.i37: ; preds = %bb.i37, %bb11.thread
- %0 = load i64* undef, align 8 ; <i64> [#uses=1]
+ %0 = load i64, i64* undef, align 8 ; <i64> [#uses=1]
%1 = shl i64 %0, %.cast.i ; <i64> [#uses=1]
store i64 %1, i64* undef, align 8
br i1 undef, label %bb.i37, label %quantum_addscratch.exit
diff --git a/test/CodeGen/X86/2009-10-16-Scope.ll b/test/CodeGen/X86/2009-10-16-Scope.ll
index e75d594e6682..bda7340b3643 100644
--- a/test/CodeGen/X86/2009-10-16-Scope.ll
+++ b/test/CodeGen/X86/2009-10-16-Scope.ll
@@ -9,7 +9,7 @@ entry:
br label %do.body, !dbg !0
do.body: ; preds = %entry
- call void @llvm.dbg.declare(metadata i32* %count_, metadata !4, metadata !{!"0x102"})
+ call void @llvm.dbg.declare(metadata i32* %count_, metadata !4, metadata !DIExpression()), !dbg !DILocation(scope: !5)
%conv = ptrtoint i32* %count_ to i32, !dbg !0 ; <i32> [#uses=1]
%call = call i32 @foo(i32 %conv) ssp, !dbg !0 ; <i32> [#uses=0]
br label %do.end, !dbg !0
@@ -22,13 +22,13 @@ declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
declare i32 @foo(i32) ssp
-!0 = !MDLocation(line: 5, column: 2, scope: !1)
-!1 = !{!"0xb\001\001\000", null, !2}; [DW_TAG_lexical_block ]
-!2 = !{!"0x2e\00bar\00bar\00bar\004\000\001\000\006\000\000\000", i32 0, !3, null, null, null, null, null, null} ; [ DW_TAG_subprogram ]
-!3 = !{!"0x11\0012\00clang 1.1\001\00\000\00\000", !8, null, !9, null, null, null}; [DW_TAG_compile_unit ]
-!4 = !{!"0x100\00count_\005\000", !5, !3, !6}; [ DW_TAG_auto_variable ]
-!5 = !{!"0xb\001\001\000", null, !1}; [DW_TAG_lexical_block ]
-!6 = !{!"0x24\00int\000\0032\0032\000\000\005", null, !3}; [DW_TAG_base_type ]
-!7 = !MDLocation(line: 6, column: 1, scope: !2)
-!8 = !{!"genmodes.i", !"/Users/yash/Downloads"}
+!0 = !DILocation(line: 5, column: 2, scope: !1)
+!1 = distinct !DILexicalBlock(line: 1, column: 1, file: null, scope: !2)
+!2 = !DISubprogram(name: "bar", linkageName: "bar", line: 4, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, scope: !3)
+!3 = !DICompileUnit(language: DW_LANG_C99, producer: "clang 1.1", isOptimized: true, emissionKind: 0, file: !8, retainedTypes: !9)
+!4 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "count_", line: 5, scope: !5, file: !3, type: !6)
+!5 = distinct !DILexicalBlock(line: 1, column: 1, file: null, scope: !1)
+!6 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!7 = !DILocation(line: 6, column: 1, scope: !2)
+!8 = !DIFile(filename: "genmodes.i", directory: "/Users/yash/Downloads")
!9 = !{i32 0}
diff --git a/test/CodeGen/X86/2009-10-19-EmergencySpill.ll b/test/CodeGen/X86/2009-10-19-EmergencySpill.ll
index ba44a2e64feb..ec73f5a2c823 100644
--- a/test/CodeGen/X86/2009-10-19-EmergencySpill.ll
+++ b/test/CodeGen/X86/2009-10-19-EmergencySpill.ll
@@ -8,21 +8,21 @@
define fastcc void @nodeOverwriteCell(%struct.Rtree* nocapture %pRtree, %struct.RtreeNode* nocapture %pNode, %struct.RtreeCell* nocapture %pCell, i32 %iCell) nounwind ssp {
entry:
- %0 = load i8** undef, align 8 ; <i8*> [#uses=2]
- %1 = load i32* undef, align 8 ; <i32> [#uses=1]
+ %0 = load i8*, i8** undef, align 8 ; <i8*> [#uses=2]
+ %1 = load i32, i32* undef, align 8 ; <i32> [#uses=1]
%2 = mul i32 %1, %iCell ; <i32> [#uses=1]
%3 = add nsw i32 %2, 4 ; <i32> [#uses=1]
%4 = sext i32 %3 to i64 ; <i64> [#uses=2]
- %5 = load i64* null, align 8 ; <i64> [#uses=2]
+ %5 = load i64, i64* null, align 8 ; <i64> [#uses=2]
%6 = lshr i64 %5, 48 ; <i64> [#uses=1]
%7 = trunc i64 %6 to i8 ; <i8> [#uses=1]
store i8 %7, i8* undef, align 1
%8 = lshr i64 %5, 8 ; <i64> [#uses=1]
%9 = trunc i64 %8 to i8 ; <i8> [#uses=1]
%.sum4 = add i64 %4, 6 ; <i64> [#uses=1]
- %10 = getelementptr inbounds i8* %0, i64 %.sum4 ; <i8*> [#uses=1]
+ %10 = getelementptr inbounds i8, i8* %0, i64 %.sum4 ; <i8*> [#uses=1]
store i8 %9, i8* %10, align 1
- %11 = getelementptr inbounds %struct.Rtree* %pRtree, i64 0, i32 3 ; <i32*> [#uses=1]
+ %11 = getelementptr inbounds %struct.Rtree, %struct.Rtree* %pRtree, i64 0, i32 3 ; <i32*> [#uses=1]
br i1 undef, label %bb.nph, label %bb2
bb.nph: ; preds = %entry
@@ -31,24 +31,24 @@ bb.nph: ; preds = %entry
bb: ; preds = %bb, %bb.nph
%indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %bb ] ; <i64> [#uses=3]
- %scevgep = getelementptr %struct.RtreeCell* %pCell, i64 0, i32 1, i64 %indvar ; <%union.RtreeCoord*> [#uses=1]
+ %scevgep = getelementptr %struct.RtreeCell, %struct.RtreeCell* %pCell, i64 0, i32 1, i64 %indvar ; <%union.RtreeCoord*> [#uses=1]
%scevgep12 = bitcast %union.RtreeCoord* %scevgep to i32* ; <i32*> [#uses=1]
%tmp = shl i64 %indvar, 2 ; <i64> [#uses=1]
%tmp26 = add i64 %tmp, %tmp25 ; <i64> [#uses=1]
- %scevgep27 = getelementptr i8* %0, i64 %tmp26 ; <i8*> [#uses=1]
- %12 = load i32* %scevgep12, align 4 ; <i32> [#uses=1]
+ %scevgep27 = getelementptr i8, i8* %0, i64 %tmp26 ; <i8*> [#uses=1]
+ %12 = load i32, i32* %scevgep12, align 4 ; <i32> [#uses=1]
%13 = lshr i32 %12, 24 ; <i32> [#uses=1]
%14 = trunc i32 %13 to i8 ; <i8> [#uses=1]
store i8 %14, i8* undef, align 1
store i8 undef, i8* %scevgep27, align 1
- %15 = load i32* %11, align 4 ; <i32> [#uses=1]
+ %15 = load i32, i32* %11, align 4 ; <i32> [#uses=1]
%16 = shl i32 %15, 1 ; <i32> [#uses=1]
%17 = icmp sgt i32 %16, undef ; <i1> [#uses=1]
%indvar.next = add i64 %indvar, 1 ; <i64> [#uses=1]
br i1 %17, label %bb, label %bb2
bb2: ; preds = %bb, %entry
- %18 = getelementptr inbounds %struct.RtreeNode* %pNode, i64 0, i32 3 ; <i32*> [#uses=1]
+ %18 = getelementptr inbounds %struct.RtreeNode, %struct.RtreeNode* %pNode, i64 0, i32 3 ; <i32*> [#uses=1]
store i32 1, i32* %18, align 4
ret void
}
diff --git a/test/CodeGen/X86/2009-10-19-atomic-cmp-eflags.ll b/test/CodeGen/X86/2009-10-19-atomic-cmp-eflags.ll
index 006a02a43b17..e1ca3fd2a97d 100644
--- a/test/CodeGen/X86/2009-10-19-atomic-cmp-eflags.ll
+++ b/test/CodeGen/X86/2009-10-19-atomic-cmp-eflags.ll
@@ -23,7 +23,7 @@ entry:
br i1 %tobool.i, label %if.then.i, label %if.end.i
if.then.i: ; preds = %entry
- %call1.i = call i32 @warn_dlerror(i8* getelementptr inbounds ([45 x i8]* @.str76843, i32 0, i32 0)) nounwind ; <i32> [#uses=0]
+ %call1.i = call i32 @warn_dlerror(i8* getelementptr inbounds ([45 x i8], [45 x i8]* @.str76843, i32 0, i32 0)) nounwind ; <i32> [#uses=0]
store i32 -1, i32* %retval.i
br label %lt_init.exit
@@ -32,13 +32,12 @@ if.end.i: ; preds = %entry
br label %lt_init.exit
lt_init.exit: ; preds = %if.end.i, %if.then.i
- %3 = load i32* %retval.i ; <i32> [#uses=1]
+ %3 = load i32, i32* %retval.i ; <i32> [#uses=1]
call void asm sideeffect "cpuid", "~{ax},~{bx},~{cx},~{dx},~{memory},~{dirflag},~{fpsr},~{flags}"() nounwind
%4 = call i64 @llvm.readcyclecounter() nounwind ; <i64> [#uses=1]
%5 = sub i64 %4, %2 ; <i64> [#uses=1]
- %6 = atomicrmw add i64* getelementptr inbounds ([1216 x i64]* @__profiling_callsite_timestamps_live, i32 0, i32 51), i64 %5 monotonic
-;CHECK: lock
-;CHECK-NEXT: {{xadd|addq}} %rdx, __profiling_callsite_timestamps_live
+ %6 = atomicrmw add i64* getelementptr inbounds ([1216 x i64], [1216 x i64]* @__profiling_callsite_timestamps_live, i32 0, i32 51), i64 %5 monotonic
+;CHECK: lock {{xadd|addq}} %rdx, __profiling_callsite_timestamps_live
;CHECK-NEXT: cmpl $0,
;CHECK-NEXT: jne
%cmp = icmp eq i32 %3, 0 ; <i1> [#uses=1]
@@ -50,11 +49,11 @@ if.then: ; preds = %lt_init.exit
if.end: ; preds = %if.then, %lt_init.exit
store i32 0, i32* %retval
- %7 = load i32* %retval ; <i32> [#uses=1]
+ %7 = load i32, i32* %retval ; <i32> [#uses=1]
tail call void asm sideeffect "cpuid", "~{ax},~{bx},~{cx},~{dx},~{memory},~{dirflag},~{fpsr},~{flags}"() nounwind
%8 = tail call i64 @llvm.readcyclecounter() nounwind ; <i64> [#uses=1]
%9 = sub i64 %8, %0 ; <i64> [#uses=1]
- %10 = atomicrmw add i64* getelementptr inbounds ([1216 x i64]* @__profiling_callsite_timestamps_live, i32 0, i32 50), i64 %9 monotonic
+ %10 = atomicrmw add i64* getelementptr inbounds ([1216 x i64], [1216 x i64]* @__profiling_callsite_timestamps_live, i32 0, i32 50), i64 %9 monotonic
ret i32 %7
}
diff --git a/test/CodeGen/X86/2009-10-25-RewriterBug.ll b/test/CodeGen/X86/2009-10-25-RewriterBug.ll
index 5b4e818359e9..be18186463d4 100644
--- a/test/CodeGen/X86/2009-10-25-RewriterBug.ll
+++ b/test/CodeGen/X86/2009-10-25-RewriterBug.ll
@@ -6,7 +6,7 @@
define fastcc void @insert_picture_in_dpb(%struct.FrameStore* nocapture %fs, %struct.StorablePicture* %p) nounwind ssp {
entry:
- %0 = getelementptr inbounds %struct.FrameStore* %fs, i64 0, i32 12 ; <%struct.StorablePicture**> [#uses=1]
+ %0 = getelementptr inbounds %struct.FrameStore, %struct.FrameStore* %fs, i64 0, i32 12 ; <%struct.StorablePicture**> [#uses=1]
%1 = icmp eq i32 undef, 0 ; <i1> [#uses=1]
br i1 %1, label %bb.i, label %bb36.i
@@ -69,7 +69,7 @@ bb38.i: ; preds = %bb66.i, %bb67.prehe
%23 = phi %struct.StorablePicture* [ %40, %bb66.i ], [ %12, %bb67.preheader.i ] ; <%struct.StorablePicture*> [#uses=1]
%indvar248.i = phi i64 [ %indvar.next249.i, %bb66.i ], [ 0, %bb67.preheader.i ] ; <i64> [#uses=3]
%storemerge52.i = trunc i64 %indvar248.i to i32 ; <i32> [#uses=1]
- %24 = getelementptr inbounds %struct.StorablePicture* %23, i64 0, i32 19 ; <i32*> [#uses=0]
+ %24 = getelementptr inbounds %struct.StorablePicture, %struct.StorablePicture* %23, i64 0, i32 19 ; <i32*> [#uses=0]
br i1 undef, label %bb.nph51.i, label %bb66.i
bb.nph51.i: ; preds = %bb38.i
@@ -94,13 +94,13 @@ bb41.i: ; preds = %bb40.i
br i1 undef, label %bb45.i, label %bb47.i
bb45.i: ; preds = %bb41.i
- %33 = getelementptr inbounds %struct.StorablePicture* %26, i64 0, i32 5, i64 undef, i64 %32, i64 undef ; <i64*> [#uses=1]
- %34 = load i64* %33, align 8 ; <i64> [#uses=1]
+ %33 = getelementptr inbounds %struct.StorablePicture, %struct.StorablePicture* %26, i64 0, i32 5, i64 undef, i64 %32, i64 undef ; <i64*> [#uses=1]
+ %34 = load i64, i64* %33, align 8 ; <i64> [#uses=1]
br label %bb47.i
bb47.i: ; preds = %bb45.i, %bb41.i
%storemerge11.i = phi i64 [ %34, %bb45.i ], [ 0, %bb41.i ] ; <i64> [#uses=0]
- %scevgep246.i = getelementptr i64* undef, i64 undef ; <i64*> [#uses=0]
+ %scevgep246.i = getelementptr i64, i64* undef, i64 undef ; <i64*> [#uses=0]
br label %bb64.i
bb57.i: ; preds = %bb40.i, %bb39.i
@@ -110,10 +110,10 @@ bb58.i: ; preds = %bb57.i
br label %bb60.i
bb60.i: ; preds = %bb58.i, %bb57.i
- %35 = load i64*** undef, align 8 ; <i64**> [#uses=1]
- %scevgep256.i = getelementptr i64** %35, i64 %indvar248.i ; <i64**> [#uses=1]
- %36 = load i64** %scevgep256.i, align 8 ; <i64*> [#uses=1]
- %scevgep243.i = getelementptr i64* %36, i64 undef ; <i64*> [#uses=1]
+ %35 = load i64**, i64*** undef, align 8 ; <i64**> [#uses=1]
+ %scevgep256.i = getelementptr i64*, i64** %35, i64 %indvar248.i ; <i64**> [#uses=1]
+ %36 = load i64*, i64** %scevgep256.i, align 8 ; <i64*> [#uses=1]
+ %scevgep243.i = getelementptr i64, i64* %36, i64 undef ; <i64*> [#uses=1]
store i64 -1, i64* %scevgep243.i, align 8
br label %bb64.i
@@ -160,7 +160,7 @@ bb101.i: ; preds = %bb82.i
br label %bb102.i
bb102.i: ; preds = %bb101.i, %bb83.i
- %48 = load %struct.StorablePicture** %0, align 8 ; <%struct.StorablePicture*> [#uses=2]
+ %48 = load %struct.StorablePicture*, %struct.StorablePicture** %0, align 8 ; <%struct.StorablePicture*> [#uses=2]
br i1 undef, label %bb81.i, label %bb104.i
bb104.i: ; preds = %bb102.i, %bb80.i
diff --git a/test/CodeGen/X86/2009-11-16-MachineLICM.ll b/test/CodeGen/X86/2009-11-16-MachineLICM.ll
index fedb2a51f357..3310775c2513 100644
--- a/test/CodeGen/X86/2009-11-16-MachineLICM.ll
+++ b/test/CodeGen/X86/2009-11-16-MachineLICM.ll
@@ -19,19 +19,19 @@ bb: ; preds = %bb, %bb.nph
%indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %bb ] ; <i64> [#uses=2]
%tmp9 = shl i64 %indvar, 2 ; <i64> [#uses=4]
%tmp1016 = or i64 %tmp9, 1 ; <i64> [#uses=1]
- %scevgep = getelementptr float* %x, i64 %tmp1016 ; <float*> [#uses=1]
+ %scevgep = getelementptr float, float* %x, i64 %tmp1016 ; <float*> [#uses=1]
%tmp1117 = or i64 %tmp9, 2 ; <i64> [#uses=1]
- %scevgep12 = getelementptr float* %x, i64 %tmp1117 ; <float*> [#uses=1]
+ %scevgep12 = getelementptr float, float* %x, i64 %tmp1117 ; <float*> [#uses=1]
%tmp1318 = or i64 %tmp9, 3 ; <i64> [#uses=1]
- %scevgep14 = getelementptr float* %x, i64 %tmp1318 ; <float*> [#uses=1]
- %x_addr.03 = getelementptr float* %x, i64 %tmp9 ; <float*> [#uses=1]
- %1 = load float* getelementptr inbounds ([4 x float]* @g, i64 0, i64 0), align 16 ; <float> [#uses=1]
+ %scevgep14 = getelementptr float, float* %x, i64 %tmp1318 ; <float*> [#uses=1]
+ %x_addr.03 = getelementptr float, float* %x, i64 %tmp9 ; <float*> [#uses=1]
+ %1 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @g, i64 0, i64 0), align 16 ; <float> [#uses=1]
store float %1, float* %x_addr.03, align 4
- %2 = load float* getelementptr inbounds ([4 x float]* @g, i64 0, i64 1), align 4 ; <float> [#uses=1]
+ %2 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @g, i64 0, i64 1), align 4 ; <float> [#uses=1]
store float %2, float* %scevgep, align 4
- %3 = load float* getelementptr inbounds ([4 x float]* @g, i64 0, i64 2), align 8 ; <float> [#uses=1]
+ %3 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @g, i64 0, i64 2), align 8 ; <float> [#uses=1]
store float %3, float* %scevgep12, align 4
- %4 = load float* getelementptr inbounds ([4 x float]* @g, i64 0, i64 3), align 4 ; <float> [#uses=1]
+ %4 = load float, float* getelementptr inbounds ([4 x float], [4 x float]* @g, i64 0, i64 3), align 4 ; <float> [#uses=1]
store float %4, float* %scevgep14, align 4
%indvar.next = add i64 %indvar, 1 ; <i64> [#uses=2]
%exitcond = icmp eq i64 %indvar.next, %tmp ; <i1> [#uses=1]
diff --git a/test/CodeGen/X86/2009-11-16-UnfoldMemOpBug.ll b/test/CodeGen/X86/2009-11-16-UnfoldMemOpBug.ll
index b828c27e7826..1cfd108db653 100644
--- a/test/CodeGen/X86/2009-11-16-UnfoldMemOpBug.ll
+++ b/test/CodeGen/X86/2009-11-16-UnfoldMemOpBug.ll
@@ -9,7 +9,7 @@ entry:
; CHECK: movups L_str+12(%rip), %xmm0
; CHECK: movups L_str(%rip), %xmm1
%tmp0 = alloca [60 x i8], align 1
- %tmp1 = getelementptr inbounds [60 x i8]* %tmp0, i64 0, i64 0
+ %tmp1 = getelementptr inbounds [60 x i8], [60 x i8]* %tmp0, i64 0, i64 0
br label %bb1
bb1:
@@ -17,7 +17,7 @@ bb1:
; CHECK: movups %xmm0, 12(%rsp)
; CHECK: movaps %xmm1, (%rsp)
%tmp2 = phi i32 [ %tmp3, %bb1 ], [ 0, %entry ]
- call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp1, i8* getelementptr inbounds ([28 x i8]* @str, i64 0, i64 0), i64 28, i32 1, i1 false)
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp1, i8* getelementptr inbounds ([28 x i8], [28 x i8]* @str, i64 0, i64 0), i64 28, i32 1, i1 false)
%tmp3 = add i32 %tmp2, 1
%tmp4 = icmp eq i32 %tmp3, %count
br i1 %tmp4, label %bb2, label %bb1
diff --git a/test/CodeGen/X86/2009-11-17-UpdateTerminator.ll b/test/CodeGen/X86/2009-11-17-UpdateTerminator.ll
index 5c1a2bcee2ee..12ebe6336952 100644
--- a/test/CodeGen/X86/2009-11-17-UpdateTerminator.ll
+++ b/test/CodeGen/X86/2009-11-17-UpdateTerminator.ll
@@ -43,8 +43,8 @@ bb14: ; preds = %bb8
ret i8 1
bb27.outer108: ; preds = %bb13, %bb27.outer
- %I.2.ph109 = getelementptr i8* %I.2.ph, i64 undef ; <i8*> [#uses=1]
- %scevgep = getelementptr i8* %I.2.ph, i64 undef ; <i8*> [#uses=0]
+ %I.2.ph109 = getelementptr i8, i8* %I.2.ph, i64 undef ; <i8*> [#uses=1]
+ %scevgep = getelementptr i8, i8* %I.2.ph, i64 undef ; <i8*> [#uses=0]
br label %bb8
bb56: ; preds = %bb10, %bb8, %bb8, %entry
diff --git a/test/CodeGen/X86/2009-11-25-ImpDefBug.ll b/test/CodeGen/X86/2009-11-25-ImpDefBug.ll
index 396638fb1dbd..0bf13de61275 100644
--- a/test/CodeGen/X86/2009-11-25-ImpDefBug.ll
+++ b/test/CodeGen/X86/2009-11-25-ImpDefBug.ll
@@ -48,7 +48,7 @@ lpad: ; preds = %bb1.i.fragment.cl,
%.SV10.phi807 = phi i8* [ undef, %bb1.i.fragment.cl ], [ undef, %bb1.i.fragment ], [ undef, %bb5 ] ; <i8*> [#uses=1]
%exn = landingpad {i8*, i32} personality i32 (...)* @__gxx_personality_v0
cleanup
- %1 = load i8* %.SV10.phi807, align 8 ; <i8> [#uses=0]
+ %1 = load i8, i8* %.SV10.phi807, align 8 ; <i8> [#uses=0]
br i1 undef, label %meshBB81.bbcl.disp, label %bb13.fragment.bbcl.disp
bb.i1: ; preds = %bb.i.i.bbcl.disp
diff --git a/test/CodeGen/X86/2009-12-01-EarlyClobberBug.ll b/test/CodeGen/X86/2009-12-01-EarlyClobberBug.ll
index 5c10c55ea3ee..e191a8a3772f 100644
--- a/test/CodeGen/X86/2009-12-01-EarlyClobberBug.ll
+++ b/test/CodeGen/X86/2009-12-01-EarlyClobberBug.ll
@@ -9,8 +9,8 @@ entry:
%b = alloca i32 ; <i32*> [#uses=2]
%a = alloca i32 ; <i32*> [#uses=1]
%"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
- %0 = load i32* %b, align 4 ; <i32> [#uses=1]
- %1 = load i32* %b, align 4 ; <i32> [#uses=1]
+ %0 = load i32, i32* %b, align 4 ; <i32> [#uses=1]
+ %1 = load i32, i32* %b, align 4 ; <i32> [#uses=1]
%asmtmp = call i32 asm "$0 = foo ($1, $2)", "=&{ax},%0,r,~{dirflag},~{fpsr},~{flags}"(i32 %0, i32 %1) nounwind ; <i32> [#uses=1]
store i32 %asmtmp, i32* %a
br label %return
@@ -30,8 +30,8 @@ entry:
%b = alloca i32 ; <i32*> [#uses=2]
%a = alloca i32 ; <i32*> [#uses=1]
%"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
- %0 = load i32* %b, align 4 ; <i32> [#uses=1]
- %1 = load i32* %b, align 4 ; <i32> [#uses=1]
+ %0 = load i32, i32* %b, align 4 ; <i32> [#uses=1]
+ %1 = load i32, i32* %b, align 4 ; <i32> [#uses=1]
%asmtmp = call i32 asm "$0 = foo ($1, $2)", "=&r,%0,r,~{dirflag},~{fpsr},~{flags}"(i32 %0, i32 %1) nounwind ; <i32> [#uses=1]
store i32 %asmtmp, i32* %a
br label %return
diff --git a/test/CodeGen/X86/2009-12-11-TLSNoRedZone.ll b/test/CodeGen/X86/2009-12-11-TLSNoRedZone.ll
index 65b70a7d4936..97d97872aa63 100644
--- a/test/CodeGen/X86/2009-12-11-TLSNoRedZone.ll
+++ b/test/CodeGen/X86/2009-12-11-TLSNoRedZone.ll
@@ -30,29 +30,29 @@ define void @leaf() nounwind {
br label %"@CFE_debug_label_0"
"@CFE_debug_label_0": ; preds = %"file foo2.c, line 14, bb2"
- %r = load %test** bitcast ([1 x i64]* @ptr to %test**), align 8 ; <%test*> [#uses=1]
+ %r = load %test*, %test** bitcast ([1 x i64]* @ptr to %test**), align 8 ; <%test*> [#uses=1]
store %test* %r, %test** %p, align 8
br label %"@CFE_debug_label_2"
"@CFE_debug_label_2": ; preds = %"@CFE_debug_label_0"
- %r1 = load %link** bitcast ([1 x i64]* @link_ptr to %link**), align 8 ; <%link*> [#uses=1]
- %r2 = load %test** %p, align 8 ; <%test*> [#uses=1]
+ %r1 = load %link*, %link** bitcast ([1 x i64]* @link_ptr to %link**), align 8 ; <%link*> [#uses=1]
+ %r2 = load %test*, %test** %p, align 8 ; <%test*> [#uses=1]
%r3 = ptrtoint %test* %r2 to i64 ; <i64> [#uses=1]
%r4 = inttoptr i64 %r3 to %link** ; <%link**> [#uses=1]
- %r5 = getelementptr %link** %r4, i64 1 ; <%link**> [#uses=1]
+ %r5 = getelementptr %link*, %link** %r4, i64 1 ; <%link**> [#uses=1]
store %link* %r1, %link** %r5, align 8
br label %"@CFE_debug_label_3"
"@CFE_debug_label_3": ; preds = %"@CFE_debug_label_2"
- %r6 = load %test** %p, align 8 ; <%test*> [#uses=1]
+ %r6 = load %test*, %test** %p, align 8 ; <%test*> [#uses=1]
%r7 = ptrtoint %test* %r6 to i64 ; <i64> [#uses=1]
%r8 = inttoptr i64 %r7 to %link* ; <%link*> [#uses=1]
- %r9 = getelementptr %link* %r8, i64 1 ; <%link*> [#uses=1]
+ %r9 = getelementptr %link, %link* %r8, i64 1 ; <%link*> [#uses=1]
store %link* %r9, %link** bitcast ([1 x i64]* @link_ptr to %link**), align 8
br label %"@CFE_debug_label_4"
"@CFE_debug_label_4": ; preds = %"@CFE_debug_label_3"
- %r10 = load %test** %p, align 8 ; <%test*> [#uses=1]
+ %r10 = load %test*, %test** %p, align 8 ; <%test*> [#uses=1]
%r11 = ptrtoint %test* %r10 to i64 ; <i64> [#uses=1]
%r12 = inttoptr i64 %r11 to i32* ; <i32*> [#uses=1]
store i32 1, i32* %r12, align 4
diff --git a/test/CodeGen/X86/20090313-signext.ll b/test/CodeGen/X86/20090313-signext.ll
index b8effa677355..3ea13164112e 100644
--- a/test/CodeGen/X86/20090313-signext.ll
+++ b/test/CodeGen/X86/20090313-signext.ll
@@ -10,7 +10,7 @@ entry:
%0 = tail call signext i16 @h() nounwind
%1 = sext i16 %0 to i32
tail call void @g(i32 %1) nounwind
- %2 = load i16* @x, align 2
+ %2 = load i16, i16* @x, align 2
ret i16 %2
}
diff --git a/test/CodeGen/X86/2010-01-08-Atomic64Bug.ll b/test/CodeGen/X86/2010-01-08-Atomic64Bug.ll
index 850f678c9c2c..3391f1f5ec41 100644
--- a/test/CodeGen/X86/2010-01-08-Atomic64Bug.ll
+++ b/test/CodeGen/X86/2010-01-08-Atomic64Bug.ll
@@ -14,8 +14,7 @@ entry:
; CHECK: addl $1, %ebx
; CHECK: movl %edx, %ecx
; CHECK: adcl $0, %ecx
-; CHECK: lock
-; CHECK-NEXT: cmpxchg8b ([[REG]])
+; CHECK: lock cmpxchg8b ([[REG]])
; CHECK-NEXT: jne
%0 = atomicrmw add i64* %p, i64 1 seq_cst
ret void
diff --git a/test/CodeGen/X86/2010-01-13-OptExtBug.ll b/test/CodeGen/X86/2010-01-13-OptExtBug.ll
index d49e2a8d0798..3ecf845f0933 100644
--- a/test/CodeGen/X86/2010-01-13-OptExtBug.ll
+++ b/test/CodeGen/X86/2010-01-13-OptExtBug.ll
@@ -7,23 +7,23 @@ define void @XX(%class.OlsonTimeZone* %this) align 2 {
entry:
%call = tail call i8* @_Z15uprv_malloc_4_2v()
%0 = bitcast i8* %call to double*
- %tmp = getelementptr inbounds %class.OlsonTimeZone* %this, i32 0, i32 3
- %tmp2 = load i16* %tmp
- %tmp525 = getelementptr inbounds %class.OlsonTimeZone* %this, i32 0, i32 0
- %tmp626 = load i16* %tmp525
+ %tmp = getelementptr inbounds %class.OlsonTimeZone, %class.OlsonTimeZone* %this, i32 0, i32 3
+ %tmp2 = load i16, i16* %tmp
+ %tmp525 = getelementptr inbounds %class.OlsonTimeZone, %class.OlsonTimeZone* %this, i32 0, i32 0
+ %tmp626 = load i16, i16* %tmp525
%cmp27 = icmp slt i16 %tmp2, %tmp626
br i1 %cmp27, label %bb.nph, label %for.end
for.cond:
- %tmp6 = load i16* %tmp5
+ %tmp6 = load i16, i16* %tmp5
%cmp = icmp slt i16 %inc, %tmp6
%indvar.next = add i32 %indvar, 1
br i1 %cmp, label %for.body, label %for.end
bb.nph:
- %tmp10 = getelementptr inbounds %class.OlsonTimeZone* %this, i32 0, i32 2
- %tmp17 = getelementptr inbounds %class.OlsonTimeZone* %this, i32 0, i32 1
- %tmp5 = getelementptr inbounds %class.OlsonTimeZone* %this, i32 0, i32 0
+ %tmp10 = getelementptr inbounds %class.OlsonTimeZone, %class.OlsonTimeZone* %this, i32 0, i32 2
+ %tmp17 = getelementptr inbounds %class.OlsonTimeZone, %class.OlsonTimeZone* %this, i32 0, i32 1
+ %tmp5 = getelementptr inbounds %class.OlsonTimeZone, %class.OlsonTimeZone* %this, i32 0, i32 0
%tmp29 = sext i16 %tmp2 to i32
%tmp31 = add i16 %tmp2, 1
%tmp32 = zext i16 %tmp31 to i32
@@ -34,9 +34,9 @@ for.body:
%tmp30 = add i32 %indvar, %tmp29
%tmp33 = add i32 %indvar, %tmp32
%inc = trunc i32 %tmp33 to i16
- %tmp11 = load i8** %tmp10
- %arrayidx = getelementptr i8* %tmp11, i32 %tmp30
- %tmp12 = load i8* %arrayidx
+ %tmp11 = load i8*, i8** %tmp10
+ %arrayidx = getelementptr i8, i8* %tmp11, i32 %tmp30
+ %tmp12 = load i8, i8* %arrayidx
br label %for.cond
for.end:
diff --git a/test/CodeGen/X86/2010-01-15-SelectionDAGCycle.ll b/test/CodeGen/X86/2010-01-15-SelectionDAGCycle.ll
index 5d96e4a192f3..6aba39e04bca 100644
--- a/test/CodeGen/X86/2010-01-15-SelectionDAGCycle.ll
+++ b/test/CodeGen/X86/2010-01-15-SelectionDAGCycle.ll
@@ -11,12 +11,12 @@ define void @numvec_(i32* noalias %ncelet, i32* noalias %ncel, i32* noalias %nfa
"file bug754399.f90, line 184, in inner vector loop at depth 0, bb164": ; preds = %"file bug754399.f90, line 184, in inner vector loop at depth 0, bb164", %"file bug754399.f90, line 1, bb1"
%tmp641 = add i64 0, 48 ; <i64> [#uses=1]
%tmp641642 = inttoptr i64 %tmp641 to <4 x i32>* ; <<4 x i32>*> [#uses=1]
- %r1258 = load <4 x i32>* %tmp641642, align 4 ; <<4 x i32>> [#uses=2]
+ %r1258 = load <4 x i32>, <4 x i32>* %tmp641642, align 4 ; <<4 x i32>> [#uses=2]
%r1295 = extractelement <4 x i32> %r1258, i32 3 ; <i32> [#uses=1]
%r1296 = sext i32 %r1295 to i64 ; <i64> [#uses=1]
%r1297 = add i64 %r1296, -1 ; <i64> [#uses=1]
- %r1298183 = getelementptr [0 x i32]* %ismbs, i64 0, i64 %r1297 ; <i32*> [#uses=1]
- %r1298184 = load i32* %r1298183, align 4 ; <i32> [#uses=1]
+ %r1298183 = getelementptr [0 x i32], [0 x i32]* %ismbs, i64 0, i64 %r1297 ; <i32*> [#uses=1]
+ %r1298184 = load i32, i32* %r1298183, align 4 ; <i32> [#uses=1]
%r1301 = extractelement <4 x i32> %r1037, i32 3 ; <i32> [#uses=1]
%r1302 = mul i32 %r1298184, %r1301 ; <i32> [#uses=1]
%r1306 = insertelement <4 x i32> zeroinitializer, i32 %r1302, i32 3 ; <<4 x i32>> [#uses=1]
diff --git a/test/CodeGen/X86/2010-01-18-DbgValue.ll b/test/CodeGen/X86/2010-01-18-DbgValue.ll
index b21846d39494..db56ae65d51e 100644
--- a/test/CodeGen/X86/2010-01-18-DbgValue.ll
+++ b/test/CodeGen/X86/2010-01-18-DbgValue.ll
@@ -12,17 +12,17 @@ entry:
%retval = alloca double ; <double*> [#uses=2]
%0 = alloca double ; <double*> [#uses=2]
%"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
- call void @llvm.dbg.declare(metadata %struct.Rect* %my_r0, metadata !0, metadata !{!"0x102"}), !dbg !15
- %1 = getelementptr inbounds %struct.Rect* %my_r0, i32 0, i32 0, !dbg !16 ; <%struct.Pt*> [#uses=1]
- %2 = getelementptr inbounds %struct.Pt* %1, i32 0, i32 0, !dbg !16 ; <double*> [#uses=1]
- %3 = load double* %2, align 8, !dbg !16 ; <double> [#uses=1]
+ call void @llvm.dbg.declare(metadata %struct.Rect* %my_r0, metadata !0, metadata !DIExpression()), !dbg !15
+ %1 = getelementptr inbounds %struct.Rect, %struct.Rect* %my_r0, i32 0, i32 0, !dbg !16 ; <%struct.Pt*> [#uses=1]
+ %2 = getelementptr inbounds %struct.Pt, %struct.Pt* %1, i32 0, i32 0, !dbg !16 ; <double*> [#uses=1]
+ %3 = load double, double* %2, align 8, !dbg !16 ; <double> [#uses=1]
store double %3, double* %0, align 8, !dbg !16
- %4 = load double* %0, align 8, !dbg !16 ; <double> [#uses=1]
+ %4 = load double, double* %0, align 8, !dbg !16 ; <double> [#uses=1]
store double %4, double* %retval, align 8, !dbg !16
br label %return, !dbg !16
return: ; preds = %entry
- %retval1 = load double* %retval, !dbg !16 ; <double> [#uses=1]
+ %retval1 = load double, double* %retval, !dbg !16 ; <double> [#uses=1]
ret double %retval1, !dbg !16
}
@@ -31,25 +31,25 @@ declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
!llvm.dbg.cu = !{!3}
!llvm.module.flags = !{!21}
-!0 = !{!"0x101\00my_r0\0011\000", !1, !2, !7} ; [ DW_TAG_arg_variable ]
-!1 = !{!"0x2e\00foo\00foo\00foo\0011\000\001\000\006\000\000\0011", !19, !2, !4, null, double (%struct.Rect*)* @foo, null, null, null} ; [ DW_TAG_subprogram ]
-!2 = !{!"0x29", !19} ; [ DW_TAG_file_type ]
-!3 = !{!"0x11\001\004.2.1 (Based on Apple Inc. build 5658) (LLVM build)\000\00\000\00\000", !19, !20, !20, !18, null, null} ; [ DW_TAG_compile_unit ]
-!4 = !{!"0x15\00\000\000\000\000\000\000", !19, !2, null, !5, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!0 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "my_r0", line: 11, arg: 0, scope: !1, file: !2, type: !7)
+!1 = !DISubprogram(name: "foo", linkageName: "foo", line: 11, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, scopeLine: 11, file: !19, scope: !2, type: !4, function: double (%struct.Rect*)* @foo)
+!2 = !DIFile(filename: "b2.c", directory: "/tmp/")
+!3 = !DICompileUnit(language: DW_LANG_C89, producer: "4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", isOptimized: false, emissionKind: 0, file: !19, enums: !20, retainedTypes: !20, subprograms: !18)
+!4 = !DISubroutineType(types: !5)
!5 = !{!6, !7}
-!6 = !{!"0x24\00double\000\0064\0064\000\000\004", !19, !2} ; [ DW_TAG_base_type ]
-!7 = !{!"0x13\00Rect\006\00256\0064\000\000\000", !19, !2, null, !8, null, null, null} ; [ DW_TAG_structure_type ] [Rect] [line 6, size 256, align 64, offset 0] [def] [from ]
+!6 = !DIBasicType(tag: DW_TAG_base_type, name: "double", size: 64, align: 64, encoding: DW_ATE_float)
+!7 = !DICompositeType(tag: DW_TAG_structure_type, name: "Rect", line: 6, size: 256, align: 64, file: !19, scope: !2, elements: !8)
!8 = !{!9, !14}
-!9 = !{!"0xd\00P1\007\00128\0064\000\000", !19, !7, !10} ; [ DW_TAG_member ]
-!10 = !{!"0x13\00Pt\001\00128\0064\000\000\000", !19, !2, null, !11, null, null, null} ; [ DW_TAG_structure_type ] [Pt] [line 1, size 128, align 64, offset 0] [def] [from ]
+!9 = !DIDerivedType(tag: DW_TAG_member, name: "P1", line: 7, size: 128, align: 64, file: !19, scope: !7, baseType: !10)
+!10 = !DICompositeType(tag: DW_TAG_structure_type, name: "Pt", line: 1, size: 128, align: 64, file: !19, scope: !2, elements: !11)
!11 = !{!12, !13}
-!12 = !{!"0xd\00x\002\0064\0064\000\000", !19, !10, !6} ; [ DW_TAG_member ]
-!13 = !{!"0xd\00y\003\0064\0064\0064\000", !19, !10, !6} ; [ DW_TAG_member ]
-!14 = !{!"0xd\00P2\008\00128\0064\00128\000", !19, !7, !10} ; [ DW_TAG_member ]
-!15 = !MDLocation(line: 11, scope: !1)
-!16 = !MDLocation(line: 12, scope: !17)
-!17 = !{!"0xb\0011\000\000", !19, !1} ; [ DW_TAG_lexical_block ]
+!12 = !DIDerivedType(tag: DW_TAG_member, name: "x", line: 2, size: 64, align: 64, file: !19, scope: !10, baseType: !6)
+!13 = !DIDerivedType(tag: DW_TAG_member, name: "y", line: 3, size: 64, align: 64, offset: 64, file: !19, scope: !10, baseType: !6)
+!14 = !DIDerivedType(tag: DW_TAG_member, name: "P2", line: 8, size: 128, align: 64, offset: 128, file: !19, scope: !7, baseType: !10)
+!15 = !DILocation(line: 11, scope: !1)
+!16 = !DILocation(line: 12, scope: !17)
+!17 = distinct !DILexicalBlock(line: 11, column: 0, file: !19, scope: !1)
!18 = !{!1}
-!19 = !{!"b2.c", !"/tmp/"}
-!20 = !{i32 0}
-!21 = !{i32 1, !"Debug Info Version", i32 2}
+!19 = !DIFile(filename: "b2.c", directory: "/tmp/")
+!20 = !{}
+!21 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/CodeGen/X86/2010-01-19-OptExtBug.ll b/test/CodeGen/X86/2010-01-19-OptExtBug.ll
index ec24e73c34ac..def8dd31978b 100644
--- a/test/CodeGen/X86/2010-01-19-OptExtBug.ll
+++ b/test/CodeGen/X86/2010-01-19-OptExtBug.ll
@@ -21,7 +21,7 @@ bb7: ; preds = %bb6
unreachable
bb9: ; preds = %bb6
- %0 = load i8* undef, align 1 ; <i8> [#uses=3]
+ %0 = load i8, i8* undef, align 1 ; <i8> [#uses=3]
br i1 undef, label %bb12, label %bb10
bb10: ; preds = %bb9
diff --git a/test/CodeGen/X86/2010-02-01-DbgValueCrash.ll b/test/CodeGen/X86/2010-02-01-DbgValueCrash.ll
index b85f1afea0cf..e65edac86ecc 100644
--- a/test/CodeGen/X86/2010-02-01-DbgValueCrash.ll
+++ b/test/CodeGen/X86/2010-02-01-DbgValueCrash.ll
@@ -8,7 +8,7 @@
define i32 @"main(tart.core.String[])->int32"(i32 %args) {
entry:
- tail call void @llvm.dbg.value(metadata %tart.reflect.ComplexType* @.type.SwitchStmtTest, i64 0, metadata !8, metadata !{!"0x102"})
+ tail call void @llvm.dbg.value(metadata %tart.reflect.ComplexType* @.type.SwitchStmtTest, i64 0, metadata !8, metadata !DIExpression()), !dbg !DILocation(scope: !9)
tail call void @"tart.reflect.ComplexType.create->tart.core.Object"(%tart.reflect.ComplexType* @.type.SwitchStmtTest) ; <%tart.core.Object*> [#uses=2]
ret i32 3
}
@@ -16,20 +16,20 @@ entry:
declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnone
declare void @"tart.reflect.ComplexType.create->tart.core.Object"(%tart.reflect.ComplexType*) nounwind readnone
-!0 = !{!"0x11\001\004.2.1 (Based on Apple Inc. build 5658) (LLVM build)\001\00\000\00\000", !15, !16, !16, null, null, null} ; [ DW_TAG_compile_unit ]
-!1 = !{!"0x26\00\000\00192\0064\000\000", !15, !0, !2} ; [ DW_TAG_const_type ]
-!2 = !{!"0x13\00C\001\00192\0064\000\000\000", !15, !0, null, !3, null, null, null} ; [ DW_TAG_structure_type ] [C] [line 1, size 192, align 64, offset 0] [def] [from ]
+!0 = !DICompileUnit(language: DW_LANG_C89, producer: "4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", isOptimized: true, emissionKind: 0, file: !15, enums: !16, retainedTypes: !16)
+!1 = !DIDerivedType(tag: DW_TAG_const_type, size: 192, align: 64, file: !15, scope: !0, baseType: !2)
+!2 = !DICompositeType(tag: DW_TAG_structure_type, name: "C", line: 1, size: 192, align: 64, file: !15, scope: !0, elements: !3)
!3 = !{!4, !6, !7}
-!4 = !{!"0xd\00x\001\0064\0064\000\000", !15, !2, !5} ; [ DW_TAG_member ]
-!5 = !{!"0x24\00double\000\0064\0064\000\000\004", !15, !0} ; [ DW_TAG_base_type ]
-!6 = !{!"0xd\00y\001\0064\0064\0064\000", !15, !2, !5} ; [ DW_TAG_member ]
-!7 = !{!"0xd\00z\001\0064\0064\00128\000", !15, !2, !5} ; [ DW_TAG_member ]
-!8 = !{!"0x100\00t\005\000", !9, !0, !2} ; [ DW_TAG_auto_variable ]
-!9 = !{!"0xb\000\000\000", null, !10} ; [ DW_TAG_lexical_block ]
-!10 = !{!"0x2e\00foo\00foo\00foo\004\000\001\000\006\000\000\000", i32 0, !0, !11, null, null, null, null, null} ; [ DW_TAG_subprogram ]
-!11 = !{!"0x15\00\000\000\000\000\000\000", !15, !0, null, !12, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!4 = !DIDerivedType(tag: DW_TAG_member, name: "x", line: 1, size: 64, align: 64, file: !15, scope: !2, baseType: !5)
+!5 = !DIBasicType(tag: DW_TAG_base_type, name: "double", size: 64, align: 64, encoding: DW_ATE_float)
+!6 = !DIDerivedType(tag: DW_TAG_member, name: "y", line: 1, size: 64, align: 64, offset: 64, file: !15, scope: !2, baseType: !5)
+!7 = !DIDerivedType(tag: DW_TAG_member, name: "z", line: 1, size: 64, align: 64, offset: 128, file: !15, scope: !2, baseType: !5)
+!8 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "t", line: 5, scope: !9, file: !0, type: !2)
+!9 = distinct !DILexicalBlock(line: 0, column: 0, file: null, scope: !10)
+!10 = !DISubprogram(name: "foo", linkageName: "foo", line: 4, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, scope: !0, type: !11)
+!11 = !DISubroutineType(types: !12)
!12 = !{!13}
-!13 = !{!"0x24\00int\000\0032\0032\000\000\005", !15, !0} ; [ DW_TAG_base_type ]
+!13 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
!14 = !{%tart.reflect.ComplexType* @.type.SwitchStmtTest}
-!15 = !{!"sm.c", !""}
+!15 = !DIFile(filename: "sm.c", directory: "")
!16 = !{i32 0}
diff --git a/test/CodeGen/X86/2010-02-04-SchedulerBug.ll b/test/CodeGen/X86/2010-02-04-SchedulerBug.ll
index c966e21d52df..51686ea3f775 100644
--- a/test/CodeGen/X86/2010-02-04-SchedulerBug.ll
+++ b/test/CodeGen/X86/2010-02-04-SchedulerBug.ll
@@ -6,13 +6,13 @@
define void @t(i32 %cNum, i64 %max) nounwind optsize ssp noimplicitfloat {
entry:
- %0 = load %struct.b_t** null, align 4 ; <%struct.b_t*> [#uses=1]
- %1 = getelementptr inbounds %struct.b_t* %0, i32 %cNum, i32 5 ; <i64*> [#uses=1]
- %2 = load i64* %1, align 4 ; <i64> [#uses=1]
+ %0 = load %struct.b_t*, %struct.b_t** null, align 4 ; <%struct.b_t*> [#uses=1]
+ %1 = getelementptr inbounds %struct.b_t, %struct.b_t* %0, i32 %cNum, i32 5 ; <i64*> [#uses=1]
+ %2 = load i64, i64* %1, align 4 ; <i64> [#uses=1]
%3 = icmp ult i64 %2, %max ; <i1> [#uses=1]
- %4 = getelementptr inbounds %struct.a_t* null, i32 0, i32 7 ; <i64**> [#uses=1]
- %5 = load i64** %4, align 4 ; <i64*> [#uses=0]
- %6 = load i64* null, align 4 ; <i64> [#uses=1]
+ %4 = getelementptr inbounds %struct.a_t, %struct.a_t* null, i32 0, i32 7 ; <i64**> [#uses=1]
+ %5 = load i64*, i64** %4, align 4 ; <i64*> [#uses=0]
+ %6 = load i64, i64* null, align 4 ; <i64> [#uses=1]
br i1 %3, label %bb2, label %bb
bb: ; preds = %entry
diff --git a/test/CodeGen/X86/2010-02-11-NonTemporal.ll b/test/CodeGen/X86/2010-02-11-NonTemporal.ll
index f9cca8c70c76..5d74db1160c5 100644
--- a/test/CodeGen/X86/2010-02-11-NonTemporal.ll
+++ b/test/CodeGen/X86/2010-02-11-NonTemporal.ll
@@ -11,8 +11,8 @@ define void @sub_(i32* noalias %n) {
%i = alloca i32, align 4
%"$LCS_0" = alloca i64, align 8
%"$LCS_S2" = alloca <2 x double>, align 16
- %r9 = load <2 x double>* %"$LCS_S2", align 8
- %r10 = load i64* %"$LCS_0", align 8
+ %r9 = load <2 x double>, <2 x double>* %"$LCS_S2", align 8
+ %r10 = load i64, i64* %"$LCS_0", align 8
%r11 = inttoptr i64 %r10 to <2 x double>*
store <2 x double> %r9, <2 x double>* %r11, align 16, !nontemporal !0
br label %"file movnt.f90, line 18, bb5"
diff --git a/test/CodeGen/X86/2010-02-12-CoalescerBug-Impdef.ll b/test/CodeGen/X86/2010-02-12-CoalescerBug-Impdef.ll
index 739a27a3e17c..193f8cfcd52b 100644
--- a/test/CodeGen/X86/2010-02-12-CoalescerBug-Impdef.ll
+++ b/test/CodeGen/X86/2010-02-12-CoalescerBug-Impdef.ll
@@ -228,7 +228,7 @@ entry:
unreachable
"67": ; preds = %"65"
- %1 = load i32* undef, align 4 ; <i32> [#uses=0]
+ %1 = load i32, i32* undef, align 4 ; <i32> [#uses=0]
br label %"100"
"82": ; preds = %"61", %"60", %"59"
diff --git a/test/CodeGen/X86/2010-02-19-TailCallRetAddrBug.ll b/test/CodeGen/X86/2010-02-19-TailCallRetAddrBug.ll
index 060c535dd778..c3b12edf4ceb 100644
--- a/test/CodeGen/X86/2010-02-19-TailCallRetAddrBug.ll
+++ b/test/CodeGen/X86/2010-02-19-TailCallRetAddrBug.ll
@@ -15,31 +15,31 @@
; Move return address from temporary register (%ebp) to new stack location (60(%esp))
; CHECK: movl [[REGISTER]], 60(%esp)
-%tupl_p = type [9 x i32]*
+%tupl = type [9 x i32]
declare fastcc void @l297(i32 %r10, i32 %r9, i32 %r8, i32 %r7, i32 %r6, i32 %r5, i32 %r3, i32 %r2) noreturn nounwind
declare fastcc void @l298(i32 %r10, i32 %r9, i32 %r4) noreturn nounwind
-define fastcc void @l186(%tupl_p %r1) noreturn nounwind {
+define fastcc void @l186(%tupl* %r1) noreturn nounwind {
entry:
- %ptr1 = getelementptr %tupl_p %r1, i32 0, i32 0
- %r2 = load i32* %ptr1
- %ptr3 = getelementptr %tupl_p %r1, i32 0, i32 1
- %r3 = load i32* %ptr3
- %ptr5 = getelementptr %tupl_p %r1, i32 0, i32 2
- %r4 = load i32* %ptr5
- %ptr7 = getelementptr %tupl_p %r1, i32 0, i32 3
- %r5 = load i32* %ptr7
- %ptr9 = getelementptr %tupl_p %r1, i32 0, i32 4
- %r6 = load i32* %ptr9
- %ptr11 = getelementptr %tupl_p %r1, i32 0, i32 5
- %r7 = load i32* %ptr11
- %ptr13 = getelementptr %tupl_p %r1, i32 0, i32 6
- %r8 = load i32* %ptr13
- %ptr15 = getelementptr %tupl_p %r1, i32 0, i32 7
- %r9 = load i32* %ptr15
- %ptr17 = getelementptr %tupl_p %r1, i32 0, i32 8
- %r10 = load i32* %ptr17
+ %ptr1 = getelementptr %tupl, %tupl* %r1, i32 0, i32 0
+ %r2 = load i32, i32* %ptr1
+ %ptr3 = getelementptr %tupl, %tupl* %r1, i32 0, i32 1
+ %r3 = load i32, i32* %ptr3
+ %ptr5 = getelementptr %tupl, %tupl* %r1, i32 0, i32 2
+ %r4 = load i32, i32* %ptr5
+ %ptr7 = getelementptr %tupl, %tupl* %r1, i32 0, i32 3
+ %r5 = load i32, i32* %ptr7
+ %ptr9 = getelementptr %tupl, %tupl* %r1, i32 0, i32 4
+ %r6 = load i32, i32* %ptr9
+ %ptr11 = getelementptr %tupl, %tupl* %r1, i32 0, i32 5
+ %r7 = load i32, i32* %ptr11
+ %ptr13 = getelementptr %tupl, %tupl* %r1, i32 0, i32 6
+ %r8 = load i32, i32* %ptr13
+ %ptr15 = getelementptr %tupl, %tupl* %r1, i32 0, i32 7
+ %r9 = load i32, i32* %ptr15
+ %ptr17 = getelementptr %tupl, %tupl* %r1, i32 0, i32 8
+ %r10 = load i32, i32* %ptr17
%cond = icmp eq i32 %r10, 3
br i1 %cond, label %true, label %false
diff --git a/test/CodeGen/X86/2010-02-23-RematImplicitSubreg.ll b/test/CodeGen/X86/2010-02-23-RematImplicitSubreg.ll
index 4a26ba088e5c..6fe31b6d1672 100644
--- a/test/CodeGen/X86/2010-02-23-RematImplicitSubreg.ll
+++ b/test/CodeGen/X86/2010-02-23-RematImplicitSubreg.ll
@@ -16,25 +16,25 @@ entry:
br i1 undef, label %for.end, label %for.body
for.body: ; preds = %if.end40, %entry
- %tmp6 = load i8* undef, align 2 ; <i8> [#uses=3]
+ %tmp6 = load i8, i8* undef, align 2 ; <i8> [#uses=3]
%conv11 = sext i8 %tmp6 to i64 ; <i64> [#uses=1]
%cmp15 = icmp slt i64 %conv11, undef ; <i1> [#uses=1]
br i1 %cmp15, label %if.end, label %if.then
if.then: ; preds = %for.body
%conv18 = sext i8 %tmp6 to i32 ; <i32> [#uses=1]
- %call = tail call i32 (...)* @invalid(i32 0, i32 0, i32 %conv18) nounwind ; <i32> [#uses=0]
+ %call = tail call i32 (...) @invalid(i32 0, i32 0, i32 %conv18) nounwind ; <i32> [#uses=0]
br label %if.end
if.end: ; preds = %if.then, %for.body
%index.0 = phi i8 [ 0, %if.then ], [ %tmp6, %for.body ] ; <i8> [#uses=1]
store i8 %index.0, i8* undef
- %tmp24 = load i8* undef ; <i8> [#uses=2]
+ %tmp24 = load i8, i8* undef ; <i8> [#uses=2]
br i1 undef, label %if.end40, label %if.then36
if.then36: ; preds = %if.end
%conv38 = sext i8 %tmp24 to i32 ; <i32> [#uses=1]
- %call39 = tail call i32 (...)* @invalid(i32 0, i32 0, i32 %conv38) nounwind ; <i32> [#uses=0]
+ %call39 = tail call i32 (...) @invalid(i32 0, i32 0, i32 %conv38) nounwind ; <i32> [#uses=0]
br label %if.end40
if.end40: ; preds = %if.then36, %if.end
diff --git a/test/CodeGen/X86/2010-03-05-ConstantFoldCFG.ll b/test/CodeGen/X86/2010-03-05-ConstantFoldCFG.ll
index 5de19662fffb..5a9ce90155d2 100644
--- a/test/CodeGen/X86/2010-03-05-ConstantFoldCFG.ll
+++ b/test/CodeGen/X86/2010-03-05-ConstantFoldCFG.ll
@@ -22,7 +22,7 @@ bb2.outer.i: ; preds = %bb9.i, %bb2
bb1.i: ; preds = %bb1.i, %bb2.outer.i
%indvar5.i = phi i64 [ %tmp, %bb1.i ], [ 0, %bb2.outer.i ] ; <i64> [#uses=1]
%tmp = add i64 %indvar5.i, 1 ; <i64> [#uses=2]
- %scevgep.i = getelementptr double* undef, i64 %tmp ; <double*> [#uses=0]
+ %scevgep.i = getelementptr double, double* undef, i64 %tmp ; <double*> [#uses=0]
br i1 undef, label %bb1.i, label %bb5.preheader.i
bb5.preheader.i: ; preds = %bb1.i, %bb2.outer.i
diff --git a/test/CodeGen/X86/2010-03-17-ISelBug.ll b/test/CodeGen/X86/2010-03-17-ISelBug.ll
index ba21902f7d0a..e1d3c10a80ea 100644
--- a/test/CodeGen/X86/2010-03-17-ISelBug.ll
+++ b/test/CodeGen/X86/2010-03-17-ISelBug.ll
@@ -9,7 +9,7 @@
define i32* @t() align 2 nounwind {
entry:
%operation = alloca %struct.PPOperation, align 8 ; <%struct.PPOperation*> [#uses=2]
- %0 = load i32*** null, align 4 ; [#uses=1]
+ %0 = load i32**, i32*** null, align 4 ; [#uses=1]
%1 = ptrtoint i32** %0 to i32 ; <i32> [#uses=1]
%2 = sub nsw i32 %1, undef ; <i32> [#uses=2]
br i1 false, label %bb20, label %bb.nph380
@@ -18,10 +18,10 @@ bb20: ; preds = %entry
ret i32* null
bb.nph380: ; preds = %entry
- %scevgep403 = getelementptr %struct.PPOperation* %operation, i32 0, i32 1, i32 0, i32 2 ; <i32*> [#uses=1]
+ %scevgep403 = getelementptr %struct.PPOperation, %struct.PPOperation* %operation, i32 0, i32 1, i32 0, i32 2 ; <i32*> [#uses=1]
%3 = ashr i32 %2, 1 ; <i32> [#uses=1]
%tmp405 = and i32 %3, -2 ; <i32> [#uses=1]
- %scevgep408 = getelementptr %struct.PPOperation* %operation, i32 0, i32 1, i32 0, i32 1 ; <i16*> [#uses=1]
+ %scevgep408 = getelementptr %struct.PPOperation, %struct.PPOperation* %operation, i32 0, i32 1, i32 0, i32 1 ; <i16*> [#uses=1]
%tmp410 = and i32 %2, -4 ; <i32> [#uses=1]
br label %bb169
@@ -29,10 +29,10 @@ bb169: ; preds = %bb169, %bb.nph380
%index.6379 = phi i32 [ 0, %bb.nph380 ], [ %4, %bb169 ] ; <i32> [#uses=3]
%tmp404 = mul i32 %index.6379, -2 ; <i32> [#uses=1]
%tmp406 = add i32 %tmp405, %tmp404 ; <i32> [#uses=1]
- %scevgep407 = getelementptr i32* %scevgep403, i32 %tmp406 ; <i32*> [#uses=1]
+ %scevgep407 = getelementptr i32, i32* %scevgep403, i32 %tmp406 ; <i32*> [#uses=1]
%tmp409 = mul i32 %index.6379, -4 ; <i32> [#uses=1]
%tmp411 = add i32 %tmp410, %tmp409 ; <i32> [#uses=1]
- %scevgep412 = getelementptr i16* %scevgep408, i32 %tmp411 ; <i16*> [#uses=1]
+ %scevgep412 = getelementptr i16, i16* %scevgep408, i32 %tmp411 ; <i16*> [#uses=1]
store i16 undef, i16* %scevgep412, align 2
store i32 undef, i32* %scevgep407, align 4
%4 = add nsw i32 %index.6379, 1 ; <i32> [#uses=1]
@@ -50,18 +50,18 @@ entry:
for.body261.i: ; preds = %for.body261.i, %for.body190
%line.3300.i = phi i32 [ undef, %for.body190 ], [ %add292.i, %for.body261.i ] ; <i32> [#uses=3]
%conv268.i = and i32 %line.3300.i, 255 ; <i32> [#uses=1]
- %tmp278.i = getelementptr [2 x [256 x %struct.bufBit_s]]* %colourLines, i32 undef, i32 %pen.1100, i32 %conv268.i, i32 0 ; <i8**> [#uses=1]
+ %tmp278.i = getelementptr [2 x [256 x %struct.bufBit_s]], [2 x [256 x %struct.bufBit_s]]* %colourLines, i32 undef, i32 %pen.1100, i32 %conv268.i, i32 0 ; <i8**> [#uses=1]
store i8* undef, i8** %tmp278.i
%tmp338 = shl i32 %line.3300.i, 3 ; <i32> [#uses=1]
%tmp339 = and i32 %tmp338, 2040 ; <i32> [#uses=1]
- %tmp285.i = getelementptr i8* %scevgep328, i32 %tmp339 ; <i8*> [#uses=1]
+ %tmp285.i = getelementptr i8, i8* %scevgep328, i32 %tmp339 ; <i8*> [#uses=1]
store i8 undef, i8* %tmp285.i
%add292.i = add nsw i32 0, %line.3300.i ; <i32> [#uses=1]
br i1 undef, label %for.body190, label %for.body261.i
for.body190: ; preds = %for.body261.i, %for.body190, %bb.nph104
%pen.1100 = phi i32 [ 0, %entry ], [ %inc230, %for.body261.i ], [ %inc230, %for.body190 ] ; <i32> [#uses=3]
- %scevgep328 = getelementptr [2 x [256 x %struct.bufBit_s]]* %colourLines, i32 undef, i32 %pen.1100, i32 0, i32 1 ; <i8*> [#uses=1]
+ %scevgep328 = getelementptr [2 x [256 x %struct.bufBit_s]], [2 x [256 x %struct.bufBit_s]]* %colourLines, i32 undef, i32 %pen.1100, i32 0, i32 1 ; <i8*> [#uses=1]
%inc230 = add i32 %pen.1100, 1 ; <i32> [#uses=2]
br i1 undef, label %for.body190, label %for.body261.i
}
diff --git a/test/CodeGen/X86/2010-04-06-SSEDomainFixCrash.ll b/test/CodeGen/X86/2010-04-06-SSEDomainFixCrash.ll
index 864ebf120f6f..2ba4d9aaded8 100644
--- a/test/CodeGen/X86/2010-04-06-SSEDomainFixCrash.ll
+++ b/test/CodeGen/X86/2010-04-06-SSEDomainFixCrash.ll
@@ -19,7 +19,7 @@ invcont64: ; preds = %bb58
br i1 undef, label %invcont65, label %bb.i.i
bb.i.i: ; preds = %invcont64
- %1 = load <4 x float>* undef, align 16 ; <<4 x float>> [#uses=5]
+ %1 = load <4 x float>, <4 x float>* undef, align 16 ; <<4 x float>> [#uses=5]
br i1 undef, label %bb.nph.i.i, label %invcont65
bb.nph.i.i: ; preds = %bb.i.i
diff --git a/test/CodeGen/X86/2010-04-08-CoalescerBug.ll b/test/CodeGen/X86/2010-04-08-CoalescerBug.ll
index 5e86ecf42b11..5adf99e3e47b 100644
--- a/test/CodeGen/X86/2010-04-08-CoalescerBug.ll
+++ b/test/CodeGen/X86/2010-04-08-CoalescerBug.ll
@@ -14,9 +14,9 @@ entry:
; CHECK-LABEL: t:
; CHECK: addq $12, %rsi
%BitValueArray = alloca [32 x i32], align 4
- %tmp2 = getelementptr inbounds %struct.F* %this, i64 0, i32 0
- %tmp3 = load %struct.FC** %tmp2, align 8
- %tmp4 = getelementptr inbounds %struct.FC* %tmp3, i64 0, i32 1, i64 0
+ %tmp2 = getelementptr inbounds %struct.F, %struct.F* %this, i64 0, i32 0
+ %tmp3 = load %struct.FC*, %struct.FC** %tmp2, align 8
+ %tmp4 = getelementptr inbounds %struct.FC, %struct.FC* %tmp3, i64 0, i32 1, i64 0
%tmp5 = bitcast [32 x i32]* %BitValueArray to i8*
%tmp6 = bitcast i32* %tmp4 to i8*
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp5, i8* %tmp6, i64 128, i32 4, i1 false)
diff --git a/test/CodeGen/X86/2010-04-13-AnalyzeBranchCrash.ll b/test/CodeGen/X86/2010-04-13-AnalyzeBranchCrash.ll
index fadbd2191989..6c8dbbe5eff5 100644
--- a/test/CodeGen/X86/2010-04-13-AnalyzeBranchCrash.ll
+++ b/test/CodeGen/X86/2010-04-13-AnalyzeBranchCrash.ll
@@ -12,7 +12,7 @@ entry:
]
if.then: ; preds = %entry, %entry
- %tmp69 = load float* null, align 4 ; <float> [#uses=1]
+ %tmp69 = load float, float* null, align 4 ; <float> [#uses=1]
%cmp19 = icmp eq %1* null, %scroller ; <i1> [#uses=2]
%cond = select i1 %cmp19, float %tmp69, float 0.000000e+00 ; <float> [#uses=1]
%call36 = call i64 bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i64 (i8*, i8*)*)(i8* undef, i8* undef) nounwind optsize ; <i64> [#uses=2]
diff --git a/test/CodeGen/X86/2010-04-23-mmx-movdq2q.ll b/test/CodeGen/X86/2010-04-23-mmx-movdq2q.ll
deleted file mode 100644
index 60025bfcdc81..000000000000
--- a/test/CodeGen/X86/2010-04-23-mmx-movdq2q.ll
+++ /dev/null
@@ -1,100 +0,0 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+mmx,+sse2 | FileCheck %s
-; There are no MMX operations here, so we use XMM or i64.
-
-; CHECK: ti8
-define void @ti8(double %a, double %b) nounwind {
-entry:
- %tmp1 = bitcast double %a to <8 x i8>
- %tmp2 = bitcast double %b to <8 x i8>
- %tmp3 = add <8 x i8> %tmp1, %tmp2
-; CHECK: paddb
- store <8 x i8> %tmp3, <8 x i8>* null
- ret void
-}
-
-; CHECK: ti16
-define void @ti16(double %a, double %b) nounwind {
-entry:
- %tmp1 = bitcast double %a to <4 x i16>
- %tmp2 = bitcast double %b to <4 x i16>
- %tmp3 = add <4 x i16> %tmp1, %tmp2
-; CHECK: paddw
- store <4 x i16> %tmp3, <4 x i16>* null
- ret void
-}
-
-; CHECK: ti32
-define void @ti32(double %a, double %b) nounwind {
-entry:
- %tmp1 = bitcast double %a to <2 x i32>
- %tmp2 = bitcast double %b to <2 x i32>
- %tmp3 = add <2 x i32> %tmp1, %tmp2
-; CHECK: paddd
- store <2 x i32> %tmp3, <2 x i32>* null
- ret void
-}
-
-; CHECK: ti64
-define void @ti64(double %a, double %b) nounwind {
-entry:
- %tmp1 = bitcast double %a to <1 x i64>
- %tmp2 = bitcast double %b to <1 x i64>
- %tmp3 = add <1 x i64> %tmp1, %tmp2
-; CHECK: addq
- store <1 x i64> %tmp3, <1 x i64>* null
- ret void
-}
-
-; MMX intrinsics calls get us MMX instructions.
-; CHECK: ti8a
-define void @ti8a(double %a, double %b) nounwind {
-entry:
- %tmp1 = bitcast double %a to x86_mmx
-; CHECK: movdq2q
- %tmp2 = bitcast double %b to x86_mmx
-; CHECK: movdq2q
- %tmp3 = tail call x86_mmx @llvm.x86.mmx.padd.b(x86_mmx %tmp1, x86_mmx %tmp2)
- store x86_mmx %tmp3, x86_mmx* null
- ret void
-}
-
-; CHECK: ti16a
-define void @ti16a(double %a, double %b) nounwind {
-entry:
- %tmp1 = bitcast double %a to x86_mmx
-; CHECK: movdq2q
- %tmp2 = bitcast double %b to x86_mmx
-; CHECK: movdq2q
- %tmp3 = tail call x86_mmx @llvm.x86.mmx.padd.w(x86_mmx %tmp1, x86_mmx %tmp2)
- store x86_mmx %tmp3, x86_mmx* null
- ret void
-}
-
-; CHECK: ti32a
-define void @ti32a(double %a, double %b) nounwind {
-entry:
- %tmp1 = bitcast double %a to x86_mmx
-; CHECK: movdq2q
- %tmp2 = bitcast double %b to x86_mmx
-; CHECK: movdq2q
- %tmp3 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %tmp1, x86_mmx %tmp2)
- store x86_mmx %tmp3, x86_mmx* null
- ret void
-}
-
-; CHECK: ti64a
-define void @ti64a(double %a, double %b) nounwind {
-entry:
- %tmp1 = bitcast double %a to x86_mmx
-; CHECK: movdq2q
- %tmp2 = bitcast double %b to x86_mmx
-; CHECK: movdq2q
- %tmp3 = tail call x86_mmx @llvm.x86.mmx.padd.q(x86_mmx %tmp1, x86_mmx %tmp2)
- store x86_mmx %tmp3, x86_mmx* null
- ret void
-}
-
-declare x86_mmx @llvm.x86.mmx.padd.b(x86_mmx, x86_mmx)
-declare x86_mmx @llvm.x86.mmx.padd.w(x86_mmx, x86_mmx)
-declare x86_mmx @llvm.x86.mmx.padd.d(x86_mmx, x86_mmx)
-declare x86_mmx @llvm.x86.mmx.padd.q(x86_mmx, x86_mmx)
diff --git a/test/CodeGen/X86/2010-04-30-LocalAlloc-LandingPad.ll b/test/CodeGen/X86/2010-04-30-LocalAlloc-LandingPad.ll
index cbf5502e1c05..4711d5274675 100644
--- a/test/CodeGen/X86/2010-04-30-LocalAlloc-LandingPad.ll
+++ b/test/CodeGen/X86/2010-04-30-LocalAlloc-LandingPad.ll
@@ -47,9 +47,9 @@ try.handler: ; preds = %entry
match: ; preds = %try.handler
%4 = call i8* @__cxa_begin_catch(i8* %exc1) ; <i8*> [#uses=1]
%5 = bitcast i8* %4 to i32* ; <i32*> [#uses=1]
- %6 = load i32* %5 ; <i32> [#uses=1]
+ %6 = load i32, i32* %5 ; <i32> [#uses=1]
store i32 %6, i32* %0
- %call = invoke i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), %struct.S* %s2)
+ %call = invoke i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), %struct.S* %s2)
to label %invoke.cont2 unwind label %match.handler ; <i32> [#uses=0]
invoke.cont2: ; preds = %match
@@ -80,7 +80,7 @@ invoke.cont5: ; preds = %match.end
br label %cleanup.switch
cleanup.switch: ; preds = %invoke.cont5
- %tmp = load i32* %cleanup.dst ; <i32> [#uses=1]
+ %tmp = load i32, i32* %cleanup.dst ; <i32> [#uses=1]
switch i32 %tmp, label %cleanup.end [
i32 1, label %cleanup.pad
i32 2, label %cleanup.pad4
@@ -99,7 +99,7 @@ finally: ; preds = %catch.next, %cleanu
br label %cleanup.switch9
cleanup.switch9: ; preds = %finally
- %tmp8 = load i32* %cleanup.dst7 ; <i32> [#uses=1]
+ %tmp8 = load i32, i32* %cleanup.dst7 ; <i32> [#uses=1]
switch i32 %tmp8, label %cleanup.end10 [
i32 1, label %finally.end
i32 2, label %finally.throw
@@ -109,17 +109,17 @@ cleanup.end10: ; preds = %cleanup.switch9
br label %finally.end
finally.throw: ; preds = %cleanup.switch9
- %8 = load i8** %_rethrow ; <i8*> [#uses=1]
+ %8 = load i8*, i8** %_rethrow ; <i8*> [#uses=1]
call void @_Unwind_Resume_or_Rethrow(i8* %8)
unreachable
finally.end: ; preds = %cleanup.end10, %cleanup.switch9
- %tmp11 = getelementptr inbounds %struct.S* %s1, i32 0, i32 0 ; <[2 x i8*]*> [#uses=1]
- %arraydecay = getelementptr inbounds [2 x i8*]* %tmp11, i32 0, i32 0 ; <i8**> [#uses=1]
- %arrayidx = getelementptr inbounds i8** %arraydecay, i32 1 ; <i8**> [#uses=1]
- %tmp12 = load i8** %arrayidx ; <i8*> [#uses=1]
+ %tmp11 = getelementptr inbounds %struct.S, %struct.S* %s1, i32 0, i32 0 ; <[2 x i8*]*> [#uses=1]
+ %arraydecay = getelementptr inbounds [2 x i8*], [2 x i8*]* %tmp11, i32 0, i32 0 ; <i8**> [#uses=1]
+ %arrayidx = getelementptr inbounds i8*, i8** %arraydecay, i32 1 ; <i8**> [#uses=1]
+ %tmp12 = load i8*, i8** %arrayidx ; <i8*> [#uses=1]
store i8* %tmp12, i8** %retval
- %9 = load i8** %retval ; <i8*> [#uses=1]
+ %9 = load i8*, i8** %retval ; <i8*> [#uses=1]
ret i8* %9
}
diff --git a/test/CodeGen/X86/2010-05-05-LocalAllocEarlyClobber.ll b/test/CodeGen/X86/2010-05-05-LocalAllocEarlyClobber.ll
index 86be390b8228..5a9c02109343 100644
--- a/test/CodeGen/X86/2010-05-05-LocalAllocEarlyClobber.ll
+++ b/test/CodeGen/X86/2010-05-05-LocalAllocEarlyClobber.ll
@@ -23,9 +23,9 @@ entry:
store i8* %asmresult, i8** %ret
store i8* %asmresult1, i8** %p
store i32 %asmresult2, i32* %t
- %tmp = load i8** %ret ; <i8*> [#uses=1]
+ %tmp = load i8*, i8** %ret ; <i8*> [#uses=1]
store i8* %tmp, i8** %retval
- %1 = load i8** %retval ; <i8*> [#uses=1]
+ %1 = load i8*, i8** %retval ; <i8*> [#uses=1]
ret i8* %1
}
diff --git a/test/CodeGen/X86/2010-05-07-ldconvert.ll b/test/CodeGen/X86/2010-05-07-ldconvert.ll
index 0ba6a8fd6d71..a0c3c95ef607 100644
--- a/test/CodeGen/X86/2010-05-07-ldconvert.ll
+++ b/test/CodeGen/X86/2010-05-07-ldconvert.ll
@@ -9,7 +9,7 @@ entry:
%tmp = call x86_fp80 @llvm.powi.f80(x86_fp80 0xK3FFF8000000000000000, i32 -64) ; <x86_fp80> [#uses=1]
%conv = fptosi x86_fp80 %tmp to i32 ; <i32> [#uses=1]
store i32 %conv, i32* %r
- %tmp1 = load i32* %r ; <i32> [#uses=1]
+ %tmp1 = load i32, i32* %r ; <i32> [#uses=1]
%tobool = icmp ne i32 %tmp1, 0 ; <i1> [#uses=1]
br i1 %tobool, label %if.then, label %if.end
@@ -18,7 +18,7 @@ if.then: ; preds = %entry
br label %if.end
if.end: ; preds = %if.then, %entry
- %0 = load i32* %retval ; <i32> [#uses=1]
+ %0 = load i32, i32* %retval ; <i32> [#uses=1]
ret i32 %0
}
diff --git a/test/CodeGen/X86/2010-05-10-DAGCombinerBug.ll b/test/CodeGen/X86/2010-05-10-DAGCombinerBug.ll
index e719da304c53..a6fe310bffa2 100644
--- a/test/CodeGen/X86/2010-05-10-DAGCombinerBug.ll
+++ b/test/CodeGen/X86/2010-05-10-DAGCombinerBug.ll
@@ -4,7 +4,7 @@
define i32 @CXB30130(i32 %num1, i16* nocapture %num2, float* nocapture %num3, double* nocapture %num4) nounwind ssp {
entry:
- %0 = load i16* %num2, align 2 ; <i16> [#uses=2]
+ %0 = load i16, i16* %num2, align 2 ; <i16> [#uses=2]
%1 = mul nsw i16 %0, %0 ; <i16> [#uses=1]
store i16 %1, i16* %num2, align 2
ret i32 undef
diff --git a/test/CodeGen/X86/2010-05-16-nosseconversion.ll b/test/CodeGen/X86/2010-05-16-nosseconversion.ll
index 889575cea3ae..2d3f0eb1c5f2 100644
--- a/test/CodeGen/X86/2010-05-16-nosseconversion.ll
+++ b/test/CodeGen/X86/2010-05-16-nosseconversion.ll
@@ -5,7 +5,7 @@
define i32 @foo() nounwind readonly ssp {
entry:
- %0 = load i64* @x, align 8 ; <i64> [#uses=1]
+ %0 = load i64, i64* @x, align 8 ; <i64> [#uses=1]
%1 = uitofp i64 %0 to double ; <double> [#uses=1]
%2 = fptosi double %1 to i32 ; <i32> [#uses=1]
ret i32 %2
diff --git a/test/CodeGen/X86/2010-05-25-DotDebugLoc.ll b/test/CodeGen/X86/2010-05-25-DotDebugLoc.ll
index 0d30a3f88eb9..3b99e91915f0 100644
--- a/test/CodeGen/X86/2010-05-25-DotDebugLoc.ll
+++ b/test/CodeGen/X86/2010-05-25-DotDebugLoc.ll
@@ -2,18 +2,17 @@
; RUN: llc -mtriple=x86_64-pc-linux -O2 -regalloc=basic < %s | FileCheck %s
; Test to check .debug_loc support. This test case emits many debug_loc entries.
-; CHECK: .short {{.*}} # Loc expr size
-; CHECK-NEXT: .Ltmp
+; CHECK: .short 1 # Loc expr size
; CHECK-NEXT: DW_OP_reg
%0 = type { double }
define hidden %0 @__divsc3(float %a, float %b, float %c, float %d) nounwind readnone {
entry:
- tail call void @llvm.dbg.value(metadata float %a, i64 0, metadata !0, metadata !{!"0x102"})
- tail call void @llvm.dbg.value(metadata float %b, i64 0, metadata !11, metadata !{!"0x102"})
- tail call void @llvm.dbg.value(metadata float %c, i64 0, metadata !12, metadata !{!"0x102"})
- tail call void @llvm.dbg.value(metadata float %d, i64 0, metadata !13, metadata !{!"0x102"})
+ tail call void @llvm.dbg.value(metadata float %a, i64 0, metadata !0, metadata !DIExpression()), !dbg !DILocation(scope: !1)
+ tail call void @llvm.dbg.value(metadata float %b, i64 0, metadata !11, metadata !DIExpression()), !dbg !DILocation(scope: !1)
+ tail call void @llvm.dbg.value(metadata float %c, i64 0, metadata !12, metadata !DIExpression()), !dbg !DILocation(scope: !1)
+ tail call void @llvm.dbg.value(metadata float %d, i64 0, metadata !13, metadata !DIExpression()), !dbg !DILocation(scope: !1)
%0 = tail call float @fabsf(float %c) nounwind readnone, !dbg !19 ; <float> [#uses=1]
%1 = tail call float @fabsf(float %d) nounwind readnone, !dbg !19 ; <float> [#uses=1]
%2 = fcmp olt float %0, %1, !dbg !19 ; <i1> [#uses=1]
@@ -21,34 +20,34 @@ entry:
bb: ; preds = %entry
%3 = fdiv float %c, %d, !dbg !20 ; <float> [#uses=3]
- tail call void @llvm.dbg.value(metadata float %3, i64 0, metadata !16, metadata !{!"0x102"}), !dbg !20
+ tail call void @llvm.dbg.value(metadata float %3, i64 0, metadata !16, metadata !DIExpression()), !dbg !20
%4 = fmul float %3, %c, !dbg !21 ; <float> [#uses=1]
%5 = fadd float %4, %d, !dbg !21 ; <float> [#uses=2]
- tail call void @llvm.dbg.value(metadata float %5, i64 0, metadata !14, metadata !{!"0x102"}), !dbg !21
+ tail call void @llvm.dbg.value(metadata float %5, i64 0, metadata !14, metadata !DIExpression()), !dbg !21
%6 = fmul float %3, %a, !dbg !22 ; <float> [#uses=1]
%7 = fadd float %6, %b, !dbg !22 ; <float> [#uses=1]
%8 = fdiv float %7, %5, !dbg !22 ; <float> [#uses=1]
- tail call void @llvm.dbg.value(metadata float %8, i64 0, metadata !17, metadata !{!"0x102"}), !dbg !22
+ tail call void @llvm.dbg.value(metadata float %8, i64 0, metadata !17, metadata !DIExpression()), !dbg !22
%9 = fmul float %3, %b, !dbg !23 ; <float> [#uses=1]
%10 = fsub float %9, %a, !dbg !23 ; <float> [#uses=1]
%11 = fdiv float %10, %5, !dbg !23 ; <float> [#uses=1]
- tail call void @llvm.dbg.value(metadata float %11, i64 0, metadata !18, metadata !{!"0x102"}), !dbg !23
+ tail call void @llvm.dbg.value(metadata float %11, i64 0, metadata !18, metadata !DIExpression()), !dbg !23
br label %bb2, !dbg !23
bb1: ; preds = %entry
%12 = fdiv float %d, %c, !dbg !24 ; <float> [#uses=3]
- tail call void @llvm.dbg.value(metadata float %12, i64 0, metadata !16, metadata !{!"0x102"}), !dbg !24
+ tail call void @llvm.dbg.value(metadata float %12, i64 0, metadata !16, metadata !DIExpression()), !dbg !24
%13 = fmul float %12, %d, !dbg !25 ; <float> [#uses=1]
%14 = fadd float %13, %c, !dbg !25 ; <float> [#uses=2]
- tail call void @llvm.dbg.value(metadata float %14, i64 0, metadata !14, metadata !{!"0x102"}), !dbg !25
+ tail call void @llvm.dbg.value(metadata float %14, i64 0, metadata !14, metadata !DIExpression()), !dbg !25
%15 = fmul float %12, %b, !dbg !26 ; <float> [#uses=1]
%16 = fadd float %15, %a, !dbg !26 ; <float> [#uses=1]
%17 = fdiv float %16, %14, !dbg !26 ; <float> [#uses=1]
- tail call void @llvm.dbg.value(metadata float %17, i64 0, metadata !17, metadata !{!"0x102"}), !dbg !26
+ tail call void @llvm.dbg.value(metadata float %17, i64 0, metadata !17, metadata !DIExpression()), !dbg !26
%18 = fmul float %12, %a, !dbg !27 ; <float> [#uses=1]
%19 = fsub float %b, %18, !dbg !27 ; <float> [#uses=1]
%20 = fdiv float %19, %14, !dbg !27 ; <float> [#uses=1]
- tail call void @llvm.dbg.value(metadata float %20, i64 0, metadata !18, metadata !{!"0x102"}), !dbg !27
+ tail call void @llvm.dbg.value(metadata float %20, i64 0, metadata !18, metadata !DIExpression()), !dbg !27
br label %bb2, !dbg !27
bb2: ; preds = %bb1, %bb
@@ -74,9 +73,9 @@ bb6: ; preds = %bb4
bb8: ; preds = %bb6
%27 = tail call float @copysignf(float 0x7FF0000000000000, float %c) nounwind readnone, !dbg !30 ; <float> [#uses=2]
%28 = fmul float %27, %a, !dbg !30 ; <float> [#uses=1]
- tail call void @llvm.dbg.value(metadata float %28, i64 0, metadata !17, metadata !{!"0x102"}), !dbg !30
+ tail call void @llvm.dbg.value(metadata float %28, i64 0, metadata !17, metadata !DIExpression()), !dbg !30
%29 = fmul float %27, %b, !dbg !31 ; <float> [#uses=1]
- tail call void @llvm.dbg.value(metadata float %29, i64 0, metadata !18, metadata !{!"0x102"}), !dbg !31
+ tail call void @llvm.dbg.value(metadata float %29, i64 0, metadata !18, metadata !DIExpression()), !dbg !31
br label %bb46, !dbg !31
bb9: ; preds = %bb6, %bb4
@@ -106,24 +105,24 @@ bb15: ; preds = %bb14
bb16: ; preds = %bb15
%iftmp.0.0 = select i1 %33, float 1.000000e+00, float 0.000000e+00 ; <float> [#uses=1]
%42 = tail call float @copysignf(float %iftmp.0.0, float %a) nounwind readnone, !dbg !33 ; <float> [#uses=2]
- tail call void @llvm.dbg.value(metadata float %42, i64 0, metadata !0, metadata !{!"0x102"}), !dbg !33
+ tail call void @llvm.dbg.value(metadata float %42, i64 0, metadata !0, metadata !DIExpression()), !dbg !33
%43 = fcmp ord float %b, 0.000000e+00 ; <i1> [#uses=1]
%44 = fsub float %b, %b, !dbg !34 ; <float> [#uses=1]
%45 = fcmp uno float %44, 0.000000e+00 ; <i1> [#uses=1]
%46 = and i1 %43, %45, !dbg !34 ; <i1> [#uses=1]
%iftmp.1.0 = select i1 %46, float 1.000000e+00, float 0.000000e+00 ; <float> [#uses=1]
%47 = tail call float @copysignf(float %iftmp.1.0, float %b) nounwind readnone, !dbg !34 ; <float> [#uses=2]
- tail call void @llvm.dbg.value(metadata float %47, i64 0, metadata !11, metadata !{!"0x102"}), !dbg !34
+ tail call void @llvm.dbg.value(metadata float %47, i64 0, metadata !11, metadata !DIExpression()), !dbg !34
%48 = fmul float %42, %c, !dbg !35 ; <float> [#uses=1]
%49 = fmul float %47, %d, !dbg !35 ; <float> [#uses=1]
%50 = fadd float %48, %49, !dbg !35 ; <float> [#uses=1]
%51 = fmul float %50, 0x7FF0000000000000, !dbg !35 ; <float> [#uses=1]
- tail call void @llvm.dbg.value(metadata float %51, i64 0, metadata !17, metadata !{!"0x102"}), !dbg !35
+ tail call void @llvm.dbg.value(metadata float %51, i64 0, metadata !17, metadata !DIExpression()), !dbg !35
%52 = fmul float %47, %c, !dbg !36 ; <float> [#uses=1]
%53 = fmul float %42, %d, !dbg !36 ; <float> [#uses=1]
%54 = fsub float %52, %53, !dbg !36 ; <float> [#uses=1]
%55 = fmul float %54, 0x7FF0000000000000, !dbg !36 ; <float> [#uses=1]
- tail call void @llvm.dbg.value(metadata float %55, i64 0, metadata !18, metadata !{!"0x102"}), !dbg !36
+ tail call void @llvm.dbg.value(metadata float %55, i64 0, metadata !18, metadata !DIExpression()), !dbg !36
br label %bb46, !dbg !36
bb27: ; preds = %bb15, %bb14, %bb11
@@ -154,24 +153,24 @@ bb34: ; preds = %bb33, %bb30
bb35: ; preds = %bb34
%iftmp.2.0 = select i1 %59, float 1.000000e+00, float 0.000000e+00 ; <float> [#uses=1]
%67 = tail call float @copysignf(float %iftmp.2.0, float %c) nounwind readnone, !dbg !38 ; <float> [#uses=2]
- tail call void @llvm.dbg.value(metadata float %67, i64 0, metadata !12, metadata !{!"0x102"}), !dbg !38
+ tail call void @llvm.dbg.value(metadata float %67, i64 0, metadata !12, metadata !DIExpression()), !dbg !38
%68 = fcmp ord float %d, 0.000000e+00 ; <i1> [#uses=1]
%69 = fsub float %d, %d, !dbg !39 ; <float> [#uses=1]
%70 = fcmp uno float %69, 0.000000e+00 ; <i1> [#uses=1]
%71 = and i1 %68, %70, !dbg !39 ; <i1> [#uses=1]
%iftmp.3.0 = select i1 %71, float 1.000000e+00, float 0.000000e+00 ; <float> [#uses=1]
%72 = tail call float @copysignf(float %iftmp.3.0, float %d) nounwind readnone, !dbg !39 ; <float> [#uses=2]
- tail call void @llvm.dbg.value(metadata float %72, i64 0, metadata !13, metadata !{!"0x102"}), !dbg !39
+ tail call void @llvm.dbg.value(metadata float %72, i64 0, metadata !13, metadata !DIExpression()), !dbg !39
%73 = fmul float %67, %a, !dbg !40 ; <float> [#uses=1]
%74 = fmul float %72, %b, !dbg !40 ; <float> [#uses=1]
%75 = fadd float %73, %74, !dbg !40 ; <float> [#uses=1]
%76 = fmul float %75, 0.000000e+00, !dbg !40 ; <float> [#uses=1]
- tail call void @llvm.dbg.value(metadata float %76, i64 0, metadata !17, metadata !{!"0x102"}), !dbg !40
+ tail call void @llvm.dbg.value(metadata float %76, i64 0, metadata !17, metadata !DIExpression()), !dbg !40
%77 = fmul float %67, %b, !dbg !41 ; <float> [#uses=1]
%78 = fmul float %72, %a, !dbg !41 ; <float> [#uses=1]
%79 = fsub float %77, %78, !dbg !41 ; <float> [#uses=1]
%80 = fmul float %79, 0.000000e+00, !dbg !41 ; <float> [#uses=1]
- tail call void @llvm.dbg.value(metadata float %80, i64 0, metadata !18, metadata !{!"0x102"}), !dbg !41
+ tail call void @llvm.dbg.value(metadata float %80, i64 0, metadata !18, metadata !DIExpression()), !dbg !41
br label %bb46, !dbg !41
bb46: ; preds = %bb35, %bb34, %bb33, %bb30, %bb16, %bb8, %bb2
@@ -200,52 +199,52 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnon
!llvm.dbg.cu = !{!3}
!llvm.module.flags = !{!48}
-!0 = !{!"0x101\00a\001921\000", !1, !2, !9} ; [ DW_TAG_arg_variable ]
-!1 = !{!"0x2e\00__divsc3\00__divsc3\00__divsc3\001922\000\001\000\006\000\001\001922", !45, !2, !4, null, %0 (float, float, float, float)* @__divsc3, null, null, !43} ; [ DW_TAG_subprogram ]
-!2 = !{!"0x29", !45} ; [ DW_TAG_file_type ]
-!3 = !{!"0x11\001\004.2.1 (Based on Apple Inc. build 5658) (LLVM build)\001\00\000\00\001", !45, !47, !47, !44, null, null} ; [ DW_TAG_compile_unit ]
-!4 = !{!"0x15\00\000\000\000\000\000\000", !45, !2, null, !5, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!0 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "a", line: 1921, arg: 0, scope: !1, file: !2, type: !9)
+!1 = !DISubprogram(name: "__divsc3", linkageName: "__divsc3", line: 1922, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: true, scopeLine: 1922, file: !45, scope: !2, type: !4, function: %0 (float, float, float, float)* @__divsc3, variables: !43)
+!2 = !DIFile(filename: "libgcc2.c", directory: "/Users/yash/clean/LG.D/gcc/../../llvmgcc/gcc")
+!3 = !DICompileUnit(language: DW_LANG_C89, producer: "4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", isOptimized: true, emissionKind: 1, file: !45, enums: !47, retainedTypes: !47, subprograms: !44, imports: null)
+!4 = !DISubroutineType(types: !5)
!5 = !{!6, !9, !9, !9, !9}
-!6 = !{!"0x16\00SCtype\00170\000\000\000\000", !46, !7, !8} ; [ DW_TAG_typedef ]
-!7 = !{!"0x29", !46} ; [ DW_TAG_file_type ]
-!8 = !{!"0x24\00complex float\000\0064\0032\000\000\003", !45, !2} ; [ DW_TAG_base_type ]
-!9 = !{!"0x16\00SFtype\00167\000\000\000\000", !46, !7, !10} ; [ DW_TAG_typedef ]
-!10 = !{!"0x24\00float\000\0032\0032\000\000\004", !45, !2} ; [ DW_TAG_base_type ]
-!11 = !{!"0x101\00b\001921\000", !1, !2, !9} ; [ DW_TAG_arg_variable ]
-!12 = !{!"0x101\00c\001921\000", !1, !2, !9} ; [ DW_TAG_arg_variable ]
-!13 = !{!"0x101\00d\001921\000", !1, !2, !9} ; [ DW_TAG_arg_variable ]
-!14 = !{!"0x100\00denom\001923\000", !15, !2, !9} ; [ DW_TAG_auto_variable ]
-!15 = !{!"0xb\001922\000\000", !45, !1} ; [ DW_TAG_lexical_block ]
-!16 = !{!"0x100\00ratio\001923\000", !15, !2, !9} ; [ DW_TAG_auto_variable ]
-!17 = !{!"0x100\00x\001923\000", !15, !2, !9} ; [ DW_TAG_auto_variable ]
-!18 = !{!"0x100\00y\001923\000", !15, !2, !9} ; [ DW_TAG_auto_variable ]
-!19 = !MDLocation(line: 1929, scope: !15)
-!20 = !MDLocation(line: 1931, scope: !15)
-!21 = !MDLocation(line: 1932, scope: !15)
-!22 = !MDLocation(line: 1933, scope: !15)
-!23 = !MDLocation(line: 1934, scope: !15)
-!24 = !MDLocation(line: 1938, scope: !15)
-!25 = !MDLocation(line: 1939, scope: !15)
-!26 = !MDLocation(line: 1940, scope: !15)
-!27 = !MDLocation(line: 1941, scope: !15)
-!28 = !MDLocation(line: 1946, scope: !15)
-!29 = !MDLocation(line: 1948, scope: !15)
-!30 = !MDLocation(line: 1950, scope: !15)
-!31 = !MDLocation(line: 1951, scope: !15)
-!32 = !MDLocation(line: 1953, scope: !15)
-!33 = !MDLocation(line: 1955, scope: !15)
-!34 = !MDLocation(line: 1956, scope: !15)
-!35 = !MDLocation(line: 1957, scope: !15)
-!36 = !MDLocation(line: 1958, scope: !15)
-!37 = !MDLocation(line: 1960, scope: !15)
-!38 = !MDLocation(line: 1962, scope: !15)
-!39 = !MDLocation(line: 1963, scope: !15)
-!40 = !MDLocation(line: 1964, scope: !15)
-!41 = !MDLocation(line: 1965, scope: !15)
-!42 = !MDLocation(line: 1969, scope: !15)
+!6 = !DIDerivedType(tag: DW_TAG_typedef, name: "SCtype", line: 170, file: !46, scope: !7, baseType: !8)
+!7 = !DIFile(filename: "libgcc2.h", directory: "/Users/yash/clean/LG.D/gcc/../../llvmgcc/gcc")
+!8 = !DIBasicType(tag: DW_TAG_base_type, name: "complex float", size: 64, align: 32, encoding: DW_ATE_complex_float)
+!9 = !DIDerivedType(tag: DW_TAG_typedef, name: "SFtype", line: 167, file: !46, scope: !7, baseType: !10)
+!10 = !DIBasicType(tag: DW_TAG_base_type, name: "float", size: 32, align: 32, encoding: DW_ATE_float)
+!11 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "b", line: 1921, arg: 0, scope: !1, file: !2, type: !9)
+!12 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "c", line: 1921, arg: 0, scope: !1, file: !2, type: !9)
+!13 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "d", line: 1921, arg: 0, scope: !1, file: !2, type: !9)
+!14 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "denom", line: 1923, scope: !15, file: !2, type: !9)
+!15 = distinct !DILexicalBlock(line: 1922, column: 0, file: !45, scope: !1)
+!16 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "ratio", line: 1923, scope: !15, file: !2, type: !9)
+!17 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "x", line: 1923, scope: !15, file: !2, type: !9)
+!18 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "y", line: 1923, scope: !15, file: !2, type: !9)
+!19 = !DILocation(line: 1929, scope: !15)
+!20 = !DILocation(line: 1931, scope: !15)
+!21 = !DILocation(line: 1932, scope: !15)
+!22 = !DILocation(line: 1933, scope: !15)
+!23 = !DILocation(line: 1934, scope: !15)
+!24 = !DILocation(line: 1938, scope: !15)
+!25 = !DILocation(line: 1939, scope: !15)
+!26 = !DILocation(line: 1940, scope: !15)
+!27 = !DILocation(line: 1941, scope: !15)
+!28 = !DILocation(line: 1946, scope: !15)
+!29 = !DILocation(line: 1948, scope: !15)
+!30 = !DILocation(line: 1950, scope: !15)
+!31 = !DILocation(line: 1951, scope: !15)
+!32 = !DILocation(line: 1953, scope: !15)
+!33 = !DILocation(line: 1955, scope: !15)
+!34 = !DILocation(line: 1956, scope: !15)
+!35 = !DILocation(line: 1957, scope: !15)
+!36 = !DILocation(line: 1958, scope: !15)
+!37 = !DILocation(line: 1960, scope: !15)
+!38 = !DILocation(line: 1962, scope: !15)
+!39 = !DILocation(line: 1963, scope: !15)
+!40 = !DILocation(line: 1964, scope: !15)
+!41 = !DILocation(line: 1965, scope: !15)
+!42 = !DILocation(line: 1969, scope: !15)
!43 = !{!0, !11, !12, !13, !14, !16, !17, !18}
!44 = !{!1}
-!45 = !{!"libgcc2.c", !"/Users/yash/clean/LG.D/gcc/../../llvmgcc/gcc"}
-!46 = !{!"libgcc2.h", !"/Users/yash/clean/LG.D/gcc/../../llvmgcc/gcc"}
-!47 = !{i32 0}
-!48 = !{i32 1, !"Debug Info Version", i32 2}
+!45 = !DIFile(filename: "libgcc2.c", directory: "/Users/yash/clean/LG.D/gcc/../../llvmgcc/gcc")
+!46 = !DIFile(filename: "libgcc2.h", directory: "/Users/yash/clean/LG.D/gcc/../../llvmgcc/gcc")
+!47 = !{}
+!48 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/CodeGen/X86/2010-05-26-DotDebugLoc.ll b/test/CodeGen/X86/2010-05-26-DotDebugLoc.ll
index 9915a706e5ee..3670c556aa79 100644
--- a/test/CodeGen/X86/2010-05-26-DotDebugLoc.ll
+++ b/test/CodeGen/X86/2010-05-26-DotDebugLoc.ll
@@ -9,9 +9,9 @@ target triple = "x86_64-apple-darwin10"
define i8* @bar(%struct.a* %myvar) nounwind optsize noinline ssp {
entry:
- tail call void @llvm.dbg.value(metadata %struct.a* %myvar, i64 0, metadata !8, metadata !{!"0x102"})
- %0 = getelementptr inbounds %struct.a* %myvar, i64 0, i32 0, !dbg !28 ; <i32*> [#uses=1]
- %1 = load i32* %0, align 8, !dbg !28 ; <i32> [#uses=1]
+ tail call void @llvm.dbg.value(metadata %struct.a* %myvar, i64 0, metadata !8, metadata !DIExpression()), !dbg !DILocation(scope: !9)
+ %0 = getelementptr inbounds %struct.a, %struct.a* %myvar, i64 0, i32 0, !dbg !28 ; <i32*> [#uses=1]
+ %1 = load i32, i32* %0, align 8, !dbg !28 ; <i32> [#uses=1]
tail call void @foo(i32 %1) nounwind optsize noinline ssp, !dbg !28
%2 = bitcast %struct.a* %myvar to i8*, !dbg !30 ; <i8*> [#uses=1]
ret i8* %2, !dbg !30
@@ -24,43 +24,43 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnon
!llvm.dbg.cu = !{!2}
!llvm.module.flags = !{!38}
-!0 = !{!"0x34\00ret\00ret\00\007\000\001", !1, !1, !3, null, null} ; [ DW_TAG_variable ]
-!1 = !{!"0x29", !36} ; [ DW_TAG_file_type ]
-!2 = !{!"0x11\001\004.2.1 (Based on Apple Inc. build 5658) (LLVM build)\001\00\000\00\001", !36, !37, !37, !32, !31, !37} ; [ DW_TAG_compile_unit ]
-!3 = !{!"0x24\00int\000\0032\0032\000\000\005", !36, !1} ; [ DW_TAG_base_type ]
-!4 = !{!"0x101\00x\0012\000", !5, !1, !3} ; [ DW_TAG_arg_variable ]
-!5 = !{!"0x2e\00foo\00foo\00foo\0013\000\001\000\006\000\001\0013", !36, !1, !6, null, void (i32)* @foo, null, null, !33} ; [ DW_TAG_subprogram ]
-!6 = !{!"0x15\00\000\000\000\000\000\000", !36, !1, null, !7, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!0 = !DIGlobalVariable(name: "ret", line: 7, isLocal: false, isDefinition: true, scope: !1, file: !1, type: !3)
+!1 = !DIFile(filename: "foo.c", directory: "/tmp/")
+!2 = !DICompileUnit(language: DW_LANG_C89, producer: "4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", isOptimized: true, emissionKind: 1, file: !36, enums: !37, retainedTypes: !37, subprograms: !32, globals: !31, imports: !37)
+!3 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!4 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "x", line: 12, arg: 0, scope: !5, file: !1, type: !3)
+!5 = !DISubprogram(name: "foo", linkageName: "foo", line: 13, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: true, scopeLine: 13, file: !36, scope: !1, type: !6, function: void (i32)* @foo, variables: !33)
+!6 = !DISubroutineType(types: !7)
!7 = !{null, !3}
-!8 = !{!"0x101\00myvar\0017\000", !9, !1, !13} ; [ DW_TAG_arg_variable ]
-!9 = !{!"0x2e\00bar\00bar\00bar\0017\000\001\000\006\000\001\0017", !36, !1, !10, null, i8* (%struct.a*)* @bar, null, null, !34} ; [ DW_TAG_subprogram ]
-!10 = !{!"0x15\00\000\000\000\000\000\000", !36, !1, null, !11, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!8 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "myvar", line: 17, arg: 0, scope: !9, file: !1, type: !13)
+!9 = !DISubprogram(name: "bar", linkageName: "bar", line: 17, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: true, scopeLine: 17, file: !36, scope: !1, type: !10, function: i8* (%struct.a*)* @bar, variables: !34)
+!10 = !DISubroutineType(types: !11)
!11 = !{!12, !13}
-!12 = !{!"0xf\00\000\0064\0064\000\000", !36, !1, null} ; [ DW_TAG_pointer_type ]
-!13 = !{!"0xf\00\000\0064\0064\000\000", !36, !1, !14} ; [ DW_TAG_pointer_type ]
-!14 = !{!"0x13\00a\002\00128\0064\000\000\000", !36, !1, null, !15, null, null, null} ; [ DW_TAG_structure_type ] [a] [line 2, size 128, align 64, offset 0] [def] [from ]
+!12 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, file: !36, scope: !1, baseType: null)
+!13 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, file: !36, scope: !1, baseType: !14)
+!14 = !DICompositeType(tag: DW_TAG_structure_type, name: "a", line: 2, size: 128, align: 64, file: !36, scope: !1, elements: !15)
!15 = !{!16, !17}
-!16 = !{!"0xd\00c\003\0032\0032\000\000", !36, !14, !3} ; [ DW_TAG_member ]
-!17 = !{!"0xd\00d\004\0064\0064\0064\000", !36, !14, !13} ; [ DW_TAG_member ]
-!18 = !{!"0x101\00argc\0022\000", !19, !1, !3} ; [ DW_TAG_arg_variable ]
-!19 = !{!"0x2e\00main\00main\00main\0022\000\001\000\006\000\001\0022", !36, !1, !20, null, null, null, null, !35} ; [ DW_TAG_subprogram ]
-!20 = !{!"0x15\00\000\000\000\000\000\000", !36, !1, null, !21, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!16 = !DIDerivedType(tag: DW_TAG_member, name: "c", line: 3, size: 32, align: 32, file: !36, scope: !14, baseType: !3)
+!17 = !DIDerivedType(tag: DW_TAG_member, name: "d", line: 4, size: 64, align: 64, offset: 64, file: !36, scope: !14, baseType: !13)
+!18 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "argc", line: 22, arg: 0, scope: !19, file: !1, type: !3)
+!19 = !DISubprogram(name: "main", linkageName: "main", line: 22, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: true, scopeLine: 22, file: !36, scope: !1, type: !20, variables: !35)
+!20 = !DISubroutineType(types: !21)
!21 = !{!3, !3, !22}
-!22 = !{!"0xf\00\000\0064\0064\000\000", !36, !1, !23} ; [ DW_TAG_pointer_type ]
-!23 = !{!"0xf\00\000\0064\0064\000\000", !36, !1, !24} ; [ DW_TAG_pointer_type ]
-!24 = !{!"0x24\00char\000\008\008\000\000\006", !36, !1} ; [ DW_TAG_base_type ]
-!25 = !{!"0x101\00argv\0022\000", !19, !1, !22} ; [ DW_TAG_arg_variable ]
-!26 = !{!"0x100\00e\0023\000", !27, !1, !14} ; [ DW_TAG_auto_variable ]
-!27 = !{!"0xb\0022\000\000", !36, !19} ; [ DW_TAG_lexical_block ]
-!28 = !MDLocation(line: 18, scope: !29)
-!29 = !{!"0xb\0017\000\001", !36, !9} ; [ DW_TAG_lexical_block ]
-!30 = !MDLocation(line: 19, scope: !29)
+!22 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, file: !36, scope: !1, baseType: !23)
+!23 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, file: !36, scope: !1, baseType: !24)
+!24 = !DIBasicType(tag: DW_TAG_base_type, name: "char", size: 8, align: 8, encoding: DW_ATE_signed_char)
+!25 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "argv", line: 22, arg: 0, scope: !19, file: !1, type: !22)
+!26 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "e", line: 23, scope: !27, file: !1, type: !14)
+!27 = distinct !DILexicalBlock(line: 22, column: 0, file: !36, scope: !19)
+!28 = !DILocation(line: 18, scope: !29)
+!29 = distinct !DILexicalBlock(line: 17, column: 0, file: !36, scope: !9)
+!30 = !DILocation(line: 19, scope: !29)
!31 = !{!0}
!32 = !{!5, !9, !19}
!33 = !{!4}
!34 = !{!8}
!35 = !{!18, !25, !26}
-!36 = !{!"foo.c", !"/tmp/"}
+!36 = !DIFile(filename: "foo.c", directory: "/tmp/")
!37 = !{}
; The variable bar:myvar changes registers after the first movq.
@@ -77,18 +77,12 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnon
; CHECK-NEXT: .quad [[SET1]]
; CHECK-NEXT: [[SET2:.*]] = [[LABEL]]-Lfunc_begin0
; CHECK-NEXT: .quad [[SET2]]
-; CHECK-NEXT: Lset{{.*}} = Ltmp{{.*}}-Ltmp{{.*}} ## Loc expr size
-; CHECK-NEXT: .short Lset{{.*}}
-; CHECK-NEXT: Ltmp{{.*}}:
+; CHECK-NEXT: .short 1 ## Loc expr size
; CHECK-NEXT: .byte 85
-; CHECK-NEXT: Ltmp{{.*}}:
; CHECK-NEXT: [[SET3:.*]] = [[LABEL]]-Lfunc_begin0
; CHECK-NEXT: .quad [[SET3]]
; CHECK-NEXT: [[SET4:.*]] = [[CLOBBER]]-Lfunc_begin0
; CHECK-NEXT: .quad [[SET4]]
-; CHECK-NEXT: Lset{{.*}} = Ltmp{{.*}}-Ltmp{{.*}} ## Loc expr size
-; CHECK-NEXT: .short Lset{{.*}}
-; CHECK-NEXT: Ltmp{{.*}}:
+; CHECK-NEXT: .short 1 ## Loc expr size
; CHECK-NEXT: .byte 83
-; CHECK-NEXT: Ltmp{{.*}}:
-!38 = !{i32 1, !"Debug Info Version", i32 2}
+!38 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/CodeGen/X86/2010-05-26-FP_TO_INT-crash.ll b/test/CodeGen/X86/2010-05-26-FP_TO_INT-crash.ll
index 38dcb806cc9d..ac18195dc5a3 100644
--- a/test/CodeGen/X86/2010-05-26-FP_TO_INT-crash.ll
+++ b/test/CodeGen/X86/2010-05-26-FP_TO_INT-crash.ll
@@ -7,7 +7,7 @@ module asm "\09.ident\09\22GCC: (GNU) 4.5.1 20100510 (prerelease) LLVM: 104604:1
define i32 @f2(double %x) nounwind {
entry:
- %0 = load double* undef, align 64 ; <double> [#uses=1]
+ %0 = load double, double* undef, align 64 ; <double> [#uses=1]
%1 = fptoui double %0 to i16 ; <i16> [#uses=1]
%2 = zext i16 %1 to i32 ; <i32> [#uses=1]
%3 = add nsw i32 0, %2 ; <i32> [#uses=1]
diff --git a/test/CodeGen/X86/2010-05-28-Crash.ll b/test/CodeGen/X86/2010-05-28-Crash.ll
index 7adacf5e0176..c5201614fdd1 100644
--- a/test/CodeGen/X86/2010-05-28-Crash.ll
+++ b/test/CodeGen/X86/2010-05-28-Crash.ll
@@ -4,8 +4,8 @@
define i32 @foo(i32 %y) nounwind optsize ssp {
entry:
- tail call void @llvm.dbg.value(metadata i32 %y, i64 0, metadata !0, metadata !{!"0x102"})
- %0 = tail call i32 (...)* @zoo(i32 %y) nounwind, !dbg !9 ; <i32> [#uses=1]
+ tail call void @llvm.dbg.value(metadata i32 %y, i64 0, metadata !0, metadata !DIExpression()), !dbg !DILocation(scope: !1)
+ %0 = tail call i32 (...) @zoo(i32 %y) nounwind, !dbg !9 ; <i32> [#uses=1]
ret i32 %0, !dbg !9
}
@@ -15,9 +15,9 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnon
define i32 @bar(i32 %x) nounwind optsize ssp {
entry:
- tail call void @llvm.dbg.value(metadata i32 %x, i64 0, metadata !7, metadata !{!"0x102"})
- tail call void @llvm.dbg.value(metadata i32 1, i64 0, metadata !0, metadata !{!"0x102"}) nounwind
- %0 = tail call i32 (...)* @zoo(i32 1) nounwind, !dbg !12 ; <i32> [#uses=1]
+ tail call void @llvm.dbg.value(metadata i32 %x, i64 0, metadata !7, metadata !DIExpression()), !dbg !DILocation(scope: !8)
+ tail call void @llvm.dbg.value(metadata i32 1, i64 0, metadata !0, metadata !DIExpression()) nounwind, !dbg !DILocation(scope: !1)
+ %0 = tail call i32 (...) @zoo(i32 1) nounwind, !dbg !12 ; <i32> [#uses=1]
%1 = add nsw i32 %0, %x, !dbg !13 ; <i32> [#uses=1]
ret i32 %1, !dbg !13
}
@@ -25,28 +25,28 @@ entry:
!llvm.dbg.cu = !{!3}
!llvm.module.flags = !{!20}
-!0 = !{!"0x101\00y\002\000", !1, !2, !6} ; [ DW_TAG_arg_variable ]
-!1 = !{!"0x2e\00foo\00foo\00foo\002\000\001\000\006\000\001\002", !18, !2, !4, null, i32 (i32)* @foo, null, null, !15} ; [ DW_TAG_subprogram ]
-!2 = !{!"0x29", !18} ; [ DW_TAG_file_type ]
-!3 = !{!"0x11\001\004.2.1 (Based on Apple Inc. build 5658) (LLVM build)\001\00\000\00\001", !18, !19, !19, !17, null, null} ; [ DW_TAG_compile_unit ]
-!4 = !{!"0x15\00\000\000\000\000\000\000", !18, !2, null, !5, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!0 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "y", line: 2, arg: 0, scope: !1, file: !2, type: !6)
+!1 = !DISubprogram(name: "foo", linkageName: "foo", line: 2, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: true, scopeLine: 2, file: !18, scope: !2, type: !4, function: i32 (i32)* @foo, variables: !15)
+!2 = !DIFile(filename: "f.c", directory: "/tmp")
+!3 = !DICompileUnit(language: DW_LANG_C89, producer: "4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", isOptimized: true, emissionKind: 1, file: !18, enums: !19, retainedTypes: !19, subprograms: !17, imports: null)
+!4 = !DISubroutineType(types: !5)
!5 = !{!6, !6}
-!6 = !{!"0x24\00int\000\0032\0032\000\000\005", !18, !2} ; [ DW_TAG_base_type ]
-!7 = !{!"0x101\00x\006\000", !8, !2, !6} ; [ DW_TAG_arg_variable ]
-!8 = !{!"0x2e\00bar\00bar\00bar\006\000\001\000\006\000\001\006", !18, !2, !4, null, i32 (i32)* @bar, null, null, !16} ; [ DW_TAG_subprogram ]
-!9 = !MDLocation(line: 3, scope: !10)
-!10 = !{!"0xb\002\000\000", !18, !1} ; [ DW_TAG_lexical_block ]
+!6 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!7 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "x", line: 6, arg: 0, scope: !8, file: !2, type: !6)
+!8 = !DISubprogram(name: "bar", linkageName: "bar", line: 6, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: true, scopeLine: 6, file: !18, scope: !2, type: !4, function: i32 (i32)* @bar, variables: !16)
+!9 = !DILocation(line: 3, scope: !10)
+!10 = distinct !DILexicalBlock(line: 2, column: 0, file: !18, scope: !1)
!11 = !{i32 1}
-!12 = !MDLocation(line: 3, scope: !10, inlinedAt: !13)
-!13 = !MDLocation(line: 7, scope: !14)
-!14 = !{!"0xb\006\000\000", !18, !8} ; [ DW_TAG_lexical_block ]
+!12 = !DILocation(line: 3, scope: !10, inlinedAt: !13)
+!13 = !DILocation(line: 7, scope: !14)
+!14 = distinct !DILexicalBlock(line: 6, column: 0, file: !18, scope: !8)
!15 = !{!0}
!16 = !{!7}
!17 = !{!1, !8}
-!18 = !{!"f.c", !"/tmp"}
-!19 = !{i32 0}
+!18 = !DIFile(filename: "f.c", directory: "/tmp")
+!19 = !{}
;CHECK: DEBUG_VALUE: bar:x <- E
;CHECK: Ltmp
;CHECK: DEBUG_VALUE: foo:y <- 1{{$}}
-!20 = !{i32 1, !"Debug Info Version", i32 2}
+!20 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/CodeGen/X86/2010-06-01-DeadArg-DbgInfo.ll b/test/CodeGen/X86/2010-06-01-DeadArg-DbgInfo.ll
index 3687b828c4a4..757c92808e11 100644
--- a/test/CodeGen/X86/2010-06-01-DeadArg-DbgInfo.ll
+++ b/test/CodeGen/X86/2010-06-01-DeadArg-DbgInfo.ll
@@ -10,8 +10,8 @@ target triple = "x86_64-apple-darwin10.2"
define i32 @_ZN3foo3bazEi(%struct.foo* nocapture %this, i32 %x) nounwind readnone optsize noinline ssp align 2 {
;CHECK: DEBUG_VALUE: baz:this <- RDI{{$}}
entry:
- tail call void @llvm.dbg.value(metadata %struct.foo* %this, i64 0, metadata !15, metadata !{!"0x102"})
- tail call void @llvm.dbg.value(metadata i32 %x, i64 0, metadata !16, metadata !{!"0x102"})
+ tail call void @llvm.dbg.value(metadata %struct.foo* %this, i64 0, metadata !15, metadata !DIExpression()), !dbg !DILocation(scope: !8)
+ tail call void @llvm.dbg.value(metadata i32 %x, i64 0, metadata !16, metadata !DIExpression()), !dbg !DILocation(scope: !8)
%0 = mul nsw i32 %x, 7, !dbg !29 ; <i32> [#uses=1]
%1 = add nsw i32 %0, 1, !dbg !29 ; <i32> [#uses=1]
ret i32 %1, !dbg !29
@@ -23,38 +23,38 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnon
!llvm.module.flags = !{!34}
!llvm.dbg.lv = !{!0, !14, !15, !16, !17, !24, !25, !28}
-!0 = !{!"0x101\00this\0011\000", !1, !3, !12} ; [ DW_TAG_arg_variable ]
-!1 = !{!"0x2e\00bar\00bar\00_ZN3foo3barEi\0011\000\001\000\006\000\001\0011", !31, !2, !9, null, i32 (%struct.foo*, i32)* null, null, null, null} ; [ DW_TAG_subprogram ]
-!2 = !{!"0x13\00foo\003\0032\0032\000\000\000", !31, !3, null, !5, null, null, null} ; [ DW_TAG_structure_type ] [foo] [line 3, size 32, align 32, offset 0] [def] [from ]
-!3 = !{!"0x29", !31} ; [ DW_TAG_file_type ]
-!4 = !{!"0x11\004\004.2.1 LLVM build\001\00\000\00\000", !31, !32, !32, !33, null, null} ; [ DW_TAG_compile_unit ]
+!0 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "this", line: 11, arg: 0, scope: !1, file: !3, type: !12)
+!1 = !DISubprogram(name: "bar", linkageName: "_ZN3foo3barEi", line: 11, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: true, scopeLine: 11, file: !31, scope: !2, type: !9, function: i32 (%struct.foo*, i32)* null)
+!2 = !DICompositeType(tag: DW_TAG_structure_type, name: "foo", line: 3, size: 32, align: 32, file: !31, scope: !3, elements: !5)
+!3 = !DIFile(filename: "foo.cp", directory: "/tmp/")
+!4 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "4.2.1 LLVM build", isOptimized: true, emissionKind: 0, file: !31, enums: !32, retainedTypes: !32, subprograms: !33)
!5 = !{!6, !1, !8}
-!6 = !{!"0xd\00y\008\0032\0032\000\000", !31, !2, !7} ; [ DW_TAG_member ]
-!7 = !{!"0x24\00int\000\0032\0032\000\000\005", !31, !3} ; [ DW_TAG_base_type ]
-!8 = !{!"0x2e\00baz\00baz\00_ZN3foo3bazEi\0015\000\001\000\006\000\001\0015", !31, !2, !9, null, i32 (%struct.foo*, i32)* @_ZN3foo3bazEi, null, null, null} ; [ DW_TAG_subprogram ]
-!9 = !{!"0x15\00\000\000\000\000\000\000", !31, !3, null, !10, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!6 = !DIDerivedType(tag: DW_TAG_member, name: "y", line: 8, size: 32, align: 32, file: !31, scope: !2, baseType: !7)
+!7 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!8 = !DISubprogram(name: "baz", linkageName: "_ZN3foo3bazEi", line: 15, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: true, scopeLine: 15, file: !31, scope: !2, type: !9, function: i32 (%struct.foo*, i32)* @_ZN3foo3bazEi)
+!9 = !DISubroutineType(types: !10)
!10 = !{!7, !11, !7}
-!11 = !{!"0xf\00\000\0064\0064\000\0064", !31, !3, !2} ; [ DW_TAG_pointer_type ]
-!12 = !{!"0x26\00\000\0064\0064\000\0064", !31, !3, !13} ; [ DW_TAG_const_type ]
-!13 = !{!"0xf\00\000\0064\0064\000\000", !31, !3, !2} ; [ DW_TAG_pointer_type ]
-!14 = !{!"0x101\00x\0011\000", !1, !3, !7} ; [ DW_TAG_arg_variable ]
-!15 = !{!"0x101\00this\0015\000", !8, !3, !12} ; [ DW_TAG_arg_variable ]
-!16 = !{!"0x101\00x\0015\000", !8, !3, !7} ; [ DW_TAG_arg_variable ]
-!17 = !{!"0x101\00argc\0019\000", !18, !3, !7} ; [ DW_TAG_arg_variable ]
-!18 = !{!"0x2e\00main\00main\00main\0019\000\001\000\006\000\001\0019", !31, !3, !19, null, null, null, null, null} ; [ DW_TAG_subprogram ]
-!19 = !{!"0x15\00\000\000\000\000\000\000", !31, !3, null, !20, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!11 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, flags: DIFlagArtificial, file: !31, scope: !3, baseType: !2)
+!12 = !DIDerivedType(tag: DW_TAG_const_type, size: 64, align: 64, flags: DIFlagArtificial, file: !31, scope: !3, baseType: !13)
+!13 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, file: !31, scope: !3, baseType: !2)
+!14 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "x", line: 11, arg: 0, scope: !1, file: !3, type: !7)
+!15 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "this", line: 15, arg: 0, scope: !8, file: !3, type: !12)
+!16 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "x", line: 15, arg: 0, scope: !8, file: !3, type: !7)
+!17 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "argc", line: 19, arg: 0, scope: !18, file: !3, type: !7)
+!18 = !DISubprogram(name: "main", linkageName: "main", line: 19, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: true, scopeLine: 19, file: !31, scope: !3, type: !19)
+!19 = !DISubroutineType(types: !20)
!20 = !{!7, !7, !21}
-!21 = !{!"0xf\00\000\0064\0064\000\000", !31, !3, !22} ; [ DW_TAG_pointer_type ]
-!22 = !{!"0xf\00\000\0064\0064\000\000", !31, !3, !23} ; [ DW_TAG_pointer_type ]
-!23 = !{!"0x24\00char\000\008\008\000\000\006", !31, !3} ; [ DW_TAG_base_type ]
-!24 = !{!"0x101\00argv\0019\000", !18, !3, !21} ; [ DW_TAG_arg_variable ]
-!25 = !{!"0x100\00a\0020\000", !26, !3, !2} ; [ DW_TAG_auto_variable ]
-!26 = !{!"0xb\0019\000\000", !31, !27} ; [ DW_TAG_lexical_block ]
-!27 = !{!"0xb\0019\000\000", !31, !18} ; [ DW_TAG_lexical_block ]
-!28 = !{!"0x100\00b\0021\000", !26, !3, !7} ; [ DW_TAG_auto_variable ]
-!29 = !MDLocation(line: 16, scope: !30)
-!30 = !{!"0xb\0015\000\000", !31, !8} ; [ DW_TAG_lexical_block ]
-!31 = !{!"foo.cp", !"/tmp/"}
-!32 = !{i32 0}
+!21 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, file: !31, scope: !3, baseType: !22)
+!22 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, file: !31, scope: !3, baseType: !23)
+!23 = !DIBasicType(tag: DW_TAG_base_type, name: "char", size: 8, align: 8, encoding: DW_ATE_signed_char)
+!24 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "argv", line: 19, arg: 0, scope: !18, file: !3, type: !21)
+!25 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "a", line: 20, scope: !26, file: !3, type: !2)
+!26 = distinct !DILexicalBlock(line: 19, column: 0, file: !31, scope: !27)
+!27 = distinct !DILexicalBlock(line: 19, column: 0, file: !31, scope: !18)
+!28 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "b", line: 21, scope: !26, file: !3, type: !7)
+!29 = !DILocation(line: 16, scope: !30)
+!30 = distinct !DILexicalBlock(line: 15, column: 0, file: !31, scope: !8)
+!31 = !DIFile(filename: "foo.cp", directory: "/tmp/")
+!32 = !{}
!33 = !{!1, !8, !18}
-!34 = !{i32 1, !"Debug Info Version", i32 2}
+!34 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/CodeGen/X86/2010-06-14-fast-isel-fs-load.ll b/test/CodeGen/X86/2010-06-14-fast-isel-fs-load.ll
index b5679e665696..5a4b389acb3d 100644
--- a/test/CodeGen/X86/2010-06-14-fast-isel-fs-load.ll
+++ b/test/CodeGen/X86/2010-06-14-fast-isel-fs-load.ll
@@ -2,6 +2,6 @@
; CHECK: %fs:
define i32 @test1(i32 addrspace(257)* %arg) nounwind {
- %tmp = load i32 addrspace(257)* %arg
+ %tmp = load i32, i32 addrspace(257)* %arg
ret i32 %tmp
}
diff --git a/test/CodeGen/X86/2010-06-15-FastAllocEarlyCLobber.ll b/test/CodeGen/X86/2010-06-15-FastAllocEarlyCLobber.ll
index 74a7610e6597..0b1c36f735a4 100644
--- a/test/CodeGen/X86/2010-06-15-FastAllocEarlyCLobber.ll
+++ b/test/CodeGen/X86/2010-06-15-FastAllocEarlyCLobber.ll
@@ -10,17 +10,17 @@ entry:
%retval = alloca i32, align 4 ; <i32*> [#uses=3]
%v = alloca i32, align 4 ; <i32*> [#uses=3]
store i32 0, i32* %retval
- %zero = load i32* %retval
+ %zero = load i32, i32* %retval
; The earlyclobber register EC0 should not be spilled before the inline asm.
; Yes, check-not can refer to FileCheck variables defined in the future.
; CHECK-NOT: [[EC0]]{{.*}}(%rsp)
; CHECK: bsr {{[^,]*}}, [[EC0:%...]]
%0 = call i32 asm "bsr $1, $0\0A\09cmovz $2, $0", "=&r,ro,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i32 %zero, i32 -1) nounwind, !srcloc !0 ; <i32> [#uses=1]
store i32 %0, i32* %v
- %tmp = load i32* %v ; <i32> [#uses=1]
- %call1 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([23 x i8]* @.str, i32 0, i32 0), i32 %tmp) ; <i32> [#uses=0]
+ %tmp = load i32, i32* %v ; <i32> [#uses=1]
+ %call1 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([23 x i8], [23 x i8]* @.str, i32 0, i32 0), i32 %tmp) ; <i32> [#uses=0]
store i32 0, i32* %retval
- %1 = load i32* %retval ; <i32> [#uses=1]
+ %1 = load i32, i32* %retval ; <i32> [#uses=1]
ret i32 %0
}
diff --git a/test/CodeGen/X86/2010-06-24-g-constraint-crash.ll b/test/CodeGen/X86/2010-06-24-g-constraint-crash.ll
index 2a938d941e2d..905b34ff6f5c 100644
--- a/test/CodeGen/X86/2010-06-24-g-constraint-crash.ll
+++ b/test/CodeGen/X86/2010-06-24-g-constraint-crash.ll
@@ -10,6 +10,6 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
define void @bar() nounwind ssp {
entry:
%asmtmp.i.i = tail call %0 asm sideeffect "push %rbp; syscall; pop %rbp\0A", "={ax},={di},={si},={dx},={bx},{ax},{di},{si},{dx},{bx},~{dirflag},~{fpsr},~{flags},~{memory},~{r15},~{r14},~{r13},~{r12},~{r11},~{r10},~{r9},~{r8},~{rcx}"(i32 7, i64 -1, i64 0, i64 -1, i64 -1) nounwind ; <%0> [#uses=0]
- %asmtmp.i1.i = tail call %0 asm sideeffect "mov $10, %r8;\0Amov $11, %r9;\0Amov $12, %r10;\0Apush %rbp; syscall; pop %rbp\0A", "={ax},={di},={si},={dx},={bx},{ax},{di},{si},{dx},{bx},imr,imr,imr,~{dirflag},~{fpsr},~{flags},~{memory},~{r15},~{r14},~{r13},~{r12},~{r11},~{r10},~{r9},~{r8},~{rcx}"(i32 8, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 0, i8* bitcast (i64* getelementptr inbounds ([3 x i64]* @utcbs.1559, i64 0, i64 2) to i8*)) nounwind ; <%0> [#uses=0]
+ %asmtmp.i1.i = tail call %0 asm sideeffect "mov $10, %r8;\0Amov $11, %r9;\0Amov $12, %r10;\0Apush %rbp; syscall; pop %rbp\0A", "={ax},={di},={si},={dx},={bx},{ax},{di},{si},{dx},{bx},imr,imr,imr,~{dirflag},~{fpsr},~{flags},~{memory},~{r15},~{r14},~{r13},~{r12},~{r11},~{r10},~{r9},~{r8},~{rcx}"(i32 8, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 0, i8* bitcast (i64* getelementptr inbounds ([3 x i64], [3 x i64]* @utcbs.1559, i64 0, i64 2) to i8*)) nounwind ; <%0> [#uses=0]
ret void
}
diff --git a/test/CodeGen/X86/2010-06-25-CoalescerSubRegDefDead.ll b/test/CodeGen/X86/2010-06-25-CoalescerSubRegDefDead.ll
index 05f581a08834..ffb51572a30c 100644
--- a/test/CodeGen/X86/2010-06-25-CoalescerSubRegDefDead.ll
+++ b/test/CodeGen/X86/2010-06-25-CoalescerSubRegDefDead.ll
@@ -6,10 +6,10 @@
define i32 @func(%struct.type* %s) nounwind optsize ssp {
entry:
- %tmp1 = getelementptr inbounds %struct.type* %s, i32 0, i32 1
- %tmp2 = load i32* %tmp1, align 8
+ %tmp1 = getelementptr inbounds %struct.type, %struct.type* %s, i32 0, i32 1
+ %tmp2 = load i32, i32* %tmp1, align 8
%tmp3 = icmp eq i32 %tmp2, 10
- %tmp4 = getelementptr inbounds %struct.type* %s, i32 0, i32 40
+ %tmp4 = getelementptr inbounds %struct.type, %struct.type* %s, i32 0, i32 40
br i1 %tmp3, label %bb, label %entry.bb1_crit_edge
entry.bb1_crit_edge:
@@ -27,7 +27,7 @@ bb:
%tmp5 = bitcast i32* %tmp4 to i8*
call void @llvm.memset.p0i8.i64(i8* %tmp5, i8 0, i64 84, i32 4, i1 false)
- %tmp6 = getelementptr inbounds %struct.type* %s, i32 0, i32 62
+ %tmp6 = getelementptr inbounds %struct.type, %struct.type* %s, i32 0, i32 62
store i32* null, i32** %tmp6, align 8
br label %bb1
diff --git a/test/CodeGen/X86/2010-06-25-asm-RA-crash.ll b/test/CodeGen/X86/2010-06-25-asm-RA-crash.ll
index 3470a06a543b..8f5f083209b4 100644
--- a/test/CodeGen/X86/2010-06-25-asm-RA-crash.ll
+++ b/test/CodeGen/X86/2010-06-25-asm-RA-crash.ll
@@ -6,10 +6,10 @@ define void @_SEH2FrameHandler() nounwind {
entry:
%target.addr.i = alloca i8*, align 4 ; <i8**> [#uses=2]
%frame = alloca %struct.__SEH2Frame*, align 4 ; <%struct.__SEH2Frame**> [#uses=1]
- %tmp = load %struct.__SEH2Frame** %frame ; <%struct.__SEH2Frame*> [#uses=1]
+ %tmp = load %struct.__SEH2Frame*, %struct.__SEH2Frame** %frame ; <%struct.__SEH2Frame*> [#uses=1]
%conv = bitcast %struct.__SEH2Frame* %tmp to i8* ; <i8*> [#uses=1]
store i8* %conv, i8** %target.addr.i
- %tmp.i = load i8** %target.addr.i ; <i8*> [#uses=1]
+ %tmp.i = load i8*, i8** %target.addr.i ; <i8*> [#uses=1]
call void asm sideeffect "push %ebp\0Apush $$0\0Apush $$0\0Apush $$Return${:uid}\0Apush $0\0Acall ${1:c}\0AReturn${:uid}: pop %ebp\0A", "imr,imr,~{ax},~{bx},~{cx},~{dx},~{si},~{di},~{flags},~{memory},~{dirflag},~{fpsr},~{flags}"(i8* %tmp.i, void (...)* @RtlUnwind) nounwind, !srcloc !0
ret void
}
diff --git a/test/CodeGen/X86/2010-06-28-matched-g-constraint.ll b/test/CodeGen/X86/2010-06-28-matched-g-constraint.ll
index a0798ae10d7c..023c77aedd4a 100644
--- a/test/CodeGen/X86/2010-06-28-matched-g-constraint.ll
+++ b/test/CodeGen/X86/2010-06-28-matched-g-constraint.ll
@@ -6,6 +6,6 @@ entry:
; CHECK: GCROOT %eax
%_r = alloca i32, align 4 ; <i32*> [#uses=2]
call void asm "/* GCROOT $0 */", "=*imr,0,~{dirflag},~{fpsr},~{flags}"(i32* %_r, i32 4) nounwind
- %0 = load i32* %_r, align 4 ; <i32> [#uses=1]
+ %0 = load i32, i32* %_r, align 4 ; <i32> [#uses=1]
ret i32 %0
}
diff --git a/test/CodeGen/X86/2010-07-02-UnfoldBug.ll b/test/CodeGen/X86/2010-07-02-UnfoldBug.ll
index 79219dcfe60a..954f25f6e167 100644
--- a/test/CodeGen/X86/2010-07-02-UnfoldBug.ll
+++ b/test/CodeGen/X86/2010-07-02-UnfoldBug.ll
@@ -61,7 +61,7 @@ bb22: ; preds = %bb24.preheader
br i1 undef, label %bb2.i.i, label %bb.i.i49
bb.i.i49: ; preds = %bb22
- %0 = load float* undef, align 4 ; <float> [#uses=1]
+ %0 = load float, float* undef, align 4 ; <float> [#uses=1]
%1 = insertelement <4 x float> undef, float %0, i32 0 ; <<4 x float>> [#uses=1]
%2 = call <4 x float> @llvm.x86.sse.min.ss(<4 x float> <float 1.000000e+00, float undef, float undef, float undef>, <4 x float> %1) nounwind readnone ; <<4 x float>> [#uses=1]
%3 = call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %2, <4 x float> <float 0.000000e+00, float undef, float undef, float undef>) nounwind readnone ; <<4 x float>> [#uses=1]
diff --git a/test/CodeGen/X86/2010-07-06-DbgCrash.ll b/test/CodeGen/X86/2010-07-06-DbgCrash.ll
index 457c49852dca..3ce36eec400a 100644
--- a/test/CodeGen/X86/2010-07-06-DbgCrash.ll
+++ b/test/CodeGen/X86/2010-07-06-DbgCrash.ll
@@ -2,28 +2,28 @@
; PR7545
@.str = private constant [4 x i8] c"one\00", align 1 ; <[4 x i8]*> [#uses=1]
@.str1 = private constant [4 x i8] c"two\00", align 1 ; <[5 x i8]*> [#uses=1]
-@C.9.2167 = internal constant [2 x i8*] [i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i8* getelementptr inbounds ([4 x i8]* @.str1, i64 0, i64 0)]
-!38 = !{!"0x29", !109} ; [ DW_TAG_file_type ]
-!39 = !{!"0x11\001\004.2.1 (Based on Apple Inc. build 5658) (LLVM build 9999)\001\00\000\00\000", !109, !108, !108, null, null, null} ; [ DW_TAG_compile_unit ]
-!46 = !{!"0xf\00\000\0064\0064\000\000", !109, null, !47} ; [ DW_TAG_pointer_type ]
-!47 = !{!"0x24\00char\000\008\008\000\000\006", !109, null} ; [ DW_TAG_base_type ]
-!97 = !{!"0x2e\00main\00main\00main\0073\000\001\000\006\000\000\000", i32 0, !39, !98, null, null, null, null, null} ; [ DW_TAG_subprogram ]
-!98 = !{!"0x15\00\000\000\000\000\000\000", !109, null, null, !99, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+@C.9.2167 = internal constant [2 x i8*] [i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str1, i64 0, i64 0)]
+!38 = !DIFile(filename: "pbmsrch.c", directory: "/Users/grawp/LLVM/test-suite/MultiSource/Benchmarks/MiBench/office-stringsearch")
+!39 = !DICompileUnit(language: DW_LANG_C89, producer: "4.2.1 (Based on Apple Inc. build 5658) (LLVM build 9999)", isOptimized: true, emissionKind: 0, file: !109, enums: !108, retainedTypes: !108)
+!46 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, file: !109, baseType: !47)
+!47 = !DIBasicType(tag: DW_TAG_base_type, name: "char", size: 8, align: 8, encoding: DW_ATE_signed_char)
+!97 = !DISubprogram(name: "main", linkageName: "main", line: 73, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, scope: !39, type: !98)
+!98 = !DISubroutineType(types: !99)
!99 = !{!100}
-!100 = !{!"0x24\00int\000\0032\0032\000\000\005", !109, null} ; [ DW_TAG_base_type ]
+!100 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
!101 = !{[2 x i8*]* @C.9.2167}
-!102 = !{!"0x100\00find_strings\0075\000", !103, !38, !104} ; [ DW_TAG_auto_variable ]
-!103 = !{!"0xb\0073\000\000", null, !97} ; [ DW_TAG_lexical_block ]
-!104 = !{!"0x1\00\000\0085312\0064\000\000", !109, null, !46, !105, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 85312, align 64, offset 0] [from ]
+!102 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "find_strings", line: 75, scope: !103, file: !38, type: !104)
+!103 = distinct !DILexicalBlock(line: 73, column: 0, file: null, scope: !97)
+!104 = !DICompositeType(tag: DW_TAG_array_type, size: 85312, align: 64, file: !109, baseType: !46, elements: !105)
!105 = !{!106}
-!106 = !{!"0x21\000\001333"} ; [ DW_TAG_subrange_type ]
-!107 = !MDLocation(line: 73, scope: !103)
+!106 = !DISubrange(count: 1333)
+!107 = !DILocation(line: 73, scope: !103)
!108 = !{i32 0}
-!109 = !{!"pbmsrch.c", !"/Users/grawp/LLVM/test-suite/MultiSource/Benchmarks/MiBench/office-stringsearch"}
+!109 = !DIFile(filename: "pbmsrch.c", directory: "/Users/grawp/LLVM/test-suite/MultiSource/Benchmarks/MiBench/office-stringsearch")
define i32 @main() nounwind ssp {
bb.nph:
- tail call void @llvm.dbg.declare(metadata [2 x i8*]* @C.9.2167, metadata !102, metadata !{!"0x102"}), !dbg !107
+ tail call void @llvm.dbg.declare(metadata [2 x i8*]* @C.9.2167, metadata !102, metadata !DIExpression()), !dbg !107
ret i32 0, !dbg !107
}
diff --git a/test/CodeGen/X86/2010-07-11-FPStackLoneUse.ll b/test/CodeGen/X86/2010-07-11-FPStackLoneUse.ll
index e96da94f5a3a..33a89a04c7d4 100644
--- a/test/CodeGen/X86/2010-07-11-FPStackLoneUse.ll
+++ b/test/CodeGen/X86/2010-07-11-FPStackLoneUse.ll
@@ -10,7 +10,7 @@ target triple = "x86_64-apple-darwin10.0.0"
define void @_ZN7QVectorIdE4fillERKdi(double* nocapture %t) nounwind ssp align 2 {
entry:
- %tmp2 = load double* %t ; <double> [#uses=1]
+ %tmp2 = load double, double* %t ; <double> [#uses=1]
br i1 undef, label %if.end, label %if.then
if.then: ; preds = %entry
diff --git a/test/CodeGen/X86/2010-07-15-Crash.ll b/test/CodeGen/X86/2010-07-15-Crash.ll
index 3ac4cf5964c3..857dc3a3610f 100644
--- a/test/CodeGen/X86/2010-07-15-Crash.ll
+++ b/test/CodeGen/X86/2010-07-15-Crash.ll
@@ -6,7 +6,7 @@
define void @foo() nounwind {
entry:
tail call void asm sideeffect "", "s,i,~{fpsr},~{flags}"(i8* getelementptr
-inbounds ([4 x i8]* @__FUNCTION__.1623, i64 0, i64 0), i8* getelementptr
-inbounds ([4 x i8]* @__FUNCTION__.1623, i64 0, i64 0)) nounwind
+inbounds ([4 x i8], [4 x i8]* @__FUNCTION__.1623, i64 0, i64 0), i8* getelementptr
+inbounds ([4 x i8], [4 x i8]* @__FUNCTION__.1623, i64 0, i64 0)) nounwind
ret void
}
diff --git a/test/CodeGen/X86/2010-08-04-MaskedSignedCompare.ll b/test/CodeGen/X86/2010-08-04-MaskedSignedCompare.ll
index 12a8274fb56f..ab9715d22377 100644
--- a/test/CodeGen/X86/2010-08-04-MaskedSignedCompare.ll
+++ b/test/CodeGen/X86/2010-08-04-MaskedSignedCompare.ll
@@ -7,7 +7,7 @@
define i32 @main() nounwind {
entry:
- %tmp = load i64* @g_16 ; <i64> [#uses=1]
+ %tmp = load i64, i64* @g_16 ; <i64> [#uses=1]
%not.lnot = icmp ne i64 %tmp, 0 ; <i1> [#uses=1]
%conv = sext i1 %not.lnot to i64 ; <i64> [#uses=1]
%and = and i64 %conv, 150 ; <i64> [#uses=1]
@@ -20,7 +20,7 @@ entry:
; CHECK-NEXT: jle
entry.if.end_crit_edge: ; preds = %entry
- %tmp4.pre = load i32* @g_38 ; <i32> [#uses=1]
+ %tmp4.pre = load i32, i32* @g_38 ; <i32> [#uses=1]
br label %if.end
if.then: ; preds = %entry
@@ -29,7 +29,7 @@ if.then: ; preds = %entry
if.end: ; preds = %entry.if.end_crit_edge, %if.then
%tmp4 = phi i32 [ %tmp4.pre, %entry.if.end_crit_edge ], [ 1, %if.then ] ; <i32> [#uses=1]
- %call5 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32 %tmp4) nounwind ; <i32> [#uses=0]
+ %call5 = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), i32 %tmp4) nounwind ; <i32> [#uses=0]
ret i32 0
}
diff --git a/test/CodeGen/X86/2010-08-04-StackVariable.ll b/test/CodeGen/X86/2010-08-04-StackVariable.ll
index e3decf0c889a..6129e78fd348 100644
--- a/test/CodeGen/X86/2010-08-04-StackVariable.ll
+++ b/test/CodeGen/X86/2010-08-04-StackVariable.ll
@@ -6,20 +6,20 @@
define i32 @_Z3fooi4SVal(i32 %i, %struct.SVal* noalias %location) nounwind ssp {
entry:
%"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
- call void @llvm.dbg.value(metadata i32 %i, i64 0, metadata !23, metadata !{!"0x102"}), !dbg !24
- call void @llvm.dbg.value(metadata %struct.SVal* %location, i64 0, metadata !25, metadata !{!"0x102"}), !dbg !24
+ call void @llvm.dbg.value(metadata i32 %i, i64 0, metadata !23, metadata !DIExpression()), !dbg !24
+ call void @llvm.dbg.value(metadata %struct.SVal* %location, i64 0, metadata !25, metadata !DIExpression()), !dbg !24
%0 = icmp ne i32 %i, 0, !dbg !27 ; <i1> [#uses=1]
br i1 %0, label %bb, label %bb1, !dbg !27
bb: ; preds = %entry
- %1 = getelementptr inbounds %struct.SVal* %location, i32 0, i32 1, !dbg !29 ; <i32*> [#uses=1]
- %2 = load i32* %1, align 8, !dbg !29 ; <i32> [#uses=1]
+ %1 = getelementptr inbounds %struct.SVal, %struct.SVal* %location, i32 0, i32 1, !dbg !29 ; <i32*> [#uses=1]
+ %2 = load i32, i32* %1, align 8, !dbg !29 ; <i32> [#uses=1]
%3 = add i32 %2, %i, !dbg !29 ; <i32> [#uses=1]
br label %bb2, !dbg !29
bb1: ; preds = %entry
- %4 = getelementptr inbounds %struct.SVal* %location, i32 0, i32 1, !dbg !30 ; <i32*> [#uses=1]
- %5 = load i32* %4, align 8, !dbg !30 ; <i32> [#uses=1]
+ %4 = getelementptr inbounds %struct.SVal, %struct.SVal* %location, i32 0, i32 1, !dbg !30 ; <i32*> [#uses=1]
+ %5 = load i32, i32* %4, align 8, !dbg !30 ; <i32> [#uses=1]
%6 = sub i32 %5, 1, !dbg !30 ; <i32> [#uses=1]
br label %bb2, !dbg !30
@@ -34,10 +34,10 @@ return: ; preds = %bb2
define linkonce_odr void @_ZN4SValC1Ev(%struct.SVal* %this) nounwind ssp align 2 {
entry:
%"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
- call void @llvm.dbg.value(metadata %struct.SVal* %this, i64 0, metadata !31, metadata !{!"0x102"}), !dbg !34
- %0 = getelementptr inbounds %struct.SVal* %this, i32 0, i32 0, !dbg !34 ; <i8**> [#uses=1]
+ call void @llvm.dbg.value(metadata %struct.SVal* %this, i64 0, metadata !31, metadata !DIExpression()), !dbg !34
+ %0 = getelementptr inbounds %struct.SVal, %struct.SVal* %this, i32 0, i32 0, !dbg !34 ; <i8**> [#uses=1]
store i8* null, i8** %0, align 8, !dbg !34
- %1 = getelementptr inbounds %struct.SVal* %this, i32 0, i32 1, !dbg !34 ; <i32*> [#uses=1]
+ %1 = getelementptr inbounds %struct.SVal, %struct.SVal* %this, i32 0, i32 1, !dbg !34 ; <i32*> [#uses=1]
store i32 0, i32* %1, align 8, !dbg !34
br label %return, !dbg !34
@@ -52,20 +52,20 @@ entry:
%0 = alloca %struct.SVal ; <%struct.SVal*> [#uses=3]
%v = alloca %struct.SVal ; <%struct.SVal*> [#uses=4]
%"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
- call void @llvm.dbg.declare(metadata %struct.SVal* %v, metadata !38, metadata !{!"0x102"}), !dbg !41
+ call void @llvm.dbg.declare(metadata %struct.SVal* %v, metadata !38, metadata !DIExpression()), !dbg !41
call void @_ZN4SValC1Ev(%struct.SVal* %v) nounwind, !dbg !41
- %1 = getelementptr inbounds %struct.SVal* %v, i32 0, i32 1, !dbg !42 ; <i32*> [#uses=1]
+ %1 = getelementptr inbounds %struct.SVal, %struct.SVal* %v, i32 0, i32 1, !dbg !42 ; <i32*> [#uses=1]
store i32 1, i32* %1, align 8, !dbg !42
- %2 = getelementptr inbounds %struct.SVal* %0, i32 0, i32 0, !dbg !43 ; <i8**> [#uses=1]
- %3 = getelementptr inbounds %struct.SVal* %v, i32 0, i32 0, !dbg !43 ; <i8**> [#uses=1]
- %4 = load i8** %3, align 8, !dbg !43 ; <i8*> [#uses=1]
+ %2 = getelementptr inbounds %struct.SVal, %struct.SVal* %0, i32 0, i32 0, !dbg !43 ; <i8**> [#uses=1]
+ %3 = getelementptr inbounds %struct.SVal, %struct.SVal* %v, i32 0, i32 0, !dbg !43 ; <i8**> [#uses=1]
+ %4 = load i8*, i8** %3, align 8, !dbg !43 ; <i8*> [#uses=1]
store i8* %4, i8** %2, align 8, !dbg !43
- %5 = getelementptr inbounds %struct.SVal* %0, i32 0, i32 1, !dbg !43 ; <i32*> [#uses=1]
- %6 = getelementptr inbounds %struct.SVal* %v, i32 0, i32 1, !dbg !43 ; <i32*> [#uses=1]
- %7 = load i32* %6, align 8, !dbg !43 ; <i32> [#uses=1]
+ %5 = getelementptr inbounds %struct.SVal, %struct.SVal* %0, i32 0, i32 1, !dbg !43 ; <i32*> [#uses=1]
+ %6 = getelementptr inbounds %struct.SVal, %struct.SVal* %v, i32 0, i32 1, !dbg !43 ; <i32*> [#uses=1]
+ %7 = load i32, i32* %6, align 8, !dbg !43 ; <i32> [#uses=1]
store i32 %7, i32* %5, align 8, !dbg !43
%8 = call i32 @_Z3fooi4SVal(i32 2, %struct.SVal* noalias %0) nounwind, !dbg !43 ; <i32> [#uses=0]
- call void @llvm.dbg.value(metadata i32 %8, i64 0, metadata !44, metadata !{!"0x102"}), !dbg !43
+ call void @llvm.dbg.value(metadata i32 %8, i64 0, metadata !44, metadata !DIExpression()), !dbg !43
br label %return, !dbg !45
return: ; preds = %entry
@@ -78,52 +78,52 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnon
!llvm.module.flags = !{!49}
!46 = !{!16, !17, !20}
-!0 = !{!"0x2e\00SVal\00SVal\00\0011\000\000\000\006\000\000\0011", !47, !1, !14, null, null, null, null, null} ; [ DW_TAG_subprogram ]
-!1 = !{!"0x13\00SVal\001\00128\0064\000\000\000", !47, !2, null, !4, null, null, null} ; [ DW_TAG_structure_type ] [SVal] [line 1, size 128, align 64, offset 0] [def] [from ]
-!2 = !{!"0x29", !47} ; [ DW_TAG_file_type ]
-!3 = !{!"0x11\004\004.2.1 (Based on Apple Inc. build 5658) (LLVM build)\000\00\000\00\001", !47, !48, !48, !46, null, null} ; [ DW_TAG_compile_unit ]
+!0 = !DISubprogram(name: "SVal", line: 11, isLocal: false, isDefinition: false, virtualIndex: 6, isOptimized: false, scopeLine: 11, file: !47, scope: !1, type: !14)
+!1 = !DICompositeType(tag: DW_TAG_structure_type, name: "SVal", line: 1, size: 128, align: 64, file: !47, scope: !2, elements: !4)
+!2 = !DIFile(filename: "small.cc", directory: "/Users/manav/R8248330")
+!3 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", isOptimized: false, emissionKind: 1, file: !47, enums: !48, retainedTypes: !48, subprograms: !46, imports: null)
!4 = !{!5, !7, !0, !9}
-!5 = !{!"0xd\00Data\007\0064\0064\000\000", !47, !1, !6} ; [ DW_TAG_member ]
-!6 = !{!"0xf\00\000\0064\0064\000\000", !47, !2, null} ; [ DW_TAG_pointer_type ]
-!7 = !{!"0xd\00Kind\008\0032\0032\0064\000", !47, !1, !8} ; [ DW_TAG_member ]
-!8 = !{!"0x24\00unsigned int\000\0032\0032\000\000\007", !47, !2} ; [ DW_TAG_base_type ]
-!9 = !{!"0x2e\00~SVal\00~SVal\00\0012\000\000\000\006\000\000\0012", !47, !1, !10, null, null, null, null, null} ; [ DW_TAG_subprogram ]
-!10 = !{!"0x15\00\000\000\000\000\000\000", !47, !2, null, !11, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!5 = !DIDerivedType(tag: DW_TAG_member, name: "Data", line: 7, size: 64, align: 64, file: !47, scope: !1, baseType: !6)
+!6 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, file: !47, scope: !2, baseType: null)
+!7 = !DIDerivedType(tag: DW_TAG_member, name: "Kind", line: 8, size: 32, align: 32, offset: 64, file: !47, scope: !1, baseType: !8)
+!8 = !DIBasicType(tag: DW_TAG_base_type, name: "unsigned int", size: 32, align: 32, encoding: DW_ATE_unsigned)
+!9 = !DISubprogram(name: "~SVal", line: 12, isLocal: false, isDefinition: false, virtualIndex: 6, isOptimized: false, scopeLine: 12, file: !47, scope: !1, type: !10)
+!10 = !DISubroutineType(types: !11)
!11 = !{null, !12, !13}
-!12 = !{!"0xf\00\000\0064\0064\000\0064", !47, !2, !1} ; [ DW_TAG_pointer_type ]
-!13 = !{!"0x24\00int\000\0032\0032\000\000\005", !47, !2} ; [ DW_TAG_base_type ]
-!14 = !{!"0x15\00\000\000\000\000\000\000", !47, !2, null, !15, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!12 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, flags: DIFlagArtificial, file: !47, scope: !2, baseType: !1)
+!13 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!14 = !DISubroutineType(types: !15)
!15 = !{null, !12}
-!16 = !{!"0x2e\00SVal\00SVal\00_ZN4SValC1Ev\0011\000\001\000\006\000\000\0011", !47, !1, !14, null, void (%struct.SVal*)* @_ZN4SValC1Ev, null, null, null} ; [ DW_TAG_subprogram ]
-!17 = !{!"0x2e\00foo\00foo\00_Z3fooi4SVal\0016\000\001\000\006\000\000\0016", !47, !2, !18, null, i32 (i32, %struct.SVal*)* @_Z3fooi4SVal, null, null, null} ; [ DW_TAG_subprogram ]
-!18 = !{!"0x15\00\000\000\000\000\000\000", !47, !2, null, !19, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!16 = !DISubprogram(name: "SVal", linkageName: "_ZN4SValC1Ev", line: 11, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, scopeLine: 11, file: !47, scope: !1, type: !14, function: void (%struct.SVal*)* @_ZN4SValC1Ev)
+!17 = !DISubprogram(name: "foo", linkageName: "_Z3fooi4SVal", line: 16, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, scopeLine: 16, file: !47, scope: !2, type: !18, function: i32 (i32, %struct.SVal*)* @_Z3fooi4SVal)
+!18 = !DISubroutineType(types: !19)
!19 = !{!13, !13, !1}
-!20 = !{!"0x2e\00main\00main\00main\0023\000\001\000\006\000\000\0023", !47, !2, !21, null, i32 ()* @main, null, null, null} ; [ DW_TAG_subprogram ]
-!21 = !{!"0x15\00\000\000\000\000\000\000", !47, !2, null, !22, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!20 = !DISubprogram(name: "main", linkageName: "main", line: 23, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, scopeLine: 23, file: !47, scope: !2, type: !21, function: i32 ()* @main)
+!21 = !DISubroutineType(types: !22)
!22 = !{!13}
-!23 = !{!"0x101\00i\0016\000", !17, !2, !13} ; [ DW_TAG_arg_variable ]
-!24 = !MDLocation(line: 16, scope: !17)
-!25 = !{!"0x101\00location\0016\000", !17, !2, !26} ; [ DW_TAG_arg_variable ]
-!26 = !{!"0x10\00SVal\000\0064\0064\000\000", !47, !2, !1} ; [ DW_TAG_reference_type ]
-!27 = !MDLocation(line: 17, scope: !28)
-!28 = !{!"0xb\0016\000\002", !47, !17} ; [ DW_TAG_lexical_block ]
-!29 = !MDLocation(line: 18, scope: !28)
-!30 = !MDLocation(line: 20, scope: !28)
-!31 = !{!"0x101\00this\0011\000", !16, !2, !32} ; [ DW_TAG_arg_variable ]
-!32 = !{!"0x26\00\000\0064\0064\000\0064", !47, !2, !33} ; [ DW_TAG_const_type ]
-!33 = !{!"0xf\00\000\0064\0064\000\000", !47, !2, !1} ; [ DW_TAG_pointer_type ]
-!34 = !MDLocation(line: 11, scope: !16)
-!35 = !MDLocation(line: 11, scope: !36)
-!36 = !{!"0xb\0011\000\001", !47, !37} ; [ DW_TAG_lexical_block ]
-!37 = !{!"0xb\0011\000\000", !47, !16} ; [ DW_TAG_lexical_block ]
-!38 = !{!"0x100\00v\0024\000", !39, !2, !1} ; [ DW_TAG_auto_variable ]
-!39 = !{!"0xb\0023\000\004", !47, !40} ; [ DW_TAG_lexical_block ]
-!40 = !{!"0xb\0023\000\003", !47, !20} ; [ DW_TAG_lexical_block ]
-!41 = !MDLocation(line: 24, scope: !39)
-!42 = !MDLocation(line: 25, scope: !39)
-!43 = !MDLocation(line: 26, scope: !39)
-!44 = !{!"0x100\00k\0026\000", !39, !2, !13} ; [ DW_TAG_auto_variable ]
-!45 = !MDLocation(line: 27, scope: !39)
-!47 = !{!"small.cc", !"/Users/manav/R8248330"}
-!48 = !{i32 0}
-!49 = !{i32 1, !"Debug Info Version", i32 2}
+!23 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "i", line: 16, arg: 0, scope: !17, file: !2, type: !13)
+!24 = !DILocation(line: 16, scope: !17)
+!25 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "location", line: 16, arg: 0, scope: !17, file: !2, type: !26)
+!26 = !DIDerivedType(tag: DW_TAG_reference_type, name: "SVal", size: 64, align: 64, file: !47, scope: !2, baseType: !1)
+!27 = !DILocation(line: 17, scope: !28)
+!28 = distinct !DILexicalBlock(line: 16, column: 0, file: !47, scope: !17)
+!29 = !DILocation(line: 18, scope: !28)
+!30 = !DILocation(line: 20, scope: !28)
+!31 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "this", line: 11, arg: 0, scope: !16, file: !2, type: !32)
+!32 = !DIDerivedType(tag: DW_TAG_const_type, size: 64, align: 64, flags: DIFlagArtificial, file: !47, scope: !2, baseType: !33)
+!33 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, file: !47, scope: !2, baseType: !1)
+!34 = !DILocation(line: 11, scope: !16)
+!35 = !DILocation(line: 11, scope: !36)
+!36 = distinct !DILexicalBlock(line: 11, column: 0, file: !47, scope: !37)
+!37 = distinct !DILexicalBlock(line: 11, column: 0, file: !47, scope: !16)
+!38 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "v", line: 24, scope: !39, file: !2, type: !1)
+!39 = distinct !DILexicalBlock(line: 23, column: 0, file: !47, scope: !40)
+!40 = distinct !DILexicalBlock(line: 23, column: 0, file: !47, scope: !20)
+!41 = !DILocation(line: 24, scope: !39)
+!42 = !DILocation(line: 25, scope: !39)
+!43 = !DILocation(line: 26, scope: !39)
+!44 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "k", line: 26, scope: !39, file: !2, type: !13)
+!45 = !DILocation(line: 27, scope: !39)
+!47 = !DIFile(filename: "small.cc", directory: "/Users/manav/R8248330")
+!48 = !{}
+!49 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/CodeGen/X86/2010-09-01-RemoveCopyByCommutingDef.ll b/test/CodeGen/X86/2010-09-01-RemoveCopyByCommutingDef.ll
index e5542baf2ee8..b05664d758cf 100644
--- a/test/CodeGen/X86/2010-09-01-RemoveCopyByCommutingDef.ll
+++ b/test/CodeGen/X86/2010-09-01-RemoveCopyByCommutingDef.ll
@@ -8,7 +8,7 @@ define void @f(i32* %w, i32* %h, i8* %_this, i8* %image) nounwind ssp {
%x1 = tail call i64 @g(i8* %_this, i8* %image) nounwind ; <i64> [#uses=3]
%tmp1 = trunc i64 %x1 to i32 ; <i32> [#uses=1]
; CHECK: movl (%r{{.*}}), %
- %x4 = load i32* %h, align 4 ; <i32> [#uses=1]
+ %x4 = load i32, i32* %h, align 4 ; <i32> [#uses=1]
; The imull clobbers a 32-bit register.
; CHECK: imull %{{...}}, %e[[CLOBBER:..]]
diff --git a/test/CodeGen/X86/2010-09-16-EmptyFilename.ll b/test/CodeGen/X86/2010-09-16-EmptyFilename.ll
index cf9897ac03ad..d94bd1c79f91 100644
--- a/test/CodeGen/X86/2010-09-16-EmptyFilename.ll
+++ b/test/CodeGen/X86/2010-09-16-EmptyFilename.ll
@@ -15,21 +15,21 @@ entry:
!llvm.dbg.cu = !{!2}
!llvm.module.flags = !{!17}
-!0 = !{!"0x2e\00foo\00foo\00foo\0053\000\001\000\006\000\000\000", !14, !1, !3, null, i32 ()* @foo, null, null, null} ; [ DW_TAG_subprogram ]
-!1 = !{!"0x29", !14} ; [ DW_TAG_file_type ]
-!2 = !{!"0x11\0012\00clang version 2.9 (trunk 114084)\000\00\000\00\000", !15, !16, !16, !13, null, null} ; [ DW_TAG_compile_unit ]
-!3 = !{!"0x15\00\000\000\000\000\000\000", !14, !1, null, !4, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!0 = !DISubprogram(name: "foo", linkageName: "foo", line: 53, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, file: !14, scope: !1, type: !3, function: i32 ()* @foo)
+!1 = !DIFile(filename: "", directory: "/private/tmp")
+!2 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 2.9 (trunk 114084)", isOptimized: false, emissionKind: 0, file: !15, enums: !16, retainedTypes: !16, subprograms: !13)
+!3 = !DISubroutineType(types: !4)
!4 = !{!5}
-!5 = !{!"0x24\00int\000\0032\0032\000\000\005", !14, !1} ; [ DW_TAG_base_type ]
-!6 = !{!"0x2e\00bar\00bar\00bar\004\000\001\000\006\000\000\000", !15, !7, !3, null, i32 ()* @bar, null, null, null} ; [ DW_TAG_subprogram ]
-!7 = !{!"0x29", !15} ; [ DW_TAG_file_type ]
-!8 = !MDLocation(line: 53, column: 13, scope: !9)
-!9 = !{!"0xb\0053\0011\000", !14, !0} ; [ DW_TAG_lexical_block ]
-!10 = !MDLocation(line: 4, column: 13, scope: !11)
-!11 = !{!"0xb\004\0013\002", !15, !12} ; [ DW_TAG_lexical_block ]
-!12 = !{!"0xb\004\0011\001", !15, !6} ; [ DW_TAG_lexical_block ]
+!5 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!6 = !DISubprogram(name: "bar", linkageName: "bar", line: 4, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, file: !15, scope: !7, type: !3, function: i32 ()* @bar)
+!7 = !DIFile(filename: "bug.c", directory: "/private/tmp")
+!8 = !DILocation(line: 53, column: 13, scope: !9)
+!9 = distinct !DILexicalBlock(line: 53, column: 11, file: !14, scope: !0)
+!10 = !DILocation(line: 4, column: 13, scope: !11)
+!11 = distinct !DILexicalBlock(line: 4, column: 13, file: !15, scope: !12)
+!12 = distinct !DILexicalBlock(line: 4, column: 11, file: !15, scope: !6)
!13 = !{!0, !6}
-!14 = !{!"", !"/private/tmp"}
-!15 = !{!"bug.c", !"/private/tmp"}
-!16 = !{i32 0}
-!17 = !{i32 1, !"Debug Info Version", i32 2}
+!14 = !DIFile(filename: "", directory: "/private/tmp")
+!15 = !DIFile(filename: "bug.c", directory: "/private/tmp")
+!16 = !{}
+!17 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/CodeGen/X86/2010-09-16-asmcrash.ll b/test/CodeGen/X86/2010-09-16-asmcrash.ll
index 7aa9f32d41c4..81b0fc560ee7 100644
--- a/test/CodeGen/X86/2010-09-16-asmcrash.ll
+++ b/test/CodeGen/X86/2010-09-16-asmcrash.ll
@@ -30,7 +30,7 @@ entry:
br i1 undef, label %while.cond.preheader, label %sem_check_validity.exit
while.cond.preheader: ; preds = %entry
- %tmp4 = getelementptr inbounds %struct._sem* %sem, i64 0, i32 1, i32 1
+ %tmp4 = getelementptr inbounds %struct._sem, %struct._sem* %sem, i64 0, i32 1, i32 1
br label %while.cond
sem_check_validity.exit: ; preds = %entry
diff --git a/test/CodeGen/X86/2010-09-17-SideEffectsInChain.ll b/test/CodeGen/X86/2010-09-17-SideEffectsInChain.ll
index 39d89e3d8276..b7380196bd9b 100644
--- a/test/CodeGen/X86/2010-09-17-SideEffectsInChain.ll
+++ b/test/CodeGen/X86/2010-09-17-SideEffectsInChain.ll
@@ -7,10 +7,10 @@ declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32,
define fastcc i32 @cli_magic_scandesc(i8* %in) nounwind ssp {
entry:
%a = alloca [64 x i8]
- %b = getelementptr inbounds [64 x i8]* %a, i64 0, i32 0
- %c = getelementptr inbounds [64 x i8]* %a, i64 0, i32 30
- %d = load i8* %b, align 8
- %e = load i8* %c, align 8
+ %b = getelementptr inbounds [64 x i8], [64 x i8]* %a, i64 0, i32 0
+ %c = getelementptr inbounds [64 x i8], [64 x i8]* %a, i64 0, i32 30
+ %d = load i8, i8* %b, align 8
+ %e = load i8, i8* %c, align 8
%f = bitcast [64 x i8]* %a to i8*
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %f, i8* %in, i64 64, i32 8, i1 false) nounwind
store i8 %d, i8* %b, align 8
diff --git a/test/CodeGen/X86/2010-09-30-CMOV-JumpTable-PHI.ll b/test/CodeGen/X86/2010-09-30-CMOV-JumpTable-PHI.ll
index 73e996c5d553..b8abd9c9e7d4 100644
--- a/test/CodeGen/X86/2010-09-30-CMOV-JumpTable-PHI.ll
+++ b/test/CodeGen/X86/2010-09-30-CMOV-JumpTable-PHI.ll
@@ -19,7 +19,7 @@ declare i32 @strlen(i8* nocapture) nounwind readonly
define hidden zeroext i8 @f(i8* %this, i8* %Name.0, i32 %Name.1, i8* noalias %NameLoc, i8* %Operands) nounwind align 2 {
bb.i:
%0 = icmp eq i8 undef, 0
- %iftmp.285.0 = select i1 %0, i8* getelementptr inbounds ([5 x i8]* @.str1189, i32 0, i32 0), i8* getelementptr inbounds ([4 x i8]* @.str706, i32 0, i32 0)
+ %iftmp.285.0 = select i1 %0, i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str1189, i32 0, i32 0), i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str706, i32 0, i32 0)
%1 = call i32 @strlen(i8* %iftmp.285.0) nounwind readonly
switch i32 %Name.1, label %_ZNK4llvm12StringSwitchINS_9StringRefES1_E7DefaultERKS1_.exit [
i32 3, label %bb1.i
@@ -63,7 +63,7 @@ _ZNK4llvm12StringSwitchINS_9StringRefES1_E7DefaultERKS1_.exit: ; preds = %bb.i18
br i1 undef, label %bb141, label %_ZNK4llvm9StringRef10startswithES0_.exit
_ZNK4llvm9StringRef10startswithES0_.exit: ; preds = %_ZNK4llvm12StringSwitchINS_9StringRefES1_E7DefaultERKS1_.exit
- %2 = call i32 @memcmp(i8* %PatchedName.0.0, i8* getelementptr inbounds ([4 x i8]* @.str146, i32 0, i32 0), i32 3) nounwind readonly
+ %2 = call i32 @memcmp(i8* %PatchedName.0.0, i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str146, i32 0, i32 0), i32 3) nounwind readonly
unreachable
bb141: ; preds = %_ZNK4llvm12StringSwitchINS_9StringRefES1_E7DefaultERKS1_.exit
diff --git a/test/CodeGen/X86/2010-10-08-cmpxchg8b.ll b/test/CodeGen/X86/2010-10-08-cmpxchg8b.ll
index ebf51a5d660a..ee50cb13e634 100644
--- a/test/CodeGen/X86/2010-10-08-cmpxchg8b.ll
+++ b/test/CodeGen/X86/2010-10-08-cmpxchg8b.ll
@@ -16,8 +16,7 @@ define void @foo(i64* %ptr) nounwind inlinehint {
entry:
br label %loop
loop:
-; CHECK: lock
-; CHECK-NEXT: cmpxchg8b
+; CHECK: lock cmpxchg8b
%pair = cmpxchg i64* %ptr, i64 0, i64 1 monotonic monotonic
%r = extractvalue { i64, i1 } %pair, 0
%stored1 = icmp eq i64 %r, 0
diff --git a/test/CodeGen/X86/2010-11-02-DbgParameter.ll b/test/CodeGen/X86/2010-11-02-DbgParameter.ll
index df3aa1f2ab37..124cc9a430e8 100644
--- a/test/CodeGen/X86/2010-11-02-DbgParameter.ll
+++ b/test/CodeGen/X86/2010-11-02-DbgParameter.ll
@@ -9,7 +9,7 @@ target triple = "i386-apple-darwin11.0.0"
define i32 @foo(%struct.bar* nocapture %i) nounwind readnone optsize noinline ssp {
; CHECK: TAG_formal_parameter
entry:
- tail call void @llvm.dbg.value(metadata %struct.bar* %i, i64 0, metadata !6, metadata !{!"0x102"}), !dbg !12
+ tail call void @llvm.dbg.value(metadata %struct.bar* %i, i64 0, metadata !6, metadata !DIExpression()), !dbg !12
ret i32 1, !dbg !13
}
@@ -18,23 +18,23 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnon
!llvm.dbg.cu = !{!2}
!llvm.module.flags = !{!19}
-!0 = !{!"0x2e\00foo\00foo\00\003\000\001\000\006\00256\001\003", !17, !1, !3, null, i32 (%struct.bar*)* @foo, null, null, !16} ; [ DW_TAG_subprogram ]
-!1 = !{!"0x29", !17} ; [ DW_TAG_file_type ]
-!2 = !{!"0x11\0012\00clang version 2.9 (trunk 117922)\001\00\000\00\000", !17, !18, !18, !15, null, null} ; [ DW_TAG_compile_unit ]
-!3 = !{!"0x15\00\000\000\000\000\000\000", !17, !1, null, !4, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!0 = !DISubprogram(name: "foo", line: 3, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 3, file: !17, scope: !1, type: !3, function: i32 (%struct.bar*)* @foo, variables: !16)
+!1 = !DIFile(filename: "one.c", directory: "/private/tmp")
+!2 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 2.9 (trunk 117922)", isOptimized: true, emissionKind: 0, file: !17, enums: !18, retainedTypes: !18, subprograms: !15, imports: null)
+!3 = !DISubroutineType(types: !4)
!4 = !{!5}
-!5 = !{!"0x24\00int\000\0032\0032\000\000\005", !17, !2} ; [ DW_TAG_base_type ]
-!6 = !{!"0x101\00i\003\000", !0, !1, !7} ; [ DW_TAG_arg_variable ]
-!7 = !{!"0xf\00\000\0032\0032\000\000", !17, !1, !8} ; [ DW_TAG_pointer_type ]
-!8 = !{!"0x13\00bar\002\0064\0032\000\000\000", !17, !1, null, !9, null, null, null} ; [ DW_TAG_structure_type ] [bar] [line 2, size 64, align 32, offset 0] [def] [from ]
+!5 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!6 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "i", line: 3, arg: 0, scope: !0, file: !1, type: !7)
+!7 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 32, align: 32, file: !17, scope: !1, baseType: !8)
+!8 = !DICompositeType(tag: DW_TAG_structure_type, name: "bar", line: 2, size: 64, align: 32, file: !17, scope: !1, elements: !9)
!9 = !{!10, !11}
-!10 = !{!"0xd\00x\002\0032\0032\000\000", !17, !1, !5} ; [ DW_TAG_member ]
-!11 = !{!"0xd\00y\002\0032\0032\0032\000", !17, !1, !5} ; [ DW_TAG_member ]
-!12 = !MDLocation(line: 3, column: 47, scope: !0)
-!13 = !MDLocation(line: 4, column: 2, scope: !14)
-!14 = !{!"0xb\003\0050\000", !17, !0} ; [ DW_TAG_lexical_block ]
+!10 = !DIDerivedType(tag: DW_TAG_member, name: "x", line: 2, size: 32, align: 32, file: !17, scope: !1, baseType: !5)
+!11 = !DIDerivedType(tag: DW_TAG_member, name: "y", line: 2, size: 32, align: 32, offset: 32, file: !17, scope: !1, baseType: !5)
+!12 = !DILocation(line: 3, column: 47, scope: !0)
+!13 = !DILocation(line: 4, column: 2, scope: !14)
+!14 = distinct !DILexicalBlock(line: 3, column: 50, file: !17, scope: !0)
!15 = !{!0}
!16 = !{!6}
-!17 = !{!"one.c", !"/private/tmp"}
-!18 = !{i32 0}
-!19 = !{i32 1, !"Debug Info Version", i32 2}
+!17 = !DIFile(filename: "one.c", directory: "/private/tmp")
+!18 = !{}
+!19 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/CodeGen/X86/2010-11-09-MOVLPS.ll b/test/CodeGen/X86/2010-11-09-MOVLPS.ll
index 710cb86f5374..4b937333c8e9 100644
--- a/test/CodeGen/X86/2010-11-09-MOVLPS.ll
+++ b/test/CodeGen/X86/2010-11-09-MOVLPS.ll
@@ -21,42 +21,42 @@ entry:
store i8* %a, i8** %a_addr
store %0* %b, %0** %b_addr
store %0* %c, %0** %c_addr
- %0 = load i8** %a_addr, align 64
- %1 = load %0** %b_addr, align 64
- %2 = load %0** %c_addr, align 64
+ %0 = load i8*, i8** %a_addr, align 64
+ %1 = load %0*, %0** %b_addr, align 64
+ %2 = load %0*, %0** %c_addr, align 64
%"ssa point" = bitcast i32 0 to i32
br label %"2"
"2": ; preds = %entry
%3 = bitcast i8* %0 to <2 x i32>*
- %4 = getelementptr inbounds %0* %1, i32 0, i32 0
+ %4 = getelementptr inbounds %0, %0* %1, i32 0, i32 0
%5 = bitcast %"int[]"* %4 to <4 x float>*
- %6 = load <4 x float>* %5, align 16
+ %6 = load <4 x float>, <4 x float>* %5, align 16
%7 = bitcast <2 x i32>* %3 to <2 x float>*
%8 = bitcast <2 x float>* %7 to double*
- %9 = load double* %8
+ %9 = load double, double* %8
%10 = insertelement <2 x double> undef, double %9, i32 0
%11 = insertelement <2 x double> %10, double undef, i32 1
%12 = bitcast <2 x double> %11 to <4 x float>
%13 = shufflevector <4 x float> %6, <4 x float> %12, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
- %14 = getelementptr inbounds %0* %1, i32 0, i32 0
+ %14 = getelementptr inbounds %0, %0* %1, i32 0, i32 0
%15 = bitcast %"int[]"* %14 to <4 x float>*
store <4 x float> %13, <4 x float>* %15, align 16
%16 = bitcast i8* %0 to <2 x i32>*
%17 = bitcast <2 x i32>* %16 to i8*
- %18 = getelementptr i8* %17, i64 8
+ %18 = getelementptr i8, i8* %17, i64 8
%19 = bitcast i8* %18 to <2 x i32>*
- %20 = getelementptr inbounds %0* %2, i32 0, i32 0
+ %20 = getelementptr inbounds %0, %0* %2, i32 0, i32 0
%21 = bitcast %"int[]"* %20 to <4 x float>*
- %22 = load <4 x float>* %21, align 16
+ %22 = load <4 x float>, <4 x float>* %21, align 16
%23 = bitcast <2 x i32>* %19 to <2 x float>*
%24 = bitcast <2 x float>* %23 to double*
- %25 = load double* %24
+ %25 = load double, double* %24
%26 = insertelement <2 x double> undef, double %25, i32 0
%27 = insertelement <2 x double> %26, double undef, i32 1
%28 = bitcast <2 x double> %27 to <4 x float>
%29 = shufflevector <4 x float> %22, <4 x float> %28, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
- %30 = getelementptr inbounds %0* %2, i32 0, i32 0
+ %30 = getelementptr inbounds %0, %0* %2, i32 0, i32 0
%31 = bitcast %"int[]"* %30 to <4 x float>*
store <4 x float> %29, <4 x float>* %31, align 16
br label %return
diff --git a/test/CodeGen/X86/2010-11-18-SelectOfExtload.ll b/test/CodeGen/X86/2010-11-18-SelectOfExtload.ll
index 6d54c7e2982b..331e83bb5067 100644
--- a/test/CodeGen/X86/2010-11-18-SelectOfExtload.ll
+++ b/test/CodeGen/X86/2010-11-18-SelectOfExtload.ll
@@ -4,11 +4,11 @@
@s = external global i8
define i32 @foo(i1 %cond) {
; CHECK: @foo
- %u_base = load i8* @u
+ %u_base = load i8, i8* @u
%u_val = zext i8 %u_base to i32
; CHECK: movzbl
; CHECK: movsbl
- %s_base = load i8* @s
+ %s_base = load i8, i8* @s
%s_val = sext i8 %s_base to i32
%val = select i1 %cond, i32 %u_val, i32 %s_val
ret i32 %val
diff --git a/test/CodeGen/X86/2011-01-24-DbgValue-Before-Use.ll b/test/CodeGen/X86/2011-01-24-DbgValue-Before-Use.ll
index 8404020c91f1..0ded66fa3bf9 100644
--- a/test/CodeGen/X86/2011-01-24-DbgValue-Before-Use.ll
+++ b/test/CodeGen/X86/2011-01-24-DbgValue-Before-Use.ll
@@ -22,8 +22,8 @@ target triple = "x86_64-apple-darwin10.0.0"
define i64 @gcd(i64 %a, i64 %b) nounwind readnone optsize noinline ssp {
entry:
- tail call void @llvm.dbg.value(metadata i64 %a, i64 0, metadata !10, metadata !{!"0x102"}), !dbg !18
- tail call void @llvm.dbg.value(metadata i64 %b, i64 0, metadata !11, metadata !{!"0x102"}), !dbg !19
+ tail call void @llvm.dbg.value(metadata i64 %a, i64 0, metadata !10, metadata !DIExpression()), !dbg !18
+ tail call void @llvm.dbg.value(metadata i64 %b, i64 0, metadata !11, metadata !DIExpression()), !dbg !19
br label %while.body, !dbg !20
while.body: ; preds = %while.body, %entry
@@ -34,14 +34,14 @@ while.body: ; preds = %while.body, %entry
br i1 %cmp, label %if.then, label %while.body, !dbg !23
if.then: ; preds = %while.body
- tail call void @llvm.dbg.value(metadata i64 %rem, i64 0, metadata !12, metadata !{!"0x102"}), !dbg !21
+ tail call void @llvm.dbg.value(metadata i64 %rem, i64 0, metadata !12, metadata !DIExpression()), !dbg !21
ret i64 %b.addr.0, !dbg !23
}
define i32 @main() nounwind optsize ssp {
entry:
%call = tail call i32 @rand() nounwind optsize, !dbg !24
- tail call void @llvm.dbg.value(metadata i32 %call, i64 0, metadata !14, metadata !{!"0x102"}), !dbg !24
+ tail call void @llvm.dbg.value(metadata i32 %call, i64 0, metadata !14, metadata !DIExpression()), !dbg !24
%cmp = icmp ugt i32 %call, 21, !dbg !25
br i1 %cmp, label %cond.true, label %cond.end, !dbg !25
@@ -51,7 +51,7 @@ cond.true: ; preds = %entry
cond.end: ; preds = %entry, %cond.true
%cond = phi i32 [ %call1, %cond.true ], [ %call, %entry ], !dbg !25
- tail call void @llvm.dbg.value(metadata i32 %cond, i64 0, metadata !17, metadata !{!"0x102"}), !dbg !25
+ tail call void @llvm.dbg.value(metadata i32 %cond, i64 0, metadata !17, metadata !DIExpression()), !dbg !25
%conv = sext i32 %cond to i64, !dbg !26
%conv5 = zext i32 %call to i64, !dbg !26
%call6 = tail call i64 @gcd(i64 %conv, i64 %conv5) optsize, !dbg !26
@@ -59,8 +59,8 @@ cond.end: ; preds = %entry, %cond.true
br i1 %cmp7, label %return, label %if.then, !dbg !26
if.then: ; preds = %cond.end
- %puts = tail call i32 @puts(i8* getelementptr inbounds ([21 x i8]* @str, i64 0, i64 0))
- %call12 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([14 x i8]* @.str1, i64 0, i64 0), i32 %call, i32 %cond) nounwind optsize, !dbg !26
+ %puts = tail call i32 @puts(i8* getelementptr inbounds ([21 x i8], [21 x i8]* @str, i64 0, i64 0))
+ %call12 = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([14 x i8], [14 x i8]* @.str1, i64 0, i64 0), i32 %call, i32 %cond) nounwind optsize, !dbg !26
ret i32 1, !dbg !27
return: ; preds = %cond.end
@@ -78,37 +78,37 @@ declare i32 @puts(i8* nocapture) nounwind
!llvm.dbg.cu = !{!2}
!llvm.module.flags = !{!33}
-!0 = !{!"0x2e\00gcd\00gcd\00\005\000\001\000\006\00256\001\000", !31, !1, !3, null, i64 (i64, i64)* @gcd, null, null, !29} ; [ DW_TAG_subprogram ] [line 5] [def] [scope 0] [gcd]
-!1 = !{!"0x29", !31} ; [ DW_TAG_file_type ]
-!2 = !{!"0x11\0012\00clang version 2.9 (trunk 124117)\001\00\000\00\001", !31, !32, !32, !28, null, null} ; [ DW_TAG_compile_unit ]
-!3 = !{!"0x15\00\000\000\000\000\000\000", !31, !1, null, !4, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!0 = !DISubprogram(name: "gcd", line: 5, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, file: !31, scope: !1, type: !3, function: i64 (i64, i64)* @gcd, variables: !29)
+!1 = !DIFile(filename: "rem_small.c", directory: "/private/tmp")
+!2 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 2.9 (trunk 124117)", isOptimized: true, emissionKind: 1, file: !31, enums: !32, retainedTypes: !32, subprograms: !28, imports: null)
+!3 = !DISubroutineType(types: !4)
!4 = !{!5}
-!5 = !{!"0x24\00long int\000\0064\0064\000\000\005", null, !2} ; [ DW_TAG_base_type ]
-!6 = !{!"0x2e\00main\00main\00\0025\000\001\000\006\000\001\000", !31, !1, !7, null, i32 ()* @main, null, null, !30} ; [ DW_TAG_subprogram ] [line 25] [def] [scope 0] [main]
-!7 = !{!"0x15\00\000\000\000\000\000\000", !31, !1, null, !8, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!5 = !DIBasicType(tag: DW_TAG_base_type, name: "long int", size: 64, align: 64, encoding: DW_ATE_signed)
+!6 = !DISubprogram(name: "main", line: 25, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: true, file: !31, scope: !1, type: !7, function: i32 ()* @main, variables: !30)
+!7 = !DISubroutineType(types: !8)
!8 = !{!9}
-!9 = !{!"0x24\00int\000\0032\0032\000\000\005", null, !2} ; [ DW_TAG_base_type ]
-!10 = !{!"0x101\00a\005\000", !0, !1, !5} ; [ DW_TAG_arg_variable ]
-!11 = !{!"0x101\00b\005\000", !0, !1, !5} ; [ DW_TAG_arg_variable ]
-!12 = !{!"0x100\00c\006\000", !13, !1, !5} ; [ DW_TAG_auto_variable ]
-!13 = !{!"0xb\005\0052\000", !31, !0} ; [ DW_TAG_lexical_block ]
-!14 = !{!"0x100\00m\0026\000", !15, !1, !16} ; [ DW_TAG_auto_variable ]
-!15 = !{!"0xb\0025\0012\002", !31, !6} ; [ DW_TAG_lexical_block ]
-!16 = !{!"0x24\00unsigned int\000\0032\0032\000\000\007", null, !2} ; [ DW_TAG_base_type ]
-!17 = !{!"0x100\00z_s\0027\000", !15, !1, !9} ; [ DW_TAG_auto_variable ]
-!18 = !MDLocation(line: 5, column: 41, scope: !0)
-!19 = !MDLocation(line: 5, column: 49, scope: !0)
-!20 = !MDLocation(line: 7, column: 5, scope: !13)
-!21 = !MDLocation(line: 8, column: 9, scope: !22)
-!22 = !{!"0xb\007\0014\001", !31, !13} ; [ DW_TAG_lexical_block ]
-!23 = !MDLocation(line: 9, column: 9, scope: !22)
-!24 = !MDLocation(line: 26, column: 38, scope: !15)
-!25 = !MDLocation(line: 27, column: 38, scope: !15)
-!26 = !MDLocation(line: 28, column: 9, scope: !15)
-!27 = !MDLocation(line: 30, column: 1, scope: !15)
+!9 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!10 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "a", line: 5, arg: 0, scope: !0, file: !1, type: !5)
+!11 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "b", line: 5, arg: 0, scope: !0, file: !1, type: !5)
+!12 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "c", line: 6, scope: !13, file: !1, type: !5)
+!13 = distinct !DILexicalBlock(line: 5, column: 52, file: !31, scope: !0)
+!14 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "m", line: 26, scope: !15, file: !1, type: !16)
+!15 = distinct !DILexicalBlock(line: 25, column: 12, file: !31, scope: !6)
+!16 = !DIBasicType(tag: DW_TAG_base_type, name: "unsigned int", size: 32, align: 32, encoding: DW_ATE_unsigned)
+!17 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "z_s", line: 27, scope: !15, file: !1, type: !9)
+!18 = !DILocation(line: 5, column: 41, scope: !0)
+!19 = !DILocation(line: 5, column: 49, scope: !0)
+!20 = !DILocation(line: 7, column: 5, scope: !13)
+!21 = !DILocation(line: 8, column: 9, scope: !22)
+!22 = distinct !DILexicalBlock(line: 7, column: 14, file: !31, scope: !13)
+!23 = !DILocation(line: 9, column: 9, scope: !22)
+!24 = !DILocation(line: 26, column: 38, scope: !15)
+!25 = !DILocation(line: 27, column: 38, scope: !15)
+!26 = !DILocation(line: 28, column: 9, scope: !15)
+!27 = !DILocation(line: 30, column: 1, scope: !15)
!28 = !{!0, !6}
!29 = !{!10, !11, !12}
!30 = !{!14, !17}
-!31 = !{!"rem_small.c", !"/private/tmp"}
-!32 = !{i32 0}
-!33 = !{i32 1, !"Debug Info Version", i32 2}
+!31 = !DIFile(filename: "rem_small.c", directory: "/private/tmp")
+!32 = !{}
+!33 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/CodeGen/X86/2011-02-12-shuffle.ll b/test/CodeGen/X86/2011-02-12-shuffle.ll
index b4d56d193ca3..40e36678a557 100644
--- a/test/CodeGen/X86/2011-02-12-shuffle.ll
+++ b/test/CodeGen/X86/2011-02-12-shuffle.ll
@@ -9,7 +9,7 @@ entry:
br i1 undef, label %if.end, label %UnifiedReturnBlock
if.end: ; preds = %entry
- %tmp1067 = load <16 x i32> addrspace(1)* null, align 64
+ %tmp1067 = load <16 x i32>, <16 x i32> addrspace(1)* null, align 64
%tmp1082 = shufflevector <16 x i32> <i32 0, i32 0, i32 0, i32 undef, i32 undef, i32 0, i32 0, i32 undef, i32 0, i32 0, i32 undef, i32 undef, i32 0, i32 undef, i32 undef, i32 undef>,
<16 x i32> %tmp1067,
<16 x i32> <i32 0, i32 1, i32 2, i32 undef, i32 26, i32 5, i32 6, i32 undef, i32 8, i32 9, i32 31, i32 30, i32 12, i32 undef, i32 undef, i32 undef>
diff --git a/test/CodeGen/X86/2011-02-21-VirtRegRewriter-KillSubReg.ll b/test/CodeGen/X86/2011-02-21-VirtRegRewriter-KillSubReg.ll
index f982723781ea..7821f0537e70 100644
--- a/test/CodeGen/X86/2011-02-21-VirtRegRewriter-KillSubReg.ll
+++ b/test/CodeGen/X86/2011-02-21-VirtRegRewriter-KillSubReg.ll
@@ -18,7 +18,7 @@ if.then751:
if.then758:
%add761 = add i32 %call747, 4
%add763 = add i32 %add761, %call747
- %add.ptr768 = getelementptr inbounds [516 x i8]* null, i32 0, i32 %add761
+ %add.ptr768 = getelementptr inbounds [516 x i8], [516 x i8]* null, i32 0, i32 %add761
br i1 undef, label %cond.false783, label %cond.true771
cond.true771:
@@ -33,7 +33,7 @@ cond.false783:
cond.end791:
%conv801 = trunc i32 %call747 to i8
%add.ptr822.sum = add i32 %call747, 3
- %arrayidx833 = getelementptr inbounds [516 x i8]* null, i32 0, i32 %add.ptr822.sum
+ %arrayidx833 = getelementptr inbounds [516 x i8], [516 x i8]* null, i32 0, i32 %add.ptr822.sum
store i8 %conv801, i8* %arrayidx833, align 1
%cmp841 = icmp eq i8* undef, null
br i1 %cmp841, label %if.end849, label %if.then843
diff --git a/test/CodeGen/X86/2011-02-23-UnfoldBug.ll b/test/CodeGen/X86/2011-02-23-UnfoldBug.ll
index 900106aac351..90b90d7f9f6a 100644
--- a/test/CodeGen/X86/2011-02-23-UnfoldBug.ll
+++ b/test/CodeGen/X86/2011-02-23-UnfoldBug.ll
@@ -22,7 +22,7 @@ for.body33.lr.ph: ; preds = %for.body
for.end: ; preds = %for.body
%vecins.i94 = insertelement <2 x double> undef, double 0.000000e+00, i32 0
%cmpsd.i = tail call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %vecins.i94, <2 x double> <double 0x3FE984B204153B34, double 0x3FE984B204153B34>, i8 2) nounwind
- tail call void (...)* @_mm_movemask_pd(<2 x double> %cmpsd.i) nounwind
+ tail call void (...) @_mm_movemask_pd(<2 x double> %cmpsd.i) nounwind
br i1 undef, label %if.then67, label %if.end71
if.then67: ; preds = %for.end
diff --git a/test/CodeGen/X86/2011-03-02-DAGCombiner.ll b/test/CodeGen/X86/2011-03-02-DAGCombiner.ll
index be58cedfdaa7..d25fbf7b71f1 100644
--- a/test/CodeGen/X86/2011-03-02-DAGCombiner.ll
+++ b/test/CodeGen/X86/2011-03-02-DAGCombiner.ll
@@ -13,23 +13,23 @@ entry:
%K = alloca %0, align 4
store i32 0, i32* %retval
%0 = bitcast %0* %K to i32*
- %1 = load i32* %0, align 4
+ %1 = load i32, i32* %0, align 4
%2 = and i32 %1, -121
%3 = or i32 %2, 32
store i32 %3, i32* %0, align 4
%4 = bitcast %0* %K to i32*
- %5 = load i32* %4, align 4
+ %5 = load i32, i32* %4, align 4
%6 = lshr i32 %5, 3
%bf.clear = and i32 %6, 15
%conv = sitofp i32 %bf.clear to float
- %f = getelementptr inbounds %struct.anon* %F, i32 0, i32 0
- %tmp = load float* %f, align 4
+ %f = getelementptr inbounds %struct.anon, %struct.anon* %F, i32 0, i32 0
+ %tmp = load float, float* %f, align 4
%sub = fsub float %tmp, %conv
store float %sub, float* %f, align 4
- %ld = getelementptr inbounds %struct.anon* %F, i32 0, i32 1
- %tmp1 = load x86_fp80* %ld, align 16
+ %ld = getelementptr inbounds %struct.anon, %struct.anon* %F, i32 0, i32 1
+ %tmp1 = load x86_fp80, x86_fp80* %ld, align 16
%7 = bitcast %0* %K to i32*
- %8 = load i32* %7, align 4
+ %8 = load i32, i32* %7, align 4
%9 = lshr i32 %8, 7
%bf.clear2 = and i32 %9, 1
%conv3 = uitofp i32 %bf.clear2 to x86_fp80
@@ -39,12 +39,12 @@ entry:
%10 = bitcast %0* %K to i32*
%11 = and i32 %bf.value, 1
%12 = shl i32 %11, 7
- %13 = load i32* %10, align 4
+ %13 = load i32, i32* %10, align 4
%14 = and i32 %13, -129
%15 = or i32 %14, %12
store i32 %15, i32* %10, align 4
- %call = call i32 (...)* @iequals(i32 1841, i32 %bf.value, i32 0)
- %16 = load i32* %retval
+ %call = call i32 (...) @iequals(i32 1841, i32 %bf.value, i32 0)
+ %16 = load i32, i32* %retval
ret i32 %16
}
diff --git a/test/CodeGen/X86/2011-03-09-Physreg-Coalescing.ll b/test/CodeGen/X86/2011-03-09-Physreg-Coalescing.ll
index e48edf7e30b4..9fe6a774fbb7 100644
--- a/test/CodeGen/X86/2011-03-09-Physreg-Coalescing.ll
+++ b/test/CodeGen/X86/2011-03-09-Physreg-Coalescing.ll
@@ -12,10 +12,10 @@ declare fastcc i8* @save_string(i8* %d, i8* nocapture %s) nounwind
define i32 @cvtchar(i8* nocapture %sp) nounwind {
%temp.i = alloca [2 x i8], align 1
- %tmp1 = load i8* %sp, align 1
+ %tmp1 = load i8, i8* %sp, align 1
%div = udiv i8 %tmp1, 10
%rem = urem i8 %div, 10
- %arrayidx.i = getelementptr inbounds [2 x i8]* %temp.i, i32 0, i32 0
+ %arrayidx.i = getelementptr inbounds [2 x i8], [2 x i8]* %temp.i, i32 0, i32 0
store i8 %rem, i8* %arrayidx.i, align 1
%call.i = call fastcc i8* @save_string(i8* %sp, i8* %arrayidx.i) nounwind
ret i32 undef
diff --git a/test/CodeGen/X86/2011-04-13-SchedCmpJmp.ll b/test/CodeGen/X86/2011-04-13-SchedCmpJmp.ll
index be10ad5cc206..ed64ea93b393 100644
--- a/test/CodeGen/X86/2011-04-13-SchedCmpJmp.ll
+++ b/test/CodeGen/X86/2011-04-13-SchedCmpJmp.ll
@@ -17,7 +17,7 @@ declare hidden fastcc void @_ZN3JSCL23returnToThrowTrampolineEPNS_12JSGlobalData
; CHECK: je
define i32 @cti_op_eq(i8** nocapture %args) nounwind ssp {
entry:
- %0 = load i8** null, align 8
+ %0 = load i8*, i8** null, align 8
%tmp13 = bitcast i8* %0 to %"class.JSC::CodeLocationCall"*
%tobool.i.i.i = icmp ugt i8* undef, inttoptr (i64 281474976710655 to i8*)
%or.cond.i = and i1 %tobool.i.i.i, undef
@@ -34,7 +34,7 @@ if.end.i: ; preds = %entry
br i1 undef, label %land.rhs.i121.i, label %_ZNK3JSC7JSValue8isStringEv.exit122.i
land.rhs.i121.i: ; preds = %if.end.i
- %tmp.i.i117.i = load %"class.JSC::Structure"** undef, align 8
+ %tmp.i.i117.i = load %"class.JSC::Structure"*, %"class.JSC::Structure"** undef, align 8
br label %_ZNK3JSC7JSValue8isStringEv.exit122.i
_ZNK3JSC7JSValue8isStringEv.exit122.i: ; preds = %land.rhs.i121.i, %if.end.i
@@ -48,12 +48,12 @@ if.then.i92.i: ; preds = %_ZNK3JSC7JSValue8is
_ZN3JSC7JSValue19equalSlowCaseInlineEPNS_9ExecStateES0_S0_.exit: ; preds = %_ZNK3JSC7JSValue8isStringEv.exit122.i, %if.then.i.i.i, %if.then.i
- %1 = load i8** undef, align 8
+ %1 = load i8*, i8** undef, align 8
br i1 undef, label %do.end39, label %do.body27
do.body27: ; preds = %_ZN3JSC7JSValue19equalSlowCaseInlineEPNS_9ExecStateES0_S0_.exit
%tmp30 = bitcast i8* %1 to %"class.JSC::JSGlobalData"*
- %2 = getelementptr inbounds i8** %args, i64 -1
+ %2 = getelementptr inbounds i8*, i8** %args, i64 -1
%3 = bitcast i8** %2 to %"class.JSC::FunctionPtr"*
tail call fastcc void @_ZN3JSCL23returnToThrowTrampolineEPNS_12JSGlobalDataENS_16ReturnAddressPtrERS2_(%"class.JSC::JSGlobalData"* %tmp30, i8* undef, %"class.JSC::FunctionPtr"* %3)
unreachable
diff --git a/test/CodeGen/X86/2011-05-09-loaduse.ll b/test/CodeGen/X86/2011-05-09-loaduse.ll
index c772e4c7f4e4..a94a9812431e 100644
--- a/test/CodeGen/X86/2011-05-09-loaduse.ll
+++ b/test/CodeGen/X86/2011-05-09-loaduse.ll
@@ -5,7 +5,7 @@
;CHECK: ret
define float @test(<4 x float>* %A) nounwind {
entry:
- %T = load <4 x float>* %A
+ %T = load <4 x float>, <4 x float>* %A
%R = extractelement <4 x float> %T, i32 3
store <4 x float><float 0.0, float 0.0, float 0.0, float 0.0>, <4 x float>* %A
ret float %R
diff --git a/test/CodeGen/X86/2011-05-26-UnreachableBlockElim.ll b/test/CodeGen/X86/2011-05-26-UnreachableBlockElim.ll
index 91cd208f0167..68c0af4f752a 100644
--- a/test/CodeGen/X86/2011-05-26-UnreachableBlockElim.ll
+++ b/test/CodeGen/X86/2011-05-26-UnreachableBlockElim.ll
@@ -41,8 +41,8 @@ cond.false156.i: ; preds = %for.end.i, %land.en
cond.end166.i: ; preds = %cond.false156.i, %cond.true138.i
%idxprom1113.i = phi i64 [ %idxprom1114.i, %cond.false156.i ], [ undef, %cond.true138.i ]
- %tmp235.i = load %struct.state** getelementptr inbounds (%struct.dfa* @aux_temp, i64 0, i32 2), align 8
- %att.i = getelementptr inbounds %struct.state* %tmp235.i, i64 %idxprom1113.i, i32 0
+ %tmp235.i = load %struct.state*, %struct.state** getelementptr inbounds (%struct.dfa, %struct.dfa* @aux_temp, i64 0, i32 2), align 8
+ %att.i = getelementptr inbounds %struct.state, %struct.state* %tmp235.i, i64 %idxprom1113.i, i32 0
store i32 0, i32* %att.i, align 4
ret void
}
diff --git a/test/CodeGen/X86/2011-05-27-CrossClassCoalescing.ll b/test/CodeGen/X86/2011-05-27-CrossClassCoalescing.ll
index c595bba3266c..414bd243ebce 100644
--- a/test/CodeGen/X86/2011-05-27-CrossClassCoalescing.ll
+++ b/test/CodeGen/X86/2011-05-27-CrossClassCoalescing.ll
@@ -20,15 +20,15 @@ land.lhs.true: ; preds = %do.body.i
for.body.i: ; preds = %for.inc.i, %if.then
%tmp3524.i = phi i32 [ 0, %land.lhs.true ], [ %tmp351.i, %for.inc.i ]
- %tmp6.i12 = load i32* undef, align 4
+ %tmp6.i12 = load i32, i32* undef, align 4
br i1 undef, label %for.inc.i, label %if.then.i17
if.then.i17: ; preds = %for.body.i
%shr.i14 = lshr i32 %tmp6.i12, 8
%and14.i = and i32 %shr.i14, 255
%idxprom15.i = zext i32 %and14.i to i64
- %arrayidx16.i = getelementptr inbounds [256 x i32]* @bit_count, i64 0, i64 %idxprom15.i
- %tmp17.i15 = load i32* %arrayidx16.i, align 4
+ %arrayidx16.i = getelementptr inbounds [256 x i32], [256 x i32]* @bit_count, i64 0, i64 %idxprom15.i
+ %tmp17.i15 = load i32, i32* %arrayidx16.i, align 4
%add.i = add i32 0, %tmp3524.i
%add24.i = add i32 %add.i, %tmp17.i15
%add31.i = add i32 %add24.i, 0
diff --git a/test/CodeGen/X86/2011-06-01-fildll.ll b/test/CodeGen/X86/2011-06-01-fildll.ll
index 3a0b05fce3ab..30c743441c36 100644
--- a/test/CodeGen/X86/2011-06-01-fildll.ll
+++ b/test/CodeGen/X86/2011-06-01-fildll.ll
@@ -7,7 +7,7 @@ define float @f(i64* nocapture %x) nounwind readonly ssp {
entry:
; CHECK: movl
; CHECK-NOT: movl
- %tmp1 = load i64* %x, align 4
+ %tmp1 = load i64, i64* %x, align 4
; CHECK: fildll
%conv = sitofp i64 %tmp1 to float
%add = fadd float %conv, 1.000000e+00
diff --git a/test/CodeGen/X86/2011-06-03-x87chain.ll b/test/CodeGen/X86/2011-06-03-x87chain.ll
index 5275b6889bff..c78e8e38a567 100644
--- a/test/CodeGen/X86/2011-06-03-x87chain.ll
+++ b/test/CodeGen/X86/2011-06-03-x87chain.ll
@@ -2,7 +2,7 @@
define float @chainfail1(i64* nocapture %a, i64* nocapture %b, i32 %x, i32 %y, float* nocapture %f) nounwind uwtable noinline ssp {
entry:
- %tmp1 = load i64* %a, align 8
+ %tmp1 = load i64, i64* %a, align 8
; Insure x87 ops are properly chained, order preserved.
; CHECK: fildll
%conv = sitofp i64 %tmp1 to float
@@ -22,8 +22,8 @@ entry:
%mul = mul nsw i32 %y, %x
%sub = add nsw i32 %mul, -1
%idxprom = sext i32 %sub to i64
- %arrayidx = getelementptr inbounds i64* %a, i64 %idxprom
- %tmp4 = load i64* %arrayidx, align 8
+ %arrayidx = getelementptr inbounds i64, i64* %a, i64 %idxprom
+ %tmp4 = load i64, i64* %arrayidx, align 8
; CHECK: fildll
%conv = sitofp i64 %tmp4 to float
store float %conv, float* %f, align 4
@@ -35,7 +35,7 @@ entry:
br i1 undef, label %while.end, label %while.body
while.body: ; preds = %while.body, %entry
- %x.1.copyload = load i24* undef, align 1
+ %x.1.copyload = load i24, i24* undef, align 1
%conv = sitofp i24 %x.1.copyload to float
%div = fmul float %conv, 0x3E80000000000000
store float %div, float* undef, align 4
diff --git a/test/CodeGen/X86/2011-06-12-FastAllocSpill.ll b/test/CodeGen/X86/2011-06-12-FastAllocSpill.ll
index 6f43b94b264a..1285d20b8522 100644
--- a/test/CodeGen/X86/2011-06-12-FastAllocSpill.ll
+++ b/test/CodeGen/X86/2011-06-12-FastAllocSpill.ll
@@ -28,21 +28,21 @@ bb:
br label %bb8
bb8: ; preds = %bb23, %bb
- %tmp15 = getelementptr inbounds %3* %tmp7, i32 0, i32 4
+ %tmp15 = getelementptr inbounds %3, %3* %tmp7, i32 0, i32 4
store i8* bitcast (%0* @0 to i8*), i8** %tmp15
%tmp16 = bitcast %3* %tmp7 to void ()*
store void ()* %tmp16, void ()** %tmp6, align 8
- %tmp17 = load void ()** %tmp6, align 8
+ %tmp17 = load void ()*, void ()** %tmp6, align 8
%tmp18 = bitcast void ()* %tmp17 to %6*
- %tmp19 = getelementptr inbounds %6* %tmp18, i32 0, i32 3
+ %tmp19 = getelementptr inbounds %6, %6* %tmp18, i32 0, i32 3
%tmp20 = bitcast %6* %tmp18 to i8*
- %tmp21 = load i8** %tmp19
+ %tmp21 = load i8*, i8** %tmp19
%tmp22 = bitcast i8* %tmp21 to void (i8*)*
call void %tmp22(i8* %tmp20)
br label %bb23
bb23: ; preds = %bb8
- %tmp24 = load i64* %tmp5, align 8
+ %tmp24 = load i64, i64* %tmp5, align 8
%tmp25 = add i64 %tmp24, 1
store i64 %tmp25, i64* %tmp5, align 8
%tmp26 = icmp ult i64 %tmp25, 10
diff --git a/test/CodeGen/X86/2011-06-19-QuicksortCoalescerBug.ll b/test/CodeGen/X86/2011-06-19-QuicksortCoalescerBug.ll
index 08178a302f23..489eb619b51e 100644
--- a/test/CodeGen/X86/2011-06-19-QuicksortCoalescerBug.ll
+++ b/test/CodeGen/X86/2011-06-19-QuicksortCoalescerBug.ll
@@ -10,7 +10,7 @@ tailrecurse: ; preds = %do.cond, %entry
%l.tr = phi i32 [ %l, %entry ], [ %i.1, %do.cond ]
%r.tr = phi i32 [ %r, %entry ], [ %l.tr, %do.cond ]
%idxprom12 = sext i32 %r.tr to i64
- %arrayidx14 = getelementptr inbounds i32* %a, i64 %idxprom12
+ %arrayidx14 = getelementptr inbounds i32, i32* %a, i64 %idxprom12
br label %do.body
do.body: ; preds = %do.cond, %tailrecurse
diff --git a/test/CodeGen/X86/2011-07-13-BadFrameIndexDisplacement.ll b/test/CodeGen/X86/2011-07-13-BadFrameIndexDisplacement.ll
index aea53b3b9855..f38ebf1da85a 100644
--- a/test/CodeGen/X86/2011-07-13-BadFrameIndexDisplacement.ll
+++ b/test/CodeGen/X86/2011-07-13-BadFrameIndexDisplacement.ll
@@ -11,8 +11,8 @@ entry:
call void @bar([39 x i8]* %stack_main)
%tmp6 = add i64 %a, -2147483647
%.sum = add i64 %tmp6, %b
- %tmp8 = getelementptr inbounds [39 x i8]* %stack_main, i64 0, i64 %.sum
- %tmp9 = load i8* %tmp8, align 1
+ %tmp8 = getelementptr inbounds [39 x i8], [39 x i8]* %stack_main, i64 0, i64 %.sum
+ %tmp9 = load i8, i8* %tmp8, align 1
%tmp10 = sext i8 %tmp9 to i32
ret i32 %tmp10
}
diff --git a/test/CodeGen/X86/2011-09-14-valcoalesce.ll b/test/CodeGen/X86/2011-09-14-valcoalesce.ll
index 4e84e84c1aa9..b8e5100c53bb 100644
--- a/test/CodeGen/X86/2011-09-14-valcoalesce.ll
+++ b/test/CodeGen/X86/2011-09-14-valcoalesce.ll
@@ -121,7 +121,7 @@ while.body.i188: ; preds = %for.end173.i, %if.e
while.body85.i: ; preds = %while.body85.i, %while.body.i188
%aFreq.0518.i = phi i32 [ %add93.i, %while.body85.i ], [ 0, %while.body.i188 ]
%inc87.i = add nsw i32 0, 1
- %tmp91.i = load i32* undef, align 4
+ %tmp91.i = load i32, i32* undef, align 4
%add93.i = add nsw i32 %tmp91.i, %aFreq.0518.i
%or.cond514.i = and i1 undef, false
br i1 %or.cond514.i, label %while.body85.i, label %while.end.i
@@ -144,7 +144,7 @@ if.end117.i: ; preds = %if.then108.i, %land
br i1 undef, label %if.then122.i, label %for.cond138.preheader.i
if.then122.i: ; preds = %if.end117.i
- call void (...)* @fprintf(i32 undef, i32 %gs.0526.i, i32 %ge.1.i, i32 %aFreq.1.i, double undef) nounwind
+ call void (...) @fprintf(i32 undef, i32 %gs.0526.i, i32 %ge.1.i, i32 %aFreq.1.i, double undef) nounwind
br label %for.cond138.preheader.i
for.cond138.preheader.i: ; preds = %if.then122.i, %if.end117.i
diff --git a/test/CodeGen/X86/2011-09-21-setcc-bug.ll b/test/CodeGen/X86/2011-09-21-setcc-bug.ll
index a67c3f338862..e61715a4813d 100644
--- a/test/CodeGen/X86/2011-09-21-setcc-bug.ll
+++ b/test/CodeGen/X86/2011-09-21-setcc-bug.ll
@@ -3,10 +3,10 @@
; Make sure we are not crashing on this code.
define void @load_4_i8(<4 x i8>* %k, <4 x i8>* %y, <4 x double>* %A1, <4 x double>* %A0) {
- %A = load <4 x i8>* %k
- %B = load <4 x i8>* %y
- %C = load <4 x double>* %A0
- %D= load <4 x double>* %A1
+ %A = load <4 x i8>, <4 x i8>* %k
+ %B = load <4 x i8>, <4 x i8>* %y
+ %C = load <4 x double>, <4 x double>* %A0
+ %D= load <4 x double>, <4 x double>* %A1
%M = icmp uge <4 x i8> %A, %B
%T = select <4 x i1> %M, <4 x double> %C, <4 x double> %D
store <4 x double> %T, <4 x double>* undef
@@ -15,10 +15,10 @@ define void @load_4_i8(<4 x i8>* %k, <4 x i8>* %y, <4 x double>* %A1, <4 x doubl
define void @load_256_i8(<256 x i8>* %k, <256 x i8>* %y, <256 x double>* %A1, <256 x double>* %A0) {
- %A = load <256 x i8>* %k
- %B = load <256 x i8>* %y
- %C = load <256 x double>* %A0
- %D= load <256 x double>* %A1
+ %A = load <256 x i8>, <256 x i8>* %k
+ %B = load <256 x i8>, <256 x i8>* %y
+ %C = load <256 x double>, <256 x double>* %A0
+ %D= load <256 x double>, <256 x double>* %A1
%M = icmp uge <256 x i8> %A, %B
%T = select <256 x i1> %M, <256 x double> %C, <256 x double> %D
store <256 x double> %T, <256 x double>* undef
diff --git a/test/CodeGen/X86/2011-10-11-srl.ll b/test/CodeGen/X86/2011-10-11-srl.ll
index 434f88c14b6a..ff58afca0417 100644
--- a/test/CodeGen/X86/2011-10-11-srl.ll
+++ b/test/CodeGen/X86/2011-10-11-srl.ll
@@ -3,7 +3,7 @@
target triple = "x86_64-unknown-linux-gnu"
define void @m387(<2 x i8>* %p, <2 x i16>* %q) {
- %t = load <2 x i8>* %p
+ %t = load <2 x i8>, <2 x i8>* %p
%r = sext <2 x i8> %t to <2 x i16>
store <2 x i16> %r, <2 x i16>* %q
ret void
diff --git a/test/CodeGen/X86/2011-10-12-MachineCSE.ll b/test/CodeGen/X86/2011-10-12-MachineCSE.ll
index 72e672ac4f1f..341a14b6d2a0 100644
--- a/test/CodeGen/X86/2011-10-12-MachineCSE.ll
+++ b/test/CodeGen/X86/2011-10-12-MachineCSE.ll
@@ -16,35 +16,35 @@ target triple = "x86_64-apple-macosx10.7.2"
define %struct.rtx_def* @gen_add3_insn(%struct.rtx_def* %r0, %struct.rtx_def* %r1, %struct.rtx_def* %c) nounwind uwtable ssp {
entry:
%0 = bitcast %struct.rtx_def* %r0 to i32*
- %1 = load i32* %0, align 8
+ %1 = load i32, i32* %0, align 8
%2 = lshr i32 %1, 16
%bf.clear = and i32 %2, 255
%idxprom = sext i32 %bf.clear to i64
- %3 = load %struct.optab** getelementptr inbounds ([49 x %struct.optab*]* @optab_table, i32 0, i64 0), align 8
- %handlers = getelementptr inbounds %struct.optab* %3, i32 0, i32 1
- %arrayidx = getelementptr inbounds [59 x %struct.anon.3]* %handlers, i32 0, i64 %idxprom
- %insn_code = getelementptr inbounds %struct.anon.3* %arrayidx, i32 0, i32 0
- %4 = load i32* %insn_code, align 4
+ %3 = load %struct.optab*, %struct.optab** getelementptr inbounds ([49 x %struct.optab*], [49 x %struct.optab*]* @optab_table, i32 0, i64 0), align 8
+ %handlers = getelementptr inbounds %struct.optab, %struct.optab* %3, i32 0, i32 1
+ %arrayidx = getelementptr inbounds [59 x %struct.anon.3], [59 x %struct.anon.3]* %handlers, i32 0, i64 %idxprom
+ %insn_code = getelementptr inbounds %struct.anon.3, %struct.anon.3* %arrayidx, i32 0, i32 0
+ %4 = load i32, i32* %insn_code, align 4
%cmp = icmp eq i32 %4, 1317
br i1 %cmp, label %if.then, label %lor.lhs.false
lor.lhs.false: ; preds = %entry
%idxprom1 = sext i32 %4 to i64
- %arrayidx2 = getelementptr inbounds [0 x %struct.insn_data]* @insn_data, i32 0, i64 %idxprom1
- %operand = getelementptr inbounds %struct.insn_data* %arrayidx2, i32 0, i32 3
- %5 = load %struct.insn_operand_data** %operand, align 8
- %arrayidx3 = getelementptr inbounds %struct.insn_operand_data* %5, i64 0
- %predicate = getelementptr inbounds %struct.insn_operand_data* %arrayidx3, i32 0, i32 0
- %6 = load i32 (%struct.rtx_def*, i32)** %predicate, align 8
+ %arrayidx2 = getelementptr inbounds [0 x %struct.insn_data], [0 x %struct.insn_data]* @insn_data, i32 0, i64 %idxprom1
+ %operand = getelementptr inbounds %struct.insn_data, %struct.insn_data* %arrayidx2, i32 0, i32 3
+ %5 = load %struct.insn_operand_data*, %struct.insn_operand_data** %operand, align 8
+ %arrayidx3 = getelementptr inbounds %struct.insn_operand_data, %struct.insn_operand_data* %5, i64 0
+ %predicate = getelementptr inbounds %struct.insn_operand_data, %struct.insn_operand_data* %arrayidx3, i32 0, i32 0
+ %6 = load i32 (%struct.rtx_def*, i32)*, i32 (%struct.rtx_def*, i32)** %predicate, align 8
%idxprom4 = sext i32 %4 to i64
- %arrayidx5 = getelementptr inbounds [0 x %struct.insn_data]* @insn_data, i32 0, i64 %idxprom4
- %operand6 = getelementptr inbounds %struct.insn_data* %arrayidx5, i32 0, i32 3
- %7 = load %struct.insn_operand_data** %operand6, align 8
- %arrayidx7 = getelementptr inbounds %struct.insn_operand_data* %7, i64 0
+ %arrayidx5 = getelementptr inbounds [0 x %struct.insn_data], [0 x %struct.insn_data]* @insn_data, i32 0, i64 %idxprom4
+ %operand6 = getelementptr inbounds %struct.insn_data, %struct.insn_data* %arrayidx5, i32 0, i32 3
+ %7 = load %struct.insn_operand_data*, %struct.insn_operand_data** %operand6, align 8
+ %arrayidx7 = getelementptr inbounds %struct.insn_operand_data, %struct.insn_operand_data* %7, i64 0
%8 = bitcast %struct.insn_operand_data* %arrayidx7 to i8*
- %bf.field.offs = getelementptr i8* %8, i32 16
+ %bf.field.offs = getelementptr i8, i8* %8, i32 16
%9 = bitcast i8* %bf.field.offs to i32*
- %10 = load i32* %9, align 8
+ %10 = load i32, i32* %9, align 8
%bf.clear8 = and i32 %10, 65535
%call = tail call i32 %6(%struct.rtx_def* %r0, i32 %bf.clear8)
%tobool = icmp ne i32 %call, 0
@@ -52,21 +52,21 @@ lor.lhs.false: ; preds = %entry
lor.lhs.false9: ; preds = %lor.lhs.false
%idxprom10 = sext i32 %4 to i64
- %arrayidx11 = getelementptr inbounds [0 x %struct.insn_data]* @insn_data, i32 0, i64 %idxprom10
- %operand12 = getelementptr inbounds %struct.insn_data* %arrayidx11, i32 0, i32 3
- %11 = load %struct.insn_operand_data** %operand12, align 8
- %arrayidx13 = getelementptr inbounds %struct.insn_operand_data* %11, i64 1
- %predicate14 = getelementptr inbounds %struct.insn_operand_data* %arrayidx13, i32 0, i32 0
- %12 = load i32 (%struct.rtx_def*, i32)** %predicate14, align 8
+ %arrayidx11 = getelementptr inbounds [0 x %struct.insn_data], [0 x %struct.insn_data]* @insn_data, i32 0, i64 %idxprom10
+ %operand12 = getelementptr inbounds %struct.insn_data, %struct.insn_data* %arrayidx11, i32 0, i32 3
+ %11 = load %struct.insn_operand_data*, %struct.insn_operand_data** %operand12, align 8
+ %arrayidx13 = getelementptr inbounds %struct.insn_operand_data, %struct.insn_operand_data* %11, i64 1
+ %predicate14 = getelementptr inbounds %struct.insn_operand_data, %struct.insn_operand_data* %arrayidx13, i32 0, i32 0
+ %12 = load i32 (%struct.rtx_def*, i32)*, i32 (%struct.rtx_def*, i32)** %predicate14, align 8
%idxprom15 = sext i32 %4 to i64
- %arrayidx16 = getelementptr inbounds [0 x %struct.insn_data]* @insn_data, i32 0, i64 %idxprom15
- %operand17 = getelementptr inbounds %struct.insn_data* %arrayidx16, i32 0, i32 3
- %13 = load %struct.insn_operand_data** %operand17, align 8
- %arrayidx18 = getelementptr inbounds %struct.insn_operand_data* %13, i64 1
+ %arrayidx16 = getelementptr inbounds [0 x %struct.insn_data], [0 x %struct.insn_data]* @insn_data, i32 0, i64 %idxprom15
+ %operand17 = getelementptr inbounds %struct.insn_data, %struct.insn_data* %arrayidx16, i32 0, i32 3
+ %13 = load %struct.insn_operand_data*, %struct.insn_operand_data** %operand17, align 8
+ %arrayidx18 = getelementptr inbounds %struct.insn_operand_data, %struct.insn_operand_data* %13, i64 1
%14 = bitcast %struct.insn_operand_data* %arrayidx18 to i8*
- %bf.field.offs19 = getelementptr i8* %14, i32 16
+ %bf.field.offs19 = getelementptr i8, i8* %14, i32 16
%15 = bitcast i8* %bf.field.offs19 to i32*
- %16 = load i32* %15, align 8
+ %16 = load i32, i32* %15, align 8
%bf.clear20 = and i32 %16, 65535
%call21 = tail call i32 %12(%struct.rtx_def* %r1, i32 %bf.clear20)
%tobool22 = icmp ne i32 %call21, 0
@@ -74,21 +74,21 @@ lor.lhs.false9: ; preds = %lor.lhs.false
lor.lhs.false23: ; preds = %lor.lhs.false9
%idxprom24 = sext i32 %4 to i64
- %arrayidx25 = getelementptr inbounds [0 x %struct.insn_data]* @insn_data, i32 0, i64 %idxprom24
- %operand26 = getelementptr inbounds %struct.insn_data* %arrayidx25, i32 0, i32 3
- %17 = load %struct.insn_operand_data** %operand26, align 8
- %arrayidx27 = getelementptr inbounds %struct.insn_operand_data* %17, i64 2
- %predicate28 = getelementptr inbounds %struct.insn_operand_data* %arrayidx27, i32 0, i32 0
- %18 = load i32 (%struct.rtx_def*, i32)** %predicate28, align 8
+ %arrayidx25 = getelementptr inbounds [0 x %struct.insn_data], [0 x %struct.insn_data]* @insn_data, i32 0, i64 %idxprom24
+ %operand26 = getelementptr inbounds %struct.insn_data, %struct.insn_data* %arrayidx25, i32 0, i32 3
+ %17 = load %struct.insn_operand_data*, %struct.insn_operand_data** %operand26, align 8
+ %arrayidx27 = getelementptr inbounds %struct.insn_operand_data, %struct.insn_operand_data* %17, i64 2
+ %predicate28 = getelementptr inbounds %struct.insn_operand_data, %struct.insn_operand_data* %arrayidx27, i32 0, i32 0
+ %18 = load i32 (%struct.rtx_def*, i32)*, i32 (%struct.rtx_def*, i32)** %predicate28, align 8
%idxprom29 = sext i32 %4 to i64
- %arrayidx30 = getelementptr inbounds [0 x %struct.insn_data]* @insn_data, i32 0, i64 %idxprom29
- %operand31 = getelementptr inbounds %struct.insn_data* %arrayidx30, i32 0, i32 3
- %19 = load %struct.insn_operand_data** %operand31, align 8
- %arrayidx32 = getelementptr inbounds %struct.insn_operand_data* %19, i64 2
+ %arrayidx30 = getelementptr inbounds [0 x %struct.insn_data], [0 x %struct.insn_data]* @insn_data, i32 0, i64 %idxprom29
+ %operand31 = getelementptr inbounds %struct.insn_data, %struct.insn_data* %arrayidx30, i32 0, i32 3
+ %19 = load %struct.insn_operand_data*, %struct.insn_operand_data** %operand31, align 8
+ %arrayidx32 = getelementptr inbounds %struct.insn_operand_data, %struct.insn_operand_data* %19, i64 2
%20 = bitcast %struct.insn_operand_data* %arrayidx32 to i8*
- %bf.field.offs33 = getelementptr i8* %20, i32 16
+ %bf.field.offs33 = getelementptr i8, i8* %20, i32 16
%21 = bitcast i8* %bf.field.offs33 to i32*
- %22 = load i32* %21, align 8
+ %22 = load i32, i32* %21, align 8
%bf.clear34 = and i32 %22, 65535
%call35 = tail call i32 %18(%struct.rtx_def* %c, i32 %bf.clear34)
%tobool36 = icmp ne i32 %call35, 0
@@ -99,10 +99,10 @@ if.then: ; preds = %lor.lhs.false23, %l
if.end: ; preds = %lor.lhs.false23
%idxprom37 = sext i32 %4 to i64
- %arrayidx38 = getelementptr inbounds [0 x %struct.insn_data]* @insn_data, i32 0, i64 %idxprom37
- %genfun = getelementptr inbounds %struct.insn_data* %arrayidx38, i32 0, i32 2
- %23 = load %struct.rtx_def* (%struct.rtx_def*, ...)** %genfun, align 8
- %call39 = tail call %struct.rtx_def* (%struct.rtx_def*, ...)* %23(%struct.rtx_def* %r0, %struct.rtx_def* %r1, %struct.rtx_def* %c)
+ %arrayidx38 = getelementptr inbounds [0 x %struct.insn_data], [0 x %struct.insn_data]* @insn_data, i32 0, i64 %idxprom37
+ %genfun = getelementptr inbounds %struct.insn_data, %struct.insn_data* %arrayidx38, i32 0, i32 2
+ %23 = load %struct.rtx_def* (%struct.rtx_def*, ...)*, %struct.rtx_def* (%struct.rtx_def*, ...)** %genfun, align 8
+ %call39 = tail call %struct.rtx_def* (%struct.rtx_def*, ...) %23(%struct.rtx_def* %r0, %struct.rtx_def* %r1, %struct.rtx_def* %c)
br label %return
return: ; preds = %if.end, %if.then
diff --git a/test/CodeGen/X86/2011-10-18-FastISel-VectorParams.ll b/test/CodeGen/X86/2011-10-18-FastISel-VectorParams.ll
index e7d1e194d9cd..c9dc050d0b4e 100644
--- a/test/CodeGen/X86/2011-10-18-FastISel-VectorParams.ll
+++ b/test/CodeGen/X86/2011-10-18-FastISel-VectorParams.ll
@@ -15,11 +15,11 @@ entry:
store <4 x float> <float 0x4008CCCCC0000000, float 0x40099999A0000000, float 0x400A666660000000, float 0x400B333340000000>, <4 x float>* %p3, align 16
store <4 x float> <float 0x4010666660000000, float 0x4010CCCCC0000000, float 0x4011333340000000, float 0x40119999A0000000>, <4 x float>* %p4, align 16
store <4 x float> <float 0x4014666660000000, float 0x4014CCCCC0000000, float 0x4015333340000000, float 0x40159999A0000000>, <4 x float>* %p5, align 16
- %0 = load <4 x float>* %p1, align 16
- %1 = load <4 x float>* %p2, align 16
- %2 = load <4 x float>* %p3, align 16
- %3 = load <4 x float>* %p4, align 16
- %4 = load <4 x float>* %p5, align 16
+ %0 = load <4 x float>, <4 x float>* %p1, align 16
+ %1 = load <4 x float>, <4 x float>* %p2, align 16
+ %2 = load <4 x float>, <4 x float>* %p3, align 16
+ %3 = load <4 x float>, <4 x float>* %p4, align 16
+ %4 = load <4 x float>, <4 x float>* %p5, align 16
; CHECK: movups {{%xmm[0-7]}}, (%esp)
; CHECK-NEXT: calll _dovectortest
call void @dovectortest(<4 x float> %0, <4 x float> %1, <4 x float> %2, <4 x float> %3, <4 x float> %4)
diff --git a/test/CodeGen/X86/2011-10-19-LegelizeLoad.ll b/test/CodeGen/X86/2011-10-19-LegelizeLoad.ll
index 07a6910c65e0..bf1f029847ea 100644
--- a/test/CodeGen/X86/2011-10-19-LegelizeLoad.ll
+++ b/test/CodeGen/X86/2011-10-19-LegelizeLoad.ll
@@ -18,10 +18,10 @@ define i32 @main() nounwind uwtable {
entry:
; CHECK: pmovsxbq i(%rip), %
; CHECK: pmovsxbq j(%rip), %
- %0 = load <2 x i8>* @i, align 8
- %1 = load <2 x i8>* @j, align 8
+ %0 = load <2 x i8>, <2 x i8>* @i, align 8
+ %1 = load <2 x i8>, <2 x i8>* @j, align 8
%div = sdiv <2 x i8> %1, %0
- store <2 x i8> %div, <2 x i8>* getelementptr inbounds (%union.anon* @res, i32 0, i32 0), align 8
+ store <2 x i8> %div, <2 x i8>* getelementptr inbounds (%union.anon, %union.anon* @res, i32 0, i32 0), align 8
ret i32 0
; CHECK: ret
}
diff --git a/test/CodeGen/X86/2011-10-19-widen_vselect.ll b/test/CodeGen/X86/2011-10-19-widen_vselect.ll
index 222068dc579f..07dff9539cd0 100644
--- a/test/CodeGen/X86/2011-10-19-widen_vselect.ll
+++ b/test/CodeGen/X86/2011-10-19-widen_vselect.ll
@@ -26,7 +26,7 @@ entry:
}
; CHECK-LABEL: zero_test
-; CHECK: xorps %xmm0, %xmm0
+; CHECK: xorps %xmm0, %xmm0
; CHECK: ret
define void @zero_test() {
@@ -49,7 +49,7 @@ define void @full_test() {
br label %B1
B1: ; preds = %entry
- %0 = load <2 x float>* %Cy119
+ %0 = load <2 x float>, <2 x float>* %Cy119
%1 = fptosi <2 x float> %0 to <2 x i32>
%2 = sitofp <2 x i32> %1 to <2 x float>
%3 = fcmp ogt <2 x float> %0, zeroinitializer
@@ -58,7 +58,7 @@ define void @full_test() {
%6 = fcmp oeq <2 x float> %2, %0
%7 = select <2 x i1> %6, <2 x float> %0, <2 x float> %5
store <2 x float> %7, <2 x float>* %Cy118
- %8 = load <2 x float>* %Cy118
+ %8 = load <2 x float>, <2 x float>* %Cy118
store <2 x float> %8, <2 x float>* %Cy11a
ret void
}
diff --git a/test/CodeGen/X86/2011-10-27-tstore.ll b/test/CodeGen/X86/2011-10-27-tstore.ll
index 6dea92b63071..290b4d0cb00b 100644
--- a/test/CodeGen/X86/2011-10-27-tstore.ll
+++ b/test/CodeGen/X86/2011-10-27-tstore.ll
@@ -8,7 +8,7 @@ target triple = "x86_64-unknown-linux-gnu"
;CHECK: ret
define void @ltstore(<4 x i32>* %pA, <2 x i32>* %pB) {
entry:
- %in = load <4 x i32>* %pA
+ %in = load <4 x i32>, <4 x i32>* %pA
%j = shufflevector <4 x i32> %in, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
store <2 x i32> %j, <2 x i32>* %pB
ret void
diff --git a/test/CodeGen/X86/2011-11-22-AVX2-Domains.ll b/test/CodeGen/X86/2011-11-22-AVX2-Domains.ll
index 8174109378de..dffd6d1cee51 100644
--- a/test/CodeGen/X86/2011-11-22-AVX2-Domains.ll
+++ b/test/CodeGen/X86/2011-11-22-AVX2-Domains.ll
@@ -18,9 +18,9 @@ if_else: ; preds = %allocas
br i1 undef, label %for_loop156.lr.ph, label %if_exit
for_loop156.lr.ph: ; preds = %if_else
- %val_6.i21244 = load i16* undef, align 2
+ %val_6.i21244 = load i16, i16* undef, align 2
%0 = insertelement <8 x i16> undef, i16 %val_6.i21244, i32 6
- %val_7.i21248 = load i16* undef, align 2
+ %val_7.i21248 = load i16, i16* undef, align 2
%1 = insertelement <8 x i16> %0, i16 %val_7.i21248, i32 7
%uint2uint32.i20206 = zext <8 x i16> %1 to <8 x i32>
%bitop5.i20208 = and <8 x i32> %uint2uint32.i20206, <i32 31744, i32 31744, i32 31744, i32 31744, i32 31744, i32 31744, i32 31744, i32 31744>
@@ -39,26 +39,26 @@ for_loop156.lr.ph: ; preds = %if_else
%binop407 = fadd <8 x float> %binop406, <float -2.000000e+00, float -2.000000e+00, float -2.000000e+00, float -2.000000e+00, float -2.000000e+00, float -2.000000e+00, float -2.000000e+00, float -2.000000e+00>
%binop408 = fmul <8 x float> zeroinitializer, %binop407
%binop411 = fsub <8 x float> <float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00>, undef
- %val_4.i21290 = load i16* undef, align 2
+ %val_4.i21290 = load i16, i16* undef, align 2
%2 = insertelement <8 x i16> undef, i16 %val_4.i21290, i32 4
- %val_5.i21294 = load i16* undef, align 2
+ %val_5.i21294 = load i16, i16* undef, align 2
%3 = insertelement <8 x i16> %2, i16 %val_5.i21294, i32 5
- %val_6.i21298 = load i16* undef, align 2
+ %val_6.i21298 = load i16, i16* undef, align 2
%4 = insertelement <8 x i16> %3, i16 %val_6.i21298, i32 6
%ptr_7.i21301 = inttoptr i64 undef to i16*
- %val_7.i21302 = load i16* %ptr_7.i21301, align 2
+ %val_7.i21302 = load i16, i16* %ptr_7.i21301, align 2
%5 = insertelement <8 x i16> %4, i16 %val_7.i21302, i32 7
%uint2uint32.i20218 = zext <8 x i16> %5 to <8 x i32>
- %structelement561 = load i8** undef, align 8
+ %structelement561 = load i8*, i8** undef, align 8
%ptr2int563 = ptrtoint i8* %structelement561 to i64
%smear.ptr_smear7571 = insertelement <8 x i64> undef, i64 %ptr2int563, i32 7
%new_ptr582 = add <8 x i64> %smear.ptr_smear7571, zeroinitializer
- %val_5.i21509 = load i8* null, align 1
+ %val_5.i21509 = load i8, i8* null, align 1
%6 = insertelement <8 x i8> undef, i8 %val_5.i21509, i32 5
%7 = insertelement <8 x i8> %6, i8 undef, i32 6
%iptr_7.i21515 = extractelement <8 x i64> %new_ptr582, i32 7
%ptr_7.i21516 = inttoptr i64 %iptr_7.i21515 to i8*
- %val_7.i21517 = load i8* %ptr_7.i21516, align 1
+ %val_7.i21517 = load i8, i8* %ptr_7.i21516, align 1
%8 = insertelement <8 x i8> %7, i8 %val_7.i21517, i32 7
%uint2float.i20245 = uitofp <8 x i8> %8 to <8 x float>
%binop.i20246 = fmul <8 x float> %uint2float.i20245, <float 0x3F70101020000000, float 0x3F70101020000000, float 0x3F70101020000000, float 0x3F70101020000000, float 0x3F70101020000000, float 0x3F70101020000000, float 0x3F70101020000000, float 0x3F70101020000000>
diff --git a/test/CodeGen/X86/2011-11-30-or.ll b/test/CodeGen/X86/2011-11-30-or.ll
index 8ac4632329b3..4260e817b415 100644
--- a/test/CodeGen/X86/2011-11-30-or.ll
+++ b/test/CodeGen/X86/2011-11-30-or.ll
@@ -2,13 +2,13 @@
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32"
target triple = "x86_64-apple-macosx10.6.6"
-
-; Test that the order of operands is correct
-; CHECK: select_func
-; CHECK: pblendvb %xmm1, %xmm2
-; CHECK: ret
-
-define void @select_func(<8 x i16> %in) {
+
+; Test that the order of operands is correct
+; CHECK: select_func
+; CHECK: pblendvb {{LCPI0_[0-9]*}}(%rip), %xmm1
+; CHECK: ret
+
+define void @select_func(<8 x i16> %in) {
entry:
%c.lobit.i.i.i = ashr <8 x i16> %in, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
%and.i56.i.i.i = and <8 x i16> %c.lobit.i.i.i, <i16 25, i16 8, i16 65, i16 25, i16 8, i16 95, i16 15, i16 45>
diff --git a/test/CodeGen/X86/2011-12-06-AVXVectorExtractCombine.ll b/test/CodeGen/X86/2011-12-06-AVXVectorExtractCombine.ll
index df9823aa3825..2a1a5c9fb3ea 100644
--- a/test/CodeGen/X86/2011-12-06-AVXVectorExtractCombine.ll
+++ b/test/CodeGen/X86/2011-12-06-AVXVectorExtractCombine.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx | FileCheck %s
; PR11494
define void @test(<4 x i32>* nocapture %p) nounwind {
@@ -14,5 +14,4 @@ define void @test(<4 x i32>* nocapture %p) nounwind {
ret void
}
-declare <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32>, <4 x i32>) nounwind readnone
declare <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32>, <4 x i32>) nounwind readnone
diff --git a/test/CodeGen/X86/2011-12-08-AVXISelBugs.ll b/test/CodeGen/X86/2011-12-08-AVXISelBugs.ll
index 1561784dee32..ab1b46c99d97 100644
--- a/test/CodeGen/X86/2011-12-08-AVXISelBugs.ll
+++ b/test/CodeGen/X86/2011-12-08-AVXISelBugs.ll
@@ -13,7 +13,7 @@ loop: ; preds = %loop.cond
br i1 undef, label %0, label %t1.exit
; <label>:0 ; preds = %loop
- %1 = load <16 x i32> addrspace(1)* undef, align 64
+ %1 = load <16 x i32>, <16 x i32> addrspace(1)* undef, align 64
%2 = shufflevector <16 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>, <16 x i32> %1, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 16, i32 0, i32 0>
store <16 x i32> %2, <16 x i32> addrspace(1)* undef, align 64
br label %t1.exit
@@ -29,7 +29,7 @@ define void @t2() nounwind {
br i1 undef, label %1, label %4
; <label>:1 ; preds = %0
- %2 = load <16 x i32> addrspace(1)* undef, align 64
+ %2 = load <16 x i32>, <16 x i32> addrspace(1)* undef, align 64
%3 = shufflevector <16 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>, <16 x i32> %2, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 20, i32 0, i32 0, i32 0, i32 0>
store <16 x i32> %3, <16 x i32> addrspace(1)* undef, align 64
br label %4
@@ -50,7 +50,7 @@ loop: ; preds = %loop.cond
; <label>:0 ; preds = %loop
%1 = shufflevector <16 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 25, i32 0>
- %2 = load <16 x i32> addrspace(1)* undef, align 64
+ %2 = load <16 x i32>, <16 x i32> addrspace(1)* undef, align 64
%3 = shufflevector <16 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>, <16 x i32> %2, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 28, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
store <16 x i32> %3, <16 x i32> addrspace(1)* undef, align 64
br label %t2.exit
@@ -64,7 +64,7 @@ return: ; preds = %loop.cond
define <3 x i64> @t4() nounwind {
entry:
- %0 = load <2 x i64> addrspace(1)* undef, align 16
+ %0 = load <2 x i64>, <2 x i64> addrspace(1)* undef, align 16
%1 = extractelement <2 x i64> %0, i32 0
%2 = insertelement <3 x i64> <i64 undef, i64 0, i64 0>, i64 %1, i32 0
ret <3 x i64> %2
diff --git a/test/CodeGen/X86/2011-12-26-extractelement-duplicate-load.ll b/test/CodeGen/X86/2011-12-26-extractelement-duplicate-load.ll
index 14643e4ba8b8..0944adb8b002 100644
--- a/test/CodeGen/X86/2011-12-26-extractelement-duplicate-load.ll
+++ b/test/CodeGen/X86/2011-12-26-extractelement-duplicate-load.ll
@@ -8,7 +8,7 @@
; CHECK-LABEL: test:
; CHECK: pextrd $2, %xmm
define <4 x i32> @test(<4 x i32>* %p) {
- %v = load <4 x i32>* %p
+ %v = load <4 x i32>, <4 x i32>* %p
%e = extractelement <4 x i32> %v, i32 2
%cmp = icmp eq i32 %e, 3
%sel = select i1 %cmp, <4 x i32> %v, <4 x i32> zeroinitializer
diff --git a/test/CodeGen/X86/2012-01-10-UndefExceptionEdge.ll b/test/CodeGen/X86/2012-01-10-UndefExceptionEdge.ll
index 501a8101a3fe..21443441c9f3 100644
--- a/test/CodeGen/X86/2012-01-10-UndefExceptionEdge.ll
+++ b/test/CodeGen/X86/2012-01-10-UndefExceptionEdge.ll
@@ -34,8 +34,8 @@ bb11: ; preds = %bb7
%tmp12 = ptrtoint i8* %tmp10 to i32
%tmp13 = bitcast i8* %tmp10 to i32*
%tmp14 = shl i32 %tmp8, 2
- %tmp15 = getelementptr i32* %tmp13, i32 undef
- %tmp16 = getelementptr i32* %tmp13, i32 undef
+ %tmp15 = getelementptr i32, i32* %tmp13, i32 undef
+ %tmp16 = getelementptr i32, i32* %tmp13, i32 undef
%tmp17 = zext i32 %tmp9 to i64
%tmp18 = add i64 %tmp17, -1
%tmp19 = icmp ugt i64 %tmp18, 4294967295
@@ -108,8 +108,8 @@ bb49: ; preds = %bb49, %bb48
%tmp50 = phi i32 [ %tmp55, %bb49 ], [ 0, %bb48 ]
%tmp51 = add i32 %tmp50, undef
%tmp52 = add i32 %tmp50, undef
- %tmp53 = getelementptr i32* %tmp13, i32 %tmp52
- %tmp54 = load i32* %tmp53, align 4
+ %tmp53 = getelementptr i32, i32* %tmp13, i32 %tmp52
+ %tmp54 = load i32, i32* %tmp53, align 4
%tmp55 = add i32 %tmp50, 1
%tmp56 = icmp eq i32 %tmp55, %tmp8
br i1 %tmp56, label %bb57, label %bb49
@@ -126,7 +126,7 @@ bb59: ; preds = %bb45
bb61: ; preds = %bb61, %bb59
%tmp62 = phi i32 [ %tmp65, %bb61 ], [ 0, %bb59 ]
%tmp63 = add i32 %tmp62, %tmp14
- %tmp64 = getelementptr i32* %tmp13, i32 %tmp63
+ %tmp64 = getelementptr i32, i32* %tmp13, i32 %tmp63
store i32 0, i32* %tmp64, align 4
%tmp65 = add i32 %tmp62, 1
%tmp66 = icmp eq i32 %tmp65, %tmp8
diff --git a/test/CodeGen/X86/2012-01-11-split-cv.ll b/test/CodeGen/X86/2012-01-11-split-cv.ll
index 69d4b93bb78d..cb39ed911976 100644
--- a/test/CodeGen/X86/2012-01-11-split-cv.ll
+++ b/test/CodeGen/X86/2012-01-11-split-cv.ll
@@ -3,7 +3,7 @@
;CHECK-LABEL: add18i16:
define void @add18i16(<18 x i16>* nocapture sret %ret, <18 x i16>* %bp) nounwind {
;CHECK: vmovaps
- %b = load <18 x i16>* %bp, align 16
+ %b = load <18 x i16>, <18 x i16>* %bp, align 16
%x = add <18 x i16> zeroinitializer, %b
store <18 x i16> %x, <18 x i16>* %ret, align 16
;CHECK: ret
diff --git a/test/CodeGen/X86/2012-01-12-extract-sv.ll b/test/CodeGen/X86/2012-01-12-extract-sv.ll
index fa8e80f0bdef..677c902668bc 100644
--- a/test/CodeGen/X86/2012-01-12-extract-sv.ll
+++ b/test/CodeGen/X86/2012-01-12-extract-sv.ll
@@ -1,12 +1,25 @@
-; RUN: llc < %s -march=x86 -mcpu=corei7-avx -mattr=+avx -mtriple=i686-pc-win32 | FileCheck %s
+; RUN: llc < %s -mattr=+avx -mtriple=i686-pc-win32 | FileCheck %s
-; CHECK: endless_loop
define void @endless_loop() {
+; CHECK-LABEL: endless_loop:
+; CHECK-NEXT: # BB#0:
+; CHECK-NEXT: vmovaps (%eax), %ymm0
+; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
+; CHECK-NEXT: vmovsldup %xmm0, %xmm0 # xmm0 = xmm0[0,0,2,2]
+; CHECK-NEXT: vmovddup %xmm0, %xmm1 # xmm1 = xmm0[0,0]
+; CHECK-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1
+; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vblendps $128, %ymm1, %ymm2, %ymm1 # ymm1 = ymm2[0,1,2,3,4,5,6],ymm1[7]
+; CHECK-NEXT: vxorps %ymm2, %ymm2, %ymm2
+; CHECK-NEXT: vblendps $1, %ymm0, %ymm2, %ymm0 # ymm0 = ymm0[0],ymm2[1,2,3,4,5,6,7]
+; CHECK-NEXT: vmovaps %ymm0, (%eax)
+; CHECK-NEXT: vmovaps %ymm1, (%eax)
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retl
entry:
- %0 = load <8 x i32> addrspace(1)* undef, align 32
+ %0 = load <8 x i32>, <8 x i32> addrspace(1)* undef, align 32
%1 = shufflevector <8 x i32> %0, <8 x i32> undef, <16 x i32> <i32 4, i32 4, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
%2 = shufflevector <16 x i32> <i32 undef, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 undef>, <16 x i32> %1, <16 x i32> <i32 16, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 17>
store <16 x i32> %2, <16 x i32> addrspace(1)* undef, align 64
ret void
-; CHECK: ret
}
diff --git a/test/CodeGen/X86/2012-01-16-mfence-nosse-flags.ll b/test/CodeGen/X86/2012-01-16-mfence-nosse-flags.ll
index b78c13f9d4e6..a6c34b8fffa1 100644
--- a/test/CodeGen/X86/2012-01-16-mfence-nosse-flags.ll
+++ b/test/CodeGen/X86/2012-01-16-mfence-nosse-flags.ll
@@ -5,7 +5,7 @@
define void @baz() nounwind ssp {
entry:
- %0 = load i8** @ptr, align 4
+ %0 = load i8*, i8** @ptr, align 4
%cmp = icmp eq i8* %0, null
fence seq_cst
br i1 %cmp, label %if.then, label %if.else
@@ -13,8 +13,7 @@ entry:
; Make sure the fence comes before the comparison, since it
; clobbers EFLAGS.
-; CHECK: lock
-; CHECK-NEXT: orl {{.*}}, (%esp)
+; CHECK: lock orl {{.*}}, (%esp)
; CHECK-NEXT: testl [[REG:%e[a-z]+]], [[REG]]
if.then: ; preds = %entry
diff --git a/test/CodeGen/X86/2012-02-12-dagco.ll b/test/CodeGen/X86/2012-02-12-dagco.ll
index 13723a229943..5d48c142dc1d 100644
--- a/test/CodeGen/X86/2012-02-12-dagco.ll
+++ b/test/CodeGen/X86/2012-02-12-dagco.ll
@@ -3,9 +3,9 @@ target triple = "x86_64-unknown-linux-gnu"
; Make sure we are not crashing on this one
define void @dagco_crash() {
entry:
- %srcval.i411.i = load <4 x i64>* undef, align 1
+ %srcval.i411.i = load <4 x i64>, <4 x i64>* undef, align 1
%0 = extractelement <4 x i64> %srcval.i411.i, i32 3
- %srcval.i409.i = load <2 x i64>* undef, align 1
+ %srcval.i409.i = load <2 x i64>, <2 x i64>* undef, align 1
%1 = extractelement <2 x i64> %srcval.i409.i, i32 0
%2 = insertelement <8 x i64> undef, i64 %0, i32 5
%3 = insertelement <8 x i64> %2, i64 %1, i32 6
diff --git a/test/CodeGen/X86/2012-02-29-CoalescerBug.ll b/test/CodeGen/X86/2012-02-29-CoalescerBug.ll
index bdce85325f37..bbeb2a022192 100644
--- a/test/CodeGen/X86/2012-02-29-CoalescerBug.ll
+++ b/test/CodeGen/X86/2012-02-29-CoalescerBug.ll
@@ -14,9 +14,9 @@ target triple = "i386-apple-macosx10.7.0"
define void @fn2() nounwind optsize ssp {
entry:
store i64 0, i64* bitcast ([2 x [2 x %struct.S0]]* @d to i64*), align 4
- %0 = load i32* @c, align 4
+ %0 = load i32, i32* @c, align 4
%tobool2 = icmp eq i32 %0, 0
- %1 = load i32* @a, align 4
+ %1 = load i32, i32* @a, align 4
%tobool4 = icmp eq i32 %1, 0
br label %for.cond
diff --git a/test/CodeGen/X86/2012-03-26-PostRALICMBug.ll b/test/CodeGen/X86/2012-03-26-PostRALICMBug.ll
index 90d8d3d2dd6d..260f059492ff 100644
--- a/test/CodeGen/X86/2012-03-26-PostRALICMBug.ll
+++ b/test/CodeGen/X86/2012-03-26-PostRALICMBug.ll
@@ -20,8 +20,8 @@ entry:
indirectbr i8* undef, [label %return, label %if.end]
if.end: ; preds = %entry
- %size5 = getelementptr inbounds %struct.ref_s* %op, i64 0, i32 2
- %tmp6 = load i16* %size5, align 2
+ %size5 = getelementptr inbounds %struct.ref_s, %struct.ref_s* %op, i64 0, i32 2
+ %tmp6 = load i16, i16* %size5, align 2
%tobool1 = icmp eq i16 %tmp6, 0
%1 = select i1 %tobool1, i32 1396, i32 -1910
%index10 = add i32 %index9, %1
@@ -29,19 +29,19 @@ if.end: ; preds = %entry
while.body.lr.ph: ; preds = %if.end
%refs = bitcast %struct.ref_s* %op to %struct.ref_s**
- %tmp9 = load %struct.ref_s** %refs, align 8
+ %tmp9 = load %struct.ref_s*, %struct.ref_s** %refs, align 8
%tmp4 = zext i16 %tmp6 to i64
%index13 = add i32 %index10, 1658
%2 = sext i32 %index13 to i64
- %3 = getelementptr [3891 x i64]* @table, i64 0, i64 %2
- %blockaddress14 = load i64* %3, align 8
+ %3 = getelementptr [3891 x i64], [3891 x i64]* @table, i64 0, i64 %2
+ %blockaddress14 = load i64, i64* %3, align 8
%4 = inttoptr i64 %blockaddress14 to i8*
indirectbr i8* %4, [label %while.body]
while.body: ; preds = %while.body, %while.body.lr.ph
%index7 = phi i32 [ %index15, %while.body ], [ %index13, %while.body.lr.ph ]
%indvar = phi i64 [ %indvar.next, %while.body ], [ 0, %while.body.lr.ph ]
- %type_attrs = getelementptr %struct.ref_s* %tmp9, i64 %indvar, i32 1
+ %type_attrs = getelementptr %struct.ref_s, %struct.ref_s* %tmp9, i64 %indvar, i32 1
store i16 32, i16* %type_attrs, align 2
%indvar.next = add i64 %indvar, 1
%exitcond5 = icmp eq i64 %indvar.next, %tmp4
@@ -49,8 +49,8 @@ while.body: ; preds = %while.body, %while.
%index15 = add i32 %index7, %tmp7
%tmp8 = select i1 %exitcond5, i64 13, i64 0
%5 = sext i32 %index15 to i64
- %6 = getelementptr [3891 x i64]* @table, i64 0, i64 %5
- %blockaddress16 = load i64* %6, align 8
+ %6 = getelementptr [3891 x i64], [3891 x i64]* @table, i64 0, i64 %5
+ %blockaddress16 = load i64, i64* %6, align 8
%7 = inttoptr i64 %blockaddress16 to i8*
indirectbr i8* %7, [label %return, label %while.body]
diff --git a/test/CodeGen/X86/2012-04-26-sdglue.ll b/test/CodeGen/X86/2012-04-26-sdglue.ll
index 6651af705551..4e3f1f4a6e4d 100644
--- a/test/CodeGen/X86/2012-04-26-sdglue.ll
+++ b/test/CodeGen/X86/2012-04-26-sdglue.ll
@@ -5,18 +5,18 @@
; It's hard to test for the ISEL condition because CodeGen optimizes
; away the bugpointed code. Just ensure the basics are still there.
;CHECK-LABEL: func:
-;CHECK: vpxor
-;CHECK: vinserti128
+;CHECK: vxorps
;CHECK: vpshufd
;CHECK: vpbroadcastd
+;CHECK: vinserti128
;CHECK: vmulps
;CHECK: vmulps
;CHECK: ret
define void @func() nounwind ssp {
- %tmp = load <4 x float>* null, align 1
- %tmp14 = getelementptr <4 x float>* null, i32 2
- %tmp15 = load <4 x float>* %tmp14, align 1
+ %tmp = load <4 x float>, <4 x float>* null, align 1
+ %tmp14 = getelementptr <4 x float>, <4 x float>* null, i32 2
+ %tmp15 = load <4 x float>, <4 x float>* %tmp14, align 1
%tmp16 = shufflevector <4 x float> %tmp, <4 x float> <float 0.000000e+00, float undef, float undef, float undef>, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 4, i32 4>
%tmp17 = call <8 x float> @llvm.x86.avx.vinsertf128.ps.256(<8 x float> %tmp16, <4 x float> undef, i8 1)
%tmp18 = bitcast <4 x float> %tmp to <16 x i8>
diff --git a/test/CodeGen/X86/2012-07-10-extload64.ll b/test/CodeGen/X86/2012-07-10-extload64.ll
index 723302723b6b..a366102fbd74 100644
--- a/test/CodeGen/X86/2012-07-10-extload64.ll
+++ b/test/CodeGen/X86/2012-07-10-extload64.ll
@@ -4,9 +4,9 @@
define void @load_store(<4 x i16>* %in) {
entry:
; CHECK: pmovzxwd
- %A27 = load <4 x i16>* %in, align 4
+ %A27 = load <4 x i16>, <4 x i16>* %in, align 4
%A28 = add <4 x i16> %A27, %A27
-; CHECK: movlpd
+; CHECK: movq
store <4 x i16> %A28, <4 x i16>* %in, align 4
ret void
; CHECK: ret
@@ -18,14 +18,14 @@ define void @store_64(<2 x i32>* %ptr) {
BB:
store <2 x i32> zeroinitializer, <2 x i32>* %ptr
ret void
-;CHECK: movlpd
+;CHECK: movlps
;CHECK: ret
}
;CHECK-LABEL: load_64:
define <2 x i32> @load_64(<2 x i32>* %ptr) {
BB:
- %t = load <2 x i32>* %ptr
+ %t = load <2 x i32>, <2 x i32>* %ptr
ret <2 x i32> %t
;CHECK: pmovzxdq
;CHECK: ret
diff --git a/test/CodeGen/X86/2012-07-15-broadcastfold.ll b/test/CodeGen/X86/2012-07-15-broadcastfold.ll
index 519c7cac736f..7c8c2f28348a 100644
--- a/test/CodeGen/X86/2012-07-15-broadcastfold.ll
+++ b/test/CodeGen/X86/2012-07-15-broadcastfold.ll
@@ -1,5 +1,4 @@
; RUN: llc < %s -march=x86 -mcpu=corei7 -mattr=+avx2 | FileCheck %s
-; RUN: llc < %s -march=x86 -mcpu=corei7 -mattr=+avx2 -x86-experimental-vector-shuffle-lowering | FileCheck %s
declare x86_fastcallcc i64 @barrier()
@@ -10,7 +9,7 @@ declare x86_fastcallcc i64 @barrier()
;CHECK: ret
define <8 x float> @bcast_fold( float* %A) {
BB:
- %A0 = load float* %A
+ %A0 = load float, float* %A
%tt3 = call x86_fastcallcc i64 @barrier()
br i1 undef, label %work, label %exit
diff --git a/test/CodeGen/X86/2012-08-17-legalizer-crash.ll b/test/CodeGen/X86/2012-08-17-legalizer-crash.ll
index 0d18267fcde1..a19aa52f302f 100644
--- a/test/CodeGen/X86/2012-08-17-legalizer-crash.ll
+++ b/test/CodeGen/X86/2012-08-17-legalizer-crash.ll
@@ -12,9 +12,9 @@ target triple = "x86_64-apple-macosx10.8.0"
define void @fn1() nounwind uwtable ssp {
entry:
- %0 = load %struct._GtkSheetRow** @a, align 8
+ %0 = load %struct._GtkSheetRow*, %struct._GtkSheetRow** @a, align 8
%1 = bitcast %struct._GtkSheetRow* %0 to i576*
- %srcval2 = load i576* %1, align 8
+ %srcval2 = load i576, i576* %1, align 8
%tobool = icmp ugt i576 %srcval2, 57586096570152913699974892898380567793532123114264532903689671329431521032595044740083720782129802971518987656109067457577065805510327036019308994315074097345724415
br i1 %tobool, label %if.then, label %if.end
diff --git a/test/CodeGen/X86/2012-09-28-CGPBug.ll b/test/CodeGen/X86/2012-09-28-CGPBug.ll
index 32d7d012dd14..a8e0625e85c1 100644
--- a/test/CodeGen/X86/2012-09-28-CGPBug.ll
+++ b/test/CodeGen/X86/2012-09-28-CGPBug.ll
@@ -16,10 +16,10 @@
define void @h(i8*) nounwind ssp {
%2 = alloca i8*
store i8* %0, i8** %2
- %3 = load i8** %2
+ %3 = load i8*, i8** %2
%4 = bitcast i8* %3 to { i32, i32 }*
- %5 = getelementptr { i32, i32 }* %4, i32 0, i32 0
- %6 = load i32* %5
+ %5 = getelementptr { i32, i32 }, { i32, i32 }* %4, i32 0, i32 0
+ %6 = load i32, i32* %5
%7 = srem i32 %6, 2
%8 = icmp slt i32 %6, 2
%9 = select i1 %8, i32 %6, i32 %7
@@ -28,14 +28,14 @@ define void @h(i8*) nounwind ssp {
; <label>:11 ; preds = %1
%12 = zext i1 %10 to i32
- %13 = getelementptr [4 x i32]* @JT, i32 0, i32 %12
- %14 = load i32* %13
+ %13 = getelementptr [4 x i32], [4 x i32]* @JT, i32 0, i32 %12
+ %14 = load i32, i32* %13
%15 = add i32 %14, ptrtoint (i8* blockaddress(@h, %11) to i32)
%16 = inttoptr i32 %15 to i8*
indirectbr i8* %16, [label %17, label %18]
; <label>:17 ; preds = %11
- tail call void (i8*, ...)* @g(i8* getelementptr inbounds ([35 x i8]* @.str40, i32 0, i32 0))
+ tail call void (i8*, ...) @g(i8* getelementptr inbounds ([35 x i8], [35 x i8]* @.str40, i32 0, i32 0))
br label %22
; <label>:18 ; preds = %11
diff --git a/test/CodeGen/X86/2012-1-10-buildvector.ll b/test/CodeGen/X86/2012-1-10-buildvector.ll
index a5f64c5eaf55..d1c0266941fd 100644
--- a/test/CodeGen/X86/2012-1-10-buildvector.ll
+++ b/test/CodeGen/X86/2012-1-10-buildvector.ll
@@ -1,26 +1,28 @@
-; RUN: llc < %s -march=x86 -mcpu=corei7-avx -mattr=+avx -mtriple=i686-pc-win32 | FileCheck %s
+; RUN: llc < %s -mattr=+avx -mtriple=i686-unknown-unknown | FileCheck %s
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f80:128:128-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32-S32"
-target triple = "i686-pc-win32"
-
-;CHECK-LABEL: bad_cast:
define void @bad_cast() {
-entry:
+; CHECK-LABEL: bad_cast:
+; CHECK: # BB#0:
+; CHECK-NEXT: vxorps %xmm0, %xmm0, %xmm0
+; CHECK-NEXT: vmovaps %xmm0, (%eax)
+; CHECK-NEXT: movl $0, (%eax)
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retl
%vext.i = shufflevector <2 x i64> undef, <2 x i64> undef, <3 x i32> <i32 0, i32 1, i32 undef>
%vecinit8.i = shufflevector <3 x i64> zeroinitializer, <3 x i64> %vext.i, <3 x i32> <i32 0, i32 3, i32 4>
store <3 x i64> %vecinit8.i, <3 x i64>* undef, align 32
-;CHECK: ret
ret void
}
-
-;CHECK-LABEL: bad_insert:
define void @bad_insert(i32 %t) {
-entry:
-;CHECK: vpinsrd
+; CHECK-LABEL: bad_insert:
+; CHECK: # BB#0:
+; CHECK-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-NEXT: vmovaps %ymm0, (%eax)
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retl
%v2 = insertelement <8 x i32> zeroinitializer, i32 %t, i32 0
store <8 x i32> %v2, <8 x i32> addrspace(1)* undef, align 32
-;CHECK: ret
ret void
}
diff --git a/test/CodeGen/X86/2012-10-02-DAGCycle.ll b/test/CodeGen/X86/2012-10-02-DAGCycle.ll
index 403d21ae9733..c43001eb8c21 100644
--- a/test/CodeGen/X86/2012-10-02-DAGCycle.ll
+++ b/test/CodeGen/X86/2012-10-02-DAGCycle.ll
@@ -8,10 +8,10 @@
define i32 @t(%TRp* inreg %rp) nounwind optsize ssp {
entry:
- %handler = getelementptr inbounds %TRp* %rp, i32 0, i32 1
- %0 = load %TRH** %handler, align 4
- %sync = getelementptr inbounds %TRH* %0, i32 0, i32 4
- %sync12 = load {}** %sync, align 4
+ %handler = getelementptr inbounds %TRp, %TRp* %rp, i32 0, i32 1
+ %0 = load %TRH*, %TRH** %handler, align 4
+ %sync = getelementptr inbounds %TRH, %TRH* %0, i32 0, i32 4
+ %sync12 = load {}*, {}** %sync, align 4
%1 = bitcast {}* %sync12 to i32 (%TRp*)*
%call = tail call i32 %1(%TRp* inreg %rp) nounwind optsize
ret i32 %call
@@ -25,17 +25,17 @@ entry:
define { <2 x float>, <2 x float> } @t2(%btConeShape* %this) unnamed_addr uwtable ssp align 2 {
entry:
- %0 = getelementptr inbounds %btConeShape* %this, i64 0, i32 0
+ %0 = getelementptr inbounds %btConeShape, %btConeShape* %this, i64 0, i32 0
br i1 undef, label %if.then, label %if.end17
if.then: ; preds = %entry
- %vecnorm.sroa.2.8.copyload = load float* undef, align 4
+ %vecnorm.sroa.2.8.copyload = load float, float* undef, align 4
%cmp4 = fcmp olt float undef, 0x3D10000000000000
%vecnorm.sroa.2.8.copyload36 = select i1 %cmp4, float -1.000000e+00, float %vecnorm.sroa.2.8.copyload
%call.i.i.i = tail call float @sqrtf(float 0.000000e+00) nounwind readnone
%div.i.i = fdiv float 1.000000e+00, %call.i.i.i
%mul7.i.i.i = fmul float %div.i.i, %vecnorm.sroa.2.8.copyload36
- %1 = load float (%btConvexInternalShape*)** undef, align 8
+ %1 = load float (%btConvexInternalShape*)*, float (%btConvexInternalShape*)** undef, align 8
%call12 = tail call float %1(%btConvexInternalShape* %0)
%mul7.i.i = fmul float %call12, %mul7.i.i.i
%retval.sroa.0.4.insert = insertelement <2 x float> zeroinitializer, float undef, i32 1
diff --git a/test/CodeGen/X86/2012-10-03-DAGCycle.ll b/test/CodeGen/X86/2012-10-03-DAGCycle.ll
index 72083c7115e4..da92565708c8 100644
--- a/test/CodeGen/X86/2012-10-03-DAGCycle.ll
+++ b/test/CodeGen/X86/2012-10-03-DAGCycle.ll
@@ -12,14 +12,14 @@ target triple = "x86_64-apple-macosx10.8.0"
define fastcc void @bar(%struct.pluto.0* %arg) nounwind uwtable ssp align 2 {
bb:
%tmp1 = alloca %struct.widget.375, align 8
- %tmp2 = getelementptr inbounds %struct.pluto.0* %arg, i64 0, i32 1
- %tmp3 = load %struct.hoge.368** %tmp2, align 8
+ %tmp2 = getelementptr inbounds %struct.pluto.0, %struct.pluto.0* %arg, i64 0, i32 1
+ %tmp3 = load %struct.hoge.368*, %struct.hoge.368** %tmp2, align 8
store %struct.pluto.0* %arg, %struct.pluto.0** undef, align 8
- %tmp = getelementptr inbounds %struct.widget.375* %tmp1, i64 0, i32 2
- %tmp4 = getelementptr %struct.pluto.0* %arg, i64 0, i32 0, i32 0
- %tmp5 = load %i8** %tmp4, align 8
+ %tmp = getelementptr inbounds %struct.widget.375, %struct.widget.375* %tmp1, i64 0, i32 2
+ %tmp4 = getelementptr %struct.pluto.0, %struct.pluto.0* %arg, i64 0, i32 0, i32 0
+ %tmp5 = load %i8*, %i8** %tmp4, align 8
store %i8* %tmp5, %i8** %tmp, align 8
- %tmp6 = getelementptr inbounds %struct.widget.375* %tmp1, i64 0, i32 3
+ %tmp6 = getelementptr inbounds %struct.widget.375, %struct.widget.375* %tmp1, i64 0, i32 3
store %struct.hoge.368* %tmp3, %struct.hoge.368** %tmp6, align 8
br i1 undef, label %bb8, label %bb7
diff --git a/test/CodeGen/X86/2012-10-18-crash-dagco.ll b/test/CodeGen/X86/2012-10-18-crash-dagco.ll
index 5b98624a37b8..fb29241035ef 100644
--- a/test/CodeGen/X86/2012-10-18-crash-dagco.ll
+++ b/test/CodeGen/X86/2012-10-18-crash-dagco.ll
@@ -22,23 +22,23 @@ bb27: ; preds = %bb48, %bb
]
bb28: ; preds = %bb27, %bb26
- %tmp = load i32* null
+ %tmp = load i32, i32* null
%tmp29 = trunc i32 %tmp to i8
store i8* undef, i8** undef
- %tmp30 = load i32* null
+ %tmp30 = load i32, i32* null
%tmp31 = icmp eq i32 %tmp30, 0
- %tmp32 = getelementptr inbounds [411 x i8]* @global, i32 0, i32 undef
- %tmp33 = load i8* %tmp32, align 1
- %tmp34 = getelementptr inbounds [411 x i8]* @global, i32 0, i32 0
- %tmp35 = load i8* %tmp34, align 1
+ %tmp32 = getelementptr inbounds [411 x i8], [411 x i8]* @global, i32 0, i32 undef
+ %tmp33 = load i8, i8* %tmp32, align 1
+ %tmp34 = getelementptr inbounds [411 x i8], [411 x i8]* @global, i32 0, i32 0
+ %tmp35 = load i8, i8* %tmp34, align 1
%tmp36 = select i1 %tmp31, i8 %tmp35, i8 %tmp33
%tmp37 = select i1 undef, i8 %tmp29, i8 %tmp36
%tmp38 = zext i8 %tmp37 to i32
%tmp39 = select i1 undef, i32 0, i32 %tmp38
- %tmp40 = getelementptr inbounds i32* null, i32 %tmp39
- %tmp41 = load i32* %tmp40, align 4
- %tmp42 = load i32* undef, align 4
- %tmp43 = load i32* undef
+ %tmp40 = getelementptr inbounds i32, i32* null, i32 %tmp39
+ %tmp41 = load i32, i32* %tmp40, align 4
+ %tmp42 = load i32, i32* undef, align 4
+ %tmp43 = load i32, i32* undef
%tmp44 = xor i32 %tmp42, %tmp43
%tmp45 = lshr i32 %tmp44, 8
%tmp46 = lshr i32 %tmp44, 7
diff --git a/test/CodeGen/X86/2012-11-28-merge-store-alias.ll b/test/CodeGen/X86/2012-11-28-merge-store-alias.ll
index 756e86e0f801..ed1daadf6297 100644
--- a/test/CodeGen/X86/2012-11-28-merge-store-alias.ll
+++ b/test/CodeGen/X86/2012-11-28-merge-store-alias.ll
@@ -2,7 +2,7 @@
; CHECK: merge_stores_can
; CHECK: callq foo
-; CHECK-NEXT: xorps %xmm0, %xmm0
+; CHECK: xorps %xmm0, %xmm0
; CHECK-NEXT: movups %xmm0
; CHECK: callq foo
; CHECK: ret
@@ -13,15 +13,15 @@ define i32 @merge_stores_can() nounwind ssp {
%ret0 = call i32 @foo([10 x i32]* %object1) nounwind
- %O1_1 = getelementptr [10 x i32]* %object1, i64 0, i32 1
- %O1_2 = getelementptr [10 x i32]* %object1, i64 0, i32 2
- %O1_3 = getelementptr [10 x i32]* %object1, i64 0, i32 3
- %O1_4 = getelementptr [10 x i32]* %object1, i64 0, i32 4
- %ld_ptr = getelementptr [10 x i32]* %object1, i64 0, i32 9
+ %O1_1 = getelementptr [10 x i32], [10 x i32]* %object1, i64 0, i32 1
+ %O1_2 = getelementptr [10 x i32], [10 x i32]* %object1, i64 0, i32 2
+ %O1_3 = getelementptr [10 x i32], [10 x i32]* %object1, i64 0, i32 3
+ %O1_4 = getelementptr [10 x i32], [10 x i32]* %object1, i64 0, i32 4
+ %ld_ptr = getelementptr [10 x i32], [10 x i32]* %object1, i64 0, i32 9
store i32 0, i32* %O1_1
store i32 0, i32* %O1_2
- %ret = load i32* %ld_ptr ; <--- does not alias.
+ %ret = load i32, i32* %ld_ptr ; <--- does not alias.
store i32 0, i32* %O1_3
store i32 0, i32* %O1_4
@@ -36,15 +36,15 @@ define i32 @merge_stores_can() nounwind ssp {
; CHECK: ret
define i32 @merge_stores_cant([10 x i32]* %in0, [10 x i32]* %in1) nounwind ssp {
- %O1_1 = getelementptr [10 x i32]* %in1, i64 0, i32 1
- %O1_2 = getelementptr [10 x i32]* %in1, i64 0, i32 2
- %O1_3 = getelementptr [10 x i32]* %in1, i64 0, i32 3
- %O1_4 = getelementptr [10 x i32]* %in1, i64 0, i32 4
- %ld_ptr = getelementptr [10 x i32]* %in0, i64 0, i32 2
+ %O1_1 = getelementptr [10 x i32], [10 x i32]* %in1, i64 0, i32 1
+ %O1_2 = getelementptr [10 x i32], [10 x i32]* %in1, i64 0, i32 2
+ %O1_3 = getelementptr [10 x i32], [10 x i32]* %in1, i64 0, i32 3
+ %O1_4 = getelementptr [10 x i32], [10 x i32]* %in1, i64 0, i32 4
+ %ld_ptr = getelementptr [10 x i32], [10 x i32]* %in0, i64 0, i32 2
store i32 0, i32* %O1_1
store i32 0, i32* %O1_2
- %ret = load i32* %ld_ptr ; <--- may alias
+ %ret = load i32, i32* %ld_ptr ; <--- may alias
store i32 0, i32* %O1_3
store i32 0, i32* %O1_4
diff --git a/test/CodeGen/X86/2012-11-30-handlemove-dbg.ll b/test/CodeGen/X86/2012-11-30-handlemove-dbg.ll
index c33b48dfecb5..a27db95ba127 100644
--- a/test/CodeGen/X86/2012-11-30-handlemove-dbg.ll
+++ b/test/CodeGen/X86/2012-11-30-handlemove-dbg.ll
@@ -5,7 +5,7 @@
; rdar://12777252.
;
; CHECK: %entry
-; CHECK: DEBUG_VALUE: hg
+; CHECK: DEBUG_VALUE: subdivp:hg
; CHECK: j
%struct.node.0.27 = type { i16, double, [3 x double], i32, i32 }
@@ -16,14 +16,14 @@ declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
define signext i16 @subdivp(%struct.node.0.27* nocapture %p, double %dsq, double %tolsq, %struct.hgstruct.2.29* nocapture byval align 8 %hg) nounwind uwtable readonly ssp {
entry:
- call void @llvm.dbg.declare(metadata %struct.hgstruct.2.29* %hg, metadata !4, metadata !{!"0x102"})
- %type = getelementptr inbounds %struct.node.0.27* %p, i64 0, i32 0
- %0 = load i16* %type, align 2
+ call void @llvm.dbg.declare(metadata %struct.hgstruct.2.29* %hg, metadata !4, metadata !DIExpression()), !dbg !DILocation(scope: !14)
+ %type = getelementptr inbounds %struct.node.0.27, %struct.node.0.27* %p, i64 0, i32 0
+ %0 = load i16, i16* %type, align 2
%cmp = icmp eq i16 %0, 1
br i1 %cmp, label %return, label %for.cond.preheader
for.cond.preheader: ; preds = %entry
- %arrayidx6.1 = getelementptr inbounds %struct.hgstruct.2.29* %hg, i64 0, i32 1, i64 1
+ %arrayidx6.1 = getelementptr inbounds %struct.hgstruct.2.29, %struct.hgstruct.2.29* %hg, i64 0, i32 1, i64 1
%cmp22 = fcmp olt double 0.000000e+00, %dsq
%conv24 = zext i1 %cmp22 to i16
br label %return
@@ -38,15 +38,15 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnon
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!12}
-!0 = !{!"0x11\0012\00clang version 3.3 (trunk 168918) (llvm/trunk 168920)\001\00\000\00\000", !11, !2, !2, !13, !2, null} ; [ DW_TAG_compile_unit ] [MultiSource/Benchmarks/Olden/bh/newbh.c] [DW_LANG_C99]
+!0 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.3 (trunk 168918) (llvm/trunk 168920)", isOptimized: true, emissionKind: 0, file: !11, enums: !2, retainedTypes: !2, subprograms: !13, globals: !2)
!2 = !{}
-!4 = !{!"0x101\00hg\0067109589\000", null, !5, !6} ; [ DW_TAG_arg_variable ] [hg] [line 725]
-!5 = !{!"0x29", !11} ; [ DW_TAG_file_type ]
-!6 = !{!"0x16\00hgstruct\00492\000\000\000\000", !11, null, !7} ; [ DW_TAG_typedef ] [hgstruct] [line 492, size 0, align 0, offset 0] [from ]
-!7 = !{!"0x13\00\00487\00512\0064\000\000\000", !11, null, null, null, null, i32 0, null} ; [ DW_TAG_structure_type ] [line 487, size 512, align 64, offset 0] [def] [from ]
-!11 = !{!"MultiSource/Benchmarks/Olden/bh/newbh.c", !"MultiSource/Benchmarks/Olden/bh"}
-!12 = !{i32 1, !"Debug Info Version", i32 2}
+!4 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "hg", line: 725, arg: 4, scope: !14, file: !5, type: !6)
+!5 = !DIFile(filename: "MultiSource/Benchmarks/Olden/bh/newbh.c", directory: "MultiSource/Benchmarks/Olden/bh")
+!6 = !DIDerivedType(tag: DW_TAG_typedef, name: "hgstruct", line: 492, file: !11, baseType: !7)
+!7 = !DICompositeType(tag: DW_TAG_structure_type, line: 487, size: 512, align: 64, file: !11)
+!11 = !DIFile(filename: "MultiSource/Benchmarks/Olden/bh/newbh.c", directory: "MultiSource/Benchmarks/Olden/bh")
+!12 = !{i32 1, !"Debug Info Version", i32 3}
!13 = !{!14}
-!14 = !{!"0x2e\00subdivp\00subdivp\00\000\000\001\000\006\00256\001\001", !11, !5, !15, null, i16 (%struct.node.0.27*, double, double, %struct.hgstruct.2.29* )* @subdivp, null, null, null} ; [ DW_TAG_subprogram ] [def] [subdivp]
-!15 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !16, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!14 = !DISubprogram(name: "subdivp", isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 1, file: !11, scope: !5, type: !15, function: i16 (%struct.node.0.27*, double, double, %struct.hgstruct.2.29* )* @subdivp)
+!15 = !DISubroutineType(types: !16)
!16 = !{null}
diff --git a/test/CodeGen/X86/2012-11-30-misched-dbg.ll b/test/CodeGen/X86/2012-11-30-misched-dbg.ll
index 28ceb2fad2fc..818c5ed56873 100644
--- a/test/CodeGen/X86/2012-11-30-misched-dbg.ll
+++ b/test/CodeGen/X86/2012-11-30-misched-dbg.ll
@@ -43,14 +43,14 @@ if.then3344:
br label %if.then4073
if.then4073: ; preds = %if.then3344
- call void @llvm.dbg.declare(metadata [20 x i8]* %num14075, metadata !4, metadata !{!"0x102"})
- %arraydecay4078 = getelementptr inbounds [20 x i8]* %num14075, i64 0, i64 0
- %0 = load i32* undef, align 4
+ call void @llvm.dbg.declare(metadata [20 x i8]* %num14075, metadata !4, metadata !DIExpression()), !dbg !DILocation(scope: !5)
+ %arraydecay4078 = getelementptr inbounds [20 x i8], [20 x i8]* %num14075, i64 0, i64 0
+ %0 = load i32, i32* undef, align 4
%add4093 = add nsw i32 %0, 0
%conv4094 = sitofp i32 %add4093 to float
%div4095 = fdiv float %conv4094, 5.670000e+02
%conv4096 = fpext float %div4095 to double
- %call4097 = call i32 (i8*, i32, i64, i8*, ...)* @__sprintf_chk(i8* %arraydecay4078, i32 0, i64 20, i8* getelementptr inbounds ([6 x i8]* @.str15, i64 0, i64 0), double %conv4096) nounwind
+ %call4097 = call i32 (i8*, i32, i64, i8*, ...) @__sprintf_chk(i8* %arraydecay4078, i32 0, i64 20, i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str15, i64 0, i64 0), double %conv4096) nounwind
br i1 %cmp1733, label %if.then4107, label %if.else4114
if.then4107: ; preds = %if.then4073
@@ -65,35 +65,35 @@ declare i32 @__sprintf_chk(i8*, i32, i64, i8*, ...)
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!35}
-!0 = !{!"0x11\0012\00clang version 3.3 (trunk 168918) (llvm/trunk 168920)\001\00\000\00\000", !19, !2, !2, !20, !2, null} ; [ DW_TAG_compile_unit ] [MultiSource/Benchmarks/MiBench/consumer-typeset/MultiSource/Benchmarks/MiBench/consumer-typeset/z19.c] [DW_LANG_C99]
+!0 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.3 (trunk 168918) (llvm/trunk 168920)", isOptimized: true, emissionKind: 0, file: !19, enums: !2, retainedTypes: !2, subprograms: !20, globals: !2)
!1 = !{!2}
!2 = !{}
-!4 = !{!"0x100\00num1\00815\000", !5, !14, !15} ; [ DW_TAG_auto_variable ] [num1] [line 815]
-!5 = !{!"0xb\00815\000\00177", !14, !6} ; [ DW_TAG_lexical_block ] [MultiSource/Benchmarks/MiBench/consumer-typeset/z19.c]
-!6 = !{!"0xb\00812\000\00176", !14, !7} ; [ DW_TAG_lexical_block ] [MultiSource/Benchmarks/MiBench/consumer-typeset/z19.c]
-!7 = !{!"0xb\00807\000\00175", !14, !8} ; [ DW_TAG_lexical_block ] [MultiSource/Benchmarks/MiBench/consumer-typeset/z19.c]
-!8 = !{!"0xb\00440\000\0094", !14, !9} ; [ DW_TAG_lexical_block ] [MultiSource/Benchmarks/MiBench/consumer-typeset/z19.c]
-!9 = !{!"0xb\00435\000\0091", !14, !10} ; [ DW_TAG_lexical_block ] [MultiSource/Benchmarks/MiBench/consumer-typeset/z19.c]
-!10 = !{!"0xb\00434\000\0090", !14, !11} ; [ DW_TAG_lexical_block ] [MultiSource/Benchmarks/MiBench/consumer-typeset/z19.c]
-!11 = !{!"0xb\00250\000\0024", !14, !12} ; [ DW_TAG_lexical_block ] [MultiSource/Benchmarks/MiBench/consumer-typeset/z19.c]
-!12 = !{!"0xb\00249\000\0023", !14, !13} ; [ DW_TAG_lexical_block ] [MultiSource/Benchmarks/MiBench/consumer-typeset/z19.c]
-!13 = !{!"0xb\00221\000\0019", !14, !2} ; [ DW_TAG_lexical_block ] [MultiSource/Benchmarks/MiBench/consumer-typeset/z19.c]
-!14 = !{!"0x29", !19} ; [ DW_TAG_file_type ]
-!15 = !{!"0x1\00\000\00160\008\000\000", null, null, !16, !17, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 160, align 8, offset 0] [from char]
-!16 = !{!"0x24\00char\000\008\008\000\000\006", null, null} ; [ DW_TAG_base_type ] [char] [line 0, size 8, align 8, offset 0, enc DW_ATE_signed_char]
+!4 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "num1", line: 815, scope: !5, file: !14, type: !15)
+!5 = distinct !DILexicalBlock(line: 815, column: 0, file: !14, scope: !6)
+!6 = distinct !DILexicalBlock(line: 812, column: 0, file: !14, scope: !7)
+!7 = distinct !DILexicalBlock(line: 807, column: 0, file: !14, scope: !8)
+!8 = distinct !DILexicalBlock(line: 440, column: 0, file: !14, scope: !9)
+!9 = distinct !DILexicalBlock(line: 435, column: 0, file: !14, scope: !10)
+!10 = distinct !DILexicalBlock(line: 434, column: 0, file: !14, scope: !11)
+!11 = distinct !DILexicalBlock(line: 250, column: 0, file: !14, scope: !12)
+!12 = distinct !DILexicalBlock(line: 249, column: 0, file: !14, scope: !13)
+!13 = distinct !DILexicalBlock(line: 221, column: 0, file: !14, scope: !21)
+!14 = !DIFile(filename: "MultiSource/Benchmarks/MiBench/consumer-typeset/z19.c", directory: "MultiSource/Benchmarks/MiBench/consumer-typeset")
+!15 = !DICompositeType(tag: DW_TAG_array_type, size: 160, align: 8, baseType: !16, elements: !17)
+!16 = !DIBasicType(tag: DW_TAG_base_type, name: "char", size: 8, align: 8, encoding: DW_ATE_signed_char)
!17 = !{!18}
-!18 = !{!"0x21\000\0020"} ; [ DW_TAG_subrange_type ] [0, 19]
-!19 = !{!"MultiSource/Benchmarks/MiBench/consumer-typeset/z19.c", !"MultiSource/Benchmarks/MiBench/consumer-typeset"}
+!18 = !DISubrange(count: 20)
+!19 = !DIFile(filename: "MultiSource/Benchmarks/MiBench/consumer-typeset/z19.c", directory: "MultiSource/Benchmarks/MiBench/consumer-typeset")
!20 = !{!21}
-!21 = !{!"0x2e\00AttachGalley\00AttachGalley\00\000\000\001\000\006\00256\001\001", !19, !14, !22, null, i32 (%union.rec**)* @AttachGalley, null, null, null} ; [ DW_TAG_subprogram ] [def] [AttachGalley]
-!22 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !23, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!21 = !DISubprogram(name: "AttachGalley", isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 1, file: !19, scope: !14, type: !22, function: i32 (%union.rec**)* @AttachGalley)
+!22 = !DISubroutineType(types: !23)
!23 = !{null}
; Test DebugValue uses visited by RegisterPressureTracker findUseBetween().
;
; CHECK: @main
-; CHECK: DEBUG_VALUE: X
+; CHECK: DEBUG_VALUE: main:X
; CHECK: call
%"class.__gnu_cxx::hash_map" = type { %"class.__gnu_cxx::hashtable" }
@@ -108,8 +108,8 @@ cond.true: ; preds = %entry
unreachable
cond.end: ; preds = %entry
- call void @llvm.dbg.declare(metadata %"class.__gnu_cxx::hash_map"* %X, metadata !31, metadata !{!"0x102"})
- %_M_num_elements.i.i.i.i = getelementptr inbounds %"class.__gnu_cxx::hash_map"* %X, i64 0, i32 0, i32 5
+ call void @llvm.dbg.declare(metadata %"class.__gnu_cxx::hash_map"* %X, metadata !31, metadata !DIExpression()), !dbg !DILocation(scope: !37)
+ %_M_num_elements.i.i.i.i = getelementptr inbounds %"class.__gnu_cxx::hash_map", %"class.__gnu_cxx::hash_map"* %X, i64 0, i32 0, i32 5
invoke void @_Znwm()
to label %exit.i unwind label %lpad2.i.i.i.i
@@ -134,11 +134,11 @@ declare void @_Znwm()
!llvm.dbg.cu = !{!30}
-!30 = !{!"0x11\004\00clang version 3.3 (trunk 169129) (llvm/trunk 169135)\001\00\000\00\000", !34, !2, !2, !36, null, null} ; [ DW_TAG_compile_unit ] [SingleSource/Benchmarks/Shootout-C++/hash.cpp] [DW_LANG_C_plus_plus]
-!31 = !{!"0x100\00X\0029\000", null, null, !32} ; [ DW_TAG_auto_variable ] [X] [line 29]
-!32 = !{!"0x16\00HM\0028\000\000\000\000", !34, null, null} ; [ DW_TAG_typedef ] [HM] [line 28, size 0, align 0, offset 0] [from ]
-!33 = !{!"0x29", !34} ; [ DW_TAG_file_type ]
-!34 = !{!"SingleSource/Benchmarks/Shootout-C++/hash.cpp", !"SingleSource/Benchmarks/Shootout-C++"}
-!35 = !{i32 1, !"Debug Info Version", i32 2}
+!30 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.3 (trunk 169129) (llvm/trunk 169135)", isOptimized: true, emissionKind: 0, file: !34, enums: !2, retainedTypes: !2, subprograms: !36)
+!31 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "X", line: 29, scope: !37, type: !32)
+!32 = !DIDerivedType(tag: DW_TAG_typedef, name: "HM", line: 28, file: !34, baseType: null)
+!33 = !DIFile(filename: "SingleSource/Benchmarks/Shootout-C++/hash.cpp", directory: "SingleSource/Benchmarks/Shootout-C++")
+!34 = !DIFile(filename: "SingleSource/Benchmarks/Shootout-C++/hash.cpp", directory: "SingleSource/Benchmarks/Shootout-C++")
+!35 = !{i32 1, !"Debug Info Version", i32 3}
!36 = !{!37}
-!37 = !{!"0x2e\00main\00main\00\000\000\001\000\006\00256\001\001", !19, !14, !22, null, void ()* @main, null, null, null} ; [ DW_TAG_subprogram ] [def] [main]
+!37 = !DISubprogram(name: "main", isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 1, file: !19, scope: !14, type: !22, function: void ()* @main)
diff --git a/test/CodeGen/X86/2012-11-30-regpres-dbg.ll b/test/CodeGen/X86/2012-11-30-regpres-dbg.ll
index 04b31749ce58..77c017eb0e36 100644
--- a/test/CodeGen/X86/2012-11-30-regpres-dbg.ll
+++ b/test/CodeGen/X86/2012-11-30-regpres-dbg.ll
@@ -4,7 +4,7 @@
; Test RegisterPressure handling of DBG_VALUE.
;
; CHECK: %entry
-; CHECK: DEBUG_VALUE: callback
+; CHECK: DEBUG_VALUE: test:callback
; CHECK: ret
%struct.btCompoundLeafCallback = type { i32, i32 }
@@ -20,8 +20,8 @@ if.then: ; preds = %entry
unreachable
if.end: ; preds = %entry
- call void @llvm.dbg.declare(metadata %struct.btCompoundLeafCallback* %callback, metadata !3, metadata !{!"0x102"})
- %m = getelementptr inbounds %struct.btCompoundLeafCallback* %callback, i64 0, i32 1
+ call void @llvm.dbg.declare(metadata %struct.btCompoundLeafCallback* %callback, metadata !3, metadata !DIExpression()), !dbg !DILocation(scope: !2)
+ %m = getelementptr inbounds %struct.btCompoundLeafCallback, %struct.btCompoundLeafCallback* %callback, i64 0, i32 1
store i32 0, i32* undef, align 8
%cmp12447 = icmp sgt i32 undef, 0
br i1 %cmp12447, label %for.body.lr.ph, label %invoke.cont44
@@ -36,13 +36,13 @@ invoke.cont44: ; preds = %if.end
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!8}
-!0 = !{!"0x11\004\00clang version 3.3 (trunk 168984) (llvm/trunk 168983)\001\00\000\00\000", !6, null, null, !1, null, null} ; [ DW_TAG_compile_unit ] [MultiSource/Benchmarks/Bullet/MultiSource/Benchmarks/Bullet/btCompoundCollisionAlgorithm.cpp] [DW_LANG_C_plus_plus]
+!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.3 (trunk 168984) (llvm/trunk 168983)", isOptimized: true, emissionKind: 0, file: !6, subprograms: !1)
!1 = !{!2}
-!2 = !{!"0x2e\00test\00test\00\000\000\001\000\006\00256\001\001", !6, !5, !7, null, void ()* @test, null, null, null} ; [ DW_TAG_subprogram ] [def] [test]
-!3 = !{!"0x100\00callback\00214\000", null, null, !4} ; [ DW_TAG_auto_variable ] [callback] [line 214]
-!4 = !{!"0x13\00btCompoundLeafCallback\0090\00512\0064\000\000\000", !6, null, null, null, null, null, null} ; [ DW_TAG_structure_type ] [btCompoundLeafCallback] [line 90, size 512, align 64, offset 0] [def] [from ]
-!5 = !{!"0x29", !6} ; [ DW_TAG_file_type ]
-!6 = !{!"MultiSource/Benchmarks/Bullet/btCompoundCollisionAlgorithm.cpp", !"MultiSource/Benchmarks/Bullet"}
-!7 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !9, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!8 = !{i32 1, !"Debug Info Version", i32 2}
+!2 = !DISubprogram(name: "test", isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 1, file: !6, scope: !5, type: !7, function: void ()* @test)
+!3 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "callback", line: 214, scope: !2, type: !4)
+!4 = !DICompositeType(tag: DW_TAG_structure_type, name: "btCompoundLeafCallback", line: 90, size: 512, align: 64, file: !6)
+!5 = !DIFile(filename: "MultiSource/Benchmarks/Bullet/btCompoundCollisionAlgorithm.cpp", directory: "MultiSource/Benchmarks/Bullet")
+!6 = !DIFile(filename: "MultiSource/Benchmarks/Bullet/btCompoundCollisionAlgorithm.cpp", directory: "MultiSource/Benchmarks/Bullet")
+!7 = !DISubroutineType(types: !9)
+!8 = !{i32 1, !"Debug Info Version", i32 3}
!9 = !{null}
diff --git a/test/CodeGen/X86/2012-12-06-python27-miscompile.ll b/test/CodeGen/X86/2012-12-06-python27-miscompile.ll
index d9effc92fa92..b80ae3ae2b7f 100644
--- a/test/CodeGen/X86/2012-12-06-python27-miscompile.ll
+++ b/test/CodeGen/X86/2012-12-06-python27-miscompile.ll
@@ -12,10 +12,10 @@ target triple = "x86_64-apple-macosx10.8.0"
;CHECK: ret
define i32 @foo (i64* %so) nounwind uwtable ssp {
entry:
- %used = getelementptr inbounds i64* %so, i32 3
+ %used = getelementptr inbounds i64, i64* %so, i32 3
store i64 0, i64* %used, align 8
- %fill = getelementptr inbounds i64* %so, i32 2
- %L = load i64* %fill, align 8
+ %fill = getelementptr inbounds i64, i64* %so, i32 2
+ %L = load i64, i64* %fill, align 8
store i64 0, i64* %fill, align 8
%cmp28 = icmp sgt i64 %L, 0
%R = sext i1 %cmp28 to i32
diff --git a/test/CodeGen/X86/2012-12-1-merge-multiple.ll b/test/CodeGen/X86/2012-12-1-merge-multiple.ll
index 5931c3d27be1..9be8b5bbb427 100644
--- a/test/CodeGen/X86/2012-12-1-merge-multiple.ll
+++ b/test/CodeGen/X86/2012-12-1-merge-multiple.ll
@@ -8,14 +8,14 @@
; CHECK: ret
define void @multiple_stores_on_chain(i16 * %A) {
entry:
- %a0 = getelementptr inbounds i16* %A, i64 0
- %a1 = getelementptr inbounds i16* %A, i64 1
- %a2 = getelementptr inbounds i16* %A, i64 2
- %a3 = getelementptr inbounds i16* %A, i64 3
- %a4 = getelementptr inbounds i16* %A, i64 4
- %a5 = getelementptr inbounds i16* %A, i64 5
- %a6 = getelementptr inbounds i16* %A, i64 6
- %a7 = getelementptr inbounds i16* %A, i64 7
+ %a0 = getelementptr inbounds i16, i16* %A, i64 0
+ %a1 = getelementptr inbounds i16, i16* %A, i64 1
+ %a2 = getelementptr inbounds i16, i16* %A, i64 2
+ %a3 = getelementptr inbounds i16, i16* %A, i64 3
+ %a4 = getelementptr inbounds i16, i16* %A, i64 4
+ %a5 = getelementptr inbounds i16, i16* %A, i64 5
+ %a6 = getelementptr inbounds i16, i16* %A, i64 6
+ %a7 = getelementptr inbounds i16, i16* %A, i64 7
store i16 0, i16* %a0
store i16 1, i16* %a1
diff --git a/test/CodeGen/X86/2012-12-19-NoImplicitFloat.ll b/test/CodeGen/X86/2012-12-19-NoImplicitFloat.ll
index 302566520671..e5a64b5ae87e 100644
--- a/test/CodeGen/X86/2012-12-19-NoImplicitFloat.ll
+++ b/test/CodeGen/X86/2012-12-19-NoImplicitFloat.ll
@@ -8,10 +8,10 @@ define void @test() nounwind noimplicitfloat {
entry:
; CHECK-NOT: xmm
; CHECK: ret
- %0 = load %struct1** undef, align 8
- %1 = getelementptr inbounds %struct1* %0, i64 0, i32 0
+ %0 = load %struct1*, %struct1** undef, align 8
+ %1 = getelementptr inbounds %struct1, %struct1* %0, i64 0, i32 0
store i32* null, i32** %1, align 8
- %2 = getelementptr inbounds %struct1* %0, i64 0, i32 1
+ %2 = getelementptr inbounds %struct1, %struct1* %0, i64 0, i32 1
store i32* null, i32** %2, align 8
ret void
}
diff --git a/test/CodeGen/X86/2013-02-12-ShuffleToZext.ll b/test/CodeGen/X86/2013-02-12-ShuffleToZext.ll
deleted file mode 100644
index 614ccda5e250..000000000000
--- a/test/CodeGen/X86/2013-02-12-ShuffleToZext.ll
+++ /dev/null
@@ -1,14 +0,0 @@
-; RUN: llc < %s -march=x86-64 -mcpu=corei7-avx -mtriple=x86_64-pc-win32 | FileCheck %s
-
-; CHECK: test
-; CHECK: vpmovzxwd
-; CHECK: vpmovzxwd
-define void @test(<4 x i64> %a, <4 x i16>* %buf) {
- %ex1 = extractelement <4 x i64> %a, i32 0
- %ex2 = extractelement <4 x i64> %a, i32 1
- %x1 = bitcast i64 %ex1 to <4 x i16>
- %x2 = bitcast i64 %ex2 to <4 x i16>
- %Sh = shufflevector <4 x i16> %x1, <4 x i16> %x2, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
- store <4 x i16> %Sh, <4 x i16>* %buf, align 1
- ret void
-}
diff --git a/test/CodeGen/X86/2013-03-13-VEX-DestReg.ll b/test/CodeGen/X86/2013-03-13-VEX-DestReg.ll
index 0ff9d3951d5b..f6827c1c75a7 100644
--- a/test/CodeGen/X86/2013-03-13-VEX-DestReg.ll
+++ b/test/CodeGen/X86/2013-03-13-VEX-DestReg.ll
@@ -9,7 +9,7 @@ target triple = "x86_64-apple-macosx10.8.0"
define void @main() #0 {
entry:
- %0 = load <8 x float>* bitcast ([8 x float]* @b to <8 x float>*), align 32
+ %0 = load <8 x float>, <8 x float>* bitcast ([8 x float]* @b to <8 x float>*), align 32
%bitcast.i = extractelement <8 x float> %0, i32 0
%vecinit.i.i = insertelement <4 x float> undef, float %bitcast.i, i32 0
%vecinit2.i.i = insertelement <4 x float> %vecinit.i.i, float 0.000000e+00, i32 1
@@ -17,7 +17,7 @@ entry:
%vecinit4.i.i = insertelement <4 x float> %vecinit3.i.i, float 0.000000e+00, i32 3
%1 = tail call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %vecinit4.i.i) #2
%vecext.i.i = extractelement <4 x float> %1, i32 0
- store float %vecext.i.i, float* getelementptr inbounds ([8 x float]* @e, i64 0, i64 0), align 16
+ store float %vecext.i.i, float* getelementptr inbounds ([8 x float], [8 x float]* @e, i64 0, i64 0), align 16
unreachable
}
diff --git a/test/CodeGen/X86/2013-10-14-FastISel-incorrect-vreg.ll b/test/CodeGen/X86/2013-10-14-FastISel-incorrect-vreg.ll
index 9cd150a2f56d..5ef867d4f9dc 100644
--- a/test/CodeGen/X86/2013-10-14-FastISel-incorrect-vreg.ll
+++ b/test/CodeGen/X86/2013-10-14-FastISel-incorrect-vreg.ll
@@ -34,7 +34,7 @@
; CHECK: ret
define i64 @test_bitcast(i64 (i64, i64, i64)** %arg, i1 %bool, i64 %arg2) {
entry:
- %loaded_ptr = load i64 (i64, i64, i64)** %arg, align 8
+ %loaded_ptr = load i64 (i64, i64, i64)*, i64 (i64, i64, i64)** %arg, align 8
%raw = bitcast i64 (i64, i64, i64)* %loaded_ptr to i8*
switch i1 %bool, label %default [
i1 true, label %label_true
@@ -73,7 +73,7 @@ label_end:
; CHECK: ret
define i64 @test_inttoptr(i64 (i64, i64, i64)** %arg, i1 %bool, i64 %arg2) {
entry:
- %loaded_ptr = load i64 (i64, i64, i64)** %arg, align 8
+ %loaded_ptr = load i64 (i64, i64, i64)*, i64 (i64, i64, i64)** %arg, align 8
%raw = ptrtoint i64 (i64, i64, i64)* %loaded_ptr to i64
switch i1 %bool, label %default [
i1 true, label %label_true
@@ -112,7 +112,7 @@ label_end:
; CHECK: ret
define i64 @test_ptrtoint(i64 (i64, i64, i64)** %arg, i1 %bool, i64 %arg2) {
entry:
- %loaded_ptr = load i64 (i64, i64, i64)** %arg, align 8
+ %loaded_ptr = load i64 (i64, i64, i64)*, i64 (i64, i64, i64)** %arg, align 8
%raw = bitcast i64 (i64, i64, i64)* %loaded_ptr to i8*
switch i1 %bool, label %default [
i1 true, label %label_true
diff --git a/test/CodeGen/X86/2014-08-29-CompactUnwind.ll b/test/CodeGen/X86/2014-08-29-CompactUnwind.ll
index f65d7c9d2e05..e7e8bb724fc0 100644
--- a/test/CodeGen/X86/2014-08-29-CompactUnwind.ll
+++ b/test/CodeGen/X86/2014-08-29-CompactUnwind.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -disable-fp-elim -mtriple x86_64-apple-darwin11 -mcpu corei7 -filetype=obj -o - | llvm-objdump -d -unwind-info -s - | FileCheck %s
+; RUN: llc < %s -mtriple x86_64-apple-darwin11 -mcpu corei7 -filetype=obj -o - | llvm-objdump -d -unwind-info -s - | FileCheck %s
; Regression test for http://llvm.org/bugs/show_bug.cgi?id=20800.
; ModuleID = 'asan_report.ii'
@@ -33,10 +33,10 @@ print_shadow_bytes.exit.i: ; preds = %print_shadow_bytes.exit.i, %0
%iv.i = phi i64 [ -5, %0 ], [ %iv.next.i, %print_shadow_bytes.exit.i ]
%reg15 = icmp eq i64 %iv.i, 0
%.str..str1.i = select i1 %reg15, [3 x i8]* @.str, [3 x i8]* @.str1
- %reg16 = getelementptr inbounds [3 x i8]* %.str..str1.i, i64 0, i64 0
+ %reg16 = getelementptr inbounds [3 x i8], [3 x i8]* %.str..str1.i, i64 0, i64 0
%reg17 = shl i64 %iv.i, 1
%reg19 = inttoptr i64 %reg17 to i8*
- call void (i64*, i8*, ...)* @append(i64* %str.i, i8* getelementptr inbounds ([6 x i8]* @.str2, i64 0, i64 0), i8* %reg16, i8* %reg19)
+ call void (i64*, i8*, ...) @append(i64* %str.i, i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str2, i64 0, i64 0), i8* %reg16, i8* %reg19)
%iv.next.i = add nsw i64 %iv.i, 0
br label %print_shadow_bytes.exit.i
}
diff --git a/test/CodeGen/X86/Atomics-64.ll b/test/CodeGen/X86/Atomics-64.ll
index c392e947407e..6d367a71d015 100644
--- a/test/CodeGen/X86/Atomics-64.ll
+++ b/test/CodeGen/X86/Atomics-64.ll
@@ -308,331 +308,331 @@ return: ; preds = %entry
define void @test_op_and_fetch() nounwind {
entry:
- %0 = load i8* @uc, align 1
+ %0 = load i8, i8* @uc, align 1
%1 = zext i8 %0 to i32
%2 = trunc i32 %1 to i8
%3 = atomicrmw add i8* @sc, i8 %2 monotonic
%4 = add i8 %3, %2
store i8 %4, i8* @sc, align 1
- %5 = load i8* @uc, align 1
+ %5 = load i8, i8* @uc, align 1
%6 = zext i8 %5 to i32
%7 = trunc i32 %6 to i8
%8 = atomicrmw add i8* @uc, i8 %7 monotonic
%9 = add i8 %8, %7
store i8 %9, i8* @uc, align 1
- %10 = load i8* @uc, align 1
+ %10 = load i8, i8* @uc, align 1
%11 = zext i8 %10 to i32
%12 = bitcast i8* bitcast (i16* @ss to i8*) to i16*
%13 = trunc i32 %11 to i16
%14 = atomicrmw add i16* %12, i16 %13 monotonic
%15 = add i16 %14, %13
store i16 %15, i16* @ss, align 2
- %16 = load i8* @uc, align 1
+ %16 = load i8, i8* @uc, align 1
%17 = zext i8 %16 to i32
%18 = bitcast i8* bitcast (i16* @us to i8*) to i16*
%19 = trunc i32 %17 to i16
%20 = atomicrmw add i16* %18, i16 %19 monotonic
%21 = add i16 %20, %19
store i16 %21, i16* @us, align 2
- %22 = load i8* @uc, align 1
+ %22 = load i8, i8* @uc, align 1
%23 = zext i8 %22 to i32
%24 = bitcast i8* bitcast (i32* @si to i8*) to i32*
%25 = atomicrmw add i32* %24, i32 %23 monotonic
%26 = add i32 %25, %23
store i32 %26, i32* @si, align 4
- %27 = load i8* @uc, align 1
+ %27 = load i8, i8* @uc, align 1
%28 = zext i8 %27 to i32
%29 = bitcast i8* bitcast (i32* @ui to i8*) to i32*
%30 = atomicrmw add i32* %29, i32 %28 monotonic
%31 = add i32 %30, %28
store i32 %31, i32* @ui, align 4
- %32 = load i8* @uc, align 1
+ %32 = load i8, i8* @uc, align 1
%33 = zext i8 %32 to i64
%34 = bitcast i8* bitcast (i64* @sl to i8*) to i64*
%35 = atomicrmw add i64* %34, i64 %33 monotonic
%36 = add i64 %35, %33
store i64 %36, i64* @sl, align 8
- %37 = load i8* @uc, align 1
+ %37 = load i8, i8* @uc, align 1
%38 = zext i8 %37 to i64
%39 = bitcast i8* bitcast (i64* @ul to i8*) to i64*
%40 = atomicrmw add i64* %39, i64 %38 monotonic
%41 = add i64 %40, %38
store i64 %41, i64* @ul, align 8
- %42 = load i8* @uc, align 1
+ %42 = load i8, i8* @uc, align 1
%43 = zext i8 %42 to i64
%44 = bitcast i8* bitcast (i64* @sll to i8*) to i64*
%45 = atomicrmw add i64* %44, i64 %43 monotonic
%46 = add i64 %45, %43
store i64 %46, i64* @sll, align 8
- %47 = load i8* @uc, align 1
+ %47 = load i8, i8* @uc, align 1
%48 = zext i8 %47 to i64
%49 = bitcast i8* bitcast (i64* @ull to i8*) to i64*
%50 = atomicrmw add i64* %49, i64 %48 monotonic
%51 = add i64 %50, %48
store i64 %51, i64* @ull, align 8
- %52 = load i8* @uc, align 1
+ %52 = load i8, i8* @uc, align 1
%53 = zext i8 %52 to i32
%54 = trunc i32 %53 to i8
%55 = atomicrmw sub i8* @sc, i8 %54 monotonic
%56 = sub i8 %55, %54
store i8 %56, i8* @sc, align 1
- %57 = load i8* @uc, align 1
+ %57 = load i8, i8* @uc, align 1
%58 = zext i8 %57 to i32
%59 = trunc i32 %58 to i8
%60 = atomicrmw sub i8* @uc, i8 %59 monotonic
%61 = sub i8 %60, %59
store i8 %61, i8* @uc, align 1
- %62 = load i8* @uc, align 1
+ %62 = load i8, i8* @uc, align 1
%63 = zext i8 %62 to i32
%64 = bitcast i8* bitcast (i16* @ss to i8*) to i16*
%65 = trunc i32 %63 to i16
%66 = atomicrmw sub i16* %64, i16 %65 monotonic
%67 = sub i16 %66, %65
store i16 %67, i16* @ss, align 2
- %68 = load i8* @uc, align 1
+ %68 = load i8, i8* @uc, align 1
%69 = zext i8 %68 to i32
%70 = bitcast i8* bitcast (i16* @us to i8*) to i16*
%71 = trunc i32 %69 to i16
%72 = atomicrmw sub i16* %70, i16 %71 monotonic
%73 = sub i16 %72, %71
store i16 %73, i16* @us, align 2
- %74 = load i8* @uc, align 1
+ %74 = load i8, i8* @uc, align 1
%75 = zext i8 %74 to i32
%76 = bitcast i8* bitcast (i32* @si to i8*) to i32*
%77 = atomicrmw sub i32* %76, i32 %75 monotonic
%78 = sub i32 %77, %75
store i32 %78, i32* @si, align 4
- %79 = load i8* @uc, align 1
+ %79 = load i8, i8* @uc, align 1
%80 = zext i8 %79 to i32
%81 = bitcast i8* bitcast (i32* @ui to i8*) to i32*
%82 = atomicrmw sub i32* %81, i32 %80 monotonic
%83 = sub i32 %82, %80
store i32 %83, i32* @ui, align 4
- %84 = load i8* @uc, align 1
+ %84 = load i8, i8* @uc, align 1
%85 = zext i8 %84 to i64
%86 = bitcast i8* bitcast (i64* @sl to i8*) to i64*
%87 = atomicrmw sub i64* %86, i64 %85 monotonic
%88 = sub i64 %87, %85
store i64 %88, i64* @sl, align 8
- %89 = load i8* @uc, align 1
+ %89 = load i8, i8* @uc, align 1
%90 = zext i8 %89 to i64
%91 = bitcast i8* bitcast (i64* @ul to i8*) to i64*
%92 = atomicrmw sub i64* %91, i64 %90 monotonic
%93 = sub i64 %92, %90
store i64 %93, i64* @ul, align 8
- %94 = load i8* @uc, align 1
+ %94 = load i8, i8* @uc, align 1
%95 = zext i8 %94 to i64
%96 = bitcast i8* bitcast (i64* @sll to i8*) to i64*
%97 = atomicrmw sub i64* %96, i64 %95 monotonic
%98 = sub i64 %97, %95
store i64 %98, i64* @sll, align 8
- %99 = load i8* @uc, align 1
+ %99 = load i8, i8* @uc, align 1
%100 = zext i8 %99 to i64
%101 = bitcast i8* bitcast (i64* @ull to i8*) to i64*
%102 = atomicrmw sub i64* %101, i64 %100 monotonic
%103 = sub i64 %102, %100
store i64 %103, i64* @ull, align 8
- %104 = load i8* @uc, align 1
+ %104 = load i8, i8* @uc, align 1
%105 = zext i8 %104 to i32
%106 = trunc i32 %105 to i8
%107 = atomicrmw or i8* @sc, i8 %106 monotonic
%108 = or i8 %107, %106
store i8 %108, i8* @sc, align 1
- %109 = load i8* @uc, align 1
+ %109 = load i8, i8* @uc, align 1
%110 = zext i8 %109 to i32
%111 = trunc i32 %110 to i8
%112 = atomicrmw or i8* @uc, i8 %111 monotonic
%113 = or i8 %112, %111
store i8 %113, i8* @uc, align 1
- %114 = load i8* @uc, align 1
+ %114 = load i8, i8* @uc, align 1
%115 = zext i8 %114 to i32
%116 = bitcast i8* bitcast (i16* @ss to i8*) to i16*
%117 = trunc i32 %115 to i16
%118 = atomicrmw or i16* %116, i16 %117 monotonic
%119 = or i16 %118, %117
store i16 %119, i16* @ss, align 2
- %120 = load i8* @uc, align 1
+ %120 = load i8, i8* @uc, align 1
%121 = zext i8 %120 to i32
%122 = bitcast i8* bitcast (i16* @us to i8*) to i16*
%123 = trunc i32 %121 to i16
%124 = atomicrmw or i16* %122, i16 %123 monotonic
%125 = or i16 %124, %123
store i16 %125, i16* @us, align 2
- %126 = load i8* @uc, align 1
+ %126 = load i8, i8* @uc, align 1
%127 = zext i8 %126 to i32
%128 = bitcast i8* bitcast (i32* @si to i8*) to i32*
%129 = atomicrmw or i32* %128, i32 %127 monotonic
%130 = or i32 %129, %127
store i32 %130, i32* @si, align 4
- %131 = load i8* @uc, align 1
+ %131 = load i8, i8* @uc, align 1
%132 = zext i8 %131 to i32
%133 = bitcast i8* bitcast (i32* @ui to i8*) to i32*
%134 = atomicrmw or i32* %133, i32 %132 monotonic
%135 = or i32 %134, %132
store i32 %135, i32* @ui, align 4
- %136 = load i8* @uc, align 1
+ %136 = load i8, i8* @uc, align 1
%137 = zext i8 %136 to i64
%138 = bitcast i8* bitcast (i64* @sl to i8*) to i64*
%139 = atomicrmw or i64* %138, i64 %137 monotonic
%140 = or i64 %139, %137
store i64 %140, i64* @sl, align 8
- %141 = load i8* @uc, align 1
+ %141 = load i8, i8* @uc, align 1
%142 = zext i8 %141 to i64
%143 = bitcast i8* bitcast (i64* @ul to i8*) to i64*
%144 = atomicrmw or i64* %143, i64 %142 monotonic
%145 = or i64 %144, %142
store i64 %145, i64* @ul, align 8
- %146 = load i8* @uc, align 1
+ %146 = load i8, i8* @uc, align 1
%147 = zext i8 %146 to i64
%148 = bitcast i8* bitcast (i64* @sll to i8*) to i64*
%149 = atomicrmw or i64* %148, i64 %147 monotonic
%150 = or i64 %149, %147
store i64 %150, i64* @sll, align 8
- %151 = load i8* @uc, align 1
+ %151 = load i8, i8* @uc, align 1
%152 = zext i8 %151 to i64
%153 = bitcast i8* bitcast (i64* @ull to i8*) to i64*
%154 = atomicrmw or i64* %153, i64 %152 monotonic
%155 = or i64 %154, %152
store i64 %155, i64* @ull, align 8
- %156 = load i8* @uc, align 1
+ %156 = load i8, i8* @uc, align 1
%157 = zext i8 %156 to i32
%158 = trunc i32 %157 to i8
%159 = atomicrmw xor i8* @sc, i8 %158 monotonic
%160 = xor i8 %159, %158
store i8 %160, i8* @sc, align 1
- %161 = load i8* @uc, align 1
+ %161 = load i8, i8* @uc, align 1
%162 = zext i8 %161 to i32
%163 = trunc i32 %162 to i8
%164 = atomicrmw xor i8* @uc, i8 %163 monotonic
%165 = xor i8 %164, %163
store i8 %165, i8* @uc, align 1
- %166 = load i8* @uc, align 1
+ %166 = load i8, i8* @uc, align 1
%167 = zext i8 %166 to i32
%168 = bitcast i8* bitcast (i16* @ss to i8*) to i16*
%169 = trunc i32 %167 to i16
%170 = atomicrmw xor i16* %168, i16 %169 monotonic
%171 = xor i16 %170, %169
store i16 %171, i16* @ss, align 2
- %172 = load i8* @uc, align 1
+ %172 = load i8, i8* @uc, align 1
%173 = zext i8 %172 to i32
%174 = bitcast i8* bitcast (i16* @us to i8*) to i16*
%175 = trunc i32 %173 to i16
%176 = atomicrmw xor i16* %174, i16 %175 monotonic
%177 = xor i16 %176, %175
store i16 %177, i16* @us, align 2
- %178 = load i8* @uc, align 1
+ %178 = load i8, i8* @uc, align 1
%179 = zext i8 %178 to i32
%180 = bitcast i8* bitcast (i32* @si to i8*) to i32*
%181 = atomicrmw xor i32* %180, i32 %179 monotonic
%182 = xor i32 %181, %179
store i32 %182, i32* @si, align 4
- %183 = load i8* @uc, align 1
+ %183 = load i8, i8* @uc, align 1
%184 = zext i8 %183 to i32
%185 = bitcast i8* bitcast (i32* @ui to i8*) to i32*
%186 = atomicrmw xor i32* %185, i32 %184 monotonic
%187 = xor i32 %186, %184
store i32 %187, i32* @ui, align 4
- %188 = load i8* @uc, align 1
+ %188 = load i8, i8* @uc, align 1
%189 = zext i8 %188 to i64
%190 = bitcast i8* bitcast (i64* @sl to i8*) to i64*
%191 = atomicrmw xor i64* %190, i64 %189 monotonic
%192 = xor i64 %191, %189
store i64 %192, i64* @sl, align 8
- %193 = load i8* @uc, align 1
+ %193 = load i8, i8* @uc, align 1
%194 = zext i8 %193 to i64
%195 = bitcast i8* bitcast (i64* @ul to i8*) to i64*
%196 = atomicrmw xor i64* %195, i64 %194 monotonic
%197 = xor i64 %196, %194
store i64 %197, i64* @ul, align 8
- %198 = load i8* @uc, align 1
+ %198 = load i8, i8* @uc, align 1
%199 = zext i8 %198 to i64
%200 = bitcast i8* bitcast (i64* @sll to i8*) to i64*
%201 = atomicrmw xor i64* %200, i64 %199 monotonic
%202 = xor i64 %201, %199
store i64 %202, i64* @sll, align 8
- %203 = load i8* @uc, align 1
+ %203 = load i8, i8* @uc, align 1
%204 = zext i8 %203 to i64
%205 = bitcast i8* bitcast (i64* @ull to i8*) to i64*
%206 = atomicrmw xor i64* %205, i64 %204 monotonic
%207 = xor i64 %206, %204
store i64 %207, i64* @ull, align 8
- %208 = load i8* @uc, align 1
+ %208 = load i8, i8* @uc, align 1
%209 = zext i8 %208 to i32
%210 = trunc i32 %209 to i8
%211 = atomicrmw and i8* @sc, i8 %210 monotonic
%212 = and i8 %211, %210
store i8 %212, i8* @sc, align 1
- %213 = load i8* @uc, align 1
+ %213 = load i8, i8* @uc, align 1
%214 = zext i8 %213 to i32
%215 = trunc i32 %214 to i8
%216 = atomicrmw and i8* @uc, i8 %215 monotonic
%217 = and i8 %216, %215
store i8 %217, i8* @uc, align 1
- %218 = load i8* @uc, align 1
+ %218 = load i8, i8* @uc, align 1
%219 = zext i8 %218 to i32
%220 = bitcast i8* bitcast (i16* @ss to i8*) to i16*
%221 = trunc i32 %219 to i16
%222 = atomicrmw and i16* %220, i16 %221 monotonic
%223 = and i16 %222, %221
store i16 %223, i16* @ss, align 2
- %224 = load i8* @uc, align 1
+ %224 = load i8, i8* @uc, align 1
%225 = zext i8 %224 to i32
%226 = bitcast i8* bitcast (i16* @us to i8*) to i16*
%227 = trunc i32 %225 to i16
%228 = atomicrmw and i16* %226, i16 %227 monotonic
%229 = and i16 %228, %227
store i16 %229, i16* @us, align 2
- %230 = load i8* @uc, align 1
+ %230 = load i8, i8* @uc, align 1
%231 = zext i8 %230 to i32
%232 = bitcast i8* bitcast (i32* @si to i8*) to i32*
%233 = atomicrmw and i32* %232, i32 %231 monotonic
%234 = and i32 %233, %231
store i32 %234, i32* @si, align 4
- %235 = load i8* @uc, align 1
+ %235 = load i8, i8* @uc, align 1
%236 = zext i8 %235 to i32
%237 = bitcast i8* bitcast (i32* @ui to i8*) to i32*
%238 = atomicrmw and i32* %237, i32 %236 monotonic
%239 = and i32 %238, %236
store i32 %239, i32* @ui, align 4
- %240 = load i8* @uc, align 1
+ %240 = load i8, i8* @uc, align 1
%241 = zext i8 %240 to i64
%242 = bitcast i8* bitcast (i64* @sl to i8*) to i64*
%243 = atomicrmw and i64* %242, i64 %241 monotonic
%244 = and i64 %243, %241
store i64 %244, i64* @sl, align 8
- %245 = load i8* @uc, align 1
+ %245 = load i8, i8* @uc, align 1
%246 = zext i8 %245 to i64
%247 = bitcast i8* bitcast (i64* @ul to i8*) to i64*
%248 = atomicrmw and i64* %247, i64 %246 monotonic
%249 = and i64 %248, %246
store i64 %249, i64* @ul, align 8
- %250 = load i8* @uc, align 1
+ %250 = load i8, i8* @uc, align 1
%251 = zext i8 %250 to i64
%252 = bitcast i8* bitcast (i64* @sll to i8*) to i64*
%253 = atomicrmw and i64* %252, i64 %251 monotonic
%254 = and i64 %253, %251
store i64 %254, i64* @sll, align 8
- %255 = load i8* @uc, align 1
+ %255 = load i8, i8* @uc, align 1
%256 = zext i8 %255 to i64
%257 = bitcast i8* bitcast (i64* @ull to i8*) to i64*
%258 = atomicrmw and i64* %257, i64 %256 monotonic
%259 = and i64 %258, %256
store i64 %259, i64* @ull, align 8
- %260 = load i8* @uc, align 1
+ %260 = load i8, i8* @uc, align 1
%261 = zext i8 %260 to i32
%262 = trunc i32 %261 to i8
%263 = atomicrmw nand i8* @sc, i8 %262 monotonic
%264 = xor i8 %263, -1
%265 = and i8 %264, %262
store i8 %265, i8* @sc, align 1
- %266 = load i8* @uc, align 1
+ %266 = load i8, i8* @uc, align 1
%267 = zext i8 %266 to i32
%268 = trunc i32 %267 to i8
%269 = atomicrmw nand i8* @uc, i8 %268 monotonic
%270 = xor i8 %269, -1
%271 = and i8 %270, %268
store i8 %271, i8* @uc, align 1
- %272 = load i8* @uc, align 1
+ %272 = load i8, i8* @uc, align 1
%273 = zext i8 %272 to i32
%274 = bitcast i8* bitcast (i16* @ss to i8*) to i16*
%275 = trunc i32 %273 to i16
@@ -640,7 +640,7 @@ entry:
%277 = xor i16 %276, -1
%278 = and i16 %277, %275
store i16 %278, i16* @ss, align 2
- %279 = load i8* @uc, align 1
+ %279 = load i8, i8* @uc, align 1
%280 = zext i8 %279 to i32
%281 = bitcast i8* bitcast (i16* @us to i8*) to i16*
%282 = trunc i32 %280 to i16
@@ -648,42 +648,42 @@ entry:
%284 = xor i16 %283, -1
%285 = and i16 %284, %282
store i16 %285, i16* @us, align 2
- %286 = load i8* @uc, align 1
+ %286 = load i8, i8* @uc, align 1
%287 = zext i8 %286 to i32
%288 = bitcast i8* bitcast (i32* @si to i8*) to i32*
%289 = atomicrmw nand i32* %288, i32 %287 monotonic
%290 = xor i32 %289, -1
%291 = and i32 %290, %287
store i32 %291, i32* @si, align 4
- %292 = load i8* @uc, align 1
+ %292 = load i8, i8* @uc, align 1
%293 = zext i8 %292 to i32
%294 = bitcast i8* bitcast (i32* @ui to i8*) to i32*
%295 = atomicrmw nand i32* %294, i32 %293 monotonic
%296 = xor i32 %295, -1
%297 = and i32 %296, %293
store i32 %297, i32* @ui, align 4
- %298 = load i8* @uc, align 1
+ %298 = load i8, i8* @uc, align 1
%299 = zext i8 %298 to i64
%300 = bitcast i8* bitcast (i64* @sl to i8*) to i64*
%301 = atomicrmw nand i64* %300, i64 %299 monotonic
%302 = xor i64 %301, -1
%303 = and i64 %302, %299
store i64 %303, i64* @sl, align 8
- %304 = load i8* @uc, align 1
+ %304 = load i8, i8* @uc, align 1
%305 = zext i8 %304 to i64
%306 = bitcast i8* bitcast (i64* @ul to i8*) to i64*
%307 = atomicrmw nand i64* %306, i64 %305 monotonic
%308 = xor i64 %307, -1
%309 = and i64 %308, %305
store i64 %309, i64* @ul, align 8
- %310 = load i8* @uc, align 1
+ %310 = load i8, i8* @uc, align 1
%311 = zext i8 %310 to i64
%312 = bitcast i8* bitcast (i64* @sll to i8*) to i64*
%313 = atomicrmw nand i64* %312, i64 %311 monotonic
%314 = xor i64 %313, -1
%315 = and i64 %314, %311
store i64 %315, i64* @sll, align 8
- %316 = load i8* @uc, align 1
+ %316 = load i8, i8* @uc, align 1
%317 = zext i8 %316 to i64
%318 = bitcast i8* bitcast (i64* @ull to i8*) to i64*
%319 = atomicrmw nand i64* %318, i64 %317 monotonic
@@ -698,28 +698,28 @@ return: ; preds = %entry
define void @test_compare_and_swap() nounwind {
entry:
- %0 = load i8* @sc, align 1
+ %0 = load i8, i8* @sc, align 1
%1 = zext i8 %0 to i32
- %2 = load i8* @uc, align 1
+ %2 = load i8, i8* @uc, align 1
%3 = zext i8 %2 to i32
%4 = trunc i32 %3 to i8
%5 = trunc i32 %1 to i8
%pair6 = cmpxchg i8* @sc, i8 %4, i8 %5 monotonic monotonic
%6 = extractvalue { i8, i1 } %pair6, 0
store i8 %6, i8* @sc, align 1
- %7 = load i8* @sc, align 1
+ %7 = load i8, i8* @sc, align 1
%8 = zext i8 %7 to i32
- %9 = load i8* @uc, align 1
+ %9 = load i8, i8* @uc, align 1
%10 = zext i8 %9 to i32
%11 = trunc i32 %10 to i8
%12 = trunc i32 %8 to i8
%pair13 = cmpxchg i8* @uc, i8 %11, i8 %12 monotonic monotonic
%13 = extractvalue { i8, i1 } %pair13, 0
store i8 %13, i8* @uc, align 1
- %14 = load i8* @sc, align 1
+ %14 = load i8, i8* @sc, align 1
%15 = sext i8 %14 to i16
%16 = zext i16 %15 to i32
- %17 = load i8* @uc, align 1
+ %17 = load i8, i8* @uc, align 1
%18 = zext i8 %17 to i32
%19 = bitcast i8* bitcast (i16* @ss to i8*) to i16*
%20 = trunc i32 %18 to i16
@@ -727,10 +727,10 @@ entry:
%pair22 = cmpxchg i16* %19, i16 %20, i16 %21 monotonic monotonic
%22 = extractvalue { i16, i1 } %pair22, 0
store i16 %22, i16* @ss, align 2
- %23 = load i8* @sc, align 1
+ %23 = load i8, i8* @sc, align 1
%24 = sext i8 %23 to i16
%25 = zext i16 %24 to i32
- %26 = load i8* @uc, align 1
+ %26 = load i8, i8* @uc, align 1
%27 = zext i8 %26 to i32
%28 = bitcast i8* bitcast (i16* @us to i8*) to i16*
%29 = trunc i32 %27 to i16
@@ -738,57 +738,57 @@ entry:
%pair31 = cmpxchg i16* %28, i16 %29, i16 %30 monotonic monotonic
%31 = extractvalue { i16, i1 } %pair31, 0
store i16 %31, i16* @us, align 2
- %32 = load i8* @sc, align 1
+ %32 = load i8, i8* @sc, align 1
%33 = sext i8 %32 to i32
- %34 = load i8* @uc, align 1
+ %34 = load i8, i8* @uc, align 1
%35 = zext i8 %34 to i32
%36 = bitcast i8* bitcast (i32* @si to i8*) to i32*
%pair37 = cmpxchg i32* %36, i32 %35, i32 %33 monotonic monotonic
%37 = extractvalue { i32, i1 } %pair37, 0
store i32 %37, i32* @si, align 4
- %38 = load i8* @sc, align 1
+ %38 = load i8, i8* @sc, align 1
%39 = sext i8 %38 to i32
- %40 = load i8* @uc, align 1
+ %40 = load i8, i8* @uc, align 1
%41 = zext i8 %40 to i32
%42 = bitcast i8* bitcast (i32* @ui to i8*) to i32*
%pair43 = cmpxchg i32* %42, i32 %41, i32 %39 monotonic monotonic
%43 = extractvalue { i32, i1 } %pair43, 0
store i32 %43, i32* @ui, align 4
- %44 = load i8* @sc, align 1
+ %44 = load i8, i8* @sc, align 1
%45 = sext i8 %44 to i64
- %46 = load i8* @uc, align 1
+ %46 = load i8, i8* @uc, align 1
%47 = zext i8 %46 to i64
%48 = bitcast i8* bitcast (i64* @sl to i8*) to i64*
%pair49 = cmpxchg i64* %48, i64 %47, i64 %45 monotonic monotonic
%49 = extractvalue { i64, i1 } %pair49, 0
store i64 %49, i64* @sl, align 8
- %50 = load i8* @sc, align 1
+ %50 = load i8, i8* @sc, align 1
%51 = sext i8 %50 to i64
- %52 = load i8* @uc, align 1
+ %52 = load i8, i8* @uc, align 1
%53 = zext i8 %52 to i64
%54 = bitcast i8* bitcast (i64* @ul to i8*) to i64*
%pair55 = cmpxchg i64* %54, i64 %53, i64 %51 monotonic monotonic
%55 = extractvalue { i64, i1 } %pair55, 0
store i64 %55, i64* @ul, align 8
- %56 = load i8* @sc, align 1
+ %56 = load i8, i8* @sc, align 1
%57 = sext i8 %56 to i64
- %58 = load i8* @uc, align 1
+ %58 = load i8, i8* @uc, align 1
%59 = zext i8 %58 to i64
%60 = bitcast i8* bitcast (i64* @sll to i8*) to i64*
%pair61 = cmpxchg i64* %60, i64 %59, i64 %57 monotonic monotonic
%61 = extractvalue { i64, i1 } %pair61, 0
store i64 %61, i64* @sll, align 8
- %62 = load i8* @sc, align 1
+ %62 = load i8, i8* @sc, align 1
%63 = sext i8 %62 to i64
- %64 = load i8* @uc, align 1
+ %64 = load i8, i8* @uc, align 1
%65 = zext i8 %64 to i64
%66 = bitcast i8* bitcast (i64* @ull to i8*) to i64*
%pair67 = cmpxchg i64* %66, i64 %65, i64 %63 monotonic monotonic
%67 = extractvalue { i64, i1 } %pair67, 0
store i64 %67, i64* @ull, align 8
- %68 = load i8* @sc, align 1
+ %68 = load i8, i8* @sc, align 1
%69 = zext i8 %68 to i32
- %70 = load i8* @uc, align 1
+ %70 = load i8, i8* @uc, align 1
%71 = zext i8 %70 to i32
%72 = trunc i32 %71 to i8
%73 = trunc i32 %69 to i8
@@ -798,9 +798,9 @@ entry:
%76 = zext i1 %75 to i8
%77 = zext i8 %76 to i32
store i32 %77, i32* @ui, align 4
- %78 = load i8* @sc, align 1
+ %78 = load i8, i8* @sc, align 1
%79 = zext i8 %78 to i32
- %80 = load i8* @uc, align 1
+ %80 = load i8, i8* @uc, align 1
%81 = zext i8 %80 to i32
%82 = trunc i32 %81 to i8
%83 = trunc i32 %79 to i8
@@ -810,10 +810,10 @@ entry:
%86 = zext i1 %85 to i8
%87 = zext i8 %86 to i32
store i32 %87, i32* @ui, align 4
- %88 = load i8* @sc, align 1
+ %88 = load i8, i8* @sc, align 1
%89 = sext i8 %88 to i16
%90 = zext i16 %89 to i32
- %91 = load i8* @uc, align 1
+ %91 = load i8, i8* @uc, align 1
%92 = zext i8 %91 to i32
%93 = trunc i32 %92 to i8
%94 = trunc i32 %90 to i8
@@ -823,10 +823,10 @@ entry:
%97 = zext i1 %96 to i8
%98 = zext i8 %97 to i32
store i32 %98, i32* @ui, align 4
- %99 = load i8* @sc, align 1
+ %99 = load i8, i8* @sc, align 1
%100 = sext i8 %99 to i16
%101 = zext i16 %100 to i32
- %102 = load i8* @uc, align 1
+ %102 = load i8, i8* @uc, align 1
%103 = zext i8 %102 to i32
%104 = trunc i32 %103 to i8
%105 = trunc i32 %101 to i8
@@ -836,9 +836,9 @@ entry:
%108 = zext i1 %107 to i8
%109 = zext i8 %108 to i32
store i32 %109, i32* @ui, align 4
- %110 = load i8* @sc, align 1
+ %110 = load i8, i8* @sc, align 1
%111 = sext i8 %110 to i32
- %112 = load i8* @uc, align 1
+ %112 = load i8, i8* @uc, align 1
%113 = zext i8 %112 to i32
%114 = trunc i32 %113 to i8
%115 = trunc i32 %111 to i8
@@ -848,9 +848,9 @@ entry:
%118 = zext i1 %117 to i8
%119 = zext i8 %118 to i32
store i32 %119, i32* @ui, align 4
- %120 = load i8* @sc, align 1
+ %120 = load i8, i8* @sc, align 1
%121 = sext i8 %120 to i32
- %122 = load i8* @uc, align 1
+ %122 = load i8, i8* @uc, align 1
%123 = zext i8 %122 to i32
%124 = trunc i32 %123 to i8
%125 = trunc i32 %121 to i8
@@ -860,9 +860,9 @@ entry:
%128 = zext i1 %127 to i8
%129 = zext i8 %128 to i32
store i32 %129, i32* @ui, align 4
- %130 = load i8* @sc, align 1
+ %130 = load i8, i8* @sc, align 1
%131 = sext i8 %130 to i64
- %132 = load i8* @uc, align 1
+ %132 = load i8, i8* @uc, align 1
%133 = zext i8 %132 to i64
%134 = trunc i64 %133 to i8
%135 = trunc i64 %131 to i8
@@ -872,9 +872,9 @@ entry:
%138 = zext i1 %137 to i8
%139 = zext i8 %138 to i32
store i32 %139, i32* @ui, align 4
- %140 = load i8* @sc, align 1
+ %140 = load i8, i8* @sc, align 1
%141 = sext i8 %140 to i64
- %142 = load i8* @uc, align 1
+ %142 = load i8, i8* @uc, align 1
%143 = zext i8 %142 to i64
%144 = trunc i64 %143 to i8
%145 = trunc i64 %141 to i8
@@ -884,9 +884,9 @@ entry:
%148 = zext i1 %147 to i8
%149 = zext i8 %148 to i32
store i32 %149, i32* @ui, align 4
- %150 = load i8* @sc, align 1
+ %150 = load i8, i8* @sc, align 1
%151 = sext i8 %150 to i64
- %152 = load i8* @uc, align 1
+ %152 = load i8, i8* @uc, align 1
%153 = zext i8 %152 to i64
%154 = trunc i64 %153 to i8
%155 = trunc i64 %151 to i8
@@ -896,9 +896,9 @@ entry:
%158 = zext i1 %157 to i8
%159 = zext i8 %158 to i32
store i32 %159, i32* @ui, align 4
- %160 = load i8* @sc, align 1
+ %160 = load i8, i8* @sc, align 1
%161 = sext i8 %160 to i64
- %162 = load i8* @uc, align 1
+ %162 = load i8, i8* @uc, align 1
%163 = zext i8 %162 to i64
%164 = trunc i64 %163 to i8
%165 = trunc i64 %161 to i8
diff --git a/test/CodeGen/X86/GC/alloc_loop.ll b/test/CodeGen/X86/GC/alloc_loop.ll
index fb78ba2cd10b..2a505e80aac8 100644
--- a/test/CodeGen/X86/GC/alloc_loop.ll
+++ b/test/CodeGen/X86/GC/alloc_loop.ll
@@ -31,8 +31,8 @@ entry:
store i8** %tmp.2, i8*** %B
;; *B = A;
- %B.1 = load i8*** %B
- %A.1 = load i8** %A
+ %B.1 = load i8**, i8*** %B
+ %A.1 = load i8*, i8** %A
call void @llvm.gcwrite(i8* %A.1, i8* %B.upgrd.1, i8** %B.1)
br label %AllocLoop
diff --git a/test/CodeGen/X86/GC/argpromotion.ll b/test/CodeGen/X86/GC/argpromotion.ll
index c63ce222b869..37baf325007d 100644
--- a/test/CodeGen/X86/GC/argpromotion.ll
+++ b/test/CodeGen/X86/GC/argpromotion.ll
@@ -14,6 +14,6 @@ define internal i32 @f(i32* %xp) gc "example" {
entry:
%var = alloca i8*
call void @llvm.gcroot(i8** %var, i8* null)
- %x = load i32* %xp
+ %x = load i32, i32* %xp
ret i32 %x
}
diff --git a/test/CodeGen/X86/GC/badreadproto.ll b/test/CodeGen/X86/GC/badreadproto.ll
index 4fe90b90833a..37672f804357 100644
--- a/test/CodeGen/X86/GC/badreadproto.ll
+++ b/test/CodeGen/X86/GC/badreadproto.ll
@@ -7,7 +7,7 @@
declare %list* @llvm.gcread(%list*, %list**)
define %list* @tl(%list* %l) gc "example" {
- %hd.ptr = getelementptr %list* %l, i32 0, i32 0
+ %hd.ptr = getelementptr %list, %list* %l, i32 0, i32 0
%hd = call %list* @llvm.gcread(%list* %l, %list** %hd.ptr)
ret i32 %tmp
}
diff --git a/test/CodeGen/X86/GC/badwriteproto.ll b/test/CodeGen/X86/GC/badwriteproto.ll
index be81f842672e..2544e40f81ff 100644
--- a/test/CodeGen/X86/GC/badwriteproto.ll
+++ b/test/CodeGen/X86/GC/badwriteproto.ll
@@ -7,13 +7,13 @@
declare void @llvm.gcwrite(%list*, %list*, %list**)
define %list* @cons(i32 %hd, %list* %tl) gc "example" {
- %tmp = call i8* @gcalloc(i32 bitcast(%list* getelementptr(%list* null, i32 1) to i32))
+ %tmp = call i8* @gcalloc(i32 bitcast(%list* getelementptr(%list, %list* null, i32 1) to i32))
%cell = bitcast i8* %tmp to %list*
- %hd.ptr = getelementptr %list* %cell, i32 0, i32 0
+ %hd.ptr = getelementptr %list, %list* %cell, i32 0, i32 0
store i32 %hd, i32* %hd.ptr
- %tl.ptr = getelementptr %list* %cell, i32 0, i32 0
+ %tl.ptr = getelementptr %list, %list* %cell, i32 0, i32 0
call void @llvm.gcwrite(%list* %tl, %list* %cell, %list** %tl.ptr)
ret %cell.2
diff --git a/test/CodeGen/X86/GC/dynamic-frame-size.ll b/test/CodeGen/X86/GC/dynamic-frame-size.ll
new file mode 100644
index 000000000000..a3583d46a29a
--- /dev/null
+++ b/test/CodeGen/X86/GC/dynamic-frame-size.ll
@@ -0,0 +1,28 @@
+; RUN: llc < %s | FileCheck %s
+target datalayout = "e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-linux-gnu"
+
+declare void @use(<4 x i8*>*)
+
+; Test that a frame which requires dynamic relocation produces a stack map
+; with a size of UINT64_MAX.
+define void @test(i8* %ptr) gc "erlang" {
+ ; 32 byte alignment (for the alloca) is larger than the default
+ ; 16 byte alignment
+ %slot = alloca <4 x i8*>
+ call void @use(<4 x i8*>* %slot);
+ ret void
+}
+
+; CHECK: .note.gc
+; CHECK-NEXT: .align 8
+; safe point count
+; CHECK .short 1
+; CHECK .long .Ltmp0
+; stack frame size (in words)
+; CHECK .short -1
+; stack arity (arguments on the stack)
+; CHECK .short 0
+; live root count
+; CHECK .short 0
+
diff --git a/test/CodeGen/X86/GC/inline.ll b/test/CodeGen/X86/GC/inline.ll
index 9da33aef8dd3..9d74c1faa39d 100644
--- a/test/CodeGen/X86/GC/inline.ll
+++ b/test/CodeGen/X86/GC/inline.ll
@@ -15,8 +15,8 @@ define internal i32 @g() gc "example" {
%obj = call %IntArray* @h( ) ; <%IntArray*> [#uses=2]
%obj.2 = bitcast %IntArray* %obj to i8* ; <i8*> [#uses=1]
store i8* %obj.2, i8** %root
- %Length.ptr = getelementptr %IntArray* %obj, i32 0, i32 0 ; <i32*> [#uses=1]
- %Length = load i32* %Length.ptr ; <i32> [#uses=1]
+ %Length.ptr = getelementptr %IntArray, %IntArray* %obj, i32 0, i32 0 ; <i32*> [#uses=1]
+ %Length = load i32, i32* %Length.ptr ; <i32> [#uses=1]
ret i32 %Length
}
diff --git a/test/CodeGen/X86/GC/inline2.ll b/test/CodeGen/X86/GC/inline2.ll
index 15947056ee39..034c985a1df4 100644
--- a/test/CodeGen/X86/GC/inline2.ll
+++ b/test/CodeGen/X86/GC/inline2.ll
@@ -16,8 +16,8 @@ define internal i32 @g() gc "example" {
%obj = call %IntArray* @h( ) ; <%IntArray*> [#uses=2]
%obj.2 = bitcast %IntArray* %obj to i8* ; <i8*> [#uses=1]
store i8* %obj.2, i8** %root
- %Length.ptr = getelementptr %IntArray* %obj, i32 0, i32 0 ; <i32*> [#uses=1]
- %Length = load i32* %Length.ptr ; <i32> [#uses=1]
+ %Length.ptr = getelementptr %IntArray, %IntArray* %obj, i32 0, i32 0 ; <i32*> [#uses=1]
+ %Length = load i32, i32* %Length.ptr ; <i32> [#uses=1]
ret i32 %Length
}
diff --git a/test/CodeGen/X86/MachineBranchProb.ll b/test/CodeGen/X86/MachineBranchProb.ll
index cf41ef2ea3ad..408c6b9151c3 100644
--- a/test/CodeGen/X86/MachineBranchProb.ll
+++ b/test/CodeGen/X86/MachineBranchProb.ll
@@ -13,14 +13,14 @@ for.cond2: ; preds = %for.inc, %for.cond
%i.1 = phi i32 [ %inc19, %for.inc ], [ 0, %for.cond ]
%bit.0 = phi i32 [ %shl, %for.inc ], [ 1, %for.cond ]
%tobool = icmp eq i32 %bit.0, 0
- %v3 = load i32* @max_regno, align 4
+ %v3 = load i32, i32* @max_regno, align 4
%cmp4 = icmp eq i32 %i.1, %v3
%or.cond = or i1 %tobool, %cmp4
br i1 %or.cond, label %for.inc20, label %for.inc, !prof !0
; CHECK: BB#1: derived from LLVM BB %for.cond2
-; CHECK: Successors according to CFG: BB#3(56008718) BB#4(2203492365)
+; CHECK: Successors according to CFG: BB#3(56008718) BB#4(3615818718)
; CHECK: BB#4: derived from LLVM BB %for.cond2
-; CHECK: Successors according to CFG: BB#3(112017436) BB#2(4294967294)
+; CHECK: Successors according to CFG: BB#3(56008718) BB#2(3559810000)
for.inc: ; preds = %for.cond2
%shl = shl i32 %bit.0, 1
diff --git a/test/CodeGen/X86/MachineSink-DbgValue.ll b/test/CodeGen/X86/MachineSink-DbgValue.ll
index 3a2c58f97e8c..6f057c5f18e6 100644
--- a/test/CodeGen/X86/MachineSink-DbgValue.ll
+++ b/test/CodeGen/X86/MachineSink-DbgValue.ll
@@ -4,10 +4,10 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
target triple = "x86_64-apple-macosx10.7.0"
define i32 @foo(i32 %i, i32* nocapture %c) nounwind uwtable readonly ssp {
- tail call void @llvm.dbg.value(metadata i32 %i, i64 0, metadata !6, metadata !{!"0x102"}), !dbg !12
- %ab = load i32* %c, align 1, !dbg !14
- tail call void @llvm.dbg.value(metadata i32* %c, i64 0, metadata !7, metadata !{!"0x102"}), !dbg !13
- tail call void @llvm.dbg.value(metadata i32 %ab, i64 0, metadata !10, metadata !{!"0x102"}), !dbg !14
+ tail call void @llvm.dbg.value(metadata i32 %i, i64 0, metadata !6, metadata !DIExpression()), !dbg !12
+ %ab = load i32, i32* %c, align 1, !dbg !14
+ tail call void @llvm.dbg.value(metadata i32* %c, i64 0, metadata !7, metadata !DIExpression()), !dbg !13
+ tail call void @llvm.dbg.value(metadata i32 %ab, i64 0, metadata !10, metadata !DIExpression()), !dbg !14
%cd = icmp eq i32 %i, 42, !dbg !15
br i1 %cd, label %bb1, label %bb2, !dbg !15
@@ -28,26 +28,26 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnon
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!22}
-!0 = !{!"0x11\0012\00Apple clang version 3.0 (tags/Apple/clang-211.10.1) (based on LLVM 3.0svn)\001\00\000\00\001", !20, !21, !21, !18, null, null} ; [ DW_TAG_compile_unit ]
-!1 = !{!"0x2e\00foo\00foo\00\002\000\001\000\006\00256\001\000", !20, !2, !3, null, i32 (i32, i32*)* @foo, null, null, !19} ; [ DW_TAG_subprogram ] [line 2] [def] [scope 0] [foo]
-!2 = !{!"0x29", !20} ; [ DW_TAG_file_type ]
-!3 = !{!"0x15\00\000\000\000\000\000\000", !20, !2, null, !4, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!0 = !DICompileUnit(language: DW_LANG_C99, producer: "Apple clang version 3.0 (tags/Apple/clang-211.10.1) (based on LLVM 3.0svn)", isOptimized: true, emissionKind: 1, file: !20, enums: !21, retainedTypes: !21, subprograms: !18, imports: null)
+!1 = !DISubprogram(name: "foo", line: 2, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, file: !20, scope: !2, type: !3, function: i32 (i32, i32*)* @foo, variables: !19)
+!2 = !DIFile(filename: "a.c", directory: "/private/tmp")
+!3 = !DISubroutineType(types: !4)
!4 = !{!5}
-!5 = !{!"0x24\00int\000\0032\0032\000\000\005", null, !0} ; [ DW_TAG_base_type ]
-!6 = !{!"0x101\00i\0016777218\000", !1, !2, !5} ; [ DW_TAG_arg_variable ]
-!7 = !{!"0x101\00c\0033554434\000", !1, !2, !8} ; [ DW_TAG_arg_variable ]
-!8 = !{!"0xf\00\000\0064\0064\000\000", null, !0, !9} ; [ DW_TAG_pointer_type ]
-!9 = !{!"0x24\00char\000\008\008\000\000\006", null, !0} ; [ DW_TAG_base_type ]
-!10 = !{!"0x100\00a\003\000", !11, !2, !9} ; [ DW_TAG_auto_variable ]
-!11 = !{!"0xb\002\0025\000", !20, !1} ; [ DW_TAG_lexical_block ]
-!12 = !MDLocation(line: 2, column: 13, scope: !1)
-!13 = !MDLocation(line: 2, column: 22, scope: !1)
-!14 = !MDLocation(line: 3, column: 14, scope: !11)
-!15 = !MDLocation(line: 4, column: 3, scope: !11)
-!16 = !MDLocation(line: 5, column: 5, scope: !11)
-!17 = !MDLocation(line: 7, column: 1, scope: !11)
+!5 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!6 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "i", line: 2, arg: 1, scope: !1, file: !2, type: !5)
+!7 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "c", line: 2, arg: 2, scope: !1, file: !2, type: !8)
+!8 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, scope: !0, baseType: !9)
+!9 = !DIBasicType(tag: DW_TAG_base_type, name: "char", size: 8, align: 8, encoding: DW_ATE_signed_char)
+!10 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "a", line: 3, scope: !11, file: !2, type: !9)
+!11 = distinct !DILexicalBlock(line: 2, column: 25, file: !20, scope: !1)
+!12 = !DILocation(line: 2, column: 13, scope: !1)
+!13 = !DILocation(line: 2, column: 22, scope: !1)
+!14 = !DILocation(line: 3, column: 14, scope: !11)
+!15 = !DILocation(line: 4, column: 3, scope: !11)
+!16 = !DILocation(line: 5, column: 5, scope: !11)
+!17 = !DILocation(line: 7, column: 1, scope: !11)
!18 = !{!1}
!19 = !{!6, !7, !10}
-!20 = !{!"a.c", !"/private/tmp"}
-!21 = !{i32 0}
-!22 = !{i32 1, !"Debug Info Version", i32 2}
+!20 = !DIFile(filename: "a.c", directory: "/private/tmp")
+!21 = !{}
+!22 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/CodeGen/X86/MachineSink-eflags.ll b/test/CodeGen/X86/MachineSink-eflags.ll
index 5b8c7b205279..4e52c8c5f7d0 100644
--- a/test/CodeGen/X86/MachineSink-eflags.ll
+++ b/test/CodeGen/X86/MachineSink-eflags.ll
@@ -16,18 +16,18 @@ entry:
%i2 = alloca i8*, align 8
%b.i = alloca [16 x <2 x double>], align 16
%conv = bitcast i8* %_stubArgs to i32*
- %tmp1 = load i32* %conv, align 4
- %ptr8 = getelementptr i8* %_stubArgs, i64 16
+ %tmp1 = load i32, i32* %conv, align 4
+ %ptr8 = getelementptr i8, i8* %_stubArgs, i64 16
%i4 = bitcast i8* %ptr8 to <2 x double>*
- %ptr20 = getelementptr i8* %_stubArgs, i64 48
+ %ptr20 = getelementptr i8, i8* %_stubArgs, i64 48
%i7 = bitcast i8* %ptr20 to <2 x double> addrspace(1)**
- %tmp21 = load <2 x double> addrspace(1)** %i7, align 8
- %ptr28 = getelementptr i8* %_stubArgs, i64 64
+ %tmp21 = load <2 x double> addrspace(1)*, <2 x double> addrspace(1)** %i7, align 8
+ %ptr28 = getelementptr i8, i8* %_stubArgs, i64 64
%i9 = bitcast i8* %ptr28 to i32*
- %tmp29 = load i32* %i9, align 4
- %ptr32 = getelementptr i8* %_stubArgs, i64 68
+ %tmp29 = load i32, i32* %i9, align 4
+ %ptr32 = getelementptr i8, i8* %_stubArgs, i64 68
%i10 = bitcast i8* %ptr32 to i32*
- %tmp33 = load i32* %i10, align 4
+ %tmp33 = load i32, i32* %i10, align 4
%tmp17.i = mul i32 10, 20
%tmp19.i = add i32 %tmp17.i, %tmp33
%conv21.i = zext i32 %tmp19.i to i64
@@ -35,8 +35,8 @@ entry:
%tmp42.i = add i32 %tmp6.i, 17
%tmp44.i = insertelement <2 x i32> undef, i32 %tmp42.i, i32 1
%tmp96676677.i = or i32 17, -4
- %ptr4438.i = getelementptr inbounds [16 x <2 x double>]* %b.i, i64 0, i64 0
- %arrayidx4506.i = getelementptr [16 x <2 x double>]* %b.i, i64 0, i64 4
+ %ptr4438.i = getelementptr inbounds [16 x <2 x double>], [16 x <2 x double>]* %b.i, i64 0, i64 0
+ %arrayidx4506.i = getelementptr [16 x <2 x double>], [16 x <2 x double>]* %b.i, i64 0, i64 4
%tmp52.i = insertelement <2 x i32> %tmp44.i, i32 0, i32 0
%tmp78.i = extractelement <2 x i32> %tmp44.i, i32 1
%tmp97.i = add i32 %tmp78.i, %tmp96676677.i
@@ -48,15 +48,15 @@ entry:
%i39 = add i32 %tmp158.i, %i38
%conv160.i = zext i32 %i39 to i64
%tmp22.sum652.i = add i64 %conv160.i, %conv21.i
- %arrayidx161.i = getelementptr <2 x double> addrspace(1)* %tmp21, i64 %tmp22.sum652.i
- %tmp162.i = load <2 x double> addrspace(1)* %arrayidx161.i, align 16
+ %arrayidx161.i = getelementptr <2 x double>, <2 x double> addrspace(1)* %tmp21, i64 %tmp22.sum652.i
+ %tmp162.i = load <2 x double>, <2 x double> addrspace(1)* %arrayidx161.i, align 16
%tmp222.i = add i32 %tmp154.i, 1
%i43 = mul i32 %tmp222.i, %tmp29
%i44 = add i32 %tmp158.i, %i43
%conv228.i = zext i32 %i44 to i64
%tmp22.sum656.i = add i64 %conv228.i, %conv21.i
- %arrayidx229.i = getelementptr <2 x double> addrspace(1)* %tmp21, i64 %tmp22.sum656.i
- %tmp230.i = load <2 x double> addrspace(1)* %arrayidx229.i, align 16
+ %arrayidx229.i = getelementptr <2 x double>, <2 x double> addrspace(1)* %tmp21, i64 %tmp22.sum656.i
+ %tmp230.i = load <2 x double>, <2 x double> addrspace(1)* %arrayidx229.i, align 16
%cmp432.i = icmp ult i32 %tmp156.i, %tmp1
; %shl.i should not be sinked below the compare.
diff --git a/test/CodeGen/X86/MergeConsecutiveStores.ll b/test/CodeGen/X86/MergeConsecutiveStores.ll
index dfdaea523fdf..275d4213bd2b 100644
--- a/test/CodeGen/X86/MergeConsecutiveStores.ll
+++ b/test/CodeGen/X86/MergeConsecutiveStores.ll
@@ -17,24 +17,24 @@ define void @merge_const_store(i32 %count, %struct.A* nocapture %p) nounwind uwt
.lr.ph:
%i.02 = phi i32 [ %10, %.lr.ph ], [ 0, %0 ]
%.01 = phi %struct.A* [ %11, %.lr.ph ], [ %p, %0 ]
- %2 = getelementptr inbounds %struct.A* %.01, i64 0, i32 0
+ %2 = getelementptr inbounds %struct.A, %struct.A* %.01, i64 0, i32 0
store i8 1, i8* %2, align 1
- %3 = getelementptr inbounds %struct.A* %.01, i64 0, i32 1
+ %3 = getelementptr inbounds %struct.A, %struct.A* %.01, i64 0, i32 1
store i8 2, i8* %3, align 1
- %4 = getelementptr inbounds %struct.A* %.01, i64 0, i32 2
+ %4 = getelementptr inbounds %struct.A, %struct.A* %.01, i64 0, i32 2
store i8 3, i8* %4, align 1
- %5 = getelementptr inbounds %struct.A* %.01, i64 0, i32 3
+ %5 = getelementptr inbounds %struct.A, %struct.A* %.01, i64 0, i32 3
store i8 4, i8* %5, align 1
- %6 = getelementptr inbounds %struct.A* %.01, i64 0, i32 4
+ %6 = getelementptr inbounds %struct.A, %struct.A* %.01, i64 0, i32 4
store i8 5, i8* %6, align 1
- %7 = getelementptr inbounds %struct.A* %.01, i64 0, i32 5
+ %7 = getelementptr inbounds %struct.A, %struct.A* %.01, i64 0, i32 5
store i8 6, i8* %7, align 1
- %8 = getelementptr inbounds %struct.A* %.01, i64 0, i32 6
+ %8 = getelementptr inbounds %struct.A, %struct.A* %.01, i64 0, i32 6
store i8 7, i8* %8, align 1
- %9 = getelementptr inbounds %struct.A* %.01, i64 0, i32 7
+ %9 = getelementptr inbounds %struct.A, %struct.A* %.01, i64 0, i32 7
store i8 8, i8* %9, align 1
%10 = add nsw i32 %i.02, 1
- %11 = getelementptr inbounds %struct.A* %.01, i64 1
+ %11 = getelementptr inbounds %struct.A, %struct.A* %.01, i64 1
%exitcond = icmp eq i32 %10, %count
br i1 %exitcond, label %._crit_edge, label %.lr.ph
._crit_edge:
@@ -51,24 +51,24 @@ define void @merge_const_store_no_vec(i32 %count, %struct.B* nocapture %p) noimp
.lr.ph:
%i.02 = phi i32 [ %10, %.lr.ph ], [ 0, %0 ]
%.01 = phi %struct.B* [ %11, %.lr.ph ], [ %p, %0 ]
- %2 = getelementptr inbounds %struct.B* %.01, i64 0, i32 0
+ %2 = getelementptr inbounds %struct.B, %struct.B* %.01, i64 0, i32 0
store i32 0, i32* %2, align 4
- %3 = getelementptr inbounds %struct.B* %.01, i64 0, i32 1
+ %3 = getelementptr inbounds %struct.B, %struct.B* %.01, i64 0, i32 1
store i32 0, i32* %3, align 4
- %4 = getelementptr inbounds %struct.B* %.01, i64 0, i32 2
+ %4 = getelementptr inbounds %struct.B, %struct.B* %.01, i64 0, i32 2
store i32 0, i32* %4, align 4
- %5 = getelementptr inbounds %struct.B* %.01, i64 0, i32 3
+ %5 = getelementptr inbounds %struct.B, %struct.B* %.01, i64 0, i32 3
store i32 0, i32* %5, align 4
- %6 = getelementptr inbounds %struct.B* %.01, i64 0, i32 4
+ %6 = getelementptr inbounds %struct.B, %struct.B* %.01, i64 0, i32 4
store i32 0, i32* %6, align 4
- %7 = getelementptr inbounds %struct.B* %.01, i64 0, i32 5
+ %7 = getelementptr inbounds %struct.B, %struct.B* %.01, i64 0, i32 5
store i32 0, i32* %7, align 4
- %8 = getelementptr inbounds %struct.B* %.01, i64 0, i32 6
+ %8 = getelementptr inbounds %struct.B, %struct.B* %.01, i64 0, i32 6
store i32 0, i32* %8, align 4
- %9 = getelementptr inbounds %struct.B* %.01, i64 0, i32 7
+ %9 = getelementptr inbounds %struct.B, %struct.B* %.01, i64 0, i32 7
store i32 0, i32* %9, align 4
%10 = add nsw i32 %i.02, 1
- %11 = getelementptr inbounds %struct.B* %.01, i64 1
+ %11 = getelementptr inbounds %struct.B, %struct.B* %.01, i64 1
%exitcond = icmp eq i32 %10, %count
br i1 %exitcond, label %._crit_edge, label %.lr.ph
._crit_edge:
@@ -85,24 +85,24 @@ define void @merge_const_store_vec(i32 %count, %struct.B* nocapture %p) nounwind
.lr.ph:
%i.02 = phi i32 [ %10, %.lr.ph ], [ 0, %0 ]
%.01 = phi %struct.B* [ %11, %.lr.ph ], [ %p, %0 ]
- %2 = getelementptr inbounds %struct.B* %.01, i64 0, i32 0
+ %2 = getelementptr inbounds %struct.B, %struct.B* %.01, i64 0, i32 0
store i32 0, i32* %2, align 4
- %3 = getelementptr inbounds %struct.B* %.01, i64 0, i32 1
+ %3 = getelementptr inbounds %struct.B, %struct.B* %.01, i64 0, i32 1
store i32 0, i32* %3, align 4
- %4 = getelementptr inbounds %struct.B* %.01, i64 0, i32 2
+ %4 = getelementptr inbounds %struct.B, %struct.B* %.01, i64 0, i32 2
store i32 0, i32* %4, align 4
- %5 = getelementptr inbounds %struct.B* %.01, i64 0, i32 3
+ %5 = getelementptr inbounds %struct.B, %struct.B* %.01, i64 0, i32 3
store i32 0, i32* %5, align 4
- %6 = getelementptr inbounds %struct.B* %.01, i64 0, i32 4
+ %6 = getelementptr inbounds %struct.B, %struct.B* %.01, i64 0, i32 4
store i32 0, i32* %6, align 4
- %7 = getelementptr inbounds %struct.B* %.01, i64 0, i32 5
+ %7 = getelementptr inbounds %struct.B, %struct.B* %.01, i64 0, i32 5
store i32 0, i32* %7, align 4
- %8 = getelementptr inbounds %struct.B* %.01, i64 0, i32 6
+ %8 = getelementptr inbounds %struct.B, %struct.B* %.01, i64 0, i32 6
store i32 0, i32* %8, align 4
- %9 = getelementptr inbounds %struct.B* %.01, i64 0, i32 7
+ %9 = getelementptr inbounds %struct.B, %struct.B* %.01, i64 0, i32 7
store i32 0, i32* %9, align 4
%10 = add nsw i32 %i.02, 1
- %11 = getelementptr inbounds %struct.B* %.01, i64 1
+ %11 = getelementptr inbounds %struct.B, %struct.B* %.01, i64 1
%exitcond = icmp eq i32 %10, %count
br i1 %exitcond, label %._crit_edge, label %.lr.ph
._crit_edge:
@@ -123,24 +123,24 @@ define void @merge_nonconst_store(i32 %count, i8 %zz, %struct.A* nocapture %p) n
.lr.ph:
%i.02 = phi i32 [ %10, %.lr.ph ], [ 0, %0 ]
%.01 = phi %struct.A* [ %11, %.lr.ph ], [ %p, %0 ]
- %2 = getelementptr inbounds %struct.A* %.01, i64 0, i32 0
+ %2 = getelementptr inbounds %struct.A, %struct.A* %.01, i64 0, i32 0
store i8 1, i8* %2, align 1
- %3 = getelementptr inbounds %struct.A* %.01, i64 0, i32 1
+ %3 = getelementptr inbounds %struct.A, %struct.A* %.01, i64 0, i32 1
store i8 2, i8* %3, align 1
- %4 = getelementptr inbounds %struct.A* %.01, i64 0, i32 2
+ %4 = getelementptr inbounds %struct.A, %struct.A* %.01, i64 0, i32 2
store i8 3, i8* %4, align 1
- %5 = getelementptr inbounds %struct.A* %.01, i64 0, i32 3
+ %5 = getelementptr inbounds %struct.A, %struct.A* %.01, i64 0, i32 3
store i8 4, i8* %5, align 1
- %6 = getelementptr inbounds %struct.A* %.01, i64 0, i32 4
+ %6 = getelementptr inbounds %struct.A, %struct.A* %.01, i64 0, i32 4
store i8 %zz, i8* %6, align 1 ; <----------- Not a const;
- %7 = getelementptr inbounds %struct.A* %.01, i64 0, i32 5
+ %7 = getelementptr inbounds %struct.A, %struct.A* %.01, i64 0, i32 5
store i8 6, i8* %7, align 1
- %8 = getelementptr inbounds %struct.A* %.01, i64 0, i32 6
+ %8 = getelementptr inbounds %struct.A, %struct.A* %.01, i64 0, i32 6
store i8 7, i8* %8, align 1
- %9 = getelementptr inbounds %struct.A* %.01, i64 0, i32 7
+ %9 = getelementptr inbounds %struct.A, %struct.A* %.01, i64 0, i32 7
store i8 8, i8* %9, align 1
%10 = add nsw i32 %i.02, 1
- %11 = getelementptr inbounds %struct.A* %.01, i64 1
+ %11 = getelementptr inbounds %struct.A, %struct.A* %.01, i64 1
%exitcond = icmp eq i32 %10, %count
br i1 %exitcond, label %._crit_edge, label %.lr.ph
._crit_edge:
@@ -159,21 +159,21 @@ define void @merge_loads_i16(i32 %count, %struct.A* noalias nocapture %q, %struc
br i1 %1, label %.lr.ph, label %._crit_edge
.lr.ph: ; preds = %0
- %2 = getelementptr inbounds %struct.A* %q, i64 0, i32 0
- %3 = getelementptr inbounds %struct.A* %q, i64 0, i32 1
+ %2 = getelementptr inbounds %struct.A, %struct.A* %q, i64 0, i32 0
+ %3 = getelementptr inbounds %struct.A, %struct.A* %q, i64 0, i32 1
br label %4
; <label>:4 ; preds = %4, %.lr.ph
%i.02 = phi i32 [ 0, %.lr.ph ], [ %9, %4 ]
%.01 = phi %struct.A* [ %p, %.lr.ph ], [ %10, %4 ]
- %5 = load i8* %2, align 1
- %6 = load i8* %3, align 1
- %7 = getelementptr inbounds %struct.A* %.01, i64 0, i32 0
+ %5 = load i8, i8* %2, align 1
+ %6 = load i8, i8* %3, align 1
+ %7 = getelementptr inbounds %struct.A, %struct.A* %.01, i64 0, i32 0
store i8 %5, i8* %7, align 1
- %8 = getelementptr inbounds %struct.A* %.01, i64 0, i32 1
+ %8 = getelementptr inbounds %struct.A, %struct.A* %.01, i64 0, i32 1
store i8 %6, i8* %8, align 1
%9 = add nsw i32 %i.02, 1
- %10 = getelementptr inbounds %struct.A* %.01, i64 1
+ %10 = getelementptr inbounds %struct.A, %struct.A* %.01, i64 1
%exitcond = icmp eq i32 %9, %count
br i1 %exitcond, label %._crit_edge, label %4
@@ -193,21 +193,21 @@ define void @no_merge_loads(i32 %count, %struct.A* noalias nocapture %q, %struct
br i1 %1, label %.lr.ph, label %._crit_edge
.lr.ph: ; preds = %0
- %2 = getelementptr inbounds %struct.A* %q, i64 0, i32 0
- %3 = getelementptr inbounds %struct.A* %q, i64 0, i32 1
+ %2 = getelementptr inbounds %struct.A, %struct.A* %q, i64 0, i32 0
+ %3 = getelementptr inbounds %struct.A, %struct.A* %q, i64 0, i32 1
br label %a4
a4: ; preds = %4, %.lr.ph
%i.02 = phi i32 [ 0, %.lr.ph ], [ %a9, %a4 ]
%.01 = phi %struct.A* [ %p, %.lr.ph ], [ %a10, %a4 ]
- %a5 = load i8* %2, align 1
- %a7 = getelementptr inbounds %struct.A* %.01, i64 0, i32 0
+ %a5 = load i8, i8* %2, align 1
+ %a7 = getelementptr inbounds %struct.A, %struct.A* %.01, i64 0, i32 0
store i8 %a5, i8* %a7, align 1
- %a8 = getelementptr inbounds %struct.A* %.01, i64 0, i32 1
- %a6 = load i8* %3, align 1
+ %a8 = getelementptr inbounds %struct.A, %struct.A* %.01, i64 0, i32 1
+ %a6 = load i8, i8* %3, align 1
store i8 %a6, i8* %a8, align 1
%a9 = add nsw i32 %i.02, 1
- %a10 = getelementptr inbounds %struct.A* %.01, i64 1
+ %a10 = getelementptr inbounds %struct.A, %struct.A* %.01, i64 1
%exitcond = icmp eq i32 %a9, %count
br i1 %exitcond, label %._crit_edge, label %a4
@@ -227,21 +227,21 @@ define void @merge_loads_integer(i32 %count, %struct.B* noalias nocapture %q, %s
br i1 %1, label %.lr.ph, label %._crit_edge
.lr.ph: ; preds = %0
- %2 = getelementptr inbounds %struct.B* %q, i64 0, i32 0
- %3 = getelementptr inbounds %struct.B* %q, i64 0, i32 1
+ %2 = getelementptr inbounds %struct.B, %struct.B* %q, i64 0, i32 0
+ %3 = getelementptr inbounds %struct.B, %struct.B* %q, i64 0, i32 1
br label %4
; <label>:4 ; preds = %4, %.lr.ph
%i.02 = phi i32 [ 0, %.lr.ph ], [ %9, %4 ]
%.01 = phi %struct.B* [ %p, %.lr.ph ], [ %10, %4 ]
- %5 = load i32* %2
- %6 = load i32* %3
- %7 = getelementptr inbounds %struct.B* %.01, i64 0, i32 0
+ %5 = load i32, i32* %2
+ %6 = load i32, i32* %3
+ %7 = getelementptr inbounds %struct.B, %struct.B* %.01, i64 0, i32 0
store i32 %5, i32* %7
- %8 = getelementptr inbounds %struct.B* %.01, i64 0, i32 1
+ %8 = getelementptr inbounds %struct.B, %struct.B* %.01, i64 0, i32 1
store i32 %6, i32* %8
%9 = add nsw i32 %i.02, 1
- %10 = getelementptr inbounds %struct.B* %.01, i64 1
+ %10 = getelementptr inbounds %struct.B, %struct.B* %.01, i64 1
%exitcond = icmp eq i32 %9, %count
br i1 %exitcond, label %._crit_edge, label %4
@@ -261,29 +261,29 @@ define void @merge_loads_vector(i32 %count, %struct.B* noalias nocapture %q, %st
br i1 %a1, label %.lr.ph, label %._crit_edge
.lr.ph: ; preds = %0
- %a2 = getelementptr inbounds %struct.B* %q, i64 0, i32 0
- %a3 = getelementptr inbounds %struct.B* %q, i64 0, i32 1
- %a4 = getelementptr inbounds %struct.B* %q, i64 0, i32 2
- %a5 = getelementptr inbounds %struct.B* %q, i64 0, i32 3
+ %a2 = getelementptr inbounds %struct.B, %struct.B* %q, i64 0, i32 0
+ %a3 = getelementptr inbounds %struct.B, %struct.B* %q, i64 0, i32 1
+ %a4 = getelementptr inbounds %struct.B, %struct.B* %q, i64 0, i32 2
+ %a5 = getelementptr inbounds %struct.B, %struct.B* %q, i64 0, i32 3
br label %block4
block4: ; preds = %4, %.lr.ph
%i.02 = phi i32 [ 0, %.lr.ph ], [ %c9, %block4 ]
%.01 = phi %struct.B* [ %p, %.lr.ph ], [ %c10, %block4 ]
- %a7 = getelementptr inbounds %struct.B* %.01, i64 0, i32 0
- %a8 = getelementptr inbounds %struct.B* %.01, i64 0, i32 1
- %a9 = getelementptr inbounds %struct.B* %.01, i64 0, i32 2
- %a10 = getelementptr inbounds %struct.B* %.01, i64 0, i32 3
- %b1 = load i32* %a2
- %b2 = load i32* %a3
- %b3 = load i32* %a4
- %b4 = load i32* %a5
+ %a7 = getelementptr inbounds %struct.B, %struct.B* %.01, i64 0, i32 0
+ %a8 = getelementptr inbounds %struct.B, %struct.B* %.01, i64 0, i32 1
+ %a9 = getelementptr inbounds %struct.B, %struct.B* %.01, i64 0, i32 2
+ %a10 = getelementptr inbounds %struct.B, %struct.B* %.01, i64 0, i32 3
+ %b1 = load i32, i32* %a2
+ %b2 = load i32, i32* %a3
+ %b3 = load i32, i32* %a4
+ %b4 = load i32, i32* %a5
store i32 %b1, i32* %a7
store i32 %b2, i32* %a8
store i32 %b3, i32* %a9
store i32 %b4, i32* %a10
%c9 = add nsw i32 %i.02, 1
- %c10 = getelementptr inbounds %struct.B* %.01, i64 1
+ %c10 = getelementptr inbounds %struct.B, %struct.B* %.01, i64 1
%exitcond = icmp eq i32 %c9, %count
br i1 %exitcond, label %._crit_edge, label %block4
@@ -291,46 +291,41 @@ block4: ; preds = %4, %.lr.ph
ret void
}
+;; On x86, even unaligned copies can be merged to vector ops.
; CHECK-LABEL: merge_loads_no_align:
; load:
-; CHECK: movl
-; CHECK: movl
-; CHECK: movl
-; CHECK: movl
+; CHECK: vmovups
; store:
-; CHECK: movl
-; CHECK: movl
-; CHECK: movl
-; CHECK: movl
+; CHECK: vmovups
; CHECK: ret
define void @merge_loads_no_align(i32 %count, %struct.B* noalias nocapture %q, %struct.B* noalias nocapture %p) nounwind uwtable noinline ssp {
%a1 = icmp sgt i32 %count, 0
br i1 %a1, label %.lr.ph, label %._crit_edge
.lr.ph: ; preds = %0
- %a2 = getelementptr inbounds %struct.B* %q, i64 0, i32 0
- %a3 = getelementptr inbounds %struct.B* %q, i64 0, i32 1
- %a4 = getelementptr inbounds %struct.B* %q, i64 0, i32 2
- %a5 = getelementptr inbounds %struct.B* %q, i64 0, i32 3
+ %a2 = getelementptr inbounds %struct.B, %struct.B* %q, i64 0, i32 0
+ %a3 = getelementptr inbounds %struct.B, %struct.B* %q, i64 0, i32 1
+ %a4 = getelementptr inbounds %struct.B, %struct.B* %q, i64 0, i32 2
+ %a5 = getelementptr inbounds %struct.B, %struct.B* %q, i64 0, i32 3
br label %block4
block4: ; preds = %4, %.lr.ph
%i.02 = phi i32 [ 0, %.lr.ph ], [ %c9, %block4 ]
%.01 = phi %struct.B* [ %p, %.lr.ph ], [ %c10, %block4 ]
- %a7 = getelementptr inbounds %struct.B* %.01, i64 0, i32 0
- %a8 = getelementptr inbounds %struct.B* %.01, i64 0, i32 1
- %a9 = getelementptr inbounds %struct.B* %.01, i64 0, i32 2
- %a10 = getelementptr inbounds %struct.B* %.01, i64 0, i32 3
- %b1 = load i32* %a2, align 1
- %b2 = load i32* %a3, align 1
- %b3 = load i32* %a4, align 1
- %b4 = load i32* %a5, align 1
+ %a7 = getelementptr inbounds %struct.B, %struct.B* %.01, i64 0, i32 0
+ %a8 = getelementptr inbounds %struct.B, %struct.B* %.01, i64 0, i32 1
+ %a9 = getelementptr inbounds %struct.B, %struct.B* %.01, i64 0, i32 2
+ %a10 = getelementptr inbounds %struct.B, %struct.B* %.01, i64 0, i32 3
+ %b1 = load i32, i32* %a2, align 1
+ %b2 = load i32, i32* %a3, align 1
+ %b3 = load i32, i32* %a4, align 1
+ %b4 = load i32, i32* %a5, align 1
store i32 %b1, i32* %a7, align 1
store i32 %b2, i32* %a8, align 1
store i32 %b3, i32* %a9, align 1
store i32 %b4, i32* %a10, align 1
%c9 = add nsw i32 %i.02, 1
- %c10 = getelementptr inbounds %struct.B* %.01, i64 1
+ %c10 = getelementptr inbounds %struct.B, %struct.B* %.01, i64 1
%exitcond = icmp eq i32 %c9, %count
br i1 %exitcond, label %._crit_edge, label %block4
@@ -350,17 +345,17 @@ define void @MergeLoadStoreBaseIndexOffset(i64* %a, i8* %b, i8* %c, i32 %n) {
%.09 = phi i32 [ %n, %0 ], [ %11, %1 ]
%.08 = phi i8* [ %b, %0 ], [ %10, %1 ]
%.0 = phi i64* [ %a, %0 ], [ %2, %1 ]
- %2 = getelementptr inbounds i64* %.0, i64 1
- %3 = load i64* %.0, align 1
- %4 = getelementptr inbounds i8* %c, i64 %3
- %5 = load i8* %4, align 1
+ %2 = getelementptr inbounds i64, i64* %.0, i64 1
+ %3 = load i64, i64* %.0, align 1
+ %4 = getelementptr inbounds i8, i8* %c, i64 %3
+ %5 = load i8, i8* %4, align 1
%6 = add i64 %3, 1
- %7 = getelementptr inbounds i8* %c, i64 %6
- %8 = load i8* %7, align 1
+ %7 = getelementptr inbounds i8, i8* %c, i64 %6
+ %8 = load i8, i8* %7, align 1
store i8 %5, i8* %.08, align 1
- %9 = getelementptr inbounds i8* %.08, i64 1
+ %9 = getelementptr inbounds i8, i8* %.08, i64 1
store i8 %8, i8* %9, align 1
- %10 = getelementptr inbounds i8* %.08, i64 2
+ %10 = getelementptr inbounds i8, i8* %.08, i64 2
%11 = add nsw i32 %.09, -1
%12 = icmp eq i32 %11, 0
br i1 %12, label %13, label %1
@@ -382,18 +377,18 @@ define void @MergeLoadStoreBaseIndexOffsetSext(i8* %a, i8* %b, i8* %c, i32 %n) {
%.09 = phi i32 [ %n, %0 ], [ %12, %1 ]
%.08 = phi i8* [ %b, %0 ], [ %11, %1 ]
%.0 = phi i8* [ %a, %0 ], [ %2, %1 ]
- %2 = getelementptr inbounds i8* %.0, i64 1
- %3 = load i8* %.0, align 1
+ %2 = getelementptr inbounds i8, i8* %.0, i64 1
+ %3 = load i8, i8* %.0, align 1
%4 = sext i8 %3 to i64
- %5 = getelementptr inbounds i8* %c, i64 %4
- %6 = load i8* %5, align 1
+ %5 = getelementptr inbounds i8, i8* %c, i64 %4
+ %6 = load i8, i8* %5, align 1
%7 = add i64 %4, 1
- %8 = getelementptr inbounds i8* %c, i64 %7
- %9 = load i8* %8, align 1
+ %8 = getelementptr inbounds i8, i8* %c, i64 %7
+ %9 = load i8, i8* %8, align 1
store i8 %6, i8* %.08, align 1
- %10 = getelementptr inbounds i8* %.08, i64 1
+ %10 = getelementptr inbounds i8, i8* %.08, i64 1
store i8 %9, i8* %10, align 1
- %11 = getelementptr inbounds i8* %.08, i64 2
+ %11 = getelementptr inbounds i8, i8* %.08, i64 2
%12 = add nsw i32 %.09, -1
%13 = icmp eq i32 %12, 0
br i1 %13, label %14, label %1
@@ -414,19 +409,19 @@ define void @loadStoreBaseIndexOffsetSextNoSex(i8* %a, i8* %b, i8* %c, i32 %n) {
%.09 = phi i32 [ %n, %0 ], [ %12, %1 ]
%.08 = phi i8* [ %b, %0 ], [ %11, %1 ]
%.0 = phi i8* [ %a, %0 ], [ %2, %1 ]
- %2 = getelementptr inbounds i8* %.0, i64 1
- %3 = load i8* %.0, align 1
+ %2 = getelementptr inbounds i8, i8* %.0, i64 1
+ %3 = load i8, i8* %.0, align 1
%4 = sext i8 %3 to i64
- %5 = getelementptr inbounds i8* %c, i64 %4
- %6 = load i8* %5, align 1
+ %5 = getelementptr inbounds i8, i8* %c, i64 %4
+ %6 = load i8, i8* %5, align 1
%7 = add i8 %3, 1
%wrap.4 = sext i8 %7 to i64
- %8 = getelementptr inbounds i8* %c, i64 %wrap.4
- %9 = load i8* %8, align 1
+ %8 = getelementptr inbounds i8, i8* %c, i64 %wrap.4
+ %9 = load i8, i8* %8, align 1
store i8 %6, i8* %.08, align 1
- %10 = getelementptr inbounds i8* %.08, i64 1
+ %10 = getelementptr inbounds i8, i8* %.08, i64 1
store i8 %9, i8* %10, align 1
- %11 = getelementptr inbounds i8* %.08, i64 2
+ %11 = getelementptr inbounds i8, i8* %.08, i64 2
%12 = add nsw i32 %.09, -1
%13 = icmp eq i32 %12, 0
br i1 %13, label %14, label %1
@@ -434,3 +429,62 @@ define void @loadStoreBaseIndexOffsetSextNoSex(i8* %a, i8* %b, i8* %c, i32 %n) {
; <label>:14
ret void
}
+
+; PR21711 ( http://llvm.org/bugs/show_bug.cgi?id=21711 )
+define void @merge_vec_element_store(<8 x float> %v, float* %ptr) {
+ %vecext0 = extractelement <8 x float> %v, i32 0
+ %vecext1 = extractelement <8 x float> %v, i32 1
+ %vecext2 = extractelement <8 x float> %v, i32 2
+ %vecext3 = extractelement <8 x float> %v, i32 3
+ %vecext4 = extractelement <8 x float> %v, i32 4
+ %vecext5 = extractelement <8 x float> %v, i32 5
+ %vecext6 = extractelement <8 x float> %v, i32 6
+ %vecext7 = extractelement <8 x float> %v, i32 7
+ %arrayidx1 = getelementptr inbounds float, float* %ptr, i64 1
+ %arrayidx2 = getelementptr inbounds float, float* %ptr, i64 2
+ %arrayidx3 = getelementptr inbounds float, float* %ptr, i64 3
+ %arrayidx4 = getelementptr inbounds float, float* %ptr, i64 4
+ %arrayidx5 = getelementptr inbounds float, float* %ptr, i64 5
+ %arrayidx6 = getelementptr inbounds float, float* %ptr, i64 6
+ %arrayidx7 = getelementptr inbounds float, float* %ptr, i64 7
+ store float %vecext0, float* %ptr, align 4
+ store float %vecext1, float* %arrayidx1, align 4
+ store float %vecext2, float* %arrayidx2, align 4
+ store float %vecext3, float* %arrayidx3, align 4
+ store float %vecext4, float* %arrayidx4, align 4
+ store float %vecext5, float* %arrayidx5, align 4
+ store float %vecext6, float* %arrayidx6, align 4
+ store float %vecext7, float* %arrayidx7, align 4
+ ret void
+
+; CHECK-LABEL: merge_vec_element_store
+; CHECK: vmovups
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+}
+
+; This is a minimized test based on real code that was failing.
+; We could merge stores (and loads) like this...
+
+define void @merge_vec_element_and_scalar_load([6 x i64]* %array) {
+ %idx0 = getelementptr inbounds [6 x i64], [6 x i64]* %array, i64 0, i64 0
+ %idx1 = getelementptr inbounds [6 x i64], [6 x i64]* %array, i64 0, i64 1
+ %idx4 = getelementptr inbounds [6 x i64], [6 x i64]* %array, i64 0, i64 4
+ %idx5 = getelementptr inbounds [6 x i64], [6 x i64]* %array, i64 0, i64 5
+
+ %a0 = load i64, i64* %idx0, align 8
+ store i64 %a0, i64* %idx4, align 8
+
+ %b = bitcast i64* %idx1 to <2 x i64>*
+ %v = load <2 x i64>, <2 x i64>* %b, align 8
+ %a1 = extractelement <2 x i64> %v, i32 0
+ store i64 %a1, i64* %idx5, align 8
+ ret void
+
+; CHECK-LABEL: merge_vec_element_and_scalar_load
+; CHECK: movq (%rdi), %rax
+; CHECK-NEXT: movq %rax, 32(%rdi)
+; CHECK-NEXT: movq 8(%rdi), %rax
+; CHECK-NEXT: movq %rax, 40(%rdi)
+; CHECK-NEXT: retq
+}
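;; Illustrative sketch (hypothetical IR, not from the tests above): the comment in
;; merge_vec_element_and_scalar_load says these accesses could be merged; if they were,
;; the same memory effect could be expressed as one 128-bit load/store pair over the
;; adjacent slots. %idx0 and %idx4 are the GEPs defined in that test; the other names
;; here are assumed for illustration only.
;;   %pv  = bitcast i64* %idx0 to <2 x i64>*
;;   %qv  = bitcast i64* %idx4 to <2 x i64>*
;;   %vec = load <2 x i64>, <2 x i64>* %pv, align 8
;;   store <2 x i64> %vec, <2 x i64>* %qv, align 8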
diff --git a/test/CodeGen/X86/StackColoring-dbg.ll b/test/CodeGen/X86/StackColoring-dbg.ll
index 498ad7edaa9d..98c27f44fabc 100644
--- a/test/CodeGen/X86/StackColoring-dbg.ll
+++ b/test/CodeGen/X86/StackColoring-dbg.ll
@@ -11,13 +11,13 @@ define void @foo() nounwind uwtable ssp {
entry:
%x.i = alloca i8, align 1
%y.i = alloca [256 x i8], align 16
- %0 = getelementptr inbounds [256 x i8]* %y.i, i64 0, i64 0
+ %0 = getelementptr inbounds [256 x i8], [256 x i8]* %y.i, i64 0, i64 0
br label %for.body
for.body:
call void @llvm.lifetime.end(i64 -1, i8* %0) nounwind
call void @llvm.lifetime.start(i64 -1, i8* %x.i) nounwind
- call void @llvm.dbg.declare(metadata i8* %x.i, metadata !22, metadata !{!"0x102"}) nounwind
+ call void @llvm.dbg.declare(metadata i8* %x.i, metadata !22, metadata !DIExpression()) nounwind, !dbg !DILocation(scope: !2)
br label %for.body
}
@@ -27,9 +27,9 @@ declare void @llvm.lifetime.end(i64, i8* nocapture) nounwind
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!23}
-!0 = !{!"0x11\001\00clang\001\00\000\00\000", !1, !2, !2, null, null, null} ; [ DW_TAG_compile_unit ]
-!1 = !{!"t.c", !""}
-!16 = !{!"0x24\00char\000\008\008\000\000\006", null, null} ; [ DW_TAG_base_type ]
-!2 = !{i32 0}
-!22 = !{!"0x100\00x\0016\000", null, !2, !16} ; [ DW_TAG_auto_variable ]
-!23 = !{i32 1, !"Debug Info Version", i32 2}
+!0 = !DICompileUnit(language: DW_LANG_C89, producer: "clang", isOptimized: true, emissionKind: 0, file: !1, enums: !{}, retainedTypes: !{})
+!1 = !DIFile(filename: "t.c", directory: "")
+!16 = !DIBasicType(tag: DW_TAG_base_type, name: "char", size: 8, align: 8, encoding: DW_ATE_signed_char)
+!2 = !DISubprogram()
+!22 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "x", line: 16, scope: !2, file: !1, type: !16)
+!23 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/CodeGen/X86/StackColoring.ll b/test/CodeGen/X86/StackColoring.ll
index a8e3537fabe3..414ccf469ebb 100644
--- a/test/CodeGen/X86/StackColoring.ll
+++ b/test/CodeGen/X86/StackColoring.ll
@@ -411,10 +411,10 @@ define i32 @shady_range(i32 %argc, i8** nocapture %argv) uwtable {
%a8 = bitcast [4 x %struct.Klass]* %a.i to i8*
%b8 = bitcast [4 x %struct.Klass]* %b.i to i8*
; I am used outside the lifetime zone below:
- %z2 = getelementptr inbounds [4 x %struct.Klass]* %a.i, i64 0, i64 0, i32 0
+ %z2 = getelementptr inbounds [4 x %struct.Klass], [4 x %struct.Klass]* %a.i, i64 0, i64 0, i32 0
call void @llvm.lifetime.start(i64 -1, i8* %a8)
call void @llvm.lifetime.start(i64 -1, i8* %b8)
- %z3 = load i32* %z2, align 16
+ %z3 = load i32, i32* %z2, align 16
%r = call i32 @foo(i32 %z3, i8* %a8)
%r2 = call i32 @foo(i32 %z3, i8* %b8)
call void @llvm.lifetime.end(i64 -1, i8* %a8)
diff --git a/test/CodeGen/X86/SwitchLowering.ll b/test/CodeGen/X86/SwitchLowering.ll
index 29a0e82bf59f..5f17d9d85726 100644
--- a/test/CodeGen/X86/SwitchLowering.ll
+++ b/test/CodeGen/X86/SwitchLowering.ll
@@ -9,10 +9,10 @@ bb: ; preds = %bb, %entry
%indvar = phi i32 [ 0, %entry ], [ %indvar.next, %bb ] ; <i32> [#uses=3]
%CurPtr_addr.0.rec = bitcast i32 %indvar to i32 ; <i32> [#uses=1]
%gep.upgrd.1 = zext i32 %indvar to i64 ; <i64> [#uses=1]
- %CurPtr_addr.0 = getelementptr i8* %CurPtr, i64 %gep.upgrd.1 ; <i8*> [#uses=1]
- %tmp = load i8* %CurPtr_addr.0 ; <i8> [#uses=3]
+ %CurPtr_addr.0 = getelementptr i8, i8* %CurPtr, i64 %gep.upgrd.1 ; <i8*> [#uses=1]
+ %tmp = load i8, i8* %CurPtr_addr.0 ; <i8> [#uses=3]
%tmp2.rec = add i32 %CurPtr_addr.0.rec, 1 ; <i32> [#uses=1]
- %tmp2 = getelementptr i8* %CurPtr, i32 %tmp2.rec ; <i8*> [#uses=1]
+ %tmp2 = getelementptr i8, i8* %CurPtr, i32 %tmp2.rec ; <i8*> [#uses=1]
%indvar.next = add i32 %indvar, 1 ; <i32> [#uses=1]
switch i8 %tmp, label %bb [
i8 0, label %bb7
diff --git a/test/CodeGen/X86/SwizzleShuff.ll b/test/CodeGen/X86/SwizzleShuff.ll
index a435272dca44..e4c35c58210a 100644
--- a/test/CodeGen/X86/SwizzleShuff.ll
+++ b/test/CodeGen/X86/SwizzleShuff.ll
@@ -6,23 +6,24 @@
; CHECK: xorl
; CHECK: ret
define void @pull_bitcast (<4 x i8>* %pA, <4 x i8>* %pB) {
- %A = load <4 x i8>* %pA
- %B = load <4 x i8>* %pB
+ %A = load <4 x i8>, <4 x i8>* %pA
+ %B = load <4 x i8>, <4 x i8>* %pB
%C = xor <4 x i8> %A, %B
store <4 x i8> %C, <4 x i8>* %pA
ret void
}
; CHECK: multi_use_swizzle
-; CHECK: mov
-; CHECK-NEXT: shuf
-; CHECK-NEXT: shuf
-; CHECK-NEXT: shuf
-; CHECK-NEXT: xor
+; CHECK: pshufd
+; CHECK-NEXT: pshufd
+; CHECK-NEXT: pblendw
+; CHECK-NEXT: pshufd
+; CHECK-NEXT: pshufd
+; CHECK-NEXT: pxor
; CHECK-NEXT: ret
define <4 x i32> @multi_use_swizzle (<4 x i32>* %pA, <4 x i32>* %pB) {
- %A = load <4 x i32>* %pA
- %B = load <4 x i32>* %pB
+ %A = load <4 x i32>, <4 x i32>* %pA
+ %B = load <4 x i32>, <4 x i32>* %pB
%S = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 1, i32 1, i32 5, i32 6>
%S1 = shufflevector <4 x i32> %S, <4 x i32> undef, <4 x i32> <i32 1, i32 3, i32 2, i32 2>
%S2 = shufflevector <4 x i32> %S, <4 x i32> undef, <4 x i32> <i32 2, i32 1, i32 0, i32 2>
@@ -34,9 +35,9 @@ define <4 x i32> @multi_use_swizzle (<4 x i32>* %pA, <4 x i32>* %pB) {
; CHECK: xorl
; CHECK: ret
define <4 x i8> @pull_bitcast2 (<4 x i8>* %pA, <4 x i8>* %pB, <4 x i8>* %pC) {
- %A = load <4 x i8>* %pA
+ %A = load <4 x i8>, <4 x i8>* %pA
store <4 x i8> %A, <4 x i8>* %pC
- %B = load <4 x i8>* %pB
+ %B = load <4 x i8>, <4 x i8>* %pB
%C = xor <4 x i8> %A, %B
store <4 x i8> %C, <4 x i8>* %pA
ret <4 x i8> %C
@@ -45,11 +46,11 @@ define <4 x i8> @pull_bitcast2 (<4 x i8>* %pA, <4 x i8>* %pB, <4 x i8>* %pC) {
; CHECK: reverse_1
-; CHECK-NOT: shuf
+; CHECK-NOT: pshufd
; CHECK: ret
define <4 x i32> @reverse_1 (<4 x i32>* %pA, <4 x i32>* %pB) {
- %A = load <4 x i32>* %pA
- %B = load <4 x i32>* %pB
+ %A = load <4 x i32>, <4 x i32>* %pA
+ %B = load <4 x i32>, <4 x i32>* %pB
%S = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
%S1 = shufflevector <4 x i32> %S, <4 x i32> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
ret <4 x i32> %S1
@@ -57,11 +58,11 @@ define <4 x i32> @reverse_1 (<4 x i32>* %pA, <4 x i32>* %pB) {
; CHECK: no_reverse_shuff
-; CHECK: shuf
+; CHECK: pshufd
; CHECK: ret
define <4 x i32> @no_reverse_shuff (<4 x i32>* %pA, <4 x i32>* %pB) {
- %A = load <4 x i32>* %pA
- %B = load <4 x i32>* %pB
+ %A = load <4 x i32>, <4 x i32>* %pA
+ %B = load <4 x i32>, <4 x i32>* %pB
%S = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
%S1 = shufflevector <4 x i32> %S, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 3, i32 2>
ret <4 x i32> %S1
diff --git a/test/CodeGen/X86/abi-isel.ll b/test/CodeGen/X86/abi-isel.ll
index 633e70f0285a..f363b64386f5 100644
--- a/test/CodeGen/X86/abi-isel.ll
+++ b/test/CodeGen/X86/abi-isel.ll
@@ -33,8 +33,8 @@
define void @foo00() nounwind {
entry:
- %0 = load i32* getelementptr ([131072 x i32]* @src, i32 0, i64 0), align 4
- store i32 %0, i32* getelementptr ([131072 x i32]* @dst, i32 0, i64 0), align 4
+ %0 = load i32, i32* getelementptr ([131072 x i32], [131072 x i32]* @src, i32 0, i64 0), align 4
+ store i32 %0, i32* getelementptr ([131072 x i32], [131072 x i32]* @dst, i32 0, i64 0), align 4
ret void
; LINUX-64-STATIC-LABEL: foo00:
@@ -105,8 +105,8 @@ entry:
define void @fxo00() nounwind {
entry:
- %0 = load i32* getelementptr ([32 x i32]* @xsrc, i32 0, i64 0), align 4
- store i32 %0, i32* getelementptr ([32 x i32]* @xdst, i32 0, i64 0), align 4
+ %0 = load i32, i32* getelementptr ([32 x i32], [32 x i32]* @xsrc, i32 0, i64 0), align 4
+ store i32 %0, i32* getelementptr ([32 x i32], [32 x i32]* @xdst, i32 0, i64 0), align 4
ret void
; LINUX-64-STATIC-LABEL: fxo00:
@@ -177,7 +177,7 @@ entry:
define void @foo01() nounwind {
entry:
- store i32* getelementptr ([131072 x i32]* @dst, i32 0, i32 0), i32** @ptr, align 8
+ store i32* getelementptr ([131072 x i32], [131072 x i32]* @dst, i32 0, i32 0), i32** @ptr, align 8
ret void
; LINUX-64-STATIC-LABEL: foo01:
; LINUX-64-STATIC: movq $dst, ptr
@@ -237,7 +237,7 @@ entry:
define void @fxo01() nounwind {
entry:
- store i32* getelementptr ([32 x i32]* @xdst, i32 0, i32 0), i32** @ptr, align 8
+ store i32* getelementptr ([32 x i32], [32 x i32]* @xdst, i32 0, i32 0), i32** @ptr, align 8
ret void
; LINUX-64-STATIC-LABEL: fxo01:
; LINUX-64-STATIC: movq $xdst, ptr
@@ -297,8 +297,8 @@ entry:
define void @foo02() nounwind {
entry:
- %0 = load i32** @ptr, align 8
- %1 = load i32* getelementptr ([131072 x i32]* @src, i32 0, i64 0), align 4
+ %0 = load i32*, i32** @ptr, align 8
+ %1 = load i32, i32* getelementptr ([131072 x i32], [131072 x i32]* @src, i32 0, i64 0), align 4
store i32 %1, i32* %0, align 4
ret void
; LINUX-64-STATIC-LABEL: foo02:
@@ -379,8 +379,8 @@ entry:
define void @fxo02() nounwind {
entry:
- %0 = load i32** @ptr, align 8
- %1 = load i32* getelementptr ([32 x i32]* @xsrc, i32 0, i64 0), align 4
+ %0 = load i32*, i32** @ptr, align 8
+ %1 = load i32, i32* getelementptr ([32 x i32], [32 x i32]* @xsrc, i32 0, i64 0), align 4
store i32 %1, i32* %0, align 4
; LINUX-64-STATIC-LABEL: fxo02:
; LINUX-64-STATIC: movl xsrc(%rip), %
@@ -461,8 +461,8 @@ entry:
define void @foo03() nounwind {
entry:
- %0 = load i32* getelementptr ([131072 x i32]* @dsrc, i32 0, i64 0), align 32
- store i32 %0, i32* getelementptr ([131072 x i32]* @ddst, i32 0, i64 0), align 32
+ %0 = load i32, i32* getelementptr ([131072 x i32], [131072 x i32]* @dsrc, i32 0, i64 0), align 32
+ store i32 %0, i32* getelementptr ([131072 x i32], [131072 x i32]* @ddst, i32 0, i64 0), align 32
ret void
; LINUX-64-STATIC-LABEL: foo03:
; LINUX-64-STATIC: movl dsrc(%rip), [[EAX:%e.x]]
@@ -522,7 +522,7 @@ entry:
define void @foo04() nounwind {
entry:
- store i32* getelementptr ([131072 x i32]* @ddst, i32 0, i32 0), i32** @dptr, align 8
+ store i32* getelementptr ([131072 x i32], [131072 x i32]* @ddst, i32 0, i32 0), i32** @dptr, align 8
ret void
; LINUX-64-STATIC-LABEL: foo04:
; LINUX-64-STATIC: movq $ddst, dptr
@@ -576,8 +576,8 @@ entry:
define void @foo05() nounwind {
entry:
- %0 = load i32** @dptr, align 8
- %1 = load i32* getelementptr ([131072 x i32]* @dsrc, i32 0, i64 0), align 32
+ %0 = load i32*, i32** @dptr, align 8
+ %1 = load i32, i32* getelementptr ([131072 x i32], [131072 x i32]* @dsrc, i32 0, i64 0), align 32
store i32 %1, i32* %0, align 4
ret void
; LINUX-64-STATIC-LABEL: foo05:
@@ -648,8 +648,8 @@ entry:
define void @foo06() nounwind {
entry:
- %0 = load i32* getelementptr ([131072 x i32]* @lsrc, i32 0, i64 0), align 4
- store i32 %0, i32* getelementptr ([131072 x i32]* @ldst, i32 0, i64 0), align 4
+ %0 = load i32, i32* getelementptr ([131072 x i32], [131072 x i32]* @lsrc, i32 0, i64 0), align 4
+ store i32 %0, i32* getelementptr ([131072 x i32], [131072 x i32]* @ldst, i32 0, i64 0), align 4
ret void
; LINUX-64-STATIC-LABEL: foo06:
; LINUX-64-STATIC: movl lsrc(%rip), [[EAX:%e.x]]
@@ -707,7 +707,7 @@ entry:
define void @foo07() nounwind {
entry:
- store i32* getelementptr ([131072 x i32]* @ldst, i32 0, i32 0), i32** @lptr, align 8
+ store i32* getelementptr ([131072 x i32], [131072 x i32]* @ldst, i32 0, i32 0), i32** @lptr, align 8
ret void
; LINUX-64-STATIC-LABEL: foo07:
; LINUX-64-STATIC: movq $ldst, lptr
@@ -760,8 +760,8 @@ entry:
define void @foo08() nounwind {
entry:
- %0 = load i32** @lptr, align 8
- %1 = load i32* getelementptr ([131072 x i32]* @lsrc, i32 0, i64 0), align 4
+ %0 = load i32*, i32** @lptr, align 8
+ %1 = load i32, i32* getelementptr ([131072 x i32], [131072 x i32]* @lsrc, i32 0, i64 0), align 4
store i32 %1, i32* %0, align 4
ret void
; LINUX-64-STATIC-LABEL: foo08:
@@ -830,8 +830,8 @@ entry:
define void @qux00() nounwind {
entry:
- %0 = load i32* getelementptr ([131072 x i32]* @src, i32 0, i64 16), align 4
- store i32 %0, i32* getelementptr ([131072 x i32]* @dst, i32 0, i64 16), align 4
+ %0 = load i32, i32* getelementptr ([131072 x i32], [131072 x i32]* @src, i32 0, i64 16), align 4
+ store i32 %0, i32* getelementptr ([131072 x i32], [131072 x i32]* @dst, i32 0, i64 16), align 4
ret void
; LINUX-64-STATIC-LABEL: qux00:
; LINUX-64-STATIC: movl src+64(%rip), [[EAX:%e.x]]
@@ -901,8 +901,8 @@ entry:
define void @qxx00() nounwind {
entry:
- %0 = load i32* getelementptr ([32 x i32]* @xsrc, i32 0, i64 16), align 4
- store i32 %0, i32* getelementptr ([32 x i32]* @xdst, i32 0, i64 16), align 4
+ %0 = load i32, i32* getelementptr ([32 x i32], [32 x i32]* @xsrc, i32 0, i64 16), align 4
+ store i32 %0, i32* getelementptr ([32 x i32], [32 x i32]* @xdst, i32 0, i64 16), align 4
ret void
; LINUX-64-STATIC-LABEL: qxx00:
; LINUX-64-STATIC: movl xsrc+64(%rip), [[EAX:%e.x]]
@@ -972,7 +972,7 @@ entry:
define void @qux01() nounwind {
entry:
- store i32* getelementptr ([131072 x i32]* @dst, i32 0, i64 16), i32** @ptr, align 8
+ store i32* getelementptr ([131072 x i32], [131072 x i32]* @dst, i32 0, i64 16), i32** @ptr, align 8
ret void
; LINUX-64-STATIC-LABEL: qux01:
; LINUX-64-STATIC: movq $dst+64, ptr
@@ -1038,7 +1038,7 @@ entry:
define void @qxx01() nounwind {
entry:
- store i32* getelementptr ([32 x i32]* @xdst, i32 0, i64 16), i32** @ptr, align 8
+ store i32* getelementptr ([32 x i32], [32 x i32]* @xdst, i32 0, i64 16), i32** @ptr, align 8
ret void
; LINUX-64-STATIC-LABEL: qxx01:
; LINUX-64-STATIC: movq $xdst+64, ptr
@@ -1104,9 +1104,9 @@ entry:
define void @qux02() nounwind {
entry:
- %0 = load i32** @ptr, align 8
- %1 = load i32* getelementptr ([131072 x i32]* @src, i32 0, i64 16), align 4
- %2 = getelementptr i32* %0, i64 16
+ %0 = load i32*, i32** @ptr, align 8
+ %1 = load i32, i32* getelementptr ([131072 x i32], [131072 x i32]* @src, i32 0, i64 16), align 4
+ %2 = getelementptr i32, i32* %0, i64 16
store i32 %1, i32* %2, align 4
; LINUX-64-STATIC-LABEL: qux02:
; LINUX-64-STATIC: movl src+64(%rip), [[EAX:%e.x]]
@@ -1187,9 +1187,9 @@ entry:
define void @qxx02() nounwind {
entry:
- %0 = load i32** @ptr, align 8
- %1 = load i32* getelementptr ([32 x i32]* @xsrc, i32 0, i64 16), align 4
- %2 = getelementptr i32* %0, i64 16
+ %0 = load i32*, i32** @ptr, align 8
+ %1 = load i32, i32* getelementptr ([32 x i32], [32 x i32]* @xsrc, i32 0, i64 16), align 4
+ %2 = getelementptr i32, i32* %0, i64 16
store i32 %1, i32* %2, align 4
; LINUX-64-STATIC-LABEL: qxx02:
; LINUX-64-STATIC: movl xsrc+64(%rip), [[EAX:%e.x]]
@@ -1270,8 +1270,8 @@ entry:
define void @qux03() nounwind {
entry:
- %0 = load i32* getelementptr ([131072 x i32]* @dsrc, i32 0, i64 16), align 32
- store i32 %0, i32* getelementptr ([131072 x i32]* @ddst, i32 0, i64 16), align 32
+ %0 = load i32, i32* getelementptr ([131072 x i32], [131072 x i32]* @dsrc, i32 0, i64 16), align 32
+ store i32 %0, i32* getelementptr ([131072 x i32], [131072 x i32]* @ddst, i32 0, i64 16), align 32
ret void
; LINUX-64-STATIC-LABEL: qux03:
; LINUX-64-STATIC: movl dsrc+64(%rip), [[EAX:%e.x]]
@@ -1331,7 +1331,7 @@ entry:
define void @qux04() nounwind {
entry:
- store i32* getelementptr ([131072 x i32]* @ddst, i32 0, i64 16), i32** @dptr, align 8
+ store i32* getelementptr ([131072 x i32], [131072 x i32]* @ddst, i32 0, i64 16), i32** @dptr, align 8
ret void
; LINUX-64-STATIC-LABEL: qux04:
; LINUX-64-STATIC: movq $ddst+64, dptr(%rip)
@@ -1386,9 +1386,9 @@ entry:
define void @qux05() nounwind {
entry:
- %0 = load i32** @dptr, align 8
- %1 = load i32* getelementptr ([131072 x i32]* @dsrc, i32 0, i64 16), align 32
- %2 = getelementptr i32* %0, i64 16
+ %0 = load i32*, i32** @dptr, align 8
+ %1 = load i32, i32* getelementptr ([131072 x i32], [131072 x i32]* @dsrc, i32 0, i64 16), align 32
+ %2 = getelementptr i32, i32* %0, i64 16
store i32 %1, i32* %2, align 4
; LINUX-64-STATIC-LABEL: qux05:
; LINUX-64-STATIC: movl dsrc+64(%rip), [[EAX:%e.x]]
@@ -1459,8 +1459,8 @@ entry:
define void @qux06() nounwind {
entry:
- %0 = load i32* getelementptr ([131072 x i32]* @lsrc, i32 0, i64 16), align 4
- store i32 %0, i32* getelementptr ([131072 x i32]* @ldst, i32 0, i64 16), align 4
+ %0 = load i32, i32* getelementptr ([131072 x i32], [131072 x i32]* @lsrc, i32 0, i64 16), align 4
+ store i32 %0, i32* getelementptr ([131072 x i32], [131072 x i32]* @ldst, i32 0, i64 16), align 4
ret void
; LINUX-64-STATIC-LABEL: qux06:
; LINUX-64-STATIC: movl lsrc+64(%rip), [[EAX:%e.x]]
@@ -1518,7 +1518,7 @@ entry:
define void @qux07() nounwind {
entry:
- store i32* getelementptr ([131072 x i32]* @ldst, i32 0, i64 16), i32** @lptr, align 8
+ store i32* getelementptr ([131072 x i32], [131072 x i32]* @ldst, i32 0, i64 16), i32** @lptr, align 8
ret void
; LINUX-64-STATIC-LABEL: qux07:
; LINUX-64-STATIC: movq $ldst+64, lptr
@@ -1571,9 +1571,9 @@ entry:
define void @qux08() nounwind {
entry:
- %0 = load i32** @lptr, align 8
- %1 = load i32* getelementptr ([131072 x i32]* @lsrc, i32 0, i64 16), align 4
- %2 = getelementptr i32* %0, i64 16
+ %0 = load i32*, i32** @lptr, align 8
+ %1 = load i32, i32* getelementptr ([131072 x i32], [131072 x i32]* @lsrc, i32 0, i64 16), align 4
+ %2 = getelementptr i32, i32* %0, i64 16
store i32 %1, i32* %2, align 4
; LINUX-64-STATIC-LABEL: qux08:
; LINUX-64-STATIC: movl lsrc+64(%rip), [[EAX:%e.x]]
@@ -1642,9 +1642,9 @@ entry:
define void @ind00(i64 %i) nounwind {
entry:
- %0 = getelementptr [131072 x i32]* @src, i64 0, i64 %i
- %1 = load i32* %0, align 4
- %2 = getelementptr [131072 x i32]* @dst, i64 0, i64 %i
+ %0 = getelementptr [131072 x i32], [131072 x i32]* @src, i64 0, i64 %i
+ %1 = load i32, i32* %0, align 4
+ %2 = getelementptr [131072 x i32], [131072 x i32]* @dst, i64 0, i64 %i
store i32 %1, i32* %2, align 4
ret void
; LINUX-64-STATIC-LABEL: ind00:
@@ -1720,9 +1720,9 @@ entry:
define void @ixd00(i64 %i) nounwind {
entry:
- %0 = getelementptr [32 x i32]* @xsrc, i64 0, i64 %i
- %1 = load i32* %0, align 4
- %2 = getelementptr [32 x i32]* @xdst, i64 0, i64 %i
+ %0 = getelementptr [32 x i32], [32 x i32]* @xsrc, i64 0, i64 %i
+ %1 = load i32, i32* %0, align 4
+ %2 = getelementptr [32 x i32], [32 x i32]* @xdst, i64 0, i64 %i
store i32 %1, i32* %2, align 4
ret void
; LINUX-64-STATIC-LABEL: ixd00:
@@ -1798,7 +1798,7 @@ entry:
define void @ind01(i64 %i) nounwind {
entry:
- %0 = getelementptr [131072 x i32]* @dst, i64 0, i64 %i
+ %0 = getelementptr [131072 x i32], [131072 x i32]* @dst, i64 0, i64 %i
store i32* %0, i32** @ptr, align 8
ret void
; LINUX-64-STATIC-LABEL: ind01:
@@ -1874,7 +1874,7 @@ entry:
define void @ixd01(i64 %i) nounwind {
entry:
- %0 = getelementptr [32 x i32]* @xdst, i64 0, i64 %i
+ %0 = getelementptr [32 x i32], [32 x i32]* @xdst, i64 0, i64 %i
store i32* %0, i32** @ptr, align 8
ret void
; LINUX-64-STATIC-LABEL: ixd01:
@@ -1950,10 +1950,10 @@ entry:
define void @ind02(i64 %i) nounwind {
entry:
- %0 = load i32** @ptr, align 8
- %1 = getelementptr [131072 x i32]* @src, i64 0, i64 %i
- %2 = load i32* %1, align 4
- %3 = getelementptr i32* %0, i64 %i
+ %0 = load i32*, i32** @ptr, align 8
+ %1 = getelementptr [131072 x i32], [131072 x i32]* @src, i64 0, i64 %i
+ %2 = load i32, i32* %1, align 4
+ %3 = getelementptr i32, i32* %0, i64 %i
store i32 %2, i32* %3, align 4
ret void
; LINUX-64-STATIC-LABEL: ind02:
@@ -2039,10 +2039,10 @@ entry:
define void @ixd02(i64 %i) nounwind {
entry:
- %0 = load i32** @ptr, align 8
- %1 = getelementptr [32 x i32]* @xsrc, i64 0, i64 %i
- %2 = load i32* %1, align 4
- %3 = getelementptr i32* %0, i64 %i
+ %0 = load i32*, i32** @ptr, align 8
+ %1 = getelementptr [32 x i32], [32 x i32]* @xsrc, i64 0, i64 %i
+ %2 = load i32, i32* %1, align 4
+ %3 = getelementptr i32, i32* %0, i64 %i
store i32 %2, i32* %3, align 4
ret void
; LINUX-64-STATIC-LABEL: ixd02:
@@ -2128,9 +2128,9 @@ entry:
define void @ind03(i64 %i) nounwind {
entry:
- %0 = getelementptr [131072 x i32]* @dsrc, i64 0, i64 %i
- %1 = load i32* %0, align 4
- %2 = getelementptr [131072 x i32]* @ddst, i64 0, i64 %i
+ %0 = getelementptr [131072 x i32], [131072 x i32]* @dsrc, i64 0, i64 %i
+ %1 = load i32, i32* %0, align 4
+ %2 = getelementptr [131072 x i32], [131072 x i32]* @ddst, i64 0, i64 %i
store i32 %1, i32* %2, align 4
ret void
; LINUX-64-STATIC-LABEL: ind03:
@@ -2202,7 +2202,7 @@ entry:
define void @ind04(i64 %i) nounwind {
entry:
- %0 = getelementptr [131072 x i32]* @ddst, i64 0, i64 %i
+ %0 = getelementptr [131072 x i32], [131072 x i32]* @ddst, i64 0, i64 %i
store i32* %0, i32** @dptr, align 8
ret void
; LINUX-64-STATIC-LABEL: ind04:
@@ -2271,10 +2271,10 @@ entry:
define void @ind05(i64 %i) nounwind {
entry:
- %0 = load i32** @dptr, align 8
- %1 = getelementptr [131072 x i32]* @dsrc, i64 0, i64 %i
- %2 = load i32* %1, align 4
- %3 = getelementptr i32* %0, i64 %i
+ %0 = load i32*, i32** @dptr, align 8
+ %1 = getelementptr [131072 x i32], [131072 x i32]* @dsrc, i64 0, i64 %i
+ %2 = load i32, i32* %1, align 4
+ %3 = getelementptr i32, i32* %0, i64 %i
store i32 %2, i32* %3, align 4
ret void
; LINUX-64-STATIC-LABEL: ind05:
@@ -2353,9 +2353,9 @@ entry:
define void @ind06(i64 %i) nounwind {
entry:
- %0 = getelementptr [131072 x i32]* @lsrc, i64 0, i64 %i
- %1 = load i32* %0, align 4
- %2 = getelementptr [131072 x i32]* @ldst, i64 0, i64 %i
+ %0 = getelementptr [131072 x i32], [131072 x i32]* @lsrc, i64 0, i64 %i
+ %1 = load i32, i32* %0, align 4
+ %2 = getelementptr [131072 x i32], [131072 x i32]* @ldst, i64 0, i64 %i
store i32 %1, i32* %2, align 4
ret void
; LINUX-64-STATIC-LABEL: ind06:
@@ -2427,7 +2427,7 @@ entry:
define void @ind07(i64 %i) nounwind {
entry:
- %0 = getelementptr [131072 x i32]* @ldst, i64 0, i64 %i
+ %0 = getelementptr [131072 x i32], [131072 x i32]* @ldst, i64 0, i64 %i
store i32* %0, i32** @lptr, align 8
ret void
; LINUX-64-STATIC-LABEL: ind07:
@@ -2495,10 +2495,10 @@ entry:
define void @ind08(i64 %i) nounwind {
entry:
- %0 = load i32** @lptr, align 8
- %1 = getelementptr [131072 x i32]* @lsrc, i64 0, i64 %i
- %2 = load i32* %1, align 4
- %3 = getelementptr i32* %0, i64 %i
+ %0 = load i32*, i32** @lptr, align 8
+ %1 = getelementptr [131072 x i32], [131072 x i32]* @lsrc, i64 0, i64 %i
+ %2 = load i32, i32* %1, align 4
+ %3 = getelementptr i32, i32* %0, i64 %i
store i32 %2, i32* %3, align 4
ret void
; LINUX-64-STATIC-LABEL: ind08:
@@ -2577,9 +2577,9 @@ entry:
define void @off00(i64 %i) nounwind {
entry:
%0 = add i64 %i, 16
- %1 = getelementptr [131072 x i32]* @src, i64 0, i64 %0
- %2 = load i32* %1, align 4
- %3 = getelementptr [131072 x i32]* @dst, i64 0, i64 %0
+ %1 = getelementptr [131072 x i32], [131072 x i32]* @src, i64 0, i64 %0
+ %2 = load i32, i32* %1, align 4
+ %3 = getelementptr [131072 x i32], [131072 x i32]* @dst, i64 0, i64 %0
store i32 %2, i32* %3, align 4
ret void
; LINUX-64-STATIC-LABEL: off00:
@@ -2656,9 +2656,9 @@ entry:
define void @oxf00(i64 %i) nounwind {
entry:
%0 = add i64 %i, 16
- %1 = getelementptr [32 x i32]* @xsrc, i64 0, i64 %0
- %2 = load i32* %1, align 4
- %3 = getelementptr [32 x i32]* @xdst, i64 0, i64 %0
+ %1 = getelementptr [32 x i32], [32 x i32]* @xsrc, i64 0, i64 %0
+ %2 = load i32, i32* %1, align 4
+ %3 = getelementptr [32 x i32], [32 x i32]* @xdst, i64 0, i64 %0
store i32 %2, i32* %3, align 4
ret void
; LINUX-64-STATIC-LABEL: oxf00:
@@ -2735,7 +2735,7 @@ entry:
define void @off01(i64 %i) nounwind {
entry:
%.sum = add i64 %i, 16
- %0 = getelementptr [131072 x i32]* @dst, i64 0, i64 %.sum
+ %0 = getelementptr [131072 x i32], [131072 x i32]* @dst, i64 0, i64 %.sum
store i32* %0, i32** @ptr, align 8
ret void
; LINUX-64-STATIC-LABEL: off01:
@@ -2812,7 +2812,7 @@ entry:
define void @oxf01(i64 %i) nounwind {
entry:
%.sum = add i64 %i, 16
- %0 = getelementptr [32 x i32]* @xdst, i64 0, i64 %.sum
+ %0 = getelementptr [32 x i32], [32 x i32]* @xdst, i64 0, i64 %.sum
store i32* %0, i32** @ptr, align 8
ret void
; LINUX-64-STATIC-LABEL: oxf01:
@@ -2888,11 +2888,11 @@ entry:
define void @off02(i64 %i) nounwind {
entry:
- %0 = load i32** @ptr, align 8
+ %0 = load i32*, i32** @ptr, align 8
%1 = add i64 %i, 16
- %2 = getelementptr [131072 x i32]* @src, i64 0, i64 %1
- %3 = load i32* %2, align 4
- %4 = getelementptr i32* %0, i64 %1
+ %2 = getelementptr [131072 x i32], [131072 x i32]* @src, i64 0, i64 %1
+ %3 = load i32, i32* %2, align 4
+ %4 = getelementptr i32, i32* %0, i64 %1
store i32 %3, i32* %4, align 4
ret void
; LINUX-64-STATIC-LABEL: off02:
@@ -2978,11 +2978,11 @@ entry:
define void @oxf02(i64 %i) nounwind {
entry:
- %0 = load i32** @ptr, align 8
+ %0 = load i32*, i32** @ptr, align 8
%1 = add i64 %i, 16
- %2 = getelementptr [32 x i32]* @xsrc, i64 0, i64 %1
- %3 = load i32* %2, align 4
- %4 = getelementptr i32* %0, i64 %1
+ %2 = getelementptr [32 x i32], [32 x i32]* @xsrc, i64 0, i64 %1
+ %3 = load i32, i32* %2, align 4
+ %4 = getelementptr i32, i32* %0, i64 %1
store i32 %3, i32* %4, align 4
ret void
; LINUX-64-STATIC-LABEL: oxf02:
@@ -3069,9 +3069,9 @@ entry:
define void @off03(i64 %i) nounwind {
entry:
%0 = add i64 %i, 16
- %1 = getelementptr [131072 x i32]* @dsrc, i64 0, i64 %0
- %2 = load i32* %1, align 4
- %3 = getelementptr [131072 x i32]* @ddst, i64 0, i64 %0
+ %1 = getelementptr [131072 x i32], [131072 x i32]* @dsrc, i64 0, i64 %0
+ %2 = load i32, i32* %1, align 4
+ %3 = getelementptr [131072 x i32], [131072 x i32]* @ddst, i64 0, i64 %0
store i32 %2, i32* %3, align 4
ret void
; LINUX-64-STATIC-LABEL: off03:
@@ -3144,7 +3144,7 @@ entry:
define void @off04(i64 %i) nounwind {
entry:
%.sum = add i64 %i, 16
- %0 = getelementptr [131072 x i32]* @ddst, i64 0, i64 %.sum
+ %0 = getelementptr [131072 x i32], [131072 x i32]* @ddst, i64 0, i64 %.sum
store i32* %0, i32** @dptr, align 8
ret void
; LINUX-64-STATIC-LABEL: off04:
@@ -3213,11 +3213,11 @@ entry:
define void @off05(i64 %i) nounwind {
entry:
- %0 = load i32** @dptr, align 8
+ %0 = load i32*, i32** @dptr, align 8
%1 = add i64 %i, 16
- %2 = getelementptr [131072 x i32]* @dsrc, i64 0, i64 %1
- %3 = load i32* %2, align 4
- %4 = getelementptr i32* %0, i64 %1
+ %2 = getelementptr [131072 x i32], [131072 x i32]* @dsrc, i64 0, i64 %1
+ %3 = load i32, i32* %2, align 4
+ %4 = getelementptr i32, i32* %0, i64 %1
store i32 %3, i32* %4, align 4
ret void
; LINUX-64-STATIC-LABEL: off05:
@@ -3297,9 +3297,9 @@ entry:
define void @off06(i64 %i) nounwind {
entry:
%0 = add i64 %i, 16
- %1 = getelementptr [131072 x i32]* @lsrc, i64 0, i64 %0
- %2 = load i32* %1, align 4
- %3 = getelementptr [131072 x i32]* @ldst, i64 0, i64 %0
+ %1 = getelementptr [131072 x i32], [131072 x i32]* @lsrc, i64 0, i64 %0
+ %2 = load i32, i32* %1, align 4
+ %3 = getelementptr [131072 x i32], [131072 x i32]* @ldst, i64 0, i64 %0
store i32 %2, i32* %3, align 4
ret void
; LINUX-64-STATIC-LABEL: off06:
@@ -3372,7 +3372,7 @@ entry:
define void @off07(i64 %i) nounwind {
entry:
%.sum = add i64 %i, 16
- %0 = getelementptr [131072 x i32]* @ldst, i64 0, i64 %.sum
+ %0 = getelementptr [131072 x i32], [131072 x i32]* @ldst, i64 0, i64 %.sum
store i32* %0, i32** @lptr, align 8
ret void
; LINUX-64-STATIC-LABEL: off07:
@@ -3440,11 +3440,11 @@ entry:
define void @off08(i64 %i) nounwind {
entry:
- %0 = load i32** @lptr, align 8
+ %0 = load i32*, i32** @lptr, align 8
%1 = add i64 %i, 16
- %2 = getelementptr [131072 x i32]* @lsrc, i64 0, i64 %1
- %3 = load i32* %2, align 4
- %4 = getelementptr i32* %0, i64 %1
+ %2 = getelementptr [131072 x i32], [131072 x i32]* @lsrc, i64 0, i64 %1
+ %3 = load i32, i32* %2, align 4
+ %4 = getelementptr i32, i32* %0, i64 %1
store i32 %3, i32* %4, align 4
ret void
; LINUX-64-STATIC-LABEL: off08:
@@ -3522,8 +3522,8 @@ entry:
define void @moo00(i64 %i) nounwind {
entry:
- %0 = load i32* getelementptr ([131072 x i32]* @src, i32 0, i64 65536), align 4
- store i32 %0, i32* getelementptr ([131072 x i32]* @dst, i32 0, i64 65536), align 4
+ %0 = load i32, i32* getelementptr ([131072 x i32], [131072 x i32]* @src, i32 0, i64 65536), align 4
+ store i32 %0, i32* getelementptr ([131072 x i32], [131072 x i32]* @dst, i32 0, i64 65536), align 4
ret void
; LINUX-64-STATIC-LABEL: moo00:
; LINUX-64-STATIC: movl src+262144(%rip), [[EAX:%e.x]]
@@ -3593,7 +3593,7 @@ entry:
define void @moo01(i64 %i) nounwind {
entry:
- store i32* getelementptr ([131072 x i32]* @dst, i32 0, i64 65536), i32** @ptr, align 8
+ store i32* getelementptr ([131072 x i32], [131072 x i32]* @dst, i32 0, i64 65536), i32** @ptr, align 8
ret void
; LINUX-64-STATIC-LABEL: moo01:
; LINUX-64-STATIC: movq $dst+262144, ptr(%rip)
@@ -3659,9 +3659,9 @@ entry:
define void @moo02(i64 %i) nounwind {
entry:
- %0 = load i32** @ptr, align 8
- %1 = load i32* getelementptr ([131072 x i32]* @src, i32 0, i64 65536), align 4
- %2 = getelementptr i32* %0, i64 65536
+ %0 = load i32*, i32** @ptr, align 8
+ %1 = load i32, i32* getelementptr ([131072 x i32], [131072 x i32]* @src, i32 0, i64 65536), align 4
+ %2 = getelementptr i32, i32* %0, i64 65536
store i32 %1, i32* %2, align 4
ret void
; LINUX-64-STATIC-LABEL: moo02:
@@ -3742,8 +3742,8 @@ entry:
define void @moo03(i64 %i) nounwind {
entry:
- %0 = load i32* getelementptr ([131072 x i32]* @dsrc, i32 0, i64 65536), align 32
- store i32 %0, i32* getelementptr ([131072 x i32]* @ddst, i32 0, i64 65536), align 32
+ %0 = load i32, i32* getelementptr ([131072 x i32], [131072 x i32]* @dsrc, i32 0, i64 65536), align 32
+ store i32 %0, i32* getelementptr ([131072 x i32], [131072 x i32]* @ddst, i32 0, i64 65536), align 32
ret void
; LINUX-64-STATIC-LABEL: moo03:
; LINUX-64-STATIC: movl dsrc+262144(%rip), [[EAX:%e.x]]
@@ -3803,7 +3803,7 @@ entry:
define void @moo04(i64 %i) nounwind {
entry:
- store i32* getelementptr ([131072 x i32]* @ddst, i32 0, i64 65536), i32** @dptr, align 8
+ store i32* getelementptr ([131072 x i32], [131072 x i32]* @ddst, i32 0, i64 65536), i32** @dptr, align 8
ret void
; LINUX-64-STATIC-LABEL: moo04:
; LINUX-64-STATIC: movq $ddst+262144, dptr
@@ -3858,9 +3858,9 @@ entry:
define void @moo05(i64 %i) nounwind {
entry:
- %0 = load i32** @dptr, align 8
- %1 = load i32* getelementptr ([131072 x i32]* @dsrc, i32 0, i64 65536), align 32
- %2 = getelementptr i32* %0, i64 65536
+ %0 = load i32*, i32** @dptr, align 8
+ %1 = load i32, i32* getelementptr ([131072 x i32], [131072 x i32]* @dsrc, i32 0, i64 65536), align 32
+ %2 = getelementptr i32, i32* %0, i64 65536
store i32 %1, i32* %2, align 4
ret void
; LINUX-64-STATIC-LABEL: moo05:
@@ -3931,8 +3931,8 @@ entry:
define void @moo06(i64 %i) nounwind {
entry:
- %0 = load i32* getelementptr ([131072 x i32]* @lsrc, i32 0, i64 65536), align 4
- store i32 %0, i32* getelementptr ([131072 x i32]* @ldst, i32 0, i64 65536), align 4
+ %0 = load i32, i32* getelementptr ([131072 x i32], [131072 x i32]* @lsrc, i32 0, i64 65536), align 4
+ store i32 %0, i32* getelementptr ([131072 x i32], [131072 x i32]* @ldst, i32 0, i64 65536), align 4
ret void
; LINUX-64-STATIC-LABEL: moo06:
; LINUX-64-STATIC: movl lsrc+262144(%rip), [[EAX:%e.x]]
@@ -3990,7 +3990,7 @@ entry:
define void @moo07(i64 %i) nounwind {
entry:
- store i32* getelementptr ([131072 x i32]* @ldst, i32 0, i64 65536), i32** @lptr, align 8
+ store i32* getelementptr ([131072 x i32], [131072 x i32]* @ldst, i32 0, i64 65536), i32** @lptr, align 8
ret void
; LINUX-64-STATIC-LABEL: moo07:
; LINUX-64-STATIC: movq $ldst+262144, lptr
@@ -4043,9 +4043,9 @@ entry:
define void @moo08(i64 %i) nounwind {
entry:
- %0 = load i32** @lptr, align 8
- %1 = load i32* getelementptr ([131072 x i32]* @lsrc, i32 0, i64 65536), align 4
- %2 = getelementptr i32* %0, i64 65536
+ %0 = load i32*, i32** @lptr, align 8
+ %1 = load i32, i32* getelementptr ([131072 x i32], [131072 x i32]* @lsrc, i32 0, i64 65536), align 4
+ %2 = getelementptr i32, i32* %0, i64 65536
store i32 %1, i32* %2, align 4
ret void
; LINUX-64-STATIC-LABEL: moo08:
@@ -4115,9 +4115,9 @@ entry:
define void @big00(i64 %i) nounwind {
entry:
%0 = add i64 %i, 65536
- %1 = getelementptr [131072 x i32]* @src, i64 0, i64 %0
- %2 = load i32* %1, align 4
- %3 = getelementptr [131072 x i32]* @dst, i64 0, i64 %0
+ %1 = getelementptr [131072 x i32], [131072 x i32]* @src, i64 0, i64 %0
+ %2 = load i32, i32* %1, align 4
+ %3 = getelementptr [131072 x i32], [131072 x i32]* @dst, i64 0, i64 %0
store i32 %2, i32* %3, align 4
ret void
; LINUX-64-STATIC-LABEL: big00:
@@ -4194,7 +4194,7 @@ entry:
define void @big01(i64 %i) nounwind {
entry:
%.sum = add i64 %i, 65536
- %0 = getelementptr [131072 x i32]* @dst, i64 0, i64 %.sum
+ %0 = getelementptr [131072 x i32], [131072 x i32]* @dst, i64 0, i64 %.sum
store i32* %0, i32** @ptr, align 8
ret void
; LINUX-64-STATIC-LABEL: big01:
@@ -4270,11 +4270,11 @@ entry:
define void @big02(i64 %i) nounwind {
entry:
- %0 = load i32** @ptr, align 8
+ %0 = load i32*, i32** @ptr, align 8
%1 = add i64 %i, 65536
- %2 = getelementptr [131072 x i32]* @src, i64 0, i64 %1
- %3 = load i32* %2, align 4
- %4 = getelementptr i32* %0, i64 %1
+ %2 = getelementptr [131072 x i32], [131072 x i32]* @src, i64 0, i64 %1
+ %3 = load i32, i32* %2, align 4
+ %4 = getelementptr i32, i32* %0, i64 %1
store i32 %3, i32* %4, align 4
ret void
; LINUX-64-STATIC-LABEL: big02:
@@ -4361,9 +4361,9 @@ entry:
define void @big03(i64 %i) nounwind {
entry:
%0 = add i64 %i, 65536
- %1 = getelementptr [131072 x i32]* @dsrc, i64 0, i64 %0
- %2 = load i32* %1, align 4
- %3 = getelementptr [131072 x i32]* @ddst, i64 0, i64 %0
+ %1 = getelementptr [131072 x i32], [131072 x i32]* @dsrc, i64 0, i64 %0
+ %2 = load i32, i32* %1, align 4
+ %3 = getelementptr [131072 x i32], [131072 x i32]* @ddst, i64 0, i64 %0
store i32 %2, i32* %3, align 4
ret void
; LINUX-64-STATIC-LABEL: big03:
@@ -4436,7 +4436,7 @@ entry:
define void @big04(i64 %i) nounwind {
entry:
%.sum = add i64 %i, 65536
- %0 = getelementptr [131072 x i32]* @ddst, i64 0, i64 %.sum
+ %0 = getelementptr [131072 x i32], [131072 x i32]* @ddst, i64 0, i64 %.sum
store i32* %0, i32** @dptr, align 8
ret void
; LINUX-64-STATIC-LABEL: big04:
@@ -4505,11 +4505,11 @@ entry:
define void @big05(i64 %i) nounwind {
entry:
- %0 = load i32** @dptr, align 8
+ %0 = load i32*, i32** @dptr, align 8
%1 = add i64 %i, 65536
- %2 = getelementptr [131072 x i32]* @dsrc, i64 0, i64 %1
- %3 = load i32* %2, align 4
- %4 = getelementptr i32* %0, i64 %1
+ %2 = getelementptr [131072 x i32], [131072 x i32]* @dsrc, i64 0, i64 %1
+ %3 = load i32, i32* %2, align 4
+ %4 = getelementptr i32, i32* %0, i64 %1
store i32 %3, i32* %4, align 4
ret void
; LINUX-64-STATIC-LABEL: big05:
@@ -4589,9 +4589,9 @@ entry:
define void @big06(i64 %i) nounwind {
entry:
%0 = add i64 %i, 65536
- %1 = getelementptr [131072 x i32]* @lsrc, i64 0, i64 %0
- %2 = load i32* %1, align 4
- %3 = getelementptr [131072 x i32]* @ldst, i64 0, i64 %0
+ %1 = getelementptr [131072 x i32], [131072 x i32]* @lsrc, i64 0, i64 %0
+ %2 = load i32, i32* %1, align 4
+ %3 = getelementptr [131072 x i32], [131072 x i32]* @ldst, i64 0, i64 %0
store i32 %2, i32* %3, align 4
ret void
; LINUX-64-STATIC-LABEL: big06:
@@ -4664,7 +4664,7 @@ entry:
define void @big07(i64 %i) nounwind {
entry:
%.sum = add i64 %i, 65536
- %0 = getelementptr [131072 x i32]* @ldst, i64 0, i64 %.sum
+ %0 = getelementptr [131072 x i32], [131072 x i32]* @ldst, i64 0, i64 %.sum
store i32* %0, i32** @lptr, align 8
ret void
; LINUX-64-STATIC-LABEL: big07:
@@ -4732,11 +4732,11 @@ entry:
define void @big08(i64 %i) nounwind {
entry:
- %0 = load i32** @lptr, align 8
+ %0 = load i32*, i32** @lptr, align 8
%1 = add i64 %i, 65536
- %2 = getelementptr [131072 x i32]* @lsrc, i64 0, i64 %1
- %3 = load i32* %2, align 4
- %4 = getelementptr i32* %0, i64 %1
+ %2 = getelementptr [131072 x i32], [131072 x i32]* @lsrc, i64 0, i64 %1
+ %3 = load i32, i32* %2, align 4
+ %4 = getelementptr i32, i32* %0, i64 %1
store i32 %3, i32* %4, align 4
ret void
; LINUX-64-STATIC-LABEL: big08:
@@ -5519,7 +5519,7 @@ entry:
define i8* @har02() nounwind {
entry:
- %0 = load i32** @ptr, align 8
+ %0 = load i32*, i32** @ptr, align 8
%1 = bitcast i32* %0 to i8*
ret i8* %1
; LINUX-64-STATIC-LABEL: har02:
@@ -5668,7 +5668,7 @@ entry:
define i8* @har05() nounwind {
entry:
- %0 = load i32** @dptr, align 8
+ %0 = load i32*, i32** @dptr, align 8
%1 = bitcast i32* %0 to i8*
ret i8* %1
; LINUX-64-STATIC-LABEL: har05:
@@ -5812,7 +5812,7 @@ entry:
define i8* @har08() nounwind {
entry:
- %0 = load i32** @lptr, align 8
+ %0 = load i32*, i32** @lptr, align 8
%1 = bitcast i32* %0 to i8*
ret i8* %1
; LINUX-64-STATIC-LABEL: har08:
@@ -5861,7 +5861,7 @@ entry:
define i8* @bat00() nounwind {
entry:
- ret i8* bitcast (i32* getelementptr ([131072 x i32]* @src, i32 0, i64 16) to i8*)
+ ret i8* bitcast (i32* getelementptr ([131072 x i32], [131072 x i32]* @src, i32 0, i64 16) to i8*)
; LINUX-64-STATIC-LABEL: bat00:
; LINUX-64-STATIC: movl $src+64, %eax
; LINUX-64-STATIC: ret
@@ -5914,7 +5914,7 @@ entry:
define i8* @bxt00() nounwind {
entry:
- ret i8* bitcast (i32* getelementptr ([32 x i32]* @xsrc, i32 0, i64 16) to i8*)
+ ret i8* bitcast (i32* getelementptr ([32 x i32], [32 x i32]* @xsrc, i32 0, i64 16) to i8*)
; LINUX-64-STATIC-LABEL: bxt00:
; LINUX-64-STATIC: movl $xsrc+64, %eax
; LINUX-64-STATIC: ret
@@ -5967,7 +5967,7 @@ entry:
define i8* @bat01() nounwind {
entry:
- ret i8* bitcast (i32* getelementptr ([131072 x i32]* @dst, i32 0, i64 16) to i8*)
+ ret i8* bitcast (i32* getelementptr ([131072 x i32], [131072 x i32]* @dst, i32 0, i64 16) to i8*)
; LINUX-64-STATIC-LABEL: bat01:
; LINUX-64-STATIC: movl $dst+64, %eax
; LINUX-64-STATIC: ret
@@ -6020,7 +6020,7 @@ entry:
define i8* @bxt01() nounwind {
entry:
- ret i8* bitcast (i32* getelementptr ([32 x i32]* @xdst, i32 0, i64 16) to i8*)
+ ret i8* bitcast (i32* getelementptr ([32 x i32], [32 x i32]* @xdst, i32 0, i64 16) to i8*)
; LINUX-64-STATIC-LABEL: bxt01:
; LINUX-64-STATIC: movl $xdst+64, %eax
; LINUX-64-STATIC: ret
@@ -6073,8 +6073,8 @@ entry:
define i8* @bat02() nounwind {
entry:
- %0 = load i32** @ptr, align 8
- %1 = getelementptr i32* %0, i64 16
+ %0 = load i32*, i32** @ptr, align 8
+ %1 = getelementptr i32, i32* %0, i64 16
%2 = bitcast i32* %1 to i8*
ret i8* %2
; LINUX-64-STATIC-LABEL: bat02:
@@ -6139,7 +6139,7 @@ entry:
define i8* @bat03() nounwind {
entry:
- ret i8* bitcast (i32* getelementptr ([131072 x i32]* @dsrc, i32 0, i64 16) to i8*)
+ ret i8* bitcast (i32* getelementptr ([131072 x i32], [131072 x i32]* @dsrc, i32 0, i64 16) to i8*)
; LINUX-64-STATIC-LABEL: bat03:
; LINUX-64-STATIC: movl $dsrc+64, %eax
; LINUX-64-STATIC: ret
@@ -6187,7 +6187,7 @@ entry:
define i8* @bat04() nounwind {
entry:
- ret i8* bitcast (i32* getelementptr ([131072 x i32]* @ddst, i32 0, i64 16) to i8*)
+ ret i8* bitcast (i32* getelementptr ([131072 x i32], [131072 x i32]* @ddst, i32 0, i64 16) to i8*)
; LINUX-64-STATIC-LABEL: bat04:
; LINUX-64-STATIC: movl $ddst+64, %eax
; LINUX-64-STATIC: ret
@@ -6235,8 +6235,8 @@ entry:
define i8* @bat05() nounwind {
entry:
- %0 = load i32** @dptr, align 8
- %1 = getelementptr i32* %0, i64 16
+ %0 = load i32*, i32** @dptr, align 8
+ %1 = getelementptr i32, i32* %0, i64 16
%2 = bitcast i32* %1 to i8*
ret i8* %2
; LINUX-64-STATIC-LABEL: bat05:
@@ -6296,7 +6296,7 @@ entry:
define i8* @bat06() nounwind {
entry:
- ret i8* bitcast (i32* getelementptr ([131072 x i32]* @lsrc, i32 0, i64 16) to i8*)
+ ret i8* bitcast (i32* getelementptr ([131072 x i32], [131072 x i32]* @lsrc, i32 0, i64 16) to i8*)
; LINUX-64-STATIC-LABEL: bat06:
; LINUX-64-STATIC: movl $lsrc+64, %eax
; LINUX-64-STATIC: ret
@@ -6343,7 +6343,7 @@ entry:
define i8* @bat07() nounwind {
entry:
- ret i8* bitcast (i32* getelementptr ([131072 x i32]* @ldst, i32 0, i64 16) to i8*)
+ ret i8* bitcast (i32* getelementptr ([131072 x i32], [131072 x i32]* @ldst, i32 0, i64 16) to i8*)
; LINUX-64-STATIC-LABEL: bat07:
; LINUX-64-STATIC: movl $ldst+64, %eax
; LINUX-64-STATIC: ret
@@ -6390,8 +6390,8 @@ entry:
define i8* @bat08() nounwind {
entry:
- %0 = load i32** @lptr, align 8
- %1 = getelementptr i32* %0, i64 16
+ %0 = load i32*, i32** @lptr, align 8
+ %1 = getelementptr i32, i32* %0, i64 16
%2 = bitcast i32* %1 to i8*
ret i8* %2
; LINUX-64-STATIC-LABEL: bat08:
@@ -6450,7 +6450,7 @@ entry:
define i8* @bam00() nounwind {
entry:
- ret i8* bitcast (i32* getelementptr ([131072 x i32]* @src, i32 0, i64 65536) to i8*)
+ ret i8* bitcast (i32* getelementptr ([131072 x i32], [131072 x i32]* @src, i32 0, i64 65536) to i8*)
; LINUX-64-STATIC-LABEL: bam00:
; LINUX-64-STATIC: movl $src+262144, %eax
; LINUX-64-STATIC: ret
@@ -6503,7 +6503,7 @@ entry:
define i8* @bam01() nounwind {
entry:
- ret i8* bitcast (i32* getelementptr ([131072 x i32]* @dst, i32 0, i64 65536) to i8*)
+ ret i8* bitcast (i32* getelementptr ([131072 x i32], [131072 x i32]* @dst, i32 0, i64 65536) to i8*)
; LINUX-64-STATIC-LABEL: bam01:
; LINUX-64-STATIC: movl $dst+262144, %eax
; LINUX-64-STATIC: ret
@@ -6556,7 +6556,7 @@ entry:
define i8* @bxm01() nounwind {
entry:
- ret i8* bitcast (i32* getelementptr ([32 x i32]* @xdst, i32 0, i64 65536) to i8*)
+ ret i8* bitcast (i32* getelementptr ([32 x i32], [32 x i32]* @xdst, i32 0, i64 65536) to i8*)
; LINUX-64-STATIC-LABEL: bxm01:
; LINUX-64-STATIC: movl $xdst+262144, %eax
; LINUX-64-STATIC: ret
@@ -6609,8 +6609,8 @@ entry:
define i8* @bam02() nounwind {
entry:
- %0 = load i32** @ptr, align 8
- %1 = getelementptr i32* %0, i64 65536
+ %0 = load i32*, i32** @ptr, align 8
+ %1 = getelementptr i32, i32* %0, i64 65536
%2 = bitcast i32* %1 to i8*
ret i8* %2
; LINUX-64-STATIC-LABEL: bam02:
@@ -6675,7 +6675,7 @@ entry:
define i8* @bam03() nounwind {
entry:
- ret i8* bitcast (i32* getelementptr ([131072 x i32]* @dsrc, i32 0, i64 65536) to i8*)
+ ret i8* bitcast (i32* getelementptr ([131072 x i32], [131072 x i32]* @dsrc, i32 0, i64 65536) to i8*)
; LINUX-64-STATIC-LABEL: bam03:
; LINUX-64-STATIC: movl $dsrc+262144, %eax
; LINUX-64-STATIC: ret
@@ -6723,7 +6723,7 @@ entry:
define i8* @bam04() nounwind {
entry:
- ret i8* bitcast (i32* getelementptr ([131072 x i32]* @ddst, i32 0, i64 65536) to i8*)
+ ret i8* bitcast (i32* getelementptr ([131072 x i32], [131072 x i32]* @ddst, i32 0, i64 65536) to i8*)
; LINUX-64-STATIC-LABEL: bam04:
; LINUX-64-STATIC: movl $ddst+262144, %eax
; LINUX-64-STATIC: ret
@@ -6771,8 +6771,8 @@ entry:
define i8* @bam05() nounwind {
entry:
- %0 = load i32** @dptr, align 8
- %1 = getelementptr i32* %0, i64 65536
+ %0 = load i32*, i32** @dptr, align 8
+ %1 = getelementptr i32, i32* %0, i64 65536
%2 = bitcast i32* %1 to i8*
ret i8* %2
; LINUX-64-STATIC-LABEL: bam05:
@@ -6832,7 +6832,7 @@ entry:
define i8* @bam06() nounwind {
entry:
- ret i8* bitcast (i32* getelementptr ([131072 x i32]* @lsrc, i32 0, i64 65536) to i8*)
+ ret i8* bitcast (i32* getelementptr ([131072 x i32], [131072 x i32]* @lsrc, i32 0, i64 65536) to i8*)
; LINUX-64-STATIC-LABEL: bam06:
; LINUX-64-STATIC: movl $lsrc+262144, %eax
; LINUX-64-STATIC: ret
@@ -6879,7 +6879,7 @@ entry:
define i8* @bam07() nounwind {
entry:
- ret i8* bitcast (i32* getelementptr ([131072 x i32]* @ldst, i32 0, i64 65536) to i8*)
+ ret i8* bitcast (i32* getelementptr ([131072 x i32], [131072 x i32]* @ldst, i32 0, i64 65536) to i8*)
; LINUX-64-STATIC-LABEL: bam07:
; LINUX-64-STATIC: movl $ldst+262144, %eax
; LINUX-64-STATIC: ret
@@ -6926,8 +6926,8 @@ entry:
define i8* @bam08() nounwind {
entry:
- %0 = load i32** @lptr, align 8
- %1 = getelementptr i32* %0, i64 65536
+ %0 = load i32*, i32** @lptr, align 8
+ %1 = getelementptr i32, i32* %0, i64 65536
%2 = bitcast i32* %1 to i8*
ret i8* %2
; LINUX-64-STATIC-LABEL: bam08:
@@ -6987,7 +6987,7 @@ entry:
define i8* @cat00(i64 %i) nounwind {
entry:
%0 = add i64 %i, 16
- %1 = getelementptr [131072 x i32]* @src, i64 0, i64 %0
+ %1 = getelementptr [131072 x i32], [131072 x i32]* @src, i64 0, i64 %0
%2 = bitcast i32* %1 to i8*
ret i8* %2
; LINUX-64-STATIC-LABEL: cat00:
@@ -7048,7 +7048,7 @@ entry:
define i8* @cxt00(i64 %i) nounwind {
entry:
%0 = add i64 %i, 16
- %1 = getelementptr [32 x i32]* @xsrc, i64 0, i64 %0
+ %1 = getelementptr [32 x i32], [32 x i32]* @xsrc, i64 0, i64 %0
%2 = bitcast i32* %1 to i8*
ret i8* %2
; LINUX-64-STATIC-LABEL: cxt00:
@@ -7109,7 +7109,7 @@ entry:
define i8* @cat01(i64 %i) nounwind {
entry:
%0 = add i64 %i, 16
- %1 = getelementptr [131072 x i32]* @dst, i64 0, i64 %0
+ %1 = getelementptr [131072 x i32], [131072 x i32]* @dst, i64 0, i64 %0
%2 = bitcast i32* %1 to i8*
ret i8* %2
; LINUX-64-STATIC-LABEL: cat01:
@@ -7170,7 +7170,7 @@ entry:
define i8* @cxt01(i64 %i) nounwind {
entry:
%0 = add i64 %i, 16
- %1 = getelementptr [32 x i32]* @xdst, i64 0, i64 %0
+ %1 = getelementptr [32 x i32], [32 x i32]* @xdst, i64 0, i64 %0
%2 = bitcast i32* %1 to i8*
ret i8* %2
; LINUX-64-STATIC-LABEL: cxt01:
@@ -7230,9 +7230,9 @@ entry:
define i8* @cat02(i64 %i) nounwind {
entry:
- %0 = load i32** @ptr, align 8
+ %0 = load i32*, i32** @ptr, align 8
%1 = add i64 %i, 16
- %2 = getelementptr i32* %0, i64 %1
+ %2 = getelementptr i32, i32* %0, i64 %1
%3 = bitcast i32* %2 to i8*
ret i8* %3
; LINUX-64-STATIC-LABEL: cat02:
@@ -7303,7 +7303,7 @@ entry:
define i8* @cat03(i64 %i) nounwind {
entry:
%0 = add i64 %i, 16
- %1 = getelementptr [131072 x i32]* @dsrc, i64 0, i64 %0
+ %1 = getelementptr [131072 x i32], [131072 x i32]* @dsrc, i64 0, i64 %0
%2 = bitcast i32* %1 to i8*
ret i8* %2
; LINUX-64-STATIC-LABEL: cat03:
@@ -7362,7 +7362,7 @@ entry:
define i8* @cat04(i64 %i) nounwind {
entry:
%0 = add i64 %i, 16
- %1 = getelementptr [131072 x i32]* @ddst, i64 0, i64 %0
+ %1 = getelementptr [131072 x i32], [131072 x i32]* @ddst, i64 0, i64 %0
%2 = bitcast i32* %1 to i8*
ret i8* %2
; LINUX-64-STATIC-LABEL: cat04:
@@ -7420,9 +7420,9 @@ entry:
define i8* @cat05(i64 %i) nounwind {
entry:
- %0 = load i32** @dptr, align 8
+ %0 = load i32*, i32** @dptr, align 8
%1 = add i64 %i, 16
- %2 = getelementptr i32* %0, i64 %1
+ %2 = getelementptr i32, i32* %0, i64 %1
%3 = bitcast i32* %2 to i8*
ret i8* %3
; LINUX-64-STATIC-LABEL: cat05:
@@ -7488,7 +7488,7 @@ entry:
define i8* @cat06(i64 %i) nounwind {
entry:
%0 = add i64 %i, 16
- %1 = getelementptr [131072 x i32]* @lsrc, i64 0, i64 %0
+ %1 = getelementptr [131072 x i32], [131072 x i32]* @lsrc, i64 0, i64 %0
%2 = bitcast i32* %1 to i8*
ret i8* %2
; LINUX-64-STATIC-LABEL: cat06:
@@ -7547,7 +7547,7 @@ entry:
define i8* @cat07(i64 %i) nounwind {
entry:
%0 = add i64 %i, 16
- %1 = getelementptr [131072 x i32]* @ldst, i64 0, i64 %0
+ %1 = getelementptr [131072 x i32], [131072 x i32]* @ldst, i64 0, i64 %0
%2 = bitcast i32* %1 to i8*
ret i8* %2
; LINUX-64-STATIC-LABEL: cat07:
@@ -7605,9 +7605,9 @@ entry:
define i8* @cat08(i64 %i) nounwind {
entry:
- %0 = load i32** @lptr, align 8
+ %0 = load i32*, i32** @lptr, align 8
%1 = add i64 %i, 16
- %2 = getelementptr i32* %0, i64 %1
+ %2 = getelementptr i32, i32* %0, i64 %1
%3 = bitcast i32* %2 to i8*
ret i8* %3
; LINUX-64-STATIC-LABEL: cat08:
@@ -7672,7 +7672,7 @@ entry:
define i8* @cam00(i64 %i) nounwind {
entry:
%0 = add i64 %i, 65536
- %1 = getelementptr [131072 x i32]* @src, i64 0, i64 %0
+ %1 = getelementptr [131072 x i32], [131072 x i32]* @src, i64 0, i64 %0
%2 = bitcast i32* %1 to i8*
ret i8* %2
; LINUX-64-STATIC-LABEL: cam00:
@@ -7733,7 +7733,7 @@ entry:
define i8* @cxm00(i64 %i) nounwind {
entry:
%0 = add i64 %i, 65536
- %1 = getelementptr [32 x i32]* @xsrc, i64 0, i64 %0
+ %1 = getelementptr [32 x i32], [32 x i32]* @xsrc, i64 0, i64 %0
%2 = bitcast i32* %1 to i8*
ret i8* %2
; LINUX-64-STATIC-LABEL: cxm00:
@@ -7794,7 +7794,7 @@ entry:
define i8* @cam01(i64 %i) nounwind {
entry:
%0 = add i64 %i, 65536
- %1 = getelementptr [131072 x i32]* @dst, i64 0, i64 %0
+ %1 = getelementptr [131072 x i32], [131072 x i32]* @dst, i64 0, i64 %0
%2 = bitcast i32* %1 to i8*
ret i8* %2
; LINUX-64-STATIC-LABEL: cam01:
@@ -7855,7 +7855,7 @@ entry:
define i8* @cxm01(i64 %i) nounwind {
entry:
%0 = add i64 %i, 65536
- %1 = getelementptr [32 x i32]* @xdst, i64 0, i64 %0
+ %1 = getelementptr [32 x i32], [32 x i32]* @xdst, i64 0, i64 %0
%2 = bitcast i32* %1 to i8*
ret i8* %2
; LINUX-64-STATIC-LABEL: cxm01:
@@ -7915,9 +7915,9 @@ entry:
define i8* @cam02(i64 %i) nounwind {
entry:
- %0 = load i32** @ptr, align 8
+ %0 = load i32*, i32** @ptr, align 8
%1 = add i64 %i, 65536
- %2 = getelementptr i32* %0, i64 %1
+ %2 = getelementptr i32, i32* %0, i64 %1
%3 = bitcast i32* %2 to i8*
ret i8* %3
; LINUX-64-STATIC-LABEL: cam02:
@@ -7988,7 +7988,7 @@ entry:
define i8* @cam03(i64 %i) nounwind {
entry:
%0 = add i64 %i, 65536
- %1 = getelementptr [131072 x i32]* @dsrc, i64 0, i64 %0
+ %1 = getelementptr [131072 x i32], [131072 x i32]* @dsrc, i64 0, i64 %0
%2 = bitcast i32* %1 to i8*
ret i8* %2
; LINUX-64-STATIC-LABEL: cam03:
@@ -8047,7 +8047,7 @@ entry:
define i8* @cam04(i64 %i) nounwind {
entry:
%0 = add i64 %i, 65536
- %1 = getelementptr [131072 x i32]* @ddst, i64 0, i64 %0
+ %1 = getelementptr [131072 x i32], [131072 x i32]* @ddst, i64 0, i64 %0
%2 = bitcast i32* %1 to i8*
ret i8* %2
; LINUX-64-STATIC-LABEL: cam04:
@@ -8105,9 +8105,9 @@ entry:
define i8* @cam05(i64 %i) nounwind {
entry:
- %0 = load i32** @dptr, align 8
+ %0 = load i32*, i32** @dptr, align 8
%1 = add i64 %i, 65536
- %2 = getelementptr i32* %0, i64 %1
+ %2 = getelementptr i32, i32* %0, i64 %1
%3 = bitcast i32* %2 to i8*
ret i8* %3
; LINUX-64-STATIC-LABEL: cam05:
@@ -8173,7 +8173,7 @@ entry:
define i8* @cam06(i64 %i) nounwind {
entry:
%0 = add i64 %i, 65536
- %1 = getelementptr [131072 x i32]* @lsrc, i64 0, i64 %0
+ %1 = getelementptr [131072 x i32], [131072 x i32]* @lsrc, i64 0, i64 %0
%2 = bitcast i32* %1 to i8*
ret i8* %2
; LINUX-64-STATIC-LABEL: cam06:
@@ -8232,7 +8232,7 @@ entry:
define i8* @cam07(i64 %i) nounwind {
entry:
%0 = add i64 %i, 65536
- %1 = getelementptr [131072 x i32]* @ldst, i64 0, i64 %0
+ %1 = getelementptr [131072 x i32], [131072 x i32]* @ldst, i64 0, i64 %0
%2 = bitcast i32* %1 to i8*
ret i8* %2
; LINUX-64-STATIC-LABEL: cam07:
@@ -8290,9 +8290,9 @@ entry:
define i8* @cam08(i64 %i) nounwind {
entry:
- %0 = load i32** @lptr, align 8
+ %0 = load i32*, i32** @lptr, align 8
%1 = add i64 %i, 65536
- %2 = getelementptr i32* %0, i64 %1
+ %2 = getelementptr i32, i32* %0, i64 %1
%3 = bitcast i32* %2 to i8*
ret i8* %3
; LINUX-64-STATIC-LABEL: cam08:
@@ -9180,9 +9180,9 @@ entry:
define void @icaller() nounwind {
entry:
- %0 = load void ()** @ifunc, align 8
+ %0 = load void ()*, void ()** @ifunc, align 8
call void %0() nounwind
- %1 = load void ()** @ifunc, align 8
+ %1 = load void ()*, void ()** @ifunc, align 8
call void %1() nounwind
ret void
; LINUX-64-STATIC-LABEL: icaller:
@@ -9270,9 +9270,9 @@ entry:
define void @dicaller() nounwind {
entry:
- %0 = load void ()** @difunc, align 8
+ %0 = load void ()*, void ()** @difunc, align 8
call void %0() nounwind
- %1 = load void ()** @difunc, align 8
+ %1 = load void ()*, void ()** @difunc, align 8
call void %1() nounwind
ret void
; LINUX-64-STATIC-LABEL: dicaller:
@@ -9353,9 +9353,9 @@ entry:
define void @licaller() nounwind {
entry:
- %0 = load void ()** @lifunc, align 8
+ %0 = load void ()*, void ()** @lifunc, align 8
call void %0() nounwind
- %1 = load void ()** @lifunc, align 8
+ %1 = load void ()*, void ()** @lifunc, align 8
call void %1() nounwind
ret void
; LINUX-64-STATIC-LABEL: licaller:
@@ -9435,9 +9435,9 @@ entry:
define void @itailcaller() nounwind {
entry:
- %0 = load void ()** @ifunc, align 8
+ %0 = load void ()*, void ()** @ifunc, align 8
call void %0() nounwind
- %1 = load void ()** @ifunc, align 8
+ %1 = load void ()*, void ()** @ifunc, align 8
call void %1() nounwind
ret void
; LINUX-64-STATIC-LABEL: itailcaller:
@@ -9525,7 +9525,7 @@ entry:
define void @ditailcaller() nounwind {
entry:
- %0 = load void ()** @difunc, align 8
+ %0 = load void ()*, void ()** @difunc, align 8
call void %0() nounwind
ret void
; LINUX-64-STATIC-LABEL: ditailcaller:
@@ -9593,7 +9593,7 @@ entry:
define void @litailcaller() nounwind {
entry:
- %0 = load void ()** @lifunc, align 8
+ %0 = load void ()*, void ()** @lifunc, align 8
call void %0() nounwind
ret void
; LINUX-64-STATIC-LABEL: litailcaller:
diff --git a/test/CodeGen/X86/add-of-carry.ll b/test/CodeGen/X86/add-of-carry.ll
index 9c24be4289ff..44b587af3aaa 100644
--- a/test/CodeGen/X86/add-of-carry.ll
+++ b/test/CodeGen/X86/add-of-carry.ll
@@ -4,43 +4,26 @@
define i32 @test1(i32 %sum, i32 %x) nounwind readnone ssp {
entry:
; CHECK-LABEL: test1:
-; CHECK: cmpl %ecx, %eax
-; CHECK-NOT: addl
-; CHECK: adcl $0, %eax
- %add4 = add i32 %x, %sum
- %cmp = icmp ult i32 %add4, %x
- %inc = zext i1 %cmp to i32
- %z.0 = add i32 %add4, %inc
- ret i32 %z.0
-}
-
-; Instcombine transforms test1 into test2:
-; CHECK-LABEL: test2:
; CHECK: movl
; CHECK-NEXT: addl
; CHECK-NEXT: adcl $0
; CHECK-NEXT: ret
-define i32 @test2(i32 %sum, i32 %x) nounwind readnone ssp {
-entry:
- %uadd = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %x, i32 %sum)
- %0 = extractvalue { i32, i1 } %uadd, 0
- %cmp = extractvalue { i32, i1 } %uadd, 1
+ %add4 = add i32 %x, %sum
+ %cmp = icmp ult i32 %add4, %x
%inc = zext i1 %cmp to i32
- %z.0 = add i32 %0, %inc
+ %z.0 = add i32 %add4, %inc
ret i32 %z.0
}
; <rdar://problem/12579915>
-define i32 @test3(i32 %x, i32 %y, i32 %res) nounwind uwtable readnone ssp {
+define i32 @test2(i32 %x, i32 %y, i32 %res) nounwind uwtable readnone ssp {
entry:
%cmp = icmp ugt i32 %x, %y
%dec = sext i1 %cmp to i32
%dec.res = add nsw i32 %dec, %res
ret i32 %dec.res
-; CHECK-LABEL: test3:
+; CHECK-LABEL: test2:
; CHECK: cmpl
; CHECK: sbbl
; CHECK: ret
}
-
-declare { i32, i1 } @llvm.uadd.with.overflow.i32(i32, i32) nounwind readnone
diff --git a/test/CodeGen/X86/add32ri8.ll b/test/CodeGen/X86/add32ri8.ll
new file mode 100644
index 000000000000..a74c37280cdc
--- /dev/null
+++ b/test/CodeGen/X86/add32ri8.ll
@@ -0,0 +1,10 @@
+; RUN: llc -mtriple=x86_64-linux -fast-isel -show-mc-encoding < %s | FileCheck %s
+
+; pr22854
+; CHECK: addl $42, %esi # encoding: [0x83,0xc6,0x2a]
+
+define void @foo(i32 *%s, i32 %x) {
+ %y = add nsw i32 %x, 42
+ store i32 %y, i32* %s, align 4
+ ret void
+}
diff --git a/test/CodeGen/X86/addr-mode-matcher.ll b/test/CodeGen/X86/addr-mode-matcher.ll
index d5920910f289..83d6858bda1c 100644
--- a/test/CodeGen/X86/addr-mode-matcher.ll
+++ b/test/CodeGen/X86/addr-mode-matcher.ll
@@ -24,16 +24,16 @@ bb1692:
; %load1 = (load (and (shl %xor, 2), 1020))
%tmp1701 = shl i32 %xor, 2
%tmp1702 = and i32 %tmp1701, 1020
- %tmp1703 = getelementptr inbounds [1028 x i8]* null, i32 0, i32 %tmp1702
+ %tmp1703 = getelementptr inbounds [1028 x i8], [1028 x i8]* null, i32 0, i32 %tmp1702
%tmp1704 = bitcast i8* %tmp1703 to i32*
- %load1 = load i32* %tmp1704, align 4
+ %load1 = load i32, i32* %tmp1704, align 4
; %load2 = (load (shl (and %xor, 255), 2))
%tmp1698 = and i32 %xor, 255
%tmp1706 = shl i32 %tmp1698, 2
- %tmp1707 = getelementptr inbounds [1028 x i8]* null, i32 0, i32 %tmp1706
+ %tmp1707 = getelementptr inbounds [1028 x i8], [1028 x i8]* null, i32 0, i32 %tmp1706
%tmp1708 = bitcast i8* %tmp1707 to i32*
- %load2 = load i32* %tmp1708, align 4
+ %load2 = load i32, i32* %tmp1708, align 4
%tmp1710 = or i32 %load2, %a
@@ -43,7 +43,7 @@ bb1692:
; references in MatchScope and RecordedNodes stale.
%tmp1711 = xor i32 %load1, %tmp1710
- %tmp1744 = getelementptr inbounds [256 x i32]* null, i32 0, i32 %tmp1711
+ %tmp1744 = getelementptr inbounds [256 x i32], [256 x i32]* null, i32 0, i32 %tmp1711
store i32 0, i32* %tmp1744, align 4
%tmp1745 = add i32 %tmp1694, 1
indirectbr i8* undef, [label %bb1756, label %bb1692]
diff --git a/test/CodeGen/X86/address-type-promotion-constantexpr.ll b/test/CodeGen/X86/address-type-promotion-constantexpr.ll
index 32f29bd3cad9..fe115b9295da 100644
--- a/test/CodeGen/X86/address-type-promotion-constantexpr.ll
+++ b/test/CodeGen/X86/address-type-promotion-constantexpr.ll
@@ -10,7 +10,7 @@
; CHECK: xor %eax, %eax
define i32 @main() {
entry:
- %foo = load i8* getelementptr ([2 x i8]* @b, i64 0, i64 sext (i8 or (i8 zext (i1 icmp eq (i32* getelementptr inbounds ([2 x i32]* @c, i64 0, i64 1), i32* @a) to i8), i8 1) to i64)), align 1
+ %foo = load i8, i8* getelementptr ([2 x i8], [2 x i8]* @b, i64 0, i64 sext (i8 or (i8 zext (i1 icmp eq (i32* getelementptr inbounds ([2 x i32], [2 x i32]* @c, i64 0, i64 1), i32* @a) to i8), i8 1) to i64)), align 1
ret i32 0
}
diff --git a/test/CodeGen/X86/aliases.ll b/test/CodeGen/X86/aliases.ll
index 82a8e482b7fa..3f19a064323c 100644
--- a/test/CodeGen/X86/aliases.ll
+++ b/test/CodeGen/X86/aliases.ll
@@ -49,7 +49,7 @@ define i32 @foo_f() {
@bar_p = protected alias i32* @bar
; CHECK-DAG: test2 = bar+4
-@test2 = alias getelementptr(i32 *@bar, i32 1)
+@test2 = alias getelementptr(i32, i32 *@bar, i32 1)
; CHECK-DAG: test3 = 42
@test3 = alias inttoptr(i32 42 to i32*)
@@ -64,12 +64,12 @@ define i32 @foo_f() {
; CHECK-DAG: .globl test
define i32 @test() {
entry:
- %tmp = load i32* @foo1
- %tmp1 = load i32* @foo2
- %tmp0 = load i32* @bar_i
+ %tmp = load i32, i32* @foo1
+ %tmp1 = load i32, i32* @foo2
+ %tmp0 = load i32, i32* @bar_i
%tmp2 = call i32 @foo_f()
%tmp3 = add i32 %tmp, %tmp2
- %tmp4 = call %FunTy* @bar_f()
+ %tmp4 = call i32 @bar_f()
%tmp5 = add i32 %tmp3, %tmp4
%tmp6 = add i32 %tmp1, %tmp5
%tmp7 = add i32 %tmp6, %tmp0
diff --git a/test/CodeGen/X86/aligned-variadic.ll b/test/CodeGen/X86/aligned-variadic.ll
index e2155fe4b373..294159220626 100644
--- a/test/CodeGen/X86/aligned-variadic.ll
+++ b/test/CodeGen/X86/aligned-variadic.ll
@@ -8,12 +8,12 @@
define void @bar(%struct.Baz* byval nocapture readnone align 8 %x, ...) {
entry:
%va = alloca [1 x %struct.__va_list_tag], align 16
- %arraydecay = getelementptr inbounds [1 x %struct.__va_list_tag]* %va, i64 0, i64 0
+ %arraydecay = getelementptr inbounds [1 x %struct.__va_list_tag], [1 x %struct.__va_list_tag]* %va, i64 0, i64 0
%arraydecay1 = bitcast [1 x %struct.__va_list_tag]* %va to i8*
call void @llvm.va_start(i8* %arraydecay1)
- %overflow_arg_area_p = getelementptr inbounds [1 x %struct.__va_list_tag]* %va, i64 0, i64 0, i32 2
- %overflow_arg_area = load i8** %overflow_arg_area_p, align 8
- %overflow_arg_area.next = getelementptr i8* %overflow_arg_area, i64 24
+ %overflow_arg_area_p = getelementptr inbounds [1 x %struct.__va_list_tag], [1 x %struct.__va_list_tag]* %va, i64 0, i64 0, i32 2
+ %overflow_arg_area = load i8*, i8** %overflow_arg_area_p, align 8
+ %overflow_arg_area.next = getelementptr i8, i8* %overflow_arg_area, i64 24
store i8* %overflow_arg_area.next, i8** %overflow_arg_area_p, align 8
; X32: leal 68(%esp), [[REG:%.*]]
; X32: movl [[REG]], 16(%esp)
diff --git a/test/CodeGen/X86/alignment-2.ll b/test/CodeGen/X86/alignment-2.ll
index 1f9e85cbb763..a38a3626702a 100644
--- a/test/CodeGen/X86/alignment-2.ll
+++ b/test/CodeGen/X86/alignment-2.ll
@@ -23,7 +23,7 @@ bb:
; CHECK-NOT: movaps {{[0-9]*}}(%{{[a-z]*}}), {{%xmm[0-9]}}
%myopt = alloca %struct.printQueryOpt, align 4
%tmp = bitcast %struct.printQueryOpt* %myopt to i8*
- call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp, i8* bitcast (%struct.printQueryOpt* getelementptr inbounds (%struct._psqlSettings* @pset, i32 0, i32 4) to i8*), i32 76, i32 4, i1 false)
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp, i8* bitcast (%struct.printQueryOpt* getelementptr inbounds (%struct._psqlSettings, %struct._psqlSettings* @pset, i32 0, i32 4) to i8*), i32 76, i32 4, i1 false)
ret i8 0
}
diff --git a/test/CodeGen/X86/and-load-fold.ll b/test/CodeGen/X86/and-load-fold.ll
index d6f68b3bc433..29ab3242ce26 100644
--- a/test/CodeGen/X86/and-load-fold.ll
+++ b/test/CodeGen/X86/and-load-fold.ll
@@ -8,7 +8,7 @@ define i8 @foo(<4 x i8>* %V) {
; CHECK: ret
entry:
%Vp = bitcast <4 x i8>* %V to <3 x i8>*
- %V3i8 = load <3 x i8>* %Vp, align 4
+ %V3i8 = load <3 x i8>, <3 x i8>* %Vp, align 4
%0 = and <3 x i8> %V3i8, <i8 undef, i8 undef, i8 95>
%1 = extractelement <3 x i8> %0, i64 2
ret i8 %1
diff --git a/test/CodeGen/X86/and-or-fold.ll b/test/CodeGen/X86/and-or-fold.ll
index 836b5f1551c7..ec39522e6b10 100644
--- a/test/CodeGen/X86/and-or-fold.ll
+++ b/test/CodeGen/X86/and-or-fold.ll
@@ -21,6 +21,6 @@ entry:
%tmp1 = and i64 %x, 123127
%tmp2 = or i64 %tmp1, 3
ret i64 %tmp2
-; DARWIN-OPT: andq $123124
+; DARWIN-OPT: andl $123124
; DARWIN-OPT-NEXT: leaq 3
}
diff --git a/test/CodeGen/X86/and-su.ll b/test/CodeGen/X86/and-su.ll
index 70c24615a7e2..bdbab1535016 100644
--- a/test/CodeGen/X86/and-su.ll
+++ b/test/CodeGen/X86/and-su.ll
@@ -6,7 +6,7 @@ define fastcc i32 @foo(i32* %p) nounwind {
; CHECK-LABEL: foo:
; CHECK: andl $10, %eax
; CHECK: je
- %t0 = load i32* %p
+ %t0 = load i32, i32* %p
%t2 = and i32 %t0, 10
%t3 = icmp ne i32 %t2, 0
br i1 %t3, label %bb63, label %bb76
diff --git a/test/CodeGen/X86/andimm8.ll b/test/CodeGen/X86/andimm8.ll
index 640237d0b504..d9e676aa66c5 100644
--- a/test/CodeGen/X86/andimm8.ll
+++ b/test/CodeGen/X86/andimm8.ll
@@ -17,3 +17,15 @@ define void @foo(i64 %zed, i64* %x) nounwind {
store i64 %t2, i64* %x, align 8
ret void
}
+
+define i64 @bar(i64 %zed) nounwind {
+; CHECK: andl $42, %edi # encoding: [0x83,0xe7,0x2a]
+ %t1 = and i64 %zed, 42
+ ret i64 %t1
+}
+
+define i64 @baz(i64 %zed) nounwind {
+; CHECK: andl $2147483647, %edi # encoding: [0x81,0xe7,0xff,0xff,0xff,0x7f]
+ %t1 = and i64 %zed, 2147483647
+ ret i64 %t1
+}
diff --git a/test/CodeGen/X86/anyregcc-crash.ll b/test/CodeGen/X86/anyregcc-crash.ll
index 3abe3d149a11..a7c104e3ba4c 100644
--- a/test/CodeGen/X86/anyregcc-crash.ll
+++ b/test/CodeGen/X86/anyregcc-crash.ll
@@ -7,7 +7,7 @@ define i64 @anyreglimit(i64 %v1, i64 %v2, i64 %v3, i64 %v4, i64 %v5, i64 %v6,
i64 %v7, i64 %v8, i64 %v9, i64 %v10, i64 %v11, i64 %v12,
i64 %v13, i64 %v14, i64 %v15, i64 %v16) {
entry:
- %result = tail call anyregcc i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 12, i32 15, i8* inttoptr (i64 0 to i8*), i32 16,
+ %result = tail call anyregcc i64 (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.i64(i64 12, i32 15, i8* inttoptr (i64 0 to i8*), i32 16,
i64 %v1, i64 %v2, i64 %v3, i64 %v4, i64 %v5, i64 %v6,
i64 %v7, i64 %v8, i64 %v9, i64 %v10, i64 %v11, i64 %v12,
i64 %v13, i64 %v14, i64 %v15, i64 %v16)
diff --git a/test/CodeGen/X86/anyregcc.ll b/test/CodeGen/X86/anyregcc.ll
index 98ba17c74c82..129aadfae88d 100644
--- a/test/CodeGen/X86/anyregcc.ll
+++ b/test/CodeGen/X86/anyregcc.ll
@@ -60,7 +60,7 @@
; CHECK-NEXT: .long 3
define i64 @test() nounwind ssp uwtable {
entry:
- call anyregcc void (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i64 0, i32 15, i8* null, i32 2, i32 1, i32 2, i64 3)
+ call anyregcc void (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.void(i64 0, i32 15, i8* null, i32 2, i32 1, i32 2, i64 3)
ret i64 0
}
@@ -82,7 +82,7 @@ entry:
define i64 @property_access1(i8* %obj) nounwind ssp uwtable {
entry:
%f = inttoptr i64 12297829382473034410 to i8*
- %ret = call anyregcc i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 1, i32 15, i8* %f, i32 1, i8* %obj)
+ %ret = call anyregcc i64 (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.i64(i64 1, i32 15, i8* %f, i32 1, i8* %obj)
ret i64 %ret
}
@@ -105,7 +105,7 @@ define i64 @property_access2() nounwind ssp uwtable {
entry:
%obj = alloca i64, align 8
%f = inttoptr i64 12297829382473034410 to i8*
- %ret = call anyregcc i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 2, i32 15, i8* %f, i32 1, i64* %obj)
+ %ret = call anyregcc i64 (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.i64(i64 2, i32 15, i8* %f, i32 1, i64* %obj)
ret i64 %ret
}
@@ -128,7 +128,7 @@ define i64 @property_access3() nounwind ssp uwtable {
entry:
%obj = alloca i64, align 8
%f = inttoptr i64 12297829382473034410 to i8*
- %ret = call anyregcc i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 3, i32 15, i8* %f, i32 0, i64* %obj)
+ %ret = call anyregcc i64 (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.i64(i64 3, i32 15, i8* %f, i32 0, i64* %obj)
ret i64 %ret
}
@@ -210,7 +210,7 @@ entry:
define i64 @anyreg_test1(i8* %a1, i8* %a2, i8* %a3, i8* %a4, i8* %a5, i8* %a6, i8* %a7, i8* %a8, i8* %a9, i8* %a10, i8* %a11, i8* %a12, i8* %a13) nounwind ssp uwtable {
entry:
%f = inttoptr i64 12297829382473034410 to i8*
- %ret = call anyregcc i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 4, i32 15, i8* %f, i32 13, i8* %a1, i8* %a2, i8* %a3, i8* %a4, i8* %a5, i8* %a6, i8* %a7, i8* %a8, i8* %a9, i8* %a10, i8* %a11, i8* %a12, i8* %a13)
+ %ret = call anyregcc i64 (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.i64(i64 4, i32 15, i8* %f, i32 13, i8* %a1, i8* %a2, i8* %a3, i8* %a4, i8* %a5, i8* %a6, i8* %a7, i8* %a8, i8* %a9, i8* %a10, i8* %a11, i8* %a12, i8* %a13)
ret i64 %ret
}
@@ -292,7 +292,7 @@ entry:
define i64 @anyreg_test2(i8* %a1, i8* %a2, i8* %a3, i8* %a4, i8* %a5, i8* %a6, i8* %a7, i8* %a8, i8* %a9, i8* %a10, i8* %a11, i8* %a12, i8* %a13) nounwind ssp uwtable {
entry:
%f = inttoptr i64 12297829382473034410 to i8*
- %ret = call anyregcc i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 5, i32 15, i8* %f, i32 8, i8* %a1, i8* %a2, i8* %a3, i8* %a4, i8* %a5, i8* %a6, i8* %a7, i8* %a8, i8* %a9, i8* %a10, i8* %a11, i8* %a12, i8* %a13)
+ %ret = call anyregcc i64 (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.i64(i64 5, i32 15, i8* %f, i32 8, i8* %a1, i8* %a2, i8* %a3, i8* %a4, i8* %a5, i8* %a6, i8* %a7, i8* %a8, i8* %a9, i8* %a10, i8* %a11, i8* %a12, i8* %a13)
ret i64 %ret
}
@@ -320,7 +320,7 @@ entry:
; CHECK-NEXT: .long 0
define i64 @patchpoint_spilldef(i64 %p1, i64 %p2, i64 %p3, i64 %p4) {
entry:
- %result = tail call anyregcc i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 12, i32 15, i8* inttoptr (i64 0 to i8*), i32 2, i64 %p1, i64 %p2)
+ %result = tail call anyregcc i64 (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.i64(i64 12, i32 15, i8* inttoptr (i64 0 to i8*), i32 2, i64 %p1, i64 %p2)
tail call void asm sideeffect "nop", "~{ax},~{bx},~{cx},~{dx},~{bp},~{si},~{di},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"() nounwind
ret i64 %result
}
@@ -360,7 +360,7 @@ entry:
define i64 @patchpoint_spillargs(i64 %p1, i64 %p2, i64 %p3, i64 %p4) {
entry:
tail call void asm sideeffect "nop", "~{ax},~{bx},~{cx},~{dx},~{bp},~{si},~{di},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"() nounwind
- %result = tail call anyregcc i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 13, i32 15, i8* inttoptr (i64 0 to i8*), i32 2, i64 %p1, i64 %p2, i64 %p3, i64 %p4)
+ %result = tail call anyregcc i64 (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.i64(i64 13, i32 15, i8* inttoptr (i64 0 to i8*), i32 2, i64 %p1, i64 %p2, i64 %p3, i64 %p4)
ret i64 %result
}
diff --git a/test/CodeGen/X86/asm-global-imm.ll b/test/CodeGen/X86/asm-global-imm.ll
index 9e79f6f78222..82610114ad34 100644
--- a/test/CodeGen/X86/asm-global-imm.ll
+++ b/test/CodeGen/X86/asm-global-imm.ll
@@ -21,6 +21,6 @@ define void @foo() {
define void @unknown_bootoption() {
entry:
- call void asm sideeffect "ud2\0A\09.word ${0:c}\0A\09.long ${1:c}\0A", "i,i,~{dirflag},~{fpsr},~{flags}"( i32 235, i8* getelementptr ([12 x i8]* @str, i32 0, i64 0) )
+ call void asm sideeffect "ud2\0A\09.word ${0:c}\0A\09.long ${1:c}\0A", "i,i,~{dirflag},~{fpsr},~{flags}"( i32 235, i8* getelementptr ([12 x i8], [12 x i8]* @str, i32 0, i64 0) )
ret void
}
diff --git a/test/CodeGen/X86/atom-call-reg-indirect-foldedreload32.ll b/test/CodeGen/X86/atom-call-reg-indirect-foldedreload32.ll
index 6237b66a5ea6..2e144f87c338 100644
--- a/test/CodeGen/X86/atom-call-reg-indirect-foldedreload32.ll
+++ b/test/CodeGen/X86/atom-call-reg-indirect-foldedreload32.ll
@@ -36,34 +36,34 @@
define void @func() #0 {
entry:
store i32 0, i32* @sum, align 4
- %0 = load i32* @a, align 4
+ %0 = load i32, i32* @a, align 4
store i32 %0, i32* @i, align 4
br label %for.cond
for.cond: ; preds = %for.inc, %entry
- %1 = load i32* @i, align 4
- %2 = load i32* @b, align 4
+ %1 = load i32, i32* @i, align 4
+ %2 = load i32, i32* @b, align 4
%cmp = icmp slt i32 %1, %2
br i1 %cmp, label %for.body, label %for.end
for.body: ; preds = %for.cond
- %3 = load i32 (i32, i32, i32, i32, i32, i32, i32, i32)** @funcp, align 4
- %4 = load i32* @i, align 4
- %5 = load i32* @b, align 4
- %6 = load i32* @c, align 4
- %7 = load i32* @d, align 4
- %8 = load i32* @e, align 4
- %9 = load i32* @f, align 4
- %10 = load i32* @g, align 4
- %11 = load i32* @h, align 4
+ %3 = load i32 (i32, i32, i32, i32, i32, i32, i32, i32)*, i32 (i32, i32, i32, i32, i32, i32, i32, i32)** @funcp, align 4
+ %4 = load i32, i32* @i, align 4
+ %5 = load i32, i32* @b, align 4
+ %6 = load i32, i32* @c, align 4
+ %7 = load i32, i32* @d, align 4
+ %8 = load i32, i32* @e, align 4
+ %9 = load i32, i32* @f, align 4
+ %10 = load i32, i32* @g, align 4
+ %11 = load i32, i32* @h, align 4
%call = call i32 %3(i32 %4, i32 %5, i32 %6, i32 %7, i32 %8, i32 %9, i32 %10, i32 %11)
- %12 = load i32* @sum, align 4
+ %12 = load i32, i32* @sum, align 4
%add = add nsw i32 %12, %call
store i32 %add, i32* @sum, align 4
br label %for.inc
for.inc: ; preds = %for.body
- %13 = load i32* @i, align 4
+ %13 = load i32, i32* @i, align 4
%inc = add nsw i32 %13, 1
store i32 %inc, i32* @i, align 4
br label %for.cond
diff --git a/test/CodeGen/X86/atom-call-reg-indirect-foldedreload64.ll b/test/CodeGen/X86/atom-call-reg-indirect-foldedreload64.ll
index a196d8175aa9..e82626ced206 100644
--- a/test/CodeGen/X86/atom-call-reg-indirect-foldedreload64.ll
+++ b/test/CodeGen/X86/atom-call-reg-indirect-foldedreload64.ll
@@ -42,43 +42,43 @@
define void @func() #0 {
entry:
store i32 0, i32* @sum, align 4
- %0 = load i32* @a, align 4
+ %0 = load i32, i32* @a, align 4
store i32 %0, i32* @i, align 4
br label %for.cond
for.cond: ; preds = %for.inc, %entry
- %1 = load i32* @i, align 4
- %2 = load i32* @b, align 4
+ %1 = load i32, i32* @i, align 4
+ %2 = load i32, i32* @b, align 4
%cmp = icmp slt i32 %1, %2
br i1 %cmp, label %for.body, label %for.end
for.body: ; preds = %for.cond
- %3 = load i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32)** @funcp, align 8
- %4 = load i32* @a, align 4
- %5 = load i32* @i, align 4
- %6 = load i32* @i, align 4
+ %3 = load i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32)*, i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32)** @funcp, align 8
+ %4 = load i32, i32* @a, align 4
+ %5 = load i32, i32* @i, align 4
+ %6 = load i32, i32* @i, align 4
%mul = mul nsw i32 %6, 2
- %7 = load i32* @i, align 4
- %8 = load i32* @b, align 4
+ %7 = load i32, i32* @i, align 4
+ %8 = load i32, i32* @b, align 4
%div = sdiv i32 %7, %8
- %9 = load i32* @c, align 4
- %10 = load i32* @d, align 4
- %11 = load i32* @e, align 4
- %12 = load i32* @f, align 4
- %13 = load i32* @g, align 4
- %14 = load i32* @h, align 4
- %15 = load i32* @j, align 4
- %16 = load i32* @k, align 4
- %17 = load i32* @l, align 4
- %18 = load i32* @n, align 4
+ %9 = load i32, i32* @c, align 4
+ %10 = load i32, i32* @d, align 4
+ %11 = load i32, i32* @e, align 4
+ %12 = load i32, i32* @f, align 4
+ %13 = load i32, i32* @g, align 4
+ %14 = load i32, i32* @h, align 4
+ %15 = load i32, i32* @j, align 4
+ %16 = load i32, i32* @k, align 4
+ %17 = load i32, i32* @l, align 4
+ %18 = load i32, i32* @n, align 4
%call = call i32 %3(i32 %4, i32 %5, i32 %mul, i32 %div, i32 %9, i32 %10, i32 %11, i32 %12, i32 %13, i32 %14, i32 %15, i32 %16, i32 %17, i32 %18)
- %19 = load i32* @sum, align 4
+ %19 = load i32, i32* @sum, align 4
%add = add nsw i32 %19, %call
store i32 %add, i32* @sum, align 4
br label %for.inc
for.inc: ; preds = %for.body
- %20 = load i32* @i, align 4
+ %20 = load i32, i32* @i, align 4
%inc = add nsw i32 %20, 1
store i32 %inc, i32* @i, align 4
br label %for.cond
diff --git a/test/CodeGen/X86/atom-call-reg-indirect.ll b/test/CodeGen/X86/atom-call-reg-indirect.ll
index 48f2d4c11346..663b6f1eee51 100644
--- a/test/CodeGen/X86/atom-call-reg-indirect.ll
+++ b/test/CodeGen/X86/atom-call-reg-indirect.ll
@@ -14,8 +14,8 @@ define i32 @test1() #0 {
entry:
%call = tail call %class.A* @_Z3facv()
%0 = bitcast %class.A* %call to void (%class.A*)***
- %vtable = load void (%class.A*)*** %0, align 8
- %1 = load void (%class.A*)** %vtable, align 8
+ %vtable = load void (%class.A*)**, void (%class.A*)*** %0, align 8
+ %1 = load void (%class.A*)*, void (%class.A*)** %vtable, align 8
;ATOM32: movl (%ecx), %ecx
;ATOM32: calll *%ecx
;ATOM-NOT32: calll *(%ecx)
@@ -38,8 +38,8 @@ declare %class.A* @_Z3facv() #1
define i32 @test2() #0 {
;ATOM-LABEL: test2:
entry:
- %0 = load void (i32)*** @p, align 8
- %1 = load void (i32)** %0, align 8
+ %0 = load void (i32)**, void (i32)*** @p, align 8
+ %1 = load void (i32)*, void (i32)** %0, align 8
;ATOM32: movl (%eax), %eax
;ATOM32: calll *%eax
;ATOM-NOT: calll *(%eax)
diff --git a/test/CodeGen/X86/atom-cmpb.ll b/test/CodeGen/X86/atom-cmpb.ll
index 034bf2f27d25..baf0f5e87fc9 100644
--- a/test/CodeGen/X86/atom-cmpb.ll
+++ b/test/CodeGen/X86/atom-cmpb.ll
@@ -11,10 +11,10 @@
define i8 @run_test(i8* %rd_p) {
entry:
- %incdec.ptr = getelementptr inbounds i8* %rd_p, i64 1
- %ld1 = load i8* %rd_p, align 1
- %incdec.ptr1 = getelementptr inbounds i8* %rd_p, i64 2
- %ld2 = load i8* %incdec.ptr, align 1
+ %incdec.ptr = getelementptr inbounds i8, i8* %rd_p, i64 1
+ %ld1 = load i8, i8* %rd_p, align 1
+ %incdec.ptr1 = getelementptr inbounds i8, i8* %rd_p, i64 2
+ %ld2 = load i8, i8* %incdec.ptr, align 1
%x4 = xor i8 %ld1, -1
%x5 = xor i8 %ld2, -1
%cmp34 = icmp ult i8 %ld2, %ld1
diff --git a/test/CodeGen/X86/atom-fixup-lea1.ll b/test/CodeGen/X86/atom-fixup-lea1.ll
index 4651bf257fd4..f862fa6dfc35 100644
--- a/test/CodeGen/X86/atom-fixup-lea1.ll
+++ b/test/CodeGen/X86/atom-fixup-lea1.ll
@@ -25,8 +25,8 @@ entry:
for.body:
%i.06 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
%sum.05 = phi i32 [ %add, %for.body ], [ 0, %entry ]
- %arrayidx = getelementptr inbounds i32* %array, i32 %i.06
- %0 = load i32* %arrayidx, align 4
+ %arrayidx = getelementptr inbounds i32, i32* %array, i32 %i.06
+ %0 = load i32, i32* %arrayidx, align 4
%add = add nsw i32 %0, %sum.05
%inc = add nsw i32 %i.06, 1
%exitcond = icmp eq i32 %inc, %n
diff --git a/test/CodeGen/X86/atom-fixup-lea2.ll b/test/CodeGen/X86/atom-fixup-lea2.ll
index 1855ea1d024d..ec8261388734 100644
--- a/test/CodeGen/X86/atom-fixup-lea2.ll
+++ b/test/CodeGen/X86/atom-fixup-lea2.ll
@@ -37,32 +37,32 @@ define i32 @test() {
entry:
%n = alloca %struct.node_t, align 4
call void bitcast (void (%struct.node_t*, ...)* @getnode to void (%struct.node_t*)*)(%struct.node_t* sret %n)
- %array = getelementptr inbounds %struct.node_t* %n, i32 0, i32 4
- %0 = load i32** %array, align 4
+ %array = getelementptr inbounds %struct.node_t, %struct.node_t* %n, i32 0, i32 4
+ %0 = load i32*, i32** %array, align 4
%cmp = icmp eq i32* %0, null
br i1 %cmp, label %if.end, label %land.lhs.true
land.lhs.true:
- %p = getelementptr inbounds %struct.node_t* %n, i32 0, i32 3
- %1 = load i32* %p, align 4
+ %p = getelementptr inbounds %struct.node_t, %struct.node_t* %n, i32 0, i32 3
+ %1 = load i32, i32* %p, align 4
%cmp1 = icmp sgt i32 %1, 0
br i1 %cmp1, label %land.lhs.true2, label %if.end
land.lhs.true2:
- %k = getelementptr inbounds %struct.node_t* %n, i32 0, i32 0
- %2 = load i32* %k, align 4
+ %k = getelementptr inbounds %struct.node_t, %struct.node_t* %n, i32 0, i32 0
+ %2 = load i32, i32* %k, align 4
%cmp3 = icmp sgt i32 %2, 0
br i1 %cmp3, label %land.lhs.true4, label %if.end
land.lhs.true4:
- %n5 = getelementptr inbounds %struct.node_t* %n, i32 0, i32 2
- %3 = load i32* %n5, align 4
+ %n5 = getelementptr inbounds %struct.node_t, %struct.node_t* %n, i32 0, i32 2
+ %3 = load i32, i32* %n5, align 4
%cmp6 = icmp sgt i32 %3, 0
br i1 %cmp6, label %land.lhs.true7, label %if.end
land.lhs.true7:
- %m = getelementptr inbounds %struct.node_t* %n, i32 0, i32 1
- %4 = load i32* %m, align 4
+ %m = getelementptr inbounds %struct.node_t, %struct.node_t* %n, i32 0, i32 1
+ %4 = load i32, i32* %m, align 4
%cmp8 = icmp sgt i32 %4, 0
br i1 %cmp8, label %if.then, label %if.end
@@ -72,8 +72,8 @@ if.then:
%5 = ptrtoint i32* %0 to i32
%add15 = add nsw i32 %1, %5
%6 = inttoptr i32 %add15 to i32*
- %arrayidx = getelementptr inbounds i32* %6, i32 %add12
- %7 = load i32* %arrayidx, align 4
+ %arrayidx = getelementptr inbounds i32, i32* %6, i32 %add12
+ %7 = load i32, i32* %arrayidx, align 4
br label %if.end
if.end:
diff --git a/test/CodeGen/X86/atom-fixup-lea3.ll b/test/CodeGen/X86/atom-fixup-lea3.ll
index 311b0b302163..ed2df277480e 100644
--- a/test/CodeGen/X86/atom-fixup-lea3.ll
+++ b/test/CodeGen/X86/atom-fixup-lea3.ll
@@ -26,7 +26,7 @@ entry:
br i1 %cmp7, label %for.body.lr.ph, label %for.end
for.body.lr.ph: ; preds = %entry
- %.pre = load i32* %m, align 4
+ %.pre = load i32, i32* %m, align 4
br label %for.body
for.body: ; preds = %for.body, %for.body.lr.ph
@@ -34,12 +34,12 @@ for.body: ; preds = %for.body, %for.body
%sum.010 = phi i32 [ 0, %for.body.lr.ph ], [ %add3, %for.body ]
%j.09 = phi i32 [ 0, %for.body.lr.ph ], [ %inc1, %for.body ]
%inc1 = add nsw i32 %j.09, 1
- %arrayidx = getelementptr inbounds i32* %array2, i32 %j.09
- %1 = load i32* %arrayidx, align 4
+ %arrayidx = getelementptr inbounds i32, i32* %array2, i32 %j.09
+ %1 = load i32, i32* %arrayidx, align 4
%add = add nsw i32 %0, %1
store i32 %add, i32* %m, align 4
- %arrayidx2 = getelementptr inbounds i32* %array, i32 %inc1
- %2 = load i32* %arrayidx2, align 4
+ %arrayidx2 = getelementptr inbounds i32, i32* %array, i32 %inc1
+ %2 = load i32, i32* %arrayidx2, align 4
%add3 = add nsw i32 %2, %sum.010
%exitcond = icmp eq i32 %inc1, %n
br i1 %exitcond, label %for.end, label %for.body
diff --git a/test/CodeGen/X86/atom-fixup-lea4.ll b/test/CodeGen/X86/atom-fixup-lea4.ll
index 668574b968c8..f0da1d2015cc 100644
--- a/test/CodeGen/X86/atom-fixup-lea4.ll
+++ b/test/CodeGen/X86/atom-fixup-lea4.ll
@@ -10,8 +10,8 @@ define linkonce_odr void @_ZN12ValueWrapperIS_IS_IS_IdEEEEC2Ev(%struct.ValueWrap
entry:
%this.addr = alloca %struct.ValueWrapper.6*, align 8
store %struct.ValueWrapper.6* %this, %struct.ValueWrapper.6** %this.addr, align 8
- %this1 = load %struct.ValueWrapper.6** %this.addr
- %value = getelementptr inbounds %struct.ValueWrapper.6* %this1, i32 0, i32 0
+ %this1 = load %struct.ValueWrapper.6*, %struct.ValueWrapper.6** %this.addr
+ %value = getelementptr inbounds %struct.ValueWrapper.6, %struct.ValueWrapper.6* %this1, i32 0, i32 0
call void @_ZN12ValueWrapperIS_IS_IdEEEC2Ev(%struct.ValueWrapper.7* %value)
ret void
}
diff --git a/test/CodeGen/X86/atom-lea-addw-bug.ll b/test/CodeGen/X86/atom-lea-addw-bug.ll
index 5cda2df432fc..d8147e5fbef9 100644
--- a/test/CodeGen/X86/atom-lea-addw-bug.ll
+++ b/test/CodeGen/X86/atom-lea-addw-bug.ll
@@ -5,9 +5,9 @@ target triple = "x86_64-apple-darwin12.5.0"
define i32 @DoLayout() {
entry:
- %tmp1 = load i16* undef, align 2
- %tmp17 = load i16* null, align 2
- %tmp19 = load i16* undef, align 2
+ %tmp1 = load i16, i16* undef, align 2
+ %tmp17 = load i16, i16* null, align 2
+ %tmp19 = load i16, i16* undef, align 2
%shl = shl i16 %tmp19, 1
%add55 = add i16 %tmp17, %tmp1
%add57 = add i16 %add55, %shl
diff --git a/test/CodeGen/X86/atom-lea-sp.ll b/test/CodeGen/X86/atom-lea-sp.ll
index 1df1974dc494..1ee3b00ee87e 100644
--- a/test/CodeGen/X86/atom-lea-sp.ll
+++ b/test/CodeGen/X86/atom-lea-sp.ll
@@ -16,7 +16,7 @@ define void @test1() nounwind {
; CHECK: call
; CHECK-NOT: lea
%arr = alloca [1024 x i8], align 16
- %arr_ptr = getelementptr inbounds [1024 x i8]* %arr, i8 0, i8 0
+ %arr_ptr = getelementptr inbounds [1024 x i8], [1024 x i8]* %arr, i8 0, i8 0
call void @use_arr(i8* %arr_ptr)
ret void
}
diff --git a/test/CodeGen/X86/atom-sched.ll b/test/CodeGen/X86/atom-sched.ll
index fd18472bff8a..b81359e2832b 100644
--- a/test/CodeGen/X86/atom-sched.ll
+++ b/test/CodeGen/X86/atom-sched.ll
@@ -21,12 +21,12 @@ define void @func() nounwind uwtable {
; CHECK: movl
; CHECK: imull
entry:
- %0 = load i32* @b, align 4
- %1 = load i32* @c, align 4
+ %0 = load i32, i32* @b, align 4
+ %1 = load i32, i32* @c, align 4
%mul = mul nsw i32 %0, %1
store i32 %mul, i32* @a, align 4
- %2 = load i32* @e, align 4
- %3 = load i32* @f, align 4
+ %2 = load i32, i32* @e, align 4
+ %3 = load i32, i32* @f, align 4
%mul1 = mul nsw i32 %2, %3
store i32 %mul1, i32* @d, align 4
ret void
diff --git a/test/CodeGen/X86/atomic-dagsched.ll b/test/CodeGen/X86/atomic-dagsched.ll
index aa057577a042..97bb1afa47a7 100644
--- a/test/CodeGen/X86/atomic-dagsched.ll
+++ b/test/CodeGen/X86/atomic-dagsched.ll
@@ -2,12 +2,12 @@
define void @test(i8** %a, i64* %b, i64 %c, i64 %d) nounwind {
entry:
- %ptrtoarg4 = load i8** %a, align 8
- %brglist1 = getelementptr i8** %a, i64 1
- %ptrtoarg25 = load i8** %brglist1, align 8
- %0 = load i64* %b, align 8
+ %ptrtoarg4 = load i8*, i8** %a, align 8
+ %brglist1 = getelementptr i8*, i8** %a, i64 1
+ %ptrtoarg25 = load i8*, i8** %brglist1, align 8
+ %0 = load i64, i64* %b, align 8
%1 = mul i64 %0, 4
- %scevgep = getelementptr i8* %ptrtoarg25, i64 %1
+ %scevgep = getelementptr i8, i8* %ptrtoarg25, i64 %1
%2 = mul i64 %d, 4
br label %loop.cond
@@ -18,8 +18,8 @@ loop.cond: ; preds = %test.exit, %entry
br i1 %3, label %return, label %loop
loop: ; preds = %loop.cond
- %4 = load i64* addrspace(256)* inttoptr (i64 264 to i64* addrspace(256)*), align 8
- %5 = load i64* %4, align 8
+ %4 = load i64*, i64* addrspace(256)* inttoptr (i64 264 to i64* addrspace(256)*), align 8
+ %5 = load i64, i64* %4, align 8
%vector.size.i = ashr i64 %5, 3
%num.vector.wi.i = shl i64 %vector.size.i, 3
%6 = icmp eq i64 %vector.size.i, 0
@@ -36,7 +36,7 @@ vector_kernel_entry.i: ; preds = %vector_kernel_entry
%asr.iv = phi i64 [ %asr.iv.next, %vector_kernel_entry.i ], [ %vector.size.i, %dim_0_vector_pre_head.i ]
%8 = addrspacecast i8* %ptrtoarg4 to i32 addrspace(1)*
%asr.iv911 = addrspacecast i8* %asr.iv9 to <8 x i32> addrspace(1)*
- %9 = load <8 x i32> addrspace(1)* %asr.iv911, align 4
+ %9 = load <8 x i32>, <8 x i32> addrspace(1)* %asr.iv911, align 4
%extract8vector_func.i = extractelement <8 x i32> %9, i32 0
%extract9vector_func.i = extractelement <8 x i32> %9, i32 1
%extract10vector_func.i = extractelement <8 x i32> %9, i32 2
@@ -55,7 +55,7 @@ vector_kernel_entry.i: ; preds = %vector_kernel_entry
%17 = atomicrmw min i32 addrspace(1)* %8, i32 %extract15vector_func.i seq_cst
store <8 x i32> %vectorvector_func.i, <8 x i32> addrspace(1)* %asr.iv911, align 4
%asr.iv.next = add i64 %asr.iv, -1
- %scevgep10 = getelementptr i8* %asr.iv9, i64 32
+ %scevgep10 = getelementptr i8, i8* %asr.iv9, i64 32
%dim_0_vector_cmp.to.max.i = icmp eq i64 %asr.iv.next, 0
br i1 %dim_0_vector_cmp.to.max.i, label %scalarIf.i, label %vector_kernel_entry.i
@@ -65,8 +65,8 @@ scalarIf.i: ; preds = %vector_kernel_entry
br i1 %18, label %test.exit, label %dim_0_pre_head.i
dim_0_pre_head.i: ; preds = %scalarIf.i
- %19 = load i64* addrspace(256)* inttoptr (i64 264 to i64* addrspace(256)*), align 8
- %20 = load i64* %19, align 8
+ %19 = load i64*, i64* addrspace(256)* inttoptr (i64 264 to i64* addrspace(256)*), align 8
+ %20 = load i64, i64* %19, align 8
%21 = trunc i64 %20 to i32
%22 = mul i64 %vector.size.i, 8
br label %scalar_kernel_entry.i
@@ -75,10 +75,10 @@ scalar_kernel_entry.i: ; preds = %scalar_kernel_entry
%asr.iv12 = phi i64 [ %asr.iv.next13, %scalar_kernel_entry.i ], [ %22, %dim_0_pre_head.i ]
%23 = addrspacecast i8* %asr.iv6 to i32 addrspace(1)*
%24 = addrspacecast i8* %ptrtoarg4 to i32 addrspace(1)*
- %scevgep16 = getelementptr i32 addrspace(1)* %23, i64 %asr.iv12
- %25 = load i32 addrspace(1)* %scevgep16, align 4
+ %scevgep16 = getelementptr i32, i32 addrspace(1)* %23, i64 %asr.iv12
+ %25 = load i32, i32 addrspace(1)* %scevgep16, align 4
%26 = atomicrmw min i32 addrspace(1)* %24, i32 %25 seq_cst
- %scevgep15 = getelementptr i32 addrspace(1)* %23, i64 %asr.iv12
+ %scevgep15 = getelementptr i32, i32 addrspace(1)* %23, i64 %asr.iv12
store i32 %21, i32 addrspace(1)* %scevgep15, align 4
%asr.iv.next13 = add i64 %asr.iv12, 1
%dim_0_cmp.to.max.i = icmp eq i64 %5, %asr.iv.next13
@@ -88,7 +88,7 @@ test.exit: ; preds = %scalar_kernel_entry.i, %scalarIf.i
%27 = bitcast i8* %asr.iv6 to i1*
%28 = add i64 %iv, %d
store i64 %28, i64* %b, align 8
- %scevgep8 = getelementptr i1* %27, i64 %2
+ %scevgep8 = getelementptr i1, i1* %27, i64 %2
%29 = bitcast i1* %scevgep8 to i8*
br label %loop.cond
diff --git a/test/CodeGen/X86/atomic-load-store-wide.ll b/test/CodeGen/X86/atomic-load-store-wide.ll
index ad1a5c6d0267..df0af5f7f27d 100644
--- a/test/CodeGen/X86/atomic-load-store-wide.ll
+++ b/test/CodeGen/X86/atomic-load-store-wide.ll
@@ -5,8 +5,7 @@
define void @test1(i64* %ptr, i64 %val1) {
; CHECK-LABEL: test1
-; CHECK: lock
-; CHECK-NEXT: cmpxchg8b
+; CHECK: lock cmpxchg8b
; CHECK-NEXT: jne
store atomic i64 %val1, i64* %ptr seq_cst, align 8
ret void
@@ -14,8 +13,7 @@ define void @test1(i64* %ptr, i64 %val1) {
define i64 @test2(i64* %ptr) {
; CHECK-LABEL: test2
-; CHECK: lock
-; CHECK-NEXT: cmpxchg8b
- %val = load atomic i64* %ptr seq_cst, align 8
+; CHECK: lock cmpxchg8b
+ %val = load atomic i64, i64* %ptr seq_cst, align 8
ret i64 %val
}
diff --git a/test/CodeGen/X86/atomic-load-store.ll b/test/CodeGen/X86/atomic-load-store.ll
index 86a744ed00f0..dab79bd274cf 100644
--- a/test/CodeGen/X86/atomic-load-store.ll
+++ b/test/CodeGen/X86/atomic-load-store.ll
@@ -18,6 +18,6 @@ define void @test2(i32* %ptr, i32 %val1) {
define i32 @test3(i32* %ptr) {
; CHECK: test3
; CHECK: movl (%rdi), %eax
- %val = load atomic i32* %ptr seq_cst, align 4
+ %val = load atomic i32, i32* %ptr seq_cst, align 4
ret i32 %val
}
diff --git a/test/CodeGen/X86/atomic-minmax-i6432.ll b/test/CodeGen/X86/atomic-minmax-i6432.ll
index ffb7a3fd6f64..4989bc14ef86 100644
--- a/test/CodeGen/X86/atomic-minmax-i6432.ll
+++ b/test/CodeGen/X86/atomic-minmax-i6432.ll
@@ -11,8 +11,7 @@ define void @atomic_maxmin_i6432() {
; LINUX: seta
; LINUX: cmovne
; LINUX: cmovne
-; LINUX: lock
-; LINUX-NEXT: cmpxchg8b
+; LINUX: lock cmpxchg8b
; LINUX: jne [[LABEL]]
%2 = atomicrmw min i64* @sc64, i64 6 acquire
; LINUX: [[LABEL:.LBB[0-9]+_[0-9]+]]
@@ -20,8 +19,7 @@ define void @atomic_maxmin_i6432() {
; LINUX: setb
; LINUX: cmovne
; LINUX: cmovne
-; LINUX: lock
-; LINUX-NEXT: cmpxchg8b
+; LINUX: lock cmpxchg8b
; LINUX: jne [[LABEL]]
%3 = atomicrmw umax i64* @sc64, i64 7 acquire
; LINUX: [[LABEL:.LBB[0-9]+_[0-9]+]]
@@ -29,8 +27,7 @@ define void @atomic_maxmin_i6432() {
; LINUX: seta
; LINUX: cmovne
; LINUX: cmovne
-; LINUX: lock
-; LINUX-NEXT: cmpxchg8b
+; LINUX: lock cmpxchg8b
; LINUX: jne [[LABEL]]
%4 = atomicrmw umin i64* @sc64, i64 8 acquire
; LINUX: [[LABEL:.LBB[0-9]+_[0-9]+]]
@@ -38,8 +35,7 @@ define void @atomic_maxmin_i6432() {
; LINUX: setb
; LINUX: cmovne
; LINUX: cmovne
-; LINUX: lock
-; LINUX-NEXT: cmpxchg8b
+; LINUX: lock cmpxchg8b
; LINUX: jne [[LABEL]]
ret void
}
diff --git a/test/CodeGen/X86/atomic-or.ll b/test/CodeGen/X86/atomic-or.ll
index 1687e07d57e0..60e9968bdc71 100644
--- a/test/CodeGen/X86/atomic-or.ll
+++ b/test/CodeGen/X86/atomic-or.ll
@@ -6,11 +6,10 @@ define void @t1(i64* %p, i32 %b) nounwind {
entry:
%p.addr = alloca i64*, align 8
store i64* %p, i64** %p.addr, align 8
- %tmp = load i64** %p.addr, align 8
+ %tmp = load i64*, i64** %p.addr, align 8
; CHECK-LABEL: t1:
; CHECK: movl $2147483648, %eax
-; CHECK: lock
-; CHECK-NEXT: orq %r{{.*}}, (%r{{.*}})
+; CHECK: lock orq %r{{.*}}, (%r{{.*}})
%0 = atomicrmw or i64* %tmp, i64 2147483648 seq_cst
ret void
}
@@ -19,10 +18,9 @@ define void @t2(i64* %p, i32 %b) nounwind {
entry:
%p.addr = alloca i64*, align 8
store i64* %p, i64** %p.addr, align 8
- %tmp = load i64** %p.addr, align 8
+ %tmp = load i64*, i64** %p.addr, align 8
; CHECK-LABEL: t2:
-; CHECK: lock
-; CHECK-NEXT: orq $2147483644, (%r{{.*}})
+; CHECK: lock orq $2147483644, (%r{{.*}})
%0 = atomicrmw or i64* %tmp, i64 2147483644 seq_cst
ret void
}
diff --git a/test/CodeGen/X86/atomic-pointer.ll b/test/CodeGen/X86/atomic-pointer.ll
index ec3e6c3a8c19..66e021742fc2 100644
--- a/test/CodeGen/X86/atomic-pointer.ll
+++ b/test/CodeGen/X86/atomic-pointer.ll
@@ -6,7 +6,7 @@ define i32* @test_atomic_ptr_load(i32** %a0) {
; CHECK: movl
; CHECK: ret
0:
- %0 = load atomic i32** %a0 seq_cst, align 4
+ %0 = load atomic i32*, i32** %a0 seq_cst, align 4
ret i32* %0
}
diff --git a/test/CodeGen/X86/atomic128.ll b/test/CodeGen/X86/atomic128.ll
index 741d2904229d..dea7d482f989 100644
--- a/test/CodeGen/X86/atomic128.ll
+++ b/test/CodeGen/X86/atomic128.ll
@@ -249,7 +249,7 @@ define i128 @atomic_load_seq_cst(i128* %p) {
; CHECK: lock
; CHECK: cmpxchg16b (%rdi)
- %r = load atomic i128* %p seq_cst, align 16
+ %r = load atomic i128, i128* %p seq_cst, align 16
ret i128 %r
}
@@ -262,7 +262,7 @@ define i128 @atomic_load_relaxed(i128* %p) {
; CHECK: lock
; CHECK: cmpxchg16b (%rdi)
- %r = load atomic i128* %p monotonic, align 16
+ %r = load atomic i128, i128* %p monotonic, align 16
ret i128 %r
}
diff --git a/test/CodeGen/X86/atomic32.ll b/test/CodeGen/X86/atomic32.ll
index 4f2cbe0ce2d3..d514ac7b3110 100644
--- a/test/CodeGen/X86/atomic32.ll
+++ b/test/CodeGen/X86/atomic32.ll
@@ -1,300 +1,207 @@
-; RUN: llc < %s -O0 -march=x86-64 -mcpu=corei7 -verify-machineinstrs | FileCheck %s --check-prefix X64
-; RUN: llc < %s -O0 -march=x86 -mcpu=corei7 -verify-machineinstrs | FileCheck %s --check-prefix X32
+; RUN: llc < %s -O0 -march=x86-64 -mcpu=corei7 -verify-machineinstrs | FileCheck %s -check-prefix=WITH-CMOV
+; RUN: llc < %s -O0 -march=x86 -mcpu=corei7 -verify-machineinstrs | FileCheck %s -check-prefix=WITH-CMOV
; RUN: llc < %s -O0 -march=x86 -mcpu=corei7 -mattr=-cmov -verify-machineinstrs | FileCheck %s --check-prefix NOCMOV
@sc32 = external global i32
define void @atomic_fetch_add32() nounwind {
-; X64-LABEL: atomic_fetch_add32:
-; X32-LABEL: atomic_fetch_add32:
+; WITH-CMOV-LABEL: atomic_fetch_add32:
entry:
; 32-bit
%t1 = atomicrmw add i32* @sc32, i32 1 acquire
-; X64: lock
-; X64: incl
-; X32: lock
-; X32: incl
+; WITH-CMOV: lock
+; WITH-CMOV: incl
%t2 = atomicrmw add i32* @sc32, i32 3 acquire
-; X64: lock
-; X64: addl $3
-; X32: lock
-; X32: addl $3
+; WITH-CMOV: lock
+; WITH-CMOV: addl $3
%t3 = atomicrmw add i32* @sc32, i32 5 acquire
-; X64: lock
-; X64: xaddl
-; X32: lock
-; X32: xaddl
+; WITH-CMOV: lock
+; WITH-CMOV: xaddl
%t4 = atomicrmw add i32* @sc32, i32 %t3 acquire
-; X64: lock
-; X64: addl
-; X32: lock
-; X32: addl
+; WITH-CMOV: lock
+; WITH-CMOV: addl
ret void
-; X64: ret
-; X32: ret
+; WITH-CMOV: ret
}
define void @atomic_fetch_sub32() nounwind {
-; X64-LABEL: atomic_fetch_sub32:
-; X32-LABEL: atomic_fetch_sub32:
+; WITH-CMOV-LABEL: atomic_fetch_sub32:
%t1 = atomicrmw sub i32* @sc32, i32 1 acquire
-; X64: lock
-; X64: decl
-; X32: lock
-; X32: decl
+; WITH-CMOV: lock
+; WITH-CMOV: decl
%t2 = atomicrmw sub i32* @sc32, i32 3 acquire
-; X64: lock
-; X64: subl $3
-; X32: lock
-; X32: subl $3
+; WITH-CMOV: lock
+; WITH-CMOV: subl $3
%t3 = atomicrmw sub i32* @sc32, i32 5 acquire
-; X64: lock
-; X64: xaddl
-; X32: lock
-; X32: xaddl
+; WITH-CMOV: lock
+; WITH-CMOV: xaddl
%t4 = atomicrmw sub i32* @sc32, i32 %t3 acquire
-; X64: lock
-; X64: subl
-; X32: lock
-; X32: subl
+; WITH-CMOV: lock
+; WITH-CMOV: subl
ret void
-; X64: ret
-; X32: ret
+; WITH-CMOV: ret
}
define void @atomic_fetch_and32() nounwind {
-; X64-LABEL: atomic_fetch_and32:
-; X32-LABEL: atomic_fetch_and32:
+; WITH-CMOV-LABEL: atomic_fetch_and32:
%t1 = atomicrmw and i32* @sc32, i32 3 acquire
-; X64: lock
-; X64: andl $3
-; X32: lock
-; X32: andl $3
+; WITH-CMOV: lock
+; WITH-CMOV: andl $3
%t2 = atomicrmw and i32* @sc32, i32 5 acquire
-; X64: andl
-; X64: lock
-; X64: cmpxchgl
-; X32: andl
-; X32: lock
-; X32: cmpxchgl
+; WITH-CMOV: andl
+; WITH-CMOV: lock
+; WITH-CMOV: cmpxchgl
%t3 = atomicrmw and i32* @sc32, i32 %t2 acquire
-; X64: lock
-; X64: andl
-; X32: lock
-; X32: andl
+; WITH-CMOV: lock
+; WITH-CMOV: andl
ret void
-; X64: ret
-; X32: ret
+; WITH-CMOV: ret
}
define void @atomic_fetch_or32() nounwind {
-; X64-LABEL: atomic_fetch_or32:
-; X32-LABEL: atomic_fetch_or32:
+; WITH-CMOV-LABEL: atomic_fetch_or32:
%t1 = atomicrmw or i32* @sc32, i32 3 acquire
-; X64: lock
-; X64: orl $3
-; X32: lock
-; X32: orl $3
+; WITH-CMOV: lock
+; WITH-CMOV: orl $3
%t2 = atomicrmw or i32* @sc32, i32 5 acquire
-; X64: orl
-; X64: lock
-; X64: cmpxchgl
-; X32: orl
-; X32: lock
-; X32: cmpxchgl
+; WITH-CMOV: orl
+; WITH-CMOV: lock
+; WITH-CMOV: cmpxchgl
%t3 = atomicrmw or i32* @sc32, i32 %t2 acquire
-; X64: lock
-; X64: orl
-; X32: lock
-; X32: orl
+; WITH-CMOV: lock
+; WITH-CMOV: orl
ret void
-; X64: ret
-; X32: ret
+; WITH-CMOV: ret
}
define void @atomic_fetch_xor32() nounwind {
-; X64-LABEL: atomic_fetch_xor32:
-; X32-LABEL: atomic_fetch_xor32:
+; WITH-CMOV-LABEL: atomic_fetch_xor32:
%t1 = atomicrmw xor i32* @sc32, i32 3 acquire
-; X64: lock
-; X64: xorl $3
-; X32: lock
-; X32: xorl $3
+; WITH-CMOV: lock
+; WITH-CMOV: xorl $3
%t2 = atomicrmw xor i32* @sc32, i32 5 acquire
-; X64: xorl
-; X64: lock
-; X64: cmpxchgl
-; X32: xorl
-; X32: lock
-; X32: cmpxchgl
+; WITH-CMOV: xorl
+; WITH-CMOV: lock
+; WITH-CMOV: cmpxchgl
%t3 = atomicrmw xor i32* @sc32, i32 %t2 acquire
-; X64: lock
-; X64: xorl
-; X32: lock
-; X32: xorl
+; WITH-CMOV: lock
+; WITH-CMOV: xorl
ret void
-; X64: ret
-; X32: ret
+; WITH-CMOV: ret
}
define void @atomic_fetch_nand32(i32 %x) nounwind {
-; X64-LABEL: atomic_fetch_nand32:
-; X32-LABEL: atomic_fetch_nand32:
+; WITH-CMOV-LABEL: atomic_fetch_nand32:
%t1 = atomicrmw nand i32* @sc32, i32 %x acquire
-; X64: andl
-; X64: notl
-; X64: lock
-; X64: cmpxchgl
-; X32: andl
-; X32: notl
-; X32: lock
-; X32: cmpxchgl
+; WITH-CMOV: andl
+; WITH-CMOV: notl
+; WITH-CMOV: lock
+; WITH-CMOV: cmpxchgl
ret void
-; X64: ret
-; X32: ret
+; WITH-CMOV: ret
}
define void @atomic_fetch_max32(i32 %x) nounwind {
-; X64-LABEL: atomic_fetch_max32:
-; X32-LABEL: atomic_fetch_max32:
+; WITH-CMOV-LABEL: atomic_fetch_max32:
%t1 = atomicrmw max i32* @sc32, i32 %x acquire
-; X64: subl
-; X64: cmov
-; X64: lock
-; X64: cmpxchgl
-
-; X32: subl
-; X32: cmov
-; X32: lock
-; X32: cmpxchgl
+; WITH-CMOV: subl
+; WITH-CMOV: cmov
+; WITH-CMOV: lock
+; WITH-CMOV: cmpxchgl
; NOCMOV: subl
; NOCMOV: jge
; NOCMOV: lock
; NOCMOV: cmpxchgl
ret void
-; X64: ret
-; X32: ret
+; WITH-CMOV: ret
; NOCMOV: ret
}
define void @atomic_fetch_min32(i32 %x) nounwind {
-; X64-LABEL: atomic_fetch_min32:
-; X32-LABEL: atomic_fetch_min32:
+; WITH-CMOV-LABEL: atomic_fetch_min32:
; NOCMOV-LABEL: atomic_fetch_min32:
%t1 = atomicrmw min i32* @sc32, i32 %x acquire
-; X64: subl
-; X64: cmov
-; X64: lock
-; X64: cmpxchgl
-
-; X32: subl
-; X32: cmov
-; X32: lock
-; X32: cmpxchgl
+; WITH-CMOV: subl
+; WITH-CMOV: cmov
+; WITH-CMOV: lock
+; WITH-CMOV: cmpxchgl
; NOCMOV: subl
; NOCMOV: jle
; NOCMOV: lock
; NOCMOV: cmpxchgl
ret void
-; X64: ret
-; X32: ret
+; WITH-CMOV: ret
; NOCMOV: ret
}
define void @atomic_fetch_umax32(i32 %x) nounwind {
-; X64-LABEL: atomic_fetch_umax32:
-; X32-LABEL: atomic_fetch_umax32:
+; WITH-CMOV-LABEL: atomic_fetch_umax32:
; NOCMOV-LABEL: atomic_fetch_umax32:
%t1 = atomicrmw umax i32* @sc32, i32 %x acquire
-; X64: subl
-; X64: cmov
-; X64: lock
-; X64: cmpxchgl
-
-; X32: subl
-; X32: cmov
-; X32: lock
-; X32: cmpxchgl
+; WITH-CMOV: subl
+; WITH-CMOV: cmov
+; WITH-CMOV: lock
+; WITH-CMOV: cmpxchgl
; NOCMOV: subl
; NOCMOV: ja
; NOCMOV: lock
; NOCMOV: cmpxchgl
ret void
-; X64: ret
-; X32: ret
+; WITH-CMOV: ret
; NOCMOV: ret
}
define void @atomic_fetch_umin32(i32 %x) nounwind {
-; X64-LABEL: atomic_fetch_umin32:
-; X32-LABEL: atomic_fetch_umin32:
+; WITH-CMOV-LABEL: atomic_fetch_umin32:
; NOCMOV-LABEL: atomic_fetch_umin32:
%t1 = atomicrmw umin i32* @sc32, i32 %x acquire
-; X64: subl
-; X64: cmov
-; X64: lock
-; X64: cmpxchgl
-
-; X32: subl
-; X32: cmov
-; X32: lock
-; X32: cmpxchgl
+; WITH-CMOV: subl
+; WITH-CMOV: cmov
+; WITH-CMOV: lock
+; WITH-CMOV: cmpxchgl
; NOCMOV: subl
; NOCMOV: jb
; NOCMOV: lock
; NOCMOV: cmpxchgl
ret void
-; X64: ret
-; X32: ret
+; WITH-CMOV: ret
; NOCMOV: ret
}
define void @atomic_fetch_cmpxchg32() nounwind {
-; X64-LABEL: atomic_fetch_cmpxchg32:
-; X32-LABEL: atomic_fetch_cmpxchg32:
+; WITH-CMOV-LABEL: atomic_fetch_cmpxchg32:
%t1 = cmpxchg i32* @sc32, i32 0, i32 1 acquire acquire
-; X64: lock
-; X64: cmpxchgl
-; X32: lock
-; X32: cmpxchgl
+; WITH-CMOV: lock
+; WITH-CMOV: cmpxchgl
ret void
-; X64: ret
-; X32: ret
+; WITH-CMOV: ret
}
define void @atomic_fetch_store32(i32 %x) nounwind {
-; X64-LABEL: atomic_fetch_store32:
-; X32-LABEL: atomic_fetch_store32:
+; WITH-CMOV-LABEL: atomic_fetch_store32:
store atomic i32 %x, i32* @sc32 release, align 4
-; X64-NOT: lock
-; X64: movl
-; X32-NOT: lock
-; X32: movl
+; WITH-CMOV-NOT: lock
+; WITH-CMOV: movl
ret void
-; X64: ret
-; X32: ret
+; WITH-CMOV: ret
}
define void @atomic_fetch_swap32(i32 %x) nounwind {
-; X64-LABEL: atomic_fetch_swap32:
-; X32-LABEL: atomic_fetch_swap32:
+; WITH-CMOV-LABEL: atomic_fetch_swap32:
%t1 = atomicrmw xchg i32* @sc32, i32 %x acquire
-; X64-NOT: lock
-; X64: xchgl
-; X32-NOT: lock
-; X32: xchgl
+; WITH-CMOV-NOT: lock
+; WITH-CMOV: xchgl
ret void
-; X64: ret
-; X32: ret
+; WITH-CMOV: ret
}
diff --git a/test/CodeGen/X86/atomic64.ll b/test/CodeGen/X86/atomic64.ll
index 11b4e6864da6..c6b1c39d35dc 100644
--- a/test/CodeGen/X86/atomic64.ll
+++ b/test/CodeGen/X86/atomic64.ll
@@ -48,7 +48,7 @@ define void @atomic_fetch_and64() nounwind {
; X64: lock
; X64: andq $3
%t2 = atomicrmw and i64* @sc64, i64 5 acquire
-; X64: andq
+; X64: andl
; X64: lock
; X64: cmpxchgq
%t3 = atomicrmw and i64* @sc64, i64 %t2 acquire
diff --git a/test/CodeGen/X86/atomic6432.ll b/test/CodeGen/X86/atomic6432.ll
index 1c4b0f43bf76..5a78934eb3fd 100644
--- a/test/CodeGen/X86/atomic6432.ll
+++ b/test/CodeGen/X86/atomic6432.ll
@@ -3,7 +3,6 @@
@sc64 = external global i64
define void @atomic_fetch_add64() nounwind {
-; X64-LABEL: atomic_fetch_add64:
; X32-LABEL: atomic_fetch_add64:
entry:
%t1 = atomicrmw add i64* @sc64, i64 1 acquire
@@ -31,7 +30,6 @@ entry:
}
define void @atomic_fetch_sub64() nounwind {
-; X64-LABEL: atomic_fetch_sub64:
; X32-LABEL: atomic_fetch_sub64:
%t1 = atomicrmw sub i64* @sc64, i64 1 acquire
; X32: addl $-1
@@ -58,7 +56,6 @@ define void @atomic_fetch_sub64() nounwind {
}
define void @atomic_fetch_and64() nounwind {
-; X64-LABEL: atomic_fetch_and:64
; X32-LABEL: atomic_fetch_and64:
%t1 = atomicrmw and i64* @sc64, i64 3 acquire
; X32: andl $3
@@ -80,7 +77,6 @@ define void @atomic_fetch_and64() nounwind {
}
define void @atomic_fetch_or64() nounwind {
-; X64-LABEL: atomic_fetch_or64:
; X32-LABEL: atomic_fetch_or64:
%t1 = atomicrmw or i64* @sc64, i64 3 acquire
; X32: orl $3
@@ -102,7 +98,6 @@ define void @atomic_fetch_or64() nounwind {
}
define void @atomic_fetch_xor64() nounwind {
-; X64-LABEL: atomic_fetch_xor:64
; X32-LABEL: atomic_fetch_xor64:
%t1 = atomicrmw xor i64* @sc64, i64 3 acquire
; X32: xorl
@@ -124,7 +119,6 @@ define void @atomic_fetch_xor64() nounwind {
}
define void @atomic_fetch_nand64(i64 %x) nounwind {
-; X64-LABEL: atomic_fetch_nand64:
; X32-LABEL: atomic_fetch_nand64:
%t1 = atomicrmw nand i64* @sc64, i64 %x acquire
; X32: andl
@@ -138,7 +132,6 @@ define void @atomic_fetch_nand64(i64 %x) nounwind {
}
define void @atomic_fetch_max64(i64 %x) nounwind {
-; X64-LABEL: atomic_fetch_max:64
; X32-LABEL: atomic_fetch_max64:
%t1 = atomicrmw max i64* @sc64, i64 %x acquire
; X32: subl
@@ -152,7 +145,6 @@ define void @atomic_fetch_max64(i64 %x) nounwind {
}
define void @atomic_fetch_min64(i64 %x) nounwind {
-; X64-LABEL: atomic_fetch_min64:
; X32-LABEL: atomic_fetch_min64:
%t1 = atomicrmw min i64* @sc64, i64 %x acquire
; X32: subl
@@ -166,7 +158,6 @@ define void @atomic_fetch_min64(i64 %x) nounwind {
}
define void @atomic_fetch_umax64(i64 %x) nounwind {
-; X64-LABEL: atomic_fetch_umax:64
; X32-LABEL: atomic_fetch_umax64:
%t1 = atomicrmw umax i64* @sc64, i64 %x acquire
; X32: subl
@@ -180,7 +171,6 @@ define void @atomic_fetch_umax64(i64 %x) nounwind {
}
define void @atomic_fetch_umin64(i64 %x) nounwind {
-; X64-LABEL: atomic_fetch_umin64:
; X32-LABEL: atomic_fetch_umin64:
%t1 = atomicrmw umin i64* @sc64, i64 %x acquire
; X32: subl
@@ -194,7 +184,6 @@ define void @atomic_fetch_umin64(i64 %x) nounwind {
}
define void @atomic_fetch_cmpxchg64() nounwind {
-; X64-LABEL: atomic_fetch_cmpxchg:64
; X32-LABEL: atomic_fetch_cmpxchg64:
%t1 = cmpxchg i64* @sc64, i64 0, i64 1 acquire acquire
; X32: lock
@@ -204,7 +193,6 @@ define void @atomic_fetch_cmpxchg64() nounwind {
}
define void @atomic_fetch_store64(i64 %x) nounwind {
-; X64-LABEL: atomic_fetch_store64:
; X32-LABEL: atomic_fetch_store64:
store atomic i64 %x, i64* @sc64 release, align 8
; X32: lock
@@ -214,7 +202,6 @@ define void @atomic_fetch_store64(i64 %x) nounwind {
}
define void @atomic_fetch_swap64(i64 %x) nounwind {
-; X64-LABEL: atomic_fetch_swap64:
; X32-LABEL: atomic_fetch_swap64:
%t1 = atomicrmw xchg i64* @sc64, i64 %x acquire
; X32: lock
diff --git a/test/CodeGen/X86/atomic_mi.ll b/test/CodeGen/X86/atomic_mi.ll
index 19e019eaddcd..7a6204fc8930 100644
--- a/test/CodeGen/X86/atomic_mi.ll
+++ b/test/CodeGen/X86/atomic_mi.ll
@@ -103,7 +103,7 @@ define void @add_8(i8* %p) {
; X32-NOT: lock
; X32: addb
; X32-NOT: movb
- %1 = load atomic i8* %p seq_cst, align 1
+ %1 = load atomic i8, i8* %p seq_cst, align 1
%2 = add i8 %1, 2
store atomic i8 %2, i8* %p release, align 1
ret void
@@ -116,7 +116,7 @@ define void @add_16(i16* %p) {
; X64-NOT: addw
; X32-LABEL: add_16
; X32-NOT: addw
- %1 = load atomic i16* %p acquire, align 2
+ %1 = load atomic i16, i16* %p acquire, align 2
%2 = add i16 %1, 2
store atomic i16 %2, i16* %p release, align 2
ret void
@@ -131,7 +131,7 @@ define void @add_32(i32* %p) {
; X32-NOT: lock
; X32: addl
; X32-NOT: movl
- %1 = load atomic i32* %p acquire, align 4
+ %1 = load atomic i32, i32* %p acquire, align 4
%2 = add i32 %1, 2
store atomic i32 %2, i32* %p monotonic, align 4
ret void
@@ -144,7 +144,7 @@ define void @add_64(i64* %p) {
; X64-NOT: movq
; We do not check X86-32 as it cannot do 'addq'.
; X32-LABEL: add_64
- %1 = load atomic i64* %p acquire, align 8
+ %1 = load atomic i64, i64* %p acquire, align 8
%2 = add i64 %1, 2
store atomic i64 %2, i64* %p release, align 8
ret void
@@ -155,7 +155,7 @@ define void @add_32_seq_cst(i32* %p) {
; X64: xchgl
; X32-LABEL: add_32_seq_cst
; X32: xchgl
- %1 = load atomic i32* %p monotonic, align 4
+ %1 = load atomic i32, i32* %p monotonic, align 4
%2 = add i32 %1, 2
store atomic i32 %2, i32* %p seq_cst, align 4
ret void
@@ -172,7 +172,7 @@ define void @and_8(i8* %p) {
; X32-NOT: lock
; X32: andb
; X32-NOT: movb
- %1 = load atomic i8* %p monotonic, align 1
+ %1 = load atomic i8, i8* %p monotonic, align 1
%2 = and i8 %1, 2
store atomic i8 %2, i8* %p release, align 1
ret void
@@ -185,7 +185,7 @@ define void @and_16(i16* %p) {
; X64-NOT: andw
; X32-LABEL: and_16
; X32-NOT: andw
- %1 = load atomic i16* %p acquire, align 2
+ %1 = load atomic i16, i16* %p acquire, align 2
%2 = and i16 %1, 2
store atomic i16 %2, i16* %p release, align 2
ret void
@@ -200,7 +200,7 @@ define void @and_32(i32* %p) {
; X32-NOT: lock
; X32: andl
; X32-NOT: movl
- %1 = load atomic i32* %p acquire, align 4
+ %1 = load atomic i32, i32* %p acquire, align 4
%2 = and i32 %1, 2
store atomic i32 %2, i32* %p release, align 4
ret void
@@ -213,7 +213,7 @@ define void @and_64(i64* %p) {
; X64-NOT: movq
; We do not check X86-32 as it cannot do 'andq'.
; X32-LABEL: and_64
- %1 = load atomic i64* %p acquire, align 8
+ %1 = load atomic i64, i64* %p acquire, align 8
%2 = and i64 %1, 2
store atomic i64 %2, i64* %p release, align 8
ret void
@@ -224,7 +224,7 @@ define void @and_32_seq_cst(i32* %p) {
; X64: xchgl
; X32-LABEL: and_32_seq_cst
; X32: xchgl
- %1 = load atomic i32* %p monotonic, align 4
+ %1 = load atomic i32, i32* %p monotonic, align 4
%2 = and i32 %1, 2
store atomic i32 %2, i32* %p seq_cst, align 4
ret void
@@ -241,7 +241,7 @@ define void @or_8(i8* %p) {
; X32-NOT: lock
; X32: orb
; X32-NOT: movb
- %1 = load atomic i8* %p acquire, align 1
+ %1 = load atomic i8, i8* %p acquire, align 1
%2 = or i8 %1, 2
store atomic i8 %2, i8* %p release, align 1
ret void
@@ -252,7 +252,7 @@ define void @or_16(i16* %p) {
; X64-NOT: orw
; X32-LABEL: or_16
; X32-NOT: orw
- %1 = load atomic i16* %p acquire, align 2
+ %1 = load atomic i16, i16* %p acquire, align 2
%2 = or i16 %1, 2
store atomic i16 %2, i16* %p release, align 2
ret void
@@ -267,7 +267,7 @@ define void @or_32(i32* %p) {
; X32-NOT: lock
; X32: orl
; X32-NOT: movl
- %1 = load atomic i32* %p acquire, align 4
+ %1 = load atomic i32, i32* %p acquire, align 4
%2 = or i32 %1, 2
store atomic i32 %2, i32* %p release, align 4
ret void
@@ -280,7 +280,7 @@ define void @or_64(i64* %p) {
; X64-NOT: movq
; We do not check X86-32 as it cannot do 'orq'.
; X32-LABEL: or_64
- %1 = load atomic i64* %p acquire, align 8
+ %1 = load atomic i64, i64* %p acquire, align 8
%2 = or i64 %1, 2
store atomic i64 %2, i64* %p release, align 8
ret void
@@ -291,7 +291,7 @@ define void @or_32_seq_cst(i32* %p) {
; X64: xchgl
; X32-LABEL: or_32_seq_cst
; X32: xchgl
- %1 = load atomic i32* %p monotonic, align 4
+ %1 = load atomic i32, i32* %p monotonic, align 4
%2 = or i32 %1, 2
store atomic i32 %2, i32* %p seq_cst, align 4
ret void
@@ -308,7 +308,7 @@ define void @xor_8(i8* %p) {
; X32-NOT: lock
; X32: xorb
; X32-NOT: movb
- %1 = load atomic i8* %p acquire, align 1
+ %1 = load atomic i8, i8* %p acquire, align 1
%2 = xor i8 %1, 2
store atomic i8 %2, i8* %p release, align 1
ret void
@@ -319,7 +319,7 @@ define void @xor_16(i16* %p) {
; X64-NOT: xorw
; X32-LABEL: xor_16
; X32-NOT: xorw
- %1 = load atomic i16* %p acquire, align 2
+ %1 = load atomic i16, i16* %p acquire, align 2
%2 = xor i16 %1, 2
store atomic i16 %2, i16* %p release, align 2
ret void
@@ -334,7 +334,7 @@ define void @xor_32(i32* %p) {
; X32-NOT: lock
; X32: xorl
; X32-NOT: movl
- %1 = load atomic i32* %p acquire, align 4
+ %1 = load atomic i32, i32* %p acquire, align 4
%2 = xor i32 %1, 2
store atomic i32 %2, i32* %p release, align 4
ret void
@@ -347,7 +347,7 @@ define void @xor_64(i64* %p) {
; X64-NOT: movq
; We do not check X86-32 as it cannot do 'xorq'.
; X32-LABEL: xor_64
- %1 = load atomic i64* %p acquire, align 8
+ %1 = load atomic i64, i64* %p acquire, align 8
%2 = xor i64 %1, 2
store atomic i64 %2, i64* %p release, align 8
ret void
@@ -358,7 +358,7 @@ define void @xor_32_seq_cst(i32* %p) {
; X64: xchgl
; X32-LABEL: xor_32_seq_cst
; X32: xchgl
- %1 = load atomic i32* %p monotonic, align 4
+ %1 = load atomic i32, i32* %p monotonic, align 4
%2 = xor i32 %1, 2
store atomic i32 %2, i32* %p seq_cst, align 4
ret void
@@ -378,7 +378,7 @@ define void @inc_8(i8* %p) {
; SLOW_INC-LABEL: inc_8
; SLOW_INC-NOT: incb
; SLOW_INC-NOT: movb
- %1 = load atomic i8* %p seq_cst, align 1
+ %1 = load atomic i8, i8* %p seq_cst, align 1
%2 = add i8 %1, 1
store atomic i8 %2, i8* %p release, align 1
ret void
@@ -393,7 +393,7 @@ define void @inc_16(i16* %p) {
; X32-NOT: incw
; SLOW_INC-LABEL: inc_16
; SLOW_INC-NOT: incw
- %1 = load atomic i16* %p acquire, align 2
+ %1 = load atomic i16, i16* %p acquire, align 2
%2 = add i16 %1, 1
store atomic i16 %2, i16* %p release, align 2
ret void
@@ -411,7 +411,7 @@ define void @inc_32(i32* %p) {
; SLOW_INC-LABEL: inc_32
; SLOW_INC-NOT: incl
; SLOW_INC-NOT: movl
- %1 = load atomic i32* %p acquire, align 4
+ %1 = load atomic i32, i32* %p acquire, align 4
%2 = add i32 %1, 1
store atomic i32 %2, i32* %p monotonic, align 4
ret void
@@ -427,7 +427,7 @@ define void @inc_64(i64* %p) {
; SLOW_INC-LABEL: inc_64
; SLOW_INC-NOT: incq
; SLOW_INC-NOT: movq
- %1 = load atomic i64* %p acquire, align 8
+ %1 = load atomic i64, i64* %p acquire, align 8
%2 = add i64 %1, 1
store atomic i64 %2, i64* %p release, align 8
ret void
@@ -438,7 +438,7 @@ define void @inc_32_seq_cst(i32* %p) {
; X64: xchgl
; X32-LABEL: inc_32_seq_cst
; X32: xchgl
- %1 = load atomic i32* %p monotonic, align 4
+ %1 = load atomic i32, i32* %p monotonic, align 4
%2 = add i32 %1, 1
store atomic i32 %2, i32* %p seq_cst, align 4
ret void
@@ -458,7 +458,7 @@ define void @dec_8(i8* %p) {
; SLOW_INC-LABEL: dec_8
; SLOW_INC-NOT: decb
; SLOW_INC-NOT: movb
- %1 = load atomic i8* %p seq_cst, align 1
+ %1 = load atomic i8, i8* %p seq_cst, align 1
%2 = sub i8 %1, 1
store atomic i8 %2, i8* %p release, align 1
ret void
@@ -473,7 +473,7 @@ define void @dec_16(i16* %p) {
; X32-NOT: decw
; SLOW_INC-LABEL: dec_16
; SLOW_INC-NOT: decw
- %1 = load atomic i16* %p acquire, align 2
+ %1 = load atomic i16, i16* %p acquire, align 2
%2 = sub i16 %1, 1
store atomic i16 %2, i16* %p release, align 2
ret void
@@ -491,7 +491,7 @@ define void @dec_32(i32* %p) {
; SLOW_INC-LABEL: dec_32
; SLOW_INC-NOT: decl
; SLOW_INC-NOT: movl
- %1 = load atomic i32* %p acquire, align 4
+ %1 = load atomic i32, i32* %p acquire, align 4
%2 = sub i32 %1, 1
store atomic i32 %2, i32* %p monotonic, align 4
ret void
@@ -507,7 +507,7 @@ define void @dec_64(i64* %p) {
; SLOW_INC-LABEL: dec_64
; SLOW_INC-NOT: decq
; SLOW_INC-NOT: movq
- %1 = load atomic i64* %p acquire, align 8
+ %1 = load atomic i64, i64* %p acquire, align 8
%2 = sub i64 %1, 1
store atomic i64 %2, i64* %p release, align 8
ret void
@@ -518,7 +518,7 @@ define void @dec_32_seq_cst(i32* %p) {
; X64: xchgl
; X32-LABEL: dec_32_seq_cst
; X32: xchgl
- %1 = load atomic i32* %p monotonic, align 4
+ %1 = load atomic i32, i32* %p monotonic, align 4
%2 = sub i32 %1, 1
store atomic i32 %2, i32* %p seq_cst, align 4
ret void
diff --git a/test/CodeGen/X86/atomic_op.ll b/test/CodeGen/X86/atomic_op.ll
index d0ab28aa61f9..aa895dedfe4c 100644
--- a/test/CodeGen/X86/atomic_op.ll
+++ b/test/CodeGen/X86/atomic_op.ll
@@ -22,7 +22,7 @@ entry:
store i32 3855, i32* %ort
store i32 3855, i32* %xort
store i32 4, i32* %temp
- %tmp = load i32* %temp
+ %tmp = load i32, i32* %temp
; CHECK: lock
; CHECK: xaddl
%0 = atomicrmw add i32* %val1, i32 %tmp monotonic
diff --git a/test/CodeGen/X86/avoid-loop-align-2.ll b/test/CodeGen/X86/avoid-loop-align-2.ll
index fc9d1f0428fb..e02f3569c89d 100644
--- a/test/CodeGen/X86/avoid-loop-align-2.ll
+++ b/test/CodeGen/X86/avoid-loop-align-2.ll
@@ -13,7 +13,7 @@ entry:
bb.nph12: ; preds = %entry
%1 = icmp eq i32 %b, 0 ; <i1> [#uses=1]
- %2 = load i32** @x, align 8 ; <i32*> [#uses=1]
+ %2 = load i32*, i32** @x, align 8 ; <i32*> [#uses=1]
br i1 %1, label %bb2.preheader, label %bb2.preheader.us
bb2.preheader.us: ; preds = %bb2.bb3_crit_edge.us, %bb.nph12
@@ -26,8 +26,8 @@ bb1.us: ; preds = %bb1.us, %bb2.preheader.us
%indvar = phi i32 [ 0, %bb2.preheader.us ], [ %indvar.next, %bb1.us ] ; <i32> [#uses=2]
%tmp17 = add i32 %indvar, %tmp16 ; <i32> [#uses=1]
%tmp. = zext i32 %tmp17 to i64 ; <i64> [#uses=1]
- %3 = getelementptr i32* %2, i64 %tmp. ; <i32*> [#uses=1]
- %4 = load i32* %3, align 4 ; <i32> [#uses=2]
+ %3 = getelementptr i32, i32* %2, i64 %tmp. ; <i32*> [#uses=1]
+ %4 = load i32, i32* %3, align 4 ; <i32> [#uses=2]
%indvar.next = add i32 %indvar, 1 ; <i32> [#uses=2]
%exitcond = icmp eq i32 %indvar.next, %b ; <i1> [#uses=1]
br i1 %exitcond, label %bb2.bb3_crit_edge.us, label %bb1.us
diff --git a/test/CodeGen/X86/avoid-loop-align.ll b/test/CodeGen/X86/avoid-loop-align.ll
index 7957db72fe6d..d82cf9418e64 100644
--- a/test/CodeGen/X86/avoid-loop-align.ll
+++ b/test/CodeGen/X86/avoid-loop-align.ll
@@ -11,7 +11,7 @@
define i8* @test(i8* %Q, i32* %L) nounwind {
entry:
- %tmp = tail call i32 (...)* @foo() nounwind ; <i32> [#uses=2]
+ %tmp = tail call i32 (...) @foo() nounwind ; <i32> [#uses=2]
%tmp1 = inttoptr i32 %tmp to i8* ; <i8*> [#uses=1]
br label %bb1
@@ -21,8 +21,8 @@ bb: ; preds = %bb1, %bb1
bb1: ; preds = %bb, %entry
%P.0.rec = phi i32 [ 0, %entry ], [ %indvar.next, %bb ] ; <i32> [#uses=2]
- %P.0 = getelementptr i8* %tmp1, i32 %P.0.rec ; <i8*> [#uses=3]
- %tmp2 = load i8* %P.0, align 1 ; <i8> [#uses=1]
+ %P.0 = getelementptr i8, i8* %tmp1, i32 %P.0.rec ; <i8*> [#uses=3]
+ %tmp2 = load i8, i8* %P.0, align 1 ; <i8> [#uses=1]
switch i8 %tmp2, label %bb4 [
i8 12, label %bb
i8 42, label %bb
@@ -31,7 +31,7 @@ bb1: ; preds = %bb, %entry
bb4: ; preds = %bb1
%tmp3 = ptrtoint i8* %P.0 to i32 ; <i32> [#uses=1]
%tmp4 = sub i32 %tmp3, %tmp ; <i32> [#uses=1]
- %tmp5 = getelementptr [100 x i32]* @A, i32 0, i32 %tmp4 ; <i32*> [#uses=1]
+ %tmp5 = getelementptr [100 x i32], [100 x i32]* @A, i32 0, i32 %tmp4 ; <i32*> [#uses=1]
store i32 4, i32* %tmp5, align 4
ret i8* %P.0
}
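; A minimal sketch of the IR syntax change these hunks apply throughout the
; tests (hypothetical function @sum2 and declaration @foo, not part of the
; patch): load and getelementptr now spell the result/pointee type explicitly
; before the pointer operand, and a call through a varargs declaration drops
; the trailing '*' from the callee type.
declare i32 @foo(...)

define i32 @sum2(i32* %p) {
entry:
  ; new form: the pointee/result type is written before the pointer operand
  %q = getelementptr inbounds i32, i32* %p, i64 1
  %a = load i32, i32* %p, align 4
  %b = load i32, i32* %q, align 4
  ; new form: the callee type is the plain function type, not a pointer to it
  %c = call i32 (...) @foo()
  %s = add i32 %a, %b
  %t = add i32 %s, %c
  ret i32 %t
}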
diff --git a/test/CodeGen/X86/avoid_complex_am.ll b/test/CodeGen/X86/avoid_complex_am.ll
index e5e7bd23a641..fafa236b8dd4 100644
--- a/test/CodeGen/X86/avoid_complex_am.ll
+++ b/test/CodeGen/X86/avoid_complex_am.ll
@@ -19,15 +19,15 @@ for.body: ; preds = %for.body, %entry
; CHECK-NOT: phi
%indvars.iv = phi i64 [ 1, %entry ], [ %indvars.iv.next, %for.body ]
%tmp = add nsw i64 %indvars.iv, -1
- %arrayidx = getelementptr inbounds double* %b, i64 %tmp
- %tmp1 = load double* %arrayidx, align 8
+ %arrayidx = getelementptr inbounds double, double* %b, i64 %tmp
+ %tmp1 = load double, double* %arrayidx, align 8
; The induction variable should carry the scaling factor: 1.
-; CHECK: [[IVNEXT]] = add nuw i64 [[IV]], 1
+; CHECK: [[IVNEXT]] = add nuw nsw i64 [[IV]], 1
%indvars.iv.next = add i64 %indvars.iv, 1
- %arrayidx2 = getelementptr inbounds double* %c, i64 %indvars.iv.next
- %tmp2 = load double* %arrayidx2, align 8
+ %arrayidx2 = getelementptr inbounds double, double* %c, i64 %indvars.iv.next
+ %tmp2 = load double, double* %arrayidx2, align 8
%mul = fmul double %tmp1, %tmp2
- %arrayidx4 = getelementptr inbounds double* %a, i64 %indvars.iv
+ %arrayidx4 = getelementptr inbounds double, double* %a, i64 %indvars.iv
store double %mul, double* %arrayidx4, align 8
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
; Comparison should be 19 * 1 = 19.
diff --git a/test/CodeGen/X86/avx-arith.ll b/test/CodeGen/X86/avx-arith.ll
index a9da1ec067ca..792a9987e107 100644
--- a/test/CodeGen/X86/avx-arith.ll
+++ b/test/CodeGen/X86/avx-arith.ll
@@ -38,7 +38,7 @@ entry:
; CHECK: vsubpd (%
define <4 x double> @subpd256fold(<4 x double> %y, <4 x double>* nocapture %x) nounwind uwtable readonly ssp {
entry:
- %tmp2 = load <4 x double>* %x, align 32
+ %tmp2 = load <4 x double>, <4 x double>* %x, align 32
%sub.i = fsub <4 x double> %y, %tmp2
ret <4 x double> %sub.i
}
@@ -53,7 +53,7 @@ entry:
; CHECK: vsubps (%
define <8 x float> @subps256fold(<8 x float> %y, <8 x float>* nocapture %x) nounwind uwtable readonly ssp {
entry:
- %tmp2 = load <8 x float>* %x, align 32
+ %tmp2 = load <8 x float>, <8 x float>* %x, align 32
%sub.i = fsub <8 x float> %y, %tmp2
ret <8 x float> %sub.i
}
@@ -264,7 +264,7 @@ declare <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float>) nounwind readnone
define <4 x float> @int_sqrt_ss() {
; CHECK: int_sqrt_ss
; CHECK: vsqrtss
- %x0 = load float addrspace(1)* undef, align 8
+ %x0 = load float, float addrspace(1)* undef, align 8
%x1 = insertelement <4 x float> undef, float %x0, i32 0
%x2 = call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %x1) nounwind
ret <4 x float> %x2
diff --git a/test/CodeGen/X86/avx-basic.ll b/test/CodeGen/X86/avx-basic.ll
index 02ea173c8032..6857bb8bd112 100644
--- a/test/CodeGen/X86/avx-basic.ll
+++ b/test/CodeGen/X86/avx-basic.ll
@@ -57,10 +57,10 @@ entry:
define <8 x i32> @VMOVZQI2PQI([0 x float]* nocapture %aFOO) nounwind {
allocas:
%ptrcast.i33.i = bitcast [0 x float]* %aFOO to i32*
- %val.i34.i = load i32* %ptrcast.i33.i, align 4
- %ptroffset.i22.i992 = getelementptr [0 x float]* %aFOO, i64 0, i64 1
+ %val.i34.i = load i32, i32* %ptrcast.i33.i, align 4
+ %ptroffset.i22.i992 = getelementptr [0 x float], [0 x float]* %aFOO, i64 0, i64 1
%ptrcast.i23.i = bitcast float* %ptroffset.i22.i992 to i32*
- %val.i24.i = load i32* %ptrcast.i23.i, align 4
+ %val.i24.i = load i32, i32* %ptrcast.i23.i, align 4
%updatedret.i30.i = insertelement <8 x i32> undef, i32 %val.i34.i, i32 1
ret <8 x i32> %updatedret.i30.i
}
@@ -91,3 +91,12 @@ entry:
%vecext.i = extractelement <2 x i64> %a, i32 0
ret i64 %vecext.i
}
+
+; PR22685
+; CHECK: mov00
+; CHECK: vmovss
+define <8 x float> @mov00_8f32(float* %ptr) {
+ %val = load float, float* %ptr
+ %vec = insertelement <8 x float> zeroinitializer, float %val, i32 0
+ ret <8 x float> %vec
+}
diff --git a/test/CodeGen/X86/avx-bitcast.ll b/test/CodeGen/X86/avx-bitcast.ll
index c9d828c1f6e2..e34c20fcbd73 100644
--- a/test/CodeGen/X86/avx-bitcast.ll
+++ b/test/CodeGen/X86/avx-bitcast.ll
@@ -1,9 +1,12 @@
-; RUN: llc < %s -O0 -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s
+; RUN: llc < %s -O0 -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s
-; CHECK: vmovsd (%
-; CHECK-NEXT: vmovq %xmm
define i64 @bitcasti64tof64() {
- %a = load double* undef
+; CHECK-LABEL: bitcasti64tof64:
+; CHECK: # BB#0:
+; CHECK: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK-NEXT: vmovq %xmm0, %rax
+; CHECK-NEXT: retq
+ %a = load double, double* undef
%b = bitcast double %a to i64
ret i64 %b
}
diff --git a/test/CodeGen/X86/avx-cast.ll b/test/CodeGen/X86/avx-cast.ll
index 32d450cac9f9..b4798f159455 100644
--- a/test/CodeGen/X86/avx-cast.ll
+++ b/test/CodeGen/X86/avx-cast.ll
@@ -1,45 +1,100 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx | FileCheck %s --check-prefix=AVX1
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx2 | FileCheck %s --check-prefix=AVX2
+
+; Prefer a blend instruction to a vinsert128 instruction because blends
+; are simpler (no lane changes) and therefore will have equal or better
+; performance.
-; CHECK: vxorps
-; CHECK-NEXT: vinsertf128 $0
define <8 x float> @castA(<4 x float> %m) nounwind uwtable readnone ssp {
+; AVX1-LABEL: castA:
+; AVX1: vxorps %ymm1, %ymm1, %ymm1
+; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: castA:
+; AVX2: vxorps %ymm1, %ymm1, %ymm1
+; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
+; AVX2-NEXT: retq
+
entry:
%shuffle.i = shufflevector <4 x float> %m, <4 x float> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 4, i32 4>
ret <8 x float> %shuffle.i
}
-; CHECK: vxorps
-; CHECK-NEXT: vinsertf128 $0
define <4 x double> @castB(<2 x double> %m) nounwind uwtable readnone ssp {
+; AVX1-LABEL: castB:
+; AVX1: vxorpd %ymm1, %ymm1, %ymm1
+; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: castB:
+; AVX2: vxorpd %ymm1, %ymm1, %ymm1
+; AVX2-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
+; AVX2-NEXT: retq
+
entry:
%shuffle.i = shufflevector <2 x double> %m, <2 x double> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 2>
ret <4 x double> %shuffle.i
}
-; CHECK: vxorps
-; CHECK-NEXT: vinsertf128 $0
+; AVX2 is needed for integer types.
+
define <4 x i64> @castC(<2 x i64> %m) nounwind uwtable readnone ssp {
+; AVX1-LABEL: castC:
+; AVX1: vxorps %xmm1, %xmm1, %xmm1
+; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: castC:
+; AVX2: vpxor %ymm1, %ymm1, %ymm1
+; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
+; AVX2-NEXT: retq
+
entry:
%shuffle.i = shufflevector <2 x i64> %m, <2 x i64> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 2>
ret <4 x i64> %shuffle.i
}
-; CHECK-NOT: vextractf128 $0
+; The next three tests don't need any shuffling. There may or may not be a
+; vzeroupper before the return, so just check for the absence of shuffles.
+
define <4 x float> @castD(<8 x float> %m) nounwind uwtable readnone ssp {
+; AVX1-LABEL: castD:
+; AVX1-NOT: extract
+; AVX1-NOT: blend
+;
+; AVX2-LABEL: castD:
+; AVX2-NOT: extract
+; AVX2-NOT: blend
+
entry:
%shuffle.i = shufflevector <8 x float> %m, <8 x float> %m, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
ret <4 x float> %shuffle.i
}
-; CHECK-NOT: vextractf128 $0
define <2 x i64> @castE(<4 x i64> %m) nounwind uwtable readnone ssp {
+; AVX1-LABEL: castE:
+; AVX1-NOT: extract
+; AVX1-NOT: blend
+;
+; AVX2-LABEL: castE:
+; AVX2-NOT: extract
+; AVX2-NOT: blend
+
entry:
%shuffle.i = shufflevector <4 x i64> %m, <4 x i64> %m, <2 x i32> <i32 0, i32 1>
ret <2 x i64> %shuffle.i
}
-; CHECK-NOT: vextractf128 $0
define <2 x double> @castF(<4 x double> %m) nounwind uwtable readnone ssp {
+; AVX1-LABEL: castF:
+; AVX1-NOT: extract
+; AVX1-NOT: blend
+;
+; AVX2-LABEL: castF:
+; AVX2-NOT: extract
+; AVX2-NOT: blend
+
entry:
%shuffle.i = shufflevector <4 x double> %m, <4 x double> %m, <2 x i32> <i32 0, i32 1>
ret <2 x double> %shuffle.i
diff --git a/test/CodeGen/X86/avx-cvt-2.ll b/test/CodeGen/X86/avx-cvt-2.ll
index 8cc7190fcc69..583c7d5947bf 100644
--- a/test/CodeGen/X86/avx-cvt-2.ll
+++ b/test/CodeGen/X86/avx-cvt-2.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx | FileCheck %s
; Check that we generate vector conversion from float to narrower int types
@@ -8,8 +8,16 @@
define void @fptoui16(%f32vec_t %a, %i16vec_t *%p) {
; CHECK-LABEL: fptoui16:
-; CHECK: vcvttps2dq %ymm
-; CHECK-NOT: vcvttss2si
+; CHECK: # BB#0:
+; CHECK-NEXT: vcvttps2dq %ymm0, %ymm0
+; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm1
+; CHECK-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
+; CHECK-NEXT: vpshufb %xmm2, %xmm1, %xmm1
+; CHECK-NEXT: vpshufb %xmm2, %xmm0, %xmm0
+; CHECK-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; CHECK-NEXT: vmovdqa %xmm0, (%rdi)
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
%b = fptoui %f32vec_t %a to %i16vec_t
store %i16vec_t %b, %i16vec_t * %p
ret void
@@ -17,8 +25,16 @@ define void @fptoui16(%f32vec_t %a, %i16vec_t *%p) {
define void @fptosi16(%f32vec_t %a, %i16vec_t *%p) {
; CHECK-LABEL: fptosi16:
-; CHECK: vcvttps2dq %ymm
-; CHECK-NOT: vcvttss2si
+; CHECK: # BB#0:
+; CHECK-NEXT: vcvttps2dq %ymm0, %ymm0
+; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm1
+; CHECK-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
+; CHECK-NEXT: vpshufb %xmm2, %xmm1, %xmm1
+; CHECK-NEXT: vpshufb %xmm2, %xmm0, %xmm0
+; CHECK-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; CHECK-NEXT: vmovdqa %xmm0, (%rdi)
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
%b = fptosi %f32vec_t %a to %i16vec_t
store %i16vec_t %b, %i16vec_t * %p
ret void
@@ -26,8 +42,17 @@ define void @fptosi16(%f32vec_t %a, %i16vec_t *%p) {
define void @fptoui8(%f32vec_t %a, %i8vec_t *%p) {
; CHECK-LABEL: fptoui8:
-; CHECK: vcvttps2dq %ymm
-; CHECK-NOT: vcvttss2si
+; CHECK: # BB#0:
+; CHECK-NEXT: vcvttps2dq %ymm0, %ymm0
+; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm1
+; CHECK-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
+; CHECK-NEXT: vpshufb %xmm2, %xmm1, %xmm1
+; CHECK-NEXT: vpshufb %xmm2, %xmm0, %xmm0
+; CHECK-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; CHECK-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
+; CHECK-NEXT: vmovq %xmm0, (%rdi)
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
%b = fptoui %f32vec_t %a to %i8vec_t
store %i8vec_t %b, %i8vec_t * %p
ret void
@@ -35,8 +60,17 @@ define void @fptoui8(%f32vec_t %a, %i8vec_t *%p) {
define void @fptosi8(%f32vec_t %a, %i8vec_t *%p) {
; CHECK-LABEL: fptosi8:
-; CHECK: vcvttps2dq %ymm
-; CHECK-NOT: vcvttss2si
+; CHECK: # BB#0:
+; CHECK-NEXT: vcvttps2dq %ymm0, %ymm0
+; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm1
+; CHECK-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
+; CHECK-NEXT: vpshufb %xmm2, %xmm1, %xmm1
+; CHECK-NEXT: vpshufb %xmm2, %xmm0, %xmm0
+; CHECK-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; CHECK-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
+; CHECK-NEXT: vmovq %xmm0, (%rdi)
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
%b = fptosi %f32vec_t %a to %i8vec_t
store %i8vec_t %b, %i8vec_t * %p
ret void
diff --git a/test/CodeGen/X86/avx-cvt.ll b/test/CodeGen/X86/avx-cvt.ll
index 22fad7ce4b7d..6df3e5324c11 100644
--- a/test/CodeGen/X86/avx-cvt.ll
+++ b/test/CodeGen/X86/avx-cvt.ll
@@ -1,89 +1,148 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s
-; CHECK: vcvtdq2ps %ymm
define <8 x float> @sitofp00(<8 x i32> %a) nounwind {
+; CHECK-LABEL: sitofp00:
+; CHECK: # BB#0:
+; CHECK-NEXT: vcvtdq2ps %ymm0, %ymm0
+; CHECK-NEXT: retq
%b = sitofp <8 x i32> %a to <8 x float>
ret <8 x float> %b
}
-; CHECK: vcvttps2dq %ymm
define <8 x i32> @fptosi00(<8 x float> %a) nounwind {
+; CHECK-LABEL: fptosi00:
+; CHECK: # BB#0:
+; CHECK-NEXT: vcvttps2dq %ymm0, %ymm0
+; CHECK-NEXT: retq
%b = fptosi <8 x float> %a to <8 x i32>
ret <8 x i32> %b
}
-; CHECK: vcvtdq2pd %xmm
define <4 x double> @sitofp01(<4 x i32> %a) {
+; CHECK-LABEL: sitofp01:
+; CHECK: # BB#0:
+; CHECK-NEXT: vcvtdq2pd %xmm0, %ymm0
+; CHECK-NEXT: retq
%b = sitofp <4 x i32> %a to <4 x double>
ret <4 x double> %b
}
-; CHECK: vcvtdq2ps %ymm
define <8 x float> @sitofp02(<8 x i16> %a) {
+; CHECK-LABEL: sitofp02:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpmovsxwd %xmm0, %xmm1
+; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; CHECK-NEXT: vpmovsxwd %xmm0, %xmm0
+; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; CHECK-NEXT: vcvtdq2ps %ymm0, %ymm0
+; CHECK-NEXT: retq
%b = sitofp <8 x i16> %a to <8 x float>
ret <8 x float> %b
}
-; CHECK: vcvttpd2dqy %ymm
define <4 x i32> @fptosi01(<4 x double> %a) {
+; CHECK-LABEL: fptosi01:
+; CHECK: # BB#0:
+; CHECK-NEXT: vcvttpd2dqy %ymm0, %xmm0
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
%b = fptosi <4 x double> %a to <4 x i32>
ret <4 x i32> %b
}
-; CHECK: vcvtpd2psy %ymm
-; CHECK-NEXT: vcvtpd2psy %ymm
-; CHECK-NEXT: vinsertf128 $1
define <8 x float> @fptrunc00(<8 x double> %b) nounwind {
+; CHECK-LABEL: fptrunc00:
+; CHECK: # BB#0:
+; CHECK-NEXT: vcvtpd2psy %ymm0, %xmm0
+; CHECK-NEXT: vcvtpd2psy %ymm1, %xmm1
+; CHECK-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; CHECK-NEXT: retq
%a = fptrunc <8 x double> %b to <8 x float>
ret <8 x float> %a
}
-; CHECK: vcvtps2pd %xmm
define <4 x double> @fpext00(<4 x float> %b) nounwind {
+; CHECK-LABEL: fpext00:
+; CHECK: # BB#0:
+; CHECK-NEXT: vcvtps2pd %xmm0, %ymm0
+; CHECK-NEXT: retq
%a = fpext <4 x float> %b to <4 x double>
ret <4 x double> %a
}
-; CHECK: vcvtsi2sdq (%
define double @funcA(i64* nocapture %e) nounwind uwtable readonly ssp {
-entry:
- %tmp1 = load i64* %e, align 8
+; CHECK-LABEL: funcA:
+; CHECK: # BB#0:
+; CHECK-NEXT: vcvtsi2sdq (%rdi), %xmm0, %xmm0
+; CHECK-NEXT: retq
+ %tmp1 = load i64, i64* %e, align 8
%conv = sitofp i64 %tmp1 to double
ret double %conv
}
-; CHECK: vcvtsi2sdl (%
define double @funcB(i32* nocapture %e) nounwind uwtable readonly ssp {
-entry:
- %tmp1 = load i32* %e, align 4
+; CHECK-LABEL: funcB:
+; CHECK: # BB#0:
+; CHECK-NEXT: vcvtsi2sdl (%rdi), %xmm0, %xmm0
+; CHECK-NEXT: retq
+ %tmp1 = load i32, i32* %e, align 4
%conv = sitofp i32 %tmp1 to double
ret double %conv
}
-; CHECK: vcvtsi2ssl (%
define float @funcC(i32* nocapture %e) nounwind uwtable readonly ssp {
-entry:
- %tmp1 = load i32* %e, align 4
+; CHECK-LABEL: funcC:
+; CHECK: # BB#0:
+; CHECK-NEXT: vcvtsi2ssl (%rdi), %xmm0, %xmm0
+; CHECK-NEXT: retq
+ %tmp1 = load i32, i32* %e, align 4
%conv = sitofp i32 %tmp1 to float
ret float %conv
}
-; CHECK: vcvtsi2ssq (%
define float @funcD(i64* nocapture %e) nounwind uwtable readonly ssp {
-entry:
- %tmp1 = load i64* %e, align 8
+; CHECK-LABEL: funcD:
+; CHECK: # BB#0:
+; CHECK-NEXT: vcvtsi2ssq (%rdi), %xmm0, %xmm0
+; CHECK-NEXT: retq
+ %tmp1 = load i64, i64* %e, align 8
%conv = sitofp i64 %tmp1 to float
ret float %conv
}
-; CHECK: vcvtss2sd
define void @fpext() nounwind uwtable {
-entry:
+; CHECK-LABEL: fpext:
+; CHECK: # BB#0:
+; CHECK-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
+; CHECK-NEXT: vmovsd %xmm0, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT: retq
%f = alloca float, align 4
%d = alloca double, align 8
- %tmp = load float* %f, align 4
+ %tmp = load float, float* %f, align 4
%conv = fpext float %tmp to double
store double %conv, double* %d, align 8
ret void
}
+define double @nearbyint_f64(double %a) {
+; CHECK-LABEL: nearbyint_f64:
+; CHECK: # BB#0:
+; CHECK-NEXT: vroundsd $12, %xmm0, %xmm0, %xmm0
+; CHECK-NEXT: retq
+ %res = call double @llvm.nearbyint.f64(double %a)
+ ret double %res
+}
+declare double @llvm.nearbyint.f64(double %p)
+
+define float @floor_f32(float %a) {
+; CHECK-LABEL: floor_f32:
+; CHECK: # BB#0:
+; CHECK-NEXT: vroundss $1, %xmm0, %xmm0, %xmm0
+; CHECK-NEXT: retq
+ %res = call float @llvm.floor.f32(float %a)
+ ret float %res
+}
+declare float @llvm.floor.f32(float %p)
+
+
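; A hedged sketch of the checking style the rewritten AVX tests above adopt
; (hypothetical test @add8f32; the RUN line and expected assembly are
; illustrative assumptions, not part of the patch): CHECK-LABEL confines
; matching to one function and CHECK-NEXT pins consecutive output lines,
; replacing the loose per-file CHECK assertions that were deleted.
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s
define <8 x float> @add8f32(<8 x float> %a, <8 x float> %b) {
; CHECK-LABEL: add8f32:
; CHECK:       vaddps %ymm1, %ymm0, %ymm0
; CHECK-NEXT:  retq
  %r = fadd <8 x float> %a, %b
  ret <8 x float> %r
}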
diff --git a/test/CodeGen/X86/avx-insertelt.ll b/test/CodeGen/X86/avx-insertelt.ll
new file mode 100644
index 000000000000..c159d689451b
--- /dev/null
+++ b/test/CodeGen/X86/avx-insertelt.ll
@@ -0,0 +1,83 @@
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX2
+
+define <8 x float> @insert_f32(<8 x float> %y, float %f, <8 x float> %x) {
+; ALL-LABEL: insert_f32:
+; ALL: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3,4,5,6,7]
+; ALL-NEXT: retq
+ %i0 = insertelement <8 x float> %y, float %f, i32 0
+ ret <8 x float> %i0
+}
+
+define <4 x double> @insert_f64(<4 x double> %y, double %f, <4 x double> %x) {
+; ALL-LABEL: insert_f64:
+; ALL: vblendpd {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3]
+; ALL-NEXT: retq
+ %i0 = insertelement <4 x double> %y, double %f, i32 0
+ ret <4 x double> %i0
+}
+
+define <32 x i8> @insert_i8(<32 x i8> %y, i8 %f, <32 x i8> %x) {
+; AVX-LABEL: insert_i8:
+; AVX: # BB#0:
+; AVX-NEXT: vpinsrb $0, %edi, %xmm0, %xmm1
+; AVX-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
+; AVX-NEXT: retq
+;
+; AVX2-LABEL: insert_i8:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpinsrb $0, %edi, %xmm0, %xmm1
+; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
+; AVX2-NEXT: retq
+ %i0 = insertelement <32 x i8> %y, i8 %f, i32 0
+ ret <32 x i8> %i0
+}
+
+define <16 x i16> @insert_i16(<16 x i16> %y, i16 %f, <16 x i16> %x) {
+; AVX-LABEL: insert_i16:
+; AVX: # BB#0:
+; AVX-NEXT: vpinsrw $0, %edi, %xmm0, %xmm1
+; AVX-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
+; AVX-NEXT: retq
+;
+; AVX2-LABEL: insert_i16:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpinsrw $0, %edi, %xmm0, %xmm1
+; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
+; AVX2-NEXT: retq
+ %i0 = insertelement <16 x i16> %y, i16 %f, i32 0
+ ret <16 x i16> %i0
+}
+
+define <8 x i32> @insert_i32(<8 x i32> %y, i32 %f, <8 x i32> %x) {
+; AVX-LABEL: insert_i32:
+; AVX: # BB#0:
+; AVX-NEXT: vpinsrd $0, %edi, %xmm0, %xmm1
+; AVX-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
+; AVX-NEXT: retq
+;
+; AVX2-LABEL: insert_i32:
+; AVX2: # BB#0:
+; AVX2-NEXT: vmovd %edi, %xmm1
+; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3,4,5,6,7]
+; AVX2-NEXT: retq
+ %i0 = insertelement <8 x i32> %y, i32 %f, i32 0
+ ret <8 x i32> %i0
+}
+
+define <4 x i64> @insert_i64(<4 x i64> %y, i64 %f, <4 x i64> %x) {
+; AVX-LABEL: insert_i64:
+; AVX: # BB#0:
+; AVX-NEXT: vpinsrq $0, %rdi, %xmm0, %xmm1
+; AVX-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
+; AVX-NEXT: retq
+;
+; AVX2-LABEL: insert_i64:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpinsrq $0, %rdi, %xmm0, %xmm1
+; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
+; AVX2-NEXT: retq
+ %i0 = insertelement <4 x i64> %y, i64 %f, i32 0
+ ret <4 x i64> %i0
+}
+
diff --git a/test/CodeGen/X86/avx-intel-ocl.ll b/test/CodeGen/X86/avx-intel-ocl.ll
index 70ec1248cdd7..3923ca850d1a 100644
--- a/test/CodeGen/X86/avx-intel-ocl.ll
+++ b/test/CodeGen/X86/avx-intel-ocl.ll
@@ -1,7 +1,7 @@
-; RUN: llc < %s -mtriple=i686-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck -check-prefix=X32 %s
-; RUN: llc < %s -mtriple=i386-pc-win32 -mcpu=corei7-avx -mattr=+avx | FileCheck -check-prefix=X32 %s
-; RUN: llc < %s -mtriple=x86_64-win32 -mcpu=corei7-avx -mattr=+avx | FileCheck -check-prefix=WIN64 %s
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck -check-prefix=X64 %s
+; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+avx | FileCheck -check-prefix=X32 %s
+; RUN: llc < %s -mtriple=i386-pc-win32 -mattr=+avx | FileCheck -check-prefix=X32 %s
+; RUN: llc < %s -mtriple=x86_64-win32 -mattr=+avx | FileCheck -check-prefix=WIN64 %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx | FileCheck -check-prefix=X64 %s
declare <16 x float> @func_float16_ptr(<16 x float>, <16 x float> *)
declare <16 x float> @func_float16(<16 x float>, <16 x float>)
@@ -33,7 +33,7 @@ define <16 x float> @testf16_inp(<16 x float> %a, <16 x float> %b) nounwind {
%y = alloca <16 x float>, align 16
%x = fadd <16 x float> %a, %b
%1 = call intel_ocl_bicc <16 x float> @func_float16_ptr(<16 x float> %x, <16 x float>* %y)
- %2 = load <16 x float>* %y, align 16
+ %2 = load <16 x float>, <16 x float>* %y, align 16
%3 = fadd <16 x float> %2, %1
ret <16 x float> %3
}
@@ -58,7 +58,7 @@ define <16 x float> @testf16_regs(<16 x float> %a, <16 x float> %b) nounwind {
%y = alloca <16 x float>, align 16
%x = fadd <16 x float> %a, %b
%1 = call intel_ocl_bicc <16 x float> @func_float16_ptr(<16 x float> %x, <16 x float>* %y)
- %2 = load <16 x float>* %y, align 16
+ %2 = load <16 x float>, <16 x float>* %y, align 16
%3 = fadd <16 x float> %1, %b
%4 = fadd <16 x float> %2, %3
ret <16 x float> %4
diff --git a/test/CodeGen/X86/avx-intrinsics-x86-upgrade.ll b/test/CodeGen/X86/avx-intrinsics-x86-upgrade.ll
index d2b44cd64efb..e2f690bff232 100644
--- a/test/CodeGen/X86/avx-intrinsics-x86-upgrade.ll
+++ b/test/CodeGen/X86/avx-intrinsics-x86-upgrade.ll
@@ -1,7 +1,81 @@
; RUN: llc < %s -mtriple=x86_64-apple-darwin -march=x86 -mcpu=corei7-avx | FileCheck %s
+; We don't check any vinsertf128 variant with immediate 0 because that's just a blend.
+
+define <4 x double> @test_x86_avx_vinsertf128_pd_256_1(<4 x double> %a0, <2 x double> %a1) {
+; CHECK-LABEL: test_x86_avx_vinsertf128_pd_256_1:
+; CHECK: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+ %res = call <4 x double> @llvm.x86.avx.vinsertf128.pd.256(<4 x double> %a0, <2 x double> %a1, i8 1)
+ ret <4 x double> %res
+}
+declare <4 x double> @llvm.x86.avx.vinsertf128.pd.256(<4 x double>, <2 x double>, i8) nounwind readnone
+
+define <8 x float> @test_x86_avx_vinsertf128_ps_256_1(<8 x float> %a0, <4 x float> %a1) {
+; CHECK-LABEL: test_x86_avx_vinsertf128_ps_256_1:
+; CHECK: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+ %res = call <8 x float> @llvm.x86.avx.vinsertf128.ps.256(<8 x float> %a0, <4 x float> %a1, i8 1)
+ ret <8 x float> %res
+}
+declare <8 x float> @llvm.x86.avx.vinsertf128.ps.256(<8 x float>, <4 x float>, i8) nounwind readnone
+
+define <8 x i32> @test_x86_avx_vinsertf128_si_256_1(<8 x i32> %a0, <4 x i32> %a1) {
+; CHECK-LABEL: test_x86_avx_vinsertf128_si_256_1:
+; CHECK: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+ %res = call <8 x i32> @llvm.x86.avx.vinsertf128.si.256(<8 x i32> %a0, <4 x i32> %a1, i8 1)
+ ret <8 x i32> %res
+}
+
+; Verify that high bits of the immediate are masked off. This should be the equivalent
+; of a vinsertf128 $0 which should be optimized into a blend, so just check that it's
+; not a vinsertf128 $1.
+define <8 x i32> @test_x86_avx_vinsertf128_si_256_2(<8 x i32> %a0, <4 x i32> %a1) {
+; CHECK-LABEL: test_x86_avx_vinsertf128_si_256_2:
+; CHECK-NOT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+ %res = call <8 x i32> @llvm.x86.avx.vinsertf128.si.256(<8 x i32> %a0, <4 x i32> %a1, i8 2)
+ ret <8 x i32> %res
+}
+declare <8 x i32> @llvm.x86.avx.vinsertf128.si.256(<8 x i32>, <4 x i32>, i8) nounwind readnone
+
+; We don't check any vextractf128 variant with immediate 0 because that's just a move.
+
+define <2 x double> @test_x86_avx_vextractf128_pd_256_1(<4 x double> %a0) {
+; CHECK-LABEL: test_x86_avx_vextractf128_pd_256_1:
+; CHECK: vextractf128 $1, %ymm0, %xmm0
+ %res = call <2 x double> @llvm.x86.avx.vextractf128.pd.256(<4 x double> %a0, i8 1)
+ ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.avx.vextractf128.pd.256(<4 x double>, i8) nounwind readnone
+
+define <4 x float> @test_x86_avx_vextractf128_ps_256_1(<8 x float> %a0) {
+; CHECK-LABEL: test_x86_avx_vextractf128_ps_256_1:
+; CHECK: vextractf128 $1, %ymm0, %xmm0
+ %res = call <4 x float> @llvm.x86.avx.vextractf128.ps.256(<8 x float> %a0, i8 1)
+ ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.avx.vextractf128.ps.256(<8 x float>, i8) nounwind readnone
+
+define <4 x i32> @test_x86_avx_vextractf128_si_256_1(<8 x i32> %a0) {
+; CHECK-LABEL: test_x86_avx_vextractf128_si_256_1:
+; CHECK: vextractf128 $1, %ymm0, %xmm0
+ %res = call <4 x i32> @llvm.x86.avx.vextractf128.si.256(<8 x i32> %a0, i8 1)
+ ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.avx.vextractf128.si.256(<8 x i32>, i8) nounwind readnone
+
+; Verify that high bits of the immediate are masked off. This should be the equivalent
+; of a vextractf128 $0 which should be optimized away, so just check that it's
+; not a vextractf128 of any kind.
+define <2 x double> @test_x86_avx_extractf128_pd_256_2(<4 x double> %a0) {
+; CHECK-LABEL: test_x86_avx_extractf128_pd_256_2:
+; CHECK-NOT: vextractf128
+ %res = call <2 x double> @llvm.x86.avx.vextractf128.pd.256(<4 x double> %a0, i8 2)
+ ret <2 x double> %res
+}
+
+
define <4 x double> @test_x86_avx_blend_pd_256(<4 x double> %a0, <4 x double> %a1) {
- ; CHECK: vblendpd
+; CHECK-LABEL: test_x86_avx_blend_pd_256:
+; CHECK: vblendpd
%res = call <4 x double> @llvm.x86.avx.blend.pd.256(<4 x double> %a0, <4 x double> %a1, i32 7) ; <<4 x double>> [#uses=1]
ret <4 x double> %res
}
@@ -9,7 +83,8 @@ declare <4 x double> @llvm.x86.avx.blend.pd.256(<4 x double>, <4 x double>, i32)
define <8 x float> @test_x86_avx_blend_ps_256(<8 x float> %a0, <8 x float> %a1) {
- ; CHECK: vblendps
+; CHECK-LABEL: test_x86_avx_blend_ps_256:
+; CHECK: vblendps
%res = call <8 x float> @llvm.x86.avx.blend.ps.256(<8 x float> %a0, <8 x float> %a1, i32 7) ; <<8 x float>> [#uses=1]
ret <8 x float> %res
}
@@ -17,10 +92,54 @@ declare <8 x float> @llvm.x86.avx.blend.ps.256(<8 x float>, <8 x float>, i32) no
define <8 x float> @test_x86_avx_dp_ps_256(<8 x float> %a0, <8 x float> %a1) {
- ; CHECK: vdpps
+; CHECK-LABEL: test_x86_avx_dp_ps_256:
+; CHECK: vdpps
%res = call <8 x float> @llvm.x86.avx.dp.ps.256(<8 x float> %a0, <8 x float> %a1, i32 7) ; <<8 x float>> [#uses=1]
ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.dp.ps.256(<8 x float>, <8 x float>, i32) nounwind readnone
+define <2 x i64> @test_x86_sse2_psll_dq(<2 x i64> %a0) {
+; CHECK-LABEL: test_x86_sse2_psll_dq:
+; CHECK: vpslldq {{.*#+}} xmm0 = zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
+ %res = call <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64> %a0, i32 8) ; <<2 x i64>> [#uses=1]
+ ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64>, i32) nounwind readnone
+
+
+define <2 x i64> @test_x86_sse2_psrl_dq(<2 x i64> %a0) {
+; CHECK-LABEL: test_x86_sse2_psrl_dq:
+; CHECK: vpsrldq {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero
+ %res = call <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64> %a0, i32 8) ; <<2 x i64>> [#uses=1]
+ ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64>, i32) nounwind readnone
+
+
+define <2 x double> @test_x86_sse41_blendpd(<2 x double> %a0, <2 x double> %a1) {
+; CHECK-LABEL: test_x86_sse41_blendpd:
+; CHECK: vblendpd
+ %res = call <2 x double> @llvm.x86.sse41.blendpd(<2 x double> %a0, <2 x double> %a1, i8 2) ; <<2 x double>> [#uses=1]
+ ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.sse41.blendpd(<2 x double>, <2 x double>, i8) nounwind readnone
+
+
+define <4 x float> @test_x86_sse41_blendps(<4 x float> %a0, <4 x float> %a1) {
+; CHECK-LABEL: test_x86_sse41_blendps:
+; CHECK: vblendps
+ %res = call <4 x float> @llvm.x86.sse41.blendps(<4 x float> %a0, <4 x float> %a1, i8 7) ; <<4 x float>> [#uses=1]
+ ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.sse41.blendps(<4 x float>, <4 x float>, i8) nounwind readnone
+
+
+define <8 x i16> @test_x86_sse41_pblendw(<8 x i16> %a0, <8 x i16> %a1) {
+; CHECK-LABEL: test_x86_sse41_pblendw:
+; CHECK: vpblendw
+ %res = call <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16> %a0, <8 x i16> %a1, i8 7) ; <<8 x i16>> [#uses=1]
+ ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16>, <8 x i16>, i8) nounwind readnone
diff --git a/test/CodeGen/X86/avx-intrinsics-x86.ll b/test/CodeGen/X86/avx-intrinsics-x86.ll
index bb9354cff038..28a0272ecf02 100644
--- a/test/CodeGen/X86/avx-intrinsics-x86.ll
+++ b/test/CodeGen/X86/avx-intrinsics-x86.ll
@@ -1,7 +1,10 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -march=x86 -mcpu=corei7-avx | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -march=x86 -mattr=avx,aes,pclmul | FileCheck %s
define <2 x i64> @test_x86_aesni_aesdec(<2 x i64> %a0, <2 x i64> %a1) {
- ; CHECK: vaesdec
+; CHECK-LABEL: test_x86_aesni_aesdec:
+; CHECK: # BB#0:
+; CHECK-NEXT: vaesdec %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: retl
%res = call <2 x i64> @llvm.x86.aesni.aesdec(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
ret <2 x i64> %res
}
@@ -9,7 +12,10 @@ declare <2 x i64> @llvm.x86.aesni.aesdec(<2 x i64>, <2 x i64>) nounwind readnone
define <2 x i64> @test_x86_aesni_aesdeclast(<2 x i64> %a0, <2 x i64> %a1) {
- ; CHECK: vaesdeclast
+; CHECK-LABEL: test_x86_aesni_aesdeclast:
+; CHECK: # BB#0:
+; CHECK-NEXT: vaesdeclast %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: retl
%res = call <2 x i64> @llvm.x86.aesni.aesdeclast(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
ret <2 x i64> %res
}
@@ -17,7 +23,10 @@ declare <2 x i64> @llvm.x86.aesni.aesdeclast(<2 x i64>, <2 x i64>) nounwind read
define <2 x i64> @test_x86_aesni_aesenc(<2 x i64> %a0, <2 x i64> %a1) {
- ; CHECK: vaesenc
+; CHECK-LABEL: test_x86_aesni_aesenc:
+; CHECK: # BB#0:
+; CHECK-NEXT: vaesenc %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: retl
%res = call <2 x i64> @llvm.x86.aesni.aesenc(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
ret <2 x i64> %res
}
@@ -25,7 +34,10 @@ declare <2 x i64> @llvm.x86.aesni.aesenc(<2 x i64>, <2 x i64>) nounwind readnone
define <2 x i64> @test_x86_aesni_aesenclast(<2 x i64> %a0, <2 x i64> %a1) {
- ; CHECK: vaesenclast
+; CHECK-LABEL: test_x86_aesni_aesenclast:
+; CHECK: # BB#0:
+; CHECK-NEXT: vaesenclast %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: retl
%res = call <2 x i64> @llvm.x86.aesni.aesenclast(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
ret <2 x i64> %res
}
@@ -33,7 +45,10 @@ declare <2 x i64> @llvm.x86.aesni.aesenclast(<2 x i64>, <2 x i64>) nounwind read
define <2 x i64> @test_x86_aesni_aesimc(<2 x i64> %a0) {
- ; CHECK: vaesimc
+; CHECK-LABEL: test_x86_aesni_aesimc:
+; CHECK: # BB#0:
+; CHECK-NEXT: vaesimc %xmm0, %xmm0
+; CHECK-NEXT: retl
%res = call <2 x i64> @llvm.x86.aesni.aesimc(<2 x i64> %a0) ; <<2 x i64>> [#uses=1]
ret <2 x i64> %res
}
@@ -41,7 +56,10 @@ declare <2 x i64> @llvm.x86.aesni.aesimc(<2 x i64>) nounwind readnone
define <2 x i64> @test_x86_aesni_aeskeygenassist(<2 x i64> %a0) {
- ; CHECK: vaeskeygenassist
+; CHECK-LABEL: test_x86_aesni_aeskeygenassist:
+; CHECK: # BB#0:
+; CHECK-NEXT: vaeskeygenassist $7, %xmm0, %xmm0
+; CHECK-NEXT: retl
%res = call <2 x i64> @llvm.x86.aesni.aeskeygenassist(<2 x i64> %a0, i8 7) ; <<2 x i64>> [#uses=1]
ret <2 x i64> %res
}
@@ -49,7 +67,10 @@ declare <2 x i64> @llvm.x86.aesni.aeskeygenassist(<2 x i64>, i8) nounwind readno
define <2 x double> @test_x86_sse2_add_sd(<2 x double> %a0, <2 x double> %a1) {
- ; CHECK: vaddsd
+; CHECK-LABEL: test_x86_sse2_add_sd:
+; CHECK: # BB#0:
+; CHECK-NEXT: vaddsd %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: retl
%res = call <2 x double> @llvm.x86.sse2.add.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
ret <2 x double> %res
}
@@ -57,7 +78,10 @@ declare <2 x double> @llvm.x86.sse2.add.sd(<2 x double>, <2 x double>) nounwind
define <2 x double> @test_x86_sse2_cmp_pd(<2 x double> %a0, <2 x double> %a1) {
- ; CHECK: vcmpordpd
+; CHECK-LABEL: test_x86_sse2_cmp_pd:
+; CHECK: # BB#0:
+; CHECK-NEXT: vcmpordpd %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: retl
%res = call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %a0, <2 x double> %a1, i8 7) ; <<2 x double>> [#uses=1]
ret <2 x double> %res
}
@@ -65,7 +89,10 @@ declare <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double>, <2 x double>, i8) nounw
define <2 x double> @test_x86_sse2_cmp_sd(<2 x double> %a0, <2 x double> %a1) {
- ; CHECK: vcmpordsd
+; CHECK-LABEL: test_x86_sse2_cmp_sd:
+; CHECK: # BB#0:
+; CHECK-NEXT: vcmpordsd %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: retl
%res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 7) ; <<2 x double>> [#uses=1]
ret <2 x double> %res
}
@@ -73,9 +100,12 @@ declare <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double>, <2 x double>, i8) nounw
define i32 @test_x86_sse2_comieq_sd(<2 x double> %a0, <2 x double> %a1) {
- ; CHECK: vcomisd
- ; CHECK: sete
- ; CHECK: movzbl
+; CHECK-LABEL: test_x86_sse2_comieq_sd:
+; CHECK: # BB#0:
+; CHECK-NEXT: vcomisd %xmm1, %xmm0
+; CHECK-NEXT: sete %al
+; CHECK-NEXT: movzbl %al, %eax
+; CHECK-NEXT: retl
%res = call i32 @llvm.x86.sse2.comieq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
ret i32 %res
}
@@ -83,9 +113,12 @@ declare i32 @llvm.x86.sse2.comieq.sd(<2 x double>, <2 x double>) nounwind readno
define i32 @test_x86_sse2_comige_sd(<2 x double> %a0, <2 x double> %a1) {
- ; CHECK: vcomisd
- ; CHECK: setae
- ; CHECK: movzbl
+; CHECK-LABEL: test_x86_sse2_comige_sd:
+; CHECK: # BB#0:
+; CHECK-NEXT: vcomisd %xmm1, %xmm0
+; CHECK-NEXT: setae %al
+; CHECK-NEXT: movzbl %al, %eax
+; CHECK-NEXT: retl
%res = call i32 @llvm.x86.sse2.comige.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
ret i32 %res
}
@@ -93,9 +126,12 @@ declare i32 @llvm.x86.sse2.comige.sd(<2 x double>, <2 x double>) nounwind readno
define i32 @test_x86_sse2_comigt_sd(<2 x double> %a0, <2 x double> %a1) {
- ; CHECK: vcomisd
- ; CHECK: seta
- ; CHECK: movzbl
+; CHECK-LABEL: test_x86_sse2_comigt_sd:
+; CHECK: # BB#0:
+; CHECK-NEXT: vcomisd %xmm1, %xmm0
+; CHECK-NEXT: seta %al
+; CHECK-NEXT: movzbl %al, %eax
+; CHECK-NEXT: retl
%res = call i32 @llvm.x86.sse2.comigt.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
ret i32 %res
}
@@ -103,9 +139,12 @@ declare i32 @llvm.x86.sse2.comigt.sd(<2 x double>, <2 x double>) nounwind readno
define i32 @test_x86_sse2_comile_sd(<2 x double> %a0, <2 x double> %a1) {
- ; CHECK: vcomisd
- ; CHECK: setbe
- ; CHECK: movzbl
+; CHECK-LABEL: test_x86_sse2_comile_sd:
+; CHECK: # BB#0:
+; CHECK-NEXT: vcomisd %xmm1, %xmm0
+; CHECK-NEXT: setbe %al
+; CHECK-NEXT: movzbl %al, %eax
+; CHECK-NEXT: retl
%res = call i32 @llvm.x86.sse2.comile.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
ret i32 %res
}
@@ -113,9 +152,12 @@ declare i32 @llvm.x86.sse2.comile.sd(<2 x double>, <2 x double>) nounwind readno
define i32 @test_x86_sse2_comilt_sd(<2 x double> %a0, <2 x double> %a1) {
- ; CHECK: vcomisd
- ; CHECK: sbbl %eax, %eax
- ; CHECK: andl $1, %eax
+; CHECK-LABEL: test_x86_sse2_comilt_sd:
+; CHECK: # BB#0:
+; CHECK-NEXT: vcomisd %xmm1, %xmm0
+; CHECK-NEXT: sbbl %eax, %eax
+; CHECK-NEXT: andl $1, %eax
+; CHECK-NEXT: retl
%res = call i32 @llvm.x86.sse2.comilt.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
ret i32 %res
}
@@ -123,9 +165,12 @@ declare i32 @llvm.x86.sse2.comilt.sd(<2 x double>, <2 x double>) nounwind readno
define i32 @test_x86_sse2_comineq_sd(<2 x double> %a0, <2 x double> %a1) {
- ; CHECK: vcomisd
- ; CHECK: setne
- ; CHECK: movzbl
+; CHECK-LABEL: test_x86_sse2_comineq_sd:
+; CHECK: # BB#0:
+; CHECK-NEXT: vcomisd %xmm1, %xmm0
+; CHECK-NEXT: setne %al
+; CHECK-NEXT: movzbl %al, %eax
+; CHECK-NEXT: retl
%res = call i32 @llvm.x86.sse2.comineq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
ret i32 %res
}
@@ -133,7 +178,10 @@ declare i32 @llvm.x86.sse2.comineq.sd(<2 x double>, <2 x double>) nounwind readn
define <2 x double> @test_x86_sse2_cvtdq2pd(<4 x i32> %a0) {
- ; CHECK: vcvtdq2pd
+; CHECK-LABEL: test_x86_sse2_cvtdq2pd:
+; CHECK: # BB#0:
+; CHECK-NEXT: vcvtdq2pd %xmm0, %xmm0
+; CHECK-NEXT: retl
%res = call <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32> %a0) ; <<2 x double>> [#uses=1]
ret <2 x double> %res
}
@@ -141,7 +189,10 @@ declare <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32>) nounwind readnone
define <4 x float> @test_x86_sse2_cvtdq2ps(<4 x i32> %a0) {
- ; CHECK: vcvtdq2ps
+; CHECK-LABEL: test_x86_sse2_cvtdq2ps:
+; CHECK: # BB#0:
+; CHECK-NEXT: vcvtdq2ps %xmm0, %xmm0
+; CHECK-NEXT: retl
%res = call <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32> %a0) ; <<4 x float>> [#uses=1]
ret <4 x float> %res
}
@@ -149,7 +200,10 @@ declare <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32>) nounwind readnone
define <4 x i32> @test_x86_sse2_cvtpd2dq(<2 x double> %a0) {
- ; CHECK: vcvtpd2dq
+; CHECK-LABEL: test_x86_sse2_cvtpd2dq:
+; CHECK: # BB#0:
+; CHECK-NEXT: vcvtpd2dq %xmm0, %xmm0
+; CHECK-NEXT: retl
%res = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> %a0) ; <<4 x i32>> [#uses=1]
ret <4 x i32> %res
}
@@ -157,7 +211,10 @@ declare <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double>) nounwind readnone
define <4 x float> @test_x86_sse2_cvtpd2ps(<2 x double> %a0) {
- ; CHECK: vcvtpd2ps
+; CHECK-LABEL: test_x86_sse2_cvtpd2ps:
+; CHECK: # BB#0:
+; CHECK-NEXT: vcvtpd2ps %xmm0, %xmm0
+; CHECK-NEXT: retl
%res = call <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double> %a0) ; <<4 x float>> [#uses=1]
ret <4 x float> %res
}
@@ -165,7 +222,10 @@ declare <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double>) nounwind readnone
define <4 x i32> @test_x86_sse2_cvtps2dq(<4 x float> %a0) {
- ; CHECK: vcvtps2dq
+; CHECK-LABEL: test_x86_sse2_cvtps2dq:
+; CHECK: # BB#0:
+; CHECK-NEXT: vcvtps2dq %xmm0, %xmm0
+; CHECK-NEXT: retl
%res = call <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float> %a0) ; <<4 x i32>> [#uses=1]
ret <4 x i32> %res
}
@@ -173,7 +233,10 @@ declare <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float>) nounwind readnone
define <2 x double> @test_x86_sse2_cvtps2pd(<4 x float> %a0) {
- ; CHECK: vcvtps2pd
+; CHECK-LABEL: test_x86_sse2_cvtps2pd:
+; CHECK: # BB#0:
+; CHECK-NEXT: vcvtps2pd %xmm0, %xmm0
+; CHECK-NEXT: retl
%res = call <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float> %a0) ; <<2 x double>> [#uses=1]
ret <2 x double> %res
}
@@ -181,7 +244,10 @@ declare <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float>) nounwind readnone
define i32 @test_x86_sse2_cvtsd2si(<2 x double> %a0) {
- ; CHECK: vcvtsd2si
+; CHECK-LABEL: test_x86_sse2_cvtsd2si:
+; CHECK: # BB#0:
+; CHECK-NEXT: vcvtsd2si %xmm0, %eax
+; CHECK-NEXT: retl
%res = call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> %a0) ; <i32> [#uses=1]
ret i32 %res
}
@@ -189,7 +255,10 @@ declare i32 @llvm.x86.sse2.cvtsd2si(<2 x double>) nounwind readnone
define <4 x float> @test_x86_sse2_cvtsd2ss(<4 x float> %a0, <2 x double> %a1) {
- ; CHECK: vcvtsd2ss
+; CHECK-LABEL: test_x86_sse2_cvtsd2ss:
+; CHECK: # BB#0:
+; CHECK-NEXT: vcvtsd2ss %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: retl
%res = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> %a0, <2 x double> %a1) ; <<4 x float>> [#uses=1]
ret <4 x float> %res
}
@@ -197,8 +266,11 @@ declare <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float>, <2 x double>) nounwind
define <2 x double> @test_x86_sse2_cvtsi2sd(<2 x double> %a0) {
- ; CHECK: movl
- ; CHECK: vcvtsi2sd
+; CHECK-LABEL: test_x86_sse2_cvtsi2sd:
+; CHECK: # BB#0:
+; CHECK-NEXT: movl $7, %eax
+; CHECK-NEXT: vcvtsi2sdl %eax, %xmm0, %xmm0
+; CHECK-NEXT: retl
%res = call <2 x double> @llvm.x86.sse2.cvtsi2sd(<2 x double> %a0, i32 7) ; <<2 x double>> [#uses=1]
ret <2 x double> %res
}
@@ -206,7 +278,10 @@ declare <2 x double> @llvm.x86.sse2.cvtsi2sd(<2 x double>, i32) nounwind readnon
define <2 x double> @test_x86_sse2_cvtss2sd(<2 x double> %a0, <4 x float> %a1) {
- ; CHECK: vcvtss2sd
+; CHECK-LABEL: test_x86_sse2_cvtss2sd:
+; CHECK: # BB#0:
+; CHECK-NEXT: vcvtss2sd %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: retl
%res = call <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double> %a0, <4 x float> %a1) ; <<2 x double>> [#uses=1]
ret <2 x double> %res
}
@@ -214,7 +289,10 @@ declare <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double>, <4 x float>) nounwind
define <4 x i32> @test_x86_sse2_cvttpd2dq(<2 x double> %a0) {
- ; CHECK: vcvttpd2dq
+; CHECK-LABEL: test_x86_sse2_cvttpd2dq:
+; CHECK: # BB#0:
+; CHECK-NEXT: vcvttpd2dq %xmm0, %xmm0
+; CHECK-NEXT: retl
%res = call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> %a0) ; <<4 x i32>> [#uses=1]
ret <4 x i32> %res
}
@@ -222,7 +300,10 @@ declare <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double>) nounwind readnone
define <4 x i32> @test_x86_sse2_cvttps2dq(<4 x float> %a0) {
- ; CHECK: vcvttps2dq
+; CHECK-LABEL: test_x86_sse2_cvttps2dq:
+; CHECK: # BB#0:
+; CHECK-NEXT: vcvttps2dq %xmm0, %xmm0
+; CHECK-NEXT: retl
%res = call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %a0) ; <<4 x i32>> [#uses=1]
ret <4 x i32> %res
}
@@ -230,7 +311,10 @@ declare <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float>) nounwind readnone
define i32 @test_x86_sse2_cvttsd2si(<2 x double> %a0) {
- ; CHECK: vcvttsd2si
+; CHECK-LABEL: test_x86_sse2_cvttsd2si:
+; CHECK: # BB#0:
+; CHECK-NEXT: vcvttsd2si %xmm0, %eax
+; CHECK-NEXT: retl
%res = call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> %a0) ; <i32> [#uses=1]
ret i32 %res
}
@@ -238,7 +322,10 @@ declare i32 @llvm.x86.sse2.cvttsd2si(<2 x double>) nounwind readnone
define <2 x double> @test_x86_sse2_div_sd(<2 x double> %a0, <2 x double> %a1) {
- ; CHECK: vdivsd
+; CHECK-LABEL: test_x86_sse2_div_sd:
+; CHECK: # BB#0:
+; CHECK-NEXT: vdivsd %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: retl
%res = call <2 x double> @llvm.x86.sse2.div.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
ret <2 x double> %res
}
@@ -247,7 +334,10 @@ declare <2 x double> @llvm.x86.sse2.div.sd(<2 x double>, <2 x double>) nounwind
define <2 x double> @test_x86_sse2_max_pd(<2 x double> %a0, <2 x double> %a1) {
- ; CHECK: vmaxpd
+; CHECK-LABEL: test_x86_sse2_max_pd:
+; CHECK: # BB#0:
+; CHECK-NEXT: vmaxpd %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: retl
%res = call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
ret <2 x double> %res
}
@@ -255,7 +345,10 @@ declare <2 x double> @llvm.x86.sse2.max.pd(<2 x double>, <2 x double>) nounwind
define <2 x double> @test_x86_sse2_max_sd(<2 x double> %a0, <2 x double> %a1) {
- ; CHECK: vmaxsd
+; CHECK-LABEL: test_x86_sse2_max_sd:
+; CHECK: # BB#0:
+; CHECK-NEXT: vmaxsd %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: retl
%res = call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
ret <2 x double> %res
}
@@ -263,7 +356,10 @@ declare <2 x double> @llvm.x86.sse2.max.sd(<2 x double>, <2 x double>) nounwind
define <2 x double> @test_x86_sse2_min_pd(<2 x double> %a0, <2 x double> %a1) {
- ; CHECK: vminpd
+; CHECK-LABEL: test_x86_sse2_min_pd:
+; CHECK: # BB#0:
+; CHECK-NEXT: vminpd %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: retl
%res = call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
ret <2 x double> %res
}
@@ -271,7 +367,10 @@ declare <2 x double> @llvm.x86.sse2.min.pd(<2 x double>, <2 x double>) nounwind
define <2 x double> @test_x86_sse2_min_sd(<2 x double> %a0, <2 x double> %a1) {
- ; CHECK: vminsd
+; CHECK-LABEL: test_x86_sse2_min_sd:
+; CHECK: # BB#0:
+; CHECK-NEXT: vminsd %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: retl
%res = call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
ret <2 x double> %res
}
@@ -279,7 +378,10 @@ declare <2 x double> @llvm.x86.sse2.min.sd(<2 x double>, <2 x double>) nounwind
define i32 @test_x86_sse2_movmsk_pd(<2 x double> %a0) {
- ; CHECK: vmovmskpd
+; CHECK-LABEL: test_x86_sse2_movmsk_pd:
+; CHECK: # BB#0:
+; CHECK-NEXT: vmovmskpd %xmm0, %eax
+; CHECK-NEXT: retl
%res = call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> %a0) ; <i32> [#uses=1]
ret i32 %res
}
@@ -289,8 +391,10 @@ declare i32 @llvm.x86.sse2.movmsk.pd(<2 x double>) nounwind readnone
define <2 x double> @test_x86_sse2_mul_sd(<2 x double> %a0, <2 x double> %a1) {
- ; CHECK: test_x86_sse2_mul_sd
- ; CHECK: vmulsd
+; CHECK-LABEL: test_x86_sse2_mul_sd:
+; CHECK: # BB#0:
+; CHECK-NEXT: vmulsd %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: retl
%res = call <2 x double> @llvm.x86.sse2.mul.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
ret <2 x double> %res
}
@@ -298,7 +402,10 @@ declare <2 x double> @llvm.x86.sse2.mul.sd(<2 x double>, <2 x double>) nounwind
define <8 x i16> @test_x86_sse2_packssdw_128(<4 x i32> %a0, <4 x i32> %a1) {
- ; CHECK: vpackssdw
+; CHECK-LABEL: test_x86_sse2_packssdw_128:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: retl
%res = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %a0, <4 x i32> %a1) ; <<8 x i16>> [#uses=1]
ret <8 x i16> %res
}
@@ -306,7 +413,10 @@ declare <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32>, <4 x i32>) nounwind rea
define <16 x i8> @test_x86_sse2_packsswb_128(<8 x i16> %a0, <8 x i16> %a1) {
- ; CHECK: vpacksswb
+; CHECK-LABEL: test_x86_sse2_packsswb_128:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: retl
%res = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %a0, <8 x i16> %a1) ; <<16 x i8>> [#uses=1]
ret <16 x i8> %res
}
@@ -314,7 +424,10 @@ declare <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16>, <8 x i16>) nounwind rea
define <16 x i8> @test_x86_sse2_packuswb_128(<8 x i16> %a0, <8 x i16> %a1) {
- ; CHECK: vpackuswb
+; CHECK-LABEL: test_x86_sse2_packuswb_128:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: retl
%res = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %a0, <8 x i16> %a1) ; <<16 x i8>> [#uses=1]
ret <16 x i8> %res
}
@@ -322,7 +435,10 @@ declare <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16>, <8 x i16>) nounwind rea
define <16 x i8> @test_x86_sse2_padds_b(<16 x i8> %a0, <16 x i8> %a1) {
- ; CHECK: vpaddsb
+; CHECK-LABEL: test_x86_sse2_padds_b:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpaddsb %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: retl
%res = call <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
ret <16 x i8> %res
}
@@ -330,7 +446,10 @@ declare <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8>, <16 x i8>) nounwind readnone
define <8 x i16> @test_x86_sse2_padds_w(<8 x i16> %a0, <8 x i16> %a1) {
- ; CHECK: vpaddsw
+; CHECK-LABEL: test_x86_sse2_padds_w:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpaddsw %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: retl
%res = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
ret <8 x i16> %res
}
@@ -338,7 +457,10 @@ declare <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16>, <8 x i16>) nounwind readnone
define <16 x i8> @test_x86_sse2_paddus_b(<16 x i8> %a0, <16 x i8> %a1) {
- ; CHECK: vpaddusb
+; CHECK-LABEL: test_x86_sse2_paddus_b:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpaddusb %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: retl
%res = call <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
ret <16 x i8> %res
}
@@ -346,7 +468,10 @@ declare <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8>, <16 x i8>) nounwind readnon
define <8 x i16> @test_x86_sse2_paddus_w(<8 x i16> %a0, <8 x i16> %a1) {
- ; CHECK: vpaddusw
+; CHECK-LABEL: test_x86_sse2_paddus_w:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpaddusw %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: retl
%res = call <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
ret <8 x i16> %res
}
@@ -354,7 +479,10 @@ declare <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16>, <8 x i16>) nounwind readnon
define <16 x i8> @test_x86_sse2_pavg_b(<16 x i8> %a0, <16 x i8> %a1) {
- ; CHECK: vpavgb
+; CHECK-LABEL: test_x86_sse2_pavg_b:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpavgb %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: retl
%res = call <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
ret <16 x i8> %res
}
@@ -362,7 +490,10 @@ declare <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8>, <16 x i8>) nounwind readnone
define <8 x i16> @test_x86_sse2_pavg_w(<8 x i16> %a0, <8 x i16> %a1) {
- ; CHECK: vpavgw
+; CHECK-LABEL: test_x86_sse2_pavg_w:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpavgw %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: retl
%res = call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
ret <8 x i16> %res
}
@@ -370,7 +501,10 @@ declare <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16>, <8 x i16>) nounwind readnone
define <4 x i32> @test_x86_sse2_pmadd_wd(<8 x i16> %a0, <8 x i16> %a1) {
- ; CHECK: vpmaddwd
+; CHECK-LABEL: test_x86_sse2_pmadd_wd:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpmaddwd %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: retl
%res = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %a0, <8 x i16> %a1) ; <<4 x i32>> [#uses=1]
ret <4 x i32> %res
}
@@ -378,7 +512,10 @@ declare <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16>, <8 x i16>) nounwind readnon
define <8 x i16> @test_x86_sse2_pmaxs_w(<8 x i16> %a0, <8 x i16> %a1) {
- ; CHECK: vpmaxsw
+; CHECK-LABEL: test_x86_sse2_pmaxs_w:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: retl
%res = call <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
ret <8 x i16> %res
}
@@ -386,7 +523,10 @@ declare <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16>, <8 x i16>) nounwind readnone
define <16 x i8> @test_x86_sse2_pmaxu_b(<16 x i8> %a0, <16 x i8> %a1) {
- ; CHECK: vpmaxub
+; CHECK-LABEL: test_x86_sse2_pmaxu_b:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: retl
%res = call <16 x i8> @llvm.x86.sse2.pmaxu.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
ret <16 x i8> %res
}
@@ -394,7 +534,10 @@ declare <16 x i8> @llvm.x86.sse2.pmaxu.b(<16 x i8>, <16 x i8>) nounwind readnone
define <8 x i16> @test_x86_sse2_pmins_w(<8 x i16> %a0, <8 x i16> %a1) {
- ; CHECK: vpminsw
+; CHECK-LABEL: test_x86_sse2_pmins_w:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpminsw %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: retl
%res = call <8 x i16> @llvm.x86.sse2.pmins.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
ret <8 x i16> %res
}
@@ -402,7 +545,10 @@ declare <8 x i16> @llvm.x86.sse2.pmins.w(<8 x i16>, <8 x i16>) nounwind readnone
define <16 x i8> @test_x86_sse2_pminu_b(<16 x i8> %a0, <16 x i8> %a1) {
- ; CHECK: vpminub
+; CHECK-LABEL: test_x86_sse2_pminu_b:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpminub %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: retl
%res = call <16 x i8> @llvm.x86.sse2.pminu.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
ret <16 x i8> %res
}
@@ -410,7 +556,10 @@ declare <16 x i8> @llvm.x86.sse2.pminu.b(<16 x i8>, <16 x i8>) nounwind readnone
define i32 @test_x86_sse2_pmovmskb_128(<16 x i8> %a0) {
- ; CHECK: vpmovmskb
+; CHECK-LABEL: test_x86_sse2_pmovmskb_128:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpmovmskb %xmm0, %eax
+; CHECK-NEXT: retl
%res = call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %a0) ; <i32> [#uses=1]
ret i32 %res
}
@@ -418,7 +567,10 @@ declare i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8>) nounwind readnone
define <8 x i16> @test_x86_sse2_pmulh_w(<8 x i16> %a0, <8 x i16> %a1) {
- ; CHECK: vpmulhw
+; CHECK-LABEL: test_x86_sse2_pmulh_w:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpmulhw %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: retl
%res = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
ret <8 x i16> %res
}
@@ -426,7 +578,10 @@ declare <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16>, <8 x i16>) nounwind readnone
define <8 x i16> @test_x86_sse2_pmulhu_w(<8 x i16> %a0, <8 x i16> %a1) {
- ; CHECK: vpmulhuw
+; CHECK-LABEL: test_x86_sse2_pmulhu_w:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpmulhuw %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: retl
%res = call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
ret <8 x i16> %res
}
@@ -434,7 +589,10 @@ declare <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16>, <8 x i16>) nounwind readnon
define <2 x i64> @test_x86_sse2_pmulu_dq(<4 x i32> %a0, <4 x i32> %a1) {
- ; CHECK: vpmuludq
+; CHECK-LABEL: test_x86_sse2_pmulu_dq:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpmuludq %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: retl
%res = call <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32> %a0, <4 x i32> %a1) ; <<2 x i64>> [#uses=1]
ret <2 x i64> %res
}
@@ -442,7 +600,10 @@ declare <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32>, <4 x i32>) nounwind readnon
define <2 x i64> @test_x86_sse2_psad_bw(<16 x i8> %a0, <16 x i8> %a1) {
- ; CHECK: vpsadbw
+; CHECK-LABEL: test_x86_sse2_psad_bw:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: retl
%res = call <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8> %a0, <16 x i8> %a1) ; <<2 x i64>> [#uses=1]
ret <2 x i64> %res
}
@@ -450,31 +611,21 @@ declare <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8>, <16 x i8>) nounwind readnone
define <4 x i32> @test_x86_sse2_psll_d(<4 x i32> %a0, <4 x i32> %a1) {
- ; CHECK: vpslld
+; CHECK-LABEL: test_x86_sse2_psll_d:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpslld %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: retl
%res = call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32>, <4 x i32>) nounwind readnone
-define <2 x i64> @test_x86_sse2_psll_dq(<2 x i64> %a0) {
- ; CHECK: vpslldq {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
- %res = call <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
- ret <2 x i64> %res
-}
-declare <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64>, i32) nounwind readnone
-
-
-define <2 x i64> @test_x86_sse2_psll_dq_bs(<2 x i64> %a0) {
- ; CHECK: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8]
- %res = call <2 x i64> @llvm.x86.sse2.psll.dq.bs(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
- ret <2 x i64> %res
-}
-declare <2 x i64> @llvm.x86.sse2.psll.dq.bs(<2 x i64>, i32) nounwind readnone
-
-
define <2 x i64> @test_x86_sse2_psll_q(<2 x i64> %a0, <2 x i64> %a1) {
- ; CHECK: vpsllq
+; CHECK-LABEL: test_x86_sse2_psll_q:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpsllq %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: retl
%res = call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
ret <2 x i64> %res
}
@@ -482,7 +633,10 @@ declare <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64>, <2 x i64>) nounwind readnone
define <8 x i16> @test_x86_sse2_psll_w(<8 x i16> %a0, <8 x i16> %a1) {
- ; CHECK: vpsllw
+; CHECK-LABEL: test_x86_sse2_psll_w:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpsllw %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: retl
%res = call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
ret <8 x i16> %res
}
@@ -490,7 +644,10 @@ declare <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16>, <8 x i16>) nounwind readnone
define <4 x i32> @test_x86_sse2_pslli_d(<4 x i32> %a0) {
- ; CHECK: vpslld
+; CHECK-LABEL: test_x86_sse2_pslli_d:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpslld $7, %xmm0, %xmm0
+; CHECK-NEXT: retl
%res = call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %a0, i32 7) ; <<4 x i32>> [#uses=1]
ret <4 x i32> %res
}
@@ -498,7 +655,10 @@ declare <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32>, i32) nounwind readnone
define <2 x i64> @test_x86_sse2_pslli_q(<2 x i64> %a0) {
- ; CHECK: vpsllq
+; CHECK-LABEL: test_x86_sse2_pslli_q:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpsllq $7, %xmm0, %xmm0
+; CHECK-NEXT: retl
%res = call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
ret <2 x i64> %res
}
@@ -506,7 +666,10 @@ declare <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64>, i32) nounwind readnone
define <8 x i16> @test_x86_sse2_pslli_w(<8 x i16> %a0) {
- ; CHECK: vpsllw
+; CHECK-LABEL: test_x86_sse2_pslli_w:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpsllw $7, %xmm0, %xmm0
+; CHECK-NEXT: retl
%res = call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %a0, i32 7) ; <<8 x i16>> [#uses=1]
ret <8 x i16> %res
}
@@ -514,7 +677,10 @@ declare <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16>, i32) nounwind readnone
define <4 x i32> @test_x86_sse2_psra_d(<4 x i32> %a0, <4 x i32> %a1) {
- ; CHECK: vpsrad
+; CHECK-LABEL: test_x86_sse2_psra_d:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpsrad %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: retl
%res = call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
ret <4 x i32> %res
}
@@ -522,7 +688,10 @@ declare <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32>, <4 x i32>) nounwind readnone
define <8 x i16> @test_x86_sse2_psra_w(<8 x i16> %a0, <8 x i16> %a1) {
- ; CHECK: vpsraw
+; CHECK-LABEL: test_x86_sse2_psra_w:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpsraw %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: retl
%res = call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
ret <8 x i16> %res
}
@@ -530,7 +699,10 @@ declare <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16>, <8 x i16>) nounwind readnone
define <4 x i32> @test_x86_sse2_psrai_d(<4 x i32> %a0) {
- ; CHECK: vpsrad
+; CHECK-LABEL: test_x86_sse2_psrai_d:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpsrad $7, %xmm0, %xmm0
+; CHECK-NEXT: retl
%res = call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %a0, i32 7) ; <<4 x i32>> [#uses=1]
ret <4 x i32> %res
}
@@ -538,7 +710,10 @@ declare <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32>, i32) nounwind readnone
define <8 x i16> @test_x86_sse2_psrai_w(<8 x i16> %a0) {
- ; CHECK: vpsraw
+; CHECK-LABEL: test_x86_sse2_psrai_w:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpsraw $7, %xmm0, %xmm0
+; CHECK-NEXT: retl
%res = call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %a0, i32 7) ; <<8 x i16>> [#uses=1]
ret <8 x i16> %res
}
@@ -546,31 +721,21 @@ declare <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16>, i32) nounwind readnone
define <4 x i32> @test_x86_sse2_psrl_d(<4 x i32> %a0, <4 x i32> %a1) {
- ; CHECK: vpsrld
+; CHECK-LABEL: test_x86_sse2_psrl_d:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpsrld %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: retl
%res = call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32>, <4 x i32>) nounwind readnone
-define <2 x i64> @test_x86_sse2_psrl_dq(<2 x i64> %a0) {
- ; CHECK: vpsrldq {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
- %res = call <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
- ret <2 x i64> %res
-}
-declare <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64>, i32) nounwind readnone
-
-
-define <2 x i64> @test_x86_sse2_psrl_dq_bs(<2 x i64> %a0) {
- ; CHECK: vpsrldq {{.*#+}} xmm0 = xmm0[7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero
- %res = call <2 x i64> @llvm.x86.sse2.psrl.dq.bs(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
- ret <2 x i64> %res
-}
-declare <2 x i64> @llvm.x86.sse2.psrl.dq.bs(<2 x i64>, i32) nounwind readnone
-
-
define <2 x i64> @test_x86_sse2_psrl_q(<2 x i64> %a0, <2 x i64> %a1) {
- ; CHECK: vpsrlq
+; CHECK-LABEL: test_x86_sse2_psrl_q:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpsrlq %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: retl
%res = call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
ret <2 x i64> %res
}
@@ -578,7 +743,10 @@ declare <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64>, <2 x i64>) nounwind readnone
define <8 x i16> @test_x86_sse2_psrl_w(<8 x i16> %a0, <8 x i16> %a1) {
- ; CHECK: vpsrlw
+; CHECK-LABEL: test_x86_sse2_psrl_w:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: retl
%res = call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
ret <8 x i16> %res
}
@@ -586,7 +754,10 @@ declare <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16>, <8 x i16>) nounwind readnone
define <4 x i32> @test_x86_sse2_psrli_d(<4 x i32> %a0) {
- ; CHECK: vpsrld
+; CHECK-LABEL: test_x86_sse2_psrli_d:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpsrld $7, %xmm0, %xmm0
+; CHECK-NEXT: retl
%res = call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %a0, i32 7) ; <<4 x i32>> [#uses=1]
ret <4 x i32> %res
}
@@ -594,7 +765,10 @@ declare <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32>, i32) nounwind readnone
define <2 x i64> @test_x86_sse2_psrli_q(<2 x i64> %a0) {
- ; CHECK: vpsrlq
+; CHECK-LABEL: test_x86_sse2_psrli_q:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpsrlq $7, %xmm0, %xmm0
+; CHECK-NEXT: retl
%res = call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
ret <2 x i64> %res
}
@@ -602,7 +776,10 @@ declare <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64>, i32) nounwind readnone
define <8 x i16> @test_x86_sse2_psrli_w(<8 x i16> %a0) {
- ; CHECK: vpsrlw
+; CHECK-LABEL: test_x86_sse2_psrli_w:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpsrlw $7, %xmm0, %xmm0
+; CHECK-NEXT: retl
%res = call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %a0, i32 7) ; <<8 x i16>> [#uses=1]
ret <8 x i16> %res
}
@@ -610,7 +787,10 @@ declare <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16>, i32) nounwind readnone
define <16 x i8> @test_x86_sse2_psubs_b(<16 x i8> %a0, <16 x i8> %a1) {
- ; CHECK: vpsubsb
+; CHECK-LABEL: test_x86_sse2_psubs_b:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpsubsb %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: retl
%res = call <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
ret <16 x i8> %res
}
@@ -618,7 +798,10 @@ declare <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8>, <16 x i8>) nounwind readnone
define <8 x i16> @test_x86_sse2_psubs_w(<8 x i16> %a0, <8 x i16> %a1) {
- ; CHECK: vpsubsw
+; CHECK-LABEL: test_x86_sse2_psubs_w:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpsubsw %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: retl
%res = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
ret <8 x i16> %res
}
@@ -626,7 +809,10 @@ declare <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16>, <8 x i16>) nounwind readnone
define <16 x i8> @test_x86_sse2_psubus_b(<16 x i8> %a0, <16 x i8> %a1) {
- ; CHECK: vpsubusb
+; CHECK-LABEL: test_x86_sse2_psubus_b:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpsubusb %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: retl
%res = call <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
ret <16 x i8> %res
}
@@ -634,7 +820,10 @@ declare <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8>, <16 x i8>) nounwind readnon
define <8 x i16> @test_x86_sse2_psubus_w(<8 x i16> %a0, <8 x i16> %a1) {
- ; CHECK: vpsubusw
+; CHECK-LABEL: test_x86_sse2_psubus_w:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpsubusw %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: retl
%res = call <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
ret <8 x i16> %res
}
@@ -642,7 +831,10 @@ declare <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16>, <8 x i16>) nounwind readnon
define <2 x double> @test_x86_sse2_sqrt_pd(<2 x double> %a0) {
- ; CHECK: vsqrtpd
+; CHECK-LABEL: test_x86_sse2_sqrt_pd:
+; CHECK: # BB#0:
+; CHECK-NEXT: vsqrtpd %xmm0, %xmm0
+; CHECK-NEXT: retl
%res = call <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double> %a0) ; <<2 x double>> [#uses=1]
ret <2 x double> %res
}
@@ -650,7 +842,10 @@ declare <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double>) nounwind readnone
define <2 x double> @test_x86_sse2_sqrt_sd(<2 x double> %a0) {
- ; CHECK: vsqrtsd
+; CHECK-LABEL: test_x86_sse2_sqrt_sd:
+; CHECK: # BB#0:
+; CHECK-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0
+; CHECK-NEXT: retl
%res = call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %a0) ; <<2 x double>> [#uses=1]
ret <2 x double> %res
}
@@ -658,9 +853,11 @@ declare <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double>) nounwind readnone
define void @test_x86_sse2_storel_dq(i8* %a0, <4 x i32> %a1) {
- ; CHECK: test_x86_sse2_storel_dq
- ; CHECK: movl
- ; CHECK: vmovq
+; CHECK-LABEL: test_x86_sse2_storel_dq:
+; CHECK: # BB#0:
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT: vmovlps %xmm0, (%eax)
+; CHECK-NEXT: retl
call void @llvm.x86.sse2.storel.dq(i8* %a0, <4 x i32> %a1)
ret void
}
@@ -668,10 +865,13 @@ declare void @llvm.x86.sse2.storel.dq(i8*, <4 x i32>) nounwind
define void @test_x86_sse2_storeu_dq(i8* %a0, <16 x i8> %a1) {
- ; CHECK: test_x86_sse2_storeu_dq
- ; CHECK: movl
- ; CHECK: vmovdqu
; add operation forces the execution domain.
+; CHECK-LABEL: test_x86_sse2_storeu_dq:
+; CHECK: # BB#0:
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT: vpaddb LCPI77_0, %xmm0, %xmm0
+; CHECK-NEXT: vmovdqu %xmm0, (%eax)
+; CHECK-NEXT: retl
%a2 = add <16 x i8> %a1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
call void @llvm.x86.sse2.storeu.dq(i8* %a0, <16 x i8> %a2)
ret void
@@ -680,10 +880,15 @@ declare void @llvm.x86.sse2.storeu.dq(i8*, <16 x i8>) nounwind
define void @test_x86_sse2_storeu_pd(i8* %a0, <2 x double> %a1) {
- ; CHECK: test_x86_sse2_storeu_pd
- ; CHECK: movl
- ; CHECK: vmovupd
; fadd operation forces the execution domain.
+; CHECK-LABEL: test_x86_sse2_storeu_pd:
+; CHECK: # BB#0:
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
+; CHECK-NEXT: vpslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5,6,7]
+; CHECK-NEXT: vaddpd %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: vmovupd %xmm0, (%eax)
+; CHECK-NEXT: retl
%a2 = fadd <2 x double> %a1, <double 0x0, double 0x4200000000000000>
call void @llvm.x86.sse2.storeu.pd(i8* %a0, <2 x double> %a2)
ret void
@@ -692,8 +897,10 @@ declare void @llvm.x86.sse2.storeu.pd(i8*, <2 x double>) nounwind
define <2 x double> @test_x86_sse2_sub_sd(<2 x double> %a0, <2 x double> %a1) {
- ; CHECK: test_x86_sse2_sub_sd
- ; CHECK: vsubsd
+; CHECK-LABEL: test_x86_sse2_sub_sd:
+; CHECK: # BB#0:
+; CHECK-NEXT: vsubsd %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: retl
%res = call <2 x double> @llvm.x86.sse2.sub.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
ret <2 x double> %res
}
@@ -701,9 +908,12 @@ declare <2 x double> @llvm.x86.sse2.sub.sd(<2 x double>, <2 x double>) nounwind
define i32 @test_x86_sse2_ucomieq_sd(<2 x double> %a0, <2 x double> %a1) {
- ; CHECK: vucomisd
- ; CHECK: sete
- ; CHECK: movzbl
+; CHECK-LABEL: test_x86_sse2_ucomieq_sd:
+; CHECK: # BB#0:
+; CHECK-NEXT: vucomisd %xmm1, %xmm0
+; CHECK-NEXT: sete %al
+; CHECK-NEXT: movzbl %al, %eax
+; CHECK-NEXT: retl
%res = call i32 @llvm.x86.sse2.ucomieq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
ret i32 %res
}
@@ -711,9 +921,12 @@ declare i32 @llvm.x86.sse2.ucomieq.sd(<2 x double>, <2 x double>) nounwind readn
define i32 @test_x86_sse2_ucomige_sd(<2 x double> %a0, <2 x double> %a1) {
- ; CHECK: vucomisd
- ; CHECK: setae
- ; CHECK: movzbl
+; CHECK-LABEL: test_x86_sse2_ucomige_sd:
+; CHECK: # BB#0:
+; CHECK-NEXT: vucomisd %xmm1, %xmm0
+; CHECK-NEXT: setae %al
+; CHECK-NEXT: movzbl %al, %eax
+; CHECK-NEXT: retl
%res = call i32 @llvm.x86.sse2.ucomige.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
ret i32 %res
}
@@ -721,9 +934,12 @@ declare i32 @llvm.x86.sse2.ucomige.sd(<2 x double>, <2 x double>) nounwind readn
define i32 @test_x86_sse2_ucomigt_sd(<2 x double> %a0, <2 x double> %a1) {
- ; CHECK: vucomisd
- ; CHECK: seta
- ; CHECK: movzbl
+; CHECK-LABEL: test_x86_sse2_ucomigt_sd:
+; CHECK: # BB#0:
+; CHECK-NEXT: vucomisd %xmm1, %xmm0
+; CHECK-NEXT: seta %al
+; CHECK-NEXT: movzbl %al, %eax
+; CHECK-NEXT: retl
%res = call i32 @llvm.x86.sse2.ucomigt.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
ret i32 %res
}
@@ -731,9 +947,12 @@ declare i32 @llvm.x86.sse2.ucomigt.sd(<2 x double>, <2 x double>) nounwind readn
define i32 @test_x86_sse2_ucomile_sd(<2 x double> %a0, <2 x double> %a1) {
- ; CHECK: vucomisd
- ; CHECK: setbe
- ; CHECK: movzbl
+; CHECK-LABEL: test_x86_sse2_ucomile_sd:
+; CHECK: # BB#0:
+; CHECK-NEXT: vucomisd %xmm1, %xmm0
+; CHECK-NEXT: setbe %al
+; CHECK-NEXT: movzbl %al, %eax
+; CHECK-NEXT: retl
%res = call i32 @llvm.x86.sse2.ucomile.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
ret i32 %res
}
@@ -741,8 +960,12 @@ declare i32 @llvm.x86.sse2.ucomile.sd(<2 x double>, <2 x double>) nounwind readn
define i32 @test_x86_sse2_ucomilt_sd(<2 x double> %a0, <2 x double> %a1) {
- ; CHECK: vucomisd
- ; CHECK: sbbl
+; CHECK-LABEL: test_x86_sse2_ucomilt_sd:
+; CHECK: # BB#0:
+; CHECK-NEXT: vucomisd %xmm1, %xmm0
+; CHECK-NEXT: sbbl %eax, %eax
+; CHECK-NEXT: andl $1, %eax
+; CHECK-NEXT: retl
%res = call i32 @llvm.x86.sse2.ucomilt.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
ret i32 %res
}
@@ -750,9 +973,12 @@ declare i32 @llvm.x86.sse2.ucomilt.sd(<2 x double>, <2 x double>) nounwind readn
define i32 @test_x86_sse2_ucomineq_sd(<2 x double> %a0, <2 x double> %a1) {
- ; CHECK: vucomisd
- ; CHECK: setne
- ; CHECK: movzbl
+; CHECK-LABEL: test_x86_sse2_ucomineq_sd:
+; CHECK: # BB#0:
+; CHECK-NEXT: vucomisd %xmm1, %xmm0
+; CHECK-NEXT: setne %al
+; CHECK-NEXT: movzbl %al, %eax
+; CHECK-NEXT: retl
%res = call i32 @llvm.x86.sse2.ucomineq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
ret i32 %res
}
@@ -760,7 +986,10 @@ declare i32 @llvm.x86.sse2.ucomineq.sd(<2 x double>, <2 x double>) nounwind read
define <2 x double> @test_x86_sse3_addsub_pd(<2 x double> %a0, <2 x double> %a1) {
- ; CHECK: vaddsubpd
+; CHECK-LABEL: test_x86_sse3_addsub_pd:
+; CHECK: # BB#0:
+; CHECK-NEXT: vaddsubpd %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: retl
%res = call <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
ret <2 x double> %res
}
@@ -768,7 +997,10 @@ declare <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double>, <2 x double>) nounwi
define <4 x float> @test_x86_sse3_addsub_ps(<4 x float> %a0, <4 x float> %a1) {
- ; CHECK: vaddsubps
+; CHECK-LABEL: test_x86_sse3_addsub_ps:
+; CHECK: # BB#0:
+; CHECK-NEXT: vaddsubps %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: retl
%res = call <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
ret <4 x float> %res
}
@@ -776,7 +1008,10 @@ declare <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float>, <4 x float>) nounwind
define <2 x double> @test_x86_sse3_hadd_pd(<2 x double> %a0, <2 x double> %a1) {
- ; CHECK: vhaddpd
+; CHECK-LABEL: test_x86_sse3_hadd_pd:
+; CHECK: # BB#0:
+; CHECK-NEXT: vhaddpd %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: retl
%res = call <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
ret <2 x double> %res
}
@@ -784,7 +1019,10 @@ declare <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double>, <2 x double>) nounwind
define <4 x float> @test_x86_sse3_hadd_ps(<4 x float> %a0, <4 x float> %a1) {
- ; CHECK: vhaddps
+; CHECK-LABEL: test_x86_sse3_hadd_ps:
+; CHECK: # BB#0:
+; CHECK-NEXT: vhaddps %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: retl
%res = call <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
ret <4 x float> %res
}
@@ -792,7 +1030,10 @@ declare <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float>, <4 x float>) nounwind re
define <2 x double> @test_x86_sse3_hsub_pd(<2 x double> %a0, <2 x double> %a1) {
- ; CHECK: vhsubpd
+; CHECK-LABEL: test_x86_sse3_hsub_pd:
+; CHECK: # BB#0:
+; CHECK-NEXT: vhsubpd %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: retl
%res = call <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
ret <2 x double> %res
}
@@ -800,7 +1041,10 @@ declare <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double>, <2 x double>) nounwind
define <4 x float> @test_x86_sse3_hsub_ps(<4 x float> %a0, <4 x float> %a1) {
- ; CHECK: vhsubps
+; CHECK-LABEL: test_x86_sse3_hsub_ps:
+; CHECK: # BB#0:
+; CHECK-NEXT: vhsubps %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: retl
%res = call <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
ret <4 x float> %res
}
@@ -808,32 +1052,22 @@ declare <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float>, <4 x float>) nounwind re
define <16 x i8> @test_x86_sse3_ldu_dq(i8* %a0) {
- ; CHECK: movl
- ; CHECK: vlddqu
+; CHECK-LABEL: test_x86_sse3_ldu_dq:
+; CHECK: # BB#0:
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT: vlddqu (%eax), %xmm0
+; CHECK-NEXT: retl
%res = call <16 x i8> @llvm.x86.sse3.ldu.dq(i8* %a0) ; <<16 x i8>> [#uses=1]
ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse3.ldu.dq(i8*) nounwind readonly
-define <2 x double> @test_x86_sse41_blendpd(<2 x double> %a0, <2 x double> %a1) {
- ; CHECK: vblendpd
- %res = call <2 x double> @llvm.x86.sse41.blendpd(<2 x double> %a0, <2 x double> %a1, i8 7) ; <<2 x double>> [#uses=1]
- ret <2 x double> %res
-}
-declare <2 x double> @llvm.x86.sse41.blendpd(<2 x double>, <2 x double>, i8) nounwind readnone
-
-
-define <4 x float> @test_x86_sse41_blendps(<4 x float> %a0, <4 x float> %a1) {
- ; CHECK: vblendps
- %res = call <4 x float> @llvm.x86.sse41.blendps(<4 x float> %a0, <4 x float> %a1, i8 7) ; <<4 x float>> [#uses=1]
- ret <4 x float> %res
-}
-declare <4 x float> @llvm.x86.sse41.blendps(<4 x float>, <4 x float>, i8) nounwind readnone
-
-
define <2 x double> @test_x86_sse41_blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
- ; CHECK: vblendvpd
+; CHECK-LABEL: test_x86_sse41_blendvpd:
+; CHECK: # BB#0:
+; CHECK-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: retl
%res = call <2 x double> @llvm.x86.sse41.blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) ; <<2 x double>> [#uses=1]
ret <2 x double> %res
}
@@ -841,7 +1075,10 @@ declare <2 x double> @llvm.x86.sse41.blendvpd(<2 x double>, <2 x double>, <2 x d
define <4 x float> @test_x86_sse41_blendvps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
- ; CHECK: vblendvps
+; CHECK-LABEL: test_x86_sse41_blendvps:
+; CHECK: # BB#0:
+; CHECK-NEXT: vblendvps %xmm2, %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: retl
%res = call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) ; <<4 x float>> [#uses=1]
ret <4 x float> %res
}
@@ -849,7 +1086,10 @@ declare <4 x float> @llvm.x86.sse41.blendvps(<4 x float>, <4 x float>, <4 x floa
define <2 x double> @test_x86_sse41_dppd(<2 x double> %a0, <2 x double> %a1) {
- ; CHECK: vdppd
+; CHECK-LABEL: test_x86_sse41_dppd:
+; CHECK: # BB#0:
+; CHECK-NEXT: vdppd $7, %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: retl
%res = call <2 x double> @llvm.x86.sse41.dppd(<2 x double> %a0, <2 x double> %a1, i8 7) ; <<2 x double>> [#uses=1]
ret <2 x double> %res
}
@@ -857,7 +1097,10 @@ declare <2 x double> @llvm.x86.sse41.dppd(<2 x double>, <2 x double>, i8) nounwi
define <4 x float> @test_x86_sse41_dpps(<4 x float> %a0, <4 x float> %a1) {
- ; CHECK: vdpps
+; CHECK-LABEL: test_x86_sse41_dpps:
+; CHECK: # BB#0:
+; CHECK-NEXT: vdpps $7, %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: retl
%res = call <4 x float> @llvm.x86.sse41.dpps(<4 x float> %a0, <4 x float> %a1, i8 7) ; <<4 x float>> [#uses=1]
ret <4 x float> %res
}
@@ -865,7 +1108,10 @@ declare <4 x float> @llvm.x86.sse41.dpps(<4 x float>, <4 x float>, i8) nounwind
define <4 x float> @test_x86_sse41_insertps(<4 x float> %a0, <4 x float> %a1) {
- ; CHECK: vinsertps
+; CHECK-LABEL: test_x86_sse41_insertps:
+; CHECK: # BB#0:
+; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = zero,zero,zero,xmm0[3]
+; CHECK-NEXT: retl
%res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a0, <4 x float> %a1, i8 7) ; <<4 x float>> [#uses=1]
ret <4 x float> %res
}
@@ -874,7 +1120,10 @@ declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i8) nounw
define <8 x i16> @test_x86_sse41_mpsadbw(<16 x i8> %a0, <16 x i8> %a1) {
- ; CHECK: vmpsadbw
+; CHECK-LABEL: test_x86_sse41_mpsadbw:
+; CHECK: # BB#0:
+; CHECK-NEXT: vmpsadbw $7, %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: retl
%res = call <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <<8 x i16>> [#uses=1]
ret <8 x i16> %res
}
@@ -882,7 +1131,10 @@ declare <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8>, <16 x i8>, i8) nounwind rea
define <8 x i16> @test_x86_sse41_packusdw(<4 x i32> %a0, <4 x i32> %a1) {
- ; CHECK: vpackusdw
+; CHECK-LABEL: test_x86_sse41_packusdw:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: retl
%res = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %a0, <4 x i32> %a1) ; <<8 x i16>> [#uses=1]
ret <8 x i16> %res
}
@@ -890,23 +1142,21 @@ declare <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32>, <4 x i32>) nounwind readno
define <16 x i8> @test_x86_sse41_pblendvb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2) {
- ; CHECK: vpblendvb
+; CHECK-LABEL: test_x86_sse41_pblendvb:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: retl
%res = call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2) ; <<16 x i8>> [#uses=1]
ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone
-define <8 x i16> @test_x86_sse41_pblendw(<8 x i16> %a0, <8 x i16> %a1) {
- ; CHECK: vpblendw
- %res = call <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16> %a0, <8 x i16> %a1, i8 7) ; <<8 x i16>> [#uses=1]
- ret <8 x i16> %res
-}
-declare <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16>, <8 x i16>, i8) nounwind readnone
-
-
define <8 x i16> @test_x86_sse41_phminposuw(<8 x i16> %a0) {
- ; CHECK: vphminposuw
+; CHECK-LABEL: test_x86_sse41_phminposuw:
+; CHECK: # BB#0:
+; CHECK-NEXT: vphminposuw %xmm0, %xmm0
+; CHECK-NEXT: retl
%res = call <8 x i16> @llvm.x86.sse41.phminposuw(<8 x i16> %a0) ; <<8 x i16>> [#uses=1]
ret <8 x i16> %res
}
@@ -914,7 +1164,10 @@ declare <8 x i16> @llvm.x86.sse41.phminposuw(<8 x i16>) nounwind readnone
define <16 x i8> @test_x86_sse41_pmaxsb(<16 x i8> %a0, <16 x i8> %a1) {
- ; CHECK: vpmaxsb
+; CHECK-LABEL: test_x86_sse41_pmaxsb:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: retl
%res = call <16 x i8> @llvm.x86.sse41.pmaxsb(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
ret <16 x i8> %res
}
@@ -922,7 +1175,10 @@ declare <16 x i8> @llvm.x86.sse41.pmaxsb(<16 x i8>, <16 x i8>) nounwind readnone
define <4 x i32> @test_x86_sse41_pmaxsd(<4 x i32> %a0, <4 x i32> %a1) {
- ; CHECK: vpmaxsd
+; CHECK-LABEL: test_x86_sse41_pmaxsd:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: retl
%res = call <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
ret <4 x i32> %res
}
@@ -930,7 +1186,10 @@ declare <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32>, <4 x i32>) nounwind readnone
define <4 x i32> @test_x86_sse41_pmaxud(<4 x i32> %a0, <4 x i32> %a1) {
- ; CHECK: vpmaxud
+; CHECK-LABEL: test_x86_sse41_pmaxud:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpmaxud %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: retl
%res = call <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
ret <4 x i32> %res
}
@@ -938,7 +1197,10 @@ declare <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32>, <4 x i32>) nounwind readnone
define <8 x i16> @test_x86_sse41_pmaxuw(<8 x i16> %a0, <8 x i16> %a1) {
- ; CHECK: vpmaxuw
+; CHECK-LABEL: test_x86_sse41_pmaxuw:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: retl
%res = call <8 x i16> @llvm.x86.sse41.pmaxuw(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
ret <8 x i16> %res
}
@@ -946,7 +1208,10 @@ declare <8 x i16> @llvm.x86.sse41.pmaxuw(<8 x i16>, <8 x i16>) nounwind readnone
define <16 x i8> @test_x86_sse41_pminsb(<16 x i8> %a0, <16 x i8> %a1) {
- ; CHECK: vpminsb
+; CHECK-LABEL: test_x86_sse41_pminsb:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpminsb %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: retl
%res = call <16 x i8> @llvm.x86.sse41.pminsb(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
ret <16 x i8> %res
}
@@ -954,7 +1219,10 @@ declare <16 x i8> @llvm.x86.sse41.pminsb(<16 x i8>, <16 x i8>) nounwind readnone
define <4 x i32> @test_x86_sse41_pminsd(<4 x i32> %a0, <4 x i32> %a1) {
- ; CHECK: vpminsd
+; CHECK-LABEL: test_x86_sse41_pminsd:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpminsd %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: retl
%res = call <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
ret <4 x i32> %res
}
@@ -962,7 +1230,10 @@ declare <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32>, <4 x i32>) nounwind readnone
define <4 x i32> @test_x86_sse41_pminud(<4 x i32> %a0, <4 x i32> %a1) {
- ; CHECK: vpminud
+; CHECK-LABEL: test_x86_sse41_pminud:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpminud %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: retl
%res = call <4 x i32> @llvm.x86.sse41.pminud(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
ret <4 x i32> %res
}
@@ -970,7 +1241,10 @@ declare <4 x i32> @llvm.x86.sse41.pminud(<4 x i32>, <4 x i32>) nounwind readnone
define <8 x i16> @test_x86_sse41_pminuw(<8 x i16> %a0, <8 x i16> %a1) {
- ; CHECK: vpminuw
+; CHECK-LABEL: test_x86_sse41_pminuw:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpminuw %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: retl
%res = call <8 x i16> @llvm.x86.sse41.pminuw(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
ret <8 x i16> %res
}
@@ -978,7 +1252,10 @@ declare <8 x i16> @llvm.x86.sse41.pminuw(<8 x i16>, <8 x i16>) nounwind readnone
define <4 x i32> @test_x86_sse41_pmovsxbd(<16 x i8> %a0) {
- ; CHECK: vpmovsxbd
+; CHECK-LABEL: test_x86_sse41_pmovsxbd:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpmovsxbd %xmm0, %xmm0
+; CHECK-NEXT: retl
%res = call <4 x i32> @llvm.x86.sse41.pmovsxbd(<16 x i8> %a0) ; <<4 x i32>> [#uses=1]
ret <4 x i32> %res
}
@@ -986,7 +1263,10 @@ declare <4 x i32> @llvm.x86.sse41.pmovsxbd(<16 x i8>) nounwind readnone
define <2 x i64> @test_x86_sse41_pmovsxbq(<16 x i8> %a0) {
- ; CHECK: vpmovsxbq
+; CHECK-LABEL: test_x86_sse41_pmovsxbq:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpmovsxbq %xmm0, %xmm0
+; CHECK-NEXT: retl
%res = call <2 x i64> @llvm.x86.sse41.pmovsxbq(<16 x i8> %a0) ; <<2 x i64>> [#uses=1]
ret <2 x i64> %res
}
@@ -994,7 +1274,10 @@ declare <2 x i64> @llvm.x86.sse41.pmovsxbq(<16 x i8>) nounwind readnone
define <8 x i16> @test_x86_sse41_pmovsxbw(<16 x i8> %a0) {
- ; CHECK: vpmovsxbw
+; CHECK-LABEL: test_x86_sse41_pmovsxbw:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpmovsxbw %xmm0, %xmm0
+; CHECK-NEXT: retl
%res = call <8 x i16> @llvm.x86.sse41.pmovsxbw(<16 x i8> %a0) ; <<8 x i16>> [#uses=1]
ret <8 x i16> %res
}
@@ -1002,7 +1285,10 @@ declare <8 x i16> @llvm.x86.sse41.pmovsxbw(<16 x i8>) nounwind readnone
define <2 x i64> @test_x86_sse41_pmovsxdq(<4 x i32> %a0) {
- ; CHECK: vpmovsxdq
+; CHECK-LABEL: test_x86_sse41_pmovsxdq:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpmovsxdq %xmm0, %xmm0
+; CHECK-NEXT: retl
%res = call <2 x i64> @llvm.x86.sse41.pmovsxdq(<4 x i32> %a0) ; <<2 x i64>> [#uses=1]
ret <2 x i64> %res
}
@@ -1010,7 +1296,10 @@ declare <2 x i64> @llvm.x86.sse41.pmovsxdq(<4 x i32>) nounwind readnone
define <4 x i32> @test_x86_sse41_pmovsxwd(<8 x i16> %a0) {
- ; CHECK: vpmovsxwd
+; CHECK-LABEL: test_x86_sse41_pmovsxwd:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpmovsxwd %xmm0, %xmm0
+; CHECK-NEXT: retl
%res = call <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16> %a0) ; <<4 x i32>> [#uses=1]
ret <4 x i32> %res
}
@@ -1018,7 +1307,10 @@ declare <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16>) nounwind readnone
define <2 x i64> @test_x86_sse41_pmovsxwq(<8 x i16> %a0) {
- ; CHECK: vpmovsxwq
+; CHECK-LABEL: test_x86_sse41_pmovsxwq:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpmovsxwq %xmm0, %xmm0
+; CHECK-NEXT: retl
%res = call <2 x i64> @llvm.x86.sse41.pmovsxwq(<8 x i16> %a0) ; <<2 x i64>> [#uses=1]
ret <2 x i64> %res
}
@@ -1026,7 +1318,10 @@ declare <2 x i64> @llvm.x86.sse41.pmovsxwq(<8 x i16>) nounwind readnone
define <4 x i32> @test_x86_sse41_pmovzxbd(<16 x i8> %a0) {
- ; CHECK: vpmovzxbd
+; CHECK-LABEL: test_x86_sse41_pmovzxbd:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
+; CHECK-NEXT: retl
%res = call <4 x i32> @llvm.x86.sse41.pmovzxbd(<16 x i8> %a0) ; <<4 x i32>> [#uses=1]
ret <4 x i32> %res
}
@@ -1034,7 +1329,10 @@ declare <4 x i32> @llvm.x86.sse41.pmovzxbd(<16 x i8>) nounwind readnone
define <2 x i64> @test_x86_sse41_pmovzxbq(<16 x i8> %a0) {
- ; CHECK: vpmovzxbq
+; CHECK-LABEL: test_x86_sse41_pmovzxbq:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
+; CHECK-NEXT: retl
%res = call <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8> %a0) ; <<2 x i64>> [#uses=1]
ret <2 x i64> %res
}
@@ -1042,7 +1340,10 @@ declare <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8>) nounwind readnone
define <8 x i16> @test_x86_sse41_pmovzxbw(<16 x i8> %a0) {
- ; CHECK: vpmovzxbw
+; CHECK-LABEL: test_x86_sse41_pmovzxbw:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
+; CHECK-NEXT: retl
%res = call <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8> %a0) ; <<8 x i16>> [#uses=1]
ret <8 x i16> %res
}
@@ -1050,7 +1351,10 @@ declare <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8>) nounwind readnone
define <2 x i64> @test_x86_sse41_pmovzxdq(<4 x i32> %a0) {
- ; CHECK: vpmovzxdq
+; CHECK-LABEL: test_x86_sse41_pmovzxdq:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
+; CHECK-NEXT: retl
%res = call <2 x i64> @llvm.x86.sse41.pmovzxdq(<4 x i32> %a0) ; <<2 x i64>> [#uses=1]
ret <2 x i64> %res
}
@@ -1058,7 +1362,10 @@ declare <2 x i64> @llvm.x86.sse41.pmovzxdq(<4 x i32>) nounwind readnone
define <4 x i32> @test_x86_sse41_pmovzxwd(<8 x i16> %a0) {
- ; CHECK: vpmovzxwd
+; CHECK-LABEL: test_x86_sse41_pmovzxwd:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
+; CHECK-NEXT: retl
%res = call <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16> %a0) ; <<4 x i32>> [#uses=1]
ret <4 x i32> %res
}
@@ -1066,7 +1373,10 @@ declare <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16>) nounwind readnone
define <2 x i64> @test_x86_sse41_pmovzxwq(<8 x i16> %a0) {
- ; CHECK: vpmovzxwq
+; CHECK-LABEL: test_x86_sse41_pmovzxwq:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
+; CHECK-NEXT: retl
%res = call <2 x i64> @llvm.x86.sse41.pmovzxwq(<8 x i16> %a0) ; <<2 x i64>> [#uses=1]
ret <2 x i64> %res
}
@@ -1074,7 +1384,10 @@ declare <2 x i64> @llvm.x86.sse41.pmovzxwq(<8 x i16>) nounwind readnone
define <2 x i64> @test_x86_sse41_pmuldq(<4 x i32> %a0, <4 x i32> %a1) {
- ; CHECK: vpmuldq
+; CHECK-LABEL: test_x86_sse41_pmuldq:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpmuldq %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: retl
%res = call <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32> %a0, <4 x i32> %a1) ; <<2 x i64>> [#uses=1]
ret <2 x i64> %res
}
@@ -1082,8 +1395,12 @@ declare <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32>, <4 x i32>) nounwind readnone
define i32 @test_x86_sse41_ptestc(<2 x i64> %a0, <2 x i64> %a1) {
- ; CHECK: vptest
- ; CHECK: sbbl
+; CHECK-LABEL: test_x86_sse41_ptestc:
+; CHECK: # BB#0:
+; CHECK-NEXT: vptest %xmm1, %xmm0
+; CHECK-NEXT: sbbl %eax, %eax
+; CHECK-NEXT: andl $1, %eax
+; CHECK-NEXT: retl
%res = call i32 @llvm.x86.sse41.ptestc(<2 x i64> %a0, <2 x i64> %a1) ; <i32> [#uses=1]
ret i32 %res
}
@@ -1091,9 +1408,12 @@ declare i32 @llvm.x86.sse41.ptestc(<2 x i64>, <2 x i64>) nounwind readnone
define i32 @test_x86_sse41_ptestnzc(<2 x i64> %a0, <2 x i64> %a1) {
- ; CHECK: vptest
- ; CHECK: seta
- ; CHECK: movzbl
+; CHECK-LABEL: test_x86_sse41_ptestnzc:
+; CHECK: # BB#0:
+; CHECK-NEXT: vptest %xmm1, %xmm0
+; CHECK-NEXT: seta %al
+; CHECK-NEXT: movzbl %al, %eax
+; CHECK-NEXT: retl
%res = call i32 @llvm.x86.sse41.ptestnzc(<2 x i64> %a0, <2 x i64> %a1) ; <i32> [#uses=1]
ret i32 %res
}
@@ -1101,9 +1421,12 @@ declare i32 @llvm.x86.sse41.ptestnzc(<2 x i64>, <2 x i64>) nounwind readnone
define i32 @test_x86_sse41_ptestz(<2 x i64> %a0, <2 x i64> %a1) {
- ; CHECK: vptest
- ; CHECK: sete
- ; CHECK: movzbl
+; CHECK-LABEL: test_x86_sse41_ptestz:
+; CHECK: # BB#0:
+; CHECK-NEXT: vptest %xmm1, %xmm0
+; CHECK-NEXT: sete %al
+; CHECK-NEXT: movzbl %al, %eax
+; CHECK-NEXT: retl
%res = call i32 @llvm.x86.sse41.ptestz(<2 x i64> %a0, <2 x i64> %a1) ; <i32> [#uses=1]
ret i32 %res
}
@@ -1111,7 +1434,10 @@ declare i32 @llvm.x86.sse41.ptestz(<2 x i64>, <2 x i64>) nounwind readnone
define <2 x double> @test_x86_sse41_round_pd(<2 x double> %a0) {
- ; CHECK: vroundpd
+; CHECK-LABEL: test_x86_sse41_round_pd:
+; CHECK: # BB#0:
+; CHECK-NEXT: vroundpd $7, %xmm0, %xmm0
+; CHECK-NEXT: retl
%res = call <2 x double> @llvm.x86.sse41.round.pd(<2 x double> %a0, i32 7) ; <<2 x double>> [#uses=1]
ret <2 x double> %res
}
@@ -1119,7 +1445,10 @@ declare <2 x double> @llvm.x86.sse41.round.pd(<2 x double>, i32) nounwind readno
define <4 x float> @test_x86_sse41_round_ps(<4 x float> %a0) {
- ; CHECK: vroundps
+; CHECK-LABEL: test_x86_sse41_round_ps:
+; CHECK: # BB#0:
+; CHECK-NEXT: vroundps $7, %xmm0, %xmm0
+; CHECK-NEXT: retl
%res = call <4 x float> @llvm.x86.sse41.round.ps(<4 x float> %a0, i32 7) ; <<4 x float>> [#uses=1]
ret <4 x float> %res
}
@@ -1127,7 +1456,10 @@ declare <4 x float> @llvm.x86.sse41.round.ps(<4 x float>, i32) nounwind readnone
define <2 x double> @test_x86_sse41_round_sd(<2 x double> %a0, <2 x double> %a1) {
- ; CHECK: vroundsd
+; CHECK-LABEL: test_x86_sse41_round_sd:
+; CHECK: # BB#0:
+; CHECK-NEXT: vroundsd $7, %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: retl
%res = call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> %a0, <2 x double> %a1, i32 7) ; <<2 x double>> [#uses=1]
ret <2 x double> %res
}
@@ -1135,7 +1467,10 @@ declare <2 x double> @llvm.x86.sse41.round.sd(<2 x double>, <2 x double>, i32) n
define <4 x float> @test_x86_sse41_round_ss(<4 x float> %a0, <4 x float> %a1) {
- ; CHECK: vroundss
+; CHECK-LABEL: test_x86_sse41_round_ss:
+; CHECK: # BB#0:
+; CHECK-NEXT: vroundss $7, %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: retl
%res = call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %a0, <4 x float> %a1, i32 7) ; <<4 x float>> [#uses=1]
ret <4 x float> %res
}
@@ -1143,10 +1478,13 @@ declare <4 x float> @llvm.x86.sse41.round.ss(<4 x float>, <4 x float>, i32) noun
define i32 @test_x86_sse42_pcmpestri128(<16 x i8> %a0, <16 x i8> %a2) {
- ; CHECK: movl $7
- ; CHECK: movl $7
- ; CHECK: vpcmpestri $7
- ; CHECK: movl
+; CHECK-LABEL: test_x86_sse42_pcmpestri128:
+; CHECK: # BB#0:
+; CHECK-NEXT: movl $7, %eax
+; CHECK-NEXT: movl $7, %edx
+; CHECK-NEXT: vpcmpestri $7, %xmm1, %xmm0
+; CHECK-NEXT: movl %ecx, %eax
+; CHECK-NEXT: retl
%res = call i32 @llvm.x86.sse42.pcmpestri128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <i32> [#uses=1]
ret i32 %res
}
@@ -1154,22 +1492,32 @@ declare i32 @llvm.x86.sse42.pcmpestri128(<16 x i8>, i32, <16 x i8>, i32, i8) nou
define i32 @test_x86_sse42_pcmpestri128_load(<16 x i8>* %a0, <16 x i8>* %a2) {
- ; CHECK: movl $7
- ; CHECK: movl $7
- ; CHECK: vpcmpestri $7, (
- ; CHECK: movl
- %1 = load <16 x i8>* %a0
- %2 = load <16 x i8>* %a2
+; CHECK-LABEL: test_x86_sse42_pcmpestri128_load:
+; CHECK: # BB#0:
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT: vmovdqa (%eax), %xmm0
+; CHECK-NEXT: movl $7, %eax
+; CHECK-NEXT: movl $7, %edx
+; CHECK-NEXT: vpcmpestri $7, (%ecx), %xmm0
+; CHECK-NEXT: movl %ecx, %eax
+; CHECK-NEXT: retl
+ %1 = load <16 x i8>, <16 x i8>* %a0
+ %2 = load <16 x i8>, <16 x i8>* %a2
%res = call i32 @llvm.x86.sse42.pcmpestri128(<16 x i8> %1, i32 7, <16 x i8> %2, i32 7, i8 7) ; <i32> [#uses=1]
ret i32 %res
}
define i32 @test_x86_sse42_pcmpestria128(<16 x i8> %a0, <16 x i8> %a2) {
- ; CHECK: movl
- ; CHECK: movl
- ; CHECK: vpcmpestri
- ; CHECK: seta
+; CHECK-LABEL: test_x86_sse42_pcmpestria128:
+; CHECK: # BB#0:
+; CHECK-NEXT: movl $7, %eax
+; CHECK-NEXT: movl $7, %edx
+; CHECK-NEXT: vpcmpestri $7, %xmm1, %xmm0
+; CHECK-NEXT: seta %al
+; CHECK-NEXT: movzbl %al, %eax
+; CHECK-NEXT: retl
%res = call i32 @llvm.x86.sse42.pcmpestria128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <i32> [#uses=1]
ret i32 %res
}
@@ -1177,10 +1525,14 @@ declare i32 @llvm.x86.sse42.pcmpestria128(<16 x i8>, i32, <16 x i8>, i32, i8) no
define i32 @test_x86_sse42_pcmpestric128(<16 x i8> %a0, <16 x i8> %a2) {
- ; CHECK: movl
- ; CHECK: movl
- ; CHECK: vpcmpestri
- ; CHECK: sbbl
+; CHECK-LABEL: test_x86_sse42_pcmpestric128:
+; CHECK: # BB#0:
+; CHECK-NEXT: movl $7, %eax
+; CHECK-NEXT: movl $7, %edx
+; CHECK-NEXT: vpcmpestri $7, %xmm1, %xmm0
+; CHECK-NEXT: sbbl %eax, %eax
+; CHECK-NEXT: andl $1, %eax
+; CHECK-NEXT: retl
%res = call i32 @llvm.x86.sse42.pcmpestric128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <i32> [#uses=1]
ret i32 %res
}
@@ -1188,10 +1540,14 @@ declare i32 @llvm.x86.sse42.pcmpestric128(<16 x i8>, i32, <16 x i8>, i32, i8) no
define i32 @test_x86_sse42_pcmpestrio128(<16 x i8> %a0, <16 x i8> %a2) {
- ; CHECK: movl
- ; CHECK: movl
- ; CHECK: vpcmpestri
- ; CHECK: seto
+; CHECK-LABEL: test_x86_sse42_pcmpestrio128:
+; CHECK: # BB#0:
+; CHECK-NEXT: movl $7, %eax
+; CHECK-NEXT: movl $7, %edx
+; CHECK-NEXT: vpcmpestri $7, %xmm1, %xmm0
+; CHECK-NEXT: seto %al
+; CHECK-NEXT: movzbl %al, %eax
+; CHECK-NEXT: retl
%res = call i32 @llvm.x86.sse42.pcmpestrio128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <i32> [#uses=1]
ret i32 %res
}
@@ -1199,10 +1555,14 @@ declare i32 @llvm.x86.sse42.pcmpestrio128(<16 x i8>, i32, <16 x i8>, i32, i8) no
define i32 @test_x86_sse42_pcmpestris128(<16 x i8> %a0, <16 x i8> %a2) {
- ; CHECK: movl
- ; CHECK: movl
- ; CHECK: vpcmpestri
- ; CHECK: sets
+; CHECK-LABEL: test_x86_sse42_pcmpestris128:
+; CHECK: # BB#0:
+; CHECK-NEXT: movl $7, %eax
+; CHECK-NEXT: movl $7, %edx
+; CHECK-NEXT: vpcmpestri $7, %xmm1, %xmm0
+; CHECK-NEXT: sets %al
+; CHECK-NEXT: movzbl %al, %eax
+; CHECK-NEXT: retl
%res = call i32 @llvm.x86.sse42.pcmpestris128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <i32> [#uses=1]
ret i32 %res
}
@@ -1210,10 +1570,14 @@ declare i32 @llvm.x86.sse42.pcmpestris128(<16 x i8>, i32, <16 x i8>, i32, i8) no
define i32 @test_x86_sse42_pcmpestriz128(<16 x i8> %a0, <16 x i8> %a2) {
- ; CHECK: movl
- ; CHECK: movl
- ; CHECK: vpcmpestri
- ; CHECK: sete
+; CHECK-LABEL: test_x86_sse42_pcmpestriz128:
+; CHECK: # BB#0:
+; CHECK-NEXT: movl $7, %eax
+; CHECK-NEXT: movl $7, %edx
+; CHECK-NEXT: vpcmpestri $7, %xmm1, %xmm0
+; CHECK-NEXT: sete %al
+; CHECK-NEXT: movzbl %al, %eax
+; CHECK-NEXT: retl
%res = call i32 @llvm.x86.sse42.pcmpestriz128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <i32> [#uses=1]
ret i32 %res
}
@@ -1221,10 +1585,12 @@ declare i32 @llvm.x86.sse42.pcmpestriz128(<16 x i8>, i32, <16 x i8>, i32, i8) no
define <16 x i8> @test_x86_sse42_pcmpestrm128(<16 x i8> %a0, <16 x i8> %a2) {
- ; CHECK: movl
- ; CHECK: movl
- ; CHECK: vpcmpestrm
- ; CHECK-NOT: vmov
+; CHECK-LABEL: test_x86_sse42_pcmpestrm128:
+; CHECK: # BB#0:
+; CHECK-NEXT: movl $7, %eax
+; CHECK-NEXT: movl $7, %edx
+; CHECK-NEXT: vpcmpestrm $7, %xmm1, %xmm0
+; CHECK-NEXT: retl
%res = call <16 x i8> @llvm.x86.sse42.pcmpestrm128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <<16 x i8>> [#uses=1]
ret <16 x i8> %res
}
@@ -1232,19 +1598,25 @@ declare <16 x i8> @llvm.x86.sse42.pcmpestrm128(<16 x i8>, i32, <16 x i8>, i32, i
define <16 x i8> @test_x86_sse42_pcmpestrm128_load(<16 x i8> %a0, <16 x i8>* %a2) {
- ; CHECK: movl $7
- ; CHECK: movl $7
- ; CHECK: vpcmpestrm $7,
- ; CHECK-NOT: vmov
- %1 = load <16 x i8>* %a2
+; CHECK-LABEL: test_x86_sse42_pcmpestrm128_load:
+; CHECK: # BB#0:
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; CHECK-NEXT: movl $7, %eax
+; CHECK-NEXT: movl $7, %edx
+; CHECK-NEXT: vpcmpestrm $7, (%ecx), %xmm0
+; CHECK-NEXT: retl
+ %1 = load <16 x i8>, <16 x i8>* %a2
%res = call <16 x i8> @llvm.x86.sse42.pcmpestrm128(<16 x i8> %a0, i32 7, <16 x i8> %1, i32 7, i8 7) ; <<16 x i8>> [#uses=1]
ret <16 x i8> %res
}
define i32 @test_x86_sse42_pcmpistri128(<16 x i8> %a0, <16 x i8> %a1) {
- ; CHECK: vpcmpistri $7
- ; CHECK: movl
+; CHECK-LABEL: test_x86_sse42_pcmpistri128:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpcmpistri $7, %xmm1, %xmm0
+; CHECK-NEXT: movl %ecx, %eax
+; CHECK-NEXT: retl
%res = call i32 @llvm.x86.sse42.pcmpistri128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1]
ret i32 %res
}
@@ -1252,18 +1624,28 @@ declare i32 @llvm.x86.sse42.pcmpistri128(<16 x i8>, <16 x i8>, i8) nounwind read
define i32 @test_x86_sse42_pcmpistri128_load(<16 x i8>* %a0, <16 x i8>* %a1) {
- ; CHECK: vpcmpistri $7, (
- ; CHECK: movl
- %1 = load <16 x i8>* %a0
- %2 = load <16 x i8>* %a1
+; CHECK-LABEL: test_x86_sse42_pcmpistri128_load:
+; CHECK: # BB#0:
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; CHECK-NEXT: vmovdqa (%ecx), %xmm0
+; CHECK-NEXT: vpcmpistri $7, (%eax), %xmm0
+; CHECK-NEXT: movl %ecx, %eax
+; CHECK-NEXT: retl
+ %1 = load <16 x i8>, <16 x i8>* %a0
+ %2 = load <16 x i8>, <16 x i8>* %a1
%res = call i32 @llvm.x86.sse42.pcmpistri128(<16 x i8> %1, <16 x i8> %2, i8 7) ; <i32> [#uses=1]
ret i32 %res
}
define i32 @test_x86_sse42_pcmpistria128(<16 x i8> %a0, <16 x i8> %a1) {
- ; CHECK: vpcmpistri
- ; CHECK: seta
+; CHECK-LABEL: test_x86_sse42_pcmpistria128:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpcmpistri $7, %xmm1, %xmm0
+; CHECK-NEXT: seta %al
+; CHECK-NEXT: movzbl %al, %eax
+; CHECK-NEXT: retl
%res = call i32 @llvm.x86.sse42.pcmpistria128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1]
ret i32 %res
}
@@ -1271,8 +1653,12 @@ declare i32 @llvm.x86.sse42.pcmpistria128(<16 x i8>, <16 x i8>, i8) nounwind rea
define i32 @test_x86_sse42_pcmpistric128(<16 x i8> %a0, <16 x i8> %a1) {
- ; CHECK: vpcmpistri
- ; CHECK: sbbl
+; CHECK-LABEL: test_x86_sse42_pcmpistric128:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpcmpistri $7, %xmm1, %xmm0
+; CHECK-NEXT: sbbl %eax, %eax
+; CHECK-NEXT: andl $1, %eax
+; CHECK-NEXT: retl
%res = call i32 @llvm.x86.sse42.pcmpistric128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1]
ret i32 %res
}
@@ -1280,8 +1666,12 @@ declare i32 @llvm.x86.sse42.pcmpistric128(<16 x i8>, <16 x i8>, i8) nounwind rea
define i32 @test_x86_sse42_pcmpistrio128(<16 x i8> %a0, <16 x i8> %a1) {
- ; CHECK: vpcmpistri
- ; CHECK: seto
+; CHECK-LABEL: test_x86_sse42_pcmpistrio128:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpcmpistri $7, %xmm1, %xmm0
+; CHECK-NEXT: seto %al
+; CHECK-NEXT: movzbl %al, %eax
+; CHECK-NEXT: retl
%res = call i32 @llvm.x86.sse42.pcmpistrio128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1]
ret i32 %res
}
@@ -1289,8 +1679,12 @@ declare i32 @llvm.x86.sse42.pcmpistrio128(<16 x i8>, <16 x i8>, i8) nounwind rea
define i32 @test_x86_sse42_pcmpistris128(<16 x i8> %a0, <16 x i8> %a1) {
- ; CHECK: vpcmpistri
- ; CHECK: sets
+; CHECK-LABEL: test_x86_sse42_pcmpistris128:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpcmpistri $7, %xmm1, %xmm0
+; CHECK-NEXT: sets %al
+; CHECK-NEXT: movzbl %al, %eax
+; CHECK-NEXT: retl
%res = call i32 @llvm.x86.sse42.pcmpistris128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1]
ret i32 %res
}
@@ -1298,8 +1692,12 @@ declare i32 @llvm.x86.sse42.pcmpistris128(<16 x i8>, <16 x i8>, i8) nounwind rea
define i32 @test_x86_sse42_pcmpistriz128(<16 x i8> %a0, <16 x i8> %a1) {
- ; CHECK: vpcmpistri
- ; CHECK: sete
+; CHECK-LABEL: test_x86_sse42_pcmpistriz128:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpcmpistri $7, %xmm1, %xmm0
+; CHECK-NEXT: sete %al
+; CHECK-NEXT: movzbl %al, %eax
+; CHECK-NEXT: retl
%res = call i32 @llvm.x86.sse42.pcmpistriz128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1]
ret i32 %res
}
@@ -1307,8 +1705,10 @@ declare i32 @llvm.x86.sse42.pcmpistriz128(<16 x i8>, <16 x i8>, i8) nounwind rea
define <16 x i8> @test_x86_sse42_pcmpistrm128(<16 x i8> %a0, <16 x i8> %a1) {
- ; CHECK: vpcmpistrm $7
- ; CHECK-NOT: vmov
+; CHECK-LABEL: test_x86_sse42_pcmpistrm128:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpcmpistrm $7, %xmm1, %xmm0
+; CHECK-NEXT: retl
%res = call <16 x i8> @llvm.x86.sse42.pcmpistrm128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <<16 x i8>> [#uses=1]
ret <16 x i8> %res
}
@@ -1316,16 +1716,22 @@ declare <16 x i8> @llvm.x86.sse42.pcmpistrm128(<16 x i8>, <16 x i8>, i8) nounwin
define <16 x i8> @test_x86_sse42_pcmpistrm128_load(<16 x i8> %a0, <16 x i8>* %a1) {
- ; CHECK: vpcmpistrm $7, (
- ; CHECK-NOT: vmov
- %1 = load <16 x i8>* %a1
+; CHECK-LABEL: test_x86_sse42_pcmpistrm128_load:
+; CHECK: # BB#0:
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT: vpcmpistrm $7, (%eax), %xmm0
+; CHECK-NEXT: retl
+ %1 = load <16 x i8>, <16 x i8>* %a1
%res = call <16 x i8> @llvm.x86.sse42.pcmpistrm128(<16 x i8> %a0, <16 x i8> %1, i8 7) ; <<16 x i8>> [#uses=1]
ret <16 x i8> %res
}
define <4 x float> @test_x86_sse_add_ss(<4 x float> %a0, <4 x float> %a1) {
- ; CHECK: vaddss
+; CHECK-LABEL: test_x86_sse_add_ss:
+; CHECK: # BB#0:
+; CHECK-NEXT: vaddss %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: retl
%res = call <4 x float> @llvm.x86.sse.add.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
ret <4 x float> %res
}
@@ -1333,7 +1739,10 @@ declare <4 x float> @llvm.x86.sse.add.ss(<4 x float>, <4 x float>) nounwind read
define <4 x float> @test_x86_sse_cmp_ps(<4 x float> %a0, <4 x float> %a1) {
- ; CHECK: vcmpordps
+; CHECK-LABEL: test_x86_sse_cmp_ps:
+; CHECK: # BB#0:
+; CHECK-NEXT: vcmpordps %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: retl
%res = call <4 x float> @llvm.x86.sse.cmp.ps(<4 x float> %a0, <4 x float> %a1, i8 7) ; <<4 x float>> [#uses=1]
ret <4 x float> %res
}
@@ -1341,7 +1750,10 @@ declare <4 x float> @llvm.x86.sse.cmp.ps(<4 x float>, <4 x float>, i8) nounwind
define <4 x float> @test_x86_sse_cmp_ss(<4 x float> %a0, <4 x float> %a1) {
- ; CHECK: vcmpordss
+; CHECK-LABEL: test_x86_sse_cmp_ss:
+; CHECK: # BB#0:
+; CHECK-NEXT: vcmpordss %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: retl
%res = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a0, <4 x float> %a1, i8 7) ; <<4 x float>> [#uses=1]
ret <4 x float> %res
}
@@ -1349,9 +1761,12 @@ declare <4 x float> @llvm.x86.sse.cmp.ss(<4 x float>, <4 x float>, i8) nounwind
define i32 @test_x86_sse_comieq_ss(<4 x float> %a0, <4 x float> %a1) {
- ; CHECK: vcomiss
- ; CHECK: sete
- ; CHECK: movzbl
+; CHECK-LABEL: test_x86_sse_comieq_ss:
+; CHECK: # BB#0:
+; CHECK-NEXT: vcomiss %xmm1, %xmm0
+; CHECK-NEXT: sete %al
+; CHECK-NEXT: movzbl %al, %eax
+; CHECK-NEXT: retl
%res = call i32 @llvm.x86.sse.comieq.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
ret i32 %res
}
@@ -1359,9 +1774,12 @@ declare i32 @llvm.x86.sse.comieq.ss(<4 x float>, <4 x float>) nounwind readnone
define i32 @test_x86_sse_comige_ss(<4 x float> %a0, <4 x float> %a1) {
- ; CHECK: vcomiss
- ; CHECK: setae
- ; CHECK: movzbl
+; CHECK-LABEL: test_x86_sse_comige_ss:
+; CHECK: # BB#0:
+; CHECK-NEXT: vcomiss %xmm1, %xmm0
+; CHECK-NEXT: setae %al
+; CHECK-NEXT: movzbl %al, %eax
+; CHECK-NEXT: retl
%res = call i32 @llvm.x86.sse.comige.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
ret i32 %res
}
@@ -1369,9 +1787,12 @@ declare i32 @llvm.x86.sse.comige.ss(<4 x float>, <4 x float>) nounwind readnone
define i32 @test_x86_sse_comigt_ss(<4 x float> %a0, <4 x float> %a1) {
- ; CHECK: vcomiss
- ; CHECK: seta
- ; CHECK: movzbl
+; CHECK-LABEL: test_x86_sse_comigt_ss:
+; CHECK: # BB#0:
+; CHECK-NEXT: vcomiss %xmm1, %xmm0
+; CHECK-NEXT: seta %al
+; CHECK-NEXT: movzbl %al, %eax
+; CHECK-NEXT: retl
%res = call i32 @llvm.x86.sse.comigt.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
ret i32 %res
}
@@ -1379,9 +1800,12 @@ declare i32 @llvm.x86.sse.comigt.ss(<4 x float>, <4 x float>) nounwind readnone
define i32 @test_x86_sse_comile_ss(<4 x float> %a0, <4 x float> %a1) {
- ; CHECK: vcomiss
- ; CHECK: setbe
- ; CHECK: movzbl
+; CHECK-LABEL: test_x86_sse_comile_ss:
+; CHECK: # BB#0:
+; CHECK-NEXT: vcomiss %xmm1, %xmm0
+; CHECK-NEXT: setbe %al
+; CHECK-NEXT: movzbl %al, %eax
+; CHECK-NEXT: retl
%res = call i32 @llvm.x86.sse.comile.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
ret i32 %res
}
@@ -1389,8 +1813,12 @@ declare i32 @llvm.x86.sse.comile.ss(<4 x float>, <4 x float>) nounwind readnone
define i32 @test_x86_sse_comilt_ss(<4 x float> %a0, <4 x float> %a1) {
- ; CHECK: vcomiss
- ; CHECK: sbb
+; CHECK-LABEL: test_x86_sse_comilt_ss:
+; CHECK: # BB#0:
+; CHECK-NEXT: vcomiss %xmm1, %xmm0
+; CHECK-NEXT: sbbl %eax, %eax
+; CHECK-NEXT: andl $1, %eax
+; CHECK-NEXT: retl
%res = call i32 @llvm.x86.sse.comilt.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
ret i32 %res
}
@@ -1398,9 +1826,12 @@ declare i32 @llvm.x86.sse.comilt.ss(<4 x float>, <4 x float>) nounwind readnone
define i32 @test_x86_sse_comineq_ss(<4 x float> %a0, <4 x float> %a1) {
- ; CHECK: vcomiss
- ; CHECK: setne
- ; CHECK: movzbl
+; CHECK-LABEL: test_x86_sse_comineq_ss:
+; CHECK: # BB#0:
+; CHECK-NEXT: vcomiss %xmm1, %xmm0
+; CHECK-NEXT: setne %al
+; CHECK-NEXT: movzbl %al, %eax
+; CHECK-NEXT: retl
%res = call i32 @llvm.x86.sse.comineq.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
ret i32 %res
}
@@ -1408,8 +1839,11 @@ declare i32 @llvm.x86.sse.comineq.ss(<4 x float>, <4 x float>) nounwind readnone
define <4 x float> @test_x86_sse_cvtsi2ss(<4 x float> %a0) {
- ; CHECK: movl
- ; CHECK: vcvtsi2ss
+; CHECK-LABEL: test_x86_sse_cvtsi2ss:
+; CHECK: # BB#0:
+; CHECK-NEXT: movl $7, %eax
+; CHECK-NEXT: vcvtsi2ssl %eax, %xmm0, %xmm0
+; CHECK-NEXT: retl
%res = call <4 x float> @llvm.x86.sse.cvtsi2ss(<4 x float> %a0, i32 7) ; <<4 x float>> [#uses=1]
ret <4 x float> %res
}
@@ -1417,7 +1851,10 @@ declare <4 x float> @llvm.x86.sse.cvtsi2ss(<4 x float>, i32) nounwind readnone
define i32 @test_x86_sse_cvtss2si(<4 x float> %a0) {
- ; CHECK: vcvtss2si
+; CHECK-LABEL: test_x86_sse_cvtss2si:
+; CHECK: # BB#0:
+; CHECK-NEXT: vcvtss2si %xmm0, %eax
+; CHECK-NEXT: retl
%res = call i32 @llvm.x86.sse.cvtss2si(<4 x float> %a0) ; <i32> [#uses=1]
ret i32 %res
}
@@ -1425,7 +1862,10 @@ declare i32 @llvm.x86.sse.cvtss2si(<4 x float>) nounwind readnone
define i32 @test_x86_sse_cvttss2si(<4 x float> %a0) {
- ; CHECK: vcvttss2si
+; CHECK-LABEL: test_x86_sse_cvttss2si:
+; CHECK: # BB#0:
+; CHECK-NEXT: vcvttss2si %xmm0, %eax
+; CHECK-NEXT: retl
%res = call i32 @llvm.x86.sse.cvttss2si(<4 x float> %a0) ; <i32> [#uses=1]
ret i32 %res
}
@@ -1433,7 +1873,10 @@ declare i32 @llvm.x86.sse.cvttss2si(<4 x float>) nounwind readnone
define <4 x float> @test_x86_sse_div_ss(<4 x float> %a0, <4 x float> %a1) {
- ; CHECK: vdivss
+; CHECK-LABEL: test_x86_sse_div_ss:
+; CHECK: # BB#0:
+; CHECK-NEXT: vdivss %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: retl
%res = call <4 x float> @llvm.x86.sse.div.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
ret <4 x float> %res
}
@@ -1441,8 +1884,11 @@ declare <4 x float> @llvm.x86.sse.div.ss(<4 x float>, <4 x float>) nounwind read
define void @test_x86_sse_ldmxcsr(i8* %a0) {
- ; CHECK: movl
- ; CHECK: vldmxcsr
+; CHECK-LABEL: test_x86_sse_ldmxcsr:
+; CHECK: # BB#0:
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT: vldmxcsr (%eax)
+; CHECK-NEXT: retl
call void @llvm.x86.sse.ldmxcsr(i8* %a0)
ret void
}
@@ -1451,7 +1897,10 @@ declare void @llvm.x86.sse.ldmxcsr(i8*) nounwind
define <4 x float> @test_x86_sse_max_ps(<4 x float> %a0, <4 x float> %a1) {
- ; CHECK: vmaxps
+; CHECK-LABEL: test_x86_sse_max_ps:
+; CHECK: # BB#0:
+; CHECK-NEXT: vmaxps %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: retl
%res = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
ret <4 x float> %res
}
@@ -1459,7 +1908,10 @@ declare <4 x float> @llvm.x86.sse.max.ps(<4 x float>, <4 x float>) nounwind read
define <4 x float> @test_x86_sse_max_ss(<4 x float> %a0, <4 x float> %a1) {
- ; CHECK: vmaxss
+; CHECK-LABEL: test_x86_sse_max_ss:
+; CHECK: # BB#0:
+; CHECK-NEXT: vmaxss %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: retl
%res = call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
ret <4 x float> %res
}
@@ -1467,7 +1919,10 @@ declare <4 x float> @llvm.x86.sse.max.ss(<4 x float>, <4 x float>) nounwind read
define <4 x float> @test_x86_sse_min_ps(<4 x float> %a0, <4 x float> %a1) {
- ; CHECK: vminps
+; CHECK-LABEL: test_x86_sse_min_ps:
+; CHECK: # BB#0:
+; CHECK-NEXT: vminps %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: retl
%res = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
ret <4 x float> %res
}
@@ -1475,7 +1930,10 @@ declare <4 x float> @llvm.x86.sse.min.ps(<4 x float>, <4 x float>) nounwind read
define <4 x float> @test_x86_sse_min_ss(<4 x float> %a0, <4 x float> %a1) {
- ; CHECK: vminss
+; CHECK-LABEL: test_x86_sse_min_ss:
+; CHECK: # BB#0:
+; CHECK-NEXT: vminss %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: retl
%res = call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
ret <4 x float> %res
}
@@ -1483,7 +1941,10 @@ declare <4 x float> @llvm.x86.sse.min.ss(<4 x float>, <4 x float>) nounwind read
define i32 @test_x86_sse_movmsk_ps(<4 x float> %a0) {
- ; CHECK: vmovmskps
+; CHECK-LABEL: test_x86_sse_movmsk_ps:
+; CHECK: # BB#0:
+; CHECK-NEXT: vmovmskps %xmm0, %eax
+; CHECK-NEXT: retl
%res = call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %a0) ; <i32> [#uses=1]
ret i32 %res
}
@@ -1492,7 +1953,10 @@ declare i32 @llvm.x86.sse.movmsk.ps(<4 x float>) nounwind readnone
define <4 x float> @test_x86_sse_mul_ss(<4 x float> %a0, <4 x float> %a1) {
- ; CHECK: vmulss
+; CHECK-LABEL: test_x86_sse_mul_ss:
+; CHECK: # BB#0:
+; CHECK-NEXT: vmulss %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: retl
%res = call <4 x float> @llvm.x86.sse.mul.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
ret <4 x float> %res
}
@@ -1500,7 +1964,10 @@ declare <4 x float> @llvm.x86.sse.mul.ss(<4 x float>, <4 x float>) nounwind read
define <4 x float> @test_x86_sse_rcp_ps(<4 x float> %a0) {
- ; CHECK: vrcpps
+; CHECK-LABEL: test_x86_sse_rcp_ps:
+; CHECK: # BB#0:
+; CHECK-NEXT: vrcpps %xmm0, %xmm0
+; CHECK-NEXT: retl
%res = call <4 x float> @llvm.x86.sse.rcp.ps(<4 x float> %a0) ; <<4 x float>> [#uses=1]
ret <4 x float> %res
}
@@ -1508,7 +1975,10 @@ declare <4 x float> @llvm.x86.sse.rcp.ps(<4 x float>) nounwind readnone
define <4 x float> @test_x86_sse_rcp_ss(<4 x float> %a0) {
- ; CHECK: vrcpss
+; CHECK-LABEL: test_x86_sse_rcp_ss:
+; CHECK: # BB#0:
+; CHECK-NEXT: vrcpss %xmm0, %xmm0, %xmm0
+; CHECK-NEXT: retl
%res = call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %a0) ; <<4 x float>> [#uses=1]
ret <4 x float> %res
}
@@ -1516,7 +1986,10 @@ declare <4 x float> @llvm.x86.sse.rcp.ss(<4 x float>) nounwind readnone
define <4 x float> @test_x86_sse_rsqrt_ps(<4 x float> %a0) {
- ; CHECK: vrsqrtps
+; CHECK-LABEL: test_x86_sse_rsqrt_ps:
+; CHECK: # BB#0:
+; CHECK-NEXT: vrsqrtps %xmm0, %xmm0
+; CHECK-NEXT: retl
%res = call <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float> %a0) ; <<4 x float>> [#uses=1]
ret <4 x float> %res
}
@@ -1524,7 +1997,10 @@ declare <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float>) nounwind readnone
define <4 x float> @test_x86_sse_rsqrt_ss(<4 x float> %a0) {
- ; CHECK: vrsqrtss
+; CHECK-LABEL: test_x86_sse_rsqrt_ss:
+; CHECK: # BB#0:
+; CHECK-NEXT: vrsqrtss %xmm0, %xmm0, %xmm0
+; CHECK-NEXT: retl
%res = call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> %a0) ; <<4 x float>> [#uses=1]
ret <4 x float> %res
}
@@ -1532,7 +2008,10 @@ declare <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float>) nounwind readnone
define <4 x float> @test_x86_sse_sqrt_ps(<4 x float> %a0) {
- ; CHECK: vsqrtps
+; CHECK-LABEL: test_x86_sse_sqrt_ps:
+; CHECK: # BB#0:
+; CHECK-NEXT: vsqrtps %xmm0, %xmm0
+; CHECK-NEXT: retl
%res = call <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float> %a0) ; <<4 x float>> [#uses=1]
ret <4 x float> %res
}
@@ -1540,7 +2019,10 @@ declare <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float>) nounwind readnone
define <4 x float> @test_x86_sse_sqrt_ss(<4 x float> %a0) {
- ; CHECK: vsqrtss
+; CHECK-LABEL: test_x86_sse_sqrt_ss:
+; CHECK: # BB#0:
+; CHECK-NEXT: vsqrtss %xmm0, %xmm0, %xmm0
+; CHECK-NEXT: retl
%res = call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %a0) ; <<4 x float>> [#uses=1]
ret <4 x float> %res
}
@@ -1548,8 +2030,11 @@ declare <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float>) nounwind readnone
define void @test_x86_sse_stmxcsr(i8* %a0) {
- ; CHECK: movl
- ; CHECK: vstmxcsr
+; CHECK-LABEL: test_x86_sse_stmxcsr:
+; CHECK: # BB#0:
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT: vstmxcsr (%eax)
+; CHECK-NEXT: retl
call void @llvm.x86.sse.stmxcsr(i8* %a0)
ret void
}
@@ -1557,8 +2042,11 @@ declare void @llvm.x86.sse.stmxcsr(i8*) nounwind
define void @test_x86_sse_storeu_ps(i8* %a0, <4 x float> %a1) {
- ; CHECK: movl
- ; CHECK: vmovups
+; CHECK-LABEL: test_x86_sse_storeu_ps:
+; CHECK: # BB#0:
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT: vmovups %xmm0, (%eax)
+; CHECK-NEXT: retl
call void @llvm.x86.sse.storeu.ps(i8* %a0, <4 x float> %a1)
ret void
}
@@ -1566,7 +2054,10 @@ declare void @llvm.x86.sse.storeu.ps(i8*, <4 x float>) nounwind
define <4 x float> @test_x86_sse_sub_ss(<4 x float> %a0, <4 x float> %a1) {
- ; CHECK: vsubss
+; CHECK-LABEL: test_x86_sse_sub_ss:
+; CHECK: # BB#0:
+; CHECK-NEXT: vsubss %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: retl
%res = call <4 x float> @llvm.x86.sse.sub.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
ret <4 x float> %res
}
@@ -1574,9 +2065,12 @@ declare <4 x float> @llvm.x86.sse.sub.ss(<4 x float>, <4 x float>) nounwind read
define i32 @test_x86_sse_ucomieq_ss(<4 x float> %a0, <4 x float> %a1) {
- ; CHECK: vucomiss
- ; CHECK: sete
- ; CHECK: movzbl
+; CHECK-LABEL: test_x86_sse_ucomieq_ss:
+; CHECK: # BB#0:
+; CHECK-NEXT: vucomiss %xmm1, %xmm0
+; CHECK-NEXT: sete %al
+; CHECK-NEXT: movzbl %al, %eax
+; CHECK-NEXT: retl
%res = call i32 @llvm.x86.sse.ucomieq.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
ret i32 %res
}
@@ -1584,9 +2078,12 @@ declare i32 @llvm.x86.sse.ucomieq.ss(<4 x float>, <4 x float>) nounwind readnone
define i32 @test_x86_sse_ucomige_ss(<4 x float> %a0, <4 x float> %a1) {
- ; CHECK: vucomiss
- ; CHECK: setae
- ; CHECK: movzbl
+; CHECK-LABEL: test_x86_sse_ucomige_ss:
+; CHECK: # BB#0:
+; CHECK-NEXT: vucomiss %xmm1, %xmm0
+; CHECK-NEXT: setae %al
+; CHECK-NEXT: movzbl %al, %eax
+; CHECK-NEXT: retl
%res = call i32 @llvm.x86.sse.ucomige.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
ret i32 %res
}
@@ -1594,9 +2091,12 @@ declare i32 @llvm.x86.sse.ucomige.ss(<4 x float>, <4 x float>) nounwind readnone
define i32 @test_x86_sse_ucomigt_ss(<4 x float> %a0, <4 x float> %a1) {
- ; CHECK: vucomiss
- ; CHECK: seta
- ; CHECK: movzbl
+; CHECK-LABEL: test_x86_sse_ucomigt_ss:
+; CHECK: # BB#0:
+; CHECK-NEXT: vucomiss %xmm1, %xmm0
+; CHECK-NEXT: seta %al
+; CHECK-NEXT: movzbl %al, %eax
+; CHECK-NEXT: retl
%res = call i32 @llvm.x86.sse.ucomigt.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
ret i32 %res
}
@@ -1604,9 +2104,12 @@ declare i32 @llvm.x86.sse.ucomigt.ss(<4 x float>, <4 x float>) nounwind readnone
define i32 @test_x86_sse_ucomile_ss(<4 x float> %a0, <4 x float> %a1) {
- ; CHECK: vucomiss
- ; CHECK: setbe
- ; CHECK: movzbl
+; CHECK-LABEL: test_x86_sse_ucomile_ss:
+; CHECK: # BB#0:
+; CHECK-NEXT: vucomiss %xmm1, %xmm0
+; CHECK-NEXT: setbe %al
+; CHECK-NEXT: movzbl %al, %eax
+; CHECK-NEXT: retl
%res = call i32 @llvm.x86.sse.ucomile.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
ret i32 %res
}
@@ -1614,8 +2117,12 @@ declare i32 @llvm.x86.sse.ucomile.ss(<4 x float>, <4 x float>) nounwind readnone
define i32 @test_x86_sse_ucomilt_ss(<4 x float> %a0, <4 x float> %a1) {
- ; CHECK: vucomiss
- ; CHECK: sbbl
+; CHECK-LABEL: test_x86_sse_ucomilt_ss:
+; CHECK: # BB#0:
+; CHECK-NEXT: vucomiss %xmm1, %xmm0
+; CHECK-NEXT: sbbl %eax, %eax
+; CHECK-NEXT: andl $1, %eax
+; CHECK-NEXT: retl
%res = call i32 @llvm.x86.sse.ucomilt.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
ret i32 %res
}
@@ -1623,9 +2130,12 @@ declare i32 @llvm.x86.sse.ucomilt.ss(<4 x float>, <4 x float>) nounwind readnone
define i32 @test_x86_sse_ucomineq_ss(<4 x float> %a0, <4 x float> %a1) {
- ; CHECK: vucomiss
- ; CHECK: setne
- ; CHECK: movzbl
+; CHECK-LABEL: test_x86_sse_ucomineq_ss:
+; CHECK: # BB#0:
+; CHECK-NEXT: vucomiss %xmm1, %xmm0
+; CHECK-NEXT: setne %al
+; CHECK-NEXT: movzbl %al, %eax
+; CHECK-NEXT: retl
%res = call i32 @llvm.x86.sse.ucomineq.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
ret i32 %res
}
@@ -1633,7 +2143,10 @@ declare i32 @llvm.x86.sse.ucomineq.ss(<4 x float>, <4 x float>) nounwind readnon
define <16 x i8> @test_x86_ssse3_pabs_b_128(<16 x i8> %a0) {
- ; CHECK: vpabsb
+; CHECK-LABEL: test_x86_ssse3_pabs_b_128:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpabsb %xmm0, %xmm0
+; CHECK-NEXT: retl
%res = call <16 x i8> @llvm.x86.ssse3.pabs.b.128(<16 x i8> %a0) ; <<16 x i8>> [#uses=1]
ret <16 x i8> %res
}
@@ -1641,7 +2154,10 @@ declare <16 x i8> @llvm.x86.ssse3.pabs.b.128(<16 x i8>) nounwind readnone
define <4 x i32> @test_x86_ssse3_pabs_d_128(<4 x i32> %a0) {
- ; CHECK: vpabsd
+; CHECK-LABEL: test_x86_ssse3_pabs_d_128:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpabsd %xmm0, %xmm0
+; CHECK-NEXT: retl
%res = call <4 x i32> @llvm.x86.ssse3.pabs.d.128(<4 x i32> %a0) ; <<4 x i32>> [#uses=1]
ret <4 x i32> %res
}
@@ -1649,7 +2165,10 @@ declare <4 x i32> @llvm.x86.ssse3.pabs.d.128(<4 x i32>) nounwind readnone
define <8 x i16> @test_x86_ssse3_pabs_w_128(<8 x i16> %a0) {
- ; CHECK: vpabsw
+; CHECK-LABEL: test_x86_ssse3_pabs_w_128:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpabsw %xmm0, %xmm0
+; CHECK-NEXT: retl
%res = call <8 x i16> @llvm.x86.ssse3.pabs.w.128(<8 x i16> %a0) ; <<8 x i16>> [#uses=1]
ret <8 x i16> %res
}
@@ -1657,7 +2176,10 @@ declare <8 x i16> @llvm.x86.ssse3.pabs.w.128(<8 x i16>) nounwind readnone
define <4 x i32> @test_x86_ssse3_phadd_d_128(<4 x i32> %a0, <4 x i32> %a1) {
- ; CHECK: vphaddd
+; CHECK-LABEL: test_x86_ssse3_phadd_d_128:
+; CHECK: # BB#0:
+; CHECK-NEXT: vphaddd %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: retl
%res = call <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
ret <4 x i32> %res
}
@@ -1665,7 +2187,10 @@ declare <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32>, <4 x i32>) nounwind rea
define <8 x i16> @test_x86_ssse3_phadd_sw_128(<8 x i16> %a0, <8 x i16> %a1) {
- ; CHECK: vphaddsw
+; CHECK-LABEL: test_x86_ssse3_phadd_sw_128:
+; CHECK: # BB#0:
+; CHECK-NEXT: vphaddsw %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: retl
%res = call <8 x i16> @llvm.x86.ssse3.phadd.sw.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
ret <8 x i16> %res
}
@@ -1673,7 +2198,10 @@ declare <8 x i16> @llvm.x86.ssse3.phadd.sw.128(<8 x i16>, <8 x i16>) nounwind re
define <8 x i16> @test_x86_ssse3_phadd_w_128(<8 x i16> %a0, <8 x i16> %a1) {
- ; CHECK: vphaddw
+; CHECK-LABEL: test_x86_ssse3_phadd_w_128:
+; CHECK: # BB#0:
+; CHECK-NEXT: vphaddw %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: retl
%res = call <8 x i16> @llvm.x86.ssse3.phadd.w.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
ret <8 x i16> %res
}
@@ -1681,7 +2209,10 @@ declare <8 x i16> @llvm.x86.ssse3.phadd.w.128(<8 x i16>, <8 x i16>) nounwind rea
define <4 x i32> @test_x86_ssse3_phsub_d_128(<4 x i32> %a0, <4 x i32> %a1) {
- ; CHECK: vphsubd
+; CHECK-LABEL: test_x86_ssse3_phsub_d_128:
+; CHECK: # BB#0:
+; CHECK-NEXT: vphsubd %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: retl
%res = call <4 x i32> @llvm.x86.ssse3.phsub.d.128(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
ret <4 x i32> %res
}
@@ -1689,7 +2220,10 @@ declare <4 x i32> @llvm.x86.ssse3.phsub.d.128(<4 x i32>, <4 x i32>) nounwind rea
define <8 x i16> @test_x86_ssse3_phsub_sw_128(<8 x i16> %a0, <8 x i16> %a1) {
- ; CHECK: vphsubsw
+; CHECK-LABEL: test_x86_ssse3_phsub_sw_128:
+; CHECK: # BB#0:
+; CHECK-NEXT: vphsubsw %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: retl
%res = call <8 x i16> @llvm.x86.ssse3.phsub.sw.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
ret <8 x i16> %res
}
@@ -1697,7 +2231,10 @@ declare <8 x i16> @llvm.x86.ssse3.phsub.sw.128(<8 x i16>, <8 x i16>) nounwind re
define <8 x i16> @test_x86_ssse3_phsub_w_128(<8 x i16> %a0, <8 x i16> %a1) {
- ; CHECK: vphsubw
+; CHECK-LABEL: test_x86_ssse3_phsub_w_128:
+; CHECK: # BB#0:
+; CHECK-NEXT: vphsubw %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: retl
%res = call <8 x i16> @llvm.x86.ssse3.phsub.w.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
ret <8 x i16> %res
}
@@ -1705,7 +2242,10 @@ declare <8 x i16> @llvm.x86.ssse3.phsub.w.128(<8 x i16>, <8 x i16>) nounwind rea
define <8 x i16> @test_x86_ssse3_pmadd_ub_sw_128(<16 x i8> %a0, <16 x i8> %a1) {
- ; CHECK: vpmaddubsw
+; CHECK-LABEL: test_x86_ssse3_pmadd_ub_sw_128:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpmaddubsw %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: retl
%res = call <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8> %a0, <16 x i8> %a1) ; <<8 x i16>> [#uses=1]
ret <8 x i16> %res
}
@@ -1713,7 +2253,10 @@ declare <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8>, <16 x i8>) nounwind
define <8 x i16> @test_x86_ssse3_pmul_hr_sw_128(<8 x i16> %a0, <8 x i16> %a1) {
- ; CHECK: vpmulhrsw
+; CHECK-LABEL: test_x86_ssse3_pmul_hr_sw_128:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpmulhrsw %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: retl
%res = call <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
ret <8 x i16> %res
}
@@ -1721,7 +2264,10 @@ declare <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16>, <8 x i16>) nounwind
define <16 x i8> @test_x86_ssse3_pshuf_b_128(<16 x i8> %a0, <16 x i8> %a1) {
- ; CHECK: vpshufb
+; CHECK-LABEL: test_x86_ssse3_pshuf_b_128:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpshufb %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: retl
%res = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
ret <16 x i8> %res
}
@@ -1729,7 +2275,10 @@ declare <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8>, <16 x i8>) nounwind rea
define <16 x i8> @test_x86_ssse3_psign_b_128(<16 x i8> %a0, <16 x i8> %a1) {
- ; CHECK: vpsignb
+; CHECK-LABEL: test_x86_ssse3_psign_b_128:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpsignb %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: retl
%res = call <16 x i8> @llvm.x86.ssse3.psign.b.128(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
ret <16 x i8> %res
}
@@ -1737,7 +2286,10 @@ declare <16 x i8> @llvm.x86.ssse3.psign.b.128(<16 x i8>, <16 x i8>) nounwind rea
define <4 x i32> @test_x86_ssse3_psign_d_128(<4 x i32> %a0, <4 x i32> %a1) {
- ; CHECK: vpsignd
+; CHECK-LABEL: test_x86_ssse3_psign_d_128:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpsignd %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: retl
%res = call <4 x i32> @llvm.x86.ssse3.psign.d.128(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
ret <4 x i32> %res
}
@@ -1745,7 +2297,10 @@ declare <4 x i32> @llvm.x86.ssse3.psign.d.128(<4 x i32>, <4 x i32>) nounwind rea
define <8 x i16> @test_x86_ssse3_psign_w_128(<8 x i16> %a0, <8 x i16> %a1) {
- ; CHECK: vpsignw
+; CHECK-LABEL: test_x86_ssse3_psign_w_128:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpsignw %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: retl
%res = call <8 x i16> @llvm.x86.ssse3.psign.w.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
ret <8 x i16> %res
}
@@ -1753,7 +2308,10 @@ declare <8 x i16> @llvm.x86.ssse3.psign.w.128(<8 x i16>, <8 x i16>) nounwind rea
define <4 x double> @test_x86_avx_addsub_pd_256(<4 x double> %a0, <4 x double> %a1) {
- ; CHECK: vaddsubpd
+; CHECK-LABEL: test_x86_avx_addsub_pd_256:
+; CHECK: # BB#0:
+; CHECK-NEXT: vaddsubpd %ymm1, %ymm0, %ymm0
+; CHECK-NEXT: retl
%res = call <4 x double> @llvm.x86.avx.addsub.pd.256(<4 x double> %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1]
ret <4 x double> %res
}
@@ -1761,31 +2319,21 @@ declare <4 x double> @llvm.x86.avx.addsub.pd.256(<4 x double>, <4 x double>) nou
define <8 x float> @test_x86_avx_addsub_ps_256(<8 x float> %a0, <8 x float> %a1) {
- ; CHECK: vaddsubps
+; CHECK-LABEL: test_x86_avx_addsub_ps_256:
+; CHECK: # BB#0:
+; CHECK-NEXT: vaddsubps %ymm1, %ymm0, %ymm0
+; CHECK-NEXT: retl
%res = call <8 x float> @llvm.x86.avx.addsub.ps.256(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1]
ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.addsub.ps.256(<8 x float>, <8 x float>) nounwind readnone
-define <4 x double> @test_x86_avx_blend_pd_256(<4 x double> %a0, <4 x double> %a1) {
- ; CHECK: vblendpd
- %res = call <4 x double> @llvm.x86.avx.blend.pd.256(<4 x double> %a0, <4 x double> %a1, i8 7) ; <<4 x double>> [#uses=1]
- ret <4 x double> %res
-}
-declare <4 x double> @llvm.x86.avx.blend.pd.256(<4 x double>, <4 x double>, i8) nounwind readnone
-
-
-define <8 x float> @test_x86_avx_blend_ps_256(<8 x float> %a0, <8 x float> %a1) {
- ; CHECK: vblendps
- %res = call <8 x float> @llvm.x86.avx.blend.ps.256(<8 x float> %a0, <8 x float> %a1, i8 7) ; <<8 x float>> [#uses=1]
- ret <8 x float> %res
-}
-declare <8 x float> @llvm.x86.avx.blend.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone
-
-
define <4 x double> @test_x86_avx_blendv_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) {
- ; CHECK: vblendvpd
+; CHECK-LABEL: test_x86_avx_blendv_pd_256:
+; CHECK: # BB#0:
+; CHECK-NEXT: vblendvpd %ymm2, %ymm1, %ymm0, %ymm0
+; CHECK-NEXT: retl
%res = call <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) ; <<4 x double>> [#uses=1]
ret <4 x double> %res
}
@@ -1793,7 +2341,10 @@ declare <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double>, <4 x double>, <4
define <8 x float> @test_x86_avx_blendv_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) {
- ; CHECK: vblendvps
+; CHECK-LABEL: test_x86_avx_blendv_ps_256:
+; CHECK: # BB#0:
+; CHECK-NEXT: vblendvps %ymm2, %ymm1, %ymm0, %ymm0
+; CHECK-NEXT: retl
%res = call <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) ; <<8 x float>> [#uses=1]
ret <8 x float> %res
}
@@ -1801,7 +2352,10 @@ declare <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float>, <8 x float>, <8 x f
define <4 x double> @test_x86_avx_cmp_pd_256(<4 x double> %a0, <4 x double> %a1) {
- ; CHECK: vcmpordpd
+; CHECK-LABEL: test_x86_avx_cmp_pd_256:
+; CHECK: # BB#0:
+; CHECK-NEXT: vcmpordpd %ymm1, %ymm0, %ymm0
+; CHECK-NEXT: retl
%res = call <4 x double> @llvm.x86.avx.cmp.pd.256(<4 x double> %a0, <4 x double> %a1, i8 7) ; <<4 x double>> [#uses=1]
ret <4 x double> %res
}
@@ -1809,75 +2363,81 @@ declare <4 x double> @llvm.x86.avx.cmp.pd.256(<4 x double>, <4 x double>, i8) no
define <8 x float> @test_x86_avx_cmp_ps_256(<8 x float> %a0, <8 x float> %a1) {
- ; CHECK: vcmpordps
+; CHECK-LABEL: test_x86_avx_cmp_ps_256:
+; CHECK: # BB#0:
+; CHECK-NEXT: vcmpordps %ymm1, %ymm0, %ymm0
+; CHECK-NEXT: retl
%res = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a1, i8 7) ; <<8 x float>> [#uses=1]
ret <8 x float> %res
}
define <8 x float> @test_x86_avx_cmp_ps_256_pseudo_op(<8 x float> %a0, <8 x float> %a1) {
- ; CHECK: vcmpeqps
+; CHECK-LABEL: test_x86_avx_cmp_ps_256_pseudo_op:
+; CHECK: # BB#0:
+; CHECK-NEXT: vcmpeqps %ymm1, %ymm0, %ymm1
+; CHECK-NEXT: vcmpltps %ymm1, %ymm0, %ymm1
+; CHECK-NEXT: vcmpleps %ymm1, %ymm0, %ymm1
+; CHECK-NEXT: vcmpunordps %ymm1, %ymm0, %ymm1
+; CHECK-NEXT: vcmpneqps %ymm1, %ymm0, %ymm1
+; CHECK-NEXT: vcmpnltps %ymm1, %ymm0, %ymm1
+; CHECK-NEXT: vcmpnleps %ymm1, %ymm0, %ymm1
+; CHECK-NEXT: vcmpordps %ymm1, %ymm0, %ymm1
+; CHECK-NEXT: vcmpeq_uqps %ymm1, %ymm0, %ymm1
+; CHECK-NEXT: vcmpngeps %ymm1, %ymm0, %ymm1
+; CHECK-NEXT: vcmpngtps %ymm1, %ymm0, %ymm1
+; CHECK-NEXT: vcmpfalseps %ymm1, %ymm0, %ymm1
+; CHECK-NEXT: vcmpneq_oqps %ymm1, %ymm0, %ymm1
+; CHECK-NEXT: vcmpgeps %ymm1, %ymm0, %ymm1
+; CHECK-NEXT: vcmpgtps %ymm1, %ymm0, %ymm1
+; CHECK-NEXT: vcmptrueps %ymm1, %ymm0, %ymm1
+; CHECK-NEXT: vcmpeq_osps %ymm1, %ymm0, %ymm1
+; CHECK-NEXT: vcmplt_oqps %ymm1, %ymm0, %ymm1
+; CHECK-NEXT: vcmple_oqps %ymm1, %ymm0, %ymm1
+; CHECK-NEXT: vcmpunord_sps %ymm1, %ymm0, %ymm1
+; CHECK-NEXT: vcmpneq_usps %ymm1, %ymm0, %ymm1
+; CHECK-NEXT: vcmpnlt_uqps %ymm1, %ymm0, %ymm1
+; CHECK-NEXT: vcmpnle_uqps %ymm1, %ymm0, %ymm1
+; CHECK-NEXT: vcmpord_sps %ymm1, %ymm0, %ymm1
+; CHECK-NEXT: vcmpeq_usps %ymm1, %ymm0, %ymm1
+; CHECK-NEXT: vcmpnge_uqps %ymm1, %ymm0, %ymm1
+; CHECK-NEXT: vcmpngt_uqps %ymm1, %ymm0, %ymm1
+; CHECK-NEXT: vcmpfalse_osps %ymm1, %ymm0, %ymm1
+; CHECK-NEXT: vcmpneq_osps %ymm1, %ymm0, %ymm1
+; CHECK-NEXT: vcmpge_oqps %ymm1, %ymm0, %ymm1
+; CHECK-NEXT: vcmpgt_oqps %ymm1, %ymm0, %ymm1
+; CHECK-NEXT: vcmptrue_usps %ymm1, %ymm0, %ymm0
+; CHECK-NEXT: retl
%a2 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a1, i8 0) ; <<8 x float>> [#uses=1]
- ; CHECK: vcmpltps
%a3 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a2, i8 1) ; <<8 x float>> [#uses=1]
- ; CHECK: vcmpleps
%a4 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a3, i8 2) ; <<8 x float>> [#uses=1]
- ; CHECK: vcmpunordps
%a5 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a4, i8 3) ; <<8 x float>> [#uses=1]
- ; CHECK: vcmpneqps
%a6 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a5, i8 4) ; <<8 x float>> [#uses=1]
- ; CHECK: vcmpnltps
%a7 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a6, i8 5) ; <<8 x float>> [#uses=1]
- ; CHECK: vcmpnleps
%a8 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a7, i8 6) ; <<8 x float>> [#uses=1]
- ; CHECK: vcmpordps
%a9 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a8, i8 7) ; <<8 x float>> [#uses=1]
- ; CHECK: vcmpeq_uqps
%a10 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a9, i8 8) ; <<8 x float>> [#uses=1]
- ; CHECK: vcmpngeps
%a11 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a10, i8 9) ; <<8 x float>> [#uses=1]
- ; CHECK: vcmpngtps
%a12 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a11, i8 10) ; <<8 x float>> [#uses=1]
- ; CHECK: vcmpfalseps
%a13 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a12, i8 11) ; <<8 x float>> [#uses=1]
- ; CHECK: vcmpneq_oqps
%a14 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a13, i8 12) ; <<8 x float>> [#uses=1]
- ; CHECK: vcmpgeps
%a15 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a14, i8 13) ; <<8 x float>> [#uses=1]
- ; CHECK: vcmpgtps
%a16 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a15, i8 14) ; <<8 x float>> [#uses=1]
- ; CHECK: vcmptrueps
%a17 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a16, i8 15) ; <<8 x float>> [#uses=1]
- ; CHECK: vcmpeq_osps
%a18 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a17, i8 16) ; <<8 x float>> [#uses=1]
- ; CHECK: vcmplt_oqps
%a19 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a18, i8 17) ; <<8 x float>> [#uses=1]
- ; CHECK: vcmple_oqps
%a20 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a19, i8 18) ; <<8 x float>> [#uses=1]
- ; CHECK: vcmpunord_sps
%a21 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a20, i8 19) ; <<8 x float>> [#uses=1]
- ; CHECK: vcmpneq_usps
%a22 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a21, i8 20) ; <<8 x float>> [#uses=1]
- ; CHECK: vcmpnlt_uqps
%a23 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a22, i8 21) ; <<8 x float>> [#uses=1]
- ; CHECK: vcmpnle_uqps
%a24 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a23, i8 22) ; <<8 x float>> [#uses=1]
- ; CHECK: vcmpord_sps
%a25 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a24, i8 23) ; <<8 x float>> [#uses=1]
- ; CHECK: vcmpeq_usps
%a26 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a25, i8 24) ; <<8 x float>> [#uses=1]
- ; CHECK: vcmpnge_uqps
%a27 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a26, i8 25) ; <<8 x float>> [#uses=1]
- ; CHECK: vcmpngt_uqps
%a28 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a27, i8 26) ; <<8 x float>> [#uses=1]
- ; CHECK: vcmpfalse_osps
%a29 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a28, i8 27) ; <<8 x float>> [#uses=1]
- ; CHECK: vcmpneq_osps
%a30 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a29, i8 28) ; <<8 x float>> [#uses=1]
- ; CHECK: vcmpge_oqps
%a31 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a30, i8 29) ; <<8 x float>> [#uses=1]
- ; CHECK: vcmpgt_oqps
%a32 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a31, i8 30) ; <<8 x float>> [#uses=1]
- ; CHECK: vcmptrue_usps
%res = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a32, i8 31) ; <<8 x float>> [#uses=1]
ret <8 x float> %res
}
@@ -1885,7 +2445,11 @@ declare <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float>, <8 x float>, i8) nounw
define <4 x float> @test_x86_avx_cvt_pd2_ps_256(<4 x double> %a0) {
- ; CHECK: vcvtpd2psy
+; CHECK-LABEL: test_x86_avx_cvt_pd2_ps_256:
+; CHECK: # BB#0:
+; CHECK-NEXT: vcvtpd2psy %ymm0, %xmm0
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retl
%res = call <4 x float> @llvm.x86.avx.cvt.pd2.ps.256(<4 x double> %a0) ; <<4 x float>> [#uses=1]
ret <4 x float> %res
}
@@ -1893,7 +2457,11 @@ declare <4 x float> @llvm.x86.avx.cvt.pd2.ps.256(<4 x double>) nounwind readnone
define <4 x i32> @test_x86_avx_cvt_pd2dq_256(<4 x double> %a0) {
- ; CHECK: vcvtpd2dqy
+; CHECK-LABEL: test_x86_avx_cvt_pd2dq_256:
+; CHECK: # BB#0:
+; CHECK-NEXT: vcvtpd2dqy %ymm0, %xmm0
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retl
%res = call <4 x i32> @llvm.x86.avx.cvt.pd2dq.256(<4 x double> %a0) ; <<4 x i32>> [#uses=1]
ret <4 x i32> %res
}
@@ -1901,7 +2469,10 @@ declare <4 x i32> @llvm.x86.avx.cvt.pd2dq.256(<4 x double>) nounwind readnone
define <4 x double> @test_x86_avx_cvt_ps2_pd_256(<4 x float> %a0) {
- ; CHECK: vcvtps2pd
+; CHECK-LABEL: test_x86_avx_cvt_ps2_pd_256:
+; CHECK: # BB#0:
+; CHECK-NEXT: vcvtps2pd %xmm0, %ymm0
+; CHECK-NEXT: retl
%res = call <4 x double> @llvm.x86.avx.cvt.ps2.pd.256(<4 x float> %a0) ; <<4 x double>> [#uses=1]
ret <4 x double> %res
}
@@ -1909,7 +2480,10 @@ declare <4 x double> @llvm.x86.avx.cvt.ps2.pd.256(<4 x float>) nounwind readnone
define <8 x i32> @test_x86_avx_cvt_ps2dq_256(<8 x float> %a0) {
- ; CHECK: vcvtps2dq
+; CHECK-LABEL: test_x86_avx_cvt_ps2dq_256:
+; CHECK: # BB#0:
+; CHECK-NEXT: vcvtps2dq %ymm0, %ymm0
+; CHECK-NEXT: retl
%res = call <8 x i32> @llvm.x86.avx.cvt.ps2dq.256(<8 x float> %a0) ; <<8 x i32>> [#uses=1]
ret <8 x i32> %res
}
@@ -1917,7 +2491,10 @@ declare <8 x i32> @llvm.x86.avx.cvt.ps2dq.256(<8 x float>) nounwind readnone
define <4 x double> @test_x86_avx_cvtdq2_pd_256(<4 x i32> %a0) {
- ; CHECK: vcvtdq2pd
+; CHECK-LABEL: test_x86_avx_cvtdq2_pd_256:
+; CHECK: # BB#0:
+; CHECK-NEXT: vcvtdq2pd %xmm0, %ymm0
+; CHECK-NEXT: retl
%res = call <4 x double> @llvm.x86.avx.cvtdq2.pd.256(<4 x i32> %a0) ; <<4 x double>> [#uses=1]
ret <4 x double> %res
}
@@ -1925,7 +2502,10 @@ declare <4 x double> @llvm.x86.avx.cvtdq2.pd.256(<4 x i32>) nounwind readnone
define <8 x float> @test_x86_avx_cvtdq2_ps_256(<8 x i32> %a0) {
- ; CHECK: vcvtdq2ps
+; CHECK-LABEL: test_x86_avx_cvtdq2_ps_256:
+; CHECK: # BB#0:
+; CHECK-NEXT: vcvtdq2ps %ymm0, %ymm0
+; CHECK-NEXT: retl
%res = call <8 x float> @llvm.x86.avx.cvtdq2.ps.256(<8 x i32> %a0) ; <<8 x float>> [#uses=1]
ret <8 x float> %res
}
@@ -1933,7 +2513,11 @@ declare <8 x float> @llvm.x86.avx.cvtdq2.ps.256(<8 x i32>) nounwind readnone
define <4 x i32> @test_x86_avx_cvtt_pd2dq_256(<4 x double> %a0) {
- ; CHECK: vcvttpd2dqy
+; CHECK-LABEL: test_x86_avx_cvtt_pd2dq_256:
+; CHECK: # BB#0:
+; CHECK-NEXT: vcvttpd2dqy %ymm0, %xmm0
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retl
%res = call <4 x i32> @llvm.x86.avx.cvtt.pd2dq.256(<4 x double> %a0) ; <<4 x i32>> [#uses=1]
ret <4 x i32> %res
}
@@ -1941,7 +2525,10 @@ declare <4 x i32> @llvm.x86.avx.cvtt.pd2dq.256(<4 x double>) nounwind readnone
define <8 x i32> @test_x86_avx_cvtt_ps2dq_256(<8 x float> %a0) {
- ; CHECK: vcvttps2dq
+; CHECK-LABEL: test_x86_avx_cvtt_ps2dq_256:
+; CHECK: # BB#0:
+; CHECK-NEXT: vcvttps2dq %ymm0, %ymm0
+; CHECK-NEXT: retl
%res = call <8 x i32> @llvm.x86.avx.cvtt.ps2dq.256(<8 x float> %a0) ; <<8 x i32>> [#uses=1]
ret <8 x i32> %res
}
@@ -1949,7 +2536,10 @@ declare <8 x i32> @llvm.x86.avx.cvtt.ps2dq.256(<8 x float>) nounwind readnone
define <8 x float> @test_x86_avx_dp_ps_256(<8 x float> %a0, <8 x float> %a1) {
- ; CHECK: vdpps
+; CHECK-LABEL: test_x86_avx_dp_ps_256:
+; CHECK: # BB#0:
+; CHECK-NEXT: vdpps $7, %ymm1, %ymm0, %ymm0
+; CHECK-NEXT: retl
%res = call <8 x float> @llvm.x86.avx.dp.ps.256(<8 x float> %a0, <8 x float> %a1, i8 7) ; <<8 x float>> [#uses=1]
ret <8 x float> %res
}
@@ -1957,7 +2547,10 @@ declare <8 x float> @llvm.x86.avx.dp.ps.256(<8 x float>, <8 x float>, i8) nounwi
define <4 x double> @test_x86_avx_hadd_pd_256(<4 x double> %a0, <4 x double> %a1) {
- ; CHECK: vhaddpd
+; CHECK-LABEL: test_x86_avx_hadd_pd_256:
+; CHECK: # BB#0:
+; CHECK-NEXT: vhaddpd %ymm1, %ymm0, %ymm0
+; CHECK-NEXT: retl
%res = call <4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double> %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1]
ret <4 x double> %res
}
@@ -1965,7 +2558,10 @@ declare <4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double>, <4 x double>) nounw
define <8 x float> @test_x86_avx_hadd_ps_256(<8 x float> %a0, <8 x float> %a1) {
- ; CHECK: vhaddps
+; CHECK-LABEL: test_x86_avx_hadd_ps_256:
+; CHECK: # BB#0:
+; CHECK-NEXT: vhaddps %ymm1, %ymm0, %ymm0
+; CHECK-NEXT: retl
%res = call <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1]
ret <8 x float> %res
}
@@ -1973,7 +2569,10 @@ declare <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float>, <8 x float>) nounwind
define <4 x double> @test_x86_avx_hsub_pd_256(<4 x double> %a0, <4 x double> %a1) {
- ; CHECK: vhsubpd
+; CHECK-LABEL: test_x86_avx_hsub_pd_256:
+; CHECK: # BB#0:
+; CHECK-NEXT: vhsubpd %ymm1, %ymm0, %ymm0
+; CHECK-NEXT: retl
%res = call <4 x double> @llvm.x86.avx.hsub.pd.256(<4 x double> %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1]
ret <4 x double> %res
}
@@ -1981,7 +2580,10 @@ declare <4 x double> @llvm.x86.avx.hsub.pd.256(<4 x double>, <4 x double>) nounw
define <8 x float> @test_x86_avx_hsub_ps_256(<8 x float> %a0, <8 x float> %a1) {
- ; CHECK: vhsubps
+; CHECK-LABEL: test_x86_avx_hsub_ps_256:
+; CHECK: # BB#0:
+; CHECK-NEXT: vhsubps %ymm1, %ymm0, %ymm0
+; CHECK-NEXT: retl
%res = call <8 x float> @llvm.x86.avx.hsub.ps.256(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1]
ret <8 x float> %res
}
@@ -1989,7 +2591,11 @@ declare <8 x float> @llvm.x86.avx.hsub.ps.256(<8 x float>, <8 x float>) nounwind
define <32 x i8> @test_x86_avx_ldu_dq_256(i8* %a0) {
- ; CHECK: vlddqu
+; CHECK-LABEL: test_x86_avx_ldu_dq_256:
+; CHECK: # BB#0:
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT: vlddqu (%eax), %ymm0
+; CHECK-NEXT: retl
%res = call <32 x i8> @llvm.x86.avx.ldu.dq.256(i8* %a0) ; <<32 x i8>> [#uses=1]
ret <32 x i8> %res
}
@@ -1997,7 +2603,11 @@ declare <32 x i8> @llvm.x86.avx.ldu.dq.256(i8*) nounwind readonly
define <2 x double> @test_x86_avx_maskload_pd(i8* %a0, <2 x double> %a1) {
- ; CHECK: vmaskmovpd
+; CHECK-LABEL: test_x86_avx_maskload_pd:
+; CHECK: # BB#0:
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT: vmaskmovpd (%eax), %xmm0, %xmm0
+; CHECK-NEXT: retl
%res = call <2 x double> @llvm.x86.avx.maskload.pd(i8* %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
ret <2 x double> %res
}
@@ -2005,7 +2615,11 @@ declare <2 x double> @llvm.x86.avx.maskload.pd(i8*, <2 x double>) nounwind reado
define <4 x double> @test_x86_avx_maskload_pd_256(i8* %a0, <4 x double> %a1) {
- ; CHECK: vmaskmovpd
+; CHECK-LABEL: test_x86_avx_maskload_pd_256:
+; CHECK: # BB#0:
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT: vmaskmovpd (%eax), %ymm0, %ymm0
+; CHECK-NEXT: retl
%res = call <4 x double> @llvm.x86.avx.maskload.pd.256(i8* %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1]
ret <4 x double> %res
}
@@ -2013,7 +2627,11 @@ declare <4 x double> @llvm.x86.avx.maskload.pd.256(i8*, <4 x double>) nounwind r
define <4 x float> @test_x86_avx_maskload_ps(i8* %a0, <4 x float> %a1) {
- ; CHECK: vmaskmovps
+; CHECK-LABEL: test_x86_avx_maskload_ps:
+; CHECK: # BB#0:
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT: vmaskmovps (%eax), %xmm0, %xmm0
+; CHECK-NEXT: retl
%res = call <4 x float> @llvm.x86.avx.maskload.ps(i8* %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
ret <4 x float> %res
}
@@ -2021,7 +2639,11 @@ declare <4 x float> @llvm.x86.avx.maskload.ps(i8*, <4 x float>) nounwind readonl
define <8 x float> @test_x86_avx_maskload_ps_256(i8* %a0, <8 x float> %a1) {
- ; CHECK: vmaskmovps
+; CHECK-LABEL: test_x86_avx_maskload_ps_256:
+; CHECK: # BB#0:
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT: vmaskmovps (%eax), %ymm0, %ymm0
+; CHECK-NEXT: retl
%res = call <8 x float> @llvm.x86.avx.maskload.ps.256(i8* %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1]
ret <8 x float> %res
}
@@ -2029,7 +2651,11 @@ declare <8 x float> @llvm.x86.avx.maskload.ps.256(i8*, <8 x float>) nounwind rea
define void @test_x86_avx_maskstore_pd(i8* %a0, <2 x double> %a1, <2 x double> %a2) {
- ; CHECK: vmaskmovpd
+; CHECK-LABEL: test_x86_avx_maskstore_pd:
+; CHECK: # BB#0:
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT: vmaskmovpd %xmm1, %xmm0, (%eax)
+; CHECK-NEXT: retl
call void @llvm.x86.avx.maskstore.pd(i8* %a0, <2 x double> %a1, <2 x double> %a2)
ret void
}
@@ -2037,7 +2663,12 @@ declare void @llvm.x86.avx.maskstore.pd(i8*, <2 x double>, <2 x double>) nounwin
define void @test_x86_avx_maskstore_pd_256(i8* %a0, <4 x double> %a1, <4 x double> %a2) {
- ; CHECK: vmaskmovpd
+; CHECK-LABEL: test_x86_avx_maskstore_pd_256:
+; CHECK: # BB#0:
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT: vmaskmovpd %ymm1, %ymm0, (%eax)
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retl
call void @llvm.x86.avx.maskstore.pd.256(i8* %a0, <4 x double> %a1, <4 x double> %a2)
ret void
}
@@ -2045,7 +2676,11 @@ declare void @llvm.x86.avx.maskstore.pd.256(i8*, <4 x double>, <4 x double>) nou
define void @test_x86_avx_maskstore_ps(i8* %a0, <4 x float> %a1, <4 x float> %a2) {
- ; CHECK: vmaskmovps
+; CHECK-LABEL: test_x86_avx_maskstore_ps:
+; CHECK: # BB#0:
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT: vmaskmovps %xmm1, %xmm0, (%eax)
+; CHECK-NEXT: retl
call void @llvm.x86.avx.maskstore.ps(i8* %a0, <4 x float> %a1, <4 x float> %a2)
ret void
}
@@ -2053,7 +2688,12 @@ declare void @llvm.x86.avx.maskstore.ps(i8*, <4 x float>, <4 x float>) nounwind
define void @test_x86_avx_maskstore_ps_256(i8* %a0, <8 x float> %a1, <8 x float> %a2) {
- ; CHECK: vmaskmovps
+; CHECK-LABEL: test_x86_avx_maskstore_ps_256:
+; CHECK: # BB#0:
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT: vmaskmovps %ymm1, %ymm0, (%eax)
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retl
call void @llvm.x86.avx.maskstore.ps.256(i8* %a0, <8 x float> %a1, <8 x float> %a2)
ret void
}
@@ -2061,7 +2701,10 @@ declare void @llvm.x86.avx.maskstore.ps.256(i8*, <8 x float>, <8 x float>) nounw
define <4 x double> @test_x86_avx_max_pd_256(<4 x double> %a0, <4 x double> %a1) {
- ; CHECK: vmaxpd
+; CHECK-LABEL: test_x86_avx_max_pd_256:
+; CHECK: # BB#0:
+; CHECK-NEXT: vmaxpd %ymm1, %ymm0, %ymm0
+; CHECK-NEXT: retl
%res = call <4 x double> @llvm.x86.avx.max.pd.256(<4 x double> %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1]
ret <4 x double> %res
}
@@ -2069,7 +2712,10 @@ declare <4 x double> @llvm.x86.avx.max.pd.256(<4 x double>, <4 x double>) nounwi
define <8 x float> @test_x86_avx_max_ps_256(<8 x float> %a0, <8 x float> %a1) {
- ; CHECK: vmaxps
+; CHECK-LABEL: test_x86_avx_max_ps_256:
+; CHECK: # BB#0:
+; CHECK-NEXT: vmaxps %ymm1, %ymm0, %ymm0
+; CHECK-NEXT: retl
%res = call <8 x float> @llvm.x86.avx.max.ps.256(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1]
ret <8 x float> %res
}
@@ -2077,7 +2723,10 @@ declare <8 x float> @llvm.x86.avx.max.ps.256(<8 x float>, <8 x float>) nounwind
define <4 x double> @test_x86_avx_min_pd_256(<4 x double> %a0, <4 x double> %a1) {
- ; CHECK: vminpd
+; CHECK-LABEL: test_x86_avx_min_pd_256:
+; CHECK: # BB#0:
+; CHECK-NEXT: vminpd %ymm1, %ymm0, %ymm0
+; CHECK-NEXT: retl
%res = call <4 x double> @llvm.x86.avx.min.pd.256(<4 x double> %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1]
ret <4 x double> %res
}
@@ -2085,7 +2734,10 @@ declare <4 x double> @llvm.x86.avx.min.pd.256(<4 x double>, <4 x double>) nounwi
define <8 x float> @test_x86_avx_min_ps_256(<8 x float> %a0, <8 x float> %a1) {
- ; CHECK: vminps
+; CHECK-LABEL: test_x86_avx_min_ps_256:
+; CHECK: # BB#0:
+; CHECK-NEXT: vminps %ymm1, %ymm0, %ymm0
+; CHECK-NEXT: retl
%res = call <8 x float> @llvm.x86.avx.min.ps.256(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1]
ret <8 x float> %res
}
@@ -2093,7 +2745,11 @@ declare <8 x float> @llvm.x86.avx.min.ps.256(<8 x float>, <8 x float>) nounwind
define i32 @test_x86_avx_movmsk_pd_256(<4 x double> %a0) {
- ; CHECK: vmovmskpd
+; CHECK-LABEL: test_x86_avx_movmsk_pd_256:
+; CHECK: # BB#0:
+; CHECK-NEXT: vmovmskpd %ymm0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retl
%res = call i32 @llvm.x86.avx.movmsk.pd.256(<4 x double> %a0) ; <i32> [#uses=1]
ret i32 %res
}
@@ -2101,7 +2757,11 @@ declare i32 @llvm.x86.avx.movmsk.pd.256(<4 x double>) nounwind readnone
define i32 @test_x86_avx_movmsk_ps_256(<8 x float> %a0) {
- ; CHECK: vmovmskps
+; CHECK-LABEL: test_x86_avx_movmsk_ps_256:
+; CHECK: # BB#0:
+; CHECK-NEXT: vmovmskps %ymm0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retl
%res = call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> %a0) ; <i32> [#uses=1]
ret i32 %res
}
@@ -2114,8 +2774,13 @@ declare i32 @llvm.x86.avx.movmsk.ps.256(<8 x float>) nounwind readnone
define i32 @test_x86_avx_ptestc_256(<4 x i64> %a0, <4 x i64> %a1) {
- ; CHECK: vptest
- ; CHECK: sbbl
+; CHECK-LABEL: test_x86_avx_ptestc_256:
+; CHECK: # BB#0:
+; CHECK-NEXT: vptest %ymm1, %ymm0
+; CHECK-NEXT: sbbl %eax, %eax
+; CHECK-NEXT: andl $1, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retl
%res = call i32 @llvm.x86.avx.ptestc.256(<4 x i64> %a0, <4 x i64> %a1) ; <i32> [#uses=1]
ret i32 %res
}
@@ -2123,9 +2788,13 @@ declare i32 @llvm.x86.avx.ptestc.256(<4 x i64>, <4 x i64>) nounwind readnone
define i32 @test_x86_avx_ptestnzc_256(<4 x i64> %a0, <4 x i64> %a1) {
- ; CHECK: vptest
- ; CHECK: seta
- ; CHECK: movzbl
+; CHECK-LABEL: test_x86_avx_ptestnzc_256:
+; CHECK: # BB#0:
+; CHECK-NEXT: vptest %ymm1, %ymm0
+; CHECK-NEXT: seta %al
+; CHECK-NEXT: movzbl %al, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retl
%res = call i32 @llvm.x86.avx.ptestnzc.256(<4 x i64> %a0, <4 x i64> %a1) ; <i32> [#uses=1]
ret i32 %res
}
@@ -2133,9 +2802,13 @@ declare i32 @llvm.x86.avx.ptestnzc.256(<4 x i64>, <4 x i64>) nounwind readnone
define i32 @test_x86_avx_ptestz_256(<4 x i64> %a0, <4 x i64> %a1) {
- ; CHECK: vptest
- ; CHECK: sete
- ; CHECK: movzbl
+; CHECK-LABEL: test_x86_avx_ptestz_256:
+; CHECK: # BB#0:
+; CHECK-NEXT: vptest %ymm1, %ymm0
+; CHECK-NEXT: sete %al
+; CHECK-NEXT: movzbl %al, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retl
%res = call i32 @llvm.x86.avx.ptestz.256(<4 x i64> %a0, <4 x i64> %a1) ; <i32> [#uses=1]
ret i32 %res
}
@@ -2143,7 +2816,10 @@ declare i32 @llvm.x86.avx.ptestz.256(<4 x i64>, <4 x i64>) nounwind readnone
define <8 x float> @test_x86_avx_rcp_ps_256(<8 x float> %a0) {
- ; CHECK: vrcpps
+; CHECK-LABEL: test_x86_avx_rcp_ps_256:
+; CHECK: # BB#0:
+; CHECK-NEXT: vrcpps %ymm0, %ymm0
+; CHECK-NEXT: retl
%res = call <8 x float> @llvm.x86.avx.rcp.ps.256(<8 x float> %a0) ; <<8 x float>> [#uses=1]
ret <8 x float> %res
}
@@ -2151,7 +2827,10 @@ declare <8 x float> @llvm.x86.avx.rcp.ps.256(<8 x float>) nounwind readnone
define <4 x double> @test_x86_avx_round_pd_256(<4 x double> %a0) {
- ; CHECK: vroundpd
+; CHECK-LABEL: test_x86_avx_round_pd_256:
+; CHECK: # BB#0:
+; CHECK-NEXT: vroundpd $7, %ymm0, %ymm0
+; CHECK-NEXT: retl
%res = call <4 x double> @llvm.x86.avx.round.pd.256(<4 x double> %a0, i32 7) ; <<4 x double>> [#uses=1]
ret <4 x double> %res
}
@@ -2159,7 +2838,10 @@ declare <4 x double> @llvm.x86.avx.round.pd.256(<4 x double>, i32) nounwind read
define <8 x float> @test_x86_avx_round_ps_256(<8 x float> %a0) {
- ; CHECK: vroundps
+; CHECK-LABEL: test_x86_avx_round_ps_256:
+; CHECK: # BB#0:
+; CHECK-NEXT: vroundps $7, %ymm0, %ymm0
+; CHECK-NEXT: retl
%res = call <8 x float> @llvm.x86.avx.round.ps.256(<8 x float> %a0, i32 7) ; <<8 x float>> [#uses=1]
ret <8 x float> %res
}
@@ -2167,7 +2849,10 @@ declare <8 x float> @llvm.x86.avx.round.ps.256(<8 x float>, i32) nounwind readno
define <8 x float> @test_x86_avx_rsqrt_ps_256(<8 x float> %a0) {
- ; CHECK: vrsqrtps
+; CHECK-LABEL: test_x86_avx_rsqrt_ps_256:
+; CHECK: # BB#0:
+; CHECK-NEXT: vrsqrtps %ymm0, %ymm0
+; CHECK-NEXT: retl
%res = call <8 x float> @llvm.x86.avx.rsqrt.ps.256(<8 x float> %a0) ; <<8 x float>> [#uses=1]
ret <8 x float> %res
}
@@ -2175,7 +2860,10 @@ declare <8 x float> @llvm.x86.avx.rsqrt.ps.256(<8 x float>) nounwind readnone
define <4 x double> @test_x86_avx_sqrt_pd_256(<4 x double> %a0) {
- ; CHECK: vsqrtpd
+; CHECK-LABEL: test_x86_avx_sqrt_pd_256:
+; CHECK: # BB#0:
+; CHECK-NEXT: vsqrtpd %ymm0, %ymm0
+; CHECK-NEXT: retl
%res = call <4 x double> @llvm.x86.avx.sqrt.pd.256(<4 x double> %a0) ; <<4 x double>> [#uses=1]
ret <4 x double> %res
}
@@ -2183,7 +2871,10 @@ declare <4 x double> @llvm.x86.avx.sqrt.pd.256(<4 x double>) nounwind readnone
define <8 x float> @test_x86_avx_sqrt_ps_256(<8 x float> %a0) {
- ; CHECK: vsqrtps
+; CHECK-LABEL: test_x86_avx_sqrt_ps_256:
+; CHECK: # BB#0:
+; CHECK-NEXT: vsqrtps %ymm0, %ymm0
+; CHECK-NEXT: retl
%res = call <8 x float> @llvm.x86.avx.sqrt.ps.256(<8 x float> %a0) ; <<8 x float>> [#uses=1]
ret <8 x float> %res
}
@@ -2192,8 +2883,18 @@ declare <8 x float> @llvm.x86.avx.sqrt.ps.256(<8 x float>) nounwind readnone
define void @test_x86_avx_storeu_dq_256(i8* %a0, <32 x i8> %a1) {
; FIXME: unfortunately the execution domain fix pass changes this to vmovups and it's hard to force with no 256-bit integer instructions
- ; CHECK: vmovups
; The add operation forces the execution domain.
+; CHECK-LABEL: test_x86_avx_storeu_dq_256:
+; CHECK: # BB#0:
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm1
+; CHECK-NEXT: vmovdqa {{.*#+}} xmm2 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
+; CHECK-NEXT: vpaddb %xmm2, %xmm1, %xmm1
+; CHECK-NEXT: vpaddb %xmm2, %xmm0, %xmm0
+; CHECK-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; CHECK-NEXT: vmovups %ymm0, (%eax)
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retl
%a2 = add <32 x i8> %a1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
call void @llvm.x86.avx.storeu.dq.256(i8* %a0, <32 x i8> %a2)
ret void
@@ -2202,8 +2903,15 @@ declare void @llvm.x86.avx.storeu.dq.256(i8*, <32 x i8>) nounwind
define void @test_x86_avx_storeu_pd_256(i8* %a0, <4 x double> %a1) {
- ; CHECK: vmovupd
; The add operation forces the execution domain.
+; CHECK-LABEL: test_x86_avx_storeu_pd_256:
+; CHECK: # BB#0:
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT: vxorpd %ymm1, %ymm1, %ymm1
+; CHECK-NEXT: vaddpd %ymm1, %ymm0, %ymm0
+; CHECK-NEXT: vmovupd %ymm0, (%eax)
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retl
%a2 = fadd <4 x double> %a1, <double 0x0, double 0x0, double 0x0, double 0x0>
call void @llvm.x86.avx.storeu.pd.256(i8* %a0, <4 x double> %a2)
ret void
@@ -2212,7 +2920,12 @@ declare void @llvm.x86.avx.storeu.pd.256(i8*, <4 x double>) nounwind
define void @test_x86_avx_storeu_ps_256(i8* %a0, <8 x float> %a1) {
- ; CHECK: vmovups
+; CHECK-LABEL: test_x86_avx_storeu_ps_256:
+; CHECK: # BB#0:
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT: vmovups %ymm0, (%eax)
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retl
call void @llvm.x86.avx.storeu.ps.256(i8* %a0, <8 x float> %a1)
ret void
}
@@ -2220,7 +2933,11 @@ declare void @llvm.x86.avx.storeu.ps.256(i8*, <8 x float>) nounwind
define <4 x double> @test_x86_avx_vbroadcastf128_pd_256(i8* %a0) {
- ; CHECK: vbroadcastf128
+; CHECK-LABEL: test_x86_avx_vbroadcastf128_pd_256:
+; CHECK: # BB#0:
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT: vbroadcastf128 (%eax), %ymm0
+; CHECK-NEXT: retl
%res = call <4 x double> @llvm.x86.avx.vbroadcastf128.pd.256(i8* %a0) ; <<4 x double>> [#uses=1]
ret <4 x double> %res
}
@@ -2228,63 +2945,22 @@ declare <4 x double> @llvm.x86.avx.vbroadcastf128.pd.256(i8*) nounwind readonly
define <8 x float> @test_x86_avx_vbroadcastf128_ps_256(i8* %a0) {
- ; CHECK: vbroadcastf128
+; CHECK-LABEL: test_x86_avx_vbroadcastf128_ps_256:
+; CHECK: # BB#0:
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT: vbroadcastf128 (%eax), %ymm0
+; CHECK-NEXT: retl
%res = call <8 x float> @llvm.x86.avx.vbroadcastf128.ps.256(i8* %a0) ; <<8 x float>> [#uses=1]
ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.vbroadcastf128.ps.256(i8*) nounwind readonly
-define <2 x double> @test_x86_avx_vextractf128_pd_256(<4 x double> %a0) {
- ; CHECK: vextractf128
- %res = call <2 x double> @llvm.x86.avx.vextractf128.pd.256(<4 x double> %a0, i8 7) ; <<2 x double>> [#uses=1]
- ret <2 x double> %res
-}
-declare <2 x double> @llvm.x86.avx.vextractf128.pd.256(<4 x double>, i8) nounwind readnone
-
-
-define <4 x float> @test_x86_avx_vextractf128_ps_256(<8 x float> %a0) {
- ; CHECK: vextractf128
- %res = call <4 x float> @llvm.x86.avx.vextractf128.ps.256(<8 x float> %a0, i8 7) ; <<4 x float>> [#uses=1]
- ret <4 x float> %res
-}
-declare <4 x float> @llvm.x86.avx.vextractf128.ps.256(<8 x float>, i8) nounwind readnone
-
-
-define <4 x i32> @test_x86_avx_vextractf128_si_256(<8 x i32> %a0) {
- ; CHECK: vextractf128
- %res = call <4 x i32> @llvm.x86.avx.vextractf128.si.256(<8 x i32> %a0, i8 7) ; <<4 x i32>> [#uses=1]
- ret <4 x i32> %res
-}
-declare <4 x i32> @llvm.x86.avx.vextractf128.si.256(<8 x i32>, i8) nounwind readnone
-
-
-define <4 x double> @test_x86_avx_vinsertf128_pd_256(<4 x double> %a0, <2 x double> %a1) {
- ; CHECK: vinsertf128
- %res = call <4 x double> @llvm.x86.avx.vinsertf128.pd.256(<4 x double> %a0, <2 x double> %a1, i8 7) ; <<4 x double>> [#uses=1]
- ret <4 x double> %res
-}
-declare <4 x double> @llvm.x86.avx.vinsertf128.pd.256(<4 x double>, <2 x double>, i8) nounwind readnone
-
-
-define <8 x float> @test_x86_avx_vinsertf128_ps_256(<8 x float> %a0, <4 x float> %a1) {
- ; CHECK: vinsertf128
- %res = call <8 x float> @llvm.x86.avx.vinsertf128.ps.256(<8 x float> %a0, <4 x float> %a1, i8 7) ; <<8 x float>> [#uses=1]
- ret <8 x float> %res
-}
-declare <8 x float> @llvm.x86.avx.vinsertf128.ps.256(<8 x float>, <4 x float>, i8) nounwind readnone
-
-
-define <8 x i32> @test_x86_avx_vinsertf128_si_256(<8 x i32> %a0, <4 x i32> %a1) {
- ; CHECK: vinsertf128
- %res = call <8 x i32> @llvm.x86.avx.vinsertf128.si.256(<8 x i32> %a0, <4 x i32> %a1, i8 7) ; <<8 x i32>> [#uses=1]
- ret <8 x i32> %res
-}
-declare <8 x i32> @llvm.x86.avx.vinsertf128.si.256(<8 x i32>, <4 x i32>, i8) nounwind readnone
-
-
define <4 x double> @test_x86_avx_vperm2f128_pd_256(<4 x double> %a0, <4 x double> %a1) {
- ; CHECK: vperm2f128
+; CHECK-LABEL: test_x86_avx_vperm2f128_pd_256:
+; CHECK: # BB#0:
+; CHECK-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[0,1]
+; CHECK-NEXT: retl
%res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 7) ; <<4 x double>> [#uses=1]
ret <4 x double> %res
}
@@ -2292,7 +2968,10 @@ declare <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double>, <4 x double>,
define <8 x float> @test_x86_avx_vperm2f128_ps_256(<8 x float> %a0, <8 x float> %a1) {
- ; CHECK: vperm2f128
+; CHECK-LABEL: test_x86_avx_vperm2f128_ps_256:
+; CHECK: # BB#0:
+; CHECK-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[0,1]
+; CHECK-NEXT: retl
%res = call <8 x float> @llvm.x86.avx.vperm2f128.ps.256(<8 x float> %a0, <8 x float> %a1, i8 7) ; <<8 x float>> [#uses=1]
ret <8 x float> %res
}
@@ -2300,7 +2979,10 @@ declare <8 x float> @llvm.x86.avx.vperm2f128.ps.256(<8 x float>, <8 x float>, i8
define <8 x i32> @test_x86_avx_vperm2f128_si_256(<8 x i32> %a0, <8 x i32> %a1) {
- ; CHECK: vperm2f128
+; CHECK-LABEL: test_x86_avx_vperm2f128_si_256:
+; CHECK: # BB#0:
+; CHECK-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[0,1]
+; CHECK-NEXT: retl
%res = call <8 x i32> @llvm.x86.avx.vperm2f128.si.256(<8 x i32> %a0, <8 x i32> %a1, i8 7) ; <<8 x i32>> [#uses=1]
ret <8 x i32> %res
}
@@ -2308,7 +2990,10 @@ declare <8 x i32> @llvm.x86.avx.vperm2f128.si.256(<8 x i32>, <8 x i32>, i8) noun
define <2 x double> @test_x86_avx_vpermil_pd(<2 x double> %a0) {
- ; CHECK: vpermilpd
+; CHECK-LABEL: test_x86_avx_vpermil_pd:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
+; CHECK-NEXT: retl
%res = call <2 x double> @llvm.x86.avx.vpermil.pd(<2 x double> %a0, i8 1) ; <<2 x double>> [#uses=1]
ret <2 x double> %res
}
@@ -2316,7 +3001,10 @@ declare <2 x double> @llvm.x86.avx.vpermil.pd(<2 x double>, i8) nounwind readnon
define <4 x double> @test_x86_avx_vpermil_pd_256(<4 x double> %a0) {
- ; CHECK: vpermilpd
+; CHECK-LABEL: test_x86_avx_vpermil_pd_256:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,1,3,2]
+; CHECK-NEXT: retl
%res = call <4 x double> @llvm.x86.avx.vpermil.pd.256(<4 x double> %a0, i8 7) ; <<4 x double>> [#uses=1]
ret <4 x double> %res
}
@@ -2324,7 +3012,10 @@ declare <4 x double> @llvm.x86.avx.vpermil.pd.256(<4 x double>, i8) nounwind rea
define <4 x float> @test_x86_avx_vpermil_ps(<4 x float> %a0) {
- ; CHECK: vpermilps
+; CHECK-LABEL: test_x86_avx_vpermil_ps:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,0,0]
+; CHECK-NEXT: retl
%res = call <4 x float> @llvm.x86.avx.vpermil.ps(<4 x float> %a0, i8 7) ; <<4 x float>> [#uses=1]
ret <4 x float> %res
}
@@ -2332,7 +3023,10 @@ declare <4 x float> @llvm.x86.avx.vpermil.ps(<4 x float>, i8) nounwind readnone
define <8 x float> @test_x86_avx_vpermil_ps_256(<8 x float> %a0) {
- ; CHECK: vpermilps
+; CHECK-LABEL: test_x86_avx_vpermil_ps_256:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,1,0,0,7,5,4,4]
+; CHECK-NEXT: retl
%res = call <8 x float> @llvm.x86.avx.vpermil.ps.256(<8 x float> %a0, i8 7) ; <<8 x float>> [#uses=1]
ret <8 x float> %res
}
@@ -2340,7 +3034,10 @@ declare <8 x float> @llvm.x86.avx.vpermil.ps.256(<8 x float>, i8) nounwind readn
define <2 x double> @test_x86_avx_vpermilvar_pd(<2 x double> %a0, <2 x i64> %a1) {
- ; CHECK: vpermilpd
+; CHECK-LABEL: test_x86_avx_vpermilvar_pd:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpermilpd %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: retl
%res = call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %a0, <2 x i64> %a1) ; <<2 x double>> [#uses=1]
ret <2 x double> %res
}
@@ -2348,7 +3045,10 @@ declare <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double>, <2 x i64>) nounwi
define <4 x double> @test_x86_avx_vpermilvar_pd_256(<4 x double> %a0, <4 x i64> %a1) {
- ; CHECK: vpermilpd
+; CHECK-LABEL: test_x86_avx_vpermilvar_pd_256:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpermilpd %ymm1, %ymm0, %ymm0
+; CHECK-NEXT: retl
%res = call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %a0, <4 x i64> %a1) ; <<4 x double>> [#uses=1]
ret <4 x double> %res
}
@@ -2356,13 +3056,20 @@ declare <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double>, <4 x i64>) no
define <4 x float> @test_x86_avx_vpermilvar_ps(<4 x float> %a0, <4 x i32> %a1) {
- ; CHECK: vpermilps
+; CHECK-LABEL: test_x86_avx_vpermilvar_ps:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpermilps %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: retl
%res = call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> %a1) ; <<4 x float>> [#uses=1]
ret <4 x float> %res
}
define <4 x float> @test_x86_avx_vpermilvar_ps_load(<4 x float> %a0, <4 x i32>* %a1) {
- ; CHECK: vpermilps
- %a2 = load <4 x i32>* %a1
+; CHECK-LABEL: test_x86_avx_vpermilvar_ps_load:
+; CHECK: # BB#0:
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT: vpermilps (%eax), %xmm0, %xmm0
+; CHECK-NEXT: retl
+ %a2 = load <4 x i32>, <4 x i32>* %a1
%res = call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> %a2) ; <<4 x float>> [#uses=1]
ret <4 x float> %res
}
@@ -2370,7 +3077,10 @@ declare <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float>, <4 x i32>) nounwind
define <8 x float> @test_x86_avx_vpermilvar_ps_256(<8 x float> %a0, <8 x i32> %a1) {
- ; CHECK: vpermilps
+; CHECK-LABEL: test_x86_avx_vpermilvar_ps_256:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpermilps %ymm1, %ymm0, %ymm0
+; CHECK-NEXT: retl
%res = call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %a0, <8 x i32> %a1) ; <<8 x float>> [#uses=1]
ret <8 x float> %res
}
@@ -2378,8 +3088,12 @@ declare <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float>, <8 x i32>) noun
define i32 @test_x86_avx_vtestc_pd(<2 x double> %a0, <2 x double> %a1) {
- ; CHECK: vtestpd
- ; CHECK: sbbl
+; CHECK-LABEL: test_x86_avx_vtestc_pd:
+; CHECK: # BB#0:
+; CHECK-NEXT: vtestpd %xmm1, %xmm0
+; CHECK-NEXT: sbbl %eax, %eax
+; CHECK-NEXT: andl $1, %eax
+; CHECK-NEXT: retl
%res = call i32 @llvm.x86.avx.vtestc.pd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
ret i32 %res
}
@@ -2387,8 +3101,13 @@ declare i32 @llvm.x86.avx.vtestc.pd(<2 x double>, <2 x double>) nounwind readnon
define i32 @test_x86_avx_vtestc_pd_256(<4 x double> %a0, <4 x double> %a1) {
- ; CHECK: vtestpd
- ; CHECK: sbbl
+; CHECK-LABEL: test_x86_avx_vtestc_pd_256:
+; CHECK: # BB#0:
+; CHECK-NEXT: vtestpd %ymm1, %ymm0
+; CHECK-NEXT: sbbl %eax, %eax
+; CHECK-NEXT: andl $1, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retl
%res = call i32 @llvm.x86.avx.vtestc.pd.256(<4 x double> %a0, <4 x double> %a1) ; <i32> [#uses=1]
ret i32 %res
}
@@ -2396,8 +3115,12 @@ declare i32 @llvm.x86.avx.vtestc.pd.256(<4 x double>, <4 x double>) nounwind rea
define i32 @test_x86_avx_vtestc_ps(<4 x float> %a0, <4 x float> %a1) {
- ; CHECK: vtestps
- ; CHECK: sbbl
+; CHECK-LABEL: test_x86_avx_vtestc_ps:
+; CHECK: # BB#0:
+; CHECK-NEXT: vtestps %xmm1, %xmm0
+; CHECK-NEXT: sbbl %eax, %eax
+; CHECK-NEXT: andl $1, %eax
+; CHECK-NEXT: retl
%res = call i32 @llvm.x86.avx.vtestc.ps(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
ret i32 %res
}
@@ -2405,8 +3128,13 @@ declare i32 @llvm.x86.avx.vtestc.ps(<4 x float>, <4 x float>) nounwind readnone
define i32 @test_x86_avx_vtestc_ps_256(<8 x float> %a0, <8 x float> %a1) {
- ; CHECK: vtestps
- ; CHECK: sbbl
+; CHECK-LABEL: test_x86_avx_vtestc_ps_256:
+; CHECK: # BB#0:
+; CHECK-NEXT: vtestps %ymm1, %ymm0
+; CHECK-NEXT: sbbl %eax, %eax
+; CHECK-NEXT: andl $1, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retl
%res = call i32 @llvm.x86.avx.vtestc.ps.256(<8 x float> %a0, <8 x float> %a1) ; <i32> [#uses=1]
ret i32 %res
}
@@ -2414,9 +3142,12 @@ declare i32 @llvm.x86.avx.vtestc.ps.256(<8 x float>, <8 x float>) nounwind readn
define i32 @test_x86_avx_vtestnzc_pd(<2 x double> %a0, <2 x double> %a1) {
- ; CHECK: vtestpd
- ; CHECK: seta
- ; CHECK: movzbl
+; CHECK-LABEL: test_x86_avx_vtestnzc_pd:
+; CHECK: # BB#0:
+; CHECK-NEXT: vtestpd %xmm1, %xmm0
+; CHECK-NEXT: seta %al
+; CHECK-NEXT: movzbl %al, %eax
+; CHECK-NEXT: retl
%res = call i32 @llvm.x86.avx.vtestnzc.pd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
ret i32 %res
}
@@ -2424,9 +3155,13 @@ declare i32 @llvm.x86.avx.vtestnzc.pd(<2 x double>, <2 x double>) nounwind readn
define i32 @test_x86_avx_vtestnzc_pd_256(<4 x double> %a0, <4 x double> %a1) {
- ; CHECK: vtestpd
- ; CHECK: seta
- ; CHECK: movzbl
+; CHECK-LABEL: test_x86_avx_vtestnzc_pd_256:
+; CHECK: # BB#0:
+; CHECK-NEXT: vtestpd %ymm1, %ymm0
+; CHECK-NEXT: seta %al
+; CHECK-NEXT: movzbl %al, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retl
%res = call i32 @llvm.x86.avx.vtestnzc.pd.256(<4 x double> %a0, <4 x double> %a1) ; <i32> [#uses=1]
ret i32 %res
}
@@ -2434,9 +3169,12 @@ declare i32 @llvm.x86.avx.vtestnzc.pd.256(<4 x double>, <4 x double>) nounwind r
define i32 @test_x86_avx_vtestnzc_ps(<4 x float> %a0, <4 x float> %a1) {
- ; CHECK: vtestps
- ; CHECK: seta
- ; CHECK: movzbl
+; CHECK-LABEL: test_x86_avx_vtestnzc_ps:
+; CHECK: # BB#0:
+; CHECK-NEXT: vtestps %xmm1, %xmm0
+; CHECK-NEXT: seta %al
+; CHECK-NEXT: movzbl %al, %eax
+; CHECK-NEXT: retl
%res = call i32 @llvm.x86.avx.vtestnzc.ps(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
ret i32 %res
}
@@ -2444,9 +3182,13 @@ declare i32 @llvm.x86.avx.vtestnzc.ps(<4 x float>, <4 x float>) nounwind readnon
define i32 @test_x86_avx_vtestnzc_ps_256(<8 x float> %a0, <8 x float> %a1) {
- ; CHECK: vtestps
- ; CHECK: seta
- ; CHECK: movzbl
+; CHECK-LABEL: test_x86_avx_vtestnzc_ps_256:
+; CHECK: # BB#0:
+; CHECK-NEXT: vtestps %ymm1, %ymm0
+; CHECK-NEXT: seta %al
+; CHECK-NEXT: movzbl %al, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retl
%res = call i32 @llvm.x86.avx.vtestnzc.ps.256(<8 x float> %a0, <8 x float> %a1) ; <i32> [#uses=1]
ret i32 %res
}
@@ -2454,9 +3196,12 @@ declare i32 @llvm.x86.avx.vtestnzc.ps.256(<8 x float>, <8 x float>) nounwind rea
define i32 @test_x86_avx_vtestz_pd(<2 x double> %a0, <2 x double> %a1) {
- ; CHECK: vtestpd
- ; CHECK: sete
- ; CHECK: movzbl
+; CHECK-LABEL: test_x86_avx_vtestz_pd:
+; CHECK: # BB#0:
+; CHECK-NEXT: vtestpd %xmm1, %xmm0
+; CHECK-NEXT: sete %al
+; CHECK-NEXT: movzbl %al, %eax
+; CHECK-NEXT: retl
%res = call i32 @llvm.x86.avx.vtestz.pd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
ret i32 %res
}
@@ -2464,9 +3209,13 @@ declare i32 @llvm.x86.avx.vtestz.pd(<2 x double>, <2 x double>) nounwind readnon
define i32 @test_x86_avx_vtestz_pd_256(<4 x double> %a0, <4 x double> %a1) {
- ; CHECK: vtestpd
- ; CHECK: sete
- ; CHECK: movzbl
+; CHECK-LABEL: test_x86_avx_vtestz_pd_256:
+; CHECK: # BB#0:
+; CHECK-NEXT: vtestpd %ymm1, %ymm0
+; CHECK-NEXT: sete %al
+; CHECK-NEXT: movzbl %al, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retl
%res = call i32 @llvm.x86.avx.vtestz.pd.256(<4 x double> %a0, <4 x double> %a1) ; <i32> [#uses=1]
ret i32 %res
}
@@ -2474,9 +3223,12 @@ declare i32 @llvm.x86.avx.vtestz.pd.256(<4 x double>, <4 x double>) nounwind rea
define i32 @test_x86_avx_vtestz_ps(<4 x float> %a0, <4 x float> %a1) {
- ; CHECK: vtestps
- ; CHECK: sete
- ; CHECK: movzbl
+; CHECK-LABEL: test_x86_avx_vtestz_ps:
+; CHECK: # BB#0:
+; CHECK-NEXT: vtestps %xmm1, %xmm0
+; CHECK-NEXT: sete %al
+; CHECK-NEXT: movzbl %al, %eax
+; CHECK-NEXT: retl
%res = call i32 @llvm.x86.avx.vtestz.ps(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
ret i32 %res
}
@@ -2484,9 +3236,13 @@ declare i32 @llvm.x86.avx.vtestz.ps(<4 x float>, <4 x float>) nounwind readnone
define i32 @test_x86_avx_vtestz_ps_256(<8 x float> %a0, <8 x float> %a1) {
- ; CHECK: vtestps
- ; CHECK: sete
- ; CHECK: movzbl
+; CHECK-LABEL: test_x86_avx_vtestz_ps_256:
+; CHECK: # BB#0:
+; CHECK-NEXT: vtestps %ymm1, %ymm0
+; CHECK-NEXT: sete %al
+; CHECK-NEXT: movzbl %al, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retl
%res = call i32 @llvm.x86.avx.vtestz.ps.256(<8 x float> %a0, <8 x float> %a1) ; <i32> [#uses=1]
ret i32 %res
}
@@ -2494,7 +3250,11 @@ declare i32 @llvm.x86.avx.vtestz.ps.256(<8 x float>, <8 x float>) nounwind readn
define void @test_x86_avx_vzeroall() {
- ; CHECK: vzeroall
+; CHECK-LABEL: test_x86_avx_vzeroall:
+; CHECK: # BB#0:
+; CHECK-NEXT: vzeroall
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retl
call void @llvm.x86.avx.vzeroall()
ret void
}
@@ -2502,7 +3262,11 @@ declare void @llvm.x86.avx.vzeroall() nounwind
define void @test_x86_avx_vzeroupper() {
- ; CHECK: vzeroupper
+; CHECK-LABEL: test_x86_avx_vzeroupper:
+; CHECK: # BB#0:
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retl
call void @llvm.x86.avx.vzeroupper()
ret void
}
@@ -2510,93 +3274,143 @@ declare void @llvm.x86.avx.vzeroupper() nounwind
; Make sure instructions with no AVX equivalents, but are associated with SSEX feature flags still work
-; CHECK: monitor
define void @monitor(i8* %P, i32 %E, i32 %H) nounwind {
-entry:
+; CHECK-LABEL: monitor:
+; CHECK: # BB#0:
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT: leal (%eax), %eax
+; CHECK-NEXT: monitor
+; CHECK-NEXT: retl
tail call void @llvm.x86.sse3.monitor(i8* %P, i32 %E, i32 %H)
ret void
}
declare void @llvm.x86.sse3.monitor(i8*, i32, i32) nounwind
-; CHECK: mwait
define void @mwait(i32 %E, i32 %H) nounwind {
-entry:
+; CHECK-LABEL: mwait:
+; CHECK: # BB#0:
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT: mwait
+; CHECK-NEXT: retl
tail call void @llvm.x86.sse3.mwait(i32 %E, i32 %H)
ret void
}
declare void @llvm.x86.sse3.mwait(i32, i32) nounwind
-; CHECK: sfence
define void @sfence() nounwind {
-entry:
+; CHECK-LABEL: sfence:
+; CHECK: # BB#0:
+; CHECK-NEXT: sfence
+; CHECK-NEXT: retl
tail call void @llvm.x86.sse.sfence()
ret void
}
declare void @llvm.x86.sse.sfence() nounwind
-; CHECK: lfence
define void @lfence() nounwind {
-entry:
+; CHECK-LABEL: lfence:
+; CHECK: # BB#0:
+; CHECK-NEXT: lfence
+; CHECK-NEXT: retl
tail call void @llvm.x86.sse2.lfence()
ret void
}
declare void @llvm.x86.sse2.lfence() nounwind
-; CHECK: mfence
define void @mfence() nounwind {
-entry:
+; CHECK-LABEL: mfence:
+; CHECK: # BB#0:
+; CHECK-NEXT: mfence
+; CHECK-NEXT: retl
tail call void @llvm.x86.sse2.mfence()
ret void
}
declare void @llvm.x86.sse2.mfence() nounwind
-; CHECK: clflush
define void @clflush(i8* %p) nounwind {
-entry:
+; CHECK-LABEL: clflush:
+; CHECK: # BB#0:
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT: clflush (%eax)
+; CHECK-NEXT: retl
tail call void @llvm.x86.sse2.clflush(i8* %p)
ret void
}
declare void @llvm.x86.sse2.clflush(i8*) nounwind
-; CHECK: crc32b
define i32 @crc32_32_8(i32 %a, i8 %b) nounwind {
+; CHECK-LABEL: crc32_32_8:
+; CHECK: # BB#0:
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT: crc32b {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT: retl
%tmp = call i32 @llvm.x86.sse42.crc32.32.8(i32 %a, i8 %b)
ret i32 %tmp
}
declare i32 @llvm.x86.sse42.crc32.32.8(i32, i8) nounwind
-; CHECK: crc32w
define i32 @crc32_32_16(i32 %a, i16 %b) nounwind {
+; CHECK-LABEL: crc32_32_16:
+; CHECK: # BB#0:
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT: crc32w {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT: retl
%tmp = call i32 @llvm.x86.sse42.crc32.32.16(i32 %a, i16 %b)
ret i32 %tmp
}
declare i32 @llvm.x86.sse42.crc32.32.16(i32, i16) nounwind
-; CHECK: crc32l
define i32 @crc32_32_32(i32 %a, i32 %b) nounwind {
+; CHECK-LABEL: crc32_32_32:
+; CHECK: # BB#0:
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT: crc32l {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT: retl
%tmp = call i32 @llvm.x86.sse42.crc32.32.32(i32 %a, i32 %b)
ret i32 %tmp
}
declare i32 @llvm.x86.sse42.crc32.32.32(i32, i32) nounwind
-; CHECK: movntdq
-define void @movnt_dq(i8* %p, <4 x i64> %a1) nounwind {
- %a2 = add <4 x i64> %a1, <i64 1, i64 1, i64 1, i64 1>
- tail call void @llvm.x86.avx.movnt.dq.256(i8* %p, <4 x i64> %a2) nounwind
+define void @movnt_dq(i8* %p, <2 x i64> %a1) nounwind {
+; CHECK-LABEL: movnt_dq:
+; CHECK: # BB#0:
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT: vpaddq LCPI282_0, %xmm0, %xmm0
+; CHECK-NEXT: vmovntdq %ymm0, (%eax)
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retl
+ %a2 = add <2 x i64> %a1, <i64 1, i64 1>
+ %a3 = shufflevector <2 x i64> %a2, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+ tail call void @llvm.x86.avx.movnt.dq.256(i8* %p, <4 x i64> %a3) nounwind
ret void
}
declare void @llvm.x86.avx.movnt.dq.256(i8*, <4 x i64>) nounwind
-; CHECK: movntps
define void @movnt_ps(i8* %p, <8 x float> %a) nounwind {
+; CHECK-LABEL: movnt_ps:
+; CHECK: # BB#0:
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT: vmovntps %ymm0, (%eax)
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retl
tail call void @llvm.x86.avx.movnt.ps.256(i8* %p, <8 x float> %a) nounwind
ret void
}
declare void @llvm.x86.avx.movnt.ps.256(i8*, <8 x float>) nounwind
-; CHECK: movntpd
define void @movnt_pd(i8* %p, <4 x double> %a1) nounwind {
; add operation forces the execution domain.
+; CHECK-LABEL: movnt_pd:
+; CHECK: # BB#0:
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT: vxorpd %ymm1, %ymm1, %ymm1
+; CHECK-NEXT: vaddpd %ymm1, %ymm0, %ymm0
+; CHECK-NEXT: vmovntpd %ymm0, (%eax)
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retl
%a2 = fadd <4 x double> %a1, <double 0x0, double 0x0, double 0x0, double 0x0>
tail call void @llvm.x86.avx.movnt.pd.256(i8* %p, <4 x double> %a2) nounwind
ret void
@@ -2606,7 +3420,10 @@ declare void @llvm.x86.avx.movnt.pd.256(i8*, <4 x double>) nounwind
; Check for pclmulqdq
define <2 x i64> @test_x86_pclmulqdq(<2 x i64> %a0, <2 x i64> %a1) {
-; CHECK: vpclmulqdq
+; CHECK-LABEL: test_x86_pclmulqdq:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpclmulqdq $0, %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: retl
%res = call <2 x i64> @llvm.x86.pclmulqdq(<2 x i64> %a0, <2 x i64> %a1, i8 0) ; <<2 x i64>> [#uses=1]
ret <2 x i64> %res
}
diff --git a/test/CodeGen/X86/avx-load-store.ll b/test/CodeGen/X86/avx-load-store.ll
index a6775aba0989..d2f213bac7bb 100644
--- a/test/CodeGen/X86/avx-load-store.ll
+++ b/test/CodeGen/X86/avx-load-store.ll
@@ -10,10 +10,10 @@
define void @test_256_load(double* nocapture %d, float* nocapture %f, <4 x i64>* nocapture %i) nounwind uwtable ssp {
entry:
%0 = bitcast double* %d to <4 x double>*
- %tmp1.i = load <4 x double>* %0, align 32
+ %tmp1.i = load <4 x double>, <4 x double>* %0, align 32
%1 = bitcast float* %f to <8 x float>*
- %tmp1.i17 = load <8 x float>* %1, align 32
- %tmp1.i16 = load <4 x i64>* %i, align 32
+ %tmp1.i17 = load <8 x float>, <8 x float>* %1, align 32
+ %tmp1.i16 = load <4 x i64>, <4 x i64>* %i, align 32
tail call void @dummy(<4 x double> %tmp1.i, <8 x float> %tmp1.i17, <4 x i64> %tmp1.i16) nounwind
store <4 x double> %tmp1.i, <4 x double>* %0, align 32
store <8 x float> %tmp1.i17, <8 x float>* %1, align 32
@@ -29,9 +29,8 @@ declare void @dummy(<4 x double>, <8 x float>, <4 x i64>)
; CHECK: mov00
define <8 x float> @mov00(<8 x float> %v, float * %ptr) nounwind {
- %val = load float* %ptr
-; CHECK: vinsertps
-; CHECK: vinsertf128
+ %val = load float, float* %ptr
+; CHECK: vmovss (%
%i0 = insertelement <8 x float> zeroinitializer, float %val, i32 0
ret <8 x float> %i0
; CHECK: ret
@@ -39,9 +38,8 @@ define <8 x float> @mov00(<8 x float> %v, float * %ptr) nounwind {
; CHECK: mov01
define <4 x double> @mov01(<4 x double> %v, double * %ptr) nounwind {
- %val = load double* %ptr
-; CHECK: vmovlpd
-; CHECK: vinsertf128
+ %val = load double, double* %ptr
+; CHECK: vmovsd (%
%i0 = insertelement <4 x double> zeroinitializer, double %val, i32 0
ret <4 x double> %i0
; CHECK: ret
@@ -122,7 +120,7 @@ cif_mixed_test_any_check: ; preds = %cif_mask_mixed
; CHECK: vmovups
; CHECK: vmovups
define void @add8i32(<8 x i32>* %ret, <8 x i32>* %bp) nounwind {
- %b = load <8 x i32>* %bp, align 1
+ %b = load <8 x i32>, <8 x i32>* %bp, align 1
%x = add <8 x i32> zeroinitializer, %b
store <8 x i32> %x, <8 x i32>* %ret, align 1
ret void
@@ -132,7 +130,7 @@ define void @add8i32(<8 x i32>* %ret, <8 x i32>* %bp) nounwind {
; CHECK: vmovaps ({{.*}}), %ymm{{.*}}
; CHECK: vmovaps %ymm{{.*}}, ({{.*}})
define void @add4i64a64(<4 x i64>* %ret, <4 x i64>* %bp) nounwind {
- %b = load <4 x i64>* %bp, align 64
+ %b = load <4 x i64>, <4 x i64>* %bp, align 64
%x = add <4 x i64> zeroinitializer, %b
store <4 x i64> %x, <4 x i64>* %ret, align 64
ret void
@@ -144,7 +142,7 @@ define void @add4i64a64(<4 x i64>* %ret, <4 x i64>* %bp) nounwind {
; CHECK: vmovaps %xmm{{.*}}, {{.*}}({{.*}})
; CHECK: vmovaps %xmm{{.*}}, {{.*}}({{.*}})
define void @add4i64a16(<4 x i64>* %ret, <4 x i64>* %bp) nounwind {
- %b = load <4 x i64>* %bp, align 16
+ %b = load <4 x i64>, <4 x i64>* %bp, align 16
%x = add <4 x i64> zeroinitializer, %b
store <4 x i64> %x, <4 x i64>* %ret, align 16
ret void
diff --git a/test/CodeGen/X86/avx-logic.ll b/test/CodeGen/X86/avx-logic.ll
index 115cefb1b5eb..e71ac473b44d 100644
--- a/test/CodeGen/X86/avx-logic.ll
+++ b/test/CodeGen/X86/avx-logic.ll
@@ -1,7 +1,12 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s
-; CHECK: vandpd
define <4 x double> @andpd256(<4 x double> %y, <4 x double> %x) nounwind uwtable readnone ssp {
+; CHECK-LABEL: andpd256:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: vandpd %ymm0, %ymm1, %ymm0
+; CHECK-NEXT: vxorpd %ymm1, %ymm1, %ymm1
+; CHECK-NEXT: vaddpd %ymm1, %ymm0, %ymm0
+; CHECK-NEXT: retq
entry:
%0 = bitcast <4 x double> %x to <4 x i64>
%1 = bitcast <4 x double> %y to <4 x i64>
@@ -12,8 +17,13 @@ entry:
ret <4 x double> %3
}
-; CHECK: vandpd LCP{{.*}}(%rip)
define <4 x double> @andpd256fold(<4 x double> %y) nounwind uwtable readnone ssp {
+; CHECK-LABEL: andpd256fold:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: vandpd {{.*}}(%rip), %ymm0, %ymm0
+; CHECK-NEXT: vxorpd %ymm1, %ymm1, %ymm1
+; CHECK-NEXT: vaddpd %ymm1, %ymm0, %ymm0
+; CHECK-NEXT: retq
entry:
%0 = bitcast <4 x double> %y to <4 x i64>
%and.i = and <4 x i64> %0, <i64 4616752568008179712, i64 4614838538166547251, i64 4612361558371493478, i64 4608083138725491507>
@@ -23,8 +33,11 @@ entry:
ret <4 x double> %2
}
-; CHECK: vandps
define <8 x float> @andps256(<8 x float> %y, <8 x float> %x) nounwind uwtable readnone ssp {
+; CHECK-LABEL: andps256:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: vandps %ymm0, %ymm1, %ymm0
+; CHECK-NEXT: retq
entry:
%0 = bitcast <8 x float> %x to <8 x i32>
%1 = bitcast <8 x float> %y to <8 x i32>
@@ -33,8 +46,11 @@ entry:
ret <8 x float> %2
}
-; CHECK: vandps LCP{{.*}}(%rip)
define <8 x float> @andps256fold(<8 x float> %y) nounwind uwtable readnone ssp {
+; CHECK-LABEL: andps256fold:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
+; CHECK-NEXT: retq
entry:
%0 = bitcast <8 x float> %y to <8 x i32>
%and.i = and <8 x i32> %0, <i32 1083179008, i32 1079613850, i32 1075000115, i32 1067030938, i32 1083179008, i32 1079613850, i32 1075000115, i32 1067030938>
@@ -42,8 +58,13 @@ entry:
ret <8 x float> %1
}
-; CHECK: vxorpd
define <4 x double> @xorpd256(<4 x double> %y, <4 x double> %x) nounwind uwtable readnone ssp {
+; CHECK-LABEL: xorpd256:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: vxorpd %ymm0, %ymm1, %ymm0
+; CHECK-NEXT: vxorpd %ymm1, %ymm1, %ymm1
+; CHECK-NEXT: vaddpd %ymm1, %ymm0, %ymm0
+; CHECK-NEXT: retq
entry:
%0 = bitcast <4 x double> %x to <4 x i64>
%1 = bitcast <4 x double> %y to <4 x i64>
@@ -54,8 +75,13 @@ entry:
ret <4 x double> %3
}
-; CHECK: vxorpd LCP{{.*}}(%rip)
define <4 x double> @xorpd256fold(<4 x double> %y) nounwind uwtable readnone ssp {
+; CHECK-LABEL: xorpd256fold:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: vxorpd {{.*}}(%rip), %ymm0, %ymm0
+; CHECK-NEXT: vxorpd %ymm1, %ymm1, %ymm1
+; CHECK-NEXT: vaddpd %ymm1, %ymm0, %ymm0
+; CHECK-NEXT: retq
entry:
%0 = bitcast <4 x double> %y to <4 x i64>
%xor.i = xor <4 x i64> %0, <i64 4616752568008179712, i64 4614838538166547251, i64 4612361558371493478, i64 4608083138725491507>
@@ -65,8 +91,11 @@ entry:
ret <4 x double> %2
}
-; CHECK: vxorps
define <8 x float> @xorps256(<8 x float> %y, <8 x float> %x) nounwind uwtable readnone ssp {
+; CHECK-LABEL: xorps256:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: vxorps %ymm0, %ymm1, %ymm0
+; CHECK-NEXT: retq
entry:
%0 = bitcast <8 x float> %x to <8 x i32>
%1 = bitcast <8 x float> %y to <8 x i32>
@@ -75,8 +104,11 @@ entry:
ret <8 x float> %2
}
-; CHECK: vxorps LCP{{.*}}(%rip)
define <8 x float> @xorps256fold(<8 x float> %y) nounwind uwtable readnone ssp {
+; CHECK-LABEL: xorps256fold:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: vxorps {{.*}}(%rip), %ymm0, %ymm0
+; CHECK-NEXT: retq
entry:
%0 = bitcast <8 x float> %y to <8 x i32>
%xor.i = xor <8 x i32> %0, <i32 1083179008, i32 1079613850, i32 1075000115, i32 1067030938, i32 1083179008, i32 1079613850, i32 1075000115, i32 1067030938>
@@ -84,8 +116,13 @@ entry:
ret <8 x float> %1
}
-; CHECK: vorpd
define <4 x double> @orpd256(<4 x double> %y, <4 x double> %x) nounwind uwtable readnone ssp {
+; CHECK-LABEL: orpd256:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: vorpd %ymm0, %ymm1, %ymm0
+; CHECK-NEXT: vxorpd %ymm1, %ymm1, %ymm1
+; CHECK-NEXT: vaddpd %ymm1, %ymm0, %ymm0
+; CHECK-NEXT: retq
entry:
%0 = bitcast <4 x double> %x to <4 x i64>
%1 = bitcast <4 x double> %y to <4 x i64>
@@ -96,8 +133,13 @@ entry:
ret <4 x double> %3
}
-; CHECK: vorpd LCP{{.*}}(%rip)
define <4 x double> @orpd256fold(<4 x double> %y) nounwind uwtable readnone ssp {
+; CHECK-LABEL: orpd256fold:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: vorpd {{.*}}(%rip), %ymm0, %ymm0
+; CHECK-NEXT: vxorpd %ymm1, %ymm1, %ymm1
+; CHECK-NEXT: vaddpd %ymm1, %ymm0, %ymm0
+; CHECK-NEXT: retq
entry:
%0 = bitcast <4 x double> %y to <4 x i64>
%or.i = or <4 x i64> %0, <i64 4616752568008179712, i64 4614838538166547251, i64 4612361558371493478, i64 4608083138725491507>
@@ -107,8 +149,11 @@ entry:
ret <4 x double> %2
}
-; CHECK: vorps
define <8 x float> @orps256(<8 x float> %y, <8 x float> %x) nounwind uwtable readnone ssp {
+; CHECK-LABEL: orps256:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: vorps %ymm0, %ymm1, %ymm0
+; CHECK-NEXT: retq
entry:
%0 = bitcast <8 x float> %x to <8 x i32>
%1 = bitcast <8 x float> %y to <8 x i32>
@@ -117,8 +162,11 @@ entry:
ret <8 x float> %2
}
-; CHECK: vorps LCP{{.*}}(%rip)
define <8 x float> @orps256fold(<8 x float> %y) nounwind uwtable readnone ssp {
+; CHECK-LABEL: orps256fold:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: vorps {{.*}}(%rip), %ymm0, %ymm0
+; CHECK-NEXT: retq
entry:
%0 = bitcast <8 x float> %y to <8 x i32>
%or.i = or <8 x i32> %0, <i32 1083179008, i32 1079613850, i32 1075000115, i32 1067030938, i32 1083179008, i32 1079613850, i32 1075000115, i32 1067030938>
@@ -126,8 +174,13 @@ entry:
ret <8 x float> %1
}
-; CHECK: vandnpd
define <4 x double> @andnotpd256(<4 x double> %y, <4 x double> %x) nounwind uwtable readnone ssp {
+; CHECK-LABEL: andnotpd256:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: vandnpd %ymm0, %ymm1, %ymm0
+; CHECK-NEXT: vxorpd %ymm1, %ymm1, %ymm1
+; CHECK-NEXT: vaddpd %ymm1, %ymm0, %ymm0
+; CHECK-NEXT: retq
entry:
%0 = bitcast <4 x double> %x to <4 x i64>
%neg.i = xor <4 x i64> %0, <i64 -1, i64 -1, i64 -1, i64 -1>
@@ -139,10 +192,15 @@ entry:
ret <4 x double> %3
}
-; CHECK: vandnpd (%
define <4 x double> @andnotpd256fold(<4 x double> %y, <4 x double>* nocapture %x) nounwind uwtable readonly ssp {
+; CHECK-LABEL: andnotpd256fold:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: vandnpd (%rdi), %ymm0, %ymm0
+; CHECK-NEXT: vxorpd %ymm1, %ymm1, %ymm1
+; CHECK-NEXT: vaddpd %ymm1, %ymm0, %ymm0
+; CHECK-NEXT: retq
entry:
- %tmp2 = load <4 x double>* %x, align 32
+ %tmp2 = load <4 x double>, <4 x double>* %x, align 32
%0 = bitcast <4 x double> %y to <4 x i64>
%neg.i = xor <4 x i64> %0, <i64 -1, i64 -1, i64 -1, i64 -1>
%1 = bitcast <4 x double> %tmp2 to <4 x i64>
@@ -153,8 +211,11 @@ entry:
ret <4 x double> %3
}
-; CHECK: vandnps
define <8 x float> @andnotps256(<8 x float> %y, <8 x float> %x) nounwind uwtable readnone ssp {
+; CHECK-LABEL: andnotps256:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: vandnps %ymm0, %ymm1, %ymm0
+; CHECK-NEXT: retq
entry:
%0 = bitcast <8 x float> %x to <8 x i32>
%neg.i = xor <8 x i32> %0, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
@@ -164,10 +225,13 @@ entry:
ret <8 x float> %2
}
-; CHECK: vandnps (%
define <8 x float> @andnotps256fold(<8 x float> %y, <8 x float>* nocapture %x) nounwind uwtable readonly ssp {
+; CHECK-LABEL: andnotps256fold:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: vandnps (%rdi), %ymm0, %ymm0
+; CHECK-NEXT: retq
entry:
- %tmp2 = load <8 x float>* %x, align 32
+ %tmp2 = load <8 x float>, <8 x float>* %x, align 32
%0 = bitcast <8 x float> %y to <8 x i32>
%neg.i = xor <8 x i32> %0, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
%1 = bitcast <8 x float> %tmp2 to <8 x i32>
@@ -178,8 +242,12 @@ entry:
;;; Test that basic 2 x i64 logic use the integer version on AVX
-; CHECK: vpandn %xmm
define <2 x i64> @vpandn(<2 x i64> %a, <2 x i64> %b) nounwind uwtable readnone ssp {
+; CHECK-LABEL: vpandn:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: vpaddq {{.*}}(%rip), %xmm0, %xmm1
+; CHECK-NEXT: vpandn %xmm0, %xmm1, %xmm0
+; CHECK-NEXT: retq
entry:
; Force the execution domain with an add.
%a2 = add <2 x i64> %a, <i64 1, i64 1>
@@ -188,8 +256,12 @@ entry:
ret <2 x i64> %x
}
-; CHECK: vpand %xmm
define <2 x i64> @vpand(<2 x i64> %a, <2 x i64> %b) nounwind uwtable readnone ssp {
+; CHECK-LABEL: vpand:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: vpaddq {{.*}}(%rip), %xmm0, %xmm0
+; CHECK-NEXT: vpand %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: retq
entry:
; Force the execution domain with an add.
%a2 = add <2 x i64> %a, <i64 1, i64 1>
diff --git a/test/CodeGen/X86/avx-shift.ll b/test/CodeGen/X86/avx-shift.ll
index a70d45a7991a..83585b536095 100644
--- a/test/CodeGen/X86/avx-shift.ll
+++ b/test/CodeGen/X86/avx-shift.ll
@@ -1,147 +1,224 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s
;;; Shift left
-; CHECK: vpslld
-; CHECK: vpslld
-define <8 x i32> @vshift00(<8 x i32> %a) nounwind readnone {
+define <8 x i32> @vshift00(<8 x i32> %a) {
+; CHECK-LABEL: vshift00:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpslld $2, %xmm0, %xmm1
+; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
+; CHECK-NEXT: vpslld $2, %xmm0, %xmm0
+; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; CHECK-NEXT: retq
  %s = shl <8 x i32> %a, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
ret <8 x i32> %s
}
-; CHECK: vpsllw
-; CHECK: vpsllw
-define <16 x i16> @vshift01(<16 x i16> %a) nounwind readnone {
+define <16 x i16> @vshift01(<16 x i16> %a) {
+; CHECK-LABEL: vshift01:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpsllw $2, %xmm0, %xmm1
+; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
+; CHECK-NEXT: vpsllw $2, %xmm0, %xmm0
+; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; CHECK-NEXT: retq
%s = shl <16 x i16> %a, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
ret <16 x i16> %s
}
-; CHECK: vpsllq
-; CHECK: vpsllq
-define <4 x i64> @vshift02(<4 x i64> %a) nounwind readnone {
+define <4 x i64> @vshift02(<4 x i64> %a) {
+; CHECK-LABEL: vshift02:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpsllq $2, %xmm0, %xmm1
+; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
+; CHECK-NEXT: vpsllq $2, %xmm0, %xmm0
+; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; CHECK-NEXT: retq
%s = shl <4 x i64> %a, <i64 2, i64 2, i64 2, i64 2>
ret <4 x i64> %s
}
;;; Logical Shift right
-; CHECK: vpsrld
-; CHECK: vpsrld
-define <8 x i32> @vshift03(<8 x i32> %a) nounwind readnone {
+define <8 x i32> @vshift03(<8 x i32> %a) {
+; CHECK-LABEL: vshift03:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpsrld $2, %xmm0, %xmm1
+; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
+; CHECK-NEXT: vpsrld $2, %xmm0, %xmm0
+; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; CHECK-NEXT: retq
  %s = lshr <8 x i32> %a, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
ret <8 x i32> %s
}
-; CHECK: vpsrlw
-; CHECK: vpsrlw
-define <16 x i16> @vshift04(<16 x i16> %a) nounwind readnone {
+define <16 x i16> @vshift04(<16 x i16> %a) {
+; CHECK-LABEL: vshift04:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpsrlw $2, %xmm0, %xmm1
+; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
+; CHECK-NEXT: vpsrlw $2, %xmm0, %xmm0
+; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; CHECK-NEXT: retq
%s = lshr <16 x i16> %a, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
ret <16 x i16> %s
}
-; CHECK: vpsrlq
-; CHECK: vpsrlq
-define <4 x i64> @vshift05(<4 x i64> %a) nounwind readnone {
+define <4 x i64> @vshift05(<4 x i64> %a) {
+; CHECK-LABEL: vshift05:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpsrlq $2, %xmm0, %xmm1
+; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
+; CHECK-NEXT: vpsrlq $2, %xmm0, %xmm0
+; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; CHECK-NEXT: retq
%s = lshr <4 x i64> %a, <i64 2, i64 2, i64 2, i64 2>
ret <4 x i64> %s
}
;;; Arithmetic Shift right
-; CHECK: vpsrad
-; CHECK: vpsrad
-define <8 x i32> @vshift06(<8 x i32> %a) nounwind readnone {
+define <8 x i32> @vshift06(<8 x i32> %a) {
+; CHECK-LABEL: vshift06:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpsrad $2, %xmm0, %xmm1
+; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
+; CHECK-NEXT: vpsrad $2, %xmm0, %xmm0
+; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; CHECK-NEXT: retq
  %s = ashr <8 x i32> %a, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
ret <8 x i32> %s
}
-; CHECK: vpsraw
-; CHECK: vpsraw
-define <16 x i16> @vshift07(<16 x i16> %a) nounwind readnone {
+define <16 x i16> @vshift07(<16 x i16> %a) {
+; CHECK-LABEL: vshift07:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpsraw $2, %xmm0, %xmm1
+; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
+; CHECK-NEXT: vpsraw $2, %xmm0, %xmm0
+; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; CHECK-NEXT: retq
%s = ashr <16 x i16> %a, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
ret <16 x i16> %s
}
-; CHECK: vpsrlw
-; CHECK: pand
-; CHECK: pxor
-; CHECK: psubb
-; CHECK: vpsrlw
-; CHECK: pand
-; CHECK: pxor
-; CHECK: psubb
-define <32 x i8> @vshift09(<32 x i8> %a) nounwind readnone {
+define <32 x i8> @vshift09(<32 x i8> %a) {
+; CHECK-LABEL: vshift09:
+; CHECK: # BB#0:
+; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm1
+; CHECK-NEXT: vpsrlw $2, %xmm1, %xmm1
+; CHECK-NEXT: vmovdqa {{.*#+}} xmm2 = [63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63]
+; CHECK-NEXT: vpand %xmm2, %xmm1, %xmm1
+; CHECK-NEXT: vmovdqa {{.*#+}} xmm3 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
+; CHECK-NEXT: vpxor %xmm3, %xmm1, %xmm1
+; CHECK-NEXT: vpsubb %xmm3, %xmm1, %xmm1
+; CHECK-NEXT: vpsrlw $2, %xmm0, %xmm0
+; CHECK-NEXT: vpand %xmm2, %xmm0, %xmm0
+; CHECK-NEXT: vpxor %xmm3, %xmm0, %xmm0
+; CHECK-NEXT: vpsubb %xmm3, %xmm0, %xmm0
+; CHECK-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; CHECK-NEXT: retq
%s = ashr <32 x i8> %a, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
ret <32 x i8> %s
}
-; CHECK: pxor
-; CHECK: pcmpgtb
-; CHECK: pcmpgtb
-define <32 x i8> @vshift10(<32 x i8> %a) nounwind readnone {
+define <32 x i8> @vshift10(<32 x i8> %a) {
+; CHECK-LABEL: vshift10:
+; CHECK: # BB#0:
+; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm1
+; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vpcmpgtb %xmm1, %xmm2, %xmm1
+; CHECK-NEXT: vpcmpgtb %xmm0, %xmm2, %xmm0
+; CHECK-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; CHECK-NEXT: retq
%s = ashr <32 x i8> %a, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
ret <32 x i8> %s
}
-; CHECK: vpsrlw
-; CHECK: pand
-; CHECK: vpsrlw
-; CHECK: pand
-define <32 x i8> @vshift11(<32 x i8> %a) nounwind readnone {
+define <32 x i8> @vshift11(<32 x i8> %a) {
+; CHECK-LABEL: vshift11:
+; CHECK: # BB#0:
+; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm1
+; CHECK-NEXT: vpsrlw $2, %xmm1, %xmm1
+; CHECK-NEXT: vmovdqa {{.*#+}} xmm2 = [63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63]
+; CHECK-NEXT: vpand %xmm2, %xmm1, %xmm1
+; CHECK-NEXT: vpsrlw $2, %xmm0, %xmm0
+; CHECK-NEXT: vpand %xmm2, %xmm0, %xmm0
+; CHECK-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; CHECK-NEXT: retq
%s = lshr <32 x i8> %a, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
ret <32 x i8> %s
}
-; CHECK: vpsllw
-; CHECK: pand
-; CHECK: vpsllw
-; CHECK: pand
-define <32 x i8> @vshift12(<32 x i8> %a) nounwind readnone {
+define <32 x i8> @vshift12(<32 x i8> %a) {
+; CHECK-LABEL: vshift12:
+; CHECK: # BB#0:
+; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm1
+; CHECK-NEXT: vpsllw $2, %xmm1, %xmm1
+; CHECK-NEXT: vmovdqa {{.*#+}} xmm2 = [252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252]
+; CHECK-NEXT: vpand %xmm2, %xmm1, %xmm1
+; CHECK-NEXT: vpsllw $2, %xmm0, %xmm0
+; CHECK-NEXT: vpand %xmm2, %xmm0, %xmm0
+; CHECK-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; CHECK-NEXT: retq
%s = shl <32 x i8> %a, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
ret <32 x i8> %s
}
;;; Support variable shifts
-; CHECK: _vshift08
-; CHECK: vpslld $23
-; CHECK: vextractf128 $1
-; CHECK: vpslld $23
-; CHECK: ret
-define <8 x i32> @vshift08(<8 x i32> %a) nounwind {
+define <8 x i32> @vshift08(<8 x i32> %a) {
+; CHECK-LABEL: vshift08:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpslld $23, %xmm0, %xmm1
+; CHECK-NEXT: vmovdqa {{.*#+}} xmm2 = [1065353216,1065353216,1065353216,1065353216]
+; CHECK-NEXT: vpaddd %xmm2, %xmm1, %xmm1
+; CHECK-NEXT: vcvttps2dq %xmm1, %xmm1
+; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
+; CHECK-NEXT: vpslld $23, %xmm0, %xmm0
+; CHECK-NEXT: vpaddd %xmm2, %xmm0, %xmm0
+; CHECK-NEXT: vcvttps2dq %xmm0, %xmm0
+; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; CHECK-NEXT: retq
%bitop = shl <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>, %a
ret <8 x i32> %bitop
}
; PR15141
-; CHECK: _vshift13:
-; CHECK-NOT: vpsll
-; CHECK-NOT: vcvttps2dq
-; CHECK: vpmulld
define <4 x i32> @vshift13(<4 x i32> %in) {
+; CHECK-LABEL: vshift13:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm0
+; CHECK-NEXT: retq
%T = shl <4 x i32> %in, <i32 0, i32 1, i32 2, i32 4>
ret <4 x i32> %T
}
;;; Uses shifts for sign extension
-; CHECK: _sext_v16i16
-; CHECK: vpsllw
-; CHECK: vpsraw
-; CHECK: vpsllw
-; CHECK: vpsraw
-; CHECK: vinsertf128
-define <16 x i16> @sext_v16i16(<16 x i16> %a) nounwind {
+define <16 x i16> @sext_v16i16(<16 x i16> %a) {
+; CHECK-LABEL: sext_v16i16:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpsllw $8, %xmm0, %xmm1
+; CHECK-NEXT: vpsraw $8, %xmm1, %xmm1
+; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
+; CHECK-NEXT: vpsllw $8, %xmm0, %xmm0
+; CHECK-NEXT: vpsraw $8, %xmm0, %xmm0
+; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; CHECK-NEXT: retq
%b = trunc <16 x i16> %a to <16 x i8>
%c = sext <16 x i8> %b to <16 x i16>
ret <16 x i16> %c
}
-; CHECK: _sext_v8i32
-; CHECK: vpslld
-; CHECK: vpsrad
-; CHECK: vpslld
-; CHECK: vpsrad
-; CHECK: vinsertf128
-define <8 x i32> @sext_v8i32(<8 x i32> %a) nounwind {
+define <8 x i32> @sext_v8i32(<8 x i32> %a) {
+; CHECK-LABEL: sext_v8i32:
+; CHECK: # BB#0:
+; CHECK-NEXT: vpslld $16, %xmm0, %xmm1
+; CHECK-NEXT: vpsrad $16, %xmm1, %xmm1
+; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
+; CHECK-NEXT: vpslld $16, %xmm0, %xmm0
+; CHECK-NEXT: vpsrad $16, %xmm0, %xmm0
+; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; CHECK-NEXT: retq
%b = trunc <8 x i32> %a to <8 x i16>
%c = sext <8 x i16> %b to <8 x i32>
ret <8 x i32> %c
diff --git a/test/CodeGen/X86/avx-splat.ll b/test/CodeGen/X86/avx-splat.ll
index 98c1645b9080..3ea7e386c426 100644
--- a/test/CodeGen/X86/avx-splat.ll
+++ b/test/CodeGen/X86/avx-splat.ll
@@ -18,7 +18,7 @@ entry:
}
; CHECK: vmovq
-; CHECK-NEXT: vunpcklpd %xmm
+; CHECK-NEXT: vmovddup %xmm
; CHECK-NEXT: vinsertf128 $1
define <4 x i64> @funcC(i64 %q) nounwind uwtable readnone ssp {
entry:
@@ -29,7 +29,7 @@ entry:
ret <4 x i64> %vecinit6.i
}
-; CHECK: vunpcklpd %xmm
+; CHECK: vmovddup %xmm
; CHECK-NEXT: vinsertf128 $1
define <4 x double> @funcD(double %q) nounwind uwtable readnone ssp {
entry:
@@ -42,7 +42,7 @@ entry:
; Test this turns into a broadcast:
; shuffle (scalar_to_vector (load (ptr + 4))), undef, <0, 0, 0, 0>
-;
+;
; CHECK: vbroadcastss
define <8 x float> @funcE() nounwind {
allocas:
@@ -56,9 +56,9 @@ for_exit499: ; preds = %for_test505.prehead
br i1 undef, label %__load_and_broadcast_32.exit1249, label %load.i1247
load.i1247: ; preds = %for_exit499
- %ptr1227 = getelementptr [18 x [18 x float]]* %udx495, i64 0, i64 1, i64 1
+ %ptr1227 = getelementptr [18 x [18 x float]], [18 x [18 x float]]* %udx495, i64 0, i64 1, i64 1
%ptr.i1237 = bitcast float* %ptr1227 to i32*
- %val.i1238 = load i32* %ptr.i1237, align 4
+ %val.i1238 = load i32, i32* %ptr.i1237, align 4
%ret6.i1245 = insertelement <8 x i32> undef, i32 %val.i1238, i32 6
%ret7.i1246 = insertelement <8 x i32> %ret6.i1245, i32 %val.i1238, i32 7
%phitmp = bitcast <8 x i32> %ret7.i1246 to <8 x float>
diff --git a/test/CodeGen/X86/avx-trunc.ll b/test/CodeGen/X86/avx-trunc.ll
index bf8d9a7f1a40..27be9fd2fcd1 100755
--- a/test/CodeGen/X86/avx-trunc.ll
+++ b/test/CodeGen/X86/avx-trunc.ll
@@ -2,9 +2,9 @@
define <4 x i32> @trunc_64_32(<4 x i64> %A) nounwind uwtable readnone ssp{
; CHECK-LABEL: trunc_64_32
-; CHECK: shufps
-; CHECK-NOT: pshufd
-; CHECK-NOT: movlhps
+; CHECK: pshufd
+; CHECK: pshufd
+; CHECK: pblendw
%B = trunc <4 x i64> %A to <4 x i32>
ret <4 x i32>%B
}
diff --git a/test/CodeGen/X86/avx-unpack.ll b/test/CodeGen/X86/avx-unpack.ll
index 20f534532263..6924d98b38b1 100644
--- a/test/CodeGen/X86/avx-unpack.ll
+++ b/test/CodeGen/X86/avx-unpack.ll
@@ -70,8 +70,8 @@ entry:
; CHECK: vunpckhps (%
define <8 x i32> @unpackhips2(<8 x i32>* %src1, <8 x i32>* %src2) nounwind uwtable readnone ssp {
entry:
- %a = load <8 x i32>* %src1
- %b = load <8 x i32>* %src2
+ %a = load <8 x i32>, <8 x i32>* %src1
+ %b = load <8 x i32>, <8 x i32>* %src2
%shuffle.i = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
ret <8 x i32> %shuffle.i
}
@@ -86,8 +86,8 @@ entry:
; CHECK: vunpckhpd (%
define <4 x i64> @unpackhipd2(<4 x i64>* %src1, <4 x i64>* %src2) nounwind uwtable readnone ssp {
entry:
- %a = load <4 x i64>* %src1
- %b = load <4 x i64>* %src2
+ %a = load <4 x i64>, <4 x i64>* %src1
+ %b = load <4 x i64>, <4 x i64>* %src2
%shuffle.i = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
ret <4 x i64> %shuffle.i
}
@@ -102,8 +102,8 @@ entry:
; CHECK: vunpcklps (%
define <8 x i32> @unpacklops2(<8 x i32>* %src1, <8 x i32>* %src2) nounwind uwtable readnone ssp {
entry:
- %a = load <8 x i32>* %src1
- %b = load <8 x i32>* %src2
+ %a = load <8 x i32>, <8 x i32>* %src1
+ %b = load <8 x i32>, <8 x i32>* %src2
%shuffle.i = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
ret <8 x i32> %shuffle.i
}
@@ -118,8 +118,8 @@ entry:
; CHECK: vunpcklpd (%
define <4 x i64> @unpacklopd2(<4 x i64>* %src1, <4 x i64>* %src2) nounwind uwtable readnone ssp {
entry:
- %a = load <4 x i64>* %src1
- %b = load <4 x i64>* %src2
+ %a = load <4 x i64>, <4 x i64>* %src1
+ %b = load <4 x i64>, <4 x i64>* %src2
%shuffle.i = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
ret <4 x i64> %shuffle.i
}
diff --git a/test/CodeGen/X86/avx-varargs-x86_64.ll b/test/CodeGen/X86/avx-varargs-x86_64.ll
index f73174dd2bc6..7ce5e19064ad 100644
--- a/test/CodeGen/X86/avx-varargs-x86_64.ll
+++ b/test/CodeGen/X86/avx-varargs-x86_64.ll
@@ -9,7 +9,7 @@ declare i32 @f(i32, ...)
; CHECK: vmovaps %ymm0, (%rsp)
define void @test1() nounwind uwtable ssp {
entry:
- %0 = load <8 x float>* @x, align 32
- %call = call i32 (i32, ...)* @f(i32 1, <8 x float> %0)
+ %0 = load <8 x float>, <8 x float>* @x, align 32
+ %call = call i32 (i32, ...) @f(i32 1, <8 x float> %0)
ret void
}
diff --git a/test/CodeGen/X86/avx-vbroadcast.ll b/test/CodeGen/X86/avx-vbroadcast.ll
index 2ebe6fda37a3..8b8c11b85875 100644
--- a/test/CodeGen/X86/avx-vbroadcast.ll
+++ b/test/CodeGen/X86/avx-vbroadcast.ll
@@ -3,7 +3,7 @@
; CHECK: vbroadcastsd (%
define <4 x i64> @A(i64* %ptr) nounwind uwtable readnone ssp {
entry:
- %q = load i64* %ptr, align 8
+ %q = load i64, i64* %ptr, align 8
%vecinit.i = insertelement <4 x i64> undef, i64 %q, i32 0
%vecinit2.i = insertelement <4 x i64> %vecinit.i, i64 %q, i32 1
%vecinit4.i = insertelement <4 x i64> %vecinit2.i, i64 %q, i32 2
@@ -14,7 +14,7 @@ entry:
; CHECK: vbroadcastss (%
define <8 x i32> @B(i32* %ptr) nounwind uwtable readnone ssp {
entry:
- %q = load i32* %ptr, align 4
+ %q = load i32, i32* %ptr, align 4
%vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
%vecinit2.i = insertelement <8 x i32> %vecinit.i, i32 %q, i32 1
%vecinit4.i = insertelement <8 x i32> %vecinit2.i, i32 %q, i32 2
@@ -25,7 +25,7 @@ entry:
; CHECK: vbroadcastsd (%
define <4 x double> @C(double* %ptr) nounwind uwtable readnone ssp {
entry:
- %q = load double* %ptr, align 8
+ %q = load double, double* %ptr, align 8
%vecinit.i = insertelement <4 x double> undef, double %q, i32 0
%vecinit2.i = insertelement <4 x double> %vecinit.i, double %q, i32 1
%vecinit4.i = insertelement <4 x double> %vecinit2.i, double %q, i32 2
@@ -36,7 +36,7 @@ entry:
; CHECK: vbroadcastss (%
define <8 x float> @D(float* %ptr) nounwind uwtable readnone ssp {
entry:
- %q = load float* %ptr, align 4
+ %q = load float, float* %ptr, align 4
%vecinit.i = insertelement <8 x float> undef, float %q, i32 0
%vecinit2.i = insertelement <8 x float> %vecinit.i, float %q, i32 1
%vecinit4.i = insertelement <8 x float> %vecinit2.i, float %q, i32 2
@@ -49,7 +49,7 @@ entry:
; CHECK: vbroadcastss (%
define <4 x float> @e(float* %ptr) nounwind uwtable readnone ssp {
entry:
- %q = load float* %ptr, align 4
+ %q = load float, float* %ptr, align 4
%vecinit.i = insertelement <4 x float> undef, float %q, i32 0
%vecinit2.i = insertelement <4 x float> %vecinit.i, float %q, i32 1
%vecinit4.i = insertelement <4 x float> %vecinit2.i, float %q, i32 2
@@ -73,7 +73,7 @@ define <4 x float> @_e2(float* %ptr) nounwind uwtable readnone ssp {
; CHECK: vbroadcastss (%
define <4 x i32> @F(i32* %ptr) nounwind uwtable readnone ssp {
entry:
- %q = load i32* %ptr, align 4
+ %q = load i32, i32* %ptr, align 4
%vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
%vecinit2.i = insertelement <4 x i32> %vecinit.i, i32 %q, i32 1
%vecinit4.i = insertelement <4 x i32> %vecinit2.i, i32 %q, i32 2
@@ -88,7 +88,7 @@ entry:
; CHECK: ret
define <2 x i64> @G(i64* %ptr) nounwind uwtable readnone ssp {
entry:
- %q = load i64* %ptr, align 8
+ %q = load i64, i64* %ptr, align 8
%vecinit.i = insertelement <2 x i64> undef, i64 %q, i32 0
%vecinit2.i = insertelement <2 x i64> %vecinit.i, i64 %q, i32 1
ret <2 x i64> %vecinit2.i
@@ -107,7 +107,7 @@ define <4 x i32> @H(<4 x i32> %a) {
; CHECK: ret
define <2 x double> @I(double* %ptr) nounwind uwtable readnone ssp {
entry:
- %q = load double* %ptr, align 4
+ %q = load double, double* %ptr, align 4
%vecinit.i = insertelement <2 x double> undef, double %q, i32 0
%vecinit2.i = insertelement <2 x double> %vecinit.i, double %q, i32 1
ret <2 x double> %vecinit2.i
@@ -118,13 +118,13 @@ entry:
; CHECK: ret
define <4 x float> @_RR(float* %ptr, i32* %k) nounwind uwtable readnone ssp {
entry:
- %q = load float* %ptr, align 4
+ %q = load float, float* %ptr, align 4
%vecinit.i = insertelement <4 x float> undef, float %q, i32 0
%vecinit2.i = insertelement <4 x float> %vecinit.i, float %q, i32 1
%vecinit4.i = insertelement <4 x float> %vecinit2.i, float %q, i32 2
%vecinit6.i = insertelement <4 x float> %vecinit4.i, float %q, i32 3
; force a chain
- %j = load i32* %k, align 4
+ %j = load i32, i32* %k, align 4
store i32 %j, i32* undef
ret <4 x float> %vecinit6.i
}
@@ -135,7 +135,7 @@ entry:
; CHECK: ret
define <4 x float> @_RR2(float* %ptr, i32* %k) nounwind uwtable readnone ssp {
entry:
- %q = load float* %ptr, align 4
+ %q = load float, float* %ptr, align 4
%v = insertelement <4 x float> undef, float %q, i32 0
%t = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> zeroinitializer
ret <4 x float> %t
@@ -151,7 +151,7 @@ entry:
; CHECK: vbroadcastss (%
; CHECK-NEXT: ret
define <8 x float> @splat_concat1(float* %p) {
- %1 = load float* %p, align 4
+ %1 = load float, float* %p, align 4
%2 = insertelement <4 x float> undef, float %1, i32 0
%3 = insertelement <4 x float> %2, float %1, i32 1
%4 = insertelement <4 x float> %3, float %1, i32 2
@@ -165,7 +165,7 @@ define <8 x float> @splat_concat1(float* %p) {
; CHECK: vbroadcastss (%
; CHECK-NEXT: ret
define <8 x float> @splat_concat2(float* %p) {
- %1 = load float* %p, align 4
+ %1 = load float, float* %p, align 4
%2 = insertelement <4 x float> undef, float %1, i32 0
%3 = insertelement <4 x float> %2, float %1, i32 1
%4 = insertelement <4 x float> %3, float %1, i32 2
@@ -183,7 +183,7 @@ define <8 x float> @splat_concat2(float* %p) {
; CHECK: vbroadcastsd (%
; CHECK-NEXT: ret
define <4 x double> @splat_concat3(double* %p) {
- %1 = load double* %p, align 8
+ %1 = load double, double* %p, align 8
%2 = insertelement <2 x double> undef, double %1, i32 0
%3 = insertelement <2 x double> %2, double %1, i32 1
%4 = shufflevector <2 x double> %3, <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
@@ -195,7 +195,7 @@ define <4 x double> @splat_concat3(double* %p) {
; CHECK: vbroadcastsd (%
; CHECK-NEXT: ret
define <4 x double> @splat_concat4(double* %p) {
- %1 = load double* %p, align 8
+ %1 = load double, double* %p, align 8
%2 = insertelement <2 x double> undef, double %1, i32 0
%3 = insertelement <2 x double> %2, double %1, i32 1
%4 = insertelement <2 x double> undef, double %1, i32 0
diff --git a/test/CodeGen/X86/avx-vextractf128.ll b/test/CodeGen/X86/avx-vextractf128.ll
index fa49f949b689..297fb250c5ff 100644
--- a/test/CodeGen/X86/avx-vextractf128.ll
+++ b/test/CodeGen/X86/avx-vextractf128.ll
@@ -1,6 +1,6 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx | FileCheck %s
-; CHECK: @A
+; CHECK-LABEL: A:
; CHECK-NOT: vunpck
; CHECK: vextractf128 $1
define <8 x float> @A(<8 x float> %a) nounwind uwtable readnone ssp {
@@ -9,7 +9,7 @@ entry:
ret <8 x float> %shuffle
}
-; CHECK: @B
+; CHECK-LABEL: B:
; CHECK-NOT: vunpck
; CHECK: vextractf128 $1
define <4 x double> @B(<4 x double> %a) nounwind uwtable readnone ssp {
@@ -18,7 +18,7 @@ entry:
ret <4 x double> %shuffle
}
-; CHECK: @t0
+; CHECK-LABEL: t0:
; CHECK-NOT: vextractf128 $1, %ymm0, %xmm0
; CHECK-NOT: vmovaps %xmm0, (%rdi)
; CHECK: vextractf128 $1, %ymm0, (%rdi)
@@ -32,7 +32,7 @@ entry:
declare <4 x float> @llvm.x86.avx.vextractf128.ps.256(<8 x float>, i8) nounwind readnone
-; CHECK: @t2
+; CHECK-LABEL: t2:
; CHECK-NOT: vextractf128 $1, %ymm0, %xmm0
; CHECK-NOT: vmovaps %xmm0, (%rdi)
; CHECK: vextractf128 $1, %ymm0, (%rdi)
@@ -46,7 +46,7 @@ entry:
declare <2 x double> @llvm.x86.avx.vextractf128.pd.256(<4 x double>, i8) nounwind readnone
-; CHECK: @t4
+; CHECK-LABEL: t4:
; CHECK-NOT: vextractf128 $1, %ymm0, %xmm0
; CHECK-NOT: vmovaps %xmm0, (%rdi)
; CHECK: vextractf128 $1, %ymm0, (%rdi)
@@ -61,7 +61,7 @@ entry:
declare <4 x i32> @llvm.x86.avx.vextractf128.si.256(<8 x i32>, i8) nounwind readnone
-; CHECK: @t5
+; CHECK-LABEL: t5:
; CHECK: vmovaps %xmm0, (%rdi)
define void @t5(float* nocapture %addr, <8 x float> %a) nounwind uwtable ssp {
entry:
@@ -71,7 +71,7 @@ entry:
ret void
}
-; CHECK: @t6
+; CHECK-LABEL: t6:
; CHECK: vmovaps %xmm0, (%rdi)
define void @t6(double* nocapture %addr, <4 x double> %a) nounwind uwtable ssp {
entry:
@@ -81,7 +81,7 @@ entry:
ret void
}
-; CHECK: @t7
+; CHECK-LABEL: t7:
; CHECK: vmovaps %xmm0, (%rdi)
define void @t7(<2 x i64>* nocapture %addr, <4 x i64> %a) nounwind uwtable ssp {
entry:
@@ -92,7 +92,7 @@ entry:
ret void
}
-; CHECK: @t8
+; CHECK-LABEL: t8:
; CHECK: vmovups %xmm0, (%rdi)
define void @t8(<2 x i64>* nocapture %addr, <4 x i64> %a) nounwind uwtable ssp {
entry:
@@ -106,11 +106,11 @@ entry:
; PR15462
define void @t9(i64* %p) {
store i64 0, i64* %p
- %q = getelementptr i64* %p, i64 1
+ %q = getelementptr i64, i64* %p, i64 1
store i64 0, i64* %q
- %r = getelementptr i64* %p, i64 2
+ %r = getelementptr i64, i64* %p, i64 2
store i64 0, i64* %r
- %s = getelementptr i64* %p, i64 3
+ %s = getelementptr i64, i64* %p, i64 3
store i64 0, i64* %s
ret void
@@ -118,5 +118,4 @@ define void @t9(i64* %p) {
; CHECK: vxorps %xmm
; CHECK-NOT: vextractf
; CHECK: vmovups
-; CHECK: vmovups
}
diff --git a/test/CodeGen/X86/avx-vinsertf128.ll b/test/CodeGen/X86/avx-vinsertf128.ll
index 9a954fe8047e..38389de7a8a1 100644
--- a/test/CodeGen/X86/avx-vinsertf128.ll
+++ b/test/CodeGen/X86/avx-vinsertf128.ll
@@ -1,6 +1,6 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck -check-prefix=CHECK-SSE %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx | FileCheck %s
+; CHECK-LABEL: A:
; CHECK-NOT: vunpck
; CHECK: vinsertf128 $1
define <8 x float> @A(<8 x float> %a) nounwind uwtable readnone ssp {
@@ -9,6 +9,7 @@ entry:
ret <8 x float> %shuffle
}
+; CHECK-LABEL: B:
; CHECK-NOT: vunpck
; CHECK: vinsertf128 $1
define <4 x double> @B(<4 x double> %a) nounwind uwtable readnone ssp {
@@ -22,7 +23,7 @@ declare <2 x double> @llvm.x86.sse2.min.pd(<2 x double>, <2 x double>) nounwind
declare <2 x double> @llvm.x86.sse2.min.sd(<2 x double>, <2 x double>) nounwind readnone
; Just check that no crash happens
-; CHECK-SSE: _insert_crash
+; CHECK-LABEL: _insert_crash:
define void @insert_crash() nounwind {
allocas:
%v1.i.i451 = shufflevector <4 x double> zeroinitializer, <4 x double> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
@@ -39,7 +40,7 @@ allocas:
;; DAG Combine must remove useless vinsertf128 instructions
-; CHECK: DAGCombineA
+; CHECK-LABEL: DAGCombineA:
; CHECK-NOT: vinsertf128 $1
define <4 x i32> @DAGCombineA(<4 x i32> %v1) nounwind readonly {
%1 = shufflevector <4 x i32> %v1, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
@@ -47,7 +48,7 @@ define <4 x i32> @DAGCombineA(<4 x i32> %v1) nounwind readonly {
ret <4 x i32> %2
}
-; CHECK: DAGCombineB
+; CHECK-LABEL: DAGCombineB:
; CHECK: vpaddd %xmm
; CHECK-NOT: vinsertf128 $1
; CHECK: vpaddd %xmm
@@ -57,14 +58,7 @@ define <8 x i32> @DAGCombineB(<8 x i32> %v1, <8 x i32> %v2) nounwind readonly {
ret <8 x i32> %2
}
-; CHECK: insert_pd
-define <4 x double> @insert_pd(<4 x double> %a0, <2 x double> %a1) {
-; CHECK: vinsertf128
-%res = call <4 x double> @llvm.x86.avx.vinsertf128.pd.256(<4 x double> %a0, <2 x double> %a1, i8 0)
-ret <4 x double> %res
-}
-
-; CHECK: insert_undef_pd
+; CHECK-LABEL: insert_undef_pd:
define <4 x double> @insert_undef_pd(<4 x double> %a0, <2 x double> %a1) {
; CHECK: vmovaps %ymm1, %ymm0
%res = call <4 x double> @llvm.x86.avx.vinsertf128.pd.256(<4 x double> undef, <2 x double> %a1, i8 0)
@@ -73,14 +67,7 @@ ret <4 x double> %res
declare <4 x double> @llvm.x86.avx.vinsertf128.pd.256(<4 x double>, <2 x double>, i8) nounwind readnone
-; CHECK: insert_ps
-define <8 x float> @insert_ps(<8 x float> %a0, <4 x float> %a1) {
-; CHECK: vinsertf128
-%res = call <8 x float> @llvm.x86.avx.vinsertf128.ps.256(<8 x float> %a0, <4 x float> %a1, i8 0)
-ret <8 x float> %res
-}
-
-; CHECK: insert_undef_ps
+; CHECK-LABEL: insert_undef_ps:
define <8 x float> @insert_undef_ps(<8 x float> %a0, <4 x float> %a1) {
; CHECK: vmovaps %ymm1, %ymm0
%res = call <8 x float> @llvm.x86.avx.vinsertf128.ps.256(<8 x float> undef, <4 x float> %a1, i8 0)
@@ -89,14 +76,7 @@ ret <8 x float> %res
declare <8 x float> @llvm.x86.avx.vinsertf128.ps.256(<8 x float>, <4 x float>, i8) nounwind readnone
-; CHECK: insert_si
-define <8 x i32> @insert_si(<8 x i32> %a0, <4 x i32> %a1) {
-; CHECK: vinsertf128
-%res = call <8 x i32> @llvm.x86.avx.vinsertf128.si.256(<8 x i32> %a0, <4 x i32> %a1, i8 0)
-ret <8 x i32> %res
-}
-
-; CHECK: insert_undef_si
+; CHECK-LABEL: insert_undef_si:
define <8 x i32> @insert_undef_si(<8 x i32> %a0, <4 x i32> %a1) {
; CHECK: vmovaps %ymm1, %ymm0
%res = call <8 x i32> @llvm.x86.avx.vinsertf128.si.256(<8 x i32> undef, <4 x i32> %a1, i8 0)
@@ -105,27 +85,27 @@ ret <8 x i32> %res
declare <8 x i32> @llvm.x86.avx.vinsertf128.si.256(<8 x i32>, <4 x i32>, i8) nounwind readnone
; rdar://10643481
-; CHECK: vinsertf128_combine
+; CHECK-LABEL: vinsertf128_combine:
define <8 x float> @vinsertf128_combine(float* nocapture %f) nounwind uwtable readonly ssp {
; CHECK-NOT: vmovaps
; CHECK: vinsertf128
entry:
- %add.ptr = getelementptr inbounds float* %f, i64 4
+ %add.ptr = getelementptr inbounds float, float* %f, i64 4
%0 = bitcast float* %add.ptr to <4 x float>*
- %1 = load <4 x float>* %0, align 16
+ %1 = load <4 x float>, <4 x float>* %0, align 16
%2 = tail call <8 x float> @llvm.x86.avx.vinsertf128.ps.256(<8 x float> undef, <4 x float> %1, i8 1)
ret <8 x float> %2
}
; rdar://11076953
-; CHECK: vinsertf128_ucombine
+; CHECK-LABEL: vinsertf128_ucombine:
define <8 x float> @vinsertf128_ucombine(float* nocapture %f) nounwind uwtable readonly ssp {
; CHECK-NOT: vmovups
; CHECK: vinsertf128
entry:
- %add.ptr = getelementptr inbounds float* %f, i64 4
+ %add.ptr = getelementptr inbounds float, float* %f, i64 4
%0 = bitcast float* %add.ptr to <4 x float>*
- %1 = load <4 x float>* %0, align 8
+ %1 = load <4 x float>, <4 x float>* %0, align 8
%2 = tail call <8 x float> @llvm.x86.avx.vinsertf128.ps.256(<8 x float> undef, <4 x float> %1, i8 1)
ret <8 x float> %2
}
diff --git a/test/CodeGen/X86/avx-vperm2x128.ll b/test/CodeGen/X86/avx-vperm2x128.ll
index 7ca5939fa74a..10ed079a264e 100644
--- a/test/CodeGen/X86/avx-vperm2x128.ll
+++ b/test/CodeGen/X86/avx-vperm2x128.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX1
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX2
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX1
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX2
define <8 x float> @A(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
; ALL-LABEL: A:
@@ -160,8 +160,8 @@ define <16 x i16> @E5i(<16 x i16>* %a, <16 x i16>* %b) nounwind uwtable readnone
; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX2-NEXT: retq
entry:
- %c = load <16 x i16>* %a
- %d = load <16 x i16>* %b
+ %c = load <16 x i16>, <16 x i16>* %a
+ %d = load <16 x i16>, <16 x i16>* %b
%c2 = add <16 x i16> %c, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
%shuffle = shufflevector <16 x i16> %c2, <16 x i16> %d, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
ret <16 x i16> %shuffle
@@ -261,3 +261,94 @@ entry:
%shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 undef, i32 undef, i32 6, i32 7, i32 undef, i32 12, i32 undef, i32 15>
ret <8 x float> %shuffle
}
+
+;; Test zero mask generation.
+;; PR22984: https://llvm.org/bugs/show_bug.cgi?id=22984
+;; Prefer xor+vblendpd over vperm2f128 because that has better performance.
+
+define <4 x double> @vperm2z_0x08(<4 x double> %a) {
+; ALL-LABEL: vperm2z_0x08:
+; ALL: # BB#0:
+; ALL-NEXT: vperm2f128 $40, %ymm0, %ymm0, %ymm0
+; ALL-NEXT: retq
+ %s = shufflevector <4 x double> %a, <4 x double> <double 0.0, double 0.0, double undef, double undef>, <4 x i32> <i32 4, i32 5, i32 0, i32 1>
+ ret <4 x double> %s
+}
+
+define <4 x double> @vperm2z_0x18(<4 x double> %a) {
+; ALL-LABEL: vperm2z_0x18:
+; ALL: # BB#0:
+; ALL-NEXT: vxorpd %ymm1, %ymm1, %ymm1
+; ALL-NEXT: vblendpd $12, %ymm0, %ymm1, %ymm0
+; ALL-NEXT: retq
+ %s = shufflevector <4 x double> %a, <4 x double> <double 0.0, double 0.0, double undef, double undef>, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
+ ret <4 x double> %s
+}
+
+define <4 x double> @vperm2z_0x28(<4 x double> %a) {
+; ALL-LABEL: vperm2z_0x28:
+; ALL: # BB#0:
+; ALL-NEXT: vperm2f128 $40, %ymm0, %ymm0, %ymm0
+; ALL-NEXT: retq
+ %s = shufflevector <4 x double> <double 0.0, double 0.0, double undef, double undef>, <4 x double> %a, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+ ret <4 x double> %s
+}
+
+define <4 x double> @vperm2z_0x38(<4 x double> %a) {
+; ALL-LABEL: vperm2z_0x38:
+; ALL: # BB#0:
+; ALL-NEXT: vxorpd %ymm1, %ymm1, %ymm1
+; ALL-NEXT: vblendpd $12, %ymm0, %ymm1, %ymm0
+; ALL-NEXT: retq
+ %s = shufflevector <4 x double> <double 0.0, double 0.0, double undef, double undef>, <4 x double> %a, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
+ ret <4 x double> %s
+}
+
+define <4 x double> @vperm2z_0x80(<4 x double> %a) {
+; ALL-LABEL: vperm2z_0x80:
+; ALL: # BB#0:
+; ALL-NEXT: vperm2f128 $128, %ymm0, %ymm0, %ymm0
+; ALL-NEXT: retq
+ %s = shufflevector <4 x double> %a, <4 x double> <double 0.0, double 0.0, double undef, double undef>, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+ ret <4 x double> %s
+}
+
+define <4 x double> @vperm2z_0x81(<4 x double> %a) {
+; ALL-LABEL: vperm2z_0x81:
+; ALL: # BB#0:
+; ALL-NEXT: vperm2f128 $129, %ymm0, %ymm0, %ymm0
+; ALL-NEXT: retq
+ %s = shufflevector <4 x double> %a, <4 x double> <double 0.0, double 0.0, double undef, double undef>, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
+ ret <4 x double> %s
+}
+
+define <4 x double> @vperm2z_0x82(<4 x double> %a) {
+; ALL-LABEL: vperm2z_0x82:
+; ALL: # BB#0:
+; ALL-NEXT: vperm2f128 $128, %ymm0, %ymm0, %ymm0
+; ALL-NEXT: retq
+ %s = shufflevector <4 x double> <double 0.0, double 0.0, double undef, double undef>, <4 x double> %a, <4 x i32> <i32 4, i32 5, i32 0, i32 1>
+ ret <4 x double> %s
+}
+
+define <4 x double> @vperm2z_0x83(<4 x double> %a) {
+; ALL-LABEL: vperm2z_0x83:
+; ALL: # BB#0:
+; ALL-NEXT: vperm2f128 $129, %ymm0, %ymm0, %ymm0
+; ALL-NEXT: retq
+ %s = shufflevector <4 x double> <double 0.0, double 0.0, double undef, double undef>, <4 x double> %a, <4 x i32> <i32 6, i32 7, i32 0, i32 1>
+ ret <4 x double> %s
+}
+
+;; With AVX2 select the integer version of the instruction. Use an add to force the domain selection.
+
+define <4 x i64> @vperm2z_int_0x83(<4 x i64> %a, <4 x i64> %b) {
+; ALL-LABEL: vperm2z_int_0x83:
+; ALL: # BB#0:
+; AVX1: vperm2f128 $129, %ymm0, %ymm0, %ymm0
+; AVX2: vperm2i128 $129, %ymm0, %ymm0, %ymm0
+ %s = shufflevector <4 x i64> <i64 0, i64 0, i64 undef, i64 undef>, <4 x i64> %a, <4 x i32> <i32 6, i32 7, i32 0, i32 1>
+ %c = add <4 x i64> %b, %s
+ ret <4 x i64> %c
+}
+
diff --git a/test/CodeGen/X86/avx-vzeroupper.ll b/test/CodeGen/X86/avx-vzeroupper.ll
index a2163a254e14..a16dc70e81c6 100644
--- a/test/CodeGen/X86/avx-vzeroupper.ll
+++ b/test/CodeGen/X86/avx-vzeroupper.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -x86-use-vzeroupper -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s
+; RUN: llc < %s -x86-use-vzeroupper -mtriple=x86_64-apple-darwin -mattr=+avx | FileCheck %s
declare i32 @foo()
declare <4 x float> @do_sse(<4 x float>)
@@ -24,7 +24,7 @@ entry:
; CHECK: _test01
define <8 x float> @test01(<4 x float> %a, <4 x float> %b, <8 x float> %c) nounwind uwtable ssp {
entry:
- %tmp = load <4 x float>* @x, align 16
+ %tmp = load <4 x float>, <4 x float>* @x, align 16
; CHECK: vzeroupper
; CHECK-NEXT: callq _do_sse
%call = tail call <4 x float> @do_sse(<4 x float> %tmp) nounwind
@@ -73,7 +73,7 @@ for.body:
%call5 = tail call <4 x float> @do_sse(<4 x float> %c.017) nounwind
; CHECK-NEXT: callq _do_sse
%call7 = tail call <4 x float> @do_sse(<4 x float> %call5) nounwind
- %tmp11 = load <8 x float>* @g, align 32
+ %tmp11 = load <8 x float>, <8 x float>* @g, align 32
%0 = tail call <4 x float> @llvm.x86.avx.vextractf128.ps.256(<8 x float> %tmp11, i8 1) nounwind
; CHECK: vzeroupper
; CHECK-NEXT: callq _do_sse
diff --git a/test/CodeGen/X86/avx.ll b/test/CodeGen/X86/avx.ll
index cba6d98f5a84..f71ec5c10e69 100644
--- a/test/CodeGen/X86/avx.ll
+++ b/test/CodeGen/X86/avx.ll
@@ -34,7 +34,7 @@ define <4 x float> @insertps_from_vector_load(<4 x float> %a, <4 x float>* nocap
; CHECK-NOT: mov
; CHECK: insertps $48
; CHECK-NEXT: ret
- %1 = load <4 x float>* %pb, align 16
+ %1 = load <4 x float>, <4 x float>* %pb, align 16
%2 = tail call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a, <4 x float> %1, i32 48)
ret <4 x float> %2
}
@@ -48,7 +48,7 @@ define <4 x float> @insertps_from_vector_load_offset(<4 x float> %a, <4 x float>
;; Try to match a bit more of the instr, since we need the load's offset.
; CHECK: insertps $96, 4(%{{...}}), %
; CHECK-NEXT: ret
- %1 = load <4 x float>* %pb, align 16
+ %1 = load <4 x float>, <4 x float>* %pb, align 16
%2 = tail call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a, <4 x float> %1, i32 96)
ret <4 x float> %2
}
@@ -60,10 +60,10 @@ define <4 x float> @insertps_from_vector_load_offset_2(<4 x float> %a, <4 x floa
; X32: movl 8(%esp), %ecx
; CHECK-NOT: mov
;; Try to match a bit more of the instr, since we need the load's offset.
-; CHECK: vinsertps $-64, 12(%{{...}},%{{...}}), %
+; CHECK: vinsertps $192, 12(%{{...}},%{{...}}), %
; CHECK-NEXT: ret
- %1 = getelementptr inbounds <4 x float>* %pb, i64 %index
- %2 = load <4 x float>* %1, align 16
+ %1 = getelementptr inbounds <4 x float>, <4 x float>* %pb, i64 %index
+ %2 = load <4 x float>, <4 x float>* %1, align 16
%3 = tail call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a, <4 x float> %2, i32 192)
ret <4 x float> %3
}
@@ -76,8 +76,8 @@ define <4 x float> @insertps_from_broadcast_loadf32(<4 x float> %a, float* nocap
; CHECK-NOT: mov
; CHECK: insertps $48
; CHECK-NEXT: ret
- %1 = getelementptr inbounds float* %fb, i64 %index
- %2 = load float* %1, align 4
+ %1 = getelementptr inbounds float, float* %fb, i64 %index
+ %2 = load float, float* %1, align 4
%3 = insertelement <4 x float> undef, float %2, i32 0
%4 = insertelement <4 x float> %3, float %2, i32 1
%5 = insertelement <4 x float> %4, float %2, i32 2
@@ -93,7 +93,7 @@ define <4 x float> @insertps_from_broadcast_loadv4f32(<4 x float> %a, <4 x float
; CHECK-NOT: mov
; CHECK: insertps $48
; CHECK-NEXT: ret
- %1 = load <4 x float>* %b, align 4
+ %1 = load <4 x float>, <4 x float>* %b, align 4
%2 = extractelement <4 x float> %1, i32 0
%3 = insertelement <4 x float> undef, float %2, i32 0
%4 = insertelement <4 x float> %3, float %2, i32 1
@@ -119,8 +119,8 @@ define <4 x float> @insertps_from_broadcast_multiple_use(<4 x float> %a, <4 x fl
; CHECK: vaddps
; CHECK: vaddps
; CHECK-NEXT: ret
- %1 = getelementptr inbounds float* %fb, i64 %index
- %2 = load float* %1, align 4
+ %1 = getelementptr inbounds float, float* %fb, i64 %index
+ %2 = load float, float* %1, align 4
%3 = insertelement <4 x float> undef, float %2, i32 0
%4 = insertelement <4 x float> %3, float %2, i32 1
%5 = insertelement <4 x float> %4, float %2, i32 2
diff --git a/test/CodeGen/X86/avx1-logical-load-folding.ll b/test/CodeGen/X86/avx1-logical-load-folding.ll
index 32301b1bf9e6..90e00c965391 100644
--- a/test/CodeGen/X86/avx1-logical-load-folding.ll
+++ b/test/CodeGen/X86/avx1-logical-load-folding.ll
@@ -6,7 +6,7 @@ target triple = "x86_64-apple-macosx10.9.0"
; Function Attrs: nounwind ssp uwtable
define void @test1(float* %A, float* %C) #0 {
%tmp1 = bitcast float* %A to <8 x float>*
- %tmp2 = load <8 x float>* %tmp1, align 32
+ %tmp2 = load <8 x float>, <8 x float>* %tmp1, align 32
%tmp3 = bitcast <8 x float> %tmp2 to <8 x i32>
%tmp4 = and <8 x i32> %tmp3, <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647>
%tmp5 = bitcast <8 x i32> %tmp4 to <8 x float>
@@ -20,7 +20,7 @@ define void @test1(float* %A, float* %C) #0 {
; Function Attrs: nounwind ssp uwtable
define void @test2(float* %A, float* %C) #0 {
%tmp1 = bitcast float* %A to <8 x float>*
- %tmp2 = load <8 x float>* %tmp1, align 32
+ %tmp2 = load <8 x float>, <8 x float>* %tmp1, align 32
%tmp3 = bitcast <8 x float> %tmp2 to <8 x i32>
%tmp4 = or <8 x i32> %tmp3, <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647>
%tmp5 = bitcast <8 x i32> %tmp4 to <8 x float>
@@ -34,7 +34,7 @@ define void @test2(float* %A, float* %C) #0 {
; Function Attrs: nounwind ssp uwtable
define void @test3(float* %A, float* %C) #0 {
%tmp1 = bitcast float* %A to <8 x float>*
- %tmp2 = load <8 x float>* %tmp1, align 32
+ %tmp2 = load <8 x float>, <8 x float>* %tmp1, align 32
%tmp3 = bitcast <8 x float> %tmp2 to <8 x i32>
%tmp4 = xor <8 x i32> %tmp3, <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647>
%tmp5 = bitcast <8 x i32> %tmp4 to <8 x float>
@@ -47,7 +47,7 @@ define void @test3(float* %A, float* %C) #0 {
define void @test4(float* %A, float* %C) #0 {
%tmp1 = bitcast float* %A to <8 x float>*
- %tmp2 = load <8 x float>* %tmp1, align 32
+ %tmp2 = load <8 x float>, <8 x float>* %tmp1, align 32
%tmp3 = bitcast <8 x float> %tmp2 to <8 x i32>
%tmp4 = xor <8 x i32> %tmp3, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
%tmp5 = and <8 x i32> %tmp4, <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647>
diff --git a/test/CodeGen/X86/avx1-stack-reload-folding.ll b/test/CodeGen/X86/avx1-stack-reload-folding.ll
deleted file mode 100644
index 54c192583d6e..000000000000
--- a/test/CodeGen/X86/avx1-stack-reload-folding.ll
+++ /dev/null
@@ -1,83 +0,0 @@
-; RUN: llc -O3 -disable-peephole -mcpu=corei7-avx -mattr=+avx < %s | FileCheck %s
-
-target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-target triple = "x86_64-unknown-unknown"
-
-; Stack reload folding tests - we use the 'big vectors' pattern to guarantee spilling to stack.
-;
-; Many of these tests are primarily to check memory folding with specific instructions. Using a basic
-; load/cvt/store pattern to test for this would mean that it wouldn't be the memory folding code thats
-; being tested - the load-execute version of the instruction from the tables would be matched instead.
-
-define void @stack_fold_vmulpd(<64 x double>* %a, <64 x double>* %b, <64 x double>* %c) {
- ;CHECK-LABEL: stack_fold_vmulpd
- ;CHECK: vmulpd {{[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
-
- %1 = load <64 x double>* %a
- %2 = load <64 x double>* %b
- %3 = fadd <64 x double> %1, %2
- %4 = fsub <64 x double> %1, %2
- %5 = fmul <64 x double> %3, %4
- store <64 x double> %5, <64 x double>* %c
- ret void
-}
-
-define void @stack_fold_cvtdq2ps(<128 x i32>* %a, <128 x i32>* %b, <128 x float>* %c) {
- ;CHECK-LABEL: stack_fold_cvtdq2ps
- ;CHECK: vcvtdq2ps {{[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
-
- %1 = load <128 x i32>* %a
- %2 = load <128 x i32>* %b
- %3 = and <128 x i32> %1, %2
- %4 = xor <128 x i32> %1, %2
- %5 = sitofp <128 x i32> %3 to <128 x float>
- %6 = sitofp <128 x i32> %4 to <128 x float>
- %7 = fadd <128 x float> %5, %6
- store <128 x float> %7, <128 x float>* %c
- ret void
-}
-
-define void @stack_fold_cvtpd2ps(<128 x double>* %a, <128 x double>* %b, <128 x float>* %c) {
- ;CHECK-LABEL: stack_fold_cvtpd2ps
- ;CHECK: vcvtpd2psy {{[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
-
- %1 = load <128 x double>* %a
- %2 = load <128 x double>* %b
- %3 = fadd <128 x double> %1, %2
- %4 = fsub <128 x double> %1, %2
- %5 = fptrunc <128 x double> %3 to <128 x float>
- %6 = fptrunc <128 x double> %4 to <128 x float>
- %7 = fadd <128 x float> %5, %6
- store <128 x float> %7, <128 x float>* %c
- ret void
-}
-
-define void @stack_fold_cvttpd2dq(<64 x double>* %a, <64 x double>* %b, <64 x i32>* %c) #0 {
- ;CHECK-LABEL: stack_fold_cvttpd2dq
- ;CHECK: vcvttpd2dqy {{[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
-
- %1 = load <64 x double>* %a
- %2 = load <64 x double>* %b
- %3 = fadd <64 x double> %1, %2
- %4 = fsub <64 x double> %1, %2
- %5 = fptosi <64 x double> %3 to <64 x i32>
- %6 = fptosi <64 x double> %4 to <64 x i32>
- %7 = or <64 x i32> %5, %6
- store <64 x i32> %7, <64 x i32>* %c
- ret void
-}
-
-define void @stack_fold_cvttps2dq(<128 x float>* %a, <128 x float>* %b, <128 x i32>* %c) #0 {
- ;CHECK-LABEL: stack_fold_cvttps2dq
- ;CHECK: vcvttps2dq {{[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
-
- %1 = load <128 x float>* %a
- %2 = load <128 x float>* %b
- %3 = fadd <128 x float> %1, %2
- %4 = fsub <128 x float> %1, %2
- %5 = fptosi <128 x float> %3 to <128 x i32>
- %6 = fptosi <128 x float> %4 to <128 x i32>
- %7 = or <128 x i32> %5, %6
- store <128 x i32> %7, <128 x i32>* %c
- ret void
-}
diff --git a/test/CodeGen/X86/avx2-arith.ll b/test/CodeGen/X86/avx2-arith.ll
index 72bdd9d04729..a205be1c0cd6 100644
--- a/test/CodeGen/X86/avx2-arith.ll
+++ b/test/CodeGen/X86/avx2-arith.ll
@@ -60,6 +60,49 @@ define <16 x i16> @test_vpmullw(<16 x i16> %i, <16 x i16> %j) nounwind readnone
ret <16 x i16> %x
}
+; CHECK: mul-v16i8
+; CHECK: # BB#0:
+; CHECK-NEXT: vpmovsxbw %xmm1, %ymm1
+; CHECK-NEXT: vpmovsxbw %xmm0, %ymm0
+; CHECK-NEXT: vpmullw %ymm1, %ymm0, %ymm0
+; CHECK-NEXT: vextracti128 $1, %ymm0, %xmm1
+; CHECK-NEXT: vmovdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
+; CHECK-NEXT: vpshufb %xmm2, %xmm1, %xmm1
+; CHECK-NEXT: vpshufb %xmm2, %xmm0, %xmm0
+; CHECK-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+define <16 x i8> @mul-v16i8(<16 x i8> %i, <16 x i8> %j) nounwind readnone {
+ %x = mul <16 x i8> %i, %j
+ ret <16 x i8> %x
+}
+
+; CHECK: mul-v32i8
+; CHECK: # BB#0:
+; CHECK-NEXT: vextracti128 $1, %ymm1, %xmm2
+; CHECK-NEXT: vpmovsxbw %xmm2, %ymm2
+; CHECK-NEXT: vextracti128 $1, %ymm0, %xmm3
+; CHECK-NEXT: vpmovsxbw %xmm3, %ymm3
+; CHECK-NEXT: vpmullw %ymm2, %ymm3, %ymm2
+; CHECK-NEXT: vextracti128 $1, %ymm2, %xmm3
+; CHECK-NEXT: vmovdqa {{.*#+}} xmm4 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
+; CHECK-NEXT: vpshufb %xmm4, %xmm3, %xmm3
+; CHECK-NEXT: vpshufb %xmm4, %xmm2, %xmm2
+; CHECK-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; CHECK-NEXT: vpmovsxbw %xmm1, %ymm1
+; CHECK-NEXT: vpmovsxbw %xmm0, %ymm0
+; CHECK-NEXT: vpmullw %ymm1, %ymm0, %ymm0
+; CHECK-NEXT: vextracti128 $1, %ymm0, %xmm1
+; CHECK-NEXT: vpshufb %xmm4, %xmm1, %xmm1
+; CHECK-NEXT: vpshufb %xmm4, %xmm0, %xmm0
+; CHECK-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; CHECK-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
+; CHECK-NEXT: retq
+define <32 x i8> @mul-v32i8(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
+ %x = mul <32 x i8> %i, %j
+ ret <32 x i8> %x
+}
+
; CHECK: mul-v4i64
; CHECK: vpmuludq %ymm
; CHECK-NEXT: vpsrlq $32, %ymm
diff --git a/test/CodeGen/X86/avx2-conversions.ll b/test/CodeGen/X86/avx2-conversions.ll
index f49718e4c8b6..9b6d5aa5eeae 100755
--- a/test/CodeGen/X86/avx2-conversions.ll
+++ b/test/CodeGen/X86/avx2-conversions.ll
@@ -84,7 +84,7 @@ define <16 x i16> @sext_16i8_16i16(<16 x i8> %z) {
; CHECK-LABEL: trunc_16i16_16i8:
; CHECK: vpshufb
; CHECK: vpshufb
-; CHECK: vpor
+; CHECK: vpunpcklqdq
; CHECK: ret
define <16 x i8> @trunc_16i16_16i8(<16 x i16> %z) {
%t = trunc <16 x i16> %z to <16 x i8>
@@ -95,7 +95,7 @@ define <16 x i8> @trunc_16i16_16i8(<16 x i16> %z) {
; CHECK: vpmovsxdq (%r{{[^,]*}}), %ymm{{.*}}
; CHECK: ret
define <4 x i64> @load_sext_test1(<4 x i32> *%ptr) {
- %X = load <4 x i32>* %ptr
+ %X = load <4 x i32>, <4 x i32>* %ptr
%Y = sext <4 x i32> %X to <4 x i64>
ret <4 x i64>%Y
}
@@ -104,7 +104,7 @@ define <4 x i64> @load_sext_test1(<4 x i32> *%ptr) {
; CHECK: vpmovsxbq (%r{{[^,]*}}), %ymm{{.*}}
; CHECK: ret
define <4 x i64> @load_sext_test2(<4 x i8> *%ptr) {
- %X = load <4 x i8>* %ptr
+ %X = load <4 x i8>, <4 x i8>* %ptr
%Y = sext <4 x i8> %X to <4 x i64>
ret <4 x i64>%Y
}
@@ -113,7 +113,7 @@ define <4 x i64> @load_sext_test2(<4 x i8> *%ptr) {
; CHECK: vpmovsxwq (%r{{[^,]*}}), %ymm{{.*}}
; CHECK: ret
define <4 x i64> @load_sext_test3(<4 x i16> *%ptr) {
- %X = load <4 x i16>* %ptr
+ %X = load <4 x i16>, <4 x i16>* %ptr
%Y = sext <4 x i16> %X to <4 x i64>
ret <4 x i64>%Y
}
@@ -122,7 +122,7 @@ define <4 x i64> @load_sext_test3(<4 x i16> *%ptr) {
; CHECK: vpmovsxwd (%r{{[^,]*}}), %ymm{{.*}}
; CHECK: ret
define <8 x i32> @load_sext_test4(<8 x i16> *%ptr) {
- %X = load <8 x i16>* %ptr
+ %X = load <8 x i16>, <8 x i16>* %ptr
%Y = sext <8 x i16> %X to <8 x i32>
ret <8 x i32>%Y
}
@@ -131,7 +131,7 @@ define <8 x i32> @load_sext_test4(<8 x i16> *%ptr) {
; CHECK: vpmovsxbd (%r{{[^,]*}}), %ymm{{.*}}
; CHECK: ret
define <8 x i32> @load_sext_test5(<8 x i8> *%ptr) {
- %X = load <8 x i8>* %ptr
+ %X = load <8 x i8>, <8 x i8>* %ptr
%Y = sext <8 x i8> %X to <8 x i32>
ret <8 x i32>%Y
}
diff --git a/test/CodeGen/X86/avx2-gather.ll b/test/CodeGen/X86/avx2-gather.ll
index a9ac0258975a..91fa20bc0af4 100644
--- a/test/CodeGen/X86/avx2-gather.ll
+++ b/test/CodeGen/X86/avx2-gather.ll
@@ -32,3 +32,30 @@ define <2 x double> @test_x86_avx2_gather_d_pd(i8* %a1,
; CHECK: vgatherdpd
; CHECK: vmovapd
; CHECK: ret
+
+declare <8 x float> @llvm.x86.avx2.gather.d.ps.256(<8 x float>, i8*,
+ <8 x i32>, <8 x float>, i8) nounwind readonly
+
+define <8 x float> @test_x86_avx2_gather_d_ps_256(i8* %a1,
+ <8 x i32> %idx, <8 x float> %mask) {
+ %res = call <8 x float> @llvm.x86.avx2.gather.d.ps.256(<8 x float> undef,
+ i8* %a1, <8 x i32> %idx, <8 x float> %mask, i8 4) ;
+ ret <8 x float> %res
+}
+; CHECK-LABEL: @test_x86_avx2_gather_d_ps_256
+; CHECK: vgatherdps %ymm
+; CHECK: ret
+
+declare <4 x double> @llvm.x86.avx2.gather.d.pd.256(<4 x double>, i8*,
+ <4 x i32>, <4 x double>, i8) nounwind readonly
+
+define <4 x double> @test_x86_avx2_gather_d_pd_256(i8* %a1,
+ <4 x i32> %idx, <4 x double> %mask) {
+ %res = call <4 x double> @llvm.x86.avx2.gather.d.pd.256(<4 x double> undef,
+ i8* %a1, <4 x i32> %idx, <4 x double> %mask, i8 8) ;
+ ret <4 x double> %res
+}
+
+; CHECK-LABEL: test_x86_avx2_gather_d_pd_256
+; CHECK: vgatherdpd %ymm
+; CHECK: ret
diff --git a/test/CodeGen/X86/avx2-intrinsics-x86-upgrade.ll b/test/CodeGen/X86/avx2-intrinsics-x86-upgrade.ll
index ac2c73bb9321..a30d8371775c 100644
--- a/test/CodeGen/X86/avx2-intrinsics-x86-upgrade.ll
+++ b/test/CodeGen/X86/avx2-intrinsics-x86-upgrade.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -march=x86 -mcpu=core-avx2 -mattr=avx2 | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -march=x86 -mattr=avx2 | FileCheck %s
define <16 x i16> @test_x86_avx2_pblendw(<16 x i16> %a0, <16 x i16> %a1) {
; CHECK: vpblendw
@@ -31,3 +31,55 @@ define <16 x i16> @test_x86_avx2_mpsadbw(<32 x i8> %a0, <32 x i8> %a1) {
}
declare <16 x i16> @llvm.x86.avx2.mpsadbw(<32 x i8>, <32 x i8>, i32) nounwind readnone
+
+define <4 x i64> @test_x86_avx2_psll_dq_bs(<4 x i64> %a0) {
+ ; CHECK: vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,ymm0[0,1,2,3,4,5,6,7,8],zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,18,19,20,21,22,23,24]
+ %res = call <4 x i64> @llvm.x86.avx2.psll.dq.bs(<4 x i64> %a0, i32 7) ; <<4 x i64>> [#uses=1]
+ ret <4 x i64> %res
+}
+declare <4 x i64> @llvm.x86.avx2.psll.dq.bs(<4 x i64>, i32) nounwind readnone
+
+
+define <4 x i64> @test_x86_avx2_psrl_dq_bs(<4 x i64> %a0) {
+ ; CHECK: vpsrldq {{.*#+}} ymm0 = ymm0[7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,ymm0[23,24,25,26,27,28,29,30,31],zero,zero,zero,zero,zero,zero,zero
+ %res = call <4 x i64> @llvm.x86.avx2.psrl.dq.bs(<4 x i64> %a0, i32 7) ; <<4 x i64>> [#uses=1]
+ ret <4 x i64> %res
+}
+declare <4 x i64> @llvm.x86.avx2.psrl.dq.bs(<4 x i64>, i32) nounwind readnone
+
+
+define <4 x i64> @test_x86_avx2_psll_dq(<4 x i64> %a0) {
+ ; CHECK: vpslldq {{.*#+}} ymm0 = zero,ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14],zero,ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28,29,30]
+ %res = call <4 x i64> @llvm.x86.avx2.psll.dq(<4 x i64> %a0, i32 8) ; <<4 x i64>> [#uses=1]
+ ret <4 x i64> %res
+}
+declare <4 x i64> @llvm.x86.avx2.psll.dq(<4 x i64>, i32) nounwind readnone
+
+
+define <4 x i64> @test_x86_avx2_psrl_dq(<4 x i64> %a0) {
+ ; CHECK: vpsrldq {{.*#+}} ymm0 = ymm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,ymm0[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],zero
+ %res = call <4 x i64> @llvm.x86.avx2.psrl.dq(<4 x i64> %a0, i32 8) ; <<4 x i64>> [#uses=1]
+ ret <4 x i64> %res
+}
+declare <4 x i64> @llvm.x86.avx2.psrl.dq(<4 x i64>, i32) nounwind readnone
+
+
+define <2 x i64> @test_x86_avx2_vextracti128(<4 x i64> %a0) {
+; CHECK-LABEL: test_x86_avx2_vextracti128:
+; CHECK: vextracti128
+
+ %res = call <2 x i64> @llvm.x86.avx2.vextracti128(<4 x i64> %a0, i8 7)
+ ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.avx2.vextracti128(<4 x i64>, i8) nounwind readnone
+
+
+define <4 x i64> @test_x86_avx2_vinserti128(<4 x i64> %a0, <2 x i64> %a1) {
+; CHECK-LABEL: test_x86_avx2_vinserti128:
+; CHECK: vinserti128
+
+ %res = call <4 x i64> @llvm.x86.avx2.vinserti128(<4 x i64> %a0, <2 x i64> %a1, i8 7)
+ ret <4 x i64> %res
+}
+declare <4 x i64> @llvm.x86.avx2.vinserti128(<4 x i64>, <2 x i64>, i8) nounwind readnone
+
diff --git a/test/CodeGen/X86/avx2-intrinsics-x86.ll b/test/CodeGen/X86/avx2-intrinsics-x86.ll
index ca5ab3723c11..5b607afef91c 100644
--- a/test/CodeGen/X86/avx2-intrinsics-x86.ll
+++ b/test/CodeGen/X86/avx2-intrinsics-x86.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -march=x86 -mcpu=core-avx2 -mattr=avx2 | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -march=x86 -mattr=avx2 | FileCheck %s
define <16 x i16> @test_x86_avx2_packssdw(<8 x i32> %a0, <8 x i32> %a1) {
; CHECK: vpackssdw
@@ -160,22 +160,6 @@ define <8 x i32> @test_x86_avx2_psll_d(<8 x i32> %a0, <4 x i32> %a1) {
declare <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32>, <4 x i32>) nounwind readnone
-define <4 x i64> @test_x86_avx2_psll_dq(<4 x i64> %a0) {
- ; CHECK: vpslldq {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31]
- %res = call <4 x i64> @llvm.x86.avx2.psll.dq(<4 x i64> %a0, i32 7) ; <<4 x i64>> [#uses=1]
- ret <4 x i64> %res
-}
-declare <4 x i64> @llvm.x86.avx2.psll.dq(<4 x i64>, i32) nounwind readnone
-
-
-define <4 x i64> @test_x86_avx2_psll_dq_bs(<4 x i64> %a0) {
- ; CHECK: vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,ymm0[0,1,2,3,4,5,6,7,8],zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,18,19,20,21,22,23,24]
- %res = call <4 x i64> @llvm.x86.avx2.psll.dq.bs(<4 x i64> %a0, i32 7) ; <<4 x i64>> [#uses=1]
- ret <4 x i64> %res
-}
-declare <4 x i64> @llvm.x86.avx2.psll.dq.bs(<4 x i64>, i32) nounwind readnone
-
-
define <4 x i64> @test_x86_avx2_psll_q(<4 x i64> %a0, <2 x i64> %a1) {
; CHECK: vpsllq
%res = call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %a0, <2 x i64> %a1) ; <<4 x i64>> [#uses=1]
@@ -256,22 +240,6 @@ define <8 x i32> @test_x86_avx2_psrl_d(<8 x i32> %a0, <4 x i32> %a1) {
declare <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32>, <4 x i32>) nounwind readnone
-define <4 x i64> @test_x86_avx2_psrl_dq(<4 x i64> %a0) {
- ; CHECK: vpsrldq {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31]
- %res = call <4 x i64> @llvm.x86.avx2.psrl.dq(<4 x i64> %a0, i32 7) ; <<4 x i64>> [#uses=1]
- ret <4 x i64> %res
-}
-declare <4 x i64> @llvm.x86.avx2.psrl.dq(<4 x i64>, i32) nounwind readnone
-
-
-define <4 x i64> @test_x86_avx2_psrl_dq_bs(<4 x i64> %a0) {
- ; CHECK: vpsrldq {{.*#+}} ymm0 = ymm0[7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,ymm0[23,24,25,26,27,28,29,30,31],zero,zero,zero,zero,zero,zero,zero
- %res = call <4 x i64> @llvm.x86.avx2.psrl.dq.bs(<4 x i64> %a0, i32 7) ; <<4 x i64>> [#uses=1]
- ret <4 x i64> %res
-}
-declare <4 x i64> @llvm.x86.avx2.psrl.dq.bs(<4 x i64>, i32) nounwind readnone
-
-
define <4 x i64> @test_x86_avx2_psrl_q(<4 x i64> %a0, <2 x i64> %a1) {
; CHECK: vpsrlq
%res = call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %a0, <2 x i64> %a1) ; <<4 x i64>> [#uses=1]
@@ -673,13 +641,6 @@ define <4 x i64> @test_x86_avx2_pmul.dq(<8 x i32> %a0, <8 x i32> %a1) {
declare <4 x i64> @llvm.x86.avx2.pmul.dq(<8 x i32>, <8 x i32>) nounwind readnone
-define <4 x i64> @test_x86_avx2_vbroadcasti128(i8* %a0) {
- ; CHECK: vbroadcasti128
- %res = call <4 x i64> @llvm.x86.avx2.vbroadcasti128(i8* %a0) ; <<4 x i64>> [#uses=1]
- ret <4 x i64> %res
-}
-declare <4 x i64> @llvm.x86.avx2.vbroadcasti128(i8*) nounwind readonly
-
define <4 x double> @test_x86_avx2_vbroadcast_sd_pd_256(<2 x double> %a0) {
; CHECK: vbroadcastsd
%res = call <4 x double> @llvm.x86.avx2.vbroadcast.sd.pd.256(<2 x double> %a0) ; <<4 x double>> [#uses=1]
@@ -814,22 +775,6 @@ define <4 x i64> @test_x86_avx2_vperm2i128(<4 x i64> %a0, <4 x i64> %a1) {
declare <4 x i64> @llvm.x86.avx2.vperm2i128(<4 x i64>, <4 x i64>, i8) nounwind readonly
-define <2 x i64> @test_x86_avx2_vextracti128(<4 x i64> %a0) {
- ; CHECK: vextracti128
- %res = call <2 x i64> @llvm.x86.avx2.vextracti128(<4 x i64> %a0, i8 7) ; <<2 x i64>> [#uses=1]
- ret <2 x i64> %res
-}
-declare <2 x i64> @llvm.x86.avx2.vextracti128(<4 x i64>, i8) nounwind readnone
-
-
-define <4 x i64> @test_x86_avx2_vinserti128(<4 x i64> %a0, <2 x i64> %a1) {
- ; CHECK: vinserti128
- %res = call <4 x i64> @llvm.x86.avx2.vinserti128(<4 x i64> %a0, <2 x i64> %a1, i8 7) ; <<4 x i64>> [#uses=1]
- ret <4 x i64> %res
-}
-declare <4 x i64> @llvm.x86.avx2.vinserti128(<4 x i64>, <2 x i64>, i8) nounwind readnone
-
-
define <2 x i64> @test_x86_avx2_maskload_q(i8* %a0, <2 x i64> %a1) {
; CHECK: vpmaskmovq
%res = call <2 x i64> @llvm.x86.avx2.maskload.q(i8* %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
diff --git a/test/CodeGen/X86/avx2-pmovx-256-old-shuffle.ll b/test/CodeGen/X86/avx2-pmovx-256-old-shuffle.ll
deleted file mode 100644
index 44eb42adb9f8..000000000000
--- a/test/CodeGen/X86/avx2-pmovx-256-old-shuffle.ll
+++ /dev/null
@@ -1,29 +0,0 @@
-; RUN: llc < %s -x86-experimental-vector-shuffle-lowering=false -mattr=+avx2 | FileCheck %s
-
-target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-target triple = "x86_64-apple-darwin"
-
-; PR21876
-; The old shuffle lowering sometimes generates VZEXT nodes with both input
-; and output same-sized types, here 256-bits. For instance, a v8i8 to v8i32
-; zero-extend would become a (v8i32 (VZEXT v32i8)) node, which can't happen
-; otherwise. The companion commit r223996 added those patterns temporarily.
-; This test, along with the VR256 for AVX2 PMOVXrr instructions, should be
-; removed once the old vector shuffle lowering goes away.
-
-define void @test_avx2_pmovx_256(<8 x i8>* %tmp64, <8 x float>* %tmp75) {
-; CHECK-LABEL: test_avx2_pmovx_256
-; We really don't care about the generated code.
-; CHECK: vpmovzxbd
-; CHECK: vpbroadcastd
-; CHECK: vpand
-; CHECK: vcvtdq2ps
-; CHECK: vmovups
-; CHECK: vzeroupper
-; CHECK: retq
-
- %wide.load458 = load <8 x i8>* %tmp64, align 1
- %tmp68 = uitofp <8 x i8> %wide.load458 to <8 x float>
- store <8 x float> %tmp68, <8 x float>* %tmp75, align 4
- ret void
-}
diff --git a/test/CodeGen/X86/avx2-pmovxrm-intrinsics.ll b/test/CodeGen/X86/avx2-pmovxrm-intrinsics.ll
index 7301b7cbfc4e..6bd6a5041d41 100644
--- a/test/CodeGen/X86/avx2-pmovxrm-intrinsics.ll
+++ b/test/CodeGen/X86/avx2-pmovxrm-intrinsics.ll
@@ -3,7 +3,7 @@
define <16 x i16> @test_lvm_x86_avx2_pmovsxbw(<16 x i8>* %a) {
; CHECK-LABEL: test_lvm_x86_avx2_pmovsxbw
; CHECK: vpmovsxbw (%rdi), %ymm0
- %1 = load <16 x i8>* %a, align 1
+ %1 = load <16 x i8>, <16 x i8>* %a, align 1
%2 = call <16 x i16> @llvm.x86.avx2.pmovsxbw(<16 x i8> %1)
ret <16 x i16> %2
}
@@ -11,7 +11,7 @@ define <16 x i16> @test_lvm_x86_avx2_pmovsxbw(<16 x i8>* %a) {
define <8 x i32> @test_llvm_x86_avx2_pmovsxbd(<16 x i8>* %a) {
; CHECK-LABEL: test_llvm_x86_avx2_pmovsxbd
; CHECK: vpmovsxbd (%rdi), %ymm0
- %1 = load <16 x i8>* %a, align 1
+ %1 = load <16 x i8>, <16 x i8>* %a, align 1
%2 = call <8 x i32> @llvm.x86.avx2.pmovsxbd(<16 x i8> %1)
ret <8 x i32> %2
}
@@ -19,7 +19,7 @@ define <8 x i32> @test_llvm_x86_avx2_pmovsxbd(<16 x i8>* %a) {
define <4 x i64> @test_llvm_x86_avx2_pmovsxbq(<16 x i8>* %a) {
; CHECK-LABEL: test_llvm_x86_avx2_pmovsxbq
; CHECK: vpmovsxbq (%rdi), %ymm0
- %1 = load <16 x i8>* %a, align 1
+ %1 = load <16 x i8>, <16 x i8>* %a, align 1
%2 = call <4 x i64> @llvm.x86.avx2.pmovsxbq(<16 x i8> %1)
ret <4 x i64> %2
}
@@ -27,7 +27,7 @@ define <4 x i64> @test_llvm_x86_avx2_pmovsxbq(<16 x i8>* %a) {
define <8 x i32> @test_llvm_x86_avx2_pmovsxwd(<8 x i16>* %a) {
; CHECK-LABEL: test_llvm_x86_avx2_pmovsxwd
; CHECK: vpmovsxwd (%rdi), %ymm0
- %1 = load <8 x i16>* %a, align 1
+ %1 = load <8 x i16>, <8 x i16>* %a, align 1
%2 = call <8 x i32> @llvm.x86.avx2.pmovsxwd(<8 x i16> %1)
ret <8 x i32> %2
}
@@ -35,7 +35,7 @@ define <8 x i32> @test_llvm_x86_avx2_pmovsxwd(<8 x i16>* %a) {
define <4 x i64> @test_llvm_x86_avx2_pmovsxwq(<8 x i16>* %a) {
; CHECK-LABEL: test_llvm_x86_avx2_pmovsxwq
; CHECK: vpmovsxwq (%rdi), %ymm0
- %1 = load <8 x i16>* %a, align 1
+ %1 = load <8 x i16>, <8 x i16>* %a, align 1
%2 = call <4 x i64> @llvm.x86.avx2.pmovsxwq(<8 x i16> %1)
ret <4 x i64> %2
}
@@ -43,7 +43,7 @@ define <4 x i64> @test_llvm_x86_avx2_pmovsxwq(<8 x i16>* %a) {
define <4 x i64> @test_llvm_x86_avx2_pmovsxdq(<4 x i32>* %a) {
; CHECK-LABEL: test_llvm_x86_avx2_pmovsxdq
; CHECK: vpmovsxdq (%rdi), %ymm0
- %1 = load <4 x i32>* %a, align 1
+ %1 = load <4 x i32>, <4 x i32>* %a, align 1
%2 = call <4 x i64> @llvm.x86.avx2.pmovsxdq(<4 x i32> %1)
ret <4 x i64> %2
}
@@ -51,7 +51,7 @@ define <4 x i64> @test_llvm_x86_avx2_pmovsxdq(<4 x i32>* %a) {
define <16 x i16> @test_lvm_x86_avx2_pmovzxbw(<16 x i8>* %a) {
; CHECK-LABEL: test_lvm_x86_avx2_pmovzxbw
; CHECK: vpmovzxbw (%rdi), %ymm0
- %1 = load <16 x i8>* %a, align 1
+ %1 = load <16 x i8>, <16 x i8>* %a, align 1
%2 = call <16 x i16> @llvm.x86.avx2.pmovzxbw(<16 x i8> %1)
ret <16 x i16> %2
}
@@ -59,7 +59,7 @@ define <16 x i16> @test_lvm_x86_avx2_pmovzxbw(<16 x i8>* %a) {
define <8 x i32> @test_llvm_x86_avx2_pmovzxbd(<16 x i8>* %a) {
; CHECK-LABEL: test_llvm_x86_avx2_pmovzxbd
; CHECK: vpmovzxbd (%rdi), %ymm0
- %1 = load <16 x i8>* %a, align 1
+ %1 = load <16 x i8>, <16 x i8>* %a, align 1
%2 = call <8 x i32> @llvm.x86.avx2.pmovzxbd(<16 x i8> %1)
ret <8 x i32> %2
}
@@ -67,7 +67,7 @@ define <8 x i32> @test_llvm_x86_avx2_pmovzxbd(<16 x i8>* %a) {
define <4 x i64> @test_llvm_x86_avx2_pmovzxbq(<16 x i8>* %a) {
; CHECK-LABEL: test_llvm_x86_avx2_pmovzxbq
; CHECK: vpmovzxbq (%rdi), %ymm0
- %1 = load <16 x i8>* %a, align 1
+ %1 = load <16 x i8>, <16 x i8>* %a, align 1
%2 = call <4 x i64> @llvm.x86.avx2.pmovzxbq(<16 x i8> %1)
ret <4 x i64> %2
}
@@ -75,7 +75,7 @@ define <4 x i64> @test_llvm_x86_avx2_pmovzxbq(<16 x i8>* %a) {
define <8 x i32> @test_llvm_x86_avx2_pmovzxwd(<8 x i16>* %a) {
; CHECK-LABEL: test_llvm_x86_avx2_pmovzxwd
; CHECK: vpmovzxwd (%rdi), %ymm0
- %1 = load <8 x i16>* %a, align 1
+ %1 = load <8 x i16>, <8 x i16>* %a, align 1
%2 = call <8 x i32> @llvm.x86.avx2.pmovzxwd(<8 x i16> %1)
ret <8 x i32> %2
}
@@ -83,7 +83,7 @@ define <8 x i32> @test_llvm_x86_avx2_pmovzxwd(<8 x i16>* %a) {
define <4 x i64> @test_llvm_x86_avx2_pmovzxwq(<8 x i16>* %a) {
; CHECK-LABEL: test_llvm_x86_avx2_pmovzxwq
; CHECK: vpmovzxwq (%rdi), %ymm0
- %1 = load <8 x i16>* %a, align 1
+ %1 = load <8 x i16>, <8 x i16>* %a, align 1
%2 = call <4 x i64> @llvm.x86.avx2.pmovzxwq(<8 x i16> %1)
ret <4 x i64> %2
}
@@ -91,7 +91,7 @@ define <4 x i64> @test_llvm_x86_avx2_pmovzxwq(<8 x i16>* %a) {
define <4 x i64> @test_llvm_x86_avx2_pmovzxdq(<4 x i32>* %a) {
; CHECK-LABEL: test_llvm_x86_avx2_pmovzxdq
; CHECK: vpmovzxdq (%rdi), %ymm0
- %1 = load <4 x i32>* %a, align 1
+ %1 = load <4 x i32>, <4 x i32>* %a, align 1
%2 = call <4 x i64> @llvm.x86.avx2.pmovzxdq(<4 x i32> %1)
ret <4 x i64> %2
}
diff --git a/test/CodeGen/X86/avx2-shift.ll b/test/CodeGen/X86/avx2-shift.ll
index 025d52ede0f4..5adbb2ef6651 100644
--- a/test/CodeGen/X86/avx2-shift.ll
+++ b/test/CodeGen/X86/avx2-shift.ll
@@ -130,7 +130,7 @@ define <16 x i16> @vshift07(<16 x i16> %a) nounwind readnone {
; CHECK: vpsravd (%
; CHECK: ret
define <4 x i32> @variable_sra0_load(<4 x i32> %x, <4 x i32>* %y) {
- %y1 = load <4 x i32>* %y
+ %y1 = load <4 x i32>, <4 x i32>* %y
%k = ashr <4 x i32> %x, %y1
ret <4 x i32> %k
}
@@ -139,7 +139,7 @@ define <4 x i32> @variable_sra0_load(<4 x i32> %x, <4 x i32>* %y) {
; CHECK: vpsravd (%
; CHECK: ret
define <8 x i32> @variable_sra1_load(<8 x i32> %x, <8 x i32>* %y) {
- %y1 = load <8 x i32>* %y
+ %y1 = load <8 x i32>, <8 x i32>* %y
%k = ashr <8 x i32> %x, %y1
ret <8 x i32> %k
}
@@ -148,7 +148,7 @@ define <8 x i32> @variable_sra1_load(<8 x i32> %x, <8 x i32>* %y) {
; CHECK: vpsllvd (%
; CHECK: ret
define <4 x i32> @variable_shl0_load(<4 x i32> %x, <4 x i32>* %y) {
- %y1 = load <4 x i32>* %y
+ %y1 = load <4 x i32>, <4 x i32>* %y
%k = shl <4 x i32> %x, %y1
ret <4 x i32> %k
}
@@ -156,7 +156,7 @@ define <4 x i32> @variable_shl0_load(<4 x i32> %x, <4 x i32>* %y) {
; CHECK: vpsllvd (%
; CHECK: ret
define <8 x i32> @variable_shl1_load(<8 x i32> %x, <8 x i32>* %y) {
- %y1 = load <8 x i32>* %y
+ %y1 = load <8 x i32>, <8 x i32>* %y
%k = shl <8 x i32> %x, %y1
ret <8 x i32> %k
}
@@ -164,7 +164,7 @@ define <8 x i32> @variable_shl1_load(<8 x i32> %x, <8 x i32>* %y) {
; CHECK: vpsllvq (%
; CHECK: ret
define <2 x i64> @variable_shl2_load(<2 x i64> %x, <2 x i64>* %y) {
- %y1 = load <2 x i64>* %y
+ %y1 = load <2 x i64>, <2 x i64>* %y
%k = shl <2 x i64> %x, %y1
ret <2 x i64> %k
}
@@ -172,7 +172,7 @@ define <2 x i64> @variable_shl2_load(<2 x i64> %x, <2 x i64>* %y) {
; CHECK: vpsllvq (%
; CHECK: ret
define <4 x i64> @variable_shl3_load(<4 x i64> %x, <4 x i64>* %y) {
- %y1 = load <4 x i64>* %y
+ %y1 = load <4 x i64>, <4 x i64>* %y
%k = shl <4 x i64> %x, %y1
ret <4 x i64> %k
}
@@ -180,7 +180,7 @@ define <4 x i64> @variable_shl3_load(<4 x i64> %x, <4 x i64>* %y) {
; CHECK: vpsrlvd (%
; CHECK: ret
define <4 x i32> @variable_srl0_load(<4 x i32> %x, <4 x i32>* %y) {
- %y1 = load <4 x i32>* %y
+ %y1 = load <4 x i32>, <4 x i32>* %y
%k = lshr <4 x i32> %x, %y1
ret <4 x i32> %k
}
@@ -188,7 +188,7 @@ define <4 x i32> @variable_srl0_load(<4 x i32> %x, <4 x i32>* %y) {
; CHECK: vpsrlvd (%
; CHECK: ret
define <8 x i32> @variable_srl1_load(<8 x i32> %x, <8 x i32>* %y) {
- %y1 = load <8 x i32>* %y
+ %y1 = load <8 x i32>, <8 x i32>* %y
%k = lshr <8 x i32> %x, %y1
ret <8 x i32> %k
}
@@ -196,7 +196,7 @@ define <8 x i32> @variable_srl1_load(<8 x i32> %x, <8 x i32>* %y) {
; CHECK: vpsrlvq (%
; CHECK: ret
define <2 x i64> @variable_srl2_load(<2 x i64> %x, <2 x i64>* %y) {
- %y1 = load <2 x i64>* %y
+ %y1 = load <2 x i64>, <2 x i64>* %y
%k = lshr <2 x i64> %x, %y1
ret <2 x i64> %k
}
@@ -204,7 +204,7 @@ define <2 x i64> @variable_srl2_load(<2 x i64> %x, <2 x i64>* %y) {
; CHECK: vpsrlvq (%
; CHECK: ret
define <4 x i64> @variable_srl3_load(<4 x i64> %x, <4 x i64>* %y) {
- %y1 = load <4 x i64>* %y
+ %y1 = load <4 x i64>, <4 x i64>* %y
%k = lshr <4 x i64> %x, %y1
ret <4 x i64> %k
}
diff --git a/test/CodeGen/X86/avx2-vbroadcast.ll b/test/CodeGen/X86/avx2-vbroadcast.ll
index 924c06eba768..94dcdcabdd33 100644
--- a/test/CodeGen/X86/avx2-vbroadcast.ll
+++ b/test/CodeGen/X86/avx2-vbroadcast.ll
@@ -3,7 +3,7 @@
; CHECK: vpbroadcastb (%
define <16 x i8> @BB16(i8* %ptr) nounwind uwtable readnone ssp {
entry:
- %q = load i8* %ptr, align 4
+ %q = load i8, i8* %ptr, align 4
%q0 = insertelement <16 x i8> undef, i8 %q, i32 0
%q1 = insertelement <16 x i8> %q0, i8 %q, i32 1
%q2 = insertelement <16 x i8> %q1, i8 %q, i32 2
@@ -25,7 +25,7 @@ entry:
; CHECK: vpbroadcastb (%
define <32 x i8> @BB32(i8* %ptr) nounwind uwtable readnone ssp {
entry:
- %q = load i8* %ptr, align 4
+ %q = load i8, i8* %ptr, align 4
%q0 = insertelement <32 x i8> undef, i8 %q, i32 0
%q1 = insertelement <32 x i8> %q0, i8 %q, i32 1
%q2 = insertelement <32 x i8> %q1, i8 %q, i32 2
@@ -65,7 +65,7 @@ entry:
define <8 x i16> @W16(i16* %ptr) nounwind uwtable readnone ssp {
entry:
- %q = load i16* %ptr, align 4
+ %q = load i16, i16* %ptr, align 4
%q0 = insertelement <8 x i16> undef, i16 %q, i32 0
%q1 = insertelement <8 x i16> %q0, i16 %q, i32 1
%q2 = insertelement <8 x i16> %q1, i16 %q, i32 2
@@ -79,7 +79,7 @@ entry:
; CHECK: vpbroadcastw (%
define <16 x i16> @WW16(i16* %ptr) nounwind uwtable readnone ssp {
entry:
- %q = load i16* %ptr, align 4
+ %q = load i16, i16* %ptr, align 4
%q0 = insertelement <16 x i16> undef, i16 %q, i32 0
%q1 = insertelement <16 x i16> %q0, i16 %q, i32 1
%q2 = insertelement <16 x i16> %q1, i16 %q, i32 2
@@ -101,7 +101,7 @@ entry:
; CHECK: vbroadcastss (%
define <4 x i32> @D32(i32* %ptr) nounwind uwtable readnone ssp {
entry:
- %q = load i32* %ptr, align 4
+ %q = load i32, i32* %ptr, align 4
%q0 = insertelement <4 x i32> undef, i32 %q, i32 0
%q1 = insertelement <4 x i32> %q0, i32 %q, i32 1
%q2 = insertelement <4 x i32> %q1, i32 %q, i32 2
@@ -111,7 +111,7 @@ entry:
; CHECK: vbroadcastss (%
define <8 x i32> @DD32(i32* %ptr) nounwind uwtable readnone ssp {
entry:
- %q = load i32* %ptr, align 4
+ %q = load i32, i32* %ptr, align 4
%q0 = insertelement <8 x i32> undef, i32 %q, i32 0
%q1 = insertelement <8 x i32> %q0, i32 %q, i32 1
%q2 = insertelement <8 x i32> %q1, i32 %q, i32 2
@@ -125,7 +125,7 @@ entry:
; CHECK: vpbroadcastq (%
define <2 x i64> @Q64(i64* %ptr) nounwind uwtable readnone ssp {
entry:
- %q = load i64* %ptr, align 4
+ %q = load i64, i64* %ptr, align 4
%q0 = insertelement <2 x i64> undef, i64 %q, i32 0
%q1 = insertelement <2 x i64> %q0, i64 %q, i32 1
ret <2 x i64> %q1
@@ -133,7 +133,7 @@ entry:
; CHECK: vbroadcastsd (%
define <4 x i64> @QQ64(i64* %ptr) nounwind uwtable readnone ssp {
entry:
- %q = load i64* %ptr, align 4
+ %q = load i64, i64* %ptr, align 4
%q0 = insertelement <4 x i64> undef, i64 %q, i32 0
%q1 = insertelement <4 x i64> %q0, i64 %q, i32 1
%q2 = insertelement <4 x i64> %q1, i64 %q, i32 2
@@ -145,7 +145,7 @@ entry:
; this used to crash
define <2 x double> @I(double* %ptr) nounwind uwtable readnone ssp {
entry:
- %q = load double* %ptr, align 4
+ %q = load double, double* %ptr, align 4
%vecinit.i = insertelement <2 x double> undef, double %q, i32 0
%vecinit2.i = insertelement <2 x double> %vecinit.i, double %q, i32 1
ret <2 x double> %vecinit2.i
@@ -317,7 +317,7 @@ define <4 x double> @_inreg4xdouble(<4 x double> %a) {
}
;CHECK-LABEL: _inreg2xdouble:
-;CHECK: vunpcklpd
+;CHECK: vmovddup
;CHECK: ret
define <2 x double> @_inreg2xdouble(<2 x double> %a) {
%b = shufflevector <2 x double> %a, <2 x double> undef, <2 x i32> zeroinitializer
@@ -431,8 +431,8 @@ eintry:
%__b.addr.i = alloca <2 x i64>, align 16
%vCr = alloca <2 x i64>, align 16
store <2 x i64> zeroinitializer, <2 x i64>* %vCr, align 16
- %tmp = load <2 x i64>* %vCr, align 16
- %tmp2 = load i8* %cV_R.addr, align 4
+ %tmp = load <2 x i64>, <2 x i64>* %vCr, align 16
+ %tmp2 = load i8, i8* %cV_R.addr, align 4
%splat.splatinsert = insertelement <16 x i8> undef, i8 %tmp2, i32 0
%splat.splat = shufflevector <16 x i8> %splat.splatinsert, <16 x i8> undef, <16 x i32> zeroinitializer
%tmp3 = bitcast <16 x i8> %splat.splat to <2 x i64>
@@ -450,8 +450,8 @@ eintry:
%__b.addr.i = alloca <4 x i64>, align 16
%vCr = alloca <4 x i64>, align 16
store <4 x i64> zeroinitializer, <4 x i64>* %vCr, align 16
- %tmp = load <4 x i64>* %vCr, align 16
- %tmp2 = load i8* %cV_R.addr, align 4
+ %tmp = load <4 x i64>, <4 x i64>* %vCr, align 16
+ %tmp2 = load i8, i8* %cV_R.addr, align 4
%splat.splatinsert = insertelement <32 x i8> undef, i8 %tmp2, i32 0
%splat.splat = shufflevector <32 x i8> %splat.splatinsert, <32 x i8> undef, <32 x i32> zeroinitializer
%tmp3 = bitcast <32 x i8> %splat.splat to <4 x i64>
@@ -469,8 +469,8 @@ entry:
%__b.addr.i = alloca <2 x i64>, align 16
%vCr = alloca <2 x i64>, align 16
store <2 x i64> zeroinitializer, <2 x i64>* %vCr, align 16
- %tmp = load <2 x i64>* %vCr, align 16
- %tmp2 = load i16* %cV_R.addr, align 4
+ %tmp = load <2 x i64>, <2 x i64>* %vCr, align 16
+ %tmp2 = load i16, i16* %cV_R.addr, align 4
%splat.splatinsert = insertelement <8 x i16> undef, i16 %tmp2, i32 0
%splat.splat = shufflevector <8 x i16> %splat.splatinsert, <8 x i16> undef, <8 x i32> zeroinitializer
%tmp3 = bitcast <8 x i16> %splat.splat to <2 x i64>
@@ -488,8 +488,8 @@ eintry:
%__b.addr.i = alloca <4 x i64>, align 16
%vCr = alloca <4 x i64>, align 16
store <4 x i64> zeroinitializer, <4 x i64>* %vCr, align 16
- %tmp = load <4 x i64>* %vCr, align 16
- %tmp2 = load i16* %cV_R.addr, align 4
+ %tmp = load <4 x i64>, <4 x i64>* %vCr, align 16
+ %tmp2 = load i16, i16* %cV_R.addr, align 4
%splat.splatinsert = insertelement <16 x i16> undef, i16 %tmp2, i32 0
%splat.splat = shufflevector <16 x i16> %splat.splatinsert, <16 x i16> undef, <16 x i32> zeroinitializer
%tmp3 = bitcast <16 x i16> %splat.splat to <4 x i64>
@@ -507,8 +507,8 @@ entry:
%__b.addr.i = alloca <2 x i64>, align 16
%vCr = alloca <2 x i64>, align 16
store <2 x i64> zeroinitializer, <2 x i64>* %vCr, align 16
- %tmp = load <2 x i64>* %vCr, align 16
- %tmp2 = load i32* %cV_R.addr, align 4
+ %tmp = load <2 x i64>, <2 x i64>* %vCr, align 16
+ %tmp2 = load i32, i32* %cV_R.addr, align 4
%splat.splatinsert = insertelement <4 x i32> undef, i32 %tmp2, i32 0
%splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
%tmp3 = bitcast <4 x i32> %splat.splat to <2 x i64>
@@ -526,8 +526,8 @@ eintry:
%__b.addr.i = alloca <4 x i64>, align 16
%vCr = alloca <4 x i64>, align 16
store <4 x i64> zeroinitializer, <4 x i64>* %vCr, align 16
- %tmp = load <4 x i64>* %vCr, align 16
- %tmp2 = load i32* %cV_R.addr, align 4
+ %tmp = load <4 x i64>, <4 x i64>* %vCr, align 16
+ %tmp2 = load i32, i32* %cV_R.addr, align 4
%splat.splatinsert = insertelement <8 x i32> undef, i32 %tmp2, i32 0
%splat.splat = shufflevector <8 x i32> %splat.splatinsert, <8 x i32> undef, <8 x i32> zeroinitializer
%tmp3 = bitcast <8 x i32> %splat.splat to <4 x i64>
@@ -545,8 +545,8 @@ entry:
%__b.addr.i = alloca <2 x i64>, align 16
%vCr = alloca <2 x i64>, align 16
store <2 x i64> zeroinitializer, <2 x i64>* %vCr, align 16
- %tmp = load <2 x i64>* %vCr, align 16
- %tmp2 = load i64* %cV_R.addr, align 4
+ %tmp = load <2 x i64>, <2 x i64>* %vCr, align 16
+ %tmp2 = load i64, i64* %cV_R.addr, align 4
%splat.splatinsert = insertelement <2 x i64> undef, i64 %tmp2, i32 0
%splat.splat = shufflevector <2 x i64> %splat.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer
store <2 x i64> %tmp, <2 x i64>* %__a.addr.i, align 16
@@ -563,8 +563,8 @@ eintry:
%__b.addr.i = alloca <4 x i64>, align 16
%vCr = alloca <4 x i64>, align 16
store <4 x i64> zeroinitializer, <4 x i64>* %vCr, align 16
- %tmp = load <4 x i64>* %vCr, align 16
- %tmp2 = load i64* %cV_R.addr, align 4
+ %tmp = load <4 x i64>, <4 x i64>* %vCr, align 16
+ %tmp2 = load i64, i64* %cV_R.addr, align 4
%splat.splatinsert = insertelement <4 x i64> undef, i64 %tmp2, i32 0
%splat.splat = shufflevector <4 x i64> %splat.splatinsert, <4 x i64> undef, <4 x i32> zeroinitializer
store <4 x i64> %tmp, <4 x i64>* %__a.addr.i, align 16
diff --git a/test/CodeGen/X86/avx2-vector-shifts.ll b/test/CodeGen/X86/avx2-vector-shifts.ll
index e355301dd051..8aae90c3c03d 100644
--- a/test/CodeGen/X86/avx2-vector-shifts.ll
+++ b/test/CodeGen/X86/avx2-vector-shifts.ll
@@ -266,3 +266,94 @@ define <4 x i32> @srl_trunc_and_v4i64(<4 x i32> %x, <4 x i64> %y) nounwind {
%sra = lshr <4 x i32> %x, %trunc
ret <4 x i32> %sra
}
+
+;
+; Vectorized byte shifts
+;
+
+define <8 x i16> @shl_8i16(<8 x i16> %r, <8 x i16> %a) nounwind {
+; CHECK-LABEL: shl_8i16
+; CHECK: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
+; CHECK-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
+; CHECK-NEXT: vpsllvd %ymm1, %ymm0, %ymm0
+; CHECK-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
+; CHECK-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
+; CHECK: retq
+ %shl = shl <8 x i16> %r, %a
+ ret <8 x i16> %shl
+}
+
+define <16 x i16> @shl_16i16(<16 x i16> %r, <16 x i16> %a) nounwind {
+; CHECK-LABEL: shl_16i16
+; CHECK: vpxor %ymm2, %ymm2, %ymm2
+; CHECK-NEXT: vpunpckhwd {{.*#+}} ymm3 = ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15]
+; CHECK-NEXT: vpunpckhwd {{.*#+}} ymm4 = ymm0[4,4,5,5,6,6,7,7,12,12,13,13,14,14,15,15]
+; CHECK-NEXT: vpsllvd %ymm3, %ymm4, %ymm3
+; CHECK-NEXT: vpsrld $16, %ymm3, %ymm3
+; CHECK-NEXT: vpunpcklwd {{.*#+}} ymm1 = ymm1[0],ymm2[0],ymm1[1],ymm2[1],ymm1[2],ymm2[2],ymm1[3],ymm2[3],ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11]
+; CHECK-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,8,8,9,9,10,10,11,11]
+; CHECK-NEXT: vpsllvd %ymm1, %ymm0, %ymm0
+; CHECK-NEXT: vpsrld $16, %ymm0, %ymm0
+; CHECK-NEXT: vpackusdw %ymm3, %ymm0, %ymm0
+; CHECK-NEXT: retq
+ %shl = shl <16 x i16> %r, %a
+ ret <16 x i16> %shl
+}
+
+define <8 x i16> @ashr_8i16(<8 x i16> %r, <8 x i16> %a) nounwind {
+; CHECK-LABEL: ashr_8i16
+; CHECK: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
+; CHECK-NEXT: vpmovsxwd %xmm0, %ymm0
+; CHECK-NEXT: vpsravd %ymm1, %ymm0, %ymm0
+; CHECK-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
+; CHECK-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
+; CHECK: retq
+ %ashr = ashr <8 x i16> %r, %a
+ ret <8 x i16> %ashr
+}
+
+define <16 x i16> @ashr_16i16(<16 x i16> %r, <16 x i16> %a) nounwind {
+; CHECK-LABEL: ashr_16i16
+; CHECK: vpxor %ymm2, %ymm2, %ymm2
+; CHECK-NEXT: vpunpckhwd {{.*#+}} ymm3 = ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15]
+; CHECK-NEXT: vpunpckhwd {{.*#+}} ymm4 = ymm0[4,4,5,5,6,6,7,7,12,12,13,13,14,14,15,15]
+; CHECK-NEXT: vpsravd %ymm3, %ymm4, %ymm3
+; CHECK-NEXT: vpsrld $16, %ymm3, %ymm3
+; CHECK-NEXT: vpunpcklwd {{.*#+}} ymm1 = ymm1[0],ymm2[0],ymm1[1],ymm2[1],ymm1[2],ymm2[2],ymm1[3],ymm2[3],ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11]
+; CHECK-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,8,8,9,9,10,10,11,11]
+; CHECK-NEXT: vpsravd %ymm1, %ymm0, %ymm0
+; CHECK-NEXT: vpsrld $16, %ymm0, %ymm0
+; CHECK-NEXT: vpackusdw %ymm3, %ymm0, %ymm0
+; CHECK-NEXT: retq
+ %ashr = ashr <16 x i16> %r, %a
+ ret <16 x i16> %ashr
+}
+
+define <8 x i16> @lshr_8i16(<8 x i16> %r, <8 x i16> %a) nounwind {
+; CHECK-LABEL: lshr_8i16
+; CHECK: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
+; CHECK-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
+; CHECK-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0
+; CHECK-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
+; CHECK-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
+; CHECK: retq
+ %lshr = lshr <8 x i16> %r, %a
+ ret <8 x i16> %lshr
+}
+
+define <16 x i16> @lshr_16i16(<16 x i16> %r, <16 x i16> %a) nounwind {
+; CHECK-LABEL: lshr_16i16
+; CHECK: vpxor %ymm2, %ymm2, %ymm2
+; CHECK-NEXT: vpunpckhwd {{.*#+}} ymm3 = ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15]
+; CHECK-NEXT: vpunpckhwd {{.*#+}} ymm4 = ymm0[4,4,5,5,6,6,7,7,12,12,13,13,14,14,15,15]
+; CHECK-NEXT: vpsrlvd %ymm3, %ymm4, %ymm3
+; CHECK-NEXT: vpsrld $16, %ymm3, %ymm3
+; CHECK-NEXT: vpunpcklwd {{.*#+}} ymm1 = ymm1[0],ymm2[0],ymm1[1],ymm2[1],ymm1[2],ymm2[2],ymm1[3],ymm2[3],ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11]
+; CHECK-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,8,8,9,9,10,10,11,11]
+; CHECK-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0
+; CHECK-NEXT: vpsrld $16, %ymm0, %ymm0
+; CHECK-NEXT: vpackusdw %ymm3, %ymm0, %ymm0
+; CHECK-NEXT: retq
+ %lshr = lshr <16 x i16> %r, %a
+ ret <16 x i16> %lshr
+}
diff --git a/test/CodeGen/X86/avx512-arith.ll b/test/CodeGen/X86/avx512-arith.ll
index 94b08215b896..1ecd1007905a 100644
--- a/test/CodeGen/X86/avx512-arith.ll
+++ b/test/CodeGen/X86/avx512-arith.ll
@@ -56,7 +56,7 @@ define <8 x double> @subpd512fold(<8 x double> %y, <8 x double>* %x) {
; CHECK-NEXT: vsubpd (%rdi), %zmm0, %zmm0
; CHECK-NEXT: retq
entry:
- %tmp2 = load <8 x double>* %x, align 8
+ %tmp2 = load <8 x double>, <8 x double>* %x, align 8
%sub.i = fsub <8 x double> %y, %tmp2
ret <8 x double> %sub.i
}
@@ -77,7 +77,7 @@ define <16 x float> @subps512fold(<16 x float> %y, <16 x float>* %x) {
; CHECK-NEXT: vsubps (%rdi), %zmm0, %zmm0
; CHECK-NEXT: retq
entry:
- %tmp2 = load <16 x float>* %x, align 4
+ %tmp2 = load <16 x float>, <16 x float>* %x, align 4
%sub.i = fsub <16 x float> %y, %tmp2
ret <16 x float> %sub.i
}
@@ -193,7 +193,7 @@ define <8 x i64> @vpaddq_fold_test(<8 x i64> %i, <8 x i64>* %j) nounwind {
; CHECK: ## BB#0:
; CHECK-NEXT: vpaddq (%rdi), %zmm0, %zmm0
; CHECK-NEXT: retq
- %tmp = load <8 x i64>* %j, align 4
+ %tmp = load <8 x i64>, <8 x i64>* %j, align 4
%x = add <8 x i64> %i, %tmp
ret <8 x i64> %x
}
@@ -212,7 +212,7 @@ define <8 x i64> @vpaddq_broadcast2_test(<8 x i64> %i, i64* %j) nounwind {
; CHECK: ## BB#0:
; CHECK-NEXT: vpaddq (%rdi){1to8}, %zmm0, %zmm0
; CHECK-NEXT: retq
- %tmp = load i64* %j
+ %tmp = load i64, i64* %j
%j.0 = insertelement <8 x i64> undef, i64 %tmp, i32 0
%j.1 = insertelement <8 x i64> %j.0, i64 %tmp, i32 1
%j.2 = insertelement <8 x i64> %j.1, i64 %tmp, i32 2
@@ -239,7 +239,7 @@ define <16 x i32> @vpaddd_fold_test(<16 x i32> %i, <16 x i32>* %j) nounwind {
; CHECK: ## BB#0:
; CHECK-NEXT: vpaddd (%rdi), %zmm0, %zmm0
; CHECK-NEXT: retq
- %tmp = load <16 x i32>* %j, align 4
+ %tmp = load <16 x i32>, <16 x i32>* %j, align 4
%x = add <16 x i32> %i, %tmp
ret <16 x i32> %x
}
@@ -287,7 +287,7 @@ define <16 x i32> @vpaddd_mask_fold_test(<16 x i32> %i, <16 x i32>* %j.ptr, <16
; CHECK-NEXT: vpaddd (%rdi), %zmm0, %zmm0 {%k1}
; CHECK-NEXT: retq
%mask = icmp ne <16 x i32> %mask1, zeroinitializer
- %j = load <16 x i32>* %j.ptr
+ %j = load <16 x i32>, <16 x i32>* %j.ptr
%x = add <16 x i32> %i, %j
%r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %i
ret <16 x i32> %r
@@ -314,7 +314,7 @@ define <16 x i32> @vpaddd_maskz_fold_test(<16 x i32> %i, <16 x i32>* %j.ptr, <16
; CHECK-NEXT: vpaddd (%rdi), %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
%mask = icmp ne <16 x i32> %mask1, zeroinitializer
- %j = load <16 x i32>* %j.ptr
+ %j = load <16 x i32>, <16 x i32>* %j.ptr
%x = add <16 x i32> %i, %j
%r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
ret <16 x i32> %r
@@ -445,7 +445,7 @@ define <16 x i32> @andd512fold(<16 x i32> %y, <16 x i32>* %x) {
; CHECK-NEXT: vpandd (%rdi), %zmm0, %zmm0
; CHECK-NEXT: retq
entry:
- %a = load <16 x i32>* %x, align 4
+ %a = load <16 x i32>, <16 x i32>* %x, align 4
%b = and <16 x i32> %y, %a
ret <16 x i32> %b
}
@@ -456,7 +456,7 @@ define <8 x i64> @andqbrst(<8 x i64> %p1, i64* %ap) {
; CHECK-NEXT: vpandq (%rdi){1to8}, %zmm0, %zmm0
; CHECK-NEXT: retq
entry:
- %a = load i64* %ap, align 8
+ %a = load i64, i64* %ap, align 8
%b = insertelement <8 x i64> undef, i64 %a, i32 0
%c = shufflevector <8 x i64> %b, <8 x i64> undef, <8 x i32> zeroinitializer
%d = and <8 x i64> %p1, %c
@@ -593,7 +593,7 @@ define <8 x double> @test_mask_fold_vaddpd(<8 x double> %dst, <8 x double> %i,
<8 x double>* %j, <8 x i64> %mask1)
nounwind {
%mask = icmp ne <8 x i64> %mask1, zeroinitializer
- %tmp = load <8 x double>* %j, align 8
+ %tmp = load <8 x double>, <8 x double>* %j, align 8
%x = fadd <8 x double> %i, %tmp
%r = select <8 x i1> %mask, <8 x double> %x, <8 x double> %dst
ret <8 x double> %r
@@ -605,7 +605,7 @@ define <8 x double> @test_mask_fold_vaddpd(<8 x double> %dst, <8 x double> %i,
define <8 x double> @test_maskz_fold_vaddpd(<8 x double> %i, <8 x double>* %j,
<8 x i64> %mask1) nounwind {
%mask = icmp ne <8 x i64> %mask1, zeroinitializer
- %tmp = load <8 x double>* %j, align 8
+ %tmp = load <8 x double>, <8 x double>* %j, align 8
%x = fadd <8 x double> %i, %tmp
%r = select <8 x i1> %mask, <8 x double> %x, <8 x double> zeroinitializer
ret <8 x double> %r
@@ -615,7 +615,7 @@ define <8 x double> @test_maskz_fold_vaddpd(<8 x double> %i, <8 x double>* %j,
; CHECK: vaddpd (%rdi){1to8}, %zmm{{.*}}
; CHECK: ret
define <8 x double> @test_broadcast_vaddpd(<8 x double> %i, double* %j) nounwind {
- %tmp = load double* %j
+ %tmp = load double, double* %j
%b = insertelement <8 x double> undef, double %tmp, i32 0
%c = shufflevector <8 x double> %b, <8 x double> undef,
<8 x i32> zeroinitializer
@@ -629,7 +629,7 @@ define <8 x double> @test_broadcast_vaddpd(<8 x double> %i, double* %j) nounwind
define <8 x double> @test_mask_broadcast_vaddpd(<8 x double> %dst, <8 x double> %i,
double* %j, <8 x i64> %mask1) nounwind {
%mask = icmp ne <8 x i64> %mask1, zeroinitializer
- %tmp = load double* %j
+ %tmp = load double, double* %j
%b = insertelement <8 x double> undef, double %tmp, i32 0
%c = shufflevector <8 x double> %b, <8 x double> undef,
<8 x i32> zeroinitializer
@@ -644,7 +644,7 @@ define <8 x double> @test_mask_broadcast_vaddpd(<8 x double> %dst, <8 x double>
define <8 x double> @test_maskz_broadcast_vaddpd(<8 x double> %i, double* %j,
<8 x i64> %mask1) nounwind {
%mask = icmp ne <8 x i64> %mask1, zeroinitializer
- %tmp = load double* %j
+ %tmp = load double, double* %j
%b = insertelement <8 x double> undef, double %tmp, i32 0
%c = shufflevector <8 x double> %b, <8 x double> undef,
<8 x i32> zeroinitializer
diff --git a/test/CodeGen/X86/avx512-bugfix-23634.ll b/test/CodeGen/X86/avx512-bugfix-23634.ll
new file mode 100644
index 000000000000..c31a13ad3114
--- /dev/null
+++ b/test/CodeGen/X86/avx512-bugfix-23634.ll
@@ -0,0 +1,35 @@
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; CHECK-LABEL: f_fu
+; CHECK-NOT: vpblend
+; CHECK: vmovdqa32 {{.*}} {%k1}
+
+define void @f_fu(float* %ret, float* %aa, float %b) {
+allocas:
+ %ptr_cast_for_load = bitcast float* %aa to <16 x float>*
+ %ptr_masked_load.39 = load <16 x float>, <16 x float>* %ptr_cast_for_load, align 4
+ %b_load_to_int32 = fptosi float %b to i32
+ %b_load_to_int32_broadcast_init = insertelement <16 x i32> undef, i32 %b_load_to_int32, i32 0
+ %b_load_to_int32_broadcast = shufflevector <16 x i32> %b_load_to_int32_broadcast_init, <16 x i32> undef, <16 x i32> zeroinitializer
+ %b_to_int32 = fptosi float %b to i32
+ %b_to_int32_broadcast_init = insertelement <16 x i32> undef, i32 %b_to_int32, i32 0
+ %b_to_int32_broadcast = shufflevector <16 x i32> %b_to_int32_broadcast_init, <16 x i32> undef, <16 x i32> zeroinitializer
+
+ %a_load_to_int32 = fptosi <16 x float> %ptr_masked_load.39 to <16 x i32>
+ %div_v019_load_ = sdiv <16 x i32> %b_to_int32_broadcast, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
+
+ %v1.i = select <16 x i1> <i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true>, <16 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17>, <16 x i32> %a_load_to_int32
+
+ %foo_test = add <16 x i32> %div_v019_load_, %b_load_to_int32_broadcast
+
+
+ %add_struct_offset_y_struct_offset33_x = add <16 x i32> %foo_test, %v1.i
+
+ %val = sitofp <16 x i32> %add_struct_offset_y_struct_offset33_x to <16 x float>
+ %ptrcast = bitcast float* %ret to <16 x float>*
+ store <16 x float> %val, <16 x float>* %ptrcast, align 4
+ ret void
+}
\ No newline at end of file
diff --git a/test/CodeGen/X86/avx512-build-vector.ll b/test/CodeGen/X86/avx512-build-vector.ll
index 9e9ad31c916f..8373c6da2619 100644
--- a/test/CodeGen/X86/avx512-build-vector.ll
+++ b/test/CodeGen/X86/avx512-build-vector.ll
@@ -9,7 +9,7 @@ define <16 x i32> @test1(i32* %x) {
; CHECK-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4],ymm1[5,6,7]
; CHECK-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; CHECK-NEXT: retq
- %y = load i32* %x, align 4
+ %y = load i32, i32* %x, align 4
%res = insertelement <16 x i32>zeroinitializer, i32 %y, i32 4
ret <16 x i32>%res
}
diff --git a/test/CodeGen/X86/avx512-calling-conv.ll b/test/CodeGen/X86/avx512-calling-conv.ll
new file mode 100644
index 000000000000..edb6bef1a4ac
--- /dev/null
+++ b/test/CodeGen/X86/avx512-calling-conv.ll
@@ -0,0 +1,154 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s --check-prefix=KNL
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck %s --check-prefix=SKX
+; RUN: llc < %s -mtriple=i686-apple-darwin -mcpu=knl | FileCheck %s --check-prefix=KNL_X32
+
+; KNL-LABEL: test1
+; KNL: vxorps
+define <16 x i1> @test1() {
+ ret <16 x i1> zeroinitializer
+}
+
+; SKX-LABEL: test2
+; SKX: vpmovb2m
+; SKX: vpmovb2m
+; SKX: kandw
+; SKX: vpmovm2b
+; KNL-LABEL: test2
+; KNL: vpmovsxbd
+; KNL: vpmovsxbd
+; KNL: vpandd
+; KNL: vpmovdb
+define <16 x i1> @test2(<16 x i1>%a, <16 x i1>%b) {
+ %c = and <16 x i1>%a, %b
+ ret <16 x i1> %c
+}
+
+; SKX-LABEL: test3
+; SKX: vpmovw2m
+; SKX: vpmovw2m
+; SKX: kandb
+; SKX: vpmovm2w
+define <8 x i1> @test3(<8 x i1>%a, <8 x i1>%b) {
+ %c = and <8 x i1>%a, %b
+ ret <8 x i1> %c
+}
+
+; SKX-LABEL: test4
+; SKX: vpmovd2m
+; SKX: vpmovd2m
+; SKX: kandw
+; SKX: vpmovm2d
+define <4 x i1> @test4(<4 x i1>%a, <4 x i1>%b) {
+ %c = and <4 x i1>%a, %b
+ ret <4 x i1> %c
+}
+
+; SKX-LABEL: test5
+; SKX: vpcmpgtd
+; SKX: vpmovm2w
+; SKX: call
+; SKX: vpmovzxwd
+declare <8 x i1> @func8xi1(<8 x i1> %a)
+define <8 x i32> @test5(<8 x i32>%a, <8 x i32>%b) {
+ %cmpRes = icmp sgt <8 x i32>%a, %b
+ %resi = call <8 x i1> @func8xi1(<8 x i1> %cmpRes)
+ %res = sext <8 x i1>%resi to <8 x i32>
+ ret <8 x i32> %res
+}
+
+declare <16 x i1> @func16xi1(<16 x i1> %a)
+
+; KNL-LABEL: test6
+; KNL: vpbroadcastd
+; KNL: vpmovdb
+; KNL: call
+; KNL: vpmovzxbd
+; KNL: vpslld $31, %zmm
+; KNL: vpsrad $31, %zmm
+define <16 x i32> @test6(<16 x i32>%a, <16 x i32>%b) {
+ %cmpRes = icmp sgt <16 x i32>%a, %b
+ %resi = call <16 x i1> @func16xi1(<16 x i1> %cmpRes)
+ %res = sext <16 x i1>%resi to <16 x i32>
+ ret <16 x i32> %res
+}
+
+declare <4 x i1> @func4xi1(<4 x i1> %a)
+; SKX-LABEL: test7
+; SKX: vpmovm2d
+; SKX: call
+; SKX: vpslld $31, %xmm
+; SKX: vpsrad $31, %xmm
+
+define <4 x i32> @test7(<4 x i32>%a, <4 x i32>%b) {
+ %cmpRes = icmp sgt <4 x i32>%a, %b
+ %resi = call <4 x i1> @func4xi1(<4 x i1> %cmpRes)
+ %res = sext <4 x i1>%resi to <4 x i32>
+ ret <4 x i32> %res
+}
+
+; SKX-LABEL: test7a
+; SKX: call
+; SKX: vpmovw2m %xmm0, %k0
+; SKX: kandb
+define <8 x i1> @test7a(<8 x i32>%a, <8 x i32>%b) {
+ %cmpRes = icmp sgt <8 x i32>%a, %b
+ %resi = call <8 x i1> @func8xi1(<8 x i1> %cmpRes)
+ %res = and <8 x i1>%resi, <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false>
+ ret <8 x i1> %res
+}
+
+
+; KNL_X32-LABEL: test8
+; KNL_X32: testb $1, 4(%esp)
+; KNL_X32:jne
+
+; KNL-LABEL: test8
+; KNL: testb $1, %dil
+; KNL:jne
+
+define <16 x i8> @test8(<16 x i8> %a1, <16 x i8> %a2, i1 %cond) {
+ %res = select i1 %cond, <16 x i8> %a1, <16 x i8> %a2
+ ret <16 x i8> %res
+}
+
+; KNL-LABEL: test9
+; KNL: vucomisd
+; KNL: setb
+define i1 @test9(double %a, double %b) {
+ %c = fcmp ugt double %a, %b
+ ret i1 %c
+}
+
+; KNL_X32-LABEL: test10
+; KNL_X32: testb $1, 12(%esp)
+; KNL_X32: cmovnel
+
+; KNL-LABEL: test10
+; KNL: testb $1, %dl
+; KNL: cmovel
+define i32 @test10(i32 %a, i32 %b, i1 %cond) {
+ %c = select i1 %cond, i32 %a, i32 %b
+ ret i32 %c
+}
+
+; KNL-LABEL: test11
+; KNL: cmp
+; KNL: setg
+define i1 @test11(i32 %a, i32 %b) {
+ %c = icmp sgt i32 %a, %b
+ ret i1 %c
+}
+
+; KNL-LABEL: test12
+; KNL: callq _test11
+;; return value in %al
+; KNL: movzbl %al, %ebx
+; KNL: callq _test10
+; KNL: testb $1, %bl
+
+define i32 @test12(i32 %a1, i32 %a2, i32 %b1) {
+ %cond = call i1 @test11(i32 %a1, i32 %b1)
+ %res = call i32 @test10(i32 %a1, i32 %a2, i1 %cond)
+ %res1 = select i1 %cond, i32 %res, i32 0
+ ret i32 %res1
+}
\ No newline at end of file
diff --git a/test/CodeGen/X86/avx512-cvt.ll b/test/CodeGen/X86/avx512-cvt.ll
index 2b672a72d539..a211bcd38c9c 100644
--- a/test/CodeGen/X86/avx512-cvt.ll
+++ b/test/CodeGen/X86/avx512-cvt.ll
@@ -87,7 +87,7 @@ define <8 x double> @fpext00(<8 x float> %b) nounwind {
; CHECK: ret
define double @funcA(i64* nocapture %e) {
entry:
- %tmp1 = load i64* %e, align 8
+ %tmp1 = load i64, i64* %e, align 8
%conv = sitofp i64 %tmp1 to double
ret double %conv
}
@@ -97,7 +97,7 @@ entry:
; CHECK: ret
define double @funcB(i32* %e) {
entry:
- %tmp1 = load i32* %e, align 4
+ %tmp1 = load i32, i32* %e, align 4
%conv = sitofp i32 %tmp1 to double
ret double %conv
}
@@ -107,7 +107,7 @@ entry:
; CHECK: ret
define float @funcC(i32* %e) {
entry:
- %tmp1 = load i32* %e, align 4
+ %tmp1 = load i32, i32* %e, align 4
%conv = sitofp i32 %tmp1 to float
ret float %conv
}
@@ -117,7 +117,7 @@ entry:
; CHECK: ret
define float @i64tof32(i64* %e) {
entry:
- %tmp1 = load i64* %e, align 8
+ %tmp1 = load i64, i64* %e, align 8
%conv = sitofp i64 %tmp1 to float
ret float %conv
}
@@ -129,7 +129,7 @@ define void @fpext() {
entry:
%f = alloca float, align 4
%d = alloca double, align 8
- %tmp = load float* %f, align 4
+ %tmp = load float, float* %f, align 4
%conv = fpext float %tmp to double
store double %conv, double* %d, align 8
ret void
@@ -144,7 +144,7 @@ define void @fpround_scalar() nounwind uwtable {
entry:
%f = alloca float, align 4
%d = alloca double, align 8
- %tmp = load double* %d, align 8
+ %tmp = load double, double* %d, align 8
%conv = fptrunc double %tmp to float
store float %conv, float* %f, align 4
ret void
@@ -308,3 +308,20 @@ define <8 x double> @sitofp_8i1_double(<8 x double> %a) {
%1 = sitofp <8 x i1> %cmpres to <8 x double>
ret <8 x double> %1
}
+
+; CHECK-LABEL: @uitofp_16i8
+; CHECK: vpmovzxbd
+; CHECK: vcvtudq2ps
+define <16 x float> @uitofp_16i8(<16 x i8>%a) {
+ %b = uitofp <16 x i8> %a to <16 x float>
+ ret <16 x float>%b
+}
+
+; CHECK-LABEL: @uitofp_16i16
+; CHECK: vpmovzxwd
+; CHECK: vcvtudq2ps
+define <16 x float> @uitofp_16i16(<16 x i16>%a) {
+ %b = uitofp <16 x i16> %a to <16 x float>
+ ret <16 x float>%b
+}
+
diff --git a/test/CodeGen/X86/avx512-fma-intrinsics.ll b/test/CodeGen/X86/avx512-fma-intrinsics.ll
index 116531d5af9e..9814a6108272 100644
--- a/test/CodeGen/X86/avx512-fma-intrinsics.ll
+++ b/test/CodeGen/X86/avx512-fma-intrinsics.ll
@@ -1,50 +1,8 @@
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl --show-mc-encoding | FileCheck %s
-define <16 x float> @test_x86_vfmadd_ps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
- ; CHECK-LABEL: test_x86_vfmadd_ps_z
- ; CHECK: vfmadd213ps %zmm
- %res = call <16 x float> @llvm.x86.fma.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 4) nounwind
- ret <16 x float> %res
-}
-declare <16 x float> @llvm.x86.fma.mask.vfmadd.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32) nounwind readnone
-
-define <16 x float> @test_mask_vfmadd_ps(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
- ; CHECK-LABEL: test_mask_vfmadd_ps
- ; CHECK: vfmadd213ps %zmm
- %res = call <16 x float> @llvm.x86.fma.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 4) nounwind
- ret <16 x float> %res
-}
-
-define <8 x double> @test_x86_vfmadd_pd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
- ; CHECK-LABEL: test_x86_vfmadd_pd_z
- ; CHECK: vfmadd213pd %zmm
- %res = call <8 x double> @llvm.x86.fma.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 4) nounwind
- ret <8 x double> %res
-}
-
-define <8 x double> @test_mask_fmadd_pd(<8 x double> %a, <8 x double> %b, <8 x double> %c, i8 %mask) {
-; CHECK-LABEL: test_mask_fmadd_pd:
-; CHECK: vfmadd213pd %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0xa8,0xc2]
- %res = call <8 x double> @llvm.x86.fma.mask.vfmadd.pd.512(<8 x double> %a, <8 x double> %b, <8 x double> %c, i8 %mask, i32 4)
- ret <8 x double> %res
-}
-
+declare <16 x float> @llvm.x86.fma.mask.vfmadd.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
declare <8 x double> @llvm.x86.fma.mask.vfmadd.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)
-
-define <16 x float> @test_x86_vfmsubps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
- ; CHECK-LABEL: test_x86_vfmsubps_z
- ; CHECK: vfmsub213ps %zmm
- %res = call <16 x float> @llvm.x86.fma.mask.vfmsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 4) nounwind
- ret <16 x float> %res
-}
-declare <16 x float> @llvm.x86.fma.mask.vfmsub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32) nounwind readnone
-
-define <16 x float> @test_mask_vfmsub_ps(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
- ; CHECK-LABEL: test_mask_vfmsub_ps
- ; CHECK: vfmsub213ps %zmm
- %res = call <16 x float> @llvm.x86.fma.mask.vfmsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 4) nounwind
- ret <16 x float> %res
-}
+declare <16 x float> @llvm.x86.fma.mask.vfmsub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
define <8 x double> @test_x86_vfmsubpd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
; CHECK-LABEL: test_x86_vfmsubpd_z
@@ -182,3 +140,283 @@ define <8 x double> @test_mask_vfmsubadd_pd(<8 x double> %a0, <8 x double> %a1,
ret <8 x double> %res
}
+define <16 x float> @test_mask_round_vfmadd512_ps_rrb_rne(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
+ ; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrb_rne
+ ; CHECK: vfmadd213ps {rn-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x19,0xa8,0xc2]
+ %res = call <16 x float> @llvm.x86.fma.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 0) nounwind
+ ret <16 x float> %res
+}
+
+define <16 x float> @test_mask_round_vfmadd512_ps_rrb_rtn(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
+ ; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrb_rtn
+ ; CHECK: vfmadd213ps {rd-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x39,0xa8,0xc2]
+ %res = call <16 x float> @llvm.x86.fma.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 1) nounwind
+ ret <16 x float> %res
+}
+
+define <16 x float> @test_mask_round_vfmadd512_ps_rrb_rtp(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
+ ; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrb_rtp
+ ; CHECK: vfmadd213ps {ru-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x59,0xa8,0xc2]
+ %res = call <16 x float> @llvm.x86.fma.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 2) nounwind
+ ret <16 x float> %res
+}
+
+define <16 x float> @test_mask_round_vfmadd512_ps_rrb_rtz(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
+ ; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrb_rtz
+ ; CHECK: vfmadd213ps {rz-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x79,0xa8,0xc2]
+ %res = call <16 x float> @llvm.x86.fma.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 3) nounwind
+ ret <16 x float> %res
+}
+
+define <16 x float> @test_mask_round_vfmadd512_ps_rrb_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
+ ; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrb_current
+ ; CHECK: vfmadd213ps %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0xa8,0xc2]
+ %res = call <16 x float> @llvm.x86.fma.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 4) nounwind
+ ret <16 x float> %res
+}
+
+define <16 x float> @test_mask_round_vfmadd512_ps_rrbz_rne(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
+ ; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrbz_rne
+ ; CHECK: vfmadd213ps {rn-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0x75,0x18,0xa8,0xc2]
+ %res = call <16 x float> @llvm.x86.fma.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 0) nounwind
+ ret <16 x float> %res
+}
+
+define <16 x float> @test_mask_round_vfmadd512_ps_rrbz_rtn(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
+ ; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrbz_rtn
+ ; CHECK: vfmadd213ps {rd-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0x75,0x38,0xa8,0xc2]
+ %res = call <16 x float> @llvm.x86.fma.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 1) nounwind
+ ret <16 x float> %res
+}
+
+define <16 x float> @test_mask_round_vfmadd512_ps_rrbz_rtp(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
+ ; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrbz_rtp
+ ; CHECK: vfmadd213ps {ru-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0x75,0x58,0xa8,0xc2]
+ %res = call <16 x float> @llvm.x86.fma.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 2) nounwind
+ ret <16 x float> %res
+}
+
+define <16 x float> @test_mask_round_vfmadd512_ps_rrbz_rtz(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
+ ; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrbz_rtz
+ ; CHECK: vfmadd213ps {rz-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0x75,0x78,0xa8,0xc2]
+ %res = call <16 x float> @llvm.x86.fma.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 3) nounwind
+ ret <16 x float> %res
+}
+
+define <16 x float> @test_mask_round_vfmadd512_ps_rrbz_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
+ ; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrbz_current
+ ; CHECK: vfmadd213ps %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0x75,0x48,0xa8,0xc2]
+ %res = call <16 x float> @llvm.x86.fma.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 4) nounwind
+ ret <16 x float> %res
+}
+
+define <16 x float> @test_mask_round_vfmsub512_ps_rrb_rne(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
+ ; CHECK-LABEL: test_mask_round_vfmsub512_ps_rrb_rne
+ ; CHECK: vfmsub213ps {rn-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x19,0xaa,0xc2]
+ %res = call <16 x float> @llvm.x86.fma.mask.vfmsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 0) nounwind
+ ret <16 x float> %res
+}
+
+define <16 x float> @test_mask_round_vfmsub512_ps_rrb_rtn(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
+ ; CHECK-LABEL: test_mask_round_vfmsub512_ps_rrb_rtn
+ ; CHECK: vfmsub213ps {rd-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x39,0xaa,0xc2]
+ %res = call <16 x float> @llvm.x86.fma.mask.vfmsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 1) nounwind
+ ret <16 x float> %res
+}
+
+define <16 x float> @test_mask_round_vfmsub512_ps_rrb_rtp(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
+ ; CHECK-LABEL: test_mask_round_vfmsub512_ps_rrb_rtp
+ ; CHECK: vfmsub213ps {ru-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x59,0xaa,0xc2]
+ %res = call <16 x float> @llvm.x86.fma.mask.vfmsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 2) nounwind
+ ret <16 x float> %res
+}
+
+define <16 x float> @test_mask_round_vfmsub512_ps_rrb_rtz(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
+ ; CHECK-LABEL: test_mask_round_vfmsub512_ps_rrb_rtz
+ ; CHECK: vfmsub213ps {rz-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x79,0xaa,0xc2]
+ %res = call <16 x float> @llvm.x86.fma.mask.vfmsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 3) nounwind
+ ret <16 x float> %res
+}
+
+define <16 x float> @test_mask_round_vfmsub512_ps_rrb_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
+ ; CHECK-LABEL: test_mask_round_vfmsub512_ps_rrb_current
+ ; CHECK: vfmsub213ps %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0xaa,0xc2]
+ %res = call <16 x float> @llvm.x86.fma.mask.vfmsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 4) nounwind
+ ret <16 x float> %res
+}
+
+define <16 x float> @test_mask_round_vfmsub512_ps_rrbz_rne(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
+ ; CHECK-LABEL: test_mask_round_vfmsub512_ps_rrbz_rne
+ ; CHECK: vfmsub213ps {rn-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0x75,0x18,0xaa,0xc2]
+ %res = call <16 x float> @llvm.x86.fma.mask.vfmsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 0) nounwind
+ ret <16 x float> %res
+}
+
+define <16 x float> @test_mask_round_vfmsub512_ps_rrbz_rtn(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
+ ; CHECK-LABEL: test_mask_round_vfmsub512_ps_rrbz_rtn
+ ; CHECK: vfmsub213ps {rd-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0x75,0x38,0xaa,0xc2]
+ %res = call <16 x float> @llvm.x86.fma.mask.vfmsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 1) nounwind
+ ret <16 x float> %res
+}
+
+define <16 x float> @test_mask_round_vfmsub512_ps_rrbz_rtp(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
+ ; CHECK-LABEL: test_mask_round_vfmsub512_ps_rrbz_rtp
+ ; CHECK: vfmsub213ps {ru-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0x75,0x58,0xaa,0xc2]
+ %res = call <16 x float> @llvm.x86.fma.mask.vfmsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 2) nounwind
+ ret <16 x float> %res
+}
+
+define <16 x float> @test_mask_round_vfmsub512_ps_rrbz_rtz(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
+ ; CHECK-LABEL: test_mask_round_vfmsub512_ps_rrbz_rtz
+ ; CHECK: vfmsub213ps {rz-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0x75,0x78,0xaa,0xc2]
+ %res = call <16 x float> @llvm.x86.fma.mask.vfmsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 3) nounwind
+ ret <16 x float> %res
+}
+
+define <16 x float> @test_mask_round_vfmsub512_ps_rrbz_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
+ ; CHECK-LABEL: test_mask_round_vfmsub512_ps_rrbz_current
+ ; CHECK: vfmsub213ps %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0x75,0x48,0xaa,0xc2]
+ %res = call <16 x float> @llvm.x86.fma.mask.vfmsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 4) nounwind
+ ret <16 x float> %res
+}
+
+define <8 x double> @test_mask_round_vfmadd512_pd_rrb_rne(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
+ ; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrb_rne
+ ; CHECK: vfmadd213pd {rn-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x19,0xa8,0xc2]
+ %res = call <8 x double> @llvm.x86.fma.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 0) nounwind
+ ret <8 x double> %res
+}
+
+define <8 x double> @test_mask_round_vfmadd512_pd_rrb_rtn(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
+ ; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrb_rtn
+ ; CHECK: vfmadd213pd {rd-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x39,0xa8,0xc2]
+ %res = call <8 x double> @llvm.x86.fma.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 1) nounwind
+ ret <8 x double> %res
+}
+
+define <8 x double> @test_mask_round_vfmadd512_pd_rrb_rtp(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
+ ; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrb_rtp
+ ; CHECK: vfmadd213pd {ru-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x59,0xa8,0xc2]
+ %res = call <8 x double> @llvm.x86.fma.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 2) nounwind
+ ret <8 x double> %res
+}
+
+define <8 x double> @test_mask_round_vfmadd512_pd_rrb_rtz(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
+ ; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrb_rtz
+ ; CHECK: vfmadd213pd {rz-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x79,0xa8,0xc2]
+ %res = call <8 x double> @llvm.x86.fma.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 3) nounwind
+ ret <8 x double> %res
+}
+
+define <8 x double> @test_mask_round_vfmadd512_pd_rrb_current(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
+ ; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrb_current
+ ; CHECK: vfmadd213pd %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0xa8,0xc2]
+ %res = call <8 x double> @llvm.x86.fma.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 4) nounwind
+ ret <8 x double> %res
+}
+
+define <8 x double> @test_mask_round_vfmadd512_pd_rrbz_rne(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
+ ; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrbz_rne
+ ; CHECK: vfmadd213pd {rn-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0xf5,0x18,0xa8,0xc2]
+ %res = call <8 x double> @llvm.x86.fma.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 0) nounwind
+ ret <8 x double> %res
+}
+
+define <8 x double> @test_mask_round_vfmadd512_pd_rrbz_rtn(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
+ ; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrbz_rtn
+ ; CHECK: vfmadd213pd {rd-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0xf5,0x38,0xa8,0xc2]
+ %res = call <8 x double> @llvm.x86.fma.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 1) nounwind
+ ret <8 x double> %res
+}
+
+define <8 x double> @test_mask_round_vfmadd512_pd_rrbz_rtp(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
+ ; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrbz_rtp
+ ; CHECK: vfmadd213pd {ru-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0xf5,0x58,0xa8,0xc2]
+ %res = call <8 x double> @llvm.x86.fma.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 2) nounwind
+ ret <8 x double> %res
+}
+
+define <8 x double> @test_mask_round_vfmadd512_pd_rrbz_rtz(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
+ ; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrbz_rtz
+ ; CHECK: vfmadd213pd {rz-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0xf5,0x78,0xa8,0xc2]
+ %res = call <8 x double> @llvm.x86.fma.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 3) nounwind
+ ret <8 x double> %res
+}
+
+define <8 x double> @test_mask_round_vfmadd512_pd_rrbz_current(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
+ ; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrbz_current
+ ; CHECK: vfmadd213pd %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0xf5,0x48,0xa8,0xc2]
+ %res = call <8 x double> @llvm.x86.fma.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 4) nounwind
+ ret <8 x double> %res
+}
+
+
+define <8 x double> @test_mask_round_vfnmsub512_pd_rrb_rne(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
+ ; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrb_rne
+ ; CHECK: vfnmsub213pd {rn-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x19,0xae,0xc2]
+ %res = call <8 x double> @llvm.x86.fma.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 0) nounwind
+ ret <8 x double> %res
+}
+
+define <8 x double> @test_mask_round_vfnmsub512_pd_rrb_rtn(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
+ ; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrb_rtn
+ ; CHECK: vfnmsub213pd {rd-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x39,0xae,0xc2]
+ %res = call <8 x double> @llvm.x86.fma.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 1) nounwind
+ ret <8 x double> %res
+}
+
+define <8 x double> @test_mask_round_vfnmsub512_pd_rrb_rtp(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
+ ; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrb_rtp
+ ; CHECK: vfnmsub213pd {ru-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x59,0xae,0xc2]
+ %res = call <8 x double> @llvm.x86.fma.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 2) nounwind
+ ret <8 x double> %res
+}
+
+define <8 x double> @test_mask_round_vfnmsub512_pd_rrb_rtz(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
+ ; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrb_rtz
+ ; CHECK: vfnmsub213pd {rz-sae}, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x79,0xae,0xc2]
+ %res = call <8 x double> @llvm.x86.fma.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 3) nounwind
+ ret <8 x double> %res
+}
+
+define <8 x double> @test_mask_round_vfnmsub512_pd_rrb_current(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
+ ; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrb_current
+ ; CHECK: vfnmsub213pd %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0xae,0xc2]
+ %res = call <8 x double> @llvm.x86.fma.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 4) nounwind
+ ret <8 x double> %res
+}
+
+define <8 x double> @test_mask_round_vfnmsub512_pd_rrbz_rne(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
+ ; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrbz_rne
+ ; CHECK: vfnmsub213pd {rn-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0xf5,0x18,0xae,0xc2]
+ %res = call <8 x double> @llvm.x86.fma.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 0) nounwind
+ ret <8 x double> %res
+}
+
+define <8 x double> @test_mask_round_vfnmsub512_pd_rrbz_rtn(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
+ ; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrbz_rtn
+ ; CHECK: vfnmsub213pd {rd-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0xf5,0x38,0xae,0xc2]
+ %res = call <8 x double> @llvm.x86.fma.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 1) nounwind
+ ret <8 x double> %res
+}
+
+define <8 x double> @test_mask_round_vfnmsub512_pd_rrbz_rtp(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
+ ; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrbz_rtp
+ ; CHECK: vfnmsub213pd {ru-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0xf5,0x58,0xae,0xc2]
+ %res = call <8 x double> @llvm.x86.fma.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 2) nounwind
+ ret <8 x double> %res
+}
+
+define <8 x double> @test_mask_round_vfnmsub512_pd_rrbz_rtz(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
+ ; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrbz_rtz
+ ; CHECK: vfnmsub213pd {rz-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0xf5,0x78,0xae,0xc2]
+ %res = call <8 x double> @llvm.x86.fma.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 3) nounwind
+ ret <8 x double> %res
+}
+
+define <8 x double> @test_mask_round_vfnmsub512_pd_rrbz_current(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
+ ; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrbz_current
+ ; CHECK: vfnmsub213pd %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0xf5,0x48,0xae,0xc2]
+ %res = call <8 x double> @llvm.x86.fma.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 4) nounwind
+ ret <8 x double> %res
+}
diff --git a/test/CodeGen/X86/avx512-gather-scatter-intrin.ll b/test/CodeGen/X86/avx512-gather-scatter-intrin.ll
index 20bf7e4a16e0..0e32a1c28067 100644
--- a/test/CodeGen/X86/avx512-gather-scatter-intrin.ll
+++ b/test/CodeGen/X86/avx512-gather-scatter-intrin.ll
@@ -170,7 +170,7 @@ define <8 x float> @gather_mask_qps_execdomain(<8 x i64> %ind, <8 x float> %src,
;CHECK: vscatterdpd
;CHECK: ret
define void @scatter_mask_dpd_execdomain(<8 x i32> %ind, <8 x double>* %src, i8 %mask, i8* %base, i8* %stbuf) {
- %x = load <8 x double>* %src, align 64
+ %x = load <8 x double>, <8 x double>* %src, align 64
call void @llvm.x86.avx512.scatter.dpd.512 (i8* %stbuf, i8 %mask, <8 x i32>%ind, <8 x double> %x, i32 4)
ret void
}
@@ -180,7 +180,7 @@ define void @scatter_mask_dpd_execdomain(<8 x i32> %ind, <8 x double>* %src, i8
;CHECK: vscatterqpd
;CHECK: ret
define void @scatter_mask_qpd_execdomain(<8 x i64> %ind, <8 x double>* %src, i8 %mask, i8* %base, i8* %stbuf) {
- %x = load <8 x double>* %src, align 64
+ %x = load <8 x double>, <8 x double>* %src, align 64
call void @llvm.x86.avx512.scatter.qpd.512 (i8* %stbuf, i8 %mask, <8 x i64>%ind, <8 x double> %x, i32 4)
ret void
}
@@ -190,7 +190,7 @@ define void @scatter_mask_qpd_execdomain(<8 x i64> %ind, <8 x double>* %src, i8
;CHECK: vscatterdps
;CHECK: ret
define void @scatter_mask_dps_execdomain(<16 x i32> %ind, <16 x float>* %src, i16 %mask, i8* %base, i8* %stbuf) {
- %x = load <16 x float>* %src, align 64
+ %x = load <16 x float>, <16 x float>* %src, align 64
call void @llvm.x86.avx512.scatter.dps.512 (i8* %stbuf, i16 %mask, <16 x i32>%ind, <16 x float> %x, i32 4)
ret void
}
@@ -200,7 +200,7 @@ define void @scatter_mask_dps_execdomain(<16 x i32> %ind, <16 x float>* %src, i1
;CHECK: vscatterqps
;CHECK: ret
define void @scatter_mask_qps_execdomain(<8 x i64> %ind, <8 x float>* %src, i8 %mask, i8* %base, i8* %stbuf) {
- %x = load <8 x float>* %src, align 32
+ %x = load <8 x float>, <8 x float>* %src, align 32
call void @llvm.x86.avx512.scatter.qps.512 (i8* %stbuf, i8 %mask, <8 x i64>%ind, <8 x float> %x, i32 4)
ret void
}
diff --git a/test/CodeGen/X86/avx512-i1test.ll b/test/CodeGen/X86/avx512-i1test.ll
index 4814314a6442..ba2f49b79427 100755
--- a/test/CodeGen/X86/avx512-i1test.ll
+++ b/test/CodeGen/X86/avx512-i1test.ll
@@ -5,8 +5,8 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
; CHECK-LABEL: func
-; CHECK: kortestw
-; CHECK: kortestw
+; CHECK: testb
+; CHECK: testb
define void @func() {
bb1:
br i1 undef, label %L_10, label %L_10
@@ -18,7 +18,7 @@ bb56: ; preds = %L_10
br label %bb33
bb33: ; preds = %bb51, %bb56
- %r111 = load i64* undef, align 8
+ %r111 = load i64, i64* undef, align 8
br i1 undef, label %bb51, label %bb35
bb35: ; preds = %bb33
diff --git a/test/CodeGen/X86/avx512-insert-extract.ll b/test/CodeGen/X86/avx512-insert-extract.ll
index eba895ebf565..6f985f0bf3a7 100644
--- a/test/CodeGen/X86/avx512-insert-extract.ll
+++ b/test/CodeGen/X86/avx512-insert-extract.ll
@@ -6,7 +6,7 @@
;CHECK: vinsertf32x4
;CHECK: ret
define <16 x float> @test1(<16 x float> %x, float* %br, float %y) nounwind {
- %rrr = load float* %br
+ %rrr = load float, float* %br
%rrr2 = insertelement <16 x float> %x, float %rrr, i32 1
%rrr3 = insertelement <16 x float> %rrr2, float %y, i32 14
ret <16 x float> %rrr3
@@ -20,7 +20,7 @@ define <16 x float> @test1(<16 x float> %x, float* %br, float %y) nounwind {
;SKX: vinsertf64x2 $3
;CHECK: ret
define <8 x double> @test2(<8 x double> %x, double* %br, double %y) nounwind {
- %rrr = load double* %br
+ %rrr = load double, double* %br
%rrr2 = insertelement <8 x double> %x, double %rrr, i32 1
%rrr3 = insertelement <8 x double> %rrr2, double %y, i32 6
ret <8 x double> %rrr3
@@ -106,7 +106,7 @@ define i32 @test10(<16 x i32> %x, i32 %ind) nounwind {
;CHECK: vpcmpltud
;CHECK: kshiftlw $11
;CHECK: kshiftrw $15
-;CHECK: kortestw
+;CHECK: testb
;CHECK: je
;CHECK: ret
;CHECK: ret
@@ -125,7 +125,7 @@ define <16 x i32> @test11(<16 x i32>%a, <16 x i32>%b) {
;CHECK: vpcmpgtq
;CHECK: kshiftlw $15
;CHECK: kshiftrw $15
-;CHECK: kortestw
+;CHECK: testb
;CHECK: ret
define i64 @test12(<16 x i64>%a, <16 x i64>%b, i64 %a1, i64 %b1) {
@@ -137,10 +137,12 @@ define i64 @test12(<16 x i64>%a, <16 x i64>%b, i64 %a1, i64 %b1) {
}
;CHECK-LABEL: test13
-;CHECK: cmpl
-;CHECK: sbbl
-;CHECK: orl $65532
-;CHECK: ret
+;CHECK: cmpl %esi, %edi
+;CHECK: setb %al
+;CHECK: andl $1, %eax
+;CHECK: kmovw %eax, %k0
+;CHECK: movw $-4
+;CHECK: korw
define i16 @test13(i32 %a, i32 %b) {
%cmp_res = icmp ult i32 %a, %b
%maskv = insertelement <16 x i1> <i1 true, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i1 %cmp_res, i32 0
@@ -150,9 +152,12 @@ define i16 @test13(i32 %a, i32 %b) {
;CHECK-LABEL: test14
;CHECK: vpcmpgtq
-;CHECK: kshiftlw $11
-;CHECK: kshiftrw $15
-;CHECK: kortestw
+;KNL: kshiftlw $11
+;KNL: kshiftrw $15
+;KNL: testb
+;SKX: kshiftlb $3
+;SKX: kshiftrb $7
+;SKX: testb
;CHECK: ret
define i64 @test14(<8 x i64>%a, <8 x i64>%b, i64 %a1, i64 %b1) {
@@ -164,23 +169,26 @@ define i64 @test14(<8 x i64>%a, <8 x i64>%b, i64 %a1, i64 %b1) {
}
;CHECK-LABEL: test15
-;CHECK: kshiftlw
-;CHECK: kmovw
-;CHECK: ret
+;CHECK: movb (%rdi), %al
+;CHECK: andb $1, %al
+;CHECK: movw $-1, %ax
+;CHECK: cmovew
define i16 @test15(i1 *%addr) {
- %x = load i1 * %addr, align 128
+ %x = load i1 , i1 * %addr, align 1
%x1 = insertelement <16 x i1> undef, i1 %x, i32 10
%x2 = bitcast <16 x i1>%x1 to i16
ret i16 %x2
}
;CHECK-LABEL: test16
-;CHECK: kshiftlw
-;CHECK: kshiftrw
+;CHECK: movb (%rdi), %al
+;CHECK: andw $1, %ax
+;CHECK: kmovw
+;CHECK: kshiftlw $10
;CHECK: korw
;CHECK: ret
define i16 @test16(i1 *%addr, i16 %a) {
- %x = load i1 * %addr, align 128
+ %x = load i1 , i1 * %addr, align 128
%a1 = bitcast i16 %a to <16 x i1>
%x1 = insertelement <16 x i1> %a1, i1 %x, i32 10
%x2 = bitcast <16 x i1>%x1 to i16
@@ -188,13 +196,15 @@ define i16 @test16(i1 *%addr, i16 %a) {
}
;CHECK-LABEL: test17
-;CHECK: kshiftlw
-;CHECK: kshiftrw
+;KNL: movb (%rdi), %al
+;KNL: andw $1, %ax
+;KNL: kshiftlw $4
;KNL: korw
+;SKX: kshiftlb $4
;SKX: korb
;CHECK: ret
define i8 @test17(i1 *%addr, i8 %a) {
- %x = load i1 * %addr, align 128
+ %x = load i1 , i1 * %addr, align 128
%a1 = bitcast i8 %a to <8 x i1>
%x1 = insertelement <8 x i1> %a1, i1 %x, i32 4
%x2 = bitcast <8 x i1>%x1 to i8
diff --git a/test/CodeGen/X86/avx512-intel-ocl.ll b/test/CodeGen/X86/avx512-intel-ocl.ll
new file mode 100644
index 000000000000..2e1b27e4aecf
--- /dev/null
+++ b/test/CodeGen/X86/avx512-intel-ocl.ll
@@ -0,0 +1,105 @@
+; RUN: llc < %s -mtriple=i686-apple-darwin -mcpu=knl | FileCheck -check-prefix=X32 %s
+; RUN: llc < %s -mtriple=i386-pc-win32 -mcpu=knl | FileCheck -check-prefix=X32 %s
+; RUN: llc < %s -mtriple=x86_64-win32 -mcpu=knl | FileCheck -check-prefix=WIN64 %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck -check-prefix=X64 %s
+
+declare <16 x float> @func_float16_ptr(<16 x float>, <16 x float> *)
+declare <16 x float> @func_float16(<16 x float>, <16 x float>)
+declare i32 @func_int(i32, i32)
+
+; WIN64-LABEL: testf16_inp
+; WIN64: vaddps {{.*}}, {{%zmm[0-1]}}
+; WIN64: leaq {{.*}}(%rsp), %rcx
+; WIN64: call
+; WIN64: ret
+
+; X32-LABEL: testf16_inp
+; X32: vaddps {{.*}}, {{%zmm[0-1]}}
+; X32: movl %eax, (%esp)
+; X32: call
+; X32: ret
+
+; X64-LABEL: testf16_inp
+; X64: vaddps {{.*}}, {{%zmm[0-1]}}
+; X64: leaq {{.*}}(%rsp), %rdi
+; X64: call
+; X64: ret
+
+; test calling conventions - input parameters
+define <16 x float> @testf16_inp(<16 x float> %a, <16 x float> %b) nounwind {
+ %y = alloca <16 x float>, align 16
+ %x = fadd <16 x float> %a, %b
+ %1 = call intel_ocl_bicc <16 x float> @func_float16_ptr(<16 x float> %x, <16 x float>* %y)
+ %2 = load <16 x float>, <16 x float>* %y, align 16
+ %3 = fadd <16 x float> %2, %1
+ ret <16 x float> %3
+}
+
+; test calling conventions - preserved registers
+
+; preserved zmm16-
+; WIN64-LABEL: testf16_regs
+; WIN64: call
+; WIN64: vaddps %zmm16, %zmm0, %zmm0
+; WIN64: ret
+
+; preserved zmm16-
+; X64-LABEL: testf16_regs
+; X64: call
+; X64: vaddps %zmm16, %zmm0, %zmm0
+; X64: ret
+
+define <16 x float> @testf16_regs(<16 x float> %a, <16 x float> %b) nounwind {
+ %y = alloca <16 x float>, align 16
+ %x = fadd <16 x float> %a, %b
+ %1 = call intel_ocl_bicc <16 x float> @func_float16_ptr(<16 x float> %x, <16 x float>* %y)
+ %2 = load <16 x float>, <16 x float>* %y, align 16
+ %3 = fadd <16 x float> %1, %b
+ %4 = fadd <16 x float> %2, %3
+ ret <16 x float> %4
+}
+
+; test calling conventions - prolog and epilog
+; WIN64-LABEL: test_prolog_epilog
+; WIN64: vmovups %zmm21, {{.*(%rbp).*}} # 64-byte Spill
+; WIN64: vmovups %zmm6, {{.*(%rbp).*}} # 64-byte Spill
+; WIN64: call
+; WIN64: vmovups {{.*(%rbp).*}}, %zmm6 # 64-byte Reload
+; WIN64: vmovups {{.*(%rbp).*}}, %zmm21 # 64-byte Reload
+
+; X64-LABEL: test_prolog_epilog
+; X64: kmovw %k7, {{.*}}(%rsp) ## 8-byte Folded Spill
+; X64: kmovw %k6, {{.*}}(%rsp) ## 8-byte Folded Spill
+; X64: kmovw %k5, {{.*}}(%rsp) ## 8-byte Folded Spill
+; X64: kmovw %k4, {{.*}}(%rsp) ## 8-byte Folded Spill
+; X64: vmovups %zmm31, {{.*}}(%rsp) ## 64-byte Spill
+; X64: vmovups %zmm16, {{.*}}(%rsp) ## 64-byte Spill
+; X64: call
+; X64: vmovups {{.*}}(%rsp), %zmm16 ## 64-byte Reload
+; X64: vmovups {{.*}}(%rsp), %zmm31 ## 64-byte Reload
+define intel_ocl_bicc <16 x float> @test_prolog_epilog(<16 x float> %a, <16 x float> %b) nounwind {
+ %c = call <16 x float> @func_float16(<16 x float> %a, <16 x float> %b)
+ ret <16 x float> %c
+}
+
+
+declare <16 x float> @func_float16_mask(<16 x float>, <16 x i1>)
+
+; X64-LABEL: testf16_inp_mask
+; X64: kmovw %edi, %k1
+; X64: call
+define <16 x float> @testf16_inp_mask(<16 x float> %a, i16 %mask) {
+ %imask = bitcast i16 %mask to <16 x i1>
+ %1 = call intel_ocl_bicc <16 x float> @func_float16_mask(<16 x float> %a, <16 x i1> %imask)
+ ret <16 x float> %1
+}
+
+; X64-LABEL: test_prolog_epilog_with_mask
+; X64: kxorw %k{{.*}}, %k{{.*}}, %k1
+; X64: call
+define intel_ocl_bicc <16 x float> @test_prolog_epilog_with_mask(<16 x float> %a, <16 x i32> %x1, <16 x i32>%x2, <16 x i1> %mask) nounwind {
+ %cmp_res = icmp eq <16 x i32>%x1, %x2
+ %mask1 = xor <16 x i1> %cmp_res, %mask
+ %c = call intel_ocl_bicc <16 x float> @func_float16_mask(<16 x float> %a, <16 x i1>%mask1)
+ ret <16 x float> %c
+} \ No newline at end of file
diff --git a/test/CodeGen/X86/avx512-intrinsics.ll b/test/CodeGen/X86/avx512-intrinsics.ll
index 7cd01683fa98..471e34cdedce 100644
--- a/test/CodeGen/X86/avx512-intrinsics.ll
+++ b/test/CodeGen/X86/avx512-intrinsics.ll
@@ -5,7 +5,7 @@ declare i32 @llvm.x86.avx512.kortestz.w(i16, i16) nounwind readnone
; CHECK: kortestw
; CHECK: sete
define i32 @test_kortestz(i16 %a0, i16 %a1) {
- %res = call i32 @llvm.x86.avx512.kortestz.w(i16 %a0, i16 %a1)
+ %res = call i32 @llvm.x86.avx512.kortestz.w(i16 %a0, i16 %a1)
ret i32 %res
}
@@ -14,7 +14,7 @@ declare i32 @llvm.x86.avx512.kortestc.w(i16, i16) nounwind readnone
; CHECK: kortestw
; CHECK: sbbl
define i32 @test_kortestc(i16 %a0, i16 %a1) {
- %res = call i32 @llvm.x86.avx512.kortestc.w(i16 %a0, i16 %a1)
+ %res = call i32 @llvm.x86.avx512.kortestc.w(i16 %a0, i16 %a1)
ret i32 %res
}
@@ -277,7 +277,7 @@ define <8 x i64> @test_conflict_q(<8 x i64> %a) {
declare <8 x i64> @llvm.x86.avx512.mask.conflict.q.512(<8 x i64>, <8 x i64>, i8) nounwind readonly
define <16 x i32> @test_maskz_conflict_d(<16 x i32> %a, i16 %mask) {
- ; CHECK: vpconflictd
+ ; CHECK: vpconflictd
%res = call <16 x i32> @llvm.x86.avx512.mask.conflict.d.512(<16 x i32> %a, <16 x i32> zeroinitializer, i16 %mask)
ret <16 x i32> %res
}
@@ -356,7 +356,7 @@ define <8 x double> @test_x86_mask_blend_pd_512(i8 %a0, <8 x double> %a1, <8 x d
define <8 x double> @test_x86_mask_blend_pd_512_memop(<8 x double> %a, <8 x double>* %ptr, i8 %mask) {
; CHECK-LABEL: test_x86_mask_blend_pd_512_memop
; CHECK: vblendmpd (%
- %b = load <8 x double>* %ptr
+ %b = load <8 x double>, <8 x double>* %ptr
%res = call <8 x double> @llvm.x86.avx512.mask.blend.pd.512(<8 x double> %a, <8 x double> %b, i8 %mask) ; <<8 x double>> [#uses=1]
ret <8 x double> %res
}
@@ -382,7 +382,7 @@ declare <8 x i64> @llvm.x86.avx512.mask.blend.q.512(<8 x i64>, <8 x i64>, i8) no
ret <8 x i32>%res
}
declare <8 x i32> @llvm.x86.avx512.mask.cvtpd2udq.512(<8 x double>, <8 x i32>, i8, i32)
-
+
define <16 x i32> @test_cvtps2udq(<16 x float> %a) {
;CHECK: vcvtps2udq {rd-sae}{{.*}}encoding: [0x62,0xf1,0x7c,0x38,0x79,0xc0]
%res = call <16 x i32> @llvm.x86.avx512.mask.cvtps2udq.512(<16 x float> %a, <16 x i32>zeroinitializer, i16 -1, i32 1)
@@ -434,15 +434,6 @@ declare <8 x i64> @llvm.x86.avx512.mask.blend.q.512(<8 x i64>, <8 x i64>, i8) no
declare <8 x double> @llvm.x86.avx512.mask.cvtudq2pd.512(<8 x i32>, <8 x double>, i8)
; fp min - max
-define <16 x float> @test_vmaxps(<16 x float> %a0, <16 x float> %a1) {
- ; CHECK: vmaxps
- %res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1,
- <16 x float>zeroinitializer, i16 -1, i32 4)
- ret <16 x float> %res
-}
-declare <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float>, <16 x float>,
- <16 x float>, i16, i32)
-
define <8 x double> @test_vmaxpd(<8 x double> %a0, <8 x double> %a1) {
; CHECK: vmaxpd
%res = call <8 x double> @llvm.x86.avx512.mask.max.pd.512(<8 x double> %a0, <8 x double> %a1,
@@ -452,15 +443,6 @@ define <8 x double> @test_vmaxpd(<8 x double> %a0, <8 x double> %a1) {
declare <8 x double> @llvm.x86.avx512.mask.max.pd.512(<8 x double>, <8 x double>,
<8 x double>, i8, i32)
-define <16 x float> @test_vminps(<16 x float> %a0, <16 x float> %a1) {
- ; CHECK: vminps
- %res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1,
- <16 x float>zeroinitializer, i16 -1, i32 4)
- ret <16 x float> %res
-}
-declare <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float>, <16 x float>,
- <16 x float>, i16, i32)
-
define <8 x double> @test_vminpd(<8 x double> %a0, <8 x double> %a1) {
; CHECK: vminpd
%res = call <8 x double> @llvm.x86.avx512.mask.min.pd.512(<8 x double> %a0, <8 x double> %a1,
@@ -515,14 +497,6 @@ define <16 x i32> @test_vpmaxsd(<16 x i32> %a0, <16 x i32> %a1) {
}
declare <16 x i32> @llvm.x86.avx512.mask.pmaxs.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
-define <8 x i64> @test_vpmuludq(<16 x i32> %a0, <16 x i32> %a1) {
- ; CHECK: vpmuludq {{.*}}encoding: [0x62,0xf1,0xfd,0x48,0xf4,0xc1]
- %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a0, <16 x i32> %a1,
- <8 x i64>zeroinitializer, i8 -1)
- ret <8 x i64> %res
-}
-declare <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32>, <16 x i32>, <8 x i64>, i8)
-
define i8 @test_vptestmq(<8 x i64> %a0, <8 x i64> %a1) {
; CHECK: vptestmq {{.*}}encoding: [0x62,0xf2,0xfd,0x48,0x27,0xc1]
%res = call i8 @llvm.x86.avx512.mask.ptestm.q.512(<8 x i64> %a0, <8 x i64> %a1, i8 -1)
@@ -551,7 +525,73 @@ define void @test_store2(<8 x double> %data, i8* %ptr, i8 %mask) {
ret void
}
-declare void @llvm.x86.avx512.mask.storeu.pd.512(i8*, <8 x double>, i8 )
+declare void @llvm.x86.avx512.mask.storeu.pd.512(i8*, <8 x double>, i8)
+
+define void @test_mask_store_aligned_ps(<16 x float> %data, i8* %ptr, i16 %mask) {
+; CHECK-LABEL: test_mask_store_aligned_ps:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: vmovaps %zmm0, (%rdi) {%k1}
+; CHECK-NEXT: retq
+ call void @llvm.x86.avx512.mask.store.ps.512(i8* %ptr, <16 x float> %data, i16 %mask)
+ ret void
+}
+
+declare void @llvm.x86.avx512.mask.store.ps.512(i8*, <16 x float>, i16 )
+
+define void @test_mask_store_aligned_pd(<8 x double> %data, i8* %ptr, i8 %mask) {
+; CHECK-LABEL: test_mask_store_aligned_pd:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: vmovapd %zmm0, (%rdi) {%k1}
+; CHECK-NEXT: retq
+ call void @llvm.x86.avx512.mask.store.pd.512(i8* %ptr, <8 x double> %data, i8 %mask)
+ ret void
+}
+
+declare void @llvm.x86.avx512.mask.store.pd.512(i8*, <8 x double>, i8)
+
+define <16 x float> @test_maskz_load_aligned_ps(<16 x float> %data, i8* %ptr, i16 %mask) {
+; CHECK-LABEL: test_maskz_load_aligned_ps:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: vmovaps (%rdi), %zmm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %res = call <16 x float> @llvm.x86.avx512.mask.load.ps.512(i8* %ptr, <16 x float> zeroinitializer, i16 %mask)
+ ret <16 x float> %res
+}
+
+declare <16 x float> @llvm.x86.avx512.mask.load.ps.512(i8*, <16 x float>, i16)
+
+define <8 x double> @test_maskz_load_aligned_pd(<8 x double> %data, i8* %ptr, i8 %mask) {
+; CHECK-LABEL: test_maskz_load_aligned_pd:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: vmovapd (%rdi), %zmm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %res = call <8 x double> @llvm.x86.avx512.mask.load.pd.512(i8* %ptr, <8 x double> zeroinitializer, i8 %mask)
+ ret <8 x double> %res
+}
+
+declare <8 x double> @llvm.x86.avx512.mask.load.pd.512(i8*, <8 x double>, i8)
+
+define <16 x float> @test_load_aligned_ps(<16 x float> %data, i8* %ptr, i16 %mask) {
+; CHECK-LABEL: test_load_aligned_ps:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vmovaps (%rdi), %zmm0
+; CHECK-NEXT: retq
+ %res = call <16 x float> @llvm.x86.avx512.mask.load.ps.512(i8* %ptr, <16 x float> zeroinitializer, i16 -1)
+ ret <16 x float> %res
+}
+
+define <8 x double> @test_load_aligned_pd(<8 x double> %data, i8* %ptr, i8 %mask) {
+; CHECK-LABEL: test_load_aligned_pd:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vmovapd (%rdi), %zmm0
+; CHECK-NEXT: retq
+ %res = call <8 x double> @llvm.x86.avx512.mask.load.pd.512(i8* %ptr, <8 x double> zeroinitializer, i8 -1)
+ ret <8 x double> %res
+}
define <16 x float> @test_vpermt2ps(<16 x float>%x, <16 x float>%y, <16 x i32>%perm) {
; CHECK: vpermt2ps {{.*}}encoding: [0x62,0xf2,0x6d,0x48,0x7f,0xc1]
@@ -959,8 +999,8 @@ define <16 x i32> @test_x86_avx512_pslli_d(<16 x i32> %a0) {
}
define <16 x i32> @test_x86_avx512_mask_pslli_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
- ; CHECK-LABEL: test_x86_avx512_mask_pslli_d
- ; CHECK: vpslld $7, %zmm0, %zmm1 {%k1}
+ ; CHECK-LABEL: test_x86_avx512_mask_pslli_d
+ ; CHECK: vpslld $7, %zmm0, %zmm1 {%k1}
%res = call <16 x i32> @llvm.x86.avx512.mask.pslli.d(<16 x i32> %a0, i32 7, <16 x i32> %a1, i16 %mask)
ret <16 x i32> %res
}
@@ -983,14 +1023,14 @@ define <8 x i64> @test_x86_avx512_pslli_q(<8 x i64> %a0) {
define <8 x i64> @test_x86_avx512_mask_pslli_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
; CHECK-LABEL: test_x86_avx512_mask_pslli_q
- ; CHECK: vpsllq $7, %zmm0, %zmm1 {%k1}
+ ; CHECK: vpsllq $7, %zmm0, %zmm1 {%k1}
%res = call <8 x i64> @llvm.x86.avx512.mask.pslli.q(<8 x i64> %a0, i32 7, <8 x i64> %a1, i8 %mask)
ret <8 x i64> %res
}
define <8 x i64> @test_x86_avx512_maskz_pslli_q(<8 x i64> %a0, i8 %mask) {
; CHECK-LABEL: test_x86_avx512_maskz_pslli_q
- ; CHECK: vpsllq $7, %zmm0, %zmm0 {%k1} {z}
+ ; CHECK: vpsllq $7, %zmm0, %zmm0 {%k1} {z}
%res = call <8 x i64> @llvm.x86.avx512.mask.pslli.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 %mask)
ret <8 x i64> %res
}
@@ -1006,7 +1046,7 @@ define <16 x i32> @test_x86_avx512_psrli_d(<16 x i32> %a0) {
define <16 x i32> @test_x86_avx512_mask_psrli_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
; CHECK-LABEL: test_x86_avx512_mask_psrli_d
- ; CHECK: vpsrld $7, %zmm0, %zmm1 {%k1}
+ ; CHECK: vpsrld $7, %zmm0, %zmm1 {%k1}
%res = call <16 x i32> @llvm.x86.avx512.mask.psrli.d(<16 x i32> %a0, i32 7, <16 x i32> %a1, i16 %mask)
ret <16 x i32> %res
}
@@ -1029,7 +1069,7 @@ define <8 x i64> @test_x86_avx512_psrli_q(<8 x i64> %a0) {
define <8 x i64> @test_x86_avx512_mask_psrli_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
; CHECK-LABEL: test_x86_avx512_mask_psrli_q
- ; CHECK: vpsrlq $7, %zmm0, %zmm1 {%k1}
+ ; CHECK: vpsrlq $7, %zmm0, %zmm1 {%k1}
%res = call <8 x i64> @llvm.x86.avx512.mask.psrli.q(<8 x i64> %a0, i32 7, <8 x i64> %a1, i8 %mask)
ret <8 x i64> %res
}
@@ -1052,7 +1092,7 @@ define <16 x i32> @test_x86_avx512_psrai_d(<16 x i32> %a0) {
define <16 x i32> @test_x86_avx512_mask_psrai_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
; CHECK-LABEL: test_x86_avx512_mask_psrai_d
- ; CHECK: vpsrad $7, %zmm0, %zmm1 {%k1}
+ ; CHECK: vpsrad $7, %zmm0, %zmm1 {%k1}
%res = call <16 x i32> @llvm.x86.avx512.mask.psrai.d(<16 x i32> %a0, i32 7, <16 x i32> %a1, i16 %mask)
ret <16 x i32> %res
}
@@ -1075,7 +1115,7 @@ define <8 x i64> @test_x86_avx512_psrai_q(<8 x i64> %a0) {
define <8 x i64> @test_x86_avx512_mask_psrai_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
; CHECK-LABEL: test_x86_avx512_mask_psrai_q
- ; CHECK: vpsraq $7, %zmm0, %zmm1 {%k1}
+ ; CHECK: vpsraq $7, %zmm0, %zmm1 {%k1}
%res = call <8 x i64> @llvm.x86.avx512.mask.psrai.q(<8 x i64> %a0, i32 7, <8 x i64> %a1, i8 %mask)
ret <8 x i64> %res
}
@@ -1369,7 +1409,1364 @@ declare <8 x i64> @llvm.x86.avx512.mask.psrlv.q(<8 x i64>, <8 x i64>, <8 x i64>,
define <8 x i64> @test_x86_avx512_psrlv_q_memop(<8 x i64> %a0, <8 x i64>* %ptr) {
; CHECK-LABEL: test_x86_avx512_psrlv_q_memop
; CHECK: vpsrlvq (%
- %b = load <8 x i64>* %ptr
+ %b = load <8 x i64>, <8 x i64>* %ptr
%res = call <8 x i64> @llvm.x86.avx512.mask.psrlv.q(<8 x i64> %a0, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
ret <8 x i64> %res
}
+
+declare <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
+declare <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
+declare <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)
+
+define <16 x float> @test_vsubps_rn(<16 x float> %a0, <16 x float> %a1) {
+ ; CHECK-LABEL: test_vsubps_rn
+ ; CHECK: vsubps {rn-sae}{{.*}} ## encoding: [0x62,0xf1,0x7c,0x18,0x5c,0xc1]
+ %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1,
+ <16 x float> zeroinitializer, i16 -1, i32 0)
+ ret <16 x float> %res
+}
+
+define <16 x float> @test_vsubps_rd(<16 x float> %a0, <16 x float> %a1) {
+ ; CHECK-LABEL: test_vsubps_rd
+ ; CHECK: vsubps {rd-sae}{{.*}} ## encoding: [0x62,0xf1,0x7c,0x38,0x5c,0xc1]
+ %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1,
+ <16 x float> zeroinitializer, i16 -1, i32 1)
+ ret <16 x float> %res
+}
+
+define <16 x float> @test_vsubps_ru(<16 x float> %a0, <16 x float> %a1) {
+ ; CHECK-LABEL: test_vsubps_ru
+ ; CHECK: vsubps {ru-sae}{{.*}} ## encoding: [0x62,0xf1,0x7c,0x58,0x5c,0xc1]
+ %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1,
+ <16 x float> zeroinitializer, i16 -1, i32 2)
+ ret <16 x float> %res
+}
+
+define <16 x float> @test_vsubps_rz(<16 x float> %a0, <16 x float> %a1) {
+ ; CHECK-LABEL: test_vsubps_rz
+ ; CHECK: vsubps {rz-sae}{{.*}} ## encoding: [0x62,0xf1,0x7c,0x78,0x5c,0xc1]
+ %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1,
+ <16 x float> zeroinitializer, i16 -1, i32 3)
+ ret <16 x float> %res
+}
+
+define <16 x float> @test_vmulps_rn(<16 x float> %a0, <16 x float> %a1) {
+ ; CHECK-LABEL: test_vmulps_rn
+ ; CHECK: vmulps {rn-sae}{{.*}} ## encoding: [0x62,0xf1,0x7c,0x18,0x59,0xc1]
+ %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
+ <16 x float> zeroinitializer, i16 -1, i32 0)
+ ret <16 x float> %res
+}
+
+define <16 x float> @test_vmulps_rd(<16 x float> %a0, <16 x float> %a1) {
+ ; CHECK-LABEL: test_vmulps_rd
+ ; CHECK: vmulps {rd-sae}{{.*}} ## encoding: [0x62,0xf1,0x7c,0x38,0x59,0xc1]
+ %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
+ <16 x float> zeroinitializer, i16 -1, i32 1)
+ ret <16 x float> %res
+}
+
+define <16 x float> @test_vmulps_ru(<16 x float> %a0, <16 x float> %a1) {
+ ; CHECK-LABEL: test_vmulps_ru
+ ; CHECK: vmulps {ru-sae}{{.*}} ## encoding: [0x62,0xf1,0x7c,0x58,0x59,0xc1]
+ %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
+ <16 x float> zeroinitializer, i16 -1, i32 2)
+ ret <16 x float> %res
+}
+
+define <16 x float> @test_vmulps_rz(<16 x float> %a0, <16 x float> %a1) {
+ ; CHECK-LABEL: test_vmulps_rz
+ ; CHECK: vmulps {rz-sae}{{.*}} ## encoding: [0x62,0xf1,0x7c,0x78,0x59,0xc1]
+ %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
+ <16 x float> zeroinitializer, i16 -1, i32 3)
+ ret <16 x float> %res
+}
+
+;; mask float
+define <16 x float> @test_vmulps_mask_rn(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
+ ; CHECK-LABEL: test_vmulps_mask_rn
+ ; CHECK: vmulps {rn-sae}{{.*}}{%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x99,0x59,0xc1]
+ %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
+ <16 x float> zeroinitializer, i16 %mask, i32 0)
+ ret <16 x float> %res
+}
+
+define <16 x float> @test_vmulps_mask_rd(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
+ ; CHECK-LABEL: test_vmulps_mask_rd
+ ; CHECK: vmulps {rd-sae}{{.*}}{%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xb9,0x59,0xc1]
+ %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
+ <16 x float> zeroinitializer, i16 %mask, i32 1)
+ ret <16 x float> %res
+}
+
+define <16 x float> @test_vmulps_mask_ru(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
+ ; CHECK-LABEL: test_vmulps_mask_ru
+ ; CHECK: vmulps {ru-sae}{{.*}}{%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xd9,0x59,0xc1]
+ %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
+ <16 x float> zeroinitializer, i16 %mask, i32 2)
+ ret <16 x float> %res
+}
+
+define <16 x float> @test_vmulps_mask_rz(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
+ ; CHECK-LABEL: test_vmulps_mask_rz
+ ; CHECK: vmulps {rz-sae}{{.*}}{%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xf9,0x59,0xc1]
+ %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
+ <16 x float> zeroinitializer, i16 %mask, i32 3)
+ ret <16 x float> %res
+}
+
+;; With Passthru value
+define <16 x float> @test_vmulps_mask_passthru_rn(<16 x float> %a0, <16 x float> %a1, <16 x float> %passthru, i16 %mask) {
+ ; CHECK-LABEL: test_vmulps_mask_passthru_rn
+ ; CHECK: vmulps {rn-sae}{{.*}}{%k1} ## encoding: [0x62,0xf1,0x7c,0x19,0x59,0xd1]
+ %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
+ <16 x float> %passthru, i16 %mask, i32 0)
+ ret <16 x float> %res
+}
+
+define <16 x float> @test_vmulps_mask_passthru_rd(<16 x float> %a0, <16 x float> %a1, <16 x float> %passthru, i16 %mask) {
+ ; CHECK-LABEL: test_vmulps_mask_passthru_rd
+ ; CHECK: vmulps {rd-sae}{{.*}}{%k1} ## encoding: [0x62,0xf1,0x7c,0x39,0x59,0xd1]
+ %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
+ <16 x float> %passthru, i16 %mask, i32 1)
+ ret <16 x float> %res
+}
+
+define <16 x float> @test_vmulps_mask_passthru_ru(<16 x float> %a0, <16 x float> %a1, <16 x float> %passthru, i16 %mask) {
+ ; CHECK-LABEL: test_vmulps_mask_passthru_ru
+ ; CHECK: vmulps {ru-sae}{{.*}}{%k1} ## encoding: [0x62,0xf1,0x7c,0x59,0x59,0xd1]
+ %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
+ <16 x float> %passthru, i16 %mask, i32 2)
+ ret <16 x float> %res
+}
+
+define <16 x float> @test_vmulps_mask_passthru_rz(<16 x float> %a0, <16 x float> %a1, <16 x float> %passthru, i16 %mask) {
+ ; CHECK-LABEL: test_vmulps_mask_passthru_rz
+ ; CHECK: vmulps {rz-sae}{{.*}}{%k1} ## encoding: [0x62,0xf1,0x7c,0x79,0x59,0xd1]
+ %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
+ <16 x float> %passthru, i16 %mask, i32 3)
+ ret <16 x float> %res
+}
+
+;; mask double
+define <8 x double> @test_vmulpd_mask_rn(<8 x double> %a0, <8 x double> %a1, i8 %mask) {
+ ; CHECK-LABEL: test_vmulpd_mask_rn
+ ; CHECK: vmulpd {rn-sae}{{.*}}{%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x99,0x59,0xc1]
+ %res = call <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double> %a0, <8 x double> %a1,
+ <8 x double> zeroinitializer, i8 %mask, i32 0)
+ ret <8 x double> %res
+}
+
+define <8 x double> @test_vmulpd_mask_rd(<8 x double> %a0, <8 x double> %a1, i8 %mask) {
+ ; CHECK-LABEL: test_vmulpd_mask_rd
+ ; CHECK: vmulpd {rd-sae}{{.*}}{%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xb9,0x59,0xc1]
+ %res = call <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double> %a0, <8 x double> %a1,
+ <8 x double> zeroinitializer, i8 %mask, i32 1)
+ ret <8 x double> %res
+}
+
+define <8 x double> @test_vmulpd_mask_ru(<8 x double> %a0, <8 x double> %a1, i8 %mask) {
+ ; CHECK-LABEL: test_vmulpd_mask_ru
+ ; CHECK: vmulpd {ru-sae}{{.*}}{%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xd9,0x59,0xc1]
+ %res = call <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double> %a0, <8 x double> %a1,
+ <8 x double> zeroinitializer, i8 %mask, i32 2)
+ ret <8 x double> %res
+}
+
+define <8 x double> @test_vmulpd_mask_rz(<8 x double> %a0, <8 x double> %a1, i8 %mask) {
+ ; CHECK-LABEL: test_vmulpd_mask_rz
+ ; CHECK: vmulpd {rz-sae}{{.*}}{%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xf9,0x59,0xc1]
+ %res = call <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double> %a0, <8 x double> %a1,
+ <8 x double> zeroinitializer, i8 %mask, i32 3)
+ ret <8 x double> %res
+}
+
+define <16 x i32> @test_xor_epi32(<16 x i32> %a, <16 x i32> %b) {
+ ;CHECK-LABEL: test_xor_epi32
+ ;CHECK: vpxord {{.*}}encoding: [0x62,0xf1,0x7d,0x48,0xef,0xc1]
+ %res = call <16 x i32> @llvm.x86.avx512.mask.pxor.d.512(<16 x i32> %a,<16 x i32> %b, <16 x i32>zeroinitializer, i16 -1)
+ ret < 16 x i32> %res
+}
+
+define <16 x i32> @test_mask_xor_epi32(<16 x i32> %a,<16 x i32> %b, <16 x i32> %passThru, i16 %mask) {
+ ;CHECK-LABEL: test_mask_xor_epi32
+ ;CHECK: vpxord %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xef,0xd1]
+ %res = call <16 x i32> @llvm.x86.avx512.mask.pxor.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
+ ret < 16 x i32> %res
+}
+
+declare <16 x i32> @llvm.x86.avx512.mask.pxor.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
+
+define <16 x i32> @test_or_epi32(<16 x i32> %a, <16 x i32> %b) {
+ ;CHECK-LABEL: test_or_epi32
+ ;CHECK: vpord {{.*}}encoding: [0x62,0xf1,0x7d,0x48,0xeb,0xc1]
+ %res = call <16 x i32> @llvm.x86.avx512.mask.por.d.512(<16 x i32> %a,<16 x i32> %b, <16 x i32>zeroinitializer, i16 -1)
+ ret < 16 x i32> %res
+}
+
+define <16 x i32> @test_mask_or_epi32(<16 x i32> %a,<16 x i32> %b, <16 x i32> %passThru, i16 %mask) {
+ ;CHECK-LABEL: test_mask_or_epi32
+ ;CHECK: vpord %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xeb,0xd1]
+ %res = call <16 x i32> @llvm.x86.avx512.mask.por.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
+ ret < 16 x i32> %res
+}
+
+declare <16 x i32> @llvm.x86.avx512.mask.por.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
+
+define <16 x i32> @test_and_epi32(<16 x i32> %a, <16 x i32> %b) {
+ ;CHECK-LABEL: test_and_epi32
+ ;CHECK: vpandd {{.*}}encoding: [0x62,0xf1,0x7d,0x48,0xdb,0xc1]
+ %res = call <16 x i32> @llvm.x86.avx512.mask.pand.d.512(<16 x i32> %a,<16 x i32> %b, <16 x i32>zeroinitializer, i16 -1)
+ ret < 16 x i32> %res
+}
+
+define <16 x i32> @test_mask_and_epi32(<16 x i32> %a,<16 x i32> %b, <16 x i32> %passThru, i16 %mask) {
+ ;CHECK-LABEL: test_mask_and_epi32
+ ;CHECK: vpandd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xdb,0xd1]
+ %res = call <16 x i32> @llvm.x86.avx512.mask.pand.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
+ ret < 16 x i32> %res
+}
+
+declare <16 x i32> @llvm.x86.avx512.mask.pand.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
+
+define <8 x i64> @test_xor_epi64(<8 x i64> %a, <8 x i64> %b) {
+ ;CHECK-LABEL: test_xor_epi64
+ ;CHECK: vpxorq {{.*}}encoding: [0x62,0xf1,0xfd,0x48,0xef,0xc1]
+ %res = call <8 x i64> @llvm.x86.avx512.mask.pxor.q.512(<8 x i64> %a,<8 x i64> %b, <8 x i64>zeroinitializer, i8 -1)
+ ret < 8 x i64> %res
+}
+
+define <8 x i64> @test_mask_xor_epi64(<8 x i64> %a,<8 x i64> %b, <8 x i64> %passThru, i8 %mask) {
+ ;CHECK-LABEL: test_mask_xor_epi64
+ ;CHECK: vpxorq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xef,0xd1]
+ %res = call <8 x i64> @llvm.x86.avx512.mask.pxor.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
+ ret < 8 x i64> %res
+}
+
+declare <8 x i64> @llvm.x86.avx512.mask.pxor.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
+
+define <8 x i64> @test_or_epi64(<8 x i64> %a, <8 x i64> %b) {
+ ;CHECK-LABEL: test_or_epi64
+ ;CHECK: vporq {{.*}}encoding: [0x62,0xf1,0xfd,0x48,0xeb,0xc1]
+ %res = call <8 x i64> @llvm.x86.avx512.mask.por.q.512(<8 x i64> %a,<8 x i64> %b, <8 x i64>zeroinitializer, i8 -1)
+ ret < 8 x i64> %res
+}
+
+define <8 x i64> @test_mask_or_epi64(<8 x i64> %a,<8 x i64> %b, <8 x i64> %passThru, i8 %mask) {
+ ;CHECK-LABEL: test_mask_or_epi64
+ ;CHECK: vporq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xeb,0xd1]
+ %res = call <8 x i64> @llvm.x86.avx512.mask.por.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
+ ret < 8 x i64> %res
+}
+
+declare <8 x i64> @llvm.x86.avx512.mask.por.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
+
+define <8 x i64> @test_and_epi64(<8 x i64> %a, <8 x i64> %b) {
+ ;CHECK-LABEL: test_and_epi64
+ ;CHECK: vpandq {{.*}}encoding: [0x62,0xf1,0xfd,0x48,0xdb,0xc1]
+ %res = call <8 x i64> @llvm.x86.avx512.mask.pand.q.512(<8 x i64> %a,<8 x i64> %b, <8 x i64>zeroinitializer, i8 -1)
+ ret < 8 x i64> %res
+}
+
+define <8 x i64> @test_mask_and_epi64(<8 x i64> %a,<8 x i64> %b, <8 x i64> %passThru, i8 %mask) {
+ ;CHECK-LABEL: test_mask_and_epi64
+ ;CHECK: vpandq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xdb,0xd1]
+ %res = call <8 x i64> @llvm.x86.avx512.mask.pand.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
+ ret < 8 x i64> %res
+}
+
+declare <8 x i64> @llvm.x86.avx512.mask.pand.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
+
+
+define <16 x i32> @test_mask_add_epi32_rr(<16 x i32> %a, <16 x i32> %b) {
+ ;CHECK-LABEL: test_mask_add_epi32_rr
+ ;CHECK: vpaddd %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfe,0xc1]
+ %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
+ ret < 16 x i32> %res
+}
+
+define <16 x i32> @test_mask_add_epi32_rrk(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask) {
+ ;CHECK-LABEL: test_mask_add_epi32_rrk
+ ;CHECK: vpaddd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xfe,0xd1]
+ %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
+ ret < 16 x i32> %res
+}
+
+define <16 x i32> @test_mask_add_epi32_rrkz(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
+ ;CHECK-LABEL: test_mask_add_epi32_rrkz
+ ;CHECK: vpaddd %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xfe,0xc1]
+ %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
+ ret < 16 x i32> %res
+}
+
+define <16 x i32> @test_mask_add_epi32_rm(<16 x i32> %a, <16 x i32>* %ptr_b) {
+ ;CHECK-LABEL: test_mask_add_epi32_rm
+ ;CHECK: vpaddd (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfe,0x07]
+ %b = load <16 x i32>, <16 x i32>* %ptr_b
+ %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
+ ret < 16 x i32> %res
+}
+
+define <16 x i32> @test_mask_add_epi32_rmk(<16 x i32> %a, <16 x i32>* %ptr_b, <16 x i32> %passThru, i16 %mask) {
+ ;CHECK-LABEL: test_mask_add_epi32_rmk
+ ;CHECK: vpaddd (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xfe,0x0f]
+ %b = load <16 x i32>, <16 x i32>* %ptr_b
+ %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
+ ret < 16 x i32> %res
+}
+
+define <16 x i32> @test_mask_add_epi32_rmkz(<16 x i32> %a, <16 x i32>* %ptr_b, i16 %mask) {
+ ;CHECK-LABEL: test_mask_add_epi32_rmkz
+ ;CHECK: vpaddd (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xfe,0x07]
+ %b = load <16 x i32>, <16 x i32>* %ptr_b
+ %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
+ ret < 16 x i32> %res
+}
+
+define <16 x i32> @test_mask_add_epi32_rmb(<16 x i32> %a, i32* %ptr_b) {
+ ;CHECK-LABEL: test_mask_add_epi32_rmb
+ ;CHECK: vpaddd (%rdi){1to16}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x58,0xfe,0x07]
+ %q = load i32, i32* %ptr_b
+ %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
+ %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
+ %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
+ ret < 16 x i32> %res
+}
+
+define <16 x i32> @test_mask_add_epi32_rmbk(<16 x i32> %a, i32* %ptr_b, <16 x i32> %passThru, i16 %mask) {
+ ;CHECK-LABEL: test_mask_add_epi32_rmbk
+ ;CHECK: vpaddd (%rdi){1to16}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x59,0xfe,0x0f]
+ %q = load i32, i32* %ptr_b
+ %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
+ %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
+ %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
+ ret < 16 x i32> %res
+}
+
+define <16 x i32> @test_mask_add_epi32_rmbkz(<16 x i32> %a, i32* %ptr_b, i16 %mask) {
+ ;CHECK-LABEL: test_mask_add_epi32_rmbkz
+ ;CHECK: vpaddd (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xd9,0xfe,0x07]
+ %q = load i32, i32* %ptr_b
+ %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
+ %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
+ %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
+ ret < 16 x i32> %res
+}
+
+declare <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
+
+define <16 x i32> @test_mask_sub_epi32_rr(<16 x i32> %a, <16 x i32> %b) {
+ ;CHECK-LABEL: test_mask_sub_epi32_rr
+ ;CHECK: vpsubd %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfa,0xc1]
+ %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
+ ret < 16 x i32> %res
+}
+
+define <16 x i32> @test_mask_sub_epi32_rrk(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask) {
+ ;CHECK-LABEL: test_mask_sub_epi32_rrk
+ ;CHECK: vpsubd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xfa,0xd1]
+ %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
+ ret < 16 x i32> %res
+}
+
+define <16 x i32> @test_mask_sub_epi32_rrkz(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
+ ;CHECK-LABEL: test_mask_sub_epi32_rrkz
+ ;CHECK: vpsubd %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xfa,0xc1]
+ %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
+ ret < 16 x i32> %res
+}
+
+define <16 x i32> @test_mask_sub_epi32_rm(<16 x i32> %a, <16 x i32>* %ptr_b) {
+ ;CHECK-LABEL: test_mask_sub_epi32_rm
+ ;CHECK: vpsubd (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfa,0x07]
+ %b = load <16 x i32>, <16 x i32>* %ptr_b
+ %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
+ ret < 16 x i32> %res
+}
+
+define <16 x i32> @test_mask_sub_epi32_rmk(<16 x i32> %a, <16 x i32>* %ptr_b, <16 x i32> %passThru, i16 %mask) {
+ ;CHECK-LABEL: test_mask_sub_epi32_rmk
+ ;CHECK: vpsubd (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xfa,0x0f]
+ %b = load <16 x i32>, <16 x i32>* %ptr_b
+ %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
+ ret < 16 x i32> %res
+}
+
+define <16 x i32> @test_mask_sub_epi32_rmkz(<16 x i32> %a, <16 x i32>* %ptr_b, i16 %mask) {
+ ;CHECK-LABEL: test_mask_sub_epi32_rmkz
+ ;CHECK: vpsubd (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xfa,0x07]
+ %b = load <16 x i32>, <16 x i32>* %ptr_b
+ %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
+ ret < 16 x i32> %res
+}
+
+define <16 x i32> @test_mask_sub_epi32_rmb(<16 x i32> %a, i32* %ptr_b) {
+ ;CHECK-LABEL: test_mask_sub_epi32_rmb
+ ;CHECK: vpsubd (%rdi){1to16}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x58,0xfa,0x07]
+ %q = load i32, i32* %ptr_b
+ %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
+ %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
+ %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
+ ret < 16 x i32> %res
+}
+
+define <16 x i32> @test_mask_sub_epi32_rmbk(<16 x i32> %a, i32* %ptr_b, <16 x i32> %passThru, i16 %mask) {
+ ;CHECK-LABEL: test_mask_sub_epi32_rmbk
+ ;CHECK: vpsubd (%rdi){1to16}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x59,0xfa,0x0f]
+ %q = load i32, i32* %ptr_b
+ %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
+ %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
+ %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
+ ret < 16 x i32> %res
+}
+
+define <16 x i32> @test_mask_sub_epi32_rmbkz(<16 x i32> %a, i32* %ptr_b, i16 %mask) {
+ ;CHECK-LABEL: test_mask_sub_epi32_rmbkz
+ ;CHECK: vpsubd (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xd9,0xfa,0x07]
+ %q = load i32, i32* %ptr_b
+ %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
+ %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
+ %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
+ ret < 16 x i32> %res
+}
+
+declare <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
+
+define <8 x i64> @test_mask_add_epi64_rr(<8 x i64> %a, <8 x i64> %b) {
+ ;CHECK-LABEL: test_mask_add_epi64_rr
+ ;CHECK: vpaddq %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0xc1]
+ %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
+ ret < 8 x i64> %res
+}
+
+define <8 x i64> @test_mask_add_epi64_rrk(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask) {
+ ;CHECK-LABEL: test_mask_add_epi64_rrk
+ ;CHECK: vpaddq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xd4,0xd1]
+ %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
+ ret < 8 x i64> %res
+}
+
+define <8 x i64> @test_mask_add_epi64_rrkz(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
+ ;CHECK-LABEL: test_mask_add_epi64_rrkz
+ ;CHECK: vpaddq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xd4,0xc1]
+ %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask)
+ ret < 8 x i64> %res
+}
+
+define <8 x i64> @test_mask_add_epi64_rm(<8 x i64> %a, <8 x i64>* %ptr_b) {
+ ;CHECK-LABEL: test_mask_add_epi64_rm
+ ;CHECK: vpaddq (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0x07]
+ %b = load <8 x i64>, <8 x i64>* %ptr_b
+ %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
+ ret < 8 x i64> %res
+}
+
+define <8 x i64> @test_mask_add_epi64_rmk(<8 x i64> %a, <8 x i64>* %ptr_b, <8 x i64> %passThru, i8 %mask) {
+ ;CHECK-LABEL: test_mask_add_epi64_rmk
+ ;CHECK: vpaddq (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xd4,0x0f]
+ %b = load <8 x i64>, <8 x i64>* %ptr_b
+ %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
+ ret < 8 x i64> %res
+}
+
+define <8 x i64> @test_mask_add_epi64_rmkz(<8 x i64> %a, <8 x i64>* %ptr_b, i8 %mask) {
+ ;CHECK-LABEL: test_mask_add_epi64_rmkz
+ ;CHECK: vpaddq (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xd4,0x07]
+ %b = load <8 x i64>, <8 x i64>* %ptr_b
+ %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask)
+ ret < 8 x i64> %res
+}
+
+define <8 x i64> @test_mask_add_epi64_rmb(<8 x i64> %a, i64* %ptr_b) {
+ ;CHECK-LABEL: test_mask_add_epi64_rmb
+ ;CHECK: vpaddq (%rdi){1to8}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x58,0xd4,0x07]
+ %q = load i64, i64* %ptr_b
+ %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
+ %b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
+ %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
+ ret < 8 x i64> %res
+}
+
+define <8 x i64> @test_mask_add_epi64_rmbk(<8 x i64> %a, i64* %ptr_b, <8 x i64> %passThru, i8 %mask) {
+ ;CHECK-LABEL: test_mask_add_epi64_rmbk
+ ;CHECK: vpaddq (%rdi){1to8}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x59,0xd4,0x0f]
+ %q = load i64, i64* %ptr_b
+ %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
+ %b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
+ %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
+ ret < 8 x i64> %res
+}
+
+define <8 x i64> @test_mask_add_epi64_rmbkz(<8 x i64> %a, i64* %ptr_b, i8 %mask) {
+ ;CHECK-LABEL: test_mask_add_epi64_rmbkz
+ ;CHECK: vpaddq (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xd9,0xd4,0x07]
+ %q = load i64, i64* %ptr_b
+ %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
+ %b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
+ %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask)
+ ret < 8 x i64> %res
+}
+
+declare <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
+
+define <8 x i64> @test_mask_sub_epi64_rr(<8 x i64> %a, <8 x i64> %b) {
+ ;CHECK-LABEL: test_mask_sub_epi64_rr
+ ;CHECK: vpsubq %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xfb,0xc1]
+ %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
+ ret < 8 x i64> %res
+}
+
+define <8 x i64> @test_mask_sub_epi64_rrk(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask) {
+ ;CHECK-LABEL: test_mask_sub_epi64_rrk
+ ;CHECK: vpsubq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xfb,0xd1]
+ %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
+ ret < 8 x i64> %res
+}
+
+define <8 x i64> @test_mask_sub_epi64_rrkz(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
+ ;CHECK-LABEL: test_mask_sub_epi64_rrkz
+ ;CHECK: vpsubq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xfb,0xc1]
+ %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask)
+ ret < 8 x i64> %res
+}
+
+define <8 x i64> @test_mask_sub_epi64_rm(<8 x i64> %a, <8 x i64>* %ptr_b) {
+ ;CHECK-LABEL: test_mask_sub_epi64_rm
+ ;CHECK: vpsubq (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xfb,0x07]
+ %b = load <8 x i64>, <8 x i64>* %ptr_b
+ %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
+ ret < 8 x i64> %res
+}
+
+define <8 x i64> @test_mask_sub_epi64_rmk(<8 x i64> %a, <8 x i64>* %ptr_b, <8 x i64> %passThru, i8 %mask) {
+ ;CHECK-LABEL: test_mask_sub_epi64_rmk
+ ;CHECK: vpsubq (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xfb,0x0f]
+ %b = load <8 x i64>, <8 x i64>* %ptr_b
+ %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
+ ret < 8 x i64> %res
+}
+
+define <8 x i64> @test_mask_sub_epi64_rmkz(<8 x i64> %a, <8 x i64>* %ptr_b, i8 %mask) {
+ ;CHECK-LABEL: test_mask_sub_epi64_rmkz
+ ;CHECK: vpsubq (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xfb,0x07]
+ %b = load <8 x i64>, <8 x i64>* %ptr_b
+ %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask)
+ ret < 8 x i64> %res
+}
+
+define <8 x i64> @test_mask_sub_epi64_rmb(<8 x i64> %a, i64* %ptr_b) {
+ ;CHECK-LABEL: test_mask_sub_epi64_rmb
+ ;CHECK: vpsubq (%rdi){1to8}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x58,0xfb,0x07]
+ %q = load i64, i64* %ptr_b
+ %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
+ %b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
+ %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
+ ret < 8 x i64> %res
+}
+
+define <8 x i64> @test_mask_sub_epi64_rmbk(<8 x i64> %a, i64* %ptr_b, <8 x i64> %passThru, i8 %mask) {
+ ;CHECK-LABEL: test_mask_sub_epi64_rmbk
+ ;CHECK: vpsubq (%rdi){1to8}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x59,0xfb,0x0f]
+ %q = load i64, i64* %ptr_b
+ %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
+ %b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
+ %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
+ ret < 8 x i64> %res
+}
+
+define <8 x i64> @test_mask_sub_epi64_rmbkz(<8 x i64> %a, i64* %ptr_b, i8 %mask) {
+ ;CHECK-LABEL: test_mask_sub_epi64_rmbkz
+ ;CHECK: vpsubq (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xd9,0xfb,0x07]
+ %q = load i64, i64* %ptr_b
+ %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
+ %b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
+ %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask)
+ ret < 8 x i64> %res
+}
+
+declare <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
+
+define <8 x i64> @test_mask_mul_epi32_rr(<16 x i32> %a, <16 x i32> %b) {
+ ;CHECK-LABEL: test_mask_mul_epi32_rr
+ ;CHECK: vpmuldq %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0xfd,0x48,0x28,0xc1]
+ %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 -1)
+ ret < 8 x i64> %res
+}
+
+define <8 x i64> @test_mask_mul_epi32_rrk(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask) {
+ ;CHECK-LABEL: test_mask_mul_epi32_rrk
+ ;CHECK: vpmuldq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x28,0xd1]
+ %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask)
+ ret < 8 x i64> %res
+}
+
+define <8 x i64> @test_mask_mul_epi32_rrkz(<16 x i32> %a, <16 x i32> %b, i8 %mask) {
+ ;CHECK-LABEL: test_mask_mul_epi32_rrkz
+ ;CHECK: vpmuldq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x28,0xc1]
+ %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 %mask)
+ ret < 8 x i64> %res
+}
+
+define <8 x i64> @test_mask_mul_epi32_rm(<16 x i32> %a, <16 x i32>* %ptr_b) {
+ ;CHECK-LABEL: test_mask_mul_epi32_rm
+ ;CHECK: vpmuldq (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf2,0xfd,0x48,0x28,0x07]
+ %b = load <16 x i32>, <16 x i32>* %ptr_b
+ %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 -1)
+ ret < 8 x i64> %res
+}
+
+define <8 x i64> @test_mask_mul_epi32_rmk(<16 x i32> %a, <16 x i32>* %ptr_b, <8 x i64> %passThru, i8 %mask) {
+ ;CHECK-LABEL: test_mask_mul_epi32_rmk
+ ;CHECK: vpmuldq (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x28,0x0f]
+ %b = load <16 x i32>, <16 x i32>* %ptr_b
+ %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask)
+ ret < 8 x i64> %res
+}
+
+define <8 x i64> @test_mask_mul_epi32_rmkz(<16 x i32> %a, <16 x i32>* %ptr_b, i8 %mask) {
+ ;CHECK-LABEL: test_mask_mul_epi32_rmkz
+ ;CHECK: vpmuldq (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x28,0x07]
+ %b = load <16 x i32>, <16 x i32>* %ptr_b
+ %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 %mask)
+ ret < 8 x i64> %res
+}
+
+define <8 x i64> @test_mask_mul_epi32_rmb(<16 x i32> %a, i64* %ptr_b) {
+ ;CHECK-LABEL: test_mask_mul_epi32_rmb
+ ;CHECK: vpmuldq (%rdi){1to8}, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0xfd,0x58,0x28,0x07]
+ %q = load i64, i64* %ptr_b
+ %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
+ %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
+ %b = bitcast <8 x i64> %b64 to <16 x i32>
+ %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 -1)
+ ret < 8 x i64> %res
+}
+
+define <8 x i64> @test_mask_mul_epi32_rmbk(<16 x i32> %a, i64* %ptr_b, <8 x i64> %passThru, i8 %mask) {
+ ;CHECK-LABEL: test_mask_mul_epi32_rmbk
+ ;CHECK: vpmuldq (%rdi){1to8}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x59,0x28,0x0f]
+ %q = load i64, i64* %ptr_b
+ %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
+ %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
+ %b = bitcast <8 x i64> %b64 to <16 x i32>
+ %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask)
+ ret < 8 x i64> %res
+}
+
+define <8 x i64> @test_mask_mul_epi32_rmbkz(<16 x i32> %a, i64* %ptr_b, i8 %mask) {
+ ;CHECK-LABEL: test_mask_mul_epi32_rmbkz
+ ;CHECK: vpmuldq (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xd9,0x28,0x07]
+ %q = load i64, i64* %ptr_b
+ %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
+ %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
+ %b = bitcast <8 x i64> %b64 to <16 x i32>
+ %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 %mask)
+ ret < 8 x i64> %res
+}
+
+declare <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32>, <16 x i32>, <8 x i64>, i8)
+
+define <8 x i64> @test_mask_mul_epu32_rr(<16 x i32> %a, <16 x i32> %b) {
+ ;CHECK-LABEL: test_mask_mul_epu32_rr
+ ;CHECK: vpmuludq %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xf4,0xc1]
+ %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 -1)
+ ret < 8 x i64> %res
+}
+
+define <8 x i64> @test_mask_mul_epu32_rrk(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask) {
+ ;CHECK-LABEL: test_mask_mul_epu32_rrk
+ ;CHECK: vpmuludq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xf4,0xd1]
+ %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask)
+ ret < 8 x i64> %res
+}
+
+define <8 x i64> @test_mask_mul_epu32_rrkz(<16 x i32> %a, <16 x i32> %b, i8 %mask) {
+ ;CHECK-LABEL: test_mask_mul_epu32_rrkz
+ ;CHECK: vpmuludq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xf4,0xc1]
+ %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 %mask)
+ ret < 8 x i64> %res
+}
+
+define <8 x i64> @test_mask_mul_epu32_rm(<16 x i32> %a, <16 x i32>* %ptr_b) {
+ ;CHECK-LABEL: test_mask_mul_epu32_rm
+ ;CHECK: vpmuludq (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xf4,0x07]
+ %b = load <16 x i32>, <16 x i32>* %ptr_b
+ %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 -1)
+ ret < 8 x i64> %res
+}
+
+define <8 x i64> @test_mask_mul_epu32_rmk(<16 x i32> %a, <16 x i32>* %ptr_b, <8 x i64> %passThru, i8 %mask) {
+ ;CHECK-LABEL: test_mask_mul_epu32_rmk
+ ;CHECK: vpmuludq (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xf4,0x0f]
+ %b = load <16 x i32>, <16 x i32>* %ptr_b
+ %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask)
+ ret < 8 x i64> %res
+}
+
+define <8 x i64> @test_mask_mul_epu32_rmkz(<16 x i32> %a, <16 x i32>* %ptr_b, i8 %mask) {
+ ;CHECK-LABEL: test_mask_mul_epu32_rmkz
+ ;CHECK: vpmuludq (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xf4,0x07]
+ %b = load <16 x i32>, <16 x i32>* %ptr_b
+ %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 %mask)
+ ret < 8 x i64> %res
+}
+
+define <8 x i64> @test_mask_mul_epu32_rmb(<16 x i32> %a, i64* %ptr_b) {
+ ;CHECK-LABEL: test_mask_mul_epu32_rmb
+ ;CHECK: vpmuludq (%rdi){1to8}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x58,0xf4,0x07]
+ %q = load i64, i64* %ptr_b
+ %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
+ %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
+ %b = bitcast <8 x i64> %b64 to <16 x i32>
+ %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 -1)
+ ret < 8 x i64> %res
+}
+
+define <8 x i64> @test_mask_mul_epu32_rmbk(<16 x i32> %a, i64* %ptr_b, <8 x i64> %passThru, i8 %mask) {
+ ;CHECK-LABEL: test_mask_mul_epu32_rmbk
+ ;CHECK: vpmuludq (%rdi){1to8}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x59,0xf4,0x0f]
+ %q = load i64, i64* %ptr_b
+ %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
+ %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
+ %b = bitcast <8 x i64> %b64 to <16 x i32>
+ %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask)
+ ret < 8 x i64> %res
+}
+
+define <8 x i64> @test_mask_mul_epu32_rmbkz(<16 x i32> %a, i64* %ptr_b, i8 %mask) {
+ ;CHECK-LABEL: test_mask_mul_epu32_rmbkz
+ ;CHECK: vpmuludq (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xd9,0xf4,0x07]
+ %q = load i64, i64* %ptr_b
+ %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
+ %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
+ %b = bitcast <8 x i64> %b64 to <16 x i32>
+ %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 %mask)
+ ret < 8 x i64> %res
+}
+
+declare <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32>, <16 x i32>, <8 x i64>, i8)
+
+define <16 x i32> @test_mask_mullo_epi32_rr_512(<16 x i32> %a, <16 x i32> %b) {
+ ;CHECK-LABEL: test_mask_mullo_epi32_rr_512
+ ;CHECK: vpmulld %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x40,0xc1]
+ %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
+ ret <16 x i32> %res
+}
+
+define <16 x i32> @test_mask_mullo_epi32_rrk_512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask) {
+ ;CHECK-LABEL: test_mask_mullo_epi32_rrk_512
+ ;CHECK: vpmulld %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x40,0xd1]
+ %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
+ ret < 16 x i32> %res
+}
+
+define <16 x i32> @test_mask_mullo_epi32_rrkz_512(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
+ ;CHECK-LABEL: test_mask_mullo_epi32_rrkz_512
+ ;CHECK: vpmulld %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x40,0xc1]
+ %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
+ ret < 16 x i32> %res
+}
+
+define <16 x i32> @test_mask_mullo_epi32_rm_512(<16 x i32> %a, <16 x i32>* %ptr_b) {
+ ;CHECK-LABEL: test_mask_mullo_epi32_rm_512
+ ;CHECK: vpmulld (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x40,0x07]
+ %b = load <16 x i32>, <16 x i32>* %ptr_b
+ %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
+ ret < 16 x i32> %res
+}
+
+define <16 x i32> @test_mask_mullo_epi32_rmk_512(<16 x i32> %a, <16 x i32>* %ptr_b, <16 x i32> %passThru, i16 %mask) {
+ ;CHECK-LABEL: test_mask_mullo_epi32_rmk_512
+ ;CHECK: vpmulld (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x40,0x0f]
+ %b = load <16 x i32>, <16 x i32>* %ptr_b
+ %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
+ ret < 16 x i32> %res
+}
+
+define <16 x i32> @test_mask_mullo_epi32_rmkz_512(<16 x i32> %a, <16 x i32>* %ptr_b, i16 %mask) {
+ ;CHECK-LABEL: test_mask_mullo_epi32_rmkz_512
+ ;CHECK: vpmulld (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x40,0x07]
+ %b = load <16 x i32>, <16 x i32>* %ptr_b
+ %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
+ ret < 16 x i32> %res
+}
+
+define <16 x i32> @test_mask_mullo_epi32_rmb_512(<16 x i32> %a, i32* %ptr_b) {
+ ;CHECK-LABEL: test_mask_mullo_epi32_rmb_512
+ ;CHECK: vpmulld (%rdi){1to16}, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x58,0x40,0x07]
+ %q = load i32, i32* %ptr_b
+ %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
+ %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
+ %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
+ ret < 16 x i32> %res
+}
+
+define <16 x i32> @test_mask_mullo_epi32_rmbk_512(<16 x i32> %a, i32* %ptr_b, <16 x i32> %passThru, i16 %mask) {
+ ;CHECK-LABEL: test_mask_mullo_epi32_rmbk_512
+ ;CHECK: vpmulld (%rdi){1to16}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x59,0x40,0x0f]
+ %q = load i32, i32* %ptr_b
+ %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
+ %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
+ %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
+ ret < 16 x i32> %res
+}
+
+define <16 x i32> @test_mask_mullo_epi32_rmbkz_512(<16 x i32> %a, i32* %ptr_b, i16 %mask) {
+ ;CHECK-LABEL: test_mask_mullo_epi32_rmbkz_512
+ ;CHECK: vpmulld (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xd9,0x40,0x07]
+ %q = load i32, i32* %ptr_b
+ %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
+ %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
+ %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
+ ret < 16 x i32> %res
+}
+
+declare <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
+
+define <16 x float> @test_mm512_maskz_add_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
+ ;CHECK-LABEL: test_mm512_maskz_add_round_ps_rn_sae
+ ;CHECK: vaddps {rn-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
+ %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 0)
+ ret <16 x float> %res
+}
+define <16 x float> @test_mm512_maskz_add_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
+ ;CHECK-LABEL: test_mm512_maskz_add_round_ps_rd_sae
+ ;CHECK: vaddps {rd-sae}, %zmm1, %zmm0, %zmm0
+ %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 1)
+ ret <16 x float> %res
+}
+define <16 x float> @test_mm512_maskz_add_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
+ ;CHECK-LABEL: test_mm512_maskz_add_round_ps_ru_sae
+ ;CHECK: vaddps {ru-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
+ %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 2)
+ ret <16 x float> %res
+}
+
+define <16 x float> @test_mm512_maskz_add_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
+ ;CHECK-LABEL: test_mm512_maskz_add_round_ps_rz_sae
+ ;CHECK: vaddps {rz-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
+ %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 3)
+ ret <16 x float> %res
+}
+
+
+define <16 x float> @test_mm512_maskz_add_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
+ ;CHECK-LABEL: test_mm512_maskz_add_round_ps_current
+ ;CHECK: vaddps %zmm1, %zmm0, %zmm0 {%k1} {z}
+ %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 4)
+ ret <16 x float> %res
+}
+
+define <16 x float> @test_mm512_mask_add_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
+ ;CHECK-LABEL: test_mm512_mask_add_round_ps_rn_sae
+ ;CHECK: vaddps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1}
+ %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 0)
+ ret <16 x float> %res
+}
+define <16 x float> @test_mm512_mask_add_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
+ ;CHECK-LABEL: test_mm512_mask_add_round_ps_rd_sae
+ ;CHECK: vaddps {rd-sae}, %zmm1, %zmm0, %zmm2 {%k1}
+ %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 1)
+ ret <16 x float> %res
+}
+define <16 x float> @test_mm512_mask_add_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
+ ;CHECK-LABEL: test_mm512_mask_add_round_ps_ru_sae
+ ;CHECK: vaddps {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1}
+ %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 2)
+ ret <16 x float> %res
+}
+
+define <16 x float> @test_mm512_mask_add_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
+ ;CHECK-LABEL: test_mm512_mask_add_round_ps_rz_sae
+ ;CHECK: vaddps {rz-sae}, %zmm1, %zmm0, %zmm2 {%k1}
+ %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 3)
+ ret <16 x float> %res
+}
+
+
+define <16 x float> @test_mm512_mask_add_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
+ ;CHECK-LABEL: test_mm512_mask_add_round_ps_current
+ ;CHECK: vaddps %zmm1, %zmm0, %zmm2 {%k1}
+ %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 4)
+ ret <16 x float> %res
+}
+
+
+define <16 x float> @test_mm512_add_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
+ ;CHECK-LABEL: test_mm512_add_round_ps_rn_sae
+ ;CHECK: vaddps {rn-sae}, %zmm1, %zmm0, %zmm0
+ %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 0)
+ ret <16 x float> %res
+}
+define <16 x float> @test_mm512_add_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
+ ;CHECK-LABEL: test_mm512_add_round_ps_rd_sae
+ ;CHECK: vaddps {rd-sae}, %zmm1, %zmm0, %zmm0
+ %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 1)
+ ret <16 x float> %res
+}
+define <16 x float> @test_mm512_add_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
+ ;CHECK-LABEL: test_mm512_add_round_ps_ru_sae
+ ;CHECK: vaddps {ru-sae}, %zmm1, %zmm0, %zmm0
+ %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 2)
+ ret <16 x float> %res
+}
+
+define <16 x float> @test_mm512_add_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
+ ;CHECK-LABEL: test_mm512_add_round_ps_rz_sae
+ ;CHECK: vaddps {rz-sae}, %zmm1, %zmm0, %zmm0
+ %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 3)
+ ret <16 x float> %res
+}
+
+define <16 x float> @test_mm512_add_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
+ ;CHECK-LABEL: test_mm512_add_round_ps_current
+ ;CHECK: vaddps %zmm1, %zmm0, %zmm0
+ %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 4)
+ ret <16 x float> %res
+}
+declare <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
+
+define <16 x float> @test_mm512_mask_sub_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
+ ;CHECK-LABEL: test_mm512_mask_sub_round_ps_rn_sae
+ ;CHECK: vsubps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1}
+ %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 0)
+ ret <16 x float> %res
+}
+define <16 x float> @test_mm512_mask_sub_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
+ ;CHECK-LABEL: test_mm512_mask_sub_round_ps_rd_sae
+ ;CHECK: vsubps {rd-sae}, %zmm1, %zmm0, %zmm2 {%k1}
+ %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 1)
+ ret <16 x float> %res
+}
+define <16 x float> @test_mm512_mask_sub_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
+ ;CHECK-LABEL: test_mm512_mask_sub_round_ps_ru_sae
+ ;CHECK: vsubps {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1}
+ %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 2)
+ ret <16 x float> %res
+}
+
+define <16 x float> @test_mm512_mask_sub_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
+ ;CHECK-LABEL: test_mm512_mask_sub_round_ps_rz_sae
+ ;CHECK: vsubps {rz-sae}, %zmm1, %zmm0, %zmm2 {%k1}
+ %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 3)
+ ret <16 x float> %res
+}
+
+
+define <16 x float> @test_mm512_mask_sub_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
+ ;CHECK-LABEL: test_mm512_mask_sub_round_ps_current
+ ;CHECK: vsubps %zmm1, %zmm0, %zmm2 {%k1}
+ %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 4)
+ ret <16 x float> %res
+}
+
+define <16 x float> @test_mm512_sub_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
+ ;CHECK-LABEL: test_mm512_sub_round_ps_rn_sae
+ ;CHECK: vsubps {rn-sae}, %zmm1, %zmm0, %zmm0
+ %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 0)
+ ret <16 x float> %res
+}
+define <16 x float> @test_mm512_sub_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
+ ;CHECK-LABEL: test_mm512_sub_round_ps_rd_sae
+ ;CHECK: vsubps {rd-sae}, %zmm1, %zmm0, %zmm0
+ %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 1)
+ ret <16 x float> %res
+}
+define <16 x float> @test_mm512_sub_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
+ ;CHECK-LABEL: test_mm512_sub_round_ps_ru_sae
+ ;CHECK: vsubps {ru-sae}, %zmm1, %zmm0, %zmm0
+ %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 2)
+ ret <16 x float> %res
+}
+
+define <16 x float> @test_mm512_sub_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
+ ;CHECK-LABEL: test_mm512_sub_round_ps_rz_sae
+ ;CHECK: vsubps {rz-sae}, %zmm1, %zmm0, %zmm0
+ %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 3)
+ ret <16 x float> %res
+}
+
+define <16 x float> @test_mm512_sub_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
+ ;CHECK-LABEL: test_mm512_sub_round_ps_current
+ ;CHECK: vsubps %zmm1, %zmm0, %zmm0
+ %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 4)
+ ret <16 x float> %res
+}
+
+define <16 x float> @test_mm512_maskz_div_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
+ ;CHECK-LABEL: test_mm512_maskz_div_round_ps_rn_sae
+ ;CHECK: vdivps {rn-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
+ %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 0)
+ ret <16 x float> %res
+}
+define <16 x float> @test_mm512_maskz_div_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
+ ;CHECK-LABEL: test_mm512_maskz_div_round_ps_rd_sae
+ ;CHECK: vdivps {rd-sae}, %zmm1, %zmm0, %zmm0
+ %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 1)
+ ret <16 x float> %res
+}
+define <16 x float> @test_mm512_maskz_div_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
+ ;CHECK-LABEL: test_mm512_maskz_div_round_ps_ru_sae
+ ;CHECK: vdivps {ru-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
+ %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 2)
+ ret <16 x float> %res
+}
+
+define <16 x float> @test_mm512_maskz_div_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
+ ;CHECK-LABEL: test_mm512_maskz_div_round_ps_rz_sae
+ ;CHECK: vdivps {rz-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
+ %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 3)
+ ret <16 x float> %res
+}
+
+
+define <16 x float> @test_mm512_maskz_div_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
+ ;CHECK-LABEL: test_mm512_maskz_div_round_ps_current
+ ;CHECK: vdivps %zmm1, %zmm0, %zmm0 {%k1} {z}
+ %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 4)
+ ret <16 x float> %res
+}
+
+define <16 x float> @test_mm512_mask_div_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
+ ;CHECK-LABEL: test_mm512_mask_div_round_ps_rn_sae
+ ;CHECK: vdivps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1}
+ %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 0)
+ ret <16 x float> %res
+}
+define <16 x float> @test_mm512_mask_div_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
+ ;CHECK-LABEL: test_mm512_mask_div_round_ps_rd_sae
+ ;CHECK: vdivps {rd-sae}, %zmm1, %zmm0, %zmm2 {%k1}
+ %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 1)
+ ret <16 x float> %res
+}
+define <16 x float> @test_mm512_mask_div_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
+ ;CHECK-LABEL: test_mm512_mask_div_round_ps_ru_sae
+ ;CHECK: vdivps {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1}
+ %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 2)
+ ret <16 x float> %res
+}
+
+define <16 x float> @test_mm512_mask_div_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
+ ;CHECK-LABEL: test_mm512_mask_div_round_ps_rz_sae
+ ;CHECK: vdivps {rz-sae}, %zmm1, %zmm0, %zmm2 {%k1}
+ %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 3)
+ ret <16 x float> %res
+}
+
+
+define <16 x float> @test_mm512_mask_div_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
+ ;CHECK-LABEL: test_mm512_mask_div_round_ps_current
+ ;CHECK: vdivps %zmm1, %zmm0, %zmm2 {%k1}
+ %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 4)
+ ret <16 x float> %res
+}
+
+
+define <16 x float> @test_mm512_div_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
+ ;CHECK-LABEL: test_mm512_div_round_ps_rn_sae
+ ;CHECK: vdivps {rn-sae}, %zmm1, %zmm0, %zmm0
+ %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 0)
+ ret <16 x float> %res
+}
+define <16 x float> @test_mm512_div_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
+ ;CHECK-LABEL: test_mm512_div_round_ps_rd_sae
+ ;CHECK: vdivps {rd-sae}, %zmm1, %zmm0, %zmm0
+ %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 1)
+ ret <16 x float> %res
+}
+define <16 x float> @test_mm512_div_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
+ ;CHECK-LABEL: test_mm512_div_round_ps_ru_sae
+ ;CHECK: vdivps {ru-sae}, %zmm1, %zmm0, %zmm0
+ %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 2)
+ ret <16 x float> %res
+}
+
+define <16 x float> @test_mm512_div_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
+ ;CHECK-LABEL: test_mm512_div_round_ps_rz_sae
+ ;CHECK: vdivps {rz-sae}, %zmm1, %zmm0, %zmm0
+ %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 3)
+ ret <16 x float> %res
+}
+
+define <16 x float> @test_mm512_div_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
+ ;CHECK-LABEL: test_mm512_div_round_ps_current
+ ;CHECK: vdivps %zmm1, %zmm0, %zmm0
+ %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 4)
+ ret <16 x float> %res
+}
+declare <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
+
+define <16 x float> @test_mm512_maskz_min_round_ps_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
+ ;CHECK-LABEL: test_mm512_maskz_min_round_ps_sae
+ ;CHECK: vminps {sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
+ %res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 8)
+ ret <16 x float> %res
+}
+
+define <16 x float> @test_mm512_maskz_min_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
+ ;CHECK-LABEL: test_mm512_maskz_min_round_ps_current
+ ;CHECK: vminps %zmm1, %zmm0, %zmm0 {%k1} {z}
+ %res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 4)
+ ret <16 x float> %res
+}
+
+define <16 x float> @test_mm512_mask_min_round_ps_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
+ ;CHECK-LABEL: test_mm512_mask_min_round_ps_sae
+ ;CHECK: vminps {sae}, %zmm1, %zmm0, %zmm2 {%k1}
+ %res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 8)
+ ret <16 x float> %res
+}
+
+define <16 x float> @test_mm512_mask_min_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
+ ;CHECK-LABEL: test_mm512_mask_min_round_ps_current
+ ;CHECK: vminps %zmm1, %zmm0, %zmm2 {%k1}
+ %res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 4)
+ ret <16 x float> %res
+}
+
+define <16 x float> @test_mm512_min_round_ps_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
+ ;CHECK-LABEL: test_mm512_min_round_ps_sae
+ ;CHECK: vminps {sae}, %zmm1, %zmm0, %zmm0
+ %res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 8)
+ ret <16 x float> %res
+}
+
+define <16 x float> @test_mm512_min_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
+ ;CHECK-LABEL: test_mm512_min_round_ps_current
+ ;CHECK: vminps %zmm1, %zmm0, %zmm0
+ %res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 4)
+ ret <16 x float> %res
+}
+declare <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
+
+define <16 x float> @test_mm512_maskz_max_round_ps_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
+ ;CHECK-LABEL: test_mm512_maskz_max_round_ps_sae
+ ;CHECK: vmaxps {sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
+ %res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 8)
+ ret <16 x float> %res
+}
+
+define <16 x float> @test_mm512_maskz_max_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
+ ;CHECK-LABEL: test_mm512_maskz_max_round_ps_current
+ ;CHECK: vmaxps %zmm1, %zmm0, %zmm0 {%k1} {z}
+ %res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 4)
+ ret <16 x float> %res
+}
+
+define <16 x float> @test_mm512_mask_max_round_ps_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
+ ;CHECK-LABEL: test_mm512_mask_max_round_ps_sae
+ ;CHECK: vmaxps {sae}, %zmm1, %zmm0, %zmm2 {%k1}
+ %res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 8)
+ ret <16 x float> %res
+}
+
+define <16 x float> @test_mm512_mask_max_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
+ ;CHECK-LABEL: test_mm512_mask_max_round_ps_current
+ ;CHECK: vmaxps %zmm1, %zmm0, %zmm2 {%k1}
+ %res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 4)
+ ret <16 x float> %res
+}
+
+define <16 x float> @test_mm512_max_round_ps_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
+ ;CHECK-LABEL: test_mm512_max_round_ps_sae
+ ;CHECK: vmaxps {sae}, %zmm1, %zmm0, %zmm0
+ %res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 8)
+ ret <16 x float> %res
+}
+
+define <16 x float> @test_mm512_max_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
+ ;CHECK-LABEL: test_mm512_max_round_ps_current
+ ;CHECK: vmaxps %zmm1, %zmm0, %zmm0
+ %res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 4)
+ ret <16 x float> %res
+}
+declare <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
+
+declare <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>, <4 x float>, <4 x float>, i8, i32) nounwind readnone
+
+define <4 x float> @test_mask_add_ss_rn(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
+; CHECK-LABEL: test_mask_add_ss_rn
+; CHECK: vaddss {rn-sae}, %xmm1, %xmm0, %xmm2 {%k1}
+ %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 0)
+ ret <4 x float> %res
+}
+
+define <4 x float> @test_mask_add_ss_rd(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
+; CHECK-LABEL: test_mask_add_ss_rd
+; CHECK: vaddss {rd-sae}, %xmm1, %xmm0, %xmm2 {%k1}
+ %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 1)
+ ret <4 x float> %res
+}
+
+define <4 x float> @test_mask_add_ss_ru(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
+; CHECK-LABEL: test_mask_add_ss_ru
+; CHECK: vaddss {ru-sae}, %xmm1, %xmm0, %xmm2 {%k1}
+ %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 2)
+ ret <4 x float> %res
+}
+
+define <4 x float> @test_mask_add_ss_rz(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
+; CHECK-LABEL: test_mask_add_ss_rz
+; CHECK: vaddss {rz-sae}, %xmm1, %xmm0, %xmm2 {%k1}
+ %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 3)
+ ret <4 x float> %res
+}
+
+define <4 x float> @test_mask_add_ss_current(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
+; CHECK-LABEL: test_mask_add_ss_current
+; CHECK: vaddss %xmm1, %xmm0, %xmm2 {%k1}
+ %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 4)
+ ret <4 x float> %res
+}
+
+define <4 x float> @test_maskz_add_ss_rn(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
+; CHECK-LABEL: test_maskz_add_ss_rn
+; CHECK: vaddss {rn-sae}, %xmm1, %xmm0, %xmm0 {%k1} {z}
+ %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 %mask, i32 0)
+ ret <4 x float> %res
+}
+
+define <4 x float> @test_add_ss_rn(<4 x float> %a0, <4 x float> %a1) {
+; CHECK-LABEL: test_add_ss_rn
+; CHECK: vaddss {rn-sae}, %xmm1, %xmm0, %xmm0
+ %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 -1, i32 0)
+ ret <4 x float> %res
+}
+
+declare <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>, <2 x double>, <2 x double>, i8, i32) nounwind readnone
+
+define <2 x double> @test_mask_add_sd_rn(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
+; CHECK-LABEL: test_mask_add_sd_rn
+; CHECK: vaddsd {rn-sae}, %xmm1, %xmm0, %xmm2 {%k1}
+ %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 0)
+ ret <2 x double> %res
+}
+
+define <2 x double> @test_mask_add_sd_rd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
+; CHECK-LABEL: test_mask_add_sd_rd
+; CHECK: vaddsd {rd-sae}, %xmm1, %xmm0, %xmm2 {%k1}
+ %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 1)
+ ret <2 x double> %res
+}
+
+define <2 x double> @test_mask_add_sd_ru(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
+; CHECK-LABEL: test_mask_add_sd_ru
+; CHECK: vaddsd {ru-sae}, %xmm1, %xmm0, %xmm2 {%k1}
+ %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 2)
+ ret <2 x double> %res
+}
+
+define <2 x double> @test_mask_add_sd_rz(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
+; CHECK-LABEL: test_mask_add_sd_rz
+; CHECK: vaddsd {rz-sae}, %xmm1, %xmm0, %xmm2 {%k1}
+ %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 3)
+ ret <2 x double> %res
+}
+
+define <2 x double> @test_mask_add_sd_current(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
+; CHECK-LABEL: test_mask_add_sd_current
+; CHECK: vaddsd %xmm1, %xmm0, %xmm2 {%k1}
+ %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 4)
+ ret <2 x double> %res
+}
+
+define <2 x double> @test_maskz_add_sd_rn(<2 x double> %a0, <2 x double> %a1, i8 %mask) {
+; CHECK-LABEL: test_maskz_add_sd_rn
+; CHECK: vaddsd {rn-sae}, %xmm1, %xmm0, %xmm0 {%k1} {z}
+ %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 %mask, i32 0)
+ ret <2 x double> %res
+}
+
+define <2 x double> @test_add_sd_rn(<2 x double> %a0, <2 x double> %a1) {
+; CHECK-LABEL: test_add_sd_rn
+; CHECK: vaddsd {rn-sae}, %xmm1, %xmm0, %xmm0
+ %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 -1, i32 0)
+ ret <2 x double> %res
+}
+
+declare <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>, <4 x float>, <4 x float>, i8, i32) nounwind readnone
+
+define <4 x float> @test_mask_max_ss_sae(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
+; CHECK-LABEL: test_mask_max_ss_sae
+; CHECK: vmaxss {sae}, %xmm1, %xmm0, %xmm2 {%k1}
+ %res = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 8)
+ ret <4 x float> %res
+}
+
+define <4 x float> @test_maskz_max_ss_sae(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
+; CHECK-LABEL: test_maskz_max_ss_sae
+; CHECK: vmaxss {sae}, %xmm1, %xmm0, %xmm0 {%k1} {z}
+ %res = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 %mask, i32 8)
+ ret <4 x float> %res
+}
+
+define <4 x float> @test_max_ss_sae(<4 x float> %a0, <4 x float> %a1) {
+; CHECK-LABEL: test_max_ss_sae
+; CHECK: vmaxss {sae}, %xmm1, %xmm0, %xmm0
+ %res = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 -1, i32 8)
+ ret <4 x float> %res
+}
+
+define <4 x float> @test_mask_max_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
+; CHECK-LABEL: test_mask_max_ss
+; CHECK: vmaxss %xmm1, %xmm0, %xmm2 {%k1}
+ %res = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 4)
+ ret <4 x float> %res
+}
+
+define <4 x float> @test_maskz_max_ss(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
+; CHECK-LABEL: test_maskz_max_ss
+; CHECK: vmaxss %xmm1, %xmm0, %xmm0 {%k1} {z}
+ %res = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 %mask, i32 4)
+ ret <4 x float> %res
+}
+
+define <4 x float> @test_max_ss(<4 x float> %a0, <4 x float> %a1) {
+; CHECK-LABEL: test_max_ss
+; CHECK: vmaxss %xmm1, %xmm0, %xmm0
+ %res = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 -1, i32 4)
+ ret <4 x float> %res
+}
+declare <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>, <2 x double>, <2 x double>, i8, i32) nounwind readnone
+
+define <2 x double> @test_mask_max_sd_sae(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
+; CHECK-LABEL: test_mask_max_sd_sae
+; CHECK: vmaxsd {sae}, %xmm1, %xmm0, %xmm2 {%k1}
+ %res = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 8)
+ ret <2 x double> %res
+}
+
+define <2 x double> @test_maskz_max_sd_sae(<2 x double> %a0, <2 x double> %a1, i8 %mask) {
+; CHECK-LABEL: test_maskz_max_sd_sae
+; CHECK: vmaxsd {sae}, %xmm1, %xmm0, %xmm0 {%k1} {z}
+ %res = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 %mask, i32 8)
+ ret <2 x double> %res
+}
+
+define <2 x double> @test_max_sd_sae(<2 x double> %a0, <2 x double> %a1) {
+; CHECK-LABEL: test_max_sd_sae
+; CHECK: vmaxsd {sae}, %xmm1, %xmm0, %xmm0
+ %res = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 -1, i32 8)
+ ret <2 x double> %res
+}
+
+define <2 x double> @test_mask_max_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
+; CHECK-LABEL: test_mask_max_sd
+; CHECK: vmaxsd %xmm1, %xmm0, %xmm2 {%k1}
+ %res = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 4)
+ ret <2 x double> %res
+}
+
+define <2 x double> @test_maskz_max_sd(<2 x double> %a0, <2 x double> %a1, i8 %mask) {
+; CHECK-LABEL: test_maskz_max_sd
+; CHECK: vmaxsd %xmm1, %xmm0, %xmm0 {%k1} {z}
+ %res = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 %mask, i32 4)
+ ret <2 x double> %res
+}
+
+define <2 x double> @test_max_sd(<2 x double> %a0, <2 x double> %a1) {
+; CHECK-LABEL: test_max_sd
+; CHECK: vmaxsd %xmm1, %xmm0, %xmm0
+ %res = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 -1, i32 4)
+ ret <2 x double> %res
+}
diff --git a/test/CodeGen/X86/avx512-logic.ll b/test/CodeGen/X86/avx512-logic.ll
index bee4f52b3216..140ce3b1ec56 100644
--- a/test/CodeGen/X86/avx512-logic.ll
+++ b/test/CodeGen/X86/avx512-logic.ll
@@ -83,7 +83,7 @@ define <8 x i64> @orq_broadcast(<8 x i64> %a) nounwind {
; CHECK: ret
define <16 x i32> @andd512fold(<16 x i32> %y, <16 x i32>* %x) {
entry:
- %a = load <16 x i32>* %x, align 4
+ %a = load <16 x i32>, <16 x i32>* %x, align 4
%b = and <16 x i32> %y, %a
ret <16 x i32> %b
}
@@ -93,7 +93,7 @@ entry:
; CHECK: ret
define <8 x i64> @andqbrst(<8 x i64> %p1, i64* %ap) {
entry:
- %a = load i64* %ap, align 8
+ %a = load i64, i64* %ap, align 8
%b = insertelement <8 x i64> undef, i64 %a, i32 0
%c = shufflevector <8 x i64> %b, <8 x i64> undef, <8 x i32> zeroinitializer
%d = and <8 x i64> %p1, %c
diff --git a/test/CodeGen/X86/avx512-mask-bugfix.ll b/test/CodeGen/X86/avx512-mask-bugfix.ll
new file mode 100755
index 000000000000..1940680f1c10
--- /dev/null
+++ b/test/CodeGen/X86/avx512-mask-bugfix.ll
@@ -0,0 +1,57 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s
+
+; ModuleID = 'foo.ll'
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Function Attrs: nounwind readnone
+declare i32 @llvm.x86.avx.movmsk.ps.256(<8 x float>) #0
+
+; Function Attrs: nounwind readnone
+declare i64 @llvm.cttz.i64(i64, i1) #0
+
+; Function Attrs: nounwind
+define void @foo(float* noalias %aFOO, float %b, i32 %a) {
+allocas:
+ %full_mask_memory.i57 = alloca <8 x float>
+ %return_value_memory.i60 = alloca i1
+ %cmp.i = icmp eq i32 %a, 65535
+ br i1 %cmp.i, label %all_on, label %some_on
+
+all_on:
+ %mask0 = load <8 x float>, <8 x float>* %full_mask_memory.i57
+ %v0.i.i.i70 = call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> %mask0) #0
+ %allon.i.i76 = icmp eq i32 %v0.i.i.i70, 65535
+ br i1 %allon.i.i76, label %check_neighbors.i.i121, label %domixed.i.i100
+
+domixed.i.i100:
+ br label %check_neighbors.i.i121
+
+check_neighbors.i.i121:
+ %v1.i5.i.i116 = call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> %mask0) #0
+ %alleq.i.i120 = icmp eq i32 %v1.i5.i.i116, 65535
+ br i1 %alleq.i.i120, label %all_equal.i.i123, label %not_all_equal.i.i124
+
+; CHECK: kxnorw %k0, %k0, %k0
+; CHECK: kshiftrw $15, %k0, %k0
+; CHECK: jmp
+; CHECK: kxorw %k0, %k0, %k0
+
+all_equal.i.i123:
+ br label %reduce_equal___vyi.exit128
+
+not_all_equal.i.i124:
+ br label %reduce_equal___vyi.exit128
+
+reduce_equal___vyi.exit128:
+ %calltmp2.i125 = phi i1 [ true, %all_equal.i.i123 ], [ false, %not_all_equal.i.i124 ]
+ store i1 %calltmp2.i125, i1* %return_value_memory.i60
+ %return_value.i126 = load i1, i1* %return_value_memory.i60
+ %. = select i1 %return_value.i126, i32 1, i32 0
+ %select_to_float = sitofp i32 %. to float
+ ret void
+
+some_on:
+ ret void
+}
+
diff --git a/test/CodeGen/X86/avx512-mask-op.ll b/test/CodeGen/X86/avx512-mask-op.ll
index 35d334813fa8..d2efd7d6db6e 100644
--- a/test/CodeGen/X86/avx512-mask-op.ll
+++ b/test/CodeGen/X86/avx512-mask-op.ll
@@ -1,57 +1,68 @@
-; RUN: llc < %s -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s --check-prefix=KNL --check-prefix=CHECK
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck %s --check-prefix=SKX --check-prefix=CHECK
+; CHECK-LABEL: mask16
+; CHECK: kmovw
+; CHECK-NEXT: knotw
+; CHECK-NEXT: kmovw
define i16 @mask16(i16 %x) {
%m0 = bitcast i16 %x to <16 x i1>
%m1 = xor <16 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
%ret = bitcast <16 x i1> %m1 to i16
ret i16 %ret
-; CHECK-LABEL: mask16
-; CHECK: kmovw
-; CHECK-NEXT: knotw
-; CHECK-NEXT: kmovw
-; CHECK: ret
}
+; CHECK-LABEL: mask8
+; KNL: kmovw
+; KNL-NEXT: knotw
+; KNL-NEXT: kmovw
+; SKX: kmovb
+; SKX-NEXT: knotb
+; SKX-NEXT: kmovb
+
define i8 @mask8(i8 %x) {
%m0 = bitcast i8 %x to <8 x i1>
%m1 = xor <8 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
%ret = bitcast <8 x i1> %m1 to i8
ret i8 %ret
-; CHECK-LABEL: mask8
-; CHECK: kmovw
+}
+
+; CHECK-LABEL: mask16_mem
+; CHECK: kmovw ([[ARG1:%rdi|%rcx]]), %k{{[0-7]}}
; CHECK-NEXT: knotw
-; CHECK-NEXT: kmovw
+; CHECK-NEXT: kmovw %k{{[0-7]}}, ([[ARG1]])
; CHECK: ret
-}
define void @mask16_mem(i16* %ptr) {
- %x = load i16* %ptr, align 4
+ %x = load i16, i16* %ptr, align 4
%m0 = bitcast i16 %x to <16 x i1>
%m1 = xor <16 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
%ret = bitcast <16 x i1> %m1 to i16
store i16 %ret, i16* %ptr, align 4
ret void
-; CHECK-LABEL: mask16_mem
-; CHECK: kmovw ([[ARG1:%rdi|%rcx]]), %k{{[0-7]}}
-; CHECK-NEXT: knotw
-; CHECK-NEXT: kmovw %k{{[0-7]}}, ([[ARG1]])
-; CHECK: ret
}
+; CHECK-LABEL: mask8_mem
+; KNL: kmovw ([[ARG1]]), %k{{[0-7]}}
+; KNL-NEXT: knotw
+; KNL-NEXT: kmovw %k{{[0-7]}}, ([[ARG1]])
+; SKX: kmovb ([[ARG1]]), %k{{[0-7]}}
+; SKX-NEXT: knotb
+; SKX-NEXT: kmovb %k{{[0-7]}}, ([[ARG1]])
+
define void @mask8_mem(i8* %ptr) {
- %x = load i8* %ptr, align 4
+ %x = load i8, i8* %ptr, align 4
%m0 = bitcast i8 %x to <8 x i1>
%m1 = xor <8 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
%ret = bitcast <8 x i1> %m1 to i8
store i8 %ret, i8* %ptr, align 4
ret void
-; CHECK-LABEL: mask8_mem
-; CHECK: kmovw ([[ARG1]]), %k{{[0-7]}}
-; CHECK-NEXT: knotw
-; CHECK-NEXT: kmovw %k{{[0-7]}}, ([[ARG1]])
-; CHECK: ret
}
+; CHECK-LABEL: mand16
+; CHECK: kandw
+; CHECK: kxorw
+; CHECK: korw
define i16 @mand16(i16 %x, i16 %y) {
%ma = bitcast i16 %x to <16 x i1>
%mb = bitcast i16 %y to <16 x i1>
@@ -59,15 +70,11 @@ define i16 @mand16(i16 %x, i16 %y) {
%md = xor <16 x i1> %ma, %mb
%me = or <16 x i1> %mc, %md
%ret = bitcast <16 x i1> %me to i16
-; CHECK: kandw
-; CHECK: kxorw
-; CHECK: korw
ret i16 %ret
}
-; CHECK: shuf_test1
+; CHECK-LABEL: shuf_test1
; CHECK: kshiftrw $8
-; CHECK:ret
define i8 @shuf_test1(i16 %v) nounwind {
%v1 = bitcast i16 %v to <16 x i1>
%mask = shufflevector <16 x i1> %v1, <16 x i1> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
@@ -75,11 +82,11 @@ define i8 @shuf_test1(i16 %v) nounwind {
ret i8 %mask1
}
-; CHECK: zext_test1
+; CHECK-LABEL: zext_test1
; CHECK: kshiftlw
; CHECK: kshiftrw
; CHECK: kmovw
-; CHECK:ret
+
define i32 @zext_test1(<16 x i32> %a, <16 x i32> %b) {
%cmp_res = icmp ugt <16 x i32> %a, %b
%cmp_res.i1 = extractelement <16 x i1> %cmp_res, i32 5
@@ -87,11 +94,11 @@ define i32 @zext_test1(<16 x i32> %a, <16 x i32> %b) {
ret i32 %res
}
-; CHECK: zext_test2
+; CHECK-LABEL: zext_test2
; CHECK: kshiftlw
; CHECK: kshiftrw
; CHECK: kmovw
-; CHECK:ret
+
define i16 @zext_test2(<16 x i32> %a, <16 x i32> %b) {
%cmp_res = icmp ugt <16 x i32> %a, %b
%cmp_res.i1 = extractelement <16 x i1> %cmp_res, i32 5
@@ -99,14 +106,304 @@ define i16 @zext_test2(<16 x i32> %a, <16 x i32> %b) {
ret i16 %res
}
-; CHECK: zext_test3
+; CHECK-LABEL: zext_test3
; CHECK: kshiftlw
; CHECK: kshiftrw
; CHECK: kmovw
-; CHECK:ret
+
define i8 @zext_test3(<16 x i32> %a, <16 x i32> %b) {
%cmp_res = icmp ugt <16 x i32> %a, %b
%cmp_res.i1 = extractelement <16 x i1> %cmp_res, i32 5
%res = zext i1 %cmp_res.i1 to i8
ret i8 %res
}
+
+; CHECK-LABEL: conv1
+; KNL: kmovw %k0, %eax
+; KNL: movb %al, (%rdi)
+; SKX: kmovb %k0, (%rdi)
+define i8 @conv1(<8 x i1>* %R) {
+entry:
+ store <8 x i1> <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>, <8 x i1>* %R
+
+ %maskPtr = alloca <8 x i1>
+ store <8 x i1> <i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>, <8 x i1>* %maskPtr
+ %mask = load <8 x i1>, <8 x i1>* %maskPtr
+ %mask_convert = bitcast <8 x i1> %mask to i8
+ ret i8 %mask_convert
+}
+
+; SKX-LABEL: test4
+; SKX: vpcmpgt
+; SKX: knot
+; SKX: vpcmpgt
+; SKX: vpmovm2d
+define <4 x i32> @test4(<4 x i64> %x, <4 x i64> %y, <4 x i64> %x1, <4 x i64> %y1) {
+ %x_gt_y = icmp sgt <4 x i64> %x, %y
+ %x1_gt_y1 = icmp sgt <4 x i64> %x1, %y1
+ %res = icmp sgt <4 x i1>%x_gt_y, %x1_gt_y1
+ %resse = sext <4 x i1>%res to <4 x i32>
+ ret <4 x i32> %resse
+}
+
+; SKX-LABEL: test5
+; SKX: vpcmpgt
+; SKX: knot
+; SKX: vpcmpgt
+; SKX: vpmovm2q
+define <2 x i64> @test5(<2 x i64> %x, <2 x i64> %y, <2 x i64> %x1, <2 x i64> %y1) {
+ %x_gt_y = icmp slt <2 x i64> %x, %y
+ %x1_gt_y1 = icmp sgt <2 x i64> %x1, %y1
+ %res = icmp slt <2 x i1>%x_gt_y, %x1_gt_y1
+ %resse = sext <2 x i1>%res to <2 x i64>
+ ret <2 x i64> %resse
+}
+
+; KNL-LABEL: test6
+; KNL: vpmovsxbd
+; KNL: vpandd
+; KNL: kmovw %eax, %k1
+; KNL: vptestmd {{.*}}, %k0 {%k1}
+
+; SKX-LABEL: test6
+; SKX: vpmovb2m
+; SKX: kmovw %eax, %k1
+; SKX: kandw
+define void @test6(<16 x i1> %mask) {
+allocas:
+ %a= and <16 x i1> %mask, <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false>
+ %b = bitcast <16 x i1> %a to i16
+ %c = icmp eq i16 %b, 0
+ br i1 %c, label %true, label %false
+
+true:
+ ret void
+
+false:
+ ret void
+}
+
+; KNL-LABEL: test7
+; KNL: vpmovsxwq
+; KNL: vpandq
+; KNL: vptestmq {{.*}}, %k0
+; KNL: korw
+
+; SKX-LABEL: test7
+; SKX: vpmovw2m
+; SKX: kmovb %eax, %k1
+; SKX: korb
+
+define void @test7(<8 x i1> %mask) {
+allocas:
+ %a= or <8 x i1> %mask, <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false>
+ %b = bitcast <8 x i1> %a to i8
+ %c = icmp eq i8 %b, 0
+ br i1 %c, label %true, label %false
+
+true:
+ ret void
+
+false:
+ ret void
+}
+
+; KNL-LABEL: test8
+; KNL: vpxord %zmm2, %zmm2, %zmm2
+; KNL: jg
+; KNL: vpcmpltud %zmm2, %zmm1, %k1
+; KNL: jmp
+; KNL: vpcmpgtd %zmm2, %zmm0, %k1
+
+; SKX-LABEL: test8
+; SKX: jg
+; SKX: vpcmpltud {{.*}}, %k0
+; SKX: vpmovm2b
+; SKX: vpcmpgtd {{.*}}, %k0
+; SKX: vpmovm2b
+
+define <16 x i8> @test8(<16 x i32>%a, <16 x i32>%b, i32 %a1, i32 %b1) {
+ %cond = icmp sgt i32 %a1, %b1
+ %cmp1 = icmp sgt <16 x i32> %a, zeroinitializer
+ %cmp2 = icmp ult <16 x i32> %b, zeroinitializer
+ %mix = select i1 %cond, <16 x i1> %cmp1, <16 x i1> %cmp2
+ %res = sext <16 x i1> %mix to <16 x i8>
+ ret <16 x i8> %res
+}
+
+; KNL-LABEL: test9
+; KNL: jg
+; KNL: vpmovsxbd %xmm1, %zmm0
+; KNL: jmp
+; KNL: vpmovsxbd %xmm0, %zmm0
+
+; SKX-LABEL: test9
+; SKX: vpmovb2m %xmm1, %k0
+; SKX: vpmovm2b %k0, %xmm0
+; SKX: retq
+; SKX: vpmovb2m %xmm0, %k0
+; SKX: vpmovm2b %k0, %xmm0
+
+define <16 x i1> @test9(<16 x i1>%a, <16 x i1>%b, i32 %a1, i32 %b1) {
+ %mask = icmp sgt i32 %a1, %b1
+ %c = select i1 %mask, <16 x i1>%a, <16 x i1>%b
+ ret <16 x i1>%c
+}
+
+; KNL-LABEL: test10
+; KNL: jg
+; KNL: vpmovsxwq %xmm1, %zmm0
+; KNL: jmp
+; KNL: vpmovsxwq %xmm0, %zmm0
+
+; SKX-LABEL: test10
+; SKX: jg
+; SKX: vpmovw2m %xmm1, %k0
+; SKX: vpmovm2w %k0, %xmm0
+; SKX: retq
+; SKX: vpmovw2m %xmm0, %k0
+; SKX: vpmovm2w %k0, %xmm0
+define <8 x i1> @test10(<8 x i1>%a, <8 x i1>%b, i32 %a1, i32 %b1) {
+ %mask = icmp sgt i32 %a1, %b1
+ %c = select i1 %mask, <8 x i1>%a, <8 x i1>%b
+ ret <8 x i1>%c
+}
+
+; SKX-LABEL: test11
+; SKX: jg
+; SKX: vpmovd2m %xmm1, %k0
+; SKX: vpmovm2d %k0, %xmm0
+; SKX: retq
+; SKX: vpmovd2m %xmm0, %k0
+; SKX: vpmovm2d %k0, %xmm0
+define <4 x i1> @test11(<4 x i1>%a, <4 x i1>%b, i32 %a1, i32 %b1) {
+ %mask = icmp sgt i32 %a1, %b1
+ %c = select i1 %mask, <4 x i1>%a, <4 x i1>%b
+ ret <4 x i1>%c
+}
+
+; KNL-LABEL: test12
+; KNL: movl %edi, %eax
+define i32 @test12(i32 %x, i32 %y) {
+ %a = bitcast i16 21845 to <16 x i1>
+ %b = extractelement <16 x i1> %a, i32 0
+ %c = select i1 %b, i32 %x, i32 %y
+ ret i32 %c
+}
+
+; KNL-LABEL: test13
+; KNL: movl %esi, %eax
+define i32 @test13(i32 %x, i32 %y) {
+ %a = bitcast i16 21845 to <16 x i1>
+ %b = extractelement <16 x i1> %a, i32 3
+ %c = select i1 %b, i32 %x, i32 %y
+ ret i32 %c
+}
+
+; SKX-LABEL: test14
+; SKX: movb $11, %al
+; SKX: kmovb %eax, %k0
+; SKX: vpmovm2d %k0, %xmm0
+
+define <4 x i1> @test14() {
+ %a = bitcast i16 21845 to <16 x i1>
+ %b = extractelement <16 x i1> %a, i32 2
+ %c = insertelement <4 x i1> <i1 true, i1 false, i1 false, i1 true>, i1 %b, i32 1
+ ret <4 x i1> %c
+}
+
+; KNL-LABEL: test15
+; KNL: cmovgw
+define <16 x i1> @test15(i32 %x, i32 %y) {
+ %a = bitcast i16 21845 to <16 x i1>
+ %b = bitcast i16 1 to <16 x i1>
+ %mask = icmp sgt i32 %x, %y
+ %c = select i1 %mask, <16 x i1> %a, <16 x i1> %b
+ ret <16 x i1> %c
+}
+
+; SKX-LABEL: test16
+; SKX: kxnorw %k1, %k1, %k1
+; SKX: kshiftrw $15, %k1, %k1
+; SKX: kshiftlq $5, %k1, %k1
+; SKX: korq %k1, %k0, %k0
+; SKX: vpmovm2b %k0, %zmm0
+define <64 x i8> @test16(i64 %x) {
+ %a = bitcast i64 %x to <64 x i1>
+ %b = insertelement <64 x i1>%a, i1 true, i32 5
+ %c = sext <64 x i1>%b to <64 x i8>
+ ret <64 x i8>%c
+}
+
+; SKX-LABEL: test17
+; SKX: setg %al
+; SKX: andl $1, %eax
+; SKX: kmovw %eax, %k1
+; SKX: kshiftlq $5, %k1, %k1
+; SKX: korq %k1, %k0, %k0
+; SKX: vpmovm2b %k0, %zmm0
+define <64 x i8> @test17(i64 %x, i32 %y, i32 %z) {
+ %a = bitcast i64 %x to <64 x i1>
+ %b = icmp sgt i32 %y, %z
+ %c = insertelement <64 x i1>%a, i1 %b, i32 5
+ %d = sext <64 x i1>%c to <64 x i8>
+ ret <64 x i8>%d
+}
+
+; KNL-LABEL: test18
+define <8 x i1> @test18(i8 %a, i16 %y) {
+ %b = bitcast i8 %a to <8 x i1>
+ %b1 = bitcast i16 %y to <16 x i1>
+ %el1 = extractelement <16 x i1>%b1, i32 8
+ %el2 = extractelement <16 x i1>%b1, i32 9
+ %c = insertelement <8 x i1>%b, i1 %el1, i32 7
+ %d = insertelement <8 x i1>%c, i1 %el2, i32 6
+ ret <8 x i1>%d
+}
+
+; KNL-LABEL: test19
+; KNL: movzbl %dil, %eax
+; KNL: kmovw %eax, %k0
+; KNL: kshiftlw $13, %k0, %k0
+; KNL: kshiftrw $15, %k0, %k0
+; KNL: kmovw %k0, %eax
+; KNL: andl $1, %eax
+; KNL: testb %al, %al
+
+define <8 x i1> @test19(i8 %a) {
+ %b = bitcast i8 %a to <8 x i1>
+ %c = shufflevector < 8 x i1>%b, <8 x i1>undef, <8 x i32> <i32 undef, i32 2, i32 undef, i32 undef, i32 2, i32 undef, i32 2, i32 undef>
+ ret <8 x i1> %c
+}
+
+; KNL-LABEL: test20
+; KNL: movzbl %dil, %eax
+; KNL: kmovw %eax, %k0
+; KNL: kshiftlw $13, %k0, %k1
+; KNL: kshiftrw $15, %k1, %k1
+; KNL: kshiftlw $12, %k0, %k0
+; KNL: kshiftrw $15, %k0, %k0
+; KNL: kshiftlw $4, %k0, %k0
+; KNL: kshiftlw $1, %k1, %k2
+; KNL: korw %k0, %k2, %k0
+; KNL: kshiftlw $6, %k1, %k1
+; KNL: korw %k1, %k0, %k1
+define <8 x i1> @test20(i8 %a, i16 %y) {
+ %b = bitcast i8 %a to <8 x i1>
+ %c = shufflevector < 8 x i1>%b, <8 x i1>undef, <8 x i32> <i32 undef, i32 2, i32 undef, i32 undef, i32 3, i32 undef, i32 2, i32 undef>
+ ret <8 x i1> %c
+}
+
+; KNL-LABEL: test21
+; KNL: vpand %ymm
+; KNL: vextracti128 $1, %ymm2
+; KNL: vpand %ymm
+
+; SKX-LABEL: test21
+; SKX: vpmovb2m
+; SKX: vmovdqu16 {{.*}}%k1
+
+define <32 x i16> @test21(<32 x i16> %x , <32 x i1> %mask) nounwind readnone {
+ %ret = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer
+ ret <32 x i16> %ret
+}
diff --git a/test/CodeGen/X86/avx512-mov.ll b/test/CodeGen/X86/avx512-mov.ll
index 93875e839e22..0cd8458f73f5 100644
--- a/test/CodeGen/X86/avx512-mov.ll
+++ b/test/CodeGen/X86/avx512-mov.ll
@@ -28,7 +28,7 @@ define <2 x i64> @test3(i64 %x) {
; CHECK: vmovd (%rdi), %xmm0 ## encoding: [0x62
; CHECK: ret
define <4 x i32> @test4(i32* %x) {
- %y = load i32* %x
+ %y = load i32, i32* %x
%res = insertelement <4 x i32>undef, i32 %y, i32 0
ret <4 x i32>%res
}
@@ -53,7 +53,7 @@ define void @test6(double %x, double* %y) {
; CHECK: vmovss (%rdi), %xmm0 ## encoding: [0x62
; CHECK: ret
define float @test7(i32* %x) {
- %y = load i32* %x
+ %y = load i32, i32* %x
%res = bitcast i32 %y to float
ret float %res
}
@@ -78,7 +78,7 @@ define i64 @test9(<2 x i64> %x) {
; CHECK: vmovd (%rdi), %xmm0 ## encoding: [0x62
; CHECK: ret
define <4 x i32> @test10(i32* %x) {
- %y = load i32* %x, align 4
+ %y = load i32, i32* %x, align 4
%res = insertelement <4 x i32>zeroinitializer, i32 %y, i32 0
ret <4 x i32>%res
}
@@ -87,7 +87,7 @@ define <4 x i32> @test10(i32* %x) {
; CHECK: vmovss (%rdi), %xmm0 ## encoding: [0x62
; CHECK: ret
define <4 x float> @test11(float* %x) {
- %y = load float* %x, align 4
+ %y = load float, float* %x, align 4
%res = insertelement <4 x float>zeroinitializer, float %y, i32 0
ret <4 x float>%res
}
@@ -96,7 +96,7 @@ define <4 x float> @test11(float* %x) {
; CHECK: vmovsd (%rdi), %xmm0 ## encoding: [0x62
; CHECK: ret
define <2 x double> @test12(double* %x) {
- %y = load double* %x, align 8
+ %y = load double, double* %x, align 8
%res = insertelement <2 x double>zeroinitializer, double %y, i32 0
ret <2 x double>%res
}
@@ -121,7 +121,7 @@ define <4 x i32> @test14(i32 %x) {
; CHECK: vmovd (%rdi), %xmm0 ## encoding: [0x62
; CHECK: ret
define <4 x i32> @test15(i32* %x) {
- %y = load i32* %x, align 4
+ %y = load i32, i32* %x, align 4
%res = insertelement <4 x i32>zeroinitializer, i32 %y, i32 0
ret <4 x i32>%res
}
@@ -131,7 +131,7 @@ define <4 x i32> @test15(i32* %x) {
; CHECK: ret
define <16 x i32> @test16(i8 * %addr) {
%vaddr = bitcast i8* %addr to <16 x i32>*
- %res = load <16 x i32>* %vaddr, align 1
+ %res = load <16 x i32>, <16 x i32>* %vaddr, align 1
ret <16 x i32>%res
}
@@ -140,7 +140,7 @@ define <16 x i32> @test16(i8 * %addr) {
; CHECK: ret
define <16 x i32> @test17(i8 * %addr) {
%vaddr = bitcast i8* %addr to <16 x i32>*
- %res = load <16 x i32>* %vaddr, align 64
+ %res = load <16 x i32>, <16 x i32>* %vaddr, align 64
ret <16 x i32>%res
}
@@ -176,7 +176,7 @@ define void @test20(i8 * %addr, <16 x i32> %data) {
; CHECK: ret
define <8 x i64> @test21(i8 * %addr) {
%vaddr = bitcast i8* %addr to <8 x i64>*
- %res = load <8 x i64>* %vaddr, align 64
+ %res = load <8 x i64>, <8 x i64>* %vaddr, align 64
ret <8 x i64>%res
}
@@ -194,7 +194,7 @@ define void @test22(i8 * %addr, <8 x i64> %data) {
; CHECK: ret
define <8 x i64> @test23(i8 * %addr) {
%vaddr = bitcast i8* %addr to <8 x i64>*
- %res = load <8 x i64>* %vaddr, align 1
+ %res = load <8 x i64>, <8 x i64>* %vaddr, align 1
ret <8 x i64>%res
}
@@ -212,7 +212,7 @@ define void @test24(i8 * %addr, <8 x double> %data) {
; CHECK: ret
define <8 x double> @test25(i8 * %addr) {
%vaddr = bitcast i8* %addr to <8 x double>*
- %res = load <8 x double>* %vaddr, align 64
+ %res = load <8 x double>, <8 x double>* %vaddr, align 64
ret <8 x double>%res
}
@@ -230,7 +230,7 @@ define void @test26(i8 * %addr, <16 x float> %data) {
; CHECK: ret
define <16 x float> @test27(i8 * %addr) {
%vaddr = bitcast i8* %addr to <16 x float>*
- %res = load <16 x float>* %vaddr, align 64
+ %res = load <16 x float>, <16 x float>* %vaddr, align 64
ret <16 x float>%res
}
@@ -248,7 +248,7 @@ define void @test28(i8 * %addr, <8 x double> %data) {
; CHECK: ret
define <8 x double> @test29(i8 * %addr) {
%vaddr = bitcast i8* %addr to <8 x double>*
- %res = load <8 x double>* %vaddr, align 1
+ %res = load <8 x double>, <8 x double>* %vaddr, align 1
ret <8 x double>%res
}
@@ -266,7 +266,7 @@ define void @test30(i8 * %addr, <16 x float> %data) {
; CHECK: ret
define <16 x float> @test31(i8 * %addr) {
%vaddr = bitcast i8* %addr to <16 x float>*
- %res = load <16 x float>* %vaddr, align 1
+ %res = load <16 x float>, <16 x float>* %vaddr, align 1
ret <16 x float>%res
}
@@ -276,7 +276,7 @@ define <16 x float> @test31(i8 * %addr) {
define <16 x i32> @test32(i8 * %addr, <16 x i32> %old, <16 x i32> %mask1) {
%mask = icmp ne <16 x i32> %mask1, zeroinitializer
%vaddr = bitcast i8* %addr to <16 x i32>*
- %r = load <16 x i32>* %vaddr, align 64
+ %r = load <16 x i32>, <16 x i32>* %vaddr, align 64
%res = select <16 x i1> %mask, <16 x i32> %r, <16 x i32> %old
ret <16 x i32>%res
}
@@ -287,7 +287,7 @@ define <16 x i32> @test32(i8 * %addr, <16 x i32> %old, <16 x i32> %mask1) {
define <16 x i32> @test33(i8 * %addr, <16 x i32> %old, <16 x i32> %mask1) {
%mask = icmp ne <16 x i32> %mask1, zeroinitializer
%vaddr = bitcast i8* %addr to <16 x i32>*
- %r = load <16 x i32>* %vaddr, align 1
+ %r = load <16 x i32>, <16 x i32>* %vaddr, align 1
%res = select <16 x i1> %mask, <16 x i32> %r, <16 x i32> %old
ret <16 x i32>%res
}
@@ -298,7 +298,7 @@ define <16 x i32> @test33(i8 * %addr, <16 x i32> %old, <16 x i32> %mask1) {
define <16 x i32> @test34(i8 * %addr, <16 x i32> %mask1) {
%mask = icmp ne <16 x i32> %mask1, zeroinitializer
%vaddr = bitcast i8* %addr to <16 x i32>*
- %r = load <16 x i32>* %vaddr, align 64
+ %r = load <16 x i32>, <16 x i32>* %vaddr, align 64
%res = select <16 x i1> %mask, <16 x i32> %r, <16 x i32> zeroinitializer
ret <16 x i32>%res
}
@@ -309,7 +309,7 @@ define <16 x i32> @test34(i8 * %addr, <16 x i32> %mask1) {
define <16 x i32> @test35(i8 * %addr, <16 x i32> %mask1) {
%mask = icmp ne <16 x i32> %mask1, zeroinitializer
%vaddr = bitcast i8* %addr to <16 x i32>*
- %r = load <16 x i32>* %vaddr, align 1
+ %r = load <16 x i32>, <16 x i32>* %vaddr, align 1
%res = select <16 x i1> %mask, <16 x i32> %r, <16 x i32> zeroinitializer
ret <16 x i32>%res
}
@@ -320,7 +320,7 @@ define <16 x i32> @test35(i8 * %addr, <16 x i32> %mask1) {
define <8 x i64> @test36(i8 * %addr, <8 x i64> %old, <8 x i64> %mask1) {
%mask = icmp ne <8 x i64> %mask1, zeroinitializer
%vaddr = bitcast i8* %addr to <8 x i64>*
- %r = load <8 x i64>* %vaddr, align 64
+ %r = load <8 x i64>, <8 x i64>* %vaddr, align 64
%res = select <8 x i1> %mask, <8 x i64> %r, <8 x i64> %old
ret <8 x i64>%res
}
@@ -331,7 +331,7 @@ define <8 x i64> @test36(i8 * %addr, <8 x i64> %old, <8 x i64> %mask1) {
define <8 x i64> @test37(i8 * %addr, <8 x i64> %old, <8 x i64> %mask1) {
%mask = icmp ne <8 x i64> %mask1, zeroinitializer
%vaddr = bitcast i8* %addr to <8 x i64>*
- %r = load <8 x i64>* %vaddr, align 1
+ %r = load <8 x i64>, <8 x i64>* %vaddr, align 1
%res = select <8 x i1> %mask, <8 x i64> %r, <8 x i64> %old
ret <8 x i64>%res
}
@@ -342,7 +342,7 @@ define <8 x i64> @test37(i8 * %addr, <8 x i64> %old, <8 x i64> %mask1) {
define <8 x i64> @test38(i8 * %addr, <8 x i64> %mask1) {
%mask = icmp ne <8 x i64> %mask1, zeroinitializer
%vaddr = bitcast i8* %addr to <8 x i64>*
- %r = load <8 x i64>* %vaddr, align 64
+ %r = load <8 x i64>, <8 x i64>* %vaddr, align 64
%res = select <8 x i1> %mask, <8 x i64> %r, <8 x i64> zeroinitializer
ret <8 x i64>%res
}
@@ -353,7 +353,7 @@ define <8 x i64> @test38(i8 * %addr, <8 x i64> %mask1) {
define <8 x i64> @test39(i8 * %addr, <8 x i64> %mask1) {
%mask = icmp ne <8 x i64> %mask1, zeroinitializer
%vaddr = bitcast i8* %addr to <8 x i64>*
- %r = load <8 x i64>* %vaddr, align 1
+ %r = load <8 x i64>, <8 x i64>* %vaddr, align 1
%res = select <8 x i1> %mask, <8 x i64> %r, <8 x i64> zeroinitializer
ret <8 x i64>%res
}
@@ -364,7 +364,7 @@ define <8 x i64> @test39(i8 * %addr, <8 x i64> %mask1) {
define <16 x float> @test40(i8 * %addr, <16 x float> %old, <16 x float> %mask1) {
%mask = fcmp one <16 x float> %mask1, zeroinitializer
%vaddr = bitcast i8* %addr to <16 x float>*
- %r = load <16 x float>* %vaddr, align 64
+ %r = load <16 x float>, <16 x float>* %vaddr, align 64
%res = select <16 x i1> %mask, <16 x float> %r, <16 x float> %old
ret <16 x float>%res
}
@@ -375,7 +375,7 @@ define <16 x float> @test40(i8 * %addr, <16 x float> %old, <16 x float> %mask1)
define <16 x float> @test41(i8 * %addr, <16 x float> %old, <16 x float> %mask1) {
%mask = fcmp one <16 x float> %mask1, zeroinitializer
%vaddr = bitcast i8* %addr to <16 x float>*
- %r = load <16 x float>* %vaddr, align 1
+ %r = load <16 x float>, <16 x float>* %vaddr, align 1
%res = select <16 x i1> %mask, <16 x float> %r, <16 x float> %old
ret <16 x float>%res
}
@@ -386,7 +386,7 @@ define <16 x float> @test41(i8 * %addr, <16 x float> %old, <16 x float> %mask1)
define <16 x float> @test42(i8 * %addr, <16 x float> %mask1) {
%mask = fcmp one <16 x float> %mask1, zeroinitializer
%vaddr = bitcast i8* %addr to <16 x float>*
- %r = load <16 x float>* %vaddr, align 64
+ %r = load <16 x float>, <16 x float>* %vaddr, align 64
%res = select <16 x i1> %mask, <16 x float> %r, <16 x float> zeroinitializer
ret <16 x float>%res
}
@@ -397,7 +397,7 @@ define <16 x float> @test42(i8 * %addr, <16 x float> %mask1) {
define <16 x float> @test43(i8 * %addr, <16 x float> %mask1) {
%mask = fcmp one <16 x float> %mask1, zeroinitializer
%vaddr = bitcast i8* %addr to <16 x float>*
- %r = load <16 x float>* %vaddr, align 1
+ %r = load <16 x float>, <16 x float>* %vaddr, align 1
%res = select <16 x i1> %mask, <16 x float> %r, <16 x float> zeroinitializer
ret <16 x float>%res
}
@@ -408,7 +408,7 @@ define <16 x float> @test43(i8 * %addr, <16 x float> %mask1) {
define <8 x double> @test44(i8 * %addr, <8 x double> %old, <8 x double> %mask1) {
%mask = fcmp one <8 x double> %mask1, zeroinitializer
%vaddr = bitcast i8* %addr to <8 x double>*
- %r = load <8 x double>* %vaddr, align 64
+ %r = load <8 x double>, <8 x double>* %vaddr, align 64
%res = select <8 x i1> %mask, <8 x double> %r, <8 x double> %old
ret <8 x double>%res
}
@@ -419,7 +419,7 @@ define <8 x double> @test44(i8 * %addr, <8 x double> %old, <8 x double> %mask1)
define <8 x double> @test45(i8 * %addr, <8 x double> %old, <8 x double> %mask1) {
%mask = fcmp one <8 x double> %mask1, zeroinitializer
%vaddr = bitcast i8* %addr to <8 x double>*
- %r = load <8 x double>* %vaddr, align 1
+ %r = load <8 x double>, <8 x double>* %vaddr, align 1
%res = select <8 x i1> %mask, <8 x double> %r, <8 x double> %old
ret <8 x double>%res
}
@@ -430,7 +430,7 @@ define <8 x double> @test45(i8 * %addr, <8 x double> %old, <8 x double> %mask1)
define <8 x double> @test46(i8 * %addr, <8 x double> %mask1) {
%mask = fcmp one <8 x double> %mask1, zeroinitializer
%vaddr = bitcast i8* %addr to <8 x double>*
- %r = load <8 x double>* %vaddr, align 64
+ %r = load <8 x double>, <8 x double>* %vaddr, align 64
%res = select <8 x i1> %mask, <8 x double> %r, <8 x double> zeroinitializer
ret <8 x double>%res
}
@@ -441,7 +441,7 @@ define <8 x double> @test46(i8 * %addr, <8 x double> %mask1) {
define <8 x double> @test47(i8 * %addr, <8 x double> %mask1) {
%mask = fcmp one <8 x double> %mask1, zeroinitializer
%vaddr = bitcast i8* %addr to <8 x double>*
- %r = load <8 x double>* %vaddr, align 1
+ %r = load <8 x double>, <8 x double>* %vaddr, align 1
%res = select <8 x i1> %mask, <8 x double> %r, <8 x double> zeroinitializer
ret <8 x double>%res
}
diff --git a/test/CodeGen/X86/avx512-round.ll b/test/CodeGen/X86/avx512-round.ll
new file mode 100644
index 000000000000..c4f417e75ab0
--- /dev/null
+++ b/test/CodeGen/X86/avx512-round.ll
@@ -0,0 +1,106 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl --show-mc-encoding| FileCheck %s
+
+define <16 x float> @floor_v16f32(<16 x float> %a) {
+; CHECK-LABEL: floor_v16f32
+; CHECK: vrndscaleps $1, {{.*}}encoding: [0x62,0xf3,0x7d,0x48,0x08,0xc0,0x01]
+ %res = call <16 x float> @llvm.floor.v16f32(<16 x float> %a)
+ ret <16 x float> %res
+}
+declare <16 x float> @llvm.floor.v16f32(<16 x float> %p)
+
+define <8 x double> @floor_v8f64(<8 x double> %a) {
+; CHECK-LABEL: floor_v8f64
+; CHECK: vrndscalepd $1, {{.*}}encoding: [0x62,0xf3,0xfd,0x48,0x09,0xc0,0x01]
+ %res = call <8 x double> @llvm.floor.v8f64(<8 x double> %a)
+ ret <8 x double> %res
+}
+declare <8 x double> @llvm.floor.v8f64(<8 x double> %p)
+
+define <16 x float> @ceil_v16f32(<16 x float> %a) {
+; CHECK-LABEL: ceil_v16f32
+; CHECK: vrndscaleps $2, {{.*}}encoding: [0x62,0xf3,0x7d,0x48,0x08,0xc0,0x02]
+ %res = call <16 x float> @llvm.ceil.v16f32(<16 x float> %a)
+ ret <16 x float> %res
+}
+declare <16 x float> @llvm.ceil.v16f32(<16 x float> %p)
+
+define <8 x double> @ceil_v8f64(<8 x double> %a) {
+; CHECK-LABEL: ceil_v8f64
+; CHECK: vrndscalepd $2, {{.*}}encoding: [0x62,0xf3,0xfd,0x48,0x09,0xc0,0x02]
+ %res = call <8 x double> @llvm.ceil.v8f64(<8 x double> %a)
+ ret <8 x double> %res
+}
+declare <8 x double> @llvm.ceil.v8f64(<8 x double> %p)
+
+define <16 x float> @trunc_v16f32(<16 x float> %a) {
+; CHECK-LABEL: trunc_v16f32
+; CHECK: vrndscaleps $3, {{.*}}encoding: [0x62,0xf3,0x7d,0x48,0x08,0xc0,0x03]
+ %res = call <16 x float> @llvm.trunc.v16f32(<16 x float> %a)
+ ret <16 x float> %res
+}
+declare <16 x float> @llvm.trunc.v16f32(<16 x float> %p)
+
+define <8 x double> @trunc_v8f64(<8 x double> %a) {
+; CHECK-LABEL: trunc_v8f64
+; CHECK: vrndscalepd $3, {{.*}}encoding: [0x62,0xf3,0xfd,0x48,0x09,0xc0,0x03]
+ %res = call <8 x double> @llvm.trunc.v8f64(<8 x double> %a)
+ ret <8 x double> %res
+}
+declare <8 x double> @llvm.trunc.v8f64(<8 x double> %p)
+
+define <16 x float> @rint_v16f32(<16 x float> %a) {
+; CHECK-LABEL: rint_v16f32
+; CHECK: vrndscaleps $4, {{.*}}encoding: [0x62,0xf3,0x7d,0x48,0x08,0xc0,0x04]
+ %res = call <16 x float> @llvm.rint.v16f32(<16 x float> %a)
+ ret <16 x float> %res
+}
+declare <16 x float> @llvm.rint.v16f32(<16 x float> %p)
+
+define <8 x double> @rint_v8f64(<8 x double> %a) {
+; CHECK-LABEL: rint_v8f64
+; CHECK: vrndscalepd $4, {{.*}}encoding: [0x62,0xf3,0xfd,0x48,0x09,0xc0,0x04]
+ %res = call <8 x double> @llvm.rint.v8f64(<8 x double> %a)
+ ret <8 x double> %res
+}
+declare <8 x double> @llvm.rint.v8f64(<8 x double> %p)
+
+define <16 x float> @nearbyint_v16f32(<16 x float> %a) {
+; CHECK-LABEL: nearbyint_v16f32
+; CHECK: vrndscaleps $12, {{.*}}encoding: [0x62,0xf3,0x7d,0x48,0x08,0xc0,0x0c]
+ %res = call <16 x float> @llvm.nearbyint.v16f32(<16 x float> %a)
+ ret <16 x float> %res
+}
+declare <16 x float> @llvm.nearbyint.v16f32(<16 x float> %p)
+
+define <8 x double> @nearbyint_v8f64(<8 x double> %a) {
+; CHECK-LABEL: nearbyint_v8f64
+; CHECK: vrndscalepd $12, {{.*}}encoding: [0x62,0xf3,0xfd,0x48,0x09,0xc0,0x0c]
+ %res = call <8 x double> @llvm.nearbyint.v8f64(<8 x double> %a)
+ ret <8 x double> %res
+}
+declare <8 x double> @llvm.nearbyint.v8f64(<8 x double> %p)
+
+define double @nearbyint_f64(double %a) {
+; CHECK-LABEL: nearbyint_f64
+; CHECK: vrndscalesd $12, {{.*}}encoding: [0x62,0xf3,0xfd,0x08,0x0b,0xc0,0x0c]
+ %res = call double @llvm.nearbyint.f64(double %a)
+ ret double %res
+}
+declare double @llvm.nearbyint.f64(double %p)
+
+define float @floor_f32(float %a) {
+; CHECK-LABEL: floor_f32
+; CHECK: vrndscaless $1, {{.*}}encoding: [0x62,0xf3,0x7d,0x08,0x0a,0xc0,0x01]
+ %res = call float @llvm.floor.f32(float %a)
+ ret float %res
+}
+declare float @llvm.floor.f32(float %p)
+
+define float @floor_f32m(float* %aptr) {
+; CHECK-LABEL: floor_f32m
+; CHECK: vrndscaless $1, (%rdi), {{.*}}encoding: [0x62,0xf3,0x7d,0x08,0x0a,0x07,0x01]
+ %a = load float, float* %aptr, align 4
+ %res = call float @llvm.floor.f32(float %a)
+ ret float %res
+}
+
diff --git a/test/CodeGen/X86/avx512-scalar.ll b/test/CodeGen/X86/avx512-scalar.ll
new file mode 100644
index 000000000000..644fda415755
--- /dev/null
+++ b/test/CodeGen/X86/avx512-scalar.ll
@@ -0,0 +1,92 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl --show-mc-encoding | FileCheck %s --check-prefix AVX512
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx --show-mc-encoding | FileCheck %s --check-prefix AVX
+
+; AVX512-LABEL: @test_fdiv
+; AVX512: vdivss %xmm{{.*}} ## encoding: [0x62
+; AVX-LABEL: @test_fdiv
+; AVX: vdivss %xmm{{.*}} ## encoding: [0xc5
+
+define float @test_fdiv(float %a, float %b) {
+ %c = fdiv float %a, %b
+ ret float %c
+}
+
+; AVX512-LABEL: @test_fsub
+; AVX512: vsubss %xmm{{.*}} ## encoding: [0x62
+; AVX-LABEL: @test_fsub
+; AVX: vsubss %xmm{{.*}} ## encoding: [0xc5
+
+define float @test_fsub(float %a, float %b) {
+ %c = fsub float %a, %b
+ ret float %c
+}
+
+; AVX512-LABEL: @test_fadd
+; AVX512: vaddsd %xmm{{.*}} ## encoding: [0x62
+; AVX-LABEL: @test_fadd
+; AVX: vaddsd %xmm{{.*}} ## encoding: [0xc5
+
+define double @test_fadd(double %a, double %b) {
+ %c = fadd double %a, %b
+ ret double %c
+}
+
+declare float @llvm.trunc.f32(float %Val)
+declare double @llvm.trunc.f64(double %Val)
+declare float @llvm.rint.f32(float %Val)
+declare double @llvm.rint.f64(double %Val)
+declare double @llvm.sqrt.f64(double %Val)
+declare float @llvm.sqrt.f32(float %Val)
+
+; AVX512-LABEL: @test_trunc
+; AVX512: vrndscaless
+; AVX-LABEL: @test_trunc
+; AVX: vroundss
+
+define float @test_trunc(float %a) {
+ %c = call float @llvm.trunc.f32(float %a)
+ ret float %c
+}
+
+; AVX512-LABEL: @test_sqrt
+; AVX512: vsqrtsd %xmm{{.*}} ## encoding: [0x62
+; AVX-LABEL: @test_sqrt
+; AVX: vsqrtsd %xmm{{.*}} ## encoding: [0xc5
+
+define double @test_sqrt(double %a) {
+ %c = call double @llvm.sqrt.f64(double %a)
+ ret double %c
+}
+
+; AVX512-LABEL: @test_rint
+; AVX512: vrndscaless
+; AVX-LABEL: @test_rint
+; AVX: vroundss
+
+define float @test_rint(float %a) {
+ %c = call float @llvm.rint.f32(float %a)
+ ret float %c
+}
+
+; AVX512-LABEL: @test_vmax
+; AVX512: vmaxss %xmm{{.*}} ## encoding: [0x62
+; AVX-LABEL: @test_vmax
+; AVX: vmaxss %xmm{{.*}} ## encoding: [0xc5
+
+define float @test_vmax(float %i, float %j) {
+ %cmp_res = fcmp ogt float %i, %j
+ %max = select i1 %cmp_res, float %i, float %j
+ ret float %max
+}
+
+; AVX512-LABEL: @test_mov
+; AVX512: vcmpltss %xmm{{.*}} ## encoding: [0x62
+; AVX-LABEL: @test_mov
+; AVX: vcmpltss %xmm{{.*}} ## encoding: [0xc5
+
+define float @test_mov(float %a, float %b, float %i, float %j) {
+ %cmp_res = fcmp ogt float %i, %j
+ %max = select i1 %cmp_res, float %b, float %a
+ ret float %max
+}
+
diff --git a/test/CodeGen/X86/avx512-select.ll b/test/CodeGen/X86/avx512-select.ll
index 0dbf286d3c5d..b92e6f62813c 100644
--- a/test/CodeGen/X86/avx512-select.ll
+++ b/test/CodeGen/X86/avx512-select.ll
@@ -50,8 +50,10 @@ define <16 x double> @select04(<16 x double> %a, <16 x double> %b) {
}
; CHECK-LABEL: select05
-; CHECK: kmovw %esi, %k0
-; CHECK-NEXT: kmovw %edi, %k1
+; CHECK: movzbl %sil, %eax
+; CHECK: kmovw %eax, %k0
+; CHECK: movzbl %dil, %eax
+; CHECK: kmovw %eax, %k1
; CHECK-NEXT: korw %k1, %k0, %k0
; CHECK-NEXT: kmovw %k0, %eax
define i8 @select05(i8 %a.0, i8 %m) {
@@ -63,8 +65,10 @@ define i8 @select05(i8 %a.0, i8 %m) {
}
; CHECK-LABEL: select06
-; CHECK: kmovw %esi, %k0
-; CHECK-NEXT: kmovw %edi, %k1
+; CHECK: movzbl %sil, %eax
+; CHECK: kmovw %eax, %k0
+; CHECK: movzbl %dil, %eax
+; CHECK: kmovw %eax, %k1
; CHECK-NEXT: kandw %k1, %k0, %k0
; CHECK-NEXT: kmovw %k0, %eax
define i8 @select06(i8 %a.0, i8 %m) {
@@ -76,9 +80,12 @@ define i8 @select06(i8 %a.0, i8 %m) {
}
; CHECK-LABEL: select07
-; CHECK-DAG: kmovw %edx, %k0
-; CHECK-DAG: kmovw %edi, %k1
-; CHECK-DAG: kmovw %esi, %k2
+; CHECK-DAG: movzbl %dl, %eax
+; CHECK-DAG: kmovw %eax, %k0
+; CHECK-DAG: movzbl %dil, %eax
+; CHECK-DAG: kmovw %eax, %k1
+; CHECK-DAG: movzbl %sil, %eax
+; CHECK-DAG: kmovw %eax, %k2
; CHECK: kandw %k0, %k1, %k1
; CHECK-NEXT: knotw %k0, %k0
; CHECK-NEXT: kandw %k0, %k2, %k0
diff --git a/test/CodeGen/X86/avx512-shift.ll b/test/CodeGen/X86/avx512-shift.ll
index 8cdcf8ad062f..10883a5a9a62 100644
--- a/test/CodeGen/X86/avx512-shift.ll
+++ b/test/CodeGen/X86/avx512-shift.ll
@@ -1,4 +1,5 @@
;RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s
+;RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck --check-prefix=SKX %s
;CHECK-LABEL: shift_16_i32
;CHECK: vpsrld
@@ -24,6 +25,18 @@ define <8 x i64> @shift_8_i64(<8 x i64> %a) {
ret <8 x i64> %d;
}
+;SKX-LABEL: shift_4_i64
+;SKX: vpsrlq
+;SKX: vpsllq
+;SKX: vpsraq
+;SKX: ret
+define <4 x i64> @shift_4_i64(<4 x i64> %a) {
+ %b = lshr <4 x i64> %a, <i64 1, i64 1, i64 1, i64 1>
+ %c = shl <4 x i64> %b, <i64 12, i64 12, i64 12, i64 12>
+ %d = ashr <4 x i64> %c, <i64 12, i64 12, i64 12, i64 12>
+ ret <4 x i64> %d;
+}
+
; CHECK-LABEL: variable_shl4
; CHECK: vpsllvq %zmm
; CHECK: ret
@@ -72,11 +85,27 @@ define <8 x i64> @variable_sra2(<8 x i64> %x, <8 x i64> %y) {
ret <8 x i64> %k
}
+; SKX-LABEL: variable_sra3
+; SKX: vpsravq %ymm
+; SKX: ret
+define <4 x i64> @variable_sra3(<4 x i64> %x, <4 x i64> %y) {
+ %k = ashr <4 x i64> %x, %y
+ ret <4 x i64> %k
+}
+
+; SKX-LABEL: variable_sra4
+; SKX: vpsravw %xmm
+; SKX: ret
+define <8 x i16> @variable_sra4(<8 x i16> %x, <8 x i16> %y) {
+ %k = ashr <8 x i16> %x, %y
+ ret <8 x i16> %k
+}
+
; CHECK-LABEL: variable_sra01_load
; CHECK: vpsravd (%
; CHECK: ret
define <16 x i32> @variable_sra01_load(<16 x i32> %x, <16 x i32>* %y) {
- %y1 = load <16 x i32>* %y
+ %y1 = load <16 x i32>, <16 x i32>* %y
%k = ashr <16 x i32> %x, %y1
ret <16 x i32> %k
}
@@ -85,7 +114,7 @@ define <16 x i32> @variable_sra01_load(<16 x i32> %x, <16 x i32>* %y) {
; CHECK: vpsllvd (%
; CHECK: ret
define <16 x i32> @variable_shl1_load(<16 x i32> %x, <16 x i32>* %y) {
- %y1 = load <16 x i32>* %y
+ %y1 = load <16 x i32>, <16 x i32>* %y
%k = shl <16 x i32> %x, %y1
ret <16 x i32> %k
}
@@ -93,7 +122,7 @@ define <16 x i32> @variable_shl1_load(<16 x i32> %x, <16 x i32>* %y) {
; CHECK: vpsrlvd (%
; CHECK: ret
define <16 x i32> @variable_srl0_load(<16 x i32> %x, <16 x i32>* %y) {
- %y1 = load <16 x i32>* %y
+ %y1 = load <16 x i32>, <16 x i32>* %y
%k = lshr <16 x i32> %x, %y1
ret <16 x i32> %k
}
@@ -102,7 +131,7 @@ define <16 x i32> @variable_srl0_load(<16 x i32> %x, <16 x i32>* %y) {
; CHECK: vpsrlvq (%
; CHECK: ret
define <8 x i64> @variable_srl3_load(<8 x i64> %x, <8 x i64>* %y) {
- %y1 = load <8 x i64>* %y
+ %y1 = load <8 x i64>, <8 x i64>* %y
%k = lshr <8 x i64> %x, %y1
ret <8 x i64> %k
}
diff --git a/test/CodeGen/X86/avx512-trunc-ext.ll b/test/CodeGen/X86/avx512-trunc-ext.ll
index 91ef5d58f438..f25458972e42 100644
--- a/test/CodeGen/X86/avx512-trunc-ext.ll
+++ b/test/CodeGen/X86/avx512-trunc-ext.ll
@@ -1,126 +1,893 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck --check-prefix=SKX %s
-
-; CHECK-LABEL: trunc_16x32_to_16x8
-; CHECK: vpmovdb
-; CHECK: ret
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s --check-prefix=KNL
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck %s --check-prefix=SKX
+
+
+; KNL-LABEL: trunc_16x32_to_16x8
+; KNL: vpmovdb
+; KNL: ret
define <16 x i8> @trunc_16x32_to_16x8(<16 x i32> %i) nounwind readnone {
%x = trunc <16 x i32> %i to <16 x i8>
ret <16 x i8> %x
}
-; CHECK-LABEL: trunc_8x64_to_8x16
-; CHECK: vpmovqw
-; CHECK: ret
+; KNL-LABEL: trunc_8x64_to_8x16
+; KNL: vpmovqw
+; KNL: ret
define <8 x i16> @trunc_8x64_to_8x16(<8 x i64> %i) nounwind readnone {
%x = trunc <8 x i64> %i to <8 x i16>
ret <8 x i16> %x
}
+;SKX-LABEL: zext_8x8mem_to_8x16:
+;SKX: ## BB#0:
+;SKX-NEXT: vpmovw2m %xmm0, %k1
+;SKX-NEXT: vpmovzxbw (%rdi), %xmm0 {%k1} {z}
+;SKX-NEXT: retq
+define <8 x i16> @zext_8x8mem_to_8x16(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone {
+ %a = load <8 x i8>,<8 x i8> *%i,align 1
+ %x = zext <8 x i8> %a to <8 x i16>
+ %ret = select <8 x i1> %mask, <8 x i16> %x, <8 x i16> zeroinitializer
+ ret <8 x i16> %ret
+}
+
+;SKX-LABEL: sext_8x8mem_to_8x16:
+;SKX: ## BB#0:
+;SKX-NEXT: vpmovw2m %xmm0, %k1
+;SKX-NEXT: vpmovsxbw (%rdi), %xmm0 {%k1} {z}
+;SKX-NEXT: retq
+define <8 x i16> @sext_8x8mem_to_8x16(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone {
+ %a = load <8 x i8>,<8 x i8> *%i,align 1
+ %x = sext <8 x i8> %a to <8 x i16>
+ %ret = select <8 x i1> %mask, <8 x i16> %x, <8 x i16> zeroinitializer
+ ret <8 x i16> %ret
+}
+
+;SKX-LABEL: zext_16x8mem_to_16x16:
+;SKX: ## BB#0:
+;SKX-NEXT: vpmovb2m %xmm0, %k1
+;SKX-NEXT: vpmovzxbw (%rdi), %ymm0 {%k1} {z}
+;SKX-NEXT: retq
+define <16 x i16> @zext_16x8mem_to_16x16(<16 x i8> *%i , <16 x i1> %mask) nounwind readnone {
+ %a = load <16 x i8>,<16 x i8> *%i,align 1
+ %x = zext <16 x i8> %a to <16 x i16>
+ %ret = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> zeroinitializer
+ ret <16 x i16> %ret
+}
+
+;SKX-LABEL: sext_16x8mem_to_16x16:
+;SKX: ## BB#0:
+;SKX-NEXT: vpmovb2m %xmm0, %k1
+;SKX-NEXT: vpmovsxbw (%rdi), %ymm0 {%k1} {z}
+;SKX-NEXT: retq
+define <16 x i16> @sext_16x8mem_to_16x16(<16 x i8> *%i , <16 x i1> %mask) nounwind readnone {
+ %a = load <16 x i8>,<16 x i8> *%i,align 1
+ %x = sext <16 x i8> %a to <16 x i16>
+ %ret = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> zeroinitializer
+ ret <16 x i16> %ret
+}
+
+;SKX-LABEL: zext_16x8_to_16x16:
+;SKX: ## BB#0:
+;SKX-NEXT: vpmovzxbw %xmm0, %ymm0
+;SKX-NEXT: retq
+define <16 x i16> @zext_16x8_to_16x16(<16 x i8> %a ) nounwind readnone {
+ %x = zext <16 x i8> %a to <16 x i16>
+ ret <16 x i16> %x
+}
+
+;SKX-LABEL: zext_16x8_to_16x16_mask:
+;SKX: ## BB#0:
+;SKX-NEXT: vpmovb2m %xmm1, %k1
+;SKX-NEXT: vpmovzxbw %xmm0, %ymm0 {%k1} {z}
+;SKX-NEXT: retq
+define <16 x i16> @zext_16x8_to_16x16_mask(<16 x i8> %a ,<16 x i1> %mask) nounwind readnone {
+ %x = zext <16 x i8> %a to <16 x i16>
+ %ret = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> zeroinitializer
+ ret <16 x i16> %ret
+}
+
+;SKX-LABEL: sext_16x8_to_16x16:
+;SKX: ## BB#0:
+;SKX-NEXT: vpmovsxbw %xmm0, %ymm0
+;SKX-NEXT: retq
+define <16 x i16> @sext_16x8_to_16x16(<16 x i8> %a ) nounwind readnone {
+ %x = sext <16 x i8> %a to <16 x i16>
+ ret <16 x i16> %x
+}
+
+;SKX-LABEL: sext_16x8_to_16x16_mask:
+;SKX: ## BB#0:
+;SKX-NEXT: vpmovb2m %xmm1, %k1
+;SKX-NEXT: vpmovsxbw %xmm0, %ymm0 {%k1} {z}
+;SKX-NEXT: retq
+define <16 x i16> @sext_16x8_to_16x16_mask(<16 x i8> %a ,<16 x i1> %mask) nounwind readnone {
+ %x = sext <16 x i8> %a to <16 x i16>
+ %ret = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> zeroinitializer
+ ret <16 x i16> %ret
+}
+
+;SKX-LABEL: zext_32x8mem_to_32x16:
+;SKX: ## BB#0:
+;SKX-NEXT: vpmovb2m %ymm0, %k1
+;SKX-NEXT: vpmovzxbw (%rdi), %zmm0 {%k1} {z}
+;SKX-NEXT: retq
+define <32 x i16> @zext_32x8mem_to_32x16(<32 x i8> *%i , <32 x i1> %mask) nounwind readnone {
+ %a = load <32 x i8>,<32 x i8> *%i,align 1
+ %x = zext <32 x i8> %a to <32 x i16>
+ %ret = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer
+ ret <32 x i16> %ret
+}
+
+;SKX-LABEL: sext_32x8mem_to_32x16:
+;SKX: ## BB#0:
+;SKX-NEXT: vpmovb2m %ymm0, %k1
+;SKX-NEXT: vpmovsxbw (%rdi), %zmm0 {%k1} {z}
+;SKX-NEXT: retq
+define <32 x i16> @sext_32x8mem_to_32x16(<32 x i8> *%i , <32 x i1> %mask) nounwind readnone {
+ %a = load <32 x i8>,<32 x i8> *%i,align 1
+ %x = sext <32 x i8> %a to <32 x i16>
+ %ret = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer
+ ret <32 x i16> %ret
+}
+
+;SKX-LABEL: zext_32x8_to_32x16:
+;SKX: ## BB#0:
+;SKX-NEXT: vpmovzxbw %ymm0, %zmm0
+;SKX-NEXT: retq
+define <32 x i16> @zext_32x8_to_32x16(<32 x i8> %a ) nounwind readnone {
+ %x = zext <32 x i8> %a to <32 x i16>
+ ret <32 x i16> %x
+}
+
+;SKX-LABEL: zext_32x8_to_32x16_mask:
+;SKX: ## BB#0:
+;SKX-NEXT: vpmovb2m %ymm1, %k1
+;SKX-NEXT: vpmovzxbw %ymm0, %zmm0 {%k1} {z}
+;SKX-NEXT: retq
+define <32 x i16> @zext_32x8_to_32x16_mask(<32 x i8> %a ,<32 x i1> %mask) nounwind readnone {
+ %x = zext <32 x i8> %a to <32 x i16>
+ %ret = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer
+ ret <32 x i16> %ret
+}
+
+;SKX-LABEL: sext_32x8_to_32x16:
+;SKX: ## BB#0:
+;SKX-NEXT: vpmovsxbw %ymm0, %zmm0
+;SKX-NEXT: retq
+define <32 x i16> @sext_32x8_to_32x16(<32 x i8> %a ) nounwind readnone {
+ %x = sext <32 x i8> %a to <32 x i16>
+ ret <32 x i16> %x
+}
+
+;SKX-LABEL: sext_32x8_to_32x16_mask:
+;SKX: ## BB#0:
+;SKX-NEXT: vpmovb2m %ymm1, %k1
+;SKX-NEXT: vpmovsxbw %ymm0, %zmm0 {%k1} {z}
+;SKX-NEXT: retq
+define <32 x i16> @sext_32x8_to_32x16_mask(<32 x i8> %a ,<32 x i1> %mask) nounwind readnone {
+ %x = sext <32 x i8> %a to <32 x i16>
+ %ret = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer
+ ret <32 x i16> %ret
+}
+
+;SKX-LABEL: zext_4x8mem_to_4x32:
+;SKX: ## BB#0:
+;SKX-NEXT: vpmovd2m %xmm0, %k1
+;SKX-NEXT: vpmovzxbd (%rdi), %xmm0 {%k1} {z}
+;SKX-NEXT: retq
+define <4 x i32> @zext_4x8mem_to_4x32(<4 x i8> *%i , <4 x i1> %mask) nounwind readnone {
+ %a = load <4 x i8>,<4 x i8> *%i,align 1
+ %x = zext <4 x i8> %a to <4 x i32>
+ %ret = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> zeroinitializer
+ ret <4 x i32> %ret
+}
+
+;SKX-LABEL: sext_4x8mem_to_4x32:
+;SKX: ## BB#0:
+;SKX-NEXT: vpmovd2m %xmm0, %k1
+;SKX-NEXT: vpmovsxbd (%rdi), %xmm0 {%k1} {z}
+;SKX-NEXT: retq
+define <4 x i32> @sext_4x8mem_to_4x32(<4 x i8> *%i , <4 x i1> %mask) nounwind readnone {
+ %a = load <4 x i8>,<4 x i8> *%i,align 1
+ %x = sext <4 x i8> %a to <4 x i32>
+ %ret = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> zeroinitializer
+ ret <4 x i32> %ret
+}
+
+;SKX-LABEL: zext_8x8mem_to_8x32:
+;SKX: ## BB#0:
+;SKX-NEXT: vpmovw2m %xmm0, %k1
+;SKX-NEXT: vpmovzxbd (%rdi), %ymm0 {%k1} {z}
+;SKX-NEXT: retq
+define <8 x i32> @zext_8x8mem_to_8x32(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone {
+ %a = load <8 x i8>,<8 x i8> *%i,align 1
+ %x = zext <8 x i8> %a to <8 x i32>
+ %ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
+ ret <8 x i32> %ret
+}
+
+;SKX-LABEL: sext_8x8mem_to_8x32:
+;SKX: ## BB#0:
+;SKX-NEXT: vpmovw2m %xmm0, %k1
+;SKX-NEXT: vpmovsxbd (%rdi), %ymm0 {%k1} {z}
+;SKX-NEXT: retq
+define <8 x i32> @sext_8x8mem_to_8x32(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone {
+ %a = load <8 x i8>,<8 x i8> *%i,align 1
+ %x = sext <8 x i8> %a to <8 x i32>
+ %ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
+ ret <8 x i32> %ret
+}
+
+;KNL-LABEL: zext_16x8mem_to_16x32:
+;KNL: vpmovzxbd (%rdi), %zmm0 {%k1} {z}
+;KNL-NEXT: retq
+define <16 x i32> @zext_16x8mem_to_16x32(<16 x i8> *%i , <16 x i1> %mask) nounwind readnone {
+ %a = load <16 x i8>,<16 x i8> *%i,align 1
+ %x = zext <16 x i8> %a to <16 x i32>
+ %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
+ ret <16 x i32> %ret
+}
+
+;KNL-LABEL: sext_16x8mem_to_16x32:
+;KNL: vpmovsxbd (%rdi), %zmm0 {%k1} {z}
+;KNL-NEXT: retq
+define <16 x i32> @sext_16x8mem_to_16x32(<16 x i8> *%i , <16 x i1> %mask) nounwind readnone {
+ %a = load <16 x i8>,<16 x i8> *%i,align 1
+ %x = sext <16 x i8> %a to <16 x i32>
+ %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
+ ret <16 x i32> %ret
+}
+
+;KNL-LABEL: zext_16x8_to_16x32_mask:
+;KNL: vpmovzxbd %xmm0, %zmm0 {%k1} {z}
+;KNL-NEXT: retq
+define <16 x i32> @zext_16x8_to_16x32_mask(<16 x i8> %a , <16 x i1> %mask) nounwind readnone {
+ %x = zext <16 x i8> %a to <16 x i32>
+ %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
+ ret <16 x i32> %ret
+}
+
+;KNL-LABEL: sext_16x8_to_16x32_mask:
+;KNL: vpmovsxbd %xmm0, %zmm0 {%k1} {z}
+;KNL-NEXT: retq
+define <16 x i32> @sext_16x8_to_16x32_mask(<16 x i8> %a , <16 x i1> %mask) nounwind readnone {
+ %x = sext <16 x i8> %a to <16 x i32>
+ %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
+ ret <16 x i32> %ret
+}
-; CHECK-LABEL: zext_16x8_to_16x32
-; CHECK: vpmovzxbd {{.*}}%zmm
-; CHECK: ret
+; KNL-LABEL: zext_16x8_to_16x32
+; KNL: vpmovzxbd {{.*}}%zmm
+; KNL: ret
define <16 x i32> @zext_16x8_to_16x32(<16 x i8> %i) nounwind readnone {
%x = zext <16 x i8> %i to <16 x i32>
ret <16 x i32> %x
}
-; CHECK-LABEL: sext_16x8_to_16x32
-; CHECK: vpmovsxbd {{.*}}%zmm
-; CHECK: ret
+; KNL-LABEL: sext_16x8_to_16x32
+; KNL: vpmovsxbd {{.*}}%zmm
+; KNL: ret
define <16 x i32> @sext_16x8_to_16x32(<16 x i8> %i) nounwind readnone {
%x = sext <16 x i8> %i to <16 x i32>
ret <16 x i32> %x
}
+;SKX-LABEL: zext_2x8mem_to_2x64:
+;SKX: ## BB#0:
+;SKX-NEXT: vpmovq2m %xmm0, %k1
+;SKX-NEXT: vpmovzxbq (%rdi), %xmm0 {%k1} {z}
+;SKX-NEXT: retq
+define <2 x i64> @zext_2x8mem_to_2x64(<2 x i8> *%i , <2 x i1> %mask) nounwind readnone {
+ %a = load <2 x i8>,<2 x i8> *%i,align 1
+ %x = zext <2 x i8> %a to <2 x i64>
+ %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer
+ ret <2 x i64> %ret
+}
+;SKX-LABEL: sext_2x8mem_to_2x64mask:
+;SKX: ## BB#0:
+;SKX-NEXT: vpmovq2m %xmm0, %k1
+;SKX-NEXT: vpmovsxbq (%rdi), %xmm0 {%k1} {z}
+;SKX-NEXT: retq
+define <2 x i64> @sext_2x8mem_to_2x64mask(<2 x i8> *%i , <2 x i1> %mask) nounwind readnone {
+ %a = load <2 x i8>,<2 x i8> *%i,align 1
+ %x = sext <2 x i8> %a to <2 x i64>
+ %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer
+ ret <2 x i64> %ret
+}
+;SKX-LABEL: sext_2x8mem_to_2x64:
+;SKX: ## BB#0:
+;SKX-NEXT: vpmovsxbq (%rdi), %xmm0
+;SKX-NEXT: retq
+define <2 x i64> @sext_2x8mem_to_2x64(<2 x i8> *%i) nounwind readnone {
+ %a = load <2 x i8>,<2 x i8> *%i,align 1
+ %x = sext <2 x i8> %a to <2 x i64>
+ ret <2 x i64> %x
+}
+
+;SKX-LABEL: zext_4x8mem_to_4x64:
+;SKX: ## BB#0:
+;SKX-NEXT: vpmovd2m %xmm0, %k1
+;SKX-NEXT: vpmovzxbq (%rdi), %ymm0 {%k1} {z}
+;SKX-NEXT: retq
+define <4 x i64> @zext_4x8mem_to_4x64(<4 x i8> *%i , <4 x i1> %mask) nounwind readnone {
+ %a = load <4 x i8>,<4 x i8> *%i,align 1
+ %x = zext <4 x i8> %a to <4 x i64>
+ %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer
+ ret <4 x i64> %ret
+}
+
+;SKX-LABEL: sext_4x8mem_to_4x64mask:
+;SKX: ## BB#0:
+;SKX-NEXT: vpmovd2m %xmm0, %k1
+;SKX-NEXT: vpmovsxbq (%rdi), %ymm0 {%k1} {z}
+;SKX-NEXT: retq
+define <4 x i64> @sext_4x8mem_to_4x64mask(<4 x i8> *%i , <4 x i1> %mask) nounwind readnone {
+ %a = load <4 x i8>,<4 x i8> *%i,align 1
+ %x = sext <4 x i8> %a to <4 x i64>
+ %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer
+ ret <4 x i64> %ret
+}
+
+;SKX-LABEL: sext_4x8mem_to_4x64:
+;SKX: ## BB#0:
+;SKX-NEXT: vpmovsxbq (%rdi), %ymm0
+;SKX-NEXT: retq
+define <4 x i64> @sext_4x8mem_to_4x64(<4 x i8> *%i) nounwind readnone {
+ %a = load <4 x i8>,<4 x i8> *%i,align 1
+ %x = sext <4 x i8> %a to <4 x i64>
+ ret <4 x i64> %x
+}
+
+;KNL-LABEL: zext_8x8mem_to_8x64:
+;KNL: vpmovzxbq (%rdi), %zmm0 {%k1} {z}
+;KNL-NEXT: retq
+define <8 x i64> @zext_8x8mem_to_8x64(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone {
+ %a = load <8 x i8>,<8 x i8> *%i,align 1
+ %x = zext <8 x i8> %a to <8 x i64>
+ %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
+ ret <8 x i64> %ret
+}
+
+;KNL-LABEL: sext_8x8mem_to_8x64mask:
+;KNL: vpmovsxbq (%rdi), %zmm0 {%k1} {z}
+;KNL-NEXT: retq
+define <8 x i64> @sext_8x8mem_to_8x64mask(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone {
+ %a = load <8 x i8>,<8 x i8> *%i,align 1
+ %x = sext <8 x i8> %a to <8 x i64>
+ %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
+ ret <8 x i64> %ret
+}
+
+;KNL-LABEL: sext_8x8mem_to_8x64:
+;KNL: vpmovsxbq (%rdi), %zmm0
+;KNL-NEXT: retq
+define <8 x i64> @sext_8x8mem_to_8x64(<8 x i8> *%i) nounwind readnone {
+ %a = load <8 x i8>,<8 x i8> *%i,align 1
+ %x = sext <8 x i8> %a to <8 x i64>
+ ret <8 x i64> %x
+}
+
+;SKX-LABEL: zext_4x16mem_to_4x32:
+;SKX: ## BB#0:
+;SKX-NEXT: vpmovd2m %xmm0, %k1
+;SKX-NEXT: vpmovzxwd (%rdi), %xmm0 {%k1} {z}
+;SKX-NEXT: retq
+define <4 x i32> @zext_4x16mem_to_4x32(<4 x i16> *%i , <4 x i1> %mask) nounwind readnone {
+ %a = load <4 x i16>,<4 x i16> *%i,align 1
+ %x = zext <4 x i16> %a to <4 x i32>
+ %ret = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> zeroinitializer
+ ret <4 x i32> %ret
+}
+
+;SKX-LABEL: sext_4x16mem_to_4x32mask:
+;SKX: ## BB#0:
+;SKX-NEXT: vpmovd2m %xmm0, %k1
+;SKX-NEXT: vpmovsxwd (%rdi), %xmm0 {%k1} {z}
+;SKX-NEXT: retq
+define <4 x i32> @sext_4x16mem_to_4x32mask(<4 x i16> *%i , <4 x i1> %mask) nounwind readnone {
+ %a = load <4 x i16>,<4 x i16> *%i,align 1
+ %x = sext <4 x i16> %a to <4 x i32>
+ %ret = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> zeroinitializer
+ ret <4 x i32> %ret
+}
+
+;SKX-LABEL: sext_4x16mem_to_4x32:
+;SKX: ## BB#0:
+;SKX-NEXT: vpmovsxwd (%rdi), %xmm0
+;SKX-NEXT: retq
+define <4 x i32> @sext_4x16mem_to_4x32(<4 x i16> *%i) nounwind readnone {
+ %a = load <4 x i16>,<4 x i16> *%i,align 1
+ %x = sext <4 x i16> %a to <4 x i32>
+ ret <4 x i32> %x
+}
+
+
+;SKX-LABEL: zext_8x16mem_to_8x32:
+;SKX: ## BB#0:
+;SKX-NEXT: vpmovw2m %xmm0, %k1
+;SKX-NEXT: vpmovzxwd (%rdi), %ymm0 {%k1} {z}
+;SKX-NEXT: retq
+define <8 x i32> @zext_8x16mem_to_8x32(<8 x i16> *%i , <8 x i1> %mask) nounwind readnone {
+ %a = load <8 x i16>,<8 x i16> *%i,align 1
+ %x = zext <8 x i16> %a to <8 x i32>
+ %ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
+ ret <8 x i32> %ret
+}
+
+;SKX-LABEL: sext_8x16mem_to_8x32mask:
+;SKX: ## BB#0:
+;SKX-NEXT: vpmovw2m %xmm0, %k1
+;SKX-NEXT: vpmovsxwd (%rdi), %ymm0 {%k1} {z}
+;SKX-NEXT: retq
+define <8 x i32> @sext_8x16mem_to_8x32mask(<8 x i16> *%i , <8 x i1> %mask) nounwind readnone {
+ %a = load <8 x i16>,<8 x i16> *%i,align 1
+ %x = sext <8 x i16> %a to <8 x i32>
+ %ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
+ ret <8 x i32> %ret
+}
+
+;SKX-LABEL: sext_8x16mem_to_8x32:
+;SKX: ## BB#0:
+;SKX-NEXT: vpmovsxwd (%rdi), %ymm0
+;SKX-NEXT: retq
+define <8 x i32> @sext_8x16mem_to_8x32(<8 x i16> *%i) nounwind readnone {
+ %a = load <8 x i16>,<8 x i16> *%i,align 1
+ %x = sext <8 x i16> %a to <8 x i32>
+ ret <8 x i32> %x
+}
+
+;SKX-LABEL: zext_8x16_to_8x32mask:
+;SKX: ## BB#0:
+;SKX-NEXT: vpmovw2m %xmm1, %k1
+;SKX-NEXT: vpmovzxwd %xmm0, %ymm0 {%k1} {z}
+;SKX-NEXT: retq
+define <8 x i32> @zext_8x16_to_8x32mask(<8 x i16> %a , <8 x i1> %mask) nounwind readnone {
+ %x = zext <8 x i16> %a to <8 x i32>
+ %ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
+ ret <8 x i32> %ret
+}
+
+;SKX-LABEL: zext_8x16_to_8x32:
+;SKX: ## BB#0:
+;SKX-NEXT: vpmovzxwd %xmm0, %ymm0
+;SKX-NEXT: retq
+define <8 x i32> @zext_8x16_to_8x32(<8 x i16> %a ) nounwind readnone {
+ %x = zext <8 x i16> %a to <8 x i32>
+ ret <8 x i32> %x
+}
+
+;SKX-LABEL: zext_16x16mem_to_16x32:
+;KNL-LABEL: zext_16x16mem_to_16x32:
+;SKX: ## BB#0:
+;SKX-NEXT: vpmovb2m %xmm0, %k1
+;SKX-NEXT: vpmovzxwd (%rdi), %zmm0 {%k1} {z}
+;KNL: vpmovzxwd (%rdi), %zmm0 {%k1} {z}
+;SKX-NEXT: retq
+define <16 x i32> @zext_16x16mem_to_16x32(<16 x i16> *%i , <16 x i1> %mask) nounwind readnone {
+ %a = load <16 x i16>,<16 x i16> *%i,align 1
+ %x = zext <16 x i16> %a to <16 x i32>
+ %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
+ ret <16 x i32> %ret
+}
-; CHECK-LABEL: zext_16x16_to_16x32
-; CHECK: vpmovzxwd {{.*}}%zmm
-; CHECK: ret
-define <16 x i32> @zext_16x16_to_16x32(<16 x i16> %i) nounwind readnone {
- %x = zext <16 x i16> %i to <16 x i32>
+;SKX-LABEL: sext_16x16mem_to_16x32mask:
+;KNL-LABEL: sext_16x16mem_to_16x32mask:
+;SKX: ## BB#0:
+;SKX-NEXT: vpmovb2m %xmm0, %k1
+;SKX-NEXT: vpmovsxwd (%rdi), %zmm0 {%k1} {z}
+;KNL: vpmovsxwd (%rdi), %zmm0 {%k1} {z}
+;SKX-NEXT: retq
+define <16 x i32> @sext_16x16mem_to_16x32mask(<16 x i16> *%i , <16 x i1> %mask) nounwind readnone {
+ %a = load <16 x i16>,<16 x i16> *%i,align 1
+ %x = sext <16 x i16> %a to <16 x i32>
+ %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
+ ret <16 x i32> %ret
+}
+
+;SKX-LABEL: sext_16x16mem_to_16x32:
+;KNL-LABEL: sext_16x16mem_to_16x32:
+;SKX: ## BB#0:
+;SKX-NEXT: vpmovsxwd (%rdi), %zmm0
+;KNL: vpmovsxwd (%rdi), %zmm0
+;SKX-NEXT: retq
+define <16 x i32> @sext_16x16mem_to_16x32(<16 x i16> *%i) nounwind readnone {
+ %a = load <16 x i16>,<16 x i16> *%i,align 1
+ %x = sext <16 x i16> %a to <16 x i32>
ret <16 x i32> %x
}
+;SKX-LABEL: zext_16x16_to_16x32mask:
+;KNL-LABEL: zext_16x16_to_16x32mask:
+;SKX: ## BB#0:
+;SKX-NEXT: vpmovb2m %xmm1, %k1
+;SKX-NEXT: vpmovzxwd %ymm0, %zmm0 {%k1} {z}
+;KNL: vpmovzxwd %ymm0, %zmm0 {%k1} {z}
+;SKX-NEXT: retq
+define <16 x i32> @zext_16x16_to_16x32mask(<16 x i16> %a , <16 x i1> %mask) nounwind readnone {
+ %x = zext <16 x i16> %a to <16 x i32>
+ %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
+ ret <16 x i32> %ret
+}
+
+;SKX-LABEL: zext_16x16_to_16x32:
+;KNL-LABEL: zext_16x16_to_16x32:
+;SKX: ## BB#0:
+;SKX-NEXT: vpmovzxwd %ymm0, %zmm0
+;KNL: vpmovzxwd %ymm0, %zmm0
+;SKX-NEXT: retq
+define <16 x i32> @zext_16x16_to_16x32(<16 x i16> %a ) nounwind readnone {
+ %x = zext <16 x i16> %a to <16 x i32>
+ ret <16 x i32> %x
+}
+
+;SKX-LABEL: zext_2x16mem_to_2x64:
+;SKX: ## BB#0:
+;SKX-NEXT: vpmovq2m %xmm0, %k1
+;SKX-NEXT: vpmovzxwq (%rdi), %xmm0 {%k1} {z}
+;SKX-NEXT: retq
+define <2 x i64> @zext_2x16mem_to_2x64(<2 x i16> *%i , <2 x i1> %mask) nounwind readnone {
+ %a = load <2 x i16>,<2 x i16> *%i,align 1
+ %x = zext <2 x i16> %a to <2 x i64>
+ %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer
+ ret <2 x i64> %ret
+}
+
+;SKX-LABEL: sext_2x16mem_to_2x64mask:
+;SKX: ## BB#0:
+;SKX-NEXT: vpmovq2m %xmm0, %k1
+;SKX-NEXT: vpmovsxwq (%rdi), %xmm0 {%k1} {z}
+;SKX-NEXT: retq
+define <2 x i64> @sext_2x16mem_to_2x64mask(<2 x i16> *%i , <2 x i1> %mask) nounwind readnone {
+ %a = load <2 x i16>,<2 x i16> *%i,align 1
+ %x = sext <2 x i16> %a to <2 x i64>
+ %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer
+ ret <2 x i64> %ret
+}
+
+;SKX-LABEL: sext_2x16mem_to_2x64:
+;SKX: ## BB#0:
+;SKX-NEXT: vpmovsxwq (%rdi), %xmm0
+;SKX-NEXT: retq
+define <2 x i64> @sext_2x16mem_to_2x64(<2 x i16> *%i) nounwind readnone {
+ %a = load <2 x i16>,<2 x i16> *%i,align 1
+ %x = sext <2 x i16> %a to <2 x i64>
+ ret <2 x i64> %x
+}
+
+;SKX-LABEL: zext_4x16mem_to_4x64:
+;SKX: ## BB#0:
+;SKX-NEXT: vpmovd2m %xmm0, %k1
+;SKX-NEXT: vpmovzxwq (%rdi), %ymm0 {%k1} {z}
+;SKX-NEXT: retq
+define <4 x i64> @zext_4x16mem_to_4x64(<4 x i16> *%i , <4 x i1> %mask) nounwind readnone {
+ %a = load <4 x i16>,<4 x i16> *%i,align 1
+ %x = zext <4 x i16> %a to <4 x i64>
+ %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer
+ ret <4 x i64> %ret
+}
+
+;SKX-LABEL: sext_4x16mem_to_4x64mask:
+;SKX: ## BB#0:
+;SKX-NEXT: vpmovd2m %xmm0, %k1
+;SKX-NEXT: vpmovsxwq (%rdi), %ymm0 {%k1} {z}
+;SKX-NEXT: retq
+define <4 x i64> @sext_4x16mem_to_4x64mask(<4 x i16> *%i , <4 x i1> %mask) nounwind readnone {
+ %a = load <4 x i16>,<4 x i16> *%i,align 1
+ %x = sext <4 x i16> %a to <4 x i64>
+ %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer
+ ret <4 x i64> %ret
+}
+
+;SKX-LABEL: sext_4x16mem_to_4x64:
+;SKX: ## BB#0:
+;SKX-NEXT: vpmovsxwq (%rdi), %ymm0
+;SKX-NEXT: retq
+define <4 x i64> @sext_4x16mem_to_4x64(<4 x i16> *%i) nounwind readnone {
+ %a = load <4 x i16>,<4 x i16> *%i,align 1
+ %x = sext <4 x i16> %a to <4 x i64>
+ ret <4 x i64> %x
+}
+
+;SKX-LABEL: zext_8x16mem_to_8x64:
+;KNL-LABEL: zext_8x16mem_to_8x64:
+;SKX: ## BB#0:
+;SKX-NEXT: vpmovw2m %xmm0, %k1
+;SKX-NEXT: vpmovzxwq (%rdi), %zmm0 {%k1} {z}
+;KNL: vpmovzxwq (%rdi), %zmm0 {%k1} {z}
+;SKX-NEXT: retq
+define <8 x i64> @zext_8x16mem_to_8x64(<8 x i16> *%i , <8 x i1> %mask) nounwind readnone {
+ %a = load <8 x i16>,<8 x i16> *%i,align 1
+ %x = zext <8 x i16> %a to <8 x i64>
+ %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
+ ret <8 x i64> %ret
+}
+
+;SKX-LABEL: sext_8x16mem_to_8x64mask:
+;KNL-LABEL: sext_8x16mem_to_8x64mask:
+;SKX: ## BB#0:
+;SKX-NEXT: vpmovw2m %xmm0, %k1
+;SKX-NEXT: vpmovsxwq (%rdi), %zmm0 {%k1} {z}
+;KNL: vpmovsxwq (%rdi), %zmm0 {%k1} {z}
+;SKX-NEXT: retq
+define <8 x i64> @sext_8x16mem_to_8x64mask(<8 x i16> *%i , <8 x i1> %mask) nounwind readnone {
+ %a = load <8 x i16>,<8 x i16> *%i,align 1
+ %x = sext <8 x i16> %a to <8 x i64>
+ %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
+ ret <8 x i64> %ret
+}
+
+;SKX-LABEL: sext_8x16mem_to_8x64:
+;KNL-LABEL: sext_8x16mem_to_8x64:
+;SKX: ## BB#0:
+;SKX-NEXT: vpmovsxwq (%rdi), %zmm0
+;KNL: vpmovsxwq (%rdi), %zmm0
+;SKX-NEXT: retq
+define <8 x i64> @sext_8x16mem_to_8x64(<8 x i16> *%i) nounwind readnone {
+ %a = load <8 x i16>,<8 x i16> *%i,align 1
+ %x = sext <8 x i16> %a to <8 x i64>
+ ret <8 x i64> %x
+}
+
+;SKX-LABEL: zext_8x16_to_8x64mask:
+;KNL-LABEL: zext_8x16_to_8x64mask:
+;SKX: ## BB#0:
+;SKX-NEXT: vpmovw2m %xmm1, %k1
+;SKX-NEXT: vpmovzxwq %xmm0, %zmm0 {%k1} {z}
+;KNL: vpmovzxwq %xmm0, %zmm0 {%k1} {z}
+;SKX-NEXT: retq
+define <8 x i64> @zext_8x16_to_8x64mask(<8 x i16> %a , <8 x i1> %mask) nounwind readnone {
+ %x = zext <8 x i16> %a to <8 x i64>
+ %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
+ ret <8 x i64> %ret
+}
+
+;SKX-LABEL: zext_8x16_to_8x64:
+;KNL-LABEL: zext_8x16_to_8x64:
+;SKX: ## BB#0:
+;SKX-NEXT: vpmovzxwq %xmm0, %zmm0
+;KNL: vpmovzxwq %xmm0, %zmm0
+;SKX-NEXT: retq
+; KNL: ret
+define <8 x i64> @zext_8x16_to_8x64(<8 x i16> %a) nounwind readnone {
+ %ret = zext <8 x i16> %a to <8 x i64>
+ ret <8 x i64> %ret
+}
+
+;SKX-LABEL: zext_2x32mem_to_2x64:
+;SKX: ## BB#0:
+;SKX-NEXT: vpmovq2m %xmm0, %k1
+;SKX-NEXT: vpmovzxdq (%rdi), %xmm0 {%k1} {z}
+;SKX-NEXT: retq
+define <2 x i64> @zext_2x32mem_to_2x64(<2 x i32> *%i , <2 x i1> %mask) nounwind readnone {
+ %a = load <2 x i32>,<2 x i32> *%i,align 1
+ %x = zext <2 x i32> %a to <2 x i64>
+ %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer
+ ret <2 x i64> %ret
+}
+
+;SKX-LABEL: sext_2x32mem_to_2x64mask:
+;SKX: ## BB#0:
+;SKX-NEXT: vpmovq2m %xmm0, %k1
+;SKX-NEXT: vpmovsxdq (%rdi), %xmm0 {%k1} {z}
+;SKX-NEXT: retq
+define <2 x i64> @sext_2x32mem_to_2x64mask(<2 x i32> *%i , <2 x i1> %mask) nounwind readnone {
+ %a = load <2 x i32>,<2 x i32> *%i,align 1
+ %x = sext <2 x i32> %a to <2 x i64>
+ %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer
+ ret <2 x i64> %ret
+}
+
+;SKX-LABEL: sext_2x32mem_to_2x64:
+;SKX: ## BB#0:
+;SKX-NEXT: vpmovsxdq (%rdi), %xmm0
+;SKX-NEXT: retq
+define <2 x i64> @sext_2x32mem_to_2x64(<2 x i32> *%i) nounwind readnone {
+ %a = load <2 x i32>,<2 x i32> *%i,align 1
+ %x = sext <2 x i32> %a to <2 x i64>
+ ret <2 x i64> %x
+}
-; CHECK-LABEL: zext_8x16_to_8x64
-; CHECK: vpmovzxwq
-; CHECK: ret
-define <8 x i64> @zext_8x16_to_8x64(<8 x i16> %i) nounwind readnone {
- %x = zext <8 x i16> %i to <8 x i64>
+;SKX-LABEL: zext_4x32mem_to_4x64:
+;SKX: ## BB#0:
+;SKX-NEXT: vpmovd2m %xmm0, %k1
+;SKX-NEXT: vpmovzxdq (%rdi), %ymm0 {%k1} {z}
+;SKX-NEXT: retq
+define <4 x i64> @zext_4x32mem_to_4x64(<4 x i32> *%i , <4 x i1> %mask) nounwind readnone {
+ %a = load <4 x i32>,<4 x i32> *%i,align 1
+ %x = zext <4 x i32> %a to <4 x i64>
+ %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer
+ ret <4 x i64> %ret
+}
+
+;SKX-LABEL: sext_4x32mem_to_4x64mask:
+;SKX: ## BB#0:
+;SKX-NEXT: vpmovd2m %xmm0, %k1
+;SKX-NEXT: vpmovsxdq (%rdi), %ymm0 {%k1} {z}
+;SKX-NEXT: retq
+define <4 x i64> @sext_4x32mem_to_4x64mask(<4 x i32> *%i , <4 x i1> %mask) nounwind readnone {
+ %a = load <4 x i32>,<4 x i32> *%i,align 1
+ %x = sext <4 x i32> %a to <4 x i64>
+ %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer
+ ret <4 x i64> %ret
+}
+
+;SKX-LABEL: sext_4x32mem_to_4x64:
+;SKX: ## BB#0:
+;SKX-NEXT: vpmovsxdq (%rdi), %ymm0
+;SKX-NEXT: retq
+define <4 x i64> @sext_4x32mem_to_4x64(<4 x i32> *%i) nounwind readnone {
+ %a = load <4 x i32>,<4 x i32> *%i,align 1
+ %x = sext <4 x i32> %a to <4 x i64>
+ ret <4 x i64> %x
+}
+
+;SKX-LABEL: sext_4x32_to_4x64:
+;SKX: ## BB#0:
+;SKX-NEXT: vpmovsxdq %xmm0, %ymm0
+;SKX-NEXT: retq
+define <4 x i64> @sext_4x32_to_4x64(<4 x i32> %a) nounwind readnone {
+ %x = sext <4 x i32> %a to <4 x i64>
+ ret <4 x i64> %x
+}
+
+;SKX-LABEL: zext_4x32_to_4x64mask:
+;SKX: ## BB#0:
+;SKX-NEXT: vpmovd2m %xmm1, %k1
+;SKX-NEXT: vpmovzxdq %xmm0, %ymm0 {%k1} {z}
+;SKX-NEXT: retq
+define <4 x i64> @zext_4x32_to_4x64mask(<4 x i32> %a , <4 x i1> %mask) nounwind readnone {
+ %x = zext <4 x i32> %a to <4 x i64>
+ %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer
+ ret <4 x i64> %ret
+}
+
+;SKX-LABEL: zext_8x32mem_to_8x64:
+;SKX: ## BB#0:
+;SKX-NEXT: vpmovw2m %xmm0, %k1
+;SKX-NEXT: vpmovzxdq (%rdi), %zmm0 {%k1} {z}
+;SKX-NEXT: retq
+define <8 x i64> @zext_8x32mem_to_8x64(<8 x i32> *%i , <8 x i1> %mask) nounwind readnone {
+ %a = load <8 x i32>,<8 x i32> *%i,align 1
+ %x = zext <8 x i32> %a to <8 x i64>
+ %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
+ ret <8 x i64> %ret
+}
+
+;SKX-LABEL: sext_8x32mem_to_8x64mask:
+;SKX: ## BB#0:
+;SKX-NEXT: vpmovw2m %xmm0, %k1
+;SKX-NEXT: vpmovsxdq (%rdi), %zmm0 {%k1} {z}
+;SKX-NEXT: retq
+define <8 x i64> @sext_8x32mem_to_8x64mask(<8 x i32> *%i , <8 x i1> %mask) nounwind readnone {
+ %a = load <8 x i32>,<8 x i32> *%i,align 1
+ %x = sext <8 x i32> %a to <8 x i64>
+ %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
+ ret <8 x i64> %ret
+}
+
+;SKX-LABEL: sext_8x32mem_to_8x64:
+;KNL-LABEL: sext_8x32mem_to_8x64:
+;SKX: ## BB#0:
+;SKX-NEXT: vpmovsxdq (%rdi), %zmm0
+;KNL: vpmovsxdq (%rdi), %zmm0
+;SKX-NEXT: retq
+define <8 x i64> @sext_8x32mem_to_8x64(<8 x i32> *%i) nounwind readnone {
+ %a = load <8 x i32>,<8 x i32> *%i,align 1
+ %x = sext <8 x i32> %a to <8 x i64>
ret <8 x i64> %x
}
-;CHECK-LABEL: fptrunc_test
-;CHECK: vcvtpd2ps {{.*}}%zmm
-;CHECK: ret
+;SKX-LABEL: sext_8x32_to_8x64:
+;KNL-LABEL: sext_8x32_to_8x64:
+;SKX: ## BB#0:
+;SKX-NEXT: vpmovsxdq %ymm0, %zmm0
+;KNL: vpmovsxdq %ymm0, %zmm0
+;SKX-NEXT: retq
+define <8 x i64> @sext_8x32_to_8x64(<8 x i32> %a) nounwind readnone {
+ %x = sext <8 x i32> %a to <8 x i64>
+ ret <8 x i64> %x
+}
+
+;SKX-LABEL: zext_8x32_to_8x64mask:
+;KNL-LABEL: zext_8x32_to_8x64mask:
+;SKX: ## BB#0:
+;SKX-NEXT: vpmovw2m %xmm1, %k1
+;SKX-NEXT: vpmovzxdq %ymm0, %zmm0 {%k1} {z}
+;KNL: vpmovzxdq %ymm0, %zmm0 {%k1} {z}
+;SKX-NEXT: retq
+define <8 x i64> @zext_8x32_to_8x64mask(<8 x i32> %a , <8 x i1> %mask) nounwind readnone {
+ %x = zext <8 x i32> %a to <8 x i64>
+ %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
+ ret <8 x i64> %ret
+}
+;KNL-LABEL: fptrunc_test
+;KNL: vcvtpd2ps {{.*}}%zmm
+;KNL: ret
define <8 x float> @fptrunc_test(<8 x double> %a) nounwind readnone {
%b = fptrunc <8 x double> %a to <8 x float>
ret <8 x float> %b
}
-;CHECK-LABEL: fpext_test
-;CHECK: vcvtps2pd {{.*}}%zmm
-;CHECK: ret
+;KNL-LABEL: fpext_test
+;KNL: vcvtps2pd {{.*}}%zmm
+;KNL: ret
define <8 x double> @fpext_test(<8 x float> %a) nounwind readnone {
%b = fpext <8 x float> %a to <8 x double>
ret <8 x double> %b
}
-; CHECK-LABEL: zext_16i1_to_16xi32
-; CHECK: vpbroadcastd LCP{{.*}}(%rip), %zmm0 {%k1} {z}
-; CHECK: ret
+; KNL-LABEL: zext_16i1_to_16xi32
+; KNL: vpbroadcastd LCP{{.*}}(%rip), %zmm0 {%k1} {z}
+; KNL: ret
define <16 x i32> @zext_16i1_to_16xi32(i16 %b) {
%a = bitcast i16 %b to <16 x i1>
%c = zext <16 x i1> %a to <16 x i32>
ret <16 x i32> %c
}
-; CHECK-LABEL: zext_8i1_to_8xi64
-; CHECK: vpbroadcastq LCP{{.*}}(%rip), %zmm0 {%k1} {z}
-; CHECK: ret
+; KNL-LABEL: zext_8i1_to_8xi64
+; KNL: vpbroadcastq LCP{{.*}}(%rip), %zmm0 {%k1} {z}
+; KNL: ret
define <8 x i64> @zext_8i1_to_8xi64(i8 %b) {
%a = bitcast i8 %b to <8 x i1>
%c = zext <8 x i1> %a to <8 x i64>
ret <8 x i64> %c
}
-; CHECK-LABEL: trunc_16i8_to_16i1
-; CHECK: vpmovsxbd
-; CHECK: vpandd
-; CHECK: vptestmd
-; CHECK: ret
+; KNL-LABEL: trunc_16i8_to_16i1
+; KNL: vpmovsxbd
+; KNL: vpandd
+; KNL: vptestmd
+; KNL: ret
+; SKX-LABEL: trunc_16i8_to_16i1
+; SKX: vpmovb2m %xmm
define i16 @trunc_16i8_to_16i1(<16 x i8> %a) {
%mask_b = trunc <16 x i8>%a to <16 x i1>
%mask = bitcast <16 x i1> %mask_b to i16
ret i16 %mask
}
-; CHECK-LABEL: trunc_16i32_to_16i1
-; CHECK: vpandd
-; CHECK: vptestmd
-; CHECK: ret
+; KNL-LABEL: trunc_16i32_to_16i1
+; KNL: vpandd
+; KNL: vptestmd
+; KNL: ret
+; SKX-LABEL: trunc_16i32_to_16i1
+; SKX: vpmovd2m %zmm
define i16 @trunc_16i32_to_16i1(<16 x i32> %a) {
%mask_b = trunc <16 x i32>%a to <16 x i1>
%mask = bitcast <16 x i1> %mask_b to i16
ret i16 %mask
}
-; CHECK-LABEL: trunc_8i16_to_8i1
-; CHECK: vpmovsxwq
-; CHECK: vpandq LCP{{.*}}(%rip){1to8}
-; CHECK: vptestmq
-; CHECK: ret
+; SKX-LABEL: trunc_4i32_to_4i1
+; SKX: vpmovd2m %xmm
+; SKX: kandw
+; SKX: vpmovm2d
+define <4 x i32> @trunc_4i32_to_4i1(<4 x i32> %a, <4 x i32> %b) {
+ %mask_a = trunc <4 x i32>%a to <4 x i1>
+ %mask_b = trunc <4 x i32>%b to <4 x i1>
+ %a_and_b = and <4 x i1>%mask_a, %mask_b
+ %res = sext <4 x i1>%a_and_b to <4 x i32>
+ ret <4 x i32>%res
+}
+
+; KNL-LABEL: trunc_8i16_to_8i1
+; KNL: vpmovsxwq
+; KNL: vpandq LCP{{.*}}(%rip){1to8}
+; KNL: vptestmq
+; KNL: ret
+
+; SKX-LABEL: trunc_8i16_to_8i1
+; SKX: vpmovw2m %xmm
define i8 @trunc_8i16_to_8i1(<8 x i16> %a) {
%mask_b = trunc <8 x i16>%a to <8 x i1>
%mask = bitcast <8 x i1> %mask_b to i8
ret i8 %mask
}
-; CHECK-LABEL: sext_8i1_8i32
-; CHECK: vpbroadcastq LCP{{.*}}(%rip), %zmm0 {%k1} {z}
+; KNL-LABEL: sext_8i1_8i32
+; KNL: vpbroadcastq LCP{{.*}}(%rip), %zmm0 {%k1} {z}
; SKX: vpmovm2d
-; CHECK: ret
+; KNL: ret
define <8 x i32> @sext_8i1_8i32(<8 x i32> %a1, <8 x i32> %a2) nounwind {
%x = icmp slt <8 x i32> %a1, %a2
%x1 = xor <8 x i1>%x, <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
@@ -128,19 +895,18 @@ define <8 x i32> @sext_8i1_8i32(<8 x i32> %a1, <8 x i32> %a2) nounwind {
ret <8 x i32> %y
}
-; CHECK-LABEL: trunc_v16i32_to_v16i16
-; CHECK: vpmovdw
-; CHECK: ret
+; KNL-LABEL: trunc_v16i32_to_v16i16
+; KNL: vpmovdw
+; KNL: ret
define <16 x i16> @trunc_v16i32_to_v16i16(<16 x i32> %x) {
%1 = trunc <16 x i32> %x to <16 x i16>
ret <16 x i16> %1
}
-; CHECK-LABEL: trunc_i32_to_i1
-; CHECK: testb
-; CHECK: setne
-; CKECK: orl
-; CHECK: ret
+; KNL-LABEL: trunc_i32_to_i1
+; KNL: movw $-4, %ax
+; KNL: kmovw %eax, %k1
+; KNL: korw
define i16 @trunc_i32_to_i1(i32 %a) {
%a_i = trunc i32 %a to i1
%maskv = insertelement <16 x i1> <i1 true, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i1 %a_i, i32 0
@@ -148,29 +914,48 @@ define i16 @trunc_i32_to_i1(i32 %a) {
ret i16 %res
}
-; CHECK-LABEL: sext_8i1_8i16
+; KNL-LABEL: sext_8i1_8i16
; SKX: vpmovm2w
-; CHECK: ret
+; KNL: ret
define <8 x i16> @sext_8i1_8i16(<8 x i32> %a1, <8 x i32> %a2) nounwind {
%x = icmp slt <8 x i32> %a1, %a2
%y = sext <8 x i1> %x to <8 x i16>
ret <8 x i16> %y
}
-; CHECK-LABEL: sext_16i1_16i32
+; KNL-LABEL: sext_16i1_16i32
; SKX: vpmovm2d
-; CHECK: ret
+; KNL: ret
define <16 x i32> @sext_16i1_16i32(<16 x i32> %a1, <16 x i32> %a2) nounwind {
%x = icmp slt <16 x i32> %a1, %a2
%y = sext <16 x i1> %x to <16 x i32>
ret <16 x i32> %y
}
-; CHECK-LABEL: sext_8i1_8i64
+; KNL-LABEL: sext_8i1_8i64
; SKX: vpmovm2q
-; CHECK: ret
+; KNL: ret
define <8 x i64> @sext_8i1_8i64(<8 x i32> %a1, <8 x i32> %a2) nounwind {
%x = icmp slt <8 x i32> %a1, %a2
%y = sext <8 x i1> %x to <8 x i64>
ret <8 x i64> %y
}
+
+; KNL-LABEL: @extload_v8i64
+; KNL: vpmovsxbq
+define void @extload_v8i64(<8 x i8>* %a, <8 x i64>* %res) {
+ %sign_load = load <8 x i8>, <8 x i8>* %a
+ %c = sext <8 x i8> %sign_load to <8 x i64>
+ store <8 x i64> %c, <8 x i64>* %res
+ ret void
+}
+
+;SKX-LABEL: test21:
+;SKX: vmovdqu16 %zmm0, %zmm3 {%k1}
+;SKX-NEXT: kshiftrq $32, %k1, %k1
+;SKX-NEXT: vmovdqu16 %zmm1, %zmm2 {%k1}
+define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone {
+ %ret = select <64 x i1> %mask, <64 x i16> %x, <64 x i16> zeroinitializer
+ ret <64 x i16> %ret
+}
+
diff --git a/test/CodeGen/X86/avx512-vbroadcast.ll b/test/CodeGen/X86/avx512-vbroadcast.ll
index 5bb82338d087..854f1019f0f8 100644
--- a/test/CodeGen/X86/avx512-vbroadcast.ll
+++ b/test/CodeGen/X86/avx512-vbroadcast.ll
@@ -64,7 +64,7 @@ define <16 x float> @_ss16xfloat_maskz(float %a, <16 x i32> %mask1) {
;CHECK: vbroadcastss (%{{.*}}, %zmm
;CHECK: ret
define <16 x float> @_ss16xfloat_load(float* %a.ptr) {
- %a = load float* %a.ptr
+ %a = load float, float* %a.ptr
%b = insertelement <16 x float> undef, float %a, i32 0
%c = shufflevector <16 x float> %b, <16 x float> undef, <16 x i32> zeroinitializer
ret <16 x float> %c
@@ -74,7 +74,7 @@ define <16 x float> @_ss16xfloat_load(float* %a.ptr) {
;CHECK: vbroadcastss (%rdi), %zmm0 {%k1}
;CHECK: ret
define <16 x float> @_ss16xfloat_mask_load(float* %a.ptr, <16 x float> %i, <16 x i32> %mask1) {
- %a = load float* %a.ptr
+ %a = load float, float* %a.ptr
%mask = icmp ne <16 x i32> %mask1, zeroinitializer
%b = insertelement <16 x float> undef, float %a, i32 0
%c = shufflevector <16 x float> %b, <16 x float> undef, <16 x i32> zeroinitializer
@@ -86,7 +86,7 @@ define <16 x float> @_ss16xfloat_mask_load(float* %a.ptr, <16 x float> %i, <16
;CHECK: vbroadcastss (%rdi), %zmm0 {%k1} {z}
;CHECK: ret
define <16 x float> @_ss16xfloat_maskz_load(float* %a.ptr, <16 x i32> %mask1) {
- %a = load float* %a.ptr
+ %a = load float, float* %a.ptr
%mask = icmp ne <16 x i32> %mask1, zeroinitializer
%b = insertelement <16 x float> undef, float %a, i32 0
%c = shufflevector <16 x float> %b, <16 x float> undef, <16 x i32> zeroinitializer
@@ -130,7 +130,7 @@ define <8 x double> @_sd8xdouble_maskz(double %a, <8 x i32> %mask1) {
;CHECK: vbroadcastsd (%rdi), %zmm
;CHECK: ret
define <8 x double> @_sd8xdouble_load(double* %a.ptr) {
- %a = load double* %a.ptr
+ %a = load double, double* %a.ptr
%b = insertelement <8 x double> undef, double %a, i32 0
%c = shufflevector <8 x double> %b, <8 x double> undef, <8 x i32> zeroinitializer
ret <8 x double> %c
@@ -140,7 +140,7 @@ define <8 x double> @_sd8xdouble_load(double* %a.ptr) {
;CHECK: vbroadcastsd (%rdi), %zmm0 {%k1}
;CHECK: ret
define <8 x double> @_sd8xdouble_mask_load(double* %a.ptr, <8 x double> %i, <8 x i32> %mask1) {
- %a = load double* %a.ptr
+ %a = load double, double* %a.ptr
%mask = icmp ne <8 x i32> %mask1, zeroinitializer
%b = insertelement <8 x double> undef, double %a, i32 0
%c = shufflevector <8 x double> %b, <8 x double> undef, <8 x i32> zeroinitializer
@@ -152,7 +152,7 @@ define <8 x double> @_sd8xdouble_maskz_load(double* %a.ptr, <8 x i32> %mask1)
; CHECK-LABEL: _sd8xdouble_maskz_load:
; CHECK: vbroadcastsd (%rdi), %zmm0 {%k1} {z}
; CHECK: ret
- %a = load double* %a.ptr
+ %a = load double, double* %a.ptr
%mask = icmp ne <8 x i32> %mask1, zeroinitializer
%b = insertelement <8 x double> undef, double %a, i32 0
%c = shufflevector <8 x double> %b, <8 x double> undef, <8 x i32> zeroinitializer
@@ -300,3 +300,31 @@ entry:
%vecinit7.i = insertelement <8 x double> %vecinit6.i, double %0, i32 7
ret <8 x double> %vecinit7.i
}
+
+; CHECK-LABEL: test1
+; CHECK: vbroadcastss
+define <16 x float> @test1(<8 x float>%a) {
+ %res = shufflevector <8 x float> %a, <8 x float> undef, <16 x i32> zeroinitializer
+ ret <16 x float>%res
+}
+
+; CHECK-LABEL: test2
+; CHECK: vbroadcastsd
+define <8 x double> @test2(<4 x double>%a) {
+ %res = shufflevector <4 x double> %a, <4 x double> undef, <8 x i32> zeroinitializer
+ ret <8 x double>%res
+}
+
+; CHECK-LABEL: test3
+; CHECK: vpbroadcastd
+define <16 x i32> @test3(<8 x i32>%a) {
+ %res = shufflevector <8 x i32> %a, <8 x i32> undef, <16 x i32> zeroinitializer
+ ret <16 x i32>%res
+}
+
+; CHECK-LABEL: test4
+; CHECK: vpbroadcastq
+define <8 x i64> @test4(<4 x i64>%a) {
+ %res = shufflevector <4 x i64> %a, <4 x i64> undef, <8 x i32> zeroinitializer
+ ret <8 x i64>%res
+}
diff --git a/test/CodeGen/X86/avx512-vec-cmp.ll b/test/CodeGen/X86/avx512-vec-cmp.ll
index b16f5c9663c6..e1f6276c6ef4 100644
--- a/test/CodeGen/X86/avx512-vec-cmp.ll
+++ b/test/CodeGen/X86/avx512-vec-cmp.ll
@@ -1,135 +1,158 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s --check-prefix=KNL
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck %s --check-prefix=SKX
define <16 x float> @test1(<16 x float> %x, <16 x float> %y) nounwind {
-; CHECK-LABEL: test1:
-; CHECK: ## BB#0:
-; CHECK-NEXT: vcmpleps %zmm1, %zmm0, %k1
-; CHECK-NEXT: vmovaps %zmm0, %zmm1 {%k1}
-; CHECK-NEXT: vmovaps %zmm1, %zmm0
-; CHECK-NEXT: retq
+; KNL-LABEL: test1:
+; KNL: ## BB#0:
+; KNL-NEXT: vcmpleps %zmm1, %zmm0, %k1
+; KNL-NEXT: vmovaps %zmm0, %zmm1 {%k1}
+; KNL-NEXT: vmovaps %zmm1, %zmm0
+; KNL-NEXT: retq
%mask = fcmp ole <16 x float> %x, %y
%max = select <16 x i1> %mask, <16 x float> %x, <16 x float> %y
ret <16 x float> %max
}
define <8 x double> @test2(<8 x double> %x, <8 x double> %y) nounwind {
-; CHECK-LABEL: test2:
-; CHECK: ## BB#0:
-; CHECK-NEXT: vcmplepd %zmm1, %zmm0, %k1
-; CHECK-NEXT: vmovapd %zmm0, %zmm1 {%k1}
-; CHECK-NEXT: vmovaps %zmm1, %zmm0
-; CHECK-NEXT: retq
+; KNL-LABEL: test2:
+; KNL: ## BB#0:
+; KNL-NEXT: vcmplepd %zmm1, %zmm0, %k1
+; KNL-NEXT: vmovapd %zmm0, %zmm1 {%k1}
+; KNL-NEXT: vmovaps %zmm1, %zmm0
+; KNL-NEXT: retq
%mask = fcmp ole <8 x double> %x, %y
%max = select <8 x i1> %mask, <8 x double> %x, <8 x double> %y
ret <8 x double> %max
}
define <16 x i32> @test3(<16 x i32> %x, <16 x i32> %x1, <16 x i32>* %yp) nounwind {
-; CHECK-LABEL: test3:
-; CHECK: ## BB#0:
-; CHECK-NEXT: vpcmpeqd (%rdi), %zmm0, %k1
-; CHECK-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1}
-; CHECK-NEXT: vmovaps %zmm1, %zmm0
-; CHECK-NEXT: retq
- %y = load <16 x i32>* %yp, align 4
+; KNL-LABEL: test3:
+; KNL: ## BB#0:
+; KNL-NEXT: vpcmpeqd (%rdi), %zmm0, %k1
+; KNL-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1}
+; KNL-NEXT: vmovaps %zmm1, %zmm0
+; KNL-NEXT: retq
+ %y = load <16 x i32>, <16 x i32>* %yp, align 4
%mask = icmp eq <16 x i32> %x, %y
%max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
ret <16 x i32> %max
}
define <16 x i32> @test4_unsigned(<16 x i32> %x, <16 x i32> %y, <16 x i32> %x1) nounwind {
-; CHECK-LABEL: test4_unsigned:
-; CHECK: ## BB#0:
-; CHECK-NEXT: vpcmpnltud %zmm1, %zmm0, %k1
-; CHECK-NEXT: vmovdqa32 %zmm2, %zmm1 {%k1}
-; CHECK-NEXT: vmovaps %zmm1, %zmm0
-; CHECK-NEXT: retq
+; KNL-LABEL: test4_unsigned:
+; KNL: ## BB#0:
+; KNL-NEXT: vpcmpnltud %zmm1, %zmm0, %k1
+; KNL-NEXT: vmovdqa32 %zmm2, %zmm1 {%k1}
+; KNL-NEXT: vmovaps %zmm1, %zmm0
+; KNL-NEXT: retq
%mask = icmp uge <16 x i32> %x, %y
%max = select <16 x i1> %mask, <16 x i32> %x1, <16 x i32> %y
ret <16 x i32> %max
}
define <8 x i64> @test5(<8 x i64> %x, <8 x i64> %y) nounwind {
-; CHECK-LABEL: test5:
-; CHECK: ## BB#0:
-; CHECK-NEXT: vpcmpeqq %zmm1, %zmm0, %k1
-; CHECK-NEXT: vmovdqa64 %zmm0, %zmm1 {%k1}
-; CHECK-NEXT: vmovaps %zmm1, %zmm0
-; CHECK-NEXT: retq
+; KNL-LABEL: test5:
+; KNL: ## BB#0:
+; KNL-NEXT: vpcmpeqq %zmm1, %zmm0, %k1
+; KNL-NEXT: vmovdqa64 %zmm0, %zmm1 {%k1}
+; KNL-NEXT: vmovaps %zmm1, %zmm0
+; KNL-NEXT: retq
%mask = icmp eq <8 x i64> %x, %y
%max = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> %y
ret <8 x i64> %max
}
define <8 x i64> @test6_unsigned(<8 x i64> %x, <8 x i64> %y, <8 x i64> %x1) nounwind {
-; CHECK-LABEL: test6_unsigned:
-; CHECK: ## BB#0:
-; CHECK-NEXT: vpcmpnleuq %zmm1, %zmm0, %k1
-; CHECK-NEXT: vmovdqa64 %zmm2, %zmm1 {%k1}
-; CHECK-NEXT: vmovaps %zmm1, %zmm0
-; CHECK-NEXT: retq
+; KNL-LABEL: test6_unsigned:
+; KNL: ## BB#0:
+; KNL-NEXT: vpcmpnleuq %zmm1, %zmm0, %k1
+; KNL-NEXT: vmovdqa64 %zmm2, %zmm1 {%k1}
+; KNL-NEXT: vmovaps %zmm1, %zmm0
+; KNL-NEXT: retq
%mask = icmp ugt <8 x i64> %x, %y
%max = select <8 x i1> %mask, <8 x i64> %x1, <8 x i64> %y
ret <8 x i64> %max
}
define <4 x float> @test7(<4 x float> %a, <4 x float> %b) {
-; CHECK-LABEL: test7:
-; CHECK: ## BB#0:
-; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
-; CHECK-NEXT: vcmpltps %xmm2, %xmm0, %xmm2
-; CHECK-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0
-; CHECK-NEXT: retq
+; KNL-LABEL: test7:
+; KNL: ## BB#0:
+; KNL-NEXT: vxorps %xmm2, %xmm2, %xmm2
+; KNL-NEXT: vcmpltps %xmm2, %xmm0, %xmm2
+; KNL-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0
+; KNL-NEXT: retq
+; SKX-LABEL: test7:
+; SKX: ## BB#0:
+; SKX: vxorps %xmm2, %xmm2, %xmm2
+; SKX: vcmpltps %xmm2, %xmm0, %k1
+; SKX: vmovaps %xmm0, %xmm1 {%k1}
+; SKX: vmovaps %zmm1, %zmm0
+; SKX: retq
+
%mask = fcmp olt <4 x float> %a, zeroinitializer
%c = select <4 x i1>%mask, <4 x float>%a, <4 x float>%b
ret <4 x float>%c
}
define <2 x double> @test8(<2 x double> %a, <2 x double> %b) {
-; CHECK-LABEL: test8:
-; CHECK: ## BB#0:
-; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
-; CHECK-NEXT: vcmpltpd %xmm2, %xmm0, %xmm2
-; CHECK-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
-; CHECK-NEXT: retq
+; KNL-LABEL: test8:
+; KNL: ## BB#0:
+; KNL-NEXT: vxorpd %xmm2, %xmm2, %xmm2
+; KNL-NEXT: vcmpltpd %xmm2, %xmm0, %xmm2
+; KNL-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
+; KNL-NEXT: retq
+; SKX-LABEL: test8:
+; SKX: ## BB#0:
+; SKX: vxorpd %xmm2, %xmm2, %xmm2
+; SKX: vcmpltpd %xmm2, %xmm0, %k1
+; SKX: vmovapd %xmm0, %xmm1 {%k1}
+; SKX: vmovaps %zmm1, %zmm0
+; SKX: retq
%mask = fcmp olt <2 x double> %a, zeroinitializer
%c = select <2 x i1>%mask, <2 x double>%a, <2 x double>%b
ret <2 x double>%c
}
define <8 x i32> @test9(<8 x i32> %x, <8 x i32> %y) nounwind {
-; CHECK-LABEL: test9:
-; CHECK: ## BB#0:
-; CHECK-NEXT: ## kill: YMM1<def> YMM1<kill> ZMM1<def>
-; CHECK-NEXT: ## kill: YMM0<def> YMM0<kill> ZMM0<def>
-; CHECK-NEXT: vpcmpeqd %zmm1, %zmm0, %k1
-; CHECK-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
-; CHECK-NEXT: ## kill: YMM0<def> YMM0<kill> ZMM0<kill>
-; CHECK-NEXT: retq
+; KNL-LABEL: test9:
+; KNL: ## BB#0:
+; KNL-NEXT: ## kill: YMM1<def> YMM1<kill> ZMM1<def>
+; KNL-NEXT: ## kill: YMM0<def> YMM0<kill> ZMM0<def>
+; KNL-NEXT: vpcmpeqd %zmm1, %zmm0, %k1
+; KNL-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
+; KNL-NEXT: ## kill: YMM0<def> YMM0<kill> ZMM0<kill>
+; KNL-NEXT: retq
%mask = icmp eq <8 x i32> %x, %y
%max = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %y
ret <8 x i32> %max
}
define <8 x float> @test10(<8 x float> %x, <8 x float> %y) nounwind {
-; CHECK-LABEL: test10:
-; CHECK: ## BB#0:
-; CHECK-NEXT: ## kill: YMM1<def> YMM1<kill> ZMM1<def>
-; CHECK-NEXT: ## kill: YMM0<def> YMM0<kill> ZMM0<def>
-; CHECK-NEXT: vcmpeqps %zmm1, %zmm0, %k1
-; CHECK-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1}
-; CHECK-NEXT: ## kill: YMM0<def> YMM0<kill> ZMM0<kill>
-; CHECK-NEXT: retq
+; KNL-LABEL: test10:
+; KNL: ## BB#0:
+; KNL-NEXT: ## kill: YMM1<def> YMM1<kill> ZMM1<def>
+; KNL-NEXT: ## kill: YMM0<def> YMM0<kill> ZMM0<def>
+; KNL-NEXT: vcmpeqps %zmm1, %zmm0, %k1
+; KNL-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1}
+; KNL-NEXT: ## kill: YMM0<def> YMM0<kill> ZMM0<kill>
+; KNL-NEXT: retq
+; SKX-LABEL: test10:
+; SKX: ## BB#0:
+; SKX: vcmpeqps %ymm1, %ymm0, %k1
+; SKX: vmovaps %ymm0, %ymm1 {%k1}
+; SKX: vmovaps %zmm1, %zmm0
+; SKX: retq
+
%mask = fcmp oeq <8 x float> %x, %y
%max = select <8 x i1> %mask, <8 x float> %x, <8 x float> %y
ret <8 x float> %max
}
define <8 x i32> @test11_unsigned(<8 x i32> %x, <8 x i32> %y) nounwind {
-; CHECK-LABEL: test11_unsigned:
-; CHECK: ## BB#0:
-; CHECK-NEXT: vpmaxud %ymm1, %ymm0, %ymm0
-; CHECK-NEXT: retq
+; KNL-LABEL: test11_unsigned:
+; KNL: ## BB#0:
+; KNL-NEXT: vpmaxud %ymm1, %ymm0, %ymm0
+; KNL-NEXT: retq
%mask = icmp ugt <8 x i32> %x, %y
%max = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %y
ret <8 x i32> %max
@@ -137,25 +160,25 @@ define <8 x i32> @test11_unsigned(<8 x i32> %x, <8 x i32> %y) nounwind {
define i16 @test12(<16 x i64> %a, <16 x i64> %b) nounwind {
-; CHECK-LABEL: test12:
-; CHECK: ## BB#0:
-; CHECK-NEXT: vpcmpeqq %zmm2, %zmm0, %k0
-; CHECK-NEXT: vpcmpeqq %zmm3, %zmm1, %k1
-; CHECK-NEXT: kunpckbw %k0, %k1, %k0
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: ## kill: AX<def> AX<kill> EAX<kill>
-; CHECK-NEXT: retq
+; KNL-LABEL: test12:
+; KNL: ## BB#0:
+; KNL-NEXT: vpcmpeqq %zmm2, %zmm0, %k0
+; KNL-NEXT: vpcmpeqq %zmm3, %zmm1, %k1
+; KNL-NEXT: kunpckbw %k0, %k1, %k0
+; KNL-NEXT: kmovw %k0, %eax
+; KNL-NEXT: ## kill: AX<def> AX<kill> EAX<kill>
+; KNL-NEXT: retq
%res = icmp eq <16 x i64> %a, %b
%res1 = bitcast <16 x i1> %res to i16
ret i16 %res1
}
define <16 x i32> @test13(<16 x float>%a, <16 x float>%b)
-; CHECK-LABEL: test13:
-; CHECK: ## BB#0:
-; CHECK-NEXT: vcmpeqps %zmm1, %zmm0, %k1
-; CHECK-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
-; CHECK-NEXT: retq
+; KNL-LABEL: test13:
+; KNL: ## BB#0:
+; KNL-NEXT: vcmpeqps %zmm1, %zmm0, %k1
+; KNL-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; KNL-NEXT: retq
{
%cmpvector_i = fcmp oeq <16 x float> %a, %b
%conv = zext <16 x i1> %cmpvector_i to <16 x i32>
@@ -163,14 +186,14 @@ define <16 x i32> @test13(<16 x float>%a, <16 x float>%b)
}
define <16 x i32> @test14(<16 x i32>%a, <16 x i32>%b) {
-; CHECK-LABEL: test14:
-; CHECK: ## BB#0:
-; CHECK-NEXT: vpsubd %zmm1, %zmm0, %zmm1
-; CHECK-NEXT: vpcmpgtd %zmm0, %zmm1, %k0
-; CHECK-NEXT: knotw %k0, %k0
-; CHECK-NEXT: knotw %k0, %k1
-; CHECK-NEXT: vmovdqu32 %zmm1, %zmm0 {%k1} {z}
-; CHECK-NEXT: retq
+; KNL-LABEL: test14:
+; KNL: ## BB#0:
+; KNL-NEXT: vpsubd %zmm1, %zmm0, %zmm1
+; KNL-NEXT: vpcmpgtd %zmm0, %zmm1, %k0
+; KNL-NEXT: knotw %k0, %k0
+; KNL-NEXT: knotw %k0, %k1
+; KNL-NEXT: vmovdqu32 %zmm1, %zmm0 {%k1} {z}
+; KNL-NEXT: retq
%sub_r = sub <16 x i32> %a, %b
%cmp.i2.i = icmp sgt <16 x i32> %sub_r, %a
%sext.i3.i = sext <16 x i1> %cmp.i2.i to <16 x i32>
@@ -180,14 +203,14 @@ define <16 x i32> @test14(<16 x i32>%a, <16 x i32>%b) {
}
define <8 x i64> @test15(<8 x i64>%a, <8 x i64>%b) {
-; CHECK-LABEL: test15:
-; CHECK: ## BB#0:
-; CHECK-NEXT: vpsubq %zmm1, %zmm0, %zmm1
-; CHECK-NEXT: vpcmpgtq %zmm0, %zmm1, %k0
-; CHECK-NEXT: knotw %k0, %k0
-; CHECK-NEXT: knotw %k0, %k1
-; CHECK-NEXT: vmovdqu64 %zmm1, %zmm0 {%k1} {z}
-; CHECK-NEXT: retq
+; KNL-LABEL: test15:
+; KNL: ## BB#0:
+; KNL-NEXT: vpsubq %zmm1, %zmm0, %zmm1
+; KNL-NEXT: vpcmpgtq %zmm0, %zmm1, %k0
+; KNL-NEXT: knotw %k0, %k0
+; KNL-NEXT: knotw %k0, %k1
+; KNL-NEXT: vmovdqu64 %zmm1, %zmm0 {%k1} {z}
+; KNL-NEXT: retq
%sub_r = sub <8 x i64> %a, %b
%cmp.i2.i = icmp sgt <8 x i64> %sub_r, %a
%sext.i3.i = sext <8 x i1> %cmp.i2.i to <8 x i64>
@@ -197,64 +220,64 @@ define <8 x i64> @test15(<8 x i64>%a, <8 x i64>%b) {
}
define <16 x i32> @test16(<16 x i32> %x, <16 x i32> %y, <16 x i32> %x1) nounwind {
-; CHECK-LABEL: test16:
-; CHECK: ## BB#0:
-; CHECK-NEXT: vpcmpled %zmm0, %zmm1, %k1
-; CHECK-NEXT: vmovdqa32 %zmm2, %zmm1 {%k1}
-; CHECK-NEXT: vmovaps %zmm1, %zmm0
-; CHECK-NEXT: retq
+; KNL-LABEL: test16:
+; KNL: ## BB#0:
+; KNL-NEXT: vpcmpled %zmm0, %zmm1, %k1
+; KNL-NEXT: vmovdqa32 %zmm2, %zmm1 {%k1}
+; KNL-NEXT: vmovaps %zmm1, %zmm0
+; KNL-NEXT: retq
%mask = icmp sge <16 x i32> %x, %y
%max = select <16 x i1> %mask, <16 x i32> %x1, <16 x i32> %y
ret <16 x i32> %max
}
define <16 x i32> @test17(<16 x i32> %x, <16 x i32> %x1, <16 x i32>* %y.ptr) nounwind {
-; CHECK-LABEL: test17:
-; CHECK: ## BB#0:
-; CHECK-NEXT: vpcmpgtd (%rdi), %zmm0, %k1
-; CHECK-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1}
-; CHECK-NEXT: vmovaps %zmm1, %zmm0
-; CHECK-NEXT: retq
- %y = load <16 x i32>* %y.ptr, align 4
+; KNL-LABEL: test17:
+; KNL: ## BB#0:
+; KNL-NEXT: vpcmpgtd (%rdi), %zmm0, %k1
+; KNL-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1}
+; KNL-NEXT: vmovaps %zmm1, %zmm0
+; KNL-NEXT: retq
+ %y = load <16 x i32>, <16 x i32>* %y.ptr, align 4
%mask = icmp sgt <16 x i32> %x, %y
%max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
ret <16 x i32> %max
}
define <16 x i32> @test18(<16 x i32> %x, <16 x i32> %x1, <16 x i32>* %y.ptr) nounwind {
-; CHECK-LABEL: test18:
-; CHECK: ## BB#0:
-; CHECK-NEXT: vpcmpled (%rdi), %zmm0, %k1
-; CHECK-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1}
-; CHECK-NEXT: vmovaps %zmm1, %zmm0
-; CHECK-NEXT: retq
- %y = load <16 x i32>* %y.ptr, align 4
+; KNL-LABEL: test18:
+; KNL: ## BB#0:
+; KNL-NEXT: vpcmpled (%rdi), %zmm0, %k1
+; KNL-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1}
+; KNL-NEXT: vmovaps %zmm1, %zmm0
+; KNL-NEXT: retq
+ %y = load <16 x i32>, <16 x i32>* %y.ptr, align 4
%mask = icmp sle <16 x i32> %x, %y
%max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
ret <16 x i32> %max
}
define <16 x i32> @test19(<16 x i32> %x, <16 x i32> %x1, <16 x i32>* %y.ptr) nounwind {
-; CHECK-LABEL: test19:
-; CHECK: ## BB#0:
-; CHECK-NEXT: vpcmpleud (%rdi), %zmm0, %k1
-; CHECK-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1}
-; CHECK-NEXT: vmovaps %zmm1, %zmm0
-; CHECK-NEXT: retq
- %y = load <16 x i32>* %y.ptr, align 4
+; KNL-LABEL: test19:
+; KNL: ## BB#0:
+; KNL-NEXT: vpcmpleud (%rdi), %zmm0, %k1
+; KNL-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1}
+; KNL-NEXT: vmovaps %zmm1, %zmm0
+; KNL-NEXT: retq
+ %y = load <16 x i32>, <16 x i32>* %y.ptr, align 4
%mask = icmp ule <16 x i32> %x, %y
%max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
ret <16 x i32> %max
}
define <16 x i32> @test20(<16 x i32> %x, <16 x i32> %y, <16 x i32> %x1, <16 x i32> %y1) nounwind {
-; CHECK-LABEL: test20:
-; CHECK: ## BB#0:
-; CHECK-NEXT: vpcmpeqd %zmm1, %zmm0, %k1
-; CHECK-NEXT: vpcmpeqd %zmm3, %zmm2, %k1 {%k1}
-; CHECK-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1}
-; CHECK-NEXT: vmovaps %zmm1, %zmm0
-; CHECK-NEXT: retq
+; KNL-LABEL: test20:
+; KNL: ## BB#0:
+; KNL-NEXT: vpcmpeqd %zmm1, %zmm0, %k1
+; KNL-NEXT: vpcmpeqd %zmm3, %zmm2, %k1 {%k1}
+; KNL-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1}
+; KNL-NEXT: vmovaps %zmm1, %zmm0
+; KNL-NEXT: retq
%mask1 = icmp eq <16 x i32> %x1, %y1
%mask0 = icmp eq <16 x i32> %x, %y
%mask = select <16 x i1> %mask0, <16 x i1> %mask1, <16 x i1> zeroinitializer
@@ -263,13 +286,13 @@ define <16 x i32> @test20(<16 x i32> %x, <16 x i32> %y, <16 x i32> %x1, <16 x i3
}
define <8 x i64> @test21(<8 x i64> %x, <8 x i64> %y, <8 x i64> %x1, <8 x i64> %y1) nounwind {
-; CHECK-LABEL: test21:
-; CHECK: ## BB#0:
-; CHECK-NEXT: vpcmpleq %zmm1, %zmm0, %k1
-; CHECK-NEXT: vpcmpleq %zmm2, %zmm3, %k1 {%k1}
-; CHECK-NEXT: vmovdqa64 %zmm0, %zmm2 {%k1}
-; CHECK-NEXT: vmovaps %zmm2, %zmm0
-; CHECK-NEXT: retq
+; KNL-LABEL: test21:
+; KNL: ## BB#0:
+; KNL-NEXT: vpcmpleq %zmm1, %zmm0, %k1
+; KNL-NEXT: vpcmpleq %zmm2, %zmm3, %k1 {%k1}
+; KNL-NEXT: vmovdqa64 %zmm0, %zmm2 {%k1}
+; KNL-NEXT: vmovaps %zmm2, %zmm0
+; KNL-NEXT: retq
%mask1 = icmp sge <8 x i64> %x1, %y1
%mask0 = icmp sle <8 x i64> %x, %y
%mask = select <8 x i1> %mask0, <8 x i1> %mask1, <8 x i1> zeroinitializer
@@ -278,15 +301,15 @@ define <8 x i64> @test21(<8 x i64> %x, <8 x i64> %y, <8 x i64> %x1, <8 x i64> %y
}
define <8 x i64> @test22(<8 x i64> %x, <8 x i64>* %y.ptr, <8 x i64> %x1, <8 x i64> %y1) nounwind {
-; CHECK-LABEL: test22:
-; CHECK: ## BB#0:
-; CHECK-NEXT: vpcmpgtq %zmm2, %zmm1, %k1
-; CHECK-NEXT: vpcmpgtq (%rdi), %zmm0, %k1 {%k1}
-; CHECK-NEXT: vmovdqa64 %zmm0, %zmm1 {%k1}
-; CHECK-NEXT: vmovaps %zmm1, %zmm0
-; CHECK-NEXT: retq
+; KNL-LABEL: test22:
+; KNL: ## BB#0:
+; KNL-NEXT: vpcmpgtq %zmm2, %zmm1, %k1
+; KNL-NEXT: vpcmpgtq (%rdi), %zmm0, %k1 {%k1}
+; KNL-NEXT: vmovdqa64 %zmm0, %zmm1 {%k1}
+; KNL-NEXT: vmovaps %zmm1, %zmm0
+; KNL-NEXT: retq
%mask1 = icmp sgt <8 x i64> %x1, %y1
- %y = load <8 x i64>* %y.ptr, align 4
+ %y = load <8 x i64>, <8 x i64>* %y.ptr, align 4
%mask0 = icmp sgt <8 x i64> %x, %y
%mask = select <8 x i1> %mask0, <8 x i1> %mask1, <8 x i1> zeroinitializer
%max = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> %x1
@@ -294,15 +317,15 @@ define <8 x i64> @test22(<8 x i64> %x, <8 x i64>* %y.ptr, <8 x i64> %x1, <8 x i6
}
define <16 x i32> @test23(<16 x i32> %x, <16 x i32>* %y.ptr, <16 x i32> %x1, <16 x i32> %y1) nounwind {
-; CHECK-LABEL: test23:
-; CHECK: ## BB#0:
-; CHECK-NEXT: vpcmpled %zmm1, %zmm2, %k1
-; CHECK-NEXT: vpcmpleud (%rdi), %zmm0, %k1 {%k1}
-; CHECK-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1}
-; CHECK-NEXT: vmovaps %zmm1, %zmm0
-; CHECK-NEXT: retq
+; KNL-LABEL: test23:
+; KNL: ## BB#0:
+; KNL-NEXT: vpcmpled %zmm1, %zmm2, %k1
+; KNL-NEXT: vpcmpleud (%rdi), %zmm0, %k1 {%k1}
+; KNL-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1}
+; KNL-NEXT: vmovaps %zmm1, %zmm0
+; KNL-NEXT: retq
%mask1 = icmp sge <16 x i32> %x1, %y1
- %y = load <16 x i32>* %y.ptr, align 4
+ %y = load <16 x i32>, <16 x i32>* %y.ptr, align 4
%mask0 = icmp ule <16 x i32> %x, %y
%mask = select <16 x i1> %mask0, <16 x i1> %mask1, <16 x i1> zeroinitializer
%max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
@@ -310,13 +333,13 @@ define <16 x i32> @test23(<16 x i32> %x, <16 x i32>* %y.ptr, <16 x i32> %x1, <16
}
define <8 x i64> @test24(<8 x i64> %x, <8 x i64> %x1, i64* %yb.ptr) nounwind {
-; CHECK-LABEL: test24:
-; CHECK: ## BB#0:
-; CHECK-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k1
-; CHECK-NEXT: vmovdqa64 %zmm0, %zmm1 {%k1}
-; CHECK-NEXT: vmovaps %zmm1, %zmm0
-; CHECK-NEXT: retq
- %yb = load i64* %yb.ptr, align 4
+; KNL-LABEL: test24:
+; KNL: ## BB#0:
+; KNL-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k1
+; KNL-NEXT: vmovdqa64 %zmm0, %zmm1 {%k1}
+; KNL-NEXT: vmovaps %zmm1, %zmm0
+; KNL-NEXT: retq
+ %yb = load i64, i64* %yb.ptr, align 4
%y.0 = insertelement <8 x i64> undef, i64 %yb, i32 0
%y = shufflevector <8 x i64> %y.0, <8 x i64> undef, <8 x i32> zeroinitializer
%mask = icmp eq <8 x i64> %x, %y
@@ -325,13 +348,13 @@ define <8 x i64> @test24(<8 x i64> %x, <8 x i64> %x1, i64* %yb.ptr) nounwind {
}
define <16 x i32> @test25(<16 x i32> %x, i32* %yb.ptr, <16 x i32> %x1) nounwind {
-; CHECK-LABEL: test25:
-; CHECK: ## BB#0:
-; CHECK-NEXT: vpcmpled (%rdi){1to16}, %zmm0, %k1
-; CHECK-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1}
-; CHECK-NEXT: vmovaps %zmm1, %zmm0
-; CHECK-NEXT: retq
- %yb = load i32* %yb.ptr, align 4
+; KNL-LABEL: test25:
+; KNL: ## BB#0:
+; KNL-NEXT: vpcmpled (%rdi){1to16}, %zmm0, %k1
+; KNL-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1}
+; KNL-NEXT: vmovaps %zmm1, %zmm0
+; KNL-NEXT: retq
+ %yb = load i32, i32* %yb.ptr, align 4
%y.0 = insertelement <16 x i32> undef, i32 %yb, i32 0
%y = shufflevector <16 x i32> %y.0, <16 x i32> undef, <16 x i32> zeroinitializer
%mask = icmp sle <16 x i32> %x, %y
@@ -340,15 +363,15 @@ define <16 x i32> @test25(<16 x i32> %x, i32* %yb.ptr, <16 x i32> %x1) nounwind
}
define <16 x i32> @test26(<16 x i32> %x, i32* %yb.ptr, <16 x i32> %x1, <16 x i32> %y1) nounwind {
-; CHECK-LABEL: test26:
-; CHECK: ## BB#0:
-; CHECK-NEXT: vpcmpled %zmm1, %zmm2, %k1
-; CHECK-NEXT: vpcmpgtd (%rdi){1to16}, %zmm0, %k1 {%k1}
-; CHECK-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1}
-; CHECK-NEXT: vmovaps %zmm1, %zmm0
-; CHECK-NEXT: retq
+; KNL-LABEL: test26:
+; KNL: ## BB#0:
+; KNL-NEXT: vpcmpled %zmm1, %zmm2, %k1
+; KNL-NEXT: vpcmpgtd (%rdi){1to16}, %zmm0, %k1 {%k1}
+; KNL-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1}
+; KNL-NEXT: vmovaps %zmm1, %zmm0
+; KNL-NEXT: retq
%mask1 = icmp sge <16 x i32> %x1, %y1
- %yb = load i32* %yb.ptr, align 4
+ %yb = load i32, i32* %yb.ptr, align 4
%y.0 = insertelement <16 x i32> undef, i32 %yb, i32 0
%y = shufflevector <16 x i32> %y.0, <16 x i32> undef, <16 x i32> zeroinitializer
%mask0 = icmp sgt <16 x i32> %x, %y
@@ -358,15 +381,15 @@ define <16 x i32> @test26(<16 x i32> %x, i32* %yb.ptr, <16 x i32> %x1, <16 x i32
}
define <8 x i64> @test27(<8 x i64> %x, i64* %yb.ptr, <8 x i64> %x1, <8 x i64> %y1) nounwind {
-; CHECK-LABEL: test27:
-; CHECK: ## BB#0:
-; CHECK-NEXT: vpcmpleq %zmm1, %zmm2, %k1
-; CHECK-NEXT: vpcmpleq (%rdi){1to8}, %zmm0, %k1 {%k1}
-; CHECK-NEXT: vmovdqa64 %zmm0, %zmm1 {%k1}
-; CHECK-NEXT: vmovaps %zmm1, %zmm0
-; CHECK-NEXT: retq
+; KNL-LABEL: test27:
+; KNL: ## BB#0:
+; KNL-NEXT: vpcmpleq %zmm1, %zmm2, %k1
+; KNL-NEXT: vpcmpleq (%rdi){1to8}, %zmm0, %k1 {%k1}
+; KNL-NEXT: vmovdqa64 %zmm0, %zmm1 {%k1}
+; KNL-NEXT: vmovaps %zmm1, %zmm0
+; KNL-NEXT: retq
%mask1 = icmp sge <8 x i64> %x1, %y1
- %yb = load i64* %yb.ptr, align 4
+ %yb = load i64, i64* %yb.ptr, align 4
%y.0 = insertelement <8 x i64> undef, i64 %yb, i32 0
%y = shufflevector <8 x i64> %y.0, <8 x i64> undef, <8 x i32> zeroinitializer
%mask0 = icmp sle <8 x i64> %x, %y
@@ -374,3 +397,201 @@ define <8 x i64> @test27(<8 x i64> %x, i64* %yb.ptr, <8 x i64> %x1, <8 x i64> %y
%max = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> %x1
ret <8 x i64> %max
}
+
+; KNL-LABEL: test28
+; KNL: vpcmpgtq
+; KNL: vpcmpgtq
+; KNL: kxorw
+define <8 x i32>@test28(<8 x i64> %x, <8 x i64> %y, <8 x i64> %x1, <8 x i64> %y1) {
+ %x_gt_y = icmp sgt <8 x i64> %x, %y
+ %x1_gt_y1 = icmp sgt <8 x i64> %x1, %y1
+ %res = icmp eq <8 x i1>%x_gt_y, %x1_gt_y1
+ %resse = sext <8 x i1>%res to <8 x i32>
+ ret <8 x i32> %resse
+}
+
+; KNL-LABEL: test29
+; KNL: vpcmpgtd
+; KNL: vpcmpgtd
+; KNL: kxnorw
+define <16 x i8>@test29(<16 x i32> %x, <16 x i32> %y, <16 x i32> %x1, <16 x i32> %y1) {
+ %x_gt_y = icmp sgt <16 x i32> %x, %y
+ %x1_gt_y1 = icmp sgt <16 x i32> %x1, %y1
+ %res = icmp ne <16 x i1>%x_gt_y, %x1_gt_y1
+ %resse = sext <16 x i1>%res to <16 x i8>
+ ret <16 x i8> %resse
+}
+
+define <4 x double> @test30(<4 x double> %x, <4 x double> %y) nounwind {
+; SKX-LABEL: test30:
+; SKX: vcmpeqpd %ymm1, %ymm0, %k1
+; SKX: vmovapd %ymm0, %ymm1 {%k1}
+
+ %mask = fcmp oeq <4 x double> %x, %y
+ %max = select <4 x i1> %mask, <4 x double> %x, <4 x double> %y
+ ret <4 x double> %max
+}
+
+define <2 x double> @test31(<2 x double> %x, <2 x double> %x1, <2 x double>* %yp) nounwind {
+; SKX-LABEL: test31:
+; SKX: vcmpltpd (%rdi), %xmm0, %k1
+; SKX: vmovapd %xmm0, %xmm1 {%k1}
+
+ %y = load <2 x double>, <2 x double>* %yp, align 4
+ %mask = fcmp olt <2 x double> %x, %y
+ %max = select <2 x i1> %mask, <2 x double> %x, <2 x double> %x1
+ ret <2 x double> %max
+}
+
+define <4 x double> @test32(<4 x double> %x, <4 x double> %x1, <4 x double>* %yp) nounwind {
+; SKX-LABEL: test32:
+; SKX: vcmpltpd (%rdi), %ymm0, %k1
+; SKX: vmovapd %ymm0, %ymm1 {%k1}
+
+ %y = load <4 x double>, <4 x double>* %yp, align 4
+ %mask = fcmp ogt <4 x double> %y, %x
+ %max = select <4 x i1> %mask, <4 x double> %x, <4 x double> %x1
+ ret <4 x double> %max
+}
+
+define <8 x double> @test33(<8 x double> %x, <8 x double> %x1, <8 x double>* %yp) nounwind {
+; SKX-LABEL: test33:
+; SKX: vcmpltpd (%rdi), %zmm0, %k1
+; SKX: vmovapd %zmm0, %zmm1 {%k1}
+ %y = load <8 x double>, <8 x double>* %yp, align 4
+ %mask = fcmp olt <8 x double> %x, %y
+ %max = select <8 x i1> %mask, <8 x double> %x, <8 x double> %x1
+ ret <8 x double> %max
+}
+
+define <4 x float> @test34(<4 x float> %x, <4 x float> %x1, <4 x float>* %yp) nounwind {
+; SKX-LABEL: test34:
+; SKX: vcmpltps (%rdi), %xmm0, %k1
+; SKX: vmovaps %xmm0, %xmm1 {%k1}
+ %y = load <4 x float>, <4 x float>* %yp, align 4
+ %mask = fcmp olt <4 x float> %x, %y
+ %max = select <4 x i1> %mask, <4 x float> %x, <4 x float> %x1
+ ret <4 x float> %max
+}
+
+define <8 x float> @test35(<8 x float> %x, <8 x float> %x1, <8 x float>* %yp) nounwind {
+; SKX-LABEL: test35:
+; SKX: vcmpltps (%rdi), %ymm0, %k1
+; SKX: vmovaps %ymm0, %ymm1 {%k1}
+
+ %y = load <8 x float>, <8 x float>* %yp, align 4
+ %mask = fcmp ogt <8 x float> %y, %x
+ %max = select <8 x i1> %mask, <8 x float> %x, <8 x float> %x1
+ ret <8 x float> %max
+}
+
+define <16 x float> @test36(<16 x float> %x, <16 x float> %x1, <16 x float>* %yp) nounwind {
+; SKX-LABEL: test36:
+; SKX: vcmpltps (%rdi), %zmm0, %k1
+; SKX: vmovaps %zmm0, %zmm1 {%k1}
+ %y = load <16 x float>, <16 x float>* %yp, align 4
+ %mask = fcmp olt <16 x float> %x, %y
+ %max = select <16 x i1> %mask, <16 x float> %x, <16 x float> %x1
+ ret <16 x float> %max
+}
+
+define <8 x double> @test37(<8 x double> %x, <8 x double> %x1, double* %ptr) nounwind {
+; SKX-LABEL: test37:
+; SKX: vcmpltpd (%rdi){1to8}, %zmm0, %k1
+; SKX: vmovapd %zmm0, %zmm1 {%k1}
+
+ %a = load double, double* %ptr
+ %v = insertelement <8 x double> undef, double %a, i32 0
+ %shuffle = shufflevector <8 x double> %v, <8 x double> undef, <8 x i32> zeroinitializer
+
+ %mask = fcmp ogt <8 x double> %shuffle, %x
+ %max = select <8 x i1> %mask, <8 x double> %x, <8 x double> %x1
+ ret <8 x double> %max
+}
+
+define <4 x double> @test38(<4 x double> %x, <4 x double> %x1, double* %ptr) nounwind {
+; SKX-LABEL: test38:
+; SKX: vcmpltpd (%rdi){1to4}, %ymm0, %k1
+; SKX: vmovapd %ymm0, %ymm1 {%k1}
+
+ %a = load double, double* %ptr
+ %v = insertelement <4 x double> undef, double %a, i32 0
+ %shuffle = shufflevector <4 x double> %v, <4 x double> undef, <4 x i32> zeroinitializer
+
+ %mask = fcmp ogt <4 x double> %shuffle, %x
+ %max = select <4 x i1> %mask, <4 x double> %x, <4 x double> %x1
+ ret <4 x double> %max
+}
+
+define <2 x double> @test39(<2 x double> %x, <2 x double> %x1, double* %ptr) nounwind {
+; SKX-LABEL: test39:
+; SKX: vcmpltpd (%rdi){1to2}, %xmm0, %k1
+; SKX: vmovapd %xmm0, %xmm1 {%k1}
+
+ %a = load double, double* %ptr
+ %v = insertelement <2 x double> undef, double %a, i32 0
+ %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 0, i32 0>
+
+ %mask = fcmp ogt <2 x double> %shuffle, %x
+ %max = select <2 x i1> %mask, <2 x double> %x, <2 x double> %x1
+ ret <2 x double> %max
+}
+
+
+define <16 x float> @test40(<16 x float> %x, <16 x float> %x1, float* %ptr) nounwind {
+; SKX-LABEL: test40:
+; SKX: vcmpltps (%rdi){1to16}, %zmm0, %k1
+; SKX: vmovaps %zmm0, %zmm1 {%k1}
+
+ %a = load float, float* %ptr
+ %v = insertelement <16 x float> undef, float %a, i32 0
+ %shuffle = shufflevector <16 x float> %v, <16 x float> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+
+ %mask = fcmp ogt <16 x float> %shuffle, %x
+ %max = select <16 x i1> %mask, <16 x float> %x, <16 x float> %x1
+ ret <16 x float> %max
+}
+
+define <8 x float> @test41(<8 x float> %x, <8 x float> %x1, float* %ptr) nounwind {
+; SKX-LABEL: test41:
+; SKX: vcmpltps (%rdi){1to8}, %ymm0, %k1
+; SKX: vmovaps %ymm0, %ymm1 {%k1}
+
+ %a = load float, float* %ptr
+ %v = insertelement <8 x float> undef, float %a, i32 0
+ %shuffle = shufflevector <8 x float> %v, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+
+ %mask = fcmp ogt <8 x float> %shuffle, %x
+ %max = select <8 x i1> %mask, <8 x float> %x, <8 x float> %x1
+ ret <8 x float> %max
+}
+
+define <4 x float> @test42(<4 x float> %x, <4 x float> %x1, float* %ptr) nounwind {
+; SKX-LABEL: test42:
+; SKX: vcmpltps (%rdi){1to4}, %xmm0, %k1
+; SKX: vmovaps %xmm0, %xmm1 {%k1}
+
+ %a = load float, float* %ptr
+ %v = insertelement <4 x float> undef, float %a, i32 0
+ %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
+
+ %mask = fcmp ogt <4 x float> %shuffle, %x
+ %max = select <4 x i1> %mask, <4 x float> %x, <4 x float> %x1
+ ret <4 x float> %max
+}
+
+define <8 x double> @test43(<8 x double> %x, <8 x double> %x1, double* %ptr,<8 x i1> %mask_in) nounwind {
+; SKX-LABEL: test43:
+; SKX: vpmovw2m %xmm2, %k1
+; SKX: vcmpltpd (%rdi){1to8}, %zmm0, %k1 {%k1}
+; SKX: vmovapd %zmm0, %zmm1 {%k1}
+
+ %a = load double, double* %ptr
+ %v = insertelement <8 x double> undef, double %a, i32 0
+ %shuffle = shufflevector <8 x double> %v, <8 x double> undef, <8 x i32> zeroinitializer
+
+ %mask_cmp = fcmp ogt <8 x double> %shuffle, %x
+ %mask = and <8 x i1> %mask_cmp, %mask_in
+ %max = select <8 x i1> %mask, <8 x double> %x, <8 x double> %x1
+ ret <8 x double> %max
+}
diff --git a/test/CodeGen/X86/avx512bw-arith.ll b/test/CodeGen/X86/avx512bw-arith.ll
index 94f68a2ddc28..52ebf27b6b72 100644
--- a/test/CodeGen/X86/avx512bw-arith.ll
+++ b/test/CodeGen/X86/avx512bw-arith.ll
@@ -12,7 +12,7 @@ define <64 x i8> @vpaddb512_test(<64 x i8> %i, <64 x i8> %j) nounwind readnone {
; CHECK: vpaddb (%rdi), %zmm{{.*}}
; CHECK: ret
define <64 x i8> @vpaddb512_fold_test(<64 x i8> %i, <64 x i8>* %j) nounwind {
- %tmp = load <64 x i8>* %j, align 4
+ %tmp = load <64 x i8>, <64 x i8>* %j, align 4
%x = add <64 x i8> %i, %tmp
ret <64 x i8> %x
}
@@ -29,7 +29,7 @@ define <32 x i16> @vpaddw512_test(<32 x i16> %i, <32 x i16> %j) nounwind readnon
; CHECK: vpaddw (%rdi), %zmm{{.*}}
; CHECK: ret
define <32 x i16> @vpaddw512_fold_test(<32 x i16> %i, <32 x i16>* %j) nounwind {
- %tmp = load <32 x i16>* %j, align 4
+ %tmp = load <32 x i16>, <32 x i16>* %j, align 4
%x = add <32 x i16> %i, %tmp
ret <32 x i16> %x
}
@@ -59,7 +59,7 @@ define <32 x i16> @vpaddw512_maskz_test(<32 x i16> %i, <32 x i16> %j, <32 x i16>
; CHECK: ret
define <32 x i16> @vpaddw512_mask_fold_test(<32 x i16> %i, <32 x i16>* %j.ptr, <32 x i16> %mask1) nounwind readnone {
%mask = icmp ne <32 x i16> %mask1, zeroinitializer
- %j = load <32 x i16>* %j.ptr
+ %j = load <32 x i16>, <32 x i16>* %j.ptr
%x = add <32 x i16> %i, %j
%r = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> %i
ret <32 x i16> %r
@@ -70,7 +70,7 @@ define <32 x i16> @vpaddw512_mask_fold_test(<32 x i16> %i, <32 x i16>* %j.ptr, <
; CHECK: ret
define <32 x i16> @vpaddw512_maskz_fold_test(<32 x i16> %i, <32 x i16>* %j.ptr, <32 x i16> %mask1) nounwind readnone {
%mask = icmp ne <32 x i16> %mask1, zeroinitializer
- %j = load <32 x i16>* %j.ptr
+ %j = load <32 x i16>, <32 x i16>* %j.ptr
%x = add <32 x i16> %i, %j
%r = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer
ret <32 x i16> %r
diff --git a/test/CodeGen/X86/avx512bw-intrinsics.ll b/test/CodeGen/X86/avx512bw-intrinsics.ll
index c807d222ce53..0db2941cac6f 100644
--- a/test/CodeGen/X86/avx512bw-intrinsics.ll
+++ b/test/CodeGen/X86/avx512bw-intrinsics.ll
@@ -351,3 +351,440 @@ define <8 x i16> @test_x86_mask_blend_w_128(i8 %mask, <8 x i16> %a1, <8 x i16> %
ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.avx512.mask.blend.w.128(<8 x i16>, <8 x i16>, i8) nounwind readonly
+
+define <32 x i16> @test_mask_packs_epi32_rr_512(<16 x i32> %a, <16 x i32> %b) {
+ ;CHECK-LABEL: test_mask_packs_epi32_rr_512
+ ;CHECK: vpackssdw %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0x6b,0xc1]
+ %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 -1)
+ ret <32 x i16> %res
+}
+
+define <32 x i16> @test_mask_packs_epi32_rrk_512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask) {
+ ;CHECK-LABEL: test_mask_packs_epi32_rrk_512
+ ;CHECK: vpackssdw %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0x6b,0xd1]
+ %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask)
+ ret <32 x i16> %res
+}
+
+define <32 x i16> @test_mask_packs_epi32_rrkz_512(<16 x i32> %a, <16 x i32> %b, i32 %mask) {
+ ;CHECK-LABEL: test_mask_packs_epi32_rrkz_512
+ ;CHECK: vpackssdw %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0x6b,0xc1]
+ %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 %mask)
+ ret <32 x i16> %res
+}
+
+define <32 x i16> @test_mask_packs_epi32_rm_512(<16 x i32> %a, <16 x i32>* %ptr_b) {
+ ;CHECK-LABEL: test_mask_packs_epi32_rm_512
+ ;CHECK: vpackssdw (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0x6b,0x07]
+ %b = load <16 x i32>, <16 x i32>* %ptr_b
+ %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 -1)
+ ret <32 x i16> %res
+}
+
+define <32 x i16> @test_mask_packs_epi32_rmk_512(<16 x i32> %a, <16 x i32>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
+ ;CHECK-LABEL: test_mask_packs_epi32_rmk_512
+ ;CHECK: vpackssdw (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0x6b,0x0f]
+ %b = load <16 x i32>, <16 x i32>* %ptr_b
+ %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask)
+ ret <32 x i16> %res
+}
+
+define <32 x i16> @test_mask_packs_epi32_rmkz_512(<16 x i32> %a, <16 x i32>* %ptr_b, i32 %mask) {
+ ;CHECK-LABEL: test_mask_packs_epi32_rmkz_512
+ ;CHECK: vpackssdw (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0x6b,0x07]
+ %b = load <16 x i32>, <16 x i32>* %ptr_b
+ %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 %mask)
+ ret <32 x i16> %res
+}
+
+define <32 x i16> @test_mask_packs_epi32_rmb_512(<16 x i32> %a, i32* %ptr_b) {
+ ;CHECK-LABEL: test_mask_packs_epi32_rmb_512
+ ;CHECK: vpackssdw (%rdi){1to16}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x58,0x6b,0x07]
+ %q = load i32, i32* %ptr_b
+ %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
+ %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
+ %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 -1)
+ ret <32 x i16> %res
+}
+
+define <32 x i16> @test_mask_packs_epi32_rmbk_512(<16 x i32> %a, i32* %ptr_b, <32 x i16> %passThru, i32 %mask) {
+ ;CHECK-LABEL: test_mask_packs_epi32_rmbk_512
+ ;CHECK: vpackssdw (%rdi){1to16}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x59,0x6b,0x0f]
+ %q = load i32, i32* %ptr_b
+ %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
+ %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
+ %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask)
+ ret <32 x i16> %res
+}
+
+define <32 x i16> @test_mask_packs_epi32_rmbkz_512(<16 x i32> %a, i32* %ptr_b, i32 %mask) {
+ ;CHECK-LABEL: test_mask_packs_epi32_rmbkz_512
+ ;CHECK: vpackssdw (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xd9,0x6b,0x07]
+ %q = load i32, i32* %ptr_b
+ %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
+ %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
+ %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 %mask)
+ ret <32 x i16> %res
+}
+
+declare <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32>, <16 x i32>, <32 x i16>, i32)
+
+define <64 x i8> @test_mask_packs_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) {
+ ;CHECK-LABEL: test_mask_packs_epi16_rr_512
+ ;CHECK: vpacksswb %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x63,0xc1]
+ %res = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 -1)
+ ret <64 x i8> %res
+}
+
+define <64 x i8> @test_mask_packs_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask) {
+ ;CHECK-LABEL: test_mask_packs_epi16_rrk_512
+ ;CHECK: vpacksswb %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0x63,0xd1]
+ %res = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask)
+ ret <64 x i8> %res
+}
+
+define <64 x i8> @test_mask_packs_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i64 %mask) {
+ ;CHECK-LABEL: test_mask_packs_epi16_rrkz_512
+ ;CHECK: vpacksswb %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x63,0xc1]
+ %res = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 %mask)
+ ret <64 x i8> %res
+}
+
+define <64 x i8> @test_mask_packs_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
+ ;CHECK-LABEL: test_mask_packs_epi16_rm_512
+ ;CHECK: vpacksswb (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x63,0x07]
+ %b = load <32 x i16>, <32 x i16>* %ptr_b
+ %res = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 -1)
+ ret <64 x i8> %res
+}
+
+define <64 x i8> @test_mask_packs_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <64 x i8> %passThru, i64 %mask) {
+ ;CHECK-LABEL: test_mask_packs_epi16_rmk_512
+ ;CHECK: vpacksswb (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0x63,0x0f]
+ %b = load <32 x i16>, <32 x i16>* %ptr_b
+ %res = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask)
+ ret <64 x i8> %res
+}
+
+define <64 x i8> @test_mask_packs_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i64 %mask) {
+ ;CHECK-LABEL: test_mask_packs_epi16_rmkz_512
+ ;CHECK: vpacksswb (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x63,0x07]
+ %b = load <32 x i16>, <32 x i16>* %ptr_b
+ %res = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 %mask)
+ ret <64 x i8> %res
+}
+
+declare <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16>, <32 x i16>, <64 x i8>, i64)
+
+
+define <32 x i16> @test_mask_packus_epi32_rr_512(<16 x i32> %a, <16 x i32> %b) {
+ ;CHECK-LABEL: test_mask_packus_epi32_rr_512
+ ;CHECK: vpackusdw %zmm1, %zmm0, %zmm0
+ %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 -1)
+ ret <32 x i16> %res
+}
+
+define <32 x i16> @test_mask_packus_epi32_rrk_512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask) {
+ ;CHECK-LABEL: test_mask_packus_epi32_rrk_512
+ ;CHECK: vpackusdw %zmm1, %zmm0, %zmm2 {%k1}
+ %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask)
+ ret <32 x i16> %res
+}
+
+define <32 x i16> @test_mask_packus_epi32_rrkz_512(<16 x i32> %a, <16 x i32> %b, i32 %mask) {
+ ;CHECK-LABEL: test_mask_packus_epi32_rrkz_512
+ ;CHECK: vpackusdw %zmm1, %zmm0, %zmm0 {%k1} {z}
+ %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 %mask)
+ ret <32 x i16> %res
+}
+
+define <32 x i16> @test_mask_packus_epi32_rm_512(<16 x i32> %a, <16 x i32>* %ptr_b) {
+ ;CHECK-LABEL: test_mask_packus_epi32_rm_512
+ ;CHECK: vpackusdw (%rdi), %zmm0, %zmm0
+ %b = load <16 x i32>, <16 x i32>* %ptr_b
+ %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 -1)
+ ret <32 x i16> %res
+}
+
+define <32 x i16> @test_mask_packus_epi32_rmk_512(<16 x i32> %a, <16 x i32>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
+ ;CHECK-LABEL: test_mask_packus_epi32_rmk_512
+ ;CHECK: vpackusdw (%rdi), %zmm0, %zmm1 {%k1}
+ %b = load <16 x i32>, <16 x i32>* %ptr_b
+ %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask)
+ ret <32 x i16> %res
+}
+
+define <32 x i16> @test_mask_packus_epi32_rmkz_512(<16 x i32> %a, <16 x i32>* %ptr_b, i32 %mask) {
+ ;CHECK-LABEL: test_mask_packus_epi32_rmkz_512
+ ;CHECK: vpackusdw (%rdi), %zmm0, %zmm0 {%k1} {z}
+ %b = load <16 x i32>, <16 x i32>* %ptr_b
+ %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 %mask)
+ ret <32 x i16> %res
+}
+
+define <32 x i16> @test_mask_packus_epi32_rmb_512(<16 x i32> %a, i32* %ptr_b) {
+ ;CHECK-LABEL: test_mask_packus_epi32_rmb_512
+ ;CHECK: vpackusdw (%rdi){1to16}, %zmm0, %zmm0
+ %q = load i32, i32* %ptr_b
+ %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
+ %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
+ %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 -1)
+ ret <32 x i16> %res
+}
+
+define <32 x i16> @test_mask_packus_epi32_rmbk_512(<16 x i32> %a, i32* %ptr_b, <32 x i16> %passThru, i32 %mask) {
+ ;CHECK-LABEL: test_mask_packus_epi32_rmbk_512
+ ;CHECK: vpackusdw (%rdi){1to16}, %zmm0, %zmm1 {%k1}
+ %q = load i32, i32* %ptr_b
+ %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
+ %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
+ %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask)
+ ret <32 x i16> %res
+}
+
+define <32 x i16> @test_mask_packus_epi32_rmbkz_512(<16 x i32> %a, i32* %ptr_b, i32 %mask) {
+ ;CHECK-LABEL: test_mask_packus_epi32_rmbkz_512
+ ;CHECK: vpackusdw (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z}
+ %q = load i32, i32* %ptr_b
+ %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
+ %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
+ %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 %mask)
+ ret <32 x i16> %res
+}
+
+declare <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32>, <16 x i32>, <32 x i16>, i32)
+
+define <64 x i8> @test_mask_packus_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) {
+ ;CHECK-LABEL: test_mask_packus_epi16_rr_512
+ ;CHECK: vpackuswb %zmm1, %zmm0, %zmm0
+ %res = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 -1)
+ ret <64 x i8> %res
+}
+
+define <64 x i8> @test_mask_packus_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask) {
+ ;CHECK-LABEL: test_mask_packus_epi16_rrk_512
+ ;CHECK: vpackuswb %zmm1, %zmm0, %zmm2 {%k1}
+ %res = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask)
+ ret <64 x i8> %res
+}
+
+define <64 x i8> @test_mask_packus_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i64 %mask) {
+ ;CHECK-LABEL: test_mask_packus_epi16_rrkz_512
+ ;CHECK: vpackuswb %zmm1, %zmm0, %zmm0 {%k1} {z}
+ %res = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 %mask)
+ ret <64 x i8> %res
+}
+
+define <64 x i8> @test_mask_packus_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
+ ;CHECK-LABEL: test_mask_packus_epi16_rm_512
+ ;CHECK: vpackuswb (%rdi), %zmm0, %zmm0
+ %b = load <32 x i16>, <32 x i16>* %ptr_b
+ %res = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 -1)
+ ret <64 x i8> %res
+}
+
+define <64 x i8> @test_mask_packus_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <64 x i8> %passThru, i64 %mask) {
+ ;CHECK-LABEL: test_mask_packus_epi16_rmk_512
+ ;CHECK: vpackuswb (%rdi), %zmm0, %zmm1 {%k1}
+ %b = load <32 x i16>, <32 x i16>* %ptr_b
+ %res = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask)
+ ret <64 x i8> %res
+}
+
+define <64 x i8> @test_mask_packus_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i64 %mask) {
+ ;CHECK-LABEL: test_mask_packus_epi16_rmkz_512
+ ;CHECK: vpackuswb (%rdi), %zmm0, %zmm0 {%k1} {z}
+ %b = load <32 x i16>, <32 x i16>* %ptr_b
+ %res = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 %mask)
+ ret <64 x i8> %res
+}
+
+declare <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16>, <32 x i16>, <64 x i8>, i64)
+
+define <32 x i16> @test_mask_adds_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) {
+ ;CHECK-LABEL: test_mask_adds_epi16_rr_512
+ ;CHECK: vpaddsw %zmm1, %zmm0, %zmm0
+ %res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
+ ret <32 x i16> %res
+}
+
+define <32 x i16> @test_mask_adds_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) {
+ ;CHECK-LABEL: test_mask_adds_epi16_rrk_512
+ ;CHECK: vpaddsw %zmm1, %zmm0, %zmm2 {%k1}
+ %res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
+ ret <32 x i16> %res
+}
+
+define <32 x i16> @test_mask_adds_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
+ ;CHECK-LABEL: test_mask_adds_epi16_rrkz_512
+ ;CHECK: vpaddsw %zmm1, %zmm0, %zmm0 {%k1} {z}
+ %res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
+ ret <32 x i16> %res
+}
+
+define <32 x i16> @test_mask_adds_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
+ ;CHECK-LABEL: test_mask_adds_epi16_rm_512
+ ;CHECK: vpaddsw (%rdi), %zmm0, %zmm0
+ %b = load <32 x i16>, <32 x i16>* %ptr_b
+ %res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
+ ret <32 x i16> %res
+}
+
+define <32 x i16> @test_mask_adds_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
+ ;CHECK-LABEL: test_mask_adds_epi16_rmk_512
+ ;CHECK: vpaddsw (%rdi), %zmm0, %zmm1 {%k1}
+ %b = load <32 x i16>, <32 x i16>* %ptr_b
+ %res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
+ ret <32 x i16> %res
+}
+
+define <32 x i16> @test_mask_adds_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) {
+ ;CHECK-LABEL: test_mask_adds_epi16_rmkz_512
+ ;CHECK: vpaddsw (%rdi), %zmm0, %zmm0 {%k1} {z}
+ %b = load <32 x i16>, <32 x i16>* %ptr_b
+ %res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
+ ret <32 x i16> %res
+}
+
+declare <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
+
+define <32 x i16> @test_mask_subs_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) {
+ ;CHECK-LABEL: test_mask_subs_epi16_rr_512
+ ;CHECK: vpsubsw %zmm1, %zmm0, %zmm0
+ %res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
+ ret <32 x i16> %res
+}
+
+define <32 x i16> @test_mask_subs_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) {
+ ;CHECK-LABEL: test_mask_subs_epi16_rrk_512
+ ;CHECK: vpsubsw %zmm1, %zmm0, %zmm2 {%k1}
+ %res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
+ ret <32 x i16> %res
+}
+
+define <32 x i16> @test_mask_subs_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
+ ;CHECK-LABEL: test_mask_subs_epi16_rrkz_512
+ ;CHECK: vpsubsw %zmm1, %zmm0, %zmm0 {%k1} {z}
+ %res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
+ ret <32 x i16> %res
+}
+
+define <32 x i16> @test_mask_subs_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
+ ;CHECK-LABEL: test_mask_subs_epi16_rm_512
+ ;CHECK: vpsubsw (%rdi), %zmm0, %zmm0
+ %b = load <32 x i16>, <32 x i16>* %ptr_b
+ %res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
+ ret <32 x i16> %res
+}
+
+define <32 x i16> @test_mask_subs_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
+ ;CHECK-LABEL: test_mask_subs_epi16_rmk_512
+ ;CHECK: vpsubsw (%rdi), %zmm0, %zmm1 {%k1}
+ %b = load <32 x i16>, <32 x i16>* %ptr_b
+ %res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
+ ret <32 x i16> %res
+}
+
+define <32 x i16> @test_mask_subs_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) {
+ ;CHECK-LABEL: test_mask_subs_epi16_rmkz_512
+ ;CHECK: vpsubsw (%rdi), %zmm0, %zmm0 {%k1} {z}
+ %b = load <32 x i16>, <32 x i16>* %ptr_b
+ %res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
+ ret <32 x i16> %res
+}
+
+declare <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
+
+define <32 x i16> @test_mask_adds_epu16_rr_512(<32 x i16> %a, <32 x i16> %b) {
+ ;CHECK-LABEL: test_mask_adds_epu16_rr_512
+ ;CHECK: vpaddusw %zmm1, %zmm0, %zmm0
+ %res = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
+ ret <32 x i16> %res
+}
+
+define <32 x i16> @test_mask_adds_epu16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) {
+ ;CHECK-LABEL: test_mask_adds_epu16_rrk_512
+ ;CHECK: vpaddusw %zmm1, %zmm0, %zmm2 {%k1}
+ %res = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
+ ret <32 x i16> %res
+}
+
+define <32 x i16> @test_mask_adds_epu16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
+ ;CHECK-LABEL: test_mask_adds_epu16_rrkz_512
+ ;CHECK: vpaddusw %zmm1, %zmm0, %zmm0 {%k1} {z}
+ %res = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
+ ret <32 x i16> %res
+}
+
+define <32 x i16> @test_mask_adds_epu16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
+ ;CHECK-LABEL: test_mask_adds_epu16_rm_512
+ ;CHECK: vpaddusw (%rdi), %zmm0, %zmm0
+ %b = load <32 x i16>, <32 x i16>* %ptr_b
+ %res = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
+ ret <32 x i16> %res
+}
+
+define <32 x i16> @test_mask_adds_epu16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
+ ;CHECK-LABEL: test_mask_adds_epu16_rmk_512
+ ;CHECK: vpaddusw (%rdi), %zmm0, %zmm1 {%k1}
+ %b = load <32 x i16>, <32 x i16>* %ptr_b
+ %res = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
+ ret <32 x i16> %res
+}
+
+define <32 x i16> @test_mask_adds_epu16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) {
+ ;CHECK-LABEL: test_mask_adds_epu16_rmkz_512
+ ;CHECK: vpaddusw (%rdi), %zmm0, %zmm0 {%k1} {z}
+ %b = load <32 x i16>, <32 x i16>* %ptr_b
+ %res = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
+ ret <32 x i16> %res
+}
+
+declare <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
+
+define <32 x i16> @test_mask_subs_epu16_rr_512(<32 x i16> %a, <32 x i16> %b) {
+ ;CHECK-LABEL: test_mask_subs_epu16_rr_512
+ ;CHECK: vpsubusw %zmm1, %zmm0, %zmm0
+ %res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
+ ret <32 x i16> %res
+}
+
+define <32 x i16> @test_mask_subs_epu16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) {
+ ;CHECK-LABEL: test_mask_subs_epu16_rrk_512
+ ;CHECK: vpsubusw %zmm1, %zmm0, %zmm2 {%k1}
+ %res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
+ ret <32 x i16> %res
+}
+
+define <32 x i16> @test_mask_subs_epu16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
+ ;CHECK-LABEL: test_mask_subs_epu16_rrkz_512
+ ;CHECK: vpsubusw %zmm1, %zmm0, %zmm0 {%k1} {z}
+ %res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
+ ret <32 x i16> %res
+}
+
+define <32 x i16> @test_mask_subs_epu16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
+ ;CHECK-LABEL: test_mask_subs_epu16_rm_512
+ ;CHECK: vpsubusw (%rdi), %zmm0, %zmm0
+ %b = load <32 x i16>, <32 x i16>* %ptr_b
+ %res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
+ ret <32 x i16> %res
+}
+
+define <32 x i16> @test_mask_subs_epu16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
+ ;CHECK-LABEL: test_mask_subs_epu16_rmk_512
+ ;CHECK: vpsubusw (%rdi), %zmm0, %zmm1 {%k1}
+ %b = load <32 x i16>, <32 x i16>* %ptr_b
+ %res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
+ ret <32 x i16> %res
+}
+
+define <32 x i16> @test_mask_subs_epu16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) {
+ ;CHECK-LABEL: test_mask_subs_epu16_rmkz_512
+ ;CHECK: vpsubusw (%rdi), %zmm0, %zmm0 {%k1} {z}
+ %b = load <32 x i16>, <32 x i16>* %ptr_b
+ %res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
+ ret <32 x i16> %res
+}
+
+declare <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
diff --git a/test/CodeGen/X86/avx512bw-mask-op.ll b/test/CodeGen/X86/avx512bw-mask-op.ll
index 9d7630c5d0ad..0208011cf89d 100644
--- a/test/CodeGen/X86/avx512bw-mask-op.ll
+++ b/test/CodeGen/X86/avx512bw-mask-op.ll
@@ -35,7 +35,7 @@ define i64 @mask64(i64 %x) {
}
define void @mask32_mem(i32* %ptr) {
- %x = load i32* %ptr, align 4
+ %x = load i32, i32* %ptr, align 4
%m0 = bitcast i32 %x to <32 x i1>
%m1 = xor <32 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1,
i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1,
@@ -52,7 +52,7 @@ define void @mask32_mem(i32* %ptr) {
}
define void @mask64_mem(i64* %ptr) {
- %x = load i64* %ptr, align 4
+ %x = load i64, i64* %ptr, align 4
%m0 = bitcast i64 %x to <64 x i1>
%m1 = xor <64 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1,
i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1,
diff --git a/test/CodeGen/X86/avx512bw-mov.ll b/test/CodeGen/X86/avx512bw-mov.ll
index 2ff6d280ab8f..519b649ff53a 100644
--- a/test/CodeGen/X86/avx512bw-mov.ll
+++ b/test/CodeGen/X86/avx512bw-mov.ll
@@ -5,7 +5,7 @@
; CHECK: ret
define <64 x i8> @test1(i8 * %addr) {
%vaddr = bitcast i8* %addr to <64 x i8>*
- %res = load <64 x i8>* %vaddr, align 1
+ %res = load <64 x i8>, <64 x i8>* %vaddr, align 1
ret <64 x i8>%res
}
@@ -24,7 +24,7 @@ define void @test2(i8 * %addr, <64 x i8> %data) {
define <64 x i8> @test3(i8 * %addr, <64 x i8> %old, <64 x i8> %mask1) {
%mask = icmp ne <64 x i8> %mask1, zeroinitializer
%vaddr = bitcast i8* %addr to <64 x i8>*
- %r = load <64 x i8>* %vaddr, align 1
+ %r = load <64 x i8>, <64 x i8>* %vaddr, align 1
%res = select <64 x i1> %mask, <64 x i8> %r, <64 x i8> %old
ret <64 x i8>%res
}
@@ -35,7 +35,7 @@ define <64 x i8> @test3(i8 * %addr, <64 x i8> %old, <64 x i8> %mask1) {
define <64 x i8> @test4(i8 * %addr, <64 x i8> %mask1) {
%mask = icmp ne <64 x i8> %mask1, zeroinitializer
%vaddr = bitcast i8* %addr to <64 x i8>*
- %r = load <64 x i8>* %vaddr, align 1
+ %r = load <64 x i8>, <64 x i8>* %vaddr, align 1
%res = select <64 x i1> %mask, <64 x i8> %r, <64 x i8> zeroinitializer
ret <64 x i8>%res
}
@@ -45,7 +45,7 @@ define <64 x i8> @test4(i8 * %addr, <64 x i8> %mask1) {
; CHECK: ret
define <32 x i16> @test5(i8 * %addr) {
%vaddr = bitcast i8* %addr to <32 x i16>*
- %res = load <32 x i16>* %vaddr, align 1
+ %res = load <32 x i16>, <32 x i16>* %vaddr, align 1
ret <32 x i16>%res
}
@@ -64,7 +64,7 @@ define void @test6(i8 * %addr, <32 x i16> %data) {
define <32 x i16> @test7(i8 * %addr, <32 x i16> %old, <32 x i16> %mask1) {
%mask = icmp ne <32 x i16> %mask1, zeroinitializer
%vaddr = bitcast i8* %addr to <32 x i16>*
- %r = load <32 x i16>* %vaddr, align 1
+ %r = load <32 x i16>, <32 x i16>* %vaddr, align 1
%res = select <32 x i1> %mask, <32 x i16> %r, <32 x i16> %old
ret <32 x i16>%res
}
@@ -75,7 +75,7 @@ define <32 x i16> @test7(i8 * %addr, <32 x i16> %old, <32 x i16> %mask1) {
define <32 x i16> @test8(i8 * %addr, <32 x i16> %mask1) {
%mask = icmp ne <32 x i16> %mask1, zeroinitializer
%vaddr = bitcast i8* %addr to <32 x i16>*
- %r = load <32 x i16>* %vaddr, align 1
+ %r = load <32 x i16>, <32 x i16>* %vaddr, align 1
%res = select <32 x i1> %mask, <32 x i16> %r, <32 x i16> zeroinitializer
ret <32 x i16>%res
}
diff --git a/test/CodeGen/X86/avx512bw-vec-cmp.ll b/test/CodeGen/X86/avx512bw-vec-cmp.ll
index 6ba4db68662e..141f5cc09219 100644
--- a/test/CodeGen/X86/avx512bw-vec-cmp.ll
+++ b/test/CodeGen/X86/avx512bw-vec-cmp.ll
@@ -45,7 +45,7 @@ define <64 x i8> @test4(<64 x i8> %x, <64 x i8> %y, <64 x i8> %x1) nounwind {
; CHECK: vmovdqu16
; CHECK: ret
define <32 x i16> @test5(<32 x i16> %x, <32 x i16> %x1, <32 x i16>* %yp) nounwind {
- %y = load <32 x i16>* %yp, align 4
+ %y = load <32 x i16>, <32 x i16>* %yp, align 4
%mask = icmp eq <32 x i16> %x, %y
%max = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> %x1
ret <32 x i16> %max
@@ -56,7 +56,7 @@ define <32 x i16> @test5(<32 x i16> %x, <32 x i16> %x1, <32 x i16>* %yp) nounwin
; CHECK: vmovdqu16
; CHECK: ret
define <32 x i16> @test6(<32 x i16> %x, <32 x i16> %x1, <32 x i16>* %y.ptr) nounwind {
- %y = load <32 x i16>* %y.ptr, align 4
+ %y = load <32 x i16>, <32 x i16>* %y.ptr, align 4
%mask = icmp sgt <32 x i16> %x, %y
%max = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> %x1
ret <32 x i16> %max
@@ -67,7 +67,7 @@ define <32 x i16> @test6(<32 x i16> %x, <32 x i16> %x1, <32 x i16>* %y.ptr) noun
; CHECK: vmovdqu16
; CHECK: ret
define <32 x i16> @test7(<32 x i16> %x, <32 x i16> %x1, <32 x i16>* %y.ptr) nounwind {
- %y = load <32 x i16>* %y.ptr, align 4
+ %y = load <32 x i16>, <32 x i16>* %y.ptr, align 4
%mask = icmp sle <32 x i16> %x, %y
%max = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> %x1
ret <32 x i16> %max
@@ -78,7 +78,7 @@ define <32 x i16> @test7(<32 x i16> %x, <32 x i16> %x1, <32 x i16>* %y.ptr) noun
; CHECK: vmovdqu16
; CHECK: ret
define <32 x i16> @test8(<32 x i16> %x, <32 x i16> %x1, <32 x i16>* %y.ptr) nounwind {
- %y = load <32 x i16>* %y.ptr, align 4
+ %y = load <32 x i16>, <32 x i16>* %y.ptr, align 4
%mask = icmp ule <32 x i16> %x, %y
%max = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> %x1
ret <32 x i16> %max
@@ -114,7 +114,7 @@ define <64 x i8> @test10(<64 x i8> %x, <64 x i8> %y, <64 x i8> %x1, <64 x i8> %y
; CHECK: ret
define <64 x i8> @test11(<64 x i8> %x, <64 x i8>* %y.ptr, <64 x i8> %x1, <64 x i8> %y1) nounwind {
%mask1 = icmp sgt <64 x i8> %x1, %y1
- %y = load <64 x i8>* %y.ptr, align 4
+ %y = load <64 x i8>, <64 x i8>* %y.ptr, align 4
%mask0 = icmp sgt <64 x i8> %x, %y
%mask = select <64 x i1> %mask0, <64 x i1> %mask1, <64 x i1> zeroinitializer
%max = select <64 x i1> %mask, <64 x i8> %x, <64 x i8> %x1
@@ -127,7 +127,7 @@ define <64 x i8> @test11(<64 x i8> %x, <64 x i8>* %y.ptr, <64 x i8> %x1, <64 x i
; CHECK: ret
define <32 x i16> @test12(<32 x i16> %x, <32 x i16>* %y.ptr, <32 x i16> %x1, <32 x i16> %y1) nounwind {
%mask1 = icmp sge <32 x i16> %x1, %y1
- %y = load <32 x i16>* %y.ptr, align 4
+ %y = load <32 x i16>, <32 x i16>* %y.ptr, align 4
%mask0 = icmp ule <32 x i16> %x, %y
%mask = select <32 x i1> %mask0, <32 x i1> %mask1, <32 x i1> zeroinitializer
%max = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> %x1
diff --git a/test/CodeGen/X86/avx512bwvl-arith.ll b/test/CodeGen/X86/avx512bwvl-arith.ll
index 96f01409f5be..c0650e176101 100644
--- a/test/CodeGen/X86/avx512bwvl-arith.ll
+++ b/test/CodeGen/X86/avx512bwvl-arith.ll
@@ -14,7 +14,7 @@ define <32 x i8> @vpaddb256_test(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
; CHECK: vpaddb (%rdi), %ymm{{.*}}
; CHECK: ret
define <32 x i8> @vpaddb256_fold_test(<32 x i8> %i, <32 x i8>* %j) nounwind {
- %tmp = load <32 x i8>* %j, align 4
+ %tmp = load <32 x i8>, <32 x i8>* %j, align 4
%x = add <32 x i8> %i, %tmp
ret <32 x i8> %x
}
@@ -31,7 +31,7 @@ define <16 x i16> @vpaddw256_test(<16 x i16> %i, <16 x i16> %j) nounwind readnon
; CHECK: vpaddw (%rdi), %ymm{{.*}}
; CHECK: ret
define <16 x i16> @vpaddw256_fold_test(<16 x i16> %i, <16 x i16>* %j) nounwind {
- %tmp = load <16 x i16>* %j, align 4
+ %tmp = load <16 x i16>, <16 x i16>* %j, align 4
%x = add <16 x i16> %i, %tmp
ret <16 x i16> %x
}
@@ -61,7 +61,7 @@ define <16 x i16> @vpaddw256_maskz_test(<16 x i16> %i, <16 x i16> %j, <16 x i16>
; CHECK: ret
define <16 x i16> @vpaddw256_mask_fold_test(<16 x i16> %i, <16 x i16>* %j.ptr, <16 x i16> %mask1) nounwind readnone {
%mask = icmp ne <16 x i16> %mask1, zeroinitializer
- %j = load <16 x i16>* %j.ptr
+ %j = load <16 x i16>, <16 x i16>* %j.ptr
%x = add <16 x i16> %i, %j
%r = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> %i
ret <16 x i16> %r
@@ -72,7 +72,7 @@ define <16 x i16> @vpaddw256_mask_fold_test(<16 x i16> %i, <16 x i16>* %j.ptr, <
; CHECK: ret
define <16 x i16> @vpaddw256_maskz_fold_test(<16 x i16> %i, <16 x i16>* %j.ptr, <16 x i16> %mask1) nounwind readnone {
%mask = icmp ne <16 x i16> %mask1, zeroinitializer
- %j = load <16 x i16>* %j.ptr
+ %j = load <16 x i16>, <16 x i16>* %j.ptr
%x = add <16 x i16> %i, %j
%r = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> zeroinitializer
ret <16 x i16> %r
@@ -116,7 +116,7 @@ define <16 x i8> @vpaddb128_test(<16 x i8> %i, <16 x i8> %j) nounwind readnone {
; CHECK: vpaddb (%rdi), %xmm{{.*}}
; CHECK: ret
define <16 x i8> @vpaddb128_fold_test(<16 x i8> %i, <16 x i8>* %j) nounwind {
- %tmp = load <16 x i8>* %j, align 4
+ %tmp = load <16 x i8>, <16 x i8>* %j, align 4
%x = add <16 x i8> %i, %tmp
ret <16 x i8> %x
}
@@ -133,7 +133,7 @@ define <8 x i16> @vpaddw128_test(<8 x i16> %i, <8 x i16> %j) nounwind readnone {
; CHECK: vpaddw (%rdi), %xmm{{.*}}
; CHECK: ret
define <8 x i16> @vpaddw128_fold_test(<8 x i16> %i, <8 x i16>* %j) nounwind {
- %tmp = load <8 x i16>* %j, align 4
+ %tmp = load <8 x i16>, <8 x i16>* %j, align 4
%x = add <8 x i16> %i, %tmp
ret <8 x i16> %x
}
@@ -163,7 +163,7 @@ define <8 x i16> @vpaddw128_maskz_test(<8 x i16> %i, <8 x i16> %j, <8 x i16> %ma
; CHECK: ret
define <8 x i16> @vpaddw128_mask_fold_test(<8 x i16> %i, <8 x i16>* %j.ptr, <8 x i16> %mask1) nounwind readnone {
%mask = icmp ne <8 x i16> %mask1, zeroinitializer
- %j = load <8 x i16>* %j.ptr
+ %j = load <8 x i16>, <8 x i16>* %j.ptr
%x = add <8 x i16> %i, %j
%r = select <8 x i1> %mask, <8 x i16> %x, <8 x i16> %i
ret <8 x i16> %r
@@ -174,7 +174,7 @@ define <8 x i16> @vpaddw128_mask_fold_test(<8 x i16> %i, <8 x i16>* %j.ptr, <8 x
; CHECK: ret
define <8 x i16> @vpaddw128_maskz_fold_test(<8 x i16> %i, <8 x i16>* %j.ptr, <8 x i16> %mask1) nounwind readnone {
%mask = icmp ne <8 x i16> %mask1, zeroinitializer
- %j = load <8 x i16>* %j.ptr
+ %j = load <8 x i16>, <8 x i16>* %j.ptr
%x = add <8 x i16> %i, %j
%r = select <8 x i1> %mask, <8 x i16> %x, <8 x i16> zeroinitializer
ret <8 x i16> %r
diff --git a/test/CodeGen/X86/avx512bwvl-intrinsics.ll b/test/CodeGen/X86/avx512bwvl-intrinsics.ll
index 678f252dea42..f0efb2c947e9 100644
--- a/test/CodeGen/X86/avx512bwvl-intrinsics.ll
+++ b/test/CodeGen/X86/avx512bwvl-intrinsics.ll
@@ -830,7 +830,7 @@ define <2 x double> @test_mask_vfmsubadd128_pd(<2 x double> %a0, <2 x double> %a
define <2 x double> @test_mask_vfmsubadd128rm_pd(<2 x double> %a0, <2 x double> %a1, <2 x double>* %ptr_a2, i8 %mask) {
; CHECK-LABEL: test_mask_vfmsubadd128rm_pd
; CHECK: vfmsubadd213pd (%rdi), %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xa7,0x07]
- %a2 = load <2 x double>* %ptr_a2
+ %a2 = load <2 x double>, <2 x double>* %ptr_a2
%res = call <2 x double> @llvm.x86.fma.mask.vfmsubadd.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) nounwind
ret <2 x double> %res
}
@@ -838,7 +838,7 @@ declare <8 x double> @llvm.x86.fma.mask.vfmsubadd.pd.512(<8 x double>, <8 x doub
define <8 x double> @test_mask_vfmsubaddrm_pd(<8 x double> %a0, <8 x double> %a1, <8 x double>* %ptr_a2, i8 %mask) {
; CHECK-LABEL: test_mask_vfmsubaddrm_pd
; CHECK: vfmsubadd213pd (%rdi), %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0xa7,0x07]
- %a2 = load <8 x double>* %ptr_a2, align 8
+ %a2 = load <8 x double>, <8 x double>* %ptr_a2, align 8
%res = call <8 x double> @llvm.x86.fma.mask.vfmsubadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 4) nounwind
ret <8 x double> %res
}
@@ -860,7 +860,7 @@ define <4 x float> @test_mask_vfmadd128_ps_rz(<4 x float> %a0, <4 x float> %a1,
define <4 x float> @test_mask_vfmadd128_ps_rmk(<4 x float> %a0, <4 x float> %a1, <4 x float>* %ptr_a2, i8 %mask) {
; CHECK-LABEL: test_mask_vfmadd128_ps_rmk
; CHECK: vfmadd213ps (%rdi), %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xa8,0x07]
- %a2 = load <4 x float>* %ptr_a2
+ %a2 = load <4 x float>, <4 x float>* %ptr_a2
%res = call <4 x float> @llvm.x86.fma.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) nounwind
ret <4 x float> %res
}
@@ -868,7 +868,7 @@ define <4 x float> @test_mask_vfmadd128_ps_rmk(<4 x float> %a0, <4 x float> %a1,
define <4 x float> @test_mask_vfmadd128_ps_rmka(<4 x float> %a0, <4 x float> %a1, <4 x float>* %ptr_a2, i8 %mask) {
; CHECK-LABEL: test_mask_vfmadd128_ps_rmka
; CHECK: vfmadd213ps (%rdi), %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xa8,0x07]
- %a2 = load <4 x float>* %ptr_a2, align 8
+ %a2 = load <4 x float>, <4 x float>* %ptr_a2, align 8
%res = call <4 x float> @llvm.x86.fma.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) nounwind
ret <4 x float> %res
}
@@ -876,7 +876,7 @@ define <4 x float> @test_mask_vfmadd128_ps_rmka(<4 x float> %a0, <4 x float> %a1
define <4 x float> @test_mask_vfmadd128_ps_rmkz(<4 x float> %a0, <4 x float> %a1, <4 x float>* %ptr_a2) {
; CHECK-LABEL: test_mask_vfmadd128_ps_rmkz
; CHECK: vfmadd213ps (%rdi), %xmm1, %xmm0 ## encoding: [0xc4,0xe2,0x71,0xa8,0x07]
- %a2 = load <4 x float>* %ptr_a2
+ %a2 = load <4 x float>, <4 x float>* %ptr_a2
%res = call <4 x float> @llvm.x86.fma.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 -1) nounwind
ret <4 x float> %res
}
@@ -884,7 +884,7 @@ define <4 x float> @test_mask_vfmadd128_ps_rmkz(<4 x float> %a0, <4 x float> %a1
define <4 x float> @test_mask_vfmadd128_ps_rmkza(<4 x float> %a0, <4 x float> %a1, <4 x float>* %ptr_a2) {
; CHECK-LABEL: test_mask_vfmadd128_ps_rmkza
; CHECK: vfmadd213ps (%rdi), %xmm1, %xmm0 ## encoding: [0xc4,0xe2,0x71,0xa8,0x07]
- %a2 = load <4 x float>* %ptr_a2, align 4
+ %a2 = load <4 x float>, <4 x float>* %ptr_a2, align 4
%res = call <4 x float> @llvm.x86.fma.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 -1) nounwind
ret <4 x float> %res
}
@@ -892,7 +892,7 @@ define <4 x float> @test_mask_vfmadd128_ps_rmkza(<4 x float> %a0, <4 x float> %a
define <4 x float> @test_mask_vfmadd128_ps_rmb(<4 x float> %a0, <4 x float> %a1, float* %ptr_a2, i8 %mask) {
; CHECK-LABEL: test_mask_vfmadd128_ps_rmb
; CHECK: vfmadd213ps (%rdi){1to4}, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x19,0xa8,0x07]
- %q = load float* %ptr_a2
+ %q = load float, float* %ptr_a2
%vecinit.i = insertelement <4 x float> undef, float %q, i32 0
%vecinit2.i = insertelement <4 x float> %vecinit.i, float %q, i32 1
%vecinit4.i = insertelement <4 x float> %vecinit2.i, float %q, i32 2
@@ -904,7 +904,7 @@ define <4 x float> @test_mask_vfmadd128_ps_rmb(<4 x float> %a0, <4 x float> %a1,
define <4 x float> @test_mask_vfmadd128_ps_rmba(<4 x float> %a0, <4 x float> %a1, float* %ptr_a2, i8 %mask) {
; CHECK-LABEL: test_mask_vfmadd128_ps_rmba
; CHECK: vfmadd213ps (%rdi){1to4}, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x19,0xa8,0x07]
- %q = load float* %ptr_a2, align 4
+ %q = load float, float* %ptr_a2, align 4
%vecinit.i = insertelement <4 x float> undef, float %q, i32 0
%vecinit2.i = insertelement <4 x float> %vecinit.i, float %q, i32 1
%vecinit4.i = insertelement <4 x float> %vecinit2.i, float %q, i32 2
@@ -916,7 +916,7 @@ define <4 x float> @test_mask_vfmadd128_ps_rmba(<4 x float> %a0, <4 x float> %a1
define <4 x float> @test_mask_vfmadd128_ps_rmbz(<4 x float> %a0, <4 x float> %a1, float* %ptr_a2) {
; CHECK-LABEL: test_mask_vfmadd128_ps_rmbz
; CHECK: vfmadd213ps (%rdi){1to4}, %xmm1, %xmm0 ## encoding: [0x62,0xf2,0x75,0x18,0xa8,0x07]
- %q = load float* %ptr_a2
+ %q = load float, float* %ptr_a2
%vecinit.i = insertelement <4 x float> undef, float %q, i32 0
%vecinit2.i = insertelement <4 x float> %vecinit.i, float %q, i32 1
%vecinit4.i = insertelement <4 x float> %vecinit2.i, float %q, i32 2
@@ -928,7 +928,7 @@ define <4 x float> @test_mask_vfmadd128_ps_rmbz(<4 x float> %a0, <4 x float> %a1
define <4 x float> @test_mask_vfmadd128_ps_rmbza(<4 x float> %a0, <4 x float> %a1, float* %ptr_a2) {
; CHECK-LABEL: test_mask_vfmadd128_ps_rmbza
; CHECK: vfmadd213ps (%rdi){1to4}, %xmm1, %xmm0 ## encoding: [0x62,0xf2,0x75,0x18,0xa8,0x07]
- %q = load float* %ptr_a2, align 4
+ %q = load float, float* %ptr_a2, align 4
%vecinit.i = insertelement <4 x float> undef, float %q, i32 0
%vecinit2.i = insertelement <4 x float> %vecinit.i, float %q, i32 1
%vecinit4.i = insertelement <4 x float> %vecinit2.i, float %q, i32 2
@@ -954,7 +954,7 @@ define <2 x double> @test_mask_vfmadd128_pd_rz(<2 x double> %a0, <2 x double> %a
define <2 x double> @test_mask_vfmadd128_pd_rmk(<2 x double> %a0, <2 x double> %a1, <2 x double>* %ptr_a2, i8 %mask) {
; CHECK-LABEL: test_mask_vfmadd128_pd_rmk
; CHECK: vfmadd213pd (%rdi), %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xa8,0x07]
- %a2 = load <2 x double>* %ptr_a2
+ %a2 = load <2 x double>, <2 x double>* %ptr_a2
%res = call <2 x double> @llvm.x86.fma.mask.vfmadd.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) nounwind
ret <2 x double> %res
}
@@ -962,7 +962,7 @@ define <2 x double> @test_mask_vfmadd128_pd_rmk(<2 x double> %a0, <2 x double> %
define <2 x double> @test_mask_vfmadd128_pd_rmkz(<2 x double> %a0, <2 x double> %a1, <2 x double>* %ptr_a2) {
; CHECK-LABEL: test_mask_vfmadd128_pd_rmkz
; CHECK: vfmadd213pd (%rdi), %xmm1, %xmm0 ## encoding: [0xc4,0xe2,0xf1,0xa8,0x07]
- %a2 = load <2 x double>* %ptr_a2
+ %a2 = load <2 x double>, <2 x double>* %ptr_a2
%res = call <2 x double> @llvm.x86.fma.mask.vfmadd.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 -1) nounwind
ret <2 x double> %res
}
@@ -984,7 +984,7 @@ define <4 x double> @test_mask_vfmadd256_pd_rz(<4 x double> %a0, <4 x double> %a
define <4 x double> @test_mask_vfmadd256_pd_rmk(<4 x double> %a0, <4 x double> %a1, <4 x double>* %ptr_a2, i8 %mask) {
; CHECK-LABEL: test_mask_vfmadd256_pd_rmk
; CHECK: vfmadd213pd (%rdi), %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0xa8,0x07]
- %a2 = load <4 x double>* %ptr_a2
+ %a2 = load <4 x double>, <4 x double>* %ptr_a2
%res = call <4 x double> @llvm.x86.fma.mask.vfmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) nounwind
ret <4 x double> %res
}
@@ -992,7 +992,1679 @@ define <4 x double> @test_mask_vfmadd256_pd_rmk(<4 x double> %a0, <4 x double> %
define <4 x double> @test_mask_vfmadd256_pd_rmkz(<4 x double> %a0, <4 x double> %a1, <4 x double>* %ptr_a2) {
; CHECK-LABEL: test_mask_vfmadd256_pd_rmkz
; CHECK: vfmadd213pd (%rdi), %ymm1, %ymm0 ## encoding: [0xc4,0xe2,0xf5,0xa8,0x07]
- %a2 = load <4 x double>* %ptr_a2
+ %a2 = load <4 x double>, <4 x double>* %ptr_a2
%res = call <4 x double> @llvm.x86.fma.mask.vfmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 -1) nounwind
ret <4 x double> %res
}
+define <8 x i16> @test_mask_add_epi16_rr_128(<8 x i16> %a, <8 x i16> %b) {
+ ;CHECK-LABEL: test_mask_add_epi16_rr_128
+ ;CHECK: vpaddw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfd,0xc1]
+ %res = call <8 x i16> @llvm.x86.avx512.mask.padd.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1)
+ ret <8 x i16> %res
+}
+
+define <8 x i16> @test_mask_add_epi16_rrk_128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) {
+ ;CHECK-LABEL: test_mask_add_epi16_rrk_128
+ ;CHECK: vpaddw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xfd,0xd1]
+ %res = call <8 x i16> @llvm.x86.avx512.mask.padd.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask)
+ ret <8 x i16> %res
+}
+
+define <8 x i16> @test_mask_add_epi16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) {
+ ;CHECK-LABEL: test_mask_add_epi16_rrkz_128
+ ;CHECK: vpaddw %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xfd,0xc1]
+ %res = call <8 x i16> @llvm.x86.avx512.mask.padd.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask)
+ ret <8 x i16> %res
+}
+
+define <8 x i16> @test_mask_add_epi16_rm_128(<8 x i16> %a, <8 x i16>* %ptr_b) {
+ ;CHECK-LABEL: test_mask_add_epi16_rm_128
+ ;CHECK: vpaddw (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfd,0x07]
+ %b = load <8 x i16>, <8 x i16>* %ptr_b
+ %res = call <8 x i16> @llvm.x86.avx512.mask.padd.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1)
+ ret <8 x i16> %res
+}
+
+define <8 x i16> @test_mask_add_epi16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, <8 x i16> %passThru, i8 %mask) {
+ ;CHECK-LABEL: test_mask_add_epi16_rmk_128
+ ;CHECK: vpaddw (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xfd,0x0f]
+ %b = load <8 x i16>, <8 x i16>* %ptr_b
+ %res = call <8 x i16> @llvm.x86.avx512.mask.padd.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask)
+ ret <8 x i16> %res
+}
+
+define <8 x i16> @test_mask_add_epi16_rmkz_128(<8 x i16> %a, <8 x i16>* %ptr_b, i8 %mask) {
+ ;CHECK-LABEL: test_mask_add_epi16_rmkz_128
+ ;CHECK: vpaddw (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xfd,0x07]
+ %b = load <8 x i16>, <8 x i16>* %ptr_b
+ %res = call <8 x i16> @llvm.x86.avx512.mask.padd.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask)
+ ret <8 x i16> %res
+}
+
+declare <8 x i16> @llvm.x86.avx512.mask.padd.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
+
+define <16 x i16> @test_mask_add_epi16_rr_256(<16 x i16> %a, <16 x i16> %b) {
+ ;CHECK-LABEL: test_mask_add_epi16_rr_256
+ ;CHECK: vpaddw %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xfd,0xc1]
+ %res = call <16 x i16> @llvm.x86.avx512.mask.padd.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1)
+ ret <16 x i16> %res
+}
+
+define <16 x i16> @test_mask_add_epi16_rrk_256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) {
+ ;CHECK-LABEL: test_mask_add_epi16_rrk_256
+ ;CHECK: vpaddw %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xfd,0xd1]
+ %res = call <16 x i16> @llvm.x86.avx512.mask.padd.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask)
+ ret <16 x i16> %res
+}
+
+define <16 x i16> @test_mask_add_epi16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) {
+ ;CHECK-LABEL: test_mask_add_epi16_rrkz_256
+ ;CHECK: vpaddw %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xfd,0xc1]
+ %res = call <16 x i16> @llvm.x86.avx512.mask.padd.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask)
+ ret <16 x i16> %res
+}
+
+define <16 x i16> @test_mask_add_epi16_rm_256(<16 x i16> %a, <16 x i16>* %ptr_b) {
+ ;CHECK-LABEL: test_mask_add_epi16_rm_256
+ ;CHECK: vpaddw (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xfd,0x07]
+ %b = load <16 x i16>, <16 x i16>* %ptr_b
+ %res = call <16 x i16> @llvm.x86.avx512.mask.padd.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1)
+ ret <16 x i16> %res
+}
+
+define <16 x i16> @test_mask_add_epi16_rmk_256(<16 x i16> %a, <16 x i16>* %ptr_b, <16 x i16> %passThru, i16 %mask) {
+ ;CHECK-LABEL: test_mask_add_epi16_rmk_256
+ ;CHECK: vpaddw (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xfd,0x0f]
+ %b = load <16 x i16>, <16 x i16>* %ptr_b
+ %res = call <16 x i16> @llvm.x86.avx512.mask.padd.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask)
+ ret <16 x i16> %res
+}
+
+define <16 x i16> @test_mask_add_epi16_rmkz_256(<16 x i16> %a, <16 x i16>* %ptr_b, i16 %mask) {
+ ;CHECK-LABEL: test_mask_add_epi16_rmkz_256
+ ;CHECK: vpaddw (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xfd,0x07]
+ %b = load <16 x i16>, <16 x i16>* %ptr_b
+ %res = call <16 x i16> @llvm.x86.avx512.mask.padd.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask)
+ ret <16 x i16> %res
+}
+
+declare <16 x i16> @llvm.x86.avx512.mask.padd.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
+
+define <8 x i16> @test_mask_sub_epi16_rr_128(<8 x i16> %a, <8 x i16> %b) {
+ ;CHECK-LABEL: test_mask_sub_epi16_rr_128
+ ;CHECK: vpsubw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xf9,0xc1]
+ %res = call <8 x i16> @llvm.x86.avx512.mask.psub.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1)
+ ret <8 x i16> %res
+}
+
+define <8 x i16> @test_mask_sub_epi16_rrk_128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) {
+ ;CHECK-LABEL: test_mask_sub_epi16_rrk_128
+ ;CHECK: vpsubw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xf9,0xd1]
+ %res = call <8 x i16> @llvm.x86.avx512.mask.psub.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask)
+ ret <8 x i16> %res
+}
+
+define <8 x i16> @test_mask_sub_epi16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) {
+ ;CHECK-LABEL: test_mask_sub_epi16_rrkz_128
+ ;CHECK: vpsubw %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xf9,0xc1]
+ %res = call <8 x i16> @llvm.x86.avx512.mask.psub.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask)
+ ret <8 x i16> %res
+}
+
+define <8 x i16> @test_mask_sub_epi16_rm_128(<8 x i16> %a, <8 x i16>* %ptr_b) {
+ ;CHECK-LABEL: test_mask_sub_epi16_rm_128
+ ;CHECK: vpsubw (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xf9,0x07]
+ %b = load <8 x i16>, <8 x i16>* %ptr_b
+ %res = call <8 x i16> @llvm.x86.avx512.mask.psub.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1)
+ ret <8 x i16> %res
+}
+
+define <8 x i16> @test_mask_sub_epi16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, <8 x i16> %passThru, i8 %mask) {
+ ;CHECK-LABEL: test_mask_sub_epi16_rmk_128
+ ;CHECK: vpsubw (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xf9,0x0f]
+ %b = load <8 x i16>, <8 x i16>* %ptr_b
+ %res = call <8 x i16> @llvm.x86.avx512.mask.psub.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask)
+ ret <8 x i16> %res
+}
+
+define <8 x i16> @test_mask_sub_epi16_rmkz_128(<8 x i16> %a, <8 x i16>* %ptr_b, i8 %mask) {
+ ;CHECK-LABEL: test_mask_sub_epi16_rmkz_128
+ ;CHECK: vpsubw (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xf9,0x07]
+ %b = load <8 x i16>, <8 x i16>* %ptr_b
+ %res = call <8 x i16> @llvm.x86.avx512.mask.psub.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask)
+ ret <8 x i16> %res
+}
+
+declare <8 x i16> @llvm.x86.avx512.mask.psub.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
+
+define <16 x i16> @test_mask_sub_epi16_rr_256(<16 x i16> %a, <16 x i16> %b) {
+ ;CHECK-LABEL: test_mask_sub_epi16_rr_256
+ ;CHECK: vpsubw %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xf9,0xc1]
+ %res = call <16 x i16> @llvm.x86.avx512.mask.psub.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1)
+ ret <16 x i16> %res
+}
+
+define <16 x i16> @test_mask_sub_epi16_rrk_256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) {
+ ;CHECK-LABEL: test_mask_sub_epi16_rrk_256
+ ;CHECK: vpsubw %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xf9,0xd1]
+ %res = call <16 x i16> @llvm.x86.avx512.mask.psub.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask)
+ ret <16 x i16> %res
+}
+
+define <16 x i16> @test_mask_sub_epi16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) {
+ ;CHECK-LABEL: test_mask_sub_epi16_rrkz_256
+ ;CHECK: vpsubw %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xf9,0xc1]
+ %res = call <16 x i16> @llvm.x86.avx512.mask.psub.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask)
+ ret <16 x i16> %res
+}
+
+define <16 x i16> @test_mask_sub_epi16_rm_256(<16 x i16> %a, <16 x i16>* %ptr_b) {
+ ;CHECK-LABEL: test_mask_sub_epi16_rm_256
+ ;CHECK: vpsubw (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xf9,0x07]
+ %b = load <16 x i16>, <16 x i16>* %ptr_b
+ %res = call <16 x i16> @llvm.x86.avx512.mask.psub.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1)
+ ret <16 x i16> %res
+}
+
+define <16 x i16> @test_mask_sub_epi16_rmk_256(<16 x i16> %a, <16 x i16>* %ptr_b, <16 x i16> %passThru, i16 %mask) {
+ ;CHECK-LABEL: test_mask_sub_epi16_rmk_256
+ ;CHECK: vpsubw (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xf9,0x0f]
+ %b = load <16 x i16>, <16 x i16>* %ptr_b
+ %res = call <16 x i16> @llvm.x86.avx512.mask.psub.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask)
+ ret <16 x i16> %res
+}
+
+define <16 x i16> @test_mask_sub_epi16_rmkz_256(<16 x i16> %a, <16 x i16>* %ptr_b, i16 %mask) {
+ ;CHECK-LABEL: test_mask_sub_epi16_rmkz_256
+ ;CHECK: vpsubw (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xf9,0x07]
+ %b = load <16 x i16>, <16 x i16>* %ptr_b
+ %res = call <16 x i16> @llvm.x86.avx512.mask.psub.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask)
+ ret <16 x i16> %res
+}
+
+declare <16 x i16> @llvm.x86.avx512.mask.psub.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
+
+define <32 x i16> @test_mask_add_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) {
+ ;CHECK-LABEL: test_mask_add_epi16_rr_512
+ ;CHECK: vpaddw %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc1]
+ %res = call <32 x i16> @llvm.x86.avx512.mask.padd.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
+ ret <32 x i16> %res
+}
+
+define <32 x i16> @test_mask_add_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) {
+ ;CHECK-LABEL: test_mask_add_epi16_rrk_512
+ ;CHECK: vpaddw %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xfd,0xd1]
+ %res = call <32 x i16> @llvm.x86.avx512.mask.padd.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
+ ret <32 x i16> %res
+}
+
+define <32 x i16> @test_mask_add_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
+ ;CHECK-LABEL: test_mask_add_epi16_rrkz_512
+ ;CHECK: vpaddw %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xfd,0xc1]
+ %res = call <32 x i16> @llvm.x86.avx512.mask.padd.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
+ ret <32 x i16> %res
+}
+
+define <32 x i16> @test_mask_add_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
+ ;CHECK-LABEL: test_mask_add_epi16_rm_512
+ ;CHECK: vpaddw (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfd,0x07]
+ %b = load <32 x i16>, <32 x i16>* %ptr_b
+ %res = call <32 x i16> @llvm.x86.avx512.mask.padd.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
+ ret <32 x i16> %res
+}
+
+define <32 x i16> @test_mask_add_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
+ ;CHECK-LABEL: test_mask_add_epi16_rmk_512
+ ;CHECK: vpaddw (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xfd,0x0f]
+ %b = load <32 x i16>, <32 x i16>* %ptr_b
+ %res = call <32 x i16> @llvm.x86.avx512.mask.padd.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
+ ret <32 x i16> %res
+}
+
+define <32 x i16> @test_mask_add_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) {
+ ;CHECK-LABEL: test_mask_add_epi16_rmkz_512
+ ;CHECK: vpaddw (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xfd,0x07]
+ %b = load <32 x i16>, <32 x i16>* %ptr_b
+ %res = call <32 x i16> @llvm.x86.avx512.mask.padd.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
+ ret <32 x i16> %res
+}
+
+declare <32 x i16> @llvm.x86.avx512.mask.padd.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
+
+define <32 x i16> @test_mask_sub_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) {
+ ;CHECK-LABEL: test_mask_sub_epi16_rr_512
+ ;CHECK: vpsubw %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xf9,0xc1]
+ %res = call <32 x i16> @llvm.x86.avx512.mask.psub.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
+ ret <32 x i16> %res
+}
+
+define <32 x i16> @test_mask_sub_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) {
+ ;CHECK-LABEL: test_mask_sub_epi16_rrk_512
+ ;CHECK: vpsubw %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xf9,0xd1]
+ %res = call <32 x i16> @llvm.x86.avx512.mask.psub.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
+ ret <32 x i16> %res
+}
+
+define <32 x i16> @test_mask_sub_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
+ ;CHECK-LABEL: test_mask_sub_epi16_rrkz_512
+ ;CHECK: vpsubw %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xf9,0xc1]
+ %res = call <32 x i16> @llvm.x86.avx512.mask.psub.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
+ ret <32 x i16> %res
+}
+
+define <32 x i16> @test_mask_sub_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
+ ;CHECK-LABEL: test_mask_sub_epi16_rm_512
+ ;CHECK: vpsubw (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xf9,0x07]
+ %b = load <32 x i16>, <32 x i16>* %ptr_b
+ %res = call <32 x i16> @llvm.x86.avx512.mask.psub.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
+ ret <32 x i16> %res
+}
+
+define <32 x i16> @test_mask_sub_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
+ ;CHECK-LABEL: test_mask_sub_epi16_rmk_512
+ ;CHECK: vpsubw (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xf9,0x0f]
+ %b = load <32 x i16>, <32 x i16>* %ptr_b
+ %res = call <32 x i16> @llvm.x86.avx512.mask.psub.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
+ ret <32 x i16> %res
+}
+
+define <32 x i16> @test_mask_sub_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) {
+ ;CHECK-LABEL: test_mask_sub_epi16_rmkz_512
+ ;CHECK: vpsubw (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xf9,0x07]
+ %b = load <32 x i16>, <32 x i16>* %ptr_b
+ %res = call <32 x i16> @llvm.x86.avx512.mask.psub.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
+ ret <32 x i16> %res
+}
+
+declare <32 x i16> @llvm.x86.avx512.mask.psub.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
+
+define <32 x i16> @test_mask_mullo_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) {
+ ;CHECK-LABEL: test_mask_mullo_epi16_rr_512
+ ;CHECK: vpmullw %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xd5,0xc1]
+ %res = call <32 x i16> @llvm.x86.avx512.mask.pmull.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
+ ret <32 x i16> %res
+}
+
+define <32 x i16> @test_mask_mullo_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) {
+ ;CHECK-LABEL: test_mask_mullo_epi16_rrk_512
+ ;CHECK: vpmullw %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xd5,0xd1]
+ %res = call <32 x i16> @llvm.x86.avx512.mask.pmull.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
+ ret <32 x i16> %res
+}
+
+define <32 x i16> @test_mask_mullo_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
+ ;CHECK-LABEL: test_mask_mullo_epi16_rrkz_512
+ ;CHECK: vpmullw %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xd5,0xc1]
+ %res = call <32 x i16> @llvm.x86.avx512.mask.pmull.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
+ ret <32 x i16> %res
+}
+
+define <32 x i16> @test_mask_mullo_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
+ ;CHECK-LABEL: test_mask_mullo_epi16_rm_512
+ ;CHECK: vpmullw (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xd5,0x07]
+ %b = load <32 x i16>, <32 x i16>* %ptr_b
+ %res = call <32 x i16> @llvm.x86.avx512.mask.pmull.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
+ ret <32 x i16> %res
+}
+
+define <32 x i16> @test_mask_mullo_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
+ ;CHECK-LABEL: test_mask_mullo_epi16_rmk_512
+ ;CHECK: vpmullw (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xd5,0x0f]
+ %b = load <32 x i16>, <32 x i16>* %ptr_b
+ %res = call <32 x i16> @llvm.x86.avx512.mask.pmull.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
+ ret <32 x i16> %res
+}
+
+define <32 x i16> @test_mask_mullo_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) {
+ ;CHECK-LABEL: test_mask_mullo_epi16_rmkz_512
+ ;CHECK: vpmullw (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xd5,0x07]
+ %b = load <32 x i16>, <32 x i16>* %ptr_b
+ %res = call <32 x i16> @llvm.x86.avx512.mask.pmull.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
+ ret <32 x i16> %res
+}
+
+declare <32 x i16> @llvm.x86.avx512.mask.pmull.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
+
+define <8 x i16> @test_mask_mullo_epi16_rr_128(<8 x i16> %a, <8 x i16> %b) {
+ ;CHECK-LABEL: test_mask_mullo_epi16_rr_128
+ ;CHECK: vpmullw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xd5,0xc1]
+ %res = call <8 x i16> @llvm.x86.avx512.mask.pmull.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1)
+ ret <8 x i16> %res
+}
+
+define <8 x i16> @test_mask_mullo_epi16_rrk_128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) {
+ ;CHECK-LABEL: test_mask_mullo_epi16_rrk_128
+ ;CHECK: vpmullw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xd5,0xd1]
+ %res = call <8 x i16> @llvm.x86.avx512.mask.pmull.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask)
+ ret <8 x i16> %res
+}
+
+define <8 x i16> @test_mask_mullo_epi16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) {
+ ;CHECK-LABEL: test_mask_mullo_epi16_rrkz_128
+ ;CHECK: vpmullw %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xd5,0xc1]
+ %res = call <8 x i16> @llvm.x86.avx512.mask.pmull.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask)
+ ret <8 x i16> %res
+}
+
+define <8 x i16> @test_mask_mullo_epi16_rm_128(<8 x i16> %a, <8 x i16>* %ptr_b) {
+ ;CHECK-LABEL: test_mask_mullo_epi16_rm_128
+ ;CHECK: vpmullw (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xd5,0x07]
+ %b = load <8 x i16>, <8 x i16>* %ptr_b
+ %res = call <8 x i16> @llvm.x86.avx512.mask.pmull.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1)
+ ret <8 x i16> %res
+}
+
+define <8 x i16> @test_mask_mullo_epi16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, <8 x i16> %passThru, i8 %mask) {
+ ;CHECK-LABEL: test_mask_mullo_epi16_rmk_128
+ ;CHECK: vpmullw (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xd5,0x0f]
+ %b = load <8 x i16>, <8 x i16>* %ptr_b
+ %res = call <8 x i16> @llvm.x86.avx512.mask.pmull.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask)
+ ret <8 x i16> %res
+}
+
+define <8 x i16> @test_mask_mullo_epi16_rmkz_128(<8 x i16> %a, <8 x i16>* %ptr_b, i8 %mask) {
+ ;CHECK-LABEL: test_mask_mullo_epi16_rmkz_128
+ ;CHECK: vpmullw (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xd5,0x07]
+ %b = load <8 x i16>, <8 x i16>* %ptr_b
+ %res = call <8 x i16> @llvm.x86.avx512.mask.pmull.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask)
+ ret <8 x i16> %res
+}
+
+declare <8 x i16> @llvm.x86.avx512.mask.pmull.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
+
+define <16 x i16> @test_mask_mullo_epi16_rr_256(<16 x i16> %a, <16 x i16> %b) {
+ ;CHECK-LABEL: test_mask_mullo_epi16_rr_256
+ ;CHECK: vpmullw %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xd5,0xc1]
+ %res = call <16 x i16> @llvm.x86.avx512.mask.pmull.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1)
+ ret <16 x i16> %res
+}
+
+define <16 x i16> @test_mask_mullo_epi16_rrk_256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) {
+ ;CHECK-LABEL: test_mask_mullo_epi16_rrk_256
+ ;CHECK: vpmullw %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xd5,0xd1]
+ %res = call <16 x i16> @llvm.x86.avx512.mask.pmull.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask)
+ ret <16 x i16> %res
+}
+
+define <16 x i16> @test_mask_mullo_epi16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) {
+ ;CHECK-LABEL: test_mask_mullo_epi16_rrkz_256
+ ;CHECK: vpmullw %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xd5,0xc1]
+ %res = call <16 x i16> @llvm.x86.avx512.mask.pmull.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask)
+ ret <16 x i16> %res
+}
+
+define <16 x i16> @test_mask_mullo_epi16_rm_256(<16 x i16> %a, <16 x i16>* %ptr_b) {
+ ;CHECK-LABEL: test_mask_mullo_epi16_rm_256
+ ;CHECK: vpmullw (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xd5,0x07]
+ %b = load <16 x i16>, <16 x i16>* %ptr_b
+ %res = call <16 x i16> @llvm.x86.avx512.mask.pmull.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1)
+ ret <16 x i16> %res
+}
+
+define <16 x i16> @test_mask_mullo_epi16_rmk_256(<16 x i16> %a, <16 x i16>* %ptr_b, <16 x i16> %passThru, i16 %mask) {
+ ;CHECK-LABEL: test_mask_mullo_epi16_rmk_256
+ ;CHECK: vpmullw (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xd5,0x0f]
+ %b = load <16 x i16>, <16 x i16>* %ptr_b
+ %res = call <16 x i16> @llvm.x86.avx512.mask.pmull.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask)
+ ret <16 x i16> %res
+}
+
+define <16 x i16> @test_mask_mullo_epi16_rmkz_256(<16 x i16> %a, <16 x i16>* %ptr_b, i16 %mask) {
+ ;CHECK-LABEL: test_mask_mullo_epi16_rmkz_256
+ ;CHECK: vpmullw (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xd5,0x07]
+ %b = load <16 x i16>, <16 x i16>* %ptr_b
+ %res = call <16 x i16> @llvm.x86.avx512.mask.pmull.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask)
+ ret <16 x i16> %res
+}
+
+declare <16 x i16> @llvm.x86.avx512.mask.pmull.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
+
+
+define <8 x i16> @test_mask_packs_epi32_rr_128(<4 x i32> %a, <4 x i32> %b) {
+ ;CHECK-LABEL: test_mask_packs_epi32_rr_128
+ ;CHECK: vpackssdw %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x6b,0xc1]
+ %res = call <8 x i16> @llvm.x86.avx512.mask.packssdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 -1)
+ ret <8 x i16> %res
+}
+
+define <8 x i16> @test_mask_packs_epi32_rrk_128(<4 x i32> %a, <4 x i32> %b, <8 x i16> %passThru, i8 %mask) {
+ ;CHECK-LABEL: test_mask_packs_epi32_rrk_128
+ ;CHECK: vpackssdw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x6b,0xd1]
+ %res = call <8 x i16> @llvm.x86.avx512.mask.packssdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> %passThru, i8 %mask)
+ ret <8 x i16> %res
+}
+
+define <8 x i16> @test_mask_packs_epi32_rrkz_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) {
+ ;CHECK-LABEL: test_mask_packs_epi32_rrkz_128
+ ;CHECK: vpackssdw %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0x6b,0xc1]
+ %res = call <8 x i16> @llvm.x86.avx512.mask.packssdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 %mask)
+ ret <8 x i16> %res
+}
+
+define <8 x i16> @test_mask_packs_epi32_rm_128(<4 x i32> %a, <4 x i32>* %ptr_b) {
+ ;CHECK-LABEL: test_mask_packs_epi32_rm_128
+ ;CHECK: vpackssdw (%rdi), %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x6b,0x07]
+ %b = load <4 x i32>, <4 x i32>* %ptr_b
+ %res = call <8 x i16> @llvm.x86.avx512.mask.packssdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 -1)
+ ret <8 x i16> %res
+}
+
+define <8 x i16> @test_mask_packs_epi32_rmk_128(<4 x i32> %a, <4 x i32>* %ptr_b, <8 x i16> %passThru, i8 %mask) {
+ ;CHECK-LABEL: test_mask_packs_epi32_rmk_128
+ ;CHECK: vpackssdw (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x6b,0x0f]
+ %b = load <4 x i32>, <4 x i32>* %ptr_b
+ %res = call <8 x i16> @llvm.x86.avx512.mask.packssdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> %passThru, i8 %mask)
+ ret <8 x i16> %res
+}
+
+define <8 x i16> @test_mask_packs_epi32_rmkz_128(<4 x i32> %a, <4 x i32>* %ptr_b, i8 %mask) {
+ ;CHECK-LABEL: test_mask_packs_epi32_rmkz_128
+ ;CHECK: vpackssdw (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0x6b,0x07]
+ %b = load <4 x i32>, <4 x i32>* %ptr_b
+ %res = call <8 x i16> @llvm.x86.avx512.mask.packssdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 %mask)
+ ret <8 x i16> %res
+}
+
+define <8 x i16> @test_mask_packs_epi32_rmb_128(<4 x i32> %a, i32* %ptr_b) {
+ ;CHECK-LABEL: test_mask_packs_epi32_rmb_128
+ ;CHECK: vpackssdw (%rdi){1to4}, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x18,0x6b,0x07]
+ %q = load i32, i32* %ptr_b
+ %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
+ %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
+ %res = call <8 x i16> @llvm.x86.avx512.mask.packssdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 -1)
+ ret <8 x i16> %res
+}
+
+define <8 x i16> @test_mask_packs_epi32_rmbk_128(<4 x i32> %a, i32* %ptr_b, <8 x i16> %passThru, i8 %mask) {
+ ;CHECK-LABEL: test_mask_packs_epi32_rmbk_128
+ ;CHECK: vpackssdw (%rdi){1to4}, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x19,0x6b,0x0f]
+ %q = load i32, i32* %ptr_b
+ %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
+ %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
+ %res = call <8 x i16> @llvm.x86.avx512.mask.packssdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> %passThru, i8 %mask)
+ ret <8 x i16> %res
+}
+
+define <8 x i16> @test_mask_packs_epi32_rmbkz_128(<4 x i32> %a, i32* %ptr_b, i8 %mask) {
+ ;CHECK-LABEL: test_mask_packs_epi32_rmbkz_128
+ ;CHECK: vpackssdw (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x99,0x6b,0x07]
+ %q = load i32, i32* %ptr_b
+ %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
+ %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
+ %res = call <8 x i16> @llvm.x86.avx512.mask.packssdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 %mask)
+ ret <8 x i16> %res
+}
+
+declare <8 x i16> @llvm.x86.avx512.mask.packssdw.128(<4 x i32>, <4 x i32>, <8 x i16>, i8)
+
+define <16 x i16> @test_mask_packs_epi32_rr_256(<8 x i32> %a, <8 x i32> %b) {
+ ;CHECK-LABEL: test_mask_packs_epi32_rr_256
+ ;CHECK: vpackssdw %ymm1, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0x6b,0xc1]
+ %res = call <16 x i16> @llvm.x86.avx512.mask.packssdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 -1)
+ ret <16 x i16> %res
+}
+
+define <16 x i16> @test_mask_packs_epi32_rrk_256(<8 x i32> %a, <8 x i32> %b, <16 x i16> %passThru, i16 %mask) {
+ ;CHECK-LABEL: test_mask_packs_epi32_rrk_256
+ ;CHECK: vpackssdw %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x6b,0xd1]
+ %res = call <16 x i16> @llvm.x86.avx512.mask.packssdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> %passThru, i16 %mask)
+ ret <16 x i16> %res
+}
+
+define <16 x i16> @test_mask_packs_epi32_rrkz_256(<8 x i32> %a, <8 x i32> %b, i16 %mask) {
+ ;CHECK-LABEL: test_mask_packs_epi32_rrkz_256
+ ;CHECK: vpackssdw %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0x6b,0xc1]
+ %res = call <16 x i16> @llvm.x86.avx512.mask.packssdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 %mask)
+ ret <16 x i16> %res
+}
+
+define <16 x i16> @test_mask_packs_epi32_rm_256(<8 x i32> %a, <8 x i32>* %ptr_b) {
+ ;CHECK-LABEL: test_mask_packs_epi32_rm_256
+ ;CHECK: vpackssdw (%rdi), %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0x6b,0x07]
+ %b = load <8 x i32>, <8 x i32>* %ptr_b
+ %res = call <16 x i16> @llvm.x86.avx512.mask.packssdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 -1)
+ ret <16 x i16> %res
+}
+
+define <16 x i16> @test_mask_packs_epi32_rmk_256(<8 x i32> %a, <8 x i32>* %ptr_b, <16 x i16> %passThru, i16 %mask) {
+ ;CHECK-LABEL: test_mask_packs_epi32_rmk_256
+ ;CHECK: vpackssdw (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x6b,0x0f]
+ %b = load <8 x i32>, <8 x i32>* %ptr_b
+ %res = call <16 x i16> @llvm.x86.avx512.mask.packssdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> %passThru, i16 %mask)
+ ret <16 x i16> %res
+}
+
+define <16 x i16> @test_mask_packs_epi32_rmkz_256(<8 x i32> %a, <8 x i32>* %ptr_b, i16 %mask) {
+ ;CHECK-LABEL: test_mask_packs_epi32_rmkz_256
+ ;CHECK: vpackssdw (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0x6b,0x07]
+ %b = load <8 x i32>, <8 x i32>* %ptr_b
+ %res = call <16 x i16> @llvm.x86.avx512.mask.packssdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 %mask)
+ ret <16 x i16> %res
+}
+
+define <16 x i16> @test_mask_packs_epi32_rmb_256(<8 x i32> %a, i32* %ptr_b) {
+ ;CHECK-LABEL: test_mask_packs_epi32_rmb_256
+ ;CHECK: vpackssdw (%rdi){1to8}, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x38,0x6b,0x07]
+ %q = load i32, i32* %ptr_b
+ %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
+ %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
+ %res = call <16 x i16> @llvm.x86.avx512.mask.packssdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 -1)
+ ret <16 x i16> %res
+}
+
+define <16 x i16> @test_mask_packs_epi32_rmbk_256(<8 x i32> %a, i32* %ptr_b, <16 x i16> %passThru, i16 %mask) {
+ ;CHECK-LABEL: test_mask_packs_epi32_rmbk_256
+ ;CHECK: vpackssdw (%rdi){1to8}, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x39,0x6b,0x0f]
+ %q = load i32, i32* %ptr_b
+ %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
+ %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
+ %res = call <16 x i16> @llvm.x86.avx512.mask.packssdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> %passThru, i16 %mask)
+ ret <16 x i16> %res
+}
+
+define <16 x i16> @test_mask_packs_epi32_rmbkz_256(<8 x i32> %a, i32* %ptr_b, i16 %mask) {
+ ;CHECK-LABEL: test_mask_packs_epi32_rmbkz_256
+ ;CHECK: vpackssdw (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xb9,0x6b,0x07]
+ %q = load i32, i32* %ptr_b
+ %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
+ %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
+ %res = call <16 x i16> @llvm.x86.avx512.mask.packssdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 %mask)
+ ret <16 x i16> %res
+}
+
+declare <16 x i16> @llvm.x86.avx512.mask.packssdw.256(<8 x i32>, <8 x i32>, <16 x i16>, i16)
+
+define <16 x i8> @test_mask_packs_epi16_rr_128(<8 x i16> %a, <8 x i16> %b) {
+ ;CHECK-LABEL: test_mask_packs_epi16_rr_128
+ ;CHECK: vpacksswb %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x63,0xc1]
+ %res = call <16 x i8> @llvm.x86.avx512.mask.packsswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> zeroinitializer, i16 -1)
+ ret <16 x i8> %res
+}
+
+define <16 x i8> @test_mask_packs_epi16_rrk_128(<8 x i16> %a, <8 x i16> %b, <16 x i8> %passThru, i16 %mask) {
+ ;CHECK-LABEL: test_mask_packs_epi16_rrk_128
+ ;CHECK: vpacksswb %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0x63,0xd1]
+ %res = call <16 x i8> @llvm.x86.avx512.mask.packsswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> %passThru, i16 %mask)
+ ret <16 x i8> %res
+}
+
+define <16 x i8> @test_mask_packs_epi16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i16 %mask) {
+ ;CHECK-LABEL: test_mask_packs_epi16_rrkz_128
+ ;CHECK: vpacksswb %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x89,0x63,0xc1]
+ %res = call <16 x i8> @llvm.x86.avx512.mask.packsswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> zeroinitializer, i16 %mask)
+ ret <16 x i8> %res
+}
+
+define <16 x i8> @test_mask_packs_epi16_rm_128(<8 x i16> %a, <8 x i16>* %ptr_b) {
+ ;CHECK-LABEL: test_mask_packs_epi16_rm_128
+ ;CHECK: vpacksswb (%rdi), %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x63,0x07]
+ %b = load <8 x i16>, <8 x i16>* %ptr_b
+ %res = call <16 x i8> @llvm.x86.avx512.mask.packsswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> zeroinitializer, i16 -1)
+ ret <16 x i8> %res
+}
+
+define <16 x i8> @test_mask_packs_epi16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, <16 x i8> %passThru, i16 %mask) {
+ ;CHECK-LABEL: test_mask_packs_epi16_rmk_128
+ ;CHECK: vpacksswb (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0x63,0x0f]
+ %b = load <8 x i16>, <8 x i16>* %ptr_b
+ %res = call <16 x i8> @llvm.x86.avx512.mask.packsswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> %passThru, i16 %mask)
+ ret <16 x i8> %res
+}
+
+define <16 x i8> @test_mask_packs_epi16_rmkz_128(<8 x i16> %a, <8 x i16>* %ptr_b, i16 %mask) {
+ ;CHECK-LABEL: test_mask_packs_epi16_rmkz_128
+ ;CHECK: vpacksswb (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x89,0x63,0x07]
+ %b = load <8 x i16>, <8 x i16>* %ptr_b
+ %res = call <16 x i8> @llvm.x86.avx512.mask.packsswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> zeroinitializer, i16 %mask)
+ ret <16 x i8> %res
+}
+
+declare <16 x i8> @llvm.x86.avx512.mask.packsswb.128(<8 x i16>, <8 x i16>, <16 x i8>, i16)
+
+define <32 x i8> @test_mask_packs_epi16_rr_256(<16 x i16> %a, <16 x i16> %b) {
+ ;CHECK-LABEL: test_mask_packs_epi16_rr_256
+ ;CHECK: vpacksswb %ymm1, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0x63,0xc1]
+ %res = call <32 x i8> @llvm.x86.avx512.mask.packsswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> zeroinitializer, i32 -1)
+ ret <32 x i8> %res
+}
+
+define <32 x i8> @test_mask_packs_epi16_rrk_256(<16 x i16> %a, <16 x i16> %b, <32 x i8> %passThru, i32 %mask) {
+ ;CHECK-LABEL: test_mask_packs_epi16_rrk_256
+ ;CHECK: vpacksswb %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0x63,0xd1]
+ %res = call <32 x i8> @llvm.x86.avx512.mask.packsswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> %passThru, i32 %mask)
+ ret <32 x i8> %res
+}
+
+define <32 x i8> @test_mask_packs_epi16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i32 %mask) {
+ ;CHECK-LABEL: test_mask_packs_epi16_rrkz_256
+ ;CHECK: vpacksswb %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xa9,0x63,0xc1]
+ %res = call <32 x i8> @llvm.x86.avx512.mask.packsswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> zeroinitializer, i32 %mask)
+ ret <32 x i8> %res
+}
+
+define <32 x i8> @test_mask_packs_epi16_rm_256(<16 x i16> %a, <16 x i16>* %ptr_b) {
+ ;CHECK-LABEL: test_mask_packs_epi16_rm_256
+ ;CHECK: vpacksswb (%rdi), %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0x63,0x07]
+ %b = load <16 x i16>, <16 x i16>* %ptr_b
+ %res = call <32 x i8> @llvm.x86.avx512.mask.packsswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> zeroinitializer, i32 -1)
+ ret <32 x i8> %res
+}
+
+define <32 x i8> @test_mask_packs_epi16_rmk_256(<16 x i16> %a, <16 x i16>* %ptr_b, <32 x i8> %passThru, i32 %mask) {
+ ;CHECK-LABEL: test_mask_packs_epi16_rmk_256
+ ;CHECK: vpacksswb (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0x63,0x0f]
+ %b = load <16 x i16>, <16 x i16>* %ptr_b
+ %res = call <32 x i8> @llvm.x86.avx512.mask.packsswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> %passThru, i32 %mask)
+ ret <32 x i8> %res
+}
+
+define <32 x i8> @test_mask_packs_epi16_rmkz_256(<16 x i16> %a, <16 x i16>* %ptr_b, i32 %mask) {
+ ;CHECK-LABEL: test_mask_packs_epi16_rmkz_256
+ ;CHECK: vpacksswb (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xa9,0x63,0x07]
+ %b = load <16 x i16>, <16 x i16>* %ptr_b
+ %res = call <32 x i8> @llvm.x86.avx512.mask.packsswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> zeroinitializer, i32 %mask)
+ ret <32 x i8> %res
+}
+
+declare <32 x i8> @llvm.x86.avx512.mask.packsswb.256(<16 x i16>, <16 x i16>, <32 x i8>, i32)
+
+
+define <8 x i16> @test_mask_packus_epi32_rr_128(<4 x i32> %a, <4 x i32> %b) {
+ ;CHECK-LABEL: test_mask_packus_epi32_rr_128
+ ;CHECK: vpackusdw %xmm1, %xmm0, %xmm0
+ %res = call <8 x i16> @llvm.x86.avx512.mask.packusdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 -1)
+ ret <8 x i16> %res
+}
+
+define <8 x i16> @test_mask_packus_epi32_rrk_128(<4 x i32> %a, <4 x i32> %b, <8 x i16> %passThru, i8 %mask) {
+ ;CHECK-LABEL: test_mask_packus_epi32_rrk_128
+ ;CHECK: vpackusdw %xmm1, %xmm0, %xmm2 {%k1}
+ %res = call <8 x i16> @llvm.x86.avx512.mask.packusdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> %passThru, i8 %mask)
+ ret <8 x i16> %res
+}
+
+define <8 x i16> @test_mask_packus_epi32_rrkz_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) {
+ ;CHECK-LABEL: test_mask_packus_epi32_rrkz_128
+ ;CHECK: vpackusdw %xmm1, %xmm0, %xmm0 {%k1} {z}
+ %res = call <8 x i16> @llvm.x86.avx512.mask.packusdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 %mask)
+ ret <8 x i16> %res
+}
+
+define <8 x i16> @test_mask_packus_epi32_rm_128(<4 x i32> %a, <4 x i32>* %ptr_b) {
+ ;CHECK-LABEL: test_mask_packus_epi32_rm_128
+ ;CHECK: vpackusdw (%rdi), %xmm0, %xmm0
+ %b = load <4 x i32>, <4 x i32>* %ptr_b
+ %res = call <8 x i16> @llvm.x86.avx512.mask.packusdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 -1)
+ ret <8 x i16> %res
+}
+
+define <8 x i16> @test_mask_packus_epi32_rmk_128(<4 x i32> %a, <4 x i32>* %ptr_b, <8 x i16> %passThru, i8 %mask) {
+ ;CHECK-LABEL: test_mask_packus_epi32_rmk_128
+ ;CHECK: vpackusdw (%rdi), %xmm0, %xmm1 {%k1}
+ %b = load <4 x i32>, <4 x i32>* %ptr_b
+ %res = call <8 x i16> @llvm.x86.avx512.mask.packusdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> %passThru, i8 %mask)
+ ret <8 x i16> %res
+}
+
+define <8 x i16> @test_mask_packus_epi32_rmkz_128(<4 x i32> %a, <4 x i32>* %ptr_b, i8 %mask) {
+ ;CHECK-LABEL: test_mask_packus_epi32_rmkz_128
+ ;CHECK: vpackusdw (%rdi), %xmm0, %xmm0 {%k1} {z}
+ %b = load <4 x i32>, <4 x i32>* %ptr_b
+ %res = call <8 x i16> @llvm.x86.avx512.mask.packusdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 %mask)
+ ret <8 x i16> %res
+}
+
+define <8 x i16> @test_mask_packus_epi32_rmb_128(<4 x i32> %a, i32* %ptr_b) {
+ ;CHECK-LABEL: test_mask_packus_epi32_rmb_128
+ ;CHECK: vpackusdw (%rdi){1to4}, %xmm0, %xmm0
+ %q = load i32, i32* %ptr_b
+ %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
+ %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
+ %res = call <8 x i16> @llvm.x86.avx512.mask.packusdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 -1)
+ ret <8 x i16> %res
+}
+
+define <8 x i16> @test_mask_packus_epi32_rmbk_128(<4 x i32> %a, i32* %ptr_b, <8 x i16> %passThru, i8 %mask) {
+ ;CHECK-LABEL: test_mask_packus_epi32_rmbk_128
+ ;CHECK: vpackusdw (%rdi){1to4}, %xmm0, %xmm1 {%k1}
+ %q = load i32, i32* %ptr_b
+ %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
+ %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
+ %res = call <8 x i16> @llvm.x86.avx512.mask.packusdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> %passThru, i8 %mask)
+ ret <8 x i16> %res
+}
+
+define <8 x i16> @test_mask_packus_epi32_rmbkz_128(<4 x i32> %a, i32* %ptr_b, i8 %mask) {
+ ;CHECK-LABEL: test_mask_packus_epi32_rmbkz_128
+ ;CHECK: vpackusdw (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z}
+ %q = load i32, i32* %ptr_b
+ %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
+ %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
+ %res = call <8 x i16> @llvm.x86.avx512.mask.packusdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 %mask)
+ ret <8 x i16> %res
+}
+
+declare <8 x i16> @llvm.x86.avx512.mask.packusdw.128(<4 x i32>, <4 x i32>, <8 x i16>, i8)
+
+define <16 x i16> @test_mask_packus_epi32_rr_256(<8 x i32> %a, <8 x i32> %b) {
+ ;CHECK-LABEL: test_mask_packus_epi32_rr_256
+ ;CHECK: vpackusdw %ymm1, %ymm0, %ymm0
+ %res = call <16 x i16> @llvm.x86.avx512.mask.packusdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 -1)
+ ret <16 x i16> %res
+}
+
+define <16 x i16> @test_mask_packus_epi32_rrk_256(<8 x i32> %a, <8 x i32> %b, <16 x i16> %passThru, i16 %mask) {
+ ;CHECK-LABEL: test_mask_packus_epi32_rrk_256
+ ;CHECK: vpackusdw %ymm1, %ymm0, %ymm2 {%k1}
+ %res = call <16 x i16> @llvm.x86.avx512.mask.packusdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> %passThru, i16 %mask)
+ ret <16 x i16> %res
+}
+
+define <16 x i16> @test_mask_packus_epi32_rrkz_256(<8 x i32> %a, <8 x i32> %b, i16 %mask) {
+ ;CHECK-LABEL: test_mask_packus_epi32_rrkz_256
+ ;CHECK: vpackusdw %ymm1, %ymm0, %ymm0 {%k1} {z}
+ %res = call <16 x i16> @llvm.x86.avx512.mask.packusdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 %mask)
+ ret <16 x i16> %res
+}
+
+define <16 x i16> @test_mask_packus_epi32_rm_256(<8 x i32> %a, <8 x i32>* %ptr_b) {
+ ;CHECK-LABEL: test_mask_packus_epi32_rm_256
+ ;CHECK: vpackusdw (%rdi), %ymm0, %ymm0
+ %b = load <8 x i32>, <8 x i32>* %ptr_b
+ %res = call <16 x i16> @llvm.x86.avx512.mask.packusdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 -1)
+ ret <16 x i16> %res
+}
+
+define <16 x i16> @test_mask_packus_epi32_rmk_256(<8 x i32> %a, <8 x i32>* %ptr_b, <16 x i16> %passThru, i16 %mask) {
+ ;CHECK-LABEL: test_mask_packus_epi32_rmk_256
+ ;CHECK: vpackusdw (%rdi), %ymm0, %ymm1 {%k1}
+ %b = load <8 x i32>, <8 x i32>* %ptr_b
+ %res = call <16 x i16> @llvm.x86.avx512.mask.packusdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> %passThru, i16 %mask)
+ ret <16 x i16> %res
+}
+
+define <16 x i16> @test_mask_packus_epi32_rmkz_256(<8 x i32> %a, <8 x i32>* %ptr_b, i16 %mask) {
+ ;CHECK-LABEL: test_mask_packus_epi32_rmkz_256
+ ;CHECK: vpackusdw (%rdi), %ymm0, %ymm0 {%k1} {z}
+ %b = load <8 x i32>, <8 x i32>* %ptr_b
+ %res = call <16 x i16> @llvm.x86.avx512.mask.packusdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 %mask)
+ ret <16 x i16> %res
+}
+
+define <16 x i16> @test_mask_packus_epi32_rmb_256(<8 x i32> %a, i32* %ptr_b) {
+ ;CHECK-LABEL: test_mask_packus_epi32_rmb_256
+ ;CHECK: vpackusdw (%rdi){1to8}, %ymm0, %ymm0
+ %q = load i32, i32* %ptr_b
+ %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
+ %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
+ %res = call <16 x i16> @llvm.x86.avx512.mask.packusdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 -1)
+ ret <16 x i16> %res
+}
+
+define <16 x i16> @test_mask_packus_epi32_rmbk_256(<8 x i32> %a, i32* %ptr_b, <16 x i16> %passThru, i16 %mask) {
+ ;CHECK-LABEL: test_mask_packus_epi32_rmbk_256
+ ;CHECK: vpackusdw (%rdi){1to8}, %ymm0, %ymm1 {%k1}
+ %q = load i32, i32* %ptr_b
+ %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
+ %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
+ %res = call <16 x i16> @llvm.x86.avx512.mask.packusdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> %passThru, i16 %mask)
+ ret <16 x i16> %res
+}
+
+define <16 x i16> @test_mask_packus_epi32_rmbkz_256(<8 x i32> %a, i32* %ptr_b, i16 %mask) {
+ ;CHECK-LABEL: test_mask_packus_epi32_rmbkz_256
+ ;CHECK: vpackusdw (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z}
+ %q = load i32, i32* %ptr_b
+ %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
+ %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
+ %res = call <16 x i16> @llvm.x86.avx512.mask.packusdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 %mask)
+ ret <16 x i16> %res
+}
+
+declare <16 x i16> @llvm.x86.avx512.mask.packusdw.256(<8 x i32>, <8 x i32>, <16 x i16>, i16)
+
+define <16 x i8> @test_mask_packus_epi16_rr_128(<8 x i16> %a, <8 x i16> %b) {
+ ;CHECK-LABEL: test_mask_packus_epi16_rr_128
+ ;CHECK: vpackuswb %xmm1, %xmm0, %xmm0
+ %res = call <16 x i8> @llvm.x86.avx512.mask.packuswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> zeroinitializer, i16 -1)
+ ret <16 x i8> %res
+}
+
+define <16 x i8> @test_mask_packus_epi16_rrk_128(<8 x i16> %a, <8 x i16> %b, <16 x i8> %passThru, i16 %mask) {
+ ;CHECK-LABEL: test_mask_packus_epi16_rrk_128
+ ;CHECK: vpackuswb %xmm1, %xmm0, %xmm2 {%k1}
+ %res = call <16 x i8> @llvm.x86.avx512.mask.packuswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> %passThru, i16 %mask)
+ ret <16 x i8> %res
+}
+
+define <16 x i8> @test_mask_packus_epi16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i16 %mask) {
+ ;CHECK-LABEL: test_mask_packus_epi16_rrkz_128
+ ;CHECK: vpackuswb %xmm1, %xmm0, %xmm0 {%k1} {z}
+ %res = call <16 x i8> @llvm.x86.avx512.mask.packuswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> zeroinitializer, i16 %mask)
+ ret <16 x i8> %res
+}
+
+define <16 x i8> @test_mask_packus_epi16_rm_128(<8 x i16> %a, <8 x i16>* %ptr_b) {
+ ;CHECK-LABEL: test_mask_packus_epi16_rm_128
+ ;CHECK: vpackuswb (%rdi), %xmm0, %xmm0
+ %b = load <8 x i16>, <8 x i16>* %ptr_b
+ %res = call <16 x i8> @llvm.x86.avx512.mask.packuswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> zeroinitializer, i16 -1)
+ ret <16 x i8> %res
+}
+
+define <16 x i8> @test_mask_packus_epi16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, <16 x i8> %passThru, i16 %mask) {
+ ;CHECK-LABEL: test_mask_packus_epi16_rmk_128
+ ;CHECK: vpackuswb (%rdi), %xmm0, %xmm1 {%k1}
+ %b = load <8 x i16>, <8 x i16>* %ptr_b
+ %res = call <16 x i8> @llvm.x86.avx512.mask.packuswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> %passThru, i16 %mask)
+ ret <16 x i8> %res
+}
+
+define <16 x i8> @test_mask_packus_epi16_rmkz_128(<8 x i16> %a, <8 x i16>* %ptr_b, i16 %mask) {
+ ;CHECK-LABEL: test_mask_packus_epi16_rmkz_128
+ ;CHECK: vpackuswb (%rdi), %xmm0, %xmm0 {%k1} {z}
+ %b = load <8 x i16>, <8 x i16>* %ptr_b
+ %res = call <16 x i8> @llvm.x86.avx512.mask.packuswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> zeroinitializer, i16 %mask)
+ ret <16 x i8> %res
+}
+
+declare <16 x i8> @llvm.x86.avx512.mask.packuswb.128(<8 x i16>, <8 x i16>, <16 x i8>, i16)
+
+define <32 x i8> @test_mask_packus_epi16_rr_256(<16 x i16> %a, <16 x i16> %b) {
+ ;CHECK-LABEL: test_mask_packus_epi16_rr_256
+ ;CHECK: vpackuswb %ymm1, %ymm0, %ymm0
+ %res = call <32 x i8> @llvm.x86.avx512.mask.packuswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> zeroinitializer, i32 -1)
+ ret <32 x i8> %res
+}
+
+define <32 x i8> @test_mask_packus_epi16_rrk_256(<16 x i16> %a, <16 x i16> %b, <32 x i8> %passThru, i32 %mask) {
+ ;CHECK-LABEL: test_mask_packus_epi16_rrk_256
+ ;CHECK: vpackuswb %ymm1, %ymm0, %ymm2 {%k1}
+ %res = call <32 x i8> @llvm.x86.avx512.mask.packuswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> %passThru, i32 %mask)
+ ret <32 x i8> %res
+}
+
+define <32 x i8> @test_mask_packus_epi16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i32 %mask) {
+ ;CHECK-LABEL: test_mask_packus_epi16_rrkz_256
+ ;CHECK: vpackuswb %ymm1, %ymm0, %ymm0 {%k1} {z}
+ %res = call <32 x i8> @llvm.x86.avx512.mask.packuswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> zeroinitializer, i32 %mask)
+ ret <32 x i8> %res
+}
+
+define <32 x i8> @test_mask_packus_epi16_rm_256(<16 x i16> %a, <16 x i16>* %ptr_b) {
+ ;CHECK-LABEL: test_mask_packus_epi16_rm_256
+ ;CHECK: vpackuswb (%rdi), %ymm0, %ymm0
+ %b = load <16 x i16>, <16 x i16>* %ptr_b
+ %res = call <32 x i8> @llvm.x86.avx512.mask.packuswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> zeroinitializer, i32 -1)
+ ret <32 x i8> %res
+}
+
+define <32 x i8> @test_mask_packus_epi16_rmk_256(<16 x i16> %a, <16 x i16>* %ptr_b, <32 x i8> %passThru, i32 %mask) {
+ ;CHECK-LABEL: test_mask_packus_epi16_rmk_256
+ ;CHECK: vpackuswb (%rdi), %ymm0, %ymm1 {%k1}
+ %b = load <16 x i16>, <16 x i16>* %ptr_b
+ %res = call <32 x i8> @llvm.x86.avx512.mask.packuswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> %passThru, i32 %mask)
+ ret <32 x i8> %res
+}
+
+define <32 x i8> @test_mask_packus_epi16_rmkz_256(<16 x i16> %a, <16 x i16>* %ptr_b, i32 %mask) {
+ ;CHECK-LABEL: test_mask_packus_epi16_rmkz_256
+ ;CHECK: vpackuswb (%rdi), %ymm0, %ymm0 {%k1} {z}
+ %b = load <16 x i16>, <16 x i16>* %ptr_b
+ %res = call <32 x i8> @llvm.x86.avx512.mask.packuswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> zeroinitializer, i32 %mask)
+ ret <32 x i8> %res
+}
+
+declare <32 x i8> @llvm.x86.avx512.mask.packuswb.256(<16 x i16>, <16 x i16>, <32 x i8>, i32)
+
+define <8 x i16> @test_mask_adds_epi16_rr_128(<8 x i16> %a, <8 x i16> %b) {
+ ;CHECK-LABEL: test_mask_adds_epi16_rr_128
+ ;CHECK: vpaddsw %xmm1, %xmm0, %xmm0
+ %res = call <8 x i16> @llvm.x86.avx512.mask.padds.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1)
+ ret <8 x i16> %res
+}
+
+define <8 x i16> @test_mask_adds_epi16_rrk_128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) {
+ ;CHECK-LABEL: test_mask_adds_epi16_rrk_128
+ ;CHECK: vpaddsw %xmm1, %xmm0, %xmm2 {%k1}
+ %res = call <8 x i16> @llvm.x86.avx512.mask.padds.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask)
+ ret <8 x i16> %res
+}
+
+define <8 x i16> @test_mask_adds_epi16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) {
+ ;CHECK-LABEL: test_mask_adds_epi16_rrkz_128
+ ;CHECK: vpaddsw %xmm1, %xmm0, %xmm0 {%k1} {z}
+ %res = call <8 x i16> @llvm.x86.avx512.mask.padds.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask)
+ ret <8 x i16> %res
+}
+
+define <8 x i16> @test_mask_adds_epi16_rm_128(<8 x i16> %a, <8 x i16>* %ptr_b) {
+ ;CHECK-LABEL: test_mask_adds_epi16_rm_128
+ ;CHECK: vpaddsw (%rdi), %xmm0, %xmm0
+ %b = load <8 x i16>, <8 x i16>* %ptr_b
+ %res = call <8 x i16> @llvm.x86.avx512.mask.padds.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1)
+ ret <8 x i16> %res
+}
+
+define <8 x i16> @test_mask_adds_epi16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, <8 x i16> %passThru, i8 %mask) {
+ ;CHECK-LABEL: test_mask_adds_epi16_rmk_128
+ ;CHECK: vpaddsw (%rdi), %xmm0, %xmm1 {%k1}
+ %b = load <8 x i16>, <8 x i16>* %ptr_b
+ %res = call <8 x i16> @llvm.x86.avx512.mask.padds.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask)
+ ret <8 x i16> %res
+}
+
+define <8 x i16> @test_mask_adds_epi16_rmkz_128(<8 x i16> %a, <8 x i16>* %ptr_b, i8 %mask) {
+ ;CHECK-LABEL: test_mask_adds_epi16_rmkz_128
+ ;CHECK: vpaddsw (%rdi), %xmm0, %xmm0 {%k1} {z}
+ %b = load <8 x i16>, <8 x i16>* %ptr_b
+ %res = call <8 x i16> @llvm.x86.avx512.mask.padds.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask)
+ ret <8 x i16> %res
+}
+
+declare <8 x i16> @llvm.x86.avx512.mask.padds.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
+
+define <16 x i16> @test_mask_adds_epi16_rr_256(<16 x i16> %a, <16 x i16> %b) {
+ ;CHECK-LABEL: test_mask_adds_epi16_rr_256
+ ;CHECK: vpaddsw %ymm1, %ymm0, %ymm0
+ %res = call <16 x i16> @llvm.x86.avx512.mask.padds.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1)
+ ret <16 x i16> %res
+}
+
+define <16 x i16> @test_mask_adds_epi16_rrk_256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) {
+ ;CHECK-LABEL: test_mask_adds_epi16_rrk_256
+ ;CHECK: vpaddsw %ymm1, %ymm0, %ymm2 {%k1}
+ %res = call <16 x i16> @llvm.x86.avx512.mask.padds.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask)
+ ret <16 x i16> %res
+}
+
+define <16 x i16> @test_mask_adds_epi16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) {
+ ;CHECK-LABEL: test_mask_adds_epi16_rrkz_256
+ ;CHECK: vpaddsw %ymm1, %ymm0, %ymm0 {%k1} {z}
+ %res = call <16 x i16> @llvm.x86.avx512.mask.padds.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask)
+ ret <16 x i16> %res
+}
+
+define <16 x i16> @test_mask_adds_epi16_rm_256(<16 x i16> %a, <16 x i16>* %ptr_b) {
+ ;CHECK-LABEL: test_mask_adds_epi16_rm_256
+ ;CHECK: vpaddsw (%rdi), %ymm0, %ymm0
+ %b = load <16 x i16>, <16 x i16>* %ptr_b
+ %res = call <16 x i16> @llvm.x86.avx512.mask.padds.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1)
+ ret <16 x i16> %res
+}
+
+define <16 x i16> @test_mask_adds_epi16_rmk_256(<16 x i16> %a, <16 x i16>* %ptr_b, <16 x i16> %passThru, i16 %mask) {
+ ;CHECK-LABEL: test_mask_adds_epi16_rmk_256
+ ;CHECK: vpaddsw (%rdi), %ymm0, %ymm1 {%k1}
+ %b = load <16 x i16>, <16 x i16>* %ptr_b
+ %res = call <16 x i16> @llvm.x86.avx512.mask.padds.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask)
+ ret <16 x i16> %res
+}
+
+define <16 x i16> @test_mask_adds_epi16_rmkz_256(<16 x i16> %a, <16 x i16>* %ptr_b, i16 %mask) {
+ ;CHECK-LABEL: test_mask_adds_epi16_rmkz_256
+ ;CHECK: vpaddsw (%rdi), %ymm0, %ymm0 {%k1} {z}
+ %b = load <16 x i16>, <16 x i16>* %ptr_b
+ %res = call <16 x i16> @llvm.x86.avx512.mask.padds.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask)
+ ret <16 x i16> %res
+}
+
+declare <16 x i16> @llvm.x86.avx512.mask.padds.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
+
+define <8 x i16> @test_mask_subs_epi16_rr_128(<8 x i16> %a, <8 x i16> %b) {
+ ;CHECK-LABEL: test_mask_subs_epi16_rr_128
+ ;CHECK: vpsubsw %xmm1, %xmm0, %xmm0
+ %res = call <8 x i16> @llvm.x86.avx512.mask.psubs.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1)
+ ret <8 x i16> %res
+}
+
+define <8 x i16> @test_mask_subs_epi16_rrk_128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) {
+ ;CHECK-LABEL: test_mask_subs_epi16_rrk_128
+ ;CHECK: vpsubsw %xmm1, %xmm0, %xmm2 {%k1}
+ %res = call <8 x i16> @llvm.x86.avx512.mask.psubs.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask)
+ ret <8 x i16> %res
+}
+
+define <8 x i16> @test_mask_subs_epi16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) {
+ ;CHECK-LABEL: test_mask_subs_epi16_rrkz_128
+ ;CHECK: vpsubsw %xmm1, %xmm0, %xmm0 {%k1} {z}
+ %res = call <8 x i16> @llvm.x86.avx512.mask.psubs.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask)
+ ret <8 x i16> %res
+}
+
+define <8 x i16> @test_mask_subs_epi16_rm_128(<8 x i16> %a, <8 x i16>* %ptr_b) {
+ ;CHECK-LABEL: test_mask_subs_epi16_rm_128
+ ;CHECK: vpsubsw (%rdi), %xmm0, %xmm0
+ %b = load <8 x i16>, <8 x i16>* %ptr_b
+ %res = call <8 x i16> @llvm.x86.avx512.mask.psubs.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1)
+ ret <8 x i16> %res
+}
+
+define <8 x i16> @test_mask_subs_epi16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, <8 x i16> %passThru, i8 %mask) {
+ ;CHECK-LABEL: test_mask_subs_epi16_rmk_128
+ ;CHECK: vpsubsw (%rdi), %xmm0, %xmm1 {%k1}
+ %b = load <8 x i16>, <8 x i16>* %ptr_b
+ %res = call <8 x i16> @llvm.x86.avx512.mask.psubs.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask)
+ ret <8 x i16> %res
+}
+
+define <8 x i16> @test_mask_subs_epi16_rmkz_128(<8 x i16> %a, <8 x i16>* %ptr_b, i8 %mask) {
+ ;CHECK-LABEL: test_mask_subs_epi16_rmkz_128
+ ;CHECK: vpsubsw (%rdi), %xmm0, %xmm0 {%k1} {z}
+ %b = load <8 x i16>, <8 x i16>* %ptr_b
+ %res = call <8 x i16> @llvm.x86.avx512.mask.psubs.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask)
+ ret <8 x i16> %res
+}
+
+declare <8 x i16> @llvm.x86.avx512.mask.psubs.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
+
+define <16 x i16> @test_mask_subs_epi16_rr_256(<16 x i16> %a, <16 x i16> %b) {
+ ;CHECK-LABEL: test_mask_subs_epi16_rr_256
+ ;CHECK: vpsubsw %ymm1, %ymm0, %ymm0
+ %res = call <16 x i16> @llvm.x86.avx512.mask.psubs.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1)
+ ret <16 x i16> %res
+}
+
+define <16 x i16> @test_mask_subs_epi16_rrk_256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) {
+ ;CHECK-LABEL: test_mask_subs_epi16_rrk_256
+ ;CHECK: vpsubsw %ymm1, %ymm0, %ymm2 {%k1}
+ %res = call <16 x i16> @llvm.x86.avx512.mask.psubs.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask)
+ ret <16 x i16> %res
+}
+
+define <16 x i16> @test_mask_subs_epi16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) {
+ ;CHECK-LABEL: test_mask_subs_epi16_rrkz_256
+ ;CHECK: vpsubsw %ymm1, %ymm0, %ymm0 {%k1} {z}
+ %res = call <16 x i16> @llvm.x86.avx512.mask.psubs.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask)
+ ret <16 x i16> %res
+}
+
+define <16 x i16> @test_mask_subs_epi16_rm_256(<16 x i16> %a, <16 x i16>* %ptr_b) {
+ ;CHECK-LABEL: test_mask_subs_epi16_rm_256
+ ;CHECK: vpsubsw (%rdi), %ymm0, %ymm0
+ %b = load <16 x i16>, <16 x i16>* %ptr_b
+ %res = call <16 x i16> @llvm.x86.avx512.mask.psubs.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1)
+ ret <16 x i16> %res
+}
+
+define <16 x i16> @test_mask_subs_epi16_rmk_256(<16 x i16> %a, <16 x i16>* %ptr_b, <16 x i16> %passThru, i16 %mask) {
+ ;CHECK-LABEL: test_mask_subs_epi16_rmk_256
+ ;CHECK: vpsubsw (%rdi), %ymm0, %ymm1 {%k1}
+ %b = load <16 x i16>, <16 x i16>* %ptr_b
+ %res = call <16 x i16> @llvm.x86.avx512.mask.psubs.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask)
+ ret <16 x i16> %res
+}
+
+define <16 x i16> @test_mask_subs_epi16_rmkz_256(<16 x i16> %a, <16 x i16>* %ptr_b, i16 %mask) {
+ ;CHECK-LABEL: test_mask_subs_epi16_rmkz_256
+ ;CHECK: vpsubsw (%rdi), %ymm0, %ymm0 {%k1} {z}
+ %b = load <16 x i16>, <16 x i16>* %ptr_b
+ %res = call <16 x i16> @llvm.x86.avx512.mask.psubs.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask)
+ ret <16 x i16> %res
+}
+
+declare <16 x i16> @llvm.x86.avx512.mask.psubs.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
+
+define <8 x i16> @test_mask_adds_epu16_rr_128(<8 x i16> %a, <8 x i16> %b) {
+ ;CHECK-LABEL: test_mask_adds_epu16_rr_128
+ ;CHECK: vpaddusw %xmm1, %xmm0, %xmm0
+ %res = call <8 x i16> @llvm.x86.avx512.mask.paddus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1)
+ ret <8 x i16> %res
+}
+
+define <8 x i16> @test_mask_adds_epu16_rrk_128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) {
+ ;CHECK-LABEL: test_mask_adds_epu16_rrk_128
+ ;CHECK: vpaddusw %xmm1, %xmm0, %xmm2 {%k1}
+ %res = call <8 x i16> @llvm.x86.avx512.mask.paddus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask)
+ ret <8 x i16> %res
+}
+
+define <8 x i16> @test_mask_adds_epu16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) {
+ ;CHECK-LABEL: test_mask_adds_epu16_rrkz_128
+ ;CHECK: vpaddusw %xmm1, %xmm0, %xmm0 {%k1} {z}
+ %res = call <8 x i16> @llvm.x86.avx512.mask.paddus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask)
+ ret <8 x i16> %res
+}
+
+define <8 x i16> @test_mask_adds_epu16_rm_128(<8 x i16> %a, <8 x i16>* %ptr_b) {
+ ;CHECK-LABEL: test_mask_adds_epu16_rm_128
+ ;CHECK: vpaddusw (%rdi), %xmm0, %xmm0
+ %b = load <8 x i16>, <8 x i16>* %ptr_b
+ %res = call <8 x i16> @llvm.x86.avx512.mask.paddus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1)
+ ret <8 x i16> %res
+}
+
+define <8 x i16> @test_mask_adds_epu16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, <8 x i16> %passThru, i8 %mask) {
+ ;CHECK-LABEL: test_mask_adds_epu16_rmk_128
+ ;CHECK: vpaddusw (%rdi), %xmm0, %xmm1 {%k1}
+ %b = load <8 x i16>, <8 x i16>* %ptr_b
+ %res = call <8 x i16> @llvm.x86.avx512.mask.paddus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask)
+ ret <8 x i16> %res
+}
+
+define <8 x i16> @test_mask_adds_epu16_rmkz_128(<8 x i16> %a, <8 x i16>* %ptr_b, i8 %mask) {
+ ;CHECK-LABEL: test_mask_adds_epu16_rmkz_128
+ ;CHECK: vpaddusw (%rdi), %xmm0, %xmm0 {%k1} {z}
+ %b = load <8 x i16>, <8 x i16>* %ptr_b
+ %res = call <8 x i16> @llvm.x86.avx512.mask.paddus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask)
+ ret <8 x i16> %res
+}
+
+declare <8 x i16> @llvm.x86.avx512.mask.paddus.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
+
+define <16 x i16> @test_mask_adds_epu16_rr_256(<16 x i16> %a, <16 x i16> %b) {
+ ;CHECK-LABEL: test_mask_adds_epu16_rr_256
+ ;CHECK: vpaddusw %ymm1, %ymm0, %ymm0
+ %res = call <16 x i16> @llvm.x86.avx512.mask.paddus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1)
+ ret <16 x i16> %res
+}
+
+define <16 x i16> @test_mask_adds_epu16_rrk_256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) {
+ ;CHECK-LABEL: test_mask_adds_epu16_rrk_256
+ ;CHECK: vpaddusw %ymm1, %ymm0, %ymm2 {%k1}
+ %res = call <16 x i16> @llvm.x86.avx512.mask.paddus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask)
+ ret <16 x i16> %res
+}
+
+define <16 x i16> @test_mask_adds_epu16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) {
+ ;CHECK-LABEL: test_mask_adds_epu16_rrkz_256
+ ;CHECK: vpaddusw %ymm1, %ymm0, %ymm0 {%k1} {z}
+ %res = call <16 x i16> @llvm.x86.avx512.mask.paddus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask)
+ ret <16 x i16> %res
+}
+
+define <16 x i16> @test_mask_adds_epu16_rm_256(<16 x i16> %a, <16 x i16>* %ptr_b) {
+ ;CHECK-LABEL: test_mask_adds_epu16_rm_256
+ ;CHECK: vpaddusw (%rdi), %ymm0, %ymm0
+ %b = load <16 x i16>, <16 x i16>* %ptr_b
+ %res = call <16 x i16> @llvm.x86.avx512.mask.paddus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1)
+ ret <16 x i16> %res
+}
+
+define <16 x i16> @test_mask_adds_epu16_rmk_256(<16 x i16> %a, <16 x i16>* %ptr_b, <16 x i16> %passThru, i16 %mask) {
+ ;CHECK-LABEL: test_mask_adds_epu16_rmk_256
+ ;CHECK: vpaddusw (%rdi), %ymm0, %ymm1 {%k1}
+ %b = load <16 x i16>, <16 x i16>* %ptr_b
+ %res = call <16 x i16> @llvm.x86.avx512.mask.paddus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask)
+ ret <16 x i16> %res
+}
+
+define <16 x i16> @test_mask_adds_epu16_rmkz_256(<16 x i16> %a, <16 x i16>* %ptr_b, i16 %mask) {
+ ;CHECK-LABEL: test_mask_adds_epu16_rmkz_256
+ ;CHECK: vpaddusw (%rdi), %ymm0, %ymm0 {%k1} {z}
+ %b = load <16 x i16>, <16 x i16>* %ptr_b
+ %res = call <16 x i16> @llvm.x86.avx512.mask.paddus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask)
+ ret <16 x i16> %res
+}
+
+declare <16 x i16> @llvm.x86.avx512.mask.paddus.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
+
+define <8 x i16> @test_mask_subs_epu16_rr_128(<8 x i16> %a, <8 x i16> %b) {
+ ;CHECK-LABEL: test_mask_subs_epu16_rr_128
+ ;CHECK: vpsubusw %xmm1, %xmm0, %xmm0
+ %res = call <8 x i16> @llvm.x86.avx512.mask.psubus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1)
+ ret <8 x i16> %res
+}
+
+define <8 x i16> @test_mask_subs_epu16_rrk_128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) {
+ ;CHECK-LABEL: test_mask_subs_epu16_rrk_128
+ ;CHECK: vpsubusw %xmm1, %xmm0, %xmm2 {%k1}
+ %res = call <8 x i16> @llvm.x86.avx512.mask.psubus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask)
+ ret <8 x i16> %res
+}
+
+define <8 x i16> @test_mask_subs_epu16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) {
+ ;CHECK-LABEL: test_mask_subs_epu16_rrkz_128
+ ;CHECK: vpsubusw %xmm1, %xmm0, %xmm0 {%k1} {z}
+ %res = call <8 x i16> @llvm.x86.avx512.mask.psubus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask)
+ ret <8 x i16> %res
+}
+
+define <8 x i16> @test_mask_subs_epu16_rm_128(<8 x i16> %a, <8 x i16>* %ptr_b) {
+ ;CHECK-LABEL: test_mask_subs_epu16_rm_128
+ ;CHECK: vpsubusw (%rdi), %xmm0, %xmm0
+ %b = load <8 x i16>, <8 x i16>* %ptr_b
+ %res = call <8 x i16> @llvm.x86.avx512.mask.psubus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1)
+ ret <8 x i16> %res
+}
+
+define <8 x i16> @test_mask_subs_epu16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, <8 x i16> %passThru, i8 %mask) {
+ ;CHECK-LABEL: test_mask_subs_epu16_rmk_128
+ ;CHECK: vpsubusw (%rdi), %xmm0, %xmm1 {%k1}
+ %b = load <8 x i16>, <8 x i16>* %ptr_b
+ %res = call <8 x i16> @llvm.x86.avx512.mask.psubus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask)
+ ret <8 x i16> %res
+}
+
+define <8 x i16> @test_mask_subs_epu16_rmkz_128(<8 x i16> %a, <8 x i16>* %ptr_b, i8 %mask) {
+ ;CHECK-LABEL: test_mask_subs_epu16_rmkz_128
+ ;CHECK: vpsubusw (%rdi), %xmm0, %xmm0 {%k1} {z}
+ %b = load <8 x i16>, <8 x i16>* %ptr_b
+ %res = call <8 x i16> @llvm.x86.avx512.mask.psubus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask)
+ ret <8 x i16> %res
+}
+
+declare <8 x i16> @llvm.x86.avx512.mask.psubus.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
+
+define <16 x i16> @test_mask_subs_epu16_rr_256(<16 x i16> %a, <16 x i16> %b) {
+ ;CHECK-LABEL: test_mask_subs_epu16_rr_256
+ ;CHECK: vpsubusw %ymm1, %ymm0, %ymm0
+ %res = call <16 x i16> @llvm.x86.avx512.mask.psubus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1)
+ ret <16 x i16> %res
+}
+
+define <16 x i16> @test_mask_subs_epu16_rrk_256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) {
+ ;CHECK-LABEL: test_mask_subs_epu16_rrk_256
+ ;CHECK: vpsubusw %ymm1, %ymm0, %ymm2 {%k1}
+ %res = call <16 x i16> @llvm.x86.avx512.mask.psubus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask)
+ ret <16 x i16> %res
+}
+
+define <16 x i16> @test_mask_subs_epu16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) {
+ ;CHECK-LABEL: test_mask_subs_epu16_rrkz_256
+ ;CHECK: vpsubusw %ymm1, %ymm0, %ymm0 {%k1} {z}
+ %res = call <16 x i16> @llvm.x86.avx512.mask.psubus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask)
+ ret <16 x i16> %res
+}
+
+define <16 x i16> @test_mask_subs_epu16_rm_256(<16 x i16> %a, <16 x i16>* %ptr_b) {
+ ;CHECK-LABEL: test_mask_subs_epu16_rm_256
+ ;CHECK: vpsubusw (%rdi), %ymm0, %ymm0
+ %b = load <16 x i16>, <16 x i16>* %ptr_b
+ %res = call <16 x i16> @llvm.x86.avx512.mask.psubus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1)
+ ret <16 x i16> %res
+}
+
+define <16 x i16> @test_mask_subs_epu16_rmk_256(<16 x i16> %a, <16 x i16>* %ptr_b, <16 x i16> %passThru, i16 %mask) {
+ ;CHECK-LABEL: test_mask_subs_epu16_rmk_256
+ ;CHECK: vpsubusw (%rdi), %ymm0, %ymm1 {%k1}
+ %b = load <16 x i16>, <16 x i16>* %ptr_b
+ %res = call <16 x i16> @llvm.x86.avx512.mask.psubus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask)
+ ret <16 x i16> %res
+}
+
+define <16 x i16> @test_mask_subs_epu16_rmkz_256(<16 x i16> %a, <16 x i16>* %ptr_b, i16 %mask) {
+ ;CHECK-LABEL: test_mask_subs_epu16_rmkz_256
+ ;CHECK: vpsubusw (%rdi), %ymm0, %ymm0 {%k1} {z}
+ %b = load <16 x i16>, <16 x i16>* %ptr_b
+ %res = call <16 x i16> @llvm.x86.avx512.mask.psubus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask)
+ ret <16 x i16> %res
+}
+
+declare <16 x i16> @llvm.x86.avx512.mask.psubus.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
+
+define <16 x i8> @test_mask_adds_epi8_rr_128(<16 x i8> %a, <16 x i8> %b) {
+ ;CHECK-LABEL: test_mask_adds_epi8_rr_128
+ ;CHECK: vpaddsb %xmm1, %xmm0, %xmm0
+ %res = call <16 x i8> @llvm.x86.avx512.mask.padds.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 -1)
+ ret <16 x i8> %res
+}
+
+define <16 x i8> @test_mask_adds_epi8_rrk_128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask) {
+ ;CHECK-LABEL: test_mask_adds_epi8_rrk_128
+ ;CHECK: vpaddsb %xmm1, %xmm0, %xmm2 {%k1}
+ %res = call <16 x i8> @llvm.x86.avx512.mask.padds.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask)
+ ret <16 x i8> %res
+}
+
+define <16 x i8> @test_mask_adds_epi8_rrkz_128(<16 x i8> %a, <16 x i8> %b, i16 %mask) {
+ ;CHECK-LABEL: test_mask_adds_epi8_rrkz_128
+ ;CHECK: vpaddsb %xmm1, %xmm0, %xmm0 {%k1} {z}
+ %res = call <16 x i8> @llvm.x86.avx512.mask.padds.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 %mask)
+ ret <16 x i8> %res
+}
+
+define <16 x i8> @test_mask_adds_epi8_rm_128(<16 x i8> %a, <16 x i8>* %ptr_b) {
+ ;CHECK-LABEL: test_mask_adds_epi8_rm_128
+ ;CHECK: vpaddsb (%rdi), %xmm0, %xmm0
+ %b = load <16 x i8>, <16 x i8>* %ptr_b
+ %res = call <16 x i8> @llvm.x86.avx512.mask.padds.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 -1)
+ ret <16 x i8> %res
+}
+
+define <16 x i8> @test_mask_adds_epi8_rmk_128(<16 x i8> %a, <16 x i8>* %ptr_b, <16 x i8> %passThru, i16 %mask) {
+ ;CHECK-LABEL: test_mask_adds_epi8_rmk_128
+ ;CHECK: vpaddsb (%rdi), %xmm0, %xmm1 {%k1}
+ %b = load <16 x i8>, <16 x i8>* %ptr_b
+ %res = call <16 x i8> @llvm.x86.avx512.mask.padds.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask)
+ ret <16 x i8> %res
+}
+
+define <16 x i8> @test_mask_adds_epi8_rmkz_128(<16 x i8> %a, <16 x i8>* %ptr_b, i16 %mask) {
+ ;CHECK-LABEL: test_mask_adds_epi8_rmkz_128
+ ;CHECK: vpaddsb (%rdi), %xmm0, %xmm0 {%k1} {z}
+ %b = load <16 x i8>, <16 x i8>* %ptr_b
+ %res = call <16 x i8> @llvm.x86.avx512.mask.padds.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 %mask)
+ ret <16 x i8> %res
+}
+
+declare <16 x i8> @llvm.x86.avx512.mask.padds.b.128(<16 x i8>, <16 x i8>, <16 x i8>, i16)
+
+define <32 x i8> @test_mask_adds_epi8_rr_256(<32 x i8> %a, <32 x i8> %b) {
+ ;CHECK-LABEL: test_mask_adds_epi8_rr_256
+ ;CHECK: vpaddsb %ymm1, %ymm0, %ymm0
+ %res = call <32 x i8> @llvm.x86.avx512.mask.padds.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 -1)
+ ret <32 x i8> %res
+}
+
+define <32 x i8> @test_mask_adds_epi8_rrk_256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask) {
+ ;CHECK-LABEL: test_mask_adds_epi8_rrk_256
+ ;CHECK: vpaddsb %ymm1, %ymm0, %ymm2 {%k1}
+ %res = call <32 x i8> @llvm.x86.avx512.mask.padds.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask)
+ ret <32 x i8> %res
+}
+
+define <32 x i8> @test_mask_adds_epi8_rrkz_256(<32 x i8> %a, <32 x i8> %b, i32 %mask) {
+ ;CHECK-LABEL: test_mask_adds_epi8_rrkz_256
+ ;CHECK: vpaddsb %ymm1, %ymm0, %ymm0 {%k1} {z}
+ %res = call <32 x i8> @llvm.x86.avx512.mask.padds.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 %mask)
+ ret <32 x i8> %res
+}
+
+define <32 x i8> @test_mask_adds_epi8_rm_256(<32 x i8> %a, <32 x i8>* %ptr_b) {
+ ;CHECK-LABEL: test_mask_adds_epi8_rm_256
+ ;CHECK: vpaddsb (%rdi), %ymm0, %ymm0
+ %b = load <32 x i8>, <32 x i8>* %ptr_b
+ %res = call <32 x i8> @llvm.x86.avx512.mask.padds.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 -1)
+ ret <32 x i8> %res
+}
+
+define <32 x i8> @test_mask_adds_epi8_rmk_256(<32 x i8> %a, <32 x i8>* %ptr_b, <32 x i8> %passThru, i32 %mask) {
+ ;CHECK-LABEL: test_mask_adds_epi8_rmk_256
+ ;CHECK: vpaddsb (%rdi), %ymm0, %ymm1 {%k1}
+ %b = load <32 x i8>, <32 x i8>* %ptr_b
+ %res = call <32 x i8> @llvm.x86.avx512.mask.padds.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask)
+ ret <32 x i8> %res
+}
+
+define <32 x i8> @test_mask_adds_epi8_rmkz_256(<32 x i8> %a, <32 x i8>* %ptr_b, i32 %mask) {
+ ;CHECK-LABEL: test_mask_adds_epi8_rmkz_256
+ ;CHECK: vpaddsb (%rdi), %ymm0, %ymm0 {%k1} {z}
+ %b = load <32 x i8>, <32 x i8>* %ptr_b
+ %res = call <32 x i8> @llvm.x86.avx512.mask.padds.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 %mask)
+ ret <32 x i8> %res
+}
+
+declare <32 x i8> @llvm.x86.avx512.mask.padds.b.256(<32 x i8>, <32 x i8>, <32 x i8>, i32)
+
+define <16 x i8> @test_mask_subs_epi8_rr_128(<16 x i8> %a, <16 x i8> %b) {
+ ;CHECK-LABEL: test_mask_subs_epi8_rr_128
+ ;CHECK: vpsubsb %xmm1, %xmm0, %xmm0
+ %res = call <16 x i8> @llvm.x86.avx512.mask.psubs.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 -1)
+ ret <16 x i8> %res
+}
+
+define <16 x i8> @test_mask_subs_epi8_rrk_128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask) {
+ ;CHECK-LABEL: test_mask_subs_epi8_rrk_128
+ ;CHECK: vpsubsb %xmm1, %xmm0, %xmm2 {%k1}
+ %res = call <16 x i8> @llvm.x86.avx512.mask.psubs.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask)
+ ret <16 x i8> %res
+}
+
+define <16 x i8> @test_mask_subs_epi8_rrkz_128(<16 x i8> %a, <16 x i8> %b, i16 %mask) {
+ ;CHECK-LABEL: test_mask_subs_epi8_rrkz_128
+ ;CHECK: vpsubsb %xmm1, %xmm0, %xmm0 {%k1} {z}
+ %res = call <16 x i8> @llvm.x86.avx512.mask.psubs.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 %mask)
+ ret <16 x i8> %res
+}
+
+define <16 x i8> @test_mask_subs_epi8_rm_128(<16 x i8> %a, <16 x i8>* %ptr_b) {
+ ;CHECK-LABEL: test_mask_subs_epi8_rm_128
+ ;CHECK: vpsubsb (%rdi), %xmm0, %xmm0
+ %b = load <16 x i8>, <16 x i8>* %ptr_b
+ %res = call <16 x i8> @llvm.x86.avx512.mask.psubs.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 -1)
+ ret <16 x i8> %res
+}
+
+define <16 x i8> @test_mask_subs_epi8_rmk_128(<16 x i8> %a, <16 x i8>* %ptr_b, <16 x i8> %passThru, i16 %mask) {
+ ;CHECK-LABEL: test_mask_subs_epi8_rmk_128
+ ;CHECK: vpsubsb (%rdi), %xmm0, %xmm1 {%k1}
+ %b = load <16 x i8>, <16 x i8>* %ptr_b
+ %res = call <16 x i8> @llvm.x86.avx512.mask.psubs.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask)
+ ret <16 x i8> %res
+}
+
+define <16 x i8> @test_mask_subs_epi8_rmkz_128(<16 x i8> %a, <16 x i8>* %ptr_b, i16 %mask) {
+ ;CHECK-LABEL: test_mask_subs_epi8_rmkz_128
+ ;CHECK: vpsubsb (%rdi), %xmm0, %xmm0 {%k1} {z}
+ %b = load <16 x i8>, <16 x i8>* %ptr_b
+ %res = call <16 x i8> @llvm.x86.avx512.mask.psubs.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 %mask)
+ ret <16 x i8> %res
+}
+
+declare <16 x i8> @llvm.x86.avx512.mask.psubs.b.128(<16 x i8>, <16 x i8>, <16 x i8>, i16)
+
+define <32 x i8> @test_mask_subs_epi8_rr_256(<32 x i8> %a, <32 x i8> %b) {
+ ;CHECK-LABEL: test_mask_subs_epi8_rr_256
+ ;CHECK: vpsubsb %ymm1, %ymm0, %ymm0
+ %res = call <32 x i8> @llvm.x86.avx512.mask.psubs.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 -1)
+ ret <32 x i8> %res
+}
+
+define <32 x i8> @test_mask_subs_epi8_rrk_256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask) {
+ ;CHECK-LABEL: test_mask_subs_epi8_rrk_256
+ ;CHECK: vpsubsb %ymm1, %ymm0, %ymm2 {%k1}
+ %res = call <32 x i8> @llvm.x86.avx512.mask.psubs.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask)
+ ret <32 x i8> %res
+}
+
+define <32 x i8> @test_mask_subs_epi8_rrkz_256(<32 x i8> %a, <32 x i8> %b, i32 %mask) {
+ ;CHECK-LABEL: test_mask_subs_epi8_rrkz_256
+ ;CHECK: vpsubsb %ymm1, %ymm0, %ymm0 {%k1} {z}
+ %res = call <32 x i8> @llvm.x86.avx512.mask.psubs.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 %mask)
+ ret <32 x i8> %res
+}
+
+define <32 x i8> @test_mask_subs_epi8_rm_256(<32 x i8> %a, <32 x i8>* %ptr_b) {
+ ;CHECK-LABEL: test_mask_subs_epi8_rm_256
+ ;CHECK: vpsubsb (%rdi), %ymm0, %ymm0
+ %b = load <32 x i8>, <32 x i8>* %ptr_b
+ %res = call <32 x i8> @llvm.x86.avx512.mask.psubs.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 -1)
+ ret <32 x i8> %res
+}
+
+define <32 x i8> @test_mask_subs_epi8_rmk_256(<32 x i8> %a, <32 x i8>* %ptr_b, <32 x i8> %passThru, i32 %mask) {
+ ;CHECK-LABEL: test_mask_subs_epi8_rmk_256
+ ;CHECK: vpsubsb (%rdi), %ymm0, %ymm1 {%k1}
+ %b = load <32 x i8>, <32 x i8>* %ptr_b
+ %res = call <32 x i8> @llvm.x86.avx512.mask.psubs.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask)
+ ret <32 x i8> %res
+}
+
+define <32 x i8> @test_mask_subs_epi8_rmkz_256(<32 x i8> %a, <32 x i8>* %ptr_b, i32 %mask) {
+ ;CHECK-LABEL: test_mask_subs_epi8_rmkz_256
+ ;CHECK: vpsubsb (%rdi), %ymm0, %ymm0 {%k1} {z}
+ %b = load <32 x i8>, <32 x i8>* %ptr_b
+ %res = call <32 x i8> @llvm.x86.avx512.mask.psubs.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 %mask)
+ ret <32 x i8> %res
+}
+
+declare <32 x i8> @llvm.x86.avx512.mask.psubs.b.256(<32 x i8>, <32 x i8>, <32 x i8>, i32)
+
+define <16 x i8> @test_mask_adds_epu8_rr_128(<16 x i8> %a, <16 x i8> %b) {
+ ;CHECK-LABEL: test_mask_adds_epu8_rr_128
+ ;CHECK: vpaddusb %xmm1, %xmm0, %xmm0
+ %res = call <16 x i8> @llvm.x86.avx512.mask.paddus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 -1)
+ ret <16 x i8> %res
+}
+
+define <16 x i8> @test_mask_adds_epu8_rrk_128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask) {
+ ;CHECK-LABEL: test_mask_adds_epu8_rrk_128
+ ;CHECK: vpaddusb %xmm1, %xmm0, %xmm2 {%k1}
+ %res = call <16 x i8> @llvm.x86.avx512.mask.paddus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask)
+ ret <16 x i8> %res
+}
+
+define <16 x i8> @test_mask_adds_epu8_rrkz_128(<16 x i8> %a, <16 x i8> %b, i16 %mask) {
+ ;CHECK-LABEL: test_mask_adds_epu8_rrkz_128
+ ;CHECK: vpaddusb %xmm1, %xmm0, %xmm0 {%k1} {z}
+ %res = call <16 x i8> @llvm.x86.avx512.mask.paddus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 %mask)
+ ret <16 x i8> %res
+}
+
+define <16 x i8> @test_mask_adds_epu8_rm_128(<16 x i8> %a, <16 x i8>* %ptr_b) {
+ ;CHECK-LABEL: test_mask_adds_epu8_rm_128
+ ;CHECK: vpaddusb (%rdi), %xmm0, %xmm0
+ %b = load <16 x i8>, <16 x i8>* %ptr_b
+ %res = call <16 x i8> @llvm.x86.avx512.mask.paddus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 -1)
+ ret <16 x i8> %res
+}
+
+define <16 x i8> @test_mask_adds_epu8_rmk_128(<16 x i8> %a, <16 x i8>* %ptr_b, <16 x i8> %passThru, i16 %mask) {
+ ;CHECK-LABEL: test_mask_adds_epu8_rmk_128
+ ;CHECK: vpaddusb (%rdi), %xmm0, %xmm1 {%k1}
+ %b = load <16 x i8>, <16 x i8>* %ptr_b
+ %res = call <16 x i8> @llvm.x86.avx512.mask.paddus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask)
+ ret <16 x i8> %res
+}
+
+define <16 x i8> @test_mask_adds_epu8_rmkz_128(<16 x i8> %a, <16 x i8>* %ptr_b, i16 %mask) {
+ ;CHECK-LABEL: test_mask_adds_epu8_rmkz_128
+ ;CHECK: vpaddusb (%rdi), %xmm0, %xmm0 {%k1} {z}
+ %b = load <16 x i8>, <16 x i8>* %ptr_b
+ %res = call <16 x i8> @llvm.x86.avx512.mask.paddus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 %mask)
+ ret <16 x i8> %res
+}
+
+declare <16 x i8> @llvm.x86.avx512.mask.paddus.b.128(<16 x i8>, <16 x i8>, <16 x i8>, i16)
+
+define <32 x i8> @test_mask_adds_epu8_rr_256(<32 x i8> %a, <32 x i8> %b) {
+ ;CHECK-LABEL: test_mask_adds_epu8_rr_256
+ ;CHECK: vpaddusb %ymm1, %ymm0, %ymm0
+ %res = call <32 x i8> @llvm.x86.avx512.mask.paddus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 -1)
+ ret <32 x i8> %res
+}
+
+define <32 x i8> @test_mask_adds_epu8_rrk_256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask) {
+ ;CHECK-LABEL: test_mask_adds_epu8_rrk_256
+ ;CHECK: vpaddusb %ymm1, %ymm0, %ymm2 {%k1}
+ %res = call <32 x i8> @llvm.x86.avx512.mask.paddus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask)
+ ret <32 x i8> %res
+}
+
+define <32 x i8> @test_mask_adds_epu8_rrkz_256(<32 x i8> %a, <32 x i8> %b, i32 %mask) {
+ ;CHECK-LABEL: test_mask_adds_epu8_rrkz_256
+ ;CHECK: vpaddusb %ymm1, %ymm0, %ymm0 {%k1} {z}
+ %res = call <32 x i8> @llvm.x86.avx512.mask.paddus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 %mask)
+ ret <32 x i8> %res
+}
+
+define <32 x i8> @test_mask_adds_epu8_rm_256(<32 x i8> %a, <32 x i8>* %ptr_b) {
+ ;CHECK-LABEL: test_mask_adds_epu8_rm_256
+ ;CHECK: vpaddusb (%rdi), %ymm0, %ymm0
+ %b = load <32 x i8>, <32 x i8>* %ptr_b
+ %res = call <32 x i8> @llvm.x86.avx512.mask.paddus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 -1)
+ ret <32 x i8> %res
+}
+
+define <32 x i8> @test_mask_adds_epu8_rmk_256(<32 x i8> %a, <32 x i8>* %ptr_b, <32 x i8> %passThru, i32 %mask) {
+ ;CHECK-LABEL: test_mask_adds_epu8_rmk_256
+ ;CHECK: vpaddusb (%rdi), %ymm0, %ymm1 {%k1}
+ %b = load <32 x i8>, <32 x i8>* %ptr_b
+ %res = call <32 x i8> @llvm.x86.avx512.mask.paddus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask)
+ ret <32 x i8> %res
+}
+
+define <32 x i8> @test_mask_adds_epu8_rmkz_256(<32 x i8> %a, <32 x i8>* %ptr_b, i32 %mask) {
+ ;CHECK-LABEL: test_mask_adds_epu8_rmkz_256
+ ;CHECK: vpaddusb (%rdi), %ymm0, %ymm0 {%k1} {z}
+ %b = load <32 x i8>, <32 x i8>* %ptr_b
+ %res = call <32 x i8> @llvm.x86.avx512.mask.paddus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 %mask)
+ ret <32 x i8> %res
+}
+
+declare <32 x i8> @llvm.x86.avx512.mask.paddus.b.256(<32 x i8>, <32 x i8>, <32 x i8>, i32)
+
+define <16 x i8> @test_mask_subs_epu8_rr_128(<16 x i8> %a, <16 x i8> %b) {
+ ;CHECK-LABEL: test_mask_subs_epu8_rr_128
+ ;CHECK: vpsubusb %xmm1, %xmm0, %xmm0
+ %res = call <16 x i8> @llvm.x86.avx512.mask.psubus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 -1)
+ ret <16 x i8> %res
+}
+
+define <16 x i8> @test_mask_subs_epu8_rrk_128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask) {
+ ;CHECK-LABEL: test_mask_subs_epu8_rrk_128
+ ;CHECK: vpsubusb %xmm1, %xmm0, %xmm2 {%k1}
+ %res = call <16 x i8> @llvm.x86.avx512.mask.psubus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask)
+ ret <16 x i8> %res
+}
+
+define <16 x i8> @test_mask_subs_epu8_rrkz_128(<16 x i8> %a, <16 x i8> %b, i16 %mask) {
+ ;CHECK-LABEL: test_mask_subs_epu8_rrkz_128
+ ;CHECK: vpsubusb %xmm1, %xmm0, %xmm0 {%k1} {z}
+ %res = call <16 x i8> @llvm.x86.avx512.mask.psubus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 %mask)
+ ret <16 x i8> %res
+}
+
+define <16 x i8> @test_mask_subs_epu8_rm_128(<16 x i8> %a, <16 x i8>* %ptr_b) {
+ ;CHECK-LABEL: test_mask_subs_epu8_rm_128
+ ;CHECK: vpsubusb (%rdi), %xmm0, %xmm0
+ %b = load <16 x i8>, <16 x i8>* %ptr_b
+ %res = call <16 x i8> @llvm.x86.avx512.mask.psubus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 -1)
+ ret <16 x i8> %res
+}
+
+define <16 x i8> @test_mask_subs_epu8_rmk_128(<16 x i8> %a, <16 x i8>* %ptr_b, <16 x i8> %passThru, i16 %mask) {
+ ;CHECK-LABEL: test_mask_subs_epu8_rmk_128
+ ;CHECK: vpsubusb (%rdi), %xmm0, %xmm1 {%k1}
+ %b = load <16 x i8>, <16 x i8>* %ptr_b
+ %res = call <16 x i8> @llvm.x86.avx512.mask.psubus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask)
+ ret <16 x i8> %res
+}
+
+define <16 x i8> @test_mask_subs_epu8_rmkz_128(<16 x i8> %a, <16 x i8>* %ptr_b, i16 %mask) {
+ ;CHECK-LABEL: test_mask_subs_epu8_rmkz_128
+ ;CHECK: vpsubusb (%rdi), %xmm0, %xmm0 {%k1} {z}
+ %b = load <16 x i8>, <16 x i8>* %ptr_b
+ %res = call <16 x i8> @llvm.x86.avx512.mask.psubus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 %mask)
+ ret <16 x i8> %res
+}
+
+declare <16 x i8> @llvm.x86.avx512.mask.psubus.b.128(<16 x i8>, <16 x i8>, <16 x i8>, i16)
+
+define <32 x i8> @test_mask_subs_epu8_rr_256(<32 x i8> %a, <32 x i8> %b) {
+ ;CHECK-LABEL: test_mask_subs_epu8_rr_256
+ ;CHECK: vpsubusb %ymm1, %ymm0, %ymm0
+ %res = call <32 x i8> @llvm.x86.avx512.mask.psubus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 -1)
+ ret <32 x i8> %res
+}
+
+define <32 x i8> @test_mask_subs_epu8_rrk_256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask) {
+ ;CHECK-LABEL: test_mask_subs_epu8_rrk_256
+ ;CHECK: vpsubusb %ymm1, %ymm0, %ymm2 {%k1}
+ %res = call <32 x i8> @llvm.x86.avx512.mask.psubus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask)
+ ret <32 x i8> %res
+}
+
+define <32 x i8> @test_mask_subs_epu8_rrkz_256(<32 x i8> %a, <32 x i8> %b, i32 %mask) {
+ ;CHECK-LABEL: test_mask_subs_epu8_rrkz_256
+ ;CHECK: vpsubusb %ymm1, %ymm0, %ymm0 {%k1} {z}
+ %res = call <32 x i8> @llvm.x86.avx512.mask.psubus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 %mask)
+ ret <32 x i8> %res
+}
+
+define <32 x i8> @test_mask_subs_epu8_rm_256(<32 x i8> %a, <32 x i8>* %ptr_b) {
+ ;CHECK-LABEL: test_mask_subs_epu8_rm_256
+ ;CHECK: vpsubusb (%rdi), %ymm0, %ymm0
+ %b = load <32 x i8>, <32 x i8>* %ptr_b
+ %res = call <32 x i8> @llvm.x86.avx512.mask.psubus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 -1)
+ ret <32 x i8> %res
+}
+
+define <32 x i8> @test_mask_subs_epu8_rmk_256(<32 x i8> %a, <32 x i8>* %ptr_b, <32 x i8> %passThru, i32 %mask) {
+ ;CHECK-LABEL: test_mask_subs_epu8_rmk_256
+ ;CHECK: vpsubusb (%rdi), %ymm0, %ymm1 {%k1}
+ %b = load <32 x i8>, <32 x i8>* %ptr_b
+ %res = call <32 x i8> @llvm.x86.avx512.mask.psubus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask)
+ ret <32 x i8> %res
+}
+
+define <32 x i8> @test_mask_subs_epu8_rmkz_256(<32 x i8> %a, <32 x i8>* %ptr_b, i32 %mask) {
+ ;CHECK-LABEL: test_mask_subs_epu8_rmkz_256
+ ;CHECK: vpsubusb (%rdi), %ymm0, %ymm0 {%k1} {z}
+ %b = load <32 x i8>, <32 x i8>* %ptr_b
+ %res = call <32 x i8> @llvm.x86.avx512.mask.psubus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 %mask)
+ ret <32 x i8> %res
+}
+
+declare <32 x i8> @llvm.x86.avx512.mask.psubus.b.256(<32 x i8>, <32 x i8>, <32 x i8>, i32)
\ No newline at end of file
diff --git a/test/CodeGen/X86/avx512bwvl-mov.ll b/test/CodeGen/X86/avx512bwvl-mov.ll
index 835844fc821c..8a9a4fa5e5e2 100644
--- a/test/CodeGen/X86/avx512bwvl-mov.ll
+++ b/test/CodeGen/X86/avx512bwvl-mov.ll
@@ -5,7 +5,7 @@
; CHECK: ret
define <32 x i8> @test_256_1(i8 * %addr) {
%vaddr = bitcast i8* %addr to <32 x i8>*
- %res = load <32 x i8>* %vaddr, align 1
+ %res = load <32 x i8>, <32 x i8>* %vaddr, align 1
ret <32 x i8>%res
}
@@ -24,7 +24,7 @@ define void @test_256_2(i8 * %addr, <32 x i8> %data) {
define <32 x i8> @test_256_3(i8 * %addr, <32 x i8> %old, <32 x i8> %mask1) {
%mask = icmp ne <32 x i8> %mask1, zeroinitializer
%vaddr = bitcast i8* %addr to <32 x i8>*
- %r = load <32 x i8>* %vaddr, align 1
+ %r = load <32 x i8>, <32 x i8>* %vaddr, align 1
%res = select <32 x i1> %mask, <32 x i8> %r, <32 x i8> %old
ret <32 x i8>%res
}
@@ -35,7 +35,7 @@ define <32 x i8> @test_256_3(i8 * %addr, <32 x i8> %old, <32 x i8> %mask1) {
define <32 x i8> @test_256_4(i8 * %addr, <32 x i8> %mask1) {
%mask = icmp ne <32 x i8> %mask1, zeroinitializer
%vaddr = bitcast i8* %addr to <32 x i8>*
- %r = load <32 x i8>* %vaddr, align 1
+ %r = load <32 x i8>, <32 x i8>* %vaddr, align 1
%res = select <32 x i1> %mask, <32 x i8> %r, <32 x i8> zeroinitializer
ret <32 x i8>%res
}
@@ -45,7 +45,7 @@ define <32 x i8> @test_256_4(i8 * %addr, <32 x i8> %mask1) {
; CHECK: ret
define <16 x i16> @test_256_5(i8 * %addr) {
%vaddr = bitcast i8* %addr to <16 x i16>*
- %res = load <16 x i16>* %vaddr, align 1
+ %res = load <16 x i16>, <16 x i16>* %vaddr, align 1
ret <16 x i16>%res
}
@@ -64,7 +64,7 @@ define void @test_256_6(i8 * %addr, <16 x i16> %data) {
define <16 x i16> @test_256_7(i8 * %addr, <16 x i16> %old, <16 x i16> %mask1) {
%mask = icmp ne <16 x i16> %mask1, zeroinitializer
%vaddr = bitcast i8* %addr to <16 x i16>*
- %r = load <16 x i16>* %vaddr, align 1
+ %r = load <16 x i16>, <16 x i16>* %vaddr, align 1
%res = select <16 x i1> %mask, <16 x i16> %r, <16 x i16> %old
ret <16 x i16>%res
}
@@ -75,7 +75,7 @@ define <16 x i16> @test_256_7(i8 * %addr, <16 x i16> %old, <16 x i16> %mask1) {
define <16 x i16> @test_256_8(i8 * %addr, <16 x i16> %mask1) {
%mask = icmp ne <16 x i16> %mask1, zeroinitializer
%vaddr = bitcast i8* %addr to <16 x i16>*
- %r = load <16 x i16>* %vaddr, align 1
+ %r = load <16 x i16>, <16 x i16>* %vaddr, align 1
%res = select <16 x i1> %mask, <16 x i16> %r, <16 x i16> zeroinitializer
ret <16 x i16>%res
}
@@ -85,7 +85,7 @@ define <16 x i16> @test_256_8(i8 * %addr, <16 x i16> %mask1) {
; CHECK: ret
define <16 x i8> @test_128_1(i8 * %addr) {
%vaddr = bitcast i8* %addr to <16 x i8>*
- %res = load <16 x i8>* %vaddr, align 1
+ %res = load <16 x i8>, <16 x i8>* %vaddr, align 1
ret <16 x i8>%res
}
@@ -104,7 +104,7 @@ define void @test_128_2(i8 * %addr, <16 x i8> %data) {
define <16 x i8> @test_128_3(i8 * %addr, <16 x i8> %old, <16 x i8> %mask1) {
%mask = icmp ne <16 x i8> %mask1, zeroinitializer
%vaddr = bitcast i8* %addr to <16 x i8>*
- %r = load <16 x i8>* %vaddr, align 1
+ %r = load <16 x i8>, <16 x i8>* %vaddr, align 1
%res = select <16 x i1> %mask, <16 x i8> %r, <16 x i8> %old
ret <16 x i8>%res
}
@@ -115,7 +115,7 @@ define <16 x i8> @test_128_3(i8 * %addr, <16 x i8> %old, <16 x i8> %mask1) {
define <16 x i8> @test_128_4(i8 * %addr, <16 x i8> %mask1) {
%mask = icmp ne <16 x i8> %mask1, zeroinitializer
%vaddr = bitcast i8* %addr to <16 x i8>*
- %r = load <16 x i8>* %vaddr, align 1
+ %r = load <16 x i8>, <16 x i8>* %vaddr, align 1
%res = select <16 x i1> %mask, <16 x i8> %r, <16 x i8> zeroinitializer
ret <16 x i8>%res
}
@@ -125,7 +125,7 @@ define <16 x i8> @test_128_4(i8 * %addr, <16 x i8> %mask1) {
; CHECK: ret
define <8 x i16> @test_128_5(i8 * %addr) {
%vaddr = bitcast i8* %addr to <8 x i16>*
- %res = load <8 x i16>* %vaddr, align 1
+ %res = load <8 x i16>, <8 x i16>* %vaddr, align 1
ret <8 x i16>%res
}
@@ -144,7 +144,7 @@ define void @test_128_6(i8 * %addr, <8 x i16> %data) {
define <8 x i16> @test_128_7(i8 * %addr, <8 x i16> %old, <8 x i16> %mask1) {
%mask = icmp ne <8 x i16> %mask1, zeroinitializer
%vaddr = bitcast i8* %addr to <8 x i16>*
- %r = load <8 x i16>* %vaddr, align 1
+ %r = load <8 x i16>, <8 x i16>* %vaddr, align 1
%res = select <8 x i1> %mask, <8 x i16> %r, <8 x i16> %old
ret <8 x i16>%res
}
@@ -155,7 +155,7 @@ define <8 x i16> @test_128_7(i8 * %addr, <8 x i16> %old, <8 x i16> %mask1) {
define <8 x i16> @test_128_8(i8 * %addr, <8 x i16> %mask1) {
%mask = icmp ne <8 x i16> %mask1, zeroinitializer
%vaddr = bitcast i8* %addr to <8 x i16>*
- %r = load <8 x i16>* %vaddr, align 1
+ %r = load <8 x i16>, <8 x i16>* %vaddr, align 1
%res = select <8 x i1> %mask, <8 x i16> %r, <8 x i16> zeroinitializer
ret <8 x i16>%res
}
diff --git a/test/CodeGen/X86/avx512bwvl-vec-cmp.ll b/test/CodeGen/X86/avx512bwvl-vec-cmp.ll
index 2d13a166a725..9bf02fa41d9a 100644
--- a/test/CodeGen/X86/avx512bwvl-vec-cmp.ll
+++ b/test/CodeGen/X86/avx512bwvl-vec-cmp.ll
@@ -45,7 +45,7 @@ define <32 x i8> @test256_4(<32 x i8> %x, <32 x i8> %y, <32 x i8> %x1) nounwind
; CHECK: vmovdqu16
; CHECK: ret
define <16 x i16> @test256_5(<16 x i16> %x, <16 x i16> %x1, <16 x i16>* %yp) nounwind {
- %y = load <16 x i16>* %yp, align 4
+ %y = load <16 x i16>, <16 x i16>* %yp, align 4
%mask = icmp eq <16 x i16> %x, %y
%max = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> %x1
ret <16 x i16> %max
@@ -56,7 +56,7 @@ define <16 x i16> @test256_5(<16 x i16> %x, <16 x i16> %x1, <16 x i16>* %yp) nou
; CHECK: vmovdqu16
; CHECK: ret
define <16 x i16> @test256_6(<16 x i16> %x, <16 x i16> %x1, <16 x i16>* %y.ptr) nounwind {
- %y = load <16 x i16>* %y.ptr, align 4
+ %y = load <16 x i16>, <16 x i16>* %y.ptr, align 4
%mask = icmp sgt <16 x i16> %x, %y
%max = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> %x1
ret <16 x i16> %max
@@ -67,7 +67,7 @@ define <16 x i16> @test256_6(<16 x i16> %x, <16 x i16> %x1, <16 x i16>* %y.ptr)
; CHECK: vmovdqu16
; CHECK: ret
define <16 x i16> @test256_7(<16 x i16> %x, <16 x i16> %x1, <16 x i16>* %y.ptr) nounwind {
- %y = load <16 x i16>* %y.ptr, align 4
+ %y = load <16 x i16>, <16 x i16>* %y.ptr, align 4
%mask = icmp sle <16 x i16> %x, %y
%max = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> %x1
ret <16 x i16> %max
@@ -78,7 +78,7 @@ define <16 x i16> @test256_7(<16 x i16> %x, <16 x i16> %x1, <16 x i16>* %y.ptr)
; CHECK: vmovdqu16
; CHECK: ret
define <16 x i16> @test256_8(<16 x i16> %x, <16 x i16> %x1, <16 x i16>* %y.ptr) nounwind {
- %y = load <16 x i16>* %y.ptr, align 4
+ %y = load <16 x i16>, <16 x i16>* %y.ptr, align 4
%mask = icmp ule <16 x i16> %x, %y
%max = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> %x1
ret <16 x i16> %max
@@ -114,7 +114,7 @@ define <32 x i8> @test256_10(<32 x i8> %x, <32 x i8> %y, <32 x i8> %x1, <32 x i8
; CHECK: ret
define <32 x i8> @test256_11(<32 x i8> %x, <32 x i8>* %y.ptr, <32 x i8> %x1, <32 x i8> %y1) nounwind {
%mask1 = icmp sgt <32 x i8> %x1, %y1
- %y = load <32 x i8>* %y.ptr, align 4
+ %y = load <32 x i8>, <32 x i8>* %y.ptr, align 4
%mask0 = icmp sgt <32 x i8> %x, %y
%mask = select <32 x i1> %mask0, <32 x i1> %mask1, <32 x i1> zeroinitializer
%max = select <32 x i1> %mask, <32 x i8> %x, <32 x i8> %x1
@@ -127,7 +127,7 @@ define <32 x i8> @test256_11(<32 x i8> %x, <32 x i8>* %y.ptr, <32 x i8> %x1, <32
; CHECK: ret
define <16 x i16> @test256_12(<16 x i16> %x, <16 x i16>* %y.ptr, <16 x i16> %x1, <16 x i16> %y1) nounwind {
%mask1 = icmp sge <16 x i16> %x1, %y1
- %y = load <16 x i16>* %y.ptr, align 4
+ %y = load <16 x i16>, <16 x i16>* %y.ptr, align 4
%mask0 = icmp ule <16 x i16> %x, %y
%mask = select <16 x i1> %mask0, <16 x i1> %mask1, <16 x i1> zeroinitializer
%max = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> %x1
@@ -179,7 +179,7 @@ define <16 x i8> @test128_4(<16 x i8> %x, <16 x i8> %y, <16 x i8> %x1) nounwind
; CHECK: vmovdqu16
; CHECK: ret
define <8 x i16> @test128_5(<8 x i16> %x, <8 x i16> %x1, <8 x i16>* %yp) nounwind {
- %y = load <8 x i16>* %yp, align 4
+ %y = load <8 x i16>, <8 x i16>* %yp, align 4
%mask = icmp eq <8 x i16> %x, %y
%max = select <8 x i1> %mask, <8 x i16> %x, <8 x i16> %x1
ret <8 x i16> %max
@@ -190,7 +190,7 @@ define <8 x i16> @test128_5(<8 x i16> %x, <8 x i16> %x1, <8 x i16>* %yp) nounwin
; CHECK: vmovdqu16
; CHECK: ret
define <8 x i16> @test128_6(<8 x i16> %x, <8 x i16> %x1, <8 x i16>* %y.ptr) nounwind {
- %y = load <8 x i16>* %y.ptr, align 4
+ %y = load <8 x i16>, <8 x i16>* %y.ptr, align 4
%mask = icmp sgt <8 x i16> %x, %y
%max = select <8 x i1> %mask, <8 x i16> %x, <8 x i16> %x1
ret <8 x i16> %max
@@ -201,7 +201,7 @@ define <8 x i16> @test128_6(<8 x i16> %x, <8 x i16> %x1, <8 x i16>* %y.ptr) noun
; CHECK: vmovdqu16
; CHECK: ret
define <8 x i16> @test128_7(<8 x i16> %x, <8 x i16> %x1, <8 x i16>* %y.ptr) nounwind {
- %y = load <8 x i16>* %y.ptr, align 4
+ %y = load <8 x i16>, <8 x i16>* %y.ptr, align 4
%mask = icmp sle <8 x i16> %x, %y
%max = select <8 x i1> %mask, <8 x i16> %x, <8 x i16> %x1
ret <8 x i16> %max
@@ -212,7 +212,7 @@ define <8 x i16> @test128_7(<8 x i16> %x, <8 x i16> %x1, <8 x i16>* %y.ptr) noun
; CHECK: vmovdqu16
; CHECK: ret
define <8 x i16> @test128_8(<8 x i16> %x, <8 x i16> %x1, <8 x i16>* %y.ptr) nounwind {
- %y = load <8 x i16>* %y.ptr, align 4
+ %y = load <8 x i16>, <8 x i16>* %y.ptr, align 4
%mask = icmp ule <8 x i16> %x, %y
%max = select <8 x i1> %mask, <8 x i16> %x, <8 x i16> %x1
ret <8 x i16> %max
@@ -248,7 +248,7 @@ define <16 x i8> @test128_10(<16 x i8> %x, <16 x i8> %y, <16 x i8> %x1, <16 x i8
; CHECK: ret
define <16 x i8> @test128_11(<16 x i8> %x, <16 x i8>* %y.ptr, <16 x i8> %x1, <16 x i8> %y1) nounwind {
%mask1 = icmp sgt <16 x i8> %x1, %y1
- %y = load <16 x i8>* %y.ptr, align 4
+ %y = load <16 x i8>, <16 x i8>* %y.ptr, align 4
%mask0 = icmp sgt <16 x i8> %x, %y
%mask = select <16 x i1> %mask0, <16 x i1> %mask1, <16 x i1> zeroinitializer
%max = select <16 x i1> %mask, <16 x i8> %x, <16 x i8> %x1
@@ -261,7 +261,7 @@ define <16 x i8> @test128_11(<16 x i8> %x, <16 x i8>* %y.ptr, <16 x i8> %x1, <16
; CHECK: ret
define <8 x i16> @test128_12(<8 x i16> %x, <8 x i16>* %y.ptr, <8 x i16> %x1, <8 x i16> %y1) nounwind {
%mask1 = icmp sge <8 x i16> %x1, %y1
- %y = load <8 x i16>* %y.ptr, align 4
+ %y = load <8 x i16>, <8 x i16>* %y.ptr, align 4
%mask0 = icmp ule <8 x i16> %x, %y
%mask = select <8 x i1> %mask0, <8 x i1> %mask1, <8 x i1> zeroinitializer
%max = select <8 x i1> %mask, <8 x i16> %x, <8 x i16> %x1
diff --git a/test/CodeGen/X86/avx512dq-mask-op.ll b/test/CodeGen/X86/avx512dq-mask-op.ll
index 32a2633f8d06..b4d11bc0b77b 100644
--- a/test/CodeGen/X86/avx512dq-mask-op.ll
+++ b/test/CodeGen/X86/avx512dq-mask-op.ll
@@ -11,7 +11,7 @@ define i8 @mask8(i8 %x) {
}
define void @mask8_mem(i8* %ptr) {
- %x = load i8* %ptr, align 4
+ %x = load i8, i8* %ptr, align 4
%m0 = bitcast i8 %x to <8 x i1>
%m1 = xor <8 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
%ret = bitcast <8 x i1> %m1 to i8
diff --git a/test/CodeGen/X86/avx512dqvl-intrinsics.ll b/test/CodeGen/X86/avx512dqvl-intrinsics.ll
new file mode 100644
index 000000000000..c577abee6640
--- /dev/null
+++ b/test/CodeGen/X86/avx512dqvl-intrinsics.ll
@@ -0,0 +1,1155 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512dq -mattr=+avx512vl --show-mc-encoding| FileCheck %s
+
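+; Naming convention for the tests below (inferred from the operand forms they
+; exercise): rr = register/register, rm = register/memory, rmb = memory
+; broadcast ({1toN}); a trailing k adds merge-masking via %k1 and kz adds
+; zero-masking ({z}); the _128/_256/_512 suffix is the vector width under test.
+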
+define <8 x i64> @test_mask_mullo_epi64_rr_512(<8 x i64> %a, <8 x i64> %b) {
+ ;CHECK-LABEL: test_mask_mullo_epi64_rr_512
+ ;CHECK: vpmullq %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0xfd,0x48,0x40,0xc1]
+ %res = call <8 x i64> @llvm.x86.avx512.mask.pmull.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
+ ret <8 x i64> %res
+}
+
+define <8 x i64> @test_mask_mullo_epi64_rrk_512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask) {
+ ;CHECK-LABEL: test_mask_mullo_epi64_rrk_512
+ ;CHECK: vpmullq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x40,0xd1]
+ %res = call <8 x i64> @llvm.x86.avx512.mask.pmull.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
+ ret <8 x i64> %res
+}
+
+define <8 x i64> @test_mask_mullo_epi64_rrkz_512(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
+ ;CHECK-LABEL: test_mask_mullo_epi64_rrkz_512
+ ;CHECK: vpmullq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x40,0xc1]
+ %res = call <8 x i64> @llvm.x86.avx512.mask.pmull.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask)
+ ret <8 x i64> %res
+}
+
+define <8 x i64> @test_mask_mullo_epi64_rm_512(<8 x i64> %a, <8 x i64>* %ptr_b) {
+ ;CHECK-LABEL: test_mask_mullo_epi64_rm_512
+ ;CHECK: vpmullq (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf2,0xfd,0x48,0x40,0x07]
+ %b = load <8 x i64>, <8 x i64>* %ptr_b
+ %res = call <8 x i64> @llvm.x86.avx512.mask.pmull.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
+ ret <8 x i64> %res
+}
+
+define <8 x i64> @test_mask_mullo_epi64_rmk_512(<8 x i64> %a, <8 x i64>* %ptr_b, <8 x i64> %passThru, i8 %mask) {
+ ;CHECK-LABEL: test_mask_mullo_epi64_rmk_512
+ ;CHECK: vpmullq (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x40,0x0f]
+ %b = load <8 x i64>, <8 x i64>* %ptr_b
+ %res = call <8 x i64> @llvm.x86.avx512.mask.pmull.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
+ ret <8 x i64> %res
+}
+
+define <8 x i64> @test_mask_mullo_epi64_rmkz_512(<8 x i64> %a, <8 x i64>* %ptr_b, i8 %mask) {
+ ;CHECK-LABEL: test_mask_mullo_epi64_rmkz_512
+ ;CHECK: vpmullq (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x40,0x07]
+ %b = load <8 x i64>, <8 x i64>* %ptr_b
+ %res = call <8 x i64> @llvm.x86.avx512.mask.pmull.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask)
+ ret <8 x i64> %res
+}
+
+define <8 x i64> @test_mask_mullo_epi64_rmb_512(<8 x i64> %a, i64* %ptr_b) {
+ ;CHECK-LABEL: test_mask_mullo_epi64_rmb_512
+ ;CHECK: vpmullq (%rdi){1to8}, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0xfd,0x58,0x40,0x07]
+ %q = load i64, i64* %ptr_b
+ %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
+ %b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
+ %res = call <8 x i64> @llvm.x86.avx512.mask.pmull.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
+ ret <8 x i64> %res
+}
+
+define <8 x i64> @test_mask_mullo_epi64_rmbk_512(<8 x i64> %a, i64* %ptr_b, <8 x i64> %passThru, i8 %mask) {
+ ;CHECK-LABEL: test_mask_mullo_epi64_rmbk_512
+ ;CHECK: vpmullq (%rdi){1to8}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x59,0x40,0x0f]
+ %q = load i64, i64* %ptr_b
+ %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
+ %b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
+ %res = call <8 x i64> @llvm.x86.avx512.mask.pmull.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
+ ret <8 x i64> %res
+}
+
+define <8 x i64> @test_mask_mullo_epi64_rmbkz_512(<8 x i64> %a, i64* %ptr_b, i8 %mask) {
+ ;CHECK-LABEL: test_mask_mullo_epi64_rmbkz_512
+ ;CHECK: vpmullq (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xd9,0x40,0x07]
+ %q = load i64, i64* %ptr_b
+ %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
+ %b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
+ %res = call <8 x i64> @llvm.x86.avx512.mask.pmull.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask)
+ ret <8 x i64> %res
+}
+declare <8 x i64> @llvm.x86.avx512.mask.pmull.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
+
+define <4 x i64> @test_mask_mullo_epi64_rr_256(<4 x i64> %a, <4 x i64> %b) {
+ ;CHECK-LABEL: test_mask_mullo_epi64_rr_256
+ ;CHECK: vpmullq %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf2,0xfd,0x28,0x40,0xc1]
+ %res = call <4 x i64> @llvm.x86.avx512.mask.pmull.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> zeroinitializer, i8 -1)
+ ret <4 x i64> %res
+}
+
+define <4 x i64> @test_mask_mullo_epi64_rrk_256(<4 x i64> %a, <4 x i64> %b, <4 x i64> %passThru, i8 %mask) {
+ ;CHECK-LABEL: test_mask_mullo_epi64_rrk_256
+ ;CHECK: vpmullq %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x40,0xd1]
+ %res = call <4 x i64> @llvm.x86.avx512.mask.pmull.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> %passThru, i8 %mask)
+ ret <4 x i64> %res
+}
+
+define <4 x i64> @test_mask_mullo_epi64_rrkz_256(<4 x i64> %a, <4 x i64> %b, i8 %mask) {
+ ;CHECK-LABEL: test_mask_mullo_epi64_rrkz_256
+ ;CHECK: vpmullq %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xa9,0x40,0xc1]
+ %res = call <4 x i64> @llvm.x86.avx512.mask.pmull.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> zeroinitializer, i8 %mask)
+ ret <4 x i64> %res
+}
+
+define <4 x i64> @test_mask_mullo_epi64_rm_256(<4 x i64> %a, <4 x i64>* %ptr_b) {
+ ;CHECK-LABEL: test_mask_mullo_epi64_rm_256
+ ;CHECK: vpmullq (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf2,0xfd,0x28,0x40,0x07]
+ %b = load <4 x i64>, <4 x i64>* %ptr_b
+ %res = call <4 x i64> @llvm.x86.avx512.mask.pmull.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> zeroinitializer, i8 -1)
+ ret <4 x i64> %res
+}
+
+define <4 x i64> @test_mask_mullo_epi64_rmk_256(<4 x i64> %a, <4 x i64>* %ptr_b, <4 x i64> %passThru, i8 %mask) {
+ ;CHECK-LABEL: test_mask_mullo_epi64_rmk_256
+ ;CHECK: vpmullq (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x40,0x0f]
+ %b = load <4 x i64>, <4 x i64>* %ptr_b
+ %res = call <4 x i64> @llvm.x86.avx512.mask.pmull.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> %passThru, i8 %mask)
+ ret <4 x i64> %res
+}
+
+define <4 x i64> @test_mask_mullo_epi64_rmkz_256(<4 x i64> %a, <4 x i64>* %ptr_b, i8 %mask) {
+ ;CHECK-LABEL: test_mask_mullo_epi64_rmkz_256
+ ;CHECK: vpmullq (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xa9,0x40,0x07]
+ %b = load <4 x i64>, <4 x i64>* %ptr_b
+ %res = call <4 x i64> @llvm.x86.avx512.mask.pmull.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> zeroinitializer, i8 %mask)
+ ret <4 x i64> %res
+}
+
+define <4 x i64> @test_mask_mullo_epi64_rmb_256(<4 x i64> %a, i64* %ptr_b) {
+ ;CHECK-LABEL: test_mask_mullo_epi64_rmb_256
+ ;CHECK: vpmullq (%rdi){1to4}, %ymm0, %ymm0 ## encoding: [0x62,0xf2,0xfd,0x38,0x40,0x07]
+ %q = load i64, i64* %ptr_b
+ %vecinit.i = insertelement <4 x i64> undef, i64 %q, i32 0
+ %b = shufflevector <4 x i64> %vecinit.i, <4 x i64> undef, <4 x i32> zeroinitializer
+ %res = call <4 x i64> @llvm.x86.avx512.mask.pmull.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> zeroinitializer, i8 -1)
+ ret <4 x i64> %res
+}
+
+define <4 x i64> @test_mask_mullo_epi64_rmbk_256(<4 x i64> %a, i64* %ptr_b, <4 x i64> %passThru, i8 %mask) {
+ ;CHECK-LABEL: test_mask_mullo_epi64_rmbk_256
+ ;CHECK: vpmullq (%rdi){1to4}, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x39,0x40,0x0f]
+ %q = load i64, i64* %ptr_b
+ %vecinit.i = insertelement <4 x i64> undef, i64 %q, i32 0
+ %b = shufflevector <4 x i64> %vecinit.i, <4 x i64> undef, <4 x i32> zeroinitializer
+ %res = call <4 x i64> @llvm.x86.avx512.mask.pmull.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> %passThru, i8 %mask)
+ ret <4 x i64> %res
+}
+
+define <4 x i64> @test_mask_mullo_epi64_rmbkz_256(<4 x i64> %a, i64* %ptr_b, i8 %mask) {
+ ;CHECK-LABEL: test_mask_mullo_epi64_rmbkz_256
+ ;CHECK: vpmullq (%rdi){1to4}, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xb9,0x40,0x07]
+ %q = load i64, i64* %ptr_b
+ %vecinit.i = insertelement <4 x i64> undef, i64 %q, i32 0
+ %b = shufflevector <4 x i64> %vecinit.i, <4 x i64> undef, <4 x i32> zeroinitializer
+ %res = call <4 x i64> @llvm.x86.avx512.mask.pmull.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> zeroinitializer, i8 %mask)
+ ret <4 x i64> %res
+}
+
+declare <4 x i64> @llvm.x86.avx512.mask.pmull.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)
+
+define <2 x i64> @test_mask_mullo_epi64_rr_128(<2 x i64> %a, <2 x i64> %b) {
+ ;CHECK-LABEL: test_mask_mullo_epi64_rr_128
+ ;CHECK: vpmullq %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0xfd,0x08,0x40,0xc1]
+ %res = call <2 x i64> @llvm.x86.avx512.mask.pmull.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> zeroinitializer, i8 -1)
+ ret <2 x i64> %res
+}
+
+define <2 x i64> @test_mask_mullo_epi64_rrk_128(<2 x i64> %a, <2 x i64> %b, <2 x i64> %passThru, i8 %mask) {
+ ;CHECK-LABEL: test_mask_mullo_epi64_rrk_128
+ ;CHECK: vpmullq %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x40,0xd1]
+ %res = call <2 x i64> @llvm.x86.avx512.mask.pmull.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> %passThru, i8 %mask)
+ ret <2 x i64> %res
+}
+
+define <2 x i64> @test_mask_mullo_epi64_rrkz_128(<2 x i64> %a, <2 x i64> %b, i8 %mask) {
+ ;CHECK-LABEL: test_mask_mullo_epi64_rrkz_128
+ ;CHECK: vpmullq %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0x89,0x40,0xc1]
+ %res = call <2 x i64> @llvm.x86.avx512.mask.pmull.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> zeroinitializer, i8 %mask)
+ ret <2 x i64> %res
+}
+
+define <2 x i64> @test_mask_mullo_epi64_rm_128(<2 x i64> %a, <2 x i64>* %ptr_b) {
+ ;CHECK-LABEL: test_mask_mullo_epi64_rm_128
+ ;CHECK: vpmullq (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf2,0xfd,0x08,0x40,0x07]
+ %b = load <2 x i64>, <2 x i64>* %ptr_b
+ %res = call <2 x i64> @llvm.x86.avx512.mask.pmull.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> zeroinitializer, i8 -1)
+ ret <2 x i64> %res
+}
+
+define <2 x i64> @test_mask_mullo_epi64_rmk_128(<2 x i64> %a, <2 x i64>* %ptr_b, <2 x i64> %passThru, i8 %mask) {
+ ;CHECK-LABEL: test_mask_mullo_epi64_rmk_128
+ ;CHECK: vpmullq (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x40,0x0f]
+ %b = load <2 x i64>, <2 x i64>* %ptr_b
+ %res = call <2 x i64> @llvm.x86.avx512.mask.pmull.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> %passThru, i8 %mask)
+ ret <2 x i64> %res
+}
+
+define <2 x i64> @test_mask_mullo_epi64_rmkz_128(<2 x i64> %a, <2 x i64>* %ptr_b, i8 %mask) {
+ ;CHECK-LABEL: test_mask_mullo_epi64_rmkz_128
+ ;CHECK: vpmullq (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0x89,0x40,0x07]
+ %b = load <2 x i64>, <2 x i64>* %ptr_b
+ %res = call <2 x i64> @llvm.x86.avx512.mask.pmull.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> zeroinitializer, i8 %mask)
+ ret <2 x i64> %res
+}
+
+define <2 x i64> @test_mask_mullo_epi64_rmb_128(<2 x i64> %a, i64* %ptr_b) {
+ ;CHECK-LABEL: test_mask_mullo_epi64_rmb_128
+ ;CHECK: vpmullq (%rdi){1to2}, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0xfd,0x18,0x40,0x07]
+ %q = load i64, i64* %ptr_b
+ %vecinit.i = insertelement <2 x i64> undef, i64 %q, i32 0
+ %b = shufflevector <2 x i64> %vecinit.i, <2 x i64> undef, <2 x i32> zeroinitializer
+ %res = call <2 x i64> @llvm.x86.avx512.mask.pmull.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> zeroinitializer, i8 -1)
+ ret <2 x i64> %res
+}
+
+define <2 x i64> @test_mask_mullo_epi64_rmbk_128(<2 x i64> %a, i64* %ptr_b, <2 x i64> %passThru, i8 %mask) {
+ ;CHECK-LABEL: test_mask_mullo_epi64_rmbk_128
+ ;CHECK: vpmullq (%rdi){1to2}, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x19,0x40,0x0f]
+ %q = load i64, i64* %ptr_b
+ %vecinit.i = insertelement <2 x i64> undef, i64 %q, i32 0
+ %b = shufflevector <2 x i64> %vecinit.i, <2 x i64> undef, <2 x i32> zeroinitializer
+ %res = call <2 x i64> @llvm.x86.avx512.mask.pmull.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> %passThru, i8 %mask)
+ ret <2 x i64> %res
+}
+
+define <2 x i64> @test_mask_mullo_epi64_rmbkz_128(<2 x i64> %a, i64* %ptr_b, i8 %mask) {
+ ;CHECK-LABEL: test_mask_mullo_epi64_rmbkz_128
+ ;CHECK: vpmullq (%rdi){1to2}, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0x99,0x40,0x07]
+ %q = load i64, i64* %ptr_b
+ %vecinit.i = insertelement <2 x i64> undef, i64 %q, i32 0
+ %b = shufflevector <2 x i64> %vecinit.i, <2 x i64> undef, <2 x i32> zeroinitializer
+ %res = call <2 x i64> @llvm.x86.avx512.mask.pmull.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> zeroinitializer, i8 %mask)
+ ret <2 x i64> %res
+}
+
+declare <2 x i64> @llvm.x86.avx512.mask.pmull.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)
+
+define <4 x float> @test_mask_andnot_ps_rr_128(<4 x float> %a, <4 x float> %b) {
+ ;CHECK-LABEL: test_mask_andnot_ps_rr_128
+ ;CHECK: vandnps %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x55,0xc1]
+ %res = call <4 x float> @llvm.x86.avx512.mask.andn.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 -1)
+ ret <4 x float> %res
+}
+
+define <4 x float> @test_mask_andnot_ps_rrk_128(<4 x float> %a, <4 x float> %b, <4 x float> %passThru, i8 %mask) {
+ ;CHECK-LABEL: test_mask_andnot_ps_rrk_128
+ ;CHECK: vandnps %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x09,0x55,0xd1]
+ %res = call <4 x float> @llvm.x86.avx512.mask.andn.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> %passThru, i8 %mask)
+ ret <4 x float> %res
+}
+
+define <4 x float> @test_mask_andnot_ps_rrkz_128(<4 x float> %a, <4 x float> %b, i8 %mask) {
+ ;CHECK-LABEL: test_mask_andnot_ps_rrkz_128
+ ;CHECK: vandnps %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x89,0x55,0xc1]
+ %res = call <4 x float> @llvm.x86.avx512.mask.andn.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 %mask)
+ ret <4 x float> %res
+}
+
+define <4 x float> @test_mask_andnot_ps_rm_128(<4 x float> %a, <4 x float>* %ptr_b) {
+ ;CHECK-LABEL: test_mask_andnot_ps_rm_128
+ ;CHECK: vandnps (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x55,0x07]
+ %b = load <4 x float>, <4 x float>* %ptr_b
+ %res = call <4 x float> @llvm.x86.avx512.mask.andn.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 -1)
+ ret <4 x float> %res
+}
+
+define <4 x float> @test_mask_andnot_ps_rmk_128(<4 x float> %a, <4 x float>* %ptr_b, <4 x float> %passThru, i8 %mask) {
+ ;CHECK-LABEL: test_mask_andnot_ps_rmk_128
+ ;CHECK: vandnps (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x09,0x55,0x0f]
+ %b = load <4 x float>, <4 x float>* %ptr_b
+ %res = call <4 x float> @llvm.x86.avx512.mask.andn.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> %passThru, i8 %mask)
+ ret <4 x float> %res
+}
+
+define <4 x float> @test_mask_andnot_ps_rmkz_128(<4 x float> %a, <4 x float>* %ptr_b, i8 %mask) {
+ ;CHECK-LABEL: test_mask_andnot_ps_rmkz_128
+ ;CHECK: vandnps (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x89,0x55,0x07]
+ %b = load <4 x float>, <4 x float>* %ptr_b
+ %res = call <4 x float> @llvm.x86.avx512.mask.andn.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 %mask)
+ ret <4 x float> %res
+}
+
+define <4 x float> @test_mask_andnot_ps_rmb_128(<4 x float> %a, float* %ptr_b) {
+ ;CHECK-LABEL: test_mask_andnot_ps_rmb_128
+ ;CHECK: vandnps (%rdi){1to4}, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x18,0x55,0x07]
+ %q = load float, float* %ptr_b
+ %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
+ %b = shufflevector <4 x float> %vecinit.i, <4 x float> undef, <4 x i32> zeroinitializer
+ %res = call <4 x float> @llvm.x86.avx512.mask.andn.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 -1)
+ ret <4 x float> %res
+}
+
+define <4 x float> @test_mask_andnot_ps_rmbk_128(<4 x float> %a, float* %ptr_b, <4 x float> %passThru, i8 %mask) {
+ ;CHECK-LABEL: test_mask_andnot_ps_rmbk_128
+ ;CHECK: vandnps (%rdi){1to4}, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x19,0x55,0x0f]
+ %q = load float, float* %ptr_b
+ %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
+ %b = shufflevector <4 x float> %vecinit.i, <4 x float> undef, <4 x i32> zeroinitializer
+ %res = call <4 x float> @llvm.x86.avx512.mask.andn.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> %passThru, i8 %mask)
+ ret <4 x float> %res
+}
+
+define <4 x float> @test_mask_andnot_ps_rmbkz_128(<4 x float> %a, float* %ptr_b, i8 %mask) {
+ ;CHECK-LABEL: test_mask_andnot_ps_rmbkz_128
+ ;CHECK: vandnps (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x99,0x55,0x07]
+ %q = load float, float* %ptr_b
+ %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
+ %b = shufflevector <4 x float> %vecinit.i, <4 x float> undef, <4 x i32> zeroinitializer
+ %res = call <4 x float> @llvm.x86.avx512.mask.andn.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 %mask)
+ ret <4 x float> %res
+}
+
+declare <4 x float> @llvm.x86.avx512.mask.andn.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)
+
+define <8 x float> @test_mask_andnot_ps_rr_256(<8 x float> %a, <8 x float> %b) {
+ ;CHECK-LABEL: test_mask_andnot_ps_rr_256
+ ;CHECK: vandnps %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x55,0xc1]
+ %res = call <8 x float> @llvm.x86.avx512.mask.andn.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 -1)
+ ret <8 x float> %res
+}
+
+define <8 x float> @test_mask_andnot_ps_rrk_256(<8 x float> %a, <8 x float> %b, <8 x float> %passThru, i8 %mask) {
+ ;CHECK-LABEL: test_mask_andnot_ps_rrk_256
+ ;CHECK: vandnps %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x55,0xd1]
+ %res = call <8 x float> @llvm.x86.avx512.mask.andn.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %passThru, i8 %mask)
+ ret <8 x float> %res
+}
+
+define <8 x float> @test_mask_andnot_ps_rrkz_256(<8 x float> %a, <8 x float> %b, i8 %mask) {
+ ;CHECK-LABEL: test_mask_andnot_ps_rrkz_256
+ ;CHECK: vandnps %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xa9,0x55,0xc1]
+ %res = call <8 x float> @llvm.x86.avx512.mask.andn.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 %mask)
+ ret <8 x float> %res
+}
+
+define <8 x float> @test_mask_andnot_ps_rm_256(<8 x float> %a, <8 x float>* %ptr_b) {
+ ;CHECK-LABEL: test_mask_andnot_ps_rm_256
+ ;CHECK: vandnps (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x55,0x07]
+ %b = load <8 x float>, <8 x float>* %ptr_b
+ %res = call <8 x float> @llvm.x86.avx512.mask.andn.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 -1)
+ ret <8 x float> %res
+}
+
+define <8 x float> @test_mask_andnot_ps_rmk_256(<8 x float> %a, <8 x float>* %ptr_b, <8 x float> %passThru, i8 %mask) {
+ ;CHECK-LABEL: test_mask_andnot_ps_rmk_256
+ ;CHECK: vandnps (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x55,0x0f]
+ %b = load <8 x float>, <8 x float>* %ptr_b
+ %res = call <8 x float> @llvm.x86.avx512.mask.andn.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %passThru, i8 %mask)
+ ret <8 x float> %res
+}
+
+define <8 x float> @test_mask_andnot_ps_rmkz_256(<8 x float> %a, <8 x float>* %ptr_b, i8 %mask) {
+ ;CHECK-LABEL: test_mask_andnot_ps_rmkz_256
+ ;CHECK: vandnps (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xa9,0x55,0x07]
+ %b = load <8 x float>, <8 x float>* %ptr_b
+ %res = call <8 x float> @llvm.x86.avx512.mask.andn.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 %mask)
+ ret <8 x float> %res
+}
+
+define <8 x float> @test_mask_andnot_ps_rmb_256(<8 x float> %a, float* %ptr_b) {
+ ;CHECK-LABEL: test_mask_andnot_ps_rmb_256
+ ;CHECK: vandnps (%rdi){1to8}, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x38,0x55,0x07]
+ %q = load float, float* %ptr_b
+ %vecinit.i = insertelement <8 x float> undef, float %q, i32 0
+ %b = shufflevector <8 x float> %vecinit.i, <8 x float> undef, <8 x i32> zeroinitializer
+ %res = call <8 x float> @llvm.x86.avx512.mask.andn.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 -1)
+ ret <8 x float> %res
+}
+
+define <8 x float> @test_mask_andnot_ps_rmbk_256(<8 x float> %a, float* %ptr_b, <8 x float> %passThru, i8 %mask) {
+ ;CHECK-LABEL: test_mask_andnot_ps_rmbk_256
+ ;CHECK: vandnps (%rdi){1to8}, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x39,0x55,0x0f]
+ %q = load float, float* %ptr_b
+ %vecinit.i = insertelement <8 x float> undef, float %q, i32 0
+ %b = shufflevector <8 x float> %vecinit.i, <8 x float> undef, <8 x i32> zeroinitializer
+ %res = call <8 x float> @llvm.x86.avx512.mask.andn.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %passThru, i8 %mask)
+ ret <8 x float> %res
+}
+
+define <8 x float> @test_mask_andnot_ps_rmbkz_256(<8 x float> %a, float* %ptr_b, i8 %mask) {
+ ;CHECK-LABEL: test_mask_andnot_ps_rmbkz_256
+ ;CHECK: vandnps (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xb9,0x55,0x07]
+ %q = load float, float* %ptr_b
+ %vecinit.i = insertelement <8 x float> undef, float %q, i32 0
+ %b = shufflevector <8 x float> %vecinit.i, <8 x float> undef, <8 x i32> zeroinitializer
+ %res = call <8 x float> @llvm.x86.avx512.mask.andn.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 %mask)
+ ret <8 x float> %res
+}
+
+declare <8 x float> @llvm.x86.avx512.mask.andn.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)
+
+define <16 x float> @test_mask_andnot_ps_rr_512(<16 x float> %a, <16 x float> %b) {
+ ;CHECK-LABEL: test_mask_andnot_ps_rr_512
+ ;CHECK: vandnps %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x55,0xc1]
+ %res = call <16 x float> @llvm.x86.avx512.mask.andn.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 -1)
+ ret <16 x float> %res
+}
+
+define <16 x float> @test_mask_andnot_ps_rrk_512(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask) {
+ ;CHECK-LABEL: test_mask_andnot_ps_rrk_512
+ ;CHECK: vandnps %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x55,0xd1]
+ %res = call <16 x float> @llvm.x86.avx512.mask.andn.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask)
+ ret <16 x float> %res
+}
+
+define <16 x float> @test_mask_andnot_ps_rrkz_512(<16 x float> %a, <16 x float> %b, i16 %mask) {
+ ;CHECK-LABEL: test_mask_andnot_ps_rrkz_512
+ ;CHECK: vandnps %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xc9,0x55,0xc1]
+ %res = call <16 x float> @llvm.x86.avx512.mask.andn.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 %mask)
+ ret <16 x float> %res
+}
+
+define <16 x float> @test_mask_andnot_ps_rm_512(<16 x float> %a, <16 x float>* %ptr_b) {
+ ;CHECK-LABEL: test_mask_andnot_ps_rm_512
+ ;CHECK: vandnps (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x55,0x07]
+ %b = load <16 x float>, <16 x float>* %ptr_b
+ %res = call <16 x float> @llvm.x86.avx512.mask.andn.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 -1)
+ ret <16 x float> %res
+}
+
+define <16 x float> @test_mask_andnot_ps_rmk_512(<16 x float> %a, <16 x float>* %ptr_b, <16 x float> %passThru, i16 %mask) {
+ ;CHECK-LABEL: test_mask_andnot_ps_rmk_512
+ ;CHECK: vandnps (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x55,0x0f]
+ %b = load <16 x float>, <16 x float>* %ptr_b
+ %res = call <16 x float> @llvm.x86.avx512.mask.andn.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask)
+ ret <16 x float> %res
+}
+
+define <16 x float> @test_mask_andnot_ps_rmkz_512(<16 x float> %a, <16 x float>* %ptr_b, i16 %mask) {
+ ;CHECK-LABEL: test_mask_andnot_ps_rmkz_512
+ ;CHECK: vandnps (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xc9,0x55,0x07]
+ %b = load <16 x float>, <16 x float>* %ptr_b
+ %res = call <16 x float> @llvm.x86.avx512.mask.andn.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 %mask)
+ ret <16 x float> %res
+}
+
+define <16 x float> @test_mask_andnot_ps_rmb_512(<16 x float> %a, float* %ptr_b) {
+ ;CHECK-LABEL: test_mask_andnot_ps_rmb_512
+ ;CHECK: vandnps (%rdi){1to16}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x58,0x55,0x07]
+ %q = load float, float* %ptr_b
+ %vecinit.i = insertelement <16 x float> undef, float %q, i32 0
+ %b = shufflevector <16 x float> %vecinit.i, <16 x float> undef, <16 x i32> zeroinitializer
+ %res = call <16 x float> @llvm.x86.avx512.mask.andn.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 -1)
+ ret <16 x float> %res
+}
+
+define <16 x float> @test_mask_andnot_ps_rmbk_512(<16 x float> %a, float* %ptr_b, <16 x float> %passThru, i16 %mask) {
+ ;CHECK-LABEL: test_mask_andnot_ps_rmbk_512
+ ;CHECK: vandnps (%rdi){1to16}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x59,0x55,0x0f]
+ %q = load float, float* %ptr_b
+ %vecinit.i = insertelement <16 x float> undef, float %q, i32 0
+ %b = shufflevector <16 x float> %vecinit.i, <16 x float> undef, <16 x i32> zeroinitializer
+ %res = call <16 x float> @llvm.x86.avx512.mask.andn.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask)
+ ret <16 x float> %res
+}
+
+define <16 x float> @test_mask_andnot_ps_rmbkz_512(<16 x float> %a, float* %ptr_b, i16 %mask) {
+ ;CHECK-LABEL: test_mask_andnot_ps_rmbkz_512
+ ;CHECK: vandnps (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xd9,0x55,0x07]
+ %q = load float, float* %ptr_b
+ %vecinit.i = insertelement <16 x float> undef, float %q, i32 0
+ %b = shufflevector <16 x float> %vecinit.i, <16 x float> undef, <16 x i32> zeroinitializer
+ %res = call <16 x float> @llvm.x86.avx512.mask.andn.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 %mask)
+ ret <16 x float> %res
+}
+
+declare <16 x float> @llvm.x86.avx512.mask.andn.ps.512(<16 x float>, <16 x float>, <16 x float>, i16)
+
+define <4 x float> @test_mask_and_ps_rr_128(<4 x float> %a, <4 x float> %b) {
+ ;CHECK-LABEL: test_mask_and_ps_rr_128
+ ;CHECK: vandps %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x54,0xc1]
+ %res = call <4 x float> @llvm.x86.avx512.mask.and.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 -1)
+ ret <4 x float> %res
+}
+
+define <4 x float> @test_mask_and_ps_rrk_128(<4 x float> %a, <4 x float> %b, <4 x float> %passThru, i8 %mask) {
+ ;CHECK-LABEL: test_mask_and_ps_rrk_128
+ ;CHECK: vandps %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x09,0x54,0xd1]
+ %res = call <4 x float> @llvm.x86.avx512.mask.and.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> %passThru, i8 %mask)
+ ret <4 x float> %res
+}
+
+define <4 x float> @test_mask_and_ps_rrkz_128(<4 x float> %a, <4 x float> %b, i8 %mask) {
+ ;CHECK-LABEL: test_mask_and_ps_rrkz_128
+ ;CHECK: vandps %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x89,0x54,0xc1]
+ %res = call <4 x float> @llvm.x86.avx512.mask.and.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 %mask)
+ ret <4 x float> %res
+}
+
+define <4 x float> @test_mask_and_ps_rm_128(<4 x float> %a, <4 x float>* %ptr_b) {
+ ;CHECK-LABEL: test_mask_and_ps_rm_128
+ ;CHECK: vandps (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x54,0x07]
+ %b = load <4 x float>, <4 x float>* %ptr_b
+ %res = call <4 x float> @llvm.x86.avx512.mask.and.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 -1)
+ ret <4 x float> %res
+}
+
+define <4 x float> @test_mask_and_ps_rmk_128(<4 x float> %a, <4 x float>* %ptr_b, <4 x float> %passThru, i8 %mask) {
+ ;CHECK-LABEL: test_mask_and_ps_rmk_128
+ ;CHECK: vandps (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x09,0x54,0x0f]
+ %b = load <4 x float>, <4 x float>* %ptr_b
+ %res = call <4 x float> @llvm.x86.avx512.mask.and.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> %passThru, i8 %mask)
+ ret <4 x float> %res
+}
+
+define <4 x float> @test_mask_and_ps_rmkz_128(<4 x float> %a, <4 x float>* %ptr_b, i8 %mask) {
+ ;CHECK-LABEL: test_mask_and_ps_rmkz_128
+ ;CHECK: vandps (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x89,0x54,0x07]
+ %b = load <4 x float>, <4 x float>* %ptr_b
+ %res = call <4 x float> @llvm.x86.avx512.mask.and.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 %mask)
+ ret <4 x float> %res
+}
+
+define <4 x float> @test_mask_and_ps_rmb_128(<4 x float> %a, float* %ptr_b) {
+ ;CHECK-LABEL: test_mask_and_ps_rmb_128
+ ;CHECK: vandps (%rdi){1to4}, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x18,0x54,0x07]
+ %q = load float, float* %ptr_b
+ %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
+ %b = shufflevector <4 x float> %vecinit.i, <4 x float> undef, <4 x i32> zeroinitializer
+ %res = call <4 x float> @llvm.x86.avx512.mask.and.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 -1)
+ ret <4 x float> %res
+}
+
+define <4 x float> @test_mask_and_ps_rmbk_128(<4 x float> %a, float* %ptr_b, <4 x float> %passThru, i8 %mask) {
+ ;CHECK-LABEL: test_mask_and_ps_rmbk_128
+ ;CHECK: vandps (%rdi){1to4}, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x19,0x54,0x0f]
+ %q = load float, float* %ptr_b
+ %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
+ %b = shufflevector <4 x float> %vecinit.i, <4 x float> undef, <4 x i32> zeroinitializer
+ %res = call <4 x float> @llvm.x86.avx512.mask.and.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> %passThru, i8 %mask)
+ ret <4 x float> %res
+}
+
+define <4 x float> @test_mask_and_ps_rmbkz_128(<4 x float> %a, float* %ptr_b, i8 %mask) {
+ ;CHECK-LABEL: test_mask_and_ps_rmbkz_128
+ ;CHECK: vandps (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x99,0x54,0x07]
+ %q = load float, float* %ptr_b
+ %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
+ %b = shufflevector <4 x float> %vecinit.i, <4 x float> undef, <4 x i32> zeroinitializer
+ %res = call <4 x float> @llvm.x86.avx512.mask.and.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 %mask)
+ ret <4 x float> %res
+}
+
+declare <4 x float> @llvm.x86.avx512.mask.and.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)
+
+define <8 x float> @test_mask_and_ps_rr_256(<8 x float> %a, <8 x float> %b) {
+ ;CHECK-LABEL: test_mask_and_ps_rr_256
+ ;CHECK: vandps %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x54,0xc1]
+ %res = call <8 x float> @llvm.x86.avx512.mask.and.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 -1)
+ ret <8 x float> %res
+}
+
+define <8 x float> @test_mask_and_ps_rrk_256(<8 x float> %a, <8 x float> %b, <8 x float> %passThru, i8 %mask) {
+ ;CHECK-LABEL: test_mask_and_ps_rrk_256
+ ;CHECK: vandps %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x54,0xd1]
+ %res = call <8 x float> @llvm.x86.avx512.mask.and.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %passThru, i8 %mask)
+ ret <8 x float> %res
+}
+
+define <8 x float> @test_mask_and_ps_rrkz_256(<8 x float> %a, <8 x float> %b, i8 %mask) {
+ ;CHECK-LABEL: test_mask_and_ps_rrkz_256
+ ;CHECK: vandps %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xa9,0x54,0xc1]
+ %res = call <8 x float> @llvm.x86.avx512.mask.and.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 %mask)
+ ret <8 x float> %res
+}
+
+define <8 x float> @test_mask_and_ps_rm_256(<8 x float> %a, <8 x float>* %ptr_b) {
+ ;CHECK-LABEL: test_mask_and_ps_rm_256
+ ;CHECK: vandps (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x54,0x07]
+ %b = load <8 x float>, <8 x float>* %ptr_b
+ %res = call <8 x float> @llvm.x86.avx512.mask.and.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 -1)
+ ret <8 x float> %res
+}
+
+define <8 x float> @test_mask_and_ps_rmk_256(<8 x float> %a, <8 x float>* %ptr_b, <8 x float> %passThru, i8 %mask) {
+ ;CHECK-LABEL: test_mask_and_ps_rmk_256
+ ;CHECK: vandps (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x54,0x0f]
+ %b = load <8 x float>, <8 x float>* %ptr_b
+ %res = call <8 x float> @llvm.x86.avx512.mask.and.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %passThru, i8 %mask)
+ ret <8 x float> %res
+}
+
+define <8 x float> @test_mask_and_ps_rmkz_256(<8 x float> %a, <8 x float>* %ptr_b, i8 %mask) {
+ ;CHECK-LABEL: test_mask_and_ps_rmkz_256
+ ;CHECK: vandps (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xa9,0x54,0x07]
+ %b = load <8 x float>, <8 x float>* %ptr_b
+ %res = call <8 x float> @llvm.x86.avx512.mask.and.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 %mask)
+ ret <8 x float> %res
+}
+
+define <8 x float> @test_mask_and_ps_rmb_256(<8 x float> %a, float* %ptr_b) {
+ ;CHECK-LABEL: test_mask_and_ps_rmb_256
+ ;CHECK: vandps (%rdi){1to8}, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x38,0x54,0x07]
+ %q = load float, float* %ptr_b
+ %vecinit.i = insertelement <8 x float> undef, float %q, i32 0
+ %b = shufflevector <8 x float> %vecinit.i, <8 x float> undef, <8 x i32> zeroinitializer
+ %res = call <8 x float> @llvm.x86.avx512.mask.and.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 -1)
+ ret <8 x float> %res
+}
+
+define <8 x float> @test_mask_and_ps_rmbk_256(<8 x float> %a, float* %ptr_b, <8 x float> %passThru, i8 %mask) {
+ ;CHECK-LABEL: test_mask_and_ps_rmbk_256
+ ;CHECK: vandps (%rdi){1to8}, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x39,0x54,0x0f]
+ %q = load float, float* %ptr_b
+ %vecinit.i = insertelement <8 x float> undef, float %q, i32 0
+ %b = shufflevector <8 x float> %vecinit.i, <8 x float> undef, <8 x i32> zeroinitializer
+ %res = call <8 x float> @llvm.x86.avx512.mask.and.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %passThru, i8 %mask)
+ ret <8 x float> %res
+}
+
+define <8 x float> @test_mask_and_ps_rmbkz_256(<8 x float> %a, float* %ptr_b, i8 %mask) {
+ ;CHECK-LABEL: test_mask_and_ps_rmbkz_256
+ ;CHECK: vandps (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xb9,0x54,0x07]
+ %q = load float, float* %ptr_b
+ %vecinit.i = insertelement <8 x float> undef, float %q, i32 0
+ %b = shufflevector <8 x float> %vecinit.i, <8 x float> undef, <8 x i32> zeroinitializer
+ %res = call <8 x float> @llvm.x86.avx512.mask.and.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 %mask)
+ ret <8 x float> %res
+}
+
+declare <8 x float> @llvm.x86.avx512.mask.and.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)
+
+define <16 x float> @test_mask_and_ps_rr_512(<16 x float> %a, <16 x float> %b) {
+ ;CHECK-LABEL: test_mask_and_ps_rr_512
+ ;CHECK: vandps %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x54,0xc1]
+ %res = call <16 x float> @llvm.x86.avx512.mask.and.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 -1)
+ ret <16 x float> %res
+}
+
+define <16 x float> @test_mask_and_ps_rrk_512(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask) {
+ ;CHECK-LABEL: test_mask_and_ps_rrk_512
+ ;CHECK: vandps %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x54,0xd1]
+ %res = call <16 x float> @llvm.x86.avx512.mask.and.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask)
+ ret <16 x float> %res
+}
+
+define <16 x float> @test_mask_and_ps_rrkz_512(<16 x float> %a, <16 x float> %b, i16 %mask) {
+ ;CHECK-LABEL: test_mask_and_ps_rrkz_512
+ ;CHECK: vandps %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xc9,0x54,0xc1]
+ %res = call <16 x float> @llvm.x86.avx512.mask.and.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 %mask)
+ ret <16 x float> %res
+}
+
+define <16 x float> @test_mask_and_ps_rm_512(<16 x float> %a, <16 x float>* %ptr_b) {
+ ;CHECK-LABEL: test_mask_and_ps_rm_512
+ ;CHECK: vandps (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x54,0x07]
+ %b = load <16 x float>, <16 x float>* %ptr_b
+ %res = call <16 x float> @llvm.x86.avx512.mask.and.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 -1)
+ ret <16 x float> %res
+}
+
+define <16 x float> @test_mask_and_ps_rmk_512(<16 x float> %a, <16 x float>* %ptr_b, <16 x float> %passThru, i16 %mask) {
+ ;CHECK-LABEL: test_mask_and_ps_rmk_512
+ ;CHECK: vandps (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x54,0x0f]
+ %b = load <16 x float>, <16 x float>* %ptr_b
+ %res = call <16 x float> @llvm.x86.avx512.mask.and.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask)
+ ret <16 x float> %res
+}
+
+define <16 x float> @test_mask_and_ps_rmkz_512(<16 x float> %a, <16 x float>* %ptr_b, i16 %mask) {
+ ;CHECK-LABEL: test_mask_and_ps_rmkz_512
+ ;CHECK: vandps (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xc9,0x54,0x07]
+ %b = load <16 x float>, <16 x float>* %ptr_b
+ %res = call <16 x float> @llvm.x86.avx512.mask.and.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 %mask)
+ ret <16 x float> %res
+}
+
+define <16 x float> @test_mask_and_ps_rmb_512(<16 x float> %a, float* %ptr_b) {
+ ;CHECK-LABEL: test_mask_and_ps_rmb_512
+ ;CHECK: vandps (%rdi){1to16}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x58,0x54,0x07]
+ %q = load float, float* %ptr_b
+ %vecinit.i = insertelement <16 x float> undef, float %q, i32 0
+ %b = shufflevector <16 x float> %vecinit.i, <16 x float> undef, <16 x i32> zeroinitializer
+ %res = call <16 x float> @llvm.x86.avx512.mask.and.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 -1)
+ ret <16 x float> %res
+}
+
+define <16 x float> @test_mask_and_ps_rmbk_512(<16 x float> %a, float* %ptr_b, <16 x float> %passThru, i16 %mask) {
+ ;CHECK-LABEL: test_mask_and_ps_rmbk_512
+ ;CHECK: vandps (%rdi){1to16}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x59,0x54,0x0f]
+ %q = load float, float* %ptr_b
+ %vecinit.i = insertelement <16 x float> undef, float %q, i32 0
+ %b = shufflevector <16 x float> %vecinit.i, <16 x float> undef, <16 x i32> zeroinitializer
+ %res = call <16 x float> @llvm.x86.avx512.mask.and.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask)
+ ret <16 x float> %res
+}
+
+define <16 x float> @test_mask_and_ps_rmbkz_512(<16 x float> %a, float* %ptr_b, i16 %mask) {
+ ;CHECK-LABEL: test_mask_and_ps_rmbkz_512
+ ;CHECK: vandps (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xd9,0x54,0x07]
+ %q = load float, float* %ptr_b
+ %vecinit.i = insertelement <16 x float> undef, float %q, i32 0
+ %b = shufflevector <16 x float> %vecinit.i, <16 x float> undef, <16 x i32> zeroinitializer
+ %res = call <16 x float> @llvm.x86.avx512.mask.and.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 %mask)
+ ret <16 x float> %res
+}
+
+declare <16 x float> @llvm.x86.avx512.mask.and.ps.512(<16 x float>, <16 x float>, <16 x float>, i16)
+
+define <4 x float> @test_mask_or_ps_rr_128(<4 x float> %a, <4 x float> %b) {
+ ;CHECK-LABEL: test_mask_or_ps_rr_128
+ ;CHECK: vorps %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x56,0xc1]
+ %res = call <4 x float> @llvm.x86.avx512.mask.or.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 -1)
+ ret <4 x float> %res
+}
+
+define <4 x float> @test_mask_or_ps_rrk_128(<4 x float> %a, <4 x float> %b, <4 x float> %passThru, i8 %mask) {
+ ;CHECK-LABEL: test_mask_or_ps_rrk_128
+ ;CHECK: vorps %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x09,0x56,0xd1]
+ %res = call <4 x float> @llvm.x86.avx512.mask.or.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> %passThru, i8 %mask)
+ ret <4 x float> %res
+}
+
+define <4 x float> @test_mask_or_ps_rrkz_128(<4 x float> %a, <4 x float> %b, i8 %mask) {
+ ;CHECK-LABEL: test_mask_or_ps_rrkz_128
+ ;CHECK: vorps %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x89,0x56,0xc1]
+ %res = call <4 x float> @llvm.x86.avx512.mask.or.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 %mask)
+ ret <4 x float> %res
+}
+
+define <4 x float> @test_mask_or_ps_rm_128(<4 x float> %a, <4 x float>* %ptr_b) {
+ ;CHECK-LABEL: test_mask_or_ps_rm_128
+ ;CHECK: vorps (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x56,0x07]
+ %b = load <4 x float>, <4 x float>* %ptr_b
+ %res = call <4 x float> @llvm.x86.avx512.mask.or.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 -1)
+ ret <4 x float> %res
+}
+
+define <4 x float> @test_mask_or_ps_rmk_128(<4 x float> %a, <4 x float>* %ptr_b, <4 x float> %passThru, i8 %mask) {
+ ;CHECK-LABEL: test_mask_or_ps_rmk_128
+ ;CHECK: vorps (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x09,0x56,0x0f]
+ %b = load <4 x float>, <4 x float>* %ptr_b
+ %res = call <4 x float> @llvm.x86.avx512.mask.or.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> %passThru, i8 %mask)
+ ret <4 x float> %res
+}
+
+define <4 x float> @test_mask_or_ps_rmkz_128(<4 x float> %a, <4 x float>* %ptr_b, i8 %mask) {
+ ;CHECK-LABEL: test_mask_or_ps_rmkz_128
+ ;CHECK: vorps (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x89,0x56,0x07]
+ %b = load <4 x float>, <4 x float>* %ptr_b
+ %res = call <4 x float> @llvm.x86.avx512.mask.or.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 %mask)
+ ret <4 x float> %res
+}
+
+define <4 x float> @test_mask_or_ps_rmb_128(<4 x float> %a, float* %ptr_b) {
+ ;CHECK-LABEL: test_mask_or_ps_rmb_128
+ ;CHECK: vorps (%rdi){1to4}, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x18,0x56,0x07]
+ %q = load float, float* %ptr_b
+ %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
+ %b = shufflevector <4 x float> %vecinit.i, <4 x float> undef, <4 x i32> zeroinitializer
+ %res = call <4 x float> @llvm.x86.avx512.mask.or.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 -1)
+ ret <4 x float> %res
+}
+
+define <4 x float> @test_mask_or_ps_rmbk_128(<4 x float> %a, float* %ptr_b, <4 x float> %passThru, i8 %mask) {
+ ;CHECK-LABEL: test_mask_or_ps_rmbk_128
+ ;CHECK: vorps (%rdi){1to4}, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x19,0x56,0x0f]
+ %q = load float, float* %ptr_b
+ %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
+ %b = shufflevector <4 x float> %vecinit.i, <4 x float> undef, <4 x i32> zeroinitializer
+ %res = call <4 x float> @llvm.x86.avx512.mask.or.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> %passThru, i8 %mask)
+ ret <4 x float> %res
+}
+
+define <4 x float> @test_mask_or_ps_rmbkz_128(<4 x float> %a, float* %ptr_b, i8 %mask) {
+ ;CHECK-LABEL: test_mask_or_ps_rmbkz_128
+ ;CHECK: vorps (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x99,0x56,0x07]
+ %q = load float, float* %ptr_b
+ %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
+ %b = shufflevector <4 x float> %vecinit.i, <4 x float> undef, <4 x i32> zeroinitializer
+ %res = call <4 x float> @llvm.x86.avx512.mask.or.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 %mask)
+ ret <4 x float> %res
+}
+
+declare <4 x float> @llvm.x86.avx512.mask.or.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)
+
+define <8 x float> @test_mask_or_ps_rr_256(<8 x float> %a, <8 x float> %b) {
+ ;CHECK-LABEL: test_mask_or_ps_rr_256
+ ;CHECK: vorps %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x56,0xc1]
+ %res = call <8 x float> @llvm.x86.avx512.mask.or.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 -1)
+ ret <8 x float> %res
+}
+
+define <8 x float> @test_mask_or_ps_rrk_256(<8 x float> %a, <8 x float> %b, <8 x float> %passThru, i8 %mask) {
+ ;CHECK-LABEL: test_mask_or_ps_rrk_256
+ ;CHECK: vorps %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x56,0xd1]
+ %res = call <8 x float> @llvm.x86.avx512.mask.or.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %passThru, i8 %mask)
+ ret <8 x float> %res
+}
+
+define <8 x float> @test_mask_or_ps_rrkz_256(<8 x float> %a, <8 x float> %b, i8 %mask) {
+ ;CHECK-LABEL: test_mask_or_ps_rrkz_256
+ ;CHECK: vorps %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xa9,0x56,0xc1]
+ %res = call <8 x float> @llvm.x86.avx512.mask.or.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 %mask)
+ ret <8 x float> %res
+}
+
+define <8 x float> @test_mask_or_ps_rm_256(<8 x float> %a, <8 x float>* %ptr_b) {
+ ;CHECK-LABEL: test_mask_or_ps_rm_256
+ ;CHECK: vorps (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x56,0x07]
+ %b = load <8 x float>, <8 x float>* %ptr_b
+ %res = call <8 x float> @llvm.x86.avx512.mask.or.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 -1)
+ ret <8 x float> %res
+}
+
+define <8 x float> @test_mask_or_ps_rmk_256(<8 x float> %a, <8 x float>* %ptr_b, <8 x float> %passThru, i8 %mask) {
+ ;CHECK-LABEL: test_mask_or_ps_rmk_256
+ ;CHECK: vorps (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x56,0x0f]
+ %b = load <8 x float>, <8 x float>* %ptr_b
+ %res = call <8 x float> @llvm.x86.avx512.mask.or.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %passThru, i8 %mask)
+ ret <8 x float> %res
+}
+
+define <8 x float> @test_mask_or_ps_rmkz_256(<8 x float> %a, <8 x float>* %ptr_b, i8 %mask) {
+ ;CHECK-LABEL: test_mask_or_ps_rmkz_256
+ ;CHECK: vorps (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xa9,0x56,0x07]
+ %b = load <8 x float>, <8 x float>* %ptr_b
+ %res = call <8 x float> @llvm.x86.avx512.mask.or.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 %mask)
+ ret <8 x float> %res
+}
+
+define <8 x float> @test_mask_or_ps_rmb_256(<8 x float> %a, float* %ptr_b) {
+ ;CHECK-LABEL: test_mask_or_ps_rmb_256
+ ;CHECK: vorps (%rdi){1to8}, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x38,0x56,0x07]
+ %q = load float, float* %ptr_b
+ %vecinit.i = insertelement <8 x float> undef, float %q, i32 0
+ %b = shufflevector <8 x float> %vecinit.i, <8 x float> undef, <8 x i32> zeroinitializer
+ %res = call <8 x float> @llvm.x86.avx512.mask.or.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 -1)
+ ret <8 x float> %res
+}
+
+define <8 x float> @test_mask_or_ps_rmbk_256(<8 x float> %a, float* %ptr_b, <8 x float> %passThru, i8 %mask) {
+ ;CHECK-LABEL: test_mask_or_ps_rmbk_256
+ ;CHECK: vorps (%rdi){1to8}, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x39,0x56,0x0f]
+ %q = load float, float* %ptr_b
+ %vecinit.i = insertelement <8 x float> undef, float %q, i32 0
+ %b = shufflevector <8 x float> %vecinit.i, <8 x float> undef, <8 x i32> zeroinitializer
+ %res = call <8 x float> @llvm.x86.avx512.mask.or.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %passThru, i8 %mask)
+ ret <8 x float> %res
+}
+
+define <8 x float> @test_mask_or_ps_rmbkz_256(<8 x float> %a, float* %ptr_b, i8 %mask) {
+ ;CHECK-LABEL: test_mask_or_ps_rmbkz_256
+ ;CHECK: vorps (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xb9,0x56,0x07]
+ %q = load float, float* %ptr_b
+ %vecinit.i = insertelement <8 x float> undef, float %q, i32 0
+ %b = shufflevector <8 x float> %vecinit.i, <8 x float> undef, <8 x i32> zeroinitializer
+ %res = call <8 x float> @llvm.x86.avx512.mask.or.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 %mask)
+ ret <8 x float> %res
+}
+
+declare <8 x float> @llvm.x86.avx512.mask.or.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)
+
+define <16 x float> @test_mask_or_ps_rr_512(<16 x float> %a, <16 x float> %b) {
+ ;CHECK-LABEL: test_mask_or_ps_rr_512
+ ;CHECK: vorps %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x56,0xc1]
+ %res = call <16 x float> @llvm.x86.avx512.mask.or.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 -1)
+ ret <16 x float> %res
+}
+
+define <16 x float> @test_mask_or_ps_rrk_512(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask) {
+ ;CHECK-LABEL: test_mask_or_ps_rrk_512
+ ;CHECK: vorps %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x56,0xd1]
+ %res = call <16 x float> @llvm.x86.avx512.mask.or.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask)
+ ret <16 x float> %res
+}
+
+define <16 x float> @test_mask_or_ps_rrkz_512(<16 x float> %a, <16 x float> %b, i16 %mask) {
+ ;CHECK-LABEL: test_mask_or_ps_rrkz_512
+ ;CHECK: vorps %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xc9,0x56,0xc1]
+ %res = call <16 x float> @llvm.x86.avx512.mask.or.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 %mask)
+ ret <16 x float> %res
+}
+
+define <16 x float> @test_mask_or_ps_rm_512(<16 x float> %a, <16 x float>* %ptr_b) {
+ ;CHECK-LABEL: test_mask_or_ps_rm_512
+ ;CHECK: vorps (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x56,0x07]
+ %b = load <16 x float>, <16 x float>* %ptr_b
+ %res = call <16 x float> @llvm.x86.avx512.mask.or.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 -1)
+ ret <16 x float> %res
+}
+
+define <16 x float> @test_mask_or_ps_rmk_512(<16 x float> %a, <16 x float>* %ptr_b, <16 x float> %passThru, i16 %mask) {
+ ;CHECK-LABEL: test_mask_or_ps_rmk_512
+ ;CHECK: vorps (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x56,0x0f]
+ %b = load <16 x float>, <16 x float>* %ptr_b
+ %res = call <16 x float> @llvm.x86.avx512.mask.or.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask)
+ ret <16 x float> %res
+}
+
+define <16 x float> @test_mask_or_ps_rmkz_512(<16 x float> %a, <16 x float>* %ptr_b, i16 %mask) {
+ ;CHECK-LABEL: test_mask_or_ps_rmkz_512
+ ;CHECK: vorps (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xc9,0x56,0x07]
+ %b = load <16 x float>, <16 x float>* %ptr_b
+ %res = call <16 x float> @llvm.x86.avx512.mask.or.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 %mask)
+ ret <16 x float> %res
+}
+
+define <16 x float> @test_mask_or_ps_rmb_512(<16 x float> %a, float* %ptr_b) {
+ ;CHECK-LABEL: test_mask_or_ps_rmb_512
+ ;CHECK: vorps (%rdi){1to16}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x58,0x56,0x07]
+ %q = load float, float* %ptr_b
+ %vecinit.i = insertelement <16 x float> undef, float %q, i32 0
+ %b = shufflevector <16 x float> %vecinit.i, <16 x float> undef, <16 x i32> zeroinitializer
+ %res = call <16 x float> @llvm.x86.avx512.mask.or.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 -1)
+ ret <16 x float> %res
+}
+
+define <16 x float> @test_mask_or_ps_rmbk_512(<16 x float> %a, float* %ptr_b, <16 x float> %passThru, i16 %mask) {
+ ;CHECK-LABEL: test_mask_or_ps_rmbk_512
+ ;CHECK: vorps (%rdi){1to16}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x59,0x56,0x0f]
+ %q = load float, float* %ptr_b
+ %vecinit.i = insertelement <16 x float> undef, float %q, i32 0
+ %b = shufflevector <16 x float> %vecinit.i, <16 x float> undef, <16 x i32> zeroinitializer
+ %res = call <16 x float> @llvm.x86.avx512.mask.or.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask)
+ ret <16 x float> %res
+}
+
+define <16 x float> @test_mask_or_ps_rmbkz_512(<16 x float> %a, float* %ptr_b, i16 %mask) {
+ ;CHECK-LABEL: test_mask_or_ps_rmbkz_512
+ ;CHECK: vorps (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xd9,0x56,0x07]
+ %q = load float, float* %ptr_b
+ %vecinit.i = insertelement <16 x float> undef, float %q, i32 0
+ %b = shufflevector <16 x float> %vecinit.i, <16 x float> undef, <16 x i32> zeroinitializer
+ %res = call <16 x float> @llvm.x86.avx512.mask.or.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 %mask)
+ ret <16 x float> %res
+}
+
+declare <16 x float> @llvm.x86.avx512.mask.or.ps.512(<16 x float>, <16 x float>, <16 x float>, i16)
+
+define <4 x float> @test_mask_xor_ps_rr_128(<4 x float> %a, <4 x float> %b) {
+ ;CHECK-LABEL: test_mask_xor_ps_rr_128
+ ;CHECK: vxorps %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x57,0xc1]
+ %res = call <4 x float> @llvm.x86.avx512.mask.xor.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 -1)
+ ret <4 x float> %res
+}
+
+define <4 x float> @test_mask_xor_ps_rrk_128(<4 x float> %a, <4 x float> %b, <4 x float> %passThru, i8 %mask) {
+ ;CHECK-LABEL: test_mask_xor_ps_rrk_128
+ ;CHECK: vxorps %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x09,0x57,0xd1]
+ %res = call <4 x float> @llvm.x86.avx512.mask.xor.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> %passThru, i8 %mask)
+ ret <4 x float> %res
+}
+
+define <4 x float> @test_mask_xor_ps_rrkz_128(<4 x float> %a, <4 x float> %b, i8 %mask) {
+ ;CHECK-LABEL: test_mask_xor_ps_rrkz_128
+ ;CHECK: vxorps %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x89,0x57,0xc1]
+ %res = call <4 x float> @llvm.x86.avx512.mask.xor.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 %mask)
+ ret <4 x float> %res
+}
+
+define <4 x float> @test_mask_xor_ps_rm_128(<4 x float> %a, <4 x float>* %ptr_b) {
+ ;CHECK-LABEL: test_mask_xor_ps_rm_128
+ ;CHECK: vxorps (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x57,0x07]
+ %b = load <4 x float>, <4 x float>* %ptr_b
+ %res = call <4 x float> @llvm.x86.avx512.mask.xor.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 -1)
+ ret <4 x float> %res
+}
+
+define <4 x float> @test_mask_xor_ps_rmk_128(<4 x float> %a, <4 x float>* %ptr_b, <4 x float> %passThru, i8 %mask) {
+ ;CHECK-LABEL: test_mask_xor_ps_rmk_128
+ ;CHECK: vxorps (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x09,0x57,0x0f]
+ %b = load <4 x float>, <4 x float>* %ptr_b
+ %res = call <4 x float> @llvm.x86.avx512.mask.xor.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> %passThru, i8 %mask)
+ ret <4 x float> %res
+}
+
+define <4 x float> @test_mask_xor_ps_rmkz_128(<4 x float> %a, <4 x float>* %ptr_b, i8 %mask) {
+ ;CHECK-LABEL: test_mask_xor_ps_rmkz_128
+ ;CHECK: vxorps (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x89,0x57,0x07]
+ %b = load <4 x float>, <4 x float>* %ptr_b
+ %res = call <4 x float> @llvm.x86.avx512.mask.xor.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 %mask)
+ ret <4 x float> %res
+}
+
+define <4 x float> @test_mask_xor_ps_rmb_128(<4 x float> %a, float* %ptr_b) {
+ ;CHECK-LABEL: test_mask_xor_ps_rmb_128
+ ;CHECK: vxorps (%rdi){1to4}, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x18,0x57,0x07]
+ %q = load float, float* %ptr_b
+ %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
+ %b = shufflevector <4 x float> %vecinit.i, <4 x float> undef, <4 x i32> zeroinitializer
+ %res = call <4 x float> @llvm.x86.avx512.mask.xor.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 -1)
+ ret <4 x float> %res
+}
+
+define <4 x float> @test_mask_xor_ps_rmbk_128(<4 x float> %a, float* %ptr_b, <4 x float> %passThru, i8 %mask) {
+ ;CHECK-LABEL: test_mask_xor_ps_rmbk_128
+ ;CHECK: vxorps (%rdi){1to4}, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x19,0x57,0x0f]
+ %q = load float, float* %ptr_b
+ %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
+ %b = shufflevector <4 x float> %vecinit.i, <4 x float> undef, <4 x i32> zeroinitializer
+ %res = call <4 x float> @llvm.x86.avx512.mask.xor.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> %passThru, i8 %mask)
+ ret <4 x float> %res
+}
+
+define <4 x float> @test_mask_xor_ps_rmbkz_128(<4 x float> %a, float* %ptr_b, i8 %mask) {
+ ;CHECK-LABEL: test_mask_xor_ps_rmbkz_128
+ ;CHECK: vxorps (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x99,0x57,0x07]
+ %q = load float, float* %ptr_b
+ %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
+ %b = shufflevector <4 x float> %vecinit.i, <4 x float> undef, <4 x i32> zeroinitializer
+ %res = call <4 x float> @llvm.x86.avx512.mask.xor.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 %mask)
+ ret <4 x float> %res
+}
+
+declare <4 x float> @llvm.x86.avx512.mask.xor.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)
+
+define <8 x float> @test_mask_xor_ps_rr_256(<8 x float> %a, <8 x float> %b) {
+ ;CHECK-LABEL: test_mask_xor_ps_rr_256
+ ;CHECK: vxorps %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x57,0xc1]
+ %res = call <8 x float> @llvm.x86.avx512.mask.xor.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 -1)
+ ret <8 x float> %res
+}
+
+define <8 x float> @test_mask_xor_ps_rrk_256(<8 x float> %a, <8 x float> %b, <8 x float> %passThru, i8 %mask) {
+ ;CHECK-LABEL: test_mask_xor_ps_rrk_256
+ ;CHECK: vxorps %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x57,0xd1]
+ %res = call <8 x float> @llvm.x86.avx512.mask.xor.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %passThru, i8 %mask)
+ ret <8 x float> %res
+}
+
+define <8 x float> @test_mask_xor_ps_rrkz_256(<8 x float> %a, <8 x float> %b, i8 %mask) {
+ ;CHECK-LABEL: test_mask_xor_ps_rrkz_256
+ ;CHECK: vxorps %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xa9,0x57,0xc1]
+ %res = call <8 x float> @llvm.x86.avx512.mask.xor.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 %mask)
+ ret <8 x float> %res
+}
+
+define <8 x float> @test_mask_xor_ps_rm_256(<8 x float> %a, <8 x float>* %ptr_b) {
+ ;CHECK-LABEL: test_mask_xor_ps_rm_256
+ ;CHECK: vxorps (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x57,0x07]
+ %b = load <8 x float>, <8 x float>* %ptr_b
+ %res = call <8 x float> @llvm.x86.avx512.mask.xor.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 -1)
+ ret <8 x float> %res
+}
+
+define <8 x float> @test_mask_xor_ps_rmk_256(<8 x float> %a, <8 x float>* %ptr_b, <8 x float> %passThru, i8 %mask) {
+ ;CHECK-LABEL: test_mask_xor_ps_rmk_256
+ ;CHECK: vxorps (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x57,0x0f]
+ %b = load <8 x float>, <8 x float>* %ptr_b
+ %res = call <8 x float> @llvm.x86.avx512.mask.xor.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %passThru, i8 %mask)
+ ret <8 x float> %res
+}
+
+define <8 x float> @test_mask_xor_ps_rmkz_256(<8 x float> %a, <8 x float>* %ptr_b, i8 %mask) {
+ ;CHECK-LABEL: test_mask_xor_ps_rmkz_256
+ ;CHECK: vxorps (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xa9,0x57,0x07]
+ %b = load <8 x float>, <8 x float>* %ptr_b
+ %res = call <8 x float> @llvm.x86.avx512.mask.xor.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 %mask)
+ ret <8 x float> %res
+}
+
+define <8 x float> @test_mask_xor_ps_rmb_256(<8 x float> %a, float* %ptr_b) {
+ ;CHECK-LABEL: test_mask_xor_ps_rmb_256
+ ;CHECK: vxorps (%rdi){1to8}, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x38,0x57,0x07]
+ %q = load float, float* %ptr_b
+ %vecinit.i = insertelement <8 x float> undef, float %q, i32 0
+ %b = shufflevector <8 x float> %vecinit.i, <8 x float> undef, <8 x i32> zeroinitializer
+ %res = call <8 x float> @llvm.x86.avx512.mask.xor.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 -1)
+ ret <8 x float> %res
+}
+
+define <8 x float> @test_mask_xor_ps_rmbk_256(<8 x float> %a, float* %ptr_b, <8 x float> %passThru, i8 %mask) {
+ ;CHECK-LABEL: test_mask_xor_ps_rmbk_256
+ ;CHECK: vxorps (%rdi){1to8}, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x39,0x57,0x0f]
+ %q = load float, float* %ptr_b
+ %vecinit.i = insertelement <8 x float> undef, float %q, i32 0
+ %b = shufflevector <8 x float> %vecinit.i, <8 x float> undef, <8 x i32> zeroinitializer
+ %res = call <8 x float> @llvm.x86.avx512.mask.xor.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %passThru, i8 %mask)
+ ret <8 x float> %res
+}
+
+define <8 x float> @test_mask_xor_ps_rmbkz_256(<8 x float> %a, float* %ptr_b, i8 %mask) {
+ ;CHECK-LABEL: test_mask_xor_ps_rmbkz_256
+ ;CHECK: vxorps (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xb9,0x57,0x07]
+ %q = load float, float* %ptr_b
+ %vecinit.i = insertelement <8 x float> undef, float %q, i32 0
+ %b = shufflevector <8 x float> %vecinit.i, <8 x float> undef, <8 x i32> zeroinitializer
+ %res = call <8 x float> @llvm.x86.avx512.mask.xor.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> zeroinitializer, i8 %mask)
+ ret <8 x float> %res
+}
+
+declare <8 x float> @llvm.x86.avx512.mask.xor.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)
+
+define <16 x float> @test_mask_xor_ps_rr_512(<16 x float> %a, <16 x float> %b) {
+ ;CHECK-LABEL: test_mask_xor_ps_rr_512
+ ;CHECK: vxorps %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x57,0xc1]
+ %res = call <16 x float> @llvm.x86.avx512.mask.xor.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 -1)
+ ret <16 x float> %res
+}
+
+define <16 x float> @test_mask_xor_ps_rrk_512(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask) {
+ ;CHECK-LABEL: test_mask_xor_ps_rrk_512
+ ;CHECK: vxorps %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x57,0xd1]
+ %res = call <16 x float> @llvm.x86.avx512.mask.xor.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask)
+ ret <16 x float> %res
+}
+
+define <16 x float> @test_mask_xor_ps_rrkz_512(<16 x float> %a, <16 x float> %b, i16 %mask) {
+ ;CHECK-LABEL: test_mask_xor_ps_rrkz_512
+ ;CHECK: vxorps %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xc9,0x57,0xc1]
+ %res = call <16 x float> @llvm.x86.avx512.mask.xor.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 %mask)
+ ret <16 x float> %res
+}
+
+define <16 x float> @test_mask_xor_ps_rm_512(<16 x float> %a, <16 x float>* %ptr_b) {
+ ;CHECK-LABEL: test_mask_xor_ps_rm_512
+ ;CHECK: vxorps (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x57,0x07]
+ %b = load <16 x float>, <16 x float>* %ptr_b
+ %res = call <16 x float> @llvm.x86.avx512.mask.xor.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 -1)
+ ret <16 x float> %res
+}
+
+define <16 x float> @test_mask_xor_ps_rmk_512(<16 x float> %a, <16 x float>* %ptr_b, <16 x float> %passThru, i16 %mask) {
+ ;CHECK-LABEL: test_mask_xor_ps_rmk_512
+ ;CHECK: vxorps (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x57,0x0f]
+ %b = load <16 x float>, <16 x float>* %ptr_b
+ %res = call <16 x float> @llvm.x86.avx512.mask.xor.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask)
+ ret <16 x float> %res
+}
+
+define <16 x float> @test_mask_xor_ps_rmkz_512(<16 x float> %a, <16 x float>* %ptr_b, i16 %mask) {
+ ;CHECK-LABEL: test_mask_xor_ps_rmkz_512
+ ;CHECK: vxorps (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xc9,0x57,0x07]
+ %b = load <16 x float>, <16 x float>* %ptr_b
+ %res = call <16 x float> @llvm.x86.avx512.mask.xor.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 %mask)
+ ret <16 x float> %res
+}
+
+define <16 x float> @test_mask_xor_ps_rmb_512(<16 x float> %a, float* %ptr_b) {
+ ;CHECK-LABEL: test_mask_xor_ps_rmb_512
+ ;CHECK: vxorps (%rdi){1to16}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x58,0x57,0x07]
+ %q = load float, float* %ptr_b
+ %vecinit.i = insertelement <16 x float> undef, float %q, i32 0
+ %b = shufflevector <16 x float> %vecinit.i, <16 x float> undef, <16 x i32> zeroinitializer
+ %res = call <16 x float> @llvm.x86.avx512.mask.xor.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 -1)
+ ret <16 x float> %res
+}
+
+define <16 x float> @test_mask_xor_ps_rmbk_512(<16 x float> %a, float* %ptr_b, <16 x float> %passThru, i16 %mask) {
+ ;CHECK-LABEL: test_mask_xor_ps_rmbk_512
+ ;CHECK: vxorps (%rdi){1to16}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x59,0x57,0x0f]
+ %q = load float, float* %ptr_b
+ %vecinit.i = insertelement <16 x float> undef, float %q, i32 0
+ %b = shufflevector <16 x float> %vecinit.i, <16 x float> undef, <16 x i32> zeroinitializer
+ %res = call <16 x float> @llvm.x86.avx512.mask.xor.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask)
+ ret <16 x float> %res
+}
+
+define <16 x float> @test_mask_xor_ps_rmbkz_512(<16 x float> %a, float* %ptr_b, i16 %mask) {
+ ;CHECK-LABEL: test_mask_xor_ps_rmbkz_512
+ ;CHECK: vxorps (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xd9,0x57,0x07]
+ %q = load float, float* %ptr_b
+ %vecinit.i = insertelement <16 x float> undef, float %q, i32 0
+ %b = shufflevector <16 x float> %vecinit.i, <16 x float> undef, <16 x i32> zeroinitializer
+ %res = call <16 x float> @llvm.x86.avx512.mask.xor.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> zeroinitializer, i16 %mask)
+ ret <16 x float> %res
+}
+
+declare <16 x float> @llvm.x86.avx512.mask.xor.ps.512(<16 x float>, <16 x float>, <16 x float>, i16)
\ No newline at end of file
diff --git a/test/CodeGen/X86/avx512er-intrinsics.ll b/test/CodeGen/X86/avx512er-intrinsics.ll
index fa4352e64dce..827a56d76ae1 100644
--- a/test/CodeGen/X86/avx512er-intrinsics.ll
+++ b/test/CodeGen/X86/avx512er-intrinsics.ll
@@ -1,14 +1,14 @@
; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=knl --show-mc-encoding| FileCheck %s
define <16 x float> @test_rsqrt28_ps(<16 x float> %a0) {
- ; CHECK: vrsqrt28ps %zmm0, %zmm0 {sae} # encoding: [0x62,0xf2,0x7d,0x18,0xcc,0xc0]
+ ; CHECK: vrsqrt28ps {sae}, %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x18,0xcc,0xc0]
%res = call <16 x float> @llvm.x86.avx512.rsqrt28.ps(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 8)
ret <16 x float> %res
}
define <16 x float> @test1_rsqrt28_ps(<16 x float> %a0, <16 x float> %a1) {
; CHECK: kmovw
- ; CHECK: vrsqrt28ps %zmm0, %zmm1 {%k1}{sae} # encoding: [0x62,0xf2,0x7d,0x19,0xcc,0xc8]
+ ; CHECK: vrsqrt28ps {sae}, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x19,0xcc,0xc8]
%res = call <16 x float> @llvm.x86.avx512.rsqrt28.ps(<16 x float> %a0, <16 x float> %a1, i16 6, i32 8)
ret <16 x float> %res
}
@@ -27,7 +27,7 @@ define <16 x float> @test3_rsqrt28_ps(<16 x float> %a0) {
}
define <16 x float> @test4_rsqrt28_ps(<16 x float> %a0) {
- ; CHECK: vrsqrt28ps %zmm0, %zmm0 {%k1} {z}{sae} # encoding: [0x62,0xf2,0x7d,0x99,0xcc,0xc0]
+ ; CHECK: vrsqrt28ps {sae}, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x99,0xcc,0xc0]
%res = call <16 x float> @llvm.x86.avx512.rsqrt28.ps(<16 x float> %a0, <16 x float> undef, i16 6, i32 8)
ret <16 x float> %res
}
@@ -36,61 +36,61 @@ define <16 x float> @test4_rsqrt28_ps(<16 x float> %a0) {
declare <16 x float> @llvm.x86.avx512.rsqrt28.ps(<16 x float>, <16 x float>, i16, i32) nounwind readnone
define <16 x float> @test_rcp28_ps_512(<16 x float> %a0) {
- ; CHECK: vrcp28ps %zmm0, %zmm0 {sae} # encoding: [0x62,0xf2,0x7d,0x18,0xca,0xc0]
+ ; CHECK: vrcp28ps {sae}, %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x18,0xca,0xc0]
%res = call <16 x float> @llvm.x86.avx512.rcp28.ps(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 8)
ret <16 x float> %res
}
declare <16 x float> @llvm.x86.avx512.rcp28.ps(<16 x float>, <16 x float>, i16, i32) nounwind readnone
define <8 x double> @test_rcp28_pd_512(<8 x double> %a0) {
- ; CHECK: vrcp28pd %zmm0, %zmm0 {sae} # encoding: [0x62,0xf2,0xfd,0x18,0xca,0xc0]
+ ; CHECK: vrcp28pd {sae}, %zmm0, %zmm0 # encoding: [0x62,0xf2,0xfd,0x18,0xca,0xc0]
%res = call <8 x double> @llvm.x86.avx512.rcp28.pd(<8 x double> %a0, <8 x double> zeroinitializer, i8 -1, i32 8)
ret <8 x double> %res
}
declare <8 x double> @llvm.x86.avx512.rcp28.pd(<8 x double>, <8 x double>, i8, i32) nounwind readnone
define <16 x float> @test_exp2_ps_512(<16 x float> %a0) {
- ; CHECK: vexp2ps %zmm0, %zmm0 {sae} # encoding: [0x62,0xf2,0x7d,0x18,0xc8,0xc0]
+ ; CHECK: vexp2ps {sae}, %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x18,0xc8,0xc0]
%res = call <16 x float> @llvm.x86.avx512.exp2.ps(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 8)
ret <16 x float> %res
}
declare <16 x float> @llvm.x86.avx512.exp2.ps(<16 x float>, <16 x float>, i16, i32) nounwind readnone
define <8 x double> @test_exp2_pd_512(<8 x double> %a0) {
- ; CHECK: vexp2pd %zmm0, %zmm0 {sae} # encoding: [0x62,0xf2,0xfd,0x18,0xc8,0xc0]
+ ; CHECK: vexp2pd {sae}, %zmm0, %zmm0 # encoding: [0x62,0xf2,0xfd,0x18,0xc8,0xc0]
%res = call <8 x double> @llvm.x86.avx512.exp2.pd(<8 x double> %a0, <8 x double> zeroinitializer, i8 -1, i32 8)
ret <8 x double> %res
}
declare <8 x double> @llvm.x86.avx512.exp2.pd(<8 x double>, <8 x double>, i8, i32) nounwind readnone
define <4 x float> @test_rsqrt28_ss(<4 x float> %a0) {
- ; CHECK: vrsqrt28ss %xmm0, %xmm0, %xmm0 {sae} # encoding: [0x62,0xf2,0x7d,0x18,0xcd,0xc0]
+ ; CHECK: vrsqrt28ss {sae}, %xmm0, %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7d,0x18,0xcd,0xc0]
%res = call <4 x float> @llvm.x86.avx512.rsqrt28.ss(<4 x float> %a0, <4 x float> %a0, <4 x float> zeroinitializer, i8 -1, i32 8) ; <<4 x float>> [#uses=1]
ret <4 x float> %res
}
declare <4 x float> @llvm.x86.avx512.rsqrt28.ss(<4 x float>, <4 x float>, <4 x float>, i8, i32) nounwind readnone
define <4 x float> @test_rcp28_ss(<4 x float> %a0) {
- ; CHECK: vrcp28ss %xmm0, %xmm0, %xmm0 {sae} # encoding: [0x62,0xf2,0x7d,0x18,0xcb,0xc0]
+ ; CHECK: vrcp28ss {sae}, %xmm0, %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7d,0x18,0xcb,0xc0]
%res = call <4 x float> @llvm.x86.avx512.rcp28.ss(<4 x float> %a0, <4 x float> %a0, <4 x float> zeroinitializer, i8 -1, i32 8) ; <<4 x float>> [#uses=1]
ret <4 x float> %res
}
declare <4 x float> @llvm.x86.avx512.rcp28.ss(<4 x float>, <4 x float>, <4 x float>, i8, i32) nounwind readnone
define <4 x float> @test_rsqrt28_ss_maskz(<4 x float> %a0) {
- ; CHECK: vrsqrt28ss %xmm0, %xmm0, %xmm0 {%k1} {z}{sae} # encoding: [0x62,0xf2,0x7d,0x99,0xcd,0xc0]
+ ; CHECK: vrsqrt28ss {sae}, %xmm0, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x99,0xcd,0xc0]
%res = call <4 x float> @llvm.x86.avx512.rsqrt28.ss(<4 x float> %a0, <4 x float> %a0, <4 x float> zeroinitializer, i8 7, i32 8) ;
ret <4 x float> %res
}
define <4 x float> @test_rsqrt28_ss_mask(<4 x float> %a0, <4 x float> %b0, <4 x float> %c0) {
- ; CHECK: vrsqrt28ss %xmm1, %xmm0, %xmm2 {%k1}{sae} # encoding: [0x62,0xf2,0x7d,0x19,0xcd,0xd1]
+ ; CHECK: vrsqrt28ss {sae}, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x19,0xcd,0xd1]
%res = call <4 x float> @llvm.x86.avx512.rsqrt28.ss(<4 x float> %a0, <4 x float> %b0, <4 x float> %c0, i8 7, i32 8) ;
ret <4 x float> %res
}
define <2 x double> @test_rsqrt28_sd_maskz(<2 x double> %a0) {
- ; CHECK: vrsqrt28sd %xmm0, %xmm0, %xmm0 {%k1} {z}{sae} # encoding: [0x62,0xf2,0xfd,0x99,0xcd,0xc0]
+ ; CHECK: vrsqrt28sd {sae}, %xmm0, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x99,0xcd,0xc0]
%res = call <2 x double> @llvm.x86.avx512.rsqrt28.sd(<2 x double> %a0, <2 x double> %a0, <2 x double> zeroinitializer, i8 7, i32 8) ;
ret <2 x double> %res
}
@@ -99,7 +99,7 @@ declare <2 x double> @llvm.x86.avx512.rsqrt28.sd(<2 x double>, <2 x double>, <2
define <2 x double> @test_rsqrt28_sd_maskz_mem(<2 x double> %a0, double* %ptr ) {
; CHECK: vrsqrt28sd (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0xcd,0x07]
- %mem = load double * %ptr, align 8
+ %mem = load double , double * %ptr, align 8
%mem_v = insertelement <2 x double> undef, double %mem, i32 0
%res = call <2 x double> @llvm.x86.avx512.rsqrt28.sd(<2 x double> %a0, <2 x double> %mem_v, <2 x double> zeroinitializer, i8 7, i32 4) ;
ret <2 x double> %res
@@ -107,8 +107,8 @@ define <2 x double> @test_rsqrt28_sd_maskz_mem(<2 x double> %a0, double* %ptr )
define <2 x double> @test_rsqrt28_sd_maskz_mem_offset(<2 x double> %a0, double* %ptr ) {
; CHECK: vrsqrt28sd 144(%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0xcd,0x47,0x12]
- %ptr1 = getelementptr double* %ptr, i32 18
- %mem = load double * %ptr1, align 8
+ %ptr1 = getelementptr double, double* %ptr, i32 18
+ %mem = load double , double * %ptr1, align 8
%mem_v = insertelement <2 x double> undef, double %mem, i32 0
%res = call <2 x double> @llvm.x86.avx512.rsqrt28.sd(<2 x double> %a0, <2 x double> %mem_v, <2 x double> zeroinitializer, i8 7, i32 4) ;
ret <2 x double> %res
diff --git a/test/CodeGen/X86/avx512vl-arith.ll b/test/CodeGen/X86/avx512vl-arith.ll
index 1f7da7814cc9..ef01d8656dac 100644
--- a/test/CodeGen/X86/avx512vl-arith.ll
+++ b/test/CodeGen/X86/avx512vl-arith.ll
@@ -14,7 +14,7 @@ define <4 x i64> @vpaddq256_test(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
; CHECK: vpaddq (%rdi), %ymm{{.*}}
; CHECK: ret
define <4 x i64> @vpaddq256_fold_test(<4 x i64> %i, <4 x i64>* %j) nounwind {
- %tmp = load <4 x i64>* %j, align 4
+ %tmp = load <4 x i64>, <4 x i64>* %j, align 4
%x = add <4 x i64> %i, %tmp
ret <4 x i64> %x
}
@@ -31,7 +31,7 @@ define <4 x i64> @vpaddq256_broadcast_test(<4 x i64> %i) nounwind {
; CHECK: vpaddq (%rdi){1to4}, %ymm{{.*}}
; CHECK: ret
define <4 x i64> @vpaddq256_broadcast2_test(<4 x i64> %i, i64* %j.ptr) nounwind {
- %j = load i64* %j.ptr
+ %j = load i64, i64* %j.ptr
%j.0 = insertelement <4 x i64> undef, i64 %j, i32 0
%j.v = shufflevector <4 x i64> %j.0, <4 x i64> undef, <4 x i32> zeroinitializer
%x = add <4 x i64> %i, %j.v
@@ -50,7 +50,7 @@ define <8 x i32> @vpaddd256_test(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
; CHECK: vpaddd (%rdi), %ymm{{.*}}
; CHECK: ret
define <8 x i32> @vpaddd256_fold_test(<8 x i32> %i, <8 x i32>* %j) nounwind {
- %tmp = load <8 x i32>* %j, align 4
+ %tmp = load <8 x i32>, <8 x i32>* %j, align 4
%x = add <8 x i32> %i, %tmp
ret <8 x i32> %x
}
@@ -88,7 +88,7 @@ define <8 x i32> @vpaddd256_maskz_test(<8 x i32> %i, <8 x i32> %j, <8 x i32> %ma
; CHECK: ret
define <8 x i32> @vpaddd256_mask_fold_test(<8 x i32> %i, <8 x i32>* %j.ptr, <8 x i32> %mask1) nounwind readnone {
%mask = icmp ne <8 x i32> %mask1, zeroinitializer
- %j = load <8 x i32>* %j.ptr
+ %j = load <8 x i32>, <8 x i32>* %j.ptr
%x = add <8 x i32> %i, %j
%r = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %i
ret <8 x i32> %r
@@ -109,7 +109,7 @@ define <8 x i32> @vpaddd256_mask_broadcast_test(<8 x i32> %i, <8 x i32> %mask1)
; CHECK: ret
define <8 x i32> @vpaddd256_maskz_fold_test(<8 x i32> %i, <8 x i32>* %j.ptr, <8 x i32> %mask1) nounwind readnone {
%mask = icmp ne <8 x i32> %mask1, zeroinitializer
- %j = load <8 x i32>* %j.ptr
+ %j = load <8 x i32>, <8 x i32>* %j.ptr
%x = add <8 x i32> %i, %j
%r = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
ret <8 x i32> %r
@@ -341,7 +341,7 @@ define <4 x double> @test_mask_fold_vaddpd_256(<4 x double> %dst, <4 x double> %
<4 x double>* %j, <4 x i64> %mask1)
nounwind {
%mask = icmp ne <4 x i64> %mask1, zeroinitializer
- %tmp = load <4 x double>* %j
+ %tmp = load <4 x double>, <4 x double>* %j
%x = fadd <4 x double> %i, %tmp
%r = select <4 x i1> %mask, <4 x double> %x, <4 x double> %dst
ret <4 x double> %r
@@ -353,7 +353,7 @@ define <4 x double> @test_mask_fold_vaddpd_256(<4 x double> %dst, <4 x double> %
define <4 x double> @test_maskz_fold_vaddpd_256(<4 x double> %i, <4 x double>* %j,
<4 x i64> %mask1) nounwind {
%mask = icmp ne <4 x i64> %mask1, zeroinitializer
- %tmp = load <4 x double>* %j
+ %tmp = load <4 x double>, <4 x double>* %j
%x = fadd <4 x double> %i, %tmp
%r = select <4 x i1> %mask, <4 x double> %x, <4 x double> zeroinitializer
ret <4 x double> %r
@@ -363,7 +363,7 @@ define <4 x double> @test_maskz_fold_vaddpd_256(<4 x double> %i, <4 x double>* %
; CHECK: vaddpd (%rdi){1to4}, %ymm{{.*}}
; CHECK: ret
define <4 x double> @test_broadcast2_vaddpd_256(<4 x double> %i, double* %j) nounwind {
- %tmp = load double* %j
+ %tmp = load double, double* %j
%b = insertelement <4 x double> undef, double %tmp, i32 0
%c = shufflevector <4 x double> %b, <4 x double> undef,
<4 x i32> zeroinitializer
@@ -377,7 +377,7 @@ define <4 x double> @test_broadcast2_vaddpd_256(<4 x double> %i, double* %j) nou
define <4 x double> @test_mask_broadcast_vaddpd_256(<4 x double> %dst, <4 x double> %i,
double* %j, <4 x i64> %mask1) nounwind {
%mask = icmp ne <4 x i64> %mask1, zeroinitializer
- %tmp = load double* %j
+ %tmp = load double, double* %j
%b = insertelement <4 x double> undef, double %tmp, i32 0
%c = shufflevector <4 x double> %b, <4 x double> undef,
<4 x i32> zeroinitializer
@@ -392,7 +392,7 @@ define <4 x double> @test_mask_broadcast_vaddpd_256(<4 x double> %dst, <4 x doub
define <4 x double> @test_maskz_broadcast_vaddpd_256(<4 x double> %i, double* %j,
<4 x i64> %mask1) nounwind {
%mask = icmp ne <4 x i64> %mask1, zeroinitializer
- %tmp = load double* %j
+ %tmp = load double, double* %j
%b = insertelement <4 x double> undef, double %tmp, i32 0
%c = shufflevector <4 x double> %b, <4 x double> undef,
<4 x i32> zeroinitializer
@@ -415,7 +415,7 @@ define <2 x i64> @vpaddq128_test(<2 x i64> %i, <2 x i64> %j) nounwind readnone {
; CHECK: vpaddq (%rdi), %xmm{{.*}}
; CHECK: ret
define <2 x i64> @vpaddq128_fold_test(<2 x i64> %i, <2 x i64>* %j) nounwind {
- %tmp = load <2 x i64>* %j, align 4
+ %tmp = load <2 x i64>, <2 x i64>* %j, align 4
%x = add <2 x i64> %i, %tmp
ret <2 x i64> %x
}
@@ -424,7 +424,7 @@ define <2 x i64> @vpaddq128_fold_test(<2 x i64> %i, <2 x i64>* %j) nounwind {
; CHECK: vpaddq (%rdi){1to2}, %xmm{{.*}}
; CHECK: ret
define <2 x i64> @vpaddq128_broadcast2_test(<2 x i64> %i, i64* %j) nounwind {
- %tmp = load i64* %j
+ %tmp = load i64, i64* %j
%j.0 = insertelement <2 x i64> undef, i64 %tmp, i32 0
%j.1 = insertelement <2 x i64> %j.0, i64 %tmp, i32 1
%x = add <2 x i64> %i, %j.1
@@ -443,7 +443,7 @@ define <4 x i32> @vpaddd128_test(<4 x i32> %i, <4 x i32> %j) nounwind readnone {
; CHECK: vpaddd (%rdi), %xmm{{.*}}
; CHECK: ret
define <4 x i32> @vpaddd128_fold_test(<4 x i32> %i, <4 x i32>* %j) nounwind {
- %tmp = load <4 x i32>* %j, align 4
+ %tmp = load <4 x i32>, <4 x i32>* %j, align 4
%x = add <4 x i32> %i, %tmp
ret <4 x i32> %x
}
@@ -481,7 +481,7 @@ define <4 x i32> @vpaddd128_maskz_test(<4 x i32> %i, <4 x i32> %j, <4 x i32> %ma
; CHECK: ret
define <4 x i32> @vpaddd128_mask_fold_test(<4 x i32> %i, <4 x i32>* %j.ptr, <4 x i32> %mask1) nounwind readnone {
%mask = icmp ne <4 x i32> %mask1, zeroinitializer
- %j = load <4 x i32>* %j.ptr
+ %j = load <4 x i32>, <4 x i32>* %j.ptr
%x = add <4 x i32> %i, %j
%r = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> %i
ret <4 x i32> %r
@@ -502,7 +502,7 @@ define <4 x i32> @vpaddd128_mask_broadcast_test(<4 x i32> %i, <4 x i32> %mask1)
; CHECK: ret
define <4 x i32> @vpaddd128_maskz_fold_test(<4 x i32> %i, <4 x i32>* %j.ptr, <4 x i32> %mask1) nounwind readnone {
%mask = icmp ne <4 x i32> %mask1, zeroinitializer
- %j = load <4 x i32>* %j.ptr
+ %j = load <4 x i32>, <4 x i32>* %j.ptr
%x = add <4 x i32> %i, %j
%r = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> zeroinitializer
ret <4 x i32> %r
@@ -735,7 +735,7 @@ define <2 x double> @test_mask_fold_vaddpd_128(<2 x double> %dst, <2 x double> %
<2 x double>* %j, <2 x i64> %mask1)
nounwind {
%mask = icmp ne <2 x i64> %mask1, zeroinitializer
- %tmp = load <2 x double>* %j
+ %tmp = load <2 x double>, <2 x double>* %j
%x = fadd <2 x double> %i, %tmp
%r = select <2 x i1> %mask, <2 x double> %x, <2 x double> %dst
ret <2 x double> %r
@@ -747,7 +747,7 @@ define <2 x double> @test_mask_fold_vaddpd_128(<2 x double> %dst, <2 x double> %
define <2 x double> @test_maskz_fold_vaddpd_128(<2 x double> %i, <2 x double>* %j,
<2 x i64> %mask1) nounwind {
%mask = icmp ne <2 x i64> %mask1, zeroinitializer
- %tmp = load <2 x double>* %j
+ %tmp = load <2 x double>, <2 x double>* %j
%x = fadd <2 x double> %i, %tmp
%r = select <2 x i1> %mask, <2 x double> %x, <2 x double> zeroinitializer
ret <2 x double> %r
@@ -757,7 +757,7 @@ define <2 x double> @test_maskz_fold_vaddpd_128(<2 x double> %i, <2 x double>* %
; CHECK: vaddpd (%rdi){1to2}, %xmm{{.*}}
; CHECK: ret
define <2 x double> @test_broadcast2_vaddpd_128(<2 x double> %i, double* %j) nounwind {
- %tmp = load double* %j
+ %tmp = load double, double* %j
%j.0 = insertelement <2 x double> undef, double %tmp, i64 0
%j.1 = insertelement <2 x double> %j.0, double %tmp, i64 1
%x = fadd <2 x double> %j.1, %i
@@ -771,7 +771,7 @@ define <2 x double> @test_mask_broadcast_vaddpd_128(<2 x double> %dst, <2 x doub
double* %j, <2 x i64> %mask1)
nounwind {
%mask = icmp ne <2 x i64> %mask1, zeroinitializer
- %tmp = load double* %j
+ %tmp = load double, double* %j
%j.0 = insertelement <2 x double> undef, double %tmp, i64 0
%j.1 = insertelement <2 x double> %j.0, double %tmp, i64 1
%x = fadd <2 x double> %j.1, %i
@@ -785,7 +785,7 @@ define <2 x double> @test_mask_broadcast_vaddpd_128(<2 x double> %dst, <2 x doub
define <2 x double> @test_maskz_broadcast_vaddpd_128(<2 x double> %i, double* %j,
<2 x i64> %mask1) nounwind {
%mask = icmp ne <2 x i64> %mask1, zeroinitializer
- %tmp = load double* %j
+ %tmp = load double, double* %j
%j.0 = insertelement <2 x double> undef, double %tmp, i64 0
%j.1 = insertelement <2 x double> %j.0, double %tmp, i64 1
%x = fadd <2 x double> %j.1, %i
diff --git a/test/CodeGen/X86/avx512vl-intrinsics.ll b/test/CodeGen/X86/avx512vl-intrinsics.ll
index d349f4f53786..fd76ed5d0dbd 100644
--- a/test/CodeGen/X86/avx512vl-intrinsics.ll
+++ b/test/CodeGen/X86/avx512vl-intrinsics.ll
@@ -805,7 +805,7 @@ define <4 x double> @test_x86_mask_blend_pd_256(i8 %a0, <4 x double> %a1, <4 x d
define <4 x double> @test_x86_mask_blend_pd_256_memop(<4 x double> %a, <4 x double>* %ptr, i8 %mask) {
; CHECK-LABEL: test_x86_mask_blend_pd_256_memop
; CHECK: vblendmpd (%
- %b = load <4 x double>* %ptr
+ %b = load <4 x double>, <4 x double>* %ptr
%res = call <4 x double> @llvm.x86.avx512.mask.blend.pd.256(<4 x double> %a, <4 x double> %b, i8 %mask) ; <<4 x double>> [#uses=1]
ret <4 x double> %res
}
@@ -843,7 +843,7 @@ define <2 x double> @test_x86_mask_blend_pd_128(i8 %a0, <2 x double> %a1, <2 x d
define <2 x double> @test_x86_mask_blend_pd_128_memop(<2 x double> %a, <2 x double>* %ptr, i8 %mask) {
; CHECK-LABEL: test_x86_mask_blend_pd_128_memop
; CHECK: vblendmpd (%
- %b = load <2 x double>* %ptr
+ %b = load <2 x double>, <2 x double>* %ptr
%res = call <2 x double> @llvm.x86.avx512.mask.blend.pd.128(<2 x double> %a, <2 x double> %b, i8 %mask) ; <<2 x double>> [#uses=1]
ret <2 x double> %res
}
@@ -862,3 +862,1694 @@ define <2 x i64> @test_x86_mask_blend_q_128(i8 %a0, <2 x i64> %a1, <2 x i64> %a2
ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.avx512.mask.blend.q.128(<2 x i64>, <2 x i64>, i8) nounwind readonly
+
+
+define < 2 x i64> @test_mask_mul_epi32_rr_128(< 4 x i32> %a, < 4 x i32> %b) {
+ ;CHECK-LABEL: test_mask_mul_epi32_rr_128
+ ;CHECK: vpmuldq %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0xfd,0x08,0x28,0xc1]
+ %res = call < 2 x i64> @llvm.x86.avx512.mask.pmul.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> zeroinitializer, i8 -1)
+ ret < 2 x i64> %res
+}
+
+define < 2 x i64> @test_mask_mul_epi32_rrk_128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> %passThru, i8 %mask) {
+ ;CHECK-LABEL: test_mask_mul_epi32_rrk_128
+ ;CHECK: vpmuldq %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x28,0xd1]
+ %res = call < 2 x i64> @llvm.x86.avx512.mask.pmul.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> %passThru, i8 %mask)
+ ret < 2 x i64> %res
+}
+
+define < 2 x i64> @test_mask_mul_epi32_rrkz_128(< 4 x i32> %a, < 4 x i32> %b, i8 %mask) {
+ ;CHECK-LABEL: test_mask_mul_epi32_rrkz_128
+ ;CHECK: vpmuldq %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0x89,0x28,0xc1]
+ %res = call < 2 x i64> @llvm.x86.avx512.mask.pmul.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> zeroinitializer, i8 %mask)
+ ret < 2 x i64> %res
+}
+
+define < 2 x i64> @test_mask_mul_epi32_rm_128(< 4 x i32> %a, < 4 x i32>* %ptr_b) {
+ ;CHECK-LABEL: test_mask_mul_epi32_rm_128
+ ;CHECK: vpmuldq (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf2,0xfd,0x08,0x28,0x07]
+ %b = load < 4 x i32>, < 4 x i32>* %ptr_b
+ %res = call < 2 x i64> @llvm.x86.avx512.mask.pmul.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> zeroinitializer, i8 -1)
+ ret < 2 x i64> %res
+}
+
+define < 2 x i64> @test_mask_mul_epi32_rmk_128(< 4 x i32> %a, < 4 x i32>* %ptr_b, < 2 x i64> %passThru, i8 %mask) {
+ ;CHECK-LABEL: test_mask_mul_epi32_rmk_128
+ ;CHECK: vpmuldq (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x28,0x0f]
+ %b = load < 4 x i32>, < 4 x i32>* %ptr_b
+ %res = call < 2 x i64> @llvm.x86.avx512.mask.pmul.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> %passThru, i8 %mask)
+ ret < 2 x i64> %res
+}
+
+define < 2 x i64> @test_mask_mul_epi32_rmkz_128(< 4 x i32> %a, < 4 x i32>* %ptr_b, i8 %mask) {
+ ;CHECK-LABEL: test_mask_mul_epi32_rmkz_128
+ ;CHECK: vpmuldq (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0x89,0x28,0x07]
+ %b = load < 4 x i32>, < 4 x i32>* %ptr_b
+ %res = call < 2 x i64> @llvm.x86.avx512.mask.pmul.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> zeroinitializer, i8 %mask)
+ ret < 2 x i64> %res
+}
+
+define < 2 x i64> @test_mask_mul_epi32_rmb_128(< 4 x i32> %a, i64* %ptr_b) {
+ ;CHECK-LABEL: test_mask_mul_epi32_rmb_128
+ ;CHECK: vpmuldq (%rdi){1to2}, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0xfd,0x18,0x28,0x07]
+ %q = load i64, i64* %ptr_b
+ %vecinit.i = insertelement < 2 x i64> undef, i64 %q, i32 0
+ %b64 = shufflevector < 2 x i64> %vecinit.i, < 2 x i64> undef, <2 x i32> zeroinitializer
+ %b = bitcast < 2 x i64> %b64 to < 4 x i32>
+ %res = call < 2 x i64> @llvm.x86.avx512.mask.pmul.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> zeroinitializer, i8 -1)
+ ret < 2 x i64> %res
+}
+
+define < 2 x i64> @test_mask_mul_epi32_rmbk_128(< 4 x i32> %a, i64* %ptr_b, < 2 x i64> %passThru, i8 %mask) {
+ ;CHECK-LABEL: test_mask_mul_epi32_rmbk_128
+ ;CHECK: vpmuldq (%rdi){1to2}, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x19,0x28,0x0f]
+ %q = load i64, i64* %ptr_b
+ %vecinit.i = insertelement < 2 x i64> undef, i64 %q, i32 0
+ %b64 = shufflevector < 2 x i64> %vecinit.i, < 2 x i64> undef, <2 x i32> zeroinitializer
+ %b = bitcast < 2 x i64> %b64 to < 4 x i32>
+ %res = call < 2 x i64> @llvm.x86.avx512.mask.pmul.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> %passThru, i8 %mask)
+ ret < 2 x i64> %res
+}
+
+define < 2 x i64> @test_mask_mul_epi32_rmbkz_128(< 4 x i32> %a, i64* %ptr_b, i8 %mask) {
+ ;CHECK-LABEL: test_mask_mul_epi32_rmbkz_128
+ ;CHECK: vpmuldq (%rdi){1to2}, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0x99,0x28,0x07]
+ %q = load i64, i64* %ptr_b
+ %vecinit.i = insertelement < 2 x i64> undef, i64 %q, i32 0
+ %b64 = shufflevector < 2 x i64> %vecinit.i, < 2 x i64> undef, < 2 x i32> zeroinitializer
+ %b = bitcast < 2 x i64> %b64 to < 4 x i32>
+ %res = call < 2 x i64> @llvm.x86.avx512.mask.pmul.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> zeroinitializer, i8 %mask)
+ ret < 2 x i64> %res
+}
+
+declare < 2 x i64> @llvm.x86.avx512.mask.pmul.dq.128(< 4 x i32>, < 4 x i32>, < 2 x i64>, i8)
+
+define < 4 x i64> @test_mask_mul_epi32_rr_256(< 8 x i32> %a, < 8 x i32> %b) {
+ ;CHECK-LABEL: test_mask_mul_epi32_rr_256
+ ;CHECK: vpmuldq %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf2,0xfd,0x28,0x28,0xc1]
+ %res = call < 4 x i64> @llvm.x86.avx512.mask.pmul.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> zeroinitializer, i8 -1)
+ ret < 4 x i64> %res
+}
+
+define < 4 x i64> @test_mask_mul_epi32_rrk_256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> %passThru, i8 %mask) {
+ ;CHECK-LABEL: test_mask_mul_epi32_rrk_256
+ ;CHECK: vpmuldq %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x28,0xd1]
+ %res = call < 4 x i64> @llvm.x86.avx512.mask.pmul.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> %passThru, i8 %mask)
+ ret < 4 x i64> %res
+}
+
+define < 4 x i64> @test_mask_mul_epi32_rrkz_256(< 8 x i32> %a, < 8 x i32> %b, i8 %mask) {
+ ;CHECK-LABEL: test_mask_mul_epi32_rrkz_256
+ ;CHECK: vpmuldq %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xa9,0x28,0xc1]
+ %res = call < 4 x i64> @llvm.x86.avx512.mask.pmul.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> zeroinitializer, i8 %mask)
+ ret < 4 x i64> %res
+}
+
+define < 4 x i64> @test_mask_mul_epi32_rm_256(< 8 x i32> %a, < 8 x i32>* %ptr_b) {
+ ;CHECK-LABEL: test_mask_mul_epi32_rm_256
+ ;CHECK: vpmuldq (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf2,0xfd,0x28,0x28,0x07]
+ %b = load < 8 x i32>, < 8 x i32>* %ptr_b
+ %res = call < 4 x i64> @llvm.x86.avx512.mask.pmul.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> zeroinitializer, i8 -1)
+ ret < 4 x i64> %res
+}
+
+define < 4 x i64> @test_mask_mul_epi32_rmk_256(< 8 x i32> %a, < 8 x i32>* %ptr_b, < 4 x i64> %passThru, i8 %mask) {
+ ;CHECK-LABEL: test_mask_mul_epi32_rmk_256
+ ;CHECK: vpmuldq (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x28,0x0f]
+ %b = load < 8 x i32>, < 8 x i32>* %ptr_b
+ %res = call < 4 x i64> @llvm.x86.avx512.mask.pmul.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> %passThru, i8 %mask)
+ ret < 4 x i64> %res
+}
+
+define < 4 x i64> @test_mask_mul_epi32_rmkz_256(< 8 x i32> %a, < 8 x i32>* %ptr_b, i8 %mask) {
+ ;CHECK-LABEL: test_mask_mul_epi32_rmkz_256
+ ;CHECK: vpmuldq (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xa9,0x28,0x07]
+ %b = load < 8 x i32>, < 8 x i32>* %ptr_b
+ %res = call < 4 x i64> @llvm.x86.avx512.mask.pmul.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> zeroinitializer, i8 %mask)
+ ret < 4 x i64> %res
+}
+
+define < 4 x i64> @test_mask_mul_epi32_rmb_256(< 8 x i32> %a, i64* %ptr_b) {
+ ;CHECK-LABEL: test_mask_mul_epi32_rmb_256
+ ;CHECK: vpmuldq (%rdi){1to4}, %ymm0, %ymm0 ## encoding: [0x62,0xf2,0xfd,0x38,0x28,0x07]
+ %q = load i64, i64* %ptr_b
+ %vecinit.i = insertelement < 4 x i64> undef, i64 %q, i32 0
+ %b64 = shufflevector < 4 x i64> %vecinit.i, < 4 x i64> undef, < 4 x i32> zeroinitializer
+ %b = bitcast < 4 x i64> %b64 to < 8 x i32>
+ %res = call < 4 x i64> @llvm.x86.avx512.mask.pmul.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> zeroinitializer, i8 -1)
+ ret < 4 x i64> %res
+}
+
+define < 4 x i64> @test_mask_mul_epi32_rmbk_256(< 8 x i32> %a, i64* %ptr_b, < 4 x i64> %passThru, i8 %mask) {
+ ;CHECK-LABEL: test_mask_mul_epi32_rmbk_256
+ ;CHECK: vpmuldq (%rdi){1to4}, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x39,0x28,0x0f]
+ %q = load i64, i64* %ptr_b
+ %vecinit.i = insertelement < 4 x i64> undef, i64 %q, i32 0
+ %b64 = shufflevector < 4 x i64> %vecinit.i, < 4 x i64> undef, < 4 x i32> zeroinitializer
+ %b = bitcast < 4 x i64> %b64 to < 8 x i32>
+ %res = call < 4 x i64> @llvm.x86.avx512.mask.pmul.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> %passThru, i8 %mask)
+ ret < 4 x i64> %res
+}
+
+define < 4 x i64> @test_mask_mul_epi32_rmbkz_256(< 8 x i32> %a, i64* %ptr_b, i8 %mask) {
+ ;CHECK-LABEL: test_mask_mul_epi32_rmbkz_256
+ ;CHECK: vpmuldq (%rdi){1to4}, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xb9,0x28,0x07]
+ %q = load i64, i64* %ptr_b
+ %vecinit.i = insertelement < 4 x i64> undef, i64 %q, i32 0
+ %b64 = shufflevector < 4 x i64> %vecinit.i, < 4 x i64> undef, < 4 x i32> zeroinitializer
+ %b = bitcast < 4 x i64> %b64 to < 8 x i32>
+ %res = call < 4 x i64> @llvm.x86.avx512.mask.pmul.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> zeroinitializer, i8 %mask)
+ ret < 4 x i64> %res
+}
+
+declare < 4 x i64> @llvm.x86.avx512.mask.pmul.dq.256(< 8 x i32>, < 8 x i32>, < 4 x i64>, i8)
+
+define < 2 x i64> @test_mask_mul_epu32_rr_128(< 4 x i32> %a, < 4 x i32> %b) {
+ ;CHECK-LABEL: test_mask_mul_epu32_rr_128
+ ;CHECK: vpmuludq %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xf4,0xc1]
+ %res = call < 2 x i64> @llvm.x86.avx512.mask.pmulu.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> zeroinitializer, i8 -1)
+ ret < 2 x i64> %res
+}
+
+define < 2 x i64> @test_mask_mul_epu32_rrk_128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> %passThru, i8 %mask) {
+ ;CHECK-LABEL: test_mask_mul_epu32_rrk_128
+ ;CHECK: vpmuludq %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0xf4,0xd1]
+ %res = call < 2 x i64> @llvm.x86.avx512.mask.pmulu.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> %passThru, i8 %mask)
+ ret < 2 x i64> %res
+}
+
+define < 2 x i64> @test_mask_mul_epu32_rrkz_128(< 4 x i32> %a, < 4 x i32> %b, i8 %mask) {
+ ;CHECK-LABEL: test_mask_mul_epu32_rrkz_128
+ ;CHECK: vpmuludq %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x89,0xf4,0xc1]
+ %res = call < 2 x i64> @llvm.x86.avx512.mask.pmulu.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> zeroinitializer, i8 %mask)
+ ret < 2 x i64> %res
+}
+
+define < 2 x i64> @test_mask_mul_epu32_rm_128(< 4 x i32> %a, < 4 x i32>* %ptr_b) {
+ ;CHECK-LABEL: test_mask_mul_epu32_rm_128
+ ;CHECK: vpmuludq (%rdi), %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xf4,0x07]
+ %b = load < 4 x i32>, < 4 x i32>* %ptr_b
+ %res = call < 2 x i64> @llvm.x86.avx512.mask.pmulu.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> zeroinitializer, i8 -1)
+ ret < 2 x i64> %res
+}
+
+define < 2 x i64> @test_mask_mul_epu32_rmk_128(< 4 x i32> %a, < 4 x i32>* %ptr_b, < 2 x i64> %passThru, i8 %mask) {
+ ;CHECK-LABEL: test_mask_mul_epu32_rmk_128
+ ;CHECK: vpmuludq (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0xf4,0x0f]
+ %b = load < 4 x i32>, < 4 x i32>* %ptr_b
+ %res = call < 2 x i64> @llvm.x86.avx512.mask.pmulu.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> %passThru, i8 %mask)
+ ret < 2 x i64> %res
+}
+
+define < 2 x i64> @test_mask_mul_epu32_rmkz_128(< 4 x i32> %a, < 4 x i32>* %ptr_b, i8 %mask) {
+ ;CHECK-LABEL: test_mask_mul_epu32_rmkz_128
+ ;CHECK: vpmuludq (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x89,0xf4,0x07]
+ %b = load < 4 x i32>, < 4 x i32>* %ptr_b
+ %res = call < 2 x i64> @llvm.x86.avx512.mask.pmulu.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> zeroinitializer, i8 %mask)
+ ret < 2 x i64> %res
+}
+
+define < 2 x i64> @test_mask_mul_epu32_rmb_128(< 4 x i32> %a, i64* %ptr_b) {
+ ;CHECK-LABEL: test_mask_mul_epu32_rmb_128
+ ;CHECK: vpmuludq (%rdi){1to2}, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x18,0xf4,0x07]
+ %q = load i64, i64* %ptr_b
+ %vecinit.i = insertelement < 2 x i64> undef, i64 %q, i32 0
+ %b64 = shufflevector < 2 x i64> %vecinit.i, < 2 x i64> undef, <2 x i32> zeroinitializer
+ %b = bitcast < 2 x i64> %b64 to < 4 x i32>
+ %res = call < 2 x i64> @llvm.x86.avx512.mask.pmulu.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> zeroinitializer, i8 -1)
+ ret < 2 x i64> %res
+}
+
+define < 2 x i64> @test_mask_mul_epu32_rmbk_128(< 4 x i32> %a, i64* %ptr_b, < 2 x i64> %passThru, i8 %mask) {
+ ;CHECK-LABEL: test_mask_mul_epu32_rmbk_128
+ ;CHECK: vpmuludq (%rdi){1to2}, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x19,0xf4,0x0f]
+ %q = load i64, i64* %ptr_b
+ %vecinit.i = insertelement < 2 x i64> undef, i64 %q, i32 0
+ %b64 = shufflevector < 2 x i64> %vecinit.i, < 2 x i64> undef, <2 x i32> zeroinitializer
+ %b = bitcast < 2 x i64> %b64 to < 4 x i32>
+ %res = call < 2 x i64> @llvm.x86.avx512.mask.pmulu.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> %passThru, i8 %mask)
+ ret < 2 x i64> %res
+}
+
+define < 2 x i64> @test_mask_mul_epu32_rmbkz_128(< 4 x i32> %a, i64* %ptr_b, i8 %mask) {
+ ;CHECK-LABEL: test_mask_mul_epu32_rmbkz_128
+ ;CHECK: vpmuludq (%rdi){1to2}, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x99,0xf4,0x07]
+ %q = load i64, i64* %ptr_b
+ %vecinit.i = insertelement < 2 x i64> undef, i64 %q, i32 0
+ %b64 = shufflevector < 2 x i64> %vecinit.i, < 2 x i64> undef, < 2 x i32> zeroinitializer
+ %b = bitcast < 2 x i64> %b64 to < 4 x i32>
+ %res = call < 2 x i64> @llvm.x86.avx512.mask.pmulu.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> zeroinitializer, i8 %mask)
+ ret < 2 x i64> %res
+}
+
+declare < 2 x i64> @llvm.x86.avx512.mask.pmulu.dq.128(< 4 x i32>, < 4 x i32>, < 2 x i64>, i8)
+
+define < 4 x i64> @test_mask_mul_epu32_rr_256(< 8 x i32> %a, < 8 x i32> %b) {
+ ;CHECK-LABEL: test_mask_mul_epu32_rr_256
+ ;CHECK: vpmuludq %ymm1, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0xf4,0xc1]
+ %res = call < 4 x i64> @llvm.x86.avx512.mask.pmulu.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> zeroinitializer, i8 -1)
+ ret < 4 x i64> %res
+}
+
+define < 4 x i64> @test_mask_mul_epu32_rrk_256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> %passThru, i8 %mask) {
+ ;CHECK-LABEL: test_mask_mul_epu32_rrk_256
+ ;CHECK: vpmuludq %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0xf4,0xd1]
+ %res = call < 4 x i64> @llvm.x86.avx512.mask.pmulu.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> %passThru, i8 %mask)
+ ret < 4 x i64> %res
+}
+
+define < 4 x i64> @test_mask_mul_epu32_rrkz_256(< 8 x i32> %a, < 8 x i32> %b, i8 %mask) {
+ ;CHECK-LABEL: test_mask_mul_epu32_rrkz_256
+ ;CHECK: vpmuludq %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xa9,0xf4,0xc1]
+ %res = call < 4 x i64> @llvm.x86.avx512.mask.pmulu.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> zeroinitializer, i8 %mask)
+ ret < 4 x i64> %res
+}
+
+define < 4 x i64> @test_mask_mul_epu32_rm_256(< 8 x i32> %a, < 8 x i32>* %ptr_b) {
+ ;CHECK-LABEL: test_mask_mul_epu32_rm_256
+ ;CHECK: vpmuludq (%rdi), %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0xf4,0x07]
+ %b = load < 8 x i32>, < 8 x i32>* %ptr_b
+ %res = call < 4 x i64> @llvm.x86.avx512.mask.pmulu.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> zeroinitializer, i8 -1)
+ ret < 4 x i64> %res
+}
+
+define < 4 x i64> @test_mask_mul_epu32_rmk_256(< 8 x i32> %a, < 8 x i32>* %ptr_b, < 4 x i64> %passThru, i8 %mask) {
+ ;CHECK-LABEL: test_mask_mul_epu32_rmk_256
+ ;CHECK: vpmuludq (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0xf4,0x0f]
+ %b = load < 8 x i32>, < 8 x i32>* %ptr_b
+ %res = call < 4 x i64> @llvm.x86.avx512.mask.pmulu.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> %passThru, i8 %mask)
+ ret < 4 x i64> %res
+}
+
+define < 4 x i64> @test_mask_mul_epu32_rmkz_256(< 8 x i32> %a, < 8 x i32>* %ptr_b, i8 %mask) {
+ ;CHECK-LABEL: test_mask_mul_epu32_rmkz_256
+ ;CHECK: vpmuludq (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xa9,0xf4,0x07]
+ %b = load < 8 x i32>, < 8 x i32>* %ptr_b
+ %res = call < 4 x i64> @llvm.x86.avx512.mask.pmulu.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> zeroinitializer, i8 %mask)
+ ret < 4 x i64> %res
+}
+
+define < 4 x i64> @test_mask_mul_epu32_rmb_256(< 8 x i32> %a, i64* %ptr_b) {
+ ;CHECK-LABEL: test_mask_mul_epu32_rmb_256
+ ;CHECK: vpmuludq (%rdi){1to4}, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x38,0xf4,0x07]
+ %q = load i64, i64* %ptr_b
+ %vecinit.i = insertelement < 4 x i64> undef, i64 %q, i32 0
+ %b64 = shufflevector < 4 x i64> %vecinit.i, < 4 x i64> undef, < 4 x i32> zeroinitializer
+ %b = bitcast < 4 x i64> %b64 to < 8 x i32>
+ %res = call < 4 x i64> @llvm.x86.avx512.mask.pmulu.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> zeroinitializer, i8 -1)
+ ret < 4 x i64> %res
+}
+
+define < 4 x i64> @test_mask_mul_epu32_rmbk_256(< 8 x i32> %a, i64* %ptr_b, < 4 x i64> %passThru, i8 %mask) {
+ ;CHECK-LABEL: test_mask_mul_epu32_rmbk_256
+ ;CHECK: vpmuludq (%rdi){1to4}, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x39,0xf4,0x0f]
+ %q = load i64, i64* %ptr_b
+ %vecinit.i = insertelement < 4 x i64> undef, i64 %q, i32 0
+ %b64 = shufflevector < 4 x i64> %vecinit.i, < 4 x i64> undef, < 4 x i32> zeroinitializer
+ %b = bitcast < 4 x i64> %b64 to < 8 x i32>
+ %res = call < 4 x i64> @llvm.x86.avx512.mask.pmulu.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> %passThru, i8 %mask)
+ ret < 4 x i64> %res
+}
+
+define < 4 x i64> @test_mask_mul_epu32_rmbkz_256(< 8 x i32> %a, i64* %ptr_b, i8 %mask) {
+ ;CHECK-LABEL: test_mask_mul_epu32_rmbkz_256
+ ;CHECK: vpmuludq (%rdi){1to4}, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xb9,0xf4,0x07]
+ %q = load i64, i64* %ptr_b
+ %vecinit.i = insertelement < 4 x i64> undef, i64 %q, i32 0
+ %b64 = shufflevector < 4 x i64> %vecinit.i, < 4 x i64> undef, < 4 x i32> zeroinitializer
+ %b = bitcast < 4 x i64> %b64 to < 8 x i32>
+ %res = call < 4 x i64> @llvm.x86.avx512.mask.pmulu.dq.256(< 8 x i32> %a, < 8 x i32> %b, < 4 x i64> zeroinitializer, i8 %mask)
+ ret < 4 x i64> %res
+}
+
+declare < 4 x i64> @llvm.x86.avx512.mask.pmulu.dq.256(< 8 x i32>, < 8 x i32>, < 4 x i64>, i8)
+
+define <4 x i32> @test_mask_add_epi32_rr_128(<4 x i32> %a, <4 x i32> %b) {
+ ;CHECK-LABEL: test_mask_add_epi32_rr_128
+ ;CHECK: vpaddd %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfe,0xc1]
+ %res = call <4 x i32> @llvm.x86.avx512.mask.padd.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @test_mask_add_epi32_rrk_128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) {
+ ;CHECK-LABEL: test_mask_add_epi32_rrk_128
+ ;CHECK: vpaddd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xfe,0xd1]
+ %res = call <4 x i32> @llvm.x86.avx512.mask.padd.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @test_mask_add_epi32_rrkz_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) {
+ ;CHECK-LABEL: test_mask_add_epi32_rrkz_128
+ ;CHECK: vpaddd %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xfe,0xc1]
+ %res = call <4 x i32> @llvm.x86.avx512.mask.padd.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @test_mask_add_epi32_rm_128(<4 x i32> %a, <4 x i32>* %ptr_b) {
+ ;CHECK-LABEL: test_mask_add_epi32_rm_128
+ ;CHECK: vpaddd (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfe,0x07]
+ %b = load <4 x i32>, <4 x i32>* %ptr_b
+ %res = call <4 x i32> @llvm.x86.avx512.mask.padd.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @test_mask_add_epi32_rmk_128(<4 x i32> %a, <4 x i32>* %ptr_b, <4 x i32> %passThru, i8 %mask) {
+ ;CHECK-LABEL: test_mask_add_epi32_rmk_128
+ ;CHECK: vpaddd (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xfe,0x0f]
+ %b = load <4 x i32>, <4 x i32>* %ptr_b
+ %res = call <4 x i32> @llvm.x86.avx512.mask.padd.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @test_mask_add_epi32_rmkz_128(<4 x i32> %a, <4 x i32>* %ptr_b, i8 %mask) {
+ ;CHECK-LABEL: test_mask_add_epi32_rmkz_128
+ ;CHECK: vpaddd (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xfe,0x07]
+ %b = load <4 x i32>, <4 x i32>* %ptr_b
+ %res = call <4 x i32> @llvm.x86.avx512.mask.padd.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @test_mask_add_epi32_rmb_128(<4 x i32> %a, i32* %ptr_b) {
+ ;CHECK-LABEL: test_mask_add_epi32_rmb_128
+ ;CHECK: vpaddd (%rdi){1to4}, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x18,0xfe,0x07]
+ %q = load i32, i32* %ptr_b
+ %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
+ %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
+ %res = call <4 x i32> @llvm.x86.avx512.mask.padd.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @test_mask_add_epi32_rmbk_128(<4 x i32> %a, i32* %ptr_b, <4 x i32> %passThru, i8 %mask) {
+ ;CHECK-LABEL: test_mask_add_epi32_rmbk_128
+ ;CHECK: vpaddd (%rdi){1to4}, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x19,0xfe,0x0f]
+ %q = load i32, i32* %ptr_b
+ %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
+ %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
+ %res = call <4 x i32> @llvm.x86.avx512.mask.padd.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @test_mask_add_epi32_rmbkz_128(<4 x i32> %a, i32* %ptr_b, i8 %mask) {
+ ;CHECK-LABEL: test_mask_add_epi32_rmbkz_128
+ ;CHECK: vpaddd (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x99,0xfe,0x07]
+ %q = load i32, i32* %ptr_b
+ %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
+ %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
+ %res = call <4 x i32> @llvm.x86.avx512.mask.padd.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
+ ret <4 x i32> %res
+}
+
+declare <4 x i32> @llvm.x86.avx512.mask.padd.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
+
+define <4 x i32> @test_mask_sub_epi32_rr_128(<4 x i32> %a, <4 x i32> %b) {
+ ;CHECK-LABEL: test_mask_sub_epi32_rr_128
+ ;CHECK: vpsubd %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfa,0xc1]
+ %res = call <4 x i32> @llvm.x86.avx512.mask.psub.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @test_mask_sub_epi32_rrk_128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) {
+ ;CHECK-LABEL: test_mask_sub_epi32_rrk_128
+ ;CHECK: vpsubd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xfa,0xd1]
+ %res = call <4 x i32> @llvm.x86.avx512.mask.psub.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @test_mask_sub_epi32_rrkz_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) {
+ ;CHECK-LABEL: test_mask_sub_epi32_rrkz_128
+ ;CHECK: vpsubd %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xfa,0xc1]
+ %res = call <4 x i32> @llvm.x86.avx512.mask.psub.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @test_mask_sub_epi32_rm_128(<4 x i32> %a, <4 x i32>* %ptr_b) {
+ ;CHECK-LABEL: test_mask_sub_epi32_rm_128
+ ;CHECK: vpsubd (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfa,0x07]
+ %b = load <4 x i32>, <4 x i32>* %ptr_b
+ %res = call <4 x i32> @llvm.x86.avx512.mask.psub.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @test_mask_sub_epi32_rmk_128(<4 x i32> %a, <4 x i32>* %ptr_b, <4 x i32> %passThru, i8 %mask) {
+ ;CHECK-LABEL: test_mask_sub_epi32_rmk_128
+ ;CHECK: vpsubd (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xfa,0x0f]
+ %b = load <4 x i32>, <4 x i32>* %ptr_b
+ %res = call <4 x i32> @llvm.x86.avx512.mask.psub.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @test_mask_sub_epi32_rmkz_128(<4 x i32> %a, <4 x i32>* %ptr_b, i8 %mask) {
+ ;CHECK-LABEL: test_mask_sub_epi32_rmkz_128
+ ;CHECK: vpsubd (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xfa,0x07]
+ %b = load <4 x i32>, <4 x i32>* %ptr_b
+ %res = call <4 x i32> @llvm.x86.avx512.mask.psub.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @test_mask_sub_epi32_rmb_128(<4 x i32> %a, i32* %ptr_b) {
+ ;CHECK-LABEL: test_mask_sub_epi32_rmb_128
+ ;CHECK: vpsubd (%rdi){1to4}, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x18,0xfa,0x07]
+ %q = load i32, i32* %ptr_b
+ %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
+ %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
+ %res = call <4 x i32> @llvm.x86.avx512.mask.psub.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @test_mask_sub_epi32_rmbk_128(<4 x i32> %a, i32* %ptr_b, <4 x i32> %passThru, i8 %mask) {
+ ;CHECK-LABEL: test_mask_sub_epi32_rmbk_128
+ ;CHECK: vpsubd (%rdi){1to4}, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x19,0xfa,0x0f]
+ %q = load i32, i32* %ptr_b
+ %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
+ %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
+ %res = call <4 x i32> @llvm.x86.avx512.mask.psub.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @test_mask_sub_epi32_rmbkz_128(<4 x i32> %a, i32* %ptr_b, i8 %mask) {
+ ;CHECK-LABEL: test_mask_sub_epi32_rmbkz_128
+ ;CHECK: vpsubd (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x99,0xfa,0x07]
+ %q = load i32, i32* %ptr_b
+ %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
+ %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
+ %res = call <4 x i32> @llvm.x86.avx512.mask.psub.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
+ ret <4 x i32> %res
+}
+
+declare <4 x i32> @llvm.x86.avx512.mask.psub.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
+
+define <8 x i32> @test_mask_sub_epi32_rr_256(<8 x i32> %a, <8 x i32> %b) {
+ ;CHECK-LABEL: test_mask_sub_epi32_rr_256
+ ;CHECK: vpsubd %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xfa,0xc1]
+ %res = call <8 x i32> @llvm.x86.avx512.mask.psub.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
+ ret <8 x i32> %res
+}
+
+define <8 x i32> @test_mask_sub_epi32_rrk_256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) {
+ ;CHECK-LABEL: test_mask_sub_epi32_rrk_256
+ ;CHECK: vpsubd %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xfa,0xd1]
+ %res = call <8 x i32> @llvm.x86.avx512.mask.psub.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
+ ret <8 x i32> %res
+}
+
+define <8 x i32> @test_mask_sub_epi32_rrkz_256(<8 x i32> %a, <8 x i32> %b, i8 %mask) {
+ ;CHECK-LABEL: test_mask_sub_epi32_rrkz_256
+ ;CHECK: vpsubd %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xfa,0xc1]
+ %res = call <8 x i32> @llvm.x86.avx512.mask.psub.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
+ ret <8 x i32> %res
+}
+
+define <8 x i32> @test_mask_sub_epi32_rm_256(<8 x i32> %a, <8 x i32>* %ptr_b) {
+ ;CHECK-LABEL: test_mask_sub_epi32_rm_256
+ ;CHECK: vpsubd (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xfa,0x07]
+ %b = load <8 x i32>, <8 x i32>* %ptr_b
+ %res = call <8 x i32> @llvm.x86.avx512.mask.psub.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
+ ret <8 x i32> %res
+}
+
+define <8 x i32> @test_mask_sub_epi32_rmk_256(<8 x i32> %a, <8 x i32>* %ptr_b, <8 x i32> %passThru, i8 %mask) {
+ ;CHECK-LABEL: test_mask_sub_epi32_rmk_256
+ ;CHECK: vpsubd (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xfa,0x0f]
+ %b = load <8 x i32>, <8 x i32>* %ptr_b
+ %res = call <8 x i32> @llvm.x86.avx512.mask.psub.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
+ ret <8 x i32> %res
+}
+
+define <8 x i32> @test_mask_sub_epi32_rmkz_256(<8 x i32> %a, <8 x i32>* %ptr_b, i8 %mask) {
+ ;CHECK-LABEL: test_mask_sub_epi32_rmkz_256
+ ;CHECK: vpsubd (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xfa,0x07]
+ %b = load <8 x i32>, <8 x i32>* %ptr_b
+ %res = call <8 x i32> @llvm.x86.avx512.mask.psub.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
+ ret <8 x i32> %res
+}
+
+define <8 x i32> @test_mask_sub_epi32_rmb_256(<8 x i32> %a, i32* %ptr_b) {
+ ;CHECK-LABEL: test_mask_sub_epi32_rmb_256
+ ;CHECK: vpsubd (%rdi){1to8}, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x38,0xfa,0x07]
+ %q = load i32, i32* %ptr_b
+ %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
+ %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
+ %res = call <8 x i32> @llvm.x86.avx512.mask.psub.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
+ ret <8 x i32> %res
+}
+
+define <8 x i32> @test_mask_sub_epi32_rmbk_256(<8 x i32> %a, i32* %ptr_b, <8 x i32> %passThru, i8 %mask) {
+ ;CHECK-LABEL: test_mask_sub_epi32_rmbk_256
+ ;CHECK: vpsubd (%rdi){1to8}, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x39,0xfa,0x0f]
+ %q = load i32, i32* %ptr_b
+ %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
+ %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
+ %res = call <8 x i32> @llvm.x86.avx512.mask.psub.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
+ ret <8 x i32> %res
+}
+
+define <8 x i32> @test_mask_sub_epi32_rmbkz_256(<8 x i32> %a, i32* %ptr_b, i8 %mask) {
+ ;CHECK-LABEL: test_mask_sub_epi32_rmbkz_256
+ ;CHECK: vpsubd (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xb9,0xfa,0x07]
+ %q = load i32, i32* %ptr_b
+ %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
+ %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
+ %res = call <8 x i32> @llvm.x86.avx512.mask.psub.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
+ ret <8 x i32> %res
+}
+
+declare <8 x i32> @llvm.x86.avx512.mask.psub.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
+
+define <8 x i32> @test_mask_add_epi32_rr_256(<8 x i32> %a, <8 x i32> %b) {
+ ;CHECK-LABEL: test_mask_add_epi32_rr_256
+ ;CHECK: vpaddd %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xfe,0xc1]
+ %res = call <8 x i32> @llvm.x86.avx512.mask.padd.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
+ ret <8 x i32> %res
+}
+
+define <8 x i32> @test_mask_add_epi32_rrk_256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) {
+ ;CHECK-LABEL: test_mask_add_epi32_rrk_256
+ ;CHECK: vpaddd %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xfe,0xd1]
+ %res = call <8 x i32> @llvm.x86.avx512.mask.padd.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
+ ret <8 x i32> %res
+}
+
+define <8 x i32> @test_mask_add_epi32_rrkz_256(<8 x i32> %a, <8 x i32> %b, i8 %mask) {
+ ;CHECK-LABEL: test_mask_add_epi32_rrkz_256
+ ;CHECK: vpaddd %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xfe,0xc1]
+ %res = call <8 x i32> @llvm.x86.avx512.mask.padd.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
+ ret <8 x i32> %res
+}
+
+define <8 x i32> @test_mask_add_epi32_rm_256(<8 x i32> %a, <8 x i32>* %ptr_b) {
+ ;CHECK-LABEL: test_mask_add_epi32_rm_256
+ ;CHECK: vpaddd (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xfe,0x07]
+ %b = load <8 x i32>, <8 x i32>* %ptr_b
+ %res = call <8 x i32> @llvm.x86.avx512.mask.padd.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
+ ret <8 x i32> %res
+}
+
+define <8 x i32> @test_mask_add_epi32_rmk_256(<8 x i32> %a, <8 x i32>* %ptr_b, <8 x i32> %passThru, i8 %mask) {
+ ;CHECK-LABEL: test_mask_add_epi32_rmk_256
+ ;CHECK: vpaddd (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xfe,0x0f]
+ %b = load <8 x i32>, <8 x i32>* %ptr_b
+ %res = call <8 x i32> @llvm.x86.avx512.mask.padd.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
+ ret <8 x i32> %res
+}
+
+define <8 x i32> @test_mask_add_epi32_rmkz_256(<8 x i32> %a, <8 x i32>* %ptr_b, i8 %mask) {
+ ;CHECK-LABEL: test_mask_add_epi32_rmkz_256
+ ;CHECK: vpaddd (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xfe,0x07]
+ %b = load <8 x i32>, <8 x i32>* %ptr_b
+ %res = call <8 x i32> @llvm.x86.avx512.mask.padd.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
+ ret <8 x i32> %res
+}
+
+define <8 x i32> @test_mask_add_epi32_rmb_256(<8 x i32> %a, i32* %ptr_b) {
+ ;CHECK-LABEL: test_mask_add_epi32_rmb_256
+ ;CHECK: vpaddd (%rdi){1to8}, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x38,0xfe,0x07]
+ %q = load i32, i32* %ptr_b
+ %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
+ %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
+ %res = call <8 x i32> @llvm.x86.avx512.mask.padd.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
+ ret <8 x i32> %res
+}
+
+define <8 x i32> @test_mask_add_epi32_rmbk_256(<8 x i32> %a, i32* %ptr_b, <8 x i32> %passThru, i8 %mask) {
+ ;CHECK-LABEL: test_mask_add_epi32_rmbk_256
+ ;CHECK: vpaddd (%rdi){1to8}, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x39,0xfe,0x0f]
+ %q = load i32, i32* %ptr_b
+ %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
+ %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
+ %res = call <8 x i32> @llvm.x86.avx512.mask.padd.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
+ ret <8 x i32> %res
+}
+
+define <8 x i32> @test_mask_add_epi32_rmbkz_256(<8 x i32> %a, i32* %ptr_b, i8 %mask) {
+ ;CHECK-LABEL: test_mask_add_epi32_rmbkz_256
+ ;CHECK: vpaddd (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xb9,0xfe,0x07]
+ %q = load i32, i32* %ptr_b
+ %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
+ %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
+ %res = call <8 x i32> @llvm.x86.avx512.mask.padd.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
+ ret <8 x i32> %res
+}
+
+declare <8 x i32> @llvm.x86.avx512.mask.padd.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
+
+define <4 x i32> @test_mask_and_epi32_rr_128(<4 x i32> %a, <4 x i32> %b) {
+ ;CHECK-LABEL: test_mask_and_epi32_rr_128
+ ;CHECK: vpandd %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xdb,0xc1]
+ %res = call <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @test_mask_and_epi32_rrk_128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) {
+ ;CHECK-LABEL: test_mask_and_epi32_rrk_128
+ ;CHECK: vpandd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xdb,0xd1]
+ %res = call <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @test_mask_and_epi32_rrkz_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) {
+ ;CHECK-LABEL: test_mask_and_epi32_rrkz_128
+ ;CHECK: vpandd %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xdb,0xc1]
+ %res = call <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @test_mask_and_epi32_rm_128(<4 x i32> %a, <4 x i32>* %ptr_b) {
+ ;CHECK-LABEL: test_mask_and_epi32_rm_128
+ ;CHECK: vpandd (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xdb,0x07]
+ %b = load <4 x i32>, <4 x i32>* %ptr_b
+ %res = call <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @test_mask_and_epi32_rmk_128(<4 x i32> %a, <4 x i32>* %ptr_b, <4 x i32> %passThru, i8 %mask) {
+ ;CHECK-LABEL: test_mask_and_epi32_rmk_128
+ ;CHECK: vpandd (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xdb,0x0f]
+ %b = load <4 x i32>, <4 x i32>* %ptr_b
+ %res = call <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @test_mask_and_epi32_rmkz_128(<4 x i32> %a, <4 x i32>* %ptr_b, i8 %mask) {
+ ;CHECK-LABEL: test_mask_and_epi32_rmkz_128
+ ;CHECK: vpandd (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xdb,0x07]
+ %b = load <4 x i32>, <4 x i32>* %ptr_b
+ %res = call <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @test_mask_and_epi32_rmb_128(<4 x i32> %a, i32* %ptr_b) {
+ ;CHECK-LABEL: test_mask_and_epi32_rmb_128
+ ;CHECK: vpandd (%rdi){1to4}, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x18,0xdb,0x07]
+ %q = load i32, i32* %ptr_b
+ %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
+ %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
+ %res = call <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @test_mask_and_epi32_rmbk_128(<4 x i32> %a, i32* %ptr_b, <4 x i32> %passThru, i8 %mask) {
+ ;CHECK-LABEL: test_mask_and_epi32_rmbk_128
+ ;CHECK: vpandd (%rdi){1to4}, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x19,0xdb,0x0f]
+ %q = load i32, i32* %ptr_b
+ %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
+ %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
+ %res = call <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @test_mask_and_epi32_rmbkz_128(<4 x i32> %a, i32* %ptr_b, i8 %mask) {
+ ;CHECK-LABEL: test_mask_and_epi32_rmbkz_128
+ ;CHECK: vpandd (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x99,0xdb,0x07]
+ %q = load i32, i32* %ptr_b
+ %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
+ %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
+ %res = call <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
+ ret <4 x i32> %res
+}
+
+declare <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
+
+define <8 x i32> @test_mask_and_epi32_rr_256(<8 x i32> %a, <8 x i32> %b) {
+ ;CHECK-LABEL: test_mask_and_epi32_rr_256
+ ;CHECK: vpandd %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xdb,0xc1]
+ %res = call <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
+ ret <8 x i32> %res
+}
+
+define <8 x i32> @test_mask_and_epi32_rrk_256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) {
+ ;CHECK-LABEL: test_mask_and_epi32_rrk_256
+ ;CHECK: vpandd %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xdb,0xd1]
+ %res = call <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
+ ret <8 x i32> %res
+}
+
+define <8 x i32> @test_mask_and_epi32_rrkz_256(<8 x i32> %a, <8 x i32> %b, i8 %mask) {
+ ;CHECK-LABEL: test_mask_and_epi32_rrkz_256
+ ;CHECK: vpandd %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xdb,0xc1]
+ %res = call <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
+ ret <8 x i32> %res
+}
+
+define <8 x i32> @test_mask_and_epi32_rm_256(<8 x i32> %a, <8 x i32>* %ptr_b) {
+ ;CHECK-LABEL: test_mask_and_epi32_rm_256
+ ;CHECK: vpandd (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xdb,0x07]
+ %b = load <8 x i32>, <8 x i32>* %ptr_b
+ %res = call <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
+ ret <8 x i32> %res
+}
+
+define <8 x i32> @test_mask_and_epi32_rmk_256(<8 x i32> %a, <8 x i32>* %ptr_b, <8 x i32> %passThru, i8 %mask) {
+ ;CHECK-LABEL: test_mask_and_epi32_rmk_256
+ ;CHECK: vpandd (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xdb,0x0f]
+ %b = load <8 x i32>, <8 x i32>* %ptr_b
+ %res = call <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
+ ret <8 x i32> %res
+}
+
+define <8 x i32> @test_mask_and_epi32_rmkz_256(<8 x i32> %a, <8 x i32>* %ptr_b, i8 %mask) {
+ ;CHECK-LABEL: test_mask_and_epi32_rmkz_256
+ ;CHECK: vpandd (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xdb,0x07]
+ %b = load <8 x i32>, <8 x i32>* %ptr_b
+ %res = call <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
+ ret <8 x i32> %res
+}
+
+define <8 x i32> @test_mask_and_epi32_rmb_256(<8 x i32> %a, i32* %ptr_b) {
+ ;CHECK-LABEL: test_mask_and_epi32_rmb_256
+ ;CHECK: vpandd (%rdi){1to8}, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x38,0xdb,0x07]
+ %q = load i32, i32* %ptr_b
+ %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
+ %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
+ %res = call <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
+ ret <8 x i32> %res
+}
+
+define <8 x i32> @test_mask_and_epi32_rmbk_256(<8 x i32> %a, i32* %ptr_b, <8 x i32> %passThru, i8 %mask) {
+ ;CHECK-LABEL: test_mask_and_epi32_rmbk_256
+ ;CHECK: vpandd (%rdi){1to8}, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x39,0xdb,0x0f]
+ %q = load i32, i32* %ptr_b
+ %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
+ %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
+ %res = call <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
+ ret <8 x i32> %res
+}
+
+define <8 x i32> @test_mask_and_epi32_rmbkz_256(<8 x i32> %a, i32* %ptr_b, i8 %mask) {
+ ;CHECK-LABEL: test_mask_and_epi32_rmbkz_256
+ ;CHECK: vpandd (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xb9,0xdb,0x07]
+ %q = load i32, i32* %ptr_b
+ %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
+ %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
+ %res = call <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
+ ret <8 x i32> %res
+}
+
+declare <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
+
+define <4 x i32> @test_mask_or_epi32_rr_128(<4 x i32> %a, <4 x i32> %b) {
+ ;CHECK-LABEL: test_mask_or_epi32_rr_128
+ ;CHECK: vpord %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xeb,0xc1]
+ %res = call <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @test_mask_or_epi32_rrk_128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) {
+ ;CHECK-LABEL: test_mask_or_epi32_rrk_128
+ ;CHECK: vpord %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xeb,0xd1]
+ %res = call <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @test_mask_or_epi32_rrkz_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) {
+ ;CHECK-LABEL: test_mask_or_epi32_rrkz_128
+ ;CHECK: vpord %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xeb,0xc1]
+ %res = call <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @test_mask_or_epi32_rm_128(<4 x i32> %a, <4 x i32>* %ptr_b) {
+ ;CHECK-LABEL: test_mask_or_epi32_rm_128
+ ;CHECK: vpord (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xeb,0x07]
+ %b = load <4 x i32>, <4 x i32>* %ptr_b
+ %res = call <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @test_mask_or_epi32_rmk_128(<4 x i32> %a, <4 x i32>* %ptr_b, <4 x i32> %passThru, i8 %mask) {
+ ;CHECK-LABEL: test_mask_or_epi32_rmk_128
+ ;CHECK: vpord (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xeb,0x0f]
+ %b = load <4 x i32>, <4 x i32>* %ptr_b
+ %res = call <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @test_mask_or_epi32_rmkz_128(<4 x i32> %a, <4 x i32>* %ptr_b, i8 %mask) {
+ ;CHECK-LABEL: test_mask_or_epi32_rmkz_128
+ ;CHECK: vpord (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xeb,0x07]
+ %b = load <4 x i32>, <4 x i32>* %ptr_b
+ %res = call <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @test_mask_or_epi32_rmb_128(<4 x i32> %a, i32* %ptr_b) {
+ ;CHECK-LABEL: test_mask_or_epi32_rmb_128
+ ;CHECK: vpord (%rdi){1to4}, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x18,0xeb,0x07]
+ %q = load i32, i32* %ptr_b
+ %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
+ %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
+ %res = call <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @test_mask_or_epi32_rmbk_128(<4 x i32> %a, i32* %ptr_b, <4 x i32> %passThru, i8 %mask) {
+ ;CHECK-LABEL: test_mask_or_epi32_rmbk_128
+ ;CHECK: vpord (%rdi){1to4}, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x19,0xeb,0x0f]
+ %q = load i32, i32* %ptr_b
+ %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
+ %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
+ %res = call <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @test_mask_or_epi32_rmbkz_128(<4 x i32> %a, i32* %ptr_b, i8 %mask) {
+ ;CHECK-LABEL: test_mask_or_epi32_rmbkz_128
+ ;CHECK: vpord (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x99,0xeb,0x07]
+ %q = load i32, i32* %ptr_b
+ %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
+ %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
+ %res = call <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
+ ret <4 x i32> %res
+}
+
+declare <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
+
+define <8 x i32> @test_mask_or_epi32_rr_256(<8 x i32> %a, <8 x i32> %b) {
+ ;CHECK-LABEL: test_mask_or_epi32_rr_256
+ ;CHECK: vpord %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xeb,0xc1]
+ %res = call <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
+ ret <8 x i32> %res
+}
+
+define <8 x i32> @test_mask_or_epi32_rrk_256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) {
+ ;CHECK-LABEL: test_mask_or_epi32_rrk_256
+ ;CHECK: vpord %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xeb,0xd1]
+ %res = call <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
+ ret <8 x i32> %res
+}
+
+define <8 x i32> @test_mask_or_epi32_rrkz_256(<8 x i32> %a, <8 x i32> %b, i8 %mask) {
+ ;CHECK-LABEL: test_mask_or_epi32_rrkz_256
+ ;CHECK: vpord %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xeb,0xc1]
+ %res = call <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
+ ret <8 x i32> %res
+}
+
+define <8 x i32> @test_mask_or_epi32_rm_256(<8 x i32> %a, <8 x i32>* %ptr_b) {
+ ;CHECK-LABEL: test_mask_or_epi32_rm_256
+ ;CHECK: vpord (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xeb,0x07]
+ %b = load <8 x i32>, <8 x i32>* %ptr_b
+ %res = call <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
+ ret <8 x i32> %res
+}
+
+define <8 x i32> @test_mask_or_epi32_rmk_256(<8 x i32> %a, <8 x i32>* %ptr_b, <8 x i32> %passThru, i8 %mask) {
+ ;CHECK-LABEL: test_mask_or_epi32_rmk_256
+ ;CHECK: vpord (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xeb,0x0f]
+ %b = load <8 x i32>, <8 x i32>* %ptr_b
+ %res = call <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
+ ret <8 x i32> %res
+}
+
+define <8 x i32> @test_mask_or_epi32_rmkz_256(<8 x i32> %a, <8 x i32>* %ptr_b, i8 %mask) {
+ ;CHECK-LABEL: test_mask_or_epi32_rmkz_256
+ ;CHECK: vpord (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xeb,0x07]
+ %b = load <8 x i32>, <8 x i32>* %ptr_b
+ %res = call <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
+ ret <8 x i32> %res
+}
+
+define <8 x i32> @test_mask_or_epi32_rmb_256(<8 x i32> %a, i32* %ptr_b) {
+ ;CHECK-LABEL: test_mask_or_epi32_rmb_256
+ ;CHECK: vpord (%rdi){1to8}, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x38,0xeb,0x07]
+ %q = load i32, i32* %ptr_b
+ %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
+ %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
+ %res = call <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
+ ret <8 x i32> %res
+}
+
+define <8 x i32> @test_mask_or_epi32_rmbk_256(<8 x i32> %a, i32* %ptr_b, <8 x i32> %passThru, i8 %mask) {
+ ;CHECK-LABEL: test_mask_or_epi32_rmbk_256
+ ;CHECK: vpord (%rdi){1to8}, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x39,0xeb,0x0f]
+ %q = load i32, i32* %ptr_b
+ %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
+ %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
+ %res = call <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
+ ret <8 x i32> %res
+}
+
+define <8 x i32> @test_mask_or_epi32_rmbkz_256(<8 x i32> %a, i32* %ptr_b, i8 %mask) {
+ ;CHECK-LABEL: test_mask_or_epi32_rmbkz_256
+ ;CHECK: vpord (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xb9,0xeb,0x07]
+ %q = load i32, i32* %ptr_b
+ %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
+ %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
+ %res = call <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
+ ret <8 x i32> %res
+}
+
+declare <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
+
+define <4 x i32> @test_mask_xor_epi32_rr_128(<4 x i32> %a, <4 x i32> %b) {
+ ;CHECK-LABEL: test_mask_xor_epi32_rr_128
+ ;CHECK: vpxord %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xef,0xc1]
+ %res = call <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @test_mask_xor_epi32_rrk_128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) {
+ ;CHECK-LABEL: test_mask_xor_epi32_rrk_128
+ ;CHECK: vpxord %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xef,0xd1]
+ %res = call <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @test_mask_xor_epi32_rrkz_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) {
+ ;CHECK-LABEL: test_mask_xor_epi32_rrkz_128
+ ;CHECK: vpxord %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xef,0xc1]
+ %res = call <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @test_mask_xor_epi32_rm_128(<4 x i32> %a, <4 x i32>* %ptr_b) {
+ ;CHECK-LABEL: test_mask_xor_epi32_rm_128
+ ;CHECK: vpxord (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xef,0x07]
+ %b = load <4 x i32>, <4 x i32>* %ptr_b
+ %res = call <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @test_mask_xor_epi32_rmk_128(<4 x i32> %a, <4 x i32>* %ptr_b, <4 x i32> %passThru, i8 %mask) {
+ ;CHECK-LABEL: test_mask_xor_epi32_rmk_128
+ ;CHECK: vpxord (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xef,0x0f]
+ %b = load <4 x i32>, <4 x i32>* %ptr_b
+ %res = call <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @test_mask_xor_epi32_rmkz_128(<4 x i32> %a, <4 x i32>* %ptr_b, i8 %mask) {
+ ;CHECK-LABEL: test_mask_xor_epi32_rmkz_128
+ ;CHECK: vpxord (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xef,0x07]
+ %b = load <4 x i32>, <4 x i32>* %ptr_b
+ %res = call <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @test_mask_xor_epi32_rmb_128(<4 x i32> %a, i32* %ptr_b) {
+ ;CHECK-LABEL: test_mask_xor_epi32_rmb_128
+ ;CHECK: vpxord (%rdi){1to4}, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x18,0xef,0x07]
+ %q = load i32, i32* %ptr_b
+ %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
+ %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
+ %res = call <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @test_mask_xor_epi32_rmbk_128(<4 x i32> %a, i32* %ptr_b, <4 x i32> %passThru, i8 %mask) {
+ ;CHECK-LABEL: test_mask_xor_epi32_rmbk_128
+ ;CHECK: vpxord (%rdi){1to4}, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x19,0xef,0x0f]
+ %q = load i32, i32* %ptr_b
+ %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
+ %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
+ %res = call <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @test_mask_xor_epi32_rmbkz_128(<4 x i32> %a, i32* %ptr_b, i8 %mask) {
+ ;CHECK-LABEL: test_mask_xor_epi32_rmbkz_128
+ ;CHECK: vpxord (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x99,0xef,0x07]
+ %q = load i32, i32* %ptr_b
+ %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
+ %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
+ %res = call <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
+ ret <4 x i32> %res
+}
+
+declare <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
+
+define <8 x i32> @test_mask_xor_epi32_rr_256(<8 x i32> %a, <8 x i32> %b) {
+ ;CHECK-LABEL: test_mask_xor_epi32_rr_256
+ ;CHECK: vpxord %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xef,0xc1]
+ %res = call <8 x i32> @llvm.x86.avx512.mask.pxor.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
+ ret <8 x i32> %res
+}
+
+define <8 x i32> @test_mask_xor_epi32_rrk_256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) {
+ ;CHECK-LABEL: test_mask_xor_epi32_rrk_256
+ ;CHECK: vpxord %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xef,0xd1]
+ %res = call <8 x i32> @llvm.x86.avx512.mask.pxor.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
+ ret <8 x i32> %res
+}
+
+define <8 x i32> @test_mask_xor_epi32_rrkz_256(<8 x i32> %a, <8 x i32> %b, i8 %mask) {
+ ;CHECK-LABEL: test_mask_xor_epi32_rrkz_256
+ ;CHECK: vpxord %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xef,0xc1]
+ %res = call <8 x i32> @llvm.x86.avx512.mask.pxor.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
+ ret <8 x i32> %res
+}
+
+define <8 x i32> @test_mask_xor_epi32_rm_256(<8 x i32> %a, <8 x i32>* %ptr_b) {
+ ;CHECK-LABEL: test_mask_xor_epi32_rm_256
+ ;CHECK: vpxord (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xef,0x07]
+ %b = load <8 x i32>, <8 x i32>* %ptr_b
+ %res = call <8 x i32> @llvm.x86.avx512.mask.pxor.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
+ ret <8 x i32> %res
+}
+
+define <8 x i32> @test_mask_xor_epi32_rmk_256(<8 x i32> %a, <8 x i32>* %ptr_b, <8 x i32> %passThru, i8 %mask) {
+ ;CHECK-LABEL: test_mask_xor_epi32_rmk_256
+ ;CHECK: vpxord (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xef,0x0f]
+ %b = load <8 x i32>, <8 x i32>* %ptr_b
+ %res = call <8 x i32> @llvm.x86.avx512.mask.pxor.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
+ ret <8 x i32> %res
+}
+
+define <8 x i32> @test_mask_xor_epi32_rmkz_256(<8 x i32> %a, <8 x i32>* %ptr_b, i8 %mask) {
+ ;CHECK-LABEL: test_mask_xor_epi32_rmkz_256
+ ;CHECK: vpxord (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xef,0x07]
+ %b = load <8 x i32>, <8 x i32>* %ptr_b
+ %res = call <8 x i32> @llvm.x86.avx512.mask.pxor.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
+ ret <8 x i32> %res
+}
+
+define <8 x i32> @test_mask_xor_epi32_rmb_256(<8 x i32> %a, i32* %ptr_b) {
+ ;CHECK-LABEL: test_mask_xor_epi32_rmb_256
+ ;CHECK: vpxord (%rdi){1to8}, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x38,0xef,0x07]
+ %q = load i32, i32* %ptr_b
+ %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
+ %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
+ %res = call <8 x i32> @llvm.x86.avx512.mask.pxor.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
+ ret <8 x i32> %res
+}
+
+define <8 x i32> @test_mask_xor_epi32_rmbk_256(<8 x i32> %a, i32* %ptr_b, <8 x i32> %passThru, i8 %mask) {
+ ;CHECK-LABEL: test_mask_xor_epi32_rmbk_256
+ ;CHECK: vpxord (%rdi){1to8}, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x39,0xef,0x0f]
+ %q = load i32, i32* %ptr_b
+ %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
+ %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
+ %res = call <8 x i32> @llvm.x86.avx512.mask.pxor.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
+ ret <8 x i32> %res
+}
+
+define <8 x i32> @test_mask_xor_epi32_rmbkz_256(<8 x i32> %a, i32* %ptr_b, i8 %mask) {
+ ;CHECK-LABEL: test_mask_xor_epi32_rmbkz_256
+ ;CHECK: vpxord (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xb9,0xef,0x07]
+ %q = load i32, i32* %ptr_b
+ %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
+ %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
+ %res = call <8 x i32> @llvm.x86.avx512.mask.pxor.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
+ ret <8 x i32> %res
+}
+
+declare <8 x i32> @llvm.x86.avx512.mask.pxor.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
+
+define <4 x i32> @test_mask_andnot_epi32_rr_128(<4 x i32> %a, <4 x i32> %b) {
+ ;CHECK-LABEL: test_mask_andnot_epi32_rr_128
+ ;CHECK: vpandnd %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xdf,0xc1]
+ %res = call <4 x i32> @llvm.x86.avx512.mask.pandn.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @test_mask_andnot_epi32_rrk_128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) {
+ ;CHECK-LABEL: test_mask_andnot_epi32_rrk_128
+ ;CHECK: vpandnd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xdf,0xd1]
+ %res = call <4 x i32> @llvm.x86.avx512.mask.pandn.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @test_mask_andnot_epi32_rrkz_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) {
+ ;CHECK-LABEL: test_mask_andnot_epi32_rrkz_128
+ ;CHECK: vpandnd %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xdf,0xc1]
+ %res = call <4 x i32> @llvm.x86.avx512.mask.pandn.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @test_mask_andnot_epi32_rm_128(<4 x i32> %a, <4 x i32>* %ptr_b) {
+ ;CHECK-LABEL: test_mask_andnot_epi32_rm_128
+ ;CHECK: vpandnd (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xdf,0x07]
+ %b = load <4 x i32>, <4 x i32>* %ptr_b
+ %res = call <4 x i32> @llvm.x86.avx512.mask.pandn.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @test_mask_andnot_epi32_rmk_128(<4 x i32> %a, <4 x i32>* %ptr_b, <4 x i32> %passThru, i8 %mask) {
+ ;CHECK-LABEL: test_mask_andnot_epi32_rmk_128
+ ;CHECK: vpandnd (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xdf,0x0f]
+ %b = load <4 x i32>, <4 x i32>* %ptr_b
+ %res = call <4 x i32> @llvm.x86.avx512.mask.pandn.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @test_mask_andnot_epi32_rmkz_128(<4 x i32> %a, <4 x i32>* %ptr_b, i8 %mask) {
+ ;CHECK-LABEL: test_mask_andnot_epi32_rmkz_128
+ ;CHECK: vpandnd (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xdf,0x07]
+ %b = load <4 x i32>, <4 x i32>* %ptr_b
+ %res = call <4 x i32> @llvm.x86.avx512.mask.pandn.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @test_mask_andnot_epi32_rmb_128(<4 x i32> %a, i32* %ptr_b) {
+ ;CHECK-LABEL: test_mask_andnot_epi32_rmb_128
+ ;CHECK: vpandnd (%rdi){1to4}, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x18,0xdf,0x07]
+ %q = load i32, i32* %ptr_b
+ %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
+ %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
+ %res = call <4 x i32> @llvm.x86.avx512.mask.pandn.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @test_mask_andnot_epi32_rmbk_128(<4 x i32> %a, i32* %ptr_b, <4 x i32> %passThru, i8 %mask) {
+ ;CHECK-LABEL: test_mask_andnot_epi32_rmbk_128
+ ;CHECK: vpandnd (%rdi){1to4}, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x19,0xdf,0x0f]
+ %q = load i32, i32* %ptr_b
+ %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
+ %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
+ %res = call <4 x i32> @llvm.x86.avx512.mask.pandn.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @test_mask_andnot_epi32_rmbkz_128(<4 x i32> %a, i32* %ptr_b, i8 %mask) {
+ ;CHECK-LABEL: test_mask_andnot_epi32_rmbkz_128
+ ;CHECK: vpandnd (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x99,0xdf,0x07]
+ %q = load i32, i32* %ptr_b
+ %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
+ %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
+ %res = call <4 x i32> @llvm.x86.avx512.mask.pandn.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
+ ret <4 x i32> %res
+}
+
+declare <4 x i32> @llvm.x86.avx512.mask.pandn.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
+
+define <8 x i32> @test_mask_andnot_epi32_rr_256(<8 x i32> %a, <8 x i32> %b) {
+ ;CHECK-LABEL: test_mask_andnot_epi32_rr_256
+ ;CHECK: vpandnd %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xdf,0xc1]
+ %res = call <8 x i32> @llvm.x86.avx512.mask.pandn.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
+ ret <8 x i32> %res
+}
+
+define <8 x i32> @test_mask_andnot_epi32_rrk_256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) {
+ ;CHECK-LABEL: test_mask_andnot_epi32_rrk_256
+ ;CHECK: vpandnd %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xdf,0xd1]
+ %res = call <8 x i32> @llvm.x86.avx512.mask.pandn.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
+ ret <8 x i32> %res
+}
+
+define <8 x i32> @test_mask_andnot_epi32_rrkz_256(<8 x i32> %a, <8 x i32> %b, i8 %mask) {
+ ;CHECK-LABEL: test_mask_andnot_epi32_rrkz_256
+ ;CHECK: vpandnd %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xdf,0xc1]
+ %res = call <8 x i32> @llvm.x86.avx512.mask.pandn.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
+ ret <8 x i32> %res
+}
+
+define <8 x i32> @test_mask_andnot_epi32_rm_256(<8 x i32> %a, <8 x i32>* %ptr_b) {
+ ;CHECK-LABEL: test_mask_andnot_epi32_rm_256
+ ;CHECK: vpandnd (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xdf,0x07]
+ %b = load <8 x i32>, <8 x i32>* %ptr_b
+ %res = call <8 x i32> @llvm.x86.avx512.mask.pandn.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
+ ret <8 x i32> %res
+}
+
+define <8 x i32> @test_mask_andnot_epi32_rmk_256(<8 x i32> %a, <8 x i32>* %ptr_b, <8 x i32> %passThru, i8 %mask) {
+ ;CHECK-LABEL: test_mask_andnot_epi32_rmk_256
+ ;CHECK: vpandnd (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xdf,0x0f]
+ %b = load <8 x i32>, <8 x i32>* %ptr_b
+ %res = call <8 x i32> @llvm.x86.avx512.mask.pandn.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
+ ret <8 x i32> %res
+}
+
+define <8 x i32> @test_mask_andnot_epi32_rmkz_256(<8 x i32> %a, <8 x i32>* %ptr_b, i8 %mask) {
+ ;CHECK-LABEL: test_mask_andnot_epi32_rmkz_256
+ ;CHECK: vpandnd (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xdf,0x07]
+ %b = load <8 x i32>, <8 x i32>* %ptr_b
+ %res = call <8 x i32> @llvm.x86.avx512.mask.pandn.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
+ ret <8 x i32> %res
+}
+
+define <8 x i32> @test_mask_andnot_epi32_rmb_256(<8 x i32> %a, i32* %ptr_b) {
+ ;CHECK-LABEL: test_mask_andnot_epi32_rmb_256
+ ;CHECK: vpandnd (%rdi){1to8}, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x38,0xdf,0x07]
+ %q = load i32, i32* %ptr_b
+ %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
+ %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
+ %res = call <8 x i32> @llvm.x86.avx512.mask.pandn.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
+ ret <8 x i32> %res
+}
+
+define <8 x i32> @test_mask_andnot_epi32_rmbk_256(<8 x i32> %a, i32* %ptr_b, <8 x i32> %passThru, i8 %mask) {
+ ;CHECK-LABEL: test_mask_andnot_epi32_rmbk_256
+ ;CHECK: vpandnd (%rdi){1to8}, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x39,0xdf,0x0f]
+ %q = load i32, i32* %ptr_b
+ %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
+ %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
+ %res = call <8 x i32> @llvm.x86.avx512.mask.pandn.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
+ ret <8 x i32> %res
+}
+
+define <8 x i32> @test_mask_andnot_epi32_rmbkz_256(<8 x i32> %a, i32* %ptr_b, i8 %mask) {
+ ;CHECK-LABEL: test_mask_andnot_epi32_rmbkz_256
+ ;CHECK: vpandnd (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xb9,0xdf,0x07]
+ %q = load i32, i32* %ptr_b
+ %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
+ %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
+ %res = call <8 x i32> @llvm.x86.avx512.mask.pandn.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
+ ret <8 x i32> %res
+}
+
+declare <8 x i32> @llvm.x86.avx512.mask.pandn.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
+
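+; The epi64 variants repeat the same pattern with 64-bit elements: the
+; q-suffixed intrinsics lower to vpandnq, and the broadcast forms splat an
+; i64 from memory with {1to2}/{1to4}.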
+define <2 x i64> @test_mask_andnot_epi64_rr_128(<2 x i64> %a, <2 x i64> %b) {
+ ;CHECK-LABEL: test_mask_andnot_epi64_rr_128
+ ;CHECK: vpandnq %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0xdf,0xc1]
+ %res = call <2 x i64> @llvm.x86.avx512.mask.pandn.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> zeroinitializer, i8 -1)
+ ret <2 x i64> %res
+}
+
+define <2 x i64> @test_mask_andnot_epi64_rrk_128(<2 x i64> %a, <2 x i64> %b, <2 x i64> %passThru, i8 %mask) {
+ ;CHECK-LABEL: test_mask_andnot_epi64_rrk_128
+ ;CHECK: vpandnq %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0xdf,0xd1]
+ %res = call <2 x i64> @llvm.x86.avx512.mask.pandn.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> %passThru, i8 %mask)
+ ret <2 x i64> %res
+}
+
+define <2 x i64> @test_mask_andnot_epi64_rrkz_128(<2 x i64> %a, <2 x i64> %b, i8 %mask) {
+ ;CHECK-LABEL: test_mask_andnot_epi64_rrkz_128
+ ;CHECK: vpandnq %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x89,0xdf,0xc1]
+ %res = call <2 x i64> @llvm.x86.avx512.mask.pandn.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> zeroinitializer, i8 %mask)
+ ret <2 x i64> %res
+}
+
+define <2 x i64> @test_mask_andnot_epi64_rm_128(<2 x i64> %a, <2 x i64>* %ptr_b) {
+ ;CHECK-LABEL: test_mask_andnot_epi64_rm_128
+ ;CHECK: vpandnq (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0xdf,0x07]
+ %b = load <2 x i64>, <2 x i64>* %ptr_b
+ %res = call <2 x i64> @llvm.x86.avx512.mask.pandn.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> zeroinitializer, i8 -1)
+ ret <2 x i64> %res
+}
+
+define <2 x i64> @test_mask_andnot_epi64_rmk_128(<2 x i64> %a, <2 x i64>* %ptr_b, <2 x i64> %passThru, i8 %mask) {
+ ;CHECK-LABEL: test_mask_andnot_epi64_rmk_128
+ ;CHECK: vpandnq (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0xdf,0x0f]
+ %b = load <2 x i64>, <2 x i64>* %ptr_b
+ %res = call <2 x i64> @llvm.x86.avx512.mask.pandn.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> %passThru, i8 %mask)
+ ret <2 x i64> %res
+}
+
+define <2 x i64> @test_mask_andnot_epi64_rmkz_128(<2 x i64> %a, <2 x i64>* %ptr_b, i8 %mask) {
+ ;CHECK-LABEL: test_mask_andnot_epi64_rmkz_128
+ ;CHECK: vpandnq (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x89,0xdf,0x07]
+ %b = load <2 x i64>, <2 x i64>* %ptr_b
+ %res = call <2 x i64> @llvm.x86.avx512.mask.pandn.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> zeroinitializer, i8 %mask)
+ ret <2 x i64> %res
+}
+
+define <2 x i64> @test_mask_andnot_epi64_rmb_128(<2 x i64> %a, i64* %ptr_b) {
+ ;CHECK-LABEL: test_mask_andnot_epi64_rmb_128
+ ;CHECK: vpandnq (%rdi){1to2}, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x18,0xdf,0x07]
+ %q = load i64, i64* %ptr_b
+ %vecinit.i = insertelement <2 x i64> undef, i64 %q, i32 0
+ %b = shufflevector <2 x i64> %vecinit.i, <2 x i64> undef, <2 x i32> zeroinitializer
+ %res = call <2 x i64> @llvm.x86.avx512.mask.pandn.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> zeroinitializer, i8 -1)
+ ret <2 x i64> %res
+}
+
+define <2 x i64> @test_mask_andnot_epi64_rmbk_128(<2 x i64> %a, i64* %ptr_b, <2 x i64> %passThru, i8 %mask) {
+ ;CHECK-LABEL: test_mask_andnot_epi64_rmbk_128
+ ;CHECK: vpandnq (%rdi){1to2}, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x19,0xdf,0x0f]
+ %q = load i64, i64* %ptr_b
+ %vecinit.i = insertelement <2 x i64> undef, i64 %q, i32 0
+ %b = shufflevector <2 x i64> %vecinit.i, <2 x i64> undef, <2 x i32> zeroinitializer
+ %res = call <2 x i64> @llvm.x86.avx512.mask.pandn.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> %passThru, i8 %mask)
+ ret <2 x i64> %res
+}
+
+define <2 x i64> @test_mask_andnot_epi64_rmbkz_128(<2 x i64> %a, i64* %ptr_b, i8 %mask) {
+ ;CHECK-LABEL: test_mask_andnot_epi64_rmbkz_128
+ ;CHECK: vpandnq (%rdi){1to2}, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x99,0xdf,0x07]
+ %q = load i64, i64* %ptr_b
+ %vecinit.i = insertelement <2 x i64> undef, i64 %q, i32 0
+ %b = shufflevector <2 x i64> %vecinit.i, <2 x i64> undef, <2 x i32> zeroinitializer
+ %res = call <2 x i64> @llvm.x86.avx512.mask.pandn.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> zeroinitializer, i8 %mask)
+ ret <2 x i64> %res
+}
+
+declare <2 x i64> @llvm.x86.avx512.mask.pandn.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)
+
+define <4 x i64> @test_mask_andnot_epi64_rr_256(<4 x i64> %a, <4 x i64> %b) {
+ ;CHECK-LABEL: test_mask_andnot_epi64_rr_256
+ ;CHECK: vpandnq %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0xdf,0xc1]
+ %res = call <4 x i64> @llvm.x86.avx512.mask.pandn.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> zeroinitializer, i8 -1)
+ ret <4 x i64> %res
+}
+
+define <4 x i64> @test_mask_andnot_epi64_rrk_256(<4 x i64> %a, <4 x i64> %b, <4 x i64> %passThru, i8 %mask) {
+ ;CHECK-LABEL: test_mask_andnot_epi64_rrk_256
+ ;CHECK: vpandnq %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0xdf,0xd1]
+ %res = call <4 x i64> @llvm.x86.avx512.mask.pandn.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> %passThru, i8 %mask)
+ ret <4 x i64> %res
+}
+
+define <4 x i64> @test_mask_andnot_epi64_rrkz_256(<4 x i64> %a, <4 x i64> %b, i8 %mask) {
+ ;CHECK-LABEL: test_mask_andnot_epi64_rrkz_256
+ ;CHECK: vpandnq %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xa9,0xdf,0xc1]
+ %res = call <4 x i64> @llvm.x86.avx512.mask.pandn.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> zeroinitializer, i8 %mask)
+ ret <4 x i64> %res
+}
+
+define <4 x i64> @test_mask_andnot_epi64_rm_256(<4 x i64> %a, <4 x i64>* %ptr_b) {
+ ;CHECK-LABEL: test_mask_andnot_epi64_rm_256
+ ;CHECK: vpandnq (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0xdf,0x07]
+ %b = load <4 x i64>, <4 x i64>* %ptr_b
+ %res = call <4 x i64> @llvm.x86.avx512.mask.pandn.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> zeroinitializer, i8 -1)
+ ret <4 x i64> %res
+}
+
+define <4 x i64> @test_mask_andnot_epi64_rmk_256(<4 x i64> %a, <4 x i64>* %ptr_b, <4 x i64> %passThru, i8 %mask) {
+ ;CHECK-LABEL: test_mask_andnot_epi64_rmk_256
+ ;CHECK: vpandnq (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0xdf,0x0f]
+ %b = load <4 x i64>, <4 x i64>* %ptr_b
+ %res = call <4 x i64> @llvm.x86.avx512.mask.pandn.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> %passThru, i8 %mask)
+ ret <4 x i64> %res
+}
+
+define <4 x i64> @test_mask_andnot_epi64_rmkz_256(<4 x i64> %a, <4 x i64>* %ptr_b, i8 %mask) {
+ ;CHECK-LABEL: test_mask_andnot_epi64_rmkz_256
+ ;CHECK: vpandnq (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xa9,0xdf,0x07]
+ %b = load <4 x i64>, <4 x i64>* %ptr_b
+ %res = call <4 x i64> @llvm.x86.avx512.mask.pandn.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> zeroinitializer, i8 %mask)
+ ret <4 x i64> %res
+}
+
+define <4 x i64> @test_mask_andnot_epi64_rmb_256(<4 x i64> %a, i64* %ptr_b) {
+ ;CHECK-LABEL: test_mask_andnot_epi64_rmb_256
+ ;CHECK: vpandnq (%rdi){1to4}, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x38,0xdf,0x07]
+ %q = load i64, i64* %ptr_b
+ %vecinit.i = insertelement <4 x i64> undef, i64 %q, i32 0
+ %b = shufflevector <4 x i64> %vecinit.i, <4 x i64> undef, <4 x i32> zeroinitializer
+ %res = call <4 x i64> @llvm.x86.avx512.mask.pandn.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> zeroinitializer, i8 -1)
+ ret <4 x i64> %res
+}
+
+define <4 x i64> @test_mask_andnot_epi64_rmbk_256(<4 x i64> %a, i64* %ptr_b, <4 x i64> %passThru, i8 %mask) {
+ ;CHECK-LABEL: test_mask_andnot_epi64_rmbk_256
+ ;CHECK: vpandnq (%rdi){1to4}, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x39,0xdf,0x0f]
+ %q = load i64, i64* %ptr_b
+ %vecinit.i = insertelement <4 x i64> undef, i64 %q, i32 0
+ %b = shufflevector <4 x i64> %vecinit.i, <4 x i64> undef, <4 x i32> zeroinitializer
+ %res = call <4 x i64> @llvm.x86.avx512.mask.pandn.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> %passThru, i8 %mask)
+ ret <4 x i64> %res
+}
+
+define <4 x i64> @test_mask_andnot_epi64_rmbkz_256(<4 x i64> %a, i64* %ptr_b, i8 %mask) {
+ ;CHECK-LABEL: test_mask_andnot_epi64_rmbkz_256
+ ;CHECK: vpandnq (%rdi){1to4}, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xb9,0xdf,0x07]
+ %q = load i64, i64* %ptr_b
+ %vecinit.i = insertelement <4 x i64> undef, i64 %q, i32 0
+ %b = shufflevector <4 x i64> %vecinit.i, <4 x i64> undef, <4 x i32> zeroinitializer
+ %res = call <4 x i64> @llvm.x86.avx512.mask.pandn.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> zeroinitializer, i8 %mask)
+ ret <4 x i64> %res
+}
+
+declare <4 x i64> @llvm.x86.avx512.mask.pandn.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)
+
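+; The compare intrinsics return their per-lane results in the low bits of an
+; i8 mask; immediate predicate 2 selects the less-than-or-equal comparison,
+; hence the vcmpleps/vcmplepd mnemonics expected below.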
+define i8 @test_cmpps_256(<8 x float> %a, <8 x float> %b) {
+ ;CHECK-LABEL: test_cmpps_256
+ ;CHECK: vcmpleps %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf1,0x7c,0x28,0xc2,0xc1,0x02]
+ %res = call i8 @llvm.x86.avx512.mask.cmp.ps.256(<8 x float> %a, <8 x float> %b, i32 2, i8 -1)
+ ret i8 %res
+}
+declare i8 @llvm.x86.avx512.mask.cmp.ps.256(<8 x float>, <8 x float>, i32, i8)
+
+define i8 @test_cmpps_128(<4 x float> %a, <4 x float> %b) {
+ ;CHECK-LABEL: test_cmpps_128
+ ;CHECK: vcmpleps %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf1,0x7c,0x08,0xc2,0xc1,0x02]
+ %res = call i8 @llvm.x86.avx512.mask.cmp.ps.128(<4 x float> %a, <4 x float> %b, i32 2, i8 -1)
+ ret i8 %res
+}
+declare i8 @llvm.x86.avx512.mask.cmp.ps.128(<4 x float>, <4 x float>, i32, i8)
+
+define i8 @test_cmppd_256(<4 x double> %a, <4 x double> %b) {
+ ;CHECK-LABEL: test_cmppd_256
+ ;CHECK: vcmplepd %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf1,0xfd,0x28,0xc2,0xc1,0x02]
+ %res = call i8 @llvm.x86.avx512.mask.cmp.pd.256(<4 x double> %a, <4 x double> %b, i32 2, i8 -1)
+ ret i8 %res
+}
+declare i8 @llvm.x86.avx512.mask.cmp.pd.256(<4 x double>, <4 x double>, i32, i8)
+
+define i8 @test_cmppd_128(<2 x double> %a, <2 x double> %b) {
+ ;CHECK-LABEL: test_cmppd_128
+ ;CHECK: vcmplepd %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf1,0xfd,0x08,0xc2,0xc1,0x02]
+ %res = call i8 @llvm.x86.avx512.mask.cmp.pd.128(<2 x double> %a, <2 x double> %b, i32 2, i8 -1)
+ ret i8 %res
+}
+declare i8 @llvm.x86.avx512.mask.cmp.pd.128(<2 x double>, <2 x double>, i32, i8)
+
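+; The packed float arithmetic tests below exercise the same masking forms for
+; the add/sub/mul intrinsics: {z} zero-masking, merge-masking into %src, and
+; the unmasked form where the mask argument is -1.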
+define <8 x float> @test_mm512_maskz_add_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) {
+ ;CHECK-LABEL: test_mm512_maskz_add_ps_256
+ ;CHECK: vaddps %ymm1, %ymm0, %ymm0 {%k1} {z}
+ %res = call <8 x float> @llvm.x86.avx512.mask.add.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float>zeroinitializer, i8 %mask)
+ ret <8 x float> %res
+}
+
+define <8 x float> @test_mm512_mask_add_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask) {
+ ;CHECK-LABEL: test_mm512_mask_add_ps_256
+ ;CHECK: vaddps %ymm1, %ymm0, %ymm2 {%k1}
+ %res = call <8 x float> @llvm.x86.avx512.mask.add.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask)
+ ret <8 x float> %res
+}
+
+define <8 x float> @test_mm512_add_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) {
+ ;CHECK-LABEL: test_mm512_add_ps_256
+ ;CHECK: vaddps %ymm1, %ymm0, %ymm0
+ %res = call <8 x float> @llvm.x86.avx512.mask.add.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float>zeroinitializer, i8 -1)
+ ret <8 x float> %res
+}
+declare <8 x float> @llvm.x86.avx512.mask.add.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)
+
+define <4 x float> @test_mm512_maskz_add_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
+ ;CHECK-LABEL: test_mm512_maskz_add_ps_128
+ ;CHECK: vaddps %xmm1, %xmm0, %xmm0 {%k1} {z}
+ %res = call <4 x float> @llvm.x86.avx512.mask.add.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float>zeroinitializer, i8 %mask)
+ ret <4 x float> %res
+}
+
+define <4 x float> @test_mm512_mask_add_ps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask) {
+ ;CHECK-LABEL: test_mm512_mask_add_ps_128
+ ;CHECK: vaddps %xmm1, %xmm0, %xmm2 {%k1}
+ %res = call <4 x float> @llvm.x86.avx512.mask.add.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask)
+ ret <4 x float> %res
+}
+
+define <4 x float> @test_mm512_add_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
+ ;CHECK-LABEL: test_mm512_add_ps_128
+ ;CHECK: vaddps %xmm1, %xmm0, %xmm0
+ %res = call <4 x float> @llvm.x86.avx512.mask.add.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float>zeroinitializer, i8 -1)
+ ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.avx512.mask.add.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)
+
+define <8 x float> @test_mm512_maskz_sub_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) {
+ ;CHECK-LABEL: test_mm512_maskz_sub_ps_256
+ ;CHECK: vsubps %ymm1, %ymm0, %ymm0 {%k1} {z}
+ %res = call <8 x float> @llvm.x86.avx512.mask.sub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float>zeroinitializer, i8 %mask)
+ ret <8 x float> %res
+}
+
+define <8 x float> @test_mm512_mask_sub_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask) {
+ ;CHECK-LABEL: test_mm512_mask_sub_ps_256
+ ;CHECK: vsubps %ymm1, %ymm0, %ymm2 {%k1}
+ %res = call <8 x float> @llvm.x86.avx512.mask.sub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask)
+ ret <8 x float> %res
+}
+
+define <8 x float> @test_mm512_sub_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) {
+ ;CHECK-LABEL: test_mm512_sub_ps_256
+ ;CHECK: vsubps %ymm1, %ymm0, %ymm0
+ %res = call <8 x float> @llvm.x86.avx512.mask.sub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float>zeroinitializer, i8 -1)
+ ret <8 x float> %res
+}
+declare <8 x float> @llvm.x86.avx512.mask.sub.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)
+
+define <4 x float> @test_mm512_maskz_sub_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
+ ;CHECK-LABEL: test_mm512_maskz_sub_ps_128
+ ;CHECK: vsubps %xmm1, %xmm0, %xmm0 {%k1} {z}
+ %res = call <4 x float> @llvm.x86.avx512.mask.sub.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float>zeroinitializer, i8 %mask)
+ ret <4 x float> %res
+}
+
+define <4 x float> @test_mm512_mask_sub_ps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask) {
+ ;CHECK-LABEL: test_mm512_mask_sub_ps_128
+ ;CHECK: vsubps %xmm1, %xmm0, %xmm2 {%k1}
+ %res = call <4 x float> @llvm.x86.avx512.mask.sub.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask)
+ ret <4 x float> %res
+}
+
+define <4 x float> @test_mm512_sub_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
+ ;CHECK-LABEL: test_mm512_sub_ps_128
+ ;CHECK: vsubps %xmm1, %xmm0, %xmm0
+ %res = call <4 x float> @llvm.x86.avx512.mask.sub.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float>zeroinitializer, i8 -1)
+ ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.avx512.mask.sub.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)
+
+define <8 x float> @test_mm512_maskz_mul_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) {
+ ;CHECK-LABEL: test_mm512_maskz_mul_ps_256
+ ;CHECK: vmulps %ymm1, %ymm0, %ymm0 {%k1} {z}
+ %res = call <8 x float> @llvm.x86.avx512.mask.mul.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float>zeroinitializer, i8 %mask)
+ ret <8 x float> %res
+}
+
+define <8 x float> @test_mm512_mask_mul_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask) {
+ ;CHECK-LABEL: test_mm512_mask_mul_ps_256
+ ;CHECK: vmulps %ymm1, %ymm0, %ymm2 {%k1}
+ %res = call <8 x float> @llvm.x86.avx512.mask.mul.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask)
+ ret <8 x float> %res
+}
+
+define <8 x float> @test_mm512_mul_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) {
+ ;CHECK-LABEL: test_mm512_mul_ps_256
+ ;CHECK: vmulps %ymm1, %ymm0, %ymm0
+ %res = call <8 x float> @llvm.x86.avx512.mask.mul.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float>zeroinitializer, i8 -1)
+ ret <8 x float> %res
+}
+declare <8 x float> @llvm.x86.avx512.mask.mul.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)
+
+define <4 x float> @test_mm512_maskz_mul_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
+ ;CHECK-LABEL: test_mm512_maskz_mul_ps_128
+ ;CHECK: vmulps %xmm1, %xmm0, %xmm0 {%k1} {z}
+ %res = call <4 x float> @llvm.x86.avx512.mask.mul.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float>zeroinitializer, i8 %mask)
+ ret <4 x float> %res
+}
+
+define <4 x float> @test_mm512_mask_mul_ps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask) {
+ ;CHECK-LABEL: test_mm512_mask_mul_ps_128
+ ;CHECK: vmulps %xmm1, %xmm0, %xmm2 {%k1}
+ %res = call <4 x float> @llvm.x86.avx512.mask.mul.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask)
+ ret <4 x float> %res
+}
+
+define <4 x float> @test_mm512_mul_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
+ ;CHECK-LABEL: test_mm512_mul_ps_128
+ ;CHECK: vmulps %xmm1, %xmm0, %xmm0
+ %res = call <4 x float> @llvm.x86.avx512.mask.mul.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float>zeroinitializer, i8 -1)
+ ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.avx512.mask.mul.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)
+
+define <8 x float> @test_mm512_maskz_div_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) {
+ ;CHECK-LABEL: test_mm512_maskz_div_ps_256
+ ;CHECK: vdivps %ymm1, %ymm0, %ymm0 {%k1} {z}
+ %res = call <8 x float> @llvm.x86.avx512.mask.div.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float>zeroinitializer, i8 %mask)
+ ret <8 x float> %res
+}
+
+define <8 x float> @test_mm512_mask_div_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask) {
+ ;CHECK-LABEL: test_mm512_mask_div_ps_256
+ ;CHECK: vdivps %ymm1, %ymm0, %ymm2 {%k1}
+ %res = call <8 x float> @llvm.x86.avx512.mask.div.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask)
+ ret <8 x float> %res
+}
+
+define <8 x float> @test_mm512_div_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) {
+ ;CHECK-LABEL: test_mm512_div_ps_256
+ ;CHECK: vdivps %ymm1, %ymm0, %ymm0
+ %res = call <8 x float> @llvm.x86.avx512.mask.div.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float>zeroinitializer, i8 -1)
+ ret <8 x float> %res
+}
+declare <8 x float> @llvm.x86.avx512.mask.div.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)
+
+define <4 x float> @test_mm512_maskz_div_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
+ ;CHECK-LABEL: test_mm512_maskz_div_ps_128
+ ;CHECK: vdivps %xmm1, %xmm0, %xmm0 {%k1} {z}
+ %res = call <4 x float> @llvm.x86.avx512.mask.div.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float>zeroinitializer, i8 %mask)
+ ret <4 x float> %res
+}
+
+define <4 x float> @test_mm512_mask_div_ps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask) {
+ ;CHECK-LABEL: test_mm512_mask_div_ps_128
+ ;CHECK: vdivps %xmm1, %xmm0, %xmm2 {%k1}
+ %res = call <4 x float> @llvm.x86.avx512.mask.div.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask)
+ ret <4 x float> %res
+}
+
+define <4 x float> @test_mm512_div_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
+ ;CHECK-LABEL: test_mm512_div_ps_128
+ ;CHECK: vdivps %xmm1, %xmm0, %xmm0
+ %res = call <4 x float> @llvm.x86.avx512.mask.div.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float>zeroinitializer, i8 -1)
+ ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.avx512.mask.div.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)
+
+define <8 x float> @test_mm512_maskz_max_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) {
+ ;CHECK-LABEL: test_mm512_maskz_max_ps_256
+ ;CHECK: vmaxps %ymm1, %ymm0, %ymm0 {%k1} {z}
+ %res = call <8 x float> @llvm.x86.avx512.mask.max.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float>zeroinitializer, i8 %mask)
+ ret <8 x float> %res
+}
+
+define <8 x float> @test_mm512_mask_max_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask) {
+ ;CHECK-LABEL: test_mm512_mask_max_ps_256
+ ;CHECK: vmaxps %ymm1, %ymm0, %ymm2 {%k1}
+ %res = call <8 x float> @llvm.x86.avx512.mask.max.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask)
+ ret <8 x float> %res
+}
+
+define <8 x float> @test_mm512_max_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) {
+ ;CHECK-LABEL: test_mm512_max_ps_256
+ ;CHECK: vmaxps %ymm1, %ymm0, %ymm0
+ %res = call <8 x float> @llvm.x86.avx512.mask.max.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float>zeroinitializer, i8 -1)
+ ret <8 x float> %res
+}
+declare <8 x float> @llvm.x86.avx512.mask.max.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)
+
+define <4 x float> @test_mm512_maskz_max_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
+ ;CHECK-LABEL: test_mm512_maskz_max_ps_128
+ ;CHECK: vmaxps %xmm1, %xmm0, %xmm0 {%k1} {z}
+ %res = call <4 x float> @llvm.x86.avx512.mask.max.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float>zeroinitializer, i8 %mask)
+ ret <4 x float> %res
+}
+
+define <4 x float> @test_mm512_mask_max_ps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask) {
+ ;CHECK-LABEL: test_mm512_mask_max_ps_128
+ ;CHECK: vmaxps %xmm1, %xmm0, %xmm2 {%k1}
+ %res = call <4 x float> @llvm.x86.avx512.mask.max.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask)
+ ret <4 x float> %res
+}
+
+define <4 x float> @test_mm512_max_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
+ ;CHECK-LABEL: test_mm512_max_ps_128
+ ;CHECK: vmaxps %xmm1, %xmm0, %xmm0
+ %res = call <4 x float> @llvm.x86.avx512.mask.max.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float>zeroinitializer, i8 -1)
+ ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.avx512.mask.max.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)
+
+define <8 x float> @test_mm512_maskz_min_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) {
+ ;CHECK-LABEL: test_mm512_maskz_min_ps_256
+ ;CHECK: vminps %ymm1, %ymm0, %ymm0 {%k1} {z}
+ %res = call <8 x float> @llvm.x86.avx512.mask.min.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float>zeroinitializer, i8 %mask)
+ ret <8 x float> %res
+}
+
+define <8 x float> @test_mm512_mask_min_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask) {
+ ;CHECK-LABEL: test_mm512_mask_min_ps_256
+ ;CHECK: vminps %ymm1, %ymm0, %ymm2 {%k1}
+ %res = call <8 x float> @llvm.x86.avx512.mask.min.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask)
+ ret <8 x float> %res
+}
+
+define <8 x float> @test_mm512_min_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) {
+ ;CHECK-LABEL: test_mm512_min_ps_256
+ ;CHECK: vminps %ymm1, %ymm0, %ymm0
+ %res = call <8 x float> @llvm.x86.avx512.mask.min.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float>zeroinitializer, i8 -1)
+ ret <8 x float> %res
+}
+declare <8 x float> @llvm.x86.avx512.mask.min.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)
+
+define <4 x float> @test_mm512_maskz_min_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
+ ;CHECK-LABEL: test_mm512_maskz_min_ps_128
+ ;CHECK: vminps %xmm1, %xmm0, %xmm0 {%k1} {z}
+ %res = call <4 x float> @llvm.x86.avx512.mask.min.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float>zeroinitializer, i8 %mask)
+ ret <4 x float> %res
+}
+
+define <4 x float> @test_mm512_mask_min_ps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask) {
+ ;CHECK-LABEL: test_mm512_mask_min_ps_128
+ ;CHECK: vminps %xmm1, %xmm0, %xmm2 {%k1}
+ %res = call <4 x float> @llvm.x86.avx512.mask.min.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask)
+ ret <4 x float> %res
+}
+
+define <4 x float> @test_mm512_min_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
+ ;CHECK-LABEL: test_mm512_min_ps_128
+ ;CHECK: vminps %xmm1, %xmm0, %xmm0
+ %res = call <4 x float> @llvm.x86.avx512.mask.min.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float>zeroinitializer, i8 -1)
+ ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.avx512.mask.min.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)
\ No newline at end of file
diff --git a/test/CodeGen/X86/avx512vl-mov.ll b/test/CodeGen/X86/avx512vl-mov.ll
index 32246568ac2e..18fa0a142a2d 100644
--- a/test/CodeGen/X86/avx512vl-mov.ll
+++ b/test/CodeGen/X86/avx512vl-mov.ll
@@ -5,7 +5,7 @@
; CHECK: ret
define <8 x i32> @test_256_1(i8 * %addr) {
%vaddr = bitcast i8* %addr to <8 x i32>*
- %res = load <8 x i32>* %vaddr, align 1
+ %res = load <8 x i32>, <8 x i32>* %vaddr, align 1
ret <8 x i32>%res
}
@@ -14,7 +14,7 @@ define <8 x i32> @test_256_1(i8 * %addr) {
; CHECK: ret
define <8 x i32> @test_256_2(i8 * %addr) {
%vaddr = bitcast i8* %addr to <8 x i32>*
- %res = load <8 x i32>* %vaddr, align 32
+ %res = load <8 x i32>, <8 x i32>* %vaddr, align 32
ret <8 x i32>%res
}
@@ -50,7 +50,7 @@ define void @test_256_5(i8 * %addr, <8 x i32> %data) {
; CHECK: ret
define <4 x i64> @test_256_6(i8 * %addr) {
%vaddr = bitcast i8* %addr to <4 x i64>*
- %res = load <4 x i64>* %vaddr, align 32
+ %res = load <4 x i64>, <4 x i64>* %vaddr, align 32
ret <4 x i64>%res
}
@@ -68,7 +68,7 @@ define void @test_256_7(i8 * %addr, <4 x i64> %data) {
; CHECK: ret
define <4 x i64> @test_256_8(i8 * %addr) {
%vaddr = bitcast i8* %addr to <4 x i64>*
- %res = load <4 x i64>* %vaddr, align 1
+ %res = load <4 x i64>, <4 x i64>* %vaddr, align 1
ret <4 x i64>%res
}
@@ -86,7 +86,7 @@ define void @test_256_9(i8 * %addr, <4 x double> %data) {
; CHECK: ret
define <4 x double> @test_256_10(i8 * %addr) {
%vaddr = bitcast i8* %addr to <4 x double>*
- %res = load <4 x double>* %vaddr, align 32
+ %res = load <4 x double>, <4 x double>* %vaddr, align 32
ret <4 x double>%res
}
@@ -104,7 +104,7 @@ define void @test_256_11(i8 * %addr, <8 x float> %data) {
; CHECK: ret
define <8 x float> @test_256_12(i8 * %addr) {
%vaddr = bitcast i8* %addr to <8 x float>*
- %res = load <8 x float>* %vaddr, align 32
+ %res = load <8 x float>, <8 x float>* %vaddr, align 32
ret <8 x float>%res
}
@@ -122,7 +122,7 @@ define void @test_256_13(i8 * %addr, <4 x double> %data) {
; CHECK: ret
define <4 x double> @test_256_14(i8 * %addr) {
%vaddr = bitcast i8* %addr to <4 x double>*
- %res = load <4 x double>* %vaddr, align 1
+ %res = load <4 x double>, <4 x double>* %vaddr, align 1
ret <4 x double>%res
}
@@ -140,7 +140,7 @@ define void @test_256_15(i8 * %addr, <8 x float> %data) {
; CHECK: ret
define <8 x float> @test_256_16(i8 * %addr) {
%vaddr = bitcast i8* %addr to <8 x float>*
- %res = load <8 x float>* %vaddr, align 1
+ %res = load <8 x float>, <8 x float>* %vaddr, align 1
ret <8 x float>%res
}
@@ -150,7 +150,7 @@ define <8 x float> @test_256_16(i8 * %addr) {
define <8 x i32> @test_256_17(i8 * %addr, <8 x i32> %old, <8 x i32> %mask1) {
%mask = icmp ne <8 x i32> %mask1, zeroinitializer
%vaddr = bitcast i8* %addr to <8 x i32>*
- %r = load <8 x i32>* %vaddr, align 32
+ %r = load <8 x i32>, <8 x i32>* %vaddr, align 32
%res = select <8 x i1> %mask, <8 x i32> %r, <8 x i32> %old
ret <8 x i32>%res
}
@@ -161,7 +161,7 @@ define <8 x i32> @test_256_17(i8 * %addr, <8 x i32> %old, <8 x i32> %mask1) {
define <8 x i32> @test_256_18(i8 * %addr, <8 x i32> %old, <8 x i32> %mask1) {
%mask = icmp ne <8 x i32> %mask1, zeroinitializer
%vaddr = bitcast i8* %addr to <8 x i32>*
- %r = load <8 x i32>* %vaddr, align 1
+ %r = load <8 x i32>, <8 x i32>* %vaddr, align 1
%res = select <8 x i1> %mask, <8 x i32> %r, <8 x i32> %old
ret <8 x i32>%res
}
@@ -172,7 +172,7 @@ define <8 x i32> @test_256_18(i8 * %addr, <8 x i32> %old, <8 x i32> %mask1) {
define <8 x i32> @test_256_19(i8 * %addr, <8 x i32> %mask1) {
%mask = icmp ne <8 x i32> %mask1, zeroinitializer
%vaddr = bitcast i8* %addr to <8 x i32>*
- %r = load <8 x i32>* %vaddr, align 32
+ %r = load <8 x i32>, <8 x i32>* %vaddr, align 32
%res = select <8 x i1> %mask, <8 x i32> %r, <8 x i32> zeroinitializer
ret <8 x i32>%res
}
@@ -183,7 +183,7 @@ define <8 x i32> @test_256_19(i8 * %addr, <8 x i32> %mask1) {
define <8 x i32> @test_256_20(i8 * %addr, <8 x i32> %mask1) {
%mask = icmp ne <8 x i32> %mask1, zeroinitializer
%vaddr = bitcast i8* %addr to <8 x i32>*
- %r = load <8 x i32>* %vaddr, align 1
+ %r = load <8 x i32>, <8 x i32>* %vaddr, align 1
%res = select <8 x i1> %mask, <8 x i32> %r, <8 x i32> zeroinitializer
ret <8 x i32>%res
}
@@ -194,7 +194,7 @@ define <8 x i32> @test_256_20(i8 * %addr, <8 x i32> %mask1) {
define <4 x i64> @test_256_21(i8 * %addr, <4 x i64> %old, <4 x i64> %mask1) {
%mask = icmp ne <4 x i64> %mask1, zeroinitializer
%vaddr = bitcast i8* %addr to <4 x i64>*
- %r = load <4 x i64>* %vaddr, align 32
+ %r = load <4 x i64>, <4 x i64>* %vaddr, align 32
%res = select <4 x i1> %mask, <4 x i64> %r, <4 x i64> %old
ret <4 x i64>%res
}
@@ -205,7 +205,7 @@ define <4 x i64> @test_256_21(i8 * %addr, <4 x i64> %old, <4 x i64> %mask1) {
define <4 x i64> @test_256_22(i8 * %addr, <4 x i64> %old, <4 x i64> %mask1) {
%mask = icmp ne <4 x i64> %mask1, zeroinitializer
%vaddr = bitcast i8* %addr to <4 x i64>*
- %r = load <4 x i64>* %vaddr, align 1
+ %r = load <4 x i64>, <4 x i64>* %vaddr, align 1
%res = select <4 x i1> %mask, <4 x i64> %r, <4 x i64> %old
ret <4 x i64>%res
}
@@ -216,7 +216,7 @@ define <4 x i64> @test_256_22(i8 * %addr, <4 x i64> %old, <4 x i64> %mask1) {
define <4 x i64> @test_256_23(i8 * %addr, <4 x i64> %mask1) {
%mask = icmp ne <4 x i64> %mask1, zeroinitializer
%vaddr = bitcast i8* %addr to <4 x i64>*
- %r = load <4 x i64>* %vaddr, align 32
+ %r = load <4 x i64>, <4 x i64>* %vaddr, align 32
%res = select <4 x i1> %mask, <4 x i64> %r, <4 x i64> zeroinitializer
ret <4 x i64>%res
}
@@ -227,7 +227,7 @@ define <4 x i64> @test_256_23(i8 * %addr, <4 x i64> %mask1) {
define <4 x i64> @test_256_24(i8 * %addr, <4 x i64> %mask1) {
%mask = icmp ne <4 x i64> %mask1, zeroinitializer
%vaddr = bitcast i8* %addr to <4 x i64>*
- %r = load <4 x i64>* %vaddr, align 1
+ %r = load <4 x i64>, <4 x i64>* %vaddr, align 1
%res = select <4 x i1> %mask, <4 x i64> %r, <4 x i64> zeroinitializer
ret <4 x i64>%res
}
@@ -238,7 +238,7 @@ define <4 x i64> @test_256_24(i8 * %addr, <4 x i64> %mask1) {
define <8 x float> @test_256_25(i8 * %addr, <8 x float> %old, <8 x float> %mask1) {
%mask = fcmp one <8 x float> %mask1, zeroinitializer
%vaddr = bitcast i8* %addr to <8 x float>*
- %r = load <8 x float>* %vaddr, align 32
+ %r = load <8 x float>, <8 x float>* %vaddr, align 32
%res = select <8 x i1> %mask, <8 x float> %r, <8 x float> %old
ret <8 x float>%res
}
@@ -249,7 +249,7 @@ define <8 x float> @test_256_25(i8 * %addr, <8 x float> %old, <8 x float> %mask1
define <8 x float> @test_256_26(i8 * %addr, <8 x float> %old, <8 x float> %mask1) {
%mask = fcmp one <8 x float> %mask1, zeroinitializer
%vaddr = bitcast i8* %addr to <8 x float>*
- %r = load <8 x float>* %vaddr, align 1
+ %r = load <8 x float>, <8 x float>* %vaddr, align 1
%res = select <8 x i1> %mask, <8 x float> %r, <8 x float> %old
ret <8 x float>%res
}
@@ -260,7 +260,7 @@ define <8 x float> @test_256_26(i8 * %addr, <8 x float> %old, <8 x float> %mask1
define <8 x float> @test_256_27(i8 * %addr, <8 x float> %mask1) {
%mask = fcmp one <8 x float> %mask1, zeroinitializer
%vaddr = bitcast i8* %addr to <8 x float>*
- %r = load <8 x float>* %vaddr, align 32
+ %r = load <8 x float>, <8 x float>* %vaddr, align 32
%res = select <8 x i1> %mask, <8 x float> %r, <8 x float> zeroinitializer
ret <8 x float>%res
}
@@ -271,7 +271,7 @@ define <8 x float> @test_256_27(i8 * %addr, <8 x float> %mask1) {
define <8 x float> @test_256_28(i8 * %addr, <8 x float> %mask1) {
%mask = fcmp one <8 x float> %mask1, zeroinitializer
%vaddr = bitcast i8* %addr to <8 x float>*
- %r = load <8 x float>* %vaddr, align 1
+ %r = load <8 x float>, <8 x float>* %vaddr, align 1
%res = select <8 x i1> %mask, <8 x float> %r, <8 x float> zeroinitializer
ret <8 x float>%res
}
@@ -282,7 +282,7 @@ define <8 x float> @test_256_28(i8 * %addr, <8 x float> %mask1) {
define <4 x double> @test_256_29(i8 * %addr, <4 x double> %old, <4 x i64> %mask1) {
%mask = icmp ne <4 x i64> %mask1, zeroinitializer
%vaddr = bitcast i8* %addr to <4 x double>*
- %r = load <4 x double>* %vaddr, align 32
+ %r = load <4 x double>, <4 x double>* %vaddr, align 32
%res = select <4 x i1> %mask, <4 x double> %r, <4 x double> %old
ret <4 x double>%res
}
@@ -293,7 +293,7 @@ define <4 x double> @test_256_29(i8 * %addr, <4 x double> %old, <4 x i64> %mask1
define <4 x double> @test_256_30(i8 * %addr, <4 x double> %old, <4 x i64> %mask1) {
%mask = icmp ne <4 x i64> %mask1, zeroinitializer
%vaddr = bitcast i8* %addr to <4 x double>*
- %r = load <4 x double>* %vaddr, align 1
+ %r = load <4 x double>, <4 x double>* %vaddr, align 1
%res = select <4 x i1> %mask, <4 x double> %r, <4 x double> %old
ret <4 x double>%res
}
@@ -304,7 +304,7 @@ define <4 x double> @test_256_30(i8 * %addr, <4 x double> %old, <4 x i64> %mask1
define <4 x double> @test_256_31(i8 * %addr, <4 x i64> %mask1) {
%mask = icmp ne <4 x i64> %mask1, zeroinitializer
%vaddr = bitcast i8* %addr to <4 x double>*
- %r = load <4 x double>* %vaddr, align 32
+ %r = load <4 x double>, <4 x double>* %vaddr, align 32
%res = select <4 x i1> %mask, <4 x double> %r, <4 x double> zeroinitializer
ret <4 x double>%res
}
@@ -315,7 +315,7 @@ define <4 x double> @test_256_31(i8 * %addr, <4 x i64> %mask1) {
define <4 x double> @test_256_32(i8 * %addr, <4 x i64> %mask1) {
%mask = icmp ne <4 x i64> %mask1, zeroinitializer
%vaddr = bitcast i8* %addr to <4 x double>*
- %r = load <4 x double>* %vaddr, align 1
+ %r = load <4 x double>, <4 x double>* %vaddr, align 1
%res = select <4 x i1> %mask, <4 x double> %r, <4 x double> zeroinitializer
ret <4 x double>%res
}
@@ -325,7 +325,7 @@ define <4 x double> @test_256_32(i8 * %addr, <4 x i64> %mask1) {
; CHECK: ret
define <4 x i32> @test_128_1(i8 * %addr) {
%vaddr = bitcast i8* %addr to <4 x i32>*
- %res = load <4 x i32>* %vaddr, align 1
+ %res = load <4 x i32>, <4 x i32>* %vaddr, align 1
ret <4 x i32>%res
}
@@ -334,7 +334,7 @@ define <4 x i32> @test_128_1(i8 * %addr) {
; CHECK: ret
define <4 x i32> @test_128_2(i8 * %addr) {
%vaddr = bitcast i8* %addr to <4 x i32>*
- %res = load <4 x i32>* %vaddr, align 16
+ %res = load <4 x i32>, <4 x i32>* %vaddr, align 16
ret <4 x i32>%res
}
@@ -370,7 +370,7 @@ define void @test_128_5(i8 * %addr, <4 x i32> %data) {
; CHECK: ret
define <2 x i64> @test_128_6(i8 * %addr) {
%vaddr = bitcast i8* %addr to <2 x i64>*
- %res = load <2 x i64>* %vaddr, align 16
+ %res = load <2 x i64>, <2 x i64>* %vaddr, align 16
ret <2 x i64>%res
}
@@ -388,7 +388,7 @@ define void @test_128_7(i8 * %addr, <2 x i64> %data) {
; CHECK: ret
define <2 x i64> @test_128_8(i8 * %addr) {
%vaddr = bitcast i8* %addr to <2 x i64>*
- %res = load <2 x i64>* %vaddr, align 1
+ %res = load <2 x i64>, <2 x i64>* %vaddr, align 1
ret <2 x i64>%res
}
@@ -406,7 +406,7 @@ define void @test_128_9(i8 * %addr, <2 x double> %data) {
; CHECK: ret
define <2 x double> @test_128_10(i8 * %addr) {
%vaddr = bitcast i8* %addr to <2 x double>*
- %res = load <2 x double>* %vaddr, align 16
+ %res = load <2 x double>, <2 x double>* %vaddr, align 16
ret <2 x double>%res
}
@@ -424,7 +424,7 @@ define void @test_128_11(i8 * %addr, <4 x float> %data) {
; CHECK: ret
define <4 x float> @test_128_12(i8 * %addr) {
%vaddr = bitcast i8* %addr to <4 x float>*
- %res = load <4 x float>* %vaddr, align 16
+ %res = load <4 x float>, <4 x float>* %vaddr, align 16
ret <4 x float>%res
}
@@ -442,7 +442,7 @@ define void @test_128_13(i8 * %addr, <2 x double> %data) {
; CHECK: ret
define <2 x double> @test_128_14(i8 * %addr) {
%vaddr = bitcast i8* %addr to <2 x double>*
- %res = load <2 x double>* %vaddr, align 1
+ %res = load <2 x double>, <2 x double>* %vaddr, align 1
ret <2 x double>%res
}
@@ -460,7 +460,7 @@ define void @test_128_15(i8 * %addr, <4 x float> %data) {
; CHECK: ret
define <4 x float> @test_128_16(i8 * %addr) {
%vaddr = bitcast i8* %addr to <4 x float>*
- %res = load <4 x float>* %vaddr, align 1
+ %res = load <4 x float>, <4 x float>* %vaddr, align 1
ret <4 x float>%res
}
@@ -470,7 +470,7 @@ define <4 x float> @test_128_16(i8 * %addr) {
define <4 x i32> @test_128_17(i8 * %addr, <4 x i32> %old, <4 x i32> %mask1) {
%mask = icmp ne <4 x i32> %mask1, zeroinitializer
%vaddr = bitcast i8* %addr to <4 x i32>*
- %r = load <4 x i32>* %vaddr, align 16
+ %r = load <4 x i32>, <4 x i32>* %vaddr, align 16
%res = select <4 x i1> %mask, <4 x i32> %r, <4 x i32> %old
ret <4 x i32>%res
}
@@ -481,7 +481,7 @@ define <4 x i32> @test_128_17(i8 * %addr, <4 x i32> %old, <4 x i32> %mask1) {
define <4 x i32> @test_128_18(i8 * %addr, <4 x i32> %old, <4 x i32> %mask1) {
%mask = icmp ne <4 x i32> %mask1, zeroinitializer
%vaddr = bitcast i8* %addr to <4 x i32>*
- %r = load <4 x i32>* %vaddr, align 1
+ %r = load <4 x i32>, <4 x i32>* %vaddr, align 1
%res = select <4 x i1> %mask, <4 x i32> %r, <4 x i32> %old
ret <4 x i32>%res
}
@@ -492,7 +492,7 @@ define <4 x i32> @test_128_18(i8 * %addr, <4 x i32> %old, <4 x i32> %mask1) {
define <4 x i32> @test_128_19(i8 * %addr, <4 x i32> %mask1) {
%mask = icmp ne <4 x i32> %mask1, zeroinitializer
%vaddr = bitcast i8* %addr to <4 x i32>*
- %r = load <4 x i32>* %vaddr, align 16
+ %r = load <4 x i32>, <4 x i32>* %vaddr, align 16
%res = select <4 x i1> %mask, <4 x i32> %r, <4 x i32> zeroinitializer
ret <4 x i32>%res
}
@@ -503,7 +503,7 @@ define <4 x i32> @test_128_19(i8 * %addr, <4 x i32> %mask1) {
define <4 x i32> @test_128_20(i8 * %addr, <4 x i32> %mask1) {
%mask = icmp ne <4 x i32> %mask1, zeroinitializer
%vaddr = bitcast i8* %addr to <4 x i32>*
- %r = load <4 x i32>* %vaddr, align 1
+ %r = load <4 x i32>, <4 x i32>* %vaddr, align 1
%res = select <4 x i1> %mask, <4 x i32> %r, <4 x i32> zeroinitializer
ret <4 x i32>%res
}
@@ -514,7 +514,7 @@ define <4 x i32> @test_128_20(i8 * %addr, <4 x i32> %mask1) {
define <2 x i64> @test_128_21(i8 * %addr, <2 x i64> %old, <2 x i64> %mask1) {
%mask = icmp ne <2 x i64> %mask1, zeroinitializer
%vaddr = bitcast i8* %addr to <2 x i64>*
- %r = load <2 x i64>* %vaddr, align 16
+ %r = load <2 x i64>, <2 x i64>* %vaddr, align 16
%res = select <2 x i1> %mask, <2 x i64> %r, <2 x i64> %old
ret <2 x i64>%res
}
@@ -525,7 +525,7 @@ define <2 x i64> @test_128_21(i8 * %addr, <2 x i64> %old, <2 x i64> %mask1) {
define <2 x i64> @test_128_22(i8 * %addr, <2 x i64> %old, <2 x i64> %mask1) {
%mask = icmp ne <2 x i64> %mask1, zeroinitializer
%vaddr = bitcast i8* %addr to <2 x i64>*
- %r = load <2 x i64>* %vaddr, align 1
+ %r = load <2 x i64>, <2 x i64>* %vaddr, align 1
%res = select <2 x i1> %mask, <2 x i64> %r, <2 x i64> %old
ret <2 x i64>%res
}
@@ -536,7 +536,7 @@ define <2 x i64> @test_128_22(i8 * %addr, <2 x i64> %old, <2 x i64> %mask1) {
define <2 x i64> @test_128_23(i8 * %addr, <2 x i64> %mask1) {
%mask = icmp ne <2 x i64> %mask1, zeroinitializer
%vaddr = bitcast i8* %addr to <2 x i64>*
- %r = load <2 x i64>* %vaddr, align 16
+ %r = load <2 x i64>, <2 x i64>* %vaddr, align 16
%res = select <2 x i1> %mask, <2 x i64> %r, <2 x i64> zeroinitializer
ret <2 x i64>%res
}
@@ -547,7 +547,7 @@ define <2 x i64> @test_128_23(i8 * %addr, <2 x i64> %mask1) {
define <2 x i64> @test_128_24(i8 * %addr, <2 x i64> %mask1) {
%mask = icmp ne <2 x i64> %mask1, zeroinitializer
%vaddr = bitcast i8* %addr to <2 x i64>*
- %r = load <2 x i64>* %vaddr, align 1
+ %r = load <2 x i64>, <2 x i64>* %vaddr, align 1
%res = select <2 x i1> %mask, <2 x i64> %r, <2 x i64> zeroinitializer
ret <2 x i64>%res
}
@@ -558,7 +558,7 @@ define <2 x i64> @test_128_24(i8 * %addr, <2 x i64> %mask1) {
define <4 x float> @test_128_25(i8 * %addr, <4 x float> %old, <4 x i32> %mask1) {
%mask = icmp ne <4 x i32> %mask1, zeroinitializer
%vaddr = bitcast i8* %addr to <4 x float>*
- %r = load <4 x float>* %vaddr, align 16
+ %r = load <4 x float>, <4 x float>* %vaddr, align 16
%res = select <4 x i1> %mask, <4 x float> %r, <4 x float> %old
ret <4 x float>%res
}
@@ -569,7 +569,7 @@ define <4 x float> @test_128_25(i8 * %addr, <4 x float> %old, <4 x i32> %mask1)
define <4 x float> @test_128_26(i8 * %addr, <4 x float> %old, <4 x i32> %mask1) {
%mask = icmp ne <4 x i32> %mask1, zeroinitializer
%vaddr = bitcast i8* %addr to <4 x float>*
- %r = load <4 x float>* %vaddr, align 1
+ %r = load <4 x float>, <4 x float>* %vaddr, align 1
%res = select <4 x i1> %mask, <4 x float> %r, <4 x float> %old
ret <4 x float>%res
}
@@ -580,7 +580,7 @@ define <4 x float> @test_128_26(i8 * %addr, <4 x float> %old, <4 x i32> %mask1)
define <4 x float> @test_128_27(i8 * %addr, <4 x i32> %mask1) {
%mask = icmp ne <4 x i32> %mask1, zeroinitializer
%vaddr = bitcast i8* %addr to <4 x float>*
- %r = load <4 x float>* %vaddr, align 16
+ %r = load <4 x float>, <4 x float>* %vaddr, align 16
%res = select <4 x i1> %mask, <4 x float> %r, <4 x float> zeroinitializer
ret <4 x float>%res
}
@@ -591,7 +591,7 @@ define <4 x float> @test_128_27(i8 * %addr, <4 x i32> %mask1) {
define <4 x float> @test_128_28(i8 * %addr, <4 x i32> %mask1) {
%mask = icmp ne <4 x i32> %mask1, zeroinitializer
%vaddr = bitcast i8* %addr to <4 x float>*
- %r = load <4 x float>* %vaddr, align 1
+ %r = load <4 x float>, <4 x float>* %vaddr, align 1
%res = select <4 x i1> %mask, <4 x float> %r, <4 x float> zeroinitializer
ret <4 x float>%res
}
@@ -602,7 +602,7 @@ define <4 x float> @test_128_28(i8 * %addr, <4 x i32> %mask1) {
define <2 x double> @test_128_29(i8 * %addr, <2 x double> %old, <2 x i64> %mask1) {
%mask = icmp ne <2 x i64> %mask1, zeroinitializer
%vaddr = bitcast i8* %addr to <2 x double>*
- %r = load <2 x double>* %vaddr, align 16
+ %r = load <2 x double>, <2 x double>* %vaddr, align 16
%res = select <2 x i1> %mask, <2 x double> %r, <2 x double> %old
ret <2 x double>%res
}
@@ -613,7 +613,7 @@ define <2 x double> @test_128_29(i8 * %addr, <2 x double> %old, <2 x i64> %mask1
define <2 x double> @test_128_30(i8 * %addr, <2 x double> %old, <2 x i64> %mask1) {
%mask = icmp ne <2 x i64> %mask1, zeroinitializer
%vaddr = bitcast i8* %addr to <2 x double>*
- %r = load <2 x double>* %vaddr, align 1
+ %r = load <2 x double>, <2 x double>* %vaddr, align 1
%res = select <2 x i1> %mask, <2 x double> %r, <2 x double> %old
ret <2 x double>%res
}
@@ -624,7 +624,7 @@ define <2 x double> @test_128_30(i8 * %addr, <2 x double> %old, <2 x i64> %mask1
define <2 x double> @test_128_31(i8 * %addr, <2 x i64> %mask1) {
%mask = icmp ne <2 x i64> %mask1, zeroinitializer
%vaddr = bitcast i8* %addr to <2 x double>*
- %r = load <2 x double>* %vaddr, align 16
+ %r = load <2 x double>, <2 x double>* %vaddr, align 16
%res = select <2 x i1> %mask, <2 x double> %r, <2 x double> zeroinitializer
ret <2 x double>%res
}
@@ -635,7 +635,7 @@ define <2 x double> @test_128_31(i8 * %addr, <2 x i64> %mask1) {
define <2 x double> @test_128_32(i8 * %addr, <2 x i64> %mask1) {
%mask = icmp ne <2 x i64> %mask1, zeroinitializer
%vaddr = bitcast i8* %addr to <2 x double>*
- %r = load <2 x double>* %vaddr, align 1
+ %r = load <2 x double>, <2 x double>* %vaddr, align 1
%res = select <2 x i1> %mask, <2 x double> %r, <2 x double> zeroinitializer
ret <2 x double>%res
}
diff --git a/test/CodeGen/X86/avx512vl-vec-cmp.ll b/test/CodeGen/X86/avx512vl-vec-cmp.ll
index b6b508559ca3..aed8cb1cf559 100644
--- a/test/CodeGen/X86/avx512vl-vec-cmp.ll
+++ b/test/CodeGen/X86/avx512vl-vec-cmp.ll
@@ -45,7 +45,7 @@ define <4 x i64> @test256_4(<4 x i64> %x, <4 x i64> %y, <4 x i64> %x1) nounwind
; CHECK: vmovdqa32
; CHECK: ret
define <8 x i32> @test256_5(<8 x i32> %x, <8 x i32> %x1, <8 x i32>* %yp) nounwind {
- %y = load <8 x i32>* %yp, align 4
+ %y = load <8 x i32>, <8 x i32>* %yp, align 4
%mask = icmp eq <8 x i32> %x, %y
%max = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %x1
ret <8 x i32> %max
@@ -56,7 +56,7 @@ define <8 x i32> @test256_5(<8 x i32> %x, <8 x i32> %x1, <8 x i32>* %yp) nounwin
; CHECK: vmovdqa32
; CHECK: ret
define <8 x i32> @test256_6(<8 x i32> %x, <8 x i32> %x1, <8 x i32>* %y.ptr) nounwind {
- %y = load <8 x i32>* %y.ptr, align 4
+ %y = load <8 x i32>, <8 x i32>* %y.ptr, align 4
%mask = icmp sgt <8 x i32> %x, %y
%max = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %x1
ret <8 x i32> %max
@@ -67,7 +67,7 @@ define <8 x i32> @test256_6(<8 x i32> %x, <8 x i32> %x1, <8 x i32>* %y.ptr) noun
; CHECK: vmovdqa32
; CHECK: ret
define <8 x i32> @test256_7(<8 x i32> %x, <8 x i32> %x1, <8 x i32>* %y.ptr) nounwind {
- %y = load <8 x i32>* %y.ptr, align 4
+ %y = load <8 x i32>, <8 x i32>* %y.ptr, align 4
%mask = icmp sle <8 x i32> %x, %y
%max = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %x1
ret <8 x i32> %max
@@ -78,7 +78,7 @@ define <8 x i32> @test256_7(<8 x i32> %x, <8 x i32> %x1, <8 x i32>* %y.ptr) noun
; CHECK: vmovdqa32
; CHECK: ret
define <8 x i32> @test256_8(<8 x i32> %x, <8 x i32> %x1, <8 x i32>* %y.ptr) nounwind {
- %y = load <8 x i32>* %y.ptr, align 4
+ %y = load <8 x i32>, <8 x i32>* %y.ptr, align 4
%mask = icmp ule <8 x i32> %x, %y
%max = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %x1
ret <8 x i32> %max
@@ -114,7 +114,7 @@ define <4 x i64> @test256_10(<4 x i64> %x, <4 x i64> %y, <4 x i64> %x1, <4 x i64
; CHECK: ret
define <4 x i64> @test256_11(<4 x i64> %x, <4 x i64>* %y.ptr, <4 x i64> %x1, <4 x i64> %y1) nounwind {
%mask1 = icmp sgt <4 x i64> %x1, %y1
- %y = load <4 x i64>* %y.ptr, align 4
+ %y = load <4 x i64>, <4 x i64>* %y.ptr, align 4
%mask0 = icmp sgt <4 x i64> %x, %y
%mask = select <4 x i1> %mask0, <4 x i1> %mask1, <4 x i1> zeroinitializer
%max = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> %x1
@@ -127,7 +127,7 @@ define <4 x i64> @test256_11(<4 x i64> %x, <4 x i64>* %y.ptr, <4 x i64> %x1, <4
; CHECK: ret
define <8 x i32> @test256_12(<8 x i32> %x, <8 x i32>* %y.ptr, <8 x i32> %x1, <8 x i32> %y1) nounwind {
%mask1 = icmp sge <8 x i32> %x1, %y1
- %y = load <8 x i32>* %y.ptr, align 4
+ %y = load <8 x i32>, <8 x i32>* %y.ptr, align 4
%mask0 = icmp ule <8 x i32> %x, %y
%mask = select <8 x i1> %mask0, <8 x i1> %mask1, <8 x i1> zeroinitializer
%max = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %x1
@@ -139,7 +139,7 @@ define <8 x i32> @test256_12(<8 x i32> %x, <8 x i32>* %y.ptr, <8 x i32> %x1, <8
; CHECK: vmovdqa64
; CHECK: ret
define <4 x i64> @test256_13(<4 x i64> %x, <4 x i64> %x1, i64* %yb.ptr) nounwind {
- %yb = load i64* %yb.ptr, align 4
+ %yb = load i64, i64* %yb.ptr, align 4
%y.0 = insertelement <4 x i64> undef, i64 %yb, i32 0
%y = shufflevector <4 x i64> %y.0, <4 x i64> undef, <4 x i32> zeroinitializer
%mask = icmp eq <4 x i64> %x, %y
@@ -152,7 +152,7 @@ define <4 x i64> @test256_13(<4 x i64> %x, <4 x i64> %x1, i64* %yb.ptr) nounwind
; CHECK: vmovdqa32
; CHECK: ret
define <8 x i32> @test256_14(<8 x i32> %x, i32* %yb.ptr, <8 x i32> %x1) nounwind {
- %yb = load i32* %yb.ptr, align 4
+ %yb = load i32, i32* %yb.ptr, align 4
%y.0 = insertelement <8 x i32> undef, i32 %yb, i32 0
%y = shufflevector <8 x i32> %y.0, <8 x i32> undef, <8 x i32> zeroinitializer
%mask = icmp sle <8 x i32> %x, %y
@@ -166,7 +166,7 @@ define <8 x i32> @test256_14(<8 x i32> %x, i32* %yb.ptr, <8 x i32> %x1) nounwind
; CHECK: ret
define <8 x i32> @test256_15(<8 x i32> %x, i32* %yb.ptr, <8 x i32> %x1, <8 x i32> %y1) nounwind {
%mask1 = icmp sge <8 x i32> %x1, %y1
- %yb = load i32* %yb.ptr, align 4
+ %yb = load i32, i32* %yb.ptr, align 4
%y.0 = insertelement <8 x i32> undef, i32 %yb, i32 0
%y = shufflevector <8 x i32> %y.0, <8 x i32> undef, <8 x i32> zeroinitializer
%mask0 = icmp sgt <8 x i32> %x, %y
@@ -181,7 +181,7 @@ define <8 x i32> @test256_15(<8 x i32> %x, i32* %yb.ptr, <8 x i32> %x1, <8 x i32
; CHECK: ret
define <4 x i64> @test256_16(<4 x i64> %x, i64* %yb.ptr, <4 x i64> %x1, <4 x i64> %y1) nounwind {
%mask1 = icmp sge <4 x i64> %x1, %y1
- %yb = load i64* %yb.ptr, align 4
+ %yb = load i64, i64* %yb.ptr, align 4
%y.0 = insertelement <4 x i64> undef, i64 %yb, i32 0
%y = shufflevector <4 x i64> %y.0, <4 x i64> undef, <4 x i32> zeroinitializer
%mask0 = icmp sgt <4 x i64> %x, %y
@@ -235,7 +235,7 @@ define <2 x i64> @test128_4(<2 x i64> %x, <2 x i64> %y, <2 x i64> %x1) nounwind
; CHECK: vmovdqa32
; CHECK: ret
define <4 x i32> @test128_5(<4 x i32> %x, <4 x i32> %x1, <4 x i32>* %yp) nounwind {
- %y = load <4 x i32>* %yp, align 4
+ %y = load <4 x i32>, <4 x i32>* %yp, align 4
%mask = icmp eq <4 x i32> %x, %y
%max = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> %x1
ret <4 x i32> %max
@@ -246,7 +246,7 @@ define <4 x i32> @test128_5(<4 x i32> %x, <4 x i32> %x1, <4 x i32>* %yp) nounwin
; CHECK: vmovdqa32
; CHECK: ret
define <4 x i32> @test128_6(<4 x i32> %x, <4 x i32> %x1, <4 x i32>* %y.ptr) nounwind {
- %y = load <4 x i32>* %y.ptr, align 4
+ %y = load <4 x i32>, <4 x i32>* %y.ptr, align 4
%mask = icmp sgt <4 x i32> %x, %y
%max = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> %x1
ret <4 x i32> %max
@@ -257,7 +257,7 @@ define <4 x i32> @test128_6(<4 x i32> %x, <4 x i32> %x1, <4 x i32>* %y.ptr) noun
; CHECK: vmovdqa32
; CHECK: ret
define <4 x i32> @test128_7(<4 x i32> %x, <4 x i32> %x1, <4 x i32>* %y.ptr) nounwind {
- %y = load <4 x i32>* %y.ptr, align 4
+ %y = load <4 x i32>, <4 x i32>* %y.ptr, align 4
%mask = icmp sle <4 x i32> %x, %y
%max = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> %x1
ret <4 x i32> %max
@@ -268,7 +268,7 @@ define <4 x i32> @test128_7(<4 x i32> %x, <4 x i32> %x1, <4 x i32>* %y.ptr) noun
; CHECK: vmovdqa32
; CHECK: ret
define <4 x i32> @test128_8(<4 x i32> %x, <4 x i32> %x1, <4 x i32>* %y.ptr) nounwind {
- %y = load <4 x i32>* %y.ptr, align 4
+ %y = load <4 x i32>, <4 x i32>* %y.ptr, align 4
%mask = icmp ule <4 x i32> %x, %y
%max = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> %x1
ret <4 x i32> %max
@@ -304,7 +304,7 @@ define <2 x i64> @test128_10(<2 x i64> %x, <2 x i64> %y, <2 x i64> %x1, <2 x i64
; CHECK: ret
define <2 x i64> @test128_11(<2 x i64> %x, <2 x i64>* %y.ptr, <2 x i64> %x1, <2 x i64> %y1) nounwind {
%mask1 = icmp sgt <2 x i64> %x1, %y1
- %y = load <2 x i64>* %y.ptr, align 4
+ %y = load <2 x i64>, <2 x i64>* %y.ptr, align 4
%mask0 = icmp sgt <2 x i64> %x, %y
%mask = select <2 x i1> %mask0, <2 x i1> %mask1, <2 x i1> zeroinitializer
%max = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> %x1
@@ -317,7 +317,7 @@ define <2 x i64> @test128_11(<2 x i64> %x, <2 x i64>* %y.ptr, <2 x i64> %x1, <2
; CHECK: ret
define <4 x i32> @test128_12(<4 x i32> %x, <4 x i32>* %y.ptr, <4 x i32> %x1, <4 x i32> %y1) nounwind {
%mask1 = icmp sge <4 x i32> %x1, %y1
- %y = load <4 x i32>* %y.ptr, align 4
+ %y = load <4 x i32>, <4 x i32>* %y.ptr, align 4
%mask0 = icmp ule <4 x i32> %x, %y
%mask = select <4 x i1> %mask0, <4 x i1> %mask1, <4 x i1> zeroinitializer
%max = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> %x1
@@ -329,7 +329,7 @@ define <4 x i32> @test128_12(<4 x i32> %x, <4 x i32>* %y.ptr, <4 x i32> %x1, <4
; CHECK: vmovdqa64
; CHECK: ret
define <2 x i64> @test128_13(<2 x i64> %x, <2 x i64> %x1, i64* %yb.ptr) nounwind {
- %yb = load i64* %yb.ptr, align 4
+ %yb = load i64, i64* %yb.ptr, align 4
%y.0 = insertelement <2 x i64> undef, i64 %yb, i32 0
%y = insertelement <2 x i64> %y.0, i64 %yb, i32 1
%mask = icmp eq <2 x i64> %x, %y
@@ -342,7 +342,7 @@ define <2 x i64> @test128_13(<2 x i64> %x, <2 x i64> %x1, i64* %yb.ptr) nounwind
; CHECK: vmovdqa32
; CHECK: ret
define <4 x i32> @test128_14(<4 x i32> %x, i32* %yb.ptr, <4 x i32> %x1) nounwind {
- %yb = load i32* %yb.ptr, align 4
+ %yb = load i32, i32* %yb.ptr, align 4
%y.0 = insertelement <4 x i32> undef, i32 %yb, i32 0
%y = shufflevector <4 x i32> %y.0, <4 x i32> undef, <4 x i32> zeroinitializer
%mask = icmp sle <4 x i32> %x, %y
@@ -356,7 +356,7 @@ define <4 x i32> @test128_14(<4 x i32> %x, i32* %yb.ptr, <4 x i32> %x1) nounwind
; CHECK: ret
define <4 x i32> @test128_15(<4 x i32> %x, i32* %yb.ptr, <4 x i32> %x1, <4 x i32> %y1) nounwind {
%mask1 = icmp sge <4 x i32> %x1, %y1
- %yb = load i32* %yb.ptr, align 4
+ %yb = load i32, i32* %yb.ptr, align 4
%y.0 = insertelement <4 x i32> undef, i32 %yb, i32 0
%y = shufflevector <4 x i32> %y.0, <4 x i32> undef, <4 x i32> zeroinitializer
%mask0 = icmp sgt <4 x i32> %x, %y
@@ -371,7 +371,7 @@ define <4 x i32> @test128_15(<4 x i32> %x, i32* %yb.ptr, <4 x i32> %x1, <4 x i32
; CHECK: ret
define <2 x i64> @test128_16(<2 x i64> %x, i64* %yb.ptr, <2 x i64> %x1, <2 x i64> %y1) nounwind {
%mask1 = icmp sge <2 x i64> %x1, %y1
- %yb = load i64* %yb.ptr, align 4
+ %yb = load i64, i64* %yb.ptr, align 4
%y.0 = insertelement <2 x i64> undef, i64 %yb, i32 0
%y = insertelement <2 x i64> %y.0, i64 %yb, i32 1
%mask0 = icmp sgt <2 x i64> %x, %y
diff --git a/test/CodeGen/X86/barrier.ll b/test/CodeGen/X86/barrier.ll
index 4769b39964a0..1f60131f33ca 100644
--- a/test/CodeGen/X86/barrier.ll
+++ b/test/CodeGen/X86/barrier.ll
@@ -1,6 +1,7 @@
-; RUN: llc < %s -march=x86 -mattr=-sse2 | grep lock
+; RUN: llc < %s -march=x86 -mattr=-sse2 | FileCheck %s
define void @test() {
+; CHECK: lock
fence seq_cst
ret void
}
diff --git a/test/CodeGen/X86/bitcast-mmx.ll b/test/CodeGen/X86/bitcast-mmx.ll
new file mode 100644
index 000000000000..4107f3914f81
--- /dev/null
+++ b/test/CodeGen/X86/bitcast-mmx.ll
@@ -0,0 +1,77 @@
+; RUN: llc < %s -march=x86-64 -mattr=+mmx,+sse2 | FileCheck %s
+
+define i32 @t0(i64 %x) {
+; CHECK-LABEL: t0:
+; CHECK: # BB#0:{{.*}} %entry
+; CHECK: movd %[[REG1:[a-z]+]], %mm0
+; CHECK-NEXT: pshufw $238, %mm0, %mm0
+; CHECK-NEXT: movd %mm0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast i64 %x to <4 x i16>
+ %1 = bitcast <4 x i16> %0 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx %1, i8 -18)
+ %3 = bitcast x86_mmx %2 to <4 x i16>
+ %4 = bitcast <4 x i16> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ %6 = bitcast i64 %5 to <2 x i32>
+ %7 = extractelement <2 x i32> %6, i32 0
+ ret i32 %7
+}
+
+define i64 @t1(i64 %x, i32 %n) {
+; CHECK-LABEL: t1:
+; CHECK: # BB#0:{{.*}} %entry
+; CHECK: movd %[[REG2:[a-z]+]], %mm0
+; CHECK-NEXT: movd %[[REG1]], %mm1
+; CHECK-NEXT: psllq %mm0, %mm1
+; CHECK-NEXT: movd %mm1, %rax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast i64 %x to x86_mmx
+ %1 = tail call x86_mmx @llvm.x86.mmx.pslli.q(x86_mmx %0, i32 %n)
+ %2 = bitcast x86_mmx %1 to i64
+ ret i64 %2
+}
+
+define i64 @t2(i64 %x, i32 %n, i32 %w) {
+; CHECK-LABEL: t2:
+; CHECK: # BB#0:{{.*}} %entry
+; CHECK: movd %[[REG4:[a-z]+]], %mm0
+; CHECK-NEXT: movd %[[REG6:[a-z0-9]+]], %mm1
+; CHECK-NEXT: psllq %mm0, %mm1
+; CHECK-NEXT: movd %[[REG1]], %mm0
+; CHECK-NEXT: por %mm1, %mm0
+; CHECK-NEXT: movd %mm0, %rax
+; CHECK-NEXT: retq
+entry:
+ %0 = insertelement <2 x i32> undef, i32 %w, i32 0
+ %1 = insertelement <2 x i32> %0, i32 0, i32 1
+ %2 = bitcast <2 x i32> %1 to x86_mmx
+ %3 = tail call x86_mmx @llvm.x86.mmx.pslli.q(x86_mmx %2, i32 %n)
+ %4 = bitcast i64 %x to x86_mmx
+ %5 = tail call x86_mmx @llvm.x86.mmx.por(x86_mmx %4, x86_mmx %3)
+ %6 = bitcast x86_mmx %5 to i64
+ ret i64 %6
+}
+
+define i64 @t3(<1 x i64>* %y, i32* %n) {
+; CHECK-LABEL: t3:
+; CHECK: # BB#0:{{.*}} %entry
+; CHECK: movq (%[[REG1]]), %mm0
+; CHECK-NEXT: psllq (%[[REG3:[a-z]+]]), %mm0
+; CHECK-NEXT: movd %mm0, %rax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <1 x i64>* %y to x86_mmx*
+ %1 = load x86_mmx, x86_mmx* %0, align 8
+ %2 = load i32, i32* %n, align 4
+ %3 = tail call x86_mmx @llvm.x86.mmx.pslli.q(x86_mmx %1, i32 %2)
+ %4 = bitcast x86_mmx %3 to i64
+ ret i64 %4
+}
+
+declare x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx, i8)
+declare x86_mmx @llvm.x86.mmx.pslli.q(x86_mmx, i32)
+declare x86_mmx @llvm.x86.mmx.por(x86_mmx, x86_mmx)
+
diff --git a/test/CodeGen/X86/block-placement.ll b/test/CodeGen/X86/block-placement.ll
index e35be6ae654f..e0276e42d4d2 100644
--- a/test/CodeGen/X86/block-placement.ll
+++ b/test/CodeGen/X86/block-placement.ll
@@ -24,8 +24,8 @@ define i32 @test_ifchains(i32 %i, i32* %a, i32 %b) {
; CHECK: %then5
entry:
- %gep1 = getelementptr i32* %a, i32 1
- %val1 = load i32* %gep1
+ %gep1 = getelementptr i32, i32* %a, i32 1
+ %val1 = load i32, i32* %gep1
%cond1 = icmp ugt i32 %val1, 1
br i1 %cond1, label %then1, label %else1, !prof !0
@@ -34,8 +34,8 @@ then1:
br label %else1
else1:
- %gep2 = getelementptr i32* %a, i32 2
- %val2 = load i32* %gep2
+ %gep2 = getelementptr i32, i32* %a, i32 2
+ %val2 = load i32, i32* %gep2
%cond2 = icmp ugt i32 %val2, 2
br i1 %cond2, label %then2, label %else2, !prof !0
@@ -44,8 +44,8 @@ then2:
br label %else2
else2:
- %gep3 = getelementptr i32* %a, i32 3
- %val3 = load i32* %gep3
+ %gep3 = getelementptr i32, i32* %a, i32 3
+ %val3 = load i32, i32* %gep3
%cond3 = icmp ugt i32 %val3, 3
br i1 %cond3, label %then3, label %else3, !prof !0
@@ -54,8 +54,8 @@ then3:
br label %else3
else3:
- %gep4 = getelementptr i32* %a, i32 4
- %val4 = load i32* %gep4
+ %gep4 = getelementptr i32, i32* %a, i32 4
+ %val4 = load i32, i32* %gep4
%cond4 = icmp ugt i32 %val4, 4
br i1 %cond4, label %then4, label %else4, !prof !0
@@ -64,8 +64,8 @@ then4:
br label %else4
else4:
- %gep5 = getelementptr i32* %a, i32 3
- %val5 = load i32* %gep5
+ %gep5 = getelementptr i32, i32* %a, i32 3
+ %val5 = load i32, i32* %gep5
%cond5 = icmp ugt i32 %val5, 3
br i1 %cond5, label %then5, label %exit, !prof !0
@@ -113,8 +113,8 @@ unlikely2:
br label %body3
body3:
- %arrayidx = getelementptr inbounds i32* %a, i32 %iv
- %0 = load i32* %arrayidx
+ %arrayidx = getelementptr inbounds i32, i32* %a, i32 %iv
+ %0 = load i32, i32* %arrayidx
%sum = add nsw i32 %0, %base
%next = add i32 %iv, 1
%exitcond = icmp eq i32 %next, %i
@@ -166,8 +166,8 @@ bail3:
ret i32 -3
body4:
- %arrayidx = getelementptr inbounds i32* %a, i32 %iv
- %0 = load i32* %arrayidx
+ %arrayidx = getelementptr inbounds i32, i32* %a, i32 %iv
+ %0 = load i32, i32* %arrayidx
%sum = add nsw i32 %0, %base
%next = add i32 %iv, 1
%exitcond = icmp eq i32 %next, %i
@@ -197,8 +197,8 @@ body0:
br i1 %exitcond, label %exit, label %body1
body1:
- %arrayidx = getelementptr inbounds i32* %a, i32 %iv
- %0 = load i32* %arrayidx
+ %arrayidx = getelementptr inbounds i32, i32* %a, i32 %iv
+ %0 = load i32, i32* %arrayidx
%sum = add nsw i32 %0, %base
%bailcond1 = icmp eq i32 %sum, 42
br label %body0
@@ -222,8 +222,8 @@ entry:
body0:
%iv = phi i32 [ 0, %entry ], [ %next, %body1 ]
%base = phi i32 [ 0, %entry ], [ %sum, %body1 ]
- %arrayidx = getelementptr inbounds i32* %a, i32 %iv
- %0 = load i32* %arrayidx
+ %arrayidx = getelementptr inbounds i32, i32* %a, i32 %iv
+ %0 = load i32, i32* %arrayidx
%sum = add nsw i32 %0, %base
%bailcond1 = icmp eq i32 %sum, 42
br i1 %bailcond1, label %exit, label %body1
@@ -252,8 +252,8 @@ entry:
body:
%iv = phi i32 [ 0, %entry ], [ %next, %body ]
%base = phi i32 [ 0, %entry ], [ %sum, %body ]
- %arrayidx = getelementptr inbounds i32* %a, i32 %iv
- %0 = load i32* %arrayidx
+ %arrayidx = getelementptr inbounds i32, i32* %a, i32 %iv
+ %0 = load i32, i32* %arrayidx
%sum = add nsw i32 %0, %base
%next = add i32 %iv, 1
%exitcond = icmp eq i32 %next, %i
@@ -279,16 +279,16 @@ entry:
loop.body.1:
%iv = phi i32 [ 0, %entry ], [ %next, %loop.body.2 ]
- %arrayidx = getelementptr inbounds i32* %a, i32 %iv
- %bidx = load i32* %arrayidx
+ %arrayidx = getelementptr inbounds i32, i32* %a, i32 %iv
+ %bidx = load i32, i32* %arrayidx
br label %inner.loop.body
inner.loop.body:
%inner.iv = phi i32 [ 0, %loop.body.1 ], [ %inner.next, %inner.loop.body ]
%base = phi i32 [ 0, %loop.body.1 ], [ %sum, %inner.loop.body ]
%scaled_idx = mul i32 %bidx, %iv
- %inner.arrayidx = getelementptr inbounds i32* %b, i32 %scaled_idx
- %0 = load i32* %inner.arrayidx
+ %inner.arrayidx = getelementptr inbounds i32, i32* %b, i32 %scaled_idx
+ %0 = load i32, i32* %inner.arrayidx
%sum = add nsw i32 %0, %base
%inner.next = add i32 %iv, 1
%inner.exitcond = icmp eq i32 %inner.next, %i
@@ -322,13 +322,13 @@ loop.body1:
br i1 undef, label %loop.body3, label %loop.body2
loop.body2:
- %ptr = load i32** undef, align 4
+ %ptr = load i32*, i32** undef, align 4
br label %loop.body3
loop.body3:
%myptr = phi i32* [ %ptr2, %loop.body5 ], [ %ptr, %loop.body2 ], [ undef, %loop.body1 ]
%bcmyptr = bitcast i32* %myptr to i32*
- %val = load i32* %bcmyptr, align 4
+ %val = load i32, i32* %bcmyptr, align 4
%comp = icmp eq i32 %val, 48
br i1 %comp, label %loop.body4, label %loop.body5
@@ -336,7 +336,7 @@ loop.body4:
br i1 undef, label %loop.header, label %loop.body5
loop.body5:
- %ptr2 = load i32** undef, align 4
+ %ptr2 = load i32*, i32** undef, align 4
br label %loop.body3
}
@@ -366,16 +366,16 @@ loop.header:
br i1 %comp0, label %bail, label %loop.body1
loop.body1:
- %val0 = load i32** undef, align 4
+ %val0 = load i32*, i32** undef, align 4
br i1 undef, label %loop.body2, label %loop.inner1.begin
loop.body2:
br i1 undef, label %loop.body4, label %loop.body3
loop.body3:
- %ptr1 = getelementptr inbounds i32* %val0, i32 0
+ %ptr1 = getelementptr inbounds i32, i32* %val0, i32 0
%castptr1 = bitcast i32* %ptr1 to i32**
- %val1 = load i32** %castptr1, align 4
+ %val1 = load i32*, i32** %castptr1, align 4
br label %loop.inner1.begin
loop.inner1.begin:
@@ -385,9 +385,9 @@ loop.inner1.begin:
br i1 %comp1, label %loop.inner1.end, label %loop.body4
loop.inner1.end:
- %ptr2 = getelementptr inbounds i32* %valphi, i32 0
+ %ptr2 = getelementptr inbounds i32, i32* %valphi, i32 0
%castptr2 = bitcast i32* %ptr2 to i32**
- %val2 = load i32** %castptr2, align 4
+ %val2 = load i32*, i32** %castptr2, align 4
br label %loop.inner1.begin
loop.body4.dead:
@@ -486,7 +486,7 @@ entry:
br i1 %cond, label %entry.if.then_crit_edge, label %lor.lhs.false, !prof !1
entry.if.then_crit_edge:
- %.pre14 = load i8* undef, align 1
+ %.pre14 = load i8, i8* undef, align 1
br label %if.then
lor.lhs.false:
@@ -616,7 +616,7 @@ body:
br label %loop2a
loop1:
- %next.load = load i32** undef
+ %next.load = load i32*, i32** undef
br i1 %comp.a, label %loop2a, label %loop2b
loop2a:
@@ -626,7 +626,7 @@ loop2a:
br label %loop3
loop2b:
- %gep = getelementptr inbounds i32* %var.phi, i32 0
+ %gep = getelementptr inbounds i32, i32* %var.phi, i32 0
%next.ptr = bitcast i32* %gep to i32**
store i32* %next.phi, i32** %next.ptr
br label %loop3
@@ -728,199 +728,199 @@ define void @many_unanalyzable_branches() {
entry:
br label %0
- %val0 = load volatile float* undef
+ %val0 = load volatile float, float* undef
%cmp0 = fcmp une float %val0, undef
br i1 %cmp0, label %1, label %0
- %val1 = load volatile float* undef
+ %val1 = load volatile float, float* undef
%cmp1 = fcmp une float %val1, undef
br i1 %cmp1, label %2, label %1
- %val2 = load volatile float* undef
+ %val2 = load volatile float, float* undef
%cmp2 = fcmp une float %val2, undef
br i1 %cmp2, label %3, label %2
- %val3 = load volatile float* undef
+ %val3 = load volatile float, float* undef
%cmp3 = fcmp une float %val3, undef
br i1 %cmp3, label %4, label %3
- %val4 = load volatile float* undef
+ %val4 = load volatile float, float* undef
%cmp4 = fcmp une float %val4, undef
br i1 %cmp4, label %5, label %4
- %val5 = load volatile float* undef
+ %val5 = load volatile float, float* undef
%cmp5 = fcmp une float %val5, undef
br i1 %cmp5, label %6, label %5
- %val6 = load volatile float* undef
+ %val6 = load volatile float, float* undef
%cmp6 = fcmp une float %val6, undef
br i1 %cmp6, label %7, label %6
- %val7 = load volatile float* undef
+ %val7 = load volatile float, float* undef
%cmp7 = fcmp une float %val7, undef
br i1 %cmp7, label %8, label %7
- %val8 = load volatile float* undef
+ %val8 = load volatile float, float* undef
%cmp8 = fcmp une float %val8, undef
br i1 %cmp8, label %9, label %8
- %val9 = load volatile float* undef
+ %val9 = load volatile float, float* undef
%cmp9 = fcmp une float %val9, undef
br i1 %cmp9, label %10, label %9
- %val10 = load volatile float* undef
+ %val10 = load volatile float, float* undef
%cmp10 = fcmp une float %val10, undef
br i1 %cmp10, label %11, label %10
- %val11 = load volatile float* undef
+ %val11 = load volatile float, float* undef
%cmp11 = fcmp une float %val11, undef
br i1 %cmp11, label %12, label %11
- %val12 = load volatile float* undef
+ %val12 = load volatile float, float* undef
%cmp12 = fcmp une float %val12, undef
br i1 %cmp12, label %13, label %12
- %val13 = load volatile float* undef
+ %val13 = load volatile float, float* undef
%cmp13 = fcmp une float %val13, undef
br i1 %cmp13, label %14, label %13
- %val14 = load volatile float* undef
+ %val14 = load volatile float, float* undef
%cmp14 = fcmp une float %val14, undef
br i1 %cmp14, label %15, label %14
- %val15 = load volatile float* undef
+ %val15 = load volatile float, float* undef
%cmp15 = fcmp une float %val15, undef
br i1 %cmp15, label %16, label %15
- %val16 = load volatile float* undef
+ %val16 = load volatile float, float* undef
%cmp16 = fcmp une float %val16, undef
br i1 %cmp16, label %17, label %16
- %val17 = load volatile float* undef
+ %val17 = load volatile float, float* undef
%cmp17 = fcmp une float %val17, undef
br i1 %cmp17, label %18, label %17
- %val18 = load volatile float* undef
+ %val18 = load volatile float, float* undef
%cmp18 = fcmp une float %val18, undef
br i1 %cmp18, label %19, label %18
- %val19 = load volatile float* undef
+ %val19 = load volatile float, float* undef
%cmp19 = fcmp une float %val19, undef
br i1 %cmp19, label %20, label %19
- %val20 = load volatile float* undef
+ %val20 = load volatile float, float* undef
%cmp20 = fcmp une float %val20, undef
br i1 %cmp20, label %21, label %20
- %val21 = load volatile float* undef
+ %val21 = load volatile float, float* undef
%cmp21 = fcmp une float %val21, undef
br i1 %cmp21, label %22, label %21
- %val22 = load volatile float* undef
+ %val22 = load volatile float, float* undef
%cmp22 = fcmp une float %val22, undef
br i1 %cmp22, label %23, label %22
- %val23 = load volatile float* undef
+ %val23 = load volatile float, float* undef
%cmp23 = fcmp une float %val23, undef
br i1 %cmp23, label %24, label %23
- %val24 = load volatile float* undef
+ %val24 = load volatile float, float* undef
%cmp24 = fcmp une float %val24, undef
br i1 %cmp24, label %25, label %24
- %val25 = load volatile float* undef
+ %val25 = load volatile float, float* undef
%cmp25 = fcmp une float %val25, undef
br i1 %cmp25, label %26, label %25
- %val26 = load volatile float* undef
+ %val26 = load volatile float, float* undef
%cmp26 = fcmp une float %val26, undef
br i1 %cmp26, label %27, label %26
- %val27 = load volatile float* undef
+ %val27 = load volatile float, float* undef
%cmp27 = fcmp une float %val27, undef
br i1 %cmp27, label %28, label %27
- %val28 = load volatile float* undef
+ %val28 = load volatile float, float* undef
%cmp28 = fcmp une float %val28, undef
br i1 %cmp28, label %29, label %28
- %val29 = load volatile float* undef
+ %val29 = load volatile float, float* undef
%cmp29 = fcmp une float %val29, undef
br i1 %cmp29, label %30, label %29
- %val30 = load volatile float* undef
+ %val30 = load volatile float, float* undef
%cmp30 = fcmp une float %val30, undef
br i1 %cmp30, label %31, label %30
- %val31 = load volatile float* undef
+ %val31 = load volatile float, float* undef
%cmp31 = fcmp une float %val31, undef
br i1 %cmp31, label %32, label %31
- %val32 = load volatile float* undef
+ %val32 = load volatile float, float* undef
%cmp32 = fcmp une float %val32, undef
br i1 %cmp32, label %33, label %32
- %val33 = load volatile float* undef
+ %val33 = load volatile float, float* undef
%cmp33 = fcmp une float %val33, undef
br i1 %cmp33, label %34, label %33
- %val34 = load volatile float* undef
+ %val34 = load volatile float, float* undef
%cmp34 = fcmp une float %val34, undef
br i1 %cmp34, label %35, label %34
- %val35 = load volatile float* undef
+ %val35 = load volatile float, float* undef
%cmp35 = fcmp une float %val35, undef
br i1 %cmp35, label %36, label %35
- %val36 = load volatile float* undef
+ %val36 = load volatile float, float* undef
%cmp36 = fcmp une float %val36, undef
br i1 %cmp36, label %37, label %36
- %val37 = load volatile float* undef
+ %val37 = load volatile float, float* undef
%cmp37 = fcmp une float %val37, undef
br i1 %cmp37, label %38, label %37
- %val38 = load volatile float* undef
+ %val38 = load volatile float, float* undef
%cmp38 = fcmp une float %val38, undef
br i1 %cmp38, label %39, label %38
- %val39 = load volatile float* undef
+ %val39 = load volatile float, float* undef
%cmp39 = fcmp une float %val39, undef
br i1 %cmp39, label %40, label %39
- %val40 = load volatile float* undef
+ %val40 = load volatile float, float* undef
%cmp40 = fcmp une float %val40, undef
br i1 %cmp40, label %41, label %40
- %val41 = load volatile float* undef
+ %val41 = load volatile float, float* undef
%cmp41 = fcmp une float %val41, undef
br i1 %cmp41, label %42, label %41
- %val42 = load volatile float* undef
+ %val42 = load volatile float, float* undef
%cmp42 = fcmp une float %val42, undef
br i1 %cmp42, label %43, label %42
- %val43 = load volatile float* undef
+ %val43 = load volatile float, float* undef
%cmp43 = fcmp une float %val43, undef
br i1 %cmp43, label %44, label %43
- %val44 = load volatile float* undef
+ %val44 = load volatile float, float* undef
%cmp44 = fcmp une float %val44, undef
br i1 %cmp44, label %45, label %44
- %val45 = load volatile float* undef
+ %val45 = load volatile float, float* undef
%cmp45 = fcmp une float %val45, undef
br i1 %cmp45, label %46, label %45
- %val46 = load volatile float* undef
+ %val46 = load volatile float, float* undef
%cmp46 = fcmp une float %val46, undef
br i1 %cmp46, label %47, label %46
- %val47 = load volatile float* undef
+ %val47 = load volatile float, float* undef
%cmp47 = fcmp une float %val47, undef
br i1 %cmp47, label %48, label %47
- %val48 = load volatile float* undef
+ %val48 = load volatile float, float* undef
%cmp48 = fcmp une float %val48, undef
br i1 %cmp48, label %49, label %48
- %val49 = load volatile float* undef
+ %val49 = load volatile float, float* undef
%cmp49 = fcmp une float %val49, undef
br i1 %cmp49, label %50, label %49
- %val50 = load volatile float* undef
+ %val50 = load volatile float, float* undef
%cmp50 = fcmp une float %val50, undef
br i1 %cmp50, label %51, label %50
- %val51 = load volatile float* undef
+ %val51 = load volatile float, float* undef
%cmp51 = fcmp une float %val51, undef
br i1 %cmp51, label %52, label %51
- %val52 = load volatile float* undef
+ %val52 = load volatile float, float* undef
%cmp52 = fcmp une float %val52, undef
br i1 %cmp52, label %53, label %52
- %val53 = load volatile float* undef
+ %val53 = load volatile float, float* undef
%cmp53 = fcmp une float %val53, undef
br i1 %cmp53, label %54, label %53
- %val54 = load volatile float* undef
+ %val54 = load volatile float, float* undef
%cmp54 = fcmp une float %val54, undef
br i1 %cmp54, label %55, label %54
- %val55 = load volatile float* undef
+ %val55 = load volatile float, float* undef
%cmp55 = fcmp une float %val55, undef
br i1 %cmp55, label %56, label %55
- %val56 = load volatile float* undef
+ %val56 = load volatile float, float* undef
%cmp56 = fcmp une float %val56, undef
br i1 %cmp56, label %57, label %56
- %val57 = load volatile float* undef
+ %val57 = load volatile float, float* undef
%cmp57 = fcmp une float %val57, undef
br i1 %cmp57, label %58, label %57
- %val58 = load volatile float* undef
+ %val58 = load volatile float, float* undef
%cmp58 = fcmp une float %val58, undef
br i1 %cmp58, label %59, label %58
- %val59 = load volatile float* undef
+ %val59 = load volatile float, float* undef
%cmp59 = fcmp une float %val59, undef
br i1 %cmp59, label %60, label %59
- %val60 = load volatile float* undef
+ %val60 = load volatile float, float* undef
%cmp60 = fcmp une float %val60, undef
br i1 %cmp60, label %61, label %60
- %val61 = load volatile float* undef
+ %val61 = load volatile float, float* undef
%cmp61 = fcmp une float %val61, undef
br i1 %cmp61, label %62, label %61
- %val62 = load volatile float* undef
+ %val62 = load volatile float, float* undef
%cmp62 = fcmp une float %val62, undef
br i1 %cmp62, label %63, label %62
- %val63 = load volatile float* undef
+ %val63 = load volatile float, float* undef
%cmp63 = fcmp une float %val63, undef
br i1 %cmp63, label %64, label %63
- %val64 = load volatile float* undef
+ %val64 = load volatile float, float* undef
%cmp64 = fcmp une float %val64, undef
br i1 %cmp64, label %65, label %64
@@ -966,7 +966,7 @@ define void @benchmark_heapsort(i32 %n, double* nocapture %ra) {
entry:
%shr = ashr i32 %n, 1
%add = add nsw i32 %shr, 1
- %arrayidx3 = getelementptr inbounds double* %ra, i64 1
+ %arrayidx3 = getelementptr inbounds double, double* %ra, i64 1
br label %for.cond
for.cond:
@@ -978,15 +978,15 @@ for.cond:
if.then:
%dec = add nsw i32 %l.0, -1
%idxprom = sext i32 %dec to i64
- %arrayidx = getelementptr inbounds double* %ra, i64 %idxprom
- %0 = load double* %arrayidx, align 8
+ %arrayidx = getelementptr inbounds double, double* %ra, i64 %idxprom
+ %0 = load double, double* %arrayidx, align 8
br label %if.end10
if.else:
%idxprom1 = sext i32 %ir.0 to i64
- %arrayidx2 = getelementptr inbounds double* %ra, i64 %idxprom1
- %1 = load double* %arrayidx2, align 8
- %2 = load double* %arrayidx3, align 8
+ %arrayidx2 = getelementptr inbounds double, double* %ra, i64 %idxprom1
+ %1 = load double, double* %arrayidx2, align 8
+ %2 = load double, double* %arrayidx3, align 8
store double %2, double* %arrayidx2, align 8
%dec6 = add nsw i32 %ir.0, -1
%cmp7 = icmp eq i32 %dec6, 1
@@ -1019,12 +1019,12 @@ while.body:
land.lhs.true:
%idxprom13 = sext i32 %j.0 to i64
- %arrayidx14 = getelementptr inbounds double* %ra, i64 %idxprom13
- %3 = load double* %arrayidx14, align 8
+ %arrayidx14 = getelementptr inbounds double, double* %ra, i64 %idxprom13
+ %3 = load double, double* %arrayidx14, align 8
%add15 = add nsw i32 %j.0, 1
%idxprom16 = sext i32 %add15 to i64
- %arrayidx17 = getelementptr inbounds double* %ra, i64 %idxprom16
- %4 = load double* %arrayidx17, align 8
+ %arrayidx17 = getelementptr inbounds double, double* %ra, i64 %idxprom16
+ %4 = load double, double* %arrayidx17, align 8
%cmp18 = fcmp olt double %3, %4
br i1 %cmp18, label %if.then19, label %if.end20
@@ -1034,20 +1034,20 @@ if.then19:
if.end20:
%j.1 = phi i32 [ %add15, %if.then19 ], [ %j.0, %land.lhs.true ], [ %j.0, %while.body ]
%idxprom21 = sext i32 %j.1 to i64
- %arrayidx22 = getelementptr inbounds double* %ra, i64 %idxprom21
- %5 = load double* %arrayidx22, align 8
+ %arrayidx22 = getelementptr inbounds double, double* %ra, i64 %idxprom21
+ %5 = load double, double* %arrayidx22, align 8
%cmp23 = fcmp olt double %rra.0, %5
br i1 %cmp23, label %if.then24, label %while.cond
if.then24:
%idxprom27 = sext i32 %j.0.ph.in to i64
- %arrayidx28 = getelementptr inbounds double* %ra, i64 %idxprom27
+ %arrayidx28 = getelementptr inbounds double, double* %ra, i64 %idxprom27
store double %5, double* %arrayidx28, align 8
br label %while.cond.outer
while.end:
%idxprom33 = sext i32 %j.0.ph.in to i64
- %arrayidx34 = getelementptr inbounds double* %ra, i64 %idxprom33
+ %arrayidx34 = getelementptr inbounds double, double* %ra, i64 %idxprom33
store double %rra.0, double* %arrayidx34, align 8
br label %for.cond
}
@@ -1065,8 +1065,8 @@ define i32 @test_cold_calls(i32* %a) {
; CHECK: %then
entry:
- %gep1 = getelementptr i32* %a, i32 1
- %val1 = load i32* %gep1
+ %gep1 = getelementptr i32, i32* %a, i32 1
+ %val1 = load i32, i32* %gep1
%cond1 = icmp ugt i32 %val1, 1
br i1 %cond1, label %then, label %else
@@ -1075,8 +1075,8 @@ then:
br label %exit
else:
- %gep2 = getelementptr i32* %a, i32 2
- %val2 = load i32* %gep2
+ %gep2 = getelementptr i32, i32* %a, i32 2
+ %val2 = load i32, i32* %gep2
br label %exit
exit:
diff --git a/test/CodeGen/X86/bmi.ll b/test/CodeGen/X86/bmi.ll
index a70720926de0..8b13e960cd8f 100644
--- a/test/CodeGen/X86/bmi.ll
+++ b/test/CodeGen/X86/bmi.ll
@@ -27,7 +27,7 @@ define i32 @t3(i32 %x) nounwind {
}
define i32 @tzcnt32_load(i32* %x) nounwind {
- %x1 = load i32* %x
+ %x1 = load i32, i32* %x
%tmp = tail call i32 @llvm.cttz.i32(i32 %x1, i1 false )
ret i32 %tmp
; CHECK-LABEL: tzcnt32_load:
@@ -78,7 +78,7 @@ define i32 @andn32(i32 %x, i32 %y) nounwind readnone {
}
define i32 @andn32_load(i32 %x, i32* %y) nounwind readnone {
- %y1 = load i32* %y
+ %y1 = load i32, i32* %y
%tmp1 = xor i32 %x, -1
%tmp2 = and i32 %y1, %tmp1
ret i32 %tmp2
@@ -102,7 +102,7 @@ define i32 @bextr32(i32 %x, i32 %y) nounwind readnone {
}
define i32 @bextr32_load(i32* %x, i32 %y) nounwind readnone {
- %x1 = load i32* %x
+ %x1 = load i32, i32* %x
%tmp = tail call i32 @llvm.x86.bmi.bextr.32(i32 %x1, i32 %y)
ret i32 %tmp
; CHECK-LABEL: bextr32_load:
@@ -120,7 +120,7 @@ define i32 @bextr32b(i32 %x) nounwind uwtable readnone ssp {
}
define i32 @bextr32b_load(i32* %x) nounwind uwtable readnone ssp {
- %1 = load i32* %x
+ %1 = load i32, i32* %x
%2 = lshr i32 %1, 4
%3 = and i32 %2, 4095
ret i32 %3
@@ -145,6 +145,36 @@ define i64 @bextr64b(i64 %x) nounwind uwtable readnone ssp {
; CHECK: bextrq
}
+define i64 @bextr64b_load(i64* %x) {
+ %1 = load i64, i64* %x, align 8
+ %2 = lshr i64 %1, 4
+ %3 = and i64 %2, 4095
+ ret i64 %3
+; CHECK-LABEL: bextr64b_load:
+; CHECK: bextrq {{.*}}, ({{.*}}), {{.*}}
+}
+
+define i32 @non_bextr32(i32 %x) {
+entry:
+ %shr = lshr i32 %x, 2
+ %and = and i32 %shr, 111
+ ret i32 %and
+; CHECK-LABEL: non_bextr32:
+; CHECK: shrl $2
+; CHECK: andl $111
+}
+
+define i64 @non_bextr64(i64 %x) {
+entry:
+ %shr = lshr i64 %x, 2
+ %and = and i64 %shr, 8589934590
+ ret i64 %and
+; CHECK-LABEL: non_bextr64:
+; CHECK: shrq $2
+; CHECK: movabsq $8589934590
+; CHECK: andq
+}
+
define i32 @bzhi32(i32 %x, i32 %y) nounwind readnone {
%tmp = tail call i32 @llvm.x86.bmi.bzhi.32(i32 %x, i32 %y)
ret i32 %tmp
@@ -153,7 +183,7 @@ define i32 @bzhi32(i32 %x, i32 %y) nounwind readnone {
}
define i32 @bzhi32_load(i32* %x, i32 %y) nounwind readnone {
- %x1 = load i32* %x
+ %x1 = load i32, i32* %x
%tmp = tail call i32 @llvm.x86.bmi.bzhi.32(i32 %x1, i32 %y)
ret i32 %tmp
; CHECK-LABEL: bzhi32_load:
@@ -184,7 +214,7 @@ entry:
define i32 @bzhi32b_load(i32* %w, i8 zeroext %index) #0 {
entry:
- %x = load i32* %w
+ %x = load i32, i32* %w
%conv = zext i8 %index to i32
%shl = shl i32 1, %conv
%sub = add nsw i32 %shl, -1
@@ -230,7 +260,7 @@ entry:
%and = and i64 %x, 2147483647
ret i64 %and
; CHECK-LABEL: bzhi64_small_constant_mask:
-; CHECK: andq $2147483647, %r[[ARG1]]
+; CHECK: andl $2147483647, %e[[ARG1]]
}
define i32 @blsi32(i32 %x) nounwind readnone {
@@ -242,7 +272,7 @@ define i32 @blsi32(i32 %x) nounwind readnone {
}
define i32 @blsi32_load(i32* %x) nounwind readnone {
- %x1 = load i32* %x
+ %x1 = load i32, i32* %x
%tmp = sub i32 0, %x1
%tmp2 = and i32 %x1, %tmp
ret i32 %tmp2
@@ -267,7 +297,7 @@ define i32 @blsmsk32(i32 %x) nounwind readnone {
}
define i32 @blsmsk32_load(i32* %x) nounwind readnone {
- %x1 = load i32* %x
+ %x1 = load i32, i32* %x
%tmp = sub i32 %x1, 1
%tmp2 = xor i32 %x1, %tmp
ret i32 %tmp2
@@ -292,7 +322,7 @@ define i32 @blsr32(i32 %x) nounwind readnone {
}
define i32 @blsr32_load(i32* %x) nounwind readnone {
- %x1 = load i32* %x
+ %x1 = load i32, i32* %x
%tmp = sub i32 %x1, 1
%tmp2 = and i32 %x1, %tmp
ret i32 %tmp2
@@ -316,7 +346,7 @@ define i32 @pdep32(i32 %x, i32 %y) nounwind readnone {
}
define i32 @pdep32_load(i32 %x, i32* %y) nounwind readnone {
- %y1 = load i32* %y
+ %y1 = load i32, i32* %y
%tmp = tail call i32 @llvm.x86.bmi.pdep.32(i32 %x, i32 %y1)
ret i32 %tmp
; CHECK-LABEL: pdep32_load:
@@ -342,7 +372,7 @@ define i32 @pext32(i32 %x, i32 %y) nounwind readnone {
}
define i32 @pext32_load(i32 %x, i32* %y) nounwind readnone {
- %y1 = load i32* %y
+ %y1 = load i32, i32* %y
%tmp = tail call i32 @llvm.x86.bmi.pext.32(i32 %x, i32 %y1)
ret i32 %tmp
; CHECK-LABEL: pext32_load:
diff --git a/test/CodeGen/X86/bool-zext.ll b/test/CodeGen/X86/bool-zext.ll
index 3558376dfce6..c98ad9e36d7e 100644
--- a/test/CodeGen/X86/bool-zext.ll
+++ b/test/CodeGen/X86/bool-zext.ll
@@ -10,7 +10,7 @@
define void @bar1(i1 zeroext %v1) nounwind ssp {
entry:
%conv = zext i1 %v1 to i32
- %call = tail call i32 (...)* @foo1(i32 %conv) nounwind
+ %call = tail call i32 (...) @foo1(i32 %conv) nounwind
ret void
}
@@ -23,7 +23,7 @@ entry:
define void @bar2(i8 zeroext %v1) nounwind ssp {
entry:
%conv = zext i8 %v1 to i32
- %call = tail call i32 (...)* @foo1(i32 %conv) nounwind
+ %call = tail call i32 (...) @foo1(i32 %conv) nounwind
ret void
}
diff --git a/test/CodeGen/X86/branchfolding-landingpads.ll b/test/CodeGen/X86/branchfolding-landingpads.ll
new file mode 100644
index 000000000000..40ec92ea0d7f
--- /dev/null
+++ b/test/CodeGen/X86/branchfolding-landingpads.ll
@@ -0,0 +1,45 @@
+; RUN: llc %s -o - -verify-machineinstrs | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-unknown"
+
+; The machine level BranchFolding pass will try to remove the 'unreachable' block
+; and rewrite 'entry' to jump to the block 'unreachable' falls through to.
+; That will be a landing pad and result in 'entry' jumping to 2 landing pads.
+; This tests that we don't do this change when the fallthrough is itself a landing
+; pad.
+
+declare i32 @__gxx_personality_v0(...)
+declare void @foo()
+
+; Function Attrs: noreturn
+declare void @_throw()
+
+; CHECK-LABEL: @main
+; CHECK: %unreachable
+
+define i32 @main(i8* %cleanup) {
+entry:
+ invoke void @_throw() #0
+ to label %unreachable unwind label %catch.dispatch9
+
+catch.dispatch9: ; preds = %entry
+ %tmp13 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+ cleanup
+ catch i8* null
+ invoke void @_throw() #0
+ to label %unreachable unwind label %lpad31
+
+lpad31: ; preds = %catch.dispatch9
+ %tmp20 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+ cleanup
+ catch i8* null
+ call void @foo()
+ unreachable
+
+unreachable: ; preds = %catch.dispatch9, %entry
+ unreachable
+}
+
+attributes #0 = { noreturn }
+
diff --git a/test/CodeGen/X86/brcond.ll b/test/CodeGen/X86/brcond.ll
index 3ebe1a1d2357..f4db3ba7fecb 100644
--- a/test/CodeGen/X86/brcond.ll
+++ b/test/CodeGen/X86/brcond.ll
@@ -17,11 +17,11 @@ entry:
br i1 %4, label %bb1, label %bb
bb: ; preds = %entry
- %5 = tail call i32 (...)* @foo() nounwind ; <i32> [#uses=1]
+ %5 = tail call i32 (...) @foo() nounwind ; <i32> [#uses=1]
ret i32 %5
bb1: ; preds = %entry
- %6 = tail call i32 (...)* @bar() nounwind ; <i32> [#uses=1]
+ %6 = tail call i32 (...) @bar() nounwind ; <i32> [#uses=1]
ret i32 %6
}
diff --git a/test/CodeGen/X86/break-anti-dependencies.ll b/test/CodeGen/X86/break-anti-dependencies.ll
index 614d0adc7271..c54ac108819e 100644
--- a/test/CodeGen/X86/break-anti-dependencies.ll
+++ b/test/CodeGen/X86/break-anti-dependencies.ll
@@ -10,14 +10,14 @@
define void @goo(double* %r, double* %p, double* %q) nounwind {
entry:
- %0 = load double* %p, align 8
+ %0 = load double, double* %p, align 8
%1 = fadd double %0, 1.100000e+00
%2 = fmul double %1, 1.200000e+00
%3 = fadd double %2, 1.300000e+00
%4 = fmul double %3, 1.400000e+00
%5 = fadd double %4, 1.500000e+00
%6 = fptosi double %5 to i32
- %7 = load double* %r, align 8
+ %7 = load double, double* %r, align 8
%8 = fadd double %7, 7.100000e+00
%9 = fmul double %8, 7.200000e+00
%10 = fadd double %9, 7.300000e+00
diff --git a/test/CodeGen/X86/break-false-dep.ll b/test/CodeGen/X86/break-false-dep.ll
index 7034fae5e8bd..699de22d5b56 100644
--- a/test/CodeGen/X86/break-false-dep.ll
+++ b/test/CodeGen/X86/break-false-dep.ll
@@ -8,7 +8,7 @@ entry:
; SSE: movss ([[A0:%rdi|%rcx]]), %xmm0
; SSE: cvtss2sd %xmm0, %xmm0
- %0 = load float* %x, align 4
+ %0 = load float, float* %x, align 4
%1 = fpext float %0 to double
ret double %1
}
@@ -17,7 +17,7 @@ define float @t2(double* nocapture %x) nounwind readonly ssp optsize {
entry:
; SSE-LABEL: t2:
; SSE: cvtsd2ss ([[A0]]), %xmm0
- %0 = load double* %x, align 8
+ %0 = load double, double* %x, align 8
%1 = fptrunc double %0 to float
ret float %1
}
@@ -27,7 +27,7 @@ entry:
; SSE-LABEL: squirtf:
; SSE: movss ([[A0]]), %xmm0
; SSE: sqrtss %xmm0, %xmm0
- %z = load float* %x
+ %z = load float, float* %x
%t = call float @llvm.sqrt.f32(float %z)
ret float %t
}
@@ -37,7 +37,7 @@ entry:
; SSE-LABEL: squirt:
; SSE: movsd ([[A0]]), %xmm0
; SSE: sqrtsd %xmm0, %xmm0
- %z = load double* %x
+ %z = load double, double* %x
%t = call double @llvm.sqrt.f64(double %z)
ret double %t
}
@@ -46,7 +46,7 @@ define float @squirtf_size(float* %x) nounwind optsize {
entry:
; SSE-LABEL: squirtf_size:
; SSE: sqrtss ([[A0]]), %xmm0
- %z = load float* %x
+ %z = load float, float* %x
%t = call float @llvm.sqrt.f32(float %z)
ret float %t
}
@@ -55,7 +55,7 @@ define double @squirt_size(double* %x) nounwind optsize {
entry:
; SSE-LABEL: squirt_size:
; SSE: sqrtsd ([[A0]]), %xmm0
- %z = load double* %x
+ %z = load double, double* %x
%t = call double @llvm.sqrt.f64(double %z)
ret double %t
}
@@ -120,13 +120,13 @@ for.end: ; preds = %for.body, %entry
; SSE: cvtsi2sdq %{{r[0-9a-x]+}}, %[[REG]]
define i64 @loopdep2(i64* nocapture %x, double* nocapture %y) nounwind {
entry:
- %vx = load i64* %x
+ %vx = load i64, i64* %x
br label %loop
loop:
%i = phi i64 [ 1, %entry ], [ %inc, %loop ]
%s1 = phi i64 [ %vx, %entry ], [ %s2, %loop ]
%fi = sitofp i64 %i to double
- %vy = load double* %y
+ %vy = load double, double* %y
%fipy = fadd double %fi, %vy
%iipy = fptosi double %fipy to i64
%s2 = add i64 %s1, %iipy
@@ -158,19 +158,19 @@ for.cond1.preheader: ; preds = %for.inc14, %entry
for.body3:
%indvars.iv = phi i64 [ 0, %for.cond1.preheader ], [ %indvars.iv.next, %for.body3 ]
- %arrayidx = getelementptr inbounds [1024 x i32]* @v, i64 0, i64 %indvars.iv
- %0 = load i32* %arrayidx, align 4
+ %arrayidx = getelementptr inbounds [1024 x i32], [1024 x i32]* @v, i64 0, i64 %indvars.iv
+ %0 = load i32, i32* %arrayidx, align 4
%conv = sitofp i32 %0 to double
- %arrayidx5 = getelementptr inbounds [1024 x double]* @x, i64 0, i64 %indvars.iv
- %1 = load double* %arrayidx5, align 8
+ %arrayidx5 = getelementptr inbounds [1024 x double], [1024 x double]* @x, i64 0, i64 %indvars.iv
+ %1 = load double, double* %arrayidx5, align 8
%mul = fmul double %conv, %1
- %arrayidx7 = getelementptr inbounds [1024 x double]* @y, i64 0, i64 %indvars.iv
- %2 = load double* %arrayidx7, align 8
+ %arrayidx7 = getelementptr inbounds [1024 x double], [1024 x double]* @y, i64 0, i64 %indvars.iv
+ %2 = load double, double* %arrayidx7, align 8
%mul8 = fmul double %mul, %2
- %arrayidx10 = getelementptr inbounds [1024 x double]* @z, i64 0, i64 %indvars.iv
- %3 = load double* %arrayidx10, align 8
+ %arrayidx10 = getelementptr inbounds [1024 x double], [1024 x double]* @z, i64 0, i64 %indvars.iv
+ %3 = load double, double* %arrayidx10, align 8
%mul11 = fmul double %mul8, %3
- %arrayidx13 = getelementptr inbounds [1024 x double]* @w, i64 0, i64 %indvars.iv
+ %arrayidx13 = getelementptr inbounds [1024 x double], [1024 x double]* @w, i64 0, i64 %indvars.iv
store double %mul11, double* %arrayidx13, align 8
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%exitcond = icmp eq i64 %indvars.iv.next, 1024
diff --git a/test/CodeGen/X86/bswap-vector.ll b/test/CodeGen/X86/bswap-vector.ll
index 9dc960d7779f..7d5f380c1e28 100644
--- a/test/CodeGen/X86/bswap-vector.ll
+++ b/test/CodeGen/X86/bswap-vector.ll
@@ -1,7 +1,8 @@
-; RUN: llc < %s -mcpu=x86-64 | FileCheck %s -check-prefix=CHECK-NOSSSE3
-; RUN: llc < %s -mcpu=core2 | FileCheck %s -check-prefix=CHECK-SSSE3
-; RUN: llc < %s -mcpu=core-avx2 | FileCheck %s -check-prefix=CHECK-AVX2
-; RUN: llc < %s -mcpu=core-avx2 -x86-experimental-vector-widening-legalization | FileCheck %s -check-prefix=CHECK-WIDE-AVX2
+; RUN: llc < %s -mcpu=x86-64 | FileCheck %s --check-prefix=CHECK-NOSSSE3
+; RUN: llc < %s -mcpu=core2 | FileCheck %s --check-prefix=CHECK-SSSE3
+; RUN: llc < %s -mcpu=core-avx2 | FileCheck %s --check-prefix=CHECK-AVX2
+; RUN: llc < %s -mcpu=core-avx2 -x86-experimental-vector-widening-legalization | FileCheck %s --check-prefix=CHECK-WIDE-AVX2
+
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
@@ -9,165 +10,278 @@ declare <8 x i16> @llvm.bswap.v8i16(<8 x i16>)
declare <4 x i32> @llvm.bswap.v4i32(<4 x i32>)
declare <2 x i64> @llvm.bswap.v2i64(<2 x i64>)
-define <8 x i16> @test1(<8 x i16> %v) #0 {
+define <8 x i16> @test1(<8 x i16> %v) {
+; CHECK-NOSSSE3-LABEL: test1:
+; CHECK-NOSSSE3: # BB#0: # %entry
+; CHECK-NOSSSE3-NEXT: pxor %xmm1, %xmm1
+; CHECK-NOSSSE3-NEXT: movdqa %xmm0, %xmm2
+; CHECK-NOSSSE3-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm1[8],xmm2[9],xmm1[9],xmm2[10],xmm1[10],xmm2[11],xmm1[11],xmm2[12],xmm1[12],xmm2[13],xmm1[13],xmm2[14],xmm1[14],xmm2[15],xmm1[15]
+; CHECK-NOSSSE3-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[1,0,3,2,4,5,6,7]
+; CHECK-NOSSSE3-NEXT: pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,5,4,7,6]
+; CHECK-NOSSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; CHECK-NOSSSE3-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,0,3,2,4,5,6,7]
+; CHECK-NOSSSE3-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6]
+; CHECK-NOSSSE3-NEXT: packuswb %xmm2, %xmm0
+; CHECK-NOSSSE3-NEXT: retq
+;
+; CHECK-SSSE3-LABEL: test1:
+; CHECK-SSSE3: # BB#0: # %entry
+; CHECK-SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14]
+; CHECK-SSSE3-NEXT: retq
+;
+; CHECK-AVX2-LABEL: test1:
+; CHECK-AVX2: # BB#0: # %entry
+; CHECK-AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14]
+; CHECK-AVX2-NEXT: retq
+;
+; CHECK-WIDE-AVX2-LABEL: test1:
+; CHECK-WIDE-AVX2: # BB#0: # %entry
+; CHECK-WIDE-AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14]
+; CHECK-WIDE-AVX2-NEXT: retq
entry:
%r = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %v)
ret <8 x i16> %r
-
-; CHECK-NOSSSE3-LABEL: @test1
-; CHECK-NOSSSE3: rolw
-; CHECK-NOSSSE3: rolw
-; CHECK-NOSSSE3: rolw
-; CHECK-NOSSSE3: rolw
-; CHECK-NOSSSE3: rolw
-; CHECK-NOSSSE3: rolw
-; CHECK-NOSSSE3: rolw
-; CHECK-NOSSSE3: rolw
-; CHECK-NOSSSE3: retq
-
-; CHECK-SSSE3-LABEL: @test1
-; CHECK-SSSE3: pshufb
-; CHECK-SSSE3-NEXT: retq
-
-; CHECK-AVX2-LABEL: @test1
-; CHECK-AVX2: vpshufb
-; CHECK-AVX2-NEXT: retq
-
-; CHECK-WIDE-AVX2-LABEL: @test1
-; CHECK-WIDE-AVX2: vpshufb
-; CHECK-WIDE-AVX2-NEXT: retq
}
-define <4 x i32> @test2(<4 x i32> %v) #0 {
+define <4 x i32> @test2(<4 x i32> %v) {
+; CHECK-NOSSSE3-LABEL: test2:
+; CHECK-NOSSSE3: # BB#0: # %entry
+; CHECK-NOSSSE3-NEXT: pxor %xmm1, %xmm1
+; CHECK-NOSSSE3-NEXT: movdqa %xmm0, %xmm2
+; CHECK-NOSSSE3-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm1[8],xmm2[9],xmm1[9],xmm2[10],xmm1[10],xmm2[11],xmm1[11],xmm2[12],xmm1[12],xmm2[13],xmm1[13],xmm2[14],xmm1[14],xmm2[15],xmm1[15]
+; CHECK-NOSSSE3-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[3,2,1,0,4,5,6,7]
+; CHECK-NOSSSE3-NEXT: pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,7,6,5,4]
+; CHECK-NOSSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; CHECK-NOSSSE3-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
+; CHECK-NOSSSE3-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4]
+; CHECK-NOSSSE3-NEXT: packuswb %xmm2, %xmm0
+; CHECK-NOSSSE3-NEXT: retq
+;
+; CHECK-SSSE3-LABEL: test2:
+; CHECK-SSSE3: # BB#0: # %entry
+; CHECK-SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12]
+; CHECK-SSSE3-NEXT: retq
+;
+; CHECK-AVX2-LABEL: test2:
+; CHECK-AVX2: # BB#0: # %entry
+; CHECK-AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12]
+; CHECK-AVX2-NEXT: retq
+;
+; CHECK-WIDE-AVX2-LABEL: test2:
+; CHECK-WIDE-AVX2: # BB#0: # %entry
+; CHECK-WIDE-AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12]
+; CHECK-WIDE-AVX2-NEXT: retq
entry:
%r = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %v)
ret <4 x i32> %r
-
-; CHECK-NOSSSE3-LABEL: @test2
-; CHECK-NOSSSE3: bswapl
-; CHECK-NOSSSE3: bswapl
-; CHECK-NOSSSE3: bswapl
-; CHECK-NOSSSE3: bswapl
-; CHECK-NOSSSE3: retq
-
-; CHECK-SSSE3-LABEL: @test2
-; CHECK-SSSE3: pshufb
-; CHECK-SSSE3-NEXT: retq
-
-; CHECK-AVX2-LABEL: @test2
-; CHECK-AVX2: vpshufb
-; CHECK-AVX2-NEXT: retq
-
-; CHECK-WIDE-AVX2-LABEL: @test2
-; CHECK-WIDE-AVX2: vpshufb
-; CHECK-WIDE-AVX2-NEXT: retq
}
-define <2 x i64> @test3(<2 x i64> %v) #0 {
+define <2 x i64> @test3(<2 x i64> %v) {
+; CHECK-NOSSSE3-LABEL: test3:
+; CHECK-NOSSSE3: # BB#0: # %entry
+; CHECK-NOSSSE3-NEXT: pxor %xmm1, %xmm1
+; CHECK-NOSSSE3-NEXT: movdqa %xmm0, %xmm2
+; CHECK-NOSSSE3-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm1[8],xmm2[9],xmm1[9],xmm2[10],xmm1[10],xmm2[11],xmm1[11],xmm2[12],xmm1[12],xmm2[13],xmm1[13],xmm2[14],xmm1[14],xmm2[15],xmm1[15]
+; CHECK-NOSSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,3,0,1]
+; CHECK-NOSSSE3-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[3,2,1,0,4,5,6,7]
+; CHECK-NOSSSE3-NEXT: pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,7,6,5,4]
+; CHECK-NOSSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; CHECK-NOSSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; CHECK-NOSSSE3-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
+; CHECK-NOSSSE3-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4]
+; CHECK-NOSSSE3-NEXT: packuswb %xmm2, %xmm0
+; CHECK-NOSSSE3-NEXT: retq
+;
+; CHECK-SSSE3-LABEL: test3:
+; CHECK-SSSE3: # BB#0: # %entry
+; CHECK-SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8]
+; CHECK-SSSE3-NEXT: retq
+;
+; CHECK-AVX2-LABEL: test3:
+; CHECK-AVX2: # BB#0: # %entry
+; CHECK-AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8]
+; CHECK-AVX2-NEXT: retq
+;
+; CHECK-WIDE-AVX2-LABEL: test3:
+; CHECK-WIDE-AVX2: # BB#0: # %entry
+; CHECK-WIDE-AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8]
+; CHECK-WIDE-AVX2-NEXT: retq
entry:
%r = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %v)
ret <2 x i64> %r
-
-; CHECK-NOSSSE3-LABEL: @test3
-; CHECK-NOSSSE3: bswapq
-; CHECK-NOSSSE3: bswapq
-; CHECK-NOSSSE3: retq
-
-; CHECK-SSSE3-LABEL: @test3
-; CHECK-SSSE3: pshufb
-; CHECK-SSSE3-NEXT: retq
-
-; CHECK-AVX2-LABEL: @test3
-; CHECK-AVX2: vpshufb
-; CHECK-AVX2-NEXT: retq
-
-; CHECK-WIDE-AVX2-LABEL: @test3
-; CHECK-WIDE-AVX2: vpshufb
-; CHECK-WIDE-AVX2-NEXT: retq
}
declare <16 x i16> @llvm.bswap.v16i16(<16 x i16>)
declare <8 x i32> @llvm.bswap.v8i32(<8 x i32>)
declare <4 x i64> @llvm.bswap.v4i64(<4 x i64>)
-define <16 x i16> @test4(<16 x i16> %v) #0 {
+define <16 x i16> @test4(<16 x i16> %v) {
+; CHECK-NOSSSE3-LABEL: test4:
+; CHECK-NOSSSE3: # BB#0: # %entry
+; CHECK-NOSSSE3-NEXT: pxor %xmm2, %xmm2
+; CHECK-NOSSSE3-NEXT: movdqa %xmm0, %xmm3
+; CHECK-NOSSSE3-NEXT: punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm2[8],xmm3[9],xmm2[9],xmm3[10],xmm2[10],xmm3[11],xmm2[11],xmm3[12],xmm2[12],xmm3[13],xmm2[13],xmm3[14],xmm2[14],xmm3[15],xmm2[15]
+; CHECK-NOSSSE3-NEXT: pshuflw {{.*#+}} xmm3 = xmm3[1,0,3,2,4,5,6,7]
+; CHECK-NOSSSE3-NEXT: pshufhw {{.*#+}} xmm3 = xmm3[0,1,2,3,5,4,7,6]
+; CHECK-NOSSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
+; CHECK-NOSSSE3-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,0,3,2,4,5,6,7]
+; CHECK-NOSSSE3-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6]
+; CHECK-NOSSSE3-NEXT: packuswb %xmm3, %xmm0
+; CHECK-NOSSSE3-NEXT: movdqa %xmm1, %xmm3
+; CHECK-NOSSSE3-NEXT: punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm2[8],xmm3[9],xmm2[9],xmm3[10],xmm2[10],xmm3[11],xmm2[11],xmm3[12],xmm2[12],xmm3[13],xmm2[13],xmm3[14],xmm2[14],xmm3[15],xmm2[15]
+; CHECK-NOSSSE3-NEXT: pshuflw {{.*#+}} xmm3 = xmm3[1,0,3,2,4,5,6,7]
+; CHECK-NOSSSE3-NEXT: pshufhw {{.*#+}} xmm3 = xmm3[0,1,2,3,5,4,7,6]
+; CHECK-NOSSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
+; CHECK-NOSSSE3-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[1,0,3,2,4,5,6,7]
+; CHECK-NOSSSE3-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,5,4,7,6]
+; CHECK-NOSSSE3-NEXT: packuswb %xmm3, %xmm1
+; CHECK-NOSSSE3-NEXT: retq
+;
+; CHECK-SSSE3-LABEL: test4:
+; CHECK-SSSE3: # BB#0: # %entry
+; CHECK-SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14]
+; CHECK-SSSE3-NEXT: pshufb %xmm2, %xmm0
+; CHECK-SSSE3-NEXT: pshufb %xmm2, %xmm1
+; CHECK-SSSE3-NEXT: retq
+;
+; CHECK-AVX2-LABEL: test4:
+; CHECK-AVX2: # BB#0: # %entry
+; CHECK-AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14,17,16,19,18,21,20,23,22,25,24,27,26,29,28,31,30]
+; CHECK-AVX2-NEXT: retq
+;
+; CHECK-WIDE-AVX2-LABEL: test4:
+; CHECK-WIDE-AVX2: # BB#0: # %entry
+; CHECK-WIDE-AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14,17,16,19,18,21,20,23,22,25,24,27,26,29,28,31,30]
+; CHECK-WIDE-AVX2-NEXT: retq
entry:
%r = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> %v)
ret <16 x i16> %r
-
-; CHECK-SSSE3-LABEL: @test4
-; CHECK-SSSE3: pshufb
-; CHECK-SSSE3: pshufb
-; CHECK-SSSE3-NEXT: retq
-
-; CHECK-AVX2-LABEL: @test4
-; CHECK-AVX2: vpshufb
-; CHECK-AVX2-NEXT: retq
-
-; CHECK-WIDE-AVX2-LABEL: @test4
-; CHECK-WIDE-AVX2: vpshufb
-; CHECK-WIDE-AVX2-NEXT: retq
}
-define <8 x i32> @test5(<8 x i32> %v) #0 {
+define <8 x i32> @test5(<8 x i32> %v) {
+; CHECK-NOSSSE3-LABEL: test5:
+; CHECK-NOSSSE3: # BB#0: # %entry
+; CHECK-NOSSSE3-NEXT: pxor %xmm2, %xmm2
+; CHECK-NOSSSE3-NEXT: movdqa %xmm0, %xmm3
+; CHECK-NOSSSE3-NEXT: punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm2[8],xmm3[9],xmm2[9],xmm3[10],xmm2[10],xmm3[11],xmm2[11],xmm3[12],xmm2[12],xmm3[13],xmm2[13],xmm3[14],xmm2[14],xmm3[15],xmm2[15]
+; CHECK-NOSSSE3-NEXT: pshuflw {{.*#+}} xmm3 = xmm3[3,2,1,0,4,5,6,7]
+; CHECK-NOSSSE3-NEXT: pshufhw {{.*#+}} xmm3 = xmm3[0,1,2,3,7,6,5,4]
+; CHECK-NOSSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
+; CHECK-NOSSSE3-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
+; CHECK-NOSSSE3-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4]
+; CHECK-NOSSSE3-NEXT: packuswb %xmm3, %xmm0
+; CHECK-NOSSSE3-NEXT: movdqa %xmm1, %xmm3
+; CHECK-NOSSSE3-NEXT: punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm2[8],xmm3[9],xmm2[9],xmm3[10],xmm2[10],xmm3[11],xmm2[11],xmm3[12],xmm2[12],xmm3[13],xmm2[13],xmm3[14],xmm2[14],xmm3[15],xmm2[15]
+; CHECK-NOSSSE3-NEXT: pshuflw {{.*#+}} xmm3 = xmm3[3,2,1,0,4,5,6,7]
+; CHECK-NOSSSE3-NEXT: pshufhw {{.*#+}} xmm3 = xmm3[0,1,2,3,7,6,5,4]
+; CHECK-NOSSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
+; CHECK-NOSSSE3-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[3,2,1,0,4,5,6,7]
+; CHECK-NOSSSE3-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,7,6,5,4]
+; CHECK-NOSSSE3-NEXT: packuswb %xmm3, %xmm1
+; CHECK-NOSSSE3-NEXT: retq
+;
+; CHECK-SSSE3-LABEL: test5:
+; CHECK-SSSE3: # BB#0: # %entry
+; CHECK-SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12]
+; CHECK-SSSE3-NEXT: pshufb %xmm2, %xmm0
+; CHECK-SSSE3-NEXT: pshufb %xmm2, %xmm1
+; CHECK-SSSE3-NEXT: retq
+;
+; CHECK-AVX2-LABEL: test5:
+; CHECK-AVX2: # BB#0: # %entry
+; CHECK-AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12,19,18,17,16,23,22,21,20,27,26,25,24,31,30,29,28]
+; CHECK-AVX2-NEXT: retq
+;
+; CHECK-WIDE-AVX2-LABEL: test5:
+; CHECK-WIDE-AVX2: # BB#0: # %entry
+; CHECK-WIDE-AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12,19,18,17,16,23,22,21,20,27,26,25,24,31,30,29,28]
+; CHECK-WIDE-AVX2-NEXT: retq
entry:
%r = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %v)
ret <8 x i32> %r
-
-; CHECK-SSSE3-LABEL: @test5
-; CHECK-SSSE3: pshufb
-; CHECK-SSSE3: pshufb
-; CHECK-SSSE3-NEXT: retq
-
-; CHECK-AVX2-LABEL: @test5
-; CHECK-AVX2: vpshufb
-; CHECK-AVX2-NEXT: retq
-
-; CHECK-WIDE-AVX2-LABEL: @test5
-; CHECK-WIDE-AVX2: vpshufb
-; CHECK-WIDE-AVX2-NEXT: retq
}
-define <4 x i64> @test6(<4 x i64> %v) #0 {
+define <4 x i64> @test6(<4 x i64> %v) {
+; CHECK-NOSSSE3-LABEL: test6:
+; CHECK-NOSSSE3: # BB#0: # %entry
+; CHECK-NOSSSE3-NEXT: pxor %xmm2, %xmm2
+; CHECK-NOSSSE3-NEXT: movdqa %xmm0, %xmm3
+; CHECK-NOSSSE3-NEXT: punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm2[8],xmm3[9],xmm2[9],xmm3[10],xmm2[10],xmm3[11],xmm2[11],xmm3[12],xmm2[12],xmm3[13],xmm2[13],xmm3[14],xmm2[14],xmm3[15],xmm2[15]
+; CHECK-NOSSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm3[2,3,0,1]
+; CHECK-NOSSSE3-NEXT: pshuflw {{.*#+}} xmm3 = xmm3[3,2,1,0,4,5,6,7]
+; CHECK-NOSSSE3-NEXT: pshufhw {{.*#+}} xmm3 = xmm3[0,1,2,3,7,6,5,4]
+; CHECK-NOSSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
+; CHECK-NOSSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; CHECK-NOSSSE3-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
+; CHECK-NOSSSE3-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4]
+; CHECK-NOSSSE3-NEXT: packuswb %xmm3, %xmm0
+; CHECK-NOSSSE3-NEXT: movdqa %xmm1, %xmm3
+; CHECK-NOSSSE3-NEXT: punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm2[8],xmm3[9],xmm2[9],xmm3[10],xmm2[10],xmm3[11],xmm2[11],xmm3[12],xmm2[12],xmm3[13],xmm2[13],xmm3[14],xmm2[14],xmm3[15],xmm2[15]
+; CHECK-NOSSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm3[2,3,0,1]
+; CHECK-NOSSSE3-NEXT: pshuflw {{.*#+}} xmm3 = xmm3[3,2,1,0,4,5,6,7]
+; CHECK-NOSSSE3-NEXT: pshufhw {{.*#+}} xmm3 = xmm3[0,1,2,3,7,6,5,4]
+; CHECK-NOSSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
+; CHECK-NOSSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
+; CHECK-NOSSSE3-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[3,2,1,0,4,5,6,7]
+; CHECK-NOSSSE3-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,7,6,5,4]
+; CHECK-NOSSSE3-NEXT: packuswb %xmm3, %xmm1
+; CHECK-NOSSSE3-NEXT: retq
+;
+; CHECK-SSSE3-LABEL: test6:
+; CHECK-SSSE3: # BB#0: # %entry
+; CHECK-SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8]
+; CHECK-SSSE3-NEXT: pshufb %xmm2, %xmm0
+; CHECK-SSSE3-NEXT: pshufb %xmm2, %xmm1
+; CHECK-SSSE3-NEXT: retq
+;
+; CHECK-AVX2-LABEL: test6:
+; CHECK-AVX2: # BB#0: # %entry
+; CHECK-AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8,23,22,21,20,19,18,17,16,31,30,29,28,27,26,25,24]
+; CHECK-AVX2-NEXT: retq
+;
+; CHECK-WIDE-AVX2-LABEL: test6:
+; CHECK-WIDE-AVX2: # BB#0: # %entry
+; CHECK-WIDE-AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8,23,22,21,20,19,18,17,16,31,30,29,28,27,26,25,24]
+; CHECK-WIDE-AVX2-NEXT: retq
entry:
%r = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %v)
ret <4 x i64> %r
-
-; CHECK-SSSE3-LABEL: @test6
-; CHECK-SSSE3: pshufb
-; CHECK-SSSE3: pshufb
-; CHECK-SSSE3-NEXT: retq
-
-; CHECK-AVX2-LABEL: @test6
-; CHECK-AVX2: vpshufb
-; CHECK-AVX2-NEXT: retq
-
-; CHECK-WIDE-AVX2-LABEL: @test6
-; CHECK-WIDE-AVX2: vpshufb
-; CHECK-WIDE-AVX2-NEXT: retq
}
declare <4 x i16> @llvm.bswap.v4i16(<4 x i16>)
-define <4 x i16> @test7(<4 x i16> %v) #0 {
+define <4 x i16> @test7(<4 x i16> %v) {
+; CHECK-NOSSSE3-LABEL: test7:
+; CHECK-NOSSSE3: # BB#0: # %entry
+; CHECK-NOSSSE3-NEXT: pxor %xmm1, %xmm1
+; CHECK-NOSSSE3-NEXT: movdqa %xmm0, %xmm2
+; CHECK-NOSSSE3-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm1[8],xmm2[9],xmm1[9],xmm2[10],xmm1[10],xmm2[11],xmm1[11],xmm2[12],xmm1[12],xmm2[13],xmm1[13],xmm2[14],xmm1[14],xmm2[15],xmm1[15]
+; CHECK-NOSSSE3-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[3,2,1,0,4,5,6,7]
+; CHECK-NOSSSE3-NEXT: pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,7,6,5,4]
+; CHECK-NOSSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; CHECK-NOSSSE3-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
+; CHECK-NOSSSE3-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4]
+; CHECK-NOSSSE3-NEXT: packuswb %xmm2, %xmm0
+; CHECK-NOSSSE3-NEXT: psrld $16, %xmm0
+; CHECK-NOSSSE3-NEXT: retq
+;
+; CHECK-SSSE3-LABEL: test7:
+; CHECK-SSSE3: # BB#0: # %entry
+; CHECK-SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12]
+; CHECK-SSSE3-NEXT: psrld $16, %xmm0
+; CHECK-SSSE3-NEXT: retq
+;
+; CHECK-AVX2-LABEL: test7:
+; CHECK-AVX2: # BB#0: # %entry
+; CHECK-AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12]
+; CHECK-AVX2-NEXT: vpsrld $16, %xmm0, %xmm0
+; CHECK-AVX2-NEXT: retq
+;
+; CHECK-WIDE-AVX2-LABEL: test7:
+; CHECK-WIDE-AVX2: # BB#0: # %entry
+; CHECK-WIDE-AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14]
+; CHECK-WIDE-AVX2-NEXT: retq
entry:
%r = call <4 x i16> @llvm.bswap.v4i16(<4 x i16> %v)
ret <4 x i16> %r
-
-; CHECK-SSSE3-LABEL: @test7
-; CHECK-SSSE3: pshufb
-; CHECK-SSSE3: psrld $16
-; CHECK-SSSE3-NEXT: retq
-
-; CHECK-AVX2-LABEL: @test7
-; CHECK-AVX2: vpshufb
-; CHECK-AVX2: vpsrld $16
-; CHECK-AVX2-NEXT: retq
-
-; CHECK-WIDE-AVX2-LABEL: @test7
-; CHECK-WIDE-AVX2: vpshufb
-; CHECK-WIDE-AVX2-NEXT: retq
}
-
-attributes #0 = { nounwind uwtable }
-
diff --git a/test/CodeGen/X86/bswap.ll b/test/CodeGen/X86/bswap.ll
index e6a456c39ddd..48dc18e0ac14 100644
--- a/test/CodeGen/X86/bswap.ll
+++ b/test/CodeGen/X86/bswap.ll
@@ -91,7 +91,7 @@ define i64 @not_bswap() {
; CHECK64-LABEL: not_bswap:
; CHECK64-NOT: bswapq
; CHECK64: ret
- %init = load i16* @var16
+ %init = load i16, i16* @var16
%big = zext i16 %init to i64
%hishifted = lshr i64 %big, 8
@@ -115,7 +115,7 @@ define i64 @not_useful_bswap() {
; CHECK64-NOT: bswapq
; CHECK64: ret
- %init = load i8* @var8
+ %init = load i8, i8* @var8
%big = zext i8 %init to i64
%hishifted = lshr i64 %big, 8
@@ -140,7 +140,7 @@ define i64 @finally_useful_bswap() {
; CHECK64: shrq $48, [[REG]]
; CHECK64: ret
- %init = load i16* @var16
+ %init = load i16, i16* @var16
%big = zext i16 %init to i64
%hishifted = lshr i64 %big, 8
diff --git a/test/CodeGen/X86/byval-align.ll b/test/CodeGen/X86/byval-align.ll
index c62a1814ba7f..8366ae38333f 100644
--- a/test/CodeGen/X86/byval-align.ll
+++ b/test/CodeGen/X86/byval-align.ll
@@ -14,20 +14,20 @@ entry:
%"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
%obj1 = bitcast %struct.S* %obj to i8* ; <i8*> [#uses=1]
store i8* %obj1, i8** %ptr, align 8
- %0 = load i8** %ptr, align 8 ; <i8*> [#uses=1]
+ %0 = load i8*, i8** %ptr, align 8 ; <i8*> [#uses=1]
%1 = ptrtoint i8* %0 to i64 ; <i64> [#uses=1]
store i64 %1, i64* %p, align 8
- %2 = load i8** %ptr, align 8 ; <i8*> [#uses=1]
- %3 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([10 x i8]* @.str, i64 0, i64 0), i8* %2) nounwind ; <i32> [#uses=0]
- %4 = load i64* %p, align 8 ; <i64> [#uses=1]
+ %2 = load i8*, i8** %ptr, align 8 ; <i8*> [#uses=1]
+ %3 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([10 x i8], [10 x i8]* @.str, i64 0, i64 0), i8* %2) nounwind ; <i32> [#uses=0]
+ %4 = load i64, i64* %p, align 8 ; <i64> [#uses=1]
%5 = and i64 %4, 140737488355264 ; <i64> [#uses=1]
- %6 = load i64* %p, align 8 ; <i64> [#uses=1]
+ %6 = load i64, i64* %p, align 8 ; <i64> [#uses=1]
%7 = icmp ne i64 %5, %6 ; <i1> [#uses=1]
br i1 %7, label %bb, label %bb2
bb: ; preds = %entry
- %8 = call i32 @puts(i8* getelementptr inbounds ([8 x i8]* @.str1, i64 0, i64 0)) nounwind ; <i32> [#uses=0]
- call void @__assert_fail(i8* getelementptr inbounds ([2 x i8]* @.str2, i64 0, i64 0), i8* getelementptr inbounds ([7 x i8]* @.str3, i64 0, i64 0), i32 18, i8* getelementptr inbounds ([13 x i8]* @__PRETTY_FUNCTION__.2067, i64 0, i64 0)) noreturn nounwind
+ %8 = call i32 @puts(i8* getelementptr inbounds ([8 x i8], [8 x i8]* @.str1, i64 0, i64 0)) nounwind ; <i32> [#uses=0]
+ call void @__assert_fail(i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str2, i64 0, i64 0), i8* getelementptr inbounds ([7 x i8], [7 x i8]* @.str3, i64 0, i64 0), i32 18, i8* getelementptr inbounds ([13 x i8], [13 x i8]* @__PRETTY_FUNCTION__.2067, i64 0, i64 0)) noreturn nounwind
unreachable
bb2: ; preds = %entry
@@ -49,7 +49,7 @@ entry:
; CHECK: andq $-64, %rsp
%s1 = alloca %struct.S ; <%struct.S*> [#uses=4]
%"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
- %0 = getelementptr inbounds %struct.S* %s1, i32 0, i32 0 ; <i32*> [#uses=1]
+ %0 = getelementptr inbounds %struct.S, %struct.S* %s1, i32 0, i32 0 ; <i32*> [#uses=1]
store i32 1, i32* %0, align 4
call void @aligned_func(%struct.S* byval align 64 %s1) nounwind
br label %return
diff --git a/test/CodeGen/X86/byval.ll b/test/CodeGen/X86/byval.ll
index 185eda1566d4..f29511a54c41 100644
--- a/test/CodeGen/X86/byval.ll
+++ b/test/CodeGen/X86/byval.ll
@@ -11,7 +11,7 @@
define i64 @f(%struct.s* byval %a) {
entry:
- %tmp2 = getelementptr %struct.s* %a, i32 0, i32 0
- %tmp3 = load i64* %tmp2, align 8
+ %tmp2 = getelementptr %struct.s, %struct.s* %a, i32 0, i32 0
+ %tmp3 = load i64, i64* %tmp2, align 8
ret i64 %tmp3
}
diff --git a/test/CodeGen/X86/byval2.ll b/test/CodeGen/X86/byval2.ll
index c5187db6de4b..cc72a8699a9c 100644
--- a/test/CodeGen/X86/byval2.ll
+++ b/test/CodeGen/X86/byval2.ll
@@ -31,11 +31,11 @@
define void @g(i64 %a, i64 %b, i64 %c) {
entry:
%d = alloca %struct.s, align 16
- %tmp = getelementptr %struct.s* %d, i32 0, i32 0
+ %tmp = getelementptr %struct.s, %struct.s* %d, i32 0, i32 0
store i64 %a, i64* %tmp, align 16
- %tmp2 = getelementptr %struct.s* %d, i32 0, i32 1
+ %tmp2 = getelementptr %struct.s, %struct.s* %d, i32 0, i32 1
store i64 %b, i64* %tmp2, align 16
- %tmp4 = getelementptr %struct.s* %d, i32 0, i32 2
+ %tmp4 = getelementptr %struct.s, %struct.s* %d, i32 0, i32 2
store i64 %c, i64* %tmp4, align 16
call void @f( %struct.s*byval %d )
call void @f( %struct.s*byval %d )
diff --git a/test/CodeGen/X86/byval3.ll b/test/CodeGen/X86/byval3.ll
index d06fd8898e7f..85ecdaf1c67d 100644
--- a/test/CodeGen/X86/byval3.ll
+++ b/test/CodeGen/X86/byval3.ll
@@ -33,17 +33,17 @@
define void @g(i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6) nounwind {
entry:
%d = alloca %struct.s, align 16
- %tmp = getelementptr %struct.s* %d, i32 0, i32 0
+ %tmp = getelementptr %struct.s, %struct.s* %d, i32 0, i32 0
store i32 %a1, i32* %tmp, align 16
- %tmp2 = getelementptr %struct.s* %d, i32 0, i32 1
+ %tmp2 = getelementptr %struct.s, %struct.s* %d, i32 0, i32 1
store i32 %a2, i32* %tmp2, align 16
- %tmp4 = getelementptr %struct.s* %d, i32 0, i32 2
+ %tmp4 = getelementptr %struct.s, %struct.s* %d, i32 0, i32 2
store i32 %a3, i32* %tmp4, align 16
- %tmp6 = getelementptr %struct.s* %d, i32 0, i32 3
+ %tmp6 = getelementptr %struct.s, %struct.s* %d, i32 0, i32 3
store i32 %a4, i32* %tmp6, align 16
- %tmp8 = getelementptr %struct.s* %d, i32 0, i32 4
+ %tmp8 = getelementptr %struct.s, %struct.s* %d, i32 0, i32 4
store i32 %a5, i32* %tmp8, align 16
- %tmp10 = getelementptr %struct.s* %d, i32 0, i32 5
+ %tmp10 = getelementptr %struct.s, %struct.s* %d, i32 0, i32 5
store i32 %a6, i32* %tmp10, align 16
call void @f( %struct.s* byval %d)
call void @f( %struct.s* byval %d)
diff --git a/test/CodeGen/X86/byval4.ll b/test/CodeGen/X86/byval4.ll
index 4711e4511112..1e436f7903ac 100644
--- a/test/CodeGen/X86/byval4.ll
+++ b/test/CodeGen/X86/byval4.ll
@@ -39,17 +39,17 @@ define void @g(i16 signext %a1, i16 signext %a2, i16 signext %a3,
i16 signext %a4, i16 signext %a5, i16 signext %a6) nounwind {
entry:
%a = alloca %struct.s, align 16
- %tmp = getelementptr %struct.s* %a, i32 0, i32 0
+ %tmp = getelementptr %struct.s, %struct.s* %a, i32 0, i32 0
store i16 %a1, i16* %tmp, align 16
- %tmp2 = getelementptr %struct.s* %a, i32 0, i32 1
+ %tmp2 = getelementptr %struct.s, %struct.s* %a, i32 0, i32 1
store i16 %a2, i16* %tmp2, align 16
- %tmp4 = getelementptr %struct.s* %a, i32 0, i32 2
+ %tmp4 = getelementptr %struct.s, %struct.s* %a, i32 0, i32 2
store i16 %a3, i16* %tmp4, align 16
- %tmp6 = getelementptr %struct.s* %a, i32 0, i32 3
+ %tmp6 = getelementptr %struct.s, %struct.s* %a, i32 0, i32 3
store i16 %a4, i16* %tmp6, align 16
- %tmp8 = getelementptr %struct.s* %a, i32 0, i32 4
+ %tmp8 = getelementptr %struct.s, %struct.s* %a, i32 0, i32 4
store i16 %a5, i16* %tmp8, align 16
- %tmp10 = getelementptr %struct.s* %a, i32 0, i32 5
+ %tmp10 = getelementptr %struct.s, %struct.s* %a, i32 0, i32 5
store i16 %a6, i16* %tmp10, align 16
call void @f( %struct.s* byval %a )
call void @f( %struct.s* byval %a )
diff --git a/test/CodeGen/X86/byval5.ll b/test/CodeGen/X86/byval5.ll
index f24a5f9aa3b4..6d734a44b3c3 100644
--- a/test/CodeGen/X86/byval5.ll
+++ b/test/CodeGen/X86/byval5.ll
@@ -47,17 +47,17 @@ define void @g(i8 signext %a1, i8 signext %a2, i8 signext %a3,
i8 signext %a4, i8 signext %a5, i8 signext %a6) {
entry:
%a = alloca %struct.s
- %tmp = getelementptr %struct.s* %a, i32 0, i32 0
+ %tmp = getelementptr %struct.s, %struct.s* %a, i32 0, i32 0
store i8 %a1, i8* %tmp, align 8
- %tmp2 = getelementptr %struct.s* %a, i32 0, i32 1
+ %tmp2 = getelementptr %struct.s, %struct.s* %a, i32 0, i32 1
store i8 %a2, i8* %tmp2, align 8
- %tmp4 = getelementptr %struct.s* %a, i32 0, i32 2
+ %tmp4 = getelementptr %struct.s, %struct.s* %a, i32 0, i32 2
store i8 %a3, i8* %tmp4, align 8
- %tmp6 = getelementptr %struct.s* %a, i32 0, i32 3
+ %tmp6 = getelementptr %struct.s, %struct.s* %a, i32 0, i32 3
store i8 %a4, i8* %tmp6, align 8
- %tmp8 = getelementptr %struct.s* %a, i32 0, i32 4
+ %tmp8 = getelementptr %struct.s, %struct.s* %a, i32 0, i32 4
store i8 %a5, i8* %tmp8, align 8
- %tmp10 = getelementptr %struct.s* %a, i32 0, i32 5
+ %tmp10 = getelementptr %struct.s, %struct.s* %a, i32 0, i32 5
store i8 %a6, i8* %tmp10, align 8
call void @f( %struct.s* byval %a )
call void @f( %struct.s* byval %a )
diff --git a/test/CodeGen/X86/byval6.ll b/test/CodeGen/X86/byval6.ll
index 2d3990167f2e..c3e7b7ef435a 100644
--- a/test/CodeGen/X86/byval6.ll
+++ b/test/CodeGen/X86/byval6.ll
@@ -6,8 +6,8 @@
define i32 @main() nounwind {
entry:
- tail call void (i32, ...)* @bar( i32 3, %struct.W* byval @.cpx ) nounwind
- tail call void (i32, ...)* @baz( i32 3, %struct.W* byval @B ) nounwind
+ tail call void (i32, ...) @bar( i32 3, %struct.W* byval @.cpx ) nounwind
+ tail call void (i32, ...) @baz( i32 3, %struct.W* byval @B ) nounwind
ret i32 undef
}
diff --git a/test/CodeGen/X86/byval7.ll b/test/CodeGen/X86/byval7.ll
index 42751d7dbc93..8d5dd8c5887e 100644
--- a/test/CodeGen/X86/byval7.ll
+++ b/test/CodeGen/X86/byval7.ll
@@ -12,7 +12,7 @@ entry:
; CHECK: rep;movsl
; CHECK: movl $1, (%esp)
%s = alloca %struct.S ; <%struct.S*> [#uses=2]
- %tmp15 = getelementptr %struct.S* %s, i32 0, i32 0 ; <<2 x i64>*> [#uses=1]
+ %tmp15 = getelementptr %struct.S, %struct.S* %s, i32 0, i32 0 ; <<2 x i64>*> [#uses=1]
store <2 x i64> < i64 8589934595, i64 1 >, <2 x i64>* %tmp15, align 16
call void @t( i32 1, %struct.S* byval %s ) nounwind
ret i32 0
diff --git a/test/CodeGen/X86/cache-intrinsic.ll b/test/CodeGen/X86/cache-intrinsic.ll
index 3091b5ff3118..0b9d77ac993e 100644
--- a/test/CodeGen/X86/cache-intrinsic.ll
+++ b/test/CodeGen/X86/cache-intrinsic.ll
@@ -10,10 +10,10 @@ define i32 @main() {
entry:
%retval = alloca i32, align 4
store i32 0, i32* %retval
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i8* getelementptr inbounds ([32 x i8]* @buffer, i32 0, i32 0))
- %call1 = call i8* @strcpy(i8* getelementptr inbounds ([32 x i8]* @buffer, i32 0, i32 0), i8* getelementptr inbounds ([25 x i8]* @.str1, i32 0, i32 0)) #3
- call void @llvm.clear_cache(i8* getelementptr inbounds ([32 x i8]* @buffer, i32 0, i32 0), i8* getelementptr inbounds (i8* getelementptr inbounds ([32 x i8]* @buffer, i32 0, i32 0), i32 32)) #3
- %call3 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i8* getelementptr inbounds ([32 x i8]* @buffer, i32 0, i32 0))
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i8* getelementptr inbounds ([32 x i8], [32 x i8]* @buffer, i32 0, i32 0))
+ %call1 = call i8* @strcpy(i8* getelementptr inbounds ([32 x i8], [32 x i8]* @buffer, i32 0, i32 0), i8* getelementptr inbounds ([25 x i8], [25 x i8]* @.str1, i32 0, i32 0)) #3
+ call void @llvm.clear_cache(i8* getelementptr inbounds ([32 x i8], [32 x i8]* @buffer, i32 0, i32 0), i8* getelementptr inbounds (i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @buffer, i32 0, i32 0), i32 32)) #3
+ %call3 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i8* getelementptr inbounds ([32 x i8], [32 x i8]* @buffer, i32 0, i32 0))
ret i32 0
}
diff --git a/test/CodeGen/X86/call-push.ll b/test/CodeGen/X86/call-push.ll
index ccb98fefae2a..6bcb5d665618 100644
--- a/test/CodeGen/X86/call-push.ll
+++ b/test/CodeGen/X86/call-push.ll
@@ -10,9 +10,9 @@ define i32 @decode_byte(%struct.decode_t* %decode) nounwind {
; CHECK: jmp
; CHECK: popl
entry:
- %tmp2 = getelementptr %struct.decode_t* %decode, i32 0, i32 4 ; <i16*> [#uses=1]
+ %tmp2 = getelementptr %struct.decode_t, %struct.decode_t* %decode, i32 0, i32 4 ; <i16*> [#uses=1]
%tmp23 = bitcast i16* %tmp2 to i32* ; <i32*> [#uses=1]
- %tmp4 = load i32* %tmp23 ; <i32> [#uses=1]
+ %tmp4 = load i32, i32* %tmp23 ; <i32> [#uses=1]
%tmp514 = lshr i32 %tmp4, 24 ; <i32> [#uses=1]
%tmp56 = trunc i32 %tmp514 to i8 ; <i8> [#uses=1]
%tmp7 = icmp eq i8 %tmp56, 0 ; <i1> [#uses=1]
diff --git a/test/CodeGen/X86/cas.ll b/test/CodeGen/X86/cas.ll
index ec519c646f69..7807bb97f5b9 100644
--- a/test/CodeGen/X86/cas.ll
+++ b/test/CodeGen/X86/cas.ll
@@ -24,14 +24,14 @@ entry:
store float* %p, float** %p.addr, align 8
store float* %expected, float** %expected.addr, align 8
store float %desired, float* %desired.addr, align 4
- %0 = load float** %expected.addr, align 8
- %1 = load float** %expected.addr, align 8
- %2 = load float* %1, align 4
- %3 = load float* %desired.addr, align 4
- %4 = load float** %p.addr, align 8
+ %0 = load float*, float** %expected.addr, align 8
+ %1 = load float*, float** %expected.addr, align 8
+ %2 = load float, float* %1, align 4
+ %3 = load float, float* %desired.addr, align 4
+ %4 = load float*, float** %p.addr, align 8
%5 = call i8 asm sideeffect "lock; cmpxchg $3, $4; mov $2, $1; sete $0", "={ax},=*rm,{ax},q,*m,~{memory},~{cc},~{dirflag},~{fpsr},~{flags}"(float* %0, float %2, float %3, float* %4) nounwind
store i8 %5, i8* %success, align 1
- %6 = load i8* %success, align 1
+ %6 = load i8, i8* %success, align 1
%tobool = trunc i8 %6 to i1
ret i1 %tobool
}
@@ -52,16 +52,16 @@ entry:
store i8* %expected, i8** %expected.addr, align 8
%frombool = zext i1 %desired to i8
store i8 %frombool, i8* %desired.addr, align 1
- %0 = load i8** %expected.addr, align 8
- %1 = load i8** %expected.addr, align 8
- %2 = load i8* %1, align 1
+ %0 = load i8*, i8** %expected.addr, align 8
+ %1 = load i8*, i8** %expected.addr, align 8
+ %2 = load i8, i8* %1, align 1
%tobool = trunc i8 %2 to i1
- %3 = load i8* %desired.addr, align 1
+ %3 = load i8, i8* %desired.addr, align 1
%tobool1 = trunc i8 %3 to i1
- %4 = load i8** %p.addr, align 8
+ %4 = load i8*, i8** %p.addr, align 8
%5 = call i8 asm sideeffect "lock; cmpxchg $3, $4; mov $2, $1; sete $0", "={ax},=*rm,{ax},q,*m,~{memory},~{cc},~{dirflag},~{fpsr},~{flags}"(i8* %0, i1 %tobool, i1 %tobool1, i8* %4) nounwind
store i8 %5, i8* %success, align 1
- %6 = load i8* %success, align 1
+ %6 = load i8, i8* %success, align 1
%tobool2 = trunc i8 %6 to i1
ret i1 %tobool2
}
diff --git a/test/CodeGen/X86/catch.ll b/test/CodeGen/X86/catch.ll
index 6f7021360e1f..64e92783ac98 100644
--- a/test/CodeGen/X86/catch.ll
+++ b/test/CodeGen/X86/catch.ll
@@ -14,7 +14,7 @@ invoke.cont:
ret void
lpad:
%tmp14 = landingpad { i8*, i32 } personality i8* bitcast (void ()* @h to i8*)
- catch i8* getelementptr inbounds ([12 x i8]* @str, i64 0, i64 0)
+ catch i8* getelementptr inbounds ([12 x i8], [12 x i8]* @str, i64 0, i64 0)
ret void
}
declare void @g()
diff --git a/test/CodeGen/X86/cfi_enforcing.ll b/test/CodeGen/X86/cfi_enforcing.ll
deleted file mode 100644
index bcad8c168f24..000000000000
--- a/test/CodeGen/X86/cfi_enforcing.ll
+++ /dev/null
@@ -1,34 +0,0 @@
-; RUN: llc -mtriple=i386-unknown-linux-gnu -fcfi -cfi-enforcing <%s | FileCheck --check-prefix=X86 %s
-; RUN: llc -mtriple=x86_64-unknown-linux-gnu -fcfi -cfi-enforcing <%s | FileCheck --check-prefix=X86-64 %s
-
-define void @indirect_fun() unnamed_addr jumptable {
- ret void
-}
-
-define i32 @m(void ()* %fun) {
- call void ()* %fun()
-; CHECK: subl
-; X86-64: andq $8,
-; X86-64: leaq __llvm_jump_instr_table_0_1({{%[a-z0-9]+}}), [[REG:%[a-z0-9]+]]
-; X86-64-NOT: callq __llvm_cfi_pointer_warning
-; X86-64: callq *[[REG]]
-; X86: andl $8,
-; X86: leal __llvm_jump_instr_table_0_1({{%[a-z0-9]+}}), [[REG:%[a-z0-9]+]]
-; X86-NOT: calll __llvm_cfi_pointer_warning
-; X86: calll *[[REG]]
- ret i32 0
-}
-
-define void ()* @get_fun() {
- ret void ()* @indirect_fun
-}
-
-define i32 @main(i32 %argc, i8** %argv) {
- %f = call void ()* ()* @get_fun()
- %a = call i32 @m(void ()* %f)
- ret i32 %a
-}
-
-; CHECK: .align 8
-; CHECK: __llvm_jump_instr_table_0_1:
-; CHECK: jmp indirect_fun@PLT
diff --git a/test/CodeGen/X86/cfi_invoke.ll b/test/CodeGen/X86/cfi_invoke.ll
deleted file mode 100644
index dd0d42a59c3a..000000000000
--- a/test/CodeGen/X86/cfi_invoke.ll
+++ /dev/null
@@ -1,35 +0,0 @@
-; RUN: llc <%s -fcfi -cfi-type=sub | FileCheck %s
-; ModuleID = 'test.cc'
-target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-target triple = "x86_64-unknown-linux-gnu"
-
-declare i32 @__gxx_personality_v0(...)
-
-@_ZTIPKc = external constant i8*
-@_ZTIi = external constant i8*
-
-define void @f() unnamed_addr jumptable {
- ret void
-}
-
-@a = global void ()* @f
-
-; Make sure invoke gets targeted as well as regular calls
-define void @_Z3foov(void ()* %f) uwtable ssp {
-; CHECK-LABEL: _Z3foov:
- entry:
- invoke void %f()
- to label %try.cont unwind label %lpad
-; CHECK: callq __llvm_cfi_pointer_warning
-; CHECK: callq *%rbx
-
- lpad:
- %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
- catch i8* bitcast (i8** @_ZTIi to i8*)
- filter [1 x i8*] [i8* bitcast (i8** @_ZTIPKc to i8*)]
- ret void
-
- try.cont:
- ret void
-}
-
diff --git a/test/CodeGen/X86/cfi_non_default_function.ll b/test/CodeGen/X86/cfi_non_default_function.ll
deleted file mode 100644
index 29774a1d4425..000000000000
--- a/test/CodeGen/X86/cfi_non_default_function.ll
+++ /dev/null
@@ -1,27 +0,0 @@
-; RUN: llc -fcfi -cfi-func-name=cfi_new_failure <%s | FileCheck %s
-
-target triple = "x86_64-unknown-linux-gnu"
-define void @indirect_fun() unnamed_addr jumptable {
- ret void
-}
-
-define i32 @m(void ()* %fun) {
-; CHECK-LABEL: @m
- call void ()* %fun()
-; CHECK: callq cfi_new_failure
- ret i32 0
-}
-
-define void ()* @get_fun() {
- ret void ()* @indirect_fun
-}
-
-define i32 @main(i32 %argc, i8** %argv) {
- %f = call void ()* ()* @get_fun()
- %a = call i32 @m(void ()* %f)
- ret i32 %a
-}
-
-; CHECK: .align 8
-; CHECK: __llvm_jump_instr_table_0_1:
-; CHECK: jmp indirect_fun@PLT
diff --git a/test/CodeGen/X86/cfi_simple_indirect_call.ll b/test/CodeGen/X86/cfi_simple_indirect_call.ll
deleted file mode 100644
index 0ee118d984ea..000000000000
--- a/test/CodeGen/X86/cfi_simple_indirect_call.ll
+++ /dev/null
@@ -1,43 +0,0 @@
-; RUN: llc -fcfi -cfi-type=sub <%s | FileCheck --check-prefix=SUB %s
-; RUN: llc -fcfi -cfi-type=add <%s | FileCheck --check-prefix=ADD %s
-; RUN: llc -fcfi -cfi-type=ror <%s | FileCheck --check-prefix=ROR %s
-
-target triple = "x86_64-unknown-linux-gnu"
-
-define void @indirect_fun() unnamed_addr jumptable {
- ret void
-}
-
-define i32 @m(void ()* %fun) {
- call void ()* %fun()
-; SUB: subl
-; SUB: andq $8
-; SUB-LABEL: leaq __llvm_jump_instr_table_0_1
-; SUB-LABEL: callq __llvm_cfi_pointer_warning
-
-; ROR: subq
-; ROR: rolq $61
-; ROR: testq
-; ROR-LABEL: callq __llvm_cfi_pointer_warning
-
-; ADD: andq $8
-; ADD-LABEL: leaq __llvm_jump_instr_table_0_1
-; ADD: cmpq
-; ADD-LABEL: callq __llvm_cfi_pointer_warning
-ret i32 0
-}
-
-define void ()* @get_fun() {
- ret void ()* @indirect_fun
-}
-
-define i32 @main(i32 %argc, i8** %argv) {
- %f = call void ()* ()* @get_fun()
- %a = call i32 @m(void ()* %f)
- ret i32 %a
-}
-; SUB: .text
-; SUB: .align 8
-; SUB-LABEL: .type __llvm_jump_instr_table_0_1,@function
-; SUB-LABEL:__llvm_jump_instr_table_0_1:
-; SUB-LABEL: jmp indirect_fun@PLT
diff --git a/test/CodeGen/X86/cfstring.ll b/test/CodeGen/X86/cfstring.ll
index cae432098907..3eeb8d2890cc 100644
--- a/test/CodeGen/X86/cfstring.ll
+++ b/test/CodeGen/X86/cfstring.ll
@@ -22,14 +22,14 @@
@isLogVisible = global i8 0, align 1
@__CFConstantStringClassReference = external global [0 x i32]
@.str3 = private unnamed_addr constant [1 x i8] zeroinitializer, align 1
-@_unnamed_cfstring_4 = private constant %struct.NSConstantString { i32* getelementptr inbounds ([0 x i32]* @__CFConstantStringClassReference, i32 0, i32 0), i32 1992, i8* getelementptr inbounds ([1 x i8]* @.str3, i32 0, i32 0), i32 0 }, section "__DATA,__cfstring"
+@_unnamed_cfstring_4 = private constant %struct.NSConstantString { i32* getelementptr inbounds ([0 x i32], [0 x i32]* @__CFConstantStringClassReference, i32 0, i32 0), i32 1992, i8* getelementptr inbounds ([1 x i8], [1 x i8]* @.str3, i32 0, i32 0), i32 0 }, section "__DATA,__cfstring"
@null.array = weak_odr constant [1 x i8] zeroinitializer, align 1
define linkonce_odr void @bar() nounwind ssp align 2 {
entry:
%stack = alloca i8*, align 4
%call = call %0* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to %0* (i8*, i8*, %0*)*)(i8* null, i8* null, %0* bitcast (%struct.NSConstantString* @_unnamed_cfstring_4 to %0*))
- store i8* getelementptr inbounds ([1 x i8]* @null.array, i32 0, i32 0), i8** %stack, align 4
+ store i8* getelementptr inbounds ([1 x i8], [1 x i8]* @null.array, i32 0, i32 0), i8** %stack, align 4
ret void
}
diff --git a/test/CodeGen/X86/chain_order.ll b/test/CodeGen/X86/chain_order.ll
index 72e6f78bdef7..442786a47cac 100644
--- a/test/CodeGen/X86/chain_order.ll
+++ b/test/CodeGen/X86/chain_order.ll
@@ -12,13 +12,13 @@
; A test from pifft (after SLP-vectorization) that fails when we drop the chain on newly merged loads.
define void @cftx020(double* nocapture %a) {
entry:
- %0 = load double* %a, align 8
- %arrayidx1 = getelementptr inbounds double* %a, i64 2
- %1 = load double* %arrayidx1, align 8
- %arrayidx2 = getelementptr inbounds double* %a, i64 1
- %2 = load double* %arrayidx2, align 8
- %arrayidx3 = getelementptr inbounds double* %a, i64 3
- %3 = load double* %arrayidx3, align 8
+ %0 = load double, double* %a, align 8
+ %arrayidx1 = getelementptr inbounds double, double* %a, i64 2
+ %1 = load double, double* %arrayidx1, align 8
+ %arrayidx2 = getelementptr inbounds double, double* %a, i64 1
+ %2 = load double, double* %arrayidx2, align 8
+ %arrayidx3 = getelementptr inbounds double, double* %a, i64 3
+ %3 = load double, double* %arrayidx3, align 8
%4 = insertelement <2 x double> undef, double %0, i32 0
%5 = insertelement <2 x double> %4, double %3, i32 1
%6 = insertelement <2 x double> undef, double %1, i32 0
diff --git a/test/CodeGen/X86/change-compare-stride-1.ll b/test/CodeGen/X86/change-compare-stride-1.ll
index b45b404c0f3c..c5480ba2b490 100644
--- a/test/CodeGen/X86/change-compare-stride-1.ll
+++ b/test/CodeGen/X86/change-compare-stride-1.ll
@@ -31,7 +31,7 @@ bb2.outer: ; preds = %bb4, %bb4.thread
bb2: ; preds = %bb2, %bb2.outer
%indvar = phi i64 [ 0, %bb2.outer ], [ %indvar.next, %bb2 ] ; <i64> [#uses=3]
%indvar16 = trunc i64 %indvar to i16 ; <i16> [#uses=1]
- %ctg2 = getelementptr i8* %out, i64 %tmp21 ; <i8*> [#uses=1]
+ %ctg2 = getelementptr i8, i8* %out, i64 %tmp21 ; <i8*> [#uses=1]
%tmp22 = ptrtoint i8* %ctg2 to i64 ; <i64> [#uses=1]
%tmp24 = sub i64 %tmp22, %indvar ; <i64> [#uses=1]
%out_addr.0.reg2mem.0 = inttoptr i64 %tmp24 to i8* ; <i8*> [#uses=1]
@@ -40,39 +40,39 @@ bb2: ; preds = %bb2, %bb2.outer
%3 = add i32 %1, %2 ; <i32> [#uses=9]
%4 = add i32 %3, -481 ; <i32> [#uses=1]
%5 = zext i32 %4 to i64 ; <i64> [#uses=1]
- %6 = getelementptr i8* %in, i64 %5 ; <i8*> [#uses=1]
- %7 = load i8* %6, align 1 ; <i8> [#uses=1]
+ %6 = getelementptr i8, i8* %in, i64 %5 ; <i8*> [#uses=1]
+ %7 = load i8, i8* %6, align 1 ; <i8> [#uses=1]
%8 = add i32 %3, -480 ; <i32> [#uses=1]
%9 = zext i32 %8 to i64 ; <i64> [#uses=1]
- %10 = getelementptr i8* %in, i64 %9 ; <i8*> [#uses=1]
- %11 = load i8* %10, align 1 ; <i8> [#uses=1]
+ %10 = getelementptr i8, i8* %in, i64 %9 ; <i8*> [#uses=1]
+ %11 = load i8, i8* %10, align 1 ; <i8> [#uses=1]
%12 = add i32 %3, -479 ; <i32> [#uses=1]
%13 = zext i32 %12 to i64 ; <i64> [#uses=1]
- %14 = getelementptr i8* %in, i64 %13 ; <i8*> [#uses=1]
- %15 = load i8* %14, align 1 ; <i8> [#uses=1]
+ %14 = getelementptr i8, i8* %in, i64 %13 ; <i8*> [#uses=1]
+ %15 = load i8, i8* %14, align 1 ; <i8> [#uses=1]
%16 = add i32 %3, -1 ; <i32> [#uses=1]
%17 = zext i32 %16 to i64 ; <i64> [#uses=1]
- %18 = getelementptr i8* %in, i64 %17 ; <i8*> [#uses=1]
- %19 = load i8* %18, align 1 ; <i8> [#uses=1]
+ %18 = getelementptr i8, i8* %in, i64 %17 ; <i8*> [#uses=1]
+ %19 = load i8, i8* %18, align 1 ; <i8> [#uses=1]
%20 = zext i32 %3 to i64 ; <i64> [#uses=1]
- %21 = getelementptr i8* %in, i64 %20 ; <i8*> [#uses=1]
- %22 = load i8* %21, align 1 ; <i8> [#uses=1]
+ %21 = getelementptr i8, i8* %in, i64 %20 ; <i8*> [#uses=1]
+ %22 = load i8, i8* %21, align 1 ; <i8> [#uses=1]
%23 = add i32 %3, 1 ; <i32> [#uses=1]
%24 = zext i32 %23 to i64 ; <i64> [#uses=1]
- %25 = getelementptr i8* %in, i64 %24 ; <i8*> [#uses=1]
- %26 = load i8* %25, align 1 ; <i8> [#uses=1]
+ %25 = getelementptr i8, i8* %in, i64 %24 ; <i8*> [#uses=1]
+ %26 = load i8, i8* %25, align 1 ; <i8> [#uses=1]
%27 = add i32 %3, 481 ; <i32> [#uses=1]
%28 = zext i32 %27 to i64 ; <i64> [#uses=1]
- %29 = getelementptr i8* %in, i64 %28 ; <i8*> [#uses=1]
- %30 = load i8* %29, align 1 ; <i8> [#uses=1]
+ %29 = getelementptr i8, i8* %in, i64 %28 ; <i8*> [#uses=1]
+ %30 = load i8, i8* %29, align 1 ; <i8> [#uses=1]
%31 = add i32 %3, 480 ; <i32> [#uses=1]
%32 = zext i32 %31 to i64 ; <i64> [#uses=1]
- %33 = getelementptr i8* %in, i64 %32 ; <i8*> [#uses=1]
- %34 = load i8* %33, align 1 ; <i8> [#uses=1]
+ %33 = getelementptr i8, i8* %in, i64 %32 ; <i8*> [#uses=1]
+ %34 = load i8, i8* %33, align 1 ; <i8> [#uses=1]
%35 = add i32 %3, 479 ; <i32> [#uses=1]
%36 = zext i32 %35 to i64 ; <i64> [#uses=1]
- %37 = getelementptr i8* %in, i64 %36 ; <i8*> [#uses=1]
- %38 = load i8* %37, align 1 ; <i8> [#uses=1]
+ %37 = getelementptr i8, i8* %in, i64 %36 ; <i8*> [#uses=1]
+ %38 = load i8, i8* %37, align 1 ; <i8> [#uses=1]
%39 = add i8 %11, %7 ; <i8> [#uses=1]
%40 = add i8 %39, %15 ; <i8> [#uses=1]
%41 = add i8 %40, %19 ; <i8> [#uses=1]
diff --git a/test/CodeGen/X86/clobber-fi0.ll b/test/CodeGen/X86/clobber-fi0.ll
index 4876c351a413..02f1a1616db2 100644
--- a/test/CodeGen/X86/clobber-fi0.ll
+++ b/test/CodeGen/X86/clobber-fi0.ll
@@ -20,17 +20,17 @@ bb:
br label %bb4
bb4: ; preds = %bb4, %bb
- %tmp6 = load i32* %tmp2, align 4 ; [#uses=1 type=i32]
+ %tmp6 = load i32, i32* %tmp2, align 4 ; [#uses=1 type=i32]
%tmp7 = add i32 %tmp6, -1 ; [#uses=2 type=i32]
store i32 %tmp7, i32* %tmp2, align 4
%tmp8 = icmp eq i32 %tmp7, 0 ; [#uses=1 type=i1]
- %tmp9 = load i32* %tmp ; [#uses=1 type=i32]
+ %tmp9 = load i32, i32* %tmp ; [#uses=1 type=i32]
%tmp10 = add i32 %tmp9, -1 ; [#uses=1 type=i32]
store i32 %tmp10, i32* %tmp3
br i1 %tmp8, label %bb11, label %bb4
bb11: ; preds = %bb4
- %tmp12 = load i32* %tmp, align 4 ; [#uses=1 type=i32]
+ %tmp12 = load i32, i32* %tmp, align 4 ; [#uses=1 type=i32]
ret i32 %tmp12
}
diff --git a/test/CodeGen/X86/cmov-double.ll b/test/CodeGen/X86/cmov-double.ll
new file mode 100644
index 000000000000..994a027596c6
--- /dev/null
+++ b/test/CodeGen/X86/cmov-double.ll
@@ -0,0 +1,52 @@
+; RUN: llc -o - %s | FileCheck %s
+target triple = "x86_64-unknown-unknown"
+
+; A select with an and i1/or i1 condition should be implemented as a series of
+; 2 cmovs, not by materializing both conditions and combining them with and/or.
+
+define i32 @select_and(i32 %a0, i32 %a1, float %a2, float %a3, i32 %a4, i32 %a5) {
+; CHECK-LABEL: select_and
+; CHECK-NOT: set
+; CHECK-NOT: and[lb]
+; CHECK-NOT: test
+; CHECK: cmov
+; CHECK: cmov
+ %cmp0 = icmp ult i32 %a0, %a1
+ %cmp1 = fcmp olt float %a2, %a3
+ %and = and i1 %cmp0, %cmp1
+ %res = select i1 %and, i32 %a4, i32 %a5
+ ret i32 %res
+}
+
+define i32 @select_or(i32 %a0, i32 %a1, float %a2, float %a3, i32 %a4, i32 %a5) {
+; A select with an or i1 condition should be implemented as a series of 2 cmovs,
+; not by materializing both conditions and combining them with or.
+; CHECK-LABEL: select_or
+; CHECK-NOT: set
+; CHECK-NOT: or[lb]
+; CHECK-NOT: test
+; CHECK: cmov
+; CHECK: cmov
+ %cmp0 = icmp ult i32 %a0, %a1
+ %cmp1 = fcmp olt float %a2, %a3
+ %and = or i1 %cmp0, %cmp1
+ %res = select i1 %and, i32 %a4, i32 %a5
+ ret i32 %res
+}
+
+; If one of the conditions is materialized as a 0/1 value anyway, then the
+; sequence of 2 cmovs should not be used.
+
+@var32 = global i32 0
+define i32 @select_noopt(i32 %a0, i32 %a1, i32 %a2, i32 %a3, i32 %a4) {
+; CHECK-LABEL: select_noopt
+; CHECK: cmov
+; CHECK-NOT: cmov
+ %cmp0 = icmp ult i32 %a0, %a1
+ %cmp1 = icmp ult i32 %a1, %a2
+ %or = or i1 %cmp0, %cmp1
+ %zero_one = zext i1 %or to i32
+ store volatile i32 %zero_one, i32* @var32
+ %res = select i1 %or, i32 %a3, i32 %a4
+ ret i32 %res
+}
diff --git a/test/CodeGen/X86/cmov-into-branch.ll b/test/CodeGen/X86/cmov-into-branch.ll
index cad8dd307b34..909440800a56 100644
--- a/test/CodeGen/X86/cmov-into-branch.ll
+++ b/test/CodeGen/X86/cmov-into-branch.ll
@@ -2,7 +2,7 @@
; cmp with single-use load, should not form cmov.
define i32 @test1(double %a, double* nocapture %b, i32 %x, i32 %y) {
- %load = load double* %b, align 8
+ %load = load double, double* %b, align 8
%cmp = fcmp olt double %load, %a
%cond = select i1 %cmp, i32 %x, i32 %y
ret i32 %cond
@@ -25,7 +25,7 @@ define i32 @test2(double %a, double %b, i32 %x, i32 %y) {
; Multiple uses of %a, should not form cmov.
define i32 @test3(i32 %a, i32* nocapture %b, i32 %x) {
- %load = load i32* %b, align 4
+ %load = load i32, i32* %b, align 4
%cmp = icmp ult i32 %load, %a
%cond = select i1 %cmp, i32 %a, i32 %x
ret i32 %cond
@@ -38,7 +38,7 @@ define i32 @test3(i32 %a, i32* nocapture %b, i32 %x) {
; Multiple uses of the load.
define i32 @test4(i32 %a, i32* nocapture %b, i32 %x, i32 %y) {
- %load = load i32* %b, align 4
+ %load = load i32, i32* %b, align 4
%cmp = icmp ult i32 %load, %a
%cond = select i1 %cmp, i32 %x, i32 %y
%add = add i32 %cond, %load
@@ -50,7 +50,7 @@ define i32 @test4(i32 %a, i32* nocapture %b, i32 %x, i32 %y) {
; Multiple uses of the cmp.
define i32 @test5(i32 %a, i32* nocapture %b, i32 %x, i32 %y) {
- %load = load i32* %b, align 4
+ %load = load i32, i32* %b, align 4
%cmp = icmp ult i32 %load, %a
%cmp1 = icmp ugt i32 %load, %a
%cond = select i1 %cmp1, i32 %a, i32 %y
diff --git a/test/CodeGen/X86/cmov.ll b/test/CodeGen/X86/cmov.ll
index 355c6b4165b9..f2f36b15d0c5 100644
--- a/test/CodeGen/X86/cmov.ll
+++ b/test/CodeGen/X86/cmov.ll
@@ -12,7 +12,7 @@ entry:
%0 = lshr i32 %x, %n ; <i32> [#uses=1]
%1 = and i32 %0, 1 ; <i32> [#uses=1]
%toBool = icmp eq i32 %1, 0 ; <i1> [#uses=1]
- %v = load i32* %vp
+ %v = load i32, i32* %vp
%.0 = select i1 %toBool, i32 %v, i32 12 ; <i32> [#uses=1]
ret i32 %.0
}
@@ -27,7 +27,7 @@ entry:
%0 = lshr i32 %x, %n ; <i32> [#uses=1]
%1 = and i32 %0, 1 ; <i32> [#uses=1]
%toBool = icmp eq i32 %1, 0 ; <i1> [#uses=1]
- %v = load i32* %vp
+ %v = load i32, i32* %vp
%.0 = select i1 %toBool, i32 12, i32 %v ; <i32> [#uses=1]
ret i32 %.0
}
@@ -71,7 +71,7 @@ define void @test3(i64 %a, i64 %b, i1 %p) nounwind {
define i32 @test4() nounwind {
entry:
- %0 = load i8* @g_3, align 1 ; <i8> [#uses=2]
+ %0 = load i8, i8* @g_3, align 1 ; <i8> [#uses=2]
%1 = sext i8 %0 to i32 ; <i32> [#uses=1]
%.lobit.i = lshr i8 %0, 7 ; <i8> [#uses=1]
%tmp.i = zext i8 %.lobit.i to i32 ; <i32> [#uses=1]
@@ -79,12 +79,12 @@ entry:
%iftmp.17.0.i.i = ashr i32 %1, %tmp.not.i ; <i32> [#uses=1]
%retval56.i.i = trunc i32 %iftmp.17.0.i.i to i8 ; <i8> [#uses=1]
%2 = icmp eq i8 %retval56.i.i, 0 ; <i1> [#uses=2]
- %g_96.promoted.i = load i8* @g_96 ; <i8> [#uses=3]
+ %g_96.promoted.i = load i8, i8* @g_96 ; <i8> [#uses=3]
%3 = icmp eq i8 %g_96.promoted.i, 0 ; <i1> [#uses=2]
br i1 %3, label %func_4.exit.i, label %bb.i.i.i
bb.i.i.i: ; preds = %entry
- %4 = load volatile i8* @g_100, align 1 ; <i8> [#uses=0]
+ %4 = load volatile i8, i8* @g_100, align 1 ; <i8> [#uses=0]
br label %func_4.exit.i
; CHECK-LABEL: test4:
@@ -101,14 +101,14 @@ func_4.exit.i: ; preds = %bb.i.i.i, %entry
br i1 %brmerge.i, label %func_1.exit, label %bb.i.i
bb.i.i: ; preds = %func_4.exit.i
- %5 = load volatile i8* @g_100, align 1 ; <i8> [#uses=0]
+ %5 = load volatile i8, i8* @g_100, align 1 ; <i8> [#uses=0]
br label %func_1.exit
func_1.exit: ; preds = %bb.i.i, %func_4.exit.i
%g_96.tmp.0.i = phi i8 [ %g_96.promoted.i, %bb.i.i ], [ %.mux.i, %func_4.exit.i ] ; <i8> [#uses=2]
store i8 %g_96.tmp.0.i, i8* @g_96
%6 = zext i8 %g_96.tmp.0.i to i32 ; <i32> [#uses=1]
- %7 = tail call i32 (i8*, ...)* @printf(i8* noalias getelementptr ([15 x i8]* @_2E_str, i64 0, i64 0), i32 %6) nounwind ; <i32> [#uses=0]
+ %7 = tail call i32 (i8*, ...) @printf(i8* noalias getelementptr ([15 x i8], [15 x i8]* @_2E_str, i64 0, i64 0), i32 %6) nounwind ; <i32> [#uses=0]
ret i32 0
}
@@ -125,7 +125,7 @@ entry:
; CHECK: orl $-2, %eax
; CHECK: ret
- %0 = load i32* %P, align 4 ; <i32> [#uses=1]
+ %0 = load i32, i32* %P, align 4 ; <i32> [#uses=1]
%1 = icmp sgt i32 %0, 41 ; <i1> [#uses=1]
%iftmp.0.0 = select i1 %1, i32 -1, i32 -2 ; <i32> [#uses=1]
ret i32 %iftmp.0.0
@@ -138,7 +138,7 @@ entry:
; CHECK: movzbl %al, %eax
; CHECK: leal 4(%rax,%rax,8), %eax
; CHECK: ret
- %0 = load i32* %P, align 4 ; <i32> [#uses=1]
+ %0 = load i32, i32* %P, align 4 ; <i32> [#uses=1]
%1 = icmp sgt i32 %0, 41 ; <i1> [#uses=1]
%iftmp.0.0 = select i1 %1, i32 4, i32 13 ; <i32> [#uses=1]
ret i32 %iftmp.0.0
diff --git a/test/CodeGen/X86/cmovcmov.ll b/test/CodeGen/X86/cmovcmov.ll
new file mode 100644
index 000000000000..d3d9748d6530
--- /dev/null
+++ b/test/CodeGen/X86/cmovcmov.ll
@@ -0,0 +1,226 @@
+; RUN: llc < %s -asm-verbose=false -mtriple=x86_64-unknown-linux | FileCheck %s --check-prefix=CHECK --check-prefix=CMOV
+; RUN: llc < %s -asm-verbose=false -mtriple=i686-unknown-linux | FileCheck %s --check-prefix=CHECK --check-prefix=NOCMOV
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
+; Test 2xCMOV patterns exposed after legalization.
+; One way to do that is with (select (fcmp une/oeq)), which gets
+; legalized to setp/setne.
+
+; CHECK-LABEL: test_select_fcmp_oeq_i32:
+
+; CMOV-NEXT: ucomiss %xmm1, %xmm0
+; CMOV-NEXT: cmovnel %esi, %edi
+; CMOV-NEXT: cmovpl %esi, %edi
+; CMOV-NEXT: movl %edi, %eax
+; CMOV-NEXT: retq
+
+; NOCMOV-NEXT: flds 8(%esp)
+; NOCMOV-NEXT: flds 4(%esp)
+; NOCMOV-NEXT: fucompp
+; NOCMOV-NEXT: fnstsw %ax
+; NOCMOV-NEXT: sahf
+; NOCMOV-NEXT: leal 16(%esp), %eax
+; NOCMOV-NEXT: jne [[TBB:.LBB[0-9_]+]]
+; NOCMOV-NEXT: jp [[TBB]]
+; NOCMOV-NEXT: leal 12(%esp), %eax
+; NOCMOV-NEXT:[[TBB]]:
+; NOCMOV-NEXT: movl (%eax), %eax
+; NOCMOV-NEXT: retl
+define i32 @test_select_fcmp_oeq_i32(float %a, float %b, i32 %c, i32 %d) #0 {
+entry:
+ %cmp = fcmp oeq float %a, %b
+ %r = select i1 %cmp, i32 %c, i32 %d
+ ret i32 %r
+}
+
+; CHECK-LABEL: test_select_fcmp_oeq_i64:
+
+; CMOV-NEXT: ucomiss %xmm1, %xmm0
+; CMOV-NEXT: cmovneq %rsi, %rdi
+; CMOV-NEXT: cmovpq %rsi, %rdi
+; CMOV-NEXT: movq %rdi, %rax
+; CMOV-NEXT: retq
+
+; NOCMOV-NEXT: flds 8(%esp)
+; NOCMOV-NEXT: flds 4(%esp)
+; NOCMOV-NEXT: fucompp
+; NOCMOV-NEXT: fnstsw %ax
+; NOCMOV-NEXT: sahf
+; NOCMOV-NEXT: leal 20(%esp), %ecx
+; NOCMOV-NEXT: jne [[TBB:.LBB[0-9_]+]]
+; NOCMOV-NEXT: jp [[TBB]]
+; NOCMOV-NEXT: leal 12(%esp), %ecx
+; NOCMOV-NEXT: [[TBB]]:
+; NOCMOV-NEXT: movl (%ecx), %eax
+; NOCMOV-NEXT: orl $4, %ecx
+; NOCMOV-NEXT: movl (%ecx), %edx
+; NOCMOV-NEXT: retl
+define i64 @test_select_fcmp_oeq_i64(float %a, float %b, i64 %c, i64 %d) #0 {
+entry:
+ %cmp = fcmp oeq float %a, %b
+ %r = select i1 %cmp, i64 %c, i64 %d
+ ret i64 %r
+}
+
+; CHECK-LABEL: test_select_fcmp_une_i64:
+
+; CMOV-NEXT: ucomiss %xmm1, %xmm0
+; CMOV-NEXT: cmovneq %rdi, %rsi
+; CMOV-NEXT: cmovpq %rdi, %rsi
+; CMOV-NEXT: movq %rsi, %rax
+; CMOV-NEXT: retq
+
+; NOCMOV-NEXT: flds 8(%esp)
+; NOCMOV-NEXT: flds 4(%esp)
+; NOCMOV-NEXT: fucompp
+; NOCMOV-NEXT: fnstsw %ax
+; NOCMOV-NEXT: sahf
+; NOCMOV-NEXT: leal 12(%esp), %ecx
+; NOCMOV-NEXT: jne [[TBB:.LBB[0-9_]+]]
+; NOCMOV-NEXT: jp [[TBB]]
+; NOCMOV-NEXT: leal 20(%esp), %ecx
+; NOCMOV-NEXT: [[TBB]]:
+; NOCMOV-NEXT: movl (%ecx), %eax
+; NOCMOV-NEXT: orl $4, %ecx
+; NOCMOV-NEXT: movl (%ecx), %edx
+; NOCMOV-NEXT: retl
+define i64 @test_select_fcmp_une_i64(float %a, float %b, i64 %c, i64 %d) #0 {
+entry:
+ %cmp = fcmp une float %a, %b
+ %r = select i1 %cmp, i64 %c, i64 %d
+ ret i64 %r
+}
+
+; CHECK-LABEL: test_select_fcmp_oeq_f64:
+
+; CMOV-NEXT: ucomiss %xmm1, %xmm0
+; CMOV-NEXT: jne [[TBB:.LBB[0-9_]+]]
+; CMOV-NEXT: jp [[TBB]]
+; CMOV-NEXT: movaps %xmm2, %xmm3
+; CMOV-NEXT: [[TBB]]:
+; CMOV-NEXT: movaps %xmm3, %xmm0
+; CMOV-NEXT: retq
+
+; NOCMOV-NEXT: flds 8(%esp)
+; NOCMOV-NEXT: flds 4(%esp)
+; NOCMOV-NEXT: fucompp
+; NOCMOV-NEXT: fnstsw %ax
+; NOCMOV-NEXT: sahf
+; NOCMOV-NEXT: leal 20(%esp), %eax
+; NOCMOV-NEXT: jne [[TBB:.LBB[0-9_]+]]
+; NOCMOV-NEXT: jp [[TBB]]
+; NOCMOV-NEXT: leal 12(%esp), %eax
+; NOCMOV-NEXT: [[TBB]]:
+; NOCMOV-NEXT: fldl (%eax)
+; NOCMOV-NEXT: retl
+define double @test_select_fcmp_oeq_f64(float %a, float %b, double %c, double %d) #0 {
+entry:
+ %cmp = fcmp oeq float %a, %b
+ %r = select i1 %cmp, double %c, double %d
+ ret double %r
+}
+
+; CHECK-LABEL: test_select_fcmp_oeq_v4i32:
+
+; CMOV-NEXT: ucomiss %xmm1, %xmm0
+; CMOV-NEXT: jne [[TBB:.LBB[0-9_]+]]
+; CMOV-NEXT: jp [[TBB]]
+; CMOV-NEXT: movaps %xmm2, %xmm3
+; CMOV-NEXT: [[TBB]]:
+; CMOV-NEXT: movaps %xmm3, %xmm0
+; CMOV-NEXT: retq
+
+; NOCMOV-NEXT: pushl %edi
+; NOCMOV-NEXT: pushl %esi
+; NOCMOV-NEXT: flds 20(%esp)
+; NOCMOV-NEXT: flds 16(%esp)
+; NOCMOV-NEXT: fucompp
+; NOCMOV-NEXT: fnstsw %ax
+; NOCMOV-NEXT: sahf
+; NOCMOV-NEXT: leal 40(%esp), %eax
+; NOCMOV-NEXT: jne [[TBB:.LBB[0-9_]+]]
+; NOCMOV-NEXT: jp [[TBB]]
+; NOCMOV-NEXT: leal 24(%esp), %eax
+; NOCMOV-NEXT: [[TBB]]:
+; NOCMOV-NEXT: movl (%eax), %ecx
+; NOCMOV-NEXT: leal 44(%esp), %edx
+; NOCMOV-NEXT: jne [[TBB:.LBB[0-9_]+]]
+; NOCMOV-NEXT: jp [[TBB]]
+; NOCMOV-NEXT: leal 28(%esp), %edx
+; NOCMOV-NEXT: [[TBB]]:
+; NOCMOV-NEXT: movl 12(%esp), %eax
+; NOCMOV-NEXT: movl (%edx), %edx
+; NOCMOV-NEXT: leal 48(%esp), %esi
+; NOCMOV-NEXT: jne [[TBB:.LBB[0-9_]+]]
+; NOCMOV-NEXT: jp [[TBB]]
+; NOCMOV-NEXT: leal 32(%esp), %esi
+; NOCMOV-NEXT: [[TBB]]:
+; NOCMOV-NEXT: movl (%esi), %esi
+; NOCMOV-NEXT: leal 52(%esp), %edi
+; NOCMOV-NEXT: jne [[TBB:.LBB[0-9_]+]]
+; NOCMOV-NEXT: jp [[TBB]]
+; NOCMOV-NEXT: leal 36(%esp), %edi
+; NOCMOV-NEXT: [[TBB]]:
+; NOCMOV-NEXT: movl (%edi), %edi
+; NOCMOV-NEXT: movl %edi, 12(%eax)
+; NOCMOV-NEXT: movl %esi, 8(%eax)
+; NOCMOV-NEXT: movl %edx, 4(%eax)
+; NOCMOV-NEXT: movl %ecx, (%eax)
+; NOCMOV-NEXT: popl %esi
+; NOCMOV-NEXT: popl %edi
+; NOCMOV-NEXT: retl $4
+define <4 x i32> @test_select_fcmp_oeq_v4i32(float %a, float %b, <4 x i32> %c, <4 x i32> %d) #0 {
+entry:
+ %cmp = fcmp oeq float %a, %b
+ %r = select i1 %cmp, <4 x i32> %c, <4 x i32> %d
+ ret <4 x i32> %r
+}
+
+; Also make sure we catch the original code-sequence of interest:
+
+; CMOV: [[ONE_F32_LCPI:.LCPI.*]]:
+; CMOV-NEXT: .long 1065353216
+
+; CHECK-LABEL: test_zext_fcmp_une:
+; CMOV-NEXT: ucomiss %xmm1, %xmm0
+; CMOV-NEXT: movss [[ONE_F32_LCPI]](%rip), %xmm0
+; CMOV-NEXT: jne [[TBB:.LBB[0-9_]+]]
+; CMOV-NEXT: jp [[TBB]]
+; CMOV-NEXT: xorps %xmm0, %xmm0
+; CMOV-NEXT: [[TBB]]:
+; CMOV-NEXT: retq
+
+; NOCMOV: jne
+; NOCMOV-NEXT: jp
+define float @test_zext_fcmp_une(float %a, float %b) #0 {
+entry:
+ %cmp = fcmp une float %a, %b
+ %conv1 = zext i1 %cmp to i32
+ %conv2 = sitofp i32 %conv1 to float
+ ret float %conv2
+}
+
+; CMOV: [[ONE_F32_LCPI:.LCPI.*]]:
+; CMOV-NEXT: .long 1065353216
+
+; CHECK-LABEL: test_zext_fcmp_oeq:
+; CMOV-NEXT: ucomiss %xmm1, %xmm0
+; CMOV-NEXT: xorps %xmm0, %xmm0
+; CMOV-NEXT: jne [[TBB:.LBB[0-9_]+]]
+; CMOV-NEXT: jp [[TBB]]
+; CMOV-NEXT: movss [[ONE_F32_LCPI]](%rip), %xmm0
+; CMOV-NEXT: [[TBB]]:
+; CMOV-NEXT: retq
+
+; NOCMOV: jne
+; NOCMOV-NEXT: jp
+define float @test_zext_fcmp_oeq(float %a, float %b) #0 {
+entry:
+ %cmp = fcmp oeq float %a, %b
+ %conv1 = zext i1 %cmp to i32
+ %conv2 = sitofp i32 %conv1 to float
+ ret float %conv2
+}
+
+attributes #0 = { nounwind }
diff --git a/test/CodeGen/X86/cmp-fast-isel.ll b/test/CodeGen/X86/cmp-fast-isel.ll
new file mode 100644
index 000000000000..39738fae12d1
--- /dev/null
+++ b/test/CodeGen/X86/cmp-fast-isel.ll
@@ -0,0 +1,45 @@
+; RUN: llc -mtriple=x86_64-linux -fast-isel -show-mc-encoding < %s | FileCheck %s
+
+; pr22854
+
+define i32 @f1(i16 %x) {
+; CHECK-LABEL: f1:
+; CHECK: cmpw $42, %di # encoding: [0x66,0x83,0xff,0x2a]
+bb0:
+ %cmp = icmp ne i16 %x, 42
+ br i1 %cmp, label %bb3, label %bb7
+
+bb3:
+ ret i32 1
+
+bb7:
+ ret i32 2
+}
+
+define i32 @f2(i32 %x) {
+; CHECK-LABEL: f2:
+; CHECK: cmpl $42, %edi # encoding: [0x83,0xff,0x2a]
+bb0:
+ %cmp = icmp ne i32 %x, 42
+ br i1 %cmp, label %bb3, label %bb7
+
+bb3:
+ ret i32 1
+
+bb7:
+ ret i32 2
+}
+
+define i32 @f3(i64 %x) {
+; CHECK-LABEL: f3:
+; CHECK: cmpq $42, %rdi # encoding: [0x48,0x83,0xff,0x2a]
+bb0:
+ %cmp = icmp ne i64 %x, 42
+ br i1 %cmp, label %bb3, label %bb7
+
+bb3:
+ ret i32 1
+
+bb7:
+ ret i32 2
+}
diff --git a/test/CodeGen/X86/cmp.ll b/test/CodeGen/X86/cmp.ll
index 149d53759fe2..584179aacbc9 100644
--- a/test/CodeGen/X86/cmp.ll
+++ b/test/CodeGen/X86/cmp.ll
@@ -1,7 +1,7 @@
; RUN: llc < %s -mtriple=x86_64-apple-darwin10 -show-mc-encoding | FileCheck %s
define i32 @test1(i32 %X, i32* %y) nounwind {
- %tmp = load i32* %y ; <i32> [#uses=1]
+ %tmp = load i32, i32* %y ; <i32> [#uses=1]
%tmp.upgrd.1 = icmp eq i32 %tmp, 0 ; <i1> [#uses=1]
br i1 %tmp.upgrd.1, label %ReturnBlock, label %cond_true
@@ -15,7 +15,7 @@ ReturnBlock: ; preds = %0
}
define i32 @test2(i32 %X, i32* %y) nounwind {
- %tmp = load i32* %y ; <i32> [#uses=1]
+ %tmp = load i32, i32* %y ; <i32> [#uses=1]
%tmp1 = shl i32 %tmp, 3 ; <i32> [#uses=1]
%tmp1.upgrd.2 = icmp eq i32 %tmp1, 0 ; <i1> [#uses=1]
br i1 %tmp1.upgrd.2, label %ReturnBlock, label %cond_true
@@ -30,7 +30,7 @@ ReturnBlock: ; preds = %0
}
define i8 @test2b(i8 %X, i8* %y) nounwind {
- %tmp = load i8* %y ; <i8> [#uses=1]
+ %tmp = load i8, i8* %y ; <i8> [#uses=1]
%tmp1 = shl i8 %tmp, 3 ; <i8> [#uses=1]
%tmp1.upgrd.2 = icmp eq i8 %tmp1, 0 ; <i1> [#uses=1]
br i1 %tmp1.upgrd.2, label %ReturnBlock, label %cond_true
@@ -75,7 +75,7 @@ define i32 @test5(double %A) nounwind {
br i1 %bothcond, label %bb8, label %bb12
bb8:; preds = %entry
- %tmp9 = tail call i32 (...)* @foo( ) nounwind ; <i32> [#uses=1]
+ %tmp9 = tail call i32 (...) @foo( ) nounwind ; <i32> [#uses=1]
ret i32 %tmp9
bb12:; preds = %entry
@@ -89,8 +89,8 @@ declare i32 @foo(...)
define i32 @test6() nounwind align 2 {
%A = alloca {i64, i64}, align 8
- %B = getelementptr inbounds {i64, i64}* %A, i64 0, i32 1
- %C = load i64* %B
+ %B = getelementptr inbounds {i64, i64}, {i64, i64}* %A, i64 0, i32 1
+ %C = load i64, i64* %B
%D = icmp eq i64 %C, 0
br i1 %D, label %T, label %F
T:
diff --git a/test/CodeGen/X86/cmpxchg-clobber-flags.ll b/test/CodeGen/X86/cmpxchg-clobber-flags.ll
index b7995dbdf259..61123930887b 100644
--- a/test/CodeGen/X86/cmpxchg-clobber-flags.ll
+++ b/test/CodeGen/X86/cmpxchg-clobber-flags.ll
@@ -42,7 +42,7 @@ loop_start:
br label %while.condthread-pre-split.i
while.condthread-pre-split.i:
- %.pr.i = load i32* %p, align 4
+ %.pr.i = load i32, i32* %p, align 4
br label %while.cond.i
while.cond.i:
diff --git a/test/CodeGen/X86/cmpxchg-i1.ll b/test/CodeGen/X86/cmpxchg-i1.ll
index a21ab593b078..5f5869f78bba 100644
--- a/test/CodeGen/X86/cmpxchg-i1.ll
+++ b/test/CodeGen/X86/cmpxchg-i1.ll
@@ -68,7 +68,7 @@ define i32 @cmpxchg_use_eflags_and_val(i32* %addr, i32 %offset) {
; Result already in %eax
; CHECK: retq
entry:
- %init = load atomic i32* %addr seq_cst, align 4
+ %init = load atomic i32, i32* %addr seq_cst, align 4
br label %loop
loop:
diff --git a/test/CodeGen/X86/cmpxchg-i128-i1.ll b/test/CodeGen/X86/cmpxchg-i128-i1.ll
index 4dd30013ecab..278e6a4ed75e 100644
--- a/test/CodeGen/X86/cmpxchg-i128-i1.ll
+++ b/test/CodeGen/X86/cmpxchg-i128-i1.ll
@@ -62,7 +62,7 @@ define i128 @cmpxchg_use_eflags_and_val(i128* %addr, i128 %offset) {
; CHECK-NOT: cmpq
; CHECK: jne
entry:
- %init = load atomic i128* %addr seq_cst, align 16
+ %init = load atomic i128, i128* %addr seq_cst, align 16
br label %loop
loop:
diff --git a/test/CodeGen/X86/cmpxchg16b.ll b/test/CodeGen/X86/cmpxchg16b.ll
index 1d5bb85f8d20..d514c0a35f5b 100644
--- a/test/CodeGen/X86/cmpxchg16b.ll
+++ b/test/CodeGen/X86/cmpxchg16b.ll
@@ -4,8 +4,7 @@
define void @t1(i128* nocapture %p) nounwind ssp {
entry:
; CHECK: movl $1, %ebx
-; CHECK: lock
-; CHECK-NEXT: cmpxchg16b
+; CHECK: lock cmpxchg16b
%r = cmpxchg i128* %p, i128 0, i128 1 seq_cst seq_cst
ret void
}
diff --git a/test/CodeGen/X86/coalesce-esp.ll b/test/CodeGen/X86/coalesce-esp.ll
index 400437993879..e0257e68f0f2 100644
--- a/test/CodeGen/X86/coalesce-esp.ll
+++ b/test/CodeGen/X86/coalesce-esp.ll
@@ -18,9 +18,9 @@ entry:
bb4: ; preds = %bb7.backedge, %entry
%indvar = phi i32 [ %indvar.next, %bb7.backedge ], [ 0, %entry ] ; <i32> [#uses=2]
%scevgep24.sum = sub i32 undef, %indvar ; <i32> [#uses=2]
- %scevgep25 = getelementptr i32* %0, i32 %scevgep24.sum ; <i32*> [#uses=1]
- %scevgep27 = getelementptr i32* undef, i32 %scevgep24.sum ; <i32*> [#uses=1]
- %1 = load i32* %scevgep27, align 4 ; <i32> [#uses=0]
+ %scevgep25 = getelementptr i32, i32* %0, i32 %scevgep24.sum ; <i32*> [#uses=1]
+ %scevgep27 = getelementptr i32, i32* undef, i32 %scevgep24.sum ; <i32*> [#uses=1]
+ %1 = load i32, i32* %scevgep27, align 4 ; <i32> [#uses=0]
br i1 undef, label %bb7.backedge, label %bb5
bb5: ; preds = %bb4
diff --git a/test/CodeGen/X86/coalesce-implicitdef.ll b/test/CodeGen/X86/coalesce-implicitdef.ll
index 9be045271d8d..a0766f99496c 100644
--- a/test/CodeGen/X86/coalesce-implicitdef.ll
+++ b/test/CodeGen/X86/coalesce-implicitdef.ll
@@ -71,7 +71,7 @@ for.inc27.backedge: ; preds = %while.end, %if.then
br i1 %tobool17, label %for.inc27.if.end30.loopexit56_crit_edge, label %while.condthread-pre-split
if.then22: ; preds = %while.end
- %1 = load i16* %p2.1, align 2
+ %1 = load i16, i16* %p2.1, align 2
%tobool23 = icmp eq i16 %1, 0
br i1 %tobool23, label %for.inc27.backedge, label %label.loopexit
@@ -89,7 +89,7 @@ for.inc27.if.end30.loopexit56_crit_edge: ; preds = %for.inc27.backedge
if.end30: ; preds = %for.inc27.if.end30.loopexit56_crit_edge, %label.loopexit, %label.preheader, %for.inc
%i.0.load46 = phi i32 [ 0, %for.inc ], [ %i.0.load4669, %label.preheader ], [ %i.0.load4669, %label.loopexit ], [ %i.0.load4669, %for.inc27.if.end30.loopexit56_crit_edge ]
%pi.4 = phi i32* [ %i, %for.inc ], [ %pi.3.ph, %label.preheader ], [ %pi.3.ph, %label.loopexit ], [ %pi.3.ph, %for.inc27.if.end30.loopexit56_crit_edge ]
- %2 = load i32* %pi.4, align 4
+ %2 = load i32, i32* %pi.4, align 4
%tobool31 = icmp eq i32 %2, 0
br i1 %tobool31, label %for.inc34, label %label.preheader
@@ -100,7 +100,7 @@ for.inc34: ; preds = %if.end30
for.end36: ; preds = %for.cond
store i32 1, i32* %i, align 4
- %3 = load i32* @c, align 4
+ %3 = load i32, i32* @c, align 4
%tobool37 = icmp eq i32 %3, 0
br i1 %tobool37, label %label.preheader, label %land.rhs
@@ -111,15 +111,15 @@ land.rhs: ; preds = %for.end36
label.preheader: ; preds = %for.end36, %if.end30, %land.rhs
%i.0.load4669 = phi i32 [ 1, %land.rhs ], [ %i.0.load46, %if.end30 ], [ 1, %for.end36 ]
%pi.3.ph = phi i32* [ %pi.0, %land.rhs ], [ %pi.4, %if.end30 ], [ %pi.0, %for.end36 ]
- %4 = load i32* @b, align 4
+ %4 = load i32, i32* @b, align 4
%inc285863 = add nsw i32 %4, 1
store i32 %inc285863, i32* @b, align 4
%tobool175964 = icmp eq i32 %inc285863, 0
br i1 %tobool175964, label %if.end30, label %while.condthread-pre-split.lr.ph.lr.ph
while.condthread-pre-split.lr.ph.lr.ph: ; preds = %label.preheader
- %.pr50 = load i32* @d, align 4
+ %.pr50 = load i32, i32* @d, align 4
%tobool19 = icmp eq i32 %.pr50, 0
- %a.promoted.pre = load i32* @a, align 4
+ %a.promoted.pre = load i32, i32* @a, align 4
br label %while.condthread-pre-split
}
diff --git a/test/CodeGen/X86/coalescer-commute1.ll b/test/CodeGen/X86/coalescer-commute1.ll
index cbcb89031b5b..dccf3b906fd9 100644
--- a/test/CodeGen/X86/coalescer-commute1.ll
+++ b/test/CodeGen/X86/coalescer-commute1.ll
@@ -6,14 +6,14 @@
define void @runcont(i32* %source) nounwind {
entry:
- %tmp10 = load i32* @NNTOT, align 4 ; <i32> [#uses=1]
+ %tmp10 = load i32, i32* @NNTOT, align 4 ; <i32> [#uses=1]
br label %bb
bb: ; preds = %bb, %entry
%neuron.0 = phi i32 [ 0, %entry ], [ %indvar.next, %bb ] ; <i32> [#uses=2]
%thesum.0 = phi float [ 0.000000e+00, %entry ], [ %tmp6, %bb ] ; <float> [#uses=1]
- %tmp2 = getelementptr i32* %source, i32 %neuron.0 ; <i32*> [#uses=1]
- %tmp3 = load i32* %tmp2, align 4 ; <i32> [#uses=1]
+ %tmp2 = getelementptr i32, i32* %source, i32 %neuron.0 ; <i32*> [#uses=1]
+ %tmp3 = load i32, i32* %tmp2, align 4 ; <i32> [#uses=1]
%tmp34 = sitofp i32 %tmp3 to float ; <float> [#uses=1]
%tmp6 = fadd float %tmp34, %thesum.0 ; <float> [#uses=2]
%indvar.next = add i32 %neuron.0, 1 ; <i32> [#uses=2]
diff --git a/test/CodeGen/X86/coalescer-commute4.ll b/test/CodeGen/X86/coalescer-commute4.ll
index 02a97813fdcd..d4af1a62dca7 100644
--- a/test/CodeGen/X86/coalescer-commute4.ll
+++ b/test/CodeGen/X86/coalescer-commute4.ll
@@ -13,11 +13,11 @@ bb.preheader: ; preds = %entry
bb: ; preds = %bb, %bb.preheader
%i.0.reg2mem.0 = phi i32 [ 0, %bb.preheader ], [ %indvar.next, %bb ] ; <i32> [#uses=3]
%res.0.reg2mem.0 = phi float [ 0.000000e+00, %bb.preheader ], [ %tmp14, %bb ] ; <float> [#uses=1]
- %tmp3 = getelementptr i32* %x, i32 %i.0.reg2mem.0 ; <i32*> [#uses=1]
- %tmp4 = load i32* %tmp3, align 4 ; <i32> [#uses=1]
+ %tmp3 = getelementptr i32, i32* %x, i32 %i.0.reg2mem.0 ; <i32*> [#uses=1]
+ %tmp4 = load i32, i32* %tmp3, align 4 ; <i32> [#uses=1]
%tmp45 = sitofp i32 %tmp4 to float ; <float> [#uses=1]
- %tmp8 = getelementptr float* %y, i32 %i.0.reg2mem.0 ; <float*> [#uses=1]
- %tmp9 = load float* %tmp8, align 4 ; <float> [#uses=1]
+ %tmp8 = getelementptr float, float* %y, i32 %i.0.reg2mem.0 ; <float*> [#uses=1]
+ %tmp9 = load float, float* %tmp8, align 4 ; <float> [#uses=1]
%tmp11 = fmul float %tmp9, %tmp45 ; <float> [#uses=1]
%tmp14 = fadd float %tmp11, %res.0.reg2mem.0 ; <float> [#uses=2]
%indvar.next = add i32 %i.0.reg2mem.0, 1 ; <i32> [#uses=2]
diff --git a/test/CodeGen/X86/coalescer-cross.ll b/test/CodeGen/X86/coalescer-cross.ll
index 3f1fec131214..92aedbef5dd4 100644
--- a/test/CodeGen/X86/coalescer-cross.ll
+++ b/test/CodeGen/X86/coalescer-cross.ll
@@ -30,14 +30,14 @@ entry:
%0 = tail call i32 @"\01_clock$UNIX2003"() nounwind ; <i32> [#uses=1]
%1 = uitofp i32 %0 to double ; <double> [#uses=1]
%2 = fdiv double %1, 1.000000e+06 ; <double> [#uses=1]
- %3 = getelementptr %struct.lua_State* %L, i32 0, i32 4 ; <%struct.TValue**> [#uses=3]
- %4 = load %struct.TValue** %3, align 4 ; <%struct.TValue*> [#uses=2]
- %5 = getelementptr %struct.TValue* %4, i32 0, i32 0, i32 0 ; <double*> [#uses=1]
+ %3 = getelementptr %struct.lua_State, %struct.lua_State* %L, i32 0, i32 4 ; <%struct.TValue**> [#uses=3]
+ %4 = load %struct.TValue*, %struct.TValue** %3, align 4 ; <%struct.TValue*> [#uses=2]
+ %5 = getelementptr %struct.TValue, %struct.TValue* %4, i32 0, i32 0, i32 0 ; <double*> [#uses=1]
store double %2, double* %5, align 4
- %6 = getelementptr %struct.TValue* %4, i32 0, i32 1 ; <i32*> [#uses=1]
+ %6 = getelementptr %struct.TValue, %struct.TValue* %4, i32 0, i32 1 ; <i32*> [#uses=1]
store i32 3, i32* %6, align 4
- %7 = load %struct.TValue** %3, align 4 ; <%struct.TValue*> [#uses=1]
- %8 = getelementptr %struct.TValue* %7, i32 1 ; <%struct.TValue*> [#uses=1]
+ %7 = load %struct.TValue*, %struct.TValue** %3, align 4 ; <%struct.TValue*> [#uses=1]
+ %8 = getelementptr %struct.TValue, %struct.TValue* %7, i32 1 ; <%struct.TValue*> [#uses=1]
store %struct.TValue* %8, %struct.TValue** %3, align 4
ret i32 1
}
diff --git a/test/CodeGen/X86/coalescer-dce2.ll b/test/CodeGen/X86/coalescer-dce2.ll
index bbbf09b267b9..116a70484363 100644
--- a/test/CodeGen/X86/coalescer-dce2.ll
+++ b/test/CodeGen/X86/coalescer-dce2.ll
@@ -14,19 +14,19 @@ target triple = "x86_64-apple-macosx10.7.0"
define void @fn1() nounwind uwtable ssp {
entry:
- %0 = load i32* @d, align 4
+ %0 = load i32, i32* @d, align 4
%tobool72 = icmp eq i32 %0, 0
br i1 %tobool72, label %for.end32, label %for.cond1.preheader.lr.ph
for.cond1.preheader.lr.ph: ; preds = %entry
- %1 = load i32* @c, align 4
+ %1 = load i32, i32* @c, align 4
%tobool2 = icmp eq i32 %1, 0
- %2 = load i32* @b, align 4
+ %2 = load i32, i32* @b, align 4
%cmp = icmp sgt i32 %2, 0
%conv = zext i1 %cmp to i32
- %3 = load i32* @g, align 4
+ %3 = load i32, i32* @g, align 4
%tobool4 = icmp eq i32 %3, 0
- %4 = load i16* @a, align 2
+ %4 = load i16, i16* @a, align 2
%tobool9 = icmp eq i16 %4, 0
br label %for.cond1.preheader
@@ -41,7 +41,7 @@ for.cond1.preheader.split.us: ; preds = %for.cond1.preheader
br i1 %tobool9, label %cond.end.us.us, label %cond.end.us
cond.false18.us.us: ; preds = %if.end.us.us
- %5 = load i32* @f, align 4
+ %5 = load i32, i32* @f, align 4
%sext76 = shl i32 %5, 16
%phitmp75 = ashr exact i32 %sext76, 16
br label %cond.end.us.us
@@ -74,7 +74,7 @@ land.lhs.true12.us: ; preds = %if.end6.us
br i1 %cmp14.us, label %cond.end21.us, label %cond.false18.us
if.end6.us: ; preds = %if.end.us
- %6 = load i32* @f, align 4
+ %6 = load i32, i32* @f, align 4
%conv7.us = trunc i32 %6 to i16
%tobool11.us = icmp eq i16 %conv7.us, 0
br i1 %tobool11.us, label %cond.false18.us, label %land.lhs.true12.us
@@ -95,7 +95,7 @@ for.cond1.preheader.split.for.cond1.preheader.split.split_crit_edge: ; preds = %
br i1 %tobool4, label %if.end6.us65, label %for.cond25.loopexit.us-lcssa.us-lcssa
cond.false18.us40: ; preds = %if.end.us50
- %7 = load i32* @f, align 4
+ %7 = load i32, i32* @f, align 4
%sext = shl i32 %7, 16
%phitmp = ashr exact i32 %sext, 16
br label %if.end.us50
diff --git a/test/CodeGen/X86/coalescer-identity.ll b/test/CodeGen/X86/coalescer-identity.ll
index 1aac09594c43..8d581160aa80 100644
--- a/test/CodeGen/X86/coalescer-identity.ll
+++ b/test/CodeGen/X86/coalescer-identity.ll
@@ -12,10 +12,10 @@ target triple = "x86_64-apple-macosx10.8.0"
define void @func() nounwind uwtable ssp {
for.body.lr.ph:
- %0 = load i32* @g2, align 4
+ %0 = load i32, i32* @g2, align 4
%tobool6 = icmp eq i32 %0, 0
- %s.promoted = load i16* @s, align 2
- %.pre = load i32* @g1, align 4
+ %s.promoted = load i16, i16* @s, align 2
+ %.pre = load i32, i32* @g1, align 4
br i1 %tobool6, label %for.body.us, label %for.body
for.body.us: ; preds = %for.body.lr.ph, %for.inc.us
diff --git a/test/CodeGen/X86/coalescer-remat.ll b/test/CodeGen/X86/coalescer-remat.ll
index bb08a0ec52cd..62e0562fd500 100644
--- a/test/CodeGen/X86/coalescer-remat.ll
+++ b/test/CodeGen/X86/coalescer-remat.ll
@@ -7,7 +7,7 @@ define i32 @main() nounwind {
entry:
%t0 = cmpxchg i64* @val, i64 0, i64 1 monotonic monotonic
%0 = extractvalue { i64, i1 } %t0, 0
- %1 = tail call i32 (i8*, ...)* @printf(i8* getelementptr ([7 x i8]* @"\01LC", i32 0, i64 0), i64 %0) nounwind
+ %1 = tail call i32 (i8*, ...) @printf(i8* getelementptr ([7 x i8], [7 x i8]* @"\01LC", i32 0, i64 0), i64 %0) nounwind
ret i32 0
}
diff --git a/test/CodeGen/X86/coalescer-subreg.ll b/test/CodeGen/X86/coalescer-subreg.ll
new file mode 100644
index 000000000000..be80dff779c9
--- /dev/null
+++ b/test/CodeGen/X86/coalescer-subreg.ll
@@ -0,0 +1,29 @@
+; RUN: llc -o - %s -verify-machineinstrs
+; This used to crash when coalescing a regclass like GR16 which did not support
+; the sub_8bit_hi subregister with a class like GR16_ABCD that did.
+target triple = "x86_64-apple-macosx10.10.0"
+
+define void @test() #0 {
+entry:
+ br i1 undef, label %loop, label %for.end597
+
+loop:
+ %0 = load i16, i16* null, align 4
+ %1 = load i16, i16* undef, align 4
+ %or1 = or i16 %1, %0
+ %or2 = trunc i16 %or1 to i8
+ store i8 %or2, i8* undef, align 4
+ %2 = or i16 %1, %0
+ %or3 = lshr i16 %2, 8
+ %or4 = trunc i16 %or3 to i8
+ store i8 %or4, i8* undef, align 1
+ %3 = load i32, i32* undef, align 4
+ %4 = load i32, i32* undef, align 4
+ %or5 = or i32 %4, %3
+ store i32 %or5, i32* undef, align 4
+ store i32 0, i32* undef, align 4
+ br label %loop
+
+for.end597:
+ ret void
+}
diff --git a/test/CodeGen/X86/code_placement.ll b/test/CodeGen/X86/code_placement.ll
index 97471835a4c9..7d2358480051 100644
--- a/test/CodeGen/X86/code_placement.ll
+++ b/test/CodeGen/X86/code_placement.ll
@@ -6,9 +6,9 @@
define void @t(i8* nocapture %in, i8* nocapture %out, i32* nocapture %rk, i32 %r) nounwind ssp {
entry:
- %0 = load i32* %rk, align 4 ; <i32> [#uses=1]
- %1 = getelementptr i32* %rk, i64 1 ; <i32*> [#uses=1]
- %2 = load i32* %1, align 4 ; <i32> [#uses=1]
+ %0 = load i32, i32* %rk, align 4 ; <i32> [#uses=1]
+ %1 = getelementptr i32, i32* %rk, i64 1 ; <i32*> [#uses=1]
+ %2 = load i32, i32* %1, align 4 ; <i32> [#uses=1]
%tmp15 = add i32 %r, -1 ; <i32> [#uses=1]
%tmp.16 = zext i32 %tmp15 to i64 ; <i64> [#uses=2]
br label %bb
@@ -23,64 +23,64 @@ bb: ; preds = %bb1, %entry
%rk26 = bitcast i32* %rk to i8* ; <i8*> [#uses=6]
%3 = lshr i32 %s0.0, 24 ; <i32> [#uses=1]
%4 = zext i32 %3 to i64 ; <i64> [#uses=1]
- %5 = getelementptr [256 x i32]* @Te0, i64 0, i64 %4 ; <i32*> [#uses=1]
- %6 = load i32* %5, align 4 ; <i32> [#uses=1]
+ %5 = getelementptr [256 x i32], [256 x i32]* @Te0, i64 0, i64 %4 ; <i32*> [#uses=1]
+ %6 = load i32, i32* %5, align 4 ; <i32> [#uses=1]
%7 = lshr i32 %s1.0, 16 ; <i32> [#uses=1]
%8 = and i32 %7, 255 ; <i32> [#uses=1]
%9 = zext i32 %8 to i64 ; <i64> [#uses=1]
- %10 = getelementptr [256 x i32]* @Te1, i64 0, i64 %9 ; <i32*> [#uses=1]
- %11 = load i32* %10, align 4 ; <i32> [#uses=1]
+ %10 = getelementptr [256 x i32], [256 x i32]* @Te1, i64 0, i64 %9 ; <i32*> [#uses=1]
+ %11 = load i32, i32* %10, align 4 ; <i32> [#uses=1]
%ctg2.sum2728 = or i64 %tmp18, 8 ; <i64> [#uses=1]
- %12 = getelementptr i8* %rk26, i64 %ctg2.sum2728 ; <i8*> [#uses=1]
+ %12 = getelementptr i8, i8* %rk26, i64 %ctg2.sum2728 ; <i8*> [#uses=1]
%13 = bitcast i8* %12 to i32* ; <i32*> [#uses=1]
- %14 = load i32* %13, align 4 ; <i32> [#uses=1]
+ %14 = load i32, i32* %13, align 4 ; <i32> [#uses=1]
%15 = xor i32 %11, %6 ; <i32> [#uses=1]
%16 = xor i32 %15, %14 ; <i32> [#uses=3]
%17 = lshr i32 %s1.0, 24 ; <i32> [#uses=1]
%18 = zext i32 %17 to i64 ; <i64> [#uses=1]
- %19 = getelementptr [256 x i32]* @Te0, i64 0, i64 %18 ; <i32*> [#uses=1]
- %20 = load i32* %19, align 4 ; <i32> [#uses=1]
+ %19 = getelementptr [256 x i32], [256 x i32]* @Te0, i64 0, i64 %18 ; <i32*> [#uses=1]
+ %20 = load i32, i32* %19, align 4 ; <i32> [#uses=1]
%21 = and i32 %s0.0, 255 ; <i32> [#uses=1]
%22 = zext i32 %21 to i64 ; <i64> [#uses=1]
- %23 = getelementptr [256 x i32]* @Te3, i64 0, i64 %22 ; <i32*> [#uses=1]
- %24 = load i32* %23, align 4 ; <i32> [#uses=1]
+ %23 = getelementptr [256 x i32], [256 x i32]* @Te3, i64 0, i64 %22 ; <i32*> [#uses=1]
+ %24 = load i32, i32* %23, align 4 ; <i32> [#uses=1]
%ctg2.sum2930 = or i64 %tmp18, 12 ; <i64> [#uses=1]
- %25 = getelementptr i8* %rk26, i64 %ctg2.sum2930 ; <i8*> [#uses=1]
+ %25 = getelementptr i8, i8* %rk26, i64 %ctg2.sum2930 ; <i8*> [#uses=1]
%26 = bitcast i8* %25 to i32* ; <i32*> [#uses=1]
- %27 = load i32* %26, align 4 ; <i32> [#uses=1]
+ %27 = load i32, i32* %26, align 4 ; <i32> [#uses=1]
%28 = xor i32 %24, %20 ; <i32> [#uses=1]
%29 = xor i32 %28, %27 ; <i32> [#uses=4]
%30 = lshr i32 %16, 24 ; <i32> [#uses=1]
%31 = zext i32 %30 to i64 ; <i64> [#uses=1]
- %32 = getelementptr [256 x i32]* @Te0, i64 0, i64 %31 ; <i32*> [#uses=1]
- %33 = load i32* %32, align 4 ; <i32> [#uses=2]
+ %32 = getelementptr [256 x i32], [256 x i32]* @Te0, i64 0, i64 %31 ; <i32*> [#uses=1]
+ %33 = load i32, i32* %32, align 4 ; <i32> [#uses=2]
%exitcond = icmp eq i64 %indvar, %tmp.16 ; <i1> [#uses=1]
br i1 %exitcond, label %bb2, label %bb1
bb1: ; preds = %bb
%ctg2.sum31 = add i64 %tmp18, 16 ; <i64> [#uses=1]
- %34 = getelementptr i8* %rk26, i64 %ctg2.sum31 ; <i8*> [#uses=1]
+ %34 = getelementptr i8, i8* %rk26, i64 %ctg2.sum31 ; <i8*> [#uses=1]
%35 = bitcast i8* %34 to i32* ; <i32*> [#uses=1]
%36 = lshr i32 %29, 16 ; <i32> [#uses=1]
%37 = and i32 %36, 255 ; <i32> [#uses=1]
%38 = zext i32 %37 to i64 ; <i64> [#uses=1]
- %39 = getelementptr [256 x i32]* @Te1, i64 0, i64 %38 ; <i32*> [#uses=1]
- %40 = load i32* %39, align 4 ; <i32> [#uses=1]
- %41 = load i32* %35, align 4 ; <i32> [#uses=1]
+ %39 = getelementptr [256 x i32], [256 x i32]* @Te1, i64 0, i64 %38 ; <i32*> [#uses=1]
+ %40 = load i32, i32* %39, align 4 ; <i32> [#uses=1]
+ %41 = load i32, i32* %35, align 4 ; <i32> [#uses=1]
%42 = xor i32 %40, %33 ; <i32> [#uses=1]
%43 = xor i32 %42, %41 ; <i32> [#uses=1]
%44 = lshr i32 %29, 24 ; <i32> [#uses=1]
%45 = zext i32 %44 to i64 ; <i64> [#uses=1]
- %46 = getelementptr [256 x i32]* @Te0, i64 0, i64 %45 ; <i32*> [#uses=1]
- %47 = load i32* %46, align 4 ; <i32> [#uses=1]
+ %46 = getelementptr [256 x i32], [256 x i32]* @Te0, i64 0, i64 %45 ; <i32*> [#uses=1]
+ %47 = load i32, i32* %46, align 4 ; <i32> [#uses=1]
%48 = and i32 %16, 255 ; <i32> [#uses=1]
%49 = zext i32 %48 to i64 ; <i64> [#uses=1]
- %50 = getelementptr [256 x i32]* @Te3, i64 0, i64 %49 ; <i32*> [#uses=1]
- %51 = load i32* %50, align 4 ; <i32> [#uses=1]
+ %50 = getelementptr [256 x i32], [256 x i32]* @Te3, i64 0, i64 %49 ; <i32*> [#uses=1]
+ %51 = load i32, i32* %50, align 4 ; <i32> [#uses=1]
%ctg2.sum32 = add i64 %tmp18, 20 ; <i64> [#uses=1]
- %52 = getelementptr i8* %rk26, i64 %ctg2.sum32 ; <i8*> [#uses=1]
+ %52 = getelementptr i8, i8* %rk26, i64 %ctg2.sum32 ; <i8*> [#uses=1]
%53 = bitcast i8* %52 to i32* ; <i32*> [#uses=1]
- %54 = load i32* %53, align 4 ; <i32> [#uses=1]
+ %54 = load i32, i32* %53, align 4 ; <i32> [#uses=1]
%55 = xor i32 %51, %47 ; <i32> [#uses=1]
%56 = xor i32 %55, %54 ; <i32> [#uses=1]
%indvar.next = add i64 %indvar, 1 ; <i64> [#uses=1]
@@ -89,48 +89,48 @@ bb1: ; preds = %bb
bb2: ; preds = %bb
%tmp10 = shl i64 %tmp.16, 4 ; <i64> [#uses=2]
%ctg2.sum = add i64 %tmp10, 16 ; <i64> [#uses=1]
- %tmp1213 = getelementptr i8* %rk26, i64 %ctg2.sum ; <i8*> [#uses=1]
+ %tmp1213 = getelementptr i8, i8* %rk26, i64 %ctg2.sum ; <i8*> [#uses=1]
%57 = bitcast i8* %tmp1213 to i32* ; <i32*> [#uses=1]
%58 = and i32 %33, -16777216 ; <i32> [#uses=1]
%59 = lshr i32 %29, 16 ; <i32> [#uses=1]
%60 = and i32 %59, 255 ; <i32> [#uses=1]
%61 = zext i32 %60 to i64 ; <i64> [#uses=1]
- %62 = getelementptr [256 x i32]* @Te1, i64 0, i64 %61 ; <i32*> [#uses=1]
- %63 = load i32* %62, align 4 ; <i32> [#uses=1]
+ %62 = getelementptr [256 x i32], [256 x i32]* @Te1, i64 0, i64 %61 ; <i32*> [#uses=1]
+ %63 = load i32, i32* %62, align 4 ; <i32> [#uses=1]
%64 = and i32 %63, 16711680 ; <i32> [#uses=1]
%65 = or i32 %64, %58 ; <i32> [#uses=1]
- %66 = load i32* %57, align 4 ; <i32> [#uses=1]
+ %66 = load i32, i32* %57, align 4 ; <i32> [#uses=1]
%67 = xor i32 %65, %66 ; <i32> [#uses=2]
%68 = lshr i32 %29, 8 ; <i32> [#uses=1]
%69 = zext i32 %68 to i64 ; <i64> [#uses=1]
- %70 = getelementptr [256 x i32]* @Te0, i64 0, i64 %69 ; <i32*> [#uses=1]
- %71 = load i32* %70, align 4 ; <i32> [#uses=1]
+ %70 = getelementptr [256 x i32], [256 x i32]* @Te0, i64 0, i64 %69 ; <i32*> [#uses=1]
+ %71 = load i32, i32* %70, align 4 ; <i32> [#uses=1]
%72 = and i32 %71, -16777216 ; <i32> [#uses=1]
%73 = and i32 %16, 255 ; <i32> [#uses=1]
%74 = zext i32 %73 to i64 ; <i64> [#uses=1]
- %75 = getelementptr [256 x i32]* @Te1, i64 0, i64 %74 ; <i32*> [#uses=1]
- %76 = load i32* %75, align 4 ; <i32> [#uses=1]
+ %75 = getelementptr [256 x i32], [256 x i32]* @Te1, i64 0, i64 %74 ; <i32*> [#uses=1]
+ %76 = load i32, i32* %75, align 4 ; <i32> [#uses=1]
%77 = and i32 %76, 16711680 ; <i32> [#uses=1]
%78 = or i32 %77, %72 ; <i32> [#uses=1]
%ctg2.sum25 = add i64 %tmp10, 20 ; <i64> [#uses=1]
- %79 = getelementptr i8* %rk26, i64 %ctg2.sum25 ; <i8*> [#uses=1]
+ %79 = getelementptr i8, i8* %rk26, i64 %ctg2.sum25 ; <i8*> [#uses=1]
%80 = bitcast i8* %79 to i32* ; <i32*> [#uses=1]
- %81 = load i32* %80, align 4 ; <i32> [#uses=1]
+ %81 = load i32, i32* %80, align 4 ; <i32> [#uses=1]
%82 = xor i32 %78, %81 ; <i32> [#uses=2]
%83 = lshr i32 %67, 24 ; <i32> [#uses=1]
%84 = trunc i32 %83 to i8 ; <i8> [#uses=1]
store i8 %84, i8* %out, align 1
%85 = lshr i32 %67, 16 ; <i32> [#uses=1]
%86 = trunc i32 %85 to i8 ; <i8> [#uses=1]
- %87 = getelementptr i8* %out, i64 1 ; <i8*> [#uses=1]
+ %87 = getelementptr i8, i8* %out, i64 1 ; <i8*> [#uses=1]
store i8 %86, i8* %87, align 1
- %88 = getelementptr i8* %out, i64 4 ; <i8*> [#uses=1]
+ %88 = getelementptr i8, i8* %out, i64 4 ; <i8*> [#uses=1]
%89 = lshr i32 %82, 24 ; <i32> [#uses=1]
%90 = trunc i32 %89 to i8 ; <i8> [#uses=1]
store i8 %90, i8* %88, align 1
%91 = lshr i32 %82, 16 ; <i32> [#uses=1]
%92 = trunc i32 %91 to i8 ; <i8> [#uses=1]
- %93 = getelementptr i8* %out, i64 5 ; <i8*> [#uses=1]
+ %93 = getelementptr i8, i8* %out, i64 5 ; <i8*> [#uses=1]
store i8 %92, i8* %93, align 1
ret void
}
diff --git a/test/CodeGen/X86/code_placement_outline_optional_branches.ll b/test/CodeGen/X86/code_placement_outline_optional_branches.ll
new file mode 100644
index 000000000000..3364915fd1b7
--- /dev/null
+++ b/test/CodeGen/X86/code_placement_outline_optional_branches.ll
@@ -0,0 +1,77 @@
+; RUN: llc -mcpu=corei7 -mtriple=x86_64-linux < %s | FileCheck %s -check-prefix=CHECK
+; RUN: llc -mcpu=corei7 -mtriple=x86_64-linux -outline-optional-branches < %s | FileCheck %s -check-prefix=CHECK-OUTLINE
+
+define void @foo(i32 %t1, i32 %t2, i32 %t3) {
+; Test that we lift the call to 'c' up to immediately follow the call to 'b'
+; when we disable the cfg conflict check.
+;
+; CHECK-LABEL: foo:
+; CHECK: callq a
+; CHECK: callq a
+; CHECK: callq a
+; CHECK: callq a
+; CHECK: callq b
+; CHECK: callq c
+; CHECK: callq d
+; CHECK: callq e
+; CHECK: callq f
+;
+; CHECK-OUTLINE-LABEL: foo:
+; CHECK-OUTLINE: callq b
+; CHECK-OUTLINE: callq c
+; CHECK-OUTLINE: callq d
+; CHECK-OUTLINE: callq e
+; CHECK-OUTLINE: callq f
+; CHECK-OUTLINE: callq a
+; CHECK-OUTLINE: callq a
+; CHECK-OUTLINE: callq a
+; CHECK-OUTLINE: callq a
+
+entry:
+ %cmp = icmp eq i32 %t1, 0
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then:
+ call void @a()
+ call void @a()
+ call void @a()
+ call void @a()
+ br label %if.end
+
+if.end:
+ call void @b()
+ br label %hotbranch
+
+hotbranch:
+ %cmp2 = icmp eq i32 %t2, 0
+ br i1 %cmp2, label %if.then2, label %if.end2, !prof !1
+
+if.then2:
+ call void @c()
+ br label %if.end2
+
+if.end2:
+ call void @d()
+ br label %shortbranch
+
+shortbranch:
+ %cmp3 = icmp eq i32 %t3, 0
+ br i1 %cmp3, label %if.then3, label %if.end3
+
+if.then3:
+ call void @e()
+ br label %if.end3
+
+if.end3:
+ call void @f()
+ ret void
+}
+
+declare void @a()
+declare void @b()
+declare void @c()
+declare void @d()
+declare void @e()
+declare void @f()
+
+!1 = !{!"branch_weights", i32 64, i32 4}
diff --git a/test/CodeGen/X86/codegen-prepare-addrmode-sext.ll b/test/CodeGen/X86/codegen-prepare-addrmode-sext.ll
index 85bfff2757e6..f00c40ba3a92 100644
--- a/test/CodeGen/X86/codegen-prepare-addrmode-sext.ll
+++ b/test/CodeGen/X86/codegen-prepare-addrmode-sext.ll
@@ -20,7 +20,7 @@ define i8 @twoArgsPromotion(i32 %arg1, i32 %arg2) {
%add = add nsw i32 %arg1, %arg2
%sextadd = sext i32 %add to i64
%base = inttoptr i64 %sextadd to i8*
- %res = load i8* %base
+ %res = load i8, i8* %base
ret i8 %res
}
@@ -35,8 +35,8 @@ define i8 @twoArgsPromotion(i32 %arg1, i32 %arg2) {
define i8 @twoArgsNoPromotion(i32 %arg1, i32 %arg2, i8* %base) {
%add = add nsw i32 %arg1, %arg2
%sextadd = sext i32 %add to i64
- %arrayidx = getelementptr inbounds i8* %base, i64 %sextadd
- %res = load i8* %arrayidx
+ %arrayidx = getelementptr inbounds i8, i8* %base, i64 %sextadd
+ %res = load i8, i8* %arrayidx
ret i8 %res
}
@@ -48,8 +48,8 @@ define i8 @twoArgsNoPromotion(i32 %arg1, i32 %arg2, i8* %base) {
define i8 @noPromotion(i32 %arg1, i32 %arg2, i8* %base) {
%add = add i32 %arg1, %arg2
%sextadd = sext i32 %add to i64
- %arrayidx = getelementptr inbounds i8* %base, i64 %sextadd
- %res = load i8* %arrayidx
+ %arrayidx = getelementptr inbounds i8, i8* %base, i64 %sextadd
+ %res = load i8, i8* %arrayidx
ret i8 %res
}
@@ -57,13 +57,13 @@ define i8 @noPromotion(i32 %arg1, i32 %arg2, i8* %base) {
; CHECK-LABEL: @oneArgPromotion
; CHECK: [[ARG1SEXT:%[a-zA-Z_0-9-]+]] = sext i32 %arg1 to i64
; CHECK: [[PROMOTED:%[a-zA-Z_0-9-]+]] = add nsw i64 [[ARG1SEXT]], 1
-; CHECK: getelementptr inbounds i8* %base, i64 [[PROMOTED]]
+; CHECK: getelementptr inbounds i8, i8* %base, i64 [[PROMOTED]]
; CHECK: ret
define i8 @oneArgPromotion(i32 %arg1, i8* %base) {
%add = add nsw i32 %arg1, 1
%sextadd = sext i32 %add to i64
- %arrayidx = getelementptr inbounds i8* %base, i64 %sextadd
- %res = load i8* %arrayidx
+ %arrayidx = getelementptr inbounds i8, i8* %base, i64 %sextadd
+ %res = load i8, i8* %arrayidx
ret i8 %res
}
@@ -71,14 +71,14 @@ define i8 @oneArgPromotion(i32 %arg1, i8* %base) {
; CHECK-LABEL: @oneArgPromotionZExt
; CHECK: [[ARG1ZEXT:%[a-zA-Z_0-9-]+]] = zext i8 %arg1 to i64
; CHECK: [[PROMOTED:%[a-zA-Z_0-9-]+]] = add nsw i64 [[ARG1ZEXT]], 1
-; CHECK: getelementptr inbounds i8* %base, i64 [[PROMOTED]]
+; CHECK: getelementptr inbounds i8, i8* %base, i64 [[PROMOTED]]
; CHECK: ret
define i8 @oneArgPromotionZExt(i8 %arg1, i8* %base) {
%zext = zext i8 %arg1 to i32
%add = add nsw i32 %zext, 1
%sextadd = sext i32 %add to i64
- %arrayidx = getelementptr inbounds i8* %base, i64 %sextadd
- %res = load i8* %arrayidx
+ %arrayidx = getelementptr inbounds i8, i8* %base, i64 %sextadd
+ %res = load i8, i8* %arrayidx
ret i8 %res
}
@@ -90,17 +90,15 @@ define i8 @oneArgPromotionZExt(i8 %arg1, i8* %base) {
; rolled back.
; Still, this test case exercises the desired code path.
; CHECK-LABEL: @oneArgPromotionCstZExt
-; CHECK: [[ZEXT:%[a-zA-Z_0-9-]+]] = zext i16 undef to i32
-; CHECK: [[SEXT:%[a-zA-Z_0-9-]+]] = sext i32 [[ZEXT]] to i64
-; CHECK: [[PROMOTED:%[a-zA-Z_0-9-]+]] = add nsw i64 [[SEXT]], 1
-; CHECK: getelementptr inbounds i8* %base, i64 [[PROMOTED]]
+; CHECK: [[PROMOTED:%[a-zA-Z_0-9-]+]] = add nsw i64 0, 1
+; CHECK: getelementptr inbounds i8, i8* %base, i64 [[PROMOTED]]
; CHECK: ret
define i8 @oneArgPromotionCstZExt(i8* %base) {
%cst = zext i16 undef to i32
%add = add nsw i32 %cst, 1
%sextadd = sext i32 %add to i64
- %arrayidx = getelementptr inbounds i8* %base, i64 %sextadd
- %res = load i8* %arrayidx
+ %arrayidx = getelementptr inbounds i8, i8* %base, i64 %sextadd
+ %res = load i8, i8* %arrayidx
ret i8 %res
}
@@ -110,14 +108,14 @@ define i8 @oneArgPromotionCstZExt(i8* %base) {
; CHECK: [[ARG1TRUNC:%[a-zA-Z_0-9-]+]] = trunc i32 %arg1 to i8
; CHECK: [[ARG1SEXT:%[a-zA-Z_0-9-]+]] = sext i8 [[ARG1TRUNC]] to i64
; CHECK: [[PROMOTED:%[a-zA-Z_0-9-]+]] = add nsw i64 [[ARG1SEXT]], 1
-; CHECK: getelementptr inbounds i8* %base, i64 [[PROMOTED]]
+; CHECK: getelementptr inbounds i8, i8* %base, i64 [[PROMOTED]]
; CHECK: ret
define i8 @oneArgPromotionBlockTrunc1(i32 %arg1, i8* %base) {
%trunc = trunc i32 %arg1 to i8
%add = add nsw i8 %trunc, 1
%sextadd = sext i8 %add to i64
- %arrayidx = getelementptr inbounds i8* %base, i64 %sextadd
- %res = load i8* %arrayidx
+ %arrayidx = getelementptr inbounds i8, i8* %base, i64 %sextadd
+ %res = load i8, i8* %arrayidx
ret i8 %res
}
@@ -128,15 +126,15 @@ define i8 @oneArgPromotionBlockTrunc1(i32 %arg1, i8* %base) {
; CHECK: [[ARG1TRUNC:%[a-zA-Z_0-9-]+]] = trunc i32 [[ARG1SEXT]] to i8
; CHECK: [[ARG1SEXT64:%[a-zA-Z_0-9-]+]] = sext i8 [[ARG1TRUNC]] to i64
; CHECK: [[PROMOTED:%[a-zA-Z_0-9-]+]] = add nsw i64 [[ARG1SEXT64]], 1
-; CHECK: getelementptr inbounds i8* %base, i64 [[PROMOTED]]
+; CHECK: getelementptr inbounds i8, i8* %base, i64 [[PROMOTED]]
; CHECK: ret
define i8 @oneArgPromotionBlockTrunc2(i16 %arg1, i8* %base) {
%sextarg1 = sext i16 %arg1 to i32
%trunc = trunc i32 %sextarg1 to i8
%add = add nsw i8 %trunc, 1
%sextadd = sext i8 %add to i64
- %arrayidx = getelementptr inbounds i8* %base, i64 %sextadd
- %res = load i8* %arrayidx
+ %arrayidx = getelementptr inbounds i8, i8* %base, i64 %sextadd
+ %res = load i8, i8* %arrayidx
ret i8 %res
}
@@ -145,15 +143,15 @@ define i8 @oneArgPromotionBlockTrunc2(i16 %arg1, i8* %base) {
; CHECK-LABEL: @oneArgPromotionPassTruncKeepSExt
; CHECK: [[ARG1SEXT:%[a-zA-Z_0-9-]+]] = sext i1 %arg1 to i64
; CHECK: [[PROMOTED:%[a-zA-Z_0-9-]+]] = add nsw i64 [[ARG1SEXT]], 1
-; CHECK: getelementptr inbounds i8* %base, i64 [[PROMOTED]]
+; CHECK: getelementptr inbounds i8, i8* %base, i64 [[PROMOTED]]
; CHECK: ret
define i8 @oneArgPromotionPassTruncKeepSExt(i1 %arg1, i8* %base) {
%sextarg1 = sext i1 %arg1 to i32
%trunc = trunc i32 %sextarg1 to i8
%add = add nsw i8 %trunc, 1
%sextadd = sext i8 %add to i64
- %arrayidx = getelementptr inbounds i8* %base, i64 %sextadd
- %res = load i8* %arrayidx
+ %arrayidx = getelementptr inbounds i8, i8* %base, i64 %sextadd
+ %res = load i8, i8* %arrayidx
ret i8 %res
}
@@ -164,15 +162,15 @@ define i8 @oneArgPromotionPassTruncKeepSExt(i1 %arg1, i8* %base) {
; CHECK: [[ARG1SEXT:%[a-zA-Z_0-9-]+]] = sext i8 %arg1 to i64
; CHECK: [[PROMOTED:%[a-zA-Z_0-9-]+]] = add nsw i64 [[ARG1SEXT]], 1
; CHECK: [[TRUNC:%[a-zA-Z_0-9-]+]] = trunc i64 [[PROMOTED]] to i8
-; CHECK: [[GEP:%[a-zA-Z_0-9-]+]] = getelementptr inbounds i8* %base, i64 [[PROMOTED]]
-; CHECK: [[LOAD:%[a-zA-Z_0-9-]+]] = load i8* [[GEP]]
+; CHECK: [[GEP:%[a-zA-Z_0-9-]+]] = getelementptr inbounds i8, i8* %base, i64 [[PROMOTED]]
+; CHECK: [[LOAD:%[a-zA-Z_0-9-]+]] = load i8, i8* [[GEP]]
; CHECK: add i8 [[LOAD]], [[TRUNC]]
; CHECK: ret
define i8 @oneArgPromotionTruncInsert(i8 %arg1, i8* %base) {
%add = add nsw i8 %arg1, 1
%sextadd = sext i8 %add to i64
- %arrayidx = getelementptr inbounds i8* %base, i64 %sextadd
- %res = load i8* %arrayidx
+ %arrayidx = getelementptr inbounds i8, i8* %base, i64 %sextadd
+ %res = load i8, i8* %arrayidx
%finalres = add i8 %res, %add
ret i8 %finalres
}
@@ -182,14 +180,14 @@ define i8 @oneArgPromotionTruncInsert(i8 %arg1, i8* %base) {
; CHECK: [[ARG1TRUNC:%[a-zA-Z_0-9-]+]] = trunc i128 %arg1 to i8
; CHECK: [[ARG1SEXT64:%[a-zA-Z_0-9-]+]] = sext i8 [[ARG1TRUNC]] to i64
; CHECK: [[PROMOTED:%[a-zA-Z_0-9-]+]] = add nsw i64 [[ARG1SEXT64]], 1
-; CHECK: getelementptr inbounds i8* %base, i64 [[PROMOTED]]
+; CHECK: getelementptr inbounds i8, i8* %base, i64 [[PROMOTED]]
; CHECK: ret
define i8 @oneArgPromotionLargerType(i128 %arg1, i8* %base) {
%trunc = trunc i128 %arg1 to i8
%add = add nsw i8 %trunc, 1
%sextadd = sext i8 %add to i64
- %arrayidx = getelementptr inbounds i8* %base, i64 %sextadd
- %res = load i8* %arrayidx
+ %arrayidx = getelementptr inbounds i8, i8* %base, i64 %sextadd
+ %res = load i8, i8* %arrayidx
%finalres = add i8 %res, %add
ret i8 %finalres
}
@@ -202,16 +200,16 @@ define i8 @oneArgPromotionLargerType(i128 %arg1, i8* %base) {
; CHECK: [[ARG1SEXT:%[a-zA-Z_0-9-]+]] = sext i8 %arg1 to i64
; CHECK: [[PROMOTED:%[a-zA-Z_0-9-]+]] = add nsw i64 [[ARG1SEXT]], 1
; CHECK: [[TRUNC:%[a-zA-Z_0-9-]+]] = trunc i64 [[PROMOTED]] to i8
-; CHECK: [[GEP:%[a-zA-Z_0-9-]+]] = getelementptr inbounds i8* %base, i64 [[PROMOTED]]
-; CHECK: [[LOAD:%[a-zA-Z_0-9-]+]] = load i8* [[GEP]]
+; CHECK: [[GEP:%[a-zA-Z_0-9-]+]] = getelementptr inbounds i8, i8* %base, i64 [[PROMOTED]]
+; CHECK: [[LOAD:%[a-zA-Z_0-9-]+]] = load i8, i8* [[GEP]]
; CHECK: [[ADDRES:%[a-zA-Z_0-9-]+]] = add i8 [[LOAD]], [[TRUNC]]
; CHECK: add i8 [[ADDRES]], [[TRUNC]]
; CHECK: ret
define i8 @oneArgPromotionTruncInsertSeveralUse(i8 %arg1, i8* %base) {
%add = add nsw i8 %arg1, 1
%sextadd = sext i8 %add to i64
- %arrayidx = getelementptr inbounds i8* %base, i64 %sextadd
- %res = load i8* %arrayidx
+ %arrayidx = getelementptr inbounds i8, i8* %base, i64 %sextadd
+ %res = load i8, i8* %arrayidx
%almostfinalres = add i8 %res, %add
%finalres = add i8 %almostfinalres, %add
ret i8 %finalres
@@ -222,16 +220,16 @@ define i8 @oneArgPromotionTruncInsertSeveralUse(i8 %arg1, i8* %base) {
; CHECK-LABEL: @oneArgPromotionSExtSeveralUse
; CHECK: [[ARG1SEXT:%[a-zA-Z_0-9-]+]] = sext i8 %arg1 to i64
; CHECK: [[PROMOTED:%[a-zA-Z_0-9-]+]] = add nsw i64 [[ARG1SEXT]], 1
-; CHECK: [[GEP:%[a-zA-Z_0-9-]+]] = getelementptr inbounds i8* %base, i64 [[PROMOTED]]
-; CHECK: [[LOAD:%[a-zA-Z_0-9-]+]] = load i8* [[GEP]]
+; CHECK: [[GEP:%[a-zA-Z_0-9-]+]] = getelementptr inbounds i8, i8* %base, i64 [[PROMOTED]]
+; CHECK: [[LOAD:%[a-zA-Z_0-9-]+]] = load i8, i8* [[GEP]]
; CHECK: [[ADDRES:%[a-zA-Z_0-9-]+]] = zext i8 [[LOAD]] to i64
; CHECK: add i64 [[ADDRES]], [[PROMOTED]]
; CHECK: ret
define i64 @oneArgPromotionSExtSeveralUse(i8 %arg1, i8* %base) {
%add = add nsw i8 %arg1, 1
%sextadd = sext i8 %add to i64
- %arrayidx = getelementptr inbounds i8* %base, i64 %sextadd
- %res = load i8* %arrayidx
+ %arrayidx = getelementptr inbounds i8, i8* %base, i64 %sextadd
+ %res = load i8, i8* %arrayidx
%almostfinalres = zext i8 %res to i64
%finalres = add i64 %almostfinalres, %sextadd
ret i64 %finalres
@@ -257,14 +255,14 @@ define i64 @oneArgPromotionSExtSeveralUse(i8 %arg1, i8* %base) {
; CHECK: [[ORIG:%[a-zA-Z_0-9-]+]] = add nsw i32 %arg1, %arg2
; CHECK: [[ADD:%[a-zA-Z_0-9-]+]] = add nsw i32 [[ORIG]], [[ORIG]]
; CHECK: [[SEXT:%[a-zA-Z_0-9-]+]] = sext i32 [[ADD]] to i64
-; CHECK: getelementptr inbounds i8* %base, i64 [[SEXT]]
+; CHECK: getelementptr inbounds i8, i8* %base, i64 [[SEXT]]
; CHECK: ret
define i8 @twoArgsPromotionNest(i32 %arg1, i32 %arg2, i8* %base) {
%promotableadd1 = add nsw i32 %arg1, %arg2
%promotableadd2 = add nsw i32 %promotableadd1, %promotableadd1
%sextadd = sext i32 %promotableadd2 to i64
- %arrayidx = getelementptr inbounds i8* %base, i64 %sextadd
- %res = load i8* %arrayidx
+ %arrayidx = getelementptr inbounds i8, i8* %base, i64 %sextadd
+ %res = load i8, i8* %arrayidx
ret i8 %res
}
@@ -279,15 +277,15 @@ define i8 @twoArgsPromotionNest(i32 %arg1, i32 %arg2, i8* %base) {
; CHECK: [[TRUNC:%[a-zA-Z_0-9-]+]] = trunc i32 [[SEXTARG1]] to i8
; CHECK: [[ADD:%[a-zA-Z_0-9-]+]] = add nsw i8 [[TRUNC]], %arg2
; CHECK: [[SEXT:%[a-zA-Z_0-9-]+]] = sext i8 [[ADD]] to i64
-; CHECK: getelementptr inbounds i8* %base, i64 [[SEXT]]
+; CHECK: getelementptr inbounds i8, i8* %base, i64 [[SEXT]]
; CHECK: ret
define i8 @twoArgsNoPromotionRemove(i1 %arg1, i8 %arg2, i8* %base) {
%sextarg1 = sext i1 %arg1 to i32
%trunc = trunc i32 %sextarg1 to i8
%add = add nsw i8 %trunc, %arg2
%sextadd = sext i8 %add to i64
- %arrayidx = getelementptr inbounds i8* %base, i64 %sextadd
- %res = load i8* %arrayidx
+ %arrayidx = getelementptr inbounds i8, i8* %base, i64 %sextadd
+ %res = load i8, i8* %arrayidx
ret i8 %res
}
@@ -313,11 +311,11 @@ define i8 @twoArgsNoPromotionRemove(i1 %arg1, i8 %arg2, i8* %base) {
; BB then
; CHECK: [[BASE1:%[a-zA-Z_0-9-]+]] = add i64 [[SEXTADD]], 48
; CHECK: [[ADDR1:%[a-zA-Z_0-9-]+]] = inttoptr i64 [[BASE1]] to i32*
-; CHECK: load i32* [[ADDR1]]
+; CHECK: load i32, i32* [[ADDR1]]
; BB else
; CHECK: [[BASE2:%[a-zA-Z_0-9-]+]] = add i64 [[SEXTADD]], 48
; CHECK: [[ADDR2:%[a-zA-Z_0-9-]+]] = inttoptr i64 [[BASE2]] to i32*
-; CHECK: load i32* [[ADDR2]]
+; CHECK: load i32, i32* [[ADDR2]]
; CHECK: ret
; CHECK-GEP-LABEL: @checkProfitability
; CHECK-GEP-NOT: {{%[a-zA-Z_0-9-]+}} = sext i32 %arg1 to i64
@@ -328,34 +326,34 @@ define i8 @twoArgsNoPromotionRemove(i1 %arg1, i8 %arg2, i8* %base) {
; BB then
; CHECK-GEP: [[BASE1:%[a-zA-Z_0-9-]+]] = inttoptr i64 [[SEXTADD]] to i32*
; CHECK-GEP: [[BCC1:%[a-zA-Z_0-9-]+]] = bitcast i32* [[BASE1]] to i8*
-; CHECK-GEP: [[FULL1:%[a-zA-Z_0-9-]+]] = getelementptr i8* [[BCC1]], i64 48
+; CHECK-GEP: [[FULL1:%[a-zA-Z_0-9-]+]] = getelementptr i8, i8* [[BCC1]], i64 48
; CHECK-GEP: [[ADDR1:%[a-zA-Z_0-9-]+]] = bitcast i8* [[FULL1]] to i32*
-; CHECK-GEP: load i32* [[ADDR1]]
+; CHECK-GEP: load i32, i32* [[ADDR1]]
; BB else
; CHECK-GEP: [[BASE2:%[a-zA-Z_0-9-]+]] = inttoptr i64 [[SEXTADD]] to i32*
; CHECK-GEP: [[BCC2:%[a-zA-Z_0-9-]+]] = bitcast i32* [[BASE2]] to i8*
-; CHECK-GEP: [[FULL2:%[a-zA-Z_0-9-]+]] = getelementptr i8* [[BCC2]], i64 48
+; CHECK-GEP: [[FULL2:%[a-zA-Z_0-9-]+]] = getelementptr i8, i8* [[BCC2]], i64 48
; CHECK-GEP: [[ADDR2:%[a-zA-Z_0-9-]+]] = bitcast i8* [[FULL2]] to i32*
-; CHECK-GEP: load i32* [[ADDR2]]
+; CHECK-GEP: load i32, i32* [[ADDR2]]
; CHECK-GEP: ret
define i32 @checkProfitability(i32 %arg1, i32 %arg2, i1 %test) {
%shl = shl nsw i32 %arg1, 1
%add1 = add nsw i32 %shl, %arg2
%sextidx1 = sext i32 %add1 to i64
%tmpptr = inttoptr i64 %sextidx1 to i32*
- %arrayidx1 = getelementptr i32* %tmpptr, i64 12
+ %arrayidx1 = getelementptr i32, i32* %tmpptr, i64 12
br i1 %test, label %then, label %else
then:
- %res1 = load i32* %arrayidx1
+ %res1 = load i32, i32* %arrayidx1
br label %end
else:
- %res2 = load i32* %arrayidx1
+ %res2 = load i32, i32* %arrayidx1
br label %end
end:
%tmp = phi i32 [%res1, %then], [%res2, %else]
%res = add i32 %tmp, %add1
%addr = inttoptr i32 %res to i32*
- %final = load i32* %addr
+ %final = load i32, i32* %addr
ret i32 %final
}
@@ -377,10 +375,10 @@ end:
; CHECK-NEXT: [[ADD:%[a-zA-Z_0-9-]+]] = add i64 [[BASE]], [[PROMOTED_CONV]]
; CHECK-NEXT: [[ADDR:%[a-zA-Z_0-9-]+]] = add i64 [[ADD]], 7
; CHECK-NEXT: [[CAST:%[a-zA-Z_0-9-]+]] = inttoptr i64 [[ADDR]] to i8*
-; CHECK-NEXT: load i8* [[CAST]], align 1
+; CHECK-NEXT: load i8, i8* [[CAST]], align 1
define signext i16 @fn3(%struct.dns_packet* nocapture readonly %P) {
entry:
- %tmp = getelementptr inbounds %struct.dns_packet* %P, i64 0, i32 2
+ %tmp = getelementptr inbounds %struct.dns_packet, %struct.dns_packet* %P, i64 0, i32 2
%data.i.i = bitcast %union.anon* %tmp to [0 x i8]*
br label %while.body.i.i
@@ -388,8 +386,8 @@ while.body.i.i: ; preds = %while.body.i.i, %en
%src.addr.0.i.i = phi i16 [ 0, %entry ], [ %inc.i.i, %while.body.i.i ]
%inc.i.i = add i16 %src.addr.0.i.i, 1
%idxprom.i.i = sext i16 %src.addr.0.i.i to i64
- %arrayidx.i.i = getelementptr inbounds [0 x i8]* %data.i.i, i64 0, i64 %idxprom.i.i
- %tmp1 = load i8* %arrayidx.i.i, align 1
+ %arrayidx.i.i = getelementptr inbounds [0 x i8], [0 x i8]* %data.i.i, i64 0, i64 %idxprom.i.i
+ %tmp1 = load i8, i8* %arrayidx.i.i, align 1
%conv2.i.i = zext i8 %tmp1 to i32
%and.i.i = and i32 %conv2.i.i, 15
store i32 %and.i.i, i32* @a, align 4
@@ -401,8 +399,8 @@ fn1.exit.i: ; preds = %while.body.i.i
%conv.i = zext i16 %inc.i.i.lcssa to i32
%sub.i = add nsw i32 %conv.i, -1
%idxprom.i = sext i32 %sub.i to i64
- %arrayidx.i = getelementptr inbounds [0 x i8]* %data.i.i, i64 0, i64 %idxprom.i
- %tmp2 = load i8* %arrayidx.i, align 1
+ %arrayidx.i = getelementptr inbounds [0 x i8], [0 x i8]* %data.i.i, i64 0, i64 %idxprom.i
+ %tmp2 = load i8, i8* %arrayidx.i, align 1
%conv2.i = sext i8 %tmp2 to i16
store i16 %conv2.i, i16* @b, align 2
%sub4.i = sub nsw i32 0, %conv.i
@@ -411,8 +409,8 @@ fn1.exit.i: ; preds = %while.body.i.i
br i1 %cmp.i, label %if.then.i, label %fn2.exit
if.then.i: ; preds = %fn1.exit.i
- %end.i = getelementptr inbounds %struct.dns_packet* %P, i64 0, i32 1
- %tmp3 = load i32* %end.i, align 4
+ %end.i = getelementptr inbounds %struct.dns_packet, %struct.dns_packet* %P, i64 0, i32 1
+ %tmp3 = load i32, i32* %end.i, align 4
%sub7.i = add i32 %tmp3, 65535
%conv8.i = trunc i32 %sub7.i to i16
br label %fn2.exit
@@ -433,7 +431,7 @@ define i8 @noPromotionFlag(i32 %arg1, i32 %arg2) {
%add = add nsw i32 %arg1, %arg2
%zextadd = zext i32 %add to i64
%base = inttoptr i64 %zextadd to i8*
- %res = load i8* %base
+ %res = load i8, i8* %base
ret i8 %res
}
@@ -448,7 +446,7 @@ define i8 @twoArgsPromotionZExt(i32 %arg1, i32 %arg2) {
%add = add nuw i32 %arg1, %arg2
%zextadd = zext i32 %add to i64
%base = inttoptr i64 %zextadd to i8*
- %res = load i8* %base
+ %res = load i8, i8* %base
ret i8 %res
}
@@ -456,13 +454,13 @@ define i8 @twoArgsPromotionZExt(i32 %arg1, i32 %arg2) {
; CHECK-LABEL: @oneArgPromotionNegativeCstZExt
; CHECK: [[ARG1ZEXT:%[a-zA-Z_0-9-]+]] = zext i8 %arg1 to i64
; CHECK: [[PROMOTED:%[a-zA-Z_0-9-]+]] = add nuw i64 [[ARG1ZEXT]], 255
-; CHECK: getelementptr inbounds i8* %base, i64 [[PROMOTED]]
+; CHECK: getelementptr inbounds i8, i8* %base, i64 [[PROMOTED]]
; CHECK: ret
define i8 @oneArgPromotionNegativeCstZExt(i8 %arg1, i8* %base) {
%add = add nuw i8 %arg1, -1
%zextadd = zext i8 %add to i64
- %arrayidx = getelementptr inbounds i8* %base, i64 %zextadd
- %res = load i8* %arrayidx
+ %arrayidx = getelementptr inbounds i8, i8* %base, i64 %zextadd
+ %res = load i8, i8* %arrayidx
ret i8 %res
}
@@ -470,14 +468,14 @@ define i8 @oneArgPromotionNegativeCstZExt(i8 %arg1, i8* %base) {
; CHECK-LABEL: @oneArgPromotionZExtZExt
; CHECK: [[ARG1ZEXT:%[a-zA-Z_0-9-]+]] = zext i8 %arg1 to i64
; CHECK: [[PROMOTED:%[a-zA-Z_0-9-]+]] = add nuw i64 [[ARG1ZEXT]], 1
-; CHECK: getelementptr inbounds i8* %base, i64 [[PROMOTED]]
+; CHECK: getelementptr inbounds i8, i8* %base, i64 [[PROMOTED]]
; CHECK: ret
define i8 @oneArgPromotionZExtZExt(i8 %arg1, i8* %base) {
%zext = zext i8 %arg1 to i32
%add = add nuw i32 %zext, 1
%zextadd = zext i32 %add to i64
- %arrayidx = getelementptr inbounds i8* %base, i64 %zextadd
- %res = load i8* %arrayidx
+ %arrayidx = getelementptr inbounds i8, i8* %base, i64 %zextadd
+ %res = load i8, i8* %arrayidx
ret i8 %res
}
@@ -488,15 +486,15 @@ define i8 @oneArgPromotionZExtZExt(i8 %arg1, i8* %base) {
; CHECK: [[ARG1TRUNC:%[a-zA-Z_0-9-]+]] = trunc i32 [[ARG1SEXT]] to i8
; CHECK: [[ARG1ZEXT:%[a-zA-Z_0-9-]+]] = zext i8 [[ARG1TRUNC]] to i64
; CHECK: [[PROMOTED:%[a-zA-Z_0-9-]+]] = add nuw i64 [[ARG1ZEXT]], 1
-; CHECK: getelementptr inbounds i8* %base, i64 [[PROMOTED]]
+; CHECK: getelementptr inbounds i8, i8* %base, i64 [[PROMOTED]]
; CHECK: ret
define i8 @oneArgPromotionBlockTruncZExt(i1 %arg1, i8* %base) {
%sextarg1 = sext i1 %arg1 to i32
%trunc = trunc i32 %sextarg1 to i8
%add = add nuw i8 %trunc, 1
%zextadd = zext i8 %add to i64
- %arrayidx = getelementptr inbounds i8* %base, i64 %zextadd
- %res = load i8* %arrayidx
+ %arrayidx = getelementptr inbounds i8, i8* %base, i64 %zextadd
+ %res = load i8, i8* %arrayidx
ret i8 %res
}
@@ -505,15 +503,15 @@ define i8 @oneArgPromotionBlockTruncZExt(i1 %arg1, i8* %base) {
; CHECK-LABEL: @oneArgPromotionPassTruncZExt
; CHECK: [[ARG1ZEXT:%[a-zA-Z_0-9-]+]] = zext i1 %arg1 to i64
; CHECK: [[PROMOTED:%[a-zA-Z_0-9-]+]] = add nuw i64 [[ARG1ZEXT]], 1
-; CHECK: getelementptr inbounds i8* %base, i64 [[PROMOTED]]
+; CHECK: getelementptr inbounds i8, i8* %base, i64 [[PROMOTED]]
; CHECK: ret
define i8 @oneArgPromotionPassTruncZExt(i1 %arg1, i8* %base) {
%sextarg1 = zext i1 %arg1 to i32
%trunc = trunc i32 %sextarg1 to i8
%add = add nuw i8 %trunc, 1
%zextadd = zext i8 %add to i64
- %arrayidx = getelementptr inbounds i8* %base, i64 %zextadd
- %res = load i8* %arrayidx
+ %arrayidx = getelementptr inbounds i8, i8* %base, i64 %zextadd
+ %res = load i8, i8* %arrayidx
ret i8 %res
}
@@ -522,13 +520,13 @@ define i8 @oneArgPromotionPassTruncZExt(i1 %arg1, i8* %base) {
; CHECK: [[ARG1SEXT:%[a-zA-Z_0-9-]+]] = sext i1 %arg1 to i8
; CHECK: [[ARG1ZEXT:%[a-zA-Z_0-9-]+]] = zext i8 [[ARG1SEXT]] to i64
; CHECK: [[PROMOTED:%[a-zA-Z_0-9-]+]] = add nuw i64 [[ARG1ZEXT]], 1
-; CHECK: getelementptr inbounds i8* %base, i64 [[PROMOTED]]
+; CHECK: getelementptr inbounds i8, i8* %base, i64 [[PROMOTED]]
; CHECK: ret
define i8 @oneArgPromotionBlockSExtZExt(i1 %arg1, i8* %base) {
%sextarg1 = sext i1 %arg1 to i8
%add = add nuw i8 %sextarg1, 1
%zextadd = zext i8 %add to i64
- %arrayidx = getelementptr inbounds i8* %base, i64 %zextadd
- %res = load i8* %arrayidx
+ %arrayidx = getelementptr inbounds i8, i8* %base, i64 %zextadd
+ %res = load i8, i8* %arrayidx
ret i8 %res
}
diff --git a/test/CodeGen/X86/codegen-prepare-cast.ll b/test/CodeGen/X86/codegen-prepare-cast.ll
index 59c513385f76..1ab8017e8858 100644
--- a/test/CodeGen/X86/codegen-prepare-cast.ll
+++ b/test/CodeGen/X86/codegen-prepare-cast.ll
@@ -10,16 +10,16 @@ target triple = "x86_64-unknown-linux-gnu"
@.str = external constant [7 x i8] ; <[7 x i8]*> [#uses=1]
; CHECK-LABEL: @_Dmain
-; CHECK: load i8* getelementptr inbounds ([7 x i8]* @.str, i32 0, i32 0)
+; CHECK: load i8, i8* getelementptr inbounds ([7 x i8], [7 x i8]* @.str, i32 0, i32 0)
; CHECK: ret
define fastcc i32 @_Dmain(%"char[][]" %unnamed) {
entry:
- %tmp = getelementptr [7 x i8]* @.str, i32 0, i32 0 ; <i8*> [#uses=1]
+ %tmp = getelementptr [7 x i8], [7 x i8]* @.str, i32 0, i32 0 ; <i8*> [#uses=1]
br i1 undef, label %foreachbody, label %foreachend
foreachbody: ; preds = %entry
- %tmp4 = getelementptr i8* %tmp, i32 undef ; <i8*> [#uses=1]
- %tmp5 = load i8* %tmp4 ; <i8> [#uses=0]
+ %tmp4 = getelementptr i8, i8* %tmp, i32 undef ; <i8*> [#uses=1]
+ %tmp5 = load i8, i8* %tmp4 ; <i8> [#uses=0]
unreachable
foreachend: ; preds = %entry
diff --git a/test/CodeGen/X86/codegen-prepare-crash.ll b/test/CodeGen/X86/codegen-prepare-crash.ll
index c3288170cc4f..01bcad96d93f 100644
--- a/test/CodeGen/X86/codegen-prepare-crash.ll
+++ b/test/CodeGen/X86/codegen-prepare-crash.ll
@@ -8,7 +8,7 @@ define void @f(i32 %u) {
br label %P.Proc8.exit
P.Proc8.exit:
- %valueindex35.i = getelementptr [10 x i32]* @g, i32 0, i32 %1
+ %valueindex35.i = getelementptr [10 x i32], [10 x i32]* @g, i32 0, i32 %1
store i32 %u, i32* %valueindex35.i
ret void
}
diff --git a/test/CodeGen/X86/codegen-prepare-extload.ll b/test/CodeGen/X86/codegen-prepare-extload.ll
index 9b27c33a80f9..65502b312b04 100644
--- a/test/CodeGen/X86/codegen-prepare-extload.ll
+++ b/test/CodeGen/X86/codegen-prepare-extload.ll
@@ -12,13 +12,13 @@
; CHECK: movsbl ({{%rdi|%rcx}}), %eax
;
; OPTALL-LABEL: @foo
-; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8* %p
+; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
; OPTALL-NEXT: [[ZEXT:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
; OPTALL: store i32 [[ZEXT]], i32* %q
; OPTALL: ret
define void @foo(i8* %p, i32* %q) {
entry:
- %t = load i8* %p
+ %t = load i8, i8* %p
%a = icmp slt i8 %t, 20
br i1 %a, label %true, label %false
true:
@@ -32,7 +32,7 @@ false:
; Check that we manage to form a zextload when an operation with only one
; argument to explicitly extend is in the way.
; OPTALL-LABEL: @promoteOneArg
-; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8* %p
+; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
; OPT-NEXT: [[ZEXT:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
; OPT-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nuw i32 [[ZEXT]], 2
; Make sure the operation is not promoted when the promotion pass is disabled.
@@ -42,7 +42,7 @@ false:
; OPTALL: ret
define void @promoteOneArg(i8* %p, i32* %q) {
entry:
- %t = load i8* %p
+ %t = load i8, i8* %p
%add = add nuw i8 %t, 2
%a = icmp slt i8 %t, 20
br i1 %a, label %true, label %false
@@ -58,7 +58,7 @@ false:
; argument to explicitly extend is in the way.
; Version with sext.
; OPTALL-LABEL: @promoteOneArgSExt
-; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8* %p
+; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
; OPT-NEXT: [[SEXT:%[a-zA-Z_0-9-]+]] = sext i8 [[LD]] to i32
; OPT-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nsw i32 [[SEXT]], 2
; DISABLE: [[ADD:%[a-zA-Z_0-9-]+]] = add nsw i8 [[LD]], 2
@@ -67,7 +67,7 @@ false:
; OPTALL: ret
define void @promoteOneArgSExt(i8* %p, i32* %q) {
entry:
- %t = load i8* %p
+ %t = load i8, i8* %p
%add = add nsw i8 %t, 2
%a = icmp slt i8 %t, 20
br i1 %a, label %true, label %false
@@ -90,7 +90,7 @@ false:
; transformation, the regular heuristic does not apply the optimization.
;
; OPTALL-LABEL: @promoteTwoArgZext
-; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8* %p
+; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
;
; STRESS-NEXT: [[ZEXTLD:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
; STRESS-NEXT: [[ZEXTB:%[a-zA-Z_0-9-]+]] = zext i8 %b to i32
@@ -106,7 +106,7 @@ false:
; OPTALL: ret
define void @promoteTwoArgZext(i8* %p, i32* %q, i8 %b) {
entry:
- %t = load i8* %p
+ %t = load i8, i8* %p
%add = add nuw i8 %t, %b
%a = icmp slt i8 %t, 20
br i1 %a, label %true, label %false
@@ -122,7 +122,7 @@ false:
; arguments to explicitly extend is in the way.
; Version with sext.
; OPTALL-LABEL: @promoteTwoArgSExt
-; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8* %p
+; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
;
; STRESS-NEXT: [[SEXTLD:%[a-zA-Z_0-9-]+]] = sext i8 [[LD]] to i32
; STRESS-NEXT: [[SEXTB:%[a-zA-Z_0-9-]+]] = sext i8 %b to i32
@@ -137,7 +137,7 @@ false:
; OPTALL: ret
define void @promoteTwoArgSExt(i8* %p, i32* %q, i8 %b) {
entry:
- %t = load i8* %p
+ %t = load i8, i8* %p
%add = add nsw i8 %t, %b
%a = icmp slt i8 %t, 20
br i1 %a, label %true, label %false
@@ -152,7 +152,7 @@ false:
; Check that we do not form a zextload if we need to introduce more than
; one additional extension.
; OPTALL-LABEL: @promoteThreeArgZext
-; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8* %p
+; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
;
; STRESS-NEXT: [[ZEXTLD:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
; STRESS-NEXT: [[ZEXTB:%[a-zA-Z_0-9-]+]] = zext i8 %b to i32
@@ -172,7 +172,7 @@ false:
; OPTALL: ret
define void @promoteThreeArgZext(i8* %p, i32* %q, i8 %b, i8 %c) {
entry:
- %t = load i8* %p
+ %t = load i8, i8* %p
%tmp = add nuw i8 %t, %b
%add = add nuw i8 %tmp, %c
%a = icmp slt i8 %t, 20
@@ -188,7 +188,7 @@ false:
; Check that we manage to form a zextload after promoting and merging
; two extensions.
; OPTALL-LABEL: @promoteMergeExtArgZExt
-; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8* %p
+; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
;
; STRESS-NEXT: [[ZEXTLD:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
; STRESS-NEXT: [[ZEXTB:%[a-zA-Z_0-9-]+]] = zext i16 %b to i32
@@ -206,7 +206,7 @@ false:
; OPTALL: ret
define void @promoteMergeExtArgZExt(i8* %p, i32* %q, i16 %b) {
entry:
- %t = load i8* %p
+ %t = load i8, i8* %p
%ext = zext i8 %t to i16
%add = add nuw i16 %ext, %b
%a = icmp slt i8 %t, 20
@@ -223,7 +223,7 @@ false:
; two extensions.
; Version with sext.
; OPTALL-LABEL: @promoteMergeExtArgSExt
-; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8* %p
+; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
;
; STRESS-NEXT: [[ZEXTLD:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
; STRESS-NEXT: [[ZEXTB:%[a-zA-Z_0-9-]+]] = sext i16 %b to i32
@@ -240,7 +240,7 @@ false:
; OPTALL: ret
define void @promoteMergeExtArgSExt(i8* %p, i32* %q, i16 %b) {
entry:
- %t = load i8* %p
+ %t = load i8, i8* %p
%ext = zext i8 %t to i16
%add = add nsw i16 %ext, %b
%a = icmp slt i8 %t, 20
@@ -284,11 +284,11 @@ false:
; 3 identical zext of %ld. The extensions will be CSE'ed by SDag.
;
; OPTALL-LABEL: @severalPromotions
-; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8* %addr1
+; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %addr1
; OPT-NEXT: [[ZEXTLD1_1:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i64
; OPT-NEXT: [[ZEXTLD1_2:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i64
; OPT-NEXT: [[ZEXTLD1_3:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i64
-; OPT-NEXT: [[LD2:%[a-zA-Z_0-9-]+]] = load i32* %addr2
+; OPT-NEXT: [[LD2:%[a-zA-Z_0-9-]+]] = load i32, i32* %addr2
; OPT-NEXT: [[SEXTLD2:%[a-zA-Z_0-9-]+]] = sext i32 [[LD2]] to i64
; OPT-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nsw i64 [[SEXTLD2]], [[ZEXTLD1_1]]
; We do not combine this one: see 2.b.
@@ -308,9 +308,9 @@ false:
; OPTALL: call void @dummy(i64 [[RES]], i64 [[RESZA]], i64 [[RESB]])
; OPTALL: ret
define void @severalPromotions(i8* %addr1, i32* %addr2, i8 %a, i32 %b) {
- %ld = load i8* %addr1
+ %ld = load i8, i8* %addr1
%zextld = zext i8 %ld to i32
- %ld2 = load i32* %addr2
+ %ld2 = load i32, i32* %addr2
%add = add nsw i32 %ld2, %zextld
%sextadd = sext i32 %add to i64
%zexta = zext i8 %a to i32
@@ -345,19 +345,50 @@ entry:
; to an instruction.
; This used to cause a crash.
; OPTALL-LABEL: @promotionOfArgEndsUpInValue
-; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i16* %addr
+; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i16, i16* %addr
; OPT-NEXT: [[SEXT:%[a-zA-Z_0-9-]+]] = sext i16 [[LD]] to i32
-; OPT-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nuw nsw i32 [[SEXT]], zext (i1 icmp ne (i32* getelementptr inbounds ([2 x i32]* @c, i64 0, i64 1), i32* @a) to i32)
+; OPT-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nuw nsw i32 [[SEXT]], zext (i1 icmp ne (i32* getelementptr inbounds ([2 x i32], [2 x i32]* @c, i64 0, i64 1), i32* @a) to i32)
;
-; DISABLE-NEXT: [[ADD:%[a-zA-Z_0-9-]+]] = add nuw nsw i16 [[LD]], zext (i1 icmp ne (i32* getelementptr inbounds ([2 x i32]* @c, i64 0, i64 1), i32* @a) to i16)
+; DISABLE-NEXT: [[ADD:%[a-zA-Z_0-9-]+]] = add nuw nsw i16 [[LD]], zext (i1 icmp ne (i32* getelementptr inbounds ([2 x i32], [2 x i32]* @c, i64 0, i64 1), i32* @a) to i16)
; DISABLE-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = sext i16 [[ADD]] to i32
;
; OPTALL-NEXT: ret i32 [[RES]]
define i32 @promotionOfArgEndsUpInValue(i16* %addr) {
entry:
- %val = load i16* %addr
- %add = add nuw nsw i16 %val, zext (i1 icmp ne (i32* getelementptr inbounds ([2 x i32]* @c, i64 0, i64 1), i32* @a) to i16)
+ %val = load i16, i16* %addr
+ %add = add nuw nsw i16 %val, zext (i1 icmp ne (i32* getelementptr inbounds ([2 x i32], [2 x i32]* @c, i64 0, i64 1), i32* @a) to i16)
%conv3 = sext i16 %add to i32
ret i32 %conv3
}
+
+; Check that we see that one zext can be derived from the other for free.
+; OPTALL-LABEL: @promoteTwoArgZextWithSourceExtendedTwice
+; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
+
+; OPT-NEXT: [[ZEXT64:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i64
+; OPT-NEXT: [[ZEXT32:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
+; OPT-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = add nuw i32 [[ZEXT32]], %b
+; OPT-NEXT: [[RES64:%[a-zA-Z_0-9-]+]] = add nuw i64 [[ZEXT64]], 12
+; OPT-NEXT: store i32 [[RES32]], i32* %addr
+; OPT-NEXT: store i64 [[RES64]], i64* %q
+;
+; DISABLE-NEXT: [[ZEXT32:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
+; DISABLE-NEXT: [[RES32:%[a-zA-Z_0-9-]+]] = add nuw i32 [[ZEXT32]], %b
+; DISABLE-NEXT: [[RES2_32:%[a-zA-Z_0-9-]+]] = add nuw i32 [[ZEXT32]], 12
+; DISABLE-NEXT: store i32 [[RES32]], i32* %addr
+; DISABLE-NEXT: [[ZEXT64:%[a-zA-Z_0-9-]+]] = zext i32 [[RES2_32]] to i64
+; DISABLE-NEXT: store i64 [[ZEXT64]], i64* %q
+;
+; OPTALL-NEXT: ret void
+define void @promoteTwoArgZextWithSourceExtendedTwice(i8* %p, i64* %q, i32 %b, i32* %addr) {
+entry:
+ %t = load i8, i8* %p
+ %zextt = zext i8 %t to i32
+ %add = add nuw i32 %zextt, %b
+ %add2 = add nuw i32 %zextt, 12
+ store i32 %add, i32 *%addr
+ %s = zext i32 %add2 to i64
+ store i64 %s, i64* %q
+ ret void
+}
diff --git a/test/CodeGen/X86/codegen-prepare.ll b/test/CodeGen/X86/codegen-prepare.ll
index 4ff0f1c0ba24..e58bc22ef142 100644
--- a/test/CodeGen/X86/codegen-prepare.ll
+++ b/test/CodeGen/X86/codegen-prepare.ll
@@ -19,16 +19,16 @@
define linkonce_odr void @foo(%class.A* nocapture %this, i32 %BoolValue) nounwind uwtable {
entry:
%cmp = icmp eq i32 %BoolValue, 0
- %address1 = getelementptr inbounds %class.A* %this, i64 0, i32 0, i32 3
- %address2 = getelementptr inbounds %class.A* %this, i64 0, i32 0, i32 1
+ %address1 = getelementptr inbounds %class.A, %class.A* %this, i64 0, i32 0, i32 3
+ %address2 = getelementptr inbounds %class.A, %class.A* %this, i64 0, i32 0, i32 1
br i1 %cmp, label %if.else, label %if.then
if.then: ; preds = %entry
- %0 = getelementptr inbounds %class.D* %address2, i64 0, i32 0, i64 0, i32 0
- %1 = load float* %0, align 4
- %2 = getelementptr inbounds float* %0, i64 3
- %3 = load float* %2, align 4
- %4 = getelementptr inbounds %class.D* %address1, i64 0, i32 0, i64 0, i32 0
+ %0 = getelementptr inbounds %class.D, %class.D* %address2, i64 0, i32 0, i64 0, i32 0
+ %1 = load float, float* %0, align 4
+ %2 = getelementptr inbounds float, float* %0, i64 3
+ %3 = load float, float* %2, align 4
+ %4 = getelementptr inbounds %class.D, %class.D* %address1, i64 0, i32 0, i64 0, i32 0
store float %1, float* %4, align 4
br label %if.end
diff --git a/test/CodeGen/X86/codemodel.ll b/test/CodeGen/X86/codemodel.ll
index 3aebc13f8740..edea63260270 100644
--- a/test/CodeGen/X86/codemodel.ll
+++ b/test/CodeGen/X86/codemodel.ll
@@ -11,7 +11,7 @@ entry:
; CHECK-SMALL: movl data(%rip), %eax
; CHECK-KERNEL-LABEL: foo:
; CHECK-KERNEL: movl data, %eax
- %0 = load i32* getelementptr ([0 x i32]* @data, i64 0, i64 0), align 4 ; <i32> [#uses=1]
+ %0 = load i32, i32* getelementptr ([0 x i32], [0 x i32]* @data, i64 0, i64 0), align 4 ; <i32> [#uses=1]
ret i32 %0
}
@@ -21,7 +21,7 @@ entry:
; CHECK-SMALL: movl data+40(%rip), %eax
; CHECK-KERNEL-LABEL: foo2:
; CHECK-KERNEL: movl data+40, %eax
- %0 = load i32* getelementptr ([0 x i32]* @data, i32 0, i64 10), align 4 ; <i32> [#uses=1]
+ %0 = load i32, i32* getelementptr ([0 x i32], [0 x i32]* @data, i32 0, i64 10), align 4 ; <i32> [#uses=1]
ret i32 %0
}
@@ -31,7 +31,7 @@ entry:
; CHECK-SMALL: movl data-40(%rip), %eax
; CHECK-KERNEL-LABEL: foo3:
; CHECK-KERNEL: movq $-40, %rax
- %0 = load i32* getelementptr ([0 x i32]* @data, i32 0, i64 -10), align 4 ; <i32> [#uses=1]
+ %0 = load i32, i32* getelementptr ([0 x i32], [0 x i32]* @data, i32 0, i64 -10), align 4 ; <i32> [#uses=1]
ret i32 %0
}
@@ -43,7 +43,7 @@ entry:
; CHECK-SMALL: movl data(%rax), %eax
; CHECK-KERNEL-LABEL: foo4:
; CHECK-KERNEL: movl data+16777216, %eax
- %0 = load i32* getelementptr ([0 x i32]* @data, i32 0, i64 4194304), align 4 ; <i32> [#uses=1]
+ %0 = load i32, i32* getelementptr ([0 x i32], [0 x i32]* @data, i32 0, i64 4194304), align 4 ; <i32> [#uses=1]
ret i32 %0
}
@@ -53,7 +53,7 @@ entry:
; CHECK-SMALL: movl data+16777212(%rip), %eax
; CHECK-KERNEL-LABEL: foo1:
; CHECK-KERNEL: movl data+16777212, %eax
- %0 = load i32* getelementptr ([0 x i32]* @data, i32 0, i64 4194303), align 4 ; <i32> [#uses=1]
+ %0 = load i32, i32* getelementptr ([0 x i32], [0 x i32]* @data, i32 0, i64 4194303), align 4 ; <i32> [#uses=1]
ret i32 %0
}
define i32 @foo5() nounwind readonly {
@@ -62,6 +62,6 @@ entry:
; CHECK-SMALL: movl data-16777216(%rip), %eax
; CHECK-KERNEL-LABEL: foo5:
; CHECK-KERNEL: movq $-16777216, %rax
- %0 = load i32* getelementptr ([0 x i32]* @data, i32 0, i64 -4194304), align 4 ; <i32> [#uses=1]
+ %0 = load i32, i32* getelementptr ([0 x i32], [0 x i32]* @data, i32 0, i64 -4194304), align 4 ; <i32> [#uses=1]
ret i32 %0
}
diff --git a/test/CodeGen/X86/coff-comdat.ll b/test/CodeGen/X86/coff-comdat.ll
index 44e1cb236e91..18f418959ec9 100644
--- a/test/CodeGen/X86/coff-comdat.ll
+++ b/test/CodeGen/X86/coff-comdat.ll
@@ -53,7 +53,7 @@ define x86_fastcallcc void @f8() comdat($f8) {
$vftable = comdat largest
@some_name = private unnamed_addr constant [2 x i8*] zeroinitializer, comdat($vftable)
-@vftable = alias getelementptr([2 x i8*]* @some_name, i32 0, i32 1)
+@vftable = alias getelementptr([2 x i8*], [2 x i8*]* @some_name, i32 0, i32 1)
; CHECK: .section .text,"xr",discard,_f1
; CHECK: .globl _f1
diff --git a/test/CodeGen/X86/combine-and.ll b/test/CodeGen/X86/combine-and.ll
index dace806b4bb9..bb46ac539171 100644
--- a/test/CodeGen/X86/combine-and.ll
+++ b/test/CodeGen/X86/combine-and.ll
@@ -6,159 +6,173 @@
define <4 x i32> @test1(<4 x i32> %A) {
+; CHECK-LABEL: test1:
+; CHECK: # BB#0:
+; CHECK-NEXT: pxor %xmm1, %xmm1
+; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7]
+; CHECK-NEXT: retq
%1 = and <4 x i32> %A, <i32 -1, i32 0, i32 0, i32 0>
ret <4 x i32> %1
}
-; CHECK-LABEL: test1
-; CHECK: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7]
-; CHECK-NEXT: retq
-
define <4 x i32> @test2(<4 x i32> %A) {
+; CHECK-LABEL: test2:
+; CHECK: # BB#0:
+; CHECK-NEXT: pxor %xmm1, %xmm1
+; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5,6,7]
+; CHECK-NEXT: retq
%1 = and <4 x i32> %A, <i32 0, i32 -1, i32 0, i32 0>
ret <4 x i32> %1
}
-; CHECK-LABEL: test2
-; CHECK: pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5,6,7]
-; CHECK-NEXT: retq
-
define <4 x i32> @test3(<4 x i32> %A) {
+; CHECK-LABEL: test3:
+; CHECK: # BB#0:
+; CHECK-NEXT: pxor %xmm1, %xmm1
+; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5],xmm1[6,7]
+; CHECK-NEXT: retq
%1 = and <4 x i32> %A, <i32 0, i32 0, i32 -1, i32 0>
ret <4 x i32> %1
}
-; CHECK-LABEL: test3
-; CHECK: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5],xmm1[6,7]
-; CHECK-NEXT: retq
-
define <4 x i32> @test4(<4 x i32> %A) {
+; CHECK-LABEL: test4:
+; CHECK: # BB#0:
+; CHECK-NEXT: pxor %xmm1, %xmm1
+; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5],xmm0[6,7]
+; CHECK-NEXT: retq
%1 = and <4 x i32> %A, <i32 0, i32 0, i32 0, i32 -1>
ret <4 x i32> %1
}
-; CHECK-LABEL: test4
-; CHECK: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5],xmm0[6,7]
-; CHECK-NEXT: retq
-
define <4 x i32> @test5(<4 x i32> %A) {
+; CHECK-LABEL: test5:
+; CHECK: # BB#0:
+; CHECK-NEXT: pxor %xmm1, %xmm1
+; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
+; CHECK-NEXT: retq
%1 = and <4 x i32> %A, <i32 -1, i32 0, i32 -1, i32 0>
ret <4 x i32> %1
}
-; CHECK-LABEL: test5
-; CHECK: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
-; CHECK-NEXT: retq
-
define <4 x i32> @test6(<4 x i32> %A) {
+; CHECK-LABEL: test6:
+; CHECK: # BB#0:
+; CHECK-NEXT: pxor %xmm1, %xmm1
+; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5],xmm0[6,7]
+; CHECK-NEXT: retq
%1 = and <4 x i32> %A, <i32 0, i32 -1, i32 0, i32 -1>
ret <4 x i32> %1
}
-; CHECK-LABEL: test6
-; CHECK: pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5],xmm0[6,7]
-; CHECK-NEXT: retq
-
define <4 x i32> @test7(<4 x i32> %A) {
+; CHECK-LABEL: test7:
+; CHECK: # BB#0:
+; CHECK-NEXT: pxor %xmm1, %xmm1
+; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
+; CHECK-NEXT: retq
%1 = and <4 x i32> %A, <i32 0, i32 0, i32 -1, i32 -1>
ret <4 x i32> %1
}
-; CHECK-LABEL: test7
-; CHECK: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
-; CHECK-NEXT: retq
-
define <4 x i32> @test8(<4 x i32> %A) {
+; CHECK-LABEL: test8:
+; CHECK: # BB#0:
+; CHECK-NEXT: pxor %xmm1, %xmm1
+; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5],xmm0[6,7]
+; CHECK-NEXT: retq
%1 = and <4 x i32> %A, <i32 -1, i32 0, i32 0, i32 -1>
ret <4 x i32> %1
}
-; CHECK-LABEL: test8
-; CHECK: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5],xmm0[6,7]
-; CHECK-NEXT: retq
-
define <4 x i32> @test9(<4 x i32> %A) {
+; CHECK-LABEL: test9:
+; CHECK: # BB#0:
+; CHECK-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
+; CHECK-NEXT: retq
%1 = and <4 x i32> %A, <i32 -1, i32 -1, i32 0, i32 0>
ret <4 x i32> %1
}
-; CHECK-LABEL: test9
-; CHECK: movq %xmm0, %xmm0
-; CHECK-NEXT: retq
-
define <4 x i32> @test10(<4 x i32> %A) {
+; CHECK-LABEL: test10:
+; CHECK: # BB#0:
+; CHECK-NEXT: pxor %xmm1, %xmm1
+; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3,4,5],xmm1[6,7]
+; CHECK-NEXT: retq
%1 = and <4 x i32> %A, <i32 0, i32 -1, i32 -1, i32 0>
ret <4 x i32> %1
}
-; CHECK-LABEL: test10
-; CHECK: pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3,4,5],xmm1[6,7]
-; CHECK-NEXT: retq
-
define <4 x i32> @test11(<4 x i32> %A) {
+; CHECK-LABEL: test11:
+; CHECK: # BB#0:
+; CHECK-NEXT: pxor %xmm1, %xmm1
+; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3,4,5,6,7]
+; CHECK-NEXT: retq
%1 = and <4 x i32> %A, <i32 0, i32 -1, i32 -1, i32 -1>
ret <4 x i32> %1
}
-; CHECK-LABEL: test11
-; CHECK: pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3,4,5,6,7]
-; CHECK-NEXT: retq
-
define <4 x i32> @test12(<4 x i32> %A) {
+; CHECK-LABEL: test12:
+; CHECK: # BB#0:
+; CHECK-NEXT: pxor %xmm1, %xmm1
+; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5],xmm1[6,7]
+; CHECK-NEXT: retq
%1 = and <4 x i32> %A, <i32 -1, i32 -1, i32 -1, i32 0>
ret <4 x i32> %1
}
-; CHECK-LABEL: test12
-; CHECK: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5],xmm1[6,7]
-; CHECK-NEXT: retq
-
define <4 x i32> @test13(<4 x i32> %A) {
+; CHECK-LABEL: test13:
+; CHECK: # BB#0:
+; CHECK-NEXT: pxor %xmm1, %xmm1
+; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5],xmm0[6,7]
+; CHECK-NEXT: retq
%1 = and <4 x i32> %A, <i32 -1, i32 -1, i32 0, i32 -1>
ret <4 x i32> %1
}
-; CHECK-LABEL: test13
-; CHECK: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5],xmm0[6,7]
-; CHECK-NEXT: retq
-
define <4 x i32> @test14(<4 x i32> %A) {
+; CHECK-LABEL: test14:
+; CHECK: # BB#0:
+; CHECK-NEXT: pxor %xmm1, %xmm1
+; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7]
+; CHECK-NEXT: retq
%1 = and <4 x i32> %A, <i32 -1, i32 0, i32 -1, i32 -1>
ret <4 x i32> %1
}
-; CHECK-LABEL: test14
-; CHECK: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7]
-; CHECK-NEXT: retq
-
define <4 x i32> @test15(<4 x i32> %A, <4 x i32> %B) {
+; CHECK-LABEL: test15:
+; CHECK: # BB#0:
+; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7]
+; CHECK-NEXT: retq
%1 = and <4 x i32> %A, <i32 -1, i32 0, i32 -1, i32 -1>
%2 = and <4 x i32> %B, <i32 0, i32 -1, i32 0, i32 0>
%3 = or <4 x i32> %1, %2
ret <4 x i32> %3
}
-; CHECK-LABEL: test15
-; CHECK: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7]
-; CHECK-NEXT: retq
-
define <4 x i32> @test16(<4 x i32> %A, <4 x i32> %B) {
+; CHECK-LABEL: test16:
+; CHECK: # BB#0:
+; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
+; CHECK-NEXT: retq
%1 = and <4 x i32> %A, <i32 -1, i32 0, i32 -1, i32 0>
%2 = and <4 x i32> %B, <i32 0, i32 -1, i32 0, i32 -1>
%3 = or <4 x i32> %1, %2
ret <4 x i32> %3
}
-; CHECK-LABEL: test16
-; CHECK: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
-; CHECK-NEXT: retq
-
define <4 x i32> @test17(<4 x i32> %A, <4 x i32> %B) {
+; CHECK-LABEL: test17:
+; CHECK: # BB#0:
+; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5],xmm0[6,7]
+; CHECK-NEXT: retq
%1 = and <4 x i32> %A, <i32 0, i32 -1, i32 0, i32 -1>
%2 = and <4 x i32> %B, <i32 -1, i32 0, i32 -1, i32 0>
%3 = or <4 x i32> %1, %2
ret <4 x i32> %3
}
-; CHECK-LABEL: test17
-; CHECK: pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5],xmm0[6,7]
-; CHECK-NEXT: retq
diff --git a/test/CodeGen/X86/combine-or.ll b/test/CodeGen/X86/combine-or.ll
index 280fcbc7a3a7..970f1762c1b8 100644
--- a/test/CodeGen/X86/combine-or.ll
+++ b/test/CodeGen/X86/combine-or.ll
@@ -153,7 +153,8 @@ define <4 x i32> @test12(<4 x i32> %a, <4 x i32> %b) {
define <4 x i32> @test13(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test13:
; CHECK: # BB#0:
-; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1],xmm1[2,3]
+; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
+; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; CHECK-NEXT: retq
%shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32><i32 1, i32 1, i32 4, i32 4>
%shuf2 = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32><i32 4, i32 4, i32 2, i32 3>
@@ -177,8 +178,9 @@ define <2 x i64> @test14(<2 x i64> %a, <2 x i64> %b) {
define <4 x i32> @test15(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test15:
; CHECK: # BB#0:
-; CHECK-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,1],xmm0[2,1]
-; CHECK-NEXT: movaps %xmm1, %xmm0
+; CHECK-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,1,2,1]
+; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,1,2,3]
+; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7]
; CHECK-NEXT: retq
%shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32><i32 4, i32 4, i32 2, i32 1>
%shuf2 = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32><i32 2, i32 1, i32 4, i32 4>
@@ -206,12 +208,9 @@ define <2 x i64> @test16(<2 x i64> %a, <2 x i64> %b) {
define <4 x i32> @test17(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test17:
; CHECK: # BB#0:
-; CHECK-NEXT: xorps %xmm2, %xmm2
-; CHECK-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0,0]
-; CHECK-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,0],xmm0[0,2]
-; CHECK-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2,1,3]
-; CHECK-NEXT: orps %xmm1, %xmm2
-; CHECK-NEXT: movaps %xmm2, %xmm0
+; CHECK-NEXT: psllq $32, %xmm0
+; CHECK-NEXT: movq {{.*#+}} xmm1 = xmm1[0],zero
+; CHECK-NEXT: por %xmm1, %xmm0
; CHECK-NEXT: retq
%shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32><i32 4, i32 0, i32 4, i32 2>
%shuf2 = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32><i32 0, i32 1, i32 4, i32 4>
@@ -239,12 +238,12 @@ define <4 x i32> @test18(<4 x i32> %a, <4 x i32> %b) {
define <4 x i32> @test19(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test19:
; CHECK: # BB#0:
-; CHECK-NEXT: xorps %xmm2, %xmm2
-; CHECK-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,0],xmm0[0,3]
-; CHECK-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2,1,3]
-; CHECK-NEXT: insertps {{.*#+}} xmm1 = xmm1[0],zero,xmm1[2,2]
-; CHECK-NEXT: orps %xmm1, %xmm2
-; CHECK-NEXT: movaps %xmm2, %xmm0
+; CHECK-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,2,3]
+; CHECK-NEXT: pxor %xmm3, %xmm3
+; CHECK-NEXT: pblendw {{.*#+}} xmm2 = xmm3[0,1],xmm2[2,3],xmm3[4,5],xmm2[6,7]
+; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,1,2,2]
+; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm3[2,3],xmm0[4,5,6,7]
+; CHECK-NEXT: por %xmm2, %xmm0
; CHECK-NEXT: retq
%shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32><i32 4, i32 0, i32 4, i32 3>
%shuf2 = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32><i32 0, i32 4, i32 2, i32 2>
@@ -256,8 +255,8 @@ define <4 x i32> @test19(<4 x i32> %a, <4 x i32> %b) {
define <2 x i64> @test20(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test20:
; CHECK: # BB#0:
-; CHECK-NEXT: orps %xmm1, %xmm0
-; CHECK-NEXT: movq %xmm0, %xmm0
+; CHECK-NEXT: por %xmm1, %xmm0
+; CHECK-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
; CHECK-NEXT: retq
%shuf1 = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32><i32 0, i32 2>
%shuf2 = shufflevector <2 x i64> %b, <2 x i64> zeroinitializer, <2 x i32><i32 0, i32 2>
@@ -278,6 +277,70 @@ define <2 x i64> @test21(<2 x i64> %a, <2 x i64> %b) {
ret <2 x i64> %or
}
+
+; Verify that the dag-combiner keeps the correct domain for float/double vectors
+; bitcast to use the mask-or blend combine.
+
+define <2 x double> @test22(<2 x double> %a0, <2 x double> %a1) {
+; CHECK-LABEL: test22:
+; CHECK: # BB#0:
+; CHECK-NEXT: blendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
+; CHECK-NEXT: retq
+ %bc1 = bitcast <2 x double> %a0 to <2 x i64>
+ %bc2 = bitcast <2 x double> %a1 to <2 x i64>
+ %and1 = and <2 x i64> %bc1, <i64 0, i64 -1>
+ %and2 = and <2 x i64> %bc2, <i64 -1, i64 0>
+ %or = or <2 x i64> %and1, %and2
+ %bc3 = bitcast <2 x i64> %or to <2 x double>
+ ret <2 x double> %bc3
+}
+
+
+define <4 x float> @test23(<4 x float> %a0, <4 x float> %a1) {
+; CHECK-LABEL: test23:
+; CHECK: # BB#0:
+; CHECK-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2],xmm1[3]
+; CHECK-NEXT: retq
+ %bc1 = bitcast <4 x float> %a0 to <4 x i32>
+ %bc2 = bitcast <4 x float> %a1 to <4 x i32>
+ %and1 = and <4 x i32> %bc1, <i32 0, i32 -1, i32 -1, i32 0>
+ %and2 = and <4 x i32> %bc2, <i32 -1, i32 0, i32 0, i32 -1>
+ %or = or <4 x i32> %and1, %and2
+ %bc3 = bitcast <4 x i32> %or to <4 x float>
+ ret <4 x float> %bc3
+}
+
+
+define <4 x float> @test24(<4 x float> %a0, <4 x float> %a1) {
+; CHECK-LABEL: test24:
+; CHECK: # BB#0:
+; CHECK-NEXT: blendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
+; CHECK-NEXT: retq
+ %bc1 = bitcast <4 x float> %a0 to <2 x i64>
+ %bc2 = bitcast <4 x float> %a1 to <2 x i64>
+ %and1 = and <2 x i64> %bc1, <i64 0, i64 -1>
+ %and2 = and <2 x i64> %bc2, <i64 -1, i64 0>
+ %or = or <2 x i64> %and1, %and2
+ %bc3 = bitcast <2 x i64> %or to <4 x float>
+ ret <4 x float> %bc3
+}
+
+
+define <4 x float> @test25(<4 x float> %a0) {
+; CHECK-LABEL: test25:
+; CHECK: # BB#0:
+; CHECK-NEXT: blendps {{.*#+}} xmm0 = mem[0],xmm0[1,2],mem[3]
+; CHECK-NEXT: retq
+ %bc1 = bitcast <4 x float> %a0 to <4 x i32>
+ %bc2 = bitcast <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0> to <4 x i32>
+ %and1 = and <4 x i32> %bc1, <i32 0, i32 -1, i32 -1, i32 0>
+ %and2 = and <4 x i32> %bc2, <i32 -1, i32 0, i32 0, i32 -1>
+ %or = or <4 x i32> %and1, %and2
+ %bc3 = bitcast <4 x i32> %or to <4 x float>
+ ret <4 x float> %bc3
+}
+
+
; Verify that the DAGCombiner doesn't crash in the attempt to check if a shuffle
; with illegal type has a legal mask. Method 'isShuffleMaskLegal' only knows how to
; handle legal vector value types.
diff --git a/test/CodeGen/X86/combiner-aa-0.ll b/test/CodeGen/X86/combiner-aa-0.ll
index a61ef7acd13c..403059d90ab1 100644
--- a/test/CodeGen/X86/combiner-aa-0.ll
+++ b/test/CodeGen/X86/combiner-aa-0.ll
@@ -5,14 +5,14 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
@g_flipV_hashkey = external global %struct.Hash_Key, align 16 ; <%struct.Hash_Key*> [#uses=1]
define void @foo() nounwind {
- %t0 = load i32* undef, align 16 ; <i32> [#uses=1]
- %t1 = load i32* null, align 4 ; <i32> [#uses=1]
+ %t0 = load i32, i32* undef, align 16 ; <i32> [#uses=1]
+ %t1 = load i32, i32* null, align 4 ; <i32> [#uses=1]
%t2 = srem i32 %t0, 32 ; <i32> [#uses=1]
%t3 = shl i32 1, %t2 ; <i32> [#uses=1]
%t4 = xor i32 %t3, %t1 ; <i32> [#uses=1]
store i32 %t4, i32* null, align 4
- %t5 = getelementptr %struct.Hash_Key* @g_flipV_hashkey, i64 0, i32 0, i64 0 ; <i32*> [#uses=2]
- %t6 = load i32* %t5, align 4 ; <i32> [#uses=1]
+ %t5 = getelementptr %struct.Hash_Key, %struct.Hash_Key* @g_flipV_hashkey, i64 0, i32 0, i64 0 ; <i32*> [#uses=2]
+ %t6 = load i32, i32* %t5, align 4 ; <i32> [#uses=1]
%t7 = shl i32 1, undef ; <i32> [#uses=1]
%t8 = xor i32 %t7, %t6 ; <i32> [#uses=1]
store i32 %t8, i32* %t5, align 4
diff --git a/test/CodeGen/X86/combiner-aa-1.ll b/test/CodeGen/X86/combiner-aa-1.ll
index 58a7129b6005..cc3e5ca12602 100644
--- a/test/CodeGen/X86/combiner-aa-1.ll
+++ b/test/CodeGen/X86/combiner-aa-1.ll
@@ -13,9 +13,9 @@ target triple = "i386-pc-linux-gnu"
define i32 @._ZN8lam_node18resolve_name_clashEP8arg_nodeP9alst_node._ZNK8lam_nodeeqERK8exp_node._ZN11arglst_nodeD0Ev(%struct.lam_node* %this.this, %struct.arg_node* %outer_arg, %struct.alst_node* %env.cmp, %struct.arglst_node* %this, i32 %functionID) {
comb_entry:
%.SV59 = alloca %struct.node* ; <%struct.node**> [#uses=1]
- %0 = load i32 (...)*** null, align 4 ; <i32 (...)**> [#uses=1]
- %1 = getelementptr inbounds i32 (...)** %0, i32 3 ; <i32 (...)**> [#uses=1]
- %2 = load i32 (...)** %1, align 4 ; <i32 (...)*> [#uses=1]
+ %0 = load i32 (...)**, i32 (...)*** null, align 4 ; <i32 (...)**> [#uses=1]
+ %1 = getelementptr inbounds i32 (...)*, i32 (...)** %0, i32 3 ; <i32 (...)**> [#uses=1]
+ %2 = load i32 (...)*, i32 (...)** %1, align 4 ; <i32 (...)*> [#uses=1]
store %struct.node* undef, %struct.node** %.SV59
%3 = bitcast i32 (...)* %2 to i32 (%struct.node*)* ; <i32 (%struct.node*)*> [#uses=1]
%4 = tail call i32 %3(%struct.node* undef) ; <i32> [#uses=0]
diff --git a/test/CodeGen/X86/commute-blend-avx2.ll b/test/CodeGen/X86/commute-blend-avx2.ll
index d06c6dad8dbf..bd497ba40767 100644
--- a/test/CodeGen/X86/commute-blend-avx2.ll
+++ b/test/CodeGen/X86/commute-blend-avx2.ll
@@ -1,7 +1,7 @@
; RUN: llc -O3 -mtriple=x86_64-unknown -mcpu=core-avx2 -mattr=avx2 < %s | FileCheck %s
define <8 x i16> @commute_fold_vpblendw_128(<8 x i16> %a, <8 x i16>* %b) #0 {
- %1 = load <8 x i16>* %b
+ %1 = load <8 x i16>, <8 x i16>* %b
%2 = call <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16> %1, <8 x i16> %a, i8 17)
ret <8 x i16> %2
@@ -12,7 +12,7 @@ define <8 x i16> @commute_fold_vpblendw_128(<8 x i16> %a, <8 x i16>* %b) #0 {
declare <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16>, <8 x i16>, i8) nounwind readnone
define <16 x i16> @commute_fold_vpblendw_256(<16 x i16> %a, <16 x i16>* %b) #0 {
- %1 = load <16 x i16>* %b
+ %1 = load <16 x i16>, <16 x i16>* %b
%2 = call <16 x i16> @llvm.x86.avx2.pblendw(<16 x i16> %1, <16 x i16> %a, i8 17)
ret <16 x i16> %2
@@ -23,7 +23,7 @@ define <16 x i16> @commute_fold_vpblendw_256(<16 x i16> %a, <16 x i16>* %b) #0 {
declare <16 x i16> @llvm.x86.avx2.pblendw(<16 x i16>, <16 x i16>, i8) nounwind readnone
define <4 x i32> @commute_fold_vpblendd_128(<4 x i32> %a, <4 x i32>* %b) #0 {
- %1 = load <4 x i32>* %b
+ %1 = load <4 x i32>, <4 x i32>* %b
%2 = call <4 x i32> @llvm.x86.avx2.pblendd.128(<4 x i32> %1, <4 x i32> %a, i8 1)
ret <4 x i32> %2
@@ -34,7 +34,7 @@ define <4 x i32> @commute_fold_vpblendd_128(<4 x i32> %a, <4 x i32>* %b) #0 {
declare <4 x i32> @llvm.x86.avx2.pblendd.128(<4 x i32>, <4 x i32>, i8) nounwind readnone
define <8 x i32> @commute_fold_vpblendd_256(<8 x i32> %a, <8 x i32>* %b) #0 {
- %1 = load <8 x i32>* %b
+ %1 = load <8 x i32>, <8 x i32>* %b
%2 = call <8 x i32> @llvm.x86.avx2.pblendd.256(<8 x i32> %1, <8 x i32> %a, i8 129)
ret <8 x i32> %2
@@ -45,18 +45,18 @@ define <8 x i32> @commute_fold_vpblendd_256(<8 x i32> %a, <8 x i32>* %b) #0 {
declare <8 x i32> @llvm.x86.avx2.pblendd.256(<8 x i32>, <8 x i32>, i8) nounwind readnone
define <4 x float> @commute_fold_vblendps_128(<4 x float> %a, <4 x float>* %b) #0 {
- %1 = load <4 x float>* %b
- %2 = call <4 x float> @llvm.x86.sse41.blendps(<4 x float> %1, <4 x float> %a, i8 3)
+ %1 = load <4 x float>, <4 x float>* %b
+ %2 = call <4 x float> @llvm.x86.sse41.blendps(<4 x float> %1, <4 x float> %a, i8 5)
ret <4 x float> %2
;LABEL: commute_fold_vblendps_128
- ;CHECK: vblendps {{.*#+}} xmm0 = xmm0[0,1],mem[2,3]
+ ;CHECK: vblendps {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2],mem[3]
;CHECK-NEXT: retq
}
declare <4 x float> @llvm.x86.sse41.blendps(<4 x float>, <4 x float>, i8) nounwind readnone
define <8 x float> @commute_fold_vblendps_256(<8 x float> %a, <8 x float>* %b) #0 {
- %1 = load <8 x float>* %b
+ %1 = load <8 x float>, <8 x float>* %b
%2 = call <8 x float> @llvm.x86.avx.blend.ps.256(<8 x float> %1, <8 x float> %a, i8 7)
ret <8 x float> %2
@@ -67,7 +67,7 @@ define <8 x float> @commute_fold_vblendps_256(<8 x float> %a, <8 x float>* %b) #
declare <8 x float> @llvm.x86.avx.blend.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone
define <2 x double> @commute_fold_vblendpd_128(<2 x double> %a, <2 x double>* %b) #0 {
- %1 = load <2 x double>* %b
+ %1 = load <2 x double>, <2 x double>* %b
%2 = call <2 x double> @llvm.x86.sse41.blendpd(<2 x double> %1, <2 x double> %a, i8 1)
ret <2 x double> %2
@@ -78,7 +78,7 @@ define <2 x double> @commute_fold_vblendpd_128(<2 x double> %a, <2 x double>* %b
declare <2 x double> @llvm.x86.sse41.blendpd(<2 x double>, <2 x double>, i8) nounwind readnone
define <4 x double> @commute_fold_vblendpd_256(<4 x double> %a, <4 x double>* %b) #0 {
- %1 = load <4 x double>* %b
+ %1 = load <4 x double>, <4 x double>* %b
%2 = call <4 x double> @llvm.x86.avx.blend.pd.256(<4 x double> %1, <4 x double> %a, i8 7)
ret <4 x double> %2
diff --git a/test/CodeGen/X86/commute-blend-sse41.ll b/test/CodeGen/X86/commute-blend-sse41.ll
index 59fef8c3a29f..8cebcdb8eeae 100644
--- a/test/CodeGen/X86/commute-blend-sse41.ll
+++ b/test/CodeGen/X86/commute-blend-sse41.ll
@@ -1,7 +1,7 @@
; RUN: llc -O3 -mtriple=x86_64-unknown -mcpu=corei7 < %s | FileCheck %s
define <8 x i16> @commute_fold_pblendw(<8 x i16> %a, <8 x i16>* %b) #0 {
- %1 = load <8 x i16>* %b
+ %1 = load <8 x i16>, <8 x i16>* %b
%2 = call <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16> %1, <8 x i16> %a, i8 17)
ret <8 x i16> %2
@@ -12,18 +12,18 @@ define <8 x i16> @commute_fold_pblendw(<8 x i16> %a, <8 x i16>* %b) #0 {
declare <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16>, <8 x i16>, i8) nounwind readnone
define <4 x float> @commute_fold_blendps(<4 x float> %a, <4 x float>* %b) #0 {
- %1 = load <4 x float>* %b
- %2 = call <4 x float> @llvm.x86.sse41.blendps(<4 x float> %1, <4 x float> %a, i8 3)
+ %1 = load <4 x float>, <4 x float>* %b
+ %2 = call <4 x float> @llvm.x86.sse41.blendps(<4 x float> %1, <4 x float> %a, i8 5)
ret <4 x float> %2
;LABEL: commute_fold_blendps
- ;CHECK: blendps {{.*#+}} xmm0 = xmm0[0,1],mem[2,3]
+ ;CHECK: blendps {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2],mem[3]
;CHECK-NEXT: retq
}
declare <4 x float> @llvm.x86.sse41.blendps(<4 x float>, <4 x float>, i8) nounwind readnone
define <2 x double> @commute_fold_blendpd(<2 x double> %a, <2 x double>* %b) #0 {
- %1 = load <2 x double>* %b
+ %1 = load <2 x double>, <2 x double>* %b
%2 = call <2 x double> @llvm.x86.sse41.blendpd(<2 x double> %1, <2 x double> %a, i8 1)
ret <2 x double> %2
diff --git a/test/CodeGen/X86/commute-clmul.ll b/test/CodeGen/X86/commute-clmul.ll
new file mode 100644
index 000000000000..d13911abc864
--- /dev/null
+++ b/test/CodeGen/X86/commute-clmul.ll
@@ -0,0 +1,60 @@
+; RUN: llc -O3 -mtriple=x86_64-unknown -mcpu=x86-64 -mattr=+sse2,+pclmul < %s | FileCheck %s --check-prefix=SSE
+; RUN: llc -O3 -mtriple=x86_64-unknown -mcpu=x86-64 -mattr=+avx2,+pclmul < %s | FileCheck %s --check-prefix=AVX
+
+declare <2 x i64> @llvm.x86.pclmulqdq(<2 x i64>, <2 x i64>, i8) nounwind readnone
+
+define <2 x i64> @commute_lq_lq(<2 x i64>* %a0, <2 x i64> %a1) #0 {
+ ;SSE-LABEL: commute_lq_lq
+ ;SSE: pclmulqdq $0, (%rdi), %xmm0
+ ;SSE-NEXT: retq
+
+ ;AVX-LABEL: commute_lq_lq
+ ;AVX: vpclmulqdq $0, (%rdi), %xmm0, %xmm0
+ ;AVX-NEXT: retq
+
+ %1 = load <2 x i64>, <2 x i64>* %a0
+ %2 = call <2 x i64> @llvm.x86.pclmulqdq(<2 x i64> %1, <2 x i64> %a1, i8 0)
+ ret <2 x i64> %2
+}
+
+define <2 x i64> @commute_lq_hq(<2 x i64>* %a0, <2 x i64> %a1) #0 {
+ ;SSE-LABEL: commute_lq_hq
+ ;SSE: pclmulqdq $1, (%rdi), %xmm0
+ ;SSE-NEXT: retq
+
+ ;AVX-LABEL: commute_lq_hq
+ ;AVX: vpclmulqdq $1, (%rdi), %xmm0, %xmm0
+ ;AVX-NEXT: retq
+
+ %1 = load <2 x i64>, <2 x i64>* %a0
+ %2 = call <2 x i64> @llvm.x86.pclmulqdq(<2 x i64> %1, <2 x i64> %a1, i8 16)
+ ret <2 x i64> %2
+}
+
+define <2 x i64> @commute_hq_lq(<2 x i64>* %a0, <2 x i64> %a1) #0 {
+ ;SSE-LABEL: commute_hq_lq
+ ;SSE: pclmulqdq $16, (%rdi), %xmm0
+ ;SSE-NEXT: retq
+
+ ;AVX-LABEL: commute_hq_lq
+ ;AVX: vpclmulqdq $16, (%rdi), %xmm0, %xmm0
+ ;AVX-NEXT: retq
+
+ %1 = load <2 x i64>, <2 x i64>* %a0
+ %2 = call <2 x i64> @llvm.x86.pclmulqdq(<2 x i64> %1, <2 x i64> %a1, i8 1)
+ ret <2 x i64> %2
+}
+
+define <2 x i64> @commute_hq_hq(<2 x i64>* %a0, <2 x i64> %a1) #0 {
+ ;SSE-LABEL: commute_hq_hq
+ ;SSE: pclmulqdq $17, (%rdi), %xmm0
+ ;SSE-NEXT: retq
+
+ ;AVX-LABEL: commute_hq_hq
+ ;AVX: vpclmulqdq $17, (%rdi), %xmm0, %xmm0
+ ;AVX-NEXT: retq
+
+ %1 = load <2 x i64>, <2 x i64>* %a0
+ %2 = call <2 x i64> @llvm.x86.pclmulqdq(<2 x i64> %1, <2 x i64> %a1, i8 17)
+ ret <2 x i64> %2
+}
diff --git a/test/CodeGen/X86/commute-fcmp.ll b/test/CodeGen/X86/commute-fcmp.ll
new file mode 100644
index 000000000000..6f43ebe1fcd7
--- /dev/null
+++ b/test/CodeGen/X86/commute-fcmp.ll
@@ -0,0 +1,340 @@
+; RUN: llc -O3 -mtriple=x86_64-unknown -mcpu=x86-64 -mattr=+sse2 < %s | FileCheck %s --check-prefix=SSE
+; RUN: llc -O3 -mtriple=x86_64-unknown -mcpu=x86-64 -mattr=+avx2 < %s | FileCheck %s --check-prefix=AVX
+
+;
+; Float Comparisons
+; Only equal/not-equal/ordered/unordered can be safely commuted
+;
+
+define <4 x i32> @commute_cmpps_eq(<4 x float>* %a0, <4 x float> %a1) #0 {
+ ;SSE-LABEL: commute_cmpps_eq
+ ;SSE: cmpeqps (%rdi), %xmm0
+ ;SSE-NEXT: retq
+
+ ;AVX-LABEL: commute_cmpps_eq
+ ;AVX: vcmpeqps (%rdi), %xmm0, %xmm0
+ ;AVX-NEXT: retq
+
+ %1 = load <4 x float>, <4 x float>* %a0
+ %2 = fcmp oeq <4 x float> %1, %a1
+ %3 = sext <4 x i1> %2 to <4 x i32>
+ ret <4 x i32> %3
+}
+
+define <4 x i32> @commute_cmpps_ne(<4 x float>* %a0, <4 x float> %a1) #0 {
+ ;SSE-LABEL: commute_cmpps_ne
+ ;SSE: cmpneqps (%rdi), %xmm0
+ ;SSE-NEXT: retq
+
+ ;AVX-LABEL: commute_cmpps_ne
+ ;AVX: vcmpneqps (%rdi), %xmm0, %xmm0
+ ;AVX-NEXT: retq
+
+ %1 = load <4 x float>, <4 x float>* %a0
+ %2 = fcmp une <4 x float> %1, %a1
+ %3 = sext <4 x i1> %2 to <4 x i32>
+ ret <4 x i32> %3
+}
+
+define <4 x i32> @commute_cmpps_ord(<4 x float>* %a0, <4 x float> %a1) #0 {
+ ;SSE-LABEL: commute_cmpps_ord
+ ;SSE: cmpordps (%rdi), %xmm0
+ ;SSE-NEXT: retq
+
+ ;AVX-LABEL: commute_cmpps_ord
+ ;AVX: vcmpordps (%rdi), %xmm0, %xmm0
+ ;AVX-NEXT: retq
+
+ %1 = load <4 x float>, <4 x float>* %a0
+ %2 = fcmp ord <4 x float> %1, %a1
+ %3 = sext <4 x i1> %2 to <4 x i32>
+ ret <4 x i32> %3
+}
+
+define <4 x i32> @commute_cmpps_uno(<4 x float>* %a0, <4 x float> %a1) #0 {
+ ;SSE-LABEL: commute_cmpps_uno
+ ;SSE: cmpunordps (%rdi), %xmm0
+ ;SSE-NEXT: retq
+
+ ;AVX-LABEL: commute_cmpps_uno
+ ;AVX: vcmpunordps (%rdi), %xmm0, %xmm0
+ ;AVX-NEXT: retq
+
+ %1 = load <4 x float>, <4 x float>* %a0
+ %2 = fcmp uno <4 x float> %1, %a1
+ %3 = sext <4 x i1> %2 to <4 x i32>
+ ret <4 x i32> %3
+}
+
+define <4 x i32> @commute_cmpps_lt(<4 x float>* %a0, <4 x float> %a1) #0 {
+ ;SSE-LABEL: commute_cmpps_lt
+ ;SSE: movaps (%rdi), %xmm1
+ ;SSE-NEXT: cmpltps %xmm0, %xmm1
+ ;SSE-NEXT: movaps %xmm1, %xmm0
+ ;SSE-NEXT: retq
+
+ ;AVX-LABEL: commute_cmpps_lt
+ ;AVX: vmovaps (%rdi), %xmm1
+ ;AVX-NEXT: vcmpltps %xmm0, %xmm1, %xmm0
+ ;AVX-NEXT: retq
+
+ %1 = load <4 x float>, <4 x float>* %a0
+ %2 = fcmp olt <4 x float> %1, %a1
+ %3 = sext <4 x i1> %2 to <4 x i32>
+ ret <4 x i32> %3
+}
+
+define <4 x i32> @commute_cmpps_le(<4 x float>* %a0, <4 x float> %a1) #0 {
+ ;SSE-LABEL: commute_cmpps_le
+ ;SSE: movaps (%rdi), %xmm1
+ ;SSE-NEXT: cmpleps %xmm0, %xmm1
+ ;SSE-NEXT: movaps %xmm1, %xmm0
+ ;SSE-NEXT: retq
+
+ ;AVX-LABEL: commute_cmpps_le
+ ;AVX: vmovaps (%rdi), %xmm1
+ ;AVX-NEXT: vcmpleps %xmm0, %xmm1, %xmm0
+ ;AVX-NEXT: retq
+
+ %1 = load <4 x float>, <4 x float>* %a0
+ %2 = fcmp ole <4 x float> %1, %a1
+ %3 = sext <4 x i1> %2 to <4 x i32>
+ ret <4 x i32> %3
+}
+
+define <8 x i32> @commute_cmpps_eq_ymm(<8 x float>* %a0, <8 x float> %a1) #0 {
+ ;AVX-LABEL: commute_cmpps_eq_ymm
+ ;AVX: vcmpeqps (%rdi), %ymm0, %ymm0
+ ;AVX-NEXT: retq
+
+ %1 = load <8 x float>, <8 x float>* %a0
+ %2 = fcmp oeq <8 x float> %1, %a1
+ %3 = sext <8 x i1> %2 to <8 x i32>
+ ret <8 x i32> %3
+}
+
+define <8 x i32> @commute_cmpps_ne_ymm(<8 x float>* %a0, <8 x float> %a1) #0 {
+ ;AVX-LABEL: commute_cmpps_ne_ymm
+ ;AVX: vcmpneqps (%rdi), %ymm0, %ymm0
+ ;AVX-NEXT: retq
+
+ %1 = load <8 x float>, <8 x float>* %a0
+ %2 = fcmp une <8 x float> %1, %a1
+ %3 = sext <8 x i1> %2 to <8 x i32>
+ ret <8 x i32> %3
+}
+
+define <8 x i32> @commute_cmpps_ord_ymm(<8 x float>* %a0, <8 x float> %a1) #0 {
+ ;AVX-LABEL: commute_cmpps_ord_ymm
+ ;AVX: vcmpordps (%rdi), %ymm0, %ymm0
+ ;AVX-NEXT: retq
+
+ %1 = load <8 x float>, <8 x float>* %a0
+ %2 = fcmp ord <8 x float> %1, %a1
+ %3 = sext <8 x i1> %2 to <8 x i32>
+ ret <8 x i32> %3
+}
+
+define <8 x i32> @commute_cmpps_uno_ymm(<8 x float>* %a0, <8 x float> %a1) #0 {
+ ;AVX-LABEL: commute_cmpps_uno_ymm
+ ;AVX: vcmpunordps (%rdi), %ymm0, %ymm0
+ ;AVX-NEXT: retq
+
+ %1 = load <8 x float>, <8 x float>* %a0
+ %2 = fcmp uno <8 x float> %1, %a1
+ %3 = sext <8 x i1> %2 to <8 x i32>
+ ret <8 x i32> %3
+}
+
+define <8 x i32> @commute_cmpps_lt_ymm(<8 x float>* %a0, <8 x float> %a1) #0 {
+ ;AVX-LABEL: commute_cmpps_lt_ymm
+ ;AVX: vmovaps (%rdi), %ymm1
+ ;AVX-NEXT: vcmpltps %ymm0, %ymm1, %ymm0
+ ;AVX-NEXT: retq
+
+ %1 = load <8 x float>, <8 x float>* %a0
+ %2 = fcmp olt <8 x float> %1, %a1
+ %3 = sext <8 x i1> %2 to <8 x i32>
+ ret <8 x i32> %3
+}
+
+define <8 x i32> @commute_cmpps_le_ymm(<8 x float>* %a0, <8 x float> %a1) #0 {
+ ;AVX-LABEL: commute_cmpps_le_ymm
+ ;AVX: vmovaps (%rdi), %ymm1
+ ;AVX-NEXT: vcmpleps %ymm0, %ymm1, %ymm0
+ ;AVX-NEXT: retq
+
+ %1 = load <8 x float>, <8 x float>* %a0
+ %2 = fcmp ole <8 x float> %1, %a1
+ %3 = sext <8 x i1> %2 to <8 x i32>
+ ret <8 x i32> %3
+}
+
+;
+; Double Comparisons
+; Only equal/not-equal/ordered/unordered can be safely commuted
+;
+
+define <2 x i64> @commute_cmppd_eq(<2 x double>* %a0, <2 x double> %a1) #0 {
+ ;SSE-LABEL: commute_cmppd_eq
+ ;SSE: cmpeqpd (%rdi), %xmm0
+ ;SSE-NEXT: retq
+
+ ;AVX-LABEL: commute_cmppd_eq
+ ;AVX: vcmpeqpd (%rdi), %xmm0, %xmm0
+ ;AVX-NEXT: retq
+
+ %1 = load <2 x double>, <2 x double>* %a0
+ %2 = fcmp oeq <2 x double> %1, %a1
+ %3 = sext <2 x i1> %2 to <2 x i64>
+ ret <2 x i64> %3
+}
+
+define <2 x i64> @commute_cmppd_ne(<2 x double>* %a0, <2 x double> %a1) #0 {
+ ;SSE-LABEL: commute_cmppd_ne
+ ;SSE: cmpneqpd (%rdi), %xmm0
+ ;SSE-NEXT: retq
+
+ ;AVX-LABEL: commute_cmppd_ne
+ ;AVX: vcmpneqpd (%rdi), %xmm0, %xmm0
+ ;AVX-NEXT: retq
+
+ %1 = load <2 x double>, <2 x double>* %a0
+ %2 = fcmp une <2 x double> %1, %a1
+ %3 = sext <2 x i1> %2 to <2 x i64>
+ ret <2 x i64> %3
+}
+
+define <2 x i64> @commute_cmppd_ord(<2 x double>* %a0, <2 x double> %a1) #0 {
+ ;SSE-LABEL: commute_cmppd_ord
+ ;SSE: cmpordpd (%rdi), %xmm0
+ ;SSE-NEXT: retq
+
+ ;AVX-LABEL: commute_cmppd_ord
+ ;AVX: vcmpordpd (%rdi), %xmm0, %xmm0
+ ;AVX-NEXT: retq
+
+ %1 = load <2 x double>, <2 x double>* %a0
+ %2 = fcmp ord <2 x double> %1, %a1
+ %3 = sext <2 x i1> %2 to <2 x i64>
+ ret <2 x i64> %3
+}
+
+define <2 x i64> @commute_cmppd_uno(<2 x double>* %a0, <2 x double> %a1) #0 {
+ ;SSE-LABEL: commute_cmppd_uno
+ ;SSE: cmpunordpd (%rdi), %xmm0
+ ;SSE-NEXT: retq
+
+ ;AVX-LABEL: commute_cmppd_uno
+ ;AVX: vcmpunordpd (%rdi), %xmm0, %xmm0
+ ;AVX-NEXT: retq
+
+ %1 = load <2 x double>, <2 x double>* %a0
+ %2 = fcmp uno <2 x double> %1, %a1
+ %3 = sext <2 x i1> %2 to <2 x i64>
+ ret <2 x i64> %3
+}
+
+define <2 x i64> @commute_cmppd_lt(<2 x double>* %a0, <2 x double> %a1) #0 {
+ ;SSE-LABEL: commute_cmppd_lt
+ ;SSE: movapd (%rdi), %xmm1
+ ;SSE-NEXT: cmpltpd %xmm0, %xmm1
+ ;SSE-NEXT: movapd %xmm1, %xmm0
+ ;SSE-NEXT: retq
+
+ ;AVX-LABEL: commute_cmppd_lt
+ ;AVX: vmovapd (%rdi), %xmm1
+ ;AVX-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0
+ ;AVX-NEXT: retq
+
+ %1 = load <2 x double>, <2 x double>* %a0
+ %2 = fcmp olt <2 x double> %1, %a1
+ %3 = sext <2 x i1> %2 to <2 x i64>
+ ret <2 x i64> %3
+}
+
+define <2 x i64> @commute_cmppd_le(<2 x double>* %a0, <2 x double> %a1) #0 {
+ ;SSE-LABEL: commute_cmppd_le
+ ;SSE: movapd (%rdi), %xmm1
+ ;SSE-NEXT: cmplepd %xmm0, %xmm1
+ ;SSE-NEXT: movapd %xmm1, %xmm0
+ ;SSE-NEXT: retq
+
+ ;AVX-LABEL: commute_cmppd_le
+ ;AVX: vmovapd (%rdi), %xmm1
+ ;AVX-NEXT: vcmplepd %xmm0, %xmm1, %xmm0
+ ;AVX-NEXT: retq
+
+ %1 = load <2 x double>, <2 x double>* %a0
+ %2 = fcmp ole <2 x double> %1, %a1
+ %3 = sext <2 x i1> %2 to <2 x i64>
+ ret <2 x i64> %3
+}
+
+define <4 x i64> @commute_cmppd_eq_ymmm(<4 x double>* %a0, <4 x double> %a1) #0 {
+ ;AVX-LABEL: commute_cmppd_eq
+ ;AVX: vcmpeqpd (%rdi), %ymm0, %ymm0
+ ;AVX-NEXT: retq
+
+ %1 = load <4 x double>, <4 x double>* %a0
+ %2 = fcmp oeq <4 x double> %1, %a1
+ %3 = sext <4 x i1> %2 to <4 x i64>
+ ret <4 x i64> %3
+}
+
+define <4 x i64> @commute_cmppd_ne_ymmm(<4 x double>* %a0, <4 x double> %a1) #0 {
+ ;AVX-LABEL: commute_cmppd_ne
+ ;AVX: vcmpneqpd (%rdi), %ymm0, %ymm0
+ ;AVX-NEXT: retq
+
+ %1 = load <4 x double>, <4 x double>* %a0
+ %2 = fcmp une <4 x double> %1, %a1
+ %3 = sext <4 x i1> %2 to <4 x i64>
+ ret <4 x i64> %3
+}
+
+define <4 x i64> @commute_cmppd_ord_ymmm(<4 x double>* %a0, <4 x double> %a1) #0 {
+ ;AVX-LABEL: commute_cmppd_ord
+ ;AVX: vcmpordpd (%rdi), %ymm0, %ymm0
+ ;AVX-NEXT: retq
+
+ %1 = load <4 x double>, <4 x double>* %a0
+ %2 = fcmp ord <4 x double> %1, %a1
+ %3 = sext <4 x i1> %2 to <4 x i64>
+ ret <4 x i64> %3
+}
+
+define <4 x i64> @commute_cmppd_uno_ymmm(<4 x double>* %a0, <4 x double> %a1) #0 {
+ ;AVX-LABEL: commute_cmppd_uno
+ ;AVX: vcmpunordpd (%rdi), %ymm0, %ymm0
+ ;AVX-NEXT: retq
+
+ %1 = load <4 x double>, <4 x double>* %a0
+ %2 = fcmp uno <4 x double> %1, %a1
+ %3 = sext <4 x i1> %2 to <4 x i64>
+ ret <4 x i64> %3
+}
+
+define <4 x i64> @commute_cmppd_lt_ymmm(<4 x double>* %a0, <4 x double> %a1) #0 {
+ ;AVX-LABEL: commute_cmppd_lt
+ ;AVX: vmovapd (%rdi), %ymm1
+ ;AVX-NEXT: vcmpltpd %ymm0, %ymm1, %ymm0
+ ;AVX-NEXT: retq
+
+ %1 = load <4 x double>, <4 x double>* %a0
+ %2 = fcmp olt <4 x double> %1, %a1
+ %3 = sext <4 x i1> %2 to <4 x i64>
+ ret <4 x i64> %3
+}
+
+define <4 x i64> @commute_cmppd_le_ymmm(<4 x double>* %a0, <4 x double> %a1) #0 {
+ ;AVX-LABEL: commute_cmppd_le
+ ;AVX: vmovapd (%rdi), %ymm1
+ ;AVX-NEXT: vcmplepd %ymm0, %ymm1, %ymm0
+ ;AVX-NEXT: retq
+
+ %1 = load <4 x double>, <4 x double>* %a0
+ %2 = fcmp ole <4 x double> %1, %a1
+ %3 = sext <4 x i1> %2 to <4 x i64>
+ ret <4 x i64> %3
+}
diff --git a/test/CodeGen/X86/commute-intrinsic.ll b/test/CodeGen/X86/commute-intrinsic.ll
index 7d5ca4766892..ff9049cf96da 100644
--- a/test/CodeGen/X86/commute-intrinsic.ll
+++ b/test/CodeGen/X86/commute-intrinsic.ll
@@ -6,7 +6,7 @@
define <2 x i64> @madd(<2 x i64> %b) nounwind {
entry:
- %tmp2 = load <2 x i64>* @a, align 16 ; <<2 x i64>> [#uses=1]
+ %tmp2 = load <2 x i64>, <2 x i64>* @a, align 16 ; <<2 x i64>> [#uses=1]
%tmp6 = bitcast <2 x i64> %b to <8 x i16> ; <<8 x i16>> [#uses=1]
%tmp9 = bitcast <2 x i64> %tmp2 to <8 x i16> ; <<8 x i16>> [#uses=1]
%tmp11 = tail call <4 x i32> @llvm.x86.sse2.pmadd.wd( <8 x i16> %tmp9, <8 x i16> %tmp6 ) nounwind readnone ; <<4 x i32>> [#uses=1]
diff --git a/test/CodeGen/X86/commute-xop.ll b/test/CodeGen/X86/commute-xop.ll
new file mode 100644
index 000000000000..e551d9bfc78f
--- /dev/null
+++ b/test/CodeGen/X86/commute-xop.ll
@@ -0,0 +1,184 @@
+; RUN: llc -O3 -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx,+xop < %s | FileCheck %s
+
+define <16 x i8> @commute_fold_vpcomb(<16 x i8>* %a0, <16 x i8> %a1) {
+ ;CHECK-LABEL: commute_fold_vpcomb
+ ;CHECK: vpcomgtb (%rdi), %xmm0, %xmm0
+ %1 = load <16 x i8>, <16 x i8>* %a0
+ %2 = call <16 x i8> @llvm.x86.xop.vpcomb(<16 x i8> %1, <16 x i8> %a1, i8 0) ; vpcomltb
+ ret <16 x i8> %2
+}
+declare <16 x i8> @llvm.x86.xop.vpcomb(<16 x i8>, <16 x i8>, i8) nounwind readnone
+
+define <4 x i32> @commute_fold_vpcomd(<4 x i32>* %a0, <4 x i32> %a1) {
+ ;CHECK-LABEL: commute_fold_vpcomd
+ ;CHECK: vpcomged (%rdi), %xmm0, %xmm0
+ %1 = load <4 x i32>, <4 x i32>* %a0
+ %2 = call <4 x i32> @llvm.x86.xop.vpcomd(<4 x i32> %1, <4 x i32> %a1, i8 1) ; vpcomled
+ ret <4 x i32> %2
+}
+declare <4 x i32> @llvm.x86.xop.vpcomd(<4 x i32>, <4 x i32>, i8) nounwind readnone
+
+define <2 x i64> @commute_fold_vpcomq(<2 x i64>* %a0, <2 x i64> %a1) {
+ ;CHECK-LABEL: commute_fold_vpcomq
+ ;CHECK: vpcomltq (%rdi), %xmm0, %xmm0
+ %1 = load <2 x i64>, <2 x i64>* %a0
+ %2 = call <2 x i64> @llvm.x86.xop.vpcomq(<2 x i64> %1, <2 x i64> %a1, i8 2) ; vpcomgtq
+ ret <2 x i64> %2
+}
+declare <2 x i64> @llvm.x86.xop.vpcomq(<2 x i64>, <2 x i64>, i8) nounwind readnone
+
+define <16 x i8> @commute_fold_vpcomub(<16 x i8>* %a0, <16 x i8> %a1) {
+ ;CHECK-LABEL: commute_fold_vpcomub
+ ;CHECK: vpcomleub (%rdi), %xmm0, %xmm0
+ %1 = load <16 x i8>, <16 x i8>* %a0
+ %2 = call <16 x i8> @llvm.x86.xop.vpcomub(<16 x i8> %1, <16 x i8> %a1, i8 3) ; vpcomgeub
+ ret <16 x i8> %2
+}
+declare <16 x i8> @llvm.x86.xop.vpcomub(<16 x i8>, <16 x i8>, i8) nounwind readnone
+
+define <4 x i32> @commute_fold_vpcomud(<4 x i32>* %a0, <4 x i32> %a1) {
+ ;CHECK-LABEL: commute_fold_vpcomud
+ ;CHECK: vpcomequd (%rdi), %xmm0, %xmm0
+ %1 = load <4 x i32>, <4 x i32>* %a0
+ %2 = call <4 x i32> @llvm.x86.xop.vpcomud(<4 x i32> %1, <4 x i32> %a1, i8 4) ; vpcomequd
+ ret <4 x i32> %2
+}
+declare <4 x i32> @llvm.x86.xop.vpcomud(<4 x i32>, <4 x i32>, i8) nounwind readnone
+
+define <2 x i64> @commute_fold_vpcomuq(<2 x i64>* %a0, <2 x i64> %a1) {
+ ;CHECK-LABEL: commute_fold_vpcomuq
+ ;CHECK: vpcomnequq (%rdi), %xmm0, %xmm0
+ %1 = load <2 x i64>, <2 x i64>* %a0
+ %2 = call <2 x i64> @llvm.x86.xop.vpcomuq(<2 x i64> %1, <2 x i64> %a1, i8 5) ; vpcomnequq
+ ret <2 x i64> %2
+}
+declare <2 x i64> @llvm.x86.xop.vpcomuq(<2 x i64>, <2 x i64>, i8) nounwind readnone
+
+define <8 x i16> @commute_fold_vpcomuw(<8 x i16>* %a0, <8 x i16> %a1) {
+ ;CHECK-LABEL: commute_fold_vpcomuw
+ ;CHECK: vpcomfalseuw (%rdi), %xmm0, %xmm0
+ %1 = load <8 x i16>, <8 x i16>* %a0
+ %2 = call <8 x i16> @llvm.x86.xop.vpcomuw(<8 x i16> %1, <8 x i16> %a1, i8 6) ; vpcomfalseuw
+ ret <8 x i16> %2
+}
+declare <8 x i16> @llvm.x86.xop.vpcomuw(<8 x i16>, <8 x i16>, i8) nounwind readnone
+
+define <8 x i16> @commute_fold_vpcomw(<8 x i16>* %a0, <8 x i16> %a1) {
+ ;CHECK-LABEL: commute_fold_vpcomw
+ ;CHECK: vpcomtruew (%rdi), %xmm0, %xmm0
+ %1 = load <8 x i16>, <8 x i16>* %a0
+ %2 = call <8 x i16> @llvm.x86.xop.vpcomw(<8 x i16> %1, <8 x i16> %a1, i8 7) ; vpcomtruew
+ ret <8 x i16> %2
+}
+declare <8 x i16> @llvm.x86.xop.vpcomw(<8 x i16>, <8 x i16>, i8) nounwind readnone
+
+define <4 x i32> @commute_fold_vpmacsdd(<4 x i32>* %a0, <4 x i32> %a1, <4 x i32> %a2) {
+ ;CHECK-LABEL: commute_fold_vpmacsdd
+ ;CHECK: vpmacsdd %xmm1, (%rdi), %xmm0, %xmm0
+ %1 = load <4 x i32>, <4 x i32>* %a0
+ %2 = call <4 x i32> @llvm.x86.xop.vpmacsdd(<4 x i32> %1, <4 x i32> %a1, <4 x i32> %a2)
+ ret <4 x i32> %2
+}
+declare <4 x i32> @llvm.x86.xop.vpmacsdd(<4 x i32>, <4 x i32>, <4 x i32>) nounwind readnone
+
+define <2 x i64> @commute_fold_vpmacsdqh(<4 x i32>* %a0, <4 x i32> %a1, <2 x i64> %a2) {
+ ;CHECK-LABEL: commute_fold_vpmacsdqh
+ ;CHECK: vpmacsdqh %xmm1, (%rdi), %xmm0, %xmm0
+ %1 = load <4 x i32>, <4 x i32>* %a0
+ %2 = call <2 x i64> @llvm.x86.xop.vpmacsdqh(<4 x i32> %1, <4 x i32> %a1, <2 x i64> %a2)
+ ret <2 x i64> %2
+}
+declare <2 x i64> @llvm.x86.xop.vpmacsdqh(<4 x i32>, <4 x i32>, <2 x i64>) nounwind readnone
+
+define <2 x i64> @commute_fold_vpmacsdql(<4 x i32>* %a0, <4 x i32> %a1, <2 x i64> %a2) {
+ ;CHECK-LABEL: commute_fold_vpmacsdql
+ ;CHECK: vpmacsdql %xmm1, (%rdi), %xmm0, %xmm0
+ %1 = load <4 x i32>, <4 x i32>* %a0
+ %2 = call <2 x i64> @llvm.x86.xop.vpmacsdql(<4 x i32> %1, <4 x i32> %a1, <2 x i64> %a2)
+ ret <2 x i64> %2
+}
+declare <2 x i64> @llvm.x86.xop.vpmacsdql(<4 x i32>, <4 x i32>, <2 x i64>) nounwind readnone
+
+define <4 x i32> @commute_fold_vpmacssdd(<4 x i32>* %a0, <4 x i32> %a1, <4 x i32> %a2) {
+ ;CHECK-LABEL: commute_fold_vpmacssdd
+ ;CHECK: vpmacssdd %xmm1, (%rdi), %xmm0, %xmm0
+ %1 = load <4 x i32>, <4 x i32>* %a0
+ %2 = call <4 x i32> @llvm.x86.xop.vpmacssdd(<4 x i32> %1, <4 x i32> %a1, <4 x i32> %a2)
+ ret <4 x i32> %2
+}
+declare <4 x i32> @llvm.x86.xop.vpmacssdd(<4 x i32>, <4 x i32>, <4 x i32>) nounwind readnone
+
+define <2 x i64> @commute_fold_vpmacssdqh(<4 x i32>* %a0, <4 x i32> %a1, <2 x i64> %a2) {
+ ;CHECK-LABEL: commute_fold_vpmacssdqh
+ ;CHECK: vpmacssdqh %xmm1, (%rdi), %xmm0, %xmm0
+ %1 = load <4 x i32>, <4 x i32>* %a0
+ %2 = call <2 x i64> @llvm.x86.xop.vpmacssdqh(<4 x i32> %1, <4 x i32> %a1, <2 x i64> %a2)
+ ret <2 x i64> %2
+}
+declare <2 x i64> @llvm.x86.xop.vpmacssdqh(<4 x i32>, <4 x i32>, <2 x i64>) nounwind readnone
+
+define <2 x i64> @commute_fold_vpmacssdql(<4 x i32>* %a0, <4 x i32> %a1, <2 x i64> %a2) {
+ ;CHECK-LABEL: commute_fold_vpmacssdql
+ ;CHECK: vpmacssdql %xmm1, (%rdi), %xmm0, %xmm0
+ %1 = load <4 x i32>, <4 x i32>* %a0
+ %2 = call <2 x i64> @llvm.x86.xop.vpmacssdql(<4 x i32> %1, <4 x i32> %a1, <2 x i64> %a2)
+ ret <2 x i64> %2
+}
+declare <2 x i64> @llvm.x86.xop.vpmacssdql(<4 x i32>, <4 x i32>, <2 x i64>) nounwind readnone
+
+define <4 x i32> @commute_fold_vpmacsswd(<8 x i16>* %a0, <8 x i16> %a1, <4 x i32> %a2) {
+ ;CHECK-LABEL: commute_fold_vpmacsswd
+ ;CHECK: vpmacsswd %xmm1, (%rdi), %xmm0, %xmm0
+ %1 = load <8 x i16>, <8 x i16>* %a0
+ %2 = call <4 x i32> @llvm.x86.xop.vpmacsswd(<8 x i16> %1, <8 x i16> %a1, <4 x i32> %a2)
+ ret <4 x i32> %2
+}
+declare <4 x i32> @llvm.x86.xop.vpmacsswd(<8 x i16>, <8 x i16>, <4 x i32>) nounwind readnone
+
+define <8 x i16> @commute_fold_vpmacssww(<8 x i16>* %a0, <8 x i16> %a1, <8 x i16> %a2) {
+ ;CHECK-LABEL: commute_fold_vpmacssww
+ ;CHECK: vpmacssww %xmm1, (%rdi), %xmm0, %xmm0
+ %1 = load <8 x i16>, <8 x i16>* %a0
+ %2 = call <8 x i16> @llvm.x86.xop.vpmacssww(<8 x i16> %1, <8 x i16> %a1, <8 x i16> %a2)
+ ret <8 x i16> %2
+}
+declare <8 x i16> @llvm.x86.xop.vpmacssww(<8 x i16>, <8 x i16>, <8 x i16>) nounwind readnone
+
+define <4 x i32> @commute_fold_vpmacswd(<8 x i16>* %a0, <8 x i16> %a1, <4 x i32> %a2) {
+ ;CHECK-LABEL: commute_fold_vpmacswd
+ ;CHECK: vpmacswd %xmm1, (%rdi), %xmm0, %xmm0
+ %1 = load <8 x i16>, <8 x i16>* %a0
+ %2 = call <4 x i32> @llvm.x86.xop.vpmacswd(<8 x i16> %1, <8 x i16> %a1, <4 x i32> %a2)
+ ret <4 x i32> %2
+}
+declare <4 x i32> @llvm.x86.xop.vpmacswd(<8 x i16>, <8 x i16>, <4 x i32>) nounwind readnone
+
+define <8 x i16> @commute_fold_vpmacsww(<8 x i16>* %a0, <8 x i16> %a1, <8 x i16> %a2) {
+ ;CHECK-LABEL: commute_fold_vpmacsww
+ ;CHECK: vpmacsww %xmm1, (%rdi), %xmm0, %xmm0
+ %1 = load <8 x i16>, <8 x i16>* %a0
+ %2 = call <8 x i16> @llvm.x86.xop.vpmacsww(<8 x i16> %1, <8 x i16> %a1, <8 x i16> %a2)
+ ret <8 x i16> %2
+}
+declare <8 x i16> @llvm.x86.xop.vpmacsww(<8 x i16>, <8 x i16>, <8 x i16>) nounwind readnone
+
+define <4 x i32> @commute_fold_vpmadcsswd(<8 x i16>* %a0, <8 x i16> %a1, <4 x i32> %a2) {
+ ;CHECK-LABEL: commute_fold_vpmadcsswd
+ ;CHECK: vpmadcsswd %xmm1, (%rdi), %xmm0, %xmm0
+ %1 = load <8 x i16>, <8 x i16>* %a0
+ %2 = call <4 x i32> @llvm.x86.xop.vpmadcsswd(<8 x i16> %1, <8 x i16> %a1, <4 x i32> %a2)
+ ret <4 x i32> %2
+}
+declare <4 x i32> @llvm.x86.xop.vpmadcsswd(<8 x i16>, <8 x i16>, <4 x i32>) nounwind readnone
+
+define <4 x i32> @commute_fold_vpmadcswd(<8 x i16>* %a0, <8 x i16> %a1, <4 x i32> %a2) {
+ ;CHECK-LABEL: commute_fold_vpmadcswd
+ ;CHECK: vpmadcswd %xmm1, (%rdi), %xmm0, %xmm0
+ %1 = load <8 x i16>, <8 x i16>* %a0
+ %2 = call <4 x i32> @llvm.x86.xop.vpmadcswd(<8 x i16> %1, <8 x i16> %a1, <4 x i32> %a2)
+ ret <4 x i32> %2
+}
+declare <4 x i32> @llvm.x86.xop.vpmadcswd(<8 x i16>, <8 x i16>, <4 x i32>) nounwind readnone
+
+
+
diff --git a/test/CodeGen/X86/compact-unwind.ll b/test/CodeGen/X86/compact-unwind.ll
index d3b89a54e0b8..f8266a10cfb5 100644
--- a/test/CodeGen/X86/compact-unwind.ll
+++ b/test/CodeGen/X86/compact-unwind.ll
@@ -39,13 +39,13 @@
define i8* @test0(i64 %size) {
%addr = alloca i64, align 8
- %tmp20 = load i32* @gv, align 4
+ %tmp20 = load i32, i32* @gv, align 4
%tmp21 = call i32 @bar()
- %tmp25 = load i64* %addr, align 8
+ %tmp25 = load i64, i64* %addr, align 8
%tmp26 = inttoptr i64 %tmp25 to %ty*
- %tmp29 = getelementptr inbounds %ty* %tmp26, i64 0, i32 0
- %tmp34 = load i8** %tmp29, align 8
- %tmp35 = getelementptr inbounds i8* %tmp34, i64 %size
+ %tmp29 = getelementptr inbounds %ty, %ty* %tmp26, i64 0, i32 0
+ %tmp34 = load i8*, i8** %tmp29, align 8
+ %tmp35 = getelementptr inbounds i8, i8* %tmp34, i64 %size
store i8* %tmp35, i8** %tmp29, align 8
ret i8* null
}
@@ -84,8 +84,8 @@ for.cond1.preheader: ; preds = %for.inc10, %entry
for.body3: ; preds = %for.inc, %for.cond1.preheader
%indvars.iv = phi i64 [ 0, %for.cond1.preheader ], [ %indvars.iv.next, %for.inc ]
- %image4 = getelementptr inbounds %"struct.dyld::MappedRanges"* %p.019, i64 0, i32 0, i64 %indvars.iv, i32 0
- %0 = load %class.ImageLoader** %image4, align 8
+ %image4 = getelementptr inbounds %"struct.dyld::MappedRanges", %"struct.dyld::MappedRanges"* %p.019, i64 0, i32 0, i64 %indvars.iv, i32 0
+ %0 = load %class.ImageLoader*, %class.ImageLoader** %image4, align 8
%cmp5 = icmp eq %class.ImageLoader* %0, %image
br i1 %cmp5, label %if.then, label %for.inc
@@ -101,8 +101,8 @@ for.inc: ; preds = %if.then, %for.body3
br i1 %exitcond, label %for.inc10, label %for.body3
for.inc10: ; preds = %for.inc
- %next = getelementptr inbounds %"struct.dyld::MappedRanges"* %p.019, i64 0, i32 1
- %1 = load %"struct.dyld::MappedRanges"** %next, align 8
+ %next = getelementptr inbounds %"struct.dyld::MappedRanges", %"struct.dyld::MappedRanges"* %p.019, i64 0, i32 1
+ %1 = load %"struct.dyld::MappedRanges"*, %"struct.dyld::MappedRanges"** %next, align 8
%cmp = icmp eq %"struct.dyld::MappedRanges"* %1, null
br i1 %cmp, label %for.end11, label %for.cond1.preheader
diff --git a/test/CodeGen/X86/complex-asm.ll b/test/CodeGen/X86/complex-asm.ll
index 49878b982db3..d7b5879309da 100644
--- a/test/CodeGen/X86/complex-asm.ll
+++ b/test/CodeGen/X86/complex-asm.ll
@@ -7,10 +7,10 @@ define %0 @f() nounwind ssp {
entry:
%v = alloca %0, align 8
call void asm sideeffect "", "=*r,r,r,0,~{dirflag},~{fpsr},~{flags}"(%0* %v, i32 0, i32 1, i128 undef) nounwind
- %0 = getelementptr inbounds %0* %v, i64 0, i32 0
- %1 = load i64* %0, align 8
- %2 = getelementptr inbounds %0* %v, i64 0, i32 1
- %3 = load i64* %2, align 8
+ %0 = getelementptr inbounds %0, %0* %v, i64 0, i32 0
+ %1 = load i64, i64* %0, align 8
+ %2 = getelementptr inbounds %0, %0* %v, i64 0, i32 1
+ %3 = load i64, i64* %2, align 8
%mrv4 = insertvalue %0 undef, i64 %1, 0
%mrv5 = insertvalue %0 %mrv4, i64 %3, 1
ret %0 %mrv5
diff --git a/test/CodeGen/X86/complex-fca.ll b/test/CodeGen/X86/complex-fca.ll
index 8ad38a4ee5c0..78b27b7dc3f5 100644
--- a/test/CodeGen/X86/complex-fca.ll
+++ b/test/CodeGen/X86/complex-fca.ll
@@ -1,17 +1,21 @@
-; RUN: llc < %s -march=x86 | grep mov | count 2
-
-; Skip this on Windows as there is no ccosl and sret behaves differently.
-; XFAIL: pc-win32
+; RUN: llc < %s -march=x86 | FileCheck %s
define void @ccosl({ x86_fp80, x86_fp80 }* noalias sret %agg.result, { x86_fp80, x86_fp80 } %z) nounwind {
entry:
- %z8 = extractvalue { x86_fp80, x86_fp80 } %z, 0
- %z9 = extractvalue { x86_fp80, x86_fp80 } %z, 1
- %0 = fsub x86_fp80 0xK80000000000000000000, %z9
- %insert = insertvalue { x86_fp80, x86_fp80 } undef, x86_fp80 %0, 0
- %insert7 = insertvalue { x86_fp80, x86_fp80 } %insert, x86_fp80 %z8, 1
- call void @ccoshl({ x86_fp80, x86_fp80 }* noalias sret %agg.result, { x86_fp80, x86_fp80 } %insert7) nounwind
- ret void
+ %z8 = extractvalue { x86_fp80, x86_fp80 } %z, 0
+ %z9 = extractvalue { x86_fp80, x86_fp80 } %z, 1
+ %0 = fsub x86_fp80 0xK80000000000000000000, %z9
+ %insert = insertvalue { x86_fp80, x86_fp80 } undef, x86_fp80 %0, 0
+ %insert7 = insertvalue { x86_fp80, x86_fp80 } %insert, x86_fp80 %z8, 1
+ call void @ccoshl({ x86_fp80, x86_fp80 }* noalias sret %agg.result, { x86_fp80, x86_fp80 } %insert7) nounwind
+ ret void
}
+; CHECK-LABEL: ccosl:
+; CHECK: movl {{[0-9]+}}(%esp), %[[sret_reg:[^ ]+]]
+; CHECK: movl %[[sret_reg]], (%esp)
+; CHECK: calll {{.*ccoshl.*}}
+; CHECK: movl %[[sret_reg]], %eax
+; CHECK: retl
+
declare void @ccoshl({ x86_fp80, x86_fp80 }* noalias sret, { x86_fp80, x86_fp80 }) nounwind
diff --git a/test/CodeGen/X86/computeKnownBits_urem.ll b/test/CodeGen/X86/computeKnownBits_urem.ll
index 9902e6f2597b..a72740e19572 100644
--- a/test/CodeGen/X86/computeKnownBits_urem.ll
+++ b/test/CodeGen/X86/computeKnownBits_urem.ll
@@ -3,7 +3,7 @@ define i32 @main() #0 {
entry:
%a = alloca i32, align 4
store i32 1, i32* %a, align 4
- %0 = load i32* %a, align 4
+ %0 = load i32, i32* %a, align 4
%or = or i32 1, %0
%and = and i32 1, %or
%rem = urem i32 %and, 1
diff --git a/test/CodeGen/X86/const-base-addr.ll b/test/CodeGen/X86/const-base-addr.ll
index f859d7fafff3..42647136fe30 100644
--- a/test/CodeGen/X86/const-base-addr.ll
+++ b/test/CodeGen/X86/const-base-addr.ll
@@ -11,12 +11,12 @@ define i32 @test1() nounwind {
; CHECK-NEXT: movl 4(%rcx), %eax
; CHECK-NEXT: addl 8(%rcx), %eax
; CHECK-NEXT: addl 12(%rcx), %eax
- %addr1 = getelementptr %T* inttoptr (i64 123456789012345678 to %T*), i32 0, i32 1
- %tmp1 = load i32* %addr1
- %addr2 = getelementptr %T* inttoptr (i64 123456789012345678 to %T*), i32 0, i32 2
- %tmp2 = load i32* %addr2
- %addr3 = getelementptr %T* inttoptr (i64 123456789012345678 to %T*), i32 0, i32 3
- %tmp3 = load i32* %addr3
+ %addr1 = getelementptr %T, %T* inttoptr (i64 123456789012345678 to %T*), i32 0, i32 1
+ %tmp1 = load i32, i32* %addr1
+ %addr2 = getelementptr %T, %T* inttoptr (i64 123456789012345678 to %T*), i32 0, i32 2
+ %tmp2 = load i32, i32* %addr2
+ %addr3 = getelementptr %T, %T* inttoptr (i64 123456789012345678 to %T*), i32 0, i32 3
+ %tmp3 = load i32, i32* %addr3
%tmp4 = add i32 %tmp1, %tmp2
%tmp5 = add i32 %tmp3, %tmp4
ret i32 %tmp5
diff --git a/test/CodeGen/X86/constant-combines.ll b/test/CodeGen/X86/constant-combines.ll
index d2a6ef4f5d25..5ea736e92c78 100644
--- a/test/CodeGen/X86/constant-combines.ll
+++ b/test/CodeGen/X86/constant-combines.ll
@@ -14,13 +14,13 @@ define void @PR22524({ float, float }* %arg) {
;
; CHECK-LABEL: PR22524:
entry:
- %0 = getelementptr inbounds { float, float }* %arg, i32 0, i32 1
+ %0 = getelementptr inbounds { float, float }, { float, float }* %arg, i32 0, i32 1
store float 0.000000e+00, float* %0, align 4
; CHECK: movl $0, 4(%rdi)
- %1 = getelementptr inbounds { float, float }* %arg, i64 0, i32 0
+ %1 = getelementptr inbounds { float, float }, { float, float }* %arg, i64 0, i32 0
%2 = bitcast float* %1 to i64*
- %3 = load i64* %2, align 8
+ %3 = load i64, i64* %2, align 8
%4 = trunc i64 %3 to i32
%5 = lshr i64 %3, 32
%6 = trunc i64 %5 to i32
diff --git a/test/CodeGen/X86/constant-hoisting-optnone.ll b/test/CodeGen/X86/constant-hoisting-optnone.ll
new file mode 100644
index 000000000000..4d8a06c444da
--- /dev/null
+++ b/test/CodeGen/X86/constant-hoisting-optnone.ll
@@ -0,0 +1,21 @@
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=generic | FileCheck %s
+;
+; Verify that pass 'Constant Hoisting' is not run on optnone functions.
+; Without optnone, Pass 'Constant Hoisting' would firstly hoist
+; constant 0xBEEBEEBEC, and then rebase the other constant
+; (i.e. constant 0xBEEBEEBF4) with respect to the previous one.
+; With optnone, we check that constants are not coalesced.
+
+define i64 @constant_hoisting_optnone() #0 {
+; CHECK-LABEL: @constant_hoisting_optnone
+; CHECK-DAG: movabsq {{.*#+}} imm = 0xBEEBEEBF4
+; CHECK-DAG: movabsq {{.*#+}} imm = 0xBEEBEEBEC
+; CHECK: ret
+entry:
+ %0 = load i64, i64* inttoptr (i64 51250129900 to i64*)
+ %1 = load i64, i64* inttoptr (i64 51250129908 to i64*)
+ %2 = add i64 %0, %1
+ ret i64 %2
+}
+
+attributes #0 = { optnone noinline }
diff --git a/test/CodeGen/X86/constant-hoisting-shift-immediate.ll b/test/CodeGen/X86/constant-hoisting-shift-immediate.ll
index 883be355bd36..65c26f818a6a 100644
--- a/test/CodeGen/X86/constant-hoisting-shift-immediate.ll
+++ b/test/CodeGen/X86/constant-hoisting-shift-immediate.ll
@@ -6,7 +6,7 @@ define i64 @foo(i1 %z, i192* %p, i192* %q)
; be in another basic block. As a result, a very inefficient code might be
; produced. Here we check that this doesn't occur.
entry:
- %data1 = load i192* %p, align 8
+ %data1 = load i192, i192* %p, align 8
%lshr1 = lshr i192 %data1, 128
%val1 = trunc i192 %lshr1 to i64
br i1 %z, label %End, label %L_val2
@@ -14,7 +14,7 @@ entry:
; CHECK: movq 16(%rdx), %rax
; CHECK-NEXT: retq
L_val2:
- %data2 = load i192* %q, align 8
+ %data2 = load i192, i192* %q, align 8
%lshr2 = lshr i192 %data2, 128
%val2 = trunc i192 %lshr2 to i64
br label %End
diff --git a/test/CodeGen/X86/constructor.ll b/test/CodeGen/X86/constructor.ll
index 7160dcc614c0..e7c846045f01 100644
--- a/test/CodeGen/X86/constructor.ll
+++ b/test/CodeGen/X86/constructor.ll
@@ -1,5 +1,6 @@
; RUN: llc -mtriple x86_64-pc-linux -use-ctors < %s | FileCheck --check-prefix=CTOR %s
; RUN: llc -mtriple x86_64-pc-linux < %s | FileCheck --check-prefix=INIT-ARRAY %s
+; RUN: llc -mtriple x86_64-unknown-nacl < %s | FileCheck --check-prefix=NACL %s
@llvm.global_ctors = appending global [2 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 65535, void ()* @f, i8* null}, { i32, void ()*, i8* } { i32 15, void ()* @g, i8* @v }]
@v = weak_odr global i8 0
@@ -27,3 +28,10 @@ entry:
; INIT-ARRAY-NEXT: .section .init_array,"aw",@init_array
; INIT-ARRAY-NEXT: .align 8
; INIT-ARRAY-NEXT: .quad f
+
+; NACL: .section .init_array.15,"aGw",@init_array,v,comdat
+; NACL-NEXT: .align 4
+; NACL-NEXT: .long g
+; NACL-NEXT: .section .init_array,"aw",@init_array
+; NACL-NEXT: .align 4
+; NACL-NEXT: .long f
diff --git a/test/CodeGen/X86/convert-2-addr-3-addr-inc64.ll b/test/CodeGen/X86/convert-2-addr-3-addr-inc64.ll
index 74a7240c8190..7fc56f5accc2 100644
--- a/test/CodeGen/X86/convert-2-addr-3-addr-inc64.ll
+++ b/test/CodeGen/X86/convert-2-addr-3-addr-inc64.ll
@@ -11,8 +11,8 @@ define fastcc zeroext i8 @fullGtU(i32 %i1, i32 %i2, i8* %ptr) nounwind optsize {
entry:
%0 = add i32 %i2, 1 ; <i32> [#uses=1]
%1 = sext i32 %0 to i64 ; <i64> [#uses=1]
- %2 = getelementptr i8* %ptr, i64 %1 ; <i8*> [#uses=1]
- %3 = load i8* %2, align 1 ; <i8> [#uses=1]
+ %2 = getelementptr i8, i8* %ptr, i64 %1 ; <i8*> [#uses=1]
+ %3 = load i8, i8* %2, align 1 ; <i8> [#uses=1]
%4 = icmp eq i8 0, %3 ; <i1> [#uses=1]
br i1 %4, label %bb3, label %bb34
diff --git a/test/CodeGen/X86/copy-propagation.ll b/test/CodeGen/X86/copy-propagation.ll
new file mode 100644
index 000000000000..19421a06fa82
--- /dev/null
+++ b/test/CodeGen/X86/copy-propagation.ll
@@ -0,0 +1,44 @@
+; RUN: llc %s -mattr=+avx -o - | FileCheck %s
+; PR21743.
+
+target triple = "x86_64-pc-win32-elf"
+
+; Check that copy propagation conservatively assumes that undef register
+; can be rewritten by the backend to break false dependencies for the
+; hardware.
+; In this function we are in this situation:
+; reg1 = copy reg2
+; = inst reg2<undef>
+; reg2 = copy reg1
+; Copy propagation used to remove the last copy.
+; This is incorrect because the undef flag on reg2 in inst, allows next
+; passes to put whatever trashed value in reg2 that may help.
+; In practice we end up with this code:
+; reg1 = copy reg2
+; reg2 = 0
+; = inst reg2<undef>
+; reg2 = copy reg1
+; Therefore, removing the last copy is wrong.
+;
+; CHECK-LABEL: foo:
+; CHECK: movl $339752784, %e[[INDIRECT_CALL1:[a-z]+]]
+; CHECK: callq *%r[[INDIRECT_CALL1]]
+; Copy the result in a temporary.
+; Note: Technically the regalloc could have been smarter and this move not required,
+; which would have hidden the bug.
+; CHECK-NEXT: vmovapd %xmm0, [[TMP:%xmm[0-9]+]]
+; Crush xmm0.
+; CHECK-NEXT: vxorps %xmm0, %xmm0, %xmm0
+; CHECK: movl $339772768, %e[[INDIRECT_CALL2:[a-z]+]]
+; Set TMP in the first argument of the second call.
+; CHECK-NEXT: vmovapd [[TMP]], %xmm0
+; CHECK: callq *%r[[INDIRECT_CALL2]]
+; CHECK: retq
+define double @foo(i64 %arg) {
+top:
+ %tmp = call double inttoptr (i64 339752784 to double (double, double)*)(double 1.000000e+00, double 0.000000e+00)
+ %tmp1 = sitofp i64 %arg to double
+ call void inttoptr (i64 339772768 to void (double, double)*)(double %tmp, double %tmp1)
+ %tmp3 = fadd double %tmp1, %tmp
+ ret double %tmp3
+}
diff --git a/test/CodeGen/X86/crash-O0.ll b/test/CodeGen/X86/crash-O0.ll
index df8eaaf442b7..dab15c19c69e 100644
--- a/test/CodeGen/X86/crash-O0.ll
+++ b/test/CodeGen/X86/crash-O0.ll
@@ -44,8 +44,8 @@ entry:
; CHECK-NEXT: idivq
; CHECK: retq
define i64 @addressModeWith32bitIndex(i32 %V) {
- %gep = getelementptr i64* null, i32 %V
- %load = load i64* %gep
+ %gep = getelementptr i64, i64* null, i32 %V
+ %load = load i64, i64* %gep
%sdiv = sdiv i64 0, %load
ret i64 %sdiv
}
diff --git a/test/CodeGen/X86/crash-nosse.ll b/test/CodeGen/X86/crash-nosse.ll
index b1e01f94c9e6..aff120dbb842 100644
--- a/test/CodeGen/X86/crash-nosse.ll
+++ b/test/CodeGen/X86/crash-nosse.ll
@@ -11,7 +11,7 @@ BB:
br label %CF
CF: ; preds = %CF, %BB
- %L19 = load <8 x float>* %S17
+ %L19 = load <8 x float>, <8 x float>* %S17
%BC = bitcast <32 x i32> %Shuff6 to <32 x float>
%S28 = fcmp ord double 0x3ED1A1F787BB2185, 0x3EE59DE55A8DF890
br i1 %S28, label %CF, label %CF39
diff --git a/test/CodeGen/X86/crash.ll b/test/CodeGen/X86/crash.ll
index 6b3dd3675750..a95b84d4c3b0 100644
--- a/test/CodeGen/X86/crash.ll
+++ b/test/CodeGen/X86/crash.ll
@@ -7,9 +7,9 @@
; Chain and flag folding issues.
define i32 @test1() nounwind ssp {
entry:
- %tmp5.i = load volatile i32* undef ; <i32> [#uses=1]
+ %tmp5.i = load volatile i32, i32* undef ; <i32> [#uses=1]
%conv.i = zext i32 %tmp5.i to i64 ; <i64> [#uses=1]
- %tmp12.i = load volatile i32* undef ; <i32> [#uses=1]
+ %tmp12.i = load volatile i32, i32* undef ; <i32> [#uses=1]
%conv13.i = zext i32 %tmp12.i to i64 ; <i64> [#uses=1]
%shl.i = shl i64 %conv13.i, 32 ; <i64> [#uses=1]
%or.i = or i64 %shl.i, %conv.i ; <i64> [#uses=1]
@@ -40,7 +40,7 @@ if.end: ; preds = %land.end
define void @test3() {
dependentGraph243.exit:
- %subject19 = load %pair* undef ; <%1> [#uses=1]
+ %subject19 = load %pair, %pair* undef ; <%1> [#uses=1]
%0 = extractvalue %pair %subject19, 1 ; <double> [#uses=2]
%1 = select i1 undef, double %0, double undef ; <double> [#uses=1]
%2 = select i1 undef, double %1, double %0 ; <double> [#uses=1]
@@ -52,7 +52,7 @@ dependentGraph243.exit:
; PR6605
define i64 @test4(i8* %P) nounwind ssp {
entry:
- %tmp1 = load i8* %P ; <i8> [#uses=3]
+ %tmp1 = load i8, i8* %P ; <i8> [#uses=3]
%tobool = icmp eq i8 %tmp1, 0 ; <i1> [#uses=1]
%tmp58 = sext i1 %tobool to i8 ; <i8> [#uses=1]
%mul.i = and i8 %tmp58, %tmp1 ; <i8> [#uses=1]
@@ -76,7 +76,7 @@ declare i32 @safe(i32)
; PR6607
define fastcc void @test5(i32 %FUNC) nounwind {
foo:
- %0 = load i8* undef, align 1 ; <i8> [#uses=3]
+ %0 = load i8, i8* undef, align 1 ; <i8> [#uses=3]
%1 = sext i8 %0 to i32 ; <i32> [#uses=2]
%2 = zext i8 %0 to i32 ; <i32> [#uses=1]
%tmp1.i5037 = urem i32 %2, 10 ; <i32> [#uses=1]
@@ -121,7 +121,7 @@ entry:
bb14:
%tmp0 = trunc i16 undef to i1
- %tmp1 = load i8* undef, align 8
+ %tmp1 = load i8, i8* undef, align 8
%tmp2 = shl i8 %tmp1, 4
%tmp3 = lshr i8 %tmp2, 7
%tmp4 = trunc i8 %tmp3 to i1
@@ -174,12 +174,12 @@ for.body22: ; preds = %for.body22, %bb.nph
%l_75.077 = phi i64 [ %ins, %for.body22 ], [ undef, %bb.nph81 ]
%tmp110 = trunc i64 %l_75.077 to i32
%tmp111 = and i32 %tmp110, 65535
- %arrayidx32.0 = getelementptr [9 x [5 x [2 x %struct.S0]]]* undef, i32 0, i32 %l_74.0, i32 %tmp98, i32 %tmp111, i32 0
+ %arrayidx32.0 = getelementptr [9 x [5 x [2 x %struct.S0]]], [9 x [5 x [2 x %struct.S0]]]* undef, i32 0, i32 %l_74.0, i32 %tmp98, i32 %tmp111, i32 0
store i8 1, i8* %arrayidx32.0, align 4
%tmp106 = shl i32 %tmp110, 2
%tmp107 = and i32 %tmp106, 262140
%scevgep99.sum114 = or i32 %tmp107, 1
- %arrayidx32.1.1 = getelementptr [9 x [5 x [2 x %struct.S0]]]* undef, i32 0, i32 %l_74.0, i32 %tmp98, i32 0, i32 1, i32 %scevgep99.sum114
+ %arrayidx32.1.1 = getelementptr [9 x [5 x [2 x %struct.S0]]], [9 x [5 x [2 x %struct.S0]]]* undef, i32 0, i32 %l_74.0, i32 %tmp98, i32 0, i32 1, i32 %scevgep99.sum114
store i8 0, i8* %arrayidx32.1.1, align 1
%ins = or i64 undef, undef
br label %for.body22
@@ -239,9 +239,9 @@ declare i64 @llvm.objectsize.i64.p0i8(i8*, i1) nounwind readnone
define void @_ZNK4llvm17MipsFrameLowering12emitPrologueERNS_15MachineFunctionE() ssp align 2 {
bb:
- %tmp = load %t9** undef, align 4
- %tmp2 = getelementptr inbounds %t9* %tmp, i32 0, i32 0
- %tmp3 = getelementptr inbounds %t9* %tmp, i32 0, i32 0, i32 0, i32 0, i32 1
+ %tmp = load %t9*, %t9** undef, align 4
+ %tmp2 = getelementptr inbounds %t9, %t9* %tmp, i32 0, i32 0
+ %tmp3 = getelementptr inbounds %t9, %t9* %tmp, i32 0, i32 0, i32 0, i32 0, i32 1
br label %bb4
bb4: ; preds = %bb37, %bb
@@ -250,25 +250,25 @@ bb4: ; preds = %bb37, %bb
br i1 undef, label %bb34, label %bb7
bb7: ; preds = %bb4
- %tmp8 = load i32* undef, align 4
+ %tmp8 = load i32, i32* undef, align 4
%tmp9 = and i96 %tmp6, 4294967040
%tmp10 = zext i32 %tmp8 to i96
%tmp11 = shl nuw nsw i96 %tmp10, 32
%tmp12 = or i96 %tmp9, %tmp11
%tmp13 = or i96 %tmp12, 1
- %tmp14 = load i32* undef, align 4
+ %tmp14 = load i32, i32* undef, align 4
%tmp15 = and i96 %tmp5, 4294967040
%tmp16 = zext i32 %tmp14 to i96
%tmp17 = shl nuw nsw i96 %tmp16, 32
%tmp18 = or i96 %tmp15, %tmp17
%tmp19 = or i96 %tmp18, 1
- %tmp20 = load i8* undef, align 1
+ %tmp20 = load i8, i8* undef, align 1
%tmp21 = and i8 %tmp20, 1
%tmp22 = icmp ne i8 %tmp21, 0
%tmp23 = select i1 %tmp22, i96 %tmp19, i96 %tmp13
%tmp24 = select i1 %tmp22, i96 %tmp13, i96 %tmp19
store i96 %tmp24, i96* undef, align 4
- %tmp25 = load %t13** %tmp3, align 4
+ %tmp25 = load %t13*, %t13** %tmp3, align 4
%tmp26 = icmp eq %t13* %tmp25, undef
br i1 %tmp26, label %bb28, label %bb27
@@ -281,11 +281,11 @@ bb28: ; preds = %bb7
bb29: ; preds = %bb28, %bb27
store i96 %tmp23, i96* undef, align 4
- %tmp30 = load %t13** %tmp3, align 4
+ %tmp30 = load %t13*, %t13** %tmp3, align 4
br i1 false, label %bb33, label %bb31
bb31: ; preds = %bb29
- %tmp32 = getelementptr inbounds %t13* %tmp30, i32 1
+ %tmp32 = getelementptr inbounds %t13, %t13* %tmp30, i32 1
store %t13* %tmp32, %t13** %tmp3, align 4
br label %bb37
@@ -348,13 +348,13 @@ entry:
br label %"4"
"3":
- %0 = load <2 x i32>* null, align 8
+ %0 = load <2 x i32>, <2 x i32>* null, align 8
%1 = xor <2 x i32> zeroinitializer, %0
%2 = and <2 x i32> %1, %6
%3 = or <2 x i32> undef, %2
%4 = and <2 x i32> %3, undef
store <2 x i32> %4, <2 x i32>* undef
- %5 = load <2 x i32>* undef, align 1
+ %5 = load <2 x i32>, <2 x i32>* undef, align 1
br label %"4"
"4":
@@ -378,7 +378,7 @@ entry:
@__force_order = external hidden global i32, align 4
define void @pr11078(i32* %pgd) nounwind {
entry:
- %t0 = load i32* %pgd, align 4
+ %t0 = load i32, i32* %pgd, align 4
%and2 = and i32 %t0, 1
%tobool = icmp eq i32 %and2, 0
br i1 %tobool, label %if.then, label %if.end
@@ -405,7 +405,7 @@ while.body.preheader: ; preds = %entry
br i1 undef, label %if.then3, label %if.end7
if.then3: ; preds = %while.body.preheader
- %0 = load i32* undef, align 4
+ %0 = load i32, i32* undef, align 4
br i1 undef, label %land.lhs.true.i255, label %if.end7
land.lhs.true.i255: ; preds = %if.then3
@@ -434,12 +434,12 @@ return: ; preds = %entry
@.str = private unnamed_addr constant { [1 x i8], [63 x i8] } zeroinitializer, align 32
define void @pr13188(i64* nocapture %this) uwtable ssp sanitize_address align 2 {
entry:
- %x7 = load i64* %this, align 8
+ %x7 = load i64, i64* %this, align 8
%sub = add i64 %x7, -1
%conv = uitofp i64 %sub to float
%div = fmul float %conv, 5.000000e-01
%conv2 = fpext float %div to double
- tail call void (...)* @_Z6PrintFz(i8* getelementptr inbounds ({ [1 x i8], [63 x i8] }* @.str, i64 0, i32 0, i64 0), double %conv2)
+ tail call void (...) @_Z6PrintFz(i8* getelementptr inbounds ({ [1 x i8], [63 x i8] }, { [1 x i8], [63 x i8] }* @.str, i64 0, i32 0, i64 0), double %conv2)
ret void
}
declare void @_Z6PrintFz(...)
@@ -450,19 +450,19 @@ declare void @_Z6PrintFz(...)
define void @pr13943() nounwind uwtable ssp {
entry:
- %srcval = load i576* bitcast ([9 x i32*]* @fn1.g to i576*), align 16
+ %srcval = load i576, i576* bitcast ([9 x i32*]* @fn1.g to i576*), align 16
br label %for.cond
for.cond: ; preds = %for.inc, %entry
%g.0 = phi i576 [ %srcval, %entry ], [ %ins, %for.inc ]
- %0 = load i32* @e, align 4
+ %0 = load i32, i32* @e, align 4
%1 = lshr i576 %g.0, 64
%2 = trunc i576 %1 to i64
%3 = inttoptr i64 %2 to i32*
%cmp = icmp eq i32* undef, %3
%conv2 = zext i1 %cmp to i32
%and = and i32 %conv2, %0
- tail call void (...)* @fn3(i32 %and) nounwind
+ tail call void (...) @fn3(i32 %and) nounwind
%tobool = icmp eq i32 undef, 0
br i1 %tobool, label %for.inc, label %if.then
@@ -510,9 +510,9 @@ bb4: ; preds = %bb3
unreachable
bb5: ; preds = %bb3
- %tmp = load <4 x float>* undef, align 1
+ %tmp = load <4 x float>, <4 x float>* undef, align 1
%tmp6 = bitcast <4 x float> %tmp to i128
- %tmp7 = load <4 x float>* undef, align 1
+ %tmp7 = load <4 x float>, <4 x float>* undef, align 1
%tmp8 = bitcast <4 x float> %tmp7 to i128
br label %bb10
@@ -583,7 +583,7 @@ bb29: ; preds = %bb28, %bb26, %bb25,
}
define void @pr14194() nounwind uwtable {
- %tmp = load i64* undef, align 16
+ %tmp = load i64, i64* undef, align 16
%tmp1 = trunc i64 %tmp to i32
%tmp2 = lshr i64 %tmp, 32
%tmp3 = trunc i64 %tmp2 to i32
diff --git a/test/CodeGen/X86/critical-anti-dep-breaker.ll b/test/CodeGen/X86/critical-anti-dep-breaker.ll
index 32d3f49c79cc..86afc1f245ad 100644
--- a/test/CodeGen/X86/critical-anti-dep-breaker.ll
+++ b/test/CodeGen/X86/critical-anti-dep-breaker.ll
@@ -16,9 +16,9 @@
define i32 @Part_Create(i64* %Anchor, i32 %TypeNum, i32 %F, i32 %Z, i32* %Status, i64* %PartTkn) {
%PartObj = alloca i64*, align 8
%Vchunk = alloca i64, align 8
- %1 = load i64* @NullToken, align 4
+ %1 = load i64, i64* @NullToken, align 4
store i64 %1, i64* %Vchunk, align 8
- %2 = load i32* @PartClass, align 4
+ %2 = load i32, i32* @PartClass, align 4
call i32 @Image(i64* %Anchor, i32 %2, i32 0, i32 0, i32* %Status, i64* %PartTkn, i64** %PartObj)
call i32 @Create(i64* %Anchor)
ret i32 %2
diff --git a/test/CodeGen/X86/critical-edge-split-2.ll b/test/CodeGen/X86/critical-edge-split-2.ll
index 44205d6829dd..d5878bd1a748 100644
--- a/test/CodeGen/X86/critical-edge-split-2.ll
+++ b/test/CodeGen/X86/critical-edge-split-2.ll
@@ -18,7 +18,7 @@ cond.false.i: ; preds = %entry
br label %cond.end.i
cond.end.i: ; preds = %entry
- %call1 = phi i16 [ trunc (i32 srem (i32 1, i32 zext (i1 icmp eq (%1* bitcast (i8* getelementptr inbounds (%0* @g_2, i64 0, i32 1, i32 0) to %1*), %1* @g_4) to i32)) to i16), %cond.false.i ], [ 1, %entry ]
+ %call1 = phi i16 [ trunc (i32 srem (i32 1, i32 zext (i1 icmp eq (%1* bitcast (i8* getelementptr inbounds (%0, %0* @g_2, i64 0, i32 1, i32 0) to %1*), %1* @g_4) to i32)) to i16), %cond.false.i ], [ 1, %entry ]
ret i16 %call1
}
diff --git a/test/CodeGen/X86/cse-add-with-overflow.ll b/test/CodeGen/X86/cse-add-with-overflow.ll
index 1fcc03f117d3..dc02fe915840 100644
--- a/test/CodeGen/X86/cse-add-with-overflow.ll
+++ b/test/CodeGen/X86/cse-add-with-overflow.ll
@@ -15,8 +15,8 @@
define i64 @redundantadd(i64* %a0, i64* %a1) {
entry:
- %tmp8 = load i64* %a0, align 8
- %tmp12 = load i64* %a1, align 8
+ %tmp8 = load i64, i64* %a0, align 8
+ %tmp12 = load i64, i64* %a1, align 8
%tmp13 = icmp ult i64 %tmp12, -281474976710656
br i1 %tmp13, label %exit1, label %body
diff --git a/test/CodeGen/X86/cttz-ctlz.ll b/test/CodeGen/X86/cttz-ctlz.ll
deleted file mode 100644
index 8717d4015954..000000000000
--- a/test/CodeGen/X86/cttz-ctlz.ll
+++ /dev/null
@@ -1,422 +0,0 @@
-; RUN: opt -S -codegenprepare -mtriple=x86_64-unknown-unknown -mattr=+bmi < %s | FileCheck %s --check-prefix=ALL --check-prefix=BMI
-; RUN: opt -S -codegenprepare -mtriple=x86_64-unknown-unknown -mattr=+lzcnt < %s | FileCheck %s --check-prefix=ALL --check-prefix=LZCNT
-; RUN: opt -S -codegenprepare -mtriple=x86_64-unknown-unknown < %s | FileCheck %s --check-prefix=ALL --check-prefix=GENERIC
-
-
-define i64 @test1(i64 %A) {
-; ALL-LABEL: @test1(
-; LZCNT: [[CTLZ:%[A-Za-z0-9]+]] = call i64 @llvm.ctlz.i64(i64 %A, i1 false)
-; LZCNT-NEXT: ret i64 [[CTLZ]]
-; BMI: icmp eq i64 %A, 0
-; BMI: call i64 @llvm.ctlz.i64(i64 %A, i1 true)
-; GENERIC: icmp eq i64 %A, 0
-; GENERIC: call i64 @llvm.ctlz.i64(i64 %A, i1 true)
-entry:
- %tobool = icmp eq i64 %A, 0
- br i1 %tobool, label %cond.end, label %cond.true
-
-cond.true: ; preds = %entry
- %0 = tail call i64 @llvm.ctlz.i64(i64 %A, i1 true)
- br label %cond.end
-
-cond.end: ; preds = %entry, %cond.true
- %cond = phi i64 [ %0, %cond.true ], [ 64, %entry ]
- ret i64 %cond
-}
-
-
-define i32 @test2(i32 %A) {
-; ALL-LABEL: @test2(
-; LZCNT: [[CTLZ:%[A-Za-z0-9]+]] = call i32 @llvm.ctlz.i32(i32 %A, i1 false)
-; LZCNT-NEXT: ret i32 [[CTLZ]]
-; BMI: icmp eq i32 %A, 0
-; BMI: call i32 @llvm.ctlz.i32(i32 %A, i1 true)
-; GENERIC: icmp eq i32 %A, 0
-; GENERIC: call i32 @llvm.ctlz.i32(i32 %A, i1 true)
-entry:
- %tobool = icmp eq i32 %A, 0
- br i1 %tobool, label %cond.end, label %cond.true
-
-cond.true: ; preds = %entry
- %0 = tail call i32 @llvm.ctlz.i32(i32 %A, i1 true)
- br label %cond.end
-
-cond.end: ; preds = %entry, %cond.true
- %cond = phi i32 [ %0, %cond.true ], [ 32, %entry ]
- ret i32 %cond
-}
-
-
-define signext i16 @test3(i16 signext %A) {
-; ALL-LABEL: @test3(
-; LZCNT: [[CTLZ:%[A-Za-z0-9]+]] = call i16 @llvm.ctlz.i16(i16 %A, i1 false)
-; LZCNT-NEXT: ret i16 [[CTLZ]]
-; BMI: icmp eq i16 %A, 0
-; BMI: call i16 @llvm.ctlz.i16(i16 %A, i1 true)
-; GENERIC: icmp eq i16 %A, 0
-; GENERIC: call i16 @llvm.ctlz.i16(i16 %A, i1 true)
-entry:
- %tobool = icmp eq i16 %A, 0
- br i1 %tobool, label %cond.end, label %cond.true
-
-cond.true: ; preds = %entry
- %0 = tail call i16 @llvm.ctlz.i16(i16 %A, i1 true)
- br label %cond.end
-
-cond.end: ; preds = %entry, %cond.true
- %cond = phi i16 [ %0, %cond.true ], [ 16, %entry ]
- ret i16 %cond
-}
-
-
-define i64 @test1b(i64 %A) {
-; ALL-LABEL: @test1b(
-; LZCNT: icmp eq i64 %A, 0
-; LZCNT: call i64 @llvm.cttz.i64(i64 %A, i1 true)
-; BMI: [[CTTZ:%[A-Za-z0-9]+]] = call i64 @llvm.cttz.i64(i64 %A, i1 false)
-; BMI-NEXT: ret i64 [[CTTZ]]
-; GENERIC: icmp eq i64 %A, 0
-; GENERIC: call i64 @llvm.cttz.i64(i64 %A, i1 true)
-entry:
- %tobool = icmp eq i64 %A, 0
- br i1 %tobool, label %cond.end, label %cond.true
-
-cond.true: ; preds = %entry
- %0 = tail call i64 @llvm.cttz.i64(i64 %A, i1 true)
- br label %cond.end
-
-cond.end: ; preds = %entry, %cond.true
- %cond = phi i64 [ %0, %cond.true ], [ 64, %entry ]
- ret i64 %cond
-}
-
-
-define i32 @test2b(i32 %A) {
-; ALL-LABEL: @test2b(
-; LZCNT: icmp eq i32 %A, 0
-; LZCNT: call i32 @llvm.cttz.i32(i32 %A, i1 true)
-; BMI: [[CTTZ:%[A-Za-z0-9]+]] = call i32 @llvm.cttz.i32(i32 %A, i1 false)
-; BMI-NEXT: ret i32 [[CTTZ]]
-; GENERIC: icmp eq i32 %A, 0
-; GENERIC: call i32 @llvm.cttz.i32(i32 %A, i1 true)
-entry:
- %tobool = icmp eq i32 %A, 0
- br i1 %tobool, label %cond.end, label %cond.true
-
-cond.true: ; preds = %entry
- %0 = tail call i32 @llvm.cttz.i32(i32 %A, i1 true)
- br label %cond.end
-
-cond.end: ; preds = %entry, %cond.true
- %cond = phi i32 [ %0, %cond.true ], [ 32, %entry ]
- ret i32 %cond
-}
-
-
-define signext i16 @test3b(i16 signext %A) {
-; ALL-LABEL: @test3b(
-; LZCNT: icmp eq i16 %A, 0
-; LZCNT: call i16 @llvm.cttz.i16(i16 %A, i1 true)
-; BMI: [[CTTZ:%[A-Za-z0-9]+]] = call i16 @llvm.cttz.i16(i16 %A, i1 false)
-; BMI-NEXT: ret i16 [[CTTZ]]
-; GENERIC: icmp eq i16 %A, 0
-; GENERIC: call i16 @llvm.cttz.i16(i16 %A, i1 true)
-entry:
- %tobool = icmp eq i16 %A, 0
- br i1 %tobool, label %cond.end, label %cond.true
-
-cond.true: ; preds = %entry
- %0 = tail call i16 @llvm.cttz.i16(i16 %A, i1 true)
- br label %cond.end
-
-cond.end: ; preds = %entry, %cond.true
- %cond = phi i16 [ %0, %cond.true ], [ 16, %entry ]
- ret i16 %cond
-}
-
-
-define i64 @test1c(i64 %A) {
-; ALL-LABEL: @test1c(
-; ALL: icmp eq i64 %A, 0
-; ALL: call i64 @llvm.ctlz.i64(i64 %A, i1 true)
-entry:
- %tobool = icmp eq i64 %A, 0
- br i1 %tobool, label %cond.end, label %cond.true
-
-cond.true: ; preds = %entry
- %0 = tail call i64 @llvm.ctlz.i64(i64 %A, i1 true)
- br label %cond.end
-
-cond.end: ; preds = %entry, %cond.true
- %cond = phi i64 [ %0, %cond.true ], [ 63, %entry ]
- ret i64 %cond
-}
-
-define i32 @test2c(i32 %A) {
-; ALL-LABEL: @test2c(
-; ALL: icmp eq i32 %A, 0
-; ALL: call i32 @llvm.ctlz.i32(i32 %A, i1 true)
-entry:
- %tobool = icmp eq i32 %A, 0
- br i1 %tobool, label %cond.end, label %cond.true
-
-cond.true: ; preds = %entry
- %0 = tail call i32 @llvm.ctlz.i32(i32 %A, i1 true)
- br label %cond.end
-
-cond.end: ; preds = %entry, %cond.true
- %cond = phi i32 [ %0, %cond.true ], [ 31, %entry ]
- ret i32 %cond
-}
-
-
-define signext i16 @test3c(i16 signext %A) {
-; ALL-LABEL: @test3c(
-; ALL: icmp eq i16 %A, 0
-; ALL: call i16 @llvm.ctlz.i16(i16 %A, i1 true)
-entry:
- %tobool = icmp eq i16 %A, 0
- br i1 %tobool, label %cond.end, label %cond.true
-
-cond.true: ; preds = %entry
- %0 = tail call i16 @llvm.ctlz.i16(i16 %A, i1 true)
- br label %cond.end
-
-cond.end: ; preds = %entry, %cond.true
- %cond = phi i16 [ %0, %cond.true ], [ 15, %entry ]
- ret i16 %cond
-}
-
-
-define i64 @test1d(i64 %A) {
-; ALL-LABEL: @test1d(
-; ALL: icmp eq i64 %A, 0
-; ALL: call i64 @llvm.cttz.i64(i64 %A, i1 true)
-entry:
- %tobool = icmp eq i64 %A, 0
- br i1 %tobool, label %cond.end, label %cond.true
-
-cond.true: ; preds = %entry
- %0 = tail call i64 @llvm.cttz.i64(i64 %A, i1 true)
- br label %cond.end
-
-cond.end: ; preds = %entry, %cond.true
- %cond = phi i64 [ %0, %cond.true ], [ 63, %entry ]
- ret i64 %cond
-}
-
-
-define i32 @test2d(i32 %A) {
-; ALL-LABEL: @test2d(
-; ALL: icmp eq i32 %A, 0
-; ALL: call i32 @llvm.cttz.i32(i32 %A, i1 true)
-entry:
- %tobool = icmp eq i32 %A, 0
- br i1 %tobool, label %cond.end, label %cond.true
-
-cond.true: ; preds = %entry
- %0 = tail call i32 @llvm.cttz.i32(i32 %A, i1 true)
- br label %cond.end
-
-cond.end: ; preds = %entry, %cond.true
- %cond = phi i32 [ %0, %cond.true ], [ 31, %entry ]
- ret i32 %cond
-}
-
-
-define signext i16 @test3d(i16 signext %A) {
-; ALL-LABEL: @test3d(
-; ALL: icmp eq i16 %A, 0
-; ALL: call i16 @llvm.cttz.i16(i16 %A, i1 true)
-entry:
- %tobool = icmp eq i16 %A, 0
- br i1 %tobool, label %cond.end, label %cond.true
-
-cond.true: ; preds = %entry
- %0 = tail call i16 @llvm.cttz.i16(i16 %A, i1 true)
- br label %cond.end
-
-cond.end: ; preds = %entry, %cond.true
- %cond = phi i16 [ %0, %cond.true ], [ 15, %entry ]
- ret i16 %cond
-}
-
-; The following tests verify that calls to cttz/ctlz are speculated even if
-; basic block %cond.true has an extra zero extend/truncate which is "free"
-; for the target.
-
-define i64 @test1e(i32 %x) {
-; ALL-LABEL: @test1e(
-; LZCNT: icmp eq i32 %x, 0
-; LZCNT: call i32 @llvm.cttz.i32(i32 %x, i1 true)
-; BMI: call i32 @llvm.cttz.i32(i32 %x, i1 false)
-; GENERIC: icmp eq i32 %x, 0
-; GENERIC: call i32 @llvm.cttz.i32(i32 %x, i1 true)
-entry:
- %tobool = icmp eq i32 %x, 0
- br i1 %tobool, label %cond.end, label %cond.true
-
-cond.true: ; preds = %entry
- %0 = tail call i32 @llvm.cttz.i32(i32 %x, i1 true)
- %phitmp2 = zext i32 %0 to i64
- br label %cond.end
-
-cond.end: ; preds = %entry, %cond.true
- %cond = phi i64 [ %phitmp2, %cond.true ], [ 32, %entry ]
- ret i64 %cond
-}
-
-define i32 @test2e(i64 %x) {
-; ALL-LABEL: @test2e(
-; LZCNT: icmp eq i64 %x, 0
-; LZCNT: call i64 @llvm.cttz.i64(i64 %x, i1 true)
-; BMI: call i64 @llvm.cttz.i64(i64 %x, i1 false)
-; GENERIC: icmp eq i64 %x, 0
-; GENERIC: call i64 @llvm.cttz.i64(i64 %x, i1 true)
-entry:
- %tobool = icmp eq i64 %x, 0
- br i1 %tobool, label %cond.end, label %cond.true
-
-cond.true: ; preds = %entry
- %0 = tail call i64 @llvm.cttz.i64(i64 %x, i1 true)
- %cast = trunc i64 %0 to i32
- br label %cond.end
-
-cond.end: ; preds = %entry, %cond.true
- %cond = phi i32 [ %cast, %cond.true ], [ 64, %entry ]
- ret i32 %cond
-}
-
-define i64 @test3e(i32 %x) {
-; ALL-LABEL: @test3e(
-; BMI: icmp eq i32 %x, 0
-; BMI: call i32 @llvm.ctlz.i32(i32 %x, i1 true)
-; LZCNT: call i32 @llvm.ctlz.i32(i32 %x, i1 false)
-; GENERIC: icmp eq i32 %x, 0
-; GENERIC: call i32 @llvm.ctlz.i32(i32 %x, i1 true)
-entry:
- %tobool = icmp eq i32 %x, 0
- br i1 %tobool, label %cond.end, label %cond.true
-
-cond.true: ; preds = %entry
- %0 = tail call i32 @llvm.ctlz.i32(i32 %x, i1 true)
- %phitmp2 = zext i32 %0 to i64
- br label %cond.end
-
-cond.end: ; preds = %entry, %cond.true
- %cond = phi i64 [ %phitmp2, %cond.true ], [ 32, %entry ]
- ret i64 %cond
-}
-
-define i32 @test4e(i64 %x) {
-; ALL-LABEL: @test4e(
-; BMI: icmp eq i64 %x, 0
-; BMI: call i64 @llvm.ctlz.i64(i64 %x, i1 true)
-; LZCNT: call i64 @llvm.ctlz.i64(i64 %x, i1 false)
-; GENERIC: icmp eq i64 %x, 0
-; GENERIC: call i64 @llvm.ctlz.i64(i64 %x, i1 true)
-entry:
- %tobool = icmp eq i64 %x, 0
- br i1 %tobool, label %cond.end, label %cond.true
-
-cond.true: ; preds = %entry
- %0 = tail call i64 @llvm.ctlz.i64(i64 %x, i1 true)
- %cast = trunc i64 %0 to i32
- br label %cond.end
-
-cond.end: ; preds = %entry, %cond.true
- %cond = phi i32 [ %cast, %cond.true ], [ 64, %entry ]
- ret i32 %cond
-}
-
-define i16 @test5e(i64 %x) {
-; ALL-LABEL: @test5e(
-; BMI: icmp eq i64 %x, 0
-; BMI: call i64 @llvm.ctlz.i64(i64 %x, i1 true)
-; LZCNT: call i64 @llvm.ctlz.i64(i64 %x, i1 false)
-; GENERIC: icmp eq i64 %x, 0
-; GENERIC: call i64 @llvm.ctlz.i64(i64 %x, i1 true)
-entry:
- %tobool = icmp eq i64 %x, 0
- br i1 %tobool, label %cond.end, label %cond.true
-
-cond.true: ; preds = %entry
- %0 = tail call i64 @llvm.ctlz.i64(i64 %x, i1 true)
- %cast = trunc i64 %0 to i16
- br label %cond.end
-
-cond.end: ; preds = %entry, %cond.true
- %cond = phi i16 [ %cast, %cond.true ], [ 64, %entry ]
- ret i16 %cond
-}
-
-define i16 @test6e(i32 %x) {
-; ALL-LABEL: @test6e(
-; BMI: icmp eq i32 %x, 0
-; BMI: call i32 @llvm.ctlz.i32(i32 %x, i1 true)
-; LZCNT: call i32 @llvm.ctlz.i32(i32 %x, i1 false)
-; GENERIC: icmp eq i32 %x, 0
-; GENERIC: call i32 @llvm.ctlz.i32(i32 %x, i1 true)
-entry:
- %tobool = icmp eq i32 %x, 0
- br i1 %tobool, label %cond.end, label %cond.true
-
-cond.true: ; preds = %entry
- %0 = tail call i32 @llvm.ctlz.i32(i32 %x, i1 true)
- %cast = trunc i32 %0 to i16
- br label %cond.end
-
-cond.end: ; preds = %entry, %cond.true
- %cond = phi i16 [ %cast, %cond.true ], [ 32, %entry ]
- ret i16 %cond
-}
-
-define i16 @test7e(i64 %x) {
-; ALL-LABEL: @test7e(
-; LZCNT: icmp eq i64 %x, 0
-; LZCNT: call i64 @llvm.cttz.i64(i64 %x, i1 true)
-; BMI: call i64 @llvm.cttz.i64(i64 %x, i1 false)
-; GENERIC: icmp eq i64 %x, 0
-; GENERIC: call i64 @llvm.cttz.i64(i64 %x, i1 true)
-entry:
- %tobool = icmp eq i64 %x, 0
- br i1 %tobool, label %cond.end, label %cond.true
-
-cond.true: ; preds = %entry
- %0 = tail call i64 @llvm.cttz.i64(i64 %x, i1 true)
- %cast = trunc i64 %0 to i16
- br label %cond.end
-
-cond.end: ; preds = %entry, %cond.true
- %cond = phi i16 [ %cast, %cond.true ], [ 64, %entry ]
- ret i16 %cond
-}
-
-define i16 @test8e(i32 %x) {
-; ALL-LABEL: @test8e(
-; LZCNT: icmp eq i32 %x, 0
-; LZCNT: call i32 @llvm.cttz.i32(i32 %x, i1 true)
-; BMI: call i32 @llvm.cttz.i32(i32 %x, i1 false)
-; GENERIC: icmp eq i32 %x, 0
-; GENERIC: call i32 @llvm.cttz.i32(i32 %x, i1 true)
-entry:
- %tobool = icmp eq i32 %x, 0
- br i1 %tobool, label %cond.end, label %cond.true
-
-cond.true: ; preds = %entry
- %0 = tail call i32 @llvm.cttz.i32(i32 %x, i1 true)
- %cast = trunc i32 %0 to i16
- br label %cond.end
-
-cond.end: ; preds = %entry, %cond.true
- %cond = phi i16 [ %cast, %cond.true ], [ 32, %entry ]
- ret i16 %cond
-}
-
-
-declare i64 @llvm.ctlz.i64(i64, i1)
-declare i32 @llvm.ctlz.i32(i32, i1)
-declare i16 @llvm.ctlz.i16(i16, i1)
-declare i64 @llvm.cttz.i64(i64, i1)
-declare i32 @llvm.cttz.i32(i32, i1)
-declare i16 @llvm.cttz.i16(i16, i1)
diff --git a/test/CodeGen/X86/cvt16.ll b/test/CodeGen/X86/cvt16.ll
index 4d920e2d23d2..5ee399fc137a 100644
--- a/test/CodeGen/X86/cvt16.ll
+++ b/test/CodeGen/X86/cvt16.ll
@@ -1,7 +1,7 @@
; RUN: llc < %s -march=x86-64 -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -mattr=-f16c | FileCheck %s -check-prefix=CHECK -check-prefix=LIBCALL
; RUN: llc < %s -march=x86-64 -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -mattr=+f16c | FileCheck %s -check-prefix=CHECK -check-prefix=F16C
-; RUN: llc < %s -march=x86-64 -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -soft-float=1 -mattr=-f16c | FileCheck %s -check-prefix=CHECK -check-prefix=SOFTFLOAT
-; RUN: llc < %s -march=x86-64 -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -soft-float=1 -mattr=+f16c | FileCheck %s -check-prefix=CHECK -check-prefix=SOFTFLOAT
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -mattr=-f16c,+soft-float | FileCheck %s -check-prefix=CHECK -check-prefix=SOFTFLOAT
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -mattr=+f16c,+soft-float | FileCheck %s -check-prefix=CHECK -check-prefix=SOFTFLOAT
; This is a test for float to half float conversions on x86-64.
;
@@ -33,7 +33,7 @@ define void @test1(float %src, i16* %dest) {
define float @test2(i16* nocapture %src) {
- %1 = load i16* %src, align 2
+ %1 = load i16, i16* %src, align 2
%2 = tail call float @llvm.convert.from.fp16.f32(i16 %1)
ret float %2
}
@@ -60,7 +60,7 @@ define float @test3(float %src) nounwind uwtable readnone {
; F16C: ret
define double @test4(i16* nocapture %src) {
- %1 = load i16* %src, align 2
+ %1 = load i16, i16* %src, align 2
%2 = tail call double @llvm.convert.from.fp16.f64(i16 %1)
ret double %2
}
diff --git a/test/CodeGen/X86/dag-optnone.ll b/test/CodeGen/X86/dag-optnone.ll
index 897de31a6fd3..f7774e6e8c54 100644
--- a/test/CodeGen/X86/dag-optnone.ll
+++ b/test/CodeGen/X86/dag-optnone.ll
@@ -63,7 +63,7 @@ entry:
define void @bar() #1 {
entry:
%id83 = alloca <16 x i8>, align 16
- %0 = load <16 x i32>* @id84, align 64
+ %0 = load <16 x i32>, <16 x i32>* @id84, align 64
%conv = trunc <16 x i32> %0 to <16 x i8>
store <16 x i8> %conv, <16 x i8>* %id83, align 16
ret void
diff --git a/test/CodeGen/X86/dagcombine-and-setcc.ll b/test/CodeGen/X86/dagcombine-and-setcc.ll
index e7336a90dbdd..57adc8bc5daa 100644
--- a/test/CodeGen/X86/dagcombine-and-setcc.ll
+++ b/test/CodeGen/X86/dagcombine-and-setcc.ll
@@ -39,7 +39,7 @@ ret2:
define i32 @main(i32 %argc, i8** nocapture readnone %argv) {
%res = alloca i32, align 4
%t = call i32 @foo(i32 1, i32 2, i32* %res) #3
- %v = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32 %t)
+ %v = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), i32 %t)
ret i32 0
}
diff --git a/test/CodeGen/X86/dagcombine-buildvector.ll b/test/CodeGen/X86/dagcombine-buildvector.ll
index cf631c353fce..3a6231ade1ab 100644
--- a/test/CodeGen/X86/dagcombine-buildvector.ll
+++ b/test/CodeGen/X86/dagcombine-buildvector.ll
@@ -17,7 +17,7 @@ entry:
; CHECK: movdqa
define void @test2(<4 x i16>* %src, <4 x i32>* %dest) nounwind {
entry:
- %tmp1 = load <4 x i16>* %src
+ %tmp1 = load <4 x i16>, <4 x i16>* %src
%tmp3 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
%0 = tail call <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16> %tmp3)
store <4 x i32> %0, <4 x i32>* %dest
diff --git a/test/CodeGen/X86/dagcombine-cse.ll b/test/CodeGen/X86/dagcombine-cse.ll
index 75d3d93ddb89..be1dcff7ae85 100644
--- a/test/CodeGen/X86/dagcombine-cse.ll
+++ b/test/CodeGen/X86/dagcombine-cse.ll
@@ -5,14 +5,14 @@ define i32 @t(i8* %ref_frame_ptr, i32 %ref_frame_stride, i32 %idxX, i32 %idxY) n
entry:
%tmp7 = mul i32 %idxY, %ref_frame_stride ; <i32> [#uses=2]
%tmp9 = add i32 %tmp7, %idxX ; <i32> [#uses=1]
- %tmp11 = getelementptr i8* %ref_frame_ptr, i32 %tmp9 ; <i8*> [#uses=1]
+ %tmp11 = getelementptr i8, i8* %ref_frame_ptr, i32 %tmp9 ; <i8*> [#uses=1]
%tmp1112 = bitcast i8* %tmp11 to i32* ; <i32*> [#uses=1]
- %tmp13 = load i32* %tmp1112, align 4 ; <i32> [#uses=1]
+ %tmp13 = load i32, i32* %tmp1112, align 4 ; <i32> [#uses=1]
%tmp18 = add i32 %idxX, 4 ; <i32> [#uses=1]
%tmp20.sum = add i32 %tmp18, %tmp7 ; <i32> [#uses=1]
- %tmp21 = getelementptr i8* %ref_frame_ptr, i32 %tmp20.sum ; <i8*> [#uses=1]
+ %tmp21 = getelementptr i8, i8* %ref_frame_ptr, i32 %tmp20.sum ; <i8*> [#uses=1]
%tmp2122 = bitcast i8* %tmp21 to i16* ; <i16*> [#uses=1]
- %tmp23 = load i16* %tmp2122, align 2 ; <i16> [#uses=1]
+ %tmp23 = load i16, i16* %tmp2122, align 2 ; <i16> [#uses=1]
%tmp2425 = zext i16 %tmp23 to i64 ; <i64> [#uses=1]
%tmp26 = shl i64 %tmp2425, 32 ; <i64> [#uses=1]
%tmp2728 = zext i32 %tmp13 to i64 ; <i64> [#uses=1]
diff --git a/test/CodeGen/X86/darwin-quote.ll b/test/CodeGen/X86/darwin-quote.ll
index 8fddc118f61e..c912c92e1e8b 100644
--- a/test/CodeGen/X86/darwin-quote.ll
+++ b/test/CodeGen/X86/darwin-quote.ll
@@ -2,7 +2,7 @@
define internal i64 @baz() nounwind {
- %tmp = load i64* @"+x"
+ %tmp = load i64, i64* @"+x"
ret i64 %tmp
; CHECK: _baz:
; CHECK: movl "L_+x$non_lazy_ptr", %ecx
diff --git a/test/CodeGen/X86/darwin-stub.ll b/test/CodeGen/X86/darwin-stub.ll
index b4d2e1aa566d..607f56fdd60b 100644
--- a/test/CodeGen/X86/darwin-stub.ll
+++ b/test/CodeGen/X86/darwin-stub.ll
@@ -5,7 +5,7 @@
define i32 @main() nounwind {
entry:
- %0 = tail call i32 @puts(i8* getelementptr ([13 x i8]* @"\01LC", i32 0, i32 0)) nounwind ; <i32> [#uses=0]
+ %0 = tail call i32 @puts(i8* getelementptr ([13 x i8], [13 x i8]* @"\01LC", i32 0, i32 0)) nounwind ; <i32> [#uses=0]
ret i32 0
}
diff --git a/test/CodeGen/X86/dbg-changes-codegen-branch-folding.ll b/test/CodeGen/X86/dbg-changes-codegen-branch-folding.ll
index 16d8f97c3a21..20d0129c3e89 100644
--- a/test/CodeGen/X86/dbg-changes-codegen-branch-folding.ll
+++ b/test/CodeGen/X86/dbg-changes-codegen-branch-folding.ll
@@ -52,49 +52,49 @@ define void @_Z3barii(i32 %param1, i32 %param2) #0 {
entry:
%var1 = alloca %struct.AAA3, align 1
%var2 = alloca %struct.AAA3, align 1
- tail call void @llvm.dbg.value(metadata i32 %param1, i64 0, metadata !30, metadata !{!"0x102"}), !dbg !47
- tail call void @llvm.dbg.value(metadata i32 %param2, i64 0, metadata !31, metadata !{!"0x102"}), !dbg !47
- tail call void @llvm.dbg.value(metadata i8* null, i64 0, metadata !32, metadata !{!"0x102"}), !dbg !49
+ tail call void @llvm.dbg.value(metadata i32 %param1, i64 0, metadata !30, metadata !DIExpression()), !dbg !47
+ tail call void @llvm.dbg.value(metadata i32 %param2, i64 0, metadata !31, metadata !DIExpression()), !dbg !47
+ tail call void @llvm.dbg.value(metadata i8* null, i64 0, metadata !32, metadata !DIExpression()), !dbg !49
%tobool = icmp eq i32 %param2, 0, !dbg !50
br i1 %tobool, label %if.end, label %if.then, !dbg !50
if.then: ; preds = %entry
%call = tail call i8* @_Z5i2stri(i32 %param2), !dbg !52
- tail call void @llvm.dbg.value(metadata i8* %call, i64 0, metadata !32, metadata !{!"0x102"}), !dbg !49
+ tail call void @llvm.dbg.value(metadata i8* %call, i64 0, metadata !32, metadata !DIExpression()), !dbg !49
br label %if.end, !dbg !54
if.end: ; preds = %entry, %if.then
- tail call void @llvm.dbg.value(metadata %struct.AAA3* %var1, i64 0, metadata !33, metadata !{!"0x102"}), !dbg !55
- tail call void @llvm.dbg.value(metadata %struct.AAA3* %var1, i64 0, metadata !56, metadata !{!"0x102"}), !dbg !57
- tail call void @llvm.dbg.value(metadata !58, i64 0, metadata !59, metadata !{!"0x102"}), !dbg !60
- %arraydecay.i = getelementptr inbounds %struct.AAA3* %var1, i64 0, i32 0, i64 0, !dbg !61
- call void @_Z3fooPcjPKc(i8* %arraydecay.i, i32 4, i8* getelementptr inbounds ([1 x i8]* @.str, i64 0, i64 0)), !dbg !61
- call void @llvm.dbg.value(metadata %struct.AAA3* %var2, i64 0, metadata !34, metadata !{!"0x102"}), !dbg !63
- call void @llvm.dbg.value(metadata %struct.AAA3* %var2, i64 0, metadata !64, metadata !{!"0x102"}), !dbg !65
- call void @llvm.dbg.value(metadata !58, i64 0, metadata !66, metadata !{!"0x102"}), !dbg !67
- %arraydecay.i5 = getelementptr inbounds %struct.AAA3* %var2, i64 0, i32 0, i64 0, !dbg !68
- call void @_Z3fooPcjPKc(i8* %arraydecay.i5, i32 4, i8* getelementptr inbounds ([1 x i8]* @.str, i64 0, i64 0)), !dbg !68
+ tail call void @llvm.dbg.value(metadata %struct.AAA3* %var1, i64 0, metadata !33, metadata !DIExpression()), !dbg !55
+ tail call void @llvm.dbg.value(metadata %struct.AAA3* %var1, i64 0, metadata !56, metadata !DIExpression()), !dbg !57
+ tail call void @llvm.dbg.value(metadata !58, i64 0, metadata !59, metadata !DIExpression()), !dbg !60
+ %arraydecay.i = getelementptr inbounds %struct.AAA3, %struct.AAA3* %var1, i64 0, i32 0, i64 0, !dbg !61
+ call void @_Z3fooPcjPKc(i8* %arraydecay.i, i32 4, i8* getelementptr inbounds ([1 x i8], [1 x i8]* @.str, i64 0, i64 0)), !dbg !61
+ call void @llvm.dbg.value(metadata %struct.AAA3* %var2, i64 0, metadata !34, metadata !DIExpression()), !dbg !63
+ call void @llvm.dbg.value(metadata %struct.AAA3* %var2, i64 0, metadata !64, metadata !DIExpression()), !dbg !65
+ call void @llvm.dbg.value(metadata !58, i64 0, metadata !66, metadata !DIExpression()), !dbg !67
+ %arraydecay.i5 = getelementptr inbounds %struct.AAA3, %struct.AAA3* %var2, i64 0, i32 0, i64 0, !dbg !68
+ call void @_Z3fooPcjPKc(i8* %arraydecay.i5, i32 4, i8* getelementptr inbounds ([1 x i8], [1 x i8]* @.str, i64 0, i64 0)), !dbg !68
%tobool1 = icmp eq i32 %param1, 0, !dbg !69
- call void @llvm.dbg.value(metadata %struct.AAA3* %var2, i64 0, metadata !34, metadata !{!"0x102"}), !dbg !63
+ call void @llvm.dbg.value(metadata %struct.AAA3* %var2, i64 0, metadata !34, metadata !DIExpression()), !dbg !63
br i1 %tobool1, label %if.else, label %if.then2, !dbg !69
if.then2: ; preds = %if.end
- call void @llvm.dbg.value(metadata %struct.AAA3* %var2, i64 0, metadata !71, metadata !{!"0x102"}), !dbg !73
- call void @llvm.dbg.value(metadata !74, i64 0, metadata !75, metadata !{!"0x102"}), !dbg !76
- call void @_Z3fooPcjPKc(i8* %arraydecay.i5, i32 4, i8* getelementptr inbounds ([2 x i8]* @.str1, i64 0, i64 0)), !dbg !76
+ call void @llvm.dbg.value(metadata %struct.AAA3* %var2, i64 0, metadata !71, metadata !DIExpression()), !dbg !73
+ call void @llvm.dbg.value(metadata !74, i64 0, metadata !75, metadata !DIExpression()), !dbg !76
+ call void @_Z3fooPcjPKc(i8* %arraydecay.i5, i32 4, i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str1, i64 0, i64 0)), !dbg !76
br label %if.end3, !dbg !72
if.else: ; preds = %if.end
- call void @llvm.dbg.value(metadata %struct.AAA3* %var2, i64 0, metadata !77, metadata !{!"0x102"}), !dbg !79
- call void @llvm.dbg.value(metadata !80, i64 0, metadata !81, metadata !{!"0x102"}), !dbg !82
- call void @_Z3fooPcjPKc(i8* %arraydecay.i5, i32 4, i8* getelementptr inbounds ([2 x i8]* @.str2, i64 0, i64 0)), !dbg !82
+ call void @llvm.dbg.value(metadata %struct.AAA3* %var2, i64 0, metadata !77, metadata !DIExpression()), !dbg !79
+ call void @llvm.dbg.value(metadata !80, i64 0, metadata !81, metadata !DIExpression()), !dbg !82
+ call void @_Z3fooPcjPKc(i8* %arraydecay.i5, i32 4, i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str2, i64 0, i64 0)), !dbg !82
br label %if.end3
if.end3: ; preds = %if.else, %if.then2
- call void @llvm.dbg.value(metadata %struct.AAA3* %var1, i64 0, metadata !33, metadata !{!"0x102"}), !dbg !55
- call void @llvm.dbg.value(metadata %struct.AAA3* %var1, i64 0, metadata !83, metadata !{!"0x102"}), !dbg !85
- call void @llvm.dbg.value(metadata !58, i64 0, metadata !86, metadata !{!"0x102"}), !dbg !87
- call void @_Z3fooPcjPKc(i8* %arraydecay.i, i32 4, i8* getelementptr inbounds ([1 x i8]* @.str, i64 0, i64 0)), !dbg !87
+ call void @llvm.dbg.value(metadata %struct.AAA3* %var1, i64 0, metadata !33, metadata !DIExpression()), !dbg !55
+ call void @llvm.dbg.value(metadata %struct.AAA3* %var1, i64 0, metadata !83, metadata !DIExpression()), !dbg !85
+ call void @llvm.dbg.value(metadata !58, i64 0, metadata !86, metadata !DIExpression()), !dbg !87
+ call void @_Z3fooPcjPKc(i8* %arraydecay.i, i32 4, i8* getelementptr inbounds ([1 x i8], [1 x i8]* @.str, i64 0, i64 0)), !dbg !87
ret void, !dbg !88
}
@@ -113,92 +113,92 @@ attributes #2 = { nounwind readnone }
!llvm.module.flags = !{!44, !45}
!llvm.ident = !{!46}
-!0 = !{!"0x11\004\00clang version 3.5.0 \001\00\000\00\001", !1, !2, !3, !23, !2, !2} ; [ DW_TAG_compile_unit ] [/tmp/dbginfo/dbg-changes-codegen-branch-folding.cpp] [DW_LANG_C_plus_plus]
-!1 = !{!"dbg-changes-codegen-branch-folding.cpp", !"/tmp/dbginfo"}
+!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5.0 ", isOptimized: true, emissionKind: 1, file: !1, enums: !2, retainedTypes: !3, subprograms: !23, globals: !2, imports: !2)
+!1 = !DIFile(filename: "dbg-changes-codegen-branch-folding.cpp", directory: "/tmp/dbginfo")
!2 = !{}
!3 = !{!4}
-!4 = !{!"0x13\00AAA3\004\0032\008\000\000\000", !1, null, null, !5, null, null, !"_ZTS4AAA3"} ; [ DW_TAG_structure_type ] [AAA3] [line 4, size 32, align 8, offset 0] [def] [from ]
+!4 = !DICompositeType(tag: DW_TAG_structure_type, name: "AAA3", line: 4, size: 32, align: 8, file: !1, elements: !5, identifier: "_ZTS4AAA3")
!5 = !{!6, !11, !17, !18}
-!6 = !{!"0xd\00text\008\0032\008\000\000", !1, !"_ZTS4AAA3", !7} ; [ DW_TAG_member ] [text] [line 8, size 32, align 8, offset 0] [from ]
-!7 = !{!"0x1\00\000\0032\008\000\000", null, null, !8, !9, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 32, align 8, offset 0] [from char]
-!8 = !{!"0x24\00char\000\008\008\000\000\006", null, null} ; [ DW_TAG_base_type ] [char] [line 0, size 8, align 8, offset 0, enc DW_ATE_signed_char]
+!6 = !DIDerivedType(tag: DW_TAG_member, name: "text", line: 8, size: 32, align: 8, file: !1, scope: !"_ZTS4AAA3", baseType: !7)
+!7 = !DICompositeType(tag: DW_TAG_array_type, size: 32, align: 8, baseType: !8, elements: !9)
+!8 = !DIBasicType(tag: DW_TAG_base_type, name: "char", size: 8, align: 8, encoding: DW_ATE_signed_char)
!9 = !{!10}
-!10 = !{!"0x21\000\004"} ; [ DW_TAG_subrange_type ] [0, 3]
-!11 = !{!"0x2e\00AAA3\00AAA3\00\005\000\000\000\006\00256\001\005", !1, !"_ZTS4AAA3", !12, null, null, null, i32 0, null} ; [ DW_TAG_subprogram ] [line 5] [AAA3]
-!12 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !13, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!10 = !DISubrange(count: 4)
+!11 = !DISubprogram(name: "AAA3", line: 5, isLocal: false, isDefinition: false, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 5, file: !1, scope: !"_ZTS4AAA3", type: !12)
+!12 = !DISubroutineType(types: !13)
!13 = !{null, !14, !15}
-!14 = !{!"0xf\00\000\0064\0064\000\001088", null, null, !"_ZTS4AAA3"} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from _ZTS4AAA3]
-!15 = !{!"0xf\00\000\0064\0064\000\000", null, null, !16} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from ]
-!16 = !{!"0x26\00\000\000\000\000\000", null, null, !8} ; [ DW_TAG_const_type ] [line 0, size 0, align 0, offset 0] [from char]
-!17 = !{!"0x2e\00operator=\00operator=\00_ZN4AAA3aSEPKc\006\000\000\000\006\00256\001\006", !1, !"_ZTS4AAA3", !12, null, null, null, i32 0, null} ; [ DW_TAG_subprogram ] [line 6] [operator=]
-!18 = !{!"0x2e\00operator const char *\00operator const char *\00_ZNK4AAA3cvPKcEv\007\000\000\000\006\00256\001\007", !1, !"_ZTS4AAA3", !19, null, null, null, i32 0, null} ; [ DW_TAG_subprogram ] [line 7] [operator const char *]
-!19 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !20, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!14 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, flags: DIFlagArtificial | DIFlagObjectPointer, baseType: !"_ZTS4AAA3")
+!15 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, baseType: !16)
+!16 = !DIDerivedType(tag: DW_TAG_const_type, baseType: !8)
+!17 = !DISubprogram(name: "operator=", linkageName: "_ZN4AAA3aSEPKc", line: 6, isLocal: false, isDefinition: false, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 6, file: !1, scope: !"_ZTS4AAA3", type: !12)
+!18 = !DISubprogram(name: "operator const char *", linkageName: "_ZNK4AAA3cvPKcEv", line: 7, isLocal: false, isDefinition: false, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 7, file: !1, scope: !"_ZTS4AAA3", type: !19)
+!19 = !DISubroutineType(types: !20)
!20 = !{!15, !21}
-!21 = !{!"0xf\00\000\0064\0064\000\001088", null, null, !22} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from ]
-!22 = !{!"0x26\00\000\000\000\000\000", null, null, !"_ZTS4AAA3"} ; [ DW_TAG_const_type ] [line 0, size 0, align 0, offset 0] [from _ZTS4AAA3]
+!21 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, flags: DIFlagArtificial | DIFlagObjectPointer, baseType: !22)
+!22 = !DIDerivedType(tag: DW_TAG_const_type, baseType: !"_ZTS4AAA3")
!23 = !{!24, !35, !40}
-!24 = !{!"0x2e\00bar\00bar\00_Z3barii\0011\000\001\000\006\00256\001\0011", !1, !25, !26, null, void (i32, i32)* @_Z3barii, null, null, !29} ; [ DW_TAG_subprogram ] [line 11] [def] [bar]
-!25 = !{!"0x29", !1} ; [ DW_TAG_file_type ] [/tmp/dbginfo/dbg-changes-codegen-branch-folding.cpp]
-!26 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !27, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!24 = !DISubprogram(name: "bar", linkageName: "_Z3barii", line: 11, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 11, file: !1, scope: !25, type: !26, function: void (i32, i32)* @_Z3barii, variables: !29)
+!25 = !DIFile(filename: "dbg-changes-codegen-branch-folding.cpp", directory: "/tmp/dbginfo")
+!26 = !DISubroutineType(types: !27)
!27 = !{null, !28, !28}
-!28 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!28 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
!29 = !{!30, !31, !32, !33, !34}
-!30 = !{!"0x101\00param1\0016777227\000", !24, !25, !28} ; [ DW_TAG_arg_variable ] [param1] [line 11]
-!31 = !{!"0x101\00param2\0033554443\000", !24, !25, !28} ; [ DW_TAG_arg_variable ] [param2] [line 11]
-!32 = !{!"0x100\00temp\0012\000", !24, !25, !15} ; [ DW_TAG_auto_variable ] [temp] [line 12]
-!33 = !{!"0x100\00var1\0017\000", !24, !25, !"_ZTS4AAA3"} ; [ DW_TAG_auto_variable ] [var1] [line 17]
-!34 = !{!"0x100\00var2\0018\000", !24, !25, !"_ZTS4AAA3"} ; [ DW_TAG_auto_variable ] [var2] [line 18]
-!35 = !{!"0x2e\00operator=\00operator=\00_ZN4AAA3aSEPKc\006\000\001\000\006\00256\001\006", !1, !"_ZTS4AAA3", !12, null, null, null, !17, !36} ; [ DW_TAG_subprogram ] [line 6] [def] [operator=]
+!30 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "param1", line: 11, arg: 1, scope: !24, file: !25, type: !28)
+!31 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "param2", line: 11, arg: 2, scope: !24, file: !25, type: !28)
+!32 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "temp", line: 12, scope: !24, file: !25, type: !15)
+!33 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "var1", line: 17, scope: !24, file: !25, type: !"_ZTS4AAA3")
+!34 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "var2", line: 18, scope: !24, file: !25, type: !"_ZTS4AAA3")
+!35 = !DISubprogram(name: "operator=", linkageName: "_ZN4AAA3aSEPKc", line: 6, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 6, file: !1, scope: !"_ZTS4AAA3", type: !12, declaration: !17, variables: !36)
!36 = !{!37, !39}
-!37 = !{!"0x101\00this\0016777216\001088", !35, null, !38} ; [ DW_TAG_arg_variable ] [this] [line 0]
-!38 = !{!"0xf\00\000\0064\0064\000\000", null, null, !"_ZTS4AAA3"} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from _ZTS4AAA3]
-!39 = !{!"0x101\00value\0033554438\000", !35, !25, !15} ; [ DW_TAG_arg_variable ] [value] [line 6]
-!40 = !{!"0x2e\00AAA3\00AAA3\00_ZN4AAA3C2EPKc\005\000\001\000\006\00256\001\005", !1, !"_ZTS4AAA3", !12, null, null, null, !11, !41} ; [ DW_TAG_subprogram ] [line 5] [def] [AAA3]
+!37 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "this", arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: !35, type: !38)
+!38 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, baseType: !"_ZTS4AAA3")
+!39 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "value", line: 6, arg: 2, scope: !35, file: !25, type: !15)
+!40 = !DISubprogram(name: "AAA3", linkageName: "_ZN4AAA3C2EPKc", line: 5, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 5, file: !1, scope: !"_ZTS4AAA3", type: !12, declaration: !11, variables: !41)
!41 = !{!42, !43}
-!42 = !{!"0x101\00this\0016777216\001088", !40, null, !38} ; [ DW_TAG_arg_variable ] [this] [line 0]
-!43 = !{!"0x101\00value\0033554437\000", !40, !25, !15} ; [ DW_TAG_arg_variable ] [value] [line 5]
+!42 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "this", arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: !40, type: !38)
+!43 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "value", line: 5, arg: 2, scope: !40, file: !25, type: !15)
!44 = !{i32 2, !"Dwarf Version", i32 4}
-!45 = !{i32 2, !"Debug Info Version", i32 2}
+!45 = !{i32 2, !"Debug Info Version", i32 3}
!46 = !{!"clang version 3.5.0 "}
-!47 = !MDLocation(line: 11, scope: !24)
+!47 = !DILocation(line: 11, scope: !24)
!48 = !{i8* null}
-!49 = !MDLocation(line: 12, scope: !24)
-!50 = !MDLocation(line: 14, scope: !51)
-!51 = !{!"0xb\0014\000\000", !1, !24} ; [ DW_TAG_lexical_block ] [/tmp/dbginfo/dbg-changes-codegen-branch-folding.cpp]
-!52 = !MDLocation(line: 15, scope: !53)
-!53 = !{!"0xb\0014\000\000", !1, !51} ; [ DW_TAG_lexical_block ] [/tmp/dbginfo/dbg-changes-codegen-branch-folding.cpp]
-!54 = !MDLocation(line: 16, scope: !53)
-!55 = !MDLocation(line: 17, scope: !24)
-!56 = !{!"0x101\00this\0016777216\001088", !40, null, !38, !55} ; [ DW_TAG_arg_variable ] [this] [line 0]
-!57 = !MDLocation(line: 0, scope: !40, inlinedAt: !55)
-!58 = !{i8* getelementptr inbounds ([1 x i8]* @.str, i64 0, i64 0)}
-!59 = !{!"0x101\00value\0033554437\000", !40, !25, !15, !55} ; [ DW_TAG_arg_variable ] [value] [line 5]
-!60 = !MDLocation(line: 5, scope: !40, inlinedAt: !55)
-!61 = !MDLocation(line: 5, scope: !62, inlinedAt: !55)
-!62 = !{!"0xb\005\000\000", !1, !40} ; [ DW_TAG_lexical_block ] [/tmp/dbginfo/dbg-changes-codegen-branch-folding.cpp]
-!63 = !MDLocation(line: 18, scope: !24)
-!64 = !{!"0x101\00this\0016777216\001088", !40, null, !38, !63} ; [ DW_TAG_arg_variable ] [this] [line 0]
-!65 = !MDLocation(line: 0, scope: !40, inlinedAt: !63)
-!66 = !{!"0x101\00value\0033554437\000", !40, !25, !15, !63} ; [ DW_TAG_arg_variable ] [value] [line 5]
-!67 = !MDLocation(line: 5, scope: !40, inlinedAt: !63)
-!68 = !MDLocation(line: 5, scope: !62, inlinedAt: !63)
-!69 = !MDLocation(line: 20, scope: !70)
-!70 = !{!"0xb\0020\000\000", !1, !24} ; [ DW_TAG_lexical_block ] [/tmp/dbginfo/dbg-changes-codegen-branch-folding.cpp]
-!71 = !{!"0x101\00this\0016777216\001088", !35, null, !38, !72} ; [ DW_TAG_arg_variable ] [this] [line 0]
-!72 = !MDLocation(line: 21, scope: !70)
-!73 = !MDLocation(line: 0, scope: !35, inlinedAt: !72)
-!74 = !{i8* getelementptr inbounds ([2 x i8]* @.str1, i64 0, i64 0)}
-!75 = !{!"0x101\00value\0033554438\000", !35, !25, !15, !72} ; [ DW_TAG_arg_variable ] [value] [line 6]
-!76 = !MDLocation(line: 6, scope: !35, inlinedAt: !72)
-!77 = !{!"0x101\00this\0016777216\001088", !35, null, !38, !78} ; [ DW_TAG_arg_variable ] [this] [line 0]
-!78 = !MDLocation(line: 23, scope: !70)
-!79 = !MDLocation(line: 0, scope: !35, inlinedAt: !78)
-!80 = !{i8* getelementptr inbounds ([2 x i8]* @.str2, i64 0, i64 0)}
-!81 = !{!"0x101\00value\0033554438\000", !35, !25, !15, !78} ; [ DW_TAG_arg_variable ] [value] [line 6]
-!82 = !MDLocation(line: 6, scope: !35, inlinedAt: !78)
-!83 = !{!"0x101\00this\0016777216\001088", !35, null, !38, !84} ; [ DW_TAG_arg_variable ] [this] [line 0]
-!84 = !MDLocation(line: 24, scope: !24)
-!85 = !MDLocation(line: 0, scope: !35, inlinedAt: !84)
-!86 = !{!"0x101\00value\0033554438\000", !35, !25, !15, !84} ; [ DW_TAG_arg_variable ] [value] [line 6]
-!87 = !MDLocation(line: 6, scope: !35, inlinedAt: !84)
-!88 = !MDLocation(line: 25, scope: !24)
+!49 = !DILocation(line: 12, scope: !24)
+!50 = !DILocation(line: 14, scope: !51)
+!51 = distinct !DILexicalBlock(line: 14, column: 0, file: !1, scope: !24)
+!52 = !DILocation(line: 15, scope: !53)
+!53 = distinct !DILexicalBlock(line: 14, column: 0, file: !1, scope: !51)
+!54 = !DILocation(line: 16, scope: !53)
+!55 = !DILocation(line: 17, scope: !24)
+!56 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "this", arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: !40, type: !38)
+!57 = !DILocation(line: 0, scope: !40, inlinedAt: !55)
+!58 = !{i8* getelementptr inbounds ([1 x i8], [1 x i8]* @.str, i64 0, i64 0)}
+!59 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "value", line: 5, arg: 2, scope: !40, file: !25, type: !15)
+!60 = !DILocation(line: 5, scope: !40, inlinedAt: !55)
+!61 = !DILocation(line: 5, scope: !62, inlinedAt: !55)
+!62 = distinct !DILexicalBlock(line: 5, column: 0, file: !1, scope: !40)
+!63 = !DILocation(line: 18, scope: !24)
+!64 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "this", arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: !40, type: !38)
+!65 = !DILocation(line: 0, scope: !40, inlinedAt: !63)
+!66 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "value", line: 5, arg: 2, scope: !40, file: !25, type: !15)
+!67 = !DILocation(line: 5, scope: !40, inlinedAt: !63)
+!68 = !DILocation(line: 5, scope: !62, inlinedAt: !63)
+!69 = !DILocation(line: 20, scope: !70)
+!70 = distinct !DILexicalBlock(line: 20, column: 0, file: !1, scope: !24)
+!71 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "this", arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: !35, type: !38)
+!72 = !DILocation(line: 21, scope: !70)
+!73 = !DILocation(line: 0, scope: !35, inlinedAt: !72)
+!74 = !{i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str1, i64 0, i64 0)}
+!75 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "value", line: 6, arg: 2, scope: !35, file: !25, type: !15)
+!76 = !DILocation(line: 6, scope: !35, inlinedAt: !72)
+!77 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "this", arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: !35, type: !38)
+!78 = !DILocation(line: 23, scope: !70)
+!79 = !DILocation(line: 0, scope: !35, inlinedAt: !78)
+!80 = !{i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str2, i64 0, i64 0)}
+!81 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "value", line: 6, arg: 2, scope: !35, file: !25, type: !15)
+!82 = !DILocation(line: 6, scope: !35, inlinedAt: !78)
+!83 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "this", arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: !35, type: !38)
+!84 = !DILocation(line: 24, scope: !24)
+!85 = !DILocation(line: 0, scope: !35, inlinedAt: !84)
+!86 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "value", line: 6, arg: 2, scope: !35, file: !25, type: !15)
+!87 = !DILocation(line: 6, scope: !35, inlinedAt: !84)
+!88 = !DILocation(line: 25, scope: !24)
diff --git a/test/CodeGen/X86/dbg-changes-codegen.ll b/test/CodeGen/X86/dbg-changes-codegen.ll
index 2179667245f1..b15e4bd4bf2d 100644
--- a/test/CodeGen/X86/dbg-changes-codegen.ll
+++ b/test/CodeGen/X86/dbg-changes-codegen.ll
@@ -43,8 +43,8 @@
; Function Attrs: nounwind readonly uwtable
define zeroext i1 @_ZN3Foo3batEv(%struct.Foo* %this) #0 align 2 {
entry:
- %0 = load %struct.Foo** @pfoo, align 8
- tail call void @llvm.dbg.value(metadata %struct.Foo* %0, i64 0, metadata !62, metadata !{!"0x102"})
+ %0 = load %struct.Foo*, %struct.Foo** @pfoo, align 8
+ tail call void @llvm.dbg.value(metadata %struct.Foo* %0, i64 0, metadata !62, metadata !DIExpression()), !dbg !DILocation(scope: !DISubprogram())
%cmp.i = icmp eq %struct.Foo* %0, %this
ret i1 %cmp.i
}
@@ -52,9 +52,9 @@ entry:
; Function Attrs: nounwind uwtable
define void @_Z3bazv() #1 {
entry:
- %0 = load %struct.Wibble** @wibble1, align 8
- tail call void @llvm.dbg.value(metadata %struct.Flibble* undef, i64 0, metadata !65, metadata !{!"0x102"})
- %1 = load %struct.Wibble** @wibble2, align 8
+ %0 = load %struct.Wibble*, %struct.Wibble** @wibble1, align 8
+ tail call void @llvm.dbg.value(metadata %struct.Flibble* undef, i64 0, metadata !65, metadata !DIExpression()), !dbg !DILocation(scope: !DISubprogram())
+ %1 = load %struct.Wibble*, %struct.Wibble** @wibble2, align 8
%cmp.i = icmp ugt %struct.Wibble* %1, %0
br i1 %cmp.i, label %if.then.i, label %_ZN7Flibble3barEP6Wibble.exit
@@ -63,7 +63,7 @@ if.then.i: ; preds = %entry
br label %_ZN7Flibble3barEP6Wibble.exit
_ZN7Flibble3barEP6Wibble.exit: ; preds = %entry, %if.then.i
- %x.i = getelementptr inbounds %struct.Wibble* %0, i64 0, i32 0
+ %x.i = getelementptr inbounds %struct.Wibble, %struct.Wibble* %0, i64 0, i32 0
store i32 0, i32* %x.i, align 4
ret void
}
@@ -76,8 +76,8 @@ attributes #1 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointe
attributes #2 = { nounwind readnone }
-!17 = !{!"0x10\00\000\000\000\000\000", null, null, null} ; [ DW_TAG_reference_type ] [line 0, size 0, align 0, offset 0] [from Foo]
-!45 = !{!"0xf\00\000\0064\0064\000\000", null, null, null} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from Flibble]
-!62 = !{!"0x101\00arg\0033554436\000", null, null, !17} ; [ DW_TAG_arg_variable ] [arg] [line 4]
+!17 = !DIDerivedType(tag: DW_TAG_reference_type, baseType: null)
+!45 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, baseType: null)
+!62 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "arg", line: 4, arg: 2, scope: !DISubprogram(), type: !17)
!64 = !{%struct.Flibble* undef}
-!65 = !{!"0x101\00this\0016777229\001088", null, null, !45} ; [ DW_TAG_arg_variable ] [this] [line 13]
+!65 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "this", line: 13, arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: !DISubprogram(), type: !45)
diff --git a/test/CodeGen/X86/dbg-combine.ll b/test/CodeGen/X86/dbg-combine.ll
new file mode 100644
index 000000000000..5eb2ea9df513
--- /dev/null
+++ b/test/CodeGen/X86/dbg-combine.ll
@@ -0,0 +1,113 @@
+; RUN: llc -mtriple x86_64-pc-linux -O0 < %s | FileCheck %s
+
+; Make sure that the sequence of debug locations for function foo is correctly
+; generated. More specifically, .loc entries for lines 4,5,6,7 must appear in
+; the correct sequence.
+
+; $ clang -emit-llvm -S -g dbg-combine.c
+; 1. int foo()
+; 2. {
+; 3. int elems = 3;
+; 4. int array1[elems];
+; 5. array1[0]=0;
+; 6. array1[1]=1;
+; 7. array1[2]=2;
+; 8. int array2[elems];
+; 9. array2[0]=1;
+; 10. return array2[0];
+; 11. }
+
+; CHECK: .loc 1 4
+; CHECK: .loc 1 5
+; CHECK: .loc 1 6
+; CHECK: .loc 1 7
+
+; ModuleID = 'dbg-combine.c'
+; Function Attrs: nounwind uwtable
+define i32 @foo() #0 {
+entry:
+ %elems = alloca i32, align 4
+ %saved_stack = alloca i8*
+ %cleanup.dest.slot = alloca i32
+ call void @llvm.dbg.declare(metadata i32* %elems, metadata !12, metadata !13), !dbg !14
+ store i32 3, i32* %elems, align 4, !dbg !14
+ %0 = load i32, i32* %elems, align 4, !dbg !15
+ %1 = zext i32 %0 to i64, !dbg !16
+ %2 = call i8* @llvm.stacksave(), !dbg !16
+ store i8* %2, i8** %saved_stack, !dbg !16
+ %vla = alloca i32, i64 %1, align 16, !dbg !16
+ call void @llvm.dbg.declare(metadata i32* %vla, metadata !17, metadata !21), !dbg !22
+ %arrayidx = getelementptr inbounds i32, i32* %vla, i64 0, !dbg !23
+ store i32 0, i32* %arrayidx, align 4, !dbg !24
+ %arrayidx1 = getelementptr inbounds i32, i32* %vla, i64 1, !dbg !25
+ store i32 1, i32* %arrayidx1, align 4, !dbg !26
+ %arrayidx2 = getelementptr inbounds i32, i32* %vla, i64 2, !dbg !27
+ store i32 2, i32* %arrayidx2, align 4, !dbg !28
+ %3 = load i32, i32* %elems, align 4, !dbg !29
+ %4 = zext i32 %3 to i64, !dbg !30
+ %vla3 = alloca i32, i64 %4, align 16, !dbg !30
+ call void @llvm.dbg.declare(metadata i32* %vla3, metadata !31, metadata !21), !dbg !32
+ %arrayidx4 = getelementptr inbounds i32, i32* %vla3, i64 0, !dbg !33
+ store i32 1, i32* %arrayidx4, align 4, !dbg !34
+ %arrayidx5 = getelementptr inbounds i32, i32* %vla3, i64 0, !dbg !35
+ %5 = load i32, i32* %arrayidx5, align 4, !dbg !35
+ store i32 1, i32* %cleanup.dest.slot
+ %6 = load i8*, i8** %saved_stack, !dbg !36
+ call void @llvm.stackrestore(i8* %6), !dbg !36
+ ret i32 %5, !dbg !36
+}
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
+
+; Function Attrs: nounwind
+declare i8* @llvm.stacksave() #2
+
+; Function Attrs: nounwind
+declare void @llvm.stackrestore(i8*) #2
+
+attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone }
+attributes #2 = { nounwind }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!9, !10}
+!llvm.ident = !{!11}
+
+!0 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.7.0 (trunk 227074)", isOptimized: false, emissionKind: 1, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!1 = !DIFile(filename: "dbg-combine.c", directory: "/home/probinson/projects/scratch")
+!2 = !{}
+!3 = !{!4}
+!4 = !DISubprogram(name: "foo", line: 1, isLocal: false, isDefinition: true, isOptimized: false, scopeLine: 2, file: !1, scope: !5, type: !6, function: i32 ()* @foo, variables: !2)
+!5 = !DIFile(filename: "dbg-combine.c", directory: "/home/probinson/projects/scratch")
+!6 = !DISubroutineType(types: !7)
+!7 = !{!8}
+!8 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!9 = !{i32 2, !"Dwarf Version", i32 4}
+!10 = !{i32 2, !"Debug Info Version", i32 3}
+!11 = !{!"clang version 3.7.0 (trunk 227074)"}
+!12 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "elems", line: 3, scope: !4, file: !5, type: !8)
+!13 = !DIExpression()
+!14 = !DILocation(line: 3, column: 8, scope: !4)
+!15 = !DILocation(line: 4, column: 15, scope: !4)
+!16 = !DILocation(line: 4, column: 4, scope: !4)
+!17 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "array1", line: 4, scope: !4, file: !5, type: !18)
+!18 = !DICompositeType(tag: DW_TAG_array_type, align: 32, baseType: !8, elements: !19)
+!19 = !{!20}
+!20 = !DISubrange(count: -1)
+!21 = !DIExpression(DW_OP_deref)
+!22 = !DILocation(line: 4, column: 8, scope: !4)
+!23 = !DILocation(line: 5, column: 4, scope: !4)
+!24 = !DILocation(line: 5, column: 13, scope: !4)
+!25 = !DILocation(line: 6, column: 4, scope: !4)
+!26 = !DILocation(line: 6, column: 13, scope: !4)
+!27 = !DILocation(line: 7, column: 4, scope: !4)
+!28 = !DILocation(line: 7, column: 13, scope: !4)
+!29 = !DILocation(line: 8, column: 15, scope: !4)
+!30 = !DILocation(line: 8, column: 4, scope: !4)
+!31 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "array2", line: 8, scope: !4, file: !5, type: !18)
+!32 = !DILocation(line: 8, column: 8, scope: !4)
+!33 = !DILocation(line: 9, column: 4, scope: !4)
+!34 = !DILocation(line: 9, column: 13, scope: !4)
+!35 = !DILocation(line: 10, column: 11, scope: !4)
+!36 = !DILocation(line: 11, column: 1, scope: !4)
diff --git a/test/CodeGen/X86/discontiguous-loops.ll b/test/CodeGen/X86/discontiguous-loops.ll
index 479c450ca20f..20db750d206b 100644
--- a/test/CodeGen/X86/discontiguous-loops.ll
+++ b/test/CodeGen/X86/discontiguous-loops.ll
@@ -39,8 +39,8 @@ ybb8: ; preds = %ybb1
br i1 %tmp9, label %bb10, label %ybb12
bb10: ; preds = %ybb8
- %tmp11 = load i8** undef, align 8 ; <i8*> [#uses=1]
- call void (i8*, ...)* @fatal(i8* getelementptr inbounds ([37 x i8]* @.str96, i64 0, i64 0), i8* %tmp11) nounwind
+ %tmp11 = load i8*, i8** undef, align 8 ; <i8*> [#uses=1]
+ call void (i8*, ...) @fatal(i8* getelementptr inbounds ([37 x i8], [37 x i8]* @.str96, i64 0, i64 0), i8* %tmp11) nounwind
unreachable
ybb12: ; preds = %ybb8
@@ -51,7 +51,7 @@ ybb13: ; preds = %ybb12
br i1 %tmp14, label %bb16, label %ybb1
bb15: ; preds = %ybb12
- call void (i8*, ...)* @fatal(i8* getelementptr inbounds ([37 x i8]* @.str96, i64 0, i64 0), i8* undef) nounwind
+ call void (i8*, ...) @fatal(i8* getelementptr inbounds ([37 x i8], [37 x i8]* @.str96, i64 0, i64 0), i8* undef) nounwind
unreachable
bb16: ; preds = %ybb13
diff --git a/test/CodeGen/X86/div8.ll b/test/CodeGen/X86/div8.ll
index 0825f79e3240..f4f50e5a494c 100644
--- a/test/CodeGen/X86/div8.ll
+++ b/test/CodeGen/X86/div8.ll
@@ -10,13 +10,13 @@ entry:
%quotient = alloca i8, align 1
store i8 %dividend, i8* %dividend.addr, align 2
store i8 %divisor, i8* %divisor.addr, align 1
- %tmp = load i8* %dividend.addr, align 2
- %tmp1 = load i8* %divisor.addr, align 1
+ %tmp = load i8, i8* %dividend.addr, align 2
+ %tmp1 = load i8, i8* %divisor.addr, align 1
; Insist on i8->i32 zero extension, even though divb demands only i16:
; CHECK: movzbl {{.*}}%eax
; CHECK: divb
%div = udiv i8 %tmp, %tmp1
store i8 %div, i8* %quotient, align 1
- %tmp4 = load i8* %quotient, align 1
+ %tmp4 = load i8, i8* %quotient, align 1
ret i8 %tmp4
}
diff --git a/test/CodeGen/X86/dllexport-x86_64.ll b/test/CodeGen/X86/dllexport-x86_64.ll
index cf4557d12716..629a5572977f 100644
--- a/test/CodeGen/X86/dllexport-x86_64.ll
+++ b/test/CodeGen/X86/dllexport-x86_64.ll
@@ -17,19 +17,16 @@ define dllexport void @f2() unnamed_addr {
ret void
}
-; CHECK: .section .text,"xr",discard,lnk1
; CHECK: .globl lnk1
define linkonce_odr dllexport void @lnk1() {
ret void
}
-; CHECK: .section .text,"xr",discard,lnk2
; CHECK: .globl lnk2
define linkonce_odr dllexport void @lnk2() alwaysinline {
ret void
}
-; CHECK: .section .text,"xr",discard,weak1
; CHECK: .globl weak1
define weak_odr dllexport void @weak1() {
ret void
@@ -47,11 +44,9 @@ define weak_odr dllexport void @weak1() {
; CHECK: .comm Var3
@Var3 = common dllexport global i32 0, align 4
-; CHECK: .section .data,"dw",discard,WeakVar1
; CHECK: .globl WeakVar1
@WeakVar1 = weak_odr dllexport global i32 1, align 4
-; CHECK: .section .rdata,"dr",discard,WeakVar2
; CHECK: .globl WeakVar2
@WeakVar2 = weak_odr dllexport unnamed_addr constant i32 1
diff --git a/test/CodeGen/X86/dllexport.ll b/test/CodeGen/X86/dllexport.ll
index 145b48aaf635..02a83ae7b191 100644
--- a/test/CodeGen/X86/dllexport.ll
+++ b/test/CodeGen/X86/dllexport.ll
@@ -38,19 +38,16 @@ define dllexport x86_thiscallcc void @thisfun() nounwind {
ret void
}
-; CHECK: .section .text,"xr",discard,_lnk1
; CHECK: .globl _lnk1
define linkonce_odr dllexport void @lnk1() {
ret void
}
-; CHECK: .section .text,"xr",discard,_lnk2
; CHECK: .globl _lnk2
define linkonce_odr dllexport void @lnk2() alwaysinline {
ret void
}
-; CHECK: .section .text,"xr",discard,_weak1
; CHECK: .globl _weak1
define weak_odr dllexport void @weak1() {
ret void
@@ -68,11 +65,9 @@ define weak_odr dllexport void @weak1() {
; CHECK: .comm _Var3
@Var3 = common dllexport global i32 0, align 4
-; CHECK: .section .data,"dw",discard,_WeakVar1
; CHECK: .globl _WeakVar1
@WeakVar1 = weak_odr dllexport global i32 1, align 4
-; CHECK: .section .rdata,"dr",discard,_WeakVar2
; CHECK: .globl _WeakVar2
@WeakVar2 = weak_odr dllexport unnamed_addr constant i32 1
diff --git a/test/CodeGen/X86/dllimport-x86_64.ll b/test/CodeGen/X86/dllimport-x86_64.ll
index 839bca4f3c31..7ee6b4323d15 100644
--- a/test/CodeGen/X86/dllimport-x86_64.ll
+++ b/test/CodeGen/X86/dllimport-x86_64.ll
@@ -35,14 +35,14 @@ define void @use() nounwind {
; available_externally uses go away
; OPT-NOT: call void @inline1()
; OPT-NOT: call void @inline2()
-; OPT-NOT: load i32* @Var2
-; OPT: call void (...)* @dummy(i32 %1, i32 1)
+; OPT-NOT: load i32, i32* @Var2
+; OPT: call void (...) @dummy(i32 %1, i32 1)
; CHECK-DAG: movq __imp_Var1(%rip), [[R1:%[a-z]{3}]]
; CHECK-DAG: movq __imp_Var2(%rip), [[R2:%[a-z]{3}]]
- %1 = load i32* @Var1
- %2 = load i32* @Var2
- call void(...)* @dummy(i32 %1, i32 %2)
+ %1 = load i32, i32* @Var1
+ %2 = load i32, i32* @Var2
+ call void(...) @dummy(i32 %1, i32 %2)
ret void
}
diff --git a/test/CodeGen/X86/dllimport.ll b/test/CodeGen/X86/dllimport.ll
index 231ad65740b8..9db654f22712 100644
--- a/test/CodeGen/X86/dllimport.ll
+++ b/test/CodeGen/X86/dllimport.ll
@@ -46,14 +46,14 @@ define void @use() nounwind {
; available_externally uses go away
; OPT-NOT: call void @inline1()
; OPT-NOT: call void @inline2()
-; OPT-NOT: load i32* @Var2
-; OPT: call void (...)* @dummy(i32 %1, i32 1)
+; OPT-NOT: load i32, i32* @Var2
+; OPT: call void (...) @dummy(i32 %1, i32 1)
; CHECK-DAG: movl __imp__Var1, [[R1:%[a-z]{3}]]
; CHECK-DAG: movl __imp__Var2, [[R2:%[a-z]{3}]]
- %1 = load i32* @Var1
- %2 = load i32* @Var2
- call void(...)* @dummy(i32 %1, i32 %2)
+ %1 = load i32, i32* @Var1
+ %2 = load i32, i32* @Var2
+ call void(...) @dummy(i32 %1, i32 %2)
ret void
}
diff --git a/test/CodeGen/X86/dollar-name.ll b/test/CodeGen/X86/dollar-name.ll
index 2ecd72909cb1..a31b806c031f 100644
--- a/test/CodeGen/X86/dollar-name.ll
+++ b/test/CodeGen/X86/dollar-name.ll
@@ -8,8 +8,8 @@ define i32 @"$foo"() nounwind {
; CHECK: movl ($bar),
; CHECK: addl ($qux),
; CHECK: calll ($hen)
- %m = load i32* @"$bar"
- %n = load i32* @"$qux"
+ %m = load i32, i32* @"$bar"
+ %n = load i32, i32* @"$qux"
%t = add i32 %m, %n
%u = call i32 @"$hen"(i32 %t)
ret i32 %u
diff --git a/test/CodeGen/X86/dont-trunc-store-double-to-float.ll b/test/CodeGen/X86/dont-trunc-store-double-to-float.ll
index 24d9533eba4a..8a334d21631a 100644
--- a/test/CodeGen/X86/dont-trunc-store-double-to-float.ll
+++ b/test/CodeGen/X86/dont-trunc-store-double-to-float.ll
@@ -10,7 +10,7 @@ entry-block:
%b = alloca float
store double 3.140000e+00, double* %a
- %0 = load double* %a
+ %0 = load double, double* %a
%1 = fptrunc double %0 to float
diff --git a/test/CodeGen/X86/dwarf-comp-dir.ll b/test/CodeGen/X86/dwarf-comp-dir.ll
index 77eba63a83ec..27b8b1552ec1 100644
--- a/test/CodeGen/X86/dwarf-comp-dir.ll
+++ b/test/CodeGen/X86/dwarf-comp-dir.ll
@@ -7,15 +7,15 @@ target triple = "x86_64-unknown-linux-gnu"
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!5}
-!0 = !{!"0x11\0012\00clang version 3.1 (trunk 143523)\001\00\000\00\000", !4, !2, !7, !2, !2, null} ; [ DW_TAG_compile_unit ]
+!0 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.1 (trunk 143523)", isOptimized: true, emissionKind: 0, file: !4, enums: !2, retainedTypes: !7, subprograms: !2, globals: !2)
!2 = !{}
-!3 = !{!"0x29", !4} ; [ DW_TAG_file_type ]
-!4 = !{!"empty.c", !"/home/nlewycky"}
-!6 = !{!"0x13\00foo\001\008\008\000\000\000", !4, null, null, !2, null, null, !"_ZTS3foo"} ; [ DW_TAG_structure_type ] [foo] [line 1, size 8, align 8, offset 0] [def] [from ]
+!3 = !DIFile(filename: "empty.c", directory: "/home/nlewycky")
+!4 = !DIFile(filename: "empty.c", directory: "/home/nlewycky")
+!6 = !DICompositeType(tag: DW_TAG_structure_type, name: "foo", line: 1, size: 8, align: 8, file: !4, elements: !2, identifier: "_ZTS3foo")
!7 = !{!6}
; The important part of the following check is that dir = #0.
; Dir Mod Time File Len File Name
; ---- ---------- ---------- ---------------------------
; CHECK: file_names[ 1] 0 0x00000000 0x00000000 empty.c
-!5 = !{i32 1, !"Debug Info Version", i32 2}
+!5 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/CodeGen/X86/dwarf-eh-prepare.ll b/test/CodeGen/X86/dwarf-eh-prepare.ll
new file mode 100644
index 000000000000..25572d868da0
--- /dev/null
+++ b/test/CodeGen/X86/dwarf-eh-prepare.ll
@@ -0,0 +1,158 @@
+; RUN: opt -mtriple=x86_64-linux-gnu -dwarfehprepare < %s -S | FileCheck %s
+
+; Check basic functionality of IR-to-IR DWARF EH preparation. This should
+; eliminate resumes. This pass requires a TargetMachine, so we put it under X86
+; and provide an x86 triple.
+
+@int_typeinfo = global i8 0
+
+declare void @might_throw()
+declare void @cleanup()
+
+define i32 @simple_cleanup_catch() {
+ invoke void @might_throw()
+ to label %cont unwind label %lpad
+
+; CHECK-LABEL: define i32 @simple_cleanup_catch()
+; CHECK: invoke void @might_throw()
+
+cont:
+ ret i32 0
+
+; CHECK: ret i32 0
+
+lpad:
+ %ehvals = landingpad { i8*, i32 } personality i32 (...)* @__gxx_personality_v0
+ cleanup
+ catch i8* @int_typeinfo
+ %ehptr = extractvalue { i8*, i32 } %ehvals, 0
+ %ehsel = extractvalue { i8*, i32 } %ehvals, 1
+ call void @cleanup()
+ %int_sel = call i32 @llvm.eh.typeid.for(i8* @int_typeinfo)
+ %int_match = icmp eq i32 %ehsel, %int_sel
+ br i1 %int_match, label %catch_int, label %eh.resume
+
+; CHECK: lpad:
+; CHECK: landingpad { i8*, i32 } personality i32 (...)* @__gxx_personality_v0
+; CHECK: call void @cleanup()
+; CHECK: call i32 @llvm.eh.typeid.for
+; CHECK: br i1
+
+catch_int:
+ ret i32 1
+
+; CHECK: catch_int:
+; CHECK: ret i32 1
+
+eh.resume:
+ %tmp_ehvals = insertvalue { i8*, i32 } undef, i8* %ehptr, 0
+ %new_ehvals = insertvalue { i8*, i32 } %tmp_ehvals, i32 %ehsel, 1
+ resume { i8*, i32 } %new_ehvals
+
+; CHECK: eh.resume:
+; CHECK-NEXT: call void @_Unwind_Resume(i8* %ehptr)
+}
+
+
+define i32 @catch_no_resume() {
+ invoke void @might_throw()
+ to label %cont unwind label %lpad
+
+cont:
+ ret i32 0
+
+lpad:
+ %ehvals = landingpad { i8*, i32 } personality i32 (...)* @__gxx_personality_v0
+ catch i8* @int_typeinfo
+ %ehptr = extractvalue { i8*, i32 } %ehvals, 0
+ %ehsel = extractvalue { i8*, i32 } %ehvals, 1
+ %int_sel = call i32 @llvm.eh.typeid.for(i8* @int_typeinfo)
+ %int_match = icmp eq i32 %ehsel, %int_sel
+ br i1 %int_match, label %catch_int, label %eh.resume
+
+catch_int:
+ ret i32 1
+
+eh.resume:
+ %tmp_ehvals = insertvalue { i8*, i32 } undef, i8* %ehptr, 0
+ %new_ehvals = insertvalue { i8*, i32 } %tmp_ehvals, i32 %ehsel, 1
+ resume { i8*, i32 } %new_ehvals
+}
+
+; Check that we can prune the unreachable resume instruction.
+
+; CHECK-LABEL: define i32 @catch_no_resume() {
+; CHECK: invoke void @might_throw()
+; CHECK: ret i32 0
+; CHECK: lpad:
+; CHECK: landingpad { i8*, i32 } personality i32 (...)* @__gxx_personality_v0
+; CHECK-NOT: br i1
+; CHECK: ret i32 1
+; CHECK-NOT: call void @_Unwind_Resume
+; CHECK: {{^[}]}}
+
+
+define i32 @catch_cleanup_merge() {
+ invoke void @might_throw()
+ to label %inner_invoke unwind label %outer_lpad
+inner_invoke:
+ invoke void @might_throw()
+ to label %cont unwind label %inner_lpad
+cont:
+ ret i32 0
+
+outer_lpad:
+ %ehvals1 = landingpad { i8*, i32 } personality i32 (...)* @__gxx_personality_v0
+ catch i8* @int_typeinfo
+ br label %catch.dispatch
+
+inner_lpad:
+ %ehvals2 = landingpad { i8*, i32 } personality i32 (...)* @__gxx_personality_v0
+ cleanup
+ catch i8* @int_typeinfo
+ call void @cleanup()
+ br label %catch.dispatch
+
+catch.dispatch:
+ %ehvals = phi { i8*, i32 } [ %ehvals1, %outer_lpad ], [ %ehvals2, %inner_lpad ]
+ %ehptr = extractvalue { i8*, i32 } %ehvals, 0
+ %ehsel = extractvalue { i8*, i32 } %ehvals, 1
+ %int_sel = call i32 @llvm.eh.typeid.for(i8* @int_typeinfo)
+ %int_match = icmp eq i32 %ehsel, %int_sel
+ br i1 %int_match, label %catch_int, label %eh.resume
+
+catch_int:
+ ret i32 1
+
+eh.resume:
+ %tmp_ehvals = insertvalue { i8*, i32 } undef, i8* %ehptr, 0
+ %new_ehvals = insertvalue { i8*, i32 } %tmp_ehvals, i32 %ehsel, 1
+ resume { i8*, i32 } %new_ehvals
+}
+
+; We can't prune this merge because one landingpad is a cleanup pad.
+
+; CHECK-LABEL: define i32 @catch_cleanup_merge()
+; CHECK: invoke void @might_throw()
+; CHECK: invoke void @might_throw()
+; CHECK: ret i32 0
+;
+; CHECK: outer_lpad:
+; CHECK: landingpad { i8*, i32 } personality i32 (...)* @__gxx_personality_v0
+; CHECK: br label %catch.dispatch
+;
+; CHECK: inner_lpad:
+; CHECK: landingpad { i8*, i32 } personality i32 (...)* @__gxx_personality_v0
+; CHECK: call void @cleanup()
+; CHECK: br label %catch.dispatch
+;
+; CHECK: catch.dispatch:
+; CHECK: call i32 @llvm.eh.typeid.for
+; CHECK: br i1
+; CHECK: catch_int:
+; CHECK: ret i32 1
+; CHECK: eh.resume:
+; CHECK-NEXT: call void @_Unwind_Resume(i8* %ehptr)
+
+declare i32 @__gxx_personality_v0(...)
+declare i32 @llvm.eh.typeid.for(i8*)
diff --git a/test/CodeGen/X86/dynamic-alloca-lifetime.ll b/test/CodeGen/X86/dynamic-alloca-lifetime.ll
index f019bed858c2..034b074ef9bd 100644
--- a/test/CodeGen/X86/dynamic-alloca-lifetime.ll
+++ b/test/CodeGen/X86/dynamic-alloca-lifetime.ll
@@ -29,10 +29,10 @@ end1:
ret void
if.else130: ; preds = %bb1
- %tmp = getelementptr inbounds [8192 x i8]* %bitmapBuffer, i32 0, i32 0
+ %tmp = getelementptr inbounds [8192 x i8], [8192 x i8]* %bitmapBuffer, i32 0, i32 0
call void @llvm.lifetime.start(i64 8192, i8* %tmp) #0
call void @llvm.lifetime.end(i64 8192, i8* %tmp) #0
- %tmp25 = getelementptr inbounds [8192 x i8]* %bitmapBuffer229, i32 0, i32 0
+ %tmp25 = getelementptr inbounds [8192 x i8], [8192 x i8]* %bitmapBuffer229, i32 0, i32 0
call void @llvm.lifetime.start(i64 8192, i8* %tmp25) #0
call void @llvm.lifetime.end(i64 8192, i8* %tmp25) #0
br label %end1
diff --git a/test/CodeGen/X86/dynamic-allocas-VLAs.ll b/test/CodeGen/X86/dynamic-allocas-VLAs.ll
index 9405f76cbed0..2925f243b0e3 100644
--- a/test/CodeGen/X86/dynamic-allocas-VLAs.ll
+++ b/test/CodeGen/X86/dynamic-allocas-VLAs.ll
@@ -7,7 +7,7 @@ define i32 @t1() nounwind uwtable ssp {
entry:
%a = alloca i32, align 4
call void @t1_helper(i32* %a) nounwind
- %0 = load i32* %a, align 4
+ %0 = load i32, i32* %a, align 4
%add = add nsw i32 %0, 13
ret i32 %add
@@ -27,7 +27,7 @@ entry:
%a = alloca i32, align 4
%v = alloca <8 x float>, align 32
call void @t2_helper(i32* %a, <8 x float>* %v) nounwind
- %0 = load i32* %a, align 4
+ %0 = load i32, i32* %a, align 4
%add = add nsw i32 %0, 13
ret i32 %add
@@ -53,7 +53,7 @@ entry:
%a = alloca i32, align 4
%vla = alloca i32, i64 %sz, align 16
call void @t3_helper(i32* %a, i32* %vla) nounwind
- %0 = load i32* %a, align 4
+ %0 = load i32, i32* %a, align 4
%add = add nsw i32 %0, 13
ret i32 %add
@@ -78,7 +78,7 @@ entry:
%v = alloca <8 x float>, align 32
%vla = alloca i32, i64 %sz, align 16
call void @t4_helper(i32* %a, i32* %vla, <8 x float>* %v) nounwind
- %0 = load i32* %a, align 4
+ %0 = load i32, i32* %a, align 4
%add = add nsw i32 %0, 13
ret i32 %add
@@ -108,10 +108,10 @@ define i32 @t5(float* nocapture %f) nounwind uwtable ssp {
entry:
%a = alloca i32, align 4
%0 = bitcast float* %f to <8 x float>*
- %1 = load <8 x float>* %0, align 32
+ %1 = load <8 x float>, <8 x float>* %0, align 32
call void @t5_helper1(i32* %a) nounwind
call void @t5_helper2(<8 x float> %1) nounwind
- %2 = load i32* %a, align 4
+ %2 = load i32, i32* %a, align 4
%add = add nsw i32 %2, 13
ret i32 %add
@@ -138,11 +138,11 @@ entry:
; CHECK: _t6
%a = alloca i32, align 4
%0 = bitcast float* %f to <8 x float>*
- %1 = load <8 x float>* %0, align 32
+ %1 = load <8 x float>, <8 x float>* %0, align 32
%vla = alloca i32, i64 %sz, align 16
call void @t6_helper1(i32* %a, i32* %vla) nounwind
call void @t6_helper2(<8 x float> %1) nounwind
- %2 = load i32* %a, align 4
+ %2 = load i32, i32* %a, align 4
%add = add nsw i32 %2, 13
ret i32 %add
}
@@ -162,7 +162,7 @@ entry:
store i32 0, i32* %x, align 32
%0 = zext i32 %size to i64
%vla = alloca i32, i64 %0, align 16
- %1 = load i32* %x, align 32
+ %1 = load i32, i32* %x, align 32
call void @bar(i32 %1, i32* %vla, %struct.struct_t* byval align 8 %arg1)
ret void
@@ -195,7 +195,7 @@ define i32 @t8() nounwind uwtable {
entry:
%a = alloca i32, align 4
call void @t1_helper(i32* %a) nounwind
- %0 = load i32* %a, align 4
+ %0 = load i32, i32* %a, align 4
%add = add nsw i32 %0, 13
ret i32 %add
@@ -213,7 +213,7 @@ entry:
%a = alloca i32, align 4
%vla = alloca i32, i64 %sz, align 16
call void @t3_helper(i32* %a, i32* %vla) nounwind
- %0 = load i32* %a, align 4
+ %0 = load i32, i32* %a, align 4
%add = add nsw i32 %0, 13
ret i32 %add
diff --git a/test/CodeGen/X86/early-ifcvt.ll b/test/CodeGen/X86/early-ifcvt.ll
index 2606bd28d5fc..7fcd530b62aa 100644
--- a/test/CodeGen/X86/early-ifcvt.ll
+++ b/test/CodeGen/X86/early-ifcvt.ll
@@ -14,8 +14,8 @@ do.body:
%min.0 = phi i32 [ 0, %entry ], [ %min.1, %do.cond ]
%n.addr.0 = phi i32 [ %n, %entry ], [ %dec, %do.cond ]
%p.addr.0 = phi i32* [ %p, %entry ], [ %incdec.ptr, %do.cond ]
- %incdec.ptr = getelementptr inbounds i32* %p.addr.0, i64 1
- %0 = load i32* %p.addr.0, align 4
+ %incdec.ptr = getelementptr inbounds i32, i32* %p.addr.0, i64 1
+ %0 = load i32, i32* %p.addr.0, align 4
%cmp = icmp sgt i32 %0, %max.0
br i1 %cmp, label %do.cond, label %if.else
@@ -62,7 +62,7 @@ if.then37:
if.end41:
%exit_status.0 = phi i32 [ 2, %if.then29 ], [ 0, %if.then37 ], [ 66, %entry ]
- call void (...)* @fprintf(i32 %exit_status.0) nounwind
+ call void (...) @fprintf(i32 %exit_status.0) nounwind
unreachable
}
@@ -108,7 +108,7 @@ if.end2042: ; preds = %while.body2038
br i1 undef, label %if.end2048, label %while.end2104
if.end2048: ; preds = %if.end2042
- %bsLive2054.pre = getelementptr inbounds i8* %s, i32 8
+ %bsLive2054.pre = getelementptr inbounds i8, i8* %s, i32 8
br label %sw.bb2050
sw.bb2050: ; preds = %if.end2048, %if.end.sw.bb2050_crit_edge
diff --git a/test/CodeGen/X86/eh-label.ll b/test/CodeGen/X86/eh-label.ll
new file mode 100644
index 000000000000..aff0bcfffcfe
--- /dev/null
+++ b/test/CodeGen/X86/eh-label.ll
@@ -0,0 +1,24 @@
+; RUN: llc < %s -mtriple=x86_64-pc-linux | FileCheck %s
+; Test that we don't crash if the .Lfunc_end0 name is taken.
+
+declare void @g()
+
+define void @f() {
+bb0:
+ call void asm ".Lfunc_end0:", ""()
+; CHECK: #APP
+; CHECK-NEXT: .Lfunc_end0:
+; CHECK-NEXT: #NO_APP
+
+ invoke void @g() to label %bb2 unwind label %bb1
+bb1:
+ landingpad { i8*, i32 } personality i8* bitcast (void ()* @g to i8*)
+ catch i8* null
+ call void @g()
+ ret void
+bb2:
+ ret void
+
+; CHECK: [[END:.Lfunc_end.*]]:
+; CHECK: .long [[END]]-
+}
diff --git a/test/CodeGen/X86/emit-big-cst.ll b/test/CodeGen/X86/emit-big-cst.ll
index 96c15d4a3658..51852d00f823 100644
--- a/test/CodeGen/X86/emit-big-cst.ll
+++ b/test/CodeGen/X86/emit-big-cst.ll
@@ -10,7 +10,7 @@
define void @accessBig(i64* %storage) {
%addr = bitcast i64* %storage to i82*
- %bigLoadedCst = load volatile i82* @bigCst
+ %bigLoadedCst = load volatile i82, i82* @bigCst
%tmp = add i82 %bigLoadedCst, 1
store i82 %tmp, i82* %addr
ret void
diff --git a/test/CodeGen/X86/empty-functions.ll b/test/CodeGen/X86/empty-functions.ll
index 42349688a710..735df2a4196d 100644
--- a/test/CodeGen/X86/empty-functions.ll
+++ b/test/CodeGen/X86/empty-functions.ll
@@ -50,5 +50,5 @@ entry:
; LINUX-FP-NEXT:{{^}}.L{{.*}}:{{$}}
; LINUX-FP-NEXT: .cfi_def_cfa_register %rbp
; LINUX-FP-NEXT:{{^}}.L{{.*}}:{{$}}
-; LINUX-FP-NEXT: .size func, .Ltmp3-func
+; LINUX-FP-NEXT: .size func, .Lfunc_end0-func
; LINUX-FP-NEXT: .cfi_endproc
diff --git a/test/CodeGen/X86/exception-label.ll b/test/CodeGen/X86/exception-label.ll
new file mode 100644
index 000000000000..cafa1e630b96
--- /dev/null
+++ b/test/CodeGen/X86/exception-label.ll
@@ -0,0 +1,23 @@
+; RUN: llc < %s -mtriple=x86_64-pc-linux | FileCheck %s
+
+; Test that we can handle .Lexception0 being defined. We used to crash.
+
+; CHECK: .cfi_lsda 3, [[LABEL:.*]]
+; CHECK: [[LABEL]]:
+; CHECK-NEXT: .byte 255 # @LPStart Encoding = omit
+
+declare void @g()
+
+define void @f() {
+bb0:
+ call void asm ".Lexception0:", ""()
+ invoke void @g()
+ to label %bb2 unwind label %bb1
+bb1:
+ landingpad { i8*, i32 } personality i8* bitcast (void ()* @g to i8*)
+ catch i8* null
+ br label %bb2
+
+bb2:
+ ret void
+}
diff --git a/test/CodeGen/X86/exedeps-movq.ll b/test/CodeGen/X86/exedeps-movq.ll
new file mode 100644
index 000000000000..a5873be6f27f
--- /dev/null
+++ b/test/CodeGen/X86/exedeps-movq.ll
@@ -0,0 +1,68 @@
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=sse2 | FileCheck %s --check-prefix=SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx | FileCheck %s --check-prefix=AVX
+
+; Verify that we select the correct version of the instruction that stores the low 64 bits
+; of a 128-bit vector. We want to avoid int/fp domain crossing penalties, so ignore the
+; bitcast ops and choose:
+;
+; movlps for floats
+; movlpd for doubles
+; movq for integers
+
+define void @store_floats(<4 x float> %x, i64* %p) {
+; SSE-LABEL: store_floats:
+; SSE: # BB#0:
+; SSE-NEXT: addps %xmm0, %xmm0
+; SSE-NEXT: movlps %xmm0, (%rdi)
+; SSE-NEXT: retq
+;
+; AVX-LABEL: store_floats:
+; AVX: # BB#0:
+; AVX-NEXT: vaddps %xmm0, %xmm0, %xmm0
+; AVX-NEXT: vmovlps %xmm0, (%rdi)
+; AVX-NEXT: retq
+ %a = fadd <4 x float> %x, %x
+ %b = shufflevector <4 x float> %a, <4 x float> undef, <2 x i32> <i32 0, i32 1>
+ %c = bitcast <2 x float> %b to i64
+ store i64 %c, i64* %p
+ ret void
+}
+
+define void @store_double(<2 x double> %x, i64* %p) {
+; SSE-LABEL: store_double:
+; SSE: # BB#0:
+; SSE-NEXT: addpd %xmm0, %xmm0
+; SSE-NEXT: movlpd %xmm0, (%rdi)
+; SSE-NEXT: retq
+;
+; AVX-LABEL: store_double:
+; AVX: # BB#0:
+; AVX-NEXT: vaddpd %xmm0, %xmm0, %xmm0
+; AVX-NEXT: vmovlpd %xmm0, (%rdi)
+; AVX-NEXT: retq
+ %a = fadd <2 x double> %x, %x
+ %b = extractelement <2 x double> %a, i32 0
+ %c = bitcast double %b to i64
+ store i64 %c, i64* %p
+ ret void
+}
+
+define void @store_int(<4 x i32> %x, <2 x float>* %p) {
+; SSE-LABEL: store_int:
+; SSE: # BB#0:
+; SSE-NEXT: paddd %xmm0, %xmm0
+; SSE-NEXT: movq %xmm0, (%rdi)
+; SSE-NEXT: retq
+;
+; AVX-LABEL: store_int:
+; AVX: # BB#0:
+; AVX-NEXT: vpaddd %xmm0, %xmm0, %xmm0
+; AVX-NEXT: vmovq %xmm0, (%rdi)
+; AVX-NEXT: retq
+ %a = add <4 x i32> %x, %x
+ %b = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
+ %c = bitcast <2 x i32> %b to <2 x float>
+ store <2 x float> %c, <2 x float>* %p
+ ret void
+}
+
diff --git a/test/CodeGen/X86/expand-opaque-const.ll b/test/CodeGen/X86/expand-opaque-const.ll
index 6e461cf8c30b..1e39cd88308c 100644
--- a/test/CodeGen/X86/expand-opaque-const.ll
+++ b/test/CodeGen/X86/expand-opaque-const.ll
@@ -11,11 +11,11 @@ entry:
%op2 = alloca i64
store i64 -6687208052682386272, i64* %op1
store i64 7106745059734980448, i64* %op2
- %tmp1 = load i64* %op1
- %tmp2 = load i64* %op2
+ %tmp1 = load i64, i64* %op1
+ %tmp2 = load i64, i64* %op2
%tmp = xor i64 %tmp2, 7106745059734980448
%tmp3 = lshr i64 %tmp1, %tmp
store i64 %tmp3, i64* %retval
- %tmp4 = load i64* %retval
+ %tmp4 = load i64, i64* %retval
ret i64 %tmp4
}
diff --git a/test/CodeGen/X86/extend.ll b/test/CodeGen/X86/extend.ll
index 9553b1b578b1..d349e782d5d0 100644
--- a/test/CodeGen/X86/extend.ll
+++ b/test/CodeGen/X86/extend.ll
@@ -5,13 +5,13 @@
@G2 = internal global i8 0 ; <i8*> [#uses=1]
define i16 @test1() {
- %tmp.0 = load i8* @G1 ; <i8> [#uses=1]
+ %tmp.0 = load i8, i8* @G1 ; <i8> [#uses=1]
%tmp.3 = zext i8 %tmp.0 to i16 ; <i16> [#uses=1]
ret i16 %tmp.3
}
define i16 @test2() {
- %tmp.0 = load i8* @G2 ; <i8> [#uses=1]
+ %tmp.0 = load i8, i8* @G2 ; <i8> [#uses=1]
%tmp.3 = sext i8 %tmp.0 to i16 ; <i16> [#uses=1]
ret i16 %tmp.3
}
diff --git a/test/CodeGen/X86/extern_weak.ll b/test/CodeGen/X86/extern_weak.ll
index 01e32aae08ca..c2ff09f21e80 100644
--- a/test/CodeGen/X86/extern_weak.ll
+++ b/test/CodeGen/X86/extern_weak.ll
@@ -5,7 +5,7 @@
declare extern_weak i32 @X(i8*)
define void @bar() {
- tail call void (...)* @foo( )
+ tail call void (...) @foo( )
ret void
}
diff --git a/test/CodeGen/X86/extract-extract.ll b/test/CodeGen/X86/extract-extract.ll
index ad79ab9ae20f..9f1516356203 100644
--- a/test/CodeGen/X86/extract-extract.ll
+++ b/test/CodeGen/X86/extract-extract.ll
@@ -11,11 +11,11 @@
define fastcc void @foo(%pp* nocapture byval %p_arg) {
entry:
- %tmp2 = getelementptr %pp* %p_arg, i64 0, i32 0 ; <%cc*> [#uses=
- %tmp3 = load %cc* %tmp2 ; <%cc> [#uses=1]
+ %tmp2 = getelementptr %pp, %pp* %p_arg, i64 0, i32 0 ; <%cc*> [#uses=
+ %tmp3 = load %cc, %cc* %tmp2 ; <%cc> [#uses=1]
%tmp34 = extractvalue %cc %tmp3, 0 ; <%crd> [#uses=1]
%tmp345 = extractvalue %crd %tmp34, 0 ; <i64> [#uses=1]
- %.ptr.i = load %cr** undef ; <%cr*> [#uses=0]
+ %.ptr.i = load %cr*, %cr** undef ; <%cr*> [#uses=0]
%tmp15.i = shl i64 %tmp345, 3 ; <i64> [#uses=0]
store %cr* undef, %cr** undef
ret void
diff --git a/test/CodeGen/X86/extract-store.ll b/test/CodeGen/X86/extract-store.ll
index 27d93804ba60..801ecc9ada6c 100644
--- a/test/CodeGen/X86/extract-store.ll
+++ b/test/CodeGen/X86/extract-store.ll
@@ -1,7 +1,8 @@
; RUN: llc < %s -o - -mcpu=generic -march=x86-64 -mattr=+sse4.1 | FileCheck %s -check-prefix=SSE41
; RUN: llc < %s -o - -mcpu=generic -march=x86-64 -mattr=+avx | FileCheck %s -check-prefix=AVX
-define void @pextrb(i8* nocapture %dst, <16 x i8> %foo) {
+; CHECK-LABEL: extract_i8
+define void @extract_i8(i8* nocapture %dst, <16 x i8> %foo) {
; AVX: vpextrb
; SSE41: pextrb
; AVX-NOT: movb
@@ -11,12 +12,35 @@ define void @pextrb(i8* nocapture %dst, <16 x i8> %foo) {
ret void
}
-define void @pextrw(i16* nocapture %dst, <8 x i16> %foo) {
+; CHECK-LABEL: extract_i16
+define void @extract_i16(i16* nocapture %dst, <8 x i16> %foo) {
; AVX: vpextrw
; SSE41: pextrw
; AVX-NOT: movw
; SSE41-NOT: movw
- %vecext = extractelement <8 x i16> %foo, i32 15
+ %vecext = extractelement <8 x i16> %foo, i32 7
+ store i16 %vecext, i16* %dst, align 1
+ ret void
+}
+
+; CHECK-LABEL: extract_i8_undef
+define void @extract_i8_undef(i8* nocapture %dst, <16 x i8> %foo) {
+; AVX-NOT: vpextrb
+; SSE41-NOT: pextrb
+; AVX-NOT: movb
+; SSE41-NOT: movb
+ %vecext = extractelement <16 x i8> %foo, i32 16 ; undef
+ store i8 %vecext, i8* %dst, align 1
+ ret void
+}
+
+; CHECK-LABEL: extract_i16_undef
+define void @extract_i16_undef(i16* nocapture %dst, <8 x i16> %foo) {
+; AVX-NOT: vpextrw
+; SSE41-NOT: pextrw
+; AVX-NOT: movw
+; SSE41-NOT: movw
+ %vecext = extractelement <8 x i16> %foo, i32 9 ; undef
store i16 %vecext, i16* %dst, align 1
ret void
}
diff --git a/test/CodeGen/X86/extractelement-index.ll b/test/CodeGen/X86/extractelement-index.ll
new file mode 100644
index 000000000000..ab3ff8ed435e
--- /dev/null
+++ b/test/CodeGen/X86/extractelement-index.ll
@@ -0,0 +1,51 @@
+; RUN: llc < %s -mtriple=x86_64-pc-linux | FileCheck %s -check-prefix=X64
+; RUN: llc < %s -mtriple=x86_64-pc-linux -mattr=+avx | FileCheck %s -check-prefix=AVX
+
+
+; CHECK-LABEL: extractelement_index_1:
+define i8 @extractelement_index_1(<32 x i8> %a) nounwind {
+ ; X64: movaps
+ ; AVX: vpextrb $1
+ %b = extractelement <32 x i8> %a, i256 1
+ ret i8 %b
+}
+
+; CHECK-LABEL: extractelement_index_2:
+define i32 @extractelement_index_2(<8 x i32> %a) nounwind {
+ ; X64: pshufd
+ ; AVX: vextractf128 $1
+ ; AVX-NEXT: vpextrd $3
+ %b = extractelement <8 x i32> %a, i64 7
+ ret i32 %b
+}
+
+; CHECK-LABEL: extractelement_index_3:
+define i32 @extractelement_index_3(<8 x i32> %a) nounwind {
+ ; CHECK-NOT: pextr
+ %b = extractelement <8 x i32> %a, i64 15
+ ret i32 %b
+}
+
+; CHECK-LABEL: extractelement_index_4:
+define i32 @extractelement_index_4(<8 x i32> %a) nounwind {
+ ; X64: movd
+ ; AVX: vextractf128 $1
+ ; AVX-NEXT: vmovd
+ %b = extractelement <8 x i32> %a, i256 4
+ ret i32 %b
+}
+
+; CHECK-LABEL: extractelement_index_5:
+define i8 @extractelement_index_5(<32 x i8> %a, i256 %i) nounwind {
+ ; X64: movaps
+ ; AVX: vmovaps
+ %b = extractelement <32 x i8> %a, i256 %i
+ ret i8 %b
+}
+
+; CHECK-LABEL: extractelement_index_6:
+define i8 @extractelement_index_6(<32 x i8> %a) nounwind {
+ ; CHECK-NOT: pextr
+ %b = extractelement <32 x i8> %a, i256 -1
+ ret i8 %b
+}
\ No newline at end of file
diff --git a/test/CodeGen/X86/extractelement-legalization-store-ordering.ll b/test/CodeGen/X86/extractelement-legalization-store-ordering.ll
new file mode 100644
index 000000000000..946516c8a46d
--- /dev/null
+++ b/test/CodeGen/X86/extractelement-legalization-store-ordering.ll
@@ -0,0 +1,57 @@
+; RUN: llc < %s -mtriple i386-apple-darwin -mcpu=yonah | FileCheck %s
+
+target datalayout = "e-m:o-p:32:32-f64:32:64-f80:128-n8:16:32-S128"
+
+; Make sure we don't break load/store ordering when turning an extractelement
+; into loads, off the stack or a previous store.
+; Be very explicit about the ordering/stack offsets.
+
+; CHECK-LABEL: test_extractelement_legalization_storereuse:
+; CHECK: # BB#0
+; CHECK-NEXT: pushl %ebx
+; CHECK-NEXT: pushl %edi
+; CHECK-NEXT: pushl %esi
+; CHECK-NEXT: movl 16(%esp), %eax
+; CHECK-NEXT: movl 24(%esp), %ecx
+; CHECK-NEXT: movl 20(%esp), %edx
+; CHECK-NEXT: paddd (%edx), %xmm0
+; CHECK-NEXT: movdqa %xmm0, (%edx)
+; CHECK-NEXT: shll $4, %ecx
+; CHECK-NEXT: movl (%ecx,%edx), %esi
+; CHECK-NEXT: movl 12(%ecx,%edx), %edi
+; CHECK-NEXT: movl 8(%ecx,%edx), %ebx
+; CHECK-NEXT: movl 4(%ecx,%edx), %edx
+; CHECK-NEXT: movl %esi, 12(%eax,%ecx)
+; CHECK-NEXT: movl %edx, (%eax,%ecx)
+; CHECK-NEXT: movl %ebx, 8(%eax,%ecx)
+; CHECK-NEXT: movl %edi, 4(%eax,%ecx)
+; CHECK-NEXT: popl %esi
+; CHECK-NEXT: popl %edi
+; CHECK-NEXT: popl %ebx
+; CHECK-NEXT: retl
+define void @test_extractelement_legalization_storereuse(<4 x i32> %a, i32* nocapture %x, i32* nocapture readonly %y, i32 %i) #0 {
+entry:
+ %0 = bitcast i32* %y to <4 x i32>*
+ %1 = load <4 x i32>, <4 x i32>* %0, align 16
+ %am = add <4 x i32> %a, %1
+ store <4 x i32> %am, <4 x i32>* %0, align 16
+ %ip0 = shl nsw i32 %i, 2
+ %ip1 = or i32 %ip0, 1
+ %ip2 = or i32 %ip0, 2
+ %ip3 = or i32 %ip0, 3
+ %vecext = extractelement <4 x i32> %am, i32 %ip0
+ %arrayidx = getelementptr inbounds i32, i32* %x, i32 %ip3
+ store i32 %vecext, i32* %arrayidx, align 4
+ %vecext5 = extractelement <4 x i32> %am, i32 %ip1
+ %arrayidx8 = getelementptr inbounds i32, i32* %x, i32 %ip0
+ store i32 %vecext5, i32* %arrayidx8, align 4
+ %vecext11 = extractelement <4 x i32> %am, i32 %ip2
+ %arrayidx14 = getelementptr inbounds i32, i32* %x, i32 %ip2
+ store i32 %vecext11, i32* %arrayidx14, align 4
+ %vecext17 = extractelement <4 x i32> %am, i32 %ip3
+ %arrayidx20 = getelementptr inbounds i32, i32* %x, i32 %ip1
+ store i32 %vecext17, i32* %arrayidx20, align 4
+ ret void
+}
+
+attributes #0 = { nounwind }
diff --git a/test/CodeGen/X86/extractelement-load.ll b/test/CodeGen/X86/extractelement-load.ll
index 732f698f59ff..e50d353797a0 100644
--- a/test/CodeGen/X86/extractelement-load.ll
+++ b/test/CodeGen/X86/extractelement-load.ll
@@ -9,7 +9,7 @@ define i32 @t(<2 x i64>* %val) nounwind {
; CHECK-NOT: movd
; CHECK: movl 8(
; CHECK-NEXT: ret
- %tmp2 = load <2 x i64>* %val, align 16 ; <<2 x i64>> [#uses=1]
+ %tmp2 = load <2 x i64>, <2 x i64>* %val, align 16 ; <<2 x i64>> [#uses=1]
%tmp3 = bitcast <2 x i64> %tmp2 to <4 x i32> ; <<4 x i32>> [#uses=1]
%tmp4 = extractelement <4 x i32> %tmp3, i32 2 ; <i32> [#uses=1]
ret i32 %tmp4
@@ -20,7 +20,7 @@ define i32 @t(<2 x i64>* %val) nounwind {
define i32 @t2(<8 x i32>* %xp) {
; CHECK-LABEL: t2:
; CHECK: ret
- %x = load <8 x i32>* %xp
+ %x = load <8 x i32>, <8 x i32>* %xp
%Shuff68 = shufflevector <8 x i32> %x, <8 x i32> undef, <8 x i32> <i32
undef, i32 7, i32 9, i32 undef, i32 13, i32 15, i32 1, i32 3>
%y = extractelement <8 x i32> %Shuff68, i32 0
@@ -41,7 +41,7 @@ define void @t3() {
; CHECK: movhpd
bb:
- %tmp13 = load <2 x double>* undef, align 1
+ %tmp13 = load <2 x double>, <2 x double>* undef, align 1
%.sroa.3.24.vec.extract = extractelement <2 x double> %tmp13, i32 1
store double %.sroa.3.24.vec.extract, double* undef, align 8
unreachable
@@ -55,7 +55,7 @@ define i64 @t4(<2 x double>* %a) {
; CHECK-LABEL: t4:
; CHECK: mov
; CHECK: ret
- %b = load <2 x double>* %a, align 16
+ %b = load <2 x double>, <2 x double>* %a, align 16
%c = shufflevector <2 x double> %b, <2 x double> %b, <2 x i32> <i32 1, i32 0>
%d = bitcast <2 x double> %c to <2 x i64>
%e = extractelement <2 x i64> %d, i32 1
diff --git a/test/CodeGen/X86/extractps.ll b/test/CodeGen/X86/extractps.ll
index 9e1a3754d0f0..fecd2faed321 100644
--- a/test/CodeGen/X86/extractps.ll
+++ b/test/CodeGen/X86/extractps.ll
@@ -7,7 +7,7 @@
external global float, align 16 ; <float*>:0 [#uses=2]
define internal void @""() nounwind {
- load float* @0, align 16 ; <float>:1 [#uses=1]
+ load float, float* @0, align 16 ; <float>:1 [#uses=1]
insertelement <4 x float> undef, float %1, i32 0 ; <<4 x float>>:2 [#uses=1]
call <4 x float> @llvm.x86.sse.rsqrt.ss( <4 x float> %2 ) ; <<4 x float>>:3 [#uses=1]
extractelement <4 x float> %3, i32 0 ; <float>:4 [#uses=1]
@@ -15,7 +15,7 @@ define internal void @""() nounwind {
ret void
}
define internal void @""() nounwind {
- load float* @0, align 16 ; <float>:1 [#uses=1]
+ load float, float* @0, align 16 ; <float>:1 [#uses=1]
insertelement <4 x float> undef, float %1, i32 1 ; <<4 x float>>:2 [#uses=1]
call <4 x float> @llvm.x86.sse.rsqrt.ss( <4 x float> %2 ) ; <<4 x float>>:3 [#uses=1]
extractelement <4 x float> %3, i32 1 ; <float>:4 [#uses=1]
diff --git a/test/CodeGen/X86/f16c-intrinsics.ll b/test/CodeGen/X86/f16c-intrinsics.ll
index 514d929f4432..485592aeac38 100644
--- a/test/CodeGen/X86/f16c-intrinsics.ll
+++ b/test/CodeGen/X86/f16c-intrinsics.ll
@@ -1,7 +1,9 @@
-; RUN: llc < %s -march=x86 -mattr=+avx,+f16c | FileCheck %s
-; RUN: llc < %s -march=x86-64 -mattr=+avx,+f16c | FileCheck %s
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx,+f16c | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+f16c | FileCheck %s
define <4 x float> @test_x86_vcvtph2ps_128(<8 x i16> %a0) {
+ ; CHECK-LABEL: test_x86_vcvtph2ps_128:
+ ; CHECK-NOT: vmov
; CHECK: vcvtph2ps
%res = call <4 x float> @llvm.x86.vcvtph2ps.128(<8 x i16> %a0) ; <<4 x float>> [#uses=1]
ret <4 x float> %res
@@ -10,22 +12,36 @@ declare <4 x float> @llvm.x86.vcvtph2ps.128(<8 x i16>) nounwind readonly
define <8 x float> @test_x86_vcvtph2ps_256(<8 x i16> %a0) {
+ ; CHECK-LABEL: test_x86_vcvtph2ps_256:
+ ; CHECK-NOT: vmov
; CHECK: vcvtph2ps
%res = call <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16> %a0) ; <<8 x float>> [#uses=1]
ret <8 x float> %res
}
declare <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16>) nounwind readonly
+define <8 x float> @test_x86_vcvtph2ps_256_m(<8 x i16>* nocapture %a) nounwind {
+entry:
+ ; CHECK-LABEL: test_x86_vcvtph2ps_256_m:
+ ; CHECK-NOT: vmov
+ ; CHECK: vcvtph2ps (%
+ %tmp1 = load <8 x i16>, <8 x i16>* %a, align 16
+ %0 = tail call <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16> %tmp1)
+ ret <8 x float> %0
+}
define <8 x i16> @test_x86_vcvtps2ph_128(<4 x float> %a0) {
+ ; CHECK-LABEL: test_x86_vcvtps2ph_128:
+ ; CHECK-NOT: vmov
; CHECK: vcvtps2ph
%res = call <8 x i16> @llvm.x86.vcvtps2ph.128(<4 x float> %a0, i32 0) ; <<8 x i16>> [#uses=1]
ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.vcvtps2ph.128(<4 x float>, i32) nounwind readonly
-
define <8 x i16> @test_x86_vcvtps2ph_256(<8 x float> %a0) {
+ ; CHECK-LABEL: test_x86_vcvtps2ph_256:
+ ; CHECK-NOT: vmov
; CHECK: vcvtps2ph
%res = call <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float> %a0, i32 0) ; <<8 x i16>> [#uses=1]
ret <8 x i16> %res
@@ -33,14 +49,59 @@ define <8 x i16> @test_x86_vcvtps2ph_256(<8 x float> %a0) {
declare <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float>, i32) nounwind readonly
define <4 x float> @test_x86_vcvtps2ph_128_scalar(i64* %ptr) {
-; CHECK-LABEL: test_x86_vcvtps2ph_128_scalar
+; CHECK-LABEL: test_x86_vcvtps2ph_128_scalar:
; CHECK-NOT: vmov
; CHECK: vcvtph2ps (%
- %load = load i64* %ptr
+ %load = load i64, i64* %ptr
%ins1 = insertelement <2 x i64> undef, i64 %load, i32 0
%ins2 = insertelement <2 x i64> %ins1, i64 0, i32 1
%bc = bitcast <2 x i64> %ins2 to <8 x i16>
%res = tail call <4 x float> @llvm.x86.vcvtph2ps.128(<8 x i16> %bc) #2
ret <4 x float> %res
}
+
+define void @test_x86_vcvtps2ph_256_m(<8 x i16>* nocapture %d, <8 x float> %a) nounwind {
+entry:
+ ; CHECK-LABEL: test_x86_vcvtps2ph_256_m:
+ ; CHECK-NOT: vmov
+ ; CHECK: vcvtps2ph $3, %ymm0, (%
+ %0 = tail call <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float> %a, i32 3)
+ store <8 x i16> %0, <8 x i16>* %d, align 16
+ ret void
+}
+
+define void @test_x86_vcvtps2ph_128_m(<4 x i16>* nocapture %d, <4 x float> %a) nounwind {
+entry:
+ ; CHECK-LABEL: test_x86_vcvtps2ph_128_m:
+ ; CHECK-NOT: vmov
+ ; CHECK: vcvtps2ph $3, %xmm0, (%
+ %0 = tail call <8 x i16> @llvm.x86.vcvtps2ph.128(<4 x float> %a, i32 3)
+ %1 = shufflevector <8 x i16> %0, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ store <4 x i16> %1, <4 x i16>* %d, align 8
+ ret void
+}
+
+define void @test_x86_vcvtps2ph_128_m2(double* nocapture %hf4x16, <4 x float> %f4x32) #0 {
+entry:
+ ; CHECK-LABEL: test_x86_vcvtps2ph_128_m2:
+ ; CHECK-NOT: vmov
+ ; CHECK: vcvtps2ph $3, %xmm0, (%
+ %0 = tail call <8 x i16> @llvm.x86.vcvtps2ph.128(<4 x float> %f4x32, i32 3)
+ %1 = bitcast <8 x i16> %0 to <2 x double>
+ %vecext = extractelement <2 x double> %1, i32 0
+ store double %vecext, double* %hf4x16, align 8
+ ret void
+}
+
+define void @test_x86_vcvtps2ph_128_m3(i64* nocapture %hf4x16, <4 x float> %f4x32) #0 {
+entry:
+ ; CHECK-LABEL: test_x86_vcvtps2ph_128_m3:
+ ; CHECK-NOT: vmov
+ ; CHECK: vcvtps2ph $3, %xmm0, (%
+ %0 = tail call <8 x i16> @llvm.x86.vcvtps2ph.128(<4 x float> %f4x32, i32 3)
+ %1 = bitcast <8 x i16> %0 to <2 x i64>
+ %vecext = extractelement <2 x i64> %1, i32 0
+ store i64 %vecext, i64* %hf4x16, align 8
+ ret void
+}
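The new *_m tests above check that the half/float conversions fold their memory operand directly (hence the CHECK-NOT: vmov) and that the store form keeps its immediate. A minimal sketch, assuming the vcvtps2ph imm8 uses bits [1:0] as the rounding control when bit 2 is clear (3 = round toward zero); %f and %d are hypothetical values:

  %h = call <8 x i16> @llvm.x86.vcvtps2ph.128(<4 x float> %f, i32 3) ; 4 x float -> 4 x half, truncating
  store <8 x i16> %h, <8 x i16>* %d, align 16                        ; expected to fold into: vcvtps2ph $3, %xmm0, (%...)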
diff --git a/test/CodeGen/X86/fast-isel-args-fail.ll b/test/CodeGen/X86/fast-isel-args-fail.ll
index 7e783d2891d4..0026832ed281 100644
--- a/test/CodeGen/X86/fast-isel-args-fail.ll
+++ b/test/CodeGen/X86/fast-isel-args-fail.ll
@@ -17,6 +17,6 @@ entry:
; WIN32: movl (%rcx), %eax
; WIN64: foo
; WIN64: movl (%rdi), %eax
- %0 = load i32* %p, align 4
+ %0 = load i32, i32* %p, align 4
ret i32 %0
}
diff --git a/test/CodeGen/X86/fast-isel-args-fail2.ll b/test/CodeGen/X86/fast-isel-args-fail2.ll
index 08de472c2a54..f7066577f2de 100644
--- a/test/CodeGen/X86/fast-isel-args-fail2.ll
+++ b/test/CodeGen/X86/fast-isel-args-fail2.ll
@@ -1,4 +1,4 @@
-; RUN: not --crash llc < %s -fast-isel -fast-isel-abort-args -mtriple=x86_64-apple-darwin10
+; RUN: not llc < %s -fast-isel -fast-isel-abort=2 -mtriple=x86_64-apple-darwin10
; REQUIRES: asserts
%struct.s0 = type { x86_fp80, x86_fp80 }
diff --git a/test/CodeGen/X86/fast-isel-args.ll b/test/CodeGen/X86/fast-isel-args.ll
index 8c86a9cc01d6..bcd41b7a2bfe 100644
--- a/test/CodeGen/X86/fast-isel-args.ll
+++ b/test/CodeGen/X86/fast-isel-args.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -fast-isel -fast-isel-abort -fast-isel-abort-args -verify-machineinstrs -mtriple=x86_64-apple-darwin10
+; RUN: llc < %s -fast-isel -fast-isel-abort=2 -verify-machineinstrs -mtriple=x86_64-apple-darwin10
; Just make sure these don't abort when lowering the arguments.
define i32 @t1(i32 %a, i32 %b, i32 %c) {
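The RUN-line updates above replace the old boolean -fast-isel-abort/-fast-isel-abort-args pair with a single level flag. A sketch of the assumed semantics, not a full option reference:

  ; RUN: llc < %s -fast-isel -fast-isel-abort=1 ...  ; abort if fast-isel fails to select an instruction
  ; RUN: llc < %s -fast-isel -fast-isel-abort=2 ...  ; additionally abort if formal-argument lowering falls back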
diff --git a/test/CodeGen/X86/fast-isel-avoid-unnecessary-pic-base.ll b/test/CodeGen/X86/fast-isel-avoid-unnecessary-pic-base.ll
index 21fae4a82051..3310e6113f1e 100644
--- a/test/CodeGen/X86/fast-isel-avoid-unnecessary-pic-base.ll
+++ b/test/CodeGen/X86/fast-isel-avoid-unnecessary-pic-base.ll
@@ -15,10 +15,10 @@ entry:
store i32 %x, i32* %x.addr, align 4
store i32 %y, i32* %y.addr, align 4
store i32 %z, i32* %z.addr, align 4
- %tmp = load i32* %x.addr, align 4
- %tmp1 = load i32* %y.addr, align 4
+ %tmp = load i32, i32* %x.addr, align 4
+ %tmp1 = load i32, i32* %y.addr, align 4
%add = add nsw i32 %tmp, %tmp1
- %tmp2 = load i32* %z.addr, align 4
+ %tmp2 = load i32, i32* %z.addr, align 4
%add3 = add nsw i32 %add, %tmp2
ret i32 %add3
}
diff --git a/test/CodeGen/X86/fast-isel-branch_weights.ll b/test/CodeGen/X86/fast-isel-branch_weights.ll
index d2b02aad182d..feb240f05863 100644
--- a/test/CodeGen/X86/fast-isel-branch_weights.ll
+++ b/test/CodeGen/X86/fast-isel-branch_weights.ll
@@ -1,5 +1,5 @@
; RUN: llc < %s -mtriple=x86_64-apple-darwin10 | FileCheck %s
-; RUN: llc < %s -fast-isel -fast-isel-abort -mtriple=x86_64-apple-darwin10 | FileCheck %s
+; RUN: llc < %s -fast-isel -fast-isel-abort=1 -mtriple=x86_64-apple-darwin10 | FileCheck %s
; Test if the BBs are reordered according to their branch weights.
define i64 @branch_weights_test(i64 %a, i64 %b) {
diff --git a/test/CodeGen/X86/fast-isel-call-bool.ll b/test/CodeGen/X86/fast-isel-call-bool.ll
index 5cdb2c941161..aaa8ef4f644c 100644
--- a/test/CodeGen/X86/fast-isel-call-bool.ll
+++ b/test/CodeGen/X86/fast-isel-call-bool.ll
@@ -8,7 +8,7 @@ declare i64 @bar(i1)
define i64 @foo(i8* %arg) {
; CHECK-LABEL: foo:
top:
- %0 = load i8* %arg
+ %0 = load i8, i8* %arg
; CHECK: movb
%1 = trunc i8 %0 to i1
; CHECK: andb $1,
diff --git a/test/CodeGen/X86/fast-isel-call.ll b/test/CodeGen/X86/fast-isel-call.ll
index 42d2b8bed654..9fd07b521ab2 100644
--- a/test/CodeGen/X86/fast-isel-call.ll
+++ b/test/CodeGen/X86/fast-isel-call.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -O0 -fast-isel-abort -march=x86 | FileCheck %s
+; RUN: llc < %s -O0 -fast-isel-abort=1 -march=x86 | FileCheck %s
%struct.s = type {i32, i32, i32}
diff --git a/test/CodeGen/X86/fast-isel-cmp-branch2.ll b/test/CodeGen/X86/fast-isel-cmp-branch2.ll
index 7e45c49f48f7..04dbac07690a 100644
--- a/test/CodeGen/X86/fast-isel-cmp-branch2.ll
+++ b/test/CodeGen/X86/fast-isel-cmp-branch2.ll
@@ -1,5 +1,5 @@
; RUN: llc < %s -mtriple=x86_64-apple-darwin10 | FileCheck %s
-; RUN: llc < %s -fast-isel -fast-isel-abort -mtriple=x86_64-apple-darwin10 | FileCheck %s
+; RUN: llc < %s -fast-isel -fast-isel-abort=1 -mtriple=x86_64-apple-darwin10 | FileCheck %s
define i32 @fcmp_oeq(float %x, float %y) {
; CHECK-LABEL: fcmp_oeq
diff --git a/test/CodeGen/X86/fast-isel-cmp-branch3.ll b/test/CodeGen/X86/fast-isel-cmp-branch3.ll
index 0df782d18ecf..e54d0ca40078 100644
--- a/test/CodeGen/X86/fast-isel-cmp-branch3.ll
+++ b/test/CodeGen/X86/fast-isel-cmp-branch3.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -fast-isel -fast-isel-abort -mtriple=x86_64-apple-darwin10 | FileCheck %s
+; RUN: llc < %s -fast-isel -fast-isel-abort=1 -mtriple=x86_64-apple-darwin10 | FileCheck %s
define i32 @fcmp_oeq1(float %x) {
; CHECK-LABEL: fcmp_oeq1
diff --git a/test/CodeGen/X86/fast-isel-cmp.ll b/test/CodeGen/X86/fast-isel-cmp.ll
index 1b72cfcde657..d15dd50a4554 100644
--- a/test/CodeGen/X86/fast-isel-cmp.ll
+++ b/test/CodeGen/X86/fast-isel-cmp.ll
@@ -1,5 +1,5 @@
; RUN: llc < %s -mtriple=x86_64-apple-darwin10 | FileCheck %s --check-prefix=SDAG
-; RUN: llc < %s -fast-isel -fast-isel-abort -mtriple=x86_64-apple-darwin10 | FileCheck %s --check-prefix=FAST
+; RUN: llc < %s -fast-isel -fast-isel-abort=1 -mtriple=x86_64-apple-darwin10 | FileCheck %s --check-prefix=FAST
define zeroext i1 @fcmp_oeq(float %x, float %y) {
; SDAG-LABEL: fcmp_oeq
diff --git a/test/CodeGen/X86/fast-isel-constrain-store-indexreg.ll b/test/CodeGen/X86/fast-isel-constrain-store-indexreg.ll
new file mode 100644
index 000000000000..c2534f72bd0d
--- /dev/null
+++ b/test/CodeGen/X86/fast-isel-constrain-store-indexreg.ll
@@ -0,0 +1,25 @@
+; RUN: llc %s -o - -verify-machineinstrs | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-unknown"
+
+@TheArray = external global [100000 x double], align 16
+
+; This test ensures, via the machine verifier, that the register class for the
+; index of the double store is correctly constrained to not include SP.
+
+; CHECK: movsd
+
+define i32 @main(i32* %i, double %tmpv) {
+bb:
+ br label %bb7
+
+bb7: ; preds = %bb7, %bb
+ %storemerge = phi i32 [ 0, %bb ], [ %tmp19, %bb7 ]
+ %tmp15 = zext i32 %storemerge to i64
+ %tmp16 = getelementptr inbounds [100000 x double], [100000 x double]* @TheArray, i64 0, i64 %tmp15
+ store double %tmpv, double* %tmp16, align 8
+ %tmp18 = load i32, i32* %i, align 4
+ %tmp19 = add i32 %tmp18, 1
+ br label %bb7
+}
diff --git a/test/CodeGen/X86/fast-isel-divrem-x86-64.ll b/test/CodeGen/X86/fast-isel-divrem-x86-64.ll
index 0fd0561e2046..9c04ea67d519 100644
--- a/test/CodeGen/X86/fast-isel-divrem-x86-64.ll
+++ b/test/CodeGen/X86/fast-isel-divrem-x86-64.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=x86_64-none-linux -fast-isel -fast-isel-abort -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=x86_64-none-linux -fast-isel -fast-isel-abort=1 -verify-machineinstrs < %s | FileCheck %s
define i64 @test_sdiv64(i64 %dividend, i64 %divisor) nounwind {
entry:
diff --git a/test/CodeGen/X86/fast-isel-divrem.ll b/test/CodeGen/X86/fast-isel-divrem.ll
index 5828becb3c33..3e649ca9a603 100644
--- a/test/CodeGen/X86/fast-isel-divrem.ll
+++ b/test/CodeGen/X86/fast-isel-divrem.ll
@@ -1,5 +1,5 @@
-; RUN: llc -mtriple=x86_64-none-linux -fast-isel -fast-isel-abort -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=i686-none-linux -fast-isel -fast-isel-abort -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=x86_64-none-linux -fast-isel -fast-isel-abort=1 -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=i686-none-linux -fast-isel -fast-isel-abort=1 -verify-machineinstrs < %s | FileCheck %s
define i8 @test_sdiv8(i8 %dividend, i8 %divisor) nounwind {
entry:
diff --git a/test/CodeGen/X86/fast-isel-double-half-convertion.ll b/test/CodeGen/X86/fast-isel-double-half-convertion.ll
new file mode 100644
index 000000000000..d17ce101d090
--- /dev/null
+++ b/test/CodeGen/X86/fast-isel-double-half-convertion.ll
@@ -0,0 +1,23 @@
+; RUN: llc -fast-isel -fast-isel-abort=1 -mtriple=x86_64-unknown-unknown -mattr=+f16c < %s
+
+; XFAIL: *
+
+; In the future, we might want to teach fast-isel how to expand a double-to-half
+; conversion into a double-to-float conversion immediately followed by a
+; float-to-half conversion. For now, fast-isel is expected to fail.
+
+define double @test_fp16_to_fp64(i32 %a) {
+entry:
+ %0 = trunc i32 %a to i16
+ %1 = call double @llvm.convert.from.fp16.f64(i16 %0)
+  ret double %1
+}
+
+define i16 @test_fp64_to_fp16(double %a) {
+entry:
+ %0 = call i16 @llvm.convert.to.fp16.f64(double %a)
+ ret i16 %0
+}
+
+declare i16 @llvm.convert.to.fp16.f64(double)
+declare double @llvm.convert.from.fp16.f64(i16)
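The expansion sketched in the comment above would look roughly like the IR below (a hypothetical lowering, leaving the double-rounding question aside; %d is an assumed double input):

  %f = fptrunc double %d to float                    ; double -> float first
  %h = call i16 @llvm.convert.to.fp16.f32(float %f)  ; then float -> half via the F16C path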
diff --git a/test/CodeGen/X86/fast-isel-extract.ll b/test/CodeGen/X86/fast-isel-extract.ll
index 3a4b2a685504..fb20fdd0d36f 100644
--- a/test/CodeGen/X86/fast-isel-extract.ll
+++ b/test/CodeGen/X86/fast-isel-extract.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple x86_64-apple-darwin11 -O0 -fast-isel-abort | FileCheck %s
+; RUN: llc < %s -mtriple x86_64-apple-darwin11 -O0 -fast-isel-abort=1 | FileCheck %s
%struct.x = type { i64, i64 }
%addovf = type { i32, i1 }
diff --git a/test/CodeGen/X86/fast-isel-float-half-convertion.ll b/test/CodeGen/X86/fast-isel-float-half-convertion.ll
new file mode 100644
index 000000000000..707a325bf41d
--- /dev/null
+++ b/test/CodeGen/X86/fast-isel-float-half-convertion.ll
@@ -0,0 +1,28 @@
+; RUN: llc -fast-isel -fast-isel-abort=1 -asm-verbose=false -mtriple=x86_64-unknown-unknown -mattr=+f16c < %s | FileCheck %s
+
+; Verify that fast-isel correctly expands float-half conversions.
+
+define i16 @test_fp32_to_fp16(float %a) {
+; CHECK-LABEL: test_fp32_to_fp16:
+; CHECK: vcvtps2ph $0, %xmm0, %xmm0
+; CHECK-NEXT: vmovd %xmm0, %eax
+; CHECK-NEXT: retq
+entry:
+ %0 = call i16 @llvm.convert.to.fp16.f32(float %a)
+ ret i16 %0
+}
+
+define float @test_fp16_to_fp32(i32 %a) {
+; CHECK-LABEL: test_fp16_to_fp32:
+; CHECK: movswl %di, %eax
+; CHECK-NEXT: vmovd %eax, %xmm0
+; CHECK-NEXT: vcvtph2ps %xmm0, %xmm0
+; CHECK-NEXT: retq
+entry:
+ %0 = trunc i32 %a to i16
+ %1 = call float @llvm.convert.from.fp16.f32(i16 %0)
+ ret float %1
+}
+
+declare i16 @llvm.convert.to.fp16.f32(float)
+declare float @llvm.convert.from.fp16.f32(i16)
diff --git a/test/CodeGen/X86/fast-isel-fneg.ll b/test/CodeGen/X86/fast-isel-fneg.ll
index 8b38587164fe..e3bc7faae3ce 100644
--- a/test/CodeGen/X86/fast-isel-fneg.ll
+++ b/test/CodeGen/X86/fast-isel-fneg.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -fast-isel -fast-isel-abort -mtriple=x86_64-apple-darwin10 | FileCheck %s
+; RUN: llc < %s -fast-isel -fast-isel-abort=1 -mtriple=x86_64-apple-darwin10 | FileCheck %s
; RUN: llc < %s -fast-isel -march=x86 -mattr=+sse2 | FileCheck --check-prefix=SSE2 %s
; SSE2: xor
diff --git a/test/CodeGen/X86/fast-isel-fold-mem.ll b/test/CodeGen/X86/fast-isel-fold-mem.ll
index a94577962e91..5686484ef935 100644
--- a/test/CodeGen/X86/fast-isel-fold-mem.ll
+++ b/test/CodeGen/X86/fast-isel-fold-mem.ll
@@ -1,11 +1,11 @@
; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s
-; RUN: llc < %s -fast-isel -fast-isel-abort -mtriple=x86_64-apple-darwin | FileCheck %s
+; RUN: llc < %s -fast-isel -fast-isel-abort=1 -mtriple=x86_64-apple-darwin | FileCheck %s
define i64 @fold_load(i64* %a, i64 %b) {
; CHECK-LABEL: fold_load
; CHECK: addq (%rdi), %rsi
; CHECK-NEXT: movq %rsi, %rax
- %1 = load i64* %a, align 8
+ %1 = load i64, i64* %a, align 8
%2 = add i64 %1, %b
ret i64 %2
}
diff --git a/test/CodeGen/X86/fast-isel-fptrunc-fpext.ll b/test/CodeGen/X86/fast-isel-fptrunc-fpext.ll
new file mode 100644
index 000000000000..e4e9aeaa262e
--- /dev/null
+++ b/test/CodeGen/X86/fast-isel-fptrunc-fpext.ll
@@ -0,0 +1,65 @@
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 -fast-isel -fast-isel-abort=1 | FileCheck %s --check-prefix=ALL --check-prefix=SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx -fast-isel -fast-isel-abort=1 | FileCheck %s --check-prefix=ALL --check-prefix=AVX
+;
+; Verify that fast-isel doesn't select legacy SSE instructions on targets that
+; feature AVX.
+;
+; Test cases are obtained from the following code snippet:
+; ///
+; double single_to_double_rr(float x) {
+; return (double)x;
+; }
+; float double_to_single_rr(double x) {
+; return (float)x;
+; }
+; double single_to_double_rm(float *x) {
+; return (double)*x;
+; }
+; float double_to_single_rm(double *x) {
+; return (float)*x;
+; }
+; ///
+
+define double @single_to_double_rr(float %x) {
+; ALL-LABEL: single_to_double_rr:
+; SSE-NOT: vcvtss2sd
+; AVX: vcvtss2sd %xmm0, %xmm0, %xmm0
+; ALL: ret
+entry:
+ %conv = fpext float %x to double
+ ret double %conv
+}
+
+define float @double_to_single_rr(double %x) {
+; ALL-LABEL: double_to_single_rr:
+; SSE-NOT: vcvtsd2ss
+; AVX: vcvtsd2ss %xmm0, %xmm0, %xmm0
+; ALL: ret
+entry:
+ %conv = fptrunc double %x to float
+ ret float %conv
+}
+
+define double @single_to_double_rm(float* %x) {
+; ALL-LABEL: single_to_double_rm:
+; SSE: cvtss2sd (%rdi), %xmm0
+; AVX: vmovss (%rdi), %xmm0
+; AVX-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
+; ALL-NEXT: ret
+entry:
+ %0 = load float, float* %x, align 4
+ %conv = fpext float %0 to double
+ ret double %conv
+}
+
+define float @double_to_single_rm(double* %x) {
+; ALL-LABEL: double_to_single_rm:
+; SSE: cvtsd2ss (%rdi), %xmm0
+; AVX: vmovsd (%rdi), %xmm0
+; AVX-NEXT: vcvtsd2ss %xmm0, %xmm0, %xmm0
+; ALL-NEXT: ret
+entry:
+ %0 = load double, double* %x, align 8
+ %conv = fptrunc double %0 to float
+ ret float %conv
+}
diff --git a/test/CodeGen/X86/fast-isel-gep.ll b/test/CodeGen/X86/fast-isel-gep.ll
index a65e0705f2b2..67b30292be3c 100644
--- a/test/CodeGen/X86/fast-isel-gep.ll
+++ b/test/CodeGen/X86/fast-isel-gep.ll
@@ -6,8 +6,8 @@
; should be sign-extended to 64 bits on 64-bit targets.
; PR3181
define i32 @test1(i32 %t3, i32* %t1) nounwind {
- %t9 = getelementptr i32* %t1, i32 %t3 ; <i32*> [#uses=1]
- %t15 = load i32* %t9 ; <i32> [#uses=1]
+ %t9 = getelementptr i32, i32* %t1, i32 %t3 ; <i32*> [#uses=1]
+ %t15 = load i32, i32* %t9 ; <i32> [#uses=1]
ret i32 %t15
; X32-LABEL: test1:
; X32: movl (%eax,%ecx,4), %eax
@@ -20,8 +20,8 @@ define i32 @test1(i32 %t3, i32* %t1) nounwind {
}
define i32 @test2(i64 %t3, i32* %t1) nounwind {
- %t9 = getelementptr i32* %t1, i64 %t3 ; <i32*> [#uses=1]
- %t15 = load i32* %t9 ; <i32> [#uses=1]
+ %t9 = getelementptr i32, i32* %t1, i64 %t3 ; <i32*> [#uses=1]
+ %t15 = load i32, i32* %t9 ; <i32> [#uses=1]
ret i32 %t15
; X32-LABEL: test2:
; X32: movl (%edx,%ecx,4), %e
@@ -37,8 +37,8 @@ define i32 @test2(i64 %t3, i32* %t1) nounwind {
; PR4984
define i8 @test3(i8* %start) nounwind {
entry:
- %A = getelementptr i8* %start, i64 -2 ; <i8*> [#uses=1]
- %B = load i8* %A, align 1 ; <i8> [#uses=1]
+ %A = getelementptr i8, i8* %start, i64 -2 ; <i8*> [#uses=1]
+ %B = load i8, i8* %A, align 1 ; <i8> [#uses=1]
ret i8 %B
@@ -59,11 +59,11 @@ entry:
%p.addr = alloca double*, align 8 ; <double**> [#uses=2]
store i64 %x, i64* %x.addr
store double* %p, double** %p.addr
- %tmp = load i64* %x.addr ; <i64> [#uses=1]
+ %tmp = load i64, i64* %x.addr ; <i64> [#uses=1]
%add = add nsw i64 %tmp, 16 ; <i64> [#uses=1]
- %tmp1 = load double** %p.addr ; <double*> [#uses=1]
- %arrayidx = getelementptr inbounds double* %tmp1, i64 %add ; <double*> [#uses=1]
- %tmp2 = load double* %arrayidx ; <double> [#uses=1]
+ %tmp1 = load double*, double** %p.addr ; <double*> [#uses=1]
+ %arrayidx = getelementptr inbounds double, double* %tmp1, i64 %add ; <double*> [#uses=1]
+ %tmp2 = load double, double* %arrayidx ; <double> [#uses=1]
ret double %tmp2
; X32-LABEL: test4:
@@ -75,9 +75,9 @@ entry:
; PR8961 - Make sure the sext for the GEP addressing comes before the load that
; is folded.
define i64 @test5(i8* %A, i32 %I, i64 %B) nounwind {
- %v8 = getelementptr i8* %A, i32 %I
+ %v8 = getelementptr i8, i8* %A, i32 %I
%v9 = bitcast i8* %v8 to i64*
- %v10 = load i64* %v9
+ %v10 = load i64, i64* %v9
%v11 = add i64 %B, %v10
ret i64 %v11
; X64-LABEL: test5:
@@ -91,14 +91,14 @@ define i64 @test5(i8* %A, i32 %I, i64 %B) nounwind {
; of their blocks.
define void @test6() {
if.end: ; preds = %if.then, %invoke.cont
- %tmp15 = load i64* undef
+ %tmp15 = load i64, i64* undef
%dec = add i64 %tmp15, 13
store i64 %dec, i64* undef
%call17 = invoke i8* @_ZNK18G__FastAllocString4dataEv()
to label %invoke.cont16 unwind label %lpad
invoke.cont16: ; preds = %if.then14
- %arrayidx18 = getelementptr inbounds i8* %call17, i64 %dec
+ %arrayidx18 = getelementptr inbounds i8, i8* %call17, i64 %dec
store i8 0, i8* %arrayidx18
unreachable
@@ -118,10 +118,10 @@ define i32 @test7({i32,i32,i32}* %tmp1, i32 %tmp71, i32 %tmp63) nounwind {
; X64: movl $4, 8({{%rdi|%rcx}})
- %tmp29 = getelementptr inbounds {i32,i32,i32}* %tmp1, i32 0, i32 2
- %tmp30 = load i32* %tmp29, align 4
+ %tmp29 = getelementptr inbounds {i32,i32,i32}, {i32,i32,i32}* %tmp1, i32 0, i32 2
+ %tmp30 = load i32, i32* %tmp29, align 4
- %p2 = getelementptr inbounds {i32,i32,i32}* %tmp1, i32 0, i32 2
+ %p2 = getelementptr inbounds {i32,i32,i32}, {i32,i32,i32}* %tmp1, i32 0, i32 2
store i32 4, i32* %p2
%tmp72 = or i32 %tmp71, %tmp30
diff --git a/test/CodeGen/X86/fast-isel-gv.ll b/test/CodeGen/X86/fast-isel-gv.ll
index de7509568907..b3955d6e4f81 100644
--- a/test/CodeGen/X86/fast-isel-gv.ll
+++ b/test/CodeGen/X86/fast-isel-gv.ll
@@ -12,15 +12,15 @@ entry:
%retval = alloca i32 ; <i32*> [#uses=2]
%0 = alloca i32 ; <i32*> [#uses=2]
%"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
- %1 = load i8 (...)** @f, align 8 ; <i8 (...)*> [#uses=1]
+ %1 = load i8 (...)*, i8 (...)** @f, align 8 ; <i8 (...)*> [#uses=1]
%2 = icmp ne i8 (...)* %1, @kill ; <i1> [#uses=1]
%3 = zext i1 %2 to i32 ; <i32> [#uses=1]
store i32 %3, i32* %0, align 4
- %4 = load i32* %0, align 4 ; <i32> [#uses=1]
+ %4 = load i32, i32* %0, align 4 ; <i32> [#uses=1]
store i32 %4, i32* %retval, align 4
br label %return
return: ; preds = %entry
- %retval1 = load i32* %retval ; <i32> [#uses=1]
+ %retval1 = load i32, i32* %retval ; <i32> [#uses=1]
ret i32 %retval1
}
diff --git a/test/CodeGen/X86/fast-isel-i1.ll b/test/CodeGen/X86/fast-isel-i1.ll
index 9c042d30e78c..589de76617a0 100644
--- a/test/CodeGen/X86/fast-isel-i1.ll
+++ b/test/CodeGen/X86/fast-isel-i1.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -mtriple=i686-apple-darwin10 -fast-isel -fast-isel-abort | FileCheck %s
-; RUN: llc < %s -mtriple=x86_64-apple-darwin10 -fast-isel -fast-isel-abort | FileCheck %s
+; RUN: llc < %s -mtriple=i686-apple-darwin10 -fast-isel -fast-isel-abort=1 | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin10 -fast-isel -fast-isel-abort=1 | FileCheck %s
declare i32 @test1a(i32)
@@ -23,14 +23,15 @@ exit: ; preds = %next
define void @test2(i8* %a) nounwind {
entry:
+; clang uses i8 constants for booleans, so we test with an i8 1.
; CHECK-LABEL: test2:
; CHECK: movb {{.*}} %al
; CHECK-NEXT: xorb $1, %al
; CHECK-NEXT: testb $1
- %tmp = load i8* %a, align 1
- %tobool = trunc i8 %tmp to i1
- %tobool2 = xor i1 %tobool, true
- br i1 %tobool2, label %if.then, label %if.end
+ %tmp = load i8, i8* %a, align 1
+ %xor = xor i8 %tmp, 1
+ %tobool = trunc i8 %xor to i1
+ br i1 %tobool, label %if.then, label %if.end
if.then:
call void @test2(i8* null)
diff --git a/test/CodeGen/X86/fast-isel-int-float-conversion.ll b/test/CodeGen/X86/fast-isel-int-float-conversion.ll
new file mode 100644
index 000000000000..afa6ee9aa7a2
--- /dev/null
+++ b/test/CodeGen/X86/fast-isel-int-float-conversion.ll
@@ -0,0 +1,45 @@
+; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=generic -mattr=+sse2 -fast-isel --fast-isel-abort=1 < %s | FileCheck %s --check-prefix=ALL --check-prefix=SSE2
+; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=generic -mattr=+avx -fast-isel --fast-isel-abort=1 < %s | FileCheck %s --check-prefix=ALL --check-prefix=AVX
+
+
+define double @int_to_double_rr(i32 %a) {
+; ALL-LABEL: int_to_double_rr:
+; SSE2: cvtsi2sdl %edi, %xmm0
+; AVX: vcvtsi2sdl %edi, %xmm0, %xmm0
+; ALL-NEXT: ret
+entry:
+ %0 = sitofp i32 %a to double
+ ret double %0
+}
+
+define double @int_to_double_rm(i32* %a) {
+; ALL-LABEL: int_to_double_rm:
+; SSE2: cvtsi2sdl (%rdi), %xmm0
+; AVX: vcvtsi2sdl (%rdi), %xmm0, %xmm0
+; ALL-NEXT: ret
+entry:
+ %0 = load i32, i32* %a
+ %1 = sitofp i32 %0 to double
+ ret double %1
+}
+
+define float @int_to_float_rr(i32 %a) {
+; ALL-LABEL: int_to_float_rr:
+; SSE2: cvtsi2ssl %edi, %xmm0
+; AVX: vcvtsi2ssl %edi, %xmm0, %xmm0
+; ALL-NEXT: ret
+entry:
+ %0 = sitofp i32 %a to float
+ ret float %0
+}
+
+define float @int_to_float_rm(i32* %a) {
+; ALL-LABEL: int_to_float_rm:
+; SSE2: cvtsi2ssl (%rdi), %xmm0
+; AVX: vcvtsi2ssl (%rdi), %xmm0, %xmm0
+; ALL-NEXT: ret
+entry:
+ %0 = load i32, i32* %a
+ %1 = sitofp i32 %0 to float
+ ret float %1
+}
diff --git a/test/CodeGen/X86/fast-isel-mem.ll b/test/CodeGen/X86/fast-isel-mem.ll
index eca1ae9f02a3..3f1d9cae9340 100644
--- a/test/CodeGen/X86/fast-isel-mem.ll
+++ b/test/CodeGen/X86/fast-isel-mem.ll
@@ -6,8 +6,8 @@
; rdar://6653118
define i32 @loadgv() nounwind {
entry:
- %0 = load i32* @src, align 4
- %1 = load i32* @src, align 4
+ %0 = load i32, i32* @src, align 4
+ %1 = load i32, i32* @src, align 4
%2 = add i32 %0, %1
store i32 %2, i32* @src
ret i32 %2
@@ -33,7 +33,7 @@ entry:
define void @t(%stuff* %this) nounwind {
entry:
- store i32 (...)** getelementptr ([4 x i32 (...)*]* @LotsStuff, i32 0, i32 2), i32 (...)*** null, align 4
+ store i32 (...)** getelementptr ([4 x i32 (...)*], [4 x i32 (...)*]* @LotsStuff, i32 0, i32 2), i32 (...)*** null, align 4
ret void
; CHECK: _t:
; CHECK: xorl %eax, %eax
diff --git a/test/CodeGen/X86/fast-isel-movsbl-indexreg.ll b/test/CodeGen/X86/fast-isel-movsbl-indexreg.ll
new file mode 100644
index 000000000000..100c3297ec4e
--- /dev/null
+++ b/test/CodeGen/X86/fast-isel-movsbl-indexreg.ll
@@ -0,0 +1,20 @@
+; RUN: llc %s -o - -verify-machineinstrs -fast-isel=true | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-unknown"
+
+; The index register on the folded memory operand was incorrect. Ensure we generate
+; movsbl in fast-isel, but also that it passes verification which will check the register
+; class.
+
+; CHECK: movsbl
+
+@table = external hidden global [64 x i8], align 16
+
+define i32 @test(i32 %x, i64 %offset) {
+bb:
+ %tmp37 = getelementptr inbounds [64 x i8], [64 x i8]* @table, i64 0, i64 %offset
+ %tmp38 = load i8, i8* %tmp37, align 1
+ %tmp39 = sext i8 %tmp38 to i32
+ ret i32 %tmp39
+}
diff --git a/test/CodeGen/X86/fast-isel-ret-ext.ll b/test/CodeGen/X86/fast-isel-ret-ext.ll
index 0370d99f906c..06bcb50850f6 100644
--- a/test/CodeGen/X86/fast-isel-ret-ext.ll
+++ b/test/CodeGen/X86/fast-isel-ret-ext.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -O0 -fast-isel-abort -mtriple i686-apple-darwin10 | FileCheck %s
-; RUN: llc < %s -O0 -fast-isel-abort -mtriple x86_64-apple-darwin10 | FileCheck %s
+; RUN: llc < %s -O0 -fast-isel-abort=1 -mtriple i686-apple-darwin10 | FileCheck %s
+; RUN: llc < %s -O0 -fast-isel-abort=1 -mtriple x86_64-apple-darwin10 | FileCheck %s
define zeroext i8 @test1(i32 %y) nounwind {
%conv = trunc i32 %y to i8
diff --git a/test/CodeGen/X86/fast-isel-select-cmov.ll b/test/CodeGen/X86/fast-isel-select-cmov.ll
index 8008e283ad60..879cd2f95ba6 100644
--- a/test/CodeGen/X86/fast-isel-select-cmov.ll
+++ b/test/CodeGen/X86/fast-isel-select-cmov.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -fast-isel -fast-isel-abort -mtriple=x86_64-apple-darwin10 | FileCheck %s
+; RUN: llc < %s -fast-isel -fast-isel-abort=1 -mtriple=x86_64-apple-darwin10 | FileCheck %s
; Test conditional move for the supported types (i16, i32, and i64) and
; condition input (argument or cmp). Currently i8 is not supported.
diff --git a/test/CodeGen/X86/fast-isel-select-cmov2.ll b/test/CodeGen/X86/fast-isel-select-cmov2.ll
index 658098fe7c7a..8556ff21021a 100644
--- a/test/CodeGen/X86/fast-isel-select-cmov2.ll
+++ b/test/CodeGen/X86/fast-isel-select-cmov2.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin10 | FileCheck %s
-; RUN: llc < %s -mtriple=x86_64-apple-darwin10 -fast-isel -fast-isel-abort | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin10 | FileCheck %s --check-prefix=CHECK --check-prefix=SDAG
+; RUN: llc < %s -mtriple=x86_64-apple-darwin10 -fast-isel -fast-isel-abort=1 | FileCheck %s --check-prefix=CHECK --check-prefix=FAST
; Test all the cmp predicates that can feed an integer conditional move.
@@ -15,10 +15,13 @@ define i64 @select_fcmp_false_cmov(double %a, double %b, i64 %c, i64 %d) {
define i64 @select_fcmp_oeq_cmov(double %a, double %b, i64 %c, i64 %d) {
; CHECK-LABEL: select_fcmp_oeq_cmov
; CHECK: ucomisd %xmm1, %xmm0
-; CHECK-NEXT: setnp %al
-; CHECK-NEXT: sete %cl
-; CHECK-NEXT: testb %al, %cl
-; CHECK-NEXT: cmoveq %rsi, %rdi
+; SDAG-NEXT: cmovneq %rsi, %rdi
+; SDAG-NEXT: cmovpq %rsi, %rdi
+; SDAG-NEXT: movq %rdi, %rax
+; FAST-NEXT: setnp %al
+; FAST-NEXT: sete %cl
+; FAST-NEXT: testb %al, %cl
+; FAST-NEXT: cmoveq %rsi, %rdi
%1 = fcmp oeq double %a, %b
%2 = select i1 %1, i64 %c, i64 %d
ret i64 %2
@@ -135,10 +138,13 @@ define i64 @select_fcmp_ule_cmov(double %a, double %b, i64 %c, i64 %d) {
define i64 @select_fcmp_une_cmov(double %a, double %b, i64 %c, i64 %d) {
; CHECK-LABEL: select_fcmp_une_cmov
; CHECK: ucomisd %xmm1, %xmm0
-; CHECK-NEXT: setp %al
-; CHECK-NEXT: setne %cl
-; CHECK-NEXT: orb %al, %cl
-; CHECK-NEXT: cmoveq %rsi, %rdi
+; SDAG-NEXT: cmovneq %rdi, %rsi
+; SDAG-NEXT: cmovpq %rdi, %rsi
+; SDAG-NEXT: movq %rsi, %rax
+; FAST-NEXT: setp %al
+; FAST-NEXT: setne %cl
+; FAST-NEXT: orb %al, %cl
+; FAST-NEXT: cmoveq %rsi, %rdi
%1 = fcmp une double %a, %b
%2 = select i1 %1, i64 %c, i64 %d
ret i64 %2
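After ucomisd, an ordered-equal result is ZF=1 with PF=0 (PF=1 flags an unordered compare). The split SDAG/FAST checks above encode two ways of consuming that; an annotated sketch under that assumption:

  ; SDAG keeps the flags live and overwrites the result on either failing condition:
  ;   cmovneq %rsi, %rdi    ; not equal       -> pick %d
  ;   cmovpq  %rsi, %rdi    ; unordered (NaN) -> pick %d
  ; fast-isel instead materializes the i1 with setcc and then issues a single cmov:
  ;   setnp %al ; sete %cl ; testb %al, %cl ; cmoveq %rsi, %rdi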
diff --git a/test/CodeGen/X86/fast-isel-select-pseudo-cmov.ll b/test/CodeGen/X86/fast-isel-select-pseudo-cmov.ll
index 1ec4d64fe209..8147035b4385 100644
--- a/test/CodeGen/X86/fast-isel-select-pseudo-cmov.ll
+++ b/test/CodeGen/X86/fast-isel-select-pseudo-cmov.ll
@@ -1,7 +1,7 @@
; RUN: llc < %s -mtriple=x86_64-apple-darwin10 | FileCheck %s
-; RUN: llc < %s -mtriple=x86_64-apple-darwin10 -fast-isel -fast-isel-abort | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin10 -fast-isel -fast-isel-abort=1 | FileCheck %s
; RUN: llc < %s -mtriple=x86_64-apple-darwin10 -mcpu=corei7-avx | FileCheck %s
-; RUN: llc < %s -mtriple=x86_64-apple-darwin10 -fast-isel -fast-isel-abort -mcpu=corei7-avx | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin10 -fast-isel -fast-isel-abort=1 -mcpu=corei7-avx | FileCheck %s
define float @select_fcmp_one_f32(float %a, float %b, float %c, float %d) {
diff --git a/test/CodeGen/X86/fast-isel-select-sse.ll b/test/CodeGen/X86/fast-isel-select-sse.ll
index 3c03a0312f5e..6761be6d4e1f 100644
--- a/test/CodeGen/X86/fast-isel-select-sse.ll
+++ b/test/CodeGen/X86/fast-isel-select-sse.ll
@@ -1,7 +1,7 @@
; RUN: llc < %s -mtriple=x86_64-apple-darwin10 | FileCheck %s
-; RUN: llc < %s -mtriple=x86_64-apple-darwin10 -fast-isel -fast-isel-abort | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin10 -fast-isel -fast-isel-abort=1 | FileCheck %s
; RUN: llc < %s -mtriple=x86_64-apple-darwin10 -mcpu=corei7-avx | FileCheck %s --check-prefix=AVX
-; RUN: llc < %s -mtriple=x86_64-apple-darwin10 -fast-isel -fast-isel-abort -mcpu=corei7-avx | FileCheck %s --check-prefix=AVX
+; RUN: llc < %s -mtriple=x86_64-apple-darwin10 -fast-isel -fast-isel-abort=1 -mcpu=corei7-avx | FileCheck %s --check-prefix=AVX
; Test all cmp predicates that can be used with SSE.
@@ -13,9 +13,7 @@ define float @select_fcmp_oeq_f32(float %a, float %b, float %c, float %d) {
; CHECK-NEXT: orps %xmm2, %xmm0
; AVX-LABEL: select_fcmp_oeq_f32
; AVX: vcmpeqss %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vandps %xmm2, %xmm0, %xmm1
-; AVX-NEXT: vandnps %xmm3, %xmm0, %xmm0
-; AVX-NEXT: vorps %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vblendvps %xmm0, %xmm2, %xmm3, %xmm0
%1 = fcmp oeq float %a, %b
%2 = select i1 %1, float %c, float %d
ret float %2
@@ -29,9 +27,7 @@ define double @select_fcmp_oeq_f64(double %a, double %b, double %c, double %d) {
; CHECK-NEXT: orpd %xmm2, %xmm0
; AVX-LABEL: select_fcmp_oeq_f64
; AVX: vcmpeqsd %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vandpd %xmm2, %xmm0, %xmm1
-; AVX-NEXT: vandnpd %xmm3, %xmm0, %xmm0
-; AVX-NEXT: vorpd %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vblendvpd %xmm0, %xmm2, %xmm3, %xmm0
%1 = fcmp oeq double %a, %b
%2 = select i1 %1, double %c, double %d
ret double %2
@@ -45,9 +41,7 @@ define float @select_fcmp_ogt_f32(float %a, float %b, float %c, float %d) {
; CHECK-NEXT: orps %xmm2, %xmm1
; AVX-LABEL: select_fcmp_ogt_f32
; AVX: vcmpltss %xmm0, %xmm1, %xmm0
-; AVX-NEXT: vandps %xmm2, %xmm0, %xmm1
-; AVX-NEXT: vandnps %xmm3, %xmm0, %xmm0
-; AVX-NEXT: vorps %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vblendvps %xmm0, %xmm2, %xmm3, %xmm0
%1 = fcmp ogt float %a, %b
%2 = select i1 %1, float %c, float %d
ret float %2
@@ -61,9 +55,7 @@ define double @select_fcmp_ogt_f64(double %a, double %b, double %c, double %d) {
; CHECK-NEXT: orpd %xmm2, %xmm1
; AVX-LABEL: select_fcmp_ogt_f64
; AVX: vcmpltsd %xmm0, %xmm1, %xmm0
-; AVX-NEXT: vandpd %xmm2, %xmm0, %xmm1
-; AVX-NEXT: vandnpd %xmm3, %xmm0, %xmm0
-; AVX-NEXT: vorpd %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vblendvpd %xmm0, %xmm2, %xmm3, %xmm0
%1 = fcmp ogt double %a, %b
%2 = select i1 %1, double %c, double %d
ret double %2
@@ -77,9 +69,7 @@ define float @select_fcmp_oge_f32(float %a, float %b, float %c, float %d) {
; CHECK-NEXT: orps %xmm2, %xmm1
; AVX-LABEL: select_fcmp_oge_f32
; AVX: vcmpless %xmm0, %xmm1, %xmm0
-; AVX-NEXT: vandps %xmm2, %xmm0, %xmm1
-; AVX-NEXT: vandnps %xmm3, %xmm0, %xmm0
-; AVX-NEXT: vorps %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vblendvps %xmm0, %xmm2, %xmm3, %xmm0
%1 = fcmp oge float %a, %b
%2 = select i1 %1, float %c, float %d
ret float %2
@@ -93,9 +83,7 @@ define double @select_fcmp_oge_f64(double %a, double %b, double %c, double %d) {
; CHECK-NEXT: orpd %xmm2, %xmm1
; AVX-LABEL: select_fcmp_oge_f64
; AVX: vcmplesd %xmm0, %xmm1, %xmm0
-; AVX-NEXT: vandpd %xmm2, %xmm0, %xmm1
-; AVX-NEXT: vandnpd %xmm3, %xmm0, %xmm0
-; AVX-NEXT: vorpd %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vblendvpd %xmm0, %xmm2, %xmm3, %xmm0
%1 = fcmp oge double %a, %b
%2 = select i1 %1, double %c, double %d
ret double %2
@@ -109,9 +97,7 @@ define float @select_fcmp_olt_f32(float %a, float %b, float %c, float %d) {
; CHECK-NEXT: orps %xmm2, %xmm0
; AVX-LABEL: select_fcmp_olt_f32
; AVX: vcmpltss %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vandps %xmm2, %xmm0, %xmm1
-; AVX-NEXT: vandnps %xmm3, %xmm0, %xmm0
-; AVX-NEXT: vorps %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vblendvps %xmm0, %xmm2, %xmm3, %xmm0
%1 = fcmp olt float %a, %b
%2 = select i1 %1, float %c, float %d
ret float %2
@@ -125,9 +111,7 @@ define double @select_fcmp_olt_f64(double %a, double %b, double %c, double %d) {
; CHECK-NEXT: orpd %xmm2, %xmm0
; AVX-LABEL: select_fcmp_olt_f64
; AVX: vcmpltsd %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vandpd %xmm2, %xmm0, %xmm1
-; AVX-NEXT: vandnpd %xmm3, %xmm0, %xmm0
-; AVX-NEXT: vorpd %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vblendvpd %xmm0, %xmm2, %xmm3, %xmm0
%1 = fcmp olt double %a, %b
%2 = select i1 %1, double %c, double %d
ret double %2
@@ -141,9 +125,7 @@ define float @select_fcmp_ole_f32(float %a, float %b, float %c, float %d) {
; CHECK-NEXT: orps %xmm2, %xmm0
; AVX-LABEL: select_fcmp_ole_f32
; AVX: vcmpless %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vandps %xmm2, %xmm0, %xmm1
-; AVX-NEXT: vandnps %xmm3, %xmm0, %xmm0
-; AVX-NEXT: vorps %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vblendvps %xmm0, %xmm2, %xmm3, %xmm0
%1 = fcmp ole float %a, %b
%2 = select i1 %1, float %c, float %d
ret float %2
@@ -157,9 +139,7 @@ define double @select_fcmp_ole_f64(double %a, double %b, double %c, double %d) {
; CHECK-NEXT: orpd %xmm2, %xmm0
; AVX-LABEL: select_fcmp_ole_f64
; AVX: vcmplesd %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vandpd %xmm2, %xmm0, %xmm1
-; AVX-NEXT: vandnpd %xmm3, %xmm0, %xmm0
-; AVX-NEXT: vorpd %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vblendvpd %xmm0, %xmm2, %xmm3, %xmm0
%1 = fcmp ole double %a, %b
%2 = select i1 %1, double %c, double %d
ret double %2
@@ -173,9 +153,7 @@ define float @select_fcmp_ord_f32(float %a, float %b, float %c, float %d) {
; CHECK-NEXT: orps %xmm2, %xmm0
; AVX-LABEL: select_fcmp_ord_f32
; AVX: vcmpordss %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vandps %xmm2, %xmm0, %xmm1
-; AVX-NEXT: vandnps %xmm3, %xmm0, %xmm0
-; AVX-NEXT: vorps %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vblendvps %xmm0, %xmm2, %xmm3, %xmm0
%1 = fcmp ord float %a, %b
%2 = select i1 %1, float %c, float %d
ret float %2
@@ -189,9 +167,7 @@ define double @select_fcmp_ord_f64(double %a, double %b, double %c, double %d) {
; CHECK-NEXT: orpd %xmm2, %xmm0
; AVX-LABEL: select_fcmp_ord_f64
; AVX: vcmpordsd %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vandpd %xmm2, %xmm0, %xmm1
-; AVX-NEXT: vandnpd %xmm3, %xmm0, %xmm0
-; AVX-NEXT: vorpd %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vblendvpd %xmm0, %xmm2, %xmm3, %xmm0
%1 = fcmp ord double %a, %b
%2 = select i1 %1, double %c, double %d
ret double %2
@@ -205,9 +181,7 @@ define float @select_fcmp_uno_f32(float %a, float %b, float %c, float %d) {
; CHECK-NEXT: orps %xmm2, %xmm0
; AVX-LABEL: select_fcmp_uno_f32
; AVX: vcmpunordss %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vandps %xmm2, %xmm0, %xmm1
-; AVX-NEXT: vandnps %xmm3, %xmm0, %xmm0
-; AVX-NEXT: vorps %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vblendvps %xmm0, %xmm2, %xmm3, %xmm0
%1 = fcmp uno float %a, %b
%2 = select i1 %1, float %c, float %d
ret float %2
@@ -221,9 +195,7 @@ define double @select_fcmp_uno_f64(double %a, double %b, double %c, double %d) {
; CHECK-NEXT: orpd %xmm2, %xmm0
; AVX-LABEL: select_fcmp_uno_f64
; AVX: vcmpunordsd %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vandpd %xmm2, %xmm0, %xmm1
-; AVX-NEXT: vandnpd %xmm3, %xmm0, %xmm0
-; AVX-NEXT: vorpd %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vblendvpd %xmm0, %xmm2, %xmm3, %xmm0
%1 = fcmp uno double %a, %b
%2 = select i1 %1, double %c, double %d
ret double %2
@@ -237,9 +209,7 @@ define float @select_fcmp_ugt_f32(float %a, float %b, float %c, float %d) {
; CHECK-NEXT: orps %xmm2, %xmm0
; AVX-LABEL: select_fcmp_ugt_f32
; AVX: vcmpnless %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vandps %xmm2, %xmm0, %xmm1
-; AVX-NEXT: vandnps %xmm3, %xmm0, %xmm0
-; AVX-NEXT: vorps %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vblendvps %xmm0, %xmm2, %xmm3, %xmm0
%1 = fcmp ugt float %a, %b
%2 = select i1 %1, float %c, float %d
ret float %2
@@ -253,9 +223,7 @@ define double @select_fcmp_ugt_f64(double %a, double %b, double %c, double %d) {
; CHECK-NEXT: orpd %xmm2, %xmm0
; AVX-LABEL: select_fcmp_ugt_f64
; AVX: vcmpnlesd %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vandpd %xmm2, %xmm0, %xmm1
-; AVX-NEXT: vandnpd %xmm3, %xmm0, %xmm0
-; AVX-NEXT: vorpd %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vblendvpd %xmm0, %xmm2, %xmm3, %xmm0
%1 = fcmp ugt double %a, %b
%2 = select i1 %1, double %c, double %d
ret double %2
@@ -269,9 +237,7 @@ define float @select_fcmp_uge_f32(float %a, float %b, float %c, float %d) {
; CHECK-NEXT: orps %xmm2, %xmm0
; AVX-LABEL: select_fcmp_uge_f32
; AVX: vcmpnltss %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vandps %xmm2, %xmm0, %xmm1
-; AVX-NEXT: vandnps %xmm3, %xmm0, %xmm0
-; AVX-NEXT: vorps %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vblendvps %xmm0, %xmm2, %xmm3, %xmm0
%1 = fcmp uge float %a, %b
%2 = select i1 %1, float %c, float %d
ret float %2
@@ -285,9 +251,7 @@ define double @select_fcmp_uge_f64(double %a, double %b, double %c, double %d) {
; CHECK-NEXT: orpd %xmm2, %xmm0
; AVX-LABEL: select_fcmp_uge_f64
; AVX: vcmpnltsd %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vandpd %xmm2, %xmm0, %xmm1
-; AVX-NEXT: vandnpd %xmm3, %xmm0, %xmm0
-; AVX-NEXT: vorpd %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vblendvpd %xmm0, %xmm2, %xmm3, %xmm0
%1 = fcmp uge double %a, %b
%2 = select i1 %1, double %c, double %d
ret double %2
@@ -301,9 +265,7 @@ define float @select_fcmp_ult_f32(float %a, float %b, float %c, float %d) {
; CHECK-NEXT: orps %xmm2, %xmm1
; AVX-LABEL: select_fcmp_ult_f32
; AVX: vcmpnless %xmm0, %xmm1, %xmm0
-; AVX-NEXT: vandps %xmm2, %xmm0, %xmm1
-; AVX-NEXT: vandnps %xmm3, %xmm0, %xmm0
-; AVX-NEXT: vorps %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vblendvps %xmm0, %xmm2, %xmm3, %xmm0
%1 = fcmp ult float %a, %b
%2 = select i1 %1, float %c, float %d
ret float %2
@@ -317,9 +279,7 @@ define double @select_fcmp_ult_f64(double %a, double %b, double %c, double %d) {
; CHECK-NEXT: orpd %xmm2, %xmm1
; AVX-LABEL: select_fcmp_ult_f64
; AVX: vcmpnlesd %xmm0, %xmm1, %xmm0
-; AVX-NEXT: vandpd %xmm2, %xmm0, %xmm1
-; AVX-NEXT: vandnpd %xmm3, %xmm0, %xmm0
-; AVX-NEXT: vorpd %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vblendvpd %xmm0, %xmm2, %xmm3, %xmm0
%1 = fcmp ult double %a, %b
%2 = select i1 %1, double %c, double %d
ret double %2
@@ -333,9 +293,7 @@ define float @select_fcmp_ule_f32(float %a, float %b, float %c, float %d) {
; CHECK-NEXT: orps %xmm2, %xmm1
; AVX-LABEL: select_fcmp_ule_f32
; AVX: vcmpnltss %xmm0, %xmm1, %xmm0
-; AVX-NEXT: vandps %xmm2, %xmm0, %xmm1
-; AVX-NEXT: vandnps %xmm3, %xmm0, %xmm0
-; AVX-NEXT: vorps %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vblendvps %xmm0, %xmm2, %xmm3, %xmm0
%1 = fcmp ule float %a, %b
%2 = select i1 %1, float %c, float %d
ret float %2
@@ -349,9 +307,7 @@ define double @select_fcmp_ule_f64(double %a, double %b, double %c, double %d) {
; CHECK-NEXT: orpd %xmm2, %xmm1
; AVX-LABEL: select_fcmp_ule_f64
; AVX: vcmpnltsd %xmm0, %xmm1, %xmm0
-; AVX-NEXT: vandpd %xmm2, %xmm0, %xmm1
-; AVX-NEXT: vandnpd %xmm3, %xmm0, %xmm0
-; AVX-NEXT: vorpd %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vblendvpd %xmm0, %xmm2, %xmm3, %xmm0
%1 = fcmp ule double %a, %b
%2 = select i1 %1, double %c, double %d
ret double %2
@@ -365,9 +321,7 @@ define float @select_fcmp_une_f32(float %a, float %b, float %c, float %d) {
; CHECK-NEXT: orps %xmm2, %xmm0
; AVX-LABEL: select_fcmp_une_f32
; AVX: vcmpneqss %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vandps %xmm2, %xmm0, %xmm1
-; AVX-NEXT: vandnps %xmm3, %xmm0, %xmm0
-; AVX-NEXT: vorps %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vblendvps %xmm0, %xmm2, %xmm3, %xmm0
%1 = fcmp une float %a, %b
%2 = select i1 %1, float %c, float %d
ret float %2
@@ -381,9 +335,7 @@ define double @select_fcmp_une_f64(double %a, double %b, double %c, double %d) {
; CHECK-NEXT: orpd %xmm2, %xmm0
; AVX-LABEL: select_fcmp_une_f64
; AVX: vcmpneqsd %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vandpd %xmm2, %xmm0, %xmm1
-; AVX-NEXT: vandnpd %xmm3, %xmm0, %xmm0
-; AVX-NEXT: vorpd %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vblendvpd %xmm0, %xmm2, %xmm3, %xmm0
%1 = fcmp une double %a, %b
%2 = select i1 %1, double %c, double %d
ret double %2
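On AVX targets the scalar select is now matched to a variable blend rather than the three-instruction and/andn/or mask sequence. A sketch of what the updated checks expect for the oeq case (AT&T operand order, compare mask in %xmm0):

  ; vcmpeqss  %xmm1, %xmm0, %xmm0          ; %xmm0 = all-ones if %a == %b, else all-zeros
  ; vblendvps %xmm0, %xmm2, %xmm3, %xmm0   ; mask set -> take %xmm2 (%c), clear -> %xmm3 (%d)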
diff --git a/test/CodeGen/X86/fast-isel-sext.ll b/test/CodeGen/X86/fast-isel-sext.ll
new file mode 100644
index 000000000000..ca1558e3c844
--- /dev/null
+++ b/test/CodeGen/X86/fast-isel-sext.ll
@@ -0,0 +1,9 @@
+; RUN: llc -mtriple=x86_64-linux -fast-isel -show-mc-encoding < %s | FileCheck %s
+
+; CHECK-LABEL: f:
+; CHECK: addl $-2, %eax # encoding: [0x83,0xc0,0xfe]
+define i32 @f(i32* %y) {
+ %x = load i32, i32* %y
+ %dec = add i32 %x, -2
+ ret i32 %dec
+}
diff --git a/test/CodeGen/X86/fast-isel-sse12-fptoint.ll b/test/CodeGen/X86/fast-isel-sse12-fptoint.ll
index 769c987e604a..2fbacbfdeae5 100644
--- a/test/CodeGen/X86/fast-isel-sse12-fptoint.ll
+++ b/test/CodeGen/X86/fast-isel-sse12-fptoint.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=-avx,+sse2 -fast-isel -fast-isel-abort | FileCheck %s --check-prefix=SSE
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=-avx2,+avx -fast-isel -fast-isel-abort | FileCheck %s --check-prefix=AVX
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=-avx,+sse2 -fast-isel -fast-isel-abort=1 | FileCheck %s --check-prefix=SSE
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=-avx2,+avx -fast-isel -fast-isel-abort=1 | FileCheck %s --check-prefix=AVX
define i32 @cvt_test1(float %a) {
; SSE-LABEL: cvt_test1
diff --git a/test/CodeGen/X86/fast-isel-store.ll b/test/CodeGen/X86/fast-isel-store.ll
index 3d2a46c2bd1b..8fb6356be9a6 100644
--- a/test/CodeGen/X86/fast-isel-store.ll
+++ b/test/CodeGen/X86/fast-isel-store.ll
@@ -1,5 +1,5 @@
-; RUN: llc -mtriple=x86_64-none-linux -fast-isel -fast-isel-abort -mattr=+sse2 < %s | FileCheck %s
-; RUN: llc -mtriple=i686-none-linux -fast-isel -fast-isel-abort -mattr=+sse2 < %s | FileCheck %s
+; RUN: llc -mtriple=x86_64-none-linux -fast-isel -fast-isel-abort=1 -mattr=+sse2 < %s | FileCheck %s
+; RUN: llc -mtriple=i686-none-linux -fast-isel -fast-isel-abort=1 -mattr=+sse2 < %s | FileCheck %s
define i32 @test_store_32(i32* nocapture %addr, i32 %value) {
entry:
diff --git a/test/CodeGen/X86/fast-isel-tailcall.ll b/test/CodeGen/X86/fast-isel-tailcall.ll
index 79ff79d4bca5..88ad05e8e1a7 100644
--- a/test/CodeGen/X86/fast-isel-tailcall.ll
+++ b/test/CodeGen/X86/fast-isel-tailcall.ll
@@ -7,7 +7,7 @@
define i32 @stub(i8* %t0) nounwind {
entry:
- %t1 = load i32* inttoptr (i32 139708680 to i32*) ; <i32> [#uses=1]
+ %t1 = load i32, i32* inttoptr (i32 139708680 to i32*) ; <i32> [#uses=1]
%t2 = bitcast i8* %t0 to i32 (i32)* ; <i32 (i32)*> [#uses=1]
%t3 = call fastcc i32 %t2(i32 %t1) ; <i32> [#uses=1]
ret i32 %t3
diff --git a/test/CodeGen/X86/fast-isel-tls.ll b/test/CodeGen/X86/fast-isel-tls.ll
index 686df43ac504..18bb9c13ff01 100644
--- a/test/CodeGen/X86/fast-isel-tls.ll
+++ b/test/CodeGen/X86/fast-isel-tls.ll
@@ -4,7 +4,7 @@
@v = thread_local global i32 0
define i32 @f() nounwind {
entry:
- %t = load i32* @v
+ %t = load i32, i32* @v
%s = add i32 %t, 1
ret i32 %s
}
@@ -16,7 +16,7 @@ entry:
@alias = internal alias i32* @v
define i32 @f_alias() nounwind {
entry:
- %t = load i32* @v
+ %t = load i32, i32* @v
%s = add i32 %t, 1
ret i32 %s
}
diff --git a/test/CodeGen/X86/fast-isel-trunc-kill-subreg.ll b/test/CodeGen/X86/fast-isel-trunc-kill-subreg.ll
new file mode 100644
index 000000000000..96a1a2dea179
--- /dev/null
+++ b/test/CodeGen/X86/fast-isel-trunc-kill-subreg.ll
@@ -0,0 +1,40 @@
+; RUN: llc %s -o - -fast-isel=true -O1 -verify-machineinstrs | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-unknown"
+
+; This test failed the machine verifier because the trunc at the start of the
+; function was extracting a subreg and killing the source register. The kill flag was
+; invalid here as the source of the trunc could still be used elsewhere.
+
+; CHECK-LABEL: @test
+
+define i32 @test(i32 %block8x8) {
+bb:
+ %tmp9 = trunc i32 %block8x8 to i1
+ %tmp10 = zext i1 %tmp9 to i32
+ %tmp11 = mul i32 %tmp10, 8
+ %tmp12 = zext i32 %tmp11 to i64
+ br label %bb241
+
+bb241: ; preds = %bb241, %bb
+ %lsr.iv3 = phi i64 [ %lsr.iv.next4, %bb241 ], [ %tmp12, %bb ]
+ %lsr.iv1 = phi i32 [ %lsr.iv.next2, %bb241 ], [ 0, %bb ]
+ %lsr.iv.next2 = add nuw nsw i32 %lsr.iv1, 1
+ %lsr.iv.next4 = add i64 %lsr.iv3, 32
+ %exitcond = icmp eq i32 %lsr.iv.next2, 8
+ br i1 %exitcond, label %.preheader.preheader, label %bb241
+
+.preheader.preheader: ; preds = %bb241
+ %tmp18 = lshr i32 %block8x8, 1
+ br label %bb270
+
+bb270: ; preds = %bb270, %.preheader.preheader
+ %lsr.iv = phi i32 [ %lsr.iv.next, %bb270 ], [ %tmp18, %.preheader.preheader ]
+ %lsr.iv.next = add i32 %lsr.iv, 4
+ %tmp272 = icmp slt i32 %lsr.iv.next, 100
+ br i1 %tmp272, label %bb270, label %.loopexit
+
+.loopexit: ; preds = %bb270
+ ret i32 0
+}
diff --git a/test/CodeGen/X86/fast-isel-vecload.ll b/test/CodeGen/X86/fast-isel-vecload.ll
new file mode 100644
index 000000000000..48eebf526f19
--- /dev/null
+++ b/test/CodeGen/X86/fast-isel-vecload.ll
@@ -0,0 +1,185 @@
+; RUN: llc -O0 -fast-isel -fast-isel-abort=1 -mtriple=x86_64-unknown-unknown -mattr=+sse2 < %s | FileCheck %s --check-prefix=SSE --check-prefix=ALL
+; RUN: llc -O0 -fast-isel -fast-isel-abort=1 -mtriple=x86_64-unknown-unknown -mattr=+avx < %s | FileCheck %s --check-prefix=AVX --check-prefix=ALL
+
+; Verify that fast-isel knows how to select aligned/unaligned vector loads.
+; Also verify that the selected load instruction is in the correct domain.
+
+define <16 x i8> @test_v16i8(<16 x i8>* %V) {
+; ALL-LABEL: test_v16i8:
+; SSE: movdqa (%rdi), %xmm0
+; AVX: vmovdqa (%rdi), %xmm0
+; ALL-NEXT: retq
+entry:
+ %0 = load <16 x i8>, <16 x i8>* %V, align 16
+ ret <16 x i8> %0
+}
+
+define <8 x i16> @test_v8i16(<8 x i16>* %V) {
+; ALL-LABEL: test_v8i16:
+; SSE: movdqa (%rdi), %xmm0
+; AVX: vmovdqa (%rdi), %xmm0
+; ALL-NEXT: retq
+entry:
+ %0 = load <8 x i16>, <8 x i16>* %V, align 16
+ ret <8 x i16> %0
+}
+
+define <4 x i32> @test_v4i32(<4 x i32>* %V) {
+; ALL-LABEL: test_v4i32:
+; SSE: movdqa (%rdi), %xmm0
+; AVX: vmovdqa (%rdi), %xmm0
+; ALL-NEXT: retq
+entry:
+ %0 = load <4 x i32>, <4 x i32>* %V, align 16
+ ret <4 x i32> %0
+}
+
+define <2 x i64> @test_v2i64(<2 x i64>* %V) {
+; ALL-LABEL: test_v2i64:
+; SSE: movdqa (%rdi), %xmm0
+; AVX: vmovdqa (%rdi), %xmm0
+; ALL-NEXT: retq
+entry:
+ %0 = load <2 x i64>, <2 x i64>* %V, align 16
+ ret <2 x i64> %0
+}
+
+define <16 x i8> @test_v16i8_unaligned(<16 x i8>* %V) {
+; ALL-LABEL: test_v16i8_unaligned:
+; SSE: movdqu (%rdi), %xmm0
+; AVX: vmovdqu (%rdi), %xmm0
+; ALL-NEXT: retq
+entry:
+ %0 = load <16 x i8>, <16 x i8>* %V, align 4
+ ret <16 x i8> %0
+}
+
+define <8 x i16> @test_v8i16_unaligned(<8 x i16>* %V) {
+; ALL-LABEL: test_v8i16_unaligned:
+; SSE: movdqu (%rdi), %xmm0
+; AVX: vmovdqu (%rdi), %xmm0
+; ALL-NEXT: retq
+entry:
+ %0 = load <8 x i16>, <8 x i16>* %V, align 4
+ ret <8 x i16> %0
+}
+
+define <4 x i32> @test_v4i32_unaligned(<4 x i32>* %V) {
+; ALL-LABEL: test_v4i32_unaligned:
+; SSE: movdqu (%rdi), %xmm0
+; AVX: vmovdqu (%rdi), %xmm0
+; ALL-NEXT: retq
+entry:
+ %0 = load <4 x i32>, <4 x i32>* %V, align 4
+ ret <4 x i32> %0
+}
+
+define <2 x i64> @test_v2i64_unaligned(<2 x i64>* %V) {
+; ALL-LABEL: test_v2i64_unaligned:
+; SSE: movdqu (%rdi), %xmm0
+; AVX: vmovdqu (%rdi), %xmm0
+; ALL-NEXT: retq
+entry:
+ %0 = load <2 x i64>, <2 x i64>* %V, align 4
+ ret <2 x i64> %0
+}
+
+define <4 x float> @test_v4f32(<4 x float>* %V) {
+; ALL-LABEL: test_v4f32:
+; SSE: movaps (%rdi), %xmm0
+; AVX: vmovaps (%rdi), %xmm0
+; ALL-NEXT: retq
+entry:
+ %0 = load <4 x float>, <4 x float>* %V, align 16
+ ret <4 x float> %0
+}
+
+define <2 x double> @test_v2f64(<2 x double>* %V) {
+; ALL-LABEL: test_v2f64:
+; SSE: movapd (%rdi), %xmm0
+; AVX: vmovapd (%rdi), %xmm0
+; ALL-NEXT: retq
+entry:
+ %0 = load <2 x double>, <2 x double>* %V, align 16
+ ret <2 x double> %0
+}
+
+define <4 x float> @test_v4f32_unaligned(<4 x float>* %V) {
+; ALL-LABEL: test_v4f32_unaligned:
+; SSE: movups (%rdi), %xmm0
+; AVX: vmovups (%rdi), %xmm0
+; ALL-NEXT: retq
+entry:
+ %0 = load <4 x float>, <4 x float>* %V, align 4
+ ret <4 x float> %0
+}
+
+define <2 x double> @test_v2f64_unaligned(<2 x double>* %V) {
+; ALL-LABEL: test_v2f64_unaligned:
+; SSE: movupd (%rdi), %xmm0
+; AVX: vmovupd (%rdi), %xmm0
+; ALL-NEXT: retq
+entry:
+ %0 = load <2 x double>, <2 x double>* %V, align 4
+ ret <2 x double> %0
+}
+
+define <16 x i8> @test_v16i8_abi_alignment(<16 x i8>* %V) {
+; ALL-LABEL: test_v16i8_abi_alignment:
+; SSE: movdqa (%rdi), %xmm0
+; AVX: vmovdqa (%rdi), %xmm0
+; ALL-NEXT: retq
+entry:
+ %0 = load <16 x i8>, <16 x i8>* %V
+ ret <16 x i8> %0
+}
+
+define <8 x i16> @test_v8i16_abi_alignment(<8 x i16>* %V) {
+; ALL-LABEL: test_v8i16_abi_alignment:
+; SSE: movdqa (%rdi), %xmm0
+; AVX: vmovdqa (%rdi), %xmm0
+; ALL-NEXT: retq
+entry:
+ %0 = load <8 x i16>, <8 x i16>* %V
+ ret <8 x i16> %0
+}
+
+define <4 x i32> @test_v4i32_abi_alignment(<4 x i32>* %V) {
+; ALL-LABEL: test_v4i32_abi_alignment:
+; SSE: movdqa (%rdi), %xmm0
+; AVX: vmovdqa (%rdi), %xmm0
+; ALL-NEXT: retq
+entry:
+ %0 = load <4 x i32>, <4 x i32>* %V
+ ret <4 x i32> %0
+}
+
+define <2 x i64> @test_v2i64_abi_alignment(<2 x i64>* %V) {
+; ALL-LABEL: test_v2i64_abi_alignment:
+; SSE: movdqa (%rdi), %xmm0
+; AVX: vmovdqa (%rdi), %xmm0
+; ALL-NEXT: retq
+entry:
+ %0 = load <2 x i64>, <2 x i64>* %V
+ ret <2 x i64> %0
+}
+
+define <4 x float> @test_v4f32_abi_alignment(<4 x float>* %V) {
+; ALL-LABEL: test_v4f32_abi_alignment:
+; SSE: movaps (%rdi), %xmm0
+; AVX: vmovaps (%rdi), %xmm0
+; ALL-NEXT: retq
+entry:
+ %0 = load <4 x float>, <4 x float>* %V
+ ret <4 x float> %0
+}
+
+define <2 x double> @test_v2f64_abi_alignment(<2 x double>* %V) {
+; ALL-LABEL: test_v2f64_abi_alignment:
+; SSE: movapd (%rdi), %xmm0
+; AVX: vmovapd (%rdi), %xmm0
+; ALL-NEXT: retq
+entry:
+ %0 = load <2 x double>, <2 x double>* %V
+ ret <2 x double> %0
+}
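The rule the new file exercises: an explicit alignment of 16 or more (or none, which defaults to the 16-byte ABI alignment of these types) selects the aligned move, anything smaller selects the unaligned one, and the integer/float domain follows the element type. A reduced sketch under those assumptions:

  %i = load <4 x i32>, <4 x i32>* %p, align 16      ; -> movdqa / vmovdqa
  %u = load <4 x i32>, <4 x i32>* %p, align 4       ; -> movdqu / vmovdqu
  %f = load <4 x float>, <4 x float>* %q, align 16  ; -> movaps / vmovaps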
diff --git a/test/CodeGen/X86/fast-isel-x32.ll b/test/CodeGen/X86/fast-isel-x32.ll
index d49a10801065..7b95325f562d 100644
--- a/test/CodeGen/X86/fast-isel-x32.ll
+++ b/test/CodeGen/X86/fast-isel-x32.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -mtriple=x86_64-linux-gnux32 -fast-isel -fast-isel-abort | FileCheck %s
-; RUN: llc < %s -mtriple=x86_64-nacl -fast-isel -fast-isel-abort | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-linux-gnux32 -fast-isel -fast-isel-abort=1 | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-nacl -fast-isel -fast-isel-abort=1 | FileCheck %s
; Test that alloca addresses are materialized with the right size instruction.
diff --git a/test/CodeGen/X86/fast-isel-x86-64.ll b/test/CodeGen/X86/fast-isel-x86-64.ll
index 3747d049424d..d748cba2f8f8 100644
--- a/test/CodeGen/X86/fast-isel-x86-64.ll
+++ b/test/CodeGen/X86/fast-isel-x86-64.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -mattr=-avx -fast-isel -mcpu=core2 -O0 -regalloc=fast -asm-verbose=0 -fast-isel-abort | FileCheck %s
-; RUN: llc < %s -mattr=+avx -fast-isel -mcpu=core2 -O0 -regalloc=fast -asm-verbose=0 -fast-isel-abort | FileCheck %s --check-prefix=AVX
+; RUN: llc < %s -mattr=-avx -fast-isel -mcpu=core2 -O0 -regalloc=fast -asm-verbose=0 -fast-isel-abort=1 | FileCheck %s
+; RUN: llc < %s -mattr=+avx -fast-isel -mcpu=core2 -O0 -regalloc=fast -asm-verbose=0 -fast-isel-abort=1 | FileCheck %s --check-prefix=AVX
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-apple-darwin10.0.0"
@@ -20,7 +20,7 @@ define void @test2(i64 %x) nounwind ssp {
entry:
%x.addr = alloca i64, align 8
store i64 %x, i64* %x.addr, align 8
- %tmp = load i64* %x.addr, align 8
+ %tmp = load i64, i64* %x.addr, align 8
%cmp = icmp sgt i64 %tmp, 42
br i1 %cmp, label %if.then, label %if.end
@@ -52,8 +52,8 @@ define i64 @test3() nounwind {
@rtx_length = external global [153 x i8]
define i32 @test4(i64 %idxprom9) nounwind {
- %arrayidx10 = getelementptr inbounds [153 x i8]* @rtx_length, i32 0, i64 %idxprom9
- %tmp11 = load i8* %arrayidx10, align 1
+ %arrayidx10 = getelementptr inbounds [153 x i8], [153 x i8]* @rtx_length, i32 0, i64 %idxprom9
+ %tmp11 = load i8, i8* %arrayidx10, align 1
%conv = zext i8 %tmp11 to i32
ret i32 %conv
@@ -190,7 +190,7 @@ define void @test16() nounwind {
; CHECK: movl $1, %edi
; CHECK: movb $0, %al
; CHECK: callq _test16callee
- call void (...)* @test16callee(i32 1)
+ call void (...) @test16callee(i32 1)
br label %block2
block2:
@@ -201,7 +201,7 @@ block2:
; AVX: vmovsd LCP{{.*}}_{{.*}}(%rip), %xmm0
; AVX: movb $1, %al
; AVX: callq _test16callee
- call void (...)* @test16callee(double 1.000000e+00)
+ call void (...) @test16callee(double 1.000000e+00)
ret void
}
@@ -212,7 +212,7 @@ declare void @foo() unnamed_addr ssp align 2
; w.r.t. the call.
define i32 @test17(i32 *%P) ssp nounwind {
entry:
- %tmp = load i32* %P
+ %tmp = load i32, i32* %P
%cmp = icmp ne i32 %tmp, 5
call void @foo()
br i1 %cmp, label %if.then, label %if.else
diff --git a/test/CodeGen/X86/fast-isel-x86.ll b/test/CodeGen/X86/fast-isel-x86.ll
index 61e9b98f6e7e..8049c72ec018 100644
--- a/test/CodeGen/X86/fast-isel-x86.ll
+++ b/test/CodeGen/X86/fast-isel-x86.ll
@@ -6,7 +6,7 @@
; CHECK: retl
@G = external global float
define float @test0() nounwind {
- %t = load float* @G
+ %t = load float, float* @G
ret float %t
}
@@ -28,7 +28,7 @@ define void @test1({i32, i32, i32, i32}* sret %p) nounwind {
; CHECK: retl
@HHH = external global i32
define i32 @test2() nounwind {
- %t = load i32* @HHH
+ %t = load i32, i32* @HHH
ret i32 %t
}
diff --git a/test/CodeGen/X86/fast-isel.ll b/test/CodeGen/X86/fast-isel.ll
index bc7918421603..36183e48c299 100644
--- a/test/CodeGen/X86/fast-isel.ll
+++ b/test/CodeGen/X86/fast-isel.ll
@@ -1,13 +1,13 @@
-; RUN: llc < %s -fast-isel -fast-isel-abort -verify-machineinstrs -march=x86 -mattr=sse2 -no-integrated-as
-; RUN: llc < %s -fast-isel -fast-isel-abort -verify-machineinstrs -mtriple=x86_64-apple-darwin10 -no-integrated-as
+; RUN: llc < %s -fast-isel -fast-isel-abort=1 -verify-machineinstrs -march=x86 -mattr=sse2 -no-integrated-as
+; RUN: llc < %s -fast-isel -fast-isel-abort=1 -verify-machineinstrs -mtriple=x86_64-apple-darwin10 -no-integrated-as
; This tests very minimal fast-isel functionality.
define i32* @foo(i32* %p, i32* %q, i32** %z) nounwind {
entry:
- %r = load i32* %p
- %s = load i32* %q
- %y = load i32** %z
+ %r = load i32, i32* %p
+ %s = load i32, i32* %q
+ %y = load i32*, i32** %z
br label %fast
fast:
@@ -18,8 +18,8 @@ fast:
%t4 = xor i32 %t3, 3
%t5 = xor i32 %t4, %s
%t6 = add i32 %t5, 2
- %t7 = getelementptr i32* %y, i32 1
- %t8 = getelementptr i32* %t7, i32 %t6
+ %t7 = getelementptr i32, i32* %y, i32 1
+ %t8 = getelementptr i32, i32* %t7, i32 %t6
call void asm sideeffect "hello world", ""()
br label %exit
@@ -29,8 +29,8 @@ exit:
define void @bar(double* %p, double* %q) nounwind {
entry:
- %r = load double* %p
- %s = load double* %q
+ %r = load double, double* %p
+ %s = load double, double* %q
br label %fast
fast:
@@ -94,7 +94,7 @@ define void @mul_i8(i8 %a, i8* %p) nounwind {
}
define void @load_store_i1(i1* %p, i1* %q) nounwind {
- %t = load i1* %p
+ %t = load i1, i1* %p
store i1 %t, i1* %q
ret void
}
@@ -102,7 +102,7 @@ define void @load_store_i1(i1* %p, i1* %q) nounwind {
@crash_test1x = external global <2 x i32>, align 8
define void @crash_test1() nounwind ssp {
- %tmp = load <2 x i32>* @crash_test1x, align 8
+ %tmp = load <2 x i32>, <2 x i32>* @crash_test1x, align 8
%neg = xor <2 x i32> %tmp, <i32 -1, i32 -1>
ret void
}
@@ -113,7 +113,7 @@ define i64* @life() nounwind {
%a1 = alloca i64*, align 8
%a2 = bitcast i64** %a1 to i8*
call void @llvm.lifetime.start(i64 -1, i8* %a2) nounwind
- %a3 = load i64** %a1, align 8
+ %a3 = load i64*, i64** %a1, align 8
ret i64* %a3
}
diff --git a/test/CodeGen/X86/fastcall-correct-mangling.ll b/test/CodeGen/X86/fastcall-correct-mangling.ll
index 00dc44e75e8f..bc53fe8aa4ec 100644
--- a/test/CodeGen/X86/fastcall-correct-mangling.ll
+++ b/test/CodeGen/X86/fastcall-correct-mangling.ll
@@ -28,6 +28,6 @@ entry:
define private x86_fastcallcc void @dontCrash() {
; The name is fairly arbitrary since it is private. Just don't crash.
; CHECK32-LABEL: {{^}}L@dontCrash@0:
-; CHECK64-LABEL: {{^}}.LdontCrash:
+; CHECK64-LABEL: {{^}}LdontCrash:
ret void
}
diff --git a/test/CodeGen/X86/fastcc-byval.ll b/test/CodeGen/X86/fastcc-byval.ll
index e6828e42827c..1706de461116 100644
--- a/test/CodeGen/X86/fastcc-byval.ll
+++ b/test/CodeGen/X86/fastcc-byval.ll
@@ -14,10 +14,10 @@ target triple = "i386-apple-darwin9.5"
define fastcc i32 @bar() nounwind {
%V = alloca %struct.MVT
- %a = getelementptr %struct.MVT* %V, i32 0, i32 0
+ %a = getelementptr %struct.MVT, %struct.MVT* %V, i32 0, i32 0
store i32 1, i32* %a
call fastcc void @foo(%struct.MVT* byval %V) nounwind
- %t = load i32* %a
+ %t = load i32, i32* %a
ret i32 %t
}
diff --git a/test/CodeGen/X86/fastcc-sret.ll b/test/CodeGen/X86/fastcc-sret.ll
index 97814dbabdcc..499aadda44fa 100644
--- a/test/CodeGen/X86/fastcc-sret.ll
+++ b/test/CodeGen/X86/fastcc-sret.ll
@@ -4,8 +4,8 @@
define fastcc void @bar(%struct.foo* noalias sret %agg.result) nounwind {
entry:
- %tmp1 = getelementptr %struct.foo* %agg.result, i32 0, i32 0
- %tmp3 = getelementptr [4 x i32]* %tmp1, i32 0, i32 0
+ %tmp1 = getelementptr %struct.foo, %struct.foo* %agg.result, i32 0, i32 0
+ %tmp3 = getelementptr [4 x i32], [4 x i32]* %tmp1, i32 0, i32 0
store i32 1, i32* %tmp3, align 8
ret void
}
@@ -17,9 +17,9 @@ entry:
define void @foo() nounwind {
%memtmp = alloca %struct.foo, align 4
call fastcc void @bar( %struct.foo* sret %memtmp ) nounwind
- %tmp4 = getelementptr %struct.foo* %memtmp, i32 0, i32 0
- %tmp5 = getelementptr [4 x i32]* %tmp4, i32 0, i32 0
- %tmp6 = load i32* %tmp5
+ %tmp4 = getelementptr %struct.foo, %struct.foo* %memtmp, i32 0, i32 0
+ %tmp5 = getelementptr [4 x i32], [4 x i32]* %tmp4, i32 0, i32 0
+ %tmp6 = load i32, i32* %tmp5
store i32 %tmp6, i32* @dst
ret void
}
diff --git a/test/CodeGen/X86/fastcc.ll b/test/CodeGen/X86/fastcc.ll
index a362f8d1ca7e..020e7f9d3532 100644
--- a/test/CodeGen/X86/fastcc.ll
+++ b/test/CodeGen/X86/fastcc.ll
@@ -9,10 +9,10 @@
define i32 @foo() nounwind {
entry:
- %0 = load double* @d, align 8 ; <double> [#uses=1]
- %1 = load double* @c, align 8 ; <double> [#uses=1]
- %2 = load double* @b, align 8 ; <double> [#uses=1]
- %3 = load double* @a, align 8 ; <double> [#uses=1]
+ %0 = load double, double* @d, align 8 ; <double> [#uses=1]
+ %1 = load double, double* @c, align 8 ; <double> [#uses=1]
+ %2 = load double, double* @b, align 8 ; <double> [#uses=1]
+ %3 = load double, double* @a, align 8 ; <double> [#uses=1]
tail call fastcc void @bar( i32 0, i32 1, i32 2, double 1.000000e+00, double %3, double %2, double %1, double %0 ) nounwind
ret i32 0
}
diff --git a/test/CodeGen/X86/fastisel-gep-promote-before-add.ll b/test/CodeGen/X86/fastisel-gep-promote-before-add.ll
index f87a34c4abde..1f67299a8045 100644
--- a/test/CodeGen/X86/fastisel-gep-promote-before-add.ll
+++ b/test/CodeGen/X86/fastisel-gep-promote-before-add.ll
@@ -6,13 +6,13 @@ define zeroext i8 @gep_promotion(i8* %ptr) nounwind uwtable ssp {
entry:
%ptr.addr = alloca i8*, align 8
%add = add i8 64, 64 ; 0x40 + 0x40
- %0 = load i8** %ptr.addr, align 8
+ %0 = load i8*, i8** %ptr.addr, align 8
; CHECK-LABEL: _gep_promotion:
; CHECK: movzbl ({{.*}})
- %arrayidx = getelementptr inbounds i8* %0, i8 %add
+ %arrayidx = getelementptr inbounds i8, i8* %0, i8 %add
- %1 = load i8* %arrayidx, align 1
+ %1 = load i8, i8* %arrayidx, align 1
ret i8 %1
}
@@ -22,16 +22,16 @@ entry:
%ptr.addr = alloca i8*, align 8
store i8 %i, i8* %i.addr, align 4
store i8* %ptr, i8** %ptr.addr, align 8
- %0 = load i8* %i.addr, align 4
+ %0 = load i8, i8* %i.addr, align 4
; CHECK-LABEL: _gep_promotion_nonconst:
; CHECK: movzbl ({{.*}})
%xor = xor i8 %0, -128 ; %0 ^ 0x80
%add = add i8 %xor, -127 ; %xor + 0x81
- %1 = load i8** %ptr.addr, align 8
+ %1 = load i8*, i8** %ptr.addr, align 8
- %arrayidx = getelementptr inbounds i8* %1, i8 %add
+ %arrayidx = getelementptr inbounds i8, i8* %1, i8 %add
- %2 = load i8* %arrayidx, align 1
+ %2 = load i8, i8* %arrayidx, align 1
ret i8 %2
}
diff --git a/test/CodeGen/X86/fastmath-float-half-conversion.ll b/test/CodeGen/X86/fastmath-float-half-conversion.ll
new file mode 100644
index 000000000000..29308735cca2
--- /dev/null
+++ b/test/CodeGen/X86/fastmath-float-half-conversion.ll
@@ -0,0 +1,52 @@
+; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+f16c < %s | FileCheck %s --check-prefix=ALL --check-prefix=F16C
+; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+avx < %s | FileCheck %s --check-prefix=ALL --check-prefix=AVX
+
+define zeroext i16 @test1_fast(double %d) #0 {
+; ALL-LABEL: test1_fast:
+; F16C-NOT: callq {{_+}}truncdfhf2
+; F16C: vcvtsd2ss %xmm0, %xmm0, %xmm0
+; F16C-NEXT: vcvtps2ph $0, %xmm0, %xmm0
+; AVX: callq {{_+}}truncdfhf2
+; ALL: ret
+entry:
+ %0 = tail call i16 @llvm.convert.to.fp16.f64(double %d)
+ ret i16 %0
+}
+
+define zeroext i16 @test2_fast(x86_fp80 %d) #0 {
+; ALL-LABEL: test2_fast:
+; F16C-NOT: callq {{_+}}truncxfhf2
+; F16C: fldt
+; F16C-NEXT: fstps
+; F16C-NEXT: vmovss
+; F16C-NEXT: vcvtps2ph $0, %xmm0, %xmm0
+; AVX: callq {{_+}}truncxfhf2
+; ALL: ret
+entry:
+ %0 = tail call i16 @llvm.convert.to.fp16.f80(x86_fp80 %d)
+ ret i16 %0
+}
+
+define zeroext i16 @test1(double %d) #1 {
+; ALL-LABEL: test1:
+; ALL: callq {{_+}}truncdfhf2
+; ALL: ret
+entry:
+ %0 = tail call i16 @llvm.convert.to.fp16.f64(double %d)
+ ret i16 %0
+}
+
+define zeroext i16 @test2(x86_fp80 %d) #1 {
+; ALL-LABEL: test2:
+; ALL: callq {{_+}}truncxfhf2
+; ALL: ret
+entry:
+ %0 = tail call i16 @llvm.convert.to.fp16.f80(x86_fp80 %d)
+ ret i16 %0
+}
+
+declare i16 @llvm.convert.to.fp16.f64(double)
+declare i16 @llvm.convert.to.fp16.f80(x86_fp80)
+
+attributes #0 = { nounwind readnone uwtable "unsafe-fp-math"="true" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone uwtable "unsafe-fp-math"="false" "use-soft-float"="false" }
diff --git a/test/CodeGen/X86/fcmove.ll b/test/CodeGen/X86/fcmove.ll
new file mode 100644
index 000000000000..21cc683f734f
--- /dev/null
+++ b/test/CodeGen/X86/fcmove.ll
@@ -0,0 +1,15 @@
+; RUN: llc %s -o - -verify-machineinstrs | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-unknown"
+
+; Test that we can generate an fcmove, and also that it passes verification.
+
+; CHECK-LABEL: cmove_f
+; CHECK: fcmove %st({{[0-7]}}), %st(0)
+define x86_fp80 @cmove_f(x86_fp80 %a, x86_fp80 %b, i32 %c) {
+ %test = icmp eq i32 %c, 0
+ %add = fadd x86_fp80 %a, %b
+ %ret = select i1 %test, x86_fp80 %add, x86_fp80 %b
+ ret x86_fp80 %ret
+}
\ No newline at end of file
diff --git a/test/CodeGen/X86/fdiv-combine.ll b/test/CodeGen/X86/fdiv-combine.ll
new file mode 100644
index 000000000000..279bb0624ace
--- /dev/null
+++ b/test/CodeGen/X86/fdiv-combine.ll
@@ -0,0 +1,31 @@
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s
+
+; Anything more than one division using a single divisor operand
+; should be converted into a reciprocal and multiplication.
+
+define float @div1_arcp(float %x, float %y, float %z) #0 {
+; CHECK-LABEL: div1_arcp:
+; CHECK: # BB#0:
+; CHECK-NEXT: divss %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %div1 = fdiv arcp float %x, %y
+ ret float %div1
+}
+
+define float @div2_arcp(float %x, float %y, float %z) #0 {
+; CHECK-LABEL: div2_arcp:
+; CHECK: # BB#0:
+; CHECK-NEXT: movss {{.*#+}} xmm3 = mem[0],zero,zero,zero
+; CHECK-NEXT: divss %xmm2, %xmm3
+; CHECK-NEXT: mulss %xmm3, %xmm0
+; CHECK-NEXT: mulss %xmm1, %xmm0
+; CHECK-NEXT: mulss %xmm3, %xmm0
+; CHECK-NEXT: retq
+ %div1 = fdiv arcp float %x, %z
+ %mul = fmul arcp float %div1, %y
+ %div2 = fdiv arcp float %mul, %z
+ ret float %div2
+}
+
+; FIXME: If the backend understands 'arcp', then this attribute is unnecessary.
+attributes #0 = { "unsafe-fp-math"="true" }
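For readers following the CHECK lines above: with two divisions by %z, the combine computes the reciprocal of %z once and turns both divisions into multiplications, which is what the divss-then-three-mulss sequence encodes. A hand-written sketch of that folded form (div2_arcp_folded is a hypothetical name used only for illustration; it is not part of the test):

define float @div2_arcp_folded(float %x, float %y, float %z) {
  ; one reciprocal of %z replaces both divisions
  %rcp = fdiv arcp float 1.000000e+00, %z
  %t0  = fmul arcp float %x, %rcp        ; %x / %z
  %t1  = fmul arcp float %t0, %y         ; (%x / %z) * %y
  %t2  = fmul arcp float %t1, %rcp       ; ((%x / %z) * %y) / %z
  ret float %t2
}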
diff --git a/test/CodeGen/X86/float-conv-elim.ll b/test/CodeGen/X86/float-conv-elim.ll
new file mode 100644
index 000000000000..3feff851d91a
--- /dev/null
+++ b/test/CodeGen/X86/float-conv-elim.ll
@@ -0,0 +1,32 @@
+; RUN: llc -march=x86-64 -mcpu=x86-64 < %s | FileCheck %s
+
+; Make sure the float conversion is folded away as it should be.
+; CHECK-LABEL: foo
+; CHECK-NOT: cvt
+; CHECK: movzbl
+define i32 @foo(i8 %a) #0 {
+ %conv = uitofp i8 %a to float
+ %conv1 = fptosi float %conv to i32
+ ret i32 %conv1
+}
+
+; CHECK-LABEL: foo2
+; CHECK-NOT: cvt
+; CHECK: movsbl
+define i32 @foo2(i8 %a) #0 {
+ %conv = sitofp i8 %a to float
+ %conv1 = fptosi float %conv to i32
+ ret i32 %conv1
+}
+
+; CHECK-LABEL: bar
+; CHECK-NOT: cvt
+; CHECK: movl
+define zeroext i8 @bar(i8 zeroext %a) #0 {
+ %conv = uitofp i8 %a to float
+ %conv1 = fptoui float %conv to i8
+ ret i8 %conv1
+}
+
+attributes #0 = { nounwind ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
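The folding the CHECK lines above expect is possible because every i8 value is exactly representable as a float, so the round trip through float cannot change the value and the conversion pair collapses to a plain extension. An illustrative hand-folded equivalent of @foo (foo_folded is a hypothetical name, not part of the test):

define i32 @foo_folded(i8 %a) {
  ; uitofp i8 -> float followed by fptosi float -> i32 is value-preserving,
  ; so it reduces to a zero extension (the movzbl the test checks for)
  %conv1 = zext i8 %a to i32
  ret i32 %conv1
}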
diff --git a/test/CodeGen/X86/floor-soft-float.ll b/test/CodeGen/X86/floor-soft-float.ll
index 5644509a86f7..7bb738513f54 100644
--- a/test/CodeGen/X86/floor-soft-float.ll
+++ b/test/CodeGen/X86/floor-soft-float.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -march=x86-64 -mattr=+sse4.1,-avx -soft-float=0 | FileCheck %s --check-prefix=CHECK-HARD-FLOAT
-; RUN: llc < %s -march=x86-64 -mattr=+sse4.1,-avx -soft-float=1 | FileCheck %s --check-prefix=CHECK-SOFT-FLOAT
+; RUN: llc < %s -march=x86-64 -mattr=+sse4.1,-avx | FileCheck %s --check-prefix=CHECK-HARD-FLOAT
+; RUN: llc < %s -march=x86-64 -mattr=+sse4.1,-avx,+soft-float | FileCheck %s --check-prefix=CHECK-SOFT-FLOAT
target triple = "x86_64-unknown-linux-gnu"
diff --git a/test/CodeGen/X86/fltused.ll b/test/CodeGen/X86/fltused.ll
index 81511a33f5cb..6c5d8cefeba9 100644
--- a/test/CodeGen/X86/fltused.ll
+++ b/test/CodeGen/X86/fltused.ll
@@ -11,7 +11,7 @@
define i32 @main() nounwind {
entry:
- %call = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), double 1.000000e+000) nounwind
+ %call = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), double 1.000000e+000) nounwind
ret i32 0
}
diff --git a/test/CodeGen/X86/fltused_function_pointer.ll b/test/CodeGen/X86/fltused_function_pointer.ll
index cfe484a8c258..a41ae48a5fb7 100644
--- a/test/CodeGen/X86/fltused_function_pointer.ll
+++ b/test/CodeGen/X86/fltused_function_pointer.ll
@@ -11,7 +11,7 @@
define i32 @foo(i32 (i8*, ...)* %f) nounwind {
entry:
- %call = tail call i32 (i8*, ...)* %f(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), double 1.000000e+000) nounwind
+ %call = tail call i32 (i8*, ...) %f(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), double 1.000000e+000) nounwind
ret i32 0
}
diff --git a/test/CodeGen/X86/fma-do-not-commute.ll b/test/CodeGen/X86/fma-do-not-commute.ll
index 4e211721a382..1f6a19cfff83 100644
--- a/test/CodeGen/X86/fma-do-not-commute.ll
+++ b/test/CodeGen/X86/fma-do-not-commute.ll
@@ -18,8 +18,8 @@ entry:
loop:
%sum0 = phi float [ %fma, %loop ], [ %arg, %entry ]
- %addrVal = load float* %addr, align 4
- %addr2Val = load float* %addr2, align 4
+ %addrVal = load float, float* %addr, align 4
+ %addr2Val = load float, float* %addr2, align 4
%fmul = fmul float %addrVal, %addr2Val
%fma = fadd float %sum0, %fmul
br i1 true, label %exit, label %loop
diff --git a/test/CodeGen/X86/fma3-intrinsics.ll b/test/CodeGen/X86/fma3-intrinsics.ll
index 9a25096c7a52..fa9c252f30b4 100755
--- a/test/CodeGen/X86/fma3-intrinsics.ll
+++ b/test/CodeGen/X86/fma3-intrinsics.ll
@@ -3,7 +3,9 @@
; RUN: llc < %s -mcpu=bdver2 -mtriple=x86_64-pc-win32 -mattr=-fma4 | FileCheck %s
define <4 x float> @test_x86_fmadd_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
- ; CHECK: fmadd213ss (%r8), %xmm
+ ; CHECK-DAG: vmovaps (%rcx), [[XMM1:%xmm[0-9]+]]
+ ; CHECK-DAG: vmovaps (%rdx), [[XMM0:%xmm[0-9]+]]
+ ; CHECK: fmadd213ss (%r8), [[XMM1]], [[XMM0]]
%res = call <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) nounwind
ret <4 x float> %res
}
@@ -24,7 +26,9 @@ define <8 x float> @test_x86_fmadd_ps_y(<8 x float> %a0, <8 x float> %a1, <8 x f
declare <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float>, <8 x float>, <8 x float>) nounwind readnone
define <4 x float> @test_x86_fnmadd_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
- ; CHECK: fnmadd213ss (%r8), %xmm
+ ; CHECK-DAG: vmovaps (%rcx), [[XMM1:%xmm[0-9]+]]
+ ; CHECK-DAG: vmovaps (%rdx), [[XMM0:%xmm[0-9]+]]
+ ; CHECK: fnmadd213ss (%r8), [[XMM1]], [[XMM0]]
%res = call <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) nounwind
ret <4 x float> %res
}
@@ -46,7 +50,9 @@ declare <8 x float> @llvm.x86.fma.vfnmadd.ps.256(<8 x float>, <8 x float>, <8 x
define <4 x float> @test_x86_fmsub_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
- ; CHECK: fmsub213ss
+ ; CHECK-DAG: vmovaps (%rcx), [[XMM1:%xmm[0-9]+]]
+ ; CHECK-DAG: vmovaps (%rdx), [[XMM0:%xmm[0-9]+]]
+ ; CHECK: fmsub213ss (%r8), [[XMM1]], [[XMM0]]
%res = call <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) nounwind
ret <4 x float> %res
}
@@ -60,7 +66,9 @@ define <4 x float> @test_x86_fmsub_ps(<4 x float> %a0, <4 x float> %a1, <4 x flo
declare <4 x float> @llvm.x86.fma.vfmsub.ps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
define <4 x float> @test_x86_fnmsub_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
- ; CHECK: fnmsub213ss
+ ; CHECK-DAG: vmovaps (%rcx), [[XMM1:%xmm[0-9]+]]
+ ; CHECK-DAG: vmovaps (%rdx), [[XMM0:%xmm[0-9]+]]
+ ; CHECK: fnmsub213ss (%r8), [[XMM1]], [[XMM0]]
%res = call <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) nounwind
ret <4 x float> %res
}
@@ -76,7 +84,9 @@ declare <4 x float> @llvm.x86.fma.vfnmsub.ps(<4 x float>, <4 x float>, <4 x floa
;;;;
define <2 x double> @test_x86_fmadd_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
- ; CHECK: fmadd213sd
+ ; CHECK-DAG: vmovaps (%rcx), [[XMM1:%xmm[0-9]+]]
+ ; CHECK-DAG: vmovaps (%rdx), [[XMM0:%xmm[0-9]+]]
+ ; CHECK: fmadd213sd (%r8), [[XMM1]], [[XMM0]]
%res = call <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) nounwind
ret <2 x double> %res
}
@@ -90,7 +100,9 @@ define <2 x double> @test_x86_fmadd_pd(<2 x double> %a0, <2 x double> %a1, <2 x
declare <2 x double> @llvm.x86.fma.vfmadd.pd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
define <2 x double> @test_x86_fnmadd_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
- ; CHECK: fnmadd213sd
+ ; CHECK-DAG: vmovaps (%rcx), [[XMM1:%xmm[0-9]+]]
+ ; CHECK-DAG: vmovaps (%rdx), [[XMM0:%xmm[0-9]+]]
+ ; CHECK: fnmadd213sd (%r8), [[XMM1]], [[XMM0]]
%res = call <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) nounwind
ret <2 x double> %res
}
@@ -106,7 +118,9 @@ declare <2 x double> @llvm.x86.fma.vfnmadd.pd(<2 x double>, <2 x double>, <2 x d
define <2 x double> @test_x86_fmsub_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
- ; CHECK: fmsub213sd
+ ; CHECK-DAG: vmovaps (%rcx), [[XMM1:%xmm[0-9]+]]
+ ; CHECK-DAG: vmovaps (%rdx), [[XMM0:%xmm[0-9]+]]
+ ; CHECK: fmsub213sd (%r8), [[XMM1]], [[XMM0]]
%res = call <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) nounwind
ret <2 x double> %res
}
@@ -120,7 +134,9 @@ define <2 x double> @test_x86_fmsub_pd(<2 x double> %a0, <2 x double> %a1, <2 x
declare <2 x double> @llvm.x86.fma.vfmsub.pd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
define <2 x double> @test_x86_fnmsub_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
- ; CHECK: fnmsub213sd
+ ; CHECK-DAG: vmovaps (%rcx), [[XMM1:%xmm[0-9]+]]
+ ; CHECK-DAG: vmovaps (%rdx), [[XMM0:%xmm[0-9]+]]
+ ; CHECK: fnmsub213sd (%r8), [[XMM1]], [[XMM0]]
%res = call <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) nounwind
ret <2 x double> %res
}
diff --git a/test/CodeGen/X86/fma4-intrinsics-x86_64-folded-load.ll b/test/CodeGen/X86/fma4-intrinsics-x86_64-folded-load.ll
index 64a2068aea43..85de1ef5c9dc 100644
--- a/test/CodeGen/X86/fma4-intrinsics-x86_64-folded-load.ll
+++ b/test/CodeGen/X86/fma4-intrinsics-x86_64-folded-load.ll
@@ -4,14 +4,14 @@
; VFMADD
define < 4 x float > @test_x86_fma_vfmadd_ss_load(< 4 x float > %a0, < 4 x float > %a1, float* %a2) {
; CHECK: vfmaddss (%{{.*}})
- %x = load float *%a2
+ %x = load float , float *%a2
%y = insertelement <4 x float> undef, float %x, i32 0
%res = call < 4 x float > @llvm.x86.fma.vfmadd.ss(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %y)
ret < 4 x float > %res
}
define < 4 x float > @test_x86_fma_vfmadd_ss_load2(< 4 x float > %a0, float* %a1, < 4 x float > %a2) {
; CHECK: vfmaddss %{{.*}}, (%{{.*}})
- %x = load float *%a1
+ %x = load float , float *%a1
%y = insertelement <4 x float> undef, float %x, i32 0
%res = call < 4 x float > @llvm.x86.fma.vfmadd.ss(< 4 x float > %a0, < 4 x float > %y, < 4 x float > %a2)
ret < 4 x float > %res
@@ -21,14 +21,14 @@ declare < 4 x float > @llvm.x86.fma.vfmadd.ss(< 4 x float >, < 4 x float >, < 4
define < 2 x double > @test_x86_fma_vfmadd_sd_load(< 2 x double > %a0, < 2 x double > %a1, double* %a2) {
; CHECK: vfmaddsd (%{{.*}})
- %x = load double *%a2
+ %x = load double , double *%a2
%y = insertelement <2 x double> undef, double %x, i32 0
%res = call < 2 x double > @llvm.x86.fma.vfmadd.sd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %y)
ret < 2 x double > %res
}
define < 2 x double > @test_x86_fma_vfmadd_sd_load2(< 2 x double > %a0, double* %a1, < 2 x double > %a2) {
; CHECK: vfmaddsd %{{.*}}, (%{{.*}})
- %x = load double *%a1
+ %x = load double , double *%a1
%y = insertelement <2 x double> undef, double %x, i32 0
%res = call < 2 x double > @llvm.x86.fma.vfmadd.sd(< 2 x double > %a0, < 2 x double > %y, < 2 x double > %a2)
ret < 2 x double > %res
@@ -36,13 +36,13 @@ define < 2 x double > @test_x86_fma_vfmadd_sd_load2(< 2 x double > %a0, double*
declare < 2 x double > @llvm.x86.fma.vfmadd.sd(< 2 x double >, < 2 x double >, < 2 x double >) nounwind readnone
define < 4 x float > @test_x86_fma_vfmadd_ps_load(< 4 x float > %a0, < 4 x float > %a1, < 4 x float >* %a2) {
; CHECK: vfmaddps (%{{.*}})
- %x = load <4 x float>* %a2
+ %x = load <4 x float>, <4 x float>* %a2
%res = call < 4 x float > @llvm.x86.fma.vfmadd.ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %x)
ret < 4 x float > %res
}
define < 4 x float > @test_x86_fma_vfmadd_ps_load2(< 4 x float > %a0, < 4 x float >* %a1, < 4 x float > %a2) {
; CHECK: vfmaddps %{{.*}}, (%{{.*}})
- %x = load <4 x float>* %a1
+ %x = load <4 x float>, <4 x float>* %a1
%res = call < 4 x float > @llvm.x86.fma.vfmadd.ps(< 4 x float > %a0, < 4 x float > %x, < 4 x float > %a2)
ret < 4 x float > %res
}
@@ -52,21 +52,21 @@ declare < 4 x float > @llvm.x86.fma.vfmadd.ps(< 4 x float >, < 4 x float >, < 4
define < 4 x float > @test_x86_fma_vfmadd_ps_load3(< 4 x float >* %a0, < 4 x float >* %a1, < 4 x float > %a2) {
; CHECK: vmovaps
; CHECK: vfmaddps %{{.*}}, (%{{.*}})
- %x = load <4 x float>* %a0
- %y = load <4 x float>* %a1
+ %x = load <4 x float>, <4 x float>* %a0
+ %y = load <4 x float>, <4 x float>* %a1
%res = call < 4 x float > @llvm.x86.fma.vfmadd.ps(< 4 x float > %x, < 4 x float > %y, < 4 x float > %a2)
ret < 4 x float > %res
}
define < 2 x double > @test_x86_fma_vfmadd_pd_load(< 2 x double > %a0, < 2 x double > %a1, < 2 x double >* %a2) {
; CHECK: vfmaddpd (%{{.*}})
- %x = load <2 x double>* %a2
+ %x = load <2 x double>, <2 x double>* %a2
%res = call < 2 x double > @llvm.x86.fma.vfmadd.pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %x)
ret < 2 x double > %res
}
define < 2 x double > @test_x86_fma_vfmadd_pd_load2(< 2 x double > %a0, < 2 x double >* %a1, < 2 x double > %a2) {
; CHECK: vfmaddpd %{{.*}}, (%{{.*}})
- %x = load <2 x double>* %a1
+ %x = load <2 x double>, <2 x double>* %a1
%res = call < 2 x double > @llvm.x86.fma.vfmadd.pd(< 2 x double > %a0, < 2 x double > %x, < 2 x double > %a2)
ret < 2 x double > %res
}
@@ -76,8 +76,8 @@ declare < 2 x double > @llvm.x86.fma.vfmadd.pd(< 2 x double >, < 2 x double >, <
define < 2 x double > @test_x86_fma_vfmadd_pd_load3(< 2 x double >* %a0, < 2 x double >* %a1, < 2 x double > %a2) {
; CHECK: vmovapd
; CHECK: vfmaddpd %{{.*}}, (%{{.*}})
- %x = load <2 x double>* %a0
- %y = load <2 x double>* %a1
+ %x = load <2 x double>, <2 x double>* %a0
+ %y = load <2 x double>, <2 x double>* %a1
%res = call < 2 x double > @llvm.x86.fma.vfmadd.pd(< 2 x double > %x, < 2 x double > %y, < 2 x double > %a2)
ret < 2 x double > %res
}
diff --git a/test/CodeGen/X86/fma_patterns.ll b/test/CodeGen/X86/fma_patterns.ll
index 9b52db9f14e9..a27b760face7 100644
--- a/test/CodeGen/X86/fma_patterns.ll
+++ b/test/CodeGen/X86/fma_patterns.ll
@@ -190,7 +190,7 @@ define float @test_x86_fnmsub_ss(float %a0, float %a1, float %a2) {
; CHECK_FMA4: vfmaddps %xmm1, (%rdi), %xmm0, %xmm0
; CHECK_FMA4: ret
define <4 x float> @test_x86_fmadd_ps_load(<4 x float>* %a0, <4 x float> %a1, <4 x float> %a2) {
- %x = load <4 x float>* %a0
+ %x = load <4 x float>, <4 x float>* %a0
%y = fmul <4 x float> %x, %a1
%res = fadd <4 x float> %y, %a2
ret <4 x float> %res
@@ -204,7 +204,7 @@ define <4 x float> @test_x86_fmadd_ps_load(<4 x float>* %a0, <4 x float> %a1, <4
; CHECK_FMA4: vfmsubps %xmm1, (%rdi), %xmm0, %xmm0
; CHECK_FMA4: ret
define <4 x float> @test_x86_fmsub_ps_load(<4 x float>* %a0, <4 x float> %a1, <4 x float> %a2) {
- %x = load <4 x float>* %a0
+ %x = load <4 x float>, <4 x float>* %a0
%y = fmul <4 x float> %x, %a1
%res = fsub <4 x float> %y, %a2
ret <4 x float> %res
diff --git a/test/CodeGen/X86/fmul-combines.ll b/test/CodeGen/X86/fmul-combines.ll
index 703651153c11..7d75611e1330 100644
--- a/test/CodeGen/X86/fmul-combines.ll
+++ b/test/CodeGen/X86/fmul-combines.ll
@@ -103,6 +103,40 @@ define <4 x float> @fmul_v4f32_two_consts_no_splat_multiple_use(<4 x float> %x)
ret <4 x float> %a
}
+; PR22698 - http://llvm.org/bugs/show_bug.cgi?id=22698
+; Make sure that we don't infinite loop swapping constants back and forth.
+
+define <4 x float> @PR22698_splats(<4 x float> %a) #0 {
+ %mul1 = fmul fast <4 x float> <float 2.0, float 2.0, float 2.0, float 2.0>, <float 3.0, float 3.0, float 3.0, float 3.0>
+ %mul2 = fmul fast <4 x float> <float 4.0, float 4.0, float 4.0, float 4.0>, %mul1
+ %mul3 = fmul fast <4 x float> %a, %mul2
+ ret <4 x float> %mul3
+
+; CHECK: float 2.400000e+01
+; CHECK: float 2.400000e+01
+; CHECK: float 2.400000e+01
+; CHECK: float 2.400000e+01
+; CHECK-LABEL: PR22698_splats:
+; CHECK: mulps
+; CHECK: ret
+}
+
+; Same as above, but verify that non-splat vectors are handled correctly too.
+define <4 x float> @PR22698_no_splats(<4 x float> %a) #0 {
+ %mul1 = fmul fast <4 x float> <float 1.0, float 2.0, float 3.0, float 4.0>, <float 5.0, float 6.0, float 7.0, float 8.0>
+ %mul2 = fmul fast <4 x float> <float 9.0, float 10.0, float 11.0, float 12.0>, %mul1
+ %mul3 = fmul fast <4 x float> %a, %mul2
+ ret <4 x float> %mul3
+
+; CHECK: float 4.500000e+01
+; CHECK: float 1.200000e+02
+; CHECK: float 2.310000e+02
+; CHECK: float 3.840000e+02
+; CHECK-LABEL: PR22698_no_splats:
+; CHECK: mulps
+; CHECK: ret
+}
+
; CHECK-LABEL: fmul_c2_c4_f32:
; CHECK-NOT: addss
; CHECK: mulss
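The constants checked in PR22698_splats and PR22698_no_splats above are simply the element-wise products of the constant operands (2*3*4 = 24; and 1*5*9 = 45, 2*6*10 = 120, 3*7*11 = 231, 4*8*12 = 384), folded so that only one multiply against %a remains. A sketch of the folded splat case (PR22698_splats_folded is a hypothetical name used only for illustration, not part of the test):

define <4 x float> @PR22698_splats_folded(<4 x float> %a) {
  ; the three constant vectors fold into a single splat of 24.0,
  ; leaving one mulps instead of three
  %mul = fmul fast <4 x float> %a, <float 2.400000e+01, float 2.400000e+01, float 2.400000e+01, float 2.400000e+01>
  ret <4 x float> %mul
}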
diff --git a/test/CodeGen/X86/fmul-zero.ll b/test/CodeGen/X86/fmul-zero.ll
index 03bad6594128..bc139f88534f 100644
--- a/test/CodeGen/X86/fmul-zero.ll
+++ b/test/CodeGen/X86/fmul-zero.ll
@@ -2,7 +2,7 @@
; RUN: llc < %s -march=x86-64 | grep mulps
define void @test14(<4 x float>*) nounwind {
- load <4 x float>* %0, align 1
+ load <4 x float>, <4 x float>* %0, align 1
fmul <4 x float> %2, zeroinitializer
store <4 x float> %3, <4 x float>* %0, align 1
ret void
diff --git a/test/CodeGen/X86/fnabs.ll b/test/CodeGen/X86/fnabs.ll
index 19718d3ff926..a3ddfb970dc4 100644
--- a/test/CodeGen/X86/fnabs.ll
+++ b/test/CodeGen/X86/fnabs.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=corei7-avx| FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx| FileCheck %s
; Verify that we generate a single OR instruction for a scalar, vec128, and vec256
; FNABS(x) operation -> FNEG (FABS(x)).
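As the comment above notes, FNABS(x) is FNEG(FABS(x)): clearing the sign bit and then forcing it back on is equivalent to just setting it, which is why a single OR against the sign-bit mask suffices. A minimal scalar sketch of the pattern being matched (fnabs_sketch and the accompanying fabs declaration are written out here only for illustration; they are not part of the test file):

define float @fnabs_sketch(float %x) {
  %abs  = call float @llvm.fabs.f32(float %x)   ; clear the sign bit
  %nabs = fsub float -0.000000e+00, %abs        ; fneg: force the sign bit back on
  ret float %nabs                                ; expected to lower to one OR with the sign mask
}

declare float @llvm.fabs.f32(float)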
diff --git a/test/CodeGen/X86/fold-add.ll b/test/CodeGen/X86/fold-add.ll
index 0b27387b73bf..7d2740074082 100644
--- a/test/CodeGen/X86/fold-add.ll
+++ b/test/CodeGen/X86/fold-add.ll
@@ -13,12 +13,12 @@ define fastcc i32 @longest_match(i32 %cur_match) nounwind {
; CHECK: ret
entry:
- %0 = load i32* @prev_length, align 4 ; <i32> [#uses=3]
+ %0 = load i32, i32* @prev_length, align 4 ; <i32> [#uses=3]
%1 = zext i32 %cur_match to i64 ; <i64> [#uses=1]
%2 = sext i32 %0 to i64 ; <i64> [#uses=1]
%.sum3 = add i64 %1, %2 ; <i64> [#uses=1]
- %3 = getelementptr [65536 x i8]* @window, i64 0, i64 %.sum3 ; <i8*> [#uses=1]
- %4 = load i8* %3, align 1 ; <i8> [#uses=1]
+ %3 = getelementptr [65536 x i8], [65536 x i8]* @window, i64 0, i64 %.sum3 ; <i8*> [#uses=1]
+ %4 = load i8, i8* %3, align 1 ; <i8> [#uses=1]
%5 = icmp eq i8 %4, 0 ; <i1> [#uses=1]
br i1 %5, label %bb5, label %bb23
diff --git a/test/CodeGen/X86/fold-and-shift.ll b/test/CodeGen/X86/fold-and-shift.ll
index a5eb8b5de3a4..00173efff69b 100644
--- a/test/CodeGen/X86/fold-and-shift.ll
+++ b/test/CodeGen/X86/fold-and-shift.ll
@@ -10,9 +10,9 @@ define i32 @t1(i8* %X, i32 %i) {
entry:
%tmp2 = shl i32 %i, 2
%tmp4 = and i32 %tmp2, 1020
- %tmp7 = getelementptr i8* %X, i32 %tmp4
+ %tmp7 = getelementptr i8, i8* %X, i32 %tmp4
%tmp78 = bitcast i8* %tmp7 to i32*
- %tmp9 = load i32* %tmp78
+ %tmp9 = load i32, i32* %tmp78
ret i32 %tmp9
}
@@ -26,9 +26,9 @@ define i32 @t2(i16* %X, i32 %i) {
entry:
%tmp2 = shl i32 %i, 1
%tmp4 = and i32 %tmp2, 131070
- %tmp7 = getelementptr i16* %X, i32 %tmp4
+ %tmp7 = getelementptr i16, i16* %X, i32 %tmp4
%tmp78 = bitcast i16* %tmp7 to i32*
- %tmp9 = load i32* %tmp78
+ %tmp9 = load i32, i32* %tmp78
ret i32 %tmp9
}
@@ -46,11 +46,11 @@ define i32 @t3(i16* %i.ptr, i32* %arr) {
; CHECK: ret
entry:
- %i = load i16* %i.ptr
+ %i = load i16, i16* %i.ptr
%i.zext = zext i16 %i to i32
%index = lshr i32 %i.zext, 11
- %val.ptr = getelementptr inbounds i32* %arr, i32 %index
- %val = load i32* %val.ptr
+ %val.ptr = getelementptr inbounds i32, i32* %arr, i32 %index
+ %val = load i32, i32* %val.ptr
%sum = add i32 %val, %i.zext
ret i32 %sum
}
@@ -65,12 +65,12 @@ define i32 @t4(i16* %i.ptr, i32* %arr) {
; CHECK: ret
entry:
- %i = load i16* %i.ptr
+ %i = load i16, i16* %i.ptr
%i.zext = zext i16 %i to i32
%index = lshr i32 %i.zext, 11
%index.zext = zext i32 %index to i64
- %val.ptr = getelementptr inbounds i32* %arr, i64 %index.zext
- %val = load i32* %val.ptr
+ %val.ptr = getelementptr inbounds i32, i32* %arr, i64 %index.zext
+ %val = load i32, i32* %val.ptr
%sum.1 = add i32 %val, %i.zext
%sum.2 = add i32 %sum.1, %index
ret i32 %sum.2
diff --git a/test/CodeGen/X86/fold-call-2.ll b/test/CodeGen/X86/fold-call-2.ll
index 7a2b03833ae9..b5e2606410f0 100644
--- a/test/CodeGen/X86/fold-call-2.ll
+++ b/test/CodeGen/X86/fold-call-2.ll
@@ -4,7 +4,7 @@
define i32 @main() nounwind {
entry:
- load void ()** @f, align 8 ; <void ()*>:0 [#uses=1]
+ load void ()*, void ()** @f, align 8 ; <void ()*>:0 [#uses=1]
tail call void %0( ) nounwind
ret i32 0
}
diff --git a/test/CodeGen/X86/fold-call-3.ll b/test/CodeGen/X86/fold-call-3.ll
index 337a7edb1736..e7e81b9422a1 100644
--- a/test/CodeGen/X86/fold-call-3.ll
+++ b/test/CodeGen/X86/fold-call-3.ll
@@ -10,33 +10,33 @@
define void @_Z25RawPointerPerformanceTestPvRN5clang6ActionE(i8* %Val, %"struct.clang::Action"* %Actions) nounwind {
entry:
%0 = alloca %"struct.clang::ActionBase::ActionResult<0u>", align 8 ; <%"struct.clang::ActionBase::ActionResult<0u>"*> [#uses=3]
- %1 = load i32* @NumTrials, align 4 ; <i32> [#uses=1]
+ %1 = load i32, i32* @NumTrials, align 4 ; <i32> [#uses=1]
%2 = icmp eq i32 %1, 0 ; <i1> [#uses=1]
br i1 %2, label %return, label %bb.nph
bb.nph: ; preds = %entry
- %3 = getelementptr %"struct.clang::Action"* %Actions, i64 0, i32 0, i32 0 ; <i32 (...)***> [#uses=1]
+ %3 = getelementptr %"struct.clang::Action", %"struct.clang::Action"* %Actions, i64 0, i32 0, i32 0 ; <i32 (...)***> [#uses=1]
%mrv_gep = bitcast %"struct.clang::ActionBase::ActionResult<0u>"* %0 to i64* ; <i64*> [#uses=1]
- %mrv_gep1 = getelementptr %"struct.clang::ActionBase::ActionResult<0u>"* %0, i64 0, i32 1 ; <i8*> [#uses=1]
+ %mrv_gep1 = getelementptr %"struct.clang::ActionBase::ActionResult<0u>", %"struct.clang::ActionBase::ActionResult<0u>"* %0, i64 0, i32 1 ; <i8*> [#uses=1]
%4 = bitcast i8* %mrv_gep1 to i64* ; <i64*> [#uses=1]
- %5 = getelementptr %"struct.clang::ActionBase::ActionResult<0u>"* %0, i64 0, i32 0 ; <i8**> [#uses=1]
+ %5 = getelementptr %"struct.clang::ActionBase::ActionResult<0u>", %"struct.clang::ActionBase::ActionResult<0u>"* %0, i64 0, i32 0 ; <i8**> [#uses=1]
br label %bb
bb: ; preds = %bb, %bb.nph
%Trial.01 = phi i32 [ 0, %bb.nph ], [ %12, %bb ] ; <i32> [#uses=1]
%Val_addr.02 = phi i8* [ %Val, %bb.nph ], [ %11, %bb ] ; <i8*> [#uses=1]
- %6 = load i32 (...)*** %3, align 8 ; <i32 (...)**> [#uses=1]
- %7 = getelementptr i32 (...)** %6, i64 70 ; <i32 (...)**> [#uses=1]
- %8 = load i32 (...)** %7, align 8 ; <i32 (...)*> [#uses=1]
+ %6 = load i32 (...)**, i32 (...)*** %3, align 8 ; <i32 (...)**> [#uses=1]
+ %7 = getelementptr i32 (...)*, i32 (...)** %6, i64 70 ; <i32 (...)**> [#uses=1]
+ %8 = load i32 (...)*, i32 (...)** %7, align 8 ; <i32 (...)*> [#uses=1]
%9 = bitcast i32 (...)* %8 to { i64, i64 } (%"struct.clang::Action"*, i8*)* ; <{ i64, i64 } (%"struct.clang::Action"*, i8*)*> [#uses=1]
%10 = call { i64, i64 } %9(%"struct.clang::Action"* %Actions, i8* %Val_addr.02) nounwind ; <{ i64, i64 }> [#uses=2]
%mrv_gr = extractvalue { i64, i64 } %10, 0 ; <i64> [#uses=1]
store i64 %mrv_gr, i64* %mrv_gep
%mrv_gr2 = extractvalue { i64, i64 } %10, 1 ; <i64> [#uses=1]
store i64 %mrv_gr2, i64* %4
- %11 = load i8** %5, align 8 ; <i8*> [#uses=1]
+ %11 = load i8*, i8** %5, align 8 ; <i8*> [#uses=1]
%12 = add i32 %Trial.01, 1 ; <i32> [#uses=2]
- %13 = load i32* @NumTrials, align 4 ; <i32> [#uses=1]
+ %13 = load i32, i32* @NumTrials, align 4 ; <i32> [#uses=1]
%14 = icmp ult i32 %12, %13 ; <i1> [#uses=1]
br i1 %14, label %bb, label %return
diff --git a/test/CodeGen/X86/fold-call-oper.ll b/test/CodeGen/X86/fold-call-oper.ll
index 94e2a6f70506..1e3e58ddc6cd 100644
--- a/test/CodeGen/X86/fold-call-oper.ll
+++ b/test/CodeGen/X86/fold-call-oper.ll
@@ -14,7 +14,7 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
; CHECK: callq *{{.*}}(%rbp)
define void @foldCallOper(i32 (i32*, i32, i32**)* nocapture %p1) #0 {
entry:
- %0 = load i32*** @a, align 8
+ %0 = load i32**, i32*** @a, align 8
br label %for.body.i
for.body.i: ; preds = %for.body.i, %entry
@@ -33,14 +33,14 @@ for.body3.i: ; preds = %for.inc8.i, %for.bo
br i1 %tobool.i, label %for.inc8.i, label %if.then.i
if.then.i: ; preds = %for.body3.i
- %2 = load i32* %1, align 4
+ %2 = load i32, i32* %1, align 4
store i32 %2, i32* @b, align 4
br label %for.inc8.i
for.inc8.i: ; preds = %if.then.i, %for.body3.i
%lftr.wideiv.i = trunc i64 %indvars.iv.i to i32
- %arrayidx4.phi.trans.insert.i = getelementptr inbounds [0 x i32*]* undef, i64 0, i64 %indvars.iv.i
- %.pre.i = load i32** %arrayidx4.phi.trans.insert.i, align 8
+ %arrayidx4.phi.trans.insert.i = getelementptr inbounds [0 x i32*], [0 x i32*]* undef, i64 0, i64 %indvars.iv.i
+ %.pre.i = load i32*, i32** %arrayidx4.phi.trans.insert.i, align 8
%phitmp.i = add i64 %indvars.iv.i, 1
br label %for.body3.i
}
diff --git a/test/CodeGen/X86/fold-call.ll b/test/CodeGen/X86/fold-call.ll
index 35327faa6486..00839943f678 100644
--- a/test/CodeGen/X86/fold-call.ll
+++ b/test/CodeGen/X86/fold-call.ll
@@ -19,8 +19,8 @@ define void @test1(i32 %i0, i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, void()*
%struct.X = type { void ()* }
define void @test2(%struct.X* nocapture %x) {
entry:
- %f = getelementptr inbounds %struct.X* %x, i64 0, i32 0
- %0 = load void ()** %f
+ %f = getelementptr inbounds %struct.X, %struct.X* %x, i64 0, i32 0
+ %0 = load void ()*, void ()** %f
store void ()* null, void ()** %f
tail call void %0()
ret void
diff --git a/test/CodeGen/X86/fold-load-unops.ll b/test/CodeGen/X86/fold-load-unops.ll
new file mode 100644
index 000000000000..fcde0218158a
--- /dev/null
+++ b/test/CodeGen/X86/fold-load-unops.ll
@@ -0,0 +1,79 @@
+; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+sse2 < %s | FileCheck %s --check-prefix=SSE
+; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+avx < %s | FileCheck %s --check-prefix=AVX
+
+; Verify that we're folding the load into the math instruction.
+
+define float @rcpss(float* %a) {
+; SSE-LABEL: rcpss:
+; SSE: # BB#0:
+; SSE-NEXT: rcpss (%rdi), %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: rcpss:
+; AVX: # BB#0:
+; AVX-NEXT: vrcpss (%rdi), %xmm0, %xmm0
+; AVX-NEXT: retq
+ %ld = load float, float* %a
+ %ins = insertelement <4 x float> undef, float %ld, i32 0
+ %res = tail call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %ins)
+ %ext = extractelement <4 x float> %res, i32 0
+ ret float %ext
+}
+
+define float @rsqrtss(float* %a) {
+; SSE-LABEL: rsqrtss:
+; SSE: # BB#0:
+; SSE-NEXT: rsqrtss (%rdi), %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: rsqrtss:
+; AVX: # BB#0:
+; AVX-NEXT: vrsqrtss (%rdi), %xmm0, %xmm0
+; AVX-NEXT: retq
+ %ld = load float, float* %a
+ %ins = insertelement <4 x float> undef, float %ld, i32 0
+ %res = tail call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> %ins)
+ %ext = extractelement <4 x float> %res, i32 0
+ ret float %ext
+}
+
+define float @sqrtss(float* %a) {
+; SSE-LABEL: sqrtss:
+; SSE: # BB#0:
+; SSE-NEXT: sqrtss (%rdi), %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: sqrtss:
+; AVX: # BB#0:
+; AVX-NEXT: vsqrtss (%rdi), %xmm0, %xmm0
+; AVX-NEXT: retq
+ %ld = load float, float* %a
+ %ins = insertelement <4 x float> undef, float %ld, i32 0
+ %res = tail call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %ins)
+ %ext = extractelement <4 x float> %res, i32 0
+ ret float %ext
+}
+
+define double @sqrtsd(double* %a) {
+; SSE-LABEL: sqrtsd:
+; SSE: # BB#0:
+; SSE-NEXT: sqrtsd (%rdi), %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: sqrtsd:
+; AVX: # BB#0:
+; AVX-NEXT: vsqrtsd (%rdi), %xmm0, %xmm0
+; AVX-NEXT: retq
+ %ld = load double, double* %a
+ %ins = insertelement <2 x double> undef, double %ld, i32 0
+ %res = tail call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %ins)
+ %ext = extractelement <2 x double> %res, i32 0
+ ret double %ext
+}
+
+
+declare <4 x float> @llvm.x86.sse.rcp.ss(<4 x float>) nounwind readnone
+declare <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float>) nounwind readnone
+declare <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float>) nounwind readnone
+declare <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double>) nounwind readnone
+
diff --git a/test/CodeGen/X86/fold-load-vec.ll b/test/CodeGen/X86/fold-load-vec.ll
index 96c5be4f752f..657b7bdd24ff 100644
--- a/test/CodeGen/X86/fold-load-vec.ll
+++ b/test/CodeGen/X86/fold-load-vec.ll
@@ -14,24 +14,24 @@ entry:
store <4 x float>* %source, <4 x float>** %source.addr, align 8
store <2 x float>* %dest, <2 x float>** %dest.addr, align 8
store <2 x float> zeroinitializer, <2 x float>* %tmp, align 8
- %0 = load <4 x float>** %source.addr, align 8
- %arrayidx = getelementptr inbounds <4 x float>* %0, i64 0
- %1 = load <4 x float>* %arrayidx, align 16
+ %0 = load <4 x float>*, <4 x float>** %source.addr, align 8
+ %arrayidx = getelementptr inbounds <4 x float>, <4 x float>* %0, i64 0
+ %1 = load <4 x float>, <4 x float>* %arrayidx, align 16
%2 = extractelement <4 x float> %1, i32 0
- %3 = load <2 x float>* %tmp, align 8
+ %3 = load <2 x float>, <2 x float>* %tmp, align 8
%4 = insertelement <2 x float> %3, float %2, i32 1
store <2 x float> %4, <2 x float>* %tmp, align 8
- %5 = load <2 x float>* %tmp, align 8
- %6 = load <2 x float>** %dest.addr, align 8
- %arrayidx1 = getelementptr inbounds <2 x float>* %6, i64 0
+ %5 = load <2 x float>, <2 x float>* %tmp, align 8
+ %6 = load <2 x float>*, <2 x float>** %dest.addr, align 8
+ %arrayidx1 = getelementptr inbounds <2 x float>, <2 x float>* %6, i64 0
store <2 x float> %5, <2 x float>* %arrayidx1, align 8
- %7 = load <2 x float>** %dest.addr, align 8
- %arrayidx2 = getelementptr inbounds <2 x float>* %7, i64 0
- %8 = load <2 x float>* %arrayidx2, align 8
+ %7 = load <2 x float>*, <2 x float>** %dest.addr, align 8
+ %arrayidx2 = getelementptr inbounds <2 x float>, <2 x float>* %7, i64 0
+ %8 = load <2 x float>, <2 x float>* %arrayidx2, align 8
%vecext = extractelement <2 x float> %8, i32 0
- %9 = load <2 x float>** %dest.addr, align 8
- %arrayidx3 = getelementptr inbounds <2 x float>* %9, i64 0
- %10 = load <2 x float>* %arrayidx3, align 8
+ %9 = load <2 x float>*, <2 x float>** %dest.addr, align 8
+ %arrayidx3 = getelementptr inbounds <2 x float>, <2 x float>* %9, i64 0
+ %10 = load <2 x float>, <2 x float>* %arrayidx3, align 8
%vecext4 = extractelement <2 x float> %10, i32 1
call void @ext(float %vecext, float %vecext4)
ret void
diff --git a/test/CodeGen/X86/fold-load.ll b/test/CodeGen/X86/fold-load.ll
index dde0a2d1c5d3..8cdc58bb75e0 100644
--- a/test/CodeGen/X86/fold-load.ll
+++ b/test/CodeGen/X86/fold-load.ll
@@ -10,7 +10,7 @@ entry:
cond_true: ; preds = %entry
%new_size.0.i = select i1 false, i32 0, i32 0 ; <i32> [#uses=1]
- %tmp.i = load i32* bitcast (i8* getelementptr (%struct.obstack* @stmt_obstack, i32 0, i32 10) to i32*) ; <i32> [#uses=1]
+ %tmp.i = load i32, i32* bitcast (i8* getelementptr (%struct.obstack, %struct.obstack* @stmt_obstack, i32 0, i32 10) to i32*) ; <i32> [#uses=1]
%tmp.i.upgrd.1 = trunc i32 %tmp.i to i8 ; <i8> [#uses=1]
%tmp21.i = and i8 %tmp.i.upgrd.1, 1 ; <i8> [#uses=1]
%tmp22.i = icmp eq i8 %tmp21.i, 0 ; <i1> [#uses=1]
@@ -30,7 +30,7 @@ cond_next: ; preds = %entry
define i32 @test2(i16* %P, i16* %Q) nounwind {
- %A = load i16* %P, align 4 ; <i16> [#uses=11]
+ %A = load i16, i16* %P, align 4 ; <i16> [#uses=11]
%C = zext i16 %A to i32 ; <i32> [#uses=1]
%D = and i32 %C, 255 ; <i32> [#uses=1]
br label %L
@@ -54,8 +54,8 @@ define i1 @test3(i32* %P, i32* %Q) nounwind {
; CHECK: xorl (%e
; CHECK: j
entry:
- %0 = load i32* %P, align 4
- %1 = load i32* %Q, align 4
+ %0 = load i32, i32* %P, align 4
+ %1 = load i32, i32* %Q, align 4
%2 = xor i32 %0, %1
%3 = and i32 %2, 89947
%4 = icmp eq i32 %3, 0
diff --git a/test/CodeGen/X86/fold-mul-lohi.ll b/test/CodeGen/X86/fold-mul-lohi.ll
index 5614c808d0e6..8d4c5ef9eb22 100644
--- a/test/CodeGen/X86/fold-mul-lohi.ll
+++ b/test/CodeGen/X86/fold-mul-lohi.ll
@@ -13,13 +13,13 @@ entry:
bb:
%i.019.0 = phi i32 [ %indvar.next, %bb ], [ 0, %entry ]
- %tmp2 = getelementptr [1000 x i8]* @B, i32 0, i32 %i.019.0
- %tmp3 = load i8* %tmp2, align 4
+ %tmp2 = getelementptr [1000 x i8], [1000 x i8]* @B, i32 0, i32 %i.019.0
+ %tmp3 = load i8, i8* %tmp2, align 4
%tmp4 = mul i8 %tmp3, 2
- %tmp5 = getelementptr [1000 x i8]* @A, i32 0, i32 %i.019.0
+ %tmp5 = getelementptr [1000 x i8], [1000 x i8]* @A, i32 0, i32 %i.019.0
store i8 %tmp4, i8* %tmp5, align 4
%tmp8 = mul i32 %i.019.0, 9
- %tmp10 = getelementptr [1000 x i8]* @P, i32 0, i32 %tmp8
+ %tmp10 = getelementptr [1000 x i8], [1000 x i8]* @P, i32 0, i32 %tmp8
store i8 17, i8* %tmp10, align 4
%indvar.next = add i32 %i.019.0, 1
%exitcond = icmp eq i32 %indvar.next, %m
diff --git a/test/CodeGen/X86/fold-pcmpeqd-2.ll b/test/CodeGen/X86/fold-pcmpeqd-2.ll
index 60a6844b39b2..d95c6323de4e 100644
--- a/test/CodeGen/X86/fold-pcmpeqd-2.ll
+++ b/test/CodeGen/X86/fold-pcmpeqd-2.ll
@@ -20,7 +20,7 @@
define void @program_1(%struct._image2d_t* %dest, %struct._image2d_t* %t0, <4 x float> %p0, <4 x float> %p1, <4 x float> %p4, <4 x float> %p5, <4 x float> %p6) nounwind {
entry:
- %tmp3.i = load i32* null ; <i32> [#uses=1]
+ %tmp3.i = load i32, i32* null ; <i32> [#uses=1]
%cmp = icmp slt i32 0, %tmp3.i ; <i1> [#uses=1]
br i1 %cmp, label %forcond, label %ifthen
@@ -28,7 +28,7 @@ ifthen: ; preds = %entry
ret void
forcond: ; preds = %entry
- %tmp3.i536 = load i32* null ; <i32> [#uses=1]
+ %tmp3.i536 = load i32, i32* null ; <i32> [#uses=1]
%cmp12 = icmp slt i32 0, %tmp3.i536 ; <i1> [#uses=1]
br i1 %cmp12, label %forbody, label %afterfor
diff --git a/test/CodeGen/X86/fold-sext-trunc.ll b/test/CodeGen/X86/fold-sext-trunc.ll
index b453310608ec..df06e70b0c24 100644
--- a/test/CodeGen/X86/fold-sext-trunc.ll
+++ b/test/CodeGen/X86/fold-sext-trunc.ll
@@ -9,8 +9,8 @@ declare void @func_28(i64, i64)
define void @int322(i32 %foo) nounwind {
entry:
- %val = load i64* getelementptr (%0* bitcast (%struct.S1* @g_10 to %0*), i32 0, i32 0) ; <i64> [#uses=1]
- %0 = load i32* getelementptr (%struct.S1* @g_10, i32 0, i32 1), align 4 ; <i32> [#uses=1]
+ %val = load i64, i64* getelementptr (%0, %0* bitcast (%struct.S1* @g_10 to %0*), i32 0, i32 0) ; <i64> [#uses=1]
+ %0 = load i32, i32* getelementptr (%struct.S1, %struct.S1* @g_10, i32 0, i32 1), align 4 ; <i32> [#uses=1]
%1 = sext i32 %0 to i64 ; <i64> [#uses=1]
%tmp4.i = lshr i64 %val, 32 ; <i64> [#uses=1]
%tmp5.i = trunc i64 %tmp4.i to i32 ; <i32> [#uses=1]
diff --git a/test/CodeGen/X86/fold-tied-op.ll b/test/CodeGen/X86/fold-tied-op.ll
index 5bf5dbd1a9ce..62fed4219387 100644
--- a/test/CodeGen/X86/fold-tied-op.ll
+++ b/test/CodeGen/X86/fold-tied-op.ll
@@ -23,20 +23,20 @@ target triple = "i386--netbsd"
; Function Attrs: nounwind uwtable
define i64 @fn1() #0 {
entry:
- %0 = load i32* @a, align 4, !tbaa !1
+ %0 = load i32, i32* @a, align 4, !tbaa !1
%1 = inttoptr i32 %0 to %struct.XXH_state64_t*
- %total_len = getelementptr inbounds %struct.XXH_state64_t* %1, i32 0, i32 0
- %2 = load i32* %total_len, align 4, !tbaa !5
+ %total_len = getelementptr inbounds %struct.XXH_state64_t, %struct.XXH_state64_t* %1, i32 0, i32 0
+ %2 = load i32, i32* %total_len, align 4, !tbaa !5
%tobool = icmp eq i32 %2, 0
br i1 %tobool, label %if.else, label %if.then
if.then: ; preds = %entry
- %v3 = getelementptr inbounds %struct.XXH_state64_t* %1, i32 0, i32 3
- %3 = load i64* %v3, align 4, !tbaa !8
- %v4 = getelementptr inbounds %struct.XXH_state64_t* %1, i32 0, i32 4
- %4 = load i64* %v4, align 4, !tbaa !9
- %v2 = getelementptr inbounds %struct.XXH_state64_t* %1, i32 0, i32 2
- %5 = load i64* %v2, align 4, !tbaa !10
+ %v3 = getelementptr inbounds %struct.XXH_state64_t, %struct.XXH_state64_t* %1, i32 0, i32 3
+ %3 = load i64, i64* %v3, align 4, !tbaa !8
+ %v4 = getelementptr inbounds %struct.XXH_state64_t, %struct.XXH_state64_t* %1, i32 0, i32 4
+ %4 = load i64, i64* %v4, align 4, !tbaa !9
+ %v2 = getelementptr inbounds %struct.XXH_state64_t, %struct.XXH_state64_t* %1, i32 0, i32 2
+ %5 = load i64, i64* %v2, align 4, !tbaa !10
%shl = shl i64 %5, 1
%or = or i64 %shl, %5
%shl2 = shl i64 %3, 2
@@ -54,7 +54,7 @@ if.then: ; preds = %entry
br label %if.end
if.else: ; preds = %entry
- %6 = load i64* @b, align 8, !tbaa !11
+ %6 = load i64, i64* @b, align 8, !tbaa !11
%xor10 = xor i64 %6, -4417276706812531889
%mul11 = mul nsw i64 %xor10, 400714785074694791
br label %if.end
diff --git a/test/CodeGen/X86/fold-vector-bv-crash.ll b/test/CodeGen/X86/fold-vector-bv-crash.ll
new file mode 100644
index 000000000000..8c72afb7e970
--- /dev/null
+++ b/test/CodeGen/X86/fold-vector-bv-crash.ll
@@ -0,0 +1,17 @@
+; RUN: llc < %s -mtriple=i686-unknown -mattr=+avx
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx
+
+;
+; llvm-stress generated crash case due to build_vector implicit
+; truncation bug from constant folding after legalization.
+;
+
+@G = external global i32
+
+define void @bv_crash_test() {
+ %I = insertelement <4 x i64> zeroinitializer, i64 15910, i32 0
+ %Tr = trunc <4 x i64> %I to <4 x i8>
+ %Bc = bitcast <4 x i8> %Tr to i32
+ store volatile i32 %Bc, i32* @G
+ ret void
+}
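For context on the crash case above: once the trunc and bitcast are constant folded, the whole body should reduce to a single volatile store of a small constant, since 15910 truncated to i8 is 38 and <i8 38, i8 0, i8 0, i8 0> reinterpreted as i32 on a little-endian x86 target is 38; per the comment above, the crash came from the constant folder building the intermediate vector without truncating its elements. A hand-folded equivalent, reusing @G from the test above (the function name is hypothetical and not part of the test):

define void @bv_crash_test_folded() {
  ; trunc <4 x i64> <15910, 0, 0, 0> to <4 x i8> gives <38, 0, 0, 0>,
  ; which bitcasts to i32 38 on a little-endian target
  store volatile i32 38, i32* @G
  ret void
}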
diff --git a/test/CodeGen/X86/fold-vector-shuffle-crash.ll b/test/CodeGen/X86/fold-vector-shuffle-crash.ll
new file mode 100644
index 000000000000..df9c6924b272
--- /dev/null
+++ b/test/CodeGen/X86/fold-vector-shuffle-crash.ll
@@ -0,0 +1,386 @@
+; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=corei7
+
+define void @autogen_SD13708(i32) {
+BB:
+ %Shuff7 = shufflevector <8 x i32> zeroinitializer, <8 x i32> zeroinitializer, <8 x i32> <i32 8, i32 10, i32 12, i32 14, i32 undef, i32 2, i32 4, i32 undef>
+ br label %CF
+
+CF:
+ %Tr = trunc <8 x i64> zeroinitializer to <8 x i32>
+ %Shuff20 = shufflevector <8 x i32> %Shuff7, <8 x i32> %Tr, <8 x i32> <i32 13, i32 15, i32 1, i32 3, i32 5, i32 7, i32 undef, i32 11>
+ br i1 undef, label %CF, label %CF247
+
+CF247:
+ %I171 = insertelement <8 x i32> %Shuff20, i32 %0, i32 0
+ br i1 undef, label %CF, label %CF247
+}
+
+define void @autogen_SD13800(i8*, i32*, i64*, i32, i64, i8) {
+BB:
+ %A4 = alloca <8 x i1>
+ %A3 = alloca i32
+ %A2 = alloca <2 x float>
+ %A1 = alloca <2 x double>
+ %A = alloca <8 x float>
+ %L = load <8 x i1>, <8 x i1>* %A4
+ store i8 %5, i8* %0
+ %E = extractelement <2 x i64> zeroinitializer, i32 0
+ %Shuff = shufflevector <4 x i64> zeroinitializer, <4 x i64> zeroinitializer, <4 x i32> <i32 7, i32 undef, i32 undef, i32 5>
+ %I = insertelement <8 x i64> zeroinitializer, i64 419346, i32 1
+ %B = shl i64 426618, 419346
+ %Tr = trunc <8 x i64> %I to <8 x i16>
+ %Sl = select i1 false, <4 x i64> zeroinitializer, <4 x i64> zeroinitializer
+ %Cmp = icmp eq <16 x i64> zeroinitializer, zeroinitializer
+ %L5 = load i8, i8* %0
+ store i8 17, i8* %0
+ %E6 = extractelement <4 x i64> zeroinitializer, i32 1
+ %Shuff7 = shufflevector <2 x i64> zeroinitializer, <2 x i64> <i64 -1, i64 -1>, <2 x i32> <i32 0, i32 2>
+ %I8 = insertelement <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, i8 %L5, i32 2
+ %B9 = mul <8 x i16> %Tr, %Tr
+ %FC = fptosi float 0xBDF7B90B80000000 to i32
+ %Sl10 = select i1 false, float 0xBDF7B90B80000000, float 0xB875A90980000000
+ %Cmp11 = icmp slt <2 x i64> zeroinitializer, %Shuff7
+ %L12 = load <8 x float>, <8 x float>* %A
+ store <2 x double> <double 0xFFFFFFFFFFFFFFFF, double 0.000000e+00>, <2 x double>* %A1
+ %E13 = extractelement <4 x i64> zeroinitializer, i32 2
+ %Shuff14 = shufflevector <2 x i32> zeroinitializer, <2 x i32> zeroinitializer, <2 x i32> <i32 1, i32 3>
+ %I15 = insertelement <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, i8 %L5, i32 15
+ %B16 = add <2 x i64> zeroinitializer, <i64 -1, i64 -1>
+ %BC = bitcast i64 426618 to double
+ %Sl17 = select i1 false, <2 x i32> zeroinitializer, <2 x i32> zeroinitializer
+ %Cmp18 = icmp slt <8 x i1> %L, %L
+ %L19 = load i8, i8* %0
+ store i8 %L5, i8* %0
+ %E20 = extractelement <16 x i8> %I8, i32 1
+ %Shuff21 = shufflevector <8 x i64> zeroinitializer, <8 x i64> %I, <8 x i32> <i32 undef, i32 6, i32 8, i32 10, i32 12, i32 14, i32 0, i32 2>
+ %I22 = insertelement <8 x i16> %Tr, i16 18460, i32 6
+ %B23 = sub i64 419346, %4
+ %FC24 = fptosi double 0xE603EE221901D6A0 to i32
+ %Sl25 = select i1 false, i8 %L5, i8 %5
+ %Cmp26 = icmp ugt i64 %B, %B23
+ br label %CF253
+
+CF253: ; preds = %CF253, %CF271, %CF260, %BB
+ %L27 = load i8, i8* %0
+ store i8 %L5, i8* %0
+ %E28 = extractelement <2 x i64> zeroinitializer, i32 0
+ %Shuff29 = shufflevector <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, <16 x i8> %I8, <16 x i32> <i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 undef, i32 31, i32 undef, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13>
+ %I30 = insertelement <8 x i1> %Cmp18, i1 false, i32 1
+ %B31 = fsub double 0xE603EE221901D6A0, %BC
+ %Tr32 = trunc <2 x i64> <i64 -1, i64 -1> to <2 x i32>
+ %Sl33 = select i1 false, double %BC, double %B31
+ %Cmp34 = icmp sgt <2 x i32> zeroinitializer, %Shuff14
+ %L35 = load i8, i8* %0
+ store i8 %L5, i8* %0
+ %E36 = extractelement <16 x i8> %Shuff29, i32 5
+ %Shuff37 = shufflevector <8 x i64> zeroinitializer, <8 x i64> %I, <8 x i32> <i32 8, i32 undef, i32 undef, i32 14, i32 0, i32 undef, i32 4, i32 6>
+ %I38 = insertelement <4 x i64> zeroinitializer, i64 %E28, i32 2
+ %FC39 = uitofp i8 %5 to double
+ %Sl40 = select i1 %Cmp26, i32 %3, i32 %FC
+ %Cmp41 = icmp sgt <2 x i64> zeroinitializer, <i64 -1, i64 -1>
+ %L42 = load i8, i8* %0
+ store i8 17, i8* %0
+ %E43 = extractelement <2 x i1> %Cmp41, i32 1
+ br i1 %E43, label %CF253, label %CF256
+
+CF256: ; preds = %CF256, %CF253
+ %Shuff44 = shufflevector <8 x i64> zeroinitializer, <8 x i64> zeroinitializer, <8 x i32> <i32 14, i32 0, i32 2, i32 4, i32 6, i32 undef, i32 undef, i32 12>
+ %I45 = insertelement <8 x i32> zeroinitializer, i32 %FC, i32 0
+ %ZE = zext i8 %L19 to i32
+ %Sl46 = select i1 %E43, i8 %L35, i8 %L35
+ %Cmp47 = icmp ult i64 %E6, 426618
+ br i1 %Cmp47, label %CF256, label %CF271
+
+CF271: ; preds = %CF256
+ %L48 = load i8, i8* %0
+ store i8 %L27, i8* %0
+ %E49 = extractelement <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, i32 2
+ %Shuff50 = shufflevector <8 x i64> zeroinitializer, <8 x i64> zeroinitializer, <8 x i32> <i32 undef, i32 7, i32 undef, i32 11, i32 13, i32 15, i32 1, i32 3>
+ %I51 = insertelement <8 x i64> zeroinitializer, i64 %4, i32 7
+ %B52 = xor <8 x i32> %I45, zeroinitializer
+ %BC53 = bitcast <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1> to <4 x float>
+ %Sl54 = select i1 false, <2 x i64> <i64 -1, i64 -1>, <2 x i64> <i64 -1, i64 -1>
+ %Cmp55 = icmp sgt i16 0, 18460
+ br i1 %Cmp55, label %CF253, label %CF255
+
+CF255: ; preds = %CF255, %CF266, %CF270, %CF271
+ %L56 = load i8, i8* %0
+ store i8 %L35, i8* %0
+ %E57 = extractelement <4 x i64> zeroinitializer, i32 3
+ %Shuff58 = shufflevector <8 x i64> zeroinitializer, <8 x i64> %Shuff37, <8 x i32> <i32 undef, i32 undef, i32 10, i32 12, i32 14, i32 0, i32 2, i32 4>
+ %I59 = insertelement <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, i32 %FC, i32 0
+ %B60 = lshr <4 x i64> %I38, zeroinitializer
+ %FC61 = sitofp <8 x i1> %L to <8 x float>
+ %Sl62 = select i1 false, i8 %L19, i8 17
+ %Cmp63 = icmp ult i64 %E6, %E57
+ br i1 %Cmp63, label %CF255, label %CF266
+
+CF266: ; preds = %CF255
+ %L64 = load i64, i64* %2
+ store i8 17, i8* %0
+ %E65 = extractelement <8 x i64> %Shuff21, i32 6
+ %Shuff66 = shufflevector <2 x i1> %Cmp11, <2 x i1> %Cmp41, <2 x i32> <i32 1, i32 3>
+ %I67 = insertelement <8 x i1> %I30, i1 false, i32 7
+ %FC68 = uitofp i8 %Sl62 to float
+ %Sl69 = select i1 false, i8 %L42, i8 17
+ %Cmp70 = icmp eq <2 x i32> zeroinitializer, zeroinitializer
+ %L71 = load i8, i8* %0
+ store i8 %5, i8* %0
+ %E72 = extractelement <2 x i64> <i64 -1, i64 -1>, i32 1
+ %Shuff73 = shufflevector <8 x i64> zeroinitializer, <8 x i64> %Shuff44, <8 x i32> <i32 undef, i32 14, i32 0, i32 2, i32 4, i32 6, i32 undef, i32 10>
+ %I74 = insertelement <2 x i1> %Cmp70, i1 %Cmp55, i32 0
+ %B75 = add <16 x i8> %I15, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+ %FC76 = sitofp i8 33 to double
+ %Sl77 = select i1 %E43, double %BC, double %B31
+ %Cmp78 = icmp ult <8 x i64> %Shuff44, %I
+ %L79 = load i8, i8* %0
+ store i8 17, i8* %0
+ %E80 = extractelement <2 x i64> %Shuff7, i32 0
+ %Shuff81 = shufflevector <8 x i64> zeroinitializer, <8 x i64> %Shuff73, <8 x i32> <i32 undef, i32 5, i32 7, i32 9, i32 undef, i32 13, i32 15, i32 1>
+ %I82 = insertelement <8 x i64> %Shuff81, i64 %E57, i32 5
+ %FC83 = fptosi float %FC68 to i32
+ %Sl84 = select i1 %Cmp26, <2 x i64> <i64 -1, i64 -1>, <2 x i64> <i64 -1, i64 -1>
+ %Cmp85 = icmp ugt i64 %E6, %E57
+ br i1 %Cmp85, label %CF255, label %CF261
+
+CF261: ; preds = %CF261, %CF266
+ %L86 = load i8, i8* %0
+ store i8 %L42, i8* %0
+ %E87 = extractelement <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, i32 7
+ %Shuff88 = shufflevector <16 x i8> %Shuff29, <16 x i8> %I15, <16 x i32> <i32 26, i32 28, i32 30, i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24>
+ %I89 = insertelement <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, i8 %L35, i32 12
+ %B90 = shl i32 %3, %E49
+ %BC91 = bitcast <2 x i64> %Sl84 to <2 x double>
+ %Sl92 = select i1 false, i8 %L5, i8 %L19
+ %Cmp93 = icmp ugt i32 -1, %FC24
+ br i1 %Cmp93, label %CF261, label %CF268
+
+CF268: ; preds = %CF268, %CF261
+ %L94 = load i8, i8* %0
+ store i8 %L5, i8* %0
+ %E95 = extractelement <8 x i64> %Shuff58, i32 0
+ %Shuff96 = shufflevector <8 x i64> %Shuff73, <8 x i64> %Shuff73, <8 x i32> <i32 3, i32 5, i32 undef, i32 9, i32 undef, i32 undef, i32 15, i32 1>
+ %I97 = insertelement <4 x i64> zeroinitializer, i64 %B23, i32 1
+ %B98 = or <8 x i64> %Shuff58, %Shuff50
+ %FC99 = sitofp <2 x i1> %Cmp34 to <2 x float>
+ %Sl100 = select i1 %Cmp85, i64 %4, i64 %E
+ %Cmp101 = icmp ne <2 x i64> %B16, zeroinitializer
+ %L102 = load i8, i8* %0
+ store i8 %L56, i8* %0
+ %E103 = extractelement <8 x i16> %I22, i32 6
+ %Shuff104 = shufflevector <2 x double> %BC91, <2 x double> %BC91, <2 x i32> <i32 1, i32 3>
+ %I105 = insertelement <8 x i64> %Shuff96, i64 198384, i32 7
+ %B106 = sdiv <8 x i32> %B52, %I45
+ %ZE107 = zext i16 0 to i32
+ %Sl108 = select i1 %E43, <16 x i8> %Shuff29, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+ %Cmp109 = icmp slt <16 x i64> zeroinitializer, zeroinitializer
+ %L110 = load <8 x float>, <8 x float>* %A
+ store i8 %L56, i8* %0
+ %E111 = extractelement <8 x i64> zeroinitializer, i32 3
+ %Shuff112 = shufflevector <2 x i1> %Shuff66, <2 x i1> %Cmp11, <2 x i32> <i32 2, i32 0>
+ %I113 = insertelement <2 x i64> %B16, i64 %E95, i32 0
+ %B114 = mul i8 %E20, %Sl25
+ %Tr115 = trunc <8 x i64> %I105 to <8 x i16>
+ %Sl116 = select i1 %Cmp26, <16 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>, <16 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
+ %Cmp117 = icmp ult <8 x i16> %Tr, %Tr115
+ %L118 = load i8, i8* %0
+ store i8 %L19, i8* %0
+ %E119 = extractelement <4 x i32> %I59, i32 3
+ %Shuff120 = shufflevector <2 x i64> <i64 -1, i64 -1>, <2 x i64> %I113, <2 x i32> <i32 2, i32 0>
+ %I121 = insertelement <2 x i1> %Shuff66, i1 %Cmp26, i32 0
+ %B122 = fmul double 0.000000e+00, 0xE603EE221901D6A0
+ %FC123 = sitofp i64 %E6 to float
+ %Sl124 = select i1 false, <2 x i1> %Cmp41, <2 x i1> %Shuff66
+ %Cmp125 = icmp ult <4 x i64> %I38, %I38
+ %L126 = load i8, i8* %0
+ store i8 %L126, i8* %0
+ %E127 = extractelement <8 x i64> zeroinitializer, i32 7
+ %Shuff128 = shufflevector <2 x i1> %Cmp101, <2 x i1> %Cmp11, <2 x i32> <i32 undef, i32 0>
+ %I129 = insertelement <8 x i1> %Cmp18, i1 %E43, i32 0
+ %B130 = lshr i8 %L71, %L56
+ %FC131 = sitofp i32 %3 to float
+ %Sl132 = select i1 false, <2 x i64> %Shuff7, <2 x i64> %Sl84
+ %Cmp133 = icmp sge <8 x i16> %Tr, %Tr115
+ %L134 = load i8, i8* %0
+ store i8 %L102, i8* %0
+ %E135 = extractelement <16 x i8> %Shuff88, i32 3
+ %Shuff136 = shufflevector <8 x i64> %Shuff21, <8 x i64> zeroinitializer, <8 x i32> <i32 6, i32 8, i32 undef, i32 12, i32 14, i32 0, i32 2, i32 4>
+ %I137 = insertelement <2 x i64> zeroinitializer, i64 %E111, i32 0
+ %B138 = shl <8 x i64> %I51, %Shuff136
+ %Se = sext <2 x i32> %Tr32 to <2 x i64>
+ %Sl139 = select i1 %E43, <8 x i16> %Tr, <8 x i16> %Tr115
+ %Cmp140 = icmp sge <2 x i32> %Sl17, %Tr32
+ %L141 = load i8, i8* %0
+ store i8 17, i8* %0
+ %E142 = extractelement <8 x i16> %Tr115, i32 6
+ %Shuff143 = shufflevector <8 x i64> zeroinitializer, <8 x i64> %Shuff21, <8 x i32> <i32 1, i32 3, i32 undef, i32 7, i32 undef, i32 11, i32 13, i32 15>
+ %I144 = insertelement <4 x i64> %Shuff, i64 %4, i32 3
+ %B145 = sub <2 x i64> <i64 -1, i64 -1>, %I113
+ %Se146 = sext i8 %E135 to i32
+ %Sl147 = select i1 %Cmp55, <2 x i32> %Tr32, <2 x i32> zeroinitializer
+ %Cmp148 = icmp eq <8 x i1> %I30, %Cmp18
+ %L149 = load i8, i8* %0
+ store i8 %L56, i8* %0
+ %E150 = extractelement <2 x i64> %I113, i32 0
+ %Shuff151 = shufflevector <8 x i64> zeroinitializer, <8 x i64> %I, <8 x i32> <i32 9, i32 11, i32 13, i32 15, i32 1, i32 3, i32 undef, i32 7>
+ %I152 = insertelement <8 x i64> %Shuff136, i64 %E6, i32 3
+ %B153 = frem float %FC68, %FC123
+ %Se154 = sext i1 false to i32
+ %Sl155 = select i1 %Cmp26, i8 %E20, i8 %L19
+ %Cmp156 = icmp eq i64 198384, %4
+ br i1 %Cmp156, label %CF268, label %CF270
+
+CF270: ; preds = %CF268
+ %L157 = load i8, i8* %0
+ store i8 %L157, i8* %0
+ %E158 = extractelement <8 x i1> %Cmp78, i32 1
+ br i1 %E158, label %CF255, label %CF260
+
+CF260: ; preds = %CF270
+ %Shuff159 = shufflevector <8 x i64> zeroinitializer, <8 x i64> %Shuff81, <8 x i32> <i32 undef, i32 6, i32 8, i32 undef, i32 12, i32 14, i32 0, i32 2>
+ %I160 = insertelement <2 x i1> %Cmp11, i1 %Cmp156, i32 0
+ %B161 = urem <2 x i32> zeroinitializer, %Sl147
+ %Se162 = sext i8 %L48 to i16
+ %Sl163 = select i1 %Cmp93, i32 %FC83, i32 378892
+ %Cmp164 = fcmp uge double 0xE603EE221901D6A0, 0xE603EE221901D6A0
+ br i1 %Cmp164, label %CF253, label %CF254
+
+CF254: ; preds = %CF254, %CF265, %CF263, %CF260
+ %L165 = load i8, i8* %0
+ store i8 %Sl62, i8* %0
+ %E166 = extractelement <8 x i64> %Shuff58, i32 1
+ %Shuff167 = shufflevector <8 x i64> %Shuff58, <8 x i64> %Shuff96, <8 x i32> <i32 12, i32 14, i32 0, i32 undef, i32 4, i32 undef, i32 8, i32 10>
+ %I168 = insertelement <2 x double> %BC91, double %BC, i32 0
+ %B169 = ashr i16 %E142, %E103
+ %FC170 = sitofp <2 x i64> %Sl84 to <2 x float>
+ %Sl171 = select i1 %Cmp156, i8 %L165, i8 %5
+ %Cmp172 = icmp ugt i8 %E20, %L102
+ br i1 %Cmp172, label %CF254, label %CF262
+
+CF262: ; preds = %CF262, %CF254
+ %L173 = load i8, i8* %0
+ store i8 %L94, i8* %0
+ %E174 = extractelement <2 x i1> %Cmp70, i32 0
+ br i1 %E174, label %CF262, label %CF264
+
+CF264: ; preds = %CF264, %CF262
+ %Shuff175 = shufflevector <16 x i1> %Cmp, <16 x i1> %Cmp, <16 x i32> <i32 undef, i32 9, i32 undef, i32 13, i32 undef, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 undef, i32 1, i32 3, i32 5>
+ %I176 = insertelement <8 x i64> %Shuff21, i64 419346, i32 1
+ %B177 = lshr <2 x i32> %Sl17, zeroinitializer
+ %FC178 = sitofp <8 x i32> %B106 to <8 x float>
+ %Sl179 = select i1 %Cmp156, i8 %B114, i8 %Sl171
+ %Cmp180 = icmp ugt <4 x i64> %B60, zeroinitializer
+ %L181 = load i8, i8* %0
+ store i8 %L102, i8* %0
+ %E182 = extractelement <8 x i64> zeroinitializer, i32 0
+ %Shuff183 = shufflevector <8 x i64> zeroinitializer, <8 x i64> %I176, <8 x i32> <i32 3, i32 5, i32 undef, i32 undef, i32 11, i32 13, i32 undef, i32 1>
+ %I184 = insertelement <2 x i1> %Cmp34, i1 %Cmp63, i32 1
+ %B185 = urem i32 %Sl163, %Se146
+ %FC186 = sitofp i64 %E166 to float
+ %Sl187 = select i1 %Cmp156, i1 %E43, i1 %Cmp26
+ br i1 %Sl187, label %CF264, label %CF265
+
+CF265: ; preds = %CF264
+ %Cmp188 = icmp uge <16 x i1> %Shuff175, %Cmp
+ %L189 = load i8, i8* %0
+ store i8 %L19, i8* %0
+ %E190 = extractelement <2 x i1> %Cmp11, i32 0
+ br i1 %E190, label %CF254, label %CF259
+
+CF259: ; preds = %CF259, %CF265
+ %Shuff191 = shufflevector <8 x i1> %Cmp117, <8 x i1> %I30, <8 x i32> <i32 11, i32 13, i32 15, i32 1, i32 3, i32 5, i32 7, i32 9>
+ %I192 = insertelement <16 x i1> %Cmp188, i1 %Cmp85, i32 13
+ %B193 = urem <2 x i64> %Sl132, %Sl54
+ %Tr194 = trunc i64 %E166 to i8
+ %Sl195 = select i1 %Cmp93, <2 x i1> %I160, <2 x i1> %Shuff66
+ %Cmp196 = icmp ult <2 x i1> %Shuff66, %Cmp11
+ %L197 = load i8, i8* %0
+ store i8 %L5, i8* %0
+ %E198 = extractelement <8 x i64> %Shuff183, i32 0
+ %Shuff199 = shufflevector <8 x i16> %I22, <8 x i16> %Tr115, <8 x i32> <i32 3, i32 5, i32 undef, i32 9, i32 11, i32 13, i32 15, i32 undef>
+ %I200 = insertelement <16 x i8> %Shuff29, i8 %L197, i32 5
+ %B201 = and <2 x i64> %B145, %I113
+ %ZE202 = zext <2 x i1> %I74 to <2 x i64>
+ %Sl203 = select i1 %Cmp26, i8 %L126, i8 %L102
+ %Cmp204 = fcmp oeq <4 x float> %BC53, %BC53
+ %L205 = load i8, i8* %0
+ store i8 %5, i8* %0
+ %E206 = extractelement <2 x double> %Shuff104, i32 0
+ %Shuff207 = shufflevector <4 x i64> %I38, <4 x i64> zeroinitializer, <4 x i32> <i32 7, i32 undef, i32 3, i32 5>
+ %I208 = insertelement <8 x i64> %I82, i64 323142, i32 1
+ %B209 = lshr i8 %L56, %L5
+ %FC210 = fptoui double 0xE603EE221901D6A0 to i1
+ br i1 %FC210, label %CF259, label %CF263
+
+CF263: ; preds = %CF259
+ %Sl211 = select i1 %E174, i32 %ZE, i32 %ZE107
+ %Cmp212 = icmp ne i32 %Se154, %Sl163
+ br i1 %Cmp212, label %CF254, label %CF257
+
+CF257: ; preds = %CF263
+ %L213 = load i8, i8* %0
+ store i8 %L213, i8* %0
+ %E214 = extractelement <8 x i64> %Shuff81, i32 3
+ %Shuff215 = shufflevector <8 x i64> %Shuff159, <8 x i64> %Shuff136, <8 x i32> <i32 14, i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12>
+ %I216 = insertelement <8 x i64> %Shuff215, i64 323142, i32 0
+ %Se217 = sext i8 %L71 to i64
+ %Sl218 = select i1 %Cmp156, <8 x i16> %Tr115, <8 x i16> %Tr115
+ %Cmp219 = fcmp ole <2 x float> %FC170, %FC99
+ %L220 = load i8, i8* %0
+ store i8 %L19, i8* %0
+ %E221 = extractelement <8 x i64> zeroinitializer, i32 6
+ %Shuff222 = shufflevector <4 x i1> %Cmp204, <4 x i1> %Cmp125, <4 x i32> <i32 1, i32 undef, i32 5, i32 7>
+ %I223 = insertelement <8 x i1> %Cmp18, i1 %FC210, i32 3
+ %B224 = lshr i32 %E49, %FC24
+ %FC225 = sitofp <4 x i1> %Cmp180 to <4 x float>
+ %Sl226 = select i1 %Cmp93, i64 %E28, i64 %B23
+ %Cmp227 = icmp ugt <4 x i64> zeroinitializer, %B60
+ %L228 = load i8, i8* %0
+ store i8 %Sl46, i8* %0
+ %E229 = extractelement <1 x i32> zeroinitializer, i32 0
+ %Shuff230 = shufflevector <16 x i8> %Shuff29, <16 x i8> %I200, <16 x i32> <i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 undef, i32 undef, i32 29, i32 31, i32 1, i32 undef, i32 5, i32 undef, i32 9>
+ %I231 = insertelement <8 x i64> %Shuff183, i64 %L64, i32 5
+ %B232 = fadd float %FC68, %FC68
+ %Se233 = sext i1 %Cmp172 to i64
+ %Sl234 = select i1 false, i1 %Cmp164, i1 %E43
+ br label %CF
+
+CF: ; preds = %CF, %CF257
+ %Cmp235 = icmp ule i32 %Sl163, %Sl211
+ br i1 %Cmp235, label %CF, label %CF252
+
+CF252: ; preds = %CF252, %CF269, %CF
+ %L236 = load i8, i8* %0
+ store i8 %L19, i8* %0
+ %E237 = extractelement <16 x i1> %Shuff175, i32 15
+ br i1 %E237, label %CF252, label %CF269
+
+CF269: ; preds = %CF252
+ %Shuff238 = shufflevector <2 x i1> %I160, <2 x i1> %Cmp101, <2 x i32> undef
+ %I239 = insertelement <8 x i64> zeroinitializer, i64 %4, i32 0
+ %B240 = add i8 %L56, %Sl155
+ %Tr241 = trunc <2 x i32> %Sl147 to <2 x i1>
+ %Sl242 = select i1 %Sl234, <2 x float> %FC99, <2 x float> %FC99
+ %Cmp243 = icmp eq i8 %L5, %L118
+ br i1 %Cmp243, label %CF252, label %CF258
+
+CF258: ; preds = %CF258, %CF269
+ %L244 = load i8, i8* %0
+ store i8 %L19, i8* %0
+ %E245 = extractelement <2 x i64> %B201, i32 1
+ %Shuff246 = shufflevector <4 x i64> zeroinitializer, <4 x i64> %I144, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+ %I247 = insertelement <8 x i64> %Shuff73, i64 %E182, i32 2
+ %B248 = or i64 %Sl226, %E245
+ %Tr249 = trunc <2 x i64> <i64 -1, i64 -1> to <2 x i16>
+ %Sl250 = select i1 %FC210, i64 %E57, i64 %L64
+ %Cmp251 = icmp eq i32 %FC24, %FC
+ br i1 %Cmp251, label %CF258, label %CF267
+
+CF267: ; preds = %CF258
+ store i8 %L42, i8* %0
+ store i8 %Sl69, i8* %0
+ store i8 %L5, i8* %0
+ store i8 %L134, i8* %0
+ store i8 %L141, i8* %0
+ ret void
+}
diff --git a/test/CodeGen/X86/fold-vector-trunc-sitofp.ll b/test/CodeGen/X86/fold-vector-trunc-sitofp.ll
new file mode 100644
index 000000000000..6a3be7aace13
--- /dev/null
+++ b/test/CodeGen/X86/fold-vector-trunc-sitofp.ll
@@ -0,0 +1,13 @@
+; RUN: llc < %s -mtriple=i686-unknown -mattr=+avx | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s
+
+; Check that constant integers are truncated correctly before float conversion
+
+define <4 x float> @test1() {
+; CHECK-LABEL: test1
+; CHECK: movaps {{.*#+}} xmm0 = [-1.000000e+00,0.000000e+00,-1.000000e+00,0.000000e+00]
+; CHECK-NEXT: ret
+ %1 = trunc <4 x i3> <i3 -1, i3 -22, i3 7, i3 8> to <4 x i1>
+ %2 = sitofp <4 x i1> %1 to <4 x float>
+ ret <4 x float> %2
+}
diff --git a/test/CodeGen/X86/fold-vex.ll b/test/CodeGen/X86/fold-vex.ll
index 5a8b1d8cbfdf..006db6effdf6 100644
--- a/test/CodeGen/X86/fold-vex.ll
+++ b/test/CodeGen/X86/fold-vex.ll
@@ -14,7 +14,7 @@
; unless specially configured on some CPUs such as AMD Family 10H.
define <4 x i32> @test1(<4 x i32>* %p0, <4 x i32> %in1) nounwind {
- %in0 = load <4 x i32>* %p0, align 2
+ %in0 = load <4 x i32>, <4 x i32>* %p0, align 2
%a = and <4 x i32> %in0, %in1
ret <4 x i32> %a
diff --git a/test/CodeGen/X86/fold-zext-trunc.ll b/test/CodeGen/X86/fold-zext-trunc.ll
index f901ad280b50..e36e8abdc3be 100644
--- a/test/CodeGen/X86/fold-zext-trunc.ll
+++ b/test/CodeGen/X86/fold-zext-trunc.ll
@@ -12,9 +12,9 @@ define void @foo() nounwind {
; CHECK-NOT: movzbl
; CHECK: calll
entry:
- %tmp17 = load i8* getelementptr inbounds (%struct.S0* @g_98, i32 0, i32 1, i32 0), align 4
+ %tmp17 = load i8, i8* getelementptr inbounds (%struct.S0, %struct.S0* @g_98, i32 0, i32 1, i32 0), align 4
%tmp54 = zext i8 %tmp17 to i32
- %foo = load i32* bitcast (i8* getelementptr inbounds (%struct.S0* @g_98, i32 0, i32 1, i32 0) to i32*), align 4
+ %foo = load i32, i32* bitcast (i8* getelementptr inbounds (%struct.S0, %struct.S0* @g_98, i32 0, i32 1, i32 0) to i32*), align 4
%conv.i = trunc i32 %foo to i8
tail call void @func_12(i32 %tmp54, i8 zeroext %conv.i) nounwind
ret void
diff --git a/test/CodeGen/X86/force-align-stack-alloca.ll b/test/CodeGen/X86/force-align-stack-alloca.ll
index bd9806943920..a9ba20f45e84 100644
--- a/test/CodeGen/X86/force-align-stack-alloca.ll
+++ b/test/CodeGen/X86/force-align-stack-alloca.ll
@@ -10,7 +10,7 @@ target triple = "i386-unknown-linux-gnu"
define i32 @f(i8* %p) nounwind {
entry:
- %0 = load i8* %p
+ %0 = load i8, i8* %p
%conv = sext i8 %0 to i32
ret i32 %conv
}
diff --git a/test/CodeGen/X86/fp-double-rounding.ll b/test/CodeGen/X86/fp-double-rounding.ll
new file mode 100644
index 000000000000..c7578acbec27
--- /dev/null
+++ b/test/CodeGen/X86/fp-double-rounding.ll
@@ -0,0 +1,31 @@
+; RUN: llc < %s | FileCheck %s --check-prefix=CHECK --check-prefix=SAFE
+; RUN: llc < %s -enable-unsafe-fp-math | FileCheck %s --check-prefix=CHECK --check-prefix=UNSAFE
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64--"
+
+; CHECK-LABEL: double_rounding:
+; SAFE: callq __trunctfdf2
+; SAFE-NEXT: cvtsd2ss %xmm0
+; UNSAFE: callq __trunctfsf2
+; UNSAFE-NOT: cvt
+define void @double_rounding(fp128* %x, float* %f) {
+entry:
+ %0 = load fp128, fp128* %x, align 16
+ %1 = fptrunc fp128 %0 to double
+ %2 = fptrunc double %1 to float
+ store float %2, float* %f, align 4
+ ret void
+}
+
+; CHECK-LABEL: double_rounding_precise_first:
+; CHECK: fstps (%
+; CHECK-NOT: fstpl
+define void @double_rounding_precise_first(float* %f) {
+entry:
+ ; Hack to generate a precise FP_ROUND to double
+ %precise = call double asm sideeffect "fld %st(0)", "={st(0)}"()
+ %0 = fptrunc double %precise to float
+ store float %0, float* %f, align 4
+ ret void
+}
diff --git a/test/CodeGen/X86/fp-fast.ll b/test/CodeGen/X86/fp-fast.ll
index 7b08ad67220b..27af5738ca3e 100644
--- a/test/CodeGen/X86/fp-fast.ll
+++ b/test/CodeGen/X86/fp-fast.ll
@@ -1,126 +1,116 @@
-; RUN: llc -march=x86-64 -mcpu=corei7-avx -enable-unsafe-fp-math < %s | FileCheck %s
+; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=avx -enable-unsafe-fp-math < %s | FileCheck %s
-; CHECK-LABEL: test1
define float @test1(float %a) {
-; CHECK-NOT: addss
-; CHECK: mulss
-; CHECK-NOT: addss
-; CHECK: ret
+; CHECK-LABEL: test1:
+; CHECK: # BB#0:
+; CHECK-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0
+; CHECK-NEXT: retq
%t1 = fadd float %a, %a
%r = fadd float %t1, %t1
ret float %r
}
-; CHECK-LABEL: test2
define float @test2(float %a) {
-; CHECK-NOT: addss
-; CHECK: mulss
-; CHECK-NOT: addss
-; CHECK: ret
+; CHECK-LABEL: test2:
+; CHECK: # BB#0:
+; CHECK-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0
+; CHECK-NEXT: retq
%t1 = fmul float 4.0, %a
%t2 = fadd float %a, %a
%r = fadd float %t1, %t2
ret float %r
}
-; CHECK-LABEL: test3
define float @test3(float %a) {
-; CHECK-NOT: addss
-; CHECK: mulss
-; CHECK-NOT: addss
-; CHECK: ret
+; CHECK-LABEL: test3:
+; CHECK: # BB#0:
+; CHECK-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0
+; CHECK-NEXT: retq
%t1 = fmul float %a, 4.0
%t2 = fadd float %a, %a
%r = fadd float %t1, %t2
ret float %r
}
-; CHECK-LABEL: test4
define float @test4(float %a) {
-; CHECK-NOT: addss
-; CHECK: mulss
-; CHECK-NOT: addss
-; CHECK: ret
+; CHECK-LABEL: test4:
+; CHECK: # BB#0:
+; CHECK-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0
+; CHECK-NEXT: retq
%t1 = fadd float %a, %a
%t2 = fmul float 4.0, %a
%r = fadd float %t1, %t2
ret float %r
}
-; CHECK-LABEL: test5
define float @test5(float %a) {
-; CHECK-NOT: addss
-; CHECK: mulss
-; CHECK-NOT: addss
-; CHECK: ret
+; CHECK-LABEL: test5:
+; CHECK: # BB#0:
+; CHECK-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0
+; CHECK-NEXT: retq
%t1 = fadd float %a, %a
%t2 = fmul float %a, 4.0
%r = fadd float %t1, %t2
ret float %r
}
-; CHECK-LABEL: test6
define float @test6(float %a) {
-; CHECK-NOT: addss
-; CHECK: xorps
-; CHECK-NOT: addss
-; CHECK: ret
+; CHECK-LABEL: test6:
+; CHECK: # BB#0:
+; CHECK-NEXT: vxorps %xmm0, %xmm0, %xmm0
+; CHECK-NEXT: retq
%t1 = fmul float 2.0, %a
%t2 = fadd float %a, %a
%r = fsub float %t1, %t2
ret float %r
}
-; CHECK-LABEL: test7
define float @test7(float %a) {
-; CHECK-NOT: addss
-; CHECK: xorps
-; CHECK-NOT: addss
-; CHECK: ret
+; CHECK-LABEL: test7:
+; CHECK: # BB#0:
+; CHECK-NEXT: vxorps %xmm0, %xmm0, %xmm0
+; CHECK-NEXT: retq
%t1 = fmul float %a, 2.0
%t2 = fadd float %a, %a
%r = fsub float %t1, %t2
ret float %r
}
-; CHECK-LABEL: test8
define float @test8(float %a) {
-; CHECK-NOT: fma
-; CHECK-NOT: mul
-; CHECK-NOT: add
-; CHECK: ret
+; CHECK-LABEL: test8:
+; CHECK: # BB#0:
+; CHECK-NEXT: retq
%t1 = fmul float %a, 0.0
%t2 = fadd float %a, %t1
ret float %t2
}
-; CHECK-LABEL: test9
define float @test9(float %a) {
-; CHECK-NOT: fma
-; CHECK-NOT: mul
-; CHECK-NOT: add
-; CHECK: ret
+; CHECK-LABEL: test9:
+; CHECK: # BB#0:
+; CHECK-NEXT: retq
%t1 = fmul float 0.0, %a
%t2 = fadd float %t1, %a
ret float %t2
}
-; CHECK-LABEL: test10
define float @test10(float %a) {
-; CHECK-NOT: add
-; CHECK: vxorps
-; CHECK: ret
+; CHECK-LABEL: test10:
+; CHECK: # BB#0:
+; CHECK-NEXT: vxorps %xmm0, %xmm0, %xmm0
+; CHECK-NEXT: retq
%t1 = fsub float -0.0, %a
%t2 = fadd float %a, %t1
ret float %t2
}
-; CHECK-LABEL: test11
define float @test11(float %a) {
-; CHECK-NOT: add
-; CHECK: vxorps
-; CHECK: ret
+; CHECK-LABEL: test11:
+; CHECK: # BB#0:
+; CHECK-NEXT: vxorps %xmm0, %xmm0, %xmm0
+; CHECK-NEXT: retq
%t1 = fsub float -0.0, %a
%t2 = fadd float %a, %t1
ret float %t2
}
+
diff --git a/test/CodeGen/X86/fp-load-trunc.ll b/test/CodeGen/X86/fp-load-trunc.ll
index e6c1e1adb59e..3896913857de 100644
--- a/test/CodeGen/X86/fp-load-trunc.ll
+++ b/test/CodeGen/X86/fp-load-trunc.ll
@@ -23,7 +23,7 @@ define <1 x float> @test1(<1 x double>* %p) nounwind {
; AVX-NEXT: flds (%esp)
; AVX-NEXT: popl %eax
; AVX-NEXT: retl
- %x = load <1 x double>* %p
+ %x = load <1 x double>, <1 x double>* %p
%y = fptrunc <1 x double> %x to <1 x float>
ret <1 x float> %y
}
@@ -40,7 +40,7 @@ define <2 x float> @test2(<2 x double>* %p) nounwind {
; AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
; AVX-NEXT: vcvtpd2psx (%eax), %xmm0
; AVX-NEXT: retl
- %x = load <2 x double>* %p
+ %x = load <2 x double>, <2 x double>* %p
%y = fptrunc <2 x double> %x to <2 x float>
ret <2 x float> %y
}
@@ -59,7 +59,7 @@ define <4 x float> @test3(<4 x double>* %p) nounwind {
; AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
; AVX-NEXT: vcvtpd2psy (%eax), %xmm0
; AVX-NEXT: retl
- %x = load <4 x double>* %p
+ %x = load <4 x double>, <4 x double>* %p
%y = fptrunc <4 x double> %x to <4 x float>
ret <4 x float> %y
}
@@ -83,7 +83,7 @@ define <8 x float> @test4(<8 x double>* %p) nounwind {
; AVX-NEXT: vcvtpd2psy 32(%eax), %xmm1
; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX-NEXT: retl
- %x = load <8 x double>* %p
+ %x = load <8 x double>, <8 x double>* %p
%y = fptrunc <8 x double> %x to <8 x float>
ret <8 x float> %y
}
diff --git a/test/CodeGen/X86/fp-stack-O0-crash.ll b/test/CodeGen/X86/fp-stack-O0-crash.ll
index ae83a02c6744..5acfd5d3b618 100644
--- a/test/CodeGen/X86/fp-stack-O0-crash.ll
+++ b/test/CodeGen/X86/fp-stack-O0-crash.ll
@@ -11,14 +11,14 @@ entry:
br i1 false, label %cond.true, label %cond.false
cond.true: ; preds = %entry
- %tmp = load x86_fp80* %x.addr ; <x86_fp80> [#uses=1]
- %tmp1 = load x86_fp80* %x.addr ; <x86_fp80> [#uses=1]
+ %tmp = load x86_fp80, x86_fp80* %x.addr ; <x86_fp80> [#uses=1]
+ %tmp1 = load x86_fp80, x86_fp80* %x.addr ; <x86_fp80> [#uses=1]
%cmp = fcmp oeq x86_fp80 %tmp, %tmp1 ; <i1> [#uses=1]
br i1 %cmp, label %if.then, label %if.end
cond.false: ; preds = %entry
- %tmp2 = load x86_fp80* %x.addr ; <x86_fp80> [#uses=1]
- %tmp3 = load x86_fp80* %x.addr ; <x86_fp80> [#uses=1]
+ %tmp2 = load x86_fp80, x86_fp80* %x.addr ; <x86_fp80> [#uses=1]
+ %tmp3 = load x86_fp80, x86_fp80* %x.addr ; <x86_fp80> [#uses=1]
%cmp4 = fcmp une x86_fp80 %tmp2, %tmp3 ; <i1> [#uses=1]
br i1 %cmp4, label %if.then, label %if.end
diff --git a/test/CodeGen/X86/fp-stack-O0.ll b/test/CodeGen/X86/fp-stack-O0.ll
index df90254dbd27..79ef28b163cd 100644
--- a/test/CodeGen/X86/fp-stack-O0.ll
+++ b/test/CodeGen/X86/fp-stack-O0.ll
@@ -17,7 +17,7 @@ declare i32 @x2(x86_fp80, x86_fp80) nounwind
; CHECK-NEXT: x2
define i32 @test1() nounwind uwtable ssp {
entry:
- %call = call x86_fp80 (...)* bitcast (x86_fp80 (i32)* @x1 to x86_fp80 (...)*)(i32 -1)
+ %call = call x86_fp80 (...) bitcast (x86_fp80 (i32)* @x1 to x86_fp80 (...)*)(i32 -1)
%call1 = call i32 @x2(x86_fp80 %call, x86_fp80 0xK401EFFFFFFFF00000000)
ret i32 %call1
}
diff --git a/test/CodeGen/X86/fp-stack-compare-cmov.ll b/test/CodeGen/X86/fp-stack-compare-cmov.ll
index b457fbc1a332..1d3548816b72 100644
--- a/test/CodeGen/X86/fp-stack-compare-cmov.ll
+++ b/test/CodeGen/X86/fp-stack-compare-cmov.ll
@@ -4,7 +4,7 @@
define float @foo(float* %col.2.0) {
; CHECK: fucompi
; CHECK: fcmov
- %tmp = load float* %col.2.0
+ %tmp = load float, float* %col.2.0
%tmp16 = fcmp olt float %tmp, 0.000000e+00
%tmp20 = fsub float -0.000000e+00, %tmp
%iftmp.2.0 = select i1 %tmp16, float %tmp20, float %tmp
diff --git a/test/CodeGen/X86/fp-stack-compare.ll b/test/CodeGen/X86/fp-stack-compare.ll
index a8557adeaf74..96088d759234 100644
--- a/test/CodeGen/X86/fp-stack-compare.ll
+++ b/test/CodeGen/X86/fp-stack-compare.ll
@@ -6,7 +6,7 @@ define float @foo(float* %col.2.0) {
; CHECK-NOT: fucompi
; CHECK: j
; CHECK-NOT: fcmov
- %tmp = load float* %col.2.0
+ %tmp = load float, float* %col.2.0
%tmp16 = fcmp olt float %tmp, 0.000000e+00
%tmp20 = fsub float -0.000000e+00, %tmp
%iftmp.2.0 = select i1 %tmp16, float %tmp20, float %tmp
diff --git a/test/CodeGen/X86/fp-stack-ret-store.ll b/test/CodeGen/X86/fp-stack-ret-store.ll
index 05dfc545db17..c7cbb2a0561f 100644
--- a/test/CodeGen/X86/fp-stack-ret-store.ll
+++ b/test/CodeGen/X86/fp-stack-ret-store.ll
@@ -7,7 +7,7 @@ target triple = "i686-apple-darwin8"
define void @bar(double* %P) {
entry:
- %tmp = tail call double (...)* @foo( ) ; <double> [#uses=1]
+ %tmp = tail call double (...) @foo( ) ; <double> [#uses=1]
store double %tmp, double* %P, align 8
ret void
}
@@ -16,7 +16,7 @@ declare double @foo(...)
define void @bar2(float* %P) {
entry:
- %tmp = tail call double (...)* @foo2( ) ; <double> [#uses=1]
+ %tmp = tail call double (...) @foo2( ) ; <double> [#uses=1]
%tmp1 = fptrunc double %tmp to float ; <float> [#uses=1]
store float %tmp1, float* %P, align 4
ret void
diff --git a/test/CodeGen/X86/fp-stack-ret.ll b/test/CodeGen/X86/fp-stack-ret.ll
index 2733117a1f02..9635e2d2511a 100644
--- a/test/CodeGen/X86/fp-stack-ret.ll
+++ b/test/CodeGen/X86/fp-stack-ret.ll
@@ -7,7 +7,7 @@
; CHECK: fldl
; CHECK-NEXT: ret
define double @test1(double *%P) {
- %A = load double* %P
+ %A = load double, double* %P
ret double %A
}
diff --git a/test/CodeGen/X86/fp-stack.ll b/test/CodeGen/X86/fp-stack.ll
index dca644de667e..44c039633886 100644
--- a/test/CodeGen/X86/fp-stack.ll
+++ b/test/CodeGen/X86/fp-stack.ll
@@ -5,9 +5,9 @@ target triple = "i386-pc-linux-gnu"
define void @foo() nounwind {
entry:
- %tmp6 = load x86_fp80* undef ; <x86_fp80> [#uses=2]
- %tmp15 = load x86_fp80* undef ; <x86_fp80> [#uses=2]
- %tmp24 = load x86_fp80* undef ; <x86_fp80> [#uses=1]
+ %tmp6 = load x86_fp80, x86_fp80* undef ; <x86_fp80> [#uses=2]
+ %tmp15 = load x86_fp80, x86_fp80* undef ; <x86_fp80> [#uses=2]
+ %tmp24 = load x86_fp80, x86_fp80* undef ; <x86_fp80> [#uses=1]
br i1 undef, label %return, label %bb.nph
bb.nph: ; preds = %entry
diff --git a/test/CodeGen/X86/fp-trunc.ll b/test/CodeGen/X86/fp-trunc.ll
index 6424bfc9c219..807a8c8fe5e3 100644
--- a/test/CodeGen/X86/fp-trunc.ll
+++ b/test/CodeGen/X86/fp-trunc.ll
@@ -1,6 +1,8 @@
; RUN: llc < %s -march=x86 -mcpu=corei7 | FileCheck %s
; RUN: llc < %s -march=x86 -mcpu=core-avx-i | FileCheck %s --check-prefix=AVX
+target triple = "i686-pc-linux-gnu"
+
define <1 x float> @test1(<1 x double> %x) nounwind {
; CHECK-LABEL: test1:
; CHECK: # BB#0:
@@ -59,12 +61,14 @@ define <4 x float> @test3(<4 x double> %x) nounwind {
define <8 x float> @test4(<8 x double> %x) nounwind {
; CHECK-LABEL: test4:
; CHECK: # BB#0:
+; CHECK-NEXT: subl $12, %esp
; CHECK-NEXT: cvtpd2ps %xmm1, %xmm1
; CHECK-NEXT: cvtpd2ps %xmm0, %xmm0
; CHECK-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
-; CHECK-NEXT: cvtpd2ps %xmm3, %xmm3
; CHECK-NEXT: cvtpd2ps %xmm2, %xmm1
-; CHECK-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm3[0]
+; CHECK-NEXT: cvtpd2ps 16(%esp), %xmm2
+; CHECK-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; CHECK-NEXT: addl $12, %esp
; CHECK-NEXT: retl
;
; AVX-LABEL: test4:
diff --git a/test/CodeGen/X86/fp2sint.ll b/test/CodeGen/X86/fp2sint.ll
index 167544488713..b41f56f9f41e 100644
--- a/test/CodeGen/X86/fp2sint.ll
+++ b/test/CodeGen/X86/fp2sint.ll
@@ -4,10 +4,10 @@
define i32 @main(i32 %argc, i8** %argv) {
cond_false.i.i.i: ; preds = %bb.i5
- %tmp35.i = load double* null, align 8 ; <double> [#uses=1]
+ %tmp35.i = load double, double* null, align 8 ; <double> [#uses=1]
%tmp3536.i = fptosi double %tmp35.i to i32 ; <i32> [#uses=1]
%tmp3536140.i = zext i32 %tmp3536.i to i64 ; <i64> [#uses=1]
- %tmp39.i = load double* null, align 4 ; <double> [#uses=1]
+ %tmp39.i = load double, double* null, align 4 ; <double> [#uses=1]
%tmp3940.i = fptosi double %tmp39.i to i32 ; <i32> [#uses=1]
%tmp3940137.i = zext i32 %tmp3940.i to i64 ; <i64> [#uses=1]
%tmp3940137138.i = shl i64 %tmp3940137.i, 32 ; <i64> [#uses=1]
diff --git a/test/CodeGen/X86/fp_load_cast_fold.ll b/test/CodeGen/X86/fp_load_cast_fold.ll
index 72ea12f9430e..5fd22e3fa6e5 100644
--- a/test/CodeGen/X86/fp_load_cast_fold.ll
+++ b/test/CodeGen/X86/fp_load_cast_fold.ll
@@ -1,19 +1,19 @@
; RUN: llc < %s -march=x86 | FileCheck %s
define double @short(i16* %P) {
- %V = load i16* %P ; <i16> [#uses=1]
+ %V = load i16, i16* %P ; <i16> [#uses=1]
%V2 = sitofp i16 %V to double ; <double> [#uses=1]
ret double %V2
}
define double @int(i32* %P) {
- %V = load i32* %P ; <i32> [#uses=1]
+ %V = load i32, i32* %P ; <i32> [#uses=1]
%V2 = sitofp i32 %V to double ; <double> [#uses=1]
ret double %V2
}
define double @long(i64* %P) {
- %V = load i64* %P ; <i64> [#uses=1]
+ %V = load i64, i64* %P ; <i64> [#uses=1]
%V2 = sitofp i64 %V to double ; <double> [#uses=1]
ret double %V2
}
diff --git a/test/CodeGen/X86/fp_load_fold.ll b/test/CodeGen/X86/fp_load_fold.ll
index a2cea5e57f64..57497454792b 100644
--- a/test/CodeGen/X86/fp_load_fold.ll
+++ b/test/CodeGen/X86/fp_load_fold.ll
@@ -4,37 +4,37 @@
; Test that the load of the memory location is folded into the operation.
define double @test_add(double %X, double* %P) {
- %Y = load double* %P ; <double> [#uses=1]
+ %Y = load double, double* %P ; <double> [#uses=1]
%R = fadd double %X, %Y ; <double> [#uses=1]
ret double %R
}
define double @test_mul(double %X, double* %P) {
- %Y = load double* %P ; <double> [#uses=1]
+ %Y = load double, double* %P ; <double> [#uses=1]
%R = fmul double %X, %Y ; <double> [#uses=1]
ret double %R
}
define double @test_sub(double %X, double* %P) {
- %Y = load double* %P ; <double> [#uses=1]
+ %Y = load double, double* %P ; <double> [#uses=1]
%R = fsub double %X, %Y ; <double> [#uses=1]
ret double %R
}
define double @test_subr(double %X, double* %P) {
- %Y = load double* %P ; <double> [#uses=1]
+ %Y = load double, double* %P ; <double> [#uses=1]
%R = fsub double %Y, %X ; <double> [#uses=1]
ret double %R
}
define double @test_div(double %X, double* %P) {
- %Y = load double* %P ; <double> [#uses=1]
+ %Y = load double, double* %P ; <double> [#uses=1]
%R = fdiv double %X, %Y ; <double> [#uses=1]
ret double %R
}
define double @test_divr(double %X, double* %P) {
- %Y = load double* %P ; <double> [#uses=1]
+ %Y = load double, double* %P ; <double> [#uses=1]
%R = fdiv double %Y, %X ; <double> [#uses=1]
ret double %R
}
diff --git a/test/CodeGen/X86/fpstack-debuginstr-kill.ll b/test/CodeGen/X86/fpstack-debuginstr-kill.ll
index e3180f4e68a2..34398414a76c 100644
--- a/test/CodeGen/X86/fpstack-debuginstr-kill.ll
+++ b/test/CodeGen/X86/fpstack-debuginstr-kill.ll
@@ -32,7 +32,7 @@ sw.bb735: ; preds = %if.end511
unreachable
if.end41.i2210: ; preds = %if.end511
- call void @llvm.dbg.value(metadata x86_fp80 %src.sroa.0.0.src.sroa.0.0.2280, i64 0, metadata !20, metadata !{!"0x102"})
+ call void @llvm.dbg.value(metadata x86_fp80 %src.sroa.0.0.src.sroa.0.0.2280, i64 0, metadata !20, metadata !DIExpression()), !dbg !DILocation(scope: !4)
unreachable
sw.bb992: ; preds = %if.end511
@@ -43,29 +43,29 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata)
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!24, !25}
-!0 = !{!"0x11\004\00clang version 3.6.0 (http://llvm.org/git/clang 8444ae7cfeaefae031f8fedf0d1435ca3b14d90b) (http://llvm.org/git/llvm 886f0101a7d176543b831f5efb74c03427244a55)\001\00\000\00\001", !1, !2, !2, !3, !21, !2} ; [ DW_TAG_compile_unit ] [x87stackifier/fpu_ieee.cpp] [DW_LANG_C_plus_plus]
-!1 = !{!"fpu_ieee.cpp", !"x87stackifier"}
+!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.6.0 (http://llvm.org/git/clang 8444ae7cfeaefae031f8fedf0d1435ca3b14d90b) (http://llvm.org/git/llvm 886f0101a7d176543b831f5efb74c03427244a55)", isOptimized: true, emissionKind: 1, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !21, imports: !2)
+!1 = !DIFile(filename: "fpu_ieee.cpp", directory: "x87stackifier")
!2 = !{}
!3 = !{!4}
-!4 = !{!"0x2e\00fpuop_arithmetic\00fpuop_arithmetic\00_Z16fpuop_arithmeticjj\0011\000\001\000\006\00256\001\0013", !5, !6, !7, null, void (i32, i32)* @_Z16fpuop_arithmeticjj, null, null, !10} ; [ DW_TAG_subprogram ] [line 11] [def] [scope 13] [fpuop_arithmetic]
-!5 = !{!"f1.cpp", !"x87stackifier"}
-!6 = !{!"0x29", !5} ; [ DW_TAG_file_type ] [x87stackifier/f1.cpp]
-!7 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !8, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!4 = !DISubprogram(name: "fpuop_arithmetic", linkageName: "_Z16fpuop_arithmeticjj", line: 11, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 13, file: !5, scope: !6, type: !7, function: void (i32, i32)* @_Z16fpuop_arithmeticjj, variables: !10)
+!5 = !DIFile(filename: "f1.cpp", directory: "x87stackifier")
+!6 = !DIFile(filename: "f1.cpp", directory: "x87stackifier")
+!7 = !DISubroutineType(types: !8)
!8 = !{null, !9, !9}
-!9 = !{!"0x24\00unsigned int\000\0032\0032\000\000\007", null, null} ; [ DW_TAG_base_type ] [unsigned int] [line 0, size 32, align 32, offset 0, enc DW_ATE_unsigned]
+!9 = !DIBasicType(tag: DW_TAG_base_type, name: "unsigned int", size: 32, align: 32, encoding: DW_ATE_unsigned)
!10 = !{!11, !12, !13, !18, !20}
-!11 = !{!"0x101\00\0016777227\000", !4, !6, !9} ; [ DW_TAG_arg_variable ] [line 11]
-!12 = !{!"0x101\00\0033554443\000", !4, !6, !9} ; [ DW_TAG_arg_variable ] [line 11]
-!13 = !{!"0x100\00x\0014\000", !4, !6, !14} ; [ DW_TAG_auto_variable ] [x] [line 14]
-!14 = !{!"0x16\00fpu_extended\003\000\000\000\000", !5, null, !15} ; [ DW_TAG_typedef ] [fpu_extended] [line 3, size 0, align 0, offset 0] [from fpu_register]
-!15 = !{!"0x16\00fpu_register\002\000\000\000\000", !5, null, !16} ; [ DW_TAG_typedef ] [fpu_register] [line 2, size 0, align 0, offset 0] [from uae_f64]
-!16 = !{!"0x16\00uae_f64\001\000\000\000\000", !5, null, !17} ; [ DW_TAG_typedef ] [uae_f64] [line 1, size 0, align 0, offset 0] [from double]
-!17 = !{!"0x24\00double\000\0064\0064\000\000\004", null, null} ; [ DW_TAG_base_type ] [double] [line 0, size 64, align 64, offset 0, enc DW_ATE_float]
-!18 = !{!"0x100\00a\0015\000", !4, !6, !19} ; [ DW_TAG_auto_variable ] [a] [line 15]
-!19 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
-!20 = !{!"0x100\00value\0016\000", !4, !6, !14} ; [ DW_TAG_auto_variable ] [value] [line 16]
+!11 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "", line: 11, arg: 1, scope: !4, file: !6, type: !9)
+!12 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "", line: 11, arg: 2, scope: !4, file: !6, type: !9)
+!13 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "x", line: 14, scope: !4, file: !6, type: !14)
+!14 = !DIDerivedType(tag: DW_TAG_typedef, name: "fpu_extended", line: 3, file: !5, baseType: !15)
+!15 = !DIDerivedType(tag: DW_TAG_typedef, name: "fpu_register", line: 2, file: !5, baseType: !16)
+!16 = !DIDerivedType(tag: DW_TAG_typedef, name: "uae_f64", line: 1, file: !5, baseType: !17)
+!17 = !DIBasicType(tag: DW_TAG_base_type, name: "double", size: 64, align: 64, encoding: DW_ATE_float)
+!18 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "a", line: 15, scope: !4, file: !6, type: !19)
+!19 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!20 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "value", line: 16, scope: !4, file: !6, type: !14)
!21 = !{!22, !23}
-!22 = !{!"0x34\00g1\00g1\00\005\000\001", null, !6, !14, double* @g1, null} ; [ DW_TAG_variable ] [g1] [line 5] [def]
-!23 = !{!"0x34\00g2\00g2\00\006\000\001", null, !6, !19, i32* @g2, null} ; [ DW_TAG_variable ] [g2] [line 6] [def]
+!22 = !DIGlobalVariable(name: "g1", line: 5, isLocal: false, isDefinition: true, scope: null, file: !6, type: !14, variable: double* @g1)
+!23 = !DIGlobalVariable(name: "g2", line: 6, isLocal: false, isDefinition: true, scope: null, file: !6, type: !19, variable: i32* @g2)
!24 = !{i32 2, !"Dwarf Version", i32 2}
-!25 = !{i32 2, !"Debug Info Version", i32 2}
+!25 = !{i32 2, !"Debug Info Version", i32 3}
diff --git a/test/CodeGen/X86/frameaddr.ll b/test/CodeGen/X86/frameaddr.ll
index 3e0f8bc34d64..5ed2ba0f4314 100644
--- a/test/CodeGen/X86/frameaddr.ll
+++ b/test/CodeGen/X86/frameaddr.ll
@@ -1,11 +1,12 @@
; RUN: llc < %s -march=x86 | FileCheck %s --check-prefix=CHECK-32
-; RUN: llc < %s -march=x86 -fast-isel -fast-isel-abort | FileCheck %s --check-prefix=CHECK-32
-; RUN: llc < %s -march=x86-64 | FileCheck %s --check-prefix=CHECK-64
-; RUN: llc < %s -march=x86-64 -fast-isel -fast-isel-abort | FileCheck %s --check-prefix=CHECK-64
+; RUN: llc < %s -march=x86 -fast-isel -fast-isel-abort=1 | FileCheck %s --check-prefix=CHECK-32
+; RUN: llc < %s -mtriple=x86_64-pc-win32 -fast-isel | FileCheck %s --check-prefix=CHECK-W64
+; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s --check-prefix=CHECK-64
+; RUN: llc < %s -mtriple=x86_64-unknown -fast-isel -fast-isel-abort=1 | FileCheck %s --check-prefix=CHECK-64
; RUN: llc < %s -mtriple=x86_64-gnux32 | FileCheck %s --check-prefix=CHECK-X32ABI
-; RUN: llc < %s -mtriple=x86_64-gnux32 -fast-isel -fast-isel-abort | FileCheck %s --check-prefix=CHECK-X32ABI
+; RUN: llc < %s -mtriple=x86_64-gnux32 -fast-isel -fast-isel-abort=1 | FileCheck %s --check-prefix=CHECK-X32ABI
; RUN: llc < %s -mtriple=x86_64-nacl | FileCheck %s --check-prefix=CHECK-NACL64
-; RUN: llc < %s -mtriple=x86_64-nacl -fast-isel -fast-isel-abort | FileCheck %s --check-prefix=CHECK-NACL64
+; RUN: llc < %s -mtriple=x86_64-nacl -fast-isel -fast-isel-abort=1 | FileCheck %s --check-prefix=CHECK-NACL64
define i8* @test1() nounwind {
entry:
@@ -15,6 +16,12 @@ entry:
; CHECK-32-NEXT: movl %ebp, %eax
; CHECK-32-NEXT: pop
; CHECK-32-NEXT: ret
+; CHECK-W64-LABEL: test1
+; CHECK-W64: push
+; CHECK-W64-NEXT: movq %rsp, %rbp
+; CHECK-W64-NEXT: leaq (%rbp), %rax
+; CHECK-W64-NEXT: pop
+; CHECK-W64-NEXT: ret
; CHECK-64-LABEL: test1
; CHECK-64: push
; CHECK-64-NEXT: movq %rsp, %rbp
@@ -44,6 +51,12 @@ entry:
; CHECK-32-NEXT: movl (%eax), %eax
; CHECK-32-NEXT: pop
; CHECK-32-NEXT: ret
+; CHECK-W64-LABEL: test2
+; CHECK-W64: push
+; CHECK-W64-NEXT: movq %rsp, %rbp
+; CHECK-W64-NEXT: leaq (%rbp), %rax
+; CHECK-W64-NEXT: pop
+; CHECK-W64-NEXT: ret
; CHECK-64-LABEL: test2
; CHECK-64: push
; CHECK-64-NEXT: movq %rsp, %rbp
diff --git a/test/CodeGen/X86/frameallocate.ll b/test/CodeGen/X86/frameallocate.ll
deleted file mode 100644
index 13d35b91937d..000000000000
--- a/test/CodeGen/X86/frameallocate.ll
+++ /dev/null
@@ -1,39 +0,0 @@
-; RUN: llc -mtriple=x86_64-windows-msvc < %s | FileCheck %s
-
-declare i8* @llvm.frameallocate(i32)
-declare i8* @llvm.frameaddress(i32)
-declare i8* @llvm.framerecover(i8*, i8*)
-declare i32 @printf(i8*, ...)
-
-@str = internal constant [10 x i8] c"asdf: %d\0A\00"
-
-define void @print_framealloc_from_fp(i8* %fp) {
- %alloc = call i8* @llvm.framerecover(i8* bitcast (void(i32*, i32*)* @alloc_func to i8*), i8* %fp)
- %alloc_i32 = bitcast i8* %alloc to i32*
- %r = load i32* %alloc_i32
- call i32 (i8*, ...)* @printf(i8* getelementptr ([10 x i8]* @str, i32 0, i32 0), i32 %r)
- ret void
-}
-
-; CHECK-LABEL: print_framealloc_from_fp:
-; CHECK: movabsq $.Lframeallocation_alloc_func, %[[offs:[a-z]+]]
-; CHECK: movl (%rcx,%[[offs]]), %edx
-; CHECK: leaq {{.*}}(%rip), %rcx
-; CHECK: callq printf
-; CHECK: retq
-
-define void @alloc_func(i32* %s, i32* %d) {
- %alloc = call i8* @llvm.frameallocate(i32 16)
- %alloc_i32 = bitcast i8* %alloc to i32*
- store i32 42, i32* %alloc_i32
- %fp = call i8* @llvm.frameaddress(i32 0)
- call void @print_framealloc_from_fp(i8* %fp)
- ret void
-}
-
-; CHECK-LABEL: alloc_func:
-; CHECK: .Lframeallocation_alloc_func = -[[offs:[0-9]+]]
-; CHECK: movl $42, -[[offs]](%rbp)
-; CHECK: movq %rbp, %rcx
-; CHECK: callq print_framealloc_from_fp
-; CHECK: retq
diff --git a/test/CodeGen/X86/frameescape.ll b/test/CodeGen/X86/frameescape.ll
new file mode 100644
index 000000000000..00bc55d24878
--- /dev/null
+++ b/test/CodeGen/X86/frameescape.ll
@@ -0,0 +1,128 @@
+; RUN: llc -mtriple=i686-windows-msvc < %s | FileCheck %s --check-prefix=X86
+; RUN: llc -mtriple=x86_64-windows-msvc < %s | FileCheck %s --check-prefix=X64
+
+declare void @llvm.frameescape(...)
+declare i8* @llvm.frameaddress(i32)
+declare i8* @llvm.framerecover(i8*, i8*, i32)
+declare i32 @printf(i8*, ...)
+
+@str = internal constant [10 x i8] c"asdf: %d\0A\00"
+
+define void @print_framealloc_from_fp(i8* %fp) {
+ %a.i8 = call i8* @llvm.framerecover(i8* bitcast (void()* @alloc_func to i8*), i8* %fp, i32 0)
+ %a = bitcast i8* %a.i8 to i32*
+ %a.val = load i32, i32* %a
+ call i32 (i8*, ...) @printf(i8* getelementptr ([10 x i8], [10 x i8]* @str, i32 0, i32 0), i32 %a.val)
+ %b.i8 = call i8* @llvm.framerecover(i8* bitcast (void()* @alloc_func to i8*), i8* %fp, i32 1)
+ %b = bitcast i8* %b.i8 to i32*
+ %b.val = load i32, i32* %b
+ call i32 (i8*, ...) @printf(i8* getelementptr ([10 x i8], [10 x i8]* @str, i32 0, i32 0), i32 %b.val)
+ store i32 42, i32* %b
+ %b2 = getelementptr i32, i32* %b, i32 1
+ %b2.val = load i32, i32* %b2
+ call i32 (i8*, ...) @printf(i8* getelementptr ([10 x i8], [10 x i8]* @str, i32 0, i32 0), i32 %b2.val)
+ ret void
+}
+
+; X64-LABEL: print_framealloc_from_fp:
+; X64: movq %rcx, %[[parent_fp:[a-z]+]]
+; X64: movl .Lalloc_func$frame_escape_0(%[[parent_fp]]), %edx
+; X64: leaq {{.*}}(%rip), %[[str:[a-z]+]]
+; X64: movq %[[str]], %rcx
+; X64: callq printf
+; X64: movl .Lalloc_func$frame_escape_1(%[[parent_fp]]), %edx
+; X64: movq %[[str]], %rcx
+; X64: callq printf
+; X64: movl $42, .Lalloc_func$frame_escape_1(%[[parent_fp]])
+; X64: retq
+
+; X86-LABEL: print_framealloc_from_fp:
+; X86: pushl %esi
+; X86: subl $8, %esp
+; X86: movl 16(%esp), %esi
+; X86: movl Lalloc_func$frame_escape_0(%esi), %eax
+; X86: movl %eax, 4(%esp)
+; X86: movl $_str, (%esp)
+; X86: calll _printf
+; X86: movl Lalloc_func$frame_escape_1(%esi), %eax
+; X86: movl %eax, 4(%esp)
+; X86: movl $_str, (%esp)
+; X86: calll _printf
+; X86: movl $42, Lalloc_func$frame_escape_1(%esi)
+; X86: movl $4, %eax
+; X86: movl Lalloc_func$frame_escape_1(%esi,%eax), %eax
+; X86: movl %eax, 4(%esp)
+; X86: movl $_str, (%esp)
+; X86: calll _printf
+; X86: addl $8, %esp
+; X86: popl %esi
+; X86: retl
+
+define void @alloc_func() {
+ %a = alloca i32
+ %b = alloca i32, i32 2
+ call void (...) @llvm.frameescape(i32* %a, i32* %b)
+ store i32 42, i32* %a
+ store i32 13, i32* %b
+ %fp = call i8* @llvm.frameaddress(i32 0)
+ call void @print_framealloc_from_fp(i8* %fp)
+ ret void
+}
+
+; X64-LABEL: alloc_func:
+; X64: subq $48, %rsp
+; X64: .seh_stackalloc 48
+; X64: leaq 48(%rsp), %rbp
+; X64: .seh_setframe 5, 48
+; X64: .Lalloc_func$frame_escape_0 = 44
+; X64: .Lalloc_func$frame_escape_1 = 36
+; X64: movl $42, -4(%rbp)
+; X64: movl $13, -12(%rbp)
+; X64: leaq -48(%rbp), %rcx
+; X64: callq print_framealloc_from_fp
+; X64: retq
+
+; X86-LABEL: alloc_func:
+; X86: pushl %ebp
+; X86: movl %esp, %ebp
+; X86: subl $16, %esp
+; X86: Lalloc_func$frame_escape_0 = -4
+; X86: Lalloc_func$frame_escape_1 = -12
+; X86: movl $42, -4(%ebp)
+; X86: movl $13, -12(%ebp)
+; X86: movl %ebp, (%esp)
+; X86: calll _print_framealloc_from_fp
+; X86: addl $16, %esp
+; X86: popl %ebp
+; X86: retl
+
+; Helper to make this a complete program so it can be compiled and tested.
+define i32 @main() {
+ call void @alloc_func()
+ ret i32 0
+}
+
+define void @alloc_func_no_frameaddr() {
+ %a = alloca i32
+ %b = alloca i32
+ call void (...) @llvm.frameescape(i32* %a, i32* %b)
+ store i32 42, i32* %a
+ store i32 13, i32* %b
+ call void @print_framealloc_from_fp(i8* null)
+ ret void
+}
+
+; X64-LABEL: alloc_func_no_frameaddr:
+; X64: subq $40, %rsp
+; X64: .seh_stackalloc 40
+; X64: .seh_endprologue
+; X64: .Lalloc_func_no_frameaddr$frame_escape_0 = 36
+; X64: .Lalloc_func_no_frameaddr$frame_escape_1 = 32
+; X64: movl $42, 36(%rsp)
+; X64: movl $13, 32(%rsp)
+; X64: xorl %ecx, %ecx
+; X64: callq print_framealloc_from_fp
+; X64: addq $40, %rsp
+; X64: retq
+
+; X86-LABEL: alloc_func_no_frameaddr:
diff --git a/test/CodeGen/X86/full-lsr.ll b/test/CodeGen/X86/full-lsr.ll
index cbcc62a7011a..85b2b41fa191 100644
--- a/test/CodeGen/X86/full-lsr.ll
+++ b/test/CodeGen/X86/full-lsr.ll
@@ -18,20 +18,20 @@ entry:
bb: ; preds = %bb, %entry
%i.03 = phi i32 [ 0, %entry ], [ %indvar.next, %bb ] ; <i32> [#uses=5]
- %1 = getelementptr float* %A, i32 %i.03 ; <float*> [#uses=1]
- %2 = load float* %1, align 4 ; <float> [#uses=1]
- %3 = getelementptr float* %B, i32 %i.03 ; <float*> [#uses=1]
- %4 = load float* %3, align 4 ; <float> [#uses=1]
+ %1 = getelementptr float, float* %A, i32 %i.03 ; <float*> [#uses=1]
+ %2 = load float, float* %1, align 4 ; <float> [#uses=1]
+ %3 = getelementptr float, float* %B, i32 %i.03 ; <float*> [#uses=1]
+ %4 = load float, float* %3, align 4 ; <float> [#uses=1]
%5 = fadd float %2, %4 ; <float> [#uses=1]
- %6 = getelementptr float* %C, i32 %i.03 ; <float*> [#uses=1]
+ %6 = getelementptr float, float* %C, i32 %i.03 ; <float*> [#uses=1]
store float %5, float* %6, align 4
%7 = add i32 %i.03, 10 ; <i32> [#uses=3]
- %8 = getelementptr float* %A, i32 %7 ; <float*> [#uses=1]
- %9 = load float* %8, align 4 ; <float> [#uses=1]
- %10 = getelementptr float* %B, i32 %7 ; <float*> [#uses=1]
- %11 = load float* %10, align 4 ; <float> [#uses=1]
+ %8 = getelementptr float, float* %A, i32 %7 ; <float*> [#uses=1]
+ %9 = load float, float* %8, align 4 ; <float> [#uses=1]
+ %10 = getelementptr float, float* %B, i32 %7 ; <float*> [#uses=1]
+ %11 = load float, float* %10, align 4 ; <float> [#uses=1]
%12 = fadd float %9, %11 ; <float> [#uses=1]
- %13 = getelementptr float* %C, i32 %7 ; <float*> [#uses=1]
+ %13 = getelementptr float, float* %C, i32 %7 ; <float*> [#uses=1]
store float %12, float* %13, align 4
%indvar.next = add i32 %i.03, 1 ; <i32> [#uses=2]
%exitcond = icmp eq i32 %indvar.next, %N ; <i1> [#uses=1]
diff --git a/test/CodeGen/X86/function-subtarget-features-2.ll b/test/CodeGen/X86/function-subtarget-features-2.ll
new file mode 100644
index 000000000000..d7c7c2fdb6fe
--- /dev/null
+++ b/test/CodeGen/X86/function-subtarget-features-2.ll
@@ -0,0 +1,26 @@
+; RUN: llc < %s -march=x86-64 -filetype=obj -o - | llvm-objdump -d - | FileCheck %s
+
+; This test verifies that we assemble code for different architectures
+; based on target-cpu and target-features attributes.
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+define void @foo() #0 {
+entry:
+ call void asm sideeffect "aeskeygenassist $$0x4, %xmm0, %xmm1", "~{dirflag},~{fpsr},~{flags}"()
+ ret void
+}
+
+; CHECK: foo
+; CHECK: aeskeygenassist
+
+define void @bar() #2 {
+entry:
+ call void asm sideeffect "crc32b 4(%rbx), %eax", "~{dirflag},~{fpsr},~{flags}"()
+ ret void
+}
+
+; CHECK: bar
+; CHECK: crc32b
+
+attributes #0 = { "target-cpu"="x86-64" "target-features"="+avx2" }
+attributes #2 = { "target-cpu"="corei7" "target-features"="+sse4.2" }
diff --git a/test/CodeGen/X86/function-subtarget-features.ll b/test/CodeGen/X86/function-subtarget-features.ll
new file mode 100644
index 000000000000..b1e2585be004
--- /dev/null
+++ b/test/CodeGen/X86/function-subtarget-features.ll
@@ -0,0 +1,81 @@
+; RUN: llc < %s -march=x86-64 -o - | FileCheck %s
+
+; This test verifies that we produce different code for different architectures
+; based on target-cpu and target-features attributes.
+; In this case, AVX provides a vmovss instruction; otherwise we should be using movss
+; to materialize constants.
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+define float @_Z3barv() #0 {
+entry:
+ ret float 4.000000e+00
+}
+
+; CHECK: barv
+; CHECK: vmovss
+
+define float @_Z4testv() #1 {
+entry:
+ ret float 1.000000e+00
+}
+
+; CHECK: testv
+; CHECK: movss
+
+define float @_Z3foov() #2 {
+entry:
+ ret float 4.000000e+00
+}
+
+; CHECK: foov
+; CHECK: movss
+
+define float @_Z3bazv() #0 {
+entry:
+ ret float 4.000000e+00
+}
+
+; CHECK: bazv
+; CHECK: vmovss
+
+define <2 x i64> @foo(<2 x i64> %a) #3 {
+entry:
+ %a.addr = alloca <2 x i64>, align 16
+ store <2 x i64> %a, <2 x i64>* %a.addr, align 16
+ %0 = load <2 x i64>, <2 x i64>* %a.addr, align 16
+ %1 = call <2 x i64> @llvm.x86.aesni.aeskeygenassist(<2 x i64> %0, i8 4)
+ ret <2 x i64> %1
+}
+
+; Function Attrs: nounwind readnone
+declare <2 x i64> @llvm.x86.aesni.aeskeygenassist(<2 x i64>, i8)
+
+; CHECK: foo
+; CHECK: aeskeygenassist
+
+; Function Attrs: nounwind uwtable
+define i32 @bar(i32 %crc, i8* %a) #3 {
+entry:
+ %crc.addr = alloca i32, align 4
+ %a.addr = alloca i8*, align 8
+ store i32 %crc, i32* %crc.addr, align 4
+ store i8* %a, i8** %a.addr, align 8
+ %0 = load i32, i32* %crc.addr, align 4
+ %1 = load i8*, i8** %a.addr, align 8
+ %incdec.ptr = getelementptr inbounds i8, i8* %1, i32 1
+ store i8* %incdec.ptr, i8** %a.addr, align 8
+ %2 = load i8, i8* %1, align 1
+ %3 = call i32 @llvm.x86.sse42.crc32.32.8(i32 %0, i8 %2)
+ ret i32 %3
+}
+
+; Function Attrs: nounwind readnone
+declare i32 @llvm.x86.sse42.crc32.32.8(i32, i8)
+
+; CHECK: bar
+; CHECK: crc32b
+
+attributes #0 = { "target-cpu"="x86-64" "target-features"="+avx2" }
+attributes #1 = { "target-cpu"="x86-64" }
+attributes #2 = { "target-cpu"="corei7" "target-features"="+sse4.2" }
+attributes #3 = { "target-cpu"="x86-64" "target-features"="+avx2,+aes" }
diff --git a/test/CodeGen/X86/ga-offset.ll b/test/CodeGen/X86/ga-offset.ll
index 9f6d3f75cf84..934c14921e99 100644
--- a/test/CodeGen/X86/ga-offset.ll
+++ b/test/CodeGen/X86/ga-offset.ll
@@ -13,6 +13,6 @@
@dst = global [131072 x i32] zeroinitializer
define void @foo() nounwind {
- store i32* getelementptr ([131072 x i32]* @dst, i32 0, i32 16), i32** @ptr
+ store i32* getelementptr ([131072 x i32], [131072 x i32]* @dst, i32 0, i32 16), i32** @ptr
ret void
}
diff --git a/test/CodeGen/X86/gather-addresses.ll b/test/CodeGen/X86/gather-addresses.ll
index 6d397b211481..f7d4eb380d57 100644
--- a/test/CodeGen/X86/gather-addresses.ll
+++ b/test/CodeGen/X86/gather-addresses.ll
@@ -35,21 +35,21 @@
; WIN: movhpd (%rcx,%r[[REG4]],8), %xmm1
define <4 x double> @foo(double* %p, <4 x i32>* %i, <4 x i32>* %h) nounwind {
- %a = load <4 x i32>* %i
- %b = load <4 x i32>* %h
+ %a = load <4 x i32>, <4 x i32>* %i
+ %b = load <4 x i32>, <4 x i32>* %h
%j = and <4 x i32> %a, %b
%d0 = extractelement <4 x i32> %j, i32 0
%d1 = extractelement <4 x i32> %j, i32 1
%d2 = extractelement <4 x i32> %j, i32 2
%d3 = extractelement <4 x i32> %j, i32 3
- %q0 = getelementptr double* %p, i32 %d0
- %q1 = getelementptr double* %p, i32 %d1
- %q2 = getelementptr double* %p, i32 %d2
- %q3 = getelementptr double* %p, i32 %d3
- %r0 = load double* %q0
- %r1 = load double* %q1
- %r2 = load double* %q2
- %r3 = load double* %q3
+ %q0 = getelementptr double, double* %p, i32 %d0
+ %q1 = getelementptr double, double* %p, i32 %d1
+ %q2 = getelementptr double, double* %p, i32 %d2
+ %q3 = getelementptr double, double* %p, i32 %d3
+ %r0 = load double, double* %q0
+ %r1 = load double, double* %q1
+ %r2 = load double, double* %q2
+ %r3 = load double, double* %q3
%v0 = insertelement <4 x double> undef, double %r0, i32 0
%v1 = insertelement <4 x double> %v0, double %r1, i32 1
%v2 = insertelement <4 x double> %v1, double %r2, i32 2
@@ -67,8 +67,8 @@ define <4 x double> @foo(double* %p, <4 x i32>* %i, <4 x i32>* %h) nounwind {
; LIN32-DAG: {{(mov|and)}}l 8(%esp),
; LIN32-DAG: {{(mov|and)}}l 12(%esp),
define <4 x i64> @old(double* %p, <4 x i32>* %i, <4 x i32>* %h, i64 %f) nounwind {
- %a = load <4 x i32>* %i
- %b = load <4 x i32>* %h
+ %a = load <4 x i32>, <4 x i32>* %i
+ %b = load <4 x i32>, <4 x i32>* %h
%j = and <4 x i32> %a, %b
%d0 = extractelement <4 x i32> %j, i32 0
%d1 = extractelement <4 x i32> %j, i32 1
diff --git a/test/CodeGen/X86/gcc_except_table.ll b/test/CodeGen/X86/gcc_except_table.ll
index a732eb1efbd7..b656dc9d68e2 100644
--- a/test/CodeGen/X86/gcc_except_table.ll
+++ b/test/CodeGen/X86/gcc_except_table.ll
@@ -15,7 +15,7 @@ define i32 @main() uwtable optsize ssp {
; MINGW64: .seh_proc
; MINGW64: .seh_handler __gxx_personality_v0
-; MINGW64: .seh_setframe 5, 0
+; MINGW64: .seh_setframe 5, 32
; MINGW64: callq _Unwind_Resume
; MINGW64: .seh_handlerdata
; MINGW64: GCC_except_table0:
@@ -37,6 +37,7 @@ entry:
lpad:
%0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+ cleanup
catch i8* bitcast (i8** @_ZTIi to i8*)
br label %eh.resume
diff --git a/test/CodeGen/X86/gcc_except_table_functions.ll b/test/CodeGen/X86/gcc_except_table_functions.ll
index 4a8168050e56..7a64a01fa38d 100644
--- a/test/CodeGen/X86/gcc_except_table_functions.ll
+++ b/test/CodeGen/X86/gcc_except_table_functions.ll
@@ -20,6 +20,7 @@ try.cont:
lpad:
%0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+ cleanup
catch i8* bitcast (void ()* @filt0 to i8*)
catch i8* bitcast (void ()* @filt1 to i8*)
%sel = extractvalue { i8*, i32 } %0, 1
diff --git a/test/CodeGen/X86/getelementptr.ll b/test/CodeGen/X86/getelementptr.ll
index f403212700a6..e260e7d7b74a 100644
--- a/test/CodeGen/X86/getelementptr.ll
+++ b/test/CodeGen/X86/getelementptr.ll
@@ -9,35 +9,35 @@
define i8* @test_trunc65(i8* %ptr) nounwind {
; CHECK-LABEL: test_trunc65
; CHECK: 3
- %d = getelementptr i8* %ptr, i65 18446744073709551619 ; 2^64 + 3
+ %d = getelementptr i8, i8* %ptr, i65 18446744073709551619 ; 2^64 + 3
ret i8* %d
}
define i8* @test_trunc128(i8* %ptr) nounwind {
; CHECK-LABEL: test_trunc128
; CHECK: 5
- %d = getelementptr i8* %ptr, i128 18446744073709551621 ; 2^64 + 5
+ %d = getelementptr i8, i8* %ptr, i128 18446744073709551621 ; 2^64 + 5
ret i8* %d
}
define i8* @test_trunc160(i8* %ptr) nounwind {
; CHECK-LABEL: test_trunc160
; CHECK: 8
- %d = getelementptr i8* %ptr, i160 18446744073709551624 ; 2^64 + 8
+ %d = getelementptr i8, i8* %ptr, i160 18446744073709551624 ; 2^64 + 8
ret i8* %d
}
define i8* @test_trunc256(i8* %ptr) nounwind {
; CHECK-LABEL: test_trunc256
; CHECK: 13
- %d = getelementptr i8* %ptr, i256 18446744073709551629 ; 2^64 + 13
+ %d = getelementptr i8, i8* %ptr, i256 18446744073709551629 ; 2^64 + 13
ret i8* %d
}
define i8* @test_trunc2048(i8* %ptr) nounwind {
; CHECK-LABEL: test_trunc2048
; CHECK: 21
- %d = getelementptr i8* %ptr, i2048 18446744073709551637 ; 2^64 + 21
+ %d = getelementptr i8, i8* %ptr, i2048 18446744073709551637 ; 2^64 + 21
ret i8* %d
}
@@ -47,34 +47,34 @@ define i8* @test_trunc2048(i8* %ptr) nounwind {
define i8* @test_sext3(i8* %ptr) nounwind {
; CHECK-LABEL: test_sext3
; CHECK: -3
- %d = getelementptr i8* %ptr, i3 -3
+ %d = getelementptr i8, i8* %ptr, i3 -3
ret i8* %d
}
define i8* @test_sext5(i8* %ptr) nounwind {
; CHECK-LABEL: test_sext5
; CHECK: -5
- %d = getelementptr i8* %ptr, i5 -5
+ %d = getelementptr i8, i8* %ptr, i5 -5
ret i8* %d
}
define i8* @test_sext8(i8* %ptr) nounwind {
; CHECK-LABEL: test_sext8
; CHECK: -8
- %d = getelementptr i8* %ptr, i8 -8
+ %d = getelementptr i8, i8* %ptr, i8 -8
ret i8* %d
}
define i8* @test_sext13(i8* %ptr) nounwind {
; CHECK-LABEL: test_sext13
; CHECK: -13
- %d = getelementptr i8* %ptr, i8 -13
+ %d = getelementptr i8, i8* %ptr, i8 -13
ret i8* %d
}
define i8* @test_sext16(i8* %ptr) nounwind {
; CHECK-LABEL: test_sext16
; CHECK: -21
- %d = getelementptr i8* %ptr, i8 -21
+ %d = getelementptr i8, i8* %ptr, i8 -21
ret i8* %d
}
diff --git a/test/CodeGen/X86/ghc-cc.ll b/test/CodeGen/X86/ghc-cc.ll
index 3ada8c8ce98e..16e4db60502d 100644
--- a/test/CodeGen/X86/ghc-cc.ll
+++ b/test/CodeGen/X86/ghc-cc.ll
@@ -32,10 +32,10 @@ entry:
; CHECK-NEXT: movl hp, %edi
; CHECK-NEXT: movl sp, %ebp
; CHECK-NEXT: movl base, %ebx
- %0 = load i32* @r1
- %1 = load i32* @hp
- %2 = load i32* @sp
- %3 = load i32* @base
+ %0 = load i32, i32* @r1
+ %1 = load i32, i32* @hp
+ %2 = load i32, i32* @sp
+ %3 = load i32, i32* @base
; CHECK: jmp bar
tail call ghccc void @bar( i32 %3, i32 %2, i32 %1, i32 %0 ) nounwind
ret void
diff --git a/test/CodeGen/X86/ghc-cc64.ll b/test/CodeGen/X86/ghc-cc64.ll
index 7251dd673b30..c4ce8cfdef13 100644
--- a/test/CodeGen/X86/ghc-cc64.ll
+++ b/test/CodeGen/X86/ghc-cc64.ll
@@ -57,22 +57,22 @@ entry:
; CHECK-NEXT: movq hp(%rip), %r12
; CHECK-NEXT: movq sp(%rip), %rbp
; CHECK-NEXT: movq base(%rip), %r13
- %0 = load double* @d2
- %1 = load double* @d1
- %2 = load float* @f4
- %3 = load float* @f3
- %4 = load float* @f2
- %5 = load float* @f1
- %6 = load i64* @splim
- %7 = load i64* @r6
- %8 = load i64* @r5
- %9 = load i64* @r4
- %10 = load i64* @r3
- %11 = load i64* @r2
- %12 = load i64* @r1
- %13 = load i64* @hp
- %14 = load i64* @sp
- %15 = load i64* @base
+ %0 = load double, double* @d2
+ %1 = load double, double* @d1
+ %2 = load float, float* @f4
+ %3 = load float, float* @f3
+ %4 = load float, float* @f2
+ %5 = load float, float* @f1
+ %6 = load i64, i64* @splim
+ %7 = load i64, i64* @r6
+ %8 = load i64, i64* @r5
+ %9 = load i64, i64* @r4
+ %10 = load i64, i64* @r3
+ %11 = load i64, i64* @r2
+ %12 = load i64, i64* @r1
+ %13 = load i64, i64* @hp
+ %14 = load i64, i64* @sp
+ %15 = load i64, i64* @base
; CHECK: jmp bar
tail call ghccc void @bar( i64 %15, i64 %14, i64 %13, i64 %12, i64 %11,
i64 %10, i64 %9, i64 %8, i64 %7, i64 %6,
diff --git a/test/CodeGen/X86/global-sections-comdat.ll b/test/CodeGen/X86/global-sections-comdat.ll
new file mode 100644
index 000000000000..730050dda5f3
--- /dev/null
+++ b/test/CodeGen/X86/global-sections-comdat.ll
@@ -0,0 +1,46 @@
+; RUN: llc < %s -mtriple=i386-unknown-linux | FileCheck %s -check-prefix=LINUX
+; RUN: llc < %s -mtriple=i386-unknown-linux -data-sections -function-sections | FileCheck %s -check-prefix=LINUX-SECTIONS
+; RUN: llc < %s -mtriple=i386-unknown-linux -data-sections -function-sections -unique-section-names=false | FileCheck %s -check-prefix=LINUX-SECTIONS-SHORT
+
+$F1 = comdat any
+define void @F1(i32 %y) comdat {
+bb0:
+switch i32 %y, label %bb5 [
+ i32 1, label %bb1
+ i32 2, label %bb2
+ i32 3, label %bb3
+ i32 4, label %bb4
+ ]
+bb1:
+ ret void
+bb2:
+ ret void
+bb3:
+ ret void
+bb4:
+ ret void
+bb5:
+ ret void
+}
+
+; LINUX: .section .text.F1,"axG",@progbits,F1,comdat
+; LINUX: .size F1,
+; LINUX-NEXT: .cfi_endproc
+; LINUX-NEXT: .section .rodata.F1,"aG",@progbits,F1,comdat
+
+; LINUX-SECTIONS: .section .text.F1,"axG",@progbits,F1,comdat
+; LINUX-SECTIONS: .size F1,
+; LINUX-SECTIONS-NEXT: .cfi_endproc
+; LINUX-SECTIONS-NEXT: .section .rodata.F1,"aG",@progbits,F1,comdat
+
+; LINUX-SECTIONS-SHORT: .section .text,"axG",@progbits,F1,comdat
+; LINUX-SECTIONS-SHORT: .size F1,
+; LINUX-SECTIONS-SHORT-NEXT: .cfi_endproc
+; LINUX-SECTIONS-SHORT-NEXT: .section .rodata,"aG",@progbits,F1,comdat
+
+$G16 = comdat any
+@G16 = unnamed_addr constant i32 42, comdat
+
+; LINUX: .section .rodata.cst4.G16,"aGM",@progbits,4,G16,comdat
+; LINUX-SECTIONS: .section .rodata.cst4.G16,"aGM",@progbits,4,G16,comdat
+; LINUX-SECTIONS-SHORT: .section .rodata.cst4,"aGM",@progbits,4,G16,comdat
diff --git a/test/CodeGen/X86/global-sections.ll b/test/CodeGen/X86/global-sections.ll
index d6e45ad79ea9..8c61411e53eb 100644
--- a/test/CodeGen/X86/global-sections.ll
+++ b/test/CodeGen/X86/global-sections.ll
@@ -2,8 +2,11 @@
; RUN: llc < %s -mtriple=i386-apple-darwin9.7 | FileCheck %s -check-prefix=DARWIN
; RUN: llc < %s -mtriple=i386-apple-darwin10 -relocation-model=static | FileCheck %s -check-prefix=DARWIN-STATIC
; RUN: llc < %s -mtriple=x86_64-apple-darwin10 | FileCheck %s -check-prefix=DARWIN64
-; RUN: llc < %s -mtriple=i386-unknown-linux-gnu -data-sections | FileCheck %s -check-prefix=LINUX-SECTIONS
+; RUN: llc < %s -mtriple=i386-unknown-linux-gnu -data-sections -function-sections | FileCheck %s -check-prefix=LINUX-SECTIONS
+; RUN: llc < %s -mtriple=i386-unknown-linux-gnu -function-sections | FileCheck %s -check-prefix=LINUX-FUNC-SECTIONS
+; RUN: llc < %s -mtriple=x86_64-pc-linux -data-sections -function-sections -relocation-model=pic | FileCheck %s -check-prefix=LINUX-SECTIONS-PIC
; RUN: llc < %s -mtriple=i686-pc-win32 -data-sections -function-sections | FileCheck %s -check-prefix=WIN32-SECTIONS
+; RUN: llc < %s -mtriple=i686-pc-win32 -function-sections | FileCheck %s -check-prefix=WIN32-FUNC-SECTIONS
define void @F1() {
ret void
@@ -12,6 +15,91 @@ define void @F1() {
; WIN32-SECTIONS: .section .text,"xr",one_only,_F1
; WIN32-SECTIONS: .globl _F1
+define void @F2(i32 %y) {
+bb0:
+switch i32 %y, label %bb5 [
+ i32 1, label %bb1
+ i32 2, label %bb2
+ i32 3, label %bb3
+ i32 4, label %bb4
+ ]
+bb1:
+ ret void
+bb2:
+ ret void
+bb3:
+ ret void
+bb4:
+ ret void
+bb5:
+ ret void
+}
+
+; LINUX: .size F2,
+; LINUX-NEXT: .cfi_endproc
+; LINUX-NEXT: .section .rodata,"a",@progbits
+
+; LINUX-SECTIONS: .section .text.F2,"ax",@progbits
+; LINUX-SECTIONS: .size F2,
+; LINUX-SECTIONS-NEXT: .cfi_endproc
+; LINUX-SECTIONS-NEXT: .section .rodata.F2,"a",@progbits
+
+; LINUX-FUNC-SECTIONS: .section .text.F2,"ax",@progbits
+; LINUX-FUNC-SECTIONS: .size F2,
+; LINUX-FUNC-SECTIONS-NEXT: .cfi_endproc
+; LINUX-FUNC-SECTIONS-NEXT: .section .rodata.F2,"a",@progbits
+
+; WIN32-FUNC-SECTIONS: .section .text,"xr",one_only,_F2
+; WIN32-FUNC-SECTIONS-NOT: .section
+; WIN32-FUNC-SECTIONS: .section .rdata,"dr",associative,_F2
+
+
+; LINUX-SECTIONS-PIC: .section .text.F2,"ax",@progbits
+; LINUX-SECTIONS-PIC: .size F2,
+; LINUX-SECTIONS-PIC-NEXT: .cfi_endproc
+; LINUX-SECTIONS-PIC-NEXT: .section .rodata.F2,"a",@progbits
+
+declare void @G()
+
+define void @F3(i32 %y) {
+bb0:
+ invoke void @G()
+ to label %bb2 unwind label %bb1
+bb1:
+ landingpad { i8*, i32 } personality i8* bitcast (void ()* @G to i8*)
+ catch i8* null
+ br label %bb2
+bb2:
+
+switch i32 %y, label %bb7 [
+ i32 1, label %bb3
+ i32 2, label %bb4
+ i32 3, label %bb5
+ i32 4, label %bb6
+ ]
+bb3:
+ ret void
+bb4:
+ ret void
+bb5:
+ ret void
+bb6:
+ ret void
+bb7:
+ ret void
+}
+
+; DARWIN64: _F3:
+; DARWIN64: Lfunc_end
+; DARWIN64-NEXT: .cfi_endproc
+; DARWIN64-NOT: .section
+; DARWIN64: LJTI{{.*}}:
+; DARWIN64-NEXT: .long
+; DARWIN64-NEXT: .long
+; DARWIN64-NEXT: .long
+; DARWIN64-NEXT: .long
+; DARWIN64-NEXT: .section __TEXT,__gcc_except_tab
+
; int G1;
@G1 = common global i32 0
@@ -85,7 +173,6 @@ define void @F1() {
@"foo bar" = linkonce global i32 42
; LINUX: .type "foo bar",@object
-; LINUX: .section ".data.foo bar","aGw",@progbits,"foo bar",comdat
; LINUX: .weak "foo bar"
; LINUX: "foo bar":
@@ -98,7 +185,6 @@ define void @F1() {
@G6 = weak_odr unnamed_addr constant [1 x i8] c"\01"
; LINUX: .type G6,@object
-; LINUX: .section .rodata.G6,"aG",@progbits,G6,comdat
; LINUX: .weak G6
; LINUX: G6:
; LINUX: .byte 1
@@ -123,7 +209,7 @@ define void @F1() {
; LINUX: G7:
; LINUX: .asciz "abcdefghi"
-; LINUX-SECTIONS: .section .rodata.G7,"aMS",@progbits,1
+; LINUX-SECTIONS: .section .rodata.str1.1,"aMS",@progbits,1
; LINUX-SECTIONS: .globl G7
; WIN32-SECTIONS: .section .rdata,"dr",one_only,_G7
@@ -184,13 +270,13 @@ define void @F1() {
@G14 = private unnamed_addr constant [4 x i8] c"foo\00", align 1
; LINUX-SECTIONS: .type .LG14,@object # @G14
-; LINUX-SECTIONS: .section .rodata..LG14,"aMS",@progbits,1
+; LINUX-SECTIONS: .section .rodata.str1.1,"aMS",@progbits,1
; LINUX-SECTIONS: .LG14:
; LINUX-SECTIONS: .asciz "foo"
; LINUX-SECTIONS: .size .LG14, 4
-; WIN32-SECTIONS: .section .rdata,"dr"
-; WIN32-SECTIONS: L_G14:
+; WIN32-SECTIONS: .section .rdata,"dr",one_only,_G14
+; WIN32-SECTIONS: _G14:
; WIN32-SECTIONS: .asciz "foo"
; cannot be merged on MachO, but can on other formats.
@@ -208,7 +294,7 @@ define void @F1() {
; DARWIN64: .section __TEXT,__const
; DARWIN64: _G15:
-; LINUX-SECTIONS: .section .rodata.G15,"aM",@progbits,8
+; LINUX-SECTIONS: .section .rodata.cst8,"aM",@progbits,8
; LINUX-SECTIONS: G15:
; WIN32-SECTIONS: .section .rdata,"dr",one_only,_G15
diff --git a/test/CodeGen/X86/gs-fold.ll b/test/CodeGen/X86/gs-fold.ll
index dbec76ba52c4..bbdd0339f366 100644
--- a/test/CodeGen/X86/gs-fold.ll
+++ b/test/CodeGen/X86/gs-fold.ll
@@ -6,9 +6,9 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
define i32 @test() nounwind uwtable {
entry:
- %0 = load volatile %struct.thread* addrspace(256)* null
- %c = getelementptr inbounds %struct.thread* %0, i64 0, i32 2
- %1 = load i32* %c, align 4
+ %0 = load volatile %struct.thread*, %struct.thread* addrspace(256)* null
+ %c = getelementptr inbounds %struct.thread, %struct.thread* %0, i64 0, i32 2
+ %1 = load i32, i32* %c, align 4
ret i32 %1
}
diff --git a/test/CodeGen/X86/h-register-addressing-32.ll b/test/CodeGen/X86/h-register-addressing-32.ll
index 68e8c605f678..d0214137b0e4 100644
--- a/test/CodeGen/X86/h-register-addressing-32.ll
+++ b/test/CodeGen/X86/h-register-addressing-32.ll
@@ -5,8 +5,8 @@
define double @foo8(double* nocapture inreg %p, i32 inreg %x) nounwind readonly {
%t0 = lshr i32 %x, 8
%t1 = and i32 %t0, 255
- %t2 = getelementptr double* %p, i32 %t1
- %t3 = load double* %t2, align 8
+ %t2 = getelementptr double, double* %p, i32 %t1
+ %t3 = load double, double* %t2, align 8
ret double %t3
}
; CHECK: foo8:
@@ -15,8 +15,8 @@ define double @foo8(double* nocapture inreg %p, i32 inreg %x) nounwind readonly
define float @foo4(float* nocapture inreg %p, i32 inreg %x) nounwind readonly {
%t0 = lshr i32 %x, 8
%t1 = and i32 %t0, 255
- %t2 = getelementptr float* %p, i32 %t1
- %t3 = load float* %t2, align 8
+ %t2 = getelementptr float, float* %p, i32 %t1
+ %t3 = load float, float* %t2, align 8
ret float %t3
}
; CHECK: foo4:
@@ -25,8 +25,8 @@ define float @foo4(float* nocapture inreg %p, i32 inreg %x) nounwind readonly {
define i16 @foo2(i16* nocapture inreg %p, i32 inreg %x) nounwind readonly {
%t0 = lshr i32 %x, 8
%t1 = and i32 %t0, 255
- %t2 = getelementptr i16* %p, i32 %t1
- %t3 = load i16* %t2, align 8
+ %t2 = getelementptr i16, i16* %p, i32 %t1
+ %t3 = load i16, i16* %t2, align 8
ret i16 %t3
}
; CHECK: foo2:
@@ -35,8 +35,8 @@ define i16 @foo2(i16* nocapture inreg %p, i32 inreg %x) nounwind readonly {
define i8 @foo1(i8* nocapture inreg %p, i32 inreg %x) nounwind readonly {
%t0 = lshr i32 %x, 8
%t1 = and i32 %t0, 255
- %t2 = getelementptr i8* %p, i32 %t1
- %t3 = load i8* %t2, align 8
+ %t2 = getelementptr i8, i8* %p, i32 %t1
+ %t3 = load i8, i8* %t2, align 8
ret i8 %t3
}
; CHECK: foo1:
@@ -45,8 +45,8 @@ define i8 @foo1(i8* nocapture inreg %p, i32 inreg %x) nounwind readonly {
define i8 @bar8(i8* nocapture inreg %p, i32 inreg %x) nounwind readonly {
%t0 = lshr i32 %x, 5
%t1 = and i32 %t0, 2040
- %t2 = getelementptr i8* %p, i32 %t1
- %t3 = load i8* %t2, align 8
+ %t2 = getelementptr i8, i8* %p, i32 %t1
+ %t3 = load i8, i8* %t2, align 8
ret i8 %t3
}
; CHECK: bar8:
@@ -55,8 +55,8 @@ define i8 @bar8(i8* nocapture inreg %p, i32 inreg %x) nounwind readonly {
define i8 @bar4(i8* nocapture inreg %p, i32 inreg %x) nounwind readonly {
%t0 = lshr i32 %x, 6
%t1 = and i32 %t0, 1020
- %t2 = getelementptr i8* %p, i32 %t1
- %t3 = load i8* %t2, align 8
+ %t2 = getelementptr i8, i8* %p, i32 %t1
+ %t3 = load i8, i8* %t2, align 8
ret i8 %t3
}
; CHECK: bar4:
@@ -65,8 +65,8 @@ define i8 @bar4(i8* nocapture inreg %p, i32 inreg %x) nounwind readonly {
define i8 @bar2(i8* nocapture inreg %p, i32 inreg %x) nounwind readonly {
%t0 = lshr i32 %x, 7
%t1 = and i32 %t0, 510
- %t2 = getelementptr i8* %p, i32 %t1
- %t3 = load i8* %t2, align 8
+ %t2 = getelementptr i8, i8* %p, i32 %t1
+ %t3 = load i8, i8* %t2, align 8
ret i8 %t3
}
; CHECK: bar2:
diff --git a/test/CodeGen/X86/h-register-addressing-64.ll b/test/CodeGen/X86/h-register-addressing-64.ll
index 3f549d26c2fe..b3159f4896a8 100644
--- a/test/CodeGen/X86/h-register-addressing-64.ll
+++ b/test/CodeGen/X86/h-register-addressing-64.ll
@@ -5,8 +5,8 @@
define double @foo8(double* nocapture inreg %p, i64 inreg %x) nounwind readonly {
%t0 = lshr i64 %x, 8
%t1 = and i64 %t0, 255
- %t2 = getelementptr double* %p, i64 %t1
- %t3 = load double* %t2, align 8
+ %t2 = getelementptr double, double* %p, i64 %t1
+ %t3 = load double, double* %t2, align 8
ret double %t3
}
; CHECK: foo8:
@@ -15,8 +15,8 @@ define double @foo8(double* nocapture inreg %p, i64 inreg %x) nounwind readonly
define float @foo4(float* nocapture inreg %p, i64 inreg %x) nounwind readonly {
%t0 = lshr i64 %x, 8
%t1 = and i64 %t0, 255
- %t2 = getelementptr float* %p, i64 %t1
- %t3 = load float* %t2, align 8
+ %t2 = getelementptr float, float* %p, i64 %t1
+ %t3 = load float, float* %t2, align 8
ret float %t3
}
; CHECK: foo4:
@@ -25,8 +25,8 @@ define float @foo4(float* nocapture inreg %p, i64 inreg %x) nounwind readonly {
define i16 @foo2(i16* nocapture inreg %p, i64 inreg %x) nounwind readonly {
%t0 = lshr i64 %x, 8
%t1 = and i64 %t0, 255
- %t2 = getelementptr i16* %p, i64 %t1
- %t3 = load i16* %t2, align 8
+ %t2 = getelementptr i16, i16* %p, i64 %t1
+ %t3 = load i16, i16* %t2, align 8
ret i16 %t3
}
; CHECK: foo2:
@@ -35,8 +35,8 @@ define i16 @foo2(i16* nocapture inreg %p, i64 inreg %x) nounwind readonly {
define i8 @foo1(i8* nocapture inreg %p, i64 inreg %x) nounwind readonly {
%t0 = lshr i64 %x, 8
%t1 = and i64 %t0, 255
- %t2 = getelementptr i8* %p, i64 %t1
- %t3 = load i8* %t2, align 8
+ %t2 = getelementptr i8, i8* %p, i64 %t1
+ %t3 = load i8, i8* %t2, align 8
ret i8 %t3
}
; CHECK: foo1:
@@ -45,8 +45,8 @@ define i8 @foo1(i8* nocapture inreg %p, i64 inreg %x) nounwind readonly {
define i8 @bar8(i8* nocapture inreg %p, i64 inreg %x) nounwind readonly {
%t0 = lshr i64 %x, 5
%t1 = and i64 %t0, 2040
- %t2 = getelementptr i8* %p, i64 %t1
- %t3 = load i8* %t2, align 8
+ %t2 = getelementptr i8, i8* %p, i64 %t1
+ %t3 = load i8, i8* %t2, align 8
ret i8 %t3
}
; CHECK: bar8:
@@ -55,8 +55,8 @@ define i8 @bar8(i8* nocapture inreg %p, i64 inreg %x) nounwind readonly {
define i8 @bar4(i8* nocapture inreg %p, i64 inreg %x) nounwind readonly {
%t0 = lshr i64 %x, 6
%t1 = and i64 %t0, 1020
- %t2 = getelementptr i8* %p, i64 %t1
- %t3 = load i8* %t2, align 8
+ %t2 = getelementptr i8, i8* %p, i64 %t1
+ %t3 = load i8, i8* %t2, align 8
ret i8 %t3
}
; CHECK: bar4:
@@ -65,8 +65,8 @@ define i8 @bar4(i8* nocapture inreg %p, i64 inreg %x) nounwind readonly {
define i8 @bar2(i8* nocapture inreg %p, i64 inreg %x) nounwind readonly {
%t0 = lshr i64 %x, 7
%t1 = and i64 %t0, 510
- %t2 = getelementptr i8* %p, i64 %t1
- %t3 = load i8* %t2, align 8
+ %t2 = getelementptr i8, i8* %p, i64 %t1
+ %t3 = load i8, i8* %t2, align 8
ret i8 %t3
}
; CHECK: bar2:
diff --git a/test/CodeGen/X86/h-registers-2.ll b/test/CodeGen/X86/h-registers-2.ll
index 91acb7d5bb1c..d244ab48a2cd 100644
--- a/test/CodeGen/X86/h-registers-2.ll
+++ b/test/CodeGen/X86/h-registers-2.ll
@@ -14,7 +14,7 @@ define i32 @foo(i8* %x, i32 %y) nounwind {
%t0 = lshr i32 %y, 8 ; <i32> [#uses=1]
%t1 = and i32 %t0, 255 ; <i32> [#uses=2]
%t2 = shl i32 %t1, 3
- %t3 = getelementptr i8* %x, i32 %t2 ; <i8*> [#uses=1]
+ %t3 = getelementptr i8, i8* %x, i32 %t2 ; <i8*> [#uses=1]
store i8 77, i8* %t3, align 4
ret i32 %t2
}
diff --git a/test/CodeGen/X86/h-registers-3.ll b/test/CodeGen/X86/h-registers-3.ll
index 8a0b07b31c27..29d0c280c4fb 100644
--- a/test/CodeGen/X86/h-registers-3.ll
+++ b/test/CodeGen/X86/h-registers-3.ll
@@ -3,7 +3,7 @@
define zeroext i8 @foo() nounwind ssp {
entry:
- %0 = tail call zeroext i16 (...)* @bar() nounwind
+ %0 = tail call zeroext i16 (...) @bar() nounwind
%1 = lshr i16 %0, 8
%2 = trunc i16 %1 to i8
ret i8 %2
diff --git a/test/CodeGen/X86/haddsub-2.ll b/test/CodeGen/X86/haddsub-2.ll
index ff939a99427e..c6bac5858807 100644
--- a/test/CodeGen/X86/haddsub-2.ll
+++ b/test/CodeGen/X86/haddsub-2.ll
@@ -1,7 +1,7 @@
; RUN: llc < %s -march=x86-64 -mattr=+sse2,+sse3 | FileCheck %s -check-prefix=CHECK -check-prefix=SSE3
; RUN: llc < %s -march=x86-64 -mattr=+sse2,+sse3,+ssse3 | FileCheck %s -check-prefix=CHECK -check-prefix=SSSE3
-; RUN: llc < %s -march=x86-64 -mcpu=corei7-avx | FileCheck %s -check-prefix=CHECK -check-prefix=AVX
-; RUN: llc < %s -march=x86-64 -mcpu=core-avx2 | FileCheck %s -check-prefix=CHECK -check-prefix=AVX2
+; RUN: llc < %s -march=x86-64 -mattr=+avx | FileCheck %s -check-prefix=CHECK -check-prefix=AVX
+; RUN: llc < %s -march=x86-64 -mattr=+avx2 | FileCheck %s -check-prefix=CHECK -check-prefix=AVX2
diff --git a/test/CodeGen/X86/haddsub-undef.ll b/test/CodeGen/X86/haddsub-undef.ll
index 954a9d994e61..dfe5fff72d07 100644
--- a/test/CodeGen/X86/haddsub-undef.ll
+++ b/test/CodeGen/X86/haddsub-undef.ll
@@ -1,6 +1,6 @@
-; RUN: llc < %s -march=x86-64 -mcpu=corei7 -mattr=+ssse3 | FileCheck %s -check-prefix=CHECK -check-prefix=SSE
-; RUN: llc < %s -march=x86-64 -mcpu=corei7-avx | FileCheck %s -check-prefix=CHECK -check-prefix=AVX
-; RUN: llc < %s -march=x86-64 -mcpu=core-avx2 | FileCheck %s -check-prefix=CHECK -check-prefix=AVX2
+; RUN: llc < %s -march=x86-64 -mattr=ssse3 | FileCheck %s -check-prefix=CHECK -check-prefix=SSE
+; RUN: llc < %s -march=x86-64 -mattr=avx | FileCheck %s -check-prefix=CHECK -check-prefix=AVX
+; RUN: llc < %s -march=x86-64 -mattr=avx2 | FileCheck %s -check-prefix=CHECK -check-prefix=AVX2
; Verify that we correctly fold horizontal binop even in the presence of UNDEFs.
diff --git a/test/CodeGen/X86/haddsub.ll b/test/CodeGen/X86/haddsub.ll
index 9feb5f6ea6e7..6e65c6c739ca 100644
--- a/test/CodeGen/X86/haddsub.ll
+++ b/test/CodeGen/X86/haddsub.ll
@@ -283,3 +283,18 @@ define <4 x double> @vhsubpd1(<4 x double> %x, <4 x double> %y) {
%r = fsub <4 x double> %a, %b
ret <4 x double> %r
}
+
+; CHECK-LABEL: haddps_v2f32
+; CHECK: haddps %xmm{{[0-9]+}}, %xmm0
+; CHECK-NEXT: retq
+define <2 x float> @haddps_v2f32(<4 x float> %v0) {
+ %v0.0 = extractelement <4 x float> %v0, i32 0
+ %v0.1 = extractelement <4 x float> %v0, i32 1
+ %v0.2 = extractelement <4 x float> %v0, i32 2
+ %v0.3 = extractelement <4 x float> %v0, i32 3
+ %op0 = fadd float %v0.0, %v0.1
+ %op1 = fadd float %v0.2, %v0.3
+ %res0 = insertelement <2 x float> undef, float %op0, i32 0
+ %res1 = insertelement <2 x float> %res0, float %op1, i32 1
+ ret <2 x float> %res1
+}
diff --git a/test/CodeGen/X86/half.ll b/test/CodeGen/X86/half.ll
index 1dcf93939b8b..8a726370f19a 100644
--- a/test/CodeGen/X86/half.ll
+++ b/test/CodeGen/X86/half.ll
@@ -1,11 +1,13 @@
-; RUN: llc < %s -march=x86-64 -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -mattr=-f16c | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-LIBCALL
-; RUN: llc < %s -march=x86-64 -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -mattr=+f16c | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-F16C
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -mattr=-f16c -asm-verbose=false \
+; RUN: | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-LIBCALL
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -mattr=+f16c -asm-verbose=false \
+; RUN: | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-F16C
define void @test_load_store(half* %in, half* %out) {
; CHECK-LABEL: test_load_store:
; CHECK: movw (%rdi), [[TMP:%[a-z0-9]+]]
; CHECK: movw [[TMP]], (%rsi)
- %val = load half* %in
+ %val = load half, half* %in
store half %val, half* %out
ret void
}
@@ -13,7 +15,7 @@ define void @test_load_store(half* %in, half* %out) {
define i16 @test_bitcast_from_half(half* %addr) {
; CHECK-LABEL: test_bitcast_from_half:
; CHECK: movzwl (%rdi), %eax
- %val = load half* %addr
+ %val = load half, half* %addr
%val_int = bitcast half %val to i16
ret i16 %val_int
}
@@ -30,8 +32,8 @@ define float @test_extend32(half* %addr) {
; CHECK-LABEL: test_extend32:
; CHECK-LIBCALL: jmp __gnu_h2f_ieee
-; CHECK-FP16: vcvtph2ps
- %val16 = load half* %addr
+; CHECK-F16C: vcvtph2ps
+ %val16 = load half, half* %addr
%val32 = fpext half %val16 to float
ret float %val32
}
@@ -41,9 +43,9 @@ define double @test_extend64(half* %addr) {
; CHECK-LIBCALL: callq __gnu_h2f_ieee
; CHECK-LIBCALL: cvtss2sd
-; CHECK-FP16: vcvtph2ps
-; CHECK-FP16: vcvtss2sd
- %val16 = load half* %addr
+; CHECK-F16C: vcvtph2ps
+; CHECK-F16C: vcvtss2sd
+ %val16 = load half, half* %addr
%val32 = fpext half %val16 to double
ret double %val32
}
@@ -52,7 +54,7 @@ define void @test_trunc32(float %in, half* %addr) {
; CHECK-LABEL: test_trunc32:
; CHECK-LIBCALL: callq __gnu_f2h_ieee
-; CHECK-FP16: vcvtps2ph
+; CHECK-F16C: vcvtps2ph
%val16 = fptrunc float %in to half
store half %val16, half* %addr
ret void
@@ -62,8 +64,200 @@ define void @test_trunc64(double %in, half* %addr) {
; CHECK-LABEL: test_trunc64:
; CHECK-LIBCALL: callq __truncdfhf2
-; CHECK-FP16: callq __truncdfhf2
+; CHECK-F16C: callq __truncdfhf2
%val16 = fptrunc double %in to half
store half %val16, half* %addr
ret void
}
+
+define i64 @test_fptosi_i64(half* %p) #0 {
+; CHECK-LABEL: test_fptosi_i64:
+
+; CHECK-LIBCALL-NEXT: pushq %rax
+; CHECK-LIBCALL-NEXT: movzwl (%rdi), %edi
+; CHECK-LIBCALL-NEXT: callq __gnu_h2f_ieee
+; CHECK-LIBCALL-NEXT: cvttss2si %xmm0, %rax
+; CHECK-LIBCALL-NEXT: popq %rdx
+; CHECK-LIBCALL-NEXT: retq
+
+; CHECK-F16C-NEXT: movswl (%rdi), [[REG0:%[a-z0-9]+]]
+; CHECK-F16C-NEXT: vmovd [[REG0]], [[REG1:%[a-z0-9]+]]
+; CHECK-F16C-NEXT: vcvtph2ps [[REG1]], [[REG2:%[a-z0-9]+]]
+; CHECK-F16C-NEXT: vcvttss2si [[REG2]], %rax
+; CHECK-F16C-NEXT: retq
+ %a = load half, half* %p, align 2
+ %r = fptosi half %a to i64
+ ret i64 %r
+}
+
+define void @test_sitofp_i64(i64 %a, half* %p) #0 {
+; CHECK-LABEL: test_sitofp_i64:
+
+; CHECK-LIBCALL-NEXT: pushq [[ADDR:%[a-z]+]]
+; CHECK-LIBCALL-NEXT: movq %rsi, [[ADDR]]
+; CHECK-LIBCALL-NEXT: cvtsi2ssq %rdi, %xmm0
+; CHECK-LIBCALL-NEXT: callq __gnu_f2h_ieee
+; CHECK-LIBCALL-NEXT: movw %ax, ([[ADDR]])
+; CHECK-LIBCALL-NEXT: popq [[ADDR]]
+; CHECK-LIBCALL-NEXT: retq
+
+; CHECK-F16C-NEXT: vcvtsi2ssq %rdi, [[REG0:%[a-z0-9]+]], [[REG0]]
+; CHECK-F16C-NEXT: vcvtps2ph $0, [[REG0]], [[REG0]]
+; CHECK-F16C-NEXT: vmovd [[REG0]], %eax
+; CHECK-F16C-NEXT: movw %ax, (%rsi)
+; CHECK-F16C-NEXT: retq
+ %r = sitofp i64 %a to half
+ store half %r, half* %p
+ ret void
+}
+
+define i64 @test_fptoui_i64(half* %p) #0 {
+; CHECK-LABEL: test_fptoui_i64:
+
+; FP_TO_UINT is expanded using FP_TO_SINT
+; CHECK-LIBCALL-NEXT: pushq %rax
+; CHECK-LIBCALL-NEXT: movzwl (%rdi), %edi
+; CHECK-LIBCALL-NEXT: callq __gnu_h2f_ieee
+; CHECK-LIBCALL-NEXT: movss {{.[A-Z_0-9]+}}(%rip), [[REG1:%[a-z0-9]+]]
+; CHECK-LIBCALL-NEXT: movaps %xmm0, [[REG2:%[a-z0-9]+]]
+; CHECK-LIBCALL-NEXT: subss [[REG1]], [[REG2]]
+; CHECK-LIBCALL-NEXT: cvttss2si [[REG2]], [[REG3:%[a-z0-9]+]]
+; CHECK-LIBCALL-NEXT: movabsq $-9223372036854775808, [[REG4:%[a-z0-9]+]]
+; CHECK-LIBCALL-NEXT: xorq [[REG3]], [[REG4]]
+; CHECK-LIBCALL-NEXT: cvttss2si %xmm0, [[REG5:%[a-z0-9]+]]
+; CHECK-LIBCALL-NEXT: ucomiss [[REG1]], %xmm0
+; CHECK-LIBCALL-NEXT: cmovaeq [[REG4]], [[REG5]]
+; CHECK-LIBCALL-NEXT: popq %rdx
+; CHECK-LIBCALL-NEXT: retq
+
+; CHECK-F16C-NEXT: movswl (%rdi), [[REG0:%[a-z0-9]+]]
+; CHECK-F16C-NEXT: vmovd [[REG0]], [[REG1:%[a-z0-9]+]]
+; CHECK-F16C-NEXT: vcvtph2ps [[REG1]], [[REG2:%[a-z0-9]+]]
+; CHECK-F16C-NEXT: vmovss {{.[A-Z_0-9]+}}(%rip), [[REG3:%[a-z0-9]+]]
+; CHECK-F16C-NEXT: vsubss [[REG3]], [[REG2]], [[REG4:%[a-z0-9]+]]
+; CHECK-F16C-NEXT: vcvttss2si [[REG4]], [[REG5:%[a-z0-9]+]]
+; CHECK-F16C-NEXT: movabsq $-9223372036854775808, [[REG6:%[a-z0-9]+]]
+; CHECK-F16C-NEXT: xorq [[REG5]], [[REG6:%[a-z0-9]+]]
+; CHECK-F16C-NEXT: vcvttss2si [[REG2]], [[REG7:%[a-z0-9]+]]
+; CHECK-F16C-NEXT: vucomiss [[REG3]], [[REG2]]
+; CHECK-F16C-NEXT: cmovaeq [[REG6]], %rax
+; CHECK-F16C-NEXT: retq
+ %a = load half, half* %p, align 2
+ %r = fptoui half %a to i64
+ ret i64 %r
+}
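For reference, the FP_TO_UINT-via-FP_TO_SINT expansion that the CHECK lines above trace can be sketched in C roughly as follows (a minimal sketch; the helper name and layout are illustrative, not part of the test):

    #include <stdint.h>

    /* Values below 2^63 convert directly with the signed cvttss2si; larger
       values are rebased by 2^63 first and the sign bit is restored with an
       XOR.  The trailing cmovaeq in the asm selects between the two results
       based on the ucomiss against 2^63. */
    static uint64_t f32_to_u64_sketch(float x) {
        const float two63 = 9223372036854775808.0f;   /* 2^63 */
        uint64_t small = (uint64_t)(int64_t)x;        /* x < 2^63 case */
        uint64_t big = ((uint64_t)(int64_t)(x - two63)) ^ 0x8000000000000000ULL;
        return x >= two63 ? big : small;              /* cmovae in the asm */
    }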
+
+define void @test_uitofp_i64(i64 %a, half* %p) #0 {
+; CHECK-LABEL: test_uitofp_i64:
+; CHECK-LIBCALL-NEXT: pushq [[ADDR:%[a-z0-9]+]]
+; CHECK-LIBCALL-NEXT: movq %rsi, [[ADDR]]
+; CHECK-NEXT: movl %edi, [[REG0:%[a-z0-9]+]]
+; CHECK-NEXT: andl $1, [[REG0]]
+; CHECK-NEXT: testq %rdi, %rdi
+; CHECK-NEXT: js [[LABEL1:.LBB[0-9_]+]]
+
+; simple conversion to float if non-negative
+; CHECK-LIBCALL-NEXT: cvtsi2ssq %rdi, [[REG1:%[a-z0-9]+]]
+; CHECK-F16C-NEXT: vcvtsi2ssq %rdi, [[REG1:%[a-z0-9]+]], [[REG1]]
+; CHECK-NEXT: jmp [[LABEL2:.LBB[0-9_]+]]
+
+; convert using shift+or if negative
+; CHECK-NEXT: [[LABEL1]]:
+; CHECK-NEXT: shrq %rdi
+; CHECK-NEXT: orq %rdi, [[REG2:%[a-z0-9]+]]
+; CHECK-LIBCALL-NEXT: cvtsi2ssq [[REG2]], [[REG3:%[a-z0-9]+]]
+; CHECK-LIBCALL-NEXT: addss [[REG3]], [[REG1]]
+; CHECK-F16C-NEXT: vcvtsi2ssq [[REG2]], [[REG3:%[a-z0-9]+]], [[REG3]]
+; CHECK-F16C-NEXT: vaddss [[REG3]], [[REG3]], [[REG1:[%a-z0-9]+]]
+
+; convert float to half
+; CHECK-NEXT: [[LABEL2]]:
+; CHECK-LIBCALL-NEXT: callq __gnu_f2h_ieee
+; CHECK-LIBCALL-NEXT: movw %ax, ([[ADDR]])
+; CHECK-LIBCALL-NEXT: popq [[ADDR]]
+; CHECK-F16C-NEXT: vcvtps2ph $0, [[REG1]], [[REG4:%[a-z0-9]+]]
+; CHECK-F16C-NEXT: vmovd [[REG4]], %eax
+; CHECK-F16C-NEXT: movw %ax, (%rsi)
+; CHECK-NEXT: retq
+
+ %r = uitofp i64 %a to half
+ store half %r, half* %p
+ ret void
+}
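The shift+or sequence checked above is the usual unsigned-to-float expansion; a minimal C sketch of the float part (helper name illustrative; the final float-to-half truncation via __gnu_f2h_ieee or vcvtps2ph is omitted):

    #include <stdint.h>

    /* Non-negative inputs convert directly; inputs with the sign bit set are
       halved (keeping the low bit as a sticky rounding bit), converted as
       signed, and then doubled back with an add. */
    static float u64_to_f32_sketch(uint64_t x) {
        if ((int64_t)x >= 0)
            return (float)(int64_t)x;         /* plain cvtsi2ssq */
        uint64_t h = (x >> 1) | (x & 1);      /* shrq + orq */
        float f = (float)(int64_t)h;
        return f + f;                         /* addss */
    }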
+
+define <4 x float> @test_extend32_vec4(<4 x half>* %p) #0 {
+; CHECK-LABEL: test_extend32_vec4:
+
+; CHECK-LIBCALL: callq __gnu_h2f_ieee
+; CHECK-LIBCALL: callq __gnu_h2f_ieee
+; CHECK-LIBCALL: callq __gnu_h2f_ieee
+; CHECK-LIBCALL: callq __gnu_h2f_ieee
+; CHECK-F16C: vcvtph2ps
+; CHECK-F16C: vcvtph2ps
+; CHECK-F16C: vcvtph2ps
+; CHECK-F16C: vcvtph2ps
+ %a = load <4 x half>, <4 x half>* %p, align 8
+ %b = fpext <4 x half> %a to <4 x float>
+ ret <4 x float> %b
+}
+
+define <4 x double> @test_extend64_vec4(<4 x half>* %p) #0 {
+; CHECK-LABEL: test_extend64_vec4:
+
+; CHECK-LIBCALL: callq __gnu_h2f_ieee
+; CHECK-LIBCALL-DAG: callq __gnu_h2f_ieee
+; CHECK-LIBCALL-DAG: callq __gnu_h2f_ieee
+; CHECK-LIBCALL-DAG: callq __gnu_h2f_ieee
+; CHECK-LIBCALL-DAG: cvtss2sd
+; CHECK-LIBCALL-DAG: cvtss2sd
+; CHECK-LIBCALL-DAG: cvtss2sd
+; CHECK-LIBCALL: cvtss2sd
+; CHECK-F16C: vcvtph2ps
+; CHECK-F16C-DAG: vcvtph2ps
+; CHECK-F16C-DAG: vcvtph2ps
+; CHECK-F16C-DAG: vcvtph2ps
+; CHECK-F16C-DAG: vcvtss2sd
+; CHECK-F16C-DAG: vcvtss2sd
+; CHECK-F16C-DAG: vcvtss2sd
+; CHECK-F16C: vcvtss2sd
+ %a = load <4 x half>, <4 x half>* %p, align 8
+ %b = fpext <4 x half> %a to <4 x double>
+ ret <4 x double> %b
+}
+
+define void @test_trunc32_vec4(<4 x float> %a, <4 x half>* %p) {
+; CHECK-LABEL: test_trunc32_vec4:
+
+; CHECK-LIBCALL: callq __gnu_f2h_ieee
+; CHECK-LIBCALL: callq __gnu_f2h_ieee
+; CHECK-LIBCALL: callq __gnu_f2h_ieee
+; CHECK-LIBCALL: callq __gnu_f2h_ieee
+; CHECK-F16C: vcvtps2ph
+; CHECK-F16C: vcvtps2ph
+; CHECK-F16C: vcvtps2ph
+; CHECK-F16C: vcvtps2ph
+; CHECK: movw
+; CHECK: movw
+; CHECK: movw
+; CHECK: movw
+ %v = fptrunc <4 x float> %a to <4 x half>
+ store <4 x half> %v, <4 x half>* %p
+ ret void
+}
+
+define void @test_trunc64_vec4(<4 x double> %a, <4 x half>* %p) {
+; CHECK-LABEL: test_trunc64_vec4:
+; CHECK: callq __truncdfhf2
+; CHECK: callq __truncdfhf2
+; CHECK: callq __truncdfhf2
+; CHECK: callq __truncdfhf2
+; CHECK: movw
+; CHECK: movw
+; CHECK: movw
+; CHECK: movw
+ %v = fptrunc <4 x double> %a to <4 x half>
+ store <4 x half> %v, <4 x half>* %p
+ ret void
+}
+
+attributes #0 = { nounwind }
diff --git a/test/CodeGen/X86/hidden-vis-2.ll b/test/CodeGen/X86/hidden-vis-2.ll
index 74554d15e2f6..62e143d61c67 100644
--- a/test/CodeGen/X86/hidden-vis-2.ll
+++ b/test/CodeGen/X86/hidden-vis-2.ll
@@ -5,6 +5,6 @@
define i32 @t() nounwind readonly {
entry:
- %0 = load i32* @x, align 4 ; <i32> [#uses=1]
+ %0 = load i32, i32* @x, align 4 ; <i32> [#uses=1]
ret i32 %0
}
diff --git a/test/CodeGen/X86/hidden-vis-3.ll b/test/CodeGen/X86/hidden-vis-3.ll
index 4be881e84d68..5d9ef44a4d49 100644
--- a/test/CodeGen/X86/hidden-vis-3.ll
+++ b/test/CodeGen/X86/hidden-vis-3.ll
@@ -12,8 +12,8 @@ entry:
; X64: _t:
; X64: movl _y(%rip), %eax
- %0 = load i32* @x, align 4 ; <i32> [#uses=1]
- %1 = load i32* @y, align 4 ; <i32> [#uses=1]
+ %0 = load i32, i32* @x, align 4 ; <i32> [#uses=1]
+ %1 = load i32, i32* @y, align 4 ; <i32> [#uses=1]
%2 = add i32 %1, %0 ; <i32> [#uses=1]
ret i32 %2
}
diff --git a/test/CodeGen/X86/hidden-vis-4.ll b/test/CodeGen/X86/hidden-vis-4.ll
index 25a87b905bc1..17d44d0e42f1 100644
--- a/test/CodeGen/X86/hidden-vis-4.ll
+++ b/test/CodeGen/X86/hidden-vis-4.ll
@@ -7,6 +7,6 @@ entry:
; CHECK-LABEL: t:
; CHECK: movl _x, %eax
; CHECK: .comm _x,4
- %0 = load i32* @x, align 4 ; <i32> [#uses=1]
+ %0 = load i32, i32* @x, align 4 ; <i32> [#uses=1]
ret i32 %0
}
diff --git a/test/CodeGen/X86/hidden-vis-pic.ll b/test/CodeGen/X86/hidden-vis-pic.ll
index 1caab7a6a00e..96adf621752f 100644
--- a/test/CodeGen/X86/hidden-vis-pic.ll
+++ b/test/CodeGen/X86/hidden-vis-pic.ll
@@ -28,7 +28,7 @@ entry:
define hidden void @func() nounwind ssp uwtable {
entry:
- %0 = call i32 @puts(i8* getelementptr inbounds ([12 x i8]* @.str, i64 0, i64 0)) nounwind ; <i32> [#uses=0]
+ %0 = call i32 @puts(i8* getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0)) nounwind ; <i32> [#uses=0]
br label %return
return: ; preds = %entry
@@ -45,6 +45,6 @@ entry:
br label %return
return: ; preds = %entry
- %retval1 = load i32* %retval ; <i32> [#uses=1]
+ %retval1 = load i32, i32* %retval ; <i32> [#uses=1]
ret i32 %retval1
}
diff --git a/test/CodeGen/X86/hipe-cc.ll b/test/CodeGen/X86/hipe-cc.ll
index b34417ebf69b..e3808e754228 100644
--- a/test/CodeGen/X86/hipe-cc.ll
+++ b/test/CodeGen/X86/hipe-cc.ll
@@ -53,18 +53,18 @@ entry:
; CHECK-NEXT: movl 12(%esp), %ebp
; CHECK-NEXT: movl 8(%esp), %eax
; CHECK-NEXT: movl 4(%esp), %edx
- %0 = load i32* %hp_var
- %1 = load i32* %p_var
- %2 = load i32* %arg0_var
- %3 = load i32* %arg1_var
- %4 = load i32* %arg2_var
+ %0 = load i32, i32* %hp_var
+ %1 = load i32, i32* %p_var
+ %2 = load i32, i32* %arg0_var
+ %3 = load i32, i32* %arg1_var
+ %4 = load i32, i32* %arg2_var
; CHECK: jmp bar
tail call cc 11 void @bar(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4) nounwind
ret void
}
define cc 11 void @baz() nounwind {
- %tmp_clos = load i32* @clos
+ %tmp_clos = load i32, i32* @clos
%tmp_clos2 = inttoptr i32 %tmp_clos to i32*
%indirect_call = bitcast i32* %tmp_clos2 to void (i32, i32, i32)*
; CHECK: movl $42, %eax
diff --git a/test/CodeGen/X86/hipe-cc64.ll b/test/CodeGen/X86/hipe-cc64.ll
index 27e1c723a8f7..28d90399d857 100644
--- a/test/CodeGen/X86/hipe-cc64.ll
+++ b/test/CodeGen/X86/hipe-cc64.ll
@@ -62,19 +62,19 @@ entry:
; CHECK-NEXT: movq 24(%rsp), %rsi
; CHECK-NEXT: movq 16(%rsp), %rdx
; CHECK-NEXT: movq 8(%rsp), %rcx
- %0 = load i64* %hp_var
- %1 = load i64* %p_var
- %2 = load i64* %arg0_var
- %3 = load i64* %arg1_var
- %4 = load i64* %arg2_var
- %5 = load i64* %arg3_var
+ %0 = load i64, i64* %hp_var
+ %1 = load i64, i64* %p_var
+ %2 = load i64, i64* %arg0_var
+ %3 = load i64, i64* %arg1_var
+ %4 = load i64, i64* %arg2_var
+ %5 = load i64, i64* %arg3_var
; CHECK: jmp bar
tail call cc 11 void @bar(i64 %0, i64 %1, i64 %2, i64 %3, i64 %4, i64 %5) nounwind
ret void
}
define cc 11 void @baz() nounwind {
- %tmp_clos = load i64* @clos
+ %tmp_clos = load i64, i64* @clos
%tmp_clos2 = inttoptr i64 %tmp_clos to i64*
%indirect_call = bitcast i64* %tmp_clos2 to void (i64, i64, i64)*
; CHECK: movl $42, %esi
diff --git a/test/CodeGen/X86/hoist-common.ll b/test/CodeGen/X86/hoist-common.ll
index 01d1b8c034e3..65f834081077 100644
--- a/test/CodeGen/X86/hoist-common.ll
+++ b/test/CodeGen/X86/hoist-common.ll
@@ -26,7 +26,7 @@ entry:
if.then:
; CHECK: callq
- %call = tail call zeroext i1 (...)* @foo() nounwind
+ %call = tail call zeroext i1 (...) @foo() nounwind
br label %return
return:
diff --git a/test/CodeGen/X86/hoist-invariant-load.ll b/test/CodeGen/X86/hoist-invariant-load.ll
index c9e52903c79e..6798c2b30c3b 100644
--- a/test/CodeGen/X86/hoist-invariant-load.ll
+++ b/test/CodeGen/X86/hoist-invariant-load.ll
@@ -5,9 +5,9 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
target triple = "x86_64-apple-macosx10.7.2"
@"\01L_OBJC_METH_VAR_NAME_" = internal global [4 x i8] c"foo\00", section "__TEXT,__objc_methname,cstring_literals", align 1
-@"\01L_OBJC_SELECTOR_REFERENCES_" = internal global i8* getelementptr inbounds ([4 x i8]* @"\01L_OBJC_METH_VAR_NAME_", i64 0, i64 0), section "__DATA, __objc_selrefs, literal_pointers, no_dead_strip"
+@"\01L_OBJC_SELECTOR_REFERENCES_" = internal global i8* getelementptr inbounds ([4 x i8], [4 x i8]* @"\01L_OBJC_METH_VAR_NAME_", i64 0, i64 0), section "__DATA, __objc_selrefs, literal_pointers, no_dead_strip"
@"\01L_OBJC_IMAGE_INFO" = internal constant [2 x i32] [i32 0, i32 16], section "__DATA, __objc_imageinfo, regular, no_dead_strip"
-@llvm.used = appending global [3 x i8*] [i8* getelementptr inbounds ([4 x i8]* @"\01L_OBJC_METH_VAR_NAME_", i32 0, i32 0), i8* bitcast (i8** @"\01L_OBJC_SELECTOR_REFERENCES_" to i8*), i8* bitcast ([2 x i32]* @"\01L_OBJC_IMAGE_INFO" to i8*)], section "llvm.metadata"
+@llvm.used = appending global [3 x i8*] [i8* getelementptr inbounds ([4 x i8], [4 x i8]* @"\01L_OBJC_METH_VAR_NAME_", i32 0, i32 0), i8* bitcast (i8** @"\01L_OBJC_SELECTOR_REFERENCES_" to i8*), i8* bitcast ([2 x i32]* @"\01L_OBJC_IMAGE_INFO" to i8*)], section "llvm.metadata"
define void @test(i8* %x) uwtable ssp {
entry:
@@ -15,7 +15,7 @@ entry:
for.body: ; preds = %for.body, %entry
%i.01 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
- %0 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_", align 8, !invariant.load !0
+ %0 = load i8*, i8** @"\01L_OBJC_SELECTOR_REFERENCES_", align 8, !invariant.load !0
%call = tail call i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*)*)(i8* %x, i8* %0)
%inc = add i32 %i.01, 1
%exitcond = icmp eq i32 %inc, 10000
diff --git a/test/CodeGen/X86/huge-stack-offset.ll b/test/CodeGen/X86/huge-stack-offset.ll
new file mode 100644
index 000000000000..eea389b027ba
--- /dev/null
+++ b/test/CodeGen/X86/huge-stack-offset.ll
@@ -0,0 +1,59 @@
+; RUN: llc < %s -mtriple=x86_64-linux-unknown | FileCheck %s --check-prefix=CHECK-64
+; RUN: llc < %s -mtriple=i386-linux-unknown | FileCheck %s --check-prefix=CHECK-32
+
+; Test that a large stack offset uses a single add/sub instruction to
+; adjust the stack pointer.
+
+define void @foo() nounwind {
+; CHECK-64-LABEL: foo:
+; CHECK-64: movabsq $50000000{{..}}, %rax
+; CHECK-64-NEXT: subq %rax, %rsp
+; CHECK-64-NOT: subq $2147483647, %rsp
+; CHECK-64: movabsq $50000000{{..}}, [[RAX:%r..]]
+; CHECK-64-NEXT: addq [[RAX]], %rsp
+
+; CHECK-32-LABEL: foo:
+; CHECK-32: movl $50000000{{..}}, %eax
+; CHECK-32-NEXT: subl %eax, %esp
+; CHECK-32-NOT: subl $2147483647, %esp
+; CHECK-32: movl $50000000{{..}}, [[EAX:%e..]]
+; CHECK-32-NEXT: addl [[EAX]], %esp
+ %1 = alloca [5000000000 x i8], align 16
+ %2 = getelementptr inbounds [5000000000 x i8], [5000000000 x i8]* %1, i32 0, i32 0
+ call void @bar(i8* %2)
+ ret void
+}
+
+; Verify that we do not clobber the return value.
+
+define i32 @foo2() nounwind {
+; CHECK-64-LABEL: foo2:
+; CHECK-64: movl $10, %eax
+; CHECK-64-NOT: movabsq ${{.*}}, %rax
+
+; CHECK-32-LABEL: foo2:
+; CHECK-32: movl $10, %eax
+; CHECK-32-NOT: movl ${{.*}}, %eax
+ %1 = alloca [5000000000 x i8], align 16
+ %2 = getelementptr inbounds [5000000000 x i8], [5000000000 x i8]* %1, i32 0, i32 0
+ call void @bar(i8* %2)
+ ret i32 10
+}
+
+; Verify that we do not clobber EAX when using the inreg attribute.
+
+define i32 @foo3(i32 inreg %x) nounwind {
+; CHECK-64-LABEL: foo3:
+; CHECK-64: movabsq $50000000{{..}}, %rax
+; CHECK-64-NEXT: subq %rax, %rsp
+
+; CHECK-32-LABEL: foo3:
+; CHECK-32: subl $2147483647, %esp
+; CHECK-32-NOT: movl ${{.*}}, %eax
+ %1 = alloca [5000000000 x i8], align 16
+ %2 = getelementptr inbounds [5000000000 x i8], [5000000000 x i8]* %1, i32 0, i32 0
+ call void @bar(i8* %2)
+ ret i32 %x
+}
+
+declare void @bar(i8*)
diff --git a/test/CodeGen/X86/i128-mul.ll b/test/CodeGen/X86/i128-mul.ll
index 8cfda85ce46d..21bca028888a 100644
--- a/test/CodeGen/X86/i128-mul.ll
+++ b/test/CodeGen/X86/i128-mul.ll
@@ -26,14 +26,14 @@ entry:
for.body: ; preds = %entry, %for.body
%carry.013 = phi i64 [ %conv6, %for.body ], [ 0, %entry ]
%i.012 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
- %arrayidx = getelementptr inbounds i64* %x, i64 %i.012
- %0 = load i64* %arrayidx, align 8
+ %arrayidx = getelementptr inbounds i64, i64* %x, i64 %i.012
+ %0 = load i64, i64* %arrayidx, align 8
%conv2 = zext i64 %0 to i128
%mul = mul i128 %conv2, %conv
%conv3 = zext i64 %carry.013 to i128
%add = add i128 %mul, %conv3
%conv4 = trunc i128 %add to i64
- %arrayidx5 = getelementptr inbounds i64* %z, i64 %i.012
+ %arrayidx5 = getelementptr inbounds i64, i64* %z, i64 %i.012
store i64 %conv4, i64* %arrayidx5, align 8
%shr = lshr i128 %add, 64
%conv6 = trunc i128 %shr to i64
diff --git a/test/CodeGen/X86/i128-ret.ll b/test/CodeGen/X86/i128-ret.ll
index 264f07ceb4c8..1d76471225e2 100644
--- a/test/CodeGen/X86/i128-ret.ll
+++ b/test/CodeGen/X86/i128-ret.ll
@@ -4,7 +4,7 @@
; CHECK: movq 8([[A0]]), %rdx
define i128 @test(i128 *%P) {
- %A = load i128* %P
+ %A = load i128, i128* %P
ret i128 %A
}
diff --git a/test/CodeGen/X86/i1narrowfail.ll b/test/CodeGen/X86/i1narrowfail.ll
new file mode 100644
index 000000000000..4f9a75672bfc
--- /dev/null
+++ b/test/CodeGen/X86/i1narrowfail.ll
@@ -0,0 +1,10 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s
+
+; CHECK-LABEL: @foo
+; CHECK: orb $16
+define void @foo(i64* %ptr) {
+ %r11 = load i64, i64* %ptr, align 8
+ %r12 = or i64 16, %r11
+ store i64 %r12, i64* %ptr, align 8
+ ret void
+}
diff --git a/test/CodeGen/X86/i256-add.ll b/test/CodeGen/X86/i256-add.ll
index 5a7a7a7fe84a..6164d898ca11 100644
--- a/test/CodeGen/X86/i256-add.ll
+++ b/test/CodeGen/X86/i256-add.ll
@@ -3,15 +3,15 @@
; RUN: grep sbbl %t | count 7
define void @add(i256* %p, i256* %q) nounwind {
- %a = load i256* %p
- %b = load i256* %q
+ %a = load i256, i256* %p
+ %b = load i256, i256* %q
%c = add i256 %a, %b
store i256 %c, i256* %p
ret void
}
define void @sub(i256* %p, i256* %q) nounwind {
- %a = load i256* %p
- %b = load i256* %q
+ %a = load i256, i256* %p
+ %b = load i256, i256* %q
%c = sub i256 %a, %b
store i256 %c, i256* %p
ret void
diff --git a/test/CodeGen/X86/i2k.ll b/test/CodeGen/X86/i2k.ll
index 6116c2e71658..83c10a58a3a8 100644
--- a/test/CodeGen/X86/i2k.ll
+++ b/test/CodeGen/X86/i2k.ll
@@ -1,8 +1,8 @@
; RUN: llc < %s -march=x86
define void @foo(i2011* %x, i2011* %y, i2011* %p) nounwind {
- %a = load i2011* %x
- %b = load i2011* %y
+ %a = load i2011, i2011* %x
+ %b = load i2011, i2011* %y
%c = add i2011 %a, %b
store i2011 %c, i2011* %p
ret void
diff --git a/test/CodeGen/X86/i486-fence-loop.ll b/test/CodeGen/X86/i486-fence-loop.ll
index d8096197b0df..936e54eddafa 100644
--- a/test/CodeGen/X86/i486-fence-loop.ll
+++ b/test/CodeGen/X86/i486-fence-loop.ll
@@ -7,8 +7,7 @@
define void @gst_atomic_queue_push(i32* %addr) {
; CHECK-LABEL: gst_atomic_queue_push:
; CHECK: movl (%eax), [[LHS:%e[a-z]+]]
-; CHECK: lock
-; CHECK-NEXT: orl
+; CHECK: lock orl
; CHECK: movl (%eax), [[RHS:%e[a-z]+]]
; CHECK: cmpl [[LHS]], [[RHS]]
@@ -16,9 +15,9 @@ entry:
br label %while.body
while.body:
- %0 = load volatile i32* %addr, align 4
+ %0 = load volatile i32, i32* %addr, align 4
fence seq_cst
- %1 = load volatile i32* %addr, align 4
+ %1 = load volatile i32, i32* %addr, align 4
%cmp = icmp sgt i32 %1, %0
br i1 %cmp, label %while.body, label %if.then
diff --git a/test/CodeGen/X86/i64-mem-copy.ll b/test/CodeGen/X86/i64-mem-copy.ll
index bf778968c89a..69ec3fd806b4 100644
--- a/test/CodeGen/X86/i64-mem-copy.ll
+++ b/test/CodeGen/X86/i64-mem-copy.ll
@@ -1,17 +1,76 @@
-; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s -check-prefix=X64
-; RUN: llc < %s -mtriple=x86_64-win32 | FileCheck %s -check-prefix=X64
-; X64: movq ({{%rsi|%rdx}}), %r
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=sse2 | FileCheck %s --check-prefix=X64
+; RUN: llc < %s -mtriple=i386-unknown-unknown -mattr=sse2 | FileCheck %s --check-prefix=X32
+; RUN: llc < %s -mtriple=i386-unknown-unknown -mattr=avx2 | FileCheck %s --check-prefix=X32AVX
-; RUN: llc < %s -march=x86 -mattr=+sse2 | FileCheck %s -check-prefix=X32
-; X32: movsd ({{%ecx|%eax}}), %xmm
+; Use movq or movsd to load / store i64 values if sse2 is available.
+; rdar://6659858
+
+define void @foo(i64* %x, i64* %y) {
+; X64-LABEL: foo:
+; X64: # BB#0:
+; X64-NEXT: movq (%rsi), %rax
+; X64-NEXT: movq %rax, (%rdi)
+; X64-NEXT: retq
+;
+; X32-LABEL: foo:
+; X32: # BB#0:
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; X32-NEXT: movsd %xmm0, (%eax)
+; X32-NEXT: retl
+ %tmp1 = load i64, i64* %y, align 8
+ store i64 %tmp1, i64* %x, align 8
+ ret void
+}
-; Uses movsd to load / store i64 values if sse2 is available.
+; Verify that a 64-bit chunk extracted from a vector is stored with a movq
+; regardless of whether the system is 64-bit.
-; rdar://6659858
+define void @store_i64_from_vector(<8 x i16> %x, <8 x i16> %y, i64* %i) {
+; X64-LABEL: store_i64_from_vector:
+; X64: # BB#0:
+; X64-NEXT: paddw %xmm1, %xmm0
+; X64-NEXT: movq %xmm0, (%rdi)
+; X64-NEXT: retq
+;
+; X32-LABEL: store_i64_from_vector:
+; X32: # BB#0:
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: paddw %xmm1, %xmm0
+; X32-NEXT: movq %xmm0, (%eax)
+; X32-NEXT: retl
+ %z = add <8 x i16> %x, %y ; force execution domain
+ %bc = bitcast <8 x i16> %z to <2 x i64>
+ %vecext = extractelement <2 x i64> %bc, i32 0
+ store i64 %vecext, i64* %i, align 8
+ ret void
+}
+
+define void @store_i64_from_vector256(<16 x i16> %x, <16 x i16> %y, i64* %i) {
+; X32AVX-LABEL: store_i64_from_vector256:
+; X32AVX: # BB#0:
+; X32AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32AVX-NEXT: vpaddw %ymm1, %ymm0, %ymm0
+; X32AVX-NEXT: vextracti128 $1, %ymm0, %xmm0
+; X32AVX-NEXT: vmovq %xmm0, (%eax)
+; X32AVX-NEXT: vzeroupper
+; X32AVX-NEXT: retl
+ %z = add <16 x i16> %x, %y ; force execution domain
+ %bc = bitcast <16 x i16> %z to <4 x i64>
+ %vecext = extractelement <4 x i64> %bc, i32 2
+ store i64 %vecext, i64* %i, align 8
+ ret void
+}
+
+; PR23476
+; Handle extraction from a non-simple / pre-legalization type.
-define void @foo(i64* %x, i64* %y) nounwind {
-entry:
- %tmp1 = load i64* %y, align 8 ; <i64> [#uses=1]
- store i64 %tmp1, i64* %x, align 8
- ret void
+define void @PR23476(<5 x i64> %in, i64* %out, i32 %index) {
+; X32-LABEL: PR23476:
+; X32: movsd {{.*#+}} xmm0 = mem[0],zero
+; X32-NEXT: movsd %xmm0, (%eax)
+ %ext = extractelement <5 x i64> %in, i32 %index
+ store i64 %ext, i64* %out, align 8
+ ret void
}
diff --git a/test/CodeGen/X86/illegal-vector-args-return.ll b/test/CodeGen/X86/illegal-vector-args-return.ll
index 62a21f4c5aad..d783d4fa1b49 100644
--- a/test/CodeGen/X86/illegal-vector-args-return.ll
+++ b/test/CodeGen/X86/illegal-vector-args-return.ll
@@ -3,6 +3,8 @@
; RUN: llc < %s -march=x86 -mattr=+sse2 -mcpu=nehalem | grep "addps %xmm3, %xmm1"
; RUN: llc < %s -march=x86 -mattr=+sse2 -mcpu=nehalem | grep "addps %xmm2, %xmm0"
+target triple = "i686-apple-darwin8"
+
define <4 x double> @foo(<4 x double> %x, <4 x double> %z) {
%y = fmul <4 x double> %x, %z
ret <4 x double> %y
diff --git a/test/CodeGen/X86/imul.ll b/test/CodeGen/X86/imul.ll
new file mode 100644
index 000000000000..c64b4e302b92
--- /dev/null
+++ b/test/CodeGen/X86/imul.ll
@@ -0,0 +1,110 @@
+; RUN: llc < %s -mtriple=x86_64-pc-linux-gnu | FileCheck %s --check-prefix=X64
+; RUN: llc < %s -mtriple=x86_64-pc-linux-gnux32 | FileCheck %s --check-prefix=X64
+; RUN: llc < %s -mtriple=i686-pc-linux | FileCheck %s --check-prefix=X86
+
+define i32 @mul4_32(i32 %A) {
+; X64-LABEL: mul4_32:
+; X64: leal
+; X86-LABEL: mul4_32:
+; X86: shll
+ %mul = mul i32 %A, 4
+ ret i32 %mul
+}
+
+define i64 @mul4_64(i64 %A) {
+; X64-LABEL: mul4_64:
+; X64: leaq
+; X86-LABEL: mul4_64:
+; X86: shldl
+; X86: shll
+ %mul = mul i64 %A, 4
+ ret i64 %mul
+}
+
+define i32 @mul4096_32(i32 %A) {
+; X64-LABEL: mul4096_32:
+; X64: shll
+; X86-LABEL: mul4096_32:
+; X86: shll
+ %mul = mul i32 %A, 4096
+ ret i32 %mul
+}
+
+define i64 @mul4096_64(i64 %A) {
+; X64-LABEL: mul4096_64:
+; X64: shlq
+; X86-LABEL: mul4096_64:
+; X86: shldl
+; X86: shll
+ %mul = mul i64 %A, 4096
+ ret i64 %mul
+}
+
+define i32 @mulmin4096_32(i32 %A) {
+; X64-LABEL: mulmin4096_32:
+; X64: shll
+; X64-NEXT: negl
+; X86-LABEL: mulmin4096_32:
+; X86: shll
+; X86-NEXT: negl
+ %mul = mul i32 %A, -4096
+ ret i32 %mul
+}
+
+define i64 @mulmin4096_64(i64 %A) {
+; X64-LABEL: mulmin4096_64:
+; X64: shlq
+; X64-NEXT: negq
+; X86-LABEL: mulmin4096_64:
+; X86: shldl
+; X86-NEXT: shll
+; X86-NEXT: xorl
+; X86-NEXT: negl
+; X86-NEXT: sbbl
+ %mul = mul i64 %A, -4096
+ ret i64 %mul
+}
+
+define i32 @mul3_32(i32 %A) {
+; X64-LABEL: mul3_32:
+; X64: leal
+; X86-LABEL: mul3_32:
+; But why?!
+; X86: imull
+ %mul = mul i32 %A, 3
+ ret i32 %mul
+}
+
+define i64 @mul3_64(i64 %A) {
+; X64-LABEL: mul3_64:
+; X64: leaq
+; X86-LABEL: mul3_64:
+; X86: mull
+; X86-NEXT: imull
+ %mul = mul i64 %A, 3
+ ret i64 %mul
+}
+
+define i32 @mul40_32(i32 %A) {
+; X64-LABEL: mul40_32:
+; X64: shll
+; X64-NEXT: leal
+; X86-LABEL: mul40_32:
+; X86: shll
+; X86-NEXT: leal
+ %mul = mul i32 %A, 40
+ ret i32 %mul
+}
+
+define i64 @mul40_64(i64 %A) {
+; X64-LABEL: mul40_64:
+; X64: shlq
+; X64-NEXT: leaq
+; X86-LABEL: mul40_64:
+; X86: leal
+; X86-NEXT: movl
+; X86-NEXT: mull
+; X86-NEXT: leal
+ %mul = mul i64 %A, 40
+ ret i64 %mul
+}
diff --git a/test/CodeGen/X86/imul64-lea.ll b/test/CodeGen/X86/imul64-lea.ll
deleted file mode 100644
index 047c129ddb33..000000000000
--- a/test/CodeGen/X86/imul64-lea.ll
+++ /dev/null
@@ -1,25 +0,0 @@
-; RUN: llc < %s -mtriple=x86_64-pc-linux-gnu | FileCheck %s
-; RUN: llc < %s -mtriple=x86_64-pc-linux-gnux32 | FileCheck %s
-
-; Test that 64-bit LEAs are generated for both LP64 and ILP32 in 64-bit mode.
-declare i64 @foo64()
-
-define i64 @test64() {
- %tmp.0 = tail call i64 @foo64( )
- %tmp.1 = mul i64 %tmp.0, 9
-; CHECK-NOT: mul
-; CHECK: leaq
- ret i64 %tmp.1
-}
-
-; Test that 32-bit LEAs are generated for both LP64 and ILP32 in 64-bit mode.
-declare i32 @foo32()
-
-define i32 @test32() {
- %tmp.0 = tail call i32 @foo32( )
- %tmp.1 = mul i32 %tmp.0, 9
-; CHECK-NOT: mul
-; CHECK: leal
- ret i32 %tmp.1
-}
-
diff --git a/test/CodeGen/X86/inalloca-ctor.ll b/test/CodeGen/X86/inalloca-ctor.ll
index b1781d30f913..eba4e72f9330 100644
--- a/test/CodeGen/X86/inalloca-ctor.ll
+++ b/test/CodeGen/X86/inalloca-ctor.ll
@@ -11,7 +11,7 @@ declare void @Foo_ctor(%Foo* %this)
define void @g() {
entry:
%args = alloca inalloca %frame
- %c = getelementptr %frame* %args, i32 0, i32 2
+ %c = getelementptr %frame, %frame* %args, i32 0, i32 2
; CHECK: movl $20, %eax
; CHECK: calll __chkstk
; CHECK: movl %esp,
@@ -20,10 +20,10 @@ entry:
; CHECK-NEXT: pushl
; CHECK-NEXT: calll _Foo_ctor
; CHECK: addl $4, %esp
- %b = getelementptr %frame* %args, i32 0, i32 1
+ %b = getelementptr %frame, %frame* %args, i32 0, i32 1
store i32 42, i32* %b
; CHECK: movl $42,
- %a = getelementptr %frame* %args, i32 0, i32 0
+ %a = getelementptr %frame, %frame* %args, i32 0, i32 0
call void @Foo_ctor(%Foo* %a)
; CHECK-NEXT: pushl
; CHECK-NEXT: calll _Foo_ctor
diff --git a/test/CodeGen/X86/inalloca-invoke.ll b/test/CodeGen/X86/inalloca-invoke.ll
index b56f24d99628..cf5cbe142ec7 100644
--- a/test/CodeGen/X86/inalloca-invoke.ll
+++ b/test/CodeGen/X86/inalloca-invoke.ll
@@ -4,6 +4,7 @@
%frame.reverse = type { %Iter, %Iter }
+declare i32 @pers(...)
declare void @llvm.stackrestore(i8*)
declare i8* @llvm.stacksave()
declare void @begin(%Iter* sret)
@@ -17,12 +18,12 @@ define i32 @main() {
blah:
%inalloca.save = call i8* @llvm.stacksave()
%rev_args = alloca inalloca %frame.reverse, align 4
- %beg = getelementptr %frame.reverse* %rev_args, i32 0, i32 0
- %end = getelementptr %frame.reverse* %rev_args, i32 0, i32 1
+ %beg = getelementptr %frame.reverse, %frame.reverse* %rev_args, i32 0, i32 0
+ %end = getelementptr %frame.reverse, %frame.reverse* %rev_args, i32 0, i32 1
; CHECK: calll __chkstk
-; CHECK: movl %[[beg:[^,]*]], %esp
-; CHECK: leal 12(%[[beg]]), %[[end:[^ ]*]]
+; CHECK: movl %esp, %[[beg:[^ ]*]]
+; CHECK: leal 12(%[[beg]]), %[[end:[^ ]*]]
call void @begin(%Iter* sret %temp.lvalue)
; CHECK: calll _begin
@@ -31,7 +32,7 @@ blah:
to label %invoke.cont unwind label %lpad
; Uses end as sret param.
-; CHECK: movl %[[end]], (%esp)
+; CHECK: pushl %[[end]]
; CHECK: calll _plus
invoke.cont:
@@ -48,7 +49,7 @@ invoke.cont5: ; preds = %invoke.cont
ret i32 0
lpad: ; preds = %invoke.cont, %entry
- %lp = landingpad { i8*, i32 } personality i8* null
+ %lp = landingpad { i8*, i32 } personality i32 (...)* @pers
cleanup
unreachable
}
diff --git a/test/CodeGen/X86/inalloca-stdcall.ll b/test/CodeGen/X86/inalloca-stdcall.ll
index 65a0f77c9a6f..e5f6ea70e9cb 100644
--- a/test/CodeGen/X86/inalloca-stdcall.ll
+++ b/test/CodeGen/X86/inalloca-stdcall.ll
@@ -10,13 +10,12 @@ define void @g() {
%b = alloca inalloca %Foo
; CHECK: movl $8, %eax
; CHECK: calll __chkstk
-; CHECK: movl %[[REG:[^,]*]], %esp
- %f1 = getelementptr %Foo* %b, i32 0, i32 0
- %f2 = getelementptr %Foo* %b, i32 0, i32 1
+ %f1 = getelementptr %Foo, %Foo* %b, i32 0, i32 0
+ %f2 = getelementptr %Foo, %Foo* %b, i32 0, i32 1
store i32 13, i32* %f1
store i32 42, i32* %f2
-; CHECK: movl $13, (%[[REG]])
-; CHECK: movl $42, 4(%[[REG]])
+; CHECK: movl $13, (%esp)
+; CHECK: movl $42, 4(%esp)
call x86_stdcallcc void @f(%Foo* inalloca %b)
; CHECK: calll _f@8
; CHECK-NOT: %esp
diff --git a/test/CodeGen/X86/inalloca.ll b/test/CodeGen/X86/inalloca.ll
index 12643f9d0d50..904366219ab7 100644
--- a/test/CodeGen/X86/inalloca.ll
+++ b/test/CodeGen/X86/inalloca.ll
@@ -10,13 +10,12 @@ entry:
%b = alloca inalloca %Foo
; CHECK: movl $8, %eax
; CHECK: calll __chkstk
-; CHECK: movl %[[REG:[^,]*]], %esp
- %f1 = getelementptr %Foo* %b, i32 0, i32 0
- %f2 = getelementptr %Foo* %b, i32 0, i32 1
+ %f1 = getelementptr %Foo, %Foo* %b, i32 0, i32 0
+ %f2 = getelementptr %Foo, %Foo* %b, i32 0, i32 1
store i32 13, i32* %f1
store i32 42, i32* %f2
-; CHECK: movl $13, (%[[REG]])
-; CHECK: movl $42, 4(%[[REG]])
+; CHECK: movl $13, (%esp)
+; CHECK: movl $42, 4(%esp)
call void @f(%Foo* inalloca %b)
; CHECK: calll _f
ret void
@@ -30,13 +29,12 @@ entry:
%b = alloca inalloca %Foo
; CHECK: movl $8, %eax
; CHECK: calll __chkstk
-; CHECK: movl %[[REG:[^,]*]], %esp
- %f1 = getelementptr %Foo* %b, i32 0, i32 0
- %f2 = getelementptr %Foo* %b, i32 0, i32 1
+ %f1 = getelementptr %Foo, %Foo* %b, i32 0, i32 0
+ %f2 = getelementptr %Foo, %Foo* %b, i32 0, i32 1
store i32 13, i32* %f1
store i32 42, i32* %f2
-; CHECK: movl $13, (%[[REG]])
-; CHECK: movl $42, 4(%[[REG]])
+; CHECK: movl $13, (%esp)
+; CHECK: movl $42, 4(%esp)
call void @inreg_with_inalloca(i32 inreg 1, %Foo* inalloca %b)
; CHECK: movl $1, %eax
; CHECK: calll _inreg_with_inalloca
@@ -51,13 +49,12 @@ entry:
%b = alloca inalloca %Foo
; CHECK: movl $8, %eax
; CHECK: calll __chkstk
-; CHECK: movl %[[REG:[^,]*]], %esp
- %f1 = getelementptr %Foo* %b, i32 0, i32 0
- %f2 = getelementptr %Foo* %b, i32 0, i32 1
+ %f1 = getelementptr %Foo, %Foo* %b, i32 0, i32 0
+ %f2 = getelementptr %Foo, %Foo* %b, i32 0, i32 1
store i32 13, i32* %f1
store i32 42, i32* %f2
-; CHECK-DAG: movl $13, (%[[REG]])
-; CHECK-DAG: movl $42, 4(%[[REG]])
+; CHECK-DAG: movl $13, (%esp)
+; CHECK-DAG: movl $42, 4(%esp)
call x86_thiscallcc void @thiscall_with_inalloca(i8* null, %Foo* inalloca %b)
; CHECK-DAG: xorl %ecx, %ecx
; CHECK: calll _thiscall_with_inalloca
diff --git a/test/CodeGen/X86/init-priority.ll b/test/CodeGen/X86/init-priority.ll
new file mode 100644
index 000000000000..a0cff237f188
--- /dev/null
+++ b/test/CodeGen/X86/init-priority.ll
@@ -0,0 +1,51 @@
+; RUN: llc < %s -mtriple=x86_64-unknown-freebsd9 | FileCheck %s
+
+; Check that our compiler never emits global constructors
+; inside the .init_array section when building for a non-Linux ELF target.
+; Because of this, the test depends on UseInitArray behavior under FreeBSD
+; as found in Generic_ELF::addClangTargetOptions().
+
+; This is to work around a Visual Studio bug which causes the field
+; UseInitArray to be left uninitialized instead of being
+; zero-initialized (as specified in [dcl.init]p7).
+; The workaround consists of providing a user-defined default constructor
+; that explicitly initializes the field UseInitArray.
+
+%class.C = type { i8 }
+%class.D = type { i8 }
+
+@c1 = global %class.C zeroinitializer, align 1
+@d1 = global %class.D zeroinitializer, align 1
+@llvm.global_ctors = appending global [2 x { i32, void ()* }] [{ i32, void ()* } { i32 101, void ()* @_GLOBAL__I_000101 }, { i32, void ()* } { i32 65535, void ()* @_GLOBAL__I_a }]
+
+define linkonce_odr void @_ZN1CC1Ev(%class.C* nocapture %this) {
+entry:
+ ret void
+}
+
+define linkonce_odr void @_ZN1DC1Ev(%class.D* nocapture %this) {
+entry:
+ ret void
+}
+
+define linkonce_odr void @_ZN1DC2Ev(%class.D* nocapture %this) {
+entry:
+ ret void
+}
+
+define linkonce_odr void @_ZN1CC2Ev(%class.C* nocapture %this) {
+entry:
+ ret void
+}
+
+define internal void @_GLOBAL__I_000101() nounwind readnone {
+entry:
+ ret void
+}
+
+define internal void @_GLOBAL__I_a() nounwind readnone {
+entry:
+ ret void
+}
+
+; CHECK-NOT: .init_array
diff --git a/test/CodeGen/X86/inline-asm-duplicated-constraint.ll b/test/CodeGen/X86/inline-asm-duplicated-constraint.ll
new file mode 100644
index 000000000000..2ef54749739f
--- /dev/null
+++ b/test/CodeGen/X86/inline-asm-duplicated-constraint.ll
@@ -0,0 +1,12 @@
+; RUN: llc < %s -march=x86-64 -no-integrated-as -mtriple=x86_64-linux-gnu | FileCheck %s
+
+; CHECK-LABEL: test1:
+; CHECK: movl (%rdi), %eax
+; CHECK: nop
+; CHECK: movl %eax, (%rdi)
+; CHECK: ret
+define void @test1(i32* %l) {
+ %load = load i32, i32* %l
+ call void asm "nop", "=*rmrm,0m0m,~{dirflag},~{fpsr},~{flags}"(i32* %l, i32 %load)
+ ret void
+}
diff --git a/test/CodeGen/X86/inline-asm-fpstack.ll b/test/CodeGen/X86/inline-asm-fpstack.ll
index bb3778a28116..972355cffc15 100644
--- a/test/CodeGen/X86/inline-asm-fpstack.ll
+++ b/test/CodeGen/X86/inline-asm-fpstack.ll
@@ -169,11 +169,11 @@ entry:
; CHECK: testPR4485
define void @testPR4485(x86_fp80* %a) {
entry:
- %0 = load x86_fp80* %a, align 16
+ %0 = load x86_fp80, x86_fp80* %a, align 16
%1 = fmul x86_fp80 %0, 0xK4006B400000000000000
%2 = fmul x86_fp80 %1, 0xK4012F424000000000000
tail call void asm sideeffect "fistpl $0", "{st},~{st}"(x86_fp80 %2)
- %3 = load x86_fp80* %a, align 16
+ %3 = load x86_fp80, x86_fp80* %a, align 16
%4 = fmul x86_fp80 %3, 0xK4006B400000000000000
%5 = fmul x86_fp80 %4, 0xK4012F424000000000000
tail call void asm sideeffect "fistpl $0", "{st},~{st}"(x86_fp80 %5)
@@ -367,7 +367,7 @@ entry:
; Function Attrs: ssp
define void @test_live_st(i32 %a1) {
entry:
- %0 = load x86_fp80* undef, align 16
+ %0 = load x86_fp80, x86_fp80* undef, align 16
%cond = icmp eq i32 %a1, 1
br i1 %cond, label %sw.bb4.i, label %_Z5tointRKe.exit
@@ -380,7 +380,7 @@ _Z5tointRKe.exit:
%result.0.i = phi x86_fp80 [ %1, %sw.bb4.i ], [ %0, %entry ]
%conv.i1814 = fptosi x86_fp80 %result.0.i to i32
%conv626 = sitofp i32 %conv.i1814 to x86_fp80
- store x86_fp80 %conv626, x86_fp80* getelementptr inbounds (%struct.fpu_t* @fpu, i32 0, i32 1)
+ store x86_fp80 %conv626, x86_fp80* getelementptr inbounds (%struct.fpu_t, %struct.fpu_t* @fpu, i32 0, i32 1)
br label %return
return:
diff --git a/test/CodeGen/X86/inline-asm-out-regs.ll b/test/CodeGen/X86/inline-asm-out-regs.ll
index 46966f5370d3..8e47f81a5e06 100644
--- a/test/CodeGen/X86/inline-asm-out-regs.ll
+++ b/test/CodeGen/X86/inline-asm-out-regs.ll
@@ -9,7 +9,7 @@ entry:
br label %bb1.i
bb1.i: ; preds = %bb6.i.i, %bb1.i, %entry
- %0 = load i32* null, align 8 ; <i32> [#uses=1]
+ %0 = load i32, i32* null, align 8 ; <i32> [#uses=1]
%1 = icmp ugt i32 %0, 1048575 ; <i1> [#uses=1]
br i1 %1, label %bb2.i, label %bb1.i
@@ -19,7 +19,7 @@ bb2.i: ; preds = %bb1.i
; <i32> [#uses=1]
%2 = lshr i32 %asmresult2.i.i, 8 ; <i32> [#uses=1]
%3 = trunc i32 %2 to i8 ; <i8> [#uses=1]
- %4 = load i32* @pcibios_last_bus, align 4 ; <i32> [#uses=1]
+ %4 = load i32, i32* @pcibios_last_bus, align 4 ; <i32> [#uses=1]
%5 = icmp slt i32 %4, 0 ; <i1> [#uses=1]
br i1 %5, label %bb5.i.i, label %bb6.i.i
diff --git a/test/CodeGen/X86/inline-asm-ptr-cast.ll b/test/CodeGen/X86/inline-asm-ptr-cast.ll
index 50e302101814..21353468b1fc 100644
--- a/test/CodeGen/X86/inline-asm-ptr-cast.ll
+++ b/test/CodeGen/X86/inline-asm-ptr-cast.ll
@@ -16,12 +16,12 @@ entry:
store i64 1, i64* %flags, align 8
store i64 -1, i64* %newflags, align 8
%0 = bitcast i32* %dst to i8*
- %tmp = load i64* %flags, align 8
+ %tmp = load i64, i64* %flags, align 8
%and = and i64 %tmp, 1
%1 = bitcast i32* %src to i8*
- %tmp1 = load i8* %1
+ %tmp1 = load i8, i8* %1
%2 = bitcast i32* %dst to i8*
- %tmp2 = load i8* %2
+ %tmp2 = load i8, i8* %2
call void asm "pushfq \0Aandq $2, (%rsp) \0Aorq $3, (%rsp) \0Apopfq \0Aaddb $4, $1 \0Apushfq \0Apopq $0 \0A", "=*&rm,=*&rm,i,r,r,1,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* %newflags, i8* %0, i64 -2, i64 %and, i8 %tmp1, i8 %tmp2) nounwind
ret void
}
diff --git a/test/CodeGen/X86/inline-asm-stack-realign.ll b/test/CodeGen/X86/inline-asm-stack-realign.ll
index f2ac0f451bb0..cfbe260a33a0 100644
--- a/test/CodeGen/X86/inline-asm-stack-realign.ll
+++ b/test/CodeGen/X86/inline-asm-stack-realign.ll
@@ -11,6 +11,6 @@ entry:
%r = alloca i32, align 16
store i32 -1, i32* %r, align 16
call void asm sideeffect inteldialect "push esi\0A\09xor esi, esi\0A\09mov dword ptr $0, esi\0A\09pop esi", "=*m,~{flags},~{esi},~{esp},~{dirflag},~{fpsr},~{flags}"(i32* %r)
- %0 = load i32* %r, align 16
+ %0 = load i32, i32* %r, align 16
ret i32 %0
}
diff --git a/test/CodeGen/X86/inline-asm-stack-realign2.ll b/test/CodeGen/X86/inline-asm-stack-realign2.ll
index 0e4e7e1a6776..3dfae113cf63 100644
--- a/test/CodeGen/X86/inline-asm-stack-realign2.ll
+++ b/test/CodeGen/X86/inline-asm-stack-realign2.ll
@@ -11,6 +11,6 @@ entry:
%r = alloca i32, align 16
store i32 -1, i32* %r, align 16
call void asm sideeffect "push %esi\0A\09xor %esi, %esi\0A\09mov %esi, $0\0A\09pop %esi", "=*m,~{flags},~{esi},~{esp},~{dirflag},~{fpsr},~{flags}"(i32* %r)
- %0 = load i32* %r, align 16
+ %0 = load i32, i32* %r, align 16
ret i32 %0
}
diff --git a/test/CodeGen/X86/inline-asm-stack-realign3.ll b/test/CodeGen/X86/inline-asm-stack-realign3.ll
index 3baaaaa7d93d..be0c6f51112d 100644
--- a/test/CodeGen/X86/inline-asm-stack-realign3.ll
+++ b/test/CodeGen/X86/inline-asm-stack-realign3.ll
@@ -15,7 +15,7 @@ doit:
br label %skip
skip:
- %0 = load i32* %r, align 128
+ %0 = load i32, i32* %r, align 128
ret i32 %0
}
diff --git a/test/CodeGen/X86/inline-asm-tied.ll b/test/CodeGen/X86/inline-asm-tied.ll
index fb5896b0ad6d..9ceb0e8b4b7d 100644
--- a/test/CodeGen/X86/inline-asm-tied.ll
+++ b/test/CodeGen/X86/inline-asm-tied.ll
@@ -11,12 +11,12 @@ entry:
%retval = alloca i64 ; <i64*> [#uses=2]
%_data.addr = alloca i64 ; <i64*> [#uses=4]
store i64 %_data, i64* %_data.addr
- %tmp = load i64* %_data.addr ; <i64> [#uses=1]
+ %tmp = load i64, i64* %_data.addr ; <i64> [#uses=1]
%0 = call i64 asm "bswap %eax\0A\09bswap %edx\0A\09xchgl %eax, %edx", "=A,0,~{dirflag},~{fpsr},~{flags}"(i64 %tmp) nounwind ; <i64> [#uses=1]
store i64 %0, i64* %_data.addr
- %tmp1 = load i64* %_data.addr ; <i64> [#uses=1]
+ %tmp1 = load i64, i64* %_data.addr ; <i64> [#uses=1]
store i64 %tmp1, i64* %retval
- %1 = load i64* %retval ; <i64> [#uses=1]
+ %1 = load i64, i64* %retval ; <i64> [#uses=1]
ret i64 %1
}
diff --git a/test/CodeGen/X86/ins_split_regalloc.ll b/test/CodeGen/X86/ins_split_regalloc.ll
index f5c5254fcec3..f04d088ce687 100644
--- a/test/CodeGen/X86/ins_split_regalloc.ll
+++ b/test/CodeGen/X86/ins_split_regalloc.ll
@@ -25,7 +25,7 @@ target datalayout = "e-i64:64-f80:128-s:64-n8:16:32:64-S128"
; CHECK: jmpq *[[F_ADDR_TC]]
define void @test(i32 %a, i32 %b, i32 %c) {
entry:
- %fct_f = load void (i32)** @f, align 8
+ %fct_f = load void (i32)*, void (i32)** @f, align 8
tail call void %fct_f(i32 %a)
tail call void %fct_f(i32 %b)
tail call void %fct_f(i32 %c)
diff --git a/test/CodeGen/X86/ins_subreg_coalesce-1.ll b/test/CodeGen/X86/ins_subreg_coalesce-1.ll
index a74e3f20c41a..4a5d8dfaf688 100644
--- a/test/CodeGen/X86/ins_subreg_coalesce-1.ll
+++ b/test/CodeGen/X86/ins_subreg_coalesce-1.ll
@@ -18,7 +18,7 @@ bb22: ; preds = %bb4
bb4.i: ; preds = %bb22
ret i32 0
walkExprTree.exit: ; preds = %bb22
- %tmp83 = load i16* null, align 4 ; <i16> [#uses=1]
+ %tmp83 = load i16, i16* null, align 4 ; <i16> [#uses=1]
%tmp84 = or i16 %tmp83, 2 ; <i16> [#uses=2]
store i16 %tmp84, i16* null, align 4
%tmp98993 = zext i16 %tmp84 to i32 ; <i32> [#uses=1]
diff --git a/test/CodeGen/X86/ins_subreg_coalesce-3.ll b/test/CodeGen/X86/ins_subreg_coalesce-3.ll
index 63881e0ccb57..71890bc23b61 100644
--- a/test/CodeGen/X86/ins_subreg_coalesce-3.ll
+++ b/test/CodeGen/X86/ins_subreg_coalesce-3.ll
@@ -35,7 +35,7 @@ bb428: ; preds = %bb366, %bb304
bb433: ; preds = %bb428
ret void
bb650: ; preds = %bb650, %bb428
- %tmp658 = load i8* null, align 8 ; <i8> [#uses=1]
+ %tmp658 = load i8, i8* null, align 8 ; <i8> [#uses=1]
%tmp659 = icmp eq i8 %tmp658, 0 ; <i1> [#uses=1]
br i1 %tmp659, label %bb650, label %bb662
bb662: ; preds = %bb650
@@ -43,7 +43,7 @@ bb662: ; preds = %bb650
bb688: ; preds = %bb662
ret void
bb761: ; preds = %bb662
- %tmp487248736542 = load i32* null, align 4 ; <i32> [#uses=2]
+ %tmp487248736542 = load i32, i32* null, align 4 ; <i32> [#uses=2]
%tmp487648776541 = and i32 %tmp487248736542, 57344 ; <i32> [#uses=1]
%tmp4881 = icmp eq i32 %tmp487648776541, 8192 ; <i1> [#uses=1]
br i1 %tmp4881, label %bb4884, label %bb4897
@@ -54,10 +54,10 @@ bb4884: ; preds = %bb761
bb4897: ; preds = %bb4884, %bb761
ret void
bb4932: ; preds = %bb4884
- %tmp4933 = load i32* null, align 4 ; <i32> [#uses=1]
+ %tmp4933 = load i32, i32* null, align 4 ; <i32> [#uses=1]
br i1 %foo, label %bb5054, label %bb4940
bb4940: ; preds = %bb4932
- %tmp4943 = load i32* null, align 4 ; <i32> [#uses=2]
+ %tmp4943 = load i32, i32* null, align 4 ; <i32> [#uses=2]
switch i32 %tmp4933, label %bb5054 [
i32 159, label %bb4970
i32 160, label %bb5002
@@ -66,11 +66,11 @@ bb4970: ; preds = %bb4940
%tmp49746536 = trunc i32 %tmp4943 to i16 ; <i16> [#uses=1]
%tmp49764977 = and i16 %tmp49746536, 4095 ; <i16> [#uses=1]
%mask498049814982 = zext i16 %tmp49764977 to i64 ; <i64> [#uses=1]
- %tmp4984 = getelementptr %struct.FONT_INFO* null, i64 %mask498049814982, i32 5 ; <%struct.rec**> [#uses=1]
- %tmp4985 = load %struct.rec** %tmp4984, align 8 ; <%struct.rec*> [#uses=1]
- %tmp4988 = getelementptr %struct.rec* %tmp4985, i64 0, i32 0, i32 3 ; <%struct.THIRD_UNION*> [#uses=1]
+ %tmp4984 = getelementptr %struct.FONT_INFO, %struct.FONT_INFO* null, i64 %mask498049814982, i32 5 ; <%struct.rec**> [#uses=1]
+ %tmp4985 = load %struct.rec*, %struct.rec** %tmp4984, align 8 ; <%struct.rec*> [#uses=1]
+ %tmp4988 = getelementptr %struct.rec, %struct.rec* %tmp4985, i64 0, i32 0, i32 3 ; <%struct.THIRD_UNION*> [#uses=1]
%tmp4991 = bitcast %struct.THIRD_UNION* %tmp4988 to i32* ; <i32*> [#uses=1]
- %tmp4992 = load i32* %tmp4991, align 8 ; <i32> [#uses=1]
+ %tmp4992 = load i32, i32* %tmp4991, align 8 ; <i32> [#uses=1]
%tmp49924993 = trunc i32 %tmp4992 to i16 ; <i16> [#uses=1]
%tmp4996 = add i16 %tmp49924993, 0 ; <i16> [#uses=1]
br label %bb5054
@@ -78,11 +78,11 @@ bb5002: ; preds = %bb4940
%tmp50066537 = trunc i32 %tmp4943 to i16 ; <i16> [#uses=1]
%tmp50085009 = and i16 %tmp50066537, 4095 ; <i16> [#uses=1]
%mask501250135014 = zext i16 %tmp50085009 to i64 ; <i64> [#uses=1]
- %tmp5016 = getelementptr %struct.FONT_INFO* null, i64 %mask501250135014, i32 5 ; <%struct.rec**> [#uses=1]
- %tmp5017 = load %struct.rec** %tmp5016, align 8 ; <%struct.rec*> [#uses=1]
- %tmp5020 = getelementptr %struct.rec* %tmp5017, i64 0, i32 0, i32 3 ; <%struct.THIRD_UNION*> [#uses=1]
+ %tmp5016 = getelementptr %struct.FONT_INFO, %struct.FONT_INFO* null, i64 %mask501250135014, i32 5 ; <%struct.rec**> [#uses=1]
+ %tmp5017 = load %struct.rec*, %struct.rec** %tmp5016, align 8 ; <%struct.rec*> [#uses=1]
+ %tmp5020 = getelementptr %struct.rec, %struct.rec* %tmp5017, i64 0, i32 0, i32 3 ; <%struct.THIRD_UNION*> [#uses=1]
%tmp5023 = bitcast %struct.THIRD_UNION* %tmp5020 to i32* ; <i32*> [#uses=1]
- %tmp5024 = load i32* %tmp5023, align 8 ; <i32> [#uses=1]
+ %tmp5024 = load i32, i32* %tmp5023, align 8 ; <i32> [#uses=1]
%tmp50245025 = trunc i32 %tmp5024 to i16 ; <i16> [#uses=1]
%tmp5028 = sub i16 %tmp50245025, 0 ; <i16> [#uses=1]
br label %bb5054
diff --git a/test/CodeGen/X86/insert-positions.ll b/test/CodeGen/X86/insert-positions.ll
index 1a695f35e3b0..aa68579d22e0 100644
--- a/test/CodeGen/X86/insert-positions.ll
+++ b/test/CodeGen/X86/insert-positions.ll
@@ -41,7 +41,7 @@ for.end.i.i.i: ; preds = %land.rhs.i.i.i, %fo
%idx.ext.i.i.i = sext i32 %i.0.i.i.i to i64 ; <i64> [#uses=1]
%sub.ptr72.sum.i.i.i = xor i64 %idx.ext.i.i.i, -1 ; <i64> [#uses=1]
%pos.addr.1.sum155.i.i.i = add i64 %tmp154.i.i.i, %sub.ptr72.sum.i.i.i ; <i64> [#uses=1]
- %arrayidx76.i.i.i = getelementptr inbounds i8* undef, i64 %pos.addr.1.sum155.i.i.i ; <i8*> [#uses=0]
+ %arrayidx76.i.i.i = getelementptr inbounds i8, i8* undef, i64 %pos.addr.1.sum155.i.i.i ; <i8*> [#uses=0]
br label %while.cond.backedge.i.i.i
}
diff --git a/test/CodeGen/X86/insertps-O0-bug.ll b/test/CodeGen/X86/insertps-O0-bug.ll
new file mode 100644
index 000000000000..73748ee7e52a
--- /dev/null
+++ b/test/CodeGen/X86/insertps-O0-bug.ll
@@ -0,0 +1,52 @@
+; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 -O0 < %s | FileCheck %s
+
+; Check that at -O0, the backend doesn't attempt to canonicalize a vector load
+; used by an INSERTPS into a scalar load plus scalar_to_vector.
+;
+; In order to fold a load into the memory operand of an INSERTPSrm, the backend
+; tries to canonicalize a vector load that feeds an INSERTPS node into a
+; scalar load plus scalar_to_vector. This would allow ISel to match the
+; INSERTPSrm variant rather than a load plus INSERTPSrr.
+;
+; However, ISel can only select an INSERTPSrm if folding a load into the operand
+; of an insertps is considered to be profitable.
+;
+; In the example below:
+;
+; __m128 test(__m128 a, __m128 *b) {
+; __m128 c = _mm_insert_ps(a, *b, 1 << 6);
+; return c;
+; }
+;
+; At -O0, the backend would attempt to canonicalize the load from 'b' into
+; a scalar load in the hope of matching an INSERTPSrm.
+; However, ISel would fail to recognize an INSERTPSrm since load folding is
+; always considered unprofitable at -O0. This would leave the insertps mask
+; in an invalid state.
+;
+; The problem with the canonicalization rule performed by the backend is that
+; it assumes ISel to always be able to match an INSERTPSrm. This assumption is
+; not always correct at -O0. In this example, FastISel fails to lower the
+; incoming arguments in the entry block. This is enough to enable the DAGCombiner
+; and eventually trigger the canonicalization on the INSERTPS node.
+;
+; This test checks that the vector load feeding the insertps is not
+; canonicalized into a scalar load plus scalar_to_vector (a movss).
+
+define <4 x float> @test(<4 x float> %a, <4 x float>* %b) {
+; CHECK-LABEL: test:
+; CHECK: movaps (%rdi), [[REG:%[a-z0-9]+]]
+; CHECK-NOT: movss
+; CHECK: insertps $64, [[REG]],
+; CHECK: ret
+entry:
+ %0 = load <4 x float>, <4 x float>* %b, align 16
+ %1 = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a, <4 x float> %0, i32 64)
+ %2 = alloca <4 x float>, align 16
+ store <4 x float> %1, <4 x float>* %2, align 16
+ %3 = load <4 x float>, <4 x float>* %2, align 16
+ ret <4 x float> %3
+}
+
+
+declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i32)
diff --git a/test/CodeGen/X86/invalid-shift-immediate.ll b/test/CodeGen/X86/invalid-shift-immediate.ll
index 77a9f7eda783..1fb80c7dba7f 100644
--- a/test/CodeGen/X86/invalid-shift-immediate.ll
+++ b/test/CodeGen/X86/invalid-shift-immediate.ll
@@ -9,7 +9,7 @@ entry:
%x_addr = alloca i32 ; <i32*> [#uses=2]
%"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
store i32 %x, i32* %x_addr
- %tmp = load i32* %x_addr, align 4 ; <i32> [#uses=1]
+ %tmp = load i32, i32* %x_addr, align 4 ; <i32> [#uses=1]
%tmp1 = ashr i32 %tmp, -2 ; <i32> [#uses=1]
%tmp2 = and i32 %tmp1, 1 ; <i32> [#uses=1]
%tmp23 = trunc i32 %tmp2 to i8 ; <i8> [#uses=1]
@@ -17,7 +17,7 @@ entry:
br i1 %toBool, label %bb, label %bb5
bb: ; preds = %entry
- %tmp4 = call i32 (...)* @bar( ) nounwind ; <i32> [#uses=0]
+ %tmp4 = call i32 (...) @bar( ) nounwind ; <i32> [#uses=0]
br label %bb5
bb5: ; preds = %bb, %entry
diff --git a/test/CodeGen/X86/isel-optnone.ll b/test/CodeGen/X86/isel-optnone.ll
index d2f062832e0c..831ad3837d96 100644
--- a/test/CodeGen/X86/isel-optnone.ll
+++ b/test/CodeGen/X86/isel-optnone.ll
@@ -2,14 +2,14 @@
define i32* @fooOptnone(i32* %p, i32* %q, i32** %z) #0 {
entry:
- %r = load i32* %p
- %s = load i32* %q
- %y = load i32** %z
+ %r = load i32, i32* %p
+ %s = load i32, i32* %q
+ %y = load i32*, i32** %z
%t0 = add i32 %r, %s
%t1 = add i32 %t0, 1
- %t2 = getelementptr i32* %y, i32 1
- %t3 = getelementptr i32* %t2, i32 %t1
+ %t2 = getelementptr i32, i32* %y, i32 1
+ %t3 = getelementptr i32, i32* %t2, i32 %t1
ret i32* %t3
@@ -21,14 +21,14 @@ entry:
define i32* @fooNormal(i32* %p, i32* %q, i32** %z) #1 {
entry:
- %r = load i32* %p
- %s = load i32* %q
- %y = load i32** %z
+ %r = load i32, i32* %p
+ %s = load i32, i32* %q
+ %y = load i32*, i32** %z
%t0 = add i32 %r, %s
%t1 = add i32 %t0, 1
- %t2 = getelementptr i32* %y, i32 1
- %t3 = getelementptr i32* %t2, i32 %t1
+ %t2 = getelementptr i32, i32* %y, i32 1
+ %t3 = getelementptr i32, i32* %t2, i32 %t1
ret i32* %t3
diff --git a/test/CodeGen/X86/isel-sink.ll b/test/CodeGen/X86/isel-sink.ll
index e4af9b67f95e..27abe051a9b3 100644
--- a/test/CodeGen/X86/isel-sink.ll
+++ b/test/CodeGen/X86/isel-sink.ll
@@ -11,13 +11,13 @@ define i32 @test(i32* %X, i32 %B) {
; CHECK: ret
; This gep should be sunk out of this block into the load/store users.
- %P = getelementptr i32* %X, i32 %B
+ %P = getelementptr i32, i32* %X, i32 %B
%G = icmp ult i32 %B, 1234
br i1 %G, label %T, label %F
T:
store i32 4, i32* %P
ret i32 141
F:
- %V = load i32* %P
+ %V = load i32, i32* %P
ret i32 %V
}
diff --git a/test/CodeGen/X86/isel-sink2.ll b/test/CodeGen/X86/isel-sink2.ll
index b162666362aa..65f1994b9fe1 100644
--- a/test/CodeGen/X86/isel-sink2.ll
+++ b/test/CodeGen/X86/isel-sink2.ll
@@ -3,15 +3,15 @@
; RUN: not grep leal %t
define i8 @test(i32 *%P) nounwind {
- %Q = getelementptr i32* %P, i32 1
+ %Q = getelementptr i32, i32* %P, i32 1
%R = bitcast i32* %Q to i8*
- %S = load i8* %R
+ %S = load i8, i8* %R
%T = icmp eq i8 %S, 0
br i1 %T, label %TB, label %F
TB:
ret i8 4
F:
- %U = getelementptr i8* %R, i32 3
- %V = load i8* %U
+ %U = getelementptr i8, i8* %R, i32 3
+ %V = load i8, i8* %U
ret i8 %V
}
diff --git a/test/CodeGen/X86/isel-sink3.ll b/test/CodeGen/X86/isel-sink3.ll
index 7012ccefaadb..fa633dc25aef 100644
--- a/test/CodeGen/X86/isel-sink3.ll
+++ b/test/CodeGen/X86/isel-sink3.ll
@@ -11,8 +11,8 @@ target triple = "i386-apple-darwin7"
define i32 @bar(i32** %P) nounwind {
entry:
- %0 = load i32** %P, align 4 ; <i32*> [#uses=2]
- %1 = getelementptr i32* %0, i32 1 ; <i32*> [#uses=1]
+ %0 = load i32*, i32** %P, align 4 ; <i32*> [#uses=2]
+ %1 = getelementptr i32, i32* %0, i32 1 ; <i32*> [#uses=1]
%2 = icmp ugt i32* %1, inttoptr (i64 1233 to i32*) ; <i1> [#uses=1]
br i1 %2, label %bb1, label %bb
@@ -21,7 +21,7 @@ bb: ; preds = %entry
br label %bb1
bb1: ; preds = %entry, %bb
- %3 = getelementptr i32* %1, i32 1 ; <i32*> [#uses=1]
- %4 = load i32* %3, align 4 ; <i32> [#uses=1]
+ %3 = getelementptr i32, i32* %1, i32 1 ; <i32*> [#uses=1]
+ %4 = load i32, i32* %3, align 4 ; <i32> [#uses=1]
ret i32 %4
}
diff --git a/test/CodeGen/X86/isint.ll b/test/CodeGen/X86/isint.ll
index 38d05c662bd5..ea38d9e4ec29 100644
--- a/test/CodeGen/X86/isint.ll
+++ b/test/CodeGen/X86/isint.ll
@@ -1,8 +1,8 @@
-; RUN: llc < %s -mtriple=x86_64-pc-unknown -mattr=+sse2 -mcpu=penryn | FileCheck %s
-; RUN: llc < %s -mtriple=i686-pc-unknown -mattr=+sse2 -mcpu=penryn | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-pc-unknown -mattr=+sse2 | FileCheck %s
+; RUN: llc < %s -mtriple=i686-pc-unknown -mattr=+sse2 | FileCheck %s
; PR19059
-; RUN: llc < %s -mtriple=i686-pc-unknown -mattr=+sse2 -mcpu=penryn | FileCheck -check-prefix=CHECK32 %s
+; RUN: llc < %s -mtriple=i686-pc-unknown -mattr=+sse2 | FileCheck -check-prefix=CHECK32 %s
define i32 @isint_return(double %d) nounwind {
; CHECK-LABEL: isint_return:
diff --git a/test/CodeGen/X86/jump_sign.ll b/test/CodeGen/X86/jump_sign.ll
index dfa8aed46463..ca3e8bf71eba 100644
--- a/test/CodeGen/X86/jump_sign.ll
+++ b/test/CodeGen/X86/jump_sign.ll
@@ -9,11 +9,11 @@ entry:
br i1 %tmp, label %cond_true, label %cond_next
cond_true: ; preds = %entry
- %tmp2 = tail call i32 (...)* @bar( ) ; <i32> [#uses=0]
+ %tmp2 = tail call i32 (...) @bar( ) ; <i32> [#uses=0]
br label %cond_next
cond_next: ; preds = %cond_true, %entry
- %tmp3 = tail call i32 (...)* @baz( ) ; <i32> [#uses=0]
+ %tmp3 = tail call i32 (...) @baz( ) ; <i32> [#uses=0]
ret i32 undef
}
@@ -164,7 +164,7 @@ entry:
; PR://13046
define void @func_o() nounwind uwtable {
entry:
- %0 = load i16* undef, align 2
+ %0 = load i16, i16* undef, align 2
br i1 undef, label %if.then.i, label %if.end.i
if.then.i: ; preds = %entry
@@ -217,17 +217,15 @@ entry:
; PR13475
; If we have sub a, b and cmp b, a and the result of cmp is used
; by sbb, we should not optimize cmp away.
-define i32 @func_q(i32 %j.4, i32 %w, i32 %el) {
+define i32 @func_q(i32 %a0, i32 %a1, i32 %a2) {
; CHECK-LABEL: func_q:
; CHECK: cmp
; CHECK-NEXT: sbb
- %tmp532 = add i32 %j.4, %w
- %tmp533 = icmp ugt i32 %tmp532, %el
- %tmp534 = icmp ult i32 %w, %el
- %or.cond = and i1 %tmp533, %tmp534
- %tmp535 = sub i32 %el, %w
- %j.5 = select i1 %or.cond, i32 %tmp535, i32 %j.4
- ret i32 %j.5
+ %1 = icmp ult i32 %a0, %a1
+ %2 = sub i32 %a1, %a0
+ %3 = select i1 %1, i32 -1, i32 0
+ %4 = xor i32 %2, %3
+ ret i32 %4
}
; rdar://11873276
define i8* @func_r(i8* %base, i32* nocapture %offset, i32 %size) nounwind {
@@ -238,14 +236,14 @@ entry:
; CHECK: j
; CHECK-NOT: sub
; CHECK: ret
- %0 = load i32* %offset, align 8
+ %0 = load i32, i32* %offset, align 8
%cmp = icmp slt i32 %0, %size
br i1 %cmp, label %return, label %if.end
if.end:
%sub = sub nsw i32 %0, %size
store i32 %sub, i32* %offset, align 8
- %add.ptr = getelementptr inbounds i8* %base, i32 %sub
+ %add.ptr = getelementptr inbounds i8, i8* %base, i32 %sub
br label %return
return:
@@ -287,10 +285,10 @@ entry:
; CHECK: andb
; CHECK: j
; CHECK: ret
- %0 = load i32* @b, align 4
+ %0 = load i32, i32* @b, align 4
%cmp = icmp ult i32 %0, %p1
%conv = zext i1 %cmp to i32
- %1 = load i32* @a, align 4
+ %1 = load i32, i32* @a, align 4
%and = and i32 %conv, %1
%conv1 = trunc i32 %and to i8
%2 = urem i8 %conv1, 3
diff --git a/test/CodeGen/X86/jump_table_alias.ll b/test/CodeGen/X86/jump_table_alias.ll
deleted file mode 100644
index 20622009e376..000000000000
--- a/test/CodeGen/X86/jump_table_alias.ll
+++ /dev/null
@@ -1,32 +0,0 @@
-; RUN: llc <%s -jump-table-type=single | FileCheck %s
-target triple = "x86_64-unknown-linux-gnu"
-define i32 @f() unnamed_addr jumptable {
-entry:
- ret i32 0
-}
-
-@i = internal alias i32 ()* @f
-@j = alias i32 ()* @f
-
-define i32 @main(i32 %argc, i8** %argv) {
- %temp = alloca i32 ()*, align 8
- store i32 ()* @i, i32()** %temp, align 8
-; CHECK: movq $__llvm_jump_instr_table_0_1
- %1 = load i32 ()** %temp, align 8
-; CHECK: movl $__llvm_jump_instr_table_0_1
- %2 = call i32 ()* %1()
- %3 = call i32 ()* @i()
-; CHECK: callq i
- %4 = call i32 ()* @j()
-; CHECK: callq j
- ret i32 %3
-}
-
-; There should only be one table, even though there are two GlobalAliases,
-; because they both alias the same value.
-
-; CHECK: .align 8, 0x90
-; CHECK: .type __llvm_jump_instr_table_0_1,@function
-; CHECK: __llvm_jump_instr_table_0_1:
-; CHECK: jmp f@PLT
-
diff --git a/test/CodeGen/X86/jump_table_align.ll b/test/CodeGen/X86/jump_table_align.ll
deleted file mode 100644
index 6ad48d1f54f7..000000000000
--- a/test/CodeGen/X86/jump_table_align.ll
+++ /dev/null
@@ -1,29 +0,0 @@
-; RUN: llc -filetype=obj <%s -jump-table-type=single -o %t1
-; RUN: llvm-objdump -triple=x86_64-unknown-linux-gnu -d %t1 | FileCheck %s
-target triple = "x86_64-unknown-linux-gnu"
-define i32 @f() unnamed_addr jumptable {
- ret i32 0
-}
-
-define i32 @g(i8* %a) unnamed_addr jumptable {
- ret i32 0
-}
-
-define void @h(void ()* %func) unnamed_addr jumptable {
- ret void
-}
-
-define i32 @main() {
- %g = alloca i32 (...)*, align 8
- store i32 (...)* bitcast (i32 ()* @f to i32 (...)*), i32 (...)** %g, align 8
- %1 = load i32 (...)** %g, align 8
- %call = call i32 (...)* %1()
- call void (void ()*)* @h(void ()* bitcast (void (void ()*)* @h to void ()*))
- %a = call i32 (i32*)* bitcast (i32 (i8*)* @g to i32(i32*)*)(i32* null)
- ret i32 %a
-}
-
-; Make sure that the padding from getJumpInstrTableEntryBound is right.
-; CHECK: __llvm_jump_instr_table_0_1:
-; CHECK-NEXT: e9 00 00 00 00 jmp 0
-; CHECK-NEXT: 0f 1f 00 nopl (%rax)
diff --git a/test/CodeGen/X86/jump_table_bitcast.ll b/test/CodeGen/X86/jump_table_bitcast.ll
deleted file mode 100644
index 749b77a166ea..000000000000
--- a/test/CodeGen/X86/jump_table_bitcast.ll
+++ /dev/null
@@ -1,43 +0,0 @@
-; RUN: llc <%s -jump-table-type=single | FileCheck %s
-target triple = "x86_64-unknown-linux-gnu"
-define i32 @f() unnamed_addr jumptable {
- ret i32 0
-}
-
-define i32 @g(i8* %a) unnamed_addr jumptable {
- ret i32 0
-}
-
-define void @h(void ()* %func) unnamed_addr jumptable {
- ret void
-}
-
-define i32 @main() {
- %g = alloca i32 (...)*, align 8
- store i32 (...)* bitcast (i32 ()* @f to i32 (...)*), i32 (...)** %g, align 8
-; CHECK: movq $__llvm_jump_instr_table_0_[[ENTRY:1|2|3]],
-; CHECK: movl $__llvm_jump_instr_table_0_[[ENTRY]],
- %1 = load i32 (...)** %g, align 8
- %call = call i32 (...)* %1()
- call void (void ()*)* @h(void ()* bitcast (void (void ()*)* @h to void ()*))
-; CHECK: movl $__llvm_jump_instr_table_0_{{1|2|3}},
-; CHECK: callq h
-
- %a = call i32 (i32*)* bitcast (i32 (i8*)* @g to i32(i32*)*)(i32* null)
-; CHECK: callq g
- ret i32 %a
-}
-
-; CHECK: .align 8, 0x90
-; CHECK: .type __llvm_jump_instr_table_0_1,@function
-; CHECK: __llvm_jump_instr_table_0_1:
-; CHECK: jmp {{f|g|h}}@PLT
-; CHECK: .align 8, 0x90
-; CHECK: .type __llvm_jump_instr_table_0_2,@function
-; CHECK: __llvm_jump_instr_table_0_2:
-; CHECK: jmp {{f|g|h}}@PLT
-; CHECK: .align 8, 0x90
-; CHECK: .type __llvm_jump_instr_table_0_3,@function
-; CHECK: __llvm_jump_instr_table_0_3:
-; CHECK: jmp {{f|g|h}}@PLT
-
diff --git a/test/CodeGen/X86/jump_tables.ll b/test/CodeGen/X86/jump_tables.ll
deleted file mode 100644
index 485154eaa2a9..000000000000
--- a/test/CodeGen/X86/jump_tables.ll
+++ /dev/null
@@ -1,255 +0,0 @@
-; RUN: llc <%s -jump-table-type=single | FileCheck --check-prefix=SINGLE %s
-; RUN: llc <%s -jump-table-type=arity | FileCheck --check-prefix=ARITY %s
-; RUN: llc <%s -jump-table-type=simplified | FileCheck --check-prefix=SIMPL %s
-; RUN: llc <%s -jump-table-type=full | FileCheck --check-prefix=FULL %s
-
-target triple = "x86_64-unknown-linux-gnu"
-
-%struct.fun_struct = type { i32 (...)* }
-
-@a = global [12 x i32 () *] [ i32 ()* bitcast (void ()* @indirect_fun to i32 ()*),
- i32 ()* bitcast (void ()* @indirect_fun_match to i32 ()*),
- i32 ()* bitcast (i32 ()* @indirect_fun_i32 to i32 ()*),
- i32 ()* bitcast (i32 (i32)* @indirect_fun_i32_1 to i32 ()*),
- i32 ()* bitcast (i32 (i32, i32)* @indirect_fun_i32_2 to i32 ()*),
- i32 ()* bitcast (i32* (i32*, i32)* @indirect_fun_i32S_2 to i32 ()*),
- i32 ()* bitcast (void (%struct.fun_struct)* @indirect_fun_struct to i32 ()*),
- i32 ()* bitcast (void (i32 (...)*, i32)* @indirect_fun_fun to i32 ()*),
- i32 ()* bitcast (i32 (i32 (...)*, i32)* @indirect_fun_fun_ret to i32 ()*),
- i32 ()* bitcast (void ([19 x i8])* @indirect_fun_array to i32 ()*),
- i32 ()* bitcast (void (<3 x i32>)* @indirect_fun_vec to i32 ()*),
- i32 ()* bitcast (void (<4 x float>)* @indirect_fun_vec_2 to i32 ()*)
- ]
-
-define void @indirect_fun() unnamed_addr jumptable {
- ret void
-}
-
-define void @indirect_fun_match() unnamed_addr jumptable {
- ret void
-}
-
-define i32 @indirect_fun_i32() unnamed_addr jumptable {
- ret i32 0
-}
-
-define i32 @indirect_fun_i32_1(i32 %a) unnamed_addr jumptable {
- ret i32 %a
-}
-
-define i32 @indirect_fun_i32_2(i32 %a, i32 %b) unnamed_addr jumptable {
- ret i32 %a
-}
-
-define i32* @indirect_fun_i32S_2(i32* %a, i32 %b) unnamed_addr jumptable {
- ret i32* %a
-}
-
-define void @indirect_fun_struct(%struct.fun_struct %fs) unnamed_addr jumptable {
- ret void
-}
-
-define void @indirect_fun_fun(i32 (...)* %fun, i32 %a) unnamed_addr jumptable {
- ret void
-}
-
-define i32 @indirect_fun_fun_ret(i32 (...)* %fun, i32 %a) unnamed_addr jumptable {
- ret i32 %a
-}
-
-define void @indirect_fun_array([19 x i8] %a) unnamed_addr jumptable {
- ret void
-}
-
-define void @indirect_fun_vec(<3 x i32> %a) unnamed_addr jumptable {
- ret void
-}
-
-define void @indirect_fun_vec_2(<4 x float> %a) unnamed_addr jumptable {
- ret void
-}
-
-define i32 @m(void ()* %fun) {
- call void ()* %fun()
- ret i32 0
-}
-
-define void ()* @get_fun() {
- ret void ()* @indirect_fun
-; SINGLE: movl $__llvm_jump_instr_table_0_
-; ARITY: movl $__llvm_jump_instr_table_
-; SIMPL: movl $__llvm_jump_instr_table_
-; FULL: movl $__llvm_jump_instr_table_
-}
-
-define i32 @main(i32 %argc, i8** %argv) {
- %f = call void ()* ()* @get_fun()
- %a = call i32 @m(void ()* %f)
- ret i32 %a
-}
-
-; SINGLE-DAG: .align 8, 0x90
-; SINGLE-DAG: .type __llvm_jump_instr_table_0_1,@function
-; SINGLE-DAG: __llvm_jump_instr_table_0_1:
-; SINGLE-DAG: jmp indirect_fun_array@PLT
-; SINGLE-DAG: .align 8, 0x90
-; SINGLE-DAG: .type __llvm_jump_instr_table_0_2,@function
-; SINGLE-DAG: __llvm_jump_instr_table_0_2:
-; SINGLE-DAG: jmp indirect_fun_i32_2@PLT
-; SINGLE-DAG: .align 8, 0x90
-; SINGLE-DAG: .type __llvm_jump_instr_table_0_3,@function
-; SINGLE-DAG: __llvm_jump_instr_table_0_3:
-; SINGLE-DAG: jmp indirect_fun_vec_2@PLT
-; SINGLE-DAG: .align 8, 0x90
-; SINGLE-DAG: .type __llvm_jump_instr_table_0_4,@function
-; SINGLE-DAG: __llvm_jump_instr_table_0_4:
-; SINGLE-DAG: jmp indirect_fun_i32S_2@PLT
-; SINGLE-DAG: .align 8, 0x90
-; SINGLE-DAG: .type __llvm_jump_instr_table_0_5,@function
-; SINGLE-DAG: __llvm_jump_instr_table_0_5:
-; SINGLE-DAG: jmp indirect_fun_struct@PLT
-; SINGLE-DAG: .align 8, 0x90
-; SINGLE-DAG: .type __llvm_jump_instr_table_0_6,@function
-; SINGLE-DAG: __llvm_jump_instr_table_0_6:
-; SINGLE-DAG: jmp indirect_fun_i32_1@PLT
-; SINGLE-DAG: .align 8, 0x90
-; SINGLE-DAG: .type __llvm_jump_instr_table_0_7,@function
-; SINGLE-DAG: __llvm_jump_instr_table_0_7:
-; SINGLE-DAG: jmp indirect_fun_i32@PLT
-; SINGLE-DAG: .align 8, 0x90
-; SINGLE-DAG: .type __llvm_jump_instr_table_0_8,@function
-; SINGLE-DAG: __llvm_jump_instr_table_0_8:
-; SINGLE-DAG: jmp indirect_fun_fun@PLT
-; SINGLE-DAG: .align 8, 0x90
-; SINGLE-DAG: .type __llvm_jump_instr_table_0_9,@function
-; SINGLE-DAG: __llvm_jump_instr_table_0_9:
-; SINGLE-DAG: jmp indirect_fun_fun_ret@PLT
-; SINGLE-DAG: .align 8, 0x90
-; SINGLE-DAG: .type __llvm_jump_instr_table_0_10,@function
-; SINGLE-DAG: __llvm_jump_instr_table_0_10:
-; SINGLE-DAG: jmp indirect_fun@PLT
-; SINGLE-DAG: .align 8, 0x90
-; SINGLE-DAG: .type __llvm_jump_instr_table_0_11,@function
-; SINGLE-DAG: __llvm_jump_instr_table_0_11:
-; SINGLE-DAG: jmp indirect_fun_match@PLT
-; SINGLE-DAG: .align 8, 0x90
-; SINGLE-DAG: .type __llvm_jump_instr_table_0_12,@function
-; SINGLE-DAG: __llvm_jump_instr_table_0_12:
-; SINGLE-DAG: jmp indirect_fun_vec@PLT
-; SINGLE-DAG: .align 8, 0x90
-; SINGLE-DAG: ud2
-; SINGLE-DAG: .align 8, 0x90
-; SINGLE-DAG: ud2
-; SINGLE-DAG: .align 8, 0x90
-; SINGLE-DAG: ud2
-; SINGLE-DAG: .align 8, 0x90
-; SINGLE-DAG: ud2
-
-
-; ARITY-DAG: .align 8, 0x90
-; ARITY-DAG: .type __llvm_jump_instr_table_2_1,@function
-; ARITY-DAG: __llvm_jump_instr_table_2_1:
-; ARITY-DAG: jmp indirect_fun{{.*}}@PLT
-; ARITY-DAG: .align 8, 0x90
-; ARITY-DAG: ud2
-; ARITY-DAG: .align 8, 0x90
-; ARITY-DAG: .type __llvm_jump_instr_table_0_1,@function
-; ARITY-DAG: __llvm_jump_instr_table_0_1:
-; ARITY-DAG: jmp indirect_fun{{.*}}@PLT
-; ARITY-DAG: .align 8, 0x90
-; ARITY-DAG: .type __llvm_jump_instr_table_1_1,@function
-; ARITY-DAG: __llvm_jump_instr_table_1_1:
-; ARITY-DAG: jmp indirect_fun{{.*}}@PLT
-
-; SIMPL-DAG: .align 8, 0x90
-; SIMPL-DAG: .type __llvm_jump_instr_table_2_1,@function
-; SIMPL-DAG: __llvm_jump_instr_table_2_1:
-; SIMPL-DAG: jmp indirect_fun{{.*}}@PLT
-; SIMPL-DAG: .align 8, 0x90
-; SIMPL-DAG: ud2
-; SIMPL-DAG: .align 8, 0x90
-; SIMPL-DAG: .type __llvm_jump_instr_table_0_1,@function
-; SIMPL-DAG: __llvm_jump_instr_table_0_1:
-; SIMPL-DAG: jmp indirect_fun{{.*}}@PLT
-; SIMPL-DAG: .align 8, 0x90
-; SIMPL-DAG: .type __llvm_jump_instr_table_1_1,@function
-; SIMPL-DAG: __llvm_jump_instr_table_1_1:
-; SIMPL-DAG: jmp indirect_fun{{.*}}@PLT
-; SIMPL-DAG: .align 8, 0x90
-; SIMPL-DAG: .type __llvm_jump_instr_table_3_1,@function
-; SIMPL-DAG: __llvm_jump_instr_table_3_1:
-; SIMPL-DAG: jmp indirect_fun{{.*}}@PLT
-; SIMPL-DAG: .align 8, 0x90
-; SIMPL-DAG: .type __llvm_jump_instr_table_4_1,@function
-; SIMPL-DAG: __llvm_jump_instr_table_4_1:
-; SIMPL-DAG: jmp indirect_fun{{.*}}@PLT
-
-
-; FULL-DAG: .align 8, 0x90
-; FULL-DAG: .type __llvm_jump_instr_table_10_1,@function
-; FULL-DAG:__llvm_jump_instr_table_10_1:
-; FULL-DAG: jmp indirect_fun_i32_1@PLT
-; FULL-DAG: .align 8, 0x90
-; FULL-DAG: ud2
-; FULL-DAG: .align 8, 0x90
-; FULL-DAG: .type __llvm_jump_instr_table_9_1,@function
-; FULL-DAG:__llvm_jump_instr_table_9_1:
-; FULL-DAG: jmp indirect_fun_i32_2@PLT
-; FULL-DAG: .align 8, 0x90
-; FULL-DAG: ud2
-; FULL-DAG: .align 8, 0x90
-; FULL-DAG: .type __llvm_jump_instr_table_7_1,@function
-; FULL-DAG:__llvm_jump_instr_table_7_1:
-; FULL-DAG: jmp indirect_fun_i32S_2@PLT
-; FULL-DAG: .align 8, 0x90
-; FULL-DAG: ud2
-; FULL-DAG: .align 8, 0x90
-; FULL-DAG: .type __llvm_jump_instr_table_3_1,@function
-; FULL-DAG:__llvm_jump_instr_table_3_1:
-; FULL-DAG: jmp indirect_fun_vec_2@PLT
-; FULL-DAG: .align 8, 0x90
-; FULL-DAG: ud2
-; FULL-DAG: .align 8, 0x90
-; FULL-DAG: .type __llvm_jump_instr_table_2_1,@function
-; FULL-DAG:__llvm_jump_instr_table_2_1:
-; FULL-DAG: jmp indirect_fun@PLT
-; FULL-DAG: .align 8, 0x90
-; FULL-DAG: ud2
-; FULL-DAG: .align 8, 0x90
-; FULL-DAG: ud2
-; FULL-DAG: .align 8, 0x90
-; FULL-DAG: .type __llvm_jump_instr_table_8_1,@function
-; FULL-DAG:__llvm_jump_instr_table_8_1:
-; FULL-DAG: jmp indirect_fun_i32@PLT
-; FULL-DAG: .align 8, 0x90
-; FULL-DAG: ud2
-; FULL-DAG: .align 8, 0x90
-; FULL-DAG: .type __llvm_jump_instr_table_1_1,@function
-; FULL-DAG:__llvm_jump_instr_table_1_1:
-; FULL-DAG: jmp indirect_fun_array@PLT
-; FULL-DAG: .align 8, 0x90
-; FULL-DAG: ud2
-; FULL-DAG: .align 8, 0x90
-; FULL-DAG: .type __llvm_jump_instr_table_0_1,@function
-; FULL-DAG:__llvm_jump_instr_table_0_1:
-; FULL-DAG: jmp indirect_fun_vec@PLT
-; FULL-DAG: .align 8, 0x90
-; FULL-DAG: ud2
-; FULL-DAG: .align 8, 0x90
-; FULL-DAG: .type __llvm_jump_instr_table_6_1,@function
-; FULL-DAG:__llvm_jump_instr_table_6_1:
-; FULL-DAG: jmp indirect_fun_struct@PLT
-; FULL-DAG: .align 8, 0x90
-; FULL-DAG: ud2
-; FULL-DAG: .align 8, 0x90
-; FULL-DAG: .type __llvm_jump_instr_table_5_1,@function
-; FULL-DAG:__llvm_jump_instr_table_5_1:
-; FULL-DAG: jmp indirect_fun_fun@PLT
-; FULL-DAG: .align 8, 0x90
-; FULL-DAG: ud2
-; FULL-DAG: .align 8, 0x90
-; FULL-DAG: .type __llvm_jump_instr_table_4_1,@function
-; FULL-DAG:__llvm_jump_instr_table_4_1:
-; FULL-DAG: jmp indirect_fun_fun_ret@PLT
-; FULL-DAG: .align 8, 0x90
-; FULL-DAG: ud2
diff --git a/test/CodeGen/X86/large-code-model-isel.ll b/test/CodeGen/X86/large-code-model-isel.ll
index 3c283d934949..9edabcd0520d 100644
--- a/test/CodeGen/X86/large-code-model-isel.ll
+++ b/test/CodeGen/X86/large-code-model-isel.ll
@@ -8,6 +8,6 @@ define void @foo() {
; CHECK-LABEL: foo:
entry:
; CHECK: callq
- %call = call i64* undef(i64* undef, i8* getelementptr inbounds ([2 x i8]* @.str10, i32 0, i32 0))
+ %call = call i64* undef(i64* undef, i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str10, i32 0, i32 0))
ret void
}
diff --git a/test/CodeGen/X86/large-constants.ll b/test/CodeGen/X86/large-constants.ll
index 157ecc4af66b..945d633eec12 100644
--- a/test/CodeGen/X86/large-constants.ll
+++ b/test/CodeGen/X86/large-constants.ll
@@ -40,10 +40,10 @@ fail:
define void @constant_expressions() {
entry:
- %0 = load i64* inttoptr (i64 add (i64 51250129900, i64 0) to i64*)
- %1 = load i64* inttoptr (i64 add (i64 51250129900, i64 8) to i64*)
- %2 = load i64* inttoptr (i64 add (i64 51250129900, i64 16) to i64*)
- %3 = load i64* inttoptr (i64 add (i64 51250129900, i64 24) to i64*)
+ %0 = load i64, i64* inttoptr (i64 add (i64 51250129900, i64 0) to i64*)
+ %1 = load i64, i64* inttoptr (i64 add (i64 51250129900, i64 8) to i64*)
+ %2 = load i64, i64* inttoptr (i64 add (i64 51250129900, i64 16) to i64*)
+ %3 = load i64, i64* inttoptr (i64 add (i64 51250129900, i64 24) to i64*)
%4 = add i64 %0, %1
%5 = add i64 %2, %3
%6 = add i64 %4, %5
@@ -54,10 +54,10 @@ entry:
define void @constant_expressions2() {
entry:
- %0 = load i64* inttoptr (i64 51250129900 to i64*)
- %1 = load i64* inttoptr (i64 51250129908 to i64*)
- %2 = load i64* inttoptr (i64 51250129916 to i64*)
- %3 = load i64* inttoptr (i64 51250129924 to i64*)
+ %0 = load i64, i64* inttoptr (i64 51250129900 to i64*)
+ %1 = load i64, i64* inttoptr (i64 51250129908 to i64*)
+ %2 = load i64, i64* inttoptr (i64 51250129916 to i64*)
+ %3 = load i64, i64* inttoptr (i64 51250129924 to i64*)
%4 = add i64 %0, %1
%5 = add i64 %2, %3
%6 = add i64 %4, %5
diff --git a/test/CodeGen/X86/large-gep-chain.ll b/test/CodeGen/X86/large-gep-chain.ll
index 5cf4661f8ff9..44247b8658a7 100644
--- a/test/CodeGen/X86/large-gep-chain.ll
+++ b/test/CodeGen/X86/large-gep-chain.ll
@@ -21,25275 +21,25275 @@ bb1: ; preds = %bb
br label %bb25362
bb2: ; preds = %bb
- %tmp = getelementptr inbounds float* null, i64 1
- %tmp3 = getelementptr inbounds float* %tmp, i64 1
- %tmp4 = getelementptr inbounds float* %tmp3, i64 1
- %tmp5 = getelementptr inbounds float* %tmp4, i64 1
- %tmp6 = getelementptr inbounds float* %tmp5, i64 1
- %tmp7 = getelementptr inbounds float* %tmp6, i64 1
- %tmp8 = getelementptr inbounds float* %tmp7, i64 1
- %tmp9 = getelementptr inbounds float* %tmp8, i64 1
- %tmp10 = getelementptr inbounds float* %tmp9, i64 1
- %tmp11 = getelementptr inbounds float* %tmp10, i64 1
- %tmp12 = getelementptr inbounds float* %tmp11, i64 1
- %tmp13 = getelementptr inbounds float* %tmp12, i64 1
- %tmp14 = getelementptr inbounds float* %tmp13, i64 1
- %tmp15 = getelementptr inbounds float* %tmp14, i64 1
- %tmp16 = getelementptr inbounds float* %tmp15, i64 1
- %tmp17 = getelementptr inbounds float* %tmp16, i64 1
- %tmp18 = getelementptr inbounds float* %tmp17, i64 1
- %tmp19 = getelementptr inbounds float* %tmp18, i64 1
- %tmp20 = getelementptr inbounds float* %tmp19, i64 1
- %tmp21 = getelementptr inbounds float* %tmp20, i64 1
- %tmp22 = getelementptr inbounds float* %tmp21, i64 1
- %tmp23 = getelementptr inbounds float* %tmp22, i64 1
- %tmp24 = getelementptr inbounds float* %tmp23, i64 1
- %tmp25 = getelementptr inbounds float* %tmp24, i64 1
- %tmp26 = getelementptr inbounds float* %tmp25, i64 1
- %tmp27 = getelementptr inbounds float* %tmp26, i64 1
- %tmp28 = getelementptr inbounds float* %tmp27, i64 1
- %tmp29 = getelementptr inbounds float* %tmp28, i64 1
- %tmp30 = getelementptr inbounds float* %tmp29, i64 1
- %tmp31 = getelementptr inbounds float* %tmp30, i64 1
- %tmp32 = getelementptr inbounds float* %tmp31, i64 1
- %tmp33 = getelementptr inbounds float* %tmp32, i64 1
- %tmp34 = getelementptr inbounds float* %tmp33, i64 1
- %tmp35 = getelementptr inbounds float* %tmp34, i64 1
- %tmp36 = getelementptr inbounds float* %tmp35, i64 1
- %tmp37 = getelementptr inbounds float* %tmp36, i64 1
- %tmp38 = getelementptr inbounds float* %tmp37, i64 1
- %tmp39 = getelementptr inbounds float* %tmp38, i64 1
- %tmp40 = getelementptr inbounds float* %tmp39, i64 1
- %tmp41 = getelementptr inbounds float* %tmp40, i64 1
- %tmp42 = getelementptr inbounds float* %tmp41, i64 1
- %tmp43 = getelementptr inbounds float* %tmp42, i64 1
- %tmp44 = getelementptr inbounds float* %tmp43, i64 1
- %tmp45 = getelementptr inbounds float* %tmp44, i64 1
- %tmp46 = getelementptr inbounds float* %tmp45, i64 1
- %tmp47 = getelementptr inbounds float* %tmp46, i64 1
- %tmp48 = getelementptr inbounds float* %tmp47, i64 1
- %tmp49 = getelementptr inbounds float* %tmp48, i64 1
- %tmp50 = getelementptr inbounds float* %tmp49, i64 1
- %tmp51 = getelementptr inbounds float* %tmp50, i64 1
- %tmp52 = getelementptr inbounds float* %tmp51, i64 1
- %tmp53 = getelementptr inbounds float* %tmp52, i64 1
- %tmp54 = getelementptr inbounds float* %tmp53, i64 1
- %tmp55 = getelementptr inbounds float* %tmp54, i64 1
- %tmp56 = getelementptr inbounds float* %tmp55, i64 1
- %tmp57 = getelementptr inbounds float* %tmp56, i64 1
- %tmp58 = getelementptr inbounds float* %tmp57, i64 1
- %tmp59 = getelementptr inbounds float* %tmp58, i64 1
- %tmp60 = getelementptr inbounds float* %tmp59, i64 1
- %tmp61 = getelementptr inbounds float* %tmp60, i64 1
- %tmp62 = getelementptr inbounds float* %tmp61, i64 1
- %tmp63 = getelementptr inbounds float* %tmp62, i64 1
- %tmp64 = getelementptr inbounds float* %tmp63, i64 1
- %tmp65 = getelementptr inbounds float* %tmp64, i64 1
- %tmp66 = getelementptr inbounds float* %tmp65, i64 1
- %tmp67 = getelementptr inbounds float* %tmp66, i64 1
- %tmp68 = getelementptr inbounds float* %tmp67, i64 1
- %tmp69 = getelementptr inbounds float* %tmp68, i64 1
- %tmp70 = getelementptr inbounds float* %tmp69, i64 1
- %tmp71 = getelementptr inbounds float* %tmp70, i64 1
- %tmp72 = getelementptr inbounds float* %tmp71, i64 1
- %tmp73 = getelementptr inbounds float* %tmp72, i64 1
- %tmp74 = getelementptr inbounds float* %tmp73, i64 1
- %tmp75 = getelementptr inbounds float* %tmp74, i64 1
- %tmp76 = getelementptr inbounds float* %tmp75, i64 1
- %tmp77 = getelementptr inbounds float* %tmp76, i64 1
- %tmp78 = getelementptr inbounds float* %tmp77, i64 1
- %tmp79 = getelementptr inbounds float* %tmp78, i64 1
- %tmp80 = getelementptr inbounds float* %tmp79, i64 1
- %tmp81 = getelementptr inbounds float* %tmp80, i64 1
- %tmp82 = getelementptr inbounds float* %tmp81, i64 1
- %tmp83 = getelementptr inbounds float* %tmp82, i64 1
- %tmp84 = getelementptr inbounds float* %tmp83, i64 1
- %tmp85 = getelementptr inbounds float* %tmp84, i64 1
- %tmp86 = getelementptr inbounds float* %tmp85, i64 1
- %tmp87 = getelementptr inbounds float* %tmp86, i64 1
- %tmp88 = getelementptr inbounds float* %tmp87, i64 1
- %tmp89 = getelementptr inbounds float* %tmp88, i64 1
- %tmp90 = getelementptr inbounds float* %tmp89, i64 1
- %tmp91 = getelementptr inbounds float* %tmp90, i64 1
- %tmp92 = getelementptr inbounds float* %tmp91, i64 1
- %tmp93 = getelementptr inbounds float* %tmp92, i64 1
- %tmp94 = getelementptr inbounds float* %tmp93, i64 1
- %tmp95 = getelementptr inbounds float* %tmp94, i64 1
- %tmp96 = getelementptr inbounds float* %tmp95, i64 1
- %tmp97 = getelementptr inbounds float* %tmp96, i64 1
- %tmp98 = getelementptr inbounds float* %tmp97, i64 1
- %tmp99 = getelementptr inbounds float* %tmp98, i64 1
- %tmp100 = getelementptr inbounds float* %tmp99, i64 1
- %tmp101 = getelementptr inbounds float* %tmp100, i64 1
- %tmp102 = getelementptr inbounds float* %tmp101, i64 1
- %tmp103 = getelementptr inbounds float* %tmp102, i64 1
- %tmp104 = getelementptr inbounds float* %tmp103, i64 1
- %tmp105 = getelementptr inbounds float* %tmp104, i64 1
- %tmp106 = getelementptr inbounds float* %tmp105, i64 1
- %tmp107 = getelementptr inbounds float* %tmp106, i64 1
- %tmp108 = getelementptr inbounds float* %tmp107, i64 1
- %tmp109 = getelementptr inbounds float* %tmp108, i64 1
- %tmp110 = getelementptr inbounds float* %tmp109, i64 1
- %tmp111 = getelementptr inbounds float* %tmp110, i64 1
- %tmp112 = getelementptr inbounds float* %tmp111, i64 1
- %tmp113 = getelementptr inbounds float* %tmp112, i64 1
- %tmp114 = getelementptr inbounds float* %tmp113, i64 1
- %tmp115 = getelementptr inbounds float* %tmp114, i64 1
- %tmp116 = getelementptr inbounds float* %tmp115, i64 1
- %tmp117 = getelementptr inbounds float* %tmp116, i64 1
- %tmp118 = getelementptr inbounds float* %tmp117, i64 1
- %tmp119 = getelementptr inbounds float* %tmp118, i64 1
- %tmp120 = getelementptr inbounds float* %tmp119, i64 1
- %tmp121 = getelementptr inbounds float* %tmp120, i64 1
- %tmp122 = getelementptr inbounds float* %tmp121, i64 1
- %tmp123 = getelementptr inbounds float* %tmp122, i64 1
- %tmp124 = getelementptr inbounds float* %tmp123, i64 1
- %tmp125 = getelementptr inbounds float* %tmp124, i64 1
- %tmp126 = getelementptr inbounds float* %tmp125, i64 1
- %tmp127 = getelementptr inbounds float* %tmp126, i64 1
- %tmp128 = getelementptr inbounds float* %tmp127, i64 1
- %tmp129 = getelementptr inbounds float* %tmp128, i64 1
- %tmp130 = getelementptr inbounds float* %tmp129, i64 1
- %tmp131 = getelementptr inbounds float* %tmp130, i64 1
- %tmp132 = getelementptr inbounds float* %tmp131, i64 1
- %tmp133 = getelementptr inbounds float* %tmp132, i64 1
- %tmp134 = getelementptr inbounds float* %tmp133, i64 1
- %tmp135 = getelementptr inbounds float* %tmp134, i64 1
- %tmp136 = getelementptr inbounds float* %tmp135, i64 1
- %tmp137 = getelementptr inbounds float* %tmp136, i64 1
- %tmp138 = getelementptr inbounds float* %tmp137, i64 1
- %tmp139 = getelementptr inbounds float* %tmp138, i64 1
- %tmp140 = getelementptr inbounds float* %tmp139, i64 1
- %tmp141 = getelementptr inbounds float* %tmp140, i64 1
- %tmp142 = getelementptr inbounds float* %tmp141, i64 1
- %tmp143 = getelementptr inbounds float* %tmp142, i64 1
- %tmp144 = getelementptr inbounds float* %tmp143, i64 1
- %tmp145 = getelementptr inbounds float* %tmp144, i64 1
- %tmp146 = getelementptr inbounds float* %tmp145, i64 1
- %tmp147 = getelementptr inbounds float* %tmp146, i64 1
- %tmp148 = getelementptr inbounds float* %tmp147, i64 1
- %tmp149 = getelementptr inbounds float* %tmp148, i64 1
- %tmp150 = getelementptr inbounds float* %tmp149, i64 1
- %tmp151 = getelementptr inbounds float* %tmp150, i64 1
- %tmp152 = getelementptr inbounds float* %tmp151, i64 1
- %tmp153 = getelementptr inbounds float* %tmp152, i64 1
- %tmp154 = getelementptr inbounds float* %tmp153, i64 1
- %tmp155 = getelementptr inbounds float* %tmp154, i64 1
- %tmp156 = getelementptr inbounds float* %tmp155, i64 1
- %tmp157 = getelementptr inbounds float* %tmp156, i64 1
- %tmp158 = getelementptr inbounds float* %tmp157, i64 1
- %tmp159 = getelementptr inbounds float* %tmp158, i64 1
- %tmp160 = getelementptr inbounds float* %tmp159, i64 1
- %tmp161 = getelementptr inbounds float* %tmp160, i64 1
- %tmp162 = getelementptr inbounds float* %tmp161, i64 1
- %tmp163 = getelementptr inbounds float* %tmp162, i64 1
- %tmp164 = getelementptr inbounds float* %tmp163, i64 1
- %tmp165 = getelementptr inbounds float* %tmp164, i64 1
- %tmp166 = getelementptr inbounds float* %tmp165, i64 1
- %tmp167 = getelementptr inbounds float* %tmp166, i64 1
- %tmp168 = getelementptr inbounds float* %tmp167, i64 1
- %tmp169 = getelementptr inbounds float* %tmp168, i64 1
- %tmp170 = getelementptr inbounds float* %tmp169, i64 1
- %tmp171 = getelementptr inbounds float* %tmp170, i64 1
- %tmp172 = getelementptr inbounds float* %tmp171, i64 1
- %tmp173 = getelementptr inbounds float* %tmp172, i64 1
- %tmp174 = getelementptr inbounds float* %tmp173, i64 1
- %tmp175 = getelementptr inbounds float* %tmp174, i64 1
- %tmp176 = getelementptr inbounds float* %tmp175, i64 1
- %tmp177 = getelementptr inbounds float* %tmp176, i64 1
- %tmp178 = getelementptr inbounds float* %tmp177, i64 1
- %tmp179 = getelementptr inbounds float* %tmp178, i64 1
- %tmp180 = getelementptr inbounds float* %tmp179, i64 1
- %tmp181 = getelementptr inbounds float* %tmp180, i64 1
- %tmp182 = getelementptr inbounds float* %tmp181, i64 1
- %tmp183 = getelementptr inbounds float* %tmp182, i64 1
- %tmp184 = getelementptr inbounds float* %tmp183, i64 1
- %tmp185 = getelementptr inbounds float* %tmp184, i64 1
- %tmp186 = getelementptr inbounds float* %tmp185, i64 1
- %tmp187 = getelementptr inbounds float* %tmp186, i64 1
- %tmp188 = getelementptr inbounds float* %tmp187, i64 1
- %tmp189 = getelementptr inbounds float* %tmp188, i64 1
- %tmp190 = getelementptr inbounds float* %tmp189, i64 1
- %tmp191 = getelementptr inbounds float* %tmp190, i64 1
- %tmp192 = getelementptr inbounds float* %tmp191, i64 1
- %tmp193 = getelementptr inbounds float* %tmp192, i64 1
- %tmp194 = getelementptr inbounds float* %tmp193, i64 1
- %tmp195 = getelementptr inbounds float* %tmp194, i64 1
- %tmp196 = getelementptr inbounds float* %tmp195, i64 1
- %tmp197 = getelementptr inbounds float* %tmp196, i64 1
- %tmp198 = getelementptr inbounds float* %tmp197, i64 1
- %tmp199 = getelementptr inbounds float* %tmp198, i64 1
- %tmp200 = getelementptr inbounds float* %tmp199, i64 1
- %tmp201 = getelementptr inbounds float* %tmp200, i64 1
- %tmp202 = getelementptr inbounds float* %tmp201, i64 1
- %tmp203 = getelementptr inbounds float* %tmp202, i64 1
- %tmp204 = getelementptr inbounds float* %tmp203, i64 1
- %tmp205 = getelementptr inbounds float* %tmp204, i64 1
- %tmp206 = getelementptr inbounds float* %tmp205, i64 1
- %tmp207 = getelementptr inbounds float* %tmp206, i64 1
- %tmp208 = getelementptr inbounds float* %tmp207, i64 1
- %tmp209 = getelementptr inbounds float* %tmp208, i64 1
- %tmp210 = getelementptr inbounds float* %tmp209, i64 1
- %tmp211 = getelementptr inbounds float* %tmp210, i64 1
- %tmp212 = getelementptr inbounds float* %tmp211, i64 1
- %tmp213 = getelementptr inbounds float* %tmp212, i64 1
- %tmp214 = getelementptr inbounds float* %tmp213, i64 1
- %tmp215 = getelementptr inbounds float* %tmp214, i64 1
- %tmp216 = getelementptr inbounds float* %tmp215, i64 1
- %tmp217 = getelementptr inbounds float* %tmp216, i64 1
- %tmp218 = getelementptr inbounds float* %tmp217, i64 1
- %tmp219 = getelementptr inbounds float* %tmp218, i64 1
- %tmp220 = getelementptr inbounds float* %tmp219, i64 1
- %tmp221 = getelementptr inbounds float* %tmp220, i64 1
- %tmp222 = getelementptr inbounds float* %tmp221, i64 1
- %tmp223 = getelementptr inbounds float* %tmp222, i64 1
- %tmp224 = getelementptr inbounds float* %tmp223, i64 1
- %tmp225 = getelementptr inbounds float* %tmp224, i64 1
- %tmp226 = getelementptr inbounds float* %tmp225, i64 1
- %tmp227 = getelementptr inbounds float* %tmp226, i64 1
- %tmp228 = getelementptr inbounds float* %tmp227, i64 1
- %tmp229 = getelementptr inbounds float* %tmp228, i64 1
- %tmp230 = getelementptr inbounds float* %tmp229, i64 1
- %tmp231 = getelementptr inbounds float* %tmp230, i64 1
- %tmp232 = getelementptr inbounds float* %tmp231, i64 1
- %tmp233 = getelementptr inbounds float* %tmp232, i64 1
- %tmp234 = getelementptr inbounds float* %tmp233, i64 1
- %tmp235 = getelementptr inbounds float* %tmp234, i64 1
- %tmp236 = getelementptr inbounds float* %tmp235, i64 1
- %tmp237 = getelementptr inbounds float* %tmp236, i64 1
- %tmp238 = getelementptr inbounds float* %tmp237, i64 1
- %tmp239 = getelementptr inbounds float* %tmp238, i64 1
- %tmp240 = getelementptr inbounds float* %tmp239, i64 1
- %tmp241 = getelementptr inbounds float* %tmp240, i64 1
- %tmp242 = getelementptr inbounds float* %tmp241, i64 1
- %tmp243 = getelementptr inbounds float* %tmp242, i64 1
- %tmp244 = getelementptr inbounds float* %tmp243, i64 1
- %tmp245 = getelementptr inbounds float* %tmp244, i64 1
- %tmp246 = getelementptr inbounds float* %tmp245, i64 1
- %tmp247 = getelementptr inbounds float* %tmp246, i64 1
- %tmp248 = getelementptr inbounds float* %tmp247, i64 1
- %tmp249 = getelementptr inbounds float* %tmp248, i64 1
- %tmp250 = getelementptr inbounds float* %tmp249, i64 1
- %tmp251 = getelementptr inbounds float* %tmp250, i64 1
- %tmp252 = getelementptr inbounds float* %tmp251, i64 1
- %tmp253 = getelementptr inbounds float* %tmp252, i64 1
- %tmp254 = getelementptr inbounds float* %tmp253, i64 1
- %tmp255 = getelementptr inbounds float* %tmp254, i64 1
- %tmp256 = getelementptr inbounds float* %tmp255, i64 1
- %tmp257 = getelementptr inbounds float* %tmp256, i64 1
- %tmp258 = getelementptr inbounds float* %tmp257, i64 1
- %tmp259 = getelementptr inbounds float* %tmp258, i64 1
- %tmp260 = getelementptr inbounds float* %tmp259, i64 1
- %tmp261 = getelementptr inbounds float* %tmp260, i64 1
- %tmp262 = getelementptr inbounds float* %tmp261, i64 1
- %tmp263 = getelementptr inbounds float* %tmp262, i64 1
- %tmp264 = getelementptr inbounds float* %tmp263, i64 1
- %tmp265 = getelementptr inbounds float* %tmp264, i64 1
- %tmp266 = getelementptr inbounds float* %tmp265, i64 1
- %tmp267 = getelementptr inbounds float* %tmp266, i64 1
- %tmp268 = getelementptr inbounds float* %tmp267, i64 1
- %tmp269 = getelementptr inbounds float* %tmp268, i64 1
- %tmp270 = getelementptr inbounds float* %tmp269, i64 1
- %tmp271 = getelementptr inbounds float* %tmp270, i64 1
- %tmp272 = getelementptr inbounds float* %tmp271, i64 1
- %tmp273 = getelementptr inbounds float* %tmp272, i64 1
- %tmp274 = getelementptr inbounds float* %tmp273, i64 1
- %tmp275 = getelementptr inbounds float* %tmp274, i64 1
- %tmp276 = getelementptr inbounds float* %tmp275, i64 1
- %tmp277 = getelementptr inbounds float* %tmp276, i64 1
- %tmp278 = getelementptr inbounds float* %tmp277, i64 1
- %tmp279 = getelementptr inbounds float* %tmp278, i64 1
- %tmp280 = getelementptr inbounds float* %tmp279, i64 1
- %tmp281 = getelementptr inbounds float* %tmp280, i64 1
- %tmp282 = getelementptr inbounds float* %tmp281, i64 1
- %tmp283 = getelementptr inbounds float* %tmp282, i64 1
- %tmp284 = getelementptr inbounds float* %tmp283, i64 1
- %tmp285 = getelementptr inbounds float* %tmp284, i64 1
- %tmp286 = getelementptr inbounds float* %tmp285, i64 1
- %tmp287 = getelementptr inbounds float* %tmp286, i64 1
- %tmp288 = getelementptr inbounds float* %tmp287, i64 1
- %tmp289 = getelementptr inbounds float* %tmp288, i64 1
- %tmp290 = getelementptr inbounds float* %tmp289, i64 1
- %tmp291 = getelementptr inbounds float* %tmp290, i64 1
- %tmp292 = getelementptr inbounds float* %tmp291, i64 1
- %tmp293 = getelementptr inbounds float* %tmp292, i64 1
- %tmp294 = getelementptr inbounds float* %tmp293, i64 1
- %tmp295 = getelementptr inbounds float* %tmp294, i64 1
- %tmp296 = getelementptr inbounds float* %tmp295, i64 1
- %tmp297 = getelementptr inbounds float* %tmp296, i64 1
- %tmp298 = getelementptr inbounds float* %tmp297, i64 1
- %tmp299 = getelementptr inbounds float* %tmp298, i64 1
- %tmp300 = getelementptr inbounds float* %tmp299, i64 1
- %tmp301 = getelementptr inbounds float* %tmp300, i64 1
- %tmp302 = getelementptr inbounds float* %tmp301, i64 1
- %tmp303 = getelementptr inbounds float* %tmp302, i64 1
- %tmp304 = getelementptr inbounds float* %tmp303, i64 1
- %tmp305 = getelementptr inbounds float* %tmp304, i64 1
- %tmp306 = getelementptr inbounds float* %tmp305, i64 1
- %tmp307 = getelementptr inbounds float* %tmp306, i64 1
- %tmp308 = getelementptr inbounds float* %tmp307, i64 1
- %tmp309 = getelementptr inbounds float* %tmp308, i64 1
- %tmp310 = getelementptr inbounds float* %tmp309, i64 1
- %tmp311 = getelementptr inbounds float* %tmp310, i64 1
- %tmp312 = getelementptr inbounds float* %tmp311, i64 1
- %tmp313 = getelementptr inbounds float* %tmp312, i64 1
- %tmp314 = getelementptr inbounds float* %tmp313, i64 1
- %tmp315 = getelementptr inbounds float* %tmp314, i64 1
- %tmp316 = getelementptr inbounds float* %tmp315, i64 1
- %tmp317 = getelementptr inbounds float* %tmp316, i64 1
- %tmp318 = getelementptr inbounds float* %tmp317, i64 1
- %tmp319 = getelementptr inbounds float* %tmp318, i64 1
- %tmp320 = getelementptr inbounds float* %tmp319, i64 1
- %tmp321 = getelementptr inbounds float* %tmp320, i64 1
- %tmp322 = getelementptr inbounds float* %tmp321, i64 1
- %tmp323 = getelementptr inbounds float* %tmp322, i64 1
- %tmp324 = getelementptr inbounds float* %tmp323, i64 1
- %tmp325 = getelementptr inbounds float* %tmp324, i64 1
- %tmp326 = getelementptr inbounds float* %tmp325, i64 1
- %tmp327 = getelementptr inbounds float* %tmp326, i64 1
- %tmp328 = getelementptr inbounds float* %tmp327, i64 1
- %tmp329 = getelementptr inbounds float* %tmp328, i64 1
- %tmp330 = getelementptr inbounds float* %tmp329, i64 1
- %tmp331 = getelementptr inbounds float* %tmp330, i64 1
- %tmp332 = getelementptr inbounds float* %tmp331, i64 1
- %tmp333 = getelementptr inbounds float* %tmp332, i64 1
- %tmp334 = getelementptr inbounds float* %tmp333, i64 1
- %tmp335 = getelementptr inbounds float* %tmp334, i64 1
- %tmp336 = getelementptr inbounds float* %tmp335, i64 1
- %tmp337 = getelementptr inbounds float* %tmp336, i64 1
- %tmp338 = getelementptr inbounds float* %tmp337, i64 1
- %tmp339 = getelementptr inbounds float* %tmp338, i64 1
- %tmp340 = getelementptr inbounds float* %tmp339, i64 1
- %tmp341 = getelementptr inbounds float* %tmp340, i64 1
- %tmp342 = getelementptr inbounds float* %tmp341, i64 1
- %tmp343 = getelementptr inbounds float* %tmp342, i64 1
- %tmp344 = getelementptr inbounds float* %tmp343, i64 1
- %tmp345 = getelementptr inbounds float* %tmp344, i64 1
- %tmp346 = getelementptr inbounds float* %tmp345, i64 1
- %tmp347 = getelementptr inbounds float* %tmp346, i64 1
- %tmp348 = getelementptr inbounds float* %tmp347, i64 1
- %tmp349 = getelementptr inbounds float* %tmp348, i64 1
- %tmp350 = getelementptr inbounds float* %tmp349, i64 1
- %tmp351 = getelementptr inbounds float* %tmp350, i64 1
- %tmp352 = getelementptr inbounds float* %tmp351, i64 1
- %tmp353 = getelementptr inbounds float* %tmp352, i64 1
- %tmp354 = getelementptr inbounds float* %tmp353, i64 1
- %tmp355 = getelementptr inbounds float* %tmp354, i64 1
- %tmp356 = getelementptr inbounds float* %tmp355, i64 1
- %tmp357 = getelementptr inbounds float* %tmp356, i64 1
- %tmp358 = getelementptr inbounds float* %tmp357, i64 1
- %tmp359 = getelementptr inbounds float* %tmp358, i64 1
- %tmp360 = getelementptr inbounds float* %tmp359, i64 1
- %tmp361 = getelementptr inbounds float* %tmp360, i64 1
- %tmp362 = getelementptr inbounds float* %tmp361, i64 1
- %tmp363 = getelementptr inbounds float* %tmp362, i64 1
- %tmp364 = getelementptr inbounds float* %tmp363, i64 1
- %tmp365 = getelementptr inbounds float* %tmp364, i64 1
- %tmp366 = getelementptr inbounds float* %tmp365, i64 1
- %tmp367 = getelementptr inbounds float* %tmp366, i64 1
- %tmp368 = getelementptr inbounds float* %tmp367, i64 1
- %tmp369 = getelementptr inbounds float* %tmp368, i64 1
- %tmp370 = getelementptr inbounds float* %tmp369, i64 1
- %tmp371 = getelementptr inbounds float* %tmp370, i64 1
- %tmp372 = getelementptr inbounds float* %tmp371, i64 1
- %tmp373 = getelementptr inbounds float* %tmp372, i64 1
- %tmp374 = getelementptr inbounds float* %tmp373, i64 1
- %tmp375 = getelementptr inbounds float* %tmp374, i64 1
- %tmp376 = getelementptr inbounds float* %tmp375, i64 1
- %tmp377 = getelementptr inbounds float* %tmp376, i64 1
- %tmp378 = getelementptr inbounds float* %tmp377, i64 1
- %tmp379 = getelementptr inbounds float* %tmp378, i64 1
- %tmp380 = getelementptr inbounds float* %tmp379, i64 1
- %tmp381 = getelementptr inbounds float* %tmp380, i64 1
- %tmp382 = getelementptr inbounds float* %tmp381, i64 1
- %tmp383 = getelementptr inbounds float* %tmp382, i64 1
- %tmp384 = getelementptr inbounds float* %tmp383, i64 1
- %tmp385 = getelementptr inbounds float* %tmp384, i64 1
- %tmp386 = getelementptr inbounds float* %tmp385, i64 1
- %tmp387 = getelementptr inbounds float* %tmp386, i64 1
- %tmp388 = getelementptr inbounds float* %tmp387, i64 1
- %tmp389 = getelementptr inbounds float* %tmp388, i64 1
- %tmp390 = getelementptr inbounds float* %tmp389, i64 1
- %tmp391 = getelementptr inbounds float* %tmp390, i64 1
- %tmp392 = getelementptr inbounds float* %tmp391, i64 1
- %tmp393 = getelementptr inbounds float* %tmp392, i64 1
- %tmp394 = getelementptr inbounds float* %tmp393, i64 1
- %tmp395 = getelementptr inbounds float* %tmp394, i64 1
- %tmp396 = getelementptr inbounds float* %tmp395, i64 1
- %tmp397 = getelementptr inbounds float* %tmp396, i64 1
- %tmp398 = getelementptr inbounds float* %tmp397, i64 1
- %tmp399 = getelementptr inbounds float* %tmp398, i64 1
- %tmp400 = getelementptr inbounds float* %tmp399, i64 1
- %tmp401 = getelementptr inbounds float* %tmp400, i64 1
- %tmp402 = getelementptr inbounds float* %tmp401, i64 1
- %tmp403 = getelementptr inbounds float* %tmp402, i64 1
- %tmp404 = getelementptr inbounds float* %tmp403, i64 1
- %tmp405 = getelementptr inbounds float* %tmp404, i64 1
- %tmp406 = getelementptr inbounds float* %tmp405, i64 1
- %tmp407 = getelementptr inbounds float* %tmp406, i64 1
- %tmp408 = getelementptr inbounds float* %tmp407, i64 1
- %tmp409 = getelementptr inbounds float* %tmp408, i64 1
- %tmp410 = getelementptr inbounds float* %tmp409, i64 1
- %tmp411 = getelementptr inbounds float* %tmp410, i64 1
- %tmp412 = getelementptr inbounds float* %tmp411, i64 1
- %tmp413 = getelementptr inbounds float* %tmp412, i64 1
- %tmp414 = getelementptr inbounds float* %tmp413, i64 1
- %tmp415 = getelementptr inbounds float* %tmp414, i64 1
- %tmp416 = getelementptr inbounds float* %tmp415, i64 1
- %tmp417 = getelementptr inbounds float* %tmp416, i64 1
- %tmp418 = getelementptr inbounds float* %tmp417, i64 1
- %tmp419 = getelementptr inbounds float* %tmp418, i64 1
- %tmp420 = getelementptr inbounds float* %tmp419, i64 1
- %tmp421 = getelementptr inbounds float* %tmp420, i64 1
- %tmp422 = getelementptr inbounds float* %tmp421, i64 1
- %tmp423 = getelementptr inbounds float* %tmp422, i64 1
- %tmp424 = getelementptr inbounds float* %tmp423, i64 1
- %tmp425 = getelementptr inbounds float* %tmp424, i64 1
- %tmp426 = getelementptr inbounds float* %tmp425, i64 1
- %tmp427 = getelementptr inbounds float* %tmp426, i64 1
- %tmp428 = getelementptr inbounds float* %tmp427, i64 1
- %tmp429 = getelementptr inbounds float* %tmp428, i64 1
- %tmp430 = getelementptr inbounds float* %tmp429, i64 1
- %tmp431 = getelementptr inbounds float* %tmp430, i64 1
- %tmp432 = getelementptr inbounds float* %tmp431, i64 1
- %tmp433 = getelementptr inbounds float* %tmp432, i64 1
- %tmp434 = getelementptr inbounds float* %tmp433, i64 1
- %tmp435 = getelementptr inbounds float* %tmp434, i64 1
- %tmp436 = getelementptr inbounds float* %tmp435, i64 1
- %tmp437 = getelementptr inbounds float* %tmp436, i64 1
- %tmp438 = getelementptr inbounds float* %tmp437, i64 1
- %tmp439 = getelementptr inbounds float* %tmp438, i64 1
- %tmp440 = getelementptr inbounds float* %tmp439, i64 1
- %tmp441 = getelementptr inbounds float* %tmp440, i64 1
- %tmp442 = getelementptr inbounds float* %tmp441, i64 1
- %tmp443 = getelementptr inbounds float* %tmp442, i64 1
- %tmp444 = getelementptr inbounds float* %tmp443, i64 1
- %tmp445 = getelementptr inbounds float* %tmp444, i64 1
- %tmp446 = getelementptr inbounds float* %tmp445, i64 1
- %tmp447 = getelementptr inbounds float* %tmp446, i64 1
- %tmp448 = getelementptr inbounds float* %tmp447, i64 1
- %tmp449 = getelementptr inbounds float* %tmp448, i64 1
- %tmp450 = getelementptr inbounds float* %tmp449, i64 1
- %tmp451 = getelementptr inbounds float* %tmp450, i64 1
- %tmp452 = getelementptr inbounds float* %tmp451, i64 1
- %tmp453 = getelementptr inbounds float* %tmp452, i64 1
- %tmp454 = getelementptr inbounds float* %tmp453, i64 1
- %tmp455 = getelementptr inbounds float* %tmp454, i64 1
- %tmp456 = getelementptr inbounds float* %tmp455, i64 1
- %tmp457 = getelementptr inbounds float* %tmp456, i64 1
- %tmp458 = getelementptr inbounds float* %tmp457, i64 1
- %tmp459 = getelementptr inbounds float* %tmp458, i64 1
- %tmp460 = getelementptr inbounds float* %tmp459, i64 1
- %tmp461 = getelementptr inbounds float* %tmp460, i64 1
- %tmp462 = getelementptr inbounds float* %tmp461, i64 1
- %tmp463 = getelementptr inbounds float* %tmp462, i64 1
- %tmp464 = getelementptr inbounds float* %tmp463, i64 1
- %tmp465 = getelementptr inbounds float* %tmp464, i64 1
- %tmp466 = getelementptr inbounds float* %tmp465, i64 1
- %tmp467 = getelementptr inbounds float* %tmp466, i64 1
- %tmp468 = getelementptr inbounds float* %tmp467, i64 1
- %tmp469 = getelementptr inbounds float* %tmp468, i64 1
- %tmp470 = getelementptr inbounds float* %tmp469, i64 1
- %tmp471 = getelementptr inbounds float* %tmp470, i64 1
- %tmp472 = getelementptr inbounds float* %tmp471, i64 1
- %tmp473 = getelementptr inbounds float* %tmp472, i64 1
- %tmp474 = getelementptr inbounds float* %tmp473, i64 1
- %tmp475 = getelementptr inbounds float* %tmp474, i64 1
- %tmp476 = getelementptr inbounds float* %tmp475, i64 1
- %tmp477 = getelementptr inbounds float* %tmp476, i64 1
- %tmp478 = getelementptr inbounds float* %tmp477, i64 1
- %tmp479 = getelementptr inbounds float* %tmp478, i64 1
- %tmp480 = getelementptr inbounds float* %tmp479, i64 1
- %tmp481 = getelementptr inbounds float* %tmp480, i64 1
- %tmp482 = getelementptr inbounds float* %tmp481, i64 1
- %tmp483 = getelementptr inbounds float* %tmp482, i64 1
- %tmp484 = getelementptr inbounds float* %tmp483, i64 1
- %tmp485 = getelementptr inbounds float* %tmp484, i64 1
- %tmp486 = getelementptr inbounds float* %tmp485, i64 1
- %tmp487 = getelementptr inbounds float* %tmp486, i64 1
- %tmp488 = getelementptr inbounds float* %tmp487, i64 1
- %tmp489 = getelementptr inbounds float* %tmp488, i64 1
- %tmp490 = getelementptr inbounds float* %tmp489, i64 1
- %tmp491 = getelementptr inbounds float* %tmp490, i64 1
- %tmp492 = getelementptr inbounds float* %tmp491, i64 1
- %tmp493 = getelementptr inbounds float* %tmp492, i64 1
- %tmp494 = getelementptr inbounds float* %tmp493, i64 1
- %tmp495 = getelementptr inbounds float* %tmp494, i64 1
- %tmp496 = getelementptr inbounds float* %tmp495, i64 1
- %tmp497 = getelementptr inbounds float* %tmp496, i64 1
- %tmp498 = getelementptr inbounds float* %tmp497, i64 1
- %tmp499 = getelementptr inbounds float* %tmp498, i64 1
- %tmp500 = getelementptr inbounds float* %tmp499, i64 1
- %tmp501 = getelementptr inbounds float* %tmp500, i64 1
- %tmp502 = getelementptr inbounds float* %tmp501, i64 1
- %tmp503 = getelementptr inbounds float* %tmp502, i64 1
- %tmp504 = getelementptr inbounds float* %tmp503, i64 1
- %tmp505 = getelementptr inbounds float* %tmp504, i64 1
- %tmp506 = getelementptr inbounds float* %tmp505, i64 1
- %tmp507 = getelementptr inbounds float* %tmp506, i64 1
- %tmp508 = getelementptr inbounds float* %tmp507, i64 1
- %tmp509 = getelementptr inbounds float* %tmp508, i64 1
- %tmp510 = getelementptr inbounds float* %tmp509, i64 1
- %tmp511 = getelementptr inbounds float* %tmp510, i64 1
- %tmp512 = getelementptr inbounds float* %tmp511, i64 1
- %tmp513 = getelementptr inbounds float* %tmp512, i64 1
- %tmp514 = getelementptr inbounds float* %tmp513, i64 1
- %tmp515 = getelementptr inbounds float* %tmp514, i64 1
- %tmp516 = getelementptr inbounds float* %tmp515, i64 1
- %tmp517 = getelementptr inbounds float* %tmp516, i64 1
- %tmp518 = getelementptr inbounds float* %tmp517, i64 1
- %tmp519 = getelementptr inbounds float* %tmp518, i64 1
- %tmp520 = getelementptr inbounds float* %tmp519, i64 1
- %tmp521 = getelementptr inbounds float* %tmp520, i64 1
- %tmp522 = getelementptr inbounds float* %tmp521, i64 1
- %tmp523 = getelementptr inbounds float* %tmp522, i64 1
- %tmp524 = getelementptr inbounds float* %tmp523, i64 1
- %tmp525 = getelementptr inbounds float* %tmp524, i64 1
- %tmp526 = getelementptr inbounds float* %tmp525, i64 1
- %tmp527 = getelementptr inbounds float* %tmp526, i64 1
- %tmp528 = getelementptr inbounds float* %tmp527, i64 1
- %tmp529 = getelementptr inbounds float* %tmp528, i64 1
- %tmp530 = getelementptr inbounds float* %tmp529, i64 1
- %tmp531 = getelementptr inbounds float* %tmp530, i64 1
- %tmp532 = getelementptr inbounds float* %tmp531, i64 1
- %tmp533 = getelementptr inbounds float* %tmp532, i64 1
- %tmp534 = getelementptr inbounds float* %tmp533, i64 1
- %tmp535 = getelementptr inbounds float* %tmp534, i64 1
- %tmp536 = getelementptr inbounds float* %tmp535, i64 1
- %tmp537 = getelementptr inbounds float* %tmp536, i64 1
- %tmp538 = getelementptr inbounds float* %tmp537, i64 1
- %tmp539 = getelementptr inbounds float* %tmp538, i64 1
- %tmp540 = getelementptr inbounds float* %tmp539, i64 1
- %tmp541 = getelementptr inbounds float* %tmp540, i64 1
- %tmp542 = getelementptr inbounds float* %tmp541, i64 1
- %tmp543 = getelementptr inbounds float* %tmp542, i64 1
- %tmp544 = getelementptr inbounds float* %tmp543, i64 1
- %tmp545 = getelementptr inbounds float* %tmp544, i64 1
- %tmp546 = getelementptr inbounds float* %tmp545, i64 1
- %tmp547 = getelementptr inbounds float* %tmp546, i64 1
- %tmp548 = getelementptr inbounds float* %tmp547, i64 1
- %tmp549 = getelementptr inbounds float* %tmp548, i64 1
- %tmp550 = getelementptr inbounds float* %tmp549, i64 1
- %tmp551 = getelementptr inbounds float* %tmp550, i64 1
- %tmp552 = getelementptr inbounds float* %tmp551, i64 1
- %tmp553 = getelementptr inbounds float* %tmp552, i64 1
- %tmp554 = getelementptr inbounds float* %tmp553, i64 1
- %tmp555 = getelementptr inbounds float* %tmp554, i64 1
- %tmp556 = getelementptr inbounds float* %tmp555, i64 1
- %tmp557 = getelementptr inbounds float* %tmp556, i64 1
- %tmp558 = getelementptr inbounds float* %tmp557, i64 1
- %tmp559 = getelementptr inbounds float* %tmp558, i64 1
- %tmp560 = getelementptr inbounds float* %tmp559, i64 1
- %tmp561 = getelementptr inbounds float* %tmp560, i64 1
- %tmp562 = getelementptr inbounds float* %tmp561, i64 1
- %tmp563 = getelementptr inbounds float* %tmp562, i64 1
- %tmp564 = getelementptr inbounds float* %tmp563, i64 1
- %tmp565 = getelementptr inbounds float* %tmp564, i64 1
- %tmp566 = getelementptr inbounds float* %tmp565, i64 1
- %tmp567 = getelementptr inbounds float* %tmp566, i64 1
- %tmp568 = getelementptr inbounds float* %tmp567, i64 1
- %tmp569 = getelementptr inbounds float* %tmp568, i64 1
- %tmp570 = getelementptr inbounds float* %tmp569, i64 1
- %tmp571 = getelementptr inbounds float* %tmp570, i64 1
- %tmp572 = getelementptr inbounds float* %tmp571, i64 1
- %tmp573 = getelementptr inbounds float* %tmp572, i64 1
- %tmp574 = getelementptr inbounds float* %tmp573, i64 1
- %tmp575 = getelementptr inbounds float* %tmp574, i64 1
- %tmp576 = getelementptr inbounds float* %tmp575, i64 1
- %tmp577 = getelementptr inbounds float* %tmp576, i64 1
- %tmp578 = getelementptr inbounds float* %tmp577, i64 1
- %tmp579 = getelementptr inbounds float* %tmp578, i64 1
- %tmp580 = getelementptr inbounds float* %tmp579, i64 1
- %tmp581 = getelementptr inbounds float* %tmp580, i64 1
- %tmp582 = getelementptr inbounds float* %tmp581, i64 1
- %tmp583 = getelementptr inbounds float* %tmp582, i64 1
- %tmp584 = getelementptr inbounds float* %tmp583, i64 1
- %tmp585 = getelementptr inbounds float* %tmp584, i64 1
- %tmp586 = getelementptr inbounds float* %tmp585, i64 1
- %tmp587 = getelementptr inbounds float* %tmp586, i64 1
- %tmp588 = getelementptr inbounds float* %tmp587, i64 1
- %tmp589 = getelementptr inbounds float* %tmp588, i64 1
- %tmp590 = getelementptr inbounds float* %tmp589, i64 1
- %tmp591 = getelementptr inbounds float* %tmp590, i64 1
- %tmp592 = getelementptr inbounds float* %tmp591, i64 1
- %tmp593 = getelementptr inbounds float* %tmp592, i64 1
- %tmp594 = getelementptr inbounds float* %tmp593, i64 1
- %tmp595 = getelementptr inbounds float* %tmp594, i64 1
- %tmp596 = getelementptr inbounds float* %tmp595, i64 1
- %tmp597 = getelementptr inbounds float* %tmp596, i64 1
- %tmp598 = getelementptr inbounds float* %tmp597, i64 1
- %tmp599 = getelementptr inbounds float* %tmp598, i64 1
- %tmp600 = getelementptr inbounds float* %tmp599, i64 1
- %tmp601 = getelementptr inbounds float* %tmp600, i64 1
- %tmp602 = getelementptr inbounds float* %tmp601, i64 1
- %tmp603 = getelementptr inbounds float* %tmp602, i64 1
- %tmp604 = getelementptr inbounds float* %tmp603, i64 1
- %tmp605 = getelementptr inbounds float* %tmp604, i64 1
- %tmp606 = getelementptr inbounds float* %tmp605, i64 1
- %tmp607 = getelementptr inbounds float* %tmp606, i64 1
- %tmp608 = getelementptr inbounds float* %tmp607, i64 1
- %tmp609 = getelementptr inbounds float* %tmp608, i64 1
- %tmp610 = getelementptr inbounds float* %tmp609, i64 1
- %tmp611 = getelementptr inbounds float* %tmp610, i64 1
- %tmp612 = getelementptr inbounds float* %tmp611, i64 1
- %tmp613 = getelementptr inbounds float* %tmp612, i64 1
- %tmp614 = getelementptr inbounds float* %tmp613, i64 1
- %tmp615 = getelementptr inbounds float* %tmp614, i64 1
- %tmp616 = getelementptr inbounds float* %tmp615, i64 1
- %tmp617 = getelementptr inbounds float* %tmp616, i64 1
- %tmp618 = getelementptr inbounds float* %tmp617, i64 1
- %tmp619 = getelementptr inbounds float* %tmp618, i64 1
- %tmp620 = getelementptr inbounds float* %tmp619, i64 1
- %tmp621 = getelementptr inbounds float* %tmp620, i64 1
- %tmp622 = getelementptr inbounds float* %tmp621, i64 1
- %tmp623 = getelementptr inbounds float* %tmp622, i64 1
- %tmp624 = getelementptr inbounds float* %tmp623, i64 1
- %tmp625 = getelementptr inbounds float* %tmp624, i64 1
- %tmp626 = getelementptr inbounds float* %tmp625, i64 1
- %tmp627 = getelementptr inbounds float* %tmp626, i64 1
- %tmp628 = getelementptr inbounds float* %tmp627, i64 1
- %tmp629 = getelementptr inbounds float* %tmp628, i64 1
- %tmp630 = getelementptr inbounds float* %tmp629, i64 1
- %tmp631 = getelementptr inbounds float* %tmp630, i64 1
- %tmp632 = getelementptr inbounds float* %tmp631, i64 1
- %tmp633 = getelementptr inbounds float* %tmp632, i64 1
- %tmp634 = getelementptr inbounds float* %tmp633, i64 1
- %tmp635 = getelementptr inbounds float* %tmp634, i64 1
- %tmp636 = getelementptr inbounds float* %tmp635, i64 1
- %tmp637 = getelementptr inbounds float* %tmp636, i64 1
- %tmp638 = getelementptr inbounds float* %tmp637, i64 1
- %tmp639 = getelementptr inbounds float* %tmp638, i64 1
- %tmp640 = getelementptr inbounds float* %tmp639, i64 1
- %tmp641 = getelementptr inbounds float* %tmp640, i64 1
- %tmp642 = getelementptr inbounds float* %tmp641, i64 1
- %tmp643 = getelementptr inbounds float* %tmp642, i64 1
- %tmp644 = getelementptr inbounds float* %tmp643, i64 1
- %tmp645 = getelementptr inbounds float* %tmp644, i64 1
- %tmp646 = getelementptr inbounds float* %tmp645, i64 1
- %tmp647 = getelementptr inbounds float* %tmp646, i64 1
- %tmp648 = getelementptr inbounds float* %tmp647, i64 1
- %tmp649 = getelementptr inbounds float* %tmp648, i64 1
- %tmp650 = getelementptr inbounds float* %tmp649, i64 1
- %tmp651 = getelementptr inbounds float* %tmp650, i64 1
- %tmp652 = getelementptr inbounds float* %tmp651, i64 1
- %tmp653 = getelementptr inbounds float* %tmp652, i64 1
- %tmp654 = getelementptr inbounds float* %tmp653, i64 1
- %tmp655 = getelementptr inbounds float* %tmp654, i64 1
- %tmp656 = getelementptr inbounds float* %tmp655, i64 1
- %tmp657 = getelementptr inbounds float* %tmp656, i64 1
- %tmp658 = getelementptr inbounds float* %tmp657, i64 1
- %tmp659 = getelementptr inbounds float* %tmp658, i64 1
- %tmp660 = getelementptr inbounds float* %tmp659, i64 1
- %tmp661 = getelementptr inbounds float* %tmp660, i64 1
- %tmp662 = getelementptr inbounds float* %tmp661, i64 1
- %tmp663 = getelementptr inbounds float* %tmp662, i64 1
- %tmp664 = getelementptr inbounds float* %tmp663, i64 1
- %tmp665 = getelementptr inbounds float* %tmp664, i64 1
- %tmp666 = getelementptr inbounds float* %tmp665, i64 1
- %tmp667 = getelementptr inbounds float* %tmp666, i64 1
- %tmp668 = getelementptr inbounds float* %tmp667, i64 1
- %tmp669 = getelementptr inbounds float* %tmp668, i64 1
- %tmp670 = getelementptr inbounds float* %tmp669, i64 1
- %tmp671 = getelementptr inbounds float* %tmp670, i64 1
- %tmp672 = getelementptr inbounds float* %tmp671, i64 1
- %tmp673 = getelementptr inbounds float* %tmp672, i64 1
- %tmp674 = getelementptr inbounds float* %tmp673, i64 1
- %tmp675 = getelementptr inbounds float* %tmp674, i64 1
- %tmp676 = getelementptr inbounds float* %tmp675, i64 1
- %tmp677 = getelementptr inbounds float* %tmp676, i64 1
- %tmp678 = getelementptr inbounds float* %tmp677, i64 1
- %tmp679 = getelementptr inbounds float* %tmp678, i64 1
- %tmp680 = getelementptr inbounds float* %tmp679, i64 1
- %tmp681 = getelementptr inbounds float* %tmp680, i64 1
- %tmp682 = getelementptr inbounds float* %tmp681, i64 1
- %tmp683 = getelementptr inbounds float* %tmp682, i64 1
- %tmp684 = getelementptr inbounds float* %tmp683, i64 1
- %tmp685 = getelementptr inbounds float* %tmp684, i64 1
- %tmp686 = getelementptr inbounds float* %tmp685, i64 1
- %tmp687 = getelementptr inbounds float* %tmp686, i64 1
- %tmp688 = getelementptr inbounds float* %tmp687, i64 1
- %tmp689 = getelementptr inbounds float* %tmp688, i64 1
- %tmp690 = getelementptr inbounds float* %tmp689, i64 1
- %tmp691 = getelementptr inbounds float* %tmp690, i64 1
- %tmp692 = getelementptr inbounds float* %tmp691, i64 1
- %tmp693 = getelementptr inbounds float* %tmp692, i64 1
- %tmp694 = getelementptr inbounds float* %tmp693, i64 1
- %tmp695 = getelementptr inbounds float* %tmp694, i64 1
- %tmp696 = getelementptr inbounds float* %tmp695, i64 1
- %tmp697 = getelementptr inbounds float* %tmp696, i64 1
- %tmp698 = getelementptr inbounds float* %tmp697, i64 1
- %tmp699 = getelementptr inbounds float* %tmp698, i64 1
- %tmp700 = getelementptr inbounds float* %tmp699, i64 1
- %tmp701 = getelementptr inbounds float* %tmp700, i64 1
- %tmp702 = getelementptr inbounds float* %tmp701, i64 1
- %tmp703 = getelementptr inbounds float* %tmp702, i64 1
- %tmp704 = getelementptr inbounds float* %tmp703, i64 1
- %tmp705 = getelementptr inbounds float* %tmp704, i64 1
- %tmp706 = getelementptr inbounds float* %tmp705, i64 1
- %tmp707 = getelementptr inbounds float* %tmp706, i64 1
- %tmp708 = getelementptr inbounds float* %tmp707, i64 1
- %tmp709 = getelementptr inbounds float* %tmp708, i64 1
- %tmp710 = getelementptr inbounds float* %tmp709, i64 1
- %tmp711 = getelementptr inbounds float* %tmp710, i64 1
- %tmp712 = getelementptr inbounds float* %tmp711, i64 1
- %tmp713 = getelementptr inbounds float* %tmp712, i64 1
- %tmp714 = getelementptr inbounds float* %tmp713, i64 1
- %tmp715 = getelementptr inbounds float* %tmp714, i64 1
- %tmp716 = getelementptr inbounds float* %tmp715, i64 1
- %tmp717 = getelementptr inbounds float* %tmp716, i64 1
- %tmp718 = getelementptr inbounds float* %tmp717, i64 1
- %tmp719 = getelementptr inbounds float* %tmp718, i64 1
- %tmp720 = getelementptr inbounds float* %tmp719, i64 1
- %tmp721 = getelementptr inbounds float* %tmp720, i64 1
- %tmp722 = getelementptr inbounds float* %tmp721, i64 1
- %tmp723 = getelementptr inbounds float* %tmp722, i64 1
- %tmp724 = getelementptr inbounds float* %tmp723, i64 1
- %tmp725 = getelementptr inbounds float* %tmp724, i64 1
- %tmp726 = getelementptr inbounds float* %tmp725, i64 1
- %tmp727 = getelementptr inbounds float* %tmp726, i64 1
- %tmp728 = getelementptr inbounds float* %tmp727, i64 1
- %tmp729 = getelementptr inbounds float* %tmp728, i64 1
- %tmp730 = getelementptr inbounds float* %tmp729, i64 1
- %tmp731 = getelementptr inbounds float* %tmp730, i64 1
- %tmp732 = getelementptr inbounds float* %tmp731, i64 1
- %tmp733 = getelementptr inbounds float* %tmp732, i64 1
- %tmp734 = getelementptr inbounds float* %tmp733, i64 1
- %tmp735 = getelementptr inbounds float* %tmp734, i64 1
- %tmp736 = getelementptr inbounds float* %tmp735, i64 1
- %tmp737 = getelementptr inbounds float* %tmp736, i64 1
- %tmp738 = getelementptr inbounds float* %tmp737, i64 1
- %tmp739 = getelementptr inbounds float* %tmp738, i64 1
- %tmp740 = getelementptr inbounds float* %tmp739, i64 1
- %tmp741 = getelementptr inbounds float* %tmp740, i64 1
- %tmp742 = getelementptr inbounds float* %tmp741, i64 1
- %tmp743 = getelementptr inbounds float* %tmp742, i64 1
- %tmp744 = getelementptr inbounds float* %tmp743, i64 1
- %tmp745 = getelementptr inbounds float* %tmp744, i64 1
- %tmp746 = getelementptr inbounds float* %tmp745, i64 1
- %tmp747 = getelementptr inbounds float* %tmp746, i64 1
- %tmp748 = getelementptr inbounds float* %tmp747, i64 1
- %tmp749 = getelementptr inbounds float* %tmp748, i64 1
- %tmp750 = getelementptr inbounds float* %tmp749, i64 1
- %tmp751 = getelementptr inbounds float* %tmp750, i64 1
- %tmp752 = getelementptr inbounds float* %tmp751, i64 1
- %tmp753 = getelementptr inbounds float* %tmp752, i64 1
- %tmp754 = getelementptr inbounds float* %tmp753, i64 1
- %tmp755 = getelementptr inbounds float* %tmp754, i64 1
- %tmp756 = getelementptr inbounds float* %tmp755, i64 1
- %tmp757 = getelementptr inbounds float* %tmp756, i64 1
- %tmp758 = getelementptr inbounds float* %tmp757, i64 1
- %tmp759 = getelementptr inbounds float* %tmp758, i64 1
- %tmp760 = getelementptr inbounds float* %tmp759, i64 1
- %tmp761 = getelementptr inbounds float* %tmp760, i64 1
- %tmp762 = getelementptr inbounds float* %tmp761, i64 1
- %tmp763 = getelementptr inbounds float* %tmp762, i64 1
- %tmp764 = getelementptr inbounds float* %tmp763, i64 1
- %tmp765 = getelementptr inbounds float* %tmp764, i64 1
- %tmp766 = getelementptr inbounds float* %tmp765, i64 1
- %tmp767 = getelementptr inbounds float* %tmp766, i64 1
- %tmp768 = getelementptr inbounds float* %tmp767, i64 1
- %tmp769 = getelementptr inbounds float* %tmp768, i64 1
- %tmp770 = getelementptr inbounds float* %tmp769, i64 1
- %tmp771 = getelementptr inbounds float* %tmp770, i64 1
- %tmp772 = getelementptr inbounds float* %tmp771, i64 1
- %tmp773 = getelementptr inbounds float* %tmp772, i64 1
- %tmp774 = getelementptr inbounds float* %tmp773, i64 1
- %tmp775 = getelementptr inbounds float* %tmp774, i64 1
- %tmp776 = getelementptr inbounds float* %tmp775, i64 1
- %tmp777 = getelementptr inbounds float* %tmp776, i64 1
- %tmp778 = getelementptr inbounds float* %tmp777, i64 1
- %tmp779 = getelementptr inbounds float* %tmp778, i64 1
- %tmp780 = getelementptr inbounds float* %tmp779, i64 1
- %tmp781 = getelementptr inbounds float* %tmp780, i64 1
- %tmp782 = getelementptr inbounds float* %tmp781, i64 1
- %tmp783 = getelementptr inbounds float* %tmp782, i64 1
- %tmp784 = getelementptr inbounds float* %tmp783, i64 1
- %tmp785 = getelementptr inbounds float* %tmp784, i64 1
- %tmp786 = getelementptr inbounds float* %tmp785, i64 1
- %tmp787 = getelementptr inbounds float* %tmp786, i64 1
- %tmp788 = getelementptr inbounds float* %tmp787, i64 1
- %tmp789 = getelementptr inbounds float* %tmp788, i64 1
- %tmp790 = getelementptr inbounds float* %tmp789, i64 1
- %tmp791 = getelementptr inbounds float* %tmp790, i64 1
- %tmp792 = getelementptr inbounds float* %tmp791, i64 1
- %tmp793 = getelementptr inbounds float* %tmp792, i64 1
- %tmp794 = getelementptr inbounds float* %tmp793, i64 1
- %tmp795 = getelementptr inbounds float* %tmp794, i64 1
- %tmp796 = getelementptr inbounds float* %tmp795, i64 1
- %tmp797 = getelementptr inbounds float* %tmp796, i64 1
- %tmp798 = getelementptr inbounds float* %tmp797, i64 1
- %tmp799 = getelementptr inbounds float* %tmp798, i64 1
- %tmp800 = getelementptr inbounds float* %tmp799, i64 1
- %tmp801 = getelementptr inbounds float* %tmp800, i64 1
- %tmp802 = getelementptr inbounds float* %tmp801, i64 1
- %tmp803 = getelementptr inbounds float* %tmp802, i64 1
- %tmp804 = getelementptr inbounds float* %tmp803, i64 1
- %tmp805 = getelementptr inbounds float* %tmp804, i64 1
- %tmp806 = getelementptr inbounds float* %tmp805, i64 1
- %tmp807 = getelementptr inbounds float* %tmp806, i64 1
- %tmp808 = getelementptr inbounds float* %tmp807, i64 1
- %tmp809 = getelementptr inbounds float* %tmp808, i64 1
- %tmp810 = getelementptr inbounds float* %tmp809, i64 1
- %tmp811 = getelementptr inbounds float* %tmp810, i64 1
- %tmp812 = getelementptr inbounds float* %tmp811, i64 1
- %tmp813 = getelementptr inbounds float* %tmp812, i64 1
- %tmp814 = getelementptr inbounds float* %tmp813, i64 1
- %tmp815 = getelementptr inbounds float* %tmp814, i64 1
- %tmp816 = getelementptr inbounds float* %tmp815, i64 1
- %tmp817 = getelementptr inbounds float* %tmp816, i64 1
- %tmp818 = getelementptr inbounds float* %tmp817, i64 1
- %tmp819 = getelementptr inbounds float* %tmp818, i64 1
- %tmp820 = getelementptr inbounds float* %tmp819, i64 1
- %tmp821 = getelementptr inbounds float* %tmp820, i64 1
- %tmp822 = getelementptr inbounds float* %tmp821, i64 1
- %tmp823 = getelementptr inbounds float* %tmp822, i64 1
- %tmp824 = getelementptr inbounds float* %tmp823, i64 1
- %tmp825 = getelementptr inbounds float* %tmp824, i64 1
- %tmp826 = getelementptr inbounds float* %tmp825, i64 1
- %tmp827 = getelementptr inbounds float* %tmp826, i64 1
- %tmp828 = getelementptr inbounds float* %tmp827, i64 1
- %tmp829 = getelementptr inbounds float* %tmp828, i64 1
- %tmp830 = getelementptr inbounds float* %tmp829, i64 1
- %tmp831 = getelementptr inbounds float* %tmp830, i64 1
- %tmp832 = getelementptr inbounds float* %tmp831, i64 1
- %tmp833 = getelementptr inbounds float* %tmp832, i64 1
- %tmp834 = getelementptr inbounds float* %tmp833, i64 1
- %tmp835 = getelementptr inbounds float* %tmp834, i64 1
- %tmp836 = getelementptr inbounds float* %tmp835, i64 1
- %tmp837 = getelementptr inbounds float* %tmp836, i64 1
- %tmp838 = getelementptr inbounds float* %tmp837, i64 1
- %tmp839 = getelementptr inbounds float* %tmp838, i64 1
- %tmp840 = getelementptr inbounds float* %tmp839, i64 1
- %tmp841 = getelementptr inbounds float* %tmp840, i64 1
- %tmp842 = getelementptr inbounds float* %tmp841, i64 1
- %tmp843 = getelementptr inbounds float* %tmp842, i64 1
- %tmp844 = getelementptr inbounds float* %tmp843, i64 1
- %tmp845 = getelementptr inbounds float* %tmp844, i64 1
- %tmp846 = getelementptr inbounds float* %tmp845, i64 1
- %tmp847 = getelementptr inbounds float* %tmp846, i64 1
- %tmp848 = getelementptr inbounds float* %tmp847, i64 1
- %tmp849 = getelementptr inbounds float* %tmp848, i64 1
- %tmp850 = getelementptr inbounds float* %tmp849, i64 1
- %tmp851 = getelementptr inbounds float* %tmp850, i64 1
- %tmp852 = getelementptr inbounds float* %tmp851, i64 1
- %tmp853 = getelementptr inbounds float* %tmp852, i64 1
- %tmp854 = getelementptr inbounds float* %tmp853, i64 1
- %tmp855 = getelementptr inbounds float* %tmp854, i64 1
- %tmp856 = getelementptr inbounds float* %tmp855, i64 1
- %tmp857 = getelementptr inbounds float* %tmp856, i64 1
- %tmp858 = getelementptr inbounds float* %tmp857, i64 1
- %tmp859 = getelementptr inbounds float* %tmp858, i64 1
- %tmp860 = getelementptr inbounds float* %tmp859, i64 1
- %tmp861 = getelementptr inbounds float* %tmp860, i64 1
- %tmp862 = getelementptr inbounds float* %tmp861, i64 1
- %tmp863 = getelementptr inbounds float* %tmp862, i64 1
- %tmp864 = getelementptr inbounds float* %tmp863, i64 1
- %tmp865 = getelementptr inbounds float* %tmp864, i64 1
- %tmp866 = getelementptr inbounds float* %tmp865, i64 1
- %tmp867 = getelementptr inbounds float* %tmp866, i64 1
- %tmp868 = getelementptr inbounds float* %tmp867, i64 1
- %tmp869 = getelementptr inbounds float* %tmp868, i64 1
- %tmp870 = getelementptr inbounds float* %tmp869, i64 1
- %tmp871 = getelementptr inbounds float* %tmp870, i64 1
- %tmp872 = getelementptr inbounds float* %tmp871, i64 1
- %tmp873 = getelementptr inbounds float* %tmp872, i64 1
- %tmp874 = getelementptr inbounds float* %tmp873, i64 1
- %tmp875 = getelementptr inbounds float* %tmp874, i64 1
- %tmp876 = getelementptr inbounds float* %tmp875, i64 1
- %tmp877 = getelementptr inbounds float* %tmp876, i64 1
- %tmp878 = getelementptr inbounds float* %tmp877, i64 1
- %tmp879 = getelementptr inbounds float* %tmp878, i64 1
- %tmp880 = getelementptr inbounds float* %tmp879, i64 1
- %tmp881 = getelementptr inbounds float* %tmp880, i64 1
- %tmp882 = getelementptr inbounds float* %tmp881, i64 1
- %tmp883 = getelementptr inbounds float* %tmp882, i64 1
- %tmp884 = getelementptr inbounds float* %tmp883, i64 1
- %tmp885 = getelementptr inbounds float* %tmp884, i64 1
- %tmp886 = getelementptr inbounds float* %tmp885, i64 1
- %tmp887 = getelementptr inbounds float* %tmp886, i64 1
- %tmp888 = getelementptr inbounds float* %tmp887, i64 1
- %tmp889 = getelementptr inbounds float* %tmp888, i64 1
- %tmp890 = getelementptr inbounds float* %tmp889, i64 1
- %tmp891 = getelementptr inbounds float* %tmp890, i64 1
- %tmp892 = getelementptr inbounds float* %tmp891, i64 1
- %tmp893 = getelementptr inbounds float* %tmp892, i64 1
- %tmp894 = getelementptr inbounds float* %tmp893, i64 1
- %tmp895 = getelementptr inbounds float* %tmp894, i64 1
- %tmp896 = getelementptr inbounds float* %tmp895, i64 1
- %tmp897 = getelementptr inbounds float* %tmp896, i64 1
- %tmp898 = getelementptr inbounds float* %tmp897, i64 1
- %tmp899 = getelementptr inbounds float* %tmp898, i64 1
- %tmp900 = getelementptr inbounds float* %tmp899, i64 1
- %tmp901 = getelementptr inbounds float* %tmp900, i64 1
- %tmp902 = getelementptr inbounds float* %tmp901, i64 1
- %tmp903 = getelementptr inbounds float* %tmp902, i64 1
- %tmp904 = getelementptr inbounds float* %tmp903, i64 1
- %tmp905 = getelementptr inbounds float* %tmp904, i64 1
- %tmp906 = getelementptr inbounds float* %tmp905, i64 1
- %tmp907 = getelementptr inbounds float* %tmp906, i64 1
- %tmp908 = getelementptr inbounds float* %tmp907, i64 1
- %tmp909 = getelementptr inbounds float* %tmp908, i64 1
- %tmp910 = getelementptr inbounds float* %tmp909, i64 1
- %tmp911 = getelementptr inbounds float* %tmp910, i64 1
- %tmp912 = getelementptr inbounds float* %tmp911, i64 1
- %tmp913 = getelementptr inbounds float* %tmp912, i64 1
- %tmp914 = getelementptr inbounds float* %tmp913, i64 1
- %tmp915 = getelementptr inbounds float* %tmp914, i64 1
- %tmp916 = getelementptr inbounds float* %tmp915, i64 1
- %tmp917 = getelementptr inbounds float* %tmp916, i64 1
- %tmp918 = getelementptr inbounds float* %tmp917, i64 1
- %tmp919 = getelementptr inbounds float* %tmp918, i64 1
- %tmp920 = getelementptr inbounds float* %tmp919, i64 1
- %tmp921 = getelementptr inbounds float* %tmp920, i64 1
- %tmp922 = getelementptr inbounds float* %tmp921, i64 1
- %tmp923 = getelementptr inbounds float* %tmp922, i64 1
- %tmp924 = getelementptr inbounds float* %tmp923, i64 1
- %tmp925 = getelementptr inbounds float* %tmp924, i64 1
- %tmp926 = getelementptr inbounds float* %tmp925, i64 1
- %tmp927 = getelementptr inbounds float* %tmp926, i64 1
- %tmp928 = getelementptr inbounds float* %tmp927, i64 1
- %tmp929 = getelementptr inbounds float* %tmp928, i64 1
- %tmp930 = getelementptr inbounds float* %tmp929, i64 1
- %tmp931 = getelementptr inbounds float* %tmp930, i64 1
- %tmp932 = getelementptr inbounds float* %tmp931, i64 1
- %tmp933 = getelementptr inbounds float* %tmp932, i64 1
- %tmp934 = getelementptr inbounds float* %tmp933, i64 1
- %tmp935 = getelementptr inbounds float* %tmp934, i64 1
- %tmp936 = getelementptr inbounds float* %tmp935, i64 1
- %tmp937 = getelementptr inbounds float* %tmp936, i64 1
- %tmp938 = getelementptr inbounds float* %tmp937, i64 1
- %tmp939 = getelementptr inbounds float* %tmp938, i64 1
- %tmp940 = getelementptr inbounds float* %tmp939, i64 1
- %tmp941 = getelementptr inbounds float* %tmp940, i64 1
- %tmp942 = getelementptr inbounds float* %tmp941, i64 1
- %tmp943 = getelementptr inbounds float* %tmp942, i64 1
- %tmp944 = getelementptr inbounds float* %tmp943, i64 1
- %tmp945 = getelementptr inbounds float* %tmp944, i64 1
- %tmp946 = getelementptr inbounds float* %tmp945, i64 1
- %tmp947 = getelementptr inbounds float* %tmp946, i64 1
- %tmp948 = getelementptr inbounds float* %tmp947, i64 1
- %tmp949 = getelementptr inbounds float* %tmp948, i64 1
- %tmp950 = getelementptr inbounds float* %tmp949, i64 1
- %tmp951 = getelementptr inbounds float* %tmp950, i64 1
- %tmp952 = getelementptr inbounds float* %tmp951, i64 1
- %tmp953 = getelementptr inbounds float* %tmp952, i64 1
- %tmp954 = getelementptr inbounds float* %tmp953, i64 1
- %tmp955 = getelementptr inbounds float* %tmp954, i64 1
- %tmp956 = getelementptr inbounds float* %tmp955, i64 1
- %tmp957 = getelementptr inbounds float* %tmp956, i64 1
- %tmp958 = getelementptr inbounds float* %tmp957, i64 1
- %tmp959 = getelementptr inbounds float* %tmp958, i64 1
- %tmp960 = getelementptr inbounds float* %tmp959, i64 1
- %tmp961 = getelementptr inbounds float* %tmp960, i64 1
- %tmp962 = getelementptr inbounds float* %tmp961, i64 1
- %tmp963 = getelementptr inbounds float* %tmp962, i64 1
- %tmp964 = getelementptr inbounds float* %tmp963, i64 1
- %tmp965 = getelementptr inbounds float* %tmp964, i64 1
- %tmp966 = getelementptr inbounds float* %tmp965, i64 1
- %tmp967 = getelementptr inbounds float* %tmp966, i64 1
- %tmp968 = getelementptr inbounds float* %tmp967, i64 1
- %tmp969 = getelementptr inbounds float* %tmp968, i64 1
- %tmp970 = getelementptr inbounds float* %tmp969, i64 1
- %tmp971 = getelementptr inbounds float* %tmp970, i64 1
- %tmp972 = getelementptr inbounds float* %tmp971, i64 1
- %tmp973 = getelementptr inbounds float* %tmp972, i64 1
- %tmp974 = getelementptr inbounds float* %tmp973, i64 1
- %tmp975 = getelementptr inbounds float* %tmp974, i64 1
- %tmp976 = getelementptr inbounds float* %tmp975, i64 1
- %tmp977 = getelementptr inbounds float* %tmp976, i64 1
- %tmp978 = getelementptr inbounds float* %tmp977, i64 1
- %tmp979 = getelementptr inbounds float* %tmp978, i64 1
- %tmp980 = getelementptr inbounds float* %tmp979, i64 1
- %tmp981 = getelementptr inbounds float* %tmp980, i64 1
- %tmp982 = getelementptr inbounds float* %tmp981, i64 1
- %tmp983 = getelementptr inbounds float* %tmp982, i64 1
- %tmp984 = getelementptr inbounds float* %tmp983, i64 1
- %tmp985 = getelementptr inbounds float* %tmp984, i64 1
- %tmp986 = getelementptr inbounds float* %tmp985, i64 1
- %tmp987 = getelementptr inbounds float* %tmp986, i64 1
- %tmp988 = getelementptr inbounds float* %tmp987, i64 1
- %tmp989 = getelementptr inbounds float* %tmp988, i64 1
- %tmp990 = getelementptr inbounds float* %tmp989, i64 1
- %tmp991 = getelementptr inbounds float* %tmp990, i64 1
- %tmp992 = getelementptr inbounds float* %tmp991, i64 1
- %tmp993 = getelementptr inbounds float* %tmp992, i64 1
- %tmp994 = getelementptr inbounds float* %tmp993, i64 1
- %tmp995 = getelementptr inbounds float* %tmp994, i64 1
- %tmp996 = getelementptr inbounds float* %tmp995, i64 1
- %tmp997 = getelementptr inbounds float* %tmp996, i64 1
- %tmp998 = getelementptr inbounds float* %tmp997, i64 1
- %tmp999 = getelementptr inbounds float* %tmp998, i64 1
- %tmp1000 = getelementptr inbounds float* %tmp999, i64 1
- %tmp1001 = getelementptr inbounds float* %tmp1000, i64 1
- %tmp1002 = getelementptr inbounds float* %tmp1001, i64 1
- %tmp1003 = getelementptr inbounds float* %tmp1002, i64 1
- %tmp1004 = getelementptr inbounds float* %tmp1003, i64 1
- %tmp1005 = getelementptr inbounds float* %tmp1004, i64 1
- %tmp1006 = getelementptr inbounds float* %tmp1005, i64 1
- %tmp1007 = getelementptr inbounds float* %tmp1006, i64 1
- %tmp1008 = getelementptr inbounds float* %tmp1007, i64 1
- %tmp1009 = getelementptr inbounds float* %tmp1008, i64 1
- %tmp1010 = getelementptr inbounds float* %tmp1009, i64 1
- %tmp1011 = getelementptr inbounds float* %tmp1010, i64 1
- %tmp1012 = getelementptr inbounds float* %tmp1011, i64 1
- %tmp1013 = getelementptr inbounds float* %tmp1012, i64 1
- %tmp1014 = getelementptr inbounds float* %tmp1013, i64 1
- %tmp1015 = getelementptr inbounds float* %tmp1014, i64 1
- %tmp1016 = getelementptr inbounds float* %tmp1015, i64 1
- %tmp1017 = getelementptr inbounds float* %tmp1016, i64 1
- %tmp1018 = getelementptr inbounds float* %tmp1017, i64 1
- %tmp1019 = getelementptr inbounds float* %tmp1018, i64 1
- %tmp1020 = getelementptr inbounds float* %tmp1019, i64 1
- %tmp1021 = getelementptr inbounds float* %tmp1020, i64 1
- %tmp1022 = getelementptr inbounds float* %tmp1021, i64 1
- %tmp1023 = getelementptr inbounds float* %tmp1022, i64 1
- %tmp1024 = getelementptr inbounds float* %tmp1023, i64 1
- %tmp1025 = getelementptr inbounds float* %tmp1024, i64 1
- %tmp1026 = getelementptr inbounds float* %tmp1025, i64 1
- %tmp1027 = getelementptr inbounds float* %tmp1026, i64 1
- %tmp1028 = getelementptr inbounds float* %tmp1027, i64 1
- %tmp1029 = getelementptr inbounds float* %tmp1028, i64 1
- %tmp1030 = getelementptr inbounds float* %tmp1029, i64 1
- %tmp1031 = getelementptr inbounds float* %tmp1030, i64 1
- %tmp1032 = getelementptr inbounds float* %tmp1031, i64 1
- %tmp1033 = getelementptr inbounds float* %tmp1032, i64 1
- %tmp1034 = getelementptr inbounds float* %tmp1033, i64 1
- %tmp1035 = getelementptr inbounds float* %tmp1034, i64 1
- %tmp1036 = getelementptr inbounds float* %tmp1035, i64 1
- %tmp1037 = getelementptr inbounds float* %tmp1036, i64 1
- %tmp1038 = getelementptr inbounds float* %tmp1037, i64 1
- %tmp1039 = getelementptr inbounds float* %tmp1038, i64 1
- %tmp1040 = getelementptr inbounds float* %tmp1039, i64 1
- %tmp1041 = getelementptr inbounds float* %tmp1040, i64 1
- %tmp1042 = getelementptr inbounds float* %tmp1041, i64 1
- %tmp1043 = getelementptr inbounds float* %tmp1042, i64 1
- %tmp1044 = getelementptr inbounds float* %tmp1043, i64 1
- %tmp1045 = getelementptr inbounds float* %tmp1044, i64 1
- %tmp1046 = getelementptr inbounds float* %tmp1045, i64 1
- %tmp1047 = getelementptr inbounds float* %tmp1046, i64 1
- %tmp1048 = getelementptr inbounds float* %tmp1047, i64 1
- %tmp1049 = getelementptr inbounds float* %tmp1048, i64 1
- %tmp1050 = getelementptr inbounds float* %tmp1049, i64 1
- %tmp1051 = getelementptr inbounds float* %tmp1050, i64 1
- %tmp1052 = getelementptr inbounds float* %tmp1051, i64 1
- %tmp1053 = getelementptr inbounds float* %tmp1052, i64 1
- %tmp1054 = getelementptr inbounds float* %tmp1053, i64 1
- %tmp1055 = getelementptr inbounds float* %tmp1054, i64 1
- %tmp1056 = getelementptr inbounds float* %tmp1055, i64 1
- %tmp1057 = getelementptr inbounds float* %tmp1056, i64 1
- %tmp1058 = getelementptr inbounds float* %tmp1057, i64 1
- %tmp1059 = getelementptr inbounds float* %tmp1058, i64 1
- %tmp1060 = getelementptr inbounds float* %tmp1059, i64 1
- %tmp1061 = getelementptr inbounds float* %tmp1060, i64 1
- %tmp1062 = getelementptr inbounds float* %tmp1061, i64 1
- %tmp1063 = getelementptr inbounds float* %tmp1062, i64 1
- %tmp1064 = getelementptr inbounds float* %tmp1063, i64 1
- %tmp1065 = getelementptr inbounds float* %tmp1064, i64 1
- %tmp1066 = getelementptr inbounds float* %tmp1065, i64 1
- %tmp1067 = getelementptr inbounds float* %tmp1066, i64 1
- %tmp1068 = getelementptr inbounds float* %tmp1067, i64 1
- %tmp1069 = getelementptr inbounds float* %tmp1068, i64 1
- %tmp1070 = getelementptr inbounds float* %tmp1069, i64 1
- %tmp1071 = getelementptr inbounds float* %tmp1070, i64 1
- %tmp1072 = getelementptr inbounds float* %tmp1071, i64 1
- %tmp1073 = getelementptr inbounds float* %tmp1072, i64 1
- %tmp1074 = getelementptr inbounds float* %tmp1073, i64 1
- %tmp1075 = getelementptr inbounds float* %tmp1074, i64 1
- %tmp1076 = getelementptr inbounds float* %tmp1075, i64 1
- %tmp1077 = getelementptr inbounds float* %tmp1076, i64 1
- %tmp1078 = getelementptr inbounds float* %tmp1077, i64 1
- %tmp1079 = getelementptr inbounds float* %tmp1078, i64 1
- %tmp1080 = getelementptr inbounds float* %tmp1079, i64 1
- %tmp1081 = getelementptr inbounds float* %tmp1080, i64 1
- %tmp1082 = getelementptr inbounds float* %tmp1081, i64 1
- %tmp1083 = getelementptr inbounds float* %tmp1082, i64 1
- %tmp1084 = getelementptr inbounds float* %tmp1083, i64 1
- %tmp1085 = getelementptr inbounds float* %tmp1084, i64 1
- %tmp1086 = getelementptr inbounds float* %tmp1085, i64 1
- %tmp1087 = getelementptr inbounds float* %tmp1086, i64 1
- %tmp1088 = getelementptr inbounds float* %tmp1087, i64 1
- %tmp1089 = getelementptr inbounds float* %tmp1088, i64 1
- %tmp1090 = getelementptr inbounds float* %tmp1089, i64 1
- %tmp1091 = getelementptr inbounds float* %tmp1090, i64 1
- %tmp1092 = getelementptr inbounds float* %tmp1091, i64 1
- %tmp1093 = getelementptr inbounds float* %tmp1092, i64 1
- %tmp1094 = getelementptr inbounds float* %tmp1093, i64 1
- %tmp1095 = getelementptr inbounds float* %tmp1094, i64 1
- %tmp1096 = getelementptr inbounds float* %tmp1095, i64 1
- %tmp1097 = getelementptr inbounds float* %tmp1096, i64 1
- %tmp1098 = getelementptr inbounds float* %tmp1097, i64 1
- %tmp1099 = getelementptr inbounds float* %tmp1098, i64 1
- %tmp1100 = getelementptr inbounds float* %tmp1099, i64 1
- %tmp1101 = getelementptr inbounds float* %tmp1100, i64 1
- %tmp1102 = getelementptr inbounds float* %tmp1101, i64 1
- %tmp1103 = getelementptr inbounds float* %tmp1102, i64 1
- %tmp1104 = getelementptr inbounds float* %tmp1103, i64 1
- %tmp1105 = getelementptr inbounds float* %tmp1104, i64 1
- %tmp1106 = getelementptr inbounds float* %tmp1105, i64 1
- %tmp1107 = getelementptr inbounds float* %tmp1106, i64 1
- %tmp1108 = getelementptr inbounds float* %tmp1107, i64 1
- %tmp1109 = getelementptr inbounds float* %tmp1108, i64 1
- %tmp1110 = getelementptr inbounds float* %tmp1109, i64 1
- %tmp1111 = getelementptr inbounds float* %tmp1110, i64 1
- %tmp1112 = getelementptr inbounds float* %tmp1111, i64 1
- %tmp1113 = getelementptr inbounds float* %tmp1112, i64 1
- %tmp1114 = getelementptr inbounds float* %tmp1113, i64 1
- %tmp1115 = getelementptr inbounds float* %tmp1114, i64 1
- %tmp1116 = getelementptr inbounds float* %tmp1115, i64 1
- %tmp1117 = getelementptr inbounds float* %tmp1116, i64 1
- %tmp1118 = getelementptr inbounds float* %tmp1117, i64 1
- %tmp1119 = getelementptr inbounds float* %tmp1118, i64 1
- %tmp1120 = getelementptr inbounds float* %tmp1119, i64 1
- %tmp1121 = getelementptr inbounds float* %tmp1120, i64 1
- %tmp1122 = getelementptr inbounds float* %tmp1121, i64 1
- %tmp1123 = getelementptr inbounds float* %tmp1122, i64 1
- %tmp1124 = getelementptr inbounds float* %tmp1123, i64 1
- %tmp1125 = getelementptr inbounds float* %tmp1124, i64 1
- %tmp1126 = getelementptr inbounds float* %tmp1125, i64 1
- %tmp1127 = getelementptr inbounds float* %tmp1126, i64 1
- %tmp1128 = getelementptr inbounds float* %tmp1127, i64 1
- %tmp1129 = getelementptr inbounds float* %tmp1128, i64 1
- %tmp1130 = getelementptr inbounds float* %tmp1129, i64 1
- %tmp1131 = getelementptr inbounds float* %tmp1130, i64 1
- %tmp1132 = getelementptr inbounds float* %tmp1131, i64 1
- %tmp1133 = getelementptr inbounds float* %tmp1132, i64 1
- %tmp1134 = getelementptr inbounds float* %tmp1133, i64 1
- %tmp1135 = getelementptr inbounds float* %tmp1134, i64 1
- %tmp1136 = getelementptr inbounds float* %tmp1135, i64 1
- %tmp1137 = getelementptr inbounds float* %tmp1136, i64 1
- %tmp1138 = getelementptr inbounds float* %tmp1137, i64 1
- %tmp1139 = getelementptr inbounds float* %tmp1138, i64 1
- %tmp1140 = getelementptr inbounds float* %tmp1139, i64 1
- %tmp1141 = getelementptr inbounds float* %tmp1140, i64 1
- %tmp1142 = getelementptr inbounds float* %tmp1141, i64 1
- %tmp1143 = getelementptr inbounds float* %tmp1142, i64 1
- %tmp1144 = getelementptr inbounds float* %tmp1143, i64 1
- %tmp1145 = getelementptr inbounds float* %tmp1144, i64 1
- %tmp1146 = getelementptr inbounds float* %tmp1145, i64 1
- %tmp1147 = getelementptr inbounds float* %tmp1146, i64 1
- %tmp1148 = getelementptr inbounds float* %tmp1147, i64 1
- %tmp1149 = getelementptr inbounds float* %tmp1148, i64 1
- %tmp1150 = getelementptr inbounds float* %tmp1149, i64 1
- %tmp1151 = getelementptr inbounds float* %tmp1150, i64 1
- %tmp1152 = getelementptr inbounds float* %tmp1151, i64 1
- %tmp1153 = getelementptr inbounds float* %tmp1152, i64 1
- %tmp1154 = getelementptr inbounds float* %tmp1153, i64 1
- %tmp1155 = getelementptr inbounds float* %tmp1154, i64 1
- %tmp1156 = getelementptr inbounds float* %tmp1155, i64 1
- %tmp1157 = getelementptr inbounds float* %tmp1156, i64 1
- %tmp1158 = getelementptr inbounds float* %tmp1157, i64 1
- %tmp1159 = getelementptr inbounds float* %tmp1158, i64 1
- %tmp1160 = getelementptr inbounds float* %tmp1159, i64 1
- %tmp1161 = getelementptr inbounds float* %tmp1160, i64 1
- %tmp1162 = getelementptr inbounds float* %tmp1161, i64 1
- %tmp1163 = getelementptr inbounds float* %tmp1162, i64 1
- %tmp1164 = getelementptr inbounds float* %tmp1163, i64 1
- %tmp1165 = getelementptr inbounds float* %tmp1164, i64 1
- %tmp1166 = getelementptr inbounds float* %tmp1165, i64 1
- %tmp1167 = getelementptr inbounds float* %tmp1166, i64 1
- %tmp1168 = getelementptr inbounds float* %tmp1167, i64 1
- %tmp1169 = getelementptr inbounds float* %tmp1168, i64 1
- %tmp1170 = getelementptr inbounds float* %tmp1169, i64 1
- %tmp1171 = getelementptr inbounds float* %tmp1170, i64 1
- %tmp1172 = getelementptr inbounds float* %tmp1171, i64 1
- %tmp1173 = getelementptr inbounds float* %tmp1172, i64 1
- %tmp1174 = getelementptr inbounds float* %tmp1173, i64 1
- %tmp1175 = getelementptr inbounds float* %tmp1174, i64 1
- %tmp1176 = getelementptr inbounds float* %tmp1175, i64 1
- %tmp1177 = getelementptr inbounds float* %tmp1176, i64 1
- %tmp1178 = getelementptr inbounds float* %tmp1177, i64 1
- %tmp1179 = getelementptr inbounds float* %tmp1178, i64 1
- %tmp1180 = getelementptr inbounds float* %tmp1179, i64 1
- %tmp1181 = getelementptr inbounds float* %tmp1180, i64 1
- %tmp1182 = getelementptr inbounds float* %tmp1181, i64 1
- %tmp1183 = getelementptr inbounds float* %tmp1182, i64 1
- %tmp1184 = getelementptr inbounds float* %tmp1183, i64 1
- %tmp1185 = getelementptr inbounds float* %tmp1184, i64 1
- %tmp1186 = getelementptr inbounds float* %tmp1185, i64 1
- %tmp1187 = getelementptr inbounds float* %tmp1186, i64 1
- %tmp1188 = getelementptr inbounds float* %tmp1187, i64 1
- %tmp1189 = getelementptr inbounds float* %tmp1188, i64 1
- %tmp1190 = getelementptr inbounds float* %tmp1189, i64 1
- %tmp1191 = getelementptr inbounds float* %tmp1190, i64 1
- %tmp1192 = getelementptr inbounds float* %tmp1191, i64 1
- %tmp1193 = getelementptr inbounds float* %tmp1192, i64 1
- %tmp1194 = getelementptr inbounds float* %tmp1193, i64 1
- %tmp1195 = getelementptr inbounds float* %tmp1194, i64 1
- %tmp1196 = getelementptr inbounds float* %tmp1195, i64 1
- %tmp1197 = getelementptr inbounds float* %tmp1196, i64 1
- %tmp1198 = getelementptr inbounds float* %tmp1197, i64 1
- %tmp1199 = getelementptr inbounds float* %tmp1198, i64 1
- %tmp1200 = getelementptr inbounds float* %tmp1199, i64 1
- %tmp1201 = getelementptr inbounds float* %tmp1200, i64 1
- %tmp1202 = getelementptr inbounds float* %tmp1201, i64 1
- %tmp1203 = getelementptr inbounds float* %tmp1202, i64 1
- %tmp1204 = getelementptr inbounds float* %tmp1203, i64 1
- %tmp1205 = getelementptr inbounds float* %tmp1204, i64 1
- %tmp1206 = getelementptr inbounds float* %tmp1205, i64 1
- %tmp1207 = getelementptr inbounds float* %tmp1206, i64 1
- %tmp1208 = getelementptr inbounds float* %tmp1207, i64 1
- %tmp1209 = getelementptr inbounds float* %tmp1208, i64 1
- %tmp1210 = getelementptr inbounds float* %tmp1209, i64 1
- %tmp1211 = getelementptr inbounds float* %tmp1210, i64 1
- %tmp1212 = getelementptr inbounds float* %tmp1211, i64 1
- %tmp1213 = getelementptr inbounds float* %tmp1212, i64 1
- %tmp1214 = getelementptr inbounds float* %tmp1213, i64 1
- %tmp1215 = getelementptr inbounds float* %tmp1214, i64 1
- %tmp1216 = getelementptr inbounds float* %tmp1215, i64 1
- %tmp1217 = getelementptr inbounds float* %tmp1216, i64 1
- %tmp1218 = getelementptr inbounds float* %tmp1217, i64 1
- %tmp1219 = getelementptr inbounds float* %tmp1218, i64 1
- %tmp1220 = getelementptr inbounds float* %tmp1219, i64 1
- %tmp1221 = getelementptr inbounds float* %tmp1220, i64 1
- %tmp1222 = getelementptr inbounds float* %tmp1221, i64 1
- %tmp1223 = getelementptr inbounds float* %tmp1222, i64 1
- %tmp1224 = getelementptr inbounds float* %tmp1223, i64 1
- %tmp1225 = getelementptr inbounds float* %tmp1224, i64 1
- %tmp1226 = getelementptr inbounds float* %tmp1225, i64 1
- %tmp1227 = getelementptr inbounds float* %tmp1226, i64 1
- %tmp1228 = getelementptr inbounds float* %tmp1227, i64 1
- %tmp1229 = getelementptr inbounds float* %tmp1228, i64 1
- %tmp1230 = getelementptr inbounds float* %tmp1229, i64 1
- %tmp1231 = getelementptr inbounds float* %tmp1230, i64 1
- %tmp1232 = getelementptr inbounds float* %tmp1231, i64 1
- %tmp1233 = getelementptr inbounds float* %tmp1232, i64 1
- %tmp1234 = getelementptr inbounds float* %tmp1233, i64 1
- %tmp1235 = getelementptr inbounds float* %tmp1234, i64 1
- %tmp1236 = getelementptr inbounds float* %tmp1235, i64 1
- %tmp1237 = getelementptr inbounds float* %tmp1236, i64 1
- %tmp1238 = getelementptr inbounds float* %tmp1237, i64 1
- %tmp1239 = getelementptr inbounds float* %tmp1238, i64 1
- %tmp1240 = getelementptr inbounds float* %tmp1239, i64 1
- %tmp1241 = getelementptr inbounds float* %tmp1240, i64 1
- %tmp1242 = getelementptr inbounds float* %tmp1241, i64 1
- %tmp1243 = getelementptr inbounds float* %tmp1242, i64 1
- %tmp1244 = getelementptr inbounds float* %tmp1243, i64 1
- %tmp1245 = getelementptr inbounds float* %tmp1244, i64 1
- %tmp1246 = getelementptr inbounds float* %tmp1245, i64 1
- %tmp1247 = getelementptr inbounds float* %tmp1246, i64 1
- %tmp1248 = getelementptr inbounds float* %tmp1247, i64 1
- %tmp1249 = getelementptr inbounds float* %tmp1248, i64 1
- %tmp1250 = getelementptr inbounds float* %tmp1249, i64 1
- %tmp1251 = getelementptr inbounds float* %tmp1250, i64 1
- %tmp1252 = getelementptr inbounds float* %tmp1251, i64 1
- %tmp1253 = getelementptr inbounds float* %tmp1252, i64 1
- %tmp1254 = getelementptr inbounds float* %tmp1253, i64 1
- %tmp1255 = getelementptr inbounds float* %tmp1254, i64 1
- %tmp1256 = getelementptr inbounds float* %tmp1255, i64 1
- %tmp1257 = getelementptr inbounds float* %tmp1256, i64 1
- %tmp1258 = getelementptr inbounds float* %tmp1257, i64 1
- %tmp1259 = getelementptr inbounds float* %tmp1258, i64 1
- %tmp1260 = getelementptr inbounds float* %tmp1259, i64 1
- %tmp1261 = getelementptr inbounds float* %tmp1260, i64 1
- %tmp1262 = getelementptr inbounds float* %tmp1261, i64 1
- %tmp1263 = getelementptr inbounds float* %tmp1262, i64 1
- %tmp1264 = getelementptr inbounds float* %tmp1263, i64 1
- %tmp1265 = getelementptr inbounds float* %tmp1264, i64 1
- %tmp1266 = getelementptr inbounds float* %tmp1265, i64 1
- %tmp1267 = getelementptr inbounds float* %tmp1266, i64 1
- %tmp1268 = getelementptr inbounds float* %tmp1267, i64 1
- %tmp1269 = getelementptr inbounds float* %tmp1268, i64 1
- %tmp1270 = getelementptr inbounds float* %tmp1269, i64 1
- %tmp1271 = getelementptr inbounds float* %tmp1270, i64 1
- %tmp1272 = getelementptr inbounds float* %tmp1271, i64 1
- %tmp1273 = getelementptr inbounds float* %tmp1272, i64 1
- %tmp1274 = getelementptr inbounds float* %tmp1273, i64 1
- %tmp1275 = getelementptr inbounds float* %tmp1274, i64 1
- %tmp1276 = getelementptr inbounds float* %tmp1275, i64 1
- %tmp1277 = getelementptr inbounds float* %tmp1276, i64 1
- %tmp1278 = getelementptr inbounds float* %tmp1277, i64 1
- %tmp1279 = getelementptr inbounds float* %tmp1278, i64 1
- %tmp1280 = getelementptr inbounds float* %tmp1279, i64 1
- %tmp1281 = getelementptr inbounds float* %tmp1280, i64 1
- %tmp1282 = getelementptr inbounds float* %tmp1281, i64 1
- %tmp1283 = getelementptr inbounds float* %tmp1282, i64 1
- %tmp1284 = getelementptr inbounds float* %tmp1283, i64 1
- %tmp1285 = getelementptr inbounds float* %tmp1284, i64 1
- %tmp1286 = getelementptr inbounds float* %tmp1285, i64 1
- %tmp1287 = getelementptr inbounds float* %tmp1286, i64 1
- %tmp1288 = getelementptr inbounds float* %tmp1287, i64 1
- %tmp1289 = getelementptr inbounds float* %tmp1288, i64 1
- %tmp1290 = getelementptr inbounds float* %tmp1289, i64 1
- %tmp1291 = getelementptr inbounds float* %tmp1290, i64 1
- %tmp1292 = getelementptr inbounds float* %tmp1291, i64 1
- %tmp1293 = getelementptr inbounds float* %tmp1292, i64 1
- %tmp1294 = getelementptr inbounds float* %tmp1293, i64 1
- %tmp1295 = getelementptr inbounds float* %tmp1294, i64 1
- %tmp1296 = getelementptr inbounds float* %tmp1295, i64 1
- %tmp1297 = getelementptr inbounds float* %tmp1296, i64 1
- %tmp1298 = getelementptr inbounds float* %tmp1297, i64 1
- %tmp1299 = getelementptr inbounds float* %tmp1298, i64 1
- %tmp1300 = getelementptr inbounds float* %tmp1299, i64 1
- %tmp1301 = getelementptr inbounds float* %tmp1300, i64 1
- %tmp1302 = getelementptr inbounds float* %tmp1301, i64 1
- %tmp1303 = getelementptr inbounds float* %tmp1302, i64 1
- %tmp1304 = getelementptr inbounds float* %tmp1303, i64 1
- %tmp1305 = getelementptr inbounds float* %tmp1304, i64 1
- %tmp1306 = getelementptr inbounds float* %tmp1305, i64 1
- %tmp1307 = getelementptr inbounds float* %tmp1306, i64 1
- %tmp1308 = getelementptr inbounds float* %tmp1307, i64 1
- %tmp1309 = getelementptr inbounds float* %tmp1308, i64 1
- %tmp1310 = getelementptr inbounds float* %tmp1309, i64 1
- %tmp1311 = getelementptr inbounds float* %tmp1310, i64 1
- %tmp1312 = getelementptr inbounds float* %tmp1311, i64 1
- %tmp1313 = getelementptr inbounds float* %tmp1312, i64 1
- %tmp1314 = getelementptr inbounds float* %tmp1313, i64 1
- %tmp1315 = getelementptr inbounds float* %tmp1314, i64 1
- %tmp1316 = getelementptr inbounds float* %tmp1315, i64 1
- %tmp1317 = getelementptr inbounds float* %tmp1316, i64 1
- %tmp1318 = getelementptr inbounds float* %tmp1317, i64 1
- %tmp1319 = getelementptr inbounds float* %tmp1318, i64 1
- %tmp1320 = getelementptr inbounds float* %tmp1319, i64 1
- %tmp1321 = getelementptr inbounds float* %tmp1320, i64 1
- %tmp1322 = getelementptr inbounds float* %tmp1321, i64 1
- %tmp1323 = getelementptr inbounds float* %tmp1322, i64 1
- %tmp1324 = getelementptr inbounds float* %tmp1323, i64 1
- %tmp1325 = getelementptr inbounds float* %tmp1324, i64 1
- %tmp1326 = getelementptr inbounds float* %tmp1325, i64 1
- %tmp1327 = getelementptr inbounds float* %tmp1326, i64 1
- %tmp1328 = getelementptr inbounds float* %tmp1327, i64 1
- %tmp1329 = getelementptr inbounds float* %tmp1328, i64 1
- %tmp1330 = getelementptr inbounds float* %tmp1329, i64 1
- %tmp1331 = getelementptr inbounds float* %tmp1330, i64 1
- %tmp1332 = getelementptr inbounds float* %tmp1331, i64 1
- %tmp1333 = getelementptr inbounds float* %tmp1332, i64 1
- %tmp1334 = getelementptr inbounds float* %tmp1333, i64 1
- %tmp1335 = getelementptr inbounds float* %tmp1334, i64 1
- %tmp1336 = getelementptr inbounds float* %tmp1335, i64 1
- %tmp1337 = getelementptr inbounds float* %tmp1336, i64 1
- %tmp1338 = getelementptr inbounds float* %tmp1337, i64 1
- %tmp1339 = getelementptr inbounds float* %tmp1338, i64 1
- %tmp1340 = getelementptr inbounds float* %tmp1339, i64 1
- %tmp1341 = getelementptr inbounds float* %tmp1340, i64 1
- %tmp1342 = getelementptr inbounds float* %tmp1341, i64 1
- %tmp1343 = getelementptr inbounds float* %tmp1342, i64 1
- %tmp1344 = getelementptr inbounds float* %tmp1343, i64 1
- %tmp1345 = getelementptr inbounds float* %tmp1344, i64 1
- %tmp1346 = getelementptr inbounds float* %tmp1345, i64 1
- %tmp1347 = getelementptr inbounds float* %tmp1346, i64 1
- %tmp1348 = getelementptr inbounds float* %tmp1347, i64 1
- %tmp1349 = getelementptr inbounds float* %tmp1348, i64 1
- %tmp1350 = getelementptr inbounds float* %tmp1349, i64 1
- %tmp1351 = getelementptr inbounds float* %tmp1350, i64 1
- %tmp1352 = getelementptr inbounds float* %tmp1351, i64 1
- %tmp1353 = getelementptr inbounds float* %tmp1352, i64 1
- %tmp1354 = getelementptr inbounds float* %tmp1353, i64 1
- %tmp1355 = getelementptr inbounds float* %tmp1354, i64 1
- %tmp1356 = getelementptr inbounds float* %tmp1355, i64 1
- %tmp1357 = getelementptr inbounds float* %tmp1356, i64 1
- %tmp1358 = getelementptr inbounds float* %tmp1357, i64 1
- %tmp1359 = getelementptr inbounds float* %tmp1358, i64 1
- %tmp1360 = getelementptr inbounds float* %tmp1359, i64 1
- %tmp1361 = getelementptr inbounds float* %tmp1360, i64 1
- %tmp1362 = getelementptr inbounds float* %tmp1361, i64 1
- %tmp1363 = getelementptr inbounds float* %tmp1362, i64 1
- %tmp1364 = getelementptr inbounds float* %tmp1363, i64 1
- %tmp1365 = getelementptr inbounds float* %tmp1364, i64 1
- %tmp1366 = getelementptr inbounds float* %tmp1365, i64 1
- %tmp1367 = getelementptr inbounds float* %tmp1366, i64 1
- %tmp1368 = getelementptr inbounds float* %tmp1367, i64 1
- %tmp1369 = getelementptr inbounds float* %tmp1368, i64 1
- %tmp1370 = getelementptr inbounds float* %tmp1369, i64 1
- %tmp1371 = getelementptr inbounds float* %tmp1370, i64 1
- %tmp1372 = getelementptr inbounds float* %tmp1371, i64 1
- %tmp1373 = getelementptr inbounds float* %tmp1372, i64 1
- %tmp1374 = getelementptr inbounds float* %tmp1373, i64 1
- %tmp1375 = getelementptr inbounds float* %tmp1374, i64 1
- %tmp1376 = getelementptr inbounds float* %tmp1375, i64 1
- %tmp1377 = getelementptr inbounds float* %tmp1376, i64 1
- %tmp1378 = getelementptr inbounds float* %tmp1377, i64 1
- %tmp1379 = getelementptr inbounds float* %tmp1378, i64 1
- %tmp1380 = getelementptr inbounds float* %tmp1379, i64 1
- %tmp1381 = getelementptr inbounds float* %tmp1380, i64 1
- %tmp1382 = getelementptr inbounds float* %tmp1381, i64 1
- %tmp1383 = getelementptr inbounds float* %tmp1382, i64 1
- %tmp1384 = getelementptr inbounds float* %tmp1383, i64 1
- %tmp1385 = getelementptr inbounds float* %tmp1384, i64 1
- %tmp1386 = getelementptr inbounds float* %tmp1385, i64 1
- %tmp1387 = getelementptr inbounds float* %tmp1386, i64 1
- %tmp1388 = getelementptr inbounds float* %tmp1387, i64 1
- %tmp1389 = getelementptr inbounds float* %tmp1388, i64 1
- %tmp1390 = getelementptr inbounds float* %tmp1389, i64 1
- %tmp1391 = getelementptr inbounds float* %tmp1390, i64 1
- %tmp1392 = getelementptr inbounds float* %tmp1391, i64 1
- %tmp1393 = getelementptr inbounds float* %tmp1392, i64 1
- %tmp1394 = getelementptr inbounds float* %tmp1393, i64 1
- %tmp1395 = getelementptr inbounds float* %tmp1394, i64 1
- %tmp1396 = getelementptr inbounds float* %tmp1395, i64 1
- %tmp1397 = getelementptr inbounds float* %tmp1396, i64 1
- %tmp1398 = getelementptr inbounds float* %tmp1397, i64 1
- %tmp1399 = getelementptr inbounds float* %tmp1398, i64 1
- %tmp1400 = getelementptr inbounds float* %tmp1399, i64 1
- %tmp1401 = getelementptr inbounds float* %tmp1400, i64 1
- %tmp1402 = getelementptr inbounds float* %tmp1401, i64 1
- %tmp1403 = getelementptr inbounds float* %tmp1402, i64 1
- %tmp1404 = getelementptr inbounds float* %tmp1403, i64 1
- %tmp1405 = getelementptr inbounds float* %tmp1404, i64 1
- %tmp1406 = getelementptr inbounds float* %tmp1405, i64 1
- %tmp1407 = getelementptr inbounds float* %tmp1406, i64 1
- %tmp1408 = getelementptr inbounds float* %tmp1407, i64 1
- %tmp1409 = getelementptr inbounds float* %tmp1408, i64 1
- %tmp1410 = getelementptr inbounds float* %tmp1409, i64 1
- %tmp1411 = getelementptr inbounds float* %tmp1410, i64 1
- %tmp1412 = getelementptr inbounds float* %tmp1411, i64 1
- %tmp1413 = getelementptr inbounds float* %tmp1412, i64 1
- %tmp1414 = getelementptr inbounds float* %tmp1413, i64 1
- %tmp1415 = getelementptr inbounds float* %tmp1414, i64 1
- %tmp1416 = getelementptr inbounds float* %tmp1415, i64 1
- %tmp1417 = getelementptr inbounds float* %tmp1416, i64 1
- %tmp1418 = getelementptr inbounds float* %tmp1417, i64 1
- %tmp1419 = getelementptr inbounds float* %tmp1418, i64 1
- %tmp1420 = getelementptr inbounds float* %tmp1419, i64 1
- %tmp1421 = getelementptr inbounds float* %tmp1420, i64 1
- %tmp1422 = getelementptr inbounds float* %tmp1421, i64 1
- %tmp1423 = getelementptr inbounds float* %tmp1422, i64 1
- %tmp1424 = getelementptr inbounds float* %tmp1423, i64 1
- %tmp1425 = getelementptr inbounds float* %tmp1424, i64 1
- %tmp1426 = getelementptr inbounds float* %tmp1425, i64 1
- %tmp1427 = getelementptr inbounds float* %tmp1426, i64 1
- %tmp1428 = getelementptr inbounds float* %tmp1427, i64 1
- %tmp1429 = getelementptr inbounds float* %tmp1428, i64 1
- %tmp1430 = getelementptr inbounds float* %tmp1429, i64 1
- %tmp1431 = getelementptr inbounds float* %tmp1430, i64 1
- %tmp1432 = getelementptr inbounds float* %tmp1431, i64 1
- %tmp1433 = getelementptr inbounds float* %tmp1432, i64 1
- %tmp1434 = getelementptr inbounds float* %tmp1433, i64 1
- %tmp1435 = getelementptr inbounds float* %tmp1434, i64 1
- %tmp1436 = getelementptr inbounds float* %tmp1435, i64 1
- %tmp1437 = getelementptr inbounds float* %tmp1436, i64 1
- %tmp1438 = getelementptr inbounds float* %tmp1437, i64 1
- %tmp1439 = getelementptr inbounds float* %tmp1438, i64 1
- %tmp1440 = getelementptr inbounds float* %tmp1439, i64 1
- %tmp1441 = getelementptr inbounds float* %tmp1440, i64 1
- %tmp1442 = getelementptr inbounds float* %tmp1441, i64 1
- %tmp1443 = getelementptr inbounds float* %tmp1442, i64 1
- %tmp1444 = getelementptr inbounds float* %tmp1443, i64 1
- %tmp1445 = getelementptr inbounds float* %tmp1444, i64 1
- %tmp1446 = getelementptr inbounds float* %tmp1445, i64 1
- %tmp1447 = getelementptr inbounds float* %tmp1446, i64 1
- %tmp1448 = getelementptr inbounds float* %tmp1447, i64 1
- %tmp1449 = getelementptr inbounds float* %tmp1448, i64 1
- %tmp1450 = getelementptr inbounds float* %tmp1449, i64 1
- %tmp1451 = getelementptr inbounds float* %tmp1450, i64 1
- %tmp1452 = getelementptr inbounds float* %tmp1451, i64 1
- %tmp1453 = getelementptr inbounds float* %tmp1452, i64 1
- %tmp1454 = getelementptr inbounds float* %tmp1453, i64 1
- %tmp1455 = getelementptr inbounds float* %tmp1454, i64 1
- %tmp1456 = getelementptr inbounds float* %tmp1455, i64 1
- %tmp1457 = getelementptr inbounds float* %tmp1456, i64 1
- %tmp1458 = getelementptr inbounds float* %tmp1457, i64 1
- %tmp1459 = getelementptr inbounds float* %tmp1458, i64 1
- %tmp1460 = getelementptr inbounds float* %tmp1459, i64 1
- %tmp1461 = getelementptr inbounds float* %tmp1460, i64 1
- %tmp1462 = getelementptr inbounds float* %tmp1461, i64 1
- %tmp1463 = getelementptr inbounds float* %tmp1462, i64 1
- %tmp1464 = getelementptr inbounds float* %tmp1463, i64 1
- %tmp1465 = getelementptr inbounds float* %tmp1464, i64 1
- %tmp1466 = getelementptr inbounds float* %tmp1465, i64 1
- %tmp1467 = getelementptr inbounds float* %tmp1466, i64 1
- %tmp1468 = getelementptr inbounds float* %tmp1467, i64 1
- %tmp1469 = getelementptr inbounds float* %tmp1468, i64 1
- %tmp1470 = getelementptr inbounds float* %tmp1469, i64 1
- %tmp1471 = getelementptr inbounds float* %tmp1470, i64 1
- %tmp1472 = getelementptr inbounds float* %tmp1471, i64 1
- %tmp1473 = getelementptr inbounds float* %tmp1472, i64 1
- %tmp1474 = getelementptr inbounds float* %tmp1473, i64 1
- %tmp1475 = getelementptr inbounds float* %tmp1474, i64 1
- %tmp1476 = getelementptr inbounds float* %tmp1475, i64 1
- %tmp1477 = getelementptr inbounds float* %tmp1476, i64 1
- %tmp1478 = getelementptr inbounds float* %tmp1477, i64 1
- %tmp1479 = getelementptr inbounds float* %tmp1478, i64 1
- %tmp1480 = getelementptr inbounds float* %tmp1479, i64 1
- %tmp1481 = getelementptr inbounds float* %tmp1480, i64 1
- %tmp1482 = getelementptr inbounds float* %tmp1481, i64 1
- %tmp1483 = getelementptr inbounds float* %tmp1482, i64 1
- %tmp1484 = getelementptr inbounds float* %tmp1483, i64 1
- %tmp1485 = getelementptr inbounds float* %tmp1484, i64 1
- %tmp1486 = getelementptr inbounds float* %tmp1485, i64 1
- %tmp1487 = getelementptr inbounds float* %tmp1486, i64 1
- %tmp1488 = getelementptr inbounds float* %tmp1487, i64 1
- %tmp1489 = getelementptr inbounds float* %tmp1488, i64 1
- %tmp1490 = getelementptr inbounds float* %tmp1489, i64 1
- %tmp1491 = getelementptr inbounds float* %tmp1490, i64 1
- %tmp1492 = getelementptr inbounds float* %tmp1491, i64 1
- %tmp1493 = getelementptr inbounds float* %tmp1492, i64 1
- %tmp1494 = getelementptr inbounds float* %tmp1493, i64 1
- %tmp1495 = getelementptr inbounds float* %tmp1494, i64 1
- %tmp1496 = getelementptr inbounds float* %tmp1495, i64 1
- %tmp1497 = getelementptr inbounds float* %tmp1496, i64 1
- %tmp1498 = getelementptr inbounds float* %tmp1497, i64 1
- %tmp1499 = getelementptr inbounds float* %tmp1498, i64 1
- %tmp1500 = getelementptr inbounds float* %tmp1499, i64 1
- %tmp1501 = getelementptr inbounds float* %tmp1500, i64 1
- %tmp1502 = getelementptr inbounds float* %tmp1501, i64 1
- %tmp1503 = getelementptr inbounds float* %tmp1502, i64 1
- %tmp1504 = getelementptr inbounds float* %tmp1503, i64 1
- %tmp1505 = getelementptr inbounds float* %tmp1504, i64 1
- %tmp1506 = getelementptr inbounds float* %tmp1505, i64 1
- %tmp1507 = getelementptr inbounds float* %tmp1506, i64 1
- %tmp1508 = getelementptr inbounds float* %tmp1507, i64 1
- %tmp1509 = getelementptr inbounds float* %tmp1508, i64 1
- %tmp1510 = getelementptr inbounds float* %tmp1509, i64 1
- %tmp1511 = getelementptr inbounds float* %tmp1510, i64 1
- %tmp1512 = getelementptr inbounds float* %tmp1511, i64 1
- %tmp1513 = getelementptr inbounds float* %tmp1512, i64 1
- %tmp1514 = getelementptr inbounds float* %tmp1513, i64 1
- %tmp1515 = getelementptr inbounds float* %tmp1514, i64 1
- %tmp1516 = getelementptr inbounds float* %tmp1515, i64 1
- %tmp1517 = getelementptr inbounds float* %tmp1516, i64 1
- %tmp1518 = getelementptr inbounds float* %tmp1517, i64 1
- %tmp1519 = getelementptr inbounds float* %tmp1518, i64 1
- %tmp1520 = getelementptr inbounds float* %tmp1519, i64 1
- %tmp1521 = getelementptr inbounds float* %tmp1520, i64 1
- %tmp1522 = getelementptr inbounds float* %tmp1521, i64 1
- %tmp1523 = getelementptr inbounds float* %tmp1522, i64 1
- %tmp1524 = getelementptr inbounds float* %tmp1523, i64 1
- %tmp1525 = getelementptr inbounds float* %tmp1524, i64 1
- %tmp1526 = getelementptr inbounds float* %tmp1525, i64 1
- %tmp1527 = getelementptr inbounds float* %tmp1526, i64 1
- %tmp1528 = getelementptr inbounds float* %tmp1527, i64 1
- %tmp1529 = getelementptr inbounds float* %tmp1528, i64 1
- %tmp1530 = getelementptr inbounds float* %tmp1529, i64 1
- %tmp1531 = getelementptr inbounds float* %tmp1530, i64 1
- %tmp1532 = getelementptr inbounds float* %tmp1531, i64 1
- %tmp1533 = getelementptr inbounds float* %tmp1532, i64 1
- %tmp1534 = getelementptr inbounds float* %tmp1533, i64 1
- %tmp1535 = getelementptr inbounds float* %tmp1534, i64 1
- %tmp1536 = getelementptr inbounds float* %tmp1535, i64 1
- %tmp1537 = getelementptr inbounds float* %tmp1536, i64 1
- %tmp1538 = getelementptr inbounds float* %tmp1537, i64 1
- %tmp1539 = getelementptr inbounds float* %tmp1538, i64 1
- %tmp1540 = getelementptr inbounds float* %tmp1539, i64 1
- %tmp1541 = getelementptr inbounds float* %tmp1540, i64 1
- %tmp1542 = getelementptr inbounds float* %tmp1541, i64 1
- %tmp1543 = getelementptr inbounds float* %tmp1542, i64 1
- %tmp1544 = getelementptr inbounds float* %tmp1543, i64 1
- %tmp1545 = getelementptr inbounds float* %tmp1544, i64 1
- %tmp1546 = getelementptr inbounds float* %tmp1545, i64 1
- %tmp1547 = getelementptr inbounds float* %tmp1546, i64 1
- %tmp1548 = getelementptr inbounds float* %tmp1547, i64 1
- %tmp1549 = getelementptr inbounds float* %tmp1548, i64 1
- %tmp1550 = getelementptr inbounds float* %tmp1549, i64 1
- %tmp1551 = getelementptr inbounds float* %tmp1550, i64 1
- %tmp1552 = getelementptr inbounds float* %tmp1551, i64 1
- %tmp1553 = getelementptr inbounds float* %tmp1552, i64 1
- %tmp1554 = getelementptr inbounds float* %tmp1553, i64 1
- %tmp1555 = getelementptr inbounds float* %tmp1554, i64 1
- %tmp1556 = getelementptr inbounds float* %tmp1555, i64 1
- %tmp1557 = getelementptr inbounds float* %tmp1556, i64 1
- %tmp1558 = getelementptr inbounds float* %tmp1557, i64 1
- %tmp1559 = getelementptr inbounds float* %tmp1558, i64 1
- %tmp1560 = getelementptr inbounds float* %tmp1559, i64 1
- %tmp1561 = getelementptr inbounds float* %tmp1560, i64 1
- %tmp1562 = getelementptr inbounds float* %tmp1561, i64 1
- %tmp1563 = getelementptr inbounds float* %tmp1562, i64 1
- %tmp1564 = getelementptr inbounds float* %tmp1563, i64 1
- %tmp1565 = getelementptr inbounds float* %tmp1564, i64 1
- %tmp1566 = getelementptr inbounds float* %tmp1565, i64 1
- %tmp1567 = getelementptr inbounds float* %tmp1566, i64 1
- %tmp1568 = getelementptr inbounds float* %tmp1567, i64 1
- %tmp1569 = getelementptr inbounds float* %tmp1568, i64 1
- %tmp1570 = getelementptr inbounds float* %tmp1569, i64 1
- %tmp1571 = getelementptr inbounds float* %tmp1570, i64 1
- %tmp1572 = getelementptr inbounds float* %tmp1571, i64 1
- %tmp1573 = getelementptr inbounds float* %tmp1572, i64 1
- %tmp1574 = getelementptr inbounds float* %tmp1573, i64 1
- %tmp1575 = getelementptr inbounds float* %tmp1574, i64 1
- %tmp1576 = getelementptr inbounds float* %tmp1575, i64 1
- %tmp1577 = getelementptr inbounds float* %tmp1576, i64 1
- %tmp1578 = getelementptr inbounds float* %tmp1577, i64 1
- %tmp1579 = getelementptr inbounds float* %tmp1578, i64 1
- %tmp1580 = getelementptr inbounds float* %tmp1579, i64 1
- %tmp1581 = getelementptr inbounds float* %tmp1580, i64 1
- %tmp1582 = getelementptr inbounds float* %tmp1581, i64 1
- %tmp1583 = getelementptr inbounds float* %tmp1582, i64 1
- %tmp1584 = getelementptr inbounds float* %tmp1583, i64 1
- %tmp1585 = getelementptr inbounds float* %tmp1584, i64 1
- %tmp1586 = getelementptr inbounds float* %tmp1585, i64 1
- %tmp1587 = getelementptr inbounds float* %tmp1586, i64 1
- %tmp1588 = getelementptr inbounds float* %tmp1587, i64 1
- %tmp1589 = getelementptr inbounds float* %tmp1588, i64 1
- %tmp1590 = getelementptr inbounds float* %tmp1589, i64 1
- %tmp1591 = getelementptr inbounds float* %tmp1590, i64 1
- %tmp1592 = getelementptr inbounds float* %tmp1591, i64 1
- %tmp1593 = getelementptr inbounds float* %tmp1592, i64 1
- %tmp1594 = getelementptr inbounds float* %tmp1593, i64 1
- %tmp1595 = getelementptr inbounds float* %tmp1594, i64 1
- %tmp1596 = getelementptr inbounds float* %tmp1595, i64 1
- %tmp1597 = getelementptr inbounds float* %tmp1596, i64 1
- %tmp1598 = getelementptr inbounds float* %tmp1597, i64 1
- %tmp1599 = getelementptr inbounds float* %tmp1598, i64 1
- %tmp1600 = getelementptr inbounds float* %tmp1599, i64 1
- %tmp1601 = getelementptr inbounds float* %tmp1600, i64 1
- %tmp1602 = getelementptr inbounds float* %tmp1601, i64 1
- %tmp1603 = getelementptr inbounds float* %tmp1602, i64 1
- %tmp1604 = getelementptr inbounds float* %tmp1603, i64 1
- %tmp1605 = getelementptr inbounds float* %tmp1604, i64 1
- %tmp1606 = getelementptr inbounds float* %tmp1605, i64 1
- %tmp1607 = getelementptr inbounds float* %tmp1606, i64 1
- %tmp1608 = getelementptr inbounds float* %tmp1607, i64 1
- %tmp1609 = getelementptr inbounds float* %tmp1608, i64 1
- %tmp1610 = getelementptr inbounds float* %tmp1609, i64 1
- %tmp1611 = getelementptr inbounds float* %tmp1610, i64 1
- %tmp1612 = getelementptr inbounds float* %tmp1611, i64 1
- %tmp1613 = getelementptr inbounds float* %tmp1612, i64 1
- %tmp1614 = getelementptr inbounds float* %tmp1613, i64 1
- %tmp1615 = getelementptr inbounds float* %tmp1614, i64 1
- %tmp1616 = getelementptr inbounds float* %tmp1615, i64 1
- %tmp1617 = getelementptr inbounds float* %tmp1616, i64 1
- %tmp1618 = getelementptr inbounds float* %tmp1617, i64 1
- %tmp1619 = getelementptr inbounds float* %tmp1618, i64 1
- %tmp1620 = getelementptr inbounds float* %tmp1619, i64 1
- %tmp1621 = getelementptr inbounds float* %tmp1620, i64 1
- %tmp1622 = getelementptr inbounds float* %tmp1621, i64 1
- %tmp1623 = getelementptr inbounds float* %tmp1622, i64 1
- %tmp1624 = getelementptr inbounds float* %tmp1623, i64 1
- %tmp1625 = getelementptr inbounds float* %tmp1624, i64 1
- %tmp1626 = getelementptr inbounds float* %tmp1625, i64 1
- %tmp1627 = getelementptr inbounds float* %tmp1626, i64 1
- %tmp1628 = getelementptr inbounds float* %tmp1627, i64 1
- %tmp1629 = getelementptr inbounds float* %tmp1628, i64 1
- %tmp1630 = getelementptr inbounds float* %tmp1629, i64 1
- %tmp1631 = getelementptr inbounds float* %tmp1630, i64 1
- %tmp1632 = getelementptr inbounds float* %tmp1631, i64 1
- %tmp1633 = getelementptr inbounds float* %tmp1632, i64 1
- %tmp1634 = getelementptr inbounds float* %tmp1633, i64 1
- %tmp1635 = getelementptr inbounds float* %tmp1634, i64 1
- %tmp1636 = getelementptr inbounds float* %tmp1635, i64 1
- %tmp1637 = getelementptr inbounds float* %tmp1636, i64 1
- %tmp1638 = getelementptr inbounds float* %tmp1637, i64 1
- %tmp1639 = getelementptr inbounds float* %tmp1638, i64 1
- %tmp1640 = getelementptr inbounds float* %tmp1639, i64 1
- %tmp1641 = getelementptr inbounds float* %tmp1640, i64 1
- %tmp1642 = getelementptr inbounds float* %tmp1641, i64 1
- %tmp1643 = getelementptr inbounds float* %tmp1642, i64 1
- %tmp1644 = getelementptr inbounds float* %tmp1643, i64 1
- %tmp1645 = getelementptr inbounds float* %tmp1644, i64 1
- %tmp1646 = getelementptr inbounds float* %tmp1645, i64 1
- %tmp1647 = getelementptr inbounds float* %tmp1646, i64 1
- %tmp1648 = getelementptr inbounds float* %tmp1647, i64 1
- %tmp1649 = getelementptr inbounds float* %tmp1648, i64 1
- %tmp1650 = getelementptr inbounds float* %tmp1649, i64 1
- %tmp1651 = getelementptr inbounds float* %tmp1650, i64 1
- %tmp1652 = getelementptr inbounds float* %tmp1651, i64 1
- %tmp1653 = getelementptr inbounds float* %tmp1652, i64 1
- %tmp1654 = getelementptr inbounds float* %tmp1653, i64 1
- %tmp1655 = getelementptr inbounds float* %tmp1654, i64 1
- %tmp1656 = getelementptr inbounds float* %tmp1655, i64 1
- %tmp1657 = getelementptr inbounds float* %tmp1656, i64 1
- %tmp1658 = getelementptr inbounds float* %tmp1657, i64 1
- %tmp1659 = getelementptr inbounds float* %tmp1658, i64 1
- %tmp1660 = getelementptr inbounds float* %tmp1659, i64 1
- %tmp1661 = getelementptr inbounds float* %tmp1660, i64 1
- %tmp1662 = getelementptr inbounds float* %tmp1661, i64 1
- %tmp1663 = getelementptr inbounds float* %tmp1662, i64 1
- %tmp1664 = getelementptr inbounds float* %tmp1663, i64 1
- %tmp1665 = getelementptr inbounds float* %tmp1664, i64 1
- %tmp1666 = getelementptr inbounds float* %tmp1665, i64 1
- %tmp1667 = getelementptr inbounds float* %tmp1666, i64 1
- %tmp1668 = getelementptr inbounds float* %tmp1667, i64 1
- %tmp1669 = getelementptr inbounds float* %tmp1668, i64 1
- %tmp1670 = getelementptr inbounds float* %tmp1669, i64 1
- %tmp1671 = getelementptr inbounds float* %tmp1670, i64 1
- %tmp1672 = getelementptr inbounds float* %tmp1671, i64 1
- %tmp1673 = getelementptr inbounds float* %tmp1672, i64 1
- %tmp1674 = getelementptr inbounds float* %tmp1673, i64 1
- %tmp1675 = getelementptr inbounds float* %tmp1674, i64 1
- %tmp1676 = getelementptr inbounds float* %tmp1675, i64 1
- %tmp1677 = getelementptr inbounds float* %tmp1676, i64 1
- %tmp1678 = getelementptr inbounds float* %tmp1677, i64 1
- %tmp1679 = getelementptr inbounds float* %tmp1678, i64 1
- %tmp1680 = getelementptr inbounds float* %tmp1679, i64 1
- %tmp1681 = getelementptr inbounds float* %tmp1680, i64 1
- %tmp1682 = getelementptr inbounds float* %tmp1681, i64 1
- %tmp1683 = getelementptr inbounds float* %tmp1682, i64 1
- %tmp1684 = getelementptr inbounds float* %tmp1683, i64 1
- %tmp1685 = getelementptr inbounds float* %tmp1684, i64 1
- %tmp1686 = getelementptr inbounds float* %tmp1685, i64 1
- %tmp1687 = getelementptr inbounds float* %tmp1686, i64 1
- %tmp1688 = getelementptr inbounds float* %tmp1687, i64 1
- %tmp1689 = getelementptr inbounds float* %tmp1688, i64 1
- %tmp1690 = getelementptr inbounds float* %tmp1689, i64 1
- %tmp1691 = getelementptr inbounds float* %tmp1690, i64 1
- %tmp1692 = getelementptr inbounds float* %tmp1691, i64 1
- %tmp1693 = getelementptr inbounds float* %tmp1692, i64 1
- %tmp1694 = getelementptr inbounds float* %tmp1693, i64 1
- %tmp1695 = getelementptr inbounds float* %tmp1694, i64 1
- %tmp1696 = getelementptr inbounds float* %tmp1695, i64 1
- %tmp1697 = getelementptr inbounds float* %tmp1696, i64 1
- %tmp1698 = getelementptr inbounds float* %tmp1697, i64 1
- %tmp1699 = getelementptr inbounds float* %tmp1698, i64 1
- %tmp1700 = getelementptr inbounds float* %tmp1699, i64 1
- %tmp1701 = getelementptr inbounds float* %tmp1700, i64 1
- %tmp1702 = getelementptr inbounds float* %tmp1701, i64 1
- %tmp1703 = getelementptr inbounds float* %tmp1702, i64 1
- %tmp1704 = getelementptr inbounds float* %tmp1703, i64 1
- %tmp1705 = getelementptr inbounds float* %tmp1704, i64 1
- %tmp1706 = getelementptr inbounds float* %tmp1705, i64 1
- %tmp1707 = getelementptr inbounds float* %tmp1706, i64 1
- %tmp1708 = getelementptr inbounds float* %tmp1707, i64 1
- %tmp1709 = getelementptr inbounds float* %tmp1708, i64 1
- %tmp1710 = getelementptr inbounds float* %tmp1709, i64 1
- %tmp1711 = getelementptr inbounds float* %tmp1710, i64 1
- %tmp1712 = getelementptr inbounds float* %tmp1711, i64 1
- %tmp1713 = getelementptr inbounds float* %tmp1712, i64 1
- %tmp1714 = getelementptr inbounds float* %tmp1713, i64 1
- %tmp1715 = getelementptr inbounds float* %tmp1714, i64 1
- %tmp1716 = getelementptr inbounds float* %tmp1715, i64 1
- %tmp1717 = getelementptr inbounds float* %tmp1716, i64 1
- %tmp1718 = getelementptr inbounds float* %tmp1717, i64 1
- %tmp1719 = getelementptr inbounds float* %tmp1718, i64 1
- %tmp1720 = getelementptr inbounds float* %tmp1719, i64 1
- %tmp1721 = getelementptr inbounds float* %tmp1720, i64 1
- %tmp1722 = getelementptr inbounds float* %tmp1721, i64 1
- %tmp1723 = getelementptr inbounds float* %tmp1722, i64 1
- %tmp1724 = getelementptr inbounds float* %tmp1723, i64 1
- %tmp1725 = getelementptr inbounds float* %tmp1724, i64 1
- %tmp1726 = getelementptr inbounds float* %tmp1725, i64 1
- %tmp1727 = getelementptr inbounds float* %tmp1726, i64 1
- %tmp1728 = getelementptr inbounds float* %tmp1727, i64 1
- %tmp1729 = getelementptr inbounds float* %tmp1728, i64 1
- %tmp1730 = getelementptr inbounds float* %tmp1729, i64 1
- %tmp1731 = getelementptr inbounds float* %tmp1730, i64 1
- %tmp1732 = getelementptr inbounds float* %tmp1731, i64 1
- %tmp1733 = getelementptr inbounds float* %tmp1732, i64 1
- %tmp1734 = getelementptr inbounds float* %tmp1733, i64 1
- %tmp1735 = getelementptr inbounds float* %tmp1734, i64 1
- %tmp1736 = getelementptr inbounds float* %tmp1735, i64 1
- %tmp1737 = getelementptr inbounds float* %tmp1736, i64 1
- %tmp1738 = getelementptr inbounds float* %tmp1737, i64 1
- %tmp1739 = getelementptr inbounds float* %tmp1738, i64 1
- %tmp1740 = getelementptr inbounds float* %tmp1739, i64 1
- %tmp1741 = getelementptr inbounds float* %tmp1740, i64 1
- %tmp1742 = getelementptr inbounds float* %tmp1741, i64 1
- %tmp1743 = getelementptr inbounds float* %tmp1742, i64 1
- %tmp1744 = getelementptr inbounds float* %tmp1743, i64 1
- %tmp1745 = getelementptr inbounds float* %tmp1744, i64 1
- %tmp1746 = getelementptr inbounds float* %tmp1745, i64 1
- %tmp1747 = getelementptr inbounds float* %tmp1746, i64 1
- %tmp1748 = getelementptr inbounds float* %tmp1747, i64 1
- %tmp1749 = getelementptr inbounds float* %tmp1748, i64 1
- %tmp1750 = getelementptr inbounds float* %tmp1749, i64 1
- %tmp1751 = getelementptr inbounds float* %tmp1750, i64 1
- %tmp1752 = getelementptr inbounds float* %tmp1751, i64 1
- %tmp1753 = getelementptr inbounds float* %tmp1752, i64 1
- %tmp1754 = getelementptr inbounds float* %tmp1753, i64 1
- %tmp1755 = getelementptr inbounds float* %tmp1754, i64 1
- %tmp1756 = getelementptr inbounds float* %tmp1755, i64 1
- %tmp1757 = getelementptr inbounds float* %tmp1756, i64 1
- %tmp1758 = getelementptr inbounds float* %tmp1757, i64 1
- %tmp1759 = getelementptr inbounds float* %tmp1758, i64 1
- %tmp1760 = getelementptr inbounds float* %tmp1759, i64 1
- %tmp1761 = getelementptr inbounds float* %tmp1760, i64 1
- %tmp1762 = getelementptr inbounds float* %tmp1761, i64 1
- %tmp1763 = getelementptr inbounds float* %tmp1762, i64 1
- %tmp1764 = getelementptr inbounds float* %tmp1763, i64 1
- %tmp1765 = getelementptr inbounds float* %tmp1764, i64 1
- %tmp1766 = getelementptr inbounds float* %tmp1765, i64 1
- %tmp1767 = getelementptr inbounds float* %tmp1766, i64 1
- %tmp1768 = getelementptr inbounds float* %tmp1767, i64 1
- %tmp1769 = getelementptr inbounds float* %tmp1768, i64 1
- %tmp1770 = getelementptr inbounds float* %tmp1769, i64 1
- %tmp1771 = getelementptr inbounds float* %tmp1770, i64 1
- %tmp1772 = getelementptr inbounds float* %tmp1771, i64 1
- %tmp1773 = getelementptr inbounds float* %tmp1772, i64 1
- %tmp1774 = getelementptr inbounds float* %tmp1773, i64 1
- %tmp1775 = getelementptr inbounds float* %tmp1774, i64 1
- %tmp1776 = getelementptr inbounds float* %tmp1775, i64 1
- %tmp1777 = getelementptr inbounds float* %tmp1776, i64 1
- %tmp1778 = getelementptr inbounds float* %tmp1777, i64 1
- %tmp1779 = getelementptr inbounds float* %tmp1778, i64 1
- %tmp1780 = getelementptr inbounds float* %tmp1779, i64 1
- %tmp1781 = getelementptr inbounds float* %tmp1780, i64 1
- %tmp1782 = getelementptr inbounds float* %tmp1781, i64 1
- %tmp1783 = getelementptr inbounds float* %tmp1782, i64 1
- %tmp1784 = getelementptr inbounds float* %tmp1783, i64 1
- %tmp1785 = getelementptr inbounds float* %tmp1784, i64 1
- %tmp1786 = getelementptr inbounds float* %tmp1785, i64 1
- %tmp1787 = getelementptr inbounds float* %tmp1786, i64 1
- %tmp1788 = getelementptr inbounds float* %tmp1787, i64 1
- %tmp1789 = getelementptr inbounds float* %tmp1788, i64 1
- %tmp1790 = getelementptr inbounds float* %tmp1789, i64 1
- %tmp1791 = getelementptr inbounds float* %tmp1790, i64 1
- %tmp1792 = getelementptr inbounds float* %tmp1791, i64 1
- %tmp1793 = getelementptr inbounds float* %tmp1792, i64 1
- %tmp1794 = getelementptr inbounds float* %tmp1793, i64 1
- %tmp1795 = getelementptr inbounds float* %tmp1794, i64 1
- %tmp1796 = getelementptr inbounds float* %tmp1795, i64 1
- %tmp1797 = getelementptr inbounds float* %tmp1796, i64 1
- %tmp1798 = getelementptr inbounds float* %tmp1797, i64 1
- %tmp1799 = getelementptr inbounds float* %tmp1798, i64 1
- %tmp1800 = getelementptr inbounds float* %tmp1799, i64 1
- %tmp1801 = getelementptr inbounds float* %tmp1800, i64 1
- %tmp1802 = getelementptr inbounds float* %tmp1801, i64 1
- %tmp1803 = getelementptr inbounds float* %tmp1802, i64 1
- %tmp1804 = getelementptr inbounds float* %tmp1803, i64 1
- %tmp1805 = getelementptr inbounds float* %tmp1804, i64 1
- %tmp1806 = getelementptr inbounds float* %tmp1805, i64 1
- %tmp1807 = getelementptr inbounds float* %tmp1806, i64 1
- %tmp1808 = getelementptr inbounds float* %tmp1807, i64 1
- %tmp1809 = getelementptr inbounds float* %tmp1808, i64 1
- %tmp1810 = getelementptr inbounds float* %tmp1809, i64 1
- %tmp1811 = getelementptr inbounds float* %tmp1810, i64 1
- %tmp1812 = getelementptr inbounds float* %tmp1811, i64 1
- %tmp1813 = getelementptr inbounds float* %tmp1812, i64 1
- %tmp1814 = getelementptr inbounds float* %tmp1813, i64 1
- %tmp1815 = getelementptr inbounds float* %tmp1814, i64 1
- %tmp1816 = getelementptr inbounds float* %tmp1815, i64 1
- %tmp1817 = getelementptr inbounds float* %tmp1816, i64 1
- %tmp1818 = getelementptr inbounds float* %tmp1817, i64 1
- %tmp1819 = getelementptr inbounds float* %tmp1818, i64 1
- %tmp1820 = getelementptr inbounds float* %tmp1819, i64 1
- %tmp1821 = getelementptr inbounds float* %tmp1820, i64 1
- %tmp1822 = getelementptr inbounds float* %tmp1821, i64 1
- %tmp1823 = getelementptr inbounds float* %tmp1822, i64 1
- %tmp1824 = getelementptr inbounds float* %tmp1823, i64 1
- %tmp1825 = getelementptr inbounds float* %tmp1824, i64 1
- %tmp1826 = getelementptr inbounds float* %tmp1825, i64 1
- %tmp1827 = getelementptr inbounds float* %tmp1826, i64 1
- %tmp1828 = getelementptr inbounds float* %tmp1827, i64 1
- %tmp1829 = getelementptr inbounds float* %tmp1828, i64 1
- %tmp1830 = getelementptr inbounds float* %tmp1829, i64 1
- %tmp1831 = getelementptr inbounds float* %tmp1830, i64 1
- %tmp1832 = getelementptr inbounds float* %tmp1831, i64 1
- %tmp1833 = getelementptr inbounds float* %tmp1832, i64 1
- %tmp1834 = getelementptr inbounds float* %tmp1833, i64 1
- %tmp1835 = getelementptr inbounds float* %tmp1834, i64 1
- %tmp1836 = getelementptr inbounds float* %tmp1835, i64 1
- %tmp1837 = getelementptr inbounds float* %tmp1836, i64 1
- %tmp1838 = getelementptr inbounds float* %tmp1837, i64 1
- %tmp1839 = getelementptr inbounds float* %tmp1838, i64 1
- %tmp1840 = getelementptr inbounds float* %tmp1839, i64 1
- %tmp1841 = getelementptr inbounds float* %tmp1840, i64 1
- %tmp1842 = getelementptr inbounds float* %tmp1841, i64 1
- %tmp1843 = getelementptr inbounds float* %tmp1842, i64 1
- %tmp1844 = getelementptr inbounds float* %tmp1843, i64 1
- %tmp1845 = getelementptr inbounds float* %tmp1844, i64 1
- %tmp1846 = getelementptr inbounds float* %tmp1845, i64 1
- %tmp1847 = getelementptr inbounds float* %tmp1846, i64 1
- %tmp1848 = getelementptr inbounds float* %tmp1847, i64 1
- %tmp1849 = getelementptr inbounds float* %tmp1848, i64 1
- %tmp1850 = getelementptr inbounds float* %tmp1849, i64 1
- %tmp1851 = getelementptr inbounds float* %tmp1850, i64 1
- %tmp1852 = getelementptr inbounds float* %tmp1851, i64 1
- %tmp1853 = getelementptr inbounds float* %tmp1852, i64 1
- %tmp1854 = getelementptr inbounds float* %tmp1853, i64 1
- %tmp1855 = getelementptr inbounds float* %tmp1854, i64 1
- %tmp1856 = getelementptr inbounds float* %tmp1855, i64 1
- %tmp1857 = getelementptr inbounds float* %tmp1856, i64 1
- %tmp1858 = getelementptr inbounds float* %tmp1857, i64 1
- %tmp1859 = getelementptr inbounds float* %tmp1858, i64 1
- %tmp1860 = getelementptr inbounds float* %tmp1859, i64 1
- %tmp1861 = getelementptr inbounds float* %tmp1860, i64 1
- %tmp1862 = getelementptr inbounds float* %tmp1861, i64 1
- %tmp1863 = getelementptr inbounds float* %tmp1862, i64 1
- %tmp1864 = getelementptr inbounds float* %tmp1863, i64 1
- %tmp1865 = getelementptr inbounds float* %tmp1864, i64 1
- %tmp1866 = getelementptr inbounds float* %tmp1865, i64 1
- %tmp1867 = getelementptr inbounds float* %tmp1866, i64 1
- %tmp1868 = getelementptr inbounds float* %tmp1867, i64 1
- %tmp1869 = getelementptr inbounds float* %tmp1868, i64 1
- %tmp1870 = getelementptr inbounds float* %tmp1869, i64 1
- %tmp1871 = getelementptr inbounds float* %tmp1870, i64 1
- %tmp1872 = getelementptr inbounds float* %tmp1871, i64 1
- %tmp1873 = getelementptr inbounds float* %tmp1872, i64 1
- %tmp1874 = getelementptr inbounds float* %tmp1873, i64 1
- %tmp1875 = getelementptr inbounds float* %tmp1874, i64 1
- %tmp1876 = getelementptr inbounds float* %tmp1875, i64 1
- %tmp1877 = getelementptr inbounds float* %tmp1876, i64 1
- %tmp1878 = getelementptr inbounds float* %tmp1877, i64 1
- %tmp1879 = getelementptr inbounds float* %tmp1878, i64 1
- %tmp1880 = getelementptr inbounds float* %tmp1879, i64 1
- %tmp1881 = getelementptr inbounds float* %tmp1880, i64 1
- %tmp1882 = getelementptr inbounds float* %tmp1881, i64 1
- %tmp1883 = getelementptr inbounds float* %tmp1882, i64 1
- %tmp1884 = getelementptr inbounds float* %tmp1883, i64 1
- %tmp1885 = getelementptr inbounds float* %tmp1884, i64 1
- %tmp1886 = getelementptr inbounds float* %tmp1885, i64 1
- %tmp1887 = getelementptr inbounds float* %tmp1886, i64 1
- %tmp1888 = getelementptr inbounds float* %tmp1887, i64 1
- %tmp1889 = getelementptr inbounds float* %tmp1888, i64 1
- %tmp1890 = getelementptr inbounds float* %tmp1889, i64 1
- %tmp1891 = getelementptr inbounds float* %tmp1890, i64 1
- %tmp1892 = getelementptr inbounds float* %tmp1891, i64 1
- %tmp1893 = getelementptr inbounds float* %tmp1892, i64 1
- %tmp1894 = getelementptr inbounds float* %tmp1893, i64 1
- %tmp1895 = getelementptr inbounds float* %tmp1894, i64 1
- %tmp1896 = getelementptr inbounds float* %tmp1895, i64 1
- %tmp1897 = getelementptr inbounds float* %tmp1896, i64 1
- %tmp1898 = getelementptr inbounds float* %tmp1897, i64 1
- %tmp1899 = getelementptr inbounds float* %tmp1898, i64 1
- %tmp1900 = getelementptr inbounds float* %tmp1899, i64 1
- %tmp1901 = getelementptr inbounds float* %tmp1900, i64 1
- %tmp1902 = getelementptr inbounds float* %tmp1901, i64 1
- %tmp1903 = getelementptr inbounds float* %tmp1902, i64 1
- %tmp1904 = getelementptr inbounds float* %tmp1903, i64 1
- %tmp1905 = getelementptr inbounds float* %tmp1904, i64 1
- %tmp1906 = getelementptr inbounds float* %tmp1905, i64 1
- %tmp1907 = getelementptr inbounds float* %tmp1906, i64 1
- %tmp1908 = getelementptr inbounds float* %tmp1907, i64 1
- %tmp1909 = getelementptr inbounds float* %tmp1908, i64 1
- %tmp1910 = getelementptr inbounds float* %tmp1909, i64 1
- %tmp1911 = getelementptr inbounds float* %tmp1910, i64 1
- %tmp1912 = getelementptr inbounds float* %tmp1911, i64 1
- %tmp1913 = getelementptr inbounds float* %tmp1912, i64 1
- %tmp1914 = getelementptr inbounds float* %tmp1913, i64 1
- %tmp1915 = getelementptr inbounds float* %tmp1914, i64 1
- %tmp1916 = getelementptr inbounds float* %tmp1915, i64 1
- %tmp1917 = getelementptr inbounds float* %tmp1916, i64 1
- %tmp1918 = getelementptr inbounds float* %tmp1917, i64 1
- %tmp1919 = getelementptr inbounds float* %tmp1918, i64 1
- %tmp1920 = getelementptr inbounds float* %tmp1919, i64 1
- %tmp1921 = getelementptr inbounds float* %tmp1920, i64 1
- %tmp1922 = getelementptr inbounds float* %tmp1921, i64 1
- %tmp1923 = getelementptr inbounds float* %tmp1922, i64 1
- %tmp1924 = getelementptr inbounds float* %tmp1923, i64 1
- %tmp1925 = getelementptr inbounds float* %tmp1924, i64 1
- %tmp1926 = getelementptr inbounds float* %tmp1925, i64 1
- %tmp1927 = getelementptr inbounds float* %tmp1926, i64 1
- %tmp1928 = getelementptr inbounds float* %tmp1927, i64 1
- %tmp1929 = getelementptr inbounds float* %tmp1928, i64 1
- %tmp1930 = getelementptr inbounds float* %tmp1929, i64 1
- %tmp1931 = getelementptr inbounds float* %tmp1930, i64 1
- %tmp1932 = getelementptr inbounds float* %tmp1931, i64 1
- %tmp1933 = getelementptr inbounds float* %tmp1932, i64 1
- %tmp1934 = getelementptr inbounds float* %tmp1933, i64 1
- %tmp1935 = getelementptr inbounds float* %tmp1934, i64 1
- %tmp1936 = getelementptr inbounds float* %tmp1935, i64 1
- %tmp1937 = getelementptr inbounds float* %tmp1936, i64 1
- %tmp1938 = getelementptr inbounds float* %tmp1937, i64 1
- %tmp1939 = getelementptr inbounds float* %tmp1938, i64 1
- %tmp1940 = getelementptr inbounds float* %tmp1939, i64 1
- %tmp1941 = getelementptr inbounds float* %tmp1940, i64 1
- %tmp1942 = getelementptr inbounds float* %tmp1941, i64 1
- %tmp1943 = getelementptr inbounds float* %tmp1942, i64 1
- %tmp1944 = getelementptr inbounds float* %tmp1943, i64 1
- %tmp1945 = getelementptr inbounds float* %tmp1944, i64 1
- %tmp1946 = getelementptr inbounds float* %tmp1945, i64 1
- %tmp1947 = getelementptr inbounds float* %tmp1946, i64 1
- %tmp1948 = getelementptr inbounds float* %tmp1947, i64 1
- %tmp1949 = getelementptr inbounds float* %tmp1948, i64 1
- %tmp1950 = getelementptr inbounds float* %tmp1949, i64 1
- %tmp1951 = getelementptr inbounds float* %tmp1950, i64 1
- %tmp1952 = getelementptr inbounds float* %tmp1951, i64 1
- %tmp1953 = getelementptr inbounds float* %tmp1952, i64 1
- %tmp1954 = getelementptr inbounds float* %tmp1953, i64 1
- %tmp1955 = getelementptr inbounds float* %tmp1954, i64 1
- %tmp1956 = getelementptr inbounds float* %tmp1955, i64 1
- %tmp1957 = getelementptr inbounds float* %tmp1956, i64 1
- %tmp1958 = getelementptr inbounds float* %tmp1957, i64 1
- %tmp1959 = getelementptr inbounds float* %tmp1958, i64 1
- %tmp1960 = getelementptr inbounds float* %tmp1959, i64 1
- %tmp1961 = getelementptr inbounds float* %tmp1960, i64 1
- %tmp1962 = getelementptr inbounds float* %tmp1961, i64 1
- %tmp1963 = getelementptr inbounds float* %tmp1962, i64 1
- %tmp1964 = getelementptr inbounds float* %tmp1963, i64 1
- %tmp1965 = getelementptr inbounds float* %tmp1964, i64 1
- %tmp1966 = getelementptr inbounds float* %tmp1965, i64 1
- %tmp1967 = getelementptr inbounds float* %tmp1966, i64 1
- %tmp1968 = getelementptr inbounds float* %tmp1967, i64 1
- %tmp1969 = getelementptr inbounds float* %tmp1968, i64 1
- %tmp1970 = getelementptr inbounds float* %tmp1969, i64 1
- %tmp1971 = getelementptr inbounds float* %tmp1970, i64 1
- %tmp1972 = getelementptr inbounds float* %tmp1971, i64 1
- %tmp1973 = getelementptr inbounds float* %tmp1972, i64 1
- %tmp1974 = getelementptr inbounds float* %tmp1973, i64 1
- %tmp1975 = getelementptr inbounds float* %tmp1974, i64 1
- %tmp1976 = getelementptr inbounds float* %tmp1975, i64 1
- %tmp1977 = getelementptr inbounds float* %tmp1976, i64 1
- %tmp1978 = getelementptr inbounds float* %tmp1977, i64 1
- %tmp1979 = getelementptr inbounds float* %tmp1978, i64 1
- %tmp1980 = getelementptr inbounds float* %tmp1979, i64 1
- %tmp1981 = getelementptr inbounds float* %tmp1980, i64 1
- %tmp1982 = getelementptr inbounds float* %tmp1981, i64 1
- %tmp1983 = getelementptr inbounds float* %tmp1982, i64 1
- %tmp1984 = getelementptr inbounds float* %tmp1983, i64 1
- %tmp1985 = getelementptr inbounds float* %tmp1984, i64 1
- %tmp1986 = getelementptr inbounds float* %tmp1985, i64 1
- %tmp1987 = getelementptr inbounds float* %tmp1986, i64 1
- %tmp1988 = getelementptr inbounds float* %tmp1987, i64 1
- %tmp1989 = getelementptr inbounds float* %tmp1988, i64 1
- %tmp1990 = getelementptr inbounds float* %tmp1989, i64 1
- %tmp1991 = getelementptr inbounds float* %tmp1990, i64 1
- %tmp1992 = getelementptr inbounds float* %tmp1991, i64 1
- %tmp1993 = getelementptr inbounds float* %tmp1992, i64 1
- %tmp1994 = getelementptr inbounds float* %tmp1993, i64 1
- %tmp1995 = getelementptr inbounds float* %tmp1994, i64 1
- %tmp1996 = getelementptr inbounds float* %tmp1995, i64 1
- %tmp1997 = getelementptr inbounds float* %tmp1996, i64 1
- %tmp1998 = getelementptr inbounds float* %tmp1997, i64 1
- %tmp1999 = getelementptr inbounds float* %tmp1998, i64 1
- %tmp2000 = getelementptr inbounds float* %tmp1999, i64 1
- %tmp2001 = getelementptr inbounds float* %tmp2000, i64 1
- %tmp2002 = getelementptr inbounds float* %tmp2001, i64 1
- %tmp2003 = getelementptr inbounds float* %tmp2002, i64 1
- %tmp2004 = getelementptr inbounds float* %tmp2003, i64 1
- %tmp2005 = getelementptr inbounds float* %tmp2004, i64 1
- %tmp2006 = getelementptr inbounds float* %tmp2005, i64 1
- %tmp2007 = getelementptr inbounds float* %tmp2006, i64 1
- %tmp2008 = getelementptr inbounds float* %tmp2007, i64 1
- %tmp2009 = getelementptr inbounds float* %tmp2008, i64 1
- %tmp2010 = getelementptr inbounds float* %tmp2009, i64 1
- %tmp2011 = getelementptr inbounds float* %tmp2010, i64 1
- %tmp2012 = getelementptr inbounds float* %tmp2011, i64 1
- %tmp2013 = getelementptr inbounds float* %tmp2012, i64 1
- %tmp2014 = getelementptr inbounds float* %tmp2013, i64 1
- %tmp2015 = getelementptr inbounds float* %tmp2014, i64 1
- %tmp2016 = getelementptr inbounds float* %tmp2015, i64 1
- %tmp2017 = getelementptr inbounds float* %tmp2016, i64 1
- %tmp2018 = getelementptr inbounds float* %tmp2017, i64 1
- %tmp2019 = getelementptr inbounds float* %tmp2018, i64 1
- %tmp2020 = getelementptr inbounds float* %tmp2019, i64 1
- %tmp2021 = getelementptr inbounds float* %tmp2020, i64 1
- %tmp2022 = getelementptr inbounds float* %tmp2021, i64 1
- %tmp2023 = getelementptr inbounds float* %tmp2022, i64 1
- %tmp2024 = getelementptr inbounds float* %tmp2023, i64 1
- %tmp2025 = getelementptr inbounds float* %tmp2024, i64 1
- %tmp2026 = getelementptr inbounds float* %tmp2025, i64 1
- %tmp2027 = getelementptr inbounds float* %tmp2026, i64 1
- %tmp2028 = getelementptr inbounds float* %tmp2027, i64 1
- %tmp2029 = getelementptr inbounds float* %tmp2028, i64 1
- %tmp2030 = getelementptr inbounds float* %tmp2029, i64 1
- %tmp2031 = getelementptr inbounds float* %tmp2030, i64 1
- %tmp2032 = getelementptr inbounds float* %tmp2031, i64 1
- %tmp2033 = getelementptr inbounds float* %tmp2032, i64 1
- %tmp2034 = getelementptr inbounds float* %tmp2033, i64 1
- %tmp2035 = getelementptr inbounds float* %tmp2034, i64 1
- %tmp2036 = getelementptr inbounds float* %tmp2035, i64 1
- %tmp2037 = getelementptr inbounds float* %tmp2036, i64 1
- %tmp2038 = getelementptr inbounds float* %tmp2037, i64 1
- %tmp2039 = getelementptr inbounds float* %tmp2038, i64 1
- %tmp2040 = getelementptr inbounds float* %tmp2039, i64 1
- %tmp2041 = getelementptr inbounds float* %tmp2040, i64 1
- %tmp2042 = getelementptr inbounds float* %tmp2041, i64 1
- %tmp2043 = getelementptr inbounds float* %tmp2042, i64 1
- %tmp2044 = getelementptr inbounds float* %tmp2043, i64 1
- %tmp2045 = getelementptr inbounds float* %tmp2044, i64 1
- %tmp2046 = getelementptr inbounds float* %tmp2045, i64 1
- %tmp2047 = getelementptr inbounds float* %tmp2046, i64 1
- %tmp2048 = getelementptr inbounds float* %tmp2047, i64 1
- %tmp2049 = getelementptr inbounds float* %tmp2048, i64 1
- %tmp2050 = getelementptr inbounds float* %tmp2049, i64 1
- %tmp2051 = getelementptr inbounds float* %tmp2050, i64 1
- %tmp2052 = getelementptr inbounds float* %tmp2051, i64 1
- %tmp2053 = getelementptr inbounds float* %tmp2052, i64 1
- %tmp2054 = getelementptr inbounds float* %tmp2053, i64 1
- %tmp2055 = getelementptr inbounds float* %tmp2054, i64 1
- %tmp2056 = getelementptr inbounds float* %tmp2055, i64 1
- %tmp2057 = getelementptr inbounds float* %tmp2056, i64 1
- %tmp2058 = getelementptr inbounds float* %tmp2057, i64 1
- %tmp2059 = getelementptr inbounds float* %tmp2058, i64 1
- %tmp2060 = getelementptr inbounds float* %tmp2059, i64 1
- %tmp2061 = getelementptr inbounds float* %tmp2060, i64 1
- %tmp2062 = getelementptr inbounds float* %tmp2061, i64 1
- %tmp2063 = getelementptr inbounds float* %tmp2062, i64 1
- %tmp2064 = getelementptr inbounds float* %tmp2063, i64 1
- %tmp2065 = getelementptr inbounds float* %tmp2064, i64 1
- %tmp2066 = getelementptr inbounds float* %tmp2065, i64 1
- %tmp2067 = getelementptr inbounds float* %tmp2066, i64 1
- %tmp2068 = getelementptr inbounds float* %tmp2067, i64 1
- %tmp2069 = getelementptr inbounds float* %tmp2068, i64 1
- %tmp2070 = getelementptr inbounds float* %tmp2069, i64 1
- %tmp2071 = getelementptr inbounds float* %tmp2070, i64 1
- %tmp2072 = getelementptr inbounds float* %tmp2071, i64 1
- %tmp2073 = getelementptr inbounds float* %tmp2072, i64 1
- %tmp2074 = getelementptr inbounds float* %tmp2073, i64 1
- %tmp2075 = getelementptr inbounds float* %tmp2074, i64 1
- %tmp2076 = getelementptr inbounds float* %tmp2075, i64 1
- %tmp2077 = getelementptr inbounds float* %tmp2076, i64 1
- %tmp2078 = getelementptr inbounds float* %tmp2077, i64 1
- %tmp2079 = getelementptr inbounds float* %tmp2078, i64 1
- %tmp2080 = getelementptr inbounds float* %tmp2079, i64 1
- %tmp2081 = getelementptr inbounds float* %tmp2080, i64 1
- %tmp2082 = getelementptr inbounds float* %tmp2081, i64 1
- %tmp2083 = getelementptr inbounds float* %tmp2082, i64 1
- %tmp2084 = getelementptr inbounds float* %tmp2083, i64 1
- %tmp2085 = getelementptr inbounds float* %tmp2084, i64 1
- %tmp2086 = getelementptr inbounds float* %tmp2085, i64 1
- %tmp2087 = getelementptr inbounds float* %tmp2086, i64 1
- %tmp2088 = getelementptr inbounds float* %tmp2087, i64 1
- %tmp2089 = getelementptr inbounds float* %tmp2088, i64 1
- %tmp2090 = getelementptr inbounds float* %tmp2089, i64 1
- %tmp2091 = getelementptr inbounds float* %tmp2090, i64 1
- %tmp2092 = getelementptr inbounds float* %tmp2091, i64 1
- %tmp2093 = getelementptr inbounds float* %tmp2092, i64 1
- %tmp2094 = getelementptr inbounds float* %tmp2093, i64 1
- %tmp2095 = getelementptr inbounds float* %tmp2094, i64 1
- %tmp2096 = getelementptr inbounds float* %tmp2095, i64 1
- %tmp2097 = getelementptr inbounds float* %tmp2096, i64 1
- %tmp2098 = getelementptr inbounds float* %tmp2097, i64 1
- %tmp2099 = getelementptr inbounds float* %tmp2098, i64 1
- %tmp2100 = getelementptr inbounds float* %tmp2099, i64 1
- %tmp2101 = getelementptr inbounds float* %tmp2100, i64 1
- %tmp2102 = getelementptr inbounds float* %tmp2101, i64 1
- %tmp2103 = getelementptr inbounds float* %tmp2102, i64 1
- %tmp2104 = getelementptr inbounds float* %tmp2103, i64 1
- %tmp2105 = getelementptr inbounds float* %tmp2104, i64 1
- %tmp2106 = getelementptr inbounds float* %tmp2105, i64 1
- %tmp2107 = getelementptr inbounds float* %tmp2106, i64 1
- %tmp2108 = getelementptr inbounds float* %tmp2107, i64 1
- %tmp2109 = getelementptr inbounds float* %tmp2108, i64 1
- %tmp2110 = getelementptr inbounds float* %tmp2109, i64 1
- %tmp2111 = getelementptr inbounds float* %tmp2110, i64 1
- %tmp2112 = getelementptr inbounds float* %tmp2111, i64 1
- %tmp2113 = getelementptr inbounds float* %tmp2112, i64 1
- %tmp2114 = getelementptr inbounds float* %tmp2113, i64 1
- %tmp2115 = getelementptr inbounds float* %tmp2114, i64 1
- %tmp2116 = getelementptr inbounds float* %tmp2115, i64 1
- %tmp2117 = getelementptr inbounds float* %tmp2116, i64 1
- %tmp2118 = getelementptr inbounds float* %tmp2117, i64 1
- %tmp2119 = getelementptr inbounds float* %tmp2118, i64 1
- %tmp2120 = getelementptr inbounds float* %tmp2119, i64 1
- %tmp2121 = getelementptr inbounds float* %tmp2120, i64 1
- %tmp2122 = getelementptr inbounds float* %tmp2121, i64 1
- %tmp2123 = getelementptr inbounds float* %tmp2122, i64 1
- %tmp2124 = getelementptr inbounds float* %tmp2123, i64 1
- %tmp2125 = getelementptr inbounds float* %tmp2124, i64 1
- %tmp2126 = getelementptr inbounds float* %tmp2125, i64 1
- %tmp2127 = getelementptr inbounds float* %tmp2126, i64 1
- %tmp2128 = getelementptr inbounds float* %tmp2127, i64 1
- %tmp2129 = getelementptr inbounds float* %tmp2128, i64 1
- %tmp2130 = getelementptr inbounds float* %tmp2129, i64 1
- %tmp2131 = getelementptr inbounds float* %tmp2130, i64 1
- %tmp2132 = getelementptr inbounds float* %tmp2131, i64 1
- %tmp2133 = getelementptr inbounds float* %tmp2132, i64 1
- %tmp2134 = getelementptr inbounds float* %tmp2133, i64 1
- %tmp2135 = getelementptr inbounds float* %tmp2134, i64 1
- %tmp2136 = getelementptr inbounds float* %tmp2135, i64 1
- %tmp2137 = getelementptr inbounds float* %tmp2136, i64 1
- %tmp2138 = getelementptr inbounds float* %tmp2137, i64 1
- %tmp2139 = getelementptr inbounds float* %tmp2138, i64 1
- %tmp2140 = getelementptr inbounds float* %tmp2139, i64 1
- %tmp2141 = getelementptr inbounds float* %tmp2140, i64 1
- %tmp2142 = getelementptr inbounds float* %tmp2141, i64 1
- %tmp2143 = getelementptr inbounds float* %tmp2142, i64 1
- %tmp2144 = getelementptr inbounds float* %tmp2143, i64 1
- %tmp2145 = getelementptr inbounds float* %tmp2144, i64 1
- %tmp2146 = getelementptr inbounds float* %tmp2145, i64 1
- %tmp2147 = getelementptr inbounds float* %tmp2146, i64 1
- %tmp2148 = getelementptr inbounds float* %tmp2147, i64 1
- %tmp2149 = getelementptr inbounds float* %tmp2148, i64 1
- %tmp2150 = getelementptr inbounds float* %tmp2149, i64 1
- %tmp2151 = getelementptr inbounds float* %tmp2150, i64 1
- %tmp2152 = getelementptr inbounds float* %tmp2151, i64 1
- %tmp2153 = getelementptr inbounds float* %tmp2152, i64 1
- %tmp2154 = getelementptr inbounds float* %tmp2153, i64 1
- %tmp2155 = getelementptr inbounds float* %tmp2154, i64 1
- %tmp2156 = getelementptr inbounds float* %tmp2155, i64 1
- %tmp2157 = getelementptr inbounds float* %tmp2156, i64 1
- %tmp2158 = getelementptr inbounds float* %tmp2157, i64 1
- %tmp2159 = getelementptr inbounds float* %tmp2158, i64 1
- %tmp2160 = getelementptr inbounds float* %tmp2159, i64 1
- %tmp2161 = getelementptr inbounds float* %tmp2160, i64 1
- %tmp2162 = getelementptr inbounds float* %tmp2161, i64 1
- %tmp2163 = getelementptr inbounds float* %tmp2162, i64 1
- %tmp2164 = getelementptr inbounds float* %tmp2163, i64 1
- %tmp2165 = getelementptr inbounds float* %tmp2164, i64 1
- %tmp2166 = getelementptr inbounds float* %tmp2165, i64 1
- %tmp2167 = getelementptr inbounds float* %tmp2166, i64 1
- %tmp2168 = getelementptr inbounds float* %tmp2167, i64 1
- %tmp2169 = getelementptr inbounds float* %tmp2168, i64 1
- %tmp2170 = getelementptr inbounds float* %tmp2169, i64 1
- %tmp2171 = getelementptr inbounds float* %tmp2170, i64 1
- %tmp2172 = getelementptr inbounds float* %tmp2171, i64 1
- %tmp2173 = getelementptr inbounds float* %tmp2172, i64 1
- %tmp2174 = getelementptr inbounds float* %tmp2173, i64 1
- %tmp2175 = getelementptr inbounds float* %tmp2174, i64 1
- %tmp2176 = getelementptr inbounds float* %tmp2175, i64 1
- %tmp2177 = getelementptr inbounds float* %tmp2176, i64 1
- %tmp2178 = getelementptr inbounds float* %tmp2177, i64 1
- %tmp2179 = getelementptr inbounds float* %tmp2178, i64 1
- %tmp2180 = getelementptr inbounds float* %tmp2179, i64 1
- %tmp2181 = getelementptr inbounds float* %tmp2180, i64 1
- %tmp2182 = getelementptr inbounds float* %tmp2181, i64 1
- %tmp2183 = getelementptr inbounds float* %tmp2182, i64 1
- %tmp2184 = getelementptr inbounds float* %tmp2183, i64 1
- %tmp2185 = getelementptr inbounds float* %tmp2184, i64 1
- %tmp2186 = getelementptr inbounds float* %tmp2185, i64 1
- %tmp2187 = getelementptr inbounds float* %tmp2186, i64 1
- %tmp2188 = getelementptr inbounds float* %tmp2187, i64 1
- %tmp2189 = getelementptr inbounds float* %tmp2188, i64 1
- %tmp2190 = getelementptr inbounds float* %tmp2189, i64 1
- %tmp2191 = getelementptr inbounds float* %tmp2190, i64 1
- %tmp2192 = getelementptr inbounds float* %tmp2191, i64 1
- %tmp2193 = getelementptr inbounds float* %tmp2192, i64 1
- %tmp2194 = getelementptr inbounds float* %tmp2193, i64 1
- %tmp2195 = getelementptr inbounds float* %tmp2194, i64 1
- %tmp2196 = getelementptr inbounds float* %tmp2195, i64 1
- %tmp2197 = getelementptr inbounds float* %tmp2196, i64 1
- %tmp2198 = getelementptr inbounds float* %tmp2197, i64 1
- %tmp2199 = getelementptr inbounds float* %tmp2198, i64 1
- %tmp2200 = getelementptr inbounds float* %tmp2199, i64 1
- %tmp2201 = getelementptr inbounds float* %tmp2200, i64 1
- %tmp2202 = getelementptr inbounds float* %tmp2201, i64 1
- %tmp2203 = getelementptr inbounds float* %tmp2202, i64 1
- %tmp2204 = getelementptr inbounds float* %tmp2203, i64 1
- %tmp2205 = getelementptr inbounds float* %tmp2204, i64 1
- %tmp2206 = getelementptr inbounds float* %tmp2205, i64 1
- %tmp2207 = getelementptr inbounds float* %tmp2206, i64 1
- %tmp2208 = getelementptr inbounds float* %tmp2207, i64 1
- %tmp2209 = getelementptr inbounds float* %tmp2208, i64 1
- %tmp2210 = getelementptr inbounds float* %tmp2209, i64 1
- %tmp2211 = getelementptr inbounds float* %tmp2210, i64 1
- %tmp2212 = getelementptr inbounds float* %tmp2211, i64 1
- %tmp2213 = getelementptr inbounds float* %tmp2212, i64 1
- %tmp2214 = getelementptr inbounds float* %tmp2213, i64 1
- %tmp2215 = getelementptr inbounds float* %tmp2214, i64 1
- %tmp2216 = getelementptr inbounds float* %tmp2215, i64 1
- %tmp2217 = getelementptr inbounds float* %tmp2216, i64 1
- %tmp2218 = getelementptr inbounds float* %tmp2217, i64 1
- %tmp2219 = getelementptr inbounds float* %tmp2218, i64 1
- %tmp2220 = getelementptr inbounds float* %tmp2219, i64 1
- %tmp2221 = getelementptr inbounds float* %tmp2220, i64 1
- %tmp2222 = getelementptr inbounds float* %tmp2221, i64 1
- %tmp2223 = getelementptr inbounds float* %tmp2222, i64 1
- %tmp2224 = getelementptr inbounds float* %tmp2223, i64 1
- %tmp2225 = getelementptr inbounds float* %tmp2224, i64 1
- %tmp2226 = getelementptr inbounds float* %tmp2225, i64 1
- %tmp2227 = getelementptr inbounds float* %tmp2226, i64 1
- %tmp2228 = getelementptr inbounds float* %tmp2227, i64 1
- %tmp2229 = getelementptr inbounds float* %tmp2228, i64 1
- %tmp2230 = getelementptr inbounds float* %tmp2229, i64 1
- %tmp2231 = getelementptr inbounds float* %tmp2230, i64 1
- %tmp2232 = getelementptr inbounds float* %tmp2231, i64 1
- %tmp2233 = getelementptr inbounds float* %tmp2232, i64 1
- %tmp2234 = getelementptr inbounds float* %tmp2233, i64 1
- %tmp2235 = getelementptr inbounds float* %tmp2234, i64 1
- %tmp2236 = getelementptr inbounds float* %tmp2235, i64 1
- %tmp2237 = getelementptr inbounds float* %tmp2236, i64 1
- %tmp2238 = getelementptr inbounds float* %tmp2237, i64 1
- %tmp2239 = getelementptr inbounds float* %tmp2238, i64 1
- %tmp2240 = getelementptr inbounds float* %tmp2239, i64 1
- %tmp2241 = getelementptr inbounds float* %tmp2240, i64 1
- %tmp2242 = getelementptr inbounds float* %tmp2241, i64 1
- %tmp2243 = getelementptr inbounds float* %tmp2242, i64 1
- %tmp2244 = getelementptr inbounds float* %tmp2243, i64 1
- %tmp2245 = getelementptr inbounds float* %tmp2244, i64 1
- %tmp2246 = getelementptr inbounds float* %tmp2245, i64 1
- %tmp2247 = getelementptr inbounds float* %tmp2246, i64 1
- %tmp2248 = getelementptr inbounds float* %tmp2247, i64 1
- %tmp2249 = getelementptr inbounds float* %tmp2248, i64 1
- %tmp2250 = getelementptr inbounds float* %tmp2249, i64 1
- %tmp2251 = getelementptr inbounds float* %tmp2250, i64 1
- %tmp2252 = getelementptr inbounds float* %tmp2251, i64 1
- %tmp2253 = getelementptr inbounds float* %tmp2252, i64 1
- %tmp2254 = getelementptr inbounds float* %tmp2253, i64 1
- %tmp2255 = getelementptr inbounds float* %tmp2254, i64 1
- %tmp2256 = getelementptr inbounds float* %tmp2255, i64 1
- %tmp2257 = getelementptr inbounds float* %tmp2256, i64 1
- %tmp2258 = getelementptr inbounds float* %tmp2257, i64 1
- %tmp2259 = getelementptr inbounds float* %tmp2258, i64 1
- %tmp2260 = getelementptr inbounds float* %tmp2259, i64 1
- %tmp2261 = getelementptr inbounds float* %tmp2260, i64 1
- %tmp2262 = getelementptr inbounds float* %tmp2261, i64 1
- %tmp2263 = getelementptr inbounds float* %tmp2262, i64 1
- %tmp2264 = getelementptr inbounds float* %tmp2263, i64 1
- %tmp2265 = getelementptr inbounds float* %tmp2264, i64 1
- %tmp2266 = getelementptr inbounds float* %tmp2265, i64 1
- %tmp2267 = getelementptr inbounds float* %tmp2266, i64 1
- %tmp2268 = getelementptr inbounds float* %tmp2267, i64 1
- %tmp2269 = getelementptr inbounds float* %tmp2268, i64 1
- %tmp2270 = getelementptr inbounds float* %tmp2269, i64 1
- %tmp2271 = getelementptr inbounds float* %tmp2270, i64 1
- %tmp2272 = getelementptr inbounds float* %tmp2271, i64 1
- %tmp2273 = getelementptr inbounds float* %tmp2272, i64 1
- %tmp2274 = getelementptr inbounds float* %tmp2273, i64 1
- %tmp2275 = getelementptr inbounds float* %tmp2274, i64 1
- %tmp2276 = getelementptr inbounds float* %tmp2275, i64 1
- %tmp2277 = getelementptr inbounds float* %tmp2276, i64 1
- %tmp2278 = getelementptr inbounds float* %tmp2277, i64 1
- %tmp2279 = getelementptr inbounds float* %tmp2278, i64 1
- %tmp2280 = getelementptr inbounds float* %tmp2279, i64 1
- %tmp2281 = getelementptr inbounds float* %tmp2280, i64 1
- %tmp2282 = getelementptr inbounds float* %tmp2281, i64 1
- %tmp2283 = getelementptr inbounds float* %tmp2282, i64 1
- %tmp2284 = getelementptr inbounds float* %tmp2283, i64 1
- %tmp2285 = getelementptr inbounds float* %tmp2284, i64 1
- %tmp2286 = getelementptr inbounds float* %tmp2285, i64 1
- %tmp2287 = getelementptr inbounds float* %tmp2286, i64 1
- %tmp2288 = getelementptr inbounds float* %tmp2287, i64 1
- %tmp2289 = getelementptr inbounds float* %tmp2288, i64 1
- %tmp2290 = getelementptr inbounds float* %tmp2289, i64 1
- %tmp2291 = getelementptr inbounds float* %tmp2290, i64 1
- %tmp2292 = getelementptr inbounds float* %tmp2291, i64 1
- %tmp2293 = getelementptr inbounds float* %tmp2292, i64 1
- %tmp2294 = getelementptr inbounds float* %tmp2293, i64 1
- %tmp2295 = getelementptr inbounds float* %tmp2294, i64 1
- %tmp2296 = getelementptr inbounds float* %tmp2295, i64 1
- %tmp2297 = getelementptr inbounds float* %tmp2296, i64 1
- %tmp2298 = getelementptr inbounds float* %tmp2297, i64 1
- %tmp2299 = getelementptr inbounds float* %tmp2298, i64 1
- %tmp2300 = getelementptr inbounds float* %tmp2299, i64 1
- %tmp2301 = getelementptr inbounds float* %tmp2300, i64 1
- %tmp2302 = getelementptr inbounds float* %tmp2301, i64 1
- %tmp2303 = getelementptr inbounds float* %tmp2302, i64 1
- %tmp2304 = getelementptr inbounds float* %tmp2303, i64 1
- %tmp2305 = getelementptr inbounds float* %tmp2304, i64 1
- %tmp2306 = getelementptr inbounds float* %tmp2305, i64 1
- %tmp2307 = getelementptr inbounds float* %tmp2306, i64 1
- %tmp2308 = getelementptr inbounds float* %tmp2307, i64 1
- %tmp2309 = getelementptr inbounds float* %tmp2308, i64 1
- %tmp2310 = getelementptr inbounds float* %tmp2309, i64 1
- %tmp2311 = getelementptr inbounds float* %tmp2310, i64 1
- %tmp2312 = getelementptr inbounds float* %tmp2311, i64 1
- %tmp2313 = getelementptr inbounds float* %tmp2312, i64 1
- %tmp2314 = getelementptr inbounds float* %tmp2313, i64 1
- %tmp2315 = getelementptr inbounds float* %tmp2314, i64 1
- %tmp2316 = getelementptr inbounds float* %tmp2315, i64 1
- %tmp2317 = getelementptr inbounds float* %tmp2316, i64 1
- %tmp2318 = getelementptr inbounds float* %tmp2317, i64 1
- %tmp2319 = getelementptr inbounds float* %tmp2318, i64 1
- %tmp2320 = getelementptr inbounds float* %tmp2319, i64 1
- %tmp2321 = getelementptr inbounds float* %tmp2320, i64 1
- %tmp2322 = getelementptr inbounds float* %tmp2321, i64 1
- %tmp2323 = getelementptr inbounds float* %tmp2322, i64 1
- %tmp2324 = getelementptr inbounds float* %tmp2323, i64 1
- %tmp2325 = getelementptr inbounds float* %tmp2324, i64 1
- %tmp2326 = getelementptr inbounds float* %tmp2325, i64 1
- %tmp2327 = getelementptr inbounds float* %tmp2326, i64 1
- %tmp2328 = getelementptr inbounds float* %tmp2327, i64 1
- %tmp2329 = getelementptr inbounds float* %tmp2328, i64 1
- %tmp2330 = getelementptr inbounds float* %tmp2329, i64 1
- %tmp2331 = getelementptr inbounds float* %tmp2330, i64 1
- %tmp2332 = getelementptr inbounds float* %tmp2331, i64 1
- %tmp2333 = getelementptr inbounds float* %tmp2332, i64 1
- %tmp2334 = getelementptr inbounds float* %tmp2333, i64 1
- %tmp2335 = getelementptr inbounds float* %tmp2334, i64 1
- %tmp2336 = getelementptr inbounds float* %tmp2335, i64 1
- %tmp2337 = getelementptr inbounds float* %tmp2336, i64 1
- %tmp2338 = getelementptr inbounds float* %tmp2337, i64 1
- %tmp2339 = getelementptr inbounds float* %tmp2338, i64 1
- %tmp2340 = getelementptr inbounds float* %tmp2339, i64 1
- %tmp2341 = getelementptr inbounds float* %tmp2340, i64 1
- %tmp2342 = getelementptr inbounds float* %tmp2341, i64 1
- %tmp2343 = getelementptr inbounds float* %tmp2342, i64 1
- %tmp2344 = getelementptr inbounds float* %tmp2343, i64 1
- %tmp2345 = getelementptr inbounds float* %tmp2344, i64 1
- %tmp2346 = getelementptr inbounds float* %tmp2345, i64 1
- %tmp2347 = getelementptr inbounds float* %tmp2346, i64 1
- %tmp2348 = getelementptr inbounds float* %tmp2347, i64 1
- %tmp2349 = getelementptr inbounds float* %tmp2348, i64 1
- %tmp2350 = getelementptr inbounds float* %tmp2349, i64 1
- %tmp2351 = getelementptr inbounds float* %tmp2350, i64 1
- %tmp2352 = getelementptr inbounds float* %tmp2351, i64 1
- %tmp2353 = getelementptr inbounds float* %tmp2352, i64 1
- %tmp2354 = getelementptr inbounds float* %tmp2353, i64 1
- %tmp2355 = getelementptr inbounds float* %tmp2354, i64 1
- %tmp2356 = getelementptr inbounds float* %tmp2355, i64 1
- %tmp2357 = getelementptr inbounds float* %tmp2356, i64 1
- %tmp2358 = getelementptr inbounds float* %tmp2357, i64 1
- %tmp2359 = getelementptr inbounds float* %tmp2358, i64 1
- %tmp2360 = getelementptr inbounds float* %tmp2359, i64 1
- %tmp2361 = getelementptr inbounds float* %tmp2360, i64 1
- %tmp2362 = getelementptr inbounds float* %tmp2361, i64 1
- %tmp2363 = getelementptr inbounds float* %tmp2362, i64 1
- %tmp2364 = getelementptr inbounds float* %tmp2363, i64 1
- %tmp2365 = getelementptr inbounds float* %tmp2364, i64 1
- %tmp2366 = getelementptr inbounds float* %tmp2365, i64 1
- %tmp2367 = getelementptr inbounds float* %tmp2366, i64 1
- %tmp2368 = getelementptr inbounds float* %tmp2367, i64 1
- %tmp2369 = getelementptr inbounds float* %tmp2368, i64 1
- %tmp2370 = getelementptr inbounds float* %tmp2369, i64 1
- %tmp2371 = getelementptr inbounds float* %tmp2370, i64 1
- %tmp2372 = getelementptr inbounds float* %tmp2371, i64 1
- %tmp2373 = getelementptr inbounds float* %tmp2372, i64 1
- %tmp2374 = getelementptr inbounds float* %tmp2373, i64 1
- %tmp2375 = getelementptr inbounds float* %tmp2374, i64 1
- %tmp2376 = getelementptr inbounds float* %tmp2375, i64 1
- %tmp2377 = getelementptr inbounds float* %tmp2376, i64 1
- %tmp2378 = getelementptr inbounds float* %tmp2377, i64 1
- %tmp2379 = getelementptr inbounds float* %tmp2378, i64 1
- %tmp2380 = getelementptr inbounds float* %tmp2379, i64 1
- %tmp2381 = getelementptr inbounds float* %tmp2380, i64 1
- %tmp2382 = getelementptr inbounds float* %tmp2381, i64 1
- %tmp2383 = getelementptr inbounds float* %tmp2382, i64 1
- %tmp2384 = getelementptr inbounds float* %tmp2383, i64 1
- %tmp2385 = getelementptr inbounds float* %tmp2384, i64 1
- %tmp2386 = getelementptr inbounds float* %tmp2385, i64 1
- %tmp2387 = getelementptr inbounds float* %tmp2386, i64 1
- %tmp2388 = getelementptr inbounds float* %tmp2387, i64 1
- %tmp2389 = getelementptr inbounds float* %tmp2388, i64 1
- %tmp2390 = getelementptr inbounds float* %tmp2389, i64 1
- %tmp2391 = getelementptr inbounds float* %tmp2390, i64 1
- %tmp2392 = getelementptr inbounds float* %tmp2391, i64 1
- %tmp2393 = getelementptr inbounds float* %tmp2392, i64 1
- %tmp2394 = getelementptr inbounds float* %tmp2393, i64 1
- %tmp2395 = getelementptr inbounds float* %tmp2394, i64 1
- %tmp2396 = getelementptr inbounds float* %tmp2395, i64 1
- %tmp2397 = getelementptr inbounds float* %tmp2396, i64 1
- %tmp2398 = getelementptr inbounds float* %tmp2397, i64 1
- %tmp2399 = getelementptr inbounds float* %tmp2398, i64 1
- %tmp2400 = getelementptr inbounds float* %tmp2399, i64 1
- %tmp2401 = getelementptr inbounds float* %tmp2400, i64 1
- %tmp2402 = getelementptr inbounds float* %tmp2401, i64 1
- %tmp2403 = getelementptr inbounds float* %tmp2402, i64 1
- %tmp2404 = getelementptr inbounds float* %tmp2403, i64 1
- %tmp2405 = getelementptr inbounds float* %tmp2404, i64 1
- %tmp2406 = getelementptr inbounds float* %tmp2405, i64 1
- %tmp2407 = getelementptr inbounds float* %tmp2406, i64 1
- %tmp2408 = getelementptr inbounds float* %tmp2407, i64 1
- %tmp2409 = getelementptr inbounds float* %tmp2408, i64 1
- %tmp2410 = getelementptr inbounds float* %tmp2409, i64 1
- %tmp2411 = getelementptr inbounds float* %tmp2410, i64 1
- %tmp2412 = getelementptr inbounds float* %tmp2411, i64 1
- %tmp2413 = getelementptr inbounds float* %tmp2412, i64 1
- %tmp2414 = getelementptr inbounds float* %tmp2413, i64 1
- %tmp2415 = getelementptr inbounds float* %tmp2414, i64 1
- %tmp2416 = getelementptr inbounds float* %tmp2415, i64 1
- %tmp2417 = getelementptr inbounds float* %tmp2416, i64 1
- %tmp2418 = getelementptr inbounds float* %tmp2417, i64 1
- %tmp2419 = getelementptr inbounds float* %tmp2418, i64 1
- %tmp2420 = getelementptr inbounds float* %tmp2419, i64 1
- %tmp2421 = getelementptr inbounds float* %tmp2420, i64 1
- %tmp2422 = getelementptr inbounds float* %tmp2421, i64 1
- %tmp2423 = getelementptr inbounds float* %tmp2422, i64 1
- %tmp2424 = getelementptr inbounds float* %tmp2423, i64 1
- %tmp2425 = getelementptr inbounds float* %tmp2424, i64 1
- %tmp2426 = getelementptr inbounds float* %tmp2425, i64 1
- %tmp2427 = getelementptr inbounds float* %tmp2426, i64 1
- %tmp2428 = getelementptr inbounds float* %tmp2427, i64 1
- %tmp2429 = getelementptr inbounds float* %tmp2428, i64 1
- %tmp2430 = getelementptr inbounds float* %tmp2429, i64 1
- %tmp2431 = getelementptr inbounds float* %tmp2430, i64 1
- %tmp2432 = getelementptr inbounds float* %tmp2431, i64 1
- %tmp2433 = getelementptr inbounds float* %tmp2432, i64 1
- %tmp2434 = getelementptr inbounds float* %tmp2433, i64 1
- %tmp2435 = getelementptr inbounds float* %tmp2434, i64 1
- %tmp2436 = getelementptr inbounds float* %tmp2435, i64 1
- %tmp2437 = getelementptr inbounds float* %tmp2436, i64 1
- %tmp2438 = getelementptr inbounds float* %tmp2437, i64 1
- %tmp2439 = getelementptr inbounds float* %tmp2438, i64 1
- %tmp2440 = getelementptr inbounds float* %tmp2439, i64 1
- %tmp2441 = getelementptr inbounds float* %tmp2440, i64 1
- %tmp2442 = getelementptr inbounds float* %tmp2441, i64 1
- %tmp2443 = getelementptr inbounds float* %tmp2442, i64 1
- %tmp2444 = getelementptr inbounds float* %tmp2443, i64 1
- %tmp2445 = getelementptr inbounds float* %tmp2444, i64 1
- %tmp2446 = getelementptr inbounds float* %tmp2445, i64 1
- %tmp2447 = getelementptr inbounds float* %tmp2446, i64 1
- %tmp2448 = getelementptr inbounds float* %tmp2447, i64 1
- %tmp2449 = getelementptr inbounds float* %tmp2448, i64 1
- %tmp2450 = getelementptr inbounds float* %tmp2449, i64 1
- %tmp2451 = getelementptr inbounds float* %tmp2450, i64 1
- %tmp2452 = getelementptr inbounds float* %tmp2451, i64 1
- %tmp2453 = getelementptr inbounds float* %tmp2452, i64 1
- %tmp2454 = getelementptr inbounds float* %tmp2453, i64 1
- %tmp2455 = getelementptr inbounds float* %tmp2454, i64 1
- %tmp2456 = getelementptr inbounds float* %tmp2455, i64 1
- %tmp2457 = getelementptr inbounds float* %tmp2456, i64 1
- %tmp2458 = getelementptr inbounds float* %tmp2457, i64 1
- %tmp2459 = getelementptr inbounds float* %tmp2458, i64 1
- %tmp2460 = getelementptr inbounds float* %tmp2459, i64 1
- %tmp2461 = getelementptr inbounds float* %tmp2460, i64 1
- %tmp2462 = getelementptr inbounds float* %tmp2461, i64 1
- %tmp2463 = getelementptr inbounds float* %tmp2462, i64 1
- %tmp2464 = getelementptr inbounds float* %tmp2463, i64 1
- %tmp2465 = getelementptr inbounds float* %tmp2464, i64 1
- %tmp2466 = getelementptr inbounds float* %tmp2465, i64 1
- %tmp2467 = getelementptr inbounds float* %tmp2466, i64 1
- %tmp2468 = getelementptr inbounds float* %tmp2467, i64 1
- %tmp2469 = getelementptr inbounds float* %tmp2468, i64 1
- %tmp2470 = getelementptr inbounds float* %tmp2469, i64 1
- %tmp2471 = getelementptr inbounds float* %tmp2470, i64 1
- %tmp2472 = getelementptr inbounds float* %tmp2471, i64 1
- %tmp2473 = getelementptr inbounds float* %tmp2472, i64 1
- %tmp2474 = getelementptr inbounds float* %tmp2473, i64 1
- %tmp2475 = getelementptr inbounds float* %tmp2474, i64 1
- %tmp2476 = getelementptr inbounds float* %tmp2475, i64 1
- %tmp2477 = getelementptr inbounds float* %tmp2476, i64 1
- %tmp2478 = getelementptr inbounds float* %tmp2477, i64 1
- %tmp2479 = getelementptr inbounds float* %tmp2478, i64 1
- %tmp2480 = getelementptr inbounds float* %tmp2479, i64 1
- %tmp2481 = getelementptr inbounds float* %tmp2480, i64 1
- %tmp2482 = getelementptr inbounds float* %tmp2481, i64 1
- %tmp2483 = getelementptr inbounds float* %tmp2482, i64 1
- %tmp2484 = getelementptr inbounds float* %tmp2483, i64 1
- %tmp2485 = getelementptr inbounds float* %tmp2484, i64 1
- %tmp2486 = getelementptr inbounds float* %tmp2485, i64 1
- %tmp2487 = getelementptr inbounds float* %tmp2486, i64 1
- %tmp2488 = getelementptr inbounds float* %tmp2487, i64 1
- %tmp2489 = getelementptr inbounds float* %tmp2488, i64 1
- %tmp2490 = getelementptr inbounds float* %tmp2489, i64 1
- %tmp2491 = getelementptr inbounds float* %tmp2490, i64 1
- %tmp2492 = getelementptr inbounds float* %tmp2491, i64 1
- %tmp2493 = getelementptr inbounds float* %tmp2492, i64 1
- %tmp2494 = getelementptr inbounds float* %tmp2493, i64 1
- %tmp2495 = getelementptr inbounds float* %tmp2494, i64 1
- %tmp2496 = getelementptr inbounds float* %tmp2495, i64 1
- %tmp2497 = getelementptr inbounds float* %tmp2496, i64 1
- %tmp2498 = getelementptr inbounds float* %tmp2497, i64 1
- %tmp2499 = getelementptr inbounds float* %tmp2498, i64 1
- %tmp2500 = getelementptr inbounds float* %tmp2499, i64 1
- %tmp2501 = getelementptr inbounds float* %tmp2500, i64 1
- %tmp2502 = getelementptr inbounds float* %tmp2501, i64 1
- %tmp2503 = getelementptr inbounds float* %tmp2502, i64 1
- %tmp2504 = getelementptr inbounds float* %tmp2503, i64 1
- %tmp2505 = getelementptr inbounds float* %tmp2504, i64 1
- %tmp2506 = getelementptr inbounds float* %tmp2505, i64 1
- %tmp2507 = getelementptr inbounds float* %tmp2506, i64 1
- %tmp2508 = getelementptr inbounds float* %tmp2507, i64 1
- %tmp2509 = getelementptr inbounds float* %tmp2508, i64 1
- %tmp2510 = getelementptr inbounds float* %tmp2509, i64 1
- %tmp2511 = getelementptr inbounds float* %tmp2510, i64 1
- %tmp2512 = getelementptr inbounds float* %tmp2511, i64 1
- %tmp2513 = getelementptr inbounds float* %tmp2512, i64 1
- %tmp2514 = getelementptr inbounds float* %tmp2513, i64 1
- %tmp2515 = getelementptr inbounds float* %tmp2514, i64 1
- %tmp2516 = getelementptr inbounds float* %tmp2515, i64 1
- %tmp2517 = getelementptr inbounds float* %tmp2516, i64 1
- %tmp2518 = getelementptr inbounds float* %tmp2517, i64 1
- %tmp2519 = getelementptr inbounds float* %tmp2518, i64 1
- %tmp2520 = getelementptr inbounds float* %tmp2519, i64 1
- %tmp2521 = getelementptr inbounds float* %tmp2520, i64 1
- %tmp2522 = getelementptr inbounds float* %tmp2521, i64 1
- %tmp2523 = getelementptr inbounds float* %tmp2522, i64 1
- %tmp2524 = getelementptr inbounds float* %tmp2523, i64 1
- %tmp2525 = getelementptr inbounds float* %tmp2524, i64 1
- %tmp2526 = getelementptr inbounds float* %tmp2525, i64 1
- %tmp2527 = getelementptr inbounds float* %tmp2526, i64 1
- %tmp2528 = getelementptr inbounds float* %tmp2527, i64 1
- %tmp2529 = getelementptr inbounds float* %tmp2528, i64 1
- %tmp2530 = getelementptr inbounds float* %tmp2529, i64 1
- %tmp2531 = getelementptr inbounds float* %tmp2530, i64 1
- %tmp2532 = getelementptr inbounds float* %tmp2531, i64 1
- %tmp2533 = getelementptr inbounds float* %tmp2532, i64 1
- %tmp2534 = getelementptr inbounds float* %tmp2533, i64 1
- %tmp2535 = getelementptr inbounds float* %tmp2534, i64 1
- %tmp2536 = getelementptr inbounds float* %tmp2535, i64 1
- %tmp2537 = getelementptr inbounds float* %tmp2536, i64 1
- %tmp2538 = getelementptr inbounds float* %tmp2537, i64 1
- %tmp2539 = getelementptr inbounds float* %tmp2538, i64 1
- %tmp2540 = getelementptr inbounds float* %tmp2539, i64 1
- %tmp2541 = getelementptr inbounds float* %tmp2540, i64 1
- %tmp2542 = getelementptr inbounds float* %tmp2541, i64 1
- %tmp2543 = getelementptr inbounds float* %tmp2542, i64 1
- %tmp2544 = getelementptr inbounds float* %tmp2543, i64 1
- %tmp2545 = getelementptr inbounds float* %tmp2544, i64 1
- %tmp2546 = getelementptr inbounds float* %tmp2545, i64 1
- %tmp2547 = getelementptr inbounds float* %tmp2546, i64 1
- %tmp2548 = getelementptr inbounds float* %tmp2547, i64 1
- %tmp2549 = getelementptr inbounds float* %tmp2548, i64 1
- %tmp2550 = getelementptr inbounds float* %tmp2549, i64 1
- %tmp2551 = getelementptr inbounds float* %tmp2550, i64 1
- %tmp2552 = getelementptr inbounds float* %tmp2551, i64 1
- %tmp2553 = getelementptr inbounds float* %tmp2552, i64 1
- %tmp2554 = getelementptr inbounds float* %tmp2553, i64 1
- %tmp2555 = getelementptr inbounds float* %tmp2554, i64 1
- %tmp2556 = getelementptr inbounds float* %tmp2555, i64 1
- %tmp2557 = getelementptr inbounds float* %tmp2556, i64 1
- %tmp2558 = getelementptr inbounds float* %tmp2557, i64 1
- %tmp2559 = getelementptr inbounds float* %tmp2558, i64 1
- %tmp2560 = getelementptr inbounds float* %tmp2559, i64 1
- %tmp2561 = getelementptr inbounds float* %tmp2560, i64 1
- %tmp2562 = getelementptr inbounds float* %tmp2561, i64 1
- %tmp2563 = getelementptr inbounds float* %tmp2562, i64 1
- %tmp2564 = getelementptr inbounds float* %tmp2563, i64 1
- %tmp2565 = getelementptr inbounds float* %tmp2564, i64 1
- %tmp2566 = getelementptr inbounds float* %tmp2565, i64 1
- %tmp2567 = getelementptr inbounds float* %tmp2566, i64 1
- %tmp2568 = getelementptr inbounds float* %tmp2567, i64 1
- %tmp2569 = getelementptr inbounds float* %tmp2568, i64 1
- %tmp2570 = getelementptr inbounds float* %tmp2569, i64 1
- %tmp2571 = getelementptr inbounds float* %tmp2570, i64 1
- %tmp2572 = getelementptr inbounds float* %tmp2571, i64 1
- %tmp2573 = getelementptr inbounds float* %tmp2572, i64 1
- %tmp2574 = getelementptr inbounds float* %tmp2573, i64 1
- %tmp2575 = getelementptr inbounds float* %tmp2574, i64 1
- %tmp2576 = getelementptr inbounds float* %tmp2575, i64 1
- %tmp2577 = getelementptr inbounds float* %tmp2576, i64 1
- %tmp2578 = getelementptr inbounds float* %tmp2577, i64 1
- %tmp2579 = getelementptr inbounds float* %tmp2578, i64 1
- %tmp2580 = getelementptr inbounds float* %tmp2579, i64 1
- %tmp2581 = getelementptr inbounds float* %tmp2580, i64 1
- %tmp2582 = getelementptr inbounds float* %tmp2581, i64 1
- %tmp2583 = getelementptr inbounds float* %tmp2582, i64 1
- %tmp2584 = getelementptr inbounds float* %tmp2583, i64 1
- %tmp2585 = getelementptr inbounds float* %tmp2584, i64 1
- %tmp2586 = getelementptr inbounds float* %tmp2585, i64 1
- %tmp2587 = getelementptr inbounds float* %tmp2586, i64 1
- %tmp2588 = getelementptr inbounds float* %tmp2587, i64 1
- %tmp2589 = getelementptr inbounds float* %tmp2588, i64 1
- %tmp2590 = getelementptr inbounds float* %tmp2589, i64 1
- %tmp2591 = getelementptr inbounds float* %tmp2590, i64 1
- %tmp2592 = getelementptr inbounds float* %tmp2591, i64 1
- %tmp2593 = getelementptr inbounds float* %tmp2592, i64 1
- %tmp2594 = getelementptr inbounds float* %tmp2593, i64 1
- %tmp2595 = getelementptr inbounds float* %tmp2594, i64 1
- %tmp2596 = getelementptr inbounds float* %tmp2595, i64 1
- %tmp2597 = getelementptr inbounds float* %tmp2596, i64 1
- %tmp2598 = getelementptr inbounds float* %tmp2597, i64 1
- %tmp2599 = getelementptr inbounds float* %tmp2598, i64 1
- %tmp2600 = getelementptr inbounds float* %tmp2599, i64 1
- %tmp2601 = getelementptr inbounds float* %tmp2600, i64 1
- %tmp2602 = getelementptr inbounds float* %tmp2601, i64 1
- %tmp2603 = getelementptr inbounds float* %tmp2602, i64 1
- %tmp2604 = getelementptr inbounds float* %tmp2603, i64 1
- %tmp2605 = getelementptr inbounds float* %tmp2604, i64 1
- %tmp2606 = getelementptr inbounds float* %tmp2605, i64 1
- %tmp2607 = getelementptr inbounds float* %tmp2606, i64 1
- %tmp2608 = getelementptr inbounds float* %tmp2607, i64 1
- %tmp2609 = getelementptr inbounds float* %tmp2608, i64 1
- %tmp2610 = getelementptr inbounds float* %tmp2609, i64 1
- %tmp2611 = getelementptr inbounds float* %tmp2610, i64 1
- %tmp2612 = getelementptr inbounds float* %tmp2611, i64 1
- %tmp2613 = getelementptr inbounds float* %tmp2612, i64 1
- %tmp2614 = getelementptr inbounds float* %tmp2613, i64 1
- %tmp2615 = getelementptr inbounds float* %tmp2614, i64 1
- %tmp2616 = getelementptr inbounds float* %tmp2615, i64 1
- %tmp2617 = getelementptr inbounds float* %tmp2616, i64 1
- %tmp2618 = getelementptr inbounds float* %tmp2617, i64 1
- %tmp2619 = getelementptr inbounds float* %tmp2618, i64 1
- %tmp2620 = getelementptr inbounds float* %tmp2619, i64 1
- %tmp2621 = getelementptr inbounds float* %tmp2620, i64 1
- %tmp2622 = getelementptr inbounds float* %tmp2621, i64 1
- %tmp2623 = getelementptr inbounds float* %tmp2622, i64 1
- %tmp2624 = getelementptr inbounds float* %tmp2623, i64 1
- %tmp2625 = getelementptr inbounds float* %tmp2624, i64 1
- %tmp2626 = getelementptr inbounds float* %tmp2625, i64 1
- %tmp2627 = getelementptr inbounds float* %tmp2626, i64 1
- %tmp2628 = getelementptr inbounds float* %tmp2627, i64 1
- %tmp2629 = getelementptr inbounds float* %tmp2628, i64 1
- %tmp2630 = getelementptr inbounds float* %tmp2629, i64 1
- %tmp2631 = getelementptr inbounds float* %tmp2630, i64 1
- %tmp2632 = getelementptr inbounds float* %tmp2631, i64 1
- %tmp2633 = getelementptr inbounds float* %tmp2632, i64 1
- %tmp2634 = getelementptr inbounds float* %tmp2633, i64 1
- %tmp2635 = getelementptr inbounds float* %tmp2634, i64 1
- %tmp2636 = getelementptr inbounds float* %tmp2635, i64 1
- %tmp2637 = getelementptr inbounds float* %tmp2636, i64 1
- %tmp2638 = getelementptr inbounds float* %tmp2637, i64 1
- %tmp2639 = getelementptr inbounds float* %tmp2638, i64 1
- %tmp2640 = getelementptr inbounds float* %tmp2639, i64 1
- %tmp2641 = getelementptr inbounds float* %tmp2640, i64 1
- %tmp2642 = getelementptr inbounds float* %tmp2641, i64 1
- %tmp2643 = getelementptr inbounds float* %tmp2642, i64 1
- %tmp2644 = getelementptr inbounds float* %tmp2643, i64 1
- %tmp2645 = getelementptr inbounds float* %tmp2644, i64 1
- %tmp2646 = getelementptr inbounds float* %tmp2645, i64 1
- %tmp2647 = getelementptr inbounds float* %tmp2646, i64 1
- %tmp2648 = getelementptr inbounds float* %tmp2647, i64 1
- %tmp2649 = getelementptr inbounds float* %tmp2648, i64 1
- %tmp2650 = getelementptr inbounds float* %tmp2649, i64 1
- %tmp2651 = getelementptr inbounds float* %tmp2650, i64 1
- %tmp2652 = getelementptr inbounds float* %tmp2651, i64 1
- %tmp2653 = getelementptr inbounds float* %tmp2652, i64 1
- %tmp2654 = getelementptr inbounds float* %tmp2653, i64 1
- %tmp2655 = getelementptr inbounds float* %tmp2654, i64 1
- %tmp2656 = getelementptr inbounds float* %tmp2655, i64 1
- %tmp2657 = getelementptr inbounds float* %tmp2656, i64 1
- %tmp2658 = getelementptr inbounds float* %tmp2657, i64 1
- %tmp2659 = getelementptr inbounds float* %tmp2658, i64 1
- %tmp2660 = getelementptr inbounds float* %tmp2659, i64 1
- %tmp2661 = getelementptr inbounds float* %tmp2660, i64 1
- %tmp2662 = getelementptr inbounds float* %tmp2661, i64 1
- %tmp2663 = getelementptr inbounds float* %tmp2662, i64 1
- %tmp2664 = getelementptr inbounds float* %tmp2663, i64 1
- %tmp2665 = getelementptr inbounds float* %tmp2664, i64 1
- %tmp2666 = getelementptr inbounds float* %tmp2665, i64 1
- %tmp2667 = getelementptr inbounds float* %tmp2666, i64 1
- %tmp2668 = getelementptr inbounds float* %tmp2667, i64 1
- %tmp2669 = getelementptr inbounds float* %tmp2668, i64 1
- %tmp2670 = getelementptr inbounds float* %tmp2669, i64 1
- %tmp2671 = getelementptr inbounds float* %tmp2670, i64 1
- %tmp2672 = getelementptr inbounds float* %tmp2671, i64 1
- %tmp2673 = getelementptr inbounds float* %tmp2672, i64 1
- %tmp2674 = getelementptr inbounds float* %tmp2673, i64 1
- %tmp2675 = getelementptr inbounds float* %tmp2674, i64 1
- %tmp2676 = getelementptr inbounds float* %tmp2675, i64 1
- %tmp2677 = getelementptr inbounds float* %tmp2676, i64 1
- %tmp2678 = getelementptr inbounds float* %tmp2677, i64 1
- %tmp2679 = getelementptr inbounds float* %tmp2678, i64 1
- %tmp2680 = getelementptr inbounds float* %tmp2679, i64 1
- %tmp2681 = getelementptr inbounds float* %tmp2680, i64 1
- %tmp2682 = getelementptr inbounds float* %tmp2681, i64 1
- %tmp2683 = getelementptr inbounds float* %tmp2682, i64 1
- %tmp2684 = getelementptr inbounds float* %tmp2683, i64 1
- %tmp2685 = getelementptr inbounds float* %tmp2684, i64 1
- %tmp2686 = getelementptr inbounds float* %tmp2685, i64 1
- %tmp2687 = getelementptr inbounds float* %tmp2686, i64 1
- %tmp2688 = getelementptr inbounds float* %tmp2687, i64 1
- %tmp2689 = getelementptr inbounds float* %tmp2688, i64 1
- %tmp2690 = getelementptr inbounds float* %tmp2689, i64 1
- %tmp2691 = getelementptr inbounds float* %tmp2690, i64 1
- %tmp2692 = getelementptr inbounds float* %tmp2691, i64 1
- %tmp2693 = getelementptr inbounds float* %tmp2692, i64 1
- %tmp2694 = getelementptr inbounds float* %tmp2693, i64 1
- %tmp2695 = getelementptr inbounds float* %tmp2694, i64 1
- %tmp2696 = getelementptr inbounds float* %tmp2695, i64 1
- %tmp2697 = getelementptr inbounds float* %tmp2696, i64 1
- %tmp2698 = getelementptr inbounds float* %tmp2697, i64 1
- %tmp2699 = getelementptr inbounds float* %tmp2698, i64 1
- %tmp2700 = getelementptr inbounds float* %tmp2699, i64 1
- %tmp2701 = getelementptr inbounds float* %tmp2700, i64 1
- %tmp2702 = getelementptr inbounds float* %tmp2701, i64 1
- %tmp2703 = getelementptr inbounds float* %tmp2702, i64 1
- %tmp2704 = getelementptr inbounds float* %tmp2703, i64 1
- %tmp2705 = getelementptr inbounds float* %tmp2704, i64 1
- %tmp2706 = getelementptr inbounds float* %tmp2705, i64 1
- %tmp2707 = getelementptr inbounds float* %tmp2706, i64 1
- %tmp2708 = getelementptr inbounds float* %tmp2707, i64 1
- %tmp2709 = getelementptr inbounds float* %tmp2708, i64 1
- %tmp2710 = getelementptr inbounds float* %tmp2709, i64 1
- %tmp2711 = getelementptr inbounds float* %tmp2710, i64 1
- %tmp2712 = getelementptr inbounds float* %tmp2711, i64 1
- %tmp2713 = getelementptr inbounds float* %tmp2712, i64 1
- %tmp2714 = getelementptr inbounds float* %tmp2713, i64 1
- %tmp2715 = getelementptr inbounds float* %tmp2714, i64 1
- %tmp2716 = getelementptr inbounds float* %tmp2715, i64 1
- %tmp2717 = getelementptr inbounds float* %tmp2716, i64 1
- %tmp2718 = getelementptr inbounds float* %tmp2717, i64 1
- %tmp2719 = getelementptr inbounds float* %tmp2718, i64 1
- %tmp2720 = getelementptr inbounds float* %tmp2719, i64 1
- %tmp2721 = getelementptr inbounds float* %tmp2720, i64 1
- %tmp2722 = getelementptr inbounds float* %tmp2721, i64 1
- %tmp2723 = getelementptr inbounds float* %tmp2722, i64 1
- %tmp2724 = getelementptr inbounds float* %tmp2723, i64 1
- %tmp2725 = getelementptr inbounds float* %tmp2724, i64 1
- %tmp2726 = getelementptr inbounds float* %tmp2725, i64 1
- %tmp2727 = getelementptr inbounds float* %tmp2726, i64 1
- %tmp2728 = getelementptr inbounds float* %tmp2727, i64 1
- %tmp2729 = getelementptr inbounds float* %tmp2728, i64 1
- %tmp2730 = getelementptr inbounds float* %tmp2729, i64 1
- %tmp2731 = getelementptr inbounds float* %tmp2730, i64 1
- %tmp2732 = getelementptr inbounds float* %tmp2731, i64 1
- %tmp2733 = getelementptr inbounds float* %tmp2732, i64 1
- %tmp2734 = getelementptr inbounds float* %tmp2733, i64 1
- %tmp2735 = getelementptr inbounds float* %tmp2734, i64 1
- %tmp2736 = getelementptr inbounds float* %tmp2735, i64 1
- %tmp2737 = getelementptr inbounds float* %tmp2736, i64 1
- %tmp2738 = getelementptr inbounds float* %tmp2737, i64 1
- %tmp2739 = getelementptr inbounds float* %tmp2738, i64 1
- %tmp2740 = getelementptr inbounds float* %tmp2739, i64 1
- %tmp2741 = getelementptr inbounds float* %tmp2740, i64 1
- %tmp2742 = getelementptr inbounds float* %tmp2741, i64 1
- %tmp2743 = getelementptr inbounds float* %tmp2742, i64 1
- %tmp2744 = getelementptr inbounds float* %tmp2743, i64 1
- %tmp2745 = getelementptr inbounds float* %tmp2744, i64 1
- %tmp2746 = getelementptr inbounds float* %tmp2745, i64 1
- %tmp2747 = getelementptr inbounds float* %tmp2746, i64 1
- %tmp2748 = getelementptr inbounds float* %tmp2747, i64 1
- %tmp2749 = getelementptr inbounds float* %tmp2748, i64 1
- %tmp2750 = getelementptr inbounds float* %tmp2749, i64 1
- %tmp2751 = getelementptr inbounds float* %tmp2750, i64 1
- %tmp2752 = getelementptr inbounds float* %tmp2751, i64 1
- %tmp2753 = getelementptr inbounds float* %tmp2752, i64 1
- %tmp2754 = getelementptr inbounds float* %tmp2753, i64 1
- %tmp2755 = getelementptr inbounds float* %tmp2754, i64 1
- %tmp2756 = getelementptr inbounds float* %tmp2755, i64 1
- %tmp2757 = getelementptr inbounds float* %tmp2756, i64 1
- %tmp2758 = getelementptr inbounds float* %tmp2757, i64 1
- %tmp2759 = getelementptr inbounds float* %tmp2758, i64 1
- %tmp2760 = getelementptr inbounds float* %tmp2759, i64 1
- %tmp2761 = getelementptr inbounds float* %tmp2760, i64 1
- %tmp2762 = getelementptr inbounds float* %tmp2761, i64 1
- %tmp2763 = getelementptr inbounds float* %tmp2762, i64 1
- %tmp2764 = getelementptr inbounds float* %tmp2763, i64 1
- %tmp2765 = getelementptr inbounds float* %tmp2764, i64 1
- %tmp2766 = getelementptr inbounds float* %tmp2765, i64 1
- %tmp2767 = getelementptr inbounds float* %tmp2766, i64 1
- %tmp2768 = getelementptr inbounds float* %tmp2767, i64 1
- %tmp2769 = getelementptr inbounds float* %tmp2768, i64 1
- %tmp2770 = getelementptr inbounds float* %tmp2769, i64 1
- %tmp2771 = getelementptr inbounds float* %tmp2770, i64 1
- %tmp2772 = getelementptr inbounds float* %tmp2771, i64 1
- %tmp2773 = getelementptr inbounds float* %tmp2772, i64 1
- %tmp2774 = getelementptr inbounds float* %tmp2773, i64 1
- %tmp2775 = getelementptr inbounds float* %tmp2774, i64 1
- %tmp2776 = getelementptr inbounds float* %tmp2775, i64 1
- %tmp2777 = getelementptr inbounds float* %tmp2776, i64 1
- %tmp2778 = getelementptr inbounds float* %tmp2777, i64 1
- %tmp2779 = getelementptr inbounds float* %tmp2778, i64 1
- %tmp2780 = getelementptr inbounds float* %tmp2779, i64 1
- %tmp2781 = getelementptr inbounds float* %tmp2780, i64 1
- %tmp2782 = getelementptr inbounds float* %tmp2781, i64 1
- %tmp2783 = getelementptr inbounds float* %tmp2782, i64 1
- %tmp2784 = getelementptr inbounds float* %tmp2783, i64 1
- %tmp2785 = getelementptr inbounds float* %tmp2784, i64 1
- %tmp2786 = getelementptr inbounds float* %tmp2785, i64 1
- %tmp2787 = getelementptr inbounds float* %tmp2786, i64 1
- %tmp2788 = getelementptr inbounds float* %tmp2787, i64 1
- %tmp2789 = getelementptr inbounds float* %tmp2788, i64 1
- %tmp2790 = getelementptr inbounds float* %tmp2789, i64 1
- %tmp2791 = getelementptr inbounds float* %tmp2790, i64 1
- %tmp2792 = getelementptr inbounds float* %tmp2791, i64 1
- %tmp2793 = getelementptr inbounds float* %tmp2792, i64 1
- %tmp2794 = getelementptr inbounds float* %tmp2793, i64 1
- %tmp2795 = getelementptr inbounds float* %tmp2794, i64 1
- %tmp2796 = getelementptr inbounds float* %tmp2795, i64 1
- %tmp2797 = getelementptr inbounds float* %tmp2796, i64 1
- %tmp2798 = getelementptr inbounds float* %tmp2797, i64 1
- %tmp2799 = getelementptr inbounds float* %tmp2798, i64 1
- %tmp2800 = getelementptr inbounds float* %tmp2799, i64 1
- %tmp2801 = getelementptr inbounds float* %tmp2800, i64 1
- %tmp2802 = getelementptr inbounds float* %tmp2801, i64 1
- %tmp2803 = getelementptr inbounds float* %tmp2802, i64 1
- %tmp2804 = getelementptr inbounds float* %tmp2803, i64 1
- %tmp2805 = getelementptr inbounds float* %tmp2804, i64 1
- %tmp2806 = getelementptr inbounds float* %tmp2805, i64 1
- %tmp2807 = getelementptr inbounds float* %tmp2806, i64 1
- %tmp2808 = getelementptr inbounds float* %tmp2807, i64 1
- %tmp2809 = getelementptr inbounds float* %tmp2808, i64 1
- %tmp2810 = getelementptr inbounds float* %tmp2809, i64 1
- %tmp2811 = getelementptr inbounds float* %tmp2810, i64 1
- %tmp2812 = getelementptr inbounds float* %tmp2811, i64 1
- %tmp2813 = getelementptr inbounds float* %tmp2812, i64 1
- %tmp2814 = getelementptr inbounds float* %tmp2813, i64 1
- %tmp2815 = getelementptr inbounds float* %tmp2814, i64 1
- %tmp2816 = getelementptr inbounds float* %tmp2815, i64 1
- %tmp2817 = getelementptr inbounds float* %tmp2816, i64 1
- %tmp2818 = getelementptr inbounds float* %tmp2817, i64 1
- %tmp2819 = getelementptr inbounds float* %tmp2818, i64 1
- %tmp2820 = getelementptr inbounds float* %tmp2819, i64 1
- %tmp2821 = getelementptr inbounds float* %tmp2820, i64 1
- %tmp2822 = getelementptr inbounds float* %tmp2821, i64 1
- %tmp2823 = getelementptr inbounds float* %tmp2822, i64 1
- %tmp2824 = getelementptr inbounds float* %tmp2823, i64 1
- %tmp2825 = getelementptr inbounds float* %tmp2824, i64 1
- %tmp2826 = getelementptr inbounds float* %tmp2825, i64 1
- %tmp2827 = getelementptr inbounds float* %tmp2826, i64 1
- %tmp2828 = getelementptr inbounds float* %tmp2827, i64 1
- %tmp2829 = getelementptr inbounds float* %tmp2828, i64 1
- %tmp2830 = getelementptr inbounds float* %tmp2829, i64 1
- %tmp2831 = getelementptr inbounds float* %tmp2830, i64 1
- %tmp2832 = getelementptr inbounds float* %tmp2831, i64 1
- %tmp2833 = getelementptr inbounds float* %tmp2832, i64 1
- %tmp2834 = getelementptr inbounds float* %tmp2833, i64 1
- %tmp2835 = getelementptr inbounds float* %tmp2834, i64 1
- %tmp2836 = getelementptr inbounds float* %tmp2835, i64 1
- %tmp2837 = getelementptr inbounds float* %tmp2836, i64 1
- %tmp2838 = getelementptr inbounds float* %tmp2837, i64 1
- %tmp2839 = getelementptr inbounds float* %tmp2838, i64 1
- %tmp2840 = getelementptr inbounds float* %tmp2839, i64 1
- %tmp2841 = getelementptr inbounds float* %tmp2840, i64 1
- %tmp2842 = getelementptr inbounds float* %tmp2841, i64 1
- %tmp2843 = getelementptr inbounds float* %tmp2842, i64 1
- %tmp2844 = getelementptr inbounds float* %tmp2843, i64 1
- %tmp2845 = getelementptr inbounds float* %tmp2844, i64 1
- %tmp2846 = getelementptr inbounds float* %tmp2845, i64 1
- %tmp2847 = getelementptr inbounds float* %tmp2846, i64 1
- %tmp2848 = getelementptr inbounds float* %tmp2847, i64 1
- %tmp2849 = getelementptr inbounds float* %tmp2848, i64 1
- %tmp2850 = getelementptr inbounds float* %tmp2849, i64 1
- %tmp2851 = getelementptr inbounds float* %tmp2850, i64 1
- %tmp2852 = getelementptr inbounds float* %tmp2851, i64 1
- %tmp2853 = getelementptr inbounds float* %tmp2852, i64 1
- %tmp2854 = getelementptr inbounds float* %tmp2853, i64 1
- %tmp2855 = getelementptr inbounds float* %tmp2854, i64 1
- %tmp2856 = getelementptr inbounds float* %tmp2855, i64 1
- %tmp2857 = getelementptr inbounds float* %tmp2856, i64 1
- %tmp2858 = getelementptr inbounds float* %tmp2857, i64 1
- %tmp2859 = getelementptr inbounds float* %tmp2858, i64 1
- %tmp2860 = getelementptr inbounds float* %tmp2859, i64 1
- %tmp2861 = getelementptr inbounds float* %tmp2860, i64 1
- %tmp2862 = getelementptr inbounds float* %tmp2861, i64 1
- %tmp2863 = getelementptr inbounds float* %tmp2862, i64 1
- %tmp2864 = getelementptr inbounds float* %tmp2863, i64 1
- %tmp2865 = getelementptr inbounds float* %tmp2864, i64 1
- %tmp2866 = getelementptr inbounds float* %tmp2865, i64 1
- %tmp2867 = getelementptr inbounds float* %tmp2866, i64 1
- %tmp2868 = getelementptr inbounds float* %tmp2867, i64 1
- %tmp2869 = getelementptr inbounds float* %tmp2868, i64 1
- %tmp2870 = getelementptr inbounds float* %tmp2869, i64 1
- %tmp2871 = getelementptr inbounds float* %tmp2870, i64 1
- %tmp2872 = getelementptr inbounds float* %tmp2871, i64 1
- %tmp2873 = getelementptr inbounds float* %tmp2872, i64 1
- %tmp2874 = getelementptr inbounds float* %tmp2873, i64 1
- %tmp2875 = getelementptr inbounds float* %tmp2874, i64 1
- %tmp2876 = getelementptr inbounds float* %tmp2875, i64 1
- %tmp2877 = getelementptr inbounds float* %tmp2876, i64 1
- %tmp2878 = getelementptr inbounds float* %tmp2877, i64 1
- %tmp2879 = getelementptr inbounds float* %tmp2878, i64 1
- %tmp2880 = getelementptr inbounds float* %tmp2879, i64 1
- %tmp2881 = getelementptr inbounds float* %tmp2880, i64 1
- %tmp2882 = getelementptr inbounds float* %tmp2881, i64 1
- %tmp2883 = getelementptr inbounds float* %tmp2882, i64 1
- %tmp2884 = getelementptr inbounds float* %tmp2883, i64 1
- %tmp2885 = getelementptr inbounds float* %tmp2884, i64 1
- %tmp2886 = getelementptr inbounds float* %tmp2885, i64 1
- %tmp2887 = getelementptr inbounds float* %tmp2886, i64 1
- %tmp2888 = getelementptr inbounds float* %tmp2887, i64 1
- %tmp2889 = getelementptr inbounds float* %tmp2888, i64 1
- %tmp2890 = getelementptr inbounds float* %tmp2889, i64 1
- %tmp2891 = getelementptr inbounds float* %tmp2890, i64 1
- %tmp2892 = getelementptr inbounds float* %tmp2891, i64 1
- %tmp2893 = getelementptr inbounds float* %tmp2892, i64 1
- %tmp2894 = getelementptr inbounds float* %tmp2893, i64 1
- %tmp2895 = getelementptr inbounds float* %tmp2894, i64 1
- %tmp2896 = getelementptr inbounds float* %tmp2895, i64 1
- %tmp2897 = getelementptr inbounds float* %tmp2896, i64 1
- %tmp2898 = getelementptr inbounds float* %tmp2897, i64 1
- %tmp2899 = getelementptr inbounds float* %tmp2898, i64 1
- %tmp2900 = getelementptr inbounds float* %tmp2899, i64 1
- %tmp2901 = getelementptr inbounds float* %tmp2900, i64 1
- %tmp2902 = getelementptr inbounds float* %tmp2901, i64 1
- %tmp2903 = getelementptr inbounds float* %tmp2902, i64 1
- %tmp2904 = getelementptr inbounds float* %tmp2903, i64 1
- %tmp2905 = getelementptr inbounds float* %tmp2904, i64 1
- %tmp2906 = getelementptr inbounds float* %tmp2905, i64 1
- %tmp2907 = getelementptr inbounds float* %tmp2906, i64 1
- %tmp2908 = getelementptr inbounds float* %tmp2907, i64 1
- %tmp2909 = getelementptr inbounds float* %tmp2908, i64 1
- %tmp2910 = getelementptr inbounds float* %tmp2909, i64 1
- %tmp2911 = getelementptr inbounds float* %tmp2910, i64 1
- %tmp2912 = getelementptr inbounds float* %tmp2911, i64 1
- %tmp2913 = getelementptr inbounds float* %tmp2912, i64 1
- %tmp2914 = getelementptr inbounds float* %tmp2913, i64 1
- %tmp2915 = getelementptr inbounds float* %tmp2914, i64 1
- %tmp2916 = getelementptr inbounds float* %tmp2915, i64 1
- %tmp2917 = getelementptr inbounds float* %tmp2916, i64 1
- %tmp2918 = getelementptr inbounds float* %tmp2917, i64 1
- %tmp2919 = getelementptr inbounds float* %tmp2918, i64 1
- %tmp2920 = getelementptr inbounds float* %tmp2919, i64 1
- %tmp2921 = getelementptr inbounds float* %tmp2920, i64 1
- %tmp2922 = getelementptr inbounds float* %tmp2921, i64 1
- %tmp2923 = getelementptr inbounds float* %tmp2922, i64 1
- %tmp2924 = getelementptr inbounds float* %tmp2923, i64 1
- %tmp2925 = getelementptr inbounds float* %tmp2924, i64 1
- %tmp2926 = getelementptr inbounds float* %tmp2925, i64 1
- %tmp2927 = getelementptr inbounds float* %tmp2926, i64 1
- %tmp2928 = getelementptr inbounds float* %tmp2927, i64 1
- %tmp2929 = getelementptr inbounds float* %tmp2928, i64 1
- %tmp2930 = getelementptr inbounds float* %tmp2929, i64 1
- %tmp2931 = getelementptr inbounds float* %tmp2930, i64 1
- %tmp2932 = getelementptr inbounds float* %tmp2931, i64 1
- %tmp2933 = getelementptr inbounds float* %tmp2932, i64 1
- %tmp2934 = getelementptr inbounds float* %tmp2933, i64 1
- %tmp2935 = getelementptr inbounds float* %tmp2934, i64 1
- %tmp2936 = getelementptr inbounds float* %tmp2935, i64 1
- %tmp2937 = getelementptr inbounds float* %tmp2936, i64 1
- %tmp2938 = getelementptr inbounds float* %tmp2937, i64 1
- %tmp2939 = getelementptr inbounds float* %tmp2938, i64 1
- %tmp2940 = getelementptr inbounds float* %tmp2939, i64 1
- %tmp2941 = getelementptr inbounds float* %tmp2940, i64 1
- %tmp2942 = getelementptr inbounds float* %tmp2941, i64 1
- %tmp2943 = getelementptr inbounds float* %tmp2942, i64 1
- %tmp2944 = getelementptr inbounds float* %tmp2943, i64 1
- %tmp2945 = getelementptr inbounds float* %tmp2944, i64 1
- %tmp2946 = getelementptr inbounds float* %tmp2945, i64 1
- %tmp2947 = getelementptr inbounds float* %tmp2946, i64 1
- %tmp2948 = getelementptr inbounds float* %tmp2947, i64 1
- %tmp2949 = getelementptr inbounds float* %tmp2948, i64 1
- %tmp2950 = getelementptr inbounds float* %tmp2949, i64 1
- %tmp2951 = getelementptr inbounds float* %tmp2950, i64 1
- %tmp2952 = getelementptr inbounds float* %tmp2951, i64 1
- %tmp2953 = getelementptr inbounds float* %tmp2952, i64 1
- %tmp2954 = getelementptr inbounds float* %tmp2953, i64 1
- %tmp2955 = getelementptr inbounds float* %tmp2954, i64 1
- %tmp2956 = getelementptr inbounds float* %tmp2955, i64 1
- %tmp2957 = getelementptr inbounds float* %tmp2956, i64 1
- %tmp2958 = getelementptr inbounds float* %tmp2957, i64 1
- %tmp2959 = getelementptr inbounds float* %tmp2958, i64 1
- %tmp2960 = getelementptr inbounds float* %tmp2959, i64 1
- %tmp2961 = getelementptr inbounds float* %tmp2960, i64 1
- %tmp2962 = getelementptr inbounds float* %tmp2961, i64 1
- %tmp2963 = getelementptr inbounds float* %tmp2962, i64 1
- %tmp2964 = getelementptr inbounds float* %tmp2963, i64 1
- %tmp2965 = getelementptr inbounds float* %tmp2964, i64 1
- %tmp2966 = getelementptr inbounds float* %tmp2965, i64 1
- %tmp2967 = getelementptr inbounds float* %tmp2966, i64 1
- %tmp2968 = getelementptr inbounds float* %tmp2967, i64 1
- %tmp2969 = getelementptr inbounds float* %tmp2968, i64 1
- %tmp2970 = getelementptr inbounds float* %tmp2969, i64 1
- %tmp2971 = getelementptr inbounds float* %tmp2970, i64 1
- %tmp2972 = getelementptr inbounds float* %tmp2971, i64 1
- %tmp2973 = getelementptr inbounds float* %tmp2972, i64 1
- %tmp2974 = getelementptr inbounds float* %tmp2973, i64 1
- %tmp2975 = getelementptr inbounds float* %tmp2974, i64 1
- %tmp2976 = getelementptr inbounds float* %tmp2975, i64 1
- %tmp2977 = getelementptr inbounds float* %tmp2976, i64 1
- %tmp2978 = getelementptr inbounds float* %tmp2977, i64 1
- %tmp2979 = getelementptr inbounds float* %tmp2978, i64 1
- %tmp2980 = getelementptr inbounds float* %tmp2979, i64 1
- %tmp2981 = getelementptr inbounds float* %tmp2980, i64 1
- %tmp2982 = getelementptr inbounds float* %tmp2981, i64 1
- %tmp2983 = getelementptr inbounds float* %tmp2982, i64 1
- %tmp2984 = getelementptr inbounds float* %tmp2983, i64 1
- %tmp2985 = getelementptr inbounds float* %tmp2984, i64 1
- %tmp2986 = getelementptr inbounds float* %tmp2985, i64 1
- %tmp2987 = getelementptr inbounds float* %tmp2986, i64 1
- %tmp2988 = getelementptr inbounds float* %tmp2987, i64 1
- %tmp2989 = getelementptr inbounds float* %tmp2988, i64 1
- %tmp2990 = getelementptr inbounds float* %tmp2989, i64 1
- %tmp2991 = getelementptr inbounds float* %tmp2990, i64 1
- %tmp2992 = getelementptr inbounds float* %tmp2991, i64 1
- %tmp2993 = getelementptr inbounds float* %tmp2992, i64 1
- %tmp2994 = getelementptr inbounds float* %tmp2993, i64 1
- %tmp2995 = getelementptr inbounds float* %tmp2994, i64 1
- %tmp2996 = getelementptr inbounds float* %tmp2995, i64 1
- %tmp2997 = getelementptr inbounds float* %tmp2996, i64 1
- %tmp2998 = getelementptr inbounds float* %tmp2997, i64 1
- %tmp2999 = getelementptr inbounds float* %tmp2998, i64 1
- %tmp3000 = getelementptr inbounds float* %tmp2999, i64 1
- %tmp3001 = getelementptr inbounds float* %tmp3000, i64 1
- %tmp3002 = getelementptr inbounds float* %tmp3001, i64 1
- %tmp3003 = getelementptr inbounds float* %tmp3002, i64 1
- %tmp3004 = getelementptr inbounds float* %tmp3003, i64 1
- %tmp3005 = getelementptr inbounds float* %tmp3004, i64 1
- %tmp3006 = getelementptr inbounds float* %tmp3005, i64 1
- %tmp3007 = getelementptr inbounds float* %tmp3006, i64 1
- %tmp3008 = getelementptr inbounds float* %tmp3007, i64 1
- %tmp3009 = getelementptr inbounds float* %tmp3008, i64 1
- %tmp3010 = getelementptr inbounds float* %tmp3009, i64 1
- %tmp3011 = getelementptr inbounds float* %tmp3010, i64 1
- %tmp3012 = getelementptr inbounds float* %tmp3011, i64 1
- %tmp3013 = getelementptr inbounds float* %tmp3012, i64 1
- %tmp3014 = getelementptr inbounds float* %tmp3013, i64 1
- %tmp3015 = getelementptr inbounds float* %tmp3014, i64 1
- %tmp3016 = getelementptr inbounds float* %tmp3015, i64 1
- %tmp3017 = getelementptr inbounds float* %tmp3016, i64 1
- %tmp3018 = getelementptr inbounds float* %tmp3017, i64 1
- %tmp3019 = getelementptr inbounds float* %tmp3018, i64 1
- %tmp3020 = getelementptr inbounds float* %tmp3019, i64 1
- %tmp3021 = getelementptr inbounds float* %tmp3020, i64 1
- %tmp3022 = getelementptr inbounds float* %tmp3021, i64 1
- %tmp3023 = getelementptr inbounds float* %tmp3022, i64 1
- %tmp3024 = getelementptr inbounds float* %tmp3023, i64 1
- %tmp3025 = getelementptr inbounds float* %tmp3024, i64 1
- %tmp3026 = getelementptr inbounds float* %tmp3025, i64 1
- %tmp3027 = getelementptr inbounds float* %tmp3026, i64 1
- %tmp3028 = getelementptr inbounds float* %tmp3027, i64 1
- %tmp3029 = getelementptr inbounds float* %tmp3028, i64 1
- %tmp3030 = getelementptr inbounds float* %tmp3029, i64 1
- %tmp3031 = getelementptr inbounds float* %tmp3030, i64 1
- %tmp3032 = getelementptr inbounds float* %tmp3031, i64 1
- %tmp3033 = getelementptr inbounds float* %tmp3032, i64 1
- %tmp3034 = getelementptr inbounds float* %tmp3033, i64 1
- %tmp3035 = getelementptr inbounds float* %tmp3034, i64 1
- %tmp3036 = getelementptr inbounds float* %tmp3035, i64 1
- %tmp3037 = getelementptr inbounds float* %tmp3036, i64 1
- %tmp3038 = getelementptr inbounds float* %tmp3037, i64 1
- %tmp3039 = getelementptr inbounds float* %tmp3038, i64 1
- %tmp3040 = getelementptr inbounds float* %tmp3039, i64 1
- %tmp3041 = getelementptr inbounds float* %tmp3040, i64 1
- %tmp3042 = getelementptr inbounds float* %tmp3041, i64 1
- %tmp3043 = getelementptr inbounds float* %tmp3042, i64 1
- %tmp3044 = getelementptr inbounds float* %tmp3043, i64 1
- %tmp3045 = getelementptr inbounds float* %tmp3044, i64 1
- %tmp3046 = getelementptr inbounds float* %tmp3045, i64 1
- %tmp3047 = getelementptr inbounds float* %tmp3046, i64 1
- %tmp3048 = getelementptr inbounds float* %tmp3047, i64 1
- %tmp3049 = getelementptr inbounds float* %tmp3048, i64 1
- %tmp3050 = getelementptr inbounds float* %tmp3049, i64 1
- %tmp3051 = getelementptr inbounds float* %tmp3050, i64 1
- %tmp3052 = getelementptr inbounds float* %tmp3051, i64 1
- %tmp3053 = getelementptr inbounds float* %tmp3052, i64 1
- %tmp3054 = getelementptr inbounds float* %tmp3053, i64 1
- %tmp3055 = getelementptr inbounds float* %tmp3054, i64 1
- %tmp3056 = getelementptr inbounds float* %tmp3055, i64 1
- %tmp3057 = getelementptr inbounds float* %tmp3056, i64 1
- %tmp3058 = getelementptr inbounds float* %tmp3057, i64 1
- %tmp3059 = getelementptr inbounds float* %tmp3058, i64 1
- %tmp3060 = getelementptr inbounds float* %tmp3059, i64 1
- %tmp3061 = getelementptr inbounds float* %tmp3060, i64 1
- %tmp3062 = getelementptr inbounds float* %tmp3061, i64 1
- %tmp3063 = getelementptr inbounds float* %tmp3062, i64 1
- %tmp3064 = getelementptr inbounds float* %tmp3063, i64 1
- %tmp3065 = getelementptr inbounds float* %tmp3064, i64 1
- %tmp3066 = getelementptr inbounds float* %tmp3065, i64 1
- %tmp3067 = getelementptr inbounds float* %tmp3066, i64 1
- %tmp3068 = getelementptr inbounds float* %tmp3067, i64 1
- %tmp3069 = getelementptr inbounds float* %tmp3068, i64 1
- %tmp3070 = getelementptr inbounds float* %tmp3069, i64 1
- %tmp3071 = getelementptr inbounds float* %tmp3070, i64 1
- %tmp3072 = getelementptr inbounds float* %tmp3071, i64 1
- %tmp3073 = getelementptr inbounds float* %tmp3072, i64 1
- %tmp3074 = getelementptr inbounds float* %tmp3073, i64 1
- %tmp3075 = getelementptr inbounds float* %tmp3074, i64 1
- %tmp3076 = getelementptr inbounds float* %tmp3075, i64 1
- %tmp3077 = getelementptr inbounds float* %tmp3076, i64 1
- %tmp3078 = getelementptr inbounds float* %tmp3077, i64 1
- %tmp3079 = getelementptr inbounds float* %tmp3078, i64 1
- %tmp3080 = getelementptr inbounds float* %tmp3079, i64 1
- %tmp3081 = getelementptr inbounds float* %tmp3080, i64 1
- %tmp3082 = getelementptr inbounds float* %tmp3081, i64 1
- %tmp3083 = getelementptr inbounds float* %tmp3082, i64 1
- %tmp3084 = getelementptr inbounds float* %tmp3083, i64 1
- %tmp3085 = getelementptr inbounds float* %tmp3084, i64 1
- %tmp3086 = getelementptr inbounds float* %tmp3085, i64 1
- %tmp3087 = getelementptr inbounds float* %tmp3086, i64 1
- %tmp3088 = getelementptr inbounds float* %tmp3087, i64 1
- %tmp3089 = getelementptr inbounds float* %tmp3088, i64 1
- %tmp3090 = getelementptr inbounds float* %tmp3089, i64 1
- %tmp3091 = getelementptr inbounds float* %tmp3090, i64 1
- %tmp3092 = getelementptr inbounds float* %tmp3091, i64 1
- %tmp3093 = getelementptr inbounds float* %tmp3092, i64 1
- %tmp3094 = getelementptr inbounds float* %tmp3093, i64 1
- %tmp3095 = getelementptr inbounds float* %tmp3094, i64 1
- %tmp3096 = getelementptr inbounds float* %tmp3095, i64 1
- %tmp3097 = getelementptr inbounds float* %tmp3096, i64 1
- %tmp3098 = getelementptr inbounds float* %tmp3097, i64 1
- %tmp3099 = getelementptr inbounds float* %tmp3098, i64 1
- %tmp3100 = getelementptr inbounds float* %tmp3099, i64 1
- %tmp3101 = getelementptr inbounds float* %tmp3100, i64 1
- %tmp3102 = getelementptr inbounds float* %tmp3101, i64 1
- %tmp3103 = getelementptr inbounds float* %tmp3102, i64 1
- %tmp3104 = getelementptr inbounds float* %tmp3103, i64 1
- %tmp3105 = getelementptr inbounds float* %tmp3104, i64 1
- %tmp3106 = getelementptr inbounds float* %tmp3105, i64 1
- %tmp3107 = getelementptr inbounds float* %tmp3106, i64 1
- %tmp3108 = getelementptr inbounds float* %tmp3107, i64 1
- %tmp3109 = getelementptr inbounds float* %tmp3108, i64 1
- %tmp3110 = getelementptr inbounds float* %tmp3109, i64 1
- %tmp3111 = getelementptr inbounds float* %tmp3110, i64 1
- %tmp3112 = getelementptr inbounds float* %tmp3111, i64 1
- %tmp3113 = getelementptr inbounds float* %tmp3112, i64 1
- %tmp3114 = getelementptr inbounds float* %tmp3113, i64 1
- %tmp3115 = getelementptr inbounds float* %tmp3114, i64 1
- %tmp3116 = getelementptr inbounds float* %tmp3115, i64 1
- %tmp3117 = getelementptr inbounds float* %tmp3116, i64 1
- %tmp3118 = getelementptr inbounds float* %tmp3117, i64 1
- %tmp3119 = getelementptr inbounds float* %tmp3118, i64 1
- %tmp3120 = getelementptr inbounds float* %tmp3119, i64 1
- %tmp3121 = getelementptr inbounds float* %tmp3120, i64 1
- %tmp3122 = getelementptr inbounds float* %tmp3121, i64 1
- %tmp3123 = getelementptr inbounds float* %tmp3122, i64 1
- %tmp3124 = getelementptr inbounds float* %tmp3123, i64 1
- %tmp3125 = getelementptr inbounds float* %tmp3124, i64 1
- %tmp3126 = getelementptr inbounds float* %tmp3125, i64 1
- %tmp3127 = getelementptr inbounds float* %tmp3126, i64 1
- %tmp3128 = getelementptr inbounds float* %tmp3127, i64 1
- %tmp3129 = getelementptr inbounds float* %tmp3128, i64 1
- %tmp3130 = getelementptr inbounds float* %tmp3129, i64 1
- %tmp3131 = getelementptr inbounds float* %tmp3130, i64 1
- %tmp3132 = getelementptr inbounds float* %tmp3131, i64 1
- %tmp3133 = getelementptr inbounds float* %tmp3132, i64 1
- %tmp3134 = getelementptr inbounds float* %tmp3133, i64 1
- %tmp3135 = getelementptr inbounds float* %tmp3134, i64 1
- %tmp3136 = getelementptr inbounds float* %tmp3135, i64 1
- %tmp3137 = getelementptr inbounds float* %tmp3136, i64 1
- %tmp3138 = getelementptr inbounds float* %tmp3137, i64 1
- %tmp3139 = getelementptr inbounds float* %tmp3138, i64 1
- %tmp3140 = getelementptr inbounds float* %tmp3139, i64 1
- %tmp3141 = getelementptr inbounds float* %tmp3140, i64 1
- %tmp3142 = getelementptr inbounds float* %tmp3141, i64 1
- %tmp3143 = getelementptr inbounds float* %tmp3142, i64 1
- %tmp3144 = getelementptr inbounds float* %tmp3143, i64 1
- %tmp3145 = getelementptr inbounds float* %tmp3144, i64 1
- %tmp3146 = getelementptr inbounds float* %tmp3145, i64 1
- %tmp3147 = getelementptr inbounds float* %tmp3146, i64 1
- %tmp3148 = getelementptr inbounds float* %tmp3147, i64 1
- %tmp3149 = getelementptr inbounds float* %tmp3148, i64 1
- %tmp3150 = getelementptr inbounds float* %tmp3149, i64 1
- %tmp3151 = getelementptr inbounds float* %tmp3150, i64 1
- %tmp3152 = getelementptr inbounds float* %tmp3151, i64 1
- %tmp3153 = getelementptr inbounds float* %tmp3152, i64 1
- %tmp3154 = getelementptr inbounds float* %tmp3153, i64 1
- %tmp3155 = getelementptr inbounds float* %tmp3154, i64 1
- %tmp3156 = getelementptr inbounds float* %tmp3155, i64 1
- %tmp3157 = getelementptr inbounds float* %tmp3156, i64 1
- %tmp3158 = getelementptr inbounds float* %tmp3157, i64 1
- %tmp3159 = getelementptr inbounds float* %tmp3158, i64 1
- %tmp3160 = getelementptr inbounds float* %tmp3159, i64 1
- %tmp3161 = getelementptr inbounds float* %tmp3160, i64 1
- %tmp3162 = getelementptr inbounds float* %tmp3161, i64 1
- %tmp3163 = getelementptr inbounds float* %tmp3162, i64 1
- %tmp3164 = getelementptr inbounds float* %tmp3163, i64 1
- %tmp3165 = getelementptr inbounds float* %tmp3164, i64 1
- %tmp3166 = getelementptr inbounds float* %tmp3165, i64 1
- %tmp3167 = getelementptr inbounds float* %tmp3166, i64 1
- %tmp3168 = getelementptr inbounds float* %tmp3167, i64 1
- %tmp3169 = getelementptr inbounds float* %tmp3168, i64 1
- %tmp3170 = getelementptr inbounds float* %tmp3169, i64 1
- %tmp3171 = getelementptr inbounds float* %tmp3170, i64 1
- %tmp3172 = getelementptr inbounds float* %tmp3171, i64 1
- %tmp3173 = getelementptr inbounds float* %tmp3172, i64 1
- %tmp3174 = getelementptr inbounds float* %tmp3173, i64 1
- %tmp3175 = getelementptr inbounds float* %tmp3174, i64 1
- %tmp3176 = getelementptr inbounds float* %tmp3175, i64 1
- %tmp3177 = getelementptr inbounds float* %tmp3176, i64 1
- %tmp3178 = getelementptr inbounds float* %tmp3177, i64 1
- %tmp3179 = getelementptr inbounds float* %tmp3178, i64 1
- %tmp3180 = getelementptr inbounds float* %tmp3179, i64 1
- %tmp3181 = getelementptr inbounds float* %tmp3180, i64 1
- %tmp3182 = getelementptr inbounds float* %tmp3181, i64 1
- %tmp3183 = getelementptr inbounds float* %tmp3182, i64 1
- %tmp3184 = getelementptr inbounds float* %tmp3183, i64 1
- %tmp3185 = getelementptr inbounds float* %tmp3184, i64 1
- %tmp3186 = getelementptr inbounds float* %tmp3185, i64 1
- %tmp3187 = getelementptr inbounds float* %tmp3186, i64 1
- %tmp3188 = getelementptr inbounds float* %tmp3187, i64 1
- %tmp3189 = getelementptr inbounds float* %tmp3188, i64 1
- %tmp3190 = getelementptr inbounds float* %tmp3189, i64 1
- %tmp3191 = getelementptr inbounds float* %tmp3190, i64 1
- %tmp3192 = getelementptr inbounds float* %tmp3191, i64 1
- %tmp3193 = getelementptr inbounds float* %tmp3192, i64 1
- %tmp3194 = getelementptr inbounds float* %tmp3193, i64 1
- %tmp3195 = getelementptr inbounds float* %tmp3194, i64 1
- %tmp3196 = getelementptr inbounds float* %tmp3195, i64 1
- %tmp3197 = getelementptr inbounds float* %tmp3196, i64 1
- %tmp3198 = getelementptr inbounds float* %tmp3197, i64 1
- %tmp3199 = getelementptr inbounds float* %tmp3198, i64 1
- %tmp3200 = getelementptr inbounds float* %tmp3199, i64 1
- %tmp3201 = getelementptr inbounds float* %tmp3200, i64 1
- %tmp3202 = getelementptr inbounds float* %tmp3201, i64 1
- %tmp3203 = getelementptr inbounds float* %tmp3202, i64 1
- %tmp3204 = getelementptr inbounds float* %tmp3203, i64 1
- %tmp3205 = getelementptr inbounds float* %tmp3204, i64 1
- %tmp3206 = getelementptr inbounds float* %tmp3205, i64 1
- %tmp3207 = getelementptr inbounds float* %tmp3206, i64 1
- %tmp3208 = getelementptr inbounds float* %tmp3207, i64 1
- %tmp3209 = getelementptr inbounds float* %tmp3208, i64 1
- %tmp3210 = getelementptr inbounds float* %tmp3209, i64 1
- %tmp3211 = getelementptr inbounds float* %tmp3210, i64 1
- %tmp3212 = getelementptr inbounds float* %tmp3211, i64 1
- %tmp3213 = getelementptr inbounds float* %tmp3212, i64 1
- %tmp3214 = getelementptr inbounds float* %tmp3213, i64 1
- %tmp3215 = getelementptr inbounds float* %tmp3214, i64 1
- %tmp3216 = getelementptr inbounds float* %tmp3215, i64 1
- %tmp3217 = getelementptr inbounds float* %tmp3216, i64 1
- %tmp3218 = getelementptr inbounds float* %tmp3217, i64 1
- %tmp3219 = getelementptr inbounds float* %tmp3218, i64 1
- %tmp3220 = getelementptr inbounds float* %tmp3219, i64 1
- %tmp3221 = getelementptr inbounds float* %tmp3220, i64 1
- %tmp3222 = getelementptr inbounds float* %tmp3221, i64 1
- %tmp3223 = getelementptr inbounds float* %tmp3222, i64 1
- %tmp3224 = getelementptr inbounds float* %tmp3223, i64 1
- %tmp3225 = getelementptr inbounds float* %tmp3224, i64 1
- %tmp3226 = getelementptr inbounds float* %tmp3225, i64 1
- %tmp3227 = getelementptr inbounds float* %tmp3226, i64 1
- %tmp3228 = getelementptr inbounds float* %tmp3227, i64 1
- %tmp3229 = getelementptr inbounds float* %tmp3228, i64 1
- %tmp3230 = getelementptr inbounds float* %tmp3229, i64 1
- %tmp3231 = getelementptr inbounds float* %tmp3230, i64 1
- %tmp3232 = getelementptr inbounds float* %tmp3231, i64 1
- %tmp3233 = getelementptr inbounds float* %tmp3232, i64 1
- %tmp3234 = getelementptr inbounds float* %tmp3233, i64 1
- %tmp3235 = getelementptr inbounds float* %tmp3234, i64 1
- %tmp3236 = getelementptr inbounds float* %tmp3235, i64 1
- %tmp3237 = getelementptr inbounds float* %tmp3236, i64 1
- %tmp3238 = getelementptr inbounds float* %tmp3237, i64 1
- %tmp3239 = getelementptr inbounds float* %tmp3238, i64 1
- %tmp3240 = getelementptr inbounds float* %tmp3239, i64 1
- %tmp3241 = getelementptr inbounds float* %tmp3240, i64 1
- %tmp3242 = getelementptr inbounds float* %tmp3241, i64 1
- %tmp3243 = getelementptr inbounds float* %tmp3242, i64 1
- %tmp3244 = getelementptr inbounds float* %tmp3243, i64 1
- %tmp3245 = getelementptr inbounds float* %tmp3244, i64 1
- %tmp3246 = getelementptr inbounds float* %tmp3245, i64 1
- %tmp3247 = getelementptr inbounds float* %tmp3246, i64 1
- %tmp3248 = getelementptr inbounds float* %tmp3247, i64 1
- %tmp3249 = getelementptr inbounds float* %tmp3248, i64 1
- %tmp3250 = getelementptr inbounds float* %tmp3249, i64 1
- %tmp3251 = getelementptr inbounds float* %tmp3250, i64 1
- %tmp3252 = getelementptr inbounds float* %tmp3251, i64 1
- %tmp3253 = getelementptr inbounds float* %tmp3252, i64 1
- %tmp3254 = getelementptr inbounds float* %tmp3253, i64 1
- %tmp3255 = getelementptr inbounds float* %tmp3254, i64 1
- %tmp3256 = getelementptr inbounds float* %tmp3255, i64 1
- %tmp3257 = getelementptr inbounds float* %tmp3256, i64 1
- %tmp3258 = getelementptr inbounds float* %tmp3257, i64 1
- %tmp3259 = getelementptr inbounds float* %tmp3258, i64 1
- %tmp3260 = getelementptr inbounds float* %tmp3259, i64 1
- %tmp3261 = getelementptr inbounds float* %tmp3260, i64 1
- %tmp3262 = getelementptr inbounds float* %tmp3261, i64 1
- %tmp3263 = getelementptr inbounds float* %tmp3262, i64 1
- %tmp3264 = getelementptr inbounds float* %tmp3263, i64 1
- %tmp3265 = getelementptr inbounds float* %tmp3264, i64 1
- %tmp3266 = getelementptr inbounds float* %tmp3265, i64 1
- %tmp3267 = getelementptr inbounds float* %tmp3266, i64 1
- %tmp3268 = getelementptr inbounds float* %tmp3267, i64 1
- %tmp3269 = getelementptr inbounds float* %tmp3268, i64 1
- %tmp3270 = getelementptr inbounds float* %tmp3269, i64 1
- %tmp3271 = getelementptr inbounds float* %tmp3270, i64 1
- %tmp3272 = getelementptr inbounds float* %tmp3271, i64 1
- %tmp3273 = getelementptr inbounds float* %tmp3272, i64 1
- %tmp3274 = getelementptr inbounds float* %tmp3273, i64 1
- %tmp3275 = getelementptr inbounds float* %tmp3274, i64 1
- %tmp3276 = getelementptr inbounds float* %tmp3275, i64 1
- %tmp3277 = getelementptr inbounds float* %tmp3276, i64 1
- %tmp3278 = getelementptr inbounds float* %tmp3277, i64 1
- %tmp3279 = getelementptr inbounds float* %tmp3278, i64 1
- %tmp3280 = getelementptr inbounds float* %tmp3279, i64 1
- %tmp3281 = getelementptr inbounds float* %tmp3280, i64 1
- %tmp3282 = getelementptr inbounds float* %tmp3281, i64 1
- %tmp3283 = getelementptr inbounds float* %tmp3282, i64 1
- %tmp3284 = getelementptr inbounds float* %tmp3283, i64 1
- %tmp3285 = getelementptr inbounds float* %tmp3284, i64 1
- %tmp3286 = getelementptr inbounds float* %tmp3285, i64 1
- %tmp3287 = getelementptr inbounds float* %tmp3286, i64 1
- %tmp3288 = getelementptr inbounds float* %tmp3287, i64 1
- %tmp3289 = getelementptr inbounds float* %tmp3288, i64 1
- %tmp3290 = getelementptr inbounds float* %tmp3289, i64 1
- %tmp3291 = getelementptr inbounds float* %tmp3290, i64 1
- %tmp3292 = getelementptr inbounds float* %tmp3291, i64 1
- %tmp3293 = getelementptr inbounds float* %tmp3292, i64 1
- %tmp3294 = getelementptr inbounds float* %tmp3293, i64 1
- %tmp3295 = getelementptr inbounds float* %tmp3294, i64 1
- %tmp3296 = getelementptr inbounds float* %tmp3295, i64 1
- %tmp3297 = getelementptr inbounds float* %tmp3296, i64 1
- %tmp3298 = getelementptr inbounds float* %tmp3297, i64 1
- %tmp3299 = getelementptr inbounds float* %tmp3298, i64 1
- %tmp3300 = getelementptr inbounds float* %tmp3299, i64 1
- %tmp3301 = getelementptr inbounds float* %tmp3300, i64 1
- %tmp3302 = getelementptr inbounds float* %tmp3301, i64 1
- %tmp3303 = getelementptr inbounds float* %tmp3302, i64 1
- %tmp3304 = getelementptr inbounds float* %tmp3303, i64 1
- %tmp3305 = getelementptr inbounds float* %tmp3304, i64 1
- %tmp3306 = getelementptr inbounds float* %tmp3305, i64 1
- %tmp3307 = getelementptr inbounds float* %tmp3306, i64 1
- %tmp3308 = getelementptr inbounds float* %tmp3307, i64 1
- %tmp3309 = getelementptr inbounds float* %tmp3308, i64 1
- %tmp3310 = getelementptr inbounds float* %tmp3309, i64 1
- %tmp3311 = getelementptr inbounds float* %tmp3310, i64 1
- %tmp3312 = getelementptr inbounds float* %tmp3311, i64 1
- %tmp3313 = getelementptr inbounds float* %tmp3312, i64 1
- %tmp3314 = getelementptr inbounds float* %tmp3313, i64 1
- %tmp3315 = getelementptr inbounds float* %tmp3314, i64 1
- %tmp3316 = getelementptr inbounds float* %tmp3315, i64 1
- %tmp3317 = getelementptr inbounds float* %tmp3316, i64 1
- %tmp3318 = getelementptr inbounds float* %tmp3317, i64 1
- %tmp3319 = getelementptr inbounds float* %tmp3318, i64 1
- %tmp3320 = getelementptr inbounds float* %tmp3319, i64 1
- %tmp3321 = getelementptr inbounds float* %tmp3320, i64 1
- %tmp3322 = getelementptr inbounds float* %tmp3321, i64 1
- %tmp3323 = getelementptr inbounds float* %tmp3322, i64 1
- %tmp3324 = getelementptr inbounds float* %tmp3323, i64 1
- %tmp3325 = getelementptr inbounds float* %tmp3324, i64 1
- %tmp3326 = getelementptr inbounds float* %tmp3325, i64 1
- %tmp3327 = getelementptr inbounds float* %tmp3326, i64 1
- %tmp3328 = getelementptr inbounds float* %tmp3327, i64 1
- %tmp3329 = getelementptr inbounds float* %tmp3328, i64 1
- %tmp3330 = getelementptr inbounds float* %tmp3329, i64 1
- %tmp3331 = getelementptr inbounds float* %tmp3330, i64 1
- %tmp3332 = getelementptr inbounds float* %tmp3331, i64 1
- %tmp3333 = getelementptr inbounds float* %tmp3332, i64 1
- %tmp3334 = getelementptr inbounds float* %tmp3333, i64 1
- %tmp3335 = getelementptr inbounds float* %tmp3334, i64 1
- %tmp3336 = getelementptr inbounds float* %tmp3335, i64 1
- %tmp3337 = getelementptr inbounds float* %tmp3336, i64 1
- %tmp3338 = getelementptr inbounds float* %tmp3337, i64 1
- %tmp3339 = getelementptr inbounds float* %tmp3338, i64 1
- %tmp3340 = getelementptr inbounds float* %tmp3339, i64 1
- %tmp3341 = getelementptr inbounds float* %tmp3340, i64 1
- %tmp3342 = getelementptr inbounds float* %tmp3341, i64 1
- %tmp3343 = getelementptr inbounds float* %tmp3342, i64 1
- %tmp3344 = getelementptr inbounds float* %tmp3343, i64 1
- %tmp3345 = getelementptr inbounds float* %tmp3344, i64 1
- %tmp3346 = getelementptr inbounds float* %tmp3345, i64 1
- %tmp3347 = getelementptr inbounds float* %tmp3346, i64 1
- %tmp3348 = getelementptr inbounds float* %tmp3347, i64 1
- %tmp3349 = getelementptr inbounds float* %tmp3348, i64 1
- %tmp3350 = getelementptr inbounds float* %tmp3349, i64 1
- %tmp3351 = getelementptr inbounds float* %tmp3350, i64 1
- %tmp3352 = getelementptr inbounds float* %tmp3351, i64 1
- %tmp3353 = getelementptr inbounds float* %tmp3352, i64 1
- %tmp3354 = getelementptr inbounds float* %tmp3353, i64 1
- %tmp3355 = getelementptr inbounds float* %tmp3354, i64 1
- %tmp3356 = getelementptr inbounds float* %tmp3355, i64 1
- %tmp3357 = getelementptr inbounds float* %tmp3356, i64 1
- %tmp3358 = getelementptr inbounds float* %tmp3357, i64 1
- %tmp3359 = getelementptr inbounds float* %tmp3358, i64 1
- %tmp3360 = getelementptr inbounds float* %tmp3359, i64 1
- %tmp3361 = getelementptr inbounds float* %tmp3360, i64 1
- %tmp3362 = getelementptr inbounds float* %tmp3361, i64 1
- %tmp3363 = getelementptr inbounds float* %tmp3362, i64 1
- %tmp3364 = getelementptr inbounds float* %tmp3363, i64 1
- %tmp3365 = getelementptr inbounds float* %tmp3364, i64 1
- %tmp3366 = getelementptr inbounds float* %tmp3365, i64 1
- %tmp3367 = getelementptr inbounds float* %tmp3366, i64 1
- %tmp3368 = getelementptr inbounds float* %tmp3367, i64 1
- %tmp3369 = getelementptr inbounds float* %tmp3368, i64 1
- %tmp3370 = getelementptr inbounds float* %tmp3369, i64 1
- %tmp3371 = getelementptr inbounds float* %tmp3370, i64 1
- %tmp3372 = getelementptr inbounds float* %tmp3371, i64 1
- %tmp3373 = getelementptr inbounds float* %tmp3372, i64 1
- %tmp3374 = getelementptr inbounds float* %tmp3373, i64 1
- %tmp3375 = getelementptr inbounds float* %tmp3374, i64 1
- %tmp3376 = getelementptr inbounds float* %tmp3375, i64 1
- %tmp3377 = getelementptr inbounds float* %tmp3376, i64 1
- %tmp3378 = getelementptr inbounds float* %tmp3377, i64 1
- %tmp3379 = getelementptr inbounds float* %tmp3378, i64 1
- %tmp3380 = getelementptr inbounds float* %tmp3379, i64 1
- %tmp3381 = getelementptr inbounds float* %tmp3380, i64 1
- %tmp3382 = getelementptr inbounds float* %tmp3381, i64 1
- %tmp3383 = getelementptr inbounds float* %tmp3382, i64 1
- %tmp3384 = getelementptr inbounds float* %tmp3383, i64 1
- %tmp3385 = getelementptr inbounds float* %tmp3384, i64 1
- %tmp3386 = getelementptr inbounds float* %tmp3385, i64 1
- %tmp3387 = getelementptr inbounds float* %tmp3386, i64 1
- %tmp3388 = getelementptr inbounds float* %tmp3387, i64 1
- %tmp3389 = getelementptr inbounds float* %tmp3388, i64 1
- %tmp3390 = getelementptr inbounds float* %tmp3389, i64 1
- %tmp3391 = getelementptr inbounds float* %tmp3390, i64 1
- %tmp3392 = getelementptr inbounds float* %tmp3391, i64 1
- %tmp3393 = getelementptr inbounds float* %tmp3392, i64 1
- %tmp3394 = getelementptr inbounds float* %tmp3393, i64 1
- %tmp3395 = getelementptr inbounds float* %tmp3394, i64 1
- %tmp3396 = getelementptr inbounds float* %tmp3395, i64 1
- %tmp3397 = getelementptr inbounds float* %tmp3396, i64 1
- %tmp3398 = getelementptr inbounds float* %tmp3397, i64 1
- %tmp3399 = getelementptr inbounds float* %tmp3398, i64 1
- %tmp3400 = getelementptr inbounds float* %tmp3399, i64 1
- %tmp3401 = getelementptr inbounds float* %tmp3400, i64 1
- %tmp3402 = getelementptr inbounds float* %tmp3401, i64 1
- %tmp3403 = getelementptr inbounds float* %tmp3402, i64 1
- %tmp3404 = getelementptr inbounds float* %tmp3403, i64 1
- %tmp3405 = getelementptr inbounds float* %tmp3404, i64 1
- %tmp3406 = getelementptr inbounds float* %tmp3405, i64 1
- %tmp3407 = getelementptr inbounds float* %tmp3406, i64 1
- %tmp3408 = getelementptr inbounds float* %tmp3407, i64 1
- %tmp3409 = getelementptr inbounds float* %tmp3408, i64 1
- %tmp3410 = getelementptr inbounds float* %tmp3409, i64 1
- %tmp3411 = getelementptr inbounds float* %tmp3410, i64 1
- %tmp3412 = getelementptr inbounds float* %tmp3411, i64 1
- %tmp3413 = getelementptr inbounds float* %tmp3412, i64 1
- %tmp3414 = getelementptr inbounds float* %tmp3413, i64 1
- %tmp3415 = getelementptr inbounds float* %tmp3414, i64 1
- %tmp3416 = getelementptr inbounds float* %tmp3415, i64 1
- %tmp3417 = getelementptr inbounds float* %tmp3416, i64 1
- %tmp3418 = getelementptr inbounds float* %tmp3417, i64 1
- %tmp3419 = getelementptr inbounds float* %tmp3418, i64 1
- %tmp3420 = getelementptr inbounds float* %tmp3419, i64 1
- %tmp3421 = getelementptr inbounds float* %tmp3420, i64 1
- %tmp3422 = getelementptr inbounds float* %tmp3421, i64 1
- %tmp3423 = getelementptr inbounds float* %tmp3422, i64 1
- %tmp3424 = getelementptr inbounds float* %tmp3423, i64 1
- %tmp3425 = getelementptr inbounds float* %tmp3424, i64 1
- %tmp3426 = getelementptr inbounds float* %tmp3425, i64 1
- %tmp3427 = getelementptr inbounds float* %tmp3426, i64 1
- %tmp3428 = getelementptr inbounds float* %tmp3427, i64 1
- %tmp3429 = getelementptr inbounds float* %tmp3428, i64 1
- %tmp3430 = getelementptr inbounds float* %tmp3429, i64 1
- %tmp3431 = getelementptr inbounds float* %tmp3430, i64 1
- %tmp3432 = getelementptr inbounds float* %tmp3431, i64 1
- %tmp3433 = getelementptr inbounds float* %tmp3432, i64 1
- %tmp3434 = getelementptr inbounds float* %tmp3433, i64 1
- %tmp3435 = getelementptr inbounds float* %tmp3434, i64 1
- %tmp3436 = getelementptr inbounds float* %tmp3435, i64 1
- %tmp3437 = getelementptr inbounds float* %tmp3436, i64 1
- %tmp3438 = getelementptr inbounds float* %tmp3437, i64 1
- %tmp3439 = getelementptr inbounds float* %tmp3438, i64 1
- %tmp3440 = getelementptr inbounds float* %tmp3439, i64 1
- %tmp3441 = getelementptr inbounds float* %tmp3440, i64 1
- %tmp3442 = getelementptr inbounds float* %tmp3441, i64 1
- %tmp3443 = getelementptr inbounds float* %tmp3442, i64 1
- %tmp3444 = getelementptr inbounds float* %tmp3443, i64 1
- %tmp3445 = getelementptr inbounds float* %tmp3444, i64 1
- %tmp3446 = getelementptr inbounds float* %tmp3445, i64 1
- %tmp3447 = getelementptr inbounds float* %tmp3446, i64 1
- %tmp3448 = getelementptr inbounds float* %tmp3447, i64 1
- %tmp3449 = getelementptr inbounds float* %tmp3448, i64 1
- %tmp3450 = getelementptr inbounds float* %tmp3449, i64 1
- %tmp3451 = getelementptr inbounds float* %tmp3450, i64 1
- %tmp3452 = getelementptr inbounds float* %tmp3451, i64 1
- %tmp3453 = getelementptr inbounds float* %tmp3452, i64 1
- %tmp3454 = getelementptr inbounds float* %tmp3453, i64 1
- %tmp3455 = getelementptr inbounds float* %tmp3454, i64 1
- %tmp3456 = getelementptr inbounds float* %tmp3455, i64 1
- %tmp3457 = getelementptr inbounds float* %tmp3456, i64 1
- %tmp3458 = getelementptr inbounds float* %tmp3457, i64 1
- %tmp3459 = getelementptr inbounds float* %tmp3458, i64 1
- %tmp3460 = getelementptr inbounds float* %tmp3459, i64 1
- %tmp3461 = getelementptr inbounds float* %tmp3460, i64 1
- %tmp3462 = getelementptr inbounds float* %tmp3461, i64 1
- %tmp3463 = getelementptr inbounds float* %tmp3462, i64 1
- %tmp3464 = getelementptr inbounds float* %tmp3463, i64 1
- %tmp3465 = getelementptr inbounds float* %tmp3464, i64 1
- %tmp3466 = getelementptr inbounds float* %tmp3465, i64 1
- %tmp3467 = getelementptr inbounds float* %tmp3466, i64 1
- %tmp3468 = getelementptr inbounds float* %tmp3467, i64 1
- %tmp3469 = getelementptr inbounds float* %tmp3468, i64 1
- %tmp3470 = getelementptr inbounds float* %tmp3469, i64 1
- %tmp3471 = getelementptr inbounds float* %tmp3470, i64 1
- %tmp3472 = getelementptr inbounds float* %tmp3471, i64 1
- %tmp3473 = getelementptr inbounds float* %tmp3472, i64 1
- %tmp3474 = getelementptr inbounds float* %tmp3473, i64 1
- %tmp3475 = getelementptr inbounds float* %tmp3474, i64 1
- %tmp3476 = getelementptr inbounds float* %tmp3475, i64 1
- %tmp3477 = getelementptr inbounds float* %tmp3476, i64 1
- %tmp3478 = getelementptr inbounds float* %tmp3477, i64 1
- %tmp3479 = getelementptr inbounds float* %tmp3478, i64 1
- %tmp3480 = getelementptr inbounds float* %tmp3479, i64 1
- %tmp3481 = getelementptr inbounds float* %tmp3480, i64 1
- %tmp3482 = getelementptr inbounds float* %tmp3481, i64 1
- %tmp3483 = getelementptr inbounds float* %tmp3482, i64 1
- %tmp3484 = getelementptr inbounds float* %tmp3483, i64 1
- %tmp3485 = getelementptr inbounds float* %tmp3484, i64 1
- %tmp3486 = getelementptr inbounds float* %tmp3485, i64 1
- %tmp3487 = getelementptr inbounds float* %tmp3486, i64 1
- %tmp3488 = getelementptr inbounds float* %tmp3487, i64 1
- %tmp3489 = getelementptr inbounds float* %tmp3488, i64 1
- %tmp3490 = getelementptr inbounds float* %tmp3489, i64 1
- %tmp3491 = getelementptr inbounds float* %tmp3490, i64 1
- %tmp3492 = getelementptr inbounds float* %tmp3491, i64 1
- %tmp3493 = getelementptr inbounds float* %tmp3492, i64 1
- %tmp3494 = getelementptr inbounds float* %tmp3493, i64 1
- %tmp3495 = getelementptr inbounds float* %tmp3494, i64 1
- %tmp3496 = getelementptr inbounds float* %tmp3495, i64 1
- %tmp3497 = getelementptr inbounds float* %tmp3496, i64 1
- %tmp3498 = getelementptr inbounds float* %tmp3497, i64 1
- %tmp3499 = getelementptr inbounds float* %tmp3498, i64 1
- %tmp3500 = getelementptr inbounds float* %tmp3499, i64 1
- %tmp3501 = getelementptr inbounds float* %tmp3500, i64 1
- %tmp3502 = getelementptr inbounds float* %tmp3501, i64 1
- %tmp3503 = getelementptr inbounds float* %tmp3502, i64 1
- %tmp3504 = getelementptr inbounds float* %tmp3503, i64 1
- %tmp3505 = getelementptr inbounds float* %tmp3504, i64 1
- %tmp3506 = getelementptr inbounds float* %tmp3505, i64 1
- %tmp3507 = getelementptr inbounds float* %tmp3506, i64 1
- %tmp3508 = getelementptr inbounds float* %tmp3507, i64 1
- %tmp3509 = getelementptr inbounds float* %tmp3508, i64 1
- %tmp3510 = getelementptr inbounds float* %tmp3509, i64 1
- %tmp3511 = getelementptr inbounds float* %tmp3510, i64 1
- %tmp3512 = getelementptr inbounds float* %tmp3511, i64 1
- %tmp3513 = getelementptr inbounds float* %tmp3512, i64 1
- %tmp3514 = getelementptr inbounds float* %tmp3513, i64 1
- %tmp3515 = getelementptr inbounds float* %tmp3514, i64 1
- %tmp3516 = getelementptr inbounds float* %tmp3515, i64 1
- %tmp3517 = getelementptr inbounds float* %tmp3516, i64 1
- %tmp3518 = getelementptr inbounds float* %tmp3517, i64 1
- %tmp3519 = getelementptr inbounds float* %tmp3518, i64 1
- %tmp3520 = getelementptr inbounds float* %tmp3519, i64 1
- %tmp3521 = getelementptr inbounds float* %tmp3520, i64 1
- %tmp3522 = getelementptr inbounds float* %tmp3521, i64 1
- %tmp3523 = getelementptr inbounds float* %tmp3522, i64 1
- %tmp3524 = getelementptr inbounds float* %tmp3523, i64 1
- %tmp3525 = getelementptr inbounds float* %tmp3524, i64 1
- %tmp3526 = getelementptr inbounds float* %tmp3525, i64 1
- %tmp3527 = getelementptr inbounds float* %tmp3526, i64 1
- %tmp3528 = getelementptr inbounds float* %tmp3527, i64 1
- %tmp3529 = getelementptr inbounds float* %tmp3528, i64 1
- %tmp3530 = getelementptr inbounds float* %tmp3529, i64 1
- %tmp3531 = getelementptr inbounds float* %tmp3530, i64 1
- %tmp3532 = getelementptr inbounds float* %tmp3531, i64 1
- %tmp3533 = getelementptr inbounds float* %tmp3532, i64 1
- %tmp3534 = getelementptr inbounds float* %tmp3533, i64 1
- %tmp3535 = getelementptr inbounds float* %tmp3534, i64 1
- %tmp3536 = getelementptr inbounds float* %tmp3535, i64 1
- %tmp3537 = getelementptr inbounds float* %tmp3536, i64 1
- %tmp3538 = getelementptr inbounds float* %tmp3537, i64 1
- %tmp3539 = getelementptr inbounds float* %tmp3538, i64 1
- %tmp3540 = getelementptr inbounds float* %tmp3539, i64 1
- %tmp3541 = getelementptr inbounds float* %tmp3540, i64 1
- %tmp3542 = getelementptr inbounds float* %tmp3541, i64 1
- %tmp3543 = getelementptr inbounds float* %tmp3542, i64 1
- %tmp3544 = getelementptr inbounds float* %tmp3543, i64 1
- %tmp3545 = getelementptr inbounds float* %tmp3544, i64 1
- %tmp3546 = getelementptr inbounds float* %tmp3545, i64 1
- %tmp3547 = getelementptr inbounds float* %tmp3546, i64 1
- %tmp3548 = getelementptr inbounds float* %tmp3547, i64 1
- %tmp3549 = getelementptr inbounds float* %tmp3548, i64 1
- %tmp3550 = getelementptr inbounds float* %tmp3549, i64 1
- %tmp3551 = getelementptr inbounds float* %tmp3550, i64 1
- %tmp3552 = getelementptr inbounds float* %tmp3551, i64 1
- %tmp3553 = getelementptr inbounds float* %tmp3552, i64 1
- %tmp3554 = getelementptr inbounds float* %tmp3553, i64 1
- %tmp3555 = getelementptr inbounds float* %tmp3554, i64 1
- %tmp3556 = getelementptr inbounds float* %tmp3555, i64 1
- %tmp3557 = getelementptr inbounds float* %tmp3556, i64 1
- %tmp3558 = getelementptr inbounds float* %tmp3557, i64 1
- %tmp3559 = getelementptr inbounds float* %tmp3558, i64 1
- %tmp3560 = getelementptr inbounds float* %tmp3559, i64 1
- %tmp3561 = getelementptr inbounds float* %tmp3560, i64 1
- %tmp3562 = getelementptr inbounds float* %tmp3561, i64 1
- %tmp3563 = getelementptr inbounds float* %tmp3562, i64 1
- %tmp3564 = getelementptr inbounds float* %tmp3563, i64 1
- %tmp3565 = getelementptr inbounds float* %tmp3564, i64 1
- %tmp3566 = getelementptr inbounds float* %tmp3565, i64 1
- %tmp3567 = getelementptr inbounds float* %tmp3566, i64 1
- %tmp3568 = getelementptr inbounds float* %tmp3567, i64 1
- %tmp3569 = getelementptr inbounds float* %tmp3568, i64 1
- %tmp3570 = getelementptr inbounds float* %tmp3569, i64 1
- %tmp3571 = getelementptr inbounds float* %tmp3570, i64 1
- %tmp3572 = getelementptr inbounds float* %tmp3571, i64 1
- %tmp3573 = getelementptr inbounds float* %tmp3572, i64 1
- %tmp3574 = getelementptr inbounds float* %tmp3573, i64 1
- %tmp3575 = getelementptr inbounds float* %tmp3574, i64 1
- %tmp3576 = getelementptr inbounds float* %tmp3575, i64 1
- %tmp3577 = getelementptr inbounds float* %tmp3576, i64 1
- %tmp3578 = getelementptr inbounds float* %tmp3577, i64 1
- %tmp3579 = getelementptr inbounds float* %tmp3578, i64 1
- %tmp3580 = getelementptr inbounds float* %tmp3579, i64 1
- %tmp3581 = getelementptr inbounds float* %tmp3580, i64 1
- %tmp3582 = getelementptr inbounds float* %tmp3581, i64 1
- %tmp3583 = getelementptr inbounds float* %tmp3582, i64 1
- %tmp3584 = getelementptr inbounds float* %tmp3583, i64 1
- %tmp3585 = getelementptr inbounds float* %tmp3584, i64 1
- %tmp3586 = getelementptr inbounds float* %tmp3585, i64 1
- %tmp3587 = getelementptr inbounds float* %tmp3586, i64 1
- %tmp3588 = getelementptr inbounds float* %tmp3587, i64 1
- %tmp3589 = getelementptr inbounds float* %tmp3588, i64 1
- %tmp3590 = getelementptr inbounds float* %tmp3589, i64 1
- %tmp3591 = getelementptr inbounds float* %tmp3590, i64 1
- %tmp3592 = getelementptr inbounds float* %tmp3591, i64 1
- %tmp3593 = getelementptr inbounds float* %tmp3592, i64 1
- %tmp3594 = getelementptr inbounds float* %tmp3593, i64 1
- %tmp3595 = getelementptr inbounds float* %tmp3594, i64 1
- %tmp3596 = getelementptr inbounds float* %tmp3595, i64 1
- %tmp3597 = getelementptr inbounds float* %tmp3596, i64 1
- %tmp3598 = getelementptr inbounds float* %tmp3597, i64 1
- %tmp3599 = getelementptr inbounds float* %tmp3598, i64 1
- %tmp3600 = getelementptr inbounds float* %tmp3599, i64 1
- %tmp3601 = getelementptr inbounds float* %tmp3600, i64 1
- %tmp3602 = getelementptr inbounds float* %tmp3601, i64 1
- %tmp3603 = getelementptr inbounds float* %tmp3602, i64 1
- %tmp3604 = getelementptr inbounds float* %tmp3603, i64 1
- %tmp3605 = getelementptr inbounds float* %tmp3604, i64 1
- %tmp3606 = getelementptr inbounds float* %tmp3605, i64 1
- %tmp3607 = getelementptr inbounds float* %tmp3606, i64 1
- %tmp3608 = getelementptr inbounds float* %tmp3607, i64 1
- %tmp3609 = getelementptr inbounds float* %tmp3608, i64 1
- %tmp3610 = getelementptr inbounds float* %tmp3609, i64 1
- %tmp3611 = getelementptr inbounds float* %tmp3610, i64 1
- %tmp3612 = getelementptr inbounds float* %tmp3611, i64 1
- %tmp3613 = getelementptr inbounds float* %tmp3612, i64 1
- %tmp3614 = getelementptr inbounds float* %tmp3613, i64 1
- %tmp3615 = getelementptr inbounds float* %tmp3614, i64 1
- %tmp3616 = getelementptr inbounds float* %tmp3615, i64 1
- %tmp3617 = getelementptr inbounds float* %tmp3616, i64 1
- %tmp3618 = getelementptr inbounds float* %tmp3617, i64 1
- %tmp3619 = getelementptr inbounds float* %tmp3618, i64 1
- %tmp3620 = getelementptr inbounds float* %tmp3619, i64 1
- %tmp3621 = getelementptr inbounds float* %tmp3620, i64 1
- %tmp3622 = getelementptr inbounds float* %tmp3621, i64 1
- %tmp3623 = getelementptr inbounds float* %tmp3622, i64 1
- %tmp3624 = getelementptr inbounds float* %tmp3623, i64 1
- %tmp3625 = getelementptr inbounds float* %tmp3624, i64 1
- %tmp3626 = getelementptr inbounds float* %tmp3625, i64 1
- %tmp3627 = getelementptr inbounds float* %tmp3626, i64 1
- %tmp3628 = getelementptr inbounds float* %tmp3627, i64 1
- %tmp3629 = getelementptr inbounds float* %tmp3628, i64 1
- %tmp3630 = getelementptr inbounds float* %tmp3629, i64 1
- %tmp3631 = getelementptr inbounds float* %tmp3630, i64 1
- %tmp3632 = getelementptr inbounds float* %tmp3631, i64 1
- %tmp3633 = getelementptr inbounds float* %tmp3632, i64 1
- %tmp3634 = getelementptr inbounds float* %tmp3633, i64 1
- %tmp3635 = getelementptr inbounds float* %tmp3634, i64 1
- %tmp3636 = getelementptr inbounds float* %tmp3635, i64 1
- %tmp3637 = getelementptr inbounds float* %tmp3636, i64 1
- %tmp3638 = getelementptr inbounds float* %tmp3637, i64 1
- %tmp3639 = getelementptr inbounds float* %tmp3638, i64 1
- %tmp3640 = getelementptr inbounds float* %tmp3639, i64 1
- %tmp3641 = getelementptr inbounds float* %tmp3640, i64 1
- %tmp3642 = getelementptr inbounds float* %tmp3641, i64 1
- %tmp3643 = getelementptr inbounds float* %tmp3642, i64 1
- %tmp3644 = getelementptr inbounds float* %tmp3643, i64 1
- %tmp3645 = getelementptr inbounds float* %tmp3644, i64 1
- %tmp3646 = getelementptr inbounds float* %tmp3645, i64 1
- %tmp3647 = getelementptr inbounds float* %tmp3646, i64 1
- %tmp3648 = getelementptr inbounds float* %tmp3647, i64 1
- %tmp3649 = getelementptr inbounds float* %tmp3648, i64 1
- %tmp3650 = getelementptr inbounds float* %tmp3649, i64 1
- %tmp3651 = getelementptr inbounds float* %tmp3650, i64 1
- %tmp3652 = getelementptr inbounds float* %tmp3651, i64 1
- %tmp3653 = getelementptr inbounds float* %tmp3652, i64 1
- %tmp3654 = getelementptr inbounds float* %tmp3653, i64 1
- %tmp3655 = getelementptr inbounds float* %tmp3654, i64 1
- %tmp3656 = getelementptr inbounds float* %tmp3655, i64 1
- %tmp3657 = getelementptr inbounds float* %tmp3656, i64 1
- %tmp3658 = getelementptr inbounds float* %tmp3657, i64 1
- %tmp3659 = getelementptr inbounds float* %tmp3658, i64 1
- %tmp3660 = getelementptr inbounds float* %tmp3659, i64 1
- %tmp3661 = getelementptr inbounds float* %tmp3660, i64 1
- %tmp3662 = getelementptr inbounds float* %tmp3661, i64 1
- %tmp3663 = getelementptr inbounds float* %tmp3662, i64 1
- %tmp3664 = getelementptr inbounds float* %tmp3663, i64 1
- %tmp3665 = getelementptr inbounds float* %tmp3664, i64 1
- %tmp3666 = getelementptr inbounds float* %tmp3665, i64 1
- %tmp3667 = getelementptr inbounds float* %tmp3666, i64 1
- %tmp3668 = getelementptr inbounds float* %tmp3667, i64 1
- %tmp3669 = getelementptr inbounds float* %tmp3668, i64 1
- %tmp3670 = getelementptr inbounds float* %tmp3669, i64 1
- %tmp3671 = getelementptr inbounds float* %tmp3670, i64 1
- %tmp3672 = getelementptr inbounds float* %tmp3671, i64 1
- %tmp3673 = getelementptr inbounds float* %tmp3672, i64 1
- %tmp3674 = getelementptr inbounds float* %tmp3673, i64 1
- %tmp3675 = getelementptr inbounds float* %tmp3674, i64 1
- %tmp3676 = getelementptr inbounds float* %tmp3675, i64 1
- %tmp3677 = getelementptr inbounds float* %tmp3676, i64 1
- %tmp3678 = getelementptr inbounds float* %tmp3677, i64 1
- %tmp3679 = getelementptr inbounds float* %tmp3678, i64 1
- %tmp3680 = getelementptr inbounds float* %tmp3679, i64 1
- %tmp3681 = getelementptr inbounds float* %tmp3680, i64 1
- %tmp3682 = getelementptr inbounds float* %tmp3681, i64 1
- %tmp3683 = getelementptr inbounds float* %tmp3682, i64 1
- %tmp3684 = getelementptr inbounds float* %tmp3683, i64 1
- %tmp3685 = getelementptr inbounds float* %tmp3684, i64 1
- %tmp3686 = getelementptr inbounds float* %tmp3685, i64 1
- %tmp3687 = getelementptr inbounds float* %tmp3686, i64 1
- %tmp3688 = getelementptr inbounds float* %tmp3687, i64 1
- %tmp3689 = getelementptr inbounds float* %tmp3688, i64 1
- %tmp3690 = getelementptr inbounds float* %tmp3689, i64 1
- %tmp3691 = getelementptr inbounds float* %tmp3690, i64 1
- %tmp3692 = getelementptr inbounds float* %tmp3691, i64 1
- %tmp3693 = getelementptr inbounds float* %tmp3692, i64 1
- %tmp3694 = getelementptr inbounds float* %tmp3693, i64 1
- %tmp3695 = getelementptr inbounds float* %tmp3694, i64 1
- %tmp3696 = getelementptr inbounds float* %tmp3695, i64 1
- %tmp3697 = getelementptr inbounds float* %tmp3696, i64 1
- %tmp3698 = getelementptr inbounds float* %tmp3697, i64 1
- %tmp3699 = getelementptr inbounds float* %tmp3698, i64 1
- %tmp3700 = getelementptr inbounds float* %tmp3699, i64 1
- %tmp3701 = getelementptr inbounds float* %tmp3700, i64 1
- %tmp3702 = getelementptr inbounds float* %tmp3701, i64 1
- %tmp3703 = getelementptr inbounds float* %tmp3702, i64 1
- %tmp3704 = getelementptr inbounds float* %tmp3703, i64 1
- %tmp3705 = getelementptr inbounds float* %tmp3704, i64 1
- %tmp3706 = getelementptr inbounds float* %tmp3705, i64 1
- %tmp3707 = getelementptr inbounds float* %tmp3706, i64 1
- %tmp3708 = getelementptr inbounds float* %tmp3707, i64 1
- %tmp3709 = getelementptr inbounds float* %tmp3708, i64 1
- %tmp3710 = getelementptr inbounds float* %tmp3709, i64 1
- %tmp3711 = getelementptr inbounds float* %tmp3710, i64 1
- %tmp3712 = getelementptr inbounds float* %tmp3711, i64 1
- %tmp3713 = getelementptr inbounds float* %tmp3712, i64 1
- %tmp3714 = getelementptr inbounds float* %tmp3713, i64 1
- %tmp3715 = getelementptr inbounds float* %tmp3714, i64 1
- %tmp3716 = getelementptr inbounds float* %tmp3715, i64 1
- %tmp3717 = getelementptr inbounds float* %tmp3716, i64 1
- %tmp3718 = getelementptr inbounds float* %tmp3717, i64 1
- %tmp3719 = getelementptr inbounds float* %tmp3718, i64 1
- %tmp3720 = getelementptr inbounds float* %tmp3719, i64 1
- %tmp3721 = getelementptr inbounds float* %tmp3720, i64 1
- %tmp3722 = getelementptr inbounds float* %tmp3721, i64 1
- %tmp3723 = getelementptr inbounds float* %tmp3722, i64 1
- %tmp3724 = getelementptr inbounds float* %tmp3723, i64 1
- %tmp3725 = getelementptr inbounds float* %tmp3724, i64 1
- %tmp3726 = getelementptr inbounds float* %tmp3725, i64 1
- %tmp3727 = getelementptr inbounds float* %tmp3726, i64 1
- %tmp3728 = getelementptr inbounds float* %tmp3727, i64 1
- %tmp3729 = getelementptr inbounds float* %tmp3728, i64 1
- %tmp3730 = getelementptr inbounds float* %tmp3729, i64 1
- %tmp3731 = getelementptr inbounds float* %tmp3730, i64 1
- %tmp3732 = getelementptr inbounds float* %tmp3731, i64 1
- %tmp3733 = getelementptr inbounds float* %tmp3732, i64 1
- %tmp3734 = getelementptr inbounds float* %tmp3733, i64 1
- %tmp3735 = getelementptr inbounds float* %tmp3734, i64 1
- %tmp3736 = getelementptr inbounds float* %tmp3735, i64 1
- %tmp3737 = getelementptr inbounds float* %tmp3736, i64 1
- %tmp3738 = getelementptr inbounds float* %tmp3737, i64 1
- %tmp3739 = getelementptr inbounds float* %tmp3738, i64 1
- %tmp3740 = getelementptr inbounds float* %tmp3739, i64 1
- %tmp3741 = getelementptr inbounds float* %tmp3740, i64 1
- %tmp3742 = getelementptr inbounds float* %tmp3741, i64 1
- %tmp3743 = getelementptr inbounds float* %tmp3742, i64 1
- %tmp3744 = getelementptr inbounds float* %tmp3743, i64 1
- %tmp3745 = getelementptr inbounds float* %tmp3744, i64 1
- %tmp3746 = getelementptr inbounds float* %tmp3745, i64 1
- %tmp3747 = getelementptr inbounds float* %tmp3746, i64 1
- %tmp3748 = getelementptr inbounds float* %tmp3747, i64 1
- %tmp3749 = getelementptr inbounds float* %tmp3748, i64 1
- %tmp3750 = getelementptr inbounds float* %tmp3749, i64 1
- %tmp3751 = getelementptr inbounds float* %tmp3750, i64 1
- %tmp3752 = getelementptr inbounds float* %tmp3751, i64 1
- %tmp3753 = getelementptr inbounds float* %tmp3752, i64 1
- %tmp3754 = getelementptr inbounds float* %tmp3753, i64 1
- %tmp3755 = getelementptr inbounds float* %tmp3754, i64 1
- %tmp3756 = getelementptr inbounds float* %tmp3755, i64 1
- %tmp3757 = getelementptr inbounds float* %tmp3756, i64 1
- %tmp3758 = getelementptr inbounds float* %tmp3757, i64 1
- %tmp3759 = getelementptr inbounds float* %tmp3758, i64 1
- %tmp3760 = getelementptr inbounds float* %tmp3759, i64 1
- %tmp3761 = getelementptr inbounds float* %tmp3760, i64 1
- %tmp3762 = getelementptr inbounds float* %tmp3761, i64 1
- %tmp3763 = getelementptr inbounds float* %tmp3762, i64 1
- %tmp3764 = getelementptr inbounds float* %tmp3763, i64 1
- %tmp3765 = getelementptr inbounds float* %tmp3764, i64 1
- %tmp3766 = getelementptr inbounds float* %tmp3765, i64 1
- %tmp3767 = getelementptr inbounds float* %tmp3766, i64 1
- %tmp3768 = getelementptr inbounds float* %tmp3767, i64 1
- %tmp3769 = getelementptr inbounds float* %tmp3768, i64 1
- %tmp3770 = getelementptr inbounds float* %tmp3769, i64 1
- %tmp3771 = getelementptr inbounds float* %tmp3770, i64 1
- %tmp3772 = getelementptr inbounds float* %tmp3771, i64 1
- %tmp3773 = getelementptr inbounds float* %tmp3772, i64 1
- %tmp3774 = getelementptr inbounds float* %tmp3773, i64 1
- %tmp3775 = getelementptr inbounds float* %tmp3774, i64 1
- %tmp3776 = getelementptr inbounds float* %tmp3775, i64 1
- %tmp3777 = getelementptr inbounds float* %tmp3776, i64 1
- %tmp3778 = getelementptr inbounds float* %tmp3777, i64 1
- %tmp3779 = getelementptr inbounds float* %tmp3778, i64 1
- %tmp3780 = getelementptr inbounds float* %tmp3779, i64 1
- %tmp3781 = getelementptr inbounds float* %tmp3780, i64 1
- %tmp3782 = getelementptr inbounds float* %tmp3781, i64 1
- %tmp3783 = getelementptr inbounds float* %tmp3782, i64 1
- %tmp3784 = getelementptr inbounds float* %tmp3783, i64 1
- %tmp3785 = getelementptr inbounds float* %tmp3784, i64 1
- %tmp3786 = getelementptr inbounds float* %tmp3785, i64 1
- %tmp3787 = getelementptr inbounds float* %tmp3786, i64 1
- %tmp3788 = getelementptr inbounds float* %tmp3787, i64 1
- %tmp3789 = getelementptr inbounds float* %tmp3788, i64 1
- %tmp3790 = getelementptr inbounds float* %tmp3789, i64 1
- %tmp3791 = getelementptr inbounds float* %tmp3790, i64 1
- %tmp3792 = getelementptr inbounds float* %tmp3791, i64 1
- %tmp3793 = getelementptr inbounds float* %tmp3792, i64 1
- %tmp3794 = getelementptr inbounds float* %tmp3793, i64 1
- %tmp3795 = getelementptr inbounds float* %tmp3794, i64 1
- %tmp3796 = getelementptr inbounds float* %tmp3795, i64 1
- %tmp3797 = getelementptr inbounds float* %tmp3796, i64 1
- %tmp3798 = getelementptr inbounds float* %tmp3797, i64 1
- %tmp3799 = getelementptr inbounds float* %tmp3798, i64 1
- %tmp3800 = getelementptr inbounds float* %tmp3799, i64 1
- %tmp3801 = getelementptr inbounds float* %tmp3800, i64 1
- %tmp3802 = getelementptr inbounds float* %tmp3801, i64 1
- %tmp3803 = getelementptr inbounds float* %tmp3802, i64 1
- %tmp3804 = getelementptr inbounds float* %tmp3803, i64 1
- %tmp3805 = getelementptr inbounds float* %tmp3804, i64 1
- %tmp3806 = getelementptr inbounds float* %tmp3805, i64 1
- %tmp3807 = getelementptr inbounds float* %tmp3806, i64 1
- %tmp3808 = getelementptr inbounds float* %tmp3807, i64 1
- %tmp3809 = getelementptr inbounds float* %tmp3808, i64 1
- %tmp3810 = getelementptr inbounds float* %tmp3809, i64 1
- %tmp3811 = getelementptr inbounds float* %tmp3810, i64 1
- %tmp3812 = getelementptr inbounds float* %tmp3811, i64 1
- %tmp3813 = getelementptr inbounds float* %tmp3812, i64 1
- %tmp3814 = getelementptr inbounds float* %tmp3813, i64 1
- %tmp3815 = getelementptr inbounds float* %tmp3814, i64 1
- %tmp3816 = getelementptr inbounds float* %tmp3815, i64 1
- %tmp3817 = getelementptr inbounds float* %tmp3816, i64 1
- %tmp3818 = getelementptr inbounds float* %tmp3817, i64 1
- %tmp3819 = getelementptr inbounds float* %tmp3818, i64 1
- %tmp3820 = getelementptr inbounds float* %tmp3819, i64 1
- %tmp3821 = getelementptr inbounds float* %tmp3820, i64 1
- %tmp3822 = getelementptr inbounds float* %tmp3821, i64 1
- %tmp3823 = getelementptr inbounds float* %tmp3822, i64 1
- %tmp3824 = getelementptr inbounds float* %tmp3823, i64 1
- %tmp3825 = getelementptr inbounds float* %tmp3824, i64 1
- %tmp3826 = getelementptr inbounds float* %tmp3825, i64 1
- %tmp3827 = getelementptr inbounds float* %tmp3826, i64 1
- %tmp3828 = getelementptr inbounds float* %tmp3827, i64 1
- %tmp3829 = getelementptr inbounds float* %tmp3828, i64 1
- %tmp3830 = getelementptr inbounds float* %tmp3829, i64 1
- %tmp3831 = getelementptr inbounds float* %tmp3830, i64 1
- %tmp3832 = getelementptr inbounds float* %tmp3831, i64 1
- %tmp3833 = getelementptr inbounds float* %tmp3832, i64 1
- %tmp3834 = getelementptr inbounds float* %tmp3833, i64 1
- %tmp3835 = getelementptr inbounds float* %tmp3834, i64 1
- %tmp3836 = getelementptr inbounds float* %tmp3835, i64 1
- %tmp3837 = getelementptr inbounds float* %tmp3836, i64 1
- %tmp3838 = getelementptr inbounds float* %tmp3837, i64 1
- %tmp3839 = getelementptr inbounds float* %tmp3838, i64 1
- %tmp3840 = getelementptr inbounds float* %tmp3839, i64 1
- %tmp3841 = getelementptr inbounds float* %tmp3840, i64 1
- %tmp3842 = getelementptr inbounds float* %tmp3841, i64 1
- %tmp3843 = getelementptr inbounds float* %tmp3842, i64 1
- %tmp3844 = getelementptr inbounds float* %tmp3843, i64 1
- %tmp3845 = getelementptr inbounds float* %tmp3844, i64 1
- %tmp3846 = getelementptr inbounds float* %tmp3845, i64 1
- %tmp3847 = getelementptr inbounds float* %tmp3846, i64 1
- %tmp3848 = getelementptr inbounds float* %tmp3847, i64 1
- %tmp3849 = getelementptr inbounds float* %tmp3848, i64 1
- %tmp3850 = getelementptr inbounds float* %tmp3849, i64 1
- %tmp3851 = getelementptr inbounds float* %tmp3850, i64 1
- %tmp3852 = getelementptr inbounds float* %tmp3851, i64 1
- %tmp3853 = getelementptr inbounds float* %tmp3852, i64 1
- %tmp3854 = getelementptr inbounds float* %tmp3853, i64 1
- %tmp3855 = getelementptr inbounds float* %tmp3854, i64 1
- %tmp3856 = getelementptr inbounds float* %tmp3855, i64 1
- %tmp3857 = getelementptr inbounds float* %tmp3856, i64 1
- %tmp3858 = getelementptr inbounds float* %tmp3857, i64 1
- %tmp3859 = getelementptr inbounds float* %tmp3858, i64 1
- %tmp3860 = getelementptr inbounds float* %tmp3859, i64 1
- %tmp3861 = getelementptr inbounds float* %tmp3860, i64 1
- %tmp3862 = getelementptr inbounds float* %tmp3861, i64 1
- %tmp3863 = getelementptr inbounds float* %tmp3862, i64 1
- %tmp3864 = getelementptr inbounds float* %tmp3863, i64 1
- %tmp3865 = getelementptr inbounds float* %tmp3864, i64 1
- %tmp3866 = getelementptr inbounds float* %tmp3865, i64 1
- %tmp3867 = getelementptr inbounds float* %tmp3866, i64 1
- %tmp3868 = getelementptr inbounds float* %tmp3867, i64 1
- %tmp3869 = getelementptr inbounds float* %tmp3868, i64 1
- %tmp3870 = getelementptr inbounds float* %tmp3869, i64 1
- %tmp3871 = getelementptr inbounds float* %tmp3870, i64 1
- %tmp3872 = getelementptr inbounds float* %tmp3871, i64 1
- %tmp3873 = getelementptr inbounds float* %tmp3872, i64 1
- %tmp3874 = getelementptr inbounds float* %tmp3873, i64 1
- %tmp3875 = getelementptr inbounds float* %tmp3874, i64 1
- %tmp3876 = getelementptr inbounds float* %tmp3875, i64 1
- %tmp3877 = getelementptr inbounds float* %tmp3876, i64 1
- %tmp3878 = getelementptr inbounds float* %tmp3877, i64 1
- %tmp3879 = getelementptr inbounds float* %tmp3878, i64 1
- %tmp3880 = getelementptr inbounds float* %tmp3879, i64 1
- %tmp3881 = getelementptr inbounds float* %tmp3880, i64 1
- %tmp3882 = getelementptr inbounds float* %tmp3881, i64 1
- %tmp3883 = getelementptr inbounds float* %tmp3882, i64 1
- %tmp3884 = getelementptr inbounds float* %tmp3883, i64 1
- %tmp3885 = getelementptr inbounds float* %tmp3884, i64 1
- %tmp3886 = getelementptr inbounds float* %tmp3885, i64 1
- %tmp3887 = getelementptr inbounds float* %tmp3886, i64 1
- %tmp3888 = getelementptr inbounds float* %tmp3887, i64 1
- %tmp3889 = getelementptr inbounds float* %tmp3888, i64 1
- %tmp3890 = getelementptr inbounds float* %tmp3889, i64 1
- %tmp3891 = getelementptr inbounds float* %tmp3890, i64 1
- %tmp3892 = getelementptr inbounds float* %tmp3891, i64 1
- %tmp3893 = getelementptr inbounds float* %tmp3892, i64 1
- %tmp3894 = getelementptr inbounds float* %tmp3893, i64 1
- %tmp3895 = getelementptr inbounds float* %tmp3894, i64 1
- %tmp3896 = getelementptr inbounds float* %tmp3895, i64 1
- %tmp3897 = getelementptr inbounds float* %tmp3896, i64 1
- %tmp3898 = getelementptr inbounds float* %tmp3897, i64 1
- %tmp3899 = getelementptr inbounds float* %tmp3898, i64 1
- %tmp3900 = getelementptr inbounds float* %tmp3899, i64 1
- %tmp3901 = getelementptr inbounds float* %tmp3900, i64 1
- %tmp3902 = getelementptr inbounds float* %tmp3901, i64 1
- %tmp3903 = getelementptr inbounds float* %tmp3902, i64 1
- %tmp3904 = getelementptr inbounds float* %tmp3903, i64 1
- %tmp3905 = getelementptr inbounds float* %tmp3904, i64 1
- %tmp3906 = getelementptr inbounds float* %tmp3905, i64 1
- %tmp3907 = getelementptr inbounds float* %tmp3906, i64 1
- %tmp3908 = getelementptr inbounds float* %tmp3907, i64 1
- %tmp3909 = getelementptr inbounds float* %tmp3908, i64 1
- %tmp3910 = getelementptr inbounds float* %tmp3909, i64 1
- %tmp3911 = getelementptr inbounds float* %tmp3910, i64 1
- %tmp3912 = getelementptr inbounds float* %tmp3911, i64 1
- %tmp3913 = getelementptr inbounds float* %tmp3912, i64 1
- %tmp3914 = getelementptr inbounds float* %tmp3913, i64 1
- %tmp3915 = getelementptr inbounds float* %tmp3914, i64 1
- %tmp3916 = getelementptr inbounds float* %tmp3915, i64 1
- %tmp3917 = getelementptr inbounds float* %tmp3916, i64 1
- %tmp3918 = getelementptr inbounds float* %tmp3917, i64 1
- %tmp3919 = getelementptr inbounds float* %tmp3918, i64 1
- %tmp3920 = getelementptr inbounds float* %tmp3919, i64 1
- %tmp3921 = getelementptr inbounds float* %tmp3920, i64 1
- %tmp3922 = getelementptr inbounds float* %tmp3921, i64 1
- %tmp3923 = getelementptr inbounds float* %tmp3922, i64 1
- %tmp3924 = getelementptr inbounds float* %tmp3923, i64 1
- %tmp3925 = getelementptr inbounds float* %tmp3924, i64 1
- %tmp3926 = getelementptr inbounds float* %tmp3925, i64 1
- %tmp3927 = getelementptr inbounds float* %tmp3926, i64 1
- %tmp3928 = getelementptr inbounds float* %tmp3927, i64 1
- %tmp3929 = getelementptr inbounds float* %tmp3928, i64 1
- %tmp3930 = getelementptr inbounds float* %tmp3929, i64 1
- %tmp3931 = getelementptr inbounds float* %tmp3930, i64 1
- %tmp3932 = getelementptr inbounds float* %tmp3931, i64 1
- %tmp3933 = getelementptr inbounds float* %tmp3932, i64 1
- %tmp3934 = getelementptr inbounds float* %tmp3933, i64 1
- %tmp3935 = getelementptr inbounds float* %tmp3934, i64 1
- %tmp3936 = getelementptr inbounds float* %tmp3935, i64 1
- %tmp3937 = getelementptr inbounds float* %tmp3936, i64 1
- %tmp3938 = getelementptr inbounds float* %tmp3937, i64 1
- %tmp3939 = getelementptr inbounds float* %tmp3938, i64 1
- %tmp3940 = getelementptr inbounds float* %tmp3939, i64 1
- %tmp3941 = getelementptr inbounds float* %tmp3940, i64 1
- %tmp3942 = getelementptr inbounds float* %tmp3941, i64 1
- %tmp3943 = getelementptr inbounds float* %tmp3942, i64 1
- %tmp3944 = getelementptr inbounds float* %tmp3943, i64 1
- %tmp3945 = getelementptr inbounds float* %tmp3944, i64 1
- %tmp3946 = getelementptr inbounds float* %tmp3945, i64 1
- %tmp3947 = getelementptr inbounds float* %tmp3946, i64 1
- %tmp3948 = getelementptr inbounds float* %tmp3947, i64 1
- %tmp3949 = getelementptr inbounds float* %tmp3948, i64 1
- %tmp3950 = getelementptr inbounds float* %tmp3949, i64 1
- %tmp3951 = getelementptr inbounds float* %tmp3950, i64 1
- %tmp3952 = getelementptr inbounds float* %tmp3951, i64 1
- %tmp3953 = getelementptr inbounds float* %tmp3952, i64 1
- %tmp3954 = getelementptr inbounds float* %tmp3953, i64 1
- %tmp3955 = getelementptr inbounds float* %tmp3954, i64 1
- %tmp3956 = getelementptr inbounds float* %tmp3955, i64 1
- %tmp3957 = getelementptr inbounds float* %tmp3956, i64 1
- %tmp3958 = getelementptr inbounds float* %tmp3957, i64 1
- %tmp3959 = getelementptr inbounds float* %tmp3958, i64 1
- %tmp3960 = getelementptr inbounds float* %tmp3959, i64 1
- %tmp3961 = getelementptr inbounds float* %tmp3960, i64 1
- %tmp3962 = getelementptr inbounds float* %tmp3961, i64 1
- %tmp3963 = getelementptr inbounds float* %tmp3962, i64 1
- %tmp3964 = getelementptr inbounds float* %tmp3963, i64 1
- %tmp3965 = getelementptr inbounds float* %tmp3964, i64 1
- %tmp3966 = getelementptr inbounds float* %tmp3965, i64 1
- %tmp3967 = getelementptr inbounds float* %tmp3966, i64 1
- %tmp3968 = getelementptr inbounds float* %tmp3967, i64 1
- %tmp3969 = getelementptr inbounds float* %tmp3968, i64 1
- %tmp3970 = getelementptr inbounds float* %tmp3969, i64 1
- %tmp3971 = getelementptr inbounds float* %tmp3970, i64 1
- %tmp3972 = getelementptr inbounds float* %tmp3971, i64 1
- %tmp3973 = getelementptr inbounds float* %tmp3972, i64 1
- %tmp3974 = getelementptr inbounds float* %tmp3973, i64 1
- %tmp3975 = getelementptr inbounds float* %tmp3974, i64 1
- %tmp3976 = getelementptr inbounds float* %tmp3975, i64 1
- %tmp3977 = getelementptr inbounds float* %tmp3976, i64 1
- %tmp3978 = getelementptr inbounds float* %tmp3977, i64 1
- %tmp3979 = getelementptr inbounds float* %tmp3978, i64 1
- %tmp3980 = getelementptr inbounds float* %tmp3979, i64 1
- %tmp3981 = getelementptr inbounds float* %tmp3980, i64 1
- %tmp3982 = getelementptr inbounds float* %tmp3981, i64 1
- %tmp3983 = getelementptr inbounds float* %tmp3982, i64 1
- %tmp3984 = getelementptr inbounds float* %tmp3983, i64 1
- %tmp3985 = getelementptr inbounds float* %tmp3984, i64 1
- %tmp3986 = getelementptr inbounds float* %tmp3985, i64 1
- %tmp3987 = getelementptr inbounds float* %tmp3986, i64 1
- %tmp3988 = getelementptr inbounds float* %tmp3987, i64 1
- %tmp3989 = getelementptr inbounds float* %tmp3988, i64 1
- %tmp3990 = getelementptr inbounds float* %tmp3989, i64 1
- %tmp3991 = getelementptr inbounds float* %tmp3990, i64 1
- %tmp3992 = getelementptr inbounds float* %tmp3991, i64 1
- %tmp3993 = getelementptr inbounds float* %tmp3992, i64 1
- %tmp3994 = getelementptr inbounds float* %tmp3993, i64 1
- %tmp3995 = getelementptr inbounds float* %tmp3994, i64 1
- %tmp3996 = getelementptr inbounds float* %tmp3995, i64 1
- %tmp3997 = getelementptr inbounds float* %tmp3996, i64 1
- %tmp3998 = getelementptr inbounds float* %tmp3997, i64 1
- %tmp3999 = getelementptr inbounds float* %tmp3998, i64 1
- %tmp4000 = getelementptr inbounds float* %tmp3999, i64 1
- %tmp4001 = getelementptr inbounds float* %tmp4000, i64 1
- %tmp4002 = getelementptr inbounds float* %tmp4001, i64 1
- %tmp4003 = getelementptr inbounds float* %tmp4002, i64 1
- %tmp4004 = getelementptr inbounds float* %tmp4003, i64 1
- %tmp4005 = getelementptr inbounds float* %tmp4004, i64 1
- %tmp4006 = getelementptr inbounds float* %tmp4005, i64 1
- %tmp4007 = getelementptr inbounds float* %tmp4006, i64 1
- %tmp4008 = getelementptr inbounds float* %tmp4007, i64 1
- %tmp4009 = getelementptr inbounds float* %tmp4008, i64 1
- %tmp4010 = getelementptr inbounds float* %tmp4009, i64 1
- %tmp4011 = getelementptr inbounds float* %tmp4010, i64 1
- %tmp4012 = getelementptr inbounds float* %tmp4011, i64 1
- %tmp4013 = getelementptr inbounds float* %tmp4012, i64 1
- %tmp4014 = getelementptr inbounds float* %tmp4013, i64 1
- %tmp4015 = getelementptr inbounds float* %tmp4014, i64 1
- %tmp4016 = getelementptr inbounds float* %tmp4015, i64 1
- %tmp4017 = getelementptr inbounds float* %tmp4016, i64 1
- %tmp4018 = getelementptr inbounds float* %tmp4017, i64 1
- %tmp4019 = getelementptr inbounds float* %tmp4018, i64 1
- %tmp4020 = getelementptr inbounds float* %tmp4019, i64 1
- %tmp4021 = getelementptr inbounds float* %tmp4020, i64 1
- %tmp4022 = getelementptr inbounds float* %tmp4021, i64 1
- %tmp4023 = getelementptr inbounds float* %tmp4022, i64 1
- %tmp4024 = getelementptr inbounds float* %tmp4023, i64 1
- %tmp4025 = getelementptr inbounds float* %tmp4024, i64 1
- %tmp4026 = getelementptr inbounds float* %tmp4025, i64 1
- %tmp4027 = getelementptr inbounds float* %tmp4026, i64 1
- %tmp4028 = getelementptr inbounds float* %tmp4027, i64 1
- %tmp4029 = getelementptr inbounds float* %tmp4028, i64 1
- %tmp4030 = getelementptr inbounds float* %tmp4029, i64 1
- %tmp4031 = getelementptr inbounds float* %tmp4030, i64 1
- %tmp4032 = getelementptr inbounds float* %tmp4031, i64 1
- %tmp4033 = getelementptr inbounds float* %tmp4032, i64 1
- %tmp4034 = getelementptr inbounds float* %tmp4033, i64 1
- %tmp4035 = getelementptr inbounds float* %tmp4034, i64 1
- %tmp4036 = getelementptr inbounds float* %tmp4035, i64 1
- %tmp4037 = getelementptr inbounds float* %tmp4036, i64 1
- %tmp4038 = getelementptr inbounds float* %tmp4037, i64 1
- %tmp4039 = getelementptr inbounds float* %tmp4038, i64 1
- %tmp4040 = getelementptr inbounds float* %tmp4039, i64 1
- %tmp4041 = getelementptr inbounds float* %tmp4040, i64 1
- %tmp4042 = getelementptr inbounds float* %tmp4041, i64 1
- %tmp4043 = getelementptr inbounds float* %tmp4042, i64 1
- %tmp4044 = getelementptr inbounds float* %tmp4043, i64 1
- %tmp4045 = getelementptr inbounds float* %tmp4044, i64 1
- %tmp4046 = getelementptr inbounds float* %tmp4045, i64 1
- %tmp4047 = getelementptr inbounds float* %tmp4046, i64 1
- %tmp4048 = getelementptr inbounds float* %tmp4047, i64 1
- %tmp4049 = getelementptr inbounds float* %tmp4048, i64 1
- %tmp4050 = getelementptr inbounds float* %tmp4049, i64 1
- %tmp4051 = getelementptr inbounds float* %tmp4050, i64 1
- %tmp4052 = getelementptr inbounds float* %tmp4051, i64 1
- %tmp4053 = getelementptr inbounds float* %tmp4052, i64 1
- %tmp4054 = getelementptr inbounds float* %tmp4053, i64 1
- %tmp4055 = getelementptr inbounds float* %tmp4054, i64 1
- %tmp4056 = getelementptr inbounds float* %tmp4055, i64 1
- %tmp4057 = getelementptr inbounds float* %tmp4056, i64 1
- %tmp4058 = getelementptr inbounds float* %tmp4057, i64 1
- %tmp4059 = getelementptr inbounds float* %tmp4058, i64 1
- %tmp4060 = getelementptr inbounds float* %tmp4059, i64 1
- %tmp4061 = getelementptr inbounds float* %tmp4060, i64 1
- %tmp4062 = getelementptr inbounds float* %tmp4061, i64 1
- %tmp4063 = getelementptr inbounds float* %tmp4062, i64 1
- %tmp4064 = getelementptr inbounds float* %tmp4063, i64 1
- %tmp4065 = getelementptr inbounds float* %tmp4064, i64 1
- %tmp4066 = getelementptr inbounds float* %tmp4065, i64 1
- %tmp4067 = getelementptr inbounds float* %tmp4066, i64 1
- %tmp4068 = getelementptr inbounds float* %tmp4067, i64 1
- %tmp4069 = getelementptr inbounds float* %tmp4068, i64 1
- %tmp4070 = getelementptr inbounds float* %tmp4069, i64 1
- %tmp4071 = getelementptr inbounds float* %tmp4070, i64 1
- %tmp4072 = getelementptr inbounds float* %tmp4071, i64 1
- %tmp4073 = getelementptr inbounds float* %tmp4072, i64 1
- %tmp4074 = getelementptr inbounds float* %tmp4073, i64 1
- %tmp4075 = getelementptr inbounds float* %tmp4074, i64 1
- %tmp4076 = getelementptr inbounds float* %tmp4075, i64 1
- %tmp4077 = getelementptr inbounds float* %tmp4076, i64 1
- %tmp4078 = getelementptr inbounds float* %tmp4077, i64 1
- %tmp4079 = getelementptr inbounds float* %tmp4078, i64 1
- %tmp4080 = getelementptr inbounds float* %tmp4079, i64 1
- %tmp4081 = getelementptr inbounds float* %tmp4080, i64 1
- %tmp4082 = getelementptr inbounds float* %tmp4081, i64 1
- %tmp4083 = getelementptr inbounds float* %tmp4082, i64 1
- %tmp4084 = getelementptr inbounds float* %tmp4083, i64 1
- %tmp4085 = getelementptr inbounds float* %tmp4084, i64 1
- %tmp4086 = getelementptr inbounds float* %tmp4085, i64 1
- %tmp4087 = getelementptr inbounds float* %tmp4086, i64 1
- %tmp4088 = getelementptr inbounds float* %tmp4087, i64 1
- %tmp4089 = getelementptr inbounds float* %tmp4088, i64 1
- %tmp4090 = getelementptr inbounds float* %tmp4089, i64 1
- %tmp4091 = getelementptr inbounds float* %tmp4090, i64 1
- %tmp4092 = getelementptr inbounds float* %tmp4091, i64 1
- %tmp4093 = getelementptr inbounds float* %tmp4092, i64 1
- %tmp4094 = getelementptr inbounds float* %tmp4093, i64 1
- %tmp4095 = getelementptr inbounds float* %tmp4094, i64 1
- %tmp4096 = getelementptr inbounds float* %tmp4095, i64 1
- %tmp4097 = getelementptr inbounds float* %tmp4096, i64 1
- %tmp4098 = getelementptr inbounds float* %tmp4097, i64 1
- %tmp4099 = getelementptr inbounds float* %tmp4098, i64 1
- %tmp4100 = getelementptr inbounds float* %tmp4099, i64 1
- %tmp4101 = getelementptr inbounds float* %tmp4100, i64 1
- %tmp4102 = getelementptr inbounds float* %tmp4101, i64 1
- %tmp4103 = getelementptr inbounds float* %tmp4102, i64 1
- %tmp4104 = getelementptr inbounds float* %tmp4103, i64 1
- %tmp4105 = getelementptr inbounds float* %tmp4104, i64 1
- %tmp4106 = getelementptr inbounds float* %tmp4105, i64 1
- %tmp4107 = getelementptr inbounds float* %tmp4106, i64 1
- %tmp4108 = getelementptr inbounds float* %tmp4107, i64 1
- %tmp4109 = getelementptr inbounds float* %tmp4108, i64 1
- %tmp4110 = getelementptr inbounds float* %tmp4109, i64 1
- %tmp4111 = getelementptr inbounds float* %tmp4110, i64 1
- %tmp4112 = getelementptr inbounds float* %tmp4111, i64 1
- %tmp4113 = getelementptr inbounds float* %tmp4112, i64 1
- %tmp4114 = getelementptr inbounds float* %tmp4113, i64 1
- %tmp4115 = getelementptr inbounds float* %tmp4114, i64 1
- %tmp4116 = getelementptr inbounds float* %tmp4115, i64 1
- %tmp4117 = getelementptr inbounds float* %tmp4116, i64 1
- %tmp4118 = getelementptr inbounds float* %tmp4117, i64 1
- %tmp4119 = getelementptr inbounds float* %tmp4118, i64 1
- %tmp4120 = getelementptr inbounds float* %tmp4119, i64 1
- %tmp4121 = getelementptr inbounds float* %tmp4120, i64 1
- %tmp4122 = getelementptr inbounds float* %tmp4121, i64 1
- %tmp4123 = getelementptr inbounds float* %tmp4122, i64 1
- %tmp4124 = getelementptr inbounds float* %tmp4123, i64 1
- %tmp4125 = getelementptr inbounds float* %tmp4124, i64 1
- %tmp4126 = getelementptr inbounds float* %tmp4125, i64 1
- %tmp4127 = getelementptr inbounds float* %tmp4126, i64 1
- %tmp4128 = getelementptr inbounds float* %tmp4127, i64 1
- %tmp4129 = getelementptr inbounds float* %tmp4128, i64 1
- %tmp4130 = getelementptr inbounds float* %tmp4129, i64 1
- %tmp4131 = getelementptr inbounds float* %tmp4130, i64 1
- %tmp4132 = getelementptr inbounds float* %tmp4131, i64 1
- %tmp4133 = getelementptr inbounds float* %tmp4132, i64 1
- %tmp4134 = getelementptr inbounds float* %tmp4133, i64 1
- %tmp4135 = getelementptr inbounds float* %tmp4134, i64 1
- %tmp4136 = getelementptr inbounds float* %tmp4135, i64 1
- %tmp4137 = getelementptr inbounds float* %tmp4136, i64 1
- %tmp4138 = getelementptr inbounds float* %tmp4137, i64 1
- %tmp4139 = getelementptr inbounds float* %tmp4138, i64 1
- %tmp4140 = getelementptr inbounds float* %tmp4139, i64 1
- %tmp4141 = getelementptr inbounds float* %tmp4140, i64 1
- %tmp4142 = getelementptr inbounds float* %tmp4141, i64 1
- %tmp4143 = getelementptr inbounds float* %tmp4142, i64 1
- %tmp4144 = getelementptr inbounds float* %tmp4143, i64 1
- %tmp4145 = getelementptr inbounds float* %tmp4144, i64 1
- %tmp4146 = getelementptr inbounds float* %tmp4145, i64 1
- %tmp4147 = getelementptr inbounds float* %tmp4146, i64 1
- %tmp4148 = getelementptr inbounds float* %tmp4147, i64 1
- %tmp4149 = getelementptr inbounds float* %tmp4148, i64 1
- %tmp4150 = getelementptr inbounds float* %tmp4149, i64 1
- %tmp4151 = getelementptr inbounds float* %tmp4150, i64 1
- %tmp4152 = getelementptr inbounds float* %tmp4151, i64 1
- %tmp4153 = getelementptr inbounds float* %tmp4152, i64 1
- %tmp4154 = getelementptr inbounds float* %tmp4153, i64 1
- %tmp4155 = getelementptr inbounds float* %tmp4154, i64 1
- %tmp4156 = getelementptr inbounds float* %tmp4155, i64 1
- %tmp4157 = getelementptr inbounds float* %tmp4156, i64 1
- %tmp4158 = getelementptr inbounds float* %tmp4157, i64 1
- %tmp4159 = getelementptr inbounds float* %tmp4158, i64 1
- %tmp4160 = getelementptr inbounds float* %tmp4159, i64 1
- %tmp4161 = getelementptr inbounds float* %tmp4160, i64 1
- %tmp4162 = getelementptr inbounds float* %tmp4161, i64 1
- %tmp4163 = getelementptr inbounds float* %tmp4162, i64 1
- %tmp4164 = getelementptr inbounds float* %tmp4163, i64 1
- %tmp4165 = getelementptr inbounds float* %tmp4164, i64 1
- %tmp4166 = getelementptr inbounds float* %tmp4165, i64 1
- %tmp4167 = getelementptr inbounds float* %tmp4166, i64 1
- %tmp4168 = getelementptr inbounds float* %tmp4167, i64 1
- %tmp4169 = getelementptr inbounds float* %tmp4168, i64 1
- %tmp4170 = getelementptr inbounds float* %tmp4169, i64 1
- %tmp4171 = getelementptr inbounds float* %tmp4170, i64 1
- %tmp4172 = getelementptr inbounds float* %tmp4171, i64 1
- %tmp4173 = getelementptr inbounds float* %tmp4172, i64 1
- %tmp4174 = getelementptr inbounds float* %tmp4173, i64 1
- %tmp4175 = getelementptr inbounds float* %tmp4174, i64 1
- %tmp4176 = getelementptr inbounds float* %tmp4175, i64 1
- %tmp4177 = getelementptr inbounds float* %tmp4176, i64 1
- %tmp4178 = getelementptr inbounds float* %tmp4177, i64 1
- %tmp4179 = getelementptr inbounds float* %tmp4178, i64 1
- %tmp4180 = getelementptr inbounds float* %tmp4179, i64 1
- %tmp4181 = getelementptr inbounds float* %tmp4180, i64 1
- %tmp4182 = getelementptr inbounds float* %tmp4181, i64 1
- %tmp4183 = getelementptr inbounds float* %tmp4182, i64 1
- %tmp4184 = getelementptr inbounds float* %tmp4183, i64 1
- %tmp4185 = getelementptr inbounds float* %tmp4184, i64 1
- %tmp4186 = getelementptr inbounds float* %tmp4185, i64 1
- %tmp4187 = getelementptr inbounds float* %tmp4186, i64 1
- %tmp4188 = getelementptr inbounds float* %tmp4187, i64 1
- %tmp4189 = getelementptr inbounds float* %tmp4188, i64 1
- %tmp4190 = getelementptr inbounds float* %tmp4189, i64 1
- %tmp4191 = getelementptr inbounds float* %tmp4190, i64 1
- %tmp4192 = getelementptr inbounds float* %tmp4191, i64 1
- %tmp4193 = getelementptr inbounds float* %tmp4192, i64 1
- %tmp4194 = getelementptr inbounds float* %tmp4193, i64 1
- %tmp4195 = getelementptr inbounds float* %tmp4194, i64 1
- %tmp4196 = getelementptr inbounds float* %tmp4195, i64 1
- %tmp4197 = getelementptr inbounds float* %tmp4196, i64 1
- %tmp4198 = getelementptr inbounds float* %tmp4197, i64 1
- %tmp4199 = getelementptr inbounds float* %tmp4198, i64 1
- %tmp4200 = getelementptr inbounds float* %tmp4199, i64 1
- %tmp4201 = getelementptr inbounds float* %tmp4200, i64 1
- %tmp4202 = getelementptr inbounds float* %tmp4201, i64 1
- %tmp4203 = getelementptr inbounds float* %tmp4202, i64 1
- %tmp4204 = getelementptr inbounds float* %tmp4203, i64 1
- %tmp4205 = getelementptr inbounds float* %tmp4204, i64 1
- %tmp4206 = getelementptr inbounds float* %tmp4205, i64 1
- %tmp4207 = getelementptr inbounds float* %tmp4206, i64 1
- %tmp4208 = getelementptr inbounds float* %tmp4207, i64 1
- %tmp4209 = getelementptr inbounds float* %tmp4208, i64 1
- %tmp4210 = getelementptr inbounds float* %tmp4209, i64 1
- %tmp4211 = getelementptr inbounds float* %tmp4210, i64 1
- %tmp4212 = getelementptr inbounds float* %tmp4211, i64 1
- %tmp4213 = getelementptr inbounds float* %tmp4212, i64 1
- %tmp4214 = getelementptr inbounds float* %tmp4213, i64 1
- %tmp4215 = getelementptr inbounds float* %tmp4214, i64 1
- %tmp4216 = getelementptr inbounds float* %tmp4215, i64 1
- %tmp4217 = getelementptr inbounds float* %tmp4216, i64 1
- %tmp4218 = getelementptr inbounds float* %tmp4217, i64 1
- %tmp4219 = getelementptr inbounds float* %tmp4218, i64 1
- %tmp4220 = getelementptr inbounds float* %tmp4219, i64 1
- %tmp4221 = getelementptr inbounds float* %tmp4220, i64 1
- %tmp4222 = getelementptr inbounds float* %tmp4221, i64 1
- %tmp4223 = getelementptr inbounds float* %tmp4222, i64 1
- %tmp4224 = getelementptr inbounds float* %tmp4223, i64 1
- %tmp4225 = getelementptr inbounds float* %tmp4224, i64 1
- %tmp4226 = getelementptr inbounds float* %tmp4225, i64 1
- %tmp4227 = getelementptr inbounds float* %tmp4226, i64 1
- %tmp4228 = getelementptr inbounds float* %tmp4227, i64 1
- %tmp4229 = getelementptr inbounds float* %tmp4228, i64 1
- %tmp4230 = getelementptr inbounds float* %tmp4229, i64 1
- %tmp4231 = getelementptr inbounds float* %tmp4230, i64 1
- %tmp4232 = getelementptr inbounds float* %tmp4231, i64 1
- %tmp4233 = getelementptr inbounds float* %tmp4232, i64 1
- %tmp4234 = getelementptr inbounds float* %tmp4233, i64 1
- %tmp4235 = getelementptr inbounds float* %tmp4234, i64 1
- %tmp4236 = getelementptr inbounds float* %tmp4235, i64 1
- %tmp4237 = getelementptr inbounds float* %tmp4236, i64 1
- %tmp4238 = getelementptr inbounds float* %tmp4237, i64 1
- %tmp4239 = getelementptr inbounds float* %tmp4238, i64 1
- %tmp4240 = getelementptr inbounds float* %tmp4239, i64 1
- %tmp4241 = getelementptr inbounds float* %tmp4240, i64 1
- %tmp4242 = getelementptr inbounds float* %tmp4241, i64 1
- %tmp4243 = getelementptr inbounds float* %tmp4242, i64 1
- %tmp4244 = getelementptr inbounds float* %tmp4243, i64 1
- %tmp4245 = getelementptr inbounds float* %tmp4244, i64 1
- %tmp4246 = getelementptr inbounds float* %tmp4245, i64 1
- %tmp4247 = getelementptr inbounds float* %tmp4246, i64 1
- %tmp4248 = getelementptr inbounds float* %tmp4247, i64 1
- %tmp4249 = getelementptr inbounds float* %tmp4248, i64 1
- %tmp4250 = getelementptr inbounds float* %tmp4249, i64 1
- %tmp4251 = getelementptr inbounds float* %tmp4250, i64 1
- %tmp4252 = getelementptr inbounds float* %tmp4251, i64 1
- %tmp4253 = getelementptr inbounds float* %tmp4252, i64 1
- %tmp4254 = getelementptr inbounds float* %tmp4253, i64 1
- %tmp4255 = getelementptr inbounds float* %tmp4254, i64 1
- %tmp4256 = getelementptr inbounds float* %tmp4255, i64 1
- %tmp4257 = getelementptr inbounds float* %tmp4256, i64 1
- %tmp4258 = getelementptr inbounds float* %tmp4257, i64 1
- %tmp4259 = getelementptr inbounds float* %tmp4258, i64 1
- %tmp4260 = getelementptr inbounds float* %tmp4259, i64 1
- %tmp4261 = getelementptr inbounds float* %tmp4260, i64 1
- %tmp4262 = getelementptr inbounds float* %tmp4261, i64 1
- %tmp4263 = getelementptr inbounds float* %tmp4262, i64 1
- %tmp4264 = getelementptr inbounds float* %tmp4263, i64 1
- %tmp4265 = getelementptr inbounds float* %tmp4264, i64 1
- %tmp4266 = getelementptr inbounds float* %tmp4265, i64 1
- %tmp4267 = getelementptr inbounds float* %tmp4266, i64 1
- %tmp4268 = getelementptr inbounds float* %tmp4267, i64 1
- %tmp4269 = getelementptr inbounds float* %tmp4268, i64 1
- %tmp4270 = getelementptr inbounds float* %tmp4269, i64 1
- %tmp4271 = getelementptr inbounds float* %tmp4270, i64 1
- %tmp4272 = getelementptr inbounds float* %tmp4271, i64 1
- %tmp4273 = getelementptr inbounds float* %tmp4272, i64 1
- %tmp4274 = getelementptr inbounds float* %tmp4273, i64 1
- %tmp4275 = getelementptr inbounds float* %tmp4274, i64 1
- %tmp4276 = getelementptr inbounds float* %tmp4275, i64 1
- %tmp4277 = getelementptr inbounds float* %tmp4276, i64 1
- %tmp4278 = getelementptr inbounds float* %tmp4277, i64 1
- %tmp4279 = getelementptr inbounds float* %tmp4278, i64 1
- %tmp4280 = getelementptr inbounds float* %tmp4279, i64 1
- %tmp4281 = getelementptr inbounds float* %tmp4280, i64 1
- %tmp4282 = getelementptr inbounds float* %tmp4281, i64 1
- %tmp4283 = getelementptr inbounds float* %tmp4282, i64 1
- %tmp4284 = getelementptr inbounds float* %tmp4283, i64 1
- %tmp4285 = getelementptr inbounds float* %tmp4284, i64 1
- %tmp4286 = getelementptr inbounds float* %tmp4285, i64 1
- %tmp4287 = getelementptr inbounds float* %tmp4286, i64 1
- %tmp4288 = getelementptr inbounds float* %tmp4287, i64 1
- %tmp4289 = getelementptr inbounds float* %tmp4288, i64 1
- %tmp4290 = getelementptr inbounds float* %tmp4289, i64 1
- %tmp4291 = getelementptr inbounds float* %tmp4290, i64 1
- %tmp4292 = getelementptr inbounds float* %tmp4291, i64 1
- %tmp4293 = getelementptr inbounds float* %tmp4292, i64 1
- %tmp4294 = getelementptr inbounds float* %tmp4293, i64 1
- %tmp4295 = getelementptr inbounds float* %tmp4294, i64 1
- %tmp4296 = getelementptr inbounds float* %tmp4295, i64 1
- %tmp4297 = getelementptr inbounds float* %tmp4296, i64 1
- %tmp4298 = getelementptr inbounds float* %tmp4297, i64 1
- %tmp4299 = getelementptr inbounds float* %tmp4298, i64 1
- %tmp4300 = getelementptr inbounds float* %tmp4299, i64 1
- %tmp4301 = getelementptr inbounds float* %tmp4300, i64 1
- %tmp4302 = getelementptr inbounds float* %tmp4301, i64 1
- %tmp4303 = getelementptr inbounds float* %tmp4302, i64 1
- %tmp4304 = getelementptr inbounds float* %tmp4303, i64 1
- %tmp4305 = getelementptr inbounds float* %tmp4304, i64 1
- %tmp4306 = getelementptr inbounds float* %tmp4305, i64 1
- %tmp4307 = getelementptr inbounds float* %tmp4306, i64 1
- %tmp4308 = getelementptr inbounds float* %tmp4307, i64 1
- %tmp4309 = getelementptr inbounds float* %tmp4308, i64 1
- %tmp4310 = getelementptr inbounds float* %tmp4309, i64 1
- %tmp4311 = getelementptr inbounds float* %tmp4310, i64 1
- %tmp4312 = getelementptr inbounds float* %tmp4311, i64 1
- %tmp4313 = getelementptr inbounds float* %tmp4312, i64 1
- %tmp4314 = getelementptr inbounds float* %tmp4313, i64 1
- %tmp4315 = getelementptr inbounds float* %tmp4314, i64 1
- %tmp4316 = getelementptr inbounds float* %tmp4315, i64 1
- %tmp4317 = getelementptr inbounds float* %tmp4316, i64 1
- %tmp4318 = getelementptr inbounds float* %tmp4317, i64 1
- %tmp4319 = getelementptr inbounds float* %tmp4318, i64 1
- %tmp4320 = getelementptr inbounds float* %tmp4319, i64 1
- %tmp4321 = getelementptr inbounds float* %tmp4320, i64 1
- %tmp4322 = getelementptr inbounds float* %tmp4321, i64 1
- %tmp4323 = getelementptr inbounds float* %tmp4322, i64 1
- %tmp4324 = getelementptr inbounds float* %tmp4323, i64 1
- %tmp4325 = getelementptr inbounds float* %tmp4324, i64 1
- %tmp4326 = getelementptr inbounds float* %tmp4325, i64 1
- %tmp4327 = getelementptr inbounds float* %tmp4326, i64 1
- %tmp4328 = getelementptr inbounds float* %tmp4327, i64 1
- %tmp4329 = getelementptr inbounds float* %tmp4328, i64 1
- %tmp4330 = getelementptr inbounds float* %tmp4329, i64 1
- %tmp4331 = getelementptr inbounds float* %tmp4330, i64 1
- %tmp4332 = getelementptr inbounds float* %tmp4331, i64 1
- %tmp4333 = getelementptr inbounds float* %tmp4332, i64 1
- %tmp4334 = getelementptr inbounds float* %tmp4333, i64 1
- %tmp4335 = getelementptr inbounds float* %tmp4334, i64 1
- %tmp4336 = getelementptr inbounds float* %tmp4335, i64 1
- %tmp4337 = getelementptr inbounds float* %tmp4336, i64 1
- %tmp4338 = getelementptr inbounds float* %tmp4337, i64 1
- %tmp4339 = getelementptr inbounds float* %tmp4338, i64 1
- %tmp4340 = getelementptr inbounds float* %tmp4339, i64 1
- %tmp4341 = getelementptr inbounds float* %tmp4340, i64 1
- %tmp4342 = getelementptr inbounds float* %tmp4341, i64 1
- %tmp4343 = getelementptr inbounds float* %tmp4342, i64 1
- %tmp4344 = getelementptr inbounds float* %tmp4343, i64 1
- %tmp4345 = getelementptr inbounds float* %tmp4344, i64 1
- %tmp4346 = getelementptr inbounds float* %tmp4345, i64 1
- %tmp4347 = getelementptr inbounds float* %tmp4346, i64 1
- %tmp4348 = getelementptr inbounds float* %tmp4347, i64 1
- %tmp4349 = getelementptr inbounds float* %tmp4348, i64 1
- %tmp4350 = getelementptr inbounds float* %tmp4349, i64 1
- %tmp4351 = getelementptr inbounds float* %tmp4350, i64 1
- %tmp4352 = getelementptr inbounds float* %tmp4351, i64 1
- %tmp4353 = getelementptr inbounds float* %tmp4352, i64 1
- %tmp4354 = getelementptr inbounds float* %tmp4353, i64 1
- %tmp4355 = getelementptr inbounds float* %tmp4354, i64 1
- %tmp4356 = getelementptr inbounds float* %tmp4355, i64 1
- %tmp4357 = getelementptr inbounds float* %tmp4356, i64 1
- %tmp4358 = getelementptr inbounds float* %tmp4357, i64 1
- %tmp4359 = getelementptr inbounds float* %tmp4358, i64 1
- %tmp4360 = getelementptr inbounds float* %tmp4359, i64 1
- %tmp4361 = getelementptr inbounds float* %tmp4360, i64 1
- %tmp4362 = getelementptr inbounds float* %tmp4361, i64 1
- %tmp4363 = getelementptr inbounds float* %tmp4362, i64 1
- %tmp4364 = getelementptr inbounds float* %tmp4363, i64 1
- %tmp4365 = getelementptr inbounds float* %tmp4364, i64 1
- %tmp4366 = getelementptr inbounds float* %tmp4365, i64 1
- %tmp4367 = getelementptr inbounds float* %tmp4366, i64 1
- %tmp4368 = getelementptr inbounds float* %tmp4367, i64 1
- %tmp4369 = getelementptr inbounds float* %tmp4368, i64 1
- %tmp4370 = getelementptr inbounds float* %tmp4369, i64 1
- %tmp4371 = getelementptr inbounds float* %tmp4370, i64 1
- %tmp4372 = getelementptr inbounds float* %tmp4371, i64 1
- %tmp4373 = getelementptr inbounds float* %tmp4372, i64 1
- %tmp4374 = getelementptr inbounds float* %tmp4373, i64 1
- %tmp4375 = getelementptr inbounds float* %tmp4374, i64 1
- %tmp4376 = getelementptr inbounds float* %tmp4375, i64 1
- %tmp4377 = getelementptr inbounds float* %tmp4376, i64 1
- %tmp4378 = getelementptr inbounds float* %tmp4377, i64 1
- %tmp4379 = getelementptr inbounds float* %tmp4378, i64 1
- %tmp4380 = getelementptr inbounds float* %tmp4379, i64 1
- %tmp4381 = getelementptr inbounds float* %tmp4380, i64 1
- %tmp4382 = getelementptr inbounds float* %tmp4381, i64 1
- %tmp4383 = getelementptr inbounds float* %tmp4382, i64 1
- %tmp4384 = getelementptr inbounds float* %tmp4383, i64 1
- %tmp4385 = getelementptr inbounds float* %tmp4384, i64 1
- %tmp4386 = getelementptr inbounds float* %tmp4385, i64 1
- %tmp4387 = getelementptr inbounds float* %tmp4386, i64 1
- %tmp4388 = getelementptr inbounds float* %tmp4387, i64 1
- %tmp4389 = getelementptr inbounds float* %tmp4388, i64 1
- %tmp4390 = getelementptr inbounds float* %tmp4389, i64 1
- %tmp4391 = getelementptr inbounds float* %tmp4390, i64 1
- %tmp4392 = getelementptr inbounds float* %tmp4391, i64 1
- %tmp4393 = getelementptr inbounds float* %tmp4392, i64 1
- %tmp4394 = getelementptr inbounds float* %tmp4393, i64 1
- %tmp4395 = getelementptr inbounds float* %tmp4394, i64 1
- %tmp4396 = getelementptr inbounds float* %tmp4395, i64 1
- %tmp4397 = getelementptr inbounds float* %tmp4396, i64 1
- %tmp4398 = getelementptr inbounds float* %tmp4397, i64 1
- %tmp4399 = getelementptr inbounds float* %tmp4398, i64 1
- %tmp4400 = getelementptr inbounds float* %tmp4399, i64 1
- %tmp4401 = getelementptr inbounds float* %tmp4400, i64 1
- %tmp4402 = getelementptr inbounds float* %tmp4401, i64 1
- %tmp4403 = getelementptr inbounds float* %tmp4402, i64 1
- %tmp4404 = getelementptr inbounds float* %tmp4403, i64 1
- %tmp4405 = getelementptr inbounds float* %tmp4404, i64 1
- %tmp4406 = getelementptr inbounds float* %tmp4405, i64 1
- %tmp4407 = getelementptr inbounds float* %tmp4406, i64 1
- %tmp4408 = getelementptr inbounds float* %tmp4407, i64 1
- %tmp4409 = getelementptr inbounds float* %tmp4408, i64 1
- %tmp4410 = getelementptr inbounds float* %tmp4409, i64 1
- %tmp4411 = getelementptr inbounds float* %tmp4410, i64 1
- %tmp4412 = getelementptr inbounds float* %tmp4411, i64 1
- %tmp4413 = getelementptr inbounds float* %tmp4412, i64 1
- %tmp4414 = getelementptr inbounds float* %tmp4413, i64 1
- %tmp4415 = getelementptr inbounds float* %tmp4414, i64 1
- %tmp4416 = getelementptr inbounds float* %tmp4415, i64 1
- %tmp4417 = getelementptr inbounds float* %tmp4416, i64 1
- %tmp4418 = getelementptr inbounds float* %tmp4417, i64 1
- %tmp4419 = getelementptr inbounds float* %tmp4418, i64 1
- %tmp4420 = getelementptr inbounds float* %tmp4419, i64 1
- %tmp4421 = getelementptr inbounds float* %tmp4420, i64 1
- %tmp4422 = getelementptr inbounds float* %tmp4421, i64 1
- %tmp4423 = getelementptr inbounds float* %tmp4422, i64 1
- %tmp4424 = getelementptr inbounds float* %tmp4423, i64 1
- %tmp4425 = getelementptr inbounds float* %tmp4424, i64 1
- %tmp4426 = getelementptr inbounds float* %tmp4425, i64 1
- %tmp4427 = getelementptr inbounds float* %tmp4426, i64 1
- %tmp4428 = getelementptr inbounds float* %tmp4427, i64 1
- %tmp4429 = getelementptr inbounds float* %tmp4428, i64 1
- %tmp4430 = getelementptr inbounds float* %tmp4429, i64 1
- %tmp4431 = getelementptr inbounds float* %tmp4430, i64 1
- %tmp4432 = getelementptr inbounds float* %tmp4431, i64 1
- %tmp4433 = getelementptr inbounds float* %tmp4432, i64 1
- %tmp4434 = getelementptr inbounds float* %tmp4433, i64 1
- %tmp4435 = getelementptr inbounds float* %tmp4434, i64 1
- %tmp4436 = getelementptr inbounds float* %tmp4435, i64 1
- %tmp4437 = getelementptr inbounds float* %tmp4436, i64 1
- %tmp4438 = getelementptr inbounds float* %tmp4437, i64 1
- %tmp4439 = getelementptr inbounds float* %tmp4438, i64 1
- %tmp4440 = getelementptr inbounds float* %tmp4439, i64 1
- %tmp4441 = getelementptr inbounds float* %tmp4440, i64 1
- %tmp4442 = getelementptr inbounds float* %tmp4441, i64 1
- %tmp4443 = getelementptr inbounds float* %tmp4442, i64 1
- %tmp4444 = getelementptr inbounds float* %tmp4443, i64 1
- %tmp4445 = getelementptr inbounds float* %tmp4444, i64 1
- %tmp4446 = getelementptr inbounds float* %tmp4445, i64 1
- %tmp4447 = getelementptr inbounds float* %tmp4446, i64 1
- %tmp4448 = getelementptr inbounds float* %tmp4447, i64 1
- %tmp4449 = getelementptr inbounds float* %tmp4448, i64 1
- %tmp4450 = getelementptr inbounds float* %tmp4449, i64 1
- %tmp4451 = getelementptr inbounds float* %tmp4450, i64 1
- %tmp4452 = getelementptr inbounds float* %tmp4451, i64 1
- %tmp4453 = getelementptr inbounds float* %tmp4452, i64 1
- %tmp4454 = getelementptr inbounds float* %tmp4453, i64 1
- %tmp4455 = getelementptr inbounds float* %tmp4454, i64 1
- %tmp4456 = getelementptr inbounds float* %tmp4455, i64 1
- %tmp4457 = getelementptr inbounds float* %tmp4456, i64 1
- %tmp4458 = getelementptr inbounds float* %tmp4457, i64 1
- %tmp4459 = getelementptr inbounds float* %tmp4458, i64 1
- %tmp4460 = getelementptr inbounds float* %tmp4459, i64 1
- %tmp4461 = getelementptr inbounds float* %tmp4460, i64 1
- %tmp4462 = getelementptr inbounds float* %tmp4461, i64 1
- %tmp4463 = getelementptr inbounds float* %tmp4462, i64 1
- %tmp4464 = getelementptr inbounds float* %tmp4463, i64 1
- %tmp4465 = getelementptr inbounds float* %tmp4464, i64 1
- %tmp4466 = getelementptr inbounds float* %tmp4465, i64 1
- %tmp4467 = getelementptr inbounds float* %tmp4466, i64 1
- %tmp4468 = getelementptr inbounds float* %tmp4467, i64 1
- %tmp4469 = getelementptr inbounds float* %tmp4468, i64 1
- %tmp4470 = getelementptr inbounds float* %tmp4469, i64 1
- %tmp4471 = getelementptr inbounds float* %tmp4470, i64 1
- %tmp4472 = getelementptr inbounds float* %tmp4471, i64 1
- %tmp4473 = getelementptr inbounds float* %tmp4472, i64 1
- %tmp4474 = getelementptr inbounds float* %tmp4473, i64 1
- %tmp4475 = getelementptr inbounds float* %tmp4474, i64 1
- %tmp4476 = getelementptr inbounds float* %tmp4475, i64 1
- %tmp4477 = getelementptr inbounds float* %tmp4476, i64 1
- %tmp4478 = getelementptr inbounds float* %tmp4477, i64 1
- %tmp4479 = getelementptr inbounds float* %tmp4478, i64 1
- %tmp4480 = getelementptr inbounds float* %tmp4479, i64 1
- %tmp4481 = getelementptr inbounds float* %tmp4480, i64 1
- %tmp4482 = getelementptr inbounds float* %tmp4481, i64 1
- %tmp4483 = getelementptr inbounds float* %tmp4482, i64 1
- %tmp4484 = getelementptr inbounds float* %tmp4483, i64 1
- %tmp4485 = getelementptr inbounds float* %tmp4484, i64 1
- %tmp4486 = getelementptr inbounds float* %tmp4485, i64 1
- %tmp4487 = getelementptr inbounds float* %tmp4486, i64 1
- %tmp4488 = getelementptr inbounds float* %tmp4487, i64 1
- %tmp4489 = getelementptr inbounds float* %tmp4488, i64 1
- %tmp4490 = getelementptr inbounds float* %tmp4489, i64 1
- %tmp4491 = getelementptr inbounds float* %tmp4490, i64 1
- %tmp4492 = getelementptr inbounds float* %tmp4491, i64 1
- %tmp4493 = getelementptr inbounds float* %tmp4492, i64 1
- %tmp4494 = getelementptr inbounds float* %tmp4493, i64 1
- %tmp4495 = getelementptr inbounds float* %tmp4494, i64 1
- %tmp4496 = getelementptr inbounds float* %tmp4495, i64 1
- %tmp4497 = getelementptr inbounds float* %tmp4496, i64 1
- %tmp4498 = getelementptr inbounds float* %tmp4497, i64 1
- %tmp4499 = getelementptr inbounds float* %tmp4498, i64 1
- %tmp4500 = getelementptr inbounds float* %tmp4499, i64 1
- %tmp4501 = getelementptr inbounds float* %tmp4500, i64 1
- %tmp4502 = getelementptr inbounds float* %tmp4501, i64 1
- %tmp4503 = getelementptr inbounds float* %tmp4502, i64 1
- %tmp4504 = getelementptr inbounds float* %tmp4503, i64 1
- %tmp4505 = getelementptr inbounds float* %tmp4504, i64 1
- %tmp4506 = getelementptr inbounds float* %tmp4505, i64 1
- %tmp4507 = getelementptr inbounds float* %tmp4506, i64 1
- %tmp4508 = getelementptr inbounds float* %tmp4507, i64 1
- %tmp4509 = getelementptr inbounds float* %tmp4508, i64 1
- %tmp4510 = getelementptr inbounds float* %tmp4509, i64 1
- %tmp4511 = getelementptr inbounds float* %tmp4510, i64 1
- %tmp4512 = getelementptr inbounds float* %tmp4511, i64 1
- %tmp4513 = getelementptr inbounds float* %tmp4512, i64 1
- %tmp4514 = getelementptr inbounds float* %tmp4513, i64 1
- %tmp4515 = getelementptr inbounds float* %tmp4514, i64 1
- %tmp4516 = getelementptr inbounds float* %tmp4515, i64 1
- %tmp4517 = getelementptr inbounds float* %tmp4516, i64 1
- %tmp4518 = getelementptr inbounds float* %tmp4517, i64 1
- %tmp4519 = getelementptr inbounds float* %tmp4518, i64 1
- %tmp4520 = getelementptr inbounds float* %tmp4519, i64 1
- %tmp4521 = getelementptr inbounds float* %tmp4520, i64 1
- %tmp4522 = getelementptr inbounds float* %tmp4521, i64 1
- %tmp4523 = getelementptr inbounds float* %tmp4522, i64 1
- %tmp4524 = getelementptr inbounds float* %tmp4523, i64 1
- %tmp4525 = getelementptr inbounds float* %tmp4524, i64 1
- %tmp4526 = getelementptr inbounds float* %tmp4525, i64 1
- %tmp4527 = getelementptr inbounds float* %tmp4526, i64 1
- %tmp4528 = getelementptr inbounds float* %tmp4527, i64 1
- %tmp4529 = getelementptr inbounds float* %tmp4528, i64 1
- %tmp4530 = getelementptr inbounds float* %tmp4529, i64 1
- %tmp4531 = getelementptr inbounds float* %tmp4530, i64 1
- %tmp4532 = getelementptr inbounds float* %tmp4531, i64 1
- %tmp4533 = getelementptr inbounds float* %tmp4532, i64 1
- %tmp4534 = getelementptr inbounds float* %tmp4533, i64 1
- %tmp4535 = getelementptr inbounds float* %tmp4534, i64 1
- %tmp4536 = getelementptr inbounds float* %tmp4535, i64 1
- %tmp4537 = getelementptr inbounds float* %tmp4536, i64 1
- %tmp4538 = getelementptr inbounds float* %tmp4537, i64 1
- %tmp4539 = getelementptr inbounds float* %tmp4538, i64 1
- %tmp4540 = getelementptr inbounds float* %tmp4539, i64 1
- %tmp4541 = getelementptr inbounds float* %tmp4540, i64 1
- %tmp4542 = getelementptr inbounds float* %tmp4541, i64 1
- %tmp4543 = getelementptr inbounds float* %tmp4542, i64 1
- %tmp4544 = getelementptr inbounds float* %tmp4543, i64 1
- %tmp4545 = getelementptr inbounds float* %tmp4544, i64 1
- %tmp4546 = getelementptr inbounds float* %tmp4545, i64 1
- %tmp4547 = getelementptr inbounds float* %tmp4546, i64 1
- %tmp4548 = getelementptr inbounds float* %tmp4547, i64 1
- %tmp4549 = getelementptr inbounds float* %tmp4548, i64 1
- %tmp4550 = getelementptr inbounds float* %tmp4549, i64 1
- %tmp4551 = getelementptr inbounds float* %tmp4550, i64 1
- %tmp4552 = getelementptr inbounds float* %tmp4551, i64 1
- %tmp4553 = getelementptr inbounds float* %tmp4552, i64 1
- %tmp4554 = getelementptr inbounds float* %tmp4553, i64 1
- %tmp4555 = getelementptr inbounds float* %tmp4554, i64 1
- %tmp4556 = getelementptr inbounds float* %tmp4555, i64 1
- %tmp4557 = getelementptr inbounds float* %tmp4556, i64 1
- %tmp4558 = getelementptr inbounds float* %tmp4557, i64 1
- %tmp4559 = getelementptr inbounds float* %tmp4558, i64 1
- %tmp4560 = getelementptr inbounds float* %tmp4559, i64 1
- %tmp4561 = getelementptr inbounds float* %tmp4560, i64 1
- %tmp4562 = getelementptr inbounds float* %tmp4561, i64 1
- %tmp4563 = getelementptr inbounds float* %tmp4562, i64 1
- %tmp4564 = getelementptr inbounds float* %tmp4563, i64 1
- %tmp4565 = getelementptr inbounds float* %tmp4564, i64 1
- %tmp4566 = getelementptr inbounds float* %tmp4565, i64 1
- %tmp4567 = getelementptr inbounds float* %tmp4566, i64 1
- %tmp4568 = getelementptr inbounds float* %tmp4567, i64 1
- %tmp4569 = getelementptr inbounds float* %tmp4568, i64 1
- %tmp4570 = getelementptr inbounds float* %tmp4569, i64 1
- %tmp4571 = getelementptr inbounds float* %tmp4570, i64 1
- %tmp4572 = getelementptr inbounds float* %tmp4571, i64 1
- %tmp4573 = getelementptr inbounds float* %tmp4572, i64 1
- %tmp4574 = getelementptr inbounds float* %tmp4573, i64 1
- %tmp4575 = getelementptr inbounds float* %tmp4574, i64 1
- %tmp4576 = getelementptr inbounds float* %tmp4575, i64 1
- %tmp4577 = getelementptr inbounds float* %tmp4576, i64 1
- %tmp4578 = getelementptr inbounds float* %tmp4577, i64 1
- %tmp4579 = getelementptr inbounds float* %tmp4578, i64 1
- %tmp4580 = getelementptr inbounds float* %tmp4579, i64 1
- %tmp4581 = getelementptr inbounds float* %tmp4580, i64 1
- %tmp4582 = getelementptr inbounds float* %tmp4581, i64 1
- %tmp4583 = getelementptr inbounds float* %tmp4582, i64 1
- %tmp4584 = getelementptr inbounds float* %tmp4583, i64 1
- %tmp4585 = getelementptr inbounds float* %tmp4584, i64 1
- %tmp4586 = getelementptr inbounds float* %tmp4585, i64 1
- %tmp4587 = getelementptr inbounds float* %tmp4586, i64 1
- %tmp4588 = getelementptr inbounds float* %tmp4587, i64 1
- %tmp4589 = getelementptr inbounds float* %tmp4588, i64 1
- %tmp4590 = getelementptr inbounds float* %tmp4589, i64 1
- %tmp4591 = getelementptr inbounds float* %tmp4590, i64 1
- %tmp4592 = getelementptr inbounds float* %tmp4591, i64 1
- %tmp4593 = getelementptr inbounds float* %tmp4592, i64 1
- %tmp4594 = getelementptr inbounds float* %tmp4593, i64 1
- %tmp4595 = getelementptr inbounds float* %tmp4594, i64 1
- %tmp4596 = getelementptr inbounds float* %tmp4595, i64 1
- %tmp4597 = getelementptr inbounds float* %tmp4596, i64 1
- %tmp4598 = getelementptr inbounds float* %tmp4597, i64 1
- %tmp4599 = getelementptr inbounds float* %tmp4598, i64 1
- %tmp4600 = getelementptr inbounds float* %tmp4599, i64 1
- %tmp4601 = getelementptr inbounds float* %tmp4600, i64 1
- %tmp4602 = getelementptr inbounds float* %tmp4601, i64 1
- %tmp4603 = getelementptr inbounds float* %tmp4602, i64 1
- %tmp4604 = getelementptr inbounds float* %tmp4603, i64 1
- %tmp4605 = getelementptr inbounds float* %tmp4604, i64 1
- %tmp4606 = getelementptr inbounds float* %tmp4605, i64 1
- %tmp4607 = getelementptr inbounds float* %tmp4606, i64 1
- %tmp4608 = getelementptr inbounds float* %tmp4607, i64 1
- %tmp4609 = getelementptr inbounds float* %tmp4608, i64 1
- %tmp4610 = getelementptr inbounds float* %tmp4609, i64 1
- %tmp4611 = getelementptr inbounds float* %tmp4610, i64 1
- %tmp4612 = getelementptr inbounds float* %tmp4611, i64 1
- %tmp4613 = getelementptr inbounds float* %tmp4612, i64 1
- %tmp4614 = getelementptr inbounds float* %tmp4613, i64 1
- %tmp4615 = getelementptr inbounds float* %tmp4614, i64 1
- %tmp4616 = getelementptr inbounds float* %tmp4615, i64 1
- %tmp4617 = getelementptr inbounds float* %tmp4616, i64 1
- %tmp4618 = getelementptr inbounds float* %tmp4617, i64 1
- %tmp4619 = getelementptr inbounds float* %tmp4618, i64 1
- %tmp4620 = getelementptr inbounds float* %tmp4619, i64 1
- %tmp4621 = getelementptr inbounds float* %tmp4620, i64 1
- %tmp4622 = getelementptr inbounds float* %tmp4621, i64 1
- %tmp4623 = getelementptr inbounds float* %tmp4622, i64 1
- %tmp4624 = getelementptr inbounds float* %tmp4623, i64 1
- %tmp4625 = getelementptr inbounds float* %tmp4624, i64 1
- %tmp4626 = getelementptr inbounds float* %tmp4625, i64 1
- %tmp4627 = getelementptr inbounds float* %tmp4626, i64 1
- %tmp4628 = getelementptr inbounds float* %tmp4627, i64 1
- %tmp4629 = getelementptr inbounds float* %tmp4628, i64 1
- %tmp4630 = getelementptr inbounds float* %tmp4629, i64 1
- %tmp4631 = getelementptr inbounds float* %tmp4630, i64 1
- %tmp4632 = getelementptr inbounds float* %tmp4631, i64 1
- %tmp4633 = getelementptr inbounds float* %tmp4632, i64 1
- %tmp4634 = getelementptr inbounds float* %tmp4633, i64 1
- %tmp4635 = getelementptr inbounds float* %tmp4634, i64 1
- %tmp4636 = getelementptr inbounds float* %tmp4635, i64 1
- %tmp4637 = getelementptr inbounds float* %tmp4636, i64 1
- %tmp4638 = getelementptr inbounds float* %tmp4637, i64 1
- %tmp4639 = getelementptr inbounds float* %tmp4638, i64 1
- %tmp4640 = getelementptr inbounds float* %tmp4639, i64 1
- %tmp4641 = getelementptr inbounds float* %tmp4640, i64 1
- %tmp4642 = getelementptr inbounds float* %tmp4641, i64 1
- %tmp4643 = getelementptr inbounds float* %tmp4642, i64 1
- %tmp4644 = getelementptr inbounds float* %tmp4643, i64 1
- %tmp4645 = getelementptr inbounds float* %tmp4644, i64 1
- %tmp4646 = getelementptr inbounds float* %tmp4645, i64 1
- %tmp4647 = getelementptr inbounds float* %tmp4646, i64 1
- %tmp4648 = getelementptr inbounds float* %tmp4647, i64 1
- %tmp4649 = getelementptr inbounds float* %tmp4648, i64 1
- %tmp4650 = getelementptr inbounds float* %tmp4649, i64 1
- %tmp4651 = getelementptr inbounds float* %tmp4650, i64 1
- %tmp4652 = getelementptr inbounds float* %tmp4651, i64 1
- %tmp4653 = getelementptr inbounds float* %tmp4652, i64 1
- %tmp4654 = getelementptr inbounds float* %tmp4653, i64 1
- %tmp4655 = getelementptr inbounds float* %tmp4654, i64 1
- %tmp4656 = getelementptr inbounds float* %tmp4655, i64 1
- %tmp4657 = getelementptr inbounds float* %tmp4656, i64 1
- %tmp4658 = getelementptr inbounds float* %tmp4657, i64 1
- %tmp4659 = getelementptr inbounds float* %tmp4658, i64 1
- %tmp4660 = getelementptr inbounds float* %tmp4659, i64 1
- %tmp4661 = getelementptr inbounds float* %tmp4660, i64 1
- %tmp4662 = getelementptr inbounds float* %tmp4661, i64 1
- %tmp4663 = getelementptr inbounds float* %tmp4662, i64 1
- %tmp4664 = getelementptr inbounds float* %tmp4663, i64 1
- %tmp4665 = getelementptr inbounds float* %tmp4664, i64 1
- %tmp4666 = getelementptr inbounds float* %tmp4665, i64 1
- %tmp4667 = getelementptr inbounds float* %tmp4666, i64 1
- %tmp4668 = getelementptr inbounds float* %tmp4667, i64 1
- %tmp4669 = getelementptr inbounds float* %tmp4668, i64 1
- %tmp4670 = getelementptr inbounds float* %tmp4669, i64 1
- %tmp4671 = getelementptr inbounds float* %tmp4670, i64 1
- %tmp4672 = getelementptr inbounds float* %tmp4671, i64 1
- %tmp4673 = getelementptr inbounds float* %tmp4672, i64 1
- %tmp4674 = getelementptr inbounds float* %tmp4673, i64 1
- %tmp4675 = getelementptr inbounds float* %tmp4674, i64 1
- %tmp4676 = getelementptr inbounds float* %tmp4675, i64 1
- %tmp4677 = getelementptr inbounds float* %tmp4676, i64 1
- %tmp4678 = getelementptr inbounds float* %tmp4677, i64 1
- %tmp4679 = getelementptr inbounds float* %tmp4678, i64 1
- %tmp4680 = getelementptr inbounds float* %tmp4679, i64 1
- %tmp4681 = getelementptr inbounds float* %tmp4680, i64 1
- %tmp4682 = getelementptr inbounds float* %tmp4681, i64 1
- %tmp4683 = getelementptr inbounds float* %tmp4682, i64 1
- %tmp4684 = getelementptr inbounds float* %tmp4683, i64 1
- %tmp4685 = getelementptr inbounds float* %tmp4684, i64 1
- %tmp4686 = getelementptr inbounds float* %tmp4685, i64 1
- %tmp4687 = getelementptr inbounds float* %tmp4686, i64 1
- %tmp4688 = getelementptr inbounds float* %tmp4687, i64 1
- %tmp4689 = getelementptr inbounds float* %tmp4688, i64 1
- %tmp4690 = getelementptr inbounds float* %tmp4689, i64 1
- %tmp4691 = getelementptr inbounds float* %tmp4690, i64 1
- %tmp4692 = getelementptr inbounds float* %tmp4691, i64 1
- %tmp4693 = getelementptr inbounds float* %tmp4692, i64 1
- %tmp4694 = getelementptr inbounds float* %tmp4693, i64 1
- %tmp4695 = getelementptr inbounds float* %tmp4694, i64 1
- %tmp4696 = getelementptr inbounds float* %tmp4695, i64 1
- %tmp4697 = getelementptr inbounds float* %tmp4696, i64 1
- %tmp4698 = getelementptr inbounds float* %tmp4697, i64 1
- %tmp4699 = getelementptr inbounds float* %tmp4698, i64 1
- %tmp4700 = getelementptr inbounds float* %tmp4699, i64 1
- %tmp4701 = getelementptr inbounds float* %tmp4700, i64 1
- %tmp4702 = getelementptr inbounds float* %tmp4701, i64 1
- %tmp4703 = getelementptr inbounds float* %tmp4702, i64 1
- %tmp4704 = getelementptr inbounds float* %tmp4703, i64 1
- %tmp4705 = getelementptr inbounds float* %tmp4704, i64 1
- %tmp4706 = getelementptr inbounds float* %tmp4705, i64 1
- %tmp4707 = getelementptr inbounds float* %tmp4706, i64 1
- %tmp4708 = getelementptr inbounds float* %tmp4707, i64 1
- %tmp4709 = getelementptr inbounds float* %tmp4708, i64 1
- %tmp4710 = getelementptr inbounds float* %tmp4709, i64 1
- %tmp4711 = getelementptr inbounds float* %tmp4710, i64 1
- %tmp4712 = getelementptr inbounds float* %tmp4711, i64 1
- %tmp4713 = getelementptr inbounds float* %tmp4712, i64 1
- %tmp4714 = getelementptr inbounds float* %tmp4713, i64 1
- %tmp4715 = getelementptr inbounds float* %tmp4714, i64 1
- %tmp4716 = getelementptr inbounds float* %tmp4715, i64 1
- %tmp4717 = getelementptr inbounds float* %tmp4716, i64 1
- %tmp4718 = getelementptr inbounds float* %tmp4717, i64 1
- %tmp4719 = getelementptr inbounds float* %tmp4718, i64 1
- %tmp4720 = getelementptr inbounds float* %tmp4719, i64 1
- %tmp4721 = getelementptr inbounds float* %tmp4720, i64 1
- %tmp4722 = getelementptr inbounds float* %tmp4721, i64 1
- %tmp4723 = getelementptr inbounds float* %tmp4722, i64 1
- %tmp4724 = getelementptr inbounds float* %tmp4723, i64 1
- %tmp4725 = getelementptr inbounds float* %tmp4724, i64 1
- %tmp4726 = getelementptr inbounds float* %tmp4725, i64 1
- %tmp4727 = getelementptr inbounds float* %tmp4726, i64 1
- %tmp4728 = getelementptr inbounds float* %tmp4727, i64 1
- %tmp4729 = getelementptr inbounds float* %tmp4728, i64 1
- %tmp4730 = getelementptr inbounds float* %tmp4729, i64 1
- %tmp4731 = getelementptr inbounds float* %tmp4730, i64 1
- %tmp4732 = getelementptr inbounds float* %tmp4731, i64 1
- %tmp4733 = getelementptr inbounds float* %tmp4732, i64 1
- %tmp4734 = getelementptr inbounds float* %tmp4733, i64 1
- %tmp4735 = getelementptr inbounds float* %tmp4734, i64 1
- %tmp4736 = getelementptr inbounds float* %tmp4735, i64 1
- %tmp4737 = getelementptr inbounds float* %tmp4736, i64 1
- %tmp4738 = getelementptr inbounds float* %tmp4737, i64 1
- %tmp4739 = getelementptr inbounds float* %tmp4738, i64 1
- %tmp4740 = getelementptr inbounds float* %tmp4739, i64 1
- %tmp4741 = getelementptr inbounds float* %tmp4740, i64 1
- %tmp4742 = getelementptr inbounds float* %tmp4741, i64 1
- %tmp4743 = getelementptr inbounds float* %tmp4742, i64 1
- %tmp4744 = getelementptr inbounds float* %tmp4743, i64 1
- %tmp4745 = getelementptr inbounds float* %tmp4744, i64 1
- %tmp4746 = getelementptr inbounds float* %tmp4745, i64 1
- %tmp4747 = getelementptr inbounds float* %tmp4746, i64 1
- %tmp4748 = getelementptr inbounds float* %tmp4747, i64 1
- %tmp4749 = getelementptr inbounds float* %tmp4748, i64 1
- %tmp4750 = getelementptr inbounds float* %tmp4749, i64 1
- %tmp4751 = getelementptr inbounds float* %tmp4750, i64 1
- %tmp4752 = getelementptr inbounds float* %tmp4751, i64 1
- %tmp4753 = getelementptr inbounds float* %tmp4752, i64 1
- %tmp4754 = getelementptr inbounds float* %tmp4753, i64 1
- %tmp4755 = getelementptr inbounds float* %tmp4754, i64 1
- %tmp4756 = getelementptr inbounds float* %tmp4755, i64 1
- %tmp4757 = getelementptr inbounds float* %tmp4756, i64 1
- %tmp4758 = getelementptr inbounds float* %tmp4757, i64 1
- %tmp4759 = getelementptr inbounds float* %tmp4758, i64 1
- %tmp4760 = getelementptr inbounds float* %tmp4759, i64 1
- %tmp4761 = getelementptr inbounds float* %tmp4760, i64 1
- %tmp4762 = getelementptr inbounds float* %tmp4761, i64 1
- %tmp4763 = getelementptr inbounds float* %tmp4762, i64 1
- %tmp4764 = getelementptr inbounds float* %tmp4763, i64 1
- %tmp4765 = getelementptr inbounds float* %tmp4764, i64 1
- %tmp4766 = getelementptr inbounds float* %tmp4765, i64 1
- %tmp4767 = getelementptr inbounds float* %tmp4766, i64 1
- %tmp4768 = getelementptr inbounds float* %tmp4767, i64 1
- %tmp4769 = getelementptr inbounds float* %tmp4768, i64 1
- %tmp4770 = getelementptr inbounds float* %tmp4769, i64 1
- %tmp4771 = getelementptr inbounds float* %tmp4770, i64 1
- %tmp4772 = getelementptr inbounds float* %tmp4771, i64 1
- %tmp4773 = getelementptr inbounds float* %tmp4772, i64 1
- %tmp4774 = getelementptr inbounds float* %tmp4773, i64 1
- %tmp4775 = getelementptr inbounds float* %tmp4774, i64 1
- %tmp4776 = getelementptr inbounds float* %tmp4775, i64 1
- %tmp4777 = getelementptr inbounds float* %tmp4776, i64 1
- %tmp4778 = getelementptr inbounds float* %tmp4777, i64 1
- %tmp4779 = getelementptr inbounds float* %tmp4778, i64 1
- %tmp4780 = getelementptr inbounds float* %tmp4779, i64 1
- %tmp4781 = getelementptr inbounds float* %tmp4780, i64 1
- %tmp4782 = getelementptr inbounds float* %tmp4781, i64 1
- %tmp4783 = getelementptr inbounds float* %tmp4782, i64 1
- %tmp4784 = getelementptr inbounds float* %tmp4783, i64 1
- %tmp4785 = getelementptr inbounds float* %tmp4784, i64 1
- %tmp4786 = getelementptr inbounds float* %tmp4785, i64 1
- %tmp4787 = getelementptr inbounds float* %tmp4786, i64 1
- %tmp4788 = getelementptr inbounds float* %tmp4787, i64 1
- %tmp4789 = getelementptr inbounds float* %tmp4788, i64 1
- %tmp4790 = getelementptr inbounds float* %tmp4789, i64 1
- %tmp4791 = getelementptr inbounds float* %tmp4790, i64 1
- %tmp4792 = getelementptr inbounds float* %tmp4791, i64 1
- %tmp4793 = getelementptr inbounds float* %tmp4792, i64 1
- %tmp4794 = getelementptr inbounds float* %tmp4793, i64 1
- %tmp4795 = getelementptr inbounds float* %tmp4794, i64 1
- %tmp4796 = getelementptr inbounds float* %tmp4795, i64 1
- %tmp4797 = getelementptr inbounds float* %tmp4796, i64 1
- %tmp4798 = getelementptr inbounds float* %tmp4797, i64 1
- %tmp4799 = getelementptr inbounds float* %tmp4798, i64 1
- %tmp4800 = getelementptr inbounds float* %tmp4799, i64 1
- %tmp4801 = getelementptr inbounds float* %tmp4800, i64 1
- %tmp4802 = getelementptr inbounds float* %tmp4801, i64 1
- %tmp4803 = getelementptr inbounds float* %tmp4802, i64 1
- %tmp4804 = getelementptr inbounds float* %tmp4803, i64 1
- %tmp4805 = getelementptr inbounds float* %tmp4804, i64 1
- %tmp4806 = getelementptr inbounds float* %tmp4805, i64 1
- %tmp4807 = getelementptr inbounds float* %tmp4806, i64 1
- %tmp4808 = getelementptr inbounds float* %tmp4807, i64 1
- %tmp4809 = getelementptr inbounds float* %tmp4808, i64 1
- %tmp4810 = getelementptr inbounds float* %tmp4809, i64 1
- %tmp4811 = getelementptr inbounds float* %tmp4810, i64 1
- %tmp4812 = getelementptr inbounds float* %tmp4811, i64 1
- %tmp4813 = getelementptr inbounds float* %tmp4812, i64 1
- %tmp4814 = getelementptr inbounds float* %tmp4813, i64 1
- %tmp4815 = getelementptr inbounds float* %tmp4814, i64 1
- %tmp4816 = getelementptr inbounds float* %tmp4815, i64 1
- %tmp4817 = getelementptr inbounds float* %tmp4816, i64 1
- %tmp4818 = getelementptr inbounds float* %tmp4817, i64 1
- %tmp4819 = getelementptr inbounds float* %tmp4818, i64 1
- %tmp4820 = getelementptr inbounds float* %tmp4819, i64 1
- %tmp4821 = getelementptr inbounds float* %tmp4820, i64 1
- %tmp4822 = getelementptr inbounds float* %tmp4821, i64 1
- %tmp4823 = getelementptr inbounds float* %tmp4822, i64 1
- %tmp4824 = getelementptr inbounds float* %tmp4823, i64 1
- %tmp4825 = getelementptr inbounds float* %tmp4824, i64 1
- %tmp4826 = getelementptr inbounds float* %tmp4825, i64 1
- %tmp4827 = getelementptr inbounds float* %tmp4826, i64 1
- %tmp4828 = getelementptr inbounds float* %tmp4827, i64 1
- %tmp4829 = getelementptr inbounds float* %tmp4828, i64 1
- %tmp4830 = getelementptr inbounds float* %tmp4829, i64 1
- %tmp4831 = getelementptr inbounds float* %tmp4830, i64 1
- %tmp4832 = getelementptr inbounds float* %tmp4831, i64 1
- %tmp4833 = getelementptr inbounds float* %tmp4832, i64 1
- %tmp4834 = getelementptr inbounds float* %tmp4833, i64 1
- %tmp4835 = getelementptr inbounds float* %tmp4834, i64 1
- %tmp4836 = getelementptr inbounds float* %tmp4835, i64 1
- %tmp4837 = getelementptr inbounds float* %tmp4836, i64 1
- %tmp4838 = getelementptr inbounds float* %tmp4837, i64 1
- %tmp4839 = getelementptr inbounds float* %tmp4838, i64 1
- %tmp4840 = getelementptr inbounds float* %tmp4839, i64 1
- %tmp4841 = getelementptr inbounds float* %tmp4840, i64 1
- %tmp4842 = getelementptr inbounds float* %tmp4841, i64 1
- %tmp4843 = getelementptr inbounds float* %tmp4842, i64 1
- %tmp4844 = getelementptr inbounds float* %tmp4843, i64 1
- %tmp4845 = getelementptr inbounds float* %tmp4844, i64 1
- %tmp4846 = getelementptr inbounds float* %tmp4845, i64 1
- %tmp4847 = getelementptr inbounds float* %tmp4846, i64 1
- %tmp4848 = getelementptr inbounds float* %tmp4847, i64 1
- %tmp4849 = getelementptr inbounds float* %tmp4848, i64 1
- %tmp4850 = getelementptr inbounds float* %tmp4849, i64 1
- %tmp4851 = getelementptr inbounds float* %tmp4850, i64 1
- %tmp4852 = getelementptr inbounds float* %tmp4851, i64 1
- %tmp4853 = getelementptr inbounds float* %tmp4852, i64 1
- %tmp4854 = getelementptr inbounds float* %tmp4853, i64 1
- %tmp4855 = getelementptr inbounds float* %tmp4854, i64 1
- %tmp4856 = getelementptr inbounds float* %tmp4855, i64 1
- %tmp4857 = getelementptr inbounds float* %tmp4856, i64 1
- %tmp4858 = getelementptr inbounds float* %tmp4857, i64 1
- %tmp4859 = getelementptr inbounds float* %tmp4858, i64 1
- %tmp4860 = getelementptr inbounds float* %tmp4859, i64 1
- %tmp4861 = getelementptr inbounds float* %tmp4860, i64 1
- %tmp4862 = getelementptr inbounds float* %tmp4861, i64 1
- %tmp4863 = getelementptr inbounds float* %tmp4862, i64 1
- %tmp4864 = getelementptr inbounds float* %tmp4863, i64 1
- %tmp4865 = getelementptr inbounds float* %tmp4864, i64 1
- %tmp4866 = getelementptr inbounds float* %tmp4865, i64 1
- %tmp4867 = getelementptr inbounds float* %tmp4866, i64 1
- %tmp4868 = getelementptr inbounds float* %tmp4867, i64 1
- %tmp4869 = getelementptr inbounds float* %tmp4868, i64 1
- %tmp4870 = getelementptr inbounds float* %tmp4869, i64 1
- %tmp4871 = getelementptr inbounds float* %tmp4870, i64 1
- %tmp4872 = getelementptr inbounds float* %tmp4871, i64 1
- %tmp4873 = getelementptr inbounds float* %tmp4872, i64 1
- %tmp4874 = getelementptr inbounds float* %tmp4873, i64 1
- %tmp4875 = getelementptr inbounds float* %tmp4874, i64 1
- %tmp4876 = getelementptr inbounds float* %tmp4875, i64 1
- %tmp4877 = getelementptr inbounds float* %tmp4876, i64 1
- %tmp4878 = getelementptr inbounds float* %tmp4877, i64 1
- %tmp4879 = getelementptr inbounds float* %tmp4878, i64 1
- %tmp4880 = getelementptr inbounds float* %tmp4879, i64 1
- %tmp4881 = getelementptr inbounds float* %tmp4880, i64 1
- %tmp4882 = getelementptr inbounds float* %tmp4881, i64 1
- %tmp4883 = getelementptr inbounds float* %tmp4882, i64 1
- %tmp4884 = getelementptr inbounds float* %tmp4883, i64 1
- %tmp4885 = getelementptr inbounds float* %tmp4884, i64 1
- %tmp4886 = getelementptr inbounds float* %tmp4885, i64 1
- %tmp4887 = getelementptr inbounds float* %tmp4886, i64 1
- %tmp4888 = getelementptr inbounds float* %tmp4887, i64 1
- %tmp4889 = getelementptr inbounds float* %tmp4888, i64 1
- %tmp4890 = getelementptr inbounds float* %tmp4889, i64 1
- %tmp4891 = getelementptr inbounds float* %tmp4890, i64 1
- %tmp4892 = getelementptr inbounds float* %tmp4891, i64 1
- %tmp4893 = getelementptr inbounds float* %tmp4892, i64 1
- %tmp4894 = getelementptr inbounds float* %tmp4893, i64 1
- %tmp4895 = getelementptr inbounds float* %tmp4894, i64 1
- %tmp4896 = getelementptr inbounds float* %tmp4895, i64 1
- %tmp4897 = getelementptr inbounds float* %tmp4896, i64 1
- %tmp4898 = getelementptr inbounds float* %tmp4897, i64 1
- %tmp4899 = getelementptr inbounds float* %tmp4898, i64 1
- %tmp4900 = getelementptr inbounds float* %tmp4899, i64 1
- %tmp4901 = getelementptr inbounds float* %tmp4900, i64 1
- %tmp4902 = getelementptr inbounds float* %tmp4901, i64 1
- %tmp4903 = getelementptr inbounds float* %tmp4902, i64 1
- %tmp4904 = getelementptr inbounds float* %tmp4903, i64 1
- %tmp4905 = getelementptr inbounds float* %tmp4904, i64 1
- %tmp4906 = getelementptr inbounds float* %tmp4905, i64 1
- %tmp4907 = getelementptr inbounds float* %tmp4906, i64 1
- %tmp4908 = getelementptr inbounds float* %tmp4907, i64 1
- %tmp4909 = getelementptr inbounds float* %tmp4908, i64 1
- %tmp4910 = getelementptr inbounds float* %tmp4909, i64 1
- %tmp4911 = getelementptr inbounds float* %tmp4910, i64 1
- %tmp4912 = getelementptr inbounds float* %tmp4911, i64 1
- %tmp4913 = getelementptr inbounds float* %tmp4912, i64 1
- %tmp4914 = getelementptr inbounds float* %tmp4913, i64 1
- %tmp4915 = getelementptr inbounds float* %tmp4914, i64 1
- %tmp4916 = getelementptr inbounds float* %tmp4915, i64 1
- %tmp4917 = getelementptr inbounds float* %tmp4916, i64 1
- %tmp4918 = getelementptr inbounds float* %tmp4917, i64 1
- %tmp4919 = getelementptr inbounds float* %tmp4918, i64 1
- %tmp4920 = getelementptr inbounds float* %tmp4919, i64 1
- %tmp4921 = getelementptr inbounds float* %tmp4920, i64 1
- %tmp4922 = getelementptr inbounds float* %tmp4921, i64 1
- %tmp4923 = getelementptr inbounds float* %tmp4922, i64 1
- %tmp4924 = getelementptr inbounds float* %tmp4923, i64 1
- %tmp4925 = getelementptr inbounds float* %tmp4924, i64 1
- %tmp4926 = getelementptr inbounds float* %tmp4925, i64 1
- %tmp4927 = getelementptr inbounds float* %tmp4926, i64 1
- %tmp4928 = getelementptr inbounds float* %tmp4927, i64 1
- %tmp4929 = getelementptr inbounds float* %tmp4928, i64 1
- %tmp4930 = getelementptr inbounds float* %tmp4929, i64 1
- %tmp4931 = getelementptr inbounds float* %tmp4930, i64 1
- %tmp4932 = getelementptr inbounds float* %tmp4931, i64 1
- %tmp4933 = getelementptr inbounds float* %tmp4932, i64 1
- %tmp4934 = getelementptr inbounds float* %tmp4933, i64 1
- %tmp4935 = getelementptr inbounds float* %tmp4934, i64 1
- %tmp4936 = getelementptr inbounds float* %tmp4935, i64 1
- %tmp4937 = getelementptr inbounds float* %tmp4936, i64 1
- %tmp4938 = getelementptr inbounds float* %tmp4937, i64 1
- %tmp4939 = getelementptr inbounds float* %tmp4938, i64 1
- %tmp4940 = getelementptr inbounds float* %tmp4939, i64 1
- %tmp4941 = getelementptr inbounds float* %tmp4940, i64 1
- %tmp4942 = getelementptr inbounds float* %tmp4941, i64 1
- %tmp4943 = getelementptr inbounds float* %tmp4942, i64 1
- %tmp4944 = getelementptr inbounds float* %tmp4943, i64 1
- %tmp4945 = getelementptr inbounds float* %tmp4944, i64 1
- %tmp4946 = getelementptr inbounds float* %tmp4945, i64 1
- %tmp4947 = getelementptr inbounds float* %tmp4946, i64 1
- %tmp4948 = getelementptr inbounds float* %tmp4947, i64 1
- %tmp4949 = getelementptr inbounds float* %tmp4948, i64 1
- %tmp4950 = getelementptr inbounds float* %tmp4949, i64 1
- %tmp4951 = getelementptr inbounds float* %tmp4950, i64 1
- %tmp4952 = getelementptr inbounds float* %tmp4951, i64 1
- %tmp4953 = getelementptr inbounds float* %tmp4952, i64 1
- %tmp4954 = getelementptr inbounds float* %tmp4953, i64 1
- %tmp4955 = getelementptr inbounds float* %tmp4954, i64 1
- %tmp4956 = getelementptr inbounds float* %tmp4955, i64 1
- %tmp4957 = getelementptr inbounds float* %tmp4956, i64 1
- %tmp4958 = getelementptr inbounds float* %tmp4957, i64 1
- %tmp4959 = getelementptr inbounds float* %tmp4958, i64 1
- %tmp4960 = getelementptr inbounds float* %tmp4959, i64 1
- %tmp4961 = getelementptr inbounds float* %tmp4960, i64 1
- %tmp4962 = getelementptr inbounds float* %tmp4961, i64 1
- %tmp4963 = getelementptr inbounds float* %tmp4962, i64 1
- %tmp4964 = getelementptr inbounds float* %tmp4963, i64 1
- %tmp4965 = getelementptr inbounds float* %tmp4964, i64 1
- %tmp4966 = getelementptr inbounds float* %tmp4965, i64 1
- %tmp4967 = getelementptr inbounds float* %tmp4966, i64 1
- %tmp4968 = getelementptr inbounds float* %tmp4967, i64 1
- %tmp4969 = getelementptr inbounds float* %tmp4968, i64 1
- %tmp4970 = getelementptr inbounds float* %tmp4969, i64 1
- %tmp4971 = getelementptr inbounds float* %tmp4970, i64 1
- %tmp4972 = getelementptr inbounds float* %tmp4971, i64 1
- %tmp4973 = getelementptr inbounds float* %tmp4972, i64 1
- %tmp4974 = getelementptr inbounds float* %tmp4973, i64 1
- %tmp4975 = getelementptr inbounds float* %tmp4974, i64 1
- %tmp4976 = getelementptr inbounds float* %tmp4975, i64 1
- %tmp4977 = getelementptr inbounds float* %tmp4976, i64 1
- %tmp4978 = getelementptr inbounds float* %tmp4977, i64 1
- %tmp4979 = getelementptr inbounds float* %tmp4978, i64 1
- %tmp4980 = getelementptr inbounds float* %tmp4979, i64 1
- %tmp4981 = getelementptr inbounds float* %tmp4980, i64 1
- %tmp4982 = getelementptr inbounds float* %tmp4981, i64 1
- %tmp4983 = getelementptr inbounds float* %tmp4982, i64 1
- %tmp4984 = getelementptr inbounds float* %tmp4983, i64 1
- %tmp4985 = getelementptr inbounds float* %tmp4984, i64 1
- %tmp4986 = getelementptr inbounds float* %tmp4985, i64 1
- %tmp4987 = getelementptr inbounds float* %tmp4986, i64 1
- %tmp4988 = getelementptr inbounds float* %tmp4987, i64 1
- %tmp4989 = getelementptr inbounds float* %tmp4988, i64 1
- %tmp4990 = getelementptr inbounds float* %tmp4989, i64 1
- %tmp4991 = getelementptr inbounds float* %tmp4990, i64 1
- %tmp4992 = getelementptr inbounds float* %tmp4991, i64 1
- %tmp4993 = getelementptr inbounds float* %tmp4992, i64 1
- %tmp4994 = getelementptr inbounds float* %tmp4993, i64 1
- %tmp4995 = getelementptr inbounds float* %tmp4994, i64 1
- %tmp4996 = getelementptr inbounds float* %tmp4995, i64 1
- %tmp4997 = getelementptr inbounds float* %tmp4996, i64 1
- %tmp4998 = getelementptr inbounds float* %tmp4997, i64 1
- %tmp4999 = getelementptr inbounds float* %tmp4998, i64 1
- %tmp5000 = getelementptr inbounds float* %tmp4999, i64 1
- %tmp5001 = getelementptr inbounds float* %tmp5000, i64 1
- %tmp5002 = getelementptr inbounds float* %tmp5001, i64 1
- %tmp5003 = getelementptr inbounds float* %tmp5002, i64 1
- %tmp5004 = getelementptr inbounds float* %tmp5003, i64 1
- %tmp5005 = getelementptr inbounds float* %tmp5004, i64 1
- %tmp5006 = getelementptr inbounds float* %tmp5005, i64 1
- %tmp5007 = getelementptr inbounds float* %tmp5006, i64 1
- %tmp5008 = getelementptr inbounds float* %tmp5007, i64 1
- %tmp5009 = getelementptr inbounds float* %tmp5008, i64 1
- %tmp5010 = getelementptr inbounds float* %tmp5009, i64 1
- %tmp5011 = getelementptr inbounds float* %tmp5010, i64 1
- %tmp5012 = getelementptr inbounds float* %tmp5011, i64 1
- %tmp5013 = getelementptr inbounds float* %tmp5012, i64 1
- %tmp5014 = getelementptr inbounds float* %tmp5013, i64 1
- %tmp5015 = getelementptr inbounds float* %tmp5014, i64 1
- %tmp5016 = getelementptr inbounds float* %tmp5015, i64 1
- %tmp5017 = getelementptr inbounds float* %tmp5016, i64 1
- %tmp5018 = getelementptr inbounds float* %tmp5017, i64 1
- %tmp5019 = getelementptr inbounds float* %tmp5018, i64 1
- %tmp5020 = getelementptr inbounds float* %tmp5019, i64 1
- %tmp5021 = getelementptr inbounds float* %tmp5020, i64 1
- %tmp5022 = getelementptr inbounds float* %tmp5021, i64 1
- %tmp5023 = getelementptr inbounds float* %tmp5022, i64 1
- %tmp5024 = getelementptr inbounds float* %tmp5023, i64 1
- %tmp5025 = getelementptr inbounds float* %tmp5024, i64 1
- %tmp5026 = getelementptr inbounds float* %tmp5025, i64 1
- %tmp5027 = getelementptr inbounds float* %tmp5026, i64 1
- %tmp5028 = getelementptr inbounds float* %tmp5027, i64 1
- %tmp5029 = getelementptr inbounds float* %tmp5028, i64 1
- %tmp5030 = getelementptr inbounds float* %tmp5029, i64 1
- %tmp5031 = getelementptr inbounds float* %tmp5030, i64 1
- %tmp5032 = getelementptr inbounds float* %tmp5031, i64 1
- %tmp5033 = getelementptr inbounds float* %tmp5032, i64 1
- %tmp5034 = getelementptr inbounds float* %tmp5033, i64 1
- %tmp5035 = getelementptr inbounds float* %tmp5034, i64 1
- %tmp5036 = getelementptr inbounds float* %tmp5035, i64 1
- %tmp5037 = getelementptr inbounds float* %tmp5036, i64 1
- %tmp5038 = getelementptr inbounds float* %tmp5037, i64 1
- %tmp5039 = getelementptr inbounds float* %tmp5038, i64 1
- %tmp5040 = getelementptr inbounds float* %tmp5039, i64 1
- %tmp5041 = getelementptr inbounds float* %tmp5040, i64 1
- %tmp5042 = getelementptr inbounds float* %tmp5041, i64 1
- %tmp5043 = getelementptr inbounds float* %tmp5042, i64 1
- %tmp5044 = getelementptr inbounds float* %tmp5043, i64 1
- %tmp5045 = getelementptr inbounds float* %tmp5044, i64 1
- %tmp5046 = getelementptr inbounds float* %tmp5045, i64 1
- %tmp5047 = getelementptr inbounds float* %tmp5046, i64 1
- %tmp5048 = getelementptr inbounds float* %tmp5047, i64 1
- %tmp5049 = getelementptr inbounds float* %tmp5048, i64 1
- %tmp5050 = getelementptr inbounds float* %tmp5049, i64 1
- %tmp5051 = getelementptr inbounds float* %tmp5050, i64 1
- %tmp5052 = getelementptr inbounds float* %tmp5051, i64 1
- %tmp5053 = getelementptr inbounds float* %tmp5052, i64 1
- %tmp5054 = getelementptr inbounds float* %tmp5053, i64 1
- %tmp5055 = getelementptr inbounds float* %tmp5054, i64 1
- %tmp5056 = getelementptr inbounds float* %tmp5055, i64 1
- %tmp5057 = getelementptr inbounds float* %tmp5056, i64 1
- %tmp5058 = getelementptr inbounds float* %tmp5057, i64 1
- %tmp5059 = getelementptr inbounds float* %tmp5058, i64 1
- %tmp5060 = getelementptr inbounds float* %tmp5059, i64 1
- %tmp5061 = getelementptr inbounds float* %tmp5060, i64 1
- %tmp5062 = getelementptr inbounds float* %tmp5061, i64 1
- %tmp5063 = getelementptr inbounds float* %tmp5062, i64 1
- %tmp5064 = getelementptr inbounds float* %tmp5063, i64 1
- %tmp5065 = getelementptr inbounds float* %tmp5064, i64 1
- %tmp5066 = getelementptr inbounds float* %tmp5065, i64 1
- %tmp5067 = getelementptr inbounds float* %tmp5066, i64 1
- %tmp5068 = getelementptr inbounds float* %tmp5067, i64 1
- %tmp5069 = getelementptr inbounds float* %tmp5068, i64 1
- %tmp5070 = getelementptr inbounds float* %tmp5069, i64 1
- %tmp5071 = getelementptr inbounds float* %tmp5070, i64 1
- %tmp5072 = getelementptr inbounds float* %tmp5071, i64 1
- %tmp5073 = getelementptr inbounds float* %tmp5072, i64 1
- %tmp5074 = getelementptr inbounds float* %tmp5073, i64 1
- %tmp5075 = getelementptr inbounds float* %tmp5074, i64 1
- %tmp5076 = getelementptr inbounds float* %tmp5075, i64 1
- %tmp5077 = getelementptr inbounds float* %tmp5076, i64 1
- %tmp5078 = getelementptr inbounds float* %tmp5077, i64 1
- %tmp5079 = getelementptr inbounds float* %tmp5078, i64 1
- %tmp5080 = getelementptr inbounds float* %tmp5079, i64 1
- %tmp5081 = getelementptr inbounds float* %tmp5080, i64 1
- %tmp5082 = getelementptr inbounds float* %tmp5081, i64 1
- %tmp5083 = getelementptr inbounds float* %tmp5082, i64 1
- %tmp5084 = getelementptr inbounds float* %tmp5083, i64 1
- %tmp5085 = getelementptr inbounds float* %tmp5084, i64 1
- %tmp5086 = getelementptr inbounds float* %tmp5085, i64 1
- %tmp5087 = getelementptr inbounds float* %tmp5086, i64 1
- %tmp5088 = getelementptr inbounds float* %tmp5087, i64 1
- %tmp5089 = getelementptr inbounds float* %tmp5088, i64 1
- %tmp5090 = getelementptr inbounds float* %tmp5089, i64 1
- %tmp5091 = getelementptr inbounds float* %tmp5090, i64 1
- %tmp5092 = getelementptr inbounds float* %tmp5091, i64 1
- %tmp5093 = getelementptr inbounds float* %tmp5092, i64 1
- %tmp5094 = getelementptr inbounds float* %tmp5093, i64 1
- %tmp5095 = getelementptr inbounds float* %tmp5094, i64 1
- %tmp5096 = getelementptr inbounds float* %tmp5095, i64 1
- %tmp5097 = getelementptr inbounds float* %tmp5096, i64 1
- %tmp5098 = getelementptr inbounds float* %tmp5097, i64 1
- %tmp5099 = getelementptr inbounds float* %tmp5098, i64 1
- %tmp5100 = getelementptr inbounds float* %tmp5099, i64 1
- %tmp5101 = getelementptr inbounds float* %tmp5100, i64 1
- %tmp5102 = getelementptr inbounds float* %tmp5101, i64 1
- %tmp5103 = getelementptr inbounds float* %tmp5102, i64 1
- %tmp5104 = getelementptr inbounds float* %tmp5103, i64 1
- %tmp5105 = getelementptr inbounds float* %tmp5104, i64 1
- %tmp5106 = getelementptr inbounds float* %tmp5105, i64 1
- %tmp5107 = getelementptr inbounds float* %tmp5106, i64 1
- %tmp5108 = getelementptr inbounds float* %tmp5107, i64 1
- %tmp5109 = getelementptr inbounds float* %tmp5108, i64 1
- %tmp5110 = getelementptr inbounds float* %tmp5109, i64 1
- %tmp5111 = getelementptr inbounds float* %tmp5110, i64 1
- %tmp5112 = getelementptr inbounds float* %tmp5111, i64 1
- %tmp5113 = getelementptr inbounds float* %tmp5112, i64 1
- %tmp5114 = getelementptr inbounds float* %tmp5113, i64 1
- %tmp5115 = getelementptr inbounds float* %tmp5114, i64 1
- %tmp5116 = getelementptr inbounds float* %tmp5115, i64 1
- %tmp5117 = getelementptr inbounds float* %tmp5116, i64 1
- %tmp5118 = getelementptr inbounds float* %tmp5117, i64 1
- %tmp5119 = getelementptr inbounds float* %tmp5118, i64 1
- %tmp5120 = getelementptr inbounds float* %tmp5119, i64 1
- %tmp5121 = getelementptr inbounds float* %tmp5120, i64 1
- %tmp5122 = getelementptr inbounds float* %tmp5121, i64 1
- %tmp5123 = getelementptr inbounds float* %tmp5122, i64 1
- %tmp5124 = getelementptr inbounds float* %tmp5123, i64 1
- %tmp5125 = getelementptr inbounds float* %tmp5124, i64 1
- %tmp5126 = getelementptr inbounds float* %tmp5125, i64 1
- %tmp5127 = getelementptr inbounds float* %tmp5126, i64 1
- %tmp5128 = getelementptr inbounds float* %tmp5127, i64 1
- %tmp5129 = getelementptr inbounds float* %tmp5128, i64 1
- %tmp5130 = getelementptr inbounds float* %tmp5129, i64 1
- %tmp5131 = getelementptr inbounds float* %tmp5130, i64 1
- %tmp5132 = getelementptr inbounds float* %tmp5131, i64 1
- %tmp5133 = getelementptr inbounds float* %tmp5132, i64 1
- %tmp5134 = getelementptr inbounds float* %tmp5133, i64 1
- %tmp5135 = getelementptr inbounds float* %tmp5134, i64 1
- %tmp5136 = getelementptr inbounds float* %tmp5135, i64 1
- %tmp5137 = getelementptr inbounds float* %tmp5136, i64 1
- %tmp5138 = getelementptr inbounds float* %tmp5137, i64 1
- %tmp5139 = getelementptr inbounds float* %tmp5138, i64 1
- %tmp5140 = getelementptr inbounds float* %tmp5139, i64 1
- %tmp5141 = getelementptr inbounds float* %tmp5140, i64 1
- %tmp5142 = getelementptr inbounds float* %tmp5141, i64 1
- %tmp5143 = getelementptr inbounds float* %tmp5142, i64 1
- %tmp5144 = getelementptr inbounds float* %tmp5143, i64 1
- %tmp5145 = getelementptr inbounds float* %tmp5144, i64 1
- %tmp5146 = getelementptr inbounds float* %tmp5145, i64 1
- %tmp5147 = getelementptr inbounds float* %tmp5146, i64 1
- %tmp5148 = getelementptr inbounds float* %tmp5147, i64 1
- %tmp5149 = getelementptr inbounds float* %tmp5148, i64 1
- %tmp5150 = getelementptr inbounds float* %tmp5149, i64 1
- %tmp5151 = getelementptr inbounds float* %tmp5150, i64 1
- %tmp5152 = getelementptr inbounds float* %tmp5151, i64 1
- %tmp5153 = getelementptr inbounds float* %tmp5152, i64 1
- %tmp5154 = getelementptr inbounds float* %tmp5153, i64 1
- %tmp5155 = getelementptr inbounds float* %tmp5154, i64 1
- %tmp5156 = getelementptr inbounds float* %tmp5155, i64 1
- %tmp5157 = getelementptr inbounds float* %tmp5156, i64 1
- %tmp5158 = getelementptr inbounds float* %tmp5157, i64 1
- %tmp5159 = getelementptr inbounds float* %tmp5158, i64 1
- %tmp5160 = getelementptr inbounds float* %tmp5159, i64 1
- %tmp5161 = getelementptr inbounds float* %tmp5160, i64 1
- %tmp5162 = getelementptr inbounds float* %tmp5161, i64 1
- %tmp5163 = getelementptr inbounds float* %tmp5162, i64 1
- %tmp5164 = getelementptr inbounds float* %tmp5163, i64 1
- %tmp5165 = getelementptr inbounds float* %tmp5164, i64 1
- %tmp5166 = getelementptr inbounds float* %tmp5165, i64 1
- %tmp5167 = getelementptr inbounds float* %tmp5166, i64 1
- %tmp5168 = getelementptr inbounds float* %tmp5167, i64 1
- %tmp5169 = getelementptr inbounds float* %tmp5168, i64 1
- %tmp5170 = getelementptr inbounds float* %tmp5169, i64 1
- %tmp5171 = getelementptr inbounds float* %tmp5170, i64 1
- %tmp5172 = getelementptr inbounds float* %tmp5171, i64 1
- %tmp5173 = getelementptr inbounds float* %tmp5172, i64 1
- %tmp5174 = getelementptr inbounds float* %tmp5173, i64 1
- %tmp5175 = getelementptr inbounds float* %tmp5174, i64 1
- %tmp5176 = getelementptr inbounds float* %tmp5175, i64 1
- %tmp5177 = getelementptr inbounds float* %tmp5176, i64 1
- %tmp5178 = getelementptr inbounds float* %tmp5177, i64 1
- %tmp5179 = getelementptr inbounds float* %tmp5178, i64 1
- %tmp5180 = getelementptr inbounds float* %tmp5179, i64 1
- %tmp5181 = getelementptr inbounds float* %tmp5180, i64 1
- %tmp5182 = getelementptr inbounds float* %tmp5181, i64 1
- %tmp5183 = getelementptr inbounds float* %tmp5182, i64 1
- %tmp5184 = getelementptr inbounds float* %tmp5183, i64 1
- %tmp5185 = getelementptr inbounds float* %tmp5184, i64 1
- %tmp5186 = getelementptr inbounds float* %tmp5185, i64 1
- %tmp5187 = getelementptr inbounds float* %tmp5186, i64 1
- %tmp5188 = getelementptr inbounds float* %tmp5187, i64 1
- %tmp5189 = getelementptr inbounds float* %tmp5188, i64 1
- %tmp5190 = getelementptr inbounds float* %tmp5189, i64 1
- %tmp5191 = getelementptr inbounds float* %tmp5190, i64 1
- %tmp5192 = getelementptr inbounds float* %tmp5191, i64 1
- %tmp5193 = getelementptr inbounds float* %tmp5192, i64 1
- %tmp5194 = getelementptr inbounds float* %tmp5193, i64 1
- %tmp5195 = getelementptr inbounds float* %tmp5194, i64 1
- %tmp5196 = getelementptr inbounds float* %tmp5195, i64 1
- %tmp5197 = getelementptr inbounds float* %tmp5196, i64 1
- %tmp5198 = getelementptr inbounds float* %tmp5197, i64 1
- %tmp5199 = getelementptr inbounds float* %tmp5198, i64 1
- %tmp5200 = getelementptr inbounds float* %tmp5199, i64 1
- %tmp5201 = getelementptr inbounds float* %tmp5200, i64 1
- %tmp5202 = getelementptr inbounds float* %tmp5201, i64 1
- %tmp5203 = getelementptr inbounds float* %tmp5202, i64 1
- %tmp5204 = getelementptr inbounds float* %tmp5203, i64 1
- %tmp5205 = getelementptr inbounds float* %tmp5204, i64 1
- %tmp5206 = getelementptr inbounds float* %tmp5205, i64 1
- %tmp5207 = getelementptr inbounds float* %tmp5206, i64 1
- %tmp5208 = getelementptr inbounds float* %tmp5207, i64 1
- %tmp5209 = getelementptr inbounds float* %tmp5208, i64 1
- %tmp5210 = getelementptr inbounds float* %tmp5209, i64 1
- %tmp5211 = getelementptr inbounds float* %tmp5210, i64 1
- %tmp5212 = getelementptr inbounds float* %tmp5211, i64 1
- %tmp5213 = getelementptr inbounds float* %tmp5212, i64 1
- %tmp5214 = getelementptr inbounds float* %tmp5213, i64 1
- %tmp5215 = getelementptr inbounds float* %tmp5214, i64 1
- %tmp5216 = getelementptr inbounds float* %tmp5215, i64 1
- %tmp5217 = getelementptr inbounds float* %tmp5216, i64 1
- %tmp5218 = getelementptr inbounds float* %tmp5217, i64 1
- %tmp5219 = getelementptr inbounds float* %tmp5218, i64 1
- %tmp5220 = getelementptr inbounds float* %tmp5219, i64 1
- %tmp5221 = getelementptr inbounds float* %tmp5220, i64 1
- %tmp5222 = getelementptr inbounds float* %tmp5221, i64 1
- %tmp5223 = getelementptr inbounds float* %tmp5222, i64 1
- %tmp5224 = getelementptr inbounds float* %tmp5223, i64 1
- %tmp5225 = getelementptr inbounds float* %tmp5224, i64 1
- %tmp5226 = getelementptr inbounds float* %tmp5225, i64 1
- %tmp5227 = getelementptr inbounds float* %tmp5226, i64 1
- %tmp5228 = getelementptr inbounds float* %tmp5227, i64 1
- %tmp5229 = getelementptr inbounds float* %tmp5228, i64 1
- %tmp5230 = getelementptr inbounds float* %tmp5229, i64 1
- %tmp5231 = getelementptr inbounds float* %tmp5230, i64 1
- %tmp5232 = getelementptr inbounds float* %tmp5231, i64 1
- %tmp5233 = getelementptr inbounds float* %tmp5232, i64 1
- %tmp5234 = getelementptr inbounds float* %tmp5233, i64 1
- %tmp5235 = getelementptr inbounds float* %tmp5234, i64 1
- %tmp5236 = getelementptr inbounds float* %tmp5235, i64 1
- %tmp5237 = getelementptr inbounds float* %tmp5236, i64 1
- %tmp5238 = getelementptr inbounds float* %tmp5237, i64 1
- %tmp5239 = getelementptr inbounds float* %tmp5238, i64 1
- %tmp5240 = getelementptr inbounds float* %tmp5239, i64 1
- %tmp5241 = getelementptr inbounds float* %tmp5240, i64 1
- %tmp5242 = getelementptr inbounds float* %tmp5241, i64 1
- %tmp5243 = getelementptr inbounds float* %tmp5242, i64 1
- %tmp5244 = getelementptr inbounds float* %tmp5243, i64 1
- %tmp5245 = getelementptr inbounds float* %tmp5244, i64 1
- %tmp5246 = getelementptr inbounds float* %tmp5245, i64 1
- %tmp5247 = getelementptr inbounds float* %tmp5246, i64 1
- %tmp5248 = getelementptr inbounds float* %tmp5247, i64 1
- %tmp5249 = getelementptr inbounds float* %tmp5248, i64 1
- %tmp5250 = getelementptr inbounds float* %tmp5249, i64 1
- %tmp5251 = getelementptr inbounds float* %tmp5250, i64 1
- %tmp5252 = getelementptr inbounds float* %tmp5251, i64 1
- %tmp5253 = getelementptr inbounds float* %tmp5252, i64 1
- %tmp5254 = getelementptr inbounds float* %tmp5253, i64 1
- %tmp5255 = getelementptr inbounds float* %tmp5254, i64 1
- %tmp5256 = getelementptr inbounds float* %tmp5255, i64 1
- %tmp5257 = getelementptr inbounds float* %tmp5256, i64 1
- %tmp5258 = getelementptr inbounds float* %tmp5257, i64 1
- %tmp5259 = getelementptr inbounds float* %tmp5258, i64 1
- %tmp5260 = getelementptr inbounds float* %tmp5259, i64 1
- %tmp5261 = getelementptr inbounds float* %tmp5260, i64 1
- %tmp5262 = getelementptr inbounds float* %tmp5261, i64 1
- %tmp5263 = getelementptr inbounds float* %tmp5262, i64 1
- %tmp5264 = getelementptr inbounds float* %tmp5263, i64 1
- %tmp5265 = getelementptr inbounds float* %tmp5264, i64 1
- %tmp5266 = getelementptr inbounds float* %tmp5265, i64 1
- %tmp5267 = getelementptr inbounds float* %tmp5266, i64 1
- %tmp5268 = getelementptr inbounds float* %tmp5267, i64 1
- %tmp5269 = getelementptr inbounds float* %tmp5268, i64 1
- %tmp5270 = getelementptr inbounds float* %tmp5269, i64 1
- %tmp5271 = getelementptr inbounds float* %tmp5270, i64 1
- %tmp5272 = getelementptr inbounds float* %tmp5271, i64 1
- %tmp5273 = getelementptr inbounds float* %tmp5272, i64 1
- %tmp5274 = getelementptr inbounds float* %tmp5273, i64 1
- %tmp5275 = getelementptr inbounds float* %tmp5274, i64 1
- %tmp5276 = getelementptr inbounds float* %tmp5275, i64 1
- %tmp5277 = getelementptr inbounds float* %tmp5276, i64 1
- %tmp5278 = getelementptr inbounds float* %tmp5277, i64 1
- %tmp5279 = getelementptr inbounds float* %tmp5278, i64 1
- %tmp5280 = getelementptr inbounds float* %tmp5279, i64 1
- %tmp5281 = getelementptr inbounds float* %tmp5280, i64 1
- %tmp5282 = getelementptr inbounds float* %tmp5281, i64 1
- %tmp5283 = getelementptr inbounds float* %tmp5282, i64 1
- %tmp5284 = getelementptr inbounds float* %tmp5283, i64 1
- %tmp5285 = getelementptr inbounds float* %tmp5284, i64 1
- %tmp5286 = getelementptr inbounds float* %tmp5285, i64 1
- %tmp5287 = getelementptr inbounds float* %tmp5286, i64 1
- %tmp5288 = getelementptr inbounds float* %tmp5287, i64 1
- %tmp5289 = getelementptr inbounds float* %tmp5288, i64 1
- %tmp5290 = getelementptr inbounds float* %tmp5289, i64 1
- %tmp5291 = getelementptr inbounds float* %tmp5290, i64 1
- %tmp5292 = getelementptr inbounds float* %tmp5291, i64 1
- %tmp5293 = getelementptr inbounds float* %tmp5292, i64 1
- %tmp5294 = getelementptr inbounds float* %tmp5293, i64 1
- %tmp5295 = getelementptr inbounds float* %tmp5294, i64 1
- %tmp5296 = getelementptr inbounds float* %tmp5295, i64 1
- %tmp5297 = getelementptr inbounds float* %tmp5296, i64 1
- %tmp5298 = getelementptr inbounds float* %tmp5297, i64 1
- %tmp5299 = getelementptr inbounds float* %tmp5298, i64 1
- %tmp5300 = getelementptr inbounds float* %tmp5299, i64 1
- %tmp5301 = getelementptr inbounds float* %tmp5300, i64 1
- %tmp5302 = getelementptr inbounds float* %tmp5301, i64 1
- %tmp5303 = getelementptr inbounds float* %tmp5302, i64 1
- %tmp5304 = getelementptr inbounds float* %tmp5303, i64 1
- %tmp5305 = getelementptr inbounds float* %tmp5304, i64 1
- %tmp5306 = getelementptr inbounds float* %tmp5305, i64 1
- %tmp5307 = getelementptr inbounds float* %tmp5306, i64 1
- %tmp5308 = getelementptr inbounds float* %tmp5307, i64 1
- %tmp5309 = getelementptr inbounds float* %tmp5308, i64 1
- %tmp5310 = getelementptr inbounds float* %tmp5309, i64 1
- %tmp5311 = getelementptr inbounds float* %tmp5310, i64 1
- %tmp5312 = getelementptr inbounds float* %tmp5311, i64 1
- %tmp5313 = getelementptr inbounds float* %tmp5312, i64 1
- %tmp5314 = getelementptr inbounds float* %tmp5313, i64 1
- %tmp5315 = getelementptr inbounds float* %tmp5314, i64 1
- %tmp5316 = getelementptr inbounds float* %tmp5315, i64 1
- %tmp5317 = getelementptr inbounds float* %tmp5316, i64 1
- %tmp5318 = getelementptr inbounds float* %tmp5317, i64 1
- %tmp5319 = getelementptr inbounds float* %tmp5318, i64 1
- %tmp5320 = getelementptr inbounds float* %tmp5319, i64 1
- %tmp5321 = getelementptr inbounds float* %tmp5320, i64 1
- %tmp5322 = getelementptr inbounds float* %tmp5321, i64 1
- %tmp5323 = getelementptr inbounds float* %tmp5322, i64 1
- %tmp5324 = getelementptr inbounds float* %tmp5323, i64 1
- %tmp5325 = getelementptr inbounds float* %tmp5324, i64 1
- %tmp5326 = getelementptr inbounds float* %tmp5325, i64 1
- %tmp5327 = getelementptr inbounds float* %tmp5326, i64 1
- %tmp5328 = getelementptr inbounds float* %tmp5327, i64 1
- %tmp5329 = getelementptr inbounds float* %tmp5328, i64 1
- %tmp5330 = getelementptr inbounds float* %tmp5329, i64 1
- %tmp5331 = getelementptr inbounds float* %tmp5330, i64 1
- %tmp5332 = getelementptr inbounds float* %tmp5331, i64 1
- %tmp5333 = getelementptr inbounds float* %tmp5332, i64 1
- %tmp5334 = getelementptr inbounds float* %tmp5333, i64 1
- %tmp5335 = getelementptr inbounds float* %tmp5334, i64 1
- %tmp5336 = getelementptr inbounds float* %tmp5335, i64 1
- %tmp5337 = getelementptr inbounds float* %tmp5336, i64 1
- %tmp5338 = getelementptr inbounds float* %tmp5337, i64 1
- %tmp5339 = getelementptr inbounds float* %tmp5338, i64 1
- %tmp5340 = getelementptr inbounds float* %tmp5339, i64 1
- %tmp5341 = getelementptr inbounds float* %tmp5340, i64 1
- %tmp5342 = getelementptr inbounds float* %tmp5341, i64 1
- %tmp5343 = getelementptr inbounds float* %tmp5342, i64 1
- %tmp5344 = getelementptr inbounds float* %tmp5343, i64 1
- %tmp5345 = getelementptr inbounds float* %tmp5344, i64 1
- %tmp5346 = getelementptr inbounds float* %tmp5345, i64 1
- %tmp5347 = getelementptr inbounds float* %tmp5346, i64 1
- %tmp5348 = getelementptr inbounds float* %tmp5347, i64 1
- %tmp5349 = getelementptr inbounds float* %tmp5348, i64 1
- %tmp5350 = getelementptr inbounds float* %tmp5349, i64 1
- %tmp5351 = getelementptr inbounds float* %tmp5350, i64 1
- %tmp5352 = getelementptr inbounds float* %tmp5351, i64 1
- %tmp5353 = getelementptr inbounds float* %tmp5352, i64 1
- %tmp5354 = getelementptr inbounds float* %tmp5353, i64 1
- %tmp5355 = getelementptr inbounds float* %tmp5354, i64 1
- %tmp5356 = getelementptr inbounds float* %tmp5355, i64 1
- %tmp5357 = getelementptr inbounds float* %tmp5356, i64 1
- %tmp5358 = getelementptr inbounds float* %tmp5357, i64 1
- %tmp5359 = getelementptr inbounds float* %tmp5358, i64 1
- %tmp5360 = getelementptr inbounds float* %tmp5359, i64 1
- %tmp5361 = getelementptr inbounds float* %tmp5360, i64 1
- %tmp5362 = getelementptr inbounds float* %tmp5361, i64 1
- %tmp5363 = getelementptr inbounds float* %tmp5362, i64 1
- %tmp5364 = getelementptr inbounds float* %tmp5363, i64 1
- %tmp5365 = getelementptr inbounds float* %tmp5364, i64 1
- %tmp5366 = getelementptr inbounds float* %tmp5365, i64 1
- %tmp5367 = getelementptr inbounds float* %tmp5366, i64 1
- %tmp5368 = getelementptr inbounds float* %tmp5367, i64 1
- %tmp5369 = getelementptr inbounds float* %tmp5368, i64 1
- %tmp5370 = getelementptr inbounds float* %tmp5369, i64 1
- %tmp5371 = getelementptr inbounds float* %tmp5370, i64 1
- %tmp5372 = getelementptr inbounds float* %tmp5371, i64 1
- %tmp5373 = getelementptr inbounds float* %tmp5372, i64 1
- %tmp5374 = getelementptr inbounds float* %tmp5373, i64 1
- %tmp5375 = getelementptr inbounds float* %tmp5374, i64 1
- %tmp5376 = getelementptr inbounds float* %tmp5375, i64 1
- %tmp5377 = getelementptr inbounds float* %tmp5376, i64 1
- %tmp5378 = getelementptr inbounds float* %tmp5377, i64 1
- %tmp5379 = getelementptr inbounds float* %tmp5378, i64 1
- %tmp5380 = getelementptr inbounds float* %tmp5379, i64 1
- %tmp5381 = getelementptr inbounds float* %tmp5380, i64 1
- %tmp5382 = getelementptr inbounds float* %tmp5381, i64 1
- %tmp5383 = getelementptr inbounds float* %tmp5382, i64 1
- %tmp5384 = getelementptr inbounds float* %tmp5383, i64 1
- %tmp5385 = getelementptr inbounds float* %tmp5384, i64 1
- %tmp5386 = getelementptr inbounds float* %tmp5385, i64 1
- %tmp5387 = getelementptr inbounds float* %tmp5386, i64 1
- %tmp5388 = getelementptr inbounds float* %tmp5387, i64 1
- %tmp5389 = getelementptr inbounds float* %tmp5388, i64 1
- %tmp5390 = getelementptr inbounds float* %tmp5389, i64 1
- %tmp5391 = getelementptr inbounds float* %tmp5390, i64 1
- %tmp5392 = getelementptr inbounds float* %tmp5391, i64 1
- %tmp5393 = getelementptr inbounds float* %tmp5392, i64 1
- %tmp5394 = getelementptr inbounds float* %tmp5393, i64 1
- %tmp5395 = getelementptr inbounds float* %tmp5394, i64 1
- %tmp5396 = getelementptr inbounds float* %tmp5395, i64 1
- %tmp5397 = getelementptr inbounds float* %tmp5396, i64 1
- %tmp5398 = getelementptr inbounds float* %tmp5397, i64 1
- %tmp5399 = getelementptr inbounds float* %tmp5398, i64 1
- %tmp5400 = getelementptr inbounds float* %tmp5399, i64 1
- %tmp5401 = getelementptr inbounds float* %tmp5400, i64 1
- %tmp5402 = getelementptr inbounds float* %tmp5401, i64 1
- %tmp5403 = getelementptr inbounds float* %tmp5402, i64 1
- %tmp5404 = getelementptr inbounds float* %tmp5403, i64 1
- %tmp5405 = getelementptr inbounds float* %tmp5404, i64 1
- %tmp5406 = getelementptr inbounds float* %tmp5405, i64 1
- %tmp5407 = getelementptr inbounds float* %tmp5406, i64 1
- %tmp5408 = getelementptr inbounds float* %tmp5407, i64 1
- %tmp5409 = getelementptr inbounds float* %tmp5408, i64 1
- %tmp5410 = getelementptr inbounds float* %tmp5409, i64 1
- %tmp5411 = getelementptr inbounds float* %tmp5410, i64 1
- %tmp5412 = getelementptr inbounds float* %tmp5411, i64 1
- %tmp5413 = getelementptr inbounds float* %tmp5412, i64 1
- %tmp5414 = getelementptr inbounds float* %tmp5413, i64 1
- %tmp5415 = getelementptr inbounds float* %tmp5414, i64 1
- %tmp5416 = getelementptr inbounds float* %tmp5415, i64 1
- %tmp5417 = getelementptr inbounds float* %tmp5416, i64 1
- %tmp5418 = getelementptr inbounds float* %tmp5417, i64 1
- %tmp5419 = getelementptr inbounds float* %tmp5418, i64 1
- %tmp5420 = getelementptr inbounds float* %tmp5419, i64 1
- %tmp5421 = getelementptr inbounds float* %tmp5420, i64 1
- %tmp5422 = getelementptr inbounds float* %tmp5421, i64 1
- %tmp5423 = getelementptr inbounds float* %tmp5422, i64 1
- %tmp5424 = getelementptr inbounds float* %tmp5423, i64 1
- %tmp5425 = getelementptr inbounds float* %tmp5424, i64 1
- %tmp5426 = getelementptr inbounds float* %tmp5425, i64 1
- %tmp5427 = getelementptr inbounds float* %tmp5426, i64 1
- %tmp5428 = getelementptr inbounds float* %tmp5427, i64 1
- %tmp5429 = getelementptr inbounds float* %tmp5428, i64 1
- %tmp5430 = getelementptr inbounds float* %tmp5429, i64 1
- %tmp5431 = getelementptr inbounds float* %tmp5430, i64 1
- %tmp5432 = getelementptr inbounds float* %tmp5431, i64 1
- %tmp5433 = getelementptr inbounds float* %tmp5432, i64 1
- %tmp5434 = getelementptr inbounds float* %tmp5433, i64 1
- %tmp5435 = getelementptr inbounds float* %tmp5434, i64 1
- %tmp5436 = getelementptr inbounds float* %tmp5435, i64 1
- %tmp5437 = getelementptr inbounds float* %tmp5436, i64 1
- %tmp5438 = getelementptr inbounds float* %tmp5437, i64 1
- %tmp5439 = getelementptr inbounds float* %tmp5438, i64 1
- %tmp5440 = getelementptr inbounds float* %tmp5439, i64 1
- %tmp5441 = getelementptr inbounds float* %tmp5440, i64 1
- %tmp5442 = getelementptr inbounds float* %tmp5441, i64 1
- %tmp5443 = getelementptr inbounds float* %tmp5442, i64 1
- %tmp5444 = getelementptr inbounds float* %tmp5443, i64 1
- %tmp5445 = getelementptr inbounds float* %tmp5444, i64 1
- %tmp5446 = getelementptr inbounds float* %tmp5445, i64 1
- %tmp5447 = getelementptr inbounds float* %tmp5446, i64 1
- %tmp5448 = getelementptr inbounds float* %tmp5447, i64 1
- %tmp5449 = getelementptr inbounds float* %tmp5448, i64 1
- %tmp5450 = getelementptr inbounds float* %tmp5449, i64 1
- %tmp5451 = getelementptr inbounds float* %tmp5450, i64 1
- %tmp5452 = getelementptr inbounds float* %tmp5451, i64 1
- %tmp5453 = getelementptr inbounds float* %tmp5452, i64 1
- %tmp5454 = getelementptr inbounds float* %tmp5453, i64 1
- %tmp5455 = getelementptr inbounds float* %tmp5454, i64 1
- %tmp5456 = getelementptr inbounds float* %tmp5455, i64 1
- %tmp5457 = getelementptr inbounds float* %tmp5456, i64 1
- %tmp5458 = getelementptr inbounds float* %tmp5457, i64 1
- %tmp5459 = getelementptr inbounds float* %tmp5458, i64 1
- %tmp5460 = getelementptr inbounds float* %tmp5459, i64 1
- %tmp5461 = getelementptr inbounds float* %tmp5460, i64 1
- %tmp5462 = getelementptr inbounds float* %tmp5461, i64 1
- %tmp5463 = getelementptr inbounds float* %tmp5462, i64 1
- %tmp5464 = getelementptr inbounds float* %tmp5463, i64 1
- %tmp5465 = getelementptr inbounds float* %tmp5464, i64 1
- %tmp5466 = getelementptr inbounds float* %tmp5465, i64 1
- %tmp5467 = getelementptr inbounds float* %tmp5466, i64 1
- %tmp5468 = getelementptr inbounds float* %tmp5467, i64 1
- %tmp5469 = getelementptr inbounds float* %tmp5468, i64 1
- %tmp5470 = getelementptr inbounds float* %tmp5469, i64 1
- %tmp5471 = getelementptr inbounds float* %tmp5470, i64 1
- %tmp5472 = getelementptr inbounds float* %tmp5471, i64 1
- %tmp5473 = getelementptr inbounds float* %tmp5472, i64 1
- %tmp5474 = getelementptr inbounds float* %tmp5473, i64 1
- %tmp5475 = getelementptr inbounds float* %tmp5474, i64 1
- %tmp5476 = getelementptr inbounds float* %tmp5475, i64 1
- %tmp5477 = getelementptr inbounds float* %tmp5476, i64 1
- %tmp5478 = getelementptr inbounds float* %tmp5477, i64 1
- %tmp5479 = getelementptr inbounds float* %tmp5478, i64 1
- %tmp5480 = getelementptr inbounds float* %tmp5479, i64 1
- %tmp5481 = getelementptr inbounds float* %tmp5480, i64 1
- %tmp5482 = getelementptr inbounds float* %tmp5481, i64 1
- %tmp5483 = getelementptr inbounds float* %tmp5482, i64 1
- %tmp5484 = getelementptr inbounds float* %tmp5483, i64 1
- %tmp5485 = getelementptr inbounds float* %tmp5484, i64 1
- %tmp5486 = getelementptr inbounds float* %tmp5485, i64 1
- %tmp5487 = getelementptr inbounds float* %tmp5486, i64 1
- %tmp5488 = getelementptr inbounds float* %tmp5487, i64 1
- %tmp5489 = getelementptr inbounds float* %tmp5488, i64 1
- %tmp5490 = getelementptr inbounds float* %tmp5489, i64 1
- %tmp5491 = getelementptr inbounds float* %tmp5490, i64 1
- %tmp5492 = getelementptr inbounds float* %tmp5491, i64 1
- %tmp5493 = getelementptr inbounds float* %tmp5492, i64 1
- %tmp5494 = getelementptr inbounds float* %tmp5493, i64 1
- %tmp5495 = getelementptr inbounds float* %tmp5494, i64 1
- %tmp5496 = getelementptr inbounds float* %tmp5495, i64 1
- %tmp5497 = getelementptr inbounds float* %tmp5496, i64 1
- %tmp5498 = getelementptr inbounds float* %tmp5497, i64 1
- %tmp5499 = getelementptr inbounds float* %tmp5498, i64 1
- %tmp5500 = getelementptr inbounds float* %tmp5499, i64 1
- %tmp5501 = getelementptr inbounds float* %tmp5500, i64 1
- %tmp5502 = getelementptr inbounds float* %tmp5501, i64 1
- %tmp5503 = getelementptr inbounds float* %tmp5502, i64 1
- %tmp5504 = getelementptr inbounds float* %tmp5503, i64 1
- %tmp5505 = getelementptr inbounds float* %tmp5504, i64 1
- %tmp5506 = getelementptr inbounds float* %tmp5505, i64 1
- %tmp5507 = getelementptr inbounds float* %tmp5506, i64 1
- %tmp5508 = getelementptr inbounds float* %tmp5507, i64 1
- %tmp5509 = getelementptr inbounds float* %tmp5508, i64 1
- %tmp5510 = getelementptr inbounds float* %tmp5509, i64 1
- %tmp5511 = getelementptr inbounds float* %tmp5510, i64 1
- %tmp5512 = getelementptr inbounds float* %tmp5511, i64 1
- %tmp5513 = getelementptr inbounds float* %tmp5512, i64 1
- %tmp5514 = getelementptr inbounds float* %tmp5513, i64 1
- %tmp5515 = getelementptr inbounds float* %tmp5514, i64 1
- %tmp5516 = getelementptr inbounds float* %tmp5515, i64 1
- %tmp5517 = getelementptr inbounds float* %tmp5516, i64 1
- %tmp5518 = getelementptr inbounds float* %tmp5517, i64 1
- %tmp5519 = getelementptr inbounds float* %tmp5518, i64 1
- %tmp5520 = getelementptr inbounds float* %tmp5519, i64 1
- %tmp5521 = getelementptr inbounds float* %tmp5520, i64 1
- %tmp5522 = getelementptr inbounds float* %tmp5521, i64 1
- %tmp5523 = getelementptr inbounds float* %tmp5522, i64 1
- %tmp5524 = getelementptr inbounds float* %tmp5523, i64 1
- %tmp5525 = getelementptr inbounds float* %tmp5524, i64 1
- %tmp5526 = getelementptr inbounds float* %tmp5525, i64 1
- %tmp5527 = getelementptr inbounds float* %tmp5526, i64 1
- %tmp5528 = getelementptr inbounds float* %tmp5527, i64 1
- %tmp5529 = getelementptr inbounds float* %tmp5528, i64 1
- %tmp5530 = getelementptr inbounds float* %tmp5529, i64 1
- %tmp5531 = getelementptr inbounds float* %tmp5530, i64 1
- %tmp5532 = getelementptr inbounds float* %tmp5531, i64 1
- %tmp5533 = getelementptr inbounds float* %tmp5532, i64 1
- %tmp5534 = getelementptr inbounds float* %tmp5533, i64 1
- %tmp5535 = getelementptr inbounds float* %tmp5534, i64 1
- %tmp5536 = getelementptr inbounds float* %tmp5535, i64 1
- %tmp5537 = getelementptr inbounds float* %tmp5536, i64 1
- %tmp5538 = getelementptr inbounds float* %tmp5537, i64 1
- %tmp5539 = getelementptr inbounds float* %tmp5538, i64 1
- %tmp5540 = getelementptr inbounds float* %tmp5539, i64 1
- %tmp5541 = getelementptr inbounds float* %tmp5540, i64 1
- %tmp5542 = getelementptr inbounds float* %tmp5541, i64 1
- %tmp5543 = getelementptr inbounds float* %tmp5542, i64 1
- %tmp5544 = getelementptr inbounds float* %tmp5543, i64 1
- %tmp5545 = getelementptr inbounds float* %tmp5544, i64 1
- %tmp5546 = getelementptr inbounds float* %tmp5545, i64 1
- %tmp5547 = getelementptr inbounds float* %tmp5546, i64 1
- %tmp5548 = getelementptr inbounds float* %tmp5547, i64 1
- %tmp5549 = getelementptr inbounds float* %tmp5548, i64 1
- %tmp5550 = getelementptr inbounds float* %tmp5549, i64 1
- %tmp5551 = getelementptr inbounds float* %tmp5550, i64 1
- %tmp5552 = getelementptr inbounds float* %tmp5551, i64 1
- %tmp5553 = getelementptr inbounds float* %tmp5552, i64 1
- %tmp5554 = getelementptr inbounds float* %tmp5553, i64 1
- %tmp5555 = getelementptr inbounds float* %tmp5554, i64 1
- %tmp5556 = getelementptr inbounds float* %tmp5555, i64 1
- %tmp5557 = getelementptr inbounds float* %tmp5556, i64 1
- %tmp5558 = getelementptr inbounds float* %tmp5557, i64 1
- %tmp5559 = getelementptr inbounds float* %tmp5558, i64 1
- %tmp5560 = getelementptr inbounds float* %tmp5559, i64 1
- %tmp5561 = getelementptr inbounds float* %tmp5560, i64 1
- %tmp5562 = getelementptr inbounds float* %tmp5561, i64 1
- %tmp5563 = getelementptr inbounds float* %tmp5562, i64 1
- %tmp5564 = getelementptr inbounds float* %tmp5563, i64 1
- %tmp5565 = getelementptr inbounds float* %tmp5564, i64 1
- %tmp5566 = getelementptr inbounds float* %tmp5565, i64 1
- %tmp5567 = getelementptr inbounds float* %tmp5566, i64 1
- %tmp5568 = getelementptr inbounds float* %tmp5567, i64 1
- %tmp5569 = getelementptr inbounds float* %tmp5568, i64 1
- %tmp5570 = getelementptr inbounds float* %tmp5569, i64 1
- %tmp5571 = getelementptr inbounds float* %tmp5570, i64 1
- %tmp5572 = getelementptr inbounds float* %tmp5571, i64 1
- %tmp5573 = getelementptr inbounds float* %tmp5572, i64 1
- %tmp5574 = getelementptr inbounds float* %tmp5573, i64 1
- %tmp5575 = getelementptr inbounds float* %tmp5574, i64 1
- %tmp5576 = getelementptr inbounds float* %tmp5575, i64 1
- %tmp5577 = getelementptr inbounds float* %tmp5576, i64 1
- %tmp5578 = getelementptr inbounds float* %tmp5577, i64 1
- %tmp5579 = getelementptr inbounds float* %tmp5578, i64 1
- %tmp5580 = getelementptr inbounds float* %tmp5579, i64 1
- %tmp5581 = getelementptr inbounds float* %tmp5580, i64 1
- %tmp5582 = getelementptr inbounds float* %tmp5581, i64 1
- %tmp5583 = getelementptr inbounds float* %tmp5582, i64 1
- %tmp5584 = getelementptr inbounds float* %tmp5583, i64 1
- %tmp5585 = getelementptr inbounds float* %tmp5584, i64 1
- %tmp5586 = getelementptr inbounds float* %tmp5585, i64 1
- %tmp5587 = getelementptr inbounds float* %tmp5586, i64 1
- %tmp5588 = getelementptr inbounds float* %tmp5587, i64 1
- %tmp5589 = getelementptr inbounds float* %tmp5588, i64 1
- %tmp5590 = getelementptr inbounds float* %tmp5589, i64 1
- %tmp5591 = getelementptr inbounds float* %tmp5590, i64 1
- %tmp5592 = getelementptr inbounds float* %tmp5591, i64 1
- %tmp5593 = getelementptr inbounds float* %tmp5592, i64 1
- %tmp5594 = getelementptr inbounds float* %tmp5593, i64 1
- %tmp5595 = getelementptr inbounds float* %tmp5594, i64 1
- %tmp5596 = getelementptr inbounds float* %tmp5595, i64 1
- %tmp5597 = getelementptr inbounds float* %tmp5596, i64 1
- %tmp5598 = getelementptr inbounds float* %tmp5597, i64 1
- %tmp5599 = getelementptr inbounds float* %tmp5598, i64 1
- %tmp5600 = getelementptr inbounds float* %tmp5599, i64 1
- %tmp5601 = getelementptr inbounds float* %tmp5600, i64 1
- %tmp5602 = getelementptr inbounds float* %tmp5601, i64 1
- %tmp5603 = getelementptr inbounds float* %tmp5602, i64 1
- %tmp5604 = getelementptr inbounds float* %tmp5603, i64 1
- %tmp5605 = getelementptr inbounds float* %tmp5604, i64 1
- %tmp5606 = getelementptr inbounds float* %tmp5605, i64 1
- %tmp5607 = getelementptr inbounds float* %tmp5606, i64 1
- %tmp5608 = getelementptr inbounds float* %tmp5607, i64 1
- %tmp5609 = getelementptr inbounds float* %tmp5608, i64 1
- %tmp5610 = getelementptr inbounds float* %tmp5609, i64 1
- %tmp5611 = getelementptr inbounds float* %tmp5610, i64 1
- %tmp5612 = getelementptr inbounds float* %tmp5611, i64 1
- %tmp5613 = getelementptr inbounds float* %tmp5612, i64 1
- %tmp5614 = getelementptr inbounds float* %tmp5613, i64 1
- %tmp5615 = getelementptr inbounds float* %tmp5614, i64 1
- %tmp5616 = getelementptr inbounds float* %tmp5615, i64 1
- %tmp5617 = getelementptr inbounds float* %tmp5616, i64 1
- %tmp5618 = getelementptr inbounds float* %tmp5617, i64 1
- %tmp5619 = getelementptr inbounds float* %tmp5618, i64 1
- %tmp5620 = getelementptr inbounds float* %tmp5619, i64 1
- %tmp5621 = getelementptr inbounds float* %tmp5620, i64 1
- %tmp5622 = getelementptr inbounds float* %tmp5621, i64 1
- %tmp5623 = getelementptr inbounds float* %tmp5622, i64 1
- %tmp5624 = getelementptr inbounds float* %tmp5623, i64 1
- %tmp5625 = getelementptr inbounds float* %tmp5624, i64 1
- %tmp5626 = getelementptr inbounds float* %tmp5625, i64 1
- %tmp5627 = getelementptr inbounds float* %tmp5626, i64 1
- %tmp5628 = getelementptr inbounds float* %tmp5627, i64 1
- %tmp5629 = getelementptr inbounds float* %tmp5628, i64 1
- %tmp5630 = getelementptr inbounds float* %tmp5629, i64 1
- %tmp5631 = getelementptr inbounds float* %tmp5630, i64 1
- %tmp5632 = getelementptr inbounds float* %tmp5631, i64 1
- %tmp5633 = getelementptr inbounds float* %tmp5632, i64 1
- %tmp5634 = getelementptr inbounds float* %tmp5633, i64 1
- %tmp5635 = getelementptr inbounds float* %tmp5634, i64 1
- %tmp5636 = getelementptr inbounds float* %tmp5635, i64 1
- %tmp5637 = getelementptr inbounds float* %tmp5636, i64 1
- %tmp5638 = getelementptr inbounds float* %tmp5637, i64 1
- %tmp5639 = getelementptr inbounds float* %tmp5638, i64 1
- %tmp5640 = getelementptr inbounds float* %tmp5639, i64 1
- %tmp5641 = getelementptr inbounds float* %tmp5640, i64 1
- %tmp5642 = getelementptr inbounds float* %tmp5641, i64 1
- %tmp5643 = getelementptr inbounds float* %tmp5642, i64 1
- %tmp5644 = getelementptr inbounds float* %tmp5643, i64 1
- %tmp5645 = getelementptr inbounds float* %tmp5644, i64 1
- %tmp5646 = getelementptr inbounds float* %tmp5645, i64 1
- %tmp5647 = getelementptr inbounds float* %tmp5646, i64 1
- %tmp5648 = getelementptr inbounds float* %tmp5647, i64 1
- %tmp5649 = getelementptr inbounds float* %tmp5648, i64 1
- %tmp5650 = getelementptr inbounds float* %tmp5649, i64 1
- %tmp5651 = getelementptr inbounds float* %tmp5650, i64 1
- %tmp5652 = getelementptr inbounds float* %tmp5651, i64 1
- %tmp5653 = getelementptr inbounds float* %tmp5652, i64 1
- %tmp5654 = getelementptr inbounds float* %tmp5653, i64 1
- %tmp5655 = getelementptr inbounds float* %tmp5654, i64 1
- %tmp5656 = getelementptr inbounds float* %tmp5655, i64 1
- %tmp5657 = getelementptr inbounds float* %tmp5656, i64 1
- %tmp5658 = getelementptr inbounds float* %tmp5657, i64 1
- %tmp5659 = getelementptr inbounds float* %tmp5658, i64 1
- %tmp5660 = getelementptr inbounds float* %tmp5659, i64 1
- %tmp5661 = getelementptr inbounds float* %tmp5660, i64 1
- %tmp5662 = getelementptr inbounds float* %tmp5661, i64 1
- %tmp5663 = getelementptr inbounds float* %tmp5662, i64 1
- %tmp5664 = getelementptr inbounds float* %tmp5663, i64 1
- %tmp5665 = getelementptr inbounds float* %tmp5664, i64 1
- %tmp5666 = getelementptr inbounds float* %tmp5665, i64 1
- %tmp5667 = getelementptr inbounds float* %tmp5666, i64 1
- %tmp5668 = getelementptr inbounds float* %tmp5667, i64 1
- %tmp5669 = getelementptr inbounds float* %tmp5668, i64 1
- %tmp5670 = getelementptr inbounds float* %tmp5669, i64 1
- %tmp5671 = getelementptr inbounds float* %tmp5670, i64 1
- %tmp5672 = getelementptr inbounds float* %tmp5671, i64 1
- %tmp5673 = getelementptr inbounds float* %tmp5672, i64 1
- %tmp5674 = getelementptr inbounds float* %tmp5673, i64 1
- %tmp5675 = getelementptr inbounds float* %tmp5674, i64 1
- %tmp5676 = getelementptr inbounds float* %tmp5675, i64 1
- %tmp5677 = getelementptr inbounds float* %tmp5676, i64 1
- %tmp5678 = getelementptr inbounds float* %tmp5677, i64 1
- %tmp5679 = getelementptr inbounds float* %tmp5678, i64 1
- %tmp5680 = getelementptr inbounds float* %tmp5679, i64 1
- %tmp5681 = getelementptr inbounds float* %tmp5680, i64 1
- %tmp5682 = getelementptr inbounds float* %tmp5681, i64 1
- %tmp5683 = getelementptr inbounds float* %tmp5682, i64 1
- %tmp5684 = getelementptr inbounds float* %tmp5683, i64 1
- %tmp5685 = getelementptr inbounds float* %tmp5684, i64 1
- %tmp5686 = getelementptr inbounds float* %tmp5685, i64 1
- %tmp5687 = getelementptr inbounds float* %tmp5686, i64 1
- %tmp5688 = getelementptr inbounds float* %tmp5687, i64 1
- %tmp5689 = getelementptr inbounds float* %tmp5688, i64 1
- %tmp5690 = getelementptr inbounds float* %tmp5689, i64 1
- %tmp5691 = getelementptr inbounds float* %tmp5690, i64 1
- %tmp5692 = getelementptr inbounds float* %tmp5691, i64 1
- %tmp5693 = getelementptr inbounds float* %tmp5692, i64 1
- %tmp5694 = getelementptr inbounds float* %tmp5693, i64 1
- %tmp5695 = getelementptr inbounds float* %tmp5694, i64 1
- %tmp5696 = getelementptr inbounds float* %tmp5695, i64 1
- %tmp5697 = getelementptr inbounds float* %tmp5696, i64 1
- %tmp5698 = getelementptr inbounds float* %tmp5697, i64 1
- %tmp5699 = getelementptr inbounds float* %tmp5698, i64 1
- %tmp5700 = getelementptr inbounds float* %tmp5699, i64 1
- %tmp5701 = getelementptr inbounds float* %tmp5700, i64 1
- %tmp5702 = getelementptr inbounds float* %tmp5701, i64 1
- %tmp5703 = getelementptr inbounds float* %tmp5702, i64 1
- %tmp5704 = getelementptr inbounds float* %tmp5703, i64 1
- %tmp5705 = getelementptr inbounds float* %tmp5704, i64 1
- %tmp5706 = getelementptr inbounds float* %tmp5705, i64 1
- %tmp5707 = getelementptr inbounds float* %tmp5706, i64 1
- %tmp5708 = getelementptr inbounds float* %tmp5707, i64 1
- %tmp5709 = getelementptr inbounds float* %tmp5708, i64 1
- %tmp5710 = getelementptr inbounds float* %tmp5709, i64 1
- %tmp5711 = getelementptr inbounds float* %tmp5710, i64 1
- %tmp5712 = getelementptr inbounds float* %tmp5711, i64 1
- %tmp5713 = getelementptr inbounds float* %tmp5712, i64 1
- %tmp5714 = getelementptr inbounds float* %tmp5713, i64 1
- %tmp5715 = getelementptr inbounds float* %tmp5714, i64 1
- %tmp5716 = getelementptr inbounds float* %tmp5715, i64 1
- %tmp5717 = getelementptr inbounds float* %tmp5716, i64 1
- %tmp5718 = getelementptr inbounds float* %tmp5717, i64 1
- %tmp5719 = getelementptr inbounds float* %tmp5718, i64 1
- %tmp5720 = getelementptr inbounds float* %tmp5719, i64 1
- %tmp5721 = getelementptr inbounds float* %tmp5720, i64 1
- %tmp5722 = getelementptr inbounds float* %tmp5721, i64 1
- %tmp5723 = getelementptr inbounds float* %tmp5722, i64 1
- %tmp5724 = getelementptr inbounds float* %tmp5723, i64 1
- %tmp5725 = getelementptr inbounds float* %tmp5724, i64 1
- %tmp5726 = getelementptr inbounds float* %tmp5725, i64 1
- %tmp5727 = getelementptr inbounds float* %tmp5726, i64 1
- %tmp5728 = getelementptr inbounds float* %tmp5727, i64 1
- %tmp5729 = getelementptr inbounds float* %tmp5728, i64 1
- %tmp5730 = getelementptr inbounds float* %tmp5729, i64 1
- %tmp5731 = getelementptr inbounds float* %tmp5730, i64 1
- %tmp5732 = getelementptr inbounds float* %tmp5731, i64 1
- %tmp5733 = getelementptr inbounds float* %tmp5732, i64 1
- %tmp5734 = getelementptr inbounds float* %tmp5733, i64 1
- %tmp5735 = getelementptr inbounds float* %tmp5734, i64 1
- %tmp5736 = getelementptr inbounds float* %tmp5735, i64 1
- %tmp5737 = getelementptr inbounds float* %tmp5736, i64 1
- %tmp5738 = getelementptr inbounds float* %tmp5737, i64 1
- %tmp5739 = getelementptr inbounds float* %tmp5738, i64 1
- %tmp5740 = getelementptr inbounds float* %tmp5739, i64 1
- %tmp5741 = getelementptr inbounds float* %tmp5740, i64 1
- %tmp5742 = getelementptr inbounds float* %tmp5741, i64 1
- %tmp5743 = getelementptr inbounds float* %tmp5742, i64 1
- %tmp5744 = getelementptr inbounds float* %tmp5743, i64 1
- %tmp5745 = getelementptr inbounds float* %tmp5744, i64 1
- %tmp5746 = getelementptr inbounds float* %tmp5745, i64 1
- %tmp5747 = getelementptr inbounds float* %tmp5746, i64 1
- %tmp5748 = getelementptr inbounds float* %tmp5747, i64 1
- %tmp5749 = getelementptr inbounds float* %tmp5748, i64 1
- %tmp5750 = getelementptr inbounds float* %tmp5749, i64 1
- %tmp5751 = getelementptr inbounds float* %tmp5750, i64 1
- %tmp5752 = getelementptr inbounds float* %tmp5751, i64 1
- %tmp5753 = getelementptr inbounds float* %tmp5752, i64 1
- %tmp5754 = getelementptr inbounds float* %tmp5753, i64 1
- %tmp5755 = getelementptr inbounds float* %tmp5754, i64 1
- %tmp5756 = getelementptr inbounds float* %tmp5755, i64 1
- %tmp5757 = getelementptr inbounds float* %tmp5756, i64 1
- %tmp5758 = getelementptr inbounds float* %tmp5757, i64 1
- %tmp5759 = getelementptr inbounds float* %tmp5758, i64 1
- %tmp5760 = getelementptr inbounds float* %tmp5759, i64 1
- %tmp5761 = getelementptr inbounds float* %tmp5760, i64 1
- %tmp5762 = getelementptr inbounds float* %tmp5761, i64 1
- %tmp5763 = getelementptr inbounds float* %tmp5762, i64 1
- %tmp5764 = getelementptr inbounds float* %tmp5763, i64 1
- %tmp5765 = getelementptr inbounds float* %tmp5764, i64 1
- %tmp5766 = getelementptr inbounds float* %tmp5765, i64 1
- %tmp5767 = getelementptr inbounds float* %tmp5766, i64 1
- %tmp5768 = getelementptr inbounds float* %tmp5767, i64 1
- %tmp5769 = getelementptr inbounds float* %tmp5768, i64 1
- %tmp5770 = getelementptr inbounds float* %tmp5769, i64 1
- %tmp5771 = getelementptr inbounds float* %tmp5770, i64 1
- %tmp5772 = getelementptr inbounds float* %tmp5771, i64 1
- %tmp5773 = getelementptr inbounds float* %tmp5772, i64 1
- %tmp5774 = getelementptr inbounds float* %tmp5773, i64 1
- %tmp5775 = getelementptr inbounds float* %tmp5774, i64 1
- %tmp5776 = getelementptr inbounds float* %tmp5775, i64 1
- %tmp5777 = getelementptr inbounds float* %tmp5776, i64 1
- %tmp5778 = getelementptr inbounds float* %tmp5777, i64 1
- %tmp5779 = getelementptr inbounds float* %tmp5778, i64 1
- %tmp5780 = getelementptr inbounds float* %tmp5779, i64 1
- %tmp5781 = getelementptr inbounds float* %tmp5780, i64 1
- %tmp5782 = getelementptr inbounds float* %tmp5781, i64 1
- %tmp5783 = getelementptr inbounds float* %tmp5782, i64 1
- %tmp5784 = getelementptr inbounds float* %tmp5783, i64 1
- %tmp5785 = getelementptr inbounds float* %tmp5784, i64 1
- %tmp5786 = getelementptr inbounds float* %tmp5785, i64 1
- %tmp5787 = getelementptr inbounds float* %tmp5786, i64 1
- %tmp5788 = getelementptr inbounds float* %tmp5787, i64 1
- %tmp5789 = getelementptr inbounds float* %tmp5788, i64 1
- %tmp5790 = getelementptr inbounds float* %tmp5789, i64 1
- %tmp5791 = getelementptr inbounds float* %tmp5790, i64 1
- %tmp5792 = getelementptr inbounds float* %tmp5791, i64 1
- %tmp5793 = getelementptr inbounds float* %tmp5792, i64 1
- %tmp5794 = getelementptr inbounds float* %tmp5793, i64 1
- %tmp5795 = getelementptr inbounds float* %tmp5794, i64 1
- %tmp5796 = getelementptr inbounds float* %tmp5795, i64 1
- %tmp5797 = getelementptr inbounds float* %tmp5796, i64 1
- %tmp5798 = getelementptr inbounds float* %tmp5797, i64 1
- %tmp5799 = getelementptr inbounds float* %tmp5798, i64 1
- %tmp5800 = getelementptr inbounds float* %tmp5799, i64 1
- %tmp5801 = getelementptr inbounds float* %tmp5800, i64 1
- %tmp5802 = getelementptr inbounds float* %tmp5801, i64 1
- %tmp5803 = getelementptr inbounds float* %tmp5802, i64 1
- %tmp5804 = getelementptr inbounds float* %tmp5803, i64 1
- %tmp5805 = getelementptr inbounds float* %tmp5804, i64 1
- %tmp5806 = getelementptr inbounds float* %tmp5805, i64 1
- %tmp5807 = getelementptr inbounds float* %tmp5806, i64 1
- %tmp5808 = getelementptr inbounds float* %tmp5807, i64 1
- %tmp5809 = getelementptr inbounds float* %tmp5808, i64 1
- %tmp5810 = getelementptr inbounds float* %tmp5809, i64 1
- %tmp5811 = getelementptr inbounds float* %tmp5810, i64 1
- %tmp5812 = getelementptr inbounds float* %tmp5811, i64 1
- %tmp5813 = getelementptr inbounds float* %tmp5812, i64 1
- %tmp5814 = getelementptr inbounds float* %tmp5813, i64 1
- %tmp5815 = getelementptr inbounds float* %tmp5814, i64 1
- %tmp5816 = getelementptr inbounds float* %tmp5815, i64 1
- %tmp5817 = getelementptr inbounds float* %tmp5816, i64 1
- %tmp5818 = getelementptr inbounds float* %tmp5817, i64 1
- %tmp5819 = getelementptr inbounds float* %tmp5818, i64 1
- %tmp5820 = getelementptr inbounds float* %tmp5819, i64 1
- %tmp5821 = getelementptr inbounds float* %tmp5820, i64 1
- %tmp5822 = getelementptr inbounds float* %tmp5821, i64 1
- %tmp5823 = getelementptr inbounds float* %tmp5822, i64 1
- %tmp5824 = getelementptr inbounds float* %tmp5823, i64 1
- %tmp5825 = getelementptr inbounds float* %tmp5824, i64 1
- %tmp5826 = getelementptr inbounds float* %tmp5825, i64 1
- %tmp5827 = getelementptr inbounds float* %tmp5826, i64 1
- %tmp5828 = getelementptr inbounds float* %tmp5827, i64 1
- %tmp5829 = getelementptr inbounds float* %tmp5828, i64 1
- %tmp5830 = getelementptr inbounds float* %tmp5829, i64 1
- %tmp5831 = getelementptr inbounds float* %tmp5830, i64 1
- %tmp5832 = getelementptr inbounds float* %tmp5831, i64 1
- %tmp5833 = getelementptr inbounds float* %tmp5832, i64 1
- %tmp5834 = getelementptr inbounds float* %tmp5833, i64 1
- %tmp5835 = getelementptr inbounds float* %tmp5834, i64 1
- %tmp5836 = getelementptr inbounds float* %tmp5835, i64 1
- %tmp5837 = getelementptr inbounds float* %tmp5836, i64 1
- %tmp5838 = getelementptr inbounds float* %tmp5837, i64 1
- %tmp5839 = getelementptr inbounds float* %tmp5838, i64 1
- %tmp5840 = getelementptr inbounds float* %tmp5839, i64 1
- %tmp5841 = getelementptr inbounds float* %tmp5840, i64 1
- %tmp5842 = getelementptr inbounds float* %tmp5841, i64 1
- %tmp5843 = getelementptr inbounds float* %tmp5842, i64 1
- %tmp5844 = getelementptr inbounds float* %tmp5843, i64 1
- %tmp5845 = getelementptr inbounds float* %tmp5844, i64 1
- %tmp5846 = getelementptr inbounds float* %tmp5845, i64 1
- %tmp5847 = getelementptr inbounds float* %tmp5846, i64 1
- %tmp5848 = getelementptr inbounds float* %tmp5847, i64 1
- %tmp5849 = getelementptr inbounds float* %tmp5848, i64 1
- %tmp5850 = getelementptr inbounds float* %tmp5849, i64 1
- %tmp5851 = getelementptr inbounds float* %tmp5850, i64 1
- %tmp5852 = getelementptr inbounds float* %tmp5851, i64 1
- %tmp5853 = getelementptr inbounds float* %tmp5852, i64 1
- %tmp5854 = getelementptr inbounds float* %tmp5853, i64 1
- %tmp5855 = getelementptr inbounds float* %tmp5854, i64 1
- %tmp5856 = getelementptr inbounds float* %tmp5855, i64 1
- %tmp5857 = getelementptr inbounds float* %tmp5856, i64 1
- %tmp5858 = getelementptr inbounds float* %tmp5857, i64 1
- %tmp5859 = getelementptr inbounds float* %tmp5858, i64 1
- %tmp5860 = getelementptr inbounds float* %tmp5859, i64 1
- %tmp5861 = getelementptr inbounds float* %tmp5860, i64 1
- %tmp5862 = getelementptr inbounds float* %tmp5861, i64 1
- %tmp5863 = getelementptr inbounds float* %tmp5862, i64 1
- %tmp5864 = getelementptr inbounds float* %tmp5863, i64 1
- %tmp5865 = getelementptr inbounds float* %tmp5864, i64 1
- %tmp5866 = getelementptr inbounds float* %tmp5865, i64 1
- %tmp5867 = getelementptr inbounds float* %tmp5866, i64 1
- %tmp5868 = getelementptr inbounds float* %tmp5867, i64 1
- %tmp5869 = getelementptr inbounds float* %tmp5868, i64 1
- %tmp5870 = getelementptr inbounds float* %tmp5869, i64 1
- %tmp5871 = getelementptr inbounds float* %tmp5870, i64 1
- %tmp5872 = getelementptr inbounds float* %tmp5871, i64 1
- %tmp5873 = getelementptr inbounds float* %tmp5872, i64 1
- %tmp5874 = getelementptr inbounds float* %tmp5873, i64 1
- %tmp5875 = getelementptr inbounds float* %tmp5874, i64 1
- %tmp5876 = getelementptr inbounds float* %tmp5875, i64 1
- %tmp5877 = getelementptr inbounds float* %tmp5876, i64 1
- %tmp5878 = getelementptr inbounds float* %tmp5877, i64 1
- %tmp5879 = getelementptr inbounds float* %tmp5878, i64 1
- %tmp5880 = getelementptr inbounds float* %tmp5879, i64 1
- %tmp5881 = getelementptr inbounds float* %tmp5880, i64 1
- %tmp5882 = getelementptr inbounds float* %tmp5881, i64 1
- %tmp5883 = getelementptr inbounds float* %tmp5882, i64 1
- %tmp5884 = getelementptr inbounds float* %tmp5883, i64 1
- %tmp5885 = getelementptr inbounds float* %tmp5884, i64 1
- %tmp5886 = getelementptr inbounds float* %tmp5885, i64 1
- %tmp5887 = getelementptr inbounds float* %tmp5886, i64 1
- %tmp5888 = getelementptr inbounds float* %tmp5887, i64 1
- %tmp5889 = getelementptr inbounds float* %tmp5888, i64 1
- %tmp5890 = getelementptr inbounds float* %tmp5889, i64 1
- %tmp5891 = getelementptr inbounds float* %tmp5890, i64 1
- %tmp5892 = getelementptr inbounds float* %tmp5891, i64 1
- %tmp5893 = getelementptr inbounds float* %tmp5892, i64 1
- %tmp5894 = getelementptr inbounds float* %tmp5893, i64 1
- %tmp5895 = getelementptr inbounds float* %tmp5894, i64 1
- %tmp5896 = getelementptr inbounds float* %tmp5895, i64 1
- %tmp5897 = getelementptr inbounds float* %tmp5896, i64 1
- %tmp5898 = getelementptr inbounds float* %tmp5897, i64 1
- %tmp5899 = getelementptr inbounds float* %tmp5898, i64 1
- %tmp5900 = getelementptr inbounds float* %tmp5899, i64 1
- %tmp5901 = getelementptr inbounds float* %tmp5900, i64 1
- %tmp5902 = getelementptr inbounds float* %tmp5901, i64 1
- %tmp5903 = getelementptr inbounds float* %tmp5902, i64 1
- %tmp5904 = getelementptr inbounds float* %tmp5903, i64 1
- %tmp5905 = getelementptr inbounds float* %tmp5904, i64 1
- %tmp5906 = getelementptr inbounds float* %tmp5905, i64 1
- %tmp5907 = getelementptr inbounds float* %tmp5906, i64 1
- %tmp5908 = getelementptr inbounds float* %tmp5907, i64 1
- %tmp5909 = getelementptr inbounds float* %tmp5908, i64 1
- %tmp5910 = getelementptr inbounds float* %tmp5909, i64 1
- %tmp5911 = getelementptr inbounds float* %tmp5910, i64 1
- %tmp5912 = getelementptr inbounds float* %tmp5911, i64 1
- %tmp5913 = getelementptr inbounds float* %tmp5912, i64 1
- %tmp5914 = getelementptr inbounds float* %tmp5913, i64 1
- %tmp5915 = getelementptr inbounds float* %tmp5914, i64 1
- %tmp5916 = getelementptr inbounds float* %tmp5915, i64 1
- %tmp5917 = getelementptr inbounds float* %tmp5916, i64 1
- %tmp5918 = getelementptr inbounds float* %tmp5917, i64 1
- %tmp5919 = getelementptr inbounds float* %tmp5918, i64 1
- %tmp5920 = getelementptr inbounds float* %tmp5919, i64 1
- %tmp5921 = getelementptr inbounds float* %tmp5920, i64 1
- %tmp5922 = getelementptr inbounds float* %tmp5921, i64 1
- %tmp5923 = getelementptr inbounds float* %tmp5922, i64 1
- %tmp5924 = getelementptr inbounds float* %tmp5923, i64 1
- %tmp5925 = getelementptr inbounds float* %tmp5924, i64 1
- %tmp5926 = getelementptr inbounds float* %tmp5925, i64 1
- %tmp5927 = getelementptr inbounds float* %tmp5926, i64 1
- %tmp5928 = getelementptr inbounds float* %tmp5927, i64 1
- %tmp5929 = getelementptr inbounds float* %tmp5928, i64 1
- %tmp5930 = getelementptr inbounds float* %tmp5929, i64 1
- %tmp5931 = getelementptr inbounds float* %tmp5930, i64 1
- %tmp5932 = getelementptr inbounds float* %tmp5931, i64 1
- %tmp5933 = getelementptr inbounds float* %tmp5932, i64 1
- %tmp5934 = getelementptr inbounds float* %tmp5933, i64 1
- %tmp5935 = getelementptr inbounds float* %tmp5934, i64 1
- %tmp5936 = getelementptr inbounds float* %tmp5935, i64 1
- %tmp5937 = getelementptr inbounds float* %tmp5936, i64 1
- %tmp5938 = getelementptr inbounds float* %tmp5937, i64 1
- %tmp5939 = getelementptr inbounds float* %tmp5938, i64 1
- %tmp5940 = getelementptr inbounds float* %tmp5939, i64 1
- %tmp5941 = getelementptr inbounds float* %tmp5940, i64 1
- %tmp5942 = getelementptr inbounds float* %tmp5941, i64 1
- %tmp5943 = getelementptr inbounds float* %tmp5942, i64 1
- %tmp5944 = getelementptr inbounds float* %tmp5943, i64 1
- %tmp5945 = getelementptr inbounds float* %tmp5944, i64 1
- %tmp5946 = getelementptr inbounds float* %tmp5945, i64 1
- %tmp5947 = getelementptr inbounds float* %tmp5946, i64 1
- %tmp5948 = getelementptr inbounds float* %tmp5947, i64 1
- %tmp5949 = getelementptr inbounds float* %tmp5948, i64 1
- %tmp5950 = getelementptr inbounds float* %tmp5949, i64 1
- %tmp5951 = getelementptr inbounds float* %tmp5950, i64 1
- %tmp5952 = getelementptr inbounds float* %tmp5951, i64 1
- %tmp5953 = getelementptr inbounds float* %tmp5952, i64 1
- %tmp5954 = getelementptr inbounds float* %tmp5953, i64 1
- %tmp5955 = getelementptr inbounds float* %tmp5954, i64 1
- %tmp5956 = getelementptr inbounds float* %tmp5955, i64 1
- %tmp5957 = getelementptr inbounds float* %tmp5956, i64 1
- %tmp5958 = getelementptr inbounds float* %tmp5957, i64 1
- %tmp5959 = getelementptr inbounds float* %tmp5958, i64 1
- %tmp5960 = getelementptr inbounds float* %tmp5959, i64 1
- %tmp5961 = getelementptr inbounds float* %tmp5960, i64 1
- %tmp5962 = getelementptr inbounds float* %tmp5961, i64 1
- %tmp5963 = getelementptr inbounds float* %tmp5962, i64 1
- %tmp5964 = getelementptr inbounds float* %tmp5963, i64 1
- %tmp5965 = getelementptr inbounds float* %tmp5964, i64 1
- %tmp5966 = getelementptr inbounds float* %tmp5965, i64 1
- %tmp5967 = getelementptr inbounds float* %tmp5966, i64 1
- %tmp5968 = getelementptr inbounds float* %tmp5967, i64 1
- %tmp5969 = getelementptr inbounds float* %tmp5968, i64 1
- %tmp5970 = getelementptr inbounds float* %tmp5969, i64 1
- %tmp5971 = getelementptr inbounds float* %tmp5970, i64 1
- %tmp5972 = getelementptr inbounds float* %tmp5971, i64 1
- %tmp5973 = getelementptr inbounds float* %tmp5972, i64 1
- %tmp5974 = getelementptr inbounds float* %tmp5973, i64 1
- %tmp5975 = getelementptr inbounds float* %tmp5974, i64 1
- %tmp5976 = getelementptr inbounds float* %tmp5975, i64 1
- %tmp5977 = getelementptr inbounds float* %tmp5976, i64 1
- %tmp5978 = getelementptr inbounds float* %tmp5977, i64 1
- %tmp5979 = getelementptr inbounds float* %tmp5978, i64 1
- %tmp5980 = getelementptr inbounds float* %tmp5979, i64 1
- %tmp5981 = getelementptr inbounds float* %tmp5980, i64 1
- %tmp5982 = getelementptr inbounds float* %tmp5981, i64 1
- %tmp5983 = getelementptr inbounds float* %tmp5982, i64 1
- %tmp5984 = getelementptr inbounds float* %tmp5983, i64 1
- %tmp5985 = getelementptr inbounds float* %tmp5984, i64 1
- %tmp5986 = getelementptr inbounds float* %tmp5985, i64 1
- %tmp5987 = getelementptr inbounds float* %tmp5986, i64 1
- %tmp5988 = getelementptr inbounds float* %tmp5987, i64 1
- %tmp5989 = getelementptr inbounds float* %tmp5988, i64 1
- %tmp5990 = getelementptr inbounds float* %tmp5989, i64 1
- %tmp5991 = getelementptr inbounds float* %tmp5990, i64 1
- %tmp5992 = getelementptr inbounds float* %tmp5991, i64 1
- %tmp5993 = getelementptr inbounds float* %tmp5992, i64 1
- %tmp5994 = getelementptr inbounds float* %tmp5993, i64 1
- %tmp5995 = getelementptr inbounds float* %tmp5994, i64 1
- %tmp5996 = getelementptr inbounds float* %tmp5995, i64 1
- %tmp5997 = getelementptr inbounds float* %tmp5996, i64 1
- %tmp5998 = getelementptr inbounds float* %tmp5997, i64 1
- %tmp5999 = getelementptr inbounds float* %tmp5998, i64 1
- %tmp6000 = getelementptr inbounds float* %tmp5999, i64 1
- %tmp6001 = getelementptr inbounds float* %tmp6000, i64 1
- %tmp6002 = getelementptr inbounds float* %tmp6001, i64 1
- %tmp6003 = getelementptr inbounds float* %tmp6002, i64 1
- %tmp6004 = getelementptr inbounds float* %tmp6003, i64 1
- %tmp6005 = getelementptr inbounds float* %tmp6004, i64 1
- %tmp6006 = getelementptr inbounds float* %tmp6005, i64 1
- %tmp6007 = getelementptr inbounds float* %tmp6006, i64 1
- %tmp6008 = getelementptr inbounds float* %tmp6007, i64 1
- %tmp6009 = getelementptr inbounds float* %tmp6008, i64 1
- %tmp6010 = getelementptr inbounds float* %tmp6009, i64 1
- %tmp6011 = getelementptr inbounds float* %tmp6010, i64 1
- %tmp6012 = getelementptr inbounds float* %tmp6011, i64 1
- %tmp6013 = getelementptr inbounds float* %tmp6012, i64 1
- %tmp6014 = getelementptr inbounds float* %tmp6013, i64 1
- %tmp6015 = getelementptr inbounds float* %tmp6014, i64 1
- %tmp6016 = getelementptr inbounds float* %tmp6015, i64 1
- %tmp6017 = getelementptr inbounds float* %tmp6016, i64 1
- %tmp6018 = getelementptr inbounds float* %tmp6017, i64 1
- %tmp6019 = getelementptr inbounds float* %tmp6018, i64 1
- %tmp6020 = getelementptr inbounds float* %tmp6019, i64 1
- %tmp6021 = getelementptr inbounds float* %tmp6020, i64 1
- %tmp6022 = getelementptr inbounds float* %tmp6021, i64 1
- %tmp6023 = getelementptr inbounds float* %tmp6022, i64 1
- %tmp6024 = getelementptr inbounds float* %tmp6023, i64 1
- %tmp6025 = getelementptr inbounds float* %tmp6024, i64 1
- %tmp6026 = getelementptr inbounds float* %tmp6025, i64 1
- %tmp6027 = getelementptr inbounds float* %tmp6026, i64 1
- %tmp6028 = getelementptr inbounds float* %tmp6027, i64 1
- %tmp6029 = getelementptr inbounds float* %tmp6028, i64 1
- %tmp6030 = getelementptr inbounds float* %tmp6029, i64 1
- %tmp6031 = getelementptr inbounds float* %tmp6030, i64 1
- %tmp6032 = getelementptr inbounds float* %tmp6031, i64 1
- %tmp6033 = getelementptr inbounds float* %tmp6032, i64 1
- %tmp6034 = getelementptr inbounds float* %tmp6033, i64 1
- %tmp6035 = getelementptr inbounds float* %tmp6034, i64 1
- %tmp6036 = getelementptr inbounds float* %tmp6035, i64 1
- %tmp6037 = getelementptr inbounds float* %tmp6036, i64 1
- %tmp6038 = getelementptr inbounds float* %tmp6037, i64 1
- %tmp6039 = getelementptr inbounds float* %tmp6038, i64 1
- %tmp6040 = getelementptr inbounds float* %tmp6039, i64 1
- %tmp6041 = getelementptr inbounds float* %tmp6040, i64 1
- %tmp6042 = getelementptr inbounds float* %tmp6041, i64 1
- %tmp6043 = getelementptr inbounds float* %tmp6042, i64 1
- %tmp6044 = getelementptr inbounds float* %tmp6043, i64 1
- %tmp6045 = getelementptr inbounds float* %tmp6044, i64 1
- %tmp6046 = getelementptr inbounds float* %tmp6045, i64 1
- %tmp6047 = getelementptr inbounds float* %tmp6046, i64 1
- %tmp6048 = getelementptr inbounds float* %tmp6047, i64 1
- %tmp6049 = getelementptr inbounds float* %tmp6048, i64 1
- %tmp6050 = getelementptr inbounds float* %tmp6049, i64 1
- %tmp6051 = getelementptr inbounds float* %tmp6050, i64 1
- %tmp6052 = getelementptr inbounds float* %tmp6051, i64 1
- %tmp6053 = getelementptr inbounds float* %tmp6052, i64 1
- %tmp6054 = getelementptr inbounds float* %tmp6053, i64 1
- %tmp6055 = getelementptr inbounds float* %tmp6054, i64 1
- %tmp6056 = getelementptr inbounds float* %tmp6055, i64 1
- %tmp6057 = getelementptr inbounds float* %tmp6056, i64 1
- %tmp6058 = getelementptr inbounds float* %tmp6057, i64 1
- %tmp6059 = getelementptr inbounds float* %tmp6058, i64 1
- %tmp6060 = getelementptr inbounds float* %tmp6059, i64 1
- %tmp6061 = getelementptr inbounds float* %tmp6060, i64 1
- %tmp6062 = getelementptr inbounds float* %tmp6061, i64 1
- %tmp6063 = getelementptr inbounds float* %tmp6062, i64 1
- %tmp6064 = getelementptr inbounds float* %tmp6063, i64 1
- %tmp6065 = getelementptr inbounds float* %tmp6064, i64 1
- %tmp6066 = getelementptr inbounds float* %tmp6065, i64 1
- %tmp6067 = getelementptr inbounds float* %tmp6066, i64 1
- %tmp6068 = getelementptr inbounds float* %tmp6067, i64 1
- %tmp6069 = getelementptr inbounds float* %tmp6068, i64 1
- %tmp6070 = getelementptr inbounds float* %tmp6069, i64 1
- %tmp6071 = getelementptr inbounds float* %tmp6070, i64 1
- %tmp6072 = getelementptr inbounds float* %tmp6071, i64 1
- %tmp6073 = getelementptr inbounds float* %tmp6072, i64 1
- %tmp6074 = getelementptr inbounds float* %tmp6073, i64 1
- %tmp6075 = getelementptr inbounds float* %tmp6074, i64 1
- %tmp6076 = getelementptr inbounds float* %tmp6075, i64 1
- %tmp6077 = getelementptr inbounds float* %tmp6076, i64 1
- %tmp6078 = getelementptr inbounds float* %tmp6077, i64 1
- %tmp6079 = getelementptr inbounds float* %tmp6078, i64 1
- %tmp6080 = getelementptr inbounds float* %tmp6079, i64 1
- %tmp6081 = getelementptr inbounds float* %tmp6080, i64 1
- %tmp6082 = getelementptr inbounds float* %tmp6081, i64 1
- %tmp6083 = getelementptr inbounds float* %tmp6082, i64 1
- %tmp6084 = getelementptr inbounds float* %tmp6083, i64 1
- %tmp6085 = getelementptr inbounds float* %tmp6084, i64 1
- %tmp6086 = getelementptr inbounds float* %tmp6085, i64 1
- %tmp6087 = getelementptr inbounds float* %tmp6086, i64 1
- %tmp6088 = getelementptr inbounds float* %tmp6087, i64 1
- %tmp6089 = getelementptr inbounds float* %tmp6088, i64 1
- %tmp6090 = getelementptr inbounds float* %tmp6089, i64 1
- %tmp6091 = getelementptr inbounds float* %tmp6090, i64 1
- %tmp6092 = getelementptr inbounds float* %tmp6091, i64 1
- %tmp6093 = getelementptr inbounds float* %tmp6092, i64 1
- %tmp6094 = getelementptr inbounds float* %tmp6093, i64 1
- %tmp6095 = getelementptr inbounds float* %tmp6094, i64 1
- %tmp6096 = getelementptr inbounds float* %tmp6095, i64 1
- %tmp6097 = getelementptr inbounds float* %tmp6096, i64 1
- %tmp6098 = getelementptr inbounds float* %tmp6097, i64 1
- %tmp6099 = getelementptr inbounds float* %tmp6098, i64 1
- %tmp6100 = getelementptr inbounds float* %tmp6099, i64 1
- %tmp6101 = getelementptr inbounds float* %tmp6100, i64 1
- %tmp6102 = getelementptr inbounds float* %tmp6101, i64 1
- %tmp6103 = getelementptr inbounds float* %tmp6102, i64 1
- %tmp6104 = getelementptr inbounds float* %tmp6103, i64 1
- %tmp6105 = getelementptr inbounds float* %tmp6104, i64 1
- %tmp6106 = getelementptr inbounds float* %tmp6105, i64 1
- %tmp6107 = getelementptr inbounds float* %tmp6106, i64 1
- %tmp6108 = getelementptr inbounds float* %tmp6107, i64 1
- %tmp6109 = getelementptr inbounds float* %tmp6108, i64 1
- %tmp6110 = getelementptr inbounds float* %tmp6109, i64 1
- %tmp6111 = getelementptr inbounds float* %tmp6110, i64 1
- %tmp6112 = getelementptr inbounds float* %tmp6111, i64 1
- %tmp6113 = getelementptr inbounds float* %tmp6112, i64 1
- %tmp6114 = getelementptr inbounds float* %tmp6113, i64 1
- %tmp6115 = getelementptr inbounds float* %tmp6114, i64 1
- %tmp6116 = getelementptr inbounds float* %tmp6115, i64 1
- %tmp6117 = getelementptr inbounds float* %tmp6116, i64 1
- %tmp6118 = getelementptr inbounds float* %tmp6117, i64 1
- %tmp6119 = getelementptr inbounds float* %tmp6118, i64 1
- %tmp6120 = getelementptr inbounds float* %tmp6119, i64 1
- %tmp6121 = getelementptr inbounds float* %tmp6120, i64 1
- %tmp6122 = getelementptr inbounds float* %tmp6121, i64 1
- %tmp6123 = getelementptr inbounds float* %tmp6122, i64 1
- %tmp6124 = getelementptr inbounds float* %tmp6123, i64 1
- %tmp6125 = getelementptr inbounds float* %tmp6124, i64 1
- %tmp6126 = getelementptr inbounds float* %tmp6125, i64 1
- %tmp6127 = getelementptr inbounds float* %tmp6126, i64 1
- %tmp6128 = getelementptr inbounds float* %tmp6127, i64 1
- %tmp6129 = getelementptr inbounds float* %tmp6128, i64 1
- %tmp6130 = getelementptr inbounds float* %tmp6129, i64 1
- %tmp6131 = getelementptr inbounds float* %tmp6130, i64 1
- %tmp6132 = getelementptr inbounds float* %tmp6131, i64 1
- %tmp6133 = getelementptr inbounds float* %tmp6132, i64 1
- %tmp6134 = getelementptr inbounds float* %tmp6133, i64 1
- %tmp6135 = getelementptr inbounds float* %tmp6134, i64 1
- %tmp6136 = getelementptr inbounds float* %tmp6135, i64 1
- %tmp6137 = getelementptr inbounds float* %tmp6136, i64 1
- %tmp6138 = getelementptr inbounds float* %tmp6137, i64 1
- %tmp6139 = getelementptr inbounds float* %tmp6138, i64 1
- %tmp6140 = getelementptr inbounds float* %tmp6139, i64 1
- %tmp6141 = getelementptr inbounds float* %tmp6140, i64 1
- %tmp6142 = getelementptr inbounds float* %tmp6141, i64 1
- %tmp6143 = getelementptr inbounds float* %tmp6142, i64 1
- %tmp6144 = getelementptr inbounds float* %tmp6143, i64 1
- %tmp6145 = getelementptr inbounds float* %tmp6144, i64 1
- %tmp6146 = getelementptr inbounds float* %tmp6145, i64 1
- %tmp6147 = getelementptr inbounds float* %tmp6146, i64 1
- %tmp6148 = getelementptr inbounds float* %tmp6147, i64 1
- %tmp6149 = getelementptr inbounds float* %tmp6148, i64 1
- %tmp6150 = getelementptr inbounds float* %tmp6149, i64 1
- %tmp6151 = getelementptr inbounds float* %tmp6150, i64 1
- %tmp6152 = getelementptr inbounds float* %tmp6151, i64 1
- %tmp6153 = getelementptr inbounds float* %tmp6152, i64 1
- %tmp6154 = getelementptr inbounds float* %tmp6153, i64 1
- %tmp6155 = getelementptr inbounds float* %tmp6154, i64 1
- %tmp6156 = getelementptr inbounds float* %tmp6155, i64 1
- %tmp6157 = getelementptr inbounds float* %tmp6156, i64 1
- %tmp6158 = getelementptr inbounds float* %tmp6157, i64 1
- %tmp6159 = getelementptr inbounds float* %tmp6158, i64 1
- %tmp6160 = getelementptr inbounds float* %tmp6159, i64 1
- %tmp6161 = getelementptr inbounds float* %tmp6160, i64 1
- %tmp6162 = getelementptr inbounds float* %tmp6161, i64 1
- %tmp6163 = getelementptr inbounds float* %tmp6162, i64 1
- %tmp6164 = getelementptr inbounds float* %tmp6163, i64 1
- %tmp6165 = getelementptr inbounds float* %tmp6164, i64 1
- %tmp6166 = getelementptr inbounds float* %tmp6165, i64 1
- %tmp6167 = getelementptr inbounds float* %tmp6166, i64 1
- %tmp6168 = getelementptr inbounds float* %tmp6167, i64 1
- %tmp6169 = getelementptr inbounds float* %tmp6168, i64 1
- %tmp6170 = getelementptr inbounds float* %tmp6169, i64 1
- %tmp6171 = getelementptr inbounds float* %tmp6170, i64 1
- %tmp6172 = getelementptr inbounds float* %tmp6171, i64 1
- %tmp6173 = getelementptr inbounds float* %tmp6172, i64 1
- %tmp6174 = getelementptr inbounds float* %tmp6173, i64 1
- %tmp6175 = getelementptr inbounds float* %tmp6174, i64 1
- %tmp6176 = getelementptr inbounds float* %tmp6175, i64 1
- %tmp6177 = getelementptr inbounds float* %tmp6176, i64 1
- %tmp6178 = getelementptr inbounds float* %tmp6177, i64 1
- %tmp6179 = getelementptr inbounds float* %tmp6178, i64 1
- %tmp6180 = getelementptr inbounds float* %tmp6179, i64 1
- %tmp6181 = getelementptr inbounds float* %tmp6180, i64 1
- %tmp6182 = getelementptr inbounds float* %tmp6181, i64 1
- %tmp6183 = getelementptr inbounds float* %tmp6182, i64 1
- %tmp6184 = getelementptr inbounds float* %tmp6183, i64 1
- %tmp6185 = getelementptr inbounds float* %tmp6184, i64 1
- %tmp6186 = getelementptr inbounds float* %tmp6185, i64 1
- %tmp6187 = getelementptr inbounds float* %tmp6186, i64 1
- %tmp6188 = getelementptr inbounds float* %tmp6187, i64 1
- %tmp6189 = getelementptr inbounds float* %tmp6188, i64 1
- %tmp6190 = getelementptr inbounds float* %tmp6189, i64 1
- %tmp6191 = getelementptr inbounds float* %tmp6190, i64 1
- %tmp6192 = getelementptr inbounds float* %tmp6191, i64 1
- %tmp6193 = getelementptr inbounds float* %tmp6192, i64 1
- %tmp6194 = getelementptr inbounds float* %tmp6193, i64 1
- %tmp6195 = getelementptr inbounds float* %tmp6194, i64 1
- %tmp6196 = getelementptr inbounds float* %tmp6195, i64 1
- %tmp6197 = getelementptr inbounds float* %tmp6196, i64 1
- %tmp6198 = getelementptr inbounds float* %tmp6197, i64 1
- %tmp6199 = getelementptr inbounds float* %tmp6198, i64 1
- %tmp6200 = getelementptr inbounds float* %tmp6199, i64 1
- %tmp6201 = getelementptr inbounds float* %tmp6200, i64 1
- %tmp6202 = getelementptr inbounds float* %tmp6201, i64 1
- %tmp6203 = getelementptr inbounds float* %tmp6202, i64 1
- %tmp6204 = getelementptr inbounds float* %tmp6203, i64 1
- %tmp6205 = getelementptr inbounds float* %tmp6204, i64 1
- %tmp6206 = getelementptr inbounds float* %tmp6205, i64 1
- %tmp6207 = getelementptr inbounds float* %tmp6206, i64 1
- %tmp6208 = getelementptr inbounds float* %tmp6207, i64 1
- %tmp6209 = getelementptr inbounds float* %tmp6208, i64 1
- %tmp6210 = getelementptr inbounds float* %tmp6209, i64 1
- %tmp6211 = getelementptr inbounds float* %tmp6210, i64 1
- %tmp6212 = getelementptr inbounds float* %tmp6211, i64 1
- %tmp6213 = getelementptr inbounds float* %tmp6212, i64 1
- %tmp6214 = getelementptr inbounds float* %tmp6213, i64 1
- %tmp6215 = getelementptr inbounds float* %tmp6214, i64 1
- %tmp6216 = getelementptr inbounds float* %tmp6215, i64 1
- %tmp6217 = getelementptr inbounds float* %tmp6216, i64 1
- %tmp6218 = getelementptr inbounds float* %tmp6217, i64 1
- %tmp6219 = getelementptr inbounds float* %tmp6218, i64 1
- %tmp6220 = getelementptr inbounds float* %tmp6219, i64 1
- %tmp6221 = getelementptr inbounds float* %tmp6220, i64 1
- %tmp6222 = getelementptr inbounds float* %tmp6221, i64 1
- %tmp6223 = getelementptr inbounds float* %tmp6222, i64 1
- %tmp6224 = getelementptr inbounds float* %tmp6223, i64 1
- %tmp6225 = getelementptr inbounds float* %tmp6224, i64 1
- %tmp6226 = getelementptr inbounds float* %tmp6225, i64 1
- %tmp6227 = getelementptr inbounds float* %tmp6226, i64 1
- %tmp6228 = getelementptr inbounds float* %tmp6227, i64 1
- %tmp6229 = getelementptr inbounds float* %tmp6228, i64 1
- %tmp6230 = getelementptr inbounds float* %tmp6229, i64 1
- %tmp6231 = getelementptr inbounds float* %tmp6230, i64 1
- %tmp6232 = getelementptr inbounds float* %tmp6231, i64 1
- %tmp6233 = getelementptr inbounds float* %tmp6232, i64 1
- %tmp6234 = getelementptr inbounds float* %tmp6233, i64 1
- %tmp6235 = getelementptr inbounds float* %tmp6234, i64 1
- %tmp6236 = getelementptr inbounds float* %tmp6235, i64 1
- %tmp6237 = getelementptr inbounds float* %tmp6236, i64 1
- %tmp6238 = getelementptr inbounds float* %tmp6237, i64 1
- %tmp6239 = getelementptr inbounds float* %tmp6238, i64 1
- %tmp6240 = getelementptr inbounds float* %tmp6239, i64 1
- %tmp6241 = getelementptr inbounds float* %tmp6240, i64 1
- %tmp6242 = getelementptr inbounds float* %tmp6241, i64 1
- %tmp6243 = getelementptr inbounds float* %tmp6242, i64 1
- %tmp6244 = getelementptr inbounds float* %tmp6243, i64 1
- %tmp6245 = getelementptr inbounds float* %tmp6244, i64 1
- %tmp6246 = getelementptr inbounds float* %tmp6245, i64 1
- %tmp6247 = getelementptr inbounds float* %tmp6246, i64 1
- %tmp6248 = getelementptr inbounds float* %tmp6247, i64 1
- %tmp6249 = getelementptr inbounds float* %tmp6248, i64 1
- %tmp6250 = getelementptr inbounds float* %tmp6249, i64 1
- %tmp6251 = getelementptr inbounds float* %tmp6250, i64 1
- %tmp6252 = getelementptr inbounds float* %tmp6251, i64 1
- %tmp6253 = getelementptr inbounds float* %tmp6252, i64 1
- %tmp6254 = getelementptr inbounds float* %tmp6253, i64 1
- %tmp6255 = getelementptr inbounds float* %tmp6254, i64 1
- %tmp6256 = getelementptr inbounds float* %tmp6255, i64 1
- %tmp6257 = getelementptr inbounds float* %tmp6256, i64 1
- %tmp6258 = getelementptr inbounds float* %tmp6257, i64 1
- %tmp6259 = getelementptr inbounds float* %tmp6258, i64 1
- %tmp6260 = getelementptr inbounds float* %tmp6259, i64 1
- %tmp6261 = getelementptr inbounds float* %tmp6260, i64 1
- %tmp6262 = getelementptr inbounds float* %tmp6261, i64 1
- %tmp6263 = getelementptr inbounds float* %tmp6262, i64 1
- %tmp6264 = getelementptr inbounds float* %tmp6263, i64 1
- %tmp6265 = getelementptr inbounds float* %tmp6264, i64 1
- %tmp6266 = getelementptr inbounds float* %tmp6265, i64 1
- %tmp6267 = getelementptr inbounds float* %tmp6266, i64 1
- %tmp6268 = getelementptr inbounds float* %tmp6267, i64 1
- %tmp6269 = getelementptr inbounds float* %tmp6268, i64 1
- %tmp6270 = getelementptr inbounds float* %tmp6269, i64 1
- %tmp6271 = getelementptr inbounds float* %tmp6270, i64 1
- %tmp6272 = getelementptr inbounds float* %tmp6271, i64 1
- %tmp6273 = getelementptr inbounds float* %tmp6272, i64 1
- %tmp6274 = getelementptr inbounds float* %tmp6273, i64 1
- %tmp6275 = getelementptr inbounds float* %tmp6274, i64 1
- %tmp6276 = getelementptr inbounds float* %tmp6275, i64 1
- %tmp6277 = getelementptr inbounds float* %tmp6276, i64 1
- %tmp6278 = getelementptr inbounds float* %tmp6277, i64 1
- %tmp6279 = getelementptr inbounds float* %tmp6278, i64 1
- %tmp6280 = getelementptr inbounds float* %tmp6279, i64 1
- %tmp6281 = getelementptr inbounds float* %tmp6280, i64 1
- %tmp6282 = getelementptr inbounds float* %tmp6281, i64 1
- %tmp6283 = getelementptr inbounds float* %tmp6282, i64 1
- %tmp6284 = getelementptr inbounds float* %tmp6283, i64 1
- %tmp6285 = getelementptr inbounds float* %tmp6284, i64 1
- %tmp6286 = getelementptr inbounds float* %tmp6285, i64 1
- %tmp6287 = getelementptr inbounds float* %tmp6286, i64 1
- %tmp6288 = getelementptr inbounds float* %tmp6287, i64 1
- %tmp6289 = getelementptr inbounds float* %tmp6288, i64 1
- %tmp6290 = getelementptr inbounds float* %tmp6289, i64 1
- %tmp6291 = getelementptr inbounds float* %tmp6290, i64 1
- %tmp6292 = getelementptr inbounds float* %tmp6291, i64 1
- %tmp6293 = getelementptr inbounds float* %tmp6292, i64 1
- %tmp6294 = getelementptr inbounds float* %tmp6293, i64 1
- %tmp6295 = getelementptr inbounds float* %tmp6294, i64 1
- %tmp6296 = getelementptr inbounds float* %tmp6295, i64 1
- %tmp6297 = getelementptr inbounds float* %tmp6296, i64 1
- %tmp6298 = getelementptr inbounds float* %tmp6297, i64 1
- %tmp6299 = getelementptr inbounds float* %tmp6298, i64 1
- %tmp6300 = getelementptr inbounds float* %tmp6299, i64 1
- %tmp6301 = getelementptr inbounds float* %tmp6300, i64 1
- %tmp6302 = getelementptr inbounds float* %tmp6301, i64 1
- %tmp6303 = getelementptr inbounds float* %tmp6302, i64 1
- %tmp6304 = getelementptr inbounds float* %tmp6303, i64 1
- %tmp6305 = getelementptr inbounds float* %tmp6304, i64 1
- %tmp6306 = getelementptr inbounds float* %tmp6305, i64 1
- %tmp6307 = getelementptr inbounds float* %tmp6306, i64 1
- %tmp6308 = getelementptr inbounds float* %tmp6307, i64 1
- %tmp6309 = getelementptr inbounds float* %tmp6308, i64 1
- %tmp6310 = getelementptr inbounds float* %tmp6309, i64 1
- %tmp6311 = getelementptr inbounds float* %tmp6310, i64 1
- %tmp6312 = getelementptr inbounds float* %tmp6311, i64 1
- %tmp6313 = getelementptr inbounds float* %tmp6312, i64 1
- %tmp6314 = getelementptr inbounds float* %tmp6313, i64 1
- %tmp6315 = getelementptr inbounds float* %tmp6314, i64 1
- %tmp6316 = getelementptr inbounds float* %tmp6315, i64 1
- %tmp6317 = getelementptr inbounds float* %tmp6316, i64 1
- %tmp6318 = getelementptr inbounds float* %tmp6317, i64 1
- %tmp6319 = getelementptr inbounds float* %tmp6318, i64 1
- %tmp6320 = getelementptr inbounds float* %tmp6319, i64 1
- %tmp6321 = getelementptr inbounds float* %tmp6320, i64 1
- %tmp6322 = getelementptr inbounds float* %tmp6321, i64 1
- %tmp6323 = getelementptr inbounds float* %tmp6322, i64 1
- %tmp6324 = getelementptr inbounds float* %tmp6323, i64 1
- %tmp6325 = getelementptr inbounds float* %tmp6324, i64 1
- %tmp6326 = getelementptr inbounds float* %tmp6325, i64 1
- %tmp6327 = getelementptr inbounds float* %tmp6326, i64 1
- %tmp6328 = getelementptr inbounds float* %tmp6327, i64 1
- %tmp6329 = getelementptr inbounds float* %tmp6328, i64 1
- %tmp6330 = getelementptr inbounds float* %tmp6329, i64 1
- %tmp6331 = getelementptr inbounds float* %tmp6330, i64 1
- %tmp6332 = getelementptr inbounds float* %tmp6331, i64 1
- %tmp6333 = getelementptr inbounds float* %tmp6332, i64 1
- %tmp6334 = getelementptr inbounds float* %tmp6333, i64 1
- %tmp6335 = getelementptr inbounds float* %tmp6334, i64 1
- %tmp6336 = getelementptr inbounds float* %tmp6335, i64 1
- %tmp6337 = getelementptr inbounds float* %tmp6336, i64 1
- %tmp6338 = getelementptr inbounds float* %tmp6337, i64 1
- %tmp6339 = getelementptr inbounds float* %tmp6338, i64 1
- %tmp6340 = getelementptr inbounds float* %tmp6339, i64 1
- %tmp6341 = getelementptr inbounds float* %tmp6340, i64 1
- %tmp6342 = getelementptr inbounds float* %tmp6341, i64 1
- %tmp6343 = getelementptr inbounds float* %tmp6342, i64 1
- %tmp6344 = getelementptr inbounds float* %tmp6343, i64 1
- %tmp6345 = getelementptr inbounds float* %tmp6344, i64 1
- %tmp6346 = getelementptr inbounds float* %tmp6345, i64 1
- %tmp6347 = getelementptr inbounds float* %tmp6346, i64 1
- %tmp6348 = getelementptr inbounds float* %tmp6347, i64 1
- %tmp6349 = getelementptr inbounds float* %tmp6348, i64 1
- %tmp6350 = getelementptr inbounds float* %tmp6349, i64 1
- %tmp6351 = getelementptr inbounds float* %tmp6350, i64 1
- %tmp6352 = getelementptr inbounds float* %tmp6351, i64 1
- %tmp6353 = getelementptr inbounds float* %tmp6352, i64 1
- %tmp6354 = getelementptr inbounds float* %tmp6353, i64 1
- %tmp6355 = getelementptr inbounds float* %tmp6354, i64 1
- %tmp6356 = getelementptr inbounds float* %tmp6355, i64 1
- %tmp6357 = getelementptr inbounds float* %tmp6356, i64 1
- %tmp6358 = getelementptr inbounds float* %tmp6357, i64 1
- %tmp6359 = getelementptr inbounds float* %tmp6358, i64 1
- %tmp6360 = getelementptr inbounds float* %tmp6359, i64 1
- %tmp6361 = getelementptr inbounds float* %tmp6360, i64 1
- %tmp6362 = getelementptr inbounds float* %tmp6361, i64 1
- %tmp6363 = getelementptr inbounds float* %tmp6362, i64 1
- %tmp6364 = getelementptr inbounds float* %tmp6363, i64 1
- %tmp6365 = getelementptr inbounds float* %tmp6364, i64 1
- %tmp6366 = getelementptr inbounds float* %tmp6365, i64 1
- %tmp6367 = getelementptr inbounds float* %tmp6366, i64 1
- %tmp6368 = getelementptr inbounds float* %tmp6367, i64 1
- %tmp6369 = getelementptr inbounds float* %tmp6368, i64 1
- %tmp6370 = getelementptr inbounds float* %tmp6369, i64 1
- %tmp6371 = getelementptr inbounds float* %tmp6370, i64 1
- %tmp6372 = getelementptr inbounds float* %tmp6371, i64 1
- %tmp6373 = getelementptr inbounds float* %tmp6372, i64 1
- %tmp6374 = getelementptr inbounds float* %tmp6373, i64 1
- %tmp6375 = getelementptr inbounds float* %tmp6374, i64 1
- %tmp6376 = getelementptr inbounds float* %tmp6375, i64 1
- %tmp6377 = getelementptr inbounds float* %tmp6376, i64 1
- %tmp6378 = getelementptr inbounds float* %tmp6377, i64 1
- %tmp6379 = getelementptr inbounds float* %tmp6378, i64 1
- %tmp6380 = getelementptr inbounds float* %tmp6379, i64 1
- %tmp6381 = getelementptr inbounds float* %tmp6380, i64 1
- %tmp6382 = getelementptr inbounds float* %tmp6381, i64 1
- %tmp6383 = getelementptr inbounds float* %tmp6382, i64 1
- %tmp6384 = getelementptr inbounds float* %tmp6383, i64 1
- %tmp6385 = getelementptr inbounds float* %tmp6384, i64 1
- %tmp6386 = getelementptr inbounds float* %tmp6385, i64 1
- %tmp6387 = getelementptr inbounds float* %tmp6386, i64 1
- %tmp6388 = getelementptr inbounds float* %tmp6387, i64 1
- %tmp6389 = getelementptr inbounds float* %tmp6388, i64 1
- %tmp6390 = getelementptr inbounds float* %tmp6389, i64 1
- %tmp6391 = getelementptr inbounds float* %tmp6390, i64 1
- %tmp6392 = getelementptr inbounds float* %tmp6391, i64 1
- %tmp6393 = getelementptr inbounds float* %tmp6392, i64 1
- %tmp6394 = getelementptr inbounds float* %tmp6393, i64 1
- %tmp6395 = getelementptr inbounds float* %tmp6394, i64 1
- %tmp6396 = getelementptr inbounds float* %tmp6395, i64 1
- %tmp6397 = getelementptr inbounds float* %tmp6396, i64 1
- %tmp6398 = getelementptr inbounds float* %tmp6397, i64 1
- %tmp6399 = getelementptr inbounds float* %tmp6398, i64 1
- %tmp6400 = getelementptr inbounds float* %tmp6399, i64 1
- %tmp6401 = getelementptr inbounds float* %tmp6400, i64 1
- %tmp6402 = getelementptr inbounds float* %tmp6401, i64 1
- %tmp6403 = getelementptr inbounds float* %tmp6402, i64 1
- %tmp6404 = getelementptr inbounds float* %tmp6403, i64 1
- %tmp6405 = getelementptr inbounds float* %tmp6404, i64 1
- %tmp6406 = getelementptr inbounds float* %tmp6405, i64 1
- %tmp6407 = getelementptr inbounds float* %tmp6406, i64 1
- %tmp6408 = getelementptr inbounds float* %tmp6407, i64 1
- %tmp6409 = getelementptr inbounds float* %tmp6408, i64 1
- %tmp6410 = getelementptr inbounds float* %tmp6409, i64 1
- %tmp6411 = getelementptr inbounds float* %tmp6410, i64 1
- %tmp6412 = getelementptr inbounds float* %tmp6411, i64 1
- %tmp6413 = getelementptr inbounds float* %tmp6412, i64 1
- %tmp6414 = getelementptr inbounds float* %tmp6413, i64 1
- %tmp6415 = getelementptr inbounds float* %tmp6414, i64 1
- %tmp6416 = getelementptr inbounds float* %tmp6415, i64 1
- %tmp6417 = getelementptr inbounds float* %tmp6416, i64 1
- %tmp6418 = getelementptr inbounds float* %tmp6417, i64 1
- %tmp6419 = getelementptr inbounds float* %tmp6418, i64 1
- %tmp6420 = getelementptr inbounds float* %tmp6419, i64 1
- %tmp6421 = getelementptr inbounds float* %tmp6420, i64 1
- %tmp6422 = getelementptr inbounds float* %tmp6421, i64 1
- %tmp6423 = getelementptr inbounds float* %tmp6422, i64 1
- %tmp6424 = getelementptr inbounds float* %tmp6423, i64 1
- %tmp6425 = getelementptr inbounds float* %tmp6424, i64 1
- %tmp6426 = getelementptr inbounds float* %tmp6425, i64 1
- %tmp6427 = getelementptr inbounds float* %tmp6426, i64 1
- %tmp6428 = getelementptr inbounds float* %tmp6427, i64 1
- %tmp6429 = getelementptr inbounds float* %tmp6428, i64 1
- %tmp6430 = getelementptr inbounds float* %tmp6429, i64 1
- %tmp6431 = getelementptr inbounds float* %tmp6430, i64 1
- %tmp6432 = getelementptr inbounds float* %tmp6431, i64 1
- %tmp6433 = getelementptr inbounds float* %tmp6432, i64 1
- %tmp6434 = getelementptr inbounds float* %tmp6433, i64 1
- %tmp6435 = getelementptr inbounds float* %tmp6434, i64 1
- %tmp6436 = getelementptr inbounds float* %tmp6435, i64 1
- %tmp6437 = getelementptr inbounds float* %tmp6436, i64 1
- %tmp6438 = getelementptr inbounds float* %tmp6437, i64 1
- %tmp6439 = getelementptr inbounds float* %tmp6438, i64 1
- %tmp6440 = getelementptr inbounds float* %tmp6439, i64 1
- %tmp6441 = getelementptr inbounds float* %tmp6440, i64 1
- %tmp6442 = getelementptr inbounds float* %tmp6441, i64 1
- %tmp6443 = getelementptr inbounds float* %tmp6442, i64 1
- %tmp6444 = getelementptr inbounds float* %tmp6443, i64 1
- %tmp6445 = getelementptr inbounds float* %tmp6444, i64 1
- %tmp6446 = getelementptr inbounds float* %tmp6445, i64 1
- %tmp6447 = getelementptr inbounds float* %tmp6446, i64 1
- %tmp6448 = getelementptr inbounds float* %tmp6447, i64 1
- %tmp6449 = getelementptr inbounds float* %tmp6448, i64 1
- %tmp6450 = getelementptr inbounds float* %tmp6449, i64 1
- %tmp6451 = getelementptr inbounds float* %tmp6450, i64 1
- %tmp6452 = getelementptr inbounds float* %tmp6451, i64 1
- %tmp6453 = getelementptr inbounds float* %tmp6452, i64 1
- %tmp6454 = getelementptr inbounds float* %tmp6453, i64 1
- %tmp6455 = getelementptr inbounds float* %tmp6454, i64 1
- %tmp6456 = getelementptr inbounds float* %tmp6455, i64 1
- %tmp6457 = getelementptr inbounds float* %tmp6456, i64 1
- %tmp6458 = getelementptr inbounds float* %tmp6457, i64 1
- %tmp6459 = getelementptr inbounds float* %tmp6458, i64 1
- %tmp6460 = getelementptr inbounds float* %tmp6459, i64 1
- %tmp6461 = getelementptr inbounds float* %tmp6460, i64 1
- %tmp6462 = getelementptr inbounds float* %tmp6461, i64 1
- %tmp6463 = getelementptr inbounds float* %tmp6462, i64 1
- %tmp6464 = getelementptr inbounds float* %tmp6463, i64 1
- %tmp6465 = getelementptr inbounds float* %tmp6464, i64 1
- %tmp6466 = getelementptr inbounds float* %tmp6465, i64 1
- %tmp6467 = getelementptr inbounds float* %tmp6466, i64 1
- %tmp6468 = getelementptr inbounds float* %tmp6467, i64 1
- %tmp6469 = getelementptr inbounds float* %tmp6468, i64 1
- %tmp6470 = getelementptr inbounds float* %tmp6469, i64 1
- %tmp6471 = getelementptr inbounds float* %tmp6470, i64 1
- %tmp6472 = getelementptr inbounds float* %tmp6471, i64 1
- %tmp6473 = getelementptr inbounds float* %tmp6472, i64 1
- %tmp6474 = getelementptr inbounds float* %tmp6473, i64 1
- %tmp6475 = getelementptr inbounds float* %tmp6474, i64 1
- %tmp6476 = getelementptr inbounds float* %tmp6475, i64 1
- %tmp6477 = getelementptr inbounds float* %tmp6476, i64 1
- %tmp6478 = getelementptr inbounds float* %tmp6477, i64 1
- %tmp6479 = getelementptr inbounds float* %tmp6478, i64 1
- %tmp6480 = getelementptr inbounds float* %tmp6479, i64 1
- %tmp6481 = getelementptr inbounds float* %tmp6480, i64 1
- %tmp6482 = getelementptr inbounds float* %tmp6481, i64 1
- %tmp6483 = getelementptr inbounds float* %tmp6482, i64 1
- %tmp6484 = getelementptr inbounds float* %tmp6483, i64 1
- %tmp6485 = getelementptr inbounds float* %tmp6484, i64 1
- %tmp6486 = getelementptr inbounds float* %tmp6485, i64 1
- %tmp6487 = getelementptr inbounds float* %tmp6486, i64 1
- %tmp6488 = getelementptr inbounds float* %tmp6487, i64 1
- %tmp6489 = getelementptr inbounds float* %tmp6488, i64 1
- %tmp6490 = getelementptr inbounds float* %tmp6489, i64 1
- %tmp6491 = getelementptr inbounds float* %tmp6490, i64 1
- %tmp6492 = getelementptr inbounds float* %tmp6491, i64 1
- %tmp6493 = getelementptr inbounds float* %tmp6492, i64 1
- %tmp6494 = getelementptr inbounds float* %tmp6493, i64 1
- %tmp6495 = getelementptr inbounds float* %tmp6494, i64 1
- %tmp6496 = getelementptr inbounds float* %tmp6495, i64 1
- %tmp6497 = getelementptr inbounds float* %tmp6496, i64 1
- %tmp6498 = getelementptr inbounds float* %tmp6497, i64 1
- %tmp6499 = getelementptr inbounds float* %tmp6498, i64 1
- %tmp6500 = getelementptr inbounds float* %tmp6499, i64 1
- %tmp6501 = getelementptr inbounds float* %tmp6500, i64 1
- %tmp6502 = getelementptr inbounds float* %tmp6501, i64 1
- %tmp6503 = getelementptr inbounds float* %tmp6502, i64 1
- %tmp6504 = getelementptr inbounds float* %tmp6503, i64 1
- %tmp6505 = getelementptr inbounds float* %tmp6504, i64 1
- %tmp6506 = getelementptr inbounds float* %tmp6505, i64 1
- %tmp6507 = getelementptr inbounds float* %tmp6506, i64 1
- %tmp6508 = getelementptr inbounds float* %tmp6507, i64 1
- %tmp6509 = getelementptr inbounds float* %tmp6508, i64 1
- %tmp6510 = getelementptr inbounds float* %tmp6509, i64 1
- %tmp6511 = getelementptr inbounds float* %tmp6510, i64 1
- %tmp6512 = getelementptr inbounds float* %tmp6511, i64 1
- %tmp6513 = getelementptr inbounds float* %tmp6512, i64 1
- %tmp6514 = getelementptr inbounds float* %tmp6513, i64 1
- %tmp6515 = getelementptr inbounds float* %tmp6514, i64 1
- %tmp6516 = getelementptr inbounds float* %tmp6515, i64 1
- %tmp6517 = getelementptr inbounds float* %tmp6516, i64 1
- %tmp6518 = getelementptr inbounds float* %tmp6517, i64 1
- %tmp6519 = getelementptr inbounds float* %tmp6518, i64 1
- %tmp6520 = getelementptr inbounds float* %tmp6519, i64 1
- %tmp6521 = getelementptr inbounds float* %tmp6520, i64 1
- %tmp6522 = getelementptr inbounds float* %tmp6521, i64 1
- %tmp6523 = getelementptr inbounds float* %tmp6522, i64 1
- %tmp6524 = getelementptr inbounds float* %tmp6523, i64 1
- %tmp6525 = getelementptr inbounds float* %tmp6524, i64 1
- %tmp6526 = getelementptr inbounds float* %tmp6525, i64 1
- %tmp6527 = getelementptr inbounds float* %tmp6526, i64 1
- %tmp6528 = getelementptr inbounds float* %tmp6527, i64 1
- %tmp6529 = getelementptr inbounds float* %tmp6528, i64 1
- %tmp6530 = getelementptr inbounds float* %tmp6529, i64 1
- %tmp6531 = getelementptr inbounds float* %tmp6530, i64 1
- %tmp6532 = getelementptr inbounds float* %tmp6531, i64 1
- %tmp6533 = getelementptr inbounds float* %tmp6532, i64 1
- %tmp6534 = getelementptr inbounds float* %tmp6533, i64 1
- %tmp6535 = getelementptr inbounds float* %tmp6534, i64 1
- %tmp6536 = getelementptr inbounds float* %tmp6535, i64 1
- %tmp6537 = getelementptr inbounds float* %tmp6536, i64 1
- %tmp6538 = getelementptr inbounds float* %tmp6537, i64 1
- %tmp6539 = getelementptr inbounds float* %tmp6538, i64 1
- %tmp6540 = getelementptr inbounds float* %tmp6539, i64 1
- %tmp6541 = getelementptr inbounds float* %tmp6540, i64 1
- %tmp6542 = getelementptr inbounds float* %tmp6541, i64 1
- %tmp6543 = getelementptr inbounds float* %tmp6542, i64 1
- %tmp6544 = getelementptr inbounds float* %tmp6543, i64 1
- %tmp6545 = getelementptr inbounds float* %tmp6544, i64 1
- %tmp6546 = getelementptr inbounds float* %tmp6545, i64 1
- %tmp6547 = getelementptr inbounds float* %tmp6546, i64 1
- %tmp6548 = getelementptr inbounds float* %tmp6547, i64 1
- %tmp6549 = getelementptr inbounds float* %tmp6548, i64 1
- %tmp6550 = getelementptr inbounds float* %tmp6549, i64 1
- %tmp6551 = getelementptr inbounds float* %tmp6550, i64 1
- %tmp6552 = getelementptr inbounds float* %tmp6551, i64 1
- %tmp6553 = getelementptr inbounds float* %tmp6552, i64 1
- %tmp6554 = getelementptr inbounds float* %tmp6553, i64 1
- %tmp6555 = getelementptr inbounds float* %tmp6554, i64 1
- %tmp6556 = getelementptr inbounds float* %tmp6555, i64 1
- %tmp6557 = getelementptr inbounds float* %tmp6556, i64 1
- %tmp6558 = getelementptr inbounds float* %tmp6557, i64 1
- %tmp6559 = getelementptr inbounds float* %tmp6558, i64 1
- %tmp6560 = getelementptr inbounds float* %tmp6559, i64 1
- %tmp6561 = getelementptr inbounds float* %tmp6560, i64 1
- %tmp6562 = getelementptr inbounds float* %tmp6561, i64 1
- %tmp6563 = getelementptr inbounds float* %tmp6562, i64 1
- %tmp6564 = getelementptr inbounds float* %tmp6563, i64 1
- %tmp6565 = getelementptr inbounds float* %tmp6564, i64 1
- %tmp6566 = getelementptr inbounds float* %tmp6565, i64 1
- %tmp6567 = getelementptr inbounds float* %tmp6566, i64 1
- %tmp6568 = getelementptr inbounds float* %tmp6567, i64 1
- %tmp6569 = getelementptr inbounds float* %tmp6568, i64 1
- %tmp6570 = getelementptr inbounds float* %tmp6569, i64 1
- %tmp6571 = getelementptr inbounds float* %tmp6570, i64 1
- %tmp6572 = getelementptr inbounds float* %tmp6571, i64 1
- %tmp6573 = getelementptr inbounds float* %tmp6572, i64 1
- %tmp6574 = getelementptr inbounds float* %tmp6573, i64 1
- %tmp6575 = getelementptr inbounds float* %tmp6574, i64 1
- %tmp6576 = getelementptr inbounds float* %tmp6575, i64 1
- %tmp6577 = getelementptr inbounds float* %tmp6576, i64 1
- %tmp6578 = getelementptr inbounds float* %tmp6577, i64 1
- %tmp6579 = getelementptr inbounds float* %tmp6578, i64 1
- %tmp6580 = getelementptr inbounds float* %tmp6579, i64 1
- %tmp6581 = getelementptr inbounds float* %tmp6580, i64 1
- %tmp6582 = getelementptr inbounds float* %tmp6581, i64 1
- %tmp6583 = getelementptr inbounds float* %tmp6582, i64 1
- %tmp6584 = getelementptr inbounds float* %tmp6583, i64 1
- %tmp6585 = getelementptr inbounds float* %tmp6584, i64 1
- %tmp6586 = getelementptr inbounds float* %tmp6585, i64 1
- %tmp6587 = getelementptr inbounds float* %tmp6586, i64 1
- %tmp6588 = getelementptr inbounds float* %tmp6587, i64 1
- %tmp6589 = getelementptr inbounds float* %tmp6588, i64 1
- %tmp6590 = getelementptr inbounds float* %tmp6589, i64 1
- %tmp6591 = getelementptr inbounds float* %tmp6590, i64 1
- %tmp6592 = getelementptr inbounds float* %tmp6591, i64 1
- %tmp6593 = getelementptr inbounds float* %tmp6592, i64 1
- %tmp6594 = getelementptr inbounds float* %tmp6593, i64 1
- %tmp6595 = getelementptr inbounds float* %tmp6594, i64 1
- %tmp6596 = getelementptr inbounds float* %tmp6595, i64 1
- %tmp6597 = getelementptr inbounds float* %tmp6596, i64 1
- %tmp6598 = getelementptr inbounds float* %tmp6597, i64 1
- %tmp6599 = getelementptr inbounds float* %tmp6598, i64 1
- %tmp6600 = getelementptr inbounds float* %tmp6599, i64 1
- %tmp6601 = getelementptr inbounds float* %tmp6600, i64 1
- %tmp6602 = getelementptr inbounds float* %tmp6601, i64 1
- %tmp6603 = getelementptr inbounds float* %tmp6602, i64 1
- %tmp6604 = getelementptr inbounds float* %tmp6603, i64 1
- %tmp6605 = getelementptr inbounds float* %tmp6604, i64 1
- %tmp6606 = getelementptr inbounds float* %tmp6605, i64 1
- %tmp6607 = getelementptr inbounds float* %tmp6606, i64 1
- %tmp6608 = getelementptr inbounds float* %tmp6607, i64 1
- %tmp6609 = getelementptr inbounds float* %tmp6608, i64 1
- %tmp6610 = getelementptr inbounds float* %tmp6609, i64 1
- %tmp6611 = getelementptr inbounds float* %tmp6610, i64 1
- %tmp6612 = getelementptr inbounds float* %tmp6611, i64 1
- %tmp6613 = getelementptr inbounds float* %tmp6612, i64 1
- %tmp6614 = getelementptr inbounds float* %tmp6613, i64 1
- %tmp6615 = getelementptr inbounds float* %tmp6614, i64 1
- %tmp6616 = getelementptr inbounds float* %tmp6615, i64 1
- %tmp6617 = getelementptr inbounds float* %tmp6616, i64 1
- %tmp6618 = getelementptr inbounds float* %tmp6617, i64 1
- %tmp6619 = getelementptr inbounds float* %tmp6618, i64 1
- %tmp6620 = getelementptr inbounds float* %tmp6619, i64 1
- %tmp6621 = getelementptr inbounds float* %tmp6620, i64 1
- %tmp6622 = getelementptr inbounds float* %tmp6621, i64 1
- %tmp6623 = getelementptr inbounds float* %tmp6622, i64 1
- %tmp6624 = getelementptr inbounds float* %tmp6623, i64 1
- %tmp6625 = getelementptr inbounds float* %tmp6624, i64 1
- %tmp6626 = getelementptr inbounds float* %tmp6625, i64 1
- %tmp6627 = getelementptr inbounds float* %tmp6626, i64 1
- %tmp6628 = getelementptr inbounds float* %tmp6627, i64 1
- %tmp6629 = getelementptr inbounds float* %tmp6628, i64 1
- %tmp6630 = getelementptr inbounds float* %tmp6629, i64 1
- %tmp6631 = getelementptr inbounds float* %tmp6630, i64 1
- %tmp6632 = getelementptr inbounds float* %tmp6631, i64 1
- %tmp6633 = getelementptr inbounds float* %tmp6632, i64 1
- %tmp6634 = getelementptr inbounds float* %tmp6633, i64 1
- %tmp6635 = getelementptr inbounds float* %tmp6634, i64 1
- %tmp6636 = getelementptr inbounds float* %tmp6635, i64 1
- %tmp6637 = getelementptr inbounds float* %tmp6636, i64 1
- %tmp6638 = getelementptr inbounds float* %tmp6637, i64 1
- %tmp6639 = getelementptr inbounds float* %tmp6638, i64 1
- %tmp6640 = getelementptr inbounds float* %tmp6639, i64 1
- %tmp6641 = getelementptr inbounds float* %tmp6640, i64 1
- %tmp6642 = getelementptr inbounds float* %tmp6641, i64 1
- %tmp6643 = getelementptr inbounds float* %tmp6642, i64 1
- %tmp6644 = getelementptr inbounds float* %tmp6643, i64 1
- %tmp6645 = getelementptr inbounds float* %tmp6644, i64 1
- %tmp6646 = getelementptr inbounds float* %tmp6645, i64 1
- %tmp6647 = getelementptr inbounds float* %tmp6646, i64 1
- %tmp6648 = getelementptr inbounds float* %tmp6647, i64 1
- %tmp6649 = getelementptr inbounds float* %tmp6648, i64 1
- %tmp6650 = getelementptr inbounds float* %tmp6649, i64 1
- %tmp6651 = getelementptr inbounds float* %tmp6650, i64 1
- %tmp6652 = getelementptr inbounds float* %tmp6651, i64 1
- %tmp6653 = getelementptr inbounds float* %tmp6652, i64 1
- %tmp6654 = getelementptr inbounds float* %tmp6653, i64 1
- %tmp6655 = getelementptr inbounds float* %tmp6654, i64 1
- %tmp6656 = getelementptr inbounds float* %tmp6655, i64 1
- %tmp6657 = getelementptr inbounds float* %tmp6656, i64 1
- %tmp6658 = getelementptr inbounds float* %tmp6657, i64 1
- %tmp6659 = getelementptr inbounds float* %tmp6658, i64 1
- %tmp6660 = getelementptr inbounds float* %tmp6659, i64 1
- %tmp6661 = getelementptr inbounds float* %tmp6660, i64 1
- %tmp6662 = getelementptr inbounds float* %tmp6661, i64 1
- %tmp6663 = getelementptr inbounds float* %tmp6662, i64 1
- %tmp6664 = getelementptr inbounds float* %tmp6663, i64 1
- %tmp6665 = getelementptr inbounds float* %tmp6664, i64 1
- %tmp6666 = getelementptr inbounds float* %tmp6665, i64 1
- %tmp6667 = getelementptr inbounds float* %tmp6666, i64 1
- %tmp6668 = getelementptr inbounds float* %tmp6667, i64 1
- %tmp6669 = getelementptr inbounds float* %tmp6668, i64 1
- %tmp6670 = getelementptr inbounds float* %tmp6669, i64 1
- %tmp6671 = getelementptr inbounds float* %tmp6670, i64 1
- %tmp6672 = getelementptr inbounds float* %tmp6671, i64 1
- %tmp6673 = getelementptr inbounds float* %tmp6672, i64 1
- %tmp6674 = getelementptr inbounds float* %tmp6673, i64 1
- %tmp6675 = getelementptr inbounds float* %tmp6674, i64 1
- %tmp6676 = getelementptr inbounds float* %tmp6675, i64 1
- %tmp6677 = getelementptr inbounds float* %tmp6676, i64 1
- %tmp6678 = getelementptr inbounds float* %tmp6677, i64 1
- %tmp6679 = getelementptr inbounds float* %tmp6678, i64 1
- %tmp6680 = getelementptr inbounds float* %tmp6679, i64 1
- %tmp6681 = getelementptr inbounds float* %tmp6680, i64 1
- %tmp6682 = getelementptr inbounds float* %tmp6681, i64 1
- %tmp6683 = getelementptr inbounds float* %tmp6682, i64 1
- %tmp6684 = getelementptr inbounds float* %tmp6683, i64 1
- %tmp6685 = getelementptr inbounds float* %tmp6684, i64 1
- %tmp6686 = getelementptr inbounds float* %tmp6685, i64 1
- %tmp6687 = getelementptr inbounds float* %tmp6686, i64 1
- %tmp6688 = getelementptr inbounds float* %tmp6687, i64 1
- %tmp6689 = getelementptr inbounds float* %tmp6688, i64 1
- %tmp6690 = getelementptr inbounds float* %tmp6689, i64 1
- %tmp6691 = getelementptr inbounds float* %tmp6690, i64 1
- %tmp6692 = getelementptr inbounds float* %tmp6691, i64 1
- %tmp6693 = getelementptr inbounds float* %tmp6692, i64 1
- %tmp6694 = getelementptr inbounds float* %tmp6693, i64 1
- %tmp6695 = getelementptr inbounds float* %tmp6694, i64 1
- %tmp6696 = getelementptr inbounds float* %tmp6695, i64 1
- %tmp6697 = getelementptr inbounds float* %tmp6696, i64 1
- %tmp6698 = getelementptr inbounds float* %tmp6697, i64 1
- %tmp6699 = getelementptr inbounds float* %tmp6698, i64 1
- %tmp6700 = getelementptr inbounds float* %tmp6699, i64 1
- %tmp6701 = getelementptr inbounds float* %tmp6700, i64 1
- %tmp6702 = getelementptr inbounds float* %tmp6701, i64 1
- %tmp6703 = getelementptr inbounds float* %tmp6702, i64 1
- %tmp6704 = getelementptr inbounds float* %tmp6703, i64 1
- %tmp6705 = getelementptr inbounds float* %tmp6704, i64 1
- %tmp6706 = getelementptr inbounds float* %tmp6705, i64 1
- %tmp6707 = getelementptr inbounds float* %tmp6706, i64 1
- %tmp6708 = getelementptr inbounds float* %tmp6707, i64 1
- %tmp6709 = getelementptr inbounds float* %tmp6708, i64 1
- %tmp6710 = getelementptr inbounds float* %tmp6709, i64 1
- %tmp6711 = getelementptr inbounds float* %tmp6710, i64 1
- %tmp6712 = getelementptr inbounds float* %tmp6711, i64 1
- %tmp6713 = getelementptr inbounds float* %tmp6712, i64 1
- %tmp6714 = getelementptr inbounds float* %tmp6713, i64 1
- %tmp6715 = getelementptr inbounds float* %tmp6714, i64 1
- %tmp6716 = getelementptr inbounds float* %tmp6715, i64 1
- %tmp6717 = getelementptr inbounds float* %tmp6716, i64 1
- %tmp6718 = getelementptr inbounds float* %tmp6717, i64 1
- %tmp6719 = getelementptr inbounds float* %tmp6718, i64 1
- %tmp6720 = getelementptr inbounds float* %tmp6719, i64 1
- %tmp6721 = getelementptr inbounds float* %tmp6720, i64 1
- %tmp6722 = getelementptr inbounds float* %tmp6721, i64 1
- %tmp6723 = getelementptr inbounds float* %tmp6722, i64 1
- %tmp6724 = getelementptr inbounds float* %tmp6723, i64 1
- %tmp6725 = getelementptr inbounds float* %tmp6724, i64 1
- %tmp6726 = getelementptr inbounds float* %tmp6725, i64 1
- %tmp6727 = getelementptr inbounds float* %tmp6726, i64 1
- %tmp6728 = getelementptr inbounds float* %tmp6727, i64 1
- %tmp6729 = getelementptr inbounds float* %tmp6728, i64 1
- %tmp6730 = getelementptr inbounds float* %tmp6729, i64 1
- %tmp6731 = getelementptr inbounds float* %tmp6730, i64 1
- %tmp6732 = getelementptr inbounds float* %tmp6731, i64 1
- %tmp6733 = getelementptr inbounds float* %tmp6732, i64 1
- %tmp6734 = getelementptr inbounds float* %tmp6733, i64 1
- %tmp6735 = getelementptr inbounds float* %tmp6734, i64 1
- %tmp6736 = getelementptr inbounds float* %tmp6735, i64 1
- %tmp6737 = getelementptr inbounds float* %tmp6736, i64 1
- %tmp6738 = getelementptr inbounds float* %tmp6737, i64 1
- %tmp6739 = getelementptr inbounds float* %tmp6738, i64 1
- %tmp6740 = getelementptr inbounds float* %tmp6739, i64 1
- %tmp6741 = getelementptr inbounds float* %tmp6740, i64 1
- %tmp6742 = getelementptr inbounds float* %tmp6741, i64 1
- %tmp6743 = getelementptr inbounds float* %tmp6742, i64 1
- %tmp6744 = getelementptr inbounds float* %tmp6743, i64 1
- %tmp6745 = getelementptr inbounds float* %tmp6744, i64 1
- %tmp6746 = getelementptr inbounds float* %tmp6745, i64 1
- %tmp6747 = getelementptr inbounds float* %tmp6746, i64 1
- %tmp6748 = getelementptr inbounds float* %tmp6747, i64 1
- %tmp6749 = getelementptr inbounds float* %tmp6748, i64 1
- %tmp6750 = getelementptr inbounds float* %tmp6749, i64 1
- %tmp6751 = getelementptr inbounds float* %tmp6750, i64 1
- %tmp6752 = getelementptr inbounds float* %tmp6751, i64 1
- %tmp6753 = getelementptr inbounds float* %tmp6752, i64 1
- %tmp6754 = getelementptr inbounds float* %tmp6753, i64 1
- %tmp6755 = getelementptr inbounds float* %tmp6754, i64 1
- %tmp6756 = getelementptr inbounds float* %tmp6755, i64 1
- %tmp6757 = getelementptr inbounds float* %tmp6756, i64 1
- %tmp6758 = getelementptr inbounds float* %tmp6757, i64 1
- %tmp6759 = getelementptr inbounds float* %tmp6758, i64 1
- %tmp6760 = getelementptr inbounds float* %tmp6759, i64 1
- %tmp6761 = getelementptr inbounds float* %tmp6760, i64 1
- %tmp6762 = getelementptr inbounds float* %tmp6761, i64 1
- %tmp6763 = getelementptr inbounds float* %tmp6762, i64 1
- %tmp6764 = getelementptr inbounds float* %tmp6763, i64 1
- %tmp6765 = getelementptr inbounds float* %tmp6764, i64 1
- %tmp6766 = getelementptr inbounds float* %tmp6765, i64 1
- %tmp6767 = getelementptr inbounds float* %tmp6766, i64 1
- %tmp6768 = getelementptr inbounds float* %tmp6767, i64 1
- %tmp6769 = getelementptr inbounds float* %tmp6768, i64 1
- %tmp6770 = getelementptr inbounds float* %tmp6769, i64 1
- %tmp6771 = getelementptr inbounds float* %tmp6770, i64 1
- %tmp6772 = getelementptr inbounds float* %tmp6771, i64 1
- %tmp6773 = getelementptr inbounds float* %tmp6772, i64 1
- %tmp6774 = getelementptr inbounds float* %tmp6773, i64 1
- %tmp6775 = getelementptr inbounds float* %tmp6774, i64 1
- %tmp6776 = getelementptr inbounds float* %tmp6775, i64 1
- %tmp6777 = getelementptr inbounds float* %tmp6776, i64 1
- %tmp6778 = getelementptr inbounds float* %tmp6777, i64 1
- %tmp6779 = getelementptr inbounds float* %tmp6778, i64 1
- %tmp6780 = getelementptr inbounds float* %tmp6779, i64 1
- %tmp6781 = getelementptr inbounds float* %tmp6780, i64 1
- %tmp6782 = getelementptr inbounds float* %tmp6781, i64 1
- %tmp6783 = getelementptr inbounds float* %tmp6782, i64 1
- %tmp6784 = getelementptr inbounds float* %tmp6783, i64 1
- %tmp6785 = getelementptr inbounds float* %tmp6784, i64 1
- %tmp6786 = getelementptr inbounds float* %tmp6785, i64 1
- %tmp6787 = getelementptr inbounds float* %tmp6786, i64 1
- %tmp6788 = getelementptr inbounds float* %tmp6787, i64 1
- %tmp6789 = getelementptr inbounds float* %tmp6788, i64 1
- %tmp6790 = getelementptr inbounds float* %tmp6789, i64 1
- %tmp6791 = getelementptr inbounds float* %tmp6790, i64 1
- %tmp6792 = getelementptr inbounds float* %tmp6791, i64 1
- %tmp6793 = getelementptr inbounds float* %tmp6792, i64 1
- %tmp6794 = getelementptr inbounds float* %tmp6793, i64 1
- %tmp6795 = getelementptr inbounds float* %tmp6794, i64 1
- %tmp6796 = getelementptr inbounds float* %tmp6795, i64 1
- %tmp6797 = getelementptr inbounds float* %tmp6796, i64 1
- %tmp6798 = getelementptr inbounds float* %tmp6797, i64 1
- %tmp6799 = getelementptr inbounds float* %tmp6798, i64 1
- %tmp6800 = getelementptr inbounds float* %tmp6799, i64 1
- %tmp6801 = getelementptr inbounds float* %tmp6800, i64 1
- %tmp6802 = getelementptr inbounds float* %tmp6801, i64 1
- %tmp6803 = getelementptr inbounds float* %tmp6802, i64 1
- %tmp6804 = getelementptr inbounds float* %tmp6803, i64 1
- %tmp6805 = getelementptr inbounds float* %tmp6804, i64 1
- %tmp6806 = getelementptr inbounds float* %tmp6805, i64 1
- %tmp6807 = getelementptr inbounds float* %tmp6806, i64 1
- %tmp6808 = getelementptr inbounds float* %tmp6807, i64 1
- %tmp6809 = getelementptr inbounds float* %tmp6808, i64 1
- %tmp6810 = getelementptr inbounds float* %tmp6809, i64 1
- %tmp6811 = getelementptr inbounds float* %tmp6810, i64 1
- %tmp6812 = getelementptr inbounds float* %tmp6811, i64 1
- %tmp6813 = getelementptr inbounds float* %tmp6812, i64 1
- %tmp6814 = getelementptr inbounds float* %tmp6813, i64 1
- %tmp6815 = getelementptr inbounds float* %tmp6814, i64 1
- %tmp6816 = getelementptr inbounds float* %tmp6815, i64 1
- %tmp6817 = getelementptr inbounds float* %tmp6816, i64 1
- %tmp6818 = getelementptr inbounds float* %tmp6817, i64 1
- %tmp6819 = getelementptr inbounds float* %tmp6818, i64 1
- %tmp6820 = getelementptr inbounds float* %tmp6819, i64 1
- %tmp6821 = getelementptr inbounds float* %tmp6820, i64 1
- %tmp6822 = getelementptr inbounds float* %tmp6821, i64 1
- %tmp6823 = getelementptr inbounds float* %tmp6822, i64 1
- %tmp6824 = getelementptr inbounds float* %tmp6823, i64 1
- %tmp6825 = getelementptr inbounds float* %tmp6824, i64 1
- %tmp6826 = getelementptr inbounds float* %tmp6825, i64 1
- %tmp6827 = getelementptr inbounds float* %tmp6826, i64 1
- %tmp6828 = getelementptr inbounds float* %tmp6827, i64 1
- %tmp6829 = getelementptr inbounds float* %tmp6828, i64 1
- %tmp6830 = getelementptr inbounds float* %tmp6829, i64 1
- %tmp6831 = getelementptr inbounds float* %tmp6830, i64 1
- %tmp6832 = getelementptr inbounds float* %tmp6831, i64 1
- %tmp6833 = getelementptr inbounds float* %tmp6832, i64 1
- %tmp6834 = getelementptr inbounds float* %tmp6833, i64 1
- %tmp6835 = getelementptr inbounds float* %tmp6834, i64 1
- %tmp6836 = getelementptr inbounds float* %tmp6835, i64 1
- %tmp6837 = getelementptr inbounds float* %tmp6836, i64 1
- %tmp6838 = getelementptr inbounds float* %tmp6837, i64 1
- %tmp6839 = getelementptr inbounds float* %tmp6838, i64 1
- %tmp6840 = getelementptr inbounds float* %tmp6839, i64 1
- %tmp6841 = getelementptr inbounds float* %tmp6840, i64 1
- %tmp6842 = getelementptr inbounds float* %tmp6841, i64 1
- %tmp6843 = getelementptr inbounds float* %tmp6842, i64 1
- %tmp6844 = getelementptr inbounds float* %tmp6843, i64 1
- %tmp6845 = getelementptr inbounds float* %tmp6844, i64 1
- %tmp6846 = getelementptr inbounds float* %tmp6845, i64 1
- %tmp6847 = getelementptr inbounds float* %tmp6846, i64 1
- %tmp6848 = getelementptr inbounds float* %tmp6847, i64 1
- %tmp6849 = getelementptr inbounds float* %tmp6848, i64 1
- %tmp6850 = getelementptr inbounds float* %tmp6849, i64 1
- %tmp6851 = getelementptr inbounds float* %tmp6850, i64 1
- %tmp6852 = getelementptr inbounds float* %tmp6851, i64 1
- %tmp6853 = getelementptr inbounds float* %tmp6852, i64 1
- %tmp6854 = getelementptr inbounds float* %tmp6853, i64 1
- %tmp6855 = getelementptr inbounds float* %tmp6854, i64 1
- %tmp6856 = getelementptr inbounds float* %tmp6855, i64 1
- %tmp6857 = getelementptr inbounds float* %tmp6856, i64 1
- %tmp6858 = getelementptr inbounds float* %tmp6857, i64 1
- %tmp6859 = getelementptr inbounds float* %tmp6858, i64 1
- %tmp6860 = getelementptr inbounds float* %tmp6859, i64 1
- %tmp6861 = getelementptr inbounds float* %tmp6860, i64 1
- %tmp6862 = getelementptr inbounds float* %tmp6861, i64 1
- %tmp6863 = getelementptr inbounds float* %tmp6862, i64 1
- %tmp6864 = getelementptr inbounds float* %tmp6863, i64 1
- %tmp6865 = getelementptr inbounds float* %tmp6864, i64 1
- %tmp6866 = getelementptr inbounds float* %tmp6865, i64 1
- %tmp6867 = getelementptr inbounds float* %tmp6866, i64 1
- %tmp6868 = getelementptr inbounds float* %tmp6867, i64 1
- %tmp6869 = getelementptr inbounds float* %tmp6868, i64 1
- %tmp6870 = getelementptr inbounds float* %tmp6869, i64 1
- %tmp6871 = getelementptr inbounds float* %tmp6870, i64 1
- %tmp6872 = getelementptr inbounds float* %tmp6871, i64 1
- %tmp6873 = getelementptr inbounds float* %tmp6872, i64 1
- %tmp6874 = getelementptr inbounds float* %tmp6873, i64 1
- %tmp6875 = getelementptr inbounds float* %tmp6874, i64 1
- %tmp6876 = getelementptr inbounds float* %tmp6875, i64 1
- %tmp6877 = getelementptr inbounds float* %tmp6876, i64 1
- %tmp6878 = getelementptr inbounds float* %tmp6877, i64 1
- %tmp6879 = getelementptr inbounds float* %tmp6878, i64 1
- %tmp6880 = getelementptr inbounds float* %tmp6879, i64 1
- %tmp6881 = getelementptr inbounds float* %tmp6880, i64 1
- %tmp6882 = getelementptr inbounds float* %tmp6881, i64 1
- %tmp6883 = getelementptr inbounds float* %tmp6882, i64 1
- %tmp6884 = getelementptr inbounds float* %tmp6883, i64 1
- %tmp6885 = getelementptr inbounds float* %tmp6884, i64 1
- %tmp6886 = getelementptr inbounds float* %tmp6885, i64 1
- %tmp6887 = getelementptr inbounds float* %tmp6886, i64 1
- %tmp6888 = getelementptr inbounds float* %tmp6887, i64 1
- %tmp6889 = getelementptr inbounds float* %tmp6888, i64 1
- %tmp6890 = getelementptr inbounds float* %tmp6889, i64 1
- %tmp6891 = getelementptr inbounds float* %tmp6890, i64 1
- %tmp6892 = getelementptr inbounds float* %tmp6891, i64 1
- %tmp6893 = getelementptr inbounds float* %tmp6892, i64 1
- %tmp6894 = getelementptr inbounds float* %tmp6893, i64 1
- %tmp6895 = getelementptr inbounds float* %tmp6894, i64 1
- %tmp6896 = getelementptr inbounds float* %tmp6895, i64 1
- %tmp6897 = getelementptr inbounds float* %tmp6896, i64 1
- %tmp6898 = getelementptr inbounds float* %tmp6897, i64 1
- %tmp6899 = getelementptr inbounds float* %tmp6898, i64 1
- %tmp6900 = getelementptr inbounds float* %tmp6899, i64 1
- %tmp6901 = getelementptr inbounds float* %tmp6900, i64 1
- %tmp6902 = getelementptr inbounds float* %tmp6901, i64 1
- %tmp6903 = getelementptr inbounds float* %tmp6902, i64 1
- %tmp6904 = getelementptr inbounds float* %tmp6903, i64 1
- %tmp6905 = getelementptr inbounds float* %tmp6904, i64 1
- %tmp6906 = getelementptr inbounds float* %tmp6905, i64 1
- %tmp6907 = getelementptr inbounds float* %tmp6906, i64 1
- %tmp6908 = getelementptr inbounds float* %tmp6907, i64 1
- %tmp6909 = getelementptr inbounds float* %tmp6908, i64 1
- %tmp6910 = getelementptr inbounds float* %tmp6909, i64 1
- %tmp6911 = getelementptr inbounds float* %tmp6910, i64 1
- %tmp6912 = getelementptr inbounds float* %tmp6911, i64 1
- %tmp6913 = getelementptr inbounds float* %tmp6912, i64 1
- %tmp6914 = getelementptr inbounds float* %tmp6913, i64 1
- %tmp6915 = getelementptr inbounds float* %tmp6914, i64 1
- %tmp6916 = getelementptr inbounds float* %tmp6915, i64 1
- %tmp6917 = getelementptr inbounds float* %tmp6916, i64 1
- %tmp6918 = getelementptr inbounds float* %tmp6917, i64 1
- %tmp6919 = getelementptr inbounds float* %tmp6918, i64 1
- %tmp6920 = getelementptr inbounds float* %tmp6919, i64 1
- %tmp6921 = getelementptr inbounds float* %tmp6920, i64 1
- %tmp6922 = getelementptr inbounds float* %tmp6921, i64 1
- %tmp6923 = getelementptr inbounds float* %tmp6922, i64 1
- %tmp6924 = getelementptr inbounds float* %tmp6923, i64 1
- %tmp6925 = getelementptr inbounds float* %tmp6924, i64 1
- %tmp6926 = getelementptr inbounds float* %tmp6925, i64 1
- %tmp6927 = getelementptr inbounds float* %tmp6926, i64 1
- %tmp6928 = getelementptr inbounds float* %tmp6927, i64 1
- %tmp6929 = getelementptr inbounds float* %tmp6928, i64 1
- %tmp6930 = getelementptr inbounds float* %tmp6929, i64 1
- %tmp6931 = getelementptr inbounds float* %tmp6930, i64 1
- %tmp6932 = getelementptr inbounds float* %tmp6931, i64 1
- %tmp6933 = getelementptr inbounds float* %tmp6932, i64 1
- %tmp6934 = getelementptr inbounds float* %tmp6933, i64 1
- %tmp6935 = getelementptr inbounds float* %tmp6934, i64 1
- %tmp6936 = getelementptr inbounds float* %tmp6935, i64 1
- %tmp6937 = getelementptr inbounds float* %tmp6936, i64 1
- %tmp6938 = getelementptr inbounds float* %tmp6937, i64 1
- %tmp6939 = getelementptr inbounds float* %tmp6938, i64 1
- %tmp6940 = getelementptr inbounds float* %tmp6939, i64 1
- %tmp6941 = getelementptr inbounds float* %tmp6940, i64 1
- %tmp6942 = getelementptr inbounds float* %tmp6941, i64 1
- %tmp6943 = getelementptr inbounds float* %tmp6942, i64 1
- %tmp6944 = getelementptr inbounds float* %tmp6943, i64 1
- %tmp6945 = getelementptr inbounds float* %tmp6944, i64 1
- %tmp6946 = getelementptr inbounds float* %tmp6945, i64 1
- %tmp6947 = getelementptr inbounds float* %tmp6946, i64 1
- %tmp6948 = getelementptr inbounds float* %tmp6947, i64 1
- %tmp6949 = getelementptr inbounds float* %tmp6948, i64 1
- %tmp6950 = getelementptr inbounds float* %tmp6949, i64 1
- %tmp6951 = getelementptr inbounds float* %tmp6950, i64 1
- %tmp6952 = getelementptr inbounds float* %tmp6951, i64 1
- %tmp6953 = getelementptr inbounds float* %tmp6952, i64 1
- %tmp6954 = getelementptr inbounds float* %tmp6953, i64 1
- %tmp6955 = getelementptr inbounds float* %tmp6954, i64 1
- %tmp6956 = getelementptr inbounds float* %tmp6955, i64 1
- %tmp6957 = getelementptr inbounds float* %tmp6956, i64 1
- %tmp6958 = getelementptr inbounds float* %tmp6957, i64 1
- %tmp6959 = getelementptr inbounds float* %tmp6958, i64 1
- %tmp6960 = getelementptr inbounds float* %tmp6959, i64 1
- %tmp6961 = getelementptr inbounds float* %tmp6960, i64 1
- %tmp6962 = getelementptr inbounds float* %tmp6961, i64 1
- %tmp6963 = getelementptr inbounds float* %tmp6962, i64 1
- %tmp6964 = getelementptr inbounds float* %tmp6963, i64 1
- %tmp6965 = getelementptr inbounds float* %tmp6964, i64 1
- %tmp6966 = getelementptr inbounds float* %tmp6965, i64 1
- %tmp6967 = getelementptr inbounds float* %tmp6966, i64 1
- %tmp6968 = getelementptr inbounds float* %tmp6967, i64 1
- %tmp6969 = getelementptr inbounds float* %tmp6968, i64 1
- %tmp6970 = getelementptr inbounds float* %tmp6969, i64 1
- %tmp6971 = getelementptr inbounds float* %tmp6970, i64 1
- %tmp6972 = getelementptr inbounds float* %tmp6971, i64 1
- %tmp6973 = getelementptr inbounds float* %tmp6972, i64 1
- %tmp6974 = getelementptr inbounds float* %tmp6973, i64 1
- %tmp6975 = getelementptr inbounds float* %tmp6974, i64 1
- %tmp6976 = getelementptr inbounds float* %tmp6975, i64 1
- %tmp6977 = getelementptr inbounds float* %tmp6976, i64 1
- %tmp6978 = getelementptr inbounds float* %tmp6977, i64 1
- %tmp6979 = getelementptr inbounds float* %tmp6978, i64 1
- %tmp6980 = getelementptr inbounds float* %tmp6979, i64 1
- %tmp6981 = getelementptr inbounds float* %tmp6980, i64 1
- %tmp6982 = getelementptr inbounds float* %tmp6981, i64 1
- %tmp6983 = getelementptr inbounds float* %tmp6982, i64 1
- %tmp6984 = getelementptr inbounds float* %tmp6983, i64 1
- %tmp6985 = getelementptr inbounds float* %tmp6984, i64 1
- %tmp6986 = getelementptr inbounds float* %tmp6985, i64 1
- %tmp6987 = getelementptr inbounds float* %tmp6986, i64 1
- %tmp6988 = getelementptr inbounds float* %tmp6987, i64 1
- %tmp6989 = getelementptr inbounds float* %tmp6988, i64 1
- %tmp6990 = getelementptr inbounds float* %tmp6989, i64 1
- %tmp6991 = getelementptr inbounds float* %tmp6990, i64 1
- %tmp6992 = getelementptr inbounds float* %tmp6991, i64 1
- %tmp6993 = getelementptr inbounds float* %tmp6992, i64 1
- %tmp6994 = getelementptr inbounds float* %tmp6993, i64 1
- %tmp6995 = getelementptr inbounds float* %tmp6994, i64 1
- %tmp6996 = getelementptr inbounds float* %tmp6995, i64 1
- %tmp6997 = getelementptr inbounds float* %tmp6996, i64 1
- %tmp6998 = getelementptr inbounds float* %tmp6997, i64 1
- %tmp6999 = getelementptr inbounds float* %tmp6998, i64 1
- %tmp7000 = getelementptr inbounds float* %tmp6999, i64 1
- %tmp7001 = getelementptr inbounds float* %tmp7000, i64 1
- %tmp7002 = getelementptr inbounds float* %tmp7001, i64 1
- %tmp7003 = getelementptr inbounds float* %tmp7002, i64 1
- %tmp7004 = getelementptr inbounds float* %tmp7003, i64 1
- %tmp7005 = getelementptr inbounds float* %tmp7004, i64 1
- %tmp7006 = getelementptr inbounds float* %tmp7005, i64 1
- %tmp7007 = getelementptr inbounds float* %tmp7006, i64 1
- %tmp7008 = getelementptr inbounds float* %tmp7007, i64 1
- %tmp7009 = getelementptr inbounds float* %tmp7008, i64 1
- %tmp7010 = getelementptr inbounds float* %tmp7009, i64 1
- %tmp7011 = getelementptr inbounds float* %tmp7010, i64 1
- %tmp7012 = getelementptr inbounds float* %tmp7011, i64 1
- %tmp7013 = getelementptr inbounds float* %tmp7012, i64 1
- %tmp7014 = getelementptr inbounds float* %tmp7013, i64 1
- %tmp7015 = getelementptr inbounds float* %tmp7014, i64 1
- %tmp7016 = getelementptr inbounds float* %tmp7015, i64 1
- %tmp7017 = getelementptr inbounds float* %tmp7016, i64 1
- %tmp7018 = getelementptr inbounds float* %tmp7017, i64 1
- %tmp7019 = getelementptr inbounds float* %tmp7018, i64 1
- %tmp7020 = getelementptr inbounds float* %tmp7019, i64 1
- %tmp7021 = getelementptr inbounds float* %tmp7020, i64 1
- %tmp7022 = getelementptr inbounds float* %tmp7021, i64 1
- %tmp7023 = getelementptr inbounds float* %tmp7022, i64 1
- %tmp7024 = getelementptr inbounds float* %tmp7023, i64 1
- %tmp7025 = getelementptr inbounds float* %tmp7024, i64 1
- %tmp7026 = getelementptr inbounds float* %tmp7025, i64 1
- %tmp7027 = getelementptr inbounds float* %tmp7026, i64 1
- %tmp7028 = getelementptr inbounds float* %tmp7027, i64 1
- %tmp7029 = getelementptr inbounds float* %tmp7028, i64 1
- %tmp7030 = getelementptr inbounds float* %tmp7029, i64 1
- %tmp7031 = getelementptr inbounds float* %tmp7030, i64 1
- %tmp7032 = getelementptr inbounds float* %tmp7031, i64 1
- %tmp7033 = getelementptr inbounds float* %tmp7032, i64 1
- %tmp7034 = getelementptr inbounds float* %tmp7033, i64 1
- %tmp7035 = getelementptr inbounds float* %tmp7034, i64 1
- %tmp7036 = getelementptr inbounds float* %tmp7035, i64 1
- %tmp7037 = getelementptr inbounds float* %tmp7036, i64 1
- %tmp7038 = getelementptr inbounds float* %tmp7037, i64 1
- %tmp7039 = getelementptr inbounds float* %tmp7038, i64 1
- %tmp7040 = getelementptr inbounds float* %tmp7039, i64 1
- %tmp7041 = getelementptr inbounds float* %tmp7040, i64 1
- %tmp7042 = getelementptr inbounds float* %tmp7041, i64 1
- %tmp7043 = getelementptr inbounds float* %tmp7042, i64 1
- %tmp7044 = getelementptr inbounds float* %tmp7043, i64 1
- %tmp7045 = getelementptr inbounds float* %tmp7044, i64 1
- %tmp7046 = getelementptr inbounds float* %tmp7045, i64 1
- %tmp7047 = getelementptr inbounds float* %tmp7046, i64 1
- %tmp7048 = getelementptr inbounds float* %tmp7047, i64 1
- %tmp7049 = getelementptr inbounds float* %tmp7048, i64 1
- %tmp7050 = getelementptr inbounds float* %tmp7049, i64 1
- %tmp7051 = getelementptr inbounds float* %tmp7050, i64 1
- %tmp7052 = getelementptr inbounds float* %tmp7051, i64 1
- %tmp7053 = getelementptr inbounds float* %tmp7052, i64 1
- %tmp7054 = getelementptr inbounds float* %tmp7053, i64 1
- %tmp7055 = getelementptr inbounds float* %tmp7054, i64 1
- %tmp7056 = getelementptr inbounds float* %tmp7055, i64 1
- %tmp7057 = getelementptr inbounds float* %tmp7056, i64 1
- %tmp7058 = getelementptr inbounds float* %tmp7057, i64 1
- %tmp7059 = getelementptr inbounds float* %tmp7058, i64 1
- %tmp7060 = getelementptr inbounds float* %tmp7059, i64 1
- %tmp7061 = getelementptr inbounds float* %tmp7060, i64 1
- %tmp7062 = getelementptr inbounds float* %tmp7061, i64 1
- %tmp7063 = getelementptr inbounds float* %tmp7062, i64 1
- %tmp7064 = getelementptr inbounds float* %tmp7063, i64 1
- %tmp7065 = getelementptr inbounds float* %tmp7064, i64 1
- %tmp7066 = getelementptr inbounds float* %tmp7065, i64 1
- %tmp7067 = getelementptr inbounds float* %tmp7066, i64 1
- %tmp7068 = getelementptr inbounds float* %tmp7067, i64 1
- %tmp7069 = getelementptr inbounds float* %tmp7068, i64 1
- %tmp7070 = getelementptr inbounds float* %tmp7069, i64 1
- %tmp7071 = getelementptr inbounds float* %tmp7070, i64 1
- %tmp7072 = getelementptr inbounds float* %tmp7071, i64 1
- %tmp7073 = getelementptr inbounds float* %tmp7072, i64 1
- %tmp7074 = getelementptr inbounds float* %tmp7073, i64 1
- %tmp7075 = getelementptr inbounds float* %tmp7074, i64 1
- %tmp7076 = getelementptr inbounds float* %tmp7075, i64 1
- %tmp7077 = getelementptr inbounds float* %tmp7076, i64 1
- %tmp7078 = getelementptr inbounds float* %tmp7077, i64 1
- %tmp7079 = getelementptr inbounds float* %tmp7078, i64 1
- %tmp7080 = getelementptr inbounds float* %tmp7079, i64 1
- %tmp7081 = getelementptr inbounds float* %tmp7080, i64 1
- %tmp7082 = getelementptr inbounds float* %tmp7081, i64 1
- %tmp7083 = getelementptr inbounds float* %tmp7082, i64 1
- %tmp7084 = getelementptr inbounds float* %tmp7083, i64 1
- %tmp7085 = getelementptr inbounds float* %tmp7084, i64 1
- %tmp7086 = getelementptr inbounds float* %tmp7085, i64 1
- %tmp7087 = getelementptr inbounds float* %tmp7086, i64 1
- %tmp7088 = getelementptr inbounds float* %tmp7087, i64 1
- %tmp7089 = getelementptr inbounds float* %tmp7088, i64 1
- %tmp7090 = getelementptr inbounds float* %tmp7089, i64 1
- %tmp7091 = getelementptr inbounds float* %tmp7090, i64 1
- %tmp7092 = getelementptr inbounds float* %tmp7091, i64 1
- %tmp7093 = getelementptr inbounds float* %tmp7092, i64 1
- %tmp7094 = getelementptr inbounds float* %tmp7093, i64 1
- %tmp7095 = getelementptr inbounds float* %tmp7094, i64 1
- %tmp7096 = getelementptr inbounds float* %tmp7095, i64 1
- %tmp7097 = getelementptr inbounds float* %tmp7096, i64 1
- %tmp7098 = getelementptr inbounds float* %tmp7097, i64 1
- %tmp7099 = getelementptr inbounds float* %tmp7098, i64 1
- %tmp7100 = getelementptr inbounds float* %tmp7099, i64 1
- %tmp7101 = getelementptr inbounds float* %tmp7100, i64 1
- %tmp7102 = getelementptr inbounds float* %tmp7101, i64 1
- %tmp7103 = getelementptr inbounds float* %tmp7102, i64 1
- %tmp7104 = getelementptr inbounds float* %tmp7103, i64 1
- %tmp7105 = getelementptr inbounds float* %tmp7104, i64 1
- %tmp7106 = getelementptr inbounds float* %tmp7105, i64 1
- %tmp7107 = getelementptr inbounds float* %tmp7106, i64 1
- %tmp7108 = getelementptr inbounds float* %tmp7107, i64 1
- %tmp7109 = getelementptr inbounds float* %tmp7108, i64 1
- %tmp7110 = getelementptr inbounds float* %tmp7109, i64 1
- %tmp7111 = getelementptr inbounds float* %tmp7110, i64 1
- %tmp7112 = getelementptr inbounds float* %tmp7111, i64 1
- %tmp7113 = getelementptr inbounds float* %tmp7112, i64 1
- %tmp7114 = getelementptr inbounds float* %tmp7113, i64 1
- %tmp7115 = getelementptr inbounds float* %tmp7114, i64 1
- %tmp7116 = getelementptr inbounds float* %tmp7115, i64 1
- %tmp7117 = getelementptr inbounds float* %tmp7116, i64 1
- %tmp7118 = getelementptr inbounds float* %tmp7117, i64 1
- %tmp7119 = getelementptr inbounds float* %tmp7118, i64 1
- %tmp7120 = getelementptr inbounds float* %tmp7119, i64 1
- %tmp7121 = getelementptr inbounds float* %tmp7120, i64 1
- %tmp7122 = getelementptr inbounds float* %tmp7121, i64 1
- %tmp7123 = getelementptr inbounds float* %tmp7122, i64 1
- %tmp7124 = getelementptr inbounds float* %tmp7123, i64 1
- %tmp7125 = getelementptr inbounds float* %tmp7124, i64 1
- %tmp7126 = getelementptr inbounds float* %tmp7125, i64 1
- %tmp7127 = getelementptr inbounds float* %tmp7126, i64 1
- %tmp7128 = getelementptr inbounds float* %tmp7127, i64 1
- %tmp7129 = getelementptr inbounds float* %tmp7128, i64 1
- %tmp7130 = getelementptr inbounds float* %tmp7129, i64 1
- %tmp7131 = getelementptr inbounds float* %tmp7130, i64 1
- %tmp7132 = getelementptr inbounds float* %tmp7131, i64 1
- %tmp7133 = getelementptr inbounds float* %tmp7132, i64 1
- %tmp7134 = getelementptr inbounds float* %tmp7133, i64 1
- %tmp7135 = getelementptr inbounds float* %tmp7134, i64 1
- %tmp7136 = getelementptr inbounds float* %tmp7135, i64 1
- %tmp7137 = getelementptr inbounds float* %tmp7136, i64 1
- %tmp7138 = getelementptr inbounds float* %tmp7137, i64 1
- %tmp7139 = getelementptr inbounds float* %tmp7138, i64 1
- %tmp7140 = getelementptr inbounds float* %tmp7139, i64 1
- %tmp7141 = getelementptr inbounds float* %tmp7140, i64 1
- %tmp7142 = getelementptr inbounds float* %tmp7141, i64 1
- %tmp7143 = getelementptr inbounds float* %tmp7142, i64 1
- %tmp7144 = getelementptr inbounds float* %tmp7143, i64 1
- %tmp7145 = getelementptr inbounds float* %tmp7144, i64 1
- %tmp7146 = getelementptr inbounds float* %tmp7145, i64 1
- %tmp7147 = getelementptr inbounds float* %tmp7146, i64 1
- %tmp7148 = getelementptr inbounds float* %tmp7147, i64 1
- %tmp7149 = getelementptr inbounds float* %tmp7148, i64 1
- %tmp7150 = getelementptr inbounds float* %tmp7149, i64 1
- %tmp7151 = getelementptr inbounds float* %tmp7150, i64 1
- %tmp7152 = getelementptr inbounds float* %tmp7151, i64 1
- %tmp7153 = getelementptr inbounds float* %tmp7152, i64 1
- %tmp7154 = getelementptr inbounds float* %tmp7153, i64 1
- %tmp7155 = getelementptr inbounds float* %tmp7154, i64 1
- %tmp7156 = getelementptr inbounds float* %tmp7155, i64 1
- %tmp7157 = getelementptr inbounds float* %tmp7156, i64 1
- %tmp7158 = getelementptr inbounds float* %tmp7157, i64 1
- %tmp7159 = getelementptr inbounds float* %tmp7158, i64 1
- %tmp7160 = getelementptr inbounds float* %tmp7159, i64 1
- %tmp7161 = getelementptr inbounds float* %tmp7160, i64 1
- %tmp7162 = getelementptr inbounds float* %tmp7161, i64 1
- %tmp7163 = getelementptr inbounds float* %tmp7162, i64 1
- %tmp7164 = getelementptr inbounds float* %tmp7163, i64 1
- %tmp7165 = getelementptr inbounds float* %tmp7164, i64 1
- %tmp7166 = getelementptr inbounds float* %tmp7165, i64 1
- %tmp7167 = getelementptr inbounds float* %tmp7166, i64 1
- %tmp7168 = getelementptr inbounds float* %tmp7167, i64 1
- %tmp7169 = getelementptr inbounds float* %tmp7168, i64 1
- %tmp7170 = getelementptr inbounds float* %tmp7169, i64 1
- %tmp7171 = getelementptr inbounds float* %tmp7170, i64 1
- %tmp7172 = getelementptr inbounds float* %tmp7171, i64 1
- %tmp7173 = getelementptr inbounds float* %tmp7172, i64 1
- %tmp7174 = getelementptr inbounds float* %tmp7173, i64 1
- %tmp7175 = getelementptr inbounds float* %tmp7174, i64 1
- %tmp7176 = getelementptr inbounds float* %tmp7175, i64 1
- %tmp7177 = getelementptr inbounds float* %tmp7176, i64 1
- %tmp7178 = getelementptr inbounds float* %tmp7177, i64 1
- %tmp7179 = getelementptr inbounds float* %tmp7178, i64 1
- %tmp7180 = getelementptr inbounds float* %tmp7179, i64 1
- %tmp7181 = getelementptr inbounds float* %tmp7180, i64 1
- %tmp7182 = getelementptr inbounds float* %tmp7181, i64 1
- %tmp7183 = getelementptr inbounds float* %tmp7182, i64 1
- %tmp7184 = getelementptr inbounds float* %tmp7183, i64 1
- %tmp7185 = getelementptr inbounds float* %tmp7184, i64 1
- %tmp7186 = getelementptr inbounds float* %tmp7185, i64 1
- %tmp7187 = getelementptr inbounds float* %tmp7186, i64 1
- %tmp7188 = getelementptr inbounds float* %tmp7187, i64 1
- %tmp7189 = getelementptr inbounds float* %tmp7188, i64 1
- %tmp7190 = getelementptr inbounds float* %tmp7189, i64 1
- %tmp7191 = getelementptr inbounds float* %tmp7190, i64 1
- %tmp7192 = getelementptr inbounds float* %tmp7191, i64 1
- %tmp7193 = getelementptr inbounds float* %tmp7192, i64 1
- %tmp7194 = getelementptr inbounds float* %tmp7193, i64 1
- %tmp7195 = getelementptr inbounds float* %tmp7194, i64 1
- %tmp7196 = getelementptr inbounds float* %tmp7195, i64 1
- %tmp7197 = getelementptr inbounds float* %tmp7196, i64 1
- %tmp7198 = getelementptr inbounds float* %tmp7197, i64 1
- %tmp7199 = getelementptr inbounds float* %tmp7198, i64 1
- %tmp7200 = getelementptr inbounds float* %tmp7199, i64 1
- %tmp7201 = getelementptr inbounds float* %tmp7200, i64 1
- %tmp7202 = getelementptr inbounds float* %tmp7201, i64 1
- %tmp7203 = getelementptr inbounds float* %tmp7202, i64 1
- %tmp7204 = getelementptr inbounds float* %tmp7203, i64 1
- %tmp7205 = getelementptr inbounds float* %tmp7204, i64 1
- %tmp7206 = getelementptr inbounds float* %tmp7205, i64 1
- %tmp7207 = getelementptr inbounds float* %tmp7206, i64 1
- %tmp7208 = getelementptr inbounds float* %tmp7207, i64 1
- %tmp7209 = getelementptr inbounds float* %tmp7208, i64 1
- %tmp7210 = getelementptr inbounds float* %tmp7209, i64 1
- %tmp7211 = getelementptr inbounds float* %tmp7210, i64 1
- %tmp7212 = getelementptr inbounds float* %tmp7211, i64 1
- %tmp7213 = getelementptr inbounds float* %tmp7212, i64 1
- %tmp7214 = getelementptr inbounds float* %tmp7213, i64 1
- %tmp7215 = getelementptr inbounds float* %tmp7214, i64 1
- %tmp7216 = getelementptr inbounds float* %tmp7215, i64 1
- %tmp7217 = getelementptr inbounds float* %tmp7216, i64 1
- %tmp7218 = getelementptr inbounds float* %tmp7217, i64 1
- %tmp7219 = getelementptr inbounds float* %tmp7218, i64 1
- %tmp7220 = getelementptr inbounds float* %tmp7219, i64 1
- %tmp7221 = getelementptr inbounds float* %tmp7220, i64 1
- %tmp7222 = getelementptr inbounds float* %tmp7221, i64 1
- %tmp7223 = getelementptr inbounds float* %tmp7222, i64 1
- %tmp7224 = getelementptr inbounds float* %tmp7223, i64 1
- %tmp7225 = getelementptr inbounds float* %tmp7224, i64 1
- %tmp7226 = getelementptr inbounds float* %tmp7225, i64 1
- %tmp7227 = getelementptr inbounds float* %tmp7226, i64 1
- %tmp7228 = getelementptr inbounds float* %tmp7227, i64 1
- %tmp7229 = getelementptr inbounds float* %tmp7228, i64 1
- %tmp7230 = getelementptr inbounds float* %tmp7229, i64 1
- %tmp7231 = getelementptr inbounds float* %tmp7230, i64 1
- %tmp7232 = getelementptr inbounds float* %tmp7231, i64 1
- %tmp7233 = getelementptr inbounds float* %tmp7232, i64 1
- %tmp7234 = getelementptr inbounds float* %tmp7233, i64 1
- %tmp7235 = getelementptr inbounds float* %tmp7234, i64 1
- %tmp7236 = getelementptr inbounds float* %tmp7235, i64 1
- %tmp7237 = getelementptr inbounds float* %tmp7236, i64 1
- %tmp7238 = getelementptr inbounds float* %tmp7237, i64 1
- %tmp7239 = getelementptr inbounds float* %tmp7238, i64 1
- %tmp7240 = getelementptr inbounds float* %tmp7239, i64 1
- %tmp7241 = getelementptr inbounds float* %tmp7240, i64 1
- %tmp7242 = getelementptr inbounds float* %tmp7241, i64 1
- %tmp7243 = getelementptr inbounds float* %tmp7242, i64 1
- %tmp7244 = getelementptr inbounds float* %tmp7243, i64 1
- %tmp7245 = getelementptr inbounds float* %tmp7244, i64 1
- %tmp7246 = getelementptr inbounds float* %tmp7245, i64 1
- %tmp7247 = getelementptr inbounds float* %tmp7246, i64 1
- %tmp7248 = getelementptr inbounds float* %tmp7247, i64 1
- %tmp7249 = getelementptr inbounds float* %tmp7248, i64 1
- %tmp7250 = getelementptr inbounds float* %tmp7249, i64 1
- %tmp7251 = getelementptr inbounds float* %tmp7250, i64 1
- %tmp7252 = getelementptr inbounds float* %tmp7251, i64 1
- %tmp7253 = getelementptr inbounds float* %tmp7252, i64 1
- %tmp7254 = getelementptr inbounds float* %tmp7253, i64 1
- %tmp7255 = getelementptr inbounds float* %tmp7254, i64 1
- %tmp7256 = getelementptr inbounds float* %tmp7255, i64 1
- %tmp7257 = getelementptr inbounds float* %tmp7256, i64 1
- %tmp7258 = getelementptr inbounds float* %tmp7257, i64 1
- %tmp7259 = getelementptr inbounds float* %tmp7258, i64 1
- %tmp7260 = getelementptr inbounds float* %tmp7259, i64 1
- %tmp7261 = getelementptr inbounds float* %tmp7260, i64 1
- %tmp7262 = getelementptr inbounds float* %tmp7261, i64 1
- %tmp7263 = getelementptr inbounds float* %tmp7262, i64 1
- %tmp7264 = getelementptr inbounds float* %tmp7263, i64 1
- %tmp7265 = getelementptr inbounds float* %tmp7264, i64 1
- %tmp7266 = getelementptr inbounds float* %tmp7265, i64 1
- %tmp7267 = getelementptr inbounds float* %tmp7266, i64 1
- %tmp7268 = getelementptr inbounds float* %tmp7267, i64 1
- %tmp7269 = getelementptr inbounds float* %tmp7268, i64 1
- %tmp7270 = getelementptr inbounds float* %tmp7269, i64 1
- %tmp7271 = getelementptr inbounds float* %tmp7270, i64 1
- %tmp7272 = getelementptr inbounds float* %tmp7271, i64 1
- %tmp7273 = getelementptr inbounds float* %tmp7272, i64 1
- %tmp7274 = getelementptr inbounds float* %tmp7273, i64 1
- %tmp7275 = getelementptr inbounds float* %tmp7274, i64 1
- %tmp7276 = getelementptr inbounds float* %tmp7275, i64 1
- %tmp7277 = getelementptr inbounds float* %tmp7276, i64 1
- %tmp7278 = getelementptr inbounds float* %tmp7277, i64 1
- %tmp7279 = getelementptr inbounds float* %tmp7278, i64 1
- %tmp7280 = getelementptr inbounds float* %tmp7279, i64 1
- %tmp7281 = getelementptr inbounds float* %tmp7280, i64 1
- %tmp7282 = getelementptr inbounds float* %tmp7281, i64 1
- %tmp7283 = getelementptr inbounds float* %tmp7282, i64 1
- %tmp7284 = getelementptr inbounds float* %tmp7283, i64 1
- %tmp7285 = getelementptr inbounds float* %tmp7284, i64 1
- %tmp7286 = getelementptr inbounds float* %tmp7285, i64 1
- %tmp7287 = getelementptr inbounds float* %tmp7286, i64 1
- %tmp7288 = getelementptr inbounds float* %tmp7287, i64 1
- %tmp7289 = getelementptr inbounds float* %tmp7288, i64 1
- %tmp7290 = getelementptr inbounds float* %tmp7289, i64 1
- %tmp7291 = getelementptr inbounds float* %tmp7290, i64 1
- %tmp7292 = getelementptr inbounds float* %tmp7291, i64 1
- %tmp7293 = getelementptr inbounds float* %tmp7292, i64 1
- %tmp7294 = getelementptr inbounds float* %tmp7293, i64 1
- %tmp7295 = getelementptr inbounds float* %tmp7294, i64 1
- %tmp7296 = getelementptr inbounds float* %tmp7295, i64 1
- %tmp7297 = getelementptr inbounds float* %tmp7296, i64 1
- %tmp7298 = getelementptr inbounds float* %tmp7297, i64 1
- %tmp7299 = getelementptr inbounds float* %tmp7298, i64 1
- %tmp7300 = getelementptr inbounds float* %tmp7299, i64 1
- %tmp7301 = getelementptr inbounds float* %tmp7300, i64 1
- %tmp7302 = getelementptr inbounds float* %tmp7301, i64 1
- %tmp7303 = getelementptr inbounds float* %tmp7302, i64 1
- %tmp7304 = getelementptr inbounds float* %tmp7303, i64 1
- %tmp7305 = getelementptr inbounds float* %tmp7304, i64 1
- %tmp7306 = getelementptr inbounds float* %tmp7305, i64 1
- %tmp7307 = getelementptr inbounds float* %tmp7306, i64 1
- %tmp7308 = getelementptr inbounds float* %tmp7307, i64 1
- %tmp7309 = getelementptr inbounds float* %tmp7308, i64 1
- %tmp7310 = getelementptr inbounds float* %tmp7309, i64 1
- %tmp7311 = getelementptr inbounds float* %tmp7310, i64 1
- %tmp7312 = getelementptr inbounds float* %tmp7311, i64 1
- %tmp7313 = getelementptr inbounds float* %tmp7312, i64 1
- %tmp7314 = getelementptr inbounds float* %tmp7313, i64 1
- %tmp7315 = getelementptr inbounds float* %tmp7314, i64 1
- %tmp7316 = getelementptr inbounds float* %tmp7315, i64 1
- %tmp7317 = getelementptr inbounds float* %tmp7316, i64 1
- %tmp7318 = getelementptr inbounds float* %tmp7317, i64 1
- %tmp7319 = getelementptr inbounds float* %tmp7318, i64 1
- %tmp7320 = getelementptr inbounds float* %tmp7319, i64 1
- %tmp7321 = getelementptr inbounds float* %tmp7320, i64 1
- %tmp7322 = getelementptr inbounds float* %tmp7321, i64 1
- %tmp7323 = getelementptr inbounds float* %tmp7322, i64 1
- %tmp7324 = getelementptr inbounds float* %tmp7323, i64 1
- %tmp7325 = getelementptr inbounds float* %tmp7324, i64 1
- %tmp7326 = getelementptr inbounds float* %tmp7325, i64 1
- %tmp7327 = getelementptr inbounds float* %tmp7326, i64 1
- %tmp7328 = getelementptr inbounds float* %tmp7327, i64 1
- %tmp7329 = getelementptr inbounds float* %tmp7328, i64 1
- %tmp7330 = getelementptr inbounds float* %tmp7329, i64 1
- %tmp7331 = getelementptr inbounds float* %tmp7330, i64 1
- %tmp7332 = getelementptr inbounds float* %tmp7331, i64 1
- %tmp7333 = getelementptr inbounds float* %tmp7332, i64 1
- %tmp7334 = getelementptr inbounds float* %tmp7333, i64 1
- %tmp7335 = getelementptr inbounds float* %tmp7334, i64 1
- %tmp7336 = getelementptr inbounds float* %tmp7335, i64 1
- %tmp7337 = getelementptr inbounds float* %tmp7336, i64 1
- %tmp7338 = getelementptr inbounds float* %tmp7337, i64 1
- %tmp7339 = getelementptr inbounds float* %tmp7338, i64 1
- %tmp7340 = getelementptr inbounds float* %tmp7339, i64 1
- %tmp7341 = getelementptr inbounds float* %tmp7340, i64 1
- %tmp7342 = getelementptr inbounds float* %tmp7341, i64 1
- %tmp7343 = getelementptr inbounds float* %tmp7342, i64 1
- %tmp7344 = getelementptr inbounds float* %tmp7343, i64 1
- %tmp7345 = getelementptr inbounds float* %tmp7344, i64 1
- %tmp7346 = getelementptr inbounds float* %tmp7345, i64 1
- %tmp7347 = getelementptr inbounds float* %tmp7346, i64 1
- %tmp7348 = getelementptr inbounds float* %tmp7347, i64 1
- %tmp7349 = getelementptr inbounds float* %tmp7348, i64 1
- %tmp7350 = getelementptr inbounds float* %tmp7349, i64 1
- %tmp7351 = getelementptr inbounds float* %tmp7350, i64 1
- %tmp7352 = getelementptr inbounds float* %tmp7351, i64 1
- %tmp7353 = getelementptr inbounds float* %tmp7352, i64 1
- %tmp7354 = getelementptr inbounds float* %tmp7353, i64 1
- %tmp7355 = getelementptr inbounds float* %tmp7354, i64 1
- %tmp7356 = getelementptr inbounds float* %tmp7355, i64 1
- %tmp7357 = getelementptr inbounds float* %tmp7356, i64 1
- %tmp7358 = getelementptr inbounds float* %tmp7357, i64 1
- %tmp7359 = getelementptr inbounds float* %tmp7358, i64 1
- %tmp7360 = getelementptr inbounds float* %tmp7359, i64 1
- %tmp7361 = getelementptr inbounds float* %tmp7360, i64 1
- %tmp7362 = getelementptr inbounds float* %tmp7361, i64 1
- %tmp7363 = getelementptr inbounds float* %tmp7362, i64 1
- %tmp7364 = getelementptr inbounds float* %tmp7363, i64 1
- %tmp7365 = getelementptr inbounds float* %tmp7364, i64 1
- %tmp7366 = getelementptr inbounds float* %tmp7365, i64 1
- %tmp7367 = getelementptr inbounds float* %tmp7366, i64 1
- %tmp7368 = getelementptr inbounds float* %tmp7367, i64 1
- %tmp7369 = getelementptr inbounds float* %tmp7368, i64 1
- %tmp7370 = getelementptr inbounds float* %tmp7369, i64 1
- %tmp7371 = getelementptr inbounds float* %tmp7370, i64 1
- %tmp7372 = getelementptr inbounds float* %tmp7371, i64 1
- %tmp7373 = getelementptr inbounds float* %tmp7372, i64 1
- %tmp7374 = getelementptr inbounds float* %tmp7373, i64 1
- %tmp7375 = getelementptr inbounds float* %tmp7374, i64 1
- %tmp7376 = getelementptr inbounds float* %tmp7375, i64 1
- %tmp7377 = getelementptr inbounds float* %tmp7376, i64 1
- %tmp7378 = getelementptr inbounds float* %tmp7377, i64 1
- %tmp7379 = getelementptr inbounds float* %tmp7378, i64 1
- %tmp7380 = getelementptr inbounds float* %tmp7379, i64 1
- %tmp7381 = getelementptr inbounds float* %tmp7380, i64 1
- %tmp7382 = getelementptr inbounds float* %tmp7381, i64 1
- %tmp7383 = getelementptr inbounds float* %tmp7382, i64 1
- %tmp7384 = getelementptr inbounds float* %tmp7383, i64 1
- %tmp7385 = getelementptr inbounds float* %tmp7384, i64 1
- %tmp7386 = getelementptr inbounds float* %tmp7385, i64 1
- %tmp7387 = getelementptr inbounds float* %tmp7386, i64 1
- %tmp7388 = getelementptr inbounds float* %tmp7387, i64 1
- %tmp7389 = getelementptr inbounds float* %tmp7388, i64 1
- %tmp7390 = getelementptr inbounds float* %tmp7389, i64 1
- %tmp7391 = getelementptr inbounds float* %tmp7390, i64 1
- %tmp7392 = getelementptr inbounds float* %tmp7391, i64 1
- %tmp7393 = getelementptr inbounds float* %tmp7392, i64 1
- %tmp7394 = getelementptr inbounds float* %tmp7393, i64 1
- %tmp7395 = getelementptr inbounds float* %tmp7394, i64 1
- %tmp7396 = getelementptr inbounds float* %tmp7395, i64 1
- %tmp7397 = getelementptr inbounds float* %tmp7396, i64 1
- %tmp7398 = getelementptr inbounds float* %tmp7397, i64 1
- %tmp7399 = getelementptr inbounds float* %tmp7398, i64 1
- %tmp7400 = getelementptr inbounds float* %tmp7399, i64 1
- %tmp7401 = getelementptr inbounds float* %tmp7400, i64 1
- %tmp7402 = getelementptr inbounds float* %tmp7401, i64 1
- %tmp7403 = getelementptr inbounds float* %tmp7402, i64 1
- %tmp7404 = getelementptr inbounds float* %tmp7403, i64 1
- %tmp7405 = getelementptr inbounds float* %tmp7404, i64 1
- %tmp7406 = getelementptr inbounds float* %tmp7405, i64 1
- %tmp7407 = getelementptr inbounds float* %tmp7406, i64 1
- %tmp7408 = getelementptr inbounds float* %tmp7407, i64 1
- %tmp7409 = getelementptr inbounds float* %tmp7408, i64 1
- %tmp7410 = getelementptr inbounds float* %tmp7409, i64 1
- %tmp7411 = getelementptr inbounds float* %tmp7410, i64 1
- %tmp7412 = getelementptr inbounds float* %tmp7411, i64 1
- %tmp7413 = getelementptr inbounds float* %tmp7412, i64 1
- %tmp7414 = getelementptr inbounds float* %tmp7413, i64 1
- %tmp7415 = getelementptr inbounds float* %tmp7414, i64 1
- %tmp7416 = getelementptr inbounds float* %tmp7415, i64 1
- %tmp7417 = getelementptr inbounds float* %tmp7416, i64 1
- %tmp7418 = getelementptr inbounds float* %tmp7417, i64 1
- %tmp7419 = getelementptr inbounds float* %tmp7418, i64 1
- %tmp7420 = getelementptr inbounds float* %tmp7419, i64 1
- %tmp7421 = getelementptr inbounds float* %tmp7420, i64 1
- %tmp7422 = getelementptr inbounds float* %tmp7421, i64 1
- %tmp7423 = getelementptr inbounds float* %tmp7422, i64 1
- %tmp7424 = getelementptr inbounds float* %tmp7423, i64 1
- %tmp7425 = getelementptr inbounds float* %tmp7424, i64 1
- %tmp7426 = getelementptr inbounds float* %tmp7425, i64 1
- %tmp7427 = getelementptr inbounds float* %tmp7426, i64 1
- %tmp7428 = getelementptr inbounds float* %tmp7427, i64 1
- %tmp7429 = getelementptr inbounds float* %tmp7428, i64 1
- %tmp7430 = getelementptr inbounds float* %tmp7429, i64 1
- %tmp7431 = getelementptr inbounds float* %tmp7430, i64 1
- %tmp7432 = getelementptr inbounds float* %tmp7431, i64 1
- %tmp7433 = getelementptr inbounds float* %tmp7432, i64 1
- %tmp7434 = getelementptr inbounds float* %tmp7433, i64 1
- %tmp7435 = getelementptr inbounds float* %tmp7434, i64 1
- %tmp7436 = getelementptr inbounds float* %tmp7435, i64 1
- %tmp7437 = getelementptr inbounds float* %tmp7436, i64 1
- %tmp7438 = getelementptr inbounds float* %tmp7437, i64 1
- %tmp7439 = getelementptr inbounds float* %tmp7438, i64 1
- %tmp7440 = getelementptr inbounds float* %tmp7439, i64 1
- %tmp7441 = getelementptr inbounds float* %tmp7440, i64 1
- %tmp7442 = getelementptr inbounds float* %tmp7441, i64 1
- %tmp7443 = getelementptr inbounds float* %tmp7442, i64 1
- %tmp7444 = getelementptr inbounds float* %tmp7443, i64 1
- %tmp7445 = getelementptr inbounds float* %tmp7444, i64 1
- %tmp7446 = getelementptr inbounds float* %tmp7445, i64 1
- %tmp7447 = getelementptr inbounds float* %tmp7446, i64 1
- %tmp7448 = getelementptr inbounds float* %tmp7447, i64 1
- %tmp7449 = getelementptr inbounds float* %tmp7448, i64 1
- %tmp7450 = getelementptr inbounds float* %tmp7449, i64 1
- %tmp7451 = getelementptr inbounds float* %tmp7450, i64 1
- %tmp7452 = getelementptr inbounds float* %tmp7451, i64 1
- %tmp7453 = getelementptr inbounds float* %tmp7452, i64 1
- %tmp7454 = getelementptr inbounds float* %tmp7453, i64 1
- %tmp7455 = getelementptr inbounds float* %tmp7454, i64 1
- %tmp7456 = getelementptr inbounds float* %tmp7455, i64 1
- %tmp7457 = getelementptr inbounds float* %tmp7456, i64 1
- %tmp7458 = getelementptr inbounds float* %tmp7457, i64 1
- %tmp7459 = getelementptr inbounds float* %tmp7458, i64 1
- %tmp7460 = getelementptr inbounds float* %tmp7459, i64 1
- %tmp7461 = getelementptr inbounds float* %tmp7460, i64 1
- %tmp7462 = getelementptr inbounds float* %tmp7461, i64 1
- %tmp7463 = getelementptr inbounds float* %tmp7462, i64 1
- %tmp7464 = getelementptr inbounds float* %tmp7463, i64 1
- %tmp7465 = getelementptr inbounds float* %tmp7464, i64 1
- %tmp7466 = getelementptr inbounds float* %tmp7465, i64 1
- %tmp7467 = getelementptr inbounds float* %tmp7466, i64 1
- %tmp7468 = getelementptr inbounds float* %tmp7467, i64 1
- %tmp7469 = getelementptr inbounds float* %tmp7468, i64 1
- %tmp7470 = getelementptr inbounds float* %tmp7469, i64 1
- %tmp7471 = getelementptr inbounds float* %tmp7470, i64 1
- %tmp7472 = getelementptr inbounds float* %tmp7471, i64 1
- %tmp7473 = getelementptr inbounds float* %tmp7472, i64 1
- %tmp7474 = getelementptr inbounds float* %tmp7473, i64 1
- %tmp7475 = getelementptr inbounds float* %tmp7474, i64 1
- %tmp7476 = getelementptr inbounds float* %tmp7475, i64 1
- %tmp7477 = getelementptr inbounds float* %tmp7476, i64 1
- %tmp7478 = getelementptr inbounds float* %tmp7477, i64 1
- %tmp7479 = getelementptr inbounds float* %tmp7478, i64 1
- %tmp7480 = getelementptr inbounds float* %tmp7479, i64 1
- %tmp7481 = getelementptr inbounds float* %tmp7480, i64 1
- %tmp7482 = getelementptr inbounds float* %tmp7481, i64 1
- %tmp7483 = getelementptr inbounds float* %tmp7482, i64 1
- %tmp7484 = getelementptr inbounds float* %tmp7483, i64 1
- %tmp7485 = getelementptr inbounds float* %tmp7484, i64 1
- %tmp7486 = getelementptr inbounds float* %tmp7485, i64 1
- %tmp7487 = getelementptr inbounds float* %tmp7486, i64 1
- %tmp7488 = getelementptr inbounds float* %tmp7487, i64 1
- %tmp7489 = getelementptr inbounds float* %tmp7488, i64 1
- %tmp7490 = getelementptr inbounds float* %tmp7489, i64 1
- %tmp7491 = getelementptr inbounds float* %tmp7490, i64 1
- %tmp7492 = getelementptr inbounds float* %tmp7491, i64 1
- %tmp7493 = getelementptr inbounds float* %tmp7492, i64 1
- %tmp7494 = getelementptr inbounds float* %tmp7493, i64 1
- %tmp7495 = getelementptr inbounds float* %tmp7494, i64 1
- %tmp7496 = getelementptr inbounds float* %tmp7495, i64 1
- %tmp7497 = getelementptr inbounds float* %tmp7496, i64 1
- %tmp7498 = getelementptr inbounds float* %tmp7497, i64 1
- %tmp7499 = getelementptr inbounds float* %tmp7498, i64 1
- %tmp7500 = getelementptr inbounds float* %tmp7499, i64 1
- %tmp7501 = getelementptr inbounds float* %tmp7500, i64 1
- %tmp7502 = getelementptr inbounds float* %tmp7501, i64 1
- %tmp7503 = getelementptr inbounds float* %tmp7502, i64 1
- %tmp7504 = getelementptr inbounds float* %tmp7503, i64 1
- %tmp7505 = getelementptr inbounds float* %tmp7504, i64 1
- %tmp7506 = getelementptr inbounds float* %tmp7505, i64 1
- %tmp7507 = getelementptr inbounds float* %tmp7506, i64 1
- %tmp7508 = getelementptr inbounds float* %tmp7507, i64 1
- %tmp7509 = getelementptr inbounds float* %tmp7508, i64 1
- %tmp7510 = getelementptr inbounds float* %tmp7509, i64 1
- %tmp7511 = getelementptr inbounds float* %tmp7510, i64 1
- %tmp7512 = getelementptr inbounds float* %tmp7511, i64 1
- %tmp7513 = getelementptr inbounds float* %tmp7512, i64 1
- %tmp7514 = getelementptr inbounds float* %tmp7513, i64 1
- %tmp7515 = getelementptr inbounds float* %tmp7514, i64 1
- %tmp7516 = getelementptr inbounds float* %tmp7515, i64 1
- %tmp7517 = getelementptr inbounds float* %tmp7516, i64 1
- %tmp7518 = getelementptr inbounds float* %tmp7517, i64 1
- %tmp7519 = getelementptr inbounds float* %tmp7518, i64 1
- %tmp7520 = getelementptr inbounds float* %tmp7519, i64 1
- %tmp7521 = getelementptr inbounds float* %tmp7520, i64 1
- %tmp7522 = getelementptr inbounds float* %tmp7521, i64 1
- %tmp7523 = getelementptr inbounds float* %tmp7522, i64 1
- %tmp7524 = getelementptr inbounds float* %tmp7523, i64 1
- %tmp7525 = getelementptr inbounds float* %tmp7524, i64 1
- %tmp7526 = getelementptr inbounds float* %tmp7525, i64 1
- %tmp7527 = getelementptr inbounds float* %tmp7526, i64 1
- %tmp7528 = getelementptr inbounds float* %tmp7527, i64 1
- %tmp7529 = getelementptr inbounds float* %tmp7528, i64 1
- %tmp7530 = getelementptr inbounds float* %tmp7529, i64 1
- %tmp7531 = getelementptr inbounds float* %tmp7530, i64 1
- %tmp7532 = getelementptr inbounds float* %tmp7531, i64 1
- %tmp7533 = getelementptr inbounds float* %tmp7532, i64 1
- %tmp7534 = getelementptr inbounds float* %tmp7533, i64 1
- %tmp7535 = getelementptr inbounds float* %tmp7534, i64 1
- %tmp7536 = getelementptr inbounds float* %tmp7535, i64 1
- %tmp7537 = getelementptr inbounds float* %tmp7536, i64 1
- %tmp7538 = getelementptr inbounds float* %tmp7537, i64 1
- %tmp7539 = getelementptr inbounds float* %tmp7538, i64 1
- %tmp7540 = getelementptr inbounds float* %tmp7539, i64 1
- %tmp7541 = getelementptr inbounds float* %tmp7540, i64 1
- %tmp7542 = getelementptr inbounds float* %tmp7541, i64 1
- %tmp7543 = getelementptr inbounds float* %tmp7542, i64 1
- %tmp7544 = getelementptr inbounds float* %tmp7543, i64 1
- %tmp7545 = getelementptr inbounds float* %tmp7544, i64 1
- %tmp7546 = getelementptr inbounds float* %tmp7545, i64 1
- %tmp7547 = getelementptr inbounds float* %tmp7546, i64 1
- %tmp7548 = getelementptr inbounds float* %tmp7547, i64 1
- %tmp7549 = getelementptr inbounds float* %tmp7548, i64 1
- %tmp7550 = getelementptr inbounds float* %tmp7549, i64 1
- %tmp7551 = getelementptr inbounds float* %tmp7550, i64 1
- %tmp7552 = getelementptr inbounds float* %tmp7551, i64 1
- %tmp7553 = getelementptr inbounds float* %tmp7552, i64 1
- %tmp7554 = getelementptr inbounds float* %tmp7553, i64 1
- %tmp7555 = getelementptr inbounds float* %tmp7554, i64 1
- %tmp7556 = getelementptr inbounds float* %tmp7555, i64 1
- %tmp7557 = getelementptr inbounds float* %tmp7556, i64 1
- %tmp7558 = getelementptr inbounds float* %tmp7557, i64 1
- %tmp7559 = getelementptr inbounds float* %tmp7558, i64 1
- %tmp7560 = getelementptr inbounds float* %tmp7559, i64 1
- %tmp7561 = getelementptr inbounds float* %tmp7560, i64 1
- %tmp7562 = getelementptr inbounds float* %tmp7561, i64 1
- %tmp7563 = getelementptr inbounds float* %tmp7562, i64 1
- %tmp7564 = getelementptr inbounds float* %tmp7563, i64 1
- %tmp7565 = getelementptr inbounds float* %tmp7564, i64 1
- %tmp7566 = getelementptr inbounds float* %tmp7565, i64 1
- %tmp7567 = getelementptr inbounds float* %tmp7566, i64 1
- %tmp7568 = getelementptr inbounds float* %tmp7567, i64 1
- %tmp7569 = getelementptr inbounds float* %tmp7568, i64 1
- %tmp7570 = getelementptr inbounds float* %tmp7569, i64 1
- %tmp7571 = getelementptr inbounds float* %tmp7570, i64 1
- %tmp7572 = getelementptr inbounds float* %tmp7571, i64 1
- %tmp7573 = getelementptr inbounds float* %tmp7572, i64 1
- %tmp7574 = getelementptr inbounds float* %tmp7573, i64 1
- %tmp7575 = getelementptr inbounds float* %tmp7574, i64 1
- %tmp7576 = getelementptr inbounds float* %tmp7575, i64 1
- %tmp7577 = getelementptr inbounds float* %tmp7576, i64 1
- %tmp7578 = getelementptr inbounds float* %tmp7577, i64 1
- %tmp7579 = getelementptr inbounds float* %tmp7578, i64 1
- %tmp7580 = getelementptr inbounds float* %tmp7579, i64 1
- %tmp7581 = getelementptr inbounds float* %tmp7580, i64 1
- %tmp7582 = getelementptr inbounds float* %tmp7581, i64 1
- %tmp7583 = getelementptr inbounds float* %tmp7582, i64 1
- %tmp7584 = getelementptr inbounds float* %tmp7583, i64 1
- %tmp7585 = getelementptr inbounds float* %tmp7584, i64 1
- %tmp7586 = getelementptr inbounds float* %tmp7585, i64 1
- %tmp7587 = getelementptr inbounds float* %tmp7586, i64 1
- %tmp7588 = getelementptr inbounds float* %tmp7587, i64 1
- %tmp7589 = getelementptr inbounds float* %tmp7588, i64 1
- %tmp7590 = getelementptr inbounds float* %tmp7589, i64 1
- %tmp7591 = getelementptr inbounds float* %tmp7590, i64 1
- %tmp7592 = getelementptr inbounds float* %tmp7591, i64 1
- %tmp7593 = getelementptr inbounds float* %tmp7592, i64 1
- %tmp7594 = getelementptr inbounds float* %tmp7593, i64 1
- %tmp7595 = getelementptr inbounds float* %tmp7594, i64 1
- %tmp7596 = getelementptr inbounds float* %tmp7595, i64 1
- %tmp7597 = getelementptr inbounds float* %tmp7596, i64 1
- %tmp7598 = getelementptr inbounds float* %tmp7597, i64 1
- %tmp7599 = getelementptr inbounds float* %tmp7598, i64 1
- %tmp7600 = getelementptr inbounds float* %tmp7599, i64 1
- %tmp7601 = getelementptr inbounds float* %tmp7600, i64 1
- %tmp7602 = getelementptr inbounds float* %tmp7601, i64 1
- %tmp7603 = getelementptr inbounds float* %tmp7602, i64 1
- %tmp7604 = getelementptr inbounds float* %tmp7603, i64 1
- %tmp7605 = getelementptr inbounds float* %tmp7604, i64 1
- %tmp7606 = getelementptr inbounds float* %tmp7605, i64 1
- %tmp7607 = getelementptr inbounds float* %tmp7606, i64 1
- %tmp7608 = getelementptr inbounds float* %tmp7607, i64 1
- %tmp7609 = getelementptr inbounds float* %tmp7608, i64 1
- %tmp7610 = getelementptr inbounds float* %tmp7609, i64 1
- %tmp7611 = getelementptr inbounds float* %tmp7610, i64 1
- %tmp7612 = getelementptr inbounds float* %tmp7611, i64 1
- %tmp7613 = getelementptr inbounds float* %tmp7612, i64 1
- %tmp7614 = getelementptr inbounds float* %tmp7613, i64 1
- %tmp7615 = getelementptr inbounds float* %tmp7614, i64 1
- %tmp7616 = getelementptr inbounds float* %tmp7615, i64 1
- %tmp7617 = getelementptr inbounds float* %tmp7616, i64 1
- %tmp7618 = getelementptr inbounds float* %tmp7617, i64 1
- %tmp7619 = getelementptr inbounds float* %tmp7618, i64 1
- %tmp7620 = getelementptr inbounds float* %tmp7619, i64 1
- %tmp7621 = getelementptr inbounds float* %tmp7620, i64 1
- %tmp7622 = getelementptr inbounds float* %tmp7621, i64 1
- %tmp7623 = getelementptr inbounds float* %tmp7622, i64 1
- %tmp7624 = getelementptr inbounds float* %tmp7623, i64 1
- %tmp7625 = getelementptr inbounds float* %tmp7624, i64 1
- %tmp7626 = getelementptr inbounds float* %tmp7625, i64 1
- %tmp7627 = getelementptr inbounds float* %tmp7626, i64 1
- %tmp7628 = getelementptr inbounds float* %tmp7627, i64 1
- %tmp7629 = getelementptr inbounds float* %tmp7628, i64 1
- %tmp7630 = getelementptr inbounds float* %tmp7629, i64 1
- %tmp7631 = getelementptr inbounds float* %tmp7630, i64 1
- %tmp7632 = getelementptr inbounds float* %tmp7631, i64 1
- %tmp7633 = getelementptr inbounds float* %tmp7632, i64 1
- %tmp7634 = getelementptr inbounds float* %tmp7633, i64 1
- %tmp7635 = getelementptr inbounds float* %tmp7634, i64 1
- %tmp7636 = getelementptr inbounds float* %tmp7635, i64 1
- %tmp7637 = getelementptr inbounds float* %tmp7636, i64 1
- %tmp7638 = getelementptr inbounds float* %tmp7637, i64 1
- %tmp7639 = getelementptr inbounds float* %tmp7638, i64 1
- %tmp7640 = getelementptr inbounds float* %tmp7639, i64 1
- %tmp7641 = getelementptr inbounds float* %tmp7640, i64 1
- %tmp7642 = getelementptr inbounds float* %tmp7641, i64 1
- %tmp7643 = getelementptr inbounds float* %tmp7642, i64 1
- %tmp7644 = getelementptr inbounds float* %tmp7643, i64 1
- %tmp7645 = getelementptr inbounds float* %tmp7644, i64 1
- %tmp7646 = getelementptr inbounds float* %tmp7645, i64 1
- %tmp7647 = getelementptr inbounds float* %tmp7646, i64 1
- %tmp7648 = getelementptr inbounds float* %tmp7647, i64 1
- %tmp7649 = getelementptr inbounds float* %tmp7648, i64 1
- %tmp7650 = getelementptr inbounds float* %tmp7649, i64 1
- %tmp7651 = getelementptr inbounds float* %tmp7650, i64 1
- %tmp7652 = getelementptr inbounds float* %tmp7651, i64 1
- %tmp7653 = getelementptr inbounds float* %tmp7652, i64 1
- %tmp7654 = getelementptr inbounds float* %tmp7653, i64 1
- %tmp7655 = getelementptr inbounds float* %tmp7654, i64 1
- %tmp7656 = getelementptr inbounds float* %tmp7655, i64 1
- %tmp7657 = getelementptr inbounds float* %tmp7656, i64 1
- %tmp7658 = getelementptr inbounds float* %tmp7657, i64 1
- %tmp7659 = getelementptr inbounds float* %tmp7658, i64 1
- %tmp7660 = getelementptr inbounds float* %tmp7659, i64 1
- %tmp7661 = getelementptr inbounds float* %tmp7660, i64 1
- %tmp7662 = getelementptr inbounds float* %tmp7661, i64 1
- %tmp7663 = getelementptr inbounds float* %tmp7662, i64 1
- %tmp7664 = getelementptr inbounds float* %tmp7663, i64 1
- %tmp7665 = getelementptr inbounds float* %tmp7664, i64 1
- %tmp7666 = getelementptr inbounds float* %tmp7665, i64 1
- %tmp7667 = getelementptr inbounds float* %tmp7666, i64 1
- %tmp7668 = getelementptr inbounds float* %tmp7667, i64 1
- %tmp7669 = getelementptr inbounds float* %tmp7668, i64 1
- %tmp7670 = getelementptr inbounds float* %tmp7669, i64 1
- %tmp7671 = getelementptr inbounds float* %tmp7670, i64 1
- %tmp7672 = getelementptr inbounds float* %tmp7671, i64 1
- %tmp7673 = getelementptr inbounds float* %tmp7672, i64 1
- %tmp7674 = getelementptr inbounds float* %tmp7673, i64 1
- %tmp7675 = getelementptr inbounds float* %tmp7674, i64 1
- %tmp7676 = getelementptr inbounds float* %tmp7675, i64 1
- %tmp7677 = getelementptr inbounds float* %tmp7676, i64 1
- %tmp7678 = getelementptr inbounds float* %tmp7677, i64 1
- %tmp7679 = getelementptr inbounds float* %tmp7678, i64 1
- %tmp7680 = getelementptr inbounds float* %tmp7679, i64 1
- %tmp7681 = getelementptr inbounds float* %tmp7680, i64 1
- %tmp7682 = getelementptr inbounds float* %tmp7681, i64 1
- %tmp7683 = getelementptr inbounds float* %tmp7682, i64 1
- %tmp7684 = getelementptr inbounds float* %tmp7683, i64 1
- %tmp7685 = getelementptr inbounds float* %tmp7684, i64 1
- %tmp7686 = getelementptr inbounds float* %tmp7685, i64 1
- %tmp7687 = getelementptr inbounds float* %tmp7686, i64 1
- %tmp7688 = getelementptr inbounds float* %tmp7687, i64 1
- %tmp7689 = getelementptr inbounds float* %tmp7688, i64 1
- %tmp7690 = getelementptr inbounds float* %tmp7689, i64 1
- %tmp7691 = getelementptr inbounds float* %tmp7690, i64 1
- %tmp7692 = getelementptr inbounds float* %tmp7691, i64 1
- %tmp7693 = getelementptr inbounds float* %tmp7692, i64 1
- %tmp7694 = getelementptr inbounds float* %tmp7693, i64 1
- %tmp7695 = getelementptr inbounds float* %tmp7694, i64 1
- %tmp7696 = getelementptr inbounds float* %tmp7695, i64 1
- %tmp7697 = getelementptr inbounds float* %tmp7696, i64 1
- %tmp7698 = getelementptr inbounds float* %tmp7697, i64 1
- %tmp7699 = getelementptr inbounds float* %tmp7698, i64 1
- %tmp7700 = getelementptr inbounds float* %tmp7699, i64 1
- %tmp7701 = getelementptr inbounds float* %tmp7700, i64 1
- %tmp7702 = getelementptr inbounds float* %tmp7701, i64 1
- %tmp7703 = getelementptr inbounds float* %tmp7702, i64 1
- %tmp7704 = getelementptr inbounds float* %tmp7703, i64 1
- %tmp7705 = getelementptr inbounds float* %tmp7704, i64 1
- %tmp7706 = getelementptr inbounds float* %tmp7705, i64 1
- %tmp7707 = getelementptr inbounds float* %tmp7706, i64 1
- %tmp7708 = getelementptr inbounds float* %tmp7707, i64 1
- %tmp7709 = getelementptr inbounds float* %tmp7708, i64 1
- %tmp7710 = getelementptr inbounds float* %tmp7709, i64 1
- %tmp7711 = getelementptr inbounds float* %tmp7710, i64 1
- %tmp7712 = getelementptr inbounds float* %tmp7711, i64 1
- %tmp7713 = getelementptr inbounds float* %tmp7712, i64 1
- %tmp7714 = getelementptr inbounds float* %tmp7713, i64 1
- %tmp7715 = getelementptr inbounds float* %tmp7714, i64 1
- %tmp7716 = getelementptr inbounds float* %tmp7715, i64 1
- %tmp7717 = getelementptr inbounds float* %tmp7716, i64 1
- %tmp7718 = getelementptr inbounds float* %tmp7717, i64 1
- %tmp7719 = getelementptr inbounds float* %tmp7718, i64 1
- %tmp7720 = getelementptr inbounds float* %tmp7719, i64 1
- %tmp7721 = getelementptr inbounds float* %tmp7720, i64 1
- %tmp7722 = getelementptr inbounds float* %tmp7721, i64 1
- %tmp7723 = getelementptr inbounds float* %tmp7722, i64 1
- %tmp7724 = getelementptr inbounds float* %tmp7723, i64 1
- %tmp7725 = getelementptr inbounds float* %tmp7724, i64 1
- %tmp7726 = getelementptr inbounds float* %tmp7725, i64 1
- %tmp7727 = getelementptr inbounds float* %tmp7726, i64 1
- %tmp7728 = getelementptr inbounds float* %tmp7727, i64 1
- %tmp7729 = getelementptr inbounds float* %tmp7728, i64 1
- %tmp7730 = getelementptr inbounds float* %tmp7729, i64 1
- %tmp7731 = getelementptr inbounds float* %tmp7730, i64 1
- %tmp7732 = getelementptr inbounds float* %tmp7731, i64 1
- %tmp7733 = getelementptr inbounds float* %tmp7732, i64 1
- %tmp7734 = getelementptr inbounds float* %tmp7733, i64 1
- %tmp7735 = getelementptr inbounds float* %tmp7734, i64 1
- %tmp7736 = getelementptr inbounds float* %tmp7735, i64 1
- %tmp7737 = getelementptr inbounds float* %tmp7736, i64 1
- %tmp7738 = getelementptr inbounds float* %tmp7737, i64 1
- %tmp7739 = getelementptr inbounds float* %tmp7738, i64 1
- %tmp7740 = getelementptr inbounds float* %tmp7739, i64 1
- %tmp7741 = getelementptr inbounds float* %tmp7740, i64 1
- %tmp7742 = getelementptr inbounds float* %tmp7741, i64 1
- %tmp7743 = getelementptr inbounds float* %tmp7742, i64 1
- %tmp7744 = getelementptr inbounds float* %tmp7743, i64 1
- %tmp7745 = getelementptr inbounds float* %tmp7744, i64 1
- %tmp7746 = getelementptr inbounds float* %tmp7745, i64 1
- %tmp7747 = getelementptr inbounds float* %tmp7746, i64 1
- %tmp7748 = getelementptr inbounds float* %tmp7747, i64 1
- %tmp7749 = getelementptr inbounds float* %tmp7748, i64 1
- %tmp7750 = getelementptr inbounds float* %tmp7749, i64 1
- %tmp7751 = getelementptr inbounds float* %tmp7750, i64 1
- %tmp7752 = getelementptr inbounds float* %tmp7751, i64 1
- %tmp7753 = getelementptr inbounds float* %tmp7752, i64 1
- %tmp7754 = getelementptr inbounds float* %tmp7753, i64 1
- %tmp7755 = getelementptr inbounds float* %tmp7754, i64 1
- %tmp7756 = getelementptr inbounds float* %tmp7755, i64 1
- %tmp7757 = getelementptr inbounds float* %tmp7756, i64 1
- %tmp7758 = getelementptr inbounds float* %tmp7757, i64 1
- %tmp7759 = getelementptr inbounds float* %tmp7758, i64 1
- %tmp7760 = getelementptr inbounds float* %tmp7759, i64 1
- %tmp7761 = getelementptr inbounds float* %tmp7760, i64 1
- %tmp7762 = getelementptr inbounds float* %tmp7761, i64 1
- %tmp7763 = getelementptr inbounds float* %tmp7762, i64 1
- %tmp7764 = getelementptr inbounds float* %tmp7763, i64 1
- %tmp7765 = getelementptr inbounds float* %tmp7764, i64 1
- %tmp7766 = getelementptr inbounds float* %tmp7765, i64 1
- %tmp7767 = getelementptr inbounds float* %tmp7766, i64 1
- %tmp7768 = getelementptr inbounds float* %tmp7767, i64 1
- %tmp7769 = getelementptr inbounds float* %tmp7768, i64 1
- %tmp7770 = getelementptr inbounds float* %tmp7769, i64 1
- %tmp7771 = getelementptr inbounds float* %tmp7770, i64 1
- %tmp7772 = getelementptr inbounds float* %tmp7771, i64 1
- %tmp7773 = getelementptr inbounds float* %tmp7772, i64 1
- %tmp7774 = getelementptr inbounds float* %tmp7773, i64 1
- %tmp7775 = getelementptr inbounds float* %tmp7774, i64 1
- %tmp7776 = getelementptr inbounds float* %tmp7775, i64 1
- %tmp7777 = getelementptr inbounds float* %tmp7776, i64 1
- %tmp7778 = getelementptr inbounds float* %tmp7777, i64 1
- %tmp7779 = getelementptr inbounds float* %tmp7778, i64 1
- %tmp7780 = getelementptr inbounds float* %tmp7779, i64 1
- %tmp7781 = getelementptr inbounds float* %tmp7780, i64 1
- %tmp7782 = getelementptr inbounds float* %tmp7781, i64 1
- %tmp7783 = getelementptr inbounds float* %tmp7782, i64 1
- %tmp7784 = getelementptr inbounds float* %tmp7783, i64 1
- %tmp7785 = getelementptr inbounds float* %tmp7784, i64 1
- %tmp7786 = getelementptr inbounds float* %tmp7785, i64 1
- %tmp7787 = getelementptr inbounds float* %tmp7786, i64 1
- %tmp7788 = getelementptr inbounds float* %tmp7787, i64 1
- %tmp7789 = getelementptr inbounds float* %tmp7788, i64 1
- %tmp7790 = getelementptr inbounds float* %tmp7789, i64 1
- %tmp7791 = getelementptr inbounds float* %tmp7790, i64 1
- %tmp7792 = getelementptr inbounds float* %tmp7791, i64 1
- %tmp7793 = getelementptr inbounds float* %tmp7792, i64 1
- %tmp7794 = getelementptr inbounds float* %tmp7793, i64 1
- %tmp7795 = getelementptr inbounds float* %tmp7794, i64 1
- %tmp7796 = getelementptr inbounds float* %tmp7795, i64 1
- %tmp7797 = getelementptr inbounds float* %tmp7796, i64 1
- %tmp7798 = getelementptr inbounds float* %tmp7797, i64 1
- %tmp7799 = getelementptr inbounds float* %tmp7798, i64 1
- %tmp7800 = getelementptr inbounds float* %tmp7799, i64 1
- %tmp7801 = getelementptr inbounds float* %tmp7800, i64 1
- %tmp7802 = getelementptr inbounds float* %tmp7801, i64 1
- %tmp7803 = getelementptr inbounds float* %tmp7802, i64 1
- %tmp7804 = getelementptr inbounds float* %tmp7803, i64 1
- %tmp7805 = getelementptr inbounds float* %tmp7804, i64 1
- %tmp7806 = getelementptr inbounds float* %tmp7805, i64 1
- %tmp7807 = getelementptr inbounds float* %tmp7806, i64 1
- %tmp7808 = getelementptr inbounds float* %tmp7807, i64 1
- %tmp7809 = getelementptr inbounds float* %tmp7808, i64 1
- %tmp7810 = getelementptr inbounds float* %tmp7809, i64 1
- %tmp7811 = getelementptr inbounds float* %tmp7810, i64 1
- %tmp7812 = getelementptr inbounds float* %tmp7811, i64 1
- %tmp7813 = getelementptr inbounds float* %tmp7812, i64 1
- %tmp7814 = getelementptr inbounds float* %tmp7813, i64 1
- %tmp7815 = getelementptr inbounds float* %tmp7814, i64 1
- %tmp7816 = getelementptr inbounds float* %tmp7815, i64 1
- %tmp7817 = getelementptr inbounds float* %tmp7816, i64 1
- %tmp7818 = getelementptr inbounds float* %tmp7817, i64 1
- %tmp7819 = getelementptr inbounds float* %tmp7818, i64 1
- %tmp7820 = getelementptr inbounds float* %tmp7819, i64 1
- %tmp7821 = getelementptr inbounds float* %tmp7820, i64 1
- %tmp7822 = getelementptr inbounds float* %tmp7821, i64 1
- %tmp7823 = getelementptr inbounds float* %tmp7822, i64 1
- %tmp7824 = getelementptr inbounds float* %tmp7823, i64 1
- %tmp7825 = getelementptr inbounds float* %tmp7824, i64 1
- %tmp7826 = getelementptr inbounds float* %tmp7825, i64 1
- %tmp7827 = getelementptr inbounds float* %tmp7826, i64 1
- %tmp7828 = getelementptr inbounds float* %tmp7827, i64 1
- %tmp7829 = getelementptr inbounds float* %tmp7828, i64 1
- %tmp7830 = getelementptr inbounds float* %tmp7829, i64 1
- %tmp7831 = getelementptr inbounds float* %tmp7830, i64 1
- %tmp7832 = getelementptr inbounds float* %tmp7831, i64 1
- %tmp7833 = getelementptr inbounds float* %tmp7832, i64 1
- %tmp7834 = getelementptr inbounds float* %tmp7833, i64 1
- %tmp7835 = getelementptr inbounds float* %tmp7834, i64 1
- %tmp7836 = getelementptr inbounds float* %tmp7835, i64 1
- %tmp7837 = getelementptr inbounds float* %tmp7836, i64 1
- %tmp7838 = getelementptr inbounds float* %tmp7837, i64 1
- %tmp7839 = getelementptr inbounds float* %tmp7838, i64 1
- %tmp7840 = getelementptr inbounds float* %tmp7839, i64 1
- %tmp7841 = getelementptr inbounds float* %tmp7840, i64 1
- %tmp7842 = getelementptr inbounds float* %tmp7841, i64 1
- %tmp7843 = getelementptr inbounds float* %tmp7842, i64 1
- %tmp7844 = getelementptr inbounds float* %tmp7843, i64 1
- %tmp7845 = getelementptr inbounds float* %tmp7844, i64 1
- %tmp7846 = getelementptr inbounds float* %tmp7845, i64 1
- %tmp7847 = getelementptr inbounds float* %tmp7846, i64 1
- %tmp7848 = getelementptr inbounds float* %tmp7847, i64 1
- %tmp7849 = getelementptr inbounds float* %tmp7848, i64 1
- %tmp7850 = getelementptr inbounds float* %tmp7849, i64 1
- %tmp7851 = getelementptr inbounds float* %tmp7850, i64 1
- %tmp7852 = getelementptr inbounds float* %tmp7851, i64 1
- %tmp7853 = getelementptr inbounds float* %tmp7852, i64 1
- %tmp7854 = getelementptr inbounds float* %tmp7853, i64 1
- %tmp7855 = getelementptr inbounds float* %tmp7854, i64 1
- %tmp7856 = getelementptr inbounds float* %tmp7855, i64 1
- %tmp7857 = getelementptr inbounds float* %tmp7856, i64 1
- %tmp7858 = getelementptr inbounds float* %tmp7857, i64 1
- %tmp7859 = getelementptr inbounds float* %tmp7858, i64 1
- %tmp7860 = getelementptr inbounds float* %tmp7859, i64 1
- %tmp7861 = getelementptr inbounds float* %tmp7860, i64 1
- %tmp7862 = getelementptr inbounds float* %tmp7861, i64 1
- %tmp7863 = getelementptr inbounds float* %tmp7862, i64 1
- %tmp7864 = getelementptr inbounds float* %tmp7863, i64 1
- %tmp7865 = getelementptr inbounds float* %tmp7864, i64 1
- %tmp7866 = getelementptr inbounds float* %tmp7865, i64 1
- %tmp7867 = getelementptr inbounds float* %tmp7866, i64 1
- %tmp7868 = getelementptr inbounds float* %tmp7867, i64 1
- %tmp7869 = getelementptr inbounds float* %tmp7868, i64 1
- %tmp7870 = getelementptr inbounds float* %tmp7869, i64 1
- %tmp7871 = getelementptr inbounds float* %tmp7870, i64 1
- %tmp7872 = getelementptr inbounds float* %tmp7871, i64 1
- %tmp7873 = getelementptr inbounds float* %tmp7872, i64 1
- %tmp7874 = getelementptr inbounds float* %tmp7873, i64 1
- %tmp7875 = getelementptr inbounds float* %tmp7874, i64 1
- %tmp7876 = getelementptr inbounds float* %tmp7875, i64 1
- %tmp7877 = getelementptr inbounds float* %tmp7876, i64 1
- %tmp7878 = getelementptr inbounds float* %tmp7877, i64 1
- %tmp7879 = getelementptr inbounds float* %tmp7878, i64 1
- %tmp7880 = getelementptr inbounds float* %tmp7879, i64 1
- %tmp7881 = getelementptr inbounds float* %tmp7880, i64 1
- %tmp7882 = getelementptr inbounds float* %tmp7881, i64 1
- %tmp7883 = getelementptr inbounds float* %tmp7882, i64 1
- %tmp7884 = getelementptr inbounds float* %tmp7883, i64 1
- %tmp7885 = getelementptr inbounds float* %tmp7884, i64 1
- %tmp7886 = getelementptr inbounds float* %tmp7885, i64 1
- %tmp7887 = getelementptr inbounds float* %tmp7886, i64 1
- %tmp7888 = getelementptr inbounds float* %tmp7887, i64 1
- %tmp7889 = getelementptr inbounds float* %tmp7888, i64 1
- %tmp7890 = getelementptr inbounds float* %tmp7889, i64 1
- %tmp7891 = getelementptr inbounds float* %tmp7890, i64 1
- %tmp7892 = getelementptr inbounds float* %tmp7891, i64 1
- %tmp7893 = getelementptr inbounds float* %tmp7892, i64 1
- %tmp7894 = getelementptr inbounds float* %tmp7893, i64 1
- %tmp7895 = getelementptr inbounds float* %tmp7894, i64 1
- %tmp7896 = getelementptr inbounds float* %tmp7895, i64 1
- %tmp7897 = getelementptr inbounds float* %tmp7896, i64 1
- %tmp7898 = getelementptr inbounds float* %tmp7897, i64 1
- %tmp7899 = getelementptr inbounds float* %tmp7898, i64 1
- %tmp7900 = getelementptr inbounds float* %tmp7899, i64 1
- %tmp7901 = getelementptr inbounds float* %tmp7900, i64 1
- %tmp7902 = getelementptr inbounds float* %tmp7901, i64 1
- %tmp7903 = getelementptr inbounds float* %tmp7902, i64 1
- %tmp7904 = getelementptr inbounds float* %tmp7903, i64 1
- %tmp7905 = getelementptr inbounds float* %tmp7904, i64 1
- %tmp7906 = getelementptr inbounds float* %tmp7905, i64 1
- %tmp7907 = getelementptr inbounds float* %tmp7906, i64 1
- %tmp7908 = getelementptr inbounds float* %tmp7907, i64 1
- %tmp7909 = getelementptr inbounds float* %tmp7908, i64 1
- %tmp7910 = getelementptr inbounds float* %tmp7909, i64 1
- %tmp7911 = getelementptr inbounds float* %tmp7910, i64 1
- %tmp7912 = getelementptr inbounds float* %tmp7911, i64 1
- %tmp7913 = getelementptr inbounds float* %tmp7912, i64 1
- %tmp7914 = getelementptr inbounds float* %tmp7913, i64 1
- %tmp7915 = getelementptr inbounds float* %tmp7914, i64 1
- %tmp7916 = getelementptr inbounds float* %tmp7915, i64 1
- %tmp7917 = getelementptr inbounds float* %tmp7916, i64 1
- %tmp7918 = getelementptr inbounds float* %tmp7917, i64 1
- %tmp7919 = getelementptr inbounds float* %tmp7918, i64 1
- %tmp7920 = getelementptr inbounds float* %tmp7919, i64 1
- %tmp7921 = getelementptr inbounds float* %tmp7920, i64 1
- %tmp7922 = getelementptr inbounds float* %tmp7921, i64 1
- %tmp7923 = getelementptr inbounds float* %tmp7922, i64 1
- %tmp7924 = getelementptr inbounds float* %tmp7923, i64 1
- %tmp7925 = getelementptr inbounds float* %tmp7924, i64 1
- %tmp7926 = getelementptr inbounds float* %tmp7925, i64 1
- %tmp7927 = getelementptr inbounds float* %tmp7926, i64 1
- %tmp7928 = getelementptr inbounds float* %tmp7927, i64 1
- %tmp7929 = getelementptr inbounds float* %tmp7928, i64 1
- %tmp7930 = getelementptr inbounds float* %tmp7929, i64 1
- %tmp7931 = getelementptr inbounds float* %tmp7930, i64 1
- %tmp7932 = getelementptr inbounds float* %tmp7931, i64 1
- %tmp7933 = getelementptr inbounds float* %tmp7932, i64 1
- %tmp7934 = getelementptr inbounds float* %tmp7933, i64 1
- %tmp7935 = getelementptr inbounds float* %tmp7934, i64 1
- %tmp7936 = getelementptr inbounds float* %tmp7935, i64 1
- %tmp7937 = getelementptr inbounds float* %tmp7936, i64 1
- %tmp7938 = getelementptr inbounds float* %tmp7937, i64 1
- %tmp7939 = getelementptr inbounds float* %tmp7938, i64 1
- %tmp7940 = getelementptr inbounds float* %tmp7939, i64 1
- %tmp7941 = getelementptr inbounds float* %tmp7940, i64 1
- %tmp7942 = getelementptr inbounds float* %tmp7941, i64 1
- %tmp7943 = getelementptr inbounds float* %tmp7942, i64 1
- %tmp7944 = getelementptr inbounds float* %tmp7943, i64 1
- %tmp7945 = getelementptr inbounds float* %tmp7944, i64 1
- %tmp7946 = getelementptr inbounds float* %tmp7945, i64 1
- %tmp7947 = getelementptr inbounds float* %tmp7946, i64 1
- %tmp7948 = getelementptr inbounds float* %tmp7947, i64 1
- %tmp7949 = getelementptr inbounds float* %tmp7948, i64 1
- %tmp7950 = getelementptr inbounds float* %tmp7949, i64 1
- %tmp7951 = getelementptr inbounds float* %tmp7950, i64 1
- %tmp7952 = getelementptr inbounds float* %tmp7951, i64 1
- %tmp7953 = getelementptr inbounds float* %tmp7952, i64 1
- %tmp7954 = getelementptr inbounds float* %tmp7953, i64 1
- %tmp7955 = getelementptr inbounds float* %tmp7954, i64 1
- %tmp7956 = getelementptr inbounds float* %tmp7955, i64 1
- %tmp7957 = getelementptr inbounds float* %tmp7956, i64 1
- %tmp7958 = getelementptr inbounds float* %tmp7957, i64 1
- %tmp7959 = getelementptr inbounds float* %tmp7958, i64 1
- %tmp7960 = getelementptr inbounds float* %tmp7959, i64 1
- %tmp7961 = getelementptr inbounds float* %tmp7960, i64 1
- %tmp7962 = getelementptr inbounds float* %tmp7961, i64 1
- %tmp7963 = getelementptr inbounds float* %tmp7962, i64 1
- %tmp7964 = getelementptr inbounds float* %tmp7963, i64 1
- %tmp7965 = getelementptr inbounds float* %tmp7964, i64 1
- %tmp7966 = getelementptr inbounds float* %tmp7965, i64 1
- %tmp7967 = getelementptr inbounds float* %tmp7966, i64 1
- %tmp7968 = getelementptr inbounds float* %tmp7967, i64 1
- %tmp7969 = getelementptr inbounds float* %tmp7968, i64 1
- %tmp7970 = getelementptr inbounds float* %tmp7969, i64 1
- %tmp7971 = getelementptr inbounds float* %tmp7970, i64 1
- %tmp7972 = getelementptr inbounds float* %tmp7971, i64 1
- %tmp7973 = getelementptr inbounds float* %tmp7972, i64 1
- %tmp7974 = getelementptr inbounds float* %tmp7973, i64 1
- %tmp7975 = getelementptr inbounds float* %tmp7974, i64 1
- %tmp7976 = getelementptr inbounds float* %tmp7975, i64 1
- %tmp7977 = getelementptr inbounds float* %tmp7976, i64 1
- %tmp7978 = getelementptr inbounds float* %tmp7977, i64 1
- %tmp7979 = getelementptr inbounds float* %tmp7978, i64 1
- %tmp7980 = getelementptr inbounds float* %tmp7979, i64 1
- %tmp7981 = getelementptr inbounds float* %tmp7980, i64 1
- %tmp7982 = getelementptr inbounds float* %tmp7981, i64 1
- %tmp7983 = getelementptr inbounds float* %tmp7982, i64 1
- %tmp7984 = getelementptr inbounds float* %tmp7983, i64 1
- %tmp7985 = getelementptr inbounds float* %tmp7984, i64 1
- %tmp7986 = getelementptr inbounds float* %tmp7985, i64 1
- %tmp7987 = getelementptr inbounds float* %tmp7986, i64 1
- %tmp7988 = getelementptr inbounds float* %tmp7987, i64 1
- %tmp7989 = getelementptr inbounds float* %tmp7988, i64 1
- %tmp7990 = getelementptr inbounds float* %tmp7989, i64 1
- %tmp7991 = getelementptr inbounds float* %tmp7990, i64 1
- %tmp7992 = getelementptr inbounds float* %tmp7991, i64 1
- %tmp7993 = getelementptr inbounds float* %tmp7992, i64 1
- %tmp7994 = getelementptr inbounds float* %tmp7993, i64 1
- %tmp7995 = getelementptr inbounds float* %tmp7994, i64 1
- %tmp7996 = getelementptr inbounds float* %tmp7995, i64 1
- %tmp7997 = getelementptr inbounds float* %tmp7996, i64 1
- %tmp7998 = getelementptr inbounds float* %tmp7997, i64 1
- %tmp7999 = getelementptr inbounds float* %tmp7998, i64 1
- %tmp8000 = getelementptr inbounds float* %tmp7999, i64 1
- %tmp8001 = getelementptr inbounds float* %tmp8000, i64 1
- %tmp8002 = getelementptr inbounds float* %tmp8001, i64 1
- %tmp8003 = getelementptr inbounds float* %tmp8002, i64 1
- %tmp8004 = getelementptr inbounds float* %tmp8003, i64 1
- %tmp8005 = getelementptr inbounds float* %tmp8004, i64 1
- %tmp8006 = getelementptr inbounds float* %tmp8005, i64 1
- %tmp8007 = getelementptr inbounds float* %tmp8006, i64 1
- %tmp8008 = getelementptr inbounds float* %tmp8007, i64 1
- %tmp8009 = getelementptr inbounds float* %tmp8008, i64 1
- %tmp8010 = getelementptr inbounds float* %tmp8009, i64 1
- %tmp8011 = getelementptr inbounds float* %tmp8010, i64 1
- %tmp8012 = getelementptr inbounds float* %tmp8011, i64 1
- %tmp8013 = getelementptr inbounds float* %tmp8012, i64 1
- %tmp8014 = getelementptr inbounds float* %tmp8013, i64 1
- %tmp8015 = getelementptr inbounds float* %tmp8014, i64 1
- %tmp8016 = getelementptr inbounds float* %tmp8015, i64 1
- %tmp8017 = getelementptr inbounds float* %tmp8016, i64 1
- %tmp8018 = getelementptr inbounds float* %tmp8017, i64 1
- %tmp8019 = getelementptr inbounds float* %tmp8018, i64 1
- %tmp8020 = getelementptr inbounds float* %tmp8019, i64 1
- %tmp8021 = getelementptr inbounds float* %tmp8020, i64 1
- %tmp8022 = getelementptr inbounds float* %tmp8021, i64 1
- %tmp8023 = getelementptr inbounds float* %tmp8022, i64 1
- %tmp8024 = getelementptr inbounds float* %tmp8023, i64 1
- %tmp8025 = getelementptr inbounds float* %tmp8024, i64 1
- %tmp8026 = getelementptr inbounds float* %tmp8025, i64 1
- %tmp8027 = getelementptr inbounds float* %tmp8026, i64 1
- %tmp8028 = getelementptr inbounds float* %tmp8027, i64 1
- %tmp8029 = getelementptr inbounds float* %tmp8028, i64 1
- %tmp8030 = getelementptr inbounds float* %tmp8029, i64 1
- %tmp8031 = getelementptr inbounds float* %tmp8030, i64 1
- %tmp8032 = getelementptr inbounds float* %tmp8031, i64 1
- %tmp8033 = getelementptr inbounds float* %tmp8032, i64 1
- %tmp8034 = getelementptr inbounds float* %tmp8033, i64 1
- %tmp8035 = getelementptr inbounds float* %tmp8034, i64 1
- %tmp8036 = getelementptr inbounds float* %tmp8035, i64 1
- %tmp8037 = getelementptr inbounds float* %tmp8036, i64 1
- %tmp8038 = getelementptr inbounds float* %tmp8037, i64 1
- %tmp8039 = getelementptr inbounds float* %tmp8038, i64 1
- %tmp8040 = getelementptr inbounds float* %tmp8039, i64 1
- %tmp8041 = getelementptr inbounds float* %tmp8040, i64 1
- %tmp8042 = getelementptr inbounds float* %tmp8041, i64 1
- %tmp8043 = getelementptr inbounds float* %tmp8042, i64 1
- %tmp8044 = getelementptr inbounds float* %tmp8043, i64 1
- %tmp8045 = getelementptr inbounds float* %tmp8044, i64 1
- %tmp8046 = getelementptr inbounds float* %tmp8045, i64 1
- %tmp8047 = getelementptr inbounds float* %tmp8046, i64 1
- %tmp8048 = getelementptr inbounds float* %tmp8047, i64 1
- %tmp8049 = getelementptr inbounds float* %tmp8048, i64 1
- %tmp8050 = getelementptr inbounds float* %tmp8049, i64 1
- %tmp8051 = getelementptr inbounds float* %tmp8050, i64 1
- %tmp8052 = getelementptr inbounds float* %tmp8051, i64 1
- %tmp8053 = getelementptr inbounds float* %tmp8052, i64 1
- %tmp8054 = getelementptr inbounds float* %tmp8053, i64 1
- %tmp8055 = getelementptr inbounds float* %tmp8054, i64 1
- %tmp8056 = getelementptr inbounds float* %tmp8055, i64 1
- %tmp8057 = getelementptr inbounds float* %tmp8056, i64 1
- %tmp8058 = getelementptr inbounds float* %tmp8057, i64 1
- %tmp8059 = getelementptr inbounds float* %tmp8058, i64 1
- %tmp8060 = getelementptr inbounds float* %tmp8059, i64 1
- %tmp8061 = getelementptr inbounds float* %tmp8060, i64 1
- %tmp8062 = getelementptr inbounds float* %tmp8061, i64 1
- %tmp8063 = getelementptr inbounds float* %tmp8062, i64 1
- %tmp8064 = getelementptr inbounds float* %tmp8063, i64 1
- %tmp8065 = getelementptr inbounds float* %tmp8064, i64 1
- %tmp8066 = getelementptr inbounds float* %tmp8065, i64 1
- %tmp8067 = getelementptr inbounds float* %tmp8066, i64 1
- %tmp8068 = getelementptr inbounds float* %tmp8067, i64 1
- %tmp8069 = getelementptr inbounds float* %tmp8068, i64 1
- %tmp8070 = getelementptr inbounds float* %tmp8069, i64 1
- %tmp8071 = getelementptr inbounds float* %tmp8070, i64 1
- %tmp8072 = getelementptr inbounds float* %tmp8071, i64 1
- %tmp8073 = getelementptr inbounds float* %tmp8072, i64 1
- %tmp8074 = getelementptr inbounds float* %tmp8073, i64 1
- %tmp8075 = getelementptr inbounds float* %tmp8074, i64 1
- %tmp8076 = getelementptr inbounds float* %tmp8075, i64 1
- %tmp8077 = getelementptr inbounds float* %tmp8076, i64 1
- %tmp8078 = getelementptr inbounds float* %tmp8077, i64 1
- %tmp8079 = getelementptr inbounds float* %tmp8078, i64 1
- %tmp8080 = getelementptr inbounds float* %tmp8079, i64 1
- %tmp8081 = getelementptr inbounds float* %tmp8080, i64 1
- %tmp8082 = getelementptr inbounds float* %tmp8081, i64 1
- %tmp8083 = getelementptr inbounds float* %tmp8082, i64 1
- %tmp8084 = getelementptr inbounds float* %tmp8083, i64 1
- %tmp8085 = getelementptr inbounds float* %tmp8084, i64 1
- %tmp8086 = getelementptr inbounds float* %tmp8085, i64 1
- %tmp8087 = getelementptr inbounds float* %tmp8086, i64 1
- %tmp8088 = getelementptr inbounds float* %tmp8087, i64 1
- %tmp8089 = getelementptr inbounds float* %tmp8088, i64 1
- %tmp8090 = getelementptr inbounds float* %tmp8089, i64 1
- %tmp8091 = getelementptr inbounds float* %tmp8090, i64 1
- %tmp8092 = getelementptr inbounds float* %tmp8091, i64 1
- %tmp8093 = getelementptr inbounds float* %tmp8092, i64 1
- %tmp8094 = getelementptr inbounds float* %tmp8093, i64 1
- %tmp8095 = getelementptr inbounds float* %tmp8094, i64 1
- %tmp8096 = getelementptr inbounds float* %tmp8095, i64 1
- %tmp8097 = getelementptr inbounds float* %tmp8096, i64 1
- %tmp8098 = getelementptr inbounds float* %tmp8097, i64 1
- %tmp8099 = getelementptr inbounds float* %tmp8098, i64 1
- %tmp8100 = getelementptr inbounds float* %tmp8099, i64 1
- %tmp8101 = getelementptr inbounds float* %tmp8100, i64 1
- %tmp8102 = getelementptr inbounds float* %tmp8101, i64 1
- %tmp8103 = getelementptr inbounds float* %tmp8102, i64 1
- %tmp8104 = getelementptr inbounds float* %tmp8103, i64 1
- %tmp8105 = getelementptr inbounds float* %tmp8104, i64 1
- %tmp8106 = getelementptr inbounds float* %tmp8105, i64 1
- %tmp8107 = getelementptr inbounds float* %tmp8106, i64 1
- %tmp8108 = getelementptr inbounds float* %tmp8107, i64 1
- %tmp8109 = getelementptr inbounds float* %tmp8108, i64 1
- %tmp8110 = getelementptr inbounds float* %tmp8109, i64 1
- %tmp8111 = getelementptr inbounds float* %tmp8110, i64 1
- %tmp8112 = getelementptr inbounds float* %tmp8111, i64 1
- %tmp8113 = getelementptr inbounds float* %tmp8112, i64 1
- %tmp8114 = getelementptr inbounds float* %tmp8113, i64 1
- %tmp8115 = getelementptr inbounds float* %tmp8114, i64 1
- %tmp8116 = getelementptr inbounds float* %tmp8115, i64 1
- %tmp8117 = getelementptr inbounds float* %tmp8116, i64 1
- %tmp8118 = getelementptr inbounds float* %tmp8117, i64 1
- %tmp8119 = getelementptr inbounds float* %tmp8118, i64 1
- %tmp8120 = getelementptr inbounds float* %tmp8119, i64 1
- %tmp8121 = getelementptr inbounds float* %tmp8120, i64 1
- %tmp8122 = getelementptr inbounds float* %tmp8121, i64 1
- %tmp8123 = getelementptr inbounds float* %tmp8122, i64 1
- %tmp8124 = getelementptr inbounds float* %tmp8123, i64 1
- %tmp8125 = getelementptr inbounds float* %tmp8124, i64 1
- %tmp8126 = getelementptr inbounds float* %tmp8125, i64 1
- %tmp8127 = getelementptr inbounds float* %tmp8126, i64 1
- %tmp8128 = getelementptr inbounds float* %tmp8127, i64 1
- %tmp8129 = getelementptr inbounds float* %tmp8128, i64 1
- %tmp8130 = getelementptr inbounds float* %tmp8129, i64 1
- %tmp8131 = getelementptr inbounds float* %tmp8130, i64 1
- %tmp8132 = getelementptr inbounds float* %tmp8131, i64 1
- %tmp8133 = getelementptr inbounds float* %tmp8132, i64 1
- %tmp8134 = getelementptr inbounds float* %tmp8133, i64 1
- %tmp8135 = getelementptr inbounds float* %tmp8134, i64 1
- %tmp8136 = getelementptr inbounds float* %tmp8135, i64 1
- %tmp8137 = getelementptr inbounds float* %tmp8136, i64 1
- %tmp8138 = getelementptr inbounds float* %tmp8137, i64 1
- %tmp8139 = getelementptr inbounds float* %tmp8138, i64 1
- %tmp8140 = getelementptr inbounds float* %tmp8139, i64 1
- %tmp8141 = getelementptr inbounds float* %tmp8140, i64 1
- %tmp8142 = getelementptr inbounds float* %tmp8141, i64 1
- %tmp8143 = getelementptr inbounds float* %tmp8142, i64 1
- %tmp8144 = getelementptr inbounds float* %tmp8143, i64 1
- %tmp8145 = getelementptr inbounds float* %tmp8144, i64 1
- %tmp8146 = getelementptr inbounds float* %tmp8145, i64 1
- %tmp8147 = getelementptr inbounds float* %tmp8146, i64 1
- %tmp8148 = getelementptr inbounds float* %tmp8147, i64 1
- %tmp8149 = getelementptr inbounds float* %tmp8148, i64 1
- %tmp8150 = getelementptr inbounds float* %tmp8149, i64 1
- %tmp8151 = getelementptr inbounds float* %tmp8150, i64 1
- %tmp8152 = getelementptr inbounds float* %tmp8151, i64 1
- %tmp8153 = getelementptr inbounds float* %tmp8152, i64 1
- %tmp8154 = getelementptr inbounds float* %tmp8153, i64 1
- %tmp8155 = getelementptr inbounds float* %tmp8154, i64 1
- %tmp8156 = getelementptr inbounds float* %tmp8155, i64 1
- %tmp8157 = getelementptr inbounds float* %tmp8156, i64 1
- %tmp8158 = getelementptr inbounds float* %tmp8157, i64 1
- %tmp8159 = getelementptr inbounds float* %tmp8158, i64 1
- %tmp8160 = getelementptr inbounds float* %tmp8159, i64 1
- %tmp8161 = getelementptr inbounds float* %tmp8160, i64 1
- %tmp8162 = getelementptr inbounds float* %tmp8161, i64 1
- %tmp8163 = getelementptr inbounds float* %tmp8162, i64 1
- %tmp8164 = getelementptr inbounds float* %tmp8163, i64 1
- %tmp8165 = getelementptr inbounds float* %tmp8164, i64 1
- %tmp8166 = getelementptr inbounds float* %tmp8165, i64 1
- %tmp8167 = getelementptr inbounds float* %tmp8166, i64 1
- %tmp8168 = getelementptr inbounds float* %tmp8167, i64 1
- %tmp8169 = getelementptr inbounds float* %tmp8168, i64 1
- %tmp8170 = getelementptr inbounds float* %tmp8169, i64 1
- %tmp8171 = getelementptr inbounds float* %tmp8170, i64 1
- %tmp8172 = getelementptr inbounds float* %tmp8171, i64 1
- %tmp8173 = getelementptr inbounds float* %tmp8172, i64 1
- %tmp8174 = getelementptr inbounds float* %tmp8173, i64 1
- %tmp8175 = getelementptr inbounds float* %tmp8174, i64 1
- %tmp8176 = getelementptr inbounds float* %tmp8175, i64 1
- %tmp8177 = getelementptr inbounds float* %tmp8176, i64 1
- %tmp8178 = getelementptr inbounds float* %tmp8177, i64 1
- %tmp8179 = getelementptr inbounds float* %tmp8178, i64 1
- %tmp8180 = getelementptr inbounds float* %tmp8179, i64 1
- %tmp8181 = getelementptr inbounds float* %tmp8180, i64 1
- %tmp8182 = getelementptr inbounds float* %tmp8181, i64 1
- %tmp8183 = getelementptr inbounds float* %tmp8182, i64 1
- %tmp8184 = getelementptr inbounds float* %tmp8183, i64 1
- %tmp8185 = getelementptr inbounds float* %tmp8184, i64 1
- %tmp8186 = getelementptr inbounds float* %tmp8185, i64 1
- %tmp8187 = getelementptr inbounds float* %tmp8186, i64 1
- %tmp8188 = getelementptr inbounds float* %tmp8187, i64 1
- %tmp8189 = getelementptr inbounds float* %tmp8188, i64 1
- %tmp8190 = getelementptr inbounds float* %tmp8189, i64 1
- %tmp8191 = getelementptr inbounds float* %tmp8190, i64 1
- %tmp8192 = getelementptr inbounds float* %tmp8191, i64 1
- %tmp8193 = getelementptr inbounds float* %tmp8192, i64 1
- %tmp8194 = getelementptr inbounds float* %tmp8193, i64 1
- %tmp8195 = getelementptr inbounds float* %tmp8194, i64 1
- %tmp8196 = getelementptr inbounds float* %tmp8195, i64 1
- %tmp8197 = getelementptr inbounds float* %tmp8196, i64 1
- %tmp8198 = getelementptr inbounds float* %tmp8197, i64 1
- %tmp8199 = getelementptr inbounds float* %tmp8198, i64 1
- %tmp8200 = getelementptr inbounds float* %tmp8199, i64 1
- %tmp8201 = getelementptr inbounds float* %tmp8200, i64 1
- %tmp8202 = getelementptr inbounds float* %tmp8201, i64 1
- %tmp8203 = getelementptr inbounds float* %tmp8202, i64 1
- %tmp8204 = getelementptr inbounds float* %tmp8203, i64 1
- %tmp8205 = getelementptr inbounds float* %tmp8204, i64 1
- %tmp8206 = getelementptr inbounds float* %tmp8205, i64 1
- %tmp8207 = getelementptr inbounds float* %tmp8206, i64 1
- %tmp8208 = getelementptr inbounds float* %tmp8207, i64 1
- %tmp8209 = getelementptr inbounds float* %tmp8208, i64 1
- %tmp8210 = getelementptr inbounds float* %tmp8209, i64 1
- %tmp8211 = getelementptr inbounds float* %tmp8210, i64 1
- %tmp8212 = getelementptr inbounds float* %tmp8211, i64 1
- %tmp8213 = getelementptr inbounds float* %tmp8212, i64 1
- %tmp8214 = getelementptr inbounds float* %tmp8213, i64 1
- %tmp8215 = getelementptr inbounds float* %tmp8214, i64 1
- %tmp8216 = getelementptr inbounds float* %tmp8215, i64 1
- %tmp8217 = getelementptr inbounds float* %tmp8216, i64 1
- %tmp8218 = getelementptr inbounds float* %tmp8217, i64 1
- %tmp8219 = getelementptr inbounds float* %tmp8218, i64 1
- %tmp8220 = getelementptr inbounds float* %tmp8219, i64 1
- %tmp8221 = getelementptr inbounds float* %tmp8220, i64 1
- %tmp8222 = getelementptr inbounds float* %tmp8221, i64 1
- %tmp8223 = getelementptr inbounds float* %tmp8222, i64 1
- %tmp8224 = getelementptr inbounds float* %tmp8223, i64 1
- %tmp8225 = getelementptr inbounds float* %tmp8224, i64 1
- %tmp8226 = getelementptr inbounds float* %tmp8225, i64 1
- %tmp8227 = getelementptr inbounds float* %tmp8226, i64 1
- %tmp8228 = getelementptr inbounds float* %tmp8227, i64 1
- %tmp8229 = getelementptr inbounds float* %tmp8228, i64 1
- %tmp8230 = getelementptr inbounds float* %tmp8229, i64 1
- %tmp8231 = getelementptr inbounds float* %tmp8230, i64 1
- %tmp8232 = getelementptr inbounds float* %tmp8231, i64 1
- %tmp8233 = getelementptr inbounds float* %tmp8232, i64 1
- %tmp8234 = getelementptr inbounds float* %tmp8233, i64 1
- %tmp8235 = getelementptr inbounds float* %tmp8234, i64 1
- %tmp8236 = getelementptr inbounds float* %tmp8235, i64 1
- %tmp8237 = getelementptr inbounds float* %tmp8236, i64 1
- %tmp8238 = getelementptr inbounds float* %tmp8237, i64 1
- %tmp8239 = getelementptr inbounds float* %tmp8238, i64 1
- %tmp8240 = getelementptr inbounds float* %tmp8239, i64 1
- %tmp8241 = getelementptr inbounds float* %tmp8240, i64 1
- %tmp8242 = getelementptr inbounds float* %tmp8241, i64 1
- %tmp8243 = getelementptr inbounds float* %tmp8242, i64 1
- %tmp8244 = getelementptr inbounds float* %tmp8243, i64 1
- %tmp8245 = getelementptr inbounds float* %tmp8244, i64 1
- %tmp8246 = getelementptr inbounds float* %tmp8245, i64 1
- %tmp8247 = getelementptr inbounds float* %tmp8246, i64 1
- %tmp8248 = getelementptr inbounds float* %tmp8247, i64 1
- %tmp8249 = getelementptr inbounds float* %tmp8248, i64 1
- %tmp8250 = getelementptr inbounds float* %tmp8249, i64 1
- %tmp8251 = getelementptr inbounds float* %tmp8250, i64 1
- %tmp8252 = getelementptr inbounds float* %tmp8251, i64 1
- %tmp8253 = getelementptr inbounds float* %tmp8252, i64 1
- %tmp8254 = getelementptr inbounds float* %tmp8253, i64 1
- %tmp8255 = getelementptr inbounds float* %tmp8254, i64 1
- %tmp8256 = getelementptr inbounds float* %tmp8255, i64 1
- %tmp8257 = getelementptr inbounds float* %tmp8256, i64 1
- %tmp8258 = getelementptr inbounds float* %tmp8257, i64 1
- %tmp8259 = getelementptr inbounds float* %tmp8258, i64 1
- %tmp8260 = getelementptr inbounds float* %tmp8259, i64 1
- %tmp8261 = getelementptr inbounds float* %tmp8260, i64 1
- %tmp8262 = getelementptr inbounds float* %tmp8261, i64 1
- %tmp8263 = getelementptr inbounds float* %tmp8262, i64 1
- %tmp8264 = getelementptr inbounds float* %tmp8263, i64 1
- %tmp8265 = getelementptr inbounds float* %tmp8264, i64 1
- %tmp8266 = getelementptr inbounds float* %tmp8265, i64 1
- %tmp8267 = getelementptr inbounds float* %tmp8266, i64 1
- %tmp8268 = getelementptr inbounds float* %tmp8267, i64 1
- %tmp8269 = getelementptr inbounds float* %tmp8268, i64 1
- %tmp8270 = getelementptr inbounds float* %tmp8269, i64 1
- %tmp8271 = getelementptr inbounds float* %tmp8270, i64 1
- %tmp8272 = getelementptr inbounds float* %tmp8271, i64 1
- %tmp8273 = getelementptr inbounds float* %tmp8272, i64 1
- %tmp8274 = getelementptr inbounds float* %tmp8273, i64 1
- %tmp8275 = getelementptr inbounds float* %tmp8274, i64 1
- %tmp8276 = getelementptr inbounds float* %tmp8275, i64 1
- %tmp8277 = getelementptr inbounds float* %tmp8276, i64 1
- %tmp8278 = getelementptr inbounds float* %tmp8277, i64 1
- %tmp8279 = getelementptr inbounds float* %tmp8278, i64 1
- %tmp8280 = getelementptr inbounds float* %tmp8279, i64 1
- %tmp8281 = getelementptr inbounds float* %tmp8280, i64 1
- %tmp8282 = getelementptr inbounds float* %tmp8281, i64 1
- %tmp8283 = getelementptr inbounds float* %tmp8282, i64 1
- %tmp8284 = getelementptr inbounds float* %tmp8283, i64 1
- %tmp8285 = getelementptr inbounds float* %tmp8284, i64 1
- %tmp8286 = getelementptr inbounds float* %tmp8285, i64 1
- %tmp8287 = getelementptr inbounds float* %tmp8286, i64 1
- %tmp8288 = getelementptr inbounds float* %tmp8287, i64 1
- %tmp8289 = getelementptr inbounds float* %tmp8288, i64 1
- %tmp8290 = getelementptr inbounds float* %tmp8289, i64 1
- %tmp8291 = getelementptr inbounds float* %tmp8290, i64 1
- %tmp8292 = getelementptr inbounds float* %tmp8291, i64 1
- %tmp8293 = getelementptr inbounds float* %tmp8292, i64 1
- %tmp8294 = getelementptr inbounds float* %tmp8293, i64 1
- %tmp8295 = getelementptr inbounds float* %tmp8294, i64 1
- %tmp8296 = getelementptr inbounds float* %tmp8295, i64 1
- %tmp8297 = getelementptr inbounds float* %tmp8296, i64 1
- %tmp8298 = getelementptr inbounds float* %tmp8297, i64 1
- %tmp8299 = getelementptr inbounds float* %tmp8298, i64 1
- %tmp8300 = getelementptr inbounds float* %tmp8299, i64 1
- %tmp8301 = getelementptr inbounds float* %tmp8300, i64 1
- %tmp8302 = getelementptr inbounds float* %tmp8301, i64 1
- %tmp8303 = getelementptr inbounds float* %tmp8302, i64 1
- %tmp8304 = getelementptr inbounds float* %tmp8303, i64 1
- %tmp8305 = getelementptr inbounds float* %tmp8304, i64 1
- %tmp8306 = getelementptr inbounds float* %tmp8305, i64 1
- %tmp8307 = getelementptr inbounds float* %tmp8306, i64 1
- %tmp8308 = getelementptr inbounds float* %tmp8307, i64 1
- %tmp8309 = getelementptr inbounds float* %tmp8308, i64 1
- %tmp8310 = getelementptr inbounds float* %tmp8309, i64 1
- %tmp8311 = getelementptr inbounds float* %tmp8310, i64 1
- %tmp8312 = getelementptr inbounds float* %tmp8311, i64 1
- %tmp8313 = getelementptr inbounds float* %tmp8312, i64 1
- %tmp8314 = getelementptr inbounds float* %tmp8313, i64 1
- %tmp8315 = getelementptr inbounds float* %tmp8314, i64 1
- %tmp8316 = getelementptr inbounds float* %tmp8315, i64 1
- %tmp8317 = getelementptr inbounds float* %tmp8316, i64 1
- %tmp8318 = getelementptr inbounds float* %tmp8317, i64 1
- %tmp8319 = getelementptr inbounds float* %tmp8318, i64 1
- %tmp8320 = getelementptr inbounds float* %tmp8319, i64 1
- %tmp8321 = getelementptr inbounds float* %tmp8320, i64 1
- %tmp8322 = getelementptr inbounds float* %tmp8321, i64 1
- %tmp8323 = getelementptr inbounds float* %tmp8322, i64 1
- %tmp8324 = getelementptr inbounds float* %tmp8323, i64 1
- %tmp8325 = getelementptr inbounds float* %tmp8324, i64 1
- %tmp8326 = getelementptr inbounds float* %tmp8325, i64 1
- %tmp8327 = getelementptr inbounds float* %tmp8326, i64 1
- %tmp8328 = getelementptr inbounds float* %tmp8327, i64 1
- %tmp8329 = getelementptr inbounds float* %tmp8328, i64 1
- %tmp8330 = getelementptr inbounds float* %tmp8329, i64 1
- %tmp8331 = getelementptr inbounds float* %tmp8330, i64 1
- %tmp8332 = getelementptr inbounds float* %tmp8331, i64 1
- %tmp8333 = getelementptr inbounds float* %tmp8332, i64 1
- %tmp8334 = getelementptr inbounds float* %tmp8333, i64 1
- %tmp8335 = getelementptr inbounds float* %tmp8334, i64 1
- %tmp8336 = getelementptr inbounds float* %tmp8335, i64 1
- %tmp8337 = getelementptr inbounds float* %tmp8336, i64 1
- %tmp8338 = getelementptr inbounds float* %tmp8337, i64 1
- %tmp8339 = getelementptr inbounds float* %tmp8338, i64 1
- %tmp8340 = getelementptr inbounds float* %tmp8339, i64 1
- %tmp8341 = getelementptr inbounds float* %tmp8340, i64 1
- %tmp8342 = getelementptr inbounds float* %tmp8341, i64 1
- %tmp8343 = getelementptr inbounds float* %tmp8342, i64 1
- %tmp8344 = getelementptr inbounds float* %tmp8343, i64 1
- %tmp8345 = getelementptr inbounds float* %tmp8344, i64 1
- %tmp8346 = getelementptr inbounds float* %tmp8345, i64 1
- %tmp8347 = getelementptr inbounds float* %tmp8346, i64 1
- %tmp8348 = getelementptr inbounds float* %tmp8347, i64 1
- %tmp8349 = getelementptr inbounds float* %tmp8348, i64 1
- %tmp8350 = getelementptr inbounds float* %tmp8349, i64 1
- %tmp8351 = getelementptr inbounds float* %tmp8350, i64 1
- %tmp8352 = getelementptr inbounds float* %tmp8351, i64 1
- %tmp8353 = getelementptr inbounds float* %tmp8352, i64 1
- %tmp8354 = getelementptr inbounds float* %tmp8353, i64 1
- %tmp8355 = getelementptr inbounds float* %tmp8354, i64 1
- %tmp8356 = getelementptr inbounds float* %tmp8355, i64 1
- %tmp8357 = getelementptr inbounds float* %tmp8356, i64 1
- %tmp8358 = getelementptr inbounds float* %tmp8357, i64 1
- %tmp8359 = getelementptr inbounds float* %tmp8358, i64 1
- %tmp8360 = getelementptr inbounds float* %tmp8359, i64 1
- %tmp8361 = getelementptr inbounds float* %tmp8360, i64 1
- %tmp8362 = getelementptr inbounds float* %tmp8361, i64 1
- %tmp8363 = getelementptr inbounds float* %tmp8362, i64 1
- %tmp8364 = getelementptr inbounds float* %tmp8363, i64 1
- %tmp8365 = getelementptr inbounds float* %tmp8364, i64 1
- %tmp8366 = getelementptr inbounds float* %tmp8365, i64 1
- %tmp8367 = getelementptr inbounds float* %tmp8366, i64 1
- %tmp8368 = getelementptr inbounds float* %tmp8367, i64 1
- %tmp8369 = getelementptr inbounds float* %tmp8368, i64 1
- %tmp8370 = getelementptr inbounds float* %tmp8369, i64 1
- %tmp8371 = getelementptr inbounds float* %tmp8370, i64 1
- %tmp8372 = getelementptr inbounds float* %tmp8371, i64 1
- %tmp8373 = getelementptr inbounds float* %tmp8372, i64 1
- %tmp8374 = getelementptr inbounds float* %tmp8373, i64 1
- %tmp8375 = getelementptr inbounds float* %tmp8374, i64 1
- %tmp8376 = getelementptr inbounds float* %tmp8375, i64 1
- %tmp8377 = getelementptr inbounds float* %tmp8376, i64 1
- %tmp8378 = getelementptr inbounds float* %tmp8377, i64 1
- %tmp8379 = getelementptr inbounds float* %tmp8378, i64 1
- %tmp8380 = getelementptr inbounds float* %tmp8379, i64 1
- %tmp8381 = getelementptr inbounds float* %tmp8380, i64 1
- %tmp8382 = getelementptr inbounds float* %tmp8381, i64 1
- %tmp8383 = getelementptr inbounds float* %tmp8382, i64 1
- %tmp8384 = getelementptr inbounds float* %tmp8383, i64 1
- %tmp8385 = getelementptr inbounds float* %tmp8384, i64 1
- %tmp8386 = getelementptr inbounds float* %tmp8385, i64 1
- %tmp8387 = getelementptr inbounds float* %tmp8386, i64 1
- %tmp8388 = getelementptr inbounds float* %tmp8387, i64 1
- %tmp8389 = getelementptr inbounds float* %tmp8388, i64 1
- %tmp8390 = getelementptr inbounds float* %tmp8389, i64 1
- %tmp8391 = getelementptr inbounds float* %tmp8390, i64 1
- %tmp8392 = getelementptr inbounds float* %tmp8391, i64 1
- %tmp8393 = getelementptr inbounds float* %tmp8392, i64 1
- %tmp8394 = getelementptr inbounds float* %tmp8393, i64 1
- %tmp8395 = getelementptr inbounds float* %tmp8394, i64 1
- %tmp8396 = getelementptr inbounds float* %tmp8395, i64 1
- %tmp8397 = getelementptr inbounds float* %tmp8396, i64 1
- %tmp8398 = getelementptr inbounds float* %tmp8397, i64 1
- %tmp8399 = getelementptr inbounds float* %tmp8398, i64 1
- %tmp8400 = getelementptr inbounds float* %tmp8399, i64 1
- %tmp8401 = getelementptr inbounds float* %tmp8400, i64 1
- %tmp8402 = getelementptr inbounds float* %tmp8401, i64 1
- %tmp8403 = getelementptr inbounds float* %tmp8402, i64 1
- %tmp8404 = getelementptr inbounds float* %tmp8403, i64 1
- %tmp8405 = getelementptr inbounds float* %tmp8404, i64 1
- %tmp8406 = getelementptr inbounds float* %tmp8405, i64 1
- %tmp8407 = getelementptr inbounds float* %tmp8406, i64 1
- %tmp8408 = getelementptr inbounds float* %tmp8407, i64 1
- %tmp8409 = getelementptr inbounds float* %tmp8408, i64 1
- %tmp8410 = getelementptr inbounds float* %tmp8409, i64 1
- %tmp8411 = getelementptr inbounds float* %tmp8410, i64 1
- %tmp8412 = getelementptr inbounds float* %tmp8411, i64 1
- %tmp8413 = getelementptr inbounds float* %tmp8412, i64 1
- %tmp8414 = getelementptr inbounds float* %tmp8413, i64 1
- %tmp8415 = getelementptr inbounds float* %tmp8414, i64 1
- %tmp8416 = getelementptr inbounds float* %tmp8415, i64 1
- %tmp8417 = getelementptr inbounds float* %tmp8416, i64 1
- %tmp8418 = getelementptr inbounds float* %tmp8417, i64 1
- %tmp8419 = getelementptr inbounds float* %tmp8418, i64 1
- %tmp8420 = getelementptr inbounds float* %tmp8419, i64 1
- %tmp8421 = getelementptr inbounds float* %tmp8420, i64 1
- %tmp8422 = getelementptr inbounds float* %tmp8421, i64 1
- %tmp8423 = getelementptr inbounds float* %tmp8422, i64 1
- %tmp8424 = getelementptr inbounds float* %tmp8423, i64 1
- %tmp8425 = getelementptr inbounds float* %tmp8424, i64 1
- %tmp8426 = getelementptr inbounds float* %tmp8425, i64 1
- %tmp8427 = getelementptr inbounds float* %tmp8426, i64 1
- %tmp8428 = getelementptr inbounds float* %tmp8427, i64 1
- %tmp8429 = getelementptr inbounds float* %tmp8428, i64 1
- %tmp8430 = getelementptr inbounds float* %tmp8429, i64 1
- %tmp8431 = getelementptr inbounds float* %tmp8430, i64 1
- %tmp8432 = getelementptr inbounds float* %tmp8431, i64 1
- %tmp8433 = getelementptr inbounds float* %tmp8432, i64 1
- %tmp8434 = getelementptr inbounds float* %tmp8433, i64 1
- %tmp8435 = getelementptr inbounds float* %tmp8434, i64 1
- %tmp8436 = getelementptr inbounds float* %tmp8435, i64 1
- %tmp8437 = getelementptr inbounds float* %tmp8436, i64 1
- %tmp8438 = getelementptr inbounds float* %tmp8437, i64 1
- %tmp8439 = getelementptr inbounds float* %tmp8438, i64 1
- %tmp8440 = getelementptr inbounds float* %tmp8439, i64 1
- %tmp8441 = getelementptr inbounds float* %tmp8440, i64 1
- %tmp8442 = getelementptr inbounds float* %tmp8441, i64 1
- %tmp8443 = getelementptr inbounds float* %tmp8442, i64 1
- %tmp8444 = getelementptr inbounds float* %tmp8443, i64 1
- %tmp8445 = getelementptr inbounds float* %tmp8444, i64 1
- %tmp8446 = getelementptr inbounds float* %tmp8445, i64 1
- %tmp8447 = getelementptr inbounds float* %tmp8446, i64 1
- %tmp8448 = getelementptr inbounds float* %tmp8447, i64 1
- %tmp8449 = getelementptr inbounds float* %tmp8448, i64 1
- %tmp8450 = getelementptr inbounds float* %tmp8449, i64 1
- %tmp8451 = getelementptr inbounds float* %tmp8450, i64 1
- %tmp8452 = getelementptr inbounds float* %tmp8451, i64 1
- %tmp8453 = getelementptr inbounds float* %tmp8452, i64 1
- %tmp8454 = getelementptr inbounds float* %tmp8453, i64 1
- %tmp8455 = getelementptr inbounds float* %tmp8454, i64 1
- %tmp8456 = getelementptr inbounds float* %tmp8455, i64 1
- %tmp8457 = getelementptr inbounds float* %tmp8456, i64 1
- %tmp8458 = getelementptr inbounds float* %tmp8457, i64 1
- %tmp8459 = getelementptr inbounds float* %tmp8458, i64 1
- %tmp8460 = getelementptr inbounds float* %tmp8459, i64 1
- %tmp8461 = getelementptr inbounds float* %tmp8460, i64 1
- %tmp8462 = getelementptr inbounds float* %tmp8461, i64 1
- %tmp8463 = getelementptr inbounds float* %tmp8462, i64 1
- %tmp8464 = getelementptr inbounds float* %tmp8463, i64 1
- %tmp8465 = getelementptr inbounds float* %tmp8464, i64 1
- %tmp8466 = getelementptr inbounds float* %tmp8465, i64 1
- %tmp8467 = getelementptr inbounds float* %tmp8466, i64 1
- %tmp8468 = getelementptr inbounds float* %tmp8467, i64 1
- %tmp8469 = getelementptr inbounds float* %tmp8468, i64 1
- %tmp8470 = getelementptr inbounds float* %tmp8469, i64 1
- %tmp8471 = getelementptr inbounds float* %tmp8470, i64 1
- %tmp8472 = getelementptr inbounds float* %tmp8471, i64 1
- %tmp8473 = getelementptr inbounds float* %tmp8472, i64 1
- %tmp8474 = getelementptr inbounds float* %tmp8473, i64 1
- %tmp8475 = getelementptr inbounds float* %tmp8474, i64 1
- %tmp8476 = getelementptr inbounds float* %tmp8475, i64 1
- %tmp8477 = getelementptr inbounds float* %tmp8476, i64 1
- %tmp8478 = getelementptr inbounds float* %tmp8477, i64 1
- %tmp8479 = getelementptr inbounds float* %tmp8478, i64 1
- %tmp8480 = getelementptr inbounds float* %tmp8479, i64 1
- %tmp8481 = getelementptr inbounds float* %tmp8480, i64 1
- %tmp8482 = getelementptr inbounds float* %tmp8481, i64 1
- %tmp8483 = getelementptr inbounds float* %tmp8482, i64 1
- %tmp8484 = getelementptr inbounds float* %tmp8483, i64 1
- %tmp8485 = getelementptr inbounds float* %tmp8484, i64 1
- %tmp8486 = getelementptr inbounds float* %tmp8485, i64 1
- %tmp8487 = getelementptr inbounds float* %tmp8486, i64 1
- %tmp8488 = getelementptr inbounds float* %tmp8487, i64 1
- %tmp8489 = getelementptr inbounds float* %tmp8488, i64 1
- %tmp8490 = getelementptr inbounds float* %tmp8489, i64 1
- %tmp8491 = getelementptr inbounds float* %tmp8490, i64 1
- %tmp8492 = getelementptr inbounds float* %tmp8491, i64 1
- %tmp8493 = getelementptr inbounds float* %tmp8492, i64 1
- %tmp8494 = getelementptr inbounds float* %tmp8493, i64 1
- %tmp8495 = getelementptr inbounds float* %tmp8494, i64 1
- %tmp8496 = getelementptr inbounds float* %tmp8495, i64 1
- %tmp8497 = getelementptr inbounds float* %tmp8496, i64 1
- %tmp8498 = getelementptr inbounds float* %tmp8497, i64 1
- %tmp8499 = getelementptr inbounds float* %tmp8498, i64 1
- %tmp8500 = getelementptr inbounds float* %tmp8499, i64 1
- %tmp8501 = getelementptr inbounds float* %tmp8500, i64 1
- %tmp8502 = getelementptr inbounds float* %tmp8501, i64 1
- %tmp8503 = getelementptr inbounds float* %tmp8502, i64 1
- %tmp8504 = getelementptr inbounds float* %tmp8503, i64 1
- %tmp8505 = getelementptr inbounds float* %tmp8504, i64 1
- %tmp8506 = getelementptr inbounds float* %tmp8505, i64 1
- %tmp8507 = getelementptr inbounds float* %tmp8506, i64 1
- %tmp8508 = getelementptr inbounds float* %tmp8507, i64 1
- %tmp8509 = getelementptr inbounds float* %tmp8508, i64 1
- %tmp8510 = getelementptr inbounds float* %tmp8509, i64 1
- %tmp8511 = getelementptr inbounds float* %tmp8510, i64 1
- %tmp8512 = getelementptr inbounds float* %tmp8511, i64 1
- %tmp8513 = getelementptr inbounds float* %tmp8512, i64 1
- %tmp8514 = getelementptr inbounds float* %tmp8513, i64 1
- %tmp8515 = getelementptr inbounds float* %tmp8514, i64 1
- %tmp8516 = getelementptr inbounds float* %tmp8515, i64 1
- %tmp8517 = getelementptr inbounds float* %tmp8516, i64 1
- %tmp8518 = getelementptr inbounds float* %tmp8517, i64 1
- %tmp8519 = getelementptr inbounds float* %tmp8518, i64 1
- %tmp8520 = getelementptr inbounds float* %tmp8519, i64 1
- %tmp8521 = getelementptr inbounds float* %tmp8520, i64 1
- %tmp8522 = getelementptr inbounds float* %tmp8521, i64 1
- %tmp8523 = getelementptr inbounds float* %tmp8522, i64 1
- %tmp8524 = getelementptr inbounds float* %tmp8523, i64 1
- %tmp8525 = getelementptr inbounds float* %tmp8524, i64 1
- %tmp8526 = getelementptr inbounds float* %tmp8525, i64 1
- %tmp8527 = getelementptr inbounds float* %tmp8526, i64 1
- %tmp8528 = getelementptr inbounds float* %tmp8527, i64 1
- %tmp8529 = getelementptr inbounds float* %tmp8528, i64 1
- %tmp8530 = getelementptr inbounds float* %tmp8529, i64 1
- %tmp8531 = getelementptr inbounds float* %tmp8530, i64 1
- %tmp8532 = getelementptr inbounds float* %tmp8531, i64 1
- %tmp8533 = getelementptr inbounds float* %tmp8532, i64 1
- %tmp8534 = getelementptr inbounds float* %tmp8533, i64 1
- %tmp8535 = getelementptr inbounds float* %tmp8534, i64 1
- %tmp8536 = getelementptr inbounds float* %tmp8535, i64 1
- %tmp8537 = getelementptr inbounds float* %tmp8536, i64 1
- %tmp8538 = getelementptr inbounds float* %tmp8537, i64 1
- %tmp8539 = getelementptr inbounds float* %tmp8538, i64 1
- %tmp8540 = getelementptr inbounds float* %tmp8539, i64 1
- %tmp8541 = getelementptr inbounds float* %tmp8540, i64 1
- %tmp8542 = getelementptr inbounds float* %tmp8541, i64 1
- %tmp8543 = getelementptr inbounds float* %tmp8542, i64 1
- %tmp8544 = getelementptr inbounds float* %tmp8543, i64 1
- %tmp8545 = getelementptr inbounds float* %tmp8544, i64 1
- %tmp8546 = getelementptr inbounds float* %tmp8545, i64 1
- %tmp8547 = getelementptr inbounds float* %tmp8546, i64 1
- %tmp8548 = getelementptr inbounds float* %tmp8547, i64 1
- %tmp8549 = getelementptr inbounds float* %tmp8548, i64 1
- %tmp8550 = getelementptr inbounds float* %tmp8549, i64 1
- %tmp8551 = getelementptr inbounds float* %tmp8550, i64 1
- %tmp8552 = getelementptr inbounds float* %tmp8551, i64 1
- %tmp8553 = getelementptr inbounds float* %tmp8552, i64 1
- %tmp8554 = getelementptr inbounds float* %tmp8553, i64 1
- %tmp8555 = getelementptr inbounds float* %tmp8554, i64 1
- %tmp8556 = getelementptr inbounds float* %tmp8555, i64 1
- %tmp8557 = getelementptr inbounds float* %tmp8556, i64 1
- %tmp8558 = getelementptr inbounds float* %tmp8557, i64 1
- %tmp8559 = getelementptr inbounds float* %tmp8558, i64 1
- %tmp8560 = getelementptr inbounds float* %tmp8559, i64 1
- %tmp8561 = getelementptr inbounds float* %tmp8560, i64 1
- %tmp8562 = getelementptr inbounds float* %tmp8561, i64 1
- %tmp8563 = getelementptr inbounds float* %tmp8562, i64 1
- %tmp8564 = getelementptr inbounds float* %tmp8563, i64 1
- %tmp8565 = getelementptr inbounds float* %tmp8564, i64 1
- %tmp8566 = getelementptr inbounds float* %tmp8565, i64 1
- %tmp8567 = getelementptr inbounds float* %tmp8566, i64 1
- %tmp8568 = getelementptr inbounds float* %tmp8567, i64 1
- %tmp8569 = getelementptr inbounds float* %tmp8568, i64 1
- %tmp8570 = getelementptr inbounds float* %tmp8569, i64 1
- %tmp8571 = getelementptr inbounds float* %tmp8570, i64 1
- %tmp8572 = getelementptr inbounds float* %tmp8571, i64 1
- %tmp8573 = getelementptr inbounds float* %tmp8572, i64 1
- %tmp8574 = getelementptr inbounds float* %tmp8573, i64 1
- %tmp8575 = getelementptr inbounds float* %tmp8574, i64 1
- %tmp8576 = getelementptr inbounds float* %tmp8575, i64 1
- %tmp8577 = getelementptr inbounds float* %tmp8576, i64 1
- %tmp8578 = getelementptr inbounds float* %tmp8577, i64 1
- %tmp8579 = getelementptr inbounds float* %tmp8578, i64 1
- %tmp8580 = getelementptr inbounds float* %tmp8579, i64 1
- %tmp8581 = getelementptr inbounds float* %tmp8580, i64 1
- %tmp8582 = getelementptr inbounds float* %tmp8581, i64 1
- %tmp8583 = getelementptr inbounds float* %tmp8582, i64 1
- %tmp8584 = getelementptr inbounds float* %tmp8583, i64 1
- %tmp8585 = getelementptr inbounds float* %tmp8584, i64 1
- %tmp8586 = getelementptr inbounds float* %tmp8585, i64 1
- %tmp8587 = getelementptr inbounds float* %tmp8586, i64 1
- %tmp8588 = getelementptr inbounds float* %tmp8587, i64 1
- %tmp8589 = getelementptr inbounds float* %tmp8588, i64 1
- %tmp8590 = getelementptr inbounds float* %tmp8589, i64 1
- %tmp8591 = getelementptr inbounds float* %tmp8590, i64 1
- %tmp8592 = getelementptr inbounds float* %tmp8591, i64 1
- %tmp8593 = getelementptr inbounds float* %tmp8592, i64 1
- %tmp8594 = getelementptr inbounds float* %tmp8593, i64 1
- %tmp8595 = getelementptr inbounds float* %tmp8594, i64 1
- %tmp8596 = getelementptr inbounds float* %tmp8595, i64 1
- %tmp8597 = getelementptr inbounds float* %tmp8596, i64 1
- %tmp8598 = getelementptr inbounds float* %tmp8597, i64 1
- %tmp8599 = getelementptr inbounds float* %tmp8598, i64 1
- %tmp8600 = getelementptr inbounds float* %tmp8599, i64 1
- %tmp8601 = getelementptr inbounds float* %tmp8600, i64 1
- %tmp8602 = getelementptr inbounds float* %tmp8601, i64 1
- %tmp8603 = getelementptr inbounds float* %tmp8602, i64 1
- %tmp8604 = getelementptr inbounds float* %tmp8603, i64 1
- %tmp8605 = getelementptr inbounds float* %tmp8604, i64 1
- %tmp8606 = getelementptr inbounds float* %tmp8605, i64 1
- %tmp8607 = getelementptr inbounds float* %tmp8606, i64 1
- %tmp8608 = getelementptr inbounds float* %tmp8607, i64 1
- %tmp8609 = getelementptr inbounds float* %tmp8608, i64 1
- %tmp8610 = getelementptr inbounds float* %tmp8609, i64 1
- %tmp8611 = getelementptr inbounds float* %tmp8610, i64 1
- %tmp8612 = getelementptr inbounds float* %tmp8611, i64 1
- %tmp8613 = getelementptr inbounds float* %tmp8612, i64 1
- %tmp8614 = getelementptr inbounds float* %tmp8613, i64 1
- %tmp8615 = getelementptr inbounds float* %tmp8614, i64 1
- %tmp8616 = getelementptr inbounds float* %tmp8615, i64 1
- %tmp8617 = getelementptr inbounds float* %tmp8616, i64 1
- %tmp8618 = getelementptr inbounds float* %tmp8617, i64 1
- %tmp8619 = getelementptr inbounds float* %tmp8618, i64 1
- %tmp8620 = getelementptr inbounds float* %tmp8619, i64 1
- %tmp8621 = getelementptr inbounds float* %tmp8620, i64 1
- %tmp8622 = getelementptr inbounds float* %tmp8621, i64 1
- %tmp8623 = getelementptr inbounds float* %tmp8622, i64 1
- %tmp8624 = getelementptr inbounds float* %tmp8623, i64 1
- %tmp8625 = getelementptr inbounds float* %tmp8624, i64 1
- %tmp8626 = getelementptr inbounds float* %tmp8625, i64 1
- %tmp8627 = getelementptr inbounds float* %tmp8626, i64 1
- %tmp8628 = getelementptr inbounds float* %tmp8627, i64 1
- %tmp8629 = getelementptr inbounds float* %tmp8628, i64 1
- %tmp8630 = getelementptr inbounds float* %tmp8629, i64 1
- %tmp8631 = getelementptr inbounds float* %tmp8630, i64 1
- %tmp8632 = getelementptr inbounds float* %tmp8631, i64 1
- %tmp8633 = getelementptr inbounds float* %tmp8632, i64 1
- %tmp8634 = getelementptr inbounds float* %tmp8633, i64 1
- %tmp8635 = getelementptr inbounds float* %tmp8634, i64 1
- %tmp8636 = getelementptr inbounds float* %tmp8635, i64 1
- %tmp8637 = getelementptr inbounds float* %tmp8636, i64 1
- %tmp8638 = getelementptr inbounds float* %tmp8637, i64 1
- %tmp8639 = getelementptr inbounds float* %tmp8638, i64 1
- %tmp8640 = getelementptr inbounds float* %tmp8639, i64 1
- %tmp8641 = getelementptr inbounds float* %tmp8640, i64 1
- %tmp8642 = getelementptr inbounds float* %tmp8641, i64 1
- %tmp8643 = getelementptr inbounds float* %tmp8642, i64 1
- %tmp8644 = getelementptr inbounds float* %tmp8643, i64 1
- %tmp8645 = getelementptr inbounds float* %tmp8644, i64 1
- %tmp8646 = getelementptr inbounds float* %tmp8645, i64 1
- %tmp8647 = getelementptr inbounds float* %tmp8646, i64 1
- %tmp8648 = getelementptr inbounds float* %tmp8647, i64 1
- %tmp8649 = getelementptr inbounds float* %tmp8648, i64 1
- %tmp8650 = getelementptr inbounds float* %tmp8649, i64 1
- %tmp8651 = getelementptr inbounds float* %tmp8650, i64 1
- %tmp8652 = getelementptr inbounds float* %tmp8651, i64 1
- %tmp8653 = getelementptr inbounds float* %tmp8652, i64 1
- %tmp8654 = getelementptr inbounds float* %tmp8653, i64 1
- %tmp8655 = getelementptr inbounds float* %tmp8654, i64 1
- %tmp8656 = getelementptr inbounds float* %tmp8655, i64 1
- %tmp8657 = getelementptr inbounds float* %tmp8656, i64 1
- %tmp8658 = getelementptr inbounds float* %tmp8657, i64 1
- %tmp8659 = getelementptr inbounds float* %tmp8658, i64 1
- %tmp8660 = getelementptr inbounds float* %tmp8659, i64 1
- %tmp8661 = getelementptr inbounds float* %tmp8660, i64 1
- %tmp8662 = getelementptr inbounds float* %tmp8661, i64 1
- %tmp8663 = getelementptr inbounds float* %tmp8662, i64 1
- %tmp8664 = getelementptr inbounds float* %tmp8663, i64 1
- %tmp8665 = getelementptr inbounds float* %tmp8664, i64 1
- %tmp8666 = getelementptr inbounds float* %tmp8665, i64 1
- %tmp8667 = getelementptr inbounds float* %tmp8666, i64 1
- %tmp8668 = getelementptr inbounds float* %tmp8667, i64 1
- %tmp8669 = getelementptr inbounds float* %tmp8668, i64 1
- %tmp8670 = getelementptr inbounds float* %tmp8669, i64 1
- %tmp8671 = getelementptr inbounds float* %tmp8670, i64 1
- %tmp8672 = getelementptr inbounds float* %tmp8671, i64 1
- %tmp8673 = getelementptr inbounds float* %tmp8672, i64 1
- %tmp8674 = getelementptr inbounds float* %tmp8673, i64 1
- %tmp8675 = getelementptr inbounds float* %tmp8674, i64 1
- %tmp8676 = getelementptr inbounds float* %tmp8675, i64 1
- %tmp8677 = getelementptr inbounds float* %tmp8676, i64 1
- %tmp8678 = getelementptr inbounds float* %tmp8677, i64 1
- %tmp8679 = getelementptr inbounds float* %tmp8678, i64 1
- %tmp8680 = getelementptr inbounds float* %tmp8679, i64 1
- %tmp8681 = getelementptr inbounds float* %tmp8680, i64 1
- %tmp8682 = getelementptr inbounds float* %tmp8681, i64 1
- %tmp8683 = getelementptr inbounds float* %tmp8682, i64 1
- %tmp8684 = getelementptr inbounds float* %tmp8683, i64 1
- %tmp8685 = getelementptr inbounds float* %tmp8684, i64 1
- %tmp8686 = getelementptr inbounds float* %tmp8685, i64 1
- %tmp8687 = getelementptr inbounds float* %tmp8686, i64 1
- %tmp8688 = getelementptr inbounds float* %tmp8687, i64 1
- %tmp8689 = getelementptr inbounds float* %tmp8688, i64 1
- %tmp8690 = getelementptr inbounds float* %tmp8689, i64 1
- %tmp8691 = getelementptr inbounds float* %tmp8690, i64 1
- %tmp8692 = getelementptr inbounds float* %tmp8691, i64 1
- %tmp8693 = getelementptr inbounds float* %tmp8692, i64 1
- %tmp8694 = getelementptr inbounds float* %tmp8693, i64 1
- %tmp8695 = getelementptr inbounds float* %tmp8694, i64 1
- %tmp8696 = getelementptr inbounds float* %tmp8695, i64 1
- %tmp8697 = getelementptr inbounds float* %tmp8696, i64 1
- %tmp8698 = getelementptr inbounds float* %tmp8697, i64 1
- %tmp8699 = getelementptr inbounds float* %tmp8698, i64 1
- %tmp8700 = getelementptr inbounds float* %tmp8699, i64 1
- %tmp8701 = getelementptr inbounds float* %tmp8700, i64 1
- %tmp8702 = getelementptr inbounds float* %tmp8701, i64 1
- %tmp8703 = getelementptr inbounds float* %tmp8702, i64 1
- %tmp8704 = getelementptr inbounds float* %tmp8703, i64 1
- %tmp8705 = getelementptr inbounds float* %tmp8704, i64 1
- %tmp8706 = getelementptr inbounds float* %tmp8705, i64 1
- %tmp8707 = getelementptr inbounds float* %tmp8706, i64 1
- %tmp8708 = getelementptr inbounds float* %tmp8707, i64 1
- %tmp8709 = getelementptr inbounds float* %tmp8708, i64 1
- %tmp8710 = getelementptr inbounds float* %tmp8709, i64 1
- %tmp8711 = getelementptr inbounds float* %tmp8710, i64 1
- %tmp8712 = getelementptr inbounds float* %tmp8711, i64 1
- %tmp8713 = getelementptr inbounds float* %tmp8712, i64 1
- %tmp8714 = getelementptr inbounds float* %tmp8713, i64 1
- %tmp8715 = getelementptr inbounds float* %tmp8714, i64 1
- %tmp8716 = getelementptr inbounds float* %tmp8715, i64 1
- %tmp8717 = getelementptr inbounds float* %tmp8716, i64 1
- %tmp8718 = getelementptr inbounds float* %tmp8717, i64 1
- %tmp8719 = getelementptr inbounds float* %tmp8718, i64 1
- %tmp8720 = getelementptr inbounds float* %tmp8719, i64 1
- %tmp8721 = getelementptr inbounds float* %tmp8720, i64 1
- %tmp8722 = getelementptr inbounds float* %tmp8721, i64 1
- %tmp8723 = getelementptr inbounds float* %tmp8722, i64 1
- %tmp8724 = getelementptr inbounds float* %tmp8723, i64 1
- %tmp8725 = getelementptr inbounds float* %tmp8724, i64 1
- %tmp8726 = getelementptr inbounds float* %tmp8725, i64 1
- %tmp8727 = getelementptr inbounds float* %tmp8726, i64 1
- %tmp8728 = getelementptr inbounds float* %tmp8727, i64 1
- %tmp8729 = getelementptr inbounds float* %tmp8728, i64 1
- %tmp8730 = getelementptr inbounds float* %tmp8729, i64 1
- %tmp8731 = getelementptr inbounds float* %tmp8730, i64 1
- %tmp8732 = getelementptr inbounds float* %tmp8731, i64 1
- %tmp8733 = getelementptr inbounds float* %tmp8732, i64 1
- %tmp8734 = getelementptr inbounds float* %tmp8733, i64 1
- %tmp8735 = getelementptr inbounds float* %tmp8734, i64 1
- %tmp8736 = getelementptr inbounds float* %tmp8735, i64 1
- %tmp8737 = getelementptr inbounds float* %tmp8736, i64 1
- %tmp8738 = getelementptr inbounds float* %tmp8737, i64 1
- %tmp8739 = getelementptr inbounds float* %tmp8738, i64 1
- %tmp8740 = getelementptr inbounds float* %tmp8739, i64 1
- %tmp8741 = getelementptr inbounds float* %tmp8740, i64 1
- %tmp8742 = getelementptr inbounds float* %tmp8741, i64 1
- %tmp8743 = getelementptr inbounds float* %tmp8742, i64 1
- %tmp8744 = getelementptr inbounds float* %tmp8743, i64 1
- %tmp8745 = getelementptr inbounds float* %tmp8744, i64 1
- %tmp8746 = getelementptr inbounds float* %tmp8745, i64 1
- %tmp8747 = getelementptr inbounds float* %tmp8746, i64 1
- %tmp8748 = getelementptr inbounds float* %tmp8747, i64 1
- %tmp8749 = getelementptr inbounds float* %tmp8748, i64 1
- %tmp8750 = getelementptr inbounds float* %tmp8749, i64 1
- %tmp8751 = getelementptr inbounds float* %tmp8750, i64 1
- %tmp8752 = getelementptr inbounds float* %tmp8751, i64 1
- %tmp8753 = getelementptr inbounds float* %tmp8752, i64 1
- %tmp8754 = getelementptr inbounds float* %tmp8753, i64 1
- %tmp8755 = getelementptr inbounds float* %tmp8754, i64 1
- %tmp8756 = getelementptr inbounds float* %tmp8755, i64 1
- %tmp8757 = getelementptr inbounds float* %tmp8756, i64 1
- %tmp8758 = getelementptr inbounds float* %tmp8757, i64 1
- %tmp8759 = getelementptr inbounds float* %tmp8758, i64 1
- %tmp8760 = getelementptr inbounds float* %tmp8759, i64 1
- %tmp8761 = getelementptr inbounds float* %tmp8760, i64 1
- %tmp8762 = getelementptr inbounds float* %tmp8761, i64 1
- %tmp8763 = getelementptr inbounds float* %tmp8762, i64 1
- %tmp8764 = getelementptr inbounds float* %tmp8763, i64 1
- %tmp8765 = getelementptr inbounds float* %tmp8764, i64 1
- %tmp8766 = getelementptr inbounds float* %tmp8765, i64 1
- %tmp8767 = getelementptr inbounds float* %tmp8766, i64 1
- %tmp8768 = getelementptr inbounds float* %tmp8767, i64 1
- %tmp8769 = getelementptr inbounds float* %tmp8768, i64 1
- %tmp8770 = getelementptr inbounds float* %tmp8769, i64 1
- %tmp8771 = getelementptr inbounds float* %tmp8770, i64 1
- %tmp8772 = getelementptr inbounds float* %tmp8771, i64 1
- %tmp8773 = getelementptr inbounds float* %tmp8772, i64 1
- %tmp8774 = getelementptr inbounds float* %tmp8773, i64 1
- %tmp8775 = getelementptr inbounds float* %tmp8774, i64 1
- %tmp8776 = getelementptr inbounds float* %tmp8775, i64 1
- %tmp8777 = getelementptr inbounds float* %tmp8776, i64 1
- %tmp8778 = getelementptr inbounds float* %tmp8777, i64 1
- %tmp8779 = getelementptr inbounds float* %tmp8778, i64 1
- %tmp8780 = getelementptr inbounds float* %tmp8779, i64 1
- %tmp8781 = getelementptr inbounds float* %tmp8780, i64 1
- %tmp8782 = getelementptr inbounds float* %tmp8781, i64 1
- %tmp8783 = getelementptr inbounds float* %tmp8782, i64 1
- %tmp8784 = getelementptr inbounds float* %tmp8783, i64 1
- %tmp8785 = getelementptr inbounds float* %tmp8784, i64 1
- %tmp8786 = getelementptr inbounds float* %tmp8785, i64 1
- %tmp8787 = getelementptr inbounds float* %tmp8786, i64 1
- %tmp8788 = getelementptr inbounds float* %tmp8787, i64 1
- %tmp8789 = getelementptr inbounds float* %tmp8788, i64 1
- %tmp8790 = getelementptr inbounds float* %tmp8789, i64 1
- %tmp8791 = getelementptr inbounds float* %tmp8790, i64 1
- %tmp8792 = getelementptr inbounds float* %tmp8791, i64 1
- %tmp8793 = getelementptr inbounds float* %tmp8792, i64 1
- %tmp8794 = getelementptr inbounds float* %tmp8793, i64 1
- %tmp8795 = getelementptr inbounds float* %tmp8794, i64 1
- %tmp8796 = getelementptr inbounds float* %tmp8795, i64 1
- %tmp8797 = getelementptr inbounds float* %tmp8796, i64 1
- %tmp8798 = getelementptr inbounds float* %tmp8797, i64 1
- %tmp8799 = getelementptr inbounds float* %tmp8798, i64 1
- %tmp8800 = getelementptr inbounds float* %tmp8799, i64 1
- %tmp8801 = getelementptr inbounds float* %tmp8800, i64 1
- %tmp8802 = getelementptr inbounds float* %tmp8801, i64 1
- %tmp8803 = getelementptr inbounds float* %tmp8802, i64 1
- %tmp8804 = getelementptr inbounds float* %tmp8803, i64 1
- %tmp8805 = getelementptr inbounds float* %tmp8804, i64 1
- %tmp8806 = getelementptr inbounds float* %tmp8805, i64 1
- %tmp8807 = getelementptr inbounds float* %tmp8806, i64 1
- %tmp8808 = getelementptr inbounds float* %tmp8807, i64 1
- %tmp8809 = getelementptr inbounds float* %tmp8808, i64 1
- %tmp8810 = getelementptr inbounds float* %tmp8809, i64 1
- %tmp8811 = getelementptr inbounds float* %tmp8810, i64 1
- %tmp8812 = getelementptr inbounds float* %tmp8811, i64 1
- %tmp8813 = getelementptr inbounds float* %tmp8812, i64 1
- %tmp8814 = getelementptr inbounds float* %tmp8813, i64 1
- %tmp8815 = getelementptr inbounds float* %tmp8814, i64 1
- %tmp8816 = getelementptr inbounds float* %tmp8815, i64 1
- %tmp8817 = getelementptr inbounds float* %tmp8816, i64 1
- %tmp8818 = getelementptr inbounds float* %tmp8817, i64 1
- %tmp8819 = getelementptr inbounds float* %tmp8818, i64 1
- %tmp8820 = getelementptr inbounds float* %tmp8819, i64 1
- %tmp8821 = getelementptr inbounds float* %tmp8820, i64 1
- %tmp8822 = getelementptr inbounds float* %tmp8821, i64 1
- %tmp8823 = getelementptr inbounds float* %tmp8822, i64 1
- %tmp8824 = getelementptr inbounds float* %tmp8823, i64 1
- %tmp8825 = getelementptr inbounds float* %tmp8824, i64 1
- %tmp8826 = getelementptr inbounds float* %tmp8825, i64 1
- %tmp8827 = getelementptr inbounds float* %tmp8826, i64 1
- %tmp8828 = getelementptr inbounds float* %tmp8827, i64 1
- %tmp8829 = getelementptr inbounds float* %tmp8828, i64 1
- %tmp8830 = getelementptr inbounds float* %tmp8829, i64 1
- %tmp8831 = getelementptr inbounds float* %tmp8830, i64 1
- %tmp8832 = getelementptr inbounds float* %tmp8831, i64 1
- %tmp8833 = getelementptr inbounds float* %tmp8832, i64 1
- %tmp8834 = getelementptr inbounds float* %tmp8833, i64 1
- %tmp8835 = getelementptr inbounds float* %tmp8834, i64 1
- %tmp8836 = getelementptr inbounds float* %tmp8835, i64 1
- %tmp8837 = getelementptr inbounds float* %tmp8836, i64 1
- %tmp8838 = getelementptr inbounds float* %tmp8837, i64 1
- %tmp8839 = getelementptr inbounds float* %tmp8838, i64 1
- %tmp8840 = getelementptr inbounds float* %tmp8839, i64 1
- %tmp8841 = getelementptr inbounds float* %tmp8840, i64 1
- %tmp8842 = getelementptr inbounds float* %tmp8841, i64 1
- %tmp8843 = getelementptr inbounds float* %tmp8842, i64 1
- %tmp8844 = getelementptr inbounds float* %tmp8843, i64 1
- %tmp8845 = getelementptr inbounds float* %tmp8844, i64 1
- %tmp8846 = getelementptr inbounds float* %tmp8845, i64 1
- %tmp8847 = getelementptr inbounds float* %tmp8846, i64 1
- %tmp8848 = getelementptr inbounds float* %tmp8847, i64 1
- %tmp8849 = getelementptr inbounds float* %tmp8848, i64 1
- %tmp8850 = getelementptr inbounds float* %tmp8849, i64 1
- %tmp8851 = getelementptr inbounds float* %tmp8850, i64 1
- %tmp8852 = getelementptr inbounds float* %tmp8851, i64 1
- %tmp8853 = getelementptr inbounds float* %tmp8852, i64 1
- %tmp8854 = getelementptr inbounds float* %tmp8853, i64 1
- %tmp8855 = getelementptr inbounds float* %tmp8854, i64 1
- %tmp8856 = getelementptr inbounds float* %tmp8855, i64 1
- %tmp8857 = getelementptr inbounds float* %tmp8856, i64 1
- %tmp8858 = getelementptr inbounds float* %tmp8857, i64 1
- %tmp8859 = getelementptr inbounds float* %tmp8858, i64 1
- %tmp8860 = getelementptr inbounds float* %tmp8859, i64 1
- %tmp8861 = getelementptr inbounds float* %tmp8860, i64 1
- %tmp8862 = getelementptr inbounds float* %tmp8861, i64 1
- %tmp8863 = getelementptr inbounds float* %tmp8862, i64 1
- %tmp8864 = getelementptr inbounds float* %tmp8863, i64 1
- %tmp8865 = getelementptr inbounds float* %tmp8864, i64 1
- %tmp8866 = getelementptr inbounds float* %tmp8865, i64 1
- %tmp8867 = getelementptr inbounds float* %tmp8866, i64 1
- %tmp8868 = getelementptr inbounds float* %tmp8867, i64 1
- %tmp8869 = getelementptr inbounds float* %tmp8868, i64 1
- %tmp8870 = getelementptr inbounds float* %tmp8869, i64 1
- %tmp8871 = getelementptr inbounds float* %tmp8870, i64 1
- %tmp8872 = getelementptr inbounds float* %tmp8871, i64 1
- %tmp8873 = getelementptr inbounds float* %tmp8872, i64 1
- %tmp8874 = getelementptr inbounds float* %tmp8873, i64 1
- %tmp8875 = getelementptr inbounds float* %tmp8874, i64 1
- %tmp8876 = getelementptr inbounds float* %tmp8875, i64 1
- %tmp8877 = getelementptr inbounds float* %tmp8876, i64 1
- %tmp8878 = getelementptr inbounds float* %tmp8877, i64 1
- %tmp8879 = getelementptr inbounds float* %tmp8878, i64 1
- %tmp8880 = getelementptr inbounds float* %tmp8879, i64 1
- %tmp8881 = getelementptr inbounds float* %tmp8880, i64 1
- %tmp8882 = getelementptr inbounds float* %tmp8881, i64 1
- %tmp8883 = getelementptr inbounds float* %tmp8882, i64 1
- %tmp8884 = getelementptr inbounds float* %tmp8883, i64 1
- %tmp8885 = getelementptr inbounds float* %tmp8884, i64 1
- %tmp8886 = getelementptr inbounds float* %tmp8885, i64 1
- %tmp8887 = getelementptr inbounds float* %tmp8886, i64 1
- %tmp8888 = getelementptr inbounds float* %tmp8887, i64 1
- %tmp8889 = getelementptr inbounds float* %tmp8888, i64 1
- %tmp8890 = getelementptr inbounds float* %tmp8889, i64 1
- %tmp8891 = getelementptr inbounds float* %tmp8890, i64 1
- %tmp8892 = getelementptr inbounds float* %tmp8891, i64 1
- %tmp8893 = getelementptr inbounds float* %tmp8892, i64 1
- %tmp8894 = getelementptr inbounds float* %tmp8893, i64 1
- %tmp8895 = getelementptr inbounds float* %tmp8894, i64 1
- %tmp8896 = getelementptr inbounds float* %tmp8895, i64 1
- %tmp8897 = getelementptr inbounds float* %tmp8896, i64 1
- %tmp8898 = getelementptr inbounds float* %tmp8897, i64 1
- %tmp8899 = getelementptr inbounds float* %tmp8898, i64 1
- %tmp8900 = getelementptr inbounds float* %tmp8899, i64 1
- %tmp8901 = getelementptr inbounds float* %tmp8900, i64 1
- %tmp8902 = getelementptr inbounds float* %tmp8901, i64 1
- %tmp8903 = getelementptr inbounds float* %tmp8902, i64 1
- %tmp8904 = getelementptr inbounds float* %tmp8903, i64 1
- %tmp8905 = getelementptr inbounds float* %tmp8904, i64 1
- %tmp8906 = getelementptr inbounds float* %tmp8905, i64 1
- %tmp8907 = getelementptr inbounds float* %tmp8906, i64 1
- %tmp8908 = getelementptr inbounds float* %tmp8907, i64 1
- %tmp8909 = getelementptr inbounds float* %tmp8908, i64 1
- %tmp8910 = getelementptr inbounds float* %tmp8909, i64 1
- %tmp8911 = getelementptr inbounds float* %tmp8910, i64 1
- %tmp8912 = getelementptr inbounds float* %tmp8911, i64 1
- %tmp8913 = getelementptr inbounds float* %tmp8912, i64 1
- %tmp8914 = getelementptr inbounds float* %tmp8913, i64 1
- %tmp8915 = getelementptr inbounds float* %tmp8914, i64 1
- %tmp8916 = getelementptr inbounds float* %tmp8915, i64 1
- %tmp8917 = getelementptr inbounds float* %tmp8916, i64 1
- %tmp8918 = getelementptr inbounds float* %tmp8917, i64 1
- %tmp8919 = getelementptr inbounds float* %tmp8918, i64 1
- %tmp8920 = getelementptr inbounds float* %tmp8919, i64 1
- %tmp8921 = getelementptr inbounds float* %tmp8920, i64 1
- %tmp8922 = getelementptr inbounds float* %tmp8921, i64 1
- %tmp8923 = getelementptr inbounds float* %tmp8922, i64 1
- %tmp8924 = getelementptr inbounds float* %tmp8923, i64 1
- %tmp8925 = getelementptr inbounds float* %tmp8924, i64 1
- %tmp8926 = getelementptr inbounds float* %tmp8925, i64 1
- %tmp8927 = getelementptr inbounds float* %tmp8926, i64 1
- %tmp8928 = getelementptr inbounds float* %tmp8927, i64 1
- %tmp8929 = getelementptr inbounds float* %tmp8928, i64 1
- %tmp8930 = getelementptr inbounds float* %tmp8929, i64 1
- %tmp8931 = getelementptr inbounds float* %tmp8930, i64 1
- %tmp8932 = getelementptr inbounds float* %tmp8931, i64 1
- %tmp8933 = getelementptr inbounds float* %tmp8932, i64 1
- %tmp8934 = getelementptr inbounds float* %tmp8933, i64 1
- %tmp8935 = getelementptr inbounds float* %tmp8934, i64 1
- %tmp8936 = getelementptr inbounds float* %tmp8935, i64 1
- %tmp8937 = getelementptr inbounds float* %tmp8936, i64 1
- %tmp8938 = getelementptr inbounds float* %tmp8937, i64 1
- %tmp8939 = getelementptr inbounds float* %tmp8938, i64 1
- %tmp8940 = getelementptr inbounds float* %tmp8939, i64 1
- %tmp8941 = getelementptr inbounds float* %tmp8940, i64 1
- %tmp8942 = getelementptr inbounds float* %tmp8941, i64 1
- %tmp8943 = getelementptr inbounds float* %tmp8942, i64 1
- %tmp8944 = getelementptr inbounds float* %tmp8943, i64 1
- %tmp8945 = getelementptr inbounds float* %tmp8944, i64 1
- %tmp8946 = getelementptr inbounds float* %tmp8945, i64 1
- %tmp8947 = getelementptr inbounds float* %tmp8946, i64 1
- %tmp8948 = getelementptr inbounds float* %tmp8947, i64 1
- %tmp8949 = getelementptr inbounds float* %tmp8948, i64 1
- %tmp8950 = getelementptr inbounds float* %tmp8949, i64 1
- %tmp8951 = getelementptr inbounds float* %tmp8950, i64 1
- %tmp8952 = getelementptr inbounds float* %tmp8951, i64 1
- %tmp8953 = getelementptr inbounds float* %tmp8952, i64 1
- %tmp8954 = getelementptr inbounds float* %tmp8953, i64 1
- %tmp8955 = getelementptr inbounds float* %tmp8954, i64 1
- %tmp8956 = getelementptr inbounds float* %tmp8955, i64 1
- %tmp8957 = getelementptr inbounds float* %tmp8956, i64 1
- %tmp8958 = getelementptr inbounds float* %tmp8957, i64 1
- %tmp8959 = getelementptr inbounds float* %tmp8958, i64 1
- %tmp8960 = getelementptr inbounds float* %tmp8959, i64 1
- %tmp8961 = getelementptr inbounds float* %tmp8960, i64 1
- %tmp8962 = getelementptr inbounds float* %tmp8961, i64 1
- %tmp8963 = getelementptr inbounds float* %tmp8962, i64 1
- %tmp8964 = getelementptr inbounds float* %tmp8963, i64 1
- %tmp8965 = getelementptr inbounds float* %tmp8964, i64 1
- %tmp8966 = getelementptr inbounds float* %tmp8965, i64 1
- %tmp8967 = getelementptr inbounds float* %tmp8966, i64 1
- %tmp8968 = getelementptr inbounds float* %tmp8967, i64 1
- %tmp8969 = getelementptr inbounds float* %tmp8968, i64 1
- %tmp8970 = getelementptr inbounds float* %tmp8969, i64 1
- %tmp8971 = getelementptr inbounds float* %tmp8970, i64 1
- %tmp8972 = getelementptr inbounds float* %tmp8971, i64 1
- %tmp8973 = getelementptr inbounds float* %tmp8972, i64 1
- %tmp8974 = getelementptr inbounds float* %tmp8973, i64 1
- %tmp8975 = getelementptr inbounds float* %tmp8974, i64 1
- %tmp8976 = getelementptr inbounds float* %tmp8975, i64 1
- %tmp8977 = getelementptr inbounds float* %tmp8976, i64 1
- %tmp8978 = getelementptr inbounds float* %tmp8977, i64 1
- %tmp8979 = getelementptr inbounds float* %tmp8978, i64 1
- %tmp8980 = getelementptr inbounds float* %tmp8979, i64 1
- %tmp8981 = getelementptr inbounds float* %tmp8980, i64 1
- %tmp8982 = getelementptr inbounds float* %tmp8981, i64 1
- %tmp8983 = getelementptr inbounds float* %tmp8982, i64 1
- %tmp8984 = getelementptr inbounds float* %tmp8983, i64 1
- %tmp8985 = getelementptr inbounds float* %tmp8984, i64 1
- %tmp8986 = getelementptr inbounds float* %tmp8985, i64 1
- %tmp8987 = getelementptr inbounds float* %tmp8986, i64 1
- %tmp8988 = getelementptr inbounds float* %tmp8987, i64 1
- %tmp8989 = getelementptr inbounds float* %tmp8988, i64 1
- %tmp8990 = getelementptr inbounds float* %tmp8989, i64 1
- %tmp8991 = getelementptr inbounds float* %tmp8990, i64 1
- %tmp8992 = getelementptr inbounds float* %tmp8991, i64 1
- %tmp8993 = getelementptr inbounds float* %tmp8992, i64 1
- %tmp8994 = getelementptr inbounds float* %tmp8993, i64 1
- %tmp8995 = getelementptr inbounds float* %tmp8994, i64 1
- %tmp8996 = getelementptr inbounds float* %tmp8995, i64 1
- %tmp8997 = getelementptr inbounds float* %tmp8996, i64 1
- %tmp8998 = getelementptr inbounds float* %tmp8997, i64 1
- %tmp8999 = getelementptr inbounds float* %tmp8998, i64 1
- %tmp9000 = getelementptr inbounds float* %tmp8999, i64 1
- %tmp9001 = getelementptr inbounds float* %tmp9000, i64 1
- %tmp9002 = getelementptr inbounds float* %tmp9001, i64 1
- %tmp9003 = getelementptr inbounds float* %tmp9002, i64 1
- %tmp9004 = getelementptr inbounds float* %tmp9003, i64 1
- %tmp9005 = getelementptr inbounds float* %tmp9004, i64 1
- %tmp9006 = getelementptr inbounds float* %tmp9005, i64 1
- %tmp9007 = getelementptr inbounds float* %tmp9006, i64 1
- %tmp9008 = getelementptr inbounds float* %tmp9007, i64 1
- %tmp9009 = getelementptr inbounds float* %tmp9008, i64 1
- %tmp9010 = getelementptr inbounds float* %tmp9009, i64 1
- %tmp9011 = getelementptr inbounds float* %tmp9010, i64 1
- %tmp9012 = getelementptr inbounds float* %tmp9011, i64 1
- %tmp9013 = getelementptr inbounds float* %tmp9012, i64 1
- %tmp9014 = getelementptr inbounds float* %tmp9013, i64 1
- %tmp9015 = getelementptr inbounds float* %tmp9014, i64 1
- %tmp9016 = getelementptr inbounds float* %tmp9015, i64 1
- %tmp9017 = getelementptr inbounds float* %tmp9016, i64 1
- %tmp9018 = getelementptr inbounds float* %tmp9017, i64 1
- %tmp9019 = getelementptr inbounds float* %tmp9018, i64 1
- %tmp9020 = getelementptr inbounds float* %tmp9019, i64 1
- %tmp9021 = getelementptr inbounds float* %tmp9020, i64 1
- %tmp9022 = getelementptr inbounds float* %tmp9021, i64 1
- %tmp9023 = getelementptr inbounds float* %tmp9022, i64 1
- %tmp9024 = getelementptr inbounds float* %tmp9023, i64 1
- %tmp9025 = getelementptr inbounds float* %tmp9024, i64 1
- %tmp9026 = getelementptr inbounds float* %tmp9025, i64 1
- %tmp9027 = getelementptr inbounds float* %tmp9026, i64 1
- %tmp9028 = getelementptr inbounds float* %tmp9027, i64 1
- %tmp9029 = getelementptr inbounds float* %tmp9028, i64 1
- %tmp9030 = getelementptr inbounds float* %tmp9029, i64 1
- %tmp9031 = getelementptr inbounds float* %tmp9030, i64 1
- %tmp9032 = getelementptr inbounds float* %tmp9031, i64 1
- %tmp9033 = getelementptr inbounds float* %tmp9032, i64 1
- %tmp9034 = getelementptr inbounds float* %tmp9033, i64 1
- %tmp9035 = getelementptr inbounds float* %tmp9034, i64 1
- %tmp9036 = getelementptr inbounds float* %tmp9035, i64 1
- %tmp9037 = getelementptr inbounds float* %tmp9036, i64 1
- %tmp9038 = getelementptr inbounds float* %tmp9037, i64 1
- %tmp9039 = getelementptr inbounds float* %tmp9038, i64 1
- %tmp9040 = getelementptr inbounds float* %tmp9039, i64 1
- %tmp9041 = getelementptr inbounds float* %tmp9040, i64 1
- %tmp9042 = getelementptr inbounds float* %tmp9041, i64 1
- %tmp9043 = getelementptr inbounds float* %tmp9042, i64 1
- %tmp9044 = getelementptr inbounds float* %tmp9043, i64 1
- %tmp9045 = getelementptr inbounds float* %tmp9044, i64 1
- %tmp9046 = getelementptr inbounds float* %tmp9045, i64 1
- %tmp9047 = getelementptr inbounds float* %tmp9046, i64 1
- %tmp9048 = getelementptr inbounds float* %tmp9047, i64 1
- %tmp9049 = getelementptr inbounds float* %tmp9048, i64 1
- %tmp9050 = getelementptr inbounds float* %tmp9049, i64 1
- %tmp9051 = getelementptr inbounds float* %tmp9050, i64 1
- %tmp9052 = getelementptr inbounds float* %tmp9051, i64 1
- %tmp9053 = getelementptr inbounds float* %tmp9052, i64 1
- %tmp9054 = getelementptr inbounds float* %tmp9053, i64 1
- %tmp9055 = getelementptr inbounds float* %tmp9054, i64 1
- %tmp9056 = getelementptr inbounds float* %tmp9055, i64 1
- %tmp9057 = getelementptr inbounds float* %tmp9056, i64 1
- %tmp9058 = getelementptr inbounds float* %tmp9057, i64 1
- %tmp9059 = getelementptr inbounds float* %tmp9058, i64 1
- %tmp9060 = getelementptr inbounds float* %tmp9059, i64 1
- %tmp9061 = getelementptr inbounds float* %tmp9060, i64 1
- %tmp9062 = getelementptr inbounds float* %tmp9061, i64 1
- %tmp9063 = getelementptr inbounds float* %tmp9062, i64 1
- %tmp9064 = getelementptr inbounds float* %tmp9063, i64 1
- %tmp9065 = getelementptr inbounds float* %tmp9064, i64 1
- %tmp9066 = getelementptr inbounds float* %tmp9065, i64 1
- %tmp9067 = getelementptr inbounds float* %tmp9066, i64 1
- %tmp9068 = getelementptr inbounds float* %tmp9067, i64 1
- %tmp9069 = getelementptr inbounds float* %tmp9068, i64 1
- %tmp9070 = getelementptr inbounds float* %tmp9069, i64 1
- %tmp9071 = getelementptr inbounds float* %tmp9070, i64 1
- %tmp9072 = getelementptr inbounds float* %tmp9071, i64 1
- %tmp9073 = getelementptr inbounds float* %tmp9072, i64 1
- %tmp9074 = getelementptr inbounds float* %tmp9073, i64 1
- %tmp9075 = getelementptr inbounds float* %tmp9074, i64 1
- %tmp9076 = getelementptr inbounds float* %tmp9075, i64 1
- %tmp9077 = getelementptr inbounds float* %tmp9076, i64 1
- %tmp9078 = getelementptr inbounds float* %tmp9077, i64 1
- %tmp9079 = getelementptr inbounds float* %tmp9078, i64 1
- %tmp9080 = getelementptr inbounds float* %tmp9079, i64 1
- %tmp9081 = getelementptr inbounds float* %tmp9080, i64 1
- %tmp9082 = getelementptr inbounds float* %tmp9081, i64 1
- %tmp9083 = getelementptr inbounds float* %tmp9082, i64 1
- %tmp9084 = getelementptr inbounds float* %tmp9083, i64 1
- %tmp9085 = getelementptr inbounds float* %tmp9084, i64 1
- %tmp9086 = getelementptr inbounds float* %tmp9085, i64 1
- %tmp9087 = getelementptr inbounds float* %tmp9086, i64 1
- %tmp9088 = getelementptr inbounds float* %tmp9087, i64 1
- %tmp9089 = getelementptr inbounds float* %tmp9088, i64 1
- %tmp9090 = getelementptr inbounds float* %tmp9089, i64 1
- %tmp9091 = getelementptr inbounds float* %tmp9090, i64 1
- %tmp9092 = getelementptr inbounds float* %tmp9091, i64 1
- %tmp9093 = getelementptr inbounds float* %tmp9092, i64 1
- %tmp9094 = getelementptr inbounds float* %tmp9093, i64 1
- %tmp9095 = getelementptr inbounds float* %tmp9094, i64 1
- %tmp9096 = getelementptr inbounds float* %tmp9095, i64 1
- %tmp9097 = getelementptr inbounds float* %tmp9096, i64 1
- %tmp9098 = getelementptr inbounds float* %tmp9097, i64 1
- %tmp9099 = getelementptr inbounds float* %tmp9098, i64 1
- %tmp9100 = getelementptr inbounds float* %tmp9099, i64 1
- %tmp9101 = getelementptr inbounds float* %tmp9100, i64 1
- %tmp9102 = getelementptr inbounds float* %tmp9101, i64 1
- %tmp9103 = getelementptr inbounds float* %tmp9102, i64 1
- %tmp9104 = getelementptr inbounds float* %tmp9103, i64 1
- %tmp9105 = getelementptr inbounds float* %tmp9104, i64 1
- %tmp9106 = getelementptr inbounds float* %tmp9105, i64 1
- %tmp9107 = getelementptr inbounds float* %tmp9106, i64 1
- %tmp9108 = getelementptr inbounds float* %tmp9107, i64 1
- %tmp9109 = getelementptr inbounds float* %tmp9108, i64 1
- %tmp9110 = getelementptr inbounds float* %tmp9109, i64 1
- %tmp9111 = getelementptr inbounds float* %tmp9110, i64 1
- %tmp9112 = getelementptr inbounds float* %tmp9111, i64 1
- %tmp9113 = getelementptr inbounds float* %tmp9112, i64 1
- %tmp9114 = getelementptr inbounds float* %tmp9113, i64 1
- %tmp9115 = getelementptr inbounds float* %tmp9114, i64 1
- %tmp9116 = getelementptr inbounds float* %tmp9115, i64 1
- %tmp9117 = getelementptr inbounds float* %tmp9116, i64 1
- %tmp9118 = getelementptr inbounds float* %tmp9117, i64 1
- %tmp9119 = getelementptr inbounds float* %tmp9118, i64 1
- %tmp9120 = getelementptr inbounds float* %tmp9119, i64 1
- %tmp9121 = getelementptr inbounds float* %tmp9120, i64 1
- %tmp9122 = getelementptr inbounds float* %tmp9121, i64 1
- %tmp9123 = getelementptr inbounds float* %tmp9122, i64 1
- %tmp9124 = getelementptr inbounds float* %tmp9123, i64 1
- %tmp9125 = getelementptr inbounds float* %tmp9124, i64 1
- %tmp9126 = getelementptr inbounds float* %tmp9125, i64 1
- %tmp9127 = getelementptr inbounds float* %tmp9126, i64 1
- %tmp9128 = getelementptr inbounds float* %tmp9127, i64 1
- %tmp9129 = getelementptr inbounds float* %tmp9128, i64 1
- %tmp9130 = getelementptr inbounds float* %tmp9129, i64 1
- %tmp9131 = getelementptr inbounds float* %tmp9130, i64 1
- %tmp9132 = getelementptr inbounds float* %tmp9131, i64 1
- %tmp9133 = getelementptr inbounds float* %tmp9132, i64 1
- %tmp9134 = getelementptr inbounds float* %tmp9133, i64 1
- %tmp9135 = getelementptr inbounds float* %tmp9134, i64 1
- %tmp9136 = getelementptr inbounds float* %tmp9135, i64 1
- %tmp9137 = getelementptr inbounds float* %tmp9136, i64 1
- %tmp9138 = getelementptr inbounds float* %tmp9137, i64 1
- %tmp9139 = getelementptr inbounds float* %tmp9138, i64 1
- %tmp9140 = getelementptr inbounds float* %tmp9139, i64 1
- %tmp9141 = getelementptr inbounds float* %tmp9140, i64 1
- %tmp9142 = getelementptr inbounds float* %tmp9141, i64 1
- %tmp9143 = getelementptr inbounds float* %tmp9142, i64 1
- %tmp9144 = getelementptr inbounds float* %tmp9143, i64 1
- %tmp9145 = getelementptr inbounds float* %tmp9144, i64 1
- %tmp9146 = getelementptr inbounds float* %tmp9145, i64 1
- %tmp9147 = getelementptr inbounds float* %tmp9146, i64 1
- %tmp9148 = getelementptr inbounds float* %tmp9147, i64 1
- %tmp9149 = getelementptr inbounds float* %tmp9148, i64 1
- %tmp9150 = getelementptr inbounds float* %tmp9149, i64 1
- %tmp9151 = getelementptr inbounds float* %tmp9150, i64 1
- %tmp9152 = getelementptr inbounds float* %tmp9151, i64 1
- %tmp9153 = getelementptr inbounds float* %tmp9152, i64 1
- %tmp9154 = getelementptr inbounds float* %tmp9153, i64 1
- %tmp9155 = getelementptr inbounds float* %tmp9154, i64 1
- %tmp9156 = getelementptr inbounds float* %tmp9155, i64 1
- %tmp9157 = getelementptr inbounds float* %tmp9156, i64 1
- %tmp9158 = getelementptr inbounds float* %tmp9157, i64 1
- %tmp9159 = getelementptr inbounds float* %tmp9158, i64 1
- %tmp9160 = getelementptr inbounds float* %tmp9159, i64 1
- %tmp9161 = getelementptr inbounds float* %tmp9160, i64 1
- %tmp9162 = getelementptr inbounds float* %tmp9161, i64 1
- %tmp9163 = getelementptr inbounds float* %tmp9162, i64 1
- %tmp9164 = getelementptr inbounds float* %tmp9163, i64 1
- %tmp9165 = getelementptr inbounds float* %tmp9164, i64 1
- %tmp9166 = getelementptr inbounds float* %tmp9165, i64 1
- %tmp9167 = getelementptr inbounds float* %tmp9166, i64 1
- %tmp9168 = getelementptr inbounds float* %tmp9167, i64 1
- %tmp9169 = getelementptr inbounds float* %tmp9168, i64 1
- %tmp9170 = getelementptr inbounds float* %tmp9169, i64 1
- %tmp9171 = getelementptr inbounds float* %tmp9170, i64 1
- %tmp9172 = getelementptr inbounds float* %tmp9171, i64 1
- %tmp9173 = getelementptr inbounds float* %tmp9172, i64 1
- %tmp9174 = getelementptr inbounds float* %tmp9173, i64 1
- %tmp9175 = getelementptr inbounds float* %tmp9174, i64 1
- %tmp9176 = getelementptr inbounds float* %tmp9175, i64 1
- %tmp9177 = getelementptr inbounds float* %tmp9176, i64 1
- %tmp9178 = getelementptr inbounds float* %tmp9177, i64 1
- %tmp9179 = getelementptr inbounds float* %tmp9178, i64 1
- %tmp9180 = getelementptr inbounds float* %tmp9179, i64 1
- %tmp9181 = getelementptr inbounds float* %tmp9180, i64 1
- %tmp9182 = getelementptr inbounds float* %tmp9181, i64 1
- %tmp9183 = getelementptr inbounds float* %tmp9182, i64 1
- %tmp9184 = getelementptr inbounds float* %tmp9183, i64 1
- %tmp9185 = getelementptr inbounds float* %tmp9184, i64 1
- %tmp9186 = getelementptr inbounds float* %tmp9185, i64 1
- %tmp9187 = getelementptr inbounds float* %tmp9186, i64 1
- %tmp9188 = getelementptr inbounds float* %tmp9187, i64 1
- %tmp9189 = getelementptr inbounds float* %tmp9188, i64 1
- %tmp9190 = getelementptr inbounds float* %tmp9189, i64 1
- %tmp9191 = getelementptr inbounds float* %tmp9190, i64 1
- %tmp9192 = getelementptr inbounds float* %tmp9191, i64 1
- %tmp9193 = getelementptr inbounds float* %tmp9192, i64 1
- %tmp9194 = getelementptr inbounds float* %tmp9193, i64 1
- %tmp9195 = getelementptr inbounds float* %tmp9194, i64 1
- %tmp9196 = getelementptr inbounds float* %tmp9195, i64 1
- %tmp9197 = getelementptr inbounds float* %tmp9196, i64 1
- %tmp9198 = getelementptr inbounds float* %tmp9197, i64 1
- %tmp9199 = getelementptr inbounds float* %tmp9198, i64 1
- %tmp9200 = getelementptr inbounds float* %tmp9199, i64 1
- %tmp9201 = getelementptr inbounds float* %tmp9200, i64 1
- %tmp9202 = getelementptr inbounds float* %tmp9201, i64 1
- %tmp9203 = getelementptr inbounds float* %tmp9202, i64 1
- %tmp9204 = getelementptr inbounds float* %tmp9203, i64 1
- %tmp9205 = getelementptr inbounds float* %tmp9204, i64 1
- %tmp9206 = getelementptr inbounds float* %tmp9205, i64 1
- %tmp9207 = getelementptr inbounds float* %tmp9206, i64 1
- %tmp9208 = getelementptr inbounds float* %tmp9207, i64 1
- %tmp9209 = getelementptr inbounds float* %tmp9208, i64 1
- %tmp9210 = getelementptr inbounds float* %tmp9209, i64 1
- %tmp9211 = getelementptr inbounds float* %tmp9210, i64 1
- %tmp9212 = getelementptr inbounds float* %tmp9211, i64 1
- %tmp9213 = getelementptr inbounds float* %tmp9212, i64 1
- %tmp9214 = getelementptr inbounds float* %tmp9213, i64 1
- %tmp9215 = getelementptr inbounds float* %tmp9214, i64 1
- %tmp9216 = getelementptr inbounds float* %tmp9215, i64 1
- %tmp9217 = getelementptr inbounds float* %tmp9216, i64 1
- %tmp9218 = getelementptr inbounds float* %tmp9217, i64 1
- %tmp9219 = getelementptr inbounds float* %tmp9218, i64 1
- %tmp9220 = getelementptr inbounds float* %tmp9219, i64 1
- %tmp9221 = getelementptr inbounds float* %tmp9220, i64 1
- %tmp9222 = getelementptr inbounds float* %tmp9221, i64 1
- %tmp9223 = getelementptr inbounds float* %tmp9222, i64 1
- %tmp9224 = getelementptr inbounds float* %tmp9223, i64 1
- %tmp9225 = getelementptr inbounds float* %tmp9224, i64 1
- %tmp9226 = getelementptr inbounds float* %tmp9225, i64 1
- %tmp9227 = getelementptr inbounds float* %tmp9226, i64 1
- %tmp9228 = getelementptr inbounds float* %tmp9227, i64 1
- %tmp9229 = getelementptr inbounds float* %tmp9228, i64 1
- %tmp9230 = getelementptr inbounds float* %tmp9229, i64 1
- %tmp9231 = getelementptr inbounds float* %tmp9230, i64 1
- %tmp9232 = getelementptr inbounds float* %tmp9231, i64 1
- %tmp9233 = getelementptr inbounds float* %tmp9232, i64 1
- %tmp9234 = getelementptr inbounds float* %tmp9233, i64 1
- %tmp9235 = getelementptr inbounds float* %tmp9234, i64 1
- %tmp9236 = getelementptr inbounds float* %tmp9235, i64 1
- %tmp9237 = getelementptr inbounds float* %tmp9236, i64 1
- %tmp9238 = getelementptr inbounds float* %tmp9237, i64 1
- %tmp9239 = getelementptr inbounds float* %tmp9238, i64 1
- %tmp9240 = getelementptr inbounds float* %tmp9239, i64 1
- %tmp9241 = getelementptr inbounds float* %tmp9240, i64 1
- %tmp9242 = getelementptr inbounds float* %tmp9241, i64 1
- %tmp9243 = getelementptr inbounds float* %tmp9242, i64 1
- %tmp9244 = getelementptr inbounds float* %tmp9243, i64 1
- %tmp9245 = getelementptr inbounds float* %tmp9244, i64 1
- %tmp9246 = getelementptr inbounds float* %tmp9245, i64 1
- %tmp9247 = getelementptr inbounds float* %tmp9246, i64 1
- %tmp9248 = getelementptr inbounds float* %tmp9247, i64 1
- %tmp9249 = getelementptr inbounds float* %tmp9248, i64 1
- %tmp9250 = getelementptr inbounds float* %tmp9249, i64 1
- %tmp9251 = getelementptr inbounds float* %tmp9250, i64 1
- %tmp9252 = getelementptr inbounds float* %tmp9251, i64 1
- %tmp9253 = getelementptr inbounds float* %tmp9252, i64 1
- %tmp9254 = getelementptr inbounds float* %tmp9253, i64 1
- %tmp9255 = getelementptr inbounds float* %tmp9254, i64 1
- %tmp9256 = getelementptr inbounds float* %tmp9255, i64 1
- %tmp9257 = getelementptr inbounds float* %tmp9256, i64 1
- %tmp9258 = getelementptr inbounds float* %tmp9257, i64 1
- %tmp9259 = getelementptr inbounds float* %tmp9258, i64 1
- %tmp9260 = getelementptr inbounds float* %tmp9259, i64 1
- %tmp9261 = getelementptr inbounds float* %tmp9260, i64 1
- %tmp9262 = getelementptr inbounds float* %tmp9261, i64 1
- %tmp9263 = getelementptr inbounds float* %tmp9262, i64 1
- %tmp9264 = getelementptr inbounds float* %tmp9263, i64 1
- %tmp9265 = getelementptr inbounds float* %tmp9264, i64 1
- %tmp9266 = getelementptr inbounds float* %tmp9265, i64 1
- %tmp9267 = getelementptr inbounds float* %tmp9266, i64 1
- %tmp9268 = getelementptr inbounds float* %tmp9267, i64 1
- %tmp9269 = getelementptr inbounds float* %tmp9268, i64 1
- %tmp9270 = getelementptr inbounds float* %tmp9269, i64 1
- %tmp9271 = getelementptr inbounds float* %tmp9270, i64 1
- %tmp9272 = getelementptr inbounds float* %tmp9271, i64 1
- %tmp9273 = getelementptr inbounds float* %tmp9272, i64 1
- %tmp9274 = getelementptr inbounds float* %tmp9273, i64 1
- %tmp9275 = getelementptr inbounds float* %tmp9274, i64 1
- %tmp9276 = getelementptr inbounds float* %tmp9275, i64 1
- %tmp9277 = getelementptr inbounds float* %tmp9276, i64 1
- %tmp9278 = getelementptr inbounds float* %tmp9277, i64 1
- %tmp9279 = getelementptr inbounds float* %tmp9278, i64 1
- %tmp9280 = getelementptr inbounds float* %tmp9279, i64 1
- %tmp9281 = getelementptr inbounds float* %tmp9280, i64 1
- %tmp9282 = getelementptr inbounds float* %tmp9281, i64 1
- %tmp9283 = getelementptr inbounds float* %tmp9282, i64 1
- %tmp9284 = getelementptr inbounds float* %tmp9283, i64 1
- %tmp9285 = getelementptr inbounds float* %tmp9284, i64 1
- %tmp9286 = getelementptr inbounds float* %tmp9285, i64 1
- %tmp9287 = getelementptr inbounds float* %tmp9286, i64 1
- %tmp9288 = getelementptr inbounds float* %tmp9287, i64 1
- %tmp9289 = getelementptr inbounds float* %tmp9288, i64 1
- %tmp9290 = getelementptr inbounds float* %tmp9289, i64 1
- %tmp9291 = getelementptr inbounds float* %tmp9290, i64 1
- %tmp9292 = getelementptr inbounds float* %tmp9291, i64 1
- %tmp9293 = getelementptr inbounds float* %tmp9292, i64 1
- %tmp9294 = getelementptr inbounds float* %tmp9293, i64 1
- %tmp9295 = getelementptr inbounds float* %tmp9294, i64 1
- %tmp9296 = getelementptr inbounds float* %tmp9295, i64 1
- %tmp9297 = getelementptr inbounds float* %tmp9296, i64 1
- %tmp9298 = getelementptr inbounds float* %tmp9297, i64 1
- %tmp9299 = getelementptr inbounds float* %tmp9298, i64 1
- %tmp9300 = getelementptr inbounds float* %tmp9299, i64 1
- %tmp9301 = getelementptr inbounds float* %tmp9300, i64 1
- %tmp9302 = getelementptr inbounds float* %tmp9301, i64 1
- %tmp9303 = getelementptr inbounds float* %tmp9302, i64 1
- %tmp9304 = getelementptr inbounds float* %tmp9303, i64 1
- %tmp9305 = getelementptr inbounds float* %tmp9304, i64 1
- %tmp9306 = getelementptr inbounds float* %tmp9305, i64 1
- %tmp9307 = getelementptr inbounds float* %tmp9306, i64 1
- %tmp9308 = getelementptr inbounds float* %tmp9307, i64 1
- %tmp9309 = getelementptr inbounds float* %tmp9308, i64 1
- %tmp9310 = getelementptr inbounds float* %tmp9309, i64 1
- %tmp9311 = getelementptr inbounds float* %tmp9310, i64 1
- %tmp9312 = getelementptr inbounds float* %tmp9311, i64 1
- %tmp9313 = getelementptr inbounds float* %tmp9312, i64 1
- %tmp9314 = getelementptr inbounds float* %tmp9313, i64 1
- %tmp9315 = getelementptr inbounds float* %tmp9314, i64 1
- %tmp9316 = getelementptr inbounds float* %tmp9315, i64 1
- %tmp9317 = getelementptr inbounds float* %tmp9316, i64 1
- %tmp9318 = getelementptr inbounds float* %tmp9317, i64 1
- %tmp9319 = getelementptr inbounds float* %tmp9318, i64 1
- %tmp9320 = getelementptr inbounds float* %tmp9319, i64 1
- %tmp9321 = getelementptr inbounds float* %tmp9320, i64 1
- %tmp9322 = getelementptr inbounds float* %tmp9321, i64 1
- %tmp9323 = getelementptr inbounds float* %tmp9322, i64 1
- %tmp9324 = getelementptr inbounds float* %tmp9323, i64 1
- %tmp9325 = getelementptr inbounds float* %tmp9324, i64 1
- %tmp9326 = getelementptr inbounds float* %tmp9325, i64 1
- %tmp9327 = getelementptr inbounds float* %tmp9326, i64 1
- %tmp9328 = getelementptr inbounds float* %tmp9327, i64 1
- %tmp9329 = getelementptr inbounds float* %tmp9328, i64 1
- %tmp9330 = getelementptr inbounds float* %tmp9329, i64 1
- %tmp9331 = getelementptr inbounds float* %tmp9330, i64 1
- %tmp9332 = getelementptr inbounds float* %tmp9331, i64 1
- %tmp9333 = getelementptr inbounds float* %tmp9332, i64 1
- %tmp9334 = getelementptr inbounds float* %tmp9333, i64 1
- %tmp9335 = getelementptr inbounds float* %tmp9334, i64 1
- %tmp9336 = getelementptr inbounds float* %tmp9335, i64 1
- %tmp9337 = getelementptr inbounds float* %tmp9336, i64 1
- %tmp9338 = getelementptr inbounds float* %tmp9337, i64 1
- %tmp9339 = getelementptr inbounds float* %tmp9338, i64 1
- %tmp9340 = getelementptr inbounds float* %tmp9339, i64 1
- %tmp9341 = getelementptr inbounds float* %tmp9340, i64 1
- %tmp9342 = getelementptr inbounds float* %tmp9341, i64 1
- %tmp9343 = getelementptr inbounds float* %tmp9342, i64 1
- %tmp9344 = getelementptr inbounds float* %tmp9343, i64 1
- %tmp9345 = getelementptr inbounds float* %tmp9344, i64 1
- %tmp9346 = getelementptr inbounds float* %tmp9345, i64 1
- %tmp9347 = getelementptr inbounds float* %tmp9346, i64 1
- %tmp9348 = getelementptr inbounds float* %tmp9347, i64 1
- %tmp9349 = getelementptr inbounds float* %tmp9348, i64 1
- %tmp9350 = getelementptr inbounds float* %tmp9349, i64 1
- %tmp9351 = getelementptr inbounds float* %tmp9350, i64 1
- %tmp9352 = getelementptr inbounds float* %tmp9351, i64 1
- %tmp9353 = getelementptr inbounds float* %tmp9352, i64 1
- %tmp9354 = getelementptr inbounds float* %tmp9353, i64 1
- %tmp9355 = getelementptr inbounds float* %tmp9354, i64 1
- %tmp9356 = getelementptr inbounds float* %tmp9355, i64 1
- %tmp9357 = getelementptr inbounds float* %tmp9356, i64 1
- %tmp9358 = getelementptr inbounds float* %tmp9357, i64 1
- %tmp9359 = getelementptr inbounds float* %tmp9358, i64 1
- %tmp9360 = getelementptr inbounds float* %tmp9359, i64 1
- %tmp9361 = getelementptr inbounds float* %tmp9360, i64 1
- %tmp9362 = getelementptr inbounds float* %tmp9361, i64 1
- %tmp9363 = getelementptr inbounds float* %tmp9362, i64 1
- %tmp9364 = getelementptr inbounds float* %tmp9363, i64 1
- %tmp9365 = getelementptr inbounds float* %tmp9364, i64 1
- %tmp9366 = getelementptr inbounds float* %tmp9365, i64 1
- %tmp9367 = getelementptr inbounds float* %tmp9366, i64 1
- %tmp9368 = getelementptr inbounds float* %tmp9367, i64 1
- %tmp9369 = getelementptr inbounds float* %tmp9368, i64 1
- %tmp9370 = getelementptr inbounds float* %tmp9369, i64 1
- %tmp9371 = getelementptr inbounds float* %tmp9370, i64 1
- %tmp9372 = getelementptr inbounds float* %tmp9371, i64 1
- %tmp9373 = getelementptr inbounds float* %tmp9372, i64 1
- %tmp9374 = getelementptr inbounds float* %tmp9373, i64 1
- %tmp9375 = getelementptr inbounds float* %tmp9374, i64 1
- %tmp9376 = getelementptr inbounds float* %tmp9375, i64 1
- %tmp9377 = getelementptr inbounds float* %tmp9376, i64 1
- %tmp9378 = getelementptr inbounds float* %tmp9377, i64 1
- %tmp9379 = getelementptr inbounds float* %tmp9378, i64 1
- %tmp9380 = getelementptr inbounds float* %tmp9379, i64 1
- %tmp9381 = getelementptr inbounds float* %tmp9380, i64 1
- %tmp9382 = getelementptr inbounds float* %tmp9381, i64 1
- %tmp9383 = getelementptr inbounds float* %tmp9382, i64 1
- %tmp9384 = getelementptr inbounds float* %tmp9383, i64 1
- %tmp9385 = getelementptr inbounds float* %tmp9384, i64 1
- %tmp9386 = getelementptr inbounds float* %tmp9385, i64 1
- %tmp9387 = getelementptr inbounds float* %tmp9386, i64 1
- %tmp9388 = getelementptr inbounds float* %tmp9387, i64 1
- %tmp9389 = getelementptr inbounds float* %tmp9388, i64 1
- %tmp9390 = getelementptr inbounds float* %tmp9389, i64 1
- %tmp9391 = getelementptr inbounds float* %tmp9390, i64 1
- %tmp9392 = getelementptr inbounds float* %tmp9391, i64 1
- %tmp9393 = getelementptr inbounds float* %tmp9392, i64 1
- %tmp9394 = getelementptr inbounds float* %tmp9393, i64 1
- %tmp9395 = getelementptr inbounds float* %tmp9394, i64 1
- %tmp9396 = getelementptr inbounds float* %tmp9395, i64 1
- %tmp9397 = getelementptr inbounds float* %tmp9396, i64 1
- %tmp9398 = getelementptr inbounds float* %tmp9397, i64 1
- %tmp9399 = getelementptr inbounds float* %tmp9398, i64 1
- %tmp9400 = getelementptr inbounds float* %tmp9399, i64 1
- %tmp9401 = getelementptr inbounds float* %tmp9400, i64 1
- %tmp9402 = getelementptr inbounds float* %tmp9401, i64 1
- %tmp9403 = getelementptr inbounds float* %tmp9402, i64 1
- %tmp9404 = getelementptr inbounds float* %tmp9403, i64 1
- %tmp9405 = getelementptr inbounds float* %tmp9404, i64 1
- %tmp9406 = getelementptr inbounds float* %tmp9405, i64 1
- %tmp9407 = getelementptr inbounds float* %tmp9406, i64 1
- %tmp9408 = getelementptr inbounds float* %tmp9407, i64 1
- %tmp9409 = getelementptr inbounds float* %tmp9408, i64 1
- %tmp9410 = getelementptr inbounds float* %tmp9409, i64 1
- %tmp9411 = getelementptr inbounds float* %tmp9410, i64 1
- %tmp9412 = getelementptr inbounds float* %tmp9411, i64 1
- %tmp9413 = getelementptr inbounds float* %tmp9412, i64 1
- %tmp9414 = getelementptr inbounds float* %tmp9413, i64 1
- %tmp9415 = getelementptr inbounds float* %tmp9414, i64 1
- %tmp9416 = getelementptr inbounds float* %tmp9415, i64 1
- %tmp9417 = getelementptr inbounds float* %tmp9416, i64 1
- %tmp9418 = getelementptr inbounds float* %tmp9417, i64 1
- %tmp9419 = getelementptr inbounds float* %tmp9418, i64 1
- %tmp9420 = getelementptr inbounds float* %tmp9419, i64 1
- %tmp9421 = getelementptr inbounds float* %tmp9420, i64 1
- %tmp9422 = getelementptr inbounds float* %tmp9421, i64 1
- %tmp9423 = getelementptr inbounds float* %tmp9422, i64 1
- %tmp9424 = getelementptr inbounds float* %tmp9423, i64 1
- %tmp9425 = getelementptr inbounds float* %tmp9424, i64 1
- %tmp9426 = getelementptr inbounds float* %tmp9425, i64 1
- %tmp9427 = getelementptr inbounds float* %tmp9426, i64 1
- %tmp9428 = getelementptr inbounds float* %tmp9427, i64 1
- %tmp9429 = getelementptr inbounds float* %tmp9428, i64 1
- %tmp9430 = getelementptr inbounds float* %tmp9429, i64 1
- %tmp9431 = getelementptr inbounds float* %tmp9430, i64 1
- %tmp9432 = getelementptr inbounds float* %tmp9431, i64 1
- %tmp9433 = getelementptr inbounds float* %tmp9432, i64 1
- %tmp9434 = getelementptr inbounds float* %tmp9433, i64 1
- %tmp9435 = getelementptr inbounds float* %tmp9434, i64 1
- %tmp9436 = getelementptr inbounds float* %tmp9435, i64 1
- %tmp9437 = getelementptr inbounds float* %tmp9436, i64 1
- %tmp9438 = getelementptr inbounds float* %tmp9437, i64 1
- %tmp9439 = getelementptr inbounds float* %tmp9438, i64 1
- %tmp9440 = getelementptr inbounds float* %tmp9439, i64 1
- %tmp9441 = getelementptr inbounds float* %tmp9440, i64 1
- %tmp9442 = getelementptr inbounds float* %tmp9441, i64 1
- %tmp9443 = getelementptr inbounds float* %tmp9442, i64 1
- %tmp9444 = getelementptr inbounds float* %tmp9443, i64 1
- %tmp9445 = getelementptr inbounds float* %tmp9444, i64 1
- %tmp9446 = getelementptr inbounds float* %tmp9445, i64 1
- %tmp9447 = getelementptr inbounds float* %tmp9446, i64 1
- %tmp9448 = getelementptr inbounds float* %tmp9447, i64 1
- %tmp9449 = getelementptr inbounds float* %tmp9448, i64 1
- %tmp9450 = getelementptr inbounds float* %tmp9449, i64 1
- %tmp9451 = getelementptr inbounds float* %tmp9450, i64 1
- %tmp9452 = getelementptr inbounds float* %tmp9451, i64 1
- %tmp9453 = getelementptr inbounds float* %tmp9452, i64 1
- %tmp9454 = getelementptr inbounds float* %tmp9453, i64 1
- %tmp9455 = getelementptr inbounds float* %tmp9454, i64 1
- %tmp9456 = getelementptr inbounds float* %tmp9455, i64 1
- %tmp9457 = getelementptr inbounds float* %tmp9456, i64 1
- %tmp9458 = getelementptr inbounds float* %tmp9457, i64 1
- %tmp9459 = getelementptr inbounds float* %tmp9458, i64 1
- %tmp9460 = getelementptr inbounds float* %tmp9459, i64 1
- %tmp9461 = getelementptr inbounds float* %tmp9460, i64 1
- %tmp9462 = getelementptr inbounds float* %tmp9461, i64 1
- %tmp9463 = getelementptr inbounds float* %tmp9462, i64 1
- %tmp9464 = getelementptr inbounds float* %tmp9463, i64 1
- %tmp9465 = getelementptr inbounds float* %tmp9464, i64 1
- %tmp9466 = getelementptr inbounds float* %tmp9465, i64 1
- %tmp9467 = getelementptr inbounds float* %tmp9466, i64 1
- %tmp9468 = getelementptr inbounds float* %tmp9467, i64 1
- %tmp9469 = getelementptr inbounds float* %tmp9468, i64 1
- %tmp9470 = getelementptr inbounds float* %tmp9469, i64 1
- %tmp9471 = getelementptr inbounds float* %tmp9470, i64 1
- %tmp9472 = getelementptr inbounds float* %tmp9471, i64 1
- %tmp9473 = getelementptr inbounds float* %tmp9472, i64 1
- %tmp9474 = getelementptr inbounds float* %tmp9473, i64 1
- %tmp9475 = getelementptr inbounds float* %tmp9474, i64 1
- %tmp9476 = getelementptr inbounds float* %tmp9475, i64 1
- %tmp9477 = getelementptr inbounds float* %tmp9476, i64 1
- %tmp9478 = getelementptr inbounds float* %tmp9477, i64 1
- %tmp9479 = getelementptr inbounds float* %tmp9478, i64 1
- %tmp9480 = getelementptr inbounds float* %tmp9479, i64 1
- %tmp9481 = getelementptr inbounds float* %tmp9480, i64 1
- %tmp9482 = getelementptr inbounds float* %tmp9481, i64 1
- %tmp9483 = getelementptr inbounds float* %tmp9482, i64 1
- %tmp9484 = getelementptr inbounds float* %tmp9483, i64 1
- %tmp9485 = getelementptr inbounds float* %tmp9484, i64 1
- %tmp9486 = getelementptr inbounds float* %tmp9485, i64 1
- %tmp9487 = getelementptr inbounds float* %tmp9486, i64 1
- %tmp9488 = getelementptr inbounds float* %tmp9487, i64 1
- %tmp9489 = getelementptr inbounds float* %tmp9488, i64 1
- %tmp9490 = getelementptr inbounds float* %tmp9489, i64 1
- %tmp9491 = getelementptr inbounds float* %tmp9490, i64 1
- %tmp9492 = getelementptr inbounds float* %tmp9491, i64 1
- %tmp9493 = getelementptr inbounds float* %tmp9492, i64 1
- %tmp9494 = getelementptr inbounds float* %tmp9493, i64 1
- %tmp9495 = getelementptr inbounds float* %tmp9494, i64 1
- %tmp9496 = getelementptr inbounds float* %tmp9495, i64 1
- %tmp9497 = getelementptr inbounds float* %tmp9496, i64 1
- %tmp9498 = getelementptr inbounds float* %tmp9497, i64 1
- %tmp9499 = getelementptr inbounds float* %tmp9498, i64 1
- %tmp9500 = getelementptr inbounds float* %tmp9499, i64 1
- %tmp9501 = getelementptr inbounds float* %tmp9500, i64 1
- %tmp9502 = getelementptr inbounds float* %tmp9501, i64 1
- %tmp9503 = getelementptr inbounds float* %tmp9502, i64 1
- %tmp9504 = getelementptr inbounds float* %tmp9503, i64 1
- %tmp9505 = getelementptr inbounds float* %tmp9504, i64 1
- %tmp9506 = getelementptr inbounds float* %tmp9505, i64 1
- %tmp9507 = getelementptr inbounds float* %tmp9506, i64 1
- %tmp9508 = getelementptr inbounds float* %tmp9507, i64 1
- %tmp9509 = getelementptr inbounds float* %tmp9508, i64 1
- %tmp9510 = getelementptr inbounds float* %tmp9509, i64 1
- %tmp9511 = getelementptr inbounds float* %tmp9510, i64 1
- %tmp9512 = getelementptr inbounds float* %tmp9511, i64 1
- %tmp9513 = getelementptr inbounds float* %tmp9512, i64 1
- %tmp9514 = getelementptr inbounds float* %tmp9513, i64 1
- %tmp9515 = getelementptr inbounds float* %tmp9514, i64 1
- %tmp9516 = getelementptr inbounds float* %tmp9515, i64 1
- %tmp9517 = getelementptr inbounds float* %tmp9516, i64 1
- %tmp9518 = getelementptr inbounds float* %tmp9517, i64 1
- %tmp9519 = getelementptr inbounds float* %tmp9518, i64 1
- %tmp9520 = getelementptr inbounds float* %tmp9519, i64 1
- %tmp9521 = getelementptr inbounds float* %tmp9520, i64 1
- %tmp9522 = getelementptr inbounds float* %tmp9521, i64 1
- %tmp9523 = getelementptr inbounds float* %tmp9522, i64 1
- %tmp9524 = getelementptr inbounds float* %tmp9523, i64 1
- %tmp9525 = getelementptr inbounds float* %tmp9524, i64 1
- %tmp9526 = getelementptr inbounds float* %tmp9525, i64 1
- %tmp9527 = getelementptr inbounds float* %tmp9526, i64 1
- %tmp9528 = getelementptr inbounds float* %tmp9527, i64 1
- %tmp9529 = getelementptr inbounds float* %tmp9528, i64 1
- %tmp9530 = getelementptr inbounds float* %tmp9529, i64 1
- %tmp9531 = getelementptr inbounds float* %tmp9530, i64 1
- %tmp9532 = getelementptr inbounds float* %tmp9531, i64 1
- %tmp9533 = getelementptr inbounds float* %tmp9532, i64 1
- %tmp9534 = getelementptr inbounds float* %tmp9533, i64 1
- %tmp9535 = getelementptr inbounds float* %tmp9534, i64 1
- %tmp9536 = getelementptr inbounds float* %tmp9535, i64 1
- %tmp9537 = getelementptr inbounds float* %tmp9536, i64 1
- %tmp9538 = getelementptr inbounds float* %tmp9537, i64 1
- %tmp9539 = getelementptr inbounds float* %tmp9538, i64 1
- %tmp9540 = getelementptr inbounds float* %tmp9539, i64 1
- %tmp9541 = getelementptr inbounds float* %tmp9540, i64 1
- %tmp9542 = getelementptr inbounds float* %tmp9541, i64 1
- %tmp9543 = getelementptr inbounds float* %tmp9542, i64 1
- %tmp9544 = getelementptr inbounds float* %tmp9543, i64 1
- %tmp9545 = getelementptr inbounds float* %tmp9544, i64 1
- %tmp9546 = getelementptr inbounds float* %tmp9545, i64 1
- %tmp9547 = getelementptr inbounds float* %tmp9546, i64 1
- %tmp9548 = getelementptr inbounds float* %tmp9547, i64 1
- %tmp9549 = getelementptr inbounds float* %tmp9548, i64 1
- %tmp9550 = getelementptr inbounds float* %tmp9549, i64 1
- %tmp9551 = getelementptr inbounds float* %tmp9550, i64 1
- %tmp9552 = getelementptr inbounds float* %tmp9551, i64 1
- %tmp9553 = getelementptr inbounds float* %tmp9552, i64 1
- %tmp9554 = getelementptr inbounds float* %tmp9553, i64 1
- %tmp9555 = getelementptr inbounds float* %tmp9554, i64 1
- %tmp9556 = getelementptr inbounds float* %tmp9555, i64 1
- %tmp9557 = getelementptr inbounds float* %tmp9556, i64 1
- %tmp9558 = getelementptr inbounds float* %tmp9557, i64 1
- %tmp9559 = getelementptr inbounds float* %tmp9558, i64 1
- %tmp9560 = getelementptr inbounds float* %tmp9559, i64 1
- %tmp9561 = getelementptr inbounds float* %tmp9560, i64 1
- %tmp9562 = getelementptr inbounds float* %tmp9561, i64 1
- %tmp9563 = getelementptr inbounds float* %tmp9562, i64 1
- %tmp9564 = getelementptr inbounds float* %tmp9563, i64 1
- %tmp9565 = getelementptr inbounds float* %tmp9564, i64 1
- %tmp9566 = getelementptr inbounds float* %tmp9565, i64 1
- %tmp9567 = getelementptr inbounds float* %tmp9566, i64 1
- %tmp9568 = getelementptr inbounds float* %tmp9567, i64 1
- %tmp9569 = getelementptr inbounds float* %tmp9568, i64 1
- %tmp9570 = getelementptr inbounds float* %tmp9569, i64 1
- %tmp9571 = getelementptr inbounds float* %tmp9570, i64 1
- %tmp9572 = getelementptr inbounds float* %tmp9571, i64 1
- %tmp9573 = getelementptr inbounds float* %tmp9572, i64 1
- %tmp9574 = getelementptr inbounds float* %tmp9573, i64 1
- %tmp9575 = getelementptr inbounds float* %tmp9574, i64 1
- %tmp9576 = getelementptr inbounds float* %tmp9575, i64 1
- %tmp9577 = getelementptr inbounds float* %tmp9576, i64 1
- %tmp9578 = getelementptr inbounds float* %tmp9577, i64 1
- %tmp9579 = getelementptr inbounds float* %tmp9578, i64 1
- %tmp9580 = getelementptr inbounds float* %tmp9579, i64 1
- %tmp9581 = getelementptr inbounds float* %tmp9580, i64 1
- %tmp9582 = getelementptr inbounds float* %tmp9581, i64 1
- %tmp9583 = getelementptr inbounds float* %tmp9582, i64 1
- %tmp9584 = getelementptr inbounds float* %tmp9583, i64 1
- %tmp9585 = getelementptr inbounds float* %tmp9584, i64 1
- %tmp9586 = getelementptr inbounds float* %tmp9585, i64 1
- %tmp9587 = getelementptr inbounds float* %tmp9586, i64 1
- %tmp9588 = getelementptr inbounds float* %tmp9587, i64 1
- %tmp9589 = getelementptr inbounds float* %tmp9588, i64 1
- %tmp9590 = getelementptr inbounds float* %tmp9589, i64 1
- %tmp9591 = getelementptr inbounds float* %tmp9590, i64 1
- %tmp9592 = getelementptr inbounds float* %tmp9591, i64 1
- %tmp9593 = getelementptr inbounds float* %tmp9592, i64 1
- %tmp9594 = getelementptr inbounds float* %tmp9593, i64 1
- %tmp9595 = getelementptr inbounds float* %tmp9594, i64 1
- %tmp9596 = getelementptr inbounds float* %tmp9595, i64 1
- %tmp9597 = getelementptr inbounds float* %tmp9596, i64 1
- %tmp9598 = getelementptr inbounds float* %tmp9597, i64 1
- %tmp9599 = getelementptr inbounds float* %tmp9598, i64 1
- %tmp9600 = getelementptr inbounds float* %tmp9599, i64 1
- %tmp9601 = getelementptr inbounds float* %tmp9600, i64 1
- %tmp9602 = getelementptr inbounds float* %tmp9601, i64 1
- %tmp9603 = getelementptr inbounds float* %tmp9602, i64 1
- %tmp9604 = getelementptr inbounds float* %tmp9603, i64 1
- %tmp9605 = getelementptr inbounds float* %tmp9604, i64 1
- %tmp9606 = getelementptr inbounds float* %tmp9605, i64 1
- %tmp9607 = getelementptr inbounds float* %tmp9606, i64 1
- %tmp9608 = getelementptr inbounds float* %tmp9607, i64 1
- %tmp9609 = getelementptr inbounds float* %tmp9608, i64 1
- %tmp9610 = getelementptr inbounds float* %tmp9609, i64 1
- %tmp9611 = getelementptr inbounds float* %tmp9610, i64 1
- %tmp9612 = getelementptr inbounds float* %tmp9611, i64 1
- %tmp9613 = getelementptr inbounds float* %tmp9612, i64 1
- %tmp9614 = getelementptr inbounds float* %tmp9613, i64 1
- %tmp9615 = getelementptr inbounds float* %tmp9614, i64 1
- %tmp9616 = getelementptr inbounds float* %tmp9615, i64 1
- %tmp9617 = getelementptr inbounds float* %tmp9616, i64 1
- %tmp9618 = getelementptr inbounds float* %tmp9617, i64 1
- %tmp9619 = getelementptr inbounds float* %tmp9618, i64 1
- %tmp9620 = getelementptr inbounds float* %tmp9619, i64 1
- %tmp9621 = getelementptr inbounds float* %tmp9620, i64 1
- %tmp9622 = getelementptr inbounds float* %tmp9621, i64 1
- %tmp9623 = getelementptr inbounds float* %tmp9622, i64 1
- %tmp9624 = getelementptr inbounds float* %tmp9623, i64 1
- %tmp9625 = getelementptr inbounds float* %tmp9624, i64 1
- %tmp9626 = getelementptr inbounds float* %tmp9625, i64 1
- %tmp9627 = getelementptr inbounds float* %tmp9626, i64 1
- %tmp9628 = getelementptr inbounds float* %tmp9627, i64 1
- %tmp9629 = getelementptr inbounds float* %tmp9628, i64 1
- %tmp9630 = getelementptr inbounds float* %tmp9629, i64 1
- %tmp9631 = getelementptr inbounds float* %tmp9630, i64 1
- %tmp9632 = getelementptr inbounds float* %tmp9631, i64 1
- %tmp9633 = getelementptr inbounds float* %tmp9632, i64 1
- %tmp9634 = getelementptr inbounds float* %tmp9633, i64 1
- %tmp9635 = getelementptr inbounds float* %tmp9634, i64 1
- %tmp9636 = getelementptr inbounds float* %tmp9635, i64 1
- %tmp9637 = getelementptr inbounds float* %tmp9636, i64 1
- %tmp9638 = getelementptr inbounds float* %tmp9637, i64 1
- %tmp9639 = getelementptr inbounds float* %tmp9638, i64 1
- %tmp9640 = getelementptr inbounds float* %tmp9639, i64 1
- %tmp9641 = getelementptr inbounds float* %tmp9640, i64 1
- %tmp9642 = getelementptr inbounds float* %tmp9641, i64 1
- %tmp9643 = getelementptr inbounds float* %tmp9642, i64 1
- %tmp9644 = getelementptr inbounds float* %tmp9643, i64 1
- %tmp9645 = getelementptr inbounds float* %tmp9644, i64 1
- %tmp9646 = getelementptr inbounds float* %tmp9645, i64 1
- %tmp9647 = getelementptr inbounds float* %tmp9646, i64 1
- %tmp9648 = getelementptr inbounds float* %tmp9647, i64 1
- %tmp9649 = getelementptr inbounds float* %tmp9648, i64 1
- %tmp9650 = getelementptr inbounds float* %tmp9649, i64 1
- %tmp9651 = getelementptr inbounds float* %tmp9650, i64 1
- %tmp9652 = getelementptr inbounds float* %tmp9651, i64 1
- %tmp9653 = getelementptr inbounds float* %tmp9652, i64 1
- %tmp9654 = getelementptr inbounds float* %tmp9653, i64 1
- %tmp9655 = getelementptr inbounds float* %tmp9654, i64 1
- %tmp9656 = getelementptr inbounds float* %tmp9655, i64 1
- %tmp9657 = getelementptr inbounds float* %tmp9656, i64 1
- %tmp9658 = getelementptr inbounds float* %tmp9657, i64 1
- %tmp9659 = getelementptr inbounds float* %tmp9658, i64 1
- %tmp9660 = getelementptr inbounds float* %tmp9659, i64 1
- %tmp9661 = getelementptr inbounds float* %tmp9660, i64 1
- %tmp9662 = getelementptr inbounds float* %tmp9661, i64 1
- %tmp9663 = getelementptr inbounds float* %tmp9662, i64 1
- %tmp9664 = getelementptr inbounds float* %tmp9663, i64 1
- %tmp9665 = getelementptr inbounds float* %tmp9664, i64 1
- %tmp9666 = getelementptr inbounds float* %tmp9665, i64 1
- %tmp9667 = getelementptr inbounds float* %tmp9666, i64 1
- %tmp9668 = getelementptr inbounds float* %tmp9667, i64 1
- %tmp9669 = getelementptr inbounds float* %tmp9668, i64 1
- %tmp9670 = getelementptr inbounds float* %tmp9669, i64 1
- %tmp9671 = getelementptr inbounds float* %tmp9670, i64 1
- %tmp9672 = getelementptr inbounds float* %tmp9671, i64 1
- %tmp9673 = getelementptr inbounds float* %tmp9672, i64 1
- %tmp9674 = getelementptr inbounds float* %tmp9673, i64 1
- %tmp9675 = getelementptr inbounds float* %tmp9674, i64 1
- %tmp9676 = getelementptr inbounds float* %tmp9675, i64 1
- %tmp9677 = getelementptr inbounds float* %tmp9676, i64 1
- %tmp9678 = getelementptr inbounds float* %tmp9677, i64 1
- %tmp9679 = getelementptr inbounds float* %tmp9678, i64 1
- %tmp9680 = getelementptr inbounds float* %tmp9679, i64 1
- %tmp9681 = getelementptr inbounds float* %tmp9680, i64 1
- %tmp9682 = getelementptr inbounds float* %tmp9681, i64 1
- %tmp9683 = getelementptr inbounds float* %tmp9682, i64 1
- %tmp9684 = getelementptr inbounds float* %tmp9683, i64 1
- %tmp9685 = getelementptr inbounds float* %tmp9684, i64 1
- %tmp9686 = getelementptr inbounds float* %tmp9685, i64 1
- %tmp9687 = getelementptr inbounds float* %tmp9686, i64 1
- %tmp9688 = getelementptr inbounds float* %tmp9687, i64 1
- %tmp9689 = getelementptr inbounds float* %tmp9688, i64 1
- %tmp9690 = getelementptr inbounds float* %tmp9689, i64 1
- %tmp9691 = getelementptr inbounds float* %tmp9690, i64 1
- %tmp9692 = getelementptr inbounds float* %tmp9691, i64 1
- %tmp9693 = getelementptr inbounds float* %tmp9692, i64 1
- %tmp9694 = getelementptr inbounds float* %tmp9693, i64 1
- %tmp9695 = getelementptr inbounds float* %tmp9694, i64 1
- %tmp9696 = getelementptr inbounds float* %tmp9695, i64 1
- %tmp9697 = getelementptr inbounds float* %tmp9696, i64 1
- %tmp9698 = getelementptr inbounds float* %tmp9697, i64 1
- %tmp9699 = getelementptr inbounds float* %tmp9698, i64 1
- %tmp9700 = getelementptr inbounds float* %tmp9699, i64 1
- %tmp9701 = getelementptr inbounds float* %tmp9700, i64 1
- %tmp9702 = getelementptr inbounds float* %tmp9701, i64 1
- %tmp9703 = getelementptr inbounds float* %tmp9702, i64 1
- %tmp9704 = getelementptr inbounds float* %tmp9703, i64 1
- %tmp9705 = getelementptr inbounds float* %tmp9704, i64 1
- %tmp9706 = getelementptr inbounds float* %tmp9705, i64 1
- %tmp9707 = getelementptr inbounds float* %tmp9706, i64 1
- %tmp9708 = getelementptr inbounds float* %tmp9707, i64 1
- %tmp9709 = getelementptr inbounds float* %tmp9708, i64 1
- %tmp9710 = getelementptr inbounds float* %tmp9709, i64 1
- %tmp9711 = getelementptr inbounds float* %tmp9710, i64 1
- %tmp9712 = getelementptr inbounds float* %tmp9711, i64 1
- %tmp9713 = getelementptr inbounds float* %tmp9712, i64 1
- %tmp9714 = getelementptr inbounds float* %tmp9713, i64 1
- %tmp9715 = getelementptr inbounds float* %tmp9714, i64 1
- %tmp9716 = getelementptr inbounds float* %tmp9715, i64 1
- %tmp9717 = getelementptr inbounds float* %tmp9716, i64 1
- %tmp9718 = getelementptr inbounds float* %tmp9717, i64 1
- %tmp9719 = getelementptr inbounds float* %tmp9718, i64 1
- %tmp9720 = getelementptr inbounds float* %tmp9719, i64 1
- %tmp9721 = getelementptr inbounds float* %tmp9720, i64 1
- %tmp9722 = getelementptr inbounds float* %tmp9721, i64 1
- %tmp9723 = getelementptr inbounds float* %tmp9722, i64 1
- %tmp9724 = getelementptr inbounds float* %tmp9723, i64 1
- %tmp9725 = getelementptr inbounds float* %tmp9724, i64 1
- %tmp9726 = getelementptr inbounds float* %tmp9725, i64 1
- %tmp9727 = getelementptr inbounds float* %tmp9726, i64 1
- %tmp9728 = getelementptr inbounds float* %tmp9727, i64 1
- %tmp9729 = getelementptr inbounds float* %tmp9728, i64 1
- %tmp9730 = getelementptr inbounds float* %tmp9729, i64 1
- %tmp9731 = getelementptr inbounds float* %tmp9730, i64 1
- %tmp9732 = getelementptr inbounds float* %tmp9731, i64 1
- %tmp9733 = getelementptr inbounds float* %tmp9732, i64 1
- %tmp9734 = getelementptr inbounds float* %tmp9733, i64 1
- %tmp9735 = getelementptr inbounds float* %tmp9734, i64 1
- %tmp9736 = getelementptr inbounds float* %tmp9735, i64 1
- %tmp9737 = getelementptr inbounds float* %tmp9736, i64 1
- %tmp9738 = getelementptr inbounds float* %tmp9737, i64 1
- %tmp9739 = getelementptr inbounds float* %tmp9738, i64 1
- %tmp9740 = getelementptr inbounds float* %tmp9739, i64 1
- %tmp9741 = getelementptr inbounds float* %tmp9740, i64 1
- %tmp9742 = getelementptr inbounds float* %tmp9741, i64 1
- %tmp9743 = getelementptr inbounds float* %tmp9742, i64 1
- %tmp9744 = getelementptr inbounds float* %tmp9743, i64 1
- %tmp9745 = getelementptr inbounds float* %tmp9744, i64 1
- %tmp9746 = getelementptr inbounds float* %tmp9745, i64 1
- %tmp9747 = getelementptr inbounds float* %tmp9746, i64 1
- %tmp9748 = getelementptr inbounds float* %tmp9747, i64 1
- %tmp9749 = getelementptr inbounds float* %tmp9748, i64 1
- %tmp9750 = getelementptr inbounds float* %tmp9749, i64 1
- %tmp9751 = getelementptr inbounds float* %tmp9750, i64 1
- %tmp9752 = getelementptr inbounds float* %tmp9751, i64 1
- %tmp9753 = getelementptr inbounds float* %tmp9752, i64 1
- %tmp9754 = getelementptr inbounds float* %tmp9753, i64 1
- %tmp9755 = getelementptr inbounds float* %tmp9754, i64 1
- %tmp9756 = getelementptr inbounds float* %tmp9755, i64 1
- %tmp9757 = getelementptr inbounds float* %tmp9756, i64 1
- %tmp9758 = getelementptr inbounds float* %tmp9757, i64 1
- %tmp9759 = getelementptr inbounds float* %tmp9758, i64 1
- %tmp9760 = getelementptr inbounds float* %tmp9759, i64 1
- %tmp9761 = getelementptr inbounds float* %tmp9760, i64 1
- %tmp9762 = getelementptr inbounds float* %tmp9761, i64 1
- %tmp9763 = getelementptr inbounds float* %tmp9762, i64 1
- %tmp9764 = getelementptr inbounds float* %tmp9763, i64 1
- %tmp9765 = getelementptr inbounds float* %tmp9764, i64 1
- %tmp9766 = getelementptr inbounds float* %tmp9765, i64 1
- %tmp9767 = getelementptr inbounds float* %tmp9766, i64 1
- %tmp9768 = getelementptr inbounds float* %tmp9767, i64 1
- %tmp9769 = getelementptr inbounds float* %tmp9768, i64 1
- %tmp9770 = getelementptr inbounds float* %tmp9769, i64 1
- %tmp9771 = getelementptr inbounds float* %tmp9770, i64 1
- %tmp9772 = getelementptr inbounds float* %tmp9771, i64 1
- %tmp9773 = getelementptr inbounds float* %tmp9772, i64 1
- %tmp9774 = getelementptr inbounds float* %tmp9773, i64 1
- %tmp9775 = getelementptr inbounds float* %tmp9774, i64 1
- %tmp9776 = getelementptr inbounds float* %tmp9775, i64 1
- %tmp9777 = getelementptr inbounds float* %tmp9776, i64 1
- %tmp9778 = getelementptr inbounds float* %tmp9777, i64 1
- %tmp9779 = getelementptr inbounds float* %tmp9778, i64 1
- %tmp9780 = getelementptr inbounds float* %tmp9779, i64 1
- %tmp9781 = getelementptr inbounds float* %tmp9780, i64 1
- %tmp9782 = getelementptr inbounds float* %tmp9781, i64 1
- %tmp9783 = getelementptr inbounds float* %tmp9782, i64 1
- %tmp9784 = getelementptr inbounds float* %tmp9783, i64 1
- %tmp9785 = getelementptr inbounds float* %tmp9784, i64 1
- %tmp9786 = getelementptr inbounds float* %tmp9785, i64 1
- %tmp9787 = getelementptr inbounds float* %tmp9786, i64 1
- %tmp9788 = getelementptr inbounds float* %tmp9787, i64 1
- %tmp9789 = getelementptr inbounds float* %tmp9788, i64 1
- %tmp9790 = getelementptr inbounds float* %tmp9789, i64 1
- %tmp9791 = getelementptr inbounds float* %tmp9790, i64 1
- %tmp9792 = getelementptr inbounds float* %tmp9791, i64 1
- %tmp9793 = getelementptr inbounds float* %tmp9792, i64 1
- %tmp9794 = getelementptr inbounds float* %tmp9793, i64 1
- %tmp9795 = getelementptr inbounds float* %tmp9794, i64 1
- %tmp9796 = getelementptr inbounds float* %tmp9795, i64 1
- %tmp9797 = getelementptr inbounds float* %tmp9796, i64 1
- %tmp9798 = getelementptr inbounds float* %tmp9797, i64 1
- %tmp9799 = getelementptr inbounds float* %tmp9798, i64 1
- %tmp9800 = getelementptr inbounds float* %tmp9799, i64 1
- %tmp9801 = getelementptr inbounds float* %tmp9800, i64 1
- %tmp9802 = getelementptr inbounds float* %tmp9801, i64 1
- %tmp9803 = getelementptr inbounds float* %tmp9802, i64 1
- %tmp9804 = getelementptr inbounds float* %tmp9803, i64 1
- %tmp9805 = getelementptr inbounds float* %tmp9804, i64 1
- %tmp9806 = getelementptr inbounds float* %tmp9805, i64 1
- %tmp9807 = getelementptr inbounds float* %tmp9806, i64 1
- %tmp9808 = getelementptr inbounds float* %tmp9807, i64 1
- %tmp9809 = getelementptr inbounds float* %tmp9808, i64 1
- %tmp9810 = getelementptr inbounds float* %tmp9809, i64 1
- %tmp9811 = getelementptr inbounds float* %tmp9810, i64 1
- %tmp9812 = getelementptr inbounds float* %tmp9811, i64 1
- %tmp9813 = getelementptr inbounds float* %tmp9812, i64 1
- %tmp9814 = getelementptr inbounds float* %tmp9813, i64 1
- %tmp9815 = getelementptr inbounds float* %tmp9814, i64 1
- %tmp9816 = getelementptr inbounds float* %tmp9815, i64 1
- %tmp9817 = getelementptr inbounds float* %tmp9816, i64 1
- %tmp9818 = getelementptr inbounds float* %tmp9817, i64 1
- %tmp9819 = getelementptr inbounds float* %tmp9818, i64 1
- %tmp9820 = getelementptr inbounds float* %tmp9819, i64 1
- %tmp9821 = getelementptr inbounds float* %tmp9820, i64 1
- %tmp9822 = getelementptr inbounds float* %tmp9821, i64 1
- %tmp9823 = getelementptr inbounds float* %tmp9822, i64 1
- %tmp9824 = getelementptr inbounds float* %tmp9823, i64 1
- %tmp9825 = getelementptr inbounds float* %tmp9824, i64 1
- %tmp9826 = getelementptr inbounds float* %tmp9825, i64 1
- %tmp9827 = getelementptr inbounds float* %tmp9826, i64 1
- %tmp9828 = getelementptr inbounds float* %tmp9827, i64 1
- %tmp9829 = getelementptr inbounds float* %tmp9828, i64 1
- %tmp9830 = getelementptr inbounds float* %tmp9829, i64 1
- %tmp9831 = getelementptr inbounds float* %tmp9830, i64 1
- %tmp9832 = getelementptr inbounds float* %tmp9831, i64 1
- %tmp9833 = getelementptr inbounds float* %tmp9832, i64 1
- %tmp9834 = getelementptr inbounds float* %tmp9833, i64 1
- %tmp9835 = getelementptr inbounds float* %tmp9834, i64 1
- %tmp9836 = getelementptr inbounds float* %tmp9835, i64 1
- %tmp9837 = getelementptr inbounds float* %tmp9836, i64 1
- %tmp9838 = getelementptr inbounds float* %tmp9837, i64 1
- %tmp9839 = getelementptr inbounds float* %tmp9838, i64 1
- %tmp9840 = getelementptr inbounds float* %tmp9839, i64 1
- %tmp9841 = getelementptr inbounds float* %tmp9840, i64 1
- %tmp9842 = getelementptr inbounds float* %tmp9841, i64 1
- %tmp9843 = getelementptr inbounds float* %tmp9842, i64 1
- %tmp9844 = getelementptr inbounds float* %tmp9843, i64 1
- %tmp9845 = getelementptr inbounds float* %tmp9844, i64 1
- %tmp9846 = getelementptr inbounds float* %tmp9845, i64 1
- %tmp9847 = getelementptr inbounds float* %tmp9846, i64 1
- %tmp9848 = getelementptr inbounds float* %tmp9847, i64 1
- %tmp9849 = getelementptr inbounds float* %tmp9848, i64 1
- %tmp9850 = getelementptr inbounds float* %tmp9849, i64 1
- %tmp9851 = getelementptr inbounds float* %tmp9850, i64 1
- %tmp9852 = getelementptr inbounds float* %tmp9851, i64 1
- %tmp9853 = getelementptr inbounds float* %tmp9852, i64 1
- %tmp9854 = getelementptr inbounds float* %tmp9853, i64 1
- %tmp9855 = getelementptr inbounds float* %tmp9854, i64 1
- %tmp9856 = getelementptr inbounds float* %tmp9855, i64 1
- %tmp9857 = getelementptr inbounds float* %tmp9856, i64 1
- %tmp9858 = getelementptr inbounds float* %tmp9857, i64 1
- %tmp9859 = getelementptr inbounds float* %tmp9858, i64 1
- %tmp9860 = getelementptr inbounds float* %tmp9859, i64 1
- %tmp9861 = getelementptr inbounds float* %tmp9860, i64 1
- %tmp9862 = getelementptr inbounds float* %tmp9861, i64 1
- %tmp9863 = getelementptr inbounds float* %tmp9862, i64 1
- %tmp9864 = getelementptr inbounds float* %tmp9863, i64 1
- %tmp9865 = getelementptr inbounds float* %tmp9864, i64 1
- %tmp9866 = getelementptr inbounds float* %tmp9865, i64 1
- %tmp9867 = getelementptr inbounds float* %tmp9866, i64 1
- %tmp9868 = getelementptr inbounds float* %tmp9867, i64 1
- %tmp9869 = getelementptr inbounds float* %tmp9868, i64 1
- %tmp9870 = getelementptr inbounds float* %tmp9869, i64 1
- %tmp9871 = getelementptr inbounds float* %tmp9870, i64 1
- %tmp9872 = getelementptr inbounds float* %tmp9871, i64 1
- %tmp9873 = getelementptr inbounds float* %tmp9872, i64 1
- %tmp9874 = getelementptr inbounds float* %tmp9873, i64 1
- %tmp9875 = getelementptr inbounds float* %tmp9874, i64 1
- %tmp9876 = getelementptr inbounds float* %tmp9875, i64 1
- %tmp9877 = getelementptr inbounds float* %tmp9876, i64 1
- %tmp9878 = getelementptr inbounds float* %tmp9877, i64 1
- %tmp9879 = getelementptr inbounds float* %tmp9878, i64 1
- %tmp9880 = getelementptr inbounds float* %tmp9879, i64 1
- %tmp9881 = getelementptr inbounds float* %tmp9880, i64 1
- %tmp9882 = getelementptr inbounds float* %tmp9881, i64 1
- %tmp9883 = getelementptr inbounds float* %tmp9882, i64 1
- %tmp9884 = getelementptr inbounds float* %tmp9883, i64 1
- %tmp9885 = getelementptr inbounds float* %tmp9884, i64 1
- %tmp9886 = getelementptr inbounds float* %tmp9885, i64 1
- %tmp9887 = getelementptr inbounds float* %tmp9886, i64 1
- %tmp9888 = getelementptr inbounds float* %tmp9887, i64 1
- %tmp9889 = getelementptr inbounds float* %tmp9888, i64 1
- %tmp9890 = getelementptr inbounds float* %tmp9889, i64 1
- %tmp9891 = getelementptr inbounds float* %tmp9890, i64 1
- %tmp9892 = getelementptr inbounds float* %tmp9891, i64 1
- %tmp9893 = getelementptr inbounds float* %tmp9892, i64 1
- %tmp9894 = getelementptr inbounds float* %tmp9893, i64 1
- %tmp9895 = getelementptr inbounds float* %tmp9894, i64 1
- %tmp9896 = getelementptr inbounds float* %tmp9895, i64 1
- %tmp9897 = getelementptr inbounds float* %tmp9896, i64 1
- %tmp9898 = getelementptr inbounds float* %tmp9897, i64 1
- %tmp9899 = getelementptr inbounds float* %tmp9898, i64 1
- %tmp9900 = getelementptr inbounds float* %tmp9899, i64 1
- %tmp9901 = getelementptr inbounds float* %tmp9900, i64 1
- %tmp9902 = getelementptr inbounds float* %tmp9901, i64 1
- %tmp9903 = getelementptr inbounds float* %tmp9902, i64 1
- %tmp9904 = getelementptr inbounds float* %tmp9903, i64 1
- %tmp9905 = getelementptr inbounds float* %tmp9904, i64 1
- %tmp9906 = getelementptr inbounds float* %tmp9905, i64 1
- %tmp9907 = getelementptr inbounds float* %tmp9906, i64 1
- %tmp9908 = getelementptr inbounds float* %tmp9907, i64 1
- %tmp9909 = getelementptr inbounds float* %tmp9908, i64 1
- %tmp9910 = getelementptr inbounds float* %tmp9909, i64 1
- %tmp9911 = getelementptr inbounds float* %tmp9910, i64 1
- %tmp9912 = getelementptr inbounds float* %tmp9911, i64 1
- %tmp9913 = getelementptr inbounds float* %tmp9912, i64 1
- %tmp9914 = getelementptr inbounds float* %tmp9913, i64 1
- %tmp9915 = getelementptr inbounds float* %tmp9914, i64 1
- %tmp9916 = getelementptr inbounds float* %tmp9915, i64 1
- %tmp9917 = getelementptr inbounds float* %tmp9916, i64 1
- %tmp9918 = getelementptr inbounds float* %tmp9917, i64 1
- %tmp9919 = getelementptr inbounds float* %tmp9918, i64 1
- %tmp9920 = getelementptr inbounds float* %tmp9919, i64 1
- %tmp9921 = getelementptr inbounds float* %tmp9920, i64 1
- %tmp9922 = getelementptr inbounds float* %tmp9921, i64 1
- %tmp9923 = getelementptr inbounds float* %tmp9922, i64 1
- %tmp9924 = getelementptr inbounds float* %tmp9923, i64 1
- %tmp9925 = getelementptr inbounds float* %tmp9924, i64 1
- %tmp9926 = getelementptr inbounds float* %tmp9925, i64 1
- %tmp9927 = getelementptr inbounds float* %tmp9926, i64 1
- %tmp9928 = getelementptr inbounds float* %tmp9927, i64 1
- %tmp9929 = getelementptr inbounds float* %tmp9928, i64 1
- %tmp9930 = getelementptr inbounds float* %tmp9929, i64 1
- %tmp9931 = getelementptr inbounds float* %tmp9930, i64 1
- %tmp9932 = getelementptr inbounds float* %tmp9931, i64 1
- %tmp9933 = getelementptr inbounds float* %tmp9932, i64 1
- %tmp9934 = getelementptr inbounds float* %tmp9933, i64 1
- %tmp9935 = getelementptr inbounds float* %tmp9934, i64 1
- %tmp9936 = getelementptr inbounds float* %tmp9935, i64 1
- %tmp9937 = getelementptr inbounds float* %tmp9936, i64 1
- %tmp9938 = getelementptr inbounds float* %tmp9937, i64 1
- %tmp9939 = getelementptr inbounds float* %tmp9938, i64 1
- %tmp9940 = getelementptr inbounds float* %tmp9939, i64 1
- %tmp9941 = getelementptr inbounds float* %tmp9940, i64 1
- %tmp9942 = getelementptr inbounds float* %tmp9941, i64 1
- %tmp9943 = getelementptr inbounds float* %tmp9942, i64 1
- %tmp9944 = getelementptr inbounds float* %tmp9943, i64 1
- %tmp9945 = getelementptr inbounds float* %tmp9944, i64 1
- %tmp9946 = getelementptr inbounds float* %tmp9945, i64 1
- %tmp9947 = getelementptr inbounds float* %tmp9946, i64 1
- %tmp9948 = getelementptr inbounds float* %tmp9947, i64 1
- %tmp9949 = getelementptr inbounds float* %tmp9948, i64 1
- %tmp9950 = getelementptr inbounds float* %tmp9949, i64 1
- %tmp9951 = getelementptr inbounds float* %tmp9950, i64 1
- %tmp9952 = getelementptr inbounds float* %tmp9951, i64 1
- %tmp9953 = getelementptr inbounds float* %tmp9952, i64 1
- %tmp9954 = getelementptr inbounds float* %tmp9953, i64 1
- %tmp9955 = getelementptr inbounds float* %tmp9954, i64 1
- %tmp9956 = getelementptr inbounds float* %tmp9955, i64 1
- %tmp9957 = getelementptr inbounds float* %tmp9956, i64 1
- %tmp9958 = getelementptr inbounds float* %tmp9957, i64 1
- %tmp9959 = getelementptr inbounds float* %tmp9958, i64 1
- %tmp9960 = getelementptr inbounds float* %tmp9959, i64 1
- %tmp9961 = getelementptr inbounds float* %tmp9960, i64 1
- %tmp9962 = getelementptr inbounds float* %tmp9961, i64 1
- %tmp9963 = getelementptr inbounds float* %tmp9962, i64 1
- %tmp9964 = getelementptr inbounds float* %tmp9963, i64 1
- %tmp9965 = getelementptr inbounds float* %tmp9964, i64 1
- %tmp9966 = getelementptr inbounds float* %tmp9965, i64 1
- %tmp9967 = getelementptr inbounds float* %tmp9966, i64 1
- %tmp9968 = getelementptr inbounds float* %tmp9967, i64 1
- %tmp9969 = getelementptr inbounds float* %tmp9968, i64 1
- %tmp9970 = getelementptr inbounds float* %tmp9969, i64 1
- %tmp9971 = getelementptr inbounds float* %tmp9970, i64 1
- %tmp9972 = getelementptr inbounds float* %tmp9971, i64 1
- %tmp9973 = getelementptr inbounds float* %tmp9972, i64 1
- %tmp9974 = getelementptr inbounds float* %tmp9973, i64 1
- %tmp9975 = getelementptr inbounds float* %tmp9974, i64 1
- %tmp9976 = getelementptr inbounds float* %tmp9975, i64 1
- %tmp9977 = getelementptr inbounds float* %tmp9976, i64 1
- %tmp9978 = getelementptr inbounds float* %tmp9977, i64 1
- %tmp9979 = getelementptr inbounds float* %tmp9978, i64 1
- %tmp9980 = getelementptr inbounds float* %tmp9979, i64 1
- %tmp9981 = getelementptr inbounds float* %tmp9980, i64 1
- %tmp9982 = getelementptr inbounds float* %tmp9981, i64 1
- %tmp9983 = getelementptr inbounds float* %tmp9982, i64 1
- %tmp9984 = getelementptr inbounds float* %tmp9983, i64 1
- %tmp9985 = getelementptr inbounds float* %tmp9984, i64 1
- %tmp9986 = getelementptr inbounds float* %tmp9985, i64 1
- %tmp9987 = getelementptr inbounds float* %tmp9986, i64 1
- %tmp9988 = getelementptr inbounds float* %tmp9987, i64 1
- %tmp9989 = getelementptr inbounds float* %tmp9988, i64 1
- %tmp9990 = getelementptr inbounds float* %tmp9989, i64 1
- %tmp9991 = getelementptr inbounds float* %tmp9990, i64 1
- %tmp9992 = getelementptr inbounds float* %tmp9991, i64 1
- %tmp9993 = getelementptr inbounds float* %tmp9992, i64 1
- %tmp9994 = getelementptr inbounds float* %tmp9993, i64 1
- %tmp9995 = getelementptr inbounds float* %tmp9994, i64 1
- %tmp9996 = getelementptr inbounds float* %tmp9995, i64 1
- %tmp9997 = getelementptr inbounds float* %tmp9996, i64 1
- %tmp9998 = getelementptr inbounds float* %tmp9997, i64 1
- %tmp9999 = getelementptr inbounds float* %tmp9998, i64 1
- %tmp10000 = getelementptr inbounds float* %tmp9999, i64 1
- %tmp10001 = getelementptr inbounds float* %tmp10000, i64 1
- %tmp10002 = getelementptr inbounds float* %tmp10001, i64 1
- %tmp10003 = getelementptr inbounds float* %tmp10002, i64 1
- %tmp10004 = getelementptr inbounds float* %tmp10003, i64 1
- %tmp10005 = getelementptr inbounds float* %tmp10004, i64 1
- %tmp10006 = getelementptr inbounds float* %tmp10005, i64 1
- %tmp10007 = getelementptr inbounds float* %tmp10006, i64 1
- %tmp10008 = getelementptr inbounds float* %tmp10007, i64 1
- %tmp10009 = getelementptr inbounds float* %tmp10008, i64 1
- %tmp10010 = getelementptr inbounds float* %tmp10009, i64 1
- %tmp10011 = getelementptr inbounds float* %tmp10010, i64 1
- %tmp10012 = getelementptr inbounds float* %tmp10011, i64 1
- %tmp10013 = getelementptr inbounds float* %tmp10012, i64 1
- %tmp10014 = getelementptr inbounds float* %tmp10013, i64 1
- %tmp10015 = getelementptr inbounds float* %tmp10014, i64 1
- %tmp10016 = getelementptr inbounds float* %tmp10015, i64 1
- %tmp10017 = getelementptr inbounds float* %tmp10016, i64 1
- %tmp10018 = getelementptr inbounds float* %tmp10017, i64 1
- %tmp10019 = getelementptr inbounds float* %tmp10018, i64 1
- %tmp10020 = getelementptr inbounds float* %tmp10019, i64 1
- %tmp10021 = getelementptr inbounds float* %tmp10020, i64 1
- %tmp10022 = getelementptr inbounds float* %tmp10021, i64 1
- %tmp10023 = getelementptr inbounds float* %tmp10022, i64 1
- %tmp10024 = getelementptr inbounds float* %tmp10023, i64 1
- %tmp10025 = getelementptr inbounds float* %tmp10024, i64 1
- %tmp10026 = getelementptr inbounds float* %tmp10025, i64 1
- %tmp10027 = getelementptr inbounds float* %tmp10026, i64 1
- %tmp10028 = getelementptr inbounds float* %tmp10027, i64 1
- %tmp10029 = getelementptr inbounds float* %tmp10028, i64 1
- %tmp10030 = getelementptr inbounds float* %tmp10029, i64 1
- %tmp10031 = getelementptr inbounds float* %tmp10030, i64 1
- %tmp10032 = getelementptr inbounds float* %tmp10031, i64 1
- %tmp10033 = getelementptr inbounds float* %tmp10032, i64 1
- %tmp10034 = getelementptr inbounds float* %tmp10033, i64 1
- %tmp10035 = getelementptr inbounds float* %tmp10034, i64 1
- %tmp10036 = getelementptr inbounds float* %tmp10035, i64 1
- %tmp10037 = getelementptr inbounds float* %tmp10036, i64 1
- %tmp10038 = getelementptr inbounds float* %tmp10037, i64 1
- %tmp10039 = getelementptr inbounds float* %tmp10038, i64 1
- %tmp10040 = getelementptr inbounds float* %tmp10039, i64 1
- %tmp10041 = getelementptr inbounds float* %tmp10040, i64 1
- %tmp10042 = getelementptr inbounds float* %tmp10041, i64 1
- %tmp10043 = getelementptr inbounds float* %tmp10042, i64 1
- %tmp10044 = getelementptr inbounds float* %tmp10043, i64 1
- %tmp10045 = getelementptr inbounds float* %tmp10044, i64 1
- %tmp10046 = getelementptr inbounds float* %tmp10045, i64 1
- %tmp10047 = getelementptr inbounds float* %tmp10046, i64 1
- %tmp10048 = getelementptr inbounds float* %tmp10047, i64 1
- %tmp10049 = getelementptr inbounds float* %tmp10048, i64 1
- %tmp10050 = getelementptr inbounds float* %tmp10049, i64 1
- %tmp10051 = getelementptr inbounds float* %tmp10050, i64 1
- %tmp10052 = getelementptr inbounds float* %tmp10051, i64 1
- %tmp10053 = getelementptr inbounds float* %tmp10052, i64 1
- %tmp10054 = getelementptr inbounds float* %tmp10053, i64 1
- %tmp10055 = getelementptr inbounds float* %tmp10054, i64 1
- %tmp10056 = getelementptr inbounds float* %tmp10055, i64 1
- %tmp10057 = getelementptr inbounds float* %tmp10056, i64 1
- %tmp10058 = getelementptr inbounds float* %tmp10057, i64 1
- %tmp10059 = getelementptr inbounds float* %tmp10058, i64 1
- %tmp10060 = getelementptr inbounds float* %tmp10059, i64 1
- %tmp10061 = getelementptr inbounds float* %tmp10060, i64 1
- %tmp10062 = getelementptr inbounds float* %tmp10061, i64 1
- %tmp10063 = getelementptr inbounds float* %tmp10062, i64 1
- %tmp10064 = getelementptr inbounds float* %tmp10063, i64 1
- %tmp10065 = getelementptr inbounds float* %tmp10064, i64 1
- %tmp10066 = getelementptr inbounds float* %tmp10065, i64 1
- %tmp10067 = getelementptr inbounds float* %tmp10066, i64 1
- %tmp10068 = getelementptr inbounds float* %tmp10067, i64 1
- %tmp10069 = getelementptr inbounds float* %tmp10068, i64 1
- %tmp10070 = getelementptr inbounds float* %tmp10069, i64 1
- %tmp10071 = getelementptr inbounds float* %tmp10070, i64 1
- %tmp10072 = getelementptr inbounds float* %tmp10071, i64 1
- %tmp10073 = getelementptr inbounds float* %tmp10072, i64 1
- %tmp10074 = getelementptr inbounds float* %tmp10073, i64 1
- %tmp10075 = getelementptr inbounds float* %tmp10074, i64 1
- %tmp10076 = getelementptr inbounds float* %tmp10075, i64 1
- %tmp10077 = getelementptr inbounds float* %tmp10076, i64 1
- %tmp10078 = getelementptr inbounds float* %tmp10077, i64 1
- %tmp10079 = getelementptr inbounds float* %tmp10078, i64 1
- %tmp10080 = getelementptr inbounds float* %tmp10079, i64 1
- %tmp10081 = getelementptr inbounds float* %tmp10080, i64 1
- %tmp10082 = getelementptr inbounds float* %tmp10081, i64 1
- %tmp10083 = getelementptr inbounds float* %tmp10082, i64 1
- %tmp10084 = getelementptr inbounds float* %tmp10083, i64 1
- %tmp10085 = getelementptr inbounds float* %tmp10084, i64 1
- %tmp10086 = getelementptr inbounds float* %tmp10085, i64 1
- %tmp10087 = getelementptr inbounds float* %tmp10086, i64 1
- %tmp10088 = getelementptr inbounds float* %tmp10087, i64 1
- %tmp10089 = getelementptr inbounds float* %tmp10088, i64 1
- %tmp10090 = getelementptr inbounds float* %tmp10089, i64 1
- %tmp10091 = getelementptr inbounds float* %tmp10090, i64 1
- %tmp10092 = getelementptr inbounds float* %tmp10091, i64 1
- %tmp10093 = getelementptr inbounds float* %tmp10092, i64 1
- %tmp10094 = getelementptr inbounds float* %tmp10093, i64 1
- %tmp10095 = getelementptr inbounds float* %tmp10094, i64 1
- %tmp10096 = getelementptr inbounds float* %tmp10095, i64 1
- %tmp10097 = getelementptr inbounds float* %tmp10096, i64 1
- %tmp10098 = getelementptr inbounds float* %tmp10097, i64 1
- %tmp10099 = getelementptr inbounds float* %tmp10098, i64 1
- %tmp10100 = getelementptr inbounds float* %tmp10099, i64 1
- %tmp10101 = getelementptr inbounds float* %tmp10100, i64 1
- %tmp10102 = getelementptr inbounds float* %tmp10101, i64 1
- %tmp10103 = getelementptr inbounds float* %tmp10102, i64 1
- %tmp10104 = getelementptr inbounds float* %tmp10103, i64 1
- %tmp10105 = getelementptr inbounds float* %tmp10104, i64 1
- %tmp10106 = getelementptr inbounds float* %tmp10105, i64 1
- %tmp10107 = getelementptr inbounds float* %tmp10106, i64 1
- %tmp10108 = getelementptr inbounds float* %tmp10107, i64 1
- %tmp10109 = getelementptr inbounds float* %tmp10108, i64 1
- %tmp10110 = getelementptr inbounds float* %tmp10109, i64 1
- %tmp10111 = getelementptr inbounds float* %tmp10110, i64 1
- %tmp10112 = getelementptr inbounds float* %tmp10111, i64 1
- %tmp10113 = getelementptr inbounds float* %tmp10112, i64 1
- %tmp10114 = getelementptr inbounds float* %tmp10113, i64 1
- %tmp10115 = getelementptr inbounds float* %tmp10114, i64 1
- %tmp10116 = getelementptr inbounds float* %tmp10115, i64 1
- %tmp10117 = getelementptr inbounds float* %tmp10116, i64 1
- %tmp10118 = getelementptr inbounds float* %tmp10117, i64 1
- %tmp10119 = getelementptr inbounds float* %tmp10118, i64 1
- %tmp10120 = getelementptr inbounds float* %tmp10119, i64 1
- %tmp10121 = getelementptr inbounds float* %tmp10120, i64 1
- %tmp10122 = getelementptr inbounds float* %tmp10121, i64 1
- %tmp10123 = getelementptr inbounds float* %tmp10122, i64 1
- %tmp10124 = getelementptr inbounds float* %tmp10123, i64 1
- %tmp10125 = getelementptr inbounds float* %tmp10124, i64 1
- %tmp10126 = getelementptr inbounds float* %tmp10125, i64 1
- %tmp10127 = getelementptr inbounds float* %tmp10126, i64 1
- %tmp10128 = getelementptr inbounds float* %tmp10127, i64 1
- %tmp10129 = getelementptr inbounds float* %tmp10128, i64 1
- %tmp10130 = getelementptr inbounds float* %tmp10129, i64 1
- %tmp10131 = getelementptr inbounds float* %tmp10130, i64 1
- %tmp10132 = getelementptr inbounds float* %tmp10131, i64 1
- %tmp10133 = getelementptr inbounds float* %tmp10132, i64 1
- %tmp10134 = getelementptr inbounds float* %tmp10133, i64 1
- %tmp10135 = getelementptr inbounds float* %tmp10134, i64 1
- %tmp10136 = getelementptr inbounds float* %tmp10135, i64 1
- %tmp10137 = getelementptr inbounds float* %tmp10136, i64 1
- %tmp10138 = getelementptr inbounds float* %tmp10137, i64 1
- %tmp10139 = getelementptr inbounds float* %tmp10138, i64 1
- %tmp10140 = getelementptr inbounds float* %tmp10139, i64 1
- %tmp10141 = getelementptr inbounds float* %tmp10140, i64 1
- %tmp10142 = getelementptr inbounds float* %tmp10141, i64 1
- %tmp10143 = getelementptr inbounds float* %tmp10142, i64 1
- %tmp10144 = getelementptr inbounds float* %tmp10143, i64 1
- %tmp10145 = getelementptr inbounds float* %tmp10144, i64 1
- %tmp10146 = getelementptr inbounds float* %tmp10145, i64 1
- %tmp10147 = getelementptr inbounds float* %tmp10146, i64 1
- %tmp10148 = getelementptr inbounds float* %tmp10147, i64 1
- %tmp10149 = getelementptr inbounds float* %tmp10148, i64 1
- %tmp10150 = getelementptr inbounds float* %tmp10149, i64 1
- %tmp10151 = getelementptr inbounds float* %tmp10150, i64 1
- %tmp10152 = getelementptr inbounds float* %tmp10151, i64 1
- %tmp10153 = getelementptr inbounds float* %tmp10152, i64 1
- %tmp10154 = getelementptr inbounds float* %tmp10153, i64 1
- %tmp10155 = getelementptr inbounds float* %tmp10154, i64 1
- %tmp10156 = getelementptr inbounds float* %tmp10155, i64 1
- %tmp10157 = getelementptr inbounds float* %tmp10156, i64 1
- %tmp10158 = getelementptr inbounds float* %tmp10157, i64 1
- %tmp10159 = getelementptr inbounds float* %tmp10158, i64 1
- %tmp10160 = getelementptr inbounds float* %tmp10159, i64 1
- %tmp10161 = getelementptr inbounds float* %tmp10160, i64 1
- %tmp10162 = getelementptr inbounds float* %tmp10161, i64 1
- %tmp10163 = getelementptr inbounds float* %tmp10162, i64 1
- %tmp10164 = getelementptr inbounds float* %tmp10163, i64 1
- %tmp10165 = getelementptr inbounds float* %tmp10164, i64 1
- %tmp10166 = getelementptr inbounds float* %tmp10165, i64 1
- %tmp10167 = getelementptr inbounds float* %tmp10166, i64 1
- %tmp10168 = getelementptr inbounds float* %tmp10167, i64 1
- %tmp10169 = getelementptr inbounds float* %tmp10168, i64 1
- %tmp10170 = getelementptr inbounds float* %tmp10169, i64 1
- %tmp10171 = getelementptr inbounds float* %tmp10170, i64 1
- %tmp10172 = getelementptr inbounds float* %tmp10171, i64 1
- %tmp10173 = getelementptr inbounds float* %tmp10172, i64 1
- %tmp10174 = getelementptr inbounds float* %tmp10173, i64 1
- %tmp10175 = getelementptr inbounds float* %tmp10174, i64 1
- %tmp10176 = getelementptr inbounds float* %tmp10175, i64 1
- %tmp10177 = getelementptr inbounds float* %tmp10176, i64 1
- %tmp10178 = getelementptr inbounds float* %tmp10177, i64 1
- %tmp10179 = getelementptr inbounds float* %tmp10178, i64 1
- %tmp10180 = getelementptr inbounds float* %tmp10179, i64 1
- %tmp10181 = getelementptr inbounds float* %tmp10180, i64 1
- %tmp10182 = getelementptr inbounds float* %tmp10181, i64 1
- %tmp10183 = getelementptr inbounds float* %tmp10182, i64 1
- %tmp10184 = getelementptr inbounds float* %tmp10183, i64 1
- %tmp10185 = getelementptr inbounds float* %tmp10184, i64 1
- %tmp10186 = getelementptr inbounds float* %tmp10185, i64 1
- %tmp10187 = getelementptr inbounds float* %tmp10186, i64 1
- %tmp10188 = getelementptr inbounds float* %tmp10187, i64 1
- %tmp10189 = getelementptr inbounds float* %tmp10188, i64 1
- %tmp10190 = getelementptr inbounds float* %tmp10189, i64 1
- %tmp10191 = getelementptr inbounds float* %tmp10190, i64 1
- %tmp10192 = getelementptr inbounds float* %tmp10191, i64 1
- %tmp10193 = getelementptr inbounds float* %tmp10192, i64 1
- %tmp10194 = getelementptr inbounds float* %tmp10193, i64 1
- %tmp10195 = getelementptr inbounds float* %tmp10194, i64 1
- %tmp10196 = getelementptr inbounds float* %tmp10195, i64 1
- %tmp10197 = getelementptr inbounds float* %tmp10196, i64 1
- %tmp10198 = getelementptr inbounds float* %tmp10197, i64 1
- %tmp10199 = getelementptr inbounds float* %tmp10198, i64 1
- %tmp10200 = getelementptr inbounds float* %tmp10199, i64 1
- %tmp10201 = getelementptr inbounds float* %tmp10200, i64 1
- %tmp10202 = getelementptr inbounds float* %tmp10201, i64 1
- %tmp10203 = getelementptr inbounds float* %tmp10202, i64 1
- %tmp10204 = getelementptr inbounds float* %tmp10203, i64 1
- %tmp10205 = getelementptr inbounds float* %tmp10204, i64 1
- %tmp10206 = getelementptr inbounds float* %tmp10205, i64 1
- %tmp10207 = getelementptr inbounds float* %tmp10206, i64 1
- %tmp10208 = getelementptr inbounds float* %tmp10207, i64 1
- %tmp10209 = getelementptr inbounds float* %tmp10208, i64 1
- %tmp10210 = getelementptr inbounds float* %tmp10209, i64 1
- %tmp10211 = getelementptr inbounds float* %tmp10210, i64 1
- %tmp10212 = getelementptr inbounds float* %tmp10211, i64 1
- %tmp10213 = getelementptr inbounds float* %tmp10212, i64 1
- %tmp10214 = getelementptr inbounds float* %tmp10213, i64 1
- %tmp10215 = getelementptr inbounds float* %tmp10214, i64 1
- %tmp10216 = getelementptr inbounds float* %tmp10215, i64 1
- %tmp10217 = getelementptr inbounds float* %tmp10216, i64 1
- %tmp10218 = getelementptr inbounds float* %tmp10217, i64 1
- %tmp10219 = getelementptr inbounds float* %tmp10218, i64 1
- %tmp10220 = getelementptr inbounds float* %tmp10219, i64 1
- %tmp10221 = getelementptr inbounds float* %tmp10220, i64 1
- %tmp10222 = getelementptr inbounds float* %tmp10221, i64 1
- %tmp10223 = getelementptr inbounds float* %tmp10222, i64 1
- %tmp10224 = getelementptr inbounds float* %tmp10223, i64 1
- %tmp10225 = getelementptr inbounds float* %tmp10224, i64 1
- %tmp10226 = getelementptr inbounds float* %tmp10225, i64 1
- %tmp10227 = getelementptr inbounds float* %tmp10226, i64 1
- %tmp10228 = getelementptr inbounds float* %tmp10227, i64 1
- %tmp10229 = getelementptr inbounds float* %tmp10228, i64 1
- %tmp10230 = getelementptr inbounds float* %tmp10229, i64 1
- %tmp10231 = getelementptr inbounds float* %tmp10230, i64 1
- %tmp10232 = getelementptr inbounds float* %tmp10231, i64 1
- %tmp10233 = getelementptr inbounds float* %tmp10232, i64 1
- %tmp10234 = getelementptr inbounds float* %tmp10233, i64 1
- %tmp10235 = getelementptr inbounds float* %tmp10234, i64 1
- %tmp10236 = getelementptr inbounds float* %tmp10235, i64 1
- %tmp10237 = getelementptr inbounds float* %tmp10236, i64 1
- %tmp10238 = getelementptr inbounds float* %tmp10237, i64 1
- %tmp10239 = getelementptr inbounds float* %tmp10238, i64 1
- %tmp10240 = getelementptr inbounds float* %tmp10239, i64 1
- %tmp10241 = getelementptr inbounds float* %tmp10240, i64 1
- %tmp10242 = getelementptr inbounds float* %tmp10241, i64 1
- %tmp10243 = getelementptr inbounds float* %tmp10242, i64 1
- %tmp10244 = getelementptr inbounds float* %tmp10243, i64 1
- %tmp10245 = getelementptr inbounds float* %tmp10244, i64 1
- %tmp10246 = getelementptr inbounds float* %tmp10245, i64 1
- %tmp10247 = getelementptr inbounds float* %tmp10246, i64 1
- %tmp10248 = getelementptr inbounds float* %tmp10247, i64 1
- %tmp10249 = getelementptr inbounds float* %tmp10248, i64 1
- %tmp10250 = getelementptr inbounds float* %tmp10249, i64 1
- %tmp10251 = getelementptr inbounds float* %tmp10250, i64 1
- %tmp10252 = getelementptr inbounds float* %tmp10251, i64 1
- %tmp10253 = getelementptr inbounds float* %tmp10252, i64 1
- %tmp10254 = getelementptr inbounds float* %tmp10253, i64 1
- %tmp10255 = getelementptr inbounds float* %tmp10254, i64 1
- %tmp10256 = getelementptr inbounds float* %tmp10255, i64 1
- %tmp10257 = getelementptr inbounds float* %tmp10256, i64 1
- %tmp10258 = getelementptr inbounds float* %tmp10257, i64 1
- %tmp10259 = getelementptr inbounds float* %tmp10258, i64 1
- %tmp10260 = getelementptr inbounds float* %tmp10259, i64 1
- %tmp10261 = getelementptr inbounds float* %tmp10260, i64 1
- %tmp10262 = getelementptr inbounds float* %tmp10261, i64 1
- %tmp10263 = getelementptr inbounds float* %tmp10262, i64 1
- %tmp10264 = getelementptr inbounds float* %tmp10263, i64 1
- %tmp10265 = getelementptr inbounds float* %tmp10264, i64 1
- %tmp10266 = getelementptr inbounds float* %tmp10265, i64 1
- %tmp10267 = getelementptr inbounds float* %tmp10266, i64 1
- %tmp10268 = getelementptr inbounds float* %tmp10267, i64 1
- %tmp10269 = getelementptr inbounds float* %tmp10268, i64 1
- %tmp10270 = getelementptr inbounds float* %tmp10269, i64 1
- %tmp10271 = getelementptr inbounds float* %tmp10270, i64 1
- %tmp10272 = getelementptr inbounds float* %tmp10271, i64 1
- %tmp10273 = getelementptr inbounds float* %tmp10272, i64 1
- %tmp10274 = getelementptr inbounds float* %tmp10273, i64 1
- %tmp10275 = getelementptr inbounds float* %tmp10274, i64 1
- %tmp10276 = getelementptr inbounds float* %tmp10275, i64 1
- %tmp10277 = getelementptr inbounds float* %tmp10276, i64 1
- %tmp10278 = getelementptr inbounds float* %tmp10277, i64 1
- %tmp10279 = getelementptr inbounds float* %tmp10278, i64 1
- %tmp10280 = getelementptr inbounds float* %tmp10279, i64 1
- %tmp10281 = getelementptr inbounds float* %tmp10280, i64 1
- %tmp10282 = getelementptr inbounds float* %tmp10281, i64 1
- %tmp10283 = getelementptr inbounds float* %tmp10282, i64 1
- %tmp10284 = getelementptr inbounds float* %tmp10283, i64 1
- %tmp10285 = getelementptr inbounds float* %tmp10284, i64 1
- %tmp10286 = getelementptr inbounds float* %tmp10285, i64 1
- %tmp10287 = getelementptr inbounds float* %tmp10286, i64 1
- %tmp10288 = getelementptr inbounds float* %tmp10287, i64 1
- %tmp10289 = getelementptr inbounds float* %tmp10288, i64 1
- %tmp10290 = getelementptr inbounds float* %tmp10289, i64 1
- %tmp10291 = getelementptr inbounds float* %tmp10290, i64 1
- %tmp10292 = getelementptr inbounds float* %tmp10291, i64 1
- %tmp10293 = getelementptr inbounds float* %tmp10292, i64 1
- %tmp10294 = getelementptr inbounds float* %tmp10293, i64 1
- %tmp10295 = getelementptr inbounds float* %tmp10294, i64 1
- %tmp10296 = getelementptr inbounds float* %tmp10295, i64 1
- %tmp10297 = getelementptr inbounds float* %tmp10296, i64 1
- %tmp10298 = getelementptr inbounds float* %tmp10297, i64 1
- %tmp10299 = getelementptr inbounds float* %tmp10298, i64 1
- %tmp10300 = getelementptr inbounds float* %tmp10299, i64 1
- %tmp10301 = getelementptr inbounds float* %tmp10300, i64 1
- %tmp10302 = getelementptr inbounds float* %tmp10301, i64 1
- %tmp10303 = getelementptr inbounds float* %tmp10302, i64 1
- %tmp10304 = getelementptr inbounds float* %tmp10303, i64 1
- %tmp10305 = getelementptr inbounds float* %tmp10304, i64 1
- %tmp10306 = getelementptr inbounds float* %tmp10305, i64 1
- %tmp10307 = getelementptr inbounds float* %tmp10306, i64 1
- %tmp10308 = getelementptr inbounds float* %tmp10307, i64 1
- %tmp10309 = getelementptr inbounds float* %tmp10308, i64 1
- %tmp10310 = getelementptr inbounds float* %tmp10309, i64 1
- %tmp10311 = getelementptr inbounds float* %tmp10310, i64 1
- %tmp10312 = getelementptr inbounds float* %tmp10311, i64 1
- %tmp10313 = getelementptr inbounds float* %tmp10312, i64 1
- %tmp10314 = getelementptr inbounds float* %tmp10313, i64 1
- %tmp10315 = getelementptr inbounds float* %tmp10314, i64 1
- %tmp10316 = getelementptr inbounds float* %tmp10315, i64 1
- %tmp10317 = getelementptr inbounds float* %tmp10316, i64 1
- %tmp10318 = getelementptr inbounds float* %tmp10317, i64 1
- %tmp10319 = getelementptr inbounds float* %tmp10318, i64 1
- %tmp10320 = getelementptr inbounds float* %tmp10319, i64 1
- %tmp10321 = getelementptr inbounds float* %tmp10320, i64 1
- %tmp10322 = getelementptr inbounds float* %tmp10321, i64 1
- %tmp10323 = getelementptr inbounds float* %tmp10322, i64 1
- %tmp10324 = getelementptr inbounds float* %tmp10323, i64 1
- %tmp10325 = getelementptr inbounds float* %tmp10324, i64 1
- %tmp10326 = getelementptr inbounds float* %tmp10325, i64 1
- %tmp10327 = getelementptr inbounds float* %tmp10326, i64 1
- %tmp10328 = getelementptr inbounds float* %tmp10327, i64 1
- %tmp10329 = getelementptr inbounds float* %tmp10328, i64 1
- %tmp10330 = getelementptr inbounds float* %tmp10329, i64 1
- %tmp10331 = getelementptr inbounds float* %tmp10330, i64 1
- %tmp10332 = getelementptr inbounds float* %tmp10331, i64 1
- %tmp10333 = getelementptr inbounds float* %tmp10332, i64 1
- %tmp10334 = getelementptr inbounds float* %tmp10333, i64 1
- %tmp10335 = getelementptr inbounds float* %tmp10334, i64 1
- %tmp10336 = getelementptr inbounds float* %tmp10335, i64 1
- %tmp10337 = getelementptr inbounds float* %tmp10336, i64 1
- %tmp10338 = getelementptr inbounds float* %tmp10337, i64 1
- %tmp10339 = getelementptr inbounds float* %tmp10338, i64 1
- %tmp10340 = getelementptr inbounds float* %tmp10339, i64 1
- %tmp10341 = getelementptr inbounds float* %tmp10340, i64 1
- %tmp10342 = getelementptr inbounds float* %tmp10341, i64 1
- %tmp10343 = getelementptr inbounds float* %tmp10342, i64 1
- %tmp10344 = getelementptr inbounds float* %tmp10343, i64 1
- %tmp10345 = getelementptr inbounds float* %tmp10344, i64 1
- %tmp10346 = getelementptr inbounds float* %tmp10345, i64 1
- %tmp10347 = getelementptr inbounds float* %tmp10346, i64 1
- %tmp10348 = getelementptr inbounds float* %tmp10347, i64 1
- %tmp10349 = getelementptr inbounds float* %tmp10348, i64 1
- %tmp10350 = getelementptr inbounds float* %tmp10349, i64 1
- %tmp10351 = getelementptr inbounds float* %tmp10350, i64 1
- %tmp10352 = getelementptr inbounds float* %tmp10351, i64 1
- %tmp10353 = getelementptr inbounds float* %tmp10352, i64 1
- %tmp10354 = getelementptr inbounds float* %tmp10353, i64 1
- %tmp10355 = getelementptr inbounds float* %tmp10354, i64 1
- %tmp10356 = getelementptr inbounds float* %tmp10355, i64 1
- %tmp10357 = getelementptr inbounds float* %tmp10356, i64 1
- %tmp10358 = getelementptr inbounds float* %tmp10357, i64 1
- %tmp10359 = getelementptr inbounds float* %tmp10358, i64 1
- %tmp10360 = getelementptr inbounds float* %tmp10359, i64 1
- %tmp10361 = getelementptr inbounds float* %tmp10360, i64 1
- %tmp10362 = getelementptr inbounds float* %tmp10361, i64 1
- %tmp10363 = getelementptr inbounds float* %tmp10362, i64 1
- %tmp10364 = getelementptr inbounds float* %tmp10363, i64 1
- %tmp10365 = getelementptr inbounds float* %tmp10364, i64 1
- %tmp10366 = getelementptr inbounds float* %tmp10365, i64 1
- %tmp10367 = getelementptr inbounds float* %tmp10366, i64 1
- %tmp10368 = getelementptr inbounds float* %tmp10367, i64 1
- %tmp10369 = getelementptr inbounds float* %tmp10368, i64 1
- %tmp10370 = getelementptr inbounds float* %tmp10369, i64 1
- %tmp10371 = getelementptr inbounds float* %tmp10370, i64 1
- %tmp10372 = getelementptr inbounds float* %tmp10371, i64 1
- %tmp10373 = getelementptr inbounds float* %tmp10372, i64 1
- %tmp10374 = getelementptr inbounds float* %tmp10373, i64 1
- %tmp10375 = getelementptr inbounds float* %tmp10374, i64 1
- %tmp10376 = getelementptr inbounds float* %tmp10375, i64 1
- %tmp10377 = getelementptr inbounds float* %tmp10376, i64 1
- %tmp10378 = getelementptr inbounds float* %tmp10377, i64 1
- %tmp10379 = getelementptr inbounds float* %tmp10378, i64 1
- %tmp10380 = getelementptr inbounds float* %tmp10379, i64 1
- %tmp10381 = getelementptr inbounds float* %tmp10380, i64 1
- %tmp10382 = getelementptr inbounds float* %tmp10381, i64 1
- %tmp10383 = getelementptr inbounds float* %tmp10382, i64 1
- %tmp10384 = getelementptr inbounds float* %tmp10383, i64 1
- %tmp10385 = getelementptr inbounds float* %tmp10384, i64 1
- %tmp10386 = getelementptr inbounds float* %tmp10385, i64 1
- %tmp10387 = getelementptr inbounds float* %tmp10386, i64 1
- %tmp10388 = getelementptr inbounds float* %tmp10387, i64 1
- %tmp10389 = getelementptr inbounds float* %tmp10388, i64 1
- %tmp10390 = getelementptr inbounds float* %tmp10389, i64 1
- %tmp10391 = getelementptr inbounds float* %tmp10390, i64 1
- %tmp10392 = getelementptr inbounds float* %tmp10391, i64 1
- %tmp10393 = getelementptr inbounds float* %tmp10392, i64 1
- %tmp10394 = getelementptr inbounds float* %tmp10393, i64 1
- %tmp10395 = getelementptr inbounds float* %tmp10394, i64 1
- %tmp10396 = getelementptr inbounds float* %tmp10395, i64 1
- %tmp10397 = getelementptr inbounds float* %tmp10396, i64 1
- %tmp10398 = getelementptr inbounds float* %tmp10397, i64 1
- %tmp10399 = getelementptr inbounds float* %tmp10398, i64 1
- %tmp10400 = getelementptr inbounds float* %tmp10399, i64 1
- %tmp10401 = getelementptr inbounds float* %tmp10400, i64 1
- %tmp10402 = getelementptr inbounds float* %tmp10401, i64 1
- %tmp10403 = getelementptr inbounds float* %tmp10402, i64 1
- %tmp10404 = getelementptr inbounds float* %tmp10403, i64 1
- %tmp10405 = getelementptr inbounds float* %tmp10404, i64 1
- %tmp10406 = getelementptr inbounds float* %tmp10405, i64 1
- %tmp10407 = getelementptr inbounds float* %tmp10406, i64 1
- %tmp10408 = getelementptr inbounds float* %tmp10407, i64 1
- %tmp10409 = getelementptr inbounds float* %tmp10408, i64 1
- %tmp10410 = getelementptr inbounds float* %tmp10409, i64 1
- %tmp10411 = getelementptr inbounds float* %tmp10410, i64 1
- %tmp10412 = getelementptr inbounds float* %tmp10411, i64 1
- %tmp10413 = getelementptr inbounds float* %tmp10412, i64 1
- %tmp10414 = getelementptr inbounds float* %tmp10413, i64 1
- %tmp10415 = getelementptr inbounds float* %tmp10414, i64 1
- %tmp10416 = getelementptr inbounds float* %tmp10415, i64 1
- %tmp10417 = getelementptr inbounds float* %tmp10416, i64 1
- %tmp10418 = getelementptr inbounds float* %tmp10417, i64 1
- %tmp10419 = getelementptr inbounds float* %tmp10418, i64 1
- %tmp10420 = getelementptr inbounds float* %tmp10419, i64 1
- %tmp10421 = getelementptr inbounds float* %tmp10420, i64 1
- %tmp10422 = getelementptr inbounds float* %tmp10421, i64 1
- %tmp10423 = getelementptr inbounds float* %tmp10422, i64 1
- %tmp10424 = getelementptr inbounds float* %tmp10423, i64 1
- %tmp10425 = getelementptr inbounds float* %tmp10424, i64 1
- %tmp10426 = getelementptr inbounds float* %tmp10425, i64 1
- %tmp10427 = getelementptr inbounds float* %tmp10426, i64 1
- %tmp10428 = getelementptr inbounds float* %tmp10427, i64 1
- %tmp10429 = getelementptr inbounds float* %tmp10428, i64 1
- %tmp10430 = getelementptr inbounds float* %tmp10429, i64 1
- %tmp10431 = getelementptr inbounds float* %tmp10430, i64 1
- %tmp10432 = getelementptr inbounds float* %tmp10431, i64 1
- %tmp10433 = getelementptr inbounds float* %tmp10432, i64 1
- %tmp10434 = getelementptr inbounds float* %tmp10433, i64 1
- %tmp10435 = getelementptr inbounds float* %tmp10434, i64 1
- %tmp10436 = getelementptr inbounds float* %tmp10435, i64 1
- %tmp10437 = getelementptr inbounds float* %tmp10436, i64 1
- %tmp10438 = getelementptr inbounds float* %tmp10437, i64 1
- %tmp10439 = getelementptr inbounds float* %tmp10438, i64 1
- %tmp10440 = getelementptr inbounds float* %tmp10439, i64 1
- %tmp10441 = getelementptr inbounds float* %tmp10440, i64 1
- %tmp10442 = getelementptr inbounds float* %tmp10441, i64 1
- %tmp10443 = getelementptr inbounds float* %tmp10442, i64 1
- %tmp10444 = getelementptr inbounds float* %tmp10443, i64 1
- %tmp10445 = getelementptr inbounds float* %tmp10444, i64 1
- %tmp10446 = getelementptr inbounds float* %tmp10445, i64 1
- %tmp10447 = getelementptr inbounds float* %tmp10446, i64 1
- %tmp10448 = getelementptr inbounds float* %tmp10447, i64 1
- %tmp10449 = getelementptr inbounds float* %tmp10448, i64 1
- %tmp10450 = getelementptr inbounds float* %tmp10449, i64 1
- %tmp10451 = getelementptr inbounds float* %tmp10450, i64 1
- %tmp10452 = getelementptr inbounds float* %tmp10451, i64 1
- %tmp10453 = getelementptr inbounds float* %tmp10452, i64 1
- %tmp10454 = getelementptr inbounds float* %tmp10453, i64 1
- %tmp10455 = getelementptr inbounds float* %tmp10454, i64 1
- %tmp10456 = getelementptr inbounds float* %tmp10455, i64 1
- %tmp10457 = getelementptr inbounds float* %tmp10456, i64 1
- %tmp10458 = getelementptr inbounds float* %tmp10457, i64 1
- %tmp10459 = getelementptr inbounds float* %tmp10458, i64 1
- %tmp10460 = getelementptr inbounds float* %tmp10459, i64 1
- %tmp10461 = getelementptr inbounds float* %tmp10460, i64 1
- %tmp10462 = getelementptr inbounds float* %tmp10461, i64 1
- %tmp10463 = getelementptr inbounds float* %tmp10462, i64 1
- %tmp10464 = getelementptr inbounds float* %tmp10463, i64 1
- %tmp10465 = getelementptr inbounds float* %tmp10464, i64 1
- %tmp10466 = getelementptr inbounds float* %tmp10465, i64 1
- %tmp10467 = getelementptr inbounds float* %tmp10466, i64 1
- %tmp10468 = getelementptr inbounds float* %tmp10467, i64 1
- %tmp10469 = getelementptr inbounds float* %tmp10468, i64 1
- %tmp10470 = getelementptr inbounds float* %tmp10469, i64 1
- %tmp10471 = getelementptr inbounds float* %tmp10470, i64 1
- %tmp10472 = getelementptr inbounds float* %tmp10471, i64 1
- %tmp10473 = getelementptr inbounds float* %tmp10472, i64 1
- %tmp10474 = getelementptr inbounds float* %tmp10473, i64 1
- %tmp10475 = getelementptr inbounds float* %tmp10474, i64 1
- %tmp10476 = getelementptr inbounds float* %tmp10475, i64 1
- %tmp10477 = getelementptr inbounds float* %tmp10476, i64 1
- %tmp10478 = getelementptr inbounds float* %tmp10477, i64 1
- %tmp10479 = getelementptr inbounds float* %tmp10478, i64 1
- %tmp10480 = getelementptr inbounds float* %tmp10479, i64 1
- %tmp10481 = getelementptr inbounds float* %tmp10480, i64 1
- %tmp10482 = getelementptr inbounds float* %tmp10481, i64 1
- %tmp10483 = getelementptr inbounds float* %tmp10482, i64 1
- %tmp10484 = getelementptr inbounds float* %tmp10483, i64 1
- %tmp10485 = getelementptr inbounds float* %tmp10484, i64 1
- %tmp10486 = getelementptr inbounds float* %tmp10485, i64 1
- %tmp10487 = getelementptr inbounds float* %tmp10486, i64 1
- %tmp10488 = getelementptr inbounds float* %tmp10487, i64 1
- %tmp10489 = getelementptr inbounds float* %tmp10488, i64 1
- %tmp10490 = getelementptr inbounds float* %tmp10489, i64 1
- %tmp10491 = getelementptr inbounds float* %tmp10490, i64 1
- %tmp10492 = getelementptr inbounds float* %tmp10491, i64 1
- %tmp10493 = getelementptr inbounds float* %tmp10492, i64 1
- %tmp10494 = getelementptr inbounds float* %tmp10493, i64 1
- %tmp10495 = getelementptr inbounds float* %tmp10494, i64 1
- %tmp10496 = getelementptr inbounds float* %tmp10495, i64 1
- %tmp10497 = getelementptr inbounds float* %tmp10496, i64 1
- %tmp10498 = getelementptr inbounds float* %tmp10497, i64 1
- %tmp10499 = getelementptr inbounds float* %tmp10498, i64 1
- %tmp10500 = getelementptr inbounds float* %tmp10499, i64 1
- %tmp10501 = getelementptr inbounds float* %tmp10500, i64 1
- %tmp10502 = getelementptr inbounds float* %tmp10501, i64 1
- %tmp10503 = getelementptr inbounds float* %tmp10502, i64 1
- %tmp10504 = getelementptr inbounds float* %tmp10503, i64 1
- %tmp10505 = getelementptr inbounds float* %tmp10504, i64 1
- %tmp10506 = getelementptr inbounds float* %tmp10505, i64 1
- %tmp10507 = getelementptr inbounds float* %tmp10506, i64 1
- %tmp10508 = getelementptr inbounds float* %tmp10507, i64 1
- %tmp10509 = getelementptr inbounds float* %tmp10508, i64 1
- %tmp10510 = getelementptr inbounds float* %tmp10509, i64 1
- %tmp10511 = getelementptr inbounds float* %tmp10510, i64 1
- %tmp10512 = getelementptr inbounds float* %tmp10511, i64 1
- %tmp10513 = getelementptr inbounds float* %tmp10512, i64 1
- %tmp10514 = getelementptr inbounds float* %tmp10513, i64 1
- %tmp10515 = getelementptr inbounds float* %tmp10514, i64 1
- %tmp10516 = getelementptr inbounds float* %tmp10515, i64 1
- %tmp10517 = getelementptr inbounds float* %tmp10516, i64 1
- %tmp10518 = getelementptr inbounds float* %tmp10517, i64 1
- %tmp10519 = getelementptr inbounds float* %tmp10518, i64 1
- %tmp10520 = getelementptr inbounds float* %tmp10519, i64 1
- %tmp10521 = getelementptr inbounds float* %tmp10520, i64 1
- %tmp10522 = getelementptr inbounds float* %tmp10521, i64 1
- %tmp10523 = getelementptr inbounds float* %tmp10522, i64 1
- %tmp10524 = getelementptr inbounds float* %tmp10523, i64 1
- %tmp10525 = getelementptr inbounds float* %tmp10524, i64 1
- %tmp10526 = getelementptr inbounds float* %tmp10525, i64 1
- %tmp10527 = getelementptr inbounds float* %tmp10526, i64 1
- %tmp10528 = getelementptr inbounds float* %tmp10527, i64 1
- %tmp10529 = getelementptr inbounds float* %tmp10528, i64 1
- %tmp10530 = getelementptr inbounds float* %tmp10529, i64 1
- %tmp10531 = getelementptr inbounds float* %tmp10530, i64 1
- %tmp10532 = getelementptr inbounds float* %tmp10531, i64 1
- %tmp10533 = getelementptr inbounds float* %tmp10532, i64 1
- %tmp10534 = getelementptr inbounds float* %tmp10533, i64 1
- %tmp10535 = getelementptr inbounds float* %tmp10534, i64 1
- %tmp10536 = getelementptr inbounds float* %tmp10535, i64 1
- %tmp10537 = getelementptr inbounds float* %tmp10536, i64 1
- %tmp10538 = getelementptr inbounds float* %tmp10537, i64 1
- %tmp10539 = getelementptr inbounds float* %tmp10538, i64 1
- %tmp10540 = getelementptr inbounds float* %tmp10539, i64 1
- %tmp10541 = getelementptr inbounds float* %tmp10540, i64 1
- %tmp10542 = getelementptr inbounds float* %tmp10541, i64 1
- %tmp10543 = getelementptr inbounds float* %tmp10542, i64 1
- %tmp10544 = getelementptr inbounds float* %tmp10543, i64 1
- %tmp10545 = getelementptr inbounds float* %tmp10544, i64 1
- %tmp10546 = getelementptr inbounds float* %tmp10545, i64 1
- %tmp10547 = getelementptr inbounds float* %tmp10546, i64 1
- %tmp10548 = getelementptr inbounds float* %tmp10547, i64 1
- %tmp10549 = getelementptr inbounds float* %tmp10548, i64 1
- %tmp10550 = getelementptr inbounds float* %tmp10549, i64 1
- %tmp10551 = getelementptr inbounds float* %tmp10550, i64 1
- %tmp10552 = getelementptr inbounds float* %tmp10551, i64 1
- %tmp10553 = getelementptr inbounds float* %tmp10552, i64 1
- %tmp10554 = getelementptr inbounds float* %tmp10553, i64 1
- %tmp10555 = getelementptr inbounds float* %tmp10554, i64 1
- %tmp10556 = getelementptr inbounds float* %tmp10555, i64 1
- %tmp10557 = getelementptr inbounds float* %tmp10556, i64 1
- %tmp10558 = getelementptr inbounds float* %tmp10557, i64 1
- %tmp10559 = getelementptr inbounds float* %tmp10558, i64 1
- %tmp10560 = getelementptr inbounds float* %tmp10559, i64 1
- %tmp10561 = getelementptr inbounds float* %tmp10560, i64 1
- %tmp10562 = getelementptr inbounds float* %tmp10561, i64 1
- %tmp10563 = getelementptr inbounds float* %tmp10562, i64 1
- %tmp10564 = getelementptr inbounds float* %tmp10563, i64 1
- %tmp10565 = getelementptr inbounds float* %tmp10564, i64 1
- %tmp10566 = getelementptr inbounds float* %tmp10565, i64 1
- %tmp10567 = getelementptr inbounds float* %tmp10566, i64 1
- %tmp10568 = getelementptr inbounds float* %tmp10567, i64 1
- %tmp10569 = getelementptr inbounds float* %tmp10568, i64 1
- %tmp10570 = getelementptr inbounds float* %tmp10569, i64 1
- %tmp10571 = getelementptr inbounds float* %tmp10570, i64 1
- %tmp10572 = getelementptr inbounds float* %tmp10571, i64 1
- %tmp10573 = getelementptr inbounds float* %tmp10572, i64 1
- %tmp10574 = getelementptr inbounds float* %tmp10573, i64 1
- %tmp10575 = getelementptr inbounds float* %tmp10574, i64 1
- %tmp10576 = getelementptr inbounds float* %tmp10575, i64 1
- %tmp10577 = getelementptr inbounds float* %tmp10576, i64 1
- %tmp10578 = getelementptr inbounds float* %tmp10577, i64 1
- %tmp10579 = getelementptr inbounds float* %tmp10578, i64 1
- %tmp10580 = getelementptr inbounds float* %tmp10579, i64 1
- %tmp10581 = getelementptr inbounds float* %tmp10580, i64 1
- %tmp10582 = getelementptr inbounds float* %tmp10581, i64 1
- %tmp10583 = getelementptr inbounds float* %tmp10582, i64 1
- %tmp10584 = getelementptr inbounds float* %tmp10583, i64 1
- %tmp10585 = getelementptr inbounds float* %tmp10584, i64 1
- %tmp10586 = getelementptr inbounds float* %tmp10585, i64 1
- %tmp10587 = getelementptr inbounds float* %tmp10586, i64 1
- %tmp10588 = getelementptr inbounds float* %tmp10587, i64 1
- %tmp10589 = getelementptr inbounds float* %tmp10588, i64 1
- %tmp10590 = getelementptr inbounds float* %tmp10589, i64 1
- %tmp10591 = getelementptr inbounds float* %tmp10590, i64 1
- %tmp10592 = getelementptr inbounds float* %tmp10591, i64 1
- %tmp10593 = getelementptr inbounds float* %tmp10592, i64 1
- %tmp10594 = getelementptr inbounds float* %tmp10593, i64 1
- %tmp10595 = getelementptr inbounds float* %tmp10594, i64 1
- %tmp10596 = getelementptr inbounds float* %tmp10595, i64 1
- %tmp10597 = getelementptr inbounds float* %tmp10596, i64 1
- %tmp10598 = getelementptr inbounds float* %tmp10597, i64 1
- %tmp10599 = getelementptr inbounds float* %tmp10598, i64 1
- %tmp10600 = getelementptr inbounds float* %tmp10599, i64 1
- %tmp10601 = getelementptr inbounds float* %tmp10600, i64 1
- %tmp10602 = getelementptr inbounds float* %tmp10601, i64 1
- %tmp10603 = getelementptr inbounds float* %tmp10602, i64 1
- %tmp10604 = getelementptr inbounds float* %tmp10603, i64 1
- %tmp10605 = getelementptr inbounds float* %tmp10604, i64 1
- %tmp10606 = getelementptr inbounds float* %tmp10605, i64 1
- %tmp10607 = getelementptr inbounds float* %tmp10606, i64 1
- %tmp10608 = getelementptr inbounds float* %tmp10607, i64 1
- %tmp10609 = getelementptr inbounds float* %tmp10608, i64 1
- %tmp10610 = getelementptr inbounds float* %tmp10609, i64 1
- %tmp10611 = getelementptr inbounds float* %tmp10610, i64 1
- %tmp10612 = getelementptr inbounds float* %tmp10611, i64 1
- %tmp10613 = getelementptr inbounds float* %tmp10612, i64 1
- %tmp10614 = getelementptr inbounds float* %tmp10613, i64 1
- %tmp10615 = getelementptr inbounds float* %tmp10614, i64 1
- %tmp10616 = getelementptr inbounds float* %tmp10615, i64 1
- %tmp10617 = getelementptr inbounds float* %tmp10616, i64 1
- %tmp10618 = getelementptr inbounds float* %tmp10617, i64 1
- %tmp10619 = getelementptr inbounds float* %tmp10618, i64 1
- %tmp10620 = getelementptr inbounds float* %tmp10619, i64 1
- %tmp10621 = getelementptr inbounds float* %tmp10620, i64 1
- %tmp10622 = getelementptr inbounds float* %tmp10621, i64 1
- %tmp10623 = getelementptr inbounds float* %tmp10622, i64 1
- %tmp10624 = getelementptr inbounds float* %tmp10623, i64 1
- %tmp10625 = getelementptr inbounds float* %tmp10624, i64 1
- %tmp10626 = getelementptr inbounds float* %tmp10625, i64 1
- %tmp10627 = getelementptr inbounds float* %tmp10626, i64 1
- %tmp10628 = getelementptr inbounds float* %tmp10627, i64 1
- %tmp10629 = getelementptr inbounds float* %tmp10628, i64 1
- %tmp10630 = getelementptr inbounds float* %tmp10629, i64 1
- %tmp10631 = getelementptr inbounds float* %tmp10630, i64 1
- %tmp10632 = getelementptr inbounds float* %tmp10631, i64 1
- %tmp10633 = getelementptr inbounds float* %tmp10632, i64 1
- %tmp10634 = getelementptr inbounds float* %tmp10633, i64 1
- %tmp10635 = getelementptr inbounds float* %tmp10634, i64 1
- %tmp10636 = getelementptr inbounds float* %tmp10635, i64 1
- %tmp10637 = getelementptr inbounds float* %tmp10636, i64 1
- %tmp10638 = getelementptr inbounds float* %tmp10637, i64 1
- %tmp10639 = getelementptr inbounds float* %tmp10638, i64 1
- %tmp10640 = getelementptr inbounds float* %tmp10639, i64 1
- %tmp10641 = getelementptr inbounds float* %tmp10640, i64 1
- %tmp10642 = getelementptr inbounds float* %tmp10641, i64 1
- %tmp10643 = getelementptr inbounds float* %tmp10642, i64 1
- %tmp10644 = getelementptr inbounds float* %tmp10643, i64 1
- %tmp10645 = getelementptr inbounds float* %tmp10644, i64 1
- %tmp10646 = getelementptr inbounds float* %tmp10645, i64 1
- %tmp10647 = getelementptr inbounds float* %tmp10646, i64 1
- %tmp10648 = getelementptr inbounds float* %tmp10647, i64 1
- %tmp10649 = getelementptr inbounds float* %tmp10648, i64 1
- %tmp10650 = getelementptr inbounds float* %tmp10649, i64 1
- %tmp10651 = getelementptr inbounds float* %tmp10650, i64 1
- %tmp10652 = getelementptr inbounds float* %tmp10651, i64 1
- %tmp10653 = getelementptr inbounds float* %tmp10652, i64 1
- %tmp10654 = getelementptr inbounds float* %tmp10653, i64 1
- %tmp10655 = getelementptr inbounds float* %tmp10654, i64 1
- %tmp10656 = getelementptr inbounds float* %tmp10655, i64 1
- %tmp10657 = getelementptr inbounds float* %tmp10656, i64 1
- %tmp10658 = getelementptr inbounds float* %tmp10657, i64 1
- %tmp10659 = getelementptr inbounds float* %tmp10658, i64 1
- %tmp10660 = getelementptr inbounds float* %tmp10659, i64 1
- %tmp10661 = getelementptr inbounds float* %tmp10660, i64 1
- %tmp10662 = getelementptr inbounds float* %tmp10661, i64 1
- %tmp10663 = getelementptr inbounds float* %tmp10662, i64 1
- %tmp10664 = getelementptr inbounds float* %tmp10663, i64 1
- %tmp10665 = getelementptr inbounds float* %tmp10664, i64 1
- %tmp10666 = getelementptr inbounds float* %tmp10665, i64 1
- %tmp10667 = getelementptr inbounds float* %tmp10666, i64 1
- %tmp10668 = getelementptr inbounds float* %tmp10667, i64 1
- %tmp10669 = getelementptr inbounds float* %tmp10668, i64 1
- %tmp10670 = getelementptr inbounds float* %tmp10669, i64 1
- %tmp10671 = getelementptr inbounds float* %tmp10670, i64 1
- %tmp10672 = getelementptr inbounds float* %tmp10671, i64 1
- %tmp10673 = getelementptr inbounds float* %tmp10672, i64 1
- %tmp10674 = getelementptr inbounds float* %tmp10673, i64 1
- %tmp10675 = getelementptr inbounds float* %tmp10674, i64 1
- %tmp10676 = getelementptr inbounds float* %tmp10675, i64 1
- %tmp10677 = getelementptr inbounds float* %tmp10676, i64 1
- %tmp10678 = getelementptr inbounds float* %tmp10677, i64 1
- %tmp10679 = getelementptr inbounds float* %tmp10678, i64 1
- %tmp10680 = getelementptr inbounds float* %tmp10679, i64 1
- %tmp10681 = getelementptr inbounds float* %tmp10680, i64 1
- %tmp10682 = getelementptr inbounds float* %tmp10681, i64 1
- %tmp10683 = getelementptr inbounds float* %tmp10682, i64 1
- %tmp10684 = getelementptr inbounds float* %tmp10683, i64 1
- %tmp10685 = getelementptr inbounds float* %tmp10684, i64 1
- %tmp10686 = getelementptr inbounds float* %tmp10685, i64 1
- %tmp10687 = getelementptr inbounds float* %tmp10686, i64 1
- %tmp10688 = getelementptr inbounds float* %tmp10687, i64 1
- %tmp10689 = getelementptr inbounds float* %tmp10688, i64 1
- %tmp10690 = getelementptr inbounds float* %tmp10689, i64 1
- %tmp10691 = getelementptr inbounds float* %tmp10690, i64 1
- %tmp10692 = getelementptr inbounds float* %tmp10691, i64 1
- %tmp10693 = getelementptr inbounds float* %tmp10692, i64 1
- %tmp10694 = getelementptr inbounds float* %tmp10693, i64 1
- %tmp10695 = getelementptr inbounds float* %tmp10694, i64 1
- %tmp10696 = getelementptr inbounds float* %tmp10695, i64 1
- %tmp10697 = getelementptr inbounds float* %tmp10696, i64 1
- %tmp10698 = getelementptr inbounds float* %tmp10697, i64 1
- %tmp10699 = getelementptr inbounds float* %tmp10698, i64 1
- %tmp10700 = getelementptr inbounds float* %tmp10699, i64 1
- %tmp10701 = getelementptr inbounds float* %tmp10700, i64 1
- %tmp10702 = getelementptr inbounds float* %tmp10701, i64 1
- %tmp10703 = getelementptr inbounds float* %tmp10702, i64 1
- %tmp10704 = getelementptr inbounds float* %tmp10703, i64 1
- %tmp10705 = getelementptr inbounds float* %tmp10704, i64 1
- %tmp10706 = getelementptr inbounds float* %tmp10705, i64 1
- %tmp10707 = getelementptr inbounds float* %tmp10706, i64 1
- %tmp10708 = getelementptr inbounds float* %tmp10707, i64 1
- %tmp10709 = getelementptr inbounds float* %tmp10708, i64 1
- %tmp10710 = getelementptr inbounds float* %tmp10709, i64 1
- %tmp10711 = getelementptr inbounds float* %tmp10710, i64 1
- %tmp10712 = getelementptr inbounds float* %tmp10711, i64 1
- %tmp10713 = getelementptr inbounds float* %tmp10712, i64 1
- %tmp10714 = getelementptr inbounds float* %tmp10713, i64 1
- %tmp10715 = getelementptr inbounds float* %tmp10714, i64 1
- %tmp10716 = getelementptr inbounds float* %tmp10715, i64 1
- %tmp10717 = getelementptr inbounds float* %tmp10716, i64 1
- %tmp10718 = getelementptr inbounds float* %tmp10717, i64 1
- %tmp10719 = getelementptr inbounds float* %tmp10718, i64 1
- %tmp10720 = getelementptr inbounds float* %tmp10719, i64 1
- %tmp10721 = getelementptr inbounds float* %tmp10720, i64 1
- %tmp10722 = getelementptr inbounds float* %tmp10721, i64 1
- %tmp10723 = getelementptr inbounds float* %tmp10722, i64 1
- %tmp10724 = getelementptr inbounds float* %tmp10723, i64 1
- %tmp10725 = getelementptr inbounds float* %tmp10724, i64 1
- %tmp10726 = getelementptr inbounds float* %tmp10725, i64 1
- %tmp10727 = getelementptr inbounds float* %tmp10726, i64 1
- %tmp10728 = getelementptr inbounds float* %tmp10727, i64 1
- %tmp10729 = getelementptr inbounds float* %tmp10728, i64 1
- %tmp10730 = getelementptr inbounds float* %tmp10729, i64 1
- %tmp10731 = getelementptr inbounds float* %tmp10730, i64 1
- %tmp10732 = getelementptr inbounds float* %tmp10731, i64 1
- %tmp10733 = getelementptr inbounds float* %tmp10732, i64 1
- %tmp10734 = getelementptr inbounds float* %tmp10733, i64 1
- %tmp10735 = getelementptr inbounds float* %tmp10734, i64 1
- %tmp10736 = getelementptr inbounds float* %tmp10735, i64 1
- %tmp10737 = getelementptr inbounds float* %tmp10736, i64 1
- %tmp10738 = getelementptr inbounds float* %tmp10737, i64 1
- %tmp10739 = getelementptr inbounds float* %tmp10738, i64 1
- %tmp10740 = getelementptr inbounds float* %tmp10739, i64 1
- %tmp10741 = getelementptr inbounds float* %tmp10740, i64 1
- %tmp10742 = getelementptr inbounds float* %tmp10741, i64 1
- %tmp10743 = getelementptr inbounds float* %tmp10742, i64 1
- %tmp10744 = getelementptr inbounds float* %tmp10743, i64 1
- %tmp10745 = getelementptr inbounds float* %tmp10744, i64 1
- %tmp10746 = getelementptr inbounds float* %tmp10745, i64 1
- %tmp10747 = getelementptr inbounds float* %tmp10746, i64 1
- %tmp10748 = getelementptr inbounds float* %tmp10747, i64 1
- %tmp10749 = getelementptr inbounds float* %tmp10748, i64 1
- %tmp10750 = getelementptr inbounds float* %tmp10749, i64 1
- %tmp10751 = getelementptr inbounds float* %tmp10750, i64 1
- %tmp10752 = getelementptr inbounds float* %tmp10751, i64 1
- %tmp10753 = getelementptr inbounds float* %tmp10752, i64 1
- %tmp10754 = getelementptr inbounds float* %tmp10753, i64 1
- %tmp10755 = getelementptr inbounds float* %tmp10754, i64 1
- %tmp10756 = getelementptr inbounds float* %tmp10755, i64 1
- %tmp10757 = getelementptr inbounds float* %tmp10756, i64 1
- %tmp10758 = getelementptr inbounds float* %tmp10757, i64 1
- %tmp10759 = getelementptr inbounds float* %tmp10758, i64 1
- %tmp10760 = getelementptr inbounds float* %tmp10759, i64 1
- %tmp10761 = getelementptr inbounds float* %tmp10760, i64 1
- %tmp10762 = getelementptr inbounds float* %tmp10761, i64 1
- %tmp10763 = getelementptr inbounds float* %tmp10762, i64 1
- %tmp10764 = getelementptr inbounds float* %tmp10763, i64 1
- %tmp10765 = getelementptr inbounds float* %tmp10764, i64 1
- %tmp10766 = getelementptr inbounds float* %tmp10765, i64 1
- %tmp10767 = getelementptr inbounds float* %tmp10766, i64 1
- %tmp10768 = getelementptr inbounds float* %tmp10767, i64 1
- %tmp10769 = getelementptr inbounds float* %tmp10768, i64 1
- %tmp10770 = getelementptr inbounds float* %tmp10769, i64 1
- %tmp10771 = getelementptr inbounds float* %tmp10770, i64 1
- %tmp10772 = getelementptr inbounds float* %tmp10771, i64 1
- %tmp10773 = getelementptr inbounds float* %tmp10772, i64 1
- %tmp10774 = getelementptr inbounds float* %tmp10773, i64 1
- %tmp10775 = getelementptr inbounds float* %tmp10774, i64 1
- %tmp10776 = getelementptr inbounds float* %tmp10775, i64 1
- %tmp10777 = getelementptr inbounds float* %tmp10776, i64 1
- %tmp10778 = getelementptr inbounds float* %tmp10777, i64 1
- %tmp10779 = getelementptr inbounds float* %tmp10778, i64 1
- %tmp10780 = getelementptr inbounds float* %tmp10779, i64 1
- %tmp10781 = getelementptr inbounds float* %tmp10780, i64 1
- %tmp10782 = getelementptr inbounds float* %tmp10781, i64 1
- %tmp10783 = getelementptr inbounds float* %tmp10782, i64 1
- %tmp10784 = getelementptr inbounds float* %tmp10783, i64 1
- %tmp10785 = getelementptr inbounds float* %tmp10784, i64 1
- %tmp10786 = getelementptr inbounds float* %tmp10785, i64 1
- %tmp10787 = getelementptr inbounds float* %tmp10786, i64 1
- %tmp10788 = getelementptr inbounds float* %tmp10787, i64 1
- %tmp10789 = getelementptr inbounds float* %tmp10788, i64 1
- %tmp10790 = getelementptr inbounds float* %tmp10789, i64 1
- %tmp10791 = getelementptr inbounds float* %tmp10790, i64 1
- %tmp10792 = getelementptr inbounds float* %tmp10791, i64 1
- %tmp10793 = getelementptr inbounds float* %tmp10792, i64 1
- %tmp10794 = getelementptr inbounds float* %tmp10793, i64 1
- %tmp10795 = getelementptr inbounds float* %tmp10794, i64 1
- %tmp10796 = getelementptr inbounds float* %tmp10795, i64 1
- %tmp10797 = getelementptr inbounds float* %tmp10796, i64 1
- %tmp10798 = getelementptr inbounds float* %tmp10797, i64 1
- %tmp10799 = getelementptr inbounds float* %tmp10798, i64 1
- %tmp10800 = getelementptr inbounds float* %tmp10799, i64 1
- %tmp10801 = getelementptr inbounds float* %tmp10800, i64 1
- %tmp10802 = getelementptr inbounds float* %tmp10801, i64 1
- %tmp10803 = getelementptr inbounds float* %tmp10802, i64 1
- %tmp10804 = getelementptr inbounds float* %tmp10803, i64 1
- %tmp10805 = getelementptr inbounds float* %tmp10804, i64 1
- %tmp10806 = getelementptr inbounds float* %tmp10805, i64 1
- %tmp10807 = getelementptr inbounds float* %tmp10806, i64 1
- %tmp10808 = getelementptr inbounds float* %tmp10807, i64 1
- %tmp10809 = getelementptr inbounds float* %tmp10808, i64 1
- %tmp10810 = getelementptr inbounds float* %tmp10809, i64 1
- %tmp10811 = getelementptr inbounds float* %tmp10810, i64 1
- %tmp10812 = getelementptr inbounds float* %tmp10811, i64 1
- %tmp10813 = getelementptr inbounds float* %tmp10812, i64 1
- %tmp10814 = getelementptr inbounds float* %tmp10813, i64 1
- %tmp10815 = getelementptr inbounds float* %tmp10814, i64 1
- %tmp10816 = getelementptr inbounds float* %tmp10815, i64 1
- %tmp10817 = getelementptr inbounds float* %tmp10816, i64 1
- %tmp10818 = getelementptr inbounds float* %tmp10817, i64 1
- %tmp10819 = getelementptr inbounds float* %tmp10818, i64 1
- %tmp10820 = getelementptr inbounds float* %tmp10819, i64 1
- %tmp10821 = getelementptr inbounds float* %tmp10820, i64 1
- %tmp10822 = getelementptr inbounds float* %tmp10821, i64 1
- %tmp10823 = getelementptr inbounds float* %tmp10822, i64 1
- %tmp10824 = getelementptr inbounds float* %tmp10823, i64 1
- %tmp10825 = getelementptr inbounds float* %tmp10824, i64 1
- %tmp10826 = getelementptr inbounds float* %tmp10825, i64 1
- %tmp10827 = getelementptr inbounds float* %tmp10826, i64 1
- %tmp10828 = getelementptr inbounds float* %tmp10827, i64 1
- %tmp10829 = getelementptr inbounds float* %tmp10828, i64 1
- %tmp10830 = getelementptr inbounds float* %tmp10829, i64 1
- %tmp10831 = getelementptr inbounds float* %tmp10830, i64 1
- %tmp10832 = getelementptr inbounds float* %tmp10831, i64 1
- %tmp10833 = getelementptr inbounds float* %tmp10832, i64 1
- %tmp10834 = getelementptr inbounds float* %tmp10833, i64 1
- %tmp10835 = getelementptr inbounds float* %tmp10834, i64 1
- %tmp10836 = getelementptr inbounds float* %tmp10835, i64 1
- %tmp10837 = getelementptr inbounds float* %tmp10836, i64 1
- %tmp10838 = getelementptr inbounds float* %tmp10837, i64 1
- %tmp10839 = getelementptr inbounds float* %tmp10838, i64 1
- %tmp10840 = getelementptr inbounds float* %tmp10839, i64 1
- %tmp10841 = getelementptr inbounds float* %tmp10840, i64 1
- %tmp10842 = getelementptr inbounds float* %tmp10841, i64 1
- %tmp10843 = getelementptr inbounds float* %tmp10842, i64 1
- %tmp10844 = getelementptr inbounds float* %tmp10843, i64 1
- %tmp10845 = getelementptr inbounds float* %tmp10844, i64 1
- %tmp10846 = getelementptr inbounds float* %tmp10845, i64 1
- %tmp10847 = getelementptr inbounds float* %tmp10846, i64 1
- %tmp10848 = getelementptr inbounds float* %tmp10847, i64 1
- %tmp10849 = getelementptr inbounds float* %tmp10848, i64 1
- %tmp10850 = getelementptr inbounds float* %tmp10849, i64 1
- %tmp10851 = getelementptr inbounds float* %tmp10850, i64 1
- %tmp10852 = getelementptr inbounds float* %tmp10851, i64 1
- %tmp10853 = getelementptr inbounds float* %tmp10852, i64 1
- %tmp10854 = getelementptr inbounds float* %tmp10853, i64 1
- %tmp10855 = getelementptr inbounds float* %tmp10854, i64 1
- %tmp10856 = getelementptr inbounds float* %tmp10855, i64 1
- %tmp10857 = getelementptr inbounds float* %tmp10856, i64 1
- %tmp10858 = getelementptr inbounds float* %tmp10857, i64 1
- %tmp10859 = getelementptr inbounds float* %tmp10858, i64 1
- %tmp10860 = getelementptr inbounds float* %tmp10859, i64 1
- %tmp10861 = getelementptr inbounds float* %tmp10860, i64 1
- %tmp10862 = getelementptr inbounds float* %tmp10861, i64 1
- %tmp10863 = getelementptr inbounds float* %tmp10862, i64 1
- %tmp10864 = getelementptr inbounds float* %tmp10863, i64 1
- %tmp10865 = getelementptr inbounds float* %tmp10864, i64 1
- %tmp10866 = getelementptr inbounds float* %tmp10865, i64 1
- %tmp10867 = getelementptr inbounds float* %tmp10866, i64 1
- %tmp10868 = getelementptr inbounds float* %tmp10867, i64 1
- %tmp10869 = getelementptr inbounds float* %tmp10868, i64 1
- %tmp10870 = getelementptr inbounds float* %tmp10869, i64 1
- %tmp10871 = getelementptr inbounds float* %tmp10870, i64 1
- %tmp10872 = getelementptr inbounds float* %tmp10871, i64 1
- %tmp10873 = getelementptr inbounds float* %tmp10872, i64 1
- %tmp10874 = getelementptr inbounds float* %tmp10873, i64 1
- %tmp10875 = getelementptr inbounds float* %tmp10874, i64 1
- %tmp10876 = getelementptr inbounds float* %tmp10875, i64 1
- %tmp10877 = getelementptr inbounds float* %tmp10876, i64 1
- %tmp10878 = getelementptr inbounds float* %tmp10877, i64 1
- %tmp10879 = getelementptr inbounds float* %tmp10878, i64 1
- %tmp10880 = getelementptr inbounds float* %tmp10879, i64 1
- %tmp10881 = getelementptr inbounds float* %tmp10880, i64 1
- %tmp10882 = getelementptr inbounds float* %tmp10881, i64 1
- %tmp10883 = getelementptr inbounds float* %tmp10882, i64 1
- %tmp10884 = getelementptr inbounds float* %tmp10883, i64 1
- %tmp10885 = getelementptr inbounds float* %tmp10884, i64 1
- %tmp10886 = getelementptr inbounds float* %tmp10885, i64 1
- %tmp10887 = getelementptr inbounds float* %tmp10886, i64 1
- %tmp10888 = getelementptr inbounds float* %tmp10887, i64 1
- %tmp10889 = getelementptr inbounds float* %tmp10888, i64 1
- %tmp10890 = getelementptr inbounds float* %tmp10889, i64 1
- %tmp10891 = getelementptr inbounds float* %tmp10890, i64 1
- %tmp10892 = getelementptr inbounds float* %tmp10891, i64 1
- %tmp10893 = getelementptr inbounds float* %tmp10892, i64 1
- %tmp10894 = getelementptr inbounds float* %tmp10893, i64 1
- %tmp10895 = getelementptr inbounds float* %tmp10894, i64 1
- %tmp10896 = getelementptr inbounds float* %tmp10895, i64 1
- %tmp10897 = getelementptr inbounds float* %tmp10896, i64 1
- %tmp10898 = getelementptr inbounds float* %tmp10897, i64 1
- %tmp10899 = getelementptr inbounds float* %tmp10898, i64 1
- %tmp10900 = getelementptr inbounds float* %tmp10899, i64 1
- %tmp10901 = getelementptr inbounds float* %tmp10900, i64 1
- %tmp10902 = getelementptr inbounds float* %tmp10901, i64 1
- %tmp10903 = getelementptr inbounds float* %tmp10902, i64 1
- %tmp10904 = getelementptr inbounds float* %tmp10903, i64 1
- %tmp10905 = getelementptr inbounds float* %tmp10904, i64 1
- %tmp10906 = getelementptr inbounds float* %tmp10905, i64 1
- %tmp10907 = getelementptr inbounds float* %tmp10906, i64 1
- %tmp10908 = getelementptr inbounds float* %tmp10907, i64 1
- %tmp10909 = getelementptr inbounds float* %tmp10908, i64 1
- %tmp10910 = getelementptr inbounds float* %tmp10909, i64 1
- %tmp10911 = getelementptr inbounds float* %tmp10910, i64 1
- %tmp10912 = getelementptr inbounds float* %tmp10911, i64 1
- %tmp10913 = getelementptr inbounds float* %tmp10912, i64 1
- %tmp10914 = getelementptr inbounds float* %tmp10913, i64 1
- %tmp10915 = getelementptr inbounds float* %tmp10914, i64 1
- %tmp10916 = getelementptr inbounds float* %tmp10915, i64 1
- %tmp10917 = getelementptr inbounds float* %tmp10916, i64 1
- %tmp10918 = getelementptr inbounds float* %tmp10917, i64 1
- %tmp10919 = getelementptr inbounds float* %tmp10918, i64 1
- %tmp10920 = getelementptr inbounds float* %tmp10919, i64 1
- %tmp10921 = getelementptr inbounds float* %tmp10920, i64 1
- %tmp10922 = getelementptr inbounds float* %tmp10921, i64 1
- %tmp10923 = getelementptr inbounds float* %tmp10922, i64 1
- %tmp10924 = getelementptr inbounds float* %tmp10923, i64 1
- %tmp10925 = getelementptr inbounds float* %tmp10924, i64 1
- %tmp10926 = getelementptr inbounds float* %tmp10925, i64 1
- %tmp10927 = getelementptr inbounds float* %tmp10926, i64 1
- %tmp10928 = getelementptr inbounds float* %tmp10927, i64 1
- %tmp10929 = getelementptr inbounds float* %tmp10928, i64 1
- %tmp10930 = getelementptr inbounds float* %tmp10929, i64 1
- %tmp10931 = getelementptr inbounds float* %tmp10930, i64 1
- %tmp10932 = getelementptr inbounds float* %tmp10931, i64 1
- %tmp10933 = getelementptr inbounds float* %tmp10932, i64 1
- %tmp10934 = getelementptr inbounds float* %tmp10933, i64 1
- %tmp10935 = getelementptr inbounds float* %tmp10934, i64 1
- %tmp10936 = getelementptr inbounds float* %tmp10935, i64 1
- %tmp10937 = getelementptr inbounds float* %tmp10936, i64 1
- %tmp10938 = getelementptr inbounds float* %tmp10937, i64 1
- %tmp10939 = getelementptr inbounds float* %tmp10938, i64 1
- %tmp10940 = getelementptr inbounds float* %tmp10939, i64 1
- %tmp10941 = getelementptr inbounds float* %tmp10940, i64 1
- %tmp10942 = getelementptr inbounds float* %tmp10941, i64 1
- %tmp10943 = getelementptr inbounds float* %tmp10942, i64 1
- %tmp10944 = getelementptr inbounds float* %tmp10943, i64 1
- %tmp10945 = getelementptr inbounds float* %tmp10944, i64 1
- %tmp10946 = getelementptr inbounds float* %tmp10945, i64 1
- %tmp10947 = getelementptr inbounds float* %tmp10946, i64 1
- %tmp10948 = getelementptr inbounds float* %tmp10947, i64 1
- %tmp10949 = getelementptr inbounds float* %tmp10948, i64 1
- %tmp10950 = getelementptr inbounds float* %tmp10949, i64 1
- %tmp10951 = getelementptr inbounds float* %tmp10950, i64 1
- %tmp10952 = getelementptr inbounds float* %tmp10951, i64 1
- %tmp10953 = getelementptr inbounds float* %tmp10952, i64 1
- %tmp10954 = getelementptr inbounds float* %tmp10953, i64 1
- %tmp10955 = getelementptr inbounds float* %tmp10954, i64 1
- %tmp10956 = getelementptr inbounds float* %tmp10955, i64 1
- %tmp10957 = getelementptr inbounds float* %tmp10956, i64 1
- %tmp10958 = getelementptr inbounds float* %tmp10957, i64 1
- %tmp10959 = getelementptr inbounds float* %tmp10958, i64 1
- %tmp10960 = getelementptr inbounds float* %tmp10959, i64 1
- %tmp10961 = getelementptr inbounds float* %tmp10960, i64 1
- %tmp10962 = getelementptr inbounds float* %tmp10961, i64 1
- %tmp10963 = getelementptr inbounds float* %tmp10962, i64 1
- %tmp10964 = getelementptr inbounds float* %tmp10963, i64 1
- %tmp10965 = getelementptr inbounds float* %tmp10964, i64 1
- %tmp10966 = getelementptr inbounds float* %tmp10965, i64 1
- %tmp10967 = getelementptr inbounds float* %tmp10966, i64 1
- %tmp10968 = getelementptr inbounds float* %tmp10967, i64 1
- %tmp10969 = getelementptr inbounds float* %tmp10968, i64 1
- %tmp10970 = getelementptr inbounds float* %tmp10969, i64 1
- %tmp10971 = getelementptr inbounds float* %tmp10970, i64 1
- %tmp10972 = getelementptr inbounds float* %tmp10971, i64 1
- %tmp10973 = getelementptr inbounds float* %tmp10972, i64 1
- %tmp10974 = getelementptr inbounds float* %tmp10973, i64 1
- %tmp10975 = getelementptr inbounds float* %tmp10974, i64 1
- %tmp10976 = getelementptr inbounds float* %tmp10975, i64 1
- %tmp10977 = getelementptr inbounds float* %tmp10976, i64 1
- %tmp10978 = getelementptr inbounds float* %tmp10977, i64 1
- %tmp10979 = getelementptr inbounds float* %tmp10978, i64 1
- %tmp10980 = getelementptr inbounds float* %tmp10979, i64 1
- %tmp10981 = getelementptr inbounds float* %tmp10980, i64 1
- %tmp10982 = getelementptr inbounds float* %tmp10981, i64 1
- %tmp10983 = getelementptr inbounds float* %tmp10982, i64 1
- %tmp10984 = getelementptr inbounds float* %tmp10983, i64 1
- %tmp10985 = getelementptr inbounds float* %tmp10984, i64 1
- %tmp10986 = getelementptr inbounds float* %tmp10985, i64 1
- %tmp10987 = getelementptr inbounds float* %tmp10986, i64 1
- %tmp10988 = getelementptr inbounds float* %tmp10987, i64 1
- %tmp10989 = getelementptr inbounds float* %tmp10988, i64 1
- %tmp10990 = getelementptr inbounds float* %tmp10989, i64 1
- %tmp10991 = getelementptr inbounds float* %tmp10990, i64 1
- %tmp10992 = getelementptr inbounds float* %tmp10991, i64 1
- %tmp10993 = getelementptr inbounds float* %tmp10992, i64 1
- %tmp10994 = getelementptr inbounds float* %tmp10993, i64 1
- %tmp10995 = getelementptr inbounds float* %tmp10994, i64 1
- %tmp10996 = getelementptr inbounds float* %tmp10995, i64 1
- %tmp10997 = getelementptr inbounds float* %tmp10996, i64 1
- %tmp10998 = getelementptr inbounds float* %tmp10997, i64 1
- %tmp10999 = getelementptr inbounds float* %tmp10998, i64 1
- %tmp11000 = getelementptr inbounds float* %tmp10999, i64 1
- %tmp11001 = getelementptr inbounds float* %tmp11000, i64 1
- %tmp11002 = getelementptr inbounds float* %tmp11001, i64 1
- %tmp11003 = getelementptr inbounds float* %tmp11002, i64 1
- %tmp11004 = getelementptr inbounds float* %tmp11003, i64 1
- %tmp11005 = getelementptr inbounds float* %tmp11004, i64 1
- %tmp11006 = getelementptr inbounds float* %tmp11005, i64 1
- %tmp11007 = getelementptr inbounds float* %tmp11006, i64 1
- %tmp11008 = getelementptr inbounds float* %tmp11007, i64 1
- %tmp11009 = getelementptr inbounds float* %tmp11008, i64 1
- %tmp11010 = getelementptr inbounds float* %tmp11009, i64 1
- %tmp11011 = getelementptr inbounds float* %tmp11010, i64 1
- %tmp11012 = getelementptr inbounds float* %tmp11011, i64 1
- %tmp11013 = getelementptr inbounds float* %tmp11012, i64 1
- %tmp11014 = getelementptr inbounds float* %tmp11013, i64 1
- %tmp11015 = getelementptr inbounds float* %tmp11014, i64 1
- %tmp11016 = getelementptr inbounds float* %tmp11015, i64 1
- %tmp11017 = getelementptr inbounds float* %tmp11016, i64 1
- %tmp11018 = getelementptr inbounds float* %tmp11017, i64 1
- %tmp11019 = getelementptr inbounds float* %tmp11018, i64 1
- %tmp11020 = getelementptr inbounds float* %tmp11019, i64 1
- %tmp11021 = getelementptr inbounds float* %tmp11020, i64 1
- %tmp11022 = getelementptr inbounds float* %tmp11021, i64 1
- %tmp11023 = getelementptr inbounds float* %tmp11022, i64 1
- %tmp11024 = getelementptr inbounds float* %tmp11023, i64 1
- %tmp11025 = getelementptr inbounds float* %tmp11024, i64 1
- %tmp11026 = getelementptr inbounds float* %tmp11025, i64 1
- %tmp11027 = getelementptr inbounds float* %tmp11026, i64 1
- %tmp11028 = getelementptr inbounds float* %tmp11027, i64 1
- %tmp11029 = getelementptr inbounds float* %tmp11028, i64 1
- %tmp11030 = getelementptr inbounds float* %tmp11029, i64 1
- %tmp11031 = getelementptr inbounds float* %tmp11030, i64 1
- %tmp11032 = getelementptr inbounds float* %tmp11031, i64 1
- %tmp11033 = getelementptr inbounds float* %tmp11032, i64 1
- %tmp11034 = getelementptr inbounds float* %tmp11033, i64 1
- %tmp11035 = getelementptr inbounds float* %tmp11034, i64 1
- %tmp11036 = getelementptr inbounds float* %tmp11035, i64 1
- %tmp11037 = getelementptr inbounds float* %tmp11036, i64 1
- %tmp11038 = getelementptr inbounds float* %tmp11037, i64 1
- %tmp11039 = getelementptr inbounds float* %tmp11038, i64 1
- %tmp11040 = getelementptr inbounds float* %tmp11039, i64 1
- %tmp11041 = getelementptr inbounds float* %tmp11040, i64 1
- %tmp11042 = getelementptr inbounds float* %tmp11041, i64 1
- %tmp11043 = getelementptr inbounds float* %tmp11042, i64 1
- %tmp11044 = getelementptr inbounds float* %tmp11043, i64 1
- %tmp11045 = getelementptr inbounds float* %tmp11044, i64 1
- %tmp11046 = getelementptr inbounds float* %tmp11045, i64 1
- %tmp11047 = getelementptr inbounds float* %tmp11046, i64 1
- %tmp11048 = getelementptr inbounds float* %tmp11047, i64 1
- %tmp11049 = getelementptr inbounds float* %tmp11048, i64 1
- %tmp11050 = getelementptr inbounds float* %tmp11049, i64 1
- %tmp11051 = getelementptr inbounds float* %tmp11050, i64 1
- %tmp11052 = getelementptr inbounds float* %tmp11051, i64 1
- %tmp11053 = getelementptr inbounds float* %tmp11052, i64 1
- %tmp11054 = getelementptr inbounds float* %tmp11053, i64 1
- %tmp11055 = getelementptr inbounds float* %tmp11054, i64 1
- %tmp11056 = getelementptr inbounds float* %tmp11055, i64 1
- %tmp11057 = getelementptr inbounds float* %tmp11056, i64 1
- %tmp11058 = getelementptr inbounds float* %tmp11057, i64 1
- %tmp11059 = getelementptr inbounds float* %tmp11058, i64 1
- %tmp11060 = getelementptr inbounds float* %tmp11059, i64 1
- %tmp11061 = getelementptr inbounds float* %tmp11060, i64 1
- %tmp11062 = getelementptr inbounds float* %tmp11061, i64 1
- %tmp11063 = getelementptr inbounds float* %tmp11062, i64 1
- %tmp11064 = getelementptr inbounds float* %tmp11063, i64 1
- %tmp11065 = getelementptr inbounds float* %tmp11064, i64 1
- %tmp11066 = getelementptr inbounds float* %tmp11065, i64 1
- %tmp11067 = getelementptr inbounds float* %tmp11066, i64 1
- %tmp11068 = getelementptr inbounds float* %tmp11067, i64 1
- %tmp11069 = getelementptr inbounds float* %tmp11068, i64 1
- %tmp11070 = getelementptr inbounds float* %tmp11069, i64 1
- %tmp11071 = getelementptr inbounds float* %tmp11070, i64 1
- %tmp11072 = getelementptr inbounds float* %tmp11071, i64 1
- %tmp11073 = getelementptr inbounds float* %tmp11072, i64 1
- %tmp11074 = getelementptr inbounds float* %tmp11073, i64 1
- %tmp11075 = getelementptr inbounds float* %tmp11074, i64 1
- %tmp11076 = getelementptr inbounds float* %tmp11075, i64 1
- %tmp11077 = getelementptr inbounds float* %tmp11076, i64 1
- %tmp11078 = getelementptr inbounds float* %tmp11077, i64 1
- %tmp11079 = getelementptr inbounds float* %tmp11078, i64 1
- %tmp11080 = getelementptr inbounds float* %tmp11079, i64 1
- %tmp11081 = getelementptr inbounds float* %tmp11080, i64 1
- %tmp11082 = getelementptr inbounds float* %tmp11081, i64 1
- %tmp11083 = getelementptr inbounds float* %tmp11082, i64 1
- %tmp11084 = getelementptr inbounds float* %tmp11083, i64 1
- %tmp11085 = getelementptr inbounds float* %tmp11084, i64 1
- %tmp11086 = getelementptr inbounds float* %tmp11085, i64 1
- %tmp11087 = getelementptr inbounds float* %tmp11086, i64 1
- %tmp11088 = getelementptr inbounds float* %tmp11087, i64 1
- %tmp11089 = getelementptr inbounds float* %tmp11088, i64 1
- %tmp11090 = getelementptr inbounds float* %tmp11089, i64 1
- %tmp11091 = getelementptr inbounds float* %tmp11090, i64 1
- %tmp11092 = getelementptr inbounds float* %tmp11091, i64 1
- %tmp11093 = getelementptr inbounds float* %tmp11092, i64 1
- %tmp11094 = getelementptr inbounds float* %tmp11093, i64 1
- %tmp11095 = getelementptr inbounds float* %tmp11094, i64 1
- %tmp11096 = getelementptr inbounds float* %tmp11095, i64 1
- %tmp11097 = getelementptr inbounds float* %tmp11096, i64 1
- %tmp11098 = getelementptr inbounds float* %tmp11097, i64 1
- %tmp11099 = getelementptr inbounds float* %tmp11098, i64 1
- %tmp11100 = getelementptr inbounds float* %tmp11099, i64 1
- %tmp11101 = getelementptr inbounds float* %tmp11100, i64 1
- %tmp11102 = getelementptr inbounds float* %tmp11101, i64 1
- %tmp11103 = getelementptr inbounds float* %tmp11102, i64 1
- %tmp11104 = getelementptr inbounds float* %tmp11103, i64 1
- %tmp11105 = getelementptr inbounds float* %tmp11104, i64 1
- %tmp11106 = getelementptr inbounds float* %tmp11105, i64 1
- %tmp11107 = getelementptr inbounds float* %tmp11106, i64 1
- %tmp11108 = getelementptr inbounds float* %tmp11107, i64 1
- %tmp11109 = getelementptr inbounds float* %tmp11108, i64 1
- %tmp11110 = getelementptr inbounds float* %tmp11109, i64 1
- %tmp11111 = getelementptr inbounds float* %tmp11110, i64 1
- %tmp11112 = getelementptr inbounds float* %tmp11111, i64 1
- %tmp11113 = getelementptr inbounds float* %tmp11112, i64 1
- %tmp11114 = getelementptr inbounds float* %tmp11113, i64 1
- %tmp11115 = getelementptr inbounds float* %tmp11114, i64 1
- %tmp11116 = getelementptr inbounds float* %tmp11115, i64 1
- %tmp11117 = getelementptr inbounds float* %tmp11116, i64 1
- %tmp11118 = getelementptr inbounds float* %tmp11117, i64 1
- %tmp11119 = getelementptr inbounds float* %tmp11118, i64 1
- %tmp11120 = getelementptr inbounds float* %tmp11119, i64 1
- %tmp11121 = getelementptr inbounds float* %tmp11120, i64 1
- %tmp11122 = getelementptr inbounds float* %tmp11121, i64 1
- %tmp11123 = getelementptr inbounds float* %tmp11122, i64 1
- %tmp11124 = getelementptr inbounds float* %tmp11123, i64 1
- %tmp11125 = getelementptr inbounds float* %tmp11124, i64 1
- %tmp11126 = getelementptr inbounds float* %tmp11125, i64 1
- %tmp11127 = getelementptr inbounds float* %tmp11126, i64 1
- %tmp11128 = getelementptr inbounds float* %tmp11127, i64 1
- %tmp11129 = getelementptr inbounds float* %tmp11128, i64 1
- %tmp11130 = getelementptr inbounds float* %tmp11129, i64 1
- %tmp11131 = getelementptr inbounds float* %tmp11130, i64 1
- %tmp11132 = getelementptr inbounds float* %tmp11131, i64 1
- %tmp11133 = getelementptr inbounds float* %tmp11132, i64 1
- %tmp11134 = getelementptr inbounds float* %tmp11133, i64 1
- %tmp11135 = getelementptr inbounds float* %tmp11134, i64 1
- %tmp11136 = getelementptr inbounds float* %tmp11135, i64 1
- %tmp11137 = getelementptr inbounds float* %tmp11136, i64 1
- %tmp11138 = getelementptr inbounds float* %tmp11137, i64 1
- %tmp11139 = getelementptr inbounds float* %tmp11138, i64 1
- %tmp11140 = getelementptr inbounds float* %tmp11139, i64 1
- %tmp11141 = getelementptr inbounds float* %tmp11140, i64 1
- %tmp11142 = getelementptr inbounds float* %tmp11141, i64 1
- %tmp11143 = getelementptr inbounds float* %tmp11142, i64 1
- %tmp11144 = getelementptr inbounds float* %tmp11143, i64 1
- %tmp11145 = getelementptr inbounds float* %tmp11144, i64 1
- %tmp11146 = getelementptr inbounds float* %tmp11145, i64 1
- %tmp11147 = getelementptr inbounds float* %tmp11146, i64 1
- %tmp11148 = getelementptr inbounds float* %tmp11147, i64 1
- %tmp11149 = getelementptr inbounds float* %tmp11148, i64 1
- %tmp11150 = getelementptr inbounds float* %tmp11149, i64 1
- %tmp11151 = getelementptr inbounds float* %tmp11150, i64 1
- %tmp11152 = getelementptr inbounds float* %tmp11151, i64 1
- %tmp11153 = getelementptr inbounds float* %tmp11152, i64 1
- %tmp11154 = getelementptr inbounds float* %tmp11153, i64 1
- %tmp11155 = getelementptr inbounds float* %tmp11154, i64 1
- %tmp11156 = getelementptr inbounds float* %tmp11155, i64 1
- %tmp11157 = getelementptr inbounds float* %tmp11156, i64 1
- %tmp11158 = getelementptr inbounds float* %tmp11157, i64 1
- %tmp11159 = getelementptr inbounds float* %tmp11158, i64 1
- %tmp11160 = getelementptr inbounds float* %tmp11159, i64 1
- %tmp11161 = getelementptr inbounds float* %tmp11160, i64 1
- %tmp11162 = getelementptr inbounds float* %tmp11161, i64 1
- %tmp11163 = getelementptr inbounds float* %tmp11162, i64 1
- %tmp11164 = getelementptr inbounds float* %tmp11163, i64 1
- %tmp11165 = getelementptr inbounds float* %tmp11164, i64 1
- %tmp11166 = getelementptr inbounds float* %tmp11165, i64 1
- %tmp11167 = getelementptr inbounds float* %tmp11166, i64 1
- %tmp11168 = getelementptr inbounds float* %tmp11167, i64 1
- %tmp11169 = getelementptr inbounds float* %tmp11168, i64 1
- %tmp11170 = getelementptr inbounds float* %tmp11169, i64 1
- %tmp11171 = getelementptr inbounds float* %tmp11170, i64 1
- %tmp11172 = getelementptr inbounds float* %tmp11171, i64 1
- %tmp11173 = getelementptr inbounds float* %tmp11172, i64 1
- %tmp11174 = getelementptr inbounds float* %tmp11173, i64 1
- %tmp11175 = getelementptr inbounds float* %tmp11174, i64 1
- %tmp11176 = getelementptr inbounds float* %tmp11175, i64 1
- %tmp11177 = getelementptr inbounds float* %tmp11176, i64 1
- %tmp11178 = getelementptr inbounds float* %tmp11177, i64 1
- %tmp11179 = getelementptr inbounds float* %tmp11178, i64 1
- %tmp11180 = getelementptr inbounds float* %tmp11179, i64 1
- %tmp11181 = getelementptr inbounds float* %tmp11180, i64 1
- %tmp11182 = getelementptr inbounds float* %tmp11181, i64 1
- %tmp11183 = getelementptr inbounds float* %tmp11182, i64 1
- %tmp11184 = getelementptr inbounds float* %tmp11183, i64 1
- %tmp11185 = getelementptr inbounds float* %tmp11184, i64 1
- %tmp11186 = getelementptr inbounds float* %tmp11185, i64 1
- %tmp11187 = getelementptr inbounds float* %tmp11186, i64 1
- %tmp11188 = getelementptr inbounds float* %tmp11187, i64 1
- %tmp11189 = getelementptr inbounds float* %tmp11188, i64 1
- %tmp11190 = getelementptr inbounds float* %tmp11189, i64 1
- %tmp11191 = getelementptr inbounds float* %tmp11190, i64 1
- %tmp11192 = getelementptr inbounds float* %tmp11191, i64 1
- %tmp11193 = getelementptr inbounds float* %tmp11192, i64 1
- %tmp11194 = getelementptr inbounds float* %tmp11193, i64 1
- %tmp11195 = getelementptr inbounds float* %tmp11194, i64 1
- %tmp11196 = getelementptr inbounds float* %tmp11195, i64 1
- %tmp11197 = getelementptr inbounds float* %tmp11196, i64 1
- %tmp11198 = getelementptr inbounds float* %tmp11197, i64 1
- %tmp11199 = getelementptr inbounds float* %tmp11198, i64 1
- %tmp11200 = getelementptr inbounds float* %tmp11199, i64 1
- %tmp11201 = getelementptr inbounds float* %tmp11200, i64 1
- %tmp11202 = getelementptr inbounds float* %tmp11201, i64 1
- %tmp11203 = getelementptr inbounds float* %tmp11202, i64 1
- %tmp11204 = getelementptr inbounds float* %tmp11203, i64 1
- %tmp11205 = getelementptr inbounds float* %tmp11204, i64 1
- %tmp11206 = getelementptr inbounds float* %tmp11205, i64 1
- %tmp11207 = getelementptr inbounds float* %tmp11206, i64 1
- %tmp11208 = getelementptr inbounds float* %tmp11207, i64 1
- %tmp11209 = getelementptr inbounds float* %tmp11208, i64 1
- %tmp11210 = getelementptr inbounds float* %tmp11209, i64 1
- %tmp11211 = getelementptr inbounds float* %tmp11210, i64 1
- %tmp11212 = getelementptr inbounds float* %tmp11211, i64 1
- %tmp11213 = getelementptr inbounds float* %tmp11212, i64 1
- %tmp11214 = getelementptr inbounds float* %tmp11213, i64 1
- %tmp11215 = getelementptr inbounds float* %tmp11214, i64 1
- %tmp11216 = getelementptr inbounds float* %tmp11215, i64 1
- %tmp11217 = getelementptr inbounds float* %tmp11216, i64 1
- %tmp11218 = getelementptr inbounds float* %tmp11217, i64 1
- %tmp11219 = getelementptr inbounds float* %tmp11218, i64 1
- %tmp11220 = getelementptr inbounds float* %tmp11219, i64 1
- %tmp11221 = getelementptr inbounds float* %tmp11220, i64 1
- %tmp11222 = getelementptr inbounds float* %tmp11221, i64 1
- %tmp11223 = getelementptr inbounds float* %tmp11222, i64 1
- %tmp11224 = getelementptr inbounds float* %tmp11223, i64 1
- %tmp11225 = getelementptr inbounds float* %tmp11224, i64 1
- %tmp11226 = getelementptr inbounds float* %tmp11225, i64 1
- %tmp11227 = getelementptr inbounds float* %tmp11226, i64 1
- %tmp11228 = getelementptr inbounds float* %tmp11227, i64 1
- %tmp11229 = getelementptr inbounds float* %tmp11228, i64 1
- %tmp11230 = getelementptr inbounds float* %tmp11229, i64 1
- %tmp11231 = getelementptr inbounds float* %tmp11230, i64 1
- %tmp11232 = getelementptr inbounds float* %tmp11231, i64 1
- %tmp11233 = getelementptr inbounds float* %tmp11232, i64 1
- %tmp11234 = getelementptr inbounds float* %tmp11233, i64 1
- %tmp11235 = getelementptr inbounds float* %tmp11234, i64 1
- %tmp11236 = getelementptr inbounds float* %tmp11235, i64 1
- %tmp11237 = getelementptr inbounds float* %tmp11236, i64 1
- %tmp11238 = getelementptr inbounds float* %tmp11237, i64 1
- %tmp11239 = getelementptr inbounds float* %tmp11238, i64 1
- %tmp11240 = getelementptr inbounds float* %tmp11239, i64 1
- %tmp11241 = getelementptr inbounds float* %tmp11240, i64 1
- %tmp11242 = getelementptr inbounds float* %tmp11241, i64 1
- %tmp11243 = getelementptr inbounds float* %tmp11242, i64 1
- %tmp11244 = getelementptr inbounds float* %tmp11243, i64 1
- %tmp11245 = getelementptr inbounds float* %tmp11244, i64 1
- %tmp11246 = getelementptr inbounds float* %tmp11245, i64 1
- %tmp11247 = getelementptr inbounds float* %tmp11246, i64 1
- %tmp11248 = getelementptr inbounds float* %tmp11247, i64 1
- %tmp11249 = getelementptr inbounds float* %tmp11248, i64 1
- %tmp11250 = getelementptr inbounds float* %tmp11249, i64 1
- %tmp11251 = getelementptr inbounds float* %tmp11250, i64 1
- %tmp11252 = getelementptr inbounds float* %tmp11251, i64 1
- %tmp11253 = getelementptr inbounds float* %tmp11252, i64 1
- %tmp11254 = getelementptr inbounds float* %tmp11253, i64 1
- %tmp11255 = getelementptr inbounds float* %tmp11254, i64 1
- %tmp11256 = getelementptr inbounds float* %tmp11255, i64 1
- %tmp11257 = getelementptr inbounds float* %tmp11256, i64 1
- %tmp11258 = getelementptr inbounds float* %tmp11257, i64 1
- %tmp11259 = getelementptr inbounds float* %tmp11258, i64 1
- %tmp11260 = getelementptr inbounds float* %tmp11259, i64 1
- %tmp11261 = getelementptr inbounds float* %tmp11260, i64 1
- %tmp11262 = getelementptr inbounds float* %tmp11261, i64 1
- %tmp11263 = getelementptr inbounds float* %tmp11262, i64 1
- %tmp11264 = getelementptr inbounds float* %tmp11263, i64 1
- %tmp11265 = getelementptr inbounds float* %tmp11264, i64 1
- %tmp11266 = getelementptr inbounds float* %tmp11265, i64 1
- %tmp11267 = getelementptr inbounds float* %tmp11266, i64 1
- %tmp11268 = getelementptr inbounds float* %tmp11267, i64 1
- %tmp11269 = getelementptr inbounds float* %tmp11268, i64 1
- %tmp11270 = getelementptr inbounds float* %tmp11269, i64 1
- %tmp11271 = getelementptr inbounds float* %tmp11270, i64 1
- %tmp11272 = getelementptr inbounds float* %tmp11271, i64 1
- %tmp11273 = getelementptr inbounds float* %tmp11272, i64 1
- %tmp11274 = getelementptr inbounds float* %tmp11273, i64 1
- %tmp11275 = getelementptr inbounds float* %tmp11274, i64 1
- %tmp11276 = getelementptr inbounds float* %tmp11275, i64 1
- %tmp11277 = getelementptr inbounds float* %tmp11276, i64 1
- %tmp11278 = getelementptr inbounds float* %tmp11277, i64 1
- %tmp11279 = getelementptr inbounds float* %tmp11278, i64 1
- %tmp11280 = getelementptr inbounds float* %tmp11279, i64 1
- %tmp11281 = getelementptr inbounds float* %tmp11280, i64 1
- %tmp11282 = getelementptr inbounds float* %tmp11281, i64 1
- %tmp11283 = getelementptr inbounds float* %tmp11282, i64 1
- %tmp11284 = getelementptr inbounds float* %tmp11283, i64 1
- %tmp11285 = getelementptr inbounds float* %tmp11284, i64 1
- %tmp11286 = getelementptr inbounds float* %tmp11285, i64 1
- %tmp11287 = getelementptr inbounds float* %tmp11286, i64 1
- %tmp11288 = getelementptr inbounds float* %tmp11287, i64 1
- %tmp11289 = getelementptr inbounds float* %tmp11288, i64 1
- %tmp11290 = getelementptr inbounds float* %tmp11289, i64 1
- %tmp11291 = getelementptr inbounds float* %tmp11290, i64 1
- %tmp11292 = getelementptr inbounds float* %tmp11291, i64 1
- %tmp11293 = getelementptr inbounds float* %tmp11292, i64 1
- %tmp11294 = getelementptr inbounds float* %tmp11293, i64 1
- %tmp11295 = getelementptr inbounds float* %tmp11294, i64 1
- %tmp11296 = getelementptr inbounds float* %tmp11295, i64 1
- %tmp11297 = getelementptr inbounds float* %tmp11296, i64 1
- %tmp11298 = getelementptr inbounds float* %tmp11297, i64 1
- %tmp11299 = getelementptr inbounds float* %tmp11298, i64 1
- %tmp11300 = getelementptr inbounds float* %tmp11299, i64 1
- %tmp11301 = getelementptr inbounds float* %tmp11300, i64 1
- %tmp11302 = getelementptr inbounds float* %tmp11301, i64 1
- %tmp11303 = getelementptr inbounds float* %tmp11302, i64 1
- %tmp11304 = getelementptr inbounds float* %tmp11303, i64 1
- %tmp11305 = getelementptr inbounds float* %tmp11304, i64 1
- %tmp11306 = getelementptr inbounds float* %tmp11305, i64 1
- %tmp11307 = getelementptr inbounds float* %tmp11306, i64 1
- %tmp11308 = getelementptr inbounds float* %tmp11307, i64 1
- %tmp11309 = getelementptr inbounds float* %tmp11308, i64 1
- %tmp11310 = getelementptr inbounds float* %tmp11309, i64 1
- %tmp11311 = getelementptr inbounds float* %tmp11310, i64 1
- %tmp11312 = getelementptr inbounds float* %tmp11311, i64 1
- %tmp11313 = getelementptr inbounds float* %tmp11312, i64 1
- %tmp11314 = getelementptr inbounds float* %tmp11313, i64 1
- %tmp11315 = getelementptr inbounds float* %tmp11314, i64 1
- %tmp11316 = getelementptr inbounds float* %tmp11315, i64 1
- %tmp11317 = getelementptr inbounds float* %tmp11316, i64 1
- %tmp11318 = getelementptr inbounds float* %tmp11317, i64 1
- %tmp11319 = getelementptr inbounds float* %tmp11318, i64 1
- %tmp11320 = getelementptr inbounds float* %tmp11319, i64 1
- %tmp11321 = getelementptr inbounds float* %tmp11320, i64 1
- %tmp11322 = getelementptr inbounds float* %tmp11321, i64 1
- %tmp11323 = getelementptr inbounds float* %tmp11322, i64 1
- %tmp11324 = getelementptr inbounds float* %tmp11323, i64 1
- %tmp11325 = getelementptr inbounds float* %tmp11324, i64 1
- %tmp11326 = getelementptr inbounds float* %tmp11325, i64 1
- %tmp11327 = getelementptr inbounds float* %tmp11326, i64 1
- %tmp11328 = getelementptr inbounds float* %tmp11327, i64 1
- %tmp11329 = getelementptr inbounds float* %tmp11328, i64 1
- %tmp11330 = getelementptr inbounds float* %tmp11329, i64 1
- %tmp11331 = getelementptr inbounds float* %tmp11330, i64 1
- %tmp11332 = getelementptr inbounds float* %tmp11331, i64 1
- %tmp11333 = getelementptr inbounds float* %tmp11332, i64 1
- %tmp11334 = getelementptr inbounds float* %tmp11333, i64 1
- %tmp11335 = getelementptr inbounds float* %tmp11334, i64 1
- %tmp11336 = getelementptr inbounds float* %tmp11335, i64 1
- %tmp11337 = getelementptr inbounds float* %tmp11336, i64 1
- %tmp11338 = getelementptr inbounds float* %tmp11337, i64 1
- %tmp11339 = getelementptr inbounds float* %tmp11338, i64 1
- %tmp11340 = getelementptr inbounds float* %tmp11339, i64 1
- %tmp11341 = getelementptr inbounds float* %tmp11340, i64 1
- %tmp11342 = getelementptr inbounds float* %tmp11341, i64 1
- %tmp11343 = getelementptr inbounds float* %tmp11342, i64 1
- %tmp11344 = getelementptr inbounds float* %tmp11343, i64 1
- %tmp11345 = getelementptr inbounds float* %tmp11344, i64 1
- %tmp11346 = getelementptr inbounds float* %tmp11345, i64 1
- %tmp11347 = getelementptr inbounds float* %tmp11346, i64 1
- %tmp11348 = getelementptr inbounds float* %tmp11347, i64 1
- %tmp11349 = getelementptr inbounds float* %tmp11348, i64 1
- %tmp11350 = getelementptr inbounds float* %tmp11349, i64 1
- %tmp11351 = getelementptr inbounds float* %tmp11350, i64 1
- %tmp11352 = getelementptr inbounds float* %tmp11351, i64 1
- %tmp11353 = getelementptr inbounds float* %tmp11352, i64 1
- %tmp11354 = getelementptr inbounds float* %tmp11353, i64 1
- %tmp11355 = getelementptr inbounds float* %tmp11354, i64 1
- %tmp11356 = getelementptr inbounds float* %tmp11355, i64 1
- %tmp11357 = getelementptr inbounds float* %tmp11356, i64 1
- %tmp11358 = getelementptr inbounds float* %tmp11357, i64 1
- %tmp11359 = getelementptr inbounds float* %tmp11358, i64 1
- %tmp11360 = getelementptr inbounds float* %tmp11359, i64 1
- %tmp11361 = getelementptr inbounds float* %tmp11360, i64 1
- %tmp11362 = getelementptr inbounds float* %tmp11361, i64 1
- %tmp11363 = getelementptr inbounds float* %tmp11362, i64 1
- %tmp11364 = getelementptr inbounds float* %tmp11363, i64 1
- %tmp11365 = getelementptr inbounds float* %tmp11364, i64 1
- %tmp11366 = getelementptr inbounds float* %tmp11365, i64 1
- %tmp11367 = getelementptr inbounds float* %tmp11366, i64 1
- %tmp11368 = getelementptr inbounds float* %tmp11367, i64 1
- %tmp11369 = getelementptr inbounds float* %tmp11368, i64 1
- %tmp11370 = getelementptr inbounds float* %tmp11369, i64 1
- %tmp11371 = getelementptr inbounds float* %tmp11370, i64 1
- %tmp11372 = getelementptr inbounds float* %tmp11371, i64 1
- %tmp11373 = getelementptr inbounds float* %tmp11372, i64 1
- %tmp11374 = getelementptr inbounds float* %tmp11373, i64 1
- %tmp11375 = getelementptr inbounds float* %tmp11374, i64 1
- %tmp11376 = getelementptr inbounds float* %tmp11375, i64 1
- %tmp11377 = getelementptr inbounds float* %tmp11376, i64 1
- %tmp11378 = getelementptr inbounds float* %tmp11377, i64 1
- %tmp11379 = getelementptr inbounds float* %tmp11378, i64 1
- %tmp11380 = getelementptr inbounds float* %tmp11379, i64 1
- %tmp11381 = getelementptr inbounds float* %tmp11380, i64 1
- %tmp11382 = getelementptr inbounds float* %tmp11381, i64 1
- %tmp11383 = getelementptr inbounds float* %tmp11382, i64 1
- %tmp11384 = getelementptr inbounds float* %tmp11383, i64 1
- %tmp11385 = getelementptr inbounds float* %tmp11384, i64 1
- %tmp11386 = getelementptr inbounds float* %tmp11385, i64 1
- %tmp11387 = getelementptr inbounds float* %tmp11386, i64 1
- %tmp11388 = getelementptr inbounds float* %tmp11387, i64 1
- %tmp11389 = getelementptr inbounds float* %tmp11388, i64 1
- %tmp11390 = getelementptr inbounds float* %tmp11389, i64 1
- %tmp11391 = getelementptr inbounds float* %tmp11390, i64 1
- %tmp11392 = getelementptr inbounds float* %tmp11391, i64 1
- %tmp11393 = getelementptr inbounds float* %tmp11392, i64 1
- %tmp11394 = getelementptr inbounds float* %tmp11393, i64 1
- %tmp11395 = getelementptr inbounds float* %tmp11394, i64 1
- %tmp11396 = getelementptr inbounds float* %tmp11395, i64 1
- %tmp11397 = getelementptr inbounds float* %tmp11396, i64 1
- %tmp11398 = getelementptr inbounds float* %tmp11397, i64 1
- %tmp11399 = getelementptr inbounds float* %tmp11398, i64 1
- %tmp11400 = getelementptr inbounds float* %tmp11399, i64 1
- %tmp11401 = getelementptr inbounds float* %tmp11400, i64 1
- %tmp11402 = getelementptr inbounds float* %tmp11401, i64 1
- %tmp11403 = getelementptr inbounds float* %tmp11402, i64 1
- %tmp11404 = getelementptr inbounds float* %tmp11403, i64 1
- %tmp11405 = getelementptr inbounds float* %tmp11404, i64 1
- %tmp11406 = getelementptr inbounds float* %tmp11405, i64 1
- %tmp11407 = getelementptr inbounds float* %tmp11406, i64 1
- %tmp11408 = getelementptr inbounds float* %tmp11407, i64 1
- %tmp11409 = getelementptr inbounds float* %tmp11408, i64 1
- %tmp11410 = getelementptr inbounds float* %tmp11409, i64 1
- %tmp11411 = getelementptr inbounds float* %tmp11410, i64 1
- %tmp11412 = getelementptr inbounds float* %tmp11411, i64 1
- %tmp11413 = getelementptr inbounds float* %tmp11412, i64 1
- %tmp11414 = getelementptr inbounds float* %tmp11413, i64 1
- %tmp11415 = getelementptr inbounds float* %tmp11414, i64 1
- %tmp11416 = getelementptr inbounds float* %tmp11415, i64 1
- %tmp11417 = getelementptr inbounds float* %tmp11416, i64 1
- %tmp11418 = getelementptr inbounds float* %tmp11417, i64 1
- %tmp11419 = getelementptr inbounds float* %tmp11418, i64 1
- %tmp11420 = getelementptr inbounds float* %tmp11419, i64 1
- %tmp11421 = getelementptr inbounds float* %tmp11420, i64 1
- %tmp11422 = getelementptr inbounds float* %tmp11421, i64 1
- %tmp11423 = getelementptr inbounds float* %tmp11422, i64 1
- %tmp11424 = getelementptr inbounds float* %tmp11423, i64 1
- %tmp11425 = getelementptr inbounds float* %tmp11424, i64 1
- %tmp11426 = getelementptr inbounds float* %tmp11425, i64 1
- %tmp11427 = getelementptr inbounds float* %tmp11426, i64 1
- %tmp11428 = getelementptr inbounds float* %tmp11427, i64 1
- %tmp11429 = getelementptr inbounds float* %tmp11428, i64 1
- %tmp11430 = getelementptr inbounds float* %tmp11429, i64 1
- %tmp11431 = getelementptr inbounds float* %tmp11430, i64 1
- %tmp11432 = getelementptr inbounds float* %tmp11431, i64 1
- %tmp11433 = getelementptr inbounds float* %tmp11432, i64 1
- %tmp11434 = getelementptr inbounds float* %tmp11433, i64 1
- %tmp11435 = getelementptr inbounds float* %tmp11434, i64 1
- %tmp11436 = getelementptr inbounds float* %tmp11435, i64 1
- %tmp11437 = getelementptr inbounds float* %tmp11436, i64 1
- %tmp11438 = getelementptr inbounds float* %tmp11437, i64 1
- %tmp11439 = getelementptr inbounds float* %tmp11438, i64 1
- %tmp11440 = getelementptr inbounds float* %tmp11439, i64 1
- %tmp11441 = getelementptr inbounds float* %tmp11440, i64 1
- %tmp11442 = getelementptr inbounds float* %tmp11441, i64 1
- %tmp11443 = getelementptr inbounds float* %tmp11442, i64 1
- %tmp11444 = getelementptr inbounds float* %tmp11443, i64 1
- %tmp11445 = getelementptr inbounds float* %tmp11444, i64 1
- %tmp11446 = getelementptr inbounds float* %tmp11445, i64 1
- %tmp11447 = getelementptr inbounds float* %tmp11446, i64 1
- %tmp11448 = getelementptr inbounds float* %tmp11447, i64 1
- %tmp11449 = getelementptr inbounds float* %tmp11448, i64 1
- %tmp11450 = getelementptr inbounds float* %tmp11449, i64 1
- %tmp11451 = getelementptr inbounds float* %tmp11450, i64 1
- %tmp11452 = getelementptr inbounds float* %tmp11451, i64 1
- %tmp11453 = getelementptr inbounds float* %tmp11452, i64 1
- %tmp11454 = getelementptr inbounds float* %tmp11453, i64 1
- %tmp11455 = getelementptr inbounds float* %tmp11454, i64 1
- %tmp11456 = getelementptr inbounds float* %tmp11455, i64 1
- %tmp11457 = getelementptr inbounds float* %tmp11456, i64 1
- %tmp11458 = getelementptr inbounds float* %tmp11457, i64 1
- %tmp11459 = getelementptr inbounds float* %tmp11458, i64 1
- %tmp11460 = getelementptr inbounds float* %tmp11459, i64 1
- %tmp11461 = getelementptr inbounds float* %tmp11460, i64 1
- %tmp11462 = getelementptr inbounds float* %tmp11461, i64 1
- %tmp11463 = getelementptr inbounds float* %tmp11462, i64 1
- %tmp11464 = getelementptr inbounds float* %tmp11463, i64 1
- %tmp11465 = getelementptr inbounds float* %tmp11464, i64 1
- %tmp11466 = getelementptr inbounds float* %tmp11465, i64 1
- %tmp11467 = getelementptr inbounds float* %tmp11466, i64 1
- %tmp11468 = getelementptr inbounds float* %tmp11467, i64 1
- %tmp11469 = getelementptr inbounds float* %tmp11468, i64 1
- %tmp11470 = getelementptr inbounds float* %tmp11469, i64 1
- %tmp11471 = getelementptr inbounds float* %tmp11470, i64 1
- %tmp11472 = getelementptr inbounds float* %tmp11471, i64 1
- %tmp11473 = getelementptr inbounds float* %tmp11472, i64 1
- %tmp11474 = getelementptr inbounds float* %tmp11473, i64 1
- %tmp11475 = getelementptr inbounds float* %tmp11474, i64 1
- %tmp11476 = getelementptr inbounds float* %tmp11475, i64 1
- %tmp11477 = getelementptr inbounds float* %tmp11476, i64 1
- %tmp11478 = getelementptr inbounds float* %tmp11477, i64 1
- %tmp11479 = getelementptr inbounds float* %tmp11478, i64 1
- %tmp11480 = getelementptr inbounds float* %tmp11479, i64 1
- %tmp11481 = getelementptr inbounds float* %tmp11480, i64 1
- %tmp11482 = getelementptr inbounds float* %tmp11481, i64 1
- %tmp11483 = getelementptr inbounds float* %tmp11482, i64 1
- %tmp11484 = getelementptr inbounds float* %tmp11483, i64 1
- %tmp11485 = getelementptr inbounds float* %tmp11484, i64 1
- %tmp11486 = getelementptr inbounds float* %tmp11485, i64 1
- %tmp11487 = getelementptr inbounds float* %tmp11486, i64 1
- %tmp11488 = getelementptr inbounds float* %tmp11487, i64 1
- %tmp11489 = getelementptr inbounds float* %tmp11488, i64 1
- %tmp11490 = getelementptr inbounds float* %tmp11489, i64 1
- %tmp11491 = getelementptr inbounds float* %tmp11490, i64 1
- %tmp11492 = getelementptr inbounds float* %tmp11491, i64 1
- %tmp11493 = getelementptr inbounds float* %tmp11492, i64 1
- %tmp11494 = getelementptr inbounds float* %tmp11493, i64 1
- %tmp11495 = getelementptr inbounds float* %tmp11494, i64 1
- %tmp11496 = getelementptr inbounds float* %tmp11495, i64 1
- %tmp11497 = getelementptr inbounds float* %tmp11496, i64 1
- %tmp11498 = getelementptr inbounds float* %tmp11497, i64 1
- %tmp11499 = getelementptr inbounds float* %tmp11498, i64 1
- %tmp11500 = getelementptr inbounds float* %tmp11499, i64 1
- %tmp11501 = getelementptr inbounds float* %tmp11500, i64 1
- %tmp11502 = getelementptr inbounds float* %tmp11501, i64 1
- %tmp11503 = getelementptr inbounds float* %tmp11502, i64 1
- %tmp11504 = getelementptr inbounds float* %tmp11503, i64 1
- %tmp11505 = getelementptr inbounds float* %tmp11504, i64 1
- %tmp11506 = getelementptr inbounds float* %tmp11505, i64 1
- %tmp11507 = getelementptr inbounds float* %tmp11506, i64 1
- %tmp11508 = getelementptr inbounds float* %tmp11507, i64 1
- %tmp11509 = getelementptr inbounds float* %tmp11508, i64 1
- %tmp11510 = getelementptr inbounds float* %tmp11509, i64 1
- %tmp11511 = getelementptr inbounds float* %tmp11510, i64 1
- %tmp11512 = getelementptr inbounds float* %tmp11511, i64 1
- %tmp11513 = getelementptr inbounds float* %tmp11512, i64 1
- %tmp11514 = getelementptr inbounds float* %tmp11513, i64 1
- %tmp11515 = getelementptr inbounds float* %tmp11514, i64 1
- %tmp11516 = getelementptr inbounds float* %tmp11515, i64 1
- %tmp11517 = getelementptr inbounds float* %tmp11516, i64 1
- %tmp11518 = getelementptr inbounds float* %tmp11517, i64 1
- %tmp11519 = getelementptr inbounds float* %tmp11518, i64 1
- %tmp11520 = getelementptr inbounds float* %tmp11519, i64 1
- %tmp11521 = getelementptr inbounds float* %tmp11520, i64 1
- %tmp11522 = getelementptr inbounds float* %tmp11521, i64 1
- %tmp11523 = getelementptr inbounds float* %tmp11522, i64 1
- %tmp11524 = getelementptr inbounds float* %tmp11523, i64 1
- %tmp11525 = getelementptr inbounds float* %tmp11524, i64 1
- %tmp11526 = getelementptr inbounds float* %tmp11525, i64 1
- %tmp11527 = getelementptr inbounds float* %tmp11526, i64 1
- %tmp11528 = getelementptr inbounds float* %tmp11527, i64 1
- %tmp11529 = getelementptr inbounds float* %tmp11528, i64 1
- %tmp11530 = getelementptr inbounds float* %tmp11529, i64 1
- %tmp11531 = getelementptr inbounds float* %tmp11530, i64 1
- %tmp11532 = getelementptr inbounds float* %tmp11531, i64 1
- %tmp11533 = getelementptr inbounds float* %tmp11532, i64 1
- %tmp11534 = getelementptr inbounds float* %tmp11533, i64 1
- %tmp11535 = getelementptr inbounds float* %tmp11534, i64 1
- %tmp11536 = getelementptr inbounds float* %tmp11535, i64 1
- %tmp11537 = getelementptr inbounds float* %tmp11536, i64 1
- %tmp11538 = getelementptr inbounds float* %tmp11537, i64 1
- %tmp11539 = getelementptr inbounds float* %tmp11538, i64 1
- %tmp11540 = getelementptr inbounds float* %tmp11539, i64 1
- %tmp11541 = getelementptr inbounds float* %tmp11540, i64 1
- %tmp11542 = getelementptr inbounds float* %tmp11541, i64 1
- %tmp11543 = getelementptr inbounds float* %tmp11542, i64 1
- %tmp11544 = getelementptr inbounds float* %tmp11543, i64 1
- %tmp11545 = getelementptr inbounds float* %tmp11544, i64 1
- %tmp11546 = getelementptr inbounds float* %tmp11545, i64 1
- %tmp11547 = getelementptr inbounds float* %tmp11546, i64 1
- %tmp11548 = getelementptr inbounds float* %tmp11547, i64 1
- %tmp11549 = getelementptr inbounds float* %tmp11548, i64 1
- %tmp11550 = getelementptr inbounds float* %tmp11549, i64 1
- %tmp11551 = getelementptr inbounds float* %tmp11550, i64 1
- %tmp11552 = getelementptr inbounds float* %tmp11551, i64 1
- %tmp11553 = getelementptr inbounds float* %tmp11552, i64 1
- %tmp11554 = getelementptr inbounds float* %tmp11553, i64 1
- %tmp11555 = getelementptr inbounds float* %tmp11554, i64 1
- %tmp11556 = getelementptr inbounds float* %tmp11555, i64 1
- %tmp11557 = getelementptr inbounds float* %tmp11556, i64 1
- %tmp11558 = getelementptr inbounds float* %tmp11557, i64 1
- %tmp11559 = getelementptr inbounds float* %tmp11558, i64 1
- %tmp11560 = getelementptr inbounds float* %tmp11559, i64 1
- %tmp11561 = getelementptr inbounds float* %tmp11560, i64 1
- %tmp11562 = getelementptr inbounds float* %tmp11561, i64 1
- %tmp11563 = getelementptr inbounds float* %tmp11562, i64 1
- %tmp11564 = getelementptr inbounds float* %tmp11563, i64 1
- %tmp11565 = getelementptr inbounds float* %tmp11564, i64 1
- %tmp11566 = getelementptr inbounds float* %tmp11565, i64 1
- %tmp11567 = getelementptr inbounds float* %tmp11566, i64 1
- %tmp11568 = getelementptr inbounds float* %tmp11567, i64 1
- %tmp11569 = getelementptr inbounds float* %tmp11568, i64 1
- %tmp11570 = getelementptr inbounds float* %tmp11569, i64 1
- %tmp11571 = getelementptr inbounds float* %tmp11570, i64 1
- %tmp11572 = getelementptr inbounds float* %tmp11571, i64 1
- %tmp11573 = getelementptr inbounds float* %tmp11572, i64 1
- %tmp11574 = getelementptr inbounds float* %tmp11573, i64 1
- %tmp11575 = getelementptr inbounds float* %tmp11574, i64 1
- %tmp11576 = getelementptr inbounds float* %tmp11575, i64 1
- %tmp11577 = getelementptr inbounds float* %tmp11576, i64 1
- %tmp11578 = getelementptr inbounds float* %tmp11577, i64 1
- %tmp11579 = getelementptr inbounds float* %tmp11578, i64 1
- %tmp11580 = getelementptr inbounds float* %tmp11579, i64 1
- %tmp11581 = getelementptr inbounds float* %tmp11580, i64 1
- %tmp11582 = getelementptr inbounds float* %tmp11581, i64 1
- %tmp11583 = getelementptr inbounds float* %tmp11582, i64 1
- %tmp11584 = getelementptr inbounds float* %tmp11583, i64 1
- %tmp11585 = getelementptr inbounds float* %tmp11584, i64 1
- %tmp11586 = getelementptr inbounds float* %tmp11585, i64 1
- %tmp11587 = getelementptr inbounds float* %tmp11586, i64 1
- %tmp11588 = getelementptr inbounds float* %tmp11587, i64 1
- %tmp11589 = getelementptr inbounds float* %tmp11588, i64 1
- %tmp11590 = getelementptr inbounds float* %tmp11589, i64 1
- %tmp11591 = getelementptr inbounds float* %tmp11590, i64 1
- %tmp11592 = getelementptr inbounds float* %tmp11591, i64 1
- %tmp11593 = getelementptr inbounds float* %tmp11592, i64 1
- %tmp11594 = getelementptr inbounds float* %tmp11593, i64 1
- %tmp11595 = getelementptr inbounds float* %tmp11594, i64 1
- %tmp11596 = getelementptr inbounds float* %tmp11595, i64 1
- %tmp11597 = getelementptr inbounds float* %tmp11596, i64 1
- %tmp11598 = getelementptr inbounds float* %tmp11597, i64 1
- %tmp11599 = getelementptr inbounds float* %tmp11598, i64 1
- %tmp11600 = getelementptr inbounds float* %tmp11599, i64 1
- %tmp11601 = getelementptr inbounds float* %tmp11600, i64 1
- %tmp11602 = getelementptr inbounds float* %tmp11601, i64 1
- %tmp11603 = getelementptr inbounds float* %tmp11602, i64 1
- %tmp11604 = getelementptr inbounds float* %tmp11603, i64 1
- %tmp11605 = getelementptr inbounds float* %tmp11604, i64 1
- %tmp11606 = getelementptr inbounds float* %tmp11605, i64 1
- %tmp11607 = getelementptr inbounds float* %tmp11606, i64 1
- %tmp11608 = getelementptr inbounds float* %tmp11607, i64 1
- %tmp11609 = getelementptr inbounds float* %tmp11608, i64 1
- %tmp11610 = getelementptr inbounds float* %tmp11609, i64 1
- %tmp11611 = getelementptr inbounds float* %tmp11610, i64 1
- %tmp11612 = getelementptr inbounds float* %tmp11611, i64 1
- %tmp11613 = getelementptr inbounds float* %tmp11612, i64 1
- %tmp11614 = getelementptr inbounds float* %tmp11613, i64 1
- %tmp11615 = getelementptr inbounds float* %tmp11614, i64 1
- %tmp11616 = getelementptr inbounds float* %tmp11615, i64 1
- %tmp11617 = getelementptr inbounds float* %tmp11616, i64 1
- %tmp11618 = getelementptr inbounds float* %tmp11617, i64 1
- %tmp11619 = getelementptr inbounds float* %tmp11618, i64 1
- %tmp11620 = getelementptr inbounds float* %tmp11619, i64 1
- %tmp11621 = getelementptr inbounds float* %tmp11620, i64 1
- %tmp11622 = getelementptr inbounds float* %tmp11621, i64 1
- %tmp11623 = getelementptr inbounds float* %tmp11622, i64 1
- %tmp11624 = getelementptr inbounds float* %tmp11623, i64 1
- %tmp11625 = getelementptr inbounds float* %tmp11624, i64 1
- %tmp11626 = getelementptr inbounds float* %tmp11625, i64 1
- %tmp11627 = getelementptr inbounds float* %tmp11626, i64 1
- %tmp11628 = getelementptr inbounds float* %tmp11627, i64 1
- %tmp11629 = getelementptr inbounds float* %tmp11628, i64 1
- %tmp11630 = getelementptr inbounds float* %tmp11629, i64 1
- %tmp11631 = getelementptr inbounds float* %tmp11630, i64 1
- %tmp11632 = getelementptr inbounds float* %tmp11631, i64 1
- %tmp11633 = getelementptr inbounds float* %tmp11632, i64 1
- %tmp11634 = getelementptr inbounds float* %tmp11633, i64 1
- %tmp11635 = getelementptr inbounds float* %tmp11634, i64 1
- %tmp11636 = getelementptr inbounds float* %tmp11635, i64 1
- %tmp11637 = getelementptr inbounds float* %tmp11636, i64 1
- %tmp11638 = getelementptr inbounds float* %tmp11637, i64 1
- %tmp11639 = getelementptr inbounds float* %tmp11638, i64 1
- %tmp11640 = getelementptr inbounds float* %tmp11639, i64 1
- %tmp11641 = getelementptr inbounds float* %tmp11640, i64 1
- %tmp11642 = getelementptr inbounds float* %tmp11641, i64 1
- %tmp11643 = getelementptr inbounds float* %tmp11642, i64 1
- %tmp11644 = getelementptr inbounds float* %tmp11643, i64 1
- %tmp11645 = getelementptr inbounds float* %tmp11644, i64 1
- %tmp11646 = getelementptr inbounds float* %tmp11645, i64 1
- %tmp11647 = getelementptr inbounds float* %tmp11646, i64 1
- %tmp11648 = getelementptr inbounds float* %tmp11647, i64 1
- %tmp11649 = getelementptr inbounds float* %tmp11648, i64 1
- %tmp11650 = getelementptr inbounds float* %tmp11649, i64 1
- %tmp11651 = getelementptr inbounds float* %tmp11650, i64 1
- %tmp11652 = getelementptr inbounds float* %tmp11651, i64 1
- %tmp11653 = getelementptr inbounds float* %tmp11652, i64 1
- %tmp11654 = getelementptr inbounds float* %tmp11653, i64 1
- %tmp11655 = getelementptr inbounds float* %tmp11654, i64 1
- %tmp11656 = getelementptr inbounds float* %tmp11655, i64 1
- %tmp11657 = getelementptr inbounds float* %tmp11656, i64 1
- %tmp11658 = getelementptr inbounds float* %tmp11657, i64 1
- %tmp11659 = getelementptr inbounds float* %tmp11658, i64 1
- %tmp11660 = getelementptr inbounds float* %tmp11659, i64 1
- %tmp11661 = getelementptr inbounds float* %tmp11660, i64 1
- %tmp11662 = getelementptr inbounds float* %tmp11661, i64 1
- %tmp11663 = getelementptr inbounds float* %tmp11662, i64 1
- %tmp11664 = getelementptr inbounds float* %tmp11663, i64 1
- %tmp11665 = getelementptr inbounds float* %tmp11664, i64 1
- %tmp11666 = getelementptr inbounds float* %tmp11665, i64 1
- %tmp11667 = getelementptr inbounds float* %tmp11666, i64 1
- %tmp11668 = getelementptr inbounds float* %tmp11667, i64 1
- %tmp11669 = getelementptr inbounds float* %tmp11668, i64 1
- %tmp11670 = getelementptr inbounds float* %tmp11669, i64 1
- %tmp11671 = getelementptr inbounds float* %tmp11670, i64 1
- %tmp11672 = getelementptr inbounds float* %tmp11671, i64 1
- %tmp11673 = getelementptr inbounds float* %tmp11672, i64 1
- %tmp11674 = getelementptr inbounds float* %tmp11673, i64 1
- %tmp11675 = getelementptr inbounds float* %tmp11674, i64 1
- %tmp11676 = getelementptr inbounds float* %tmp11675, i64 1
- %tmp11677 = getelementptr inbounds float* %tmp11676, i64 1
- %tmp11678 = getelementptr inbounds float* %tmp11677, i64 1
- %tmp11679 = getelementptr inbounds float* %tmp11678, i64 1
- %tmp11680 = getelementptr inbounds float* %tmp11679, i64 1
- %tmp11681 = getelementptr inbounds float* %tmp11680, i64 1
- %tmp11682 = getelementptr inbounds float* %tmp11681, i64 1
- %tmp11683 = getelementptr inbounds float* %tmp11682, i64 1
- %tmp11684 = getelementptr inbounds float* %tmp11683, i64 1
- %tmp11685 = getelementptr inbounds float* %tmp11684, i64 1
- %tmp11686 = getelementptr inbounds float* %tmp11685, i64 1
- %tmp11687 = getelementptr inbounds float* %tmp11686, i64 1
- %tmp11688 = getelementptr inbounds float* %tmp11687, i64 1
- %tmp11689 = getelementptr inbounds float* %tmp11688, i64 1
- %tmp11690 = getelementptr inbounds float* %tmp11689, i64 1
- %tmp11691 = getelementptr inbounds float* %tmp11690, i64 1
- %tmp11692 = getelementptr inbounds float* %tmp11691, i64 1
- %tmp11693 = getelementptr inbounds float* %tmp11692, i64 1
- %tmp11694 = getelementptr inbounds float* %tmp11693, i64 1
- %tmp11695 = getelementptr inbounds float* %tmp11694, i64 1
- %tmp11696 = getelementptr inbounds float* %tmp11695, i64 1
- %tmp11697 = getelementptr inbounds float* %tmp11696, i64 1
- %tmp11698 = getelementptr inbounds float* %tmp11697, i64 1
- %tmp11699 = getelementptr inbounds float* %tmp11698, i64 1
- %tmp11700 = getelementptr inbounds float* %tmp11699, i64 1
- %tmp11701 = getelementptr inbounds float* %tmp11700, i64 1
- %tmp11702 = getelementptr inbounds float* %tmp11701, i64 1
- %tmp11703 = getelementptr inbounds float* %tmp11702, i64 1
- %tmp11704 = getelementptr inbounds float* %tmp11703, i64 1
- %tmp11705 = getelementptr inbounds float* %tmp11704, i64 1
- %tmp11706 = getelementptr inbounds float* %tmp11705, i64 1
- %tmp11707 = getelementptr inbounds float* %tmp11706, i64 1
- %tmp11708 = getelementptr inbounds float* %tmp11707, i64 1
- %tmp11709 = getelementptr inbounds float* %tmp11708, i64 1
- %tmp11710 = getelementptr inbounds float* %tmp11709, i64 1
- %tmp11711 = getelementptr inbounds float* %tmp11710, i64 1
- %tmp11712 = getelementptr inbounds float* %tmp11711, i64 1
- %tmp11713 = getelementptr inbounds float* %tmp11712, i64 1
- %tmp11714 = getelementptr inbounds float* %tmp11713, i64 1
- %tmp11715 = getelementptr inbounds float* %tmp11714, i64 1
- %tmp11716 = getelementptr inbounds float* %tmp11715, i64 1
- %tmp11717 = getelementptr inbounds float* %tmp11716, i64 1
- %tmp11718 = getelementptr inbounds float* %tmp11717, i64 1
- %tmp11719 = getelementptr inbounds float* %tmp11718, i64 1
- %tmp11720 = getelementptr inbounds float* %tmp11719, i64 1
- %tmp11721 = getelementptr inbounds float* %tmp11720, i64 1
- %tmp11722 = getelementptr inbounds float* %tmp11721, i64 1
- %tmp11723 = getelementptr inbounds float* %tmp11722, i64 1
- %tmp11724 = getelementptr inbounds float* %tmp11723, i64 1
- %tmp11725 = getelementptr inbounds float* %tmp11724, i64 1
- %tmp11726 = getelementptr inbounds float* %tmp11725, i64 1
- %tmp11727 = getelementptr inbounds float* %tmp11726, i64 1
- %tmp11728 = getelementptr inbounds float* %tmp11727, i64 1
- %tmp11729 = getelementptr inbounds float* %tmp11728, i64 1
- %tmp11730 = getelementptr inbounds float* %tmp11729, i64 1
- %tmp11731 = getelementptr inbounds float* %tmp11730, i64 1
- %tmp11732 = getelementptr inbounds float* %tmp11731, i64 1
- %tmp11733 = getelementptr inbounds float* %tmp11732, i64 1
- %tmp11734 = getelementptr inbounds float* %tmp11733, i64 1
- %tmp11735 = getelementptr inbounds float* %tmp11734, i64 1
- %tmp11736 = getelementptr inbounds float* %tmp11735, i64 1
- %tmp11737 = getelementptr inbounds float* %tmp11736, i64 1
- %tmp11738 = getelementptr inbounds float* %tmp11737, i64 1
- %tmp11739 = getelementptr inbounds float* %tmp11738, i64 1
- %tmp11740 = getelementptr inbounds float* %tmp11739, i64 1
- %tmp11741 = getelementptr inbounds float* %tmp11740, i64 1
- %tmp11742 = getelementptr inbounds float* %tmp11741, i64 1
- %tmp11743 = getelementptr inbounds float* %tmp11742, i64 1
- %tmp11744 = getelementptr inbounds float* %tmp11743, i64 1
- %tmp11745 = getelementptr inbounds float* %tmp11744, i64 1
- %tmp11746 = getelementptr inbounds float* %tmp11745, i64 1
- %tmp11747 = getelementptr inbounds float* %tmp11746, i64 1
- %tmp11748 = getelementptr inbounds float* %tmp11747, i64 1
- %tmp11749 = getelementptr inbounds float* %tmp11748, i64 1
- %tmp11750 = getelementptr inbounds float* %tmp11749, i64 1
- %tmp11751 = getelementptr inbounds float* %tmp11750, i64 1
- %tmp11752 = getelementptr inbounds float* %tmp11751, i64 1
- %tmp11753 = getelementptr inbounds float* %tmp11752, i64 1
- %tmp11754 = getelementptr inbounds float* %tmp11753, i64 1
- %tmp11755 = getelementptr inbounds float* %tmp11754, i64 1
- %tmp11756 = getelementptr inbounds float* %tmp11755, i64 1
- %tmp11757 = getelementptr inbounds float* %tmp11756, i64 1
- %tmp11758 = getelementptr inbounds float* %tmp11757, i64 1
- %tmp11759 = getelementptr inbounds float* %tmp11758, i64 1
- %tmp11760 = getelementptr inbounds float* %tmp11759, i64 1
- %tmp11761 = getelementptr inbounds float* %tmp11760, i64 1
- %tmp11762 = getelementptr inbounds float* %tmp11761, i64 1
- %tmp11763 = getelementptr inbounds float* %tmp11762, i64 1
- %tmp11764 = getelementptr inbounds float* %tmp11763, i64 1
- %tmp11765 = getelementptr inbounds float* %tmp11764, i64 1
- %tmp11766 = getelementptr inbounds float* %tmp11765, i64 1
- %tmp11767 = getelementptr inbounds float* %tmp11766, i64 1
- %tmp11768 = getelementptr inbounds float* %tmp11767, i64 1
- %tmp11769 = getelementptr inbounds float* %tmp11768, i64 1
- %tmp11770 = getelementptr inbounds float* %tmp11769, i64 1
- %tmp11771 = getelementptr inbounds float* %tmp11770, i64 1
- %tmp11772 = getelementptr inbounds float* %tmp11771, i64 1
- %tmp11773 = getelementptr inbounds float* %tmp11772, i64 1
- %tmp11774 = getelementptr inbounds float* %tmp11773, i64 1
- %tmp11775 = getelementptr inbounds float* %tmp11774, i64 1
- %tmp11776 = getelementptr inbounds float* %tmp11775, i64 1
- %tmp11777 = getelementptr inbounds float* %tmp11776, i64 1
- %tmp11778 = getelementptr inbounds float* %tmp11777, i64 1
- %tmp11779 = getelementptr inbounds float* %tmp11778, i64 1
- %tmp11780 = getelementptr inbounds float* %tmp11779, i64 1
- %tmp11781 = getelementptr inbounds float* %tmp11780, i64 1
- %tmp11782 = getelementptr inbounds float* %tmp11781, i64 1
- %tmp11783 = getelementptr inbounds float* %tmp11782, i64 1
- %tmp11784 = getelementptr inbounds float* %tmp11783, i64 1
- %tmp11785 = getelementptr inbounds float* %tmp11784, i64 1
- %tmp11786 = getelementptr inbounds float* %tmp11785, i64 1
- %tmp11787 = getelementptr inbounds float* %tmp11786, i64 1
- %tmp11788 = getelementptr inbounds float* %tmp11787, i64 1
- %tmp11789 = getelementptr inbounds float* %tmp11788, i64 1
- %tmp11790 = getelementptr inbounds float* %tmp11789, i64 1
- %tmp11791 = getelementptr inbounds float* %tmp11790, i64 1
- %tmp11792 = getelementptr inbounds float* %tmp11791, i64 1
- %tmp11793 = getelementptr inbounds float* %tmp11792, i64 1
- %tmp11794 = getelementptr inbounds float* %tmp11793, i64 1
- %tmp11795 = getelementptr inbounds float* %tmp11794, i64 1
- %tmp11796 = getelementptr inbounds float* %tmp11795, i64 1
- %tmp11797 = getelementptr inbounds float* %tmp11796, i64 1
- %tmp11798 = getelementptr inbounds float* %tmp11797, i64 1
- %tmp11799 = getelementptr inbounds float* %tmp11798, i64 1
- %tmp11800 = getelementptr inbounds float* %tmp11799, i64 1
- %tmp11801 = getelementptr inbounds float* %tmp11800, i64 1
- %tmp11802 = getelementptr inbounds float* %tmp11801, i64 1
- %tmp11803 = getelementptr inbounds float* %tmp11802, i64 1
- %tmp11804 = getelementptr inbounds float* %tmp11803, i64 1
- %tmp11805 = getelementptr inbounds float* %tmp11804, i64 1
- %tmp11806 = getelementptr inbounds float* %tmp11805, i64 1
- %tmp11807 = getelementptr inbounds float* %tmp11806, i64 1
- %tmp11808 = getelementptr inbounds float* %tmp11807, i64 1
- %tmp11809 = getelementptr inbounds float* %tmp11808, i64 1
- %tmp11810 = getelementptr inbounds float* %tmp11809, i64 1
- %tmp11811 = getelementptr inbounds float* %tmp11810, i64 1
- %tmp11812 = getelementptr inbounds float* %tmp11811, i64 1
- %tmp11813 = getelementptr inbounds float* %tmp11812, i64 1
- %tmp11814 = getelementptr inbounds float* %tmp11813, i64 1
- %tmp11815 = getelementptr inbounds float* %tmp11814, i64 1
- %tmp11816 = getelementptr inbounds float* %tmp11815, i64 1
- %tmp11817 = getelementptr inbounds float* %tmp11816, i64 1
- %tmp11818 = getelementptr inbounds float* %tmp11817, i64 1
- %tmp11819 = getelementptr inbounds float* %tmp11818, i64 1
- %tmp11820 = getelementptr inbounds float* %tmp11819, i64 1
- %tmp11821 = getelementptr inbounds float* %tmp11820, i64 1
- %tmp11822 = getelementptr inbounds float* %tmp11821, i64 1
- %tmp11823 = getelementptr inbounds float* %tmp11822, i64 1
- %tmp11824 = getelementptr inbounds float* %tmp11823, i64 1
- %tmp11825 = getelementptr inbounds float* %tmp11824, i64 1
- %tmp11826 = getelementptr inbounds float* %tmp11825, i64 1
- %tmp11827 = getelementptr inbounds float* %tmp11826, i64 1
- %tmp11828 = getelementptr inbounds float* %tmp11827, i64 1
- %tmp11829 = getelementptr inbounds float* %tmp11828, i64 1
- %tmp11830 = getelementptr inbounds float* %tmp11829, i64 1
- %tmp11831 = getelementptr inbounds float* %tmp11830, i64 1
- %tmp11832 = getelementptr inbounds float* %tmp11831, i64 1
- %tmp11833 = getelementptr inbounds float* %tmp11832, i64 1
- %tmp11834 = getelementptr inbounds float* %tmp11833, i64 1
- %tmp11835 = getelementptr inbounds float* %tmp11834, i64 1
- %tmp11836 = getelementptr inbounds float* %tmp11835, i64 1
- %tmp11837 = getelementptr inbounds float* %tmp11836, i64 1
- %tmp11838 = getelementptr inbounds float* %tmp11837, i64 1
- %tmp11839 = getelementptr inbounds float* %tmp11838, i64 1
- %tmp11840 = getelementptr inbounds float* %tmp11839, i64 1
- %tmp11841 = getelementptr inbounds float* %tmp11840, i64 1
- %tmp11842 = getelementptr inbounds float* %tmp11841, i64 1
- %tmp11843 = getelementptr inbounds float* %tmp11842, i64 1
- %tmp11844 = getelementptr inbounds float* %tmp11843, i64 1
- %tmp11845 = getelementptr inbounds float* %tmp11844, i64 1
- %tmp11846 = getelementptr inbounds float* %tmp11845, i64 1
- %tmp11847 = getelementptr inbounds float* %tmp11846, i64 1
- %tmp11848 = getelementptr inbounds float* %tmp11847, i64 1
- %tmp11849 = getelementptr inbounds float* %tmp11848, i64 1
- %tmp11850 = getelementptr inbounds float* %tmp11849, i64 1
- %tmp11851 = getelementptr inbounds float* %tmp11850, i64 1
- %tmp11852 = getelementptr inbounds float* %tmp11851, i64 1
- %tmp11853 = getelementptr inbounds float* %tmp11852, i64 1
- %tmp11854 = getelementptr inbounds float* %tmp11853, i64 1
- %tmp11855 = getelementptr inbounds float* %tmp11854, i64 1
- %tmp11856 = getelementptr inbounds float* %tmp11855, i64 1
- %tmp11857 = getelementptr inbounds float* %tmp11856, i64 1
- %tmp11858 = getelementptr inbounds float* %tmp11857, i64 1
- %tmp11859 = getelementptr inbounds float* %tmp11858, i64 1
- %tmp11860 = getelementptr inbounds float* %tmp11859, i64 1
- %tmp11861 = getelementptr inbounds float* %tmp11860, i64 1
- %tmp11862 = getelementptr inbounds float* %tmp11861, i64 1
- %tmp11863 = getelementptr inbounds float* %tmp11862, i64 1
- %tmp11864 = getelementptr inbounds float* %tmp11863, i64 1
- %tmp11865 = getelementptr inbounds float* %tmp11864, i64 1
- %tmp11866 = getelementptr inbounds float* %tmp11865, i64 1
- %tmp11867 = getelementptr inbounds float* %tmp11866, i64 1
- %tmp11868 = getelementptr inbounds float* %tmp11867, i64 1
- %tmp11869 = getelementptr inbounds float* %tmp11868, i64 1
- %tmp11870 = getelementptr inbounds float* %tmp11869, i64 1
- %tmp11871 = getelementptr inbounds float* %tmp11870, i64 1
- %tmp11872 = getelementptr inbounds float* %tmp11871, i64 1
- %tmp11873 = getelementptr inbounds float* %tmp11872, i64 1
- %tmp11874 = getelementptr inbounds float* %tmp11873, i64 1
- %tmp11875 = getelementptr inbounds float* %tmp11874, i64 1
- %tmp11876 = getelementptr inbounds float* %tmp11875, i64 1
- %tmp11877 = getelementptr inbounds float* %tmp11876, i64 1
- %tmp11878 = getelementptr inbounds float* %tmp11877, i64 1
- %tmp11879 = getelementptr inbounds float* %tmp11878, i64 1
- %tmp11880 = getelementptr inbounds float* %tmp11879, i64 1
- %tmp11881 = getelementptr inbounds float* %tmp11880, i64 1
- %tmp11882 = getelementptr inbounds float* %tmp11881, i64 1
- %tmp11883 = getelementptr inbounds float* %tmp11882, i64 1
- %tmp11884 = getelementptr inbounds float* %tmp11883, i64 1
- %tmp11885 = getelementptr inbounds float* %tmp11884, i64 1
- %tmp11886 = getelementptr inbounds float* %tmp11885, i64 1
- %tmp11887 = getelementptr inbounds float* %tmp11886, i64 1
- %tmp11888 = getelementptr inbounds float* %tmp11887, i64 1
- %tmp11889 = getelementptr inbounds float* %tmp11888, i64 1
- %tmp11890 = getelementptr inbounds float* %tmp11889, i64 1
- %tmp11891 = getelementptr inbounds float* %tmp11890, i64 1
- %tmp11892 = getelementptr inbounds float* %tmp11891, i64 1
- %tmp11893 = getelementptr inbounds float* %tmp11892, i64 1
- %tmp11894 = getelementptr inbounds float* %tmp11893, i64 1
- %tmp11895 = getelementptr inbounds float* %tmp11894, i64 1
- %tmp11896 = getelementptr inbounds float* %tmp11895, i64 1
- %tmp11897 = getelementptr inbounds float* %tmp11896, i64 1
- %tmp11898 = getelementptr inbounds float* %tmp11897, i64 1
- %tmp11899 = getelementptr inbounds float* %tmp11898, i64 1
- %tmp11900 = getelementptr inbounds float* %tmp11899, i64 1
- %tmp11901 = getelementptr inbounds float* %tmp11900, i64 1
- %tmp11902 = getelementptr inbounds float* %tmp11901, i64 1
- %tmp11903 = getelementptr inbounds float* %tmp11902, i64 1
- %tmp11904 = getelementptr inbounds float* %tmp11903, i64 1
- %tmp11905 = getelementptr inbounds float* %tmp11904, i64 1
- %tmp11906 = getelementptr inbounds float* %tmp11905, i64 1
- %tmp11907 = getelementptr inbounds float* %tmp11906, i64 1
- %tmp11908 = getelementptr inbounds float* %tmp11907, i64 1
- %tmp11909 = getelementptr inbounds float* %tmp11908, i64 1
- %tmp11910 = getelementptr inbounds float* %tmp11909, i64 1
- %tmp11911 = getelementptr inbounds float* %tmp11910, i64 1
- %tmp11912 = getelementptr inbounds float* %tmp11911, i64 1
- %tmp11913 = getelementptr inbounds float* %tmp11912, i64 1
- %tmp11914 = getelementptr inbounds float* %tmp11913, i64 1
- %tmp11915 = getelementptr inbounds float* %tmp11914, i64 1
- %tmp11916 = getelementptr inbounds float* %tmp11915, i64 1
- %tmp11917 = getelementptr inbounds float* %tmp11916, i64 1
- %tmp11918 = getelementptr inbounds float* %tmp11917, i64 1
- %tmp11919 = getelementptr inbounds float* %tmp11918, i64 1
- %tmp11920 = getelementptr inbounds float* %tmp11919, i64 1
- %tmp11921 = getelementptr inbounds float* %tmp11920, i64 1
- %tmp11922 = getelementptr inbounds float* %tmp11921, i64 1
- %tmp11923 = getelementptr inbounds float* %tmp11922, i64 1
- %tmp11924 = getelementptr inbounds float* %tmp11923, i64 1
- %tmp11925 = getelementptr inbounds float* %tmp11924, i64 1
- %tmp11926 = getelementptr inbounds float* %tmp11925, i64 1
- %tmp11927 = getelementptr inbounds float* %tmp11926, i64 1
- %tmp11928 = getelementptr inbounds float* %tmp11927, i64 1
- %tmp11929 = getelementptr inbounds float* %tmp11928, i64 1
- %tmp11930 = getelementptr inbounds float* %tmp11929, i64 1
- %tmp11931 = getelementptr inbounds float* %tmp11930, i64 1
- %tmp11932 = getelementptr inbounds float* %tmp11931, i64 1
- %tmp11933 = getelementptr inbounds float* %tmp11932, i64 1
- %tmp11934 = getelementptr inbounds float* %tmp11933, i64 1
- %tmp11935 = getelementptr inbounds float* %tmp11934, i64 1
- %tmp11936 = getelementptr inbounds float* %tmp11935, i64 1
- %tmp11937 = getelementptr inbounds float* %tmp11936, i64 1
- %tmp11938 = getelementptr inbounds float* %tmp11937, i64 1
- %tmp11939 = getelementptr inbounds float* %tmp11938, i64 1
- %tmp11940 = getelementptr inbounds float* %tmp11939, i64 1
- %tmp11941 = getelementptr inbounds float* %tmp11940, i64 1
- %tmp11942 = getelementptr inbounds float* %tmp11941, i64 1
- %tmp11943 = getelementptr inbounds float* %tmp11942, i64 1
- %tmp11944 = getelementptr inbounds float* %tmp11943, i64 1
- %tmp11945 = getelementptr inbounds float* %tmp11944, i64 1
- %tmp11946 = getelementptr inbounds float* %tmp11945, i64 1
- %tmp11947 = getelementptr inbounds float* %tmp11946, i64 1
- %tmp11948 = getelementptr inbounds float* %tmp11947, i64 1
- %tmp11949 = getelementptr inbounds float* %tmp11948, i64 1
- %tmp11950 = getelementptr inbounds float* %tmp11949, i64 1
- %tmp11951 = getelementptr inbounds float* %tmp11950, i64 1
- %tmp11952 = getelementptr inbounds float* %tmp11951, i64 1
- %tmp11953 = getelementptr inbounds float* %tmp11952, i64 1
- %tmp11954 = getelementptr inbounds float* %tmp11953, i64 1
- %tmp11955 = getelementptr inbounds float* %tmp11954, i64 1
- %tmp11956 = getelementptr inbounds float* %tmp11955, i64 1
- %tmp11957 = getelementptr inbounds float* %tmp11956, i64 1
- %tmp11958 = getelementptr inbounds float* %tmp11957, i64 1
- %tmp11959 = getelementptr inbounds float* %tmp11958, i64 1
- %tmp11960 = getelementptr inbounds float* %tmp11959, i64 1
- %tmp11961 = getelementptr inbounds float* %tmp11960, i64 1
- %tmp11962 = getelementptr inbounds float* %tmp11961, i64 1
- %tmp11963 = getelementptr inbounds float* %tmp11962, i64 1
- %tmp11964 = getelementptr inbounds float* %tmp11963, i64 1
- %tmp11965 = getelementptr inbounds float* %tmp11964, i64 1
- %tmp11966 = getelementptr inbounds float* %tmp11965, i64 1
- %tmp11967 = getelementptr inbounds float* %tmp11966, i64 1
- %tmp11968 = getelementptr inbounds float* %tmp11967, i64 1
- %tmp11969 = getelementptr inbounds float* %tmp11968, i64 1
- %tmp11970 = getelementptr inbounds float* %tmp11969, i64 1
- %tmp11971 = getelementptr inbounds float* %tmp11970, i64 1
- %tmp11972 = getelementptr inbounds float* %tmp11971, i64 1
- %tmp11973 = getelementptr inbounds float* %tmp11972, i64 1
- %tmp11974 = getelementptr inbounds float* %tmp11973, i64 1
- %tmp11975 = getelementptr inbounds float* %tmp11974, i64 1
- %tmp11976 = getelementptr inbounds float* %tmp11975, i64 1
- %tmp11977 = getelementptr inbounds float* %tmp11976, i64 1
- %tmp11978 = getelementptr inbounds float* %tmp11977, i64 1
- %tmp11979 = getelementptr inbounds float* %tmp11978, i64 1
- %tmp11980 = getelementptr inbounds float* %tmp11979, i64 1
- %tmp11981 = getelementptr inbounds float* %tmp11980, i64 1
- %tmp11982 = getelementptr inbounds float* %tmp11981, i64 1
- %tmp11983 = getelementptr inbounds float* %tmp11982, i64 1
- %tmp11984 = getelementptr inbounds float* %tmp11983, i64 1
- %tmp11985 = getelementptr inbounds float* %tmp11984, i64 1
- %tmp11986 = getelementptr inbounds float* %tmp11985, i64 1
- %tmp11987 = getelementptr inbounds float* %tmp11986, i64 1
- %tmp11988 = getelementptr inbounds float* %tmp11987, i64 1
- %tmp11989 = getelementptr inbounds float* %tmp11988, i64 1
- %tmp11990 = getelementptr inbounds float* %tmp11989, i64 1
- %tmp11991 = getelementptr inbounds float* %tmp11990, i64 1
- %tmp11992 = getelementptr inbounds float* %tmp11991, i64 1
- %tmp11993 = getelementptr inbounds float* %tmp11992, i64 1
- %tmp11994 = getelementptr inbounds float* %tmp11993, i64 1
- %tmp11995 = getelementptr inbounds float* %tmp11994, i64 1
- %tmp11996 = getelementptr inbounds float* %tmp11995, i64 1
- %tmp11997 = getelementptr inbounds float* %tmp11996, i64 1
- %tmp11998 = getelementptr inbounds float* %tmp11997, i64 1
- %tmp11999 = getelementptr inbounds float* %tmp11998, i64 1
- %tmp12000 = getelementptr inbounds float* %tmp11999, i64 1
- %tmp12001 = getelementptr inbounds float* %tmp12000, i64 1
- %tmp12002 = getelementptr inbounds float* %tmp12001, i64 1
- %tmp12003 = getelementptr inbounds float* %tmp12002, i64 1
- %tmp12004 = getelementptr inbounds float* %tmp12003, i64 1
- %tmp12005 = getelementptr inbounds float* %tmp12004, i64 1
- %tmp12006 = getelementptr inbounds float* %tmp12005, i64 1
- %tmp12007 = getelementptr inbounds float* %tmp12006, i64 1
- %tmp12008 = getelementptr inbounds float* %tmp12007, i64 1
- %tmp12009 = getelementptr inbounds float* %tmp12008, i64 1
- %tmp12010 = getelementptr inbounds float* %tmp12009, i64 1
- %tmp12011 = getelementptr inbounds float* %tmp12010, i64 1
- %tmp12012 = getelementptr inbounds float* %tmp12011, i64 1
- %tmp12013 = getelementptr inbounds float* %tmp12012, i64 1
- %tmp12014 = getelementptr inbounds float* %tmp12013, i64 1
- %tmp12015 = getelementptr inbounds float* %tmp12014, i64 1
- %tmp12016 = getelementptr inbounds float* %tmp12015, i64 1
- %tmp12017 = getelementptr inbounds float* %tmp12016, i64 1
- %tmp12018 = getelementptr inbounds float* %tmp12017, i64 1
- %tmp12019 = getelementptr inbounds float* %tmp12018, i64 1
- %tmp12020 = getelementptr inbounds float* %tmp12019, i64 1
- %tmp12021 = getelementptr inbounds float* %tmp12020, i64 1
- %tmp12022 = getelementptr inbounds float* %tmp12021, i64 1
- %tmp12023 = getelementptr inbounds float* %tmp12022, i64 1
- %tmp12024 = getelementptr inbounds float* %tmp12023, i64 1
- %tmp12025 = getelementptr inbounds float* %tmp12024, i64 1
- %tmp12026 = getelementptr inbounds float* %tmp12025, i64 1
- %tmp12027 = getelementptr inbounds float* %tmp12026, i64 1
- %tmp12028 = getelementptr inbounds float* %tmp12027, i64 1
- %tmp12029 = getelementptr inbounds float* %tmp12028, i64 1
- %tmp12030 = getelementptr inbounds float* %tmp12029, i64 1
- %tmp12031 = getelementptr inbounds float* %tmp12030, i64 1
- %tmp12032 = getelementptr inbounds float* %tmp12031, i64 1
- %tmp12033 = getelementptr inbounds float* %tmp12032, i64 1
- %tmp12034 = getelementptr inbounds float* %tmp12033, i64 1
- %tmp12035 = getelementptr inbounds float* %tmp12034, i64 1
- %tmp12036 = getelementptr inbounds float* %tmp12035, i64 1
- %tmp12037 = getelementptr inbounds float* %tmp12036, i64 1
- %tmp12038 = getelementptr inbounds float* %tmp12037, i64 1
- %tmp12039 = getelementptr inbounds float* %tmp12038, i64 1
- %tmp12040 = getelementptr inbounds float* %tmp12039, i64 1
- %tmp12041 = getelementptr inbounds float* %tmp12040, i64 1
- %tmp12042 = getelementptr inbounds float* %tmp12041, i64 1
- %tmp12043 = getelementptr inbounds float* %tmp12042, i64 1
- %tmp12044 = getelementptr inbounds float* %tmp12043, i64 1
- %tmp12045 = getelementptr inbounds float* %tmp12044, i64 1
- %tmp12046 = getelementptr inbounds float* %tmp12045, i64 1
- %tmp12047 = getelementptr inbounds float* %tmp12046, i64 1
- %tmp12048 = getelementptr inbounds float* %tmp12047, i64 1
- %tmp12049 = getelementptr inbounds float* %tmp12048, i64 1
- %tmp12050 = getelementptr inbounds float* %tmp12049, i64 1
- %tmp12051 = getelementptr inbounds float* %tmp12050, i64 1
- %tmp12052 = getelementptr inbounds float* %tmp12051, i64 1
- %tmp12053 = getelementptr inbounds float* %tmp12052, i64 1
- %tmp12054 = getelementptr inbounds float* %tmp12053, i64 1
- %tmp12055 = getelementptr inbounds float* %tmp12054, i64 1
- %tmp12056 = getelementptr inbounds float* %tmp12055, i64 1
- %tmp12057 = getelementptr inbounds float* %tmp12056, i64 1
- %tmp12058 = getelementptr inbounds float* %tmp12057, i64 1
- %tmp12059 = getelementptr inbounds float* %tmp12058, i64 1
- %tmp12060 = getelementptr inbounds float* %tmp12059, i64 1
- %tmp12061 = getelementptr inbounds float* %tmp12060, i64 1
- %tmp12062 = getelementptr inbounds float* %tmp12061, i64 1
- %tmp12063 = getelementptr inbounds float* %tmp12062, i64 1
- %tmp12064 = getelementptr inbounds float* %tmp12063, i64 1
- %tmp12065 = getelementptr inbounds float* %tmp12064, i64 1
- %tmp12066 = getelementptr inbounds float* %tmp12065, i64 1
- %tmp12067 = getelementptr inbounds float* %tmp12066, i64 1
- %tmp12068 = getelementptr inbounds float* %tmp12067, i64 1
- %tmp12069 = getelementptr inbounds float* %tmp12068, i64 1
- %tmp12070 = getelementptr inbounds float* %tmp12069, i64 1
- %tmp12071 = getelementptr inbounds float* %tmp12070, i64 1
- %tmp12072 = getelementptr inbounds float* %tmp12071, i64 1
- %tmp12073 = getelementptr inbounds float* %tmp12072, i64 1
- %tmp12074 = getelementptr inbounds float* %tmp12073, i64 1
- %tmp12075 = getelementptr inbounds float* %tmp12074, i64 1
- %tmp12076 = getelementptr inbounds float* %tmp12075, i64 1
- %tmp12077 = getelementptr inbounds float* %tmp12076, i64 1
- %tmp12078 = getelementptr inbounds float* %tmp12077, i64 1
- %tmp12079 = getelementptr inbounds float* %tmp12078, i64 1
- %tmp12080 = getelementptr inbounds float* %tmp12079, i64 1
- %tmp12081 = getelementptr inbounds float* %tmp12080, i64 1
- %tmp12082 = getelementptr inbounds float* %tmp12081, i64 1
- %tmp12083 = getelementptr inbounds float* %tmp12082, i64 1
- %tmp12084 = getelementptr inbounds float* %tmp12083, i64 1
- %tmp12085 = getelementptr inbounds float* %tmp12084, i64 1
- %tmp12086 = getelementptr inbounds float* %tmp12085, i64 1
- %tmp12087 = getelementptr inbounds float* %tmp12086, i64 1
- %tmp12088 = getelementptr inbounds float* %tmp12087, i64 1
- %tmp12089 = getelementptr inbounds float* %tmp12088, i64 1
- %tmp12090 = getelementptr inbounds float* %tmp12089, i64 1
- %tmp12091 = getelementptr inbounds float* %tmp12090, i64 1
- %tmp12092 = getelementptr inbounds float* %tmp12091, i64 1
- %tmp12093 = getelementptr inbounds float* %tmp12092, i64 1
- %tmp12094 = getelementptr inbounds float* %tmp12093, i64 1
- %tmp12095 = getelementptr inbounds float* %tmp12094, i64 1
- %tmp12096 = getelementptr inbounds float* %tmp12095, i64 1
- %tmp12097 = getelementptr inbounds float* %tmp12096, i64 1
- %tmp12098 = getelementptr inbounds float* %tmp12097, i64 1
- %tmp12099 = getelementptr inbounds float* %tmp12098, i64 1
- %tmp12100 = getelementptr inbounds float* %tmp12099, i64 1
- %tmp12101 = getelementptr inbounds float* %tmp12100, i64 1
- %tmp12102 = getelementptr inbounds float* %tmp12101, i64 1
- %tmp12103 = getelementptr inbounds float* %tmp12102, i64 1
- %tmp12104 = getelementptr inbounds float* %tmp12103, i64 1
- %tmp12105 = getelementptr inbounds float* %tmp12104, i64 1
- %tmp12106 = getelementptr inbounds float* %tmp12105, i64 1
- %tmp12107 = getelementptr inbounds float* %tmp12106, i64 1
- %tmp12108 = getelementptr inbounds float* %tmp12107, i64 1
- %tmp12109 = getelementptr inbounds float* %tmp12108, i64 1
- %tmp12110 = getelementptr inbounds float* %tmp12109, i64 1
- %tmp12111 = getelementptr inbounds float* %tmp12110, i64 1
- %tmp12112 = getelementptr inbounds float* %tmp12111, i64 1
- %tmp12113 = getelementptr inbounds float* %tmp12112, i64 1
- %tmp12114 = getelementptr inbounds float* %tmp12113, i64 1
- %tmp12115 = getelementptr inbounds float* %tmp12114, i64 1
- %tmp12116 = getelementptr inbounds float* %tmp12115, i64 1
- %tmp12117 = getelementptr inbounds float* %tmp12116, i64 1
- %tmp12118 = getelementptr inbounds float* %tmp12117, i64 1
- %tmp12119 = getelementptr inbounds float* %tmp12118, i64 1
- %tmp12120 = getelementptr inbounds float* %tmp12119, i64 1
- %tmp12121 = getelementptr inbounds float* %tmp12120, i64 1
- %tmp12122 = getelementptr inbounds float* %tmp12121, i64 1
- %tmp12123 = getelementptr inbounds float* %tmp12122, i64 1
- %tmp12124 = getelementptr inbounds float* %tmp12123, i64 1
- %tmp12125 = getelementptr inbounds float* %tmp12124, i64 1
- %tmp12126 = getelementptr inbounds float* %tmp12125, i64 1
- %tmp12127 = getelementptr inbounds float* %tmp12126, i64 1
- %tmp12128 = getelementptr inbounds float* %tmp12127, i64 1
- %tmp12129 = getelementptr inbounds float* %tmp12128, i64 1
- %tmp12130 = getelementptr inbounds float* %tmp12129, i64 1
- %tmp12131 = getelementptr inbounds float* %tmp12130, i64 1
- %tmp12132 = getelementptr inbounds float* %tmp12131, i64 1
- %tmp12133 = getelementptr inbounds float* %tmp12132, i64 1
- %tmp12134 = getelementptr inbounds float* %tmp12133, i64 1
- %tmp12135 = getelementptr inbounds float* %tmp12134, i64 1
- %tmp12136 = getelementptr inbounds float* %tmp12135, i64 1
- %tmp12137 = getelementptr inbounds float* %tmp12136, i64 1
- %tmp12138 = getelementptr inbounds float* %tmp12137, i64 1
- %tmp12139 = getelementptr inbounds float* %tmp12138, i64 1
- %tmp12140 = getelementptr inbounds float* %tmp12139, i64 1
- %tmp12141 = getelementptr inbounds float* %tmp12140, i64 1
- %tmp12142 = getelementptr inbounds float* %tmp12141, i64 1
- %tmp12143 = getelementptr inbounds float* %tmp12142, i64 1
- %tmp12144 = getelementptr inbounds float* %tmp12143, i64 1
- %tmp12145 = getelementptr inbounds float* %tmp12144, i64 1
- %tmp12146 = getelementptr inbounds float* %tmp12145, i64 1
- %tmp12147 = getelementptr inbounds float* %tmp12146, i64 1
- %tmp12148 = getelementptr inbounds float* %tmp12147, i64 1
- %tmp12149 = getelementptr inbounds float* %tmp12148, i64 1
- %tmp12150 = getelementptr inbounds float* %tmp12149, i64 1
- %tmp12151 = getelementptr inbounds float* %tmp12150, i64 1
- %tmp12152 = getelementptr inbounds float* %tmp12151, i64 1
- %tmp12153 = getelementptr inbounds float* %tmp12152, i64 1
- %tmp12154 = getelementptr inbounds float* %tmp12153, i64 1
- %tmp12155 = getelementptr inbounds float* %tmp12154, i64 1
- %tmp12156 = getelementptr inbounds float* %tmp12155, i64 1
- %tmp12157 = getelementptr inbounds float* %tmp12156, i64 1
- %tmp12158 = getelementptr inbounds float* %tmp12157, i64 1
- %tmp12159 = getelementptr inbounds float* %tmp12158, i64 1
- %tmp12160 = getelementptr inbounds float* %tmp12159, i64 1
- %tmp12161 = getelementptr inbounds float* %tmp12160, i64 1
- %tmp12162 = getelementptr inbounds float* %tmp12161, i64 1
- %tmp12163 = getelementptr inbounds float* %tmp12162, i64 1
- %tmp12164 = getelementptr inbounds float* %tmp12163, i64 1
- %tmp12165 = getelementptr inbounds float* %tmp12164, i64 1
- %tmp12166 = getelementptr inbounds float* %tmp12165, i64 1
- %tmp12167 = getelementptr inbounds float* %tmp12166, i64 1
- %tmp12168 = getelementptr inbounds float* %tmp12167, i64 1
- %tmp12169 = getelementptr inbounds float* %tmp12168, i64 1
- %tmp12170 = getelementptr inbounds float* %tmp12169, i64 1
- %tmp12171 = getelementptr inbounds float* %tmp12170, i64 1
- %tmp12172 = getelementptr inbounds float* %tmp12171, i64 1
- %tmp12173 = getelementptr inbounds float* %tmp12172, i64 1
- %tmp12174 = getelementptr inbounds float* %tmp12173, i64 1
- %tmp12175 = getelementptr inbounds float* %tmp12174, i64 1
- %tmp12176 = getelementptr inbounds float* %tmp12175, i64 1
- %tmp12177 = getelementptr inbounds float* %tmp12176, i64 1
- %tmp12178 = getelementptr inbounds float* %tmp12177, i64 1
- %tmp12179 = getelementptr inbounds float* %tmp12178, i64 1
- %tmp12180 = getelementptr inbounds float* %tmp12179, i64 1
- %tmp12181 = getelementptr inbounds float* %tmp12180, i64 1
- %tmp12182 = getelementptr inbounds float* %tmp12181, i64 1
- %tmp12183 = getelementptr inbounds float* %tmp12182, i64 1
- %tmp12184 = getelementptr inbounds float* %tmp12183, i64 1
- %tmp12185 = getelementptr inbounds float* %tmp12184, i64 1
- %tmp12186 = getelementptr inbounds float* %tmp12185, i64 1
- %tmp12187 = getelementptr inbounds float* %tmp12186, i64 1
- %tmp12188 = getelementptr inbounds float* %tmp12187, i64 1
- %tmp12189 = getelementptr inbounds float* %tmp12188, i64 1
- %tmp12190 = getelementptr inbounds float* %tmp12189, i64 1
- %tmp12191 = getelementptr inbounds float* %tmp12190, i64 1
- %tmp12192 = getelementptr inbounds float* %tmp12191, i64 1
- %tmp12193 = getelementptr inbounds float* %tmp12192, i64 1
- %tmp12194 = getelementptr inbounds float* %tmp12193, i64 1
- %tmp12195 = getelementptr inbounds float* %tmp12194, i64 1
- %tmp12196 = getelementptr inbounds float* %tmp12195, i64 1
- %tmp12197 = getelementptr inbounds float* %tmp12196, i64 1
- %tmp12198 = getelementptr inbounds float* %tmp12197, i64 1
- %tmp12199 = getelementptr inbounds float* %tmp12198, i64 1
- %tmp12200 = getelementptr inbounds float* %tmp12199, i64 1
- %tmp12201 = getelementptr inbounds float* %tmp12200, i64 1
- %tmp12202 = getelementptr inbounds float* %tmp12201, i64 1
- %tmp12203 = getelementptr inbounds float* %tmp12202, i64 1
- %tmp12204 = getelementptr inbounds float* %tmp12203, i64 1
- %tmp12205 = getelementptr inbounds float* %tmp12204, i64 1
- %tmp12206 = getelementptr inbounds float* %tmp12205, i64 1
- %tmp12207 = getelementptr inbounds float* %tmp12206, i64 1
- %tmp12208 = getelementptr inbounds float* %tmp12207, i64 1
- %tmp12209 = getelementptr inbounds float* %tmp12208, i64 1
- %tmp12210 = getelementptr inbounds float* %tmp12209, i64 1
- %tmp12211 = getelementptr inbounds float* %tmp12210, i64 1
- %tmp12212 = getelementptr inbounds float* %tmp12211, i64 1
- %tmp12213 = getelementptr inbounds float* %tmp12212, i64 1
- %tmp12214 = getelementptr inbounds float* %tmp12213, i64 1
- %tmp12215 = getelementptr inbounds float* %tmp12214, i64 1
- %tmp12216 = getelementptr inbounds float* %tmp12215, i64 1
- %tmp12217 = getelementptr inbounds float* %tmp12216, i64 1
- %tmp12218 = getelementptr inbounds float* %tmp12217, i64 1
- %tmp12219 = getelementptr inbounds float* %tmp12218, i64 1
- %tmp12220 = getelementptr inbounds float* %tmp12219, i64 1
- %tmp12221 = getelementptr inbounds float* %tmp12220, i64 1
- %tmp12222 = getelementptr inbounds float* %tmp12221, i64 1
- %tmp12223 = getelementptr inbounds float* %tmp12222, i64 1
- %tmp12224 = getelementptr inbounds float* %tmp12223, i64 1
- %tmp12225 = getelementptr inbounds float* %tmp12224, i64 1
- %tmp12226 = getelementptr inbounds float* %tmp12225, i64 1
- %tmp12227 = getelementptr inbounds float* %tmp12226, i64 1
- %tmp12228 = getelementptr inbounds float* %tmp12227, i64 1
- %tmp12229 = getelementptr inbounds float* %tmp12228, i64 1
- %tmp12230 = getelementptr inbounds float* %tmp12229, i64 1
- %tmp12231 = getelementptr inbounds float* %tmp12230, i64 1
- %tmp12232 = getelementptr inbounds float* %tmp12231, i64 1
- %tmp12233 = getelementptr inbounds float* %tmp12232, i64 1
- %tmp12234 = getelementptr inbounds float* %tmp12233, i64 1
- %tmp12235 = getelementptr inbounds float* %tmp12234, i64 1
- %tmp12236 = getelementptr inbounds float* %tmp12235, i64 1
- %tmp12237 = getelementptr inbounds float* %tmp12236, i64 1
- %tmp12238 = getelementptr inbounds float* %tmp12237, i64 1
- %tmp12239 = getelementptr inbounds float* %tmp12238, i64 1
- %tmp12240 = getelementptr inbounds float* %tmp12239, i64 1
- %tmp12241 = getelementptr inbounds float* %tmp12240, i64 1
- %tmp12242 = getelementptr inbounds float* %tmp12241, i64 1
- %tmp12243 = getelementptr inbounds float* %tmp12242, i64 1
- %tmp12244 = getelementptr inbounds float* %tmp12243, i64 1
- %tmp12245 = getelementptr inbounds float* %tmp12244, i64 1
- %tmp12246 = getelementptr inbounds float* %tmp12245, i64 1
- %tmp12247 = getelementptr inbounds float* %tmp12246, i64 1
- %tmp12248 = getelementptr inbounds float* %tmp12247, i64 1
- %tmp12249 = getelementptr inbounds float* %tmp12248, i64 1
- %tmp12250 = getelementptr inbounds float* %tmp12249, i64 1
- %tmp12251 = getelementptr inbounds float* %tmp12250, i64 1
- %tmp12252 = getelementptr inbounds float* %tmp12251, i64 1
- %tmp12253 = getelementptr inbounds float* %tmp12252, i64 1
- %tmp12254 = getelementptr inbounds float* %tmp12253, i64 1
- %tmp12255 = getelementptr inbounds float* %tmp12254, i64 1
- %tmp12256 = getelementptr inbounds float* %tmp12255, i64 1
- %tmp12257 = getelementptr inbounds float* %tmp12256, i64 1
- %tmp12258 = getelementptr inbounds float* %tmp12257, i64 1
- %tmp12259 = getelementptr inbounds float* %tmp12258, i64 1
- %tmp12260 = getelementptr inbounds float* %tmp12259, i64 1
- %tmp12261 = getelementptr inbounds float* %tmp12260, i64 1
- %tmp12262 = getelementptr inbounds float* %tmp12261, i64 1
- %tmp12263 = getelementptr inbounds float* %tmp12262, i64 1
- %tmp12264 = getelementptr inbounds float* %tmp12263, i64 1
- %tmp12265 = getelementptr inbounds float* %tmp12264, i64 1
- %tmp12266 = getelementptr inbounds float* %tmp12265, i64 1
- %tmp12267 = getelementptr inbounds float* %tmp12266, i64 1
- %tmp12268 = getelementptr inbounds float* %tmp12267, i64 1
- %tmp12269 = getelementptr inbounds float* %tmp12268, i64 1
- %tmp12270 = getelementptr inbounds float* %tmp12269, i64 1
- %tmp12271 = getelementptr inbounds float* %tmp12270, i64 1
- %tmp12272 = getelementptr inbounds float* %tmp12271, i64 1
- %tmp12273 = getelementptr inbounds float* %tmp12272, i64 1
- %tmp12274 = getelementptr inbounds float* %tmp12273, i64 1
- %tmp12275 = getelementptr inbounds float* %tmp12274, i64 1
- %tmp12276 = getelementptr inbounds float* %tmp12275, i64 1
- %tmp12277 = getelementptr inbounds float* %tmp12276, i64 1
- %tmp12278 = getelementptr inbounds float* %tmp12277, i64 1
- %tmp12279 = getelementptr inbounds float* %tmp12278, i64 1
- %tmp12280 = getelementptr inbounds float* %tmp12279, i64 1
- %tmp12281 = getelementptr inbounds float* %tmp12280, i64 1
- %tmp12282 = getelementptr inbounds float* %tmp12281, i64 1
- %tmp12283 = getelementptr inbounds float* %tmp12282, i64 1
- %tmp12284 = getelementptr inbounds float* %tmp12283, i64 1
- %tmp12285 = getelementptr inbounds float* %tmp12284, i64 1
- %tmp12286 = getelementptr inbounds float* %tmp12285, i64 1
- %tmp12287 = getelementptr inbounds float* %tmp12286, i64 1
- %tmp12288 = getelementptr inbounds float* %tmp12287, i64 1
- %tmp12289 = getelementptr inbounds float* %tmp12288, i64 1
- %tmp12290 = getelementptr inbounds float* %tmp12289, i64 1
- %tmp12291 = getelementptr inbounds float* %tmp12290, i64 1
- %tmp12292 = getelementptr inbounds float* %tmp12291, i64 1
- %tmp12293 = getelementptr inbounds float* %tmp12292, i64 1
- %tmp12294 = getelementptr inbounds float* %tmp12293, i64 1
- %tmp12295 = getelementptr inbounds float* %tmp12294, i64 1
- %tmp12296 = getelementptr inbounds float* %tmp12295, i64 1
- %tmp12297 = getelementptr inbounds float* %tmp12296, i64 1
- %tmp12298 = getelementptr inbounds float* %tmp12297, i64 1
- %tmp12299 = getelementptr inbounds float* %tmp12298, i64 1
- %tmp12300 = getelementptr inbounds float* %tmp12299, i64 1
- %tmp12301 = getelementptr inbounds float* %tmp12300, i64 1
- %tmp12302 = getelementptr inbounds float* %tmp12301, i64 1
- %tmp12303 = getelementptr inbounds float* %tmp12302, i64 1
- %tmp12304 = getelementptr inbounds float* %tmp12303, i64 1
- %tmp12305 = getelementptr inbounds float* %tmp12304, i64 1
- %tmp12306 = getelementptr inbounds float* %tmp12305, i64 1
- %tmp12307 = getelementptr inbounds float* %tmp12306, i64 1
- %tmp12308 = getelementptr inbounds float* %tmp12307, i64 1
- %tmp12309 = getelementptr inbounds float* %tmp12308, i64 1
- %tmp12310 = getelementptr inbounds float* %tmp12309, i64 1
- %tmp12311 = getelementptr inbounds float* %tmp12310, i64 1
- %tmp12312 = getelementptr inbounds float* %tmp12311, i64 1
- %tmp12313 = getelementptr inbounds float* %tmp12312, i64 1
- %tmp12314 = getelementptr inbounds float* %tmp12313, i64 1
- %tmp12315 = getelementptr inbounds float* %tmp12314, i64 1
- %tmp12316 = getelementptr inbounds float* %tmp12315, i64 1
- %tmp12317 = getelementptr inbounds float* %tmp12316, i64 1
- %tmp12318 = getelementptr inbounds float* %tmp12317, i64 1
- %tmp12319 = getelementptr inbounds float* %tmp12318, i64 1
- %tmp12320 = getelementptr inbounds float* %tmp12319, i64 1
- %tmp12321 = getelementptr inbounds float* %tmp12320, i64 1
- %tmp12322 = getelementptr inbounds float* %tmp12321, i64 1
- %tmp12323 = getelementptr inbounds float* %tmp12322, i64 1
- %tmp12324 = getelementptr inbounds float* %tmp12323, i64 1
- %tmp12325 = getelementptr inbounds float* %tmp12324, i64 1
- %tmp12326 = getelementptr inbounds float* %tmp12325, i64 1
- %tmp12327 = getelementptr inbounds float* %tmp12326, i64 1
- %tmp12328 = getelementptr inbounds float* %tmp12327, i64 1
- %tmp12329 = getelementptr inbounds float* %tmp12328, i64 1
- %tmp12330 = getelementptr inbounds float* %tmp12329, i64 1
- %tmp12331 = getelementptr inbounds float* %tmp12330, i64 1
- %tmp12332 = getelementptr inbounds float* %tmp12331, i64 1
- %tmp12333 = getelementptr inbounds float* %tmp12332, i64 1
- %tmp12334 = getelementptr inbounds float* %tmp12333, i64 1
- %tmp12335 = getelementptr inbounds float* %tmp12334, i64 1
- %tmp12336 = getelementptr inbounds float* %tmp12335, i64 1
- %tmp12337 = getelementptr inbounds float* %tmp12336, i64 1
- %tmp12338 = getelementptr inbounds float* %tmp12337, i64 1
- %tmp12339 = getelementptr inbounds float* %tmp12338, i64 1
- %tmp12340 = getelementptr inbounds float* %tmp12339, i64 1
- %tmp12341 = getelementptr inbounds float* %tmp12340, i64 1
- %tmp12342 = getelementptr inbounds float* %tmp12341, i64 1
- %tmp12343 = getelementptr inbounds float* %tmp12342, i64 1
- %tmp12344 = getelementptr inbounds float* %tmp12343, i64 1
- %tmp12345 = getelementptr inbounds float* %tmp12344, i64 1
- %tmp12346 = getelementptr inbounds float* %tmp12345, i64 1
- %tmp12347 = getelementptr inbounds float* %tmp12346, i64 1
- %tmp12348 = getelementptr inbounds float* %tmp12347, i64 1
- %tmp12349 = getelementptr inbounds float* %tmp12348, i64 1
- %tmp12350 = getelementptr inbounds float* %tmp12349, i64 1
- %tmp12351 = getelementptr inbounds float* %tmp12350, i64 1
- %tmp12352 = getelementptr inbounds float* %tmp12351, i64 1
- %tmp12353 = getelementptr inbounds float* %tmp12352, i64 1
- %tmp12354 = getelementptr inbounds float* %tmp12353, i64 1
- %tmp12355 = getelementptr inbounds float* %tmp12354, i64 1
- %tmp12356 = getelementptr inbounds float* %tmp12355, i64 1
- %tmp12357 = getelementptr inbounds float* %tmp12356, i64 1
- %tmp12358 = getelementptr inbounds float* %tmp12357, i64 1
- %tmp12359 = getelementptr inbounds float* %tmp12358, i64 1
- %tmp12360 = getelementptr inbounds float* %tmp12359, i64 1
- %tmp12361 = getelementptr inbounds float* %tmp12360, i64 1
- %tmp12362 = getelementptr inbounds float* %tmp12361, i64 1
- %tmp12363 = getelementptr inbounds float* %tmp12362, i64 1
- %tmp12364 = getelementptr inbounds float* %tmp12363, i64 1
- %tmp12365 = getelementptr inbounds float* %tmp12364, i64 1
- %tmp12366 = getelementptr inbounds float* %tmp12365, i64 1
- %tmp12367 = getelementptr inbounds float* %tmp12366, i64 1
- %tmp12368 = getelementptr inbounds float* %tmp12367, i64 1
- %tmp12369 = getelementptr inbounds float* %tmp12368, i64 1
- %tmp12370 = getelementptr inbounds float* %tmp12369, i64 1
- %tmp12371 = getelementptr inbounds float* %tmp12370, i64 1
- %tmp12372 = getelementptr inbounds float* %tmp12371, i64 1
- %tmp12373 = getelementptr inbounds float* %tmp12372, i64 1
- %tmp12374 = getelementptr inbounds float* %tmp12373, i64 1
- %tmp12375 = getelementptr inbounds float* %tmp12374, i64 1
- %tmp12376 = getelementptr inbounds float* %tmp12375, i64 1
- %tmp12377 = getelementptr inbounds float* %tmp12376, i64 1
- %tmp12378 = getelementptr inbounds float* %tmp12377, i64 1
- %tmp12379 = getelementptr inbounds float* %tmp12378, i64 1
- %tmp12380 = getelementptr inbounds float* %tmp12379, i64 1
- %tmp12381 = getelementptr inbounds float* %tmp12380, i64 1
- %tmp12382 = getelementptr inbounds float* %tmp12381, i64 1
- %tmp12383 = getelementptr inbounds float* %tmp12382, i64 1
- %tmp12384 = getelementptr inbounds float* %tmp12383, i64 1
- %tmp12385 = getelementptr inbounds float* %tmp12384, i64 1
- %tmp12386 = getelementptr inbounds float* %tmp12385, i64 1
- %tmp12387 = getelementptr inbounds float* %tmp12386, i64 1
- %tmp12388 = getelementptr inbounds float* %tmp12387, i64 1
- %tmp12389 = getelementptr inbounds float* %tmp12388, i64 1
- %tmp12390 = getelementptr inbounds float* %tmp12389, i64 1
- %tmp12391 = getelementptr inbounds float* %tmp12390, i64 1
- %tmp12392 = getelementptr inbounds float* %tmp12391, i64 1
- %tmp12393 = getelementptr inbounds float* %tmp12392, i64 1
- %tmp12394 = getelementptr inbounds float* %tmp12393, i64 1
- %tmp12395 = getelementptr inbounds float* %tmp12394, i64 1
- %tmp12396 = getelementptr inbounds float* %tmp12395, i64 1
- %tmp12397 = getelementptr inbounds float* %tmp12396, i64 1
- %tmp12398 = getelementptr inbounds float* %tmp12397, i64 1
- %tmp12399 = getelementptr inbounds float* %tmp12398, i64 1
- %tmp12400 = getelementptr inbounds float* %tmp12399, i64 1
- %tmp12401 = getelementptr inbounds float* %tmp12400, i64 1
- %tmp12402 = getelementptr inbounds float* %tmp12401, i64 1
- %tmp12403 = getelementptr inbounds float* %tmp12402, i64 1
- %tmp12404 = getelementptr inbounds float* %tmp12403, i64 1
- %tmp12405 = getelementptr inbounds float* %tmp12404, i64 1
- %tmp12406 = getelementptr inbounds float* %tmp12405, i64 1
- %tmp12407 = getelementptr inbounds float* %tmp12406, i64 1
- %tmp12408 = getelementptr inbounds float* %tmp12407, i64 1
- %tmp12409 = getelementptr inbounds float* %tmp12408, i64 1
- %tmp12410 = getelementptr inbounds float* %tmp12409, i64 1
- %tmp12411 = getelementptr inbounds float* %tmp12410, i64 1
- %tmp12412 = getelementptr inbounds float* %tmp12411, i64 1
- %tmp12413 = getelementptr inbounds float* %tmp12412, i64 1
- %tmp12414 = getelementptr inbounds float* %tmp12413, i64 1
- %tmp12415 = getelementptr inbounds float* %tmp12414, i64 1
- %tmp12416 = getelementptr inbounds float* %tmp12415, i64 1
- %tmp12417 = getelementptr inbounds float* %tmp12416, i64 1
- %tmp12418 = getelementptr inbounds float* %tmp12417, i64 1
- %tmp12419 = getelementptr inbounds float* %tmp12418, i64 1
- %tmp12420 = getelementptr inbounds float* %tmp12419, i64 1
- %tmp12421 = getelementptr inbounds float* %tmp12420, i64 1
- %tmp12422 = getelementptr inbounds float* %tmp12421, i64 1
- %tmp12423 = getelementptr inbounds float* %tmp12422, i64 1
- %tmp12424 = getelementptr inbounds float* %tmp12423, i64 1
- %tmp12425 = getelementptr inbounds float* %tmp12424, i64 1
- %tmp12426 = getelementptr inbounds float* %tmp12425, i64 1
- %tmp12427 = getelementptr inbounds float* %tmp12426, i64 1
- %tmp12428 = getelementptr inbounds float* %tmp12427, i64 1
- %tmp12429 = getelementptr inbounds float* %tmp12428, i64 1
- %tmp12430 = getelementptr inbounds float* %tmp12429, i64 1
- %tmp12431 = getelementptr inbounds float* %tmp12430, i64 1
- %tmp12432 = getelementptr inbounds float* %tmp12431, i64 1
- %tmp12433 = getelementptr inbounds float* %tmp12432, i64 1
- %tmp12434 = getelementptr inbounds float* %tmp12433, i64 1
- %tmp12435 = getelementptr inbounds float* %tmp12434, i64 1
- %tmp12436 = getelementptr inbounds float* %tmp12435, i64 1
- %tmp12437 = getelementptr inbounds float* %tmp12436, i64 1
- %tmp12438 = getelementptr inbounds float* %tmp12437, i64 1
- %tmp12439 = getelementptr inbounds float* %tmp12438, i64 1
- %tmp12440 = getelementptr inbounds float* %tmp12439, i64 1
- %tmp12441 = getelementptr inbounds float* %tmp12440, i64 1
- %tmp12442 = getelementptr inbounds float* %tmp12441, i64 1
- %tmp12443 = getelementptr inbounds float* %tmp12442, i64 1
- %tmp12444 = getelementptr inbounds float* %tmp12443, i64 1
- %tmp12445 = getelementptr inbounds float* %tmp12444, i64 1
- %tmp12446 = getelementptr inbounds float* %tmp12445, i64 1
- %tmp12447 = getelementptr inbounds float* %tmp12446, i64 1
- %tmp12448 = getelementptr inbounds float* %tmp12447, i64 1
- %tmp12449 = getelementptr inbounds float* %tmp12448, i64 1
- %tmp12450 = getelementptr inbounds float* %tmp12449, i64 1
- %tmp12451 = getelementptr inbounds float* %tmp12450, i64 1
- %tmp12452 = getelementptr inbounds float* %tmp12451, i64 1
- %tmp12453 = getelementptr inbounds float* %tmp12452, i64 1
- %tmp12454 = getelementptr inbounds float* %tmp12453, i64 1
- %tmp12455 = getelementptr inbounds float* %tmp12454, i64 1
- %tmp12456 = getelementptr inbounds float* %tmp12455, i64 1
- %tmp12457 = getelementptr inbounds float* %tmp12456, i64 1
- %tmp12458 = getelementptr inbounds float* %tmp12457, i64 1
- %tmp12459 = getelementptr inbounds float* %tmp12458, i64 1
- %tmp12460 = getelementptr inbounds float* %tmp12459, i64 1
- %tmp12461 = getelementptr inbounds float* %tmp12460, i64 1
- %tmp12462 = getelementptr inbounds float* %tmp12461, i64 1
- %tmp12463 = getelementptr inbounds float* %tmp12462, i64 1
- %tmp12464 = getelementptr inbounds float* %tmp12463, i64 1
- %tmp12465 = getelementptr inbounds float* %tmp12464, i64 1
- %tmp12466 = getelementptr inbounds float* %tmp12465, i64 1
- %tmp12467 = getelementptr inbounds float* %tmp12466, i64 1
- %tmp12468 = getelementptr inbounds float* %tmp12467, i64 1
- %tmp12469 = getelementptr inbounds float* %tmp12468, i64 1
- %tmp12470 = getelementptr inbounds float* %tmp12469, i64 1
- %tmp12471 = getelementptr inbounds float* %tmp12470, i64 1
- %tmp12472 = getelementptr inbounds float* %tmp12471, i64 1
- %tmp12473 = getelementptr inbounds float* %tmp12472, i64 1
- %tmp12474 = getelementptr inbounds float* %tmp12473, i64 1
- %tmp12475 = getelementptr inbounds float* %tmp12474, i64 1
- %tmp12476 = getelementptr inbounds float* %tmp12475, i64 1
- %tmp12477 = getelementptr inbounds float* %tmp12476, i64 1
- %tmp12478 = getelementptr inbounds float* %tmp12477, i64 1
- %tmp12479 = getelementptr inbounds float* %tmp12478, i64 1
- %tmp12480 = getelementptr inbounds float* %tmp12479, i64 1
- %tmp12481 = getelementptr inbounds float* %tmp12480, i64 1
- %tmp12482 = getelementptr inbounds float* %tmp12481, i64 1
- %tmp12483 = getelementptr inbounds float* %tmp12482, i64 1
- %tmp12484 = getelementptr inbounds float* %tmp12483, i64 1
- %tmp12485 = getelementptr inbounds float* %tmp12484, i64 1
- %tmp12486 = getelementptr inbounds float* %tmp12485, i64 1
- %tmp12487 = getelementptr inbounds float* %tmp12486, i64 1
- %tmp12488 = getelementptr inbounds float* %tmp12487, i64 1
- %tmp12489 = getelementptr inbounds float* %tmp12488, i64 1
- %tmp12490 = getelementptr inbounds float* %tmp12489, i64 1
- %tmp12491 = getelementptr inbounds float* %tmp12490, i64 1
- %tmp12492 = getelementptr inbounds float* %tmp12491, i64 1
- %tmp12493 = getelementptr inbounds float* %tmp12492, i64 1
- %tmp12494 = getelementptr inbounds float* %tmp12493, i64 1
- %tmp12495 = getelementptr inbounds float* %tmp12494, i64 1
- %tmp12496 = getelementptr inbounds float* %tmp12495, i64 1
- %tmp12497 = getelementptr inbounds float* %tmp12496, i64 1
- %tmp12498 = getelementptr inbounds float* %tmp12497, i64 1
- %tmp12499 = getelementptr inbounds float* %tmp12498, i64 1
- %tmp12500 = getelementptr inbounds float* %tmp12499, i64 1
- %tmp12501 = getelementptr inbounds float* %tmp12500, i64 1
- %tmp12502 = getelementptr inbounds float* %tmp12501, i64 1
- %tmp12503 = getelementptr inbounds float* %tmp12502, i64 1
- %tmp12504 = getelementptr inbounds float* %tmp12503, i64 1
- %tmp12505 = getelementptr inbounds float* %tmp12504, i64 1
- %tmp12506 = getelementptr inbounds float* %tmp12505, i64 1
- %tmp12507 = getelementptr inbounds float* %tmp12506, i64 1
- %tmp12508 = getelementptr inbounds float* %tmp12507, i64 1
- %tmp12509 = getelementptr inbounds float* %tmp12508, i64 1
- %tmp12510 = getelementptr inbounds float* %tmp12509, i64 1
- %tmp12511 = getelementptr inbounds float* %tmp12510, i64 1
- %tmp12512 = getelementptr inbounds float* %tmp12511, i64 1
- %tmp12513 = getelementptr inbounds float* %tmp12512, i64 1
- %tmp12514 = getelementptr inbounds float* %tmp12513, i64 1
- %tmp12515 = getelementptr inbounds float* %tmp12514, i64 1
- %tmp12516 = getelementptr inbounds float* %tmp12515, i64 1
- %tmp12517 = getelementptr inbounds float* %tmp12516, i64 1
- %tmp12518 = getelementptr inbounds float* %tmp12517, i64 1
- %tmp12519 = getelementptr inbounds float* %tmp12518, i64 1
- %tmp12520 = getelementptr inbounds float* %tmp12519, i64 1
- %tmp12521 = getelementptr inbounds float* %tmp12520, i64 1
- %tmp12522 = getelementptr inbounds float* %tmp12521, i64 1
- %tmp12523 = getelementptr inbounds float* %tmp12522, i64 1
- %tmp12524 = getelementptr inbounds float* %tmp12523, i64 1
- %tmp12525 = getelementptr inbounds float* %tmp12524, i64 1
- %tmp12526 = getelementptr inbounds float* %tmp12525, i64 1
- %tmp12527 = getelementptr inbounds float* %tmp12526, i64 1
- %tmp12528 = getelementptr inbounds float* %tmp12527, i64 1
- %tmp12529 = getelementptr inbounds float* %tmp12528, i64 1
- %tmp12530 = getelementptr inbounds float* %tmp12529, i64 1
- %tmp12531 = getelementptr inbounds float* %tmp12530, i64 1
- %tmp12532 = getelementptr inbounds float* %tmp12531, i64 1
- %tmp12533 = getelementptr inbounds float* %tmp12532, i64 1
- %tmp12534 = getelementptr inbounds float* %tmp12533, i64 1
- %tmp12535 = getelementptr inbounds float* %tmp12534, i64 1
- %tmp12536 = getelementptr inbounds float* %tmp12535, i64 1
- %tmp12537 = getelementptr inbounds float* %tmp12536, i64 1
- %tmp12538 = getelementptr inbounds float* %tmp12537, i64 1
- %tmp12539 = getelementptr inbounds float* %tmp12538, i64 1
- %tmp12540 = getelementptr inbounds float* %tmp12539, i64 1
- %tmp12541 = getelementptr inbounds float* %tmp12540, i64 1
- %tmp12542 = getelementptr inbounds float* %tmp12541, i64 1
- %tmp12543 = getelementptr inbounds float* %tmp12542, i64 1
- %tmp12544 = getelementptr inbounds float* %tmp12543, i64 1
- %tmp12545 = getelementptr inbounds float* %tmp12544, i64 1
- %tmp12546 = getelementptr inbounds float* %tmp12545, i64 1
- %tmp12547 = getelementptr inbounds float* %tmp12546, i64 1
- %tmp12548 = getelementptr inbounds float* %tmp12547, i64 1
- %tmp12549 = getelementptr inbounds float* %tmp12548, i64 1
- %tmp12550 = getelementptr inbounds float* %tmp12549, i64 1
- %tmp12551 = getelementptr inbounds float* %tmp12550, i64 1
- %tmp12552 = getelementptr inbounds float* %tmp12551, i64 1
- %tmp12553 = getelementptr inbounds float* %tmp12552, i64 1
- %tmp12554 = getelementptr inbounds float* %tmp12553, i64 1
- %tmp12555 = getelementptr inbounds float* %tmp12554, i64 1
- %tmp12556 = getelementptr inbounds float* %tmp12555, i64 1
- %tmp12557 = getelementptr inbounds float* %tmp12556, i64 1
- %tmp12558 = getelementptr inbounds float* %tmp12557, i64 1
- %tmp12559 = getelementptr inbounds float* %tmp12558, i64 1
- %tmp12560 = getelementptr inbounds float* %tmp12559, i64 1
- %tmp12561 = getelementptr inbounds float* %tmp12560, i64 1
- %tmp12562 = getelementptr inbounds float* %tmp12561, i64 1
- %tmp12563 = getelementptr inbounds float* %tmp12562, i64 1
- %tmp12564 = getelementptr inbounds float* %tmp12563, i64 1
- %tmp12565 = getelementptr inbounds float* %tmp12564, i64 1
- %tmp12566 = getelementptr inbounds float* %tmp12565, i64 1
- %tmp12567 = getelementptr inbounds float* %tmp12566, i64 1
- %tmp12568 = getelementptr inbounds float* %tmp12567, i64 1
- %tmp12569 = getelementptr inbounds float* %tmp12568, i64 1
- %tmp12570 = getelementptr inbounds float* %tmp12569, i64 1
- %tmp12571 = getelementptr inbounds float* %tmp12570, i64 1
- %tmp12572 = getelementptr inbounds float* %tmp12571, i64 1
- %tmp12573 = getelementptr inbounds float* %tmp12572, i64 1
- %tmp12574 = getelementptr inbounds float* %tmp12573, i64 1
- %tmp12575 = getelementptr inbounds float* %tmp12574, i64 1
- %tmp12576 = getelementptr inbounds float* %tmp12575, i64 1
- %tmp12577 = getelementptr inbounds float* %tmp12576, i64 1
- %tmp12578 = getelementptr inbounds float* %tmp12577, i64 1
- %tmp12579 = getelementptr inbounds float* %tmp12578, i64 1
- %tmp12580 = getelementptr inbounds float* %tmp12579, i64 1
- %tmp12581 = getelementptr inbounds float* %tmp12580, i64 1
- %tmp12582 = getelementptr inbounds float* %tmp12581, i64 1
- %tmp12583 = getelementptr inbounds float* %tmp12582, i64 1
- %tmp12584 = getelementptr inbounds float* %tmp12583, i64 1
- %tmp12585 = getelementptr inbounds float* %tmp12584, i64 1
- %tmp12586 = getelementptr inbounds float* %tmp12585, i64 1
- %tmp12587 = getelementptr inbounds float* %tmp12586, i64 1
- %tmp12588 = getelementptr inbounds float* %tmp12587, i64 1
- %tmp12589 = getelementptr inbounds float* %tmp12588, i64 1
- %tmp12590 = getelementptr inbounds float* %tmp12589, i64 1
- %tmp12591 = getelementptr inbounds float* %tmp12590, i64 1
- %tmp12592 = getelementptr inbounds float* %tmp12591, i64 1
- %tmp12593 = getelementptr inbounds float* %tmp12592, i64 1
- %tmp12594 = getelementptr inbounds float* %tmp12593, i64 1
- %tmp12595 = getelementptr inbounds float* %tmp12594, i64 1
- %tmp12596 = getelementptr inbounds float* %tmp12595, i64 1
- %tmp12597 = getelementptr inbounds float* %tmp12596, i64 1
- %tmp12598 = getelementptr inbounds float* %tmp12597, i64 1
- %tmp12599 = getelementptr inbounds float* %tmp12598, i64 1
- %tmp12600 = getelementptr inbounds float* %tmp12599, i64 1
- %tmp12601 = getelementptr inbounds float* %tmp12600, i64 1
- %tmp12602 = getelementptr inbounds float* %tmp12601, i64 1
- %tmp12603 = getelementptr inbounds float* %tmp12602, i64 1
- %tmp12604 = getelementptr inbounds float* %tmp12603, i64 1
- %tmp12605 = getelementptr inbounds float* %tmp12604, i64 1
- %tmp12606 = getelementptr inbounds float* %tmp12605, i64 1
- %tmp12607 = getelementptr inbounds float* %tmp12606, i64 1
- %tmp12608 = getelementptr inbounds float* %tmp12607, i64 1
- %tmp12609 = getelementptr inbounds float* %tmp12608, i64 1
- %tmp12610 = getelementptr inbounds float* %tmp12609, i64 1
- %tmp12611 = getelementptr inbounds float* %tmp12610, i64 1
- %tmp12612 = getelementptr inbounds float* %tmp12611, i64 1
- %tmp12613 = getelementptr inbounds float* %tmp12612, i64 1
- %tmp12614 = getelementptr inbounds float* %tmp12613, i64 1
- %tmp12615 = getelementptr inbounds float* %tmp12614, i64 1
- %tmp12616 = getelementptr inbounds float* %tmp12615, i64 1
- %tmp12617 = getelementptr inbounds float* %tmp12616, i64 1
- %tmp12618 = getelementptr inbounds float* %tmp12617, i64 1
- %tmp12619 = getelementptr inbounds float* %tmp12618, i64 1
- %tmp12620 = getelementptr inbounds float* %tmp12619, i64 1
- %tmp12621 = getelementptr inbounds float* %tmp12620, i64 1
- %tmp12622 = getelementptr inbounds float* %tmp12621, i64 1
- %tmp12623 = getelementptr inbounds float* %tmp12622, i64 1
- %tmp12624 = getelementptr inbounds float* %tmp12623, i64 1
- %tmp12625 = getelementptr inbounds float* %tmp12624, i64 1
- %tmp12626 = getelementptr inbounds float* %tmp12625, i64 1
- %tmp12627 = getelementptr inbounds float* %tmp12626, i64 1
- %tmp12628 = getelementptr inbounds float* %tmp12627, i64 1
- %tmp12629 = getelementptr inbounds float* %tmp12628, i64 1
- %tmp12630 = getelementptr inbounds float* %tmp12629, i64 1
- %tmp12631 = getelementptr inbounds float* %tmp12630, i64 1
- %tmp12632 = getelementptr inbounds float* %tmp12631, i64 1
- %tmp12633 = getelementptr inbounds float* %tmp12632, i64 1
- %tmp12634 = getelementptr inbounds float* %tmp12633, i64 1
- %tmp12635 = getelementptr inbounds float* %tmp12634, i64 1
- %tmp12636 = getelementptr inbounds float* %tmp12635, i64 1
- %tmp12637 = getelementptr inbounds float* %tmp12636, i64 1
- %tmp12638 = getelementptr inbounds float* %tmp12637, i64 1
- %tmp12639 = getelementptr inbounds float* %tmp12638, i64 1
- %tmp12640 = getelementptr inbounds float* %tmp12639, i64 1
- %tmp12641 = getelementptr inbounds float* %tmp12640, i64 1
- %tmp12642 = getelementptr inbounds float* %tmp12641, i64 1
- %tmp12643 = getelementptr inbounds float* %tmp12642, i64 1
- %tmp12644 = getelementptr inbounds float* %tmp12643, i64 1
- %tmp12645 = getelementptr inbounds float* %tmp12644, i64 1
- %tmp12646 = getelementptr inbounds float* %tmp12645, i64 1
- %tmp12647 = getelementptr inbounds float* %tmp12646, i64 1
- %tmp12648 = getelementptr inbounds float* %tmp12647, i64 1
- %tmp12649 = getelementptr inbounds float* %tmp12648, i64 1
- %tmp12650 = getelementptr inbounds float* %tmp12649, i64 1
- %tmp12651 = getelementptr inbounds float* %tmp12650, i64 1
- %tmp12652 = getelementptr inbounds float* %tmp12651, i64 1
- %tmp12653 = getelementptr inbounds float* %tmp12652, i64 1
- %tmp12654 = getelementptr inbounds float* %tmp12653, i64 1
- %tmp12655 = getelementptr inbounds float* %tmp12654, i64 1
- %tmp12656 = getelementptr inbounds float* %tmp12655, i64 1
- %tmp12657 = getelementptr inbounds float* %tmp12656, i64 1
- %tmp12658 = getelementptr inbounds float* %tmp12657, i64 1
- %tmp12659 = getelementptr inbounds float* %tmp12658, i64 1
- %tmp12660 = getelementptr inbounds float* %tmp12659, i64 1
- %tmp12661 = getelementptr inbounds float* %tmp12660, i64 1
- %tmp12662 = getelementptr inbounds float* %tmp12661, i64 1
- %tmp12663 = getelementptr inbounds float* %tmp12662, i64 1
- %tmp12664 = getelementptr inbounds float* %tmp12663, i64 1
- %tmp12665 = getelementptr inbounds float* %tmp12664, i64 1
- %tmp12666 = getelementptr inbounds float* %tmp12665, i64 1
- %tmp12667 = getelementptr inbounds float* %tmp12666, i64 1
- %tmp12668 = getelementptr inbounds float* %tmp12667, i64 1
- %tmp12669 = getelementptr inbounds float* %tmp12668, i64 1
- %tmp12670 = getelementptr inbounds float* %tmp12669, i64 1
- %tmp12671 = getelementptr inbounds float* %tmp12670, i64 1
- %tmp12672 = getelementptr inbounds float* %tmp12671, i64 1
- %tmp12673 = getelementptr inbounds float* %tmp12672, i64 1
- %tmp12674 = getelementptr inbounds float* %tmp12673, i64 1
- %tmp12675 = getelementptr inbounds float* %tmp12674, i64 1
- %tmp12676 = getelementptr inbounds float* %tmp12675, i64 1
- %tmp12677 = getelementptr inbounds float* %tmp12676, i64 1
- %tmp12678 = getelementptr inbounds float* %tmp12677, i64 1
- %tmp12679 = getelementptr inbounds float* %tmp12678, i64 1
- %tmp12680 = getelementptr inbounds float* %tmp12679, i64 1
- %tmp12681 = getelementptr inbounds float* %tmp12680, i64 1
- %tmp12682 = getelementptr inbounds float* %tmp12681, i64 1
- %tmp12683 = getelementptr inbounds float* %tmp12682, i64 1
- %tmp12684 = getelementptr inbounds float* %tmp12683, i64 1
- %tmp12685 = getelementptr inbounds float* %tmp12684, i64 1
- %tmp12686 = getelementptr inbounds float* %tmp12685, i64 1
- %tmp12687 = getelementptr inbounds float* %tmp12686, i64 1
- %tmp12688 = getelementptr inbounds float* %tmp12687, i64 1
- %tmp12689 = getelementptr inbounds float* %tmp12688, i64 1
- %tmp12690 = getelementptr inbounds float* %tmp12689, i64 1
- %tmp12691 = getelementptr inbounds float* %tmp12690, i64 1
- %tmp12692 = getelementptr inbounds float* %tmp12691, i64 1
- %tmp12693 = getelementptr inbounds float* %tmp12692, i64 1
- %tmp12694 = getelementptr inbounds float* %tmp12693, i64 1
- %tmp12695 = getelementptr inbounds float* %tmp12694, i64 1
- %tmp12696 = getelementptr inbounds float* %tmp12695, i64 1
- %tmp12697 = getelementptr inbounds float* %tmp12696, i64 1
- %tmp12698 = getelementptr inbounds float* %tmp12697, i64 1
- %tmp12699 = getelementptr inbounds float* %tmp12698, i64 1
- %tmp12700 = getelementptr inbounds float* %tmp12699, i64 1
- %tmp12701 = getelementptr inbounds float* %tmp12700, i64 1
- %tmp12702 = getelementptr inbounds float* %tmp12701, i64 1
- %tmp12703 = getelementptr inbounds float* %tmp12702, i64 1
- %tmp12704 = getelementptr inbounds float* %tmp12703, i64 1
- %tmp12705 = getelementptr inbounds float* %tmp12704, i64 1
- %tmp12706 = getelementptr inbounds float* %tmp12705, i64 1
- %tmp12707 = getelementptr inbounds float* %tmp12706, i64 1
- %tmp12708 = getelementptr inbounds float* %tmp12707, i64 1
- %tmp12709 = getelementptr inbounds float* %tmp12708, i64 1
- %tmp12710 = getelementptr inbounds float* %tmp12709, i64 1
- %tmp12711 = getelementptr inbounds float* %tmp12710, i64 1
- %tmp12712 = getelementptr inbounds float* %tmp12711, i64 1
- %tmp12713 = getelementptr inbounds float* %tmp12712, i64 1
- %tmp12714 = getelementptr inbounds float* %tmp12713, i64 1
- %tmp12715 = getelementptr inbounds float* %tmp12714, i64 1
- %tmp12716 = getelementptr inbounds float* %tmp12715, i64 1
- %tmp12717 = getelementptr inbounds float* %tmp12716, i64 1
- %tmp12718 = getelementptr inbounds float* %tmp12717, i64 1
- %tmp12719 = getelementptr inbounds float* %tmp12718, i64 1
- %tmp12720 = getelementptr inbounds float* %tmp12719, i64 1
- %tmp12721 = getelementptr inbounds float* %tmp12720, i64 1
- %tmp12722 = getelementptr inbounds float* %tmp12721, i64 1
- %tmp12723 = getelementptr inbounds float* %tmp12722, i64 1
- %tmp12724 = getelementptr inbounds float* %tmp12723, i64 1
- %tmp12725 = getelementptr inbounds float* %tmp12724, i64 1
- %tmp12726 = getelementptr inbounds float* %tmp12725, i64 1
- %tmp12727 = getelementptr inbounds float* %tmp12726, i64 1
- %tmp12728 = getelementptr inbounds float* %tmp12727, i64 1
- %tmp12729 = getelementptr inbounds float* %tmp12728, i64 1
- %tmp12730 = getelementptr inbounds float* %tmp12729, i64 1
- %tmp12731 = getelementptr inbounds float* %tmp12730, i64 1
- %tmp12732 = getelementptr inbounds float* %tmp12731, i64 1
- %tmp12733 = getelementptr inbounds float* %tmp12732, i64 1
- %tmp12734 = getelementptr inbounds float* %tmp12733, i64 1
- %tmp12735 = getelementptr inbounds float* %tmp12734, i64 1
- %tmp12736 = getelementptr inbounds float* %tmp12735, i64 1
- %tmp12737 = getelementptr inbounds float* %tmp12736, i64 1
- %tmp12738 = getelementptr inbounds float* %tmp12737, i64 1
- %tmp12739 = getelementptr inbounds float* %tmp12738, i64 1
- %tmp12740 = getelementptr inbounds float* %tmp12739, i64 1
- %tmp12741 = getelementptr inbounds float* %tmp12740, i64 1
- %tmp12742 = getelementptr inbounds float* %tmp12741, i64 1
- %tmp12743 = getelementptr inbounds float* %tmp12742, i64 1
- %tmp12744 = getelementptr inbounds float* %tmp12743, i64 1
- %tmp12745 = getelementptr inbounds float* %tmp12744, i64 1
- %tmp12746 = getelementptr inbounds float* %tmp12745, i64 1
- %tmp12747 = getelementptr inbounds float* %tmp12746, i64 1
- %tmp12748 = getelementptr inbounds float* %tmp12747, i64 1
- %tmp12749 = getelementptr inbounds float* %tmp12748, i64 1
- %tmp12750 = getelementptr inbounds float* %tmp12749, i64 1
- %tmp12751 = getelementptr inbounds float* %tmp12750, i64 1
- %tmp12752 = getelementptr inbounds float* %tmp12751, i64 1
- %tmp12753 = getelementptr inbounds float* %tmp12752, i64 1
- %tmp12754 = getelementptr inbounds float* %tmp12753, i64 1
- %tmp12755 = getelementptr inbounds float* %tmp12754, i64 1
- %tmp12756 = getelementptr inbounds float* %tmp12755, i64 1
- %tmp12757 = getelementptr inbounds float* %tmp12756, i64 1
- %tmp12758 = getelementptr inbounds float* %tmp12757, i64 1
- %tmp12759 = getelementptr inbounds float* %tmp12758, i64 1
- %tmp12760 = getelementptr inbounds float* %tmp12759, i64 1
- %tmp12761 = getelementptr inbounds float* %tmp12760, i64 1
- %tmp12762 = getelementptr inbounds float* %tmp12761, i64 1
- %tmp12763 = getelementptr inbounds float* %tmp12762, i64 1
- %tmp12764 = getelementptr inbounds float* %tmp12763, i64 1
- %tmp12765 = getelementptr inbounds float* %tmp12764, i64 1
- %tmp12766 = getelementptr inbounds float* %tmp12765, i64 1
- %tmp12767 = getelementptr inbounds float* %tmp12766, i64 1
- %tmp12768 = getelementptr inbounds float* %tmp12767, i64 1
- %tmp12769 = getelementptr inbounds float* %tmp12768, i64 1
- %tmp12770 = getelementptr inbounds float* %tmp12769, i64 1
- %tmp12771 = getelementptr inbounds float* %tmp12770, i64 1
- %tmp12772 = getelementptr inbounds float* %tmp12771, i64 1
- %tmp12773 = getelementptr inbounds float* %tmp12772, i64 1
- %tmp12774 = getelementptr inbounds float* %tmp12773, i64 1
- %tmp12775 = getelementptr inbounds float* %tmp12774, i64 1
- %tmp12776 = getelementptr inbounds float* %tmp12775, i64 1
- %tmp12777 = getelementptr inbounds float* %tmp12776, i64 1
- %tmp12778 = getelementptr inbounds float* %tmp12777, i64 1
- %tmp12779 = getelementptr inbounds float* %tmp12778, i64 1
- %tmp12780 = getelementptr inbounds float* %tmp12779, i64 1
- %tmp12781 = getelementptr inbounds float* %tmp12780, i64 1
- %tmp12782 = getelementptr inbounds float* %tmp12781, i64 1
- %tmp12783 = getelementptr inbounds float* %tmp12782, i64 1
- %tmp12784 = getelementptr inbounds float* %tmp12783, i64 1
- %tmp12785 = getelementptr inbounds float* %tmp12784, i64 1
- %tmp12786 = getelementptr inbounds float* %tmp12785, i64 1
- %tmp12787 = getelementptr inbounds float* %tmp12786, i64 1
- %tmp12788 = getelementptr inbounds float* %tmp12787, i64 1
- %tmp12789 = getelementptr inbounds float* %tmp12788, i64 1
- %tmp12790 = getelementptr inbounds float* %tmp12789, i64 1
- %tmp12791 = getelementptr inbounds float* %tmp12790, i64 1
- %tmp12792 = getelementptr inbounds float* %tmp12791, i64 1
- %tmp12793 = getelementptr inbounds float* %tmp12792, i64 1
- %tmp12794 = getelementptr inbounds float* %tmp12793, i64 1
- %tmp12795 = getelementptr inbounds float* %tmp12794, i64 1
- %tmp12796 = getelementptr inbounds float* %tmp12795, i64 1
- %tmp12797 = getelementptr inbounds float* %tmp12796, i64 1
- %tmp12798 = getelementptr inbounds float* %tmp12797, i64 1
- %tmp12799 = getelementptr inbounds float* %tmp12798, i64 1
- %tmp12800 = getelementptr inbounds float* %tmp12799, i64 1
- %tmp12801 = getelementptr inbounds float* %tmp12800, i64 1
- %tmp12802 = getelementptr inbounds float* %tmp12801, i64 1
- %tmp12803 = getelementptr inbounds float* %tmp12802, i64 1
- %tmp12804 = getelementptr inbounds float* %tmp12803, i64 1
- %tmp12805 = getelementptr inbounds float* %tmp12804, i64 1
- %tmp12806 = getelementptr inbounds float* %tmp12805, i64 1
- %tmp12807 = getelementptr inbounds float* %tmp12806, i64 1
- %tmp12808 = getelementptr inbounds float* %tmp12807, i64 1
- %tmp12809 = getelementptr inbounds float* %tmp12808, i64 1
- %tmp12810 = getelementptr inbounds float* %tmp12809, i64 1
- %tmp12811 = getelementptr inbounds float* %tmp12810, i64 1
- %tmp12812 = getelementptr inbounds float* %tmp12811, i64 1
- %tmp12813 = getelementptr inbounds float* %tmp12812, i64 1
- %tmp12814 = getelementptr inbounds float* %tmp12813, i64 1
- %tmp12815 = getelementptr inbounds float* %tmp12814, i64 1
- %tmp12816 = getelementptr inbounds float* %tmp12815, i64 1
- %tmp12817 = getelementptr inbounds float* %tmp12816, i64 1
- %tmp12818 = getelementptr inbounds float* %tmp12817, i64 1
- %tmp12819 = getelementptr inbounds float* %tmp12818, i64 1
- %tmp12820 = getelementptr inbounds float* %tmp12819, i64 1
- %tmp12821 = getelementptr inbounds float* %tmp12820, i64 1
- %tmp12822 = getelementptr inbounds float* %tmp12821, i64 1
- %tmp12823 = getelementptr inbounds float* %tmp12822, i64 1
- %tmp12824 = getelementptr inbounds float* %tmp12823, i64 1
- %tmp12825 = getelementptr inbounds float* %tmp12824, i64 1
- %tmp12826 = getelementptr inbounds float* %tmp12825, i64 1
- %tmp12827 = getelementptr inbounds float* %tmp12826, i64 1
- %tmp12828 = getelementptr inbounds float* %tmp12827, i64 1
- %tmp12829 = getelementptr inbounds float* %tmp12828, i64 1
- %tmp12830 = getelementptr inbounds float* %tmp12829, i64 1
- %tmp12831 = getelementptr inbounds float* %tmp12830, i64 1
- %tmp12832 = getelementptr inbounds float* %tmp12831, i64 1
- %tmp12833 = getelementptr inbounds float* %tmp12832, i64 1
- %tmp12834 = getelementptr inbounds float* %tmp12833, i64 1
- %tmp12835 = getelementptr inbounds float* %tmp12834, i64 1
- %tmp12836 = getelementptr inbounds float* %tmp12835, i64 1
- %tmp12837 = getelementptr inbounds float* %tmp12836, i64 1
- %tmp12838 = getelementptr inbounds float* %tmp12837, i64 1
- %tmp12839 = getelementptr inbounds float* %tmp12838, i64 1
- %tmp12840 = getelementptr inbounds float* %tmp12839, i64 1
- %tmp12841 = getelementptr inbounds float* %tmp12840, i64 1
- %tmp12842 = getelementptr inbounds float* %tmp12841, i64 1
- %tmp12843 = getelementptr inbounds float* %tmp12842, i64 1
- %tmp12844 = getelementptr inbounds float* %tmp12843, i64 1
- %tmp12845 = getelementptr inbounds float* %tmp12844, i64 1
- %tmp12846 = getelementptr inbounds float* %tmp12845, i64 1
- %tmp12847 = getelementptr inbounds float* %tmp12846, i64 1
- %tmp12848 = getelementptr inbounds float* %tmp12847, i64 1
- %tmp12849 = getelementptr inbounds float* %tmp12848, i64 1
- %tmp12850 = getelementptr inbounds float* %tmp12849, i64 1
- %tmp12851 = getelementptr inbounds float* %tmp12850, i64 1
- %tmp12852 = getelementptr inbounds float* %tmp12851, i64 1
- %tmp12853 = getelementptr inbounds float* %tmp12852, i64 1
- %tmp12854 = getelementptr inbounds float* %tmp12853, i64 1
- %tmp12855 = getelementptr inbounds float* %tmp12854, i64 1
- %tmp12856 = getelementptr inbounds float* %tmp12855, i64 1
- %tmp12857 = getelementptr inbounds float* %tmp12856, i64 1
- %tmp12858 = getelementptr inbounds float* %tmp12857, i64 1
- %tmp12859 = getelementptr inbounds float* %tmp12858, i64 1
- %tmp12860 = getelementptr inbounds float* %tmp12859, i64 1
- %tmp12861 = getelementptr inbounds float* %tmp12860, i64 1
- %tmp12862 = getelementptr inbounds float* %tmp12861, i64 1
- %tmp12863 = getelementptr inbounds float* %tmp12862, i64 1
- %tmp12864 = getelementptr inbounds float* %tmp12863, i64 1
- %tmp12865 = getelementptr inbounds float* %tmp12864, i64 1
- %tmp12866 = getelementptr inbounds float* %tmp12865, i64 1
- %tmp12867 = getelementptr inbounds float* %tmp12866, i64 1
- %tmp12868 = getelementptr inbounds float* %tmp12867, i64 1
- %tmp12869 = getelementptr inbounds float* %tmp12868, i64 1
- %tmp12870 = getelementptr inbounds float* %tmp12869, i64 1
- %tmp12871 = getelementptr inbounds float* %tmp12870, i64 1
- %tmp12872 = getelementptr inbounds float* %tmp12871, i64 1
- %tmp12873 = getelementptr inbounds float* %tmp12872, i64 1
- %tmp12874 = getelementptr inbounds float* %tmp12873, i64 1
- %tmp12875 = getelementptr inbounds float* %tmp12874, i64 1
- %tmp12876 = getelementptr inbounds float* %tmp12875, i64 1
- %tmp12877 = getelementptr inbounds float* %tmp12876, i64 1
- %tmp12878 = getelementptr inbounds float* %tmp12877, i64 1
- %tmp12879 = getelementptr inbounds float* %tmp12878, i64 1
- %tmp12880 = getelementptr inbounds float* %tmp12879, i64 1
- %tmp12881 = getelementptr inbounds float* %tmp12880, i64 1
- %tmp12882 = getelementptr inbounds float* %tmp12881, i64 1
- %tmp12883 = getelementptr inbounds float* %tmp12882, i64 1
- %tmp12884 = getelementptr inbounds float* %tmp12883, i64 1
- %tmp12885 = getelementptr inbounds float* %tmp12884, i64 1
- %tmp12886 = getelementptr inbounds float* %tmp12885, i64 1
- %tmp12887 = getelementptr inbounds float* %tmp12886, i64 1
- %tmp12888 = getelementptr inbounds float* %tmp12887, i64 1
- %tmp12889 = getelementptr inbounds float* %tmp12888, i64 1
- %tmp12890 = getelementptr inbounds float* %tmp12889, i64 1
- %tmp12891 = getelementptr inbounds float* %tmp12890, i64 1
- %tmp12892 = getelementptr inbounds float* %tmp12891, i64 1
- %tmp12893 = getelementptr inbounds float* %tmp12892, i64 1
- %tmp12894 = getelementptr inbounds float* %tmp12893, i64 1
- %tmp12895 = getelementptr inbounds float* %tmp12894, i64 1
- %tmp12896 = getelementptr inbounds float* %tmp12895, i64 1
- %tmp12897 = getelementptr inbounds float* %tmp12896, i64 1
- %tmp12898 = getelementptr inbounds float* %tmp12897, i64 1
- %tmp12899 = getelementptr inbounds float* %tmp12898, i64 1
- %tmp12900 = getelementptr inbounds float* %tmp12899, i64 1
- %tmp12901 = getelementptr inbounds float* %tmp12900, i64 1
- %tmp12902 = getelementptr inbounds float* %tmp12901, i64 1
- %tmp12903 = getelementptr inbounds float* %tmp12902, i64 1
- %tmp12904 = getelementptr inbounds float* %tmp12903, i64 1
- %tmp12905 = getelementptr inbounds float* %tmp12904, i64 1
- %tmp12906 = getelementptr inbounds float* %tmp12905, i64 1
- %tmp12907 = getelementptr inbounds float* %tmp12906, i64 1
- %tmp12908 = getelementptr inbounds float* %tmp12907, i64 1
- %tmp12909 = getelementptr inbounds float* %tmp12908, i64 1
- %tmp12910 = getelementptr inbounds float* %tmp12909, i64 1
- %tmp12911 = getelementptr inbounds float* %tmp12910, i64 1
- %tmp12912 = getelementptr inbounds float* %tmp12911, i64 1
- %tmp12913 = getelementptr inbounds float* %tmp12912, i64 1
- %tmp12914 = getelementptr inbounds float* %tmp12913, i64 1
- %tmp12915 = getelementptr inbounds float* %tmp12914, i64 1
- %tmp12916 = getelementptr inbounds float* %tmp12915, i64 1
- %tmp12917 = getelementptr inbounds float* %tmp12916, i64 1
- %tmp12918 = getelementptr inbounds float* %tmp12917, i64 1
- %tmp12919 = getelementptr inbounds float* %tmp12918, i64 1
- %tmp12920 = getelementptr inbounds float* %tmp12919, i64 1
- %tmp12921 = getelementptr inbounds float* %tmp12920, i64 1
- %tmp12922 = getelementptr inbounds float* %tmp12921, i64 1
- %tmp12923 = getelementptr inbounds float* %tmp12922, i64 1
- %tmp12924 = getelementptr inbounds float* %tmp12923, i64 1
- %tmp12925 = getelementptr inbounds float* %tmp12924, i64 1
- %tmp12926 = getelementptr inbounds float* %tmp12925, i64 1
- %tmp12927 = getelementptr inbounds float* %tmp12926, i64 1
- %tmp12928 = getelementptr inbounds float* %tmp12927, i64 1
- %tmp12929 = getelementptr inbounds float* %tmp12928, i64 1
- %tmp12930 = getelementptr inbounds float* %tmp12929, i64 1
- %tmp12931 = getelementptr inbounds float* %tmp12930, i64 1
- %tmp12932 = getelementptr inbounds float* %tmp12931, i64 1
- %tmp12933 = getelementptr inbounds float* %tmp12932, i64 1
- %tmp12934 = getelementptr inbounds float* %tmp12933, i64 1
- %tmp12935 = getelementptr inbounds float* %tmp12934, i64 1
- %tmp12936 = getelementptr inbounds float* %tmp12935, i64 1
- %tmp12937 = getelementptr inbounds float* %tmp12936, i64 1
- %tmp12938 = getelementptr inbounds float* %tmp12937, i64 1
- %tmp12939 = getelementptr inbounds float* %tmp12938, i64 1
- %tmp12940 = getelementptr inbounds float* %tmp12939, i64 1
- %tmp12941 = getelementptr inbounds float* %tmp12940, i64 1
- %tmp12942 = getelementptr inbounds float* %tmp12941, i64 1
- %tmp12943 = getelementptr inbounds float* %tmp12942, i64 1
- %tmp12944 = getelementptr inbounds float* %tmp12943, i64 1
- %tmp12945 = getelementptr inbounds float* %tmp12944, i64 1
- %tmp12946 = getelementptr inbounds float* %tmp12945, i64 1
- %tmp12947 = getelementptr inbounds float* %tmp12946, i64 1
- %tmp12948 = getelementptr inbounds float* %tmp12947, i64 1
- %tmp12949 = getelementptr inbounds float* %tmp12948, i64 1
- %tmp12950 = getelementptr inbounds float* %tmp12949, i64 1
- %tmp12951 = getelementptr inbounds float* %tmp12950, i64 1
- %tmp12952 = getelementptr inbounds float* %tmp12951, i64 1
- %tmp12953 = getelementptr inbounds float* %tmp12952, i64 1
- %tmp12954 = getelementptr inbounds float* %tmp12953, i64 1
- %tmp12955 = getelementptr inbounds float* %tmp12954, i64 1
- %tmp12956 = getelementptr inbounds float* %tmp12955, i64 1
- %tmp12957 = getelementptr inbounds float* %tmp12956, i64 1
- %tmp12958 = getelementptr inbounds float* %tmp12957, i64 1
- %tmp12959 = getelementptr inbounds float* %tmp12958, i64 1
- %tmp12960 = getelementptr inbounds float* %tmp12959, i64 1
- %tmp12961 = getelementptr inbounds float* %tmp12960, i64 1
- %tmp12962 = getelementptr inbounds float* %tmp12961, i64 1
- %tmp12963 = getelementptr inbounds float* %tmp12962, i64 1
- %tmp12964 = getelementptr inbounds float* %tmp12963, i64 1
- %tmp12965 = getelementptr inbounds float* %tmp12964, i64 1
- %tmp12966 = getelementptr inbounds float* %tmp12965, i64 1
- %tmp12967 = getelementptr inbounds float* %tmp12966, i64 1
- %tmp12968 = getelementptr inbounds float* %tmp12967, i64 1
- %tmp12969 = getelementptr inbounds float* %tmp12968, i64 1
- %tmp12970 = getelementptr inbounds float* %tmp12969, i64 1
- %tmp12971 = getelementptr inbounds float* %tmp12970, i64 1
- %tmp12972 = getelementptr inbounds float* %tmp12971, i64 1
- %tmp12973 = getelementptr inbounds float* %tmp12972, i64 1
- %tmp12974 = getelementptr inbounds float* %tmp12973, i64 1
- %tmp12975 = getelementptr inbounds float* %tmp12974, i64 1
- %tmp12976 = getelementptr inbounds float* %tmp12975, i64 1
- %tmp12977 = getelementptr inbounds float* %tmp12976, i64 1
- %tmp12978 = getelementptr inbounds float* %tmp12977, i64 1
- %tmp12979 = getelementptr inbounds float* %tmp12978, i64 1
- %tmp12980 = getelementptr inbounds float* %tmp12979, i64 1
- %tmp12981 = getelementptr inbounds float* %tmp12980, i64 1
- %tmp12982 = getelementptr inbounds float* %tmp12981, i64 1
- %tmp12983 = getelementptr inbounds float* %tmp12982, i64 1
- %tmp12984 = getelementptr inbounds float* %tmp12983, i64 1
- %tmp12985 = getelementptr inbounds float* %tmp12984, i64 1
- %tmp12986 = getelementptr inbounds float* %tmp12985, i64 1
- %tmp12987 = getelementptr inbounds float* %tmp12986, i64 1
- %tmp12988 = getelementptr inbounds float* %tmp12987, i64 1
- %tmp12989 = getelementptr inbounds float* %tmp12988, i64 1
- %tmp12990 = getelementptr inbounds float* %tmp12989, i64 1
- %tmp12991 = getelementptr inbounds float* %tmp12990, i64 1
- %tmp12992 = getelementptr inbounds float* %tmp12991, i64 1
- %tmp12993 = getelementptr inbounds float* %tmp12992, i64 1
- %tmp12994 = getelementptr inbounds float* %tmp12993, i64 1
- %tmp12995 = getelementptr inbounds float* %tmp12994, i64 1
- %tmp12996 = getelementptr inbounds float* %tmp12995, i64 1
- %tmp12997 = getelementptr inbounds float* %tmp12996, i64 1
- %tmp12998 = getelementptr inbounds float* %tmp12997, i64 1
- %tmp12999 = getelementptr inbounds float* %tmp12998, i64 1
- %tmp13000 = getelementptr inbounds float* %tmp12999, i64 1
- %tmp13001 = getelementptr inbounds float* %tmp13000, i64 1
- %tmp13002 = getelementptr inbounds float* %tmp13001, i64 1
- %tmp13003 = getelementptr inbounds float* %tmp13002, i64 1
- %tmp13004 = getelementptr inbounds float* %tmp13003, i64 1
- %tmp13005 = getelementptr inbounds float* %tmp13004, i64 1
- %tmp13006 = getelementptr inbounds float* %tmp13005, i64 1
- %tmp13007 = getelementptr inbounds float* %tmp13006, i64 1
- %tmp13008 = getelementptr inbounds float* %tmp13007, i64 1
- %tmp13009 = getelementptr inbounds float* %tmp13008, i64 1
- %tmp13010 = getelementptr inbounds float* %tmp13009, i64 1
- %tmp13011 = getelementptr inbounds float* %tmp13010, i64 1
- %tmp13012 = getelementptr inbounds float* %tmp13011, i64 1
- %tmp13013 = getelementptr inbounds float* %tmp13012, i64 1
- %tmp13014 = getelementptr inbounds float* %tmp13013, i64 1
- %tmp13015 = getelementptr inbounds float* %tmp13014, i64 1
- %tmp13016 = getelementptr inbounds float* %tmp13015, i64 1
- %tmp13017 = getelementptr inbounds float* %tmp13016, i64 1
- %tmp13018 = getelementptr inbounds float* %tmp13017, i64 1
- %tmp13019 = getelementptr inbounds float* %tmp13018, i64 1
- %tmp13020 = getelementptr inbounds float* %tmp13019, i64 1
- %tmp13021 = getelementptr inbounds float* %tmp13020, i64 1
- %tmp13022 = getelementptr inbounds float* %tmp13021, i64 1
- %tmp13023 = getelementptr inbounds float* %tmp13022, i64 1
- %tmp13024 = getelementptr inbounds float* %tmp13023, i64 1
- %tmp13025 = getelementptr inbounds float* %tmp13024, i64 1
- %tmp13026 = getelementptr inbounds float* %tmp13025, i64 1
- %tmp13027 = getelementptr inbounds float* %tmp13026, i64 1
- %tmp13028 = getelementptr inbounds float* %tmp13027, i64 1
- %tmp13029 = getelementptr inbounds float* %tmp13028, i64 1
- %tmp13030 = getelementptr inbounds float* %tmp13029, i64 1
- %tmp13031 = getelementptr inbounds float* %tmp13030, i64 1
- %tmp13032 = getelementptr inbounds float* %tmp13031, i64 1
- %tmp13033 = getelementptr inbounds float* %tmp13032, i64 1
- %tmp13034 = getelementptr inbounds float* %tmp13033, i64 1
- %tmp13035 = getelementptr inbounds float* %tmp13034, i64 1
- %tmp13036 = getelementptr inbounds float* %tmp13035, i64 1
- %tmp13037 = getelementptr inbounds float* %tmp13036, i64 1
- %tmp13038 = getelementptr inbounds float* %tmp13037, i64 1
- %tmp13039 = getelementptr inbounds float* %tmp13038, i64 1
- %tmp13040 = getelementptr inbounds float* %tmp13039, i64 1
- %tmp13041 = getelementptr inbounds float* %tmp13040, i64 1
- %tmp13042 = getelementptr inbounds float* %tmp13041, i64 1
- %tmp13043 = getelementptr inbounds float* %tmp13042, i64 1
- %tmp13044 = getelementptr inbounds float* %tmp13043, i64 1
- %tmp13045 = getelementptr inbounds float* %tmp13044, i64 1
- %tmp13046 = getelementptr inbounds float* %tmp13045, i64 1
- %tmp13047 = getelementptr inbounds float* %tmp13046, i64 1
- %tmp13048 = getelementptr inbounds float* %tmp13047, i64 1
- %tmp13049 = getelementptr inbounds float* %tmp13048, i64 1
- %tmp13050 = getelementptr inbounds float* %tmp13049, i64 1
- %tmp13051 = getelementptr inbounds float* %tmp13050, i64 1
- %tmp13052 = getelementptr inbounds float* %tmp13051, i64 1
- %tmp13053 = getelementptr inbounds float* %tmp13052, i64 1
- %tmp13054 = getelementptr inbounds float* %tmp13053, i64 1
- %tmp13055 = getelementptr inbounds float* %tmp13054, i64 1
- %tmp13056 = getelementptr inbounds float* %tmp13055, i64 1
- %tmp13057 = getelementptr inbounds float* %tmp13056, i64 1
- %tmp13058 = getelementptr inbounds float* %tmp13057, i64 1
- %tmp13059 = getelementptr inbounds float* %tmp13058, i64 1
- %tmp13060 = getelementptr inbounds float* %tmp13059, i64 1
- %tmp13061 = getelementptr inbounds float* %tmp13060, i64 1
- %tmp13062 = getelementptr inbounds float* %tmp13061, i64 1
- %tmp13063 = getelementptr inbounds float* %tmp13062, i64 1
- %tmp13064 = getelementptr inbounds float* %tmp13063, i64 1
- %tmp13065 = getelementptr inbounds float* %tmp13064, i64 1
- %tmp13066 = getelementptr inbounds float* %tmp13065, i64 1
- %tmp13067 = getelementptr inbounds float* %tmp13066, i64 1
- %tmp13068 = getelementptr inbounds float* %tmp13067, i64 1
- %tmp13069 = getelementptr inbounds float* %tmp13068, i64 1
- %tmp13070 = getelementptr inbounds float* %tmp13069, i64 1
- %tmp13071 = getelementptr inbounds float* %tmp13070, i64 1
- %tmp13072 = getelementptr inbounds float* %tmp13071, i64 1
- %tmp13073 = getelementptr inbounds float* %tmp13072, i64 1
- %tmp13074 = getelementptr inbounds float* %tmp13073, i64 1
- %tmp13075 = getelementptr inbounds float* %tmp13074, i64 1
- %tmp13076 = getelementptr inbounds float* %tmp13075, i64 1
- %tmp13077 = getelementptr inbounds float* %tmp13076, i64 1
- %tmp13078 = getelementptr inbounds float* %tmp13077, i64 1
- %tmp13079 = getelementptr inbounds float* %tmp13078, i64 1
- %tmp13080 = getelementptr inbounds float* %tmp13079, i64 1
- %tmp13081 = getelementptr inbounds float* %tmp13080, i64 1
- %tmp13082 = getelementptr inbounds float* %tmp13081, i64 1
- %tmp13083 = getelementptr inbounds float* %tmp13082, i64 1
- %tmp13084 = getelementptr inbounds float* %tmp13083, i64 1
- %tmp13085 = getelementptr inbounds float* %tmp13084, i64 1
- %tmp13086 = getelementptr inbounds float* %tmp13085, i64 1
- %tmp13087 = getelementptr inbounds float* %tmp13086, i64 1
- %tmp13088 = getelementptr inbounds float* %tmp13087, i64 1
- %tmp13089 = getelementptr inbounds float* %tmp13088, i64 1
- %tmp13090 = getelementptr inbounds float* %tmp13089, i64 1
- %tmp13091 = getelementptr inbounds float* %tmp13090, i64 1
- %tmp13092 = getelementptr inbounds float* %tmp13091, i64 1
- %tmp13093 = getelementptr inbounds float* %tmp13092, i64 1
- %tmp13094 = getelementptr inbounds float* %tmp13093, i64 1
- %tmp13095 = getelementptr inbounds float* %tmp13094, i64 1
- %tmp13096 = getelementptr inbounds float* %tmp13095, i64 1
- %tmp13097 = getelementptr inbounds float* %tmp13096, i64 1
- %tmp13098 = getelementptr inbounds float* %tmp13097, i64 1
- %tmp13099 = getelementptr inbounds float* %tmp13098, i64 1
- %tmp13100 = getelementptr inbounds float* %tmp13099, i64 1
- %tmp13101 = getelementptr inbounds float* %tmp13100, i64 1
- %tmp13102 = getelementptr inbounds float* %tmp13101, i64 1
- %tmp13103 = getelementptr inbounds float* %tmp13102, i64 1
- %tmp13104 = getelementptr inbounds float* %tmp13103, i64 1
- %tmp13105 = getelementptr inbounds float* %tmp13104, i64 1
- %tmp13106 = getelementptr inbounds float* %tmp13105, i64 1
- %tmp13107 = getelementptr inbounds float* %tmp13106, i64 1
- %tmp13108 = getelementptr inbounds float* %tmp13107, i64 1
- %tmp13109 = getelementptr inbounds float* %tmp13108, i64 1
- %tmp13110 = getelementptr inbounds float* %tmp13109, i64 1
- %tmp13111 = getelementptr inbounds float* %tmp13110, i64 1
- %tmp13112 = getelementptr inbounds float* %tmp13111, i64 1
- %tmp13113 = getelementptr inbounds float* %tmp13112, i64 1
- %tmp13114 = getelementptr inbounds float* %tmp13113, i64 1
- %tmp13115 = getelementptr inbounds float* %tmp13114, i64 1
- %tmp13116 = getelementptr inbounds float* %tmp13115, i64 1
- %tmp13117 = getelementptr inbounds float* %tmp13116, i64 1
- %tmp13118 = getelementptr inbounds float* %tmp13117, i64 1
- %tmp13119 = getelementptr inbounds float* %tmp13118, i64 1
- %tmp13120 = getelementptr inbounds float* %tmp13119, i64 1
- %tmp13121 = getelementptr inbounds float* %tmp13120, i64 1
- %tmp13122 = getelementptr inbounds float* %tmp13121, i64 1
- %tmp13123 = getelementptr inbounds float* %tmp13122, i64 1
- %tmp13124 = getelementptr inbounds float* %tmp13123, i64 1
- %tmp13125 = getelementptr inbounds float* %tmp13124, i64 1
- %tmp13126 = getelementptr inbounds float* %tmp13125, i64 1
- %tmp13127 = getelementptr inbounds float* %tmp13126, i64 1
- %tmp13128 = getelementptr inbounds float* %tmp13127, i64 1
- %tmp13129 = getelementptr inbounds float* %tmp13128, i64 1
- %tmp13130 = getelementptr inbounds float* %tmp13129, i64 1
- %tmp13131 = getelementptr inbounds float* %tmp13130, i64 1
- %tmp13132 = getelementptr inbounds float* %tmp13131, i64 1
- %tmp13133 = getelementptr inbounds float* %tmp13132, i64 1
- %tmp13134 = getelementptr inbounds float* %tmp13133, i64 1
- %tmp13135 = getelementptr inbounds float* %tmp13134, i64 1
- %tmp13136 = getelementptr inbounds float* %tmp13135, i64 1
- %tmp13137 = getelementptr inbounds float* %tmp13136, i64 1
- %tmp13138 = getelementptr inbounds float* %tmp13137, i64 1
- %tmp13139 = getelementptr inbounds float* %tmp13138, i64 1
- %tmp13140 = getelementptr inbounds float* %tmp13139, i64 1
- %tmp13141 = getelementptr inbounds float* %tmp13140, i64 1
- %tmp13142 = getelementptr inbounds float* %tmp13141, i64 1
- %tmp13143 = getelementptr inbounds float* %tmp13142, i64 1
- %tmp13144 = getelementptr inbounds float* %tmp13143, i64 1
- %tmp13145 = getelementptr inbounds float* %tmp13144, i64 1
- %tmp13146 = getelementptr inbounds float* %tmp13145, i64 1
- %tmp13147 = getelementptr inbounds float* %tmp13146, i64 1
- %tmp13148 = getelementptr inbounds float* %tmp13147, i64 1
- %tmp13149 = getelementptr inbounds float* %tmp13148, i64 1
- %tmp13150 = getelementptr inbounds float* %tmp13149, i64 1
- %tmp13151 = getelementptr inbounds float* %tmp13150, i64 1
- %tmp13152 = getelementptr inbounds float* %tmp13151, i64 1
- %tmp13153 = getelementptr inbounds float* %tmp13152, i64 1
- %tmp13154 = getelementptr inbounds float* %tmp13153, i64 1
- %tmp13155 = getelementptr inbounds float* %tmp13154, i64 1
- %tmp13156 = getelementptr inbounds float* %tmp13155, i64 1
- %tmp13157 = getelementptr inbounds float* %tmp13156, i64 1
- %tmp13158 = getelementptr inbounds float* %tmp13157, i64 1
- %tmp13159 = getelementptr inbounds float* %tmp13158, i64 1
- %tmp13160 = getelementptr inbounds float* %tmp13159, i64 1
- %tmp13161 = getelementptr inbounds float* %tmp13160, i64 1
- %tmp13162 = getelementptr inbounds float* %tmp13161, i64 1
- %tmp13163 = getelementptr inbounds float* %tmp13162, i64 1
- %tmp13164 = getelementptr inbounds float* %tmp13163, i64 1
- %tmp13165 = getelementptr inbounds float* %tmp13164, i64 1
- %tmp13166 = getelementptr inbounds float* %tmp13165, i64 1
- %tmp13167 = getelementptr inbounds float* %tmp13166, i64 1
- %tmp13168 = getelementptr inbounds float* %tmp13167, i64 1
- %tmp13169 = getelementptr inbounds float* %tmp13168, i64 1
- %tmp13170 = getelementptr inbounds float* %tmp13169, i64 1
- %tmp13171 = getelementptr inbounds float* %tmp13170, i64 1
- %tmp13172 = getelementptr inbounds float* %tmp13171, i64 1
- %tmp13173 = getelementptr inbounds float* %tmp13172, i64 1
- %tmp13174 = getelementptr inbounds float* %tmp13173, i64 1
- %tmp13175 = getelementptr inbounds float* %tmp13174, i64 1
- %tmp13176 = getelementptr inbounds float* %tmp13175, i64 1
- %tmp13177 = getelementptr inbounds float* %tmp13176, i64 1
- %tmp13178 = getelementptr inbounds float* %tmp13177, i64 1
- %tmp13179 = getelementptr inbounds float* %tmp13178, i64 1
- %tmp13180 = getelementptr inbounds float* %tmp13179, i64 1
- %tmp13181 = getelementptr inbounds float* %tmp13180, i64 1
- %tmp13182 = getelementptr inbounds float* %tmp13181, i64 1
- %tmp13183 = getelementptr inbounds float* %tmp13182, i64 1
- %tmp13184 = getelementptr inbounds float* %tmp13183, i64 1
- %tmp13185 = getelementptr inbounds float* %tmp13184, i64 1
- %tmp13186 = getelementptr inbounds float* %tmp13185, i64 1
- %tmp13187 = getelementptr inbounds float* %tmp13186, i64 1
- %tmp13188 = getelementptr inbounds float* %tmp13187, i64 1
- %tmp13189 = getelementptr inbounds float* %tmp13188, i64 1
- %tmp13190 = getelementptr inbounds float* %tmp13189, i64 1
- %tmp13191 = getelementptr inbounds float* %tmp13190, i64 1
- %tmp13192 = getelementptr inbounds float* %tmp13191, i64 1
- %tmp13193 = getelementptr inbounds float* %tmp13192, i64 1
- %tmp13194 = getelementptr inbounds float* %tmp13193, i64 1
- %tmp13195 = getelementptr inbounds float* %tmp13194, i64 1
- %tmp13196 = getelementptr inbounds float* %tmp13195, i64 1
- %tmp13197 = getelementptr inbounds float* %tmp13196, i64 1
- %tmp13198 = getelementptr inbounds float* %tmp13197, i64 1
- %tmp13199 = getelementptr inbounds float* %tmp13198, i64 1
- %tmp13200 = getelementptr inbounds float* %tmp13199, i64 1
- %tmp13201 = getelementptr inbounds float* %tmp13200, i64 1
- %tmp13202 = getelementptr inbounds float* %tmp13201, i64 1
- %tmp13203 = getelementptr inbounds float* %tmp13202, i64 1
- %tmp13204 = getelementptr inbounds float* %tmp13203, i64 1
- %tmp13205 = getelementptr inbounds float* %tmp13204, i64 1
- %tmp13206 = getelementptr inbounds float* %tmp13205, i64 1
- %tmp13207 = getelementptr inbounds float* %tmp13206, i64 1
- %tmp13208 = getelementptr inbounds float* %tmp13207, i64 1
- %tmp13209 = getelementptr inbounds float* %tmp13208, i64 1
- %tmp13210 = getelementptr inbounds float* %tmp13209, i64 1
- %tmp13211 = getelementptr inbounds float* %tmp13210, i64 1
- %tmp13212 = getelementptr inbounds float* %tmp13211, i64 1
- %tmp13213 = getelementptr inbounds float* %tmp13212, i64 1
- %tmp13214 = getelementptr inbounds float* %tmp13213, i64 1
- %tmp13215 = getelementptr inbounds float* %tmp13214, i64 1
- %tmp13216 = getelementptr inbounds float* %tmp13215, i64 1
- %tmp13217 = getelementptr inbounds float* %tmp13216, i64 1
- %tmp13218 = getelementptr inbounds float* %tmp13217, i64 1
- %tmp13219 = getelementptr inbounds float* %tmp13218, i64 1
- %tmp13220 = getelementptr inbounds float* %tmp13219, i64 1
- %tmp13221 = getelementptr inbounds float* %tmp13220, i64 1
- %tmp13222 = getelementptr inbounds float* %tmp13221, i64 1
- %tmp13223 = getelementptr inbounds float* %tmp13222, i64 1
- %tmp13224 = getelementptr inbounds float* %tmp13223, i64 1
- %tmp13225 = getelementptr inbounds float* %tmp13224, i64 1
- %tmp13226 = getelementptr inbounds float* %tmp13225, i64 1
- %tmp13227 = getelementptr inbounds float* %tmp13226, i64 1
- %tmp13228 = getelementptr inbounds float* %tmp13227, i64 1
- %tmp13229 = getelementptr inbounds float* %tmp13228, i64 1
- %tmp13230 = getelementptr inbounds float* %tmp13229, i64 1
- %tmp13231 = getelementptr inbounds float* %tmp13230, i64 1
- %tmp13232 = getelementptr inbounds float* %tmp13231, i64 1
- %tmp13233 = getelementptr inbounds float* %tmp13232, i64 1
- %tmp13234 = getelementptr inbounds float* %tmp13233, i64 1
- %tmp13235 = getelementptr inbounds float* %tmp13234, i64 1
- %tmp13236 = getelementptr inbounds float* %tmp13235, i64 1
- %tmp13237 = getelementptr inbounds float* %tmp13236, i64 1
- %tmp13238 = getelementptr inbounds float* %tmp13237, i64 1
- %tmp13239 = getelementptr inbounds float* %tmp13238, i64 1
- %tmp13240 = getelementptr inbounds float* %tmp13239, i64 1
- %tmp13241 = getelementptr inbounds float* %tmp13240, i64 1
- %tmp13242 = getelementptr inbounds float* %tmp13241, i64 1
- %tmp13243 = getelementptr inbounds float* %tmp13242, i64 1
- %tmp13244 = getelementptr inbounds float* %tmp13243, i64 1
- %tmp13245 = getelementptr inbounds float* %tmp13244, i64 1
- %tmp13246 = getelementptr inbounds float* %tmp13245, i64 1
- %tmp13247 = getelementptr inbounds float* %tmp13246, i64 1
- %tmp13248 = getelementptr inbounds float* %tmp13247, i64 1
- %tmp13249 = getelementptr inbounds float* %tmp13248, i64 1
- %tmp13250 = getelementptr inbounds float* %tmp13249, i64 1
- %tmp13251 = getelementptr inbounds float* %tmp13250, i64 1
- %tmp13252 = getelementptr inbounds float* %tmp13251, i64 1
- %tmp13253 = getelementptr inbounds float* %tmp13252, i64 1
- %tmp13254 = getelementptr inbounds float* %tmp13253, i64 1
- %tmp13255 = getelementptr inbounds float* %tmp13254, i64 1
- %tmp13256 = getelementptr inbounds float* %tmp13255, i64 1
- %tmp13257 = getelementptr inbounds float* %tmp13256, i64 1
- %tmp13258 = getelementptr inbounds float* %tmp13257, i64 1
- %tmp13259 = getelementptr inbounds float* %tmp13258, i64 1
- %tmp13260 = getelementptr inbounds float* %tmp13259, i64 1
- %tmp13261 = getelementptr inbounds float* %tmp13260, i64 1
- %tmp13262 = getelementptr inbounds float* %tmp13261, i64 1
- %tmp13263 = getelementptr inbounds float* %tmp13262, i64 1
- %tmp13264 = getelementptr inbounds float* %tmp13263, i64 1
- %tmp13265 = getelementptr inbounds float* %tmp13264, i64 1
- %tmp13266 = getelementptr inbounds float* %tmp13265, i64 1
- %tmp13267 = getelementptr inbounds float* %tmp13266, i64 1
- %tmp13268 = getelementptr inbounds float* %tmp13267, i64 1
- %tmp13269 = getelementptr inbounds float* %tmp13268, i64 1
- %tmp13270 = getelementptr inbounds float* %tmp13269, i64 1
- %tmp13271 = getelementptr inbounds float* %tmp13270, i64 1
- %tmp13272 = getelementptr inbounds float* %tmp13271, i64 1
- %tmp13273 = getelementptr inbounds float* %tmp13272, i64 1
- %tmp13274 = getelementptr inbounds float* %tmp13273, i64 1
- %tmp13275 = getelementptr inbounds float* %tmp13274, i64 1
- %tmp13276 = getelementptr inbounds float* %tmp13275, i64 1
- %tmp13277 = getelementptr inbounds float* %tmp13276, i64 1
- %tmp13278 = getelementptr inbounds float* %tmp13277, i64 1
- %tmp13279 = getelementptr inbounds float* %tmp13278, i64 1
- %tmp13280 = getelementptr inbounds float* %tmp13279, i64 1
- %tmp13281 = getelementptr inbounds float* %tmp13280, i64 1
- %tmp13282 = getelementptr inbounds float* %tmp13281, i64 1
- %tmp13283 = getelementptr inbounds float* %tmp13282, i64 1
- %tmp13284 = getelementptr inbounds float* %tmp13283, i64 1
- %tmp13285 = getelementptr inbounds float* %tmp13284, i64 1
- %tmp13286 = getelementptr inbounds float* %tmp13285, i64 1
- %tmp13287 = getelementptr inbounds float* %tmp13286, i64 1
- %tmp13288 = getelementptr inbounds float* %tmp13287, i64 1
- %tmp13289 = getelementptr inbounds float* %tmp13288, i64 1
- %tmp13290 = getelementptr inbounds float* %tmp13289, i64 1
- %tmp13291 = getelementptr inbounds float* %tmp13290, i64 1
- %tmp13292 = getelementptr inbounds float* %tmp13291, i64 1
- %tmp13293 = getelementptr inbounds float* %tmp13292, i64 1
- %tmp13294 = getelementptr inbounds float* %tmp13293, i64 1
- %tmp13295 = getelementptr inbounds float* %tmp13294, i64 1
- %tmp13296 = getelementptr inbounds float* %tmp13295, i64 1
- %tmp13297 = getelementptr inbounds float* %tmp13296, i64 1
- %tmp13298 = getelementptr inbounds float* %tmp13297, i64 1
- %tmp13299 = getelementptr inbounds float* %tmp13298, i64 1
- %tmp13300 = getelementptr inbounds float* %tmp13299, i64 1
- %tmp13301 = getelementptr inbounds float* %tmp13300, i64 1
- %tmp13302 = getelementptr inbounds float* %tmp13301, i64 1
- %tmp13303 = getelementptr inbounds float* %tmp13302, i64 1
- %tmp13304 = getelementptr inbounds float* %tmp13303, i64 1
- %tmp13305 = getelementptr inbounds float* %tmp13304, i64 1
- %tmp13306 = getelementptr inbounds float* %tmp13305, i64 1
- %tmp13307 = getelementptr inbounds float* %tmp13306, i64 1
- %tmp13308 = getelementptr inbounds float* %tmp13307, i64 1
- %tmp13309 = getelementptr inbounds float* %tmp13308, i64 1
- %tmp13310 = getelementptr inbounds float* %tmp13309, i64 1
- %tmp13311 = getelementptr inbounds float* %tmp13310, i64 1
- %tmp13312 = getelementptr inbounds float* %tmp13311, i64 1
- %tmp13313 = getelementptr inbounds float* %tmp13312, i64 1
- %tmp13314 = getelementptr inbounds float* %tmp13313, i64 1
- %tmp13315 = getelementptr inbounds float* %tmp13314, i64 1
- %tmp13316 = getelementptr inbounds float* %tmp13315, i64 1
- %tmp13317 = getelementptr inbounds float* %tmp13316, i64 1
- %tmp13318 = getelementptr inbounds float* %tmp13317, i64 1
- %tmp13319 = getelementptr inbounds float* %tmp13318, i64 1
- %tmp13320 = getelementptr inbounds float* %tmp13319, i64 1
- %tmp13321 = getelementptr inbounds float* %tmp13320, i64 1
- %tmp13322 = getelementptr inbounds float* %tmp13321, i64 1
- %tmp13323 = getelementptr inbounds float* %tmp13322, i64 1
- %tmp13324 = getelementptr inbounds float* %tmp13323, i64 1
- %tmp13325 = getelementptr inbounds float* %tmp13324, i64 1
- %tmp13326 = getelementptr inbounds float* %tmp13325, i64 1
- %tmp13327 = getelementptr inbounds float* %tmp13326, i64 1
- %tmp13328 = getelementptr inbounds float* %tmp13327, i64 1
- %tmp13329 = getelementptr inbounds float* %tmp13328, i64 1
- %tmp13330 = getelementptr inbounds float* %tmp13329, i64 1
- %tmp13331 = getelementptr inbounds float* %tmp13330, i64 1
- %tmp13332 = getelementptr inbounds float* %tmp13331, i64 1
- %tmp13333 = getelementptr inbounds float* %tmp13332, i64 1
- %tmp13334 = getelementptr inbounds float* %tmp13333, i64 1
- %tmp13335 = getelementptr inbounds float* %tmp13334, i64 1
- %tmp13336 = getelementptr inbounds float* %tmp13335, i64 1
- %tmp13337 = getelementptr inbounds float* %tmp13336, i64 1
- %tmp13338 = getelementptr inbounds float* %tmp13337, i64 1
- %tmp13339 = getelementptr inbounds float* %tmp13338, i64 1
- %tmp13340 = getelementptr inbounds float* %tmp13339, i64 1
- %tmp13341 = getelementptr inbounds float* %tmp13340, i64 1
- %tmp13342 = getelementptr inbounds float* %tmp13341, i64 1
- %tmp13343 = getelementptr inbounds float* %tmp13342, i64 1
- %tmp13344 = getelementptr inbounds float* %tmp13343, i64 1
- %tmp13345 = getelementptr inbounds float* %tmp13344, i64 1
- %tmp13346 = getelementptr inbounds float* %tmp13345, i64 1
- %tmp13347 = getelementptr inbounds float* %tmp13346, i64 1
- %tmp13348 = getelementptr inbounds float* %tmp13347, i64 1
- %tmp13349 = getelementptr inbounds float* %tmp13348, i64 1
- %tmp13350 = getelementptr inbounds float* %tmp13349, i64 1
- %tmp13351 = getelementptr inbounds float* %tmp13350, i64 1
- %tmp13352 = getelementptr inbounds float* %tmp13351, i64 1
- %tmp13353 = getelementptr inbounds float* %tmp13352, i64 1
- %tmp13354 = getelementptr inbounds float* %tmp13353, i64 1
- %tmp13355 = getelementptr inbounds float* %tmp13354, i64 1
- %tmp13356 = getelementptr inbounds float* %tmp13355, i64 1
- %tmp13357 = getelementptr inbounds float* %tmp13356, i64 1
- %tmp13358 = getelementptr inbounds float* %tmp13357, i64 1
- %tmp13359 = getelementptr inbounds float* %tmp13358, i64 1
- %tmp13360 = getelementptr inbounds float* %tmp13359, i64 1
- %tmp13361 = getelementptr inbounds float* %tmp13360, i64 1
- %tmp13362 = getelementptr inbounds float* %tmp13361, i64 1
- %tmp13363 = getelementptr inbounds float* %tmp13362, i64 1
- %tmp13364 = getelementptr inbounds float* %tmp13363, i64 1
- %tmp13365 = getelementptr inbounds float* %tmp13364, i64 1
- %tmp13366 = getelementptr inbounds float* %tmp13365, i64 1
- %tmp13367 = getelementptr inbounds float* %tmp13366, i64 1
- %tmp13368 = getelementptr inbounds float* %tmp13367, i64 1
- %tmp13369 = getelementptr inbounds float* %tmp13368, i64 1
- %tmp13370 = getelementptr inbounds float* %tmp13369, i64 1
- %tmp13371 = getelementptr inbounds float* %tmp13370, i64 1
- %tmp13372 = getelementptr inbounds float* %tmp13371, i64 1
- %tmp13373 = getelementptr inbounds float* %tmp13372, i64 1
- %tmp13374 = getelementptr inbounds float* %tmp13373, i64 1
- %tmp13375 = getelementptr inbounds float* %tmp13374, i64 1
- %tmp13376 = getelementptr inbounds float* %tmp13375, i64 1
- %tmp13377 = getelementptr inbounds float* %tmp13376, i64 1
- %tmp13378 = getelementptr inbounds float* %tmp13377, i64 1
- %tmp13379 = getelementptr inbounds float* %tmp13378, i64 1
- %tmp13380 = getelementptr inbounds float* %tmp13379, i64 1
- %tmp13381 = getelementptr inbounds float* %tmp13380, i64 1
- %tmp13382 = getelementptr inbounds float* %tmp13381, i64 1
- %tmp13383 = getelementptr inbounds float* %tmp13382, i64 1
- %tmp13384 = getelementptr inbounds float* %tmp13383, i64 1
- %tmp13385 = getelementptr inbounds float* %tmp13384, i64 1
- %tmp13386 = getelementptr inbounds float* %tmp13385, i64 1
- %tmp13387 = getelementptr inbounds float* %tmp13386, i64 1
- %tmp13388 = getelementptr inbounds float* %tmp13387, i64 1
- %tmp13389 = getelementptr inbounds float* %tmp13388, i64 1
- %tmp13390 = getelementptr inbounds float* %tmp13389, i64 1
- %tmp13391 = getelementptr inbounds float* %tmp13390, i64 1
- %tmp13392 = getelementptr inbounds float* %tmp13391, i64 1
- %tmp13393 = getelementptr inbounds float* %tmp13392, i64 1
- %tmp13394 = getelementptr inbounds float* %tmp13393, i64 1
- %tmp13395 = getelementptr inbounds float* %tmp13394, i64 1
- %tmp13396 = getelementptr inbounds float* %tmp13395, i64 1
- %tmp13397 = getelementptr inbounds float* %tmp13396, i64 1
- %tmp13398 = getelementptr inbounds float* %tmp13397, i64 1
- %tmp13399 = getelementptr inbounds float* %tmp13398, i64 1
- %tmp13400 = getelementptr inbounds float* %tmp13399, i64 1
- %tmp13401 = getelementptr inbounds float* %tmp13400, i64 1
- %tmp13402 = getelementptr inbounds float* %tmp13401, i64 1
- %tmp13403 = getelementptr inbounds float* %tmp13402, i64 1
- %tmp13404 = getelementptr inbounds float* %tmp13403, i64 1
- %tmp13405 = getelementptr inbounds float* %tmp13404, i64 1
- %tmp13406 = getelementptr inbounds float* %tmp13405, i64 1
- %tmp13407 = getelementptr inbounds float* %tmp13406, i64 1
- %tmp13408 = getelementptr inbounds float* %tmp13407, i64 1
- %tmp13409 = getelementptr inbounds float* %tmp13408, i64 1
- %tmp13410 = getelementptr inbounds float* %tmp13409, i64 1
- %tmp13411 = getelementptr inbounds float* %tmp13410, i64 1
- %tmp13412 = getelementptr inbounds float* %tmp13411, i64 1
- %tmp13413 = getelementptr inbounds float* %tmp13412, i64 1
- %tmp13414 = getelementptr inbounds float* %tmp13413, i64 1
- %tmp13415 = getelementptr inbounds float* %tmp13414, i64 1
- %tmp13416 = getelementptr inbounds float* %tmp13415, i64 1
- %tmp13417 = getelementptr inbounds float* %tmp13416, i64 1
- %tmp13418 = getelementptr inbounds float* %tmp13417, i64 1
- %tmp13419 = getelementptr inbounds float* %tmp13418, i64 1
- %tmp13420 = getelementptr inbounds float* %tmp13419, i64 1
- %tmp13421 = getelementptr inbounds float* %tmp13420, i64 1
- %tmp13422 = getelementptr inbounds float* %tmp13421, i64 1
- %tmp13423 = getelementptr inbounds float* %tmp13422, i64 1
- %tmp13424 = getelementptr inbounds float* %tmp13423, i64 1
- %tmp13425 = getelementptr inbounds float* %tmp13424, i64 1
- %tmp13426 = getelementptr inbounds float* %tmp13425, i64 1
- %tmp13427 = getelementptr inbounds float* %tmp13426, i64 1
- %tmp13428 = getelementptr inbounds float* %tmp13427, i64 1
- %tmp13429 = getelementptr inbounds float* %tmp13428, i64 1
- %tmp13430 = getelementptr inbounds float* %tmp13429, i64 1
- %tmp13431 = getelementptr inbounds float* %tmp13430, i64 1
- %tmp13432 = getelementptr inbounds float* %tmp13431, i64 1
- %tmp13433 = getelementptr inbounds float* %tmp13432, i64 1
- %tmp13434 = getelementptr inbounds float* %tmp13433, i64 1
- %tmp13435 = getelementptr inbounds float* %tmp13434, i64 1
- %tmp13436 = getelementptr inbounds float* %tmp13435, i64 1
- %tmp13437 = getelementptr inbounds float* %tmp13436, i64 1
- %tmp13438 = getelementptr inbounds float* %tmp13437, i64 1
- %tmp13439 = getelementptr inbounds float* %tmp13438, i64 1
- %tmp13440 = getelementptr inbounds float* %tmp13439, i64 1
- %tmp13441 = getelementptr inbounds float* %tmp13440, i64 1
- %tmp13442 = getelementptr inbounds float* %tmp13441, i64 1
- %tmp13443 = getelementptr inbounds float* %tmp13442, i64 1
- %tmp13444 = getelementptr inbounds float* %tmp13443, i64 1
- %tmp13445 = getelementptr inbounds float* %tmp13444, i64 1
- %tmp13446 = getelementptr inbounds float* %tmp13445, i64 1
- %tmp13447 = getelementptr inbounds float* %tmp13446, i64 1
- %tmp13448 = getelementptr inbounds float* %tmp13447, i64 1
- %tmp13449 = getelementptr inbounds float* %tmp13448, i64 1
- %tmp13450 = getelementptr inbounds float* %tmp13449, i64 1
- %tmp13451 = getelementptr inbounds float* %tmp13450, i64 1
- %tmp13452 = getelementptr inbounds float* %tmp13451, i64 1
- %tmp13453 = getelementptr inbounds float* %tmp13452, i64 1
- %tmp13454 = getelementptr inbounds float* %tmp13453, i64 1
- %tmp13455 = getelementptr inbounds float* %tmp13454, i64 1
- %tmp13456 = getelementptr inbounds float* %tmp13455, i64 1
- %tmp13457 = getelementptr inbounds float* %tmp13456, i64 1
- %tmp13458 = getelementptr inbounds float* %tmp13457, i64 1
- %tmp13459 = getelementptr inbounds float* %tmp13458, i64 1
- %tmp13460 = getelementptr inbounds float* %tmp13459, i64 1
- %tmp13461 = getelementptr inbounds float* %tmp13460, i64 1
- %tmp13462 = getelementptr inbounds float* %tmp13461, i64 1
- %tmp13463 = getelementptr inbounds float* %tmp13462, i64 1
- %tmp13464 = getelementptr inbounds float* %tmp13463, i64 1
- %tmp13465 = getelementptr inbounds float* %tmp13464, i64 1
- %tmp13466 = getelementptr inbounds float* %tmp13465, i64 1
- %tmp13467 = getelementptr inbounds float* %tmp13466, i64 1
- %tmp13468 = getelementptr inbounds float* %tmp13467, i64 1
- %tmp13469 = getelementptr inbounds float* %tmp13468, i64 1
- %tmp13470 = getelementptr inbounds float* %tmp13469, i64 1
- %tmp13471 = getelementptr inbounds float* %tmp13470, i64 1
- %tmp13472 = getelementptr inbounds float* %tmp13471, i64 1
- %tmp13473 = getelementptr inbounds float* %tmp13472, i64 1
- %tmp13474 = getelementptr inbounds float* %tmp13473, i64 1
- %tmp13475 = getelementptr inbounds float* %tmp13474, i64 1
- %tmp13476 = getelementptr inbounds float* %tmp13475, i64 1
- %tmp13477 = getelementptr inbounds float* %tmp13476, i64 1
- %tmp13478 = getelementptr inbounds float* %tmp13477, i64 1
- %tmp13479 = getelementptr inbounds float* %tmp13478, i64 1
- %tmp13480 = getelementptr inbounds float* %tmp13479, i64 1
- %tmp13481 = getelementptr inbounds float* %tmp13480, i64 1
- %tmp13482 = getelementptr inbounds float* %tmp13481, i64 1
- %tmp13483 = getelementptr inbounds float* %tmp13482, i64 1
- %tmp13484 = getelementptr inbounds float* %tmp13483, i64 1
- %tmp13485 = getelementptr inbounds float* %tmp13484, i64 1
- %tmp13486 = getelementptr inbounds float* %tmp13485, i64 1
- %tmp13487 = getelementptr inbounds float* %tmp13486, i64 1
- %tmp13488 = getelementptr inbounds float* %tmp13487, i64 1
- %tmp13489 = getelementptr inbounds float* %tmp13488, i64 1
- %tmp13490 = getelementptr inbounds float* %tmp13489, i64 1
- %tmp13491 = getelementptr inbounds float* %tmp13490, i64 1
- %tmp13492 = getelementptr inbounds float* %tmp13491, i64 1
- %tmp13493 = getelementptr inbounds float* %tmp13492, i64 1
- %tmp13494 = getelementptr inbounds float* %tmp13493, i64 1
- %tmp13495 = getelementptr inbounds float* %tmp13494, i64 1
- %tmp13496 = getelementptr inbounds float* %tmp13495, i64 1
- %tmp13497 = getelementptr inbounds float* %tmp13496, i64 1
- %tmp13498 = getelementptr inbounds float* %tmp13497, i64 1
- %tmp13499 = getelementptr inbounds float* %tmp13498, i64 1
- %tmp13500 = getelementptr inbounds float* %tmp13499, i64 1
- %tmp13501 = getelementptr inbounds float* %tmp13500, i64 1
- %tmp13502 = getelementptr inbounds float* %tmp13501, i64 1
- %tmp13503 = getelementptr inbounds float* %tmp13502, i64 1
- %tmp13504 = getelementptr inbounds float* %tmp13503, i64 1
- %tmp13505 = getelementptr inbounds float* %tmp13504, i64 1
- %tmp13506 = getelementptr inbounds float* %tmp13505, i64 1
- %tmp13507 = getelementptr inbounds float* %tmp13506, i64 1
- %tmp13508 = getelementptr inbounds float* %tmp13507, i64 1
- %tmp13509 = getelementptr inbounds float* %tmp13508, i64 1
- %tmp13510 = getelementptr inbounds float* %tmp13509, i64 1
- %tmp13511 = getelementptr inbounds float* %tmp13510, i64 1
- %tmp13512 = getelementptr inbounds float* %tmp13511, i64 1
- %tmp13513 = getelementptr inbounds float* %tmp13512, i64 1
- %tmp13514 = getelementptr inbounds float* %tmp13513, i64 1
- %tmp13515 = getelementptr inbounds float* %tmp13514, i64 1
- %tmp13516 = getelementptr inbounds float* %tmp13515, i64 1
- %tmp13517 = getelementptr inbounds float* %tmp13516, i64 1
- %tmp13518 = getelementptr inbounds float* %tmp13517, i64 1
- %tmp13519 = getelementptr inbounds float* %tmp13518, i64 1
- %tmp13520 = getelementptr inbounds float* %tmp13519, i64 1
- %tmp13521 = getelementptr inbounds float* %tmp13520, i64 1
- %tmp13522 = getelementptr inbounds float* %tmp13521, i64 1
- %tmp13523 = getelementptr inbounds float* %tmp13522, i64 1
- %tmp13524 = getelementptr inbounds float* %tmp13523, i64 1
- %tmp13525 = getelementptr inbounds float* %tmp13524, i64 1
- %tmp13526 = getelementptr inbounds float* %tmp13525, i64 1
- %tmp13527 = getelementptr inbounds float* %tmp13526, i64 1
- %tmp13528 = getelementptr inbounds float* %tmp13527, i64 1
- %tmp13529 = getelementptr inbounds float* %tmp13528, i64 1
- %tmp13530 = getelementptr inbounds float* %tmp13529, i64 1
- %tmp13531 = getelementptr inbounds float* %tmp13530, i64 1
- %tmp13532 = getelementptr inbounds float* %tmp13531, i64 1
- %tmp13533 = getelementptr inbounds float* %tmp13532, i64 1
- %tmp13534 = getelementptr inbounds float* %tmp13533, i64 1
- %tmp13535 = getelementptr inbounds float* %tmp13534, i64 1
- %tmp13536 = getelementptr inbounds float* %tmp13535, i64 1
- %tmp13537 = getelementptr inbounds float* %tmp13536, i64 1
- %tmp13538 = getelementptr inbounds float* %tmp13537, i64 1
- %tmp13539 = getelementptr inbounds float* %tmp13538, i64 1
- %tmp13540 = getelementptr inbounds float* %tmp13539, i64 1
- %tmp13541 = getelementptr inbounds float* %tmp13540, i64 1
- %tmp13542 = getelementptr inbounds float* %tmp13541, i64 1
- %tmp13543 = getelementptr inbounds float* %tmp13542, i64 1
- %tmp13544 = getelementptr inbounds float* %tmp13543, i64 1
- %tmp13545 = getelementptr inbounds float* %tmp13544, i64 1
- %tmp13546 = getelementptr inbounds float* %tmp13545, i64 1
- %tmp13547 = getelementptr inbounds float* %tmp13546, i64 1
- %tmp13548 = getelementptr inbounds float* %tmp13547, i64 1
- %tmp13549 = getelementptr inbounds float* %tmp13548, i64 1
- %tmp13550 = getelementptr inbounds float* %tmp13549, i64 1
- %tmp13551 = getelementptr inbounds float* %tmp13550, i64 1
- %tmp13552 = getelementptr inbounds float* %tmp13551, i64 1
- %tmp13553 = getelementptr inbounds float* %tmp13552, i64 1
- %tmp13554 = getelementptr inbounds float* %tmp13553, i64 1
- %tmp13555 = getelementptr inbounds float* %tmp13554, i64 1
- %tmp13556 = getelementptr inbounds float* %tmp13555, i64 1
- %tmp13557 = getelementptr inbounds float* %tmp13556, i64 1
- %tmp13558 = getelementptr inbounds float* %tmp13557, i64 1
- %tmp13559 = getelementptr inbounds float* %tmp13558, i64 1
- %tmp13560 = getelementptr inbounds float* %tmp13559, i64 1
- %tmp13561 = getelementptr inbounds float* %tmp13560, i64 1
- %tmp13562 = getelementptr inbounds float* %tmp13561, i64 1
- %tmp13563 = getelementptr inbounds float* %tmp13562, i64 1
- %tmp13564 = getelementptr inbounds float* %tmp13563, i64 1
- %tmp13565 = getelementptr inbounds float* %tmp13564, i64 1
- %tmp13566 = getelementptr inbounds float* %tmp13565, i64 1
- %tmp13567 = getelementptr inbounds float* %tmp13566, i64 1
- %tmp13568 = getelementptr inbounds float* %tmp13567, i64 1
- %tmp13569 = getelementptr inbounds float* %tmp13568, i64 1
- %tmp13570 = getelementptr inbounds float* %tmp13569, i64 1
- %tmp13571 = getelementptr inbounds float* %tmp13570, i64 1
- %tmp13572 = getelementptr inbounds float* %tmp13571, i64 1
- %tmp13573 = getelementptr inbounds float* %tmp13572, i64 1
- %tmp13574 = getelementptr inbounds float* %tmp13573, i64 1
- %tmp13575 = getelementptr inbounds float* %tmp13574, i64 1
- %tmp13576 = getelementptr inbounds float* %tmp13575, i64 1
- %tmp13577 = getelementptr inbounds float* %tmp13576, i64 1
- %tmp13578 = getelementptr inbounds float* %tmp13577, i64 1
- %tmp13579 = getelementptr inbounds float* %tmp13578, i64 1
- %tmp13580 = getelementptr inbounds float* %tmp13579, i64 1
- %tmp13581 = getelementptr inbounds float* %tmp13580, i64 1
- %tmp13582 = getelementptr inbounds float* %tmp13581, i64 1
- %tmp13583 = getelementptr inbounds float* %tmp13582, i64 1
- %tmp13584 = getelementptr inbounds float* %tmp13583, i64 1
- %tmp13585 = getelementptr inbounds float* %tmp13584, i64 1
- %tmp13586 = getelementptr inbounds float* %tmp13585, i64 1
- %tmp13587 = getelementptr inbounds float* %tmp13586, i64 1
- %tmp13588 = getelementptr inbounds float* %tmp13587, i64 1
- %tmp13589 = getelementptr inbounds float* %tmp13588, i64 1
- %tmp13590 = getelementptr inbounds float* %tmp13589, i64 1
- %tmp13591 = getelementptr inbounds float* %tmp13590, i64 1
- %tmp13592 = getelementptr inbounds float* %tmp13591, i64 1
- %tmp13593 = getelementptr inbounds float* %tmp13592, i64 1
- %tmp13594 = getelementptr inbounds float* %tmp13593, i64 1
- %tmp13595 = getelementptr inbounds float* %tmp13594, i64 1
- %tmp13596 = getelementptr inbounds float* %tmp13595, i64 1
- %tmp13597 = getelementptr inbounds float* %tmp13596, i64 1
- %tmp13598 = getelementptr inbounds float* %tmp13597, i64 1
- %tmp13599 = getelementptr inbounds float* %tmp13598, i64 1
- %tmp13600 = getelementptr inbounds float* %tmp13599, i64 1
- %tmp13601 = getelementptr inbounds float* %tmp13600, i64 1
- %tmp13602 = getelementptr inbounds float* %tmp13601, i64 1
- %tmp13603 = getelementptr inbounds float* %tmp13602, i64 1
- %tmp13604 = getelementptr inbounds float* %tmp13603, i64 1
- %tmp13605 = getelementptr inbounds float* %tmp13604, i64 1
- %tmp13606 = getelementptr inbounds float* %tmp13605, i64 1
- %tmp13607 = getelementptr inbounds float* %tmp13606, i64 1
- %tmp13608 = getelementptr inbounds float* %tmp13607, i64 1
- %tmp13609 = getelementptr inbounds float* %tmp13608, i64 1
- %tmp13610 = getelementptr inbounds float* %tmp13609, i64 1
- %tmp13611 = getelementptr inbounds float* %tmp13610, i64 1
- %tmp13612 = getelementptr inbounds float* %tmp13611, i64 1
- %tmp13613 = getelementptr inbounds float* %tmp13612, i64 1
- %tmp13614 = getelementptr inbounds float* %tmp13613, i64 1
- %tmp13615 = getelementptr inbounds float* %tmp13614, i64 1
- %tmp13616 = getelementptr inbounds float* %tmp13615, i64 1
- %tmp13617 = getelementptr inbounds float* %tmp13616, i64 1
- %tmp13618 = getelementptr inbounds float* %tmp13617, i64 1
- %tmp13619 = getelementptr inbounds float* %tmp13618, i64 1
- %tmp13620 = getelementptr inbounds float* %tmp13619, i64 1
- %tmp13621 = getelementptr inbounds float* %tmp13620, i64 1
- %tmp13622 = getelementptr inbounds float* %tmp13621, i64 1
- %tmp13623 = getelementptr inbounds float* %tmp13622, i64 1
- %tmp13624 = getelementptr inbounds float* %tmp13623, i64 1
- %tmp13625 = getelementptr inbounds float* %tmp13624, i64 1
- %tmp13626 = getelementptr inbounds float* %tmp13625, i64 1
- %tmp13627 = getelementptr inbounds float* %tmp13626, i64 1
- %tmp13628 = getelementptr inbounds float* %tmp13627, i64 1
- %tmp13629 = getelementptr inbounds float* %tmp13628, i64 1
- %tmp13630 = getelementptr inbounds float* %tmp13629, i64 1
- %tmp13631 = getelementptr inbounds float* %tmp13630, i64 1
- %tmp13632 = getelementptr inbounds float* %tmp13631, i64 1
- %tmp13633 = getelementptr inbounds float* %tmp13632, i64 1
- %tmp13634 = getelementptr inbounds float* %tmp13633, i64 1
- %tmp13635 = getelementptr inbounds float* %tmp13634, i64 1
- %tmp13636 = getelementptr inbounds float* %tmp13635, i64 1
- %tmp13637 = getelementptr inbounds float* %tmp13636, i64 1
- %tmp13638 = getelementptr inbounds float* %tmp13637, i64 1
- %tmp13639 = getelementptr inbounds float* %tmp13638, i64 1
- %tmp13640 = getelementptr inbounds float* %tmp13639, i64 1
- %tmp13641 = getelementptr inbounds float* %tmp13640, i64 1
- %tmp13642 = getelementptr inbounds float* %tmp13641, i64 1
- %tmp13643 = getelementptr inbounds float* %tmp13642, i64 1
- %tmp13644 = getelementptr inbounds float* %tmp13643, i64 1
- %tmp13645 = getelementptr inbounds float* %tmp13644, i64 1
- %tmp13646 = getelementptr inbounds float* %tmp13645, i64 1
- %tmp13647 = getelementptr inbounds float* %tmp13646, i64 1
- %tmp13648 = getelementptr inbounds float* %tmp13647, i64 1
- %tmp13649 = getelementptr inbounds float* %tmp13648, i64 1
- %tmp13650 = getelementptr inbounds float* %tmp13649, i64 1
- %tmp13651 = getelementptr inbounds float* %tmp13650, i64 1
- %tmp13652 = getelementptr inbounds float* %tmp13651, i64 1
- %tmp13653 = getelementptr inbounds float* %tmp13652, i64 1
- %tmp13654 = getelementptr inbounds float* %tmp13653, i64 1
- %tmp13655 = getelementptr inbounds float* %tmp13654, i64 1
- %tmp13656 = getelementptr inbounds float* %tmp13655, i64 1
- %tmp13657 = getelementptr inbounds float* %tmp13656, i64 1
- %tmp13658 = getelementptr inbounds float* %tmp13657, i64 1
- %tmp13659 = getelementptr inbounds float* %tmp13658, i64 1
- %tmp13660 = getelementptr inbounds float* %tmp13659, i64 1
- %tmp13661 = getelementptr inbounds float* %tmp13660, i64 1
- %tmp13662 = getelementptr inbounds float* %tmp13661, i64 1
- %tmp13663 = getelementptr inbounds float* %tmp13662, i64 1
- %tmp13664 = getelementptr inbounds float* %tmp13663, i64 1
- %tmp13665 = getelementptr inbounds float* %tmp13664, i64 1
- %tmp13666 = getelementptr inbounds float* %tmp13665, i64 1
- %tmp13667 = getelementptr inbounds float* %tmp13666, i64 1
- %tmp13668 = getelementptr inbounds float* %tmp13667, i64 1
- %tmp13669 = getelementptr inbounds float* %tmp13668, i64 1
- %tmp13670 = getelementptr inbounds float* %tmp13669, i64 1
- %tmp13671 = getelementptr inbounds float* %tmp13670, i64 1
- %tmp13672 = getelementptr inbounds float* %tmp13671, i64 1
- %tmp13673 = getelementptr inbounds float* %tmp13672, i64 1
- %tmp13674 = getelementptr inbounds float* %tmp13673, i64 1
- %tmp13675 = getelementptr inbounds float* %tmp13674, i64 1
- %tmp13676 = getelementptr inbounds float* %tmp13675, i64 1
- %tmp13677 = getelementptr inbounds float* %tmp13676, i64 1
- %tmp13678 = getelementptr inbounds float* %tmp13677, i64 1
- %tmp13679 = getelementptr inbounds float* %tmp13678, i64 1
- %tmp13680 = getelementptr inbounds float* %tmp13679, i64 1
- %tmp13681 = getelementptr inbounds float* %tmp13680, i64 1
- %tmp13682 = getelementptr inbounds float* %tmp13681, i64 1
- %tmp13683 = getelementptr inbounds float* %tmp13682, i64 1
- %tmp13684 = getelementptr inbounds float* %tmp13683, i64 1
- %tmp13685 = getelementptr inbounds float* %tmp13684, i64 1
- %tmp13686 = getelementptr inbounds float* %tmp13685, i64 1
- %tmp13687 = getelementptr inbounds float* %tmp13686, i64 1
- %tmp13688 = getelementptr inbounds float* %tmp13687, i64 1
- %tmp13689 = getelementptr inbounds float* %tmp13688, i64 1
- %tmp13690 = getelementptr inbounds float* %tmp13689, i64 1
- %tmp13691 = getelementptr inbounds float* %tmp13690, i64 1
- %tmp13692 = getelementptr inbounds float* %tmp13691, i64 1
- %tmp13693 = getelementptr inbounds float* %tmp13692, i64 1
- %tmp13694 = getelementptr inbounds float* %tmp13693, i64 1
- %tmp13695 = getelementptr inbounds float* %tmp13694, i64 1
- %tmp13696 = getelementptr inbounds float* %tmp13695, i64 1
- %tmp13697 = getelementptr inbounds float* %tmp13696, i64 1
- %tmp13698 = getelementptr inbounds float* %tmp13697, i64 1
- %tmp13699 = getelementptr inbounds float* %tmp13698, i64 1
- %tmp13700 = getelementptr inbounds float* %tmp13699, i64 1
- %tmp13701 = getelementptr inbounds float* %tmp13700, i64 1
- %tmp13702 = getelementptr inbounds float* %tmp13701, i64 1
- %tmp13703 = getelementptr inbounds float* %tmp13702, i64 1
- %tmp13704 = getelementptr inbounds float* %tmp13703, i64 1
- %tmp13705 = getelementptr inbounds float* %tmp13704, i64 1
- %tmp13706 = getelementptr inbounds float* %tmp13705, i64 1
- %tmp13707 = getelementptr inbounds float* %tmp13706, i64 1
- %tmp13708 = getelementptr inbounds float* %tmp13707, i64 1
- %tmp13709 = getelementptr inbounds float* %tmp13708, i64 1
- %tmp13710 = getelementptr inbounds float* %tmp13709, i64 1
- %tmp13711 = getelementptr inbounds float* %tmp13710, i64 1
- %tmp13712 = getelementptr inbounds float* %tmp13711, i64 1
- %tmp13713 = getelementptr inbounds float* %tmp13712, i64 1
- %tmp13714 = getelementptr inbounds float* %tmp13713, i64 1
- %tmp13715 = getelementptr inbounds float* %tmp13714, i64 1
- %tmp13716 = getelementptr inbounds float* %tmp13715, i64 1
- %tmp13717 = getelementptr inbounds float* %tmp13716, i64 1
- %tmp13718 = getelementptr inbounds float* %tmp13717, i64 1
- %tmp13719 = getelementptr inbounds float* %tmp13718, i64 1
- %tmp13720 = getelementptr inbounds float* %tmp13719, i64 1
- %tmp13721 = getelementptr inbounds float* %tmp13720, i64 1
- %tmp13722 = getelementptr inbounds float* %tmp13721, i64 1
- %tmp13723 = getelementptr inbounds float* %tmp13722, i64 1
- %tmp13724 = getelementptr inbounds float* %tmp13723, i64 1
- %tmp13725 = getelementptr inbounds float* %tmp13724, i64 1
- %tmp13726 = getelementptr inbounds float* %tmp13725, i64 1
- %tmp13727 = getelementptr inbounds float* %tmp13726, i64 1
- %tmp13728 = getelementptr inbounds float* %tmp13727, i64 1
- %tmp13729 = getelementptr inbounds float* %tmp13728, i64 1
- %tmp13730 = getelementptr inbounds float* %tmp13729, i64 1
- %tmp13731 = getelementptr inbounds float* %tmp13730, i64 1
- %tmp13732 = getelementptr inbounds float* %tmp13731, i64 1
- %tmp13733 = getelementptr inbounds float* %tmp13732, i64 1
- %tmp13734 = getelementptr inbounds float* %tmp13733, i64 1
- %tmp13735 = getelementptr inbounds float* %tmp13734, i64 1
- %tmp13736 = getelementptr inbounds float* %tmp13735, i64 1
- %tmp13737 = getelementptr inbounds float* %tmp13736, i64 1
- %tmp13738 = getelementptr inbounds float* %tmp13737, i64 1
- %tmp13739 = getelementptr inbounds float* %tmp13738, i64 1
- %tmp13740 = getelementptr inbounds float* %tmp13739, i64 1
- %tmp13741 = getelementptr inbounds float* %tmp13740, i64 1
- %tmp13742 = getelementptr inbounds float* %tmp13741, i64 1
- %tmp13743 = getelementptr inbounds float* %tmp13742, i64 1
- %tmp13744 = getelementptr inbounds float* %tmp13743, i64 1
- %tmp13745 = getelementptr inbounds float* %tmp13744, i64 1
- %tmp13746 = getelementptr inbounds float* %tmp13745, i64 1
- %tmp13747 = getelementptr inbounds float* %tmp13746, i64 1
- %tmp13748 = getelementptr inbounds float* %tmp13747, i64 1
- %tmp13749 = getelementptr inbounds float* %tmp13748, i64 1
- %tmp13750 = getelementptr inbounds float* %tmp13749, i64 1
- %tmp13751 = getelementptr inbounds float* %tmp13750, i64 1
- %tmp13752 = getelementptr inbounds float* %tmp13751, i64 1
- %tmp13753 = getelementptr inbounds float* %tmp13752, i64 1
- %tmp13754 = getelementptr inbounds float* %tmp13753, i64 1
- %tmp13755 = getelementptr inbounds float* %tmp13754, i64 1
- %tmp13756 = getelementptr inbounds float* %tmp13755, i64 1
- %tmp13757 = getelementptr inbounds float* %tmp13756, i64 1
- %tmp13758 = getelementptr inbounds float* %tmp13757, i64 1
- %tmp13759 = getelementptr inbounds float* %tmp13758, i64 1
- %tmp13760 = getelementptr inbounds float* %tmp13759, i64 1
- %tmp13761 = getelementptr inbounds float* %tmp13760, i64 1
- %tmp13762 = getelementptr inbounds float* %tmp13761, i64 1
- %tmp13763 = getelementptr inbounds float* %tmp13762, i64 1
- %tmp13764 = getelementptr inbounds float* %tmp13763, i64 1
- %tmp13765 = getelementptr inbounds float* %tmp13764, i64 1
- %tmp13766 = getelementptr inbounds float* %tmp13765, i64 1
- %tmp13767 = getelementptr inbounds float* %tmp13766, i64 1
- %tmp13768 = getelementptr inbounds float* %tmp13767, i64 1
- %tmp13769 = getelementptr inbounds float* %tmp13768, i64 1
- %tmp13770 = getelementptr inbounds float* %tmp13769, i64 1
- %tmp13771 = getelementptr inbounds float* %tmp13770, i64 1
- %tmp13772 = getelementptr inbounds float* %tmp13771, i64 1
- %tmp13773 = getelementptr inbounds float* %tmp13772, i64 1
- %tmp13774 = getelementptr inbounds float* %tmp13773, i64 1
- %tmp13775 = getelementptr inbounds float* %tmp13774, i64 1
- %tmp13776 = getelementptr inbounds float* %tmp13775, i64 1
- %tmp13777 = getelementptr inbounds float* %tmp13776, i64 1
- %tmp13778 = getelementptr inbounds float* %tmp13777, i64 1
- %tmp13779 = getelementptr inbounds float* %tmp13778, i64 1
- %tmp13780 = getelementptr inbounds float* %tmp13779, i64 1
- %tmp13781 = getelementptr inbounds float* %tmp13780, i64 1
- %tmp13782 = getelementptr inbounds float* %tmp13781, i64 1
- %tmp13783 = getelementptr inbounds float* %tmp13782, i64 1
- %tmp13784 = getelementptr inbounds float* %tmp13783, i64 1
- %tmp13785 = getelementptr inbounds float* %tmp13784, i64 1
- %tmp13786 = getelementptr inbounds float* %tmp13785, i64 1
- %tmp13787 = getelementptr inbounds float* %tmp13786, i64 1
- %tmp13788 = getelementptr inbounds float* %tmp13787, i64 1
- %tmp13789 = getelementptr inbounds float* %tmp13788, i64 1
- %tmp13790 = getelementptr inbounds float* %tmp13789, i64 1
- %tmp13791 = getelementptr inbounds float* %tmp13790, i64 1
- %tmp13792 = getelementptr inbounds float* %tmp13791, i64 1
- %tmp13793 = getelementptr inbounds float* %tmp13792, i64 1
- %tmp13794 = getelementptr inbounds float* %tmp13793, i64 1
- %tmp13795 = getelementptr inbounds float* %tmp13794, i64 1
- %tmp13796 = getelementptr inbounds float* %tmp13795, i64 1
- %tmp13797 = getelementptr inbounds float* %tmp13796, i64 1
- %tmp13798 = getelementptr inbounds float* %tmp13797, i64 1
- %tmp13799 = getelementptr inbounds float* %tmp13798, i64 1
- %tmp13800 = getelementptr inbounds float* %tmp13799, i64 1
- %tmp13801 = getelementptr inbounds float* %tmp13800, i64 1
- %tmp13802 = getelementptr inbounds float* %tmp13801, i64 1
- %tmp13803 = getelementptr inbounds float* %tmp13802, i64 1
- %tmp13804 = getelementptr inbounds float* %tmp13803, i64 1
- %tmp13805 = getelementptr inbounds float* %tmp13804, i64 1
- %tmp13806 = getelementptr inbounds float* %tmp13805, i64 1
- %tmp13807 = getelementptr inbounds float* %tmp13806, i64 1
- %tmp13808 = getelementptr inbounds float* %tmp13807, i64 1
- %tmp13809 = getelementptr inbounds float* %tmp13808, i64 1
- %tmp13810 = getelementptr inbounds float* %tmp13809, i64 1
- %tmp13811 = getelementptr inbounds float* %tmp13810, i64 1
- %tmp13812 = getelementptr inbounds float* %tmp13811, i64 1
- %tmp13813 = getelementptr inbounds float* %tmp13812, i64 1
- %tmp13814 = getelementptr inbounds float* %tmp13813, i64 1
- %tmp13815 = getelementptr inbounds float* %tmp13814, i64 1
- %tmp13816 = getelementptr inbounds float* %tmp13815, i64 1
- %tmp13817 = getelementptr inbounds float* %tmp13816, i64 1
- %tmp13818 = getelementptr inbounds float* %tmp13817, i64 1
- %tmp13819 = getelementptr inbounds float* %tmp13818, i64 1
- %tmp13820 = getelementptr inbounds float* %tmp13819, i64 1
- %tmp13821 = getelementptr inbounds float* %tmp13820, i64 1
- %tmp13822 = getelementptr inbounds float* %tmp13821, i64 1
- %tmp13823 = getelementptr inbounds float* %tmp13822, i64 1
- %tmp13824 = getelementptr inbounds float* %tmp13823, i64 1
- %tmp13825 = getelementptr inbounds float* %tmp13824, i64 1
- %tmp13826 = getelementptr inbounds float* %tmp13825, i64 1
- %tmp13827 = getelementptr inbounds float* %tmp13826, i64 1
- %tmp13828 = getelementptr inbounds float* %tmp13827, i64 1
- %tmp13829 = getelementptr inbounds float* %tmp13828, i64 1
- %tmp13830 = getelementptr inbounds float* %tmp13829, i64 1
- %tmp13831 = getelementptr inbounds float* %tmp13830, i64 1
- %tmp13832 = getelementptr inbounds float* %tmp13831, i64 1
- %tmp13833 = getelementptr inbounds float* %tmp13832, i64 1
- %tmp13834 = getelementptr inbounds float* %tmp13833, i64 1
- %tmp13835 = getelementptr inbounds float* %tmp13834, i64 1
- %tmp13836 = getelementptr inbounds float* %tmp13835, i64 1
- %tmp13837 = getelementptr inbounds float* %tmp13836, i64 1
- %tmp13838 = getelementptr inbounds float* %tmp13837, i64 1
- %tmp13839 = getelementptr inbounds float* %tmp13838, i64 1
- %tmp13840 = getelementptr inbounds float* %tmp13839, i64 1
- %tmp13841 = getelementptr inbounds float* %tmp13840, i64 1
- %tmp13842 = getelementptr inbounds float* %tmp13841, i64 1
- %tmp13843 = getelementptr inbounds float* %tmp13842, i64 1
- %tmp13844 = getelementptr inbounds float* %tmp13843, i64 1
- %tmp13845 = getelementptr inbounds float* %tmp13844, i64 1
- %tmp13846 = getelementptr inbounds float* %tmp13845, i64 1
- %tmp13847 = getelementptr inbounds float* %tmp13846, i64 1
- %tmp13848 = getelementptr inbounds float* %tmp13847, i64 1
- %tmp13849 = getelementptr inbounds float* %tmp13848, i64 1
- %tmp13850 = getelementptr inbounds float* %tmp13849, i64 1
- %tmp13851 = getelementptr inbounds float* %tmp13850, i64 1
- %tmp13852 = getelementptr inbounds float* %tmp13851, i64 1
- %tmp13853 = getelementptr inbounds float* %tmp13852, i64 1
- %tmp13854 = getelementptr inbounds float* %tmp13853, i64 1
- %tmp13855 = getelementptr inbounds float* %tmp13854, i64 1
- %tmp13856 = getelementptr inbounds float* %tmp13855, i64 1
- %tmp13857 = getelementptr inbounds float* %tmp13856, i64 1
- %tmp13858 = getelementptr inbounds float* %tmp13857, i64 1
- %tmp13859 = getelementptr inbounds float* %tmp13858, i64 1
- %tmp13860 = getelementptr inbounds float* %tmp13859, i64 1
- %tmp13861 = getelementptr inbounds float* %tmp13860, i64 1
- %tmp13862 = getelementptr inbounds float* %tmp13861, i64 1
- %tmp13863 = getelementptr inbounds float* %tmp13862, i64 1
- %tmp13864 = getelementptr inbounds float* %tmp13863, i64 1
- %tmp13865 = getelementptr inbounds float* %tmp13864, i64 1
- %tmp13866 = getelementptr inbounds float* %tmp13865, i64 1
- %tmp13867 = getelementptr inbounds float* %tmp13866, i64 1
- %tmp13868 = getelementptr inbounds float* %tmp13867, i64 1
- %tmp13869 = getelementptr inbounds float* %tmp13868, i64 1
- %tmp13870 = getelementptr inbounds float* %tmp13869, i64 1
- %tmp13871 = getelementptr inbounds float* %tmp13870, i64 1
- %tmp13872 = getelementptr inbounds float* %tmp13871, i64 1
- %tmp13873 = getelementptr inbounds float* %tmp13872, i64 1
- %tmp13874 = getelementptr inbounds float* %tmp13873, i64 1
- %tmp13875 = getelementptr inbounds float* %tmp13874, i64 1
- %tmp13876 = getelementptr inbounds float* %tmp13875, i64 1
- %tmp13877 = getelementptr inbounds float* %tmp13876, i64 1
- %tmp13878 = getelementptr inbounds float* %tmp13877, i64 1
- %tmp13879 = getelementptr inbounds float* %tmp13878, i64 1
- %tmp13880 = getelementptr inbounds float* %tmp13879, i64 1
- %tmp13881 = getelementptr inbounds float* %tmp13880, i64 1
- %tmp13882 = getelementptr inbounds float* %tmp13881, i64 1
- %tmp13883 = getelementptr inbounds float* %tmp13882, i64 1
- %tmp13884 = getelementptr inbounds float* %tmp13883, i64 1
- %tmp13885 = getelementptr inbounds float* %tmp13884, i64 1
- %tmp13886 = getelementptr inbounds float* %tmp13885, i64 1
- %tmp13887 = getelementptr inbounds float* %tmp13886, i64 1
- %tmp13888 = getelementptr inbounds float* %tmp13887, i64 1
- %tmp13889 = getelementptr inbounds float* %tmp13888, i64 1
- %tmp13890 = getelementptr inbounds float* %tmp13889, i64 1
- %tmp13891 = getelementptr inbounds float* %tmp13890, i64 1
- %tmp13892 = getelementptr inbounds float* %tmp13891, i64 1
- %tmp13893 = getelementptr inbounds float* %tmp13892, i64 1
- %tmp13894 = getelementptr inbounds float* %tmp13893, i64 1
- %tmp13895 = getelementptr inbounds float* %tmp13894, i64 1
- %tmp13896 = getelementptr inbounds float* %tmp13895, i64 1
- %tmp13897 = getelementptr inbounds float* %tmp13896, i64 1
- %tmp13898 = getelementptr inbounds float* %tmp13897, i64 1
- %tmp13899 = getelementptr inbounds float* %tmp13898, i64 1
- %tmp13900 = getelementptr inbounds float* %tmp13899, i64 1
- %tmp13901 = getelementptr inbounds float* %tmp13900, i64 1
- %tmp13902 = getelementptr inbounds float* %tmp13901, i64 1
- %tmp13903 = getelementptr inbounds float* %tmp13902, i64 1
- %tmp13904 = getelementptr inbounds float* %tmp13903, i64 1
- %tmp13905 = getelementptr inbounds float* %tmp13904, i64 1
- %tmp13906 = getelementptr inbounds float* %tmp13905, i64 1
- %tmp13907 = getelementptr inbounds float* %tmp13906, i64 1
- %tmp13908 = getelementptr inbounds float* %tmp13907, i64 1
- %tmp13909 = getelementptr inbounds float* %tmp13908, i64 1
- %tmp13910 = getelementptr inbounds float* %tmp13909, i64 1
- %tmp13911 = getelementptr inbounds float* %tmp13910, i64 1
- %tmp13912 = getelementptr inbounds float* %tmp13911, i64 1
- %tmp13913 = getelementptr inbounds float* %tmp13912, i64 1
- %tmp13914 = getelementptr inbounds float* %tmp13913, i64 1
- %tmp13915 = getelementptr inbounds float* %tmp13914, i64 1
- %tmp13916 = getelementptr inbounds float* %tmp13915, i64 1
- %tmp13917 = getelementptr inbounds float* %tmp13916, i64 1
- %tmp13918 = getelementptr inbounds float* %tmp13917, i64 1
- %tmp13919 = getelementptr inbounds float* %tmp13918, i64 1
- %tmp13920 = getelementptr inbounds float* %tmp13919, i64 1
- %tmp13921 = getelementptr inbounds float* %tmp13920, i64 1
- %tmp13922 = getelementptr inbounds float* %tmp13921, i64 1
- %tmp13923 = getelementptr inbounds float* %tmp13922, i64 1
- %tmp13924 = getelementptr inbounds float* %tmp13923, i64 1
- %tmp13925 = getelementptr inbounds float* %tmp13924, i64 1
- %tmp13926 = getelementptr inbounds float* %tmp13925, i64 1
- %tmp13927 = getelementptr inbounds float* %tmp13926, i64 1
- %tmp13928 = getelementptr inbounds float* %tmp13927, i64 1
- %tmp13929 = getelementptr inbounds float* %tmp13928, i64 1
- %tmp13930 = getelementptr inbounds float* %tmp13929, i64 1
- %tmp13931 = getelementptr inbounds float* %tmp13930, i64 1
- %tmp13932 = getelementptr inbounds float* %tmp13931, i64 1
- %tmp13933 = getelementptr inbounds float* %tmp13932, i64 1
- %tmp13934 = getelementptr inbounds float* %tmp13933, i64 1
- %tmp13935 = getelementptr inbounds float* %tmp13934, i64 1
- %tmp13936 = getelementptr inbounds float* %tmp13935, i64 1
- %tmp13937 = getelementptr inbounds float* %tmp13936, i64 1
- %tmp13938 = getelementptr inbounds float* %tmp13937, i64 1
- %tmp13939 = getelementptr inbounds float* %tmp13938, i64 1
- %tmp13940 = getelementptr inbounds float* %tmp13939, i64 1
- %tmp13941 = getelementptr inbounds float* %tmp13940, i64 1
- %tmp13942 = getelementptr inbounds float* %tmp13941, i64 1
- %tmp13943 = getelementptr inbounds float* %tmp13942, i64 1
- %tmp13944 = getelementptr inbounds float* %tmp13943, i64 1
- %tmp13945 = getelementptr inbounds float* %tmp13944, i64 1
- %tmp13946 = getelementptr inbounds float* %tmp13945, i64 1
- %tmp13947 = getelementptr inbounds float* %tmp13946, i64 1
- %tmp13948 = getelementptr inbounds float* %tmp13947, i64 1
- %tmp13949 = getelementptr inbounds float* %tmp13948, i64 1
- %tmp13950 = getelementptr inbounds float* %tmp13949, i64 1
- %tmp13951 = getelementptr inbounds float* %tmp13950, i64 1
- %tmp13952 = getelementptr inbounds float* %tmp13951, i64 1
- %tmp13953 = getelementptr inbounds float* %tmp13952, i64 1
- %tmp13954 = getelementptr inbounds float* %tmp13953, i64 1
- %tmp13955 = getelementptr inbounds float* %tmp13954, i64 1
- %tmp13956 = getelementptr inbounds float* %tmp13955, i64 1
- %tmp13957 = getelementptr inbounds float* %tmp13956, i64 1
- %tmp13958 = getelementptr inbounds float* %tmp13957, i64 1
- %tmp13959 = getelementptr inbounds float* %tmp13958, i64 1
- %tmp13960 = getelementptr inbounds float* %tmp13959, i64 1
- %tmp13961 = getelementptr inbounds float* %tmp13960, i64 1
- %tmp13962 = getelementptr inbounds float* %tmp13961, i64 1
- %tmp13963 = getelementptr inbounds float* %tmp13962, i64 1
- %tmp13964 = getelementptr inbounds float* %tmp13963, i64 1
- %tmp13965 = getelementptr inbounds float* %tmp13964, i64 1
- %tmp13966 = getelementptr inbounds float* %tmp13965, i64 1
- %tmp13967 = getelementptr inbounds float* %tmp13966, i64 1
- %tmp13968 = getelementptr inbounds float* %tmp13967, i64 1
- %tmp13969 = getelementptr inbounds float* %tmp13968, i64 1
- %tmp13970 = getelementptr inbounds float* %tmp13969, i64 1
- %tmp13971 = getelementptr inbounds float* %tmp13970, i64 1
- %tmp13972 = getelementptr inbounds float* %tmp13971, i64 1
- %tmp13973 = getelementptr inbounds float* %tmp13972, i64 1
- %tmp13974 = getelementptr inbounds float* %tmp13973, i64 1
- %tmp13975 = getelementptr inbounds float* %tmp13974, i64 1
- %tmp13976 = getelementptr inbounds float* %tmp13975, i64 1
- %tmp13977 = getelementptr inbounds float* %tmp13976, i64 1
- %tmp13978 = getelementptr inbounds float* %tmp13977, i64 1
- %tmp13979 = getelementptr inbounds float* %tmp13978, i64 1
- %tmp13980 = getelementptr inbounds float* %tmp13979, i64 1
- %tmp13981 = getelementptr inbounds float* %tmp13980, i64 1
- %tmp13982 = getelementptr inbounds float* %tmp13981, i64 1
- %tmp13983 = getelementptr inbounds float* %tmp13982, i64 1
- %tmp13984 = getelementptr inbounds float* %tmp13983, i64 1
- %tmp13985 = getelementptr inbounds float* %tmp13984, i64 1
- %tmp13986 = getelementptr inbounds float* %tmp13985, i64 1
- %tmp13987 = getelementptr inbounds float* %tmp13986, i64 1
- %tmp13988 = getelementptr inbounds float* %tmp13987, i64 1
- %tmp13989 = getelementptr inbounds float* %tmp13988, i64 1
- %tmp13990 = getelementptr inbounds float* %tmp13989, i64 1
- %tmp13991 = getelementptr inbounds float* %tmp13990, i64 1
- %tmp13992 = getelementptr inbounds float* %tmp13991, i64 1
- %tmp13993 = getelementptr inbounds float* %tmp13992, i64 1
- %tmp13994 = getelementptr inbounds float* %tmp13993, i64 1
- %tmp13995 = getelementptr inbounds float* %tmp13994, i64 1
- %tmp13996 = getelementptr inbounds float* %tmp13995, i64 1
- %tmp13997 = getelementptr inbounds float* %tmp13996, i64 1
- %tmp13998 = getelementptr inbounds float* %tmp13997, i64 1
- %tmp13999 = getelementptr inbounds float* %tmp13998, i64 1
- %tmp14000 = getelementptr inbounds float* %tmp13999, i64 1
- %tmp14001 = getelementptr inbounds float* %tmp14000, i64 1
- %tmp14002 = getelementptr inbounds float* %tmp14001, i64 1
- %tmp14003 = getelementptr inbounds float* %tmp14002, i64 1
- %tmp14004 = getelementptr inbounds float* %tmp14003, i64 1
- %tmp14005 = getelementptr inbounds float* %tmp14004, i64 1
- %tmp14006 = getelementptr inbounds float* %tmp14005, i64 1
- %tmp14007 = getelementptr inbounds float* %tmp14006, i64 1
- %tmp14008 = getelementptr inbounds float* %tmp14007, i64 1
- %tmp14009 = getelementptr inbounds float* %tmp14008, i64 1
- %tmp14010 = getelementptr inbounds float* %tmp14009, i64 1
- %tmp14011 = getelementptr inbounds float* %tmp14010, i64 1
- %tmp14012 = getelementptr inbounds float* %tmp14011, i64 1
- %tmp14013 = getelementptr inbounds float* %tmp14012, i64 1
- %tmp14014 = getelementptr inbounds float* %tmp14013, i64 1
- %tmp14015 = getelementptr inbounds float* %tmp14014, i64 1
- %tmp14016 = getelementptr inbounds float* %tmp14015, i64 1
- %tmp14017 = getelementptr inbounds float* %tmp14016, i64 1
- %tmp14018 = getelementptr inbounds float* %tmp14017, i64 1
- %tmp14019 = getelementptr inbounds float* %tmp14018, i64 1
- %tmp14020 = getelementptr inbounds float* %tmp14019, i64 1
- %tmp14021 = getelementptr inbounds float* %tmp14020, i64 1
- %tmp14022 = getelementptr inbounds float* %tmp14021, i64 1
- %tmp14023 = getelementptr inbounds float* %tmp14022, i64 1
- %tmp14024 = getelementptr inbounds float* %tmp14023, i64 1
- %tmp14025 = getelementptr inbounds float* %tmp14024, i64 1
- %tmp14026 = getelementptr inbounds float* %tmp14025, i64 1
- %tmp14027 = getelementptr inbounds float* %tmp14026, i64 1
- %tmp14028 = getelementptr inbounds float* %tmp14027, i64 1
- %tmp14029 = getelementptr inbounds float* %tmp14028, i64 1
- %tmp14030 = getelementptr inbounds float* %tmp14029, i64 1
- %tmp14031 = getelementptr inbounds float* %tmp14030, i64 1
- %tmp14032 = getelementptr inbounds float* %tmp14031, i64 1
- %tmp14033 = getelementptr inbounds float* %tmp14032, i64 1
- %tmp14034 = getelementptr inbounds float* %tmp14033, i64 1
- %tmp14035 = getelementptr inbounds float* %tmp14034, i64 1
- %tmp14036 = getelementptr inbounds float* %tmp14035, i64 1
- %tmp14037 = getelementptr inbounds float* %tmp14036, i64 1
- %tmp14038 = getelementptr inbounds float* %tmp14037, i64 1
- %tmp14039 = getelementptr inbounds float* %tmp14038, i64 1
- %tmp14040 = getelementptr inbounds float* %tmp14039, i64 1
- %tmp14041 = getelementptr inbounds float* %tmp14040, i64 1
- %tmp14042 = getelementptr inbounds float* %tmp14041, i64 1
- %tmp14043 = getelementptr inbounds float* %tmp14042, i64 1
- %tmp14044 = getelementptr inbounds float* %tmp14043, i64 1
- %tmp14045 = getelementptr inbounds float* %tmp14044, i64 1
- %tmp14046 = getelementptr inbounds float* %tmp14045, i64 1
- %tmp14047 = getelementptr inbounds float* %tmp14046, i64 1
- %tmp14048 = getelementptr inbounds float* %tmp14047, i64 1
- %tmp14049 = getelementptr inbounds float* %tmp14048, i64 1
- %tmp14050 = getelementptr inbounds float* %tmp14049, i64 1
- %tmp14051 = getelementptr inbounds float* %tmp14050, i64 1
- %tmp14052 = getelementptr inbounds float* %tmp14051, i64 1
- %tmp14053 = getelementptr inbounds float* %tmp14052, i64 1
- %tmp14054 = getelementptr inbounds float* %tmp14053, i64 1
- %tmp14055 = getelementptr inbounds float* %tmp14054, i64 1
- %tmp14056 = getelementptr inbounds float* %tmp14055, i64 1
- %tmp14057 = getelementptr inbounds float* %tmp14056, i64 1
- %tmp14058 = getelementptr inbounds float* %tmp14057, i64 1
- %tmp14059 = getelementptr inbounds float* %tmp14058, i64 1
- %tmp14060 = getelementptr inbounds float* %tmp14059, i64 1
- %tmp14061 = getelementptr inbounds float* %tmp14060, i64 1
- %tmp14062 = getelementptr inbounds float* %tmp14061, i64 1
- %tmp14063 = getelementptr inbounds float* %tmp14062, i64 1
- %tmp14064 = getelementptr inbounds float* %tmp14063, i64 1
- %tmp14065 = getelementptr inbounds float* %tmp14064, i64 1
- %tmp14066 = getelementptr inbounds float* %tmp14065, i64 1
- %tmp14067 = getelementptr inbounds float* %tmp14066, i64 1
- %tmp14068 = getelementptr inbounds float* %tmp14067, i64 1
- %tmp14069 = getelementptr inbounds float* %tmp14068, i64 1
- %tmp14070 = getelementptr inbounds float* %tmp14069, i64 1
- %tmp14071 = getelementptr inbounds float* %tmp14070, i64 1
- %tmp14072 = getelementptr inbounds float* %tmp14071, i64 1
- %tmp14073 = getelementptr inbounds float* %tmp14072, i64 1
- %tmp14074 = getelementptr inbounds float* %tmp14073, i64 1
- %tmp14075 = getelementptr inbounds float* %tmp14074, i64 1
- %tmp14076 = getelementptr inbounds float* %tmp14075, i64 1
- %tmp14077 = getelementptr inbounds float* %tmp14076, i64 1
- %tmp14078 = getelementptr inbounds float* %tmp14077, i64 1
- %tmp14079 = getelementptr inbounds float* %tmp14078, i64 1
- %tmp14080 = getelementptr inbounds float* %tmp14079, i64 1
- %tmp14081 = getelementptr inbounds float* %tmp14080, i64 1
- %tmp14082 = getelementptr inbounds float* %tmp14081, i64 1
- %tmp14083 = getelementptr inbounds float* %tmp14082, i64 1
- %tmp14084 = getelementptr inbounds float* %tmp14083, i64 1
- %tmp14085 = getelementptr inbounds float* %tmp14084, i64 1
- %tmp14086 = getelementptr inbounds float* %tmp14085, i64 1
- %tmp14087 = getelementptr inbounds float* %tmp14086, i64 1
- %tmp14088 = getelementptr inbounds float* %tmp14087, i64 1
- %tmp14089 = getelementptr inbounds float* %tmp14088, i64 1
- %tmp14090 = getelementptr inbounds float* %tmp14089, i64 1
- %tmp14091 = getelementptr inbounds float* %tmp14090, i64 1
- %tmp14092 = getelementptr inbounds float* %tmp14091, i64 1
- %tmp14093 = getelementptr inbounds float* %tmp14092, i64 1
- %tmp14094 = getelementptr inbounds float* %tmp14093, i64 1
- %tmp14095 = getelementptr inbounds float* %tmp14094, i64 1
- %tmp14096 = getelementptr inbounds float* %tmp14095, i64 1
- %tmp14097 = getelementptr inbounds float* %tmp14096, i64 1
- %tmp14098 = getelementptr inbounds float* %tmp14097, i64 1
- %tmp14099 = getelementptr inbounds float* %tmp14098, i64 1
- %tmp14100 = getelementptr inbounds float* %tmp14099, i64 1
- %tmp14101 = getelementptr inbounds float* %tmp14100, i64 1
- %tmp14102 = getelementptr inbounds float* %tmp14101, i64 1
- %tmp14103 = getelementptr inbounds float* %tmp14102, i64 1
- %tmp14104 = getelementptr inbounds float* %tmp14103, i64 1
- %tmp14105 = getelementptr inbounds float* %tmp14104, i64 1
- %tmp14106 = getelementptr inbounds float* %tmp14105, i64 1
- %tmp14107 = getelementptr inbounds float* %tmp14106, i64 1
- %tmp14108 = getelementptr inbounds float* %tmp14107, i64 1
- %tmp14109 = getelementptr inbounds float* %tmp14108, i64 1
- %tmp14110 = getelementptr inbounds float* %tmp14109, i64 1
- %tmp14111 = getelementptr inbounds float* %tmp14110, i64 1
- %tmp14112 = getelementptr inbounds float* %tmp14111, i64 1
- %tmp14113 = getelementptr inbounds float* %tmp14112, i64 1
- %tmp14114 = getelementptr inbounds float* %tmp14113, i64 1
- %tmp14115 = getelementptr inbounds float* %tmp14114, i64 1
- %tmp14116 = getelementptr inbounds float* %tmp14115, i64 1
- %tmp14117 = getelementptr inbounds float* %tmp14116, i64 1
- %tmp14118 = getelementptr inbounds float* %tmp14117, i64 1
- %tmp14119 = getelementptr inbounds float* %tmp14118, i64 1
- %tmp14120 = getelementptr inbounds float* %tmp14119, i64 1
- %tmp14121 = getelementptr inbounds float* %tmp14120, i64 1
- %tmp14122 = getelementptr inbounds float* %tmp14121, i64 1
- %tmp14123 = getelementptr inbounds float* %tmp14122, i64 1
- %tmp14124 = getelementptr inbounds float* %tmp14123, i64 1
- %tmp14125 = getelementptr inbounds float* %tmp14124, i64 1
- %tmp14126 = getelementptr inbounds float* %tmp14125, i64 1
- %tmp14127 = getelementptr inbounds float* %tmp14126, i64 1
- %tmp14128 = getelementptr inbounds float* %tmp14127, i64 1
- %tmp14129 = getelementptr inbounds float* %tmp14128, i64 1
- %tmp14130 = getelementptr inbounds float* %tmp14129, i64 1
- %tmp14131 = getelementptr inbounds float* %tmp14130, i64 1
- %tmp14132 = getelementptr inbounds float* %tmp14131, i64 1
- %tmp14133 = getelementptr inbounds float* %tmp14132, i64 1
- %tmp14134 = getelementptr inbounds float* %tmp14133, i64 1
- %tmp14135 = getelementptr inbounds float* %tmp14134, i64 1
- %tmp14136 = getelementptr inbounds float* %tmp14135, i64 1
- %tmp14137 = getelementptr inbounds float* %tmp14136, i64 1
- %tmp14138 = getelementptr inbounds float* %tmp14137, i64 1
- %tmp14139 = getelementptr inbounds float* %tmp14138, i64 1
- %tmp14140 = getelementptr inbounds float* %tmp14139, i64 1
- %tmp14141 = getelementptr inbounds float* %tmp14140, i64 1
- %tmp14142 = getelementptr inbounds float* %tmp14141, i64 1
- %tmp14143 = getelementptr inbounds float* %tmp14142, i64 1
- %tmp14144 = getelementptr inbounds float* %tmp14143, i64 1
- %tmp14145 = getelementptr inbounds float* %tmp14144, i64 1
- %tmp14146 = getelementptr inbounds float* %tmp14145, i64 1
- %tmp14147 = getelementptr inbounds float* %tmp14146, i64 1
- %tmp14148 = getelementptr inbounds float* %tmp14147, i64 1
- %tmp14149 = getelementptr inbounds float* %tmp14148, i64 1
- %tmp14150 = getelementptr inbounds float* %tmp14149, i64 1
- %tmp14151 = getelementptr inbounds float* %tmp14150, i64 1
- %tmp14152 = getelementptr inbounds float* %tmp14151, i64 1
- %tmp14153 = getelementptr inbounds float* %tmp14152, i64 1
- %tmp14154 = getelementptr inbounds float* %tmp14153, i64 1
- %tmp14155 = getelementptr inbounds float* %tmp14154, i64 1
- %tmp14156 = getelementptr inbounds float* %tmp14155, i64 1
- %tmp14157 = getelementptr inbounds float* %tmp14156, i64 1
- %tmp14158 = getelementptr inbounds float* %tmp14157, i64 1
- %tmp14159 = getelementptr inbounds float* %tmp14158, i64 1
- %tmp14160 = getelementptr inbounds float* %tmp14159, i64 1
- %tmp14161 = getelementptr inbounds float* %tmp14160, i64 1
- %tmp14162 = getelementptr inbounds float* %tmp14161, i64 1
- %tmp14163 = getelementptr inbounds float* %tmp14162, i64 1
- %tmp14164 = getelementptr inbounds float* %tmp14163, i64 1
- %tmp14165 = getelementptr inbounds float* %tmp14164, i64 1
- %tmp14166 = getelementptr inbounds float* %tmp14165, i64 1
- %tmp14167 = getelementptr inbounds float* %tmp14166, i64 1
- %tmp14168 = getelementptr inbounds float* %tmp14167, i64 1
- %tmp14169 = getelementptr inbounds float* %tmp14168, i64 1
- %tmp14170 = getelementptr inbounds float* %tmp14169, i64 1
- %tmp14171 = getelementptr inbounds float* %tmp14170, i64 1
- %tmp14172 = getelementptr inbounds float* %tmp14171, i64 1
- %tmp14173 = getelementptr inbounds float* %tmp14172, i64 1
- %tmp14174 = getelementptr inbounds float* %tmp14173, i64 1
- %tmp14175 = getelementptr inbounds float* %tmp14174, i64 1
- %tmp14176 = getelementptr inbounds float* %tmp14175, i64 1
- %tmp14177 = getelementptr inbounds float* %tmp14176, i64 1
- %tmp14178 = getelementptr inbounds float* %tmp14177, i64 1
- %tmp14179 = getelementptr inbounds float* %tmp14178, i64 1
- %tmp14180 = getelementptr inbounds float* %tmp14179, i64 1
- %tmp14181 = getelementptr inbounds float* %tmp14180, i64 1
- %tmp14182 = getelementptr inbounds float* %tmp14181, i64 1
- %tmp14183 = getelementptr inbounds float* %tmp14182, i64 1
- %tmp14184 = getelementptr inbounds float* %tmp14183, i64 1
- %tmp14185 = getelementptr inbounds float* %tmp14184, i64 1
- %tmp14186 = getelementptr inbounds float* %tmp14185, i64 1
- %tmp14187 = getelementptr inbounds float* %tmp14186, i64 1
- %tmp14188 = getelementptr inbounds float* %tmp14187, i64 1
- %tmp14189 = getelementptr inbounds float* %tmp14188, i64 1
- %tmp14190 = getelementptr inbounds float* %tmp14189, i64 1
- %tmp14191 = getelementptr inbounds float* %tmp14190, i64 1
- %tmp14192 = getelementptr inbounds float* %tmp14191, i64 1
- %tmp14193 = getelementptr inbounds float* %tmp14192, i64 1
- %tmp14194 = getelementptr inbounds float* %tmp14193, i64 1
- %tmp14195 = getelementptr inbounds float* %tmp14194, i64 1
- %tmp14196 = getelementptr inbounds float* %tmp14195, i64 1
- %tmp14197 = getelementptr inbounds float* %tmp14196, i64 1
- %tmp14198 = getelementptr inbounds float* %tmp14197, i64 1
- %tmp14199 = getelementptr inbounds float* %tmp14198, i64 1
- %tmp14200 = getelementptr inbounds float* %tmp14199, i64 1
- %tmp14201 = getelementptr inbounds float* %tmp14200, i64 1
- %tmp14202 = getelementptr inbounds float* %tmp14201, i64 1
- %tmp14203 = getelementptr inbounds float* %tmp14202, i64 1
- %tmp14204 = getelementptr inbounds float* %tmp14203, i64 1
- %tmp14205 = getelementptr inbounds float* %tmp14204, i64 1
- %tmp14206 = getelementptr inbounds float* %tmp14205, i64 1
- %tmp14207 = getelementptr inbounds float* %tmp14206, i64 1
- %tmp14208 = getelementptr inbounds float* %tmp14207, i64 1
- %tmp14209 = getelementptr inbounds float* %tmp14208, i64 1
- %tmp14210 = getelementptr inbounds float* %tmp14209, i64 1
- %tmp14211 = getelementptr inbounds float* %tmp14210, i64 1
- %tmp14212 = getelementptr inbounds float* %tmp14211, i64 1
- %tmp14213 = getelementptr inbounds float* %tmp14212, i64 1
- %tmp14214 = getelementptr inbounds float* %tmp14213, i64 1
- %tmp14215 = getelementptr inbounds float* %tmp14214, i64 1
- %tmp14216 = getelementptr inbounds float* %tmp14215, i64 1
- %tmp14217 = getelementptr inbounds float* %tmp14216, i64 1
- %tmp14218 = getelementptr inbounds float* %tmp14217, i64 1
- %tmp14219 = getelementptr inbounds float* %tmp14218, i64 1
- %tmp14220 = getelementptr inbounds float* %tmp14219, i64 1
- %tmp14221 = getelementptr inbounds float* %tmp14220, i64 1
- %tmp14222 = getelementptr inbounds float* %tmp14221, i64 1
- %tmp14223 = getelementptr inbounds float* %tmp14222, i64 1
- %tmp14224 = getelementptr inbounds float* %tmp14223, i64 1
- %tmp14225 = getelementptr inbounds float* %tmp14224, i64 1
- %tmp14226 = getelementptr inbounds float* %tmp14225, i64 1
- %tmp14227 = getelementptr inbounds float* %tmp14226, i64 1
- %tmp14228 = getelementptr inbounds float* %tmp14227, i64 1
- %tmp14229 = getelementptr inbounds float* %tmp14228, i64 1
- %tmp14230 = getelementptr inbounds float* %tmp14229, i64 1
- %tmp14231 = getelementptr inbounds float* %tmp14230, i64 1
- %tmp14232 = getelementptr inbounds float* %tmp14231, i64 1
- %tmp14233 = getelementptr inbounds float* %tmp14232, i64 1
- %tmp14234 = getelementptr inbounds float* %tmp14233, i64 1
- %tmp14235 = getelementptr inbounds float* %tmp14234, i64 1
- %tmp14236 = getelementptr inbounds float* %tmp14235, i64 1
- %tmp14237 = getelementptr inbounds float* %tmp14236, i64 1
- %tmp14238 = getelementptr inbounds float* %tmp14237, i64 1
- %tmp14239 = getelementptr inbounds float* %tmp14238, i64 1
- %tmp14240 = getelementptr inbounds float* %tmp14239, i64 1
- %tmp14241 = getelementptr inbounds float* %tmp14240, i64 1
- %tmp14242 = getelementptr inbounds float* %tmp14241, i64 1
- %tmp14243 = getelementptr inbounds float* %tmp14242, i64 1
- %tmp14244 = getelementptr inbounds float* %tmp14243, i64 1
- %tmp14245 = getelementptr inbounds float* %tmp14244, i64 1
- %tmp14246 = getelementptr inbounds float* %tmp14245, i64 1
- %tmp14247 = getelementptr inbounds float* %tmp14246, i64 1
- %tmp14248 = getelementptr inbounds float* %tmp14247, i64 1
- %tmp14249 = getelementptr inbounds float* %tmp14248, i64 1
- %tmp14250 = getelementptr inbounds float* %tmp14249, i64 1
- %tmp14251 = getelementptr inbounds float* %tmp14250, i64 1
- %tmp14252 = getelementptr inbounds float* %tmp14251, i64 1
- %tmp14253 = getelementptr inbounds float* %tmp14252, i64 1
- %tmp14254 = getelementptr inbounds float* %tmp14253, i64 1
- %tmp14255 = getelementptr inbounds float* %tmp14254, i64 1
- %tmp14256 = getelementptr inbounds float* %tmp14255, i64 1
- %tmp14257 = getelementptr inbounds float* %tmp14256, i64 1
- %tmp14258 = getelementptr inbounds float* %tmp14257, i64 1
- %tmp14259 = getelementptr inbounds float* %tmp14258, i64 1
- %tmp14260 = getelementptr inbounds float* %tmp14259, i64 1
- %tmp14261 = getelementptr inbounds float* %tmp14260, i64 1
- %tmp14262 = getelementptr inbounds float* %tmp14261, i64 1
- %tmp14263 = getelementptr inbounds float* %tmp14262, i64 1
- %tmp14264 = getelementptr inbounds float* %tmp14263, i64 1
- %tmp14265 = getelementptr inbounds float* %tmp14264, i64 1
- %tmp14266 = getelementptr inbounds float* %tmp14265, i64 1
- %tmp14267 = getelementptr inbounds float* %tmp14266, i64 1
- %tmp14268 = getelementptr inbounds float* %tmp14267, i64 1
- %tmp14269 = getelementptr inbounds float* %tmp14268, i64 1
- %tmp14270 = getelementptr inbounds float* %tmp14269, i64 1
- %tmp14271 = getelementptr inbounds float* %tmp14270, i64 1
- %tmp14272 = getelementptr inbounds float* %tmp14271, i64 1
- %tmp14273 = getelementptr inbounds float* %tmp14272, i64 1
- %tmp14274 = getelementptr inbounds float* %tmp14273, i64 1
- %tmp14275 = getelementptr inbounds float* %tmp14274, i64 1
- %tmp14276 = getelementptr inbounds float* %tmp14275, i64 1
- %tmp14277 = getelementptr inbounds float* %tmp14276, i64 1
- %tmp14278 = getelementptr inbounds float* %tmp14277, i64 1
- %tmp14279 = getelementptr inbounds float* %tmp14278, i64 1
- %tmp14280 = getelementptr inbounds float* %tmp14279, i64 1
- %tmp14281 = getelementptr inbounds float* %tmp14280, i64 1
- %tmp14282 = getelementptr inbounds float* %tmp14281, i64 1
- %tmp14283 = getelementptr inbounds float* %tmp14282, i64 1
- %tmp14284 = getelementptr inbounds float* %tmp14283, i64 1
- %tmp14285 = getelementptr inbounds float* %tmp14284, i64 1
- %tmp14286 = getelementptr inbounds float* %tmp14285, i64 1
- %tmp14287 = getelementptr inbounds float* %tmp14286, i64 1
- %tmp14288 = getelementptr inbounds float* %tmp14287, i64 1
- %tmp14289 = getelementptr inbounds float* %tmp14288, i64 1
- %tmp14290 = getelementptr inbounds float* %tmp14289, i64 1
- %tmp14291 = getelementptr inbounds float* %tmp14290, i64 1
- %tmp14292 = getelementptr inbounds float* %tmp14291, i64 1
- %tmp14293 = getelementptr inbounds float* %tmp14292, i64 1
- %tmp14294 = getelementptr inbounds float* %tmp14293, i64 1
- %tmp14295 = getelementptr inbounds float* %tmp14294, i64 1
- %tmp14296 = getelementptr inbounds float* %tmp14295, i64 1
- %tmp14297 = getelementptr inbounds float* %tmp14296, i64 1
- %tmp14298 = getelementptr inbounds float* %tmp14297, i64 1
- %tmp14299 = getelementptr inbounds float* %tmp14298, i64 1
- %tmp14300 = getelementptr inbounds float* %tmp14299, i64 1
- %tmp14301 = getelementptr inbounds float* %tmp14300, i64 1
- %tmp14302 = getelementptr inbounds float* %tmp14301, i64 1
- %tmp14303 = getelementptr inbounds float* %tmp14302, i64 1
- %tmp14304 = getelementptr inbounds float* %tmp14303, i64 1
- %tmp14305 = getelementptr inbounds float* %tmp14304, i64 1
- %tmp14306 = getelementptr inbounds float* %tmp14305, i64 1
- %tmp14307 = getelementptr inbounds float* %tmp14306, i64 1
- %tmp14308 = getelementptr inbounds float* %tmp14307, i64 1
- %tmp14309 = getelementptr inbounds float* %tmp14308, i64 1
- %tmp14310 = getelementptr inbounds float* %tmp14309, i64 1
- %tmp14311 = getelementptr inbounds float* %tmp14310, i64 1
- %tmp14312 = getelementptr inbounds float* %tmp14311, i64 1
- %tmp14313 = getelementptr inbounds float* %tmp14312, i64 1
- %tmp14314 = getelementptr inbounds float* %tmp14313, i64 1
- %tmp14315 = getelementptr inbounds float* %tmp14314, i64 1
- %tmp14316 = getelementptr inbounds float* %tmp14315, i64 1
- %tmp14317 = getelementptr inbounds float* %tmp14316, i64 1
- %tmp14318 = getelementptr inbounds float* %tmp14317, i64 1
- %tmp14319 = getelementptr inbounds float* %tmp14318, i64 1
- %tmp14320 = getelementptr inbounds float* %tmp14319, i64 1
- %tmp14321 = getelementptr inbounds float* %tmp14320, i64 1
- %tmp14322 = getelementptr inbounds float* %tmp14321, i64 1
- %tmp14323 = getelementptr inbounds float* %tmp14322, i64 1
- %tmp14324 = getelementptr inbounds float* %tmp14323, i64 1
- %tmp14325 = getelementptr inbounds float* %tmp14324, i64 1
- %tmp14326 = getelementptr inbounds float* %tmp14325, i64 1
- %tmp14327 = getelementptr inbounds float* %tmp14326, i64 1
- %tmp14328 = getelementptr inbounds float* %tmp14327, i64 1
- %tmp14329 = getelementptr inbounds float* %tmp14328, i64 1
- %tmp14330 = getelementptr inbounds float* %tmp14329, i64 1
- %tmp14331 = getelementptr inbounds float* %tmp14330, i64 1
- %tmp14332 = getelementptr inbounds float* %tmp14331, i64 1
- %tmp14333 = getelementptr inbounds float* %tmp14332, i64 1
- %tmp14334 = getelementptr inbounds float* %tmp14333, i64 1
- %tmp14335 = getelementptr inbounds float* %tmp14334, i64 1
- %tmp14336 = getelementptr inbounds float* %tmp14335, i64 1
- %tmp14337 = getelementptr inbounds float* %tmp14336, i64 1
- %tmp14338 = getelementptr inbounds float* %tmp14337, i64 1
- %tmp14339 = getelementptr inbounds float* %tmp14338, i64 1
- %tmp14340 = getelementptr inbounds float* %tmp14339, i64 1
- %tmp14341 = getelementptr inbounds float* %tmp14340, i64 1
- %tmp14342 = getelementptr inbounds float* %tmp14341, i64 1
- %tmp14343 = getelementptr inbounds float* %tmp14342, i64 1
- %tmp14344 = getelementptr inbounds float* %tmp14343, i64 1
- %tmp14345 = getelementptr inbounds float* %tmp14344, i64 1
- %tmp14346 = getelementptr inbounds float* %tmp14345, i64 1
- %tmp14347 = getelementptr inbounds float* %tmp14346, i64 1
- %tmp14348 = getelementptr inbounds float* %tmp14347, i64 1
- %tmp14349 = getelementptr inbounds float* %tmp14348, i64 1
- %tmp14350 = getelementptr inbounds float* %tmp14349, i64 1
- %tmp14351 = getelementptr inbounds float* %tmp14350, i64 1
- %tmp14352 = getelementptr inbounds float* %tmp14351, i64 1
- %tmp14353 = getelementptr inbounds float* %tmp14352, i64 1
- %tmp14354 = getelementptr inbounds float* %tmp14353, i64 1
- %tmp14355 = getelementptr inbounds float* %tmp14354, i64 1
- %tmp14356 = getelementptr inbounds float* %tmp14355, i64 1
- %tmp14357 = getelementptr inbounds float* %tmp14356, i64 1
- %tmp14358 = getelementptr inbounds float* %tmp14357, i64 1
- %tmp14359 = getelementptr inbounds float* %tmp14358, i64 1
- %tmp14360 = getelementptr inbounds float* %tmp14359, i64 1
- %tmp14361 = getelementptr inbounds float* %tmp14360, i64 1
- %tmp14362 = getelementptr inbounds float* %tmp14361, i64 1
- %tmp14363 = getelementptr inbounds float* %tmp14362, i64 1
- %tmp14364 = getelementptr inbounds float* %tmp14363, i64 1
- %tmp14365 = getelementptr inbounds float* %tmp14364, i64 1
- %tmp14366 = getelementptr inbounds float* %tmp14365, i64 1
- %tmp14367 = getelementptr inbounds float* %tmp14366, i64 1
- %tmp14368 = getelementptr inbounds float* %tmp14367, i64 1
- %tmp14369 = getelementptr inbounds float* %tmp14368, i64 1
- %tmp14370 = getelementptr inbounds float* %tmp14369, i64 1
- %tmp14371 = getelementptr inbounds float* %tmp14370, i64 1
- %tmp14372 = getelementptr inbounds float* %tmp14371, i64 1
- %tmp14373 = getelementptr inbounds float* %tmp14372, i64 1
- %tmp14374 = getelementptr inbounds float* %tmp14373, i64 1
- %tmp14375 = getelementptr inbounds float* %tmp14374, i64 1
- %tmp14376 = getelementptr inbounds float* %tmp14375, i64 1
- %tmp14377 = getelementptr inbounds float* %tmp14376, i64 1
- %tmp14378 = getelementptr inbounds float* %tmp14377, i64 1
- %tmp14379 = getelementptr inbounds float* %tmp14378, i64 1
- %tmp14380 = getelementptr inbounds float* %tmp14379, i64 1
- %tmp14381 = getelementptr inbounds float* %tmp14380, i64 1
- %tmp14382 = getelementptr inbounds float* %tmp14381, i64 1
- %tmp14383 = getelementptr inbounds float* %tmp14382, i64 1
- %tmp14384 = getelementptr inbounds float* %tmp14383, i64 1
- %tmp14385 = getelementptr inbounds float* %tmp14384, i64 1
- %tmp14386 = getelementptr inbounds float* %tmp14385, i64 1
- %tmp14387 = getelementptr inbounds float* %tmp14386, i64 1
- %tmp14388 = getelementptr inbounds float* %tmp14387, i64 1
- %tmp14389 = getelementptr inbounds float* %tmp14388, i64 1
- %tmp14390 = getelementptr inbounds float* %tmp14389, i64 1
- %tmp14391 = getelementptr inbounds float* %tmp14390, i64 1
- %tmp14392 = getelementptr inbounds float* %tmp14391, i64 1
- %tmp14393 = getelementptr inbounds float* %tmp14392, i64 1
- %tmp14394 = getelementptr inbounds float* %tmp14393, i64 1
- %tmp14395 = getelementptr inbounds float* %tmp14394, i64 1
- %tmp14396 = getelementptr inbounds float* %tmp14395, i64 1
- %tmp14397 = getelementptr inbounds float* %tmp14396, i64 1
- %tmp14398 = getelementptr inbounds float* %tmp14397, i64 1
- %tmp14399 = getelementptr inbounds float* %tmp14398, i64 1
- %tmp14400 = getelementptr inbounds float* %tmp14399, i64 1
- %tmp14401 = getelementptr inbounds float* %tmp14400, i64 1
- %tmp14402 = getelementptr inbounds float* %tmp14401, i64 1
- %tmp14403 = getelementptr inbounds float* %tmp14402, i64 1
- %tmp14404 = getelementptr inbounds float* %tmp14403, i64 1
- %tmp14405 = getelementptr inbounds float* %tmp14404, i64 1
- %tmp14406 = getelementptr inbounds float* %tmp14405, i64 1
- %tmp14407 = getelementptr inbounds float* %tmp14406, i64 1
- %tmp14408 = getelementptr inbounds float* %tmp14407, i64 1
- %tmp14409 = getelementptr inbounds float* %tmp14408, i64 1
- %tmp14410 = getelementptr inbounds float* %tmp14409, i64 1
- %tmp14411 = getelementptr inbounds float* %tmp14410, i64 1
- %tmp14412 = getelementptr inbounds float* %tmp14411, i64 1
- %tmp14413 = getelementptr inbounds float* %tmp14412, i64 1
- %tmp14414 = getelementptr inbounds float* %tmp14413, i64 1
- %tmp14415 = getelementptr inbounds float* %tmp14414, i64 1
- %tmp14416 = getelementptr inbounds float* %tmp14415, i64 1
- %tmp14417 = getelementptr inbounds float* %tmp14416, i64 1
- %tmp14418 = getelementptr inbounds float* %tmp14417, i64 1
- %tmp14419 = getelementptr inbounds float* %tmp14418, i64 1
- %tmp14420 = getelementptr inbounds float* %tmp14419, i64 1
- %tmp14421 = getelementptr inbounds float* %tmp14420, i64 1
- %tmp14422 = getelementptr inbounds float* %tmp14421, i64 1
- %tmp14423 = getelementptr inbounds float* %tmp14422, i64 1
- %tmp14424 = getelementptr inbounds float* %tmp14423, i64 1
- %tmp14425 = getelementptr inbounds float* %tmp14424, i64 1
- %tmp14426 = getelementptr inbounds float* %tmp14425, i64 1
- %tmp14427 = getelementptr inbounds float* %tmp14426, i64 1
- %tmp14428 = getelementptr inbounds float* %tmp14427, i64 1
- %tmp14429 = getelementptr inbounds float* %tmp14428, i64 1
- %tmp14430 = getelementptr inbounds float* %tmp14429, i64 1
- %tmp14431 = getelementptr inbounds float* %tmp14430, i64 1
- %tmp14432 = getelementptr inbounds float* %tmp14431, i64 1
- %tmp14433 = getelementptr inbounds float* %tmp14432, i64 1
- %tmp14434 = getelementptr inbounds float* %tmp14433, i64 1
- %tmp14435 = getelementptr inbounds float* %tmp14434, i64 1
- %tmp14436 = getelementptr inbounds float* %tmp14435, i64 1
- %tmp14437 = getelementptr inbounds float* %tmp14436, i64 1
- %tmp14438 = getelementptr inbounds float* %tmp14437, i64 1
- %tmp14439 = getelementptr inbounds float* %tmp14438, i64 1
- %tmp14440 = getelementptr inbounds float* %tmp14439, i64 1
- %tmp14441 = getelementptr inbounds float* %tmp14440, i64 1
- %tmp14442 = getelementptr inbounds float* %tmp14441, i64 1
- %tmp14443 = getelementptr inbounds float* %tmp14442, i64 1
- %tmp14444 = getelementptr inbounds float* %tmp14443, i64 1
- %tmp14445 = getelementptr inbounds float* %tmp14444, i64 1
- %tmp14446 = getelementptr inbounds float* %tmp14445, i64 1
- %tmp14447 = getelementptr inbounds float* %tmp14446, i64 1
- %tmp14448 = getelementptr inbounds float* %tmp14447, i64 1
- %tmp14449 = getelementptr inbounds float* %tmp14448, i64 1
- %tmp14450 = getelementptr inbounds float* %tmp14449, i64 1
- %tmp14451 = getelementptr inbounds float* %tmp14450, i64 1
- %tmp14452 = getelementptr inbounds float* %tmp14451, i64 1
- %tmp14453 = getelementptr inbounds float* %tmp14452, i64 1
- %tmp14454 = getelementptr inbounds float* %tmp14453, i64 1
- %tmp14455 = getelementptr inbounds float* %tmp14454, i64 1
- %tmp14456 = getelementptr inbounds float* %tmp14455, i64 1
- %tmp14457 = getelementptr inbounds float* %tmp14456, i64 1
- %tmp14458 = getelementptr inbounds float* %tmp14457, i64 1
- %tmp14459 = getelementptr inbounds float* %tmp14458, i64 1
- %tmp14460 = getelementptr inbounds float* %tmp14459, i64 1
- %tmp14461 = getelementptr inbounds float* %tmp14460, i64 1
- %tmp14462 = getelementptr inbounds float* %tmp14461, i64 1
- %tmp14463 = getelementptr inbounds float* %tmp14462, i64 1
- %tmp14464 = getelementptr inbounds float* %tmp14463, i64 1
- %tmp14465 = getelementptr inbounds float* %tmp14464, i64 1
- %tmp14466 = getelementptr inbounds float* %tmp14465, i64 1
- %tmp14467 = getelementptr inbounds float* %tmp14466, i64 1
- %tmp14468 = getelementptr inbounds float* %tmp14467, i64 1
- %tmp14469 = getelementptr inbounds float* %tmp14468, i64 1
- %tmp14470 = getelementptr inbounds float* %tmp14469, i64 1
- %tmp14471 = getelementptr inbounds float* %tmp14470, i64 1
- %tmp14472 = getelementptr inbounds float* %tmp14471, i64 1
- %tmp14473 = getelementptr inbounds float* %tmp14472, i64 1
- %tmp14474 = getelementptr inbounds float* %tmp14473, i64 1
- %tmp14475 = getelementptr inbounds float* %tmp14474, i64 1
- %tmp14476 = getelementptr inbounds float* %tmp14475, i64 1
- %tmp14477 = getelementptr inbounds float* %tmp14476, i64 1
- %tmp14478 = getelementptr inbounds float* %tmp14477, i64 1
- %tmp14479 = getelementptr inbounds float* %tmp14478, i64 1
- %tmp14480 = getelementptr inbounds float* %tmp14479, i64 1
- %tmp14481 = getelementptr inbounds float* %tmp14480, i64 1
- %tmp14482 = getelementptr inbounds float* %tmp14481, i64 1
- %tmp14483 = getelementptr inbounds float* %tmp14482, i64 1
- %tmp14484 = getelementptr inbounds float* %tmp14483, i64 1
- %tmp14485 = getelementptr inbounds float* %tmp14484, i64 1
- %tmp14486 = getelementptr inbounds float* %tmp14485, i64 1
- %tmp14487 = getelementptr inbounds float* %tmp14486, i64 1
- %tmp14488 = getelementptr inbounds float* %tmp14487, i64 1
- %tmp14489 = getelementptr inbounds float* %tmp14488, i64 1
- %tmp14490 = getelementptr inbounds float* %tmp14489, i64 1
- %tmp14491 = getelementptr inbounds float* %tmp14490, i64 1
- %tmp14492 = getelementptr inbounds float* %tmp14491, i64 1
- %tmp14493 = getelementptr inbounds float* %tmp14492, i64 1
- %tmp14494 = getelementptr inbounds float* %tmp14493, i64 1
- %tmp14495 = getelementptr inbounds float* %tmp14494, i64 1
- %tmp14496 = getelementptr inbounds float* %tmp14495, i64 1
- %tmp14497 = getelementptr inbounds float* %tmp14496, i64 1
- %tmp14498 = getelementptr inbounds float* %tmp14497, i64 1
- %tmp14499 = getelementptr inbounds float* %tmp14498, i64 1
- %tmp14500 = getelementptr inbounds float* %tmp14499, i64 1
- %tmp14501 = getelementptr inbounds float* %tmp14500, i64 1
- %tmp14502 = getelementptr inbounds float* %tmp14501, i64 1
- %tmp14503 = getelementptr inbounds float* %tmp14502, i64 1
- %tmp14504 = getelementptr inbounds float* %tmp14503, i64 1
- %tmp14505 = getelementptr inbounds float* %tmp14504, i64 1
- %tmp14506 = getelementptr inbounds float* %tmp14505, i64 1
- %tmp14507 = getelementptr inbounds float* %tmp14506, i64 1
- %tmp14508 = getelementptr inbounds float* %tmp14507, i64 1
- %tmp14509 = getelementptr inbounds float* %tmp14508, i64 1
- %tmp14510 = getelementptr inbounds float* %tmp14509, i64 1
- %tmp14511 = getelementptr inbounds float* %tmp14510, i64 1
- %tmp14512 = getelementptr inbounds float* %tmp14511, i64 1
- %tmp14513 = getelementptr inbounds float* %tmp14512, i64 1
- %tmp14514 = getelementptr inbounds float* %tmp14513, i64 1
- %tmp14515 = getelementptr inbounds float* %tmp14514, i64 1
- %tmp14516 = getelementptr inbounds float* %tmp14515, i64 1
- %tmp14517 = getelementptr inbounds float* %tmp14516, i64 1
- %tmp14518 = getelementptr inbounds float* %tmp14517, i64 1
- %tmp14519 = getelementptr inbounds float* %tmp14518, i64 1
- %tmp14520 = getelementptr inbounds float* %tmp14519, i64 1
- %tmp14521 = getelementptr inbounds float* %tmp14520, i64 1
- %tmp14522 = getelementptr inbounds float* %tmp14521, i64 1
- %tmp14523 = getelementptr inbounds float* %tmp14522, i64 1
- %tmp14524 = getelementptr inbounds float* %tmp14523, i64 1
- %tmp14525 = getelementptr inbounds float* %tmp14524, i64 1
- %tmp14526 = getelementptr inbounds float* %tmp14525, i64 1
- %tmp14527 = getelementptr inbounds float* %tmp14526, i64 1
- %tmp14528 = getelementptr inbounds float* %tmp14527, i64 1
- %tmp14529 = getelementptr inbounds float* %tmp14528, i64 1
- %tmp14530 = getelementptr inbounds float* %tmp14529, i64 1
- %tmp14531 = getelementptr inbounds float* %tmp14530, i64 1
- %tmp14532 = getelementptr inbounds float* %tmp14531, i64 1
- %tmp14533 = getelementptr inbounds float* %tmp14532, i64 1
- %tmp14534 = getelementptr inbounds float* %tmp14533, i64 1
- %tmp14535 = getelementptr inbounds float* %tmp14534, i64 1
- %tmp14536 = getelementptr inbounds float* %tmp14535, i64 1
- %tmp14537 = getelementptr inbounds float* %tmp14536, i64 1
- %tmp14538 = getelementptr inbounds float* %tmp14537, i64 1
- %tmp14539 = getelementptr inbounds float* %tmp14538, i64 1
- %tmp14540 = getelementptr inbounds float* %tmp14539, i64 1
- %tmp14541 = getelementptr inbounds float* %tmp14540, i64 1
- %tmp14542 = getelementptr inbounds float* %tmp14541, i64 1
- %tmp14543 = getelementptr inbounds float* %tmp14542, i64 1
- %tmp14544 = getelementptr inbounds float* %tmp14543, i64 1
- %tmp14545 = getelementptr inbounds float* %tmp14544, i64 1
- %tmp14546 = getelementptr inbounds float* %tmp14545, i64 1
- %tmp14547 = getelementptr inbounds float* %tmp14546, i64 1
- %tmp14548 = getelementptr inbounds float* %tmp14547, i64 1
- %tmp14549 = getelementptr inbounds float* %tmp14548, i64 1
- %tmp14550 = getelementptr inbounds float* %tmp14549, i64 1
- %tmp14551 = getelementptr inbounds float* %tmp14550, i64 1
- %tmp14552 = getelementptr inbounds float* %tmp14551, i64 1
- %tmp14553 = getelementptr inbounds float* %tmp14552, i64 1
- %tmp14554 = getelementptr inbounds float* %tmp14553, i64 1
- %tmp14555 = getelementptr inbounds float* %tmp14554, i64 1
- %tmp14556 = getelementptr inbounds float* %tmp14555, i64 1
- %tmp14557 = getelementptr inbounds float* %tmp14556, i64 1
- %tmp14558 = getelementptr inbounds float* %tmp14557, i64 1
- %tmp14559 = getelementptr inbounds float* %tmp14558, i64 1
- %tmp14560 = getelementptr inbounds float* %tmp14559, i64 1
- %tmp14561 = getelementptr inbounds float* %tmp14560, i64 1
- %tmp14562 = getelementptr inbounds float* %tmp14561, i64 1
- %tmp14563 = getelementptr inbounds float* %tmp14562, i64 1
- %tmp14564 = getelementptr inbounds float* %tmp14563, i64 1
- %tmp14565 = getelementptr inbounds float* %tmp14564, i64 1
- %tmp14566 = getelementptr inbounds float* %tmp14565, i64 1
- %tmp14567 = getelementptr inbounds float* %tmp14566, i64 1
- %tmp14568 = getelementptr inbounds float* %tmp14567, i64 1
- %tmp14569 = getelementptr inbounds float* %tmp14568, i64 1
- %tmp14570 = getelementptr inbounds float* %tmp14569, i64 1
- %tmp14571 = getelementptr inbounds float* %tmp14570, i64 1
- %tmp14572 = getelementptr inbounds float* %tmp14571, i64 1
- %tmp14573 = getelementptr inbounds float* %tmp14572, i64 1
- %tmp14574 = getelementptr inbounds float* %tmp14573, i64 1
- %tmp14575 = getelementptr inbounds float* %tmp14574, i64 1
- %tmp14576 = getelementptr inbounds float* %tmp14575, i64 1
- %tmp14577 = getelementptr inbounds float* %tmp14576, i64 1
- %tmp14578 = getelementptr inbounds float* %tmp14577, i64 1
- %tmp14579 = getelementptr inbounds float* %tmp14578, i64 1
- %tmp14580 = getelementptr inbounds float* %tmp14579, i64 1
- %tmp14581 = getelementptr inbounds float* %tmp14580, i64 1
- %tmp14582 = getelementptr inbounds float* %tmp14581, i64 1
- %tmp14583 = getelementptr inbounds float* %tmp14582, i64 1
- %tmp14584 = getelementptr inbounds float* %tmp14583, i64 1
- %tmp14585 = getelementptr inbounds float* %tmp14584, i64 1
- %tmp14586 = getelementptr inbounds float* %tmp14585, i64 1
- %tmp14587 = getelementptr inbounds float* %tmp14586, i64 1
- %tmp14588 = getelementptr inbounds float* %tmp14587, i64 1
- %tmp14589 = getelementptr inbounds float* %tmp14588, i64 1
- %tmp14590 = getelementptr inbounds float* %tmp14589, i64 1
- %tmp14591 = getelementptr inbounds float* %tmp14590, i64 1
- %tmp14592 = getelementptr inbounds float* %tmp14591, i64 1
- %tmp14593 = getelementptr inbounds float* %tmp14592, i64 1
- %tmp14594 = getelementptr inbounds float* %tmp14593, i64 1
- %tmp14595 = getelementptr inbounds float* %tmp14594, i64 1
- %tmp14596 = getelementptr inbounds float* %tmp14595, i64 1
- %tmp14597 = getelementptr inbounds float* %tmp14596, i64 1
- %tmp14598 = getelementptr inbounds float* %tmp14597, i64 1
- %tmp14599 = getelementptr inbounds float* %tmp14598, i64 1
- %tmp14600 = getelementptr inbounds float* %tmp14599, i64 1
- %tmp14601 = getelementptr inbounds float* %tmp14600, i64 1
- %tmp14602 = getelementptr inbounds float* %tmp14601, i64 1
- %tmp14603 = getelementptr inbounds float* %tmp14602, i64 1
- %tmp14604 = getelementptr inbounds float* %tmp14603, i64 1
- %tmp14605 = getelementptr inbounds float* %tmp14604, i64 1
- %tmp14606 = getelementptr inbounds float* %tmp14605, i64 1
- %tmp14607 = getelementptr inbounds float* %tmp14606, i64 1
- %tmp14608 = getelementptr inbounds float* %tmp14607, i64 1
- %tmp14609 = getelementptr inbounds float* %tmp14608, i64 1
- %tmp14610 = getelementptr inbounds float* %tmp14609, i64 1
- %tmp14611 = getelementptr inbounds float* %tmp14610, i64 1
- %tmp14612 = getelementptr inbounds float* %tmp14611, i64 1
- %tmp14613 = getelementptr inbounds float* %tmp14612, i64 1
- %tmp14614 = getelementptr inbounds float* %tmp14613, i64 1
- %tmp14615 = getelementptr inbounds float* %tmp14614, i64 1
- %tmp14616 = getelementptr inbounds float* %tmp14615, i64 1
- %tmp14617 = getelementptr inbounds float* %tmp14616, i64 1
- %tmp14618 = getelementptr inbounds float* %tmp14617, i64 1
- %tmp14619 = getelementptr inbounds float* %tmp14618, i64 1
- %tmp14620 = getelementptr inbounds float* %tmp14619, i64 1
- %tmp14621 = getelementptr inbounds float* %tmp14620, i64 1
- %tmp14622 = getelementptr inbounds float* %tmp14621, i64 1
- %tmp14623 = getelementptr inbounds float* %tmp14622, i64 1
- %tmp14624 = getelementptr inbounds float* %tmp14623, i64 1
- %tmp14625 = getelementptr inbounds float* %tmp14624, i64 1
- %tmp14626 = getelementptr inbounds float* %tmp14625, i64 1
- %tmp14627 = getelementptr inbounds float* %tmp14626, i64 1
- %tmp14628 = getelementptr inbounds float* %tmp14627, i64 1
- %tmp14629 = getelementptr inbounds float* %tmp14628, i64 1
- %tmp14630 = getelementptr inbounds float* %tmp14629, i64 1
- %tmp14631 = getelementptr inbounds float* %tmp14630, i64 1
- %tmp14632 = getelementptr inbounds float* %tmp14631, i64 1
- %tmp14633 = getelementptr inbounds float* %tmp14632, i64 1
- %tmp14634 = getelementptr inbounds float* %tmp14633, i64 1
- %tmp14635 = getelementptr inbounds float* %tmp14634, i64 1
- %tmp14636 = getelementptr inbounds float* %tmp14635, i64 1
- %tmp14637 = getelementptr inbounds float* %tmp14636, i64 1
- %tmp14638 = getelementptr inbounds float* %tmp14637, i64 1
- %tmp14639 = getelementptr inbounds float* %tmp14638, i64 1
- %tmp14640 = getelementptr inbounds float* %tmp14639, i64 1
- %tmp14641 = getelementptr inbounds float* %tmp14640, i64 1
- %tmp14642 = getelementptr inbounds float* %tmp14641, i64 1
- %tmp14643 = getelementptr inbounds float* %tmp14642, i64 1
- %tmp14644 = getelementptr inbounds float* %tmp14643, i64 1
- %tmp14645 = getelementptr inbounds float* %tmp14644, i64 1
- %tmp14646 = getelementptr inbounds float* %tmp14645, i64 1
- %tmp14647 = getelementptr inbounds float* %tmp14646, i64 1
- %tmp14648 = getelementptr inbounds float* %tmp14647, i64 1
- %tmp14649 = getelementptr inbounds float* %tmp14648, i64 1
- %tmp14650 = getelementptr inbounds float* %tmp14649, i64 1
- %tmp14651 = getelementptr inbounds float* %tmp14650, i64 1
- %tmp14652 = getelementptr inbounds float* %tmp14651, i64 1
- %tmp14653 = getelementptr inbounds float* %tmp14652, i64 1
- %tmp14654 = getelementptr inbounds float* %tmp14653, i64 1
- %tmp14655 = getelementptr inbounds float* %tmp14654, i64 1
- %tmp14656 = getelementptr inbounds float* %tmp14655, i64 1
- %tmp14657 = getelementptr inbounds float* %tmp14656, i64 1
- %tmp14658 = getelementptr inbounds float* %tmp14657, i64 1
- %tmp14659 = getelementptr inbounds float* %tmp14658, i64 1
- %tmp14660 = getelementptr inbounds float* %tmp14659, i64 1
- %tmp14661 = getelementptr inbounds float* %tmp14660, i64 1
- %tmp14662 = getelementptr inbounds float* %tmp14661, i64 1
- %tmp14663 = getelementptr inbounds float* %tmp14662, i64 1
- %tmp14664 = getelementptr inbounds float* %tmp14663, i64 1
- %tmp14665 = getelementptr inbounds float* %tmp14664, i64 1
- %tmp14666 = getelementptr inbounds float* %tmp14665, i64 1
- %tmp14667 = getelementptr inbounds float* %tmp14666, i64 1
- %tmp14668 = getelementptr inbounds float* %tmp14667, i64 1
- %tmp14669 = getelementptr inbounds float* %tmp14668, i64 1
- %tmp14670 = getelementptr inbounds float* %tmp14669, i64 1
- %tmp14671 = getelementptr inbounds float* %tmp14670, i64 1
- %tmp14672 = getelementptr inbounds float* %tmp14671, i64 1
- %tmp14673 = getelementptr inbounds float* %tmp14672, i64 1
- %tmp14674 = getelementptr inbounds float* %tmp14673, i64 1
- %tmp14675 = getelementptr inbounds float* %tmp14674, i64 1
- %tmp14676 = getelementptr inbounds float* %tmp14675, i64 1
- %tmp14677 = getelementptr inbounds float* %tmp14676, i64 1
- %tmp14678 = getelementptr inbounds float* %tmp14677, i64 1
- %tmp14679 = getelementptr inbounds float* %tmp14678, i64 1
- %tmp14680 = getelementptr inbounds float* %tmp14679, i64 1
- %tmp14681 = getelementptr inbounds float* %tmp14680, i64 1
- %tmp14682 = getelementptr inbounds float* %tmp14681, i64 1
- %tmp14683 = getelementptr inbounds float* %tmp14682, i64 1
- %tmp14684 = getelementptr inbounds float* %tmp14683, i64 1
- %tmp14685 = getelementptr inbounds float* %tmp14684, i64 1
- %tmp14686 = getelementptr inbounds float* %tmp14685, i64 1
- %tmp14687 = getelementptr inbounds float* %tmp14686, i64 1
- %tmp14688 = getelementptr inbounds float* %tmp14687, i64 1
- %tmp14689 = getelementptr inbounds float* %tmp14688, i64 1
- %tmp14690 = getelementptr inbounds float* %tmp14689, i64 1
- %tmp14691 = getelementptr inbounds float* %tmp14690, i64 1
- %tmp14692 = getelementptr inbounds float* %tmp14691, i64 1
- %tmp14693 = getelementptr inbounds float* %tmp14692, i64 1
- %tmp14694 = getelementptr inbounds float* %tmp14693, i64 1
- %tmp14695 = getelementptr inbounds float* %tmp14694, i64 1
- %tmp14696 = getelementptr inbounds float* %tmp14695, i64 1
- %tmp14697 = getelementptr inbounds float* %tmp14696, i64 1
- %tmp14698 = getelementptr inbounds float* %tmp14697, i64 1
- %tmp14699 = getelementptr inbounds float* %tmp14698, i64 1
- %tmp14700 = getelementptr inbounds float* %tmp14699, i64 1
- %tmp14701 = getelementptr inbounds float* %tmp14700, i64 1
- %tmp14702 = getelementptr inbounds float* %tmp14701, i64 1
- %tmp14703 = getelementptr inbounds float* %tmp14702, i64 1
- %tmp14704 = getelementptr inbounds float* %tmp14703, i64 1
- %tmp14705 = getelementptr inbounds float* %tmp14704, i64 1
- %tmp14706 = getelementptr inbounds float* %tmp14705, i64 1
- %tmp14707 = getelementptr inbounds float* %tmp14706, i64 1
- %tmp14708 = getelementptr inbounds float* %tmp14707, i64 1
- %tmp14709 = getelementptr inbounds float* %tmp14708, i64 1
- %tmp14710 = getelementptr inbounds float* %tmp14709, i64 1
- %tmp14711 = getelementptr inbounds float* %tmp14710, i64 1
- %tmp14712 = getelementptr inbounds float* %tmp14711, i64 1
- %tmp14713 = getelementptr inbounds float* %tmp14712, i64 1
- %tmp14714 = getelementptr inbounds float* %tmp14713, i64 1
- %tmp14715 = getelementptr inbounds float* %tmp14714, i64 1
- %tmp14716 = getelementptr inbounds float* %tmp14715, i64 1
- %tmp14717 = getelementptr inbounds float* %tmp14716, i64 1
- %tmp14718 = getelementptr inbounds float* %tmp14717, i64 1
- %tmp14719 = getelementptr inbounds float* %tmp14718, i64 1
- %tmp14720 = getelementptr inbounds float* %tmp14719, i64 1
- %tmp14721 = getelementptr inbounds float* %tmp14720, i64 1
- %tmp14722 = getelementptr inbounds float* %tmp14721, i64 1
- %tmp14723 = getelementptr inbounds float* %tmp14722, i64 1
- %tmp14724 = getelementptr inbounds float* %tmp14723, i64 1
- %tmp14725 = getelementptr inbounds float* %tmp14724, i64 1
- %tmp14726 = getelementptr inbounds float* %tmp14725, i64 1
- %tmp14727 = getelementptr inbounds float* %tmp14726, i64 1
- %tmp14728 = getelementptr inbounds float* %tmp14727, i64 1
- %tmp14729 = getelementptr inbounds float* %tmp14728, i64 1
- %tmp14730 = getelementptr inbounds float* %tmp14729, i64 1
- %tmp14731 = getelementptr inbounds float* %tmp14730, i64 1
- %tmp14732 = getelementptr inbounds float* %tmp14731, i64 1
- %tmp14733 = getelementptr inbounds float* %tmp14732, i64 1
- %tmp14734 = getelementptr inbounds float* %tmp14733, i64 1
- %tmp14735 = getelementptr inbounds float* %tmp14734, i64 1
- %tmp14736 = getelementptr inbounds float* %tmp14735, i64 1
- %tmp14737 = getelementptr inbounds float* %tmp14736, i64 1
- %tmp14738 = getelementptr inbounds float* %tmp14737, i64 1
- %tmp14739 = getelementptr inbounds float* %tmp14738, i64 1
- %tmp14740 = getelementptr inbounds float* %tmp14739, i64 1
- %tmp14741 = getelementptr inbounds float* %tmp14740, i64 1
- %tmp14742 = getelementptr inbounds float* %tmp14741, i64 1
- %tmp14743 = getelementptr inbounds float* %tmp14742, i64 1
- %tmp14744 = getelementptr inbounds float* %tmp14743, i64 1
- %tmp14745 = getelementptr inbounds float* %tmp14744, i64 1
- %tmp14746 = getelementptr inbounds float* %tmp14745, i64 1
- %tmp14747 = getelementptr inbounds float* %tmp14746, i64 1
- %tmp14748 = getelementptr inbounds float* %tmp14747, i64 1
- %tmp14749 = getelementptr inbounds float* %tmp14748, i64 1
- %tmp14750 = getelementptr inbounds float* %tmp14749, i64 1
- %tmp14751 = getelementptr inbounds float* %tmp14750, i64 1
- %tmp14752 = getelementptr inbounds float* %tmp14751, i64 1
- %tmp14753 = getelementptr inbounds float* %tmp14752, i64 1
- %tmp14754 = getelementptr inbounds float* %tmp14753, i64 1
- %tmp14755 = getelementptr inbounds float* %tmp14754, i64 1
- %tmp14756 = getelementptr inbounds float* %tmp14755, i64 1
- %tmp14757 = getelementptr inbounds float* %tmp14756, i64 1
- %tmp14758 = getelementptr inbounds float* %tmp14757, i64 1
- %tmp14759 = getelementptr inbounds float* %tmp14758, i64 1
- %tmp14760 = getelementptr inbounds float* %tmp14759, i64 1
- %tmp14761 = getelementptr inbounds float* %tmp14760, i64 1
- %tmp14762 = getelementptr inbounds float* %tmp14761, i64 1
- %tmp14763 = getelementptr inbounds float* %tmp14762, i64 1
- %tmp14764 = getelementptr inbounds float* %tmp14763, i64 1
- %tmp14765 = getelementptr inbounds float* %tmp14764, i64 1
- %tmp14766 = getelementptr inbounds float* %tmp14765, i64 1
- %tmp14767 = getelementptr inbounds float* %tmp14766, i64 1
- %tmp14768 = getelementptr inbounds float* %tmp14767, i64 1
- %tmp14769 = getelementptr inbounds float* %tmp14768, i64 1
- %tmp14770 = getelementptr inbounds float* %tmp14769, i64 1
- %tmp14771 = getelementptr inbounds float* %tmp14770, i64 1
- %tmp14772 = getelementptr inbounds float* %tmp14771, i64 1
- %tmp14773 = getelementptr inbounds float* %tmp14772, i64 1
- %tmp14774 = getelementptr inbounds float* %tmp14773, i64 1
- %tmp14775 = getelementptr inbounds float* %tmp14774, i64 1
- %tmp14776 = getelementptr inbounds float* %tmp14775, i64 1
- %tmp14777 = getelementptr inbounds float* %tmp14776, i64 1
- %tmp14778 = getelementptr inbounds float* %tmp14777, i64 1
- %tmp14779 = getelementptr inbounds float* %tmp14778, i64 1
- %tmp14780 = getelementptr inbounds float* %tmp14779, i64 1
- %tmp14781 = getelementptr inbounds float* %tmp14780, i64 1
- %tmp14782 = getelementptr inbounds float* %tmp14781, i64 1
- %tmp14783 = getelementptr inbounds float* %tmp14782, i64 1
- %tmp14784 = getelementptr inbounds float* %tmp14783, i64 1
- %tmp14785 = getelementptr inbounds float* %tmp14784, i64 1
- %tmp14786 = getelementptr inbounds float* %tmp14785, i64 1
- %tmp14787 = getelementptr inbounds float* %tmp14786, i64 1
- %tmp14788 = getelementptr inbounds float* %tmp14787, i64 1
- %tmp14789 = getelementptr inbounds float* %tmp14788, i64 1
- %tmp14790 = getelementptr inbounds float* %tmp14789, i64 1
- %tmp14791 = getelementptr inbounds float* %tmp14790, i64 1
- %tmp14792 = getelementptr inbounds float* %tmp14791, i64 1
- %tmp14793 = getelementptr inbounds float* %tmp14792, i64 1
- %tmp14794 = getelementptr inbounds float* %tmp14793, i64 1
- %tmp14795 = getelementptr inbounds float* %tmp14794, i64 1
- %tmp14796 = getelementptr inbounds float* %tmp14795, i64 1
- %tmp14797 = getelementptr inbounds float* %tmp14796, i64 1
- %tmp14798 = getelementptr inbounds float* %tmp14797, i64 1
- %tmp14799 = getelementptr inbounds float* %tmp14798, i64 1
- %tmp14800 = getelementptr inbounds float* %tmp14799, i64 1
- %tmp14801 = getelementptr inbounds float* %tmp14800, i64 1
- %tmp14802 = getelementptr inbounds float* %tmp14801, i64 1
- %tmp14803 = getelementptr inbounds float* %tmp14802, i64 1
- %tmp14804 = getelementptr inbounds float* %tmp14803, i64 1
- %tmp14805 = getelementptr inbounds float* %tmp14804, i64 1
- %tmp14806 = getelementptr inbounds float* %tmp14805, i64 1
- %tmp14807 = getelementptr inbounds float* %tmp14806, i64 1
- %tmp14808 = getelementptr inbounds float* %tmp14807, i64 1
- %tmp14809 = getelementptr inbounds float* %tmp14808, i64 1
- %tmp14810 = getelementptr inbounds float* %tmp14809, i64 1
- %tmp14811 = getelementptr inbounds float* %tmp14810, i64 1
- %tmp14812 = getelementptr inbounds float* %tmp14811, i64 1
- %tmp14813 = getelementptr inbounds float* %tmp14812, i64 1
- %tmp14814 = getelementptr inbounds float* %tmp14813, i64 1
- %tmp14815 = getelementptr inbounds float* %tmp14814, i64 1
- %tmp14816 = getelementptr inbounds float* %tmp14815, i64 1
- %tmp14817 = getelementptr inbounds float* %tmp14816, i64 1
- %tmp14818 = getelementptr inbounds float* %tmp14817, i64 1
- %tmp14819 = getelementptr inbounds float* %tmp14818, i64 1
- %tmp14820 = getelementptr inbounds float* %tmp14819, i64 1
- %tmp14821 = getelementptr inbounds float* %tmp14820, i64 1
- %tmp14822 = getelementptr inbounds float* %tmp14821, i64 1
- %tmp14823 = getelementptr inbounds float* %tmp14822, i64 1
- %tmp14824 = getelementptr inbounds float* %tmp14823, i64 1
- %tmp14825 = getelementptr inbounds float* %tmp14824, i64 1
- %tmp14826 = getelementptr inbounds float* %tmp14825, i64 1
- %tmp14827 = getelementptr inbounds float* %tmp14826, i64 1
- %tmp14828 = getelementptr inbounds float* %tmp14827, i64 1
- %tmp14829 = getelementptr inbounds float* %tmp14828, i64 1
- %tmp14830 = getelementptr inbounds float* %tmp14829, i64 1
- %tmp14831 = getelementptr inbounds float* %tmp14830, i64 1
- %tmp14832 = getelementptr inbounds float* %tmp14831, i64 1
- %tmp14833 = getelementptr inbounds float* %tmp14832, i64 1
- %tmp14834 = getelementptr inbounds float* %tmp14833, i64 1
- %tmp14835 = getelementptr inbounds float* %tmp14834, i64 1
- %tmp14836 = getelementptr inbounds float* %tmp14835, i64 1
- %tmp14837 = getelementptr inbounds float* %tmp14836, i64 1
- %tmp14838 = getelementptr inbounds float* %tmp14837, i64 1
- %tmp14839 = getelementptr inbounds float* %tmp14838, i64 1
- %tmp14840 = getelementptr inbounds float* %tmp14839, i64 1
- %tmp14841 = getelementptr inbounds float* %tmp14840, i64 1
- %tmp14842 = getelementptr inbounds float* %tmp14841, i64 1
- %tmp14843 = getelementptr inbounds float* %tmp14842, i64 1
- %tmp14844 = getelementptr inbounds float* %tmp14843, i64 1
- %tmp14845 = getelementptr inbounds float* %tmp14844, i64 1
- %tmp14846 = getelementptr inbounds float* %tmp14845, i64 1
- %tmp14847 = getelementptr inbounds float* %tmp14846, i64 1
- %tmp14848 = getelementptr inbounds float* %tmp14847, i64 1
- %tmp14849 = getelementptr inbounds float* %tmp14848, i64 1
- %tmp14850 = getelementptr inbounds float* %tmp14849, i64 1
- %tmp14851 = getelementptr inbounds float* %tmp14850, i64 1
- %tmp14852 = getelementptr inbounds float* %tmp14851, i64 1
- %tmp14853 = getelementptr inbounds float* %tmp14852, i64 1
- %tmp14854 = getelementptr inbounds float* %tmp14853, i64 1
- %tmp14855 = getelementptr inbounds float* %tmp14854, i64 1
- %tmp14856 = getelementptr inbounds float* %tmp14855, i64 1
- %tmp14857 = getelementptr inbounds float* %tmp14856, i64 1
- %tmp14858 = getelementptr inbounds float* %tmp14857, i64 1
- %tmp14859 = getelementptr inbounds float* %tmp14858, i64 1
- %tmp14860 = getelementptr inbounds float* %tmp14859, i64 1
- %tmp14861 = getelementptr inbounds float* %tmp14860, i64 1
- %tmp14862 = getelementptr inbounds float* %tmp14861, i64 1
- %tmp14863 = getelementptr inbounds float* %tmp14862, i64 1
- %tmp14864 = getelementptr inbounds float* %tmp14863, i64 1
- %tmp14865 = getelementptr inbounds float* %tmp14864, i64 1
- %tmp14866 = getelementptr inbounds float* %tmp14865, i64 1
- %tmp14867 = getelementptr inbounds float* %tmp14866, i64 1
- %tmp14868 = getelementptr inbounds float* %tmp14867, i64 1
- %tmp14869 = getelementptr inbounds float* %tmp14868, i64 1
- %tmp14870 = getelementptr inbounds float* %tmp14869, i64 1
- %tmp14871 = getelementptr inbounds float* %tmp14870, i64 1
- %tmp14872 = getelementptr inbounds float* %tmp14871, i64 1
- %tmp14873 = getelementptr inbounds float* %tmp14872, i64 1
- %tmp14874 = getelementptr inbounds float* %tmp14873, i64 1
- %tmp14875 = getelementptr inbounds float* %tmp14874, i64 1
- %tmp14876 = getelementptr inbounds float* %tmp14875, i64 1
- %tmp14877 = getelementptr inbounds float* %tmp14876, i64 1
- %tmp14878 = getelementptr inbounds float* %tmp14877, i64 1
- %tmp14879 = getelementptr inbounds float* %tmp14878, i64 1
- %tmp14880 = getelementptr inbounds float* %tmp14879, i64 1
- %tmp14881 = getelementptr inbounds float* %tmp14880, i64 1
- %tmp14882 = getelementptr inbounds float* %tmp14881, i64 1
- %tmp14883 = getelementptr inbounds float* %tmp14882, i64 1
- %tmp14884 = getelementptr inbounds float* %tmp14883, i64 1
- %tmp14885 = getelementptr inbounds float* %tmp14884, i64 1
- %tmp14886 = getelementptr inbounds float* %tmp14885, i64 1
- %tmp14887 = getelementptr inbounds float* %tmp14886, i64 1
- %tmp14888 = getelementptr inbounds float* %tmp14887, i64 1
- %tmp14889 = getelementptr inbounds float* %tmp14888, i64 1
- %tmp14890 = getelementptr inbounds float* %tmp14889, i64 1
- %tmp14891 = getelementptr inbounds float* %tmp14890, i64 1
- %tmp14892 = getelementptr inbounds float* %tmp14891, i64 1
- %tmp14893 = getelementptr inbounds float* %tmp14892, i64 1
- %tmp14894 = getelementptr inbounds float* %tmp14893, i64 1
- %tmp14895 = getelementptr inbounds float* %tmp14894, i64 1
- %tmp14896 = getelementptr inbounds float* %tmp14895, i64 1
- %tmp14897 = getelementptr inbounds float* %tmp14896, i64 1
- %tmp14898 = getelementptr inbounds float* %tmp14897, i64 1
- %tmp14899 = getelementptr inbounds float* %tmp14898, i64 1
- %tmp14900 = getelementptr inbounds float* %tmp14899, i64 1
- %tmp14901 = getelementptr inbounds float* %tmp14900, i64 1
- %tmp14902 = getelementptr inbounds float* %tmp14901, i64 1
- %tmp14903 = getelementptr inbounds float* %tmp14902, i64 1
- %tmp14904 = getelementptr inbounds float* %tmp14903, i64 1
- %tmp14905 = getelementptr inbounds float* %tmp14904, i64 1
- %tmp14906 = getelementptr inbounds float* %tmp14905, i64 1
- %tmp14907 = getelementptr inbounds float* %tmp14906, i64 1
- %tmp14908 = getelementptr inbounds float* %tmp14907, i64 1
- %tmp14909 = getelementptr inbounds float* %tmp14908, i64 1
- %tmp14910 = getelementptr inbounds float* %tmp14909, i64 1
- %tmp14911 = getelementptr inbounds float* %tmp14910, i64 1
- %tmp14912 = getelementptr inbounds float* %tmp14911, i64 1
- %tmp14913 = getelementptr inbounds float* %tmp14912, i64 1
- %tmp14914 = getelementptr inbounds float* %tmp14913, i64 1
- %tmp14915 = getelementptr inbounds float* %tmp14914, i64 1
- %tmp14916 = getelementptr inbounds float* %tmp14915, i64 1
- %tmp14917 = getelementptr inbounds float* %tmp14916, i64 1
- %tmp14918 = getelementptr inbounds float* %tmp14917, i64 1
- %tmp14919 = getelementptr inbounds float* %tmp14918, i64 1
- %tmp14920 = getelementptr inbounds float* %tmp14919, i64 1
- %tmp14921 = getelementptr inbounds float* %tmp14920, i64 1
- %tmp14922 = getelementptr inbounds float* %tmp14921, i64 1
- %tmp14923 = getelementptr inbounds float* %tmp14922, i64 1
- %tmp14924 = getelementptr inbounds float* %tmp14923, i64 1
- %tmp14925 = getelementptr inbounds float* %tmp14924, i64 1
- %tmp14926 = getelementptr inbounds float* %tmp14925, i64 1
- %tmp14927 = getelementptr inbounds float* %tmp14926, i64 1
- %tmp14928 = getelementptr inbounds float* %tmp14927, i64 1
- %tmp14929 = getelementptr inbounds float* %tmp14928, i64 1
- %tmp14930 = getelementptr inbounds float* %tmp14929, i64 1
- %tmp14931 = getelementptr inbounds float* %tmp14930, i64 1
- %tmp14932 = getelementptr inbounds float* %tmp14931, i64 1
- %tmp14933 = getelementptr inbounds float* %tmp14932, i64 1
- %tmp14934 = getelementptr inbounds float* %tmp14933, i64 1
- %tmp14935 = getelementptr inbounds float* %tmp14934, i64 1
- %tmp14936 = getelementptr inbounds float* %tmp14935, i64 1
- %tmp14937 = getelementptr inbounds float* %tmp14936, i64 1
- %tmp14938 = getelementptr inbounds float* %tmp14937, i64 1
- %tmp14939 = getelementptr inbounds float* %tmp14938, i64 1
- %tmp14940 = getelementptr inbounds float* %tmp14939, i64 1
- %tmp14941 = getelementptr inbounds float* %tmp14940, i64 1
- %tmp14942 = getelementptr inbounds float* %tmp14941, i64 1
- %tmp14943 = getelementptr inbounds float* %tmp14942, i64 1
- %tmp14944 = getelementptr inbounds float* %tmp14943, i64 1
- %tmp14945 = getelementptr inbounds float* %tmp14944, i64 1
- %tmp14946 = getelementptr inbounds float* %tmp14945, i64 1
- %tmp14947 = getelementptr inbounds float* %tmp14946, i64 1
- %tmp14948 = getelementptr inbounds float* %tmp14947, i64 1
- %tmp14949 = getelementptr inbounds float* %tmp14948, i64 1
- %tmp14950 = getelementptr inbounds float* %tmp14949, i64 1
- %tmp14951 = getelementptr inbounds float* %tmp14950, i64 1
- %tmp14952 = getelementptr inbounds float* %tmp14951, i64 1
- %tmp14953 = getelementptr inbounds float* %tmp14952, i64 1
- %tmp14954 = getelementptr inbounds float* %tmp14953, i64 1
- %tmp14955 = getelementptr inbounds float* %tmp14954, i64 1
- %tmp14956 = getelementptr inbounds float* %tmp14955, i64 1
- %tmp14957 = getelementptr inbounds float* %tmp14956, i64 1
- %tmp14958 = getelementptr inbounds float* %tmp14957, i64 1
- %tmp14959 = getelementptr inbounds float* %tmp14958, i64 1
- %tmp14960 = getelementptr inbounds float* %tmp14959, i64 1
- %tmp14961 = getelementptr inbounds float* %tmp14960, i64 1
- %tmp14962 = getelementptr inbounds float* %tmp14961, i64 1
- %tmp14963 = getelementptr inbounds float* %tmp14962, i64 1
- %tmp14964 = getelementptr inbounds float* %tmp14963, i64 1
- %tmp14965 = getelementptr inbounds float* %tmp14964, i64 1
- %tmp14966 = getelementptr inbounds float* %tmp14965, i64 1
- %tmp14967 = getelementptr inbounds float* %tmp14966, i64 1
- %tmp14968 = getelementptr inbounds float* %tmp14967, i64 1
- %tmp14969 = getelementptr inbounds float* %tmp14968, i64 1
- %tmp14970 = getelementptr inbounds float* %tmp14969, i64 1
- %tmp14971 = getelementptr inbounds float* %tmp14970, i64 1
- %tmp14972 = getelementptr inbounds float* %tmp14971, i64 1
- %tmp14973 = getelementptr inbounds float* %tmp14972, i64 1
- %tmp14974 = getelementptr inbounds float* %tmp14973, i64 1
- %tmp14975 = getelementptr inbounds float* %tmp14974, i64 1
- %tmp14976 = getelementptr inbounds float* %tmp14975, i64 1
- %tmp14977 = getelementptr inbounds float* %tmp14976, i64 1
- %tmp14978 = getelementptr inbounds float* %tmp14977, i64 1
- %tmp14979 = getelementptr inbounds float* %tmp14978, i64 1
- %tmp14980 = getelementptr inbounds float* %tmp14979, i64 1
- %tmp14981 = getelementptr inbounds float* %tmp14980, i64 1
- %tmp14982 = getelementptr inbounds float* %tmp14981, i64 1
- %tmp14983 = getelementptr inbounds float* %tmp14982, i64 1
- %tmp14984 = getelementptr inbounds float* %tmp14983, i64 1
- %tmp14985 = getelementptr inbounds float* %tmp14984, i64 1
- %tmp14986 = getelementptr inbounds float* %tmp14985, i64 1
- %tmp14987 = getelementptr inbounds float* %tmp14986, i64 1
- %tmp14988 = getelementptr inbounds float* %tmp14987, i64 1
- %tmp14989 = getelementptr inbounds float* %tmp14988, i64 1
- %tmp14990 = getelementptr inbounds float* %tmp14989, i64 1
- %tmp14991 = getelementptr inbounds float* %tmp14990, i64 1
- %tmp14992 = getelementptr inbounds float* %tmp14991, i64 1
- %tmp14993 = getelementptr inbounds float* %tmp14992, i64 1
- %tmp14994 = getelementptr inbounds float* %tmp14993, i64 1
- %tmp14995 = getelementptr inbounds float* %tmp14994, i64 1
- %tmp14996 = getelementptr inbounds float* %tmp14995, i64 1
- %tmp14997 = getelementptr inbounds float* %tmp14996, i64 1
- %tmp14998 = getelementptr inbounds float* %tmp14997, i64 1
- %tmp14999 = getelementptr inbounds float* %tmp14998, i64 1
- %tmp15000 = getelementptr inbounds float* %tmp14999, i64 1
- %tmp15001 = getelementptr inbounds float* %tmp15000, i64 1
- %tmp15002 = getelementptr inbounds float* %tmp15001, i64 1
- %tmp15003 = getelementptr inbounds float* %tmp15002, i64 1
- %tmp15004 = getelementptr inbounds float* %tmp15003, i64 1
- %tmp15005 = getelementptr inbounds float* %tmp15004, i64 1
- %tmp15006 = getelementptr inbounds float* %tmp15005, i64 1
- %tmp15007 = getelementptr inbounds float* %tmp15006, i64 1
- %tmp15008 = getelementptr inbounds float* %tmp15007, i64 1
- %tmp15009 = getelementptr inbounds float* %tmp15008, i64 1
- %tmp15010 = getelementptr inbounds float* %tmp15009, i64 1
- %tmp15011 = getelementptr inbounds float* %tmp15010, i64 1
- %tmp15012 = getelementptr inbounds float* %tmp15011, i64 1
- %tmp15013 = getelementptr inbounds float* %tmp15012, i64 1
- %tmp15014 = getelementptr inbounds float* %tmp15013, i64 1
- %tmp15015 = getelementptr inbounds float* %tmp15014, i64 1
- %tmp15016 = getelementptr inbounds float* %tmp15015, i64 1
- %tmp15017 = getelementptr inbounds float* %tmp15016, i64 1
- %tmp15018 = getelementptr inbounds float* %tmp15017, i64 1
- %tmp15019 = getelementptr inbounds float* %tmp15018, i64 1
- %tmp15020 = getelementptr inbounds float* %tmp15019, i64 1
- %tmp15021 = getelementptr inbounds float* %tmp15020, i64 1
- %tmp15022 = getelementptr inbounds float* %tmp15021, i64 1
- %tmp15023 = getelementptr inbounds float* %tmp15022, i64 1
- %tmp15024 = getelementptr inbounds float* %tmp15023, i64 1
- %tmp15025 = getelementptr inbounds float* %tmp15024, i64 1
- %tmp15026 = getelementptr inbounds float* %tmp15025, i64 1
- %tmp15027 = getelementptr inbounds float* %tmp15026, i64 1
- %tmp15028 = getelementptr inbounds float* %tmp15027, i64 1
- %tmp15029 = getelementptr inbounds float* %tmp15028, i64 1
- %tmp15030 = getelementptr inbounds float* %tmp15029, i64 1
- %tmp15031 = getelementptr inbounds float* %tmp15030, i64 1
- %tmp15032 = getelementptr inbounds float* %tmp15031, i64 1
- %tmp15033 = getelementptr inbounds float* %tmp15032, i64 1
- %tmp15034 = getelementptr inbounds float* %tmp15033, i64 1
- %tmp15035 = getelementptr inbounds float* %tmp15034, i64 1
- %tmp15036 = getelementptr inbounds float* %tmp15035, i64 1
- %tmp15037 = getelementptr inbounds float* %tmp15036, i64 1
- %tmp15038 = getelementptr inbounds float* %tmp15037, i64 1
- %tmp15039 = getelementptr inbounds float* %tmp15038, i64 1
- %tmp15040 = getelementptr inbounds float* %tmp15039, i64 1
- %tmp15041 = getelementptr inbounds float* %tmp15040, i64 1
- %tmp15042 = getelementptr inbounds float* %tmp15041, i64 1
- %tmp15043 = getelementptr inbounds float* %tmp15042, i64 1
- %tmp15044 = getelementptr inbounds float* %tmp15043, i64 1
- %tmp15045 = getelementptr inbounds float* %tmp15044, i64 1
- %tmp15046 = getelementptr inbounds float* %tmp15045, i64 1
- %tmp15047 = getelementptr inbounds float* %tmp15046, i64 1
- %tmp15048 = getelementptr inbounds float* %tmp15047, i64 1
- %tmp15049 = getelementptr inbounds float* %tmp15048, i64 1
- %tmp15050 = getelementptr inbounds float* %tmp15049, i64 1
- %tmp15051 = getelementptr inbounds float* %tmp15050, i64 1
- %tmp15052 = getelementptr inbounds float* %tmp15051, i64 1
- %tmp15053 = getelementptr inbounds float* %tmp15052, i64 1
- %tmp15054 = getelementptr inbounds float* %tmp15053, i64 1
- %tmp15055 = getelementptr inbounds float* %tmp15054, i64 1
- %tmp15056 = getelementptr inbounds float* %tmp15055, i64 1
- %tmp15057 = getelementptr inbounds float* %tmp15056, i64 1
- %tmp15058 = getelementptr inbounds float* %tmp15057, i64 1
- %tmp15059 = getelementptr inbounds float* %tmp15058, i64 1
- %tmp15060 = getelementptr inbounds float* %tmp15059, i64 1
- %tmp15061 = getelementptr inbounds float* %tmp15060, i64 1
- %tmp15062 = getelementptr inbounds float* %tmp15061, i64 1
- %tmp15063 = getelementptr inbounds float* %tmp15062, i64 1
- %tmp15064 = getelementptr inbounds float* %tmp15063, i64 1
- %tmp15065 = getelementptr inbounds float* %tmp15064, i64 1
- %tmp15066 = getelementptr inbounds float* %tmp15065, i64 1
- %tmp15067 = getelementptr inbounds float* %tmp15066, i64 1
- %tmp15068 = getelementptr inbounds float* %tmp15067, i64 1
- %tmp15069 = getelementptr inbounds float* %tmp15068, i64 1
- %tmp15070 = getelementptr inbounds float* %tmp15069, i64 1
- %tmp15071 = getelementptr inbounds float* %tmp15070, i64 1
- %tmp15072 = getelementptr inbounds float* %tmp15071, i64 1
- %tmp15073 = getelementptr inbounds float* %tmp15072, i64 1
- %tmp15074 = getelementptr inbounds float* %tmp15073, i64 1
- %tmp15075 = getelementptr inbounds float* %tmp15074, i64 1
- %tmp15076 = getelementptr inbounds float* %tmp15075, i64 1
- %tmp15077 = getelementptr inbounds float* %tmp15076, i64 1
- %tmp15078 = getelementptr inbounds float* %tmp15077, i64 1
- %tmp15079 = getelementptr inbounds float* %tmp15078, i64 1
- %tmp15080 = getelementptr inbounds float* %tmp15079, i64 1
- %tmp15081 = getelementptr inbounds float* %tmp15080, i64 1
- %tmp15082 = getelementptr inbounds float* %tmp15081, i64 1
- %tmp15083 = getelementptr inbounds float* %tmp15082, i64 1
- %tmp15084 = getelementptr inbounds float* %tmp15083, i64 1
- %tmp15085 = getelementptr inbounds float* %tmp15084, i64 1
- %tmp15086 = getelementptr inbounds float* %tmp15085, i64 1
- %tmp15087 = getelementptr inbounds float* %tmp15086, i64 1
- %tmp15088 = getelementptr inbounds float* %tmp15087, i64 1
- %tmp15089 = getelementptr inbounds float* %tmp15088, i64 1
- %tmp15090 = getelementptr inbounds float* %tmp15089, i64 1
- %tmp15091 = getelementptr inbounds float* %tmp15090, i64 1
- %tmp15092 = getelementptr inbounds float* %tmp15091, i64 1
- %tmp15093 = getelementptr inbounds float* %tmp15092, i64 1
- %tmp15094 = getelementptr inbounds float* %tmp15093, i64 1
- %tmp15095 = getelementptr inbounds float* %tmp15094, i64 1
- %tmp15096 = getelementptr inbounds float* %tmp15095, i64 1
- %tmp15097 = getelementptr inbounds float* %tmp15096, i64 1
- %tmp15098 = getelementptr inbounds float* %tmp15097, i64 1
- %tmp15099 = getelementptr inbounds float* %tmp15098, i64 1
- %tmp15100 = getelementptr inbounds float* %tmp15099, i64 1
- %tmp15101 = getelementptr inbounds float* %tmp15100, i64 1
- %tmp15102 = getelementptr inbounds float* %tmp15101, i64 1
- %tmp15103 = getelementptr inbounds float* %tmp15102, i64 1
- %tmp15104 = getelementptr inbounds float* %tmp15103, i64 1
- %tmp15105 = getelementptr inbounds float* %tmp15104, i64 1
- %tmp15106 = getelementptr inbounds float* %tmp15105, i64 1
- %tmp15107 = getelementptr inbounds float* %tmp15106, i64 1
- %tmp15108 = getelementptr inbounds float* %tmp15107, i64 1
- %tmp15109 = getelementptr inbounds float* %tmp15108, i64 1
- %tmp15110 = getelementptr inbounds float* %tmp15109, i64 1
- %tmp15111 = getelementptr inbounds float* %tmp15110, i64 1
- %tmp15112 = getelementptr inbounds float* %tmp15111, i64 1
- %tmp15113 = getelementptr inbounds float* %tmp15112, i64 1
- %tmp15114 = getelementptr inbounds float* %tmp15113, i64 1
- %tmp15115 = getelementptr inbounds float* %tmp15114, i64 1
- %tmp15116 = getelementptr inbounds float* %tmp15115, i64 1
- %tmp15117 = getelementptr inbounds float* %tmp15116, i64 1
- %tmp15118 = getelementptr inbounds float* %tmp15117, i64 1
- %tmp15119 = getelementptr inbounds float* %tmp15118, i64 1
- %tmp15120 = getelementptr inbounds float* %tmp15119, i64 1
- %tmp15121 = getelementptr inbounds float* %tmp15120, i64 1
- %tmp15122 = getelementptr inbounds float* %tmp15121, i64 1
- %tmp15123 = getelementptr inbounds float* %tmp15122, i64 1
- %tmp15124 = getelementptr inbounds float* %tmp15123, i64 1
- %tmp15125 = getelementptr inbounds float* %tmp15124, i64 1
- %tmp15126 = getelementptr inbounds float* %tmp15125, i64 1
- %tmp15127 = getelementptr inbounds float* %tmp15126, i64 1
- %tmp15128 = getelementptr inbounds float* %tmp15127, i64 1
- %tmp15129 = getelementptr inbounds float* %tmp15128, i64 1
- %tmp15130 = getelementptr inbounds float* %tmp15129, i64 1
- %tmp15131 = getelementptr inbounds float* %tmp15130, i64 1
- %tmp15132 = getelementptr inbounds float* %tmp15131, i64 1
- %tmp15133 = getelementptr inbounds float* %tmp15132, i64 1
- %tmp15134 = getelementptr inbounds float* %tmp15133, i64 1
- %tmp15135 = getelementptr inbounds float* %tmp15134, i64 1
- %tmp15136 = getelementptr inbounds float* %tmp15135, i64 1
- %tmp15137 = getelementptr inbounds float* %tmp15136, i64 1
- %tmp15138 = getelementptr inbounds float* %tmp15137, i64 1
- %tmp15139 = getelementptr inbounds float* %tmp15138, i64 1
- %tmp15140 = getelementptr inbounds float* %tmp15139, i64 1
- %tmp15141 = getelementptr inbounds float* %tmp15140, i64 1
- %tmp15142 = getelementptr inbounds float* %tmp15141, i64 1
- %tmp15143 = getelementptr inbounds float* %tmp15142, i64 1
- %tmp15144 = getelementptr inbounds float* %tmp15143, i64 1
- %tmp15145 = getelementptr inbounds float* %tmp15144, i64 1
- %tmp15146 = getelementptr inbounds float* %tmp15145, i64 1
- %tmp15147 = getelementptr inbounds float* %tmp15146, i64 1
- %tmp15148 = getelementptr inbounds float* %tmp15147, i64 1
- %tmp15149 = getelementptr inbounds float* %tmp15148, i64 1
- %tmp15150 = getelementptr inbounds float* %tmp15149, i64 1
- %tmp15151 = getelementptr inbounds float* %tmp15150, i64 1
- %tmp15152 = getelementptr inbounds float* %tmp15151, i64 1
- %tmp15153 = getelementptr inbounds float* %tmp15152, i64 1
- %tmp15154 = getelementptr inbounds float* %tmp15153, i64 1
- %tmp15155 = getelementptr inbounds float* %tmp15154, i64 1
- %tmp15156 = getelementptr inbounds float* %tmp15155, i64 1
- %tmp15157 = getelementptr inbounds float* %tmp15156, i64 1
- %tmp15158 = getelementptr inbounds float* %tmp15157, i64 1
- %tmp15159 = getelementptr inbounds float* %tmp15158, i64 1
- %tmp15160 = getelementptr inbounds float* %tmp15159, i64 1
- %tmp15161 = getelementptr inbounds float* %tmp15160, i64 1
- %tmp15162 = getelementptr inbounds float* %tmp15161, i64 1
- %tmp15163 = getelementptr inbounds float* %tmp15162, i64 1
- %tmp15164 = getelementptr inbounds float* %tmp15163, i64 1
- %tmp15165 = getelementptr inbounds float* %tmp15164, i64 1
- %tmp15166 = getelementptr inbounds float* %tmp15165, i64 1
- %tmp15167 = getelementptr inbounds float* %tmp15166, i64 1
- %tmp15168 = getelementptr inbounds float* %tmp15167, i64 1
- %tmp15169 = getelementptr inbounds float* %tmp15168, i64 1
- %tmp15170 = getelementptr inbounds float* %tmp15169, i64 1
- %tmp15171 = getelementptr inbounds float* %tmp15170, i64 1
- %tmp15172 = getelementptr inbounds float* %tmp15171, i64 1
- %tmp15173 = getelementptr inbounds float* %tmp15172, i64 1
- %tmp15174 = getelementptr inbounds float* %tmp15173, i64 1
- %tmp15175 = getelementptr inbounds float* %tmp15174, i64 1
- %tmp15176 = getelementptr inbounds float* %tmp15175, i64 1
- %tmp15177 = getelementptr inbounds float* %tmp15176, i64 1
- %tmp15178 = getelementptr inbounds float* %tmp15177, i64 1
- %tmp15179 = getelementptr inbounds float* %tmp15178, i64 1
- %tmp15180 = getelementptr inbounds float* %tmp15179, i64 1
- %tmp15181 = getelementptr inbounds float* %tmp15180, i64 1
- %tmp15182 = getelementptr inbounds float* %tmp15181, i64 1
- %tmp15183 = getelementptr inbounds float* %tmp15182, i64 1
- %tmp15184 = getelementptr inbounds float* %tmp15183, i64 1
- %tmp15185 = getelementptr inbounds float* %tmp15184, i64 1
- %tmp15186 = getelementptr inbounds float* %tmp15185, i64 1
- %tmp15187 = getelementptr inbounds float* %tmp15186, i64 1
- %tmp15188 = getelementptr inbounds float* %tmp15187, i64 1
- %tmp15189 = getelementptr inbounds float* %tmp15188, i64 1
- %tmp15190 = getelementptr inbounds float* %tmp15189, i64 1
- %tmp15191 = getelementptr inbounds float* %tmp15190, i64 1
- %tmp15192 = getelementptr inbounds float* %tmp15191, i64 1
- %tmp15193 = getelementptr inbounds float* %tmp15192, i64 1
- %tmp15194 = getelementptr inbounds float* %tmp15193, i64 1
- %tmp15195 = getelementptr inbounds float* %tmp15194, i64 1
- %tmp15196 = getelementptr inbounds float* %tmp15195, i64 1
- %tmp15197 = getelementptr inbounds float* %tmp15196, i64 1
- %tmp15198 = getelementptr inbounds float* %tmp15197, i64 1
- %tmp15199 = getelementptr inbounds float* %tmp15198, i64 1
- %tmp15200 = getelementptr inbounds float* %tmp15199, i64 1
- %tmp15201 = getelementptr inbounds float* %tmp15200, i64 1
- %tmp15202 = getelementptr inbounds float* %tmp15201, i64 1
- %tmp15203 = getelementptr inbounds float* %tmp15202, i64 1
- %tmp15204 = getelementptr inbounds float* %tmp15203, i64 1
- %tmp15205 = getelementptr inbounds float* %tmp15204, i64 1
- %tmp15206 = getelementptr inbounds float* %tmp15205, i64 1
- %tmp15207 = getelementptr inbounds float* %tmp15206, i64 1
- %tmp15208 = getelementptr inbounds float* %tmp15207, i64 1
- %tmp15209 = getelementptr inbounds float* %tmp15208, i64 1
- %tmp15210 = getelementptr inbounds float* %tmp15209, i64 1
- %tmp15211 = getelementptr inbounds float* %tmp15210, i64 1
- %tmp15212 = getelementptr inbounds float* %tmp15211, i64 1
- %tmp15213 = getelementptr inbounds float* %tmp15212, i64 1
- %tmp15214 = getelementptr inbounds float* %tmp15213, i64 1
- %tmp15215 = getelementptr inbounds float* %tmp15214, i64 1
- %tmp15216 = getelementptr inbounds float* %tmp15215, i64 1
- %tmp15217 = getelementptr inbounds float* %tmp15216, i64 1
- %tmp15218 = getelementptr inbounds float* %tmp15217, i64 1
- %tmp15219 = getelementptr inbounds float* %tmp15218, i64 1
- %tmp15220 = getelementptr inbounds float* %tmp15219, i64 1
- %tmp15221 = getelementptr inbounds float* %tmp15220, i64 1
- %tmp15222 = getelementptr inbounds float* %tmp15221, i64 1
- %tmp15223 = getelementptr inbounds float* %tmp15222, i64 1
- %tmp15224 = getelementptr inbounds float* %tmp15223, i64 1
- %tmp15225 = getelementptr inbounds float* %tmp15224, i64 1
- %tmp15226 = getelementptr inbounds float* %tmp15225, i64 1
- %tmp15227 = getelementptr inbounds float* %tmp15226, i64 1
- %tmp15228 = getelementptr inbounds float* %tmp15227, i64 1
- %tmp15229 = getelementptr inbounds float* %tmp15228, i64 1
- %tmp15230 = getelementptr inbounds float* %tmp15229, i64 1
- %tmp15231 = getelementptr inbounds float* %tmp15230, i64 1
- %tmp15232 = getelementptr inbounds float* %tmp15231, i64 1
- %tmp15233 = getelementptr inbounds float* %tmp15232, i64 1
- %tmp15234 = getelementptr inbounds float* %tmp15233, i64 1
- %tmp15235 = getelementptr inbounds float* %tmp15234, i64 1
- %tmp15236 = getelementptr inbounds float* %tmp15235, i64 1
- %tmp15237 = getelementptr inbounds float* %tmp15236, i64 1
- %tmp15238 = getelementptr inbounds float* %tmp15237, i64 1
- %tmp15239 = getelementptr inbounds float* %tmp15238, i64 1
- %tmp15240 = getelementptr inbounds float* %tmp15239, i64 1
- %tmp15241 = getelementptr inbounds float* %tmp15240, i64 1
- %tmp15242 = getelementptr inbounds float* %tmp15241, i64 1
- %tmp15243 = getelementptr inbounds float* %tmp15242, i64 1
- %tmp15244 = getelementptr inbounds float* %tmp15243, i64 1
- %tmp15245 = getelementptr inbounds float* %tmp15244, i64 1
- %tmp15246 = getelementptr inbounds float* %tmp15245, i64 1
- %tmp15247 = getelementptr inbounds float* %tmp15246, i64 1
- %tmp15248 = getelementptr inbounds float* %tmp15247, i64 1
- %tmp15249 = getelementptr inbounds float* %tmp15248, i64 1
- %tmp15250 = getelementptr inbounds float* %tmp15249, i64 1
- %tmp15251 = getelementptr inbounds float* %tmp15250, i64 1
- %tmp15252 = getelementptr inbounds float* %tmp15251, i64 1
- %tmp15253 = getelementptr inbounds float* %tmp15252, i64 1
- %tmp15254 = getelementptr inbounds float* %tmp15253, i64 1
- %tmp15255 = getelementptr inbounds float* %tmp15254, i64 1
- %tmp15256 = getelementptr inbounds float* %tmp15255, i64 1
- %tmp15257 = getelementptr inbounds float* %tmp15256, i64 1
- %tmp15258 = getelementptr inbounds float* %tmp15257, i64 1
- %tmp15259 = getelementptr inbounds float* %tmp15258, i64 1
- %tmp15260 = getelementptr inbounds float* %tmp15259, i64 1
- %tmp15261 = getelementptr inbounds float* %tmp15260, i64 1
- %tmp15262 = getelementptr inbounds float* %tmp15261, i64 1
- %tmp15263 = getelementptr inbounds float* %tmp15262, i64 1
- %tmp15264 = getelementptr inbounds float* %tmp15263, i64 1
- %tmp15265 = getelementptr inbounds float* %tmp15264, i64 1
- %tmp15266 = getelementptr inbounds float* %tmp15265, i64 1
- %tmp15267 = getelementptr inbounds float* %tmp15266, i64 1
- %tmp15268 = getelementptr inbounds float* %tmp15267, i64 1
- %tmp15269 = getelementptr inbounds float* %tmp15268, i64 1
- %tmp15270 = getelementptr inbounds float* %tmp15269, i64 1
- %tmp15271 = getelementptr inbounds float* %tmp15270, i64 1
- %tmp15272 = getelementptr inbounds float* %tmp15271, i64 1
- %tmp15273 = getelementptr inbounds float* %tmp15272, i64 1
- %tmp15274 = getelementptr inbounds float* %tmp15273, i64 1
- %tmp15275 = getelementptr inbounds float* %tmp15274, i64 1
- %tmp15276 = getelementptr inbounds float* %tmp15275, i64 1
- %tmp15277 = getelementptr inbounds float* %tmp15276, i64 1
- %tmp15278 = getelementptr inbounds float* %tmp15277, i64 1
- %tmp15279 = getelementptr inbounds float* %tmp15278, i64 1
- %tmp15280 = getelementptr inbounds float* %tmp15279, i64 1
- %tmp15281 = getelementptr inbounds float* %tmp15280, i64 1
- %tmp15282 = getelementptr inbounds float* %tmp15281, i64 1
- %tmp15283 = getelementptr inbounds float* %tmp15282, i64 1
- %tmp15284 = getelementptr inbounds float* %tmp15283, i64 1
- %tmp15285 = getelementptr inbounds float* %tmp15284, i64 1
- %tmp15286 = getelementptr inbounds float* %tmp15285, i64 1
- %tmp15287 = getelementptr inbounds float* %tmp15286, i64 1
- %tmp15288 = getelementptr inbounds float* %tmp15287, i64 1
- %tmp15289 = getelementptr inbounds float* %tmp15288, i64 1
- %tmp15290 = getelementptr inbounds float* %tmp15289, i64 1
- %tmp15291 = getelementptr inbounds float* %tmp15290, i64 1
- %tmp15292 = getelementptr inbounds float* %tmp15291, i64 1
- %tmp15293 = getelementptr inbounds float* %tmp15292, i64 1
- %tmp15294 = getelementptr inbounds float* %tmp15293, i64 1
- %tmp15295 = getelementptr inbounds float* %tmp15294, i64 1
- %tmp15296 = getelementptr inbounds float* %tmp15295, i64 1
- %tmp15297 = getelementptr inbounds float* %tmp15296, i64 1
- %tmp15298 = getelementptr inbounds float* %tmp15297, i64 1
- %tmp15299 = getelementptr inbounds float* %tmp15298, i64 1
- %tmp15300 = getelementptr inbounds float* %tmp15299, i64 1
- %tmp15301 = getelementptr inbounds float* %tmp15300, i64 1
- %tmp15302 = getelementptr inbounds float* %tmp15301, i64 1
- %tmp15303 = getelementptr inbounds float* %tmp15302, i64 1
- %tmp15304 = getelementptr inbounds float* %tmp15303, i64 1
- %tmp15305 = getelementptr inbounds float* %tmp15304, i64 1
- %tmp15306 = getelementptr inbounds float* %tmp15305, i64 1
- %tmp15307 = getelementptr inbounds float* %tmp15306, i64 1
- %tmp15308 = getelementptr inbounds float* %tmp15307, i64 1
- %tmp15309 = getelementptr inbounds float* %tmp15308, i64 1
- %tmp15310 = getelementptr inbounds float* %tmp15309, i64 1
- %tmp15311 = getelementptr inbounds float* %tmp15310, i64 1
- %tmp15312 = getelementptr inbounds float* %tmp15311, i64 1
- %tmp15313 = getelementptr inbounds float* %tmp15312, i64 1
- %tmp15314 = getelementptr inbounds float* %tmp15313, i64 1
- %tmp15315 = getelementptr inbounds float* %tmp15314, i64 1
- %tmp15316 = getelementptr inbounds float* %tmp15315, i64 1
- %tmp15317 = getelementptr inbounds float* %tmp15316, i64 1
- %tmp15318 = getelementptr inbounds float* %tmp15317, i64 1
- %tmp15319 = getelementptr inbounds float* %tmp15318, i64 1
- %tmp15320 = getelementptr inbounds float* %tmp15319, i64 1
- %tmp15321 = getelementptr inbounds float* %tmp15320, i64 1
- %tmp15322 = getelementptr inbounds float* %tmp15321, i64 1
- %tmp15323 = getelementptr inbounds float* %tmp15322, i64 1
- %tmp15324 = getelementptr inbounds float* %tmp15323, i64 1
- %tmp15325 = getelementptr inbounds float* %tmp15324, i64 1
- %tmp15326 = getelementptr inbounds float* %tmp15325, i64 1
- %tmp15327 = getelementptr inbounds float* %tmp15326, i64 1
- %tmp15328 = getelementptr inbounds float* %tmp15327, i64 1
- %tmp15329 = getelementptr inbounds float* %tmp15328, i64 1
- %tmp15330 = getelementptr inbounds float* %tmp15329, i64 1
- %tmp15331 = getelementptr inbounds float* %tmp15330, i64 1
- %tmp15332 = getelementptr inbounds float* %tmp15331, i64 1
- %tmp15333 = getelementptr inbounds float* %tmp15332, i64 1
- %tmp15334 = getelementptr inbounds float* %tmp15333, i64 1
- %tmp15335 = getelementptr inbounds float* %tmp15334, i64 1
- %tmp15336 = getelementptr inbounds float* %tmp15335, i64 1
- %tmp15337 = getelementptr inbounds float* %tmp15336, i64 1
- %tmp15338 = getelementptr inbounds float* %tmp15337, i64 1
- %tmp15339 = getelementptr inbounds float* %tmp15338, i64 1
- %tmp15340 = getelementptr inbounds float* %tmp15339, i64 1
- %tmp15341 = getelementptr inbounds float* %tmp15340, i64 1
- %tmp15342 = getelementptr inbounds float* %tmp15341, i64 1
- %tmp15343 = getelementptr inbounds float* %tmp15342, i64 1
- %tmp15344 = getelementptr inbounds float* %tmp15343, i64 1
- %tmp15345 = getelementptr inbounds float* %tmp15344, i64 1
- %tmp15346 = getelementptr inbounds float* %tmp15345, i64 1
- %tmp15347 = getelementptr inbounds float* %tmp15346, i64 1
- %tmp15348 = getelementptr inbounds float* %tmp15347, i64 1
- %tmp15349 = getelementptr inbounds float* %tmp15348, i64 1
- %tmp15350 = getelementptr inbounds float* %tmp15349, i64 1
- %tmp15351 = getelementptr inbounds float* %tmp15350, i64 1
- %tmp15352 = getelementptr inbounds float* %tmp15351, i64 1
- %tmp15353 = getelementptr inbounds float* %tmp15352, i64 1
- %tmp15354 = getelementptr inbounds float* %tmp15353, i64 1
- %tmp15355 = getelementptr inbounds float* %tmp15354, i64 1
- %tmp15356 = getelementptr inbounds float* %tmp15355, i64 1
- %tmp15357 = getelementptr inbounds float* %tmp15356, i64 1
- %tmp15358 = getelementptr inbounds float* %tmp15357, i64 1
- %tmp15359 = getelementptr inbounds float* %tmp15358, i64 1
- %tmp15360 = getelementptr inbounds float* %tmp15359, i64 1
- %tmp15361 = getelementptr inbounds float* %tmp15360, i64 1
- %tmp15362 = getelementptr inbounds float* %tmp15361, i64 1
- %tmp15363 = getelementptr inbounds float* %tmp15362, i64 1
- %tmp15364 = getelementptr inbounds float* %tmp15363, i64 1
- %tmp15365 = getelementptr inbounds float* %tmp15364, i64 1
- %tmp15366 = getelementptr inbounds float* %tmp15365, i64 1
- %tmp15367 = getelementptr inbounds float* %tmp15366, i64 1
- %tmp15368 = getelementptr inbounds float* %tmp15367, i64 1
- %tmp15369 = getelementptr inbounds float* %tmp15368, i64 1
- %tmp15370 = getelementptr inbounds float* %tmp15369, i64 1
- %tmp15371 = getelementptr inbounds float* %tmp15370, i64 1
- %tmp15372 = getelementptr inbounds float* %tmp15371, i64 1
- %tmp15373 = getelementptr inbounds float* %tmp15372, i64 1
- %tmp15374 = getelementptr inbounds float* %tmp15373, i64 1
- %tmp15375 = getelementptr inbounds float* %tmp15374, i64 1
- %tmp15376 = getelementptr inbounds float* %tmp15375, i64 1
- %tmp15377 = getelementptr inbounds float* %tmp15376, i64 1
- %tmp15378 = getelementptr inbounds float* %tmp15377, i64 1
- %tmp15379 = getelementptr inbounds float* %tmp15378, i64 1
- %tmp15380 = getelementptr inbounds float* %tmp15379, i64 1
- %tmp15381 = getelementptr inbounds float* %tmp15380, i64 1
- %tmp15382 = getelementptr inbounds float* %tmp15381, i64 1
- %tmp15383 = getelementptr inbounds float* %tmp15382, i64 1
- %tmp15384 = getelementptr inbounds float* %tmp15383, i64 1
- %tmp15385 = getelementptr inbounds float* %tmp15384, i64 1
- %tmp15386 = getelementptr inbounds float* %tmp15385, i64 1
- %tmp15387 = getelementptr inbounds float* %tmp15386, i64 1
- %tmp15388 = getelementptr inbounds float* %tmp15387, i64 1
- %tmp15389 = getelementptr inbounds float* %tmp15388, i64 1
- %tmp15390 = getelementptr inbounds float* %tmp15389, i64 1
- %tmp15391 = getelementptr inbounds float* %tmp15390, i64 1
- %tmp15392 = getelementptr inbounds float* %tmp15391, i64 1
- %tmp15393 = getelementptr inbounds float* %tmp15392, i64 1
- %tmp15394 = getelementptr inbounds float* %tmp15393, i64 1
- %tmp15395 = getelementptr inbounds float* %tmp15394, i64 1
- %tmp15396 = getelementptr inbounds float* %tmp15395, i64 1
- %tmp15397 = getelementptr inbounds float* %tmp15396, i64 1
- %tmp15398 = getelementptr inbounds float* %tmp15397, i64 1
- %tmp15399 = getelementptr inbounds float* %tmp15398, i64 1
- %tmp15400 = getelementptr inbounds float* %tmp15399, i64 1
- %tmp15401 = getelementptr inbounds float* %tmp15400, i64 1
- %tmp15402 = getelementptr inbounds float* %tmp15401, i64 1
- %tmp15403 = getelementptr inbounds float* %tmp15402, i64 1
- %tmp15404 = getelementptr inbounds float* %tmp15403, i64 1
- %tmp15405 = getelementptr inbounds float* %tmp15404, i64 1
- %tmp15406 = getelementptr inbounds float* %tmp15405, i64 1
- %tmp15407 = getelementptr inbounds float* %tmp15406, i64 1
- %tmp15408 = getelementptr inbounds float* %tmp15407, i64 1
- %tmp15409 = getelementptr inbounds float* %tmp15408, i64 1
- %tmp15410 = getelementptr inbounds float* %tmp15409, i64 1
- %tmp15411 = getelementptr inbounds float* %tmp15410, i64 1
- %tmp15412 = getelementptr inbounds float* %tmp15411, i64 1
- %tmp15413 = getelementptr inbounds float* %tmp15412, i64 1
- %tmp15414 = getelementptr inbounds float* %tmp15413, i64 1
- %tmp15415 = getelementptr inbounds float* %tmp15414, i64 1
- %tmp15416 = getelementptr inbounds float* %tmp15415, i64 1
- %tmp15417 = getelementptr inbounds float* %tmp15416, i64 1
- %tmp15418 = getelementptr inbounds float* %tmp15417, i64 1
- %tmp15419 = getelementptr inbounds float* %tmp15418, i64 1
- %tmp15420 = getelementptr inbounds float* %tmp15419, i64 1
- %tmp15421 = getelementptr inbounds float* %tmp15420, i64 1
- %tmp15422 = getelementptr inbounds float* %tmp15421, i64 1
- %tmp15423 = getelementptr inbounds float* %tmp15422, i64 1
- %tmp15424 = getelementptr inbounds float* %tmp15423, i64 1
- %tmp15425 = getelementptr inbounds float* %tmp15424, i64 1
- %tmp15426 = getelementptr inbounds float* %tmp15425, i64 1
- %tmp15427 = getelementptr inbounds float* %tmp15426, i64 1
- %tmp15428 = getelementptr inbounds float* %tmp15427, i64 1
- %tmp15429 = getelementptr inbounds float* %tmp15428, i64 1
- %tmp15430 = getelementptr inbounds float* %tmp15429, i64 1
- %tmp15431 = getelementptr inbounds float* %tmp15430, i64 1
- %tmp15432 = getelementptr inbounds float* %tmp15431, i64 1
- %tmp15433 = getelementptr inbounds float* %tmp15432, i64 1
- %tmp15434 = getelementptr inbounds float* %tmp15433, i64 1
- %tmp15435 = getelementptr inbounds float* %tmp15434, i64 1
- %tmp15436 = getelementptr inbounds float* %tmp15435, i64 1
- %tmp15437 = getelementptr inbounds float* %tmp15436, i64 1
- %tmp15438 = getelementptr inbounds float* %tmp15437, i64 1
- %tmp15439 = getelementptr inbounds float* %tmp15438, i64 1
- %tmp15440 = getelementptr inbounds float* %tmp15439, i64 1
- %tmp15441 = getelementptr inbounds float* %tmp15440, i64 1
- %tmp15442 = getelementptr inbounds float* %tmp15441, i64 1
- %tmp15443 = getelementptr inbounds float* %tmp15442, i64 1
- %tmp15444 = getelementptr inbounds float* %tmp15443, i64 1
- %tmp15445 = getelementptr inbounds float* %tmp15444, i64 1
- %tmp15446 = getelementptr inbounds float* %tmp15445, i64 1
- %tmp15447 = getelementptr inbounds float* %tmp15446, i64 1
- %tmp15448 = getelementptr inbounds float* %tmp15447, i64 1
- %tmp15449 = getelementptr inbounds float* %tmp15448, i64 1
- %tmp15450 = getelementptr inbounds float* %tmp15449, i64 1
- %tmp15451 = getelementptr inbounds float* %tmp15450, i64 1
- %tmp15452 = getelementptr inbounds float* %tmp15451, i64 1
- %tmp15453 = getelementptr inbounds float* %tmp15452, i64 1
- %tmp15454 = getelementptr inbounds float* %tmp15453, i64 1
- %tmp15455 = getelementptr inbounds float* %tmp15454, i64 1
- %tmp15456 = getelementptr inbounds float* %tmp15455, i64 1
- %tmp15457 = getelementptr inbounds float* %tmp15456, i64 1
- %tmp15458 = getelementptr inbounds float* %tmp15457, i64 1
- %tmp15459 = getelementptr inbounds float* %tmp15458, i64 1
- %tmp15460 = getelementptr inbounds float* %tmp15459, i64 1
- %tmp15461 = getelementptr inbounds float* %tmp15460, i64 1
- %tmp15462 = getelementptr inbounds float* %tmp15461, i64 1
- %tmp15463 = getelementptr inbounds float* %tmp15462, i64 1
- %tmp15464 = getelementptr inbounds float* %tmp15463, i64 1
- %tmp15465 = getelementptr inbounds float* %tmp15464, i64 1
- %tmp15466 = getelementptr inbounds float* %tmp15465, i64 1
- %tmp15467 = getelementptr inbounds float* %tmp15466, i64 1
- %tmp15468 = getelementptr inbounds float* %tmp15467, i64 1
- %tmp15469 = getelementptr inbounds float* %tmp15468, i64 1
- %tmp15470 = getelementptr inbounds float* %tmp15469, i64 1
- %tmp15471 = getelementptr inbounds float* %tmp15470, i64 1
- %tmp15472 = getelementptr inbounds float* %tmp15471, i64 1
- %tmp15473 = getelementptr inbounds float* %tmp15472, i64 1
- %tmp15474 = getelementptr inbounds float* %tmp15473, i64 1
- %tmp15475 = getelementptr inbounds float* %tmp15474, i64 1
- %tmp15476 = getelementptr inbounds float* %tmp15475, i64 1
- %tmp15477 = getelementptr inbounds float* %tmp15476, i64 1
- %tmp15478 = getelementptr inbounds float* %tmp15477, i64 1
- %tmp15479 = getelementptr inbounds float* %tmp15478, i64 1
- %tmp15480 = getelementptr inbounds float* %tmp15479, i64 1
- %tmp15481 = getelementptr inbounds float* %tmp15480, i64 1
- %tmp15482 = getelementptr inbounds float* %tmp15481, i64 1
- %tmp15483 = getelementptr inbounds float* %tmp15482, i64 1
- %tmp15484 = getelementptr inbounds float* %tmp15483, i64 1
- %tmp15485 = getelementptr inbounds float* %tmp15484, i64 1
- %tmp15486 = getelementptr inbounds float* %tmp15485, i64 1
- %tmp15487 = getelementptr inbounds float* %tmp15486, i64 1
- %tmp15488 = getelementptr inbounds float* %tmp15487, i64 1
- %tmp15489 = getelementptr inbounds float* %tmp15488, i64 1
- %tmp15490 = getelementptr inbounds float* %tmp15489, i64 1
- %tmp15491 = getelementptr inbounds float* %tmp15490, i64 1
- %tmp15492 = getelementptr inbounds float* %tmp15491, i64 1
- %tmp15493 = getelementptr inbounds float* %tmp15492, i64 1
- %tmp15494 = getelementptr inbounds float* %tmp15493, i64 1
- %tmp15495 = getelementptr inbounds float* %tmp15494, i64 1
- %tmp15496 = getelementptr inbounds float* %tmp15495, i64 1
- %tmp15497 = getelementptr inbounds float* %tmp15496, i64 1
- %tmp15498 = getelementptr inbounds float* %tmp15497, i64 1
- %tmp15499 = getelementptr inbounds float* %tmp15498, i64 1
- %tmp15500 = getelementptr inbounds float* %tmp15499, i64 1
- %tmp15501 = getelementptr inbounds float* %tmp15500, i64 1
- %tmp15502 = getelementptr inbounds float* %tmp15501, i64 1
- %tmp15503 = getelementptr inbounds float* %tmp15502, i64 1
- %tmp15504 = getelementptr inbounds float* %tmp15503, i64 1
- %tmp15505 = getelementptr inbounds float* %tmp15504, i64 1
- %tmp15506 = getelementptr inbounds float* %tmp15505, i64 1
- %tmp15507 = getelementptr inbounds float* %tmp15506, i64 1
- %tmp15508 = getelementptr inbounds float* %tmp15507, i64 1
- %tmp15509 = getelementptr inbounds float* %tmp15508, i64 1
- %tmp15510 = getelementptr inbounds float* %tmp15509, i64 1
- %tmp15511 = getelementptr inbounds float* %tmp15510, i64 1
- %tmp15512 = getelementptr inbounds float* %tmp15511, i64 1
- %tmp15513 = getelementptr inbounds float* %tmp15512, i64 1
- %tmp15514 = getelementptr inbounds float* %tmp15513, i64 1
- %tmp15515 = getelementptr inbounds float* %tmp15514, i64 1
- %tmp15516 = getelementptr inbounds float* %tmp15515, i64 1
- %tmp15517 = getelementptr inbounds float* %tmp15516, i64 1
- %tmp15518 = getelementptr inbounds float* %tmp15517, i64 1
- %tmp15519 = getelementptr inbounds float* %tmp15518, i64 1
- %tmp15520 = getelementptr inbounds float* %tmp15519, i64 1
- %tmp15521 = getelementptr inbounds float* %tmp15520, i64 1
- %tmp15522 = getelementptr inbounds float* %tmp15521, i64 1
- %tmp15523 = getelementptr inbounds float* %tmp15522, i64 1
- %tmp15524 = getelementptr inbounds float* %tmp15523, i64 1
- %tmp15525 = getelementptr inbounds float* %tmp15524, i64 1
- %tmp15526 = getelementptr inbounds float* %tmp15525, i64 1
- %tmp15527 = getelementptr inbounds float* %tmp15526, i64 1
- %tmp15528 = getelementptr inbounds float* %tmp15527, i64 1
- %tmp15529 = getelementptr inbounds float* %tmp15528, i64 1
- %tmp15530 = getelementptr inbounds float* %tmp15529, i64 1
- %tmp15531 = getelementptr inbounds float* %tmp15530, i64 1
- %tmp15532 = getelementptr inbounds float* %tmp15531, i64 1
- %tmp15533 = getelementptr inbounds float* %tmp15532, i64 1
- %tmp15534 = getelementptr inbounds float* %tmp15533, i64 1
- %tmp15535 = getelementptr inbounds float* %tmp15534, i64 1
- %tmp15536 = getelementptr inbounds float* %tmp15535, i64 1
- %tmp15537 = getelementptr inbounds float* %tmp15536, i64 1
- %tmp15538 = getelementptr inbounds float* %tmp15537, i64 1
- %tmp15539 = getelementptr inbounds float* %tmp15538, i64 1
- %tmp15540 = getelementptr inbounds float* %tmp15539, i64 1
- %tmp15541 = getelementptr inbounds float* %tmp15540, i64 1
- %tmp15542 = getelementptr inbounds float* %tmp15541, i64 1
- %tmp15543 = getelementptr inbounds float* %tmp15542, i64 1
- %tmp15544 = getelementptr inbounds float* %tmp15543, i64 1
- %tmp15545 = getelementptr inbounds float* %tmp15544, i64 1
- %tmp15546 = getelementptr inbounds float* %tmp15545, i64 1
- %tmp15547 = getelementptr inbounds float* %tmp15546, i64 1
- %tmp15548 = getelementptr inbounds float* %tmp15547, i64 1
- %tmp15549 = getelementptr inbounds float* %tmp15548, i64 1
- %tmp15550 = getelementptr inbounds float* %tmp15549, i64 1
- %tmp15551 = getelementptr inbounds float* %tmp15550, i64 1
- %tmp15552 = getelementptr inbounds float* %tmp15551, i64 1
- %tmp15553 = getelementptr inbounds float* %tmp15552, i64 1
- %tmp15554 = getelementptr inbounds float* %tmp15553, i64 1
- %tmp15555 = getelementptr inbounds float* %tmp15554, i64 1
- %tmp15556 = getelementptr inbounds float* %tmp15555, i64 1
- %tmp15557 = getelementptr inbounds float* %tmp15556, i64 1
- %tmp15558 = getelementptr inbounds float* %tmp15557, i64 1
- %tmp15559 = getelementptr inbounds float* %tmp15558, i64 1
- %tmp15560 = getelementptr inbounds float* %tmp15559, i64 1
- %tmp15561 = getelementptr inbounds float* %tmp15560, i64 1
- %tmp15562 = getelementptr inbounds float* %tmp15561, i64 1
- %tmp15563 = getelementptr inbounds float* %tmp15562, i64 1
- %tmp15564 = getelementptr inbounds float* %tmp15563, i64 1
- %tmp15565 = getelementptr inbounds float* %tmp15564, i64 1
- %tmp15566 = getelementptr inbounds float* %tmp15565, i64 1
- %tmp15567 = getelementptr inbounds float* %tmp15566, i64 1
- %tmp15568 = getelementptr inbounds float* %tmp15567, i64 1
- %tmp15569 = getelementptr inbounds float* %tmp15568, i64 1
- %tmp15570 = getelementptr inbounds float* %tmp15569, i64 1
- %tmp15571 = getelementptr inbounds float* %tmp15570, i64 1
- %tmp15572 = getelementptr inbounds float* %tmp15571, i64 1
- %tmp15573 = getelementptr inbounds float* %tmp15572, i64 1
- %tmp15574 = getelementptr inbounds float* %tmp15573, i64 1
- %tmp15575 = getelementptr inbounds float* %tmp15574, i64 1
- %tmp15576 = getelementptr inbounds float* %tmp15575, i64 1
- %tmp15577 = getelementptr inbounds float* %tmp15576, i64 1
- %tmp15578 = getelementptr inbounds float* %tmp15577, i64 1
- %tmp15579 = getelementptr inbounds float* %tmp15578, i64 1
- %tmp15580 = getelementptr inbounds float* %tmp15579, i64 1
- %tmp15581 = getelementptr inbounds float* %tmp15580, i64 1
- %tmp15582 = getelementptr inbounds float* %tmp15581, i64 1
- %tmp15583 = getelementptr inbounds float* %tmp15582, i64 1
- %tmp15584 = getelementptr inbounds float* %tmp15583, i64 1
- %tmp15585 = getelementptr inbounds float* %tmp15584, i64 1
- %tmp15586 = getelementptr inbounds float* %tmp15585, i64 1
- %tmp15587 = getelementptr inbounds float* %tmp15586, i64 1
- %tmp15588 = getelementptr inbounds float* %tmp15587, i64 1
- %tmp15589 = getelementptr inbounds float* %tmp15588, i64 1
- %tmp15590 = getelementptr inbounds float* %tmp15589, i64 1
- %tmp15591 = getelementptr inbounds float* %tmp15590, i64 1
- %tmp15592 = getelementptr inbounds float* %tmp15591, i64 1
- %tmp15593 = getelementptr inbounds float* %tmp15592, i64 1
- %tmp15594 = getelementptr inbounds float* %tmp15593, i64 1
- %tmp15595 = getelementptr inbounds float* %tmp15594, i64 1
- %tmp15596 = getelementptr inbounds float* %tmp15595, i64 1
- %tmp15597 = getelementptr inbounds float* %tmp15596, i64 1
- %tmp15598 = getelementptr inbounds float* %tmp15597, i64 1
- %tmp15599 = getelementptr inbounds float* %tmp15598, i64 1
- %tmp15600 = getelementptr inbounds float* %tmp15599, i64 1
- %tmp15601 = getelementptr inbounds float* %tmp15600, i64 1
- %tmp15602 = getelementptr inbounds float* %tmp15601, i64 1
- %tmp15603 = getelementptr inbounds float* %tmp15602, i64 1
- %tmp15604 = getelementptr inbounds float* %tmp15603, i64 1
- %tmp15605 = getelementptr inbounds float* %tmp15604, i64 1
- %tmp15606 = getelementptr inbounds float* %tmp15605, i64 1
- %tmp15607 = getelementptr inbounds float* %tmp15606, i64 1
- %tmp15608 = getelementptr inbounds float* %tmp15607, i64 1
- %tmp15609 = getelementptr inbounds float* %tmp15608, i64 1
- %tmp15610 = getelementptr inbounds float* %tmp15609, i64 1
- %tmp15611 = getelementptr inbounds float* %tmp15610, i64 1
- %tmp15612 = getelementptr inbounds float* %tmp15611, i64 1
- %tmp15613 = getelementptr inbounds float* %tmp15612, i64 1
- %tmp15614 = getelementptr inbounds float* %tmp15613, i64 1
- %tmp15615 = getelementptr inbounds float* %tmp15614, i64 1
- %tmp15616 = getelementptr inbounds float* %tmp15615, i64 1
- %tmp15617 = getelementptr inbounds float* %tmp15616, i64 1
- %tmp15618 = getelementptr inbounds float* %tmp15617, i64 1
- %tmp15619 = getelementptr inbounds float* %tmp15618, i64 1
- %tmp15620 = getelementptr inbounds float* %tmp15619, i64 1
- %tmp15621 = getelementptr inbounds float* %tmp15620, i64 1
- %tmp15622 = getelementptr inbounds float* %tmp15621, i64 1
- %tmp15623 = getelementptr inbounds float* %tmp15622, i64 1
- %tmp15624 = getelementptr inbounds float* %tmp15623, i64 1
- %tmp15625 = getelementptr inbounds float* %tmp15624, i64 1
- %tmp15626 = getelementptr inbounds float* %tmp15625, i64 1
- %tmp15627 = getelementptr inbounds float* %tmp15626, i64 1
- %tmp15628 = getelementptr inbounds float* %tmp15627, i64 1
- %tmp15629 = getelementptr inbounds float* %tmp15628, i64 1
- %tmp15630 = getelementptr inbounds float* %tmp15629, i64 1
- %tmp15631 = getelementptr inbounds float* %tmp15630, i64 1
- %tmp15632 = getelementptr inbounds float* %tmp15631, i64 1
- %tmp15633 = getelementptr inbounds float* %tmp15632, i64 1
- %tmp15634 = getelementptr inbounds float* %tmp15633, i64 1
- %tmp15635 = getelementptr inbounds float* %tmp15634, i64 1
- %tmp15636 = getelementptr inbounds float* %tmp15635, i64 1
- %tmp15637 = getelementptr inbounds float* %tmp15636, i64 1
- %tmp15638 = getelementptr inbounds float* %tmp15637, i64 1
- %tmp15639 = getelementptr inbounds float* %tmp15638, i64 1
- %tmp15640 = getelementptr inbounds float* %tmp15639, i64 1
- %tmp15641 = getelementptr inbounds float* %tmp15640, i64 1
- %tmp15642 = getelementptr inbounds float* %tmp15641, i64 1
- %tmp15643 = getelementptr inbounds float* %tmp15642, i64 1
- %tmp15644 = getelementptr inbounds float* %tmp15643, i64 1
- %tmp15645 = getelementptr inbounds float* %tmp15644, i64 1
- %tmp15646 = getelementptr inbounds float* %tmp15645, i64 1
- %tmp15647 = getelementptr inbounds float* %tmp15646, i64 1
- %tmp15648 = getelementptr inbounds float* %tmp15647, i64 1
- %tmp15649 = getelementptr inbounds float* %tmp15648, i64 1
- %tmp15650 = getelementptr inbounds float* %tmp15649, i64 1
- %tmp15651 = getelementptr inbounds float* %tmp15650, i64 1
- %tmp15652 = getelementptr inbounds float* %tmp15651, i64 1
- %tmp15653 = getelementptr inbounds float* %tmp15652, i64 1
- %tmp15654 = getelementptr inbounds float* %tmp15653, i64 1
- %tmp15655 = getelementptr inbounds float* %tmp15654, i64 1
- %tmp15656 = getelementptr inbounds float* %tmp15655, i64 1
- %tmp15657 = getelementptr inbounds float* %tmp15656, i64 1
- %tmp15658 = getelementptr inbounds float* %tmp15657, i64 1
- %tmp15659 = getelementptr inbounds float* %tmp15658, i64 1
- %tmp15660 = getelementptr inbounds float* %tmp15659, i64 1
- %tmp15661 = getelementptr inbounds float* %tmp15660, i64 1
- %tmp15662 = getelementptr inbounds float* %tmp15661, i64 1
- %tmp15663 = getelementptr inbounds float* %tmp15662, i64 1
- %tmp15664 = getelementptr inbounds float* %tmp15663, i64 1
- %tmp15665 = getelementptr inbounds float* %tmp15664, i64 1
- %tmp15666 = getelementptr inbounds float* %tmp15665, i64 1
- %tmp15667 = getelementptr inbounds float* %tmp15666, i64 1
- %tmp15668 = getelementptr inbounds float* %tmp15667, i64 1
- %tmp15669 = getelementptr inbounds float* %tmp15668, i64 1
- %tmp15670 = getelementptr inbounds float* %tmp15669, i64 1
- %tmp15671 = getelementptr inbounds float* %tmp15670, i64 1
- %tmp15672 = getelementptr inbounds float* %tmp15671, i64 1
- %tmp15673 = getelementptr inbounds float* %tmp15672, i64 1
- %tmp15674 = getelementptr inbounds float* %tmp15673, i64 1
- %tmp15675 = getelementptr inbounds float* %tmp15674, i64 1
- %tmp15676 = getelementptr inbounds float* %tmp15675, i64 1
- %tmp15677 = getelementptr inbounds float* %tmp15676, i64 1
- %tmp15678 = getelementptr inbounds float* %tmp15677, i64 1
- %tmp15679 = getelementptr inbounds float* %tmp15678, i64 1
- %tmp15680 = getelementptr inbounds float* %tmp15679, i64 1
- %tmp15681 = getelementptr inbounds float* %tmp15680, i64 1
- %tmp15682 = getelementptr inbounds float* %tmp15681, i64 1
- %tmp15683 = getelementptr inbounds float* %tmp15682, i64 1
- %tmp15684 = getelementptr inbounds float* %tmp15683, i64 1
- %tmp15685 = getelementptr inbounds float* %tmp15684, i64 1
- %tmp15686 = getelementptr inbounds float* %tmp15685, i64 1
- %tmp15687 = getelementptr inbounds float* %tmp15686, i64 1
- %tmp15688 = getelementptr inbounds float* %tmp15687, i64 1
- %tmp15689 = getelementptr inbounds float* %tmp15688, i64 1
- %tmp15690 = getelementptr inbounds float* %tmp15689, i64 1
- %tmp15691 = getelementptr inbounds float* %tmp15690, i64 1
- %tmp15692 = getelementptr inbounds float* %tmp15691, i64 1
- %tmp15693 = getelementptr inbounds float* %tmp15692, i64 1
- %tmp15694 = getelementptr inbounds float* %tmp15693, i64 1
- %tmp15695 = getelementptr inbounds float* %tmp15694, i64 1
- %tmp15696 = getelementptr inbounds float* %tmp15695, i64 1
- %tmp15697 = getelementptr inbounds float* %tmp15696, i64 1
- %tmp15698 = getelementptr inbounds float* %tmp15697, i64 1
- %tmp15699 = getelementptr inbounds float* %tmp15698, i64 1
- %tmp15700 = getelementptr inbounds float* %tmp15699, i64 1
- %tmp15701 = getelementptr inbounds float* %tmp15700, i64 1
- %tmp15702 = getelementptr inbounds float* %tmp15701, i64 1
- %tmp15703 = getelementptr inbounds float* %tmp15702, i64 1
- %tmp15704 = getelementptr inbounds float* %tmp15703, i64 1
- %tmp15705 = getelementptr inbounds float* %tmp15704, i64 1
- %tmp15706 = getelementptr inbounds float* %tmp15705, i64 1
- %tmp15707 = getelementptr inbounds float* %tmp15706, i64 1
- %tmp15708 = getelementptr inbounds float* %tmp15707, i64 1
- %tmp15709 = getelementptr inbounds float* %tmp15708, i64 1
- %tmp15710 = getelementptr inbounds float* %tmp15709, i64 1
- %tmp15711 = getelementptr inbounds float* %tmp15710, i64 1
- %tmp15712 = getelementptr inbounds float* %tmp15711, i64 1
- %tmp15713 = getelementptr inbounds float* %tmp15712, i64 1
- %tmp15714 = getelementptr inbounds float* %tmp15713, i64 1
- %tmp15715 = getelementptr inbounds float* %tmp15714, i64 1
- %tmp15716 = getelementptr inbounds float* %tmp15715, i64 1
- %tmp15717 = getelementptr inbounds float* %tmp15716, i64 1
- %tmp15718 = getelementptr inbounds float* %tmp15717, i64 1
- %tmp15719 = getelementptr inbounds float* %tmp15718, i64 1
- %tmp15720 = getelementptr inbounds float* %tmp15719, i64 1
- %tmp15721 = getelementptr inbounds float* %tmp15720, i64 1
- %tmp15722 = getelementptr inbounds float* %tmp15721, i64 1
- %tmp15723 = getelementptr inbounds float* %tmp15722, i64 1
- %tmp15724 = getelementptr inbounds float* %tmp15723, i64 1
- %tmp15725 = getelementptr inbounds float* %tmp15724, i64 1
- %tmp15726 = getelementptr inbounds float* %tmp15725, i64 1
- %tmp15727 = getelementptr inbounds float* %tmp15726, i64 1
- %tmp15728 = getelementptr inbounds float* %tmp15727, i64 1
- %tmp15729 = getelementptr inbounds float* %tmp15728, i64 1
- %tmp15730 = getelementptr inbounds float* %tmp15729, i64 1
- %tmp15731 = getelementptr inbounds float* %tmp15730, i64 1
- %tmp15732 = getelementptr inbounds float* %tmp15731, i64 1
- %tmp15733 = getelementptr inbounds float* %tmp15732, i64 1
- %tmp15734 = getelementptr inbounds float* %tmp15733, i64 1
- %tmp15735 = getelementptr inbounds float* %tmp15734, i64 1
- %tmp15736 = getelementptr inbounds float* %tmp15735, i64 1
- %tmp15737 = getelementptr inbounds float* %tmp15736, i64 1
- %tmp15738 = getelementptr inbounds float* %tmp15737, i64 1
- %tmp15739 = getelementptr inbounds float* %tmp15738, i64 1
- %tmp15740 = getelementptr inbounds float* %tmp15739, i64 1
- %tmp15741 = getelementptr inbounds float* %tmp15740, i64 1
- %tmp15742 = getelementptr inbounds float* %tmp15741, i64 1
- %tmp15743 = getelementptr inbounds float* %tmp15742, i64 1
- %tmp15744 = getelementptr inbounds float* %tmp15743, i64 1
- %tmp15745 = getelementptr inbounds float* %tmp15744, i64 1
- %tmp15746 = getelementptr inbounds float* %tmp15745, i64 1
- %tmp15747 = getelementptr inbounds float* %tmp15746, i64 1
- %tmp15748 = getelementptr inbounds float* %tmp15747, i64 1
- %tmp15749 = getelementptr inbounds float* %tmp15748, i64 1
- %tmp15750 = getelementptr inbounds float* %tmp15749, i64 1
- %tmp15751 = getelementptr inbounds float* %tmp15750, i64 1
- %tmp15752 = getelementptr inbounds float* %tmp15751, i64 1
- %tmp15753 = getelementptr inbounds float* %tmp15752, i64 1
- %tmp15754 = getelementptr inbounds float* %tmp15753, i64 1
- %tmp15755 = getelementptr inbounds float* %tmp15754, i64 1
- %tmp15756 = getelementptr inbounds float* %tmp15755, i64 1
- %tmp15757 = getelementptr inbounds float* %tmp15756, i64 1
- %tmp15758 = getelementptr inbounds float* %tmp15757, i64 1
- %tmp15759 = getelementptr inbounds float* %tmp15758, i64 1
- %tmp15760 = getelementptr inbounds float* %tmp15759, i64 1
- %tmp15761 = getelementptr inbounds float* %tmp15760, i64 1
- %tmp15762 = getelementptr inbounds float* %tmp15761, i64 1
- %tmp15763 = getelementptr inbounds float* %tmp15762, i64 1
- %tmp15764 = getelementptr inbounds float* %tmp15763, i64 1
- %tmp15765 = getelementptr inbounds float* %tmp15764, i64 1
- %tmp15766 = getelementptr inbounds float* %tmp15765, i64 1
- %tmp15767 = getelementptr inbounds float* %tmp15766, i64 1
- %tmp15768 = getelementptr inbounds float* %tmp15767, i64 1
- %tmp15769 = getelementptr inbounds float* %tmp15768, i64 1
- %tmp15770 = getelementptr inbounds float* %tmp15769, i64 1
- %tmp15771 = getelementptr inbounds float* %tmp15770, i64 1
- %tmp15772 = getelementptr inbounds float* %tmp15771, i64 1
- %tmp15773 = getelementptr inbounds float* %tmp15772, i64 1
- %tmp15774 = getelementptr inbounds float* %tmp15773, i64 1
- %tmp15775 = getelementptr inbounds float* %tmp15774, i64 1
- %tmp15776 = getelementptr inbounds float* %tmp15775, i64 1
- %tmp15777 = getelementptr inbounds float* %tmp15776, i64 1
- %tmp15778 = getelementptr inbounds float* %tmp15777, i64 1
- %tmp15779 = getelementptr inbounds float* %tmp15778, i64 1
- %tmp15780 = getelementptr inbounds float* %tmp15779, i64 1
- %tmp15781 = getelementptr inbounds float* %tmp15780, i64 1
- %tmp15782 = getelementptr inbounds float* %tmp15781, i64 1
- %tmp15783 = getelementptr inbounds float* %tmp15782, i64 1
- %tmp15784 = getelementptr inbounds float* %tmp15783, i64 1
- %tmp15785 = getelementptr inbounds float* %tmp15784, i64 1
- %tmp15786 = getelementptr inbounds float* %tmp15785, i64 1
- %tmp15787 = getelementptr inbounds float* %tmp15786, i64 1
- %tmp15788 = getelementptr inbounds float* %tmp15787, i64 1
- %tmp15789 = getelementptr inbounds float* %tmp15788, i64 1
- %tmp15790 = getelementptr inbounds float* %tmp15789, i64 1
- %tmp15791 = getelementptr inbounds float* %tmp15790, i64 1
- %tmp15792 = getelementptr inbounds float* %tmp15791, i64 1
- %tmp15793 = getelementptr inbounds float* %tmp15792, i64 1
- %tmp15794 = getelementptr inbounds float* %tmp15793, i64 1
- %tmp15795 = getelementptr inbounds float* %tmp15794, i64 1
- %tmp15796 = getelementptr inbounds float* %tmp15795, i64 1
- %tmp15797 = getelementptr inbounds float* %tmp15796, i64 1
- %tmp15798 = getelementptr inbounds float* %tmp15797, i64 1
- %tmp15799 = getelementptr inbounds float* %tmp15798, i64 1
- %tmp15800 = getelementptr inbounds float* %tmp15799, i64 1
- %tmp15801 = getelementptr inbounds float* %tmp15800, i64 1
- %tmp15802 = getelementptr inbounds float* %tmp15801, i64 1
- %tmp15803 = getelementptr inbounds float* %tmp15802, i64 1
- %tmp15804 = getelementptr inbounds float* %tmp15803, i64 1
- %tmp15805 = getelementptr inbounds float* %tmp15804, i64 1
- %tmp15806 = getelementptr inbounds float* %tmp15805, i64 1
- %tmp15807 = getelementptr inbounds float* %tmp15806, i64 1
- %tmp15808 = getelementptr inbounds float* %tmp15807, i64 1
- %tmp15809 = getelementptr inbounds float* %tmp15808, i64 1
- %tmp15810 = getelementptr inbounds float* %tmp15809, i64 1
- %tmp15811 = getelementptr inbounds float* %tmp15810, i64 1
- %tmp15812 = getelementptr inbounds float* %tmp15811, i64 1
- %tmp15813 = getelementptr inbounds float* %tmp15812, i64 1
- %tmp15814 = getelementptr inbounds float* %tmp15813, i64 1
- %tmp15815 = getelementptr inbounds float* %tmp15814, i64 1
- %tmp15816 = getelementptr inbounds float* %tmp15815, i64 1
- %tmp15817 = getelementptr inbounds float* %tmp15816, i64 1
- %tmp15818 = getelementptr inbounds float* %tmp15817, i64 1
- %tmp15819 = getelementptr inbounds float* %tmp15818, i64 1
- %tmp15820 = getelementptr inbounds float* %tmp15819, i64 1
- %tmp15821 = getelementptr inbounds float* %tmp15820, i64 1
- %tmp15822 = getelementptr inbounds float* %tmp15821, i64 1
- %tmp15823 = getelementptr inbounds float* %tmp15822, i64 1
- %tmp15824 = getelementptr inbounds float* %tmp15823, i64 1
- %tmp15825 = getelementptr inbounds float* %tmp15824, i64 1
- %tmp15826 = getelementptr inbounds float* %tmp15825, i64 1
- %tmp15827 = getelementptr inbounds float* %tmp15826, i64 1
- %tmp15828 = getelementptr inbounds float* %tmp15827, i64 1
- %tmp15829 = getelementptr inbounds float* %tmp15828, i64 1
- %tmp15830 = getelementptr inbounds float* %tmp15829, i64 1
- %tmp15831 = getelementptr inbounds float* %tmp15830, i64 1
- %tmp15832 = getelementptr inbounds float* %tmp15831, i64 1
- %tmp15833 = getelementptr inbounds float* %tmp15832, i64 1
- %tmp15834 = getelementptr inbounds float* %tmp15833, i64 1
- %tmp15835 = getelementptr inbounds float* %tmp15834, i64 1
- %tmp15836 = getelementptr inbounds float* %tmp15835, i64 1
- %tmp15837 = getelementptr inbounds float* %tmp15836, i64 1
- %tmp15838 = getelementptr inbounds float* %tmp15837, i64 1
- %tmp15839 = getelementptr inbounds float* %tmp15838, i64 1
- %tmp15840 = getelementptr inbounds float* %tmp15839, i64 1
- %tmp15841 = getelementptr inbounds float* %tmp15840, i64 1
- %tmp15842 = getelementptr inbounds float* %tmp15841, i64 1
- %tmp15843 = getelementptr inbounds float* %tmp15842, i64 1
- %tmp15844 = getelementptr inbounds float* %tmp15843, i64 1
- %tmp15845 = getelementptr inbounds float* %tmp15844, i64 1
- %tmp15846 = getelementptr inbounds float* %tmp15845, i64 1
- %tmp15847 = getelementptr inbounds float* %tmp15846, i64 1
- %tmp15848 = getelementptr inbounds float* %tmp15847, i64 1
- %tmp15849 = getelementptr inbounds float* %tmp15848, i64 1
- %tmp15850 = getelementptr inbounds float* %tmp15849, i64 1
- %tmp15851 = getelementptr inbounds float* %tmp15850, i64 1
- %tmp15852 = getelementptr inbounds float* %tmp15851, i64 1
- %tmp15853 = getelementptr inbounds float* %tmp15852, i64 1
- %tmp15854 = getelementptr inbounds float* %tmp15853, i64 1
- %tmp15855 = getelementptr inbounds float* %tmp15854, i64 1
- %tmp15856 = getelementptr inbounds float* %tmp15855, i64 1
- %tmp15857 = getelementptr inbounds float* %tmp15856, i64 1
- %tmp15858 = getelementptr inbounds float* %tmp15857, i64 1
- %tmp15859 = getelementptr inbounds float* %tmp15858, i64 1
- %tmp15860 = getelementptr inbounds float* %tmp15859, i64 1
- %tmp15861 = getelementptr inbounds float* %tmp15860, i64 1
- %tmp15862 = getelementptr inbounds float* %tmp15861, i64 1
- %tmp15863 = getelementptr inbounds float* %tmp15862, i64 1
- %tmp15864 = getelementptr inbounds float* %tmp15863, i64 1
- %tmp15865 = getelementptr inbounds float* %tmp15864, i64 1
- %tmp15866 = getelementptr inbounds float* %tmp15865, i64 1
- %tmp15867 = getelementptr inbounds float* %tmp15866, i64 1
- %tmp15868 = getelementptr inbounds float* %tmp15867, i64 1
- %tmp15869 = getelementptr inbounds float* %tmp15868, i64 1
- %tmp15870 = getelementptr inbounds float* %tmp15869, i64 1
- %tmp15871 = getelementptr inbounds float* %tmp15870, i64 1
- %tmp15872 = getelementptr inbounds float* %tmp15871, i64 1
- %tmp15873 = getelementptr inbounds float* %tmp15872, i64 1
- %tmp15874 = getelementptr inbounds float* %tmp15873, i64 1
- %tmp15875 = getelementptr inbounds float* %tmp15874, i64 1
- %tmp15876 = getelementptr inbounds float* %tmp15875, i64 1
- %tmp15877 = getelementptr inbounds float* %tmp15876, i64 1
- %tmp15878 = getelementptr inbounds float* %tmp15877, i64 1
- %tmp15879 = getelementptr inbounds float* %tmp15878, i64 1
- %tmp15880 = getelementptr inbounds float* %tmp15879, i64 1
- %tmp15881 = getelementptr inbounds float* %tmp15880, i64 1
- %tmp15882 = getelementptr inbounds float* %tmp15881, i64 1
- %tmp15883 = getelementptr inbounds float* %tmp15882, i64 1
- %tmp15884 = getelementptr inbounds float* %tmp15883, i64 1
- %tmp15885 = getelementptr inbounds float* %tmp15884, i64 1
- %tmp15886 = getelementptr inbounds float* %tmp15885, i64 1
- %tmp15887 = getelementptr inbounds float* %tmp15886, i64 1
- %tmp15888 = getelementptr inbounds float* %tmp15887, i64 1
- %tmp15889 = getelementptr inbounds float* %tmp15888, i64 1
- %tmp15890 = getelementptr inbounds float* %tmp15889, i64 1
- %tmp15891 = getelementptr inbounds float* %tmp15890, i64 1
- %tmp15892 = getelementptr inbounds float* %tmp15891, i64 1
- %tmp15893 = getelementptr inbounds float* %tmp15892, i64 1
- %tmp15894 = getelementptr inbounds float* %tmp15893, i64 1
- %tmp15895 = getelementptr inbounds float* %tmp15894, i64 1
- %tmp15896 = getelementptr inbounds float* %tmp15895, i64 1
- %tmp15897 = getelementptr inbounds float* %tmp15896, i64 1
- %tmp15898 = getelementptr inbounds float* %tmp15897, i64 1
- %tmp15899 = getelementptr inbounds float* %tmp15898, i64 1
- %tmp15900 = getelementptr inbounds float* %tmp15899, i64 1
- %tmp15901 = getelementptr inbounds float* %tmp15900, i64 1
- %tmp15902 = getelementptr inbounds float* %tmp15901, i64 1
- %tmp15903 = getelementptr inbounds float* %tmp15902, i64 1
- %tmp15904 = getelementptr inbounds float* %tmp15903, i64 1
- %tmp15905 = getelementptr inbounds float* %tmp15904, i64 1
- %tmp15906 = getelementptr inbounds float* %tmp15905, i64 1
- %tmp15907 = getelementptr inbounds float* %tmp15906, i64 1
- %tmp15908 = getelementptr inbounds float* %tmp15907, i64 1
- %tmp15909 = getelementptr inbounds float* %tmp15908, i64 1
- %tmp15910 = getelementptr inbounds float* %tmp15909, i64 1
- %tmp15911 = getelementptr inbounds float* %tmp15910, i64 1
- %tmp15912 = getelementptr inbounds float* %tmp15911, i64 1
- %tmp15913 = getelementptr inbounds float* %tmp15912, i64 1
- %tmp15914 = getelementptr inbounds float* %tmp15913, i64 1
- %tmp15915 = getelementptr inbounds float* %tmp15914, i64 1
- %tmp15916 = getelementptr inbounds float* %tmp15915, i64 1
- %tmp15917 = getelementptr inbounds float* %tmp15916, i64 1
- %tmp15918 = getelementptr inbounds float* %tmp15917, i64 1
- %tmp15919 = getelementptr inbounds float* %tmp15918, i64 1
- %tmp15920 = getelementptr inbounds float* %tmp15919, i64 1
- %tmp15921 = getelementptr inbounds float* %tmp15920, i64 1
- %tmp15922 = getelementptr inbounds float* %tmp15921, i64 1
- %tmp15923 = getelementptr inbounds float* %tmp15922, i64 1
- %tmp15924 = getelementptr inbounds float* %tmp15923, i64 1
- %tmp15925 = getelementptr inbounds float* %tmp15924, i64 1
- %tmp15926 = getelementptr inbounds float* %tmp15925, i64 1
- %tmp15927 = getelementptr inbounds float* %tmp15926, i64 1
- %tmp15928 = getelementptr inbounds float* %tmp15927, i64 1
- %tmp15929 = getelementptr inbounds float* %tmp15928, i64 1
- %tmp15930 = getelementptr inbounds float* %tmp15929, i64 1
- %tmp15931 = getelementptr inbounds float* %tmp15930, i64 1
- %tmp15932 = getelementptr inbounds float* %tmp15931, i64 1
- %tmp15933 = getelementptr inbounds float* %tmp15932, i64 1
- %tmp15934 = getelementptr inbounds float* %tmp15933, i64 1
- %tmp15935 = getelementptr inbounds float* %tmp15934, i64 1
- %tmp15936 = getelementptr inbounds float* %tmp15935, i64 1
- %tmp15937 = getelementptr inbounds float* %tmp15936, i64 1
- %tmp15938 = getelementptr inbounds float* %tmp15937, i64 1
- %tmp15939 = getelementptr inbounds float* %tmp15938, i64 1
- %tmp15940 = getelementptr inbounds float* %tmp15939, i64 1
- %tmp15941 = getelementptr inbounds float* %tmp15940, i64 1
- %tmp15942 = getelementptr inbounds float* %tmp15941, i64 1
- %tmp15943 = getelementptr inbounds float* %tmp15942, i64 1
- %tmp15944 = getelementptr inbounds float* %tmp15943, i64 1
- %tmp15945 = getelementptr inbounds float* %tmp15944, i64 1
- %tmp15946 = getelementptr inbounds float* %tmp15945, i64 1
- %tmp15947 = getelementptr inbounds float* %tmp15946, i64 1
- %tmp15948 = getelementptr inbounds float* %tmp15947, i64 1
- %tmp15949 = getelementptr inbounds float* %tmp15948, i64 1
- %tmp15950 = getelementptr inbounds float* %tmp15949, i64 1
- %tmp15951 = getelementptr inbounds float* %tmp15950, i64 1
- %tmp15952 = getelementptr inbounds float* %tmp15951, i64 1
- %tmp15953 = getelementptr inbounds float* %tmp15952, i64 1
- %tmp15954 = getelementptr inbounds float* %tmp15953, i64 1
- %tmp15955 = getelementptr inbounds float* %tmp15954, i64 1
- %tmp15956 = getelementptr inbounds float* %tmp15955, i64 1
- %tmp15957 = getelementptr inbounds float* %tmp15956, i64 1
- %tmp15958 = getelementptr inbounds float* %tmp15957, i64 1
- %tmp15959 = getelementptr inbounds float* %tmp15958, i64 1
- %tmp15960 = getelementptr inbounds float* %tmp15959, i64 1
- %tmp15961 = getelementptr inbounds float* %tmp15960, i64 1
- %tmp15962 = getelementptr inbounds float* %tmp15961, i64 1
- %tmp15963 = getelementptr inbounds float* %tmp15962, i64 1
- %tmp15964 = getelementptr inbounds float* %tmp15963, i64 1
- %tmp15965 = getelementptr inbounds float* %tmp15964, i64 1
- %tmp15966 = getelementptr inbounds float* %tmp15965, i64 1
- %tmp15967 = getelementptr inbounds float* %tmp15966, i64 1
- %tmp15968 = getelementptr inbounds float* %tmp15967, i64 1
- %tmp15969 = getelementptr inbounds float* %tmp15968, i64 1
- %tmp15970 = getelementptr inbounds float* %tmp15969, i64 1
- %tmp15971 = getelementptr inbounds float* %tmp15970, i64 1
- %tmp15972 = getelementptr inbounds float* %tmp15971, i64 1
- %tmp15973 = getelementptr inbounds float* %tmp15972, i64 1
- %tmp15974 = getelementptr inbounds float* %tmp15973, i64 1
- %tmp15975 = getelementptr inbounds float* %tmp15974, i64 1
- %tmp15976 = getelementptr inbounds float* %tmp15975, i64 1
- %tmp15977 = getelementptr inbounds float* %tmp15976, i64 1
- %tmp15978 = getelementptr inbounds float* %tmp15977, i64 1
- %tmp15979 = getelementptr inbounds float* %tmp15978, i64 1
- %tmp15980 = getelementptr inbounds float* %tmp15979, i64 1
- %tmp15981 = getelementptr inbounds float* %tmp15980, i64 1
- %tmp15982 = getelementptr inbounds float* %tmp15981, i64 1
- %tmp15983 = getelementptr inbounds float* %tmp15982, i64 1
- %tmp15984 = getelementptr inbounds float* %tmp15983, i64 1
- %tmp15985 = getelementptr inbounds float* %tmp15984, i64 1
- %tmp15986 = getelementptr inbounds float* %tmp15985, i64 1
- %tmp15987 = getelementptr inbounds float* %tmp15986, i64 1
- %tmp15988 = getelementptr inbounds float* %tmp15987, i64 1
- %tmp15989 = getelementptr inbounds float* %tmp15988, i64 1
- %tmp15990 = getelementptr inbounds float* %tmp15989, i64 1
- %tmp15991 = getelementptr inbounds float* %tmp15990, i64 1
- %tmp15992 = getelementptr inbounds float* %tmp15991, i64 1
- %tmp15993 = getelementptr inbounds float* %tmp15992, i64 1
- %tmp15994 = getelementptr inbounds float* %tmp15993, i64 1
- %tmp15995 = getelementptr inbounds float* %tmp15994, i64 1
- %tmp15996 = getelementptr inbounds float* %tmp15995, i64 1
- %tmp15997 = getelementptr inbounds float* %tmp15996, i64 1
- %tmp15998 = getelementptr inbounds float* %tmp15997, i64 1
- %tmp15999 = getelementptr inbounds float* %tmp15998, i64 1
- %tmp16000 = getelementptr inbounds float* %tmp15999, i64 1
- %tmp16001 = getelementptr inbounds float* %tmp16000, i64 1
- %tmp16002 = getelementptr inbounds float* %tmp16001, i64 1
- %tmp16003 = getelementptr inbounds float* %tmp16002, i64 1
- %tmp16004 = getelementptr inbounds float* %tmp16003, i64 1
- %tmp16005 = getelementptr inbounds float* %tmp16004, i64 1
- %tmp16006 = getelementptr inbounds float* %tmp16005, i64 1
- %tmp16007 = getelementptr inbounds float* %tmp16006, i64 1
- %tmp16008 = getelementptr inbounds float* %tmp16007, i64 1
- %tmp16009 = getelementptr inbounds float* %tmp16008, i64 1
- %tmp16010 = getelementptr inbounds float* %tmp16009, i64 1
- %tmp16011 = getelementptr inbounds float* %tmp16010, i64 1
- %tmp16012 = getelementptr inbounds float* %tmp16011, i64 1
- %tmp16013 = getelementptr inbounds float* %tmp16012, i64 1
- %tmp16014 = getelementptr inbounds float* %tmp16013, i64 1
- %tmp16015 = getelementptr inbounds float* %tmp16014, i64 1
- %tmp16016 = getelementptr inbounds float* %tmp16015, i64 1
- %tmp16017 = getelementptr inbounds float* %tmp16016, i64 1
- %tmp16018 = getelementptr inbounds float* %tmp16017, i64 1
- %tmp16019 = getelementptr inbounds float* %tmp16018, i64 1
- %tmp16020 = getelementptr inbounds float* %tmp16019, i64 1
- %tmp16021 = getelementptr inbounds float* %tmp16020, i64 1
- %tmp16022 = getelementptr inbounds float* %tmp16021, i64 1
- %tmp16023 = getelementptr inbounds float* %tmp16022, i64 1
- %tmp16024 = getelementptr inbounds float* %tmp16023, i64 1
- %tmp16025 = getelementptr inbounds float* %tmp16024, i64 1
- %tmp16026 = getelementptr inbounds float* %tmp16025, i64 1
- %tmp16027 = getelementptr inbounds float* %tmp16026, i64 1
- %tmp16028 = getelementptr inbounds float* %tmp16027, i64 1
- %tmp16029 = getelementptr inbounds float* %tmp16028, i64 1
- %tmp16030 = getelementptr inbounds float* %tmp16029, i64 1
- %tmp16031 = getelementptr inbounds float* %tmp16030, i64 1
- %tmp16032 = getelementptr inbounds float* %tmp16031, i64 1
- %tmp16033 = getelementptr inbounds float* %tmp16032, i64 1
- %tmp16034 = getelementptr inbounds float* %tmp16033, i64 1
- %tmp16035 = getelementptr inbounds float* %tmp16034, i64 1
- %tmp16036 = getelementptr inbounds float* %tmp16035, i64 1
- %tmp16037 = getelementptr inbounds float* %tmp16036, i64 1
- %tmp16038 = getelementptr inbounds float* %tmp16037, i64 1
- %tmp16039 = getelementptr inbounds float* %tmp16038, i64 1
- %tmp16040 = getelementptr inbounds float* %tmp16039, i64 1
- %tmp16041 = getelementptr inbounds float* %tmp16040, i64 1
- %tmp16042 = getelementptr inbounds float* %tmp16041, i64 1
- %tmp16043 = getelementptr inbounds float* %tmp16042, i64 1
- %tmp16044 = getelementptr inbounds float* %tmp16043, i64 1
- %tmp16045 = getelementptr inbounds float* %tmp16044, i64 1
- %tmp16046 = getelementptr inbounds float* %tmp16045, i64 1
- %tmp16047 = getelementptr inbounds float* %tmp16046, i64 1
- %tmp16048 = getelementptr inbounds float* %tmp16047, i64 1
- %tmp16049 = getelementptr inbounds float* %tmp16048, i64 1
- %tmp16050 = getelementptr inbounds float* %tmp16049, i64 1
- %tmp16051 = getelementptr inbounds float* %tmp16050, i64 1
- %tmp16052 = getelementptr inbounds float* %tmp16051, i64 1
- %tmp16053 = getelementptr inbounds float* %tmp16052, i64 1
- %tmp16054 = getelementptr inbounds float* %tmp16053, i64 1
- %tmp16055 = getelementptr inbounds float* %tmp16054, i64 1
- %tmp16056 = getelementptr inbounds float* %tmp16055, i64 1
- %tmp16057 = getelementptr inbounds float* %tmp16056, i64 1
- %tmp16058 = getelementptr inbounds float* %tmp16057, i64 1
- %tmp16059 = getelementptr inbounds float* %tmp16058, i64 1
- %tmp16060 = getelementptr inbounds float* %tmp16059, i64 1
- %tmp16061 = getelementptr inbounds float* %tmp16060, i64 1
- %tmp16062 = getelementptr inbounds float* %tmp16061, i64 1
- %tmp16063 = getelementptr inbounds float* %tmp16062, i64 1
- %tmp16064 = getelementptr inbounds float* %tmp16063, i64 1
- %tmp16065 = getelementptr inbounds float* %tmp16064, i64 1
- %tmp16066 = getelementptr inbounds float* %tmp16065, i64 1
- %tmp16067 = getelementptr inbounds float* %tmp16066, i64 1
- %tmp16068 = getelementptr inbounds float* %tmp16067, i64 1
- %tmp16069 = getelementptr inbounds float* %tmp16068, i64 1
- %tmp16070 = getelementptr inbounds float* %tmp16069, i64 1
- %tmp16071 = getelementptr inbounds float* %tmp16070, i64 1
- %tmp16072 = getelementptr inbounds float* %tmp16071, i64 1
- %tmp16073 = getelementptr inbounds float* %tmp16072, i64 1
- %tmp16074 = getelementptr inbounds float* %tmp16073, i64 1
- %tmp16075 = getelementptr inbounds float* %tmp16074, i64 1
- %tmp16076 = getelementptr inbounds float* %tmp16075, i64 1
- %tmp16077 = getelementptr inbounds float* %tmp16076, i64 1
- %tmp16078 = getelementptr inbounds float* %tmp16077, i64 1
- %tmp16079 = getelementptr inbounds float* %tmp16078, i64 1
- %tmp16080 = getelementptr inbounds float* %tmp16079, i64 1
- %tmp16081 = getelementptr inbounds float* %tmp16080, i64 1
- %tmp16082 = getelementptr inbounds float* %tmp16081, i64 1
- %tmp16083 = getelementptr inbounds float* %tmp16082, i64 1
- %tmp16084 = getelementptr inbounds float* %tmp16083, i64 1
- %tmp16085 = getelementptr inbounds float* %tmp16084, i64 1
- %tmp16086 = getelementptr inbounds float* %tmp16085, i64 1
- %tmp16087 = getelementptr inbounds float* %tmp16086, i64 1
- %tmp16088 = getelementptr inbounds float* %tmp16087, i64 1
- %tmp16089 = getelementptr inbounds float* %tmp16088, i64 1
- %tmp16090 = getelementptr inbounds float* %tmp16089, i64 1
- %tmp16091 = getelementptr inbounds float* %tmp16090, i64 1
- %tmp16092 = getelementptr inbounds float* %tmp16091, i64 1
- %tmp16093 = getelementptr inbounds float* %tmp16092, i64 1
- %tmp16094 = getelementptr inbounds float* %tmp16093, i64 1
- %tmp16095 = getelementptr inbounds float* %tmp16094, i64 1
- %tmp16096 = getelementptr inbounds float* %tmp16095, i64 1
- %tmp16097 = getelementptr inbounds float* %tmp16096, i64 1
- %tmp16098 = getelementptr inbounds float* %tmp16097, i64 1
- %tmp16099 = getelementptr inbounds float* %tmp16098, i64 1
- %tmp16100 = getelementptr inbounds float* %tmp16099, i64 1
- %tmp16101 = getelementptr inbounds float* %tmp16100, i64 1
- %tmp16102 = getelementptr inbounds float* %tmp16101, i64 1
- %tmp16103 = getelementptr inbounds float* %tmp16102, i64 1
- %tmp16104 = getelementptr inbounds float* %tmp16103, i64 1
- %tmp16105 = getelementptr inbounds float* %tmp16104, i64 1
- %tmp16106 = getelementptr inbounds float* %tmp16105, i64 1
- %tmp16107 = getelementptr inbounds float* %tmp16106, i64 1
- %tmp16108 = getelementptr inbounds float* %tmp16107, i64 1
- %tmp16109 = getelementptr inbounds float* %tmp16108, i64 1
- %tmp16110 = getelementptr inbounds float* %tmp16109, i64 1
- %tmp16111 = getelementptr inbounds float* %tmp16110, i64 1
- %tmp16112 = getelementptr inbounds float* %tmp16111, i64 1
- %tmp16113 = getelementptr inbounds float* %tmp16112, i64 1
- %tmp16114 = getelementptr inbounds float* %tmp16113, i64 1
- %tmp16115 = getelementptr inbounds float* %tmp16114, i64 1
- %tmp16116 = getelementptr inbounds float* %tmp16115, i64 1
- %tmp16117 = getelementptr inbounds float* %tmp16116, i64 1
- %tmp16118 = getelementptr inbounds float* %tmp16117, i64 1
- %tmp16119 = getelementptr inbounds float* %tmp16118, i64 1
- %tmp16120 = getelementptr inbounds float* %tmp16119, i64 1
- %tmp16121 = getelementptr inbounds float* %tmp16120, i64 1
- %tmp16122 = getelementptr inbounds float* %tmp16121, i64 1
- %tmp16123 = getelementptr inbounds float* %tmp16122, i64 1
- %tmp16124 = getelementptr inbounds float* %tmp16123, i64 1
- %tmp16125 = getelementptr inbounds float* %tmp16124, i64 1
- %tmp16126 = getelementptr inbounds float* %tmp16125, i64 1
- %tmp16127 = getelementptr inbounds float* %tmp16126, i64 1
- %tmp16128 = getelementptr inbounds float* %tmp16127, i64 1
- %tmp16129 = getelementptr inbounds float* %tmp16128, i64 1
- %tmp16130 = getelementptr inbounds float* %tmp16129, i64 1
- %tmp16131 = getelementptr inbounds float* %tmp16130, i64 1
- %tmp16132 = getelementptr inbounds float* %tmp16131, i64 1
- %tmp16133 = getelementptr inbounds float* %tmp16132, i64 1
- %tmp16134 = getelementptr inbounds float* %tmp16133, i64 1
- %tmp16135 = getelementptr inbounds float* %tmp16134, i64 1
- %tmp16136 = getelementptr inbounds float* %tmp16135, i64 1
- %tmp16137 = getelementptr inbounds float* %tmp16136, i64 1
- %tmp16138 = getelementptr inbounds float* %tmp16137, i64 1
- %tmp16139 = getelementptr inbounds float* %tmp16138, i64 1
- %tmp16140 = getelementptr inbounds float* %tmp16139, i64 1
- %tmp16141 = getelementptr inbounds float* %tmp16140, i64 1
- %tmp16142 = getelementptr inbounds float* %tmp16141, i64 1
- %tmp16143 = getelementptr inbounds float* %tmp16142, i64 1
- %tmp16144 = getelementptr inbounds float* %tmp16143, i64 1
- %tmp16145 = getelementptr inbounds float* %tmp16144, i64 1
- %tmp16146 = getelementptr inbounds float* %tmp16145, i64 1
- %tmp16147 = getelementptr inbounds float* %tmp16146, i64 1
- %tmp16148 = getelementptr inbounds float* %tmp16147, i64 1
- %tmp16149 = getelementptr inbounds float* %tmp16148, i64 1
- %tmp16150 = getelementptr inbounds float* %tmp16149, i64 1
- %tmp16151 = getelementptr inbounds float* %tmp16150, i64 1
- %tmp16152 = getelementptr inbounds float* %tmp16151, i64 1
- %tmp16153 = getelementptr inbounds float* %tmp16152, i64 1
- %tmp16154 = getelementptr inbounds float* %tmp16153, i64 1
- %tmp16155 = getelementptr inbounds float* %tmp16154, i64 1
- %tmp16156 = getelementptr inbounds float* %tmp16155, i64 1
- %tmp16157 = getelementptr inbounds float* %tmp16156, i64 1
- %tmp16158 = getelementptr inbounds float* %tmp16157, i64 1
- %tmp16159 = getelementptr inbounds float* %tmp16158, i64 1
- %tmp16160 = getelementptr inbounds float* %tmp16159, i64 1
- %tmp16161 = getelementptr inbounds float* %tmp16160, i64 1
- %tmp16162 = getelementptr inbounds float* %tmp16161, i64 1
- %tmp16163 = getelementptr inbounds float* %tmp16162, i64 1
- %tmp16164 = getelementptr inbounds float* %tmp16163, i64 1
- %tmp16165 = getelementptr inbounds float* %tmp16164, i64 1
- %tmp16166 = getelementptr inbounds float* %tmp16165, i64 1
- %tmp16167 = getelementptr inbounds float* %tmp16166, i64 1
- %tmp16168 = getelementptr inbounds float* %tmp16167, i64 1
- %tmp16169 = getelementptr inbounds float* %tmp16168, i64 1
- %tmp16170 = getelementptr inbounds float* %tmp16169, i64 1
- %tmp16171 = getelementptr inbounds float* %tmp16170, i64 1
- %tmp16172 = getelementptr inbounds float* %tmp16171, i64 1
- %tmp16173 = getelementptr inbounds float* %tmp16172, i64 1
- %tmp16174 = getelementptr inbounds float* %tmp16173, i64 1
- %tmp16175 = getelementptr inbounds float* %tmp16174, i64 1
- %tmp16176 = getelementptr inbounds float* %tmp16175, i64 1
- %tmp16177 = getelementptr inbounds float* %tmp16176, i64 1
- %tmp16178 = getelementptr inbounds float* %tmp16177, i64 1
- %tmp16179 = getelementptr inbounds float* %tmp16178, i64 1
- %tmp16180 = getelementptr inbounds float* %tmp16179, i64 1
- %tmp16181 = getelementptr inbounds float* %tmp16180, i64 1
- %tmp16182 = getelementptr inbounds float* %tmp16181, i64 1
- %tmp16183 = getelementptr inbounds float* %tmp16182, i64 1
- %tmp16184 = getelementptr inbounds float* %tmp16183, i64 1
- %tmp16185 = getelementptr inbounds float* %tmp16184, i64 1
- %tmp16186 = getelementptr inbounds float* %tmp16185, i64 1
- %tmp16187 = getelementptr inbounds float* %tmp16186, i64 1
- %tmp16188 = getelementptr inbounds float* %tmp16187, i64 1
- %tmp16189 = getelementptr inbounds float* %tmp16188, i64 1
- %tmp16190 = getelementptr inbounds float* %tmp16189, i64 1
- %tmp16191 = getelementptr inbounds float* %tmp16190, i64 1
- %tmp16192 = getelementptr inbounds float* %tmp16191, i64 1
- %tmp16193 = getelementptr inbounds float* %tmp16192, i64 1
- %tmp16194 = getelementptr inbounds float* %tmp16193, i64 1
- %tmp16195 = getelementptr inbounds float* %tmp16194, i64 1
- %tmp16196 = getelementptr inbounds float* %tmp16195, i64 1
- %tmp16197 = getelementptr inbounds float* %tmp16196, i64 1
- %tmp16198 = getelementptr inbounds float* %tmp16197, i64 1
- %tmp16199 = getelementptr inbounds float* %tmp16198, i64 1
- %tmp16200 = getelementptr inbounds float* %tmp16199, i64 1
- %tmp16201 = getelementptr inbounds float* %tmp16200, i64 1
- %tmp16202 = getelementptr inbounds float* %tmp16201, i64 1
- %tmp16203 = getelementptr inbounds float* %tmp16202, i64 1
- %tmp16204 = getelementptr inbounds float* %tmp16203, i64 1
- %tmp16205 = getelementptr inbounds float* %tmp16204, i64 1
- %tmp16206 = getelementptr inbounds float* %tmp16205, i64 1
- %tmp16207 = getelementptr inbounds float* %tmp16206, i64 1
- %tmp16208 = getelementptr inbounds float* %tmp16207, i64 1
- %tmp16209 = getelementptr inbounds float* %tmp16208, i64 1
- %tmp16210 = getelementptr inbounds float* %tmp16209, i64 1
- %tmp16211 = getelementptr inbounds float* %tmp16210, i64 1
- %tmp16212 = getelementptr inbounds float* %tmp16211, i64 1
- %tmp16213 = getelementptr inbounds float* %tmp16212, i64 1
- %tmp16214 = getelementptr inbounds float* %tmp16213, i64 1
- %tmp16215 = getelementptr inbounds float* %tmp16214, i64 1
- %tmp16216 = getelementptr inbounds float* %tmp16215, i64 1
- %tmp16217 = getelementptr inbounds float* %tmp16216, i64 1
- %tmp16218 = getelementptr inbounds float* %tmp16217, i64 1
- %tmp16219 = getelementptr inbounds float* %tmp16218, i64 1
- %tmp16220 = getelementptr inbounds float* %tmp16219, i64 1
- %tmp16221 = getelementptr inbounds float* %tmp16220, i64 1
- %tmp16222 = getelementptr inbounds float* %tmp16221, i64 1
- %tmp16223 = getelementptr inbounds float* %tmp16222, i64 1
- %tmp16224 = getelementptr inbounds float* %tmp16223, i64 1
- %tmp16225 = getelementptr inbounds float* %tmp16224, i64 1
- %tmp16226 = getelementptr inbounds float* %tmp16225, i64 1
- %tmp16227 = getelementptr inbounds float* %tmp16226, i64 1
- %tmp16228 = getelementptr inbounds float* %tmp16227, i64 1
- %tmp16229 = getelementptr inbounds float* %tmp16228, i64 1
- %tmp16230 = getelementptr inbounds float* %tmp16229, i64 1
- %tmp16231 = getelementptr inbounds float* %tmp16230, i64 1
- %tmp16232 = getelementptr inbounds float* %tmp16231, i64 1
- %tmp16233 = getelementptr inbounds float* %tmp16232, i64 1
- %tmp16234 = getelementptr inbounds float* %tmp16233, i64 1
- %tmp16235 = getelementptr inbounds float* %tmp16234, i64 1
- %tmp16236 = getelementptr inbounds float* %tmp16235, i64 1
- %tmp16237 = getelementptr inbounds float* %tmp16236, i64 1
- %tmp16238 = getelementptr inbounds float* %tmp16237, i64 1
- %tmp16239 = getelementptr inbounds float* %tmp16238, i64 1
- %tmp16240 = getelementptr inbounds float* %tmp16239, i64 1
- %tmp16241 = getelementptr inbounds float* %tmp16240, i64 1
- %tmp16242 = getelementptr inbounds float* %tmp16241, i64 1
- %tmp16243 = getelementptr inbounds float* %tmp16242, i64 1
- %tmp16244 = getelementptr inbounds float* %tmp16243, i64 1
- %tmp16245 = getelementptr inbounds float* %tmp16244, i64 1
- %tmp16246 = getelementptr inbounds float* %tmp16245, i64 1
- %tmp16247 = getelementptr inbounds float* %tmp16246, i64 1
- %tmp16248 = getelementptr inbounds float* %tmp16247, i64 1
- %tmp16249 = getelementptr inbounds float* %tmp16248, i64 1
- %tmp16250 = getelementptr inbounds float* %tmp16249, i64 1
- %tmp16251 = getelementptr inbounds float* %tmp16250, i64 1
- %tmp16252 = getelementptr inbounds float* %tmp16251, i64 1
- %tmp16253 = getelementptr inbounds float* %tmp16252, i64 1
- %tmp16254 = getelementptr inbounds float* %tmp16253, i64 1
- %tmp16255 = getelementptr inbounds float* %tmp16254, i64 1
- %tmp16256 = getelementptr inbounds float* %tmp16255, i64 1
- %tmp16257 = getelementptr inbounds float* %tmp16256, i64 1
- %tmp16258 = getelementptr inbounds float* %tmp16257, i64 1
- %tmp16259 = getelementptr inbounds float* %tmp16258, i64 1
- %tmp16260 = getelementptr inbounds float* %tmp16259, i64 1
- %tmp16261 = getelementptr inbounds float* %tmp16260, i64 1
- %tmp16262 = getelementptr inbounds float* %tmp16261, i64 1
- %tmp16263 = getelementptr inbounds float* %tmp16262, i64 1
- %tmp16264 = getelementptr inbounds float* %tmp16263, i64 1
- %tmp16265 = getelementptr inbounds float* %tmp16264, i64 1
- %tmp16266 = getelementptr inbounds float* %tmp16265, i64 1
- %tmp16267 = getelementptr inbounds float* %tmp16266, i64 1
- %tmp16268 = getelementptr inbounds float* %tmp16267, i64 1
- %tmp16269 = getelementptr inbounds float* %tmp16268, i64 1
- %tmp16270 = getelementptr inbounds float* %tmp16269, i64 1
- %tmp16271 = getelementptr inbounds float* %tmp16270, i64 1
- %tmp16272 = getelementptr inbounds float* %tmp16271, i64 1
- %tmp16273 = getelementptr inbounds float* %tmp16272, i64 1
- %tmp16274 = getelementptr inbounds float* %tmp16273, i64 1
- %tmp16275 = getelementptr inbounds float* %tmp16274, i64 1
- %tmp16276 = getelementptr inbounds float* %tmp16275, i64 1
- %tmp16277 = getelementptr inbounds float* %tmp16276, i64 1
- %tmp16278 = getelementptr inbounds float* %tmp16277, i64 1
- %tmp16279 = getelementptr inbounds float* %tmp16278, i64 1
- %tmp16280 = getelementptr inbounds float* %tmp16279, i64 1
- %tmp16281 = getelementptr inbounds float* %tmp16280, i64 1
- %tmp16282 = getelementptr inbounds float* %tmp16281, i64 1
- %tmp16283 = getelementptr inbounds float* %tmp16282, i64 1
- %tmp16284 = getelementptr inbounds float* %tmp16283, i64 1
- %tmp16285 = getelementptr inbounds float* %tmp16284, i64 1
- %tmp16286 = getelementptr inbounds float* %tmp16285, i64 1
- %tmp16287 = getelementptr inbounds float* %tmp16286, i64 1
- %tmp16288 = getelementptr inbounds float* %tmp16287, i64 1
- %tmp16289 = getelementptr inbounds float* %tmp16288, i64 1
- %tmp16290 = getelementptr inbounds float* %tmp16289, i64 1
- %tmp16291 = getelementptr inbounds float* %tmp16290, i64 1
- %tmp16292 = getelementptr inbounds float* %tmp16291, i64 1
- %tmp16293 = getelementptr inbounds float* %tmp16292, i64 1
- %tmp16294 = getelementptr inbounds float* %tmp16293, i64 1
- %tmp16295 = getelementptr inbounds float* %tmp16294, i64 1
- %tmp16296 = getelementptr inbounds float* %tmp16295, i64 1
- %tmp16297 = getelementptr inbounds float* %tmp16296, i64 1
- %tmp16298 = getelementptr inbounds float* %tmp16297, i64 1
- %tmp16299 = getelementptr inbounds float* %tmp16298, i64 1
- %tmp16300 = getelementptr inbounds float* %tmp16299, i64 1
- %tmp16301 = getelementptr inbounds float* %tmp16300, i64 1
- %tmp16302 = getelementptr inbounds float* %tmp16301, i64 1
- %tmp16303 = getelementptr inbounds float* %tmp16302, i64 1
- %tmp16304 = getelementptr inbounds float* %tmp16303, i64 1
- %tmp16305 = getelementptr inbounds float* %tmp16304, i64 1
- %tmp16306 = getelementptr inbounds float* %tmp16305, i64 1
- %tmp16307 = getelementptr inbounds float* %tmp16306, i64 1
- %tmp16308 = getelementptr inbounds float* %tmp16307, i64 1
- %tmp16309 = getelementptr inbounds float* %tmp16308, i64 1
- %tmp16310 = getelementptr inbounds float* %tmp16309, i64 1
- %tmp16311 = getelementptr inbounds float* %tmp16310, i64 1
- %tmp16312 = getelementptr inbounds float* %tmp16311, i64 1
- %tmp16313 = getelementptr inbounds float* %tmp16312, i64 1
- %tmp16314 = getelementptr inbounds float* %tmp16313, i64 1
- %tmp16315 = getelementptr inbounds float* %tmp16314, i64 1
- %tmp16316 = getelementptr inbounds float* %tmp16315, i64 1
- %tmp16317 = getelementptr inbounds float* %tmp16316, i64 1
- %tmp16318 = getelementptr inbounds float* %tmp16317, i64 1
- %tmp16319 = getelementptr inbounds float* %tmp16318, i64 1
- %tmp16320 = getelementptr inbounds float* %tmp16319, i64 1
- %tmp16321 = getelementptr inbounds float* %tmp16320, i64 1
- %tmp16322 = getelementptr inbounds float* %tmp16321, i64 1
- %tmp16323 = getelementptr inbounds float* %tmp16322, i64 1
- %tmp16324 = getelementptr inbounds float* %tmp16323, i64 1
- %tmp16325 = getelementptr inbounds float* %tmp16324, i64 1
- %tmp16326 = getelementptr inbounds float* %tmp16325, i64 1
- %tmp16327 = getelementptr inbounds float* %tmp16326, i64 1
- %tmp16328 = getelementptr inbounds float* %tmp16327, i64 1
- %tmp16329 = getelementptr inbounds float* %tmp16328, i64 1
- %tmp16330 = getelementptr inbounds float* %tmp16329, i64 1
- %tmp16331 = getelementptr inbounds float* %tmp16330, i64 1
- %tmp16332 = getelementptr inbounds float* %tmp16331, i64 1
- %tmp16333 = getelementptr inbounds float* %tmp16332, i64 1
- %tmp16334 = getelementptr inbounds float* %tmp16333, i64 1
- %tmp16335 = getelementptr inbounds float* %tmp16334, i64 1
- %tmp16336 = getelementptr inbounds float* %tmp16335, i64 1
- %tmp16337 = getelementptr inbounds float* %tmp16336, i64 1
- %tmp16338 = getelementptr inbounds float* %tmp16337, i64 1
- %tmp16339 = getelementptr inbounds float* %tmp16338, i64 1
- %tmp16340 = getelementptr inbounds float* %tmp16339, i64 1
- %tmp16341 = getelementptr inbounds float* %tmp16340, i64 1
- %tmp16342 = getelementptr inbounds float* %tmp16341, i64 1
- %tmp16343 = getelementptr inbounds float* %tmp16342, i64 1
- %tmp16344 = getelementptr inbounds float* %tmp16343, i64 1
- %tmp16345 = getelementptr inbounds float* %tmp16344, i64 1
- %tmp16346 = getelementptr inbounds float* %tmp16345, i64 1
- %tmp16347 = getelementptr inbounds float* %tmp16346, i64 1
- %tmp16348 = getelementptr inbounds float* %tmp16347, i64 1
- %tmp16349 = getelementptr inbounds float* %tmp16348, i64 1
- %tmp16350 = getelementptr inbounds float* %tmp16349, i64 1
- %tmp16351 = getelementptr inbounds float* %tmp16350, i64 1
- %tmp16352 = getelementptr inbounds float* %tmp16351, i64 1
- %tmp16353 = getelementptr inbounds float* %tmp16352, i64 1
- %tmp16354 = getelementptr inbounds float* %tmp16353, i64 1
- %tmp16355 = getelementptr inbounds float* %tmp16354, i64 1
- %tmp16356 = getelementptr inbounds float* %tmp16355, i64 1
- %tmp16357 = getelementptr inbounds float* %tmp16356, i64 1
- %tmp16358 = getelementptr inbounds float* %tmp16357, i64 1
- %tmp16359 = getelementptr inbounds float* %tmp16358, i64 1
- %tmp16360 = getelementptr inbounds float* %tmp16359, i64 1
- %tmp16361 = getelementptr inbounds float* %tmp16360, i64 1
- %tmp16362 = getelementptr inbounds float* %tmp16361, i64 1
- %tmp16363 = getelementptr inbounds float* %tmp16362, i64 1
- %tmp16364 = getelementptr inbounds float* %tmp16363, i64 1
- %tmp16365 = getelementptr inbounds float* %tmp16364, i64 1
- %tmp16366 = getelementptr inbounds float* %tmp16365, i64 1
- %tmp16367 = getelementptr inbounds float* %tmp16366, i64 1
- %tmp16368 = getelementptr inbounds float* %tmp16367, i64 1
- %tmp16369 = getelementptr inbounds float* %tmp16368, i64 1
- %tmp16370 = getelementptr inbounds float* %tmp16369, i64 1
- %tmp16371 = getelementptr inbounds float* %tmp16370, i64 1
- %tmp16372 = getelementptr inbounds float* %tmp16371, i64 1
- %tmp16373 = getelementptr inbounds float* %tmp16372, i64 1
- %tmp16374 = getelementptr inbounds float* %tmp16373, i64 1
- %tmp16375 = getelementptr inbounds float* %tmp16374, i64 1
- %tmp16376 = getelementptr inbounds float* %tmp16375, i64 1
- %tmp16377 = getelementptr inbounds float* %tmp16376, i64 1
- %tmp16378 = getelementptr inbounds float* %tmp16377, i64 1
- %tmp16379 = getelementptr inbounds float* %tmp16378, i64 1
- %tmp16380 = getelementptr inbounds float* %tmp16379, i64 1
- %tmp16381 = getelementptr inbounds float* %tmp16380, i64 1
- %tmp16382 = getelementptr inbounds float* %tmp16381, i64 1
- %tmp16383 = getelementptr inbounds float* %tmp16382, i64 1
- %tmp16384 = getelementptr inbounds float* %tmp16383, i64 1
- %tmp16385 = getelementptr inbounds float* %tmp16384, i64 1
- %tmp16386 = getelementptr inbounds float* %tmp16385, i64 1
- %tmp16387 = getelementptr inbounds float* %tmp16386, i64 1
- %tmp16388 = getelementptr inbounds float* %tmp16387, i64 1
- %tmp16389 = getelementptr inbounds float* %tmp16388, i64 1
- %tmp16390 = getelementptr inbounds float* %tmp16389, i64 1
- %tmp16391 = getelementptr inbounds float* %tmp16390, i64 1
- %tmp16392 = getelementptr inbounds float* %tmp16391, i64 1
- %tmp16393 = getelementptr inbounds float* %tmp16392, i64 1
- %tmp16394 = getelementptr inbounds float* %tmp16393, i64 1
- %tmp16395 = getelementptr inbounds float* %tmp16394, i64 1
- %tmp16396 = getelementptr inbounds float* %tmp16395, i64 1
- %tmp16397 = getelementptr inbounds float* %tmp16396, i64 1
- %tmp16398 = getelementptr inbounds float* %tmp16397, i64 1
- %tmp16399 = getelementptr inbounds float* %tmp16398, i64 1
- %tmp16400 = getelementptr inbounds float* %tmp16399, i64 1
- %tmp16401 = getelementptr inbounds float* %tmp16400, i64 1
- %tmp16402 = getelementptr inbounds float* %tmp16401, i64 1
- %tmp16403 = getelementptr inbounds float* %tmp16402, i64 1
- %tmp16404 = getelementptr inbounds float* %tmp16403, i64 1
- %tmp16405 = getelementptr inbounds float* %tmp16404, i64 1
- %tmp16406 = getelementptr inbounds float* %tmp16405, i64 1
- %tmp16407 = getelementptr inbounds float* %tmp16406, i64 1
- %tmp16408 = getelementptr inbounds float* %tmp16407, i64 1
- %tmp16409 = getelementptr inbounds float* %tmp16408, i64 1
- %tmp16410 = getelementptr inbounds float* %tmp16409, i64 1
- %tmp16411 = getelementptr inbounds float* %tmp16410, i64 1
- %tmp16412 = getelementptr inbounds float* %tmp16411, i64 1
- %tmp16413 = getelementptr inbounds float* %tmp16412, i64 1
- %tmp16414 = getelementptr inbounds float* %tmp16413, i64 1
- %tmp16415 = getelementptr inbounds float* %tmp16414, i64 1
- %tmp16416 = getelementptr inbounds float* %tmp16415, i64 1
- %tmp16417 = getelementptr inbounds float* %tmp16416, i64 1
- %tmp16418 = getelementptr inbounds float* %tmp16417, i64 1
- %tmp16419 = getelementptr inbounds float* %tmp16418, i64 1
- %tmp16420 = getelementptr inbounds float* %tmp16419, i64 1
- %tmp16421 = getelementptr inbounds float* %tmp16420, i64 1
- %tmp16422 = getelementptr inbounds float* %tmp16421, i64 1
- %tmp16423 = getelementptr inbounds float* %tmp16422, i64 1
- %tmp16424 = getelementptr inbounds float* %tmp16423, i64 1
- %tmp16425 = getelementptr inbounds float* %tmp16424, i64 1
- %tmp16426 = getelementptr inbounds float* %tmp16425, i64 1
- %tmp16427 = getelementptr inbounds float* %tmp16426, i64 1
- %tmp16428 = getelementptr inbounds float* %tmp16427, i64 1
- %tmp16429 = getelementptr inbounds float* %tmp16428, i64 1
- %tmp16430 = getelementptr inbounds float* %tmp16429, i64 1
- %tmp16431 = getelementptr inbounds float* %tmp16430, i64 1
- %tmp16432 = getelementptr inbounds float* %tmp16431, i64 1
- %tmp16433 = getelementptr inbounds float* %tmp16432, i64 1
- %tmp16434 = getelementptr inbounds float* %tmp16433, i64 1
- %tmp16435 = getelementptr inbounds float* %tmp16434, i64 1
- %tmp16436 = getelementptr inbounds float* %tmp16435, i64 1
- %tmp16437 = getelementptr inbounds float* %tmp16436, i64 1
- %tmp16438 = getelementptr inbounds float* %tmp16437, i64 1
- %tmp16439 = getelementptr inbounds float* %tmp16438, i64 1
- %tmp16440 = getelementptr inbounds float* %tmp16439, i64 1
- %tmp16441 = getelementptr inbounds float* %tmp16440, i64 1
- %tmp16442 = getelementptr inbounds float* %tmp16441, i64 1
- %tmp16443 = getelementptr inbounds float* %tmp16442, i64 1
- %tmp16444 = getelementptr inbounds float* %tmp16443, i64 1
- %tmp16445 = getelementptr inbounds float* %tmp16444, i64 1
- %tmp16446 = getelementptr inbounds float* %tmp16445, i64 1
- %tmp16447 = getelementptr inbounds float* %tmp16446, i64 1
- %tmp16448 = getelementptr inbounds float* %tmp16447, i64 1
- %tmp16449 = getelementptr inbounds float* %tmp16448, i64 1
- %tmp16450 = getelementptr inbounds float* %tmp16449, i64 1
- %tmp16451 = getelementptr inbounds float* %tmp16450, i64 1
- %tmp16452 = getelementptr inbounds float* %tmp16451, i64 1
- %tmp16453 = getelementptr inbounds float* %tmp16452, i64 1
- %tmp16454 = getelementptr inbounds float* %tmp16453, i64 1
- %tmp16455 = getelementptr inbounds float* %tmp16454, i64 1
- %tmp16456 = getelementptr inbounds float* %tmp16455, i64 1
- %tmp16457 = getelementptr inbounds float* %tmp16456, i64 1
- %tmp16458 = getelementptr inbounds float* %tmp16457, i64 1
- %tmp16459 = getelementptr inbounds float* %tmp16458, i64 1
- %tmp16460 = getelementptr inbounds float* %tmp16459, i64 1
- %tmp16461 = getelementptr inbounds float* %tmp16460, i64 1
- %tmp16462 = getelementptr inbounds float* %tmp16461, i64 1
- %tmp16463 = getelementptr inbounds float* %tmp16462, i64 1
- %tmp16464 = getelementptr inbounds float* %tmp16463, i64 1
- %tmp16465 = getelementptr inbounds float* %tmp16464, i64 1
- %tmp16466 = getelementptr inbounds float* %tmp16465, i64 1
- %tmp16467 = getelementptr inbounds float* %tmp16466, i64 1
- %tmp16468 = getelementptr inbounds float* %tmp16467, i64 1
- %tmp16469 = getelementptr inbounds float* %tmp16468, i64 1
- %tmp16470 = getelementptr inbounds float* %tmp16469, i64 1
- %tmp16471 = getelementptr inbounds float* %tmp16470, i64 1
- %tmp16472 = getelementptr inbounds float* %tmp16471, i64 1
- %tmp16473 = getelementptr inbounds float* %tmp16472, i64 1
- %tmp16474 = getelementptr inbounds float* %tmp16473, i64 1
- %tmp16475 = getelementptr inbounds float* %tmp16474, i64 1
- %tmp16476 = getelementptr inbounds float* %tmp16475, i64 1
- %tmp16477 = getelementptr inbounds float* %tmp16476, i64 1
- %tmp16478 = getelementptr inbounds float* %tmp16477, i64 1
- %tmp16479 = getelementptr inbounds float* %tmp16478, i64 1
- %tmp16480 = getelementptr inbounds float* %tmp16479, i64 1
- %tmp16481 = getelementptr inbounds float* %tmp16480, i64 1
- %tmp16482 = getelementptr inbounds float* %tmp16481, i64 1
- %tmp16483 = getelementptr inbounds float* %tmp16482, i64 1
- %tmp16484 = getelementptr inbounds float* %tmp16483, i64 1
- %tmp16485 = getelementptr inbounds float* %tmp16484, i64 1
- %tmp16486 = getelementptr inbounds float* %tmp16485, i64 1
- %tmp16487 = getelementptr inbounds float* %tmp16486, i64 1
- %tmp16488 = getelementptr inbounds float* %tmp16487, i64 1
- %tmp16489 = getelementptr inbounds float* %tmp16488, i64 1
- %tmp16490 = getelementptr inbounds float* %tmp16489, i64 1
- %tmp16491 = getelementptr inbounds float* %tmp16490, i64 1
- %tmp16492 = getelementptr inbounds float* %tmp16491, i64 1
- %tmp16493 = getelementptr inbounds float* %tmp16492, i64 1
- %tmp16494 = getelementptr inbounds float* %tmp16493, i64 1
- %tmp16495 = getelementptr inbounds float* %tmp16494, i64 1
- %tmp16496 = getelementptr inbounds float* %tmp16495, i64 1
- %tmp16497 = getelementptr inbounds float* %tmp16496, i64 1
- %tmp16498 = getelementptr inbounds float* %tmp16497, i64 1
- %tmp16499 = getelementptr inbounds float* %tmp16498, i64 1
- %tmp16500 = getelementptr inbounds float* %tmp16499, i64 1
- %tmp16501 = getelementptr inbounds float* %tmp16500, i64 1
- %tmp16502 = getelementptr inbounds float* %tmp16501, i64 1
- %tmp16503 = getelementptr inbounds float* %tmp16502, i64 1
- %tmp16504 = getelementptr inbounds float* %tmp16503, i64 1
- %tmp16505 = getelementptr inbounds float* %tmp16504, i64 1
- %tmp16506 = getelementptr inbounds float* %tmp16505, i64 1
- %tmp16507 = getelementptr inbounds float* %tmp16506, i64 1
- %tmp16508 = getelementptr inbounds float* %tmp16507, i64 1
- %tmp16509 = getelementptr inbounds float* %tmp16508, i64 1
- %tmp16510 = getelementptr inbounds float* %tmp16509, i64 1
- %tmp16511 = getelementptr inbounds float* %tmp16510, i64 1
- %tmp16512 = getelementptr inbounds float* %tmp16511, i64 1
- %tmp16513 = getelementptr inbounds float* %tmp16512, i64 1
- %tmp16514 = getelementptr inbounds float* %tmp16513, i64 1
- %tmp16515 = getelementptr inbounds float* %tmp16514, i64 1
- %tmp16516 = getelementptr inbounds float* %tmp16515, i64 1
- %tmp16517 = getelementptr inbounds float* %tmp16516, i64 1
- %tmp16518 = getelementptr inbounds float* %tmp16517, i64 1
- %tmp16519 = getelementptr inbounds float* %tmp16518, i64 1
- %tmp16520 = getelementptr inbounds float* %tmp16519, i64 1
- %tmp16521 = getelementptr inbounds float* %tmp16520, i64 1
- %tmp16522 = getelementptr inbounds float* %tmp16521, i64 1
- %tmp16523 = getelementptr inbounds float* %tmp16522, i64 1
- %tmp16524 = getelementptr inbounds float* %tmp16523, i64 1
- %tmp16525 = getelementptr inbounds float* %tmp16524, i64 1
- %tmp16526 = getelementptr inbounds float* %tmp16525, i64 1
- %tmp16527 = getelementptr inbounds float* %tmp16526, i64 1
- %tmp16528 = getelementptr inbounds float* %tmp16527, i64 1
- %tmp16529 = getelementptr inbounds float* %tmp16528, i64 1
- %tmp16530 = getelementptr inbounds float* %tmp16529, i64 1
- %tmp16531 = getelementptr inbounds float* %tmp16530, i64 1
- %tmp16532 = getelementptr inbounds float* %tmp16531, i64 1
- %tmp16533 = getelementptr inbounds float* %tmp16532, i64 1
- %tmp16534 = getelementptr inbounds float* %tmp16533, i64 1
- %tmp16535 = getelementptr inbounds float* %tmp16534, i64 1
- %tmp16536 = getelementptr inbounds float* %tmp16535, i64 1
- %tmp16537 = getelementptr inbounds float* %tmp16536, i64 1
- %tmp16538 = getelementptr inbounds float* %tmp16537, i64 1
- %tmp16539 = getelementptr inbounds float* %tmp16538, i64 1
- %tmp16540 = getelementptr inbounds float* %tmp16539, i64 1
- %tmp16541 = getelementptr inbounds float* %tmp16540, i64 1
- %tmp16542 = getelementptr inbounds float* %tmp16541, i64 1
- %tmp16543 = getelementptr inbounds float* %tmp16542, i64 1
- %tmp16544 = getelementptr inbounds float* %tmp16543, i64 1
- %tmp16545 = getelementptr inbounds float* %tmp16544, i64 1
- %tmp16546 = getelementptr inbounds float* %tmp16545, i64 1
- %tmp16547 = getelementptr inbounds float* %tmp16546, i64 1
- %tmp16548 = getelementptr inbounds float* %tmp16547, i64 1
- %tmp16549 = getelementptr inbounds float* %tmp16548, i64 1
- %tmp16550 = getelementptr inbounds float* %tmp16549, i64 1
- %tmp16551 = getelementptr inbounds float* %tmp16550, i64 1
- %tmp16552 = getelementptr inbounds float* %tmp16551, i64 1
- %tmp16553 = getelementptr inbounds float* %tmp16552, i64 1
- %tmp16554 = getelementptr inbounds float* %tmp16553, i64 1
- %tmp16555 = getelementptr inbounds float* %tmp16554, i64 1
- %tmp16556 = getelementptr inbounds float* %tmp16555, i64 1
- %tmp16557 = getelementptr inbounds float* %tmp16556, i64 1
- %tmp16558 = getelementptr inbounds float* %tmp16557, i64 1
- %tmp16559 = getelementptr inbounds float* %tmp16558, i64 1
- %tmp16560 = getelementptr inbounds float* %tmp16559, i64 1
- %tmp16561 = getelementptr inbounds float* %tmp16560, i64 1
- %tmp16562 = getelementptr inbounds float* %tmp16561, i64 1
- %tmp16563 = getelementptr inbounds float* %tmp16562, i64 1
- %tmp16564 = getelementptr inbounds float* %tmp16563, i64 1
- %tmp16565 = getelementptr inbounds float* %tmp16564, i64 1
- %tmp16566 = getelementptr inbounds float* %tmp16565, i64 1
- %tmp16567 = getelementptr inbounds float* %tmp16566, i64 1
- %tmp16568 = getelementptr inbounds float* %tmp16567, i64 1
- %tmp16569 = getelementptr inbounds float* %tmp16568, i64 1
- %tmp16570 = getelementptr inbounds float* %tmp16569, i64 1
- %tmp16571 = getelementptr inbounds float* %tmp16570, i64 1
- %tmp16572 = getelementptr inbounds float* %tmp16571, i64 1
- %tmp16573 = getelementptr inbounds float* %tmp16572, i64 1
- %tmp16574 = getelementptr inbounds float* %tmp16573, i64 1
- %tmp16575 = getelementptr inbounds float* %tmp16574, i64 1
- %tmp16576 = getelementptr inbounds float* %tmp16575, i64 1
- %tmp16577 = getelementptr inbounds float* %tmp16576, i64 1
- %tmp16578 = getelementptr inbounds float* %tmp16577, i64 1
- %tmp16579 = getelementptr inbounds float* %tmp16578, i64 1
- %tmp16580 = getelementptr inbounds float* %tmp16579, i64 1
- %tmp16581 = getelementptr inbounds float* %tmp16580, i64 1
- %tmp16582 = getelementptr inbounds float* %tmp16581, i64 1
- %tmp16583 = getelementptr inbounds float* %tmp16582, i64 1
- %tmp16584 = getelementptr inbounds float* %tmp16583, i64 1
- %tmp16585 = getelementptr inbounds float* %tmp16584, i64 1
- %tmp16586 = getelementptr inbounds float* %tmp16585, i64 1
- %tmp16587 = getelementptr inbounds float* %tmp16586, i64 1
- %tmp16588 = getelementptr inbounds float* %tmp16587, i64 1
- %tmp16589 = getelementptr inbounds float* %tmp16588, i64 1
- %tmp16590 = getelementptr inbounds float* %tmp16589, i64 1
- %tmp16591 = getelementptr inbounds float* %tmp16590, i64 1
- %tmp16592 = getelementptr inbounds float* %tmp16591, i64 1
- %tmp16593 = getelementptr inbounds float* %tmp16592, i64 1
- %tmp16594 = getelementptr inbounds float* %tmp16593, i64 1
- %tmp16595 = getelementptr inbounds float* %tmp16594, i64 1
- %tmp16596 = getelementptr inbounds float* %tmp16595, i64 1
- %tmp16597 = getelementptr inbounds float* %tmp16596, i64 1
- %tmp16598 = getelementptr inbounds float* %tmp16597, i64 1
- %tmp16599 = getelementptr inbounds float* %tmp16598, i64 1
- %tmp16600 = getelementptr inbounds float* %tmp16599, i64 1
- %tmp16601 = getelementptr inbounds float* %tmp16600, i64 1
- %tmp16602 = getelementptr inbounds float* %tmp16601, i64 1
- %tmp16603 = getelementptr inbounds float* %tmp16602, i64 1
- %tmp16604 = getelementptr inbounds float* %tmp16603, i64 1
- %tmp16605 = getelementptr inbounds float* %tmp16604, i64 1
- %tmp16606 = getelementptr inbounds float* %tmp16605, i64 1
- %tmp16607 = getelementptr inbounds float* %tmp16606, i64 1
- %tmp16608 = getelementptr inbounds float* %tmp16607, i64 1
- %tmp16609 = getelementptr inbounds float* %tmp16608, i64 1
- %tmp16610 = getelementptr inbounds float* %tmp16609, i64 1
- %tmp16611 = getelementptr inbounds float* %tmp16610, i64 1
- %tmp16612 = getelementptr inbounds float* %tmp16611, i64 1
- %tmp16613 = getelementptr inbounds float* %tmp16612, i64 1
- %tmp16614 = getelementptr inbounds float* %tmp16613, i64 1
- %tmp16615 = getelementptr inbounds float* %tmp16614, i64 1
- %tmp16616 = getelementptr inbounds float* %tmp16615, i64 1
- %tmp16617 = getelementptr inbounds float* %tmp16616, i64 1
- %tmp16618 = getelementptr inbounds float* %tmp16617, i64 1
- %tmp16619 = getelementptr inbounds float* %tmp16618, i64 1
- %tmp16620 = getelementptr inbounds float* %tmp16619, i64 1
- %tmp16621 = getelementptr inbounds float* %tmp16620, i64 1
- %tmp16622 = getelementptr inbounds float* %tmp16621, i64 1
- %tmp16623 = getelementptr inbounds float* %tmp16622, i64 1
- %tmp16624 = getelementptr inbounds float* %tmp16623, i64 1
- %tmp16625 = getelementptr inbounds float* %tmp16624, i64 1
- %tmp16626 = getelementptr inbounds float* %tmp16625, i64 1
- %tmp16627 = getelementptr inbounds float* %tmp16626, i64 1
- %tmp16628 = getelementptr inbounds float* %tmp16627, i64 1
- %tmp16629 = getelementptr inbounds float* %tmp16628, i64 1
- %tmp16630 = getelementptr inbounds float* %tmp16629, i64 1
- %tmp16631 = getelementptr inbounds float* %tmp16630, i64 1
- %tmp16632 = getelementptr inbounds float* %tmp16631, i64 1
- %tmp16633 = getelementptr inbounds float* %tmp16632, i64 1
- %tmp16634 = getelementptr inbounds float* %tmp16633, i64 1
- %tmp16635 = getelementptr inbounds float* %tmp16634, i64 1
- %tmp16636 = getelementptr inbounds float* %tmp16635, i64 1
- %tmp16637 = getelementptr inbounds float* %tmp16636, i64 1
- %tmp16638 = getelementptr inbounds float* %tmp16637, i64 1
- %tmp16639 = getelementptr inbounds float* %tmp16638, i64 1
- %tmp16640 = getelementptr inbounds float* %tmp16639, i64 1
- %tmp16641 = getelementptr inbounds float* %tmp16640, i64 1
- %tmp16642 = getelementptr inbounds float* %tmp16641, i64 1
- %tmp16643 = getelementptr inbounds float* %tmp16642, i64 1
- %tmp16644 = getelementptr inbounds float* %tmp16643, i64 1
- %tmp16645 = getelementptr inbounds float* %tmp16644, i64 1
- %tmp16646 = getelementptr inbounds float* %tmp16645, i64 1
- %tmp16647 = getelementptr inbounds float* %tmp16646, i64 1
- %tmp16648 = getelementptr inbounds float* %tmp16647, i64 1
- %tmp16649 = getelementptr inbounds float* %tmp16648, i64 1
- %tmp16650 = getelementptr inbounds float* %tmp16649, i64 1
- %tmp16651 = getelementptr inbounds float* %tmp16650, i64 1
- %tmp16652 = getelementptr inbounds float* %tmp16651, i64 1
- %tmp16653 = getelementptr inbounds float* %tmp16652, i64 1
- %tmp16654 = getelementptr inbounds float* %tmp16653, i64 1
- %tmp16655 = getelementptr inbounds float* %tmp16654, i64 1
- %tmp16656 = getelementptr inbounds float* %tmp16655, i64 1
- %tmp16657 = getelementptr inbounds float* %tmp16656, i64 1
- %tmp16658 = getelementptr inbounds float* %tmp16657, i64 1
- %tmp16659 = getelementptr inbounds float* %tmp16658, i64 1
- %tmp16660 = getelementptr inbounds float* %tmp16659, i64 1
- %tmp16661 = getelementptr inbounds float* %tmp16660, i64 1
- %tmp16662 = getelementptr inbounds float* %tmp16661, i64 1
- %tmp16663 = getelementptr inbounds float* %tmp16662, i64 1
- %tmp16664 = getelementptr inbounds float* %tmp16663, i64 1
- %tmp16665 = getelementptr inbounds float* %tmp16664, i64 1
- %tmp16666 = getelementptr inbounds float* %tmp16665, i64 1
- %tmp16667 = getelementptr inbounds float* %tmp16666, i64 1
- %tmp16668 = getelementptr inbounds float* %tmp16667, i64 1
- %tmp16669 = getelementptr inbounds float* %tmp16668, i64 1
- %tmp16670 = getelementptr inbounds float* %tmp16669, i64 1
- %tmp16671 = getelementptr inbounds float* %tmp16670, i64 1
- %tmp16672 = getelementptr inbounds float* %tmp16671, i64 1
- %tmp16673 = getelementptr inbounds float* %tmp16672, i64 1
- %tmp16674 = getelementptr inbounds float* %tmp16673, i64 1
- %tmp16675 = getelementptr inbounds float* %tmp16674, i64 1
- %tmp16676 = getelementptr inbounds float* %tmp16675, i64 1
- %tmp16677 = getelementptr inbounds float* %tmp16676, i64 1
- %tmp16678 = getelementptr inbounds float* %tmp16677, i64 1
- %tmp16679 = getelementptr inbounds float* %tmp16678, i64 1
- %tmp16680 = getelementptr inbounds float* %tmp16679, i64 1
- %tmp16681 = getelementptr inbounds float* %tmp16680, i64 1
- %tmp16682 = getelementptr inbounds float* %tmp16681, i64 1
- %tmp16683 = getelementptr inbounds float* %tmp16682, i64 1
- %tmp16684 = getelementptr inbounds float* %tmp16683, i64 1
- %tmp16685 = getelementptr inbounds float* %tmp16684, i64 1
- %tmp16686 = getelementptr inbounds float* %tmp16685, i64 1
- %tmp16687 = getelementptr inbounds float* %tmp16686, i64 1
- %tmp16688 = getelementptr inbounds float* %tmp16687, i64 1
- %tmp16689 = getelementptr inbounds float* %tmp16688, i64 1
- %tmp16690 = getelementptr inbounds float* %tmp16689, i64 1
- %tmp16691 = getelementptr inbounds float* %tmp16690, i64 1
- %tmp16692 = getelementptr inbounds float* %tmp16691, i64 1
- %tmp16693 = getelementptr inbounds float* %tmp16692, i64 1
- %tmp16694 = getelementptr inbounds float* %tmp16693, i64 1
- %tmp16695 = getelementptr inbounds float* %tmp16694, i64 1
- %tmp16696 = getelementptr inbounds float* %tmp16695, i64 1
- %tmp16697 = getelementptr inbounds float* %tmp16696, i64 1
- %tmp16698 = getelementptr inbounds float* %tmp16697, i64 1
- %tmp16699 = getelementptr inbounds float* %tmp16698, i64 1
- %tmp16700 = getelementptr inbounds float* %tmp16699, i64 1
- %tmp16701 = getelementptr inbounds float* %tmp16700, i64 1
- %tmp16702 = getelementptr inbounds float* %tmp16701, i64 1
- %tmp16703 = getelementptr inbounds float* %tmp16702, i64 1
- %tmp16704 = getelementptr inbounds float* %tmp16703, i64 1
- %tmp16705 = getelementptr inbounds float* %tmp16704, i64 1
- %tmp16706 = getelementptr inbounds float* %tmp16705, i64 1
- %tmp16707 = getelementptr inbounds float* %tmp16706, i64 1
- %tmp16708 = getelementptr inbounds float* %tmp16707, i64 1
- %tmp16709 = getelementptr inbounds float* %tmp16708, i64 1
- %tmp16710 = getelementptr inbounds float* %tmp16709, i64 1
- %tmp16711 = getelementptr inbounds float* %tmp16710, i64 1
- %tmp16712 = getelementptr inbounds float* %tmp16711, i64 1
- %tmp16713 = getelementptr inbounds float* %tmp16712, i64 1
- %tmp16714 = getelementptr inbounds float* %tmp16713, i64 1
- %tmp16715 = getelementptr inbounds float* %tmp16714, i64 1
- %tmp16716 = getelementptr inbounds float* %tmp16715, i64 1
- %tmp16717 = getelementptr inbounds float* %tmp16716, i64 1
- %tmp16718 = getelementptr inbounds float* %tmp16717, i64 1
- %tmp16719 = getelementptr inbounds float* %tmp16718, i64 1
- %tmp16720 = getelementptr inbounds float* %tmp16719, i64 1
- %tmp16721 = getelementptr inbounds float* %tmp16720, i64 1
- %tmp16722 = getelementptr inbounds float* %tmp16721, i64 1
- %tmp16723 = getelementptr inbounds float* %tmp16722, i64 1
- %tmp16724 = getelementptr inbounds float* %tmp16723, i64 1
- %tmp16725 = getelementptr inbounds float* %tmp16724, i64 1
- %tmp16726 = getelementptr inbounds float* %tmp16725, i64 1
- %tmp16727 = getelementptr inbounds float* %tmp16726, i64 1
- %tmp16728 = getelementptr inbounds float* %tmp16727, i64 1
- %tmp16729 = getelementptr inbounds float* %tmp16728, i64 1
- %tmp16730 = getelementptr inbounds float* %tmp16729, i64 1
- %tmp16731 = getelementptr inbounds float* %tmp16730, i64 1
- %tmp16732 = getelementptr inbounds float* %tmp16731, i64 1
- %tmp16733 = getelementptr inbounds float* %tmp16732, i64 1
- %tmp16734 = getelementptr inbounds float* %tmp16733, i64 1
- %tmp16735 = getelementptr inbounds float* %tmp16734, i64 1
- %tmp16736 = getelementptr inbounds float* %tmp16735, i64 1
- %tmp16737 = getelementptr inbounds float* %tmp16736, i64 1
- %tmp16738 = getelementptr inbounds float* %tmp16737, i64 1
- %tmp16739 = getelementptr inbounds float* %tmp16738, i64 1
- %tmp16740 = getelementptr inbounds float* %tmp16739, i64 1
- %tmp16741 = getelementptr inbounds float* %tmp16740, i64 1
- %tmp16742 = getelementptr inbounds float* %tmp16741, i64 1
- %tmp16743 = getelementptr inbounds float* %tmp16742, i64 1
- %tmp16744 = getelementptr inbounds float* %tmp16743, i64 1
- %tmp16745 = getelementptr inbounds float* %tmp16744, i64 1
- %tmp16746 = getelementptr inbounds float* %tmp16745, i64 1
- %tmp16747 = getelementptr inbounds float* %tmp16746, i64 1
- %tmp16748 = getelementptr inbounds float* %tmp16747, i64 1
- %tmp16749 = getelementptr inbounds float* %tmp16748, i64 1
- %tmp16750 = getelementptr inbounds float* %tmp16749, i64 1
- %tmp16751 = getelementptr inbounds float* %tmp16750, i64 1
- %tmp16752 = getelementptr inbounds float* %tmp16751, i64 1
- %tmp16753 = getelementptr inbounds float* %tmp16752, i64 1
- %tmp16754 = getelementptr inbounds float* %tmp16753, i64 1
- %tmp16755 = getelementptr inbounds float* %tmp16754, i64 1
- %tmp16756 = getelementptr inbounds float* %tmp16755, i64 1
- %tmp16757 = getelementptr inbounds float* %tmp16756, i64 1
- %tmp16758 = getelementptr inbounds float* %tmp16757, i64 1
- %tmp16759 = getelementptr inbounds float* %tmp16758, i64 1
- %tmp16760 = getelementptr inbounds float* %tmp16759, i64 1
- %tmp16761 = getelementptr inbounds float* %tmp16760, i64 1
- %tmp16762 = getelementptr inbounds float* %tmp16761, i64 1
- %tmp16763 = getelementptr inbounds float* %tmp16762, i64 1
- %tmp16764 = getelementptr inbounds float* %tmp16763, i64 1
- %tmp16765 = getelementptr inbounds float* %tmp16764, i64 1
- %tmp16766 = getelementptr inbounds float* %tmp16765, i64 1
- %tmp16767 = getelementptr inbounds float* %tmp16766, i64 1
- %tmp16768 = getelementptr inbounds float* %tmp16767, i64 1
- %tmp16769 = getelementptr inbounds float* %tmp16768, i64 1
- %tmp16770 = getelementptr inbounds float* %tmp16769, i64 1
- %tmp16771 = getelementptr inbounds float* %tmp16770, i64 1
- %tmp16772 = getelementptr inbounds float* %tmp16771, i64 1
- %tmp16773 = getelementptr inbounds float* %tmp16772, i64 1
- %tmp16774 = getelementptr inbounds float* %tmp16773, i64 1
- %tmp16775 = getelementptr inbounds float* %tmp16774, i64 1
- %tmp16776 = getelementptr inbounds float* %tmp16775, i64 1
- %tmp16777 = getelementptr inbounds float* %tmp16776, i64 1
- %tmp16778 = getelementptr inbounds float* %tmp16777, i64 1
- %tmp16779 = getelementptr inbounds float* %tmp16778, i64 1
- %tmp16780 = getelementptr inbounds float* %tmp16779, i64 1
- %tmp16781 = getelementptr inbounds float* %tmp16780, i64 1
- %tmp16782 = getelementptr inbounds float* %tmp16781, i64 1
- %tmp16783 = getelementptr inbounds float* %tmp16782, i64 1
- %tmp16784 = getelementptr inbounds float* %tmp16783, i64 1
- %tmp16785 = getelementptr inbounds float* %tmp16784, i64 1
- %tmp16786 = getelementptr inbounds float* %tmp16785, i64 1
- %tmp16787 = getelementptr inbounds float* %tmp16786, i64 1
- %tmp16788 = getelementptr inbounds float* %tmp16787, i64 1
- %tmp16789 = getelementptr inbounds float* %tmp16788, i64 1
- %tmp16790 = getelementptr inbounds float* %tmp16789, i64 1
- %tmp16791 = getelementptr inbounds float* %tmp16790, i64 1
- %tmp16792 = getelementptr inbounds float* %tmp16791, i64 1
- %tmp16793 = getelementptr inbounds float* %tmp16792, i64 1
- %tmp16794 = getelementptr inbounds float* %tmp16793, i64 1
- %tmp16795 = getelementptr inbounds float* %tmp16794, i64 1
- %tmp16796 = getelementptr inbounds float* %tmp16795, i64 1
- %tmp16797 = getelementptr inbounds float* %tmp16796, i64 1
- %tmp16798 = getelementptr inbounds float* %tmp16797, i64 1
- %tmp16799 = getelementptr inbounds float* %tmp16798, i64 1
- %tmp16800 = getelementptr inbounds float* %tmp16799, i64 1
- %tmp16801 = getelementptr inbounds float* %tmp16800, i64 1
- %tmp16802 = getelementptr inbounds float* %tmp16801, i64 1
- %tmp16803 = getelementptr inbounds float* %tmp16802, i64 1
- %tmp16804 = getelementptr inbounds float* %tmp16803, i64 1
- %tmp16805 = getelementptr inbounds float* %tmp16804, i64 1
- %tmp16806 = getelementptr inbounds float* %tmp16805, i64 1
- %tmp16807 = getelementptr inbounds float* %tmp16806, i64 1
- %tmp16808 = getelementptr inbounds float* %tmp16807, i64 1
- %tmp16809 = getelementptr inbounds float* %tmp16808, i64 1
- %tmp16810 = getelementptr inbounds float* %tmp16809, i64 1
- %tmp16811 = getelementptr inbounds float* %tmp16810, i64 1
- %tmp16812 = getelementptr inbounds float* %tmp16811, i64 1
- %tmp16813 = getelementptr inbounds float* %tmp16812, i64 1
- %tmp16814 = getelementptr inbounds float* %tmp16813, i64 1
- %tmp16815 = getelementptr inbounds float* %tmp16814, i64 1
- %tmp16816 = getelementptr inbounds float* %tmp16815, i64 1
- %tmp16817 = getelementptr inbounds float* %tmp16816, i64 1
- %tmp16818 = getelementptr inbounds float* %tmp16817, i64 1
- %tmp16819 = getelementptr inbounds float* %tmp16818, i64 1
- %tmp16820 = getelementptr inbounds float* %tmp16819, i64 1
- %tmp16821 = getelementptr inbounds float* %tmp16820, i64 1
- %tmp16822 = getelementptr inbounds float* %tmp16821, i64 1
- %tmp16823 = getelementptr inbounds float* %tmp16822, i64 1
- %tmp16824 = getelementptr inbounds float* %tmp16823, i64 1
- %tmp16825 = getelementptr inbounds float* %tmp16824, i64 1
- %tmp16826 = getelementptr inbounds float* %tmp16825, i64 1
- %tmp16827 = getelementptr inbounds float* %tmp16826, i64 1
- %tmp16828 = getelementptr inbounds float* %tmp16827, i64 1
- %tmp16829 = getelementptr inbounds float* %tmp16828, i64 1
- %tmp16830 = getelementptr inbounds float* %tmp16829, i64 1
- %tmp16831 = getelementptr inbounds float* %tmp16830, i64 1
- %tmp16832 = getelementptr inbounds float* %tmp16831, i64 1
- %tmp16833 = getelementptr inbounds float* %tmp16832, i64 1
- %tmp16834 = getelementptr inbounds float* %tmp16833, i64 1
- %tmp16835 = getelementptr inbounds float* %tmp16834, i64 1
- %tmp16836 = getelementptr inbounds float* %tmp16835, i64 1
- %tmp16837 = getelementptr inbounds float* %tmp16836, i64 1
- %tmp16838 = getelementptr inbounds float* %tmp16837, i64 1
- %tmp16839 = getelementptr inbounds float* %tmp16838, i64 1
- %tmp16840 = getelementptr inbounds float* %tmp16839, i64 1
- %tmp16841 = getelementptr inbounds float* %tmp16840, i64 1
- %tmp16842 = getelementptr inbounds float* %tmp16841, i64 1
- %tmp16843 = getelementptr inbounds float* %tmp16842, i64 1
- %tmp16844 = getelementptr inbounds float* %tmp16843, i64 1
- %tmp16845 = getelementptr inbounds float* %tmp16844, i64 1
- %tmp16846 = getelementptr inbounds float* %tmp16845, i64 1
- %tmp16847 = getelementptr inbounds float* %tmp16846, i64 1
- %tmp16848 = getelementptr inbounds float* %tmp16847, i64 1
- %tmp16849 = getelementptr inbounds float* %tmp16848, i64 1
- %tmp16850 = getelementptr inbounds float* %tmp16849, i64 1
- %tmp16851 = getelementptr inbounds float* %tmp16850, i64 1
- %tmp16852 = getelementptr inbounds float* %tmp16851, i64 1
- %tmp16853 = getelementptr inbounds float* %tmp16852, i64 1
- %tmp16854 = getelementptr inbounds float* %tmp16853, i64 1
- %tmp16855 = getelementptr inbounds float* %tmp16854, i64 1
- %tmp16856 = getelementptr inbounds float* %tmp16855, i64 1
- %tmp16857 = getelementptr inbounds float* %tmp16856, i64 1
- %tmp16858 = getelementptr inbounds float* %tmp16857, i64 1
- %tmp16859 = getelementptr inbounds float* %tmp16858, i64 1
- %tmp16860 = getelementptr inbounds float* %tmp16859, i64 1
- %tmp16861 = getelementptr inbounds float* %tmp16860, i64 1
- %tmp16862 = getelementptr inbounds float* %tmp16861, i64 1
- %tmp16863 = getelementptr inbounds float* %tmp16862, i64 1
- %tmp16864 = getelementptr inbounds float* %tmp16863, i64 1
- %tmp16865 = getelementptr inbounds float* %tmp16864, i64 1
- %tmp16866 = getelementptr inbounds float* %tmp16865, i64 1
- %tmp16867 = getelementptr inbounds float* %tmp16866, i64 1
- %tmp16868 = getelementptr inbounds float* %tmp16867, i64 1
- %tmp16869 = getelementptr inbounds float* %tmp16868, i64 1
- %tmp16870 = getelementptr inbounds float* %tmp16869, i64 1
- %tmp16871 = getelementptr inbounds float* %tmp16870, i64 1
- %tmp16872 = getelementptr inbounds float* %tmp16871, i64 1
- %tmp16873 = getelementptr inbounds float* %tmp16872, i64 1
- %tmp16874 = getelementptr inbounds float* %tmp16873, i64 1
- %tmp16875 = getelementptr inbounds float* %tmp16874, i64 1
- %tmp16876 = getelementptr inbounds float* %tmp16875, i64 1
- %tmp16877 = getelementptr inbounds float* %tmp16876, i64 1
- %tmp16878 = getelementptr inbounds float* %tmp16877, i64 1
- %tmp16879 = getelementptr inbounds float* %tmp16878, i64 1
- %tmp16880 = getelementptr inbounds float* %tmp16879, i64 1
- %tmp16881 = getelementptr inbounds float* %tmp16880, i64 1
- %tmp16882 = getelementptr inbounds float* %tmp16881, i64 1
- %tmp16883 = getelementptr inbounds float* %tmp16882, i64 1
- %tmp16884 = getelementptr inbounds float* %tmp16883, i64 1
- %tmp16885 = getelementptr inbounds float* %tmp16884, i64 1
- %tmp16886 = getelementptr inbounds float* %tmp16885, i64 1
- %tmp16887 = getelementptr inbounds float* %tmp16886, i64 1
- %tmp16888 = getelementptr inbounds float* %tmp16887, i64 1
- %tmp16889 = getelementptr inbounds float* %tmp16888, i64 1
- %tmp16890 = getelementptr inbounds float* %tmp16889, i64 1
- %tmp16891 = getelementptr inbounds float* %tmp16890, i64 1
- %tmp16892 = getelementptr inbounds float* %tmp16891, i64 1
- %tmp16893 = getelementptr inbounds float* %tmp16892, i64 1
- %tmp16894 = getelementptr inbounds float* %tmp16893, i64 1
- %tmp16895 = getelementptr inbounds float* %tmp16894, i64 1
- %tmp16896 = getelementptr inbounds float* %tmp16895, i64 1
- %tmp16897 = getelementptr inbounds float* %tmp16896, i64 1
- %tmp16898 = getelementptr inbounds float* %tmp16897, i64 1
- %tmp16899 = getelementptr inbounds float* %tmp16898, i64 1
- %tmp16900 = getelementptr inbounds float* %tmp16899, i64 1
- %tmp16901 = getelementptr inbounds float* %tmp16900, i64 1
- %tmp16902 = getelementptr inbounds float* %tmp16901, i64 1
- %tmp16903 = getelementptr inbounds float* %tmp16902, i64 1
- %tmp16904 = getelementptr inbounds float* %tmp16903, i64 1
- %tmp16905 = getelementptr inbounds float* %tmp16904, i64 1
- %tmp16906 = getelementptr inbounds float* %tmp16905, i64 1
- %tmp16907 = getelementptr inbounds float* %tmp16906, i64 1
- %tmp16908 = getelementptr inbounds float* %tmp16907, i64 1
- %tmp16909 = getelementptr inbounds float* %tmp16908, i64 1
- %tmp16910 = getelementptr inbounds float* %tmp16909, i64 1
- %tmp16911 = getelementptr inbounds float* %tmp16910, i64 1
- %tmp16912 = getelementptr inbounds float* %tmp16911, i64 1
- %tmp16913 = getelementptr inbounds float* %tmp16912, i64 1
- %tmp16914 = getelementptr inbounds float* %tmp16913, i64 1
- %tmp16915 = getelementptr inbounds float* %tmp16914, i64 1
- %tmp16916 = getelementptr inbounds float* %tmp16915, i64 1
- %tmp16917 = getelementptr inbounds float* %tmp16916, i64 1
- %tmp16918 = getelementptr inbounds float* %tmp16917, i64 1
- %tmp16919 = getelementptr inbounds float* %tmp16918, i64 1
- %tmp16920 = getelementptr inbounds float* %tmp16919, i64 1
- %tmp16921 = getelementptr inbounds float* %tmp16920, i64 1
- %tmp16922 = getelementptr inbounds float* %tmp16921, i64 1
- %tmp16923 = getelementptr inbounds float* %tmp16922, i64 1
- %tmp16924 = getelementptr inbounds float* %tmp16923, i64 1
- %tmp16925 = getelementptr inbounds float* %tmp16924, i64 1
- %tmp16926 = getelementptr inbounds float* %tmp16925, i64 1
- %tmp16927 = getelementptr inbounds float* %tmp16926, i64 1
- %tmp16928 = getelementptr inbounds float* %tmp16927, i64 1
- %tmp16929 = getelementptr inbounds float* %tmp16928, i64 1
- %tmp16930 = getelementptr inbounds float* %tmp16929, i64 1
- %tmp16931 = getelementptr inbounds float* %tmp16930, i64 1
- %tmp16932 = getelementptr inbounds float* %tmp16931, i64 1
- %tmp16933 = getelementptr inbounds float* %tmp16932, i64 1
- %tmp16934 = getelementptr inbounds float* %tmp16933, i64 1
- %tmp16935 = getelementptr inbounds float* %tmp16934, i64 1
- %tmp16936 = getelementptr inbounds float* %tmp16935, i64 1
- %tmp16937 = getelementptr inbounds float* %tmp16936, i64 1
- %tmp16938 = getelementptr inbounds float* %tmp16937, i64 1
- %tmp16939 = getelementptr inbounds float* %tmp16938, i64 1
- %tmp16940 = getelementptr inbounds float* %tmp16939, i64 1
- %tmp16941 = getelementptr inbounds float* %tmp16940, i64 1
- %tmp16942 = getelementptr inbounds float* %tmp16941, i64 1
- %tmp16943 = getelementptr inbounds float* %tmp16942, i64 1
- %tmp16944 = getelementptr inbounds float* %tmp16943, i64 1
- %tmp16945 = getelementptr inbounds float* %tmp16944, i64 1
- %tmp16946 = getelementptr inbounds float* %tmp16945, i64 1
- %tmp16947 = getelementptr inbounds float* %tmp16946, i64 1
- %tmp16948 = getelementptr inbounds float* %tmp16947, i64 1
- %tmp16949 = getelementptr inbounds float* %tmp16948, i64 1
- %tmp16950 = getelementptr inbounds float* %tmp16949, i64 1
- %tmp16951 = getelementptr inbounds float* %tmp16950, i64 1
- %tmp16952 = getelementptr inbounds float* %tmp16951, i64 1
- %tmp16953 = getelementptr inbounds float* %tmp16952, i64 1
- %tmp16954 = getelementptr inbounds float* %tmp16953, i64 1
- %tmp16955 = getelementptr inbounds float* %tmp16954, i64 1
- %tmp16956 = getelementptr inbounds float* %tmp16955, i64 1
- %tmp16957 = getelementptr inbounds float* %tmp16956, i64 1
- %tmp16958 = getelementptr inbounds float* %tmp16957, i64 1
- %tmp16959 = getelementptr inbounds float* %tmp16958, i64 1
- %tmp16960 = getelementptr inbounds float* %tmp16959, i64 1
- %tmp16961 = getelementptr inbounds float* %tmp16960, i64 1
- %tmp16962 = getelementptr inbounds float* %tmp16961, i64 1
- %tmp16963 = getelementptr inbounds float* %tmp16962, i64 1
- %tmp16964 = getelementptr inbounds float* %tmp16963, i64 1
- %tmp16965 = getelementptr inbounds float* %tmp16964, i64 1
- %tmp16966 = getelementptr inbounds float* %tmp16965, i64 1
- %tmp16967 = getelementptr inbounds float* %tmp16966, i64 1
- %tmp16968 = getelementptr inbounds float* %tmp16967, i64 1
- %tmp16969 = getelementptr inbounds float* %tmp16968, i64 1
- %tmp16970 = getelementptr inbounds float* %tmp16969, i64 1
- %tmp16971 = getelementptr inbounds float* %tmp16970, i64 1
- %tmp16972 = getelementptr inbounds float* %tmp16971, i64 1
- %tmp16973 = getelementptr inbounds float* %tmp16972, i64 1
- %tmp16974 = getelementptr inbounds float* %tmp16973, i64 1
- %tmp16975 = getelementptr inbounds float* %tmp16974, i64 1
- %tmp16976 = getelementptr inbounds float* %tmp16975, i64 1
- %tmp16977 = getelementptr inbounds float* %tmp16976, i64 1
- %tmp16978 = getelementptr inbounds float* %tmp16977, i64 1
- %tmp16979 = getelementptr inbounds float* %tmp16978, i64 1
- %tmp16980 = getelementptr inbounds float* %tmp16979, i64 1
- %tmp16981 = getelementptr inbounds float* %tmp16980, i64 1
- %tmp16982 = getelementptr inbounds float* %tmp16981, i64 1
- %tmp16983 = getelementptr inbounds float* %tmp16982, i64 1
- %tmp16984 = getelementptr inbounds float* %tmp16983, i64 1
- %tmp16985 = getelementptr inbounds float* %tmp16984, i64 1
- %tmp16986 = getelementptr inbounds float* %tmp16985, i64 1
- %tmp16987 = getelementptr inbounds float* %tmp16986, i64 1
- %tmp16988 = getelementptr inbounds float* %tmp16987, i64 1
- %tmp16989 = getelementptr inbounds float* %tmp16988, i64 1
- %tmp16990 = getelementptr inbounds float* %tmp16989, i64 1
- %tmp16991 = getelementptr inbounds float* %tmp16990, i64 1
- %tmp16992 = getelementptr inbounds float* %tmp16991, i64 1
- %tmp16993 = getelementptr inbounds float* %tmp16992, i64 1
- %tmp16994 = getelementptr inbounds float* %tmp16993, i64 1
- %tmp16995 = getelementptr inbounds float* %tmp16994, i64 1
- %tmp16996 = getelementptr inbounds float* %tmp16995, i64 1
- %tmp16997 = getelementptr inbounds float* %tmp16996, i64 1
- %tmp16998 = getelementptr inbounds float* %tmp16997, i64 1
- %tmp16999 = getelementptr inbounds float* %tmp16998, i64 1
- %tmp17000 = getelementptr inbounds float* %tmp16999, i64 1
- %tmp17001 = getelementptr inbounds float* %tmp17000, i64 1
- %tmp17002 = getelementptr inbounds float* %tmp17001, i64 1
- %tmp17003 = getelementptr inbounds float* %tmp17002, i64 1
- %tmp17004 = getelementptr inbounds float* %tmp17003, i64 1
- %tmp17005 = getelementptr inbounds float* %tmp17004, i64 1
- %tmp17006 = getelementptr inbounds float* %tmp17005, i64 1
- %tmp17007 = getelementptr inbounds float* %tmp17006, i64 1
- %tmp17008 = getelementptr inbounds float* %tmp17007, i64 1
- %tmp17009 = getelementptr inbounds float* %tmp17008, i64 1
- %tmp17010 = getelementptr inbounds float* %tmp17009, i64 1
- %tmp17011 = getelementptr inbounds float* %tmp17010, i64 1
- %tmp17012 = getelementptr inbounds float* %tmp17011, i64 1
- %tmp17013 = getelementptr inbounds float* %tmp17012, i64 1
- %tmp17014 = getelementptr inbounds float* %tmp17013, i64 1
- %tmp17015 = getelementptr inbounds float* %tmp17014, i64 1
- %tmp17016 = getelementptr inbounds float* %tmp17015, i64 1
- %tmp17017 = getelementptr inbounds float* %tmp17016, i64 1
- %tmp17018 = getelementptr inbounds float* %tmp17017, i64 1
- %tmp17019 = getelementptr inbounds float* %tmp17018, i64 1
- %tmp17020 = getelementptr inbounds float* %tmp17019, i64 1
- %tmp17021 = getelementptr inbounds float* %tmp17020, i64 1
- %tmp17022 = getelementptr inbounds float* %tmp17021, i64 1
- %tmp17023 = getelementptr inbounds float* %tmp17022, i64 1
- %tmp17024 = getelementptr inbounds float* %tmp17023, i64 1
- %tmp17025 = getelementptr inbounds float* %tmp17024, i64 1
- %tmp17026 = getelementptr inbounds float* %tmp17025, i64 1
- %tmp17027 = getelementptr inbounds float* %tmp17026, i64 1
- %tmp17028 = getelementptr inbounds float* %tmp17027, i64 1
- %tmp17029 = getelementptr inbounds float* %tmp17028, i64 1
- %tmp17030 = getelementptr inbounds float* %tmp17029, i64 1
- %tmp17031 = getelementptr inbounds float* %tmp17030, i64 1
- %tmp17032 = getelementptr inbounds float* %tmp17031, i64 1
- %tmp17033 = getelementptr inbounds float* %tmp17032, i64 1
- %tmp17034 = getelementptr inbounds float* %tmp17033, i64 1
- %tmp17035 = getelementptr inbounds float* %tmp17034, i64 1
- %tmp17036 = getelementptr inbounds float* %tmp17035, i64 1
- %tmp17037 = getelementptr inbounds float* %tmp17036, i64 1
- %tmp17038 = getelementptr inbounds float* %tmp17037, i64 1
- %tmp17039 = getelementptr inbounds float* %tmp17038, i64 1
- %tmp17040 = getelementptr inbounds float* %tmp17039, i64 1
- %tmp17041 = getelementptr inbounds float* %tmp17040, i64 1
- %tmp17042 = getelementptr inbounds float* %tmp17041, i64 1
- %tmp17043 = getelementptr inbounds float* %tmp17042, i64 1
- %tmp17044 = getelementptr inbounds float* %tmp17043, i64 1
- %tmp17045 = getelementptr inbounds float* %tmp17044, i64 1
- %tmp17046 = getelementptr inbounds float* %tmp17045, i64 1
- %tmp17047 = getelementptr inbounds float* %tmp17046, i64 1
- %tmp17048 = getelementptr inbounds float* %tmp17047, i64 1
- %tmp17049 = getelementptr inbounds float* %tmp17048, i64 1
- %tmp17050 = getelementptr inbounds float* %tmp17049, i64 1
- %tmp17051 = getelementptr inbounds float* %tmp17050, i64 1
- %tmp17052 = getelementptr inbounds float* %tmp17051, i64 1
- %tmp17053 = getelementptr inbounds float* %tmp17052, i64 1
- %tmp17054 = getelementptr inbounds float* %tmp17053, i64 1
- %tmp17055 = getelementptr inbounds float* %tmp17054, i64 1
- %tmp17056 = getelementptr inbounds float* %tmp17055, i64 1
- %tmp17057 = getelementptr inbounds float* %tmp17056, i64 1
- %tmp17058 = getelementptr inbounds float* %tmp17057, i64 1
- %tmp17059 = getelementptr inbounds float* %tmp17058, i64 1
- %tmp17060 = getelementptr inbounds float* %tmp17059, i64 1
- %tmp17061 = getelementptr inbounds float* %tmp17060, i64 1
- %tmp17062 = getelementptr inbounds float* %tmp17061, i64 1
- %tmp17063 = getelementptr inbounds float* %tmp17062, i64 1
- %tmp17064 = getelementptr inbounds float* %tmp17063, i64 1
- %tmp17065 = getelementptr inbounds float* %tmp17064, i64 1
- %tmp17066 = getelementptr inbounds float* %tmp17065, i64 1
- %tmp17067 = getelementptr inbounds float* %tmp17066, i64 1
- %tmp17068 = getelementptr inbounds float* %tmp17067, i64 1
- %tmp17069 = getelementptr inbounds float* %tmp17068, i64 1
- %tmp17070 = getelementptr inbounds float* %tmp17069, i64 1
- %tmp17071 = getelementptr inbounds float* %tmp17070, i64 1
- %tmp17072 = getelementptr inbounds float* %tmp17071, i64 1
- %tmp17073 = getelementptr inbounds float* %tmp17072, i64 1
- %tmp17074 = getelementptr inbounds float* %tmp17073, i64 1
- %tmp17075 = getelementptr inbounds float* %tmp17074, i64 1
- %tmp17076 = getelementptr inbounds float* %tmp17075, i64 1
- %tmp17077 = getelementptr inbounds float* %tmp17076, i64 1
- %tmp17078 = getelementptr inbounds float* %tmp17077, i64 1
- %tmp17079 = getelementptr inbounds float* %tmp17078, i64 1
- %tmp17080 = getelementptr inbounds float* %tmp17079, i64 1
- %tmp17081 = getelementptr inbounds float* %tmp17080, i64 1
- %tmp17082 = getelementptr inbounds float* %tmp17081, i64 1
- %tmp17083 = getelementptr inbounds float* %tmp17082, i64 1
- %tmp17084 = getelementptr inbounds float* %tmp17083, i64 1
- %tmp17085 = getelementptr inbounds float* %tmp17084, i64 1
- %tmp17086 = getelementptr inbounds float* %tmp17085, i64 1
- %tmp17087 = getelementptr inbounds float* %tmp17086, i64 1
- %tmp17088 = getelementptr inbounds float* %tmp17087, i64 1
- %tmp17089 = getelementptr inbounds float* %tmp17088, i64 1
- %tmp17090 = getelementptr inbounds float* %tmp17089, i64 1
- %tmp17091 = getelementptr inbounds float* %tmp17090, i64 1
- %tmp17092 = getelementptr inbounds float* %tmp17091, i64 1
- %tmp17093 = getelementptr inbounds float* %tmp17092, i64 1
- %tmp17094 = getelementptr inbounds float* %tmp17093, i64 1
- %tmp17095 = getelementptr inbounds float* %tmp17094, i64 1
- %tmp17096 = getelementptr inbounds float* %tmp17095, i64 1
- %tmp17097 = getelementptr inbounds float* %tmp17096, i64 1
- %tmp17098 = getelementptr inbounds float* %tmp17097, i64 1
- %tmp17099 = getelementptr inbounds float* %tmp17098, i64 1
- %tmp17100 = getelementptr inbounds float* %tmp17099, i64 1
- %tmp17101 = getelementptr inbounds float* %tmp17100, i64 1
- %tmp17102 = getelementptr inbounds float* %tmp17101, i64 1
- %tmp17103 = getelementptr inbounds float* %tmp17102, i64 1
- %tmp17104 = getelementptr inbounds float* %tmp17103, i64 1
- %tmp17105 = getelementptr inbounds float* %tmp17104, i64 1
- %tmp17106 = getelementptr inbounds float* %tmp17105, i64 1
- %tmp17107 = getelementptr inbounds float* %tmp17106, i64 1
- %tmp17108 = getelementptr inbounds float* %tmp17107, i64 1
- %tmp17109 = getelementptr inbounds float* %tmp17108, i64 1
- %tmp17110 = getelementptr inbounds float* %tmp17109, i64 1
- %tmp17111 = getelementptr inbounds float* %tmp17110, i64 1
- %tmp17112 = getelementptr inbounds float* %tmp17111, i64 1
- %tmp17113 = getelementptr inbounds float* %tmp17112, i64 1
- %tmp17114 = getelementptr inbounds float* %tmp17113, i64 1
- %tmp17115 = getelementptr inbounds float* %tmp17114, i64 1
- %tmp17116 = getelementptr inbounds float* %tmp17115, i64 1
- %tmp17117 = getelementptr inbounds float* %tmp17116, i64 1
- %tmp17118 = getelementptr inbounds float* %tmp17117, i64 1
- %tmp17119 = getelementptr inbounds float* %tmp17118, i64 1
- %tmp17120 = getelementptr inbounds float* %tmp17119, i64 1
- %tmp17121 = getelementptr inbounds float* %tmp17120, i64 1
- %tmp17122 = getelementptr inbounds float* %tmp17121, i64 1
- %tmp17123 = getelementptr inbounds float* %tmp17122, i64 1
- %tmp17124 = getelementptr inbounds float* %tmp17123, i64 1
- %tmp17125 = getelementptr inbounds float* %tmp17124, i64 1
- %tmp17126 = getelementptr inbounds float* %tmp17125, i64 1
- %tmp17127 = getelementptr inbounds float* %tmp17126, i64 1
- %tmp17128 = getelementptr inbounds float* %tmp17127, i64 1
- %tmp17129 = getelementptr inbounds float* %tmp17128, i64 1
- %tmp17130 = getelementptr inbounds float* %tmp17129, i64 1
- %tmp17131 = getelementptr inbounds float* %tmp17130, i64 1
- %tmp17132 = getelementptr inbounds float* %tmp17131, i64 1
- %tmp17133 = getelementptr inbounds float* %tmp17132, i64 1
- %tmp17134 = getelementptr inbounds float* %tmp17133, i64 1
- %tmp17135 = getelementptr inbounds float* %tmp17134, i64 1
- %tmp17136 = getelementptr inbounds float* %tmp17135, i64 1
- %tmp17137 = getelementptr inbounds float* %tmp17136, i64 1
- %tmp17138 = getelementptr inbounds float* %tmp17137, i64 1
- %tmp17139 = getelementptr inbounds float* %tmp17138, i64 1
- %tmp17140 = getelementptr inbounds float* %tmp17139, i64 1
- %tmp17141 = getelementptr inbounds float* %tmp17140, i64 1
- %tmp17142 = getelementptr inbounds float* %tmp17141, i64 1
- %tmp17143 = getelementptr inbounds float* %tmp17142, i64 1
- %tmp17144 = getelementptr inbounds float* %tmp17143, i64 1
- %tmp17145 = getelementptr inbounds float* %tmp17144, i64 1
- %tmp17146 = getelementptr inbounds float* %tmp17145, i64 1
- %tmp17147 = getelementptr inbounds float* %tmp17146, i64 1
- %tmp17148 = getelementptr inbounds float* %tmp17147, i64 1
- %tmp17149 = getelementptr inbounds float* %tmp17148, i64 1
- %tmp17150 = getelementptr inbounds float* %tmp17149, i64 1
- %tmp17151 = getelementptr inbounds float* %tmp17150, i64 1
- %tmp17152 = getelementptr inbounds float* %tmp17151, i64 1
- %tmp17153 = getelementptr inbounds float* %tmp17152, i64 1
- %tmp17154 = getelementptr inbounds float* %tmp17153, i64 1
- %tmp17155 = getelementptr inbounds float* %tmp17154, i64 1
- %tmp17156 = getelementptr inbounds float* %tmp17155, i64 1
- %tmp17157 = getelementptr inbounds float* %tmp17156, i64 1
- %tmp17158 = getelementptr inbounds float* %tmp17157, i64 1
- %tmp17159 = getelementptr inbounds float* %tmp17158, i64 1
- %tmp17160 = getelementptr inbounds float* %tmp17159, i64 1
- %tmp17161 = getelementptr inbounds float* %tmp17160, i64 1
- %tmp17162 = getelementptr inbounds float* %tmp17161, i64 1
- %tmp17163 = getelementptr inbounds float* %tmp17162, i64 1
- %tmp17164 = getelementptr inbounds float* %tmp17163, i64 1
- %tmp17165 = getelementptr inbounds float* %tmp17164, i64 1
- %tmp17166 = getelementptr inbounds float* %tmp17165, i64 1
- %tmp17167 = getelementptr inbounds float* %tmp17166, i64 1
- %tmp17168 = getelementptr inbounds float* %tmp17167, i64 1
- %tmp17169 = getelementptr inbounds float* %tmp17168, i64 1
- %tmp17170 = getelementptr inbounds float* %tmp17169, i64 1
- %tmp17171 = getelementptr inbounds float* %tmp17170, i64 1
- %tmp17172 = getelementptr inbounds float* %tmp17171, i64 1
- %tmp17173 = getelementptr inbounds float* %tmp17172, i64 1
- %tmp17174 = getelementptr inbounds float* %tmp17173, i64 1
- %tmp17175 = getelementptr inbounds float* %tmp17174, i64 1
- %tmp17176 = getelementptr inbounds float* %tmp17175, i64 1
- %tmp17177 = getelementptr inbounds float* %tmp17176, i64 1
- %tmp17178 = getelementptr inbounds float* %tmp17177, i64 1
- %tmp17179 = getelementptr inbounds float* %tmp17178, i64 1
- %tmp17180 = getelementptr inbounds float* %tmp17179, i64 1
- %tmp17181 = getelementptr inbounds float* %tmp17180, i64 1
- %tmp17182 = getelementptr inbounds float* %tmp17181, i64 1
- %tmp17183 = getelementptr inbounds float* %tmp17182, i64 1
- %tmp17184 = getelementptr inbounds float* %tmp17183, i64 1
- %tmp17185 = getelementptr inbounds float* %tmp17184, i64 1
- %tmp17186 = getelementptr inbounds float* %tmp17185, i64 1
- %tmp17187 = getelementptr inbounds float* %tmp17186, i64 1
- %tmp17188 = getelementptr inbounds float* %tmp17187, i64 1
- %tmp17189 = getelementptr inbounds float* %tmp17188, i64 1
- %tmp17190 = getelementptr inbounds float* %tmp17189, i64 1
- %tmp17191 = getelementptr inbounds float* %tmp17190, i64 1
- %tmp17192 = getelementptr inbounds float* %tmp17191, i64 1
- %tmp17193 = getelementptr inbounds float* %tmp17192, i64 1
- %tmp17194 = getelementptr inbounds float* %tmp17193, i64 1
- %tmp17195 = getelementptr inbounds float* %tmp17194, i64 1
- %tmp17196 = getelementptr inbounds float* %tmp17195, i64 1
- %tmp17197 = getelementptr inbounds float* %tmp17196, i64 1
- %tmp17198 = getelementptr inbounds float* %tmp17197, i64 1
- %tmp17199 = getelementptr inbounds float* %tmp17198, i64 1
- %tmp17200 = getelementptr inbounds float* %tmp17199, i64 1
- %tmp17201 = getelementptr inbounds float* %tmp17200, i64 1
- %tmp17202 = getelementptr inbounds float* %tmp17201, i64 1
- %tmp17203 = getelementptr inbounds float* %tmp17202, i64 1
- %tmp17204 = getelementptr inbounds float* %tmp17203, i64 1
- %tmp17205 = getelementptr inbounds float* %tmp17204, i64 1
- %tmp17206 = getelementptr inbounds float* %tmp17205, i64 1
- %tmp17207 = getelementptr inbounds float* %tmp17206, i64 1
- %tmp17208 = getelementptr inbounds float* %tmp17207, i64 1
- %tmp17209 = getelementptr inbounds float* %tmp17208, i64 1
- %tmp17210 = getelementptr inbounds float* %tmp17209, i64 1
- %tmp17211 = getelementptr inbounds float* %tmp17210, i64 1
- %tmp17212 = getelementptr inbounds float* %tmp17211, i64 1
- %tmp17213 = getelementptr inbounds float* %tmp17212, i64 1
- %tmp17214 = getelementptr inbounds float* %tmp17213, i64 1
- %tmp17215 = getelementptr inbounds float* %tmp17214, i64 1
- %tmp17216 = getelementptr inbounds float* %tmp17215, i64 1
- %tmp17217 = getelementptr inbounds float* %tmp17216, i64 1
- %tmp17218 = getelementptr inbounds float* %tmp17217, i64 1
- %tmp17219 = getelementptr inbounds float* %tmp17218, i64 1
- %tmp17220 = getelementptr inbounds float* %tmp17219, i64 1
- %tmp17221 = getelementptr inbounds float* %tmp17220, i64 1
- %tmp17222 = getelementptr inbounds float* %tmp17221, i64 1
- %tmp17223 = getelementptr inbounds float* %tmp17222, i64 1
- %tmp17224 = getelementptr inbounds float* %tmp17223, i64 1
- %tmp17225 = getelementptr inbounds float* %tmp17224, i64 1
- %tmp17226 = getelementptr inbounds float* %tmp17225, i64 1
- %tmp17227 = getelementptr inbounds float* %tmp17226, i64 1
- %tmp17228 = getelementptr inbounds float* %tmp17227, i64 1
- %tmp17229 = getelementptr inbounds float* %tmp17228, i64 1
- %tmp17230 = getelementptr inbounds float* %tmp17229, i64 1
- %tmp17231 = getelementptr inbounds float* %tmp17230, i64 1
- %tmp17232 = getelementptr inbounds float* %tmp17231, i64 1
- %tmp17233 = getelementptr inbounds float* %tmp17232, i64 1
- %tmp17234 = getelementptr inbounds float* %tmp17233, i64 1
- %tmp17235 = getelementptr inbounds float* %tmp17234, i64 1
- %tmp17236 = getelementptr inbounds float* %tmp17235, i64 1
- %tmp17237 = getelementptr inbounds float* %tmp17236, i64 1
- %tmp17238 = getelementptr inbounds float* %tmp17237, i64 1
- %tmp17239 = getelementptr inbounds float* %tmp17238, i64 1
- %tmp17240 = getelementptr inbounds float* %tmp17239, i64 1
- %tmp17241 = getelementptr inbounds float* %tmp17240, i64 1
- %tmp17242 = getelementptr inbounds float* %tmp17241, i64 1
- %tmp17243 = getelementptr inbounds float* %tmp17242, i64 1
- %tmp17244 = getelementptr inbounds float* %tmp17243, i64 1
- %tmp17245 = getelementptr inbounds float* %tmp17244, i64 1
- %tmp17246 = getelementptr inbounds float* %tmp17245, i64 1
- %tmp17247 = getelementptr inbounds float* %tmp17246, i64 1
- %tmp17248 = getelementptr inbounds float* %tmp17247, i64 1
- %tmp17249 = getelementptr inbounds float* %tmp17248, i64 1
- %tmp17250 = getelementptr inbounds float* %tmp17249, i64 1
- %tmp17251 = getelementptr inbounds float* %tmp17250, i64 1
- %tmp17252 = getelementptr inbounds float* %tmp17251, i64 1
- %tmp17253 = getelementptr inbounds float* %tmp17252, i64 1
- %tmp17254 = getelementptr inbounds float* %tmp17253, i64 1
- %tmp17255 = getelementptr inbounds float* %tmp17254, i64 1
- %tmp17256 = getelementptr inbounds float* %tmp17255, i64 1
- %tmp17257 = getelementptr inbounds float* %tmp17256, i64 1
- %tmp17258 = getelementptr inbounds float* %tmp17257, i64 1
- %tmp17259 = getelementptr inbounds float* %tmp17258, i64 1
- %tmp17260 = getelementptr inbounds float* %tmp17259, i64 1
- %tmp17261 = getelementptr inbounds float* %tmp17260, i64 1
- %tmp17262 = getelementptr inbounds float* %tmp17261, i64 1
- %tmp17263 = getelementptr inbounds float* %tmp17262, i64 1
- %tmp17264 = getelementptr inbounds float* %tmp17263, i64 1
- %tmp17265 = getelementptr inbounds float* %tmp17264, i64 1
- %tmp17266 = getelementptr inbounds float* %tmp17265, i64 1
- %tmp17267 = getelementptr inbounds float* %tmp17266, i64 1
- %tmp17268 = getelementptr inbounds float* %tmp17267, i64 1
- %tmp17269 = getelementptr inbounds float* %tmp17268, i64 1
- %tmp17270 = getelementptr inbounds float* %tmp17269, i64 1
- %tmp17271 = getelementptr inbounds float* %tmp17270, i64 1
- %tmp17272 = getelementptr inbounds float* %tmp17271, i64 1
- %tmp17273 = getelementptr inbounds float* %tmp17272, i64 1
- %tmp17274 = getelementptr inbounds float* %tmp17273, i64 1
- %tmp17275 = getelementptr inbounds float* %tmp17274, i64 1
- %tmp17276 = getelementptr inbounds float* %tmp17275, i64 1
- %tmp17277 = getelementptr inbounds float* %tmp17276, i64 1
- %tmp17278 = getelementptr inbounds float* %tmp17277, i64 1
- %tmp17279 = getelementptr inbounds float* %tmp17278, i64 1
- %tmp17280 = getelementptr inbounds float* %tmp17279, i64 1
- %tmp17281 = getelementptr inbounds float* %tmp17280, i64 1
- %tmp17282 = getelementptr inbounds float* %tmp17281, i64 1
- %tmp17283 = getelementptr inbounds float* %tmp17282, i64 1
- %tmp17284 = getelementptr inbounds float* %tmp17283, i64 1
- %tmp17285 = getelementptr inbounds float* %tmp17284, i64 1
- %tmp17286 = getelementptr inbounds float* %tmp17285, i64 1
- %tmp17287 = getelementptr inbounds float* %tmp17286, i64 1
- %tmp17288 = getelementptr inbounds float* %tmp17287, i64 1
- %tmp17289 = getelementptr inbounds float* %tmp17288, i64 1
- %tmp17290 = getelementptr inbounds float* %tmp17289, i64 1
- %tmp17291 = getelementptr inbounds float* %tmp17290, i64 1
- %tmp17292 = getelementptr inbounds float* %tmp17291, i64 1
- %tmp17293 = getelementptr inbounds float* %tmp17292, i64 1
- %tmp17294 = getelementptr inbounds float* %tmp17293, i64 1
- %tmp17295 = getelementptr inbounds float* %tmp17294, i64 1
- %tmp17296 = getelementptr inbounds float* %tmp17295, i64 1
- %tmp17297 = getelementptr inbounds float* %tmp17296, i64 1
- %tmp17298 = getelementptr inbounds float* %tmp17297, i64 1
- %tmp17299 = getelementptr inbounds float* %tmp17298, i64 1
- %tmp17300 = getelementptr inbounds float* %tmp17299, i64 1
- %tmp17301 = getelementptr inbounds float* %tmp17300, i64 1
- %tmp17302 = getelementptr inbounds float* %tmp17301, i64 1
- %tmp17303 = getelementptr inbounds float* %tmp17302, i64 1
- %tmp17304 = getelementptr inbounds float* %tmp17303, i64 1
- %tmp17305 = getelementptr inbounds float* %tmp17304, i64 1
- %tmp17306 = getelementptr inbounds float* %tmp17305, i64 1
- %tmp17307 = getelementptr inbounds float* %tmp17306, i64 1
- %tmp17308 = getelementptr inbounds float* %tmp17307, i64 1
- %tmp17309 = getelementptr inbounds float* %tmp17308, i64 1
- %tmp17310 = getelementptr inbounds float* %tmp17309, i64 1
- %tmp17311 = getelementptr inbounds float* %tmp17310, i64 1
- %tmp17312 = getelementptr inbounds float* %tmp17311, i64 1
- %tmp17313 = getelementptr inbounds float* %tmp17312, i64 1
- %tmp17314 = getelementptr inbounds float* %tmp17313, i64 1
- %tmp17315 = getelementptr inbounds float* %tmp17314, i64 1
- %tmp17316 = getelementptr inbounds float* %tmp17315, i64 1
- %tmp17317 = getelementptr inbounds float* %tmp17316, i64 1
- %tmp17318 = getelementptr inbounds float* %tmp17317, i64 1
- %tmp17319 = getelementptr inbounds float* %tmp17318, i64 1
- %tmp17320 = getelementptr inbounds float* %tmp17319, i64 1
- %tmp17321 = getelementptr inbounds float* %tmp17320, i64 1
- %tmp17322 = getelementptr inbounds float* %tmp17321, i64 1
- %tmp17323 = getelementptr inbounds float* %tmp17322, i64 1
- %tmp17324 = getelementptr inbounds float* %tmp17323, i64 1
- %tmp17325 = getelementptr inbounds float* %tmp17324, i64 1
- %tmp17326 = getelementptr inbounds float* %tmp17325, i64 1
- %tmp17327 = getelementptr inbounds float* %tmp17326, i64 1
- %tmp17328 = getelementptr inbounds float* %tmp17327, i64 1
- %tmp17329 = getelementptr inbounds float* %tmp17328, i64 1
- %tmp17330 = getelementptr inbounds float* %tmp17329, i64 1
- %tmp17331 = getelementptr inbounds float* %tmp17330, i64 1
- %tmp17332 = getelementptr inbounds float* %tmp17331, i64 1
- %tmp17333 = getelementptr inbounds float* %tmp17332, i64 1
- %tmp17334 = getelementptr inbounds float* %tmp17333, i64 1
- %tmp17335 = getelementptr inbounds float* %tmp17334, i64 1
- %tmp17336 = getelementptr inbounds float* %tmp17335, i64 1
- %tmp17337 = getelementptr inbounds float* %tmp17336, i64 1
- %tmp17338 = getelementptr inbounds float* %tmp17337, i64 1
- %tmp17339 = getelementptr inbounds float* %tmp17338, i64 1
- %tmp17340 = getelementptr inbounds float* %tmp17339, i64 1
- %tmp17341 = getelementptr inbounds float* %tmp17340, i64 1
- %tmp17342 = getelementptr inbounds float* %tmp17341, i64 1
- %tmp17343 = getelementptr inbounds float* %tmp17342, i64 1
- %tmp17344 = getelementptr inbounds float* %tmp17343, i64 1
- %tmp17345 = getelementptr inbounds float* %tmp17344, i64 1
- %tmp17346 = getelementptr inbounds float* %tmp17345, i64 1
- %tmp17347 = getelementptr inbounds float* %tmp17346, i64 1
- %tmp17348 = getelementptr inbounds float* %tmp17347, i64 1
- %tmp17349 = getelementptr inbounds float* %tmp17348, i64 1
- %tmp17350 = getelementptr inbounds float* %tmp17349, i64 1
- %tmp17351 = getelementptr inbounds float* %tmp17350, i64 1
- %tmp17352 = getelementptr inbounds float* %tmp17351, i64 1
- %tmp17353 = getelementptr inbounds float* %tmp17352, i64 1
- %tmp17354 = getelementptr inbounds float* %tmp17353, i64 1
- %tmp17355 = getelementptr inbounds float* %tmp17354, i64 1
- %tmp17356 = getelementptr inbounds float* %tmp17355, i64 1
- %tmp17357 = getelementptr inbounds float* %tmp17356, i64 1
- %tmp17358 = getelementptr inbounds float* %tmp17357, i64 1
- %tmp17359 = getelementptr inbounds float* %tmp17358, i64 1
- %tmp17360 = getelementptr inbounds float* %tmp17359, i64 1
- %tmp17361 = getelementptr inbounds float* %tmp17360, i64 1
- %tmp17362 = getelementptr inbounds float* %tmp17361, i64 1
- %tmp17363 = getelementptr inbounds float* %tmp17362, i64 1
- %tmp17364 = getelementptr inbounds float* %tmp17363, i64 1
- %tmp17365 = getelementptr inbounds float* %tmp17364, i64 1
- %tmp17366 = getelementptr inbounds float* %tmp17365, i64 1
- %tmp17367 = getelementptr inbounds float* %tmp17366, i64 1
- %tmp17368 = getelementptr inbounds float* %tmp17367, i64 1
- %tmp17369 = getelementptr inbounds float* %tmp17368, i64 1
- %tmp17370 = getelementptr inbounds float* %tmp17369, i64 1
- %tmp17371 = getelementptr inbounds float* %tmp17370, i64 1
- %tmp17372 = getelementptr inbounds float* %tmp17371, i64 1
- %tmp17373 = getelementptr inbounds float* %tmp17372, i64 1
- %tmp17374 = getelementptr inbounds float* %tmp17373, i64 1
- %tmp17375 = getelementptr inbounds float* %tmp17374, i64 1
- %tmp17376 = getelementptr inbounds float* %tmp17375, i64 1
- %tmp17377 = getelementptr inbounds float* %tmp17376, i64 1
- %tmp17378 = getelementptr inbounds float* %tmp17377, i64 1
- %tmp17379 = getelementptr inbounds float* %tmp17378, i64 1
- %tmp17380 = getelementptr inbounds float* %tmp17379, i64 1
- %tmp17381 = getelementptr inbounds float* %tmp17380, i64 1
- %tmp17382 = getelementptr inbounds float* %tmp17381, i64 1
- %tmp17383 = getelementptr inbounds float* %tmp17382, i64 1
- %tmp17384 = getelementptr inbounds float* %tmp17383, i64 1
- %tmp17385 = getelementptr inbounds float* %tmp17384, i64 1
- %tmp17386 = getelementptr inbounds float* %tmp17385, i64 1
- %tmp17387 = getelementptr inbounds float* %tmp17386, i64 1
- %tmp17388 = getelementptr inbounds float* %tmp17387, i64 1
- %tmp17389 = getelementptr inbounds float* %tmp17388, i64 1
- %tmp17390 = getelementptr inbounds float* %tmp17389, i64 1
- %tmp17391 = getelementptr inbounds float* %tmp17390, i64 1
- %tmp17392 = getelementptr inbounds float* %tmp17391, i64 1
- %tmp17393 = getelementptr inbounds float* %tmp17392, i64 1
- %tmp17394 = getelementptr inbounds float* %tmp17393, i64 1
- %tmp17395 = getelementptr inbounds float* %tmp17394, i64 1
- %tmp17396 = getelementptr inbounds float* %tmp17395, i64 1
- %tmp17397 = getelementptr inbounds float* %tmp17396, i64 1
- %tmp17398 = getelementptr inbounds float* %tmp17397, i64 1
- %tmp17399 = getelementptr inbounds float* %tmp17398, i64 1
- %tmp17400 = getelementptr inbounds float* %tmp17399, i64 1
- %tmp17401 = getelementptr inbounds float* %tmp17400, i64 1
- %tmp17402 = getelementptr inbounds float* %tmp17401, i64 1
- %tmp17403 = getelementptr inbounds float* %tmp17402, i64 1
- %tmp17404 = getelementptr inbounds float* %tmp17403, i64 1
- %tmp17405 = getelementptr inbounds float* %tmp17404, i64 1
- %tmp17406 = getelementptr inbounds float* %tmp17405, i64 1
- %tmp17407 = getelementptr inbounds float* %tmp17406, i64 1
- %tmp17408 = getelementptr inbounds float* %tmp17407, i64 1
- %tmp17409 = getelementptr inbounds float* %tmp17408, i64 1
- %tmp17410 = getelementptr inbounds float* %tmp17409, i64 1
- %tmp17411 = getelementptr inbounds float* %tmp17410, i64 1
- %tmp17412 = getelementptr inbounds float* %tmp17411, i64 1
- %tmp17413 = getelementptr inbounds float* %tmp17412, i64 1
- %tmp17414 = getelementptr inbounds float* %tmp17413, i64 1
- %tmp17415 = getelementptr inbounds float* %tmp17414, i64 1
- %tmp17416 = getelementptr inbounds float* %tmp17415, i64 1
- %tmp17417 = getelementptr inbounds float* %tmp17416, i64 1
- %tmp17418 = getelementptr inbounds float* %tmp17417, i64 1
- %tmp17419 = getelementptr inbounds float* %tmp17418, i64 1
- %tmp17420 = getelementptr inbounds float* %tmp17419, i64 1
- %tmp17421 = getelementptr inbounds float* %tmp17420, i64 1
- %tmp17422 = getelementptr inbounds float* %tmp17421, i64 1
- %tmp17423 = getelementptr inbounds float* %tmp17422, i64 1
- %tmp17424 = getelementptr inbounds float* %tmp17423, i64 1
- %tmp17425 = getelementptr inbounds float* %tmp17424, i64 1
- %tmp17426 = getelementptr inbounds float* %tmp17425, i64 1
- %tmp17427 = getelementptr inbounds float* %tmp17426, i64 1
- %tmp17428 = getelementptr inbounds float* %tmp17427, i64 1
- %tmp17429 = getelementptr inbounds float* %tmp17428, i64 1
- %tmp17430 = getelementptr inbounds float* %tmp17429, i64 1
- %tmp17431 = getelementptr inbounds float* %tmp17430, i64 1
- %tmp17432 = getelementptr inbounds float* %tmp17431, i64 1
- %tmp17433 = getelementptr inbounds float* %tmp17432, i64 1
- %tmp17434 = getelementptr inbounds float* %tmp17433, i64 1
- %tmp17435 = getelementptr inbounds float* %tmp17434, i64 1
- %tmp17436 = getelementptr inbounds float* %tmp17435, i64 1
- %tmp17437 = getelementptr inbounds float* %tmp17436, i64 1
- %tmp17438 = getelementptr inbounds float* %tmp17437, i64 1
- %tmp17439 = getelementptr inbounds float* %tmp17438, i64 1
- %tmp17440 = getelementptr inbounds float* %tmp17439, i64 1
- %tmp17441 = getelementptr inbounds float* %tmp17440, i64 1
- %tmp17442 = getelementptr inbounds float* %tmp17441, i64 1
- %tmp17443 = getelementptr inbounds float* %tmp17442, i64 1
- %tmp17444 = getelementptr inbounds float* %tmp17443, i64 1
- %tmp17445 = getelementptr inbounds float* %tmp17444, i64 1
- %tmp17446 = getelementptr inbounds float* %tmp17445, i64 1
- %tmp17447 = getelementptr inbounds float* %tmp17446, i64 1
- %tmp17448 = getelementptr inbounds float* %tmp17447, i64 1
- %tmp17449 = getelementptr inbounds float* %tmp17448, i64 1
- %tmp17450 = getelementptr inbounds float* %tmp17449, i64 1
- %tmp17451 = getelementptr inbounds float* %tmp17450, i64 1
- %tmp17452 = getelementptr inbounds float* %tmp17451, i64 1
- %tmp17453 = getelementptr inbounds float* %tmp17452, i64 1
- %tmp17454 = getelementptr inbounds float* %tmp17453, i64 1
- %tmp17455 = getelementptr inbounds float* %tmp17454, i64 1
- %tmp17456 = getelementptr inbounds float* %tmp17455, i64 1
- %tmp17457 = getelementptr inbounds float* %tmp17456, i64 1
- %tmp17458 = getelementptr inbounds float* %tmp17457, i64 1
- %tmp17459 = getelementptr inbounds float* %tmp17458, i64 1
- %tmp17460 = getelementptr inbounds float* %tmp17459, i64 1
- %tmp17461 = getelementptr inbounds float* %tmp17460, i64 1
- %tmp17462 = getelementptr inbounds float* %tmp17461, i64 1
- %tmp17463 = getelementptr inbounds float* %tmp17462, i64 1
- %tmp17464 = getelementptr inbounds float* %tmp17463, i64 1
- %tmp17465 = getelementptr inbounds float* %tmp17464, i64 1
- %tmp17466 = getelementptr inbounds float* %tmp17465, i64 1
- %tmp17467 = getelementptr inbounds float* %tmp17466, i64 1
- %tmp17468 = getelementptr inbounds float* %tmp17467, i64 1
- %tmp17469 = getelementptr inbounds float* %tmp17468, i64 1
- %tmp17470 = getelementptr inbounds float* %tmp17469, i64 1
- %tmp17471 = getelementptr inbounds float* %tmp17470, i64 1
- %tmp17472 = getelementptr inbounds float* %tmp17471, i64 1
- %tmp17473 = getelementptr inbounds float* %tmp17472, i64 1
- %tmp17474 = getelementptr inbounds float* %tmp17473, i64 1
- %tmp17475 = getelementptr inbounds float* %tmp17474, i64 1
- %tmp17476 = getelementptr inbounds float* %tmp17475, i64 1
- %tmp17477 = getelementptr inbounds float* %tmp17476, i64 1
- %tmp17478 = getelementptr inbounds float* %tmp17477, i64 1
- %tmp17479 = getelementptr inbounds float* %tmp17478, i64 1
- %tmp17480 = getelementptr inbounds float* %tmp17479, i64 1
- %tmp17481 = getelementptr inbounds float* %tmp17480, i64 1
- %tmp17482 = getelementptr inbounds float* %tmp17481, i64 1
- %tmp17483 = getelementptr inbounds float* %tmp17482, i64 1
- %tmp17484 = getelementptr inbounds float* %tmp17483, i64 1
- %tmp17485 = getelementptr inbounds float* %tmp17484, i64 1
- %tmp17486 = getelementptr inbounds float* %tmp17485, i64 1
- %tmp17487 = getelementptr inbounds float* %tmp17486, i64 1
- %tmp17488 = getelementptr inbounds float* %tmp17487, i64 1
- %tmp17489 = getelementptr inbounds float* %tmp17488, i64 1
- %tmp17490 = getelementptr inbounds float* %tmp17489, i64 1
- %tmp17491 = getelementptr inbounds float* %tmp17490, i64 1
- %tmp17492 = getelementptr inbounds float* %tmp17491, i64 1
- %tmp17493 = getelementptr inbounds float* %tmp17492, i64 1
- %tmp17494 = getelementptr inbounds float* %tmp17493, i64 1
- %tmp17495 = getelementptr inbounds float* %tmp17494, i64 1
- %tmp17496 = getelementptr inbounds float* %tmp17495, i64 1
- %tmp17497 = getelementptr inbounds float* %tmp17496, i64 1
- %tmp17498 = getelementptr inbounds float* %tmp17497, i64 1
- %tmp17499 = getelementptr inbounds float* %tmp17498, i64 1
- %tmp17500 = getelementptr inbounds float* %tmp17499, i64 1
- %tmp17501 = getelementptr inbounds float* %tmp17500, i64 1
- %tmp17502 = getelementptr inbounds float* %tmp17501, i64 1
- %tmp17503 = getelementptr inbounds float* %tmp17502, i64 1
- %tmp17504 = getelementptr inbounds float* %tmp17503, i64 1
- %tmp17505 = getelementptr inbounds float* %tmp17504, i64 1
- %tmp17506 = getelementptr inbounds float* %tmp17505, i64 1
- %tmp17507 = getelementptr inbounds float* %tmp17506, i64 1
- %tmp17508 = getelementptr inbounds float* %tmp17507, i64 1
- %tmp17509 = getelementptr inbounds float* %tmp17508, i64 1
- %tmp17510 = getelementptr inbounds float* %tmp17509, i64 1
- %tmp17511 = getelementptr inbounds float* %tmp17510, i64 1
- %tmp17512 = getelementptr inbounds float* %tmp17511, i64 1
- %tmp17513 = getelementptr inbounds float* %tmp17512, i64 1
- %tmp17514 = getelementptr inbounds float* %tmp17513, i64 1
- %tmp17515 = getelementptr inbounds float* %tmp17514, i64 1
- %tmp17516 = getelementptr inbounds float* %tmp17515, i64 1
- %tmp17517 = getelementptr inbounds float* %tmp17516, i64 1
- %tmp17518 = getelementptr inbounds float* %tmp17517, i64 1
- %tmp17519 = getelementptr inbounds float* %tmp17518, i64 1
- %tmp17520 = getelementptr inbounds float* %tmp17519, i64 1
- %tmp17521 = getelementptr inbounds float* %tmp17520, i64 1
- %tmp17522 = getelementptr inbounds float* %tmp17521, i64 1
- %tmp17523 = getelementptr inbounds float* %tmp17522, i64 1
- %tmp17524 = getelementptr inbounds float* %tmp17523, i64 1
- %tmp17525 = getelementptr inbounds float* %tmp17524, i64 1
- %tmp17526 = getelementptr inbounds float* %tmp17525, i64 1
- %tmp17527 = getelementptr inbounds float* %tmp17526, i64 1
- %tmp17528 = getelementptr inbounds float* %tmp17527, i64 1
- %tmp17529 = getelementptr inbounds float* %tmp17528, i64 1
- %tmp17530 = getelementptr inbounds float* %tmp17529, i64 1
- %tmp17531 = getelementptr inbounds float* %tmp17530, i64 1
- %tmp17532 = getelementptr inbounds float* %tmp17531, i64 1
- %tmp17533 = getelementptr inbounds float* %tmp17532, i64 1
- %tmp17534 = getelementptr inbounds float* %tmp17533, i64 1
- %tmp17535 = getelementptr inbounds float* %tmp17534, i64 1
- %tmp17536 = getelementptr inbounds float* %tmp17535, i64 1
- %tmp17537 = getelementptr inbounds float* %tmp17536, i64 1
- %tmp17538 = getelementptr inbounds float* %tmp17537, i64 1
- %tmp17539 = getelementptr inbounds float* %tmp17538, i64 1
- %tmp17540 = getelementptr inbounds float* %tmp17539, i64 1
- %tmp17541 = getelementptr inbounds float* %tmp17540, i64 1
- %tmp17542 = getelementptr inbounds float* %tmp17541, i64 1
- %tmp17543 = getelementptr inbounds float* %tmp17542, i64 1
- %tmp17544 = getelementptr inbounds float* %tmp17543, i64 1
- %tmp17545 = getelementptr inbounds float* %tmp17544, i64 1
- %tmp17546 = getelementptr inbounds float* %tmp17545, i64 1
- %tmp17547 = getelementptr inbounds float* %tmp17546, i64 1
- %tmp17548 = getelementptr inbounds float* %tmp17547, i64 1
- %tmp17549 = getelementptr inbounds float* %tmp17548, i64 1
- %tmp17550 = getelementptr inbounds float* %tmp17549, i64 1
- %tmp17551 = getelementptr inbounds float* %tmp17550, i64 1
- %tmp17552 = getelementptr inbounds float* %tmp17551, i64 1
- %tmp17553 = getelementptr inbounds float* %tmp17552, i64 1
- %tmp17554 = getelementptr inbounds float* %tmp17553, i64 1
- %tmp17555 = getelementptr inbounds float* %tmp17554, i64 1
- %tmp17556 = getelementptr inbounds float* %tmp17555, i64 1
- %tmp17557 = getelementptr inbounds float* %tmp17556, i64 1
- %tmp17558 = getelementptr inbounds float* %tmp17557, i64 1
- %tmp17559 = getelementptr inbounds float* %tmp17558, i64 1
- %tmp17560 = getelementptr inbounds float* %tmp17559, i64 1
- %tmp17561 = getelementptr inbounds float* %tmp17560, i64 1
- %tmp17562 = getelementptr inbounds float* %tmp17561, i64 1
- %tmp17563 = getelementptr inbounds float* %tmp17562, i64 1
- %tmp17564 = getelementptr inbounds float* %tmp17563, i64 1
- %tmp17565 = getelementptr inbounds float* %tmp17564, i64 1
- %tmp17566 = getelementptr inbounds float* %tmp17565, i64 1
- %tmp17567 = getelementptr inbounds float* %tmp17566, i64 1
- %tmp17568 = getelementptr inbounds float* %tmp17567, i64 1
- %tmp17569 = getelementptr inbounds float* %tmp17568, i64 1
- %tmp17570 = getelementptr inbounds float* %tmp17569, i64 1
- %tmp17571 = getelementptr inbounds float* %tmp17570, i64 1
- %tmp17572 = getelementptr inbounds float* %tmp17571, i64 1
- %tmp17573 = getelementptr inbounds float* %tmp17572, i64 1
- %tmp17574 = getelementptr inbounds float* %tmp17573, i64 1
- %tmp17575 = getelementptr inbounds float* %tmp17574, i64 1
- %tmp17576 = getelementptr inbounds float* %tmp17575, i64 1
- %tmp17577 = getelementptr inbounds float* %tmp17576, i64 1
- %tmp17578 = getelementptr inbounds float* %tmp17577, i64 1
- %tmp17579 = getelementptr inbounds float* %tmp17578, i64 1
- %tmp17580 = getelementptr inbounds float* %tmp17579, i64 1
- %tmp17581 = getelementptr inbounds float* %tmp17580, i64 1
- %tmp17582 = getelementptr inbounds float* %tmp17581, i64 1
- %tmp17583 = getelementptr inbounds float* %tmp17582, i64 1
- %tmp17584 = getelementptr inbounds float* %tmp17583, i64 1
- %tmp17585 = getelementptr inbounds float* %tmp17584, i64 1
- %tmp17586 = getelementptr inbounds float* %tmp17585, i64 1
- %tmp17587 = getelementptr inbounds float* %tmp17586, i64 1
- %tmp17588 = getelementptr inbounds float* %tmp17587, i64 1
- %tmp17589 = getelementptr inbounds float* %tmp17588, i64 1
- %tmp17590 = getelementptr inbounds float* %tmp17589, i64 1
- %tmp17591 = getelementptr inbounds float* %tmp17590, i64 1
- %tmp17592 = getelementptr inbounds float* %tmp17591, i64 1
- %tmp17593 = getelementptr inbounds float* %tmp17592, i64 1
- %tmp17594 = getelementptr inbounds float* %tmp17593, i64 1
- %tmp17595 = getelementptr inbounds float* %tmp17594, i64 1
- %tmp17596 = getelementptr inbounds float* %tmp17595, i64 1
- %tmp17597 = getelementptr inbounds float* %tmp17596, i64 1
- %tmp17598 = getelementptr inbounds float* %tmp17597, i64 1
- %tmp17599 = getelementptr inbounds float* %tmp17598, i64 1
- %tmp17600 = getelementptr inbounds float* %tmp17599, i64 1
- %tmp17601 = getelementptr inbounds float* %tmp17600, i64 1
- %tmp17602 = getelementptr inbounds float* %tmp17601, i64 1
- %tmp17603 = getelementptr inbounds float* %tmp17602, i64 1
- %tmp17604 = getelementptr inbounds float* %tmp17603, i64 1
- %tmp17605 = getelementptr inbounds float* %tmp17604, i64 1
- %tmp17606 = getelementptr inbounds float* %tmp17605, i64 1
- %tmp17607 = getelementptr inbounds float* %tmp17606, i64 1
- %tmp17608 = getelementptr inbounds float* %tmp17607, i64 1
- %tmp17609 = getelementptr inbounds float* %tmp17608, i64 1
- %tmp17610 = getelementptr inbounds float* %tmp17609, i64 1
- %tmp17611 = getelementptr inbounds float* %tmp17610, i64 1
- %tmp17612 = getelementptr inbounds float* %tmp17611, i64 1
- %tmp17613 = getelementptr inbounds float* %tmp17612, i64 1
- %tmp17614 = getelementptr inbounds float* %tmp17613, i64 1
- %tmp17615 = getelementptr inbounds float* %tmp17614, i64 1
- %tmp17616 = getelementptr inbounds float* %tmp17615, i64 1
- %tmp17617 = getelementptr inbounds float* %tmp17616, i64 1
- %tmp17618 = getelementptr inbounds float* %tmp17617, i64 1
- %tmp17619 = getelementptr inbounds float* %tmp17618, i64 1
- %tmp17620 = getelementptr inbounds float* %tmp17619, i64 1
- %tmp17621 = getelementptr inbounds float* %tmp17620, i64 1
- %tmp17622 = getelementptr inbounds float* %tmp17621, i64 1
- %tmp17623 = getelementptr inbounds float* %tmp17622, i64 1
- %tmp17624 = getelementptr inbounds float* %tmp17623, i64 1
- %tmp17625 = getelementptr inbounds float* %tmp17624, i64 1
- %tmp17626 = getelementptr inbounds float* %tmp17625, i64 1
- %tmp17627 = getelementptr inbounds float* %tmp17626, i64 1
- %tmp17628 = getelementptr inbounds float* %tmp17627, i64 1
- %tmp17629 = getelementptr inbounds float* %tmp17628, i64 1
- %tmp17630 = getelementptr inbounds float* %tmp17629, i64 1
- %tmp17631 = getelementptr inbounds float* %tmp17630, i64 1
- %tmp17632 = getelementptr inbounds float* %tmp17631, i64 1
- %tmp17633 = getelementptr inbounds float* %tmp17632, i64 1
- %tmp17634 = getelementptr inbounds float* %tmp17633, i64 1
- %tmp17635 = getelementptr inbounds float* %tmp17634, i64 1
- %tmp17636 = getelementptr inbounds float* %tmp17635, i64 1
- %tmp17637 = getelementptr inbounds float* %tmp17636, i64 1
- %tmp17638 = getelementptr inbounds float* %tmp17637, i64 1
- %tmp17639 = getelementptr inbounds float* %tmp17638, i64 1
- %tmp17640 = getelementptr inbounds float* %tmp17639, i64 1
- %tmp17641 = getelementptr inbounds float* %tmp17640, i64 1
- %tmp17642 = getelementptr inbounds float* %tmp17641, i64 1
- %tmp17643 = getelementptr inbounds float* %tmp17642, i64 1
- %tmp17644 = getelementptr inbounds float* %tmp17643, i64 1
- %tmp17645 = getelementptr inbounds float* %tmp17644, i64 1
- %tmp17646 = getelementptr inbounds float* %tmp17645, i64 1
- %tmp17647 = getelementptr inbounds float* %tmp17646, i64 1
- %tmp17648 = getelementptr inbounds float* %tmp17647, i64 1
- %tmp17649 = getelementptr inbounds float* %tmp17648, i64 1
- %tmp17650 = getelementptr inbounds float* %tmp17649, i64 1
- %tmp17651 = getelementptr inbounds float* %tmp17650, i64 1
- %tmp17652 = getelementptr inbounds float* %tmp17651, i64 1
- %tmp17653 = getelementptr inbounds float* %tmp17652, i64 1
- %tmp17654 = getelementptr inbounds float* %tmp17653, i64 1
- %tmp17655 = getelementptr inbounds float* %tmp17654, i64 1
- %tmp17656 = getelementptr inbounds float* %tmp17655, i64 1
- %tmp17657 = getelementptr inbounds float* %tmp17656, i64 1
- %tmp17658 = getelementptr inbounds float* %tmp17657, i64 1
- %tmp17659 = getelementptr inbounds float* %tmp17658, i64 1
- %tmp17660 = getelementptr inbounds float* %tmp17659, i64 1
- %tmp17661 = getelementptr inbounds float* %tmp17660, i64 1
- %tmp17662 = getelementptr inbounds float* %tmp17661, i64 1
- %tmp17663 = getelementptr inbounds float* %tmp17662, i64 1
- %tmp17664 = getelementptr inbounds float* %tmp17663, i64 1
- %tmp17665 = getelementptr inbounds float* %tmp17664, i64 1
- %tmp17666 = getelementptr inbounds float* %tmp17665, i64 1
- %tmp17667 = getelementptr inbounds float* %tmp17666, i64 1
- %tmp17668 = getelementptr inbounds float* %tmp17667, i64 1
- %tmp17669 = getelementptr inbounds float* %tmp17668, i64 1
- %tmp17670 = getelementptr inbounds float* %tmp17669, i64 1
- %tmp17671 = getelementptr inbounds float* %tmp17670, i64 1
- %tmp17672 = getelementptr inbounds float* %tmp17671, i64 1
- %tmp17673 = getelementptr inbounds float* %tmp17672, i64 1
- %tmp17674 = getelementptr inbounds float* %tmp17673, i64 1
- %tmp17675 = getelementptr inbounds float* %tmp17674, i64 1
- %tmp17676 = getelementptr inbounds float* %tmp17675, i64 1
- %tmp17677 = getelementptr inbounds float* %tmp17676, i64 1
- %tmp17678 = getelementptr inbounds float* %tmp17677, i64 1
- %tmp17679 = getelementptr inbounds float* %tmp17678, i64 1
- %tmp17680 = getelementptr inbounds float* %tmp17679, i64 1
- %tmp17681 = getelementptr inbounds float* %tmp17680, i64 1
- %tmp17682 = getelementptr inbounds float* %tmp17681, i64 1
- %tmp17683 = getelementptr inbounds float* %tmp17682, i64 1
- %tmp17684 = getelementptr inbounds float* %tmp17683, i64 1
- %tmp17685 = getelementptr inbounds float* %tmp17684, i64 1
- %tmp17686 = getelementptr inbounds float* %tmp17685, i64 1
- %tmp17687 = getelementptr inbounds float* %tmp17686, i64 1
- %tmp17688 = getelementptr inbounds float* %tmp17687, i64 1
- %tmp17689 = getelementptr inbounds float* %tmp17688, i64 1
- %tmp17690 = getelementptr inbounds float* %tmp17689, i64 1
- %tmp17691 = getelementptr inbounds float* %tmp17690, i64 1
- %tmp17692 = getelementptr inbounds float* %tmp17691, i64 1
- %tmp17693 = getelementptr inbounds float* %tmp17692, i64 1
- %tmp17694 = getelementptr inbounds float* %tmp17693, i64 1
- %tmp17695 = getelementptr inbounds float* %tmp17694, i64 1
- %tmp17696 = getelementptr inbounds float* %tmp17695, i64 1
- %tmp17697 = getelementptr inbounds float* %tmp17696, i64 1
- %tmp17698 = getelementptr inbounds float* %tmp17697, i64 1
- %tmp17699 = getelementptr inbounds float* %tmp17698, i64 1
- %tmp17700 = getelementptr inbounds float* %tmp17699, i64 1
- %tmp17701 = getelementptr inbounds float* %tmp17700, i64 1
- %tmp17702 = getelementptr inbounds float* %tmp17701, i64 1
- %tmp17703 = getelementptr inbounds float* %tmp17702, i64 1
- %tmp17704 = getelementptr inbounds float* %tmp17703, i64 1
- %tmp17705 = getelementptr inbounds float* %tmp17704, i64 1
- %tmp17706 = getelementptr inbounds float* %tmp17705, i64 1
- %tmp17707 = getelementptr inbounds float* %tmp17706, i64 1
- %tmp17708 = getelementptr inbounds float* %tmp17707, i64 1
- %tmp17709 = getelementptr inbounds float* %tmp17708, i64 1
- %tmp17710 = getelementptr inbounds float* %tmp17709, i64 1
- %tmp17711 = getelementptr inbounds float* %tmp17710, i64 1
- %tmp17712 = getelementptr inbounds float* %tmp17711, i64 1
- %tmp17713 = getelementptr inbounds float* %tmp17712, i64 1
- %tmp17714 = getelementptr inbounds float* %tmp17713, i64 1
- %tmp17715 = getelementptr inbounds float* %tmp17714, i64 1
- %tmp17716 = getelementptr inbounds float* %tmp17715, i64 1
- %tmp17717 = getelementptr inbounds float* %tmp17716, i64 1
- %tmp17718 = getelementptr inbounds float* %tmp17717, i64 1
- %tmp17719 = getelementptr inbounds float* %tmp17718, i64 1
- %tmp17720 = getelementptr inbounds float* %tmp17719, i64 1
- %tmp17721 = getelementptr inbounds float* %tmp17720, i64 1
- %tmp17722 = getelementptr inbounds float* %tmp17721, i64 1
- %tmp17723 = getelementptr inbounds float* %tmp17722, i64 1
- %tmp17724 = getelementptr inbounds float* %tmp17723, i64 1
- %tmp17725 = getelementptr inbounds float* %tmp17724, i64 1
- %tmp17726 = getelementptr inbounds float* %tmp17725, i64 1
- %tmp17727 = getelementptr inbounds float* %tmp17726, i64 1
- %tmp17728 = getelementptr inbounds float* %tmp17727, i64 1
- %tmp17729 = getelementptr inbounds float* %tmp17728, i64 1
- %tmp17730 = getelementptr inbounds float* %tmp17729, i64 1
- %tmp17731 = getelementptr inbounds float* %tmp17730, i64 1
- %tmp17732 = getelementptr inbounds float* %tmp17731, i64 1
- %tmp17733 = getelementptr inbounds float* %tmp17732, i64 1
- %tmp17734 = getelementptr inbounds float* %tmp17733, i64 1
- %tmp17735 = getelementptr inbounds float* %tmp17734, i64 1
- %tmp17736 = getelementptr inbounds float* %tmp17735, i64 1
- %tmp17737 = getelementptr inbounds float* %tmp17736, i64 1
- %tmp17738 = getelementptr inbounds float* %tmp17737, i64 1
- %tmp17739 = getelementptr inbounds float* %tmp17738, i64 1
- %tmp17740 = getelementptr inbounds float* %tmp17739, i64 1
- %tmp17741 = getelementptr inbounds float* %tmp17740, i64 1
- %tmp17742 = getelementptr inbounds float* %tmp17741, i64 1
- %tmp17743 = getelementptr inbounds float* %tmp17742, i64 1
- %tmp17744 = getelementptr inbounds float* %tmp17743, i64 1
- %tmp17745 = getelementptr inbounds float* %tmp17744, i64 1
- %tmp17746 = getelementptr inbounds float* %tmp17745, i64 1
- %tmp17747 = getelementptr inbounds float* %tmp17746, i64 1
- %tmp17748 = getelementptr inbounds float* %tmp17747, i64 1
- %tmp17749 = getelementptr inbounds float* %tmp17748, i64 1
- %tmp17750 = getelementptr inbounds float* %tmp17749, i64 1
- %tmp17751 = getelementptr inbounds float* %tmp17750, i64 1
- %tmp17752 = getelementptr inbounds float* %tmp17751, i64 1
- %tmp17753 = getelementptr inbounds float* %tmp17752, i64 1
- %tmp17754 = getelementptr inbounds float* %tmp17753, i64 1
- %tmp17755 = getelementptr inbounds float* %tmp17754, i64 1
- %tmp17756 = getelementptr inbounds float* %tmp17755, i64 1
- %tmp17757 = getelementptr inbounds float* %tmp17756, i64 1
- %tmp17758 = getelementptr inbounds float* %tmp17757, i64 1
- %tmp17759 = getelementptr inbounds float* %tmp17758, i64 1
- %tmp17760 = getelementptr inbounds float* %tmp17759, i64 1
- %tmp17761 = getelementptr inbounds float* %tmp17760, i64 1
- %tmp17762 = getelementptr inbounds float* %tmp17761, i64 1
- %tmp17763 = getelementptr inbounds float* %tmp17762, i64 1
- %tmp17764 = getelementptr inbounds float* %tmp17763, i64 1
- %tmp17765 = getelementptr inbounds float* %tmp17764, i64 1
- %tmp17766 = getelementptr inbounds float* %tmp17765, i64 1
- %tmp17767 = getelementptr inbounds float* %tmp17766, i64 1
- %tmp17768 = getelementptr inbounds float* %tmp17767, i64 1
- %tmp17769 = getelementptr inbounds float* %tmp17768, i64 1
- %tmp17770 = getelementptr inbounds float* %tmp17769, i64 1
- %tmp17771 = getelementptr inbounds float* %tmp17770, i64 1
- %tmp17772 = getelementptr inbounds float* %tmp17771, i64 1
- %tmp17773 = getelementptr inbounds float* %tmp17772, i64 1
- %tmp17774 = getelementptr inbounds float* %tmp17773, i64 1
- %tmp17775 = getelementptr inbounds float* %tmp17774, i64 1
- %tmp17776 = getelementptr inbounds float* %tmp17775, i64 1
- %tmp17777 = getelementptr inbounds float* %tmp17776, i64 1
- %tmp17778 = getelementptr inbounds float* %tmp17777, i64 1
- %tmp17779 = getelementptr inbounds float* %tmp17778, i64 1
- %tmp17780 = getelementptr inbounds float* %tmp17779, i64 1
- %tmp17781 = getelementptr inbounds float* %tmp17780, i64 1
- %tmp17782 = getelementptr inbounds float* %tmp17781, i64 1
- %tmp17783 = getelementptr inbounds float* %tmp17782, i64 1
- %tmp17784 = getelementptr inbounds float* %tmp17783, i64 1
- %tmp17785 = getelementptr inbounds float* %tmp17784, i64 1
- %tmp17786 = getelementptr inbounds float* %tmp17785, i64 1
- %tmp17787 = getelementptr inbounds float* %tmp17786, i64 1
- %tmp17788 = getelementptr inbounds float* %tmp17787, i64 1
- %tmp17789 = getelementptr inbounds float* %tmp17788, i64 1
- %tmp17790 = getelementptr inbounds float* %tmp17789, i64 1
- %tmp17791 = getelementptr inbounds float* %tmp17790, i64 1
- %tmp17792 = getelementptr inbounds float* %tmp17791, i64 1
- %tmp17793 = getelementptr inbounds float* %tmp17792, i64 1
- %tmp17794 = getelementptr inbounds float* %tmp17793, i64 1
- %tmp17795 = getelementptr inbounds float* %tmp17794, i64 1
- %tmp17796 = getelementptr inbounds float* %tmp17795, i64 1
- %tmp17797 = getelementptr inbounds float* %tmp17796, i64 1
- %tmp17798 = getelementptr inbounds float* %tmp17797, i64 1
- %tmp17799 = getelementptr inbounds float* %tmp17798, i64 1
- %tmp17800 = getelementptr inbounds float* %tmp17799, i64 1
- %tmp17801 = getelementptr inbounds float* %tmp17800, i64 1
- %tmp17802 = getelementptr inbounds float* %tmp17801, i64 1
- %tmp17803 = getelementptr inbounds float* %tmp17802, i64 1
- %tmp17804 = getelementptr inbounds float* %tmp17803, i64 1
- %tmp17805 = getelementptr inbounds float* %tmp17804, i64 1
- %tmp17806 = getelementptr inbounds float* %tmp17805, i64 1
- %tmp17807 = getelementptr inbounds float* %tmp17806, i64 1
- %tmp17808 = getelementptr inbounds float* %tmp17807, i64 1
- %tmp17809 = getelementptr inbounds float* %tmp17808, i64 1
- %tmp17810 = getelementptr inbounds float* %tmp17809, i64 1
- %tmp17811 = getelementptr inbounds float* %tmp17810, i64 1
- %tmp17812 = getelementptr inbounds float* %tmp17811, i64 1
- %tmp17813 = getelementptr inbounds float* %tmp17812, i64 1
- %tmp17814 = getelementptr inbounds float* %tmp17813, i64 1
- %tmp17815 = getelementptr inbounds float* %tmp17814, i64 1
- %tmp17816 = getelementptr inbounds float* %tmp17815, i64 1
- %tmp17817 = getelementptr inbounds float* %tmp17816, i64 1
- %tmp17818 = getelementptr inbounds float* %tmp17817, i64 1
- %tmp17819 = getelementptr inbounds float* %tmp17818, i64 1
- %tmp17820 = getelementptr inbounds float* %tmp17819, i64 1
- %tmp17821 = getelementptr inbounds float* %tmp17820, i64 1
- %tmp17822 = getelementptr inbounds float* %tmp17821, i64 1
- %tmp17823 = getelementptr inbounds float* %tmp17822, i64 1
- %tmp17824 = getelementptr inbounds float* %tmp17823, i64 1
- %tmp17825 = getelementptr inbounds float* %tmp17824, i64 1
- %tmp17826 = getelementptr inbounds float* %tmp17825, i64 1
- %tmp17827 = getelementptr inbounds float* %tmp17826, i64 1
- %tmp17828 = getelementptr inbounds float* %tmp17827, i64 1
- %tmp17829 = getelementptr inbounds float* %tmp17828, i64 1
- %tmp17830 = getelementptr inbounds float* %tmp17829, i64 1
- %tmp17831 = getelementptr inbounds float* %tmp17830, i64 1
- %tmp17832 = getelementptr inbounds float* %tmp17831, i64 1
- %tmp17833 = getelementptr inbounds float* %tmp17832, i64 1
- %tmp17834 = getelementptr inbounds float* %tmp17833, i64 1
- %tmp17835 = getelementptr inbounds float* %tmp17834, i64 1
- %tmp17836 = getelementptr inbounds float* %tmp17835, i64 1
- %tmp17837 = getelementptr inbounds float* %tmp17836, i64 1
- %tmp17838 = getelementptr inbounds float* %tmp17837, i64 1
- %tmp17839 = getelementptr inbounds float* %tmp17838, i64 1
- %tmp17840 = getelementptr inbounds float* %tmp17839, i64 1
- %tmp17841 = getelementptr inbounds float* %tmp17840, i64 1
- %tmp17842 = getelementptr inbounds float* %tmp17841, i64 1
- %tmp17843 = getelementptr inbounds float* %tmp17842, i64 1
- %tmp17844 = getelementptr inbounds float* %tmp17843, i64 1
- %tmp17845 = getelementptr inbounds float* %tmp17844, i64 1
- %tmp17846 = getelementptr inbounds float* %tmp17845, i64 1
- %tmp17847 = getelementptr inbounds float* %tmp17846, i64 1
- %tmp17848 = getelementptr inbounds float* %tmp17847, i64 1
- %tmp17849 = getelementptr inbounds float* %tmp17848, i64 1
- %tmp17850 = getelementptr inbounds float* %tmp17849, i64 1
- %tmp17851 = getelementptr inbounds float* %tmp17850, i64 1
- %tmp17852 = getelementptr inbounds float* %tmp17851, i64 1
- %tmp17853 = getelementptr inbounds float* %tmp17852, i64 1
- %tmp17854 = getelementptr inbounds float* %tmp17853, i64 1
- %tmp17855 = getelementptr inbounds float* %tmp17854, i64 1
- %tmp17856 = getelementptr inbounds float* %tmp17855, i64 1
- %tmp17857 = getelementptr inbounds float* %tmp17856, i64 1
- %tmp17858 = getelementptr inbounds float* %tmp17857, i64 1
- %tmp17859 = getelementptr inbounds float* %tmp17858, i64 1
- %tmp17860 = getelementptr inbounds float* %tmp17859, i64 1
- %tmp17861 = getelementptr inbounds float* %tmp17860, i64 1
- %tmp17862 = getelementptr inbounds float* %tmp17861, i64 1
- %tmp17863 = getelementptr inbounds float* %tmp17862, i64 1
- %tmp17864 = getelementptr inbounds float* %tmp17863, i64 1
- %tmp17865 = getelementptr inbounds float* %tmp17864, i64 1
- %tmp17866 = getelementptr inbounds float* %tmp17865, i64 1
- %tmp17867 = getelementptr inbounds float* %tmp17866, i64 1
- %tmp17868 = getelementptr inbounds float* %tmp17867, i64 1
- %tmp17869 = getelementptr inbounds float* %tmp17868, i64 1
- %tmp17870 = getelementptr inbounds float* %tmp17869, i64 1
- %tmp17871 = getelementptr inbounds float* %tmp17870, i64 1
- %tmp17872 = getelementptr inbounds float* %tmp17871, i64 1
- %tmp17873 = getelementptr inbounds float* %tmp17872, i64 1
- %tmp17874 = getelementptr inbounds float* %tmp17873, i64 1
- %tmp17875 = getelementptr inbounds float* %tmp17874, i64 1
- %tmp17876 = getelementptr inbounds float* %tmp17875, i64 1
- %tmp17877 = getelementptr inbounds float* %tmp17876, i64 1
- %tmp17878 = getelementptr inbounds float* %tmp17877, i64 1
- %tmp17879 = getelementptr inbounds float* %tmp17878, i64 1
- %tmp17880 = getelementptr inbounds float* %tmp17879, i64 1
- %tmp17881 = getelementptr inbounds float* %tmp17880, i64 1
- %tmp17882 = getelementptr inbounds float* %tmp17881, i64 1
- %tmp17883 = getelementptr inbounds float* %tmp17882, i64 1
- %tmp17884 = getelementptr inbounds float* %tmp17883, i64 1
- %tmp17885 = getelementptr inbounds float* %tmp17884, i64 1
- %tmp17886 = getelementptr inbounds float* %tmp17885, i64 1
- %tmp17887 = getelementptr inbounds float* %tmp17886, i64 1
- %tmp17888 = getelementptr inbounds float* %tmp17887, i64 1
- %tmp17889 = getelementptr inbounds float* %tmp17888, i64 1
- %tmp17890 = getelementptr inbounds float* %tmp17889, i64 1
- %tmp17891 = getelementptr inbounds float* %tmp17890, i64 1
- %tmp17892 = getelementptr inbounds float* %tmp17891, i64 1
- %tmp17893 = getelementptr inbounds float* %tmp17892, i64 1
- %tmp17894 = getelementptr inbounds float* %tmp17893, i64 1
- %tmp17895 = getelementptr inbounds float* %tmp17894, i64 1
- %tmp17896 = getelementptr inbounds float* %tmp17895, i64 1
- %tmp17897 = getelementptr inbounds float* %tmp17896, i64 1
- %tmp17898 = getelementptr inbounds float* %tmp17897, i64 1
- %tmp17899 = getelementptr inbounds float* %tmp17898, i64 1
- %tmp17900 = getelementptr inbounds float* %tmp17899, i64 1
- %tmp17901 = getelementptr inbounds float* %tmp17900, i64 1
- %tmp17902 = getelementptr inbounds float* %tmp17901, i64 1
- %tmp17903 = getelementptr inbounds float* %tmp17902, i64 1
- %tmp17904 = getelementptr inbounds float* %tmp17903, i64 1
- %tmp17905 = getelementptr inbounds float* %tmp17904, i64 1
- %tmp17906 = getelementptr inbounds float* %tmp17905, i64 1
- %tmp17907 = getelementptr inbounds float* %tmp17906, i64 1
- %tmp17908 = getelementptr inbounds float* %tmp17907, i64 1
- %tmp17909 = getelementptr inbounds float* %tmp17908, i64 1
- %tmp17910 = getelementptr inbounds float* %tmp17909, i64 1
- %tmp17911 = getelementptr inbounds float* %tmp17910, i64 1
- %tmp17912 = getelementptr inbounds float* %tmp17911, i64 1
- %tmp17913 = getelementptr inbounds float* %tmp17912, i64 1
- %tmp17914 = getelementptr inbounds float* %tmp17913, i64 1
- %tmp17915 = getelementptr inbounds float* %tmp17914, i64 1
- %tmp17916 = getelementptr inbounds float* %tmp17915, i64 1
- %tmp17917 = getelementptr inbounds float* %tmp17916, i64 1
- %tmp17918 = getelementptr inbounds float* %tmp17917, i64 1
- %tmp17919 = getelementptr inbounds float* %tmp17918, i64 1
- %tmp17920 = getelementptr inbounds float* %tmp17919, i64 1
- %tmp17921 = getelementptr inbounds float* %tmp17920, i64 1
- %tmp17922 = getelementptr inbounds float* %tmp17921, i64 1
- %tmp17923 = getelementptr inbounds float* %tmp17922, i64 1
- %tmp17924 = getelementptr inbounds float* %tmp17923, i64 1
- %tmp17925 = getelementptr inbounds float* %tmp17924, i64 1
- %tmp17926 = getelementptr inbounds float* %tmp17925, i64 1
- %tmp17927 = getelementptr inbounds float* %tmp17926, i64 1
- %tmp17928 = getelementptr inbounds float* %tmp17927, i64 1
- %tmp17929 = getelementptr inbounds float* %tmp17928, i64 1
- %tmp17930 = getelementptr inbounds float* %tmp17929, i64 1
- %tmp17931 = getelementptr inbounds float* %tmp17930, i64 1
- %tmp17932 = getelementptr inbounds float* %tmp17931, i64 1
- %tmp17933 = getelementptr inbounds float* %tmp17932, i64 1
- %tmp17934 = getelementptr inbounds float* %tmp17933, i64 1
- %tmp17935 = getelementptr inbounds float* %tmp17934, i64 1
- %tmp17936 = getelementptr inbounds float* %tmp17935, i64 1
- %tmp17937 = getelementptr inbounds float* %tmp17936, i64 1
- %tmp17938 = getelementptr inbounds float* %tmp17937, i64 1
- %tmp17939 = getelementptr inbounds float* %tmp17938, i64 1
- %tmp17940 = getelementptr inbounds float* %tmp17939, i64 1
- %tmp17941 = getelementptr inbounds float* %tmp17940, i64 1
- %tmp17942 = getelementptr inbounds float* %tmp17941, i64 1
- %tmp17943 = getelementptr inbounds float* %tmp17942, i64 1
- %tmp17944 = getelementptr inbounds float* %tmp17943, i64 1
- %tmp17945 = getelementptr inbounds float* %tmp17944, i64 1
- %tmp17946 = getelementptr inbounds float* %tmp17945, i64 1
- %tmp17947 = getelementptr inbounds float* %tmp17946, i64 1
- %tmp17948 = getelementptr inbounds float* %tmp17947, i64 1
- %tmp17949 = getelementptr inbounds float* %tmp17948, i64 1
- %tmp17950 = getelementptr inbounds float* %tmp17949, i64 1
- %tmp17951 = getelementptr inbounds float* %tmp17950, i64 1
- %tmp17952 = getelementptr inbounds float* %tmp17951, i64 1
- %tmp17953 = getelementptr inbounds float* %tmp17952, i64 1
- %tmp17954 = getelementptr inbounds float* %tmp17953, i64 1
- %tmp17955 = getelementptr inbounds float* %tmp17954, i64 1
- %tmp17956 = getelementptr inbounds float* %tmp17955, i64 1
- %tmp17957 = getelementptr inbounds float* %tmp17956, i64 1
- %tmp17958 = getelementptr inbounds float* %tmp17957, i64 1
- %tmp17959 = getelementptr inbounds float* %tmp17958, i64 1
- %tmp17960 = getelementptr inbounds float* %tmp17959, i64 1
- %tmp17961 = getelementptr inbounds float* %tmp17960, i64 1
- %tmp17962 = getelementptr inbounds float* %tmp17961, i64 1
- %tmp17963 = getelementptr inbounds float* %tmp17962, i64 1
- %tmp17964 = getelementptr inbounds float* %tmp17963, i64 1
- %tmp17965 = getelementptr inbounds float* %tmp17964, i64 1
- %tmp17966 = getelementptr inbounds float* %tmp17965, i64 1
- %tmp17967 = getelementptr inbounds float* %tmp17966, i64 1
- %tmp17968 = getelementptr inbounds float* %tmp17967, i64 1
- %tmp17969 = getelementptr inbounds float* %tmp17968, i64 1
- %tmp17970 = getelementptr inbounds float* %tmp17969, i64 1
- %tmp17971 = getelementptr inbounds float* %tmp17970, i64 1
- %tmp17972 = getelementptr inbounds float* %tmp17971, i64 1
- %tmp17973 = getelementptr inbounds float* %tmp17972, i64 1
- %tmp17974 = getelementptr inbounds float* %tmp17973, i64 1
- %tmp17975 = getelementptr inbounds float* %tmp17974, i64 1
- %tmp17976 = getelementptr inbounds float* %tmp17975, i64 1
- %tmp17977 = getelementptr inbounds float* %tmp17976, i64 1
- %tmp17978 = getelementptr inbounds float* %tmp17977, i64 1
- %tmp17979 = getelementptr inbounds float* %tmp17978, i64 1
- %tmp17980 = getelementptr inbounds float* %tmp17979, i64 1
- %tmp17981 = getelementptr inbounds float* %tmp17980, i64 1
- %tmp17982 = getelementptr inbounds float* %tmp17981, i64 1
- %tmp17983 = getelementptr inbounds float* %tmp17982, i64 1
- %tmp17984 = getelementptr inbounds float* %tmp17983, i64 1
- %tmp17985 = getelementptr inbounds float* %tmp17984, i64 1
- %tmp17986 = getelementptr inbounds float* %tmp17985, i64 1
- %tmp17987 = getelementptr inbounds float* %tmp17986, i64 1
- %tmp17988 = getelementptr inbounds float* %tmp17987, i64 1
- %tmp17989 = getelementptr inbounds float* %tmp17988, i64 1
- %tmp17990 = getelementptr inbounds float* %tmp17989, i64 1
- %tmp17991 = getelementptr inbounds float* %tmp17990, i64 1
- %tmp17992 = getelementptr inbounds float* %tmp17991, i64 1
- %tmp17993 = getelementptr inbounds float* %tmp17992, i64 1
- %tmp17994 = getelementptr inbounds float* %tmp17993, i64 1
- %tmp17995 = getelementptr inbounds float* %tmp17994, i64 1
- %tmp17996 = getelementptr inbounds float* %tmp17995, i64 1
- %tmp17997 = getelementptr inbounds float* %tmp17996, i64 1
- %tmp17998 = getelementptr inbounds float* %tmp17997, i64 1
- %tmp17999 = getelementptr inbounds float* %tmp17998, i64 1
- %tmp18000 = getelementptr inbounds float* %tmp17999, i64 1
- %tmp18001 = getelementptr inbounds float* %tmp18000, i64 1
- %tmp18002 = getelementptr inbounds float* %tmp18001, i64 1
- %tmp18003 = getelementptr inbounds float* %tmp18002, i64 1
- %tmp18004 = getelementptr inbounds float* %tmp18003, i64 1
- %tmp18005 = getelementptr inbounds float* %tmp18004, i64 1
- %tmp18006 = getelementptr inbounds float* %tmp18005, i64 1
- %tmp18007 = getelementptr inbounds float* %tmp18006, i64 1
- %tmp18008 = getelementptr inbounds float* %tmp18007, i64 1
- %tmp18009 = getelementptr inbounds float* %tmp18008, i64 1
- %tmp18010 = getelementptr inbounds float* %tmp18009, i64 1
- %tmp18011 = getelementptr inbounds float* %tmp18010, i64 1
- %tmp18012 = getelementptr inbounds float* %tmp18011, i64 1
- %tmp18013 = getelementptr inbounds float* %tmp18012, i64 1
- %tmp18014 = getelementptr inbounds float* %tmp18013, i64 1
- %tmp18015 = getelementptr inbounds float* %tmp18014, i64 1
- %tmp18016 = getelementptr inbounds float* %tmp18015, i64 1
- %tmp18017 = getelementptr inbounds float* %tmp18016, i64 1
- %tmp18018 = getelementptr inbounds float* %tmp18017, i64 1
- %tmp18019 = getelementptr inbounds float* %tmp18018, i64 1
- %tmp18020 = getelementptr inbounds float* %tmp18019, i64 1
- %tmp18021 = getelementptr inbounds float* %tmp18020, i64 1
- %tmp18022 = getelementptr inbounds float* %tmp18021, i64 1
- %tmp18023 = getelementptr inbounds float* %tmp18022, i64 1
- %tmp18024 = getelementptr inbounds float* %tmp18023, i64 1
- %tmp18025 = getelementptr inbounds float* %tmp18024, i64 1
- %tmp18026 = getelementptr inbounds float* %tmp18025, i64 1
- %tmp18027 = getelementptr inbounds float* %tmp18026, i64 1
- %tmp18028 = getelementptr inbounds float* %tmp18027, i64 1
- %tmp18029 = getelementptr inbounds float* %tmp18028, i64 1
- %tmp18030 = getelementptr inbounds float* %tmp18029, i64 1
- %tmp18031 = getelementptr inbounds float* %tmp18030, i64 1
- %tmp18032 = getelementptr inbounds float* %tmp18031, i64 1
- %tmp18033 = getelementptr inbounds float* %tmp18032, i64 1
- %tmp18034 = getelementptr inbounds float* %tmp18033, i64 1
- %tmp18035 = getelementptr inbounds float* %tmp18034, i64 1
- %tmp18036 = getelementptr inbounds float* %tmp18035, i64 1
- %tmp18037 = getelementptr inbounds float* %tmp18036, i64 1
- %tmp18038 = getelementptr inbounds float* %tmp18037, i64 1
- %tmp18039 = getelementptr inbounds float* %tmp18038, i64 1
- %tmp18040 = getelementptr inbounds float* %tmp18039, i64 1
- %tmp18041 = getelementptr inbounds float* %tmp18040, i64 1
- %tmp18042 = getelementptr inbounds float* %tmp18041, i64 1
- %tmp18043 = getelementptr inbounds float* %tmp18042, i64 1
- %tmp18044 = getelementptr inbounds float* %tmp18043, i64 1
- %tmp18045 = getelementptr inbounds float* %tmp18044, i64 1
- %tmp18046 = getelementptr inbounds float* %tmp18045, i64 1
- %tmp18047 = getelementptr inbounds float* %tmp18046, i64 1
- %tmp18048 = getelementptr inbounds float* %tmp18047, i64 1
- %tmp18049 = getelementptr inbounds float* %tmp18048, i64 1
- %tmp18050 = getelementptr inbounds float* %tmp18049, i64 1
- %tmp18051 = getelementptr inbounds float* %tmp18050, i64 1
- %tmp18052 = getelementptr inbounds float* %tmp18051, i64 1
- %tmp18053 = getelementptr inbounds float* %tmp18052, i64 1
- %tmp18054 = getelementptr inbounds float* %tmp18053, i64 1
- %tmp18055 = getelementptr inbounds float* %tmp18054, i64 1
- %tmp18056 = getelementptr inbounds float* %tmp18055, i64 1
- %tmp18057 = getelementptr inbounds float* %tmp18056, i64 1
- %tmp18058 = getelementptr inbounds float* %tmp18057, i64 1
- %tmp18059 = getelementptr inbounds float* %tmp18058, i64 1
- %tmp18060 = getelementptr inbounds float* %tmp18059, i64 1
- %tmp18061 = getelementptr inbounds float* %tmp18060, i64 1
- %tmp18062 = getelementptr inbounds float* %tmp18061, i64 1
- %tmp18063 = getelementptr inbounds float* %tmp18062, i64 1
- %tmp18064 = getelementptr inbounds float* %tmp18063, i64 1
- %tmp18065 = getelementptr inbounds float* %tmp18064, i64 1
- %tmp18066 = getelementptr inbounds float* %tmp18065, i64 1
- %tmp18067 = getelementptr inbounds float* %tmp18066, i64 1
- %tmp18068 = getelementptr inbounds float* %tmp18067, i64 1
- %tmp18069 = getelementptr inbounds float* %tmp18068, i64 1
- %tmp18070 = getelementptr inbounds float* %tmp18069, i64 1
- %tmp18071 = getelementptr inbounds float* %tmp18070, i64 1
- %tmp18072 = getelementptr inbounds float* %tmp18071, i64 1
- %tmp18073 = getelementptr inbounds float* %tmp18072, i64 1
- %tmp18074 = getelementptr inbounds float* %tmp18073, i64 1
- %tmp18075 = getelementptr inbounds float* %tmp18074, i64 1
- %tmp18076 = getelementptr inbounds float* %tmp18075, i64 1
- %tmp18077 = getelementptr inbounds float* %tmp18076, i64 1
- %tmp18078 = getelementptr inbounds float* %tmp18077, i64 1
- %tmp18079 = getelementptr inbounds float* %tmp18078, i64 1
- %tmp18080 = getelementptr inbounds float* %tmp18079, i64 1
- %tmp18081 = getelementptr inbounds float* %tmp18080, i64 1
- %tmp18082 = getelementptr inbounds float* %tmp18081, i64 1
- %tmp18083 = getelementptr inbounds float* %tmp18082, i64 1
- %tmp18084 = getelementptr inbounds float* %tmp18083, i64 1
- %tmp18085 = getelementptr inbounds float* %tmp18084, i64 1
- %tmp18086 = getelementptr inbounds float* %tmp18085, i64 1
- %tmp18087 = getelementptr inbounds float* %tmp18086, i64 1
- %tmp18088 = getelementptr inbounds float* %tmp18087, i64 1
- %tmp18089 = getelementptr inbounds float* %tmp18088, i64 1
- %tmp18090 = getelementptr inbounds float* %tmp18089, i64 1
- %tmp18091 = getelementptr inbounds float* %tmp18090, i64 1
- %tmp18092 = getelementptr inbounds float* %tmp18091, i64 1
- %tmp18093 = getelementptr inbounds float* %tmp18092, i64 1
- %tmp18094 = getelementptr inbounds float* %tmp18093, i64 1
- %tmp18095 = getelementptr inbounds float* %tmp18094, i64 1
- %tmp18096 = getelementptr inbounds float* %tmp18095, i64 1
- %tmp18097 = getelementptr inbounds float* %tmp18096, i64 1
- %tmp18098 = getelementptr inbounds float* %tmp18097, i64 1
- %tmp18099 = getelementptr inbounds float* %tmp18098, i64 1
- %tmp18100 = getelementptr inbounds float* %tmp18099, i64 1
- %tmp18101 = getelementptr inbounds float* %tmp18100, i64 1
- %tmp18102 = getelementptr inbounds float* %tmp18101, i64 1
- %tmp18103 = getelementptr inbounds float* %tmp18102, i64 1
- %tmp18104 = getelementptr inbounds float* %tmp18103, i64 1
- %tmp18105 = getelementptr inbounds float* %tmp18104, i64 1
- %tmp18106 = getelementptr inbounds float* %tmp18105, i64 1
- %tmp18107 = getelementptr inbounds float* %tmp18106, i64 1
- %tmp18108 = getelementptr inbounds float* %tmp18107, i64 1
- %tmp18109 = getelementptr inbounds float* %tmp18108, i64 1
- %tmp18110 = getelementptr inbounds float* %tmp18109, i64 1
- %tmp18111 = getelementptr inbounds float* %tmp18110, i64 1
- %tmp18112 = getelementptr inbounds float* %tmp18111, i64 1
- %tmp18113 = getelementptr inbounds float* %tmp18112, i64 1
- %tmp18114 = getelementptr inbounds float* %tmp18113, i64 1
- %tmp18115 = getelementptr inbounds float* %tmp18114, i64 1
- %tmp18116 = getelementptr inbounds float* %tmp18115, i64 1
- %tmp18117 = getelementptr inbounds float* %tmp18116, i64 1
- %tmp18118 = getelementptr inbounds float* %tmp18117, i64 1
- %tmp18119 = getelementptr inbounds float* %tmp18118, i64 1
- %tmp18120 = getelementptr inbounds float* %tmp18119, i64 1
- %tmp18121 = getelementptr inbounds float* %tmp18120, i64 1
- %tmp18122 = getelementptr inbounds float* %tmp18121, i64 1
- %tmp18123 = getelementptr inbounds float* %tmp18122, i64 1
- %tmp18124 = getelementptr inbounds float* %tmp18123, i64 1
- %tmp18125 = getelementptr inbounds float* %tmp18124, i64 1
- %tmp18126 = getelementptr inbounds float* %tmp18125, i64 1
- %tmp18127 = getelementptr inbounds float* %tmp18126, i64 1
- %tmp18128 = getelementptr inbounds float* %tmp18127, i64 1
- %tmp18129 = getelementptr inbounds float* %tmp18128, i64 1
- %tmp18130 = getelementptr inbounds float* %tmp18129, i64 1
- %tmp18131 = getelementptr inbounds float* %tmp18130, i64 1
- %tmp18132 = getelementptr inbounds float* %tmp18131, i64 1
- %tmp18133 = getelementptr inbounds float* %tmp18132, i64 1
- %tmp18134 = getelementptr inbounds float* %tmp18133, i64 1
- %tmp18135 = getelementptr inbounds float* %tmp18134, i64 1
- %tmp18136 = getelementptr inbounds float* %tmp18135, i64 1
- %tmp18137 = getelementptr inbounds float* %tmp18136, i64 1
- %tmp18138 = getelementptr inbounds float* %tmp18137, i64 1
- %tmp18139 = getelementptr inbounds float* %tmp18138, i64 1
- %tmp18140 = getelementptr inbounds float* %tmp18139, i64 1
- %tmp18141 = getelementptr inbounds float* %tmp18140, i64 1
- %tmp18142 = getelementptr inbounds float* %tmp18141, i64 1
- %tmp18143 = getelementptr inbounds float* %tmp18142, i64 1
- %tmp18144 = getelementptr inbounds float* %tmp18143, i64 1
- %tmp18145 = getelementptr inbounds float* %tmp18144, i64 1
- %tmp18146 = getelementptr inbounds float* %tmp18145, i64 1
- %tmp18147 = getelementptr inbounds float* %tmp18146, i64 1
- %tmp18148 = getelementptr inbounds float* %tmp18147, i64 1
- %tmp18149 = getelementptr inbounds float* %tmp18148, i64 1
- %tmp18150 = getelementptr inbounds float* %tmp18149, i64 1
- %tmp18151 = getelementptr inbounds float* %tmp18150, i64 1
- %tmp18152 = getelementptr inbounds float* %tmp18151, i64 1
- %tmp18153 = getelementptr inbounds float* %tmp18152, i64 1
- %tmp18154 = getelementptr inbounds float* %tmp18153, i64 1
- %tmp18155 = getelementptr inbounds float* %tmp18154, i64 1
- %tmp18156 = getelementptr inbounds float* %tmp18155, i64 1
- %tmp18157 = getelementptr inbounds float* %tmp18156, i64 1
- %tmp18158 = getelementptr inbounds float* %tmp18157, i64 1
- %tmp18159 = getelementptr inbounds float* %tmp18158, i64 1
- %tmp18160 = getelementptr inbounds float* %tmp18159, i64 1
- %tmp18161 = getelementptr inbounds float* %tmp18160, i64 1
- %tmp18162 = getelementptr inbounds float* %tmp18161, i64 1
- %tmp18163 = getelementptr inbounds float* %tmp18162, i64 1
- %tmp18164 = getelementptr inbounds float* %tmp18163, i64 1
- %tmp18165 = getelementptr inbounds float* %tmp18164, i64 1
- %tmp18166 = getelementptr inbounds float* %tmp18165, i64 1
- %tmp18167 = getelementptr inbounds float* %tmp18166, i64 1
- %tmp18168 = getelementptr inbounds float* %tmp18167, i64 1
- %tmp18169 = getelementptr inbounds float* %tmp18168, i64 1
- %tmp18170 = getelementptr inbounds float* %tmp18169, i64 1
- %tmp18171 = getelementptr inbounds float* %tmp18170, i64 1
- %tmp18172 = getelementptr inbounds float* %tmp18171, i64 1
- %tmp18173 = getelementptr inbounds float* %tmp18172, i64 1
- %tmp18174 = getelementptr inbounds float* %tmp18173, i64 1
- %tmp18175 = getelementptr inbounds float* %tmp18174, i64 1
- %tmp18176 = getelementptr inbounds float* %tmp18175, i64 1
- %tmp18177 = getelementptr inbounds float* %tmp18176, i64 1
- %tmp18178 = getelementptr inbounds float* %tmp18177, i64 1
- %tmp18179 = getelementptr inbounds float* %tmp18178, i64 1
- %tmp18180 = getelementptr inbounds float* %tmp18179, i64 1
- %tmp18181 = getelementptr inbounds float* %tmp18180, i64 1
- %tmp18182 = getelementptr inbounds float* %tmp18181, i64 1
- %tmp18183 = getelementptr inbounds float* %tmp18182, i64 1
- %tmp18184 = getelementptr inbounds float* %tmp18183, i64 1
- %tmp18185 = getelementptr inbounds float* %tmp18184, i64 1
- %tmp18186 = getelementptr inbounds float* %tmp18185, i64 1
- %tmp18187 = getelementptr inbounds float* %tmp18186, i64 1
- %tmp18188 = getelementptr inbounds float* %tmp18187, i64 1
- %tmp18189 = getelementptr inbounds float* %tmp18188, i64 1
- %tmp18190 = getelementptr inbounds float* %tmp18189, i64 1
- %tmp18191 = getelementptr inbounds float* %tmp18190, i64 1
- %tmp18192 = getelementptr inbounds float* %tmp18191, i64 1
- %tmp18193 = getelementptr inbounds float* %tmp18192, i64 1
- %tmp18194 = getelementptr inbounds float* %tmp18193, i64 1
- %tmp18195 = getelementptr inbounds float* %tmp18194, i64 1
- %tmp18196 = getelementptr inbounds float* %tmp18195, i64 1
- %tmp18197 = getelementptr inbounds float* %tmp18196, i64 1
- %tmp18198 = getelementptr inbounds float* %tmp18197, i64 1
- %tmp18199 = getelementptr inbounds float* %tmp18198, i64 1
- %tmp18200 = getelementptr inbounds float* %tmp18199, i64 1
- %tmp18201 = getelementptr inbounds float* %tmp18200, i64 1
- %tmp18202 = getelementptr inbounds float* %tmp18201, i64 1
- %tmp18203 = getelementptr inbounds float* %tmp18202, i64 1
- %tmp18204 = getelementptr inbounds float* %tmp18203, i64 1
- %tmp18205 = getelementptr inbounds float* %tmp18204, i64 1
- %tmp18206 = getelementptr inbounds float* %tmp18205, i64 1
- %tmp18207 = getelementptr inbounds float* %tmp18206, i64 1
- %tmp18208 = getelementptr inbounds float* %tmp18207, i64 1
- %tmp18209 = getelementptr inbounds float* %tmp18208, i64 1
- %tmp18210 = getelementptr inbounds float* %tmp18209, i64 1
- %tmp18211 = getelementptr inbounds float* %tmp18210, i64 1
- %tmp18212 = getelementptr inbounds float* %tmp18211, i64 1
- %tmp18213 = getelementptr inbounds float* %tmp18212, i64 1
- %tmp18214 = getelementptr inbounds float* %tmp18213, i64 1
- %tmp18215 = getelementptr inbounds float* %tmp18214, i64 1
- %tmp18216 = getelementptr inbounds float* %tmp18215, i64 1
- %tmp18217 = getelementptr inbounds float* %tmp18216, i64 1
- %tmp18218 = getelementptr inbounds float* %tmp18217, i64 1
- %tmp18219 = getelementptr inbounds float* %tmp18218, i64 1
- %tmp18220 = getelementptr inbounds float* %tmp18219, i64 1
- %tmp18221 = getelementptr inbounds float* %tmp18220, i64 1
- %tmp18222 = getelementptr inbounds float* %tmp18221, i64 1
- %tmp18223 = getelementptr inbounds float* %tmp18222, i64 1
- %tmp18224 = getelementptr inbounds float* %tmp18223, i64 1
- %tmp18225 = getelementptr inbounds float* %tmp18224, i64 1
- %tmp18226 = getelementptr inbounds float* %tmp18225, i64 1
- %tmp18227 = getelementptr inbounds float* %tmp18226, i64 1
- %tmp18228 = getelementptr inbounds float* %tmp18227, i64 1
- %tmp18229 = getelementptr inbounds float* %tmp18228, i64 1
- %tmp18230 = getelementptr inbounds float* %tmp18229, i64 1
- %tmp18231 = getelementptr inbounds float* %tmp18230, i64 1
- %tmp18232 = getelementptr inbounds float* %tmp18231, i64 1
- %tmp18233 = getelementptr inbounds float* %tmp18232, i64 1
- %tmp18234 = getelementptr inbounds float* %tmp18233, i64 1
- %tmp18235 = getelementptr inbounds float* %tmp18234, i64 1
- %tmp18236 = getelementptr inbounds float* %tmp18235, i64 1
- %tmp18237 = getelementptr inbounds float* %tmp18236, i64 1
- %tmp18238 = getelementptr inbounds float* %tmp18237, i64 1
- %tmp18239 = getelementptr inbounds float* %tmp18238, i64 1
- %tmp18240 = getelementptr inbounds float* %tmp18239, i64 1
- %tmp18241 = getelementptr inbounds float* %tmp18240, i64 1
- %tmp18242 = getelementptr inbounds float* %tmp18241, i64 1
- %tmp18243 = getelementptr inbounds float* %tmp18242, i64 1
- %tmp18244 = getelementptr inbounds float* %tmp18243, i64 1
- %tmp18245 = getelementptr inbounds float* %tmp18244, i64 1
- %tmp18246 = getelementptr inbounds float* %tmp18245, i64 1
- %tmp18247 = getelementptr inbounds float* %tmp18246, i64 1
- %tmp18248 = getelementptr inbounds float* %tmp18247, i64 1
- %tmp18249 = getelementptr inbounds float* %tmp18248, i64 1
- %tmp18250 = getelementptr inbounds float* %tmp18249, i64 1
- %tmp18251 = getelementptr inbounds float* %tmp18250, i64 1
- %tmp18252 = getelementptr inbounds float* %tmp18251, i64 1
- %tmp18253 = getelementptr inbounds float* %tmp18252, i64 1
- %tmp18254 = getelementptr inbounds float* %tmp18253, i64 1
- %tmp18255 = getelementptr inbounds float* %tmp18254, i64 1
- %tmp18256 = getelementptr inbounds float* %tmp18255, i64 1
- %tmp18257 = getelementptr inbounds float* %tmp18256, i64 1
- %tmp18258 = getelementptr inbounds float* %tmp18257, i64 1
- %tmp18259 = getelementptr inbounds float* %tmp18258, i64 1
- %tmp18260 = getelementptr inbounds float* %tmp18259, i64 1
- %tmp18261 = getelementptr inbounds float* %tmp18260, i64 1
- %tmp18262 = getelementptr inbounds float* %tmp18261, i64 1
- %tmp18263 = getelementptr inbounds float* %tmp18262, i64 1
- %tmp18264 = getelementptr inbounds float* %tmp18263, i64 1
- %tmp18265 = getelementptr inbounds float* %tmp18264, i64 1
- %tmp18266 = getelementptr inbounds float* %tmp18265, i64 1
- %tmp18267 = getelementptr inbounds float* %tmp18266, i64 1
- %tmp18268 = getelementptr inbounds float* %tmp18267, i64 1
- %tmp18269 = getelementptr inbounds float* %tmp18268, i64 1
- %tmp18270 = getelementptr inbounds float* %tmp18269, i64 1
- %tmp18271 = getelementptr inbounds float* %tmp18270, i64 1
- %tmp18272 = getelementptr inbounds float* %tmp18271, i64 1
- %tmp18273 = getelementptr inbounds float* %tmp18272, i64 1
- %tmp18274 = getelementptr inbounds float* %tmp18273, i64 1
- %tmp18275 = getelementptr inbounds float* %tmp18274, i64 1
- %tmp18276 = getelementptr inbounds float* %tmp18275, i64 1
- %tmp18277 = getelementptr inbounds float* %tmp18276, i64 1
- %tmp18278 = getelementptr inbounds float* %tmp18277, i64 1
- %tmp18279 = getelementptr inbounds float* %tmp18278, i64 1
- %tmp18280 = getelementptr inbounds float* %tmp18279, i64 1
- %tmp18281 = getelementptr inbounds float* %tmp18280, i64 1
- %tmp18282 = getelementptr inbounds float* %tmp18281, i64 1
- %tmp18283 = getelementptr inbounds float* %tmp18282, i64 1
- %tmp18284 = getelementptr inbounds float* %tmp18283, i64 1
- %tmp18285 = getelementptr inbounds float* %tmp18284, i64 1
- %tmp18286 = getelementptr inbounds float* %tmp18285, i64 1
- %tmp18287 = getelementptr inbounds float* %tmp18286, i64 1
- %tmp18288 = getelementptr inbounds float* %tmp18287, i64 1
- %tmp18289 = getelementptr inbounds float* %tmp18288, i64 1
- %tmp18290 = getelementptr inbounds float* %tmp18289, i64 1
- %tmp18291 = getelementptr inbounds float* %tmp18290, i64 1
- %tmp18292 = getelementptr inbounds float* %tmp18291, i64 1
- %tmp18293 = getelementptr inbounds float* %tmp18292, i64 1
- %tmp18294 = getelementptr inbounds float* %tmp18293, i64 1
- %tmp18295 = getelementptr inbounds float* %tmp18294, i64 1
- %tmp18296 = getelementptr inbounds float* %tmp18295, i64 1
- %tmp18297 = getelementptr inbounds float* %tmp18296, i64 1
- %tmp18298 = getelementptr inbounds float* %tmp18297, i64 1
- %tmp18299 = getelementptr inbounds float* %tmp18298, i64 1
- %tmp18300 = getelementptr inbounds float* %tmp18299, i64 1
- %tmp18301 = getelementptr inbounds float* %tmp18300, i64 1
- %tmp18302 = getelementptr inbounds float* %tmp18301, i64 1
- %tmp18303 = getelementptr inbounds float* %tmp18302, i64 1
- %tmp18304 = getelementptr inbounds float* %tmp18303, i64 1
- %tmp18305 = getelementptr inbounds float* %tmp18304, i64 1
- %tmp18306 = getelementptr inbounds float* %tmp18305, i64 1
- %tmp18307 = getelementptr inbounds float* %tmp18306, i64 1
- %tmp18308 = getelementptr inbounds float* %tmp18307, i64 1
- %tmp18309 = getelementptr inbounds float* %tmp18308, i64 1
- %tmp18310 = getelementptr inbounds float* %tmp18309, i64 1
- %tmp18311 = getelementptr inbounds float* %tmp18310, i64 1
- %tmp18312 = getelementptr inbounds float* %tmp18311, i64 1
- %tmp18313 = getelementptr inbounds float* %tmp18312, i64 1
- %tmp18314 = getelementptr inbounds float* %tmp18313, i64 1
- %tmp18315 = getelementptr inbounds float* %tmp18314, i64 1
- %tmp18316 = getelementptr inbounds float* %tmp18315, i64 1
- %tmp18317 = getelementptr inbounds float* %tmp18316, i64 1
- %tmp18318 = getelementptr inbounds float* %tmp18317, i64 1
- %tmp18319 = getelementptr inbounds float* %tmp18318, i64 1
- %tmp18320 = getelementptr inbounds float* %tmp18319, i64 1
- %tmp18321 = getelementptr inbounds float* %tmp18320, i64 1
- %tmp18322 = getelementptr inbounds float* %tmp18321, i64 1
- %tmp18323 = getelementptr inbounds float* %tmp18322, i64 1
- %tmp18324 = getelementptr inbounds float* %tmp18323, i64 1
- %tmp18325 = getelementptr inbounds float* %tmp18324, i64 1
- %tmp18326 = getelementptr inbounds float* %tmp18325, i64 1
- %tmp18327 = getelementptr inbounds float* %tmp18326, i64 1
- %tmp18328 = getelementptr inbounds float* %tmp18327, i64 1
- %tmp18329 = getelementptr inbounds float* %tmp18328, i64 1
- %tmp18330 = getelementptr inbounds float* %tmp18329, i64 1
- %tmp18331 = getelementptr inbounds float* %tmp18330, i64 1
- %tmp18332 = getelementptr inbounds float* %tmp18331, i64 1
- %tmp18333 = getelementptr inbounds float* %tmp18332, i64 1
- %tmp18334 = getelementptr inbounds float* %tmp18333, i64 1
- %tmp18335 = getelementptr inbounds float* %tmp18334, i64 1
- %tmp18336 = getelementptr inbounds float* %tmp18335, i64 1
- %tmp18337 = getelementptr inbounds float* %tmp18336, i64 1
- %tmp18338 = getelementptr inbounds float* %tmp18337, i64 1
- %tmp18339 = getelementptr inbounds float* %tmp18338, i64 1
- %tmp18340 = getelementptr inbounds float* %tmp18339, i64 1
- %tmp18341 = getelementptr inbounds float* %tmp18340, i64 1
- %tmp18342 = getelementptr inbounds float* %tmp18341, i64 1
- %tmp18343 = getelementptr inbounds float* %tmp18342, i64 1
- %tmp18344 = getelementptr inbounds float* %tmp18343, i64 1
- %tmp18345 = getelementptr inbounds float* %tmp18344, i64 1
- %tmp18346 = getelementptr inbounds float* %tmp18345, i64 1
- %tmp18347 = getelementptr inbounds float* %tmp18346, i64 1
- %tmp18348 = getelementptr inbounds float* %tmp18347, i64 1
- %tmp18349 = getelementptr inbounds float* %tmp18348, i64 1
- %tmp18350 = getelementptr inbounds float* %tmp18349, i64 1
- %tmp18351 = getelementptr inbounds float* %tmp18350, i64 1
- %tmp18352 = getelementptr inbounds float* %tmp18351, i64 1
- %tmp18353 = getelementptr inbounds float* %tmp18352, i64 1
- %tmp18354 = getelementptr inbounds float* %tmp18353, i64 1
- %tmp18355 = getelementptr inbounds float* %tmp18354, i64 1
- %tmp18356 = getelementptr inbounds float* %tmp18355, i64 1
- %tmp18357 = getelementptr inbounds float* %tmp18356, i64 1
- %tmp18358 = getelementptr inbounds float* %tmp18357, i64 1
- %tmp18359 = getelementptr inbounds float* %tmp18358, i64 1
- %tmp18360 = getelementptr inbounds float* %tmp18359, i64 1
- %tmp18361 = getelementptr inbounds float* %tmp18360, i64 1
- %tmp18362 = getelementptr inbounds float* %tmp18361, i64 1
- %tmp18363 = getelementptr inbounds float* %tmp18362, i64 1
- %tmp18364 = getelementptr inbounds float* %tmp18363, i64 1
- %tmp18365 = getelementptr inbounds float* %tmp18364, i64 1
- %tmp18366 = getelementptr inbounds float* %tmp18365, i64 1
- %tmp18367 = getelementptr inbounds float* %tmp18366, i64 1
- %tmp18368 = getelementptr inbounds float* %tmp18367, i64 1
- %tmp18369 = getelementptr inbounds float* %tmp18368, i64 1
- %tmp18370 = getelementptr inbounds float* %tmp18369, i64 1
- %tmp18371 = getelementptr inbounds float* %tmp18370, i64 1
- %tmp18372 = getelementptr inbounds float* %tmp18371, i64 1
- %tmp18373 = getelementptr inbounds float* %tmp18372, i64 1
- %tmp18374 = getelementptr inbounds float* %tmp18373, i64 1
- %tmp18375 = getelementptr inbounds float* %tmp18374, i64 1
- %tmp18376 = getelementptr inbounds float* %tmp18375, i64 1
- %tmp18377 = getelementptr inbounds float* %tmp18376, i64 1
- %tmp18378 = getelementptr inbounds float* %tmp18377, i64 1
- %tmp18379 = getelementptr inbounds float* %tmp18378, i64 1
- %tmp18380 = getelementptr inbounds float* %tmp18379, i64 1
- %tmp18381 = getelementptr inbounds float* %tmp18380, i64 1
- %tmp18382 = getelementptr inbounds float* %tmp18381, i64 1
- %tmp18383 = getelementptr inbounds float* %tmp18382, i64 1
- %tmp18384 = getelementptr inbounds float* %tmp18383, i64 1
- %tmp18385 = getelementptr inbounds float* %tmp18384, i64 1
- %tmp18386 = getelementptr inbounds float* %tmp18385, i64 1
- %tmp18387 = getelementptr inbounds float* %tmp18386, i64 1
- %tmp18388 = getelementptr inbounds float* %tmp18387, i64 1
- %tmp18389 = getelementptr inbounds float* %tmp18388, i64 1
- %tmp18390 = getelementptr inbounds float* %tmp18389, i64 1
- %tmp18391 = getelementptr inbounds float* %tmp18390, i64 1
- %tmp18392 = getelementptr inbounds float* %tmp18391, i64 1
- %tmp18393 = getelementptr inbounds float* %tmp18392, i64 1
- %tmp18394 = getelementptr inbounds float* %tmp18393, i64 1
- %tmp18395 = getelementptr inbounds float* %tmp18394, i64 1
- %tmp18396 = getelementptr inbounds float* %tmp18395, i64 1
- %tmp18397 = getelementptr inbounds float* %tmp18396, i64 1
- %tmp18398 = getelementptr inbounds float* %tmp18397, i64 1
- %tmp18399 = getelementptr inbounds float* %tmp18398, i64 1
- %tmp18400 = getelementptr inbounds float* %tmp18399, i64 1
- %tmp18401 = getelementptr inbounds float* %tmp18400, i64 1
- %tmp18402 = getelementptr inbounds float* %tmp18401, i64 1
- %tmp18403 = getelementptr inbounds float* %tmp18402, i64 1
- %tmp18404 = getelementptr inbounds float* %tmp18403, i64 1
- %tmp18405 = getelementptr inbounds float* %tmp18404, i64 1
- %tmp18406 = getelementptr inbounds float* %tmp18405, i64 1
- %tmp18407 = getelementptr inbounds float* %tmp18406, i64 1
- %tmp18408 = getelementptr inbounds float* %tmp18407, i64 1
- %tmp18409 = getelementptr inbounds float* %tmp18408, i64 1
- %tmp18410 = getelementptr inbounds float* %tmp18409, i64 1
- %tmp18411 = getelementptr inbounds float* %tmp18410, i64 1
- %tmp18412 = getelementptr inbounds float* %tmp18411, i64 1
- %tmp18413 = getelementptr inbounds float* %tmp18412, i64 1
- %tmp18414 = getelementptr inbounds float* %tmp18413, i64 1
- %tmp18415 = getelementptr inbounds float* %tmp18414, i64 1
- %tmp18416 = getelementptr inbounds float* %tmp18415, i64 1
- %tmp18417 = getelementptr inbounds float* %tmp18416, i64 1
- %tmp18418 = getelementptr inbounds float* %tmp18417, i64 1
- %tmp18419 = getelementptr inbounds float* %tmp18418, i64 1
- %tmp18420 = getelementptr inbounds float* %tmp18419, i64 1
- %tmp18421 = getelementptr inbounds float* %tmp18420, i64 1
- %tmp18422 = getelementptr inbounds float* %tmp18421, i64 1
- %tmp18423 = getelementptr inbounds float* %tmp18422, i64 1
- %tmp18424 = getelementptr inbounds float* %tmp18423, i64 1
- %tmp18425 = getelementptr inbounds float* %tmp18424, i64 1
- %tmp18426 = getelementptr inbounds float* %tmp18425, i64 1
- %tmp18427 = getelementptr inbounds float* %tmp18426, i64 1
- %tmp18428 = getelementptr inbounds float* %tmp18427, i64 1
- %tmp18429 = getelementptr inbounds float* %tmp18428, i64 1
- %tmp18430 = getelementptr inbounds float* %tmp18429, i64 1
- %tmp18431 = getelementptr inbounds float* %tmp18430, i64 1
- %tmp18432 = getelementptr inbounds float* %tmp18431, i64 1
- %tmp18433 = getelementptr inbounds float* %tmp18432, i64 1
- %tmp18434 = getelementptr inbounds float* %tmp18433, i64 1
- %tmp18435 = getelementptr inbounds float* %tmp18434, i64 1
- %tmp18436 = getelementptr inbounds float* %tmp18435, i64 1
- %tmp18437 = getelementptr inbounds float* %tmp18436, i64 1
- %tmp18438 = getelementptr inbounds float* %tmp18437, i64 1
- %tmp18439 = getelementptr inbounds float* %tmp18438, i64 1
- %tmp18440 = getelementptr inbounds float* %tmp18439, i64 1
- %tmp18441 = getelementptr inbounds float* %tmp18440, i64 1
- %tmp18442 = getelementptr inbounds float* %tmp18441, i64 1
- %tmp18443 = getelementptr inbounds float* %tmp18442, i64 1
- %tmp18444 = getelementptr inbounds float* %tmp18443, i64 1
- %tmp18445 = getelementptr inbounds float* %tmp18444, i64 1
- %tmp18446 = getelementptr inbounds float* %tmp18445, i64 1
- %tmp18447 = getelementptr inbounds float* %tmp18446, i64 1
- %tmp18448 = getelementptr inbounds float* %tmp18447, i64 1
- %tmp18449 = getelementptr inbounds float* %tmp18448, i64 1
- %tmp18450 = getelementptr inbounds float* %tmp18449, i64 1
- %tmp18451 = getelementptr inbounds float* %tmp18450, i64 1
- %tmp18452 = getelementptr inbounds float* %tmp18451, i64 1
- %tmp18453 = getelementptr inbounds float* %tmp18452, i64 1
- %tmp18454 = getelementptr inbounds float* %tmp18453, i64 1
- %tmp18455 = getelementptr inbounds float* %tmp18454, i64 1
- %tmp18456 = getelementptr inbounds float* %tmp18455, i64 1
- %tmp18457 = getelementptr inbounds float* %tmp18456, i64 1
- %tmp18458 = getelementptr inbounds float* %tmp18457, i64 1
- %tmp18459 = getelementptr inbounds float* %tmp18458, i64 1
- %tmp18460 = getelementptr inbounds float* %tmp18459, i64 1
- %tmp18461 = getelementptr inbounds float* %tmp18460, i64 1
- %tmp18462 = getelementptr inbounds float* %tmp18461, i64 1
- %tmp18463 = getelementptr inbounds float* %tmp18462, i64 1
- %tmp18464 = getelementptr inbounds float* %tmp18463, i64 1
- %tmp18465 = getelementptr inbounds float* %tmp18464, i64 1
- %tmp18466 = getelementptr inbounds float* %tmp18465, i64 1
- %tmp18467 = getelementptr inbounds float* %tmp18466, i64 1
- %tmp18468 = getelementptr inbounds float* %tmp18467, i64 1
- %tmp18469 = getelementptr inbounds float* %tmp18468, i64 1
- %tmp18470 = getelementptr inbounds float* %tmp18469, i64 1
- %tmp18471 = getelementptr inbounds float* %tmp18470, i64 1
- %tmp18472 = getelementptr inbounds float* %tmp18471, i64 1
- %tmp18473 = getelementptr inbounds float* %tmp18472, i64 1
- %tmp18474 = getelementptr inbounds float* %tmp18473, i64 1
- %tmp18475 = getelementptr inbounds float* %tmp18474, i64 1
- %tmp18476 = getelementptr inbounds float* %tmp18475, i64 1
- %tmp18477 = getelementptr inbounds float* %tmp18476, i64 1
- %tmp18478 = getelementptr inbounds float* %tmp18477, i64 1
- %tmp18479 = getelementptr inbounds float* %tmp18478, i64 1
- %tmp18480 = getelementptr inbounds float* %tmp18479, i64 1
- %tmp18481 = getelementptr inbounds float* %tmp18480, i64 1
- %tmp18482 = getelementptr inbounds float* %tmp18481, i64 1
- %tmp18483 = getelementptr inbounds float* %tmp18482, i64 1
- %tmp18484 = getelementptr inbounds float* %tmp18483, i64 1
- %tmp18485 = getelementptr inbounds float* %tmp18484, i64 1
- %tmp18486 = getelementptr inbounds float* %tmp18485, i64 1
- %tmp18487 = getelementptr inbounds float* %tmp18486, i64 1
- %tmp18488 = getelementptr inbounds float* %tmp18487, i64 1
- %tmp18489 = getelementptr inbounds float* %tmp18488, i64 1
- %tmp18490 = getelementptr inbounds float* %tmp18489, i64 1
- %tmp18491 = getelementptr inbounds float* %tmp18490, i64 1
- %tmp18492 = getelementptr inbounds float* %tmp18491, i64 1
- %tmp18493 = getelementptr inbounds float* %tmp18492, i64 1
- %tmp18494 = getelementptr inbounds float* %tmp18493, i64 1
- %tmp18495 = getelementptr inbounds float* %tmp18494, i64 1
- %tmp18496 = getelementptr inbounds float* %tmp18495, i64 1
- %tmp18497 = getelementptr inbounds float* %tmp18496, i64 1
- %tmp18498 = getelementptr inbounds float* %tmp18497, i64 1
- %tmp18499 = getelementptr inbounds float* %tmp18498, i64 1
- %tmp18500 = getelementptr inbounds float* %tmp18499, i64 1
- %tmp18501 = getelementptr inbounds float* %tmp18500, i64 1
- %tmp18502 = getelementptr inbounds float* %tmp18501, i64 1
- %tmp18503 = getelementptr inbounds float* %tmp18502, i64 1
- %tmp18504 = getelementptr inbounds float* %tmp18503, i64 1
- %tmp18505 = getelementptr inbounds float* %tmp18504, i64 1
- %tmp18506 = getelementptr inbounds float* %tmp18505, i64 1
- %tmp18507 = getelementptr inbounds float* %tmp18506, i64 1
- %tmp18508 = getelementptr inbounds float* %tmp18507, i64 1
- %tmp18509 = getelementptr inbounds float* %tmp18508, i64 1
- %tmp18510 = getelementptr inbounds float* %tmp18509, i64 1
- %tmp18511 = getelementptr inbounds float* %tmp18510, i64 1
- %tmp18512 = getelementptr inbounds float* %tmp18511, i64 1
- %tmp18513 = getelementptr inbounds float* %tmp18512, i64 1
- %tmp18514 = getelementptr inbounds float* %tmp18513, i64 1
- %tmp18515 = getelementptr inbounds float* %tmp18514, i64 1
- %tmp18516 = getelementptr inbounds float* %tmp18515, i64 1
- %tmp18517 = getelementptr inbounds float* %tmp18516, i64 1
- %tmp18518 = getelementptr inbounds float* %tmp18517, i64 1
- %tmp18519 = getelementptr inbounds float* %tmp18518, i64 1
- %tmp18520 = getelementptr inbounds float* %tmp18519, i64 1
- %tmp18521 = getelementptr inbounds float* %tmp18520, i64 1
- %tmp18522 = getelementptr inbounds float* %tmp18521, i64 1
- %tmp18523 = getelementptr inbounds float* %tmp18522, i64 1
- %tmp18524 = getelementptr inbounds float* %tmp18523, i64 1
- %tmp18525 = getelementptr inbounds float* %tmp18524, i64 1
- %tmp18526 = getelementptr inbounds float* %tmp18525, i64 1
- %tmp18527 = getelementptr inbounds float* %tmp18526, i64 1
- %tmp18528 = getelementptr inbounds float* %tmp18527, i64 1
- %tmp18529 = getelementptr inbounds float* %tmp18528, i64 1
- %tmp18530 = getelementptr inbounds float* %tmp18529, i64 1
- %tmp18531 = getelementptr inbounds float* %tmp18530, i64 1
- %tmp18532 = getelementptr inbounds float* %tmp18531, i64 1
- %tmp18533 = getelementptr inbounds float* %tmp18532, i64 1
- %tmp18534 = getelementptr inbounds float* %tmp18533, i64 1
- %tmp18535 = getelementptr inbounds float* %tmp18534, i64 1
- %tmp18536 = getelementptr inbounds float* %tmp18535, i64 1
- %tmp18537 = getelementptr inbounds float* %tmp18536, i64 1
- %tmp18538 = getelementptr inbounds float* %tmp18537, i64 1
- %tmp18539 = getelementptr inbounds float* %tmp18538, i64 1
- %tmp18540 = getelementptr inbounds float* %tmp18539, i64 1
- %tmp18541 = getelementptr inbounds float* %tmp18540, i64 1
- %tmp18542 = getelementptr inbounds float* %tmp18541, i64 1
- %tmp18543 = getelementptr inbounds float* %tmp18542, i64 1
- %tmp18544 = getelementptr inbounds float* %tmp18543, i64 1
- %tmp18545 = getelementptr inbounds float* %tmp18544, i64 1
- %tmp18546 = getelementptr inbounds float* %tmp18545, i64 1
- %tmp18547 = getelementptr inbounds float* %tmp18546, i64 1
- %tmp18548 = getelementptr inbounds float* %tmp18547, i64 1
- %tmp18549 = getelementptr inbounds float* %tmp18548, i64 1
- %tmp18550 = getelementptr inbounds float* %tmp18549, i64 1
- %tmp18551 = getelementptr inbounds float* %tmp18550, i64 1
- %tmp18552 = getelementptr inbounds float* %tmp18551, i64 1
- %tmp18553 = getelementptr inbounds float* %tmp18552, i64 1
- %tmp18554 = getelementptr inbounds float* %tmp18553, i64 1
- %tmp18555 = getelementptr inbounds float* %tmp18554, i64 1
- %tmp18556 = getelementptr inbounds float* %tmp18555, i64 1
- %tmp18557 = getelementptr inbounds float* %tmp18556, i64 1
- %tmp18558 = getelementptr inbounds float* %tmp18557, i64 1
- %tmp18559 = getelementptr inbounds float* %tmp18558, i64 1
- %tmp18560 = getelementptr inbounds float* %tmp18559, i64 1
- %tmp18561 = getelementptr inbounds float* %tmp18560, i64 1
- %tmp18562 = getelementptr inbounds float* %tmp18561, i64 1
- %tmp18563 = getelementptr inbounds float* %tmp18562, i64 1
- %tmp18564 = getelementptr inbounds float* %tmp18563, i64 1
- %tmp18565 = getelementptr inbounds float* %tmp18564, i64 1
- %tmp18566 = getelementptr inbounds float* %tmp18565, i64 1
- %tmp18567 = getelementptr inbounds float* %tmp18566, i64 1
- %tmp18568 = getelementptr inbounds float* %tmp18567, i64 1
- %tmp18569 = getelementptr inbounds float* %tmp18568, i64 1
- %tmp18570 = getelementptr inbounds float* %tmp18569, i64 1
- %tmp18571 = getelementptr inbounds float* %tmp18570, i64 1
- %tmp18572 = getelementptr inbounds float* %tmp18571, i64 1
- %tmp18573 = getelementptr inbounds float* %tmp18572, i64 1
- %tmp18574 = getelementptr inbounds float* %tmp18573, i64 1
- %tmp18575 = getelementptr inbounds float* %tmp18574, i64 1
- %tmp18576 = getelementptr inbounds float* %tmp18575, i64 1
- %tmp18577 = getelementptr inbounds float* %tmp18576, i64 1
- %tmp18578 = getelementptr inbounds float* %tmp18577, i64 1
- %tmp18579 = getelementptr inbounds float* %tmp18578, i64 1
- %tmp18580 = getelementptr inbounds float* %tmp18579, i64 1
- %tmp18581 = getelementptr inbounds float* %tmp18580, i64 1
- %tmp18582 = getelementptr inbounds float* %tmp18581, i64 1
- %tmp18583 = getelementptr inbounds float* %tmp18582, i64 1
- %tmp18584 = getelementptr inbounds float* %tmp18583, i64 1
- %tmp18585 = getelementptr inbounds float* %tmp18584, i64 1
- %tmp18586 = getelementptr inbounds float* %tmp18585, i64 1
- %tmp18587 = getelementptr inbounds float* %tmp18586, i64 1
- %tmp18588 = getelementptr inbounds float* %tmp18587, i64 1
- %tmp18589 = getelementptr inbounds float* %tmp18588, i64 1
- %tmp18590 = getelementptr inbounds float* %tmp18589, i64 1
- %tmp18591 = getelementptr inbounds float* %tmp18590, i64 1
- %tmp18592 = getelementptr inbounds float* %tmp18591, i64 1
- %tmp18593 = getelementptr inbounds float* %tmp18592, i64 1
- %tmp18594 = getelementptr inbounds float* %tmp18593, i64 1
- %tmp18595 = getelementptr inbounds float* %tmp18594, i64 1
- %tmp18596 = getelementptr inbounds float* %tmp18595, i64 1
- %tmp18597 = getelementptr inbounds float* %tmp18596, i64 1
- %tmp18598 = getelementptr inbounds float* %tmp18597, i64 1
- %tmp18599 = getelementptr inbounds float* %tmp18598, i64 1
- %tmp18600 = getelementptr inbounds float* %tmp18599, i64 1
- %tmp18601 = getelementptr inbounds float* %tmp18600, i64 1
- %tmp18602 = getelementptr inbounds float* %tmp18601, i64 1
- %tmp18603 = getelementptr inbounds float* %tmp18602, i64 1
- %tmp18604 = getelementptr inbounds float* %tmp18603, i64 1
- %tmp18605 = getelementptr inbounds float* %tmp18604, i64 1
- %tmp18606 = getelementptr inbounds float* %tmp18605, i64 1
- %tmp18607 = getelementptr inbounds float* %tmp18606, i64 1
- %tmp18608 = getelementptr inbounds float* %tmp18607, i64 1
- %tmp18609 = getelementptr inbounds float* %tmp18608, i64 1
- %tmp18610 = getelementptr inbounds float* %tmp18609, i64 1
- %tmp18611 = getelementptr inbounds float* %tmp18610, i64 1
- %tmp18612 = getelementptr inbounds float* %tmp18611, i64 1
- %tmp18613 = getelementptr inbounds float* %tmp18612, i64 1
- %tmp18614 = getelementptr inbounds float* %tmp18613, i64 1
- %tmp18615 = getelementptr inbounds float* %tmp18614, i64 1
- %tmp18616 = getelementptr inbounds float* %tmp18615, i64 1
- %tmp18617 = getelementptr inbounds float* %tmp18616, i64 1
- %tmp18618 = getelementptr inbounds float* %tmp18617, i64 1
- %tmp18619 = getelementptr inbounds float* %tmp18618, i64 1
- %tmp18620 = getelementptr inbounds float* %tmp18619, i64 1
- %tmp18621 = getelementptr inbounds float* %tmp18620, i64 1
- %tmp18622 = getelementptr inbounds float* %tmp18621, i64 1
- %tmp18623 = getelementptr inbounds float* %tmp18622, i64 1
- %tmp18624 = getelementptr inbounds float* %tmp18623, i64 1
- %tmp18625 = getelementptr inbounds float* %tmp18624, i64 1
- %tmp18626 = getelementptr inbounds float* %tmp18625, i64 1
- %tmp18627 = getelementptr inbounds float* %tmp18626, i64 1
- %tmp18628 = getelementptr inbounds float* %tmp18627, i64 1
- %tmp18629 = getelementptr inbounds float* %tmp18628, i64 1
- %tmp18630 = getelementptr inbounds float* %tmp18629, i64 1
- %tmp18631 = getelementptr inbounds float* %tmp18630, i64 1
- %tmp18632 = getelementptr inbounds float* %tmp18631, i64 1
- %tmp18633 = getelementptr inbounds float* %tmp18632, i64 1
- %tmp18634 = getelementptr inbounds float* %tmp18633, i64 1
- %tmp18635 = getelementptr inbounds float* %tmp18634, i64 1
- %tmp18636 = getelementptr inbounds float* %tmp18635, i64 1
- %tmp18637 = getelementptr inbounds float* %tmp18636, i64 1
- %tmp18638 = getelementptr inbounds float* %tmp18637, i64 1
- %tmp18639 = getelementptr inbounds float* %tmp18638, i64 1
- %tmp18640 = getelementptr inbounds float* %tmp18639, i64 1
- %tmp18641 = getelementptr inbounds float* %tmp18640, i64 1
- %tmp18642 = getelementptr inbounds float* %tmp18641, i64 1
- %tmp18643 = getelementptr inbounds float* %tmp18642, i64 1
- %tmp18644 = getelementptr inbounds float* %tmp18643, i64 1
- %tmp18645 = getelementptr inbounds float* %tmp18644, i64 1
- %tmp18646 = getelementptr inbounds float* %tmp18645, i64 1
- %tmp18647 = getelementptr inbounds float* %tmp18646, i64 1
- %tmp18648 = getelementptr inbounds float* %tmp18647, i64 1
- %tmp18649 = getelementptr inbounds float* %tmp18648, i64 1
- %tmp18650 = getelementptr inbounds float* %tmp18649, i64 1
- %tmp18651 = getelementptr inbounds float* %tmp18650, i64 1
- %tmp18652 = getelementptr inbounds float* %tmp18651, i64 1
- %tmp18653 = getelementptr inbounds float* %tmp18652, i64 1
- %tmp18654 = getelementptr inbounds float* %tmp18653, i64 1
- %tmp18655 = getelementptr inbounds float* %tmp18654, i64 1
- %tmp18656 = getelementptr inbounds float* %tmp18655, i64 1
- %tmp18657 = getelementptr inbounds float* %tmp18656, i64 1
- %tmp18658 = getelementptr inbounds float* %tmp18657, i64 1
- %tmp18659 = getelementptr inbounds float* %tmp18658, i64 1
- %tmp18660 = getelementptr inbounds float* %tmp18659, i64 1
- %tmp18661 = getelementptr inbounds float* %tmp18660, i64 1
- %tmp18662 = getelementptr inbounds float* %tmp18661, i64 1
- %tmp18663 = getelementptr inbounds float* %tmp18662, i64 1
- %tmp18664 = getelementptr inbounds float* %tmp18663, i64 1
- %tmp18665 = getelementptr inbounds float* %tmp18664, i64 1
- %tmp18666 = getelementptr inbounds float* %tmp18665, i64 1
- %tmp18667 = getelementptr inbounds float* %tmp18666, i64 1
- %tmp18668 = getelementptr inbounds float* %tmp18667, i64 1
- %tmp18669 = getelementptr inbounds float* %tmp18668, i64 1
- %tmp18670 = getelementptr inbounds float* %tmp18669, i64 1
- %tmp18671 = getelementptr inbounds float* %tmp18670, i64 1
- %tmp18672 = getelementptr inbounds float* %tmp18671, i64 1
- %tmp18673 = getelementptr inbounds float* %tmp18672, i64 1
- %tmp18674 = getelementptr inbounds float* %tmp18673, i64 1
- %tmp18675 = getelementptr inbounds float* %tmp18674, i64 1
- %tmp18676 = getelementptr inbounds float* %tmp18675, i64 1
- %tmp18677 = getelementptr inbounds float* %tmp18676, i64 1
- %tmp18678 = getelementptr inbounds float* %tmp18677, i64 1
- %tmp18679 = getelementptr inbounds float* %tmp18678, i64 1
- %tmp18680 = getelementptr inbounds float* %tmp18679, i64 1
- %tmp18681 = getelementptr inbounds float* %tmp18680, i64 1
- %tmp18682 = getelementptr inbounds float* %tmp18681, i64 1
- %tmp18683 = getelementptr inbounds float* %tmp18682, i64 1
- %tmp18684 = getelementptr inbounds float* %tmp18683, i64 1
- %tmp18685 = getelementptr inbounds float* %tmp18684, i64 1
- %tmp18686 = getelementptr inbounds float* %tmp18685, i64 1
- %tmp18687 = getelementptr inbounds float* %tmp18686, i64 1
- %tmp18688 = getelementptr inbounds float* %tmp18687, i64 1
- %tmp18689 = getelementptr inbounds float* %tmp18688, i64 1
- %tmp18690 = getelementptr inbounds float* %tmp18689, i64 1
- %tmp18691 = getelementptr inbounds float* %tmp18690, i64 1
- %tmp18692 = getelementptr inbounds float* %tmp18691, i64 1
- %tmp18693 = getelementptr inbounds float* %tmp18692, i64 1
- %tmp18694 = getelementptr inbounds float* %tmp18693, i64 1
- %tmp18695 = getelementptr inbounds float* %tmp18694, i64 1
- %tmp18696 = getelementptr inbounds float* %tmp18695, i64 1
- %tmp18697 = getelementptr inbounds float* %tmp18696, i64 1
- %tmp18698 = getelementptr inbounds float* %tmp18697, i64 1
- %tmp18699 = getelementptr inbounds float* %tmp18698, i64 1
- %tmp18700 = getelementptr inbounds float* %tmp18699, i64 1
- %tmp18701 = getelementptr inbounds float* %tmp18700, i64 1
- %tmp18702 = getelementptr inbounds float* %tmp18701, i64 1
- %tmp18703 = getelementptr inbounds float* %tmp18702, i64 1
- %tmp18704 = getelementptr inbounds float* %tmp18703, i64 1
- %tmp18705 = getelementptr inbounds float* %tmp18704, i64 1
- %tmp18706 = getelementptr inbounds float* %tmp18705, i64 1
- %tmp18707 = getelementptr inbounds float* %tmp18706, i64 1
- %tmp18708 = getelementptr inbounds float* %tmp18707, i64 1
- %tmp18709 = getelementptr inbounds float* %tmp18708, i64 1
- %tmp18710 = getelementptr inbounds float* %tmp18709, i64 1
- %tmp18711 = getelementptr inbounds float* %tmp18710, i64 1
- %tmp18712 = getelementptr inbounds float* %tmp18711, i64 1
- %tmp18713 = getelementptr inbounds float* %tmp18712, i64 1
- %tmp18714 = getelementptr inbounds float* %tmp18713, i64 1
- %tmp18715 = getelementptr inbounds float* %tmp18714, i64 1
- %tmp18716 = getelementptr inbounds float* %tmp18715, i64 1
- %tmp18717 = getelementptr inbounds float* %tmp18716, i64 1
- %tmp18718 = getelementptr inbounds float* %tmp18717, i64 1
- %tmp18719 = getelementptr inbounds float* %tmp18718, i64 1
- %tmp18720 = getelementptr inbounds float* %tmp18719, i64 1
- %tmp18721 = getelementptr inbounds float* %tmp18720, i64 1
- %tmp18722 = getelementptr inbounds float* %tmp18721, i64 1
- %tmp18723 = getelementptr inbounds float* %tmp18722, i64 1
- %tmp18724 = getelementptr inbounds float* %tmp18723, i64 1
- %tmp18725 = getelementptr inbounds float* %tmp18724, i64 1
- %tmp18726 = getelementptr inbounds float* %tmp18725, i64 1
- %tmp18727 = getelementptr inbounds float* %tmp18726, i64 1
- %tmp18728 = getelementptr inbounds float* %tmp18727, i64 1
- %tmp18729 = getelementptr inbounds float* %tmp18728, i64 1
- %tmp18730 = getelementptr inbounds float* %tmp18729, i64 1
- %tmp18731 = getelementptr inbounds float* %tmp18730, i64 1
- %tmp18732 = getelementptr inbounds float* %tmp18731, i64 1
- %tmp18733 = getelementptr inbounds float* %tmp18732, i64 1
- %tmp18734 = getelementptr inbounds float* %tmp18733, i64 1
- %tmp18735 = getelementptr inbounds float* %tmp18734, i64 1
- %tmp18736 = getelementptr inbounds float* %tmp18735, i64 1
- %tmp18737 = getelementptr inbounds float* %tmp18736, i64 1
- %tmp18738 = getelementptr inbounds float* %tmp18737, i64 1
- %tmp18739 = getelementptr inbounds float* %tmp18738, i64 1
- %tmp18740 = getelementptr inbounds float* %tmp18739, i64 1
- %tmp18741 = getelementptr inbounds float* %tmp18740, i64 1
- %tmp18742 = getelementptr inbounds float* %tmp18741, i64 1
- %tmp18743 = getelementptr inbounds float* %tmp18742, i64 1
- %tmp18744 = getelementptr inbounds float* %tmp18743, i64 1
- %tmp18745 = getelementptr inbounds float* %tmp18744, i64 1
- %tmp18746 = getelementptr inbounds float* %tmp18745, i64 1
- %tmp18747 = getelementptr inbounds float* %tmp18746, i64 1
- %tmp18748 = getelementptr inbounds float* %tmp18747, i64 1
- %tmp18749 = getelementptr inbounds float* %tmp18748, i64 1
- %tmp18750 = getelementptr inbounds float* %tmp18749, i64 1
- %tmp18751 = getelementptr inbounds float* %tmp18750, i64 1
- %tmp18752 = getelementptr inbounds float* %tmp18751, i64 1
- %tmp18753 = getelementptr inbounds float* %tmp18752, i64 1
- %tmp18754 = getelementptr inbounds float* %tmp18753, i64 1
- %tmp18755 = getelementptr inbounds float* %tmp18754, i64 1
- %tmp18756 = getelementptr inbounds float* %tmp18755, i64 1
- %tmp18757 = getelementptr inbounds float* %tmp18756, i64 1
- %tmp18758 = getelementptr inbounds float* %tmp18757, i64 1
- %tmp18759 = getelementptr inbounds float* %tmp18758, i64 1
- %tmp18760 = getelementptr inbounds float* %tmp18759, i64 1
- %tmp18761 = getelementptr inbounds float* %tmp18760, i64 1
- %tmp18762 = getelementptr inbounds float* %tmp18761, i64 1
- %tmp18763 = getelementptr inbounds float* %tmp18762, i64 1
- %tmp18764 = getelementptr inbounds float* %tmp18763, i64 1
- %tmp18765 = getelementptr inbounds float* %tmp18764, i64 1
- %tmp18766 = getelementptr inbounds float* %tmp18765, i64 1
- %tmp18767 = getelementptr inbounds float* %tmp18766, i64 1
- %tmp18768 = getelementptr inbounds float* %tmp18767, i64 1
- %tmp18769 = getelementptr inbounds float* %tmp18768, i64 1
- %tmp18770 = getelementptr inbounds float* %tmp18769, i64 1
- %tmp18771 = getelementptr inbounds float* %tmp18770, i64 1
- %tmp18772 = getelementptr inbounds float* %tmp18771, i64 1
- %tmp18773 = getelementptr inbounds float* %tmp18772, i64 1
- %tmp18774 = getelementptr inbounds float* %tmp18773, i64 1
- %tmp18775 = getelementptr inbounds float* %tmp18774, i64 1
- %tmp18776 = getelementptr inbounds float* %tmp18775, i64 1
- %tmp18777 = getelementptr inbounds float* %tmp18776, i64 1
- %tmp18778 = getelementptr inbounds float* %tmp18777, i64 1
- %tmp18779 = getelementptr inbounds float* %tmp18778, i64 1
- %tmp18780 = getelementptr inbounds float* %tmp18779, i64 1
- %tmp18781 = getelementptr inbounds float* %tmp18780, i64 1
- %tmp18782 = getelementptr inbounds float* %tmp18781, i64 1
- %tmp18783 = getelementptr inbounds float* %tmp18782, i64 1
- %tmp18784 = getelementptr inbounds float* %tmp18783, i64 1
- %tmp18785 = getelementptr inbounds float* %tmp18784, i64 1
- %tmp18786 = getelementptr inbounds float* %tmp18785, i64 1
- %tmp18787 = getelementptr inbounds float* %tmp18786, i64 1
- %tmp18788 = getelementptr inbounds float* %tmp18787, i64 1
- %tmp18789 = getelementptr inbounds float* %tmp18788, i64 1
- %tmp18790 = getelementptr inbounds float* %tmp18789, i64 1
- %tmp18791 = getelementptr inbounds float* %tmp18790, i64 1
- %tmp18792 = getelementptr inbounds float* %tmp18791, i64 1
- %tmp18793 = getelementptr inbounds float* %tmp18792, i64 1
- %tmp18794 = getelementptr inbounds float* %tmp18793, i64 1
- %tmp18795 = getelementptr inbounds float* %tmp18794, i64 1
- %tmp18796 = getelementptr inbounds float* %tmp18795, i64 1
- %tmp18797 = getelementptr inbounds float* %tmp18796, i64 1
- %tmp18798 = getelementptr inbounds float* %tmp18797, i64 1
- %tmp18799 = getelementptr inbounds float* %tmp18798, i64 1
- %tmp18800 = getelementptr inbounds float* %tmp18799, i64 1
- %tmp18801 = getelementptr inbounds float* %tmp18800, i64 1
- %tmp18802 = getelementptr inbounds float* %tmp18801, i64 1
- %tmp18803 = getelementptr inbounds float* %tmp18802, i64 1
- %tmp18804 = getelementptr inbounds float* %tmp18803, i64 1
- %tmp18805 = getelementptr inbounds float* %tmp18804, i64 1
- %tmp18806 = getelementptr inbounds float* %tmp18805, i64 1
- %tmp18807 = getelementptr inbounds float* %tmp18806, i64 1
- %tmp18808 = getelementptr inbounds float* %tmp18807, i64 1
- %tmp18809 = getelementptr inbounds float* %tmp18808, i64 1
- %tmp18810 = getelementptr inbounds float* %tmp18809, i64 1
- %tmp18811 = getelementptr inbounds float* %tmp18810, i64 1
- %tmp18812 = getelementptr inbounds float* %tmp18811, i64 1
- %tmp18813 = getelementptr inbounds float* %tmp18812, i64 1
- %tmp18814 = getelementptr inbounds float* %tmp18813, i64 1
- %tmp18815 = getelementptr inbounds float* %tmp18814, i64 1
- %tmp18816 = getelementptr inbounds float* %tmp18815, i64 1
- %tmp18817 = getelementptr inbounds float* %tmp18816, i64 1
- %tmp18818 = getelementptr inbounds float* %tmp18817, i64 1
- %tmp18819 = getelementptr inbounds float* %tmp18818, i64 1
- %tmp18820 = getelementptr inbounds float* %tmp18819, i64 1
- %tmp18821 = getelementptr inbounds float* %tmp18820, i64 1
- %tmp18822 = getelementptr inbounds float* %tmp18821, i64 1
- %tmp18823 = getelementptr inbounds float* %tmp18822, i64 1
- %tmp18824 = getelementptr inbounds float* %tmp18823, i64 1
- %tmp18825 = getelementptr inbounds float* %tmp18824, i64 1
- %tmp18826 = getelementptr inbounds float* %tmp18825, i64 1
- %tmp18827 = getelementptr inbounds float* %tmp18826, i64 1
- %tmp18828 = getelementptr inbounds float* %tmp18827, i64 1
- %tmp18829 = getelementptr inbounds float* %tmp18828, i64 1
- %tmp18830 = getelementptr inbounds float* %tmp18829, i64 1
- %tmp18831 = getelementptr inbounds float* %tmp18830, i64 1
- %tmp18832 = getelementptr inbounds float* %tmp18831, i64 1
- %tmp18833 = getelementptr inbounds float* %tmp18832, i64 1
- %tmp18834 = getelementptr inbounds float* %tmp18833, i64 1
- %tmp18835 = getelementptr inbounds float* %tmp18834, i64 1
- %tmp18836 = getelementptr inbounds float* %tmp18835, i64 1
- %tmp18837 = getelementptr inbounds float* %tmp18836, i64 1
- %tmp18838 = getelementptr inbounds float* %tmp18837, i64 1
- %tmp18839 = getelementptr inbounds float* %tmp18838, i64 1
- %tmp18840 = getelementptr inbounds float* %tmp18839, i64 1
- %tmp18841 = getelementptr inbounds float* %tmp18840, i64 1
- %tmp18842 = getelementptr inbounds float* %tmp18841, i64 1
- %tmp18843 = getelementptr inbounds float* %tmp18842, i64 1
- %tmp18844 = getelementptr inbounds float* %tmp18843, i64 1
- %tmp18845 = getelementptr inbounds float* %tmp18844, i64 1
- %tmp18846 = getelementptr inbounds float* %tmp18845, i64 1
- %tmp18847 = getelementptr inbounds float* %tmp18846, i64 1
- %tmp18848 = getelementptr inbounds float* %tmp18847, i64 1
- %tmp18849 = getelementptr inbounds float* %tmp18848, i64 1
- %tmp18850 = getelementptr inbounds float* %tmp18849, i64 1
- %tmp18851 = getelementptr inbounds float* %tmp18850, i64 1
- %tmp18852 = getelementptr inbounds float* %tmp18851, i64 1
- %tmp18853 = getelementptr inbounds float* %tmp18852, i64 1
- %tmp18854 = getelementptr inbounds float* %tmp18853, i64 1
- %tmp18855 = getelementptr inbounds float* %tmp18854, i64 1
- %tmp18856 = getelementptr inbounds float* %tmp18855, i64 1
- %tmp18857 = getelementptr inbounds float* %tmp18856, i64 1
- %tmp18858 = getelementptr inbounds float* %tmp18857, i64 1
- %tmp18859 = getelementptr inbounds float* %tmp18858, i64 1
- %tmp18860 = getelementptr inbounds float* %tmp18859, i64 1
- %tmp18861 = getelementptr inbounds float* %tmp18860, i64 1
- %tmp18862 = getelementptr inbounds float* %tmp18861, i64 1
- %tmp18863 = getelementptr inbounds float* %tmp18862, i64 1
- %tmp18864 = getelementptr inbounds float* %tmp18863, i64 1
- %tmp18865 = getelementptr inbounds float* %tmp18864, i64 1
- %tmp18866 = getelementptr inbounds float* %tmp18865, i64 1
- %tmp18867 = getelementptr inbounds float* %tmp18866, i64 1
- %tmp18868 = getelementptr inbounds float* %tmp18867, i64 1
- %tmp18869 = getelementptr inbounds float* %tmp18868, i64 1
- %tmp18870 = getelementptr inbounds float* %tmp18869, i64 1
- %tmp18871 = getelementptr inbounds float* %tmp18870, i64 1
- %tmp18872 = getelementptr inbounds float* %tmp18871, i64 1
- %tmp18873 = getelementptr inbounds float* %tmp18872, i64 1
- %tmp18874 = getelementptr inbounds float* %tmp18873, i64 1
- %tmp18875 = getelementptr inbounds float* %tmp18874, i64 1
- %tmp18876 = getelementptr inbounds float* %tmp18875, i64 1
- %tmp18877 = getelementptr inbounds float* %tmp18876, i64 1
- %tmp18878 = getelementptr inbounds float* %tmp18877, i64 1
- %tmp18879 = getelementptr inbounds float* %tmp18878, i64 1
- %tmp18880 = getelementptr inbounds float* %tmp18879, i64 1
- %tmp18881 = getelementptr inbounds float* %tmp18880, i64 1
- %tmp18882 = getelementptr inbounds float* %tmp18881, i64 1
- %tmp18883 = getelementptr inbounds float* %tmp18882, i64 1
- %tmp18884 = getelementptr inbounds float* %tmp18883, i64 1
- %tmp18885 = getelementptr inbounds float* %tmp18884, i64 1
- %tmp18886 = getelementptr inbounds float* %tmp18885, i64 1
- %tmp18887 = getelementptr inbounds float* %tmp18886, i64 1
- %tmp18888 = getelementptr inbounds float* %tmp18887, i64 1
- %tmp18889 = getelementptr inbounds float* %tmp18888, i64 1
- %tmp18890 = getelementptr inbounds float* %tmp18889, i64 1
- %tmp18891 = getelementptr inbounds float* %tmp18890, i64 1
- %tmp18892 = getelementptr inbounds float* %tmp18891, i64 1
- %tmp18893 = getelementptr inbounds float* %tmp18892, i64 1
- %tmp18894 = getelementptr inbounds float* %tmp18893, i64 1
- %tmp18895 = getelementptr inbounds float* %tmp18894, i64 1
- %tmp18896 = getelementptr inbounds float* %tmp18895, i64 1
- %tmp18897 = getelementptr inbounds float* %tmp18896, i64 1
- %tmp18898 = getelementptr inbounds float* %tmp18897, i64 1
- %tmp18899 = getelementptr inbounds float* %tmp18898, i64 1
- %tmp18900 = getelementptr inbounds float* %tmp18899, i64 1
- %tmp18901 = getelementptr inbounds float* %tmp18900, i64 1
- %tmp18902 = getelementptr inbounds float* %tmp18901, i64 1
- %tmp18903 = getelementptr inbounds float* %tmp18902, i64 1
- %tmp18904 = getelementptr inbounds float* %tmp18903, i64 1
- %tmp18905 = getelementptr inbounds float* %tmp18904, i64 1
- %tmp18906 = getelementptr inbounds float* %tmp18905, i64 1
- %tmp18907 = getelementptr inbounds float* %tmp18906, i64 1
- %tmp18908 = getelementptr inbounds float* %tmp18907, i64 1
- %tmp18909 = getelementptr inbounds float* %tmp18908, i64 1
- %tmp18910 = getelementptr inbounds float* %tmp18909, i64 1
- %tmp18911 = getelementptr inbounds float* %tmp18910, i64 1
- %tmp18912 = getelementptr inbounds float* %tmp18911, i64 1
- %tmp18913 = getelementptr inbounds float* %tmp18912, i64 1
- %tmp18914 = getelementptr inbounds float* %tmp18913, i64 1
- %tmp18915 = getelementptr inbounds float* %tmp18914, i64 1
- %tmp18916 = getelementptr inbounds float* %tmp18915, i64 1
- %tmp18917 = getelementptr inbounds float* %tmp18916, i64 1
- %tmp18918 = getelementptr inbounds float* %tmp18917, i64 1
- %tmp18919 = getelementptr inbounds float* %tmp18918, i64 1
- %tmp18920 = getelementptr inbounds float* %tmp18919, i64 1
- %tmp18921 = getelementptr inbounds float* %tmp18920, i64 1
- %tmp18922 = getelementptr inbounds float* %tmp18921, i64 1
- %tmp18923 = getelementptr inbounds float* %tmp18922, i64 1
- %tmp18924 = getelementptr inbounds float* %tmp18923, i64 1
- %tmp18925 = getelementptr inbounds float* %tmp18924, i64 1
- %tmp18926 = getelementptr inbounds float* %tmp18925, i64 1
- %tmp18927 = getelementptr inbounds float* %tmp18926, i64 1
- %tmp18928 = getelementptr inbounds float* %tmp18927, i64 1
- %tmp18929 = getelementptr inbounds float* %tmp18928, i64 1
- %tmp18930 = getelementptr inbounds float* %tmp18929, i64 1
- %tmp18931 = getelementptr inbounds float* %tmp18930, i64 1
- %tmp18932 = getelementptr inbounds float* %tmp18931, i64 1
- %tmp18933 = getelementptr inbounds float* %tmp18932, i64 1
- %tmp18934 = getelementptr inbounds float* %tmp18933, i64 1
- %tmp18935 = getelementptr inbounds float* %tmp18934, i64 1
- %tmp18936 = getelementptr inbounds float* %tmp18935, i64 1
- %tmp18937 = getelementptr inbounds float* %tmp18936, i64 1
- %tmp18938 = getelementptr inbounds float* %tmp18937, i64 1
- %tmp18939 = getelementptr inbounds float* %tmp18938, i64 1
- %tmp18940 = getelementptr inbounds float* %tmp18939, i64 1
- %tmp18941 = getelementptr inbounds float* %tmp18940, i64 1
- %tmp18942 = getelementptr inbounds float* %tmp18941, i64 1
- %tmp18943 = getelementptr inbounds float* %tmp18942, i64 1
- %tmp18944 = getelementptr inbounds float* %tmp18943, i64 1
- %tmp18945 = getelementptr inbounds float* %tmp18944, i64 1
- %tmp18946 = getelementptr inbounds float* %tmp18945, i64 1
- %tmp18947 = getelementptr inbounds float* %tmp18946, i64 1
- %tmp18948 = getelementptr inbounds float* %tmp18947, i64 1
- %tmp18949 = getelementptr inbounds float* %tmp18948, i64 1
- %tmp18950 = getelementptr inbounds float* %tmp18949, i64 1
- %tmp18951 = getelementptr inbounds float* %tmp18950, i64 1
- %tmp18952 = getelementptr inbounds float* %tmp18951, i64 1
- %tmp18953 = getelementptr inbounds float* %tmp18952, i64 1
- %tmp18954 = getelementptr inbounds float* %tmp18953, i64 1
- %tmp18955 = getelementptr inbounds float* %tmp18954, i64 1
- %tmp18956 = getelementptr inbounds float* %tmp18955, i64 1
- %tmp18957 = getelementptr inbounds float* %tmp18956, i64 1
- %tmp18958 = getelementptr inbounds float* %tmp18957, i64 1
- %tmp18959 = getelementptr inbounds float* %tmp18958, i64 1
- %tmp18960 = getelementptr inbounds float* %tmp18959, i64 1
- %tmp18961 = getelementptr inbounds float* %tmp18960, i64 1
- %tmp18962 = getelementptr inbounds float* %tmp18961, i64 1
- %tmp18963 = getelementptr inbounds float* %tmp18962, i64 1
- %tmp18964 = getelementptr inbounds float* %tmp18963, i64 1
- %tmp18965 = getelementptr inbounds float* %tmp18964, i64 1
- %tmp18966 = getelementptr inbounds float* %tmp18965, i64 1
- %tmp18967 = getelementptr inbounds float* %tmp18966, i64 1
- %tmp18968 = getelementptr inbounds float* %tmp18967, i64 1
- %tmp18969 = getelementptr inbounds float* %tmp18968, i64 1
- %tmp18970 = getelementptr inbounds float* %tmp18969, i64 1
- %tmp18971 = getelementptr inbounds float* %tmp18970, i64 1
- %tmp18972 = getelementptr inbounds float* %tmp18971, i64 1
- %tmp18973 = getelementptr inbounds float* %tmp18972, i64 1
- %tmp18974 = getelementptr inbounds float* %tmp18973, i64 1
- %tmp18975 = getelementptr inbounds float* %tmp18974, i64 1
- %tmp18976 = getelementptr inbounds float* %tmp18975, i64 1
- %tmp18977 = getelementptr inbounds float* %tmp18976, i64 1
- %tmp18978 = getelementptr inbounds float* %tmp18977, i64 1
- %tmp18979 = getelementptr inbounds float* %tmp18978, i64 1
- %tmp18980 = getelementptr inbounds float* %tmp18979, i64 1
- %tmp18981 = getelementptr inbounds float* %tmp18980, i64 1
- %tmp18982 = getelementptr inbounds float* %tmp18981, i64 1
- %tmp18983 = getelementptr inbounds float* %tmp18982, i64 1
- %tmp18984 = getelementptr inbounds float* %tmp18983, i64 1
- %tmp18985 = getelementptr inbounds float* %tmp18984, i64 1
- %tmp18986 = getelementptr inbounds float* %tmp18985, i64 1
- %tmp18987 = getelementptr inbounds float* %tmp18986, i64 1
- %tmp18988 = getelementptr inbounds float* %tmp18987, i64 1
- %tmp18989 = getelementptr inbounds float* %tmp18988, i64 1
- %tmp18990 = getelementptr inbounds float* %tmp18989, i64 1
- %tmp18991 = getelementptr inbounds float* %tmp18990, i64 1
- %tmp18992 = getelementptr inbounds float* %tmp18991, i64 1
- %tmp18993 = getelementptr inbounds float* %tmp18992, i64 1
- %tmp18994 = getelementptr inbounds float* %tmp18993, i64 1
- %tmp18995 = getelementptr inbounds float* %tmp18994, i64 1
- %tmp18996 = getelementptr inbounds float* %tmp18995, i64 1
- %tmp18997 = getelementptr inbounds float* %tmp18996, i64 1
- %tmp18998 = getelementptr inbounds float* %tmp18997, i64 1
- %tmp18999 = getelementptr inbounds float* %tmp18998, i64 1
- %tmp19000 = getelementptr inbounds float* %tmp18999, i64 1
- %tmp19001 = getelementptr inbounds float* %tmp19000, i64 1
- %tmp19002 = getelementptr inbounds float* %tmp19001, i64 1
- %tmp19003 = getelementptr inbounds float* %tmp19002, i64 1
- %tmp19004 = getelementptr inbounds float* %tmp19003, i64 1
- %tmp19005 = getelementptr inbounds float* %tmp19004, i64 1
- %tmp19006 = getelementptr inbounds float* %tmp19005, i64 1
- %tmp19007 = getelementptr inbounds float* %tmp19006, i64 1
- %tmp19008 = getelementptr inbounds float* %tmp19007, i64 1
- %tmp19009 = getelementptr inbounds float* %tmp19008, i64 1
- %tmp19010 = getelementptr inbounds float* %tmp19009, i64 1
- %tmp19011 = getelementptr inbounds float* %tmp19010, i64 1
- %tmp19012 = getelementptr inbounds float* %tmp19011, i64 1
- %tmp19013 = getelementptr inbounds float* %tmp19012, i64 1
- %tmp19014 = getelementptr inbounds float* %tmp19013, i64 1
- %tmp19015 = getelementptr inbounds float* %tmp19014, i64 1
- %tmp19016 = getelementptr inbounds float* %tmp19015, i64 1
- %tmp19017 = getelementptr inbounds float* %tmp19016, i64 1
- %tmp19018 = getelementptr inbounds float* %tmp19017, i64 1
- %tmp19019 = getelementptr inbounds float* %tmp19018, i64 1
- %tmp19020 = getelementptr inbounds float* %tmp19019, i64 1
- %tmp19021 = getelementptr inbounds float* %tmp19020, i64 1
- %tmp19022 = getelementptr inbounds float* %tmp19021, i64 1
- %tmp19023 = getelementptr inbounds float* %tmp19022, i64 1
- %tmp19024 = getelementptr inbounds float* %tmp19023, i64 1
- %tmp19025 = getelementptr inbounds float* %tmp19024, i64 1
- %tmp19026 = getelementptr inbounds float* %tmp19025, i64 1
- %tmp19027 = getelementptr inbounds float* %tmp19026, i64 1
- %tmp19028 = getelementptr inbounds float* %tmp19027, i64 1
- %tmp19029 = getelementptr inbounds float* %tmp19028, i64 1
- %tmp19030 = getelementptr inbounds float* %tmp19029, i64 1
- %tmp19031 = getelementptr inbounds float* %tmp19030, i64 1
- %tmp19032 = getelementptr inbounds float* %tmp19031, i64 1
- %tmp19033 = getelementptr inbounds float* %tmp19032, i64 1
- %tmp19034 = getelementptr inbounds float* %tmp19033, i64 1
- %tmp19035 = getelementptr inbounds float* %tmp19034, i64 1
- %tmp19036 = getelementptr inbounds float* %tmp19035, i64 1
- %tmp19037 = getelementptr inbounds float* %tmp19036, i64 1
- %tmp19038 = getelementptr inbounds float* %tmp19037, i64 1
- %tmp19039 = getelementptr inbounds float* %tmp19038, i64 1
- %tmp19040 = getelementptr inbounds float* %tmp19039, i64 1
- %tmp19041 = getelementptr inbounds float* %tmp19040, i64 1
- %tmp19042 = getelementptr inbounds float* %tmp19041, i64 1
- %tmp19043 = getelementptr inbounds float* %tmp19042, i64 1
- %tmp19044 = getelementptr inbounds float* %tmp19043, i64 1
- %tmp19045 = getelementptr inbounds float* %tmp19044, i64 1
- %tmp19046 = getelementptr inbounds float* %tmp19045, i64 1
- %tmp19047 = getelementptr inbounds float* %tmp19046, i64 1
- %tmp19048 = getelementptr inbounds float* %tmp19047, i64 1
- %tmp19049 = getelementptr inbounds float* %tmp19048, i64 1
- %tmp19050 = getelementptr inbounds float* %tmp19049, i64 1
- %tmp19051 = getelementptr inbounds float* %tmp19050, i64 1
- %tmp19052 = getelementptr inbounds float* %tmp19051, i64 1
- %tmp19053 = getelementptr inbounds float* %tmp19052, i64 1
- %tmp19054 = getelementptr inbounds float* %tmp19053, i64 1
- %tmp19055 = getelementptr inbounds float* %tmp19054, i64 1
- %tmp19056 = getelementptr inbounds float* %tmp19055, i64 1
- %tmp19057 = getelementptr inbounds float* %tmp19056, i64 1
- %tmp19058 = getelementptr inbounds float* %tmp19057, i64 1
- %tmp19059 = getelementptr inbounds float* %tmp19058, i64 1
- %tmp19060 = getelementptr inbounds float* %tmp19059, i64 1
- %tmp19061 = getelementptr inbounds float* %tmp19060, i64 1
- %tmp19062 = getelementptr inbounds float* %tmp19061, i64 1
- %tmp19063 = getelementptr inbounds float* %tmp19062, i64 1
- %tmp19064 = getelementptr inbounds float* %tmp19063, i64 1
- %tmp19065 = getelementptr inbounds float* %tmp19064, i64 1
- %tmp19066 = getelementptr inbounds float* %tmp19065, i64 1
- %tmp19067 = getelementptr inbounds float* %tmp19066, i64 1
- %tmp19068 = getelementptr inbounds float* %tmp19067, i64 1
- %tmp19069 = getelementptr inbounds float* %tmp19068, i64 1
- %tmp19070 = getelementptr inbounds float* %tmp19069, i64 1
- %tmp19071 = getelementptr inbounds float* %tmp19070, i64 1
- %tmp19072 = getelementptr inbounds float* %tmp19071, i64 1
- %tmp19073 = getelementptr inbounds float* %tmp19072, i64 1
- %tmp19074 = getelementptr inbounds float* %tmp19073, i64 1
- %tmp19075 = getelementptr inbounds float* %tmp19074, i64 1
- %tmp19076 = getelementptr inbounds float* %tmp19075, i64 1
- %tmp19077 = getelementptr inbounds float* %tmp19076, i64 1
- %tmp19078 = getelementptr inbounds float* %tmp19077, i64 1
- %tmp19079 = getelementptr inbounds float* %tmp19078, i64 1
- %tmp19080 = getelementptr inbounds float* %tmp19079, i64 1
- %tmp19081 = getelementptr inbounds float* %tmp19080, i64 1
- %tmp19082 = getelementptr inbounds float* %tmp19081, i64 1
- %tmp19083 = getelementptr inbounds float* %tmp19082, i64 1
- %tmp19084 = getelementptr inbounds float* %tmp19083, i64 1
- %tmp19085 = getelementptr inbounds float* %tmp19084, i64 1
- %tmp19086 = getelementptr inbounds float* %tmp19085, i64 1
- %tmp19087 = getelementptr inbounds float* %tmp19086, i64 1
- %tmp19088 = getelementptr inbounds float* %tmp19087, i64 1
- %tmp19089 = getelementptr inbounds float* %tmp19088, i64 1
- %tmp19090 = getelementptr inbounds float* %tmp19089, i64 1
- %tmp19091 = getelementptr inbounds float* %tmp19090, i64 1
- %tmp19092 = getelementptr inbounds float* %tmp19091, i64 1
- %tmp19093 = getelementptr inbounds float* %tmp19092, i64 1
- %tmp19094 = getelementptr inbounds float* %tmp19093, i64 1
- %tmp19095 = getelementptr inbounds float* %tmp19094, i64 1
- %tmp19096 = getelementptr inbounds float* %tmp19095, i64 1
- %tmp19097 = getelementptr inbounds float* %tmp19096, i64 1
- %tmp19098 = getelementptr inbounds float* %tmp19097, i64 1
- %tmp19099 = getelementptr inbounds float* %tmp19098, i64 1
- %tmp19100 = getelementptr inbounds float* %tmp19099, i64 1
- %tmp19101 = getelementptr inbounds float* %tmp19100, i64 1
- %tmp19102 = getelementptr inbounds float* %tmp19101, i64 1
- %tmp19103 = getelementptr inbounds float* %tmp19102, i64 1
- %tmp19104 = getelementptr inbounds float* %tmp19103, i64 1
- %tmp19105 = getelementptr inbounds float* %tmp19104, i64 1
- %tmp19106 = getelementptr inbounds float* %tmp19105, i64 1
- %tmp19107 = getelementptr inbounds float* %tmp19106, i64 1
- %tmp19108 = getelementptr inbounds float* %tmp19107, i64 1
- %tmp19109 = getelementptr inbounds float* %tmp19108, i64 1
- %tmp19110 = getelementptr inbounds float* %tmp19109, i64 1
- %tmp19111 = getelementptr inbounds float* %tmp19110, i64 1
- %tmp19112 = getelementptr inbounds float* %tmp19111, i64 1
- %tmp19113 = getelementptr inbounds float* %tmp19112, i64 1
- %tmp19114 = getelementptr inbounds float* %tmp19113, i64 1
- %tmp19115 = getelementptr inbounds float* %tmp19114, i64 1
- %tmp19116 = getelementptr inbounds float* %tmp19115, i64 1
- %tmp19117 = getelementptr inbounds float* %tmp19116, i64 1
- %tmp19118 = getelementptr inbounds float* %tmp19117, i64 1
- %tmp19119 = getelementptr inbounds float* %tmp19118, i64 1
- %tmp19120 = getelementptr inbounds float* %tmp19119, i64 1
- %tmp19121 = getelementptr inbounds float* %tmp19120, i64 1
- %tmp19122 = getelementptr inbounds float* %tmp19121, i64 1
- %tmp19123 = getelementptr inbounds float* %tmp19122, i64 1
- %tmp19124 = getelementptr inbounds float* %tmp19123, i64 1
- %tmp19125 = getelementptr inbounds float* %tmp19124, i64 1
- %tmp19126 = getelementptr inbounds float* %tmp19125, i64 1
- %tmp19127 = getelementptr inbounds float* %tmp19126, i64 1
- %tmp19128 = getelementptr inbounds float* %tmp19127, i64 1
- %tmp19129 = getelementptr inbounds float* %tmp19128, i64 1
- %tmp19130 = getelementptr inbounds float* %tmp19129, i64 1
- %tmp19131 = getelementptr inbounds float* %tmp19130, i64 1
- %tmp19132 = getelementptr inbounds float* %tmp19131, i64 1
- %tmp19133 = getelementptr inbounds float* %tmp19132, i64 1
- %tmp19134 = getelementptr inbounds float* %tmp19133, i64 1
- %tmp19135 = getelementptr inbounds float* %tmp19134, i64 1
- %tmp19136 = getelementptr inbounds float* %tmp19135, i64 1
- %tmp19137 = getelementptr inbounds float* %tmp19136, i64 1
- %tmp19138 = getelementptr inbounds float* %tmp19137, i64 1
- %tmp19139 = getelementptr inbounds float* %tmp19138, i64 1
- %tmp19140 = getelementptr inbounds float* %tmp19139, i64 1
- %tmp19141 = getelementptr inbounds float* %tmp19140, i64 1
- %tmp19142 = getelementptr inbounds float* %tmp19141, i64 1
- %tmp19143 = getelementptr inbounds float* %tmp19142, i64 1
- %tmp19144 = getelementptr inbounds float* %tmp19143, i64 1
- %tmp19145 = getelementptr inbounds float* %tmp19144, i64 1
- %tmp19146 = getelementptr inbounds float* %tmp19145, i64 1
- %tmp19147 = getelementptr inbounds float* %tmp19146, i64 1
- %tmp19148 = getelementptr inbounds float* %tmp19147, i64 1
- %tmp19149 = getelementptr inbounds float* %tmp19148, i64 1
- %tmp19150 = getelementptr inbounds float* %tmp19149, i64 1
- %tmp19151 = getelementptr inbounds float* %tmp19150, i64 1
- %tmp19152 = getelementptr inbounds float* %tmp19151, i64 1
- %tmp19153 = getelementptr inbounds float* %tmp19152, i64 1
- %tmp19154 = getelementptr inbounds float* %tmp19153, i64 1
- %tmp19155 = getelementptr inbounds float* %tmp19154, i64 1
- %tmp19156 = getelementptr inbounds float* %tmp19155, i64 1
- %tmp19157 = getelementptr inbounds float* %tmp19156, i64 1
- %tmp19158 = getelementptr inbounds float* %tmp19157, i64 1
- %tmp19159 = getelementptr inbounds float* %tmp19158, i64 1
- %tmp19160 = getelementptr inbounds float* %tmp19159, i64 1
- %tmp19161 = getelementptr inbounds float* %tmp19160, i64 1
- %tmp19162 = getelementptr inbounds float* %tmp19161, i64 1
- %tmp19163 = getelementptr inbounds float* %tmp19162, i64 1
- %tmp19164 = getelementptr inbounds float* %tmp19163, i64 1
- %tmp19165 = getelementptr inbounds float* %tmp19164, i64 1
- %tmp19166 = getelementptr inbounds float* %tmp19165, i64 1
- %tmp19167 = getelementptr inbounds float* %tmp19166, i64 1
- %tmp19168 = getelementptr inbounds float* %tmp19167, i64 1
- %tmp19169 = getelementptr inbounds float* %tmp19168, i64 1
- %tmp19170 = getelementptr inbounds float* %tmp19169, i64 1
- %tmp19171 = getelementptr inbounds float* %tmp19170, i64 1
- %tmp19172 = getelementptr inbounds float* %tmp19171, i64 1
- %tmp19173 = getelementptr inbounds float* %tmp19172, i64 1
- %tmp19174 = getelementptr inbounds float* %tmp19173, i64 1
- %tmp19175 = getelementptr inbounds float* %tmp19174, i64 1
- %tmp19176 = getelementptr inbounds float* %tmp19175, i64 1
- %tmp19177 = getelementptr inbounds float* %tmp19176, i64 1
- %tmp19178 = getelementptr inbounds float* %tmp19177, i64 1
- %tmp19179 = getelementptr inbounds float* %tmp19178, i64 1
- %tmp19180 = getelementptr inbounds float* %tmp19179, i64 1
- %tmp19181 = getelementptr inbounds float* %tmp19180, i64 1
- %tmp19182 = getelementptr inbounds float* %tmp19181, i64 1
- %tmp19183 = getelementptr inbounds float* %tmp19182, i64 1
- %tmp19184 = getelementptr inbounds float* %tmp19183, i64 1
- %tmp19185 = getelementptr inbounds float* %tmp19184, i64 1
- %tmp19186 = getelementptr inbounds float* %tmp19185, i64 1
- %tmp19187 = getelementptr inbounds float* %tmp19186, i64 1
- %tmp19188 = getelementptr inbounds float* %tmp19187, i64 1
- %tmp19189 = getelementptr inbounds float* %tmp19188, i64 1
- %tmp19190 = getelementptr inbounds float* %tmp19189, i64 1
- %tmp19191 = getelementptr inbounds float* %tmp19190, i64 1
- %tmp19192 = getelementptr inbounds float* %tmp19191, i64 1
- %tmp19193 = getelementptr inbounds float* %tmp19192, i64 1
- %tmp19194 = getelementptr inbounds float* %tmp19193, i64 1
- %tmp19195 = getelementptr inbounds float* %tmp19194, i64 1
- %tmp19196 = getelementptr inbounds float* %tmp19195, i64 1
- %tmp19197 = getelementptr inbounds float* %tmp19196, i64 1
- %tmp19198 = getelementptr inbounds float* %tmp19197, i64 1
- %tmp19199 = getelementptr inbounds float* %tmp19198, i64 1
- %tmp19200 = getelementptr inbounds float* %tmp19199, i64 1
- %tmp19201 = getelementptr inbounds float* %tmp19200, i64 1
- %tmp19202 = getelementptr inbounds float* %tmp19201, i64 1
- %tmp19203 = getelementptr inbounds float* %tmp19202, i64 1
- %tmp19204 = getelementptr inbounds float* %tmp19203, i64 1
- %tmp19205 = getelementptr inbounds float* %tmp19204, i64 1
- %tmp19206 = getelementptr inbounds float* %tmp19205, i64 1
- %tmp19207 = getelementptr inbounds float* %tmp19206, i64 1
- %tmp19208 = getelementptr inbounds float* %tmp19207, i64 1
- %tmp19209 = getelementptr inbounds float* %tmp19208, i64 1
- %tmp19210 = getelementptr inbounds float* %tmp19209, i64 1
- %tmp19211 = getelementptr inbounds float* %tmp19210, i64 1
- %tmp19212 = getelementptr inbounds float* %tmp19211, i64 1
- %tmp19213 = getelementptr inbounds float* %tmp19212, i64 1
- %tmp19214 = getelementptr inbounds float* %tmp19213, i64 1
- %tmp19215 = getelementptr inbounds float* %tmp19214, i64 1
- %tmp19216 = getelementptr inbounds float* %tmp19215, i64 1
- %tmp19217 = getelementptr inbounds float* %tmp19216, i64 1
- %tmp19218 = getelementptr inbounds float* %tmp19217, i64 1
- %tmp19219 = getelementptr inbounds float* %tmp19218, i64 1
- %tmp19220 = getelementptr inbounds float* %tmp19219, i64 1
- %tmp19221 = getelementptr inbounds float* %tmp19220, i64 1
- %tmp19222 = getelementptr inbounds float* %tmp19221, i64 1
- %tmp19223 = getelementptr inbounds float* %tmp19222, i64 1
- %tmp19224 = getelementptr inbounds float* %tmp19223, i64 1
- %tmp19225 = getelementptr inbounds float* %tmp19224, i64 1
- %tmp19226 = getelementptr inbounds float* %tmp19225, i64 1
- %tmp19227 = getelementptr inbounds float* %tmp19226, i64 1
- %tmp19228 = getelementptr inbounds float* %tmp19227, i64 1
- %tmp19229 = getelementptr inbounds float* %tmp19228, i64 1
- %tmp19230 = getelementptr inbounds float* %tmp19229, i64 1
- %tmp19231 = getelementptr inbounds float* %tmp19230, i64 1
- %tmp19232 = getelementptr inbounds float* %tmp19231, i64 1
- %tmp19233 = getelementptr inbounds float* %tmp19232, i64 1
- %tmp19234 = getelementptr inbounds float* %tmp19233, i64 1
- %tmp19235 = getelementptr inbounds float* %tmp19234, i64 1
- %tmp19236 = getelementptr inbounds float* %tmp19235, i64 1
- %tmp19237 = getelementptr inbounds float* %tmp19236, i64 1
- %tmp19238 = getelementptr inbounds float* %tmp19237, i64 1
- %tmp19239 = getelementptr inbounds float* %tmp19238, i64 1
- %tmp19240 = getelementptr inbounds float* %tmp19239, i64 1
- %tmp19241 = getelementptr inbounds float* %tmp19240, i64 1
- %tmp19242 = getelementptr inbounds float* %tmp19241, i64 1
- %tmp19243 = getelementptr inbounds float* %tmp19242, i64 1
- %tmp19244 = getelementptr inbounds float* %tmp19243, i64 1
- %tmp19245 = getelementptr inbounds float* %tmp19244, i64 1
- %tmp19246 = getelementptr inbounds float* %tmp19245, i64 1
- %tmp19247 = getelementptr inbounds float* %tmp19246, i64 1
- %tmp19248 = getelementptr inbounds float* %tmp19247, i64 1
- %tmp19249 = getelementptr inbounds float* %tmp19248, i64 1
- %tmp19250 = getelementptr inbounds float* %tmp19249, i64 1
- %tmp19251 = getelementptr inbounds float* %tmp19250, i64 1
- %tmp19252 = getelementptr inbounds float* %tmp19251, i64 1
- %tmp19253 = getelementptr inbounds float* %tmp19252, i64 1
- %tmp19254 = getelementptr inbounds float* %tmp19253, i64 1
- %tmp19255 = getelementptr inbounds float* %tmp19254, i64 1
- %tmp19256 = getelementptr inbounds float* %tmp19255, i64 1
- %tmp19257 = getelementptr inbounds float* %tmp19256, i64 1
- %tmp19258 = getelementptr inbounds float* %tmp19257, i64 1
- %tmp19259 = getelementptr inbounds float* %tmp19258, i64 1
- %tmp19260 = getelementptr inbounds float* %tmp19259, i64 1
- %tmp19261 = getelementptr inbounds float* %tmp19260, i64 1
- %tmp19262 = getelementptr inbounds float* %tmp19261, i64 1
- %tmp19263 = getelementptr inbounds float* %tmp19262, i64 1
- %tmp19264 = getelementptr inbounds float* %tmp19263, i64 1
- %tmp19265 = getelementptr inbounds float* %tmp19264, i64 1
- %tmp19266 = getelementptr inbounds float* %tmp19265, i64 1
- %tmp19267 = getelementptr inbounds float* %tmp19266, i64 1
- %tmp19268 = getelementptr inbounds float* %tmp19267, i64 1
- %tmp19269 = getelementptr inbounds float* %tmp19268, i64 1
- %tmp19270 = getelementptr inbounds float* %tmp19269, i64 1
- %tmp19271 = getelementptr inbounds float* %tmp19270, i64 1
- %tmp19272 = getelementptr inbounds float* %tmp19271, i64 1
- %tmp19273 = getelementptr inbounds float* %tmp19272, i64 1
- %tmp19274 = getelementptr inbounds float* %tmp19273, i64 1
- %tmp19275 = getelementptr inbounds float* %tmp19274, i64 1
- %tmp19276 = getelementptr inbounds float* %tmp19275, i64 1
- %tmp19277 = getelementptr inbounds float* %tmp19276, i64 1
- %tmp19278 = getelementptr inbounds float* %tmp19277, i64 1
- %tmp19279 = getelementptr inbounds float* %tmp19278, i64 1
- %tmp19280 = getelementptr inbounds float* %tmp19279, i64 1
- %tmp19281 = getelementptr inbounds float* %tmp19280, i64 1
- %tmp19282 = getelementptr inbounds float* %tmp19281, i64 1
- %tmp19283 = getelementptr inbounds float* %tmp19282, i64 1
- %tmp19284 = getelementptr inbounds float* %tmp19283, i64 1
- %tmp19285 = getelementptr inbounds float* %tmp19284, i64 1
- %tmp19286 = getelementptr inbounds float* %tmp19285, i64 1
- %tmp19287 = getelementptr inbounds float* %tmp19286, i64 1
- %tmp19288 = getelementptr inbounds float* %tmp19287, i64 1
- %tmp19289 = getelementptr inbounds float* %tmp19288, i64 1
- %tmp19290 = getelementptr inbounds float* %tmp19289, i64 1
- %tmp19291 = getelementptr inbounds float* %tmp19290, i64 1
- %tmp19292 = getelementptr inbounds float* %tmp19291, i64 1
- %tmp19293 = getelementptr inbounds float* %tmp19292, i64 1
- %tmp19294 = getelementptr inbounds float* %tmp19293, i64 1
- %tmp19295 = getelementptr inbounds float* %tmp19294, i64 1
- %tmp19296 = getelementptr inbounds float* %tmp19295, i64 1
- %tmp19297 = getelementptr inbounds float* %tmp19296, i64 1
- %tmp19298 = getelementptr inbounds float* %tmp19297, i64 1
- %tmp19299 = getelementptr inbounds float* %tmp19298, i64 1
- %tmp19300 = getelementptr inbounds float* %tmp19299, i64 1
- %tmp19301 = getelementptr inbounds float* %tmp19300, i64 1
- %tmp19302 = getelementptr inbounds float* %tmp19301, i64 1
- %tmp19303 = getelementptr inbounds float* %tmp19302, i64 1
- %tmp19304 = getelementptr inbounds float* %tmp19303, i64 1
- %tmp19305 = getelementptr inbounds float* %tmp19304, i64 1
- %tmp19306 = getelementptr inbounds float* %tmp19305, i64 1
- %tmp19307 = getelementptr inbounds float* %tmp19306, i64 1
- %tmp19308 = getelementptr inbounds float* %tmp19307, i64 1
- %tmp19309 = getelementptr inbounds float* %tmp19308, i64 1
- %tmp19310 = getelementptr inbounds float* %tmp19309, i64 1
- %tmp19311 = getelementptr inbounds float* %tmp19310, i64 1
- %tmp19312 = getelementptr inbounds float* %tmp19311, i64 1
- %tmp19313 = getelementptr inbounds float* %tmp19312, i64 1
- %tmp19314 = getelementptr inbounds float* %tmp19313, i64 1
- %tmp19315 = getelementptr inbounds float* %tmp19314, i64 1
- %tmp19316 = getelementptr inbounds float* %tmp19315, i64 1
- %tmp19317 = getelementptr inbounds float* %tmp19316, i64 1
- %tmp19318 = getelementptr inbounds float* %tmp19317, i64 1
- %tmp19319 = getelementptr inbounds float* %tmp19318, i64 1
- %tmp19320 = getelementptr inbounds float* %tmp19319, i64 1
- %tmp19321 = getelementptr inbounds float* %tmp19320, i64 1
- %tmp19322 = getelementptr inbounds float* %tmp19321, i64 1
- %tmp19323 = getelementptr inbounds float* %tmp19322, i64 1
- %tmp19324 = getelementptr inbounds float* %tmp19323, i64 1
- %tmp19325 = getelementptr inbounds float* %tmp19324, i64 1
- %tmp19326 = getelementptr inbounds float* %tmp19325, i64 1
- %tmp19327 = getelementptr inbounds float* %tmp19326, i64 1
- %tmp19328 = getelementptr inbounds float* %tmp19327, i64 1
- %tmp19329 = getelementptr inbounds float* %tmp19328, i64 1
- %tmp19330 = getelementptr inbounds float* %tmp19329, i64 1
- %tmp19331 = getelementptr inbounds float* %tmp19330, i64 1
- %tmp19332 = getelementptr inbounds float* %tmp19331, i64 1
- %tmp19333 = getelementptr inbounds float* %tmp19332, i64 1
- %tmp19334 = getelementptr inbounds float* %tmp19333, i64 1
- %tmp19335 = getelementptr inbounds float* %tmp19334, i64 1
- %tmp19336 = getelementptr inbounds float* %tmp19335, i64 1
- %tmp19337 = getelementptr inbounds float* %tmp19336, i64 1
- %tmp19338 = getelementptr inbounds float* %tmp19337, i64 1
- %tmp19339 = getelementptr inbounds float* %tmp19338, i64 1
- %tmp19340 = getelementptr inbounds float* %tmp19339, i64 1
- %tmp19341 = getelementptr inbounds float* %tmp19340, i64 1
- %tmp19342 = getelementptr inbounds float* %tmp19341, i64 1
- %tmp19343 = getelementptr inbounds float* %tmp19342, i64 1
- %tmp19344 = getelementptr inbounds float* %tmp19343, i64 1
- %tmp19345 = getelementptr inbounds float* %tmp19344, i64 1
- %tmp19346 = getelementptr inbounds float* %tmp19345, i64 1
- %tmp19347 = getelementptr inbounds float* %tmp19346, i64 1
- %tmp19348 = getelementptr inbounds float* %tmp19347, i64 1
- %tmp19349 = getelementptr inbounds float* %tmp19348, i64 1
- %tmp19350 = getelementptr inbounds float* %tmp19349, i64 1
- %tmp19351 = getelementptr inbounds float* %tmp19350, i64 1
- %tmp19352 = getelementptr inbounds float* %tmp19351, i64 1
- %tmp19353 = getelementptr inbounds float* %tmp19352, i64 1
- %tmp19354 = getelementptr inbounds float* %tmp19353, i64 1
- %tmp19355 = getelementptr inbounds float* %tmp19354, i64 1
- %tmp19356 = getelementptr inbounds float* %tmp19355, i64 1
- %tmp19357 = getelementptr inbounds float* %tmp19356, i64 1
- %tmp19358 = getelementptr inbounds float* %tmp19357, i64 1
- %tmp19359 = getelementptr inbounds float* %tmp19358, i64 1
- %tmp19360 = getelementptr inbounds float* %tmp19359, i64 1
- %tmp19361 = getelementptr inbounds float* %tmp19360, i64 1
- %tmp19362 = getelementptr inbounds float* %tmp19361, i64 1
- %tmp19363 = getelementptr inbounds float* %tmp19362, i64 1
- %tmp19364 = getelementptr inbounds float* %tmp19363, i64 1
- %tmp19365 = getelementptr inbounds float* %tmp19364, i64 1
- %tmp19366 = getelementptr inbounds float* %tmp19365, i64 1
- %tmp19367 = getelementptr inbounds float* %tmp19366, i64 1
- %tmp19368 = getelementptr inbounds float* %tmp19367, i64 1
- %tmp19369 = getelementptr inbounds float* %tmp19368, i64 1
- %tmp19370 = getelementptr inbounds float* %tmp19369, i64 1
- %tmp19371 = getelementptr inbounds float* %tmp19370, i64 1
- %tmp19372 = getelementptr inbounds float* %tmp19371, i64 1
- %tmp19373 = getelementptr inbounds float* %tmp19372, i64 1
- %tmp19374 = getelementptr inbounds float* %tmp19373, i64 1
- %tmp19375 = getelementptr inbounds float* %tmp19374, i64 1
- %tmp19376 = getelementptr inbounds float* %tmp19375, i64 1
- %tmp19377 = getelementptr inbounds float* %tmp19376, i64 1
- %tmp19378 = getelementptr inbounds float* %tmp19377, i64 1
- %tmp19379 = getelementptr inbounds float* %tmp19378, i64 1
- %tmp19380 = getelementptr inbounds float* %tmp19379, i64 1
- %tmp19381 = getelementptr inbounds float* %tmp19380, i64 1
- %tmp19382 = getelementptr inbounds float* %tmp19381, i64 1
- %tmp19383 = getelementptr inbounds float* %tmp19382, i64 1
- %tmp19384 = getelementptr inbounds float* %tmp19383, i64 1
- %tmp19385 = getelementptr inbounds float* %tmp19384, i64 1
- %tmp19386 = getelementptr inbounds float* %tmp19385, i64 1
- %tmp19387 = getelementptr inbounds float* %tmp19386, i64 1
- %tmp19388 = getelementptr inbounds float* %tmp19387, i64 1
- %tmp19389 = getelementptr inbounds float* %tmp19388, i64 1
- %tmp19390 = getelementptr inbounds float* %tmp19389, i64 1
- %tmp19391 = getelementptr inbounds float* %tmp19390, i64 1
- %tmp19392 = getelementptr inbounds float* %tmp19391, i64 1
- %tmp19393 = getelementptr inbounds float* %tmp19392, i64 1
- %tmp19394 = getelementptr inbounds float* %tmp19393, i64 1
- %tmp19395 = getelementptr inbounds float* %tmp19394, i64 1
- %tmp19396 = getelementptr inbounds float* %tmp19395, i64 1
- %tmp19397 = getelementptr inbounds float* %tmp19396, i64 1
- %tmp19398 = getelementptr inbounds float* %tmp19397, i64 1
- %tmp19399 = getelementptr inbounds float* %tmp19398, i64 1
- %tmp19400 = getelementptr inbounds float* %tmp19399, i64 1
- %tmp19401 = getelementptr inbounds float* %tmp19400, i64 1
- %tmp19402 = getelementptr inbounds float* %tmp19401, i64 1
- %tmp19403 = getelementptr inbounds float* %tmp19402, i64 1
- %tmp19404 = getelementptr inbounds float* %tmp19403, i64 1
- %tmp19405 = getelementptr inbounds float* %tmp19404, i64 1
- %tmp19406 = getelementptr inbounds float* %tmp19405, i64 1
- %tmp19407 = getelementptr inbounds float* %tmp19406, i64 1
- %tmp19408 = getelementptr inbounds float* %tmp19407, i64 1
- %tmp19409 = getelementptr inbounds float* %tmp19408, i64 1
- %tmp19410 = getelementptr inbounds float* %tmp19409, i64 1
- %tmp19411 = getelementptr inbounds float* %tmp19410, i64 1
- %tmp19412 = getelementptr inbounds float* %tmp19411, i64 1
- %tmp19413 = getelementptr inbounds float* %tmp19412, i64 1
- %tmp19414 = getelementptr inbounds float* %tmp19413, i64 1
- %tmp19415 = getelementptr inbounds float* %tmp19414, i64 1
- %tmp19416 = getelementptr inbounds float* %tmp19415, i64 1
- %tmp19417 = getelementptr inbounds float* %tmp19416, i64 1
- %tmp19418 = getelementptr inbounds float* %tmp19417, i64 1
- %tmp19419 = getelementptr inbounds float* %tmp19418, i64 1
- %tmp19420 = getelementptr inbounds float* %tmp19419, i64 1
- %tmp19421 = getelementptr inbounds float* %tmp19420, i64 1
- %tmp19422 = getelementptr inbounds float* %tmp19421, i64 1
- %tmp19423 = getelementptr inbounds float* %tmp19422, i64 1
- %tmp19424 = getelementptr inbounds float* %tmp19423, i64 1
- %tmp19425 = getelementptr inbounds float* %tmp19424, i64 1
- %tmp19426 = getelementptr inbounds float* %tmp19425, i64 1
- %tmp19427 = getelementptr inbounds float* %tmp19426, i64 1
- %tmp19428 = getelementptr inbounds float* %tmp19427, i64 1
- %tmp19429 = getelementptr inbounds float* %tmp19428, i64 1
- %tmp19430 = getelementptr inbounds float* %tmp19429, i64 1
- %tmp19431 = getelementptr inbounds float* %tmp19430, i64 1
- %tmp19432 = getelementptr inbounds float* %tmp19431, i64 1
- %tmp19433 = getelementptr inbounds float* %tmp19432, i64 1
- %tmp19434 = getelementptr inbounds float* %tmp19433, i64 1
- %tmp19435 = getelementptr inbounds float* %tmp19434, i64 1
- %tmp19436 = getelementptr inbounds float* %tmp19435, i64 1
- %tmp19437 = getelementptr inbounds float* %tmp19436, i64 1
- %tmp19438 = getelementptr inbounds float* %tmp19437, i64 1
- %tmp19439 = getelementptr inbounds float* %tmp19438, i64 1
- %tmp19440 = getelementptr inbounds float* %tmp19439, i64 1
- %tmp19441 = getelementptr inbounds float* %tmp19440, i64 1
- %tmp19442 = getelementptr inbounds float* %tmp19441, i64 1
- %tmp19443 = getelementptr inbounds float* %tmp19442, i64 1
- %tmp19444 = getelementptr inbounds float* %tmp19443, i64 1
- %tmp19445 = getelementptr inbounds float* %tmp19444, i64 1
- %tmp19446 = getelementptr inbounds float* %tmp19445, i64 1
- %tmp19447 = getelementptr inbounds float* %tmp19446, i64 1
- %tmp19448 = getelementptr inbounds float* %tmp19447, i64 1
- %tmp19449 = getelementptr inbounds float* %tmp19448, i64 1
- %tmp19450 = getelementptr inbounds float* %tmp19449, i64 1
- %tmp19451 = getelementptr inbounds float* %tmp19450, i64 1
- %tmp19452 = getelementptr inbounds float* %tmp19451, i64 1
- %tmp19453 = getelementptr inbounds float* %tmp19452, i64 1
- %tmp19454 = getelementptr inbounds float* %tmp19453, i64 1
- %tmp19455 = getelementptr inbounds float* %tmp19454, i64 1
- %tmp19456 = getelementptr inbounds float* %tmp19455, i64 1
- %tmp19457 = getelementptr inbounds float* %tmp19456, i64 1
- %tmp19458 = getelementptr inbounds float* %tmp19457, i64 1
- %tmp19459 = getelementptr inbounds float* %tmp19458, i64 1
- %tmp19460 = getelementptr inbounds float* %tmp19459, i64 1
- %tmp19461 = getelementptr inbounds float* %tmp19460, i64 1
- %tmp19462 = getelementptr inbounds float* %tmp19461, i64 1
- %tmp19463 = getelementptr inbounds float* %tmp19462, i64 1
- %tmp19464 = getelementptr inbounds float* %tmp19463, i64 1
- %tmp19465 = getelementptr inbounds float* %tmp19464, i64 1
- %tmp19466 = getelementptr inbounds float* %tmp19465, i64 1
- %tmp19467 = getelementptr inbounds float* %tmp19466, i64 1
- %tmp19468 = getelementptr inbounds float* %tmp19467, i64 1
- %tmp19469 = getelementptr inbounds float* %tmp19468, i64 1
- %tmp19470 = getelementptr inbounds float* %tmp19469, i64 1
- %tmp19471 = getelementptr inbounds float* %tmp19470, i64 1
- %tmp19472 = getelementptr inbounds float* %tmp19471, i64 1
- %tmp19473 = getelementptr inbounds float* %tmp19472, i64 1
- %tmp19474 = getelementptr inbounds float* %tmp19473, i64 1
- %tmp19475 = getelementptr inbounds float* %tmp19474, i64 1
- %tmp19476 = getelementptr inbounds float* %tmp19475, i64 1
- %tmp19477 = getelementptr inbounds float* %tmp19476, i64 1
- %tmp19478 = getelementptr inbounds float* %tmp19477, i64 1
- %tmp19479 = getelementptr inbounds float* %tmp19478, i64 1
- %tmp19480 = getelementptr inbounds float* %tmp19479, i64 1
- %tmp19481 = getelementptr inbounds float* %tmp19480, i64 1
- %tmp19482 = getelementptr inbounds float* %tmp19481, i64 1
- %tmp19483 = getelementptr inbounds float* %tmp19482, i64 1
- %tmp19484 = getelementptr inbounds float* %tmp19483, i64 1
- %tmp19485 = getelementptr inbounds float* %tmp19484, i64 1
- %tmp19486 = getelementptr inbounds float* %tmp19485, i64 1
- %tmp19487 = getelementptr inbounds float* %tmp19486, i64 1
- %tmp19488 = getelementptr inbounds float* %tmp19487, i64 1
- %tmp19489 = getelementptr inbounds float* %tmp19488, i64 1
- %tmp19490 = getelementptr inbounds float* %tmp19489, i64 1
- %tmp19491 = getelementptr inbounds float* %tmp19490, i64 1
- %tmp19492 = getelementptr inbounds float* %tmp19491, i64 1
- %tmp19493 = getelementptr inbounds float* %tmp19492, i64 1
- %tmp19494 = getelementptr inbounds float* %tmp19493, i64 1
- %tmp19495 = getelementptr inbounds float* %tmp19494, i64 1
- %tmp19496 = getelementptr inbounds float* %tmp19495, i64 1
- %tmp19497 = getelementptr inbounds float* %tmp19496, i64 1
- %tmp19498 = getelementptr inbounds float* %tmp19497, i64 1
- %tmp19499 = getelementptr inbounds float* %tmp19498, i64 1
- %tmp19500 = getelementptr inbounds float* %tmp19499, i64 1
- %tmp19501 = getelementptr inbounds float* %tmp19500, i64 1
- %tmp19502 = getelementptr inbounds float* %tmp19501, i64 1
- %tmp19503 = getelementptr inbounds float* %tmp19502, i64 1
- %tmp19504 = getelementptr inbounds float* %tmp19503, i64 1
- %tmp19505 = getelementptr inbounds float* %tmp19504, i64 1
- %tmp19506 = getelementptr inbounds float* %tmp19505, i64 1
- %tmp19507 = getelementptr inbounds float* %tmp19506, i64 1
- %tmp19508 = getelementptr inbounds float* %tmp19507, i64 1
- %tmp19509 = getelementptr inbounds float* %tmp19508, i64 1
- %tmp19510 = getelementptr inbounds float* %tmp19509, i64 1
- %tmp19511 = getelementptr inbounds float* %tmp19510, i64 1
- %tmp19512 = getelementptr inbounds float* %tmp19511, i64 1
- %tmp19513 = getelementptr inbounds float* %tmp19512, i64 1
- %tmp19514 = getelementptr inbounds float* %tmp19513, i64 1
- %tmp19515 = getelementptr inbounds float* %tmp19514, i64 1
- %tmp19516 = getelementptr inbounds float* %tmp19515, i64 1
- %tmp19517 = getelementptr inbounds float* %tmp19516, i64 1
- %tmp19518 = getelementptr inbounds float* %tmp19517, i64 1
- %tmp19519 = getelementptr inbounds float* %tmp19518, i64 1
- %tmp19520 = getelementptr inbounds float* %tmp19519, i64 1
- %tmp19521 = getelementptr inbounds float* %tmp19520, i64 1
- %tmp19522 = getelementptr inbounds float* %tmp19521, i64 1
- %tmp19523 = getelementptr inbounds float* %tmp19522, i64 1
- %tmp19524 = getelementptr inbounds float* %tmp19523, i64 1
- %tmp19525 = getelementptr inbounds float* %tmp19524, i64 1
- %tmp19526 = getelementptr inbounds float* %tmp19525, i64 1
- %tmp19527 = getelementptr inbounds float* %tmp19526, i64 1
- %tmp19528 = getelementptr inbounds float* %tmp19527, i64 1
- %tmp19529 = getelementptr inbounds float* %tmp19528, i64 1
- %tmp19530 = getelementptr inbounds float* %tmp19529, i64 1
- %tmp19531 = getelementptr inbounds float* %tmp19530, i64 1
- %tmp19532 = getelementptr inbounds float* %tmp19531, i64 1
- %tmp19533 = getelementptr inbounds float* %tmp19532, i64 1
- %tmp19534 = getelementptr inbounds float* %tmp19533, i64 1
- %tmp19535 = getelementptr inbounds float* %tmp19534, i64 1
- %tmp19536 = getelementptr inbounds float* %tmp19535, i64 1
- %tmp19537 = getelementptr inbounds float* %tmp19536, i64 1
- %tmp19538 = getelementptr inbounds float* %tmp19537, i64 1
- %tmp19539 = getelementptr inbounds float* %tmp19538, i64 1
- %tmp19540 = getelementptr inbounds float* %tmp19539, i64 1
- %tmp19541 = getelementptr inbounds float* %tmp19540, i64 1
- %tmp19542 = getelementptr inbounds float* %tmp19541, i64 1
- %tmp19543 = getelementptr inbounds float* %tmp19542, i64 1
- %tmp19544 = getelementptr inbounds float* %tmp19543, i64 1
- %tmp19545 = getelementptr inbounds float* %tmp19544, i64 1
- %tmp19546 = getelementptr inbounds float* %tmp19545, i64 1
- %tmp19547 = getelementptr inbounds float* %tmp19546, i64 1
- %tmp19548 = getelementptr inbounds float* %tmp19547, i64 1
- %tmp19549 = getelementptr inbounds float* %tmp19548, i64 1
- %tmp19550 = getelementptr inbounds float* %tmp19549, i64 1
- %tmp19551 = getelementptr inbounds float* %tmp19550, i64 1
- %tmp19552 = getelementptr inbounds float* %tmp19551, i64 1
- %tmp19553 = getelementptr inbounds float* %tmp19552, i64 1
- %tmp19554 = getelementptr inbounds float* %tmp19553, i64 1
- %tmp19555 = getelementptr inbounds float* %tmp19554, i64 1
- %tmp19556 = getelementptr inbounds float* %tmp19555, i64 1
- %tmp19557 = getelementptr inbounds float* %tmp19556, i64 1
- %tmp19558 = getelementptr inbounds float* %tmp19557, i64 1
- %tmp19559 = getelementptr inbounds float* %tmp19558, i64 1
- %tmp19560 = getelementptr inbounds float* %tmp19559, i64 1
- %tmp19561 = getelementptr inbounds float* %tmp19560, i64 1
- %tmp19562 = getelementptr inbounds float* %tmp19561, i64 1
- %tmp19563 = getelementptr inbounds float* %tmp19562, i64 1
- %tmp19564 = getelementptr inbounds float* %tmp19563, i64 1
- %tmp19565 = getelementptr inbounds float* %tmp19564, i64 1
- %tmp19566 = getelementptr inbounds float* %tmp19565, i64 1
- %tmp19567 = getelementptr inbounds float* %tmp19566, i64 1
- %tmp19568 = getelementptr inbounds float* %tmp19567, i64 1
- %tmp19569 = getelementptr inbounds float* %tmp19568, i64 1
- %tmp19570 = getelementptr inbounds float* %tmp19569, i64 1
- %tmp19571 = getelementptr inbounds float* %tmp19570, i64 1
- %tmp19572 = getelementptr inbounds float* %tmp19571, i64 1
- %tmp19573 = getelementptr inbounds float* %tmp19572, i64 1
- %tmp19574 = getelementptr inbounds float* %tmp19573, i64 1
- %tmp19575 = getelementptr inbounds float* %tmp19574, i64 1
- %tmp19576 = getelementptr inbounds float* %tmp19575, i64 1
- %tmp19577 = getelementptr inbounds float* %tmp19576, i64 1
- %tmp19578 = getelementptr inbounds float* %tmp19577, i64 1
- %tmp19579 = getelementptr inbounds float* %tmp19578, i64 1
- %tmp19580 = getelementptr inbounds float* %tmp19579, i64 1
- %tmp19581 = getelementptr inbounds float* %tmp19580, i64 1
- %tmp19582 = getelementptr inbounds float* %tmp19581, i64 1
- %tmp19583 = getelementptr inbounds float* %tmp19582, i64 1
- %tmp19584 = getelementptr inbounds float* %tmp19583, i64 1
- %tmp19585 = getelementptr inbounds float* %tmp19584, i64 1
- %tmp19586 = getelementptr inbounds float* %tmp19585, i64 1
- %tmp19587 = getelementptr inbounds float* %tmp19586, i64 1
- %tmp19588 = getelementptr inbounds float* %tmp19587, i64 1
- %tmp19589 = getelementptr inbounds float* %tmp19588, i64 1
- %tmp19590 = getelementptr inbounds float* %tmp19589, i64 1
- %tmp19591 = getelementptr inbounds float* %tmp19590, i64 1
- %tmp19592 = getelementptr inbounds float* %tmp19591, i64 1
- %tmp19593 = getelementptr inbounds float* %tmp19592, i64 1
- %tmp19594 = getelementptr inbounds float* %tmp19593, i64 1
- %tmp19595 = getelementptr inbounds float* %tmp19594, i64 1
- %tmp19596 = getelementptr inbounds float* %tmp19595, i64 1
- %tmp19597 = getelementptr inbounds float* %tmp19596, i64 1
- %tmp19598 = getelementptr inbounds float* %tmp19597, i64 1
- %tmp19599 = getelementptr inbounds float* %tmp19598, i64 1
- %tmp19600 = getelementptr inbounds float* %tmp19599, i64 1
- %tmp19601 = getelementptr inbounds float* %tmp19600, i64 1
- %tmp19602 = getelementptr inbounds float* %tmp19601, i64 1
- %tmp19603 = getelementptr inbounds float* %tmp19602, i64 1
- %tmp19604 = getelementptr inbounds float* %tmp19603, i64 1
- %tmp19605 = getelementptr inbounds float* %tmp19604, i64 1
- %tmp19606 = getelementptr inbounds float* %tmp19605, i64 1
- %tmp19607 = getelementptr inbounds float* %tmp19606, i64 1
- %tmp19608 = getelementptr inbounds float* %tmp19607, i64 1
- %tmp19609 = getelementptr inbounds float* %tmp19608, i64 1
- %tmp19610 = getelementptr inbounds float* %tmp19609, i64 1
- %tmp19611 = getelementptr inbounds float* %tmp19610, i64 1
- %tmp19612 = getelementptr inbounds float* %tmp19611, i64 1
- %tmp19613 = getelementptr inbounds float* %tmp19612, i64 1
- %tmp19614 = getelementptr inbounds float* %tmp19613, i64 1
- %tmp19615 = getelementptr inbounds float* %tmp19614, i64 1
- %tmp19616 = getelementptr inbounds float* %tmp19615, i64 1
- %tmp19617 = getelementptr inbounds float* %tmp19616, i64 1
- %tmp19618 = getelementptr inbounds float* %tmp19617, i64 1
- %tmp19619 = getelementptr inbounds float* %tmp19618, i64 1
- %tmp19620 = getelementptr inbounds float* %tmp19619, i64 1
- %tmp19621 = getelementptr inbounds float* %tmp19620, i64 1
- %tmp19622 = getelementptr inbounds float* %tmp19621, i64 1
- %tmp19623 = getelementptr inbounds float* %tmp19622, i64 1
- %tmp19624 = getelementptr inbounds float* %tmp19623, i64 1
- %tmp19625 = getelementptr inbounds float* %tmp19624, i64 1
- %tmp19626 = getelementptr inbounds float* %tmp19625, i64 1
- %tmp19627 = getelementptr inbounds float* %tmp19626, i64 1
- %tmp19628 = getelementptr inbounds float* %tmp19627, i64 1
- %tmp19629 = getelementptr inbounds float* %tmp19628, i64 1
- %tmp19630 = getelementptr inbounds float* %tmp19629, i64 1
- %tmp19631 = getelementptr inbounds float* %tmp19630, i64 1
- %tmp19632 = getelementptr inbounds float* %tmp19631, i64 1
- %tmp19633 = getelementptr inbounds float* %tmp19632, i64 1
- %tmp19634 = getelementptr inbounds float* %tmp19633, i64 1
- %tmp19635 = getelementptr inbounds float* %tmp19634, i64 1
- %tmp19636 = getelementptr inbounds float* %tmp19635, i64 1
- %tmp19637 = getelementptr inbounds float* %tmp19636, i64 1
- %tmp19638 = getelementptr inbounds float* %tmp19637, i64 1
- %tmp19639 = getelementptr inbounds float* %tmp19638, i64 1
- %tmp19640 = getelementptr inbounds float* %tmp19639, i64 1
- %tmp19641 = getelementptr inbounds float* %tmp19640, i64 1
- %tmp19642 = getelementptr inbounds float* %tmp19641, i64 1
- %tmp19643 = getelementptr inbounds float* %tmp19642, i64 1
- %tmp19644 = getelementptr inbounds float* %tmp19643, i64 1
- %tmp19645 = getelementptr inbounds float* %tmp19644, i64 1
- %tmp19646 = getelementptr inbounds float* %tmp19645, i64 1
- %tmp19647 = getelementptr inbounds float* %tmp19646, i64 1
- %tmp19648 = getelementptr inbounds float* %tmp19647, i64 1
- %tmp19649 = getelementptr inbounds float* %tmp19648, i64 1
- %tmp19650 = getelementptr inbounds float* %tmp19649, i64 1
- %tmp19651 = getelementptr inbounds float* %tmp19650, i64 1
- %tmp19652 = getelementptr inbounds float* %tmp19651, i64 1
- %tmp19653 = getelementptr inbounds float* %tmp19652, i64 1
- %tmp19654 = getelementptr inbounds float* %tmp19653, i64 1
- %tmp19655 = getelementptr inbounds float* %tmp19654, i64 1
- %tmp19656 = getelementptr inbounds float* %tmp19655, i64 1
- %tmp19657 = getelementptr inbounds float* %tmp19656, i64 1
- %tmp19658 = getelementptr inbounds float* %tmp19657, i64 1
- %tmp19659 = getelementptr inbounds float* %tmp19658, i64 1
- %tmp19660 = getelementptr inbounds float* %tmp19659, i64 1
- %tmp19661 = getelementptr inbounds float* %tmp19660, i64 1
- %tmp19662 = getelementptr inbounds float* %tmp19661, i64 1
- %tmp19663 = getelementptr inbounds float* %tmp19662, i64 1
- %tmp19664 = getelementptr inbounds float* %tmp19663, i64 1
- %tmp19665 = getelementptr inbounds float* %tmp19664, i64 1
- %tmp19666 = getelementptr inbounds float* %tmp19665, i64 1
- %tmp19667 = getelementptr inbounds float* %tmp19666, i64 1
- %tmp19668 = getelementptr inbounds float* %tmp19667, i64 1
- %tmp19669 = getelementptr inbounds float* %tmp19668, i64 1
- %tmp19670 = getelementptr inbounds float* %tmp19669, i64 1
- %tmp19671 = getelementptr inbounds float* %tmp19670, i64 1
- %tmp19672 = getelementptr inbounds float* %tmp19671, i64 1
- %tmp19673 = getelementptr inbounds float* %tmp19672, i64 1
- %tmp19674 = getelementptr inbounds float* %tmp19673, i64 1
- %tmp19675 = getelementptr inbounds float* %tmp19674, i64 1
- %tmp19676 = getelementptr inbounds float* %tmp19675, i64 1
- %tmp19677 = getelementptr inbounds float* %tmp19676, i64 1
- %tmp19678 = getelementptr inbounds float* %tmp19677, i64 1
- %tmp19679 = getelementptr inbounds float* %tmp19678, i64 1
- %tmp19680 = getelementptr inbounds float* %tmp19679, i64 1
- %tmp19681 = getelementptr inbounds float* %tmp19680, i64 1
- %tmp19682 = getelementptr inbounds float* %tmp19681, i64 1
- %tmp19683 = getelementptr inbounds float* %tmp19682, i64 1
- %tmp19684 = getelementptr inbounds float* %tmp19683, i64 1
- %tmp19685 = getelementptr inbounds float* %tmp19684, i64 1
- %tmp19686 = getelementptr inbounds float* %tmp19685, i64 1
- %tmp19687 = getelementptr inbounds float* %tmp19686, i64 1
- %tmp19688 = getelementptr inbounds float* %tmp19687, i64 1
- %tmp19689 = getelementptr inbounds float* %tmp19688, i64 1
- %tmp19690 = getelementptr inbounds float* %tmp19689, i64 1
- %tmp19691 = getelementptr inbounds float* %tmp19690, i64 1
- %tmp19692 = getelementptr inbounds float* %tmp19691, i64 1
- %tmp19693 = getelementptr inbounds float* %tmp19692, i64 1
- %tmp19694 = getelementptr inbounds float* %tmp19693, i64 1
- %tmp19695 = getelementptr inbounds float* %tmp19694, i64 1
- %tmp19696 = getelementptr inbounds float* %tmp19695, i64 1
- %tmp19697 = getelementptr inbounds float* %tmp19696, i64 1
- %tmp19698 = getelementptr inbounds float* %tmp19697, i64 1
- %tmp19699 = getelementptr inbounds float* %tmp19698, i64 1
- %tmp19700 = getelementptr inbounds float* %tmp19699, i64 1
- %tmp19701 = getelementptr inbounds float* %tmp19700, i64 1
- %tmp19702 = getelementptr inbounds float* %tmp19701, i64 1
- %tmp19703 = getelementptr inbounds float* %tmp19702, i64 1
- %tmp19704 = getelementptr inbounds float* %tmp19703, i64 1
- %tmp19705 = getelementptr inbounds float* %tmp19704, i64 1
- %tmp19706 = getelementptr inbounds float* %tmp19705, i64 1
- %tmp19707 = getelementptr inbounds float* %tmp19706, i64 1
- %tmp19708 = getelementptr inbounds float* %tmp19707, i64 1
- %tmp19709 = getelementptr inbounds float* %tmp19708, i64 1
- %tmp19710 = getelementptr inbounds float* %tmp19709, i64 1
- %tmp19711 = getelementptr inbounds float* %tmp19710, i64 1
- %tmp19712 = getelementptr inbounds float* %tmp19711, i64 1
- %tmp19713 = getelementptr inbounds float* %tmp19712, i64 1
- %tmp19714 = getelementptr inbounds float* %tmp19713, i64 1
- %tmp19715 = getelementptr inbounds float* %tmp19714, i64 1
- %tmp19716 = getelementptr inbounds float* %tmp19715, i64 1
- %tmp19717 = getelementptr inbounds float* %tmp19716, i64 1
- %tmp19718 = getelementptr inbounds float* %tmp19717, i64 1
- %tmp19719 = getelementptr inbounds float* %tmp19718, i64 1
- %tmp19720 = getelementptr inbounds float* %tmp19719, i64 1
- %tmp19721 = getelementptr inbounds float* %tmp19720, i64 1
- %tmp19722 = getelementptr inbounds float* %tmp19721, i64 1
- %tmp19723 = getelementptr inbounds float* %tmp19722, i64 1
- %tmp19724 = getelementptr inbounds float* %tmp19723, i64 1
- %tmp19725 = getelementptr inbounds float* %tmp19724, i64 1
- %tmp19726 = getelementptr inbounds float* %tmp19725, i64 1
- %tmp19727 = getelementptr inbounds float* %tmp19726, i64 1
- %tmp19728 = getelementptr inbounds float* %tmp19727, i64 1
- %tmp19729 = getelementptr inbounds float* %tmp19728, i64 1
- %tmp19730 = getelementptr inbounds float* %tmp19729, i64 1
- %tmp19731 = getelementptr inbounds float* %tmp19730, i64 1
- %tmp19732 = getelementptr inbounds float* %tmp19731, i64 1
- %tmp19733 = getelementptr inbounds float* %tmp19732, i64 1
- %tmp19734 = getelementptr inbounds float* %tmp19733, i64 1
- %tmp19735 = getelementptr inbounds float* %tmp19734, i64 1
- %tmp19736 = getelementptr inbounds float* %tmp19735, i64 1
- %tmp19737 = getelementptr inbounds float* %tmp19736, i64 1
- %tmp19738 = getelementptr inbounds float* %tmp19737, i64 1
- %tmp19739 = getelementptr inbounds float* %tmp19738, i64 1
- %tmp19740 = getelementptr inbounds float* %tmp19739, i64 1
- %tmp19741 = getelementptr inbounds float* %tmp19740, i64 1
- %tmp19742 = getelementptr inbounds float* %tmp19741, i64 1
- %tmp19743 = getelementptr inbounds float* %tmp19742, i64 1
- %tmp19744 = getelementptr inbounds float* %tmp19743, i64 1
- %tmp19745 = getelementptr inbounds float* %tmp19744, i64 1
- %tmp19746 = getelementptr inbounds float* %tmp19745, i64 1
- %tmp19747 = getelementptr inbounds float* %tmp19746, i64 1
- %tmp19748 = getelementptr inbounds float* %tmp19747, i64 1
- %tmp19749 = getelementptr inbounds float* %tmp19748, i64 1
- %tmp19750 = getelementptr inbounds float* %tmp19749, i64 1
- %tmp19751 = getelementptr inbounds float* %tmp19750, i64 1
- %tmp19752 = getelementptr inbounds float* %tmp19751, i64 1
- %tmp19753 = getelementptr inbounds float* %tmp19752, i64 1
- %tmp19754 = getelementptr inbounds float* %tmp19753, i64 1
- %tmp19755 = getelementptr inbounds float* %tmp19754, i64 1
- %tmp19756 = getelementptr inbounds float* %tmp19755, i64 1
- %tmp19757 = getelementptr inbounds float* %tmp19756, i64 1
- %tmp19758 = getelementptr inbounds float* %tmp19757, i64 1
- %tmp19759 = getelementptr inbounds float* %tmp19758, i64 1
- %tmp19760 = getelementptr inbounds float* %tmp19759, i64 1
- %tmp19761 = getelementptr inbounds float* %tmp19760, i64 1
- %tmp19762 = getelementptr inbounds float* %tmp19761, i64 1
- %tmp19763 = getelementptr inbounds float* %tmp19762, i64 1
- %tmp19764 = getelementptr inbounds float* %tmp19763, i64 1
- %tmp19765 = getelementptr inbounds float* %tmp19764, i64 1
- %tmp19766 = getelementptr inbounds float* %tmp19765, i64 1
- %tmp19767 = getelementptr inbounds float* %tmp19766, i64 1
- %tmp19768 = getelementptr inbounds float* %tmp19767, i64 1
- %tmp19769 = getelementptr inbounds float* %tmp19768, i64 1
- %tmp19770 = getelementptr inbounds float* %tmp19769, i64 1
- %tmp19771 = getelementptr inbounds float* %tmp19770, i64 1
- %tmp19772 = getelementptr inbounds float* %tmp19771, i64 1
- %tmp19773 = getelementptr inbounds float* %tmp19772, i64 1
- %tmp19774 = getelementptr inbounds float* %tmp19773, i64 1
- %tmp19775 = getelementptr inbounds float* %tmp19774, i64 1
- %tmp19776 = getelementptr inbounds float* %tmp19775, i64 1
- %tmp19777 = getelementptr inbounds float* %tmp19776, i64 1
- %tmp19778 = getelementptr inbounds float* %tmp19777, i64 1
- %tmp19779 = getelementptr inbounds float* %tmp19778, i64 1
- %tmp19780 = getelementptr inbounds float* %tmp19779, i64 1
- %tmp19781 = getelementptr inbounds float* %tmp19780, i64 1
- %tmp19782 = getelementptr inbounds float* %tmp19781, i64 1
- %tmp19783 = getelementptr inbounds float* %tmp19782, i64 1
- %tmp19784 = getelementptr inbounds float* %tmp19783, i64 1
- %tmp19785 = getelementptr inbounds float* %tmp19784, i64 1
- %tmp19786 = getelementptr inbounds float* %tmp19785, i64 1
- %tmp19787 = getelementptr inbounds float* %tmp19786, i64 1
- %tmp19788 = getelementptr inbounds float* %tmp19787, i64 1
- %tmp19789 = getelementptr inbounds float* %tmp19788, i64 1
- %tmp19790 = getelementptr inbounds float* %tmp19789, i64 1
- %tmp19791 = getelementptr inbounds float* %tmp19790, i64 1
- %tmp19792 = getelementptr inbounds float* %tmp19791, i64 1
- %tmp19793 = getelementptr inbounds float* %tmp19792, i64 1
- %tmp19794 = getelementptr inbounds float* %tmp19793, i64 1
- %tmp19795 = getelementptr inbounds float* %tmp19794, i64 1
- %tmp19796 = getelementptr inbounds float* %tmp19795, i64 1
- %tmp19797 = getelementptr inbounds float* %tmp19796, i64 1
- %tmp19798 = getelementptr inbounds float* %tmp19797, i64 1
- %tmp19799 = getelementptr inbounds float* %tmp19798, i64 1
- %tmp19800 = getelementptr inbounds float* %tmp19799, i64 1
- %tmp19801 = getelementptr inbounds float* %tmp19800, i64 1
- %tmp19802 = getelementptr inbounds float* %tmp19801, i64 1
- %tmp19803 = getelementptr inbounds float* %tmp19802, i64 1
- %tmp19804 = getelementptr inbounds float* %tmp19803, i64 1
- %tmp19805 = getelementptr inbounds float* %tmp19804, i64 1
- %tmp19806 = getelementptr inbounds float* %tmp19805, i64 1
- %tmp19807 = getelementptr inbounds float* %tmp19806, i64 1
- %tmp19808 = getelementptr inbounds float* %tmp19807, i64 1
- %tmp19809 = getelementptr inbounds float* %tmp19808, i64 1
- %tmp19810 = getelementptr inbounds float* %tmp19809, i64 1
- %tmp19811 = getelementptr inbounds float* %tmp19810, i64 1
- %tmp19812 = getelementptr inbounds float* %tmp19811, i64 1
- %tmp19813 = getelementptr inbounds float* %tmp19812, i64 1
- %tmp19814 = getelementptr inbounds float* %tmp19813, i64 1
- %tmp19815 = getelementptr inbounds float* %tmp19814, i64 1
- %tmp19816 = getelementptr inbounds float* %tmp19815, i64 1
- %tmp19817 = getelementptr inbounds float* %tmp19816, i64 1
- %tmp19818 = getelementptr inbounds float* %tmp19817, i64 1
- %tmp19819 = getelementptr inbounds float* %tmp19818, i64 1
- %tmp19820 = getelementptr inbounds float* %tmp19819, i64 1
- %tmp19821 = getelementptr inbounds float* %tmp19820, i64 1
- %tmp19822 = getelementptr inbounds float* %tmp19821, i64 1
- %tmp19823 = getelementptr inbounds float* %tmp19822, i64 1
- %tmp19824 = getelementptr inbounds float* %tmp19823, i64 1
- %tmp19825 = getelementptr inbounds float* %tmp19824, i64 1
- %tmp19826 = getelementptr inbounds float* %tmp19825, i64 1
- %tmp19827 = getelementptr inbounds float* %tmp19826, i64 1
- %tmp19828 = getelementptr inbounds float* %tmp19827, i64 1
- %tmp19829 = getelementptr inbounds float* %tmp19828, i64 1
- %tmp19830 = getelementptr inbounds float* %tmp19829, i64 1
- %tmp19831 = getelementptr inbounds float* %tmp19830, i64 1
- %tmp19832 = getelementptr inbounds float* %tmp19831, i64 1
- %tmp19833 = getelementptr inbounds float* %tmp19832, i64 1
- %tmp19834 = getelementptr inbounds float* %tmp19833, i64 1
- %tmp19835 = getelementptr inbounds float* %tmp19834, i64 1
- %tmp19836 = getelementptr inbounds float* %tmp19835, i64 1
- %tmp19837 = getelementptr inbounds float* %tmp19836, i64 1
- %tmp19838 = getelementptr inbounds float* %tmp19837, i64 1
- %tmp19839 = getelementptr inbounds float* %tmp19838, i64 1
- %tmp19840 = getelementptr inbounds float* %tmp19839, i64 1
- %tmp19841 = getelementptr inbounds float* %tmp19840, i64 1
- %tmp19842 = getelementptr inbounds float* %tmp19841, i64 1
- %tmp19843 = getelementptr inbounds float* %tmp19842, i64 1
- %tmp19844 = getelementptr inbounds float* %tmp19843, i64 1
- %tmp19845 = getelementptr inbounds float* %tmp19844, i64 1
- %tmp19846 = getelementptr inbounds float* %tmp19845, i64 1
- %tmp19847 = getelementptr inbounds float* %tmp19846, i64 1
- %tmp19848 = getelementptr inbounds float* %tmp19847, i64 1
- %tmp19849 = getelementptr inbounds float* %tmp19848, i64 1
- %tmp19850 = getelementptr inbounds float* %tmp19849, i64 1
- %tmp19851 = getelementptr inbounds float* %tmp19850, i64 1
- %tmp19852 = getelementptr inbounds float* %tmp19851, i64 1
- %tmp19853 = getelementptr inbounds float* %tmp19852, i64 1
- %tmp19854 = getelementptr inbounds float* %tmp19853, i64 1
- %tmp19855 = getelementptr inbounds float* %tmp19854, i64 1
- %tmp19856 = getelementptr inbounds float* %tmp19855, i64 1
- %tmp19857 = getelementptr inbounds float* %tmp19856, i64 1
- %tmp19858 = getelementptr inbounds float* %tmp19857, i64 1
- %tmp19859 = getelementptr inbounds float* %tmp19858, i64 1
- %tmp19860 = getelementptr inbounds float* %tmp19859, i64 1
- %tmp19861 = getelementptr inbounds float* %tmp19860, i64 1
- %tmp19862 = getelementptr inbounds float* %tmp19861, i64 1
- %tmp19863 = getelementptr inbounds float* %tmp19862, i64 1
- %tmp19864 = getelementptr inbounds float* %tmp19863, i64 1
- %tmp19865 = getelementptr inbounds float* %tmp19864, i64 1
- %tmp19866 = getelementptr inbounds float* %tmp19865, i64 1
- %tmp19867 = getelementptr inbounds float* %tmp19866, i64 1
- %tmp19868 = getelementptr inbounds float* %tmp19867, i64 1
- %tmp19869 = getelementptr inbounds float* %tmp19868, i64 1
- %tmp19870 = getelementptr inbounds float* %tmp19869, i64 1
- %tmp19871 = getelementptr inbounds float* %tmp19870, i64 1
- %tmp19872 = getelementptr inbounds float* %tmp19871, i64 1
- %tmp19873 = getelementptr inbounds float* %tmp19872, i64 1
- %tmp19874 = getelementptr inbounds float* %tmp19873, i64 1
- %tmp19875 = getelementptr inbounds float* %tmp19874, i64 1
- %tmp19876 = getelementptr inbounds float* %tmp19875, i64 1
- %tmp19877 = getelementptr inbounds float* %tmp19876, i64 1
- %tmp19878 = getelementptr inbounds float* %tmp19877, i64 1
- %tmp19879 = getelementptr inbounds float* %tmp19878, i64 1
- %tmp19880 = getelementptr inbounds float* %tmp19879, i64 1
- %tmp19881 = getelementptr inbounds float* %tmp19880, i64 1
- %tmp19882 = getelementptr inbounds float* %tmp19881, i64 1
- %tmp19883 = getelementptr inbounds float* %tmp19882, i64 1
- %tmp19884 = getelementptr inbounds float* %tmp19883, i64 1
- %tmp19885 = getelementptr inbounds float* %tmp19884, i64 1
- %tmp19886 = getelementptr inbounds float* %tmp19885, i64 1
- %tmp19887 = getelementptr inbounds float* %tmp19886, i64 1
- %tmp19888 = getelementptr inbounds float* %tmp19887, i64 1
- %tmp19889 = getelementptr inbounds float* %tmp19888, i64 1
- %tmp19890 = getelementptr inbounds float* %tmp19889, i64 1
- %tmp19891 = getelementptr inbounds float* %tmp19890, i64 1
- %tmp19892 = getelementptr inbounds float* %tmp19891, i64 1
- %tmp19893 = getelementptr inbounds float* %tmp19892, i64 1
- %tmp19894 = getelementptr inbounds float* %tmp19893, i64 1
- %tmp19895 = getelementptr inbounds float* %tmp19894, i64 1
- %tmp19896 = getelementptr inbounds float* %tmp19895, i64 1
- %tmp19897 = getelementptr inbounds float* %tmp19896, i64 1
- %tmp19898 = getelementptr inbounds float* %tmp19897, i64 1
- %tmp19899 = getelementptr inbounds float* %tmp19898, i64 1
- %tmp19900 = getelementptr inbounds float* %tmp19899, i64 1
- %tmp19901 = getelementptr inbounds float* %tmp19900, i64 1
- %tmp19902 = getelementptr inbounds float* %tmp19901, i64 1
- %tmp19903 = getelementptr inbounds float* %tmp19902, i64 1
- %tmp19904 = getelementptr inbounds float* %tmp19903, i64 1
- %tmp19905 = getelementptr inbounds float* %tmp19904, i64 1
- %tmp19906 = getelementptr inbounds float* %tmp19905, i64 1
- %tmp19907 = getelementptr inbounds float* %tmp19906, i64 1
- %tmp19908 = getelementptr inbounds float* %tmp19907, i64 1
- %tmp19909 = getelementptr inbounds float* %tmp19908, i64 1
- %tmp19910 = getelementptr inbounds float* %tmp19909, i64 1
- %tmp19911 = getelementptr inbounds float* %tmp19910, i64 1
- %tmp19912 = getelementptr inbounds float* %tmp19911, i64 1
- %tmp19913 = getelementptr inbounds float* %tmp19912, i64 1
- %tmp19914 = getelementptr inbounds float* %tmp19913, i64 1
- %tmp19915 = getelementptr inbounds float* %tmp19914, i64 1
- %tmp19916 = getelementptr inbounds float* %tmp19915, i64 1
- %tmp19917 = getelementptr inbounds float* %tmp19916, i64 1
- %tmp19918 = getelementptr inbounds float* %tmp19917, i64 1
- %tmp19919 = getelementptr inbounds float* %tmp19918, i64 1
- %tmp19920 = getelementptr inbounds float* %tmp19919, i64 1
- %tmp19921 = getelementptr inbounds float* %tmp19920, i64 1
- %tmp19922 = getelementptr inbounds float* %tmp19921, i64 1
- %tmp19923 = getelementptr inbounds float* %tmp19922, i64 1
- %tmp19924 = getelementptr inbounds float* %tmp19923, i64 1
- %tmp19925 = getelementptr inbounds float* %tmp19924, i64 1
- %tmp19926 = getelementptr inbounds float* %tmp19925, i64 1
- %tmp19927 = getelementptr inbounds float* %tmp19926, i64 1
- %tmp19928 = getelementptr inbounds float* %tmp19927, i64 1
- %tmp19929 = getelementptr inbounds float* %tmp19928, i64 1
- %tmp19930 = getelementptr inbounds float* %tmp19929, i64 1
- %tmp19931 = getelementptr inbounds float* %tmp19930, i64 1
- %tmp19932 = getelementptr inbounds float* %tmp19931, i64 1
- %tmp19933 = getelementptr inbounds float* %tmp19932, i64 1
- %tmp19934 = getelementptr inbounds float* %tmp19933, i64 1
- %tmp19935 = getelementptr inbounds float* %tmp19934, i64 1
- %tmp19936 = getelementptr inbounds float* %tmp19935, i64 1
- %tmp19937 = getelementptr inbounds float* %tmp19936, i64 1
- %tmp19938 = getelementptr inbounds float* %tmp19937, i64 1
- %tmp19939 = getelementptr inbounds float* %tmp19938, i64 1
- %tmp19940 = getelementptr inbounds float* %tmp19939, i64 1
- %tmp19941 = getelementptr inbounds float* %tmp19940, i64 1
- %tmp19942 = getelementptr inbounds float* %tmp19941, i64 1
- %tmp19943 = getelementptr inbounds float* %tmp19942, i64 1
- %tmp19944 = getelementptr inbounds float* %tmp19943, i64 1
- %tmp19945 = getelementptr inbounds float* %tmp19944, i64 1
- %tmp19946 = getelementptr inbounds float* %tmp19945, i64 1
- %tmp19947 = getelementptr inbounds float* %tmp19946, i64 1
- %tmp19948 = getelementptr inbounds float* %tmp19947, i64 1
- %tmp19949 = getelementptr inbounds float* %tmp19948, i64 1
- %tmp19950 = getelementptr inbounds float* %tmp19949, i64 1
- %tmp19951 = getelementptr inbounds float* %tmp19950, i64 1
- %tmp19952 = getelementptr inbounds float* %tmp19951, i64 1
- %tmp19953 = getelementptr inbounds float* %tmp19952, i64 1
- %tmp19954 = getelementptr inbounds float* %tmp19953, i64 1
- %tmp19955 = getelementptr inbounds float* %tmp19954, i64 1
- %tmp19956 = getelementptr inbounds float* %tmp19955, i64 1
- %tmp19957 = getelementptr inbounds float* %tmp19956, i64 1
- %tmp19958 = getelementptr inbounds float* %tmp19957, i64 1
- %tmp19959 = getelementptr inbounds float* %tmp19958, i64 1
- %tmp19960 = getelementptr inbounds float* %tmp19959, i64 1
- %tmp19961 = getelementptr inbounds float* %tmp19960, i64 1
- %tmp19962 = getelementptr inbounds float* %tmp19961, i64 1
- %tmp19963 = getelementptr inbounds float* %tmp19962, i64 1
- %tmp19964 = getelementptr inbounds float* %tmp19963, i64 1
- %tmp19965 = getelementptr inbounds float* %tmp19964, i64 1
- %tmp19966 = getelementptr inbounds float* %tmp19965, i64 1
- %tmp19967 = getelementptr inbounds float* %tmp19966, i64 1
- %tmp19968 = getelementptr inbounds float* %tmp19967, i64 1
- %tmp19969 = getelementptr inbounds float* %tmp19968, i64 1
- %tmp19970 = getelementptr inbounds float* %tmp19969, i64 1
- %tmp19971 = getelementptr inbounds float* %tmp19970, i64 1
- %tmp19972 = getelementptr inbounds float* %tmp19971, i64 1
- %tmp19973 = getelementptr inbounds float* %tmp19972, i64 1
- %tmp19974 = getelementptr inbounds float* %tmp19973, i64 1
- %tmp19975 = getelementptr inbounds float* %tmp19974, i64 1
- %tmp19976 = getelementptr inbounds float* %tmp19975, i64 1
- %tmp19977 = getelementptr inbounds float* %tmp19976, i64 1
- %tmp19978 = getelementptr inbounds float* %tmp19977, i64 1
- %tmp19979 = getelementptr inbounds float* %tmp19978, i64 1
- %tmp19980 = getelementptr inbounds float* %tmp19979, i64 1
- %tmp19981 = getelementptr inbounds float* %tmp19980, i64 1
- %tmp19982 = getelementptr inbounds float* %tmp19981, i64 1
- %tmp19983 = getelementptr inbounds float* %tmp19982, i64 1
- %tmp19984 = getelementptr inbounds float* %tmp19983, i64 1
- %tmp19985 = getelementptr inbounds float* %tmp19984, i64 1
- %tmp19986 = getelementptr inbounds float* %tmp19985, i64 1
- %tmp19987 = getelementptr inbounds float* %tmp19986, i64 1
- %tmp19988 = getelementptr inbounds float* %tmp19987, i64 1
- %tmp19989 = getelementptr inbounds float* %tmp19988, i64 1
- %tmp19990 = getelementptr inbounds float* %tmp19989, i64 1
- %tmp19991 = getelementptr inbounds float* %tmp19990, i64 1
- %tmp19992 = getelementptr inbounds float* %tmp19991, i64 1
- %tmp19993 = getelementptr inbounds float* %tmp19992, i64 1
- %tmp19994 = getelementptr inbounds float* %tmp19993, i64 1
- %tmp19995 = getelementptr inbounds float* %tmp19994, i64 1
- %tmp19996 = getelementptr inbounds float* %tmp19995, i64 1
- %tmp19997 = getelementptr inbounds float* %tmp19996, i64 1
- %tmp19998 = getelementptr inbounds float* %tmp19997, i64 1
- %tmp19999 = getelementptr inbounds float* %tmp19998, i64 1
- %tmp20000 = getelementptr inbounds float* %tmp19999, i64 1
- %tmp20001 = getelementptr inbounds float* %tmp20000, i64 1
- %tmp20002 = getelementptr inbounds float* %tmp20001, i64 1
- %tmp20003 = getelementptr inbounds float* %tmp20002, i64 1
- %tmp20004 = getelementptr inbounds float* %tmp20003, i64 1
- %tmp20005 = getelementptr inbounds float* %tmp20004, i64 1
- %tmp20006 = getelementptr inbounds float* %tmp20005, i64 1
- %tmp20007 = getelementptr inbounds float* %tmp20006, i64 1
- %tmp20008 = getelementptr inbounds float* %tmp20007, i64 1
- %tmp20009 = getelementptr inbounds float* %tmp20008, i64 1
- %tmp20010 = getelementptr inbounds float* %tmp20009, i64 1
- %tmp20011 = getelementptr inbounds float* %tmp20010, i64 1
- %tmp20012 = getelementptr inbounds float* %tmp20011, i64 1
- %tmp20013 = getelementptr inbounds float* %tmp20012, i64 1
- %tmp20014 = getelementptr inbounds float* %tmp20013, i64 1
- %tmp20015 = getelementptr inbounds float* %tmp20014, i64 1
- %tmp20016 = getelementptr inbounds float* %tmp20015, i64 1
- %tmp20017 = getelementptr inbounds float* %tmp20016, i64 1
- %tmp20018 = getelementptr inbounds float* %tmp20017, i64 1
- %tmp20019 = getelementptr inbounds float* %tmp20018, i64 1
- %tmp20020 = getelementptr inbounds float* %tmp20019, i64 1
- %tmp20021 = getelementptr inbounds float* %tmp20020, i64 1
- %tmp20022 = getelementptr inbounds float* %tmp20021, i64 1
- %tmp20023 = getelementptr inbounds float* %tmp20022, i64 1
- %tmp20024 = getelementptr inbounds float* %tmp20023, i64 1
- %tmp20025 = getelementptr inbounds float* %tmp20024, i64 1
- %tmp20026 = getelementptr inbounds float* %tmp20025, i64 1
- %tmp20027 = getelementptr inbounds float* %tmp20026, i64 1
- %tmp20028 = getelementptr inbounds float* %tmp20027, i64 1
- %tmp20029 = getelementptr inbounds float* %tmp20028, i64 1
- %tmp20030 = getelementptr inbounds float* %tmp20029, i64 1
- %tmp20031 = getelementptr inbounds float* %tmp20030, i64 1
- %tmp20032 = getelementptr inbounds float* %tmp20031, i64 1
- %tmp20033 = getelementptr inbounds float* %tmp20032, i64 1
- %tmp20034 = getelementptr inbounds float* %tmp20033, i64 1
- %tmp20035 = getelementptr inbounds float* %tmp20034, i64 1
- %tmp20036 = getelementptr inbounds float* %tmp20035, i64 1
- %tmp20037 = getelementptr inbounds float* %tmp20036, i64 1
- %tmp20038 = getelementptr inbounds float* %tmp20037, i64 1
- %tmp20039 = getelementptr inbounds float* %tmp20038, i64 1
- %tmp20040 = getelementptr inbounds float* %tmp20039, i64 1
- %tmp20041 = getelementptr inbounds float* %tmp20040, i64 1
- %tmp20042 = getelementptr inbounds float* %tmp20041, i64 1
- %tmp20043 = getelementptr inbounds float* %tmp20042, i64 1
- %tmp20044 = getelementptr inbounds float* %tmp20043, i64 1
- %tmp20045 = getelementptr inbounds float* %tmp20044, i64 1
- %tmp20046 = getelementptr inbounds float* %tmp20045, i64 1
- %tmp20047 = getelementptr inbounds float* %tmp20046, i64 1
- %tmp20048 = getelementptr inbounds float* %tmp20047, i64 1
- %tmp20049 = getelementptr inbounds float* %tmp20048, i64 1
- %tmp20050 = getelementptr inbounds float* %tmp20049, i64 1
- %tmp20051 = getelementptr inbounds float* %tmp20050, i64 1
- %tmp20052 = getelementptr inbounds float* %tmp20051, i64 1
- %tmp20053 = getelementptr inbounds float* %tmp20052, i64 1
- %tmp20054 = getelementptr inbounds float* %tmp20053, i64 1
- %tmp20055 = getelementptr inbounds float* %tmp20054, i64 1
- %tmp20056 = getelementptr inbounds float* %tmp20055, i64 1
- %tmp20057 = getelementptr inbounds float* %tmp20056, i64 1
- %tmp20058 = getelementptr inbounds float* %tmp20057, i64 1
- %tmp20059 = getelementptr inbounds float* %tmp20058, i64 1
- %tmp20060 = getelementptr inbounds float* %tmp20059, i64 1
- %tmp20061 = getelementptr inbounds float* %tmp20060, i64 1
- %tmp20062 = getelementptr inbounds float* %tmp20061, i64 1
- %tmp20063 = getelementptr inbounds float* %tmp20062, i64 1
- %tmp20064 = getelementptr inbounds float* %tmp20063, i64 1
- %tmp20065 = getelementptr inbounds float* %tmp20064, i64 1
- %tmp20066 = getelementptr inbounds float* %tmp20065, i64 1
- %tmp20067 = getelementptr inbounds float* %tmp20066, i64 1
- %tmp20068 = getelementptr inbounds float* %tmp20067, i64 1
- %tmp20069 = getelementptr inbounds float* %tmp20068, i64 1
- %tmp20070 = getelementptr inbounds float* %tmp20069, i64 1
- %tmp20071 = getelementptr inbounds float* %tmp20070, i64 1
- %tmp20072 = getelementptr inbounds float* %tmp20071, i64 1
- %tmp20073 = getelementptr inbounds float* %tmp20072, i64 1
- %tmp20074 = getelementptr inbounds float* %tmp20073, i64 1
- %tmp20075 = getelementptr inbounds float* %tmp20074, i64 1
- %tmp20076 = getelementptr inbounds float* %tmp20075, i64 1
- %tmp20077 = getelementptr inbounds float* %tmp20076, i64 1
- %tmp20078 = getelementptr inbounds float* %tmp20077, i64 1
- %tmp20079 = getelementptr inbounds float* %tmp20078, i64 1
- %tmp20080 = getelementptr inbounds float* %tmp20079, i64 1
- %tmp20081 = getelementptr inbounds float* %tmp20080, i64 1
- %tmp20082 = getelementptr inbounds float* %tmp20081, i64 1
- %tmp20083 = getelementptr inbounds float* %tmp20082, i64 1
- %tmp20084 = getelementptr inbounds float* %tmp20083, i64 1
- %tmp20085 = getelementptr inbounds float* %tmp20084, i64 1
- %tmp20086 = getelementptr inbounds float* %tmp20085, i64 1
- %tmp20087 = getelementptr inbounds float* %tmp20086, i64 1
- %tmp20088 = getelementptr inbounds float* %tmp20087, i64 1
- %tmp20089 = getelementptr inbounds float* %tmp20088, i64 1
- %tmp20090 = getelementptr inbounds float* %tmp20089, i64 1
- %tmp20091 = getelementptr inbounds float* %tmp20090, i64 1
- %tmp20092 = getelementptr inbounds float* %tmp20091, i64 1
- %tmp20093 = getelementptr inbounds float* %tmp20092, i64 1
- %tmp20094 = getelementptr inbounds float* %tmp20093, i64 1
- %tmp20095 = getelementptr inbounds float* %tmp20094, i64 1
- %tmp20096 = getelementptr inbounds float* %tmp20095, i64 1
- %tmp20097 = getelementptr inbounds float* %tmp20096, i64 1
- %tmp20098 = getelementptr inbounds float* %tmp20097, i64 1
- %tmp20099 = getelementptr inbounds float* %tmp20098, i64 1
- %tmp20100 = getelementptr inbounds float* %tmp20099, i64 1
- %tmp20101 = getelementptr inbounds float* %tmp20100, i64 1
- %tmp20102 = getelementptr inbounds float* %tmp20101, i64 1
- %tmp20103 = getelementptr inbounds float* %tmp20102, i64 1
- %tmp20104 = getelementptr inbounds float* %tmp20103, i64 1
- %tmp20105 = getelementptr inbounds float* %tmp20104, i64 1
- %tmp20106 = getelementptr inbounds float* %tmp20105, i64 1
- %tmp20107 = getelementptr inbounds float* %tmp20106, i64 1
- %tmp20108 = getelementptr inbounds float* %tmp20107, i64 1
- %tmp20109 = getelementptr inbounds float* %tmp20108, i64 1
- %tmp20110 = getelementptr inbounds float* %tmp20109, i64 1
- %tmp20111 = getelementptr inbounds float* %tmp20110, i64 1
- %tmp20112 = getelementptr inbounds float* %tmp20111, i64 1
- %tmp20113 = getelementptr inbounds float* %tmp20112, i64 1
- %tmp20114 = getelementptr inbounds float* %tmp20113, i64 1
- %tmp20115 = getelementptr inbounds float* %tmp20114, i64 1
- %tmp20116 = getelementptr inbounds float* %tmp20115, i64 1
- %tmp20117 = getelementptr inbounds float* %tmp20116, i64 1
- %tmp20118 = getelementptr inbounds float* %tmp20117, i64 1
- %tmp20119 = getelementptr inbounds float* %tmp20118, i64 1
- %tmp20120 = getelementptr inbounds float* %tmp20119, i64 1
- %tmp20121 = getelementptr inbounds float* %tmp20120, i64 1
- %tmp20122 = getelementptr inbounds float* %tmp20121, i64 1
- %tmp20123 = getelementptr inbounds float* %tmp20122, i64 1
- %tmp20124 = getelementptr inbounds float* %tmp20123, i64 1
- %tmp20125 = getelementptr inbounds float* %tmp20124, i64 1
- %tmp20126 = getelementptr inbounds float* %tmp20125, i64 1
- %tmp20127 = getelementptr inbounds float* %tmp20126, i64 1
- %tmp20128 = getelementptr inbounds float* %tmp20127, i64 1
- %tmp20129 = getelementptr inbounds float* %tmp20128, i64 1
- %tmp20130 = getelementptr inbounds float* %tmp20129, i64 1
- %tmp20131 = getelementptr inbounds float* %tmp20130, i64 1
- %tmp20132 = getelementptr inbounds float* %tmp20131, i64 1
- %tmp20133 = getelementptr inbounds float* %tmp20132, i64 1
- %tmp20134 = getelementptr inbounds float* %tmp20133, i64 1
- %tmp20135 = getelementptr inbounds float* %tmp20134, i64 1
- %tmp20136 = getelementptr inbounds float* %tmp20135, i64 1
- %tmp20137 = getelementptr inbounds float* %tmp20136, i64 1
- %tmp20138 = getelementptr inbounds float* %tmp20137, i64 1
- %tmp20139 = getelementptr inbounds float* %tmp20138, i64 1
- %tmp20140 = getelementptr inbounds float* %tmp20139, i64 1
- %tmp20141 = getelementptr inbounds float* %tmp20140, i64 1
- %tmp20142 = getelementptr inbounds float* %tmp20141, i64 1
- %tmp20143 = getelementptr inbounds float* %tmp20142, i64 1
- %tmp20144 = getelementptr inbounds float* %tmp20143, i64 1
- %tmp20145 = getelementptr inbounds float* %tmp20144, i64 1
- %tmp20146 = getelementptr inbounds float* %tmp20145, i64 1
- %tmp20147 = getelementptr inbounds float* %tmp20146, i64 1
- %tmp20148 = getelementptr inbounds float* %tmp20147, i64 1
- %tmp20149 = getelementptr inbounds float* %tmp20148, i64 1
- %tmp20150 = getelementptr inbounds float* %tmp20149, i64 1
- %tmp20151 = getelementptr inbounds float* %tmp20150, i64 1
- %tmp20152 = getelementptr inbounds float* %tmp20151, i64 1
- %tmp20153 = getelementptr inbounds float* %tmp20152, i64 1
- %tmp20154 = getelementptr inbounds float* %tmp20153, i64 1
- %tmp20155 = getelementptr inbounds float* %tmp20154, i64 1
- %tmp20156 = getelementptr inbounds float* %tmp20155, i64 1
- %tmp20157 = getelementptr inbounds float* %tmp20156, i64 1
- %tmp20158 = getelementptr inbounds float* %tmp20157, i64 1
- %tmp20159 = getelementptr inbounds float* %tmp20158, i64 1
- %tmp20160 = getelementptr inbounds float* %tmp20159, i64 1
- %tmp20161 = getelementptr inbounds float* %tmp20160, i64 1
- %tmp20162 = getelementptr inbounds float* %tmp20161, i64 1
- %tmp20163 = getelementptr inbounds float* %tmp20162, i64 1
- %tmp20164 = getelementptr inbounds float* %tmp20163, i64 1
- %tmp20165 = getelementptr inbounds float* %tmp20164, i64 1
- %tmp20166 = getelementptr inbounds float* %tmp20165, i64 1
- %tmp20167 = getelementptr inbounds float* %tmp20166, i64 1
- %tmp20168 = getelementptr inbounds float* %tmp20167, i64 1
- %tmp20169 = getelementptr inbounds float* %tmp20168, i64 1
- %tmp20170 = getelementptr inbounds float* %tmp20169, i64 1
- %tmp20171 = getelementptr inbounds float* %tmp20170, i64 1
- %tmp20172 = getelementptr inbounds float* %tmp20171, i64 1
- %tmp20173 = getelementptr inbounds float* %tmp20172, i64 1
- %tmp20174 = getelementptr inbounds float* %tmp20173, i64 1
- %tmp20175 = getelementptr inbounds float* %tmp20174, i64 1
- %tmp20176 = getelementptr inbounds float* %tmp20175, i64 1
- %tmp20177 = getelementptr inbounds float* %tmp20176, i64 1
- %tmp20178 = getelementptr inbounds float* %tmp20177, i64 1
- %tmp20179 = getelementptr inbounds float* %tmp20178, i64 1
- %tmp20180 = getelementptr inbounds float* %tmp20179, i64 1
- %tmp20181 = getelementptr inbounds float* %tmp20180, i64 1
- %tmp20182 = getelementptr inbounds float* %tmp20181, i64 1
- %tmp20183 = getelementptr inbounds float* %tmp20182, i64 1
- %tmp20184 = getelementptr inbounds float* %tmp20183, i64 1
- %tmp20185 = getelementptr inbounds float* %tmp20184, i64 1
- %tmp20186 = getelementptr inbounds float* %tmp20185, i64 1
- %tmp20187 = getelementptr inbounds float* %tmp20186, i64 1
- %tmp20188 = getelementptr inbounds float* %tmp20187, i64 1
- %tmp20189 = getelementptr inbounds float* %tmp20188, i64 1
- %tmp20190 = getelementptr inbounds float* %tmp20189, i64 1
- %tmp20191 = getelementptr inbounds float* %tmp20190, i64 1
- %tmp20192 = getelementptr inbounds float* %tmp20191, i64 1
- %tmp20193 = getelementptr inbounds float* %tmp20192, i64 1
- %tmp20194 = getelementptr inbounds float* %tmp20193, i64 1
- %tmp20195 = getelementptr inbounds float* %tmp20194, i64 1
- %tmp20196 = getelementptr inbounds float* %tmp20195, i64 1
- %tmp20197 = getelementptr inbounds float* %tmp20196, i64 1
- %tmp20198 = getelementptr inbounds float* %tmp20197, i64 1
- %tmp20199 = getelementptr inbounds float* %tmp20198, i64 1
- %tmp20200 = getelementptr inbounds float* %tmp20199, i64 1
- %tmp20201 = getelementptr inbounds float* %tmp20200, i64 1
- %tmp20202 = getelementptr inbounds float* %tmp20201, i64 1
- %tmp20203 = getelementptr inbounds float* %tmp20202, i64 1
- %tmp20204 = getelementptr inbounds float* %tmp20203, i64 1
- %tmp20205 = getelementptr inbounds float* %tmp20204, i64 1
- %tmp20206 = getelementptr inbounds float* %tmp20205, i64 1
- %tmp20207 = getelementptr inbounds float* %tmp20206, i64 1
- %tmp20208 = getelementptr inbounds float* %tmp20207, i64 1
- %tmp20209 = getelementptr inbounds float* %tmp20208, i64 1
- %tmp20210 = getelementptr inbounds float* %tmp20209, i64 1
- %tmp20211 = getelementptr inbounds float* %tmp20210, i64 1
- %tmp20212 = getelementptr inbounds float* %tmp20211, i64 1
- %tmp20213 = getelementptr inbounds float* %tmp20212, i64 1
- %tmp20214 = getelementptr inbounds float* %tmp20213, i64 1
- %tmp20215 = getelementptr inbounds float* %tmp20214, i64 1
- %tmp20216 = getelementptr inbounds float* %tmp20215, i64 1
- %tmp20217 = getelementptr inbounds float* %tmp20216, i64 1
- %tmp20218 = getelementptr inbounds float* %tmp20217, i64 1
- %tmp20219 = getelementptr inbounds float* %tmp20218, i64 1
- %tmp20220 = getelementptr inbounds float* %tmp20219, i64 1
- %tmp20221 = getelementptr inbounds float* %tmp20220, i64 1
- %tmp20222 = getelementptr inbounds float* %tmp20221, i64 1
- %tmp20223 = getelementptr inbounds float* %tmp20222, i64 1
- %tmp20224 = getelementptr inbounds float* %tmp20223, i64 1
- %tmp20225 = getelementptr inbounds float* %tmp20224, i64 1
- %tmp20226 = getelementptr inbounds float* %tmp20225, i64 1
- %tmp20227 = getelementptr inbounds float* %tmp20226, i64 1
- %tmp20228 = getelementptr inbounds float* %tmp20227, i64 1
- %tmp20229 = getelementptr inbounds float* %tmp20228, i64 1
- %tmp20230 = getelementptr inbounds float* %tmp20229, i64 1
- %tmp20231 = getelementptr inbounds float* %tmp20230, i64 1
- %tmp20232 = getelementptr inbounds float* %tmp20231, i64 1
- %tmp20233 = getelementptr inbounds float* %tmp20232, i64 1
- %tmp20234 = getelementptr inbounds float* %tmp20233, i64 1
- %tmp20235 = getelementptr inbounds float* %tmp20234, i64 1
- %tmp20236 = getelementptr inbounds float* %tmp20235, i64 1
- %tmp20237 = getelementptr inbounds float* %tmp20236, i64 1
- %tmp20238 = getelementptr inbounds float* %tmp20237, i64 1
- %tmp20239 = getelementptr inbounds float* %tmp20238, i64 1
- %tmp20240 = getelementptr inbounds float* %tmp20239, i64 1
- %tmp20241 = getelementptr inbounds float* %tmp20240, i64 1
- %tmp20242 = getelementptr inbounds float* %tmp20241, i64 1
- %tmp20243 = getelementptr inbounds float* %tmp20242, i64 1
- %tmp20244 = getelementptr inbounds float* %tmp20243, i64 1
- %tmp20245 = getelementptr inbounds float* %tmp20244, i64 1
- %tmp20246 = getelementptr inbounds float* %tmp20245, i64 1
- %tmp20247 = getelementptr inbounds float* %tmp20246, i64 1
- %tmp20248 = getelementptr inbounds float* %tmp20247, i64 1
- %tmp20249 = getelementptr inbounds float* %tmp20248, i64 1
- %tmp20250 = getelementptr inbounds float* %tmp20249, i64 1
- %tmp20251 = getelementptr inbounds float* %tmp20250, i64 1
- %tmp20252 = getelementptr inbounds float* %tmp20251, i64 1
- %tmp20253 = getelementptr inbounds float* %tmp20252, i64 1
- %tmp20254 = getelementptr inbounds float* %tmp20253, i64 1
- %tmp20255 = getelementptr inbounds float* %tmp20254, i64 1
- %tmp20256 = getelementptr inbounds float* %tmp20255, i64 1
- %tmp20257 = getelementptr inbounds float* %tmp20256, i64 1
- %tmp20258 = getelementptr inbounds float* %tmp20257, i64 1
- %tmp20259 = getelementptr inbounds float* %tmp20258, i64 1
- %tmp20260 = getelementptr inbounds float* %tmp20259, i64 1
- %tmp20261 = getelementptr inbounds float* %tmp20260, i64 1
- %tmp20262 = getelementptr inbounds float* %tmp20261, i64 1
- %tmp20263 = getelementptr inbounds float* %tmp20262, i64 1
- %tmp20264 = getelementptr inbounds float* %tmp20263, i64 1
- %tmp20265 = getelementptr inbounds float* %tmp20264, i64 1
- %tmp20266 = getelementptr inbounds float* %tmp20265, i64 1
- %tmp20267 = getelementptr inbounds float* %tmp20266, i64 1
- %tmp20268 = getelementptr inbounds float* %tmp20267, i64 1
- %tmp20269 = getelementptr inbounds float* %tmp20268, i64 1
- %tmp20270 = getelementptr inbounds float* %tmp20269, i64 1
- %tmp20271 = getelementptr inbounds float* %tmp20270, i64 1
- %tmp20272 = getelementptr inbounds float* %tmp20271, i64 1
- %tmp20273 = getelementptr inbounds float* %tmp20272, i64 1
- %tmp20274 = getelementptr inbounds float* %tmp20273, i64 1
- %tmp20275 = getelementptr inbounds float* %tmp20274, i64 1
- %tmp20276 = getelementptr inbounds float* %tmp20275, i64 1
- %tmp20277 = getelementptr inbounds float* %tmp20276, i64 1
- %tmp20278 = getelementptr inbounds float* %tmp20277, i64 1
- %tmp20279 = getelementptr inbounds float* %tmp20278, i64 1
- %tmp20280 = getelementptr inbounds float* %tmp20279, i64 1
- %tmp20281 = getelementptr inbounds float* %tmp20280, i64 1
- %tmp20282 = getelementptr inbounds float* %tmp20281, i64 1
- %tmp20283 = getelementptr inbounds float* %tmp20282, i64 1
- %tmp20284 = getelementptr inbounds float* %tmp20283, i64 1
- %tmp20285 = getelementptr inbounds float* %tmp20284, i64 1
- %tmp20286 = getelementptr inbounds float* %tmp20285, i64 1
- %tmp20287 = getelementptr inbounds float* %tmp20286, i64 1
- %tmp20288 = getelementptr inbounds float* %tmp20287, i64 1
- %tmp20289 = getelementptr inbounds float* %tmp20288, i64 1
- %tmp20290 = getelementptr inbounds float* %tmp20289, i64 1
- %tmp20291 = getelementptr inbounds float* %tmp20290, i64 1
- %tmp20292 = getelementptr inbounds float* %tmp20291, i64 1
- %tmp20293 = getelementptr inbounds float* %tmp20292, i64 1
- %tmp20294 = getelementptr inbounds float* %tmp20293, i64 1
- %tmp20295 = getelementptr inbounds float* %tmp20294, i64 1
- %tmp20296 = getelementptr inbounds float* %tmp20295, i64 1
- %tmp20297 = getelementptr inbounds float* %tmp20296, i64 1
- %tmp20298 = getelementptr inbounds float* %tmp20297, i64 1
- %tmp20299 = getelementptr inbounds float* %tmp20298, i64 1
- %tmp20300 = getelementptr inbounds float* %tmp20299, i64 1
- %tmp20301 = getelementptr inbounds float* %tmp20300, i64 1
- %tmp20302 = getelementptr inbounds float* %tmp20301, i64 1
- %tmp20303 = getelementptr inbounds float* %tmp20302, i64 1
- %tmp20304 = getelementptr inbounds float* %tmp20303, i64 1
- %tmp20305 = getelementptr inbounds float* %tmp20304, i64 1
- %tmp20306 = getelementptr inbounds float* %tmp20305, i64 1
- %tmp20307 = getelementptr inbounds float* %tmp20306, i64 1
- %tmp20308 = getelementptr inbounds float* %tmp20307, i64 1
- %tmp20309 = getelementptr inbounds float* %tmp20308, i64 1
- %tmp20310 = getelementptr inbounds float* %tmp20309, i64 1
- %tmp20311 = getelementptr inbounds float* %tmp20310, i64 1
- %tmp20312 = getelementptr inbounds float* %tmp20311, i64 1
- %tmp20313 = getelementptr inbounds float* %tmp20312, i64 1
- %tmp20314 = getelementptr inbounds float* %tmp20313, i64 1
- %tmp20315 = getelementptr inbounds float* %tmp20314, i64 1
- %tmp20316 = getelementptr inbounds float* %tmp20315, i64 1
- %tmp20317 = getelementptr inbounds float* %tmp20316, i64 1
- %tmp20318 = getelementptr inbounds float* %tmp20317, i64 1
- %tmp20319 = getelementptr inbounds float* %tmp20318, i64 1
- %tmp20320 = getelementptr inbounds float* %tmp20319, i64 1
- %tmp20321 = getelementptr inbounds float* %tmp20320, i64 1
- %tmp20322 = getelementptr inbounds float* %tmp20321, i64 1
- %tmp20323 = getelementptr inbounds float* %tmp20322, i64 1
- %tmp20324 = getelementptr inbounds float* %tmp20323, i64 1
- %tmp20325 = getelementptr inbounds float* %tmp20324, i64 1
- %tmp20326 = getelementptr inbounds float* %tmp20325, i64 1
- %tmp20327 = getelementptr inbounds float* %tmp20326, i64 1
- %tmp20328 = getelementptr inbounds float* %tmp20327, i64 1
- %tmp20329 = getelementptr inbounds float* %tmp20328, i64 1
- %tmp20330 = getelementptr inbounds float* %tmp20329, i64 1
- %tmp20331 = getelementptr inbounds float* %tmp20330, i64 1
- %tmp20332 = getelementptr inbounds float* %tmp20331, i64 1
- %tmp20333 = getelementptr inbounds float* %tmp20332, i64 1
- %tmp20334 = getelementptr inbounds float* %tmp20333, i64 1
- %tmp20335 = getelementptr inbounds float* %tmp20334, i64 1
- %tmp20336 = getelementptr inbounds float* %tmp20335, i64 1
- %tmp20337 = getelementptr inbounds float* %tmp20336, i64 1
- %tmp20338 = getelementptr inbounds float* %tmp20337, i64 1
- %tmp20339 = getelementptr inbounds float* %tmp20338, i64 1
- %tmp20340 = getelementptr inbounds float* %tmp20339, i64 1
- %tmp20341 = getelementptr inbounds float* %tmp20340, i64 1
- %tmp20342 = getelementptr inbounds float* %tmp20341, i64 1
- %tmp20343 = getelementptr inbounds float* %tmp20342, i64 1
- %tmp20344 = getelementptr inbounds float* %tmp20343, i64 1
- %tmp20345 = getelementptr inbounds float* %tmp20344, i64 1
- %tmp20346 = getelementptr inbounds float* %tmp20345, i64 1
- %tmp20347 = getelementptr inbounds float* %tmp20346, i64 1
- %tmp20348 = getelementptr inbounds float* %tmp20347, i64 1
- %tmp20349 = getelementptr inbounds float* %tmp20348, i64 1
- %tmp20350 = getelementptr inbounds float* %tmp20349, i64 1
- %tmp20351 = getelementptr inbounds float* %tmp20350, i64 1
- %tmp20352 = getelementptr inbounds float* %tmp20351, i64 1
- %tmp20353 = getelementptr inbounds float* %tmp20352, i64 1
- %tmp20354 = getelementptr inbounds float* %tmp20353, i64 1
- %tmp20355 = getelementptr inbounds float* %tmp20354, i64 1
- %tmp20356 = getelementptr inbounds float* %tmp20355, i64 1
- %tmp20357 = getelementptr inbounds float* %tmp20356, i64 1
- %tmp20358 = getelementptr inbounds float* %tmp20357, i64 1
- %tmp20359 = getelementptr inbounds float* %tmp20358, i64 1
- %tmp20360 = getelementptr inbounds float* %tmp20359, i64 1
- %tmp20361 = getelementptr inbounds float* %tmp20360, i64 1
- %tmp20362 = getelementptr inbounds float* %tmp20361, i64 1
- %tmp20363 = getelementptr inbounds float* %tmp20362, i64 1
- %tmp20364 = getelementptr inbounds float* %tmp20363, i64 1
- %tmp20365 = getelementptr inbounds float* %tmp20364, i64 1
- %tmp20366 = getelementptr inbounds float* %tmp20365, i64 1
- %tmp20367 = getelementptr inbounds float* %tmp20366, i64 1
- %tmp20368 = getelementptr inbounds float* %tmp20367, i64 1
- %tmp20369 = getelementptr inbounds float* %tmp20368, i64 1
- %tmp20370 = getelementptr inbounds float* %tmp20369, i64 1
- %tmp20371 = getelementptr inbounds float* %tmp20370, i64 1
- %tmp20372 = getelementptr inbounds float* %tmp20371, i64 1
- %tmp20373 = getelementptr inbounds float* %tmp20372, i64 1
- %tmp20374 = getelementptr inbounds float* %tmp20373, i64 1
- %tmp20375 = getelementptr inbounds float* %tmp20374, i64 1
- %tmp20376 = getelementptr inbounds float* %tmp20375, i64 1
- %tmp20377 = getelementptr inbounds float* %tmp20376, i64 1
- %tmp20378 = getelementptr inbounds float* %tmp20377, i64 1
- %tmp20379 = getelementptr inbounds float* %tmp20378, i64 1
- %tmp20380 = getelementptr inbounds float* %tmp20379, i64 1
- %tmp20381 = getelementptr inbounds float* %tmp20380, i64 1
- %tmp20382 = getelementptr inbounds float* %tmp20381, i64 1
- %tmp20383 = getelementptr inbounds float* %tmp20382, i64 1
- %tmp20384 = getelementptr inbounds float* %tmp20383, i64 1
- %tmp20385 = getelementptr inbounds float* %tmp20384, i64 1
- %tmp20386 = getelementptr inbounds float* %tmp20385, i64 1
- %tmp20387 = getelementptr inbounds float* %tmp20386, i64 1
- %tmp20388 = getelementptr inbounds float* %tmp20387, i64 1
- %tmp20389 = getelementptr inbounds float* %tmp20388, i64 1
- %tmp20390 = getelementptr inbounds float* %tmp20389, i64 1
- %tmp20391 = getelementptr inbounds float* %tmp20390, i64 1
- %tmp20392 = getelementptr inbounds float* %tmp20391, i64 1
- %tmp20393 = getelementptr inbounds float* %tmp20392, i64 1
- %tmp20394 = getelementptr inbounds float* %tmp20393, i64 1
- %tmp20395 = getelementptr inbounds float* %tmp20394, i64 1
- %tmp20396 = getelementptr inbounds float* %tmp20395, i64 1
- %tmp20397 = getelementptr inbounds float* %tmp20396, i64 1
- %tmp20398 = getelementptr inbounds float* %tmp20397, i64 1
- %tmp20399 = getelementptr inbounds float* %tmp20398, i64 1
- %tmp20400 = getelementptr inbounds float* %tmp20399, i64 1
- %tmp20401 = getelementptr inbounds float* %tmp20400, i64 1
- %tmp20402 = getelementptr inbounds float* %tmp20401, i64 1
- %tmp20403 = getelementptr inbounds float* %tmp20402, i64 1
- %tmp20404 = getelementptr inbounds float* %tmp20403, i64 1
- %tmp20405 = getelementptr inbounds float* %tmp20404, i64 1
- %tmp20406 = getelementptr inbounds float* %tmp20405, i64 1
- %tmp20407 = getelementptr inbounds float* %tmp20406, i64 1
- %tmp20408 = getelementptr inbounds float* %tmp20407, i64 1
- %tmp20409 = getelementptr inbounds float* %tmp20408, i64 1
- %tmp20410 = getelementptr inbounds float* %tmp20409, i64 1
- %tmp20411 = getelementptr inbounds float* %tmp20410, i64 1
- %tmp20412 = getelementptr inbounds float* %tmp20411, i64 1
- %tmp20413 = getelementptr inbounds float* %tmp20412, i64 1
- %tmp20414 = getelementptr inbounds float* %tmp20413, i64 1
- %tmp20415 = getelementptr inbounds float* %tmp20414, i64 1
- %tmp20416 = getelementptr inbounds float* %tmp20415, i64 1
- %tmp20417 = getelementptr inbounds float* %tmp20416, i64 1
- %tmp20418 = getelementptr inbounds float* %tmp20417, i64 1
- %tmp20419 = getelementptr inbounds float* %tmp20418, i64 1
- %tmp20420 = getelementptr inbounds float* %tmp20419, i64 1
- %tmp20421 = getelementptr inbounds float* %tmp20420, i64 1
- %tmp20422 = getelementptr inbounds float* %tmp20421, i64 1
- %tmp20423 = getelementptr inbounds float* %tmp20422, i64 1
- %tmp20424 = getelementptr inbounds float* %tmp20423, i64 1
- %tmp20425 = getelementptr inbounds float* %tmp20424, i64 1
- %tmp20426 = getelementptr inbounds float* %tmp20425, i64 1
- %tmp20427 = getelementptr inbounds float* %tmp20426, i64 1
- %tmp20428 = getelementptr inbounds float* %tmp20427, i64 1
- %tmp20429 = getelementptr inbounds float* %tmp20428, i64 1
- %tmp20430 = getelementptr inbounds float* %tmp20429, i64 1
- %tmp20431 = getelementptr inbounds float* %tmp20430, i64 1
- %tmp20432 = getelementptr inbounds float* %tmp20431, i64 1
- %tmp20433 = getelementptr inbounds float* %tmp20432, i64 1
- %tmp20434 = getelementptr inbounds float* %tmp20433, i64 1
- %tmp20435 = getelementptr inbounds float* %tmp20434, i64 1
- %tmp20436 = getelementptr inbounds float* %tmp20435, i64 1
- %tmp20437 = getelementptr inbounds float* %tmp20436, i64 1
- %tmp20438 = getelementptr inbounds float* %tmp20437, i64 1
- %tmp20439 = getelementptr inbounds float* %tmp20438, i64 1
- %tmp20440 = getelementptr inbounds float* %tmp20439, i64 1
- %tmp20441 = getelementptr inbounds float* %tmp20440, i64 1
- %tmp20442 = getelementptr inbounds float* %tmp20441, i64 1
- %tmp20443 = getelementptr inbounds float* %tmp20442, i64 1
- %tmp20444 = getelementptr inbounds float* %tmp20443, i64 1
- %tmp20445 = getelementptr inbounds float* %tmp20444, i64 1
- %tmp20446 = getelementptr inbounds float* %tmp20445, i64 1
- %tmp20447 = getelementptr inbounds float* %tmp20446, i64 1
- %tmp20448 = getelementptr inbounds float* %tmp20447, i64 1
- %tmp20449 = getelementptr inbounds float* %tmp20448, i64 1
- %tmp20450 = getelementptr inbounds float* %tmp20449, i64 1
- %tmp20451 = getelementptr inbounds float* %tmp20450, i64 1
- %tmp20452 = getelementptr inbounds float* %tmp20451, i64 1
- %tmp20453 = getelementptr inbounds float* %tmp20452, i64 1
- %tmp20454 = getelementptr inbounds float* %tmp20453, i64 1
- %tmp20455 = getelementptr inbounds float* %tmp20454, i64 1
- %tmp20456 = getelementptr inbounds float* %tmp20455, i64 1
- %tmp20457 = getelementptr inbounds float* %tmp20456, i64 1
- %tmp20458 = getelementptr inbounds float* %tmp20457, i64 1
- %tmp20459 = getelementptr inbounds float* %tmp20458, i64 1
- %tmp20460 = getelementptr inbounds float* %tmp20459, i64 1
- %tmp20461 = getelementptr inbounds float* %tmp20460, i64 1
- %tmp20462 = getelementptr inbounds float* %tmp20461, i64 1
- %tmp20463 = getelementptr inbounds float* %tmp20462, i64 1
- %tmp20464 = getelementptr inbounds float* %tmp20463, i64 1
- %tmp20465 = getelementptr inbounds float* %tmp20464, i64 1
- %tmp20466 = getelementptr inbounds float* %tmp20465, i64 1
- %tmp20467 = getelementptr inbounds float* %tmp20466, i64 1
- %tmp20468 = getelementptr inbounds float* %tmp20467, i64 1
- %tmp20469 = getelementptr inbounds float* %tmp20468, i64 1
- %tmp20470 = getelementptr inbounds float* %tmp20469, i64 1
- %tmp20471 = getelementptr inbounds float* %tmp20470, i64 1
- %tmp20472 = getelementptr inbounds float* %tmp20471, i64 1
- %tmp20473 = getelementptr inbounds float* %tmp20472, i64 1
- %tmp20474 = getelementptr inbounds float* %tmp20473, i64 1
- %tmp20475 = getelementptr inbounds float* %tmp20474, i64 1
- %tmp20476 = getelementptr inbounds float* %tmp20475, i64 1
- %tmp20477 = getelementptr inbounds float* %tmp20476, i64 1
- %tmp20478 = getelementptr inbounds float* %tmp20477, i64 1
- %tmp20479 = getelementptr inbounds float* %tmp20478, i64 1
- %tmp20480 = getelementptr inbounds float* %tmp20479, i64 1
- %tmp20481 = getelementptr inbounds float* %tmp20480, i64 1
- %tmp20482 = getelementptr inbounds float* %tmp20481, i64 1
- %tmp20483 = getelementptr inbounds float* %tmp20482, i64 1
- %tmp20484 = getelementptr inbounds float* %tmp20483, i64 1
- %tmp20485 = getelementptr inbounds float* %tmp20484, i64 1
- %tmp20486 = getelementptr inbounds float* %tmp20485, i64 1
- %tmp20487 = getelementptr inbounds float* %tmp20486, i64 1
- %tmp20488 = getelementptr inbounds float* %tmp20487, i64 1
- %tmp20489 = getelementptr inbounds float* %tmp20488, i64 1
- %tmp20490 = getelementptr inbounds float* %tmp20489, i64 1
- %tmp20491 = getelementptr inbounds float* %tmp20490, i64 1
- %tmp20492 = getelementptr inbounds float* %tmp20491, i64 1
- %tmp20493 = getelementptr inbounds float* %tmp20492, i64 1
- %tmp20494 = getelementptr inbounds float* %tmp20493, i64 1
- %tmp20495 = getelementptr inbounds float* %tmp20494, i64 1
- %tmp20496 = getelementptr inbounds float* %tmp20495, i64 1
- %tmp20497 = getelementptr inbounds float* %tmp20496, i64 1
- %tmp20498 = getelementptr inbounds float* %tmp20497, i64 1
- %tmp20499 = getelementptr inbounds float* %tmp20498, i64 1
- %tmp20500 = getelementptr inbounds float* %tmp20499, i64 1
- %tmp20501 = getelementptr inbounds float* %tmp20500, i64 1
- %tmp20502 = getelementptr inbounds float* %tmp20501, i64 1
- %tmp20503 = getelementptr inbounds float* %tmp20502, i64 1
- %tmp20504 = getelementptr inbounds float* %tmp20503, i64 1
- %tmp20505 = getelementptr inbounds float* %tmp20504, i64 1
- %tmp20506 = getelementptr inbounds float* %tmp20505, i64 1
- %tmp20507 = getelementptr inbounds float* %tmp20506, i64 1
- %tmp20508 = getelementptr inbounds float* %tmp20507, i64 1
- %tmp20509 = getelementptr inbounds float* %tmp20508, i64 1
- %tmp20510 = getelementptr inbounds float* %tmp20509, i64 1
- %tmp20511 = getelementptr inbounds float* %tmp20510, i64 1
- %tmp20512 = getelementptr inbounds float* %tmp20511, i64 1
- %tmp20513 = getelementptr inbounds float* %tmp20512, i64 1
- %tmp20514 = getelementptr inbounds float* %tmp20513, i64 1
- %tmp20515 = getelementptr inbounds float* %tmp20514, i64 1
- %tmp20516 = getelementptr inbounds float* %tmp20515, i64 1
- %tmp20517 = getelementptr inbounds float* %tmp20516, i64 1
- %tmp20518 = getelementptr inbounds float* %tmp20517, i64 1
- %tmp20519 = getelementptr inbounds float* %tmp20518, i64 1
- %tmp20520 = getelementptr inbounds float* %tmp20519, i64 1
- %tmp20521 = getelementptr inbounds float* %tmp20520, i64 1
- %tmp20522 = getelementptr inbounds float* %tmp20521, i64 1
- %tmp20523 = getelementptr inbounds float* %tmp20522, i64 1
- %tmp20524 = getelementptr inbounds float* %tmp20523, i64 1
- %tmp20525 = getelementptr inbounds float* %tmp20524, i64 1
- %tmp20526 = getelementptr inbounds float* %tmp20525, i64 1
- %tmp20527 = getelementptr inbounds float* %tmp20526, i64 1
- %tmp20528 = getelementptr inbounds float* %tmp20527, i64 1
- %tmp20529 = getelementptr inbounds float* %tmp20528, i64 1
- %tmp20530 = getelementptr inbounds float* %tmp20529, i64 1
- %tmp20531 = getelementptr inbounds float* %tmp20530, i64 1
- %tmp20532 = getelementptr inbounds float* %tmp20531, i64 1
- %tmp20533 = getelementptr inbounds float* %tmp20532, i64 1
- %tmp20534 = getelementptr inbounds float* %tmp20533, i64 1
- %tmp20535 = getelementptr inbounds float* %tmp20534, i64 1
- %tmp20536 = getelementptr inbounds float* %tmp20535, i64 1
- %tmp20537 = getelementptr inbounds float* %tmp20536, i64 1
- %tmp20538 = getelementptr inbounds float* %tmp20537, i64 1
- %tmp20539 = getelementptr inbounds float* %tmp20538, i64 1
- %tmp20540 = getelementptr inbounds float* %tmp20539, i64 1
- %tmp20541 = getelementptr inbounds float* %tmp20540, i64 1
- %tmp20542 = getelementptr inbounds float* %tmp20541, i64 1
- %tmp20543 = getelementptr inbounds float* %tmp20542, i64 1
- %tmp20544 = getelementptr inbounds float* %tmp20543, i64 1
- %tmp20545 = getelementptr inbounds float* %tmp20544, i64 1
- %tmp20546 = getelementptr inbounds float* %tmp20545, i64 1
- %tmp20547 = getelementptr inbounds float* %tmp20546, i64 1
- %tmp20548 = getelementptr inbounds float* %tmp20547, i64 1
- %tmp20549 = getelementptr inbounds float* %tmp20548, i64 1
- %tmp20550 = getelementptr inbounds float* %tmp20549, i64 1
- %tmp20551 = getelementptr inbounds float* %tmp20550, i64 1
- %tmp20552 = getelementptr inbounds float* %tmp20551, i64 1
- %tmp20553 = getelementptr inbounds float* %tmp20552, i64 1
- %tmp20554 = getelementptr inbounds float* %tmp20553, i64 1
- %tmp20555 = getelementptr inbounds float* %tmp20554, i64 1
- %tmp20556 = getelementptr inbounds float* %tmp20555, i64 1
- %tmp20557 = getelementptr inbounds float* %tmp20556, i64 1
- %tmp20558 = getelementptr inbounds float* %tmp20557, i64 1
- %tmp20559 = getelementptr inbounds float* %tmp20558, i64 1
- %tmp20560 = getelementptr inbounds float* %tmp20559, i64 1
- %tmp20561 = getelementptr inbounds float* %tmp20560, i64 1
- %tmp20562 = getelementptr inbounds float* %tmp20561, i64 1
- %tmp20563 = getelementptr inbounds float* %tmp20562, i64 1
- %tmp20564 = getelementptr inbounds float* %tmp20563, i64 1
- %tmp20565 = getelementptr inbounds float* %tmp20564, i64 1
- %tmp20566 = getelementptr inbounds float* %tmp20565, i64 1
- %tmp20567 = getelementptr inbounds float* %tmp20566, i64 1
- %tmp20568 = getelementptr inbounds float* %tmp20567, i64 1
- %tmp20569 = getelementptr inbounds float* %tmp20568, i64 1
- %tmp20570 = getelementptr inbounds float* %tmp20569, i64 1
- %tmp20571 = getelementptr inbounds float* %tmp20570, i64 1
- %tmp20572 = getelementptr inbounds float* %tmp20571, i64 1
- %tmp20573 = getelementptr inbounds float* %tmp20572, i64 1
- %tmp20574 = getelementptr inbounds float* %tmp20573, i64 1
- %tmp20575 = getelementptr inbounds float* %tmp20574, i64 1
- %tmp20576 = getelementptr inbounds float* %tmp20575, i64 1
- %tmp20577 = getelementptr inbounds float* %tmp20576, i64 1
- %tmp20578 = getelementptr inbounds float* %tmp20577, i64 1
- %tmp20579 = getelementptr inbounds float* %tmp20578, i64 1
- %tmp20580 = getelementptr inbounds float* %tmp20579, i64 1
- %tmp20581 = getelementptr inbounds float* %tmp20580, i64 1
- %tmp20582 = getelementptr inbounds float* %tmp20581, i64 1
- %tmp20583 = getelementptr inbounds float* %tmp20582, i64 1
- %tmp20584 = getelementptr inbounds float* %tmp20583, i64 1
- %tmp20585 = getelementptr inbounds float* %tmp20584, i64 1
- %tmp20586 = getelementptr inbounds float* %tmp20585, i64 1
- %tmp20587 = getelementptr inbounds float* %tmp20586, i64 1
- %tmp20588 = getelementptr inbounds float* %tmp20587, i64 1
- %tmp20589 = getelementptr inbounds float* %tmp20588, i64 1
- %tmp20590 = getelementptr inbounds float* %tmp20589, i64 1
- %tmp20591 = getelementptr inbounds float* %tmp20590, i64 1
- %tmp20592 = getelementptr inbounds float* %tmp20591, i64 1
- %tmp20593 = getelementptr inbounds float* %tmp20592, i64 1
- %tmp20594 = getelementptr inbounds float* %tmp20593, i64 1
- %tmp20595 = getelementptr inbounds float* %tmp20594, i64 1
- %tmp20596 = getelementptr inbounds float* %tmp20595, i64 1
- %tmp20597 = getelementptr inbounds float* %tmp20596, i64 1
- %tmp20598 = getelementptr inbounds float* %tmp20597, i64 1
- %tmp20599 = getelementptr inbounds float* %tmp20598, i64 1
- %tmp20600 = getelementptr inbounds float* %tmp20599, i64 1
- %tmp20601 = getelementptr inbounds float* %tmp20600, i64 1
- %tmp20602 = getelementptr inbounds float* %tmp20601, i64 1
- %tmp20603 = getelementptr inbounds float* %tmp20602, i64 1
- %tmp20604 = getelementptr inbounds float* %tmp20603, i64 1
- %tmp20605 = getelementptr inbounds float* %tmp20604, i64 1
- %tmp20606 = getelementptr inbounds float* %tmp20605, i64 1
- %tmp20607 = getelementptr inbounds float* %tmp20606, i64 1
- %tmp20608 = getelementptr inbounds float* %tmp20607, i64 1
- %tmp20609 = getelementptr inbounds float* %tmp20608, i64 1
- %tmp20610 = getelementptr inbounds float* %tmp20609, i64 1
- %tmp20611 = getelementptr inbounds float* %tmp20610, i64 1
- %tmp20612 = getelementptr inbounds float* %tmp20611, i64 1
- %tmp20613 = getelementptr inbounds float* %tmp20612, i64 1
- %tmp20614 = getelementptr inbounds float* %tmp20613, i64 1
- %tmp20615 = getelementptr inbounds float* %tmp20614, i64 1
- %tmp20616 = getelementptr inbounds float* %tmp20615, i64 1
- %tmp20617 = getelementptr inbounds float* %tmp20616, i64 1
- %tmp20618 = getelementptr inbounds float* %tmp20617, i64 1
- %tmp20619 = getelementptr inbounds float* %tmp20618, i64 1
- %tmp20620 = getelementptr inbounds float* %tmp20619, i64 1
- %tmp20621 = getelementptr inbounds float* %tmp20620, i64 1
- %tmp20622 = getelementptr inbounds float* %tmp20621, i64 1
- %tmp20623 = getelementptr inbounds float* %tmp20622, i64 1
- %tmp20624 = getelementptr inbounds float* %tmp20623, i64 1
- %tmp20625 = getelementptr inbounds float* %tmp20624, i64 1
- %tmp20626 = getelementptr inbounds float* %tmp20625, i64 1
- %tmp20627 = getelementptr inbounds float* %tmp20626, i64 1
- %tmp20628 = getelementptr inbounds float* %tmp20627, i64 1
- %tmp20629 = getelementptr inbounds float* %tmp20628, i64 1
- %tmp20630 = getelementptr inbounds float* %tmp20629, i64 1
- %tmp20631 = getelementptr inbounds float* %tmp20630, i64 1
- %tmp20632 = getelementptr inbounds float* %tmp20631, i64 1
- %tmp20633 = getelementptr inbounds float* %tmp20632, i64 1
- %tmp20634 = getelementptr inbounds float* %tmp20633, i64 1
- %tmp20635 = getelementptr inbounds float* %tmp20634, i64 1
- %tmp20636 = getelementptr inbounds float* %tmp20635, i64 1
- %tmp20637 = getelementptr inbounds float* %tmp20636, i64 1
- %tmp20638 = getelementptr inbounds float* %tmp20637, i64 1
- %tmp20639 = getelementptr inbounds float* %tmp20638, i64 1
- %tmp20640 = getelementptr inbounds float* %tmp20639, i64 1
- %tmp20641 = getelementptr inbounds float* %tmp20640, i64 1
- %tmp20642 = getelementptr inbounds float* %tmp20641, i64 1
- %tmp20643 = getelementptr inbounds float* %tmp20642, i64 1
- %tmp20644 = getelementptr inbounds float* %tmp20643, i64 1
- %tmp20645 = getelementptr inbounds float* %tmp20644, i64 1
- %tmp20646 = getelementptr inbounds float* %tmp20645, i64 1
- %tmp20647 = getelementptr inbounds float* %tmp20646, i64 1
- %tmp20648 = getelementptr inbounds float* %tmp20647, i64 1
- %tmp20649 = getelementptr inbounds float* %tmp20648, i64 1
- %tmp20650 = getelementptr inbounds float* %tmp20649, i64 1
- %tmp20651 = getelementptr inbounds float* %tmp20650, i64 1
- %tmp20652 = getelementptr inbounds float* %tmp20651, i64 1
- %tmp20653 = getelementptr inbounds float* %tmp20652, i64 1
- %tmp20654 = getelementptr inbounds float* %tmp20653, i64 1
- %tmp20655 = getelementptr inbounds float* %tmp20654, i64 1
- %tmp20656 = getelementptr inbounds float* %tmp20655, i64 1
- %tmp20657 = getelementptr inbounds float* %tmp20656, i64 1
- %tmp20658 = getelementptr inbounds float* %tmp20657, i64 1
- %tmp20659 = getelementptr inbounds float* %tmp20658, i64 1
- %tmp20660 = getelementptr inbounds float* %tmp20659, i64 1
- %tmp20661 = getelementptr inbounds float* %tmp20660, i64 1
- %tmp20662 = getelementptr inbounds float* %tmp20661, i64 1
- %tmp20663 = getelementptr inbounds float* %tmp20662, i64 1
- %tmp20664 = getelementptr inbounds float* %tmp20663, i64 1
- %tmp20665 = getelementptr inbounds float* %tmp20664, i64 1
- %tmp20666 = getelementptr inbounds float* %tmp20665, i64 1
- %tmp20667 = getelementptr inbounds float* %tmp20666, i64 1
- %tmp20668 = getelementptr inbounds float* %tmp20667, i64 1
- %tmp20669 = getelementptr inbounds float* %tmp20668, i64 1
- %tmp20670 = getelementptr inbounds float* %tmp20669, i64 1
- %tmp20671 = getelementptr inbounds float* %tmp20670, i64 1
- %tmp20672 = getelementptr inbounds float* %tmp20671, i64 1
- %tmp20673 = getelementptr inbounds float* %tmp20672, i64 1
- %tmp20674 = getelementptr inbounds float* %tmp20673, i64 1
- %tmp20675 = getelementptr inbounds float* %tmp20674, i64 1
- %tmp20676 = getelementptr inbounds float* %tmp20675, i64 1
- %tmp20677 = getelementptr inbounds float* %tmp20676, i64 1
- %tmp20678 = getelementptr inbounds float* %tmp20677, i64 1
- %tmp20679 = getelementptr inbounds float* %tmp20678, i64 1
- %tmp20680 = getelementptr inbounds float* %tmp20679, i64 1
- %tmp20681 = getelementptr inbounds float* %tmp20680, i64 1
- %tmp20682 = getelementptr inbounds float* %tmp20681, i64 1
- %tmp20683 = getelementptr inbounds float* %tmp20682, i64 1
- %tmp20684 = getelementptr inbounds float* %tmp20683, i64 1
- %tmp20685 = getelementptr inbounds float* %tmp20684, i64 1
- %tmp20686 = getelementptr inbounds float* %tmp20685, i64 1
- %tmp20687 = getelementptr inbounds float* %tmp20686, i64 1
- %tmp20688 = getelementptr inbounds float* %tmp20687, i64 1
- %tmp20689 = getelementptr inbounds float* %tmp20688, i64 1
- %tmp20690 = getelementptr inbounds float* %tmp20689, i64 1
- %tmp20691 = getelementptr inbounds float* %tmp20690, i64 1
- %tmp20692 = getelementptr inbounds float* %tmp20691, i64 1
- %tmp20693 = getelementptr inbounds float* %tmp20692, i64 1
- %tmp20694 = getelementptr inbounds float* %tmp20693, i64 1
- %tmp20695 = getelementptr inbounds float* %tmp20694, i64 1
- %tmp20696 = getelementptr inbounds float* %tmp20695, i64 1
- %tmp20697 = getelementptr inbounds float* %tmp20696, i64 1
- %tmp20698 = getelementptr inbounds float* %tmp20697, i64 1
- %tmp20699 = getelementptr inbounds float* %tmp20698, i64 1
- %tmp20700 = getelementptr inbounds float* %tmp20699, i64 1
- %tmp20701 = getelementptr inbounds float* %tmp20700, i64 1
- %tmp20702 = getelementptr inbounds float* %tmp20701, i64 1
- %tmp20703 = getelementptr inbounds float* %tmp20702, i64 1
- %tmp20704 = getelementptr inbounds float* %tmp20703, i64 1
- %tmp20705 = getelementptr inbounds float* %tmp20704, i64 1
- %tmp20706 = getelementptr inbounds float* %tmp20705, i64 1
- %tmp20707 = getelementptr inbounds float* %tmp20706, i64 1
- %tmp20708 = getelementptr inbounds float* %tmp20707, i64 1
- %tmp20709 = getelementptr inbounds float* %tmp20708, i64 1
- %tmp20710 = getelementptr inbounds float* %tmp20709, i64 1
- %tmp20711 = getelementptr inbounds float* %tmp20710, i64 1
- %tmp20712 = getelementptr inbounds float* %tmp20711, i64 1
- %tmp20713 = getelementptr inbounds float* %tmp20712, i64 1
- %tmp20714 = getelementptr inbounds float* %tmp20713, i64 1
- %tmp20715 = getelementptr inbounds float* %tmp20714, i64 1
- %tmp20716 = getelementptr inbounds float* %tmp20715, i64 1
- %tmp20717 = getelementptr inbounds float* %tmp20716, i64 1
- %tmp20718 = getelementptr inbounds float* %tmp20717, i64 1
- %tmp20719 = getelementptr inbounds float* %tmp20718, i64 1
- %tmp20720 = getelementptr inbounds float* %tmp20719, i64 1
- %tmp20721 = getelementptr inbounds float* %tmp20720, i64 1
- %tmp20722 = getelementptr inbounds float* %tmp20721, i64 1
- %tmp20723 = getelementptr inbounds float* %tmp20722, i64 1
- %tmp20724 = getelementptr inbounds float* %tmp20723, i64 1
- %tmp20725 = getelementptr inbounds float* %tmp20724, i64 1
- %tmp20726 = getelementptr inbounds float* %tmp20725, i64 1
- %tmp20727 = getelementptr inbounds float* %tmp20726, i64 1
- %tmp20728 = getelementptr inbounds float* %tmp20727, i64 1
- %tmp20729 = getelementptr inbounds float* %tmp20728, i64 1
- %tmp20730 = getelementptr inbounds float* %tmp20729, i64 1
- %tmp20731 = getelementptr inbounds float* %tmp20730, i64 1
- %tmp20732 = getelementptr inbounds float* %tmp20731, i64 1
- %tmp20733 = getelementptr inbounds float* %tmp20732, i64 1
- %tmp20734 = getelementptr inbounds float* %tmp20733, i64 1
- %tmp20735 = getelementptr inbounds float* %tmp20734, i64 1
- %tmp20736 = getelementptr inbounds float* %tmp20735, i64 1
- %tmp20737 = getelementptr inbounds float* %tmp20736, i64 1
- %tmp20738 = getelementptr inbounds float* %tmp20737, i64 1
- %tmp20739 = getelementptr inbounds float* %tmp20738, i64 1
- %tmp20740 = getelementptr inbounds float* %tmp20739, i64 1
- %tmp20741 = getelementptr inbounds float* %tmp20740, i64 1
- %tmp20742 = getelementptr inbounds float* %tmp20741, i64 1
- %tmp20743 = getelementptr inbounds float* %tmp20742, i64 1
- %tmp20744 = getelementptr inbounds float* %tmp20743, i64 1
- %tmp20745 = getelementptr inbounds float* %tmp20744, i64 1
- %tmp20746 = getelementptr inbounds float* %tmp20745, i64 1
- %tmp20747 = getelementptr inbounds float* %tmp20746, i64 1
- %tmp20748 = getelementptr inbounds float* %tmp20747, i64 1
- %tmp20749 = getelementptr inbounds float* %tmp20748, i64 1
- %tmp20750 = getelementptr inbounds float* %tmp20749, i64 1
- %tmp20751 = getelementptr inbounds float* %tmp20750, i64 1
- %tmp20752 = getelementptr inbounds float* %tmp20751, i64 1
- %tmp20753 = getelementptr inbounds float* %tmp20752, i64 1
- %tmp20754 = getelementptr inbounds float* %tmp20753, i64 1
- %tmp20755 = getelementptr inbounds float* %tmp20754, i64 1
- %tmp20756 = getelementptr inbounds float* %tmp20755, i64 1
- %tmp20757 = getelementptr inbounds float* %tmp20756, i64 1
- %tmp20758 = getelementptr inbounds float* %tmp20757, i64 1
- %tmp20759 = getelementptr inbounds float* %tmp20758, i64 1
- %tmp20760 = getelementptr inbounds float* %tmp20759, i64 1
- %tmp20761 = getelementptr inbounds float* %tmp20760, i64 1
- %tmp20762 = getelementptr inbounds float* %tmp20761, i64 1
- %tmp20763 = getelementptr inbounds float* %tmp20762, i64 1
- %tmp20764 = getelementptr inbounds float* %tmp20763, i64 1
- %tmp20765 = getelementptr inbounds float* %tmp20764, i64 1
- %tmp20766 = getelementptr inbounds float* %tmp20765, i64 1
- %tmp20767 = getelementptr inbounds float* %tmp20766, i64 1
- %tmp20768 = getelementptr inbounds float* %tmp20767, i64 1
- %tmp20769 = getelementptr inbounds float* %tmp20768, i64 1
- %tmp20770 = getelementptr inbounds float* %tmp20769, i64 1
- %tmp20771 = getelementptr inbounds float* %tmp20770, i64 1
- %tmp20772 = getelementptr inbounds float* %tmp20771, i64 1
- %tmp20773 = getelementptr inbounds float* %tmp20772, i64 1
- %tmp20774 = getelementptr inbounds float* %tmp20773, i64 1
- %tmp20775 = getelementptr inbounds float* %tmp20774, i64 1
- %tmp20776 = getelementptr inbounds float* %tmp20775, i64 1
- %tmp20777 = getelementptr inbounds float* %tmp20776, i64 1
- %tmp20778 = getelementptr inbounds float* %tmp20777, i64 1
- %tmp20779 = getelementptr inbounds float* %tmp20778, i64 1
- %tmp20780 = getelementptr inbounds float* %tmp20779, i64 1
- %tmp20781 = getelementptr inbounds float* %tmp20780, i64 1
- %tmp20782 = getelementptr inbounds float* %tmp20781, i64 1
- %tmp20783 = getelementptr inbounds float* %tmp20782, i64 1
- %tmp20784 = getelementptr inbounds float* %tmp20783, i64 1
- %tmp20785 = getelementptr inbounds float* %tmp20784, i64 1
- %tmp20786 = getelementptr inbounds float* %tmp20785, i64 1
- %tmp20787 = getelementptr inbounds float* %tmp20786, i64 1
- %tmp20788 = getelementptr inbounds float* %tmp20787, i64 1
- %tmp20789 = getelementptr inbounds float* %tmp20788, i64 1
- %tmp20790 = getelementptr inbounds float* %tmp20789, i64 1
- %tmp20791 = getelementptr inbounds float* %tmp20790, i64 1
- %tmp20792 = getelementptr inbounds float* %tmp20791, i64 1
- %tmp20793 = getelementptr inbounds float* %tmp20792, i64 1
- %tmp20794 = getelementptr inbounds float* %tmp20793, i64 1
- %tmp20795 = getelementptr inbounds float* %tmp20794, i64 1
- %tmp20796 = getelementptr inbounds float* %tmp20795, i64 1
- %tmp20797 = getelementptr inbounds float* %tmp20796, i64 1
- %tmp20798 = getelementptr inbounds float* %tmp20797, i64 1
- %tmp20799 = getelementptr inbounds float* %tmp20798, i64 1
- %tmp20800 = getelementptr inbounds float* %tmp20799, i64 1
- %tmp20801 = getelementptr inbounds float* %tmp20800, i64 1
- %tmp20802 = getelementptr inbounds float* %tmp20801, i64 1
- %tmp20803 = getelementptr inbounds float* %tmp20802, i64 1
- %tmp20804 = getelementptr inbounds float* %tmp20803, i64 1
- %tmp20805 = getelementptr inbounds float* %tmp20804, i64 1
- %tmp20806 = getelementptr inbounds float* %tmp20805, i64 1
- %tmp20807 = getelementptr inbounds float* %tmp20806, i64 1
- %tmp20808 = getelementptr inbounds float* %tmp20807, i64 1
- %tmp20809 = getelementptr inbounds float* %tmp20808, i64 1
- %tmp20810 = getelementptr inbounds float* %tmp20809, i64 1
- %tmp20811 = getelementptr inbounds float* %tmp20810, i64 1
- %tmp20812 = getelementptr inbounds float* %tmp20811, i64 1
- %tmp20813 = getelementptr inbounds float* %tmp20812, i64 1
- %tmp20814 = getelementptr inbounds float* %tmp20813, i64 1
- %tmp20815 = getelementptr inbounds float* %tmp20814, i64 1
- %tmp20816 = getelementptr inbounds float* %tmp20815, i64 1
- %tmp20817 = getelementptr inbounds float* %tmp20816, i64 1
- %tmp20818 = getelementptr inbounds float* %tmp20817, i64 1
- %tmp20819 = getelementptr inbounds float* %tmp20818, i64 1
- %tmp20820 = getelementptr inbounds float* %tmp20819, i64 1
- %tmp20821 = getelementptr inbounds float* %tmp20820, i64 1
- %tmp20822 = getelementptr inbounds float* %tmp20821, i64 1
- %tmp20823 = getelementptr inbounds float* %tmp20822, i64 1
- %tmp20824 = getelementptr inbounds float* %tmp20823, i64 1
- %tmp20825 = getelementptr inbounds float* %tmp20824, i64 1
- %tmp20826 = getelementptr inbounds float* %tmp20825, i64 1
- %tmp20827 = getelementptr inbounds float* %tmp20826, i64 1
- %tmp20828 = getelementptr inbounds float* %tmp20827, i64 1
- %tmp20829 = getelementptr inbounds float* %tmp20828, i64 1
- %tmp20830 = getelementptr inbounds float* %tmp20829, i64 1
- %tmp20831 = getelementptr inbounds float* %tmp20830, i64 1
- %tmp20832 = getelementptr inbounds float* %tmp20831, i64 1
- %tmp20833 = getelementptr inbounds float* %tmp20832, i64 1
- %tmp20834 = getelementptr inbounds float* %tmp20833, i64 1
- %tmp20835 = getelementptr inbounds float* %tmp20834, i64 1
- %tmp20836 = getelementptr inbounds float* %tmp20835, i64 1
- %tmp20837 = getelementptr inbounds float* %tmp20836, i64 1
- %tmp20838 = getelementptr inbounds float* %tmp20837, i64 1
- %tmp20839 = getelementptr inbounds float* %tmp20838, i64 1
- %tmp20840 = getelementptr inbounds float* %tmp20839, i64 1
- %tmp20841 = getelementptr inbounds float* %tmp20840, i64 1
- %tmp20842 = getelementptr inbounds float* %tmp20841, i64 1
- %tmp20843 = getelementptr inbounds float* %tmp20842, i64 1
- %tmp20844 = getelementptr inbounds float* %tmp20843, i64 1
- %tmp20845 = getelementptr inbounds float* %tmp20844, i64 1
- %tmp20846 = getelementptr inbounds float* %tmp20845, i64 1
- %tmp20847 = getelementptr inbounds float* %tmp20846, i64 1
- %tmp20848 = getelementptr inbounds float* %tmp20847, i64 1
- %tmp20849 = getelementptr inbounds float* %tmp20848, i64 1
- %tmp20850 = getelementptr inbounds float* %tmp20849, i64 1
- %tmp20851 = getelementptr inbounds float* %tmp20850, i64 1
- %tmp20852 = getelementptr inbounds float* %tmp20851, i64 1
- %tmp20853 = getelementptr inbounds float* %tmp20852, i64 1
- %tmp20854 = getelementptr inbounds float* %tmp20853, i64 1
- %tmp20855 = getelementptr inbounds float* %tmp20854, i64 1
- %tmp20856 = getelementptr inbounds float* %tmp20855, i64 1
- %tmp20857 = getelementptr inbounds float* %tmp20856, i64 1
- %tmp20858 = getelementptr inbounds float* %tmp20857, i64 1
- %tmp20859 = getelementptr inbounds float* %tmp20858, i64 1
- %tmp20860 = getelementptr inbounds float* %tmp20859, i64 1
- %tmp20861 = getelementptr inbounds float* %tmp20860, i64 1
- %tmp20862 = getelementptr inbounds float* %tmp20861, i64 1
- %tmp20863 = getelementptr inbounds float* %tmp20862, i64 1
- %tmp20864 = getelementptr inbounds float* %tmp20863, i64 1
- %tmp20865 = getelementptr inbounds float* %tmp20864, i64 1
- %tmp20866 = getelementptr inbounds float* %tmp20865, i64 1
- %tmp20867 = getelementptr inbounds float* %tmp20866, i64 1
- %tmp20868 = getelementptr inbounds float* %tmp20867, i64 1
- %tmp20869 = getelementptr inbounds float* %tmp20868, i64 1
- %tmp20870 = getelementptr inbounds float* %tmp20869, i64 1
- %tmp20871 = getelementptr inbounds float* %tmp20870, i64 1
- %tmp20872 = getelementptr inbounds float* %tmp20871, i64 1
- %tmp20873 = getelementptr inbounds float* %tmp20872, i64 1
- %tmp20874 = getelementptr inbounds float* %tmp20873, i64 1
- %tmp20875 = getelementptr inbounds float* %tmp20874, i64 1
- %tmp20876 = getelementptr inbounds float* %tmp20875, i64 1
- %tmp20877 = getelementptr inbounds float* %tmp20876, i64 1
- %tmp20878 = getelementptr inbounds float* %tmp20877, i64 1
- %tmp20879 = getelementptr inbounds float* %tmp20878, i64 1
- %tmp20880 = getelementptr inbounds float* %tmp20879, i64 1
- %tmp20881 = getelementptr inbounds float* %tmp20880, i64 1
- %tmp20882 = getelementptr inbounds float* %tmp20881, i64 1
- %tmp20883 = getelementptr inbounds float* %tmp20882, i64 1
- %tmp20884 = getelementptr inbounds float* %tmp20883, i64 1
- %tmp20885 = getelementptr inbounds float* %tmp20884, i64 1
- %tmp20886 = getelementptr inbounds float* %tmp20885, i64 1
- %tmp20887 = getelementptr inbounds float* %tmp20886, i64 1
- %tmp20888 = getelementptr inbounds float* %tmp20887, i64 1
- %tmp20889 = getelementptr inbounds float* %tmp20888, i64 1
- %tmp20890 = getelementptr inbounds float* %tmp20889, i64 1
- %tmp20891 = getelementptr inbounds float* %tmp20890, i64 1
- %tmp20892 = getelementptr inbounds float* %tmp20891, i64 1
- %tmp20893 = getelementptr inbounds float* %tmp20892, i64 1
- %tmp20894 = getelementptr inbounds float* %tmp20893, i64 1
- %tmp20895 = getelementptr inbounds float* %tmp20894, i64 1
- %tmp20896 = getelementptr inbounds float* %tmp20895, i64 1
- %tmp20897 = getelementptr inbounds float* %tmp20896, i64 1
- %tmp20898 = getelementptr inbounds float* %tmp20897, i64 1
- %tmp20899 = getelementptr inbounds float* %tmp20898, i64 1
- %tmp20900 = getelementptr inbounds float* %tmp20899, i64 1
- %tmp20901 = getelementptr inbounds float* %tmp20900, i64 1
- %tmp20902 = getelementptr inbounds float* %tmp20901, i64 1
- %tmp20903 = getelementptr inbounds float* %tmp20902, i64 1
- %tmp20904 = getelementptr inbounds float* %tmp20903, i64 1
- %tmp20905 = getelementptr inbounds float* %tmp20904, i64 1
- %tmp20906 = getelementptr inbounds float* %tmp20905, i64 1
- %tmp20907 = getelementptr inbounds float* %tmp20906, i64 1
- %tmp20908 = getelementptr inbounds float* %tmp20907, i64 1
- %tmp20909 = getelementptr inbounds float* %tmp20908, i64 1
- %tmp20910 = getelementptr inbounds float* %tmp20909, i64 1
- %tmp20911 = getelementptr inbounds float* %tmp20910, i64 1
- %tmp20912 = getelementptr inbounds float* %tmp20911, i64 1
- %tmp20913 = getelementptr inbounds float* %tmp20912, i64 1
- %tmp20914 = getelementptr inbounds float* %tmp20913, i64 1
- %tmp20915 = getelementptr inbounds float* %tmp20914, i64 1
- %tmp20916 = getelementptr inbounds float* %tmp20915, i64 1
- %tmp20917 = getelementptr inbounds float* %tmp20916, i64 1
- %tmp20918 = getelementptr inbounds float* %tmp20917, i64 1
- %tmp20919 = getelementptr inbounds float* %tmp20918, i64 1
- %tmp20920 = getelementptr inbounds float* %tmp20919, i64 1
- %tmp20921 = getelementptr inbounds float* %tmp20920, i64 1
- %tmp20922 = getelementptr inbounds float* %tmp20921, i64 1
- %tmp20923 = getelementptr inbounds float* %tmp20922, i64 1
- %tmp20924 = getelementptr inbounds float* %tmp20923, i64 1
- %tmp20925 = getelementptr inbounds float* %tmp20924, i64 1
- %tmp20926 = getelementptr inbounds float* %tmp20925, i64 1
- %tmp20927 = getelementptr inbounds float* %tmp20926, i64 1
- %tmp20928 = getelementptr inbounds float* %tmp20927, i64 1
- %tmp20929 = getelementptr inbounds float* %tmp20928, i64 1
- %tmp20930 = getelementptr inbounds float* %tmp20929, i64 1
- %tmp20931 = getelementptr inbounds float* %tmp20930, i64 1
- %tmp20932 = getelementptr inbounds float* %tmp20931, i64 1
- %tmp20933 = getelementptr inbounds float* %tmp20932, i64 1
- %tmp20934 = getelementptr inbounds float* %tmp20933, i64 1
- %tmp20935 = getelementptr inbounds float* %tmp20934, i64 1
- %tmp20936 = getelementptr inbounds float* %tmp20935, i64 1
- %tmp20937 = getelementptr inbounds float* %tmp20936, i64 1
- %tmp20938 = getelementptr inbounds float* %tmp20937, i64 1
- %tmp20939 = getelementptr inbounds float* %tmp20938, i64 1
- %tmp20940 = getelementptr inbounds float* %tmp20939, i64 1
- %tmp20941 = getelementptr inbounds float* %tmp20940, i64 1
- %tmp20942 = getelementptr inbounds float* %tmp20941, i64 1
- %tmp20943 = getelementptr inbounds float* %tmp20942, i64 1
- %tmp20944 = getelementptr inbounds float* %tmp20943, i64 1
- %tmp20945 = getelementptr inbounds float* %tmp20944, i64 1
- %tmp20946 = getelementptr inbounds float* %tmp20945, i64 1
- %tmp20947 = getelementptr inbounds float* %tmp20946, i64 1
- %tmp20948 = getelementptr inbounds float* %tmp20947, i64 1
- %tmp20949 = getelementptr inbounds float* %tmp20948, i64 1
- %tmp20950 = getelementptr inbounds float* %tmp20949, i64 1
- %tmp20951 = getelementptr inbounds float* %tmp20950, i64 1
- %tmp20952 = getelementptr inbounds float* %tmp20951, i64 1
- %tmp20953 = getelementptr inbounds float* %tmp20952, i64 1
- %tmp20954 = getelementptr inbounds float* %tmp20953, i64 1
- %tmp20955 = getelementptr inbounds float* %tmp20954, i64 1
- %tmp20956 = getelementptr inbounds float* %tmp20955, i64 1
- %tmp20957 = getelementptr inbounds float* %tmp20956, i64 1
- %tmp20958 = getelementptr inbounds float* %tmp20957, i64 1
- %tmp20959 = getelementptr inbounds float* %tmp20958, i64 1
- %tmp20960 = getelementptr inbounds float* %tmp20959, i64 1
- %tmp20961 = getelementptr inbounds float* %tmp20960, i64 1
- %tmp20962 = getelementptr inbounds float* %tmp20961, i64 1
- %tmp20963 = getelementptr inbounds float* %tmp20962, i64 1
- %tmp20964 = getelementptr inbounds float* %tmp20963, i64 1
- %tmp20965 = getelementptr inbounds float* %tmp20964, i64 1
- %tmp20966 = getelementptr inbounds float* %tmp20965, i64 1
- %tmp20967 = getelementptr inbounds float* %tmp20966, i64 1
- %tmp20968 = getelementptr inbounds float* %tmp20967, i64 1
- %tmp20969 = getelementptr inbounds float* %tmp20968, i64 1
- %tmp20970 = getelementptr inbounds float* %tmp20969, i64 1
- %tmp20971 = getelementptr inbounds float* %tmp20970, i64 1
- %tmp20972 = getelementptr inbounds float* %tmp20971, i64 1
- %tmp20973 = getelementptr inbounds float* %tmp20972, i64 1
- %tmp20974 = getelementptr inbounds float* %tmp20973, i64 1
- %tmp20975 = getelementptr inbounds float* %tmp20974, i64 1
- %tmp20976 = getelementptr inbounds float* %tmp20975, i64 1
- %tmp20977 = getelementptr inbounds float* %tmp20976, i64 1
- %tmp20978 = getelementptr inbounds float* %tmp20977, i64 1
- %tmp20979 = getelementptr inbounds float* %tmp20978, i64 1
- %tmp20980 = getelementptr inbounds float* %tmp20979, i64 1
- %tmp20981 = getelementptr inbounds float* %tmp20980, i64 1
- %tmp20982 = getelementptr inbounds float* %tmp20981, i64 1
- %tmp20983 = getelementptr inbounds float* %tmp20982, i64 1
- %tmp20984 = getelementptr inbounds float* %tmp20983, i64 1
- %tmp20985 = getelementptr inbounds float* %tmp20984, i64 1
- %tmp20986 = getelementptr inbounds float* %tmp20985, i64 1
- %tmp20987 = getelementptr inbounds float* %tmp20986, i64 1
- %tmp20988 = getelementptr inbounds float* %tmp20987, i64 1
- %tmp20989 = getelementptr inbounds float* %tmp20988, i64 1
- %tmp20990 = getelementptr inbounds float* %tmp20989, i64 1
- %tmp20991 = getelementptr inbounds float* %tmp20990, i64 1
- %tmp20992 = getelementptr inbounds float* %tmp20991, i64 1
- %tmp20993 = getelementptr inbounds float* %tmp20992, i64 1
- %tmp20994 = getelementptr inbounds float* %tmp20993, i64 1
- %tmp20995 = getelementptr inbounds float* %tmp20994, i64 1
- %tmp20996 = getelementptr inbounds float* %tmp20995, i64 1
- %tmp20997 = getelementptr inbounds float* %tmp20996, i64 1
- %tmp20998 = getelementptr inbounds float* %tmp20997, i64 1
- %tmp20999 = getelementptr inbounds float* %tmp20998, i64 1
- %tmp21000 = getelementptr inbounds float* %tmp20999, i64 1
- %tmp21001 = getelementptr inbounds float* %tmp21000, i64 1
- %tmp21002 = getelementptr inbounds float* %tmp21001, i64 1
- %tmp21003 = getelementptr inbounds float* %tmp21002, i64 1
- %tmp21004 = getelementptr inbounds float* %tmp21003, i64 1
- %tmp21005 = getelementptr inbounds float* %tmp21004, i64 1
- %tmp21006 = getelementptr inbounds float* %tmp21005, i64 1
- %tmp21007 = getelementptr inbounds float* %tmp21006, i64 1
- %tmp21008 = getelementptr inbounds float* %tmp21007, i64 1
- %tmp21009 = getelementptr inbounds float* %tmp21008, i64 1
- %tmp21010 = getelementptr inbounds float* %tmp21009, i64 1
- %tmp21011 = getelementptr inbounds float* %tmp21010, i64 1
- %tmp21012 = getelementptr inbounds float* %tmp21011, i64 1
- %tmp21013 = getelementptr inbounds float* %tmp21012, i64 1
- %tmp21014 = getelementptr inbounds float* %tmp21013, i64 1
- %tmp21015 = getelementptr inbounds float* %tmp21014, i64 1
- %tmp21016 = getelementptr inbounds float* %tmp21015, i64 1
- %tmp21017 = getelementptr inbounds float* %tmp21016, i64 1
- %tmp21018 = getelementptr inbounds float* %tmp21017, i64 1
- %tmp21019 = getelementptr inbounds float* %tmp21018, i64 1
- %tmp21020 = getelementptr inbounds float* %tmp21019, i64 1
- %tmp21021 = getelementptr inbounds float* %tmp21020, i64 1
- %tmp21022 = getelementptr inbounds float* %tmp21021, i64 1
- %tmp21023 = getelementptr inbounds float* %tmp21022, i64 1
- %tmp21024 = getelementptr inbounds float* %tmp21023, i64 1
- %tmp21025 = getelementptr inbounds float* %tmp21024, i64 1
- %tmp21026 = getelementptr inbounds float* %tmp21025, i64 1
- %tmp21027 = getelementptr inbounds float* %tmp21026, i64 1
- %tmp21028 = getelementptr inbounds float* %tmp21027, i64 1
- %tmp21029 = getelementptr inbounds float* %tmp21028, i64 1
- %tmp21030 = getelementptr inbounds float* %tmp21029, i64 1
- %tmp21031 = getelementptr inbounds float* %tmp21030, i64 1
- %tmp21032 = getelementptr inbounds float* %tmp21031, i64 1
- %tmp21033 = getelementptr inbounds float* %tmp21032, i64 1
- %tmp21034 = getelementptr inbounds float* %tmp21033, i64 1
- %tmp21035 = getelementptr inbounds float* %tmp21034, i64 1
- %tmp21036 = getelementptr inbounds float* %tmp21035, i64 1
- %tmp21037 = getelementptr inbounds float* %tmp21036, i64 1
- %tmp21038 = getelementptr inbounds float* %tmp21037, i64 1
- %tmp21039 = getelementptr inbounds float* %tmp21038, i64 1
- %tmp21040 = getelementptr inbounds float* %tmp21039, i64 1
- %tmp21041 = getelementptr inbounds float* %tmp21040, i64 1
- %tmp21042 = getelementptr inbounds float* %tmp21041, i64 1
- %tmp21043 = getelementptr inbounds float* %tmp21042, i64 1
- %tmp21044 = getelementptr inbounds float* %tmp21043, i64 1
- %tmp21045 = getelementptr inbounds float* %tmp21044, i64 1
- %tmp21046 = getelementptr inbounds float* %tmp21045, i64 1
- %tmp21047 = getelementptr inbounds float* %tmp21046, i64 1
- %tmp21048 = getelementptr inbounds float* %tmp21047, i64 1
- %tmp21049 = getelementptr inbounds float* %tmp21048, i64 1
- %tmp21050 = getelementptr inbounds float* %tmp21049, i64 1
- %tmp21051 = getelementptr inbounds float* %tmp21050, i64 1
- %tmp21052 = getelementptr inbounds float* %tmp21051, i64 1
- %tmp21053 = getelementptr inbounds float* %tmp21052, i64 1
- %tmp21054 = getelementptr inbounds float* %tmp21053, i64 1
- %tmp21055 = getelementptr inbounds float* %tmp21054, i64 1
- %tmp21056 = getelementptr inbounds float* %tmp21055, i64 1
- %tmp21057 = getelementptr inbounds float* %tmp21056, i64 1
- %tmp21058 = getelementptr inbounds float* %tmp21057, i64 1
- %tmp21059 = getelementptr inbounds float* %tmp21058, i64 1
- %tmp21060 = getelementptr inbounds float* %tmp21059, i64 1
- %tmp21061 = getelementptr inbounds float* %tmp21060, i64 1
- %tmp21062 = getelementptr inbounds float* %tmp21061, i64 1
- %tmp21063 = getelementptr inbounds float* %tmp21062, i64 1
- %tmp21064 = getelementptr inbounds float* %tmp21063, i64 1
- %tmp21065 = getelementptr inbounds float* %tmp21064, i64 1
- %tmp21066 = getelementptr inbounds float* %tmp21065, i64 1
- %tmp21067 = getelementptr inbounds float* %tmp21066, i64 1
- %tmp21068 = getelementptr inbounds float* %tmp21067, i64 1
- %tmp21069 = getelementptr inbounds float* %tmp21068, i64 1
- %tmp21070 = getelementptr inbounds float* %tmp21069, i64 1
- %tmp21071 = getelementptr inbounds float* %tmp21070, i64 1
- %tmp21072 = getelementptr inbounds float* %tmp21071, i64 1
- %tmp21073 = getelementptr inbounds float* %tmp21072, i64 1
- %tmp21074 = getelementptr inbounds float* %tmp21073, i64 1
- %tmp21075 = getelementptr inbounds float* %tmp21074, i64 1
- %tmp21076 = getelementptr inbounds float* %tmp21075, i64 1
- %tmp21077 = getelementptr inbounds float* %tmp21076, i64 1
- %tmp21078 = getelementptr inbounds float* %tmp21077, i64 1
- %tmp21079 = getelementptr inbounds float* %tmp21078, i64 1
- %tmp21080 = getelementptr inbounds float* %tmp21079, i64 1
- %tmp21081 = getelementptr inbounds float* %tmp21080, i64 1
- %tmp21082 = getelementptr inbounds float* %tmp21081, i64 1
- %tmp21083 = getelementptr inbounds float* %tmp21082, i64 1
- %tmp21084 = getelementptr inbounds float* %tmp21083, i64 1
- %tmp21085 = getelementptr inbounds float* %tmp21084, i64 1
- %tmp21086 = getelementptr inbounds float* %tmp21085, i64 1
- %tmp21087 = getelementptr inbounds float* %tmp21086, i64 1
- %tmp21088 = getelementptr inbounds float* %tmp21087, i64 1
- %tmp21089 = getelementptr inbounds float* %tmp21088, i64 1
- %tmp21090 = getelementptr inbounds float* %tmp21089, i64 1
- %tmp21091 = getelementptr inbounds float* %tmp21090, i64 1
- %tmp21092 = getelementptr inbounds float* %tmp21091, i64 1
- %tmp21093 = getelementptr inbounds float* %tmp21092, i64 1
- %tmp21094 = getelementptr inbounds float* %tmp21093, i64 1
- %tmp21095 = getelementptr inbounds float* %tmp21094, i64 1
- %tmp21096 = getelementptr inbounds float* %tmp21095, i64 1
- %tmp21097 = getelementptr inbounds float* %tmp21096, i64 1
- %tmp21098 = getelementptr inbounds float* %tmp21097, i64 1
- %tmp21099 = getelementptr inbounds float* %tmp21098, i64 1
- %tmp21100 = getelementptr inbounds float* %tmp21099, i64 1
- %tmp21101 = getelementptr inbounds float* %tmp21100, i64 1
- %tmp21102 = getelementptr inbounds float* %tmp21101, i64 1
- %tmp21103 = getelementptr inbounds float* %tmp21102, i64 1
- %tmp21104 = getelementptr inbounds float* %tmp21103, i64 1
- %tmp21105 = getelementptr inbounds float* %tmp21104, i64 1
- %tmp21106 = getelementptr inbounds float* %tmp21105, i64 1
- %tmp21107 = getelementptr inbounds float* %tmp21106, i64 1
- %tmp21108 = getelementptr inbounds float* %tmp21107, i64 1
- %tmp21109 = getelementptr inbounds float* %tmp21108, i64 1
- %tmp21110 = getelementptr inbounds float* %tmp21109, i64 1
- %tmp21111 = getelementptr inbounds float* %tmp21110, i64 1
- %tmp21112 = getelementptr inbounds float* %tmp21111, i64 1
- %tmp21113 = getelementptr inbounds float* %tmp21112, i64 1
- %tmp21114 = getelementptr inbounds float* %tmp21113, i64 1
- %tmp21115 = getelementptr inbounds float* %tmp21114, i64 1
- %tmp21116 = getelementptr inbounds float* %tmp21115, i64 1
- %tmp21117 = getelementptr inbounds float* %tmp21116, i64 1
- %tmp21118 = getelementptr inbounds float* %tmp21117, i64 1
- %tmp21119 = getelementptr inbounds float* %tmp21118, i64 1
- %tmp21120 = getelementptr inbounds float* %tmp21119, i64 1
- %tmp21121 = getelementptr inbounds float* %tmp21120, i64 1
- %tmp21122 = getelementptr inbounds float* %tmp21121, i64 1
- %tmp21123 = getelementptr inbounds float* %tmp21122, i64 1
- %tmp21124 = getelementptr inbounds float* %tmp21123, i64 1
- %tmp21125 = getelementptr inbounds float* %tmp21124, i64 1
- %tmp21126 = getelementptr inbounds float* %tmp21125, i64 1
- %tmp21127 = getelementptr inbounds float* %tmp21126, i64 1
- %tmp21128 = getelementptr inbounds float* %tmp21127, i64 1
- %tmp21129 = getelementptr inbounds float* %tmp21128, i64 1
- %tmp21130 = getelementptr inbounds float* %tmp21129, i64 1
- %tmp21131 = getelementptr inbounds float* %tmp21130, i64 1
- %tmp21132 = getelementptr inbounds float* %tmp21131, i64 1
- %tmp21133 = getelementptr inbounds float* %tmp21132, i64 1
- %tmp21134 = getelementptr inbounds float* %tmp21133, i64 1
- %tmp21135 = getelementptr inbounds float* %tmp21134, i64 1
- %tmp21136 = getelementptr inbounds float* %tmp21135, i64 1
- %tmp21137 = getelementptr inbounds float* %tmp21136, i64 1
- %tmp21138 = getelementptr inbounds float* %tmp21137, i64 1
- %tmp21139 = getelementptr inbounds float* %tmp21138, i64 1
- %tmp21140 = getelementptr inbounds float* %tmp21139, i64 1
- %tmp21141 = getelementptr inbounds float* %tmp21140, i64 1
- %tmp21142 = getelementptr inbounds float* %tmp21141, i64 1
- %tmp21143 = getelementptr inbounds float* %tmp21142, i64 1
- %tmp21144 = getelementptr inbounds float* %tmp21143, i64 1
- %tmp21145 = getelementptr inbounds float* %tmp21144, i64 1
- %tmp21146 = getelementptr inbounds float* %tmp21145, i64 1
- %tmp21147 = getelementptr inbounds float* %tmp21146, i64 1
- %tmp21148 = getelementptr inbounds float* %tmp21147, i64 1
- %tmp21149 = getelementptr inbounds float* %tmp21148, i64 1
- %tmp21150 = getelementptr inbounds float* %tmp21149, i64 1
- %tmp21151 = getelementptr inbounds float* %tmp21150, i64 1
- %tmp21152 = getelementptr inbounds float* %tmp21151, i64 1
- %tmp21153 = getelementptr inbounds float* %tmp21152, i64 1
- %tmp21154 = getelementptr inbounds float* %tmp21153, i64 1
- %tmp21155 = getelementptr inbounds float* %tmp21154, i64 1
- %tmp21156 = getelementptr inbounds float* %tmp21155, i64 1
- %tmp21157 = getelementptr inbounds float* %tmp21156, i64 1
- %tmp21158 = getelementptr inbounds float* %tmp21157, i64 1
- %tmp21159 = getelementptr inbounds float* %tmp21158, i64 1
- %tmp21160 = getelementptr inbounds float* %tmp21159, i64 1
- %tmp21161 = getelementptr inbounds float* %tmp21160, i64 1
- %tmp21162 = getelementptr inbounds float* %tmp21161, i64 1
- %tmp21163 = getelementptr inbounds float* %tmp21162, i64 1
- %tmp21164 = getelementptr inbounds float* %tmp21163, i64 1
- %tmp21165 = getelementptr inbounds float* %tmp21164, i64 1
- %tmp21166 = getelementptr inbounds float* %tmp21165, i64 1
- %tmp21167 = getelementptr inbounds float* %tmp21166, i64 1
- %tmp21168 = getelementptr inbounds float* %tmp21167, i64 1
- %tmp21169 = getelementptr inbounds float* %tmp21168, i64 1
- %tmp21170 = getelementptr inbounds float* %tmp21169, i64 1
- %tmp21171 = getelementptr inbounds float* %tmp21170, i64 1
- %tmp21172 = getelementptr inbounds float* %tmp21171, i64 1
- %tmp21173 = getelementptr inbounds float* %tmp21172, i64 1
- %tmp21174 = getelementptr inbounds float* %tmp21173, i64 1
- %tmp21175 = getelementptr inbounds float* %tmp21174, i64 1
- %tmp21176 = getelementptr inbounds float* %tmp21175, i64 1
- %tmp21177 = getelementptr inbounds float* %tmp21176, i64 1
- %tmp21178 = getelementptr inbounds float* %tmp21177, i64 1
- %tmp21179 = getelementptr inbounds float* %tmp21178, i64 1
- %tmp21180 = getelementptr inbounds float* %tmp21179, i64 1
- %tmp21181 = getelementptr inbounds float* %tmp21180, i64 1
- %tmp21182 = getelementptr inbounds float* %tmp21181, i64 1
- %tmp21183 = getelementptr inbounds float* %tmp21182, i64 1
- %tmp21184 = getelementptr inbounds float* %tmp21183, i64 1
- %tmp21185 = getelementptr inbounds float* %tmp21184, i64 1
- %tmp21186 = getelementptr inbounds float* %tmp21185, i64 1
- %tmp21187 = getelementptr inbounds float* %tmp21186, i64 1
- %tmp21188 = getelementptr inbounds float* %tmp21187, i64 1
- %tmp21189 = getelementptr inbounds float* %tmp21188, i64 1
- %tmp21190 = getelementptr inbounds float* %tmp21189, i64 1
- %tmp21191 = getelementptr inbounds float* %tmp21190, i64 1
- %tmp21192 = getelementptr inbounds float* %tmp21191, i64 1
- %tmp21193 = getelementptr inbounds float* %tmp21192, i64 1
- %tmp21194 = getelementptr inbounds float* %tmp21193, i64 1
- %tmp21195 = getelementptr inbounds float* %tmp21194, i64 1
- %tmp21196 = getelementptr inbounds float* %tmp21195, i64 1
- %tmp21197 = getelementptr inbounds float* %tmp21196, i64 1
- %tmp21198 = getelementptr inbounds float* %tmp21197, i64 1
- %tmp21199 = getelementptr inbounds float* %tmp21198, i64 1
- %tmp21200 = getelementptr inbounds float* %tmp21199, i64 1
- %tmp21201 = getelementptr inbounds float* %tmp21200, i64 1
- %tmp21202 = getelementptr inbounds float* %tmp21201, i64 1
- %tmp21203 = getelementptr inbounds float* %tmp21202, i64 1
- %tmp21204 = getelementptr inbounds float* %tmp21203, i64 1
- %tmp21205 = getelementptr inbounds float* %tmp21204, i64 1
- %tmp21206 = getelementptr inbounds float* %tmp21205, i64 1
- %tmp21207 = getelementptr inbounds float* %tmp21206, i64 1
- %tmp21208 = getelementptr inbounds float* %tmp21207, i64 1
- %tmp21209 = getelementptr inbounds float* %tmp21208, i64 1
- %tmp21210 = getelementptr inbounds float* %tmp21209, i64 1
- %tmp21211 = getelementptr inbounds float* %tmp21210, i64 1
- %tmp21212 = getelementptr inbounds float* %tmp21211, i64 1
- %tmp21213 = getelementptr inbounds float* %tmp21212, i64 1
- %tmp21214 = getelementptr inbounds float* %tmp21213, i64 1
- %tmp21215 = getelementptr inbounds float* %tmp21214, i64 1
- %tmp21216 = getelementptr inbounds float* %tmp21215, i64 1
- %tmp21217 = getelementptr inbounds float* %tmp21216, i64 1
- %tmp21218 = getelementptr inbounds float* %tmp21217, i64 1
- %tmp21219 = getelementptr inbounds float* %tmp21218, i64 1
- %tmp21220 = getelementptr inbounds float* %tmp21219, i64 1
- %tmp21221 = getelementptr inbounds float* %tmp21220, i64 1
- %tmp21222 = getelementptr inbounds float* %tmp21221, i64 1
- %tmp21223 = getelementptr inbounds float* %tmp21222, i64 1
- %tmp21224 = getelementptr inbounds float* %tmp21223, i64 1
- %tmp21225 = getelementptr inbounds float* %tmp21224, i64 1
- %tmp21226 = getelementptr inbounds float* %tmp21225, i64 1
- %tmp21227 = getelementptr inbounds float* %tmp21226, i64 1
- %tmp21228 = getelementptr inbounds float* %tmp21227, i64 1
- %tmp21229 = getelementptr inbounds float* %tmp21228, i64 1
- %tmp21230 = getelementptr inbounds float* %tmp21229, i64 1
- %tmp21231 = getelementptr inbounds float* %tmp21230, i64 1
- %tmp21232 = getelementptr inbounds float* %tmp21231, i64 1
- %tmp21233 = getelementptr inbounds float* %tmp21232, i64 1
- %tmp21234 = getelementptr inbounds float* %tmp21233, i64 1
- %tmp21235 = getelementptr inbounds float* %tmp21234, i64 1
- %tmp21236 = getelementptr inbounds float* %tmp21235, i64 1
- %tmp21237 = getelementptr inbounds float* %tmp21236, i64 1
- %tmp21238 = getelementptr inbounds float* %tmp21237, i64 1
- %tmp21239 = getelementptr inbounds float* %tmp21238, i64 1
- %tmp21240 = getelementptr inbounds float* %tmp21239, i64 1
- %tmp21241 = getelementptr inbounds float* %tmp21240, i64 1
- %tmp21242 = getelementptr inbounds float* %tmp21241, i64 1
- %tmp21243 = getelementptr inbounds float* %tmp21242, i64 1
- %tmp21244 = getelementptr inbounds float* %tmp21243, i64 1
- %tmp21245 = getelementptr inbounds float* %tmp21244, i64 1
- %tmp21246 = getelementptr inbounds float* %tmp21245, i64 1
- %tmp21247 = getelementptr inbounds float* %tmp21246, i64 1
- %tmp21248 = getelementptr inbounds float* %tmp21247, i64 1
- %tmp21249 = getelementptr inbounds float* %tmp21248, i64 1
- %tmp21250 = getelementptr inbounds float* %tmp21249, i64 1
- %tmp21251 = getelementptr inbounds float* %tmp21250, i64 1
- %tmp21252 = getelementptr inbounds float* %tmp21251, i64 1
- %tmp21253 = getelementptr inbounds float* %tmp21252, i64 1
- %tmp21254 = getelementptr inbounds float* %tmp21253, i64 1
- %tmp21255 = getelementptr inbounds float* %tmp21254, i64 1
- %tmp21256 = getelementptr inbounds float* %tmp21255, i64 1
- %tmp21257 = getelementptr inbounds float* %tmp21256, i64 1
- %tmp21258 = getelementptr inbounds float* %tmp21257, i64 1
- %tmp21259 = getelementptr inbounds float* %tmp21258, i64 1
- %tmp21260 = getelementptr inbounds float* %tmp21259, i64 1
- %tmp21261 = getelementptr inbounds float* %tmp21260, i64 1
- %tmp21262 = getelementptr inbounds float* %tmp21261, i64 1
- %tmp21263 = getelementptr inbounds float* %tmp21262, i64 1
- %tmp21264 = getelementptr inbounds float* %tmp21263, i64 1
- %tmp21265 = getelementptr inbounds float* %tmp21264, i64 1
- %tmp21266 = getelementptr inbounds float* %tmp21265, i64 1
- %tmp21267 = getelementptr inbounds float* %tmp21266, i64 1
- %tmp21268 = getelementptr inbounds float* %tmp21267, i64 1
- %tmp21269 = getelementptr inbounds float* %tmp21268, i64 1
- %tmp21270 = getelementptr inbounds float* %tmp21269, i64 1
- %tmp21271 = getelementptr inbounds float* %tmp21270, i64 1
- %tmp21272 = getelementptr inbounds float* %tmp21271, i64 1
- %tmp21273 = getelementptr inbounds float* %tmp21272, i64 1
- %tmp21274 = getelementptr inbounds float* %tmp21273, i64 1
- %tmp21275 = getelementptr inbounds float* %tmp21274, i64 1
- %tmp21276 = getelementptr inbounds float* %tmp21275, i64 1
- %tmp21277 = getelementptr inbounds float* %tmp21276, i64 1
- %tmp21278 = getelementptr inbounds float* %tmp21277, i64 1
- %tmp21279 = getelementptr inbounds float* %tmp21278, i64 1
- %tmp21280 = getelementptr inbounds float* %tmp21279, i64 1
- %tmp21281 = getelementptr inbounds float* %tmp21280, i64 1
- %tmp21282 = getelementptr inbounds float* %tmp21281, i64 1
- %tmp21283 = getelementptr inbounds float* %tmp21282, i64 1
- %tmp21284 = getelementptr inbounds float* %tmp21283, i64 1
- %tmp21285 = getelementptr inbounds float* %tmp21284, i64 1
- %tmp21286 = getelementptr inbounds float* %tmp21285, i64 1
- %tmp21287 = getelementptr inbounds float* %tmp21286, i64 1
- %tmp21288 = getelementptr inbounds float* %tmp21287, i64 1
- %tmp21289 = getelementptr inbounds float* %tmp21288, i64 1
- %tmp21290 = getelementptr inbounds float* %tmp21289, i64 1
- %tmp21291 = getelementptr inbounds float* %tmp21290, i64 1
- %tmp21292 = getelementptr inbounds float* %tmp21291, i64 1
- %tmp21293 = getelementptr inbounds float* %tmp21292, i64 1
- %tmp21294 = getelementptr inbounds float* %tmp21293, i64 1
- %tmp21295 = getelementptr inbounds float* %tmp21294, i64 1
- %tmp21296 = getelementptr inbounds float* %tmp21295, i64 1
- %tmp21297 = getelementptr inbounds float* %tmp21296, i64 1
- %tmp21298 = getelementptr inbounds float* %tmp21297, i64 1
- %tmp21299 = getelementptr inbounds float* %tmp21298, i64 1
- %tmp21300 = getelementptr inbounds float* %tmp21299, i64 1
- %tmp21301 = getelementptr inbounds float* %tmp21300, i64 1
- %tmp21302 = getelementptr inbounds float* %tmp21301, i64 1
- %tmp21303 = getelementptr inbounds float* %tmp21302, i64 1
- %tmp21304 = getelementptr inbounds float* %tmp21303, i64 1
- %tmp21305 = getelementptr inbounds float* %tmp21304, i64 1
- %tmp21306 = getelementptr inbounds float* %tmp21305, i64 1
- %tmp21307 = getelementptr inbounds float* %tmp21306, i64 1
- %tmp21308 = getelementptr inbounds float* %tmp21307, i64 1
- %tmp21309 = getelementptr inbounds float* %tmp21308, i64 1
- %tmp21310 = getelementptr inbounds float* %tmp21309, i64 1
- %tmp21311 = getelementptr inbounds float* %tmp21310, i64 1
- %tmp21312 = getelementptr inbounds float* %tmp21311, i64 1
- %tmp21313 = getelementptr inbounds float* %tmp21312, i64 1
- %tmp21314 = getelementptr inbounds float* %tmp21313, i64 1
- %tmp21315 = getelementptr inbounds float* %tmp21314, i64 1
- %tmp21316 = getelementptr inbounds float* %tmp21315, i64 1
- %tmp21317 = getelementptr inbounds float* %tmp21316, i64 1
- %tmp21318 = getelementptr inbounds float* %tmp21317, i64 1
- %tmp21319 = getelementptr inbounds float* %tmp21318, i64 1
- %tmp21320 = getelementptr inbounds float* %tmp21319, i64 1
- %tmp21321 = getelementptr inbounds float* %tmp21320, i64 1
- %tmp21322 = getelementptr inbounds float* %tmp21321, i64 1
- %tmp21323 = getelementptr inbounds float* %tmp21322, i64 1
- %tmp21324 = getelementptr inbounds float* %tmp21323, i64 1
- %tmp21325 = getelementptr inbounds float* %tmp21324, i64 1
- %tmp21326 = getelementptr inbounds float* %tmp21325, i64 1
- %tmp21327 = getelementptr inbounds float* %tmp21326, i64 1
- %tmp21328 = getelementptr inbounds float* %tmp21327, i64 1
- %tmp21329 = getelementptr inbounds float* %tmp21328, i64 1
- %tmp21330 = getelementptr inbounds float* %tmp21329, i64 1
- %tmp21331 = getelementptr inbounds float* %tmp21330, i64 1
- %tmp21332 = getelementptr inbounds float* %tmp21331, i64 1
- %tmp21333 = getelementptr inbounds float* %tmp21332, i64 1
- %tmp21334 = getelementptr inbounds float* %tmp21333, i64 1
- %tmp21335 = getelementptr inbounds float* %tmp21334, i64 1
- %tmp21336 = getelementptr inbounds float* %tmp21335, i64 1
- %tmp21337 = getelementptr inbounds float* %tmp21336, i64 1
- %tmp21338 = getelementptr inbounds float* %tmp21337, i64 1
- %tmp21339 = getelementptr inbounds float* %tmp21338, i64 1
- %tmp21340 = getelementptr inbounds float* %tmp21339, i64 1
- %tmp21341 = getelementptr inbounds float* %tmp21340, i64 1
- %tmp21342 = getelementptr inbounds float* %tmp21341, i64 1
- %tmp21343 = getelementptr inbounds float* %tmp21342, i64 1
- %tmp21344 = getelementptr inbounds float* %tmp21343, i64 1
- %tmp21345 = getelementptr inbounds float* %tmp21344, i64 1
- %tmp21346 = getelementptr inbounds float* %tmp21345, i64 1
- %tmp21347 = getelementptr inbounds float* %tmp21346, i64 1
- %tmp21348 = getelementptr inbounds float* %tmp21347, i64 1
- %tmp21349 = getelementptr inbounds float* %tmp21348, i64 1
- %tmp21350 = getelementptr inbounds float* %tmp21349, i64 1
- %tmp21351 = getelementptr inbounds float* %tmp21350, i64 1
- %tmp21352 = getelementptr inbounds float* %tmp21351, i64 1
- %tmp21353 = getelementptr inbounds float* %tmp21352, i64 1
- %tmp21354 = getelementptr inbounds float* %tmp21353, i64 1
- %tmp21355 = getelementptr inbounds float* %tmp21354, i64 1
- %tmp21356 = getelementptr inbounds float* %tmp21355, i64 1
- %tmp21357 = getelementptr inbounds float* %tmp21356, i64 1
- %tmp21358 = getelementptr inbounds float* %tmp21357, i64 1
- %tmp21359 = getelementptr inbounds float* %tmp21358, i64 1
- %tmp21360 = getelementptr inbounds float* %tmp21359, i64 1
- %tmp21361 = getelementptr inbounds float* %tmp21360, i64 1
- %tmp21362 = getelementptr inbounds float* %tmp21361, i64 1
- %tmp21363 = getelementptr inbounds float* %tmp21362, i64 1
- %tmp21364 = getelementptr inbounds float* %tmp21363, i64 1
- %tmp21365 = getelementptr inbounds float* %tmp21364, i64 1
- %tmp21366 = getelementptr inbounds float* %tmp21365, i64 1
- %tmp21367 = getelementptr inbounds float* %tmp21366, i64 1
- %tmp21368 = getelementptr inbounds float* %tmp21367, i64 1
- %tmp21369 = getelementptr inbounds float* %tmp21368, i64 1
- %tmp21370 = getelementptr inbounds float* %tmp21369, i64 1
- %tmp21371 = getelementptr inbounds float* %tmp21370, i64 1
- %tmp21372 = getelementptr inbounds float* %tmp21371, i64 1
- %tmp21373 = getelementptr inbounds float* %tmp21372, i64 1
- %tmp21374 = getelementptr inbounds float* %tmp21373, i64 1
- %tmp21375 = getelementptr inbounds float* %tmp21374, i64 1
- %tmp21376 = getelementptr inbounds float* %tmp21375, i64 1
- %tmp21377 = getelementptr inbounds float* %tmp21376, i64 1
- %tmp21378 = getelementptr inbounds float* %tmp21377, i64 1
- %tmp21379 = getelementptr inbounds float* %tmp21378, i64 1
- %tmp21380 = getelementptr inbounds float* %tmp21379, i64 1
- %tmp21381 = getelementptr inbounds float* %tmp21380, i64 1
- %tmp21382 = getelementptr inbounds float* %tmp21381, i64 1
- %tmp21383 = getelementptr inbounds float* %tmp21382, i64 1
- %tmp21384 = getelementptr inbounds float* %tmp21383, i64 1
- %tmp21385 = getelementptr inbounds float* %tmp21384, i64 1
- %tmp21386 = getelementptr inbounds float* %tmp21385, i64 1
- %tmp21387 = getelementptr inbounds float* %tmp21386, i64 1
- %tmp21388 = getelementptr inbounds float* %tmp21387, i64 1
- %tmp21389 = getelementptr inbounds float* %tmp21388, i64 1
- %tmp21390 = getelementptr inbounds float* %tmp21389, i64 1
- %tmp21391 = getelementptr inbounds float* %tmp21390, i64 1
- %tmp21392 = getelementptr inbounds float* %tmp21391, i64 1
- %tmp21393 = getelementptr inbounds float* %tmp21392, i64 1
- %tmp21394 = getelementptr inbounds float* %tmp21393, i64 1
- %tmp21395 = getelementptr inbounds float* %tmp21394, i64 1
- %tmp21396 = getelementptr inbounds float* %tmp21395, i64 1
- %tmp21397 = getelementptr inbounds float* %tmp21396, i64 1
- %tmp21398 = getelementptr inbounds float* %tmp21397, i64 1
- %tmp21399 = getelementptr inbounds float* %tmp21398, i64 1
- %tmp21400 = getelementptr inbounds float* %tmp21399, i64 1
- %tmp21401 = getelementptr inbounds float* %tmp21400, i64 1
- %tmp21402 = getelementptr inbounds float* %tmp21401, i64 1
- %tmp21403 = getelementptr inbounds float* %tmp21402, i64 1
- %tmp21404 = getelementptr inbounds float* %tmp21403, i64 1
- %tmp21405 = getelementptr inbounds float* %tmp21404, i64 1
- %tmp21406 = getelementptr inbounds float* %tmp21405, i64 1
- %tmp21407 = getelementptr inbounds float* %tmp21406, i64 1
- %tmp21408 = getelementptr inbounds float* %tmp21407, i64 1
- %tmp21409 = getelementptr inbounds float* %tmp21408, i64 1
- %tmp21410 = getelementptr inbounds float* %tmp21409, i64 1
- %tmp21411 = getelementptr inbounds float* %tmp21410, i64 1
- %tmp21412 = getelementptr inbounds float* %tmp21411, i64 1
- %tmp21413 = getelementptr inbounds float* %tmp21412, i64 1
- %tmp21414 = getelementptr inbounds float* %tmp21413, i64 1
- %tmp21415 = getelementptr inbounds float* %tmp21414, i64 1
- %tmp21416 = getelementptr inbounds float* %tmp21415, i64 1
- %tmp21417 = getelementptr inbounds float* %tmp21416, i64 1
- %tmp21418 = getelementptr inbounds float* %tmp21417, i64 1
- %tmp21419 = getelementptr inbounds float* %tmp21418, i64 1
- %tmp21420 = getelementptr inbounds float* %tmp21419, i64 1
- %tmp21421 = getelementptr inbounds float* %tmp21420, i64 1
- %tmp21422 = getelementptr inbounds float* %tmp21421, i64 1
- %tmp21423 = getelementptr inbounds float* %tmp21422, i64 1
- %tmp21424 = getelementptr inbounds float* %tmp21423, i64 1
- %tmp21425 = getelementptr inbounds float* %tmp21424, i64 1
- %tmp21426 = getelementptr inbounds float* %tmp21425, i64 1
- %tmp21427 = getelementptr inbounds float* %tmp21426, i64 1
- %tmp21428 = getelementptr inbounds float* %tmp21427, i64 1
- %tmp21429 = getelementptr inbounds float* %tmp21428, i64 1
- %tmp21430 = getelementptr inbounds float* %tmp21429, i64 1
- %tmp21431 = getelementptr inbounds float* %tmp21430, i64 1
- %tmp21432 = getelementptr inbounds float* %tmp21431, i64 1
- %tmp21433 = getelementptr inbounds float* %tmp21432, i64 1
- %tmp21434 = getelementptr inbounds float* %tmp21433, i64 1
- %tmp21435 = getelementptr inbounds float* %tmp21434, i64 1
- %tmp21436 = getelementptr inbounds float* %tmp21435, i64 1
- %tmp21437 = getelementptr inbounds float* %tmp21436, i64 1
- %tmp21438 = getelementptr inbounds float* %tmp21437, i64 1
- %tmp21439 = getelementptr inbounds float* %tmp21438, i64 1
- %tmp21440 = getelementptr inbounds float* %tmp21439, i64 1
- %tmp21441 = getelementptr inbounds float* %tmp21440, i64 1
- %tmp21442 = getelementptr inbounds float* %tmp21441, i64 1
- %tmp21443 = getelementptr inbounds float* %tmp21442, i64 1
- %tmp21444 = getelementptr inbounds float* %tmp21443, i64 1
- %tmp21445 = getelementptr inbounds float* %tmp21444, i64 1
- %tmp21446 = getelementptr inbounds float* %tmp21445, i64 1
- %tmp21447 = getelementptr inbounds float* %tmp21446, i64 1
- %tmp21448 = getelementptr inbounds float* %tmp21447, i64 1
- %tmp21449 = getelementptr inbounds float* %tmp21448, i64 1
- %tmp21450 = getelementptr inbounds float* %tmp21449, i64 1
- %tmp21451 = getelementptr inbounds float* %tmp21450, i64 1
- %tmp21452 = getelementptr inbounds float* %tmp21451, i64 1
- %tmp21453 = getelementptr inbounds float* %tmp21452, i64 1
- %tmp21454 = getelementptr inbounds float* %tmp21453, i64 1
- %tmp21455 = getelementptr inbounds float* %tmp21454, i64 1
- %tmp21456 = getelementptr inbounds float* %tmp21455, i64 1
- %tmp21457 = getelementptr inbounds float* %tmp21456, i64 1
- %tmp21458 = getelementptr inbounds float* %tmp21457, i64 1
- %tmp21459 = getelementptr inbounds float* %tmp21458, i64 1
- %tmp21460 = getelementptr inbounds float* %tmp21459, i64 1
- %tmp21461 = getelementptr inbounds float* %tmp21460, i64 1
- %tmp21462 = getelementptr inbounds float* %tmp21461, i64 1
- %tmp21463 = getelementptr inbounds float* %tmp21462, i64 1
- %tmp21464 = getelementptr inbounds float* %tmp21463, i64 1
- %tmp21465 = getelementptr inbounds float* %tmp21464, i64 1
- %tmp21466 = getelementptr inbounds float* %tmp21465, i64 1
- %tmp21467 = getelementptr inbounds float* %tmp21466, i64 1
- %tmp21468 = getelementptr inbounds float* %tmp21467, i64 1
- %tmp21469 = getelementptr inbounds float* %tmp21468, i64 1
- %tmp21470 = getelementptr inbounds float* %tmp21469, i64 1
- %tmp21471 = getelementptr inbounds float* %tmp21470, i64 1
- %tmp21472 = getelementptr inbounds float* %tmp21471, i64 1
- %tmp21473 = getelementptr inbounds float* %tmp21472, i64 1
- %tmp21474 = getelementptr inbounds float* %tmp21473, i64 1
- %tmp21475 = getelementptr inbounds float* %tmp21474, i64 1
- %tmp21476 = getelementptr inbounds float* %tmp21475, i64 1
- %tmp21477 = getelementptr inbounds float* %tmp21476, i64 1
- %tmp21478 = getelementptr inbounds float* %tmp21477, i64 1
- %tmp21479 = getelementptr inbounds float* %tmp21478, i64 1
- %tmp21480 = getelementptr inbounds float* %tmp21479, i64 1
- %tmp21481 = getelementptr inbounds float* %tmp21480, i64 1
- %tmp21482 = getelementptr inbounds float* %tmp21481, i64 1
- %tmp21483 = getelementptr inbounds float* %tmp21482, i64 1
- %tmp21484 = getelementptr inbounds float* %tmp21483, i64 1
- %tmp21485 = getelementptr inbounds float* %tmp21484, i64 1
- %tmp21486 = getelementptr inbounds float* %tmp21485, i64 1
- %tmp21487 = getelementptr inbounds float* %tmp21486, i64 1
- %tmp21488 = getelementptr inbounds float* %tmp21487, i64 1
- %tmp21489 = getelementptr inbounds float* %tmp21488, i64 1
- %tmp21490 = getelementptr inbounds float* %tmp21489, i64 1
- %tmp21491 = getelementptr inbounds float* %tmp21490, i64 1
- %tmp21492 = getelementptr inbounds float* %tmp21491, i64 1
- %tmp21493 = getelementptr inbounds float* %tmp21492, i64 1
- %tmp21494 = getelementptr inbounds float* %tmp21493, i64 1
- %tmp21495 = getelementptr inbounds float* %tmp21494, i64 1
- %tmp21496 = getelementptr inbounds float* %tmp21495, i64 1
- %tmp21497 = getelementptr inbounds float* %tmp21496, i64 1
- %tmp21498 = getelementptr inbounds float* %tmp21497, i64 1
- %tmp21499 = getelementptr inbounds float* %tmp21498, i64 1
- %tmp21500 = getelementptr inbounds float* %tmp21499, i64 1
- %tmp21501 = getelementptr inbounds float* %tmp21500, i64 1
- %tmp21502 = getelementptr inbounds float* %tmp21501, i64 1
- %tmp21503 = getelementptr inbounds float* %tmp21502, i64 1
- %tmp21504 = getelementptr inbounds float* %tmp21503, i64 1
- %tmp21505 = getelementptr inbounds float* %tmp21504, i64 1
- %tmp21506 = getelementptr inbounds float* %tmp21505, i64 1
- %tmp21507 = getelementptr inbounds float* %tmp21506, i64 1
- %tmp21508 = getelementptr inbounds float* %tmp21507, i64 1
- %tmp21509 = getelementptr inbounds float* %tmp21508, i64 1
- %tmp21510 = getelementptr inbounds float* %tmp21509, i64 1
- %tmp21511 = getelementptr inbounds float* %tmp21510, i64 1
- %tmp21512 = getelementptr inbounds float* %tmp21511, i64 1
- %tmp21513 = getelementptr inbounds float* %tmp21512, i64 1
- %tmp21514 = getelementptr inbounds float* %tmp21513, i64 1
- %tmp21515 = getelementptr inbounds float* %tmp21514, i64 1
- %tmp21516 = getelementptr inbounds float* %tmp21515, i64 1
- %tmp21517 = getelementptr inbounds float* %tmp21516, i64 1
- %tmp21518 = getelementptr inbounds float* %tmp21517, i64 1
- %tmp21519 = getelementptr inbounds float* %tmp21518, i64 1
- %tmp21520 = getelementptr inbounds float* %tmp21519, i64 1
- %tmp21521 = getelementptr inbounds float* %tmp21520, i64 1
- %tmp21522 = getelementptr inbounds float* %tmp21521, i64 1
- %tmp21523 = getelementptr inbounds float* %tmp21522, i64 1
- %tmp21524 = getelementptr inbounds float* %tmp21523, i64 1
- %tmp21525 = getelementptr inbounds float* %tmp21524, i64 1
- %tmp21526 = getelementptr inbounds float* %tmp21525, i64 1
- %tmp21527 = getelementptr inbounds float* %tmp21526, i64 1
- %tmp21528 = getelementptr inbounds float* %tmp21527, i64 1
- %tmp21529 = getelementptr inbounds float* %tmp21528, i64 1
- %tmp21530 = getelementptr inbounds float* %tmp21529, i64 1
- %tmp21531 = getelementptr inbounds float* %tmp21530, i64 1
- %tmp21532 = getelementptr inbounds float* %tmp21531, i64 1
- %tmp21533 = getelementptr inbounds float* %tmp21532, i64 1
- %tmp21534 = getelementptr inbounds float* %tmp21533, i64 1
- %tmp21535 = getelementptr inbounds float* %tmp21534, i64 1
- %tmp21536 = getelementptr inbounds float* %tmp21535, i64 1
- %tmp21537 = getelementptr inbounds float* %tmp21536, i64 1
- %tmp21538 = getelementptr inbounds float* %tmp21537, i64 1
- %tmp21539 = getelementptr inbounds float* %tmp21538, i64 1
- %tmp21540 = getelementptr inbounds float* %tmp21539, i64 1
- %tmp21541 = getelementptr inbounds float* %tmp21540, i64 1
- %tmp21542 = getelementptr inbounds float* %tmp21541, i64 1
- %tmp21543 = getelementptr inbounds float* %tmp21542, i64 1
- %tmp21544 = getelementptr inbounds float* %tmp21543, i64 1
- %tmp21545 = getelementptr inbounds float* %tmp21544, i64 1
- %tmp21546 = getelementptr inbounds float* %tmp21545, i64 1
- %tmp21547 = getelementptr inbounds float* %tmp21546, i64 1
- %tmp21548 = getelementptr inbounds float* %tmp21547, i64 1
- %tmp21549 = getelementptr inbounds float* %tmp21548, i64 1
- %tmp21550 = getelementptr inbounds float* %tmp21549, i64 1
- %tmp21551 = getelementptr inbounds float* %tmp21550, i64 1
- %tmp21552 = getelementptr inbounds float* %tmp21551, i64 1
- %tmp21553 = getelementptr inbounds float* %tmp21552, i64 1
- %tmp21554 = getelementptr inbounds float* %tmp21553, i64 1
- %tmp21555 = getelementptr inbounds float* %tmp21554, i64 1
- %tmp21556 = getelementptr inbounds float* %tmp21555, i64 1
- %tmp21557 = getelementptr inbounds float* %tmp21556, i64 1
- %tmp21558 = getelementptr inbounds float* %tmp21557, i64 1
- %tmp21559 = getelementptr inbounds float* %tmp21558, i64 1
- %tmp21560 = getelementptr inbounds float* %tmp21559, i64 1
- %tmp21561 = getelementptr inbounds float* %tmp21560, i64 1
- %tmp21562 = getelementptr inbounds float* %tmp21561, i64 1
- %tmp21563 = getelementptr inbounds float* %tmp21562, i64 1
- %tmp21564 = getelementptr inbounds float* %tmp21563, i64 1
- %tmp21565 = getelementptr inbounds float* %tmp21564, i64 1
- %tmp21566 = getelementptr inbounds float* %tmp21565, i64 1
- %tmp21567 = getelementptr inbounds float* %tmp21566, i64 1
- %tmp21568 = getelementptr inbounds float* %tmp21567, i64 1
- %tmp21569 = getelementptr inbounds float* %tmp21568, i64 1
- %tmp21570 = getelementptr inbounds float* %tmp21569, i64 1
- %tmp21571 = getelementptr inbounds float* %tmp21570, i64 1
- %tmp21572 = getelementptr inbounds float* %tmp21571, i64 1
- %tmp21573 = getelementptr inbounds float* %tmp21572, i64 1
- %tmp21574 = getelementptr inbounds float* %tmp21573, i64 1
- %tmp21575 = getelementptr inbounds float* %tmp21574, i64 1
- %tmp21576 = getelementptr inbounds float* %tmp21575, i64 1
- %tmp21577 = getelementptr inbounds float* %tmp21576, i64 1
- %tmp21578 = getelementptr inbounds float* %tmp21577, i64 1
- %tmp21579 = getelementptr inbounds float* %tmp21578, i64 1
- %tmp21580 = getelementptr inbounds float* %tmp21579, i64 1
- %tmp21581 = getelementptr inbounds float* %tmp21580, i64 1
- %tmp21582 = getelementptr inbounds float* %tmp21581, i64 1
- %tmp21583 = getelementptr inbounds float* %tmp21582, i64 1
- %tmp21584 = getelementptr inbounds float* %tmp21583, i64 1
- %tmp21585 = getelementptr inbounds float* %tmp21584, i64 1
- %tmp21586 = getelementptr inbounds float* %tmp21585, i64 1
- %tmp21587 = getelementptr inbounds float* %tmp21586, i64 1
- %tmp21588 = getelementptr inbounds float* %tmp21587, i64 1
- %tmp21589 = getelementptr inbounds float* %tmp21588, i64 1
- %tmp21590 = getelementptr inbounds float* %tmp21589, i64 1
- %tmp21591 = getelementptr inbounds float* %tmp21590, i64 1
- %tmp21592 = getelementptr inbounds float* %tmp21591, i64 1
- %tmp21593 = getelementptr inbounds float* %tmp21592, i64 1
- %tmp21594 = getelementptr inbounds float* %tmp21593, i64 1
- %tmp21595 = getelementptr inbounds float* %tmp21594, i64 1
- %tmp21596 = getelementptr inbounds float* %tmp21595, i64 1
- %tmp21597 = getelementptr inbounds float* %tmp21596, i64 1
- %tmp21598 = getelementptr inbounds float* %tmp21597, i64 1
- %tmp21599 = getelementptr inbounds float* %tmp21598, i64 1
- %tmp21600 = getelementptr inbounds float* %tmp21599, i64 1
- %tmp21601 = getelementptr inbounds float* %tmp21600, i64 1
- %tmp21602 = getelementptr inbounds float* %tmp21601, i64 1
- %tmp21603 = getelementptr inbounds float* %tmp21602, i64 1
- %tmp21604 = getelementptr inbounds float* %tmp21603, i64 1
- %tmp21605 = getelementptr inbounds float* %tmp21604, i64 1
- %tmp21606 = getelementptr inbounds float* %tmp21605, i64 1
- %tmp21607 = getelementptr inbounds float* %tmp21606, i64 1
- %tmp21608 = getelementptr inbounds float* %tmp21607, i64 1
- %tmp21609 = getelementptr inbounds float* %tmp21608, i64 1
- %tmp21610 = getelementptr inbounds float* %tmp21609, i64 1
- %tmp21611 = getelementptr inbounds float* %tmp21610, i64 1
- %tmp21612 = getelementptr inbounds float* %tmp21611, i64 1
- %tmp21613 = getelementptr inbounds float* %tmp21612, i64 1
- %tmp21614 = getelementptr inbounds float* %tmp21613, i64 1
- %tmp21615 = getelementptr inbounds float* %tmp21614, i64 1
- %tmp21616 = getelementptr inbounds float* %tmp21615, i64 1
- %tmp21617 = getelementptr inbounds float* %tmp21616, i64 1
- %tmp21618 = getelementptr inbounds float* %tmp21617, i64 1
- %tmp21619 = getelementptr inbounds float* %tmp21618, i64 1
- %tmp21620 = getelementptr inbounds float* %tmp21619, i64 1
- %tmp21621 = getelementptr inbounds float* %tmp21620, i64 1
- %tmp21622 = getelementptr inbounds float* %tmp21621, i64 1
- %tmp21623 = getelementptr inbounds float* %tmp21622, i64 1
- %tmp21624 = getelementptr inbounds float* %tmp21623, i64 1
- %tmp21625 = getelementptr inbounds float* %tmp21624, i64 1
- %tmp21626 = getelementptr inbounds float* %tmp21625, i64 1
- %tmp21627 = getelementptr inbounds float* %tmp21626, i64 1
- %tmp21628 = getelementptr inbounds float* %tmp21627, i64 1
- %tmp21629 = getelementptr inbounds float* %tmp21628, i64 1
- %tmp21630 = getelementptr inbounds float* %tmp21629, i64 1
- %tmp21631 = getelementptr inbounds float* %tmp21630, i64 1
- %tmp21632 = getelementptr inbounds float* %tmp21631, i64 1
- %tmp21633 = getelementptr inbounds float* %tmp21632, i64 1
- %tmp21634 = getelementptr inbounds float* %tmp21633, i64 1
- %tmp21635 = getelementptr inbounds float* %tmp21634, i64 1
- %tmp21636 = getelementptr inbounds float* %tmp21635, i64 1
- %tmp21637 = getelementptr inbounds float* %tmp21636, i64 1
- %tmp21638 = getelementptr inbounds float* %tmp21637, i64 1
- %tmp21639 = getelementptr inbounds float* %tmp21638, i64 1
- %tmp21640 = getelementptr inbounds float* %tmp21639, i64 1
- %tmp21641 = getelementptr inbounds float* %tmp21640, i64 1
- %tmp21642 = getelementptr inbounds float* %tmp21641, i64 1
- %tmp21643 = getelementptr inbounds float* %tmp21642, i64 1
- %tmp21644 = getelementptr inbounds float* %tmp21643, i64 1
- %tmp21645 = getelementptr inbounds float* %tmp21644, i64 1
- %tmp21646 = getelementptr inbounds float* %tmp21645, i64 1
- %tmp21647 = getelementptr inbounds float* %tmp21646, i64 1
- %tmp21648 = getelementptr inbounds float* %tmp21647, i64 1
- %tmp21649 = getelementptr inbounds float* %tmp21648, i64 1
- %tmp21650 = getelementptr inbounds float* %tmp21649, i64 1
- %tmp21651 = getelementptr inbounds float* %tmp21650, i64 1
- %tmp21652 = getelementptr inbounds float* %tmp21651, i64 1
- %tmp21653 = getelementptr inbounds float* %tmp21652, i64 1
- %tmp21654 = getelementptr inbounds float* %tmp21653, i64 1
- %tmp21655 = getelementptr inbounds float* %tmp21654, i64 1
- %tmp21656 = getelementptr inbounds float* %tmp21655, i64 1
- %tmp21657 = getelementptr inbounds float* %tmp21656, i64 1
- %tmp21658 = getelementptr inbounds float* %tmp21657, i64 1
- %tmp21659 = getelementptr inbounds float* %tmp21658, i64 1
- %tmp21660 = getelementptr inbounds float* %tmp21659, i64 1
- %tmp21661 = getelementptr inbounds float* %tmp21660, i64 1
- %tmp21662 = getelementptr inbounds float* %tmp21661, i64 1
- %tmp21663 = getelementptr inbounds float* %tmp21662, i64 1
- %tmp21664 = getelementptr inbounds float* %tmp21663, i64 1
- %tmp21665 = getelementptr inbounds float* %tmp21664, i64 1
- %tmp21666 = getelementptr inbounds float* %tmp21665, i64 1
- %tmp21667 = getelementptr inbounds float* %tmp21666, i64 1
- %tmp21668 = getelementptr inbounds float* %tmp21667, i64 1
- %tmp21669 = getelementptr inbounds float* %tmp21668, i64 1
- %tmp21670 = getelementptr inbounds float* %tmp21669, i64 1
- %tmp21671 = getelementptr inbounds float* %tmp21670, i64 1
- %tmp21672 = getelementptr inbounds float* %tmp21671, i64 1
- %tmp21673 = getelementptr inbounds float* %tmp21672, i64 1
- %tmp21674 = getelementptr inbounds float* %tmp21673, i64 1
- %tmp21675 = getelementptr inbounds float* %tmp21674, i64 1
- %tmp21676 = getelementptr inbounds float* %tmp21675, i64 1
- %tmp21677 = getelementptr inbounds float* %tmp21676, i64 1
- %tmp21678 = getelementptr inbounds float* %tmp21677, i64 1
- %tmp21679 = getelementptr inbounds float* %tmp21678, i64 1
- %tmp21680 = getelementptr inbounds float* %tmp21679, i64 1
- %tmp21681 = getelementptr inbounds float* %tmp21680, i64 1
- %tmp21682 = getelementptr inbounds float* %tmp21681, i64 1
- %tmp21683 = getelementptr inbounds float* %tmp21682, i64 1
- %tmp21684 = getelementptr inbounds float* %tmp21683, i64 1
- %tmp21685 = getelementptr inbounds float* %tmp21684, i64 1
- %tmp21686 = getelementptr inbounds float* %tmp21685, i64 1
- %tmp21687 = getelementptr inbounds float* %tmp21686, i64 1
- %tmp21688 = getelementptr inbounds float* %tmp21687, i64 1
- %tmp21689 = getelementptr inbounds float* %tmp21688, i64 1
- %tmp21690 = getelementptr inbounds float* %tmp21689, i64 1
- %tmp21691 = getelementptr inbounds float* %tmp21690, i64 1
- %tmp21692 = getelementptr inbounds float* %tmp21691, i64 1
- %tmp21693 = getelementptr inbounds float* %tmp21692, i64 1
- %tmp21694 = getelementptr inbounds float* %tmp21693, i64 1
- %tmp21695 = getelementptr inbounds float* %tmp21694, i64 1
- %tmp21696 = getelementptr inbounds float* %tmp21695, i64 1
- %tmp21697 = getelementptr inbounds float* %tmp21696, i64 1
- %tmp21698 = getelementptr inbounds float* %tmp21697, i64 1
- %tmp21699 = getelementptr inbounds float* %tmp21698, i64 1
- %tmp21700 = getelementptr inbounds float* %tmp21699, i64 1
- %tmp21701 = getelementptr inbounds float* %tmp21700, i64 1
- %tmp21702 = getelementptr inbounds float* %tmp21701, i64 1
- %tmp21703 = getelementptr inbounds float* %tmp21702, i64 1
- %tmp21704 = getelementptr inbounds float* %tmp21703, i64 1
- %tmp21705 = getelementptr inbounds float* %tmp21704, i64 1
- %tmp21706 = getelementptr inbounds float* %tmp21705, i64 1
- %tmp21707 = getelementptr inbounds float* %tmp21706, i64 1
- %tmp21708 = getelementptr inbounds float* %tmp21707, i64 1
- %tmp21709 = getelementptr inbounds float* %tmp21708, i64 1
- %tmp21710 = getelementptr inbounds float* %tmp21709, i64 1
- %tmp21711 = getelementptr inbounds float* %tmp21710, i64 1
- %tmp21712 = getelementptr inbounds float* %tmp21711, i64 1
- %tmp21713 = getelementptr inbounds float* %tmp21712, i64 1
- %tmp21714 = getelementptr inbounds float* %tmp21713, i64 1
- %tmp21715 = getelementptr inbounds float* %tmp21714, i64 1
- %tmp21716 = getelementptr inbounds float* %tmp21715, i64 1
- %tmp21717 = getelementptr inbounds float* %tmp21716, i64 1
- %tmp21718 = getelementptr inbounds float* %tmp21717, i64 1
- %tmp21719 = getelementptr inbounds float* %tmp21718, i64 1
- %tmp21720 = getelementptr inbounds float* %tmp21719, i64 1
- %tmp21721 = getelementptr inbounds float* %tmp21720, i64 1
- %tmp21722 = getelementptr inbounds float* %tmp21721, i64 1
- %tmp21723 = getelementptr inbounds float* %tmp21722, i64 1
- %tmp21724 = getelementptr inbounds float* %tmp21723, i64 1
- %tmp21725 = getelementptr inbounds float* %tmp21724, i64 1
- %tmp21726 = getelementptr inbounds float* %tmp21725, i64 1
- %tmp21727 = getelementptr inbounds float* %tmp21726, i64 1
- %tmp21728 = getelementptr inbounds float* %tmp21727, i64 1
- %tmp21729 = getelementptr inbounds float* %tmp21728, i64 1
- %tmp21730 = getelementptr inbounds float* %tmp21729, i64 1
- %tmp21731 = getelementptr inbounds float* %tmp21730, i64 1
- %tmp21732 = getelementptr inbounds float* %tmp21731, i64 1
- %tmp21733 = getelementptr inbounds float* %tmp21732, i64 1
- %tmp21734 = getelementptr inbounds float* %tmp21733, i64 1
- %tmp21735 = getelementptr inbounds float* %tmp21734, i64 1
- %tmp21736 = getelementptr inbounds float* %tmp21735, i64 1
- %tmp21737 = getelementptr inbounds float* %tmp21736, i64 1
- %tmp21738 = getelementptr inbounds float* %tmp21737, i64 1
- %tmp21739 = getelementptr inbounds float* %tmp21738, i64 1
- %tmp21740 = getelementptr inbounds float* %tmp21739, i64 1
- %tmp21741 = getelementptr inbounds float* %tmp21740, i64 1
- %tmp21742 = getelementptr inbounds float* %tmp21741, i64 1
- %tmp21743 = getelementptr inbounds float* %tmp21742, i64 1
- %tmp21744 = getelementptr inbounds float* %tmp21743, i64 1
- %tmp21745 = getelementptr inbounds float* %tmp21744, i64 1
- %tmp21746 = getelementptr inbounds float* %tmp21745, i64 1
- %tmp21747 = getelementptr inbounds float* %tmp21746, i64 1
- %tmp21748 = getelementptr inbounds float* %tmp21747, i64 1
- %tmp21749 = getelementptr inbounds float* %tmp21748, i64 1
- %tmp21750 = getelementptr inbounds float* %tmp21749, i64 1
- %tmp21751 = getelementptr inbounds float* %tmp21750, i64 1
- %tmp21752 = getelementptr inbounds float* %tmp21751, i64 1
- %tmp21753 = getelementptr inbounds float* %tmp21752, i64 1
- %tmp21754 = getelementptr inbounds float* %tmp21753, i64 1
- %tmp21755 = getelementptr inbounds float* %tmp21754, i64 1
- %tmp21756 = getelementptr inbounds float* %tmp21755, i64 1
- %tmp21757 = getelementptr inbounds float* %tmp21756, i64 1
- %tmp21758 = getelementptr inbounds float* %tmp21757, i64 1
- %tmp21759 = getelementptr inbounds float* %tmp21758, i64 1
- %tmp21760 = getelementptr inbounds float* %tmp21759, i64 1
- %tmp21761 = getelementptr inbounds float* %tmp21760, i64 1
- %tmp21762 = getelementptr inbounds float* %tmp21761, i64 1
- %tmp21763 = getelementptr inbounds float* %tmp21762, i64 1
- %tmp21764 = getelementptr inbounds float* %tmp21763, i64 1
- %tmp21765 = getelementptr inbounds float* %tmp21764, i64 1
- %tmp21766 = getelementptr inbounds float* %tmp21765, i64 1
- %tmp21767 = getelementptr inbounds float* %tmp21766, i64 1
- %tmp21768 = getelementptr inbounds float* %tmp21767, i64 1
- %tmp21769 = getelementptr inbounds float* %tmp21768, i64 1
- %tmp21770 = getelementptr inbounds float* %tmp21769, i64 1
- %tmp21771 = getelementptr inbounds float* %tmp21770, i64 1
- %tmp21772 = getelementptr inbounds float* %tmp21771, i64 1
- %tmp21773 = getelementptr inbounds float* %tmp21772, i64 1
- %tmp21774 = getelementptr inbounds float* %tmp21773, i64 1
- %tmp21775 = getelementptr inbounds float* %tmp21774, i64 1
- %tmp21776 = getelementptr inbounds float* %tmp21775, i64 1
- %tmp21777 = getelementptr inbounds float* %tmp21776, i64 1
- %tmp21778 = getelementptr inbounds float* %tmp21777, i64 1
- %tmp21779 = getelementptr inbounds float* %tmp21778, i64 1
- %tmp21780 = getelementptr inbounds float* %tmp21779, i64 1
- %tmp21781 = getelementptr inbounds float* %tmp21780, i64 1
- %tmp21782 = getelementptr inbounds float* %tmp21781, i64 1
- %tmp21783 = getelementptr inbounds float* %tmp21782, i64 1
- %tmp21784 = getelementptr inbounds float* %tmp21783, i64 1
- %tmp21785 = getelementptr inbounds float* %tmp21784, i64 1
- %tmp21786 = getelementptr inbounds float* %tmp21785, i64 1
- %tmp21787 = getelementptr inbounds float* %tmp21786, i64 1
- %tmp21788 = getelementptr inbounds float* %tmp21787, i64 1
- %tmp21789 = getelementptr inbounds float* %tmp21788, i64 1
- %tmp21790 = getelementptr inbounds float* %tmp21789, i64 1
- %tmp21791 = getelementptr inbounds float* %tmp21790, i64 1
- %tmp21792 = getelementptr inbounds float* %tmp21791, i64 1
- %tmp21793 = getelementptr inbounds float* %tmp21792, i64 1
- %tmp21794 = getelementptr inbounds float* %tmp21793, i64 1
- %tmp21795 = getelementptr inbounds float* %tmp21794, i64 1
- %tmp21796 = getelementptr inbounds float* %tmp21795, i64 1
- %tmp21797 = getelementptr inbounds float* %tmp21796, i64 1
- %tmp21798 = getelementptr inbounds float* %tmp21797, i64 1
- %tmp21799 = getelementptr inbounds float* %tmp21798, i64 1
- %tmp21800 = getelementptr inbounds float* %tmp21799, i64 1
- %tmp21801 = getelementptr inbounds float* %tmp21800, i64 1
- %tmp21802 = getelementptr inbounds float* %tmp21801, i64 1
- %tmp21803 = getelementptr inbounds float* %tmp21802, i64 1
- %tmp21804 = getelementptr inbounds float* %tmp21803, i64 1
- %tmp21805 = getelementptr inbounds float* %tmp21804, i64 1
- %tmp21806 = getelementptr inbounds float* %tmp21805, i64 1
- %tmp21807 = getelementptr inbounds float* %tmp21806, i64 1
- %tmp21808 = getelementptr inbounds float* %tmp21807, i64 1
- %tmp21809 = getelementptr inbounds float* %tmp21808, i64 1
- %tmp21810 = getelementptr inbounds float* %tmp21809, i64 1
- %tmp21811 = getelementptr inbounds float* %tmp21810, i64 1
- %tmp21812 = getelementptr inbounds float* %tmp21811, i64 1
- %tmp21813 = getelementptr inbounds float* %tmp21812, i64 1
- %tmp21814 = getelementptr inbounds float* %tmp21813, i64 1
- %tmp21815 = getelementptr inbounds float* %tmp21814, i64 1
- %tmp21816 = getelementptr inbounds float* %tmp21815, i64 1
- %tmp21817 = getelementptr inbounds float* %tmp21816, i64 1
- %tmp21818 = getelementptr inbounds float* %tmp21817, i64 1
- %tmp21819 = getelementptr inbounds float* %tmp21818, i64 1
- %tmp21820 = getelementptr inbounds float* %tmp21819, i64 1
- %tmp21821 = getelementptr inbounds float* %tmp21820, i64 1
- %tmp21822 = getelementptr inbounds float* %tmp21821, i64 1
- %tmp21823 = getelementptr inbounds float* %tmp21822, i64 1
- %tmp21824 = getelementptr inbounds float* %tmp21823, i64 1
- %tmp21825 = getelementptr inbounds float* %tmp21824, i64 1
- %tmp21826 = getelementptr inbounds float* %tmp21825, i64 1
- %tmp21827 = getelementptr inbounds float* %tmp21826, i64 1
- %tmp21828 = getelementptr inbounds float* %tmp21827, i64 1
- %tmp21829 = getelementptr inbounds float* %tmp21828, i64 1
- %tmp21830 = getelementptr inbounds float* %tmp21829, i64 1
- %tmp21831 = getelementptr inbounds float* %tmp21830, i64 1
- %tmp21832 = getelementptr inbounds float* %tmp21831, i64 1
- %tmp21833 = getelementptr inbounds float* %tmp21832, i64 1
- %tmp21834 = getelementptr inbounds float* %tmp21833, i64 1
- %tmp21835 = getelementptr inbounds float* %tmp21834, i64 1
- %tmp21836 = getelementptr inbounds float* %tmp21835, i64 1
- %tmp21837 = getelementptr inbounds float* %tmp21836, i64 1
- %tmp21838 = getelementptr inbounds float* %tmp21837, i64 1
- %tmp21839 = getelementptr inbounds float* %tmp21838, i64 1
- %tmp21840 = getelementptr inbounds float* %tmp21839, i64 1
- %tmp21841 = getelementptr inbounds float* %tmp21840, i64 1
- %tmp21842 = getelementptr inbounds float* %tmp21841, i64 1
- %tmp21843 = getelementptr inbounds float* %tmp21842, i64 1
- %tmp21844 = getelementptr inbounds float* %tmp21843, i64 1
- %tmp21845 = getelementptr inbounds float* %tmp21844, i64 1
- %tmp21846 = getelementptr inbounds float* %tmp21845, i64 1
- %tmp21847 = getelementptr inbounds float* %tmp21846, i64 1
- %tmp21848 = getelementptr inbounds float* %tmp21847, i64 1
- %tmp21849 = getelementptr inbounds float* %tmp21848, i64 1
- %tmp21850 = getelementptr inbounds float* %tmp21849, i64 1
- %tmp21851 = getelementptr inbounds float* %tmp21850, i64 1
- %tmp21852 = getelementptr inbounds float* %tmp21851, i64 1
- %tmp21853 = getelementptr inbounds float* %tmp21852, i64 1
- %tmp21854 = getelementptr inbounds float* %tmp21853, i64 1
- %tmp21855 = getelementptr inbounds float* %tmp21854, i64 1
- %tmp21856 = getelementptr inbounds float* %tmp21855, i64 1
- %tmp21857 = getelementptr inbounds float* %tmp21856, i64 1
- %tmp21858 = getelementptr inbounds float* %tmp21857, i64 1
- %tmp21859 = getelementptr inbounds float* %tmp21858, i64 1
- %tmp21860 = getelementptr inbounds float* %tmp21859, i64 1
- %tmp21861 = getelementptr inbounds float* %tmp21860, i64 1
- %tmp21862 = getelementptr inbounds float* %tmp21861, i64 1
- %tmp21863 = getelementptr inbounds float* %tmp21862, i64 1
- %tmp21864 = getelementptr inbounds float* %tmp21863, i64 1
- %tmp21865 = getelementptr inbounds float* %tmp21864, i64 1
- %tmp21866 = getelementptr inbounds float* %tmp21865, i64 1
- %tmp21867 = getelementptr inbounds float* %tmp21866, i64 1
- %tmp21868 = getelementptr inbounds float* %tmp21867, i64 1
- %tmp21869 = getelementptr inbounds float* %tmp21868, i64 1
- %tmp21870 = getelementptr inbounds float* %tmp21869, i64 1
- %tmp21871 = getelementptr inbounds float* %tmp21870, i64 1
- %tmp21872 = getelementptr inbounds float* %tmp21871, i64 1
- %tmp21873 = getelementptr inbounds float* %tmp21872, i64 1
- %tmp21874 = getelementptr inbounds float* %tmp21873, i64 1
- %tmp21875 = getelementptr inbounds float* %tmp21874, i64 1
- %tmp21876 = getelementptr inbounds float* %tmp21875, i64 1
- %tmp21877 = getelementptr inbounds float* %tmp21876, i64 1
- %tmp21878 = getelementptr inbounds float* %tmp21877, i64 1
- %tmp21879 = getelementptr inbounds float* %tmp21878, i64 1
- %tmp21880 = getelementptr inbounds float* %tmp21879, i64 1
- %tmp21881 = getelementptr inbounds float* %tmp21880, i64 1
- %tmp21882 = getelementptr inbounds float* %tmp21881, i64 1
- %tmp21883 = getelementptr inbounds float* %tmp21882, i64 1
- %tmp21884 = getelementptr inbounds float* %tmp21883, i64 1
- %tmp21885 = getelementptr inbounds float* %tmp21884, i64 1
- %tmp21886 = getelementptr inbounds float* %tmp21885, i64 1
- %tmp21887 = getelementptr inbounds float* %tmp21886, i64 1
- %tmp21888 = getelementptr inbounds float* %tmp21887, i64 1
- %tmp21889 = getelementptr inbounds float* %tmp21888, i64 1
- %tmp21890 = getelementptr inbounds float* %tmp21889, i64 1
- %tmp21891 = getelementptr inbounds float* %tmp21890, i64 1
- %tmp21892 = getelementptr inbounds float* %tmp21891, i64 1
- %tmp21893 = getelementptr inbounds float* %tmp21892, i64 1
- %tmp21894 = getelementptr inbounds float* %tmp21893, i64 1
- %tmp21895 = getelementptr inbounds float* %tmp21894, i64 1
- %tmp21896 = getelementptr inbounds float* %tmp21895, i64 1
- %tmp21897 = getelementptr inbounds float* %tmp21896, i64 1
- %tmp21898 = getelementptr inbounds float* %tmp21897, i64 1
- %tmp21899 = getelementptr inbounds float* %tmp21898, i64 1
- %tmp21900 = getelementptr inbounds float* %tmp21899, i64 1
- %tmp21901 = getelementptr inbounds float* %tmp21900, i64 1
- %tmp21902 = getelementptr inbounds float* %tmp21901, i64 1
- %tmp21903 = getelementptr inbounds float* %tmp21902, i64 1
- %tmp21904 = getelementptr inbounds float* %tmp21903, i64 1
- %tmp21905 = getelementptr inbounds float* %tmp21904, i64 1
- %tmp21906 = getelementptr inbounds float* %tmp21905, i64 1
- %tmp21907 = getelementptr inbounds float* %tmp21906, i64 1
- %tmp21908 = getelementptr inbounds float* %tmp21907, i64 1
- %tmp21909 = getelementptr inbounds float* %tmp21908, i64 1
- %tmp21910 = getelementptr inbounds float* %tmp21909, i64 1
- %tmp21911 = getelementptr inbounds float* %tmp21910, i64 1
- %tmp21912 = getelementptr inbounds float* %tmp21911, i64 1
- %tmp21913 = getelementptr inbounds float* %tmp21912, i64 1
- %tmp21914 = getelementptr inbounds float* %tmp21913, i64 1
- %tmp21915 = getelementptr inbounds float* %tmp21914, i64 1
- %tmp21916 = getelementptr inbounds float* %tmp21915, i64 1
- %tmp21917 = getelementptr inbounds float* %tmp21916, i64 1
- %tmp21918 = getelementptr inbounds float* %tmp21917, i64 1
- %tmp21919 = getelementptr inbounds float* %tmp21918, i64 1
- %tmp21920 = getelementptr inbounds float* %tmp21919, i64 1
- %tmp21921 = getelementptr inbounds float* %tmp21920, i64 1
- %tmp21922 = getelementptr inbounds float* %tmp21921, i64 1
- %tmp21923 = getelementptr inbounds float* %tmp21922, i64 1
- %tmp21924 = getelementptr inbounds float* %tmp21923, i64 1
- %tmp21925 = getelementptr inbounds float* %tmp21924, i64 1
- %tmp21926 = getelementptr inbounds float* %tmp21925, i64 1
- %tmp21927 = getelementptr inbounds float* %tmp21926, i64 1
- %tmp21928 = getelementptr inbounds float* %tmp21927, i64 1
- %tmp21929 = getelementptr inbounds float* %tmp21928, i64 1
- %tmp21930 = getelementptr inbounds float* %tmp21929, i64 1
- %tmp21931 = getelementptr inbounds float* %tmp21930, i64 1
- %tmp21932 = getelementptr inbounds float* %tmp21931, i64 1
- %tmp21933 = getelementptr inbounds float* %tmp21932, i64 1
- %tmp21934 = getelementptr inbounds float* %tmp21933, i64 1
- %tmp21935 = getelementptr inbounds float* %tmp21934, i64 1
- %tmp21936 = getelementptr inbounds float* %tmp21935, i64 1
- %tmp21937 = getelementptr inbounds float* %tmp21936, i64 1
- %tmp21938 = getelementptr inbounds float* %tmp21937, i64 1
- %tmp21939 = getelementptr inbounds float* %tmp21938, i64 1
- %tmp21940 = getelementptr inbounds float* %tmp21939, i64 1
- %tmp21941 = getelementptr inbounds float* %tmp21940, i64 1
- %tmp21942 = getelementptr inbounds float* %tmp21941, i64 1
- %tmp21943 = getelementptr inbounds float* %tmp21942, i64 1
- %tmp21944 = getelementptr inbounds float* %tmp21943, i64 1
- %tmp21945 = getelementptr inbounds float* %tmp21944, i64 1
- %tmp21946 = getelementptr inbounds float* %tmp21945, i64 1
- %tmp21947 = getelementptr inbounds float* %tmp21946, i64 1
- %tmp21948 = getelementptr inbounds float* %tmp21947, i64 1
- %tmp21949 = getelementptr inbounds float* %tmp21948, i64 1
- %tmp21950 = getelementptr inbounds float* %tmp21949, i64 1
- %tmp21951 = getelementptr inbounds float* %tmp21950, i64 1
- %tmp21952 = getelementptr inbounds float* %tmp21951, i64 1
- %tmp21953 = getelementptr inbounds float* %tmp21952, i64 1
- %tmp21954 = getelementptr inbounds float* %tmp21953, i64 1
- %tmp21955 = getelementptr inbounds float* %tmp21954, i64 1
- %tmp21956 = getelementptr inbounds float* %tmp21955, i64 1
- %tmp21957 = getelementptr inbounds float* %tmp21956, i64 1
- %tmp21958 = getelementptr inbounds float* %tmp21957, i64 1
- %tmp21959 = getelementptr inbounds float* %tmp21958, i64 1
- %tmp21960 = getelementptr inbounds float* %tmp21959, i64 1
- %tmp21961 = getelementptr inbounds float* %tmp21960, i64 1
- %tmp21962 = getelementptr inbounds float* %tmp21961, i64 1
- %tmp21963 = getelementptr inbounds float* %tmp21962, i64 1
- %tmp21964 = getelementptr inbounds float* %tmp21963, i64 1
- %tmp21965 = getelementptr inbounds float* %tmp21964, i64 1
- %tmp21966 = getelementptr inbounds float* %tmp21965, i64 1
- %tmp21967 = getelementptr inbounds float* %tmp21966, i64 1
- %tmp21968 = getelementptr inbounds float* %tmp21967, i64 1
- %tmp21969 = getelementptr inbounds float* %tmp21968, i64 1
- %tmp21970 = getelementptr inbounds float* %tmp21969, i64 1
- %tmp21971 = getelementptr inbounds float* %tmp21970, i64 1
- %tmp21972 = getelementptr inbounds float* %tmp21971, i64 1
- %tmp21973 = getelementptr inbounds float* %tmp21972, i64 1
- %tmp21974 = getelementptr inbounds float* %tmp21973, i64 1
- %tmp21975 = getelementptr inbounds float* %tmp21974, i64 1
- %tmp21976 = getelementptr inbounds float* %tmp21975, i64 1
- %tmp21977 = getelementptr inbounds float* %tmp21976, i64 1
- %tmp21978 = getelementptr inbounds float* %tmp21977, i64 1
- %tmp21979 = getelementptr inbounds float* %tmp21978, i64 1
- %tmp21980 = getelementptr inbounds float* %tmp21979, i64 1
- %tmp21981 = getelementptr inbounds float* %tmp21980, i64 1
- %tmp21982 = getelementptr inbounds float* %tmp21981, i64 1
- %tmp21983 = getelementptr inbounds float* %tmp21982, i64 1
- %tmp21984 = getelementptr inbounds float* %tmp21983, i64 1
- %tmp21985 = getelementptr inbounds float* %tmp21984, i64 1
- %tmp21986 = getelementptr inbounds float* %tmp21985, i64 1
- %tmp21987 = getelementptr inbounds float* %tmp21986, i64 1
- %tmp21988 = getelementptr inbounds float* %tmp21987, i64 1
- %tmp21989 = getelementptr inbounds float* %tmp21988, i64 1
- %tmp21990 = getelementptr inbounds float* %tmp21989, i64 1
- %tmp21991 = getelementptr inbounds float* %tmp21990, i64 1
- %tmp21992 = getelementptr inbounds float* %tmp21991, i64 1
- %tmp21993 = getelementptr inbounds float* %tmp21992, i64 1
- %tmp21994 = getelementptr inbounds float* %tmp21993, i64 1
- %tmp21995 = getelementptr inbounds float* %tmp21994, i64 1
- %tmp21996 = getelementptr inbounds float* %tmp21995, i64 1
- %tmp21997 = getelementptr inbounds float* %tmp21996, i64 1
- %tmp21998 = getelementptr inbounds float* %tmp21997, i64 1
- %tmp21999 = getelementptr inbounds float* %tmp21998, i64 1
- %tmp22000 = getelementptr inbounds float* %tmp21999, i64 1
- %tmp22001 = getelementptr inbounds float* %tmp22000, i64 1
- %tmp22002 = getelementptr inbounds float* %tmp22001, i64 1
- %tmp22003 = getelementptr inbounds float* %tmp22002, i64 1
- %tmp22004 = getelementptr inbounds float* %tmp22003, i64 1
- %tmp22005 = getelementptr inbounds float* %tmp22004, i64 1
- %tmp22006 = getelementptr inbounds float* %tmp22005, i64 1
- %tmp22007 = getelementptr inbounds float* %tmp22006, i64 1
- %tmp22008 = getelementptr inbounds float* %tmp22007, i64 1
- %tmp22009 = getelementptr inbounds float* %tmp22008, i64 1
- %tmp22010 = getelementptr inbounds float* %tmp22009, i64 1
- %tmp22011 = getelementptr inbounds float* %tmp22010, i64 1
- %tmp22012 = getelementptr inbounds float* %tmp22011, i64 1
- %tmp22013 = getelementptr inbounds float* %tmp22012, i64 1
- %tmp22014 = getelementptr inbounds float* %tmp22013, i64 1
- %tmp22015 = getelementptr inbounds float* %tmp22014, i64 1
- %tmp22016 = getelementptr inbounds float* %tmp22015, i64 1
- %tmp22017 = getelementptr inbounds float* %tmp22016, i64 1
- %tmp22018 = getelementptr inbounds float* %tmp22017, i64 1
- %tmp22019 = getelementptr inbounds float* %tmp22018, i64 1
- %tmp22020 = getelementptr inbounds float* %tmp22019, i64 1
- %tmp22021 = getelementptr inbounds float* %tmp22020, i64 1
- %tmp22022 = getelementptr inbounds float* %tmp22021, i64 1
- %tmp22023 = getelementptr inbounds float* %tmp22022, i64 1
- %tmp22024 = getelementptr inbounds float* %tmp22023, i64 1
- %tmp22025 = getelementptr inbounds float* %tmp22024, i64 1
- %tmp22026 = getelementptr inbounds float* %tmp22025, i64 1
- %tmp22027 = getelementptr inbounds float* %tmp22026, i64 1
- %tmp22028 = getelementptr inbounds float* %tmp22027, i64 1
- %tmp22029 = getelementptr inbounds float* %tmp22028, i64 1
- %tmp22030 = getelementptr inbounds float* %tmp22029, i64 1
- %tmp22031 = getelementptr inbounds float* %tmp22030, i64 1
- %tmp22032 = getelementptr inbounds float* %tmp22031, i64 1
- %tmp22033 = getelementptr inbounds float* %tmp22032, i64 1
- %tmp22034 = getelementptr inbounds float* %tmp22033, i64 1
- %tmp22035 = getelementptr inbounds float* %tmp22034, i64 1
- %tmp22036 = getelementptr inbounds float* %tmp22035, i64 1
- %tmp22037 = getelementptr inbounds float* %tmp22036, i64 1
- %tmp22038 = getelementptr inbounds float* %tmp22037, i64 1
- %tmp22039 = getelementptr inbounds float* %tmp22038, i64 1
- %tmp22040 = getelementptr inbounds float* %tmp22039, i64 1
- %tmp22041 = getelementptr inbounds float* %tmp22040, i64 1
- %tmp22042 = getelementptr inbounds float* %tmp22041, i64 1
- %tmp22043 = getelementptr inbounds float* %tmp22042, i64 1
- %tmp22044 = getelementptr inbounds float* %tmp22043, i64 1
- %tmp22045 = getelementptr inbounds float* %tmp22044, i64 1
- %tmp22046 = getelementptr inbounds float* %tmp22045, i64 1
- %tmp22047 = getelementptr inbounds float* %tmp22046, i64 1
- %tmp22048 = getelementptr inbounds float* %tmp22047, i64 1
- %tmp22049 = getelementptr inbounds float* %tmp22048, i64 1
- %tmp22050 = getelementptr inbounds float* %tmp22049, i64 1
- %tmp22051 = getelementptr inbounds float* %tmp22050, i64 1
- %tmp22052 = getelementptr inbounds float* %tmp22051, i64 1
- %tmp22053 = getelementptr inbounds float* %tmp22052, i64 1
- %tmp22054 = getelementptr inbounds float* %tmp22053, i64 1
- %tmp22055 = getelementptr inbounds float* %tmp22054, i64 1
- %tmp22056 = getelementptr inbounds float* %tmp22055, i64 1
- %tmp22057 = getelementptr inbounds float* %tmp22056, i64 1
- %tmp22058 = getelementptr inbounds float* %tmp22057, i64 1
- %tmp22059 = getelementptr inbounds float* %tmp22058, i64 1
- %tmp22060 = getelementptr inbounds float* %tmp22059, i64 1
- %tmp22061 = getelementptr inbounds float* %tmp22060, i64 1
- %tmp22062 = getelementptr inbounds float* %tmp22061, i64 1
- %tmp22063 = getelementptr inbounds float* %tmp22062, i64 1
- %tmp22064 = getelementptr inbounds float* %tmp22063, i64 1
- %tmp22065 = getelementptr inbounds float* %tmp22064, i64 1
- %tmp22066 = getelementptr inbounds float* %tmp22065, i64 1
- %tmp22067 = getelementptr inbounds float* %tmp22066, i64 1
- %tmp22068 = getelementptr inbounds float* %tmp22067, i64 1
- %tmp22069 = getelementptr inbounds float* %tmp22068, i64 1
- %tmp22070 = getelementptr inbounds float* %tmp22069, i64 1
- %tmp22071 = getelementptr inbounds float* %tmp22070, i64 1
- %tmp22072 = getelementptr inbounds float* %tmp22071, i64 1
- %tmp22073 = getelementptr inbounds float* %tmp22072, i64 1
- %tmp22074 = getelementptr inbounds float* %tmp22073, i64 1
- %tmp22075 = getelementptr inbounds float* %tmp22074, i64 1
- %tmp22076 = getelementptr inbounds float* %tmp22075, i64 1
- %tmp22077 = getelementptr inbounds float* %tmp22076, i64 1
- %tmp22078 = getelementptr inbounds float* %tmp22077, i64 1
- %tmp22079 = getelementptr inbounds float* %tmp22078, i64 1
- %tmp22080 = getelementptr inbounds float* %tmp22079, i64 1
- %tmp22081 = getelementptr inbounds float* %tmp22080, i64 1
- %tmp22082 = getelementptr inbounds float* %tmp22081, i64 1
- %tmp22083 = getelementptr inbounds float* %tmp22082, i64 1
- %tmp22084 = getelementptr inbounds float* %tmp22083, i64 1
- %tmp22085 = getelementptr inbounds float* %tmp22084, i64 1
- %tmp22086 = getelementptr inbounds float* %tmp22085, i64 1
- %tmp22087 = getelementptr inbounds float* %tmp22086, i64 1
- %tmp22088 = getelementptr inbounds float* %tmp22087, i64 1
- %tmp22089 = getelementptr inbounds float* %tmp22088, i64 1
- %tmp22090 = getelementptr inbounds float* %tmp22089, i64 1
- %tmp22091 = getelementptr inbounds float* %tmp22090, i64 1
- %tmp22092 = getelementptr inbounds float* %tmp22091, i64 1
- %tmp22093 = getelementptr inbounds float* %tmp22092, i64 1
- %tmp22094 = getelementptr inbounds float* %tmp22093, i64 1
- %tmp22095 = getelementptr inbounds float* %tmp22094, i64 1
- %tmp22096 = getelementptr inbounds float* %tmp22095, i64 1
- %tmp22097 = getelementptr inbounds float* %tmp22096, i64 1
- %tmp22098 = getelementptr inbounds float* %tmp22097, i64 1
- %tmp22099 = getelementptr inbounds float* %tmp22098, i64 1
- %tmp22100 = getelementptr inbounds float* %tmp22099, i64 1
- %tmp22101 = getelementptr inbounds float* %tmp22100, i64 1
- %tmp22102 = getelementptr inbounds float* %tmp22101, i64 1
- %tmp22103 = getelementptr inbounds float* %tmp22102, i64 1
- %tmp22104 = getelementptr inbounds float* %tmp22103, i64 1
- %tmp22105 = getelementptr inbounds float* %tmp22104, i64 1
- %tmp22106 = getelementptr inbounds float* %tmp22105, i64 1
- %tmp22107 = getelementptr inbounds float* %tmp22106, i64 1
- %tmp22108 = getelementptr inbounds float* %tmp22107, i64 1
- %tmp22109 = getelementptr inbounds float* %tmp22108, i64 1
- %tmp22110 = getelementptr inbounds float* %tmp22109, i64 1
- %tmp22111 = getelementptr inbounds float* %tmp22110, i64 1
- %tmp22112 = getelementptr inbounds float* %tmp22111, i64 1
- %tmp22113 = getelementptr inbounds float* %tmp22112, i64 1
- %tmp22114 = getelementptr inbounds float* %tmp22113, i64 1
- %tmp22115 = getelementptr inbounds float* %tmp22114, i64 1
- %tmp22116 = getelementptr inbounds float* %tmp22115, i64 1
- %tmp22117 = getelementptr inbounds float* %tmp22116, i64 1
- %tmp22118 = getelementptr inbounds float* %tmp22117, i64 1
- %tmp22119 = getelementptr inbounds float* %tmp22118, i64 1
- %tmp22120 = getelementptr inbounds float* %tmp22119, i64 1
- %tmp22121 = getelementptr inbounds float* %tmp22120, i64 1
- %tmp22122 = getelementptr inbounds float* %tmp22121, i64 1
- %tmp22123 = getelementptr inbounds float* %tmp22122, i64 1
- %tmp22124 = getelementptr inbounds float* %tmp22123, i64 1
- %tmp22125 = getelementptr inbounds float* %tmp22124, i64 1
- %tmp22126 = getelementptr inbounds float* %tmp22125, i64 1
- %tmp22127 = getelementptr inbounds float* %tmp22126, i64 1
- %tmp22128 = getelementptr inbounds float* %tmp22127, i64 1
- %tmp22129 = getelementptr inbounds float* %tmp22128, i64 1
- %tmp22130 = getelementptr inbounds float* %tmp22129, i64 1
- %tmp22131 = getelementptr inbounds float* %tmp22130, i64 1
- %tmp22132 = getelementptr inbounds float* %tmp22131, i64 1
- %tmp22133 = getelementptr inbounds float* %tmp22132, i64 1
- %tmp22134 = getelementptr inbounds float* %tmp22133, i64 1
- %tmp22135 = getelementptr inbounds float* %tmp22134, i64 1
- %tmp22136 = getelementptr inbounds float* %tmp22135, i64 1
- %tmp22137 = getelementptr inbounds float* %tmp22136, i64 1
- %tmp22138 = getelementptr inbounds float* %tmp22137, i64 1
- %tmp22139 = getelementptr inbounds float* %tmp22138, i64 1
- %tmp22140 = getelementptr inbounds float* %tmp22139, i64 1
- %tmp22141 = getelementptr inbounds float* %tmp22140, i64 1
- %tmp22142 = getelementptr inbounds float* %tmp22141, i64 1
- %tmp22143 = getelementptr inbounds float* %tmp22142, i64 1
- %tmp22144 = getelementptr inbounds float* %tmp22143, i64 1
- %tmp22145 = getelementptr inbounds float* %tmp22144, i64 1
- %tmp22146 = getelementptr inbounds float* %tmp22145, i64 1
- %tmp22147 = getelementptr inbounds float* %tmp22146, i64 1
- %tmp22148 = getelementptr inbounds float* %tmp22147, i64 1
- %tmp22149 = getelementptr inbounds float* %tmp22148, i64 1
- %tmp22150 = getelementptr inbounds float* %tmp22149, i64 1
- %tmp22151 = getelementptr inbounds float* %tmp22150, i64 1
- %tmp22152 = getelementptr inbounds float* %tmp22151, i64 1
- %tmp22153 = getelementptr inbounds float* %tmp22152, i64 1
- %tmp22154 = getelementptr inbounds float* %tmp22153, i64 1
- %tmp22155 = getelementptr inbounds float* %tmp22154, i64 1
- %tmp22156 = getelementptr inbounds float* %tmp22155, i64 1
- %tmp22157 = getelementptr inbounds float* %tmp22156, i64 1
- %tmp22158 = getelementptr inbounds float* %tmp22157, i64 1
- %tmp22159 = getelementptr inbounds float* %tmp22158, i64 1
- %tmp22160 = getelementptr inbounds float* %tmp22159, i64 1
- %tmp22161 = getelementptr inbounds float* %tmp22160, i64 1
- %tmp22162 = getelementptr inbounds float* %tmp22161, i64 1
- %tmp22163 = getelementptr inbounds float* %tmp22162, i64 1
- %tmp22164 = getelementptr inbounds float* %tmp22163, i64 1
- %tmp22165 = getelementptr inbounds float* %tmp22164, i64 1
- %tmp22166 = getelementptr inbounds float* %tmp22165, i64 1
- %tmp22167 = getelementptr inbounds float* %tmp22166, i64 1
- %tmp22168 = getelementptr inbounds float* %tmp22167, i64 1
- %tmp22169 = getelementptr inbounds float* %tmp22168, i64 1
- %tmp22170 = getelementptr inbounds float* %tmp22169, i64 1
- %tmp22171 = getelementptr inbounds float* %tmp22170, i64 1
- %tmp22172 = getelementptr inbounds float* %tmp22171, i64 1
- %tmp22173 = getelementptr inbounds float* %tmp22172, i64 1
- %tmp22174 = getelementptr inbounds float* %tmp22173, i64 1
- %tmp22175 = getelementptr inbounds float* %tmp22174, i64 1
- %tmp22176 = getelementptr inbounds float* %tmp22175, i64 1
- %tmp22177 = getelementptr inbounds float* %tmp22176, i64 1
- %tmp22178 = getelementptr inbounds float* %tmp22177, i64 1
- %tmp22179 = getelementptr inbounds float* %tmp22178, i64 1
- %tmp22180 = getelementptr inbounds float* %tmp22179, i64 1
- %tmp22181 = getelementptr inbounds float* %tmp22180, i64 1
- %tmp22182 = getelementptr inbounds float* %tmp22181, i64 1
- %tmp22183 = getelementptr inbounds float* %tmp22182, i64 1
- %tmp22184 = getelementptr inbounds float* %tmp22183, i64 1
- %tmp22185 = getelementptr inbounds float* %tmp22184, i64 1
- %tmp22186 = getelementptr inbounds float* %tmp22185, i64 1
- %tmp22187 = getelementptr inbounds float* %tmp22186, i64 1
- %tmp22188 = getelementptr inbounds float* %tmp22187, i64 1
- %tmp22189 = getelementptr inbounds float* %tmp22188, i64 1
- %tmp22190 = getelementptr inbounds float* %tmp22189, i64 1
- %tmp22191 = getelementptr inbounds float* %tmp22190, i64 1
- %tmp22192 = getelementptr inbounds float* %tmp22191, i64 1
- %tmp22193 = getelementptr inbounds float* %tmp22192, i64 1
- %tmp22194 = getelementptr inbounds float* %tmp22193, i64 1
- %tmp22195 = getelementptr inbounds float* %tmp22194, i64 1
- %tmp22196 = getelementptr inbounds float* %tmp22195, i64 1
- %tmp22197 = getelementptr inbounds float* %tmp22196, i64 1
- %tmp22198 = getelementptr inbounds float* %tmp22197, i64 1
- %tmp22199 = getelementptr inbounds float* %tmp22198, i64 1
- %tmp22200 = getelementptr inbounds float* %tmp22199, i64 1
- %tmp22201 = getelementptr inbounds float* %tmp22200, i64 1
- %tmp22202 = getelementptr inbounds float* %tmp22201, i64 1
- %tmp22203 = getelementptr inbounds float* %tmp22202, i64 1
- %tmp22204 = getelementptr inbounds float* %tmp22203, i64 1
- %tmp22205 = getelementptr inbounds float* %tmp22204, i64 1
- %tmp22206 = getelementptr inbounds float* %tmp22205, i64 1
- %tmp22207 = getelementptr inbounds float* %tmp22206, i64 1
- %tmp22208 = getelementptr inbounds float* %tmp22207, i64 1
- %tmp22209 = getelementptr inbounds float* %tmp22208, i64 1
- %tmp22210 = getelementptr inbounds float* %tmp22209, i64 1
- %tmp22211 = getelementptr inbounds float* %tmp22210, i64 1
- %tmp22212 = getelementptr inbounds float* %tmp22211, i64 1
- %tmp22213 = getelementptr inbounds float* %tmp22212, i64 1
- %tmp22214 = getelementptr inbounds float* %tmp22213, i64 1
- %tmp22215 = getelementptr inbounds float* %tmp22214, i64 1
- %tmp22216 = getelementptr inbounds float* %tmp22215, i64 1
- %tmp22217 = getelementptr inbounds float* %tmp22216, i64 1
- %tmp22218 = getelementptr inbounds float* %tmp22217, i64 1
- %tmp22219 = getelementptr inbounds float* %tmp22218, i64 1
- %tmp22220 = getelementptr inbounds float* %tmp22219, i64 1
- %tmp22221 = getelementptr inbounds float* %tmp22220, i64 1
- %tmp22222 = getelementptr inbounds float* %tmp22221, i64 1
- %tmp22223 = getelementptr inbounds float* %tmp22222, i64 1
- %tmp22224 = getelementptr inbounds float* %tmp22223, i64 1
- %tmp22225 = getelementptr inbounds float* %tmp22224, i64 1
- %tmp22226 = getelementptr inbounds float* %tmp22225, i64 1
- %tmp22227 = getelementptr inbounds float* %tmp22226, i64 1
- %tmp22228 = getelementptr inbounds float* %tmp22227, i64 1
- %tmp22229 = getelementptr inbounds float* %tmp22228, i64 1
- %tmp22230 = getelementptr inbounds float* %tmp22229, i64 1
- %tmp22231 = getelementptr inbounds float* %tmp22230, i64 1
- %tmp22232 = getelementptr inbounds float* %tmp22231, i64 1
- %tmp22233 = getelementptr inbounds float* %tmp22232, i64 1
- %tmp22234 = getelementptr inbounds float* %tmp22233, i64 1
- %tmp22235 = getelementptr inbounds float* %tmp22234, i64 1
- %tmp22236 = getelementptr inbounds float* %tmp22235, i64 1
- %tmp22237 = getelementptr inbounds float* %tmp22236, i64 1
- %tmp22238 = getelementptr inbounds float* %tmp22237, i64 1
- %tmp22239 = getelementptr inbounds float* %tmp22238, i64 1
- %tmp22240 = getelementptr inbounds float* %tmp22239, i64 1
- %tmp22241 = getelementptr inbounds float* %tmp22240, i64 1
- %tmp22242 = getelementptr inbounds float* %tmp22241, i64 1
- %tmp22243 = getelementptr inbounds float* %tmp22242, i64 1
- %tmp22244 = getelementptr inbounds float* %tmp22243, i64 1
- %tmp22245 = getelementptr inbounds float* %tmp22244, i64 1
- %tmp22246 = getelementptr inbounds float* %tmp22245, i64 1
- %tmp22247 = getelementptr inbounds float* %tmp22246, i64 1
- %tmp22248 = getelementptr inbounds float* %tmp22247, i64 1
- %tmp22249 = getelementptr inbounds float* %tmp22248, i64 1
- %tmp22250 = getelementptr inbounds float* %tmp22249, i64 1
- %tmp22251 = getelementptr inbounds float* %tmp22250, i64 1
- %tmp22252 = getelementptr inbounds float* %tmp22251, i64 1
- %tmp22253 = getelementptr inbounds float* %tmp22252, i64 1
- %tmp22254 = getelementptr inbounds float* %tmp22253, i64 1
- %tmp22255 = getelementptr inbounds float* %tmp22254, i64 1
- %tmp22256 = getelementptr inbounds float* %tmp22255, i64 1
- %tmp22257 = getelementptr inbounds float* %tmp22256, i64 1
- %tmp22258 = getelementptr inbounds float* %tmp22257, i64 1
- %tmp22259 = getelementptr inbounds float* %tmp22258, i64 1
- %tmp22260 = getelementptr inbounds float* %tmp22259, i64 1
- %tmp22261 = getelementptr inbounds float* %tmp22260, i64 1
- %tmp22262 = getelementptr inbounds float* %tmp22261, i64 1
- %tmp22263 = getelementptr inbounds float* %tmp22262, i64 1
- %tmp22264 = getelementptr inbounds float* %tmp22263, i64 1
- %tmp22265 = getelementptr inbounds float* %tmp22264, i64 1
- %tmp22266 = getelementptr inbounds float* %tmp22265, i64 1
- %tmp22267 = getelementptr inbounds float* %tmp22266, i64 1
- %tmp22268 = getelementptr inbounds float* %tmp22267, i64 1
- %tmp22269 = getelementptr inbounds float* %tmp22268, i64 1
- %tmp22270 = getelementptr inbounds float* %tmp22269, i64 1
- %tmp22271 = getelementptr inbounds float* %tmp22270, i64 1
- %tmp22272 = getelementptr inbounds float* %tmp22271, i64 1
- %tmp22273 = getelementptr inbounds float* %tmp22272, i64 1
- %tmp22274 = getelementptr inbounds float* %tmp22273, i64 1
- %tmp22275 = getelementptr inbounds float* %tmp22274, i64 1
- %tmp22276 = getelementptr inbounds float* %tmp22275, i64 1
- %tmp22277 = getelementptr inbounds float* %tmp22276, i64 1
- %tmp22278 = getelementptr inbounds float* %tmp22277, i64 1
- %tmp22279 = getelementptr inbounds float* %tmp22278, i64 1
- %tmp22280 = getelementptr inbounds float* %tmp22279, i64 1
- %tmp22281 = getelementptr inbounds float* %tmp22280, i64 1
- %tmp22282 = getelementptr inbounds float* %tmp22281, i64 1
- %tmp22283 = getelementptr inbounds float* %tmp22282, i64 1
- %tmp22284 = getelementptr inbounds float* %tmp22283, i64 1
- %tmp22285 = getelementptr inbounds float* %tmp22284, i64 1
- %tmp22286 = getelementptr inbounds float* %tmp22285, i64 1
- %tmp22287 = getelementptr inbounds float* %tmp22286, i64 1
- %tmp22288 = getelementptr inbounds float* %tmp22287, i64 1
- %tmp22289 = getelementptr inbounds float* %tmp22288, i64 1
- %tmp22290 = getelementptr inbounds float* %tmp22289, i64 1
- %tmp22291 = getelementptr inbounds float* %tmp22290, i64 1
- %tmp22292 = getelementptr inbounds float* %tmp22291, i64 1
- %tmp22293 = getelementptr inbounds float* %tmp22292, i64 1
- %tmp22294 = getelementptr inbounds float* %tmp22293, i64 1
- %tmp22295 = getelementptr inbounds float* %tmp22294, i64 1
- %tmp22296 = getelementptr inbounds float* %tmp22295, i64 1
- %tmp22297 = getelementptr inbounds float* %tmp22296, i64 1
- %tmp22298 = getelementptr inbounds float* %tmp22297, i64 1
- %tmp22299 = getelementptr inbounds float* %tmp22298, i64 1
- %tmp22300 = getelementptr inbounds float* %tmp22299, i64 1
- %tmp22301 = getelementptr inbounds float* %tmp22300, i64 1
- %tmp22302 = getelementptr inbounds float* %tmp22301, i64 1
- %tmp22303 = getelementptr inbounds float* %tmp22302, i64 1
- %tmp22304 = getelementptr inbounds float* %tmp22303, i64 1
- %tmp22305 = getelementptr inbounds float* %tmp22304, i64 1
- %tmp22306 = getelementptr inbounds float* %tmp22305, i64 1
- %tmp22307 = getelementptr inbounds float* %tmp22306, i64 1
- %tmp22308 = getelementptr inbounds float* %tmp22307, i64 1
- %tmp22309 = getelementptr inbounds float* %tmp22308, i64 1
- %tmp22310 = getelementptr inbounds float* %tmp22309, i64 1
- %tmp22311 = getelementptr inbounds float* %tmp22310, i64 1
- %tmp22312 = getelementptr inbounds float* %tmp22311, i64 1
- %tmp22313 = getelementptr inbounds float* %tmp22312, i64 1
- %tmp22314 = getelementptr inbounds float* %tmp22313, i64 1
- %tmp22315 = getelementptr inbounds float* %tmp22314, i64 1
- %tmp22316 = getelementptr inbounds float* %tmp22315, i64 1
- %tmp22317 = getelementptr inbounds float* %tmp22316, i64 1
- %tmp22318 = getelementptr inbounds float* %tmp22317, i64 1
- %tmp22319 = getelementptr inbounds float* %tmp22318, i64 1
- %tmp22320 = getelementptr inbounds float* %tmp22319, i64 1
- %tmp22321 = getelementptr inbounds float* %tmp22320, i64 1
- %tmp22322 = getelementptr inbounds float* %tmp22321, i64 1
- %tmp22323 = getelementptr inbounds float* %tmp22322, i64 1
- %tmp22324 = getelementptr inbounds float* %tmp22323, i64 1
- %tmp22325 = getelementptr inbounds float* %tmp22324, i64 1
- %tmp22326 = getelementptr inbounds float* %tmp22325, i64 1
- %tmp22327 = getelementptr inbounds float* %tmp22326, i64 1
- %tmp22328 = getelementptr inbounds float* %tmp22327, i64 1
- %tmp22329 = getelementptr inbounds float* %tmp22328, i64 1
- %tmp22330 = getelementptr inbounds float* %tmp22329, i64 1
- %tmp22331 = getelementptr inbounds float* %tmp22330, i64 1
- %tmp22332 = getelementptr inbounds float* %tmp22331, i64 1
- %tmp22333 = getelementptr inbounds float* %tmp22332, i64 1
- %tmp22334 = getelementptr inbounds float* %tmp22333, i64 1
- %tmp22335 = getelementptr inbounds float* %tmp22334, i64 1
- %tmp22336 = getelementptr inbounds float* %tmp22335, i64 1
- %tmp22337 = getelementptr inbounds float* %tmp22336, i64 1
- %tmp22338 = getelementptr inbounds float* %tmp22337, i64 1
- %tmp22339 = getelementptr inbounds float* %tmp22338, i64 1
- %tmp22340 = getelementptr inbounds float* %tmp22339, i64 1
- %tmp22341 = getelementptr inbounds float* %tmp22340, i64 1
- %tmp22342 = getelementptr inbounds float* %tmp22341, i64 1
- %tmp22343 = getelementptr inbounds float* %tmp22342, i64 1
- %tmp22344 = getelementptr inbounds float* %tmp22343, i64 1
- %tmp22345 = getelementptr inbounds float* %tmp22344, i64 1
- %tmp22346 = getelementptr inbounds float* %tmp22345, i64 1
- %tmp22347 = getelementptr inbounds float* %tmp22346, i64 1
- %tmp22348 = getelementptr inbounds float* %tmp22347, i64 1
- %tmp22349 = getelementptr inbounds float* %tmp22348, i64 1
- %tmp22350 = getelementptr inbounds float* %tmp22349, i64 1
- %tmp22351 = getelementptr inbounds float* %tmp22350, i64 1
- %tmp22352 = getelementptr inbounds float* %tmp22351, i64 1
- %tmp22353 = getelementptr inbounds float* %tmp22352, i64 1
- %tmp22354 = getelementptr inbounds float* %tmp22353, i64 1
- %tmp22355 = getelementptr inbounds float* %tmp22354, i64 1
- %tmp22356 = getelementptr inbounds float* %tmp22355, i64 1
- %tmp22357 = getelementptr inbounds float* %tmp22356, i64 1
- %tmp22358 = getelementptr inbounds float* %tmp22357, i64 1
- %tmp22359 = getelementptr inbounds float* %tmp22358, i64 1
- %tmp22360 = getelementptr inbounds float* %tmp22359, i64 1
- %tmp22361 = getelementptr inbounds float* %tmp22360, i64 1
- %tmp22362 = getelementptr inbounds float* %tmp22361, i64 1
- %tmp22363 = getelementptr inbounds float* %tmp22362, i64 1
- %tmp22364 = getelementptr inbounds float* %tmp22363, i64 1
- %tmp22365 = getelementptr inbounds float* %tmp22364, i64 1
- %tmp22366 = getelementptr inbounds float* %tmp22365, i64 1
- %tmp22367 = getelementptr inbounds float* %tmp22366, i64 1
- %tmp22368 = getelementptr inbounds float* %tmp22367, i64 1
- %tmp22369 = getelementptr inbounds float* %tmp22368, i64 1
- %tmp22370 = getelementptr inbounds float* %tmp22369, i64 1
- %tmp22371 = getelementptr inbounds float* %tmp22370, i64 1
- %tmp22372 = getelementptr inbounds float* %tmp22371, i64 1
- %tmp22373 = getelementptr inbounds float* %tmp22372, i64 1
- %tmp22374 = getelementptr inbounds float* %tmp22373, i64 1
- %tmp22375 = getelementptr inbounds float* %tmp22374, i64 1
- %tmp22376 = getelementptr inbounds float* %tmp22375, i64 1
- %tmp22377 = getelementptr inbounds float* %tmp22376, i64 1
- %tmp22378 = getelementptr inbounds float* %tmp22377, i64 1
- %tmp22379 = getelementptr inbounds float* %tmp22378, i64 1
- %tmp22380 = getelementptr inbounds float* %tmp22379, i64 1
- %tmp22381 = getelementptr inbounds float* %tmp22380, i64 1
- %tmp22382 = getelementptr inbounds float* %tmp22381, i64 1
- %tmp22383 = getelementptr inbounds float* %tmp22382, i64 1
- %tmp22384 = getelementptr inbounds float* %tmp22383, i64 1
- %tmp22385 = getelementptr inbounds float* %tmp22384, i64 1
- %tmp22386 = getelementptr inbounds float* %tmp22385, i64 1
- %tmp22387 = getelementptr inbounds float* %tmp22386, i64 1
- %tmp22388 = getelementptr inbounds float* %tmp22387, i64 1
- %tmp22389 = getelementptr inbounds float* %tmp22388, i64 1
- %tmp22390 = getelementptr inbounds float* %tmp22389, i64 1
- %tmp22391 = getelementptr inbounds float* %tmp22390, i64 1
- %tmp22392 = getelementptr inbounds float* %tmp22391, i64 1
- %tmp22393 = getelementptr inbounds float* %tmp22392, i64 1
- %tmp22394 = getelementptr inbounds float* %tmp22393, i64 1
- %tmp22395 = getelementptr inbounds float* %tmp22394, i64 1
- %tmp22396 = getelementptr inbounds float* %tmp22395, i64 1
- %tmp22397 = getelementptr inbounds float* %tmp22396, i64 1
- %tmp22398 = getelementptr inbounds float* %tmp22397, i64 1
- %tmp22399 = getelementptr inbounds float* %tmp22398, i64 1
- %tmp22400 = getelementptr inbounds float* %tmp22399, i64 1
- %tmp22401 = getelementptr inbounds float* %tmp22400, i64 1
- %tmp22402 = getelementptr inbounds float* %tmp22401, i64 1
- %tmp22403 = getelementptr inbounds float* %tmp22402, i64 1
- %tmp22404 = getelementptr inbounds float* %tmp22403, i64 1
- %tmp22405 = getelementptr inbounds float* %tmp22404, i64 1
- %tmp22406 = getelementptr inbounds float* %tmp22405, i64 1
- %tmp22407 = getelementptr inbounds float* %tmp22406, i64 1
- %tmp22408 = getelementptr inbounds float* %tmp22407, i64 1
- %tmp22409 = getelementptr inbounds float* %tmp22408, i64 1
- %tmp22410 = getelementptr inbounds float* %tmp22409, i64 1
- %tmp22411 = getelementptr inbounds float* %tmp22410, i64 1
- %tmp22412 = getelementptr inbounds float* %tmp22411, i64 1
- %tmp22413 = getelementptr inbounds float* %tmp22412, i64 1
- %tmp22414 = getelementptr inbounds float* %tmp22413, i64 1
- %tmp22415 = getelementptr inbounds float* %tmp22414, i64 1
- %tmp22416 = getelementptr inbounds float* %tmp22415, i64 1
- %tmp22417 = getelementptr inbounds float* %tmp22416, i64 1
- %tmp22418 = getelementptr inbounds float* %tmp22417, i64 1
- %tmp22419 = getelementptr inbounds float* %tmp22418, i64 1
- %tmp22420 = getelementptr inbounds float* %tmp22419, i64 1
- %tmp22421 = getelementptr inbounds float* %tmp22420, i64 1
- %tmp22422 = getelementptr inbounds float* %tmp22421, i64 1
- %tmp22423 = getelementptr inbounds float* %tmp22422, i64 1
- %tmp22424 = getelementptr inbounds float* %tmp22423, i64 1
- %tmp22425 = getelementptr inbounds float* %tmp22424, i64 1
- %tmp22426 = getelementptr inbounds float* %tmp22425, i64 1
- %tmp22427 = getelementptr inbounds float* %tmp22426, i64 1
- %tmp22428 = getelementptr inbounds float* %tmp22427, i64 1
- %tmp22429 = getelementptr inbounds float* %tmp22428, i64 1
- %tmp22430 = getelementptr inbounds float* %tmp22429, i64 1
- %tmp22431 = getelementptr inbounds float* %tmp22430, i64 1
- %tmp22432 = getelementptr inbounds float* %tmp22431, i64 1
- %tmp22433 = getelementptr inbounds float* %tmp22432, i64 1
- %tmp22434 = getelementptr inbounds float* %tmp22433, i64 1
- %tmp22435 = getelementptr inbounds float* %tmp22434, i64 1
- %tmp22436 = getelementptr inbounds float* %tmp22435, i64 1
- %tmp22437 = getelementptr inbounds float* %tmp22436, i64 1
- %tmp22438 = getelementptr inbounds float* %tmp22437, i64 1
- %tmp22439 = getelementptr inbounds float* %tmp22438, i64 1
- %tmp22440 = getelementptr inbounds float* %tmp22439, i64 1
- %tmp22441 = getelementptr inbounds float* %tmp22440, i64 1
- %tmp22442 = getelementptr inbounds float* %tmp22441, i64 1
- %tmp22443 = getelementptr inbounds float* %tmp22442, i64 1
- %tmp22444 = getelementptr inbounds float* %tmp22443, i64 1
- %tmp22445 = getelementptr inbounds float* %tmp22444, i64 1
- %tmp22446 = getelementptr inbounds float* %tmp22445, i64 1
- %tmp22447 = getelementptr inbounds float* %tmp22446, i64 1
- %tmp22448 = getelementptr inbounds float* %tmp22447, i64 1
- %tmp22449 = getelementptr inbounds float* %tmp22448, i64 1
- %tmp22450 = getelementptr inbounds float* %tmp22449, i64 1
- %tmp22451 = getelementptr inbounds float* %tmp22450, i64 1
- %tmp22452 = getelementptr inbounds float* %tmp22451, i64 1
- %tmp22453 = getelementptr inbounds float* %tmp22452, i64 1
- %tmp22454 = getelementptr inbounds float* %tmp22453, i64 1
- %tmp22455 = getelementptr inbounds float* %tmp22454, i64 1
- %tmp22456 = getelementptr inbounds float* %tmp22455, i64 1
- %tmp22457 = getelementptr inbounds float* %tmp22456, i64 1
- %tmp22458 = getelementptr inbounds float* %tmp22457, i64 1
- %tmp22459 = getelementptr inbounds float* %tmp22458, i64 1
- %tmp22460 = getelementptr inbounds float* %tmp22459, i64 1
- %tmp22461 = getelementptr inbounds float* %tmp22460, i64 1
- %tmp22462 = getelementptr inbounds float* %tmp22461, i64 1
- %tmp22463 = getelementptr inbounds float* %tmp22462, i64 1
- %tmp22464 = getelementptr inbounds float* %tmp22463, i64 1
- %tmp22465 = getelementptr inbounds float* %tmp22464, i64 1
- %tmp22466 = getelementptr inbounds float* %tmp22465, i64 1
- %tmp22467 = getelementptr inbounds float* %tmp22466, i64 1
- %tmp22468 = getelementptr inbounds float* %tmp22467, i64 1
- %tmp22469 = getelementptr inbounds float* %tmp22468, i64 1
- %tmp22470 = getelementptr inbounds float* %tmp22469, i64 1
- %tmp22471 = getelementptr inbounds float* %tmp22470, i64 1
- %tmp22472 = getelementptr inbounds float* %tmp22471, i64 1
- %tmp22473 = getelementptr inbounds float* %tmp22472, i64 1
- %tmp22474 = getelementptr inbounds float* %tmp22473, i64 1
- %tmp22475 = getelementptr inbounds float* %tmp22474, i64 1
- %tmp22476 = getelementptr inbounds float* %tmp22475, i64 1
- %tmp22477 = getelementptr inbounds float* %tmp22476, i64 1
- %tmp22478 = getelementptr inbounds float* %tmp22477, i64 1
- %tmp22479 = getelementptr inbounds float* %tmp22478, i64 1
- %tmp22480 = getelementptr inbounds float* %tmp22479, i64 1
- %tmp22481 = getelementptr inbounds float* %tmp22480, i64 1
- %tmp22482 = getelementptr inbounds float* %tmp22481, i64 1
- %tmp22483 = getelementptr inbounds float* %tmp22482, i64 1
- %tmp22484 = getelementptr inbounds float* %tmp22483, i64 1
- %tmp22485 = getelementptr inbounds float* %tmp22484, i64 1
- %tmp22486 = getelementptr inbounds float* %tmp22485, i64 1
- %tmp22487 = getelementptr inbounds float* %tmp22486, i64 1
- %tmp22488 = getelementptr inbounds float* %tmp22487, i64 1
- %tmp22489 = getelementptr inbounds float* %tmp22488, i64 1
- %tmp22490 = getelementptr inbounds float* %tmp22489, i64 1
- %tmp22491 = getelementptr inbounds float* %tmp22490, i64 1
- %tmp22492 = getelementptr inbounds float* %tmp22491, i64 1
- %tmp22493 = getelementptr inbounds float* %tmp22492, i64 1
- %tmp22494 = getelementptr inbounds float* %tmp22493, i64 1
- %tmp22495 = getelementptr inbounds float* %tmp22494, i64 1
- %tmp22496 = getelementptr inbounds float* %tmp22495, i64 1
- %tmp22497 = getelementptr inbounds float* %tmp22496, i64 1
- %tmp22498 = getelementptr inbounds float* %tmp22497, i64 1
- %tmp22499 = getelementptr inbounds float* %tmp22498, i64 1
- %tmp22500 = getelementptr inbounds float* %tmp22499, i64 1
- %tmp22501 = getelementptr inbounds float* %tmp22500, i64 1
- %tmp22502 = getelementptr inbounds float* %tmp22501, i64 1
- %tmp22503 = getelementptr inbounds float* %tmp22502, i64 1
- %tmp22504 = getelementptr inbounds float* %tmp22503, i64 1
- %tmp22505 = getelementptr inbounds float* %tmp22504, i64 1
- %tmp22506 = getelementptr inbounds float* %tmp22505, i64 1
- %tmp22507 = getelementptr inbounds float* %tmp22506, i64 1
- %tmp22508 = getelementptr inbounds float* %tmp22507, i64 1
- %tmp22509 = getelementptr inbounds float* %tmp22508, i64 1
- %tmp22510 = getelementptr inbounds float* %tmp22509, i64 1
- %tmp22511 = getelementptr inbounds float* %tmp22510, i64 1
- %tmp22512 = getelementptr inbounds float* %tmp22511, i64 1
- %tmp22513 = getelementptr inbounds float* %tmp22512, i64 1
- %tmp22514 = getelementptr inbounds float* %tmp22513, i64 1
- %tmp22515 = getelementptr inbounds float* %tmp22514, i64 1
- %tmp22516 = getelementptr inbounds float* %tmp22515, i64 1
- %tmp22517 = getelementptr inbounds float* %tmp22516, i64 1
- %tmp22518 = getelementptr inbounds float* %tmp22517, i64 1
- %tmp22519 = getelementptr inbounds float* %tmp22518, i64 1
- %tmp22520 = getelementptr inbounds float* %tmp22519, i64 1
- %tmp22521 = getelementptr inbounds float* %tmp22520, i64 1
- %tmp22522 = getelementptr inbounds float* %tmp22521, i64 1
- %tmp22523 = getelementptr inbounds float* %tmp22522, i64 1
- %tmp22524 = getelementptr inbounds float* %tmp22523, i64 1
- %tmp22525 = getelementptr inbounds float* %tmp22524, i64 1
- %tmp22526 = getelementptr inbounds float* %tmp22525, i64 1
- %tmp22527 = getelementptr inbounds float* %tmp22526, i64 1
- %tmp22528 = getelementptr inbounds float* %tmp22527, i64 1
- %tmp22529 = getelementptr inbounds float* %tmp22528, i64 1
- %tmp22530 = getelementptr inbounds float* %tmp22529, i64 1
- %tmp22531 = getelementptr inbounds float* %tmp22530, i64 1
- %tmp22532 = getelementptr inbounds float* %tmp22531, i64 1
- %tmp22533 = getelementptr inbounds float* %tmp22532, i64 1
- %tmp22534 = getelementptr inbounds float* %tmp22533, i64 1
- %tmp22535 = getelementptr inbounds float* %tmp22534, i64 1
- %tmp22536 = getelementptr inbounds float* %tmp22535, i64 1
- %tmp22537 = getelementptr inbounds float* %tmp22536, i64 1
- %tmp22538 = getelementptr inbounds float* %tmp22537, i64 1
- %tmp22539 = getelementptr inbounds float* %tmp22538, i64 1
- %tmp22540 = getelementptr inbounds float* %tmp22539, i64 1
- %tmp22541 = getelementptr inbounds float* %tmp22540, i64 1
- %tmp22542 = getelementptr inbounds float* %tmp22541, i64 1
- %tmp22543 = getelementptr inbounds float* %tmp22542, i64 1
- %tmp22544 = getelementptr inbounds float* %tmp22543, i64 1
- %tmp22545 = getelementptr inbounds float* %tmp22544, i64 1
- %tmp22546 = getelementptr inbounds float* %tmp22545, i64 1
- %tmp22547 = getelementptr inbounds float* %tmp22546, i64 1
- %tmp22548 = getelementptr inbounds float* %tmp22547, i64 1
- %tmp22549 = getelementptr inbounds float* %tmp22548, i64 1
- %tmp22550 = getelementptr inbounds float* %tmp22549, i64 1
- %tmp22551 = getelementptr inbounds float* %tmp22550, i64 1
- %tmp22552 = getelementptr inbounds float* %tmp22551, i64 1
- %tmp22553 = getelementptr inbounds float* %tmp22552, i64 1
- %tmp22554 = getelementptr inbounds float* %tmp22553, i64 1
- %tmp22555 = getelementptr inbounds float* %tmp22554, i64 1
- %tmp22556 = getelementptr inbounds float* %tmp22555, i64 1
- %tmp22557 = getelementptr inbounds float* %tmp22556, i64 1
- %tmp22558 = getelementptr inbounds float* %tmp22557, i64 1
- %tmp22559 = getelementptr inbounds float* %tmp22558, i64 1
- %tmp22560 = getelementptr inbounds float* %tmp22559, i64 1
- %tmp22561 = getelementptr inbounds float* %tmp22560, i64 1
- %tmp22562 = getelementptr inbounds float* %tmp22561, i64 1
- %tmp22563 = getelementptr inbounds float* %tmp22562, i64 1
- %tmp22564 = getelementptr inbounds float* %tmp22563, i64 1
- %tmp22565 = getelementptr inbounds float* %tmp22564, i64 1
- %tmp22566 = getelementptr inbounds float* %tmp22565, i64 1
- %tmp22567 = getelementptr inbounds float* %tmp22566, i64 1
- %tmp22568 = getelementptr inbounds float* %tmp22567, i64 1
- %tmp22569 = getelementptr inbounds float* %tmp22568, i64 1
- %tmp22570 = getelementptr inbounds float* %tmp22569, i64 1
- %tmp22571 = getelementptr inbounds float* %tmp22570, i64 1
- %tmp22572 = getelementptr inbounds float* %tmp22571, i64 1
- %tmp22573 = getelementptr inbounds float* %tmp22572, i64 1
- %tmp22574 = getelementptr inbounds float* %tmp22573, i64 1
- %tmp22575 = getelementptr inbounds float* %tmp22574, i64 1
- %tmp22576 = getelementptr inbounds float* %tmp22575, i64 1
- %tmp22577 = getelementptr inbounds float* %tmp22576, i64 1
- %tmp22578 = getelementptr inbounds float* %tmp22577, i64 1
- %tmp22579 = getelementptr inbounds float* %tmp22578, i64 1
- %tmp22580 = getelementptr inbounds float* %tmp22579, i64 1
- %tmp22581 = getelementptr inbounds float* %tmp22580, i64 1
- %tmp22582 = getelementptr inbounds float* %tmp22581, i64 1
- %tmp22583 = getelementptr inbounds float* %tmp22582, i64 1
- %tmp22584 = getelementptr inbounds float* %tmp22583, i64 1
- %tmp22585 = getelementptr inbounds float* %tmp22584, i64 1
- %tmp22586 = getelementptr inbounds float* %tmp22585, i64 1
- %tmp22587 = getelementptr inbounds float* %tmp22586, i64 1
- %tmp22588 = getelementptr inbounds float* %tmp22587, i64 1
- %tmp22589 = getelementptr inbounds float* %tmp22588, i64 1
- %tmp22590 = getelementptr inbounds float* %tmp22589, i64 1
- %tmp22591 = getelementptr inbounds float* %tmp22590, i64 1
- %tmp22592 = getelementptr inbounds float* %tmp22591, i64 1
- %tmp22593 = getelementptr inbounds float* %tmp22592, i64 1
- %tmp22594 = getelementptr inbounds float* %tmp22593, i64 1
- %tmp22595 = getelementptr inbounds float* %tmp22594, i64 1
- %tmp22596 = getelementptr inbounds float* %tmp22595, i64 1
- %tmp22597 = getelementptr inbounds float* %tmp22596, i64 1
- %tmp22598 = getelementptr inbounds float* %tmp22597, i64 1
- %tmp22599 = getelementptr inbounds float* %tmp22598, i64 1
- %tmp22600 = getelementptr inbounds float* %tmp22599, i64 1
- %tmp22601 = getelementptr inbounds float* %tmp22600, i64 1
- %tmp22602 = getelementptr inbounds float* %tmp22601, i64 1
- %tmp22603 = getelementptr inbounds float* %tmp22602, i64 1
- %tmp22604 = getelementptr inbounds float* %tmp22603, i64 1
- %tmp22605 = getelementptr inbounds float* %tmp22604, i64 1
- %tmp22606 = getelementptr inbounds float* %tmp22605, i64 1
- %tmp22607 = getelementptr inbounds float* %tmp22606, i64 1
- %tmp22608 = getelementptr inbounds float* %tmp22607, i64 1
- %tmp22609 = getelementptr inbounds float* %tmp22608, i64 1
- %tmp22610 = getelementptr inbounds float* %tmp22609, i64 1
- %tmp22611 = getelementptr inbounds float* %tmp22610, i64 1
- %tmp22612 = getelementptr inbounds float* %tmp22611, i64 1
- %tmp22613 = getelementptr inbounds float* %tmp22612, i64 1
- %tmp22614 = getelementptr inbounds float* %tmp22613, i64 1
- %tmp22615 = getelementptr inbounds float* %tmp22614, i64 1
- %tmp22616 = getelementptr inbounds float* %tmp22615, i64 1
- %tmp22617 = getelementptr inbounds float* %tmp22616, i64 1
- %tmp22618 = getelementptr inbounds float* %tmp22617, i64 1
- %tmp22619 = getelementptr inbounds float* %tmp22618, i64 1
- %tmp22620 = getelementptr inbounds float* %tmp22619, i64 1
- %tmp22621 = getelementptr inbounds float* %tmp22620, i64 1
- %tmp22622 = getelementptr inbounds float* %tmp22621, i64 1
- %tmp22623 = getelementptr inbounds float* %tmp22622, i64 1
- %tmp22624 = getelementptr inbounds float* %tmp22623, i64 1
- %tmp22625 = getelementptr inbounds float* %tmp22624, i64 1
- %tmp22626 = getelementptr inbounds float* %tmp22625, i64 1
- %tmp22627 = getelementptr inbounds float* %tmp22626, i64 1
- %tmp22628 = getelementptr inbounds float* %tmp22627, i64 1
- %tmp22629 = getelementptr inbounds float* %tmp22628, i64 1
- %tmp22630 = getelementptr inbounds float* %tmp22629, i64 1
- %tmp22631 = getelementptr inbounds float* %tmp22630, i64 1
- %tmp22632 = getelementptr inbounds float* %tmp22631, i64 1
- %tmp22633 = getelementptr inbounds float* %tmp22632, i64 1
- %tmp22634 = getelementptr inbounds float* %tmp22633, i64 1
- %tmp22635 = getelementptr inbounds float* %tmp22634, i64 1
- %tmp22636 = getelementptr inbounds float* %tmp22635, i64 1
- %tmp22637 = getelementptr inbounds float* %tmp22636, i64 1
- %tmp22638 = getelementptr inbounds float* %tmp22637, i64 1
- %tmp22639 = getelementptr inbounds float* %tmp22638, i64 1
- %tmp22640 = getelementptr inbounds float* %tmp22639, i64 1
- %tmp22641 = getelementptr inbounds float* %tmp22640, i64 1
- %tmp22642 = getelementptr inbounds float* %tmp22641, i64 1
- %tmp22643 = getelementptr inbounds float* %tmp22642, i64 1
- %tmp22644 = getelementptr inbounds float* %tmp22643, i64 1
- %tmp22645 = getelementptr inbounds float* %tmp22644, i64 1
- %tmp22646 = getelementptr inbounds float* %tmp22645, i64 1
- %tmp22647 = getelementptr inbounds float* %tmp22646, i64 1
- %tmp22648 = getelementptr inbounds float* %tmp22647, i64 1
- %tmp22649 = getelementptr inbounds float* %tmp22648, i64 1
- %tmp22650 = getelementptr inbounds float* %tmp22649, i64 1
- %tmp22651 = getelementptr inbounds float* %tmp22650, i64 1
- %tmp22652 = getelementptr inbounds float* %tmp22651, i64 1
- %tmp22653 = getelementptr inbounds float* %tmp22652, i64 1
- %tmp22654 = getelementptr inbounds float* %tmp22653, i64 1
- %tmp22655 = getelementptr inbounds float* %tmp22654, i64 1
- %tmp22656 = getelementptr inbounds float* %tmp22655, i64 1
- %tmp22657 = getelementptr inbounds float* %tmp22656, i64 1
- %tmp22658 = getelementptr inbounds float* %tmp22657, i64 1
- %tmp22659 = getelementptr inbounds float* %tmp22658, i64 1
- %tmp22660 = getelementptr inbounds float* %tmp22659, i64 1
- %tmp22661 = getelementptr inbounds float* %tmp22660, i64 1
- %tmp22662 = getelementptr inbounds float* %tmp22661, i64 1
- %tmp22663 = getelementptr inbounds float* %tmp22662, i64 1
- %tmp22664 = getelementptr inbounds float* %tmp22663, i64 1
- %tmp22665 = getelementptr inbounds float* %tmp22664, i64 1
- %tmp22666 = getelementptr inbounds float* %tmp22665, i64 1
- %tmp22667 = getelementptr inbounds float* %tmp22666, i64 1
- %tmp22668 = getelementptr inbounds float* %tmp22667, i64 1
- %tmp22669 = getelementptr inbounds float* %tmp22668, i64 1
- %tmp22670 = getelementptr inbounds float* %tmp22669, i64 1
- %tmp22671 = getelementptr inbounds float* %tmp22670, i64 1
- %tmp22672 = getelementptr inbounds float* %tmp22671, i64 1
- %tmp22673 = getelementptr inbounds float* %tmp22672, i64 1
- %tmp22674 = getelementptr inbounds float* %tmp22673, i64 1
- %tmp22675 = getelementptr inbounds float* %tmp22674, i64 1
- %tmp22676 = getelementptr inbounds float* %tmp22675, i64 1
- %tmp22677 = getelementptr inbounds float* %tmp22676, i64 1
- %tmp22678 = getelementptr inbounds float* %tmp22677, i64 1
- %tmp22679 = getelementptr inbounds float* %tmp22678, i64 1
- %tmp22680 = getelementptr inbounds float* %tmp22679, i64 1
- %tmp22681 = getelementptr inbounds float* %tmp22680, i64 1
- %tmp22682 = getelementptr inbounds float* %tmp22681, i64 1
- %tmp22683 = getelementptr inbounds float* %tmp22682, i64 1
- %tmp22684 = getelementptr inbounds float* %tmp22683, i64 1
- %tmp22685 = getelementptr inbounds float* %tmp22684, i64 1
- %tmp22686 = getelementptr inbounds float* %tmp22685, i64 1
- %tmp22687 = getelementptr inbounds float* %tmp22686, i64 1
- %tmp22688 = getelementptr inbounds float* %tmp22687, i64 1
- %tmp22689 = getelementptr inbounds float* %tmp22688, i64 1
- %tmp22690 = getelementptr inbounds float* %tmp22689, i64 1
- %tmp22691 = getelementptr inbounds float* %tmp22690, i64 1
- %tmp22692 = getelementptr inbounds float* %tmp22691, i64 1
- %tmp22693 = getelementptr inbounds float* %tmp22692, i64 1
- %tmp22694 = getelementptr inbounds float* %tmp22693, i64 1
- %tmp22695 = getelementptr inbounds float* %tmp22694, i64 1
- %tmp22696 = getelementptr inbounds float* %tmp22695, i64 1
- %tmp22697 = getelementptr inbounds float* %tmp22696, i64 1
- %tmp22698 = getelementptr inbounds float* %tmp22697, i64 1
- %tmp22699 = getelementptr inbounds float* %tmp22698, i64 1
- %tmp22700 = getelementptr inbounds float* %tmp22699, i64 1
- %tmp22701 = getelementptr inbounds float* %tmp22700, i64 1
- %tmp22702 = getelementptr inbounds float* %tmp22701, i64 1
- %tmp22703 = getelementptr inbounds float* %tmp22702, i64 1
- %tmp22704 = getelementptr inbounds float* %tmp22703, i64 1
- %tmp22705 = getelementptr inbounds float* %tmp22704, i64 1
- %tmp22706 = getelementptr inbounds float* %tmp22705, i64 1
- %tmp22707 = getelementptr inbounds float* %tmp22706, i64 1
- %tmp22708 = getelementptr inbounds float* %tmp22707, i64 1
- %tmp22709 = getelementptr inbounds float* %tmp22708, i64 1
- %tmp22710 = getelementptr inbounds float* %tmp22709, i64 1
- %tmp22711 = getelementptr inbounds float* %tmp22710, i64 1
- %tmp22712 = getelementptr inbounds float* %tmp22711, i64 1
- %tmp22713 = getelementptr inbounds float* %tmp22712, i64 1
- %tmp22714 = getelementptr inbounds float* %tmp22713, i64 1
- %tmp22715 = getelementptr inbounds float* %tmp22714, i64 1
- %tmp22716 = getelementptr inbounds float* %tmp22715, i64 1
- %tmp22717 = getelementptr inbounds float* %tmp22716, i64 1
- %tmp22718 = getelementptr inbounds float* %tmp22717, i64 1
- %tmp22719 = getelementptr inbounds float* %tmp22718, i64 1
- %tmp22720 = getelementptr inbounds float* %tmp22719, i64 1
- %tmp22721 = getelementptr inbounds float* %tmp22720, i64 1
- %tmp22722 = getelementptr inbounds float* %tmp22721, i64 1
- %tmp22723 = getelementptr inbounds float* %tmp22722, i64 1
- %tmp22724 = getelementptr inbounds float* %tmp22723, i64 1
- %tmp22725 = getelementptr inbounds float* %tmp22724, i64 1
- %tmp22726 = getelementptr inbounds float* %tmp22725, i64 1
- %tmp22727 = getelementptr inbounds float* %tmp22726, i64 1
- %tmp22728 = getelementptr inbounds float* %tmp22727, i64 1
- %tmp22729 = getelementptr inbounds float* %tmp22728, i64 1
- %tmp22730 = getelementptr inbounds float* %tmp22729, i64 1
- %tmp22731 = getelementptr inbounds float* %tmp22730, i64 1
- %tmp22732 = getelementptr inbounds float* %tmp22731, i64 1
- %tmp22733 = getelementptr inbounds float* %tmp22732, i64 1
- %tmp22734 = getelementptr inbounds float* %tmp22733, i64 1
- %tmp22735 = getelementptr inbounds float* %tmp22734, i64 1
- %tmp22736 = getelementptr inbounds float* %tmp22735, i64 1
- %tmp22737 = getelementptr inbounds float* %tmp22736, i64 1
- %tmp22738 = getelementptr inbounds float* %tmp22737, i64 1
- %tmp22739 = getelementptr inbounds float* %tmp22738, i64 1
- %tmp22740 = getelementptr inbounds float* %tmp22739, i64 1
- %tmp22741 = getelementptr inbounds float* %tmp22740, i64 1
- %tmp22742 = getelementptr inbounds float* %tmp22741, i64 1
- %tmp22743 = getelementptr inbounds float* %tmp22742, i64 1
- %tmp22744 = getelementptr inbounds float* %tmp22743, i64 1
- %tmp22745 = getelementptr inbounds float* %tmp22744, i64 1
- %tmp22746 = getelementptr inbounds float* %tmp22745, i64 1
- %tmp22747 = getelementptr inbounds float* %tmp22746, i64 1
- %tmp22748 = getelementptr inbounds float* %tmp22747, i64 1
- %tmp22749 = getelementptr inbounds float* %tmp22748, i64 1
- %tmp22750 = getelementptr inbounds float* %tmp22749, i64 1
- %tmp22751 = getelementptr inbounds float* %tmp22750, i64 1
- %tmp22752 = getelementptr inbounds float* %tmp22751, i64 1
- %tmp22753 = getelementptr inbounds float* %tmp22752, i64 1
- %tmp22754 = getelementptr inbounds float* %tmp22753, i64 1
- %tmp22755 = getelementptr inbounds float* %tmp22754, i64 1
- %tmp22756 = getelementptr inbounds float* %tmp22755, i64 1
- %tmp22757 = getelementptr inbounds float* %tmp22756, i64 1
- %tmp22758 = getelementptr inbounds float* %tmp22757, i64 1
- %tmp22759 = getelementptr inbounds float* %tmp22758, i64 1
- %tmp22760 = getelementptr inbounds float* %tmp22759, i64 1
- %tmp22761 = getelementptr inbounds float* %tmp22760, i64 1
- %tmp22762 = getelementptr inbounds float* %tmp22761, i64 1
- %tmp22763 = getelementptr inbounds float* %tmp22762, i64 1
- %tmp22764 = getelementptr inbounds float* %tmp22763, i64 1
- %tmp22765 = getelementptr inbounds float* %tmp22764, i64 1
- %tmp22766 = getelementptr inbounds float* %tmp22765, i64 1
- %tmp22767 = getelementptr inbounds float* %tmp22766, i64 1
- %tmp22768 = getelementptr inbounds float* %tmp22767, i64 1
- %tmp22769 = getelementptr inbounds float* %tmp22768, i64 1
- %tmp22770 = getelementptr inbounds float* %tmp22769, i64 1
- %tmp22771 = getelementptr inbounds float* %tmp22770, i64 1
- %tmp22772 = getelementptr inbounds float* %tmp22771, i64 1
- %tmp22773 = getelementptr inbounds float* %tmp22772, i64 1
- %tmp22774 = getelementptr inbounds float* %tmp22773, i64 1
- %tmp22775 = getelementptr inbounds float* %tmp22774, i64 1
- %tmp22776 = getelementptr inbounds float* %tmp22775, i64 1
- %tmp22777 = getelementptr inbounds float* %tmp22776, i64 1
- %tmp22778 = getelementptr inbounds float* %tmp22777, i64 1
- %tmp22779 = getelementptr inbounds float* %tmp22778, i64 1
- %tmp22780 = getelementptr inbounds float* %tmp22779, i64 1
- %tmp22781 = getelementptr inbounds float* %tmp22780, i64 1
- %tmp22782 = getelementptr inbounds float* %tmp22781, i64 1
- %tmp22783 = getelementptr inbounds float* %tmp22782, i64 1
- %tmp22784 = getelementptr inbounds float* %tmp22783, i64 1
- %tmp22785 = getelementptr inbounds float* %tmp22784, i64 1
- %tmp22786 = getelementptr inbounds float* %tmp22785, i64 1
- %tmp22787 = getelementptr inbounds float* %tmp22786, i64 1
- %tmp22788 = getelementptr inbounds float* %tmp22787, i64 1
- %tmp22789 = getelementptr inbounds float* %tmp22788, i64 1
- %tmp22790 = getelementptr inbounds float* %tmp22789, i64 1
- %tmp22791 = getelementptr inbounds float* %tmp22790, i64 1
- %tmp22792 = getelementptr inbounds float* %tmp22791, i64 1
- %tmp22793 = getelementptr inbounds float* %tmp22792, i64 1
- %tmp22794 = getelementptr inbounds float* %tmp22793, i64 1
- %tmp22795 = getelementptr inbounds float* %tmp22794, i64 1
- %tmp22796 = getelementptr inbounds float* %tmp22795, i64 1
- %tmp22797 = getelementptr inbounds float* %tmp22796, i64 1
- %tmp22798 = getelementptr inbounds float* %tmp22797, i64 1
- %tmp22799 = getelementptr inbounds float* %tmp22798, i64 1
- %tmp22800 = getelementptr inbounds float* %tmp22799, i64 1
- %tmp22801 = getelementptr inbounds float* %tmp22800, i64 1
- %tmp22802 = getelementptr inbounds float* %tmp22801, i64 1
- %tmp22803 = getelementptr inbounds float* %tmp22802, i64 1
- %tmp22804 = getelementptr inbounds float* %tmp22803, i64 1
- %tmp22805 = getelementptr inbounds float* %tmp22804, i64 1
- %tmp22806 = getelementptr inbounds float* %tmp22805, i64 1
- %tmp22807 = getelementptr inbounds float* %tmp22806, i64 1
- %tmp22808 = getelementptr inbounds float* %tmp22807, i64 1
- %tmp22809 = getelementptr inbounds float* %tmp22808, i64 1
- %tmp22810 = getelementptr inbounds float* %tmp22809, i64 1
- %tmp22811 = getelementptr inbounds float* %tmp22810, i64 1
- %tmp22812 = getelementptr inbounds float* %tmp22811, i64 1
- %tmp22813 = getelementptr inbounds float* %tmp22812, i64 1
- %tmp22814 = getelementptr inbounds float* %tmp22813, i64 1
- %tmp22815 = getelementptr inbounds float* %tmp22814, i64 1
- %tmp22816 = getelementptr inbounds float* %tmp22815, i64 1
- %tmp22817 = getelementptr inbounds float* %tmp22816, i64 1
- %tmp22818 = getelementptr inbounds float* %tmp22817, i64 1
- %tmp22819 = getelementptr inbounds float* %tmp22818, i64 1
- %tmp22820 = getelementptr inbounds float* %tmp22819, i64 1
- %tmp22821 = getelementptr inbounds float* %tmp22820, i64 1
- %tmp22822 = getelementptr inbounds float* %tmp22821, i64 1
- %tmp22823 = getelementptr inbounds float* %tmp22822, i64 1
- %tmp22824 = getelementptr inbounds float* %tmp22823, i64 1
- %tmp22825 = getelementptr inbounds float* %tmp22824, i64 1
- %tmp22826 = getelementptr inbounds float* %tmp22825, i64 1
- %tmp22827 = getelementptr inbounds float* %tmp22826, i64 1
- %tmp22828 = getelementptr inbounds float* %tmp22827, i64 1
- %tmp22829 = getelementptr inbounds float* %tmp22828, i64 1
- %tmp22830 = getelementptr inbounds float* %tmp22829, i64 1
- %tmp22831 = getelementptr inbounds float* %tmp22830, i64 1
- %tmp22832 = getelementptr inbounds float* %tmp22831, i64 1
- %tmp22833 = getelementptr inbounds float* %tmp22832, i64 1
- %tmp22834 = getelementptr inbounds float* %tmp22833, i64 1
- %tmp22835 = getelementptr inbounds float* %tmp22834, i64 1
- %tmp22836 = getelementptr inbounds float* %tmp22835, i64 1
- %tmp22837 = getelementptr inbounds float* %tmp22836, i64 1
- %tmp22838 = getelementptr inbounds float* %tmp22837, i64 1
- %tmp22839 = getelementptr inbounds float* %tmp22838, i64 1
- %tmp22840 = getelementptr inbounds float* %tmp22839, i64 1
- %tmp22841 = getelementptr inbounds float* %tmp22840, i64 1
- %tmp22842 = getelementptr inbounds float* %tmp22841, i64 1
- %tmp22843 = getelementptr inbounds float* %tmp22842, i64 1
- %tmp22844 = getelementptr inbounds float* %tmp22843, i64 1
- %tmp22845 = getelementptr inbounds float* %tmp22844, i64 1
- %tmp22846 = getelementptr inbounds float* %tmp22845, i64 1
- %tmp22847 = getelementptr inbounds float* %tmp22846, i64 1
- %tmp22848 = getelementptr inbounds float* %tmp22847, i64 1
- %tmp22849 = getelementptr inbounds float* %tmp22848, i64 1
- %tmp22850 = getelementptr inbounds float* %tmp22849, i64 1
- %tmp22851 = getelementptr inbounds float* %tmp22850, i64 1
- %tmp22852 = getelementptr inbounds float* %tmp22851, i64 1
- %tmp22853 = getelementptr inbounds float* %tmp22852, i64 1
- %tmp22854 = getelementptr inbounds float* %tmp22853, i64 1
- %tmp22855 = getelementptr inbounds float* %tmp22854, i64 1
- %tmp22856 = getelementptr inbounds float* %tmp22855, i64 1
- %tmp22857 = getelementptr inbounds float* %tmp22856, i64 1
- %tmp22858 = getelementptr inbounds float* %tmp22857, i64 1
- %tmp22859 = getelementptr inbounds float* %tmp22858, i64 1
- %tmp22860 = getelementptr inbounds float* %tmp22859, i64 1
- %tmp22861 = getelementptr inbounds float* %tmp22860, i64 1
- %tmp22862 = getelementptr inbounds float* %tmp22861, i64 1
- %tmp22863 = getelementptr inbounds float* %tmp22862, i64 1
- %tmp22864 = getelementptr inbounds float* %tmp22863, i64 1
- %tmp22865 = getelementptr inbounds float* %tmp22864, i64 1
- %tmp22866 = getelementptr inbounds float* %tmp22865, i64 1
- %tmp22867 = getelementptr inbounds float* %tmp22866, i64 1
- %tmp22868 = getelementptr inbounds float* %tmp22867, i64 1
- %tmp22869 = getelementptr inbounds float* %tmp22868, i64 1
- %tmp22870 = getelementptr inbounds float* %tmp22869, i64 1
- %tmp22871 = getelementptr inbounds float* %tmp22870, i64 1
- %tmp22872 = getelementptr inbounds float* %tmp22871, i64 1
- %tmp22873 = getelementptr inbounds float* %tmp22872, i64 1
- %tmp22874 = getelementptr inbounds float* %tmp22873, i64 1
- %tmp22875 = getelementptr inbounds float* %tmp22874, i64 1
- %tmp22876 = getelementptr inbounds float* %tmp22875, i64 1
- %tmp22877 = getelementptr inbounds float* %tmp22876, i64 1
- %tmp22878 = getelementptr inbounds float* %tmp22877, i64 1
- %tmp22879 = getelementptr inbounds float* %tmp22878, i64 1
- %tmp22880 = getelementptr inbounds float* %tmp22879, i64 1
- %tmp22881 = getelementptr inbounds float* %tmp22880, i64 1
- %tmp22882 = getelementptr inbounds float* %tmp22881, i64 1
- %tmp22883 = getelementptr inbounds float* %tmp22882, i64 1
- %tmp22884 = getelementptr inbounds float* %tmp22883, i64 1
- %tmp22885 = getelementptr inbounds float* %tmp22884, i64 1
- %tmp22886 = getelementptr inbounds float* %tmp22885, i64 1
- %tmp22887 = getelementptr inbounds float* %tmp22886, i64 1
- %tmp22888 = getelementptr inbounds float* %tmp22887, i64 1
- %tmp22889 = getelementptr inbounds float* %tmp22888, i64 1
- %tmp22890 = getelementptr inbounds float* %tmp22889, i64 1
- %tmp22891 = getelementptr inbounds float* %tmp22890, i64 1
- %tmp22892 = getelementptr inbounds float* %tmp22891, i64 1
- %tmp22893 = getelementptr inbounds float* %tmp22892, i64 1
- %tmp22894 = getelementptr inbounds float* %tmp22893, i64 1
- %tmp22895 = getelementptr inbounds float* %tmp22894, i64 1
- %tmp22896 = getelementptr inbounds float* %tmp22895, i64 1
- %tmp22897 = getelementptr inbounds float* %tmp22896, i64 1
- %tmp22898 = getelementptr inbounds float* %tmp22897, i64 1
- %tmp22899 = getelementptr inbounds float* %tmp22898, i64 1
- %tmp22900 = getelementptr inbounds float* %tmp22899, i64 1
- %tmp22901 = getelementptr inbounds float* %tmp22900, i64 1
- %tmp22902 = getelementptr inbounds float* %tmp22901, i64 1
- %tmp22903 = getelementptr inbounds float* %tmp22902, i64 1
- %tmp22904 = getelementptr inbounds float* %tmp22903, i64 1
- %tmp22905 = getelementptr inbounds float* %tmp22904, i64 1
- %tmp22906 = getelementptr inbounds float* %tmp22905, i64 1
- %tmp22907 = getelementptr inbounds float* %tmp22906, i64 1
- %tmp22908 = getelementptr inbounds float* %tmp22907, i64 1
- %tmp22909 = getelementptr inbounds float* %tmp22908, i64 1
- %tmp22910 = getelementptr inbounds float* %tmp22909, i64 1
- %tmp22911 = getelementptr inbounds float* %tmp22910, i64 1
- %tmp22912 = getelementptr inbounds float* %tmp22911, i64 1
- %tmp22913 = getelementptr inbounds float* %tmp22912, i64 1
- %tmp22914 = getelementptr inbounds float* %tmp22913, i64 1
- %tmp22915 = getelementptr inbounds float* %tmp22914, i64 1
- %tmp22916 = getelementptr inbounds float* %tmp22915, i64 1
- %tmp22917 = getelementptr inbounds float* %tmp22916, i64 1
- %tmp22918 = getelementptr inbounds float* %tmp22917, i64 1
- %tmp22919 = getelementptr inbounds float* %tmp22918, i64 1
- %tmp22920 = getelementptr inbounds float* %tmp22919, i64 1
- %tmp22921 = getelementptr inbounds float* %tmp22920, i64 1
- %tmp22922 = getelementptr inbounds float* %tmp22921, i64 1
- %tmp22923 = getelementptr inbounds float* %tmp22922, i64 1
- %tmp22924 = getelementptr inbounds float* %tmp22923, i64 1
- %tmp22925 = getelementptr inbounds float* %tmp22924, i64 1
- %tmp22926 = getelementptr inbounds float* %tmp22925, i64 1
- %tmp22927 = getelementptr inbounds float* %tmp22926, i64 1
- %tmp22928 = getelementptr inbounds float* %tmp22927, i64 1
- %tmp22929 = getelementptr inbounds float* %tmp22928, i64 1
- %tmp22930 = getelementptr inbounds float* %tmp22929, i64 1
- %tmp22931 = getelementptr inbounds float* %tmp22930, i64 1
- %tmp22932 = getelementptr inbounds float* %tmp22931, i64 1
- %tmp22933 = getelementptr inbounds float* %tmp22932, i64 1
- %tmp22934 = getelementptr inbounds float* %tmp22933, i64 1
- %tmp22935 = getelementptr inbounds float* %tmp22934, i64 1
- %tmp22936 = getelementptr inbounds float* %tmp22935, i64 1
- %tmp22937 = getelementptr inbounds float* %tmp22936, i64 1
- %tmp22938 = getelementptr inbounds float* %tmp22937, i64 1
- %tmp22939 = getelementptr inbounds float* %tmp22938, i64 1
- %tmp22940 = getelementptr inbounds float* %tmp22939, i64 1
- %tmp22941 = getelementptr inbounds float* %tmp22940, i64 1
- %tmp22942 = getelementptr inbounds float* %tmp22941, i64 1
- %tmp22943 = getelementptr inbounds float* %tmp22942, i64 1
- %tmp22944 = getelementptr inbounds float* %tmp22943, i64 1
- %tmp22945 = getelementptr inbounds float* %tmp22944, i64 1
- %tmp22946 = getelementptr inbounds float* %tmp22945, i64 1
- %tmp22947 = getelementptr inbounds float* %tmp22946, i64 1
- %tmp22948 = getelementptr inbounds float* %tmp22947, i64 1
- %tmp22949 = getelementptr inbounds float* %tmp22948, i64 1
- %tmp22950 = getelementptr inbounds float* %tmp22949, i64 1
- %tmp22951 = getelementptr inbounds float* %tmp22950, i64 1
- %tmp22952 = getelementptr inbounds float* %tmp22951, i64 1
- %tmp22953 = getelementptr inbounds float* %tmp22952, i64 1
- %tmp22954 = getelementptr inbounds float* %tmp22953, i64 1
- %tmp22955 = getelementptr inbounds float* %tmp22954, i64 1
- %tmp22956 = getelementptr inbounds float* %tmp22955, i64 1
- %tmp22957 = getelementptr inbounds float* %tmp22956, i64 1
- %tmp22958 = getelementptr inbounds float* %tmp22957, i64 1
- %tmp22959 = getelementptr inbounds float* %tmp22958, i64 1
- %tmp22960 = getelementptr inbounds float* %tmp22959, i64 1
- %tmp22961 = getelementptr inbounds float* %tmp22960, i64 1
- %tmp22962 = getelementptr inbounds float* %tmp22961, i64 1
- %tmp22963 = getelementptr inbounds float* %tmp22962, i64 1
- %tmp22964 = getelementptr inbounds float* %tmp22963, i64 1
- %tmp22965 = getelementptr inbounds float* %tmp22964, i64 1
- %tmp22966 = getelementptr inbounds float* %tmp22965, i64 1
- %tmp22967 = getelementptr inbounds float* %tmp22966, i64 1
- %tmp22968 = getelementptr inbounds float* %tmp22967, i64 1
- %tmp22969 = getelementptr inbounds float* %tmp22968, i64 1
- %tmp22970 = getelementptr inbounds float* %tmp22969, i64 1
- %tmp22971 = getelementptr inbounds float* %tmp22970, i64 1
- %tmp22972 = getelementptr inbounds float* %tmp22971, i64 1
- %tmp22973 = getelementptr inbounds float* %tmp22972, i64 1
- %tmp22974 = getelementptr inbounds float* %tmp22973, i64 1
- %tmp22975 = getelementptr inbounds float* %tmp22974, i64 1
- %tmp22976 = getelementptr inbounds float* %tmp22975, i64 1
- %tmp22977 = getelementptr inbounds float* %tmp22976, i64 1
- %tmp22978 = getelementptr inbounds float* %tmp22977, i64 1
- %tmp22979 = getelementptr inbounds float* %tmp22978, i64 1
- %tmp22980 = getelementptr inbounds float* %tmp22979, i64 1
- %tmp22981 = getelementptr inbounds float* %tmp22980, i64 1
- %tmp22982 = getelementptr inbounds float* %tmp22981, i64 1
- %tmp22983 = getelementptr inbounds float* %tmp22982, i64 1
- %tmp22984 = getelementptr inbounds float* %tmp22983, i64 1
- %tmp22985 = getelementptr inbounds float* %tmp22984, i64 1
- %tmp22986 = getelementptr inbounds float* %tmp22985, i64 1
- %tmp22987 = getelementptr inbounds float* %tmp22986, i64 1
- %tmp22988 = getelementptr inbounds float* %tmp22987, i64 1
- %tmp22989 = getelementptr inbounds float* %tmp22988, i64 1
- %tmp22990 = getelementptr inbounds float* %tmp22989, i64 1
- %tmp22991 = getelementptr inbounds float* %tmp22990, i64 1
- %tmp22992 = getelementptr inbounds float* %tmp22991, i64 1
- %tmp22993 = getelementptr inbounds float* %tmp22992, i64 1
- %tmp22994 = getelementptr inbounds float* %tmp22993, i64 1
- %tmp22995 = getelementptr inbounds float* %tmp22994, i64 1
- %tmp22996 = getelementptr inbounds float* %tmp22995, i64 1
- %tmp22997 = getelementptr inbounds float* %tmp22996, i64 1
- %tmp22998 = getelementptr inbounds float* %tmp22997, i64 1
- %tmp22999 = getelementptr inbounds float* %tmp22998, i64 1
- %tmp23000 = getelementptr inbounds float* %tmp22999, i64 1
- %tmp23001 = getelementptr inbounds float* %tmp23000, i64 1
- %tmp23002 = getelementptr inbounds float* %tmp23001, i64 1
- %tmp23003 = getelementptr inbounds float* %tmp23002, i64 1
- %tmp23004 = getelementptr inbounds float* %tmp23003, i64 1
- %tmp23005 = getelementptr inbounds float* %tmp23004, i64 1
- %tmp23006 = getelementptr inbounds float* %tmp23005, i64 1
- %tmp23007 = getelementptr inbounds float* %tmp23006, i64 1
- %tmp23008 = getelementptr inbounds float* %tmp23007, i64 1
- %tmp23009 = getelementptr inbounds float* %tmp23008, i64 1
- %tmp23010 = getelementptr inbounds float* %tmp23009, i64 1
- %tmp23011 = getelementptr inbounds float* %tmp23010, i64 1
- %tmp23012 = getelementptr inbounds float* %tmp23011, i64 1
- %tmp23013 = getelementptr inbounds float* %tmp23012, i64 1
- %tmp23014 = getelementptr inbounds float* %tmp23013, i64 1
- %tmp23015 = getelementptr inbounds float* %tmp23014, i64 1
- %tmp23016 = getelementptr inbounds float* %tmp23015, i64 1
- %tmp23017 = getelementptr inbounds float* %tmp23016, i64 1
- %tmp23018 = getelementptr inbounds float* %tmp23017, i64 1
- %tmp23019 = getelementptr inbounds float* %tmp23018, i64 1
- %tmp23020 = getelementptr inbounds float* %tmp23019, i64 1
- %tmp23021 = getelementptr inbounds float* %tmp23020, i64 1
- %tmp23022 = getelementptr inbounds float* %tmp23021, i64 1
- %tmp23023 = getelementptr inbounds float* %tmp23022, i64 1
- %tmp23024 = getelementptr inbounds float* %tmp23023, i64 1
- %tmp23025 = getelementptr inbounds float* %tmp23024, i64 1
- %tmp23026 = getelementptr inbounds float* %tmp23025, i64 1
- %tmp23027 = getelementptr inbounds float* %tmp23026, i64 1
- %tmp23028 = getelementptr inbounds float* %tmp23027, i64 1
- %tmp23029 = getelementptr inbounds float* %tmp23028, i64 1
- %tmp23030 = getelementptr inbounds float* %tmp23029, i64 1
- %tmp23031 = getelementptr inbounds float* %tmp23030, i64 1
- %tmp23032 = getelementptr inbounds float* %tmp23031, i64 1
- %tmp23033 = getelementptr inbounds float* %tmp23032, i64 1
- %tmp23034 = getelementptr inbounds float* %tmp23033, i64 1
- %tmp23035 = getelementptr inbounds float* %tmp23034, i64 1
- %tmp23036 = getelementptr inbounds float* %tmp23035, i64 1
- %tmp23037 = getelementptr inbounds float* %tmp23036, i64 1
- %tmp23038 = getelementptr inbounds float* %tmp23037, i64 1
- %tmp23039 = getelementptr inbounds float* %tmp23038, i64 1
- %tmp23040 = getelementptr inbounds float* %tmp23039, i64 1
- %tmp23041 = getelementptr inbounds float* %tmp23040, i64 1
- %tmp23042 = getelementptr inbounds float* %tmp23041, i64 1
- %tmp23043 = getelementptr inbounds float* %tmp23042, i64 1
- %tmp23044 = getelementptr inbounds float* %tmp23043, i64 1
- %tmp23045 = getelementptr inbounds float* %tmp23044, i64 1
- %tmp23046 = getelementptr inbounds float* %tmp23045, i64 1
- %tmp23047 = getelementptr inbounds float* %tmp23046, i64 1
- %tmp23048 = getelementptr inbounds float* %tmp23047, i64 1
- %tmp23049 = getelementptr inbounds float* %tmp23048, i64 1
- %tmp23050 = getelementptr inbounds float* %tmp23049, i64 1
- %tmp23051 = getelementptr inbounds float* %tmp23050, i64 1
- %tmp23052 = getelementptr inbounds float* %tmp23051, i64 1
- %tmp23053 = getelementptr inbounds float* %tmp23052, i64 1
- %tmp23054 = getelementptr inbounds float* %tmp23053, i64 1
- %tmp23055 = getelementptr inbounds float* %tmp23054, i64 1
- %tmp23056 = getelementptr inbounds float* %tmp23055, i64 1
- %tmp23057 = getelementptr inbounds float* %tmp23056, i64 1
- %tmp23058 = getelementptr inbounds float* %tmp23057, i64 1
- %tmp23059 = getelementptr inbounds float* %tmp23058, i64 1
- %tmp23060 = getelementptr inbounds float* %tmp23059, i64 1
- %tmp23061 = getelementptr inbounds float* %tmp23060, i64 1
- %tmp23062 = getelementptr inbounds float* %tmp23061, i64 1
- %tmp23063 = getelementptr inbounds float* %tmp23062, i64 1
- %tmp23064 = getelementptr inbounds float* %tmp23063, i64 1
- %tmp23065 = getelementptr inbounds float* %tmp23064, i64 1
- %tmp23066 = getelementptr inbounds float* %tmp23065, i64 1
- %tmp23067 = getelementptr inbounds float* %tmp23066, i64 1
- %tmp23068 = getelementptr inbounds float* %tmp23067, i64 1
- %tmp23069 = getelementptr inbounds float* %tmp23068, i64 1
- %tmp23070 = getelementptr inbounds float* %tmp23069, i64 1
- %tmp23071 = getelementptr inbounds float* %tmp23070, i64 1
- %tmp23072 = getelementptr inbounds float* %tmp23071, i64 1
- %tmp23073 = getelementptr inbounds float* %tmp23072, i64 1
- %tmp23074 = getelementptr inbounds float* %tmp23073, i64 1
- %tmp23075 = getelementptr inbounds float* %tmp23074, i64 1
- %tmp23076 = getelementptr inbounds float* %tmp23075, i64 1
- %tmp23077 = getelementptr inbounds float* %tmp23076, i64 1
- %tmp23078 = getelementptr inbounds float* %tmp23077, i64 1
- %tmp23079 = getelementptr inbounds float* %tmp23078, i64 1
- %tmp23080 = getelementptr inbounds float* %tmp23079, i64 1
- %tmp23081 = getelementptr inbounds float* %tmp23080, i64 1
- %tmp23082 = getelementptr inbounds float* %tmp23081, i64 1
- %tmp23083 = getelementptr inbounds float* %tmp23082, i64 1
- %tmp23084 = getelementptr inbounds float* %tmp23083, i64 1
- %tmp23085 = getelementptr inbounds float* %tmp23084, i64 1
- %tmp23086 = getelementptr inbounds float* %tmp23085, i64 1
- %tmp23087 = getelementptr inbounds float* %tmp23086, i64 1
- %tmp23088 = getelementptr inbounds float* %tmp23087, i64 1
- %tmp23089 = getelementptr inbounds float* %tmp23088, i64 1
- %tmp23090 = getelementptr inbounds float* %tmp23089, i64 1
- %tmp23091 = getelementptr inbounds float* %tmp23090, i64 1
- %tmp23092 = getelementptr inbounds float* %tmp23091, i64 1
- %tmp23093 = getelementptr inbounds float* %tmp23092, i64 1
- %tmp23094 = getelementptr inbounds float* %tmp23093, i64 1
- %tmp23095 = getelementptr inbounds float* %tmp23094, i64 1
- %tmp23096 = getelementptr inbounds float* %tmp23095, i64 1
- %tmp23097 = getelementptr inbounds float* %tmp23096, i64 1
- %tmp23098 = getelementptr inbounds float* %tmp23097, i64 1
- %tmp23099 = getelementptr inbounds float* %tmp23098, i64 1
- %tmp23100 = getelementptr inbounds float* %tmp23099, i64 1
- %tmp23101 = getelementptr inbounds float* %tmp23100, i64 1
- %tmp23102 = getelementptr inbounds float* %tmp23101, i64 1
- %tmp23103 = getelementptr inbounds float* %tmp23102, i64 1
- %tmp23104 = getelementptr inbounds float* %tmp23103, i64 1
- %tmp23105 = getelementptr inbounds float* %tmp23104, i64 1
- %tmp23106 = getelementptr inbounds float* %tmp23105, i64 1
- %tmp23107 = getelementptr inbounds float* %tmp23106, i64 1
- %tmp23108 = getelementptr inbounds float* %tmp23107, i64 1
- %tmp23109 = getelementptr inbounds float* %tmp23108, i64 1
- %tmp23110 = getelementptr inbounds float* %tmp23109, i64 1
- %tmp23111 = getelementptr inbounds float* %tmp23110, i64 1
- %tmp23112 = getelementptr inbounds float* %tmp23111, i64 1
- %tmp23113 = getelementptr inbounds float* %tmp23112, i64 1
- %tmp23114 = getelementptr inbounds float* %tmp23113, i64 1
- %tmp23115 = getelementptr inbounds float* %tmp23114, i64 1
- %tmp23116 = getelementptr inbounds float* %tmp23115, i64 1
- %tmp23117 = getelementptr inbounds float* %tmp23116, i64 1
- %tmp23118 = getelementptr inbounds float* %tmp23117, i64 1
- %tmp23119 = getelementptr inbounds float* %tmp23118, i64 1
- %tmp23120 = getelementptr inbounds float* %tmp23119, i64 1
- %tmp23121 = getelementptr inbounds float* %tmp23120, i64 1
- %tmp23122 = getelementptr inbounds float* %tmp23121, i64 1
- %tmp23123 = getelementptr inbounds float* %tmp23122, i64 1
- %tmp23124 = getelementptr inbounds float* %tmp23123, i64 1
- %tmp23125 = getelementptr inbounds float* %tmp23124, i64 1
- %tmp23126 = getelementptr inbounds float* %tmp23125, i64 1
- %tmp23127 = getelementptr inbounds float* %tmp23126, i64 1
- %tmp23128 = getelementptr inbounds float* %tmp23127, i64 1
- %tmp23129 = getelementptr inbounds float* %tmp23128, i64 1
- %tmp23130 = getelementptr inbounds float* %tmp23129, i64 1
- %tmp23131 = getelementptr inbounds float* %tmp23130, i64 1
- %tmp23132 = getelementptr inbounds float* %tmp23131, i64 1
- %tmp23133 = getelementptr inbounds float* %tmp23132, i64 1
- %tmp23134 = getelementptr inbounds float* %tmp23133, i64 1
- %tmp23135 = getelementptr inbounds float* %tmp23134, i64 1
- %tmp23136 = getelementptr inbounds float* %tmp23135, i64 1
- %tmp23137 = getelementptr inbounds float* %tmp23136, i64 1
- %tmp23138 = getelementptr inbounds float* %tmp23137, i64 1
- %tmp23139 = getelementptr inbounds float* %tmp23138, i64 1
- %tmp23140 = getelementptr inbounds float* %tmp23139, i64 1
- %tmp23141 = getelementptr inbounds float* %tmp23140, i64 1
- %tmp23142 = getelementptr inbounds float* %tmp23141, i64 1
- %tmp23143 = getelementptr inbounds float* %tmp23142, i64 1
- %tmp23144 = getelementptr inbounds float* %tmp23143, i64 1
- %tmp23145 = getelementptr inbounds float* %tmp23144, i64 1
- %tmp23146 = getelementptr inbounds float* %tmp23145, i64 1
- %tmp23147 = getelementptr inbounds float* %tmp23146, i64 1
- %tmp23148 = getelementptr inbounds float* %tmp23147, i64 1
- %tmp23149 = getelementptr inbounds float* %tmp23148, i64 1
- %tmp23150 = getelementptr inbounds float* %tmp23149, i64 1
- %tmp23151 = getelementptr inbounds float* %tmp23150, i64 1
- %tmp23152 = getelementptr inbounds float* %tmp23151, i64 1
- %tmp23153 = getelementptr inbounds float* %tmp23152, i64 1
- %tmp23154 = getelementptr inbounds float* %tmp23153, i64 1
- %tmp23155 = getelementptr inbounds float* %tmp23154, i64 1
- %tmp23156 = getelementptr inbounds float* %tmp23155, i64 1
- %tmp23157 = getelementptr inbounds float* %tmp23156, i64 1
- %tmp23158 = getelementptr inbounds float* %tmp23157, i64 1
- %tmp23159 = getelementptr inbounds float* %tmp23158, i64 1
- %tmp23160 = getelementptr inbounds float* %tmp23159, i64 1
- %tmp23161 = getelementptr inbounds float* %tmp23160, i64 1
- %tmp23162 = getelementptr inbounds float* %tmp23161, i64 1
- %tmp23163 = getelementptr inbounds float* %tmp23162, i64 1
- %tmp23164 = getelementptr inbounds float* %tmp23163, i64 1
- %tmp23165 = getelementptr inbounds float* %tmp23164, i64 1
- %tmp23166 = getelementptr inbounds float* %tmp23165, i64 1
- %tmp23167 = getelementptr inbounds float* %tmp23166, i64 1
- %tmp23168 = getelementptr inbounds float* %tmp23167, i64 1
- %tmp23169 = getelementptr inbounds float* %tmp23168, i64 1
- %tmp23170 = getelementptr inbounds float* %tmp23169, i64 1
- %tmp23171 = getelementptr inbounds float* %tmp23170, i64 1
- %tmp23172 = getelementptr inbounds float* %tmp23171, i64 1
- %tmp23173 = getelementptr inbounds float* %tmp23172, i64 1
- %tmp23174 = getelementptr inbounds float* %tmp23173, i64 1
- %tmp23175 = getelementptr inbounds float* %tmp23174, i64 1
- %tmp23176 = getelementptr inbounds float* %tmp23175, i64 1
- %tmp23177 = getelementptr inbounds float* %tmp23176, i64 1
- %tmp23178 = getelementptr inbounds float* %tmp23177, i64 1
- %tmp23179 = getelementptr inbounds float* %tmp23178, i64 1
- %tmp23180 = getelementptr inbounds float* %tmp23179, i64 1
- %tmp23181 = getelementptr inbounds float* %tmp23180, i64 1
- %tmp23182 = getelementptr inbounds float* %tmp23181, i64 1
- %tmp23183 = getelementptr inbounds float* %tmp23182, i64 1
- %tmp23184 = getelementptr inbounds float* %tmp23183, i64 1
- %tmp23185 = getelementptr inbounds float* %tmp23184, i64 1
- %tmp23186 = getelementptr inbounds float* %tmp23185, i64 1
- %tmp23187 = getelementptr inbounds float* %tmp23186, i64 1
- %tmp23188 = getelementptr inbounds float* %tmp23187, i64 1
- %tmp23189 = getelementptr inbounds float* %tmp23188, i64 1
- %tmp23190 = getelementptr inbounds float* %tmp23189, i64 1
- %tmp23191 = getelementptr inbounds float* %tmp23190, i64 1
- %tmp23192 = getelementptr inbounds float* %tmp23191, i64 1
- %tmp23193 = getelementptr inbounds float* %tmp23192, i64 1
- %tmp23194 = getelementptr inbounds float* %tmp23193, i64 1
- %tmp23195 = getelementptr inbounds float* %tmp23194, i64 1
- %tmp23196 = getelementptr inbounds float* %tmp23195, i64 1
- %tmp23197 = getelementptr inbounds float* %tmp23196, i64 1
- %tmp23198 = getelementptr inbounds float* %tmp23197, i64 1
- %tmp23199 = getelementptr inbounds float* %tmp23198, i64 1
- %tmp23200 = getelementptr inbounds float* %tmp23199, i64 1
- %tmp23201 = getelementptr inbounds float* %tmp23200, i64 1
- %tmp23202 = getelementptr inbounds float* %tmp23201, i64 1
- %tmp23203 = getelementptr inbounds float* %tmp23202, i64 1
- %tmp23204 = getelementptr inbounds float* %tmp23203, i64 1
- %tmp23205 = getelementptr inbounds float* %tmp23204, i64 1
- %tmp23206 = getelementptr inbounds float* %tmp23205, i64 1
- %tmp23207 = getelementptr inbounds float* %tmp23206, i64 1
- %tmp23208 = getelementptr inbounds float* %tmp23207, i64 1
- %tmp23209 = getelementptr inbounds float* %tmp23208, i64 1
- %tmp23210 = getelementptr inbounds float* %tmp23209, i64 1
- %tmp23211 = getelementptr inbounds float* %tmp23210, i64 1
- %tmp23212 = getelementptr inbounds float* %tmp23211, i64 1
- %tmp23213 = getelementptr inbounds float* %tmp23212, i64 1
- %tmp23214 = getelementptr inbounds float* %tmp23213, i64 1
- %tmp23215 = getelementptr inbounds float* %tmp23214, i64 1
- %tmp23216 = getelementptr inbounds float* %tmp23215, i64 1
- %tmp23217 = getelementptr inbounds float* %tmp23216, i64 1
- %tmp23218 = getelementptr inbounds float* %tmp23217, i64 1
- %tmp23219 = getelementptr inbounds float* %tmp23218, i64 1
- %tmp23220 = getelementptr inbounds float* %tmp23219, i64 1
- %tmp23221 = getelementptr inbounds float* %tmp23220, i64 1
- %tmp23222 = getelementptr inbounds float* %tmp23221, i64 1
- %tmp23223 = getelementptr inbounds float* %tmp23222, i64 1
- %tmp23224 = getelementptr inbounds float* %tmp23223, i64 1
- %tmp23225 = getelementptr inbounds float* %tmp23224, i64 1
- %tmp23226 = getelementptr inbounds float* %tmp23225, i64 1
- %tmp23227 = getelementptr inbounds float* %tmp23226, i64 1
- %tmp23228 = getelementptr inbounds float* %tmp23227, i64 1
- %tmp23229 = getelementptr inbounds float* %tmp23228, i64 1
- %tmp23230 = getelementptr inbounds float* %tmp23229, i64 1
- %tmp23231 = getelementptr inbounds float* %tmp23230, i64 1
- %tmp23232 = getelementptr inbounds float* %tmp23231, i64 1
- %tmp23233 = getelementptr inbounds float* %tmp23232, i64 1
- %tmp23234 = getelementptr inbounds float* %tmp23233, i64 1
- %tmp23235 = getelementptr inbounds float* %tmp23234, i64 1
- %tmp23236 = getelementptr inbounds float* %tmp23235, i64 1
- %tmp23237 = getelementptr inbounds float* %tmp23236, i64 1
- %tmp23238 = getelementptr inbounds float* %tmp23237, i64 1
- %tmp23239 = getelementptr inbounds float* %tmp23238, i64 1
- %tmp23240 = getelementptr inbounds float* %tmp23239, i64 1
- %tmp23241 = getelementptr inbounds float* %tmp23240, i64 1
- %tmp23242 = getelementptr inbounds float* %tmp23241, i64 1
- %tmp23243 = getelementptr inbounds float* %tmp23242, i64 1
- %tmp23244 = getelementptr inbounds float* %tmp23243, i64 1
- %tmp23245 = getelementptr inbounds float* %tmp23244, i64 1
- %tmp23246 = getelementptr inbounds float* %tmp23245, i64 1
- %tmp23247 = getelementptr inbounds float* %tmp23246, i64 1
- %tmp23248 = getelementptr inbounds float* %tmp23247, i64 1
- %tmp23249 = getelementptr inbounds float* %tmp23248, i64 1
- %tmp23250 = getelementptr inbounds float* %tmp23249, i64 1
- %tmp23251 = getelementptr inbounds float* %tmp23250, i64 1
- %tmp23252 = getelementptr inbounds float* %tmp23251, i64 1
- %tmp23253 = getelementptr inbounds float* %tmp23252, i64 1
- %tmp23254 = getelementptr inbounds float* %tmp23253, i64 1
- %tmp23255 = getelementptr inbounds float* %tmp23254, i64 1
- %tmp23256 = getelementptr inbounds float* %tmp23255, i64 1
- %tmp23257 = getelementptr inbounds float* %tmp23256, i64 1
- %tmp23258 = getelementptr inbounds float* %tmp23257, i64 1
- %tmp23259 = getelementptr inbounds float* %tmp23258, i64 1
- %tmp23260 = getelementptr inbounds float* %tmp23259, i64 1
- %tmp23261 = getelementptr inbounds float* %tmp23260, i64 1
- %tmp23262 = getelementptr inbounds float* %tmp23261, i64 1
- %tmp23263 = getelementptr inbounds float* %tmp23262, i64 1
- %tmp23264 = getelementptr inbounds float* %tmp23263, i64 1
- %tmp23265 = getelementptr inbounds float* %tmp23264, i64 1
- %tmp23266 = getelementptr inbounds float* %tmp23265, i64 1
- %tmp23267 = getelementptr inbounds float* %tmp23266, i64 1
- %tmp23268 = getelementptr inbounds float* %tmp23267, i64 1
- %tmp23269 = getelementptr inbounds float* %tmp23268, i64 1
- %tmp23270 = getelementptr inbounds float* %tmp23269, i64 1
- %tmp23271 = getelementptr inbounds float* %tmp23270, i64 1
- %tmp23272 = getelementptr inbounds float* %tmp23271, i64 1
- %tmp23273 = getelementptr inbounds float* %tmp23272, i64 1
- %tmp23274 = getelementptr inbounds float* %tmp23273, i64 1
- %tmp23275 = getelementptr inbounds float* %tmp23274, i64 1
- %tmp23276 = getelementptr inbounds float* %tmp23275, i64 1
- %tmp23277 = getelementptr inbounds float* %tmp23276, i64 1
- %tmp23278 = getelementptr inbounds float* %tmp23277, i64 1
- %tmp23279 = getelementptr inbounds float* %tmp23278, i64 1
- %tmp23280 = getelementptr inbounds float* %tmp23279, i64 1
- %tmp23281 = getelementptr inbounds float* %tmp23280, i64 1
- %tmp23282 = getelementptr inbounds float* %tmp23281, i64 1
- %tmp23283 = getelementptr inbounds float* %tmp23282, i64 1
- %tmp23284 = getelementptr inbounds float* %tmp23283, i64 1
- %tmp23285 = getelementptr inbounds float* %tmp23284, i64 1
- %tmp23286 = getelementptr inbounds float* %tmp23285, i64 1
- %tmp23287 = getelementptr inbounds float* %tmp23286, i64 1
- %tmp23288 = getelementptr inbounds float* %tmp23287, i64 1
- %tmp23289 = getelementptr inbounds float* %tmp23288, i64 1
- %tmp23290 = getelementptr inbounds float* %tmp23289, i64 1
- %tmp23291 = getelementptr inbounds float* %tmp23290, i64 1
- %tmp23292 = getelementptr inbounds float* %tmp23291, i64 1
- %tmp23293 = getelementptr inbounds float* %tmp23292, i64 1
- %tmp23294 = getelementptr inbounds float* %tmp23293, i64 1
- %tmp23295 = getelementptr inbounds float* %tmp23294, i64 1
- %tmp23296 = getelementptr inbounds float* %tmp23295, i64 1
- %tmp23297 = getelementptr inbounds float* %tmp23296, i64 1
- %tmp23298 = getelementptr inbounds float* %tmp23297, i64 1
- %tmp23299 = getelementptr inbounds float* %tmp23298, i64 1
- %tmp23300 = getelementptr inbounds float* %tmp23299, i64 1
- %tmp23301 = getelementptr inbounds float* %tmp23300, i64 1
- %tmp23302 = getelementptr inbounds float* %tmp23301, i64 1
- %tmp23303 = getelementptr inbounds float* %tmp23302, i64 1
- %tmp23304 = getelementptr inbounds float* %tmp23303, i64 1
- %tmp23305 = getelementptr inbounds float* %tmp23304, i64 1
- %tmp23306 = getelementptr inbounds float* %tmp23305, i64 1
- %tmp23307 = getelementptr inbounds float* %tmp23306, i64 1
- %tmp23308 = getelementptr inbounds float* %tmp23307, i64 1
- %tmp23309 = getelementptr inbounds float* %tmp23308, i64 1
- %tmp23310 = getelementptr inbounds float* %tmp23309, i64 1
- %tmp23311 = getelementptr inbounds float* %tmp23310, i64 1
- %tmp23312 = getelementptr inbounds float* %tmp23311, i64 1
- %tmp23313 = getelementptr inbounds float* %tmp23312, i64 1
- %tmp23314 = getelementptr inbounds float* %tmp23313, i64 1
- %tmp23315 = getelementptr inbounds float* %tmp23314, i64 1
- %tmp23316 = getelementptr inbounds float* %tmp23315, i64 1
- %tmp23317 = getelementptr inbounds float* %tmp23316, i64 1
- %tmp23318 = getelementptr inbounds float* %tmp23317, i64 1
- %tmp23319 = getelementptr inbounds float* %tmp23318, i64 1
- %tmp23320 = getelementptr inbounds float* %tmp23319, i64 1
- %tmp23321 = getelementptr inbounds float* %tmp23320, i64 1
- %tmp23322 = getelementptr inbounds float* %tmp23321, i64 1
- %tmp23323 = getelementptr inbounds float* %tmp23322, i64 1
- %tmp23324 = getelementptr inbounds float* %tmp23323, i64 1
- %tmp23325 = getelementptr inbounds float* %tmp23324, i64 1
- %tmp23326 = getelementptr inbounds float* %tmp23325, i64 1
- %tmp23327 = getelementptr inbounds float* %tmp23326, i64 1
- %tmp23328 = getelementptr inbounds float* %tmp23327, i64 1
- %tmp23329 = getelementptr inbounds float* %tmp23328, i64 1
- %tmp23330 = getelementptr inbounds float* %tmp23329, i64 1
- %tmp23331 = getelementptr inbounds float* %tmp23330, i64 1
- %tmp23332 = getelementptr inbounds float* %tmp23331, i64 1
- %tmp23333 = getelementptr inbounds float* %tmp23332, i64 1
- %tmp23334 = getelementptr inbounds float* %tmp23333, i64 1
- %tmp23335 = getelementptr inbounds float* %tmp23334, i64 1
- %tmp23336 = getelementptr inbounds float* %tmp23335, i64 1
- %tmp23337 = getelementptr inbounds float* %tmp23336, i64 1
- %tmp23338 = getelementptr inbounds float* %tmp23337, i64 1
- %tmp23339 = getelementptr inbounds float* %tmp23338, i64 1
- %tmp23340 = getelementptr inbounds float* %tmp23339, i64 1
- %tmp23341 = getelementptr inbounds float* %tmp23340, i64 1
- %tmp23342 = getelementptr inbounds float* %tmp23341, i64 1
- %tmp23343 = getelementptr inbounds float* %tmp23342, i64 1
- %tmp23344 = getelementptr inbounds float* %tmp23343, i64 1
- %tmp23345 = getelementptr inbounds float* %tmp23344, i64 1
- %tmp23346 = getelementptr inbounds float* %tmp23345, i64 1
- %tmp23347 = getelementptr inbounds float* %tmp23346, i64 1
- %tmp23348 = getelementptr inbounds float* %tmp23347, i64 1
- %tmp23349 = getelementptr inbounds float* %tmp23348, i64 1
- %tmp23350 = getelementptr inbounds float* %tmp23349, i64 1
- %tmp23351 = getelementptr inbounds float* %tmp23350, i64 1
- %tmp23352 = getelementptr inbounds float* %tmp23351, i64 1
- %tmp23353 = getelementptr inbounds float* %tmp23352, i64 1
- %tmp23354 = getelementptr inbounds float* %tmp23353, i64 1
- %tmp23355 = getelementptr inbounds float* %tmp23354, i64 1
- %tmp23356 = getelementptr inbounds float* %tmp23355, i64 1
- %tmp23357 = getelementptr inbounds float* %tmp23356, i64 1
- %tmp23358 = getelementptr inbounds float* %tmp23357, i64 1
- %tmp23359 = getelementptr inbounds float* %tmp23358, i64 1
- %tmp23360 = getelementptr inbounds float* %tmp23359, i64 1
- %tmp23361 = getelementptr inbounds float* %tmp23360, i64 1
- %tmp23362 = getelementptr inbounds float* %tmp23361, i64 1
- %tmp23363 = getelementptr inbounds float* %tmp23362, i64 1
- %tmp23364 = getelementptr inbounds float* %tmp23363, i64 1
- %tmp23365 = getelementptr inbounds float* %tmp23364, i64 1
- %tmp23366 = getelementptr inbounds float* %tmp23365, i64 1
- %tmp23367 = getelementptr inbounds float* %tmp23366, i64 1
- %tmp23368 = getelementptr inbounds float* %tmp23367, i64 1
- %tmp23369 = getelementptr inbounds float* %tmp23368, i64 1
- %tmp23370 = getelementptr inbounds float* %tmp23369, i64 1
- %tmp23371 = getelementptr inbounds float* %tmp23370, i64 1
- %tmp23372 = getelementptr inbounds float* %tmp23371, i64 1
- %tmp23373 = getelementptr inbounds float* %tmp23372, i64 1
- %tmp23374 = getelementptr inbounds float* %tmp23373, i64 1
- %tmp23375 = getelementptr inbounds float* %tmp23374, i64 1
- %tmp23376 = getelementptr inbounds float* %tmp23375, i64 1
- %tmp23377 = getelementptr inbounds float* %tmp23376, i64 1
- %tmp23378 = getelementptr inbounds float* %tmp23377, i64 1
- %tmp23379 = getelementptr inbounds float* %tmp23378, i64 1
- %tmp23380 = getelementptr inbounds float* %tmp23379, i64 1
- %tmp23381 = getelementptr inbounds float* %tmp23380, i64 1
- %tmp23382 = getelementptr inbounds float* %tmp23381, i64 1
- %tmp23383 = getelementptr inbounds float* %tmp23382, i64 1
- %tmp23384 = getelementptr inbounds float* %tmp23383, i64 1
- %tmp23385 = getelementptr inbounds float* %tmp23384, i64 1
- %tmp23386 = getelementptr inbounds float* %tmp23385, i64 1
- %tmp23387 = getelementptr inbounds float* %tmp23386, i64 1
- %tmp23388 = getelementptr inbounds float* %tmp23387, i64 1
- %tmp23389 = getelementptr inbounds float* %tmp23388, i64 1
- %tmp23390 = getelementptr inbounds float* %tmp23389, i64 1
- %tmp23391 = getelementptr inbounds float* %tmp23390, i64 1
- %tmp23392 = getelementptr inbounds float* %tmp23391, i64 1
- %tmp23393 = getelementptr inbounds float* %tmp23392, i64 1
- %tmp23394 = getelementptr inbounds float* %tmp23393, i64 1
- %tmp23395 = getelementptr inbounds float* %tmp23394, i64 1
- %tmp23396 = getelementptr inbounds float* %tmp23395, i64 1
- %tmp23397 = getelementptr inbounds float* %tmp23396, i64 1
- %tmp23398 = getelementptr inbounds float* %tmp23397, i64 1
- %tmp23399 = getelementptr inbounds float* %tmp23398, i64 1
- %tmp23400 = getelementptr inbounds float* %tmp23399, i64 1
- %tmp23401 = getelementptr inbounds float* %tmp23400, i64 1
- %tmp23402 = getelementptr inbounds float* %tmp23401, i64 1
- %tmp23403 = getelementptr inbounds float* %tmp23402, i64 1
- %tmp23404 = getelementptr inbounds float* %tmp23403, i64 1
- %tmp23405 = getelementptr inbounds float* %tmp23404, i64 1
- %tmp23406 = getelementptr inbounds float* %tmp23405, i64 1
- %tmp23407 = getelementptr inbounds float* %tmp23406, i64 1
- %tmp23408 = getelementptr inbounds float* %tmp23407, i64 1
- %tmp23409 = getelementptr inbounds float* %tmp23408, i64 1
- %tmp23410 = getelementptr inbounds float* %tmp23409, i64 1
- %tmp23411 = getelementptr inbounds float* %tmp23410, i64 1
- %tmp23412 = getelementptr inbounds float* %tmp23411, i64 1
- %tmp23413 = getelementptr inbounds float* %tmp23412, i64 1
- %tmp23414 = getelementptr inbounds float* %tmp23413, i64 1
- %tmp23415 = getelementptr inbounds float* %tmp23414, i64 1
- %tmp23416 = getelementptr inbounds float* %tmp23415, i64 1
- %tmp23417 = getelementptr inbounds float* %tmp23416, i64 1
- %tmp23418 = getelementptr inbounds float* %tmp23417, i64 1
- %tmp23419 = getelementptr inbounds float* %tmp23418, i64 1
- %tmp23420 = getelementptr inbounds float* %tmp23419, i64 1
- %tmp23421 = getelementptr inbounds float* %tmp23420, i64 1
- %tmp23422 = getelementptr inbounds float* %tmp23421, i64 1
- %tmp23423 = getelementptr inbounds float* %tmp23422, i64 1
- %tmp23424 = getelementptr inbounds float* %tmp23423, i64 1
- %tmp23425 = getelementptr inbounds float* %tmp23424, i64 1
- %tmp23426 = getelementptr inbounds float* %tmp23425, i64 1
- %tmp23427 = getelementptr inbounds float* %tmp23426, i64 1
- %tmp23428 = getelementptr inbounds float* %tmp23427, i64 1
- %tmp23429 = getelementptr inbounds float* %tmp23428, i64 1
- %tmp23430 = getelementptr inbounds float* %tmp23429, i64 1
- %tmp23431 = getelementptr inbounds float* %tmp23430, i64 1
- %tmp23432 = getelementptr inbounds float* %tmp23431, i64 1
- %tmp23433 = getelementptr inbounds float* %tmp23432, i64 1
- %tmp23434 = getelementptr inbounds float* %tmp23433, i64 1
- %tmp23435 = getelementptr inbounds float* %tmp23434, i64 1
- %tmp23436 = getelementptr inbounds float* %tmp23435, i64 1
- %tmp23437 = getelementptr inbounds float* %tmp23436, i64 1
- %tmp23438 = getelementptr inbounds float* %tmp23437, i64 1
- %tmp23439 = getelementptr inbounds float* %tmp23438, i64 1
- %tmp23440 = getelementptr inbounds float* %tmp23439, i64 1
- %tmp23441 = getelementptr inbounds float* %tmp23440, i64 1
- %tmp23442 = getelementptr inbounds float* %tmp23441, i64 1
- %tmp23443 = getelementptr inbounds float* %tmp23442, i64 1
- %tmp23444 = getelementptr inbounds float* %tmp23443, i64 1
- %tmp23445 = getelementptr inbounds float* %tmp23444, i64 1
- %tmp23446 = getelementptr inbounds float* %tmp23445, i64 1
- %tmp23447 = getelementptr inbounds float* %tmp23446, i64 1
- %tmp23448 = getelementptr inbounds float* %tmp23447, i64 1
- %tmp23449 = getelementptr inbounds float* %tmp23448, i64 1
- %tmp23450 = getelementptr inbounds float* %tmp23449, i64 1
- %tmp23451 = getelementptr inbounds float* %tmp23450, i64 1
- %tmp23452 = getelementptr inbounds float* %tmp23451, i64 1
- %tmp23453 = getelementptr inbounds float* %tmp23452, i64 1
- %tmp23454 = getelementptr inbounds float* %tmp23453, i64 1
- %tmp23455 = getelementptr inbounds float* %tmp23454, i64 1
- %tmp23456 = getelementptr inbounds float* %tmp23455, i64 1
- %tmp23457 = getelementptr inbounds float* %tmp23456, i64 1
- %tmp23458 = getelementptr inbounds float* %tmp23457, i64 1
- %tmp23459 = getelementptr inbounds float* %tmp23458, i64 1
- %tmp23460 = getelementptr inbounds float* %tmp23459, i64 1
- %tmp23461 = getelementptr inbounds float* %tmp23460, i64 1
- %tmp23462 = getelementptr inbounds float* %tmp23461, i64 1
- %tmp23463 = getelementptr inbounds float* %tmp23462, i64 1
- %tmp23464 = getelementptr inbounds float* %tmp23463, i64 1
- %tmp23465 = getelementptr inbounds float* %tmp23464, i64 1
- %tmp23466 = getelementptr inbounds float* %tmp23465, i64 1
- %tmp23467 = getelementptr inbounds float* %tmp23466, i64 1
- %tmp23468 = getelementptr inbounds float* %tmp23467, i64 1
- %tmp23469 = getelementptr inbounds float* %tmp23468, i64 1
- %tmp23470 = getelementptr inbounds float* %tmp23469, i64 1
- %tmp23471 = getelementptr inbounds float* %tmp23470, i64 1
- %tmp23472 = getelementptr inbounds float* %tmp23471, i64 1
- %tmp23473 = getelementptr inbounds float* %tmp23472, i64 1
- %tmp23474 = getelementptr inbounds float* %tmp23473, i64 1
- %tmp23475 = getelementptr inbounds float* %tmp23474, i64 1
- %tmp23476 = getelementptr inbounds float* %tmp23475, i64 1
- %tmp23477 = getelementptr inbounds float* %tmp23476, i64 1
- %tmp23478 = getelementptr inbounds float* %tmp23477, i64 1
- %tmp23479 = getelementptr inbounds float* %tmp23478, i64 1
- %tmp23480 = getelementptr inbounds float* %tmp23479, i64 1
- %tmp23481 = getelementptr inbounds float* %tmp23480, i64 1
- %tmp23482 = getelementptr inbounds float* %tmp23481, i64 1
- %tmp23483 = getelementptr inbounds float* %tmp23482, i64 1
- %tmp23484 = getelementptr inbounds float* %tmp23483, i64 1
- %tmp23485 = getelementptr inbounds float* %tmp23484, i64 1
- %tmp23486 = getelementptr inbounds float* %tmp23485, i64 1
- %tmp23487 = getelementptr inbounds float* %tmp23486, i64 1
- %tmp23488 = getelementptr inbounds float* %tmp23487, i64 1
- %tmp23489 = getelementptr inbounds float* %tmp23488, i64 1
- %tmp23490 = getelementptr inbounds float* %tmp23489, i64 1
- %tmp23491 = getelementptr inbounds float* %tmp23490, i64 1
- %tmp23492 = getelementptr inbounds float* %tmp23491, i64 1
- %tmp23493 = getelementptr inbounds float* %tmp23492, i64 1
- %tmp23494 = getelementptr inbounds float* %tmp23493, i64 1
- %tmp23495 = getelementptr inbounds float* %tmp23494, i64 1
- %tmp23496 = getelementptr inbounds float* %tmp23495, i64 1
- %tmp23497 = getelementptr inbounds float* %tmp23496, i64 1
- %tmp23498 = getelementptr inbounds float* %tmp23497, i64 1
- %tmp23499 = getelementptr inbounds float* %tmp23498, i64 1
- %tmp23500 = getelementptr inbounds float* %tmp23499, i64 1
- %tmp23501 = getelementptr inbounds float* %tmp23500, i64 1
- %tmp23502 = getelementptr inbounds float* %tmp23501, i64 1
- %tmp23503 = getelementptr inbounds float* %tmp23502, i64 1
- %tmp23504 = getelementptr inbounds float* %tmp23503, i64 1
- %tmp23505 = getelementptr inbounds float* %tmp23504, i64 1
- %tmp23506 = getelementptr inbounds float* %tmp23505, i64 1
- %tmp23507 = getelementptr inbounds float* %tmp23506, i64 1
- %tmp23508 = getelementptr inbounds float* %tmp23507, i64 1
- %tmp23509 = getelementptr inbounds float* %tmp23508, i64 1
- %tmp23510 = getelementptr inbounds float* %tmp23509, i64 1
- %tmp23511 = getelementptr inbounds float* %tmp23510, i64 1
- %tmp23512 = getelementptr inbounds float* %tmp23511, i64 1
- %tmp23513 = getelementptr inbounds float* %tmp23512, i64 1
- %tmp23514 = getelementptr inbounds float* %tmp23513, i64 1
- %tmp23515 = getelementptr inbounds float* %tmp23514, i64 1
- %tmp23516 = getelementptr inbounds float* %tmp23515, i64 1
- %tmp23517 = getelementptr inbounds float* %tmp23516, i64 1
- %tmp23518 = getelementptr inbounds float* %tmp23517, i64 1
- %tmp23519 = getelementptr inbounds float* %tmp23518, i64 1
- %tmp23520 = getelementptr inbounds float* %tmp23519, i64 1
- %tmp23521 = getelementptr inbounds float* %tmp23520, i64 1
- %tmp23522 = getelementptr inbounds float* %tmp23521, i64 1
- %tmp23523 = getelementptr inbounds float* %tmp23522, i64 1
- %tmp23524 = getelementptr inbounds float* %tmp23523, i64 1
- %tmp23525 = getelementptr inbounds float* %tmp23524, i64 1
- %tmp23526 = getelementptr inbounds float* %tmp23525, i64 1
- %tmp23527 = getelementptr inbounds float* %tmp23526, i64 1
- %tmp23528 = getelementptr inbounds float* %tmp23527, i64 1
- %tmp23529 = getelementptr inbounds float* %tmp23528, i64 1
- %tmp23530 = getelementptr inbounds float* %tmp23529, i64 1
- %tmp23531 = getelementptr inbounds float* %tmp23530, i64 1
- %tmp23532 = getelementptr inbounds float* %tmp23531, i64 1
- %tmp23533 = getelementptr inbounds float* %tmp23532, i64 1
- %tmp23534 = getelementptr inbounds float* %tmp23533, i64 1
- %tmp23535 = getelementptr inbounds float* %tmp23534, i64 1
- %tmp23536 = getelementptr inbounds float* %tmp23535, i64 1
- %tmp23537 = getelementptr inbounds float* %tmp23536, i64 1
- %tmp23538 = getelementptr inbounds float* %tmp23537, i64 1
- %tmp23539 = getelementptr inbounds float* %tmp23538, i64 1
- %tmp23540 = getelementptr inbounds float* %tmp23539, i64 1
- %tmp23541 = getelementptr inbounds float* %tmp23540, i64 1
- %tmp23542 = getelementptr inbounds float* %tmp23541, i64 1
- %tmp23543 = getelementptr inbounds float* %tmp23542, i64 1
- %tmp23544 = getelementptr inbounds float* %tmp23543, i64 1
- %tmp23545 = getelementptr inbounds float* %tmp23544, i64 1
- %tmp23546 = getelementptr inbounds float* %tmp23545, i64 1
- %tmp23547 = getelementptr inbounds float* %tmp23546, i64 1
- %tmp23548 = getelementptr inbounds float* %tmp23547, i64 1
- %tmp23549 = getelementptr inbounds float* %tmp23548, i64 1
- %tmp23550 = getelementptr inbounds float* %tmp23549, i64 1
- %tmp23551 = getelementptr inbounds float* %tmp23550, i64 1
- %tmp23552 = getelementptr inbounds float* %tmp23551, i64 1
- %tmp23553 = getelementptr inbounds float* %tmp23552, i64 1
- %tmp23554 = getelementptr inbounds float* %tmp23553, i64 1
- %tmp23555 = getelementptr inbounds float* %tmp23554, i64 1
- %tmp23556 = getelementptr inbounds float* %tmp23555, i64 1
- %tmp23557 = getelementptr inbounds float* %tmp23556, i64 1
- %tmp23558 = getelementptr inbounds float* %tmp23557, i64 1
- %tmp23559 = getelementptr inbounds float* %tmp23558, i64 1
- %tmp23560 = getelementptr inbounds float* %tmp23559, i64 1
- %tmp23561 = getelementptr inbounds float* %tmp23560, i64 1
- %tmp23562 = getelementptr inbounds float* %tmp23561, i64 1
- %tmp23563 = getelementptr inbounds float* %tmp23562, i64 1
- %tmp23564 = getelementptr inbounds float* %tmp23563, i64 1
- %tmp23565 = getelementptr inbounds float* %tmp23564, i64 1
- %tmp23566 = getelementptr inbounds float* %tmp23565, i64 1
- %tmp23567 = getelementptr inbounds float* %tmp23566, i64 1
- %tmp23568 = getelementptr inbounds float* %tmp23567, i64 1
- %tmp23569 = getelementptr inbounds float* %tmp23568, i64 1
- %tmp23570 = getelementptr inbounds float* %tmp23569, i64 1
- %tmp23571 = getelementptr inbounds float* %tmp23570, i64 1
- %tmp23572 = getelementptr inbounds float* %tmp23571, i64 1
- %tmp23573 = getelementptr inbounds float* %tmp23572, i64 1
- %tmp23574 = getelementptr inbounds float* %tmp23573, i64 1
- %tmp23575 = getelementptr inbounds float* %tmp23574, i64 1
- %tmp23576 = getelementptr inbounds float* %tmp23575, i64 1
- %tmp23577 = getelementptr inbounds float* %tmp23576, i64 1
- %tmp23578 = getelementptr inbounds float* %tmp23577, i64 1
- %tmp23579 = getelementptr inbounds float* %tmp23578, i64 1
- %tmp23580 = getelementptr inbounds float* %tmp23579, i64 1
- %tmp23581 = getelementptr inbounds float* %tmp23580, i64 1
- %tmp23582 = getelementptr inbounds float* %tmp23581, i64 1
- %tmp23583 = getelementptr inbounds float* %tmp23582, i64 1
- %tmp23584 = getelementptr inbounds float* %tmp23583, i64 1
- %tmp23585 = getelementptr inbounds float* %tmp23584, i64 1
- %tmp23586 = getelementptr inbounds float* %tmp23585, i64 1
- %tmp23587 = getelementptr inbounds float* %tmp23586, i64 1
- %tmp23588 = getelementptr inbounds float* %tmp23587, i64 1
- %tmp23589 = getelementptr inbounds float* %tmp23588, i64 1
- %tmp23590 = getelementptr inbounds float* %tmp23589, i64 1
- %tmp23591 = getelementptr inbounds float* %tmp23590, i64 1
- %tmp23592 = getelementptr inbounds float* %tmp23591, i64 1
- %tmp23593 = getelementptr inbounds float* %tmp23592, i64 1
- %tmp23594 = getelementptr inbounds float* %tmp23593, i64 1
- %tmp23595 = getelementptr inbounds float* %tmp23594, i64 1
- %tmp23596 = getelementptr inbounds float* %tmp23595, i64 1
- %tmp23597 = getelementptr inbounds float* %tmp23596, i64 1
- %tmp23598 = getelementptr inbounds float* %tmp23597, i64 1
- %tmp23599 = getelementptr inbounds float* %tmp23598, i64 1
- %tmp23600 = getelementptr inbounds float* %tmp23599, i64 1
- %tmp23601 = getelementptr inbounds float* %tmp23600, i64 1
- %tmp23602 = getelementptr inbounds float* %tmp23601, i64 1
- %tmp23603 = getelementptr inbounds float* %tmp23602, i64 1
- %tmp23604 = getelementptr inbounds float* %tmp23603, i64 1
- %tmp23605 = getelementptr inbounds float* %tmp23604, i64 1
- %tmp23606 = getelementptr inbounds float* %tmp23605, i64 1
- %tmp23607 = getelementptr inbounds float* %tmp23606, i64 1
- %tmp23608 = getelementptr inbounds float* %tmp23607, i64 1
- %tmp23609 = getelementptr inbounds float* %tmp23608, i64 1
- %tmp23610 = getelementptr inbounds float* %tmp23609, i64 1
- %tmp23611 = getelementptr inbounds float* %tmp23610, i64 1
- %tmp23612 = getelementptr inbounds float* %tmp23611, i64 1
- %tmp23613 = getelementptr inbounds float* %tmp23612, i64 1
- %tmp23614 = getelementptr inbounds float* %tmp23613, i64 1
- %tmp23615 = getelementptr inbounds float* %tmp23614, i64 1
- %tmp23616 = getelementptr inbounds float* %tmp23615, i64 1
- %tmp23617 = getelementptr inbounds float* %tmp23616, i64 1
- %tmp23618 = getelementptr inbounds float* %tmp23617, i64 1
- %tmp23619 = getelementptr inbounds float* %tmp23618, i64 1
- %tmp23620 = getelementptr inbounds float* %tmp23619, i64 1
- %tmp23621 = getelementptr inbounds float* %tmp23620, i64 1
- %tmp23622 = getelementptr inbounds float* %tmp23621, i64 1
- %tmp23623 = getelementptr inbounds float* %tmp23622, i64 1
- %tmp23624 = getelementptr inbounds float* %tmp23623, i64 1
- %tmp23625 = getelementptr inbounds float* %tmp23624, i64 1
- %tmp23626 = getelementptr inbounds float* %tmp23625, i64 1
- %tmp23627 = getelementptr inbounds float* %tmp23626, i64 1
- %tmp23628 = getelementptr inbounds float* %tmp23627, i64 1
- %tmp23629 = getelementptr inbounds float* %tmp23628, i64 1
- %tmp23630 = getelementptr inbounds float* %tmp23629, i64 1
- %tmp23631 = getelementptr inbounds float* %tmp23630, i64 1
- %tmp23632 = getelementptr inbounds float* %tmp23631, i64 1
- %tmp23633 = getelementptr inbounds float* %tmp23632, i64 1
- %tmp23634 = getelementptr inbounds float* %tmp23633, i64 1
- %tmp23635 = getelementptr inbounds float* %tmp23634, i64 1
- %tmp23636 = getelementptr inbounds float* %tmp23635, i64 1
- %tmp23637 = getelementptr inbounds float* %tmp23636, i64 1
- %tmp23638 = getelementptr inbounds float* %tmp23637, i64 1
- %tmp23639 = getelementptr inbounds float* %tmp23638, i64 1
- %tmp23640 = getelementptr inbounds float* %tmp23639, i64 1
- %tmp23641 = getelementptr inbounds float* %tmp23640, i64 1
- %tmp23642 = getelementptr inbounds float* %tmp23641, i64 1
- %tmp23643 = getelementptr inbounds float* %tmp23642, i64 1
- %tmp23644 = getelementptr inbounds float* %tmp23643, i64 1
- %tmp23645 = getelementptr inbounds float* %tmp23644, i64 1
- %tmp23646 = getelementptr inbounds float* %tmp23645, i64 1
- %tmp23647 = getelementptr inbounds float* %tmp23646, i64 1
- %tmp23648 = getelementptr inbounds float* %tmp23647, i64 1
- %tmp23649 = getelementptr inbounds float* %tmp23648, i64 1
- %tmp23650 = getelementptr inbounds float* %tmp23649, i64 1
- %tmp23651 = getelementptr inbounds float* %tmp23650, i64 1
- %tmp23652 = getelementptr inbounds float* %tmp23651, i64 1
- %tmp23653 = getelementptr inbounds float* %tmp23652, i64 1
- %tmp23654 = getelementptr inbounds float* %tmp23653, i64 1
- %tmp23655 = getelementptr inbounds float* %tmp23654, i64 1
- %tmp23656 = getelementptr inbounds float* %tmp23655, i64 1
- %tmp23657 = getelementptr inbounds float* %tmp23656, i64 1
- %tmp23658 = getelementptr inbounds float* %tmp23657, i64 1
- %tmp23659 = getelementptr inbounds float* %tmp23658, i64 1
- %tmp23660 = getelementptr inbounds float* %tmp23659, i64 1
- %tmp23661 = getelementptr inbounds float* %tmp23660, i64 1
- %tmp23662 = getelementptr inbounds float* %tmp23661, i64 1
- %tmp23663 = getelementptr inbounds float* %tmp23662, i64 1
- %tmp23664 = getelementptr inbounds float* %tmp23663, i64 1
- %tmp23665 = getelementptr inbounds float* %tmp23664, i64 1
- %tmp23666 = getelementptr inbounds float* %tmp23665, i64 1
- %tmp23667 = getelementptr inbounds float* %tmp23666, i64 1
- %tmp23668 = getelementptr inbounds float* %tmp23667, i64 1
- %tmp23669 = getelementptr inbounds float* %tmp23668, i64 1
- %tmp23670 = getelementptr inbounds float* %tmp23669, i64 1
- %tmp23671 = getelementptr inbounds float* %tmp23670, i64 1
- %tmp23672 = getelementptr inbounds float* %tmp23671, i64 1
- %tmp23673 = getelementptr inbounds float* %tmp23672, i64 1
- %tmp23674 = getelementptr inbounds float* %tmp23673, i64 1
- %tmp23675 = getelementptr inbounds float* %tmp23674, i64 1
- %tmp23676 = getelementptr inbounds float* %tmp23675, i64 1
- %tmp23677 = getelementptr inbounds float* %tmp23676, i64 1
- %tmp23678 = getelementptr inbounds float* %tmp23677, i64 1
- %tmp23679 = getelementptr inbounds float* %tmp23678, i64 1
- %tmp23680 = getelementptr inbounds float* %tmp23679, i64 1
- %tmp23681 = getelementptr inbounds float* %tmp23680, i64 1
- %tmp23682 = getelementptr inbounds float* %tmp23681, i64 1
- %tmp23683 = getelementptr inbounds float* %tmp23682, i64 1
- %tmp23684 = getelementptr inbounds float* %tmp23683, i64 1
- %tmp23685 = getelementptr inbounds float* %tmp23684, i64 1
- %tmp23686 = getelementptr inbounds float* %tmp23685, i64 1
- %tmp23687 = getelementptr inbounds float* %tmp23686, i64 1
- %tmp23688 = getelementptr inbounds float* %tmp23687, i64 1
- %tmp23689 = getelementptr inbounds float* %tmp23688, i64 1
- %tmp23690 = getelementptr inbounds float* %tmp23689, i64 1
- %tmp23691 = getelementptr inbounds float* %tmp23690, i64 1
- %tmp23692 = getelementptr inbounds float* %tmp23691, i64 1
- %tmp23693 = getelementptr inbounds float* %tmp23692, i64 1
- %tmp23694 = getelementptr inbounds float* %tmp23693, i64 1
- %tmp23695 = getelementptr inbounds float* %tmp23694, i64 1
- %tmp23696 = getelementptr inbounds float* %tmp23695, i64 1
- %tmp23697 = getelementptr inbounds float* %tmp23696, i64 1
- %tmp23698 = getelementptr inbounds float* %tmp23697, i64 1
- %tmp23699 = getelementptr inbounds float* %tmp23698, i64 1
- %tmp23700 = getelementptr inbounds float* %tmp23699, i64 1
- %tmp23701 = getelementptr inbounds float* %tmp23700, i64 1
- %tmp23702 = getelementptr inbounds float* %tmp23701, i64 1
- %tmp23703 = getelementptr inbounds float* %tmp23702, i64 1
- %tmp23704 = getelementptr inbounds float* %tmp23703, i64 1
- %tmp23705 = getelementptr inbounds float* %tmp23704, i64 1
- %tmp23706 = getelementptr inbounds float* %tmp23705, i64 1
- %tmp23707 = getelementptr inbounds float* %tmp23706, i64 1
- %tmp23708 = getelementptr inbounds float* %tmp23707, i64 1
- %tmp23709 = getelementptr inbounds float* %tmp23708, i64 1
- %tmp23710 = getelementptr inbounds float* %tmp23709, i64 1
- %tmp23711 = getelementptr inbounds float* %tmp23710, i64 1
- %tmp23712 = getelementptr inbounds float* %tmp23711, i64 1
- %tmp23713 = getelementptr inbounds float* %tmp23712, i64 1
- %tmp23714 = getelementptr inbounds float* %tmp23713, i64 1
- %tmp23715 = getelementptr inbounds float* %tmp23714, i64 1
- %tmp23716 = getelementptr inbounds float* %tmp23715, i64 1
- %tmp23717 = getelementptr inbounds float* %tmp23716, i64 1
- %tmp23718 = getelementptr inbounds float* %tmp23717, i64 1
- %tmp23719 = getelementptr inbounds float* %tmp23718, i64 1
- %tmp23720 = getelementptr inbounds float* %tmp23719, i64 1
- %tmp23721 = getelementptr inbounds float* %tmp23720, i64 1
- %tmp23722 = getelementptr inbounds float* %tmp23721, i64 1
- %tmp23723 = getelementptr inbounds float* %tmp23722, i64 1
- %tmp23724 = getelementptr inbounds float* %tmp23723, i64 1
- %tmp23725 = getelementptr inbounds float* %tmp23724, i64 1
- %tmp23726 = getelementptr inbounds float* %tmp23725, i64 1
- %tmp23727 = getelementptr inbounds float* %tmp23726, i64 1
- %tmp23728 = getelementptr inbounds float* %tmp23727, i64 1
- %tmp23729 = getelementptr inbounds float* %tmp23728, i64 1
- %tmp23730 = getelementptr inbounds float* %tmp23729, i64 1
- %tmp23731 = getelementptr inbounds float* %tmp23730, i64 1
- %tmp23732 = getelementptr inbounds float* %tmp23731, i64 1
- %tmp23733 = getelementptr inbounds float* %tmp23732, i64 1
- %tmp23734 = getelementptr inbounds float* %tmp23733, i64 1
- %tmp23735 = getelementptr inbounds float* %tmp23734, i64 1
- %tmp23736 = getelementptr inbounds float* %tmp23735, i64 1
- %tmp23737 = getelementptr inbounds float* %tmp23736, i64 1
- %tmp23738 = getelementptr inbounds float* %tmp23737, i64 1
- %tmp23739 = getelementptr inbounds float* %tmp23738, i64 1
- %tmp23740 = getelementptr inbounds float* %tmp23739, i64 1
- %tmp23741 = getelementptr inbounds float* %tmp23740, i64 1
- %tmp23742 = getelementptr inbounds float* %tmp23741, i64 1
- %tmp23743 = getelementptr inbounds float* %tmp23742, i64 1
- %tmp23744 = getelementptr inbounds float* %tmp23743, i64 1
- %tmp23745 = getelementptr inbounds float* %tmp23744, i64 1
- %tmp23746 = getelementptr inbounds float* %tmp23745, i64 1
- %tmp23747 = getelementptr inbounds float* %tmp23746, i64 1
- %tmp23748 = getelementptr inbounds float* %tmp23747, i64 1
- %tmp23749 = getelementptr inbounds float* %tmp23748, i64 1
- %tmp23750 = getelementptr inbounds float* %tmp23749, i64 1
- %tmp23751 = getelementptr inbounds float* %tmp23750, i64 1
- %tmp23752 = getelementptr inbounds float* %tmp23751, i64 1
- %tmp23753 = getelementptr inbounds float* %tmp23752, i64 1
- %tmp23754 = getelementptr inbounds float* %tmp23753, i64 1
- %tmp23755 = getelementptr inbounds float* %tmp23754, i64 1
- %tmp23756 = getelementptr inbounds float* %tmp23755, i64 1
- %tmp23757 = getelementptr inbounds float* %tmp23756, i64 1
- %tmp23758 = getelementptr inbounds float* %tmp23757, i64 1
- %tmp23759 = getelementptr inbounds float* %tmp23758, i64 1
- %tmp23760 = getelementptr inbounds float* %tmp23759, i64 1
- %tmp23761 = getelementptr inbounds float* %tmp23760, i64 1
- %tmp23762 = getelementptr inbounds float* %tmp23761, i64 1
- %tmp23763 = getelementptr inbounds float* %tmp23762, i64 1
- %tmp23764 = getelementptr inbounds float* %tmp23763, i64 1
- %tmp23765 = getelementptr inbounds float* %tmp23764, i64 1
- %tmp23766 = getelementptr inbounds float* %tmp23765, i64 1
- %tmp23767 = getelementptr inbounds float* %tmp23766, i64 1
- %tmp23768 = getelementptr inbounds float* %tmp23767, i64 1
- %tmp23769 = getelementptr inbounds float* %tmp23768, i64 1
- %tmp23770 = getelementptr inbounds float* %tmp23769, i64 1
- %tmp23771 = getelementptr inbounds float* %tmp23770, i64 1
- %tmp23772 = getelementptr inbounds float* %tmp23771, i64 1
- %tmp23773 = getelementptr inbounds float* %tmp23772, i64 1
- %tmp23774 = getelementptr inbounds float* %tmp23773, i64 1
- %tmp23775 = getelementptr inbounds float* %tmp23774, i64 1
- %tmp23776 = getelementptr inbounds float* %tmp23775, i64 1
- %tmp23777 = getelementptr inbounds float* %tmp23776, i64 1
- %tmp23778 = getelementptr inbounds float* %tmp23777, i64 1
- %tmp23779 = getelementptr inbounds float* %tmp23778, i64 1
- %tmp23780 = getelementptr inbounds float* %tmp23779, i64 1
- %tmp23781 = getelementptr inbounds float* %tmp23780, i64 1
- %tmp23782 = getelementptr inbounds float* %tmp23781, i64 1
- %tmp23783 = getelementptr inbounds float* %tmp23782, i64 1
- %tmp23784 = getelementptr inbounds float* %tmp23783, i64 1
- %tmp23785 = getelementptr inbounds float* %tmp23784, i64 1
- %tmp23786 = getelementptr inbounds float* %tmp23785, i64 1
- %tmp23787 = getelementptr inbounds float* %tmp23786, i64 1
- %tmp23788 = getelementptr inbounds float* %tmp23787, i64 1
- %tmp23789 = getelementptr inbounds float* %tmp23788, i64 1
- %tmp23790 = getelementptr inbounds float* %tmp23789, i64 1
- %tmp23791 = getelementptr inbounds float* %tmp23790, i64 1
- %tmp23792 = getelementptr inbounds float* %tmp23791, i64 1
- %tmp23793 = getelementptr inbounds float* %tmp23792, i64 1
- %tmp23794 = getelementptr inbounds float* %tmp23793, i64 1
- %tmp23795 = getelementptr inbounds float* %tmp23794, i64 1
- %tmp23796 = getelementptr inbounds float* %tmp23795, i64 1
- %tmp23797 = getelementptr inbounds float* %tmp23796, i64 1
- %tmp23798 = getelementptr inbounds float* %tmp23797, i64 1
- %tmp23799 = getelementptr inbounds float* %tmp23798, i64 1
- %tmp23800 = getelementptr inbounds float* %tmp23799, i64 1
- %tmp23801 = getelementptr inbounds float* %tmp23800, i64 1
- %tmp23802 = getelementptr inbounds float* %tmp23801, i64 1
- %tmp23803 = getelementptr inbounds float* %tmp23802, i64 1
- %tmp23804 = getelementptr inbounds float* %tmp23803, i64 1
- %tmp23805 = getelementptr inbounds float* %tmp23804, i64 1
- %tmp23806 = getelementptr inbounds float* %tmp23805, i64 1
- %tmp23807 = getelementptr inbounds float* %tmp23806, i64 1
- %tmp23808 = getelementptr inbounds float* %tmp23807, i64 1
- %tmp23809 = getelementptr inbounds float* %tmp23808, i64 1
- %tmp23810 = getelementptr inbounds float* %tmp23809, i64 1
- %tmp23811 = getelementptr inbounds float* %tmp23810, i64 1
- %tmp23812 = getelementptr inbounds float* %tmp23811, i64 1
- %tmp23813 = getelementptr inbounds float* %tmp23812, i64 1
- %tmp23814 = getelementptr inbounds float* %tmp23813, i64 1
- %tmp23815 = getelementptr inbounds float* %tmp23814, i64 1
- %tmp23816 = getelementptr inbounds float* %tmp23815, i64 1
- %tmp23817 = getelementptr inbounds float* %tmp23816, i64 1
- %tmp23818 = getelementptr inbounds float* %tmp23817, i64 1
- %tmp23819 = getelementptr inbounds float* %tmp23818, i64 1
- %tmp23820 = getelementptr inbounds float* %tmp23819, i64 1
- %tmp23821 = getelementptr inbounds float* %tmp23820, i64 1
- %tmp23822 = getelementptr inbounds float* %tmp23821, i64 1
- %tmp23823 = getelementptr inbounds float* %tmp23822, i64 1
- %tmp23824 = getelementptr inbounds float* %tmp23823, i64 1
- %tmp23825 = getelementptr inbounds float* %tmp23824, i64 1
- %tmp23826 = getelementptr inbounds float* %tmp23825, i64 1
- %tmp23827 = getelementptr inbounds float* %tmp23826, i64 1
- %tmp23828 = getelementptr inbounds float* %tmp23827, i64 1
- %tmp23829 = getelementptr inbounds float* %tmp23828, i64 1
- %tmp23830 = getelementptr inbounds float* %tmp23829, i64 1
- %tmp23831 = getelementptr inbounds float* %tmp23830, i64 1
- %tmp23832 = getelementptr inbounds float* %tmp23831, i64 1
- %tmp23833 = getelementptr inbounds float* %tmp23832, i64 1
- %tmp23834 = getelementptr inbounds float* %tmp23833, i64 1
- %tmp23835 = getelementptr inbounds float* %tmp23834, i64 1
- %tmp23836 = getelementptr inbounds float* %tmp23835, i64 1
- %tmp23837 = getelementptr inbounds float* %tmp23836, i64 1
- %tmp23838 = getelementptr inbounds float* %tmp23837, i64 1
- %tmp23839 = getelementptr inbounds float* %tmp23838, i64 1
- %tmp23840 = getelementptr inbounds float* %tmp23839, i64 1
- %tmp23841 = getelementptr inbounds float* %tmp23840, i64 1
- %tmp23842 = getelementptr inbounds float* %tmp23841, i64 1
- %tmp23843 = getelementptr inbounds float* %tmp23842, i64 1
- %tmp23844 = getelementptr inbounds float* %tmp23843, i64 1
- %tmp23845 = getelementptr inbounds float* %tmp23844, i64 1
- %tmp23846 = getelementptr inbounds float* %tmp23845, i64 1
- %tmp23847 = getelementptr inbounds float* %tmp23846, i64 1
- %tmp23848 = getelementptr inbounds float* %tmp23847, i64 1
- %tmp23849 = getelementptr inbounds float* %tmp23848, i64 1
- %tmp23850 = getelementptr inbounds float* %tmp23849, i64 1
- %tmp23851 = getelementptr inbounds float* %tmp23850, i64 1
- %tmp23852 = getelementptr inbounds float* %tmp23851, i64 1
- %tmp23853 = getelementptr inbounds float* %tmp23852, i64 1
- %tmp23854 = getelementptr inbounds float* %tmp23853, i64 1
- %tmp23855 = getelementptr inbounds float* %tmp23854, i64 1
- %tmp23856 = getelementptr inbounds float* %tmp23855, i64 1
- %tmp23857 = getelementptr inbounds float* %tmp23856, i64 1
- %tmp23858 = getelementptr inbounds float* %tmp23857, i64 1
- %tmp23859 = getelementptr inbounds float* %tmp23858, i64 1
- %tmp23860 = getelementptr inbounds float* %tmp23859, i64 1
- %tmp23861 = getelementptr inbounds float* %tmp23860, i64 1
- %tmp23862 = getelementptr inbounds float* %tmp23861, i64 1
- %tmp23863 = getelementptr inbounds float* %tmp23862, i64 1
- %tmp23864 = getelementptr inbounds float* %tmp23863, i64 1
- %tmp23865 = getelementptr inbounds float* %tmp23864, i64 1
- %tmp23866 = getelementptr inbounds float* %tmp23865, i64 1
- %tmp23867 = getelementptr inbounds float* %tmp23866, i64 1
- %tmp23868 = getelementptr inbounds float* %tmp23867, i64 1
- %tmp23869 = getelementptr inbounds float* %tmp23868, i64 1
- %tmp23870 = getelementptr inbounds float* %tmp23869, i64 1
- %tmp23871 = getelementptr inbounds float* %tmp23870, i64 1
- %tmp23872 = getelementptr inbounds float* %tmp23871, i64 1
- %tmp23873 = getelementptr inbounds float* %tmp23872, i64 1
- %tmp23874 = getelementptr inbounds float* %tmp23873, i64 1
- %tmp23875 = getelementptr inbounds float* %tmp23874, i64 1
- %tmp23876 = getelementptr inbounds float* %tmp23875, i64 1
- %tmp23877 = getelementptr inbounds float* %tmp23876, i64 1
- %tmp23878 = getelementptr inbounds float* %tmp23877, i64 1
- %tmp23879 = getelementptr inbounds float* %tmp23878, i64 1
- %tmp23880 = getelementptr inbounds float* %tmp23879, i64 1
- %tmp23881 = getelementptr inbounds float* %tmp23880, i64 1
- %tmp23882 = getelementptr inbounds float* %tmp23881, i64 1
- %tmp23883 = getelementptr inbounds float* %tmp23882, i64 1
- %tmp23884 = getelementptr inbounds float* %tmp23883, i64 1
- %tmp23885 = getelementptr inbounds float* %tmp23884, i64 1
- %tmp23886 = getelementptr inbounds float* %tmp23885, i64 1
- %tmp23887 = getelementptr inbounds float* %tmp23886, i64 1
- %tmp23888 = getelementptr inbounds float* %tmp23887, i64 1
- %tmp23889 = getelementptr inbounds float* %tmp23888, i64 1
- %tmp23890 = getelementptr inbounds float* %tmp23889, i64 1
- %tmp23891 = getelementptr inbounds float* %tmp23890, i64 1
- %tmp23892 = getelementptr inbounds float* %tmp23891, i64 1
- %tmp23893 = getelementptr inbounds float* %tmp23892, i64 1
- %tmp23894 = getelementptr inbounds float* %tmp23893, i64 1
- %tmp23895 = getelementptr inbounds float* %tmp23894, i64 1
- %tmp23896 = getelementptr inbounds float* %tmp23895, i64 1
- %tmp23897 = getelementptr inbounds float* %tmp23896, i64 1
- %tmp23898 = getelementptr inbounds float* %tmp23897, i64 1
- %tmp23899 = getelementptr inbounds float* %tmp23898, i64 1
- %tmp23900 = getelementptr inbounds float* %tmp23899, i64 1
- %tmp23901 = getelementptr inbounds float* %tmp23900, i64 1
- %tmp23902 = getelementptr inbounds float* %tmp23901, i64 1
- %tmp23903 = getelementptr inbounds float* %tmp23902, i64 1
- %tmp23904 = getelementptr inbounds float* %tmp23903, i64 1
- %tmp23905 = getelementptr inbounds float* %tmp23904, i64 1
- %tmp23906 = getelementptr inbounds float* %tmp23905, i64 1
- %tmp23907 = getelementptr inbounds float* %tmp23906, i64 1
- %tmp23908 = getelementptr inbounds float* %tmp23907, i64 1
- %tmp23909 = getelementptr inbounds float* %tmp23908, i64 1
- %tmp23910 = getelementptr inbounds float* %tmp23909, i64 1
- %tmp23911 = getelementptr inbounds float* %tmp23910, i64 1
- %tmp23912 = getelementptr inbounds float* %tmp23911, i64 1
- %tmp23913 = getelementptr inbounds float* %tmp23912, i64 1
- %tmp23914 = getelementptr inbounds float* %tmp23913, i64 1
- %tmp23915 = getelementptr inbounds float* %tmp23914, i64 1
- %tmp23916 = getelementptr inbounds float* %tmp23915, i64 1
- %tmp23917 = getelementptr inbounds float* %tmp23916, i64 1
- %tmp23918 = getelementptr inbounds float* %tmp23917, i64 1
- %tmp23919 = getelementptr inbounds float* %tmp23918, i64 1
- %tmp23920 = getelementptr inbounds float* %tmp23919, i64 1
- %tmp23921 = getelementptr inbounds float* %tmp23920, i64 1
- %tmp23922 = getelementptr inbounds float* %tmp23921, i64 1
- %tmp23923 = getelementptr inbounds float* %tmp23922, i64 1
- %tmp23924 = getelementptr inbounds float* %tmp23923, i64 1
- %tmp23925 = getelementptr inbounds float* %tmp23924, i64 1
- %tmp23926 = getelementptr inbounds float* %tmp23925, i64 1
- %tmp23927 = getelementptr inbounds float* %tmp23926, i64 1
- %tmp23928 = getelementptr inbounds float* %tmp23927, i64 1
- %tmp23929 = getelementptr inbounds float* %tmp23928, i64 1
- %tmp23930 = getelementptr inbounds float* %tmp23929, i64 1
- %tmp23931 = getelementptr inbounds float* %tmp23930, i64 1
- %tmp23932 = getelementptr inbounds float* %tmp23931, i64 1
- %tmp23933 = getelementptr inbounds float* %tmp23932, i64 1
- %tmp23934 = getelementptr inbounds float* %tmp23933, i64 1
- %tmp23935 = getelementptr inbounds float* %tmp23934, i64 1
- %tmp23936 = getelementptr inbounds float* %tmp23935, i64 1
- %tmp23937 = getelementptr inbounds float* %tmp23936, i64 1
- %tmp23938 = getelementptr inbounds float* %tmp23937, i64 1
- %tmp23939 = getelementptr inbounds float* %tmp23938, i64 1
- %tmp23940 = getelementptr inbounds float* %tmp23939, i64 1
- %tmp23941 = getelementptr inbounds float* %tmp23940, i64 1
- %tmp23942 = getelementptr inbounds float* %tmp23941, i64 1
- %tmp23943 = getelementptr inbounds float* %tmp23942, i64 1
- %tmp23944 = getelementptr inbounds float* %tmp23943, i64 1
- %tmp23945 = getelementptr inbounds float* %tmp23944, i64 1
- %tmp23946 = getelementptr inbounds float* %tmp23945, i64 1
- %tmp23947 = getelementptr inbounds float* %tmp23946, i64 1
- %tmp23948 = getelementptr inbounds float* %tmp23947, i64 1
- %tmp23949 = getelementptr inbounds float* %tmp23948, i64 1
- %tmp23950 = getelementptr inbounds float* %tmp23949, i64 1
- %tmp23951 = getelementptr inbounds float* %tmp23950, i64 1
- %tmp23952 = getelementptr inbounds float* %tmp23951, i64 1
- %tmp23953 = getelementptr inbounds float* %tmp23952, i64 1
- %tmp23954 = getelementptr inbounds float* %tmp23953, i64 1
- %tmp23955 = getelementptr inbounds float* %tmp23954, i64 1
- %tmp23956 = getelementptr inbounds float* %tmp23955, i64 1
- %tmp23957 = getelementptr inbounds float* %tmp23956, i64 1
- %tmp23958 = getelementptr inbounds float* %tmp23957, i64 1
- %tmp23959 = getelementptr inbounds float* %tmp23958, i64 1
- %tmp23960 = getelementptr inbounds float* %tmp23959, i64 1
- %tmp23961 = getelementptr inbounds float* %tmp23960, i64 1
- %tmp23962 = getelementptr inbounds float* %tmp23961, i64 1
- %tmp23963 = getelementptr inbounds float* %tmp23962, i64 1
- %tmp23964 = getelementptr inbounds float* %tmp23963, i64 1
- %tmp23965 = getelementptr inbounds float* %tmp23964, i64 1
- %tmp23966 = getelementptr inbounds float* %tmp23965, i64 1
- %tmp23967 = getelementptr inbounds float* %tmp23966, i64 1
- %tmp23968 = getelementptr inbounds float* %tmp23967, i64 1
- %tmp23969 = getelementptr inbounds float* %tmp23968, i64 1
- %tmp23970 = getelementptr inbounds float* %tmp23969, i64 1
- %tmp23971 = getelementptr inbounds float* %tmp23970, i64 1
- %tmp23972 = getelementptr inbounds float* %tmp23971, i64 1
- %tmp23973 = getelementptr inbounds float* %tmp23972, i64 1
- %tmp23974 = getelementptr inbounds float* %tmp23973, i64 1
- %tmp23975 = getelementptr inbounds float* %tmp23974, i64 1
- %tmp23976 = getelementptr inbounds float* %tmp23975, i64 1
- %tmp23977 = getelementptr inbounds float* %tmp23976, i64 1
- %tmp23978 = getelementptr inbounds float* %tmp23977, i64 1
- %tmp23979 = getelementptr inbounds float* %tmp23978, i64 1
- %tmp23980 = getelementptr inbounds float* %tmp23979, i64 1
- %tmp23981 = getelementptr inbounds float* %tmp23980, i64 1
- %tmp23982 = getelementptr inbounds float* %tmp23981, i64 1
- %tmp23983 = getelementptr inbounds float* %tmp23982, i64 1
- %tmp23984 = getelementptr inbounds float* %tmp23983, i64 1
- %tmp23985 = getelementptr inbounds float* %tmp23984, i64 1
- %tmp23986 = getelementptr inbounds float* %tmp23985, i64 1
- %tmp23987 = getelementptr inbounds float* %tmp23986, i64 1
- %tmp23988 = getelementptr inbounds float* %tmp23987, i64 1
- %tmp23989 = getelementptr inbounds float* %tmp23988, i64 1
- %tmp23990 = getelementptr inbounds float* %tmp23989, i64 1
- %tmp23991 = getelementptr inbounds float* %tmp23990, i64 1
- %tmp23992 = getelementptr inbounds float* %tmp23991, i64 1
- %tmp23993 = getelementptr inbounds float* %tmp23992, i64 1
- %tmp23994 = getelementptr inbounds float* %tmp23993, i64 1
- %tmp23995 = getelementptr inbounds float* %tmp23994, i64 1
- %tmp23996 = getelementptr inbounds float* %tmp23995, i64 1
- %tmp23997 = getelementptr inbounds float* %tmp23996, i64 1
- %tmp23998 = getelementptr inbounds float* %tmp23997, i64 1
- %tmp23999 = getelementptr inbounds float* %tmp23998, i64 1
- %tmp24000 = getelementptr inbounds float* %tmp23999, i64 1
- %tmp24001 = getelementptr inbounds float* %tmp24000, i64 1
- %tmp24002 = getelementptr inbounds float* %tmp24001, i64 1
- %tmp24003 = getelementptr inbounds float* %tmp24002, i64 1
- %tmp24004 = getelementptr inbounds float* %tmp24003, i64 1
- %tmp24005 = getelementptr inbounds float* %tmp24004, i64 1
- %tmp24006 = getelementptr inbounds float* %tmp24005, i64 1
- %tmp24007 = getelementptr inbounds float* %tmp24006, i64 1
- %tmp24008 = getelementptr inbounds float* %tmp24007, i64 1
- %tmp24009 = getelementptr inbounds float* %tmp24008, i64 1
- %tmp24010 = getelementptr inbounds float* %tmp24009, i64 1
- %tmp24011 = getelementptr inbounds float* %tmp24010, i64 1
- %tmp24012 = getelementptr inbounds float* %tmp24011, i64 1
- %tmp24013 = getelementptr inbounds float* %tmp24012, i64 1
- %tmp24014 = getelementptr inbounds float* %tmp24013, i64 1
- %tmp24015 = getelementptr inbounds float* %tmp24014, i64 1
- %tmp24016 = getelementptr inbounds float* %tmp24015, i64 1
- %tmp24017 = getelementptr inbounds float* %tmp24016, i64 1
- %tmp24018 = getelementptr inbounds float* %tmp24017, i64 1
- %tmp24019 = getelementptr inbounds float* %tmp24018, i64 1
- %tmp24020 = getelementptr inbounds float* %tmp24019, i64 1
- %tmp24021 = getelementptr inbounds float* %tmp24020, i64 1
- %tmp24022 = getelementptr inbounds float* %tmp24021, i64 1
- %tmp24023 = getelementptr inbounds float* %tmp24022, i64 1
- %tmp24024 = getelementptr inbounds float* %tmp24023, i64 1
- %tmp24025 = getelementptr inbounds float* %tmp24024, i64 1
- %tmp24026 = getelementptr inbounds float* %tmp24025, i64 1
- %tmp24027 = getelementptr inbounds float* %tmp24026, i64 1
- %tmp24028 = getelementptr inbounds float* %tmp24027, i64 1
- %tmp24029 = getelementptr inbounds float* %tmp24028, i64 1
- %tmp24030 = getelementptr inbounds float* %tmp24029, i64 1
- %tmp24031 = getelementptr inbounds float* %tmp24030, i64 1
- %tmp24032 = getelementptr inbounds float* %tmp24031, i64 1
- %tmp24033 = getelementptr inbounds float* %tmp24032, i64 1
- %tmp24034 = getelementptr inbounds float* %tmp24033, i64 1
- %tmp24035 = getelementptr inbounds float* %tmp24034, i64 1
- %tmp24036 = getelementptr inbounds float* %tmp24035, i64 1
- %tmp24037 = getelementptr inbounds float* %tmp24036, i64 1
- %tmp24038 = getelementptr inbounds float* %tmp24037, i64 1
- %tmp24039 = getelementptr inbounds float* %tmp24038, i64 1
- %tmp24040 = getelementptr inbounds float* %tmp24039, i64 1
- %tmp24041 = getelementptr inbounds float* %tmp24040, i64 1
- %tmp24042 = getelementptr inbounds float* %tmp24041, i64 1
- %tmp24043 = getelementptr inbounds float* %tmp24042, i64 1
- %tmp24044 = getelementptr inbounds float* %tmp24043, i64 1
- %tmp24045 = getelementptr inbounds float* %tmp24044, i64 1
- %tmp24046 = getelementptr inbounds float* %tmp24045, i64 1
- %tmp24047 = getelementptr inbounds float* %tmp24046, i64 1
- %tmp24048 = getelementptr inbounds float* %tmp24047, i64 1
- %tmp24049 = getelementptr inbounds float* %tmp24048, i64 1
- %tmp24050 = getelementptr inbounds float* %tmp24049, i64 1
- %tmp24051 = getelementptr inbounds float* %tmp24050, i64 1
- %tmp24052 = getelementptr inbounds float* %tmp24051, i64 1
- %tmp24053 = getelementptr inbounds float* %tmp24052, i64 1
- %tmp24054 = getelementptr inbounds float* %tmp24053, i64 1
- %tmp24055 = getelementptr inbounds float* %tmp24054, i64 1
- %tmp24056 = getelementptr inbounds float* %tmp24055, i64 1
- %tmp24057 = getelementptr inbounds float* %tmp24056, i64 1
- %tmp24058 = getelementptr inbounds float* %tmp24057, i64 1
- %tmp24059 = getelementptr inbounds float* %tmp24058, i64 1
- %tmp24060 = getelementptr inbounds float* %tmp24059, i64 1
- %tmp24061 = getelementptr inbounds float* %tmp24060, i64 1
- %tmp24062 = getelementptr inbounds float* %tmp24061, i64 1
- %tmp24063 = getelementptr inbounds float* %tmp24062, i64 1
- %tmp24064 = getelementptr inbounds float* %tmp24063, i64 1
- %tmp24065 = getelementptr inbounds float* %tmp24064, i64 1
- %tmp24066 = getelementptr inbounds float* %tmp24065, i64 1
- %tmp24067 = getelementptr inbounds float* %tmp24066, i64 1
- %tmp24068 = getelementptr inbounds float* %tmp24067, i64 1
- %tmp24069 = getelementptr inbounds float* %tmp24068, i64 1
- %tmp24070 = getelementptr inbounds float* %tmp24069, i64 1
- %tmp24071 = getelementptr inbounds float* %tmp24070, i64 1
- %tmp24072 = getelementptr inbounds float* %tmp24071, i64 1
- %tmp24073 = getelementptr inbounds float* %tmp24072, i64 1
- %tmp24074 = getelementptr inbounds float* %tmp24073, i64 1
- %tmp24075 = getelementptr inbounds float* %tmp24074, i64 1
- %tmp24076 = getelementptr inbounds float* %tmp24075, i64 1
- %tmp24077 = getelementptr inbounds float* %tmp24076, i64 1
- %tmp24078 = getelementptr inbounds float* %tmp24077, i64 1
- %tmp24079 = getelementptr inbounds float* %tmp24078, i64 1
- %tmp24080 = getelementptr inbounds float* %tmp24079, i64 1
- %tmp24081 = getelementptr inbounds float* %tmp24080, i64 1
- %tmp24082 = getelementptr inbounds float* %tmp24081, i64 1
- %tmp24083 = getelementptr inbounds float* %tmp24082, i64 1
- %tmp24084 = getelementptr inbounds float* %tmp24083, i64 1
- %tmp24085 = getelementptr inbounds float* %tmp24084, i64 1
- %tmp24086 = getelementptr inbounds float* %tmp24085, i64 1
- %tmp24087 = getelementptr inbounds float* %tmp24086, i64 1
- %tmp24088 = getelementptr inbounds float* %tmp24087, i64 1
- %tmp24089 = getelementptr inbounds float* %tmp24088, i64 1
- %tmp24090 = getelementptr inbounds float* %tmp24089, i64 1
- %tmp24091 = getelementptr inbounds float* %tmp24090, i64 1
- %tmp24092 = getelementptr inbounds float* %tmp24091, i64 1
- %tmp24093 = getelementptr inbounds float* %tmp24092, i64 1
- %tmp24094 = getelementptr inbounds float* %tmp24093, i64 1
- %tmp24095 = getelementptr inbounds float* %tmp24094, i64 1
- %tmp24096 = getelementptr inbounds float* %tmp24095, i64 1
- %tmp24097 = getelementptr inbounds float* %tmp24096, i64 1
- %tmp24098 = getelementptr inbounds float* %tmp24097, i64 1
- %tmp24099 = getelementptr inbounds float* %tmp24098, i64 1
- %tmp24100 = getelementptr inbounds float* %tmp24099, i64 1
- %tmp24101 = getelementptr inbounds float* %tmp24100, i64 1
- %tmp24102 = getelementptr inbounds float* %tmp24101, i64 1
- %tmp24103 = getelementptr inbounds float* %tmp24102, i64 1
- %tmp24104 = getelementptr inbounds float* %tmp24103, i64 1
- %tmp24105 = getelementptr inbounds float* %tmp24104, i64 1
- %tmp24106 = getelementptr inbounds float* %tmp24105, i64 1
- %tmp24107 = getelementptr inbounds float* %tmp24106, i64 1
- %tmp24108 = getelementptr inbounds float* %tmp24107, i64 1
- %tmp24109 = getelementptr inbounds float* %tmp24108, i64 1
- %tmp24110 = getelementptr inbounds float* %tmp24109, i64 1
- %tmp24111 = getelementptr inbounds float* %tmp24110, i64 1
- %tmp24112 = getelementptr inbounds float* %tmp24111, i64 1
- %tmp24113 = getelementptr inbounds float* %tmp24112, i64 1
- %tmp24114 = getelementptr inbounds float* %tmp24113, i64 1
- %tmp24115 = getelementptr inbounds float* %tmp24114, i64 1
- %tmp24116 = getelementptr inbounds float* %tmp24115, i64 1
- %tmp24117 = getelementptr inbounds float* %tmp24116, i64 1
- %tmp24118 = getelementptr inbounds float* %tmp24117, i64 1
- %tmp24119 = getelementptr inbounds float* %tmp24118, i64 1
- %tmp24120 = getelementptr inbounds float* %tmp24119, i64 1
- %tmp24121 = getelementptr inbounds float* %tmp24120, i64 1
- %tmp24122 = getelementptr inbounds float* %tmp24121, i64 1
- %tmp24123 = getelementptr inbounds float* %tmp24122, i64 1
- %tmp24124 = getelementptr inbounds float* %tmp24123, i64 1
- %tmp24125 = getelementptr inbounds float* %tmp24124, i64 1
- %tmp24126 = getelementptr inbounds float* %tmp24125, i64 1
- %tmp24127 = getelementptr inbounds float* %tmp24126, i64 1
- %tmp24128 = getelementptr inbounds float* %tmp24127, i64 1
- %tmp24129 = getelementptr inbounds float* %tmp24128, i64 1
- %tmp24130 = getelementptr inbounds float* %tmp24129, i64 1
- %tmp24131 = getelementptr inbounds float* %tmp24130, i64 1
- %tmp24132 = getelementptr inbounds float* %tmp24131, i64 1
- %tmp24133 = getelementptr inbounds float* %tmp24132, i64 1
- %tmp24134 = getelementptr inbounds float* %tmp24133, i64 1
- %tmp24135 = getelementptr inbounds float* %tmp24134, i64 1
- %tmp24136 = getelementptr inbounds float* %tmp24135, i64 1
- %tmp24137 = getelementptr inbounds float* %tmp24136, i64 1
- %tmp24138 = getelementptr inbounds float* %tmp24137, i64 1
- %tmp24139 = getelementptr inbounds float* %tmp24138, i64 1
- %tmp24140 = getelementptr inbounds float* %tmp24139, i64 1
- %tmp24141 = getelementptr inbounds float* %tmp24140, i64 1
- %tmp24142 = getelementptr inbounds float* %tmp24141, i64 1
- %tmp24143 = getelementptr inbounds float* %tmp24142, i64 1
- %tmp24144 = getelementptr inbounds float* %tmp24143, i64 1
- %tmp24145 = getelementptr inbounds float* %tmp24144, i64 1
- %tmp24146 = getelementptr inbounds float* %tmp24145, i64 1
- %tmp24147 = getelementptr inbounds float* %tmp24146, i64 1
- %tmp24148 = getelementptr inbounds float* %tmp24147, i64 1
- %tmp24149 = getelementptr inbounds float* %tmp24148, i64 1
- %tmp24150 = getelementptr inbounds float* %tmp24149, i64 1
- %tmp24151 = getelementptr inbounds float* %tmp24150, i64 1
- %tmp24152 = getelementptr inbounds float* %tmp24151, i64 1
- %tmp24153 = getelementptr inbounds float* %tmp24152, i64 1
- %tmp24154 = getelementptr inbounds float* %tmp24153, i64 1
- %tmp24155 = getelementptr inbounds float* %tmp24154, i64 1
- %tmp24156 = getelementptr inbounds float* %tmp24155, i64 1
- %tmp24157 = getelementptr inbounds float* %tmp24156, i64 1
- %tmp24158 = getelementptr inbounds float* %tmp24157, i64 1
- %tmp24159 = getelementptr inbounds float* %tmp24158, i64 1
- %tmp24160 = getelementptr inbounds float* %tmp24159, i64 1
- %tmp24161 = getelementptr inbounds float* %tmp24160, i64 1
- %tmp24162 = getelementptr inbounds float* %tmp24161, i64 1
- %tmp24163 = getelementptr inbounds float* %tmp24162, i64 1
- %tmp24164 = getelementptr inbounds float* %tmp24163, i64 1
- %tmp24165 = getelementptr inbounds float* %tmp24164, i64 1
- %tmp24166 = getelementptr inbounds float* %tmp24165, i64 1
- %tmp24167 = getelementptr inbounds float* %tmp24166, i64 1
- %tmp24168 = getelementptr inbounds float* %tmp24167, i64 1
- %tmp24169 = getelementptr inbounds float* %tmp24168, i64 1
- %tmp24170 = getelementptr inbounds float* %tmp24169, i64 1
- %tmp24171 = getelementptr inbounds float* %tmp24170, i64 1
- %tmp24172 = getelementptr inbounds float* %tmp24171, i64 1
- %tmp24173 = getelementptr inbounds float* %tmp24172, i64 1
- %tmp24174 = getelementptr inbounds float* %tmp24173, i64 1
- %tmp24175 = getelementptr inbounds float* %tmp24174, i64 1
- %tmp24176 = getelementptr inbounds float* %tmp24175, i64 1
- %tmp24177 = getelementptr inbounds float* %tmp24176, i64 1
- %tmp24178 = getelementptr inbounds float* %tmp24177, i64 1
- %tmp24179 = getelementptr inbounds float* %tmp24178, i64 1
- %tmp24180 = getelementptr inbounds float* %tmp24179, i64 1
- %tmp24181 = getelementptr inbounds float* %tmp24180, i64 1
- %tmp24182 = getelementptr inbounds float* %tmp24181, i64 1
- %tmp24183 = getelementptr inbounds float* %tmp24182, i64 1
- %tmp24184 = getelementptr inbounds float* %tmp24183, i64 1
- %tmp24185 = getelementptr inbounds float* %tmp24184, i64 1
- %tmp24186 = getelementptr inbounds float* %tmp24185, i64 1
- %tmp24187 = getelementptr inbounds float* %tmp24186, i64 1
- %tmp24188 = getelementptr inbounds float* %tmp24187, i64 1
- %tmp24189 = getelementptr inbounds float* %tmp24188, i64 1
- %tmp24190 = getelementptr inbounds float* %tmp24189, i64 1
- %tmp24191 = getelementptr inbounds float* %tmp24190, i64 1
- %tmp24192 = getelementptr inbounds float* %tmp24191, i64 1
- %tmp24193 = getelementptr inbounds float* %tmp24192, i64 1
- %tmp24194 = getelementptr inbounds float* %tmp24193, i64 1
- %tmp24195 = getelementptr inbounds float* %tmp24194, i64 1
- %tmp24196 = getelementptr inbounds float* %tmp24195, i64 1
- %tmp24197 = getelementptr inbounds float* %tmp24196, i64 1
- %tmp24198 = getelementptr inbounds float* %tmp24197, i64 1
- %tmp24199 = getelementptr inbounds float* %tmp24198, i64 1
- %tmp24200 = getelementptr inbounds float* %tmp24199, i64 1
- %tmp24201 = getelementptr inbounds float* %tmp24200, i64 1
- %tmp24202 = getelementptr inbounds float* %tmp24201, i64 1
- %tmp24203 = getelementptr inbounds float* %tmp24202, i64 1
- %tmp24204 = getelementptr inbounds float* %tmp24203, i64 1
- %tmp24205 = getelementptr inbounds float* %tmp24204, i64 1
- %tmp24206 = getelementptr inbounds float* %tmp24205, i64 1
- %tmp24207 = getelementptr inbounds float* %tmp24206, i64 1
- %tmp24208 = getelementptr inbounds float* %tmp24207, i64 1
- %tmp24209 = getelementptr inbounds float* %tmp24208, i64 1
- %tmp24210 = getelementptr inbounds float* %tmp24209, i64 1
- %tmp24211 = getelementptr inbounds float* %tmp24210, i64 1
- %tmp24212 = getelementptr inbounds float* %tmp24211, i64 1
- %tmp24213 = getelementptr inbounds float* %tmp24212, i64 1
- %tmp24214 = getelementptr inbounds float* %tmp24213, i64 1
- %tmp24215 = getelementptr inbounds float* %tmp24214, i64 1
- %tmp24216 = getelementptr inbounds float* %tmp24215, i64 1
- %tmp24217 = getelementptr inbounds float* %tmp24216, i64 1
- %tmp24218 = getelementptr inbounds float* %tmp24217, i64 1
- %tmp24219 = getelementptr inbounds float* %tmp24218, i64 1
- %tmp24220 = getelementptr inbounds float* %tmp24219, i64 1
- %tmp24221 = getelementptr inbounds float* %tmp24220, i64 1
- %tmp24222 = getelementptr inbounds float* %tmp24221, i64 1
- %tmp24223 = getelementptr inbounds float* %tmp24222, i64 1
- %tmp24224 = getelementptr inbounds float* %tmp24223, i64 1
- %tmp24225 = getelementptr inbounds float* %tmp24224, i64 1
- %tmp24226 = getelementptr inbounds float* %tmp24225, i64 1
- %tmp24227 = getelementptr inbounds float* %tmp24226, i64 1
- %tmp24228 = getelementptr inbounds float* %tmp24227, i64 1
- %tmp24229 = getelementptr inbounds float* %tmp24228, i64 1
- %tmp24230 = getelementptr inbounds float* %tmp24229, i64 1
- %tmp24231 = getelementptr inbounds float* %tmp24230, i64 1
- %tmp24232 = getelementptr inbounds float* %tmp24231, i64 1
- %tmp24233 = getelementptr inbounds float* %tmp24232, i64 1
- %tmp24234 = getelementptr inbounds float* %tmp24233, i64 1
- %tmp24235 = getelementptr inbounds float* %tmp24234, i64 1
- %tmp24236 = getelementptr inbounds float* %tmp24235, i64 1
- %tmp24237 = getelementptr inbounds float* %tmp24236, i64 1
- %tmp24238 = getelementptr inbounds float* %tmp24237, i64 1
- %tmp24239 = getelementptr inbounds float* %tmp24238, i64 1
- %tmp24240 = getelementptr inbounds float* %tmp24239, i64 1
- %tmp24241 = getelementptr inbounds float* %tmp24240, i64 1
- %tmp24242 = getelementptr inbounds float* %tmp24241, i64 1
- %tmp24243 = getelementptr inbounds float* %tmp24242, i64 1
- %tmp24244 = getelementptr inbounds float* %tmp24243, i64 1
- %tmp24245 = getelementptr inbounds float* %tmp24244, i64 1
- %tmp24246 = getelementptr inbounds float* %tmp24245, i64 1
- %tmp24247 = getelementptr inbounds float* %tmp24246, i64 1
- %tmp24248 = getelementptr inbounds float* %tmp24247, i64 1
- %tmp24249 = getelementptr inbounds float* %tmp24248, i64 1
- %tmp24250 = getelementptr inbounds float* %tmp24249, i64 1
- %tmp24251 = getelementptr inbounds float* %tmp24250, i64 1
- %tmp24252 = getelementptr inbounds float* %tmp24251, i64 1
- %tmp24253 = getelementptr inbounds float* %tmp24252, i64 1
- %tmp24254 = getelementptr inbounds float* %tmp24253, i64 1
- %tmp24255 = getelementptr inbounds float* %tmp24254, i64 1
- %tmp24256 = getelementptr inbounds float* %tmp24255, i64 1
- %tmp24257 = getelementptr inbounds float* %tmp24256, i64 1
- %tmp24258 = getelementptr inbounds float* %tmp24257, i64 1
- %tmp24259 = getelementptr inbounds float* %tmp24258, i64 1
- %tmp24260 = getelementptr inbounds float* %tmp24259, i64 1
- %tmp24261 = getelementptr inbounds float* %tmp24260, i64 1
- %tmp24262 = getelementptr inbounds float* %tmp24261, i64 1
- %tmp24263 = getelementptr inbounds float* %tmp24262, i64 1
- %tmp24264 = getelementptr inbounds float* %tmp24263, i64 1
- %tmp24265 = getelementptr inbounds float* %tmp24264, i64 1
- %tmp24266 = getelementptr inbounds float* %tmp24265, i64 1
- %tmp24267 = getelementptr inbounds float* %tmp24266, i64 1
- %tmp24268 = getelementptr inbounds float* %tmp24267, i64 1
- %tmp24269 = getelementptr inbounds float* %tmp24268, i64 1
- %tmp24270 = getelementptr inbounds float* %tmp24269, i64 1
- %tmp24271 = getelementptr inbounds float* %tmp24270, i64 1
- %tmp24272 = getelementptr inbounds float* %tmp24271, i64 1
- %tmp24273 = getelementptr inbounds float* %tmp24272, i64 1
- %tmp24274 = getelementptr inbounds float* %tmp24273, i64 1
- %tmp24275 = getelementptr inbounds float* %tmp24274, i64 1
- %tmp24276 = getelementptr inbounds float* %tmp24275, i64 1
- %tmp24277 = getelementptr inbounds float* %tmp24276, i64 1
- %tmp24278 = getelementptr inbounds float* %tmp24277, i64 1
- %tmp24279 = getelementptr inbounds float* %tmp24278, i64 1
- %tmp24280 = getelementptr inbounds float* %tmp24279, i64 1
- %tmp24281 = getelementptr inbounds float* %tmp24280, i64 1
- %tmp24282 = getelementptr inbounds float* %tmp24281, i64 1
- %tmp24283 = getelementptr inbounds float* %tmp24282, i64 1
- %tmp24284 = getelementptr inbounds float* %tmp24283, i64 1
- %tmp24285 = getelementptr inbounds float* %tmp24284, i64 1
- %tmp24286 = getelementptr inbounds float* %tmp24285, i64 1
- %tmp24287 = getelementptr inbounds float* %tmp24286, i64 1
- %tmp24288 = getelementptr inbounds float* %tmp24287, i64 1
- %tmp24289 = getelementptr inbounds float* %tmp24288, i64 1
- %tmp24290 = getelementptr inbounds float* %tmp24289, i64 1
- %tmp24291 = getelementptr inbounds float* %tmp24290, i64 1
- %tmp24292 = getelementptr inbounds float* %tmp24291, i64 1
- %tmp24293 = getelementptr inbounds float* %tmp24292, i64 1
- %tmp24294 = getelementptr inbounds float* %tmp24293, i64 1
- %tmp24295 = getelementptr inbounds float* %tmp24294, i64 1
- %tmp24296 = getelementptr inbounds float* %tmp24295, i64 1
- %tmp24297 = getelementptr inbounds float* %tmp24296, i64 1
- %tmp24298 = getelementptr inbounds float* %tmp24297, i64 1
- %tmp24299 = getelementptr inbounds float* %tmp24298, i64 1
- %tmp24300 = getelementptr inbounds float* %tmp24299, i64 1
- %tmp24301 = getelementptr inbounds float* %tmp24300, i64 1
- %tmp24302 = getelementptr inbounds float* %tmp24301, i64 1
- %tmp24303 = getelementptr inbounds float* %tmp24302, i64 1
- %tmp24304 = getelementptr inbounds float* %tmp24303, i64 1
- %tmp24305 = getelementptr inbounds float* %tmp24304, i64 1
- %tmp24306 = getelementptr inbounds float* %tmp24305, i64 1
- %tmp24307 = getelementptr inbounds float* %tmp24306, i64 1
- %tmp24308 = getelementptr inbounds float* %tmp24307, i64 1
- %tmp24309 = getelementptr inbounds float* %tmp24308, i64 1
- %tmp24310 = getelementptr inbounds float* %tmp24309, i64 1
- %tmp24311 = getelementptr inbounds float* %tmp24310, i64 1
- %tmp24312 = getelementptr inbounds float* %tmp24311, i64 1
- %tmp24313 = getelementptr inbounds float* %tmp24312, i64 1
- %tmp24314 = getelementptr inbounds float* %tmp24313, i64 1
- %tmp24315 = getelementptr inbounds float* %tmp24314, i64 1
- %tmp24316 = getelementptr inbounds float* %tmp24315, i64 1
- %tmp24317 = getelementptr inbounds float* %tmp24316, i64 1
- %tmp24318 = getelementptr inbounds float* %tmp24317, i64 1
- %tmp24319 = getelementptr inbounds float* %tmp24318, i64 1
- %tmp24320 = getelementptr inbounds float* %tmp24319, i64 1
- %tmp24321 = getelementptr inbounds float* %tmp24320, i64 1
- %tmp24322 = getelementptr inbounds float* %tmp24321, i64 1
- %tmp24323 = getelementptr inbounds float* %tmp24322, i64 1
- %tmp24324 = getelementptr inbounds float* %tmp24323, i64 1
- %tmp24325 = getelementptr inbounds float* %tmp24324, i64 1
- %tmp24326 = getelementptr inbounds float* %tmp24325, i64 1
- %tmp24327 = getelementptr inbounds float* %tmp24326, i64 1
- %tmp24328 = getelementptr inbounds float* %tmp24327, i64 1
- %tmp24329 = getelementptr inbounds float* %tmp24328, i64 1
- %tmp24330 = getelementptr inbounds float* %tmp24329, i64 1
- %tmp24331 = getelementptr inbounds float* %tmp24330, i64 1
- %tmp24332 = getelementptr inbounds float* %tmp24331, i64 1
- %tmp24333 = getelementptr inbounds float* %tmp24332, i64 1
- %tmp24334 = getelementptr inbounds float* %tmp24333, i64 1
- %tmp24335 = getelementptr inbounds float* %tmp24334, i64 1
- %tmp24336 = getelementptr inbounds float* %tmp24335, i64 1
- %tmp24337 = getelementptr inbounds float* %tmp24336, i64 1
- %tmp24338 = getelementptr inbounds float* %tmp24337, i64 1
- %tmp24339 = getelementptr inbounds float* %tmp24338, i64 1
- %tmp24340 = getelementptr inbounds float* %tmp24339, i64 1
- %tmp24341 = getelementptr inbounds float* %tmp24340, i64 1
- %tmp24342 = getelementptr inbounds float* %tmp24341, i64 1
- %tmp24343 = getelementptr inbounds float* %tmp24342, i64 1
- %tmp24344 = getelementptr inbounds float* %tmp24343, i64 1
- %tmp24345 = getelementptr inbounds float* %tmp24344, i64 1
- %tmp24346 = getelementptr inbounds float* %tmp24345, i64 1
- %tmp24347 = getelementptr inbounds float* %tmp24346, i64 1
- %tmp24348 = getelementptr inbounds float* %tmp24347, i64 1
- %tmp24349 = getelementptr inbounds float* %tmp24348, i64 1
- %tmp24350 = getelementptr inbounds float* %tmp24349, i64 1
- %tmp24351 = getelementptr inbounds float* %tmp24350, i64 1
- %tmp24352 = getelementptr inbounds float* %tmp24351, i64 1
- %tmp24353 = getelementptr inbounds float* %tmp24352, i64 1
- %tmp24354 = getelementptr inbounds float* %tmp24353, i64 1
- %tmp24355 = getelementptr inbounds float* %tmp24354, i64 1
- %tmp24356 = getelementptr inbounds float* %tmp24355, i64 1
- %tmp24357 = getelementptr inbounds float* %tmp24356, i64 1
- %tmp24358 = getelementptr inbounds float* %tmp24357, i64 1
- %tmp24359 = getelementptr inbounds float* %tmp24358, i64 1
- %tmp24360 = getelementptr inbounds float* %tmp24359, i64 1
- %tmp24361 = getelementptr inbounds float* %tmp24360, i64 1
- %tmp24362 = getelementptr inbounds float* %tmp24361, i64 1
- %tmp24363 = getelementptr inbounds float* %tmp24362, i64 1
- %tmp24364 = getelementptr inbounds float* %tmp24363, i64 1
- %tmp24365 = getelementptr inbounds float* %tmp24364, i64 1
- %tmp24366 = getelementptr inbounds float* %tmp24365, i64 1
- %tmp24367 = getelementptr inbounds float* %tmp24366, i64 1
- %tmp24368 = getelementptr inbounds float* %tmp24367, i64 1
- %tmp24369 = getelementptr inbounds float* %tmp24368, i64 1
- %tmp24370 = getelementptr inbounds float* %tmp24369, i64 1
- %tmp24371 = getelementptr inbounds float* %tmp24370, i64 1
- %tmp24372 = getelementptr inbounds float* %tmp24371, i64 1
- %tmp24373 = getelementptr inbounds float* %tmp24372, i64 1
- %tmp24374 = getelementptr inbounds float* %tmp24373, i64 1
- %tmp24375 = getelementptr inbounds float* %tmp24374, i64 1
- %tmp24376 = getelementptr inbounds float* %tmp24375, i64 1
- %tmp24377 = getelementptr inbounds float* %tmp24376, i64 1
- %tmp24378 = getelementptr inbounds float* %tmp24377, i64 1
- %tmp24379 = getelementptr inbounds float* %tmp24378, i64 1
- %tmp24380 = getelementptr inbounds float* %tmp24379, i64 1
- %tmp24381 = getelementptr inbounds float* %tmp24380, i64 1
- %tmp24382 = getelementptr inbounds float* %tmp24381, i64 1
- %tmp24383 = getelementptr inbounds float* %tmp24382, i64 1
- %tmp24384 = getelementptr inbounds float* %tmp24383, i64 1
- %tmp24385 = getelementptr inbounds float* %tmp24384, i64 1
- %tmp24386 = getelementptr inbounds float* %tmp24385, i64 1
- %tmp24387 = getelementptr inbounds float* %tmp24386, i64 1
- %tmp24388 = getelementptr inbounds float* %tmp24387, i64 1
- %tmp24389 = getelementptr inbounds float* %tmp24388, i64 1
- %tmp24390 = getelementptr inbounds float* %tmp24389, i64 1
- %tmp24391 = getelementptr inbounds float* %tmp24390, i64 1
- %tmp24392 = getelementptr inbounds float* %tmp24391, i64 1
- %tmp24393 = getelementptr inbounds float* %tmp24392, i64 1
- %tmp24394 = getelementptr inbounds float* %tmp24393, i64 1
- %tmp24395 = getelementptr inbounds float* %tmp24394, i64 1
- %tmp24396 = getelementptr inbounds float* %tmp24395, i64 1
- %tmp24397 = getelementptr inbounds float* %tmp24396, i64 1
- %tmp24398 = getelementptr inbounds float* %tmp24397, i64 1
- %tmp24399 = getelementptr inbounds float* %tmp24398, i64 1
- %tmp24400 = getelementptr inbounds float* %tmp24399, i64 1
- %tmp24401 = getelementptr inbounds float* %tmp24400, i64 1
- %tmp24402 = getelementptr inbounds float* %tmp24401, i64 1
- %tmp24403 = getelementptr inbounds float* %tmp24402, i64 1
- %tmp24404 = getelementptr inbounds float* %tmp24403, i64 1
- %tmp24405 = getelementptr inbounds float* %tmp24404, i64 1
- %tmp24406 = getelementptr inbounds float* %tmp24405, i64 1
- %tmp24407 = getelementptr inbounds float* %tmp24406, i64 1
- %tmp24408 = getelementptr inbounds float* %tmp24407, i64 1
- %tmp24409 = getelementptr inbounds float* %tmp24408, i64 1
- %tmp24410 = getelementptr inbounds float* %tmp24409, i64 1
- %tmp24411 = getelementptr inbounds float* %tmp24410, i64 1
- %tmp24412 = getelementptr inbounds float* %tmp24411, i64 1
- %tmp24413 = getelementptr inbounds float* %tmp24412, i64 1
- %tmp24414 = getelementptr inbounds float* %tmp24413, i64 1
- %tmp24415 = getelementptr inbounds float* %tmp24414, i64 1
- %tmp24416 = getelementptr inbounds float* %tmp24415, i64 1
- %tmp24417 = getelementptr inbounds float* %tmp24416, i64 1
- %tmp24418 = getelementptr inbounds float* %tmp24417, i64 1
- %tmp24419 = getelementptr inbounds float* %tmp24418, i64 1
- %tmp24420 = getelementptr inbounds float* %tmp24419, i64 1
- %tmp24421 = getelementptr inbounds float* %tmp24420, i64 1
- %tmp24422 = getelementptr inbounds float* %tmp24421, i64 1
- %tmp24423 = getelementptr inbounds float* %tmp24422, i64 1
- %tmp24424 = getelementptr inbounds float* %tmp24423, i64 1
- %tmp24425 = getelementptr inbounds float* %tmp24424, i64 1
- %tmp24426 = getelementptr inbounds float* %tmp24425, i64 1
- %tmp24427 = getelementptr inbounds float* %tmp24426, i64 1
- %tmp24428 = getelementptr inbounds float* %tmp24427, i64 1
- %tmp24429 = getelementptr inbounds float* %tmp24428, i64 1
- %tmp24430 = getelementptr inbounds float* %tmp24429, i64 1
- %tmp24431 = getelementptr inbounds float* %tmp24430, i64 1
- %tmp24432 = getelementptr inbounds float* %tmp24431, i64 1
- %tmp24433 = getelementptr inbounds float* %tmp24432, i64 1
- %tmp24434 = getelementptr inbounds float* %tmp24433, i64 1
- %tmp24435 = getelementptr inbounds float* %tmp24434, i64 1
- %tmp24436 = getelementptr inbounds float* %tmp24435, i64 1
- %tmp24437 = getelementptr inbounds float* %tmp24436, i64 1
- %tmp24438 = getelementptr inbounds float* %tmp24437, i64 1
- %tmp24439 = getelementptr inbounds float* %tmp24438, i64 1
- %tmp24440 = getelementptr inbounds float* %tmp24439, i64 1
- %tmp24441 = getelementptr inbounds float* %tmp24440, i64 1
- %tmp24442 = getelementptr inbounds float* %tmp24441, i64 1
- %tmp24443 = getelementptr inbounds float* %tmp24442, i64 1
- %tmp24444 = getelementptr inbounds float* %tmp24443, i64 1
- %tmp24445 = getelementptr inbounds float* %tmp24444, i64 1
- %tmp24446 = getelementptr inbounds float* %tmp24445, i64 1
- %tmp24447 = getelementptr inbounds float* %tmp24446, i64 1
- %tmp24448 = getelementptr inbounds float* %tmp24447, i64 1
- %tmp24449 = getelementptr inbounds float* %tmp24448, i64 1
- %tmp24450 = getelementptr inbounds float* %tmp24449, i64 1
- %tmp24451 = getelementptr inbounds float* %tmp24450, i64 1
- %tmp24452 = getelementptr inbounds float* %tmp24451, i64 1
- %tmp24453 = getelementptr inbounds float* %tmp24452, i64 1
- %tmp24454 = getelementptr inbounds float* %tmp24453, i64 1
- %tmp24455 = getelementptr inbounds float* %tmp24454, i64 1
- %tmp24456 = getelementptr inbounds float* %tmp24455, i64 1
- %tmp24457 = getelementptr inbounds float* %tmp24456, i64 1
- %tmp24458 = getelementptr inbounds float* %tmp24457, i64 1
- %tmp24459 = getelementptr inbounds float* %tmp24458, i64 1
- %tmp24460 = getelementptr inbounds float* %tmp24459, i64 1
- %tmp24461 = getelementptr inbounds float* %tmp24460, i64 1
- %tmp24462 = getelementptr inbounds float* %tmp24461, i64 1
- %tmp24463 = getelementptr inbounds float* %tmp24462, i64 1
- %tmp24464 = getelementptr inbounds float* %tmp24463, i64 1
- %tmp24465 = getelementptr inbounds float* %tmp24464, i64 1
- %tmp24466 = getelementptr inbounds float* %tmp24465, i64 1
- %tmp24467 = getelementptr inbounds float* %tmp24466, i64 1
- %tmp24468 = getelementptr inbounds float* %tmp24467, i64 1
- %tmp24469 = getelementptr inbounds float* %tmp24468, i64 1
- %tmp24470 = getelementptr inbounds float* %tmp24469, i64 1
- %tmp24471 = getelementptr inbounds float* %tmp24470, i64 1
- %tmp24472 = getelementptr inbounds float* %tmp24471, i64 1
- %tmp24473 = getelementptr inbounds float* %tmp24472, i64 1
- %tmp24474 = getelementptr inbounds float* %tmp24473, i64 1
- %tmp24475 = getelementptr inbounds float* %tmp24474, i64 1
- %tmp24476 = getelementptr inbounds float* %tmp24475, i64 1
- %tmp24477 = getelementptr inbounds float* %tmp24476, i64 1
- %tmp24478 = getelementptr inbounds float* %tmp24477, i64 1
- %tmp24479 = getelementptr inbounds float* %tmp24478, i64 1
- %tmp24480 = getelementptr inbounds float* %tmp24479, i64 1
- %tmp24481 = getelementptr inbounds float* %tmp24480, i64 1
- %tmp24482 = getelementptr inbounds float* %tmp24481, i64 1
- %tmp24483 = getelementptr inbounds float* %tmp24482, i64 1
- %tmp24484 = getelementptr inbounds float* %tmp24483, i64 1
- %tmp24485 = getelementptr inbounds float* %tmp24484, i64 1
- %tmp24486 = getelementptr inbounds float* %tmp24485, i64 1
- %tmp24487 = getelementptr inbounds float* %tmp24486, i64 1
- %tmp24488 = getelementptr inbounds float* %tmp24487, i64 1
- %tmp24489 = getelementptr inbounds float* %tmp24488, i64 1
- %tmp24490 = getelementptr inbounds float* %tmp24489, i64 1
- %tmp24491 = getelementptr inbounds float* %tmp24490, i64 1
- %tmp24492 = getelementptr inbounds float* %tmp24491, i64 1
- %tmp24493 = getelementptr inbounds float* %tmp24492, i64 1
- %tmp24494 = getelementptr inbounds float* %tmp24493, i64 1
- %tmp24495 = getelementptr inbounds float* %tmp24494, i64 1
- %tmp24496 = getelementptr inbounds float* %tmp24495, i64 1
- %tmp24497 = getelementptr inbounds float* %tmp24496, i64 1
- %tmp24498 = getelementptr inbounds float* %tmp24497, i64 1
- %tmp24499 = getelementptr inbounds float* %tmp24498, i64 1
- %tmp24500 = getelementptr inbounds float* %tmp24499, i64 1
- %tmp24501 = getelementptr inbounds float* %tmp24500, i64 1
- %tmp24502 = getelementptr inbounds float* %tmp24501, i64 1
- %tmp24503 = getelementptr inbounds float* %tmp24502, i64 1
- %tmp24504 = getelementptr inbounds float* %tmp24503, i64 1
- %tmp24505 = getelementptr inbounds float* %tmp24504, i64 1
- %tmp24506 = getelementptr inbounds float* %tmp24505, i64 1
- %tmp24507 = getelementptr inbounds float* %tmp24506, i64 1
- %tmp24508 = getelementptr inbounds float* %tmp24507, i64 1
- %tmp24509 = getelementptr inbounds float* %tmp24508, i64 1
- %tmp24510 = getelementptr inbounds float* %tmp24509, i64 1
- %tmp24511 = getelementptr inbounds float* %tmp24510, i64 1
- %tmp24512 = getelementptr inbounds float* %tmp24511, i64 1
- %tmp24513 = getelementptr inbounds float* %tmp24512, i64 1
- %tmp24514 = getelementptr inbounds float* %tmp24513, i64 1
- %tmp24515 = getelementptr inbounds float* %tmp24514, i64 1
- %tmp24516 = getelementptr inbounds float* %tmp24515, i64 1
- %tmp24517 = getelementptr inbounds float* %tmp24516, i64 1
- %tmp24518 = getelementptr inbounds float* %tmp24517, i64 1
- %tmp24519 = getelementptr inbounds float* %tmp24518, i64 1
- %tmp24520 = getelementptr inbounds float* %tmp24519, i64 1
- %tmp24521 = getelementptr inbounds float* %tmp24520, i64 1
- %tmp24522 = getelementptr inbounds float* %tmp24521, i64 1
- %tmp24523 = getelementptr inbounds float* %tmp24522, i64 1
- %tmp24524 = getelementptr inbounds float* %tmp24523, i64 1
- %tmp24525 = getelementptr inbounds float* %tmp24524, i64 1
- %tmp24526 = getelementptr inbounds float* %tmp24525, i64 1
- %tmp24527 = getelementptr inbounds float* %tmp24526, i64 1
- %tmp24528 = getelementptr inbounds float* %tmp24527, i64 1
- %tmp24529 = getelementptr inbounds float* %tmp24528, i64 1
- %tmp24530 = getelementptr inbounds float* %tmp24529, i64 1
- %tmp24531 = getelementptr inbounds float* %tmp24530, i64 1
- %tmp24532 = getelementptr inbounds float* %tmp24531, i64 1
- %tmp24533 = getelementptr inbounds float* %tmp24532, i64 1
- %tmp24534 = getelementptr inbounds float* %tmp24533, i64 1
- %tmp24535 = getelementptr inbounds float* %tmp24534, i64 1
- %tmp24536 = getelementptr inbounds float* %tmp24535, i64 1
- %tmp24537 = getelementptr inbounds float* %tmp24536, i64 1
- %tmp24538 = getelementptr inbounds float* %tmp24537, i64 1
- %tmp24539 = getelementptr inbounds float* %tmp24538, i64 1
- %tmp24540 = getelementptr inbounds float* %tmp24539, i64 1
- %tmp24541 = getelementptr inbounds float* %tmp24540, i64 1
- %tmp24542 = getelementptr inbounds float* %tmp24541, i64 1
- %tmp24543 = getelementptr inbounds float* %tmp24542, i64 1
- %tmp24544 = getelementptr inbounds float* %tmp24543, i64 1
- %tmp24545 = getelementptr inbounds float* %tmp24544, i64 1
- %tmp24546 = getelementptr inbounds float* %tmp24545, i64 1
- %tmp24547 = getelementptr inbounds float* %tmp24546, i64 1
- %tmp24548 = getelementptr inbounds float* %tmp24547, i64 1
- %tmp24549 = getelementptr inbounds float* %tmp24548, i64 1
- %tmp24550 = getelementptr inbounds float* %tmp24549, i64 1
- %tmp24551 = getelementptr inbounds float* %tmp24550, i64 1
- %tmp24552 = getelementptr inbounds float* %tmp24551, i64 1
- %tmp24553 = getelementptr inbounds float* %tmp24552, i64 1
- %tmp24554 = getelementptr inbounds float* %tmp24553, i64 1
- %tmp24555 = getelementptr inbounds float* %tmp24554, i64 1
- %tmp24556 = getelementptr inbounds float* %tmp24555, i64 1
- %tmp24557 = getelementptr inbounds float* %tmp24556, i64 1
- %tmp24558 = getelementptr inbounds float* %tmp24557, i64 1
- %tmp24559 = getelementptr inbounds float* %tmp24558, i64 1
- %tmp24560 = getelementptr inbounds float* %tmp24559, i64 1
- %tmp24561 = getelementptr inbounds float* %tmp24560, i64 1
- %tmp24562 = getelementptr inbounds float* %tmp24561, i64 1
- %tmp24563 = getelementptr inbounds float* %tmp24562, i64 1
- %tmp24564 = getelementptr inbounds float* %tmp24563, i64 1
- %tmp24565 = getelementptr inbounds float* %tmp24564, i64 1
- %tmp24566 = getelementptr inbounds float* %tmp24565, i64 1
- %tmp24567 = getelementptr inbounds float* %tmp24566, i64 1
- %tmp24568 = getelementptr inbounds float* %tmp24567, i64 1
- %tmp24569 = getelementptr inbounds float* %tmp24568, i64 1
- %tmp24570 = getelementptr inbounds float* %tmp24569, i64 1
- %tmp24571 = getelementptr inbounds float* %tmp24570, i64 1
- %tmp24572 = getelementptr inbounds float* %tmp24571, i64 1
- %tmp24573 = getelementptr inbounds float* %tmp24572, i64 1
- %tmp24574 = getelementptr inbounds float* %tmp24573, i64 1
- %tmp24575 = getelementptr inbounds float* %tmp24574, i64 1
- %tmp24576 = getelementptr inbounds float* %tmp24575, i64 1
- %tmp24577 = getelementptr inbounds float* %tmp24576, i64 1
- %tmp24578 = getelementptr inbounds float* %tmp24577, i64 1
- %tmp24579 = getelementptr inbounds float* %tmp24578, i64 1
- %tmp24580 = getelementptr inbounds float* %tmp24579, i64 1
- %tmp24581 = getelementptr inbounds float* %tmp24580, i64 1
- %tmp24582 = getelementptr inbounds float* %tmp24581, i64 1
- %tmp24583 = getelementptr inbounds float* %tmp24582, i64 1
- %tmp24584 = getelementptr inbounds float* %tmp24583, i64 1
- %tmp24585 = getelementptr inbounds float* %tmp24584, i64 1
- %tmp24586 = getelementptr inbounds float* %tmp24585, i64 1
- %tmp24587 = getelementptr inbounds float* %tmp24586, i64 1
- %tmp24588 = getelementptr inbounds float* %tmp24587, i64 1
- %tmp24589 = getelementptr inbounds float* %tmp24588, i64 1
- %tmp24590 = getelementptr inbounds float* %tmp24589, i64 1
- %tmp24591 = getelementptr inbounds float* %tmp24590, i64 1
- %tmp24592 = getelementptr inbounds float* %tmp24591, i64 1
- %tmp24593 = getelementptr inbounds float* %tmp24592, i64 1
- %tmp24594 = getelementptr inbounds float* %tmp24593, i64 1
- %tmp24595 = getelementptr inbounds float* %tmp24594, i64 1
- %tmp24596 = getelementptr inbounds float* %tmp24595, i64 1
- %tmp24597 = getelementptr inbounds float* %tmp24596, i64 1
- %tmp24598 = getelementptr inbounds float* %tmp24597, i64 1
- %tmp24599 = getelementptr inbounds float* %tmp24598, i64 1
- %tmp24600 = getelementptr inbounds float* %tmp24599, i64 1
- %tmp24601 = getelementptr inbounds float* %tmp24600, i64 1
- %tmp24602 = getelementptr inbounds float* %tmp24601, i64 1
- %tmp24603 = getelementptr inbounds float* %tmp24602, i64 1
- %tmp24604 = getelementptr inbounds float* %tmp24603, i64 1
- %tmp24605 = getelementptr inbounds float* %tmp24604, i64 1
- %tmp24606 = getelementptr inbounds float* %tmp24605, i64 1
- %tmp24607 = getelementptr inbounds float* %tmp24606, i64 1
- %tmp24608 = getelementptr inbounds float* %tmp24607, i64 1
- %tmp24609 = getelementptr inbounds float* %tmp24608, i64 1
- %tmp24610 = getelementptr inbounds float* %tmp24609, i64 1
- %tmp24611 = getelementptr inbounds float* %tmp24610, i64 1
- %tmp24612 = getelementptr inbounds float* %tmp24611, i64 1
- %tmp24613 = getelementptr inbounds float* %tmp24612, i64 1
- %tmp24614 = getelementptr inbounds float* %tmp24613, i64 1
- %tmp24615 = getelementptr inbounds float* %tmp24614, i64 1
- %tmp24616 = getelementptr inbounds float* %tmp24615, i64 1
- %tmp24617 = getelementptr inbounds float* %tmp24616, i64 1
- %tmp24618 = getelementptr inbounds float* %tmp24617, i64 1
- %tmp24619 = getelementptr inbounds float* %tmp24618, i64 1
- %tmp24620 = getelementptr inbounds float* %tmp24619, i64 1
- %tmp24621 = getelementptr inbounds float* %tmp24620, i64 1
- %tmp24622 = getelementptr inbounds float* %tmp24621, i64 1
- %tmp24623 = getelementptr inbounds float* %tmp24622, i64 1
- %tmp24624 = getelementptr inbounds float* %tmp24623, i64 1
- %tmp24625 = getelementptr inbounds float* %tmp24624, i64 1
- %tmp24626 = getelementptr inbounds float* %tmp24625, i64 1
- %tmp24627 = getelementptr inbounds float* %tmp24626, i64 1
- %tmp24628 = getelementptr inbounds float* %tmp24627, i64 1
- %tmp24629 = getelementptr inbounds float* %tmp24628, i64 1
- %tmp24630 = getelementptr inbounds float* %tmp24629, i64 1
- %tmp24631 = getelementptr inbounds float* %tmp24630, i64 1
- %tmp24632 = getelementptr inbounds float* %tmp24631, i64 1
- %tmp24633 = getelementptr inbounds float* %tmp24632, i64 1
- %tmp24634 = getelementptr inbounds float* %tmp24633, i64 1
- %tmp24635 = getelementptr inbounds float* %tmp24634, i64 1
- %tmp24636 = getelementptr inbounds float* %tmp24635, i64 1
- %tmp24637 = getelementptr inbounds float* %tmp24636, i64 1
- %tmp24638 = getelementptr inbounds float* %tmp24637, i64 1
- %tmp24639 = getelementptr inbounds float* %tmp24638, i64 1
- %tmp24640 = getelementptr inbounds float* %tmp24639, i64 1
- %tmp24641 = getelementptr inbounds float* %tmp24640, i64 1
- %tmp24642 = getelementptr inbounds float* %tmp24641, i64 1
- %tmp24643 = getelementptr inbounds float* %tmp24642, i64 1
- %tmp24644 = getelementptr inbounds float* %tmp24643, i64 1
- %tmp24645 = getelementptr inbounds float* %tmp24644, i64 1
- %tmp24646 = getelementptr inbounds float* %tmp24645, i64 1
- %tmp24647 = getelementptr inbounds float* %tmp24646, i64 1
- %tmp24648 = getelementptr inbounds float* %tmp24647, i64 1
- %tmp24649 = getelementptr inbounds float* %tmp24648, i64 1
- %tmp24650 = getelementptr inbounds float* %tmp24649, i64 1
- %tmp24651 = getelementptr inbounds float* %tmp24650, i64 1
- %tmp24652 = getelementptr inbounds float* %tmp24651, i64 1
- %tmp24653 = getelementptr inbounds float* %tmp24652, i64 1
- %tmp24654 = getelementptr inbounds float* %tmp24653, i64 1
- %tmp24655 = getelementptr inbounds float* %tmp24654, i64 1
- %tmp24656 = getelementptr inbounds float* %tmp24655, i64 1
- %tmp24657 = getelementptr inbounds float* %tmp24656, i64 1
- %tmp24658 = getelementptr inbounds float* %tmp24657, i64 1
- %tmp24659 = getelementptr inbounds float* %tmp24658, i64 1
- %tmp24660 = getelementptr inbounds float* %tmp24659, i64 1
- %tmp24661 = getelementptr inbounds float* %tmp24660, i64 1
- %tmp24662 = getelementptr inbounds float* %tmp24661, i64 1
- %tmp24663 = getelementptr inbounds float* %tmp24662, i64 1
- %tmp24664 = getelementptr inbounds float* %tmp24663, i64 1
- %tmp24665 = getelementptr inbounds float* %tmp24664, i64 1
- %tmp24666 = getelementptr inbounds float* %tmp24665, i64 1
- %tmp24667 = getelementptr inbounds float* %tmp24666, i64 1
- %tmp24668 = getelementptr inbounds float* %tmp24667, i64 1
- %tmp24669 = getelementptr inbounds float* %tmp24668, i64 1
- %tmp24670 = getelementptr inbounds float* %tmp24669, i64 1
- %tmp24671 = getelementptr inbounds float* %tmp24670, i64 1
- %tmp24672 = getelementptr inbounds float* %tmp24671, i64 1
- %tmp24673 = getelementptr inbounds float* %tmp24672, i64 1
- %tmp24674 = getelementptr inbounds float* %tmp24673, i64 1
- %tmp24675 = getelementptr inbounds float* %tmp24674, i64 1
- %tmp24676 = getelementptr inbounds float* %tmp24675, i64 1
- %tmp24677 = getelementptr inbounds float* %tmp24676, i64 1
- %tmp24678 = getelementptr inbounds float* %tmp24677, i64 1
- %tmp24679 = getelementptr inbounds float* %tmp24678, i64 1
- %tmp24680 = getelementptr inbounds float* %tmp24679, i64 1
- %tmp24681 = getelementptr inbounds float* %tmp24680, i64 1
- %tmp24682 = getelementptr inbounds float* %tmp24681, i64 1
- %tmp24683 = getelementptr inbounds float* %tmp24682, i64 1
- %tmp24684 = getelementptr inbounds float* %tmp24683, i64 1
- %tmp24685 = getelementptr inbounds float* %tmp24684, i64 1
- %tmp24686 = getelementptr inbounds float* %tmp24685, i64 1
- %tmp24687 = getelementptr inbounds float* %tmp24686, i64 1
- %tmp24688 = getelementptr inbounds float* %tmp24687, i64 1
- %tmp24689 = getelementptr inbounds float* %tmp24688, i64 1
- %tmp24690 = getelementptr inbounds float* %tmp24689, i64 1
- %tmp24691 = getelementptr inbounds float* %tmp24690, i64 1
- %tmp24692 = getelementptr inbounds float* %tmp24691, i64 1
- %tmp24693 = getelementptr inbounds float* %tmp24692, i64 1
- %tmp24694 = getelementptr inbounds float* %tmp24693, i64 1
- %tmp24695 = getelementptr inbounds float* %tmp24694, i64 1
- %tmp24696 = getelementptr inbounds float* %tmp24695, i64 1
- %tmp24697 = getelementptr inbounds float* %tmp24696, i64 1
- %tmp24698 = getelementptr inbounds float* %tmp24697, i64 1
- %tmp24699 = getelementptr inbounds float* %tmp24698, i64 1
- %tmp24700 = getelementptr inbounds float* %tmp24699, i64 1
- %tmp24701 = getelementptr inbounds float* %tmp24700, i64 1
- %tmp24702 = getelementptr inbounds float* %tmp24701, i64 1
- %tmp24703 = getelementptr inbounds float* %tmp24702, i64 1
- %tmp24704 = getelementptr inbounds float* %tmp24703, i64 1
- %tmp24705 = getelementptr inbounds float* %tmp24704, i64 1
- %tmp24706 = getelementptr inbounds float* %tmp24705, i64 1
- %tmp24707 = getelementptr inbounds float* %tmp24706, i64 1
- %tmp24708 = getelementptr inbounds float* %tmp24707, i64 1
- %tmp24709 = getelementptr inbounds float* %tmp24708, i64 1
- %tmp24710 = getelementptr inbounds float* %tmp24709, i64 1
- %tmp24711 = getelementptr inbounds float* %tmp24710, i64 1
- %tmp24712 = getelementptr inbounds float* %tmp24711, i64 1
- %tmp24713 = getelementptr inbounds float* %tmp24712, i64 1
- %tmp24714 = getelementptr inbounds float* %tmp24713, i64 1
- %tmp24715 = getelementptr inbounds float* %tmp24714, i64 1
- %tmp24716 = getelementptr inbounds float* %tmp24715, i64 1
- %tmp24717 = getelementptr inbounds float* %tmp24716, i64 1
- %tmp24718 = getelementptr inbounds float* %tmp24717, i64 1
- %tmp24719 = getelementptr inbounds float* %tmp24718, i64 1
- %tmp24720 = getelementptr inbounds float* %tmp24719, i64 1
- %tmp24721 = getelementptr inbounds float* %tmp24720, i64 1
- %tmp24722 = getelementptr inbounds float* %tmp24721, i64 1
- %tmp24723 = getelementptr inbounds float* %tmp24722, i64 1
- %tmp24724 = getelementptr inbounds float* %tmp24723, i64 1
- %tmp24725 = getelementptr inbounds float* %tmp24724, i64 1
- %tmp24726 = getelementptr inbounds float* %tmp24725, i64 1
- %tmp24727 = getelementptr inbounds float* %tmp24726, i64 1
- %tmp24728 = getelementptr inbounds float* %tmp24727, i64 1
- %tmp24729 = getelementptr inbounds float* %tmp24728, i64 1
- %tmp24730 = getelementptr inbounds float* %tmp24729, i64 1
- %tmp24731 = getelementptr inbounds float* %tmp24730, i64 1
- %tmp24732 = getelementptr inbounds float* %tmp24731, i64 1
- %tmp24733 = getelementptr inbounds float* %tmp24732, i64 1
- %tmp24734 = getelementptr inbounds float* %tmp24733, i64 1
- %tmp24735 = getelementptr inbounds float* %tmp24734, i64 1
- %tmp24736 = getelementptr inbounds float* %tmp24735, i64 1
- %tmp24737 = getelementptr inbounds float* %tmp24736, i64 1
- %tmp24738 = getelementptr inbounds float* %tmp24737, i64 1
- %tmp24739 = getelementptr inbounds float* %tmp24738, i64 1
- %tmp24740 = getelementptr inbounds float* %tmp24739, i64 1
- %tmp24741 = getelementptr inbounds float* %tmp24740, i64 1
- %tmp24742 = getelementptr inbounds float* %tmp24741, i64 1
- %tmp24743 = getelementptr inbounds float* %tmp24742, i64 1
- %tmp24744 = getelementptr inbounds float* %tmp24743, i64 1
- %tmp24745 = getelementptr inbounds float* %tmp24744, i64 1
- %tmp24746 = getelementptr inbounds float* %tmp24745, i64 1
- %tmp24747 = getelementptr inbounds float* %tmp24746, i64 1
- %tmp24748 = getelementptr inbounds float* %tmp24747, i64 1
- %tmp24749 = getelementptr inbounds float* %tmp24748, i64 1
- %tmp24750 = getelementptr inbounds float* %tmp24749, i64 1
- %tmp24751 = getelementptr inbounds float* %tmp24750, i64 1
- %tmp24752 = getelementptr inbounds float* %tmp24751, i64 1
- %tmp24753 = getelementptr inbounds float* %tmp24752, i64 1
- %tmp24754 = getelementptr inbounds float* %tmp24753, i64 1
- %tmp24755 = getelementptr inbounds float* %tmp24754, i64 1
- %tmp24756 = getelementptr inbounds float* %tmp24755, i64 1
- %tmp24757 = getelementptr inbounds float* %tmp24756, i64 1
- %tmp24758 = getelementptr inbounds float* %tmp24757, i64 1
- %tmp24759 = getelementptr inbounds float* %tmp24758, i64 1
- %tmp24760 = getelementptr inbounds float* %tmp24759, i64 1
- %tmp24761 = getelementptr inbounds float* %tmp24760, i64 1
- %tmp24762 = getelementptr inbounds float* %tmp24761, i64 1
- %tmp24763 = getelementptr inbounds float* %tmp24762, i64 1
- %tmp24764 = getelementptr inbounds float* %tmp24763, i64 1
- %tmp24765 = getelementptr inbounds float* %tmp24764, i64 1
- %tmp24766 = getelementptr inbounds float* %tmp24765, i64 1
- %tmp24767 = getelementptr inbounds float* %tmp24766, i64 1
- %tmp24768 = getelementptr inbounds float* %tmp24767, i64 1
- %tmp24769 = getelementptr inbounds float* %tmp24768, i64 1
- %tmp24770 = getelementptr inbounds float* %tmp24769, i64 1
- %tmp24771 = getelementptr inbounds float* %tmp24770, i64 1
- %tmp24772 = getelementptr inbounds float* %tmp24771, i64 1
- %tmp24773 = getelementptr inbounds float* %tmp24772, i64 1
- %tmp24774 = getelementptr inbounds float* %tmp24773, i64 1
- %tmp24775 = getelementptr inbounds float* %tmp24774, i64 1
- %tmp24776 = getelementptr inbounds float* %tmp24775, i64 1
- %tmp24777 = getelementptr inbounds float* %tmp24776, i64 1
- %tmp24778 = getelementptr inbounds float* %tmp24777, i64 1
- %tmp24779 = getelementptr inbounds float* %tmp24778, i64 1
- %tmp24780 = getelementptr inbounds float* %tmp24779, i64 1
- %tmp24781 = getelementptr inbounds float* %tmp24780, i64 1
- %tmp24782 = getelementptr inbounds float* %tmp24781, i64 1
- %tmp24783 = getelementptr inbounds float* %tmp24782, i64 1
- %tmp24784 = getelementptr inbounds float* %tmp24783, i64 1
- %tmp24785 = getelementptr inbounds float* %tmp24784, i64 1
- %tmp24786 = getelementptr inbounds float* %tmp24785, i64 1
- %tmp24787 = getelementptr inbounds float* %tmp24786, i64 1
- %tmp24788 = getelementptr inbounds float* %tmp24787, i64 1
- %tmp24789 = getelementptr inbounds float* %tmp24788, i64 1
- %tmp24790 = getelementptr inbounds float* %tmp24789, i64 1
- %tmp24791 = getelementptr inbounds float* %tmp24790, i64 1
- %tmp24792 = getelementptr inbounds float* %tmp24791, i64 1
- %tmp24793 = getelementptr inbounds float* %tmp24792, i64 1
- %tmp24794 = getelementptr inbounds float* %tmp24793, i64 1
- %tmp24795 = getelementptr inbounds float* %tmp24794, i64 1
- %tmp24796 = getelementptr inbounds float* %tmp24795, i64 1
- %tmp24797 = getelementptr inbounds float* %tmp24796, i64 1
- %tmp24798 = getelementptr inbounds float* %tmp24797, i64 1
- %tmp24799 = getelementptr inbounds float* %tmp24798, i64 1
- %tmp24800 = getelementptr inbounds float* %tmp24799, i64 1
- %tmp24801 = getelementptr inbounds float* %tmp24800, i64 1
- %tmp24802 = getelementptr inbounds float* %tmp24801, i64 1
- %tmp24803 = getelementptr inbounds float* %tmp24802, i64 1
- %tmp24804 = getelementptr inbounds float* %tmp24803, i64 1
- %tmp24805 = getelementptr inbounds float* %tmp24804, i64 1
- %tmp24806 = getelementptr inbounds float* %tmp24805, i64 1
- %tmp24807 = getelementptr inbounds float* %tmp24806, i64 1
- %tmp24808 = getelementptr inbounds float* %tmp24807, i64 1
- %tmp24809 = getelementptr inbounds float* %tmp24808, i64 1
- %tmp24810 = getelementptr inbounds float* %tmp24809, i64 1
- %tmp24811 = getelementptr inbounds float* %tmp24810, i64 1
- %tmp24812 = getelementptr inbounds float* %tmp24811, i64 1
- %tmp24813 = getelementptr inbounds float* %tmp24812, i64 1
- %tmp24814 = getelementptr inbounds float* %tmp24813, i64 1
- %tmp24815 = getelementptr inbounds float* %tmp24814, i64 1
- %tmp24816 = getelementptr inbounds float* %tmp24815, i64 1
- %tmp24817 = getelementptr inbounds float* %tmp24816, i64 1
- %tmp24818 = getelementptr inbounds float* %tmp24817, i64 1
- %tmp24819 = getelementptr inbounds float* %tmp24818, i64 1
- %tmp24820 = getelementptr inbounds float* %tmp24819, i64 1
- %tmp24821 = getelementptr inbounds float* %tmp24820, i64 1
- %tmp24822 = getelementptr inbounds float* %tmp24821, i64 1
- %tmp24823 = getelementptr inbounds float* %tmp24822, i64 1
- %tmp24824 = getelementptr inbounds float* %tmp24823, i64 1
- %tmp24825 = getelementptr inbounds float* %tmp24824, i64 1
- %tmp24826 = getelementptr inbounds float* %tmp24825, i64 1
- %tmp24827 = getelementptr inbounds float* %tmp24826, i64 1
- %tmp24828 = getelementptr inbounds float* %tmp24827, i64 1
- %tmp24829 = getelementptr inbounds float* %tmp24828, i64 1
- %tmp24830 = getelementptr inbounds float* %tmp24829, i64 1
- %tmp24831 = getelementptr inbounds float* %tmp24830, i64 1
- %tmp24832 = getelementptr inbounds float* %tmp24831, i64 1
- %tmp24833 = getelementptr inbounds float* %tmp24832, i64 1
- %tmp24834 = getelementptr inbounds float* %tmp24833, i64 1
- %tmp24835 = getelementptr inbounds float* %tmp24834, i64 1
- %tmp24836 = getelementptr inbounds float* %tmp24835, i64 1
- %tmp24837 = getelementptr inbounds float* %tmp24836, i64 1
- %tmp24838 = getelementptr inbounds float* %tmp24837, i64 1
- %tmp24839 = getelementptr inbounds float* %tmp24838, i64 1
- %tmp24840 = getelementptr inbounds float* %tmp24839, i64 1
- %tmp24841 = getelementptr inbounds float* %tmp24840, i64 1
- %tmp24842 = getelementptr inbounds float* %tmp24841, i64 1
- %tmp24843 = getelementptr inbounds float* %tmp24842, i64 1
- %tmp24844 = getelementptr inbounds float* %tmp24843, i64 1
- %tmp24845 = getelementptr inbounds float* %tmp24844, i64 1
- %tmp24846 = getelementptr inbounds float* %tmp24845, i64 1
- %tmp24847 = getelementptr inbounds float* %tmp24846, i64 1
- %tmp24848 = getelementptr inbounds float* %tmp24847, i64 1
- %tmp24849 = getelementptr inbounds float* %tmp24848, i64 1
- %tmp24850 = getelementptr inbounds float* %tmp24849, i64 1
- %tmp24851 = getelementptr inbounds float* %tmp24850, i64 1
- %tmp24852 = getelementptr inbounds float* %tmp24851, i64 1
- %tmp24853 = getelementptr inbounds float* %tmp24852, i64 1
- %tmp24854 = getelementptr inbounds float* %tmp24853, i64 1
- %tmp24855 = getelementptr inbounds float* %tmp24854, i64 1
- %tmp24856 = getelementptr inbounds float* %tmp24855, i64 1
- %tmp24857 = getelementptr inbounds float* %tmp24856, i64 1
- %tmp24858 = getelementptr inbounds float* %tmp24857, i64 1
- %tmp24859 = getelementptr inbounds float* %tmp24858, i64 1
- %tmp24860 = getelementptr inbounds float* %tmp24859, i64 1
- %tmp24861 = getelementptr inbounds float* %tmp24860, i64 1
- %tmp24862 = getelementptr inbounds float* %tmp24861, i64 1
- %tmp24863 = getelementptr inbounds float* %tmp24862, i64 1
- %tmp24864 = getelementptr inbounds float* %tmp24863, i64 1
- %tmp24865 = getelementptr inbounds float* %tmp24864, i64 1
- %tmp24866 = getelementptr inbounds float* %tmp24865, i64 1
- %tmp24867 = getelementptr inbounds float* %tmp24866, i64 1
- %tmp24868 = getelementptr inbounds float* %tmp24867, i64 1
- %tmp24869 = getelementptr inbounds float* %tmp24868, i64 1
- %tmp24870 = getelementptr inbounds float* %tmp24869, i64 1
- %tmp24871 = getelementptr inbounds float* %tmp24870, i64 1
- %tmp24872 = getelementptr inbounds float* %tmp24871, i64 1
- %tmp24873 = getelementptr inbounds float* %tmp24872, i64 1
- %tmp24874 = getelementptr inbounds float* %tmp24873, i64 1
- %tmp24875 = getelementptr inbounds float* %tmp24874, i64 1
- %tmp24876 = getelementptr inbounds float* %tmp24875, i64 1
- %tmp24877 = getelementptr inbounds float* %tmp24876, i64 1
- %tmp24878 = getelementptr inbounds float* %tmp24877, i64 1
- %tmp24879 = getelementptr inbounds float* %tmp24878, i64 1
- %tmp24880 = getelementptr inbounds float* %tmp24879, i64 1
- %tmp24881 = getelementptr inbounds float* %tmp24880, i64 1
- %tmp24882 = getelementptr inbounds float* %tmp24881, i64 1
- %tmp24883 = getelementptr inbounds float* %tmp24882, i64 1
- %tmp24884 = getelementptr inbounds float* %tmp24883, i64 1
- %tmp24885 = getelementptr inbounds float* %tmp24884, i64 1
- %tmp24886 = getelementptr inbounds float* %tmp24885, i64 1
- %tmp24887 = getelementptr inbounds float* %tmp24886, i64 1
- %tmp24888 = getelementptr inbounds float* %tmp24887, i64 1
- %tmp24889 = getelementptr inbounds float* %tmp24888, i64 1
- %tmp24890 = getelementptr inbounds float* %tmp24889, i64 1
- %tmp24891 = getelementptr inbounds float* %tmp24890, i64 1
- %tmp24892 = getelementptr inbounds float* %tmp24891, i64 1
- %tmp24893 = getelementptr inbounds float* %tmp24892, i64 1
- %tmp24894 = getelementptr inbounds float* %tmp24893, i64 1
- %tmp24895 = getelementptr inbounds float* %tmp24894, i64 1
- %tmp24896 = getelementptr inbounds float* %tmp24895, i64 1
- %tmp24897 = getelementptr inbounds float* %tmp24896, i64 1
- %tmp24898 = getelementptr inbounds float* %tmp24897, i64 1
- %tmp24899 = getelementptr inbounds float* %tmp24898, i64 1
- %tmp24900 = getelementptr inbounds float* %tmp24899, i64 1
- %tmp24901 = getelementptr inbounds float* %tmp24900, i64 1
- %tmp24902 = getelementptr inbounds float* %tmp24901, i64 1
- %tmp24903 = getelementptr inbounds float* %tmp24902, i64 1
- %tmp24904 = getelementptr inbounds float* %tmp24903, i64 1
- %tmp24905 = getelementptr inbounds float* %tmp24904, i64 1
- %tmp24906 = getelementptr inbounds float* %tmp24905, i64 1
- %tmp24907 = getelementptr inbounds float* %tmp24906, i64 1
- %tmp24908 = getelementptr inbounds float* %tmp24907, i64 1
- %tmp24909 = getelementptr inbounds float* %tmp24908, i64 1
- %tmp24910 = getelementptr inbounds float* %tmp24909, i64 1
- %tmp24911 = getelementptr inbounds float* %tmp24910, i64 1
- %tmp24912 = getelementptr inbounds float* %tmp24911, i64 1
- %tmp24913 = getelementptr inbounds float* %tmp24912, i64 1
- %tmp24914 = getelementptr inbounds float* %tmp24913, i64 1
- %tmp24915 = getelementptr inbounds float* %tmp24914, i64 1
- %tmp24916 = getelementptr inbounds float* %tmp24915, i64 1
- %tmp24917 = getelementptr inbounds float* %tmp24916, i64 1
- %tmp24918 = getelementptr inbounds float* %tmp24917, i64 1
- %tmp24919 = getelementptr inbounds float* %tmp24918, i64 1
- %tmp24920 = getelementptr inbounds float* %tmp24919, i64 1
- %tmp24921 = getelementptr inbounds float* %tmp24920, i64 1
- %tmp24922 = getelementptr inbounds float* %tmp24921, i64 1
- %tmp24923 = getelementptr inbounds float* %tmp24922, i64 1
- %tmp24924 = getelementptr inbounds float* %tmp24923, i64 1
- %tmp24925 = getelementptr inbounds float* %tmp24924, i64 1
- %tmp24926 = getelementptr inbounds float* %tmp24925, i64 1
- %tmp24927 = getelementptr inbounds float* %tmp24926, i64 1
- %tmp24928 = getelementptr inbounds float* %tmp24927, i64 1
- %tmp24929 = getelementptr inbounds float* %tmp24928, i64 1
- %tmp24930 = getelementptr inbounds float* %tmp24929, i64 1
- %tmp24931 = getelementptr inbounds float* %tmp24930, i64 1
- %tmp24932 = getelementptr inbounds float* %tmp24931, i64 1
- %tmp24933 = getelementptr inbounds float* %tmp24932, i64 1
- %tmp24934 = getelementptr inbounds float* %tmp24933, i64 1
- %tmp24935 = getelementptr inbounds float* %tmp24934, i64 1
- %tmp24936 = getelementptr inbounds float* %tmp24935, i64 1
- %tmp24937 = getelementptr inbounds float* %tmp24936, i64 1
- %tmp24938 = getelementptr inbounds float* %tmp24937, i64 1
- %tmp24939 = getelementptr inbounds float* %tmp24938, i64 1
- %tmp24940 = getelementptr inbounds float* %tmp24939, i64 1
- %tmp24941 = getelementptr inbounds float* %tmp24940, i64 1
- %tmp24942 = getelementptr inbounds float* %tmp24941, i64 1
- %tmp24943 = getelementptr inbounds float* %tmp24942, i64 1
- %tmp24944 = getelementptr inbounds float* %tmp24943, i64 1
- %tmp24945 = getelementptr inbounds float* %tmp24944, i64 1
- %tmp24946 = getelementptr inbounds float* %tmp24945, i64 1
+ %tmp = getelementptr inbounds float, float* null, i64 1
+ %tmp3 = getelementptr inbounds float, float* %tmp, i64 1
+ %tmp4 = getelementptr inbounds float, float* %tmp3, i64 1
+ %tmp5 = getelementptr inbounds float, float* %tmp4, i64 1
+ %tmp6 = getelementptr inbounds float, float* %tmp5, i64 1
+ %tmp7 = getelementptr inbounds float, float* %tmp6, i64 1
+ %tmp8 = getelementptr inbounds float, float* %tmp7, i64 1
+ %tmp9 = getelementptr inbounds float, float* %tmp8, i64 1
+ %tmp10 = getelementptr inbounds float, float* %tmp9, i64 1
+ %tmp11 = getelementptr inbounds float, float* %tmp10, i64 1
+ %tmp12 = getelementptr inbounds float, float* %tmp11, i64 1
+ %tmp13 = getelementptr inbounds float, float* %tmp12, i64 1
+ %tmp14 = getelementptr inbounds float, float* %tmp13, i64 1
+ %tmp15 = getelementptr inbounds float, float* %tmp14, i64 1
+ %tmp16 = getelementptr inbounds float, float* %tmp15, i64 1
+ %tmp17 = getelementptr inbounds float, float* %tmp16, i64 1
+ %tmp18 = getelementptr inbounds float, float* %tmp17, i64 1
+ %tmp19 = getelementptr inbounds float, float* %tmp18, i64 1
+ %tmp20 = getelementptr inbounds float, float* %tmp19, i64 1
+ %tmp21 = getelementptr inbounds float, float* %tmp20, i64 1
+ %tmp22 = getelementptr inbounds float, float* %tmp21, i64 1
+ %tmp23 = getelementptr inbounds float, float* %tmp22, i64 1
+ %tmp24 = getelementptr inbounds float, float* %tmp23, i64 1
+ %tmp25 = getelementptr inbounds float, float* %tmp24, i64 1
+ %tmp26 = getelementptr inbounds float, float* %tmp25, i64 1
+ %tmp27 = getelementptr inbounds float, float* %tmp26, i64 1
+ %tmp28 = getelementptr inbounds float, float* %tmp27, i64 1
+ %tmp29 = getelementptr inbounds float, float* %tmp28, i64 1
+ %tmp30 = getelementptr inbounds float, float* %tmp29, i64 1
+ %tmp31 = getelementptr inbounds float, float* %tmp30, i64 1
+ %tmp32 = getelementptr inbounds float, float* %tmp31, i64 1
+ %tmp33 = getelementptr inbounds float, float* %tmp32, i64 1
+ %tmp34 = getelementptr inbounds float, float* %tmp33, i64 1
+ %tmp35 = getelementptr inbounds float, float* %tmp34, i64 1
+ %tmp36 = getelementptr inbounds float, float* %tmp35, i64 1
+ %tmp37 = getelementptr inbounds float, float* %tmp36, i64 1
+ %tmp38 = getelementptr inbounds float, float* %tmp37, i64 1
+ %tmp39 = getelementptr inbounds float, float* %tmp38, i64 1
+ %tmp40 = getelementptr inbounds float, float* %tmp39, i64 1
+ %tmp41 = getelementptr inbounds float, float* %tmp40, i64 1
+ %tmp42 = getelementptr inbounds float, float* %tmp41, i64 1
+ %tmp43 = getelementptr inbounds float, float* %tmp42, i64 1
+ %tmp44 = getelementptr inbounds float, float* %tmp43, i64 1
+ %tmp45 = getelementptr inbounds float, float* %tmp44, i64 1
+ %tmp46 = getelementptr inbounds float, float* %tmp45, i64 1
+ %tmp47 = getelementptr inbounds float, float* %tmp46, i64 1
+ %tmp48 = getelementptr inbounds float, float* %tmp47, i64 1
+ %tmp49 = getelementptr inbounds float, float* %tmp48, i64 1
+ %tmp50 = getelementptr inbounds float, float* %tmp49, i64 1
+ %tmp51 = getelementptr inbounds float, float* %tmp50, i64 1
+ %tmp52 = getelementptr inbounds float, float* %tmp51, i64 1
+ %tmp53 = getelementptr inbounds float, float* %tmp52, i64 1
+ %tmp54 = getelementptr inbounds float, float* %tmp53, i64 1
+ %tmp55 = getelementptr inbounds float, float* %tmp54, i64 1
+ %tmp56 = getelementptr inbounds float, float* %tmp55, i64 1
+ %tmp57 = getelementptr inbounds float, float* %tmp56, i64 1
+ %tmp58 = getelementptr inbounds float, float* %tmp57, i64 1
+ %tmp59 = getelementptr inbounds float, float* %tmp58, i64 1
+ %tmp60 = getelementptr inbounds float, float* %tmp59, i64 1
+ %tmp61 = getelementptr inbounds float, float* %tmp60, i64 1
+ %tmp62 = getelementptr inbounds float, float* %tmp61, i64 1
+ %tmp63 = getelementptr inbounds float, float* %tmp62, i64 1
+ %tmp64 = getelementptr inbounds float, float* %tmp63, i64 1
+ %tmp65 = getelementptr inbounds float, float* %tmp64, i64 1
+ %tmp66 = getelementptr inbounds float, float* %tmp65, i64 1
+ %tmp67 = getelementptr inbounds float, float* %tmp66, i64 1
+ %tmp68 = getelementptr inbounds float, float* %tmp67, i64 1
+ %tmp69 = getelementptr inbounds float, float* %tmp68, i64 1
+ %tmp70 = getelementptr inbounds float, float* %tmp69, i64 1
+ %tmp71 = getelementptr inbounds float, float* %tmp70, i64 1
+ %tmp72 = getelementptr inbounds float, float* %tmp71, i64 1
+ %tmp73 = getelementptr inbounds float, float* %tmp72, i64 1
+ %tmp74 = getelementptr inbounds float, float* %tmp73, i64 1
+ %tmp75 = getelementptr inbounds float, float* %tmp74, i64 1
+ %tmp76 = getelementptr inbounds float, float* %tmp75, i64 1
+ %tmp77 = getelementptr inbounds float, float* %tmp76, i64 1
+ %tmp78 = getelementptr inbounds float, float* %tmp77, i64 1
+ %tmp79 = getelementptr inbounds float, float* %tmp78, i64 1
+ %tmp80 = getelementptr inbounds float, float* %tmp79, i64 1
+ %tmp81 = getelementptr inbounds float, float* %tmp80, i64 1
+ %tmp82 = getelementptr inbounds float, float* %tmp81, i64 1
+ %tmp83 = getelementptr inbounds float, float* %tmp82, i64 1
+ %tmp84 = getelementptr inbounds float, float* %tmp83, i64 1
+ %tmp85 = getelementptr inbounds float, float* %tmp84, i64 1
+ %tmp86 = getelementptr inbounds float, float* %tmp85, i64 1
+ %tmp87 = getelementptr inbounds float, float* %tmp86, i64 1
+ %tmp88 = getelementptr inbounds float, float* %tmp87, i64 1
+ %tmp89 = getelementptr inbounds float, float* %tmp88, i64 1
+ %tmp90 = getelementptr inbounds float, float* %tmp89, i64 1
+ %tmp91 = getelementptr inbounds float, float* %tmp90, i64 1
+ %tmp92 = getelementptr inbounds float, float* %tmp91, i64 1
+ %tmp93 = getelementptr inbounds float, float* %tmp92, i64 1
+ %tmp94 = getelementptr inbounds float, float* %tmp93, i64 1
+ %tmp95 = getelementptr inbounds float, float* %tmp94, i64 1
+ %tmp96 = getelementptr inbounds float, float* %tmp95, i64 1
+ %tmp97 = getelementptr inbounds float, float* %tmp96, i64 1
+ %tmp98 = getelementptr inbounds float, float* %tmp97, i64 1
+ %tmp99 = getelementptr inbounds float, float* %tmp98, i64 1
+ %tmp100 = getelementptr inbounds float, float* %tmp99, i64 1
+ %tmp101 = getelementptr inbounds float, float* %tmp100, i64 1
+ %tmp102 = getelementptr inbounds float, float* %tmp101, i64 1
+ %tmp103 = getelementptr inbounds float, float* %tmp102, i64 1
+ %tmp104 = getelementptr inbounds float, float* %tmp103, i64 1
+ %tmp105 = getelementptr inbounds float, float* %tmp104, i64 1
+ %tmp106 = getelementptr inbounds float, float* %tmp105, i64 1
+ %tmp107 = getelementptr inbounds float, float* %tmp106, i64 1
+ %tmp108 = getelementptr inbounds float, float* %tmp107, i64 1
+ %tmp109 = getelementptr inbounds float, float* %tmp108, i64 1
+ %tmp110 = getelementptr inbounds float, float* %tmp109, i64 1
+ %tmp111 = getelementptr inbounds float, float* %tmp110, i64 1
+ %tmp112 = getelementptr inbounds float, float* %tmp111, i64 1
+ %tmp113 = getelementptr inbounds float, float* %tmp112, i64 1
+ %tmp114 = getelementptr inbounds float, float* %tmp113, i64 1
+ %tmp115 = getelementptr inbounds float, float* %tmp114, i64 1
+ %tmp116 = getelementptr inbounds float, float* %tmp115, i64 1
+ %tmp117 = getelementptr inbounds float, float* %tmp116, i64 1
+ %tmp118 = getelementptr inbounds float, float* %tmp117, i64 1
+ %tmp119 = getelementptr inbounds float, float* %tmp118, i64 1
+ %tmp120 = getelementptr inbounds float, float* %tmp119, i64 1
+ %tmp121 = getelementptr inbounds float, float* %tmp120, i64 1
+ %tmp122 = getelementptr inbounds float, float* %tmp121, i64 1
+ %tmp123 = getelementptr inbounds float, float* %tmp122, i64 1
+ %tmp124 = getelementptr inbounds float, float* %tmp123, i64 1
+ %tmp125 = getelementptr inbounds float, float* %tmp124, i64 1
+ %tmp126 = getelementptr inbounds float, float* %tmp125, i64 1
+ %tmp127 = getelementptr inbounds float, float* %tmp126, i64 1
+ %tmp128 = getelementptr inbounds float, float* %tmp127, i64 1
+ %tmp129 = getelementptr inbounds float, float* %tmp128, i64 1
+ %tmp130 = getelementptr inbounds float, float* %tmp129, i64 1
+ %tmp131 = getelementptr inbounds float, float* %tmp130, i64 1
+ %tmp132 = getelementptr inbounds float, float* %tmp131, i64 1
+ %tmp133 = getelementptr inbounds float, float* %tmp132, i64 1
+ %tmp134 = getelementptr inbounds float, float* %tmp133, i64 1
+ %tmp135 = getelementptr inbounds float, float* %tmp134, i64 1
+ %tmp136 = getelementptr inbounds float, float* %tmp135, i64 1
+ %tmp137 = getelementptr inbounds float, float* %tmp136, i64 1
+ %tmp138 = getelementptr inbounds float, float* %tmp137, i64 1
+ %tmp139 = getelementptr inbounds float, float* %tmp138, i64 1
+ %tmp140 = getelementptr inbounds float, float* %tmp139, i64 1
+ %tmp141 = getelementptr inbounds float, float* %tmp140, i64 1
+ %tmp142 = getelementptr inbounds float, float* %tmp141, i64 1
+ %tmp143 = getelementptr inbounds float, float* %tmp142, i64 1
+ %tmp144 = getelementptr inbounds float, float* %tmp143, i64 1
+ %tmp145 = getelementptr inbounds float, float* %tmp144, i64 1
+ %tmp146 = getelementptr inbounds float, float* %tmp145, i64 1
+ %tmp147 = getelementptr inbounds float, float* %tmp146, i64 1
+ %tmp148 = getelementptr inbounds float, float* %tmp147, i64 1
+ %tmp149 = getelementptr inbounds float, float* %tmp148, i64 1
+ %tmp150 = getelementptr inbounds float, float* %tmp149, i64 1
+ %tmp151 = getelementptr inbounds float, float* %tmp150, i64 1
+ %tmp152 = getelementptr inbounds float, float* %tmp151, i64 1
+ %tmp153 = getelementptr inbounds float, float* %tmp152, i64 1
+ %tmp154 = getelementptr inbounds float, float* %tmp153, i64 1
+ %tmp155 = getelementptr inbounds float, float* %tmp154, i64 1
+ %tmp156 = getelementptr inbounds float, float* %tmp155, i64 1
+ %tmp157 = getelementptr inbounds float, float* %tmp156, i64 1
+ %tmp158 = getelementptr inbounds float, float* %tmp157, i64 1
+ %tmp159 = getelementptr inbounds float, float* %tmp158, i64 1
+ %tmp160 = getelementptr inbounds float, float* %tmp159, i64 1
+ %tmp161 = getelementptr inbounds float, float* %tmp160, i64 1
+ %tmp162 = getelementptr inbounds float, float* %tmp161, i64 1
+ %tmp163 = getelementptr inbounds float, float* %tmp162, i64 1
+ %tmp164 = getelementptr inbounds float, float* %tmp163, i64 1
+ %tmp165 = getelementptr inbounds float, float* %tmp164, i64 1
+ %tmp166 = getelementptr inbounds float, float* %tmp165, i64 1
+ %tmp167 = getelementptr inbounds float, float* %tmp166, i64 1
+ %tmp168 = getelementptr inbounds float, float* %tmp167, i64 1
+ %tmp169 = getelementptr inbounds float, float* %tmp168, i64 1
+ %tmp170 = getelementptr inbounds float, float* %tmp169, i64 1
+ %tmp171 = getelementptr inbounds float, float* %tmp170, i64 1
+ %tmp172 = getelementptr inbounds float, float* %tmp171, i64 1
+ %tmp173 = getelementptr inbounds float, float* %tmp172, i64 1
+ %tmp174 = getelementptr inbounds float, float* %tmp173, i64 1
+ %tmp175 = getelementptr inbounds float, float* %tmp174, i64 1
+ %tmp176 = getelementptr inbounds float, float* %tmp175, i64 1
+ %tmp177 = getelementptr inbounds float, float* %tmp176, i64 1
+ %tmp178 = getelementptr inbounds float, float* %tmp177, i64 1
+ %tmp179 = getelementptr inbounds float, float* %tmp178, i64 1
+ %tmp180 = getelementptr inbounds float, float* %tmp179, i64 1
+ %tmp181 = getelementptr inbounds float, float* %tmp180, i64 1
+ %tmp182 = getelementptr inbounds float, float* %tmp181, i64 1
+ %tmp183 = getelementptr inbounds float, float* %tmp182, i64 1
+ %tmp184 = getelementptr inbounds float, float* %tmp183, i64 1
+ %tmp185 = getelementptr inbounds float, float* %tmp184, i64 1
+ %tmp186 = getelementptr inbounds float, float* %tmp185, i64 1
+ %tmp187 = getelementptr inbounds float, float* %tmp186, i64 1
+ %tmp188 = getelementptr inbounds float, float* %tmp187, i64 1
+ %tmp189 = getelementptr inbounds float, float* %tmp188, i64 1
+ %tmp190 = getelementptr inbounds float, float* %tmp189, i64 1
+ %tmp191 = getelementptr inbounds float, float* %tmp190, i64 1
+ %tmp192 = getelementptr inbounds float, float* %tmp191, i64 1
+ %tmp193 = getelementptr inbounds float, float* %tmp192, i64 1
+ %tmp194 = getelementptr inbounds float, float* %tmp193, i64 1
+ %tmp195 = getelementptr inbounds float, float* %tmp194, i64 1
+ %tmp196 = getelementptr inbounds float, float* %tmp195, i64 1
+ %tmp197 = getelementptr inbounds float, float* %tmp196, i64 1
+ %tmp198 = getelementptr inbounds float, float* %tmp197, i64 1
+ %tmp199 = getelementptr inbounds float, float* %tmp198, i64 1
+ %tmp200 = getelementptr inbounds float, float* %tmp199, i64 1
+ %tmp201 = getelementptr inbounds float, float* %tmp200, i64 1
+ %tmp202 = getelementptr inbounds float, float* %tmp201, i64 1
+ %tmp203 = getelementptr inbounds float, float* %tmp202, i64 1
+ %tmp204 = getelementptr inbounds float, float* %tmp203, i64 1
+ %tmp205 = getelementptr inbounds float, float* %tmp204, i64 1
+ %tmp206 = getelementptr inbounds float, float* %tmp205, i64 1
+ %tmp207 = getelementptr inbounds float, float* %tmp206, i64 1
+ %tmp208 = getelementptr inbounds float, float* %tmp207, i64 1
+ %tmp209 = getelementptr inbounds float, float* %tmp208, i64 1
+ %tmp210 = getelementptr inbounds float, float* %tmp209, i64 1
+ %tmp211 = getelementptr inbounds float, float* %tmp210, i64 1
+ %tmp212 = getelementptr inbounds float, float* %tmp211, i64 1
+ %tmp213 = getelementptr inbounds float, float* %tmp212, i64 1
+ %tmp214 = getelementptr inbounds float, float* %tmp213, i64 1
+ %tmp215 = getelementptr inbounds float, float* %tmp214, i64 1
+ %tmp216 = getelementptr inbounds float, float* %tmp215, i64 1
+ %tmp217 = getelementptr inbounds float, float* %tmp216, i64 1
+ %tmp218 = getelementptr inbounds float, float* %tmp217, i64 1
+ %tmp219 = getelementptr inbounds float, float* %tmp218, i64 1
+ %tmp220 = getelementptr inbounds float, float* %tmp219, i64 1
+ %tmp221 = getelementptr inbounds float, float* %tmp220, i64 1
+ %tmp222 = getelementptr inbounds float, float* %tmp221, i64 1
+ %tmp223 = getelementptr inbounds float, float* %tmp222, i64 1
+ %tmp224 = getelementptr inbounds float, float* %tmp223, i64 1
+ %tmp225 = getelementptr inbounds float, float* %tmp224, i64 1
+ %tmp226 = getelementptr inbounds float, float* %tmp225, i64 1
+ %tmp227 = getelementptr inbounds float, float* %tmp226, i64 1
+ %tmp228 = getelementptr inbounds float, float* %tmp227, i64 1
+ %tmp229 = getelementptr inbounds float, float* %tmp228, i64 1
+ %tmp230 = getelementptr inbounds float, float* %tmp229, i64 1
+ %tmp231 = getelementptr inbounds float, float* %tmp230, i64 1
+ %tmp232 = getelementptr inbounds float, float* %tmp231, i64 1
+ %tmp233 = getelementptr inbounds float, float* %tmp232, i64 1
+ %tmp234 = getelementptr inbounds float, float* %tmp233, i64 1
+ %tmp235 = getelementptr inbounds float, float* %tmp234, i64 1
+ %tmp236 = getelementptr inbounds float, float* %tmp235, i64 1
+ %tmp237 = getelementptr inbounds float, float* %tmp236, i64 1
+ %tmp238 = getelementptr inbounds float, float* %tmp237, i64 1
+ %tmp239 = getelementptr inbounds float, float* %tmp238, i64 1
+ %tmp240 = getelementptr inbounds float, float* %tmp239, i64 1
+ %tmp241 = getelementptr inbounds float, float* %tmp240, i64 1
+ %tmp242 = getelementptr inbounds float, float* %tmp241, i64 1
+ %tmp243 = getelementptr inbounds float, float* %tmp242, i64 1
+ %tmp244 = getelementptr inbounds float, float* %tmp243, i64 1
+ %tmp245 = getelementptr inbounds float, float* %tmp244, i64 1
+ %tmp246 = getelementptr inbounds float, float* %tmp245, i64 1
+ %tmp247 = getelementptr inbounds float, float* %tmp246, i64 1
+ %tmp248 = getelementptr inbounds float, float* %tmp247, i64 1
+ %tmp249 = getelementptr inbounds float, float* %tmp248, i64 1
+ %tmp250 = getelementptr inbounds float, float* %tmp249, i64 1
+ %tmp251 = getelementptr inbounds float, float* %tmp250, i64 1
+ %tmp252 = getelementptr inbounds float, float* %tmp251, i64 1
+ %tmp253 = getelementptr inbounds float, float* %tmp252, i64 1
+ %tmp254 = getelementptr inbounds float, float* %tmp253, i64 1
+ %tmp255 = getelementptr inbounds float, float* %tmp254, i64 1
+ %tmp256 = getelementptr inbounds float, float* %tmp255, i64 1
+ %tmp257 = getelementptr inbounds float, float* %tmp256, i64 1
+ %tmp258 = getelementptr inbounds float, float* %tmp257, i64 1
+ %tmp259 = getelementptr inbounds float, float* %tmp258, i64 1
+ %tmp260 = getelementptr inbounds float, float* %tmp259, i64 1
+ %tmp261 = getelementptr inbounds float, float* %tmp260, i64 1
+ %tmp262 = getelementptr inbounds float, float* %tmp261, i64 1
+ %tmp263 = getelementptr inbounds float, float* %tmp262, i64 1
+ %tmp264 = getelementptr inbounds float, float* %tmp263, i64 1
+ %tmp265 = getelementptr inbounds float, float* %tmp264, i64 1
+ %tmp266 = getelementptr inbounds float, float* %tmp265, i64 1
+ %tmp267 = getelementptr inbounds float, float* %tmp266, i64 1
+ %tmp268 = getelementptr inbounds float, float* %tmp267, i64 1
+ %tmp269 = getelementptr inbounds float, float* %tmp268, i64 1
+ %tmp270 = getelementptr inbounds float, float* %tmp269, i64 1
+ %tmp271 = getelementptr inbounds float, float* %tmp270, i64 1
+ %tmp272 = getelementptr inbounds float, float* %tmp271, i64 1
+ %tmp273 = getelementptr inbounds float, float* %tmp272, i64 1
+ %tmp274 = getelementptr inbounds float, float* %tmp273, i64 1
+ %tmp275 = getelementptr inbounds float, float* %tmp274, i64 1
+ %tmp276 = getelementptr inbounds float, float* %tmp275, i64 1
+ %tmp277 = getelementptr inbounds float, float* %tmp276, i64 1
+ %tmp278 = getelementptr inbounds float, float* %tmp277, i64 1
+ %tmp279 = getelementptr inbounds float, float* %tmp278, i64 1
+ %tmp280 = getelementptr inbounds float, float* %tmp279, i64 1
+ %tmp281 = getelementptr inbounds float, float* %tmp280, i64 1
+ %tmp282 = getelementptr inbounds float, float* %tmp281, i64 1
+ %tmp283 = getelementptr inbounds float, float* %tmp282, i64 1
+ %tmp284 = getelementptr inbounds float, float* %tmp283, i64 1
+ %tmp285 = getelementptr inbounds float, float* %tmp284, i64 1
+ %tmp286 = getelementptr inbounds float, float* %tmp285, i64 1
+ %tmp287 = getelementptr inbounds float, float* %tmp286, i64 1
+ %tmp288 = getelementptr inbounds float, float* %tmp287, i64 1
+ %tmp289 = getelementptr inbounds float, float* %tmp288, i64 1
+ %tmp290 = getelementptr inbounds float, float* %tmp289, i64 1
+ %tmp291 = getelementptr inbounds float, float* %tmp290, i64 1
+ %tmp292 = getelementptr inbounds float, float* %tmp291, i64 1
+ %tmp293 = getelementptr inbounds float, float* %tmp292, i64 1
+ %tmp294 = getelementptr inbounds float, float* %tmp293, i64 1
+ %tmp295 = getelementptr inbounds float, float* %tmp294, i64 1
+ %tmp296 = getelementptr inbounds float, float* %tmp295, i64 1
+ %tmp297 = getelementptr inbounds float, float* %tmp296, i64 1
+ %tmp298 = getelementptr inbounds float, float* %tmp297, i64 1
+ %tmp299 = getelementptr inbounds float, float* %tmp298, i64 1
+ %tmp300 = getelementptr inbounds float, float* %tmp299, i64 1
+ %tmp301 = getelementptr inbounds float, float* %tmp300, i64 1
+ %tmp302 = getelementptr inbounds float, float* %tmp301, i64 1
+ %tmp303 = getelementptr inbounds float, float* %tmp302, i64 1
+ %tmp304 = getelementptr inbounds float, float* %tmp303, i64 1
+ %tmp305 = getelementptr inbounds float, float* %tmp304, i64 1
+ %tmp306 = getelementptr inbounds float, float* %tmp305, i64 1
+ %tmp307 = getelementptr inbounds float, float* %tmp306, i64 1
+ %tmp308 = getelementptr inbounds float, float* %tmp307, i64 1
+ %tmp309 = getelementptr inbounds float, float* %tmp308, i64 1
+ %tmp310 = getelementptr inbounds float, float* %tmp309, i64 1
+ %tmp311 = getelementptr inbounds float, float* %tmp310, i64 1
+ %tmp312 = getelementptr inbounds float, float* %tmp311, i64 1
+ %tmp313 = getelementptr inbounds float, float* %tmp312, i64 1
+ %tmp314 = getelementptr inbounds float, float* %tmp313, i64 1
+ %tmp315 = getelementptr inbounds float, float* %tmp314, i64 1
+ %tmp316 = getelementptr inbounds float, float* %tmp315, i64 1
+ %tmp317 = getelementptr inbounds float, float* %tmp316, i64 1
+ %tmp318 = getelementptr inbounds float, float* %tmp317, i64 1
+ %tmp319 = getelementptr inbounds float, float* %tmp318, i64 1
+ %tmp320 = getelementptr inbounds float, float* %tmp319, i64 1
+ %tmp321 = getelementptr inbounds float, float* %tmp320, i64 1
+ %tmp322 = getelementptr inbounds float, float* %tmp321, i64 1
+ %tmp323 = getelementptr inbounds float, float* %tmp322, i64 1
+ %tmp324 = getelementptr inbounds float, float* %tmp323, i64 1
+ %tmp325 = getelementptr inbounds float, float* %tmp324, i64 1
+ %tmp326 = getelementptr inbounds float, float* %tmp325, i64 1
+ %tmp327 = getelementptr inbounds float, float* %tmp326, i64 1
+ %tmp328 = getelementptr inbounds float, float* %tmp327, i64 1
+ %tmp329 = getelementptr inbounds float, float* %tmp328, i64 1
+ %tmp330 = getelementptr inbounds float, float* %tmp329, i64 1
+ %tmp331 = getelementptr inbounds float, float* %tmp330, i64 1
+ %tmp332 = getelementptr inbounds float, float* %tmp331, i64 1
+ %tmp333 = getelementptr inbounds float, float* %tmp332, i64 1
+ %tmp334 = getelementptr inbounds float, float* %tmp333, i64 1
+ %tmp335 = getelementptr inbounds float, float* %tmp334, i64 1
+ %tmp336 = getelementptr inbounds float, float* %tmp335, i64 1
+ %tmp337 = getelementptr inbounds float, float* %tmp336, i64 1
+ %tmp338 = getelementptr inbounds float, float* %tmp337, i64 1
+ %tmp339 = getelementptr inbounds float, float* %tmp338, i64 1
+ %tmp340 = getelementptr inbounds float, float* %tmp339, i64 1
+ %tmp341 = getelementptr inbounds float, float* %tmp340, i64 1
+ %tmp342 = getelementptr inbounds float, float* %tmp341, i64 1
+ %tmp343 = getelementptr inbounds float, float* %tmp342, i64 1
+ %tmp344 = getelementptr inbounds float, float* %tmp343, i64 1
+ %tmp345 = getelementptr inbounds float, float* %tmp344, i64 1
+ %tmp346 = getelementptr inbounds float, float* %tmp345, i64 1
+ %tmp347 = getelementptr inbounds float, float* %tmp346, i64 1
+ %tmp348 = getelementptr inbounds float, float* %tmp347, i64 1
+ %tmp349 = getelementptr inbounds float, float* %tmp348, i64 1
+ %tmp350 = getelementptr inbounds float, float* %tmp349, i64 1
+ %tmp351 = getelementptr inbounds float, float* %tmp350, i64 1
+ %tmp352 = getelementptr inbounds float, float* %tmp351, i64 1
+ %tmp353 = getelementptr inbounds float, float* %tmp352, i64 1
+ %tmp354 = getelementptr inbounds float, float* %tmp353, i64 1
+ %tmp355 = getelementptr inbounds float, float* %tmp354, i64 1
+ %tmp356 = getelementptr inbounds float, float* %tmp355, i64 1
+ %tmp357 = getelementptr inbounds float, float* %tmp356, i64 1
+ %tmp358 = getelementptr inbounds float, float* %tmp357, i64 1
+ %tmp359 = getelementptr inbounds float, float* %tmp358, i64 1
+ %tmp360 = getelementptr inbounds float, float* %tmp359, i64 1
+ %tmp361 = getelementptr inbounds float, float* %tmp360, i64 1
+ %tmp362 = getelementptr inbounds float, float* %tmp361, i64 1
+ %tmp363 = getelementptr inbounds float, float* %tmp362, i64 1
+ %tmp364 = getelementptr inbounds float, float* %tmp363, i64 1
+ %tmp365 = getelementptr inbounds float, float* %tmp364, i64 1
+ %tmp366 = getelementptr inbounds float, float* %tmp365, i64 1
+ %tmp367 = getelementptr inbounds float, float* %tmp366, i64 1
+ %tmp368 = getelementptr inbounds float, float* %tmp367, i64 1
+ %tmp369 = getelementptr inbounds float, float* %tmp368, i64 1
+ %tmp370 = getelementptr inbounds float, float* %tmp369, i64 1
+ %tmp371 = getelementptr inbounds float, float* %tmp370, i64 1
+ %tmp372 = getelementptr inbounds float, float* %tmp371, i64 1
+ %tmp373 = getelementptr inbounds float, float* %tmp372, i64 1
+ %tmp374 = getelementptr inbounds float, float* %tmp373, i64 1
+ %tmp375 = getelementptr inbounds float, float* %tmp374, i64 1
+ %tmp376 = getelementptr inbounds float, float* %tmp375, i64 1
+ %tmp377 = getelementptr inbounds float, float* %tmp376, i64 1
+ %tmp378 = getelementptr inbounds float, float* %tmp377, i64 1
+ %tmp379 = getelementptr inbounds float, float* %tmp378, i64 1
+ %tmp380 = getelementptr inbounds float, float* %tmp379, i64 1
+ %tmp381 = getelementptr inbounds float, float* %tmp380, i64 1
+ %tmp382 = getelementptr inbounds float, float* %tmp381, i64 1
+ %tmp383 = getelementptr inbounds float, float* %tmp382, i64 1
+ %tmp384 = getelementptr inbounds float, float* %tmp383, i64 1
+ %tmp385 = getelementptr inbounds float, float* %tmp384, i64 1
+ %tmp386 = getelementptr inbounds float, float* %tmp385, i64 1
+ %tmp387 = getelementptr inbounds float, float* %tmp386, i64 1
+ %tmp388 = getelementptr inbounds float, float* %tmp387, i64 1
+ %tmp389 = getelementptr inbounds float, float* %tmp388, i64 1
+ %tmp390 = getelementptr inbounds float, float* %tmp389, i64 1
+ %tmp391 = getelementptr inbounds float, float* %tmp390, i64 1
+ %tmp392 = getelementptr inbounds float, float* %tmp391, i64 1
+ %tmp393 = getelementptr inbounds float, float* %tmp392, i64 1
+ %tmp394 = getelementptr inbounds float, float* %tmp393, i64 1
+ %tmp395 = getelementptr inbounds float, float* %tmp394, i64 1
+ %tmp396 = getelementptr inbounds float, float* %tmp395, i64 1
+ %tmp397 = getelementptr inbounds float, float* %tmp396, i64 1
+ %tmp398 = getelementptr inbounds float, float* %tmp397, i64 1
+ %tmp399 = getelementptr inbounds float, float* %tmp398, i64 1
+ %tmp400 = getelementptr inbounds float, float* %tmp399, i64 1
+ %tmp401 = getelementptr inbounds float, float* %tmp400, i64 1
+ %tmp402 = getelementptr inbounds float, float* %tmp401, i64 1
+ %tmp403 = getelementptr inbounds float, float* %tmp402, i64 1
+ %tmp404 = getelementptr inbounds float, float* %tmp403, i64 1
+ %tmp405 = getelementptr inbounds float, float* %tmp404, i64 1
+ %tmp406 = getelementptr inbounds float, float* %tmp405, i64 1
+ %tmp407 = getelementptr inbounds float, float* %tmp406, i64 1
+ %tmp408 = getelementptr inbounds float, float* %tmp407, i64 1
+ %tmp409 = getelementptr inbounds float, float* %tmp408, i64 1
+ %tmp410 = getelementptr inbounds float, float* %tmp409, i64 1
+ %tmp411 = getelementptr inbounds float, float* %tmp410, i64 1
+ %tmp412 = getelementptr inbounds float, float* %tmp411, i64 1
+ %tmp413 = getelementptr inbounds float, float* %tmp412, i64 1
+ %tmp414 = getelementptr inbounds float, float* %tmp413, i64 1
+ %tmp415 = getelementptr inbounds float, float* %tmp414, i64 1
+ %tmp416 = getelementptr inbounds float, float* %tmp415, i64 1
+ %tmp417 = getelementptr inbounds float, float* %tmp416, i64 1
+ %tmp418 = getelementptr inbounds float, float* %tmp417, i64 1
+ %tmp419 = getelementptr inbounds float, float* %tmp418, i64 1
+ %tmp420 = getelementptr inbounds float, float* %tmp419, i64 1
+ %tmp421 = getelementptr inbounds float, float* %tmp420, i64 1
+ %tmp422 = getelementptr inbounds float, float* %tmp421, i64 1
+ %tmp423 = getelementptr inbounds float, float* %tmp422, i64 1
+ %tmp424 = getelementptr inbounds float, float* %tmp423, i64 1
+ %tmp425 = getelementptr inbounds float, float* %tmp424, i64 1
+ %tmp426 = getelementptr inbounds float, float* %tmp425, i64 1
+ %tmp427 = getelementptr inbounds float, float* %tmp426, i64 1
+ %tmp428 = getelementptr inbounds float, float* %tmp427, i64 1
+ %tmp429 = getelementptr inbounds float, float* %tmp428, i64 1
+ %tmp430 = getelementptr inbounds float, float* %tmp429, i64 1
+ %tmp431 = getelementptr inbounds float, float* %tmp430, i64 1
+ %tmp432 = getelementptr inbounds float, float* %tmp431, i64 1
+ %tmp433 = getelementptr inbounds float, float* %tmp432, i64 1
+ %tmp434 = getelementptr inbounds float, float* %tmp433, i64 1
+ %tmp435 = getelementptr inbounds float, float* %tmp434, i64 1
+ %tmp436 = getelementptr inbounds float, float* %tmp435, i64 1
+ %tmp437 = getelementptr inbounds float, float* %tmp436, i64 1
+ %tmp438 = getelementptr inbounds float, float* %tmp437, i64 1
+ %tmp439 = getelementptr inbounds float, float* %tmp438, i64 1
+ %tmp440 = getelementptr inbounds float, float* %tmp439, i64 1
+ %tmp441 = getelementptr inbounds float, float* %tmp440, i64 1
+ %tmp442 = getelementptr inbounds float, float* %tmp441, i64 1
+ %tmp443 = getelementptr inbounds float, float* %tmp442, i64 1
+ %tmp444 = getelementptr inbounds float, float* %tmp443, i64 1
+ %tmp445 = getelementptr inbounds float, float* %tmp444, i64 1
+ %tmp446 = getelementptr inbounds float, float* %tmp445, i64 1
+ %tmp447 = getelementptr inbounds float, float* %tmp446, i64 1
+ %tmp448 = getelementptr inbounds float, float* %tmp447, i64 1
+ %tmp449 = getelementptr inbounds float, float* %tmp448, i64 1
+ %tmp450 = getelementptr inbounds float, float* %tmp449, i64 1
+ %tmp451 = getelementptr inbounds float, float* %tmp450, i64 1
+ %tmp452 = getelementptr inbounds float, float* %tmp451, i64 1
+ %tmp453 = getelementptr inbounds float, float* %tmp452, i64 1
+ %tmp454 = getelementptr inbounds float, float* %tmp453, i64 1
+ %tmp455 = getelementptr inbounds float, float* %tmp454, i64 1
+ %tmp456 = getelementptr inbounds float, float* %tmp455, i64 1
+ %tmp457 = getelementptr inbounds float, float* %tmp456, i64 1
+ %tmp458 = getelementptr inbounds float, float* %tmp457, i64 1
+ %tmp459 = getelementptr inbounds float, float* %tmp458, i64 1
+ %tmp460 = getelementptr inbounds float, float* %tmp459, i64 1
+ %tmp461 = getelementptr inbounds float, float* %tmp460, i64 1
+ %tmp462 = getelementptr inbounds float, float* %tmp461, i64 1
+ %tmp463 = getelementptr inbounds float, float* %tmp462, i64 1
+ %tmp464 = getelementptr inbounds float, float* %tmp463, i64 1
+ %tmp465 = getelementptr inbounds float, float* %tmp464, i64 1
+ %tmp466 = getelementptr inbounds float, float* %tmp465, i64 1
+ %tmp467 = getelementptr inbounds float, float* %tmp466, i64 1
+ %tmp468 = getelementptr inbounds float, float* %tmp467, i64 1
+ %tmp469 = getelementptr inbounds float, float* %tmp468, i64 1
+ %tmp470 = getelementptr inbounds float, float* %tmp469, i64 1
+ %tmp471 = getelementptr inbounds float, float* %tmp470, i64 1
+ %tmp472 = getelementptr inbounds float, float* %tmp471, i64 1
+ %tmp473 = getelementptr inbounds float, float* %tmp472, i64 1
+ %tmp474 = getelementptr inbounds float, float* %tmp473, i64 1
+ %tmp475 = getelementptr inbounds float, float* %tmp474, i64 1
+ %tmp476 = getelementptr inbounds float, float* %tmp475, i64 1
+ %tmp477 = getelementptr inbounds float, float* %tmp476, i64 1
+ %tmp478 = getelementptr inbounds float, float* %tmp477, i64 1
+ %tmp479 = getelementptr inbounds float, float* %tmp478, i64 1
+ %tmp480 = getelementptr inbounds float, float* %tmp479, i64 1
+ %tmp481 = getelementptr inbounds float, float* %tmp480, i64 1
+ %tmp482 = getelementptr inbounds float, float* %tmp481, i64 1
+ %tmp483 = getelementptr inbounds float, float* %tmp482, i64 1
+ %tmp484 = getelementptr inbounds float, float* %tmp483, i64 1
+ %tmp485 = getelementptr inbounds float, float* %tmp484, i64 1
+ %tmp486 = getelementptr inbounds float, float* %tmp485, i64 1
+ %tmp487 = getelementptr inbounds float, float* %tmp486, i64 1
+ %tmp488 = getelementptr inbounds float, float* %tmp487, i64 1
+ %tmp489 = getelementptr inbounds float, float* %tmp488, i64 1
+ %tmp490 = getelementptr inbounds float, float* %tmp489, i64 1
+ %tmp491 = getelementptr inbounds float, float* %tmp490, i64 1
+ %tmp492 = getelementptr inbounds float, float* %tmp491, i64 1
+ %tmp493 = getelementptr inbounds float, float* %tmp492, i64 1
+ %tmp494 = getelementptr inbounds float, float* %tmp493, i64 1
+ %tmp495 = getelementptr inbounds float, float* %tmp494, i64 1
+ %tmp496 = getelementptr inbounds float, float* %tmp495, i64 1
+ %tmp497 = getelementptr inbounds float, float* %tmp496, i64 1
+ %tmp498 = getelementptr inbounds float, float* %tmp497, i64 1
+ %tmp499 = getelementptr inbounds float, float* %tmp498, i64 1
+ %tmp500 = getelementptr inbounds float, float* %tmp499, i64 1
+ %tmp501 = getelementptr inbounds float, float* %tmp500, i64 1
+ %tmp502 = getelementptr inbounds float, float* %tmp501, i64 1
+ %tmp503 = getelementptr inbounds float, float* %tmp502, i64 1
+ %tmp504 = getelementptr inbounds float, float* %tmp503, i64 1
+ %tmp505 = getelementptr inbounds float, float* %tmp504, i64 1
+ %tmp506 = getelementptr inbounds float, float* %tmp505, i64 1
+ %tmp507 = getelementptr inbounds float, float* %tmp506, i64 1
+ %tmp508 = getelementptr inbounds float, float* %tmp507, i64 1
+ %tmp509 = getelementptr inbounds float, float* %tmp508, i64 1
+ %tmp510 = getelementptr inbounds float, float* %tmp509, i64 1
+ %tmp511 = getelementptr inbounds float, float* %tmp510, i64 1
+ %tmp512 = getelementptr inbounds float, float* %tmp511, i64 1
+ %tmp513 = getelementptr inbounds float, float* %tmp512, i64 1
+ %tmp514 = getelementptr inbounds float, float* %tmp513, i64 1
+ %tmp515 = getelementptr inbounds float, float* %tmp514, i64 1
+ %tmp516 = getelementptr inbounds float, float* %tmp515, i64 1
+ %tmp517 = getelementptr inbounds float, float* %tmp516, i64 1
+ %tmp518 = getelementptr inbounds float, float* %tmp517, i64 1
+ %tmp519 = getelementptr inbounds float, float* %tmp518, i64 1
+ %tmp520 = getelementptr inbounds float, float* %tmp519, i64 1
+ %tmp521 = getelementptr inbounds float, float* %tmp520, i64 1
+ %tmp522 = getelementptr inbounds float, float* %tmp521, i64 1
+ %tmp523 = getelementptr inbounds float, float* %tmp522, i64 1
+ %tmp524 = getelementptr inbounds float, float* %tmp523, i64 1
+ %tmp525 = getelementptr inbounds float, float* %tmp524, i64 1
+ %tmp526 = getelementptr inbounds float, float* %tmp525, i64 1
+ %tmp527 = getelementptr inbounds float, float* %tmp526, i64 1
+ %tmp528 = getelementptr inbounds float, float* %tmp527, i64 1
+ %tmp529 = getelementptr inbounds float, float* %tmp528, i64 1
+ %tmp530 = getelementptr inbounds float, float* %tmp529, i64 1
+ %tmp531 = getelementptr inbounds float, float* %tmp530, i64 1
+ %tmp532 = getelementptr inbounds float, float* %tmp531, i64 1
+ %tmp533 = getelementptr inbounds float, float* %tmp532, i64 1
+ %tmp534 = getelementptr inbounds float, float* %tmp533, i64 1
+ %tmp535 = getelementptr inbounds float, float* %tmp534, i64 1
+ %tmp536 = getelementptr inbounds float, float* %tmp535, i64 1
+ %tmp537 = getelementptr inbounds float, float* %tmp536, i64 1
+ %tmp538 = getelementptr inbounds float, float* %tmp537, i64 1
+ %tmp539 = getelementptr inbounds float, float* %tmp538, i64 1
+ %tmp540 = getelementptr inbounds float, float* %tmp539, i64 1
+ %tmp541 = getelementptr inbounds float, float* %tmp540, i64 1
+ %tmp542 = getelementptr inbounds float, float* %tmp541, i64 1
+ %tmp543 = getelementptr inbounds float, float* %tmp542, i64 1
+ %tmp544 = getelementptr inbounds float, float* %tmp543, i64 1
+ %tmp545 = getelementptr inbounds float, float* %tmp544, i64 1
+ %tmp546 = getelementptr inbounds float, float* %tmp545, i64 1
+ %tmp547 = getelementptr inbounds float, float* %tmp546, i64 1
+ %tmp548 = getelementptr inbounds float, float* %tmp547, i64 1
+ %tmp549 = getelementptr inbounds float, float* %tmp548, i64 1
+ %tmp550 = getelementptr inbounds float, float* %tmp549, i64 1
+ %tmp551 = getelementptr inbounds float, float* %tmp550, i64 1
+ %tmp552 = getelementptr inbounds float, float* %tmp551, i64 1
+ %tmp553 = getelementptr inbounds float, float* %tmp552, i64 1
+ %tmp554 = getelementptr inbounds float, float* %tmp553, i64 1
+ %tmp555 = getelementptr inbounds float, float* %tmp554, i64 1
+ %tmp556 = getelementptr inbounds float, float* %tmp555, i64 1
+ %tmp557 = getelementptr inbounds float, float* %tmp556, i64 1
+ %tmp558 = getelementptr inbounds float, float* %tmp557, i64 1
+ %tmp559 = getelementptr inbounds float, float* %tmp558, i64 1
+ %tmp560 = getelementptr inbounds float, float* %tmp559, i64 1
+ %tmp561 = getelementptr inbounds float, float* %tmp560, i64 1
+ %tmp562 = getelementptr inbounds float, float* %tmp561, i64 1
+ %tmp563 = getelementptr inbounds float, float* %tmp562, i64 1
+ %tmp564 = getelementptr inbounds float, float* %tmp563, i64 1
+ %tmp565 = getelementptr inbounds float, float* %tmp564, i64 1
+ %tmp566 = getelementptr inbounds float, float* %tmp565, i64 1
+ %tmp567 = getelementptr inbounds float, float* %tmp566, i64 1
+ %tmp568 = getelementptr inbounds float, float* %tmp567, i64 1
+ %tmp569 = getelementptr inbounds float, float* %tmp568, i64 1
+ %tmp570 = getelementptr inbounds float, float* %tmp569, i64 1
+ %tmp571 = getelementptr inbounds float, float* %tmp570, i64 1
+ %tmp572 = getelementptr inbounds float, float* %tmp571, i64 1
+ %tmp573 = getelementptr inbounds float, float* %tmp572, i64 1
+ %tmp574 = getelementptr inbounds float, float* %tmp573, i64 1
+ %tmp575 = getelementptr inbounds float, float* %tmp574, i64 1
+ %tmp576 = getelementptr inbounds float, float* %tmp575, i64 1
+ %tmp577 = getelementptr inbounds float, float* %tmp576, i64 1
+ %tmp578 = getelementptr inbounds float, float* %tmp577, i64 1
+ %tmp579 = getelementptr inbounds float, float* %tmp578, i64 1
+ %tmp580 = getelementptr inbounds float, float* %tmp579, i64 1
+ %tmp581 = getelementptr inbounds float, float* %tmp580, i64 1
+ %tmp582 = getelementptr inbounds float, float* %tmp581, i64 1
+ %tmp583 = getelementptr inbounds float, float* %tmp582, i64 1
+ %tmp584 = getelementptr inbounds float, float* %tmp583, i64 1
+ %tmp585 = getelementptr inbounds float, float* %tmp584, i64 1
+ %tmp586 = getelementptr inbounds float, float* %tmp585, i64 1
+ %tmp587 = getelementptr inbounds float, float* %tmp586, i64 1
+ %tmp588 = getelementptr inbounds float, float* %tmp587, i64 1
+ %tmp589 = getelementptr inbounds float, float* %tmp588, i64 1
+ %tmp590 = getelementptr inbounds float, float* %tmp589, i64 1
+ %tmp591 = getelementptr inbounds float, float* %tmp590, i64 1
+ %tmp592 = getelementptr inbounds float, float* %tmp591, i64 1
+ %tmp593 = getelementptr inbounds float, float* %tmp592, i64 1
+ %tmp594 = getelementptr inbounds float, float* %tmp593, i64 1
+ %tmp595 = getelementptr inbounds float, float* %tmp594, i64 1
+ %tmp596 = getelementptr inbounds float, float* %tmp595, i64 1
+ %tmp597 = getelementptr inbounds float, float* %tmp596, i64 1
+ %tmp598 = getelementptr inbounds float, float* %tmp597, i64 1
+ %tmp599 = getelementptr inbounds float, float* %tmp598, i64 1
+ %tmp600 = getelementptr inbounds float, float* %tmp599, i64 1
+ %tmp601 = getelementptr inbounds float, float* %tmp600, i64 1
+ %tmp602 = getelementptr inbounds float, float* %tmp601, i64 1
+ %tmp603 = getelementptr inbounds float, float* %tmp602, i64 1
+ %tmp604 = getelementptr inbounds float, float* %tmp603, i64 1
+ %tmp605 = getelementptr inbounds float, float* %tmp604, i64 1
+ %tmp606 = getelementptr inbounds float, float* %tmp605, i64 1
+ %tmp607 = getelementptr inbounds float, float* %tmp606, i64 1
+ %tmp608 = getelementptr inbounds float, float* %tmp607, i64 1
+ %tmp609 = getelementptr inbounds float, float* %tmp608, i64 1
+ %tmp610 = getelementptr inbounds float, float* %tmp609, i64 1
+ %tmp611 = getelementptr inbounds float, float* %tmp610, i64 1
+ %tmp612 = getelementptr inbounds float, float* %tmp611, i64 1
+ %tmp613 = getelementptr inbounds float, float* %tmp612, i64 1
+ %tmp614 = getelementptr inbounds float, float* %tmp613, i64 1
+ %tmp615 = getelementptr inbounds float, float* %tmp614, i64 1
+ %tmp616 = getelementptr inbounds float, float* %tmp615, i64 1
+ %tmp617 = getelementptr inbounds float, float* %tmp616, i64 1
+ %tmp618 = getelementptr inbounds float, float* %tmp617, i64 1
+ %tmp619 = getelementptr inbounds float, float* %tmp618, i64 1
+ %tmp620 = getelementptr inbounds float, float* %tmp619, i64 1
+ %tmp621 = getelementptr inbounds float, float* %tmp620, i64 1
+ %tmp622 = getelementptr inbounds float, float* %tmp621, i64 1
+ %tmp623 = getelementptr inbounds float, float* %tmp622, i64 1
+ %tmp624 = getelementptr inbounds float, float* %tmp623, i64 1
+ %tmp625 = getelementptr inbounds float, float* %tmp624, i64 1
+ %tmp626 = getelementptr inbounds float, float* %tmp625, i64 1
+ %tmp627 = getelementptr inbounds float, float* %tmp626, i64 1
+ %tmp628 = getelementptr inbounds float, float* %tmp627, i64 1
+ %tmp629 = getelementptr inbounds float, float* %tmp628, i64 1
+ %tmp630 = getelementptr inbounds float, float* %tmp629, i64 1
+ %tmp631 = getelementptr inbounds float, float* %tmp630, i64 1
+ %tmp632 = getelementptr inbounds float, float* %tmp631, i64 1
+ %tmp633 = getelementptr inbounds float, float* %tmp632, i64 1
+ %tmp634 = getelementptr inbounds float, float* %tmp633, i64 1
+ %tmp635 = getelementptr inbounds float, float* %tmp634, i64 1
+ %tmp636 = getelementptr inbounds float, float* %tmp635, i64 1
+ %tmp637 = getelementptr inbounds float, float* %tmp636, i64 1
+ %tmp638 = getelementptr inbounds float, float* %tmp637, i64 1
+ %tmp639 = getelementptr inbounds float, float* %tmp638, i64 1
+ %tmp640 = getelementptr inbounds float, float* %tmp639, i64 1
+ %tmp641 = getelementptr inbounds float, float* %tmp640, i64 1
+ %tmp642 = getelementptr inbounds float, float* %tmp641, i64 1
+ %tmp643 = getelementptr inbounds float, float* %tmp642, i64 1
+ %tmp644 = getelementptr inbounds float, float* %tmp643, i64 1
+ %tmp645 = getelementptr inbounds float, float* %tmp644, i64 1
+ %tmp646 = getelementptr inbounds float, float* %tmp645, i64 1
+ %tmp647 = getelementptr inbounds float, float* %tmp646, i64 1
+ %tmp648 = getelementptr inbounds float, float* %tmp647, i64 1
+ %tmp649 = getelementptr inbounds float, float* %tmp648, i64 1
+ %tmp650 = getelementptr inbounds float, float* %tmp649, i64 1
+ %tmp651 = getelementptr inbounds float, float* %tmp650, i64 1
+ %tmp652 = getelementptr inbounds float, float* %tmp651, i64 1
+ %tmp653 = getelementptr inbounds float, float* %tmp652, i64 1
+ %tmp654 = getelementptr inbounds float, float* %tmp653, i64 1
+ %tmp655 = getelementptr inbounds float, float* %tmp654, i64 1
+ %tmp656 = getelementptr inbounds float, float* %tmp655, i64 1
+ %tmp657 = getelementptr inbounds float, float* %tmp656, i64 1
+ %tmp658 = getelementptr inbounds float, float* %tmp657, i64 1
+ %tmp659 = getelementptr inbounds float, float* %tmp658, i64 1
+ %tmp660 = getelementptr inbounds float, float* %tmp659, i64 1
+ %tmp661 = getelementptr inbounds float, float* %tmp660, i64 1
+ %tmp662 = getelementptr inbounds float, float* %tmp661, i64 1
+ %tmp663 = getelementptr inbounds float, float* %tmp662, i64 1
+ %tmp664 = getelementptr inbounds float, float* %tmp663, i64 1
+ %tmp665 = getelementptr inbounds float, float* %tmp664, i64 1
+ %tmp666 = getelementptr inbounds float, float* %tmp665, i64 1
+ %tmp667 = getelementptr inbounds float, float* %tmp666, i64 1
+ %tmp668 = getelementptr inbounds float, float* %tmp667, i64 1
+ %tmp669 = getelementptr inbounds float, float* %tmp668, i64 1
+ %tmp670 = getelementptr inbounds float, float* %tmp669, i64 1
+ %tmp671 = getelementptr inbounds float, float* %tmp670, i64 1
+ %tmp672 = getelementptr inbounds float, float* %tmp671, i64 1
+ %tmp673 = getelementptr inbounds float, float* %tmp672, i64 1
+ %tmp674 = getelementptr inbounds float, float* %tmp673, i64 1
+ %tmp675 = getelementptr inbounds float, float* %tmp674, i64 1
+ %tmp676 = getelementptr inbounds float, float* %tmp675, i64 1
+ %tmp677 = getelementptr inbounds float, float* %tmp676, i64 1
+ %tmp678 = getelementptr inbounds float, float* %tmp677, i64 1
+ %tmp679 = getelementptr inbounds float, float* %tmp678, i64 1
+ %tmp680 = getelementptr inbounds float, float* %tmp679, i64 1
+ %tmp681 = getelementptr inbounds float, float* %tmp680, i64 1
+ %tmp682 = getelementptr inbounds float, float* %tmp681, i64 1
+ %tmp683 = getelementptr inbounds float, float* %tmp682, i64 1
+ %tmp684 = getelementptr inbounds float, float* %tmp683, i64 1
+ %tmp685 = getelementptr inbounds float, float* %tmp684, i64 1
+ %tmp686 = getelementptr inbounds float, float* %tmp685, i64 1
+ %tmp687 = getelementptr inbounds float, float* %tmp686, i64 1
+ %tmp688 = getelementptr inbounds float, float* %tmp687, i64 1
+ %tmp689 = getelementptr inbounds float, float* %tmp688, i64 1
+ %tmp690 = getelementptr inbounds float, float* %tmp689, i64 1
+ %tmp691 = getelementptr inbounds float, float* %tmp690, i64 1
+ %tmp692 = getelementptr inbounds float, float* %tmp691, i64 1
+ %tmp693 = getelementptr inbounds float, float* %tmp692, i64 1
+ %tmp694 = getelementptr inbounds float, float* %tmp693, i64 1
+ %tmp695 = getelementptr inbounds float, float* %tmp694, i64 1
+ %tmp696 = getelementptr inbounds float, float* %tmp695, i64 1
+ %tmp697 = getelementptr inbounds float, float* %tmp696, i64 1
+ %tmp698 = getelementptr inbounds float, float* %tmp697, i64 1
+ %tmp699 = getelementptr inbounds float, float* %tmp698, i64 1
+ %tmp700 = getelementptr inbounds float, float* %tmp699, i64 1
+ %tmp701 = getelementptr inbounds float, float* %tmp700, i64 1
+ %tmp702 = getelementptr inbounds float, float* %tmp701, i64 1
+ %tmp703 = getelementptr inbounds float, float* %tmp702, i64 1
+ %tmp704 = getelementptr inbounds float, float* %tmp703, i64 1
+ %tmp705 = getelementptr inbounds float, float* %tmp704, i64 1
+ %tmp706 = getelementptr inbounds float, float* %tmp705, i64 1
+ %tmp707 = getelementptr inbounds float, float* %tmp706, i64 1
+ %tmp708 = getelementptr inbounds float, float* %tmp707, i64 1
+ %tmp709 = getelementptr inbounds float, float* %tmp708, i64 1
+ %tmp710 = getelementptr inbounds float, float* %tmp709, i64 1
+ %tmp711 = getelementptr inbounds float, float* %tmp710, i64 1
+ %tmp712 = getelementptr inbounds float, float* %tmp711, i64 1
+ %tmp713 = getelementptr inbounds float, float* %tmp712, i64 1
+ %tmp714 = getelementptr inbounds float, float* %tmp713, i64 1
+ %tmp715 = getelementptr inbounds float, float* %tmp714, i64 1
+ %tmp716 = getelementptr inbounds float, float* %tmp715, i64 1
+ %tmp717 = getelementptr inbounds float, float* %tmp716, i64 1
+ %tmp718 = getelementptr inbounds float, float* %tmp717, i64 1
+ %tmp719 = getelementptr inbounds float, float* %tmp718, i64 1
+ %tmp720 = getelementptr inbounds float, float* %tmp719, i64 1
+ %tmp721 = getelementptr inbounds float, float* %tmp720, i64 1
+ %tmp722 = getelementptr inbounds float, float* %tmp721, i64 1
+ %tmp723 = getelementptr inbounds float, float* %tmp722, i64 1
+ %tmp724 = getelementptr inbounds float, float* %tmp723, i64 1
+ %tmp725 = getelementptr inbounds float, float* %tmp724, i64 1
+ %tmp726 = getelementptr inbounds float, float* %tmp725, i64 1
+ %tmp727 = getelementptr inbounds float, float* %tmp726, i64 1
+ %tmp728 = getelementptr inbounds float, float* %tmp727, i64 1
+ %tmp729 = getelementptr inbounds float, float* %tmp728, i64 1
+ %tmp730 = getelementptr inbounds float, float* %tmp729, i64 1
+ %tmp731 = getelementptr inbounds float, float* %tmp730, i64 1
+ %tmp732 = getelementptr inbounds float, float* %tmp731, i64 1
+ %tmp733 = getelementptr inbounds float, float* %tmp732, i64 1
+ %tmp734 = getelementptr inbounds float, float* %tmp733, i64 1
+ %tmp735 = getelementptr inbounds float, float* %tmp734, i64 1
+ %tmp736 = getelementptr inbounds float, float* %tmp735, i64 1
+ %tmp737 = getelementptr inbounds float, float* %tmp736, i64 1
+ %tmp738 = getelementptr inbounds float, float* %tmp737, i64 1
+ %tmp739 = getelementptr inbounds float, float* %tmp738, i64 1
+ %tmp740 = getelementptr inbounds float, float* %tmp739, i64 1
+ %tmp741 = getelementptr inbounds float, float* %tmp740, i64 1
+ %tmp742 = getelementptr inbounds float, float* %tmp741, i64 1
+ %tmp743 = getelementptr inbounds float, float* %tmp742, i64 1
+ %tmp744 = getelementptr inbounds float, float* %tmp743, i64 1
+ %tmp745 = getelementptr inbounds float, float* %tmp744, i64 1
+ %tmp746 = getelementptr inbounds float, float* %tmp745, i64 1
+ %tmp747 = getelementptr inbounds float, float* %tmp746, i64 1
+ %tmp748 = getelementptr inbounds float, float* %tmp747, i64 1
+ %tmp749 = getelementptr inbounds float, float* %tmp748, i64 1
+ %tmp750 = getelementptr inbounds float, float* %tmp749, i64 1
+ %tmp751 = getelementptr inbounds float, float* %tmp750, i64 1
+ %tmp752 = getelementptr inbounds float, float* %tmp751, i64 1
+ %tmp753 = getelementptr inbounds float, float* %tmp752, i64 1
+ %tmp754 = getelementptr inbounds float, float* %tmp753, i64 1
+ %tmp755 = getelementptr inbounds float, float* %tmp754, i64 1
+ %tmp756 = getelementptr inbounds float, float* %tmp755, i64 1
+ %tmp757 = getelementptr inbounds float, float* %tmp756, i64 1
+ %tmp758 = getelementptr inbounds float, float* %tmp757, i64 1
+ %tmp759 = getelementptr inbounds float, float* %tmp758, i64 1
+ %tmp760 = getelementptr inbounds float, float* %tmp759, i64 1
+ %tmp761 = getelementptr inbounds float, float* %tmp760, i64 1
+ %tmp762 = getelementptr inbounds float, float* %tmp761, i64 1
+ %tmp763 = getelementptr inbounds float, float* %tmp762, i64 1
+ %tmp764 = getelementptr inbounds float, float* %tmp763, i64 1
+ %tmp765 = getelementptr inbounds float, float* %tmp764, i64 1
+ %tmp766 = getelementptr inbounds float, float* %tmp765, i64 1
+ %tmp767 = getelementptr inbounds float, float* %tmp766, i64 1
+ %tmp768 = getelementptr inbounds float, float* %tmp767, i64 1
+ %tmp769 = getelementptr inbounds float, float* %tmp768, i64 1
+ %tmp770 = getelementptr inbounds float, float* %tmp769, i64 1
+ %tmp771 = getelementptr inbounds float, float* %tmp770, i64 1
+ %tmp772 = getelementptr inbounds float, float* %tmp771, i64 1
+ %tmp773 = getelementptr inbounds float, float* %tmp772, i64 1
+ %tmp774 = getelementptr inbounds float, float* %tmp773, i64 1
+ %tmp775 = getelementptr inbounds float, float* %tmp774, i64 1
+ %tmp776 = getelementptr inbounds float, float* %tmp775, i64 1
+ %tmp777 = getelementptr inbounds float, float* %tmp776, i64 1
+ %tmp778 = getelementptr inbounds float, float* %tmp777, i64 1
+ %tmp779 = getelementptr inbounds float, float* %tmp778, i64 1
+ %tmp780 = getelementptr inbounds float, float* %tmp779, i64 1
+ %tmp781 = getelementptr inbounds float, float* %tmp780, i64 1
+ %tmp782 = getelementptr inbounds float, float* %tmp781, i64 1
+ %tmp783 = getelementptr inbounds float, float* %tmp782, i64 1
+ %tmp784 = getelementptr inbounds float, float* %tmp783, i64 1
+ %tmp785 = getelementptr inbounds float, float* %tmp784, i64 1
+ %tmp786 = getelementptr inbounds float, float* %tmp785, i64 1
+ %tmp787 = getelementptr inbounds float, float* %tmp786, i64 1
+ %tmp788 = getelementptr inbounds float, float* %tmp787, i64 1
+ %tmp789 = getelementptr inbounds float, float* %tmp788, i64 1
+ %tmp790 = getelementptr inbounds float, float* %tmp789, i64 1
+ %tmp791 = getelementptr inbounds float, float* %tmp790, i64 1
+ %tmp792 = getelementptr inbounds float, float* %tmp791, i64 1
+ %tmp793 = getelementptr inbounds float, float* %tmp792, i64 1
+ %tmp794 = getelementptr inbounds float, float* %tmp793, i64 1
+ %tmp795 = getelementptr inbounds float, float* %tmp794, i64 1
+ %tmp796 = getelementptr inbounds float, float* %tmp795, i64 1
+ %tmp797 = getelementptr inbounds float, float* %tmp796, i64 1
+ %tmp798 = getelementptr inbounds float, float* %tmp797, i64 1
+ %tmp799 = getelementptr inbounds float, float* %tmp798, i64 1
+ %tmp800 = getelementptr inbounds float, float* %tmp799, i64 1
+ %tmp801 = getelementptr inbounds float, float* %tmp800, i64 1
+ %tmp802 = getelementptr inbounds float, float* %tmp801, i64 1
+ %tmp803 = getelementptr inbounds float, float* %tmp802, i64 1
+ %tmp804 = getelementptr inbounds float, float* %tmp803, i64 1
+ %tmp805 = getelementptr inbounds float, float* %tmp804, i64 1
+ %tmp806 = getelementptr inbounds float, float* %tmp805, i64 1
+ %tmp807 = getelementptr inbounds float, float* %tmp806, i64 1
+ %tmp808 = getelementptr inbounds float, float* %tmp807, i64 1
+ %tmp809 = getelementptr inbounds float, float* %tmp808, i64 1
+ %tmp810 = getelementptr inbounds float, float* %tmp809, i64 1
+ %tmp811 = getelementptr inbounds float, float* %tmp810, i64 1
+ %tmp812 = getelementptr inbounds float, float* %tmp811, i64 1
+ %tmp813 = getelementptr inbounds float, float* %tmp812, i64 1
+ %tmp814 = getelementptr inbounds float, float* %tmp813, i64 1
+ %tmp815 = getelementptr inbounds float, float* %tmp814, i64 1
+ %tmp816 = getelementptr inbounds float, float* %tmp815, i64 1
+ %tmp817 = getelementptr inbounds float, float* %tmp816, i64 1
+ %tmp818 = getelementptr inbounds float, float* %tmp817, i64 1
+ %tmp819 = getelementptr inbounds float, float* %tmp818, i64 1
+ %tmp820 = getelementptr inbounds float, float* %tmp819, i64 1
+ %tmp821 = getelementptr inbounds float, float* %tmp820, i64 1
+ %tmp822 = getelementptr inbounds float, float* %tmp821, i64 1
+ %tmp823 = getelementptr inbounds float, float* %tmp822, i64 1
+ %tmp824 = getelementptr inbounds float, float* %tmp823, i64 1
+ %tmp825 = getelementptr inbounds float, float* %tmp824, i64 1
+ %tmp826 = getelementptr inbounds float, float* %tmp825, i64 1
+ %tmp827 = getelementptr inbounds float, float* %tmp826, i64 1
+ %tmp828 = getelementptr inbounds float, float* %tmp827, i64 1
+ %tmp829 = getelementptr inbounds float, float* %tmp828, i64 1
+ %tmp830 = getelementptr inbounds float, float* %tmp829, i64 1
+ %tmp831 = getelementptr inbounds float, float* %tmp830, i64 1
+ %tmp832 = getelementptr inbounds float, float* %tmp831, i64 1
+ %tmp833 = getelementptr inbounds float, float* %tmp832, i64 1
+ %tmp834 = getelementptr inbounds float, float* %tmp833, i64 1
+ %tmp835 = getelementptr inbounds float, float* %tmp834, i64 1
+ %tmp836 = getelementptr inbounds float, float* %tmp835, i64 1
+ %tmp837 = getelementptr inbounds float, float* %tmp836, i64 1
+ %tmp838 = getelementptr inbounds float, float* %tmp837, i64 1
+ %tmp839 = getelementptr inbounds float, float* %tmp838, i64 1
+ %tmp840 = getelementptr inbounds float, float* %tmp839, i64 1
+ %tmp841 = getelementptr inbounds float, float* %tmp840, i64 1
+ %tmp842 = getelementptr inbounds float, float* %tmp841, i64 1
+ %tmp843 = getelementptr inbounds float, float* %tmp842, i64 1
+ %tmp844 = getelementptr inbounds float, float* %tmp843, i64 1
+ %tmp845 = getelementptr inbounds float, float* %tmp844, i64 1
+ %tmp846 = getelementptr inbounds float, float* %tmp845, i64 1
+ %tmp847 = getelementptr inbounds float, float* %tmp846, i64 1
+ %tmp848 = getelementptr inbounds float, float* %tmp847, i64 1
+ %tmp849 = getelementptr inbounds float, float* %tmp848, i64 1
+ %tmp850 = getelementptr inbounds float, float* %tmp849, i64 1
+ %tmp851 = getelementptr inbounds float, float* %tmp850, i64 1
+ %tmp852 = getelementptr inbounds float, float* %tmp851, i64 1
+ %tmp853 = getelementptr inbounds float, float* %tmp852, i64 1
+ %tmp854 = getelementptr inbounds float, float* %tmp853, i64 1
+ %tmp855 = getelementptr inbounds float, float* %tmp854, i64 1
+ %tmp856 = getelementptr inbounds float, float* %tmp855, i64 1
+ %tmp857 = getelementptr inbounds float, float* %tmp856, i64 1
+ %tmp858 = getelementptr inbounds float, float* %tmp857, i64 1
+ %tmp859 = getelementptr inbounds float, float* %tmp858, i64 1
+ %tmp860 = getelementptr inbounds float, float* %tmp859, i64 1
+ %tmp861 = getelementptr inbounds float, float* %tmp860, i64 1
+ %tmp862 = getelementptr inbounds float, float* %tmp861, i64 1
+ %tmp863 = getelementptr inbounds float, float* %tmp862, i64 1
+ %tmp864 = getelementptr inbounds float, float* %tmp863, i64 1
+ %tmp865 = getelementptr inbounds float, float* %tmp864, i64 1
+ %tmp866 = getelementptr inbounds float, float* %tmp865, i64 1
+ %tmp867 = getelementptr inbounds float, float* %tmp866, i64 1
+ %tmp868 = getelementptr inbounds float, float* %tmp867, i64 1
+ %tmp869 = getelementptr inbounds float, float* %tmp868, i64 1
+ %tmp870 = getelementptr inbounds float, float* %tmp869, i64 1
+ %tmp871 = getelementptr inbounds float, float* %tmp870, i64 1
+ %tmp872 = getelementptr inbounds float, float* %tmp871, i64 1
+ %tmp873 = getelementptr inbounds float, float* %tmp872, i64 1
+ %tmp874 = getelementptr inbounds float, float* %tmp873, i64 1
+ %tmp875 = getelementptr inbounds float, float* %tmp874, i64 1
+ %tmp876 = getelementptr inbounds float, float* %tmp875, i64 1
+ %tmp877 = getelementptr inbounds float, float* %tmp876, i64 1
+ %tmp878 = getelementptr inbounds float, float* %tmp877, i64 1
+ %tmp879 = getelementptr inbounds float, float* %tmp878, i64 1
+ %tmp880 = getelementptr inbounds float, float* %tmp879, i64 1
+ %tmp881 = getelementptr inbounds float, float* %tmp880, i64 1
+ %tmp882 = getelementptr inbounds float, float* %tmp881, i64 1
+ %tmp883 = getelementptr inbounds float, float* %tmp882, i64 1
+ %tmp884 = getelementptr inbounds float, float* %tmp883, i64 1
+ %tmp885 = getelementptr inbounds float, float* %tmp884, i64 1
+ %tmp886 = getelementptr inbounds float, float* %tmp885, i64 1
+ %tmp887 = getelementptr inbounds float, float* %tmp886, i64 1
+ %tmp888 = getelementptr inbounds float, float* %tmp887, i64 1
+ %tmp889 = getelementptr inbounds float, float* %tmp888, i64 1
+ %tmp890 = getelementptr inbounds float, float* %tmp889, i64 1
+ %tmp891 = getelementptr inbounds float, float* %tmp890, i64 1
+ %tmp892 = getelementptr inbounds float, float* %tmp891, i64 1
+ %tmp893 = getelementptr inbounds float, float* %tmp892, i64 1
+ %tmp894 = getelementptr inbounds float, float* %tmp893, i64 1
+ %tmp895 = getelementptr inbounds float, float* %tmp894, i64 1
+ %tmp896 = getelementptr inbounds float, float* %tmp895, i64 1
+ %tmp897 = getelementptr inbounds float, float* %tmp896, i64 1
+ %tmp898 = getelementptr inbounds float, float* %tmp897, i64 1
+ %tmp899 = getelementptr inbounds float, float* %tmp898, i64 1
+ %tmp900 = getelementptr inbounds float, float* %tmp899, i64 1
+ %tmp901 = getelementptr inbounds float, float* %tmp900, i64 1
+ %tmp902 = getelementptr inbounds float, float* %tmp901, i64 1
+ %tmp903 = getelementptr inbounds float, float* %tmp902, i64 1
+ %tmp904 = getelementptr inbounds float, float* %tmp903, i64 1
+ %tmp905 = getelementptr inbounds float, float* %tmp904, i64 1
+ %tmp906 = getelementptr inbounds float, float* %tmp905, i64 1
+ %tmp907 = getelementptr inbounds float, float* %tmp906, i64 1
+ %tmp908 = getelementptr inbounds float, float* %tmp907, i64 1
+ %tmp909 = getelementptr inbounds float, float* %tmp908, i64 1
+ %tmp910 = getelementptr inbounds float, float* %tmp909, i64 1
+ %tmp911 = getelementptr inbounds float, float* %tmp910, i64 1
+ %tmp912 = getelementptr inbounds float, float* %tmp911, i64 1
+ %tmp913 = getelementptr inbounds float, float* %tmp912, i64 1
+ %tmp914 = getelementptr inbounds float, float* %tmp913, i64 1
+ %tmp915 = getelementptr inbounds float, float* %tmp914, i64 1
+ %tmp916 = getelementptr inbounds float, float* %tmp915, i64 1
+ %tmp917 = getelementptr inbounds float, float* %tmp916, i64 1
+ %tmp918 = getelementptr inbounds float, float* %tmp917, i64 1
+ %tmp919 = getelementptr inbounds float, float* %tmp918, i64 1
+ %tmp920 = getelementptr inbounds float, float* %tmp919, i64 1
+ %tmp921 = getelementptr inbounds float, float* %tmp920, i64 1
+ %tmp922 = getelementptr inbounds float, float* %tmp921, i64 1
+ %tmp923 = getelementptr inbounds float, float* %tmp922, i64 1
+ %tmp924 = getelementptr inbounds float, float* %tmp923, i64 1
+ %tmp925 = getelementptr inbounds float, float* %tmp924, i64 1
+ %tmp926 = getelementptr inbounds float, float* %tmp925, i64 1
+ %tmp927 = getelementptr inbounds float, float* %tmp926, i64 1
+ %tmp928 = getelementptr inbounds float, float* %tmp927, i64 1
+ %tmp929 = getelementptr inbounds float, float* %tmp928, i64 1
+ %tmp930 = getelementptr inbounds float, float* %tmp929, i64 1
+ %tmp931 = getelementptr inbounds float, float* %tmp930, i64 1
+ %tmp932 = getelementptr inbounds float, float* %tmp931, i64 1
+ %tmp933 = getelementptr inbounds float, float* %tmp932, i64 1
+ %tmp934 = getelementptr inbounds float, float* %tmp933, i64 1
+ %tmp935 = getelementptr inbounds float, float* %tmp934, i64 1
+ %tmp936 = getelementptr inbounds float, float* %tmp935, i64 1
+ %tmp937 = getelementptr inbounds float, float* %tmp936, i64 1
+ %tmp938 = getelementptr inbounds float, float* %tmp937, i64 1
+ %tmp939 = getelementptr inbounds float, float* %tmp938, i64 1
+ %tmp940 = getelementptr inbounds float, float* %tmp939, i64 1
+ %tmp941 = getelementptr inbounds float, float* %tmp940, i64 1
+ %tmp942 = getelementptr inbounds float, float* %tmp941, i64 1
+ %tmp943 = getelementptr inbounds float, float* %tmp942, i64 1
+ %tmp944 = getelementptr inbounds float, float* %tmp943, i64 1
+ %tmp945 = getelementptr inbounds float, float* %tmp944, i64 1
+ %tmp946 = getelementptr inbounds float, float* %tmp945, i64 1
+ %tmp947 = getelementptr inbounds float, float* %tmp946, i64 1
+ %tmp948 = getelementptr inbounds float, float* %tmp947, i64 1
+ %tmp949 = getelementptr inbounds float, float* %tmp948, i64 1
+ %tmp950 = getelementptr inbounds float, float* %tmp949, i64 1
+ %tmp951 = getelementptr inbounds float, float* %tmp950, i64 1
+ %tmp952 = getelementptr inbounds float, float* %tmp951, i64 1
+ %tmp953 = getelementptr inbounds float, float* %tmp952, i64 1
+ %tmp954 = getelementptr inbounds float, float* %tmp953, i64 1
+ %tmp955 = getelementptr inbounds float, float* %tmp954, i64 1
+ %tmp956 = getelementptr inbounds float, float* %tmp955, i64 1
+ %tmp957 = getelementptr inbounds float, float* %tmp956, i64 1
+ %tmp958 = getelementptr inbounds float, float* %tmp957, i64 1
+ %tmp959 = getelementptr inbounds float, float* %tmp958, i64 1
+ %tmp960 = getelementptr inbounds float, float* %tmp959, i64 1
+ %tmp961 = getelementptr inbounds float, float* %tmp960, i64 1
+ %tmp962 = getelementptr inbounds float, float* %tmp961, i64 1
+ %tmp963 = getelementptr inbounds float, float* %tmp962, i64 1
+ %tmp964 = getelementptr inbounds float, float* %tmp963, i64 1
+ %tmp965 = getelementptr inbounds float, float* %tmp964, i64 1
+ %tmp966 = getelementptr inbounds float, float* %tmp965, i64 1
+ %tmp967 = getelementptr inbounds float, float* %tmp966, i64 1
+ %tmp968 = getelementptr inbounds float, float* %tmp967, i64 1
+ %tmp969 = getelementptr inbounds float, float* %tmp968, i64 1
+ %tmp970 = getelementptr inbounds float, float* %tmp969, i64 1
+ %tmp971 = getelementptr inbounds float, float* %tmp970, i64 1
+ %tmp972 = getelementptr inbounds float, float* %tmp971, i64 1
+ %tmp973 = getelementptr inbounds float, float* %tmp972, i64 1
+ %tmp974 = getelementptr inbounds float, float* %tmp973, i64 1
+ %tmp975 = getelementptr inbounds float, float* %tmp974, i64 1
+ %tmp976 = getelementptr inbounds float, float* %tmp975, i64 1
+ %tmp977 = getelementptr inbounds float, float* %tmp976, i64 1
+ %tmp978 = getelementptr inbounds float, float* %tmp977, i64 1
+ %tmp979 = getelementptr inbounds float, float* %tmp978, i64 1
+ %tmp980 = getelementptr inbounds float, float* %tmp979, i64 1
+ %tmp981 = getelementptr inbounds float, float* %tmp980, i64 1
+ %tmp982 = getelementptr inbounds float, float* %tmp981, i64 1
+ %tmp983 = getelementptr inbounds float, float* %tmp982, i64 1
+ %tmp984 = getelementptr inbounds float, float* %tmp983, i64 1
+ %tmp985 = getelementptr inbounds float, float* %tmp984, i64 1
+ %tmp986 = getelementptr inbounds float, float* %tmp985, i64 1
+ %tmp987 = getelementptr inbounds float, float* %tmp986, i64 1
+ %tmp988 = getelementptr inbounds float, float* %tmp987, i64 1
+ %tmp989 = getelementptr inbounds float, float* %tmp988, i64 1
+ %tmp990 = getelementptr inbounds float, float* %tmp989, i64 1
+ %tmp991 = getelementptr inbounds float, float* %tmp990, i64 1
+ %tmp992 = getelementptr inbounds float, float* %tmp991, i64 1
+ %tmp993 = getelementptr inbounds float, float* %tmp992, i64 1
+ %tmp994 = getelementptr inbounds float, float* %tmp993, i64 1
+ %tmp995 = getelementptr inbounds float, float* %tmp994, i64 1
+ %tmp996 = getelementptr inbounds float, float* %tmp995, i64 1
+ %tmp997 = getelementptr inbounds float, float* %tmp996, i64 1
+ %tmp998 = getelementptr inbounds float, float* %tmp997, i64 1
+ %tmp999 = getelementptr inbounds float, float* %tmp998, i64 1
+ %tmp1000 = getelementptr inbounds float, float* %tmp999, i64 1
+ %tmp1001 = getelementptr inbounds float, float* %tmp1000, i64 1
+ %tmp1002 = getelementptr inbounds float, float* %tmp1001, i64 1
+ %tmp1003 = getelementptr inbounds float, float* %tmp1002, i64 1
+ %tmp1004 = getelementptr inbounds float, float* %tmp1003, i64 1
+ %tmp1005 = getelementptr inbounds float, float* %tmp1004, i64 1
+ %tmp1006 = getelementptr inbounds float, float* %tmp1005, i64 1
+ %tmp1007 = getelementptr inbounds float, float* %tmp1006, i64 1
+ %tmp1008 = getelementptr inbounds float, float* %tmp1007, i64 1
+ %tmp1009 = getelementptr inbounds float, float* %tmp1008, i64 1
+ %tmp1010 = getelementptr inbounds float, float* %tmp1009, i64 1
+ %tmp1011 = getelementptr inbounds float, float* %tmp1010, i64 1
+ %tmp1012 = getelementptr inbounds float, float* %tmp1011, i64 1
+ %tmp1013 = getelementptr inbounds float, float* %tmp1012, i64 1
+ %tmp1014 = getelementptr inbounds float, float* %tmp1013, i64 1
+ %tmp1015 = getelementptr inbounds float, float* %tmp1014, i64 1
+ %tmp1016 = getelementptr inbounds float, float* %tmp1015, i64 1
+ %tmp1017 = getelementptr inbounds float, float* %tmp1016, i64 1
+ %tmp1018 = getelementptr inbounds float, float* %tmp1017, i64 1
+ %tmp1019 = getelementptr inbounds float, float* %tmp1018, i64 1
+ %tmp1020 = getelementptr inbounds float, float* %tmp1019, i64 1
+ %tmp1021 = getelementptr inbounds float, float* %tmp1020, i64 1
+ %tmp1022 = getelementptr inbounds float, float* %tmp1021, i64 1
+ %tmp1023 = getelementptr inbounds float, float* %tmp1022, i64 1
+ %tmp1024 = getelementptr inbounds float, float* %tmp1023, i64 1
+ %tmp1025 = getelementptr inbounds float, float* %tmp1024, i64 1
+ %tmp1026 = getelementptr inbounds float, float* %tmp1025, i64 1
+ %tmp1027 = getelementptr inbounds float, float* %tmp1026, i64 1
+ %tmp1028 = getelementptr inbounds float, float* %tmp1027, i64 1
+ %tmp1029 = getelementptr inbounds float, float* %tmp1028, i64 1
+ %tmp1030 = getelementptr inbounds float, float* %tmp1029, i64 1
+ %tmp1031 = getelementptr inbounds float, float* %tmp1030, i64 1
+ %tmp1032 = getelementptr inbounds float, float* %tmp1031, i64 1
+ %tmp1033 = getelementptr inbounds float, float* %tmp1032, i64 1
+ %tmp1034 = getelementptr inbounds float, float* %tmp1033, i64 1
+ %tmp1035 = getelementptr inbounds float, float* %tmp1034, i64 1
+ %tmp1036 = getelementptr inbounds float, float* %tmp1035, i64 1
+ %tmp1037 = getelementptr inbounds float, float* %tmp1036, i64 1
+ %tmp1038 = getelementptr inbounds float, float* %tmp1037, i64 1
+ %tmp1039 = getelementptr inbounds float, float* %tmp1038, i64 1
+ %tmp1040 = getelementptr inbounds float, float* %tmp1039, i64 1
+ %tmp1041 = getelementptr inbounds float, float* %tmp1040, i64 1
+ %tmp1042 = getelementptr inbounds float, float* %tmp1041, i64 1
+ %tmp1043 = getelementptr inbounds float, float* %tmp1042, i64 1
+ %tmp1044 = getelementptr inbounds float, float* %tmp1043, i64 1
+ %tmp1045 = getelementptr inbounds float, float* %tmp1044, i64 1
+ %tmp1046 = getelementptr inbounds float, float* %tmp1045, i64 1
+ %tmp1047 = getelementptr inbounds float, float* %tmp1046, i64 1
+ %tmp1048 = getelementptr inbounds float, float* %tmp1047, i64 1
+ %tmp1049 = getelementptr inbounds float, float* %tmp1048, i64 1
+ %tmp1050 = getelementptr inbounds float, float* %tmp1049, i64 1
+ %tmp1051 = getelementptr inbounds float, float* %tmp1050, i64 1
+ %tmp1052 = getelementptr inbounds float, float* %tmp1051, i64 1
+ %tmp1053 = getelementptr inbounds float, float* %tmp1052, i64 1
+ %tmp1054 = getelementptr inbounds float, float* %tmp1053, i64 1
+ %tmp1055 = getelementptr inbounds float, float* %tmp1054, i64 1
+ %tmp1056 = getelementptr inbounds float, float* %tmp1055, i64 1
+ %tmp1057 = getelementptr inbounds float, float* %tmp1056, i64 1
+ %tmp1058 = getelementptr inbounds float, float* %tmp1057, i64 1
+ %tmp1059 = getelementptr inbounds float, float* %tmp1058, i64 1
+ %tmp1060 = getelementptr inbounds float, float* %tmp1059, i64 1
+ %tmp1061 = getelementptr inbounds float, float* %tmp1060, i64 1
+ %tmp1062 = getelementptr inbounds float, float* %tmp1061, i64 1
+ %tmp1063 = getelementptr inbounds float, float* %tmp1062, i64 1
+ %tmp1064 = getelementptr inbounds float, float* %tmp1063, i64 1
+ %tmp1065 = getelementptr inbounds float, float* %tmp1064, i64 1
+ %tmp1066 = getelementptr inbounds float, float* %tmp1065, i64 1
+ %tmp1067 = getelementptr inbounds float, float* %tmp1066, i64 1
+ %tmp1068 = getelementptr inbounds float, float* %tmp1067, i64 1
+ %tmp1069 = getelementptr inbounds float, float* %tmp1068, i64 1
+ %tmp1070 = getelementptr inbounds float, float* %tmp1069, i64 1
+ %tmp1071 = getelementptr inbounds float, float* %tmp1070, i64 1
+ %tmp1072 = getelementptr inbounds float, float* %tmp1071, i64 1
+ %tmp1073 = getelementptr inbounds float, float* %tmp1072, i64 1
+ %tmp1074 = getelementptr inbounds float, float* %tmp1073, i64 1
+ %tmp1075 = getelementptr inbounds float, float* %tmp1074, i64 1
+ %tmp1076 = getelementptr inbounds float, float* %tmp1075, i64 1
+ %tmp1077 = getelementptr inbounds float, float* %tmp1076, i64 1
+ %tmp1078 = getelementptr inbounds float, float* %tmp1077, i64 1
+ %tmp1079 = getelementptr inbounds float, float* %tmp1078, i64 1
+ %tmp1080 = getelementptr inbounds float, float* %tmp1079, i64 1
+ %tmp1081 = getelementptr inbounds float, float* %tmp1080, i64 1
+ %tmp1082 = getelementptr inbounds float, float* %tmp1081, i64 1
+ %tmp1083 = getelementptr inbounds float, float* %tmp1082, i64 1
+ %tmp1084 = getelementptr inbounds float, float* %tmp1083, i64 1
+ %tmp1085 = getelementptr inbounds float, float* %tmp1084, i64 1
+ %tmp1086 = getelementptr inbounds float, float* %tmp1085, i64 1
+ %tmp1087 = getelementptr inbounds float, float* %tmp1086, i64 1
+ %tmp1088 = getelementptr inbounds float, float* %tmp1087, i64 1
+ %tmp1089 = getelementptr inbounds float, float* %tmp1088, i64 1
+ %tmp1090 = getelementptr inbounds float, float* %tmp1089, i64 1
+ %tmp1091 = getelementptr inbounds float, float* %tmp1090, i64 1
+ %tmp1092 = getelementptr inbounds float, float* %tmp1091, i64 1
+ %tmp1093 = getelementptr inbounds float, float* %tmp1092, i64 1
+ %tmp1094 = getelementptr inbounds float, float* %tmp1093, i64 1
+ %tmp1095 = getelementptr inbounds float, float* %tmp1094, i64 1
+ %tmp1096 = getelementptr inbounds float, float* %tmp1095, i64 1
+ %tmp1097 = getelementptr inbounds float, float* %tmp1096, i64 1
+ %tmp1098 = getelementptr inbounds float, float* %tmp1097, i64 1
+ %tmp1099 = getelementptr inbounds float, float* %tmp1098, i64 1
+ %tmp1100 = getelementptr inbounds float, float* %tmp1099, i64 1
+ %tmp1101 = getelementptr inbounds float, float* %tmp1100, i64 1
+ %tmp1102 = getelementptr inbounds float, float* %tmp1101, i64 1
+ %tmp1103 = getelementptr inbounds float, float* %tmp1102, i64 1
+ %tmp1104 = getelementptr inbounds float, float* %tmp1103, i64 1
+ %tmp1105 = getelementptr inbounds float, float* %tmp1104, i64 1
+ %tmp1106 = getelementptr inbounds float, float* %tmp1105, i64 1
+ %tmp1107 = getelementptr inbounds float, float* %tmp1106, i64 1
+ %tmp1108 = getelementptr inbounds float, float* %tmp1107, i64 1
+ %tmp1109 = getelementptr inbounds float, float* %tmp1108, i64 1
+ %tmp1110 = getelementptr inbounds float, float* %tmp1109, i64 1
+ %tmp1111 = getelementptr inbounds float, float* %tmp1110, i64 1
+ %tmp1112 = getelementptr inbounds float, float* %tmp1111, i64 1
+ %tmp1113 = getelementptr inbounds float, float* %tmp1112, i64 1
+ %tmp1114 = getelementptr inbounds float, float* %tmp1113, i64 1
+ %tmp1115 = getelementptr inbounds float, float* %tmp1114, i64 1
+ %tmp1116 = getelementptr inbounds float, float* %tmp1115, i64 1
+ %tmp1117 = getelementptr inbounds float, float* %tmp1116, i64 1
+ %tmp1118 = getelementptr inbounds float, float* %tmp1117, i64 1
+ %tmp1119 = getelementptr inbounds float, float* %tmp1118, i64 1
+ %tmp1120 = getelementptr inbounds float, float* %tmp1119, i64 1
+ %tmp1121 = getelementptr inbounds float, float* %tmp1120, i64 1
+ %tmp1122 = getelementptr inbounds float, float* %tmp1121, i64 1
+ %tmp1123 = getelementptr inbounds float, float* %tmp1122, i64 1
+ %tmp1124 = getelementptr inbounds float, float* %tmp1123, i64 1
+ %tmp1125 = getelementptr inbounds float, float* %tmp1124, i64 1
+ %tmp1126 = getelementptr inbounds float, float* %tmp1125, i64 1
+ %tmp1127 = getelementptr inbounds float, float* %tmp1126, i64 1
+ %tmp1128 = getelementptr inbounds float, float* %tmp1127, i64 1
+ %tmp1129 = getelementptr inbounds float, float* %tmp1128, i64 1
+ %tmp1130 = getelementptr inbounds float, float* %tmp1129, i64 1
+ %tmp1131 = getelementptr inbounds float, float* %tmp1130, i64 1
+ %tmp1132 = getelementptr inbounds float, float* %tmp1131, i64 1
+ %tmp1133 = getelementptr inbounds float, float* %tmp1132, i64 1
+ %tmp1134 = getelementptr inbounds float, float* %tmp1133, i64 1
+ %tmp1135 = getelementptr inbounds float, float* %tmp1134, i64 1
+ %tmp1136 = getelementptr inbounds float, float* %tmp1135, i64 1
+ %tmp1137 = getelementptr inbounds float, float* %tmp1136, i64 1
+ %tmp1138 = getelementptr inbounds float, float* %tmp1137, i64 1
+ %tmp1139 = getelementptr inbounds float, float* %tmp1138, i64 1
+ %tmp1140 = getelementptr inbounds float, float* %tmp1139, i64 1
+ %tmp1141 = getelementptr inbounds float, float* %tmp1140, i64 1
+ %tmp1142 = getelementptr inbounds float, float* %tmp1141, i64 1
+ %tmp1143 = getelementptr inbounds float, float* %tmp1142, i64 1
+ %tmp1144 = getelementptr inbounds float, float* %tmp1143, i64 1
+ %tmp1145 = getelementptr inbounds float, float* %tmp1144, i64 1
+ %tmp1146 = getelementptr inbounds float, float* %tmp1145, i64 1
+ %tmp1147 = getelementptr inbounds float, float* %tmp1146, i64 1
+ %tmp1148 = getelementptr inbounds float, float* %tmp1147, i64 1
+ %tmp1149 = getelementptr inbounds float, float* %tmp1148, i64 1
+ %tmp1150 = getelementptr inbounds float, float* %tmp1149, i64 1
+ %tmp1151 = getelementptr inbounds float, float* %tmp1150, i64 1
+ %tmp1152 = getelementptr inbounds float, float* %tmp1151, i64 1
+ %tmp1153 = getelementptr inbounds float, float* %tmp1152, i64 1
+ %tmp1154 = getelementptr inbounds float, float* %tmp1153, i64 1
+ %tmp1155 = getelementptr inbounds float, float* %tmp1154, i64 1
+ %tmp1156 = getelementptr inbounds float, float* %tmp1155, i64 1
+ %tmp1157 = getelementptr inbounds float, float* %tmp1156, i64 1
+ %tmp1158 = getelementptr inbounds float, float* %tmp1157, i64 1
+ %tmp1159 = getelementptr inbounds float, float* %tmp1158, i64 1
+ %tmp1160 = getelementptr inbounds float, float* %tmp1159, i64 1
+ %tmp1161 = getelementptr inbounds float, float* %tmp1160, i64 1
+ %tmp1162 = getelementptr inbounds float, float* %tmp1161, i64 1
+ %tmp1163 = getelementptr inbounds float, float* %tmp1162, i64 1
+ %tmp1164 = getelementptr inbounds float, float* %tmp1163, i64 1
+ %tmp1165 = getelementptr inbounds float, float* %tmp1164, i64 1
+ %tmp1166 = getelementptr inbounds float, float* %tmp1165, i64 1
+ %tmp1167 = getelementptr inbounds float, float* %tmp1166, i64 1
+ %tmp1168 = getelementptr inbounds float, float* %tmp1167, i64 1
+ %tmp1169 = getelementptr inbounds float, float* %tmp1168, i64 1
+ %tmp1170 = getelementptr inbounds float, float* %tmp1169, i64 1
+ %tmp1171 = getelementptr inbounds float, float* %tmp1170, i64 1
+ %tmp1172 = getelementptr inbounds float, float* %tmp1171, i64 1
+ %tmp1173 = getelementptr inbounds float, float* %tmp1172, i64 1
+ %tmp1174 = getelementptr inbounds float, float* %tmp1173, i64 1
+ %tmp1175 = getelementptr inbounds float, float* %tmp1174, i64 1
+ %tmp1176 = getelementptr inbounds float, float* %tmp1175, i64 1
+ %tmp1177 = getelementptr inbounds float, float* %tmp1176, i64 1
+ %tmp1178 = getelementptr inbounds float, float* %tmp1177, i64 1
+ %tmp1179 = getelementptr inbounds float, float* %tmp1178, i64 1
+ %tmp1180 = getelementptr inbounds float, float* %tmp1179, i64 1
+ %tmp1181 = getelementptr inbounds float, float* %tmp1180, i64 1
+ %tmp1182 = getelementptr inbounds float, float* %tmp1181, i64 1
+ %tmp1183 = getelementptr inbounds float, float* %tmp1182, i64 1
+ %tmp1184 = getelementptr inbounds float, float* %tmp1183, i64 1
+ %tmp1185 = getelementptr inbounds float, float* %tmp1184, i64 1
+ %tmp1186 = getelementptr inbounds float, float* %tmp1185, i64 1
+ %tmp1187 = getelementptr inbounds float, float* %tmp1186, i64 1
+ %tmp1188 = getelementptr inbounds float, float* %tmp1187, i64 1
+ %tmp1189 = getelementptr inbounds float, float* %tmp1188, i64 1
+ %tmp1190 = getelementptr inbounds float, float* %tmp1189, i64 1
+ %tmp1191 = getelementptr inbounds float, float* %tmp1190, i64 1
+ %tmp1192 = getelementptr inbounds float, float* %tmp1191, i64 1
+ %tmp1193 = getelementptr inbounds float, float* %tmp1192, i64 1
+ %tmp1194 = getelementptr inbounds float, float* %tmp1193, i64 1
+ %tmp1195 = getelementptr inbounds float, float* %tmp1194, i64 1
+ %tmp1196 = getelementptr inbounds float, float* %tmp1195, i64 1
+ %tmp1197 = getelementptr inbounds float, float* %tmp1196, i64 1
+ %tmp1198 = getelementptr inbounds float, float* %tmp1197, i64 1
+ %tmp1199 = getelementptr inbounds float, float* %tmp1198, i64 1
+ %tmp1200 = getelementptr inbounds float, float* %tmp1199, i64 1
+ %tmp1201 = getelementptr inbounds float, float* %tmp1200, i64 1
+ %tmp1202 = getelementptr inbounds float, float* %tmp1201, i64 1
+ %tmp1203 = getelementptr inbounds float, float* %tmp1202, i64 1
+ %tmp1204 = getelementptr inbounds float, float* %tmp1203, i64 1
+ %tmp1205 = getelementptr inbounds float, float* %tmp1204, i64 1
+ %tmp1206 = getelementptr inbounds float, float* %tmp1205, i64 1
+ %tmp1207 = getelementptr inbounds float, float* %tmp1206, i64 1
+ %tmp1208 = getelementptr inbounds float, float* %tmp1207, i64 1
+ %tmp1209 = getelementptr inbounds float, float* %tmp1208, i64 1
+ %tmp1210 = getelementptr inbounds float, float* %tmp1209, i64 1
+ %tmp1211 = getelementptr inbounds float, float* %tmp1210, i64 1
+ %tmp1212 = getelementptr inbounds float, float* %tmp1211, i64 1
+ %tmp1213 = getelementptr inbounds float, float* %tmp1212, i64 1
+ %tmp1214 = getelementptr inbounds float, float* %tmp1213, i64 1
+ %tmp1215 = getelementptr inbounds float, float* %tmp1214, i64 1
+ %tmp1216 = getelementptr inbounds float, float* %tmp1215, i64 1
+ %tmp1217 = getelementptr inbounds float, float* %tmp1216, i64 1
+ %tmp1218 = getelementptr inbounds float, float* %tmp1217, i64 1
+ %tmp1219 = getelementptr inbounds float, float* %tmp1218, i64 1
+ %tmp1220 = getelementptr inbounds float, float* %tmp1219, i64 1
+ %tmp1221 = getelementptr inbounds float, float* %tmp1220, i64 1
+ %tmp1222 = getelementptr inbounds float, float* %tmp1221, i64 1
+ %tmp1223 = getelementptr inbounds float, float* %tmp1222, i64 1
+ %tmp1224 = getelementptr inbounds float, float* %tmp1223, i64 1
+ %tmp1225 = getelementptr inbounds float, float* %tmp1224, i64 1
+ %tmp1226 = getelementptr inbounds float, float* %tmp1225, i64 1
+ %tmp1227 = getelementptr inbounds float, float* %tmp1226, i64 1
+ %tmp1228 = getelementptr inbounds float, float* %tmp1227, i64 1
+ %tmp1229 = getelementptr inbounds float, float* %tmp1228, i64 1
+ %tmp1230 = getelementptr inbounds float, float* %tmp1229, i64 1
+ %tmp1231 = getelementptr inbounds float, float* %tmp1230, i64 1
+ %tmp1232 = getelementptr inbounds float, float* %tmp1231, i64 1
+ %tmp1233 = getelementptr inbounds float, float* %tmp1232, i64 1
+ %tmp1234 = getelementptr inbounds float, float* %tmp1233, i64 1
+ %tmp1235 = getelementptr inbounds float, float* %tmp1234, i64 1
+ %tmp1236 = getelementptr inbounds float, float* %tmp1235, i64 1
+ %tmp1237 = getelementptr inbounds float, float* %tmp1236, i64 1
+ %tmp1238 = getelementptr inbounds float, float* %tmp1237, i64 1
+ %tmp1239 = getelementptr inbounds float, float* %tmp1238, i64 1
+ %tmp1240 = getelementptr inbounds float, float* %tmp1239, i64 1
+ %tmp1241 = getelementptr inbounds float, float* %tmp1240, i64 1
+ %tmp1242 = getelementptr inbounds float, float* %tmp1241, i64 1
+ %tmp1243 = getelementptr inbounds float, float* %tmp1242, i64 1
+ %tmp1244 = getelementptr inbounds float, float* %tmp1243, i64 1
+ %tmp1245 = getelementptr inbounds float, float* %tmp1244, i64 1
+ %tmp1246 = getelementptr inbounds float, float* %tmp1245, i64 1
+ %tmp1247 = getelementptr inbounds float, float* %tmp1246, i64 1
+ %tmp1248 = getelementptr inbounds float, float* %tmp1247, i64 1
+ %tmp1249 = getelementptr inbounds float, float* %tmp1248, i64 1
+ %tmp1250 = getelementptr inbounds float, float* %tmp1249, i64 1
+ %tmp1251 = getelementptr inbounds float, float* %tmp1250, i64 1
+ %tmp1252 = getelementptr inbounds float, float* %tmp1251, i64 1
+ %tmp1253 = getelementptr inbounds float, float* %tmp1252, i64 1
+ %tmp1254 = getelementptr inbounds float, float* %tmp1253, i64 1
+ %tmp1255 = getelementptr inbounds float, float* %tmp1254, i64 1
+ %tmp1256 = getelementptr inbounds float, float* %tmp1255, i64 1
+ %tmp1257 = getelementptr inbounds float, float* %tmp1256, i64 1
+ %tmp1258 = getelementptr inbounds float, float* %tmp1257, i64 1
+ %tmp1259 = getelementptr inbounds float, float* %tmp1258, i64 1
+ %tmp1260 = getelementptr inbounds float, float* %tmp1259, i64 1
+ %tmp1261 = getelementptr inbounds float, float* %tmp1260, i64 1
+ %tmp1262 = getelementptr inbounds float, float* %tmp1261, i64 1
+ %tmp1263 = getelementptr inbounds float, float* %tmp1262, i64 1
+ %tmp1264 = getelementptr inbounds float, float* %tmp1263, i64 1
+ %tmp1265 = getelementptr inbounds float, float* %tmp1264, i64 1
+ %tmp1266 = getelementptr inbounds float, float* %tmp1265, i64 1
+ %tmp1267 = getelementptr inbounds float, float* %tmp1266, i64 1
+ %tmp1268 = getelementptr inbounds float, float* %tmp1267, i64 1
+ %tmp1269 = getelementptr inbounds float, float* %tmp1268, i64 1
+ %tmp1270 = getelementptr inbounds float, float* %tmp1269, i64 1
+ %tmp1271 = getelementptr inbounds float, float* %tmp1270, i64 1
+ %tmp1272 = getelementptr inbounds float, float* %tmp1271, i64 1
+ %tmp1273 = getelementptr inbounds float, float* %tmp1272, i64 1
+ %tmp1274 = getelementptr inbounds float, float* %tmp1273, i64 1
+ %tmp1275 = getelementptr inbounds float, float* %tmp1274, i64 1
+ %tmp1276 = getelementptr inbounds float, float* %tmp1275, i64 1
+ %tmp1277 = getelementptr inbounds float, float* %tmp1276, i64 1
+ %tmp1278 = getelementptr inbounds float, float* %tmp1277, i64 1
+ %tmp1279 = getelementptr inbounds float, float* %tmp1278, i64 1
+ %tmp1280 = getelementptr inbounds float, float* %tmp1279, i64 1
+ %tmp1281 = getelementptr inbounds float, float* %tmp1280, i64 1
+ %tmp1282 = getelementptr inbounds float, float* %tmp1281, i64 1
+ %tmp1283 = getelementptr inbounds float, float* %tmp1282, i64 1
+ %tmp1284 = getelementptr inbounds float, float* %tmp1283, i64 1
+ %tmp1285 = getelementptr inbounds float, float* %tmp1284, i64 1
+ %tmp1286 = getelementptr inbounds float, float* %tmp1285, i64 1
+ %tmp1287 = getelementptr inbounds float, float* %tmp1286, i64 1
+ %tmp1288 = getelementptr inbounds float, float* %tmp1287, i64 1
+ %tmp1289 = getelementptr inbounds float, float* %tmp1288, i64 1
+ %tmp1290 = getelementptr inbounds float, float* %tmp1289, i64 1
+ %tmp1291 = getelementptr inbounds float, float* %tmp1290, i64 1
+ %tmp1292 = getelementptr inbounds float, float* %tmp1291, i64 1
+ %tmp1293 = getelementptr inbounds float, float* %tmp1292, i64 1
+ %tmp1294 = getelementptr inbounds float, float* %tmp1293, i64 1
+ %tmp1295 = getelementptr inbounds float, float* %tmp1294, i64 1
+ %tmp1296 = getelementptr inbounds float, float* %tmp1295, i64 1
+ %tmp1297 = getelementptr inbounds float, float* %tmp1296, i64 1
+ %tmp1298 = getelementptr inbounds float, float* %tmp1297, i64 1
+ %tmp1299 = getelementptr inbounds float, float* %tmp1298, i64 1
+ %tmp1300 = getelementptr inbounds float, float* %tmp1299, i64 1
+ %tmp1301 = getelementptr inbounds float, float* %tmp1300, i64 1
+ %tmp1302 = getelementptr inbounds float, float* %tmp1301, i64 1
+ %tmp1303 = getelementptr inbounds float, float* %tmp1302, i64 1
+ %tmp1304 = getelementptr inbounds float, float* %tmp1303, i64 1
+ %tmp1305 = getelementptr inbounds float, float* %tmp1304, i64 1
+ %tmp1306 = getelementptr inbounds float, float* %tmp1305, i64 1
+ %tmp1307 = getelementptr inbounds float, float* %tmp1306, i64 1
+ %tmp1308 = getelementptr inbounds float, float* %tmp1307, i64 1
+ %tmp1309 = getelementptr inbounds float, float* %tmp1308, i64 1
+ %tmp1310 = getelementptr inbounds float, float* %tmp1309, i64 1
+ %tmp1311 = getelementptr inbounds float, float* %tmp1310, i64 1
+ %tmp1312 = getelementptr inbounds float, float* %tmp1311, i64 1
+ %tmp1313 = getelementptr inbounds float, float* %tmp1312, i64 1
+ %tmp1314 = getelementptr inbounds float, float* %tmp1313, i64 1
+ %tmp1315 = getelementptr inbounds float, float* %tmp1314, i64 1
+ %tmp1316 = getelementptr inbounds float, float* %tmp1315, i64 1
+ %tmp1317 = getelementptr inbounds float, float* %tmp1316, i64 1
+ %tmp1318 = getelementptr inbounds float, float* %tmp1317, i64 1
+ %tmp1319 = getelementptr inbounds float, float* %tmp1318, i64 1
+ %tmp1320 = getelementptr inbounds float, float* %tmp1319, i64 1
+ %tmp1321 = getelementptr inbounds float, float* %tmp1320, i64 1
+ %tmp1322 = getelementptr inbounds float, float* %tmp1321, i64 1
+ %tmp1323 = getelementptr inbounds float, float* %tmp1322, i64 1
+ %tmp1324 = getelementptr inbounds float, float* %tmp1323, i64 1
+ %tmp1325 = getelementptr inbounds float, float* %tmp1324, i64 1
+ %tmp1326 = getelementptr inbounds float, float* %tmp1325, i64 1
+ %tmp1327 = getelementptr inbounds float, float* %tmp1326, i64 1
+ %tmp1328 = getelementptr inbounds float, float* %tmp1327, i64 1
+ %tmp1329 = getelementptr inbounds float, float* %tmp1328, i64 1
+ %tmp1330 = getelementptr inbounds float, float* %tmp1329, i64 1
+ %tmp1331 = getelementptr inbounds float, float* %tmp1330, i64 1
+ %tmp1332 = getelementptr inbounds float, float* %tmp1331, i64 1
+ %tmp1333 = getelementptr inbounds float, float* %tmp1332, i64 1
+ %tmp1334 = getelementptr inbounds float, float* %tmp1333, i64 1
+ %tmp1335 = getelementptr inbounds float, float* %tmp1334, i64 1
+ %tmp1336 = getelementptr inbounds float, float* %tmp1335, i64 1
+ %tmp1337 = getelementptr inbounds float, float* %tmp1336, i64 1
+ %tmp1338 = getelementptr inbounds float, float* %tmp1337, i64 1
+ %tmp1339 = getelementptr inbounds float, float* %tmp1338, i64 1
+ %tmp1340 = getelementptr inbounds float, float* %tmp1339, i64 1
+ %tmp1341 = getelementptr inbounds float, float* %tmp1340, i64 1
+ %tmp1342 = getelementptr inbounds float, float* %tmp1341, i64 1
+ %tmp1343 = getelementptr inbounds float, float* %tmp1342, i64 1
+ %tmp1344 = getelementptr inbounds float, float* %tmp1343, i64 1
+ %tmp1345 = getelementptr inbounds float, float* %tmp1344, i64 1
+ %tmp1346 = getelementptr inbounds float, float* %tmp1345, i64 1
+ %tmp1347 = getelementptr inbounds float, float* %tmp1346, i64 1
+ %tmp1348 = getelementptr inbounds float, float* %tmp1347, i64 1
+ %tmp1349 = getelementptr inbounds float, float* %tmp1348, i64 1
+ %tmp1350 = getelementptr inbounds float, float* %tmp1349, i64 1
+ %tmp1351 = getelementptr inbounds float, float* %tmp1350, i64 1
+ %tmp1352 = getelementptr inbounds float, float* %tmp1351, i64 1
+ %tmp1353 = getelementptr inbounds float, float* %tmp1352, i64 1
+ %tmp1354 = getelementptr inbounds float, float* %tmp1353, i64 1
+ %tmp1355 = getelementptr inbounds float, float* %tmp1354, i64 1
+ %tmp1356 = getelementptr inbounds float, float* %tmp1355, i64 1
+ %tmp1357 = getelementptr inbounds float, float* %tmp1356, i64 1
+ %tmp1358 = getelementptr inbounds float, float* %tmp1357, i64 1
+ %tmp1359 = getelementptr inbounds float, float* %tmp1358, i64 1
+ %tmp1360 = getelementptr inbounds float, float* %tmp1359, i64 1
+ %tmp1361 = getelementptr inbounds float, float* %tmp1360, i64 1
+ %tmp1362 = getelementptr inbounds float, float* %tmp1361, i64 1
+ %tmp1363 = getelementptr inbounds float, float* %tmp1362, i64 1
+ %tmp1364 = getelementptr inbounds float, float* %tmp1363, i64 1
+ %tmp1365 = getelementptr inbounds float, float* %tmp1364, i64 1
+ %tmp1366 = getelementptr inbounds float, float* %tmp1365, i64 1
+ %tmp1367 = getelementptr inbounds float, float* %tmp1366, i64 1
+ %tmp1368 = getelementptr inbounds float, float* %tmp1367, i64 1
+ %tmp1369 = getelementptr inbounds float, float* %tmp1368, i64 1
+ %tmp1370 = getelementptr inbounds float, float* %tmp1369, i64 1
+ %tmp1371 = getelementptr inbounds float, float* %tmp1370, i64 1
+ %tmp1372 = getelementptr inbounds float, float* %tmp1371, i64 1
+ %tmp1373 = getelementptr inbounds float, float* %tmp1372, i64 1
+ %tmp1374 = getelementptr inbounds float, float* %tmp1373, i64 1
+ %tmp1375 = getelementptr inbounds float, float* %tmp1374, i64 1
+ %tmp1376 = getelementptr inbounds float, float* %tmp1375, i64 1
+ %tmp1377 = getelementptr inbounds float, float* %tmp1376, i64 1
+ %tmp1378 = getelementptr inbounds float, float* %tmp1377, i64 1
+ %tmp1379 = getelementptr inbounds float, float* %tmp1378, i64 1
+ %tmp1380 = getelementptr inbounds float, float* %tmp1379, i64 1
+ %tmp1381 = getelementptr inbounds float, float* %tmp1380, i64 1
+ %tmp1382 = getelementptr inbounds float, float* %tmp1381, i64 1
+ %tmp1383 = getelementptr inbounds float, float* %tmp1382, i64 1
+ %tmp1384 = getelementptr inbounds float, float* %tmp1383, i64 1
+ %tmp1385 = getelementptr inbounds float, float* %tmp1384, i64 1
+ %tmp1386 = getelementptr inbounds float, float* %tmp1385, i64 1
+ %tmp1387 = getelementptr inbounds float, float* %tmp1386, i64 1
+ %tmp1388 = getelementptr inbounds float, float* %tmp1387, i64 1
+ %tmp1389 = getelementptr inbounds float, float* %tmp1388, i64 1
+ %tmp1390 = getelementptr inbounds float, float* %tmp1389, i64 1
+ %tmp1391 = getelementptr inbounds float, float* %tmp1390, i64 1
+ %tmp1392 = getelementptr inbounds float, float* %tmp1391, i64 1
+ %tmp1393 = getelementptr inbounds float, float* %tmp1392, i64 1
+ %tmp1394 = getelementptr inbounds float, float* %tmp1393, i64 1
+ %tmp1395 = getelementptr inbounds float, float* %tmp1394, i64 1
+ %tmp1396 = getelementptr inbounds float, float* %tmp1395, i64 1
+ %tmp1397 = getelementptr inbounds float, float* %tmp1396, i64 1
+ %tmp1398 = getelementptr inbounds float, float* %tmp1397, i64 1
+ %tmp1399 = getelementptr inbounds float, float* %tmp1398, i64 1
+ %tmp1400 = getelementptr inbounds float, float* %tmp1399, i64 1
+ %tmp1401 = getelementptr inbounds float, float* %tmp1400, i64 1
+ %tmp1402 = getelementptr inbounds float, float* %tmp1401, i64 1
+ %tmp1403 = getelementptr inbounds float, float* %tmp1402, i64 1
+ %tmp1404 = getelementptr inbounds float, float* %tmp1403, i64 1
+ %tmp1405 = getelementptr inbounds float, float* %tmp1404, i64 1
+ %tmp1406 = getelementptr inbounds float, float* %tmp1405, i64 1
+ %tmp1407 = getelementptr inbounds float, float* %tmp1406, i64 1
+ %tmp1408 = getelementptr inbounds float, float* %tmp1407, i64 1
+ %tmp1409 = getelementptr inbounds float, float* %tmp1408, i64 1
+ %tmp1410 = getelementptr inbounds float, float* %tmp1409, i64 1
+ %tmp1411 = getelementptr inbounds float, float* %tmp1410, i64 1
+ %tmp1412 = getelementptr inbounds float, float* %tmp1411, i64 1
+ %tmp1413 = getelementptr inbounds float, float* %tmp1412, i64 1
+ %tmp1414 = getelementptr inbounds float, float* %tmp1413, i64 1
+ %tmp1415 = getelementptr inbounds float, float* %tmp1414, i64 1
+ %tmp1416 = getelementptr inbounds float, float* %tmp1415, i64 1
+ %tmp1417 = getelementptr inbounds float, float* %tmp1416, i64 1
+ %tmp1418 = getelementptr inbounds float, float* %tmp1417, i64 1
+ %tmp1419 = getelementptr inbounds float, float* %tmp1418, i64 1
+ %tmp1420 = getelementptr inbounds float, float* %tmp1419, i64 1
+ %tmp1421 = getelementptr inbounds float, float* %tmp1420, i64 1
+ %tmp1422 = getelementptr inbounds float, float* %tmp1421, i64 1
+ %tmp1423 = getelementptr inbounds float, float* %tmp1422, i64 1
+ %tmp1424 = getelementptr inbounds float, float* %tmp1423, i64 1
+ %tmp1425 = getelementptr inbounds float, float* %tmp1424, i64 1
+ %tmp1426 = getelementptr inbounds float, float* %tmp1425, i64 1
+ %tmp1427 = getelementptr inbounds float, float* %tmp1426, i64 1
+ %tmp1428 = getelementptr inbounds float, float* %tmp1427, i64 1
+ %tmp1429 = getelementptr inbounds float, float* %tmp1428, i64 1
+ %tmp1430 = getelementptr inbounds float, float* %tmp1429, i64 1
+ %tmp1431 = getelementptr inbounds float, float* %tmp1430, i64 1
+ %tmp1432 = getelementptr inbounds float, float* %tmp1431, i64 1
+ %tmp1433 = getelementptr inbounds float, float* %tmp1432, i64 1
+ %tmp1434 = getelementptr inbounds float, float* %tmp1433, i64 1
+ %tmp1435 = getelementptr inbounds float, float* %tmp1434, i64 1
+ %tmp1436 = getelementptr inbounds float, float* %tmp1435, i64 1
+ %tmp1437 = getelementptr inbounds float, float* %tmp1436, i64 1
+ %tmp1438 = getelementptr inbounds float, float* %tmp1437, i64 1
+ %tmp1439 = getelementptr inbounds float, float* %tmp1438, i64 1
+ %tmp1440 = getelementptr inbounds float, float* %tmp1439, i64 1
+ %tmp1441 = getelementptr inbounds float, float* %tmp1440, i64 1
+ %tmp1442 = getelementptr inbounds float, float* %tmp1441, i64 1
+ %tmp1443 = getelementptr inbounds float, float* %tmp1442, i64 1
+ %tmp1444 = getelementptr inbounds float, float* %tmp1443, i64 1
+ %tmp1445 = getelementptr inbounds float, float* %tmp1444, i64 1
+ %tmp1446 = getelementptr inbounds float, float* %tmp1445, i64 1
+ %tmp1447 = getelementptr inbounds float, float* %tmp1446, i64 1
+ %tmp1448 = getelementptr inbounds float, float* %tmp1447, i64 1
+ %tmp1449 = getelementptr inbounds float, float* %tmp1448, i64 1
+ %tmp1450 = getelementptr inbounds float, float* %tmp1449, i64 1
+ %tmp1451 = getelementptr inbounds float, float* %tmp1450, i64 1
+ %tmp1452 = getelementptr inbounds float, float* %tmp1451, i64 1
+ %tmp1453 = getelementptr inbounds float, float* %tmp1452, i64 1
+ %tmp1454 = getelementptr inbounds float, float* %tmp1453, i64 1
+ %tmp1455 = getelementptr inbounds float, float* %tmp1454, i64 1
+ %tmp1456 = getelementptr inbounds float, float* %tmp1455, i64 1
+ %tmp1457 = getelementptr inbounds float, float* %tmp1456, i64 1
+ %tmp1458 = getelementptr inbounds float, float* %tmp1457, i64 1
+ %tmp1459 = getelementptr inbounds float, float* %tmp1458, i64 1
+ %tmp1460 = getelementptr inbounds float, float* %tmp1459, i64 1
+ %tmp1461 = getelementptr inbounds float, float* %tmp1460, i64 1
+ %tmp1462 = getelementptr inbounds float, float* %tmp1461, i64 1
+ %tmp1463 = getelementptr inbounds float, float* %tmp1462, i64 1
+ %tmp1464 = getelementptr inbounds float, float* %tmp1463, i64 1
+ %tmp1465 = getelementptr inbounds float, float* %tmp1464, i64 1
+ %tmp1466 = getelementptr inbounds float, float* %tmp1465, i64 1
+ %tmp1467 = getelementptr inbounds float, float* %tmp1466, i64 1
+ %tmp1468 = getelementptr inbounds float, float* %tmp1467, i64 1
+ %tmp1469 = getelementptr inbounds float, float* %tmp1468, i64 1
+ %tmp1470 = getelementptr inbounds float, float* %tmp1469, i64 1
+ %tmp1471 = getelementptr inbounds float, float* %tmp1470, i64 1
+ %tmp1472 = getelementptr inbounds float, float* %tmp1471, i64 1
+ %tmp1473 = getelementptr inbounds float, float* %tmp1472, i64 1
+ %tmp1474 = getelementptr inbounds float, float* %tmp1473, i64 1
+ %tmp1475 = getelementptr inbounds float, float* %tmp1474, i64 1
+ %tmp1476 = getelementptr inbounds float, float* %tmp1475, i64 1
+ %tmp1477 = getelementptr inbounds float, float* %tmp1476, i64 1
+ %tmp1478 = getelementptr inbounds float, float* %tmp1477, i64 1
+ %tmp1479 = getelementptr inbounds float, float* %tmp1478, i64 1
+ %tmp1480 = getelementptr inbounds float, float* %tmp1479, i64 1
+ %tmp1481 = getelementptr inbounds float, float* %tmp1480, i64 1
+ %tmp1482 = getelementptr inbounds float, float* %tmp1481, i64 1
+ %tmp1483 = getelementptr inbounds float, float* %tmp1482, i64 1
+ %tmp1484 = getelementptr inbounds float, float* %tmp1483, i64 1
+ %tmp1485 = getelementptr inbounds float, float* %tmp1484, i64 1
+ %tmp1486 = getelementptr inbounds float, float* %tmp1485, i64 1
+ %tmp1487 = getelementptr inbounds float, float* %tmp1486, i64 1
+ %tmp1488 = getelementptr inbounds float, float* %tmp1487, i64 1
+ %tmp1489 = getelementptr inbounds float, float* %tmp1488, i64 1
+ %tmp1490 = getelementptr inbounds float, float* %tmp1489, i64 1
+ %tmp1491 = getelementptr inbounds float, float* %tmp1490, i64 1
+ %tmp1492 = getelementptr inbounds float, float* %tmp1491, i64 1
+ %tmp1493 = getelementptr inbounds float, float* %tmp1492, i64 1
+ %tmp1494 = getelementptr inbounds float, float* %tmp1493, i64 1
+ %tmp1495 = getelementptr inbounds float, float* %tmp1494, i64 1
+ %tmp1496 = getelementptr inbounds float, float* %tmp1495, i64 1
+ %tmp1497 = getelementptr inbounds float, float* %tmp1496, i64 1
+ %tmp1498 = getelementptr inbounds float, float* %tmp1497, i64 1
+ %tmp1499 = getelementptr inbounds float, float* %tmp1498, i64 1
+ %tmp1500 = getelementptr inbounds float, float* %tmp1499, i64 1
+ %tmp1501 = getelementptr inbounds float, float* %tmp1500, i64 1
+ %tmp1502 = getelementptr inbounds float, float* %tmp1501, i64 1
+ %tmp1503 = getelementptr inbounds float, float* %tmp1502, i64 1
+ %tmp1504 = getelementptr inbounds float, float* %tmp1503, i64 1
+ %tmp1505 = getelementptr inbounds float, float* %tmp1504, i64 1
+ %tmp1506 = getelementptr inbounds float, float* %tmp1505, i64 1
+ %tmp1507 = getelementptr inbounds float, float* %tmp1506, i64 1
+ %tmp1508 = getelementptr inbounds float, float* %tmp1507, i64 1
+ %tmp1509 = getelementptr inbounds float, float* %tmp1508, i64 1
+ %tmp1510 = getelementptr inbounds float, float* %tmp1509, i64 1
+ %tmp1511 = getelementptr inbounds float, float* %tmp1510, i64 1
+ %tmp1512 = getelementptr inbounds float, float* %tmp1511, i64 1
+ %tmp1513 = getelementptr inbounds float, float* %tmp1512, i64 1
+ %tmp1514 = getelementptr inbounds float, float* %tmp1513, i64 1
+ %tmp1515 = getelementptr inbounds float, float* %tmp1514, i64 1
+ %tmp1516 = getelementptr inbounds float, float* %tmp1515, i64 1
+ %tmp1517 = getelementptr inbounds float, float* %tmp1516, i64 1
+ %tmp1518 = getelementptr inbounds float, float* %tmp1517, i64 1
+ %tmp1519 = getelementptr inbounds float, float* %tmp1518, i64 1
+ %tmp1520 = getelementptr inbounds float, float* %tmp1519, i64 1
+ %tmp1521 = getelementptr inbounds float, float* %tmp1520, i64 1
+ %tmp1522 = getelementptr inbounds float, float* %tmp1521, i64 1
+ %tmp1523 = getelementptr inbounds float, float* %tmp1522, i64 1
+ %tmp1524 = getelementptr inbounds float, float* %tmp1523, i64 1
+ %tmp1525 = getelementptr inbounds float, float* %tmp1524, i64 1
+ %tmp1526 = getelementptr inbounds float, float* %tmp1525, i64 1
+ %tmp1527 = getelementptr inbounds float, float* %tmp1526, i64 1
+ %tmp1528 = getelementptr inbounds float, float* %tmp1527, i64 1
+ %tmp1529 = getelementptr inbounds float, float* %tmp1528, i64 1
+ %tmp1530 = getelementptr inbounds float, float* %tmp1529, i64 1
+ %tmp1531 = getelementptr inbounds float, float* %tmp1530, i64 1
+ %tmp1532 = getelementptr inbounds float, float* %tmp1531, i64 1
+ %tmp1533 = getelementptr inbounds float, float* %tmp1532, i64 1
+ %tmp1534 = getelementptr inbounds float, float* %tmp1533, i64 1
+ %tmp1535 = getelementptr inbounds float, float* %tmp1534, i64 1
+ %tmp1536 = getelementptr inbounds float, float* %tmp1535, i64 1
+ %tmp1537 = getelementptr inbounds float, float* %tmp1536, i64 1
+ %tmp1538 = getelementptr inbounds float, float* %tmp1537, i64 1
+ %tmp1539 = getelementptr inbounds float, float* %tmp1538, i64 1
+ %tmp1540 = getelementptr inbounds float, float* %tmp1539, i64 1
+ %tmp1541 = getelementptr inbounds float, float* %tmp1540, i64 1
+ %tmp1542 = getelementptr inbounds float, float* %tmp1541, i64 1
+ %tmp1543 = getelementptr inbounds float, float* %tmp1542, i64 1
+ %tmp1544 = getelementptr inbounds float, float* %tmp1543, i64 1
+ %tmp1545 = getelementptr inbounds float, float* %tmp1544, i64 1
+ %tmp1546 = getelementptr inbounds float, float* %tmp1545, i64 1
+ %tmp1547 = getelementptr inbounds float, float* %tmp1546, i64 1
+ %tmp1548 = getelementptr inbounds float, float* %tmp1547, i64 1
+ %tmp1549 = getelementptr inbounds float, float* %tmp1548, i64 1
+ %tmp1550 = getelementptr inbounds float, float* %tmp1549, i64 1
+ %tmp1551 = getelementptr inbounds float, float* %tmp1550, i64 1
+ %tmp1552 = getelementptr inbounds float, float* %tmp1551, i64 1
+ %tmp1553 = getelementptr inbounds float, float* %tmp1552, i64 1
+ %tmp1554 = getelementptr inbounds float, float* %tmp1553, i64 1
+ %tmp1555 = getelementptr inbounds float, float* %tmp1554, i64 1
+ %tmp1556 = getelementptr inbounds float, float* %tmp1555, i64 1
+ %tmp1557 = getelementptr inbounds float, float* %tmp1556, i64 1
+ %tmp1558 = getelementptr inbounds float, float* %tmp1557, i64 1
+ %tmp1559 = getelementptr inbounds float, float* %tmp1558, i64 1
+ %tmp1560 = getelementptr inbounds float, float* %tmp1559, i64 1
+ %tmp1561 = getelementptr inbounds float, float* %tmp1560, i64 1
+ %tmp1562 = getelementptr inbounds float, float* %tmp1561, i64 1
+ %tmp1563 = getelementptr inbounds float, float* %tmp1562, i64 1
+ %tmp1564 = getelementptr inbounds float, float* %tmp1563, i64 1
+ %tmp1565 = getelementptr inbounds float, float* %tmp1564, i64 1
+ %tmp1566 = getelementptr inbounds float, float* %tmp1565, i64 1
+ %tmp1567 = getelementptr inbounds float, float* %tmp1566, i64 1
+ %tmp1568 = getelementptr inbounds float, float* %tmp1567, i64 1
+ %tmp1569 = getelementptr inbounds float, float* %tmp1568, i64 1
+ %tmp1570 = getelementptr inbounds float, float* %tmp1569, i64 1
+ %tmp1571 = getelementptr inbounds float, float* %tmp1570, i64 1
+ %tmp1572 = getelementptr inbounds float, float* %tmp1571, i64 1
+ %tmp1573 = getelementptr inbounds float, float* %tmp1572, i64 1
+ %tmp1574 = getelementptr inbounds float, float* %tmp1573, i64 1
+ %tmp1575 = getelementptr inbounds float, float* %tmp1574, i64 1
+ %tmp1576 = getelementptr inbounds float, float* %tmp1575, i64 1
+ %tmp1577 = getelementptr inbounds float, float* %tmp1576, i64 1
+ %tmp1578 = getelementptr inbounds float, float* %tmp1577, i64 1
+ %tmp1579 = getelementptr inbounds float, float* %tmp1578, i64 1
+ %tmp1580 = getelementptr inbounds float, float* %tmp1579, i64 1
+ %tmp1581 = getelementptr inbounds float, float* %tmp1580, i64 1
+ %tmp1582 = getelementptr inbounds float, float* %tmp1581, i64 1
+ %tmp1583 = getelementptr inbounds float, float* %tmp1582, i64 1
+ %tmp1584 = getelementptr inbounds float, float* %tmp1583, i64 1
+ %tmp1585 = getelementptr inbounds float, float* %tmp1584, i64 1
+ %tmp1586 = getelementptr inbounds float, float* %tmp1585, i64 1
+ %tmp1587 = getelementptr inbounds float, float* %tmp1586, i64 1
+ %tmp1588 = getelementptr inbounds float, float* %tmp1587, i64 1
+ %tmp1589 = getelementptr inbounds float, float* %tmp1588, i64 1
+ %tmp1590 = getelementptr inbounds float, float* %tmp1589, i64 1
+ %tmp1591 = getelementptr inbounds float, float* %tmp1590, i64 1
+ %tmp1592 = getelementptr inbounds float, float* %tmp1591, i64 1
+ %tmp1593 = getelementptr inbounds float, float* %tmp1592, i64 1
+ %tmp1594 = getelementptr inbounds float, float* %tmp1593, i64 1
+ %tmp1595 = getelementptr inbounds float, float* %tmp1594, i64 1
+ %tmp1596 = getelementptr inbounds float, float* %tmp1595, i64 1
+ %tmp1597 = getelementptr inbounds float, float* %tmp1596, i64 1
+ %tmp1598 = getelementptr inbounds float, float* %tmp1597, i64 1
+ %tmp1599 = getelementptr inbounds float, float* %tmp1598, i64 1
+ %tmp1600 = getelementptr inbounds float, float* %tmp1599, i64 1
+ %tmp1601 = getelementptr inbounds float, float* %tmp1600, i64 1
+ %tmp1602 = getelementptr inbounds float, float* %tmp1601, i64 1
+ %tmp1603 = getelementptr inbounds float, float* %tmp1602, i64 1
+ %tmp1604 = getelementptr inbounds float, float* %tmp1603, i64 1
+ %tmp1605 = getelementptr inbounds float, float* %tmp1604, i64 1
+ %tmp1606 = getelementptr inbounds float, float* %tmp1605, i64 1
+ %tmp1607 = getelementptr inbounds float, float* %tmp1606, i64 1
+ %tmp1608 = getelementptr inbounds float, float* %tmp1607, i64 1
+ %tmp1609 = getelementptr inbounds float, float* %tmp1608, i64 1
+ %tmp1610 = getelementptr inbounds float, float* %tmp1609, i64 1
+ %tmp1611 = getelementptr inbounds float, float* %tmp1610, i64 1
+ %tmp1612 = getelementptr inbounds float, float* %tmp1611, i64 1
+ %tmp1613 = getelementptr inbounds float, float* %tmp1612, i64 1
+ %tmp1614 = getelementptr inbounds float, float* %tmp1613, i64 1
+ %tmp1615 = getelementptr inbounds float, float* %tmp1614, i64 1
+ %tmp1616 = getelementptr inbounds float, float* %tmp1615, i64 1
+ %tmp1617 = getelementptr inbounds float, float* %tmp1616, i64 1
+ %tmp1618 = getelementptr inbounds float, float* %tmp1617, i64 1
+ %tmp1619 = getelementptr inbounds float, float* %tmp1618, i64 1
+ %tmp1620 = getelementptr inbounds float, float* %tmp1619, i64 1
+ %tmp1621 = getelementptr inbounds float, float* %tmp1620, i64 1
+ %tmp1622 = getelementptr inbounds float, float* %tmp1621, i64 1
+ %tmp1623 = getelementptr inbounds float, float* %tmp1622, i64 1
+ %tmp1624 = getelementptr inbounds float, float* %tmp1623, i64 1
+ %tmp1625 = getelementptr inbounds float, float* %tmp1624, i64 1
+ %tmp1626 = getelementptr inbounds float, float* %tmp1625, i64 1
+ %tmp1627 = getelementptr inbounds float, float* %tmp1626, i64 1
+ %tmp1628 = getelementptr inbounds float, float* %tmp1627, i64 1
+ %tmp1629 = getelementptr inbounds float, float* %tmp1628, i64 1
+ %tmp1630 = getelementptr inbounds float, float* %tmp1629, i64 1
+ %tmp1631 = getelementptr inbounds float, float* %tmp1630, i64 1
+ %tmp1632 = getelementptr inbounds float, float* %tmp1631, i64 1
+ %tmp1633 = getelementptr inbounds float, float* %tmp1632, i64 1
+ %tmp1634 = getelementptr inbounds float, float* %tmp1633, i64 1
+ %tmp1635 = getelementptr inbounds float, float* %tmp1634, i64 1
+ %tmp1636 = getelementptr inbounds float, float* %tmp1635, i64 1
+ %tmp1637 = getelementptr inbounds float, float* %tmp1636, i64 1
+ %tmp1638 = getelementptr inbounds float, float* %tmp1637, i64 1
+ %tmp1639 = getelementptr inbounds float, float* %tmp1638, i64 1
+ %tmp1640 = getelementptr inbounds float, float* %tmp1639, i64 1
+ %tmp1641 = getelementptr inbounds float, float* %tmp1640, i64 1
+ %tmp1642 = getelementptr inbounds float, float* %tmp1641, i64 1
+ %tmp1643 = getelementptr inbounds float, float* %tmp1642, i64 1
+ %tmp1644 = getelementptr inbounds float, float* %tmp1643, i64 1
+ %tmp1645 = getelementptr inbounds float, float* %tmp1644, i64 1
+ %tmp1646 = getelementptr inbounds float, float* %tmp1645, i64 1
+ %tmp1647 = getelementptr inbounds float, float* %tmp1646, i64 1
+ %tmp1648 = getelementptr inbounds float, float* %tmp1647, i64 1
+ %tmp1649 = getelementptr inbounds float, float* %tmp1648, i64 1
+ %tmp1650 = getelementptr inbounds float, float* %tmp1649, i64 1
+ %tmp1651 = getelementptr inbounds float, float* %tmp1650, i64 1
+ %tmp1652 = getelementptr inbounds float, float* %tmp1651, i64 1
+ %tmp1653 = getelementptr inbounds float, float* %tmp1652, i64 1
+ %tmp1654 = getelementptr inbounds float, float* %tmp1653, i64 1
+ %tmp1655 = getelementptr inbounds float, float* %tmp1654, i64 1
+ %tmp1656 = getelementptr inbounds float, float* %tmp1655, i64 1
+ %tmp1657 = getelementptr inbounds float, float* %tmp1656, i64 1
+ %tmp1658 = getelementptr inbounds float, float* %tmp1657, i64 1
+ %tmp1659 = getelementptr inbounds float, float* %tmp1658, i64 1
+ %tmp1660 = getelementptr inbounds float, float* %tmp1659, i64 1
+ %tmp1661 = getelementptr inbounds float, float* %tmp1660, i64 1
+ %tmp1662 = getelementptr inbounds float, float* %tmp1661, i64 1
+ %tmp1663 = getelementptr inbounds float, float* %tmp1662, i64 1
+ %tmp1664 = getelementptr inbounds float, float* %tmp1663, i64 1
+ %tmp1665 = getelementptr inbounds float, float* %tmp1664, i64 1
+ %tmp1666 = getelementptr inbounds float, float* %tmp1665, i64 1
+ %tmp1667 = getelementptr inbounds float, float* %tmp1666, i64 1
+ %tmp1668 = getelementptr inbounds float, float* %tmp1667, i64 1
+ %tmp1669 = getelementptr inbounds float, float* %tmp1668, i64 1
+ %tmp1670 = getelementptr inbounds float, float* %tmp1669, i64 1
+ %tmp1671 = getelementptr inbounds float, float* %tmp1670, i64 1
+ %tmp1672 = getelementptr inbounds float, float* %tmp1671, i64 1
+ %tmp1673 = getelementptr inbounds float, float* %tmp1672, i64 1
+ %tmp1674 = getelementptr inbounds float, float* %tmp1673, i64 1
+ %tmp1675 = getelementptr inbounds float, float* %tmp1674, i64 1
+ %tmp1676 = getelementptr inbounds float, float* %tmp1675, i64 1
+ %tmp1677 = getelementptr inbounds float, float* %tmp1676, i64 1
+ %tmp1678 = getelementptr inbounds float, float* %tmp1677, i64 1
+ %tmp1679 = getelementptr inbounds float, float* %tmp1678, i64 1
+ %tmp1680 = getelementptr inbounds float, float* %tmp1679, i64 1
+ %tmp1681 = getelementptr inbounds float, float* %tmp1680, i64 1
+ %tmp1682 = getelementptr inbounds float, float* %tmp1681, i64 1
+ %tmp1683 = getelementptr inbounds float, float* %tmp1682, i64 1
+ %tmp1684 = getelementptr inbounds float, float* %tmp1683, i64 1
+ %tmp1685 = getelementptr inbounds float, float* %tmp1684, i64 1
+ %tmp1686 = getelementptr inbounds float, float* %tmp1685, i64 1
+ %tmp1687 = getelementptr inbounds float, float* %tmp1686, i64 1
+ %tmp1688 = getelementptr inbounds float, float* %tmp1687, i64 1
+ %tmp1689 = getelementptr inbounds float, float* %tmp1688, i64 1
+ %tmp1690 = getelementptr inbounds float, float* %tmp1689, i64 1
+ %tmp1691 = getelementptr inbounds float, float* %tmp1690, i64 1
+ %tmp1692 = getelementptr inbounds float, float* %tmp1691, i64 1
+ %tmp1693 = getelementptr inbounds float, float* %tmp1692, i64 1
+ %tmp1694 = getelementptr inbounds float, float* %tmp1693, i64 1
+ %tmp1695 = getelementptr inbounds float, float* %tmp1694, i64 1
+ %tmp1696 = getelementptr inbounds float, float* %tmp1695, i64 1
+ %tmp1697 = getelementptr inbounds float, float* %tmp1696, i64 1
+ %tmp1698 = getelementptr inbounds float, float* %tmp1697, i64 1
+ %tmp1699 = getelementptr inbounds float, float* %tmp1698, i64 1
+ %tmp1700 = getelementptr inbounds float, float* %tmp1699, i64 1
+ %tmp1701 = getelementptr inbounds float, float* %tmp1700, i64 1
+ %tmp1702 = getelementptr inbounds float, float* %tmp1701, i64 1
+ %tmp1703 = getelementptr inbounds float, float* %tmp1702, i64 1
+ %tmp1704 = getelementptr inbounds float, float* %tmp1703, i64 1
+ %tmp1705 = getelementptr inbounds float, float* %tmp1704, i64 1
+ %tmp1706 = getelementptr inbounds float, float* %tmp1705, i64 1
+ %tmp1707 = getelementptr inbounds float, float* %tmp1706, i64 1
+ %tmp1708 = getelementptr inbounds float, float* %tmp1707, i64 1
+ %tmp1709 = getelementptr inbounds float, float* %tmp1708, i64 1
+ %tmp1710 = getelementptr inbounds float, float* %tmp1709, i64 1
+ %tmp1711 = getelementptr inbounds float, float* %tmp1710, i64 1
+ %tmp1712 = getelementptr inbounds float, float* %tmp1711, i64 1
+ %tmp1713 = getelementptr inbounds float, float* %tmp1712, i64 1
+ %tmp1714 = getelementptr inbounds float, float* %tmp1713, i64 1
+ %tmp1715 = getelementptr inbounds float, float* %tmp1714, i64 1
+ %tmp1716 = getelementptr inbounds float, float* %tmp1715, i64 1
+ %tmp1717 = getelementptr inbounds float, float* %tmp1716, i64 1
+ %tmp1718 = getelementptr inbounds float, float* %tmp1717, i64 1
+ %tmp1719 = getelementptr inbounds float, float* %tmp1718, i64 1
+ %tmp1720 = getelementptr inbounds float, float* %tmp1719, i64 1
+ %tmp1721 = getelementptr inbounds float, float* %tmp1720, i64 1
+ %tmp1722 = getelementptr inbounds float, float* %tmp1721, i64 1
+ %tmp1723 = getelementptr inbounds float, float* %tmp1722, i64 1
+ %tmp1724 = getelementptr inbounds float, float* %tmp1723, i64 1
+ %tmp1725 = getelementptr inbounds float, float* %tmp1724, i64 1
+ %tmp1726 = getelementptr inbounds float, float* %tmp1725, i64 1
+ %tmp1727 = getelementptr inbounds float, float* %tmp1726, i64 1
+ %tmp1728 = getelementptr inbounds float, float* %tmp1727, i64 1
+ %tmp1729 = getelementptr inbounds float, float* %tmp1728, i64 1
+ %tmp1730 = getelementptr inbounds float, float* %tmp1729, i64 1
+ %tmp1731 = getelementptr inbounds float, float* %tmp1730, i64 1
+ %tmp1732 = getelementptr inbounds float, float* %tmp1731, i64 1
+ %tmp1733 = getelementptr inbounds float, float* %tmp1732, i64 1
+ %tmp1734 = getelementptr inbounds float, float* %tmp1733, i64 1
+ %tmp1735 = getelementptr inbounds float, float* %tmp1734, i64 1
+ %tmp1736 = getelementptr inbounds float, float* %tmp1735, i64 1
+ %tmp1737 = getelementptr inbounds float, float* %tmp1736, i64 1
+ %tmp1738 = getelementptr inbounds float, float* %tmp1737, i64 1
+ %tmp1739 = getelementptr inbounds float, float* %tmp1738, i64 1
+ %tmp1740 = getelementptr inbounds float, float* %tmp1739, i64 1
+ %tmp1741 = getelementptr inbounds float, float* %tmp1740, i64 1
+ %tmp1742 = getelementptr inbounds float, float* %tmp1741, i64 1
+ %tmp1743 = getelementptr inbounds float, float* %tmp1742, i64 1
+ %tmp1744 = getelementptr inbounds float, float* %tmp1743, i64 1
+ %tmp1745 = getelementptr inbounds float, float* %tmp1744, i64 1
+ %tmp1746 = getelementptr inbounds float, float* %tmp1745, i64 1
+ %tmp1747 = getelementptr inbounds float, float* %tmp1746, i64 1
+ %tmp1748 = getelementptr inbounds float, float* %tmp1747, i64 1
+ %tmp1749 = getelementptr inbounds float, float* %tmp1748, i64 1
+ %tmp1750 = getelementptr inbounds float, float* %tmp1749, i64 1
+ %tmp1751 = getelementptr inbounds float, float* %tmp1750, i64 1
+ %tmp1752 = getelementptr inbounds float, float* %tmp1751, i64 1
+ %tmp1753 = getelementptr inbounds float, float* %tmp1752, i64 1
+ %tmp1754 = getelementptr inbounds float, float* %tmp1753, i64 1
+ %tmp1755 = getelementptr inbounds float, float* %tmp1754, i64 1
+ %tmp1756 = getelementptr inbounds float, float* %tmp1755, i64 1
+ %tmp1757 = getelementptr inbounds float, float* %tmp1756, i64 1
+ %tmp1758 = getelementptr inbounds float, float* %tmp1757, i64 1
+ %tmp1759 = getelementptr inbounds float, float* %tmp1758, i64 1
+ %tmp1760 = getelementptr inbounds float, float* %tmp1759, i64 1
+ %tmp1761 = getelementptr inbounds float, float* %tmp1760, i64 1
+ %tmp1762 = getelementptr inbounds float, float* %tmp1761, i64 1
+ %tmp1763 = getelementptr inbounds float, float* %tmp1762, i64 1
+ %tmp1764 = getelementptr inbounds float, float* %tmp1763, i64 1
+ %tmp1765 = getelementptr inbounds float, float* %tmp1764, i64 1
+ %tmp1766 = getelementptr inbounds float, float* %tmp1765, i64 1
+ %tmp1767 = getelementptr inbounds float, float* %tmp1766, i64 1
+ %tmp1768 = getelementptr inbounds float, float* %tmp1767, i64 1
+ %tmp1769 = getelementptr inbounds float, float* %tmp1768, i64 1
+ %tmp1770 = getelementptr inbounds float, float* %tmp1769, i64 1
+ %tmp1771 = getelementptr inbounds float, float* %tmp1770, i64 1
+ %tmp1772 = getelementptr inbounds float, float* %tmp1771, i64 1
+ %tmp1773 = getelementptr inbounds float, float* %tmp1772, i64 1
+ %tmp1774 = getelementptr inbounds float, float* %tmp1773, i64 1
+ %tmp1775 = getelementptr inbounds float, float* %tmp1774, i64 1
+ %tmp1776 = getelementptr inbounds float, float* %tmp1775, i64 1
+ %tmp1777 = getelementptr inbounds float, float* %tmp1776, i64 1
+ %tmp1778 = getelementptr inbounds float, float* %tmp1777, i64 1
+ %tmp1779 = getelementptr inbounds float, float* %tmp1778, i64 1
+ %tmp1780 = getelementptr inbounds float, float* %tmp1779, i64 1
+ %tmp1781 = getelementptr inbounds float, float* %tmp1780, i64 1
+ %tmp1782 = getelementptr inbounds float, float* %tmp1781, i64 1
+ %tmp1783 = getelementptr inbounds float, float* %tmp1782, i64 1
+ %tmp1784 = getelementptr inbounds float, float* %tmp1783, i64 1
+ %tmp1785 = getelementptr inbounds float, float* %tmp1784, i64 1
+ %tmp1786 = getelementptr inbounds float, float* %tmp1785, i64 1
+ %tmp1787 = getelementptr inbounds float, float* %tmp1786, i64 1
+ %tmp1788 = getelementptr inbounds float, float* %tmp1787, i64 1
+ %tmp1789 = getelementptr inbounds float, float* %tmp1788, i64 1
+ %tmp1790 = getelementptr inbounds float, float* %tmp1789, i64 1
+ %tmp1791 = getelementptr inbounds float, float* %tmp1790, i64 1
+ %tmp1792 = getelementptr inbounds float, float* %tmp1791, i64 1
+ %tmp1793 = getelementptr inbounds float, float* %tmp1792, i64 1
+ %tmp1794 = getelementptr inbounds float, float* %tmp1793, i64 1
+ %tmp1795 = getelementptr inbounds float, float* %tmp1794, i64 1
+ %tmp1796 = getelementptr inbounds float, float* %tmp1795, i64 1
+ %tmp1797 = getelementptr inbounds float, float* %tmp1796, i64 1
+ %tmp1798 = getelementptr inbounds float, float* %tmp1797, i64 1
+ %tmp1799 = getelementptr inbounds float, float* %tmp1798, i64 1
+ %tmp1800 = getelementptr inbounds float, float* %tmp1799, i64 1
+ %tmp1801 = getelementptr inbounds float, float* %tmp1800, i64 1
+ %tmp1802 = getelementptr inbounds float, float* %tmp1801, i64 1
+ %tmp1803 = getelementptr inbounds float, float* %tmp1802, i64 1
+ %tmp1804 = getelementptr inbounds float, float* %tmp1803, i64 1
+ %tmp1805 = getelementptr inbounds float, float* %tmp1804, i64 1
+ %tmp1806 = getelementptr inbounds float, float* %tmp1805, i64 1
+ %tmp1807 = getelementptr inbounds float, float* %tmp1806, i64 1
+ %tmp1808 = getelementptr inbounds float, float* %tmp1807, i64 1
+ %tmp1809 = getelementptr inbounds float, float* %tmp1808, i64 1
+ %tmp1810 = getelementptr inbounds float, float* %tmp1809, i64 1
+ %tmp1811 = getelementptr inbounds float, float* %tmp1810, i64 1
+ %tmp1812 = getelementptr inbounds float, float* %tmp1811, i64 1
+ %tmp1813 = getelementptr inbounds float, float* %tmp1812, i64 1
+ %tmp1814 = getelementptr inbounds float, float* %tmp1813, i64 1
+ %tmp1815 = getelementptr inbounds float, float* %tmp1814, i64 1
+ %tmp1816 = getelementptr inbounds float, float* %tmp1815, i64 1
+ %tmp1817 = getelementptr inbounds float, float* %tmp1816, i64 1
+ %tmp1818 = getelementptr inbounds float, float* %tmp1817, i64 1
+ %tmp1819 = getelementptr inbounds float, float* %tmp1818, i64 1
+ %tmp1820 = getelementptr inbounds float, float* %tmp1819, i64 1
+ %tmp1821 = getelementptr inbounds float, float* %tmp1820, i64 1
+ %tmp1822 = getelementptr inbounds float, float* %tmp1821, i64 1
+ %tmp1823 = getelementptr inbounds float, float* %tmp1822, i64 1
+ %tmp1824 = getelementptr inbounds float, float* %tmp1823, i64 1
+ %tmp1825 = getelementptr inbounds float, float* %tmp1824, i64 1
+ %tmp1826 = getelementptr inbounds float, float* %tmp1825, i64 1
+ %tmp1827 = getelementptr inbounds float, float* %tmp1826, i64 1
+ %tmp1828 = getelementptr inbounds float, float* %tmp1827, i64 1
+ %tmp1829 = getelementptr inbounds float, float* %tmp1828, i64 1
+ %tmp1830 = getelementptr inbounds float, float* %tmp1829, i64 1
+ %tmp1831 = getelementptr inbounds float, float* %tmp1830, i64 1
+ %tmp1832 = getelementptr inbounds float, float* %tmp1831, i64 1
+ %tmp1833 = getelementptr inbounds float, float* %tmp1832, i64 1
+ %tmp1834 = getelementptr inbounds float, float* %tmp1833, i64 1
+ %tmp1835 = getelementptr inbounds float, float* %tmp1834, i64 1
+ %tmp1836 = getelementptr inbounds float, float* %tmp1835, i64 1
+ %tmp1837 = getelementptr inbounds float, float* %tmp1836, i64 1
+ %tmp1838 = getelementptr inbounds float, float* %tmp1837, i64 1
+ %tmp1839 = getelementptr inbounds float, float* %tmp1838, i64 1
+ %tmp1840 = getelementptr inbounds float, float* %tmp1839, i64 1
+ %tmp1841 = getelementptr inbounds float, float* %tmp1840, i64 1
+ %tmp1842 = getelementptr inbounds float, float* %tmp1841, i64 1
+ %tmp1843 = getelementptr inbounds float, float* %tmp1842, i64 1
+ %tmp1844 = getelementptr inbounds float, float* %tmp1843, i64 1
+ %tmp1845 = getelementptr inbounds float, float* %tmp1844, i64 1
+ %tmp1846 = getelementptr inbounds float, float* %tmp1845, i64 1
+ %tmp1847 = getelementptr inbounds float, float* %tmp1846, i64 1
+ %tmp1848 = getelementptr inbounds float, float* %tmp1847, i64 1
+ %tmp1849 = getelementptr inbounds float, float* %tmp1848, i64 1
+ %tmp1850 = getelementptr inbounds float, float* %tmp1849, i64 1
+ %tmp1851 = getelementptr inbounds float, float* %tmp1850, i64 1
+ %tmp1852 = getelementptr inbounds float, float* %tmp1851, i64 1
+ %tmp1853 = getelementptr inbounds float, float* %tmp1852, i64 1
+ %tmp1854 = getelementptr inbounds float, float* %tmp1853, i64 1
+ %tmp1855 = getelementptr inbounds float, float* %tmp1854, i64 1
+ %tmp1856 = getelementptr inbounds float, float* %tmp1855, i64 1
+ %tmp1857 = getelementptr inbounds float, float* %tmp1856, i64 1
+ %tmp1858 = getelementptr inbounds float, float* %tmp1857, i64 1
+ %tmp1859 = getelementptr inbounds float, float* %tmp1858, i64 1
+ %tmp1860 = getelementptr inbounds float, float* %tmp1859, i64 1
+ %tmp1861 = getelementptr inbounds float, float* %tmp1860, i64 1
+ %tmp1862 = getelementptr inbounds float, float* %tmp1861, i64 1
+ %tmp1863 = getelementptr inbounds float, float* %tmp1862, i64 1
+ %tmp1864 = getelementptr inbounds float, float* %tmp1863, i64 1
+ %tmp1865 = getelementptr inbounds float, float* %tmp1864, i64 1
+ %tmp1866 = getelementptr inbounds float, float* %tmp1865, i64 1
+ %tmp1867 = getelementptr inbounds float, float* %tmp1866, i64 1
+ %tmp1868 = getelementptr inbounds float, float* %tmp1867, i64 1
+ %tmp1869 = getelementptr inbounds float, float* %tmp1868, i64 1
+ %tmp1870 = getelementptr inbounds float, float* %tmp1869, i64 1
+ %tmp1871 = getelementptr inbounds float, float* %tmp1870, i64 1
+ %tmp1872 = getelementptr inbounds float, float* %tmp1871, i64 1
+ %tmp1873 = getelementptr inbounds float, float* %tmp1872, i64 1
+ %tmp1874 = getelementptr inbounds float, float* %tmp1873, i64 1
+ %tmp1875 = getelementptr inbounds float, float* %tmp1874, i64 1
+ %tmp1876 = getelementptr inbounds float, float* %tmp1875, i64 1
+ %tmp1877 = getelementptr inbounds float, float* %tmp1876, i64 1
+ %tmp1878 = getelementptr inbounds float, float* %tmp1877, i64 1
+ %tmp1879 = getelementptr inbounds float, float* %tmp1878, i64 1
+ %tmp1880 = getelementptr inbounds float, float* %tmp1879, i64 1
+ %tmp1881 = getelementptr inbounds float, float* %tmp1880, i64 1
+ %tmp1882 = getelementptr inbounds float, float* %tmp1881, i64 1
+ %tmp1883 = getelementptr inbounds float, float* %tmp1882, i64 1
+ %tmp1884 = getelementptr inbounds float, float* %tmp1883, i64 1
+ %tmp1885 = getelementptr inbounds float, float* %tmp1884, i64 1
+ %tmp1886 = getelementptr inbounds float, float* %tmp1885, i64 1
+ %tmp1887 = getelementptr inbounds float, float* %tmp1886, i64 1
+ %tmp1888 = getelementptr inbounds float, float* %tmp1887, i64 1
+ %tmp1889 = getelementptr inbounds float, float* %tmp1888, i64 1
+ %tmp1890 = getelementptr inbounds float, float* %tmp1889, i64 1
+ %tmp1891 = getelementptr inbounds float, float* %tmp1890, i64 1
+ %tmp1892 = getelementptr inbounds float, float* %tmp1891, i64 1
+ %tmp1893 = getelementptr inbounds float, float* %tmp1892, i64 1
+ %tmp1894 = getelementptr inbounds float, float* %tmp1893, i64 1
+ %tmp1895 = getelementptr inbounds float, float* %tmp1894, i64 1
+ %tmp1896 = getelementptr inbounds float, float* %tmp1895, i64 1
+ %tmp1897 = getelementptr inbounds float, float* %tmp1896, i64 1
+ %tmp1898 = getelementptr inbounds float, float* %tmp1897, i64 1
+ %tmp1899 = getelementptr inbounds float, float* %tmp1898, i64 1
+ %tmp1900 = getelementptr inbounds float, float* %tmp1899, i64 1
+ %tmp1901 = getelementptr inbounds float, float* %tmp1900, i64 1
+ %tmp1902 = getelementptr inbounds float, float* %tmp1901, i64 1
+ %tmp1903 = getelementptr inbounds float, float* %tmp1902, i64 1
+ %tmp1904 = getelementptr inbounds float, float* %tmp1903, i64 1
+ %tmp1905 = getelementptr inbounds float, float* %tmp1904, i64 1
+ %tmp1906 = getelementptr inbounds float, float* %tmp1905, i64 1
+ %tmp1907 = getelementptr inbounds float, float* %tmp1906, i64 1
+ %tmp1908 = getelementptr inbounds float, float* %tmp1907, i64 1
+ %tmp1909 = getelementptr inbounds float, float* %tmp1908, i64 1
+ %tmp1910 = getelementptr inbounds float, float* %tmp1909, i64 1
+ %tmp1911 = getelementptr inbounds float, float* %tmp1910, i64 1
+ %tmp1912 = getelementptr inbounds float, float* %tmp1911, i64 1
+ %tmp1913 = getelementptr inbounds float, float* %tmp1912, i64 1
+ %tmp1914 = getelementptr inbounds float, float* %tmp1913, i64 1
+ %tmp1915 = getelementptr inbounds float, float* %tmp1914, i64 1
+ %tmp1916 = getelementptr inbounds float, float* %tmp1915, i64 1
+ %tmp1917 = getelementptr inbounds float, float* %tmp1916, i64 1
+ %tmp1918 = getelementptr inbounds float, float* %tmp1917, i64 1
+ %tmp1919 = getelementptr inbounds float, float* %tmp1918, i64 1
+ %tmp1920 = getelementptr inbounds float, float* %tmp1919, i64 1
+ %tmp1921 = getelementptr inbounds float, float* %tmp1920, i64 1
+ %tmp1922 = getelementptr inbounds float, float* %tmp1921, i64 1
+ %tmp1923 = getelementptr inbounds float, float* %tmp1922, i64 1
+ %tmp1924 = getelementptr inbounds float, float* %tmp1923, i64 1
+ %tmp1925 = getelementptr inbounds float, float* %tmp1924, i64 1
+ %tmp1926 = getelementptr inbounds float, float* %tmp1925, i64 1
+ %tmp1927 = getelementptr inbounds float, float* %tmp1926, i64 1
+ %tmp1928 = getelementptr inbounds float, float* %tmp1927, i64 1
+ %tmp1929 = getelementptr inbounds float, float* %tmp1928, i64 1
+ %tmp1930 = getelementptr inbounds float, float* %tmp1929, i64 1
+ %tmp1931 = getelementptr inbounds float, float* %tmp1930, i64 1
+ %tmp1932 = getelementptr inbounds float, float* %tmp1931, i64 1
+ %tmp1933 = getelementptr inbounds float, float* %tmp1932, i64 1
+ %tmp1934 = getelementptr inbounds float, float* %tmp1933, i64 1
+ %tmp1935 = getelementptr inbounds float, float* %tmp1934, i64 1
+ %tmp1936 = getelementptr inbounds float, float* %tmp1935, i64 1
+ %tmp1937 = getelementptr inbounds float, float* %tmp1936, i64 1
+ %tmp1938 = getelementptr inbounds float, float* %tmp1937, i64 1
+ %tmp1939 = getelementptr inbounds float, float* %tmp1938, i64 1
+ %tmp1940 = getelementptr inbounds float, float* %tmp1939, i64 1
+ %tmp1941 = getelementptr inbounds float, float* %tmp1940, i64 1
+ %tmp1942 = getelementptr inbounds float, float* %tmp1941, i64 1
+ %tmp1943 = getelementptr inbounds float, float* %tmp1942, i64 1
+ %tmp1944 = getelementptr inbounds float, float* %tmp1943, i64 1
+ %tmp1945 = getelementptr inbounds float, float* %tmp1944, i64 1
+ %tmp1946 = getelementptr inbounds float, float* %tmp1945, i64 1
+ %tmp1947 = getelementptr inbounds float, float* %tmp1946, i64 1
+ %tmp1948 = getelementptr inbounds float, float* %tmp1947, i64 1
+ %tmp1949 = getelementptr inbounds float, float* %tmp1948, i64 1
+ %tmp1950 = getelementptr inbounds float, float* %tmp1949, i64 1
+ %tmp1951 = getelementptr inbounds float, float* %tmp1950, i64 1
+ %tmp1952 = getelementptr inbounds float, float* %tmp1951, i64 1
+ %tmp1953 = getelementptr inbounds float, float* %tmp1952, i64 1
+ %tmp1954 = getelementptr inbounds float, float* %tmp1953, i64 1
+ %tmp1955 = getelementptr inbounds float, float* %tmp1954, i64 1
+ %tmp1956 = getelementptr inbounds float, float* %tmp1955, i64 1
+ %tmp1957 = getelementptr inbounds float, float* %tmp1956, i64 1
+ %tmp1958 = getelementptr inbounds float, float* %tmp1957, i64 1
+ %tmp1959 = getelementptr inbounds float, float* %tmp1958, i64 1
+ %tmp1960 = getelementptr inbounds float, float* %tmp1959, i64 1
+ %tmp1961 = getelementptr inbounds float, float* %tmp1960, i64 1
+ %tmp1962 = getelementptr inbounds float, float* %tmp1961, i64 1
+ %tmp1963 = getelementptr inbounds float, float* %tmp1962, i64 1
+ %tmp1964 = getelementptr inbounds float, float* %tmp1963, i64 1
+ %tmp1965 = getelementptr inbounds float, float* %tmp1964, i64 1
+ %tmp1966 = getelementptr inbounds float, float* %tmp1965, i64 1
+ %tmp1967 = getelementptr inbounds float, float* %tmp1966, i64 1
+ %tmp1968 = getelementptr inbounds float, float* %tmp1967, i64 1
+ %tmp1969 = getelementptr inbounds float, float* %tmp1968, i64 1
+ %tmp1970 = getelementptr inbounds float, float* %tmp1969, i64 1
+ %tmp1971 = getelementptr inbounds float, float* %tmp1970, i64 1
+ %tmp1972 = getelementptr inbounds float, float* %tmp1971, i64 1
+ %tmp1973 = getelementptr inbounds float, float* %tmp1972, i64 1
+ %tmp1974 = getelementptr inbounds float, float* %tmp1973, i64 1
+ %tmp1975 = getelementptr inbounds float, float* %tmp1974, i64 1
+ %tmp1976 = getelementptr inbounds float, float* %tmp1975, i64 1
+ %tmp1977 = getelementptr inbounds float, float* %tmp1976, i64 1
+ %tmp1978 = getelementptr inbounds float, float* %tmp1977, i64 1
+ %tmp1979 = getelementptr inbounds float, float* %tmp1978, i64 1
+ %tmp1980 = getelementptr inbounds float, float* %tmp1979, i64 1
+ %tmp1981 = getelementptr inbounds float, float* %tmp1980, i64 1
+ %tmp1982 = getelementptr inbounds float, float* %tmp1981, i64 1
+ %tmp1983 = getelementptr inbounds float, float* %tmp1982, i64 1
+ %tmp1984 = getelementptr inbounds float, float* %tmp1983, i64 1
+ %tmp1985 = getelementptr inbounds float, float* %tmp1984, i64 1
+ %tmp1986 = getelementptr inbounds float, float* %tmp1985, i64 1
+ %tmp1987 = getelementptr inbounds float, float* %tmp1986, i64 1
+ %tmp1988 = getelementptr inbounds float, float* %tmp1987, i64 1
+ %tmp1989 = getelementptr inbounds float, float* %tmp1988, i64 1
+ %tmp1990 = getelementptr inbounds float, float* %tmp1989, i64 1
+ %tmp1991 = getelementptr inbounds float, float* %tmp1990, i64 1
+ %tmp1992 = getelementptr inbounds float, float* %tmp1991, i64 1
+ %tmp1993 = getelementptr inbounds float, float* %tmp1992, i64 1
+ %tmp1994 = getelementptr inbounds float, float* %tmp1993, i64 1
+ %tmp1995 = getelementptr inbounds float, float* %tmp1994, i64 1
+ %tmp1996 = getelementptr inbounds float, float* %tmp1995, i64 1
+ %tmp1997 = getelementptr inbounds float, float* %tmp1996, i64 1
+ %tmp1998 = getelementptr inbounds float, float* %tmp1997, i64 1
+ %tmp1999 = getelementptr inbounds float, float* %tmp1998, i64 1
+ %tmp2000 = getelementptr inbounds float, float* %tmp1999, i64 1
+ %tmp2001 = getelementptr inbounds float, float* %tmp2000, i64 1
+ %tmp2002 = getelementptr inbounds float, float* %tmp2001, i64 1
+ %tmp2003 = getelementptr inbounds float, float* %tmp2002, i64 1
+ %tmp2004 = getelementptr inbounds float, float* %tmp2003, i64 1
+ %tmp2005 = getelementptr inbounds float, float* %tmp2004, i64 1
+ %tmp2006 = getelementptr inbounds float, float* %tmp2005, i64 1
+ %tmp2007 = getelementptr inbounds float, float* %tmp2006, i64 1
+ %tmp2008 = getelementptr inbounds float, float* %tmp2007, i64 1
+ %tmp2009 = getelementptr inbounds float, float* %tmp2008, i64 1
+ %tmp2010 = getelementptr inbounds float, float* %tmp2009, i64 1
+ %tmp2011 = getelementptr inbounds float, float* %tmp2010, i64 1
+ %tmp2012 = getelementptr inbounds float, float* %tmp2011, i64 1
+ %tmp2013 = getelementptr inbounds float, float* %tmp2012, i64 1
+ %tmp2014 = getelementptr inbounds float, float* %tmp2013, i64 1
+ %tmp2015 = getelementptr inbounds float, float* %tmp2014, i64 1
+ %tmp2016 = getelementptr inbounds float, float* %tmp2015, i64 1
+ %tmp2017 = getelementptr inbounds float, float* %tmp2016, i64 1
+ %tmp2018 = getelementptr inbounds float, float* %tmp2017, i64 1
+ %tmp2019 = getelementptr inbounds float, float* %tmp2018, i64 1
+ %tmp2020 = getelementptr inbounds float, float* %tmp2019, i64 1
+ %tmp2021 = getelementptr inbounds float, float* %tmp2020, i64 1
+ %tmp2022 = getelementptr inbounds float, float* %tmp2021, i64 1
+ %tmp2023 = getelementptr inbounds float, float* %tmp2022, i64 1
+ %tmp2024 = getelementptr inbounds float, float* %tmp2023, i64 1
+ %tmp2025 = getelementptr inbounds float, float* %tmp2024, i64 1
+ %tmp2026 = getelementptr inbounds float, float* %tmp2025, i64 1
+ %tmp2027 = getelementptr inbounds float, float* %tmp2026, i64 1
+ %tmp2028 = getelementptr inbounds float, float* %tmp2027, i64 1
+ %tmp2029 = getelementptr inbounds float, float* %tmp2028, i64 1
+ %tmp2030 = getelementptr inbounds float, float* %tmp2029, i64 1
+ %tmp2031 = getelementptr inbounds float, float* %tmp2030, i64 1
+ %tmp2032 = getelementptr inbounds float, float* %tmp2031, i64 1
+ %tmp2033 = getelementptr inbounds float, float* %tmp2032, i64 1
+ %tmp2034 = getelementptr inbounds float, float* %tmp2033, i64 1
+ %tmp2035 = getelementptr inbounds float, float* %tmp2034, i64 1
+ %tmp2036 = getelementptr inbounds float, float* %tmp2035, i64 1
+ %tmp2037 = getelementptr inbounds float, float* %tmp2036, i64 1
+ %tmp2038 = getelementptr inbounds float, float* %tmp2037, i64 1
+ %tmp2039 = getelementptr inbounds float, float* %tmp2038, i64 1
+ %tmp2040 = getelementptr inbounds float, float* %tmp2039, i64 1
+ %tmp2041 = getelementptr inbounds float, float* %tmp2040, i64 1
+ %tmp2042 = getelementptr inbounds float, float* %tmp2041, i64 1
+ %tmp2043 = getelementptr inbounds float, float* %tmp2042, i64 1
+ %tmp2044 = getelementptr inbounds float, float* %tmp2043, i64 1
+ %tmp2045 = getelementptr inbounds float, float* %tmp2044, i64 1
+ %tmp2046 = getelementptr inbounds float, float* %tmp2045, i64 1
+ %tmp2047 = getelementptr inbounds float, float* %tmp2046, i64 1
+ %tmp2048 = getelementptr inbounds float, float* %tmp2047, i64 1
+ %tmp2049 = getelementptr inbounds float, float* %tmp2048, i64 1
+ %tmp2050 = getelementptr inbounds float, float* %tmp2049, i64 1
+ %tmp2051 = getelementptr inbounds float, float* %tmp2050, i64 1
+ %tmp2052 = getelementptr inbounds float, float* %tmp2051, i64 1
+ %tmp2053 = getelementptr inbounds float, float* %tmp2052, i64 1
+ %tmp2054 = getelementptr inbounds float, float* %tmp2053, i64 1
+ %tmp2055 = getelementptr inbounds float, float* %tmp2054, i64 1
+ %tmp2056 = getelementptr inbounds float, float* %tmp2055, i64 1
+ %tmp2057 = getelementptr inbounds float, float* %tmp2056, i64 1
+ %tmp2058 = getelementptr inbounds float, float* %tmp2057, i64 1
+ %tmp2059 = getelementptr inbounds float, float* %tmp2058, i64 1
+ %tmp2060 = getelementptr inbounds float, float* %tmp2059, i64 1
+ %tmp2061 = getelementptr inbounds float, float* %tmp2060, i64 1
+ %tmp2062 = getelementptr inbounds float, float* %tmp2061, i64 1
+ %tmp2063 = getelementptr inbounds float, float* %tmp2062, i64 1
+ %tmp2064 = getelementptr inbounds float, float* %tmp2063, i64 1
+ %tmp2065 = getelementptr inbounds float, float* %tmp2064, i64 1
+ %tmp2066 = getelementptr inbounds float, float* %tmp2065, i64 1
+ %tmp2067 = getelementptr inbounds float, float* %tmp2066, i64 1
+ %tmp2068 = getelementptr inbounds float, float* %tmp2067, i64 1
+ %tmp2069 = getelementptr inbounds float, float* %tmp2068, i64 1
+ %tmp2070 = getelementptr inbounds float, float* %tmp2069, i64 1
+ %tmp2071 = getelementptr inbounds float, float* %tmp2070, i64 1
+ %tmp2072 = getelementptr inbounds float, float* %tmp2071, i64 1
+ %tmp2073 = getelementptr inbounds float, float* %tmp2072, i64 1
+ %tmp2074 = getelementptr inbounds float, float* %tmp2073, i64 1
+ %tmp2075 = getelementptr inbounds float, float* %tmp2074, i64 1
+ %tmp2076 = getelementptr inbounds float, float* %tmp2075, i64 1
+ %tmp2077 = getelementptr inbounds float, float* %tmp2076, i64 1
+ %tmp2078 = getelementptr inbounds float, float* %tmp2077, i64 1
+ %tmp2079 = getelementptr inbounds float, float* %tmp2078, i64 1
+ %tmp2080 = getelementptr inbounds float, float* %tmp2079, i64 1
+ %tmp2081 = getelementptr inbounds float, float* %tmp2080, i64 1
+ %tmp2082 = getelementptr inbounds float, float* %tmp2081, i64 1
+ %tmp2083 = getelementptr inbounds float, float* %tmp2082, i64 1
+ %tmp2084 = getelementptr inbounds float, float* %tmp2083, i64 1
+ %tmp2085 = getelementptr inbounds float, float* %tmp2084, i64 1
+ %tmp2086 = getelementptr inbounds float, float* %tmp2085, i64 1
+ %tmp2087 = getelementptr inbounds float, float* %tmp2086, i64 1
+ %tmp2088 = getelementptr inbounds float, float* %tmp2087, i64 1
+ %tmp2089 = getelementptr inbounds float, float* %tmp2088, i64 1
+ %tmp2090 = getelementptr inbounds float, float* %tmp2089, i64 1
+ %tmp2091 = getelementptr inbounds float, float* %tmp2090, i64 1
+ %tmp2092 = getelementptr inbounds float, float* %tmp2091, i64 1
+ %tmp2093 = getelementptr inbounds float, float* %tmp2092, i64 1
+ %tmp2094 = getelementptr inbounds float, float* %tmp2093, i64 1
+ %tmp2095 = getelementptr inbounds float, float* %tmp2094, i64 1
+ %tmp2096 = getelementptr inbounds float, float* %tmp2095, i64 1
+ %tmp2097 = getelementptr inbounds float, float* %tmp2096, i64 1
+ %tmp2098 = getelementptr inbounds float, float* %tmp2097, i64 1
+ %tmp2099 = getelementptr inbounds float, float* %tmp2098, i64 1
+ %tmp2100 = getelementptr inbounds float, float* %tmp2099, i64 1
+ %tmp2101 = getelementptr inbounds float, float* %tmp2100, i64 1
+ %tmp2102 = getelementptr inbounds float, float* %tmp2101, i64 1
+ %tmp2103 = getelementptr inbounds float, float* %tmp2102, i64 1
+ %tmp2104 = getelementptr inbounds float, float* %tmp2103, i64 1
+ %tmp2105 = getelementptr inbounds float, float* %tmp2104, i64 1
+ %tmp2106 = getelementptr inbounds float, float* %tmp2105, i64 1
+ %tmp2107 = getelementptr inbounds float, float* %tmp2106, i64 1
+ %tmp2108 = getelementptr inbounds float, float* %tmp2107, i64 1
+ %tmp2109 = getelementptr inbounds float, float* %tmp2108, i64 1
+ %tmp2110 = getelementptr inbounds float, float* %tmp2109, i64 1
+ %tmp2111 = getelementptr inbounds float, float* %tmp2110, i64 1
+ %tmp2112 = getelementptr inbounds float, float* %tmp2111, i64 1
+ %tmp2113 = getelementptr inbounds float, float* %tmp2112, i64 1
+ %tmp2114 = getelementptr inbounds float, float* %tmp2113, i64 1
+ %tmp2115 = getelementptr inbounds float, float* %tmp2114, i64 1
+ %tmp2116 = getelementptr inbounds float, float* %tmp2115, i64 1
+ %tmp2117 = getelementptr inbounds float, float* %tmp2116, i64 1
+ %tmp2118 = getelementptr inbounds float, float* %tmp2117, i64 1
+ %tmp2119 = getelementptr inbounds float, float* %tmp2118, i64 1
+ %tmp2120 = getelementptr inbounds float, float* %tmp2119, i64 1
+ %tmp2121 = getelementptr inbounds float, float* %tmp2120, i64 1
+ %tmp2122 = getelementptr inbounds float, float* %tmp2121, i64 1
+ %tmp2123 = getelementptr inbounds float, float* %tmp2122, i64 1
+ %tmp2124 = getelementptr inbounds float, float* %tmp2123, i64 1
+ %tmp2125 = getelementptr inbounds float, float* %tmp2124, i64 1
+ %tmp2126 = getelementptr inbounds float, float* %tmp2125, i64 1
+ %tmp2127 = getelementptr inbounds float, float* %tmp2126, i64 1
+ %tmp2128 = getelementptr inbounds float, float* %tmp2127, i64 1
+ %tmp2129 = getelementptr inbounds float, float* %tmp2128, i64 1
+ %tmp2130 = getelementptr inbounds float, float* %tmp2129, i64 1
+ %tmp2131 = getelementptr inbounds float, float* %tmp2130, i64 1
+ %tmp2132 = getelementptr inbounds float, float* %tmp2131, i64 1
+ %tmp2133 = getelementptr inbounds float, float* %tmp2132, i64 1
+ %tmp2134 = getelementptr inbounds float, float* %tmp2133, i64 1
+ %tmp2135 = getelementptr inbounds float, float* %tmp2134, i64 1
+ %tmp2136 = getelementptr inbounds float, float* %tmp2135, i64 1
+ %tmp2137 = getelementptr inbounds float, float* %tmp2136, i64 1
+ %tmp2138 = getelementptr inbounds float, float* %tmp2137, i64 1
+ %tmp2139 = getelementptr inbounds float, float* %tmp2138, i64 1
+ %tmp2140 = getelementptr inbounds float, float* %tmp2139, i64 1
+ %tmp2141 = getelementptr inbounds float, float* %tmp2140, i64 1
+ %tmp2142 = getelementptr inbounds float, float* %tmp2141, i64 1
+ %tmp2143 = getelementptr inbounds float, float* %tmp2142, i64 1
+ %tmp2144 = getelementptr inbounds float, float* %tmp2143, i64 1
+ %tmp2145 = getelementptr inbounds float, float* %tmp2144, i64 1
+ %tmp2146 = getelementptr inbounds float, float* %tmp2145, i64 1
+ %tmp2147 = getelementptr inbounds float, float* %tmp2146, i64 1
+ %tmp2148 = getelementptr inbounds float, float* %tmp2147, i64 1
+ %tmp2149 = getelementptr inbounds float, float* %tmp2148, i64 1
+ %tmp2150 = getelementptr inbounds float, float* %tmp2149, i64 1
+ %tmp2151 = getelementptr inbounds float, float* %tmp2150, i64 1
+ %tmp2152 = getelementptr inbounds float, float* %tmp2151, i64 1
+ %tmp2153 = getelementptr inbounds float, float* %tmp2152, i64 1
+ %tmp2154 = getelementptr inbounds float, float* %tmp2153, i64 1
+ %tmp2155 = getelementptr inbounds float, float* %tmp2154, i64 1
+ %tmp2156 = getelementptr inbounds float, float* %tmp2155, i64 1
+ %tmp2157 = getelementptr inbounds float, float* %tmp2156, i64 1
+ %tmp2158 = getelementptr inbounds float, float* %tmp2157, i64 1
+ %tmp2159 = getelementptr inbounds float, float* %tmp2158, i64 1
+ %tmp2160 = getelementptr inbounds float, float* %tmp2159, i64 1
+ %tmp2161 = getelementptr inbounds float, float* %tmp2160, i64 1
+ %tmp2162 = getelementptr inbounds float, float* %tmp2161, i64 1
+ %tmp2163 = getelementptr inbounds float, float* %tmp2162, i64 1
+ %tmp2164 = getelementptr inbounds float, float* %tmp2163, i64 1
+ %tmp2165 = getelementptr inbounds float, float* %tmp2164, i64 1
+ %tmp2166 = getelementptr inbounds float, float* %tmp2165, i64 1
+ %tmp2167 = getelementptr inbounds float, float* %tmp2166, i64 1
+ %tmp2168 = getelementptr inbounds float, float* %tmp2167, i64 1
+ %tmp2169 = getelementptr inbounds float, float* %tmp2168, i64 1
+ %tmp2170 = getelementptr inbounds float, float* %tmp2169, i64 1
+ %tmp2171 = getelementptr inbounds float, float* %tmp2170, i64 1
+ %tmp2172 = getelementptr inbounds float, float* %tmp2171, i64 1
+ %tmp2173 = getelementptr inbounds float, float* %tmp2172, i64 1
+ %tmp2174 = getelementptr inbounds float, float* %tmp2173, i64 1
+ %tmp2175 = getelementptr inbounds float, float* %tmp2174, i64 1
+ %tmp2176 = getelementptr inbounds float, float* %tmp2175, i64 1
+ %tmp2177 = getelementptr inbounds float, float* %tmp2176, i64 1
+ %tmp2178 = getelementptr inbounds float, float* %tmp2177, i64 1
+ %tmp2179 = getelementptr inbounds float, float* %tmp2178, i64 1
+ %tmp2180 = getelementptr inbounds float, float* %tmp2179, i64 1
+ %tmp2181 = getelementptr inbounds float, float* %tmp2180, i64 1
+ %tmp2182 = getelementptr inbounds float, float* %tmp2181, i64 1
+ %tmp2183 = getelementptr inbounds float, float* %tmp2182, i64 1
+ %tmp2184 = getelementptr inbounds float, float* %tmp2183, i64 1
+ %tmp2185 = getelementptr inbounds float, float* %tmp2184, i64 1
+ %tmp2186 = getelementptr inbounds float, float* %tmp2185, i64 1
+ %tmp2187 = getelementptr inbounds float, float* %tmp2186, i64 1
+ %tmp2188 = getelementptr inbounds float, float* %tmp2187, i64 1
+ %tmp2189 = getelementptr inbounds float, float* %tmp2188, i64 1
+ %tmp2190 = getelementptr inbounds float, float* %tmp2189, i64 1
+ %tmp2191 = getelementptr inbounds float, float* %tmp2190, i64 1
+ %tmp2192 = getelementptr inbounds float, float* %tmp2191, i64 1
+ %tmp2193 = getelementptr inbounds float, float* %tmp2192, i64 1
+ %tmp2194 = getelementptr inbounds float, float* %tmp2193, i64 1
+ %tmp2195 = getelementptr inbounds float, float* %tmp2194, i64 1
+ %tmp2196 = getelementptr inbounds float, float* %tmp2195, i64 1
+ %tmp2197 = getelementptr inbounds float, float* %tmp2196, i64 1
+ %tmp2198 = getelementptr inbounds float, float* %tmp2197, i64 1
+ %tmp2199 = getelementptr inbounds float, float* %tmp2198, i64 1
+ %tmp2200 = getelementptr inbounds float, float* %tmp2199, i64 1
+ %tmp2201 = getelementptr inbounds float, float* %tmp2200, i64 1
+ %tmp2202 = getelementptr inbounds float, float* %tmp2201, i64 1
+ %tmp2203 = getelementptr inbounds float, float* %tmp2202, i64 1
+ %tmp2204 = getelementptr inbounds float, float* %tmp2203, i64 1
+ %tmp2205 = getelementptr inbounds float, float* %tmp2204, i64 1
+ %tmp2206 = getelementptr inbounds float, float* %tmp2205, i64 1
+ %tmp2207 = getelementptr inbounds float, float* %tmp2206, i64 1
+ %tmp2208 = getelementptr inbounds float, float* %tmp2207, i64 1
+ %tmp2209 = getelementptr inbounds float, float* %tmp2208, i64 1
+ %tmp2210 = getelementptr inbounds float, float* %tmp2209, i64 1
+ %tmp2211 = getelementptr inbounds float, float* %tmp2210, i64 1
+ %tmp2212 = getelementptr inbounds float, float* %tmp2211, i64 1
+ %tmp2213 = getelementptr inbounds float, float* %tmp2212, i64 1
+ %tmp2214 = getelementptr inbounds float, float* %tmp2213, i64 1
+ %tmp2215 = getelementptr inbounds float, float* %tmp2214, i64 1
+ %tmp2216 = getelementptr inbounds float, float* %tmp2215, i64 1
+ %tmp2217 = getelementptr inbounds float, float* %tmp2216, i64 1
+ %tmp2218 = getelementptr inbounds float, float* %tmp2217, i64 1
+ %tmp2219 = getelementptr inbounds float, float* %tmp2218, i64 1
+ %tmp2220 = getelementptr inbounds float, float* %tmp2219, i64 1
+ %tmp2221 = getelementptr inbounds float, float* %tmp2220, i64 1
+ %tmp2222 = getelementptr inbounds float, float* %tmp2221, i64 1
+ %tmp2223 = getelementptr inbounds float, float* %tmp2222, i64 1
+ %tmp2224 = getelementptr inbounds float, float* %tmp2223, i64 1
+ %tmp2225 = getelementptr inbounds float, float* %tmp2224, i64 1
+ %tmp2226 = getelementptr inbounds float, float* %tmp2225, i64 1
+ %tmp2227 = getelementptr inbounds float, float* %tmp2226, i64 1
+ %tmp2228 = getelementptr inbounds float, float* %tmp2227, i64 1
+ %tmp2229 = getelementptr inbounds float, float* %tmp2228, i64 1
+ %tmp2230 = getelementptr inbounds float, float* %tmp2229, i64 1
+ %tmp2231 = getelementptr inbounds float, float* %tmp2230, i64 1
+ %tmp2232 = getelementptr inbounds float, float* %tmp2231, i64 1
+ %tmp2233 = getelementptr inbounds float, float* %tmp2232, i64 1
+ %tmp2234 = getelementptr inbounds float, float* %tmp2233, i64 1
+ %tmp2235 = getelementptr inbounds float, float* %tmp2234, i64 1
+ %tmp2236 = getelementptr inbounds float, float* %tmp2235, i64 1
+ %tmp2237 = getelementptr inbounds float, float* %tmp2236, i64 1
+ %tmp2238 = getelementptr inbounds float, float* %tmp2237, i64 1
+ %tmp2239 = getelementptr inbounds float, float* %tmp2238, i64 1
+ %tmp2240 = getelementptr inbounds float, float* %tmp2239, i64 1
+ %tmp2241 = getelementptr inbounds float, float* %tmp2240, i64 1
+ %tmp2242 = getelementptr inbounds float, float* %tmp2241, i64 1
+ %tmp2243 = getelementptr inbounds float, float* %tmp2242, i64 1
+ %tmp2244 = getelementptr inbounds float, float* %tmp2243, i64 1
+ %tmp2245 = getelementptr inbounds float, float* %tmp2244, i64 1
+ %tmp2246 = getelementptr inbounds float, float* %tmp2245, i64 1
+ %tmp2247 = getelementptr inbounds float, float* %tmp2246, i64 1
+ %tmp2248 = getelementptr inbounds float, float* %tmp2247, i64 1
+ %tmp2249 = getelementptr inbounds float, float* %tmp2248, i64 1
+ %tmp2250 = getelementptr inbounds float, float* %tmp2249, i64 1
+ %tmp2251 = getelementptr inbounds float, float* %tmp2250, i64 1
+ %tmp2252 = getelementptr inbounds float, float* %tmp2251, i64 1
+ %tmp2253 = getelementptr inbounds float, float* %tmp2252, i64 1
+ %tmp2254 = getelementptr inbounds float, float* %tmp2253, i64 1
+ %tmp2255 = getelementptr inbounds float, float* %tmp2254, i64 1
+ %tmp2256 = getelementptr inbounds float, float* %tmp2255, i64 1
+ %tmp2257 = getelementptr inbounds float, float* %tmp2256, i64 1
+ %tmp2258 = getelementptr inbounds float, float* %tmp2257, i64 1
+ %tmp2259 = getelementptr inbounds float, float* %tmp2258, i64 1
+ %tmp2260 = getelementptr inbounds float, float* %tmp2259, i64 1
+ %tmp2261 = getelementptr inbounds float, float* %tmp2260, i64 1
+ %tmp2262 = getelementptr inbounds float, float* %tmp2261, i64 1
+ %tmp2263 = getelementptr inbounds float, float* %tmp2262, i64 1
+ %tmp2264 = getelementptr inbounds float, float* %tmp2263, i64 1
+ %tmp2265 = getelementptr inbounds float, float* %tmp2264, i64 1
+ %tmp2266 = getelementptr inbounds float, float* %tmp2265, i64 1
+ %tmp2267 = getelementptr inbounds float, float* %tmp2266, i64 1
+ %tmp2268 = getelementptr inbounds float, float* %tmp2267, i64 1
+ %tmp2269 = getelementptr inbounds float, float* %tmp2268, i64 1
+ %tmp2270 = getelementptr inbounds float, float* %tmp2269, i64 1
+ %tmp2271 = getelementptr inbounds float, float* %tmp2270, i64 1
+ %tmp2272 = getelementptr inbounds float, float* %tmp2271, i64 1
+ %tmp2273 = getelementptr inbounds float, float* %tmp2272, i64 1
+ %tmp2274 = getelementptr inbounds float, float* %tmp2273, i64 1
+ %tmp2275 = getelementptr inbounds float, float* %tmp2274, i64 1
+ %tmp2276 = getelementptr inbounds float, float* %tmp2275, i64 1
+ %tmp2277 = getelementptr inbounds float, float* %tmp2276, i64 1
+ %tmp2278 = getelementptr inbounds float, float* %tmp2277, i64 1
+ %tmp2279 = getelementptr inbounds float, float* %tmp2278, i64 1
+ %tmp2280 = getelementptr inbounds float, float* %tmp2279, i64 1
+ %tmp2281 = getelementptr inbounds float, float* %tmp2280, i64 1
+ %tmp2282 = getelementptr inbounds float, float* %tmp2281, i64 1
+ %tmp2283 = getelementptr inbounds float, float* %tmp2282, i64 1
+ %tmp2284 = getelementptr inbounds float, float* %tmp2283, i64 1
+ %tmp2285 = getelementptr inbounds float, float* %tmp2284, i64 1
+ %tmp2286 = getelementptr inbounds float, float* %tmp2285, i64 1
+ %tmp2287 = getelementptr inbounds float, float* %tmp2286, i64 1
+ %tmp2288 = getelementptr inbounds float, float* %tmp2287, i64 1
+ %tmp2289 = getelementptr inbounds float, float* %tmp2288, i64 1
+ %tmp2290 = getelementptr inbounds float, float* %tmp2289, i64 1
+ %tmp2291 = getelementptr inbounds float, float* %tmp2290, i64 1
+ %tmp2292 = getelementptr inbounds float, float* %tmp2291, i64 1
+ %tmp2293 = getelementptr inbounds float, float* %tmp2292, i64 1
+ %tmp2294 = getelementptr inbounds float, float* %tmp2293, i64 1
+ %tmp2295 = getelementptr inbounds float, float* %tmp2294, i64 1
+ %tmp2296 = getelementptr inbounds float, float* %tmp2295, i64 1
+ %tmp2297 = getelementptr inbounds float, float* %tmp2296, i64 1
+ %tmp2298 = getelementptr inbounds float, float* %tmp2297, i64 1
+ %tmp2299 = getelementptr inbounds float, float* %tmp2298, i64 1
+ %tmp2300 = getelementptr inbounds float, float* %tmp2299, i64 1
+ %tmp2301 = getelementptr inbounds float, float* %tmp2300, i64 1
+ %tmp2302 = getelementptr inbounds float, float* %tmp2301, i64 1
+ %tmp2303 = getelementptr inbounds float, float* %tmp2302, i64 1
+ %tmp2304 = getelementptr inbounds float, float* %tmp2303, i64 1
+ %tmp2305 = getelementptr inbounds float, float* %tmp2304, i64 1
+ %tmp2306 = getelementptr inbounds float, float* %tmp2305, i64 1
+ %tmp2307 = getelementptr inbounds float, float* %tmp2306, i64 1
+ %tmp2308 = getelementptr inbounds float, float* %tmp2307, i64 1
+ %tmp2309 = getelementptr inbounds float, float* %tmp2308, i64 1
+ %tmp2310 = getelementptr inbounds float, float* %tmp2309, i64 1
+ %tmp2311 = getelementptr inbounds float, float* %tmp2310, i64 1
+ %tmp2312 = getelementptr inbounds float, float* %tmp2311, i64 1
+ %tmp2313 = getelementptr inbounds float, float* %tmp2312, i64 1
+ %tmp2314 = getelementptr inbounds float, float* %tmp2313, i64 1
+ %tmp2315 = getelementptr inbounds float, float* %tmp2314, i64 1
+ %tmp2316 = getelementptr inbounds float, float* %tmp2315, i64 1
+ %tmp2317 = getelementptr inbounds float, float* %tmp2316, i64 1
+ %tmp2318 = getelementptr inbounds float, float* %tmp2317, i64 1
+ %tmp2319 = getelementptr inbounds float, float* %tmp2318, i64 1
+ %tmp2320 = getelementptr inbounds float, float* %tmp2319, i64 1
+ %tmp2321 = getelementptr inbounds float, float* %tmp2320, i64 1
+ %tmp2322 = getelementptr inbounds float, float* %tmp2321, i64 1
+ %tmp2323 = getelementptr inbounds float, float* %tmp2322, i64 1
+ %tmp2324 = getelementptr inbounds float, float* %tmp2323, i64 1
+ %tmp2325 = getelementptr inbounds float, float* %tmp2324, i64 1
+ %tmp2326 = getelementptr inbounds float, float* %tmp2325, i64 1
+ %tmp2327 = getelementptr inbounds float, float* %tmp2326, i64 1
+ %tmp2328 = getelementptr inbounds float, float* %tmp2327, i64 1
+ %tmp2329 = getelementptr inbounds float, float* %tmp2328, i64 1
+ %tmp2330 = getelementptr inbounds float, float* %tmp2329, i64 1
+ %tmp2331 = getelementptr inbounds float, float* %tmp2330, i64 1
+ %tmp2332 = getelementptr inbounds float, float* %tmp2331, i64 1
+ %tmp2333 = getelementptr inbounds float, float* %tmp2332, i64 1
+ %tmp2334 = getelementptr inbounds float, float* %tmp2333, i64 1
+ %tmp2335 = getelementptr inbounds float, float* %tmp2334, i64 1
+ %tmp2336 = getelementptr inbounds float, float* %tmp2335, i64 1
+ %tmp2337 = getelementptr inbounds float, float* %tmp2336, i64 1
+ %tmp2338 = getelementptr inbounds float, float* %tmp2337, i64 1
+ %tmp2339 = getelementptr inbounds float, float* %tmp2338, i64 1
+ %tmp2340 = getelementptr inbounds float, float* %tmp2339, i64 1
+ %tmp2341 = getelementptr inbounds float, float* %tmp2340, i64 1
+ %tmp2342 = getelementptr inbounds float, float* %tmp2341, i64 1
+ %tmp2343 = getelementptr inbounds float, float* %tmp2342, i64 1
+ %tmp2344 = getelementptr inbounds float, float* %tmp2343, i64 1
+ %tmp2345 = getelementptr inbounds float, float* %tmp2344, i64 1
+ %tmp2346 = getelementptr inbounds float, float* %tmp2345, i64 1
+ %tmp2347 = getelementptr inbounds float, float* %tmp2346, i64 1
+ %tmp2348 = getelementptr inbounds float, float* %tmp2347, i64 1
+ %tmp2349 = getelementptr inbounds float, float* %tmp2348, i64 1
+ %tmp2350 = getelementptr inbounds float, float* %tmp2349, i64 1
+ %tmp2351 = getelementptr inbounds float, float* %tmp2350, i64 1
+ %tmp2352 = getelementptr inbounds float, float* %tmp2351, i64 1
+ %tmp2353 = getelementptr inbounds float, float* %tmp2352, i64 1
+ %tmp2354 = getelementptr inbounds float, float* %tmp2353, i64 1
+ %tmp2355 = getelementptr inbounds float, float* %tmp2354, i64 1
+ %tmp2356 = getelementptr inbounds float, float* %tmp2355, i64 1
+ %tmp2357 = getelementptr inbounds float, float* %tmp2356, i64 1
+ %tmp2358 = getelementptr inbounds float, float* %tmp2357, i64 1
+ %tmp2359 = getelementptr inbounds float, float* %tmp2358, i64 1
+ %tmp2360 = getelementptr inbounds float, float* %tmp2359, i64 1
+ %tmp2361 = getelementptr inbounds float, float* %tmp2360, i64 1
+ %tmp2362 = getelementptr inbounds float, float* %tmp2361, i64 1
+ %tmp2363 = getelementptr inbounds float, float* %tmp2362, i64 1
+ %tmp2364 = getelementptr inbounds float, float* %tmp2363, i64 1
+ %tmp2365 = getelementptr inbounds float, float* %tmp2364, i64 1
+ %tmp2366 = getelementptr inbounds float, float* %tmp2365, i64 1
+ %tmp2367 = getelementptr inbounds float, float* %tmp2366, i64 1
+ %tmp2368 = getelementptr inbounds float, float* %tmp2367, i64 1
+ %tmp2369 = getelementptr inbounds float, float* %tmp2368, i64 1
+ %tmp2370 = getelementptr inbounds float, float* %tmp2369, i64 1
+ %tmp2371 = getelementptr inbounds float, float* %tmp2370, i64 1
+ %tmp2372 = getelementptr inbounds float, float* %tmp2371, i64 1
+ %tmp2373 = getelementptr inbounds float, float* %tmp2372, i64 1
+ %tmp2374 = getelementptr inbounds float, float* %tmp2373, i64 1
+ %tmp2375 = getelementptr inbounds float, float* %tmp2374, i64 1
+ %tmp2376 = getelementptr inbounds float, float* %tmp2375, i64 1
+ %tmp2377 = getelementptr inbounds float, float* %tmp2376, i64 1
+ %tmp2378 = getelementptr inbounds float, float* %tmp2377, i64 1
+ %tmp2379 = getelementptr inbounds float, float* %tmp2378, i64 1
+ %tmp2380 = getelementptr inbounds float, float* %tmp2379, i64 1
+ %tmp2381 = getelementptr inbounds float, float* %tmp2380, i64 1
+ %tmp2382 = getelementptr inbounds float, float* %tmp2381, i64 1
+ %tmp2383 = getelementptr inbounds float, float* %tmp2382, i64 1
+ %tmp2384 = getelementptr inbounds float, float* %tmp2383, i64 1
+ %tmp2385 = getelementptr inbounds float, float* %tmp2384, i64 1
+ %tmp2386 = getelementptr inbounds float, float* %tmp2385, i64 1
+ %tmp2387 = getelementptr inbounds float, float* %tmp2386, i64 1
+ %tmp2388 = getelementptr inbounds float, float* %tmp2387, i64 1
+ %tmp2389 = getelementptr inbounds float, float* %tmp2388, i64 1
+ %tmp2390 = getelementptr inbounds float, float* %tmp2389, i64 1
+ %tmp2391 = getelementptr inbounds float, float* %tmp2390, i64 1
+ %tmp2392 = getelementptr inbounds float, float* %tmp2391, i64 1
+ %tmp2393 = getelementptr inbounds float, float* %tmp2392, i64 1
+ %tmp2394 = getelementptr inbounds float, float* %tmp2393, i64 1
+ %tmp2395 = getelementptr inbounds float, float* %tmp2394, i64 1
+ %tmp2396 = getelementptr inbounds float, float* %tmp2395, i64 1
+ %tmp2397 = getelementptr inbounds float, float* %tmp2396, i64 1
+ %tmp2398 = getelementptr inbounds float, float* %tmp2397, i64 1
+ %tmp2399 = getelementptr inbounds float, float* %tmp2398, i64 1
+ %tmp2400 = getelementptr inbounds float, float* %tmp2399, i64 1
+ %tmp2401 = getelementptr inbounds float, float* %tmp2400, i64 1
+ %tmp2402 = getelementptr inbounds float, float* %tmp2401, i64 1
+ %tmp2403 = getelementptr inbounds float, float* %tmp2402, i64 1
+ %tmp2404 = getelementptr inbounds float, float* %tmp2403, i64 1
+ %tmp2405 = getelementptr inbounds float, float* %tmp2404, i64 1
+ %tmp2406 = getelementptr inbounds float, float* %tmp2405, i64 1
+ %tmp2407 = getelementptr inbounds float, float* %tmp2406, i64 1
+ %tmp2408 = getelementptr inbounds float, float* %tmp2407, i64 1
+ %tmp2409 = getelementptr inbounds float, float* %tmp2408, i64 1
+ %tmp2410 = getelementptr inbounds float, float* %tmp2409, i64 1
+ %tmp2411 = getelementptr inbounds float, float* %tmp2410, i64 1
+ %tmp2412 = getelementptr inbounds float, float* %tmp2411, i64 1
+ %tmp2413 = getelementptr inbounds float, float* %tmp2412, i64 1
+ %tmp2414 = getelementptr inbounds float, float* %tmp2413, i64 1
+ %tmp2415 = getelementptr inbounds float, float* %tmp2414, i64 1
+ %tmp2416 = getelementptr inbounds float, float* %tmp2415, i64 1
+ %tmp2417 = getelementptr inbounds float, float* %tmp2416, i64 1
+ %tmp2418 = getelementptr inbounds float, float* %tmp2417, i64 1
+ %tmp2419 = getelementptr inbounds float, float* %tmp2418, i64 1
+ %tmp2420 = getelementptr inbounds float, float* %tmp2419, i64 1
+ %tmp2421 = getelementptr inbounds float, float* %tmp2420, i64 1
+ %tmp2422 = getelementptr inbounds float, float* %tmp2421, i64 1
+ %tmp2423 = getelementptr inbounds float, float* %tmp2422, i64 1
+ %tmp2424 = getelementptr inbounds float, float* %tmp2423, i64 1
+ %tmp2425 = getelementptr inbounds float, float* %tmp2424, i64 1
+ %tmp2426 = getelementptr inbounds float, float* %tmp2425, i64 1
+ %tmp2427 = getelementptr inbounds float, float* %tmp2426, i64 1
+ %tmp2428 = getelementptr inbounds float, float* %tmp2427, i64 1
+ %tmp2429 = getelementptr inbounds float, float* %tmp2428, i64 1
+ %tmp2430 = getelementptr inbounds float, float* %tmp2429, i64 1
+ %tmp2431 = getelementptr inbounds float, float* %tmp2430, i64 1
+ %tmp2432 = getelementptr inbounds float, float* %tmp2431, i64 1
+ %tmp2433 = getelementptr inbounds float, float* %tmp2432, i64 1
+ %tmp2434 = getelementptr inbounds float, float* %tmp2433, i64 1
+ %tmp2435 = getelementptr inbounds float, float* %tmp2434, i64 1
+ %tmp2436 = getelementptr inbounds float, float* %tmp2435, i64 1
+ %tmp2437 = getelementptr inbounds float, float* %tmp2436, i64 1
+ %tmp2438 = getelementptr inbounds float, float* %tmp2437, i64 1
+ %tmp2439 = getelementptr inbounds float, float* %tmp2438, i64 1
+ %tmp2440 = getelementptr inbounds float, float* %tmp2439, i64 1
+ %tmp2441 = getelementptr inbounds float, float* %tmp2440, i64 1
+ %tmp2442 = getelementptr inbounds float, float* %tmp2441, i64 1
+ %tmp2443 = getelementptr inbounds float, float* %tmp2442, i64 1
+ %tmp2444 = getelementptr inbounds float, float* %tmp2443, i64 1
+ %tmp2445 = getelementptr inbounds float, float* %tmp2444, i64 1
+ %tmp2446 = getelementptr inbounds float, float* %tmp2445, i64 1
+ %tmp2447 = getelementptr inbounds float, float* %tmp2446, i64 1
+ %tmp2448 = getelementptr inbounds float, float* %tmp2447, i64 1
+ %tmp2449 = getelementptr inbounds float, float* %tmp2448, i64 1
+ %tmp2450 = getelementptr inbounds float, float* %tmp2449, i64 1
+ %tmp2451 = getelementptr inbounds float, float* %tmp2450, i64 1
+ %tmp2452 = getelementptr inbounds float, float* %tmp2451, i64 1
+ %tmp2453 = getelementptr inbounds float, float* %tmp2452, i64 1
+ %tmp2454 = getelementptr inbounds float, float* %tmp2453, i64 1
+ %tmp2455 = getelementptr inbounds float, float* %tmp2454, i64 1
+ %tmp2456 = getelementptr inbounds float, float* %tmp2455, i64 1
+ %tmp2457 = getelementptr inbounds float, float* %tmp2456, i64 1
+ %tmp2458 = getelementptr inbounds float, float* %tmp2457, i64 1
+ %tmp2459 = getelementptr inbounds float, float* %tmp2458, i64 1
+ %tmp2460 = getelementptr inbounds float, float* %tmp2459, i64 1
+ %tmp2461 = getelementptr inbounds float, float* %tmp2460, i64 1
+ %tmp2462 = getelementptr inbounds float, float* %tmp2461, i64 1
+ %tmp2463 = getelementptr inbounds float, float* %tmp2462, i64 1
+ %tmp2464 = getelementptr inbounds float, float* %tmp2463, i64 1
+ %tmp2465 = getelementptr inbounds float, float* %tmp2464, i64 1
+ %tmp2466 = getelementptr inbounds float, float* %tmp2465, i64 1
+ %tmp2467 = getelementptr inbounds float, float* %tmp2466, i64 1
+ %tmp2468 = getelementptr inbounds float, float* %tmp2467, i64 1
+ %tmp2469 = getelementptr inbounds float, float* %tmp2468, i64 1
+ %tmp2470 = getelementptr inbounds float, float* %tmp2469, i64 1
+ %tmp2471 = getelementptr inbounds float, float* %tmp2470, i64 1
+ %tmp2472 = getelementptr inbounds float, float* %tmp2471, i64 1
+ %tmp2473 = getelementptr inbounds float, float* %tmp2472, i64 1
+ %tmp2474 = getelementptr inbounds float, float* %tmp2473, i64 1
+ %tmp2475 = getelementptr inbounds float, float* %tmp2474, i64 1
+ %tmp2476 = getelementptr inbounds float, float* %tmp2475, i64 1
+ %tmp2477 = getelementptr inbounds float, float* %tmp2476, i64 1
+ %tmp2478 = getelementptr inbounds float, float* %tmp2477, i64 1
+ %tmp2479 = getelementptr inbounds float, float* %tmp2478, i64 1
+ %tmp2480 = getelementptr inbounds float, float* %tmp2479, i64 1
+ %tmp2481 = getelementptr inbounds float, float* %tmp2480, i64 1
+ %tmp2482 = getelementptr inbounds float, float* %tmp2481, i64 1
+ %tmp2483 = getelementptr inbounds float, float* %tmp2482, i64 1
+ %tmp2484 = getelementptr inbounds float, float* %tmp2483, i64 1
+ %tmp2485 = getelementptr inbounds float, float* %tmp2484, i64 1
+ %tmp2486 = getelementptr inbounds float, float* %tmp2485, i64 1
+ %tmp2487 = getelementptr inbounds float, float* %tmp2486, i64 1
+ %tmp2488 = getelementptr inbounds float, float* %tmp2487, i64 1
+ %tmp2489 = getelementptr inbounds float, float* %tmp2488, i64 1
+ %tmp2490 = getelementptr inbounds float, float* %tmp2489, i64 1
+ %tmp2491 = getelementptr inbounds float, float* %tmp2490, i64 1
+ %tmp2492 = getelementptr inbounds float, float* %tmp2491, i64 1
+ %tmp2493 = getelementptr inbounds float, float* %tmp2492, i64 1
+ %tmp2494 = getelementptr inbounds float, float* %tmp2493, i64 1
+ %tmp2495 = getelementptr inbounds float, float* %tmp2494, i64 1
+ %tmp2496 = getelementptr inbounds float, float* %tmp2495, i64 1
+ %tmp2497 = getelementptr inbounds float, float* %tmp2496, i64 1
+ %tmp2498 = getelementptr inbounds float, float* %tmp2497, i64 1
+ %tmp2499 = getelementptr inbounds float, float* %tmp2498, i64 1
+ %tmp2500 = getelementptr inbounds float, float* %tmp2499, i64 1
+ %tmp2501 = getelementptr inbounds float, float* %tmp2500, i64 1
+ %tmp2502 = getelementptr inbounds float, float* %tmp2501, i64 1
+ %tmp2503 = getelementptr inbounds float, float* %tmp2502, i64 1
+ %tmp2504 = getelementptr inbounds float, float* %tmp2503, i64 1
+ %tmp2505 = getelementptr inbounds float, float* %tmp2504, i64 1
+ %tmp2506 = getelementptr inbounds float, float* %tmp2505, i64 1
+ %tmp2507 = getelementptr inbounds float, float* %tmp2506, i64 1
+ %tmp2508 = getelementptr inbounds float, float* %tmp2507, i64 1
+ %tmp2509 = getelementptr inbounds float, float* %tmp2508, i64 1
+ %tmp2510 = getelementptr inbounds float, float* %tmp2509, i64 1
+ %tmp2511 = getelementptr inbounds float, float* %tmp2510, i64 1
+ %tmp2512 = getelementptr inbounds float, float* %tmp2511, i64 1
+ %tmp2513 = getelementptr inbounds float, float* %tmp2512, i64 1
+ %tmp2514 = getelementptr inbounds float, float* %tmp2513, i64 1
+ %tmp2515 = getelementptr inbounds float, float* %tmp2514, i64 1
+ %tmp2516 = getelementptr inbounds float, float* %tmp2515, i64 1
+ %tmp2517 = getelementptr inbounds float, float* %tmp2516, i64 1
+ %tmp2518 = getelementptr inbounds float, float* %tmp2517, i64 1
+ %tmp2519 = getelementptr inbounds float, float* %tmp2518, i64 1
+ %tmp2520 = getelementptr inbounds float, float* %tmp2519, i64 1
+ %tmp2521 = getelementptr inbounds float, float* %tmp2520, i64 1
+ %tmp2522 = getelementptr inbounds float, float* %tmp2521, i64 1
+ %tmp2523 = getelementptr inbounds float, float* %tmp2522, i64 1
+ %tmp2524 = getelementptr inbounds float, float* %tmp2523, i64 1
+ %tmp2525 = getelementptr inbounds float, float* %tmp2524, i64 1
+ %tmp2526 = getelementptr inbounds float, float* %tmp2525, i64 1
+ %tmp2527 = getelementptr inbounds float, float* %tmp2526, i64 1
+ %tmp2528 = getelementptr inbounds float, float* %tmp2527, i64 1
+ %tmp2529 = getelementptr inbounds float, float* %tmp2528, i64 1
+ %tmp2530 = getelementptr inbounds float, float* %tmp2529, i64 1
+ %tmp2531 = getelementptr inbounds float, float* %tmp2530, i64 1
+ %tmp2532 = getelementptr inbounds float, float* %tmp2531, i64 1
+ %tmp2533 = getelementptr inbounds float, float* %tmp2532, i64 1
+ %tmp2534 = getelementptr inbounds float, float* %tmp2533, i64 1
+ %tmp2535 = getelementptr inbounds float, float* %tmp2534, i64 1
+ %tmp2536 = getelementptr inbounds float, float* %tmp2535, i64 1
+ %tmp2537 = getelementptr inbounds float, float* %tmp2536, i64 1
+ %tmp2538 = getelementptr inbounds float, float* %tmp2537, i64 1
+ %tmp2539 = getelementptr inbounds float, float* %tmp2538, i64 1
+ %tmp2540 = getelementptr inbounds float, float* %tmp2539, i64 1
+ %tmp2541 = getelementptr inbounds float, float* %tmp2540, i64 1
+ %tmp2542 = getelementptr inbounds float, float* %tmp2541, i64 1
+ %tmp2543 = getelementptr inbounds float, float* %tmp2542, i64 1
+ %tmp2544 = getelementptr inbounds float, float* %tmp2543, i64 1
+ %tmp2545 = getelementptr inbounds float, float* %tmp2544, i64 1
+ %tmp2546 = getelementptr inbounds float, float* %tmp2545, i64 1
+ %tmp2547 = getelementptr inbounds float, float* %tmp2546, i64 1
+ %tmp2548 = getelementptr inbounds float, float* %tmp2547, i64 1
+ %tmp2549 = getelementptr inbounds float, float* %tmp2548, i64 1
+ %tmp2550 = getelementptr inbounds float, float* %tmp2549, i64 1
+ %tmp2551 = getelementptr inbounds float, float* %tmp2550, i64 1
+ %tmp2552 = getelementptr inbounds float, float* %tmp2551, i64 1
+ %tmp2553 = getelementptr inbounds float, float* %tmp2552, i64 1
+ %tmp2554 = getelementptr inbounds float, float* %tmp2553, i64 1
+ %tmp2555 = getelementptr inbounds float, float* %tmp2554, i64 1
+ %tmp2556 = getelementptr inbounds float, float* %tmp2555, i64 1
+ %tmp2557 = getelementptr inbounds float, float* %tmp2556, i64 1
+ %tmp2558 = getelementptr inbounds float, float* %tmp2557, i64 1
+ %tmp2559 = getelementptr inbounds float, float* %tmp2558, i64 1
+ %tmp2560 = getelementptr inbounds float, float* %tmp2559, i64 1
+ %tmp2561 = getelementptr inbounds float, float* %tmp2560, i64 1
+ %tmp2562 = getelementptr inbounds float, float* %tmp2561, i64 1
+ %tmp2563 = getelementptr inbounds float, float* %tmp2562, i64 1
+ %tmp2564 = getelementptr inbounds float, float* %tmp2563, i64 1
+ %tmp2565 = getelementptr inbounds float, float* %tmp2564, i64 1
+ %tmp2566 = getelementptr inbounds float, float* %tmp2565, i64 1
+ %tmp2567 = getelementptr inbounds float, float* %tmp2566, i64 1
+ %tmp2568 = getelementptr inbounds float, float* %tmp2567, i64 1
+ %tmp2569 = getelementptr inbounds float, float* %tmp2568, i64 1
+ %tmp2570 = getelementptr inbounds float, float* %tmp2569, i64 1
+ %tmp2571 = getelementptr inbounds float, float* %tmp2570, i64 1
+ %tmp2572 = getelementptr inbounds float, float* %tmp2571, i64 1
+ %tmp2573 = getelementptr inbounds float, float* %tmp2572, i64 1
+ %tmp2574 = getelementptr inbounds float, float* %tmp2573, i64 1
+ %tmp2575 = getelementptr inbounds float, float* %tmp2574, i64 1
+ %tmp2576 = getelementptr inbounds float, float* %tmp2575, i64 1
+ %tmp2577 = getelementptr inbounds float, float* %tmp2576, i64 1
+ %tmp2578 = getelementptr inbounds float, float* %tmp2577, i64 1
+ %tmp2579 = getelementptr inbounds float, float* %tmp2578, i64 1
+ %tmp2580 = getelementptr inbounds float, float* %tmp2579, i64 1
+ %tmp2581 = getelementptr inbounds float, float* %tmp2580, i64 1
+ %tmp2582 = getelementptr inbounds float, float* %tmp2581, i64 1
+ %tmp2583 = getelementptr inbounds float, float* %tmp2582, i64 1
+ %tmp2584 = getelementptr inbounds float, float* %tmp2583, i64 1
+ %tmp2585 = getelementptr inbounds float, float* %tmp2584, i64 1
+ %tmp2586 = getelementptr inbounds float, float* %tmp2585, i64 1
+ %tmp2587 = getelementptr inbounds float, float* %tmp2586, i64 1
+ %tmp2588 = getelementptr inbounds float, float* %tmp2587, i64 1
+ %tmp2589 = getelementptr inbounds float, float* %tmp2588, i64 1
+ %tmp2590 = getelementptr inbounds float, float* %tmp2589, i64 1
+ %tmp2591 = getelementptr inbounds float, float* %tmp2590, i64 1
+ %tmp2592 = getelementptr inbounds float, float* %tmp2591, i64 1
+ %tmp2593 = getelementptr inbounds float, float* %tmp2592, i64 1
+ %tmp2594 = getelementptr inbounds float, float* %tmp2593, i64 1
+ %tmp2595 = getelementptr inbounds float, float* %tmp2594, i64 1
+ %tmp2596 = getelementptr inbounds float, float* %tmp2595, i64 1
+ %tmp2597 = getelementptr inbounds float, float* %tmp2596, i64 1
+ %tmp2598 = getelementptr inbounds float, float* %tmp2597, i64 1
+ %tmp2599 = getelementptr inbounds float, float* %tmp2598, i64 1
+ %tmp2600 = getelementptr inbounds float, float* %tmp2599, i64 1
+ %tmp2601 = getelementptr inbounds float, float* %tmp2600, i64 1
+ %tmp2602 = getelementptr inbounds float, float* %tmp2601, i64 1
+ %tmp2603 = getelementptr inbounds float, float* %tmp2602, i64 1
+ %tmp2604 = getelementptr inbounds float, float* %tmp2603, i64 1
+ %tmp2605 = getelementptr inbounds float, float* %tmp2604, i64 1
+ %tmp2606 = getelementptr inbounds float, float* %tmp2605, i64 1
+ %tmp2607 = getelementptr inbounds float, float* %tmp2606, i64 1
+ %tmp2608 = getelementptr inbounds float, float* %tmp2607, i64 1
+ %tmp2609 = getelementptr inbounds float, float* %tmp2608, i64 1
+ %tmp2610 = getelementptr inbounds float, float* %tmp2609, i64 1
+ %tmp2611 = getelementptr inbounds float, float* %tmp2610, i64 1
+ %tmp2612 = getelementptr inbounds float, float* %tmp2611, i64 1
+ %tmp2613 = getelementptr inbounds float, float* %tmp2612, i64 1
+ %tmp2614 = getelementptr inbounds float, float* %tmp2613, i64 1
+ %tmp2615 = getelementptr inbounds float, float* %tmp2614, i64 1
+ %tmp2616 = getelementptr inbounds float, float* %tmp2615, i64 1
+ %tmp2617 = getelementptr inbounds float, float* %tmp2616, i64 1
+ %tmp2618 = getelementptr inbounds float, float* %tmp2617, i64 1
+ %tmp2619 = getelementptr inbounds float, float* %tmp2618, i64 1
+ %tmp2620 = getelementptr inbounds float, float* %tmp2619, i64 1
+ %tmp2621 = getelementptr inbounds float, float* %tmp2620, i64 1
+ %tmp2622 = getelementptr inbounds float, float* %tmp2621, i64 1
+ %tmp2623 = getelementptr inbounds float, float* %tmp2622, i64 1
+ %tmp2624 = getelementptr inbounds float, float* %tmp2623, i64 1
+ %tmp2625 = getelementptr inbounds float, float* %tmp2624, i64 1
+ %tmp2626 = getelementptr inbounds float, float* %tmp2625, i64 1
+ %tmp2627 = getelementptr inbounds float, float* %tmp2626, i64 1
+ %tmp2628 = getelementptr inbounds float, float* %tmp2627, i64 1
+ %tmp2629 = getelementptr inbounds float, float* %tmp2628, i64 1
+ %tmp2630 = getelementptr inbounds float, float* %tmp2629, i64 1
+ %tmp2631 = getelementptr inbounds float, float* %tmp2630, i64 1
+ %tmp2632 = getelementptr inbounds float, float* %tmp2631, i64 1
+ %tmp2633 = getelementptr inbounds float, float* %tmp2632, i64 1
+ %tmp2634 = getelementptr inbounds float, float* %tmp2633, i64 1
+ %tmp2635 = getelementptr inbounds float, float* %tmp2634, i64 1
+ %tmp2636 = getelementptr inbounds float, float* %tmp2635, i64 1
+ %tmp2637 = getelementptr inbounds float, float* %tmp2636, i64 1
+ %tmp2638 = getelementptr inbounds float, float* %tmp2637, i64 1
+ %tmp2639 = getelementptr inbounds float, float* %tmp2638, i64 1
+ %tmp2640 = getelementptr inbounds float, float* %tmp2639, i64 1
+ %tmp2641 = getelementptr inbounds float, float* %tmp2640, i64 1
+ %tmp2642 = getelementptr inbounds float, float* %tmp2641, i64 1
+ %tmp2643 = getelementptr inbounds float, float* %tmp2642, i64 1
+ %tmp2644 = getelementptr inbounds float, float* %tmp2643, i64 1
+ %tmp2645 = getelementptr inbounds float, float* %tmp2644, i64 1
+ %tmp2646 = getelementptr inbounds float, float* %tmp2645, i64 1
+ %tmp2647 = getelementptr inbounds float, float* %tmp2646, i64 1
+ %tmp2648 = getelementptr inbounds float, float* %tmp2647, i64 1
+ %tmp2649 = getelementptr inbounds float, float* %tmp2648, i64 1
+ %tmp2650 = getelementptr inbounds float, float* %tmp2649, i64 1
+ %tmp2651 = getelementptr inbounds float, float* %tmp2650, i64 1
+ %tmp2652 = getelementptr inbounds float, float* %tmp2651, i64 1
+ %tmp2653 = getelementptr inbounds float, float* %tmp2652, i64 1
+ %tmp2654 = getelementptr inbounds float, float* %tmp2653, i64 1
+ %tmp2655 = getelementptr inbounds float, float* %tmp2654, i64 1
+ %tmp2656 = getelementptr inbounds float, float* %tmp2655, i64 1
+ %tmp2657 = getelementptr inbounds float, float* %tmp2656, i64 1
+ %tmp2658 = getelementptr inbounds float, float* %tmp2657, i64 1
+ %tmp2659 = getelementptr inbounds float, float* %tmp2658, i64 1
+ %tmp2660 = getelementptr inbounds float, float* %tmp2659, i64 1
+ %tmp2661 = getelementptr inbounds float, float* %tmp2660, i64 1
+ %tmp2662 = getelementptr inbounds float, float* %tmp2661, i64 1
+ %tmp2663 = getelementptr inbounds float, float* %tmp2662, i64 1
+ %tmp2664 = getelementptr inbounds float, float* %tmp2663, i64 1
+ %tmp2665 = getelementptr inbounds float, float* %tmp2664, i64 1
+ %tmp2666 = getelementptr inbounds float, float* %tmp2665, i64 1
+ %tmp2667 = getelementptr inbounds float, float* %tmp2666, i64 1
+ %tmp2668 = getelementptr inbounds float, float* %tmp2667, i64 1
+ %tmp2669 = getelementptr inbounds float, float* %tmp2668, i64 1
+ %tmp2670 = getelementptr inbounds float, float* %tmp2669, i64 1
+ %tmp2671 = getelementptr inbounds float, float* %tmp2670, i64 1
+ %tmp2672 = getelementptr inbounds float, float* %tmp2671, i64 1
+ %tmp2673 = getelementptr inbounds float, float* %tmp2672, i64 1
+ %tmp2674 = getelementptr inbounds float, float* %tmp2673, i64 1
+ %tmp2675 = getelementptr inbounds float, float* %tmp2674, i64 1
+ %tmp2676 = getelementptr inbounds float, float* %tmp2675, i64 1
+ %tmp2677 = getelementptr inbounds float, float* %tmp2676, i64 1
+ %tmp2678 = getelementptr inbounds float, float* %tmp2677, i64 1
+ %tmp2679 = getelementptr inbounds float, float* %tmp2678, i64 1
+ %tmp2680 = getelementptr inbounds float, float* %tmp2679, i64 1
+ %tmp2681 = getelementptr inbounds float, float* %tmp2680, i64 1
+ %tmp2682 = getelementptr inbounds float, float* %tmp2681, i64 1
+ %tmp2683 = getelementptr inbounds float, float* %tmp2682, i64 1
+ %tmp2684 = getelementptr inbounds float, float* %tmp2683, i64 1
+ %tmp2685 = getelementptr inbounds float, float* %tmp2684, i64 1
+ %tmp2686 = getelementptr inbounds float, float* %tmp2685, i64 1
+ %tmp2687 = getelementptr inbounds float, float* %tmp2686, i64 1
+ %tmp2688 = getelementptr inbounds float, float* %tmp2687, i64 1
+ %tmp2689 = getelementptr inbounds float, float* %tmp2688, i64 1
+ %tmp2690 = getelementptr inbounds float, float* %tmp2689, i64 1
+ %tmp2691 = getelementptr inbounds float, float* %tmp2690, i64 1
+ %tmp2692 = getelementptr inbounds float, float* %tmp2691, i64 1
+ %tmp2693 = getelementptr inbounds float, float* %tmp2692, i64 1
+ %tmp2694 = getelementptr inbounds float, float* %tmp2693, i64 1
+ %tmp2695 = getelementptr inbounds float, float* %tmp2694, i64 1
+ %tmp2696 = getelementptr inbounds float, float* %tmp2695, i64 1
+ %tmp2697 = getelementptr inbounds float, float* %tmp2696, i64 1
+ %tmp2698 = getelementptr inbounds float, float* %tmp2697, i64 1
+ %tmp2699 = getelementptr inbounds float, float* %tmp2698, i64 1
+ %tmp2700 = getelementptr inbounds float, float* %tmp2699, i64 1
+ %tmp2701 = getelementptr inbounds float, float* %tmp2700, i64 1
+ %tmp2702 = getelementptr inbounds float, float* %tmp2701, i64 1
+ %tmp2703 = getelementptr inbounds float, float* %tmp2702, i64 1
+ %tmp2704 = getelementptr inbounds float, float* %tmp2703, i64 1
+ %tmp2705 = getelementptr inbounds float, float* %tmp2704, i64 1
+ %tmp2706 = getelementptr inbounds float, float* %tmp2705, i64 1
+ %tmp2707 = getelementptr inbounds float, float* %tmp2706, i64 1
+ %tmp2708 = getelementptr inbounds float, float* %tmp2707, i64 1
+ %tmp2709 = getelementptr inbounds float, float* %tmp2708, i64 1
+ %tmp2710 = getelementptr inbounds float, float* %tmp2709, i64 1
+ %tmp2711 = getelementptr inbounds float, float* %tmp2710, i64 1
+ %tmp2712 = getelementptr inbounds float, float* %tmp2711, i64 1
+ %tmp2713 = getelementptr inbounds float, float* %tmp2712, i64 1
+ %tmp2714 = getelementptr inbounds float, float* %tmp2713, i64 1
+ %tmp2715 = getelementptr inbounds float, float* %tmp2714, i64 1
+ %tmp2716 = getelementptr inbounds float, float* %tmp2715, i64 1
+ %tmp2717 = getelementptr inbounds float, float* %tmp2716, i64 1
+ %tmp2718 = getelementptr inbounds float, float* %tmp2717, i64 1
+ %tmp2719 = getelementptr inbounds float, float* %tmp2718, i64 1
+ %tmp2720 = getelementptr inbounds float, float* %tmp2719, i64 1
+ %tmp2721 = getelementptr inbounds float, float* %tmp2720, i64 1
+ %tmp2722 = getelementptr inbounds float, float* %tmp2721, i64 1
+ %tmp2723 = getelementptr inbounds float, float* %tmp2722, i64 1
+ %tmp2724 = getelementptr inbounds float, float* %tmp2723, i64 1
+ %tmp2725 = getelementptr inbounds float, float* %tmp2724, i64 1
+ %tmp2726 = getelementptr inbounds float, float* %tmp2725, i64 1
+ %tmp2727 = getelementptr inbounds float, float* %tmp2726, i64 1
+ %tmp2728 = getelementptr inbounds float, float* %tmp2727, i64 1
+ %tmp2729 = getelementptr inbounds float, float* %tmp2728, i64 1
+ %tmp2730 = getelementptr inbounds float, float* %tmp2729, i64 1
+ %tmp2731 = getelementptr inbounds float, float* %tmp2730, i64 1
+ %tmp2732 = getelementptr inbounds float, float* %tmp2731, i64 1
+ %tmp2733 = getelementptr inbounds float, float* %tmp2732, i64 1
+ %tmp2734 = getelementptr inbounds float, float* %tmp2733, i64 1
+ %tmp2735 = getelementptr inbounds float, float* %tmp2734, i64 1
+ %tmp2736 = getelementptr inbounds float, float* %tmp2735, i64 1
+ %tmp2737 = getelementptr inbounds float, float* %tmp2736, i64 1
+ %tmp2738 = getelementptr inbounds float, float* %tmp2737, i64 1
+ %tmp2739 = getelementptr inbounds float, float* %tmp2738, i64 1
+ %tmp2740 = getelementptr inbounds float, float* %tmp2739, i64 1
+ %tmp2741 = getelementptr inbounds float, float* %tmp2740, i64 1
+ %tmp2742 = getelementptr inbounds float, float* %tmp2741, i64 1
+ %tmp2743 = getelementptr inbounds float, float* %tmp2742, i64 1
+ %tmp2744 = getelementptr inbounds float, float* %tmp2743, i64 1
+ %tmp2745 = getelementptr inbounds float, float* %tmp2744, i64 1
+ %tmp2746 = getelementptr inbounds float, float* %tmp2745, i64 1
+ %tmp2747 = getelementptr inbounds float, float* %tmp2746, i64 1
+ %tmp2748 = getelementptr inbounds float, float* %tmp2747, i64 1
+ %tmp2749 = getelementptr inbounds float, float* %tmp2748, i64 1
+ %tmp2750 = getelementptr inbounds float, float* %tmp2749, i64 1
+ %tmp2751 = getelementptr inbounds float, float* %tmp2750, i64 1
+ %tmp2752 = getelementptr inbounds float, float* %tmp2751, i64 1
+ %tmp2753 = getelementptr inbounds float, float* %tmp2752, i64 1
+ %tmp2754 = getelementptr inbounds float, float* %tmp2753, i64 1
+ %tmp2755 = getelementptr inbounds float, float* %tmp2754, i64 1
+ %tmp2756 = getelementptr inbounds float, float* %tmp2755, i64 1
+ %tmp2757 = getelementptr inbounds float, float* %tmp2756, i64 1
+ %tmp2758 = getelementptr inbounds float, float* %tmp2757, i64 1
+ %tmp2759 = getelementptr inbounds float, float* %tmp2758, i64 1
+ %tmp2760 = getelementptr inbounds float, float* %tmp2759, i64 1
+ %tmp2761 = getelementptr inbounds float, float* %tmp2760, i64 1
+ %tmp2762 = getelementptr inbounds float, float* %tmp2761, i64 1
+ %tmp2763 = getelementptr inbounds float, float* %tmp2762, i64 1
+ %tmp2764 = getelementptr inbounds float, float* %tmp2763, i64 1
+ %tmp2765 = getelementptr inbounds float, float* %tmp2764, i64 1
+ %tmp2766 = getelementptr inbounds float, float* %tmp2765, i64 1
+ %tmp2767 = getelementptr inbounds float, float* %tmp2766, i64 1
+ %tmp2768 = getelementptr inbounds float, float* %tmp2767, i64 1
+ %tmp2769 = getelementptr inbounds float, float* %tmp2768, i64 1
+ %tmp2770 = getelementptr inbounds float, float* %tmp2769, i64 1
+ %tmp2771 = getelementptr inbounds float, float* %tmp2770, i64 1
+ %tmp2772 = getelementptr inbounds float, float* %tmp2771, i64 1
+ %tmp2773 = getelementptr inbounds float, float* %tmp2772, i64 1
+ %tmp2774 = getelementptr inbounds float, float* %tmp2773, i64 1
+ %tmp2775 = getelementptr inbounds float, float* %tmp2774, i64 1
+ %tmp2776 = getelementptr inbounds float, float* %tmp2775, i64 1
+ %tmp2777 = getelementptr inbounds float, float* %tmp2776, i64 1
+ %tmp2778 = getelementptr inbounds float, float* %tmp2777, i64 1
+ %tmp2779 = getelementptr inbounds float, float* %tmp2778, i64 1
+ %tmp2780 = getelementptr inbounds float, float* %tmp2779, i64 1
+ %tmp2781 = getelementptr inbounds float, float* %tmp2780, i64 1
+ %tmp2782 = getelementptr inbounds float, float* %tmp2781, i64 1
+ %tmp2783 = getelementptr inbounds float, float* %tmp2782, i64 1
+ %tmp2784 = getelementptr inbounds float, float* %tmp2783, i64 1
+ %tmp2785 = getelementptr inbounds float, float* %tmp2784, i64 1
+ %tmp2786 = getelementptr inbounds float, float* %tmp2785, i64 1
+ %tmp2787 = getelementptr inbounds float, float* %tmp2786, i64 1
+ %tmp2788 = getelementptr inbounds float, float* %tmp2787, i64 1
+ %tmp2789 = getelementptr inbounds float, float* %tmp2788, i64 1
+ %tmp2790 = getelementptr inbounds float, float* %tmp2789, i64 1
+ %tmp2791 = getelementptr inbounds float, float* %tmp2790, i64 1
+ %tmp2792 = getelementptr inbounds float, float* %tmp2791, i64 1
+ %tmp2793 = getelementptr inbounds float, float* %tmp2792, i64 1
+ %tmp2794 = getelementptr inbounds float, float* %tmp2793, i64 1
+ %tmp2795 = getelementptr inbounds float, float* %tmp2794, i64 1
+ %tmp2796 = getelementptr inbounds float, float* %tmp2795, i64 1
+ %tmp2797 = getelementptr inbounds float, float* %tmp2796, i64 1
+ %tmp2798 = getelementptr inbounds float, float* %tmp2797, i64 1
+ %tmp2799 = getelementptr inbounds float, float* %tmp2798, i64 1
+ %tmp2800 = getelementptr inbounds float, float* %tmp2799, i64 1
+ %tmp2801 = getelementptr inbounds float, float* %tmp2800, i64 1
+ %tmp2802 = getelementptr inbounds float, float* %tmp2801, i64 1
+ %tmp2803 = getelementptr inbounds float, float* %tmp2802, i64 1
+ %tmp2804 = getelementptr inbounds float, float* %tmp2803, i64 1
+ %tmp2805 = getelementptr inbounds float, float* %tmp2804, i64 1
+ %tmp2806 = getelementptr inbounds float, float* %tmp2805, i64 1
+ %tmp2807 = getelementptr inbounds float, float* %tmp2806, i64 1
+ %tmp2808 = getelementptr inbounds float, float* %tmp2807, i64 1
+ %tmp2809 = getelementptr inbounds float, float* %tmp2808, i64 1
+ %tmp2810 = getelementptr inbounds float, float* %tmp2809, i64 1
+ %tmp2811 = getelementptr inbounds float, float* %tmp2810, i64 1
+ %tmp2812 = getelementptr inbounds float, float* %tmp2811, i64 1
+ %tmp2813 = getelementptr inbounds float, float* %tmp2812, i64 1
+ %tmp2814 = getelementptr inbounds float, float* %tmp2813, i64 1
+ %tmp2815 = getelementptr inbounds float, float* %tmp2814, i64 1
+ %tmp2816 = getelementptr inbounds float, float* %tmp2815, i64 1
+ %tmp2817 = getelementptr inbounds float, float* %tmp2816, i64 1
+ %tmp2818 = getelementptr inbounds float, float* %tmp2817, i64 1
+ %tmp2819 = getelementptr inbounds float, float* %tmp2818, i64 1
+ %tmp2820 = getelementptr inbounds float, float* %tmp2819, i64 1
+ %tmp2821 = getelementptr inbounds float, float* %tmp2820, i64 1
+ %tmp2822 = getelementptr inbounds float, float* %tmp2821, i64 1
+ %tmp2823 = getelementptr inbounds float, float* %tmp2822, i64 1
+ %tmp2824 = getelementptr inbounds float, float* %tmp2823, i64 1
+ %tmp2825 = getelementptr inbounds float, float* %tmp2824, i64 1
+ %tmp2826 = getelementptr inbounds float, float* %tmp2825, i64 1
+ %tmp2827 = getelementptr inbounds float, float* %tmp2826, i64 1
+ %tmp2828 = getelementptr inbounds float, float* %tmp2827, i64 1
+ %tmp2829 = getelementptr inbounds float, float* %tmp2828, i64 1
+ %tmp2830 = getelementptr inbounds float, float* %tmp2829, i64 1
+ %tmp2831 = getelementptr inbounds float, float* %tmp2830, i64 1
+ %tmp2832 = getelementptr inbounds float, float* %tmp2831, i64 1
+ %tmp2833 = getelementptr inbounds float, float* %tmp2832, i64 1
+ %tmp2834 = getelementptr inbounds float, float* %tmp2833, i64 1
+ %tmp2835 = getelementptr inbounds float, float* %tmp2834, i64 1
+ %tmp2836 = getelementptr inbounds float, float* %tmp2835, i64 1
+ %tmp2837 = getelementptr inbounds float, float* %tmp2836, i64 1
+ %tmp2838 = getelementptr inbounds float, float* %tmp2837, i64 1
+ %tmp2839 = getelementptr inbounds float, float* %tmp2838, i64 1
+ %tmp2840 = getelementptr inbounds float, float* %tmp2839, i64 1
+ %tmp2841 = getelementptr inbounds float, float* %tmp2840, i64 1
+ %tmp2842 = getelementptr inbounds float, float* %tmp2841, i64 1
+ %tmp2843 = getelementptr inbounds float, float* %tmp2842, i64 1
+ %tmp2844 = getelementptr inbounds float, float* %tmp2843, i64 1
+ %tmp2845 = getelementptr inbounds float, float* %tmp2844, i64 1
+ %tmp2846 = getelementptr inbounds float, float* %tmp2845, i64 1
+ %tmp2847 = getelementptr inbounds float, float* %tmp2846, i64 1
+ %tmp2848 = getelementptr inbounds float, float* %tmp2847, i64 1
+ %tmp2849 = getelementptr inbounds float, float* %tmp2848, i64 1
+ %tmp2850 = getelementptr inbounds float, float* %tmp2849, i64 1
+ %tmp2851 = getelementptr inbounds float, float* %tmp2850, i64 1
+ %tmp2852 = getelementptr inbounds float, float* %tmp2851, i64 1
+ %tmp2853 = getelementptr inbounds float, float* %tmp2852, i64 1
+ %tmp2854 = getelementptr inbounds float, float* %tmp2853, i64 1
+ %tmp2855 = getelementptr inbounds float, float* %tmp2854, i64 1
+ %tmp2856 = getelementptr inbounds float, float* %tmp2855, i64 1
+ %tmp2857 = getelementptr inbounds float, float* %tmp2856, i64 1
+ %tmp2858 = getelementptr inbounds float, float* %tmp2857, i64 1
+ %tmp2859 = getelementptr inbounds float, float* %tmp2858, i64 1
+ %tmp2860 = getelementptr inbounds float, float* %tmp2859, i64 1
+ %tmp2861 = getelementptr inbounds float, float* %tmp2860, i64 1
+ %tmp2862 = getelementptr inbounds float, float* %tmp2861, i64 1
+ %tmp2863 = getelementptr inbounds float, float* %tmp2862, i64 1
+ %tmp2864 = getelementptr inbounds float, float* %tmp2863, i64 1
+ %tmp2865 = getelementptr inbounds float, float* %tmp2864, i64 1
+ %tmp2866 = getelementptr inbounds float, float* %tmp2865, i64 1
+ %tmp2867 = getelementptr inbounds float, float* %tmp2866, i64 1
+ %tmp2868 = getelementptr inbounds float, float* %tmp2867, i64 1
+ %tmp2869 = getelementptr inbounds float, float* %tmp2868, i64 1
+ %tmp2870 = getelementptr inbounds float, float* %tmp2869, i64 1
+ %tmp2871 = getelementptr inbounds float, float* %tmp2870, i64 1
+ %tmp2872 = getelementptr inbounds float, float* %tmp2871, i64 1
+ %tmp2873 = getelementptr inbounds float, float* %tmp2872, i64 1
+ %tmp2874 = getelementptr inbounds float, float* %tmp2873, i64 1
+ %tmp2875 = getelementptr inbounds float, float* %tmp2874, i64 1
+ %tmp2876 = getelementptr inbounds float, float* %tmp2875, i64 1
+ %tmp2877 = getelementptr inbounds float, float* %tmp2876, i64 1
+ %tmp2878 = getelementptr inbounds float, float* %tmp2877, i64 1
+ %tmp2879 = getelementptr inbounds float, float* %tmp2878, i64 1
+ %tmp2880 = getelementptr inbounds float, float* %tmp2879, i64 1
+ %tmp2881 = getelementptr inbounds float, float* %tmp2880, i64 1
+ %tmp2882 = getelementptr inbounds float, float* %tmp2881, i64 1
+ %tmp2883 = getelementptr inbounds float, float* %tmp2882, i64 1
+ %tmp2884 = getelementptr inbounds float, float* %tmp2883, i64 1
+ %tmp2885 = getelementptr inbounds float, float* %tmp2884, i64 1
+ %tmp2886 = getelementptr inbounds float, float* %tmp2885, i64 1
+ %tmp2887 = getelementptr inbounds float, float* %tmp2886, i64 1
+ %tmp2888 = getelementptr inbounds float, float* %tmp2887, i64 1
+ %tmp2889 = getelementptr inbounds float, float* %tmp2888, i64 1
+ %tmp2890 = getelementptr inbounds float, float* %tmp2889, i64 1
+ %tmp2891 = getelementptr inbounds float, float* %tmp2890, i64 1
+ %tmp2892 = getelementptr inbounds float, float* %tmp2891, i64 1
+ %tmp2893 = getelementptr inbounds float, float* %tmp2892, i64 1
+ %tmp2894 = getelementptr inbounds float, float* %tmp2893, i64 1
+ %tmp2895 = getelementptr inbounds float, float* %tmp2894, i64 1
+ %tmp2896 = getelementptr inbounds float, float* %tmp2895, i64 1
+ %tmp2897 = getelementptr inbounds float, float* %tmp2896, i64 1
+ %tmp2898 = getelementptr inbounds float, float* %tmp2897, i64 1
+ %tmp2899 = getelementptr inbounds float, float* %tmp2898, i64 1
+ %tmp2900 = getelementptr inbounds float, float* %tmp2899, i64 1
+ %tmp2901 = getelementptr inbounds float, float* %tmp2900, i64 1
+ %tmp2902 = getelementptr inbounds float, float* %tmp2901, i64 1
+ %tmp2903 = getelementptr inbounds float, float* %tmp2902, i64 1
+ %tmp2904 = getelementptr inbounds float, float* %tmp2903, i64 1
+ %tmp2905 = getelementptr inbounds float, float* %tmp2904, i64 1
+ %tmp2906 = getelementptr inbounds float, float* %tmp2905, i64 1
+ %tmp2907 = getelementptr inbounds float, float* %tmp2906, i64 1
+ %tmp2908 = getelementptr inbounds float, float* %tmp2907, i64 1
+ %tmp2909 = getelementptr inbounds float, float* %tmp2908, i64 1
+ %tmp2910 = getelementptr inbounds float, float* %tmp2909, i64 1
+ %tmp2911 = getelementptr inbounds float, float* %tmp2910, i64 1
+ %tmp2912 = getelementptr inbounds float, float* %tmp2911, i64 1
+ %tmp2913 = getelementptr inbounds float, float* %tmp2912, i64 1
+ %tmp2914 = getelementptr inbounds float, float* %tmp2913, i64 1
+ %tmp2915 = getelementptr inbounds float, float* %tmp2914, i64 1
+ %tmp2916 = getelementptr inbounds float, float* %tmp2915, i64 1
+ %tmp2917 = getelementptr inbounds float, float* %tmp2916, i64 1
+ %tmp2918 = getelementptr inbounds float, float* %tmp2917, i64 1
+ %tmp2919 = getelementptr inbounds float, float* %tmp2918, i64 1
+ %tmp2920 = getelementptr inbounds float, float* %tmp2919, i64 1
+ %tmp2921 = getelementptr inbounds float, float* %tmp2920, i64 1
+ %tmp2922 = getelementptr inbounds float, float* %tmp2921, i64 1
+ %tmp2923 = getelementptr inbounds float, float* %tmp2922, i64 1
+ %tmp2924 = getelementptr inbounds float, float* %tmp2923, i64 1
+ %tmp2925 = getelementptr inbounds float, float* %tmp2924, i64 1
+ %tmp2926 = getelementptr inbounds float, float* %tmp2925, i64 1
+ %tmp2927 = getelementptr inbounds float, float* %tmp2926, i64 1
+ %tmp2928 = getelementptr inbounds float, float* %tmp2927, i64 1
+ %tmp2929 = getelementptr inbounds float, float* %tmp2928, i64 1
+ %tmp2930 = getelementptr inbounds float, float* %tmp2929, i64 1
+ %tmp2931 = getelementptr inbounds float, float* %tmp2930, i64 1
+ %tmp2932 = getelementptr inbounds float, float* %tmp2931, i64 1
+ %tmp2933 = getelementptr inbounds float, float* %tmp2932, i64 1
+ %tmp2934 = getelementptr inbounds float, float* %tmp2933, i64 1
+ %tmp2935 = getelementptr inbounds float, float* %tmp2934, i64 1
+ %tmp2936 = getelementptr inbounds float, float* %tmp2935, i64 1
+ %tmp2937 = getelementptr inbounds float, float* %tmp2936, i64 1
+ %tmp2938 = getelementptr inbounds float, float* %tmp2937, i64 1
+ %tmp2939 = getelementptr inbounds float, float* %tmp2938, i64 1
+ %tmp2940 = getelementptr inbounds float, float* %tmp2939, i64 1
+ %tmp2941 = getelementptr inbounds float, float* %tmp2940, i64 1
+ %tmp2942 = getelementptr inbounds float, float* %tmp2941, i64 1
+ %tmp2943 = getelementptr inbounds float, float* %tmp2942, i64 1
+ %tmp2944 = getelementptr inbounds float, float* %tmp2943, i64 1
+ %tmp2945 = getelementptr inbounds float, float* %tmp2944, i64 1
+ %tmp2946 = getelementptr inbounds float, float* %tmp2945, i64 1
+ %tmp2947 = getelementptr inbounds float, float* %tmp2946, i64 1
+ %tmp2948 = getelementptr inbounds float, float* %tmp2947, i64 1
+ %tmp2949 = getelementptr inbounds float, float* %tmp2948, i64 1
+ %tmp2950 = getelementptr inbounds float, float* %tmp2949, i64 1
+ %tmp2951 = getelementptr inbounds float, float* %tmp2950, i64 1
+ %tmp2952 = getelementptr inbounds float, float* %tmp2951, i64 1
+ %tmp2953 = getelementptr inbounds float, float* %tmp2952, i64 1
+ %tmp2954 = getelementptr inbounds float, float* %tmp2953, i64 1
+ %tmp2955 = getelementptr inbounds float, float* %tmp2954, i64 1
+ %tmp2956 = getelementptr inbounds float, float* %tmp2955, i64 1
+ %tmp2957 = getelementptr inbounds float, float* %tmp2956, i64 1
+ %tmp2958 = getelementptr inbounds float, float* %tmp2957, i64 1
+ %tmp2959 = getelementptr inbounds float, float* %tmp2958, i64 1
+ %tmp2960 = getelementptr inbounds float, float* %tmp2959, i64 1
+ %tmp2961 = getelementptr inbounds float, float* %tmp2960, i64 1
+ %tmp2962 = getelementptr inbounds float, float* %tmp2961, i64 1
+ %tmp2963 = getelementptr inbounds float, float* %tmp2962, i64 1
+ %tmp2964 = getelementptr inbounds float, float* %tmp2963, i64 1
+ %tmp2965 = getelementptr inbounds float, float* %tmp2964, i64 1
+ %tmp2966 = getelementptr inbounds float, float* %tmp2965, i64 1
+ %tmp2967 = getelementptr inbounds float, float* %tmp2966, i64 1
+ %tmp2968 = getelementptr inbounds float, float* %tmp2967, i64 1
+ %tmp2969 = getelementptr inbounds float, float* %tmp2968, i64 1
+ %tmp2970 = getelementptr inbounds float, float* %tmp2969, i64 1
+ %tmp2971 = getelementptr inbounds float, float* %tmp2970, i64 1
+ %tmp2972 = getelementptr inbounds float, float* %tmp2971, i64 1
+ %tmp2973 = getelementptr inbounds float, float* %tmp2972, i64 1
+ %tmp2974 = getelementptr inbounds float, float* %tmp2973, i64 1
+ %tmp2975 = getelementptr inbounds float, float* %tmp2974, i64 1
+ %tmp2976 = getelementptr inbounds float, float* %tmp2975, i64 1
+ %tmp2977 = getelementptr inbounds float, float* %tmp2976, i64 1
+ %tmp2978 = getelementptr inbounds float, float* %tmp2977, i64 1
+ %tmp2979 = getelementptr inbounds float, float* %tmp2978, i64 1
+ %tmp2980 = getelementptr inbounds float, float* %tmp2979, i64 1
+ %tmp2981 = getelementptr inbounds float, float* %tmp2980, i64 1
+ %tmp2982 = getelementptr inbounds float, float* %tmp2981, i64 1
+ %tmp2983 = getelementptr inbounds float, float* %tmp2982, i64 1
+ %tmp2984 = getelementptr inbounds float, float* %tmp2983, i64 1
+ %tmp2985 = getelementptr inbounds float, float* %tmp2984, i64 1
+ %tmp2986 = getelementptr inbounds float, float* %tmp2985, i64 1
+ %tmp2987 = getelementptr inbounds float, float* %tmp2986, i64 1
+ %tmp2988 = getelementptr inbounds float, float* %tmp2987, i64 1
+ %tmp2989 = getelementptr inbounds float, float* %tmp2988, i64 1
+ %tmp2990 = getelementptr inbounds float, float* %tmp2989, i64 1
+ %tmp2991 = getelementptr inbounds float, float* %tmp2990, i64 1
+ %tmp2992 = getelementptr inbounds float, float* %tmp2991, i64 1
+ %tmp2993 = getelementptr inbounds float, float* %tmp2992, i64 1
+ %tmp2994 = getelementptr inbounds float, float* %tmp2993, i64 1
+ %tmp2995 = getelementptr inbounds float, float* %tmp2994, i64 1
+ %tmp2996 = getelementptr inbounds float, float* %tmp2995, i64 1
+ %tmp2997 = getelementptr inbounds float, float* %tmp2996, i64 1
+ %tmp2998 = getelementptr inbounds float, float* %tmp2997, i64 1
+ %tmp2999 = getelementptr inbounds float, float* %tmp2998, i64 1
+ %tmp3000 = getelementptr inbounds float, float* %tmp2999, i64 1
+ %tmp3001 = getelementptr inbounds float, float* %tmp3000, i64 1
+ %tmp3002 = getelementptr inbounds float, float* %tmp3001, i64 1
+ %tmp3003 = getelementptr inbounds float, float* %tmp3002, i64 1
+ %tmp3004 = getelementptr inbounds float, float* %tmp3003, i64 1
+ %tmp3005 = getelementptr inbounds float, float* %tmp3004, i64 1
+ %tmp3006 = getelementptr inbounds float, float* %tmp3005, i64 1
+ %tmp3007 = getelementptr inbounds float, float* %tmp3006, i64 1
+ %tmp3008 = getelementptr inbounds float, float* %tmp3007, i64 1
+ %tmp3009 = getelementptr inbounds float, float* %tmp3008, i64 1
+ %tmp3010 = getelementptr inbounds float, float* %tmp3009, i64 1
+ %tmp3011 = getelementptr inbounds float, float* %tmp3010, i64 1
+ %tmp3012 = getelementptr inbounds float, float* %tmp3011, i64 1
+ %tmp3013 = getelementptr inbounds float, float* %tmp3012, i64 1
+ %tmp3014 = getelementptr inbounds float, float* %tmp3013, i64 1
+ %tmp3015 = getelementptr inbounds float, float* %tmp3014, i64 1
+ %tmp3016 = getelementptr inbounds float, float* %tmp3015, i64 1
+ %tmp3017 = getelementptr inbounds float, float* %tmp3016, i64 1
+ %tmp3018 = getelementptr inbounds float, float* %tmp3017, i64 1
+ %tmp3019 = getelementptr inbounds float, float* %tmp3018, i64 1
+ %tmp3020 = getelementptr inbounds float, float* %tmp3019, i64 1
+ %tmp3021 = getelementptr inbounds float, float* %tmp3020, i64 1
+ %tmp3022 = getelementptr inbounds float, float* %tmp3021, i64 1
+ %tmp3023 = getelementptr inbounds float, float* %tmp3022, i64 1
+ %tmp3024 = getelementptr inbounds float, float* %tmp3023, i64 1
+ %tmp3025 = getelementptr inbounds float, float* %tmp3024, i64 1
+ %tmp3026 = getelementptr inbounds float, float* %tmp3025, i64 1
+ %tmp3027 = getelementptr inbounds float, float* %tmp3026, i64 1
+ %tmp3028 = getelementptr inbounds float, float* %tmp3027, i64 1
+ %tmp3029 = getelementptr inbounds float, float* %tmp3028, i64 1
+ %tmp3030 = getelementptr inbounds float, float* %tmp3029, i64 1
+ %tmp3031 = getelementptr inbounds float, float* %tmp3030, i64 1
+ %tmp3032 = getelementptr inbounds float, float* %tmp3031, i64 1
+ %tmp3033 = getelementptr inbounds float, float* %tmp3032, i64 1
+ %tmp3034 = getelementptr inbounds float, float* %tmp3033, i64 1
+ %tmp3035 = getelementptr inbounds float, float* %tmp3034, i64 1
+ %tmp3036 = getelementptr inbounds float, float* %tmp3035, i64 1
+ %tmp3037 = getelementptr inbounds float, float* %tmp3036, i64 1
+ %tmp3038 = getelementptr inbounds float, float* %tmp3037, i64 1
+ %tmp3039 = getelementptr inbounds float, float* %tmp3038, i64 1
+ %tmp3040 = getelementptr inbounds float, float* %tmp3039, i64 1
+ %tmp3041 = getelementptr inbounds float, float* %tmp3040, i64 1
+ %tmp3042 = getelementptr inbounds float, float* %tmp3041, i64 1
+ %tmp3043 = getelementptr inbounds float, float* %tmp3042, i64 1
+ %tmp3044 = getelementptr inbounds float, float* %tmp3043, i64 1
+ %tmp3045 = getelementptr inbounds float, float* %tmp3044, i64 1
+ %tmp3046 = getelementptr inbounds float, float* %tmp3045, i64 1
+ %tmp3047 = getelementptr inbounds float, float* %tmp3046, i64 1
+ %tmp3048 = getelementptr inbounds float, float* %tmp3047, i64 1
+ %tmp3049 = getelementptr inbounds float, float* %tmp3048, i64 1
+ %tmp3050 = getelementptr inbounds float, float* %tmp3049, i64 1
+ %tmp3051 = getelementptr inbounds float, float* %tmp3050, i64 1
+ %tmp3052 = getelementptr inbounds float, float* %tmp3051, i64 1
+ %tmp3053 = getelementptr inbounds float, float* %tmp3052, i64 1
+ %tmp3054 = getelementptr inbounds float, float* %tmp3053, i64 1
+ %tmp3055 = getelementptr inbounds float, float* %tmp3054, i64 1
+ %tmp3056 = getelementptr inbounds float, float* %tmp3055, i64 1
+ %tmp3057 = getelementptr inbounds float, float* %tmp3056, i64 1
+ %tmp3058 = getelementptr inbounds float, float* %tmp3057, i64 1
+ %tmp3059 = getelementptr inbounds float, float* %tmp3058, i64 1
+ %tmp3060 = getelementptr inbounds float, float* %tmp3059, i64 1
+ %tmp3061 = getelementptr inbounds float, float* %tmp3060, i64 1
+ %tmp3062 = getelementptr inbounds float, float* %tmp3061, i64 1
+ %tmp3063 = getelementptr inbounds float, float* %tmp3062, i64 1
+ %tmp3064 = getelementptr inbounds float, float* %tmp3063, i64 1
+ %tmp3065 = getelementptr inbounds float, float* %tmp3064, i64 1
+ %tmp3066 = getelementptr inbounds float, float* %tmp3065, i64 1
+ %tmp3067 = getelementptr inbounds float, float* %tmp3066, i64 1
+ %tmp3068 = getelementptr inbounds float, float* %tmp3067, i64 1
+ %tmp3069 = getelementptr inbounds float, float* %tmp3068, i64 1
+ %tmp3070 = getelementptr inbounds float, float* %tmp3069, i64 1
+ %tmp3071 = getelementptr inbounds float, float* %tmp3070, i64 1
+ %tmp3072 = getelementptr inbounds float, float* %tmp3071, i64 1
+ %tmp3073 = getelementptr inbounds float, float* %tmp3072, i64 1
+ %tmp3074 = getelementptr inbounds float, float* %tmp3073, i64 1
+ %tmp3075 = getelementptr inbounds float, float* %tmp3074, i64 1
+ %tmp3076 = getelementptr inbounds float, float* %tmp3075, i64 1
+ %tmp3077 = getelementptr inbounds float, float* %tmp3076, i64 1
+ %tmp3078 = getelementptr inbounds float, float* %tmp3077, i64 1
+ %tmp3079 = getelementptr inbounds float, float* %tmp3078, i64 1
+ %tmp3080 = getelementptr inbounds float, float* %tmp3079, i64 1
+ %tmp3081 = getelementptr inbounds float, float* %tmp3080, i64 1
+ %tmp3082 = getelementptr inbounds float, float* %tmp3081, i64 1
+ %tmp3083 = getelementptr inbounds float, float* %tmp3082, i64 1
+ %tmp3084 = getelementptr inbounds float, float* %tmp3083, i64 1
+ %tmp3085 = getelementptr inbounds float, float* %tmp3084, i64 1
+ %tmp3086 = getelementptr inbounds float, float* %tmp3085, i64 1
+ %tmp3087 = getelementptr inbounds float, float* %tmp3086, i64 1
+ %tmp3088 = getelementptr inbounds float, float* %tmp3087, i64 1
+ %tmp3089 = getelementptr inbounds float, float* %tmp3088, i64 1
+ %tmp3090 = getelementptr inbounds float, float* %tmp3089, i64 1
+ %tmp3091 = getelementptr inbounds float, float* %tmp3090, i64 1
+ %tmp3092 = getelementptr inbounds float, float* %tmp3091, i64 1
+ %tmp3093 = getelementptr inbounds float, float* %tmp3092, i64 1
+ %tmp3094 = getelementptr inbounds float, float* %tmp3093, i64 1
+ %tmp3095 = getelementptr inbounds float, float* %tmp3094, i64 1
+ %tmp3096 = getelementptr inbounds float, float* %tmp3095, i64 1
+ %tmp3097 = getelementptr inbounds float, float* %tmp3096, i64 1
+ %tmp3098 = getelementptr inbounds float, float* %tmp3097, i64 1
+ %tmp3099 = getelementptr inbounds float, float* %tmp3098, i64 1
+ %tmp3100 = getelementptr inbounds float, float* %tmp3099, i64 1
+ %tmp3101 = getelementptr inbounds float, float* %tmp3100, i64 1
+ %tmp3102 = getelementptr inbounds float, float* %tmp3101, i64 1
+ %tmp3103 = getelementptr inbounds float, float* %tmp3102, i64 1
+ %tmp3104 = getelementptr inbounds float, float* %tmp3103, i64 1
+ %tmp3105 = getelementptr inbounds float, float* %tmp3104, i64 1
+ %tmp3106 = getelementptr inbounds float, float* %tmp3105, i64 1
+ %tmp3107 = getelementptr inbounds float, float* %tmp3106, i64 1
+ %tmp3108 = getelementptr inbounds float, float* %tmp3107, i64 1
+ %tmp3109 = getelementptr inbounds float, float* %tmp3108, i64 1
+ %tmp3110 = getelementptr inbounds float, float* %tmp3109, i64 1
+ %tmp3111 = getelementptr inbounds float, float* %tmp3110, i64 1
+ %tmp3112 = getelementptr inbounds float, float* %tmp3111, i64 1
+ %tmp3113 = getelementptr inbounds float, float* %tmp3112, i64 1
+ %tmp3114 = getelementptr inbounds float, float* %tmp3113, i64 1
+ %tmp3115 = getelementptr inbounds float, float* %tmp3114, i64 1
+ %tmp3116 = getelementptr inbounds float, float* %tmp3115, i64 1
+ %tmp3117 = getelementptr inbounds float, float* %tmp3116, i64 1
+ %tmp3118 = getelementptr inbounds float, float* %tmp3117, i64 1
+ %tmp3119 = getelementptr inbounds float, float* %tmp3118, i64 1
+ %tmp3120 = getelementptr inbounds float, float* %tmp3119, i64 1
+ %tmp3121 = getelementptr inbounds float, float* %tmp3120, i64 1
+ %tmp3122 = getelementptr inbounds float, float* %tmp3121, i64 1
+ %tmp3123 = getelementptr inbounds float, float* %tmp3122, i64 1
+ %tmp3124 = getelementptr inbounds float, float* %tmp3123, i64 1
+ %tmp3125 = getelementptr inbounds float, float* %tmp3124, i64 1
+ %tmp3126 = getelementptr inbounds float, float* %tmp3125, i64 1
+ %tmp3127 = getelementptr inbounds float, float* %tmp3126, i64 1
+ %tmp3128 = getelementptr inbounds float, float* %tmp3127, i64 1
+ %tmp3129 = getelementptr inbounds float, float* %tmp3128, i64 1
+ %tmp3130 = getelementptr inbounds float, float* %tmp3129, i64 1
+ %tmp3131 = getelementptr inbounds float, float* %tmp3130, i64 1
+ %tmp3132 = getelementptr inbounds float, float* %tmp3131, i64 1
+ %tmp3133 = getelementptr inbounds float, float* %tmp3132, i64 1
+ %tmp3134 = getelementptr inbounds float, float* %tmp3133, i64 1
+ %tmp3135 = getelementptr inbounds float, float* %tmp3134, i64 1
+ %tmp3136 = getelementptr inbounds float, float* %tmp3135, i64 1
+ %tmp3137 = getelementptr inbounds float, float* %tmp3136, i64 1
+ %tmp3138 = getelementptr inbounds float, float* %tmp3137, i64 1
+ %tmp3139 = getelementptr inbounds float, float* %tmp3138, i64 1
+ %tmp3140 = getelementptr inbounds float, float* %tmp3139, i64 1
+ %tmp3141 = getelementptr inbounds float, float* %tmp3140, i64 1
+ %tmp3142 = getelementptr inbounds float, float* %tmp3141, i64 1
+ %tmp3143 = getelementptr inbounds float, float* %tmp3142, i64 1
+ %tmp3144 = getelementptr inbounds float, float* %tmp3143, i64 1
+ %tmp3145 = getelementptr inbounds float, float* %tmp3144, i64 1
+ %tmp3146 = getelementptr inbounds float, float* %tmp3145, i64 1
+ %tmp3147 = getelementptr inbounds float, float* %tmp3146, i64 1
+ %tmp3148 = getelementptr inbounds float, float* %tmp3147, i64 1
+ %tmp3149 = getelementptr inbounds float, float* %tmp3148, i64 1
+ %tmp3150 = getelementptr inbounds float, float* %tmp3149, i64 1
+ %tmp3151 = getelementptr inbounds float, float* %tmp3150, i64 1
+ %tmp3152 = getelementptr inbounds float, float* %tmp3151, i64 1
+ %tmp3153 = getelementptr inbounds float, float* %tmp3152, i64 1
+ %tmp3154 = getelementptr inbounds float, float* %tmp3153, i64 1
+ %tmp3155 = getelementptr inbounds float, float* %tmp3154, i64 1
+ %tmp3156 = getelementptr inbounds float, float* %tmp3155, i64 1
+ %tmp3157 = getelementptr inbounds float, float* %tmp3156, i64 1
+ %tmp3158 = getelementptr inbounds float, float* %tmp3157, i64 1
+ %tmp3159 = getelementptr inbounds float, float* %tmp3158, i64 1
+ %tmp3160 = getelementptr inbounds float, float* %tmp3159, i64 1
+ %tmp3161 = getelementptr inbounds float, float* %tmp3160, i64 1
+ %tmp3162 = getelementptr inbounds float, float* %tmp3161, i64 1
+ %tmp3163 = getelementptr inbounds float, float* %tmp3162, i64 1
+ %tmp3164 = getelementptr inbounds float, float* %tmp3163, i64 1
+ %tmp3165 = getelementptr inbounds float, float* %tmp3164, i64 1
+ %tmp3166 = getelementptr inbounds float, float* %tmp3165, i64 1
+ %tmp3167 = getelementptr inbounds float, float* %tmp3166, i64 1
+ %tmp3168 = getelementptr inbounds float, float* %tmp3167, i64 1
+ %tmp3169 = getelementptr inbounds float, float* %tmp3168, i64 1
+ %tmp3170 = getelementptr inbounds float, float* %tmp3169, i64 1
+ %tmp3171 = getelementptr inbounds float, float* %tmp3170, i64 1
+ %tmp3172 = getelementptr inbounds float, float* %tmp3171, i64 1
+ %tmp3173 = getelementptr inbounds float, float* %tmp3172, i64 1
+ %tmp3174 = getelementptr inbounds float, float* %tmp3173, i64 1
+ %tmp3175 = getelementptr inbounds float, float* %tmp3174, i64 1
+ %tmp3176 = getelementptr inbounds float, float* %tmp3175, i64 1
+ %tmp3177 = getelementptr inbounds float, float* %tmp3176, i64 1
+ %tmp3178 = getelementptr inbounds float, float* %tmp3177, i64 1
+ %tmp3179 = getelementptr inbounds float, float* %tmp3178, i64 1
+ %tmp3180 = getelementptr inbounds float, float* %tmp3179, i64 1
+ %tmp3181 = getelementptr inbounds float, float* %tmp3180, i64 1
+ %tmp3182 = getelementptr inbounds float, float* %tmp3181, i64 1
+ %tmp3183 = getelementptr inbounds float, float* %tmp3182, i64 1
+ %tmp3184 = getelementptr inbounds float, float* %tmp3183, i64 1
+ %tmp3185 = getelementptr inbounds float, float* %tmp3184, i64 1
+ %tmp3186 = getelementptr inbounds float, float* %tmp3185, i64 1
+ %tmp3187 = getelementptr inbounds float, float* %tmp3186, i64 1
+ %tmp3188 = getelementptr inbounds float, float* %tmp3187, i64 1
+ %tmp3189 = getelementptr inbounds float, float* %tmp3188, i64 1
+ %tmp3190 = getelementptr inbounds float, float* %tmp3189, i64 1
+ %tmp3191 = getelementptr inbounds float, float* %tmp3190, i64 1
+ %tmp3192 = getelementptr inbounds float, float* %tmp3191, i64 1
+ %tmp3193 = getelementptr inbounds float, float* %tmp3192, i64 1
+ %tmp3194 = getelementptr inbounds float, float* %tmp3193, i64 1
+ %tmp3195 = getelementptr inbounds float, float* %tmp3194, i64 1
+ %tmp3196 = getelementptr inbounds float, float* %tmp3195, i64 1
+ %tmp3197 = getelementptr inbounds float, float* %tmp3196, i64 1
+ %tmp3198 = getelementptr inbounds float, float* %tmp3197, i64 1
+ %tmp3199 = getelementptr inbounds float, float* %tmp3198, i64 1
+ %tmp3200 = getelementptr inbounds float, float* %tmp3199, i64 1
+ %tmp3201 = getelementptr inbounds float, float* %tmp3200, i64 1
+ %tmp3202 = getelementptr inbounds float, float* %tmp3201, i64 1
+ %tmp3203 = getelementptr inbounds float, float* %tmp3202, i64 1
+ %tmp3204 = getelementptr inbounds float, float* %tmp3203, i64 1
+ %tmp3205 = getelementptr inbounds float, float* %tmp3204, i64 1
+ %tmp3206 = getelementptr inbounds float, float* %tmp3205, i64 1
+ %tmp3207 = getelementptr inbounds float, float* %tmp3206, i64 1
+ %tmp3208 = getelementptr inbounds float, float* %tmp3207, i64 1
+ %tmp3209 = getelementptr inbounds float, float* %tmp3208, i64 1
+ %tmp3210 = getelementptr inbounds float, float* %tmp3209, i64 1
+ %tmp3211 = getelementptr inbounds float, float* %tmp3210, i64 1
+ %tmp3212 = getelementptr inbounds float, float* %tmp3211, i64 1
+ %tmp3213 = getelementptr inbounds float, float* %tmp3212, i64 1
+ %tmp3214 = getelementptr inbounds float, float* %tmp3213, i64 1
+ %tmp3215 = getelementptr inbounds float, float* %tmp3214, i64 1
+ %tmp3216 = getelementptr inbounds float, float* %tmp3215, i64 1
+ %tmp3217 = getelementptr inbounds float, float* %tmp3216, i64 1
+ %tmp3218 = getelementptr inbounds float, float* %tmp3217, i64 1
+ %tmp3219 = getelementptr inbounds float, float* %tmp3218, i64 1
+ %tmp3220 = getelementptr inbounds float, float* %tmp3219, i64 1
+ %tmp3221 = getelementptr inbounds float, float* %tmp3220, i64 1
+ %tmp3222 = getelementptr inbounds float, float* %tmp3221, i64 1
+ %tmp3223 = getelementptr inbounds float, float* %tmp3222, i64 1
+ %tmp3224 = getelementptr inbounds float, float* %tmp3223, i64 1
+ %tmp3225 = getelementptr inbounds float, float* %tmp3224, i64 1
+ %tmp3226 = getelementptr inbounds float, float* %tmp3225, i64 1
+ %tmp3227 = getelementptr inbounds float, float* %tmp3226, i64 1
+ %tmp3228 = getelementptr inbounds float, float* %tmp3227, i64 1
+ %tmp3229 = getelementptr inbounds float, float* %tmp3228, i64 1
+ %tmp3230 = getelementptr inbounds float, float* %tmp3229, i64 1
+ %tmp3231 = getelementptr inbounds float, float* %tmp3230, i64 1
+ %tmp3232 = getelementptr inbounds float, float* %tmp3231, i64 1
+ %tmp3233 = getelementptr inbounds float, float* %tmp3232, i64 1
+ %tmp3234 = getelementptr inbounds float, float* %tmp3233, i64 1
+ %tmp3235 = getelementptr inbounds float, float* %tmp3234, i64 1
+ %tmp3236 = getelementptr inbounds float, float* %tmp3235, i64 1
+ %tmp3237 = getelementptr inbounds float, float* %tmp3236, i64 1
+ %tmp3238 = getelementptr inbounds float, float* %tmp3237, i64 1
+ %tmp3239 = getelementptr inbounds float, float* %tmp3238, i64 1
+ %tmp3240 = getelementptr inbounds float, float* %tmp3239, i64 1
+ %tmp3241 = getelementptr inbounds float, float* %tmp3240, i64 1
+ %tmp3242 = getelementptr inbounds float, float* %tmp3241, i64 1
+ %tmp3243 = getelementptr inbounds float, float* %tmp3242, i64 1
+ %tmp3244 = getelementptr inbounds float, float* %tmp3243, i64 1
+ %tmp3245 = getelementptr inbounds float, float* %tmp3244, i64 1
+ %tmp3246 = getelementptr inbounds float, float* %tmp3245, i64 1
+ %tmp3247 = getelementptr inbounds float, float* %tmp3246, i64 1
+ %tmp3248 = getelementptr inbounds float, float* %tmp3247, i64 1
+ %tmp3249 = getelementptr inbounds float, float* %tmp3248, i64 1
+ %tmp3250 = getelementptr inbounds float, float* %tmp3249, i64 1
+ %tmp3251 = getelementptr inbounds float, float* %tmp3250, i64 1
+ %tmp3252 = getelementptr inbounds float, float* %tmp3251, i64 1
+ %tmp3253 = getelementptr inbounds float, float* %tmp3252, i64 1
+ %tmp3254 = getelementptr inbounds float, float* %tmp3253, i64 1
+ %tmp3255 = getelementptr inbounds float, float* %tmp3254, i64 1
+ %tmp3256 = getelementptr inbounds float, float* %tmp3255, i64 1
+ %tmp3257 = getelementptr inbounds float, float* %tmp3256, i64 1
+ %tmp3258 = getelementptr inbounds float, float* %tmp3257, i64 1
+ %tmp3259 = getelementptr inbounds float, float* %tmp3258, i64 1
+ %tmp3260 = getelementptr inbounds float, float* %tmp3259, i64 1
+ %tmp3261 = getelementptr inbounds float, float* %tmp3260, i64 1
+ %tmp3262 = getelementptr inbounds float, float* %tmp3261, i64 1
+ %tmp3263 = getelementptr inbounds float, float* %tmp3262, i64 1
+ %tmp3264 = getelementptr inbounds float, float* %tmp3263, i64 1
+ %tmp3265 = getelementptr inbounds float, float* %tmp3264, i64 1
+ %tmp3266 = getelementptr inbounds float, float* %tmp3265, i64 1
+ %tmp3267 = getelementptr inbounds float, float* %tmp3266, i64 1
+ %tmp3268 = getelementptr inbounds float, float* %tmp3267, i64 1
+ %tmp3269 = getelementptr inbounds float, float* %tmp3268, i64 1
+ %tmp3270 = getelementptr inbounds float, float* %tmp3269, i64 1
+ %tmp3271 = getelementptr inbounds float, float* %tmp3270, i64 1
+ %tmp3272 = getelementptr inbounds float, float* %tmp3271, i64 1
+ %tmp3273 = getelementptr inbounds float, float* %tmp3272, i64 1
+ %tmp3274 = getelementptr inbounds float, float* %tmp3273, i64 1
+ %tmp3275 = getelementptr inbounds float, float* %tmp3274, i64 1
+ %tmp3276 = getelementptr inbounds float, float* %tmp3275, i64 1
+ %tmp3277 = getelementptr inbounds float, float* %tmp3276, i64 1
+ %tmp3278 = getelementptr inbounds float, float* %tmp3277, i64 1
+ %tmp3279 = getelementptr inbounds float, float* %tmp3278, i64 1
+ %tmp3280 = getelementptr inbounds float, float* %tmp3279, i64 1
+ %tmp3281 = getelementptr inbounds float, float* %tmp3280, i64 1
+ %tmp3282 = getelementptr inbounds float, float* %tmp3281, i64 1
+ %tmp3283 = getelementptr inbounds float, float* %tmp3282, i64 1
+ %tmp3284 = getelementptr inbounds float, float* %tmp3283, i64 1
+ %tmp3285 = getelementptr inbounds float, float* %tmp3284, i64 1
+ %tmp3286 = getelementptr inbounds float, float* %tmp3285, i64 1
+ %tmp3287 = getelementptr inbounds float, float* %tmp3286, i64 1
+ %tmp3288 = getelementptr inbounds float, float* %tmp3287, i64 1
+ %tmp3289 = getelementptr inbounds float, float* %tmp3288, i64 1
+ %tmp3290 = getelementptr inbounds float, float* %tmp3289, i64 1
+ %tmp3291 = getelementptr inbounds float, float* %tmp3290, i64 1
+ %tmp3292 = getelementptr inbounds float, float* %tmp3291, i64 1
+ %tmp3293 = getelementptr inbounds float, float* %tmp3292, i64 1
+ %tmp3294 = getelementptr inbounds float, float* %tmp3293, i64 1
+ %tmp3295 = getelementptr inbounds float, float* %tmp3294, i64 1
+ %tmp3296 = getelementptr inbounds float, float* %tmp3295, i64 1
+ %tmp3297 = getelementptr inbounds float, float* %tmp3296, i64 1
+ %tmp3298 = getelementptr inbounds float, float* %tmp3297, i64 1
+ %tmp3299 = getelementptr inbounds float, float* %tmp3298, i64 1
+ %tmp3300 = getelementptr inbounds float, float* %tmp3299, i64 1
+ %tmp3301 = getelementptr inbounds float, float* %tmp3300, i64 1
+ %tmp3302 = getelementptr inbounds float, float* %tmp3301, i64 1
+ %tmp3303 = getelementptr inbounds float, float* %tmp3302, i64 1
+ %tmp3304 = getelementptr inbounds float, float* %tmp3303, i64 1
+ %tmp3305 = getelementptr inbounds float, float* %tmp3304, i64 1
+ %tmp3306 = getelementptr inbounds float, float* %tmp3305, i64 1
+ %tmp3307 = getelementptr inbounds float, float* %tmp3306, i64 1
+ %tmp3308 = getelementptr inbounds float, float* %tmp3307, i64 1
+ %tmp3309 = getelementptr inbounds float, float* %tmp3308, i64 1
+ %tmp3310 = getelementptr inbounds float, float* %tmp3309, i64 1
+ %tmp3311 = getelementptr inbounds float, float* %tmp3310, i64 1
+ %tmp3312 = getelementptr inbounds float, float* %tmp3311, i64 1
+ %tmp3313 = getelementptr inbounds float, float* %tmp3312, i64 1
+ %tmp3314 = getelementptr inbounds float, float* %tmp3313, i64 1
+ %tmp3315 = getelementptr inbounds float, float* %tmp3314, i64 1
+ %tmp3316 = getelementptr inbounds float, float* %tmp3315, i64 1
+ %tmp3317 = getelementptr inbounds float, float* %tmp3316, i64 1
+ %tmp3318 = getelementptr inbounds float, float* %tmp3317, i64 1
+ %tmp3319 = getelementptr inbounds float, float* %tmp3318, i64 1
+ %tmp3320 = getelementptr inbounds float, float* %tmp3319, i64 1
+ %tmp3321 = getelementptr inbounds float, float* %tmp3320, i64 1
+ %tmp3322 = getelementptr inbounds float, float* %tmp3321, i64 1
+ %tmp3323 = getelementptr inbounds float, float* %tmp3322, i64 1
+ %tmp3324 = getelementptr inbounds float, float* %tmp3323, i64 1
+ %tmp3325 = getelementptr inbounds float, float* %tmp3324, i64 1
+ %tmp3326 = getelementptr inbounds float, float* %tmp3325, i64 1
+ %tmp3327 = getelementptr inbounds float, float* %tmp3326, i64 1
+ %tmp3328 = getelementptr inbounds float, float* %tmp3327, i64 1
+ %tmp3329 = getelementptr inbounds float, float* %tmp3328, i64 1
+ %tmp3330 = getelementptr inbounds float, float* %tmp3329, i64 1
+ %tmp3331 = getelementptr inbounds float, float* %tmp3330, i64 1
+ %tmp3332 = getelementptr inbounds float, float* %tmp3331, i64 1
+ %tmp3333 = getelementptr inbounds float, float* %tmp3332, i64 1
+ %tmp3334 = getelementptr inbounds float, float* %tmp3333, i64 1
+ %tmp3335 = getelementptr inbounds float, float* %tmp3334, i64 1
+ %tmp3336 = getelementptr inbounds float, float* %tmp3335, i64 1
+ %tmp3337 = getelementptr inbounds float, float* %tmp3336, i64 1
+ %tmp3338 = getelementptr inbounds float, float* %tmp3337, i64 1
+ %tmp3339 = getelementptr inbounds float, float* %tmp3338, i64 1
+ %tmp3340 = getelementptr inbounds float, float* %tmp3339, i64 1
+ %tmp3341 = getelementptr inbounds float, float* %tmp3340, i64 1
+ %tmp3342 = getelementptr inbounds float, float* %tmp3341, i64 1
+ %tmp3343 = getelementptr inbounds float, float* %tmp3342, i64 1
+ %tmp3344 = getelementptr inbounds float, float* %tmp3343, i64 1
+ %tmp3345 = getelementptr inbounds float, float* %tmp3344, i64 1
+ %tmp3346 = getelementptr inbounds float, float* %tmp3345, i64 1
+ %tmp3347 = getelementptr inbounds float, float* %tmp3346, i64 1
+ %tmp3348 = getelementptr inbounds float, float* %tmp3347, i64 1
+ %tmp3349 = getelementptr inbounds float, float* %tmp3348, i64 1
+ %tmp3350 = getelementptr inbounds float, float* %tmp3349, i64 1
+ %tmp3351 = getelementptr inbounds float, float* %tmp3350, i64 1
+ %tmp3352 = getelementptr inbounds float, float* %tmp3351, i64 1
+ %tmp3353 = getelementptr inbounds float, float* %tmp3352, i64 1
+ %tmp3354 = getelementptr inbounds float, float* %tmp3353, i64 1
+ %tmp3355 = getelementptr inbounds float, float* %tmp3354, i64 1
+ %tmp3356 = getelementptr inbounds float, float* %tmp3355, i64 1
+ %tmp3357 = getelementptr inbounds float, float* %tmp3356, i64 1
+ %tmp3358 = getelementptr inbounds float, float* %tmp3357, i64 1
+ %tmp3359 = getelementptr inbounds float, float* %tmp3358, i64 1
+ %tmp3360 = getelementptr inbounds float, float* %tmp3359, i64 1
+ %tmp3361 = getelementptr inbounds float, float* %tmp3360, i64 1
+ %tmp3362 = getelementptr inbounds float, float* %tmp3361, i64 1
+ %tmp3363 = getelementptr inbounds float, float* %tmp3362, i64 1
+ %tmp3364 = getelementptr inbounds float, float* %tmp3363, i64 1
+ %tmp3365 = getelementptr inbounds float, float* %tmp3364, i64 1
+ %tmp3366 = getelementptr inbounds float, float* %tmp3365, i64 1
+ %tmp3367 = getelementptr inbounds float, float* %tmp3366, i64 1
+ %tmp3368 = getelementptr inbounds float, float* %tmp3367, i64 1
+ %tmp3369 = getelementptr inbounds float, float* %tmp3368, i64 1
+ %tmp3370 = getelementptr inbounds float, float* %tmp3369, i64 1
+ %tmp3371 = getelementptr inbounds float, float* %tmp3370, i64 1
+ %tmp3372 = getelementptr inbounds float, float* %tmp3371, i64 1
+ %tmp3373 = getelementptr inbounds float, float* %tmp3372, i64 1
+ %tmp3374 = getelementptr inbounds float, float* %tmp3373, i64 1
+ %tmp3375 = getelementptr inbounds float, float* %tmp3374, i64 1
+ %tmp3376 = getelementptr inbounds float, float* %tmp3375, i64 1
+ %tmp3377 = getelementptr inbounds float, float* %tmp3376, i64 1
+ %tmp3378 = getelementptr inbounds float, float* %tmp3377, i64 1
+ %tmp3379 = getelementptr inbounds float, float* %tmp3378, i64 1
+ %tmp3380 = getelementptr inbounds float, float* %tmp3379, i64 1
+ %tmp3381 = getelementptr inbounds float, float* %tmp3380, i64 1
+ %tmp3382 = getelementptr inbounds float, float* %tmp3381, i64 1
+ %tmp3383 = getelementptr inbounds float, float* %tmp3382, i64 1
+ %tmp3384 = getelementptr inbounds float, float* %tmp3383, i64 1
+ %tmp3385 = getelementptr inbounds float, float* %tmp3384, i64 1
+ %tmp3386 = getelementptr inbounds float, float* %tmp3385, i64 1
+ %tmp3387 = getelementptr inbounds float, float* %tmp3386, i64 1
+ %tmp3388 = getelementptr inbounds float, float* %tmp3387, i64 1
+ %tmp3389 = getelementptr inbounds float, float* %tmp3388, i64 1
+ %tmp3390 = getelementptr inbounds float, float* %tmp3389, i64 1
+ %tmp3391 = getelementptr inbounds float, float* %tmp3390, i64 1
+ %tmp3392 = getelementptr inbounds float, float* %tmp3391, i64 1
+ %tmp3393 = getelementptr inbounds float, float* %tmp3392, i64 1
+ %tmp3394 = getelementptr inbounds float, float* %tmp3393, i64 1
+ %tmp3395 = getelementptr inbounds float, float* %tmp3394, i64 1
+ %tmp3396 = getelementptr inbounds float, float* %tmp3395, i64 1
+ %tmp3397 = getelementptr inbounds float, float* %tmp3396, i64 1
+ %tmp3398 = getelementptr inbounds float, float* %tmp3397, i64 1
+ %tmp3399 = getelementptr inbounds float, float* %tmp3398, i64 1
+ %tmp3400 = getelementptr inbounds float, float* %tmp3399, i64 1
+ %tmp3401 = getelementptr inbounds float, float* %tmp3400, i64 1
+ %tmp3402 = getelementptr inbounds float, float* %tmp3401, i64 1
+ %tmp3403 = getelementptr inbounds float, float* %tmp3402, i64 1
+ %tmp3404 = getelementptr inbounds float, float* %tmp3403, i64 1
+ %tmp3405 = getelementptr inbounds float, float* %tmp3404, i64 1
+ %tmp3406 = getelementptr inbounds float, float* %tmp3405, i64 1
+ %tmp3407 = getelementptr inbounds float, float* %tmp3406, i64 1
+ %tmp3408 = getelementptr inbounds float, float* %tmp3407, i64 1
+ %tmp3409 = getelementptr inbounds float, float* %tmp3408, i64 1
+ %tmp3410 = getelementptr inbounds float, float* %tmp3409, i64 1
+ %tmp3411 = getelementptr inbounds float, float* %tmp3410, i64 1
+ %tmp3412 = getelementptr inbounds float, float* %tmp3411, i64 1
+ %tmp3413 = getelementptr inbounds float, float* %tmp3412, i64 1
+ %tmp3414 = getelementptr inbounds float, float* %tmp3413, i64 1
+ %tmp3415 = getelementptr inbounds float, float* %tmp3414, i64 1
+ %tmp3416 = getelementptr inbounds float, float* %tmp3415, i64 1
+ %tmp3417 = getelementptr inbounds float, float* %tmp3416, i64 1
+ %tmp3418 = getelementptr inbounds float, float* %tmp3417, i64 1
+ %tmp3419 = getelementptr inbounds float, float* %tmp3418, i64 1
+ %tmp3420 = getelementptr inbounds float, float* %tmp3419, i64 1
+ %tmp3421 = getelementptr inbounds float, float* %tmp3420, i64 1
+ %tmp3422 = getelementptr inbounds float, float* %tmp3421, i64 1
+ %tmp3423 = getelementptr inbounds float, float* %tmp3422, i64 1
+ %tmp3424 = getelementptr inbounds float, float* %tmp3423, i64 1
+ %tmp3425 = getelementptr inbounds float, float* %tmp3424, i64 1
+ %tmp3426 = getelementptr inbounds float, float* %tmp3425, i64 1
+ %tmp3427 = getelementptr inbounds float, float* %tmp3426, i64 1
+ %tmp3428 = getelementptr inbounds float, float* %tmp3427, i64 1
+ %tmp3429 = getelementptr inbounds float, float* %tmp3428, i64 1
+ %tmp3430 = getelementptr inbounds float, float* %tmp3429, i64 1
+ %tmp3431 = getelementptr inbounds float, float* %tmp3430, i64 1
+ %tmp3432 = getelementptr inbounds float, float* %tmp3431, i64 1
+ %tmp3433 = getelementptr inbounds float, float* %tmp3432, i64 1
+ %tmp3434 = getelementptr inbounds float, float* %tmp3433, i64 1
+ %tmp3435 = getelementptr inbounds float, float* %tmp3434, i64 1
+ %tmp3436 = getelementptr inbounds float, float* %tmp3435, i64 1
+ %tmp3437 = getelementptr inbounds float, float* %tmp3436, i64 1
+ %tmp3438 = getelementptr inbounds float, float* %tmp3437, i64 1
+ %tmp3439 = getelementptr inbounds float, float* %tmp3438, i64 1
+ %tmp3440 = getelementptr inbounds float, float* %tmp3439, i64 1
+ %tmp3441 = getelementptr inbounds float, float* %tmp3440, i64 1
+ %tmp3442 = getelementptr inbounds float, float* %tmp3441, i64 1
+ %tmp3443 = getelementptr inbounds float, float* %tmp3442, i64 1
+ %tmp3444 = getelementptr inbounds float, float* %tmp3443, i64 1
+ %tmp3445 = getelementptr inbounds float, float* %tmp3444, i64 1
+ %tmp3446 = getelementptr inbounds float, float* %tmp3445, i64 1
+ %tmp3447 = getelementptr inbounds float, float* %tmp3446, i64 1
+ %tmp3448 = getelementptr inbounds float, float* %tmp3447, i64 1
+ %tmp3449 = getelementptr inbounds float, float* %tmp3448, i64 1
+ %tmp3450 = getelementptr inbounds float, float* %tmp3449, i64 1
+ %tmp3451 = getelementptr inbounds float, float* %tmp3450, i64 1
+ %tmp3452 = getelementptr inbounds float, float* %tmp3451, i64 1
+ %tmp3453 = getelementptr inbounds float, float* %tmp3452, i64 1
+ %tmp3454 = getelementptr inbounds float, float* %tmp3453, i64 1
+ %tmp3455 = getelementptr inbounds float, float* %tmp3454, i64 1
+ %tmp3456 = getelementptr inbounds float, float* %tmp3455, i64 1
+ %tmp3457 = getelementptr inbounds float, float* %tmp3456, i64 1
+ %tmp3458 = getelementptr inbounds float, float* %tmp3457, i64 1
+ %tmp3459 = getelementptr inbounds float, float* %tmp3458, i64 1
+ %tmp3460 = getelementptr inbounds float, float* %tmp3459, i64 1
+ %tmp3461 = getelementptr inbounds float, float* %tmp3460, i64 1
+ %tmp3462 = getelementptr inbounds float, float* %tmp3461, i64 1
+ %tmp3463 = getelementptr inbounds float, float* %tmp3462, i64 1
+ %tmp3464 = getelementptr inbounds float, float* %tmp3463, i64 1
+ %tmp3465 = getelementptr inbounds float, float* %tmp3464, i64 1
+ %tmp3466 = getelementptr inbounds float, float* %tmp3465, i64 1
+ %tmp3467 = getelementptr inbounds float, float* %tmp3466, i64 1
+ %tmp3468 = getelementptr inbounds float, float* %tmp3467, i64 1
+ %tmp3469 = getelementptr inbounds float, float* %tmp3468, i64 1
+ %tmp3470 = getelementptr inbounds float, float* %tmp3469, i64 1
+ %tmp3471 = getelementptr inbounds float, float* %tmp3470, i64 1
+ %tmp3472 = getelementptr inbounds float, float* %tmp3471, i64 1
+ %tmp3473 = getelementptr inbounds float, float* %tmp3472, i64 1
+ %tmp3474 = getelementptr inbounds float, float* %tmp3473, i64 1
+ %tmp3475 = getelementptr inbounds float, float* %tmp3474, i64 1
+ %tmp3476 = getelementptr inbounds float, float* %tmp3475, i64 1
+ %tmp3477 = getelementptr inbounds float, float* %tmp3476, i64 1
+ %tmp3478 = getelementptr inbounds float, float* %tmp3477, i64 1
+ %tmp3479 = getelementptr inbounds float, float* %tmp3478, i64 1
+ %tmp3480 = getelementptr inbounds float, float* %tmp3479, i64 1
+ %tmp3481 = getelementptr inbounds float, float* %tmp3480, i64 1
+ %tmp3482 = getelementptr inbounds float, float* %tmp3481, i64 1
+ %tmp3483 = getelementptr inbounds float, float* %tmp3482, i64 1
+ %tmp3484 = getelementptr inbounds float, float* %tmp3483, i64 1
+ %tmp3485 = getelementptr inbounds float, float* %tmp3484, i64 1
+ %tmp3486 = getelementptr inbounds float, float* %tmp3485, i64 1
+ %tmp3487 = getelementptr inbounds float, float* %tmp3486, i64 1
+ %tmp3488 = getelementptr inbounds float, float* %tmp3487, i64 1
+ %tmp3489 = getelementptr inbounds float, float* %tmp3488, i64 1
+ %tmp3490 = getelementptr inbounds float, float* %tmp3489, i64 1
+ %tmp3491 = getelementptr inbounds float, float* %tmp3490, i64 1
+ %tmp3492 = getelementptr inbounds float, float* %tmp3491, i64 1
+ %tmp3493 = getelementptr inbounds float, float* %tmp3492, i64 1
+ %tmp3494 = getelementptr inbounds float, float* %tmp3493, i64 1
+ %tmp3495 = getelementptr inbounds float, float* %tmp3494, i64 1
+ %tmp3496 = getelementptr inbounds float, float* %tmp3495, i64 1
+ %tmp3497 = getelementptr inbounds float, float* %tmp3496, i64 1
+ %tmp3498 = getelementptr inbounds float, float* %tmp3497, i64 1
+ %tmp3499 = getelementptr inbounds float, float* %tmp3498, i64 1
+ %tmp3500 = getelementptr inbounds float, float* %tmp3499, i64 1
+ %tmp3501 = getelementptr inbounds float, float* %tmp3500, i64 1
+ %tmp3502 = getelementptr inbounds float, float* %tmp3501, i64 1
+ %tmp3503 = getelementptr inbounds float, float* %tmp3502, i64 1
+ %tmp3504 = getelementptr inbounds float, float* %tmp3503, i64 1
+ %tmp3505 = getelementptr inbounds float, float* %tmp3504, i64 1
+ %tmp3506 = getelementptr inbounds float, float* %tmp3505, i64 1
+ %tmp3507 = getelementptr inbounds float, float* %tmp3506, i64 1
+ %tmp3508 = getelementptr inbounds float, float* %tmp3507, i64 1
+ %tmp3509 = getelementptr inbounds float, float* %tmp3508, i64 1
+ %tmp3510 = getelementptr inbounds float, float* %tmp3509, i64 1
+ %tmp3511 = getelementptr inbounds float, float* %tmp3510, i64 1
+ %tmp3512 = getelementptr inbounds float, float* %tmp3511, i64 1
+ %tmp3513 = getelementptr inbounds float, float* %tmp3512, i64 1
+ %tmp3514 = getelementptr inbounds float, float* %tmp3513, i64 1
+ %tmp3515 = getelementptr inbounds float, float* %tmp3514, i64 1
+ %tmp3516 = getelementptr inbounds float, float* %tmp3515, i64 1
+ %tmp3517 = getelementptr inbounds float, float* %tmp3516, i64 1
+ %tmp3518 = getelementptr inbounds float, float* %tmp3517, i64 1
+ %tmp3519 = getelementptr inbounds float, float* %tmp3518, i64 1
+ %tmp3520 = getelementptr inbounds float, float* %tmp3519, i64 1
+ %tmp3521 = getelementptr inbounds float, float* %tmp3520, i64 1
+ %tmp3522 = getelementptr inbounds float, float* %tmp3521, i64 1
+ %tmp3523 = getelementptr inbounds float, float* %tmp3522, i64 1
+ %tmp3524 = getelementptr inbounds float, float* %tmp3523, i64 1
+ %tmp3525 = getelementptr inbounds float, float* %tmp3524, i64 1
+ %tmp3526 = getelementptr inbounds float, float* %tmp3525, i64 1
+ %tmp3527 = getelementptr inbounds float, float* %tmp3526, i64 1
+ %tmp3528 = getelementptr inbounds float, float* %tmp3527, i64 1
+ %tmp3529 = getelementptr inbounds float, float* %tmp3528, i64 1
+ %tmp3530 = getelementptr inbounds float, float* %tmp3529, i64 1
+ %tmp3531 = getelementptr inbounds float, float* %tmp3530, i64 1
+ %tmp3532 = getelementptr inbounds float, float* %tmp3531, i64 1
+ %tmp3533 = getelementptr inbounds float, float* %tmp3532, i64 1
+ %tmp3534 = getelementptr inbounds float, float* %tmp3533, i64 1
+ %tmp3535 = getelementptr inbounds float, float* %tmp3534, i64 1
+ %tmp3536 = getelementptr inbounds float, float* %tmp3535, i64 1
+ %tmp3537 = getelementptr inbounds float, float* %tmp3536, i64 1
+ %tmp3538 = getelementptr inbounds float, float* %tmp3537, i64 1
+ %tmp3539 = getelementptr inbounds float, float* %tmp3538, i64 1
+ %tmp3540 = getelementptr inbounds float, float* %tmp3539, i64 1
+ %tmp3541 = getelementptr inbounds float, float* %tmp3540, i64 1
+ %tmp3542 = getelementptr inbounds float, float* %tmp3541, i64 1
+ %tmp3543 = getelementptr inbounds float, float* %tmp3542, i64 1
+ %tmp3544 = getelementptr inbounds float, float* %tmp3543, i64 1
+ %tmp3545 = getelementptr inbounds float, float* %tmp3544, i64 1
+ %tmp3546 = getelementptr inbounds float, float* %tmp3545, i64 1
+ %tmp3547 = getelementptr inbounds float, float* %tmp3546, i64 1
+ %tmp3548 = getelementptr inbounds float, float* %tmp3547, i64 1
+ %tmp3549 = getelementptr inbounds float, float* %tmp3548, i64 1
+ %tmp3550 = getelementptr inbounds float, float* %tmp3549, i64 1
+ %tmp3551 = getelementptr inbounds float, float* %tmp3550, i64 1
+ %tmp3552 = getelementptr inbounds float, float* %tmp3551, i64 1
+ %tmp3553 = getelementptr inbounds float, float* %tmp3552, i64 1
+ %tmp3554 = getelementptr inbounds float, float* %tmp3553, i64 1
+ %tmp3555 = getelementptr inbounds float, float* %tmp3554, i64 1
+ %tmp3556 = getelementptr inbounds float, float* %tmp3555, i64 1
+ %tmp3557 = getelementptr inbounds float, float* %tmp3556, i64 1
+ %tmp3558 = getelementptr inbounds float, float* %tmp3557, i64 1
+ %tmp3559 = getelementptr inbounds float, float* %tmp3558, i64 1
+ %tmp3560 = getelementptr inbounds float, float* %tmp3559, i64 1
+ %tmp3561 = getelementptr inbounds float, float* %tmp3560, i64 1
+ %tmp3562 = getelementptr inbounds float, float* %tmp3561, i64 1
+ %tmp3563 = getelementptr inbounds float, float* %tmp3562, i64 1
+ %tmp3564 = getelementptr inbounds float, float* %tmp3563, i64 1
+ %tmp3565 = getelementptr inbounds float, float* %tmp3564, i64 1
+ %tmp3566 = getelementptr inbounds float, float* %tmp3565, i64 1
+ %tmp3567 = getelementptr inbounds float, float* %tmp3566, i64 1
+ %tmp3568 = getelementptr inbounds float, float* %tmp3567, i64 1
+ %tmp3569 = getelementptr inbounds float, float* %tmp3568, i64 1
+ %tmp3570 = getelementptr inbounds float, float* %tmp3569, i64 1
+ %tmp3571 = getelementptr inbounds float, float* %tmp3570, i64 1
+ %tmp3572 = getelementptr inbounds float, float* %tmp3571, i64 1
+ %tmp3573 = getelementptr inbounds float, float* %tmp3572, i64 1
+ %tmp3574 = getelementptr inbounds float, float* %tmp3573, i64 1
+ %tmp3575 = getelementptr inbounds float, float* %tmp3574, i64 1
+ %tmp3576 = getelementptr inbounds float, float* %tmp3575, i64 1
+ %tmp3577 = getelementptr inbounds float, float* %tmp3576, i64 1
+ %tmp3578 = getelementptr inbounds float, float* %tmp3577, i64 1
+ %tmp3579 = getelementptr inbounds float, float* %tmp3578, i64 1
+ %tmp3580 = getelementptr inbounds float, float* %tmp3579, i64 1
+ %tmp3581 = getelementptr inbounds float, float* %tmp3580, i64 1
+ %tmp3582 = getelementptr inbounds float, float* %tmp3581, i64 1
+ %tmp3583 = getelementptr inbounds float, float* %tmp3582, i64 1
+ %tmp3584 = getelementptr inbounds float, float* %tmp3583, i64 1
+ %tmp3585 = getelementptr inbounds float, float* %tmp3584, i64 1
+ %tmp3586 = getelementptr inbounds float, float* %tmp3585, i64 1
+ %tmp3587 = getelementptr inbounds float, float* %tmp3586, i64 1
+ %tmp3588 = getelementptr inbounds float, float* %tmp3587, i64 1
+ %tmp3589 = getelementptr inbounds float, float* %tmp3588, i64 1
+ %tmp3590 = getelementptr inbounds float, float* %tmp3589, i64 1
+ %tmp3591 = getelementptr inbounds float, float* %tmp3590, i64 1
+ %tmp3592 = getelementptr inbounds float, float* %tmp3591, i64 1
+ %tmp3593 = getelementptr inbounds float, float* %tmp3592, i64 1
+ %tmp3594 = getelementptr inbounds float, float* %tmp3593, i64 1
+ %tmp3595 = getelementptr inbounds float, float* %tmp3594, i64 1
+ %tmp3596 = getelementptr inbounds float, float* %tmp3595, i64 1
+ %tmp3597 = getelementptr inbounds float, float* %tmp3596, i64 1
+ %tmp3598 = getelementptr inbounds float, float* %tmp3597, i64 1
+ %tmp3599 = getelementptr inbounds float, float* %tmp3598, i64 1
+ %tmp3600 = getelementptr inbounds float, float* %tmp3599, i64 1
+ %tmp3601 = getelementptr inbounds float, float* %tmp3600, i64 1
+ %tmp3602 = getelementptr inbounds float, float* %tmp3601, i64 1
+ %tmp3603 = getelementptr inbounds float, float* %tmp3602, i64 1
+ %tmp3604 = getelementptr inbounds float, float* %tmp3603, i64 1
+ %tmp3605 = getelementptr inbounds float, float* %tmp3604, i64 1
+ %tmp3606 = getelementptr inbounds float, float* %tmp3605, i64 1
+ %tmp3607 = getelementptr inbounds float, float* %tmp3606, i64 1
+ %tmp3608 = getelementptr inbounds float, float* %tmp3607, i64 1
+ %tmp3609 = getelementptr inbounds float, float* %tmp3608, i64 1
+ %tmp3610 = getelementptr inbounds float, float* %tmp3609, i64 1
+ %tmp3611 = getelementptr inbounds float, float* %tmp3610, i64 1
+ %tmp3612 = getelementptr inbounds float, float* %tmp3611, i64 1
+ %tmp3613 = getelementptr inbounds float, float* %tmp3612, i64 1
+ %tmp3614 = getelementptr inbounds float, float* %tmp3613, i64 1
+ %tmp3615 = getelementptr inbounds float, float* %tmp3614, i64 1
+ %tmp3616 = getelementptr inbounds float, float* %tmp3615, i64 1
+ %tmp3617 = getelementptr inbounds float, float* %tmp3616, i64 1
+ %tmp3618 = getelementptr inbounds float, float* %tmp3617, i64 1
+ %tmp3619 = getelementptr inbounds float, float* %tmp3618, i64 1
+ %tmp3620 = getelementptr inbounds float, float* %tmp3619, i64 1
+ %tmp3621 = getelementptr inbounds float, float* %tmp3620, i64 1
+ %tmp3622 = getelementptr inbounds float, float* %tmp3621, i64 1
+ %tmp3623 = getelementptr inbounds float, float* %tmp3622, i64 1
+ %tmp3624 = getelementptr inbounds float, float* %tmp3623, i64 1
+ %tmp3625 = getelementptr inbounds float, float* %tmp3624, i64 1
+ %tmp3626 = getelementptr inbounds float, float* %tmp3625, i64 1
+ %tmp3627 = getelementptr inbounds float, float* %tmp3626, i64 1
+ %tmp3628 = getelementptr inbounds float, float* %tmp3627, i64 1
+ %tmp3629 = getelementptr inbounds float, float* %tmp3628, i64 1
+ %tmp3630 = getelementptr inbounds float, float* %tmp3629, i64 1
+ %tmp3631 = getelementptr inbounds float, float* %tmp3630, i64 1
+ %tmp3632 = getelementptr inbounds float, float* %tmp3631, i64 1
+ %tmp3633 = getelementptr inbounds float, float* %tmp3632, i64 1
+ %tmp3634 = getelementptr inbounds float, float* %tmp3633, i64 1
+ %tmp3635 = getelementptr inbounds float, float* %tmp3634, i64 1
+ %tmp3636 = getelementptr inbounds float, float* %tmp3635, i64 1
+ %tmp3637 = getelementptr inbounds float, float* %tmp3636, i64 1
+ %tmp3638 = getelementptr inbounds float, float* %tmp3637, i64 1
+ %tmp3639 = getelementptr inbounds float, float* %tmp3638, i64 1
+ %tmp3640 = getelementptr inbounds float, float* %tmp3639, i64 1
+ %tmp3641 = getelementptr inbounds float, float* %tmp3640, i64 1
+ %tmp3642 = getelementptr inbounds float, float* %tmp3641, i64 1
+ %tmp3643 = getelementptr inbounds float, float* %tmp3642, i64 1
+ %tmp3644 = getelementptr inbounds float, float* %tmp3643, i64 1
+ %tmp3645 = getelementptr inbounds float, float* %tmp3644, i64 1
+ %tmp3646 = getelementptr inbounds float, float* %tmp3645, i64 1
+ %tmp3647 = getelementptr inbounds float, float* %tmp3646, i64 1
+ %tmp3648 = getelementptr inbounds float, float* %tmp3647, i64 1
+ %tmp3649 = getelementptr inbounds float, float* %tmp3648, i64 1
+ %tmp3650 = getelementptr inbounds float, float* %tmp3649, i64 1
+ %tmp3651 = getelementptr inbounds float, float* %tmp3650, i64 1
+ %tmp3652 = getelementptr inbounds float, float* %tmp3651, i64 1
+ %tmp3653 = getelementptr inbounds float, float* %tmp3652, i64 1
+ %tmp3654 = getelementptr inbounds float, float* %tmp3653, i64 1
+ %tmp3655 = getelementptr inbounds float, float* %tmp3654, i64 1
+ %tmp3656 = getelementptr inbounds float, float* %tmp3655, i64 1
+ %tmp3657 = getelementptr inbounds float, float* %tmp3656, i64 1
+ %tmp3658 = getelementptr inbounds float, float* %tmp3657, i64 1
+ %tmp3659 = getelementptr inbounds float, float* %tmp3658, i64 1
+ %tmp3660 = getelementptr inbounds float, float* %tmp3659, i64 1
+ %tmp3661 = getelementptr inbounds float, float* %tmp3660, i64 1
+ %tmp3662 = getelementptr inbounds float, float* %tmp3661, i64 1
+ %tmp3663 = getelementptr inbounds float, float* %tmp3662, i64 1
+ %tmp3664 = getelementptr inbounds float, float* %tmp3663, i64 1
+ %tmp3665 = getelementptr inbounds float, float* %tmp3664, i64 1
+ %tmp3666 = getelementptr inbounds float, float* %tmp3665, i64 1
+ %tmp3667 = getelementptr inbounds float, float* %tmp3666, i64 1
+ %tmp3668 = getelementptr inbounds float, float* %tmp3667, i64 1
+ %tmp3669 = getelementptr inbounds float, float* %tmp3668, i64 1
+ %tmp3670 = getelementptr inbounds float, float* %tmp3669, i64 1
+ %tmp3671 = getelementptr inbounds float, float* %tmp3670, i64 1
+ %tmp3672 = getelementptr inbounds float, float* %tmp3671, i64 1
+ %tmp3673 = getelementptr inbounds float, float* %tmp3672, i64 1
+ %tmp3674 = getelementptr inbounds float, float* %tmp3673, i64 1
+ %tmp3675 = getelementptr inbounds float, float* %tmp3674, i64 1
+ %tmp3676 = getelementptr inbounds float, float* %tmp3675, i64 1
+ %tmp3677 = getelementptr inbounds float, float* %tmp3676, i64 1
+ %tmp3678 = getelementptr inbounds float, float* %tmp3677, i64 1
+ %tmp3679 = getelementptr inbounds float, float* %tmp3678, i64 1
+ %tmp3680 = getelementptr inbounds float, float* %tmp3679, i64 1
+ %tmp3681 = getelementptr inbounds float, float* %tmp3680, i64 1
+ %tmp3682 = getelementptr inbounds float, float* %tmp3681, i64 1
+ %tmp3683 = getelementptr inbounds float, float* %tmp3682, i64 1
+ %tmp3684 = getelementptr inbounds float, float* %tmp3683, i64 1
+ %tmp3685 = getelementptr inbounds float, float* %tmp3684, i64 1
+ %tmp3686 = getelementptr inbounds float, float* %tmp3685, i64 1
+ %tmp3687 = getelementptr inbounds float, float* %tmp3686, i64 1
+ %tmp3688 = getelementptr inbounds float, float* %tmp3687, i64 1
+ %tmp3689 = getelementptr inbounds float, float* %tmp3688, i64 1
+ %tmp3690 = getelementptr inbounds float, float* %tmp3689, i64 1
+ %tmp3691 = getelementptr inbounds float, float* %tmp3690, i64 1
+ %tmp3692 = getelementptr inbounds float, float* %tmp3691, i64 1
+ %tmp3693 = getelementptr inbounds float, float* %tmp3692, i64 1
+ %tmp3694 = getelementptr inbounds float, float* %tmp3693, i64 1
+ %tmp3695 = getelementptr inbounds float, float* %tmp3694, i64 1
+ %tmp3696 = getelementptr inbounds float, float* %tmp3695, i64 1
+ %tmp3697 = getelementptr inbounds float, float* %tmp3696, i64 1
+ %tmp3698 = getelementptr inbounds float, float* %tmp3697, i64 1
+ %tmp3699 = getelementptr inbounds float, float* %tmp3698, i64 1
+ %tmp3700 = getelementptr inbounds float, float* %tmp3699, i64 1
+ %tmp3701 = getelementptr inbounds float, float* %tmp3700, i64 1
+ %tmp3702 = getelementptr inbounds float, float* %tmp3701, i64 1
+ %tmp3703 = getelementptr inbounds float, float* %tmp3702, i64 1
+ %tmp3704 = getelementptr inbounds float, float* %tmp3703, i64 1
+ %tmp3705 = getelementptr inbounds float, float* %tmp3704, i64 1
+ %tmp3706 = getelementptr inbounds float, float* %tmp3705, i64 1
+ %tmp3707 = getelementptr inbounds float, float* %tmp3706, i64 1
+ %tmp3708 = getelementptr inbounds float, float* %tmp3707, i64 1
+ %tmp3709 = getelementptr inbounds float, float* %tmp3708, i64 1
+ %tmp3710 = getelementptr inbounds float, float* %tmp3709, i64 1
+ %tmp3711 = getelementptr inbounds float, float* %tmp3710, i64 1
+ %tmp3712 = getelementptr inbounds float, float* %tmp3711, i64 1
+ %tmp3713 = getelementptr inbounds float, float* %tmp3712, i64 1
+ %tmp3714 = getelementptr inbounds float, float* %tmp3713, i64 1
+ %tmp3715 = getelementptr inbounds float, float* %tmp3714, i64 1
+ %tmp3716 = getelementptr inbounds float, float* %tmp3715, i64 1
+ %tmp3717 = getelementptr inbounds float, float* %tmp3716, i64 1
+ %tmp3718 = getelementptr inbounds float, float* %tmp3717, i64 1
+ %tmp3719 = getelementptr inbounds float, float* %tmp3718, i64 1
+ %tmp3720 = getelementptr inbounds float, float* %tmp3719, i64 1
+ %tmp3721 = getelementptr inbounds float, float* %tmp3720, i64 1
+ %tmp3722 = getelementptr inbounds float, float* %tmp3721, i64 1
+ %tmp3723 = getelementptr inbounds float, float* %tmp3722, i64 1
+ %tmp3724 = getelementptr inbounds float, float* %tmp3723, i64 1
+ %tmp3725 = getelementptr inbounds float, float* %tmp3724, i64 1
+ %tmp3726 = getelementptr inbounds float, float* %tmp3725, i64 1
+ %tmp3727 = getelementptr inbounds float, float* %tmp3726, i64 1
+ %tmp3728 = getelementptr inbounds float, float* %tmp3727, i64 1
+ %tmp3729 = getelementptr inbounds float, float* %tmp3728, i64 1
+ %tmp3730 = getelementptr inbounds float, float* %tmp3729, i64 1
+ %tmp3731 = getelementptr inbounds float, float* %tmp3730, i64 1
+ %tmp3732 = getelementptr inbounds float, float* %tmp3731, i64 1
+ %tmp3733 = getelementptr inbounds float, float* %tmp3732, i64 1
+ %tmp3734 = getelementptr inbounds float, float* %tmp3733, i64 1
+ %tmp3735 = getelementptr inbounds float, float* %tmp3734, i64 1
+ %tmp3736 = getelementptr inbounds float, float* %tmp3735, i64 1
+ %tmp3737 = getelementptr inbounds float, float* %tmp3736, i64 1
+ %tmp3738 = getelementptr inbounds float, float* %tmp3737, i64 1
+ %tmp3739 = getelementptr inbounds float, float* %tmp3738, i64 1
+ %tmp3740 = getelementptr inbounds float, float* %tmp3739, i64 1
+ %tmp3741 = getelementptr inbounds float, float* %tmp3740, i64 1
+ %tmp3742 = getelementptr inbounds float, float* %tmp3741, i64 1
+ %tmp3743 = getelementptr inbounds float, float* %tmp3742, i64 1
+ %tmp3744 = getelementptr inbounds float, float* %tmp3743, i64 1
+ %tmp3745 = getelementptr inbounds float, float* %tmp3744, i64 1
+ %tmp3746 = getelementptr inbounds float, float* %tmp3745, i64 1
+ %tmp3747 = getelementptr inbounds float, float* %tmp3746, i64 1
+ %tmp3748 = getelementptr inbounds float, float* %tmp3747, i64 1
+ %tmp3749 = getelementptr inbounds float, float* %tmp3748, i64 1
+ %tmp3750 = getelementptr inbounds float, float* %tmp3749, i64 1
+ %tmp3751 = getelementptr inbounds float, float* %tmp3750, i64 1
+ %tmp3752 = getelementptr inbounds float, float* %tmp3751, i64 1
+ %tmp3753 = getelementptr inbounds float, float* %tmp3752, i64 1
+ %tmp3754 = getelementptr inbounds float, float* %tmp3753, i64 1
+ %tmp3755 = getelementptr inbounds float, float* %tmp3754, i64 1
+ %tmp3756 = getelementptr inbounds float, float* %tmp3755, i64 1
+ %tmp3757 = getelementptr inbounds float, float* %tmp3756, i64 1
+ %tmp3758 = getelementptr inbounds float, float* %tmp3757, i64 1
+ %tmp3759 = getelementptr inbounds float, float* %tmp3758, i64 1
+ %tmp3760 = getelementptr inbounds float, float* %tmp3759, i64 1
+ %tmp3761 = getelementptr inbounds float, float* %tmp3760, i64 1
+ %tmp3762 = getelementptr inbounds float, float* %tmp3761, i64 1
+ %tmp3763 = getelementptr inbounds float, float* %tmp3762, i64 1
+ %tmp3764 = getelementptr inbounds float, float* %tmp3763, i64 1
+ %tmp3765 = getelementptr inbounds float, float* %tmp3764, i64 1
+ %tmp3766 = getelementptr inbounds float, float* %tmp3765, i64 1
+ %tmp3767 = getelementptr inbounds float, float* %tmp3766, i64 1
+ %tmp3768 = getelementptr inbounds float, float* %tmp3767, i64 1
+ %tmp3769 = getelementptr inbounds float, float* %tmp3768, i64 1
+ %tmp3770 = getelementptr inbounds float, float* %tmp3769, i64 1
+ %tmp3771 = getelementptr inbounds float, float* %tmp3770, i64 1
+ %tmp3772 = getelementptr inbounds float, float* %tmp3771, i64 1
+ %tmp3773 = getelementptr inbounds float, float* %tmp3772, i64 1
+ %tmp3774 = getelementptr inbounds float, float* %tmp3773, i64 1
+ %tmp3775 = getelementptr inbounds float, float* %tmp3774, i64 1
+ %tmp3776 = getelementptr inbounds float, float* %tmp3775, i64 1
+ %tmp3777 = getelementptr inbounds float, float* %tmp3776, i64 1
+ %tmp3778 = getelementptr inbounds float, float* %tmp3777, i64 1
+ %tmp3779 = getelementptr inbounds float, float* %tmp3778, i64 1
+ %tmp3780 = getelementptr inbounds float, float* %tmp3779, i64 1
+ %tmp3781 = getelementptr inbounds float, float* %tmp3780, i64 1
+ %tmp3782 = getelementptr inbounds float, float* %tmp3781, i64 1
+ %tmp3783 = getelementptr inbounds float, float* %tmp3782, i64 1
+ %tmp3784 = getelementptr inbounds float, float* %tmp3783, i64 1
+ %tmp3785 = getelementptr inbounds float, float* %tmp3784, i64 1
+ %tmp3786 = getelementptr inbounds float, float* %tmp3785, i64 1
+ %tmp3787 = getelementptr inbounds float, float* %tmp3786, i64 1
+ %tmp3788 = getelementptr inbounds float, float* %tmp3787, i64 1
+ %tmp3789 = getelementptr inbounds float, float* %tmp3788, i64 1
+ %tmp3790 = getelementptr inbounds float, float* %tmp3789, i64 1
+ %tmp3791 = getelementptr inbounds float, float* %tmp3790, i64 1
+ %tmp3792 = getelementptr inbounds float, float* %tmp3791, i64 1
+ %tmp3793 = getelementptr inbounds float, float* %tmp3792, i64 1
+ %tmp3794 = getelementptr inbounds float, float* %tmp3793, i64 1
+ %tmp3795 = getelementptr inbounds float, float* %tmp3794, i64 1
+ %tmp3796 = getelementptr inbounds float, float* %tmp3795, i64 1
+ %tmp3797 = getelementptr inbounds float, float* %tmp3796, i64 1
+ %tmp3798 = getelementptr inbounds float, float* %tmp3797, i64 1
+ %tmp3799 = getelementptr inbounds float, float* %tmp3798, i64 1
+ %tmp3800 = getelementptr inbounds float, float* %tmp3799, i64 1
+ %tmp3801 = getelementptr inbounds float, float* %tmp3800, i64 1
+ %tmp3802 = getelementptr inbounds float, float* %tmp3801, i64 1
+ %tmp3803 = getelementptr inbounds float, float* %tmp3802, i64 1
+ %tmp3804 = getelementptr inbounds float, float* %tmp3803, i64 1
+ %tmp3805 = getelementptr inbounds float, float* %tmp3804, i64 1
+ %tmp3806 = getelementptr inbounds float, float* %tmp3805, i64 1
+ %tmp3807 = getelementptr inbounds float, float* %tmp3806, i64 1
+ %tmp3808 = getelementptr inbounds float, float* %tmp3807, i64 1
+ %tmp3809 = getelementptr inbounds float, float* %tmp3808, i64 1
+ %tmp3810 = getelementptr inbounds float, float* %tmp3809, i64 1
+ %tmp3811 = getelementptr inbounds float, float* %tmp3810, i64 1
+ %tmp3812 = getelementptr inbounds float, float* %tmp3811, i64 1
+ %tmp3813 = getelementptr inbounds float, float* %tmp3812, i64 1
+ %tmp3814 = getelementptr inbounds float, float* %tmp3813, i64 1
+ %tmp3815 = getelementptr inbounds float, float* %tmp3814, i64 1
+ %tmp3816 = getelementptr inbounds float, float* %tmp3815, i64 1
+ %tmp3817 = getelementptr inbounds float, float* %tmp3816, i64 1
+ %tmp3818 = getelementptr inbounds float, float* %tmp3817, i64 1
+ %tmp3819 = getelementptr inbounds float, float* %tmp3818, i64 1
+ %tmp3820 = getelementptr inbounds float, float* %tmp3819, i64 1
+ %tmp3821 = getelementptr inbounds float, float* %tmp3820, i64 1
+ %tmp3822 = getelementptr inbounds float, float* %tmp3821, i64 1
+ %tmp3823 = getelementptr inbounds float, float* %tmp3822, i64 1
+ %tmp3824 = getelementptr inbounds float, float* %tmp3823, i64 1
+ %tmp3825 = getelementptr inbounds float, float* %tmp3824, i64 1
+ %tmp3826 = getelementptr inbounds float, float* %tmp3825, i64 1
+ %tmp3827 = getelementptr inbounds float, float* %tmp3826, i64 1
+ %tmp3828 = getelementptr inbounds float, float* %tmp3827, i64 1
+ %tmp3829 = getelementptr inbounds float, float* %tmp3828, i64 1
+ %tmp3830 = getelementptr inbounds float, float* %tmp3829, i64 1
+ %tmp3831 = getelementptr inbounds float, float* %tmp3830, i64 1
+ %tmp3832 = getelementptr inbounds float, float* %tmp3831, i64 1
+ %tmp3833 = getelementptr inbounds float, float* %tmp3832, i64 1
+ %tmp3834 = getelementptr inbounds float, float* %tmp3833, i64 1
+ %tmp3835 = getelementptr inbounds float, float* %tmp3834, i64 1
+ %tmp3836 = getelementptr inbounds float, float* %tmp3835, i64 1
+ %tmp3837 = getelementptr inbounds float, float* %tmp3836, i64 1
+ %tmp3838 = getelementptr inbounds float, float* %tmp3837, i64 1
+ %tmp3839 = getelementptr inbounds float, float* %tmp3838, i64 1
+ %tmp3840 = getelementptr inbounds float, float* %tmp3839, i64 1
+ %tmp3841 = getelementptr inbounds float, float* %tmp3840, i64 1
+ %tmp3842 = getelementptr inbounds float, float* %tmp3841, i64 1
+ %tmp3843 = getelementptr inbounds float, float* %tmp3842, i64 1
+ %tmp3844 = getelementptr inbounds float, float* %tmp3843, i64 1
+ %tmp3845 = getelementptr inbounds float, float* %tmp3844, i64 1
+ %tmp3846 = getelementptr inbounds float, float* %tmp3845, i64 1
+ %tmp3847 = getelementptr inbounds float, float* %tmp3846, i64 1
+ %tmp3848 = getelementptr inbounds float, float* %tmp3847, i64 1
+ %tmp3849 = getelementptr inbounds float, float* %tmp3848, i64 1
+ %tmp3850 = getelementptr inbounds float, float* %tmp3849, i64 1
+ %tmp3851 = getelementptr inbounds float, float* %tmp3850, i64 1
+ %tmp3852 = getelementptr inbounds float, float* %tmp3851, i64 1
+ %tmp3853 = getelementptr inbounds float, float* %tmp3852, i64 1
+ %tmp3854 = getelementptr inbounds float, float* %tmp3853, i64 1
+ %tmp3855 = getelementptr inbounds float, float* %tmp3854, i64 1
+ %tmp3856 = getelementptr inbounds float, float* %tmp3855, i64 1
+ %tmp3857 = getelementptr inbounds float, float* %tmp3856, i64 1
+ %tmp3858 = getelementptr inbounds float, float* %tmp3857, i64 1
+ %tmp3859 = getelementptr inbounds float, float* %tmp3858, i64 1
+ %tmp3860 = getelementptr inbounds float, float* %tmp3859, i64 1
+ %tmp3861 = getelementptr inbounds float, float* %tmp3860, i64 1
+ %tmp3862 = getelementptr inbounds float, float* %tmp3861, i64 1
+ %tmp3863 = getelementptr inbounds float, float* %tmp3862, i64 1
+ %tmp3864 = getelementptr inbounds float, float* %tmp3863, i64 1
+ %tmp3865 = getelementptr inbounds float, float* %tmp3864, i64 1
+ %tmp3866 = getelementptr inbounds float, float* %tmp3865, i64 1
+ %tmp3867 = getelementptr inbounds float, float* %tmp3866, i64 1
+ %tmp3868 = getelementptr inbounds float, float* %tmp3867, i64 1
+ %tmp3869 = getelementptr inbounds float, float* %tmp3868, i64 1
+ %tmp3870 = getelementptr inbounds float, float* %tmp3869, i64 1
+ %tmp3871 = getelementptr inbounds float, float* %tmp3870, i64 1
+ %tmp3872 = getelementptr inbounds float, float* %tmp3871, i64 1
+ %tmp3873 = getelementptr inbounds float, float* %tmp3872, i64 1
+ %tmp3874 = getelementptr inbounds float, float* %tmp3873, i64 1
+ %tmp3875 = getelementptr inbounds float, float* %tmp3874, i64 1
+ %tmp3876 = getelementptr inbounds float, float* %tmp3875, i64 1
+ %tmp3877 = getelementptr inbounds float, float* %tmp3876, i64 1
+ %tmp3878 = getelementptr inbounds float, float* %tmp3877, i64 1
+ %tmp3879 = getelementptr inbounds float, float* %tmp3878, i64 1
+ %tmp3880 = getelementptr inbounds float, float* %tmp3879, i64 1
+ %tmp3881 = getelementptr inbounds float, float* %tmp3880, i64 1
+ %tmp3882 = getelementptr inbounds float, float* %tmp3881, i64 1
+ %tmp3883 = getelementptr inbounds float, float* %tmp3882, i64 1
+ %tmp3884 = getelementptr inbounds float, float* %tmp3883, i64 1
+ %tmp3885 = getelementptr inbounds float, float* %tmp3884, i64 1
+ %tmp3886 = getelementptr inbounds float, float* %tmp3885, i64 1
+ %tmp3887 = getelementptr inbounds float, float* %tmp3886, i64 1
+ %tmp3888 = getelementptr inbounds float, float* %tmp3887, i64 1
+ %tmp3889 = getelementptr inbounds float, float* %tmp3888, i64 1
+ %tmp3890 = getelementptr inbounds float, float* %tmp3889, i64 1
+ %tmp3891 = getelementptr inbounds float, float* %tmp3890, i64 1
+ %tmp3892 = getelementptr inbounds float, float* %tmp3891, i64 1
+ %tmp3893 = getelementptr inbounds float, float* %tmp3892, i64 1
+ %tmp3894 = getelementptr inbounds float, float* %tmp3893, i64 1
+ %tmp3895 = getelementptr inbounds float, float* %tmp3894, i64 1
+ %tmp3896 = getelementptr inbounds float, float* %tmp3895, i64 1
+ %tmp3897 = getelementptr inbounds float, float* %tmp3896, i64 1
+ %tmp3898 = getelementptr inbounds float, float* %tmp3897, i64 1
+ %tmp3899 = getelementptr inbounds float, float* %tmp3898, i64 1
+ %tmp3900 = getelementptr inbounds float, float* %tmp3899, i64 1
+ %tmp3901 = getelementptr inbounds float, float* %tmp3900, i64 1
+ %tmp3902 = getelementptr inbounds float, float* %tmp3901, i64 1
+ %tmp3903 = getelementptr inbounds float, float* %tmp3902, i64 1
+ %tmp3904 = getelementptr inbounds float, float* %tmp3903, i64 1
+ %tmp3905 = getelementptr inbounds float, float* %tmp3904, i64 1
+ %tmp3906 = getelementptr inbounds float, float* %tmp3905, i64 1
+ %tmp3907 = getelementptr inbounds float, float* %tmp3906, i64 1
+ %tmp3908 = getelementptr inbounds float, float* %tmp3907, i64 1
+ %tmp3909 = getelementptr inbounds float, float* %tmp3908, i64 1
+ %tmp3910 = getelementptr inbounds float, float* %tmp3909, i64 1
+ %tmp3911 = getelementptr inbounds float, float* %tmp3910, i64 1
+ %tmp3912 = getelementptr inbounds float, float* %tmp3911, i64 1
+ %tmp3913 = getelementptr inbounds float, float* %tmp3912, i64 1
+ %tmp3914 = getelementptr inbounds float, float* %tmp3913, i64 1
+ %tmp3915 = getelementptr inbounds float, float* %tmp3914, i64 1
+ %tmp3916 = getelementptr inbounds float, float* %tmp3915, i64 1
+ %tmp3917 = getelementptr inbounds float, float* %tmp3916, i64 1
+ %tmp3918 = getelementptr inbounds float, float* %tmp3917, i64 1
+ %tmp3919 = getelementptr inbounds float, float* %tmp3918, i64 1
+ %tmp3920 = getelementptr inbounds float, float* %tmp3919, i64 1
+ %tmp3921 = getelementptr inbounds float, float* %tmp3920, i64 1
+ %tmp3922 = getelementptr inbounds float, float* %tmp3921, i64 1
+ %tmp3923 = getelementptr inbounds float, float* %tmp3922, i64 1
+ %tmp3924 = getelementptr inbounds float, float* %tmp3923, i64 1
+ %tmp3925 = getelementptr inbounds float, float* %tmp3924, i64 1
+ %tmp3926 = getelementptr inbounds float, float* %tmp3925, i64 1
+ %tmp3927 = getelementptr inbounds float, float* %tmp3926, i64 1
+ %tmp3928 = getelementptr inbounds float, float* %tmp3927, i64 1
+ %tmp3929 = getelementptr inbounds float, float* %tmp3928, i64 1
+ %tmp3930 = getelementptr inbounds float, float* %tmp3929, i64 1
+ %tmp3931 = getelementptr inbounds float, float* %tmp3930, i64 1
+ %tmp3932 = getelementptr inbounds float, float* %tmp3931, i64 1
+ %tmp3933 = getelementptr inbounds float, float* %tmp3932, i64 1
+ %tmp3934 = getelementptr inbounds float, float* %tmp3933, i64 1
+ %tmp3935 = getelementptr inbounds float, float* %tmp3934, i64 1
+ %tmp3936 = getelementptr inbounds float, float* %tmp3935, i64 1
+ %tmp3937 = getelementptr inbounds float, float* %tmp3936, i64 1
+ %tmp3938 = getelementptr inbounds float, float* %tmp3937, i64 1
+ %tmp3939 = getelementptr inbounds float, float* %tmp3938, i64 1
+ %tmp3940 = getelementptr inbounds float, float* %tmp3939, i64 1
+ %tmp3941 = getelementptr inbounds float, float* %tmp3940, i64 1
+ %tmp3942 = getelementptr inbounds float, float* %tmp3941, i64 1
+ %tmp3943 = getelementptr inbounds float, float* %tmp3942, i64 1
+ %tmp3944 = getelementptr inbounds float, float* %tmp3943, i64 1
+ %tmp3945 = getelementptr inbounds float, float* %tmp3944, i64 1
+ %tmp3946 = getelementptr inbounds float, float* %tmp3945, i64 1
+ %tmp3947 = getelementptr inbounds float, float* %tmp3946, i64 1
+ %tmp3948 = getelementptr inbounds float, float* %tmp3947, i64 1
+ %tmp3949 = getelementptr inbounds float, float* %tmp3948, i64 1
+ %tmp3950 = getelementptr inbounds float, float* %tmp3949, i64 1
+ %tmp3951 = getelementptr inbounds float, float* %tmp3950, i64 1
+ %tmp3952 = getelementptr inbounds float, float* %tmp3951, i64 1
+ %tmp3953 = getelementptr inbounds float, float* %tmp3952, i64 1
+ %tmp3954 = getelementptr inbounds float, float* %tmp3953, i64 1
+ %tmp3955 = getelementptr inbounds float, float* %tmp3954, i64 1
+ %tmp3956 = getelementptr inbounds float, float* %tmp3955, i64 1
+ %tmp3957 = getelementptr inbounds float, float* %tmp3956, i64 1
+ %tmp3958 = getelementptr inbounds float, float* %tmp3957, i64 1
+ %tmp3959 = getelementptr inbounds float, float* %tmp3958, i64 1
+ %tmp3960 = getelementptr inbounds float, float* %tmp3959, i64 1
+ %tmp3961 = getelementptr inbounds float, float* %tmp3960, i64 1
+ %tmp3962 = getelementptr inbounds float, float* %tmp3961, i64 1
+ %tmp3963 = getelementptr inbounds float, float* %tmp3962, i64 1
+ %tmp3964 = getelementptr inbounds float, float* %tmp3963, i64 1
+ %tmp3965 = getelementptr inbounds float, float* %tmp3964, i64 1
+ %tmp3966 = getelementptr inbounds float, float* %tmp3965, i64 1
+ %tmp3967 = getelementptr inbounds float, float* %tmp3966, i64 1
+ %tmp3968 = getelementptr inbounds float, float* %tmp3967, i64 1
+ %tmp3969 = getelementptr inbounds float, float* %tmp3968, i64 1
+ %tmp3970 = getelementptr inbounds float, float* %tmp3969, i64 1
+ %tmp3971 = getelementptr inbounds float, float* %tmp3970, i64 1
+ %tmp3972 = getelementptr inbounds float, float* %tmp3971, i64 1
+ %tmp3973 = getelementptr inbounds float, float* %tmp3972, i64 1
+ %tmp3974 = getelementptr inbounds float, float* %tmp3973, i64 1
+ %tmp3975 = getelementptr inbounds float, float* %tmp3974, i64 1
+ %tmp3976 = getelementptr inbounds float, float* %tmp3975, i64 1
+ %tmp3977 = getelementptr inbounds float, float* %tmp3976, i64 1
+ %tmp3978 = getelementptr inbounds float, float* %tmp3977, i64 1
+ %tmp3979 = getelementptr inbounds float, float* %tmp3978, i64 1
+ %tmp3980 = getelementptr inbounds float, float* %tmp3979, i64 1
+ %tmp3981 = getelementptr inbounds float, float* %tmp3980, i64 1
+ %tmp3982 = getelementptr inbounds float, float* %tmp3981, i64 1
+ %tmp3983 = getelementptr inbounds float, float* %tmp3982, i64 1
+ %tmp3984 = getelementptr inbounds float, float* %tmp3983, i64 1
+ %tmp3985 = getelementptr inbounds float, float* %tmp3984, i64 1
+ %tmp3986 = getelementptr inbounds float, float* %tmp3985, i64 1
+ %tmp3987 = getelementptr inbounds float, float* %tmp3986, i64 1
+ %tmp3988 = getelementptr inbounds float, float* %tmp3987, i64 1
+ %tmp3989 = getelementptr inbounds float, float* %tmp3988, i64 1
+ %tmp3990 = getelementptr inbounds float, float* %tmp3989, i64 1
+ %tmp3991 = getelementptr inbounds float, float* %tmp3990, i64 1
+ %tmp3992 = getelementptr inbounds float, float* %tmp3991, i64 1
+ %tmp3993 = getelementptr inbounds float, float* %tmp3992, i64 1
+ %tmp3994 = getelementptr inbounds float, float* %tmp3993, i64 1
+ %tmp3995 = getelementptr inbounds float, float* %tmp3994, i64 1
+ %tmp3996 = getelementptr inbounds float, float* %tmp3995, i64 1
+ %tmp3997 = getelementptr inbounds float, float* %tmp3996, i64 1
+ %tmp3998 = getelementptr inbounds float, float* %tmp3997, i64 1
+ %tmp3999 = getelementptr inbounds float, float* %tmp3998, i64 1
+ %tmp4000 = getelementptr inbounds float, float* %tmp3999, i64 1
+ %tmp4001 = getelementptr inbounds float, float* %tmp4000, i64 1
+ %tmp4002 = getelementptr inbounds float, float* %tmp4001, i64 1
+ %tmp4003 = getelementptr inbounds float, float* %tmp4002, i64 1
+ %tmp4004 = getelementptr inbounds float, float* %tmp4003, i64 1
+ %tmp4005 = getelementptr inbounds float, float* %tmp4004, i64 1
+ %tmp4006 = getelementptr inbounds float, float* %tmp4005, i64 1
+ %tmp4007 = getelementptr inbounds float, float* %tmp4006, i64 1
+ %tmp4008 = getelementptr inbounds float, float* %tmp4007, i64 1
+ %tmp4009 = getelementptr inbounds float, float* %tmp4008, i64 1
+ %tmp4010 = getelementptr inbounds float, float* %tmp4009, i64 1
+ %tmp4011 = getelementptr inbounds float, float* %tmp4010, i64 1
+ %tmp4012 = getelementptr inbounds float, float* %tmp4011, i64 1
+ %tmp4013 = getelementptr inbounds float, float* %tmp4012, i64 1
+ %tmp4014 = getelementptr inbounds float, float* %tmp4013, i64 1
+ %tmp4015 = getelementptr inbounds float, float* %tmp4014, i64 1
+ %tmp4016 = getelementptr inbounds float, float* %tmp4015, i64 1
+ %tmp4017 = getelementptr inbounds float, float* %tmp4016, i64 1
+ %tmp4018 = getelementptr inbounds float, float* %tmp4017, i64 1
+ %tmp4019 = getelementptr inbounds float, float* %tmp4018, i64 1
+ %tmp4020 = getelementptr inbounds float, float* %tmp4019, i64 1
+ %tmp4021 = getelementptr inbounds float, float* %tmp4020, i64 1
+ %tmp4022 = getelementptr inbounds float, float* %tmp4021, i64 1
+ %tmp4023 = getelementptr inbounds float, float* %tmp4022, i64 1
+ %tmp4024 = getelementptr inbounds float, float* %tmp4023, i64 1
+ %tmp4025 = getelementptr inbounds float, float* %tmp4024, i64 1
+ %tmp4026 = getelementptr inbounds float, float* %tmp4025, i64 1
+ %tmp4027 = getelementptr inbounds float, float* %tmp4026, i64 1
+ %tmp4028 = getelementptr inbounds float, float* %tmp4027, i64 1
+ %tmp4029 = getelementptr inbounds float, float* %tmp4028, i64 1
+ %tmp4030 = getelementptr inbounds float, float* %tmp4029, i64 1
+ %tmp4031 = getelementptr inbounds float, float* %tmp4030, i64 1
+ %tmp4032 = getelementptr inbounds float, float* %tmp4031, i64 1
+ %tmp4033 = getelementptr inbounds float, float* %tmp4032, i64 1
+ %tmp4034 = getelementptr inbounds float, float* %tmp4033, i64 1
+ %tmp4035 = getelementptr inbounds float, float* %tmp4034, i64 1
+ %tmp4036 = getelementptr inbounds float, float* %tmp4035, i64 1
+ %tmp4037 = getelementptr inbounds float, float* %tmp4036, i64 1
+ %tmp4038 = getelementptr inbounds float, float* %tmp4037, i64 1
+ %tmp4039 = getelementptr inbounds float, float* %tmp4038, i64 1
+ %tmp4040 = getelementptr inbounds float, float* %tmp4039, i64 1
+ %tmp4041 = getelementptr inbounds float, float* %tmp4040, i64 1
+ %tmp4042 = getelementptr inbounds float, float* %tmp4041, i64 1
+ %tmp4043 = getelementptr inbounds float, float* %tmp4042, i64 1
+ %tmp4044 = getelementptr inbounds float, float* %tmp4043, i64 1
+ %tmp4045 = getelementptr inbounds float, float* %tmp4044, i64 1
+ %tmp4046 = getelementptr inbounds float, float* %tmp4045, i64 1
+ %tmp4047 = getelementptr inbounds float, float* %tmp4046, i64 1
+ %tmp4048 = getelementptr inbounds float, float* %tmp4047, i64 1
+ %tmp4049 = getelementptr inbounds float, float* %tmp4048, i64 1
+ %tmp4050 = getelementptr inbounds float, float* %tmp4049, i64 1
+ %tmp4051 = getelementptr inbounds float, float* %tmp4050, i64 1
+ %tmp4052 = getelementptr inbounds float, float* %tmp4051, i64 1
+ %tmp4053 = getelementptr inbounds float, float* %tmp4052, i64 1
+ %tmp4054 = getelementptr inbounds float, float* %tmp4053, i64 1
+ %tmp4055 = getelementptr inbounds float, float* %tmp4054, i64 1
+ %tmp4056 = getelementptr inbounds float, float* %tmp4055, i64 1
+ %tmp4057 = getelementptr inbounds float, float* %tmp4056, i64 1
+ %tmp4058 = getelementptr inbounds float, float* %tmp4057, i64 1
+ %tmp4059 = getelementptr inbounds float, float* %tmp4058, i64 1
+ %tmp4060 = getelementptr inbounds float, float* %tmp4059, i64 1
+ %tmp4061 = getelementptr inbounds float, float* %tmp4060, i64 1
+ %tmp4062 = getelementptr inbounds float, float* %tmp4061, i64 1
+ %tmp4063 = getelementptr inbounds float, float* %tmp4062, i64 1
+ %tmp4064 = getelementptr inbounds float, float* %tmp4063, i64 1
+ %tmp4065 = getelementptr inbounds float, float* %tmp4064, i64 1
+ %tmp4066 = getelementptr inbounds float, float* %tmp4065, i64 1
+ %tmp4067 = getelementptr inbounds float, float* %tmp4066, i64 1
+ %tmp4068 = getelementptr inbounds float, float* %tmp4067, i64 1
+ %tmp4069 = getelementptr inbounds float, float* %tmp4068, i64 1
+ %tmp4070 = getelementptr inbounds float, float* %tmp4069, i64 1
+ %tmp4071 = getelementptr inbounds float, float* %tmp4070, i64 1
+ %tmp4072 = getelementptr inbounds float, float* %tmp4071, i64 1
+ %tmp4073 = getelementptr inbounds float, float* %tmp4072, i64 1
+ %tmp4074 = getelementptr inbounds float, float* %tmp4073, i64 1
+ %tmp4075 = getelementptr inbounds float, float* %tmp4074, i64 1
+ %tmp4076 = getelementptr inbounds float, float* %tmp4075, i64 1
+ %tmp4077 = getelementptr inbounds float, float* %tmp4076, i64 1
+ %tmp4078 = getelementptr inbounds float, float* %tmp4077, i64 1
+ %tmp4079 = getelementptr inbounds float, float* %tmp4078, i64 1
+ %tmp4080 = getelementptr inbounds float, float* %tmp4079, i64 1
+ %tmp4081 = getelementptr inbounds float, float* %tmp4080, i64 1
+ %tmp4082 = getelementptr inbounds float, float* %tmp4081, i64 1
+ %tmp4083 = getelementptr inbounds float, float* %tmp4082, i64 1
+ %tmp4084 = getelementptr inbounds float, float* %tmp4083, i64 1
+ %tmp4085 = getelementptr inbounds float, float* %tmp4084, i64 1
+ %tmp4086 = getelementptr inbounds float, float* %tmp4085, i64 1
+ %tmp4087 = getelementptr inbounds float, float* %tmp4086, i64 1
+ %tmp4088 = getelementptr inbounds float, float* %tmp4087, i64 1
+ %tmp4089 = getelementptr inbounds float, float* %tmp4088, i64 1
+ %tmp4090 = getelementptr inbounds float, float* %tmp4089, i64 1
+ %tmp4091 = getelementptr inbounds float, float* %tmp4090, i64 1
+ %tmp4092 = getelementptr inbounds float, float* %tmp4091, i64 1
+ %tmp4093 = getelementptr inbounds float, float* %tmp4092, i64 1
+ %tmp4094 = getelementptr inbounds float, float* %tmp4093, i64 1
+ %tmp4095 = getelementptr inbounds float, float* %tmp4094, i64 1
+ %tmp4096 = getelementptr inbounds float, float* %tmp4095, i64 1
+ %tmp4097 = getelementptr inbounds float, float* %tmp4096, i64 1
+ %tmp4098 = getelementptr inbounds float, float* %tmp4097, i64 1
+ %tmp4099 = getelementptr inbounds float, float* %tmp4098, i64 1
+ %tmp4100 = getelementptr inbounds float, float* %tmp4099, i64 1
+ %tmp4101 = getelementptr inbounds float, float* %tmp4100, i64 1
+ %tmp4102 = getelementptr inbounds float, float* %tmp4101, i64 1
+ %tmp4103 = getelementptr inbounds float, float* %tmp4102, i64 1
+ %tmp4104 = getelementptr inbounds float, float* %tmp4103, i64 1
+ %tmp4105 = getelementptr inbounds float, float* %tmp4104, i64 1
+ %tmp4106 = getelementptr inbounds float, float* %tmp4105, i64 1
+ %tmp4107 = getelementptr inbounds float, float* %tmp4106, i64 1
+ %tmp4108 = getelementptr inbounds float, float* %tmp4107, i64 1
+ %tmp4109 = getelementptr inbounds float, float* %tmp4108, i64 1
+ %tmp4110 = getelementptr inbounds float, float* %tmp4109, i64 1
+ %tmp4111 = getelementptr inbounds float, float* %tmp4110, i64 1
+ %tmp4112 = getelementptr inbounds float, float* %tmp4111, i64 1
+ %tmp4113 = getelementptr inbounds float, float* %tmp4112, i64 1
+ %tmp4114 = getelementptr inbounds float, float* %tmp4113, i64 1
+ %tmp4115 = getelementptr inbounds float, float* %tmp4114, i64 1
+ %tmp4116 = getelementptr inbounds float, float* %tmp4115, i64 1
+ %tmp4117 = getelementptr inbounds float, float* %tmp4116, i64 1
+ %tmp4118 = getelementptr inbounds float, float* %tmp4117, i64 1
+ %tmp4119 = getelementptr inbounds float, float* %tmp4118, i64 1
+ %tmp4120 = getelementptr inbounds float, float* %tmp4119, i64 1
+ %tmp4121 = getelementptr inbounds float, float* %tmp4120, i64 1
+ %tmp4122 = getelementptr inbounds float, float* %tmp4121, i64 1
+ %tmp4123 = getelementptr inbounds float, float* %tmp4122, i64 1
+ %tmp4124 = getelementptr inbounds float, float* %tmp4123, i64 1
+ %tmp4125 = getelementptr inbounds float, float* %tmp4124, i64 1
+ %tmp4126 = getelementptr inbounds float, float* %tmp4125, i64 1
+ %tmp4127 = getelementptr inbounds float, float* %tmp4126, i64 1
+ %tmp4128 = getelementptr inbounds float, float* %tmp4127, i64 1
+ %tmp4129 = getelementptr inbounds float, float* %tmp4128, i64 1
+ %tmp4130 = getelementptr inbounds float, float* %tmp4129, i64 1
+ %tmp4131 = getelementptr inbounds float, float* %tmp4130, i64 1
+ %tmp4132 = getelementptr inbounds float, float* %tmp4131, i64 1
+ %tmp4133 = getelementptr inbounds float, float* %tmp4132, i64 1
+ %tmp4134 = getelementptr inbounds float, float* %tmp4133, i64 1
+ %tmp4135 = getelementptr inbounds float, float* %tmp4134, i64 1
+ %tmp4136 = getelementptr inbounds float, float* %tmp4135, i64 1
+ %tmp4137 = getelementptr inbounds float, float* %tmp4136, i64 1
+ %tmp4138 = getelementptr inbounds float, float* %tmp4137, i64 1
+ %tmp4139 = getelementptr inbounds float, float* %tmp4138, i64 1
+ %tmp4140 = getelementptr inbounds float, float* %tmp4139, i64 1
+ %tmp4141 = getelementptr inbounds float, float* %tmp4140, i64 1
+ %tmp4142 = getelementptr inbounds float, float* %tmp4141, i64 1
+ %tmp4143 = getelementptr inbounds float, float* %tmp4142, i64 1
+ %tmp4144 = getelementptr inbounds float, float* %tmp4143, i64 1
+ %tmp4145 = getelementptr inbounds float, float* %tmp4144, i64 1
+ %tmp4146 = getelementptr inbounds float, float* %tmp4145, i64 1
+ %tmp4147 = getelementptr inbounds float, float* %tmp4146, i64 1
+ %tmp4148 = getelementptr inbounds float, float* %tmp4147, i64 1
+ %tmp4149 = getelementptr inbounds float, float* %tmp4148, i64 1
+ %tmp4150 = getelementptr inbounds float, float* %tmp4149, i64 1
+ %tmp4151 = getelementptr inbounds float, float* %tmp4150, i64 1
+ %tmp4152 = getelementptr inbounds float, float* %tmp4151, i64 1
+ %tmp4153 = getelementptr inbounds float, float* %tmp4152, i64 1
+ %tmp4154 = getelementptr inbounds float, float* %tmp4153, i64 1
+ %tmp4155 = getelementptr inbounds float, float* %tmp4154, i64 1
+ %tmp4156 = getelementptr inbounds float, float* %tmp4155, i64 1
+ %tmp4157 = getelementptr inbounds float, float* %tmp4156, i64 1
+ %tmp4158 = getelementptr inbounds float, float* %tmp4157, i64 1
+ %tmp4159 = getelementptr inbounds float, float* %tmp4158, i64 1
+ %tmp4160 = getelementptr inbounds float, float* %tmp4159, i64 1
+ %tmp4161 = getelementptr inbounds float, float* %tmp4160, i64 1
+ %tmp4162 = getelementptr inbounds float, float* %tmp4161, i64 1
+ %tmp4163 = getelementptr inbounds float, float* %tmp4162, i64 1
+ %tmp4164 = getelementptr inbounds float, float* %tmp4163, i64 1
+ %tmp4165 = getelementptr inbounds float, float* %tmp4164, i64 1
+ %tmp4166 = getelementptr inbounds float, float* %tmp4165, i64 1
+ %tmp4167 = getelementptr inbounds float, float* %tmp4166, i64 1
+ %tmp4168 = getelementptr inbounds float, float* %tmp4167, i64 1
+ %tmp4169 = getelementptr inbounds float, float* %tmp4168, i64 1
+ %tmp4170 = getelementptr inbounds float, float* %tmp4169, i64 1
+ %tmp4171 = getelementptr inbounds float, float* %tmp4170, i64 1
+ %tmp4172 = getelementptr inbounds float, float* %tmp4171, i64 1
+ %tmp4173 = getelementptr inbounds float, float* %tmp4172, i64 1
+ %tmp4174 = getelementptr inbounds float, float* %tmp4173, i64 1
+ %tmp4175 = getelementptr inbounds float, float* %tmp4174, i64 1
+ %tmp4176 = getelementptr inbounds float, float* %tmp4175, i64 1
+ %tmp4177 = getelementptr inbounds float, float* %tmp4176, i64 1
+ %tmp4178 = getelementptr inbounds float, float* %tmp4177, i64 1
+ %tmp4179 = getelementptr inbounds float, float* %tmp4178, i64 1
+ %tmp4180 = getelementptr inbounds float, float* %tmp4179, i64 1
+ %tmp4181 = getelementptr inbounds float, float* %tmp4180, i64 1
+ %tmp4182 = getelementptr inbounds float, float* %tmp4181, i64 1
+ %tmp4183 = getelementptr inbounds float, float* %tmp4182, i64 1
+ %tmp4184 = getelementptr inbounds float, float* %tmp4183, i64 1
+ %tmp4185 = getelementptr inbounds float, float* %tmp4184, i64 1
+ %tmp4186 = getelementptr inbounds float, float* %tmp4185, i64 1
+ %tmp4187 = getelementptr inbounds float, float* %tmp4186, i64 1
+ %tmp4188 = getelementptr inbounds float, float* %tmp4187, i64 1
+ %tmp4189 = getelementptr inbounds float, float* %tmp4188, i64 1
+ %tmp4190 = getelementptr inbounds float, float* %tmp4189, i64 1
+ %tmp4191 = getelementptr inbounds float, float* %tmp4190, i64 1
+ %tmp4192 = getelementptr inbounds float, float* %tmp4191, i64 1
+ %tmp4193 = getelementptr inbounds float, float* %tmp4192, i64 1
+ %tmp4194 = getelementptr inbounds float, float* %tmp4193, i64 1
+ %tmp4195 = getelementptr inbounds float, float* %tmp4194, i64 1
+ %tmp4196 = getelementptr inbounds float, float* %tmp4195, i64 1
+ %tmp4197 = getelementptr inbounds float, float* %tmp4196, i64 1
+ %tmp4198 = getelementptr inbounds float, float* %tmp4197, i64 1
+ %tmp4199 = getelementptr inbounds float, float* %tmp4198, i64 1
+ %tmp4200 = getelementptr inbounds float, float* %tmp4199, i64 1
+ %tmp4201 = getelementptr inbounds float, float* %tmp4200, i64 1
+ %tmp4202 = getelementptr inbounds float, float* %tmp4201, i64 1
+ %tmp4203 = getelementptr inbounds float, float* %tmp4202, i64 1
+ %tmp4204 = getelementptr inbounds float, float* %tmp4203, i64 1
+ %tmp4205 = getelementptr inbounds float, float* %tmp4204, i64 1
+ %tmp4206 = getelementptr inbounds float, float* %tmp4205, i64 1
+ %tmp4207 = getelementptr inbounds float, float* %tmp4206, i64 1
+ %tmp4208 = getelementptr inbounds float, float* %tmp4207, i64 1
+ %tmp4209 = getelementptr inbounds float, float* %tmp4208, i64 1
+ %tmp4210 = getelementptr inbounds float, float* %tmp4209, i64 1
+ %tmp4211 = getelementptr inbounds float, float* %tmp4210, i64 1
+ %tmp4212 = getelementptr inbounds float, float* %tmp4211, i64 1
+ %tmp4213 = getelementptr inbounds float, float* %tmp4212, i64 1
+ %tmp4214 = getelementptr inbounds float, float* %tmp4213, i64 1
+ %tmp4215 = getelementptr inbounds float, float* %tmp4214, i64 1
+ %tmp4216 = getelementptr inbounds float, float* %tmp4215, i64 1
+ %tmp4217 = getelementptr inbounds float, float* %tmp4216, i64 1
+ %tmp4218 = getelementptr inbounds float, float* %tmp4217, i64 1
+ %tmp4219 = getelementptr inbounds float, float* %tmp4218, i64 1
+ %tmp4220 = getelementptr inbounds float, float* %tmp4219, i64 1
+ %tmp4221 = getelementptr inbounds float, float* %tmp4220, i64 1
+ %tmp4222 = getelementptr inbounds float, float* %tmp4221, i64 1
+ %tmp4223 = getelementptr inbounds float, float* %tmp4222, i64 1
+ %tmp4224 = getelementptr inbounds float, float* %tmp4223, i64 1
+ %tmp4225 = getelementptr inbounds float, float* %tmp4224, i64 1
+ %tmp4226 = getelementptr inbounds float, float* %tmp4225, i64 1
+ %tmp4227 = getelementptr inbounds float, float* %tmp4226, i64 1
+ %tmp4228 = getelementptr inbounds float, float* %tmp4227, i64 1
+ %tmp4229 = getelementptr inbounds float, float* %tmp4228, i64 1
+ %tmp4230 = getelementptr inbounds float, float* %tmp4229, i64 1
+ %tmp4231 = getelementptr inbounds float, float* %tmp4230, i64 1
+ %tmp4232 = getelementptr inbounds float, float* %tmp4231, i64 1
+ %tmp4233 = getelementptr inbounds float, float* %tmp4232, i64 1
+ %tmp4234 = getelementptr inbounds float, float* %tmp4233, i64 1
+ %tmp4235 = getelementptr inbounds float, float* %tmp4234, i64 1
+ %tmp4236 = getelementptr inbounds float, float* %tmp4235, i64 1
+ %tmp4237 = getelementptr inbounds float, float* %tmp4236, i64 1
+ %tmp4238 = getelementptr inbounds float, float* %tmp4237, i64 1
+ %tmp4239 = getelementptr inbounds float, float* %tmp4238, i64 1
+ %tmp4240 = getelementptr inbounds float, float* %tmp4239, i64 1
+ %tmp4241 = getelementptr inbounds float, float* %tmp4240, i64 1
+ %tmp4242 = getelementptr inbounds float, float* %tmp4241, i64 1
+ %tmp4243 = getelementptr inbounds float, float* %tmp4242, i64 1
+ %tmp4244 = getelementptr inbounds float, float* %tmp4243, i64 1
+ %tmp4245 = getelementptr inbounds float, float* %tmp4244, i64 1
+ %tmp4246 = getelementptr inbounds float, float* %tmp4245, i64 1
+ %tmp4247 = getelementptr inbounds float, float* %tmp4246, i64 1
+ %tmp4248 = getelementptr inbounds float, float* %tmp4247, i64 1
+ %tmp4249 = getelementptr inbounds float, float* %tmp4248, i64 1
+ %tmp4250 = getelementptr inbounds float, float* %tmp4249, i64 1
+ %tmp4251 = getelementptr inbounds float, float* %tmp4250, i64 1
+ %tmp4252 = getelementptr inbounds float, float* %tmp4251, i64 1
+ %tmp4253 = getelementptr inbounds float, float* %tmp4252, i64 1
+ %tmp4254 = getelementptr inbounds float, float* %tmp4253, i64 1
+ %tmp4255 = getelementptr inbounds float, float* %tmp4254, i64 1
+ %tmp4256 = getelementptr inbounds float, float* %tmp4255, i64 1
+ %tmp4257 = getelementptr inbounds float, float* %tmp4256, i64 1
+ %tmp4258 = getelementptr inbounds float, float* %tmp4257, i64 1
+ %tmp4259 = getelementptr inbounds float, float* %tmp4258, i64 1
+ %tmp4260 = getelementptr inbounds float, float* %tmp4259, i64 1
+ %tmp4261 = getelementptr inbounds float, float* %tmp4260, i64 1
+ %tmp4262 = getelementptr inbounds float, float* %tmp4261, i64 1
+ %tmp4263 = getelementptr inbounds float, float* %tmp4262, i64 1
+ %tmp4264 = getelementptr inbounds float, float* %tmp4263, i64 1
+ %tmp4265 = getelementptr inbounds float, float* %tmp4264, i64 1
+ %tmp4266 = getelementptr inbounds float, float* %tmp4265, i64 1
+ %tmp4267 = getelementptr inbounds float, float* %tmp4266, i64 1
+ %tmp4268 = getelementptr inbounds float, float* %tmp4267, i64 1
+ %tmp4269 = getelementptr inbounds float, float* %tmp4268, i64 1
+ %tmp4270 = getelementptr inbounds float, float* %tmp4269, i64 1
+ %tmp4271 = getelementptr inbounds float, float* %tmp4270, i64 1
+ %tmp4272 = getelementptr inbounds float, float* %tmp4271, i64 1
+ %tmp4273 = getelementptr inbounds float, float* %tmp4272, i64 1
+ %tmp4274 = getelementptr inbounds float, float* %tmp4273, i64 1
+ %tmp4275 = getelementptr inbounds float, float* %tmp4274, i64 1
+ %tmp4276 = getelementptr inbounds float, float* %tmp4275, i64 1
+ %tmp4277 = getelementptr inbounds float, float* %tmp4276, i64 1
+ %tmp4278 = getelementptr inbounds float, float* %tmp4277, i64 1
+ %tmp4279 = getelementptr inbounds float, float* %tmp4278, i64 1
+ %tmp4280 = getelementptr inbounds float, float* %tmp4279, i64 1
+ %tmp4281 = getelementptr inbounds float, float* %tmp4280, i64 1
+ %tmp4282 = getelementptr inbounds float, float* %tmp4281, i64 1
+ %tmp4283 = getelementptr inbounds float, float* %tmp4282, i64 1
+ %tmp4284 = getelementptr inbounds float, float* %tmp4283, i64 1
+ %tmp4285 = getelementptr inbounds float, float* %tmp4284, i64 1
+ %tmp4286 = getelementptr inbounds float, float* %tmp4285, i64 1
+ %tmp4287 = getelementptr inbounds float, float* %tmp4286, i64 1
+ %tmp4288 = getelementptr inbounds float, float* %tmp4287, i64 1
+ %tmp4289 = getelementptr inbounds float, float* %tmp4288, i64 1
+ %tmp4290 = getelementptr inbounds float, float* %tmp4289, i64 1
+ %tmp4291 = getelementptr inbounds float, float* %tmp4290, i64 1
+ %tmp4292 = getelementptr inbounds float, float* %tmp4291, i64 1
+ %tmp4293 = getelementptr inbounds float, float* %tmp4292, i64 1
+ %tmp4294 = getelementptr inbounds float, float* %tmp4293, i64 1
+ %tmp4295 = getelementptr inbounds float, float* %tmp4294, i64 1
+ %tmp4296 = getelementptr inbounds float, float* %tmp4295, i64 1
+ %tmp4297 = getelementptr inbounds float, float* %tmp4296, i64 1
+ %tmp4298 = getelementptr inbounds float, float* %tmp4297, i64 1
+ %tmp4299 = getelementptr inbounds float, float* %tmp4298, i64 1
+ %tmp4300 = getelementptr inbounds float, float* %tmp4299, i64 1
+ %tmp4301 = getelementptr inbounds float, float* %tmp4300, i64 1
+ %tmp4302 = getelementptr inbounds float, float* %tmp4301, i64 1
+ %tmp4303 = getelementptr inbounds float, float* %tmp4302, i64 1
+ %tmp4304 = getelementptr inbounds float, float* %tmp4303, i64 1
+ %tmp4305 = getelementptr inbounds float, float* %tmp4304, i64 1
+ %tmp4306 = getelementptr inbounds float, float* %tmp4305, i64 1
+ %tmp4307 = getelementptr inbounds float, float* %tmp4306, i64 1
+ %tmp4308 = getelementptr inbounds float, float* %tmp4307, i64 1
+ %tmp4309 = getelementptr inbounds float, float* %tmp4308, i64 1
+ %tmp4310 = getelementptr inbounds float, float* %tmp4309, i64 1
+ %tmp4311 = getelementptr inbounds float, float* %tmp4310, i64 1
+ %tmp4312 = getelementptr inbounds float, float* %tmp4311, i64 1
+ %tmp4313 = getelementptr inbounds float, float* %tmp4312, i64 1
+ %tmp4314 = getelementptr inbounds float, float* %tmp4313, i64 1
+ %tmp4315 = getelementptr inbounds float, float* %tmp4314, i64 1
+ %tmp4316 = getelementptr inbounds float, float* %tmp4315, i64 1
+ %tmp4317 = getelementptr inbounds float, float* %tmp4316, i64 1
+ %tmp4318 = getelementptr inbounds float, float* %tmp4317, i64 1
+ %tmp4319 = getelementptr inbounds float, float* %tmp4318, i64 1
+ %tmp4320 = getelementptr inbounds float, float* %tmp4319, i64 1
+ %tmp4321 = getelementptr inbounds float, float* %tmp4320, i64 1
+ %tmp4322 = getelementptr inbounds float, float* %tmp4321, i64 1
+ %tmp4323 = getelementptr inbounds float, float* %tmp4322, i64 1
+ %tmp4324 = getelementptr inbounds float, float* %tmp4323, i64 1
+ %tmp4325 = getelementptr inbounds float, float* %tmp4324, i64 1
+ %tmp4326 = getelementptr inbounds float, float* %tmp4325, i64 1
+ %tmp4327 = getelementptr inbounds float, float* %tmp4326, i64 1
+ %tmp4328 = getelementptr inbounds float, float* %tmp4327, i64 1
+ %tmp4329 = getelementptr inbounds float, float* %tmp4328, i64 1
+ %tmp4330 = getelementptr inbounds float, float* %tmp4329, i64 1
+ %tmp4331 = getelementptr inbounds float, float* %tmp4330, i64 1
+ %tmp4332 = getelementptr inbounds float, float* %tmp4331, i64 1
+ %tmp4333 = getelementptr inbounds float, float* %tmp4332, i64 1
+ %tmp4334 = getelementptr inbounds float, float* %tmp4333, i64 1
+ %tmp4335 = getelementptr inbounds float, float* %tmp4334, i64 1
+ %tmp4336 = getelementptr inbounds float, float* %tmp4335, i64 1
+ %tmp4337 = getelementptr inbounds float, float* %tmp4336, i64 1
+ %tmp4338 = getelementptr inbounds float, float* %tmp4337, i64 1
+ %tmp4339 = getelementptr inbounds float, float* %tmp4338, i64 1
+ %tmp4340 = getelementptr inbounds float, float* %tmp4339, i64 1
+ %tmp4341 = getelementptr inbounds float, float* %tmp4340, i64 1
+ %tmp4342 = getelementptr inbounds float, float* %tmp4341, i64 1
+ %tmp4343 = getelementptr inbounds float, float* %tmp4342, i64 1
+ %tmp4344 = getelementptr inbounds float, float* %tmp4343, i64 1
+ %tmp4345 = getelementptr inbounds float, float* %tmp4344, i64 1
+ %tmp4346 = getelementptr inbounds float, float* %tmp4345, i64 1
+ %tmp4347 = getelementptr inbounds float, float* %tmp4346, i64 1
+ %tmp4348 = getelementptr inbounds float, float* %tmp4347, i64 1
+ %tmp4349 = getelementptr inbounds float, float* %tmp4348, i64 1
+ %tmp4350 = getelementptr inbounds float, float* %tmp4349, i64 1
+ %tmp4351 = getelementptr inbounds float, float* %tmp4350, i64 1
+ %tmp4352 = getelementptr inbounds float, float* %tmp4351, i64 1
+ %tmp4353 = getelementptr inbounds float, float* %tmp4352, i64 1
+ %tmp4354 = getelementptr inbounds float, float* %tmp4353, i64 1
+ %tmp4355 = getelementptr inbounds float, float* %tmp4354, i64 1
+ %tmp4356 = getelementptr inbounds float, float* %tmp4355, i64 1
+ %tmp4357 = getelementptr inbounds float, float* %tmp4356, i64 1
+ %tmp4358 = getelementptr inbounds float, float* %tmp4357, i64 1
+ %tmp4359 = getelementptr inbounds float, float* %tmp4358, i64 1
+ %tmp4360 = getelementptr inbounds float, float* %tmp4359, i64 1
+ %tmp4361 = getelementptr inbounds float, float* %tmp4360, i64 1
+ %tmp4362 = getelementptr inbounds float, float* %tmp4361, i64 1
+ %tmp4363 = getelementptr inbounds float, float* %tmp4362, i64 1
+ %tmp4364 = getelementptr inbounds float, float* %tmp4363, i64 1
+ %tmp4365 = getelementptr inbounds float, float* %tmp4364, i64 1
+ %tmp4366 = getelementptr inbounds float, float* %tmp4365, i64 1
+ %tmp4367 = getelementptr inbounds float, float* %tmp4366, i64 1
+ %tmp4368 = getelementptr inbounds float, float* %tmp4367, i64 1
+ %tmp4369 = getelementptr inbounds float, float* %tmp4368, i64 1
+ %tmp4370 = getelementptr inbounds float, float* %tmp4369, i64 1
+ %tmp4371 = getelementptr inbounds float, float* %tmp4370, i64 1
+ %tmp4372 = getelementptr inbounds float, float* %tmp4371, i64 1
+ %tmp4373 = getelementptr inbounds float, float* %tmp4372, i64 1
+ %tmp4374 = getelementptr inbounds float, float* %tmp4373, i64 1
+ %tmp4375 = getelementptr inbounds float, float* %tmp4374, i64 1
+ %tmp4376 = getelementptr inbounds float, float* %tmp4375, i64 1
+ %tmp4377 = getelementptr inbounds float, float* %tmp4376, i64 1
+ %tmp4378 = getelementptr inbounds float, float* %tmp4377, i64 1
+ %tmp4379 = getelementptr inbounds float, float* %tmp4378, i64 1
+ %tmp4380 = getelementptr inbounds float, float* %tmp4379, i64 1
+ %tmp4381 = getelementptr inbounds float, float* %tmp4380, i64 1
+ %tmp4382 = getelementptr inbounds float, float* %tmp4381, i64 1
+ %tmp4383 = getelementptr inbounds float, float* %tmp4382, i64 1
+ %tmp4384 = getelementptr inbounds float, float* %tmp4383, i64 1
+ %tmp4385 = getelementptr inbounds float, float* %tmp4384, i64 1
+ %tmp4386 = getelementptr inbounds float, float* %tmp4385, i64 1
+ %tmp4387 = getelementptr inbounds float, float* %tmp4386, i64 1
+ %tmp4388 = getelementptr inbounds float, float* %tmp4387, i64 1
+ %tmp4389 = getelementptr inbounds float, float* %tmp4388, i64 1
+ %tmp4390 = getelementptr inbounds float, float* %tmp4389, i64 1
+ %tmp4391 = getelementptr inbounds float, float* %tmp4390, i64 1
+ %tmp4392 = getelementptr inbounds float, float* %tmp4391, i64 1
+ %tmp4393 = getelementptr inbounds float, float* %tmp4392, i64 1
+ %tmp4394 = getelementptr inbounds float, float* %tmp4393, i64 1
+ %tmp4395 = getelementptr inbounds float, float* %tmp4394, i64 1
+ %tmp4396 = getelementptr inbounds float, float* %tmp4395, i64 1
+ %tmp4397 = getelementptr inbounds float, float* %tmp4396, i64 1
+ %tmp4398 = getelementptr inbounds float, float* %tmp4397, i64 1
+ %tmp4399 = getelementptr inbounds float, float* %tmp4398, i64 1
+ %tmp4400 = getelementptr inbounds float, float* %tmp4399, i64 1
+ %tmp4401 = getelementptr inbounds float, float* %tmp4400, i64 1
+ %tmp4402 = getelementptr inbounds float, float* %tmp4401, i64 1
+ %tmp4403 = getelementptr inbounds float, float* %tmp4402, i64 1
+ %tmp4404 = getelementptr inbounds float, float* %tmp4403, i64 1
+ %tmp4405 = getelementptr inbounds float, float* %tmp4404, i64 1
+ %tmp4406 = getelementptr inbounds float, float* %tmp4405, i64 1
+ %tmp4407 = getelementptr inbounds float, float* %tmp4406, i64 1
+ %tmp4408 = getelementptr inbounds float, float* %tmp4407, i64 1
+ %tmp4409 = getelementptr inbounds float, float* %tmp4408, i64 1
+ %tmp4410 = getelementptr inbounds float, float* %tmp4409, i64 1
+ %tmp4411 = getelementptr inbounds float, float* %tmp4410, i64 1
+ %tmp4412 = getelementptr inbounds float, float* %tmp4411, i64 1
+ %tmp4413 = getelementptr inbounds float, float* %tmp4412, i64 1
+ %tmp4414 = getelementptr inbounds float, float* %tmp4413, i64 1
+ %tmp4415 = getelementptr inbounds float, float* %tmp4414, i64 1
+ %tmp4416 = getelementptr inbounds float, float* %tmp4415, i64 1
+ %tmp4417 = getelementptr inbounds float, float* %tmp4416, i64 1
+ %tmp4418 = getelementptr inbounds float, float* %tmp4417, i64 1
+ %tmp4419 = getelementptr inbounds float, float* %tmp4418, i64 1
+ %tmp4420 = getelementptr inbounds float, float* %tmp4419, i64 1
+ %tmp4421 = getelementptr inbounds float, float* %tmp4420, i64 1
+ %tmp4422 = getelementptr inbounds float, float* %tmp4421, i64 1
+ %tmp4423 = getelementptr inbounds float, float* %tmp4422, i64 1
+ %tmp4424 = getelementptr inbounds float, float* %tmp4423, i64 1
+ %tmp4425 = getelementptr inbounds float, float* %tmp4424, i64 1
+ %tmp4426 = getelementptr inbounds float, float* %tmp4425, i64 1
+ %tmp4427 = getelementptr inbounds float, float* %tmp4426, i64 1
+ %tmp4428 = getelementptr inbounds float, float* %tmp4427, i64 1
+ %tmp4429 = getelementptr inbounds float, float* %tmp4428, i64 1
+ %tmp4430 = getelementptr inbounds float, float* %tmp4429, i64 1
+ %tmp4431 = getelementptr inbounds float, float* %tmp4430, i64 1
+ %tmp4432 = getelementptr inbounds float, float* %tmp4431, i64 1
+ %tmp4433 = getelementptr inbounds float, float* %tmp4432, i64 1
+ %tmp4434 = getelementptr inbounds float, float* %tmp4433, i64 1
+ %tmp4435 = getelementptr inbounds float, float* %tmp4434, i64 1
+ %tmp4436 = getelementptr inbounds float, float* %tmp4435, i64 1
+ %tmp4437 = getelementptr inbounds float, float* %tmp4436, i64 1
+ %tmp4438 = getelementptr inbounds float, float* %tmp4437, i64 1
+ %tmp4439 = getelementptr inbounds float, float* %tmp4438, i64 1
+ %tmp4440 = getelementptr inbounds float, float* %tmp4439, i64 1
+ %tmp4441 = getelementptr inbounds float, float* %tmp4440, i64 1
+ %tmp4442 = getelementptr inbounds float, float* %tmp4441, i64 1
+ %tmp4443 = getelementptr inbounds float, float* %tmp4442, i64 1
+ %tmp4444 = getelementptr inbounds float, float* %tmp4443, i64 1
+ %tmp4445 = getelementptr inbounds float, float* %tmp4444, i64 1
+ %tmp4446 = getelementptr inbounds float, float* %tmp4445, i64 1
+ %tmp4447 = getelementptr inbounds float, float* %tmp4446, i64 1
+ %tmp4448 = getelementptr inbounds float, float* %tmp4447, i64 1
+ %tmp4449 = getelementptr inbounds float, float* %tmp4448, i64 1
+ %tmp4450 = getelementptr inbounds float, float* %tmp4449, i64 1
+ %tmp4451 = getelementptr inbounds float, float* %tmp4450, i64 1
+ %tmp4452 = getelementptr inbounds float, float* %tmp4451, i64 1
+ %tmp4453 = getelementptr inbounds float, float* %tmp4452, i64 1
+ %tmp4454 = getelementptr inbounds float, float* %tmp4453, i64 1
+ %tmp4455 = getelementptr inbounds float, float* %tmp4454, i64 1
+ %tmp4456 = getelementptr inbounds float, float* %tmp4455, i64 1
+ %tmp4457 = getelementptr inbounds float, float* %tmp4456, i64 1
+ %tmp4458 = getelementptr inbounds float, float* %tmp4457, i64 1
+ %tmp4459 = getelementptr inbounds float, float* %tmp4458, i64 1
+ %tmp4460 = getelementptr inbounds float, float* %tmp4459, i64 1
+ %tmp4461 = getelementptr inbounds float, float* %tmp4460, i64 1
+ %tmp4462 = getelementptr inbounds float, float* %tmp4461, i64 1
+ %tmp4463 = getelementptr inbounds float, float* %tmp4462, i64 1
+ %tmp4464 = getelementptr inbounds float, float* %tmp4463, i64 1
+ %tmp4465 = getelementptr inbounds float, float* %tmp4464, i64 1
+ %tmp4466 = getelementptr inbounds float, float* %tmp4465, i64 1
+ %tmp4467 = getelementptr inbounds float, float* %tmp4466, i64 1
+ %tmp4468 = getelementptr inbounds float, float* %tmp4467, i64 1
+ %tmp4469 = getelementptr inbounds float, float* %tmp4468, i64 1
+ %tmp4470 = getelementptr inbounds float, float* %tmp4469, i64 1
+ %tmp4471 = getelementptr inbounds float, float* %tmp4470, i64 1
+ %tmp4472 = getelementptr inbounds float, float* %tmp4471, i64 1
+ %tmp4473 = getelementptr inbounds float, float* %tmp4472, i64 1
+ %tmp4474 = getelementptr inbounds float, float* %tmp4473, i64 1
+ %tmp4475 = getelementptr inbounds float, float* %tmp4474, i64 1
+ %tmp4476 = getelementptr inbounds float, float* %tmp4475, i64 1
+ %tmp4477 = getelementptr inbounds float, float* %tmp4476, i64 1
+ %tmp4478 = getelementptr inbounds float, float* %tmp4477, i64 1
+ %tmp4479 = getelementptr inbounds float, float* %tmp4478, i64 1
+ %tmp4480 = getelementptr inbounds float, float* %tmp4479, i64 1
+ %tmp4481 = getelementptr inbounds float, float* %tmp4480, i64 1
+ %tmp4482 = getelementptr inbounds float, float* %tmp4481, i64 1
+ %tmp4483 = getelementptr inbounds float, float* %tmp4482, i64 1
+ %tmp4484 = getelementptr inbounds float, float* %tmp4483, i64 1
+ %tmp4485 = getelementptr inbounds float, float* %tmp4484, i64 1
+ %tmp4486 = getelementptr inbounds float, float* %tmp4485, i64 1
+ %tmp4487 = getelementptr inbounds float, float* %tmp4486, i64 1
+ %tmp4488 = getelementptr inbounds float, float* %tmp4487, i64 1
+ %tmp4489 = getelementptr inbounds float, float* %tmp4488, i64 1
+ %tmp4490 = getelementptr inbounds float, float* %tmp4489, i64 1
+ %tmp4491 = getelementptr inbounds float, float* %tmp4490, i64 1
+ %tmp4492 = getelementptr inbounds float, float* %tmp4491, i64 1
+ %tmp4493 = getelementptr inbounds float, float* %tmp4492, i64 1
+ %tmp4494 = getelementptr inbounds float, float* %tmp4493, i64 1
+ %tmp4495 = getelementptr inbounds float, float* %tmp4494, i64 1
+ %tmp4496 = getelementptr inbounds float, float* %tmp4495, i64 1
+ %tmp4497 = getelementptr inbounds float, float* %tmp4496, i64 1
+ %tmp4498 = getelementptr inbounds float, float* %tmp4497, i64 1
+ %tmp4499 = getelementptr inbounds float, float* %tmp4498, i64 1
+ %tmp4500 = getelementptr inbounds float, float* %tmp4499, i64 1
+ %tmp4501 = getelementptr inbounds float, float* %tmp4500, i64 1
+ %tmp4502 = getelementptr inbounds float, float* %tmp4501, i64 1
+ %tmp4503 = getelementptr inbounds float, float* %tmp4502, i64 1
+ %tmp4504 = getelementptr inbounds float, float* %tmp4503, i64 1
+ %tmp4505 = getelementptr inbounds float, float* %tmp4504, i64 1
+ %tmp4506 = getelementptr inbounds float, float* %tmp4505, i64 1
+ %tmp4507 = getelementptr inbounds float, float* %tmp4506, i64 1
+ %tmp4508 = getelementptr inbounds float, float* %tmp4507, i64 1
+ %tmp4509 = getelementptr inbounds float, float* %tmp4508, i64 1
+ %tmp4510 = getelementptr inbounds float, float* %tmp4509, i64 1
+ %tmp4511 = getelementptr inbounds float, float* %tmp4510, i64 1
+ %tmp4512 = getelementptr inbounds float, float* %tmp4511, i64 1
+ %tmp4513 = getelementptr inbounds float, float* %tmp4512, i64 1
+ %tmp4514 = getelementptr inbounds float, float* %tmp4513, i64 1
+ %tmp4515 = getelementptr inbounds float, float* %tmp4514, i64 1
+ %tmp4516 = getelementptr inbounds float, float* %tmp4515, i64 1
+ %tmp4517 = getelementptr inbounds float, float* %tmp4516, i64 1
+ %tmp4518 = getelementptr inbounds float, float* %tmp4517, i64 1
+ %tmp4519 = getelementptr inbounds float, float* %tmp4518, i64 1
+ %tmp4520 = getelementptr inbounds float, float* %tmp4519, i64 1
+ %tmp4521 = getelementptr inbounds float, float* %tmp4520, i64 1
+ %tmp4522 = getelementptr inbounds float, float* %tmp4521, i64 1
+ %tmp4523 = getelementptr inbounds float, float* %tmp4522, i64 1
+ %tmp4524 = getelementptr inbounds float, float* %tmp4523, i64 1
+ %tmp4525 = getelementptr inbounds float, float* %tmp4524, i64 1
+ %tmp4526 = getelementptr inbounds float, float* %tmp4525, i64 1
+ %tmp4527 = getelementptr inbounds float, float* %tmp4526, i64 1
+ %tmp4528 = getelementptr inbounds float, float* %tmp4527, i64 1
+ %tmp4529 = getelementptr inbounds float, float* %tmp4528, i64 1
+ %tmp4530 = getelementptr inbounds float, float* %tmp4529, i64 1
+ %tmp4531 = getelementptr inbounds float, float* %tmp4530, i64 1
+ %tmp4532 = getelementptr inbounds float, float* %tmp4531, i64 1
+ %tmp4533 = getelementptr inbounds float, float* %tmp4532, i64 1
+ %tmp4534 = getelementptr inbounds float, float* %tmp4533, i64 1
+ %tmp4535 = getelementptr inbounds float, float* %tmp4534, i64 1
+ %tmp4536 = getelementptr inbounds float, float* %tmp4535, i64 1
+ %tmp4537 = getelementptr inbounds float, float* %tmp4536, i64 1
+ %tmp4538 = getelementptr inbounds float, float* %tmp4537, i64 1
+ %tmp4539 = getelementptr inbounds float, float* %tmp4538, i64 1
+ %tmp4540 = getelementptr inbounds float, float* %tmp4539, i64 1
+ %tmp4541 = getelementptr inbounds float, float* %tmp4540, i64 1
+ %tmp4542 = getelementptr inbounds float, float* %tmp4541, i64 1
+ %tmp4543 = getelementptr inbounds float, float* %tmp4542, i64 1
+ %tmp4544 = getelementptr inbounds float, float* %tmp4543, i64 1
+ %tmp4545 = getelementptr inbounds float, float* %tmp4544, i64 1
+ %tmp4546 = getelementptr inbounds float, float* %tmp4545, i64 1
+ %tmp4547 = getelementptr inbounds float, float* %tmp4546, i64 1
+ %tmp4548 = getelementptr inbounds float, float* %tmp4547, i64 1
+ %tmp4549 = getelementptr inbounds float, float* %tmp4548, i64 1
+ %tmp4550 = getelementptr inbounds float, float* %tmp4549, i64 1
+ %tmp4551 = getelementptr inbounds float, float* %tmp4550, i64 1
+ %tmp4552 = getelementptr inbounds float, float* %tmp4551, i64 1
+ %tmp4553 = getelementptr inbounds float, float* %tmp4552, i64 1
+ %tmp4554 = getelementptr inbounds float, float* %tmp4553, i64 1
+ %tmp4555 = getelementptr inbounds float, float* %tmp4554, i64 1
+ %tmp4556 = getelementptr inbounds float, float* %tmp4555, i64 1
+ %tmp4557 = getelementptr inbounds float, float* %tmp4556, i64 1
+ %tmp4558 = getelementptr inbounds float, float* %tmp4557, i64 1
+ %tmp4559 = getelementptr inbounds float, float* %tmp4558, i64 1
+ %tmp4560 = getelementptr inbounds float, float* %tmp4559, i64 1
+ %tmp4561 = getelementptr inbounds float, float* %tmp4560, i64 1
+ %tmp4562 = getelementptr inbounds float, float* %tmp4561, i64 1
+ %tmp4563 = getelementptr inbounds float, float* %tmp4562, i64 1
+ %tmp4564 = getelementptr inbounds float, float* %tmp4563, i64 1
+ %tmp4565 = getelementptr inbounds float, float* %tmp4564, i64 1
+ %tmp4566 = getelementptr inbounds float, float* %tmp4565, i64 1
+ %tmp4567 = getelementptr inbounds float, float* %tmp4566, i64 1
+ %tmp4568 = getelementptr inbounds float, float* %tmp4567, i64 1
+ %tmp4569 = getelementptr inbounds float, float* %tmp4568, i64 1
+ %tmp4570 = getelementptr inbounds float, float* %tmp4569, i64 1
+ %tmp4571 = getelementptr inbounds float, float* %tmp4570, i64 1
+ %tmp4572 = getelementptr inbounds float, float* %tmp4571, i64 1
+ %tmp4573 = getelementptr inbounds float, float* %tmp4572, i64 1
+ %tmp4574 = getelementptr inbounds float, float* %tmp4573, i64 1
+ %tmp4575 = getelementptr inbounds float, float* %tmp4574, i64 1
+ %tmp4576 = getelementptr inbounds float, float* %tmp4575, i64 1
+ %tmp4577 = getelementptr inbounds float, float* %tmp4576, i64 1
+ %tmp4578 = getelementptr inbounds float, float* %tmp4577, i64 1
+ %tmp4579 = getelementptr inbounds float, float* %tmp4578, i64 1
+ %tmp4580 = getelementptr inbounds float, float* %tmp4579, i64 1
+ %tmp4581 = getelementptr inbounds float, float* %tmp4580, i64 1
+ %tmp4582 = getelementptr inbounds float, float* %tmp4581, i64 1
+ %tmp4583 = getelementptr inbounds float, float* %tmp4582, i64 1
+ %tmp4584 = getelementptr inbounds float, float* %tmp4583, i64 1
+ %tmp4585 = getelementptr inbounds float, float* %tmp4584, i64 1
+ %tmp4586 = getelementptr inbounds float, float* %tmp4585, i64 1
+ %tmp4587 = getelementptr inbounds float, float* %tmp4586, i64 1
+ %tmp4588 = getelementptr inbounds float, float* %tmp4587, i64 1
+ %tmp4589 = getelementptr inbounds float, float* %tmp4588, i64 1
+ %tmp4590 = getelementptr inbounds float, float* %tmp4589, i64 1
+ %tmp4591 = getelementptr inbounds float, float* %tmp4590, i64 1
+ %tmp4592 = getelementptr inbounds float, float* %tmp4591, i64 1
+ %tmp4593 = getelementptr inbounds float, float* %tmp4592, i64 1
+ %tmp4594 = getelementptr inbounds float, float* %tmp4593, i64 1
+ %tmp4595 = getelementptr inbounds float, float* %tmp4594, i64 1
+ %tmp4596 = getelementptr inbounds float, float* %tmp4595, i64 1
+ %tmp4597 = getelementptr inbounds float, float* %tmp4596, i64 1
+ %tmp4598 = getelementptr inbounds float, float* %tmp4597, i64 1
+ %tmp4599 = getelementptr inbounds float, float* %tmp4598, i64 1
+ %tmp4600 = getelementptr inbounds float, float* %tmp4599, i64 1
+ %tmp4601 = getelementptr inbounds float, float* %tmp4600, i64 1
+ %tmp4602 = getelementptr inbounds float, float* %tmp4601, i64 1
+ %tmp4603 = getelementptr inbounds float, float* %tmp4602, i64 1
+ %tmp4604 = getelementptr inbounds float, float* %tmp4603, i64 1
+ %tmp4605 = getelementptr inbounds float, float* %tmp4604, i64 1
+ %tmp4606 = getelementptr inbounds float, float* %tmp4605, i64 1
+ %tmp4607 = getelementptr inbounds float, float* %tmp4606, i64 1
+ %tmp4608 = getelementptr inbounds float, float* %tmp4607, i64 1
+ %tmp4609 = getelementptr inbounds float, float* %tmp4608, i64 1
+ %tmp4610 = getelementptr inbounds float, float* %tmp4609, i64 1
+ %tmp4611 = getelementptr inbounds float, float* %tmp4610, i64 1
+ %tmp4612 = getelementptr inbounds float, float* %tmp4611, i64 1
+ %tmp4613 = getelementptr inbounds float, float* %tmp4612, i64 1
+ %tmp4614 = getelementptr inbounds float, float* %tmp4613, i64 1
+ %tmp4615 = getelementptr inbounds float, float* %tmp4614, i64 1
+ %tmp4616 = getelementptr inbounds float, float* %tmp4615, i64 1
+ %tmp4617 = getelementptr inbounds float, float* %tmp4616, i64 1
+ %tmp4618 = getelementptr inbounds float, float* %tmp4617, i64 1
+ %tmp4619 = getelementptr inbounds float, float* %tmp4618, i64 1
+ %tmp4620 = getelementptr inbounds float, float* %tmp4619, i64 1
+ %tmp4621 = getelementptr inbounds float, float* %tmp4620, i64 1
+ %tmp4622 = getelementptr inbounds float, float* %tmp4621, i64 1
+ %tmp4623 = getelementptr inbounds float, float* %tmp4622, i64 1
+ %tmp4624 = getelementptr inbounds float, float* %tmp4623, i64 1
+ %tmp4625 = getelementptr inbounds float, float* %tmp4624, i64 1
+ %tmp4626 = getelementptr inbounds float, float* %tmp4625, i64 1
+ %tmp4627 = getelementptr inbounds float, float* %tmp4626, i64 1
+ %tmp4628 = getelementptr inbounds float, float* %tmp4627, i64 1
+ %tmp4629 = getelementptr inbounds float, float* %tmp4628, i64 1
+ %tmp4630 = getelementptr inbounds float, float* %tmp4629, i64 1
+ %tmp4631 = getelementptr inbounds float, float* %tmp4630, i64 1
+ %tmp4632 = getelementptr inbounds float, float* %tmp4631, i64 1
+ %tmp4633 = getelementptr inbounds float, float* %tmp4632, i64 1
+ %tmp4634 = getelementptr inbounds float, float* %tmp4633, i64 1
+ %tmp4635 = getelementptr inbounds float, float* %tmp4634, i64 1
+ %tmp4636 = getelementptr inbounds float, float* %tmp4635, i64 1
+ %tmp4637 = getelementptr inbounds float, float* %tmp4636, i64 1
+ %tmp4638 = getelementptr inbounds float, float* %tmp4637, i64 1
+ %tmp4639 = getelementptr inbounds float, float* %tmp4638, i64 1
+ %tmp4640 = getelementptr inbounds float, float* %tmp4639, i64 1
+ %tmp4641 = getelementptr inbounds float, float* %tmp4640, i64 1
+ %tmp4642 = getelementptr inbounds float, float* %tmp4641, i64 1
+ %tmp4643 = getelementptr inbounds float, float* %tmp4642, i64 1
+ %tmp4644 = getelementptr inbounds float, float* %tmp4643, i64 1
+ %tmp4645 = getelementptr inbounds float, float* %tmp4644, i64 1
+ %tmp4646 = getelementptr inbounds float, float* %tmp4645, i64 1
+ %tmp4647 = getelementptr inbounds float, float* %tmp4646, i64 1
+ %tmp4648 = getelementptr inbounds float, float* %tmp4647, i64 1
+ %tmp4649 = getelementptr inbounds float, float* %tmp4648, i64 1
+ %tmp4650 = getelementptr inbounds float, float* %tmp4649, i64 1
+ %tmp4651 = getelementptr inbounds float, float* %tmp4650, i64 1
+ %tmp4652 = getelementptr inbounds float, float* %tmp4651, i64 1
+ %tmp4653 = getelementptr inbounds float, float* %tmp4652, i64 1
+ %tmp4654 = getelementptr inbounds float, float* %tmp4653, i64 1
+ %tmp4655 = getelementptr inbounds float, float* %tmp4654, i64 1
+ %tmp4656 = getelementptr inbounds float, float* %tmp4655, i64 1
+ %tmp4657 = getelementptr inbounds float, float* %tmp4656, i64 1
+ %tmp4658 = getelementptr inbounds float, float* %tmp4657, i64 1
+ %tmp4659 = getelementptr inbounds float, float* %tmp4658, i64 1
+ %tmp4660 = getelementptr inbounds float, float* %tmp4659, i64 1
+ %tmp4661 = getelementptr inbounds float, float* %tmp4660, i64 1
+ %tmp4662 = getelementptr inbounds float, float* %tmp4661, i64 1
+ %tmp4663 = getelementptr inbounds float, float* %tmp4662, i64 1
+ %tmp4664 = getelementptr inbounds float, float* %tmp4663, i64 1
+ %tmp4665 = getelementptr inbounds float, float* %tmp4664, i64 1
+ %tmp4666 = getelementptr inbounds float, float* %tmp4665, i64 1
+ %tmp4667 = getelementptr inbounds float, float* %tmp4666, i64 1
+ %tmp4668 = getelementptr inbounds float, float* %tmp4667, i64 1
+ %tmp4669 = getelementptr inbounds float, float* %tmp4668, i64 1
+ %tmp4670 = getelementptr inbounds float, float* %tmp4669, i64 1
+ %tmp4671 = getelementptr inbounds float, float* %tmp4670, i64 1
+ %tmp4672 = getelementptr inbounds float, float* %tmp4671, i64 1
+ %tmp4673 = getelementptr inbounds float, float* %tmp4672, i64 1
+ %tmp4674 = getelementptr inbounds float, float* %tmp4673, i64 1
+ %tmp4675 = getelementptr inbounds float, float* %tmp4674, i64 1
+ %tmp4676 = getelementptr inbounds float, float* %tmp4675, i64 1
+ %tmp4677 = getelementptr inbounds float, float* %tmp4676, i64 1
+ %tmp4678 = getelementptr inbounds float, float* %tmp4677, i64 1
+ %tmp4679 = getelementptr inbounds float, float* %tmp4678, i64 1
+ %tmp4680 = getelementptr inbounds float, float* %tmp4679, i64 1
+ %tmp4681 = getelementptr inbounds float, float* %tmp4680, i64 1
+ %tmp4682 = getelementptr inbounds float, float* %tmp4681, i64 1
+ %tmp4683 = getelementptr inbounds float, float* %tmp4682, i64 1
+ %tmp4684 = getelementptr inbounds float, float* %tmp4683, i64 1
+ %tmp4685 = getelementptr inbounds float, float* %tmp4684, i64 1
+ %tmp4686 = getelementptr inbounds float, float* %tmp4685, i64 1
+ %tmp4687 = getelementptr inbounds float, float* %tmp4686, i64 1
+ %tmp4688 = getelementptr inbounds float, float* %tmp4687, i64 1
+ %tmp4689 = getelementptr inbounds float, float* %tmp4688, i64 1
+ %tmp4690 = getelementptr inbounds float, float* %tmp4689, i64 1
+ %tmp4691 = getelementptr inbounds float, float* %tmp4690, i64 1
+ %tmp4692 = getelementptr inbounds float, float* %tmp4691, i64 1
+ %tmp4693 = getelementptr inbounds float, float* %tmp4692, i64 1
+ %tmp4694 = getelementptr inbounds float, float* %tmp4693, i64 1
+ %tmp4695 = getelementptr inbounds float, float* %tmp4694, i64 1
+ %tmp4696 = getelementptr inbounds float, float* %tmp4695, i64 1
+ %tmp4697 = getelementptr inbounds float, float* %tmp4696, i64 1
+ %tmp4698 = getelementptr inbounds float, float* %tmp4697, i64 1
+ %tmp4699 = getelementptr inbounds float, float* %tmp4698, i64 1
+ %tmp4700 = getelementptr inbounds float, float* %tmp4699, i64 1
+ %tmp4701 = getelementptr inbounds float, float* %tmp4700, i64 1
+ %tmp4702 = getelementptr inbounds float, float* %tmp4701, i64 1
+ %tmp4703 = getelementptr inbounds float, float* %tmp4702, i64 1
+ %tmp4704 = getelementptr inbounds float, float* %tmp4703, i64 1
+ %tmp4705 = getelementptr inbounds float, float* %tmp4704, i64 1
+ %tmp4706 = getelementptr inbounds float, float* %tmp4705, i64 1
+ %tmp4707 = getelementptr inbounds float, float* %tmp4706, i64 1
+ %tmp4708 = getelementptr inbounds float, float* %tmp4707, i64 1
+ %tmp4709 = getelementptr inbounds float, float* %tmp4708, i64 1
+ %tmp4710 = getelementptr inbounds float, float* %tmp4709, i64 1
+ %tmp4711 = getelementptr inbounds float, float* %tmp4710, i64 1
+ %tmp4712 = getelementptr inbounds float, float* %tmp4711, i64 1
+ %tmp4713 = getelementptr inbounds float, float* %tmp4712, i64 1
+ %tmp4714 = getelementptr inbounds float, float* %tmp4713, i64 1
+ %tmp4715 = getelementptr inbounds float, float* %tmp4714, i64 1
+ %tmp4716 = getelementptr inbounds float, float* %tmp4715, i64 1
+ %tmp4717 = getelementptr inbounds float, float* %tmp4716, i64 1
+ %tmp4718 = getelementptr inbounds float, float* %tmp4717, i64 1
+ %tmp4719 = getelementptr inbounds float, float* %tmp4718, i64 1
+ %tmp4720 = getelementptr inbounds float, float* %tmp4719, i64 1
+ %tmp4721 = getelementptr inbounds float, float* %tmp4720, i64 1
+ %tmp4722 = getelementptr inbounds float, float* %tmp4721, i64 1
+ %tmp4723 = getelementptr inbounds float, float* %tmp4722, i64 1
+ %tmp4724 = getelementptr inbounds float, float* %tmp4723, i64 1
+ %tmp4725 = getelementptr inbounds float, float* %tmp4724, i64 1
+ %tmp4726 = getelementptr inbounds float, float* %tmp4725, i64 1
+ %tmp4727 = getelementptr inbounds float, float* %tmp4726, i64 1
+ %tmp4728 = getelementptr inbounds float, float* %tmp4727, i64 1
+ %tmp4729 = getelementptr inbounds float, float* %tmp4728, i64 1
+ %tmp4730 = getelementptr inbounds float, float* %tmp4729, i64 1
+ %tmp4731 = getelementptr inbounds float, float* %tmp4730, i64 1
+ %tmp4732 = getelementptr inbounds float, float* %tmp4731, i64 1
+ %tmp4733 = getelementptr inbounds float, float* %tmp4732, i64 1
+ %tmp4734 = getelementptr inbounds float, float* %tmp4733, i64 1
+ %tmp4735 = getelementptr inbounds float, float* %tmp4734, i64 1
+ %tmp4736 = getelementptr inbounds float, float* %tmp4735, i64 1
+ %tmp4737 = getelementptr inbounds float, float* %tmp4736, i64 1
+ %tmp4738 = getelementptr inbounds float, float* %tmp4737, i64 1
+ %tmp4739 = getelementptr inbounds float, float* %tmp4738, i64 1
+ %tmp4740 = getelementptr inbounds float, float* %tmp4739, i64 1
+ %tmp4741 = getelementptr inbounds float, float* %tmp4740, i64 1
+ %tmp4742 = getelementptr inbounds float, float* %tmp4741, i64 1
+ %tmp4743 = getelementptr inbounds float, float* %tmp4742, i64 1
+ %tmp4744 = getelementptr inbounds float, float* %tmp4743, i64 1
+ %tmp4745 = getelementptr inbounds float, float* %tmp4744, i64 1
+ %tmp4746 = getelementptr inbounds float, float* %tmp4745, i64 1
+ %tmp4747 = getelementptr inbounds float, float* %tmp4746, i64 1
+ %tmp4748 = getelementptr inbounds float, float* %tmp4747, i64 1
+ %tmp4749 = getelementptr inbounds float, float* %tmp4748, i64 1
+ %tmp4750 = getelementptr inbounds float, float* %tmp4749, i64 1
+ %tmp4751 = getelementptr inbounds float, float* %tmp4750, i64 1
+ %tmp4752 = getelementptr inbounds float, float* %tmp4751, i64 1
+ %tmp4753 = getelementptr inbounds float, float* %tmp4752, i64 1
+ %tmp4754 = getelementptr inbounds float, float* %tmp4753, i64 1
+ %tmp4755 = getelementptr inbounds float, float* %tmp4754, i64 1
+ %tmp4756 = getelementptr inbounds float, float* %tmp4755, i64 1
+ %tmp4757 = getelementptr inbounds float, float* %tmp4756, i64 1
+ %tmp4758 = getelementptr inbounds float, float* %tmp4757, i64 1
+ %tmp4759 = getelementptr inbounds float, float* %tmp4758, i64 1
+ %tmp4760 = getelementptr inbounds float, float* %tmp4759, i64 1
+ %tmp4761 = getelementptr inbounds float, float* %tmp4760, i64 1
+ %tmp4762 = getelementptr inbounds float, float* %tmp4761, i64 1
+ %tmp4763 = getelementptr inbounds float, float* %tmp4762, i64 1
+ %tmp4764 = getelementptr inbounds float, float* %tmp4763, i64 1
+ %tmp4765 = getelementptr inbounds float, float* %tmp4764, i64 1
+ %tmp4766 = getelementptr inbounds float, float* %tmp4765, i64 1
+ %tmp4767 = getelementptr inbounds float, float* %tmp4766, i64 1
+ %tmp4768 = getelementptr inbounds float, float* %tmp4767, i64 1
+ %tmp4769 = getelementptr inbounds float, float* %tmp4768, i64 1
+ %tmp4770 = getelementptr inbounds float, float* %tmp4769, i64 1
+ %tmp4771 = getelementptr inbounds float, float* %tmp4770, i64 1
+ %tmp4772 = getelementptr inbounds float, float* %tmp4771, i64 1
+ %tmp4773 = getelementptr inbounds float, float* %tmp4772, i64 1
+ %tmp4774 = getelementptr inbounds float, float* %tmp4773, i64 1
+ %tmp4775 = getelementptr inbounds float, float* %tmp4774, i64 1
+ %tmp4776 = getelementptr inbounds float, float* %tmp4775, i64 1
+ %tmp4777 = getelementptr inbounds float, float* %tmp4776, i64 1
+ %tmp4778 = getelementptr inbounds float, float* %tmp4777, i64 1
+ %tmp4779 = getelementptr inbounds float, float* %tmp4778, i64 1
+ %tmp4780 = getelementptr inbounds float, float* %tmp4779, i64 1
+ %tmp4781 = getelementptr inbounds float, float* %tmp4780, i64 1
+ %tmp4782 = getelementptr inbounds float, float* %tmp4781, i64 1
+ %tmp4783 = getelementptr inbounds float, float* %tmp4782, i64 1
+ %tmp4784 = getelementptr inbounds float, float* %tmp4783, i64 1
+ %tmp4785 = getelementptr inbounds float, float* %tmp4784, i64 1
+ %tmp4786 = getelementptr inbounds float, float* %tmp4785, i64 1
+ %tmp4787 = getelementptr inbounds float, float* %tmp4786, i64 1
+ %tmp4788 = getelementptr inbounds float, float* %tmp4787, i64 1
+ %tmp4789 = getelementptr inbounds float, float* %tmp4788, i64 1
+ %tmp4790 = getelementptr inbounds float, float* %tmp4789, i64 1
+ %tmp4791 = getelementptr inbounds float, float* %tmp4790, i64 1
+ %tmp4792 = getelementptr inbounds float, float* %tmp4791, i64 1
+ %tmp4793 = getelementptr inbounds float, float* %tmp4792, i64 1
+ %tmp4794 = getelementptr inbounds float, float* %tmp4793, i64 1
+ %tmp4795 = getelementptr inbounds float, float* %tmp4794, i64 1
+ %tmp4796 = getelementptr inbounds float, float* %tmp4795, i64 1
+ %tmp4797 = getelementptr inbounds float, float* %tmp4796, i64 1
+ %tmp4798 = getelementptr inbounds float, float* %tmp4797, i64 1
+ %tmp4799 = getelementptr inbounds float, float* %tmp4798, i64 1
+ %tmp4800 = getelementptr inbounds float, float* %tmp4799, i64 1
+ %tmp4801 = getelementptr inbounds float, float* %tmp4800, i64 1
+ %tmp4802 = getelementptr inbounds float, float* %tmp4801, i64 1
+ %tmp4803 = getelementptr inbounds float, float* %tmp4802, i64 1
+ %tmp4804 = getelementptr inbounds float, float* %tmp4803, i64 1
+ %tmp4805 = getelementptr inbounds float, float* %tmp4804, i64 1
+ %tmp4806 = getelementptr inbounds float, float* %tmp4805, i64 1
+ %tmp4807 = getelementptr inbounds float, float* %tmp4806, i64 1
+ %tmp4808 = getelementptr inbounds float, float* %tmp4807, i64 1
+ %tmp4809 = getelementptr inbounds float, float* %tmp4808, i64 1
+ %tmp4810 = getelementptr inbounds float, float* %tmp4809, i64 1
+ %tmp4811 = getelementptr inbounds float, float* %tmp4810, i64 1
+ %tmp4812 = getelementptr inbounds float, float* %tmp4811, i64 1
+ %tmp4813 = getelementptr inbounds float, float* %tmp4812, i64 1
+ %tmp4814 = getelementptr inbounds float, float* %tmp4813, i64 1
+ %tmp4815 = getelementptr inbounds float, float* %tmp4814, i64 1
+ %tmp4816 = getelementptr inbounds float, float* %tmp4815, i64 1
+ %tmp4817 = getelementptr inbounds float, float* %tmp4816, i64 1
+ %tmp4818 = getelementptr inbounds float, float* %tmp4817, i64 1
+ %tmp4819 = getelementptr inbounds float, float* %tmp4818, i64 1
+ %tmp4820 = getelementptr inbounds float, float* %tmp4819, i64 1
+ %tmp4821 = getelementptr inbounds float, float* %tmp4820, i64 1
+ %tmp4822 = getelementptr inbounds float, float* %tmp4821, i64 1
+ %tmp4823 = getelementptr inbounds float, float* %tmp4822, i64 1
+ %tmp4824 = getelementptr inbounds float, float* %tmp4823, i64 1
+ %tmp4825 = getelementptr inbounds float, float* %tmp4824, i64 1
+ %tmp4826 = getelementptr inbounds float, float* %tmp4825, i64 1
+ %tmp4827 = getelementptr inbounds float, float* %tmp4826, i64 1
+ %tmp4828 = getelementptr inbounds float, float* %tmp4827, i64 1
+ %tmp4829 = getelementptr inbounds float, float* %tmp4828, i64 1
+ %tmp4830 = getelementptr inbounds float, float* %tmp4829, i64 1
+ %tmp4831 = getelementptr inbounds float, float* %tmp4830, i64 1
+ %tmp4832 = getelementptr inbounds float, float* %tmp4831, i64 1
+ %tmp4833 = getelementptr inbounds float, float* %tmp4832, i64 1
+ %tmp4834 = getelementptr inbounds float, float* %tmp4833, i64 1
+ %tmp4835 = getelementptr inbounds float, float* %tmp4834, i64 1
+ %tmp4836 = getelementptr inbounds float, float* %tmp4835, i64 1
+ %tmp4837 = getelementptr inbounds float, float* %tmp4836, i64 1
+ %tmp4838 = getelementptr inbounds float, float* %tmp4837, i64 1
+ %tmp4839 = getelementptr inbounds float, float* %tmp4838, i64 1
+ %tmp4840 = getelementptr inbounds float, float* %tmp4839, i64 1
+ %tmp4841 = getelementptr inbounds float, float* %tmp4840, i64 1
+ %tmp4842 = getelementptr inbounds float, float* %tmp4841, i64 1
+ %tmp4843 = getelementptr inbounds float, float* %tmp4842, i64 1
+ %tmp4844 = getelementptr inbounds float, float* %tmp4843, i64 1
+ %tmp4845 = getelementptr inbounds float, float* %tmp4844, i64 1
+ %tmp4846 = getelementptr inbounds float, float* %tmp4845, i64 1
+ %tmp4847 = getelementptr inbounds float, float* %tmp4846, i64 1
+ %tmp4848 = getelementptr inbounds float, float* %tmp4847, i64 1
+ %tmp4849 = getelementptr inbounds float, float* %tmp4848, i64 1
+ %tmp4850 = getelementptr inbounds float, float* %tmp4849, i64 1
+ %tmp4851 = getelementptr inbounds float, float* %tmp4850, i64 1
+ %tmp4852 = getelementptr inbounds float, float* %tmp4851, i64 1
+ %tmp4853 = getelementptr inbounds float, float* %tmp4852, i64 1
+ %tmp4854 = getelementptr inbounds float, float* %tmp4853, i64 1
+ %tmp4855 = getelementptr inbounds float, float* %tmp4854, i64 1
+ %tmp4856 = getelementptr inbounds float, float* %tmp4855, i64 1
+ %tmp4857 = getelementptr inbounds float, float* %tmp4856, i64 1
+ %tmp4858 = getelementptr inbounds float, float* %tmp4857, i64 1
+ %tmp4859 = getelementptr inbounds float, float* %tmp4858, i64 1
+ %tmp4860 = getelementptr inbounds float, float* %tmp4859, i64 1
+ %tmp4861 = getelementptr inbounds float, float* %tmp4860, i64 1
+ %tmp4862 = getelementptr inbounds float, float* %tmp4861, i64 1
+ %tmp4863 = getelementptr inbounds float, float* %tmp4862, i64 1
+ %tmp4864 = getelementptr inbounds float, float* %tmp4863, i64 1
+ %tmp4865 = getelementptr inbounds float, float* %tmp4864, i64 1
+ %tmp4866 = getelementptr inbounds float, float* %tmp4865, i64 1
+ %tmp4867 = getelementptr inbounds float, float* %tmp4866, i64 1
+ %tmp4868 = getelementptr inbounds float, float* %tmp4867, i64 1
+ %tmp4869 = getelementptr inbounds float, float* %tmp4868, i64 1
+ %tmp4870 = getelementptr inbounds float, float* %tmp4869, i64 1
+ %tmp4871 = getelementptr inbounds float, float* %tmp4870, i64 1
+ %tmp4872 = getelementptr inbounds float, float* %tmp4871, i64 1
+ %tmp4873 = getelementptr inbounds float, float* %tmp4872, i64 1
+ %tmp4874 = getelementptr inbounds float, float* %tmp4873, i64 1
+ %tmp4875 = getelementptr inbounds float, float* %tmp4874, i64 1
+ %tmp4876 = getelementptr inbounds float, float* %tmp4875, i64 1
+ %tmp4877 = getelementptr inbounds float, float* %tmp4876, i64 1
+ %tmp4878 = getelementptr inbounds float, float* %tmp4877, i64 1
+ %tmp4879 = getelementptr inbounds float, float* %tmp4878, i64 1
+ %tmp4880 = getelementptr inbounds float, float* %tmp4879, i64 1
+ %tmp4881 = getelementptr inbounds float, float* %tmp4880, i64 1
+ %tmp4882 = getelementptr inbounds float, float* %tmp4881, i64 1
+ %tmp4883 = getelementptr inbounds float, float* %tmp4882, i64 1
+ %tmp4884 = getelementptr inbounds float, float* %tmp4883, i64 1
+ %tmp4885 = getelementptr inbounds float, float* %tmp4884, i64 1
+ %tmp4886 = getelementptr inbounds float, float* %tmp4885, i64 1
+ %tmp4887 = getelementptr inbounds float, float* %tmp4886, i64 1
+ %tmp4888 = getelementptr inbounds float, float* %tmp4887, i64 1
+ %tmp4889 = getelementptr inbounds float, float* %tmp4888, i64 1
+ %tmp4890 = getelementptr inbounds float, float* %tmp4889, i64 1
+ %tmp4891 = getelementptr inbounds float, float* %tmp4890, i64 1
+ %tmp4892 = getelementptr inbounds float, float* %tmp4891, i64 1
+ %tmp4893 = getelementptr inbounds float, float* %tmp4892, i64 1
+ %tmp4894 = getelementptr inbounds float, float* %tmp4893, i64 1
+ %tmp4895 = getelementptr inbounds float, float* %tmp4894, i64 1
+ %tmp4896 = getelementptr inbounds float, float* %tmp4895, i64 1
+ %tmp4897 = getelementptr inbounds float, float* %tmp4896, i64 1
+ %tmp4898 = getelementptr inbounds float, float* %tmp4897, i64 1
+ %tmp4899 = getelementptr inbounds float, float* %tmp4898, i64 1
+ %tmp4900 = getelementptr inbounds float, float* %tmp4899, i64 1
+ %tmp4901 = getelementptr inbounds float, float* %tmp4900, i64 1
+ %tmp4902 = getelementptr inbounds float, float* %tmp4901, i64 1
+ %tmp4903 = getelementptr inbounds float, float* %tmp4902, i64 1
+ %tmp4904 = getelementptr inbounds float, float* %tmp4903, i64 1
+ %tmp4905 = getelementptr inbounds float, float* %tmp4904, i64 1
+ %tmp4906 = getelementptr inbounds float, float* %tmp4905, i64 1
+ %tmp4907 = getelementptr inbounds float, float* %tmp4906, i64 1
+ %tmp4908 = getelementptr inbounds float, float* %tmp4907, i64 1
+ %tmp4909 = getelementptr inbounds float, float* %tmp4908, i64 1
+ %tmp4910 = getelementptr inbounds float, float* %tmp4909, i64 1
+ %tmp4911 = getelementptr inbounds float, float* %tmp4910, i64 1
+ %tmp4912 = getelementptr inbounds float, float* %tmp4911, i64 1
+ %tmp4913 = getelementptr inbounds float, float* %tmp4912, i64 1
+ %tmp4914 = getelementptr inbounds float, float* %tmp4913, i64 1
+ %tmp4915 = getelementptr inbounds float, float* %tmp4914, i64 1
+ %tmp4916 = getelementptr inbounds float, float* %tmp4915, i64 1
+ %tmp4917 = getelementptr inbounds float, float* %tmp4916, i64 1
+ %tmp4918 = getelementptr inbounds float, float* %tmp4917, i64 1
+ %tmp4919 = getelementptr inbounds float, float* %tmp4918, i64 1
+ %tmp4920 = getelementptr inbounds float, float* %tmp4919, i64 1
+ %tmp4921 = getelementptr inbounds float, float* %tmp4920, i64 1
+ %tmp4922 = getelementptr inbounds float, float* %tmp4921, i64 1
+ %tmp4923 = getelementptr inbounds float, float* %tmp4922, i64 1
+ %tmp4924 = getelementptr inbounds float, float* %tmp4923, i64 1
+ %tmp4925 = getelementptr inbounds float, float* %tmp4924, i64 1
+ %tmp4926 = getelementptr inbounds float, float* %tmp4925, i64 1
+ %tmp4927 = getelementptr inbounds float, float* %tmp4926, i64 1
+ %tmp4928 = getelementptr inbounds float, float* %tmp4927, i64 1
+ %tmp4929 = getelementptr inbounds float, float* %tmp4928, i64 1
+ %tmp4930 = getelementptr inbounds float, float* %tmp4929, i64 1
+ %tmp4931 = getelementptr inbounds float, float* %tmp4930, i64 1
+ %tmp4932 = getelementptr inbounds float, float* %tmp4931, i64 1
+ %tmp4933 = getelementptr inbounds float, float* %tmp4932, i64 1
+ %tmp4934 = getelementptr inbounds float, float* %tmp4933, i64 1
+ %tmp4935 = getelementptr inbounds float, float* %tmp4934, i64 1
+ %tmp4936 = getelementptr inbounds float, float* %tmp4935, i64 1
+ %tmp4937 = getelementptr inbounds float, float* %tmp4936, i64 1
+ %tmp4938 = getelementptr inbounds float, float* %tmp4937, i64 1
+ %tmp4939 = getelementptr inbounds float, float* %tmp4938, i64 1
+ %tmp4940 = getelementptr inbounds float, float* %tmp4939, i64 1
+ %tmp4941 = getelementptr inbounds float, float* %tmp4940, i64 1
+ %tmp4942 = getelementptr inbounds float, float* %tmp4941, i64 1
+ %tmp4943 = getelementptr inbounds float, float* %tmp4942, i64 1
+ %tmp4944 = getelementptr inbounds float, float* %tmp4943, i64 1
+ %tmp4945 = getelementptr inbounds float, float* %tmp4944, i64 1
+ %tmp4946 = getelementptr inbounds float, float* %tmp4945, i64 1
+ %tmp4947 = getelementptr inbounds float, float* %tmp4946, i64 1
+ %tmp4948 = getelementptr inbounds float, float* %tmp4947, i64 1
+ %tmp4949 = getelementptr inbounds float, float* %tmp4948, i64 1
+ %tmp4950 = getelementptr inbounds float, float* %tmp4949, i64 1
+ %tmp4951 = getelementptr inbounds float, float* %tmp4950, i64 1
+ %tmp4952 = getelementptr inbounds float, float* %tmp4951, i64 1
+ %tmp4953 = getelementptr inbounds float, float* %tmp4952, i64 1
+ %tmp4954 = getelementptr inbounds float, float* %tmp4953, i64 1
+ %tmp4955 = getelementptr inbounds float, float* %tmp4954, i64 1
+ %tmp4956 = getelementptr inbounds float, float* %tmp4955, i64 1
+ %tmp4957 = getelementptr inbounds float, float* %tmp4956, i64 1
+ %tmp4958 = getelementptr inbounds float, float* %tmp4957, i64 1
+ %tmp4959 = getelementptr inbounds float, float* %tmp4958, i64 1
+ %tmp4960 = getelementptr inbounds float, float* %tmp4959, i64 1
+ %tmp4961 = getelementptr inbounds float, float* %tmp4960, i64 1
+ %tmp4962 = getelementptr inbounds float, float* %tmp4961, i64 1
+ %tmp4963 = getelementptr inbounds float, float* %tmp4962, i64 1
+ %tmp4964 = getelementptr inbounds float, float* %tmp4963, i64 1
+ %tmp4965 = getelementptr inbounds float, float* %tmp4964, i64 1
+ %tmp4966 = getelementptr inbounds float, float* %tmp4965, i64 1
+ %tmp4967 = getelementptr inbounds float, float* %tmp4966, i64 1
+ %tmp4968 = getelementptr inbounds float, float* %tmp4967, i64 1
+ %tmp4969 = getelementptr inbounds float, float* %tmp4968, i64 1
+ %tmp4970 = getelementptr inbounds float, float* %tmp4969, i64 1
+ %tmp4971 = getelementptr inbounds float, float* %tmp4970, i64 1
+ %tmp4972 = getelementptr inbounds float, float* %tmp4971, i64 1
+ %tmp4973 = getelementptr inbounds float, float* %tmp4972, i64 1
+ %tmp4974 = getelementptr inbounds float, float* %tmp4973, i64 1
+ %tmp4975 = getelementptr inbounds float, float* %tmp4974, i64 1
+ %tmp4976 = getelementptr inbounds float, float* %tmp4975, i64 1
+ %tmp4977 = getelementptr inbounds float, float* %tmp4976, i64 1
+ %tmp4978 = getelementptr inbounds float, float* %tmp4977, i64 1
+ %tmp4979 = getelementptr inbounds float, float* %tmp4978, i64 1
+ %tmp4980 = getelementptr inbounds float, float* %tmp4979, i64 1
+ %tmp4981 = getelementptr inbounds float, float* %tmp4980, i64 1
+ %tmp4982 = getelementptr inbounds float, float* %tmp4981, i64 1
+ %tmp4983 = getelementptr inbounds float, float* %tmp4982, i64 1
+ %tmp4984 = getelementptr inbounds float, float* %tmp4983, i64 1
+ %tmp4985 = getelementptr inbounds float, float* %tmp4984, i64 1
+ %tmp4986 = getelementptr inbounds float, float* %tmp4985, i64 1
+ %tmp4987 = getelementptr inbounds float, float* %tmp4986, i64 1
+ %tmp4988 = getelementptr inbounds float, float* %tmp4987, i64 1
+ %tmp4989 = getelementptr inbounds float, float* %tmp4988, i64 1
+ %tmp4990 = getelementptr inbounds float, float* %tmp4989, i64 1
+ %tmp4991 = getelementptr inbounds float, float* %tmp4990, i64 1
+ %tmp4992 = getelementptr inbounds float, float* %tmp4991, i64 1
+ %tmp4993 = getelementptr inbounds float, float* %tmp4992, i64 1
+ %tmp4994 = getelementptr inbounds float, float* %tmp4993, i64 1
+ %tmp4995 = getelementptr inbounds float, float* %tmp4994, i64 1
+ %tmp4996 = getelementptr inbounds float, float* %tmp4995, i64 1
+ %tmp4997 = getelementptr inbounds float, float* %tmp4996, i64 1
+ %tmp4998 = getelementptr inbounds float, float* %tmp4997, i64 1
+ %tmp4999 = getelementptr inbounds float, float* %tmp4998, i64 1
+ %tmp5000 = getelementptr inbounds float, float* %tmp4999, i64 1
+ %tmp5001 = getelementptr inbounds float, float* %tmp5000, i64 1
+ %tmp5002 = getelementptr inbounds float, float* %tmp5001, i64 1
+ %tmp5003 = getelementptr inbounds float, float* %tmp5002, i64 1
+ %tmp5004 = getelementptr inbounds float, float* %tmp5003, i64 1
+ %tmp5005 = getelementptr inbounds float, float* %tmp5004, i64 1
+ %tmp5006 = getelementptr inbounds float, float* %tmp5005, i64 1
+ %tmp5007 = getelementptr inbounds float, float* %tmp5006, i64 1
+ %tmp5008 = getelementptr inbounds float, float* %tmp5007, i64 1
+ %tmp5009 = getelementptr inbounds float, float* %tmp5008, i64 1
+ %tmp5010 = getelementptr inbounds float, float* %tmp5009, i64 1
+ %tmp5011 = getelementptr inbounds float, float* %tmp5010, i64 1
+ %tmp5012 = getelementptr inbounds float, float* %tmp5011, i64 1
+ %tmp5013 = getelementptr inbounds float, float* %tmp5012, i64 1
+ %tmp5014 = getelementptr inbounds float, float* %tmp5013, i64 1
+ %tmp5015 = getelementptr inbounds float, float* %tmp5014, i64 1
+ %tmp5016 = getelementptr inbounds float, float* %tmp5015, i64 1
+ %tmp5017 = getelementptr inbounds float, float* %tmp5016, i64 1
+ %tmp5018 = getelementptr inbounds float, float* %tmp5017, i64 1
+ %tmp5019 = getelementptr inbounds float, float* %tmp5018, i64 1
+ %tmp5020 = getelementptr inbounds float, float* %tmp5019, i64 1
+ %tmp5021 = getelementptr inbounds float, float* %tmp5020, i64 1
+ %tmp5022 = getelementptr inbounds float, float* %tmp5021, i64 1
+ %tmp5023 = getelementptr inbounds float, float* %tmp5022, i64 1
+ %tmp5024 = getelementptr inbounds float, float* %tmp5023, i64 1
+ %tmp5025 = getelementptr inbounds float, float* %tmp5024, i64 1
+ %tmp5026 = getelementptr inbounds float, float* %tmp5025, i64 1
+ %tmp5027 = getelementptr inbounds float, float* %tmp5026, i64 1
+ %tmp5028 = getelementptr inbounds float, float* %tmp5027, i64 1
+ %tmp5029 = getelementptr inbounds float, float* %tmp5028, i64 1
+ %tmp5030 = getelementptr inbounds float, float* %tmp5029, i64 1
+ %tmp5031 = getelementptr inbounds float, float* %tmp5030, i64 1
+ %tmp5032 = getelementptr inbounds float, float* %tmp5031, i64 1
+ %tmp5033 = getelementptr inbounds float, float* %tmp5032, i64 1
+ %tmp5034 = getelementptr inbounds float, float* %tmp5033, i64 1
+ %tmp5035 = getelementptr inbounds float, float* %tmp5034, i64 1
+ %tmp5036 = getelementptr inbounds float, float* %tmp5035, i64 1
+ %tmp5037 = getelementptr inbounds float, float* %tmp5036, i64 1
+ %tmp5038 = getelementptr inbounds float, float* %tmp5037, i64 1
+ %tmp5039 = getelementptr inbounds float, float* %tmp5038, i64 1
+ %tmp5040 = getelementptr inbounds float, float* %tmp5039, i64 1
+ %tmp5041 = getelementptr inbounds float, float* %tmp5040, i64 1
+ %tmp5042 = getelementptr inbounds float, float* %tmp5041, i64 1
+ %tmp5043 = getelementptr inbounds float, float* %tmp5042, i64 1
+ %tmp5044 = getelementptr inbounds float, float* %tmp5043, i64 1
+ %tmp5045 = getelementptr inbounds float, float* %tmp5044, i64 1
+ %tmp5046 = getelementptr inbounds float, float* %tmp5045, i64 1
+ %tmp5047 = getelementptr inbounds float, float* %tmp5046, i64 1
+ %tmp5048 = getelementptr inbounds float, float* %tmp5047, i64 1
+ %tmp5049 = getelementptr inbounds float, float* %tmp5048, i64 1
+ %tmp5050 = getelementptr inbounds float, float* %tmp5049, i64 1
+ %tmp5051 = getelementptr inbounds float, float* %tmp5050, i64 1
+ %tmp5052 = getelementptr inbounds float, float* %tmp5051, i64 1
+ %tmp5053 = getelementptr inbounds float, float* %tmp5052, i64 1
+ %tmp5054 = getelementptr inbounds float, float* %tmp5053, i64 1
+ %tmp5055 = getelementptr inbounds float, float* %tmp5054, i64 1
+ %tmp5056 = getelementptr inbounds float, float* %tmp5055, i64 1
+ %tmp5057 = getelementptr inbounds float, float* %tmp5056, i64 1
+ %tmp5058 = getelementptr inbounds float, float* %tmp5057, i64 1
+ %tmp5059 = getelementptr inbounds float, float* %tmp5058, i64 1
+ %tmp5060 = getelementptr inbounds float, float* %tmp5059, i64 1
+ %tmp5061 = getelementptr inbounds float, float* %tmp5060, i64 1
+ %tmp5062 = getelementptr inbounds float, float* %tmp5061, i64 1
+ %tmp5063 = getelementptr inbounds float, float* %tmp5062, i64 1
+ %tmp5064 = getelementptr inbounds float, float* %tmp5063, i64 1
+ %tmp5065 = getelementptr inbounds float, float* %tmp5064, i64 1
+ %tmp5066 = getelementptr inbounds float, float* %tmp5065, i64 1
+ %tmp5067 = getelementptr inbounds float, float* %tmp5066, i64 1
+ %tmp5068 = getelementptr inbounds float, float* %tmp5067, i64 1
+ %tmp5069 = getelementptr inbounds float, float* %tmp5068, i64 1
+ %tmp5070 = getelementptr inbounds float, float* %tmp5069, i64 1
+ %tmp5071 = getelementptr inbounds float, float* %tmp5070, i64 1
+ %tmp5072 = getelementptr inbounds float, float* %tmp5071, i64 1
+ %tmp5073 = getelementptr inbounds float, float* %tmp5072, i64 1
+ %tmp5074 = getelementptr inbounds float, float* %tmp5073, i64 1
+ %tmp5075 = getelementptr inbounds float, float* %tmp5074, i64 1
+ %tmp5076 = getelementptr inbounds float, float* %tmp5075, i64 1
+ %tmp5077 = getelementptr inbounds float, float* %tmp5076, i64 1
+ %tmp5078 = getelementptr inbounds float, float* %tmp5077, i64 1
+ %tmp5079 = getelementptr inbounds float, float* %tmp5078, i64 1
+ %tmp5080 = getelementptr inbounds float, float* %tmp5079, i64 1
+ %tmp5081 = getelementptr inbounds float, float* %tmp5080, i64 1
+ %tmp5082 = getelementptr inbounds float, float* %tmp5081, i64 1
+ %tmp5083 = getelementptr inbounds float, float* %tmp5082, i64 1
+ %tmp5084 = getelementptr inbounds float, float* %tmp5083, i64 1
+ %tmp5085 = getelementptr inbounds float, float* %tmp5084, i64 1
+ %tmp5086 = getelementptr inbounds float, float* %tmp5085, i64 1
+ %tmp5087 = getelementptr inbounds float, float* %tmp5086, i64 1
+ %tmp5088 = getelementptr inbounds float, float* %tmp5087, i64 1
+ %tmp5089 = getelementptr inbounds float, float* %tmp5088, i64 1
+ %tmp5090 = getelementptr inbounds float, float* %tmp5089, i64 1
+ %tmp5091 = getelementptr inbounds float, float* %tmp5090, i64 1
+ %tmp5092 = getelementptr inbounds float, float* %tmp5091, i64 1
+ %tmp5093 = getelementptr inbounds float, float* %tmp5092, i64 1
+ %tmp5094 = getelementptr inbounds float, float* %tmp5093, i64 1
+ %tmp5095 = getelementptr inbounds float, float* %tmp5094, i64 1
+ %tmp5096 = getelementptr inbounds float, float* %tmp5095, i64 1
+ %tmp5097 = getelementptr inbounds float, float* %tmp5096, i64 1
+ %tmp5098 = getelementptr inbounds float, float* %tmp5097, i64 1
+ %tmp5099 = getelementptr inbounds float, float* %tmp5098, i64 1
+ %tmp5100 = getelementptr inbounds float, float* %tmp5099, i64 1
+ %tmp5101 = getelementptr inbounds float, float* %tmp5100, i64 1
+ %tmp5102 = getelementptr inbounds float, float* %tmp5101, i64 1
+ %tmp5103 = getelementptr inbounds float, float* %tmp5102, i64 1
+ %tmp5104 = getelementptr inbounds float, float* %tmp5103, i64 1
+ %tmp5105 = getelementptr inbounds float, float* %tmp5104, i64 1
+ %tmp5106 = getelementptr inbounds float, float* %tmp5105, i64 1
+ %tmp5107 = getelementptr inbounds float, float* %tmp5106, i64 1
+ %tmp5108 = getelementptr inbounds float, float* %tmp5107, i64 1
+ %tmp5109 = getelementptr inbounds float, float* %tmp5108, i64 1
+ %tmp5110 = getelementptr inbounds float, float* %tmp5109, i64 1
+ %tmp5111 = getelementptr inbounds float, float* %tmp5110, i64 1
+ %tmp5112 = getelementptr inbounds float, float* %tmp5111, i64 1
+ %tmp5113 = getelementptr inbounds float, float* %tmp5112, i64 1
+ %tmp5114 = getelementptr inbounds float, float* %tmp5113, i64 1
+ %tmp5115 = getelementptr inbounds float, float* %tmp5114, i64 1
+ %tmp5116 = getelementptr inbounds float, float* %tmp5115, i64 1
+ %tmp5117 = getelementptr inbounds float, float* %tmp5116, i64 1
+ %tmp5118 = getelementptr inbounds float, float* %tmp5117, i64 1
+ %tmp5119 = getelementptr inbounds float, float* %tmp5118, i64 1
+ %tmp5120 = getelementptr inbounds float, float* %tmp5119, i64 1
+ %tmp5121 = getelementptr inbounds float, float* %tmp5120, i64 1
+ %tmp5122 = getelementptr inbounds float, float* %tmp5121, i64 1
+ %tmp5123 = getelementptr inbounds float, float* %tmp5122, i64 1
+ %tmp5124 = getelementptr inbounds float, float* %tmp5123, i64 1
+ %tmp5125 = getelementptr inbounds float, float* %tmp5124, i64 1
+ %tmp5126 = getelementptr inbounds float, float* %tmp5125, i64 1
+ %tmp5127 = getelementptr inbounds float, float* %tmp5126, i64 1
+ %tmp5128 = getelementptr inbounds float, float* %tmp5127, i64 1
+ %tmp5129 = getelementptr inbounds float, float* %tmp5128, i64 1
+ %tmp5130 = getelementptr inbounds float, float* %tmp5129, i64 1
+ %tmp5131 = getelementptr inbounds float, float* %tmp5130, i64 1
+ %tmp5132 = getelementptr inbounds float, float* %tmp5131, i64 1
+ %tmp5133 = getelementptr inbounds float, float* %tmp5132, i64 1
+ %tmp5134 = getelementptr inbounds float, float* %tmp5133, i64 1
+ %tmp5135 = getelementptr inbounds float, float* %tmp5134, i64 1
+ %tmp5136 = getelementptr inbounds float, float* %tmp5135, i64 1
+ %tmp5137 = getelementptr inbounds float, float* %tmp5136, i64 1
+ %tmp5138 = getelementptr inbounds float, float* %tmp5137, i64 1
+ %tmp5139 = getelementptr inbounds float, float* %tmp5138, i64 1
+ %tmp5140 = getelementptr inbounds float, float* %tmp5139, i64 1
+ %tmp5141 = getelementptr inbounds float, float* %tmp5140, i64 1
+ %tmp5142 = getelementptr inbounds float, float* %tmp5141, i64 1
+ %tmp5143 = getelementptr inbounds float, float* %tmp5142, i64 1
+ %tmp5144 = getelementptr inbounds float, float* %tmp5143, i64 1
+ %tmp5145 = getelementptr inbounds float, float* %tmp5144, i64 1
+ %tmp5146 = getelementptr inbounds float, float* %tmp5145, i64 1
+ %tmp5147 = getelementptr inbounds float, float* %tmp5146, i64 1
+ %tmp5148 = getelementptr inbounds float, float* %tmp5147, i64 1
+ %tmp5149 = getelementptr inbounds float, float* %tmp5148, i64 1
+ %tmp5150 = getelementptr inbounds float, float* %tmp5149, i64 1
+ %tmp5151 = getelementptr inbounds float, float* %tmp5150, i64 1
+ %tmp5152 = getelementptr inbounds float, float* %tmp5151, i64 1
+ %tmp5153 = getelementptr inbounds float, float* %tmp5152, i64 1
+ %tmp5154 = getelementptr inbounds float, float* %tmp5153, i64 1
+ %tmp5155 = getelementptr inbounds float, float* %tmp5154, i64 1
+ %tmp5156 = getelementptr inbounds float, float* %tmp5155, i64 1
+ %tmp5157 = getelementptr inbounds float, float* %tmp5156, i64 1
+ %tmp5158 = getelementptr inbounds float, float* %tmp5157, i64 1
+ %tmp5159 = getelementptr inbounds float, float* %tmp5158, i64 1
+ %tmp5160 = getelementptr inbounds float, float* %tmp5159, i64 1
+ %tmp5161 = getelementptr inbounds float, float* %tmp5160, i64 1
+ %tmp5162 = getelementptr inbounds float, float* %tmp5161, i64 1
+ %tmp5163 = getelementptr inbounds float, float* %tmp5162, i64 1
+ %tmp5164 = getelementptr inbounds float, float* %tmp5163, i64 1
+ %tmp5165 = getelementptr inbounds float, float* %tmp5164, i64 1
+ %tmp5166 = getelementptr inbounds float, float* %tmp5165, i64 1
+ %tmp5167 = getelementptr inbounds float, float* %tmp5166, i64 1
+ %tmp5168 = getelementptr inbounds float, float* %tmp5167, i64 1
+ %tmp5169 = getelementptr inbounds float, float* %tmp5168, i64 1
+ %tmp5170 = getelementptr inbounds float, float* %tmp5169, i64 1
+ %tmp5171 = getelementptr inbounds float, float* %tmp5170, i64 1
+ %tmp5172 = getelementptr inbounds float, float* %tmp5171, i64 1
+ %tmp5173 = getelementptr inbounds float, float* %tmp5172, i64 1
+ %tmp5174 = getelementptr inbounds float, float* %tmp5173, i64 1
+ %tmp5175 = getelementptr inbounds float, float* %tmp5174, i64 1
+ %tmp5176 = getelementptr inbounds float, float* %tmp5175, i64 1
+ %tmp5177 = getelementptr inbounds float, float* %tmp5176, i64 1
+ %tmp5178 = getelementptr inbounds float, float* %tmp5177, i64 1
+ %tmp5179 = getelementptr inbounds float, float* %tmp5178, i64 1
+ %tmp5180 = getelementptr inbounds float, float* %tmp5179, i64 1
+ %tmp5181 = getelementptr inbounds float, float* %tmp5180, i64 1
+ %tmp5182 = getelementptr inbounds float, float* %tmp5181, i64 1
+ %tmp5183 = getelementptr inbounds float, float* %tmp5182, i64 1
+ %tmp5184 = getelementptr inbounds float, float* %tmp5183, i64 1
+ %tmp5185 = getelementptr inbounds float, float* %tmp5184, i64 1
+ %tmp5186 = getelementptr inbounds float, float* %tmp5185, i64 1
+ %tmp5187 = getelementptr inbounds float, float* %tmp5186, i64 1
+ %tmp5188 = getelementptr inbounds float, float* %tmp5187, i64 1
+ %tmp5189 = getelementptr inbounds float, float* %tmp5188, i64 1
+ %tmp5190 = getelementptr inbounds float, float* %tmp5189, i64 1
+ %tmp5191 = getelementptr inbounds float, float* %tmp5190, i64 1
+ %tmp5192 = getelementptr inbounds float, float* %tmp5191, i64 1
+ %tmp5193 = getelementptr inbounds float, float* %tmp5192, i64 1
+ %tmp5194 = getelementptr inbounds float, float* %tmp5193, i64 1
+ %tmp5195 = getelementptr inbounds float, float* %tmp5194, i64 1
+ %tmp5196 = getelementptr inbounds float, float* %tmp5195, i64 1
+ %tmp5197 = getelementptr inbounds float, float* %tmp5196, i64 1
+ %tmp5198 = getelementptr inbounds float, float* %tmp5197, i64 1
+ %tmp5199 = getelementptr inbounds float, float* %tmp5198, i64 1
+ %tmp5200 = getelementptr inbounds float, float* %tmp5199, i64 1
+ %tmp5201 = getelementptr inbounds float, float* %tmp5200, i64 1
+ %tmp5202 = getelementptr inbounds float, float* %tmp5201, i64 1
+ %tmp5203 = getelementptr inbounds float, float* %tmp5202, i64 1
+ %tmp5204 = getelementptr inbounds float, float* %tmp5203, i64 1
+ %tmp5205 = getelementptr inbounds float, float* %tmp5204, i64 1
+ %tmp5206 = getelementptr inbounds float, float* %tmp5205, i64 1
+ %tmp5207 = getelementptr inbounds float, float* %tmp5206, i64 1
+ %tmp5208 = getelementptr inbounds float, float* %tmp5207, i64 1
+ %tmp5209 = getelementptr inbounds float, float* %tmp5208, i64 1
+ %tmp5210 = getelementptr inbounds float, float* %tmp5209, i64 1
+ %tmp5211 = getelementptr inbounds float, float* %tmp5210, i64 1
+ %tmp5212 = getelementptr inbounds float, float* %tmp5211, i64 1
+ %tmp5213 = getelementptr inbounds float, float* %tmp5212, i64 1
+ %tmp5214 = getelementptr inbounds float, float* %tmp5213, i64 1
+ %tmp5215 = getelementptr inbounds float, float* %tmp5214, i64 1
+ %tmp5216 = getelementptr inbounds float, float* %tmp5215, i64 1
+ %tmp5217 = getelementptr inbounds float, float* %tmp5216, i64 1
+ %tmp5218 = getelementptr inbounds float, float* %tmp5217, i64 1
+ %tmp5219 = getelementptr inbounds float, float* %tmp5218, i64 1
+ %tmp5220 = getelementptr inbounds float, float* %tmp5219, i64 1
+ %tmp5221 = getelementptr inbounds float, float* %tmp5220, i64 1
+ %tmp5222 = getelementptr inbounds float, float* %tmp5221, i64 1
+ %tmp5223 = getelementptr inbounds float, float* %tmp5222, i64 1
+ %tmp5224 = getelementptr inbounds float, float* %tmp5223, i64 1
+ %tmp5225 = getelementptr inbounds float, float* %tmp5224, i64 1
+ %tmp5226 = getelementptr inbounds float, float* %tmp5225, i64 1
+ %tmp5227 = getelementptr inbounds float, float* %tmp5226, i64 1
+ %tmp5228 = getelementptr inbounds float, float* %tmp5227, i64 1
+ %tmp5229 = getelementptr inbounds float, float* %tmp5228, i64 1
+ %tmp5230 = getelementptr inbounds float, float* %tmp5229, i64 1
+ %tmp5231 = getelementptr inbounds float, float* %tmp5230, i64 1
+ %tmp5232 = getelementptr inbounds float, float* %tmp5231, i64 1
+ %tmp5233 = getelementptr inbounds float, float* %tmp5232, i64 1
+ %tmp5234 = getelementptr inbounds float, float* %tmp5233, i64 1
+ %tmp5235 = getelementptr inbounds float, float* %tmp5234, i64 1
+ %tmp5236 = getelementptr inbounds float, float* %tmp5235, i64 1
+ %tmp5237 = getelementptr inbounds float, float* %tmp5236, i64 1
+ %tmp5238 = getelementptr inbounds float, float* %tmp5237, i64 1
+ %tmp5239 = getelementptr inbounds float, float* %tmp5238, i64 1
+ %tmp5240 = getelementptr inbounds float, float* %tmp5239, i64 1
+ %tmp5241 = getelementptr inbounds float, float* %tmp5240, i64 1
+ %tmp5242 = getelementptr inbounds float, float* %tmp5241, i64 1
+ %tmp5243 = getelementptr inbounds float, float* %tmp5242, i64 1
+ %tmp5244 = getelementptr inbounds float, float* %tmp5243, i64 1
+ %tmp5245 = getelementptr inbounds float, float* %tmp5244, i64 1
+ %tmp5246 = getelementptr inbounds float, float* %tmp5245, i64 1
+ %tmp5247 = getelementptr inbounds float, float* %tmp5246, i64 1
+ %tmp5248 = getelementptr inbounds float, float* %tmp5247, i64 1
+ %tmp5249 = getelementptr inbounds float, float* %tmp5248, i64 1
+ %tmp5250 = getelementptr inbounds float, float* %tmp5249, i64 1
+ %tmp5251 = getelementptr inbounds float, float* %tmp5250, i64 1
+ %tmp5252 = getelementptr inbounds float, float* %tmp5251, i64 1
+ %tmp5253 = getelementptr inbounds float, float* %tmp5252, i64 1
+ %tmp5254 = getelementptr inbounds float, float* %tmp5253, i64 1
+ %tmp5255 = getelementptr inbounds float, float* %tmp5254, i64 1
+ %tmp5256 = getelementptr inbounds float, float* %tmp5255, i64 1
+ %tmp5257 = getelementptr inbounds float, float* %tmp5256, i64 1
+ %tmp5258 = getelementptr inbounds float, float* %tmp5257, i64 1
+ %tmp5259 = getelementptr inbounds float, float* %tmp5258, i64 1
+ %tmp5260 = getelementptr inbounds float, float* %tmp5259, i64 1
+ %tmp5261 = getelementptr inbounds float, float* %tmp5260, i64 1
+ %tmp5262 = getelementptr inbounds float, float* %tmp5261, i64 1
+ %tmp5263 = getelementptr inbounds float, float* %tmp5262, i64 1
+ %tmp5264 = getelementptr inbounds float, float* %tmp5263, i64 1
+ %tmp5265 = getelementptr inbounds float, float* %tmp5264, i64 1
+ %tmp5266 = getelementptr inbounds float, float* %tmp5265, i64 1
+ %tmp5267 = getelementptr inbounds float, float* %tmp5266, i64 1
+ %tmp5268 = getelementptr inbounds float, float* %tmp5267, i64 1
+ %tmp5269 = getelementptr inbounds float, float* %tmp5268, i64 1
+ %tmp5270 = getelementptr inbounds float, float* %tmp5269, i64 1
+ %tmp5271 = getelementptr inbounds float, float* %tmp5270, i64 1
+ %tmp5272 = getelementptr inbounds float, float* %tmp5271, i64 1
+ %tmp5273 = getelementptr inbounds float, float* %tmp5272, i64 1
+ %tmp5274 = getelementptr inbounds float, float* %tmp5273, i64 1
+ %tmp5275 = getelementptr inbounds float, float* %tmp5274, i64 1
+ %tmp5276 = getelementptr inbounds float, float* %tmp5275, i64 1
+ %tmp5277 = getelementptr inbounds float, float* %tmp5276, i64 1
+ %tmp5278 = getelementptr inbounds float, float* %tmp5277, i64 1
+ %tmp5279 = getelementptr inbounds float, float* %tmp5278, i64 1
+ %tmp5280 = getelementptr inbounds float, float* %tmp5279, i64 1
+ %tmp5281 = getelementptr inbounds float, float* %tmp5280, i64 1
+ %tmp5282 = getelementptr inbounds float, float* %tmp5281, i64 1
+ %tmp5283 = getelementptr inbounds float, float* %tmp5282, i64 1
+ %tmp5284 = getelementptr inbounds float, float* %tmp5283, i64 1
+ %tmp5285 = getelementptr inbounds float, float* %tmp5284, i64 1
+ %tmp5286 = getelementptr inbounds float, float* %tmp5285, i64 1
+ %tmp5287 = getelementptr inbounds float, float* %tmp5286, i64 1
+ %tmp5288 = getelementptr inbounds float, float* %tmp5287, i64 1
+ %tmp5289 = getelementptr inbounds float, float* %tmp5288, i64 1
+ %tmp5290 = getelementptr inbounds float, float* %tmp5289, i64 1
+ %tmp5291 = getelementptr inbounds float, float* %tmp5290, i64 1
+ %tmp5292 = getelementptr inbounds float, float* %tmp5291, i64 1
+ %tmp5293 = getelementptr inbounds float, float* %tmp5292, i64 1
+ %tmp5294 = getelementptr inbounds float, float* %tmp5293, i64 1
+ %tmp5295 = getelementptr inbounds float, float* %tmp5294, i64 1
+ %tmp5296 = getelementptr inbounds float, float* %tmp5295, i64 1
+ %tmp5297 = getelementptr inbounds float, float* %tmp5296, i64 1
+ %tmp5298 = getelementptr inbounds float, float* %tmp5297, i64 1
+ %tmp5299 = getelementptr inbounds float, float* %tmp5298, i64 1
+ %tmp5300 = getelementptr inbounds float, float* %tmp5299, i64 1
+ %tmp5301 = getelementptr inbounds float, float* %tmp5300, i64 1
+ %tmp5302 = getelementptr inbounds float, float* %tmp5301, i64 1
+ %tmp5303 = getelementptr inbounds float, float* %tmp5302, i64 1
+ %tmp5304 = getelementptr inbounds float, float* %tmp5303, i64 1
+ %tmp5305 = getelementptr inbounds float, float* %tmp5304, i64 1
+ %tmp5306 = getelementptr inbounds float, float* %tmp5305, i64 1
+ %tmp5307 = getelementptr inbounds float, float* %tmp5306, i64 1
+ %tmp5308 = getelementptr inbounds float, float* %tmp5307, i64 1
+ %tmp5309 = getelementptr inbounds float, float* %tmp5308, i64 1
+ %tmp5310 = getelementptr inbounds float, float* %tmp5309, i64 1
+ %tmp5311 = getelementptr inbounds float, float* %tmp5310, i64 1
+ %tmp5312 = getelementptr inbounds float, float* %tmp5311, i64 1
+ %tmp5313 = getelementptr inbounds float, float* %tmp5312, i64 1
+ %tmp5314 = getelementptr inbounds float, float* %tmp5313, i64 1
+ %tmp5315 = getelementptr inbounds float, float* %tmp5314, i64 1
+ %tmp5316 = getelementptr inbounds float, float* %tmp5315, i64 1
+ %tmp5317 = getelementptr inbounds float, float* %tmp5316, i64 1
+ %tmp5318 = getelementptr inbounds float, float* %tmp5317, i64 1
+ %tmp5319 = getelementptr inbounds float, float* %tmp5318, i64 1
+ %tmp5320 = getelementptr inbounds float, float* %tmp5319, i64 1
+ %tmp5321 = getelementptr inbounds float, float* %tmp5320, i64 1
+ %tmp5322 = getelementptr inbounds float, float* %tmp5321, i64 1
+ %tmp5323 = getelementptr inbounds float, float* %tmp5322, i64 1
+ %tmp5324 = getelementptr inbounds float, float* %tmp5323, i64 1
+ %tmp5325 = getelementptr inbounds float, float* %tmp5324, i64 1
+ %tmp5326 = getelementptr inbounds float, float* %tmp5325, i64 1
+ %tmp5327 = getelementptr inbounds float, float* %tmp5326, i64 1
+ %tmp5328 = getelementptr inbounds float, float* %tmp5327, i64 1
+ %tmp5329 = getelementptr inbounds float, float* %tmp5328, i64 1
+ %tmp5330 = getelementptr inbounds float, float* %tmp5329, i64 1
+ %tmp5331 = getelementptr inbounds float, float* %tmp5330, i64 1
+ %tmp5332 = getelementptr inbounds float, float* %tmp5331, i64 1
+ %tmp5333 = getelementptr inbounds float, float* %tmp5332, i64 1
+ %tmp5334 = getelementptr inbounds float, float* %tmp5333, i64 1
+ %tmp5335 = getelementptr inbounds float, float* %tmp5334, i64 1
+ %tmp5336 = getelementptr inbounds float, float* %tmp5335, i64 1
+ %tmp5337 = getelementptr inbounds float, float* %tmp5336, i64 1
+ %tmp5338 = getelementptr inbounds float, float* %tmp5337, i64 1
+ %tmp5339 = getelementptr inbounds float, float* %tmp5338, i64 1
+ %tmp5340 = getelementptr inbounds float, float* %tmp5339, i64 1
+ %tmp5341 = getelementptr inbounds float, float* %tmp5340, i64 1
+ %tmp5342 = getelementptr inbounds float, float* %tmp5341, i64 1
+ %tmp5343 = getelementptr inbounds float, float* %tmp5342, i64 1
+ %tmp5344 = getelementptr inbounds float, float* %tmp5343, i64 1
+ %tmp5345 = getelementptr inbounds float, float* %tmp5344, i64 1
+ %tmp5346 = getelementptr inbounds float, float* %tmp5345, i64 1
+ %tmp5347 = getelementptr inbounds float, float* %tmp5346, i64 1
+ %tmp5348 = getelementptr inbounds float, float* %tmp5347, i64 1
+ %tmp5349 = getelementptr inbounds float, float* %tmp5348, i64 1
+ %tmp5350 = getelementptr inbounds float, float* %tmp5349, i64 1
+ %tmp5351 = getelementptr inbounds float, float* %tmp5350, i64 1
+ %tmp5352 = getelementptr inbounds float, float* %tmp5351, i64 1
+ %tmp5353 = getelementptr inbounds float, float* %tmp5352, i64 1
+ %tmp5354 = getelementptr inbounds float, float* %tmp5353, i64 1
+ %tmp5355 = getelementptr inbounds float, float* %tmp5354, i64 1
+ %tmp5356 = getelementptr inbounds float, float* %tmp5355, i64 1
+ %tmp5357 = getelementptr inbounds float, float* %tmp5356, i64 1
+ %tmp5358 = getelementptr inbounds float, float* %tmp5357, i64 1
+ %tmp5359 = getelementptr inbounds float, float* %tmp5358, i64 1
+ %tmp5360 = getelementptr inbounds float, float* %tmp5359, i64 1
+ %tmp5361 = getelementptr inbounds float, float* %tmp5360, i64 1
+ %tmp5362 = getelementptr inbounds float, float* %tmp5361, i64 1
+ %tmp5363 = getelementptr inbounds float, float* %tmp5362, i64 1
+ %tmp5364 = getelementptr inbounds float, float* %tmp5363, i64 1
+ %tmp5365 = getelementptr inbounds float, float* %tmp5364, i64 1
+ %tmp5366 = getelementptr inbounds float, float* %tmp5365, i64 1
+ %tmp5367 = getelementptr inbounds float, float* %tmp5366, i64 1
+ %tmp5368 = getelementptr inbounds float, float* %tmp5367, i64 1
+ %tmp5369 = getelementptr inbounds float, float* %tmp5368, i64 1
+ %tmp5370 = getelementptr inbounds float, float* %tmp5369, i64 1
+ %tmp5371 = getelementptr inbounds float, float* %tmp5370, i64 1
+ %tmp5372 = getelementptr inbounds float, float* %tmp5371, i64 1
+ %tmp5373 = getelementptr inbounds float, float* %tmp5372, i64 1
+ %tmp5374 = getelementptr inbounds float, float* %tmp5373, i64 1
+ %tmp5375 = getelementptr inbounds float, float* %tmp5374, i64 1
+ %tmp5376 = getelementptr inbounds float, float* %tmp5375, i64 1
+ %tmp5377 = getelementptr inbounds float, float* %tmp5376, i64 1
+ %tmp5378 = getelementptr inbounds float, float* %tmp5377, i64 1
+ %tmp5379 = getelementptr inbounds float, float* %tmp5378, i64 1
+ %tmp5380 = getelementptr inbounds float, float* %tmp5379, i64 1
+ %tmp5381 = getelementptr inbounds float, float* %tmp5380, i64 1
+ %tmp5382 = getelementptr inbounds float, float* %tmp5381, i64 1
+ %tmp5383 = getelementptr inbounds float, float* %tmp5382, i64 1
+ %tmp5384 = getelementptr inbounds float, float* %tmp5383, i64 1
+ %tmp5385 = getelementptr inbounds float, float* %tmp5384, i64 1
+ %tmp5386 = getelementptr inbounds float, float* %tmp5385, i64 1
+ %tmp5387 = getelementptr inbounds float, float* %tmp5386, i64 1
+ %tmp5388 = getelementptr inbounds float, float* %tmp5387, i64 1
+ %tmp5389 = getelementptr inbounds float, float* %tmp5388, i64 1
+ %tmp5390 = getelementptr inbounds float, float* %tmp5389, i64 1
+ %tmp5391 = getelementptr inbounds float, float* %tmp5390, i64 1
+ %tmp5392 = getelementptr inbounds float, float* %tmp5391, i64 1
+ %tmp5393 = getelementptr inbounds float, float* %tmp5392, i64 1
+ %tmp5394 = getelementptr inbounds float, float* %tmp5393, i64 1
+ %tmp5395 = getelementptr inbounds float, float* %tmp5394, i64 1
+ %tmp5396 = getelementptr inbounds float, float* %tmp5395, i64 1
+ %tmp5397 = getelementptr inbounds float, float* %tmp5396, i64 1
+ %tmp5398 = getelementptr inbounds float, float* %tmp5397, i64 1
+ %tmp5399 = getelementptr inbounds float, float* %tmp5398, i64 1
+ %tmp5400 = getelementptr inbounds float, float* %tmp5399, i64 1
+ %tmp5401 = getelementptr inbounds float, float* %tmp5400, i64 1
+ %tmp5402 = getelementptr inbounds float, float* %tmp5401, i64 1
+ %tmp5403 = getelementptr inbounds float, float* %tmp5402, i64 1
+ %tmp5404 = getelementptr inbounds float, float* %tmp5403, i64 1
+ %tmp5405 = getelementptr inbounds float, float* %tmp5404, i64 1
+ %tmp5406 = getelementptr inbounds float, float* %tmp5405, i64 1
+ %tmp5407 = getelementptr inbounds float, float* %tmp5406, i64 1
+ %tmp5408 = getelementptr inbounds float, float* %tmp5407, i64 1
+ %tmp5409 = getelementptr inbounds float, float* %tmp5408, i64 1
+ %tmp5410 = getelementptr inbounds float, float* %tmp5409, i64 1
+ %tmp5411 = getelementptr inbounds float, float* %tmp5410, i64 1
+ %tmp5412 = getelementptr inbounds float, float* %tmp5411, i64 1
+ %tmp5413 = getelementptr inbounds float, float* %tmp5412, i64 1
+ %tmp5414 = getelementptr inbounds float, float* %tmp5413, i64 1
+ %tmp5415 = getelementptr inbounds float, float* %tmp5414, i64 1
+ %tmp5416 = getelementptr inbounds float, float* %tmp5415, i64 1
+ %tmp5417 = getelementptr inbounds float, float* %tmp5416, i64 1
+ %tmp5418 = getelementptr inbounds float, float* %tmp5417, i64 1
+ %tmp5419 = getelementptr inbounds float, float* %tmp5418, i64 1
+ %tmp5420 = getelementptr inbounds float, float* %tmp5419, i64 1
+ %tmp5421 = getelementptr inbounds float, float* %tmp5420, i64 1
+ %tmp5422 = getelementptr inbounds float, float* %tmp5421, i64 1
+ %tmp5423 = getelementptr inbounds float, float* %tmp5422, i64 1
+ %tmp5424 = getelementptr inbounds float, float* %tmp5423, i64 1
+ %tmp5425 = getelementptr inbounds float, float* %tmp5424, i64 1
+ %tmp5426 = getelementptr inbounds float, float* %tmp5425, i64 1
+ %tmp5427 = getelementptr inbounds float, float* %tmp5426, i64 1
+ %tmp5428 = getelementptr inbounds float, float* %tmp5427, i64 1
+ %tmp5429 = getelementptr inbounds float, float* %tmp5428, i64 1
+ %tmp5430 = getelementptr inbounds float, float* %tmp5429, i64 1
+ %tmp5431 = getelementptr inbounds float, float* %tmp5430, i64 1
+ %tmp5432 = getelementptr inbounds float, float* %tmp5431, i64 1
+ %tmp5433 = getelementptr inbounds float, float* %tmp5432, i64 1
+ %tmp5434 = getelementptr inbounds float, float* %tmp5433, i64 1
+ %tmp5435 = getelementptr inbounds float, float* %tmp5434, i64 1
+ %tmp5436 = getelementptr inbounds float, float* %tmp5435, i64 1
+ %tmp5437 = getelementptr inbounds float, float* %tmp5436, i64 1
+ %tmp5438 = getelementptr inbounds float, float* %tmp5437, i64 1
+ %tmp5439 = getelementptr inbounds float, float* %tmp5438, i64 1
+ %tmp5440 = getelementptr inbounds float, float* %tmp5439, i64 1
+ %tmp5441 = getelementptr inbounds float, float* %tmp5440, i64 1
+ %tmp5442 = getelementptr inbounds float, float* %tmp5441, i64 1
+ %tmp5443 = getelementptr inbounds float, float* %tmp5442, i64 1
+ %tmp5444 = getelementptr inbounds float, float* %tmp5443, i64 1
+ %tmp5445 = getelementptr inbounds float, float* %tmp5444, i64 1
+ %tmp5446 = getelementptr inbounds float, float* %tmp5445, i64 1
+ %tmp5447 = getelementptr inbounds float, float* %tmp5446, i64 1
+ %tmp5448 = getelementptr inbounds float, float* %tmp5447, i64 1
+ %tmp5449 = getelementptr inbounds float, float* %tmp5448, i64 1
+ %tmp5450 = getelementptr inbounds float, float* %tmp5449, i64 1
+ %tmp5451 = getelementptr inbounds float, float* %tmp5450, i64 1
+ %tmp5452 = getelementptr inbounds float, float* %tmp5451, i64 1
+ %tmp5453 = getelementptr inbounds float, float* %tmp5452, i64 1
+ %tmp5454 = getelementptr inbounds float, float* %tmp5453, i64 1
+ %tmp5455 = getelementptr inbounds float, float* %tmp5454, i64 1
+ %tmp5456 = getelementptr inbounds float, float* %tmp5455, i64 1
+ %tmp5457 = getelementptr inbounds float, float* %tmp5456, i64 1
+ %tmp5458 = getelementptr inbounds float, float* %tmp5457, i64 1
+ %tmp5459 = getelementptr inbounds float, float* %tmp5458, i64 1
+ %tmp5460 = getelementptr inbounds float, float* %tmp5459, i64 1
+ %tmp5461 = getelementptr inbounds float, float* %tmp5460, i64 1
+ %tmp5462 = getelementptr inbounds float, float* %tmp5461, i64 1
+ %tmp5463 = getelementptr inbounds float, float* %tmp5462, i64 1
+ %tmp5464 = getelementptr inbounds float, float* %tmp5463, i64 1
+ %tmp5465 = getelementptr inbounds float, float* %tmp5464, i64 1
+ %tmp5466 = getelementptr inbounds float, float* %tmp5465, i64 1
+ %tmp5467 = getelementptr inbounds float, float* %tmp5466, i64 1
+ %tmp5468 = getelementptr inbounds float, float* %tmp5467, i64 1
+ %tmp5469 = getelementptr inbounds float, float* %tmp5468, i64 1
+ %tmp5470 = getelementptr inbounds float, float* %tmp5469, i64 1
+ %tmp5471 = getelementptr inbounds float, float* %tmp5470, i64 1
+ %tmp5472 = getelementptr inbounds float, float* %tmp5471, i64 1
+ %tmp5473 = getelementptr inbounds float, float* %tmp5472, i64 1
+ %tmp5474 = getelementptr inbounds float, float* %tmp5473, i64 1
+ %tmp5475 = getelementptr inbounds float, float* %tmp5474, i64 1
+ %tmp5476 = getelementptr inbounds float, float* %tmp5475, i64 1
+ %tmp5477 = getelementptr inbounds float, float* %tmp5476, i64 1
+ %tmp5478 = getelementptr inbounds float, float* %tmp5477, i64 1
+ %tmp5479 = getelementptr inbounds float, float* %tmp5478, i64 1
+ %tmp5480 = getelementptr inbounds float, float* %tmp5479, i64 1
+ %tmp5481 = getelementptr inbounds float, float* %tmp5480, i64 1
+ %tmp5482 = getelementptr inbounds float, float* %tmp5481, i64 1
+ %tmp5483 = getelementptr inbounds float, float* %tmp5482, i64 1
+ %tmp5484 = getelementptr inbounds float, float* %tmp5483, i64 1
+ %tmp5485 = getelementptr inbounds float, float* %tmp5484, i64 1
+ %tmp5486 = getelementptr inbounds float, float* %tmp5485, i64 1
+ %tmp5487 = getelementptr inbounds float, float* %tmp5486, i64 1
+ %tmp5488 = getelementptr inbounds float, float* %tmp5487, i64 1
+ %tmp5489 = getelementptr inbounds float, float* %tmp5488, i64 1
+ %tmp5490 = getelementptr inbounds float, float* %tmp5489, i64 1
+ %tmp5491 = getelementptr inbounds float, float* %tmp5490, i64 1
+ %tmp5492 = getelementptr inbounds float, float* %tmp5491, i64 1
+ %tmp5493 = getelementptr inbounds float, float* %tmp5492, i64 1
+ %tmp5494 = getelementptr inbounds float, float* %tmp5493, i64 1
+ %tmp5495 = getelementptr inbounds float, float* %tmp5494, i64 1
+ %tmp5496 = getelementptr inbounds float, float* %tmp5495, i64 1
+ %tmp5497 = getelementptr inbounds float, float* %tmp5496, i64 1
+ %tmp5498 = getelementptr inbounds float, float* %tmp5497, i64 1
+ %tmp5499 = getelementptr inbounds float, float* %tmp5498, i64 1
+ %tmp5500 = getelementptr inbounds float, float* %tmp5499, i64 1
+ %tmp5501 = getelementptr inbounds float, float* %tmp5500, i64 1
+ %tmp5502 = getelementptr inbounds float, float* %tmp5501, i64 1
+ %tmp5503 = getelementptr inbounds float, float* %tmp5502, i64 1
+ %tmp5504 = getelementptr inbounds float, float* %tmp5503, i64 1
+ %tmp5505 = getelementptr inbounds float, float* %tmp5504, i64 1
+ %tmp5506 = getelementptr inbounds float, float* %tmp5505, i64 1
+ %tmp5507 = getelementptr inbounds float, float* %tmp5506, i64 1
+ %tmp5508 = getelementptr inbounds float, float* %tmp5507, i64 1
+ %tmp5509 = getelementptr inbounds float, float* %tmp5508, i64 1
+ %tmp5510 = getelementptr inbounds float, float* %tmp5509, i64 1
+ %tmp5511 = getelementptr inbounds float, float* %tmp5510, i64 1
+ %tmp5512 = getelementptr inbounds float, float* %tmp5511, i64 1
+ %tmp5513 = getelementptr inbounds float, float* %tmp5512, i64 1
+ %tmp5514 = getelementptr inbounds float, float* %tmp5513, i64 1
+ %tmp5515 = getelementptr inbounds float, float* %tmp5514, i64 1
+ %tmp5516 = getelementptr inbounds float, float* %tmp5515, i64 1
+ %tmp5517 = getelementptr inbounds float, float* %tmp5516, i64 1
+ %tmp5518 = getelementptr inbounds float, float* %tmp5517, i64 1
+ %tmp5519 = getelementptr inbounds float, float* %tmp5518, i64 1
+ %tmp5520 = getelementptr inbounds float, float* %tmp5519, i64 1
+ %tmp5521 = getelementptr inbounds float, float* %tmp5520, i64 1
+ %tmp5522 = getelementptr inbounds float, float* %tmp5521, i64 1
+ %tmp5523 = getelementptr inbounds float, float* %tmp5522, i64 1
+ %tmp5524 = getelementptr inbounds float, float* %tmp5523, i64 1
+ %tmp5525 = getelementptr inbounds float, float* %tmp5524, i64 1
+ %tmp5526 = getelementptr inbounds float, float* %tmp5525, i64 1
+ %tmp5527 = getelementptr inbounds float, float* %tmp5526, i64 1
+ %tmp5528 = getelementptr inbounds float, float* %tmp5527, i64 1
+ %tmp5529 = getelementptr inbounds float, float* %tmp5528, i64 1
+ %tmp5530 = getelementptr inbounds float, float* %tmp5529, i64 1
+ %tmp5531 = getelementptr inbounds float, float* %tmp5530, i64 1
+ %tmp5532 = getelementptr inbounds float, float* %tmp5531, i64 1
+ %tmp5533 = getelementptr inbounds float, float* %tmp5532, i64 1
+ %tmp5534 = getelementptr inbounds float, float* %tmp5533, i64 1
+ %tmp5535 = getelementptr inbounds float, float* %tmp5534, i64 1
+ %tmp5536 = getelementptr inbounds float, float* %tmp5535, i64 1
+ %tmp5537 = getelementptr inbounds float, float* %tmp5536, i64 1
+ %tmp5538 = getelementptr inbounds float, float* %tmp5537, i64 1
+ %tmp5539 = getelementptr inbounds float, float* %tmp5538, i64 1
+ %tmp5540 = getelementptr inbounds float, float* %tmp5539, i64 1
+ %tmp5541 = getelementptr inbounds float, float* %tmp5540, i64 1
+ %tmp5542 = getelementptr inbounds float, float* %tmp5541, i64 1
+ %tmp5543 = getelementptr inbounds float, float* %tmp5542, i64 1
+ %tmp5544 = getelementptr inbounds float, float* %tmp5543, i64 1
+ %tmp5545 = getelementptr inbounds float, float* %tmp5544, i64 1
+ %tmp5546 = getelementptr inbounds float, float* %tmp5545, i64 1
+ %tmp5547 = getelementptr inbounds float, float* %tmp5546, i64 1
+ %tmp5548 = getelementptr inbounds float, float* %tmp5547, i64 1
+ %tmp5549 = getelementptr inbounds float, float* %tmp5548, i64 1
+ %tmp5550 = getelementptr inbounds float, float* %tmp5549, i64 1
+ %tmp5551 = getelementptr inbounds float, float* %tmp5550, i64 1
+ %tmp5552 = getelementptr inbounds float, float* %tmp5551, i64 1
+ %tmp5553 = getelementptr inbounds float, float* %tmp5552, i64 1
+ %tmp5554 = getelementptr inbounds float, float* %tmp5553, i64 1
+ %tmp5555 = getelementptr inbounds float, float* %tmp5554, i64 1
+ %tmp5556 = getelementptr inbounds float, float* %tmp5555, i64 1
+ %tmp5557 = getelementptr inbounds float, float* %tmp5556, i64 1
+ %tmp5558 = getelementptr inbounds float, float* %tmp5557, i64 1
+ %tmp5559 = getelementptr inbounds float, float* %tmp5558, i64 1
+ %tmp5560 = getelementptr inbounds float, float* %tmp5559, i64 1
+ %tmp5561 = getelementptr inbounds float, float* %tmp5560, i64 1
+ %tmp5562 = getelementptr inbounds float, float* %tmp5561, i64 1
+ %tmp5563 = getelementptr inbounds float, float* %tmp5562, i64 1
+ %tmp5564 = getelementptr inbounds float, float* %tmp5563, i64 1
+ %tmp5565 = getelementptr inbounds float, float* %tmp5564, i64 1
+ %tmp5566 = getelementptr inbounds float, float* %tmp5565, i64 1
+ %tmp5567 = getelementptr inbounds float, float* %tmp5566, i64 1
+ %tmp5568 = getelementptr inbounds float, float* %tmp5567, i64 1
+ %tmp5569 = getelementptr inbounds float, float* %tmp5568, i64 1
+ %tmp5570 = getelementptr inbounds float, float* %tmp5569, i64 1
+ %tmp5571 = getelementptr inbounds float, float* %tmp5570, i64 1
+ %tmp5572 = getelementptr inbounds float, float* %tmp5571, i64 1
+ %tmp5573 = getelementptr inbounds float, float* %tmp5572, i64 1
+ %tmp5574 = getelementptr inbounds float, float* %tmp5573, i64 1
+ %tmp5575 = getelementptr inbounds float, float* %tmp5574, i64 1
+ %tmp5576 = getelementptr inbounds float, float* %tmp5575, i64 1
+ %tmp5577 = getelementptr inbounds float, float* %tmp5576, i64 1
+ %tmp5578 = getelementptr inbounds float, float* %tmp5577, i64 1
+ %tmp5579 = getelementptr inbounds float, float* %tmp5578, i64 1
+ %tmp5580 = getelementptr inbounds float, float* %tmp5579, i64 1
+ %tmp5581 = getelementptr inbounds float, float* %tmp5580, i64 1
+ %tmp5582 = getelementptr inbounds float, float* %tmp5581, i64 1
+ %tmp5583 = getelementptr inbounds float, float* %tmp5582, i64 1
+ %tmp5584 = getelementptr inbounds float, float* %tmp5583, i64 1
+ %tmp5585 = getelementptr inbounds float, float* %tmp5584, i64 1
+ %tmp5586 = getelementptr inbounds float, float* %tmp5585, i64 1
+ %tmp5587 = getelementptr inbounds float, float* %tmp5586, i64 1
+ %tmp5588 = getelementptr inbounds float, float* %tmp5587, i64 1
+ %tmp5589 = getelementptr inbounds float, float* %tmp5588, i64 1
+ %tmp5590 = getelementptr inbounds float, float* %tmp5589, i64 1
+ %tmp5591 = getelementptr inbounds float, float* %tmp5590, i64 1
+ %tmp5592 = getelementptr inbounds float, float* %tmp5591, i64 1
+ %tmp5593 = getelementptr inbounds float, float* %tmp5592, i64 1
+ %tmp5594 = getelementptr inbounds float, float* %tmp5593, i64 1
+ %tmp5595 = getelementptr inbounds float, float* %tmp5594, i64 1
+ %tmp5596 = getelementptr inbounds float, float* %tmp5595, i64 1
+ %tmp5597 = getelementptr inbounds float, float* %tmp5596, i64 1
+ %tmp5598 = getelementptr inbounds float, float* %tmp5597, i64 1
+ %tmp5599 = getelementptr inbounds float, float* %tmp5598, i64 1
+ %tmp5600 = getelementptr inbounds float, float* %tmp5599, i64 1
+ %tmp5601 = getelementptr inbounds float, float* %tmp5600, i64 1
+ %tmp5602 = getelementptr inbounds float, float* %tmp5601, i64 1
+ %tmp5603 = getelementptr inbounds float, float* %tmp5602, i64 1
+ %tmp5604 = getelementptr inbounds float, float* %tmp5603, i64 1
+ %tmp5605 = getelementptr inbounds float, float* %tmp5604, i64 1
+ %tmp5606 = getelementptr inbounds float, float* %tmp5605, i64 1
+ %tmp5607 = getelementptr inbounds float, float* %tmp5606, i64 1
+ %tmp5608 = getelementptr inbounds float, float* %tmp5607, i64 1
+ %tmp5609 = getelementptr inbounds float, float* %tmp5608, i64 1
+ %tmp5610 = getelementptr inbounds float, float* %tmp5609, i64 1
+ %tmp5611 = getelementptr inbounds float, float* %tmp5610, i64 1
+ %tmp5612 = getelementptr inbounds float, float* %tmp5611, i64 1
+ %tmp5613 = getelementptr inbounds float, float* %tmp5612, i64 1
+ %tmp5614 = getelementptr inbounds float, float* %tmp5613, i64 1
+ %tmp5615 = getelementptr inbounds float, float* %tmp5614, i64 1
+ %tmp5616 = getelementptr inbounds float, float* %tmp5615, i64 1
+ %tmp5617 = getelementptr inbounds float, float* %tmp5616, i64 1
+ %tmp5618 = getelementptr inbounds float, float* %tmp5617, i64 1
+ %tmp5619 = getelementptr inbounds float, float* %tmp5618, i64 1
+ %tmp5620 = getelementptr inbounds float, float* %tmp5619, i64 1
+ %tmp5621 = getelementptr inbounds float, float* %tmp5620, i64 1
+ %tmp5622 = getelementptr inbounds float, float* %tmp5621, i64 1
+ %tmp5623 = getelementptr inbounds float, float* %tmp5622, i64 1
+ %tmp5624 = getelementptr inbounds float, float* %tmp5623, i64 1
+ %tmp5625 = getelementptr inbounds float, float* %tmp5624, i64 1
+ %tmp5626 = getelementptr inbounds float, float* %tmp5625, i64 1
+ %tmp5627 = getelementptr inbounds float, float* %tmp5626, i64 1
+ %tmp5628 = getelementptr inbounds float, float* %tmp5627, i64 1
+ %tmp5629 = getelementptr inbounds float, float* %tmp5628, i64 1
+ %tmp5630 = getelementptr inbounds float, float* %tmp5629, i64 1
+ %tmp5631 = getelementptr inbounds float, float* %tmp5630, i64 1
+ %tmp5632 = getelementptr inbounds float, float* %tmp5631, i64 1
+ %tmp5633 = getelementptr inbounds float, float* %tmp5632, i64 1
+ %tmp5634 = getelementptr inbounds float, float* %tmp5633, i64 1
+ %tmp5635 = getelementptr inbounds float, float* %tmp5634, i64 1
+ %tmp5636 = getelementptr inbounds float, float* %tmp5635, i64 1
+ %tmp5637 = getelementptr inbounds float, float* %tmp5636, i64 1
+ %tmp5638 = getelementptr inbounds float, float* %tmp5637, i64 1
+ %tmp5639 = getelementptr inbounds float, float* %tmp5638, i64 1
+ %tmp5640 = getelementptr inbounds float, float* %tmp5639, i64 1
+ %tmp5641 = getelementptr inbounds float, float* %tmp5640, i64 1
+ %tmp5642 = getelementptr inbounds float, float* %tmp5641, i64 1
+ %tmp5643 = getelementptr inbounds float, float* %tmp5642, i64 1
+ %tmp5644 = getelementptr inbounds float, float* %tmp5643, i64 1
+ %tmp5645 = getelementptr inbounds float, float* %tmp5644, i64 1
+ %tmp5646 = getelementptr inbounds float, float* %tmp5645, i64 1
+ %tmp5647 = getelementptr inbounds float, float* %tmp5646, i64 1
+ %tmp5648 = getelementptr inbounds float, float* %tmp5647, i64 1
+ %tmp5649 = getelementptr inbounds float, float* %tmp5648, i64 1
+ %tmp5650 = getelementptr inbounds float, float* %tmp5649, i64 1
+ %tmp5651 = getelementptr inbounds float, float* %tmp5650, i64 1
+ %tmp5652 = getelementptr inbounds float, float* %tmp5651, i64 1
+ %tmp5653 = getelementptr inbounds float, float* %tmp5652, i64 1
+ %tmp5654 = getelementptr inbounds float, float* %tmp5653, i64 1
+ %tmp5655 = getelementptr inbounds float, float* %tmp5654, i64 1
+ %tmp5656 = getelementptr inbounds float, float* %tmp5655, i64 1
+ %tmp5657 = getelementptr inbounds float, float* %tmp5656, i64 1
+ %tmp5658 = getelementptr inbounds float, float* %tmp5657, i64 1
+ %tmp5659 = getelementptr inbounds float, float* %tmp5658, i64 1
+ %tmp5660 = getelementptr inbounds float, float* %tmp5659, i64 1
+ %tmp5661 = getelementptr inbounds float, float* %tmp5660, i64 1
+ %tmp5662 = getelementptr inbounds float, float* %tmp5661, i64 1
+ %tmp5663 = getelementptr inbounds float, float* %tmp5662, i64 1
+ %tmp5664 = getelementptr inbounds float, float* %tmp5663, i64 1
+ %tmp5665 = getelementptr inbounds float, float* %tmp5664, i64 1
+ %tmp5666 = getelementptr inbounds float, float* %tmp5665, i64 1
+ %tmp5667 = getelementptr inbounds float, float* %tmp5666, i64 1
+ %tmp5668 = getelementptr inbounds float, float* %tmp5667, i64 1
+ %tmp5669 = getelementptr inbounds float, float* %tmp5668, i64 1
+ %tmp5670 = getelementptr inbounds float, float* %tmp5669, i64 1
+ %tmp5671 = getelementptr inbounds float, float* %tmp5670, i64 1
+ %tmp5672 = getelementptr inbounds float, float* %tmp5671, i64 1
+ %tmp5673 = getelementptr inbounds float, float* %tmp5672, i64 1
+ %tmp5674 = getelementptr inbounds float, float* %tmp5673, i64 1
+ %tmp5675 = getelementptr inbounds float, float* %tmp5674, i64 1
+ %tmp5676 = getelementptr inbounds float, float* %tmp5675, i64 1
+ %tmp5677 = getelementptr inbounds float, float* %tmp5676, i64 1
+ %tmp5678 = getelementptr inbounds float, float* %tmp5677, i64 1
+ %tmp5679 = getelementptr inbounds float, float* %tmp5678, i64 1
+ %tmp5680 = getelementptr inbounds float, float* %tmp5679, i64 1
+ %tmp5681 = getelementptr inbounds float, float* %tmp5680, i64 1
+ %tmp5682 = getelementptr inbounds float, float* %tmp5681, i64 1
+ %tmp5683 = getelementptr inbounds float, float* %tmp5682, i64 1
+ %tmp5684 = getelementptr inbounds float, float* %tmp5683, i64 1
+ %tmp5685 = getelementptr inbounds float, float* %tmp5684, i64 1
+ %tmp5686 = getelementptr inbounds float, float* %tmp5685, i64 1
+ %tmp5687 = getelementptr inbounds float, float* %tmp5686, i64 1
+ %tmp5688 = getelementptr inbounds float, float* %tmp5687, i64 1
+ %tmp5689 = getelementptr inbounds float, float* %tmp5688, i64 1
+ %tmp5690 = getelementptr inbounds float, float* %tmp5689, i64 1
+ %tmp5691 = getelementptr inbounds float, float* %tmp5690, i64 1
+ %tmp5692 = getelementptr inbounds float, float* %tmp5691, i64 1
+ %tmp5693 = getelementptr inbounds float, float* %tmp5692, i64 1
+ %tmp5694 = getelementptr inbounds float, float* %tmp5693, i64 1
+ %tmp5695 = getelementptr inbounds float, float* %tmp5694, i64 1
+ %tmp5696 = getelementptr inbounds float, float* %tmp5695, i64 1
+ %tmp5697 = getelementptr inbounds float, float* %tmp5696, i64 1
+ %tmp5698 = getelementptr inbounds float, float* %tmp5697, i64 1
+ %tmp5699 = getelementptr inbounds float, float* %tmp5698, i64 1
+ %tmp5700 = getelementptr inbounds float, float* %tmp5699, i64 1
+ %tmp5701 = getelementptr inbounds float, float* %tmp5700, i64 1
+ %tmp5702 = getelementptr inbounds float, float* %tmp5701, i64 1
+ %tmp5703 = getelementptr inbounds float, float* %tmp5702, i64 1
+ %tmp5704 = getelementptr inbounds float, float* %tmp5703, i64 1
+ %tmp5705 = getelementptr inbounds float, float* %tmp5704, i64 1
+ %tmp5706 = getelementptr inbounds float, float* %tmp5705, i64 1
+ %tmp5707 = getelementptr inbounds float, float* %tmp5706, i64 1
+ %tmp5708 = getelementptr inbounds float, float* %tmp5707, i64 1
+ %tmp5709 = getelementptr inbounds float, float* %tmp5708, i64 1
+ %tmp5710 = getelementptr inbounds float, float* %tmp5709, i64 1
+ %tmp5711 = getelementptr inbounds float, float* %tmp5710, i64 1
+ %tmp5712 = getelementptr inbounds float, float* %tmp5711, i64 1
+ %tmp5713 = getelementptr inbounds float, float* %tmp5712, i64 1
+ %tmp5714 = getelementptr inbounds float, float* %tmp5713, i64 1
+ %tmp5715 = getelementptr inbounds float, float* %tmp5714, i64 1
+ %tmp5716 = getelementptr inbounds float, float* %tmp5715, i64 1
+ %tmp5717 = getelementptr inbounds float, float* %tmp5716, i64 1
+ %tmp5718 = getelementptr inbounds float, float* %tmp5717, i64 1
+ %tmp5719 = getelementptr inbounds float, float* %tmp5718, i64 1
+ %tmp5720 = getelementptr inbounds float, float* %tmp5719, i64 1
+ %tmp5721 = getelementptr inbounds float, float* %tmp5720, i64 1
+ %tmp5722 = getelementptr inbounds float, float* %tmp5721, i64 1
+ %tmp5723 = getelementptr inbounds float, float* %tmp5722, i64 1
+ %tmp5724 = getelementptr inbounds float, float* %tmp5723, i64 1
+ %tmp5725 = getelementptr inbounds float, float* %tmp5724, i64 1
+ %tmp5726 = getelementptr inbounds float, float* %tmp5725, i64 1
+ %tmp5727 = getelementptr inbounds float, float* %tmp5726, i64 1
+ %tmp5728 = getelementptr inbounds float, float* %tmp5727, i64 1
+ %tmp5729 = getelementptr inbounds float, float* %tmp5728, i64 1
+ %tmp5730 = getelementptr inbounds float, float* %tmp5729, i64 1
+ %tmp5731 = getelementptr inbounds float, float* %tmp5730, i64 1
+ %tmp5732 = getelementptr inbounds float, float* %tmp5731, i64 1
+ %tmp5733 = getelementptr inbounds float, float* %tmp5732, i64 1
+ %tmp5734 = getelementptr inbounds float, float* %tmp5733, i64 1
+ %tmp5735 = getelementptr inbounds float, float* %tmp5734, i64 1
+ %tmp5736 = getelementptr inbounds float, float* %tmp5735, i64 1
+ %tmp5737 = getelementptr inbounds float, float* %tmp5736, i64 1
+ %tmp5738 = getelementptr inbounds float, float* %tmp5737, i64 1
+ %tmp5739 = getelementptr inbounds float, float* %tmp5738, i64 1
+ %tmp5740 = getelementptr inbounds float, float* %tmp5739, i64 1
+ %tmp5741 = getelementptr inbounds float, float* %tmp5740, i64 1
+ %tmp5742 = getelementptr inbounds float, float* %tmp5741, i64 1
+ %tmp5743 = getelementptr inbounds float, float* %tmp5742, i64 1
+ %tmp5744 = getelementptr inbounds float, float* %tmp5743, i64 1
+ %tmp5745 = getelementptr inbounds float, float* %tmp5744, i64 1
+ %tmp5746 = getelementptr inbounds float, float* %tmp5745, i64 1
+ %tmp5747 = getelementptr inbounds float, float* %tmp5746, i64 1
+ %tmp5748 = getelementptr inbounds float, float* %tmp5747, i64 1
+ %tmp5749 = getelementptr inbounds float, float* %tmp5748, i64 1
+ %tmp5750 = getelementptr inbounds float, float* %tmp5749, i64 1
+ %tmp5751 = getelementptr inbounds float, float* %tmp5750, i64 1
+ %tmp5752 = getelementptr inbounds float, float* %tmp5751, i64 1
+ %tmp5753 = getelementptr inbounds float, float* %tmp5752, i64 1
+ %tmp5754 = getelementptr inbounds float, float* %tmp5753, i64 1
+ %tmp5755 = getelementptr inbounds float, float* %tmp5754, i64 1
+ %tmp5756 = getelementptr inbounds float, float* %tmp5755, i64 1
+ %tmp5757 = getelementptr inbounds float, float* %tmp5756, i64 1
+ %tmp5758 = getelementptr inbounds float, float* %tmp5757, i64 1
+ %tmp5759 = getelementptr inbounds float, float* %tmp5758, i64 1
+ %tmp5760 = getelementptr inbounds float, float* %tmp5759, i64 1
+ %tmp5761 = getelementptr inbounds float, float* %tmp5760, i64 1
+ %tmp5762 = getelementptr inbounds float, float* %tmp5761, i64 1
+ %tmp5763 = getelementptr inbounds float, float* %tmp5762, i64 1
+ %tmp5764 = getelementptr inbounds float, float* %tmp5763, i64 1
+ %tmp5765 = getelementptr inbounds float, float* %tmp5764, i64 1
+ %tmp5766 = getelementptr inbounds float, float* %tmp5765, i64 1
+ %tmp5767 = getelementptr inbounds float, float* %tmp5766, i64 1
+ %tmp5768 = getelementptr inbounds float, float* %tmp5767, i64 1
+ %tmp5769 = getelementptr inbounds float, float* %tmp5768, i64 1
+ %tmp5770 = getelementptr inbounds float, float* %tmp5769, i64 1
+ %tmp5771 = getelementptr inbounds float, float* %tmp5770, i64 1
+ %tmp5772 = getelementptr inbounds float, float* %tmp5771, i64 1
+ %tmp5773 = getelementptr inbounds float, float* %tmp5772, i64 1
+ %tmp5774 = getelementptr inbounds float, float* %tmp5773, i64 1
+ %tmp5775 = getelementptr inbounds float, float* %tmp5774, i64 1
+ %tmp5776 = getelementptr inbounds float, float* %tmp5775, i64 1
+ %tmp5777 = getelementptr inbounds float, float* %tmp5776, i64 1
+ %tmp5778 = getelementptr inbounds float, float* %tmp5777, i64 1
+ %tmp5779 = getelementptr inbounds float, float* %tmp5778, i64 1
+ %tmp5780 = getelementptr inbounds float, float* %tmp5779, i64 1
+ %tmp5781 = getelementptr inbounds float, float* %tmp5780, i64 1
+ %tmp5782 = getelementptr inbounds float, float* %tmp5781, i64 1
+ %tmp5783 = getelementptr inbounds float, float* %tmp5782, i64 1
+ %tmp5784 = getelementptr inbounds float, float* %tmp5783, i64 1
+ %tmp5785 = getelementptr inbounds float, float* %tmp5784, i64 1
+ %tmp5786 = getelementptr inbounds float, float* %tmp5785, i64 1
+ %tmp5787 = getelementptr inbounds float, float* %tmp5786, i64 1
+ %tmp5788 = getelementptr inbounds float, float* %tmp5787, i64 1
+ %tmp5789 = getelementptr inbounds float, float* %tmp5788, i64 1
+ %tmp5790 = getelementptr inbounds float, float* %tmp5789, i64 1
+ %tmp5791 = getelementptr inbounds float, float* %tmp5790, i64 1
+ %tmp5792 = getelementptr inbounds float, float* %tmp5791, i64 1
+ %tmp5793 = getelementptr inbounds float, float* %tmp5792, i64 1
+ %tmp5794 = getelementptr inbounds float, float* %tmp5793, i64 1
+ %tmp5795 = getelementptr inbounds float, float* %tmp5794, i64 1
+ %tmp5796 = getelementptr inbounds float, float* %tmp5795, i64 1
+ %tmp5797 = getelementptr inbounds float, float* %tmp5796, i64 1
+ %tmp5798 = getelementptr inbounds float, float* %tmp5797, i64 1
+ %tmp5799 = getelementptr inbounds float, float* %tmp5798, i64 1
+ %tmp5800 = getelementptr inbounds float, float* %tmp5799, i64 1
+ %tmp5801 = getelementptr inbounds float, float* %tmp5800, i64 1
+ %tmp5802 = getelementptr inbounds float, float* %tmp5801, i64 1
+ %tmp5803 = getelementptr inbounds float, float* %tmp5802, i64 1
+ %tmp5804 = getelementptr inbounds float, float* %tmp5803, i64 1
+ %tmp5805 = getelementptr inbounds float, float* %tmp5804, i64 1
+ %tmp5806 = getelementptr inbounds float, float* %tmp5805, i64 1
+ %tmp5807 = getelementptr inbounds float, float* %tmp5806, i64 1
+ %tmp5808 = getelementptr inbounds float, float* %tmp5807, i64 1
+ %tmp5809 = getelementptr inbounds float, float* %tmp5808, i64 1
+ %tmp5810 = getelementptr inbounds float, float* %tmp5809, i64 1
+ %tmp5811 = getelementptr inbounds float, float* %tmp5810, i64 1
+ %tmp5812 = getelementptr inbounds float, float* %tmp5811, i64 1
+ %tmp5813 = getelementptr inbounds float, float* %tmp5812, i64 1
+ %tmp5814 = getelementptr inbounds float, float* %tmp5813, i64 1
+ %tmp5815 = getelementptr inbounds float, float* %tmp5814, i64 1
+ %tmp5816 = getelementptr inbounds float, float* %tmp5815, i64 1
+ %tmp5817 = getelementptr inbounds float, float* %tmp5816, i64 1
+ %tmp5818 = getelementptr inbounds float, float* %tmp5817, i64 1
+ %tmp5819 = getelementptr inbounds float, float* %tmp5818, i64 1
+ %tmp5820 = getelementptr inbounds float, float* %tmp5819, i64 1
+ %tmp5821 = getelementptr inbounds float, float* %tmp5820, i64 1
+ %tmp5822 = getelementptr inbounds float, float* %tmp5821, i64 1
+ %tmp5823 = getelementptr inbounds float, float* %tmp5822, i64 1
+ %tmp5824 = getelementptr inbounds float, float* %tmp5823, i64 1
+ %tmp5825 = getelementptr inbounds float, float* %tmp5824, i64 1
+ %tmp5826 = getelementptr inbounds float, float* %tmp5825, i64 1
+ %tmp5827 = getelementptr inbounds float, float* %tmp5826, i64 1
+ %tmp5828 = getelementptr inbounds float, float* %tmp5827, i64 1
+ %tmp5829 = getelementptr inbounds float, float* %tmp5828, i64 1
+ %tmp5830 = getelementptr inbounds float, float* %tmp5829, i64 1
+ %tmp5831 = getelementptr inbounds float, float* %tmp5830, i64 1
+ %tmp5832 = getelementptr inbounds float, float* %tmp5831, i64 1
+ %tmp5833 = getelementptr inbounds float, float* %tmp5832, i64 1
+ %tmp5834 = getelementptr inbounds float, float* %tmp5833, i64 1
+ %tmp5835 = getelementptr inbounds float, float* %tmp5834, i64 1
+ %tmp5836 = getelementptr inbounds float, float* %tmp5835, i64 1
+ %tmp5837 = getelementptr inbounds float, float* %tmp5836, i64 1
+ %tmp5838 = getelementptr inbounds float, float* %tmp5837, i64 1
+ %tmp5839 = getelementptr inbounds float, float* %tmp5838, i64 1
+ %tmp5840 = getelementptr inbounds float, float* %tmp5839, i64 1
+ %tmp5841 = getelementptr inbounds float, float* %tmp5840, i64 1
+ %tmp5842 = getelementptr inbounds float, float* %tmp5841, i64 1
+ %tmp5843 = getelementptr inbounds float, float* %tmp5842, i64 1
+ %tmp5844 = getelementptr inbounds float, float* %tmp5843, i64 1
+ %tmp5845 = getelementptr inbounds float, float* %tmp5844, i64 1
+ %tmp5846 = getelementptr inbounds float, float* %tmp5845, i64 1
+ %tmp5847 = getelementptr inbounds float, float* %tmp5846, i64 1
+ %tmp5848 = getelementptr inbounds float, float* %tmp5847, i64 1
+ %tmp5849 = getelementptr inbounds float, float* %tmp5848, i64 1
+ %tmp5850 = getelementptr inbounds float, float* %tmp5849, i64 1
+ %tmp5851 = getelementptr inbounds float, float* %tmp5850, i64 1
+ %tmp5852 = getelementptr inbounds float, float* %tmp5851, i64 1
+ %tmp5853 = getelementptr inbounds float, float* %tmp5852, i64 1
+ %tmp5854 = getelementptr inbounds float, float* %tmp5853, i64 1
+ %tmp5855 = getelementptr inbounds float, float* %tmp5854, i64 1
+ %tmp5856 = getelementptr inbounds float, float* %tmp5855, i64 1
+ %tmp5857 = getelementptr inbounds float, float* %tmp5856, i64 1
+ %tmp5858 = getelementptr inbounds float, float* %tmp5857, i64 1
+ %tmp5859 = getelementptr inbounds float, float* %tmp5858, i64 1
+ %tmp5860 = getelementptr inbounds float, float* %tmp5859, i64 1
+ %tmp5861 = getelementptr inbounds float, float* %tmp5860, i64 1
+ %tmp5862 = getelementptr inbounds float, float* %tmp5861, i64 1
+ %tmp5863 = getelementptr inbounds float, float* %tmp5862, i64 1
+ %tmp5864 = getelementptr inbounds float, float* %tmp5863, i64 1
+ %tmp5865 = getelementptr inbounds float, float* %tmp5864, i64 1
+ %tmp5866 = getelementptr inbounds float, float* %tmp5865, i64 1
+ %tmp5867 = getelementptr inbounds float, float* %tmp5866, i64 1
+ %tmp5868 = getelementptr inbounds float, float* %tmp5867, i64 1
+ %tmp5869 = getelementptr inbounds float, float* %tmp5868, i64 1
+ %tmp5870 = getelementptr inbounds float, float* %tmp5869, i64 1
+ %tmp5871 = getelementptr inbounds float, float* %tmp5870, i64 1
+ %tmp5872 = getelementptr inbounds float, float* %tmp5871, i64 1
+ %tmp5873 = getelementptr inbounds float, float* %tmp5872, i64 1
+ %tmp5874 = getelementptr inbounds float, float* %tmp5873, i64 1
+ %tmp5875 = getelementptr inbounds float, float* %tmp5874, i64 1
+ %tmp5876 = getelementptr inbounds float, float* %tmp5875, i64 1
+ %tmp5877 = getelementptr inbounds float, float* %tmp5876, i64 1
+ %tmp5878 = getelementptr inbounds float, float* %tmp5877, i64 1
+ %tmp5879 = getelementptr inbounds float, float* %tmp5878, i64 1
+ %tmp5880 = getelementptr inbounds float, float* %tmp5879, i64 1
+ %tmp5881 = getelementptr inbounds float, float* %tmp5880, i64 1
+ %tmp5882 = getelementptr inbounds float, float* %tmp5881, i64 1
+ %tmp5883 = getelementptr inbounds float, float* %tmp5882, i64 1
+ %tmp5884 = getelementptr inbounds float, float* %tmp5883, i64 1
+ %tmp5885 = getelementptr inbounds float, float* %tmp5884, i64 1
+ %tmp5886 = getelementptr inbounds float, float* %tmp5885, i64 1
+ %tmp5887 = getelementptr inbounds float, float* %tmp5886, i64 1
+ %tmp5888 = getelementptr inbounds float, float* %tmp5887, i64 1
+ %tmp5889 = getelementptr inbounds float, float* %tmp5888, i64 1
+ %tmp5890 = getelementptr inbounds float, float* %tmp5889, i64 1
+ %tmp5891 = getelementptr inbounds float, float* %tmp5890, i64 1
+ %tmp5892 = getelementptr inbounds float, float* %tmp5891, i64 1
+ %tmp5893 = getelementptr inbounds float, float* %tmp5892, i64 1
+ %tmp5894 = getelementptr inbounds float, float* %tmp5893, i64 1
+ %tmp5895 = getelementptr inbounds float, float* %tmp5894, i64 1
+ %tmp5896 = getelementptr inbounds float, float* %tmp5895, i64 1
+ %tmp5897 = getelementptr inbounds float, float* %tmp5896, i64 1
+ %tmp5898 = getelementptr inbounds float, float* %tmp5897, i64 1
+ %tmp5899 = getelementptr inbounds float, float* %tmp5898, i64 1
+ %tmp5900 = getelementptr inbounds float, float* %tmp5899, i64 1
+ %tmp5901 = getelementptr inbounds float, float* %tmp5900, i64 1
+ %tmp5902 = getelementptr inbounds float, float* %tmp5901, i64 1
+ %tmp5903 = getelementptr inbounds float, float* %tmp5902, i64 1
+ %tmp5904 = getelementptr inbounds float, float* %tmp5903, i64 1
+ %tmp5905 = getelementptr inbounds float, float* %tmp5904, i64 1
+ %tmp5906 = getelementptr inbounds float, float* %tmp5905, i64 1
+ %tmp5907 = getelementptr inbounds float, float* %tmp5906, i64 1
+ %tmp5908 = getelementptr inbounds float, float* %tmp5907, i64 1
+ %tmp5909 = getelementptr inbounds float, float* %tmp5908, i64 1
+ %tmp5910 = getelementptr inbounds float, float* %tmp5909, i64 1
+ %tmp5911 = getelementptr inbounds float, float* %tmp5910, i64 1
+ %tmp5912 = getelementptr inbounds float, float* %tmp5911, i64 1
+ %tmp5913 = getelementptr inbounds float, float* %tmp5912, i64 1
+ %tmp5914 = getelementptr inbounds float, float* %tmp5913, i64 1
+ %tmp5915 = getelementptr inbounds float, float* %tmp5914, i64 1
+ %tmp5916 = getelementptr inbounds float, float* %tmp5915, i64 1
+ %tmp5917 = getelementptr inbounds float, float* %tmp5916, i64 1
+ %tmp5918 = getelementptr inbounds float, float* %tmp5917, i64 1
+ %tmp5919 = getelementptr inbounds float, float* %tmp5918, i64 1
+ %tmp5920 = getelementptr inbounds float, float* %tmp5919, i64 1
+ %tmp5921 = getelementptr inbounds float, float* %tmp5920, i64 1
+ %tmp5922 = getelementptr inbounds float, float* %tmp5921, i64 1
+ %tmp5923 = getelementptr inbounds float, float* %tmp5922, i64 1
+ %tmp5924 = getelementptr inbounds float, float* %tmp5923, i64 1
+ %tmp5925 = getelementptr inbounds float, float* %tmp5924, i64 1
+ %tmp5926 = getelementptr inbounds float, float* %tmp5925, i64 1
+ %tmp5927 = getelementptr inbounds float, float* %tmp5926, i64 1
+ %tmp5928 = getelementptr inbounds float, float* %tmp5927, i64 1
+ %tmp5929 = getelementptr inbounds float, float* %tmp5928, i64 1
+ %tmp5930 = getelementptr inbounds float, float* %tmp5929, i64 1
+ %tmp5931 = getelementptr inbounds float, float* %tmp5930, i64 1
+ %tmp5932 = getelementptr inbounds float, float* %tmp5931, i64 1
+ %tmp5933 = getelementptr inbounds float, float* %tmp5932, i64 1
+ %tmp5934 = getelementptr inbounds float, float* %tmp5933, i64 1
+ %tmp5935 = getelementptr inbounds float, float* %tmp5934, i64 1
+ %tmp5936 = getelementptr inbounds float, float* %tmp5935, i64 1
+ %tmp5937 = getelementptr inbounds float, float* %tmp5936, i64 1
+ %tmp5938 = getelementptr inbounds float, float* %tmp5937, i64 1
+ %tmp5939 = getelementptr inbounds float, float* %tmp5938, i64 1
+ %tmp5940 = getelementptr inbounds float, float* %tmp5939, i64 1
+ %tmp5941 = getelementptr inbounds float, float* %tmp5940, i64 1
+ %tmp5942 = getelementptr inbounds float, float* %tmp5941, i64 1
+ %tmp5943 = getelementptr inbounds float, float* %tmp5942, i64 1
+ %tmp5944 = getelementptr inbounds float, float* %tmp5943, i64 1
+ %tmp5945 = getelementptr inbounds float, float* %tmp5944, i64 1
+ %tmp5946 = getelementptr inbounds float, float* %tmp5945, i64 1
+ %tmp5947 = getelementptr inbounds float, float* %tmp5946, i64 1
+ %tmp5948 = getelementptr inbounds float, float* %tmp5947, i64 1
+ %tmp5949 = getelementptr inbounds float, float* %tmp5948, i64 1
+ %tmp5950 = getelementptr inbounds float, float* %tmp5949, i64 1
+ %tmp5951 = getelementptr inbounds float, float* %tmp5950, i64 1
+ %tmp5952 = getelementptr inbounds float, float* %tmp5951, i64 1
+ %tmp5953 = getelementptr inbounds float, float* %tmp5952, i64 1
+ %tmp5954 = getelementptr inbounds float, float* %tmp5953, i64 1
+ %tmp5955 = getelementptr inbounds float, float* %tmp5954, i64 1
+ %tmp5956 = getelementptr inbounds float, float* %tmp5955, i64 1
+ %tmp5957 = getelementptr inbounds float, float* %tmp5956, i64 1
+ %tmp5958 = getelementptr inbounds float, float* %tmp5957, i64 1
+ %tmp5959 = getelementptr inbounds float, float* %tmp5958, i64 1
+ %tmp5960 = getelementptr inbounds float, float* %tmp5959, i64 1
+ %tmp5961 = getelementptr inbounds float, float* %tmp5960, i64 1
+ %tmp5962 = getelementptr inbounds float, float* %tmp5961, i64 1
+ %tmp5963 = getelementptr inbounds float, float* %tmp5962, i64 1
+ %tmp5964 = getelementptr inbounds float, float* %tmp5963, i64 1
+ %tmp5965 = getelementptr inbounds float, float* %tmp5964, i64 1
+ %tmp5966 = getelementptr inbounds float, float* %tmp5965, i64 1
+ %tmp5967 = getelementptr inbounds float, float* %tmp5966, i64 1
+ %tmp5968 = getelementptr inbounds float, float* %tmp5967, i64 1
+ %tmp5969 = getelementptr inbounds float, float* %tmp5968, i64 1
+ %tmp5970 = getelementptr inbounds float, float* %tmp5969, i64 1
+ %tmp5971 = getelementptr inbounds float, float* %tmp5970, i64 1
+ %tmp5972 = getelementptr inbounds float, float* %tmp5971, i64 1
+ %tmp5973 = getelementptr inbounds float, float* %tmp5972, i64 1
+ %tmp5974 = getelementptr inbounds float, float* %tmp5973, i64 1
+ %tmp5975 = getelementptr inbounds float, float* %tmp5974, i64 1
+ %tmp5976 = getelementptr inbounds float, float* %tmp5975, i64 1
+ %tmp5977 = getelementptr inbounds float, float* %tmp5976, i64 1
+ %tmp5978 = getelementptr inbounds float, float* %tmp5977, i64 1
+ %tmp5979 = getelementptr inbounds float, float* %tmp5978, i64 1
+ %tmp5980 = getelementptr inbounds float, float* %tmp5979, i64 1
+ %tmp5981 = getelementptr inbounds float, float* %tmp5980, i64 1
+ %tmp5982 = getelementptr inbounds float, float* %tmp5981, i64 1
+ %tmp5983 = getelementptr inbounds float, float* %tmp5982, i64 1
+ %tmp5984 = getelementptr inbounds float, float* %tmp5983, i64 1
+ %tmp5985 = getelementptr inbounds float, float* %tmp5984, i64 1
+ %tmp5986 = getelementptr inbounds float, float* %tmp5985, i64 1
+ %tmp5987 = getelementptr inbounds float, float* %tmp5986, i64 1
+ %tmp5988 = getelementptr inbounds float, float* %tmp5987, i64 1
+ %tmp5989 = getelementptr inbounds float, float* %tmp5988, i64 1
+ %tmp5990 = getelementptr inbounds float, float* %tmp5989, i64 1
+ %tmp5991 = getelementptr inbounds float, float* %tmp5990, i64 1
+ %tmp5992 = getelementptr inbounds float, float* %tmp5991, i64 1
+ %tmp5993 = getelementptr inbounds float, float* %tmp5992, i64 1
+ %tmp5994 = getelementptr inbounds float, float* %tmp5993, i64 1
+ %tmp5995 = getelementptr inbounds float, float* %tmp5994, i64 1
+ %tmp5996 = getelementptr inbounds float, float* %tmp5995, i64 1
+ %tmp5997 = getelementptr inbounds float, float* %tmp5996, i64 1
+ %tmp5998 = getelementptr inbounds float, float* %tmp5997, i64 1
+ %tmp5999 = getelementptr inbounds float, float* %tmp5998, i64 1
+ %tmp6000 = getelementptr inbounds float, float* %tmp5999, i64 1
+ %tmp6001 = getelementptr inbounds float, float* %tmp6000, i64 1
+ %tmp6002 = getelementptr inbounds float, float* %tmp6001, i64 1
+ %tmp6003 = getelementptr inbounds float, float* %tmp6002, i64 1
+ %tmp6004 = getelementptr inbounds float, float* %tmp6003, i64 1
+ %tmp6005 = getelementptr inbounds float, float* %tmp6004, i64 1
+ %tmp6006 = getelementptr inbounds float, float* %tmp6005, i64 1
+ %tmp6007 = getelementptr inbounds float, float* %tmp6006, i64 1
+ %tmp6008 = getelementptr inbounds float, float* %tmp6007, i64 1
+ %tmp6009 = getelementptr inbounds float, float* %tmp6008, i64 1
+ %tmp6010 = getelementptr inbounds float, float* %tmp6009, i64 1
+ %tmp6011 = getelementptr inbounds float, float* %tmp6010, i64 1
+ %tmp6012 = getelementptr inbounds float, float* %tmp6011, i64 1
+ %tmp6013 = getelementptr inbounds float, float* %tmp6012, i64 1
+ %tmp6014 = getelementptr inbounds float, float* %tmp6013, i64 1
+ %tmp6015 = getelementptr inbounds float, float* %tmp6014, i64 1
+ %tmp6016 = getelementptr inbounds float, float* %tmp6015, i64 1
+ %tmp6017 = getelementptr inbounds float, float* %tmp6016, i64 1
+ %tmp6018 = getelementptr inbounds float, float* %tmp6017, i64 1
+ %tmp6019 = getelementptr inbounds float, float* %tmp6018, i64 1
+ %tmp6020 = getelementptr inbounds float, float* %tmp6019, i64 1
+ %tmp6021 = getelementptr inbounds float, float* %tmp6020, i64 1
+ %tmp6022 = getelementptr inbounds float, float* %tmp6021, i64 1
+ %tmp6023 = getelementptr inbounds float, float* %tmp6022, i64 1
+ %tmp6024 = getelementptr inbounds float, float* %tmp6023, i64 1
+ %tmp6025 = getelementptr inbounds float, float* %tmp6024, i64 1
+ %tmp6026 = getelementptr inbounds float, float* %tmp6025, i64 1
+ %tmp6027 = getelementptr inbounds float, float* %tmp6026, i64 1
+ %tmp6028 = getelementptr inbounds float, float* %tmp6027, i64 1
+ %tmp6029 = getelementptr inbounds float, float* %tmp6028, i64 1
+ %tmp6030 = getelementptr inbounds float, float* %tmp6029, i64 1
+ %tmp6031 = getelementptr inbounds float, float* %tmp6030, i64 1
+ %tmp6032 = getelementptr inbounds float, float* %tmp6031, i64 1
+ %tmp6033 = getelementptr inbounds float, float* %tmp6032, i64 1
+ %tmp6034 = getelementptr inbounds float, float* %tmp6033, i64 1
+ %tmp6035 = getelementptr inbounds float, float* %tmp6034, i64 1
+ %tmp6036 = getelementptr inbounds float, float* %tmp6035, i64 1
+ %tmp6037 = getelementptr inbounds float, float* %tmp6036, i64 1
+ %tmp6038 = getelementptr inbounds float, float* %tmp6037, i64 1
+ %tmp6039 = getelementptr inbounds float, float* %tmp6038, i64 1
+ %tmp6040 = getelementptr inbounds float, float* %tmp6039, i64 1
+ %tmp6041 = getelementptr inbounds float, float* %tmp6040, i64 1
+ %tmp6042 = getelementptr inbounds float, float* %tmp6041, i64 1
+ %tmp6043 = getelementptr inbounds float, float* %tmp6042, i64 1
+ %tmp6044 = getelementptr inbounds float, float* %tmp6043, i64 1
+ %tmp6045 = getelementptr inbounds float, float* %tmp6044, i64 1
+ %tmp6046 = getelementptr inbounds float, float* %tmp6045, i64 1
+ %tmp6047 = getelementptr inbounds float, float* %tmp6046, i64 1
+ %tmp6048 = getelementptr inbounds float, float* %tmp6047, i64 1
+ %tmp6049 = getelementptr inbounds float, float* %tmp6048, i64 1
+ %tmp6050 = getelementptr inbounds float, float* %tmp6049, i64 1
+ %tmp6051 = getelementptr inbounds float, float* %tmp6050, i64 1
+ %tmp6052 = getelementptr inbounds float, float* %tmp6051, i64 1
+ %tmp6053 = getelementptr inbounds float, float* %tmp6052, i64 1
+ %tmp6054 = getelementptr inbounds float, float* %tmp6053, i64 1
+ %tmp6055 = getelementptr inbounds float, float* %tmp6054, i64 1
+ %tmp6056 = getelementptr inbounds float, float* %tmp6055, i64 1
+ %tmp6057 = getelementptr inbounds float, float* %tmp6056, i64 1
+ %tmp6058 = getelementptr inbounds float, float* %tmp6057, i64 1
+ %tmp6059 = getelementptr inbounds float, float* %tmp6058, i64 1
+ %tmp6060 = getelementptr inbounds float, float* %tmp6059, i64 1
+ %tmp6061 = getelementptr inbounds float, float* %tmp6060, i64 1
+ %tmp6062 = getelementptr inbounds float, float* %tmp6061, i64 1
+ %tmp6063 = getelementptr inbounds float, float* %tmp6062, i64 1
+ %tmp6064 = getelementptr inbounds float, float* %tmp6063, i64 1
+ %tmp6065 = getelementptr inbounds float, float* %tmp6064, i64 1
+ %tmp6066 = getelementptr inbounds float, float* %tmp6065, i64 1
+ %tmp6067 = getelementptr inbounds float, float* %tmp6066, i64 1
+ %tmp6068 = getelementptr inbounds float, float* %tmp6067, i64 1
+ %tmp6069 = getelementptr inbounds float, float* %tmp6068, i64 1
+ %tmp6070 = getelementptr inbounds float, float* %tmp6069, i64 1
+ %tmp6071 = getelementptr inbounds float, float* %tmp6070, i64 1
+ %tmp6072 = getelementptr inbounds float, float* %tmp6071, i64 1
+ %tmp6073 = getelementptr inbounds float, float* %tmp6072, i64 1
+ %tmp6074 = getelementptr inbounds float, float* %tmp6073, i64 1
+ %tmp6075 = getelementptr inbounds float, float* %tmp6074, i64 1
+ %tmp6076 = getelementptr inbounds float, float* %tmp6075, i64 1
+ %tmp6077 = getelementptr inbounds float, float* %tmp6076, i64 1
+ %tmp6078 = getelementptr inbounds float, float* %tmp6077, i64 1
+ %tmp6079 = getelementptr inbounds float, float* %tmp6078, i64 1
+ %tmp6080 = getelementptr inbounds float, float* %tmp6079, i64 1
+ %tmp6081 = getelementptr inbounds float, float* %tmp6080, i64 1
+ %tmp6082 = getelementptr inbounds float, float* %tmp6081, i64 1
+ %tmp6083 = getelementptr inbounds float, float* %tmp6082, i64 1
+ %tmp6084 = getelementptr inbounds float, float* %tmp6083, i64 1
+ %tmp6085 = getelementptr inbounds float, float* %tmp6084, i64 1
+ %tmp6086 = getelementptr inbounds float, float* %tmp6085, i64 1
+ %tmp6087 = getelementptr inbounds float, float* %tmp6086, i64 1
+ %tmp6088 = getelementptr inbounds float, float* %tmp6087, i64 1
+ %tmp6089 = getelementptr inbounds float, float* %tmp6088, i64 1
+ %tmp6090 = getelementptr inbounds float, float* %tmp6089, i64 1
+ %tmp6091 = getelementptr inbounds float, float* %tmp6090, i64 1
+ %tmp6092 = getelementptr inbounds float, float* %tmp6091, i64 1
+ %tmp6093 = getelementptr inbounds float, float* %tmp6092, i64 1
+ %tmp6094 = getelementptr inbounds float, float* %tmp6093, i64 1
+ %tmp6095 = getelementptr inbounds float, float* %tmp6094, i64 1
+ %tmp6096 = getelementptr inbounds float, float* %tmp6095, i64 1
+ %tmp6097 = getelementptr inbounds float, float* %tmp6096, i64 1
+ %tmp6098 = getelementptr inbounds float, float* %tmp6097, i64 1
+ %tmp6099 = getelementptr inbounds float, float* %tmp6098, i64 1
+ %tmp6100 = getelementptr inbounds float, float* %tmp6099, i64 1
+ %tmp6101 = getelementptr inbounds float, float* %tmp6100, i64 1
+ %tmp6102 = getelementptr inbounds float, float* %tmp6101, i64 1
+ %tmp6103 = getelementptr inbounds float, float* %tmp6102, i64 1
+ %tmp6104 = getelementptr inbounds float, float* %tmp6103, i64 1
+ %tmp6105 = getelementptr inbounds float, float* %tmp6104, i64 1
+ %tmp6106 = getelementptr inbounds float, float* %tmp6105, i64 1
+ %tmp6107 = getelementptr inbounds float, float* %tmp6106, i64 1
+ %tmp6108 = getelementptr inbounds float, float* %tmp6107, i64 1
+ %tmp6109 = getelementptr inbounds float, float* %tmp6108, i64 1
+ %tmp6110 = getelementptr inbounds float, float* %tmp6109, i64 1
+ %tmp6111 = getelementptr inbounds float, float* %tmp6110, i64 1
+ %tmp6112 = getelementptr inbounds float, float* %tmp6111, i64 1
+ %tmp6113 = getelementptr inbounds float, float* %tmp6112, i64 1
+ %tmp6114 = getelementptr inbounds float, float* %tmp6113, i64 1
+ %tmp6115 = getelementptr inbounds float, float* %tmp6114, i64 1
+ %tmp6116 = getelementptr inbounds float, float* %tmp6115, i64 1
+ %tmp6117 = getelementptr inbounds float, float* %tmp6116, i64 1
+ %tmp6118 = getelementptr inbounds float, float* %tmp6117, i64 1
+ %tmp6119 = getelementptr inbounds float, float* %tmp6118, i64 1
+ %tmp6120 = getelementptr inbounds float, float* %tmp6119, i64 1
+ %tmp6121 = getelementptr inbounds float, float* %tmp6120, i64 1
+ %tmp6122 = getelementptr inbounds float, float* %tmp6121, i64 1
+ %tmp6123 = getelementptr inbounds float, float* %tmp6122, i64 1
+ %tmp6124 = getelementptr inbounds float, float* %tmp6123, i64 1
+ %tmp6125 = getelementptr inbounds float, float* %tmp6124, i64 1
+ %tmp6126 = getelementptr inbounds float, float* %tmp6125, i64 1
+ %tmp6127 = getelementptr inbounds float, float* %tmp6126, i64 1
+ %tmp6128 = getelementptr inbounds float, float* %tmp6127, i64 1
+ %tmp6129 = getelementptr inbounds float, float* %tmp6128, i64 1
+ %tmp6130 = getelementptr inbounds float, float* %tmp6129, i64 1
+ %tmp6131 = getelementptr inbounds float, float* %tmp6130, i64 1
+ %tmp6132 = getelementptr inbounds float, float* %tmp6131, i64 1
+ %tmp6133 = getelementptr inbounds float, float* %tmp6132, i64 1
+ %tmp6134 = getelementptr inbounds float, float* %tmp6133, i64 1
+ %tmp6135 = getelementptr inbounds float, float* %tmp6134, i64 1
+ %tmp6136 = getelementptr inbounds float, float* %tmp6135, i64 1
+ %tmp6137 = getelementptr inbounds float, float* %tmp6136, i64 1
+ %tmp6138 = getelementptr inbounds float, float* %tmp6137, i64 1
+ %tmp6139 = getelementptr inbounds float, float* %tmp6138, i64 1
+ %tmp6140 = getelementptr inbounds float, float* %tmp6139, i64 1
+ %tmp6141 = getelementptr inbounds float, float* %tmp6140, i64 1
+ %tmp6142 = getelementptr inbounds float, float* %tmp6141, i64 1
+ %tmp6143 = getelementptr inbounds float, float* %tmp6142, i64 1
+ %tmp6144 = getelementptr inbounds float, float* %tmp6143, i64 1
+ %tmp6145 = getelementptr inbounds float, float* %tmp6144, i64 1
+ %tmp6146 = getelementptr inbounds float, float* %tmp6145, i64 1
+ %tmp6147 = getelementptr inbounds float, float* %tmp6146, i64 1
+ %tmp6148 = getelementptr inbounds float, float* %tmp6147, i64 1
+ %tmp6149 = getelementptr inbounds float, float* %tmp6148, i64 1
+ %tmp6150 = getelementptr inbounds float, float* %tmp6149, i64 1
+ %tmp6151 = getelementptr inbounds float, float* %tmp6150, i64 1
+ %tmp6152 = getelementptr inbounds float, float* %tmp6151, i64 1
+ %tmp6153 = getelementptr inbounds float, float* %tmp6152, i64 1
+ %tmp6154 = getelementptr inbounds float, float* %tmp6153, i64 1
+ %tmp6155 = getelementptr inbounds float, float* %tmp6154, i64 1
+ %tmp6156 = getelementptr inbounds float, float* %tmp6155, i64 1
+ %tmp6157 = getelementptr inbounds float, float* %tmp6156, i64 1
+ %tmp6158 = getelementptr inbounds float, float* %tmp6157, i64 1
+ %tmp6159 = getelementptr inbounds float, float* %tmp6158, i64 1
+ %tmp6160 = getelementptr inbounds float, float* %tmp6159, i64 1
+ %tmp6161 = getelementptr inbounds float, float* %tmp6160, i64 1
+ %tmp6162 = getelementptr inbounds float, float* %tmp6161, i64 1
+ %tmp6163 = getelementptr inbounds float, float* %tmp6162, i64 1
+ %tmp6164 = getelementptr inbounds float, float* %tmp6163, i64 1
+ %tmp6165 = getelementptr inbounds float, float* %tmp6164, i64 1
+ %tmp6166 = getelementptr inbounds float, float* %tmp6165, i64 1
+ %tmp6167 = getelementptr inbounds float, float* %tmp6166, i64 1
+ %tmp6168 = getelementptr inbounds float, float* %tmp6167, i64 1
+ %tmp6169 = getelementptr inbounds float, float* %tmp6168, i64 1
+ %tmp6170 = getelementptr inbounds float, float* %tmp6169, i64 1
+ %tmp6171 = getelementptr inbounds float, float* %tmp6170, i64 1
+ %tmp6172 = getelementptr inbounds float, float* %tmp6171, i64 1
+ %tmp6173 = getelementptr inbounds float, float* %tmp6172, i64 1
+ %tmp6174 = getelementptr inbounds float, float* %tmp6173, i64 1
+ %tmp6175 = getelementptr inbounds float, float* %tmp6174, i64 1
+ %tmp6176 = getelementptr inbounds float, float* %tmp6175, i64 1
+ %tmp6177 = getelementptr inbounds float, float* %tmp6176, i64 1
+ %tmp6178 = getelementptr inbounds float, float* %tmp6177, i64 1
+ %tmp6179 = getelementptr inbounds float, float* %tmp6178, i64 1
+ %tmp6180 = getelementptr inbounds float, float* %tmp6179, i64 1
+ %tmp6181 = getelementptr inbounds float, float* %tmp6180, i64 1
+ %tmp6182 = getelementptr inbounds float, float* %tmp6181, i64 1
+ %tmp6183 = getelementptr inbounds float, float* %tmp6182, i64 1
+ %tmp6184 = getelementptr inbounds float, float* %tmp6183, i64 1
+ %tmp6185 = getelementptr inbounds float, float* %tmp6184, i64 1
+ %tmp6186 = getelementptr inbounds float, float* %tmp6185, i64 1
+ %tmp6187 = getelementptr inbounds float, float* %tmp6186, i64 1
+ %tmp6188 = getelementptr inbounds float, float* %tmp6187, i64 1
+ %tmp6189 = getelementptr inbounds float, float* %tmp6188, i64 1
+ %tmp6190 = getelementptr inbounds float, float* %tmp6189, i64 1
+ %tmp6191 = getelementptr inbounds float, float* %tmp6190, i64 1
+ %tmp6192 = getelementptr inbounds float, float* %tmp6191, i64 1
+ %tmp6193 = getelementptr inbounds float, float* %tmp6192, i64 1
+ %tmp6194 = getelementptr inbounds float, float* %tmp6193, i64 1
+ %tmp6195 = getelementptr inbounds float, float* %tmp6194, i64 1
+ %tmp6196 = getelementptr inbounds float, float* %tmp6195, i64 1
+ %tmp6197 = getelementptr inbounds float, float* %tmp6196, i64 1
+ %tmp6198 = getelementptr inbounds float, float* %tmp6197, i64 1
+ %tmp6199 = getelementptr inbounds float, float* %tmp6198, i64 1
+ %tmp6200 = getelementptr inbounds float, float* %tmp6199, i64 1
+ %tmp6201 = getelementptr inbounds float, float* %tmp6200, i64 1
+ %tmp6202 = getelementptr inbounds float, float* %tmp6201, i64 1
+ %tmp6203 = getelementptr inbounds float, float* %tmp6202, i64 1
+ %tmp6204 = getelementptr inbounds float, float* %tmp6203, i64 1
+ %tmp6205 = getelementptr inbounds float, float* %tmp6204, i64 1
+ %tmp6206 = getelementptr inbounds float, float* %tmp6205, i64 1
+ %tmp6207 = getelementptr inbounds float, float* %tmp6206, i64 1
+ %tmp6208 = getelementptr inbounds float, float* %tmp6207, i64 1
+ %tmp6209 = getelementptr inbounds float, float* %tmp6208, i64 1
+ %tmp6210 = getelementptr inbounds float, float* %tmp6209, i64 1
+ %tmp6211 = getelementptr inbounds float, float* %tmp6210, i64 1
+ %tmp6212 = getelementptr inbounds float, float* %tmp6211, i64 1
+ %tmp6213 = getelementptr inbounds float, float* %tmp6212, i64 1
+ %tmp6214 = getelementptr inbounds float, float* %tmp6213, i64 1
+ %tmp6215 = getelementptr inbounds float, float* %tmp6214, i64 1
+ %tmp6216 = getelementptr inbounds float, float* %tmp6215, i64 1
+ %tmp6217 = getelementptr inbounds float, float* %tmp6216, i64 1
+ %tmp6218 = getelementptr inbounds float, float* %tmp6217, i64 1
+ %tmp6219 = getelementptr inbounds float, float* %tmp6218, i64 1
+ %tmp6220 = getelementptr inbounds float, float* %tmp6219, i64 1
+ %tmp6221 = getelementptr inbounds float, float* %tmp6220, i64 1
+ %tmp6222 = getelementptr inbounds float, float* %tmp6221, i64 1
+ %tmp6223 = getelementptr inbounds float, float* %tmp6222, i64 1
+ %tmp6224 = getelementptr inbounds float, float* %tmp6223, i64 1
+ %tmp6225 = getelementptr inbounds float, float* %tmp6224, i64 1
+ %tmp6226 = getelementptr inbounds float, float* %tmp6225, i64 1
+ %tmp6227 = getelementptr inbounds float, float* %tmp6226, i64 1
+ %tmp6228 = getelementptr inbounds float, float* %tmp6227, i64 1
+ %tmp6229 = getelementptr inbounds float, float* %tmp6228, i64 1
+ %tmp6230 = getelementptr inbounds float, float* %tmp6229, i64 1
+ %tmp6231 = getelementptr inbounds float, float* %tmp6230, i64 1
+ %tmp6232 = getelementptr inbounds float, float* %tmp6231, i64 1
+ %tmp6233 = getelementptr inbounds float, float* %tmp6232, i64 1
+ %tmp6234 = getelementptr inbounds float, float* %tmp6233, i64 1
+ %tmp6235 = getelementptr inbounds float, float* %tmp6234, i64 1
+ %tmp6236 = getelementptr inbounds float, float* %tmp6235, i64 1
+ %tmp6237 = getelementptr inbounds float, float* %tmp6236, i64 1
+ %tmp6238 = getelementptr inbounds float, float* %tmp6237, i64 1
+ %tmp6239 = getelementptr inbounds float, float* %tmp6238, i64 1
+ %tmp6240 = getelementptr inbounds float, float* %tmp6239, i64 1
+ %tmp6241 = getelementptr inbounds float, float* %tmp6240, i64 1
+ %tmp6242 = getelementptr inbounds float, float* %tmp6241, i64 1
+ %tmp6243 = getelementptr inbounds float, float* %tmp6242, i64 1
+ %tmp6244 = getelementptr inbounds float, float* %tmp6243, i64 1
+ %tmp6245 = getelementptr inbounds float, float* %tmp6244, i64 1
+ %tmp6246 = getelementptr inbounds float, float* %tmp6245, i64 1
+ %tmp6247 = getelementptr inbounds float, float* %tmp6246, i64 1
+ %tmp6248 = getelementptr inbounds float, float* %tmp6247, i64 1
+ %tmp6249 = getelementptr inbounds float, float* %tmp6248, i64 1
+ %tmp6250 = getelementptr inbounds float, float* %tmp6249, i64 1
+ %tmp6251 = getelementptr inbounds float, float* %tmp6250, i64 1
+ %tmp6252 = getelementptr inbounds float, float* %tmp6251, i64 1
+ %tmp6253 = getelementptr inbounds float, float* %tmp6252, i64 1
+ %tmp6254 = getelementptr inbounds float, float* %tmp6253, i64 1
+ %tmp6255 = getelementptr inbounds float, float* %tmp6254, i64 1
+ %tmp6256 = getelementptr inbounds float, float* %tmp6255, i64 1
+ %tmp6257 = getelementptr inbounds float, float* %tmp6256, i64 1
+ %tmp6258 = getelementptr inbounds float, float* %tmp6257, i64 1
+ %tmp6259 = getelementptr inbounds float, float* %tmp6258, i64 1
+ %tmp6260 = getelementptr inbounds float, float* %tmp6259, i64 1
+ %tmp6261 = getelementptr inbounds float, float* %tmp6260, i64 1
+ %tmp6262 = getelementptr inbounds float, float* %tmp6261, i64 1
+ %tmp6263 = getelementptr inbounds float, float* %tmp6262, i64 1
+ %tmp6264 = getelementptr inbounds float, float* %tmp6263, i64 1
+ %tmp6265 = getelementptr inbounds float, float* %tmp6264, i64 1
+ %tmp6266 = getelementptr inbounds float, float* %tmp6265, i64 1
+ %tmp6267 = getelementptr inbounds float, float* %tmp6266, i64 1
+ %tmp6268 = getelementptr inbounds float, float* %tmp6267, i64 1
+ %tmp6269 = getelementptr inbounds float, float* %tmp6268, i64 1
+ %tmp6270 = getelementptr inbounds float, float* %tmp6269, i64 1
+ %tmp6271 = getelementptr inbounds float, float* %tmp6270, i64 1
+ %tmp6272 = getelementptr inbounds float, float* %tmp6271, i64 1
+ %tmp6273 = getelementptr inbounds float, float* %tmp6272, i64 1
+ %tmp6274 = getelementptr inbounds float, float* %tmp6273, i64 1
+ %tmp6275 = getelementptr inbounds float, float* %tmp6274, i64 1
+ %tmp6276 = getelementptr inbounds float, float* %tmp6275, i64 1
+ %tmp6277 = getelementptr inbounds float, float* %tmp6276, i64 1
+ %tmp6278 = getelementptr inbounds float, float* %tmp6277, i64 1
+ %tmp6279 = getelementptr inbounds float, float* %tmp6278, i64 1
+ %tmp6280 = getelementptr inbounds float, float* %tmp6279, i64 1
+ %tmp6281 = getelementptr inbounds float, float* %tmp6280, i64 1
+ %tmp6282 = getelementptr inbounds float, float* %tmp6281, i64 1
+ %tmp6283 = getelementptr inbounds float, float* %tmp6282, i64 1
+ %tmp6284 = getelementptr inbounds float, float* %tmp6283, i64 1
+ %tmp6285 = getelementptr inbounds float, float* %tmp6284, i64 1
+ %tmp6286 = getelementptr inbounds float, float* %tmp6285, i64 1
+ %tmp6287 = getelementptr inbounds float, float* %tmp6286, i64 1
+ %tmp6288 = getelementptr inbounds float, float* %tmp6287, i64 1
+ %tmp6289 = getelementptr inbounds float, float* %tmp6288, i64 1
+ %tmp6290 = getelementptr inbounds float, float* %tmp6289, i64 1
+ %tmp6291 = getelementptr inbounds float, float* %tmp6290, i64 1
+ %tmp6292 = getelementptr inbounds float, float* %tmp6291, i64 1
+ %tmp6293 = getelementptr inbounds float, float* %tmp6292, i64 1
+ %tmp6294 = getelementptr inbounds float, float* %tmp6293, i64 1
+ %tmp6295 = getelementptr inbounds float, float* %tmp6294, i64 1
+ %tmp6296 = getelementptr inbounds float, float* %tmp6295, i64 1
+ %tmp6297 = getelementptr inbounds float, float* %tmp6296, i64 1
+ %tmp6298 = getelementptr inbounds float, float* %tmp6297, i64 1
+ %tmp6299 = getelementptr inbounds float, float* %tmp6298, i64 1
+ %tmp6300 = getelementptr inbounds float, float* %tmp6299, i64 1
+ %tmp6301 = getelementptr inbounds float, float* %tmp6300, i64 1
+ %tmp6302 = getelementptr inbounds float, float* %tmp6301, i64 1
+ %tmp6303 = getelementptr inbounds float, float* %tmp6302, i64 1
+ %tmp6304 = getelementptr inbounds float, float* %tmp6303, i64 1
+ %tmp6305 = getelementptr inbounds float, float* %tmp6304, i64 1
+ %tmp6306 = getelementptr inbounds float, float* %tmp6305, i64 1
+ %tmp6307 = getelementptr inbounds float, float* %tmp6306, i64 1
+ %tmp6308 = getelementptr inbounds float, float* %tmp6307, i64 1
+ %tmp6309 = getelementptr inbounds float, float* %tmp6308, i64 1
+ %tmp6310 = getelementptr inbounds float, float* %tmp6309, i64 1
+ %tmp6311 = getelementptr inbounds float, float* %tmp6310, i64 1
+ %tmp6312 = getelementptr inbounds float, float* %tmp6311, i64 1
+ %tmp6313 = getelementptr inbounds float, float* %tmp6312, i64 1
+ %tmp6314 = getelementptr inbounds float, float* %tmp6313, i64 1
+ %tmp6315 = getelementptr inbounds float, float* %tmp6314, i64 1
+ %tmp6316 = getelementptr inbounds float, float* %tmp6315, i64 1
+ %tmp6317 = getelementptr inbounds float, float* %tmp6316, i64 1
+ %tmp6318 = getelementptr inbounds float, float* %tmp6317, i64 1
+ %tmp6319 = getelementptr inbounds float, float* %tmp6318, i64 1
+ %tmp6320 = getelementptr inbounds float, float* %tmp6319, i64 1
+ %tmp6321 = getelementptr inbounds float, float* %tmp6320, i64 1
+ %tmp6322 = getelementptr inbounds float, float* %tmp6321, i64 1
+ %tmp6323 = getelementptr inbounds float, float* %tmp6322, i64 1
+ %tmp6324 = getelementptr inbounds float, float* %tmp6323, i64 1
+ %tmp6325 = getelementptr inbounds float, float* %tmp6324, i64 1
+ %tmp6326 = getelementptr inbounds float, float* %tmp6325, i64 1
+ %tmp6327 = getelementptr inbounds float, float* %tmp6326, i64 1
+ %tmp6328 = getelementptr inbounds float, float* %tmp6327, i64 1
+ %tmp6329 = getelementptr inbounds float, float* %tmp6328, i64 1
+ %tmp6330 = getelementptr inbounds float, float* %tmp6329, i64 1
+ %tmp6331 = getelementptr inbounds float, float* %tmp6330, i64 1
+ %tmp6332 = getelementptr inbounds float, float* %tmp6331, i64 1
+ %tmp6333 = getelementptr inbounds float, float* %tmp6332, i64 1
+ %tmp6334 = getelementptr inbounds float, float* %tmp6333, i64 1
+ %tmp6335 = getelementptr inbounds float, float* %tmp6334, i64 1
+ %tmp6336 = getelementptr inbounds float, float* %tmp6335, i64 1
+ %tmp6337 = getelementptr inbounds float, float* %tmp6336, i64 1
+ %tmp6338 = getelementptr inbounds float, float* %tmp6337, i64 1
+ %tmp6339 = getelementptr inbounds float, float* %tmp6338, i64 1
+ %tmp6340 = getelementptr inbounds float, float* %tmp6339, i64 1
+ %tmp6341 = getelementptr inbounds float, float* %tmp6340, i64 1
+ %tmp6342 = getelementptr inbounds float, float* %tmp6341, i64 1
+ %tmp6343 = getelementptr inbounds float, float* %tmp6342, i64 1
+ %tmp6344 = getelementptr inbounds float, float* %tmp6343, i64 1
+ %tmp6345 = getelementptr inbounds float, float* %tmp6344, i64 1
+ %tmp6346 = getelementptr inbounds float, float* %tmp6345, i64 1
+ %tmp6347 = getelementptr inbounds float, float* %tmp6346, i64 1
+ %tmp6348 = getelementptr inbounds float, float* %tmp6347, i64 1
+ %tmp6349 = getelementptr inbounds float, float* %tmp6348, i64 1
+ %tmp6350 = getelementptr inbounds float, float* %tmp6349, i64 1
+ %tmp6351 = getelementptr inbounds float, float* %tmp6350, i64 1
+ %tmp6352 = getelementptr inbounds float, float* %tmp6351, i64 1
+ %tmp6353 = getelementptr inbounds float, float* %tmp6352, i64 1
+ %tmp6354 = getelementptr inbounds float, float* %tmp6353, i64 1
+ %tmp6355 = getelementptr inbounds float, float* %tmp6354, i64 1
+ %tmp6356 = getelementptr inbounds float, float* %tmp6355, i64 1
+ %tmp6357 = getelementptr inbounds float, float* %tmp6356, i64 1
+ %tmp6358 = getelementptr inbounds float, float* %tmp6357, i64 1
+ %tmp6359 = getelementptr inbounds float, float* %tmp6358, i64 1
+ %tmp6360 = getelementptr inbounds float, float* %tmp6359, i64 1
+ %tmp6361 = getelementptr inbounds float, float* %tmp6360, i64 1
+ %tmp6362 = getelementptr inbounds float, float* %tmp6361, i64 1
+ %tmp6363 = getelementptr inbounds float, float* %tmp6362, i64 1
+ %tmp6364 = getelementptr inbounds float, float* %tmp6363, i64 1
+ %tmp6365 = getelementptr inbounds float, float* %tmp6364, i64 1
+ %tmp6366 = getelementptr inbounds float, float* %tmp6365, i64 1
+ %tmp6367 = getelementptr inbounds float, float* %tmp6366, i64 1
+ %tmp6368 = getelementptr inbounds float, float* %tmp6367, i64 1
+ %tmp6369 = getelementptr inbounds float, float* %tmp6368, i64 1
+ %tmp6370 = getelementptr inbounds float, float* %tmp6369, i64 1
+ %tmp6371 = getelementptr inbounds float, float* %tmp6370, i64 1
+ %tmp6372 = getelementptr inbounds float, float* %tmp6371, i64 1
+ %tmp6373 = getelementptr inbounds float, float* %tmp6372, i64 1
+ %tmp6374 = getelementptr inbounds float, float* %tmp6373, i64 1
+ %tmp6375 = getelementptr inbounds float, float* %tmp6374, i64 1
+ %tmp6376 = getelementptr inbounds float, float* %tmp6375, i64 1
+ %tmp6377 = getelementptr inbounds float, float* %tmp6376, i64 1
+ %tmp6378 = getelementptr inbounds float, float* %tmp6377, i64 1
+ %tmp6379 = getelementptr inbounds float, float* %tmp6378, i64 1
+ %tmp6380 = getelementptr inbounds float, float* %tmp6379, i64 1
+ %tmp6381 = getelementptr inbounds float, float* %tmp6380, i64 1
+ %tmp6382 = getelementptr inbounds float, float* %tmp6381, i64 1
+ %tmp6383 = getelementptr inbounds float, float* %tmp6382, i64 1
+ %tmp6384 = getelementptr inbounds float, float* %tmp6383, i64 1
+ %tmp6385 = getelementptr inbounds float, float* %tmp6384, i64 1
+ %tmp6386 = getelementptr inbounds float, float* %tmp6385, i64 1
+ %tmp6387 = getelementptr inbounds float, float* %tmp6386, i64 1
+ %tmp6388 = getelementptr inbounds float, float* %tmp6387, i64 1
+ %tmp6389 = getelementptr inbounds float, float* %tmp6388, i64 1
+ %tmp6390 = getelementptr inbounds float, float* %tmp6389, i64 1
+ %tmp6391 = getelementptr inbounds float, float* %tmp6390, i64 1
+ %tmp6392 = getelementptr inbounds float, float* %tmp6391, i64 1
+ %tmp6393 = getelementptr inbounds float, float* %tmp6392, i64 1
+ %tmp6394 = getelementptr inbounds float, float* %tmp6393, i64 1
+ %tmp6395 = getelementptr inbounds float, float* %tmp6394, i64 1
+ %tmp6396 = getelementptr inbounds float, float* %tmp6395, i64 1
+ %tmp6397 = getelementptr inbounds float, float* %tmp6396, i64 1
+ %tmp6398 = getelementptr inbounds float, float* %tmp6397, i64 1
+ %tmp6399 = getelementptr inbounds float, float* %tmp6398, i64 1
+ %tmp6400 = getelementptr inbounds float, float* %tmp6399, i64 1
+ %tmp6401 = getelementptr inbounds float, float* %tmp6400, i64 1
+ %tmp6402 = getelementptr inbounds float, float* %tmp6401, i64 1
+ %tmp6403 = getelementptr inbounds float, float* %tmp6402, i64 1
+ %tmp6404 = getelementptr inbounds float, float* %tmp6403, i64 1
+ %tmp6405 = getelementptr inbounds float, float* %tmp6404, i64 1
+ %tmp6406 = getelementptr inbounds float, float* %tmp6405, i64 1
+ %tmp6407 = getelementptr inbounds float, float* %tmp6406, i64 1
+ %tmp6408 = getelementptr inbounds float, float* %tmp6407, i64 1
+ %tmp6409 = getelementptr inbounds float, float* %tmp6408, i64 1
+ %tmp6410 = getelementptr inbounds float, float* %tmp6409, i64 1
+ %tmp6411 = getelementptr inbounds float, float* %tmp6410, i64 1
+ %tmp6412 = getelementptr inbounds float, float* %tmp6411, i64 1
+ %tmp6413 = getelementptr inbounds float, float* %tmp6412, i64 1
+ %tmp6414 = getelementptr inbounds float, float* %tmp6413, i64 1
+ %tmp6415 = getelementptr inbounds float, float* %tmp6414, i64 1
+ %tmp6416 = getelementptr inbounds float, float* %tmp6415, i64 1
+ %tmp6417 = getelementptr inbounds float, float* %tmp6416, i64 1
+ %tmp6418 = getelementptr inbounds float, float* %tmp6417, i64 1
+ %tmp6419 = getelementptr inbounds float, float* %tmp6418, i64 1
+ %tmp6420 = getelementptr inbounds float, float* %tmp6419, i64 1
+ %tmp6421 = getelementptr inbounds float, float* %tmp6420, i64 1
+ %tmp6422 = getelementptr inbounds float, float* %tmp6421, i64 1
+ %tmp6423 = getelementptr inbounds float, float* %tmp6422, i64 1
+ %tmp6424 = getelementptr inbounds float, float* %tmp6423, i64 1
+ %tmp6425 = getelementptr inbounds float, float* %tmp6424, i64 1
+ %tmp6426 = getelementptr inbounds float, float* %tmp6425, i64 1
+ %tmp6427 = getelementptr inbounds float, float* %tmp6426, i64 1
+ %tmp6428 = getelementptr inbounds float, float* %tmp6427, i64 1
+ %tmp6429 = getelementptr inbounds float, float* %tmp6428, i64 1
+ %tmp6430 = getelementptr inbounds float, float* %tmp6429, i64 1
+ %tmp6431 = getelementptr inbounds float, float* %tmp6430, i64 1
+ %tmp6432 = getelementptr inbounds float, float* %tmp6431, i64 1
+ %tmp6433 = getelementptr inbounds float, float* %tmp6432, i64 1
+ %tmp6434 = getelementptr inbounds float, float* %tmp6433, i64 1
+ %tmp6435 = getelementptr inbounds float, float* %tmp6434, i64 1
+ %tmp6436 = getelementptr inbounds float, float* %tmp6435, i64 1
+ %tmp6437 = getelementptr inbounds float, float* %tmp6436, i64 1
+ %tmp6438 = getelementptr inbounds float, float* %tmp6437, i64 1
+ %tmp6439 = getelementptr inbounds float, float* %tmp6438, i64 1
+ %tmp6440 = getelementptr inbounds float, float* %tmp6439, i64 1
+ %tmp6441 = getelementptr inbounds float, float* %tmp6440, i64 1
+ %tmp6442 = getelementptr inbounds float, float* %tmp6441, i64 1
+ %tmp6443 = getelementptr inbounds float, float* %tmp6442, i64 1
+ %tmp6444 = getelementptr inbounds float, float* %tmp6443, i64 1
+ %tmp6445 = getelementptr inbounds float, float* %tmp6444, i64 1
+ %tmp6446 = getelementptr inbounds float, float* %tmp6445, i64 1
+ %tmp6447 = getelementptr inbounds float, float* %tmp6446, i64 1
+ %tmp6448 = getelementptr inbounds float, float* %tmp6447, i64 1
+ %tmp6449 = getelementptr inbounds float, float* %tmp6448, i64 1
+ %tmp6450 = getelementptr inbounds float, float* %tmp6449, i64 1
+ %tmp6451 = getelementptr inbounds float, float* %tmp6450, i64 1
+ %tmp6452 = getelementptr inbounds float, float* %tmp6451, i64 1
+ %tmp6453 = getelementptr inbounds float, float* %tmp6452, i64 1
+ %tmp6454 = getelementptr inbounds float, float* %tmp6453, i64 1
+ %tmp6455 = getelementptr inbounds float, float* %tmp6454, i64 1
+ %tmp6456 = getelementptr inbounds float, float* %tmp6455, i64 1
+ %tmp6457 = getelementptr inbounds float, float* %tmp6456, i64 1
+ %tmp6458 = getelementptr inbounds float, float* %tmp6457, i64 1
+ %tmp6459 = getelementptr inbounds float, float* %tmp6458, i64 1
+ %tmp6460 = getelementptr inbounds float, float* %tmp6459, i64 1
+ %tmp6461 = getelementptr inbounds float, float* %tmp6460, i64 1
+ %tmp6462 = getelementptr inbounds float, float* %tmp6461, i64 1
+ %tmp6463 = getelementptr inbounds float, float* %tmp6462, i64 1
+ %tmp6464 = getelementptr inbounds float, float* %tmp6463, i64 1
+ %tmp6465 = getelementptr inbounds float, float* %tmp6464, i64 1
+ %tmp6466 = getelementptr inbounds float, float* %tmp6465, i64 1
+ %tmp6467 = getelementptr inbounds float, float* %tmp6466, i64 1
+ %tmp6468 = getelementptr inbounds float, float* %tmp6467, i64 1
+ %tmp6469 = getelementptr inbounds float, float* %tmp6468, i64 1
+ %tmp6470 = getelementptr inbounds float, float* %tmp6469, i64 1
+ %tmp6471 = getelementptr inbounds float, float* %tmp6470, i64 1
+ %tmp6472 = getelementptr inbounds float, float* %tmp6471, i64 1
+ %tmp6473 = getelementptr inbounds float, float* %tmp6472, i64 1
+ %tmp6474 = getelementptr inbounds float, float* %tmp6473, i64 1
+ %tmp6475 = getelementptr inbounds float, float* %tmp6474, i64 1
+ %tmp6476 = getelementptr inbounds float, float* %tmp6475, i64 1
+ %tmp6477 = getelementptr inbounds float, float* %tmp6476, i64 1
+ %tmp6478 = getelementptr inbounds float, float* %tmp6477, i64 1
+ %tmp6479 = getelementptr inbounds float, float* %tmp6478, i64 1
+ %tmp6480 = getelementptr inbounds float, float* %tmp6479, i64 1
+ %tmp6481 = getelementptr inbounds float, float* %tmp6480, i64 1
+ %tmp6482 = getelementptr inbounds float, float* %tmp6481, i64 1
+ %tmp6483 = getelementptr inbounds float, float* %tmp6482, i64 1
+ %tmp6484 = getelementptr inbounds float, float* %tmp6483, i64 1
+ %tmp6485 = getelementptr inbounds float, float* %tmp6484, i64 1
+ %tmp6486 = getelementptr inbounds float, float* %tmp6485, i64 1
+ %tmp6487 = getelementptr inbounds float, float* %tmp6486, i64 1
+ %tmp6488 = getelementptr inbounds float, float* %tmp6487, i64 1
+ %tmp6489 = getelementptr inbounds float, float* %tmp6488, i64 1
+ %tmp6490 = getelementptr inbounds float, float* %tmp6489, i64 1
+ %tmp6491 = getelementptr inbounds float, float* %tmp6490, i64 1
+ %tmp6492 = getelementptr inbounds float, float* %tmp6491, i64 1
+ %tmp6493 = getelementptr inbounds float, float* %tmp6492, i64 1
+ %tmp6494 = getelementptr inbounds float, float* %tmp6493, i64 1
+ %tmp6495 = getelementptr inbounds float, float* %tmp6494, i64 1
+ %tmp6496 = getelementptr inbounds float, float* %tmp6495, i64 1
+ %tmp6497 = getelementptr inbounds float, float* %tmp6496, i64 1
+ %tmp6498 = getelementptr inbounds float, float* %tmp6497, i64 1
+ %tmp6499 = getelementptr inbounds float, float* %tmp6498, i64 1
+ %tmp6500 = getelementptr inbounds float, float* %tmp6499, i64 1
+ %tmp6501 = getelementptr inbounds float, float* %tmp6500, i64 1
+ %tmp6502 = getelementptr inbounds float, float* %tmp6501, i64 1
+ %tmp6503 = getelementptr inbounds float, float* %tmp6502, i64 1
+ %tmp6504 = getelementptr inbounds float, float* %tmp6503, i64 1
+ %tmp6505 = getelementptr inbounds float, float* %tmp6504, i64 1
+ %tmp6506 = getelementptr inbounds float, float* %tmp6505, i64 1
+ %tmp6507 = getelementptr inbounds float, float* %tmp6506, i64 1
+ %tmp6508 = getelementptr inbounds float, float* %tmp6507, i64 1
+ %tmp6509 = getelementptr inbounds float, float* %tmp6508, i64 1
+ %tmp6510 = getelementptr inbounds float, float* %tmp6509, i64 1
+ %tmp6511 = getelementptr inbounds float, float* %tmp6510, i64 1
+ %tmp6512 = getelementptr inbounds float, float* %tmp6511, i64 1
+ %tmp6513 = getelementptr inbounds float, float* %tmp6512, i64 1
+ %tmp6514 = getelementptr inbounds float, float* %tmp6513, i64 1
+ %tmp6515 = getelementptr inbounds float, float* %tmp6514, i64 1
+ %tmp6516 = getelementptr inbounds float, float* %tmp6515, i64 1
+ %tmp6517 = getelementptr inbounds float, float* %tmp6516, i64 1
+ %tmp6518 = getelementptr inbounds float, float* %tmp6517, i64 1
+ %tmp6519 = getelementptr inbounds float, float* %tmp6518, i64 1
+ %tmp6520 = getelementptr inbounds float, float* %tmp6519, i64 1
+ %tmp6521 = getelementptr inbounds float, float* %tmp6520, i64 1
+ %tmp6522 = getelementptr inbounds float, float* %tmp6521, i64 1
+ %tmp6523 = getelementptr inbounds float, float* %tmp6522, i64 1
+ %tmp6524 = getelementptr inbounds float, float* %tmp6523, i64 1
+ %tmp6525 = getelementptr inbounds float, float* %tmp6524, i64 1
+ %tmp6526 = getelementptr inbounds float, float* %tmp6525, i64 1
+ %tmp6527 = getelementptr inbounds float, float* %tmp6526, i64 1
+ %tmp6528 = getelementptr inbounds float, float* %tmp6527, i64 1
+ %tmp6529 = getelementptr inbounds float, float* %tmp6528, i64 1
+ %tmp6530 = getelementptr inbounds float, float* %tmp6529, i64 1
+ %tmp6531 = getelementptr inbounds float, float* %tmp6530, i64 1
+ %tmp6532 = getelementptr inbounds float, float* %tmp6531, i64 1
+ %tmp6533 = getelementptr inbounds float, float* %tmp6532, i64 1
+ %tmp6534 = getelementptr inbounds float, float* %tmp6533, i64 1
+ %tmp6535 = getelementptr inbounds float, float* %tmp6534, i64 1
+ %tmp6536 = getelementptr inbounds float, float* %tmp6535, i64 1
+ %tmp6537 = getelementptr inbounds float, float* %tmp6536, i64 1
+ %tmp6538 = getelementptr inbounds float, float* %tmp6537, i64 1
+ %tmp6539 = getelementptr inbounds float, float* %tmp6538, i64 1
+ %tmp6540 = getelementptr inbounds float, float* %tmp6539, i64 1
+ %tmp6541 = getelementptr inbounds float, float* %tmp6540, i64 1
+ %tmp6542 = getelementptr inbounds float, float* %tmp6541, i64 1
+ %tmp6543 = getelementptr inbounds float, float* %tmp6542, i64 1
+ %tmp6544 = getelementptr inbounds float, float* %tmp6543, i64 1
+ %tmp6545 = getelementptr inbounds float, float* %tmp6544, i64 1
+ %tmp6546 = getelementptr inbounds float, float* %tmp6545, i64 1
+ %tmp6547 = getelementptr inbounds float, float* %tmp6546, i64 1
+ %tmp6548 = getelementptr inbounds float, float* %tmp6547, i64 1
+ %tmp6549 = getelementptr inbounds float, float* %tmp6548, i64 1
+ %tmp6550 = getelementptr inbounds float, float* %tmp6549, i64 1
+ %tmp6551 = getelementptr inbounds float, float* %tmp6550, i64 1
+ %tmp6552 = getelementptr inbounds float, float* %tmp6551, i64 1
+ %tmp6553 = getelementptr inbounds float, float* %tmp6552, i64 1
+ %tmp6554 = getelementptr inbounds float, float* %tmp6553, i64 1
+ %tmp6555 = getelementptr inbounds float, float* %tmp6554, i64 1
+ %tmp6556 = getelementptr inbounds float, float* %tmp6555, i64 1
+ %tmp6557 = getelementptr inbounds float, float* %tmp6556, i64 1
+ %tmp6558 = getelementptr inbounds float, float* %tmp6557, i64 1
+ %tmp6559 = getelementptr inbounds float, float* %tmp6558, i64 1
+ %tmp6560 = getelementptr inbounds float, float* %tmp6559, i64 1
+ %tmp6561 = getelementptr inbounds float, float* %tmp6560, i64 1
+ %tmp6562 = getelementptr inbounds float, float* %tmp6561, i64 1
+ %tmp6563 = getelementptr inbounds float, float* %tmp6562, i64 1
+ %tmp6564 = getelementptr inbounds float, float* %tmp6563, i64 1
+ %tmp6565 = getelementptr inbounds float, float* %tmp6564, i64 1
+ %tmp6566 = getelementptr inbounds float, float* %tmp6565, i64 1
+ %tmp6567 = getelementptr inbounds float, float* %tmp6566, i64 1
+ %tmp6568 = getelementptr inbounds float, float* %tmp6567, i64 1
+ %tmp6569 = getelementptr inbounds float, float* %tmp6568, i64 1
+ %tmp6570 = getelementptr inbounds float, float* %tmp6569, i64 1
+ %tmp6571 = getelementptr inbounds float, float* %tmp6570, i64 1
+ %tmp6572 = getelementptr inbounds float, float* %tmp6571, i64 1
+ %tmp6573 = getelementptr inbounds float, float* %tmp6572, i64 1
+ %tmp6574 = getelementptr inbounds float, float* %tmp6573, i64 1
+ %tmp6575 = getelementptr inbounds float, float* %tmp6574, i64 1
+ %tmp6576 = getelementptr inbounds float, float* %tmp6575, i64 1
+ %tmp6577 = getelementptr inbounds float, float* %tmp6576, i64 1
+ %tmp6578 = getelementptr inbounds float, float* %tmp6577, i64 1
+ %tmp6579 = getelementptr inbounds float, float* %tmp6578, i64 1
+ %tmp6580 = getelementptr inbounds float, float* %tmp6579, i64 1
+ %tmp6581 = getelementptr inbounds float, float* %tmp6580, i64 1
+ %tmp6582 = getelementptr inbounds float, float* %tmp6581, i64 1
+ %tmp6583 = getelementptr inbounds float, float* %tmp6582, i64 1
+ %tmp6584 = getelementptr inbounds float, float* %tmp6583, i64 1
+ %tmp6585 = getelementptr inbounds float, float* %tmp6584, i64 1
+ %tmp6586 = getelementptr inbounds float, float* %tmp6585, i64 1
+ %tmp6587 = getelementptr inbounds float, float* %tmp6586, i64 1
+ %tmp6588 = getelementptr inbounds float, float* %tmp6587, i64 1
+ %tmp6589 = getelementptr inbounds float, float* %tmp6588, i64 1
+ %tmp6590 = getelementptr inbounds float, float* %tmp6589, i64 1
+ %tmp6591 = getelementptr inbounds float, float* %tmp6590, i64 1
+ %tmp6592 = getelementptr inbounds float, float* %tmp6591, i64 1
+ %tmp6593 = getelementptr inbounds float, float* %tmp6592, i64 1
+ %tmp6594 = getelementptr inbounds float, float* %tmp6593, i64 1
+ %tmp6595 = getelementptr inbounds float, float* %tmp6594, i64 1
+ %tmp6596 = getelementptr inbounds float, float* %tmp6595, i64 1
+ %tmp6597 = getelementptr inbounds float, float* %tmp6596, i64 1
+ %tmp6598 = getelementptr inbounds float, float* %tmp6597, i64 1
+ %tmp6599 = getelementptr inbounds float, float* %tmp6598, i64 1
+ %tmp6600 = getelementptr inbounds float, float* %tmp6599, i64 1
+ %tmp6601 = getelementptr inbounds float, float* %tmp6600, i64 1
+ %tmp6602 = getelementptr inbounds float, float* %tmp6601, i64 1
+ %tmp6603 = getelementptr inbounds float, float* %tmp6602, i64 1
+ %tmp6604 = getelementptr inbounds float, float* %tmp6603, i64 1
+ %tmp6605 = getelementptr inbounds float, float* %tmp6604, i64 1
+ %tmp6606 = getelementptr inbounds float, float* %tmp6605, i64 1
+ %tmp6607 = getelementptr inbounds float, float* %tmp6606, i64 1
+ %tmp6608 = getelementptr inbounds float, float* %tmp6607, i64 1
+ %tmp6609 = getelementptr inbounds float, float* %tmp6608, i64 1
+ %tmp6610 = getelementptr inbounds float, float* %tmp6609, i64 1
+ %tmp6611 = getelementptr inbounds float, float* %tmp6610, i64 1
+ %tmp6612 = getelementptr inbounds float, float* %tmp6611, i64 1
+ %tmp6613 = getelementptr inbounds float, float* %tmp6612, i64 1
+ %tmp6614 = getelementptr inbounds float, float* %tmp6613, i64 1
+ %tmp6615 = getelementptr inbounds float, float* %tmp6614, i64 1
+ %tmp6616 = getelementptr inbounds float, float* %tmp6615, i64 1
+ %tmp6617 = getelementptr inbounds float, float* %tmp6616, i64 1
+ %tmp6618 = getelementptr inbounds float, float* %tmp6617, i64 1
+ %tmp6619 = getelementptr inbounds float, float* %tmp6618, i64 1
+ %tmp6620 = getelementptr inbounds float, float* %tmp6619, i64 1
+ %tmp6621 = getelementptr inbounds float, float* %tmp6620, i64 1
+ %tmp6622 = getelementptr inbounds float, float* %tmp6621, i64 1
+ %tmp6623 = getelementptr inbounds float, float* %tmp6622, i64 1
+ %tmp6624 = getelementptr inbounds float, float* %tmp6623, i64 1
+ %tmp6625 = getelementptr inbounds float, float* %tmp6624, i64 1
+ %tmp6626 = getelementptr inbounds float, float* %tmp6625, i64 1
+ %tmp6627 = getelementptr inbounds float, float* %tmp6626, i64 1
+ %tmp6628 = getelementptr inbounds float, float* %tmp6627, i64 1
+ %tmp6629 = getelementptr inbounds float, float* %tmp6628, i64 1
+ %tmp6630 = getelementptr inbounds float, float* %tmp6629, i64 1
+ %tmp6631 = getelementptr inbounds float, float* %tmp6630, i64 1
+ %tmp6632 = getelementptr inbounds float, float* %tmp6631, i64 1
+ %tmp6633 = getelementptr inbounds float, float* %tmp6632, i64 1
+ %tmp6634 = getelementptr inbounds float, float* %tmp6633, i64 1
+ %tmp6635 = getelementptr inbounds float, float* %tmp6634, i64 1
+ %tmp6636 = getelementptr inbounds float, float* %tmp6635, i64 1
+ %tmp6637 = getelementptr inbounds float, float* %tmp6636, i64 1
+ %tmp6638 = getelementptr inbounds float, float* %tmp6637, i64 1
+ %tmp6639 = getelementptr inbounds float, float* %tmp6638, i64 1
+ %tmp6640 = getelementptr inbounds float, float* %tmp6639, i64 1
+ %tmp6641 = getelementptr inbounds float, float* %tmp6640, i64 1
+ %tmp6642 = getelementptr inbounds float, float* %tmp6641, i64 1
+ %tmp6643 = getelementptr inbounds float, float* %tmp6642, i64 1
+ %tmp6644 = getelementptr inbounds float, float* %tmp6643, i64 1
+ %tmp6645 = getelementptr inbounds float, float* %tmp6644, i64 1
+ %tmp6646 = getelementptr inbounds float, float* %tmp6645, i64 1
+ %tmp6647 = getelementptr inbounds float, float* %tmp6646, i64 1
+ %tmp6648 = getelementptr inbounds float, float* %tmp6647, i64 1
+ %tmp6649 = getelementptr inbounds float, float* %tmp6648, i64 1
+ %tmp6650 = getelementptr inbounds float, float* %tmp6649, i64 1
+ %tmp6651 = getelementptr inbounds float, float* %tmp6650, i64 1
+ %tmp6652 = getelementptr inbounds float, float* %tmp6651, i64 1
+ %tmp6653 = getelementptr inbounds float, float* %tmp6652, i64 1
+ %tmp6654 = getelementptr inbounds float, float* %tmp6653, i64 1
+ %tmp6655 = getelementptr inbounds float, float* %tmp6654, i64 1
+ %tmp6656 = getelementptr inbounds float, float* %tmp6655, i64 1
+ %tmp6657 = getelementptr inbounds float, float* %tmp6656, i64 1
+ %tmp6658 = getelementptr inbounds float, float* %tmp6657, i64 1
+ %tmp6659 = getelementptr inbounds float, float* %tmp6658, i64 1
+ %tmp6660 = getelementptr inbounds float, float* %tmp6659, i64 1
+ %tmp6661 = getelementptr inbounds float, float* %tmp6660, i64 1
+ %tmp6662 = getelementptr inbounds float, float* %tmp6661, i64 1
+ %tmp6663 = getelementptr inbounds float, float* %tmp6662, i64 1
+ %tmp6664 = getelementptr inbounds float, float* %tmp6663, i64 1
+ %tmp6665 = getelementptr inbounds float, float* %tmp6664, i64 1
+ %tmp6666 = getelementptr inbounds float, float* %tmp6665, i64 1
+ %tmp6667 = getelementptr inbounds float, float* %tmp6666, i64 1
+ %tmp6668 = getelementptr inbounds float, float* %tmp6667, i64 1
+ %tmp6669 = getelementptr inbounds float, float* %tmp6668, i64 1
+ %tmp6670 = getelementptr inbounds float, float* %tmp6669, i64 1
+ %tmp6671 = getelementptr inbounds float, float* %tmp6670, i64 1
+ %tmp6672 = getelementptr inbounds float, float* %tmp6671, i64 1
+ %tmp6673 = getelementptr inbounds float, float* %tmp6672, i64 1
+ %tmp6674 = getelementptr inbounds float, float* %tmp6673, i64 1
+ %tmp6675 = getelementptr inbounds float, float* %tmp6674, i64 1
+ %tmp6676 = getelementptr inbounds float, float* %tmp6675, i64 1
+ %tmp6677 = getelementptr inbounds float, float* %tmp6676, i64 1
+ %tmp6678 = getelementptr inbounds float, float* %tmp6677, i64 1
+ %tmp6679 = getelementptr inbounds float, float* %tmp6678, i64 1
+ %tmp6680 = getelementptr inbounds float, float* %tmp6679, i64 1
+ %tmp6681 = getelementptr inbounds float, float* %tmp6680, i64 1
+ %tmp6682 = getelementptr inbounds float, float* %tmp6681, i64 1
+ %tmp6683 = getelementptr inbounds float, float* %tmp6682, i64 1
+ %tmp6684 = getelementptr inbounds float, float* %tmp6683, i64 1
+ %tmp6685 = getelementptr inbounds float, float* %tmp6684, i64 1
+ %tmp6686 = getelementptr inbounds float, float* %tmp6685, i64 1
+ %tmp6687 = getelementptr inbounds float, float* %tmp6686, i64 1
+ %tmp6688 = getelementptr inbounds float, float* %tmp6687, i64 1
+ %tmp6689 = getelementptr inbounds float, float* %tmp6688, i64 1
+ %tmp6690 = getelementptr inbounds float, float* %tmp6689, i64 1
+ %tmp6691 = getelementptr inbounds float, float* %tmp6690, i64 1
+ %tmp6692 = getelementptr inbounds float, float* %tmp6691, i64 1
+ %tmp6693 = getelementptr inbounds float, float* %tmp6692, i64 1
+ %tmp6694 = getelementptr inbounds float, float* %tmp6693, i64 1
+ %tmp6695 = getelementptr inbounds float, float* %tmp6694, i64 1
+ %tmp6696 = getelementptr inbounds float, float* %tmp6695, i64 1
+ %tmp6697 = getelementptr inbounds float, float* %tmp6696, i64 1
+ %tmp6698 = getelementptr inbounds float, float* %tmp6697, i64 1
+ %tmp6699 = getelementptr inbounds float, float* %tmp6698, i64 1
+ %tmp6700 = getelementptr inbounds float, float* %tmp6699, i64 1
+ %tmp6701 = getelementptr inbounds float, float* %tmp6700, i64 1
+ %tmp6702 = getelementptr inbounds float, float* %tmp6701, i64 1
+ %tmp6703 = getelementptr inbounds float, float* %tmp6702, i64 1
+ %tmp6704 = getelementptr inbounds float, float* %tmp6703, i64 1
+ %tmp6705 = getelementptr inbounds float, float* %tmp6704, i64 1
+ %tmp6706 = getelementptr inbounds float, float* %tmp6705, i64 1
+ %tmp6707 = getelementptr inbounds float, float* %tmp6706, i64 1
+ %tmp6708 = getelementptr inbounds float, float* %tmp6707, i64 1
+ %tmp6709 = getelementptr inbounds float, float* %tmp6708, i64 1
+ %tmp6710 = getelementptr inbounds float, float* %tmp6709, i64 1
+ %tmp6711 = getelementptr inbounds float, float* %tmp6710, i64 1
+ %tmp6712 = getelementptr inbounds float, float* %tmp6711, i64 1
+ %tmp6713 = getelementptr inbounds float, float* %tmp6712, i64 1
+ %tmp6714 = getelementptr inbounds float, float* %tmp6713, i64 1
+ %tmp6715 = getelementptr inbounds float, float* %tmp6714, i64 1
+ %tmp6716 = getelementptr inbounds float, float* %tmp6715, i64 1
+ %tmp6717 = getelementptr inbounds float, float* %tmp6716, i64 1
+ %tmp6718 = getelementptr inbounds float, float* %tmp6717, i64 1
+ %tmp6719 = getelementptr inbounds float, float* %tmp6718, i64 1
+ %tmp6720 = getelementptr inbounds float, float* %tmp6719, i64 1
+ %tmp6721 = getelementptr inbounds float, float* %tmp6720, i64 1
+ %tmp6722 = getelementptr inbounds float, float* %tmp6721, i64 1
+ %tmp6723 = getelementptr inbounds float, float* %tmp6722, i64 1
+ %tmp6724 = getelementptr inbounds float, float* %tmp6723, i64 1
+ %tmp6725 = getelementptr inbounds float, float* %tmp6724, i64 1
+ %tmp6726 = getelementptr inbounds float, float* %tmp6725, i64 1
+ %tmp6727 = getelementptr inbounds float, float* %tmp6726, i64 1
+ %tmp6728 = getelementptr inbounds float, float* %tmp6727, i64 1
+ %tmp6729 = getelementptr inbounds float, float* %tmp6728, i64 1
+ %tmp6730 = getelementptr inbounds float, float* %tmp6729, i64 1
+ %tmp6731 = getelementptr inbounds float, float* %tmp6730, i64 1
+ %tmp6732 = getelementptr inbounds float, float* %tmp6731, i64 1
+ %tmp6733 = getelementptr inbounds float, float* %tmp6732, i64 1
+ %tmp6734 = getelementptr inbounds float, float* %tmp6733, i64 1
+ %tmp6735 = getelementptr inbounds float, float* %tmp6734, i64 1
+ %tmp6736 = getelementptr inbounds float, float* %tmp6735, i64 1
+ %tmp6737 = getelementptr inbounds float, float* %tmp6736, i64 1
+ %tmp6738 = getelementptr inbounds float, float* %tmp6737, i64 1
+ %tmp6739 = getelementptr inbounds float, float* %tmp6738, i64 1
+ %tmp6740 = getelementptr inbounds float, float* %tmp6739, i64 1
+ %tmp6741 = getelementptr inbounds float, float* %tmp6740, i64 1
+ %tmp6742 = getelementptr inbounds float, float* %tmp6741, i64 1
+ %tmp6743 = getelementptr inbounds float, float* %tmp6742, i64 1
+ %tmp6744 = getelementptr inbounds float, float* %tmp6743, i64 1
+ %tmp6745 = getelementptr inbounds float, float* %tmp6744, i64 1
+ %tmp6746 = getelementptr inbounds float, float* %tmp6745, i64 1
+ %tmp6747 = getelementptr inbounds float, float* %tmp6746, i64 1
+ %tmp6748 = getelementptr inbounds float, float* %tmp6747, i64 1
+ %tmp6749 = getelementptr inbounds float, float* %tmp6748, i64 1
+ %tmp6750 = getelementptr inbounds float, float* %tmp6749, i64 1
+ %tmp6751 = getelementptr inbounds float, float* %tmp6750, i64 1
+ %tmp6752 = getelementptr inbounds float, float* %tmp6751, i64 1
+ %tmp6753 = getelementptr inbounds float, float* %tmp6752, i64 1
+ %tmp6754 = getelementptr inbounds float, float* %tmp6753, i64 1
+ %tmp6755 = getelementptr inbounds float, float* %tmp6754, i64 1
+ %tmp6756 = getelementptr inbounds float, float* %tmp6755, i64 1
+ %tmp6757 = getelementptr inbounds float, float* %tmp6756, i64 1
+ %tmp6758 = getelementptr inbounds float, float* %tmp6757, i64 1
+ %tmp6759 = getelementptr inbounds float, float* %tmp6758, i64 1
+ %tmp6760 = getelementptr inbounds float, float* %tmp6759, i64 1
+ %tmp6761 = getelementptr inbounds float, float* %tmp6760, i64 1
+ %tmp6762 = getelementptr inbounds float, float* %tmp6761, i64 1
+ %tmp6763 = getelementptr inbounds float, float* %tmp6762, i64 1
+ %tmp6764 = getelementptr inbounds float, float* %tmp6763, i64 1
+ %tmp6765 = getelementptr inbounds float, float* %tmp6764, i64 1
+ %tmp6766 = getelementptr inbounds float, float* %tmp6765, i64 1
+ %tmp6767 = getelementptr inbounds float, float* %tmp6766, i64 1
+ %tmp6768 = getelementptr inbounds float, float* %tmp6767, i64 1
+ %tmp6769 = getelementptr inbounds float, float* %tmp6768, i64 1
+ %tmp6770 = getelementptr inbounds float, float* %tmp6769, i64 1
+ %tmp6771 = getelementptr inbounds float, float* %tmp6770, i64 1
+ %tmp6772 = getelementptr inbounds float, float* %tmp6771, i64 1
+ %tmp6773 = getelementptr inbounds float, float* %tmp6772, i64 1
+ %tmp6774 = getelementptr inbounds float, float* %tmp6773, i64 1
+ %tmp6775 = getelementptr inbounds float, float* %tmp6774, i64 1
+ %tmp6776 = getelementptr inbounds float, float* %tmp6775, i64 1
+ %tmp6777 = getelementptr inbounds float, float* %tmp6776, i64 1
+ %tmp6778 = getelementptr inbounds float, float* %tmp6777, i64 1
+ %tmp6779 = getelementptr inbounds float, float* %tmp6778, i64 1
+ %tmp6780 = getelementptr inbounds float, float* %tmp6779, i64 1
+ %tmp6781 = getelementptr inbounds float, float* %tmp6780, i64 1
+ %tmp6782 = getelementptr inbounds float, float* %tmp6781, i64 1
+ %tmp6783 = getelementptr inbounds float, float* %tmp6782, i64 1
+ %tmp6784 = getelementptr inbounds float, float* %tmp6783, i64 1
+ %tmp6785 = getelementptr inbounds float, float* %tmp6784, i64 1
+ %tmp6786 = getelementptr inbounds float, float* %tmp6785, i64 1
+ %tmp6787 = getelementptr inbounds float, float* %tmp6786, i64 1
+ %tmp6788 = getelementptr inbounds float, float* %tmp6787, i64 1
+ %tmp6789 = getelementptr inbounds float, float* %tmp6788, i64 1
+ %tmp6790 = getelementptr inbounds float, float* %tmp6789, i64 1
+ %tmp6791 = getelementptr inbounds float, float* %tmp6790, i64 1
+ %tmp6792 = getelementptr inbounds float, float* %tmp6791, i64 1
+ %tmp6793 = getelementptr inbounds float, float* %tmp6792, i64 1
+ %tmp6794 = getelementptr inbounds float, float* %tmp6793, i64 1
+ %tmp6795 = getelementptr inbounds float, float* %tmp6794, i64 1
+ %tmp6796 = getelementptr inbounds float, float* %tmp6795, i64 1
+ %tmp6797 = getelementptr inbounds float, float* %tmp6796, i64 1
+ %tmp6798 = getelementptr inbounds float, float* %tmp6797, i64 1
+ %tmp6799 = getelementptr inbounds float, float* %tmp6798, i64 1
+ %tmp6800 = getelementptr inbounds float, float* %tmp6799, i64 1
+ %tmp6801 = getelementptr inbounds float, float* %tmp6800, i64 1
+ %tmp6802 = getelementptr inbounds float, float* %tmp6801, i64 1
+ %tmp6803 = getelementptr inbounds float, float* %tmp6802, i64 1
+ %tmp6804 = getelementptr inbounds float, float* %tmp6803, i64 1
+ %tmp6805 = getelementptr inbounds float, float* %tmp6804, i64 1
+ %tmp6806 = getelementptr inbounds float, float* %tmp6805, i64 1
+ %tmp6807 = getelementptr inbounds float, float* %tmp6806, i64 1
+ %tmp6808 = getelementptr inbounds float, float* %tmp6807, i64 1
+ %tmp6809 = getelementptr inbounds float, float* %tmp6808, i64 1
+ %tmp6810 = getelementptr inbounds float, float* %tmp6809, i64 1
+ %tmp6811 = getelementptr inbounds float, float* %tmp6810, i64 1
+ %tmp6812 = getelementptr inbounds float, float* %tmp6811, i64 1
+ %tmp6813 = getelementptr inbounds float, float* %tmp6812, i64 1
+ %tmp6814 = getelementptr inbounds float, float* %tmp6813, i64 1
+ %tmp6815 = getelementptr inbounds float, float* %tmp6814, i64 1
+ %tmp6816 = getelementptr inbounds float, float* %tmp6815, i64 1
+ %tmp6817 = getelementptr inbounds float, float* %tmp6816, i64 1
+ %tmp6818 = getelementptr inbounds float, float* %tmp6817, i64 1
+ %tmp6819 = getelementptr inbounds float, float* %tmp6818, i64 1
+ %tmp6820 = getelementptr inbounds float, float* %tmp6819, i64 1
+ %tmp6821 = getelementptr inbounds float, float* %tmp6820, i64 1
+ %tmp6822 = getelementptr inbounds float, float* %tmp6821, i64 1
+ %tmp6823 = getelementptr inbounds float, float* %tmp6822, i64 1
+ %tmp6824 = getelementptr inbounds float, float* %tmp6823, i64 1
+ %tmp6825 = getelementptr inbounds float, float* %tmp6824, i64 1
+ %tmp6826 = getelementptr inbounds float, float* %tmp6825, i64 1
+ %tmp6827 = getelementptr inbounds float, float* %tmp6826, i64 1
+ %tmp6828 = getelementptr inbounds float, float* %tmp6827, i64 1
+ %tmp6829 = getelementptr inbounds float, float* %tmp6828, i64 1
+ %tmp6830 = getelementptr inbounds float, float* %tmp6829, i64 1
+ %tmp6831 = getelementptr inbounds float, float* %tmp6830, i64 1
+ %tmp6832 = getelementptr inbounds float, float* %tmp6831, i64 1
+ %tmp6833 = getelementptr inbounds float, float* %tmp6832, i64 1
+ %tmp6834 = getelementptr inbounds float, float* %tmp6833, i64 1
+ %tmp6835 = getelementptr inbounds float, float* %tmp6834, i64 1
+ %tmp6836 = getelementptr inbounds float, float* %tmp6835, i64 1
+ %tmp6837 = getelementptr inbounds float, float* %tmp6836, i64 1
+ %tmp6838 = getelementptr inbounds float, float* %tmp6837, i64 1
+ %tmp6839 = getelementptr inbounds float, float* %tmp6838, i64 1
+ %tmp6840 = getelementptr inbounds float, float* %tmp6839, i64 1
+ %tmp6841 = getelementptr inbounds float, float* %tmp6840, i64 1
+ %tmp6842 = getelementptr inbounds float, float* %tmp6841, i64 1
+ %tmp6843 = getelementptr inbounds float, float* %tmp6842, i64 1
+ %tmp6844 = getelementptr inbounds float, float* %tmp6843, i64 1
+ %tmp6845 = getelementptr inbounds float, float* %tmp6844, i64 1
+ %tmp6846 = getelementptr inbounds float, float* %tmp6845, i64 1
+ %tmp6847 = getelementptr inbounds float, float* %tmp6846, i64 1
+ %tmp6848 = getelementptr inbounds float, float* %tmp6847, i64 1
+ %tmp6849 = getelementptr inbounds float, float* %tmp6848, i64 1
+ %tmp6850 = getelementptr inbounds float, float* %tmp6849, i64 1
+ %tmp6851 = getelementptr inbounds float, float* %tmp6850, i64 1
+ %tmp6852 = getelementptr inbounds float, float* %tmp6851, i64 1
+ %tmp6853 = getelementptr inbounds float, float* %tmp6852, i64 1
+ %tmp6854 = getelementptr inbounds float, float* %tmp6853, i64 1
+ %tmp6855 = getelementptr inbounds float, float* %tmp6854, i64 1
+ %tmp6856 = getelementptr inbounds float, float* %tmp6855, i64 1
+ %tmp6857 = getelementptr inbounds float, float* %tmp6856, i64 1
+ %tmp6858 = getelementptr inbounds float, float* %tmp6857, i64 1
+ %tmp6859 = getelementptr inbounds float, float* %tmp6858, i64 1
+ %tmp6860 = getelementptr inbounds float, float* %tmp6859, i64 1
+ %tmp6861 = getelementptr inbounds float, float* %tmp6860, i64 1
+ %tmp6862 = getelementptr inbounds float, float* %tmp6861, i64 1
+ %tmp6863 = getelementptr inbounds float, float* %tmp6862, i64 1
+ %tmp6864 = getelementptr inbounds float, float* %tmp6863, i64 1
+ %tmp6865 = getelementptr inbounds float, float* %tmp6864, i64 1
+ %tmp6866 = getelementptr inbounds float, float* %tmp6865, i64 1
+ %tmp6867 = getelementptr inbounds float, float* %tmp6866, i64 1
+ %tmp6868 = getelementptr inbounds float, float* %tmp6867, i64 1
+ %tmp6869 = getelementptr inbounds float, float* %tmp6868, i64 1
+ %tmp6870 = getelementptr inbounds float, float* %tmp6869, i64 1
+ %tmp6871 = getelementptr inbounds float, float* %tmp6870, i64 1
+ %tmp6872 = getelementptr inbounds float, float* %tmp6871, i64 1
+ %tmp6873 = getelementptr inbounds float, float* %tmp6872, i64 1
+ %tmp6874 = getelementptr inbounds float, float* %tmp6873, i64 1
+ %tmp6875 = getelementptr inbounds float, float* %tmp6874, i64 1
+ %tmp6876 = getelementptr inbounds float, float* %tmp6875, i64 1
+ %tmp6877 = getelementptr inbounds float, float* %tmp6876, i64 1
+ %tmp6878 = getelementptr inbounds float, float* %tmp6877, i64 1
+ %tmp6879 = getelementptr inbounds float, float* %tmp6878, i64 1
+ %tmp6880 = getelementptr inbounds float, float* %tmp6879, i64 1
+ %tmp6881 = getelementptr inbounds float, float* %tmp6880, i64 1
+ %tmp6882 = getelementptr inbounds float, float* %tmp6881, i64 1
+ %tmp6883 = getelementptr inbounds float, float* %tmp6882, i64 1
+ %tmp6884 = getelementptr inbounds float, float* %tmp6883, i64 1
+ %tmp6885 = getelementptr inbounds float, float* %tmp6884, i64 1
+ %tmp6886 = getelementptr inbounds float, float* %tmp6885, i64 1
+ %tmp6887 = getelementptr inbounds float, float* %tmp6886, i64 1
+ %tmp6888 = getelementptr inbounds float, float* %tmp6887, i64 1
+ %tmp6889 = getelementptr inbounds float, float* %tmp6888, i64 1
+ %tmp6890 = getelementptr inbounds float, float* %tmp6889, i64 1
+ %tmp6891 = getelementptr inbounds float, float* %tmp6890, i64 1
+ %tmp6892 = getelementptr inbounds float, float* %tmp6891, i64 1
+ %tmp6893 = getelementptr inbounds float, float* %tmp6892, i64 1
+ %tmp6894 = getelementptr inbounds float, float* %tmp6893, i64 1
+ %tmp6895 = getelementptr inbounds float, float* %tmp6894, i64 1
+ %tmp6896 = getelementptr inbounds float, float* %tmp6895, i64 1
+ %tmp6897 = getelementptr inbounds float, float* %tmp6896, i64 1
+ %tmp6898 = getelementptr inbounds float, float* %tmp6897, i64 1
+ %tmp6899 = getelementptr inbounds float, float* %tmp6898, i64 1
+ %tmp6900 = getelementptr inbounds float, float* %tmp6899, i64 1
+ %tmp6901 = getelementptr inbounds float, float* %tmp6900, i64 1
+ %tmp6902 = getelementptr inbounds float, float* %tmp6901, i64 1
+ %tmp6903 = getelementptr inbounds float, float* %tmp6902, i64 1
+ %tmp6904 = getelementptr inbounds float, float* %tmp6903, i64 1
+ %tmp6905 = getelementptr inbounds float, float* %tmp6904, i64 1
+ %tmp6906 = getelementptr inbounds float, float* %tmp6905, i64 1
+ %tmp6907 = getelementptr inbounds float, float* %tmp6906, i64 1
+ %tmp6908 = getelementptr inbounds float, float* %tmp6907, i64 1
+ %tmp6909 = getelementptr inbounds float, float* %tmp6908, i64 1
+ %tmp6910 = getelementptr inbounds float, float* %tmp6909, i64 1
+ %tmp6911 = getelementptr inbounds float, float* %tmp6910, i64 1
+ %tmp6912 = getelementptr inbounds float, float* %tmp6911, i64 1
+ %tmp6913 = getelementptr inbounds float, float* %tmp6912, i64 1
+ %tmp6914 = getelementptr inbounds float, float* %tmp6913, i64 1
+ %tmp6915 = getelementptr inbounds float, float* %tmp6914, i64 1
+ %tmp6916 = getelementptr inbounds float, float* %tmp6915, i64 1
+ %tmp6917 = getelementptr inbounds float, float* %tmp6916, i64 1
+ %tmp6918 = getelementptr inbounds float, float* %tmp6917, i64 1
+ %tmp6919 = getelementptr inbounds float, float* %tmp6918, i64 1
+ %tmp6920 = getelementptr inbounds float, float* %tmp6919, i64 1
+ %tmp6921 = getelementptr inbounds float, float* %tmp6920, i64 1
+ %tmp6922 = getelementptr inbounds float, float* %tmp6921, i64 1
+ %tmp6923 = getelementptr inbounds float, float* %tmp6922, i64 1
+ %tmp6924 = getelementptr inbounds float, float* %tmp6923, i64 1
+ %tmp6925 = getelementptr inbounds float, float* %tmp6924, i64 1
+ %tmp6926 = getelementptr inbounds float, float* %tmp6925, i64 1
+ %tmp6927 = getelementptr inbounds float, float* %tmp6926, i64 1
+ %tmp6928 = getelementptr inbounds float, float* %tmp6927, i64 1
+ %tmp6929 = getelementptr inbounds float, float* %tmp6928, i64 1
+ %tmp6930 = getelementptr inbounds float, float* %tmp6929, i64 1
+ %tmp6931 = getelementptr inbounds float, float* %tmp6930, i64 1
+ %tmp6932 = getelementptr inbounds float, float* %tmp6931, i64 1
+ %tmp6933 = getelementptr inbounds float, float* %tmp6932, i64 1
+ %tmp6934 = getelementptr inbounds float, float* %tmp6933, i64 1
+ %tmp6935 = getelementptr inbounds float, float* %tmp6934, i64 1
+ %tmp6936 = getelementptr inbounds float, float* %tmp6935, i64 1
+ %tmp6937 = getelementptr inbounds float, float* %tmp6936, i64 1
+ %tmp6938 = getelementptr inbounds float, float* %tmp6937, i64 1
+ %tmp6939 = getelementptr inbounds float, float* %tmp6938, i64 1
+ %tmp6940 = getelementptr inbounds float, float* %tmp6939, i64 1
+ %tmp6941 = getelementptr inbounds float, float* %tmp6940, i64 1
+ %tmp6942 = getelementptr inbounds float, float* %tmp6941, i64 1
+ %tmp6943 = getelementptr inbounds float, float* %tmp6942, i64 1
+ %tmp6944 = getelementptr inbounds float, float* %tmp6943, i64 1
+ %tmp6945 = getelementptr inbounds float, float* %tmp6944, i64 1
+ %tmp6946 = getelementptr inbounds float, float* %tmp6945, i64 1
+ %tmp6947 = getelementptr inbounds float, float* %tmp6946, i64 1
+ %tmp6948 = getelementptr inbounds float, float* %tmp6947, i64 1
+ %tmp6949 = getelementptr inbounds float, float* %tmp6948, i64 1
+ %tmp6950 = getelementptr inbounds float, float* %tmp6949, i64 1
+ %tmp6951 = getelementptr inbounds float, float* %tmp6950, i64 1
+ %tmp6952 = getelementptr inbounds float, float* %tmp6951, i64 1
+ %tmp6953 = getelementptr inbounds float, float* %tmp6952, i64 1
+ %tmp6954 = getelementptr inbounds float, float* %tmp6953, i64 1
+ %tmp6955 = getelementptr inbounds float, float* %tmp6954, i64 1
+ %tmp6956 = getelementptr inbounds float, float* %tmp6955, i64 1
+ %tmp6957 = getelementptr inbounds float, float* %tmp6956, i64 1
+ %tmp6958 = getelementptr inbounds float, float* %tmp6957, i64 1
+ %tmp6959 = getelementptr inbounds float, float* %tmp6958, i64 1
+ %tmp6960 = getelementptr inbounds float, float* %tmp6959, i64 1
+ %tmp6961 = getelementptr inbounds float, float* %tmp6960, i64 1
+ %tmp6962 = getelementptr inbounds float, float* %tmp6961, i64 1
+ %tmp6963 = getelementptr inbounds float, float* %tmp6962, i64 1
+ %tmp6964 = getelementptr inbounds float, float* %tmp6963, i64 1
+ %tmp6965 = getelementptr inbounds float, float* %tmp6964, i64 1
+ %tmp6966 = getelementptr inbounds float, float* %tmp6965, i64 1
+ %tmp6967 = getelementptr inbounds float, float* %tmp6966, i64 1
+ %tmp6968 = getelementptr inbounds float, float* %tmp6967, i64 1
+ %tmp6969 = getelementptr inbounds float, float* %tmp6968, i64 1
+ %tmp6970 = getelementptr inbounds float, float* %tmp6969, i64 1
+ %tmp6971 = getelementptr inbounds float, float* %tmp6970, i64 1
+ %tmp6972 = getelementptr inbounds float, float* %tmp6971, i64 1
+ %tmp6973 = getelementptr inbounds float, float* %tmp6972, i64 1
+ %tmp6974 = getelementptr inbounds float, float* %tmp6973, i64 1
+ %tmp6975 = getelementptr inbounds float, float* %tmp6974, i64 1
+ %tmp6976 = getelementptr inbounds float, float* %tmp6975, i64 1
+ %tmp6977 = getelementptr inbounds float, float* %tmp6976, i64 1
+ %tmp6978 = getelementptr inbounds float, float* %tmp6977, i64 1
+ %tmp6979 = getelementptr inbounds float, float* %tmp6978, i64 1
+ %tmp6980 = getelementptr inbounds float, float* %tmp6979, i64 1
+ %tmp6981 = getelementptr inbounds float, float* %tmp6980, i64 1
+ %tmp6982 = getelementptr inbounds float, float* %tmp6981, i64 1
+ %tmp6983 = getelementptr inbounds float, float* %tmp6982, i64 1
+ %tmp6984 = getelementptr inbounds float, float* %tmp6983, i64 1
+ %tmp6985 = getelementptr inbounds float, float* %tmp6984, i64 1
+ %tmp6986 = getelementptr inbounds float, float* %tmp6985, i64 1
+ %tmp6987 = getelementptr inbounds float, float* %tmp6986, i64 1
+ %tmp6988 = getelementptr inbounds float, float* %tmp6987, i64 1
+ %tmp6989 = getelementptr inbounds float, float* %tmp6988, i64 1
+ %tmp6990 = getelementptr inbounds float, float* %tmp6989, i64 1
+ %tmp6991 = getelementptr inbounds float, float* %tmp6990, i64 1
+ %tmp6992 = getelementptr inbounds float, float* %tmp6991, i64 1
+ %tmp6993 = getelementptr inbounds float, float* %tmp6992, i64 1
+ %tmp6994 = getelementptr inbounds float, float* %tmp6993, i64 1
+ %tmp6995 = getelementptr inbounds float, float* %tmp6994, i64 1
+ %tmp6996 = getelementptr inbounds float, float* %tmp6995, i64 1
+ %tmp6997 = getelementptr inbounds float, float* %tmp6996, i64 1
+ %tmp6998 = getelementptr inbounds float, float* %tmp6997, i64 1
+ %tmp6999 = getelementptr inbounds float, float* %tmp6998, i64 1
+ %tmp7000 = getelementptr inbounds float, float* %tmp6999, i64 1
+ %tmp7001 = getelementptr inbounds float, float* %tmp7000, i64 1
+ %tmp7002 = getelementptr inbounds float, float* %tmp7001, i64 1
+ %tmp7003 = getelementptr inbounds float, float* %tmp7002, i64 1
+ %tmp7004 = getelementptr inbounds float, float* %tmp7003, i64 1
+ %tmp7005 = getelementptr inbounds float, float* %tmp7004, i64 1
+ %tmp7006 = getelementptr inbounds float, float* %tmp7005, i64 1
+ %tmp7007 = getelementptr inbounds float, float* %tmp7006, i64 1
+ %tmp7008 = getelementptr inbounds float, float* %tmp7007, i64 1
+ %tmp7009 = getelementptr inbounds float, float* %tmp7008, i64 1
+ %tmp7010 = getelementptr inbounds float, float* %tmp7009, i64 1
+ %tmp7011 = getelementptr inbounds float, float* %tmp7010, i64 1
+ %tmp7012 = getelementptr inbounds float, float* %tmp7011, i64 1
+ %tmp7013 = getelementptr inbounds float, float* %tmp7012, i64 1
+ %tmp7014 = getelementptr inbounds float, float* %tmp7013, i64 1
+ %tmp7015 = getelementptr inbounds float, float* %tmp7014, i64 1
+ %tmp7016 = getelementptr inbounds float, float* %tmp7015, i64 1
+ %tmp7017 = getelementptr inbounds float, float* %tmp7016, i64 1
+ %tmp7018 = getelementptr inbounds float, float* %tmp7017, i64 1
+ %tmp7019 = getelementptr inbounds float, float* %tmp7018, i64 1
+ %tmp7020 = getelementptr inbounds float, float* %tmp7019, i64 1
+ %tmp7021 = getelementptr inbounds float, float* %tmp7020, i64 1
+ %tmp7022 = getelementptr inbounds float, float* %tmp7021, i64 1
+ %tmp7023 = getelementptr inbounds float, float* %tmp7022, i64 1
+ %tmp7024 = getelementptr inbounds float, float* %tmp7023, i64 1
+ %tmp7025 = getelementptr inbounds float, float* %tmp7024, i64 1
+ %tmp7026 = getelementptr inbounds float, float* %tmp7025, i64 1
+ %tmp7027 = getelementptr inbounds float, float* %tmp7026, i64 1
+ %tmp7028 = getelementptr inbounds float, float* %tmp7027, i64 1
+ %tmp7029 = getelementptr inbounds float, float* %tmp7028, i64 1
+ %tmp7030 = getelementptr inbounds float, float* %tmp7029, i64 1
+ %tmp7031 = getelementptr inbounds float, float* %tmp7030, i64 1
+ %tmp7032 = getelementptr inbounds float, float* %tmp7031, i64 1
+ %tmp7033 = getelementptr inbounds float, float* %tmp7032, i64 1
+ %tmp7034 = getelementptr inbounds float, float* %tmp7033, i64 1
+ %tmp7035 = getelementptr inbounds float, float* %tmp7034, i64 1
+ %tmp7036 = getelementptr inbounds float, float* %tmp7035, i64 1
+ %tmp7037 = getelementptr inbounds float, float* %tmp7036, i64 1
+ %tmp7038 = getelementptr inbounds float, float* %tmp7037, i64 1
+ %tmp7039 = getelementptr inbounds float, float* %tmp7038, i64 1
+ %tmp7040 = getelementptr inbounds float, float* %tmp7039, i64 1
+ %tmp7041 = getelementptr inbounds float, float* %tmp7040, i64 1
+ %tmp7042 = getelementptr inbounds float, float* %tmp7041, i64 1
+ %tmp7043 = getelementptr inbounds float, float* %tmp7042, i64 1
+ %tmp7044 = getelementptr inbounds float, float* %tmp7043, i64 1
+ %tmp7045 = getelementptr inbounds float, float* %tmp7044, i64 1
+ %tmp7046 = getelementptr inbounds float, float* %tmp7045, i64 1
+ %tmp7047 = getelementptr inbounds float, float* %tmp7046, i64 1
+ %tmp7048 = getelementptr inbounds float, float* %tmp7047, i64 1
+ %tmp7049 = getelementptr inbounds float, float* %tmp7048, i64 1
+ %tmp7050 = getelementptr inbounds float, float* %tmp7049, i64 1
+ %tmp7051 = getelementptr inbounds float, float* %tmp7050, i64 1
+ %tmp7052 = getelementptr inbounds float, float* %tmp7051, i64 1
+ %tmp7053 = getelementptr inbounds float, float* %tmp7052, i64 1
+ %tmp7054 = getelementptr inbounds float, float* %tmp7053, i64 1
+ %tmp7055 = getelementptr inbounds float, float* %tmp7054, i64 1
+ %tmp7056 = getelementptr inbounds float, float* %tmp7055, i64 1
+ %tmp7057 = getelementptr inbounds float, float* %tmp7056, i64 1
+ %tmp7058 = getelementptr inbounds float, float* %tmp7057, i64 1
+ %tmp7059 = getelementptr inbounds float, float* %tmp7058, i64 1
+ %tmp7060 = getelementptr inbounds float, float* %tmp7059, i64 1
+ %tmp7061 = getelementptr inbounds float, float* %tmp7060, i64 1
+ %tmp7062 = getelementptr inbounds float, float* %tmp7061, i64 1
+ %tmp7063 = getelementptr inbounds float, float* %tmp7062, i64 1
+ %tmp7064 = getelementptr inbounds float, float* %tmp7063, i64 1
+ %tmp7065 = getelementptr inbounds float, float* %tmp7064, i64 1
+ %tmp7066 = getelementptr inbounds float, float* %tmp7065, i64 1
+ %tmp7067 = getelementptr inbounds float, float* %tmp7066, i64 1
+ %tmp7068 = getelementptr inbounds float, float* %tmp7067, i64 1
+ %tmp7069 = getelementptr inbounds float, float* %tmp7068, i64 1
+ %tmp7070 = getelementptr inbounds float, float* %tmp7069, i64 1
+ %tmp7071 = getelementptr inbounds float, float* %tmp7070, i64 1
+ %tmp7072 = getelementptr inbounds float, float* %tmp7071, i64 1
+ %tmp7073 = getelementptr inbounds float, float* %tmp7072, i64 1
+ %tmp7074 = getelementptr inbounds float, float* %tmp7073, i64 1
+ %tmp7075 = getelementptr inbounds float, float* %tmp7074, i64 1
+ %tmp7076 = getelementptr inbounds float, float* %tmp7075, i64 1
+ %tmp7077 = getelementptr inbounds float, float* %tmp7076, i64 1
+ %tmp7078 = getelementptr inbounds float, float* %tmp7077, i64 1
+ %tmp7079 = getelementptr inbounds float, float* %tmp7078, i64 1
+ %tmp7080 = getelementptr inbounds float, float* %tmp7079, i64 1
+ %tmp7081 = getelementptr inbounds float, float* %tmp7080, i64 1
+ %tmp7082 = getelementptr inbounds float, float* %tmp7081, i64 1
+ %tmp7083 = getelementptr inbounds float, float* %tmp7082, i64 1
+ %tmp7084 = getelementptr inbounds float, float* %tmp7083, i64 1
+ %tmp7085 = getelementptr inbounds float, float* %tmp7084, i64 1
+ %tmp7086 = getelementptr inbounds float, float* %tmp7085, i64 1
+ %tmp7087 = getelementptr inbounds float, float* %tmp7086, i64 1
+ %tmp7088 = getelementptr inbounds float, float* %tmp7087, i64 1
+ %tmp7089 = getelementptr inbounds float, float* %tmp7088, i64 1
+ %tmp7090 = getelementptr inbounds float, float* %tmp7089, i64 1
+ %tmp7091 = getelementptr inbounds float, float* %tmp7090, i64 1
+ %tmp7092 = getelementptr inbounds float, float* %tmp7091, i64 1
+ %tmp7093 = getelementptr inbounds float, float* %tmp7092, i64 1
+ %tmp7094 = getelementptr inbounds float, float* %tmp7093, i64 1
+ %tmp7095 = getelementptr inbounds float, float* %tmp7094, i64 1
+ %tmp7096 = getelementptr inbounds float, float* %tmp7095, i64 1
+ %tmp7097 = getelementptr inbounds float, float* %tmp7096, i64 1
+ %tmp7098 = getelementptr inbounds float, float* %tmp7097, i64 1
+ %tmp7099 = getelementptr inbounds float, float* %tmp7098, i64 1
+ %tmp7100 = getelementptr inbounds float, float* %tmp7099, i64 1
+ %tmp7101 = getelementptr inbounds float, float* %tmp7100, i64 1
+ %tmp7102 = getelementptr inbounds float, float* %tmp7101, i64 1
+ %tmp7103 = getelementptr inbounds float, float* %tmp7102, i64 1
+ %tmp7104 = getelementptr inbounds float, float* %tmp7103, i64 1
+ %tmp7105 = getelementptr inbounds float, float* %tmp7104, i64 1
+ %tmp7106 = getelementptr inbounds float, float* %tmp7105, i64 1
+ %tmp7107 = getelementptr inbounds float, float* %tmp7106, i64 1
+ %tmp7108 = getelementptr inbounds float, float* %tmp7107, i64 1
+ %tmp7109 = getelementptr inbounds float, float* %tmp7108, i64 1
+ %tmp7110 = getelementptr inbounds float, float* %tmp7109, i64 1
+ %tmp7111 = getelementptr inbounds float, float* %tmp7110, i64 1
+ %tmp7112 = getelementptr inbounds float, float* %tmp7111, i64 1
+ %tmp7113 = getelementptr inbounds float, float* %tmp7112, i64 1
+ %tmp7114 = getelementptr inbounds float, float* %tmp7113, i64 1
+ %tmp7115 = getelementptr inbounds float, float* %tmp7114, i64 1
+ %tmp7116 = getelementptr inbounds float, float* %tmp7115, i64 1
+ %tmp7117 = getelementptr inbounds float, float* %tmp7116, i64 1
+ %tmp7118 = getelementptr inbounds float, float* %tmp7117, i64 1
+ %tmp7119 = getelementptr inbounds float, float* %tmp7118, i64 1
+ %tmp7120 = getelementptr inbounds float, float* %tmp7119, i64 1
+ %tmp7121 = getelementptr inbounds float, float* %tmp7120, i64 1
+ %tmp7122 = getelementptr inbounds float, float* %tmp7121, i64 1
+ %tmp7123 = getelementptr inbounds float, float* %tmp7122, i64 1
+ %tmp7124 = getelementptr inbounds float, float* %tmp7123, i64 1
+ %tmp7125 = getelementptr inbounds float, float* %tmp7124, i64 1
+ %tmp7126 = getelementptr inbounds float, float* %tmp7125, i64 1
+ %tmp7127 = getelementptr inbounds float, float* %tmp7126, i64 1
+ %tmp7128 = getelementptr inbounds float, float* %tmp7127, i64 1
+ %tmp7129 = getelementptr inbounds float, float* %tmp7128, i64 1
+ %tmp7130 = getelementptr inbounds float, float* %tmp7129, i64 1
+ %tmp7131 = getelementptr inbounds float, float* %tmp7130, i64 1
+ %tmp7132 = getelementptr inbounds float, float* %tmp7131, i64 1
+ %tmp7133 = getelementptr inbounds float, float* %tmp7132, i64 1
+ %tmp7134 = getelementptr inbounds float, float* %tmp7133, i64 1
+ %tmp7135 = getelementptr inbounds float, float* %tmp7134, i64 1
+ %tmp7136 = getelementptr inbounds float, float* %tmp7135, i64 1
+ %tmp7137 = getelementptr inbounds float, float* %tmp7136, i64 1
+ %tmp7138 = getelementptr inbounds float, float* %tmp7137, i64 1
+ %tmp7139 = getelementptr inbounds float, float* %tmp7138, i64 1
+ %tmp7140 = getelementptr inbounds float, float* %tmp7139, i64 1
+ %tmp7141 = getelementptr inbounds float, float* %tmp7140, i64 1
+ %tmp7142 = getelementptr inbounds float, float* %tmp7141, i64 1
+ %tmp7143 = getelementptr inbounds float, float* %tmp7142, i64 1
+ %tmp7144 = getelementptr inbounds float, float* %tmp7143, i64 1
+ %tmp7145 = getelementptr inbounds float, float* %tmp7144, i64 1
+ %tmp7146 = getelementptr inbounds float, float* %tmp7145, i64 1
+ %tmp7147 = getelementptr inbounds float, float* %tmp7146, i64 1
+ %tmp7148 = getelementptr inbounds float, float* %tmp7147, i64 1
+ %tmp7149 = getelementptr inbounds float, float* %tmp7148, i64 1
+ %tmp7150 = getelementptr inbounds float, float* %tmp7149, i64 1
+ %tmp7151 = getelementptr inbounds float, float* %tmp7150, i64 1
+ %tmp7152 = getelementptr inbounds float, float* %tmp7151, i64 1
+ %tmp7153 = getelementptr inbounds float, float* %tmp7152, i64 1
+ %tmp7154 = getelementptr inbounds float, float* %tmp7153, i64 1
+ %tmp7155 = getelementptr inbounds float, float* %tmp7154, i64 1
+ %tmp7156 = getelementptr inbounds float, float* %tmp7155, i64 1
+ %tmp7157 = getelementptr inbounds float, float* %tmp7156, i64 1
+ %tmp7158 = getelementptr inbounds float, float* %tmp7157, i64 1
+ %tmp7159 = getelementptr inbounds float, float* %tmp7158, i64 1
+ %tmp7160 = getelementptr inbounds float, float* %tmp7159, i64 1
+ %tmp7161 = getelementptr inbounds float, float* %tmp7160, i64 1
+ %tmp7162 = getelementptr inbounds float, float* %tmp7161, i64 1
+ %tmp7163 = getelementptr inbounds float, float* %tmp7162, i64 1
+ %tmp7164 = getelementptr inbounds float, float* %tmp7163, i64 1
+ %tmp7165 = getelementptr inbounds float, float* %tmp7164, i64 1
+ %tmp7166 = getelementptr inbounds float, float* %tmp7165, i64 1
+ %tmp7167 = getelementptr inbounds float, float* %tmp7166, i64 1
+ %tmp7168 = getelementptr inbounds float, float* %tmp7167, i64 1
+ %tmp7169 = getelementptr inbounds float, float* %tmp7168, i64 1
+ %tmp7170 = getelementptr inbounds float, float* %tmp7169, i64 1
+ %tmp7171 = getelementptr inbounds float, float* %tmp7170, i64 1
+ %tmp7172 = getelementptr inbounds float, float* %tmp7171, i64 1
+ %tmp7173 = getelementptr inbounds float, float* %tmp7172, i64 1
+ %tmp7174 = getelementptr inbounds float, float* %tmp7173, i64 1
+ %tmp7175 = getelementptr inbounds float, float* %tmp7174, i64 1
+ %tmp7176 = getelementptr inbounds float, float* %tmp7175, i64 1
+ %tmp7177 = getelementptr inbounds float, float* %tmp7176, i64 1
+ %tmp7178 = getelementptr inbounds float, float* %tmp7177, i64 1
+ %tmp7179 = getelementptr inbounds float, float* %tmp7178, i64 1
+ %tmp7180 = getelementptr inbounds float, float* %tmp7179, i64 1
+ %tmp7181 = getelementptr inbounds float, float* %tmp7180, i64 1
+ %tmp7182 = getelementptr inbounds float, float* %tmp7181, i64 1
+ %tmp7183 = getelementptr inbounds float, float* %tmp7182, i64 1
+ %tmp7184 = getelementptr inbounds float, float* %tmp7183, i64 1
+ %tmp7185 = getelementptr inbounds float, float* %tmp7184, i64 1
+ %tmp7186 = getelementptr inbounds float, float* %tmp7185, i64 1
+ %tmp7187 = getelementptr inbounds float, float* %tmp7186, i64 1
+ %tmp7188 = getelementptr inbounds float, float* %tmp7187, i64 1
+ %tmp7189 = getelementptr inbounds float, float* %tmp7188, i64 1
+ %tmp7190 = getelementptr inbounds float, float* %tmp7189, i64 1
+ %tmp7191 = getelementptr inbounds float, float* %tmp7190, i64 1
+ %tmp7192 = getelementptr inbounds float, float* %tmp7191, i64 1
+ %tmp7193 = getelementptr inbounds float, float* %tmp7192, i64 1
+ %tmp7194 = getelementptr inbounds float, float* %tmp7193, i64 1
+ %tmp7195 = getelementptr inbounds float, float* %tmp7194, i64 1
+ %tmp7196 = getelementptr inbounds float, float* %tmp7195, i64 1
+ %tmp7197 = getelementptr inbounds float, float* %tmp7196, i64 1
+ %tmp7198 = getelementptr inbounds float, float* %tmp7197, i64 1
+ %tmp7199 = getelementptr inbounds float, float* %tmp7198, i64 1
+ %tmp7200 = getelementptr inbounds float, float* %tmp7199, i64 1
+ %tmp7201 = getelementptr inbounds float, float* %tmp7200, i64 1
+ %tmp7202 = getelementptr inbounds float, float* %tmp7201, i64 1
+ %tmp7203 = getelementptr inbounds float, float* %tmp7202, i64 1
+ %tmp7204 = getelementptr inbounds float, float* %tmp7203, i64 1
+ %tmp7205 = getelementptr inbounds float, float* %tmp7204, i64 1
+ %tmp7206 = getelementptr inbounds float, float* %tmp7205, i64 1
+ %tmp7207 = getelementptr inbounds float, float* %tmp7206, i64 1
+ %tmp7208 = getelementptr inbounds float, float* %tmp7207, i64 1
+ %tmp7209 = getelementptr inbounds float, float* %tmp7208, i64 1
+ %tmp7210 = getelementptr inbounds float, float* %tmp7209, i64 1
+ %tmp7211 = getelementptr inbounds float, float* %tmp7210, i64 1
+ %tmp7212 = getelementptr inbounds float, float* %tmp7211, i64 1
+ %tmp7213 = getelementptr inbounds float, float* %tmp7212, i64 1
+ %tmp7214 = getelementptr inbounds float, float* %tmp7213, i64 1
+ %tmp7215 = getelementptr inbounds float, float* %tmp7214, i64 1
+ %tmp7216 = getelementptr inbounds float, float* %tmp7215, i64 1
+ %tmp7217 = getelementptr inbounds float, float* %tmp7216, i64 1
+ %tmp7218 = getelementptr inbounds float, float* %tmp7217, i64 1
+ %tmp7219 = getelementptr inbounds float, float* %tmp7218, i64 1
+ %tmp7220 = getelementptr inbounds float, float* %tmp7219, i64 1
+ %tmp7221 = getelementptr inbounds float, float* %tmp7220, i64 1
+ %tmp7222 = getelementptr inbounds float, float* %tmp7221, i64 1
+ %tmp7223 = getelementptr inbounds float, float* %tmp7222, i64 1
+ %tmp7224 = getelementptr inbounds float, float* %tmp7223, i64 1
+ %tmp7225 = getelementptr inbounds float, float* %tmp7224, i64 1
+ %tmp7226 = getelementptr inbounds float, float* %tmp7225, i64 1
+ %tmp7227 = getelementptr inbounds float, float* %tmp7226, i64 1
+ %tmp7228 = getelementptr inbounds float, float* %tmp7227, i64 1
+ %tmp7229 = getelementptr inbounds float, float* %tmp7228, i64 1
+ %tmp7230 = getelementptr inbounds float, float* %tmp7229, i64 1
+ %tmp7231 = getelementptr inbounds float, float* %tmp7230, i64 1
+ %tmp7232 = getelementptr inbounds float, float* %tmp7231, i64 1
+ %tmp7233 = getelementptr inbounds float, float* %tmp7232, i64 1
+ %tmp7234 = getelementptr inbounds float, float* %tmp7233, i64 1
+ %tmp7235 = getelementptr inbounds float, float* %tmp7234, i64 1
+ %tmp7236 = getelementptr inbounds float, float* %tmp7235, i64 1
+ %tmp7237 = getelementptr inbounds float, float* %tmp7236, i64 1
+ %tmp7238 = getelementptr inbounds float, float* %tmp7237, i64 1
+ %tmp7239 = getelementptr inbounds float, float* %tmp7238, i64 1
+ %tmp7240 = getelementptr inbounds float, float* %tmp7239, i64 1
+ %tmp7241 = getelementptr inbounds float, float* %tmp7240, i64 1
+ %tmp7242 = getelementptr inbounds float, float* %tmp7241, i64 1
+ %tmp7243 = getelementptr inbounds float, float* %tmp7242, i64 1
+ %tmp7244 = getelementptr inbounds float, float* %tmp7243, i64 1
+ %tmp7245 = getelementptr inbounds float, float* %tmp7244, i64 1
+ %tmp7246 = getelementptr inbounds float, float* %tmp7245, i64 1
+ %tmp7247 = getelementptr inbounds float, float* %tmp7246, i64 1
+ %tmp7248 = getelementptr inbounds float, float* %tmp7247, i64 1
+ %tmp7249 = getelementptr inbounds float, float* %tmp7248, i64 1
+ %tmp7250 = getelementptr inbounds float, float* %tmp7249, i64 1
+ %tmp7251 = getelementptr inbounds float, float* %tmp7250, i64 1
+ %tmp7252 = getelementptr inbounds float, float* %tmp7251, i64 1
+ %tmp7253 = getelementptr inbounds float, float* %tmp7252, i64 1
+ %tmp7254 = getelementptr inbounds float, float* %tmp7253, i64 1
+ %tmp7255 = getelementptr inbounds float, float* %tmp7254, i64 1
+ %tmp7256 = getelementptr inbounds float, float* %tmp7255, i64 1
+ %tmp7257 = getelementptr inbounds float, float* %tmp7256, i64 1
+ %tmp7258 = getelementptr inbounds float, float* %tmp7257, i64 1
+ %tmp7259 = getelementptr inbounds float, float* %tmp7258, i64 1
+ %tmp7260 = getelementptr inbounds float, float* %tmp7259, i64 1
+ %tmp7261 = getelementptr inbounds float, float* %tmp7260, i64 1
+ %tmp7262 = getelementptr inbounds float, float* %tmp7261, i64 1
+ %tmp7263 = getelementptr inbounds float, float* %tmp7262, i64 1
+ %tmp7264 = getelementptr inbounds float, float* %tmp7263, i64 1
+ %tmp7265 = getelementptr inbounds float, float* %tmp7264, i64 1
+ %tmp7266 = getelementptr inbounds float, float* %tmp7265, i64 1
+ %tmp7267 = getelementptr inbounds float, float* %tmp7266, i64 1
+ %tmp7268 = getelementptr inbounds float, float* %tmp7267, i64 1
+ %tmp7269 = getelementptr inbounds float, float* %tmp7268, i64 1
+ %tmp7270 = getelementptr inbounds float, float* %tmp7269, i64 1
+ %tmp7271 = getelementptr inbounds float, float* %tmp7270, i64 1
+ %tmp7272 = getelementptr inbounds float, float* %tmp7271, i64 1
+ %tmp7273 = getelementptr inbounds float, float* %tmp7272, i64 1
+ %tmp7274 = getelementptr inbounds float, float* %tmp7273, i64 1
+ %tmp7275 = getelementptr inbounds float, float* %tmp7274, i64 1
+ %tmp7276 = getelementptr inbounds float, float* %tmp7275, i64 1
+ %tmp7277 = getelementptr inbounds float, float* %tmp7276, i64 1
+ %tmp7278 = getelementptr inbounds float, float* %tmp7277, i64 1
+ %tmp7279 = getelementptr inbounds float, float* %tmp7278, i64 1
+ %tmp7280 = getelementptr inbounds float, float* %tmp7279, i64 1
+ %tmp7281 = getelementptr inbounds float, float* %tmp7280, i64 1
+ %tmp7282 = getelementptr inbounds float, float* %tmp7281, i64 1
+ %tmp7283 = getelementptr inbounds float, float* %tmp7282, i64 1
+ %tmp7284 = getelementptr inbounds float, float* %tmp7283, i64 1
+ %tmp7285 = getelementptr inbounds float, float* %tmp7284, i64 1
+ %tmp7286 = getelementptr inbounds float, float* %tmp7285, i64 1
+ %tmp7287 = getelementptr inbounds float, float* %tmp7286, i64 1
+ %tmp7288 = getelementptr inbounds float, float* %tmp7287, i64 1
+ %tmp7289 = getelementptr inbounds float, float* %tmp7288, i64 1
+ %tmp7290 = getelementptr inbounds float, float* %tmp7289, i64 1
+ %tmp7291 = getelementptr inbounds float, float* %tmp7290, i64 1
+ %tmp7292 = getelementptr inbounds float, float* %tmp7291, i64 1
+ %tmp7293 = getelementptr inbounds float, float* %tmp7292, i64 1
+ %tmp7294 = getelementptr inbounds float, float* %tmp7293, i64 1
+ %tmp7295 = getelementptr inbounds float, float* %tmp7294, i64 1
+ %tmp7296 = getelementptr inbounds float, float* %tmp7295, i64 1
+ %tmp7297 = getelementptr inbounds float, float* %tmp7296, i64 1
+ %tmp7298 = getelementptr inbounds float, float* %tmp7297, i64 1
+ %tmp7299 = getelementptr inbounds float, float* %tmp7298, i64 1
+ %tmp7300 = getelementptr inbounds float, float* %tmp7299, i64 1
+ %tmp7301 = getelementptr inbounds float, float* %tmp7300, i64 1
+ %tmp7302 = getelementptr inbounds float, float* %tmp7301, i64 1
+ %tmp7303 = getelementptr inbounds float, float* %tmp7302, i64 1
+ %tmp7304 = getelementptr inbounds float, float* %tmp7303, i64 1
+ %tmp7305 = getelementptr inbounds float, float* %tmp7304, i64 1
+ %tmp7306 = getelementptr inbounds float, float* %tmp7305, i64 1
+ %tmp7307 = getelementptr inbounds float, float* %tmp7306, i64 1
+ %tmp7308 = getelementptr inbounds float, float* %tmp7307, i64 1
+ %tmp7309 = getelementptr inbounds float, float* %tmp7308, i64 1
+ %tmp7310 = getelementptr inbounds float, float* %tmp7309, i64 1
+ %tmp7311 = getelementptr inbounds float, float* %tmp7310, i64 1
+ %tmp7312 = getelementptr inbounds float, float* %tmp7311, i64 1
+ %tmp7313 = getelementptr inbounds float, float* %tmp7312, i64 1
+ %tmp7314 = getelementptr inbounds float, float* %tmp7313, i64 1
+ %tmp7315 = getelementptr inbounds float, float* %tmp7314, i64 1
+ %tmp7316 = getelementptr inbounds float, float* %tmp7315, i64 1
+ %tmp7317 = getelementptr inbounds float, float* %tmp7316, i64 1
+ %tmp7318 = getelementptr inbounds float, float* %tmp7317, i64 1
+ %tmp7319 = getelementptr inbounds float, float* %tmp7318, i64 1
+ %tmp7320 = getelementptr inbounds float, float* %tmp7319, i64 1
+ %tmp7321 = getelementptr inbounds float, float* %tmp7320, i64 1
+ %tmp7322 = getelementptr inbounds float, float* %tmp7321, i64 1
+ %tmp7323 = getelementptr inbounds float, float* %tmp7322, i64 1
+ %tmp7324 = getelementptr inbounds float, float* %tmp7323, i64 1
+ %tmp7325 = getelementptr inbounds float, float* %tmp7324, i64 1
+ %tmp7326 = getelementptr inbounds float, float* %tmp7325, i64 1
+ %tmp7327 = getelementptr inbounds float, float* %tmp7326, i64 1
+ %tmp7328 = getelementptr inbounds float, float* %tmp7327, i64 1
+ %tmp7329 = getelementptr inbounds float, float* %tmp7328, i64 1
+ %tmp7330 = getelementptr inbounds float, float* %tmp7329, i64 1
+ %tmp7331 = getelementptr inbounds float, float* %tmp7330, i64 1
+ %tmp7332 = getelementptr inbounds float, float* %tmp7331, i64 1
+ %tmp7333 = getelementptr inbounds float, float* %tmp7332, i64 1
+ %tmp7334 = getelementptr inbounds float, float* %tmp7333, i64 1
+ %tmp7335 = getelementptr inbounds float, float* %tmp7334, i64 1
+ %tmp7336 = getelementptr inbounds float, float* %tmp7335, i64 1
+ %tmp7337 = getelementptr inbounds float, float* %tmp7336, i64 1
+ %tmp7338 = getelementptr inbounds float, float* %tmp7337, i64 1
+ %tmp7339 = getelementptr inbounds float, float* %tmp7338, i64 1
+ %tmp7340 = getelementptr inbounds float, float* %tmp7339, i64 1
+ %tmp7341 = getelementptr inbounds float, float* %tmp7340, i64 1
+ %tmp7342 = getelementptr inbounds float, float* %tmp7341, i64 1
+ %tmp7343 = getelementptr inbounds float, float* %tmp7342, i64 1
+ %tmp7344 = getelementptr inbounds float, float* %tmp7343, i64 1
+ %tmp7345 = getelementptr inbounds float, float* %tmp7344, i64 1
+ %tmp7346 = getelementptr inbounds float, float* %tmp7345, i64 1
+ %tmp7347 = getelementptr inbounds float, float* %tmp7346, i64 1
+ %tmp7348 = getelementptr inbounds float, float* %tmp7347, i64 1
+ %tmp7349 = getelementptr inbounds float, float* %tmp7348, i64 1
+ %tmp7350 = getelementptr inbounds float, float* %tmp7349, i64 1
+ %tmp7351 = getelementptr inbounds float, float* %tmp7350, i64 1
+ %tmp7352 = getelementptr inbounds float, float* %tmp7351, i64 1
+ %tmp7353 = getelementptr inbounds float, float* %tmp7352, i64 1
+ %tmp7354 = getelementptr inbounds float, float* %tmp7353, i64 1
+ %tmp7355 = getelementptr inbounds float, float* %tmp7354, i64 1
+ %tmp7356 = getelementptr inbounds float, float* %tmp7355, i64 1
+ %tmp7357 = getelementptr inbounds float, float* %tmp7356, i64 1
+ %tmp7358 = getelementptr inbounds float, float* %tmp7357, i64 1
+ %tmp7359 = getelementptr inbounds float, float* %tmp7358, i64 1
+ %tmp7360 = getelementptr inbounds float, float* %tmp7359, i64 1
+ %tmp7361 = getelementptr inbounds float, float* %tmp7360, i64 1
+ %tmp7362 = getelementptr inbounds float, float* %tmp7361, i64 1
+ %tmp7363 = getelementptr inbounds float, float* %tmp7362, i64 1
+ %tmp7364 = getelementptr inbounds float, float* %tmp7363, i64 1
+ %tmp7365 = getelementptr inbounds float, float* %tmp7364, i64 1
+ %tmp7366 = getelementptr inbounds float, float* %tmp7365, i64 1
+ %tmp7367 = getelementptr inbounds float, float* %tmp7366, i64 1
+ %tmp7368 = getelementptr inbounds float, float* %tmp7367, i64 1
+ %tmp7369 = getelementptr inbounds float, float* %tmp7368, i64 1
+ %tmp7370 = getelementptr inbounds float, float* %tmp7369, i64 1
+ %tmp7371 = getelementptr inbounds float, float* %tmp7370, i64 1
+ %tmp7372 = getelementptr inbounds float, float* %tmp7371, i64 1
+ %tmp7373 = getelementptr inbounds float, float* %tmp7372, i64 1
+ %tmp7374 = getelementptr inbounds float, float* %tmp7373, i64 1
+ %tmp7375 = getelementptr inbounds float, float* %tmp7374, i64 1
+ %tmp7376 = getelementptr inbounds float, float* %tmp7375, i64 1
+ %tmp7377 = getelementptr inbounds float, float* %tmp7376, i64 1
+ %tmp7378 = getelementptr inbounds float, float* %tmp7377, i64 1
+ %tmp7379 = getelementptr inbounds float, float* %tmp7378, i64 1
+ %tmp7380 = getelementptr inbounds float, float* %tmp7379, i64 1
+ %tmp7381 = getelementptr inbounds float, float* %tmp7380, i64 1
+ %tmp7382 = getelementptr inbounds float, float* %tmp7381, i64 1
+ %tmp7383 = getelementptr inbounds float, float* %tmp7382, i64 1
+ %tmp7384 = getelementptr inbounds float, float* %tmp7383, i64 1
+ %tmp7385 = getelementptr inbounds float, float* %tmp7384, i64 1
+ %tmp7386 = getelementptr inbounds float, float* %tmp7385, i64 1
+ %tmp7387 = getelementptr inbounds float, float* %tmp7386, i64 1
+ %tmp7388 = getelementptr inbounds float, float* %tmp7387, i64 1
+ %tmp7389 = getelementptr inbounds float, float* %tmp7388, i64 1
+ %tmp7390 = getelementptr inbounds float, float* %tmp7389, i64 1
+ %tmp7391 = getelementptr inbounds float, float* %tmp7390, i64 1
+ %tmp7392 = getelementptr inbounds float, float* %tmp7391, i64 1
+ %tmp7393 = getelementptr inbounds float, float* %tmp7392, i64 1
+ %tmp7394 = getelementptr inbounds float, float* %tmp7393, i64 1
+ %tmp7395 = getelementptr inbounds float, float* %tmp7394, i64 1
+ %tmp7396 = getelementptr inbounds float, float* %tmp7395, i64 1
+ %tmp7397 = getelementptr inbounds float, float* %tmp7396, i64 1
+ %tmp7398 = getelementptr inbounds float, float* %tmp7397, i64 1
+ %tmp7399 = getelementptr inbounds float, float* %tmp7398, i64 1
+ %tmp7400 = getelementptr inbounds float, float* %tmp7399, i64 1
+ %tmp7401 = getelementptr inbounds float, float* %tmp7400, i64 1
+ %tmp7402 = getelementptr inbounds float, float* %tmp7401, i64 1
+ %tmp7403 = getelementptr inbounds float, float* %tmp7402, i64 1
+ %tmp7404 = getelementptr inbounds float, float* %tmp7403, i64 1
+ %tmp7405 = getelementptr inbounds float, float* %tmp7404, i64 1
+ %tmp7406 = getelementptr inbounds float, float* %tmp7405, i64 1
+ %tmp7407 = getelementptr inbounds float, float* %tmp7406, i64 1
+ %tmp7408 = getelementptr inbounds float, float* %tmp7407, i64 1
+ %tmp7409 = getelementptr inbounds float, float* %tmp7408, i64 1
+ %tmp7410 = getelementptr inbounds float, float* %tmp7409, i64 1
+ %tmp7411 = getelementptr inbounds float, float* %tmp7410, i64 1
+ %tmp7412 = getelementptr inbounds float, float* %tmp7411, i64 1
+ %tmp7413 = getelementptr inbounds float, float* %tmp7412, i64 1
+ %tmp7414 = getelementptr inbounds float, float* %tmp7413, i64 1
+ %tmp7415 = getelementptr inbounds float, float* %tmp7414, i64 1
+ %tmp7416 = getelementptr inbounds float, float* %tmp7415, i64 1
+ %tmp7417 = getelementptr inbounds float, float* %tmp7416, i64 1
+ %tmp7418 = getelementptr inbounds float, float* %tmp7417, i64 1
+ %tmp7419 = getelementptr inbounds float, float* %tmp7418, i64 1
+ %tmp7420 = getelementptr inbounds float, float* %tmp7419, i64 1
+ %tmp7421 = getelementptr inbounds float, float* %tmp7420, i64 1
+ %tmp7422 = getelementptr inbounds float, float* %tmp7421, i64 1
+ %tmp7423 = getelementptr inbounds float, float* %tmp7422, i64 1
+ %tmp7424 = getelementptr inbounds float, float* %tmp7423, i64 1
+ %tmp7425 = getelementptr inbounds float, float* %tmp7424, i64 1
+ %tmp7426 = getelementptr inbounds float, float* %tmp7425, i64 1
+ %tmp7427 = getelementptr inbounds float, float* %tmp7426, i64 1
+ %tmp7428 = getelementptr inbounds float, float* %tmp7427, i64 1
+ %tmp7429 = getelementptr inbounds float, float* %tmp7428, i64 1
+ %tmp7430 = getelementptr inbounds float, float* %tmp7429, i64 1
+ %tmp7431 = getelementptr inbounds float, float* %tmp7430, i64 1
+ %tmp7432 = getelementptr inbounds float, float* %tmp7431, i64 1
+ %tmp7433 = getelementptr inbounds float, float* %tmp7432, i64 1
+ %tmp7434 = getelementptr inbounds float, float* %tmp7433, i64 1
+ %tmp7435 = getelementptr inbounds float, float* %tmp7434, i64 1
+ %tmp7436 = getelementptr inbounds float, float* %tmp7435, i64 1
+ %tmp7437 = getelementptr inbounds float, float* %tmp7436, i64 1
+ %tmp7438 = getelementptr inbounds float, float* %tmp7437, i64 1
+ %tmp7439 = getelementptr inbounds float, float* %tmp7438, i64 1
+ %tmp7440 = getelementptr inbounds float, float* %tmp7439, i64 1
+ %tmp7441 = getelementptr inbounds float, float* %tmp7440, i64 1
+ %tmp7442 = getelementptr inbounds float, float* %tmp7441, i64 1
+ %tmp7443 = getelementptr inbounds float, float* %tmp7442, i64 1
+ %tmp7444 = getelementptr inbounds float, float* %tmp7443, i64 1
+ %tmp7445 = getelementptr inbounds float, float* %tmp7444, i64 1
+ %tmp7446 = getelementptr inbounds float, float* %tmp7445, i64 1
+ %tmp7447 = getelementptr inbounds float, float* %tmp7446, i64 1
+ %tmp7448 = getelementptr inbounds float, float* %tmp7447, i64 1
+ %tmp7449 = getelementptr inbounds float, float* %tmp7448, i64 1
+ %tmp7450 = getelementptr inbounds float, float* %tmp7449, i64 1
+ %tmp7451 = getelementptr inbounds float, float* %tmp7450, i64 1
+ %tmp7452 = getelementptr inbounds float, float* %tmp7451, i64 1
+ %tmp7453 = getelementptr inbounds float, float* %tmp7452, i64 1
+ %tmp7454 = getelementptr inbounds float, float* %tmp7453, i64 1
+ %tmp7455 = getelementptr inbounds float, float* %tmp7454, i64 1
+ %tmp7456 = getelementptr inbounds float, float* %tmp7455, i64 1
+ %tmp7457 = getelementptr inbounds float, float* %tmp7456, i64 1
+ %tmp7458 = getelementptr inbounds float, float* %tmp7457, i64 1
+ %tmp7459 = getelementptr inbounds float, float* %tmp7458, i64 1
+ %tmp7460 = getelementptr inbounds float, float* %tmp7459, i64 1
+ %tmp7461 = getelementptr inbounds float, float* %tmp7460, i64 1
+ %tmp7462 = getelementptr inbounds float, float* %tmp7461, i64 1
+ %tmp7463 = getelementptr inbounds float, float* %tmp7462, i64 1
+ %tmp7464 = getelementptr inbounds float, float* %tmp7463, i64 1
+ %tmp7465 = getelementptr inbounds float, float* %tmp7464, i64 1
+ %tmp7466 = getelementptr inbounds float, float* %tmp7465, i64 1
+ %tmp7467 = getelementptr inbounds float, float* %tmp7466, i64 1
+ %tmp7468 = getelementptr inbounds float, float* %tmp7467, i64 1
+ %tmp7469 = getelementptr inbounds float, float* %tmp7468, i64 1
+ %tmp7470 = getelementptr inbounds float, float* %tmp7469, i64 1
+ %tmp7471 = getelementptr inbounds float, float* %tmp7470, i64 1
+ %tmp7472 = getelementptr inbounds float, float* %tmp7471, i64 1
+ %tmp7473 = getelementptr inbounds float, float* %tmp7472, i64 1
+ %tmp7474 = getelementptr inbounds float, float* %tmp7473, i64 1
+ %tmp7475 = getelementptr inbounds float, float* %tmp7474, i64 1
+ %tmp7476 = getelementptr inbounds float, float* %tmp7475, i64 1
+ %tmp7477 = getelementptr inbounds float, float* %tmp7476, i64 1
+ %tmp7478 = getelementptr inbounds float, float* %tmp7477, i64 1
+ %tmp7479 = getelementptr inbounds float, float* %tmp7478, i64 1
+ %tmp7480 = getelementptr inbounds float, float* %tmp7479, i64 1
+ %tmp7481 = getelementptr inbounds float, float* %tmp7480, i64 1
+ %tmp7482 = getelementptr inbounds float, float* %tmp7481, i64 1
+ %tmp7483 = getelementptr inbounds float, float* %tmp7482, i64 1
+ %tmp7484 = getelementptr inbounds float, float* %tmp7483, i64 1
+ %tmp7485 = getelementptr inbounds float, float* %tmp7484, i64 1
+ %tmp7486 = getelementptr inbounds float, float* %tmp7485, i64 1
+ %tmp7487 = getelementptr inbounds float, float* %tmp7486, i64 1
+ %tmp7488 = getelementptr inbounds float, float* %tmp7487, i64 1
+ %tmp7489 = getelementptr inbounds float, float* %tmp7488, i64 1
+ %tmp7490 = getelementptr inbounds float, float* %tmp7489, i64 1
+ %tmp7491 = getelementptr inbounds float, float* %tmp7490, i64 1
+ %tmp7492 = getelementptr inbounds float, float* %tmp7491, i64 1
+ %tmp7493 = getelementptr inbounds float, float* %tmp7492, i64 1
+ %tmp7494 = getelementptr inbounds float, float* %tmp7493, i64 1
+ %tmp7495 = getelementptr inbounds float, float* %tmp7494, i64 1
+ %tmp7496 = getelementptr inbounds float, float* %tmp7495, i64 1
+ %tmp7497 = getelementptr inbounds float, float* %tmp7496, i64 1
+ %tmp7498 = getelementptr inbounds float, float* %tmp7497, i64 1
+ %tmp7499 = getelementptr inbounds float, float* %tmp7498, i64 1
+ %tmp7500 = getelementptr inbounds float, float* %tmp7499, i64 1
+ %tmp7501 = getelementptr inbounds float, float* %tmp7500, i64 1
+ %tmp7502 = getelementptr inbounds float, float* %tmp7501, i64 1
+ %tmp7503 = getelementptr inbounds float, float* %tmp7502, i64 1
+ %tmp7504 = getelementptr inbounds float, float* %tmp7503, i64 1
+ %tmp7505 = getelementptr inbounds float, float* %tmp7504, i64 1
+ %tmp7506 = getelementptr inbounds float, float* %tmp7505, i64 1
+ %tmp7507 = getelementptr inbounds float, float* %tmp7506, i64 1
+ %tmp7508 = getelementptr inbounds float, float* %tmp7507, i64 1
+ %tmp7509 = getelementptr inbounds float, float* %tmp7508, i64 1
+ %tmp7510 = getelementptr inbounds float, float* %tmp7509, i64 1
+ %tmp7511 = getelementptr inbounds float, float* %tmp7510, i64 1
+ %tmp7512 = getelementptr inbounds float, float* %tmp7511, i64 1
+ %tmp7513 = getelementptr inbounds float, float* %tmp7512, i64 1
+ %tmp7514 = getelementptr inbounds float, float* %tmp7513, i64 1
+ %tmp7515 = getelementptr inbounds float, float* %tmp7514, i64 1
+ %tmp7516 = getelementptr inbounds float, float* %tmp7515, i64 1
+ %tmp7517 = getelementptr inbounds float, float* %tmp7516, i64 1
+ %tmp7518 = getelementptr inbounds float, float* %tmp7517, i64 1
+ %tmp7519 = getelementptr inbounds float, float* %tmp7518, i64 1
+ %tmp7520 = getelementptr inbounds float, float* %tmp7519, i64 1
+ %tmp7521 = getelementptr inbounds float, float* %tmp7520, i64 1
+ %tmp7522 = getelementptr inbounds float, float* %tmp7521, i64 1
+ %tmp7523 = getelementptr inbounds float, float* %tmp7522, i64 1
+ %tmp7524 = getelementptr inbounds float, float* %tmp7523, i64 1
+ %tmp7525 = getelementptr inbounds float, float* %tmp7524, i64 1
+ %tmp7526 = getelementptr inbounds float, float* %tmp7525, i64 1
+ %tmp7527 = getelementptr inbounds float, float* %tmp7526, i64 1
+ %tmp7528 = getelementptr inbounds float, float* %tmp7527, i64 1
+ %tmp7529 = getelementptr inbounds float, float* %tmp7528, i64 1
+ %tmp7530 = getelementptr inbounds float, float* %tmp7529, i64 1
+ %tmp7531 = getelementptr inbounds float, float* %tmp7530, i64 1
+ %tmp7532 = getelementptr inbounds float, float* %tmp7531, i64 1
+ %tmp7533 = getelementptr inbounds float, float* %tmp7532, i64 1
+ %tmp7534 = getelementptr inbounds float, float* %tmp7533, i64 1
+ %tmp7535 = getelementptr inbounds float, float* %tmp7534, i64 1
+ %tmp7536 = getelementptr inbounds float, float* %tmp7535, i64 1
+ %tmp7537 = getelementptr inbounds float, float* %tmp7536, i64 1
+ %tmp7538 = getelementptr inbounds float, float* %tmp7537, i64 1
+ %tmp7539 = getelementptr inbounds float, float* %tmp7538, i64 1
+ %tmp7540 = getelementptr inbounds float, float* %tmp7539, i64 1
+ %tmp7541 = getelementptr inbounds float, float* %tmp7540, i64 1
+ %tmp7542 = getelementptr inbounds float, float* %tmp7541, i64 1
+ %tmp7543 = getelementptr inbounds float, float* %tmp7542, i64 1
+ %tmp7544 = getelementptr inbounds float, float* %tmp7543, i64 1
+ %tmp7545 = getelementptr inbounds float, float* %tmp7544, i64 1
+ %tmp7546 = getelementptr inbounds float, float* %tmp7545, i64 1
+ %tmp7547 = getelementptr inbounds float, float* %tmp7546, i64 1
+ %tmp7548 = getelementptr inbounds float, float* %tmp7547, i64 1
+ %tmp7549 = getelementptr inbounds float, float* %tmp7548, i64 1
+ %tmp7550 = getelementptr inbounds float, float* %tmp7549, i64 1
+ %tmp7551 = getelementptr inbounds float, float* %tmp7550, i64 1
+ %tmp7552 = getelementptr inbounds float, float* %tmp7551, i64 1
+ %tmp7553 = getelementptr inbounds float, float* %tmp7552, i64 1
+ %tmp7554 = getelementptr inbounds float, float* %tmp7553, i64 1
+ %tmp7555 = getelementptr inbounds float, float* %tmp7554, i64 1
+ %tmp7556 = getelementptr inbounds float, float* %tmp7555, i64 1
+ %tmp7557 = getelementptr inbounds float, float* %tmp7556, i64 1
+ %tmp7558 = getelementptr inbounds float, float* %tmp7557, i64 1
+ %tmp7559 = getelementptr inbounds float, float* %tmp7558, i64 1
+ %tmp7560 = getelementptr inbounds float, float* %tmp7559, i64 1
+ %tmp7561 = getelementptr inbounds float, float* %tmp7560, i64 1
+ %tmp7562 = getelementptr inbounds float, float* %tmp7561, i64 1
+ %tmp7563 = getelementptr inbounds float, float* %tmp7562, i64 1
+ %tmp7564 = getelementptr inbounds float, float* %tmp7563, i64 1
+ %tmp7565 = getelementptr inbounds float, float* %tmp7564, i64 1
+ %tmp7566 = getelementptr inbounds float, float* %tmp7565, i64 1
+ %tmp7567 = getelementptr inbounds float, float* %tmp7566, i64 1
+ %tmp7568 = getelementptr inbounds float, float* %tmp7567, i64 1
+ %tmp7569 = getelementptr inbounds float, float* %tmp7568, i64 1
+ %tmp7570 = getelementptr inbounds float, float* %tmp7569, i64 1
+ %tmp7571 = getelementptr inbounds float, float* %tmp7570, i64 1
+ %tmp7572 = getelementptr inbounds float, float* %tmp7571, i64 1
+ %tmp7573 = getelementptr inbounds float, float* %tmp7572, i64 1
+ %tmp7574 = getelementptr inbounds float, float* %tmp7573, i64 1
+ %tmp7575 = getelementptr inbounds float, float* %tmp7574, i64 1
+ %tmp7576 = getelementptr inbounds float, float* %tmp7575, i64 1
+ %tmp7577 = getelementptr inbounds float, float* %tmp7576, i64 1
+ %tmp7578 = getelementptr inbounds float, float* %tmp7577, i64 1
+ %tmp7579 = getelementptr inbounds float, float* %tmp7578, i64 1
+ %tmp7580 = getelementptr inbounds float, float* %tmp7579, i64 1
+ %tmp7581 = getelementptr inbounds float, float* %tmp7580, i64 1
+ %tmp7582 = getelementptr inbounds float, float* %tmp7581, i64 1
+ %tmp7583 = getelementptr inbounds float, float* %tmp7582, i64 1
+ %tmp7584 = getelementptr inbounds float, float* %tmp7583, i64 1
+ %tmp7585 = getelementptr inbounds float, float* %tmp7584, i64 1
+ %tmp7586 = getelementptr inbounds float, float* %tmp7585, i64 1
+ %tmp7587 = getelementptr inbounds float, float* %tmp7586, i64 1
+ %tmp7588 = getelementptr inbounds float, float* %tmp7587, i64 1
+ %tmp7589 = getelementptr inbounds float, float* %tmp7588, i64 1
+ %tmp7590 = getelementptr inbounds float, float* %tmp7589, i64 1
+ %tmp7591 = getelementptr inbounds float, float* %tmp7590, i64 1
+ %tmp7592 = getelementptr inbounds float, float* %tmp7591, i64 1
+ %tmp7593 = getelementptr inbounds float, float* %tmp7592, i64 1
+ %tmp7594 = getelementptr inbounds float, float* %tmp7593, i64 1
+ %tmp7595 = getelementptr inbounds float, float* %tmp7594, i64 1
+ %tmp7596 = getelementptr inbounds float, float* %tmp7595, i64 1
+ %tmp7597 = getelementptr inbounds float, float* %tmp7596, i64 1
+ %tmp7598 = getelementptr inbounds float, float* %tmp7597, i64 1
+ %tmp7599 = getelementptr inbounds float, float* %tmp7598, i64 1
+ %tmp7600 = getelementptr inbounds float, float* %tmp7599, i64 1
+ %tmp7601 = getelementptr inbounds float, float* %tmp7600, i64 1
+ %tmp7602 = getelementptr inbounds float, float* %tmp7601, i64 1
+ %tmp7603 = getelementptr inbounds float, float* %tmp7602, i64 1
+ %tmp7604 = getelementptr inbounds float, float* %tmp7603, i64 1
+ %tmp7605 = getelementptr inbounds float, float* %tmp7604, i64 1
+ %tmp7606 = getelementptr inbounds float, float* %tmp7605, i64 1
+ %tmp7607 = getelementptr inbounds float, float* %tmp7606, i64 1
+ %tmp7608 = getelementptr inbounds float, float* %tmp7607, i64 1
+ %tmp7609 = getelementptr inbounds float, float* %tmp7608, i64 1
+ %tmp7610 = getelementptr inbounds float, float* %tmp7609, i64 1
+ %tmp7611 = getelementptr inbounds float, float* %tmp7610, i64 1
+ %tmp7612 = getelementptr inbounds float, float* %tmp7611, i64 1
+ %tmp7613 = getelementptr inbounds float, float* %tmp7612, i64 1
+ %tmp7614 = getelementptr inbounds float, float* %tmp7613, i64 1
+ %tmp7615 = getelementptr inbounds float, float* %tmp7614, i64 1
+ %tmp7616 = getelementptr inbounds float, float* %tmp7615, i64 1
+ %tmp7617 = getelementptr inbounds float, float* %tmp7616, i64 1
+ %tmp7618 = getelementptr inbounds float, float* %tmp7617, i64 1
+ %tmp7619 = getelementptr inbounds float, float* %tmp7618, i64 1
+ %tmp7620 = getelementptr inbounds float, float* %tmp7619, i64 1
+ %tmp7621 = getelementptr inbounds float, float* %tmp7620, i64 1
+ %tmp7622 = getelementptr inbounds float, float* %tmp7621, i64 1
+ %tmp7623 = getelementptr inbounds float, float* %tmp7622, i64 1
+ %tmp7624 = getelementptr inbounds float, float* %tmp7623, i64 1
+ %tmp7625 = getelementptr inbounds float, float* %tmp7624, i64 1
+ %tmp7626 = getelementptr inbounds float, float* %tmp7625, i64 1
+ %tmp7627 = getelementptr inbounds float, float* %tmp7626, i64 1
+ %tmp7628 = getelementptr inbounds float, float* %tmp7627, i64 1
+ %tmp7629 = getelementptr inbounds float, float* %tmp7628, i64 1
+ %tmp7630 = getelementptr inbounds float, float* %tmp7629, i64 1
+ %tmp7631 = getelementptr inbounds float, float* %tmp7630, i64 1
+ %tmp7632 = getelementptr inbounds float, float* %tmp7631, i64 1
+ %tmp7633 = getelementptr inbounds float, float* %tmp7632, i64 1
+ %tmp7634 = getelementptr inbounds float, float* %tmp7633, i64 1
+ %tmp7635 = getelementptr inbounds float, float* %tmp7634, i64 1
+ %tmp7636 = getelementptr inbounds float, float* %tmp7635, i64 1
+ %tmp7637 = getelementptr inbounds float, float* %tmp7636, i64 1
+ %tmp7638 = getelementptr inbounds float, float* %tmp7637, i64 1
+ %tmp7639 = getelementptr inbounds float, float* %tmp7638, i64 1
+ %tmp7640 = getelementptr inbounds float, float* %tmp7639, i64 1
+ %tmp7641 = getelementptr inbounds float, float* %tmp7640, i64 1
+ %tmp7642 = getelementptr inbounds float, float* %tmp7641, i64 1
+ %tmp7643 = getelementptr inbounds float, float* %tmp7642, i64 1
+ %tmp7644 = getelementptr inbounds float, float* %tmp7643, i64 1
+ %tmp7645 = getelementptr inbounds float, float* %tmp7644, i64 1
+ %tmp7646 = getelementptr inbounds float, float* %tmp7645, i64 1
+ %tmp7647 = getelementptr inbounds float, float* %tmp7646, i64 1
+ %tmp7648 = getelementptr inbounds float, float* %tmp7647, i64 1
+ %tmp7649 = getelementptr inbounds float, float* %tmp7648, i64 1
+ %tmp7650 = getelementptr inbounds float, float* %tmp7649, i64 1
+ %tmp7651 = getelementptr inbounds float, float* %tmp7650, i64 1
+ %tmp7652 = getelementptr inbounds float, float* %tmp7651, i64 1
+ %tmp7653 = getelementptr inbounds float, float* %tmp7652, i64 1
+ %tmp7654 = getelementptr inbounds float, float* %tmp7653, i64 1
+ %tmp7655 = getelementptr inbounds float, float* %tmp7654, i64 1
+ %tmp7656 = getelementptr inbounds float, float* %tmp7655, i64 1
+ %tmp7657 = getelementptr inbounds float, float* %tmp7656, i64 1
+ %tmp7658 = getelementptr inbounds float, float* %tmp7657, i64 1
+ %tmp7659 = getelementptr inbounds float, float* %tmp7658, i64 1
+ %tmp7660 = getelementptr inbounds float, float* %tmp7659, i64 1
+ %tmp7661 = getelementptr inbounds float, float* %tmp7660, i64 1
+ %tmp7662 = getelementptr inbounds float, float* %tmp7661, i64 1
+ %tmp7663 = getelementptr inbounds float, float* %tmp7662, i64 1
+ %tmp7664 = getelementptr inbounds float, float* %tmp7663, i64 1
+ %tmp7665 = getelementptr inbounds float, float* %tmp7664, i64 1
+ %tmp7666 = getelementptr inbounds float, float* %tmp7665, i64 1
+ %tmp7667 = getelementptr inbounds float, float* %tmp7666, i64 1
+ %tmp7668 = getelementptr inbounds float, float* %tmp7667, i64 1
+ %tmp7669 = getelementptr inbounds float, float* %tmp7668, i64 1
+ %tmp7670 = getelementptr inbounds float, float* %tmp7669, i64 1
+ %tmp7671 = getelementptr inbounds float, float* %tmp7670, i64 1
+ %tmp7672 = getelementptr inbounds float, float* %tmp7671, i64 1
+ %tmp7673 = getelementptr inbounds float, float* %tmp7672, i64 1
+ %tmp7674 = getelementptr inbounds float, float* %tmp7673, i64 1
+ %tmp7675 = getelementptr inbounds float, float* %tmp7674, i64 1
+ %tmp7676 = getelementptr inbounds float, float* %tmp7675, i64 1
+ %tmp7677 = getelementptr inbounds float, float* %tmp7676, i64 1
+ %tmp7678 = getelementptr inbounds float, float* %tmp7677, i64 1
+ %tmp7679 = getelementptr inbounds float, float* %tmp7678, i64 1
+ %tmp7680 = getelementptr inbounds float, float* %tmp7679, i64 1
+ %tmp7681 = getelementptr inbounds float, float* %tmp7680, i64 1
+ %tmp7682 = getelementptr inbounds float, float* %tmp7681, i64 1
+ %tmp7683 = getelementptr inbounds float, float* %tmp7682, i64 1
+ %tmp7684 = getelementptr inbounds float, float* %tmp7683, i64 1
+ %tmp7685 = getelementptr inbounds float, float* %tmp7684, i64 1
+ %tmp7686 = getelementptr inbounds float, float* %tmp7685, i64 1
+ %tmp7687 = getelementptr inbounds float, float* %tmp7686, i64 1
+ %tmp7688 = getelementptr inbounds float, float* %tmp7687, i64 1
+ %tmp7689 = getelementptr inbounds float, float* %tmp7688, i64 1
+ %tmp7690 = getelementptr inbounds float, float* %tmp7689, i64 1
+ %tmp7691 = getelementptr inbounds float, float* %tmp7690, i64 1
+ %tmp7692 = getelementptr inbounds float, float* %tmp7691, i64 1
+ %tmp7693 = getelementptr inbounds float, float* %tmp7692, i64 1
+ %tmp7694 = getelementptr inbounds float, float* %tmp7693, i64 1
+ %tmp7695 = getelementptr inbounds float, float* %tmp7694, i64 1
+ %tmp7696 = getelementptr inbounds float, float* %tmp7695, i64 1
+ %tmp7697 = getelementptr inbounds float, float* %tmp7696, i64 1
+ %tmp7698 = getelementptr inbounds float, float* %tmp7697, i64 1
+ %tmp7699 = getelementptr inbounds float, float* %tmp7698, i64 1
+ %tmp7700 = getelementptr inbounds float, float* %tmp7699, i64 1
+ %tmp7701 = getelementptr inbounds float, float* %tmp7700, i64 1
+ %tmp7702 = getelementptr inbounds float, float* %tmp7701, i64 1
+ %tmp7703 = getelementptr inbounds float, float* %tmp7702, i64 1
+ %tmp7704 = getelementptr inbounds float, float* %tmp7703, i64 1
+ %tmp7705 = getelementptr inbounds float, float* %tmp7704, i64 1
+ %tmp7706 = getelementptr inbounds float, float* %tmp7705, i64 1
+ %tmp7707 = getelementptr inbounds float, float* %tmp7706, i64 1
+ %tmp7708 = getelementptr inbounds float, float* %tmp7707, i64 1
+ %tmp7709 = getelementptr inbounds float, float* %tmp7708, i64 1
+ %tmp7710 = getelementptr inbounds float, float* %tmp7709, i64 1
+ %tmp7711 = getelementptr inbounds float, float* %tmp7710, i64 1
+ %tmp7712 = getelementptr inbounds float, float* %tmp7711, i64 1
+ %tmp7713 = getelementptr inbounds float, float* %tmp7712, i64 1
+ %tmp7714 = getelementptr inbounds float, float* %tmp7713, i64 1
+ %tmp7715 = getelementptr inbounds float, float* %tmp7714, i64 1
+ %tmp7716 = getelementptr inbounds float, float* %tmp7715, i64 1
+ %tmp7717 = getelementptr inbounds float, float* %tmp7716, i64 1
+ %tmp7718 = getelementptr inbounds float, float* %tmp7717, i64 1
+ %tmp7719 = getelementptr inbounds float, float* %tmp7718, i64 1
+ %tmp7720 = getelementptr inbounds float, float* %tmp7719, i64 1
+ %tmp7721 = getelementptr inbounds float, float* %tmp7720, i64 1
+ %tmp7722 = getelementptr inbounds float, float* %tmp7721, i64 1
+ %tmp7723 = getelementptr inbounds float, float* %tmp7722, i64 1
+ %tmp7724 = getelementptr inbounds float, float* %tmp7723, i64 1
+ %tmp7725 = getelementptr inbounds float, float* %tmp7724, i64 1
+ %tmp7726 = getelementptr inbounds float, float* %tmp7725, i64 1
+ %tmp7727 = getelementptr inbounds float, float* %tmp7726, i64 1
+ %tmp7728 = getelementptr inbounds float, float* %tmp7727, i64 1
+ %tmp7729 = getelementptr inbounds float, float* %tmp7728, i64 1
+ %tmp7730 = getelementptr inbounds float, float* %tmp7729, i64 1
+ %tmp7731 = getelementptr inbounds float, float* %tmp7730, i64 1
+ %tmp7732 = getelementptr inbounds float, float* %tmp7731, i64 1
+ %tmp7733 = getelementptr inbounds float, float* %tmp7732, i64 1
+ %tmp7734 = getelementptr inbounds float, float* %tmp7733, i64 1
+ %tmp7735 = getelementptr inbounds float, float* %tmp7734, i64 1
+ %tmp7736 = getelementptr inbounds float, float* %tmp7735, i64 1
+ %tmp7737 = getelementptr inbounds float, float* %tmp7736, i64 1
+ %tmp7738 = getelementptr inbounds float, float* %tmp7737, i64 1
+ %tmp7739 = getelementptr inbounds float, float* %tmp7738, i64 1
+ %tmp7740 = getelementptr inbounds float, float* %tmp7739, i64 1
+ %tmp7741 = getelementptr inbounds float, float* %tmp7740, i64 1
+ %tmp7742 = getelementptr inbounds float, float* %tmp7741, i64 1
+ %tmp7743 = getelementptr inbounds float, float* %tmp7742, i64 1
+ %tmp7744 = getelementptr inbounds float, float* %tmp7743, i64 1
+ %tmp7745 = getelementptr inbounds float, float* %tmp7744, i64 1
+ %tmp7746 = getelementptr inbounds float, float* %tmp7745, i64 1
+ %tmp7747 = getelementptr inbounds float, float* %tmp7746, i64 1
+ %tmp7748 = getelementptr inbounds float, float* %tmp7747, i64 1
+ %tmp7749 = getelementptr inbounds float, float* %tmp7748, i64 1
+ %tmp7750 = getelementptr inbounds float, float* %tmp7749, i64 1
+ %tmp7751 = getelementptr inbounds float, float* %tmp7750, i64 1
+ %tmp7752 = getelementptr inbounds float, float* %tmp7751, i64 1
+ %tmp7753 = getelementptr inbounds float, float* %tmp7752, i64 1
+ %tmp7754 = getelementptr inbounds float, float* %tmp7753, i64 1
+ %tmp7755 = getelementptr inbounds float, float* %tmp7754, i64 1
+ %tmp7756 = getelementptr inbounds float, float* %tmp7755, i64 1
+ %tmp7757 = getelementptr inbounds float, float* %tmp7756, i64 1
+ %tmp7758 = getelementptr inbounds float, float* %tmp7757, i64 1
+ %tmp7759 = getelementptr inbounds float, float* %tmp7758, i64 1
+ %tmp7760 = getelementptr inbounds float, float* %tmp7759, i64 1
+ %tmp7761 = getelementptr inbounds float, float* %tmp7760, i64 1
+ %tmp7762 = getelementptr inbounds float, float* %tmp7761, i64 1
+ %tmp7763 = getelementptr inbounds float, float* %tmp7762, i64 1
+ %tmp7764 = getelementptr inbounds float, float* %tmp7763, i64 1
+ %tmp7765 = getelementptr inbounds float, float* %tmp7764, i64 1
+ %tmp7766 = getelementptr inbounds float, float* %tmp7765, i64 1
+ %tmp7767 = getelementptr inbounds float, float* %tmp7766, i64 1
+ %tmp7768 = getelementptr inbounds float, float* %tmp7767, i64 1
+ %tmp7769 = getelementptr inbounds float, float* %tmp7768, i64 1
+ %tmp7770 = getelementptr inbounds float, float* %tmp7769, i64 1
+ %tmp7771 = getelementptr inbounds float, float* %tmp7770, i64 1
+ %tmp7772 = getelementptr inbounds float, float* %tmp7771, i64 1
+ %tmp7773 = getelementptr inbounds float, float* %tmp7772, i64 1
+ %tmp7774 = getelementptr inbounds float, float* %tmp7773, i64 1
+ %tmp7775 = getelementptr inbounds float, float* %tmp7774, i64 1
+ %tmp7776 = getelementptr inbounds float, float* %tmp7775, i64 1
+ %tmp7777 = getelementptr inbounds float, float* %tmp7776, i64 1
+ %tmp7778 = getelementptr inbounds float, float* %tmp7777, i64 1
+ %tmp7779 = getelementptr inbounds float, float* %tmp7778, i64 1
+ %tmp7780 = getelementptr inbounds float, float* %tmp7779, i64 1
+ %tmp7781 = getelementptr inbounds float, float* %tmp7780, i64 1
+ %tmp7782 = getelementptr inbounds float, float* %tmp7781, i64 1
+ %tmp7783 = getelementptr inbounds float, float* %tmp7782, i64 1
+ %tmp7784 = getelementptr inbounds float, float* %tmp7783, i64 1
+ %tmp7785 = getelementptr inbounds float, float* %tmp7784, i64 1
+ %tmp7786 = getelementptr inbounds float, float* %tmp7785, i64 1
+ %tmp7787 = getelementptr inbounds float, float* %tmp7786, i64 1
+ %tmp7788 = getelementptr inbounds float, float* %tmp7787, i64 1
+ %tmp7789 = getelementptr inbounds float, float* %tmp7788, i64 1
+ %tmp7790 = getelementptr inbounds float, float* %tmp7789, i64 1
+ %tmp7791 = getelementptr inbounds float, float* %tmp7790, i64 1
+ %tmp7792 = getelementptr inbounds float, float* %tmp7791, i64 1
+ %tmp7793 = getelementptr inbounds float, float* %tmp7792, i64 1
+ %tmp7794 = getelementptr inbounds float, float* %tmp7793, i64 1
+ %tmp7795 = getelementptr inbounds float, float* %tmp7794, i64 1
+ %tmp7796 = getelementptr inbounds float, float* %tmp7795, i64 1
+ %tmp7797 = getelementptr inbounds float, float* %tmp7796, i64 1
+ %tmp7798 = getelementptr inbounds float, float* %tmp7797, i64 1
+ %tmp7799 = getelementptr inbounds float, float* %tmp7798, i64 1
+ %tmp7800 = getelementptr inbounds float, float* %tmp7799, i64 1
+ %tmp7801 = getelementptr inbounds float, float* %tmp7800, i64 1
+ %tmp7802 = getelementptr inbounds float, float* %tmp7801, i64 1
+ %tmp7803 = getelementptr inbounds float, float* %tmp7802, i64 1
+ %tmp7804 = getelementptr inbounds float, float* %tmp7803, i64 1
+ %tmp7805 = getelementptr inbounds float, float* %tmp7804, i64 1
+ %tmp7806 = getelementptr inbounds float, float* %tmp7805, i64 1
+ %tmp7807 = getelementptr inbounds float, float* %tmp7806, i64 1
+ %tmp7808 = getelementptr inbounds float, float* %tmp7807, i64 1
+ %tmp7809 = getelementptr inbounds float, float* %tmp7808, i64 1
+ %tmp7810 = getelementptr inbounds float, float* %tmp7809, i64 1
+ %tmp7811 = getelementptr inbounds float, float* %tmp7810, i64 1
+ %tmp7812 = getelementptr inbounds float, float* %tmp7811, i64 1
+ %tmp7813 = getelementptr inbounds float, float* %tmp7812, i64 1
+ %tmp7814 = getelementptr inbounds float, float* %tmp7813, i64 1
+ %tmp7815 = getelementptr inbounds float, float* %tmp7814, i64 1
+ %tmp7816 = getelementptr inbounds float, float* %tmp7815, i64 1
+ %tmp7817 = getelementptr inbounds float, float* %tmp7816, i64 1
+ %tmp7818 = getelementptr inbounds float, float* %tmp7817, i64 1
+ %tmp7819 = getelementptr inbounds float, float* %tmp7818, i64 1
+ %tmp7820 = getelementptr inbounds float, float* %tmp7819, i64 1
+ %tmp7821 = getelementptr inbounds float, float* %tmp7820, i64 1
+ %tmp7822 = getelementptr inbounds float, float* %tmp7821, i64 1
+ %tmp7823 = getelementptr inbounds float, float* %tmp7822, i64 1
+ %tmp7824 = getelementptr inbounds float, float* %tmp7823, i64 1
+ %tmp7825 = getelementptr inbounds float, float* %tmp7824, i64 1
+ %tmp7826 = getelementptr inbounds float, float* %tmp7825, i64 1
+ %tmp7827 = getelementptr inbounds float, float* %tmp7826, i64 1
+ %tmp7828 = getelementptr inbounds float, float* %tmp7827, i64 1
+ %tmp7829 = getelementptr inbounds float, float* %tmp7828, i64 1
+ %tmp7830 = getelementptr inbounds float, float* %tmp7829, i64 1
+ %tmp7831 = getelementptr inbounds float, float* %tmp7830, i64 1
+ %tmp7832 = getelementptr inbounds float, float* %tmp7831, i64 1
+ %tmp7833 = getelementptr inbounds float, float* %tmp7832, i64 1
+ %tmp7834 = getelementptr inbounds float, float* %tmp7833, i64 1
+ %tmp7835 = getelementptr inbounds float, float* %tmp7834, i64 1
+ %tmp7836 = getelementptr inbounds float, float* %tmp7835, i64 1
+ %tmp7837 = getelementptr inbounds float, float* %tmp7836, i64 1
+ %tmp7838 = getelementptr inbounds float, float* %tmp7837, i64 1
+ %tmp7839 = getelementptr inbounds float, float* %tmp7838, i64 1
+ %tmp7840 = getelementptr inbounds float, float* %tmp7839, i64 1
+ %tmp7841 = getelementptr inbounds float, float* %tmp7840, i64 1
+ %tmp7842 = getelementptr inbounds float, float* %tmp7841, i64 1
+ %tmp7843 = getelementptr inbounds float, float* %tmp7842, i64 1
+ %tmp7844 = getelementptr inbounds float, float* %tmp7843, i64 1
+ %tmp7845 = getelementptr inbounds float, float* %tmp7844, i64 1
+ %tmp7846 = getelementptr inbounds float, float* %tmp7845, i64 1
+ %tmp7847 = getelementptr inbounds float, float* %tmp7846, i64 1
+ %tmp7848 = getelementptr inbounds float, float* %tmp7847, i64 1
+ %tmp7849 = getelementptr inbounds float, float* %tmp7848, i64 1
+ %tmp7850 = getelementptr inbounds float, float* %tmp7849, i64 1
+ %tmp7851 = getelementptr inbounds float, float* %tmp7850, i64 1
+ %tmp7852 = getelementptr inbounds float, float* %tmp7851, i64 1
+ %tmp7853 = getelementptr inbounds float, float* %tmp7852, i64 1
+ %tmp7854 = getelementptr inbounds float, float* %tmp7853, i64 1
+ %tmp7855 = getelementptr inbounds float, float* %tmp7854, i64 1
+ %tmp7856 = getelementptr inbounds float, float* %tmp7855, i64 1
+ %tmp7857 = getelementptr inbounds float, float* %tmp7856, i64 1
+ %tmp7858 = getelementptr inbounds float, float* %tmp7857, i64 1
+ %tmp7859 = getelementptr inbounds float, float* %tmp7858, i64 1
+ %tmp7860 = getelementptr inbounds float, float* %tmp7859, i64 1
+ %tmp7861 = getelementptr inbounds float, float* %tmp7860, i64 1
+ %tmp7862 = getelementptr inbounds float, float* %tmp7861, i64 1
+ %tmp7863 = getelementptr inbounds float, float* %tmp7862, i64 1
+ %tmp7864 = getelementptr inbounds float, float* %tmp7863, i64 1
+ %tmp7865 = getelementptr inbounds float, float* %tmp7864, i64 1
+ %tmp7866 = getelementptr inbounds float, float* %tmp7865, i64 1
+ %tmp7867 = getelementptr inbounds float, float* %tmp7866, i64 1
+ %tmp7868 = getelementptr inbounds float, float* %tmp7867, i64 1
+ %tmp7869 = getelementptr inbounds float, float* %tmp7868, i64 1
+ %tmp7870 = getelementptr inbounds float, float* %tmp7869, i64 1
+ %tmp7871 = getelementptr inbounds float, float* %tmp7870, i64 1
+ %tmp7872 = getelementptr inbounds float, float* %tmp7871, i64 1
+ %tmp7873 = getelementptr inbounds float, float* %tmp7872, i64 1
+ %tmp7874 = getelementptr inbounds float, float* %tmp7873, i64 1
+ %tmp7875 = getelementptr inbounds float, float* %tmp7874, i64 1
+ %tmp7876 = getelementptr inbounds float, float* %tmp7875, i64 1
+ %tmp7877 = getelementptr inbounds float, float* %tmp7876, i64 1
+ %tmp7878 = getelementptr inbounds float, float* %tmp7877, i64 1
+ %tmp7879 = getelementptr inbounds float, float* %tmp7878, i64 1
+ %tmp7880 = getelementptr inbounds float, float* %tmp7879, i64 1
+ %tmp7881 = getelementptr inbounds float, float* %tmp7880, i64 1
+ %tmp7882 = getelementptr inbounds float, float* %tmp7881, i64 1
+ %tmp7883 = getelementptr inbounds float, float* %tmp7882, i64 1
+ %tmp7884 = getelementptr inbounds float, float* %tmp7883, i64 1
+ %tmp7885 = getelementptr inbounds float, float* %tmp7884, i64 1
+ %tmp7886 = getelementptr inbounds float, float* %tmp7885, i64 1
+ %tmp7887 = getelementptr inbounds float, float* %tmp7886, i64 1
+ %tmp7888 = getelementptr inbounds float, float* %tmp7887, i64 1
+ %tmp7889 = getelementptr inbounds float, float* %tmp7888, i64 1
+ %tmp7890 = getelementptr inbounds float, float* %tmp7889, i64 1
+ %tmp7891 = getelementptr inbounds float, float* %tmp7890, i64 1
+ %tmp7892 = getelementptr inbounds float, float* %tmp7891, i64 1
+ %tmp7893 = getelementptr inbounds float, float* %tmp7892, i64 1
+ %tmp7894 = getelementptr inbounds float, float* %tmp7893, i64 1
+ %tmp7895 = getelementptr inbounds float, float* %tmp7894, i64 1
+ %tmp7896 = getelementptr inbounds float, float* %tmp7895, i64 1
+ %tmp7897 = getelementptr inbounds float, float* %tmp7896, i64 1
+ %tmp7898 = getelementptr inbounds float, float* %tmp7897, i64 1
+ %tmp7899 = getelementptr inbounds float, float* %tmp7898, i64 1
+ %tmp7900 = getelementptr inbounds float, float* %tmp7899, i64 1
+ %tmp7901 = getelementptr inbounds float, float* %tmp7900, i64 1
+ %tmp7902 = getelementptr inbounds float, float* %tmp7901, i64 1
+ %tmp7903 = getelementptr inbounds float, float* %tmp7902, i64 1
+ %tmp7904 = getelementptr inbounds float, float* %tmp7903, i64 1
+ %tmp7905 = getelementptr inbounds float, float* %tmp7904, i64 1
+ %tmp7906 = getelementptr inbounds float, float* %tmp7905, i64 1
+ %tmp7907 = getelementptr inbounds float, float* %tmp7906, i64 1
+ %tmp7908 = getelementptr inbounds float, float* %tmp7907, i64 1
+ %tmp7909 = getelementptr inbounds float, float* %tmp7908, i64 1
+ %tmp7910 = getelementptr inbounds float, float* %tmp7909, i64 1
+ %tmp7911 = getelementptr inbounds float, float* %tmp7910, i64 1
+ %tmp7912 = getelementptr inbounds float, float* %tmp7911, i64 1
+ %tmp7913 = getelementptr inbounds float, float* %tmp7912, i64 1
+ %tmp7914 = getelementptr inbounds float, float* %tmp7913, i64 1
+ %tmp7915 = getelementptr inbounds float, float* %tmp7914, i64 1
+ %tmp7916 = getelementptr inbounds float, float* %tmp7915, i64 1
+ %tmp7917 = getelementptr inbounds float, float* %tmp7916, i64 1
+ %tmp7918 = getelementptr inbounds float, float* %tmp7917, i64 1
+ %tmp7919 = getelementptr inbounds float, float* %tmp7918, i64 1
+ %tmp7920 = getelementptr inbounds float, float* %tmp7919, i64 1
+ %tmp7921 = getelementptr inbounds float, float* %tmp7920, i64 1
+ %tmp7922 = getelementptr inbounds float, float* %tmp7921, i64 1
+ %tmp7923 = getelementptr inbounds float, float* %tmp7922, i64 1
+ %tmp7924 = getelementptr inbounds float, float* %tmp7923, i64 1
+ %tmp7925 = getelementptr inbounds float, float* %tmp7924, i64 1
+ %tmp7926 = getelementptr inbounds float, float* %tmp7925, i64 1
+ %tmp7927 = getelementptr inbounds float, float* %tmp7926, i64 1
+ %tmp7928 = getelementptr inbounds float, float* %tmp7927, i64 1
+ %tmp7929 = getelementptr inbounds float, float* %tmp7928, i64 1
+ %tmp7930 = getelementptr inbounds float, float* %tmp7929, i64 1
+ %tmp7931 = getelementptr inbounds float, float* %tmp7930, i64 1
+ %tmp7932 = getelementptr inbounds float, float* %tmp7931, i64 1
+ %tmp7933 = getelementptr inbounds float, float* %tmp7932, i64 1
+ %tmp7934 = getelementptr inbounds float, float* %tmp7933, i64 1
+ %tmp7935 = getelementptr inbounds float, float* %tmp7934, i64 1
+ %tmp7936 = getelementptr inbounds float, float* %tmp7935, i64 1
+ %tmp7937 = getelementptr inbounds float, float* %tmp7936, i64 1
+ %tmp7938 = getelementptr inbounds float, float* %tmp7937, i64 1
+ %tmp7939 = getelementptr inbounds float, float* %tmp7938, i64 1
+ %tmp7940 = getelementptr inbounds float, float* %tmp7939, i64 1
+ %tmp7941 = getelementptr inbounds float, float* %tmp7940, i64 1
+ %tmp7942 = getelementptr inbounds float, float* %tmp7941, i64 1
+ %tmp7943 = getelementptr inbounds float, float* %tmp7942, i64 1
+ %tmp7944 = getelementptr inbounds float, float* %tmp7943, i64 1
+ %tmp7945 = getelementptr inbounds float, float* %tmp7944, i64 1
+ %tmp7946 = getelementptr inbounds float, float* %tmp7945, i64 1
+ %tmp7947 = getelementptr inbounds float, float* %tmp7946, i64 1
+ %tmp7948 = getelementptr inbounds float, float* %tmp7947, i64 1
+ %tmp7949 = getelementptr inbounds float, float* %tmp7948, i64 1
+ %tmp7950 = getelementptr inbounds float, float* %tmp7949, i64 1
+ %tmp7951 = getelementptr inbounds float, float* %tmp7950, i64 1
+ %tmp7952 = getelementptr inbounds float, float* %tmp7951, i64 1
+ %tmp7953 = getelementptr inbounds float, float* %tmp7952, i64 1
+ %tmp7954 = getelementptr inbounds float, float* %tmp7953, i64 1
+ %tmp7955 = getelementptr inbounds float, float* %tmp7954, i64 1
+ %tmp7956 = getelementptr inbounds float, float* %tmp7955, i64 1
+ %tmp7957 = getelementptr inbounds float, float* %tmp7956, i64 1
+ %tmp7958 = getelementptr inbounds float, float* %tmp7957, i64 1
+ %tmp7959 = getelementptr inbounds float, float* %tmp7958, i64 1
+ %tmp7960 = getelementptr inbounds float, float* %tmp7959, i64 1
+ %tmp7961 = getelementptr inbounds float, float* %tmp7960, i64 1
+ %tmp7962 = getelementptr inbounds float, float* %tmp7961, i64 1
+ %tmp7963 = getelementptr inbounds float, float* %tmp7962, i64 1
+ %tmp7964 = getelementptr inbounds float, float* %tmp7963, i64 1
+ %tmp7965 = getelementptr inbounds float, float* %tmp7964, i64 1
+ %tmp7966 = getelementptr inbounds float, float* %tmp7965, i64 1
+ %tmp7967 = getelementptr inbounds float, float* %tmp7966, i64 1
+ %tmp7968 = getelementptr inbounds float, float* %tmp7967, i64 1
+ %tmp7969 = getelementptr inbounds float, float* %tmp7968, i64 1
+ %tmp7970 = getelementptr inbounds float, float* %tmp7969, i64 1
+ %tmp7971 = getelementptr inbounds float, float* %tmp7970, i64 1
+ %tmp7972 = getelementptr inbounds float, float* %tmp7971, i64 1
+ %tmp7973 = getelementptr inbounds float, float* %tmp7972, i64 1
+ %tmp7974 = getelementptr inbounds float, float* %tmp7973, i64 1
+ %tmp7975 = getelementptr inbounds float, float* %tmp7974, i64 1
+ %tmp7976 = getelementptr inbounds float, float* %tmp7975, i64 1
+ %tmp7977 = getelementptr inbounds float, float* %tmp7976, i64 1
+ %tmp7978 = getelementptr inbounds float, float* %tmp7977, i64 1
+ %tmp7979 = getelementptr inbounds float, float* %tmp7978, i64 1
+ %tmp7980 = getelementptr inbounds float, float* %tmp7979, i64 1
+ %tmp7981 = getelementptr inbounds float, float* %tmp7980, i64 1
+ %tmp7982 = getelementptr inbounds float, float* %tmp7981, i64 1
+ %tmp7983 = getelementptr inbounds float, float* %tmp7982, i64 1
+ %tmp7984 = getelementptr inbounds float, float* %tmp7983, i64 1
+ %tmp7985 = getelementptr inbounds float, float* %tmp7984, i64 1
+ %tmp7986 = getelementptr inbounds float, float* %tmp7985, i64 1
+ %tmp7987 = getelementptr inbounds float, float* %tmp7986, i64 1
+ %tmp7988 = getelementptr inbounds float, float* %tmp7987, i64 1
+ %tmp7989 = getelementptr inbounds float, float* %tmp7988, i64 1
+ %tmp7990 = getelementptr inbounds float, float* %tmp7989, i64 1
+ %tmp7991 = getelementptr inbounds float, float* %tmp7990, i64 1
+ %tmp7992 = getelementptr inbounds float, float* %tmp7991, i64 1
+ %tmp7993 = getelementptr inbounds float, float* %tmp7992, i64 1
+ %tmp7994 = getelementptr inbounds float, float* %tmp7993, i64 1
+ %tmp7995 = getelementptr inbounds float, float* %tmp7994, i64 1
+ %tmp7996 = getelementptr inbounds float, float* %tmp7995, i64 1
+ %tmp7997 = getelementptr inbounds float, float* %tmp7996, i64 1
+ %tmp7998 = getelementptr inbounds float, float* %tmp7997, i64 1
+ %tmp7999 = getelementptr inbounds float, float* %tmp7998, i64 1
+ %tmp8000 = getelementptr inbounds float, float* %tmp7999, i64 1
+ %tmp8001 = getelementptr inbounds float, float* %tmp8000, i64 1
+ %tmp8002 = getelementptr inbounds float, float* %tmp8001, i64 1
+ %tmp8003 = getelementptr inbounds float, float* %tmp8002, i64 1
+ %tmp8004 = getelementptr inbounds float, float* %tmp8003, i64 1
+ %tmp8005 = getelementptr inbounds float, float* %tmp8004, i64 1
+ %tmp8006 = getelementptr inbounds float, float* %tmp8005, i64 1
+ %tmp8007 = getelementptr inbounds float, float* %tmp8006, i64 1
+ %tmp8008 = getelementptr inbounds float, float* %tmp8007, i64 1
+ %tmp8009 = getelementptr inbounds float, float* %tmp8008, i64 1
+ %tmp8010 = getelementptr inbounds float, float* %tmp8009, i64 1
+ %tmp8011 = getelementptr inbounds float, float* %tmp8010, i64 1
+ %tmp8012 = getelementptr inbounds float, float* %tmp8011, i64 1
+ %tmp8013 = getelementptr inbounds float, float* %tmp8012, i64 1
+ %tmp8014 = getelementptr inbounds float, float* %tmp8013, i64 1
+ %tmp8015 = getelementptr inbounds float, float* %tmp8014, i64 1
+ %tmp8016 = getelementptr inbounds float, float* %tmp8015, i64 1
+ %tmp8017 = getelementptr inbounds float, float* %tmp8016, i64 1
+ %tmp8018 = getelementptr inbounds float, float* %tmp8017, i64 1
+ %tmp8019 = getelementptr inbounds float, float* %tmp8018, i64 1
+ %tmp8020 = getelementptr inbounds float, float* %tmp8019, i64 1
+ %tmp8021 = getelementptr inbounds float, float* %tmp8020, i64 1
+ %tmp8022 = getelementptr inbounds float, float* %tmp8021, i64 1
+ %tmp8023 = getelementptr inbounds float, float* %tmp8022, i64 1
+ %tmp8024 = getelementptr inbounds float, float* %tmp8023, i64 1
+ %tmp8025 = getelementptr inbounds float, float* %tmp8024, i64 1
+ %tmp8026 = getelementptr inbounds float, float* %tmp8025, i64 1
+ %tmp8027 = getelementptr inbounds float, float* %tmp8026, i64 1
+ %tmp8028 = getelementptr inbounds float, float* %tmp8027, i64 1
+ %tmp8029 = getelementptr inbounds float, float* %tmp8028, i64 1
+ %tmp8030 = getelementptr inbounds float, float* %tmp8029, i64 1
+ %tmp8031 = getelementptr inbounds float, float* %tmp8030, i64 1
+ %tmp8032 = getelementptr inbounds float, float* %tmp8031, i64 1
+ %tmp8033 = getelementptr inbounds float, float* %tmp8032, i64 1
+ %tmp8034 = getelementptr inbounds float, float* %tmp8033, i64 1
+ %tmp8035 = getelementptr inbounds float, float* %tmp8034, i64 1
+ %tmp8036 = getelementptr inbounds float, float* %tmp8035, i64 1
+ %tmp8037 = getelementptr inbounds float, float* %tmp8036, i64 1
+ %tmp8038 = getelementptr inbounds float, float* %tmp8037, i64 1
+ %tmp8039 = getelementptr inbounds float, float* %tmp8038, i64 1
+ %tmp8040 = getelementptr inbounds float, float* %tmp8039, i64 1
+ %tmp8041 = getelementptr inbounds float, float* %tmp8040, i64 1
+ %tmp8042 = getelementptr inbounds float, float* %tmp8041, i64 1
+ %tmp8043 = getelementptr inbounds float, float* %tmp8042, i64 1
+ %tmp8044 = getelementptr inbounds float, float* %tmp8043, i64 1
+ %tmp8045 = getelementptr inbounds float, float* %tmp8044, i64 1
+ %tmp8046 = getelementptr inbounds float, float* %tmp8045, i64 1
+ %tmp8047 = getelementptr inbounds float, float* %tmp8046, i64 1
+ %tmp8048 = getelementptr inbounds float, float* %tmp8047, i64 1
+ %tmp8049 = getelementptr inbounds float, float* %tmp8048, i64 1
+ %tmp8050 = getelementptr inbounds float, float* %tmp8049, i64 1
+ %tmp8051 = getelementptr inbounds float, float* %tmp8050, i64 1
+ %tmp8052 = getelementptr inbounds float, float* %tmp8051, i64 1
+ %tmp8053 = getelementptr inbounds float, float* %tmp8052, i64 1
+ %tmp8054 = getelementptr inbounds float, float* %tmp8053, i64 1
+ %tmp8055 = getelementptr inbounds float, float* %tmp8054, i64 1
+ %tmp8056 = getelementptr inbounds float, float* %tmp8055, i64 1
+ %tmp8057 = getelementptr inbounds float, float* %tmp8056, i64 1
+ %tmp8058 = getelementptr inbounds float, float* %tmp8057, i64 1
+ %tmp8059 = getelementptr inbounds float, float* %tmp8058, i64 1
+ %tmp8060 = getelementptr inbounds float, float* %tmp8059, i64 1
+ %tmp8061 = getelementptr inbounds float, float* %tmp8060, i64 1
+ %tmp8062 = getelementptr inbounds float, float* %tmp8061, i64 1
+ %tmp8063 = getelementptr inbounds float, float* %tmp8062, i64 1
+ %tmp8064 = getelementptr inbounds float, float* %tmp8063, i64 1
+ %tmp8065 = getelementptr inbounds float, float* %tmp8064, i64 1
+ %tmp8066 = getelementptr inbounds float, float* %tmp8065, i64 1
+ %tmp8067 = getelementptr inbounds float, float* %tmp8066, i64 1
+ %tmp8068 = getelementptr inbounds float, float* %tmp8067, i64 1
+ %tmp8069 = getelementptr inbounds float, float* %tmp8068, i64 1
+ %tmp8070 = getelementptr inbounds float, float* %tmp8069, i64 1
+ %tmp8071 = getelementptr inbounds float, float* %tmp8070, i64 1
+ %tmp8072 = getelementptr inbounds float, float* %tmp8071, i64 1
+ %tmp8073 = getelementptr inbounds float, float* %tmp8072, i64 1
+ %tmp8074 = getelementptr inbounds float, float* %tmp8073, i64 1
+ %tmp8075 = getelementptr inbounds float, float* %tmp8074, i64 1
+ %tmp8076 = getelementptr inbounds float, float* %tmp8075, i64 1
+ %tmp8077 = getelementptr inbounds float, float* %tmp8076, i64 1
+ %tmp8078 = getelementptr inbounds float, float* %tmp8077, i64 1
+ %tmp8079 = getelementptr inbounds float, float* %tmp8078, i64 1
+ %tmp8080 = getelementptr inbounds float, float* %tmp8079, i64 1
+ %tmp8081 = getelementptr inbounds float, float* %tmp8080, i64 1
+ %tmp8082 = getelementptr inbounds float, float* %tmp8081, i64 1
+ %tmp8083 = getelementptr inbounds float, float* %tmp8082, i64 1
+ %tmp8084 = getelementptr inbounds float, float* %tmp8083, i64 1
+ %tmp8085 = getelementptr inbounds float, float* %tmp8084, i64 1
+ %tmp8086 = getelementptr inbounds float, float* %tmp8085, i64 1
+ %tmp8087 = getelementptr inbounds float, float* %tmp8086, i64 1
+ %tmp8088 = getelementptr inbounds float, float* %tmp8087, i64 1
+ %tmp8089 = getelementptr inbounds float, float* %tmp8088, i64 1
+ %tmp8090 = getelementptr inbounds float, float* %tmp8089, i64 1
+ %tmp8091 = getelementptr inbounds float, float* %tmp8090, i64 1
+ %tmp8092 = getelementptr inbounds float, float* %tmp8091, i64 1
+ %tmp8093 = getelementptr inbounds float, float* %tmp8092, i64 1
+ %tmp8094 = getelementptr inbounds float, float* %tmp8093, i64 1
+ %tmp8095 = getelementptr inbounds float, float* %tmp8094, i64 1
+ %tmp8096 = getelementptr inbounds float, float* %tmp8095, i64 1
+ %tmp8097 = getelementptr inbounds float, float* %tmp8096, i64 1
+ %tmp8098 = getelementptr inbounds float, float* %tmp8097, i64 1
+ %tmp8099 = getelementptr inbounds float, float* %tmp8098, i64 1
+ %tmp8100 = getelementptr inbounds float, float* %tmp8099, i64 1
+ %tmp8101 = getelementptr inbounds float, float* %tmp8100, i64 1
+ %tmp8102 = getelementptr inbounds float, float* %tmp8101, i64 1
+ %tmp8103 = getelementptr inbounds float, float* %tmp8102, i64 1
+ %tmp8104 = getelementptr inbounds float, float* %tmp8103, i64 1
+ %tmp8105 = getelementptr inbounds float, float* %tmp8104, i64 1
+ %tmp8106 = getelementptr inbounds float, float* %tmp8105, i64 1
+ %tmp8107 = getelementptr inbounds float, float* %tmp8106, i64 1
+ %tmp8108 = getelementptr inbounds float, float* %tmp8107, i64 1
+ %tmp8109 = getelementptr inbounds float, float* %tmp8108, i64 1
+ %tmp8110 = getelementptr inbounds float, float* %tmp8109, i64 1
+ %tmp8111 = getelementptr inbounds float, float* %tmp8110, i64 1
+ %tmp8112 = getelementptr inbounds float, float* %tmp8111, i64 1
+ %tmp8113 = getelementptr inbounds float, float* %tmp8112, i64 1
+ %tmp8114 = getelementptr inbounds float, float* %tmp8113, i64 1
+ %tmp8115 = getelementptr inbounds float, float* %tmp8114, i64 1
+ %tmp8116 = getelementptr inbounds float, float* %tmp8115, i64 1
+ %tmp8117 = getelementptr inbounds float, float* %tmp8116, i64 1
+ %tmp8118 = getelementptr inbounds float, float* %tmp8117, i64 1
+ %tmp8119 = getelementptr inbounds float, float* %tmp8118, i64 1
+ %tmp8120 = getelementptr inbounds float, float* %tmp8119, i64 1
+ %tmp8121 = getelementptr inbounds float, float* %tmp8120, i64 1
+ %tmp8122 = getelementptr inbounds float, float* %tmp8121, i64 1
+ %tmp8123 = getelementptr inbounds float, float* %tmp8122, i64 1
+ %tmp8124 = getelementptr inbounds float, float* %tmp8123, i64 1
+ %tmp8125 = getelementptr inbounds float, float* %tmp8124, i64 1
+ %tmp8126 = getelementptr inbounds float, float* %tmp8125, i64 1
+ %tmp8127 = getelementptr inbounds float, float* %tmp8126, i64 1
+ %tmp8128 = getelementptr inbounds float, float* %tmp8127, i64 1
+ %tmp8129 = getelementptr inbounds float, float* %tmp8128, i64 1
+ %tmp8130 = getelementptr inbounds float, float* %tmp8129, i64 1
+ %tmp8131 = getelementptr inbounds float, float* %tmp8130, i64 1
+ %tmp8132 = getelementptr inbounds float, float* %tmp8131, i64 1
+ %tmp8133 = getelementptr inbounds float, float* %tmp8132, i64 1
+ %tmp8134 = getelementptr inbounds float, float* %tmp8133, i64 1
+ %tmp8135 = getelementptr inbounds float, float* %tmp8134, i64 1
+ %tmp8136 = getelementptr inbounds float, float* %tmp8135, i64 1
+ %tmp8137 = getelementptr inbounds float, float* %tmp8136, i64 1
+ %tmp8138 = getelementptr inbounds float, float* %tmp8137, i64 1
+ %tmp8139 = getelementptr inbounds float, float* %tmp8138, i64 1
+ %tmp8140 = getelementptr inbounds float, float* %tmp8139, i64 1
+ %tmp8141 = getelementptr inbounds float, float* %tmp8140, i64 1
+ %tmp8142 = getelementptr inbounds float, float* %tmp8141, i64 1
+ %tmp8143 = getelementptr inbounds float, float* %tmp8142, i64 1
+ %tmp8144 = getelementptr inbounds float, float* %tmp8143, i64 1
+ %tmp8145 = getelementptr inbounds float, float* %tmp8144, i64 1
+ %tmp8146 = getelementptr inbounds float, float* %tmp8145, i64 1
+ %tmp8147 = getelementptr inbounds float, float* %tmp8146, i64 1
+ %tmp8148 = getelementptr inbounds float, float* %tmp8147, i64 1
+ %tmp8149 = getelementptr inbounds float, float* %tmp8148, i64 1
+ %tmp8150 = getelementptr inbounds float, float* %tmp8149, i64 1
+ %tmp8151 = getelementptr inbounds float, float* %tmp8150, i64 1
+ %tmp8152 = getelementptr inbounds float, float* %tmp8151, i64 1
+ %tmp8153 = getelementptr inbounds float, float* %tmp8152, i64 1
+ %tmp8154 = getelementptr inbounds float, float* %tmp8153, i64 1
+ %tmp8155 = getelementptr inbounds float, float* %tmp8154, i64 1
+ %tmp8156 = getelementptr inbounds float, float* %tmp8155, i64 1
+ %tmp8157 = getelementptr inbounds float, float* %tmp8156, i64 1
+ %tmp8158 = getelementptr inbounds float, float* %tmp8157, i64 1
+ %tmp8159 = getelementptr inbounds float, float* %tmp8158, i64 1
+ %tmp8160 = getelementptr inbounds float, float* %tmp8159, i64 1
+ %tmp8161 = getelementptr inbounds float, float* %tmp8160, i64 1
+ %tmp8162 = getelementptr inbounds float, float* %tmp8161, i64 1
+ %tmp8163 = getelementptr inbounds float, float* %tmp8162, i64 1
+ %tmp8164 = getelementptr inbounds float, float* %tmp8163, i64 1
+ %tmp8165 = getelementptr inbounds float, float* %tmp8164, i64 1
+ %tmp8166 = getelementptr inbounds float, float* %tmp8165, i64 1
+ %tmp8167 = getelementptr inbounds float, float* %tmp8166, i64 1
+ %tmp8168 = getelementptr inbounds float, float* %tmp8167, i64 1
+ %tmp8169 = getelementptr inbounds float, float* %tmp8168, i64 1
+ %tmp8170 = getelementptr inbounds float, float* %tmp8169, i64 1
+ %tmp8171 = getelementptr inbounds float, float* %tmp8170, i64 1
+ %tmp8172 = getelementptr inbounds float, float* %tmp8171, i64 1
+ %tmp8173 = getelementptr inbounds float, float* %tmp8172, i64 1
+ %tmp8174 = getelementptr inbounds float, float* %tmp8173, i64 1
+ %tmp8175 = getelementptr inbounds float, float* %tmp8174, i64 1
+ %tmp8176 = getelementptr inbounds float, float* %tmp8175, i64 1
+ %tmp8177 = getelementptr inbounds float, float* %tmp8176, i64 1
+ %tmp8178 = getelementptr inbounds float, float* %tmp8177, i64 1
+ %tmp8179 = getelementptr inbounds float, float* %tmp8178, i64 1
+ %tmp8180 = getelementptr inbounds float, float* %tmp8179, i64 1
+ %tmp8181 = getelementptr inbounds float, float* %tmp8180, i64 1
+ %tmp8182 = getelementptr inbounds float, float* %tmp8181, i64 1
+ %tmp8183 = getelementptr inbounds float, float* %tmp8182, i64 1
+ %tmp8184 = getelementptr inbounds float, float* %tmp8183, i64 1
+ %tmp8185 = getelementptr inbounds float, float* %tmp8184, i64 1
+ %tmp8186 = getelementptr inbounds float, float* %tmp8185, i64 1
+ %tmp8187 = getelementptr inbounds float, float* %tmp8186, i64 1
+ %tmp8188 = getelementptr inbounds float, float* %tmp8187, i64 1
+ %tmp8189 = getelementptr inbounds float, float* %tmp8188, i64 1
+ %tmp8190 = getelementptr inbounds float, float* %tmp8189, i64 1
+ %tmp8191 = getelementptr inbounds float, float* %tmp8190, i64 1
+ %tmp8192 = getelementptr inbounds float, float* %tmp8191, i64 1
+ %tmp8193 = getelementptr inbounds float, float* %tmp8192, i64 1
+ %tmp8194 = getelementptr inbounds float, float* %tmp8193, i64 1
+ %tmp8195 = getelementptr inbounds float, float* %tmp8194, i64 1
+ %tmp8196 = getelementptr inbounds float, float* %tmp8195, i64 1
+ %tmp8197 = getelementptr inbounds float, float* %tmp8196, i64 1
+ %tmp8198 = getelementptr inbounds float, float* %tmp8197, i64 1
+ %tmp8199 = getelementptr inbounds float, float* %tmp8198, i64 1
+ %tmp8200 = getelementptr inbounds float, float* %tmp8199, i64 1
+ %tmp8201 = getelementptr inbounds float, float* %tmp8200, i64 1
+ %tmp8202 = getelementptr inbounds float, float* %tmp8201, i64 1
+ %tmp8203 = getelementptr inbounds float, float* %tmp8202, i64 1
+ %tmp8204 = getelementptr inbounds float, float* %tmp8203, i64 1
+ %tmp8205 = getelementptr inbounds float, float* %tmp8204, i64 1
+ %tmp8206 = getelementptr inbounds float, float* %tmp8205, i64 1
+ %tmp8207 = getelementptr inbounds float, float* %tmp8206, i64 1
+ %tmp8208 = getelementptr inbounds float, float* %tmp8207, i64 1
+ %tmp8209 = getelementptr inbounds float, float* %tmp8208, i64 1
+ %tmp8210 = getelementptr inbounds float, float* %tmp8209, i64 1
+ %tmp8211 = getelementptr inbounds float, float* %tmp8210, i64 1
+ %tmp8212 = getelementptr inbounds float, float* %tmp8211, i64 1
+ %tmp8213 = getelementptr inbounds float, float* %tmp8212, i64 1
+ %tmp8214 = getelementptr inbounds float, float* %tmp8213, i64 1
+ %tmp8215 = getelementptr inbounds float, float* %tmp8214, i64 1
+ %tmp8216 = getelementptr inbounds float, float* %tmp8215, i64 1
+ %tmp8217 = getelementptr inbounds float, float* %tmp8216, i64 1
+ %tmp8218 = getelementptr inbounds float, float* %tmp8217, i64 1
+ %tmp8219 = getelementptr inbounds float, float* %tmp8218, i64 1
+ %tmp8220 = getelementptr inbounds float, float* %tmp8219, i64 1
+ %tmp8221 = getelementptr inbounds float, float* %tmp8220, i64 1
+ %tmp8222 = getelementptr inbounds float, float* %tmp8221, i64 1
+ %tmp8223 = getelementptr inbounds float, float* %tmp8222, i64 1
+ %tmp8224 = getelementptr inbounds float, float* %tmp8223, i64 1
+ %tmp8225 = getelementptr inbounds float, float* %tmp8224, i64 1
+ %tmp8226 = getelementptr inbounds float, float* %tmp8225, i64 1
+ %tmp8227 = getelementptr inbounds float, float* %tmp8226, i64 1
+ %tmp8228 = getelementptr inbounds float, float* %tmp8227, i64 1
+ %tmp8229 = getelementptr inbounds float, float* %tmp8228, i64 1
+ %tmp8230 = getelementptr inbounds float, float* %tmp8229, i64 1
+ %tmp8231 = getelementptr inbounds float, float* %tmp8230, i64 1
+ %tmp8232 = getelementptr inbounds float, float* %tmp8231, i64 1
+ %tmp8233 = getelementptr inbounds float, float* %tmp8232, i64 1
+ %tmp8234 = getelementptr inbounds float, float* %tmp8233, i64 1
+ %tmp8235 = getelementptr inbounds float, float* %tmp8234, i64 1
+ %tmp8236 = getelementptr inbounds float, float* %tmp8235, i64 1
+ %tmp8237 = getelementptr inbounds float, float* %tmp8236, i64 1
+ %tmp8238 = getelementptr inbounds float, float* %tmp8237, i64 1
+ %tmp8239 = getelementptr inbounds float, float* %tmp8238, i64 1
+ %tmp8240 = getelementptr inbounds float, float* %tmp8239, i64 1
+ %tmp8241 = getelementptr inbounds float, float* %tmp8240, i64 1
+ %tmp8242 = getelementptr inbounds float, float* %tmp8241, i64 1
+ %tmp8243 = getelementptr inbounds float, float* %tmp8242, i64 1
+ %tmp8244 = getelementptr inbounds float, float* %tmp8243, i64 1
+ %tmp8245 = getelementptr inbounds float, float* %tmp8244, i64 1
+ %tmp8246 = getelementptr inbounds float, float* %tmp8245, i64 1
+ %tmp8247 = getelementptr inbounds float, float* %tmp8246, i64 1
+ %tmp8248 = getelementptr inbounds float, float* %tmp8247, i64 1
+ %tmp8249 = getelementptr inbounds float, float* %tmp8248, i64 1
+ %tmp8250 = getelementptr inbounds float, float* %tmp8249, i64 1
+ %tmp8251 = getelementptr inbounds float, float* %tmp8250, i64 1
+ %tmp8252 = getelementptr inbounds float, float* %tmp8251, i64 1
+ %tmp8253 = getelementptr inbounds float, float* %tmp8252, i64 1
+ %tmp8254 = getelementptr inbounds float, float* %tmp8253, i64 1
+ %tmp8255 = getelementptr inbounds float, float* %tmp8254, i64 1
+ %tmp8256 = getelementptr inbounds float, float* %tmp8255, i64 1
+ %tmp8257 = getelementptr inbounds float, float* %tmp8256, i64 1
+ %tmp8258 = getelementptr inbounds float, float* %tmp8257, i64 1
+ %tmp8259 = getelementptr inbounds float, float* %tmp8258, i64 1
+ %tmp8260 = getelementptr inbounds float, float* %tmp8259, i64 1
+ %tmp8261 = getelementptr inbounds float, float* %tmp8260, i64 1
+ %tmp8262 = getelementptr inbounds float, float* %tmp8261, i64 1
+ %tmp8263 = getelementptr inbounds float, float* %tmp8262, i64 1
+ %tmp8264 = getelementptr inbounds float, float* %tmp8263, i64 1
+ %tmp8265 = getelementptr inbounds float, float* %tmp8264, i64 1
+ %tmp8266 = getelementptr inbounds float, float* %tmp8265, i64 1
+ %tmp8267 = getelementptr inbounds float, float* %tmp8266, i64 1
+ %tmp8268 = getelementptr inbounds float, float* %tmp8267, i64 1
+ %tmp8269 = getelementptr inbounds float, float* %tmp8268, i64 1
+ %tmp8270 = getelementptr inbounds float, float* %tmp8269, i64 1
+ %tmp8271 = getelementptr inbounds float, float* %tmp8270, i64 1
+ %tmp8272 = getelementptr inbounds float, float* %tmp8271, i64 1
+ %tmp8273 = getelementptr inbounds float, float* %tmp8272, i64 1
+ %tmp8274 = getelementptr inbounds float, float* %tmp8273, i64 1
+ %tmp8275 = getelementptr inbounds float, float* %tmp8274, i64 1
+ %tmp8276 = getelementptr inbounds float, float* %tmp8275, i64 1
+ %tmp8277 = getelementptr inbounds float, float* %tmp8276, i64 1
+ %tmp8278 = getelementptr inbounds float, float* %tmp8277, i64 1
+ %tmp8279 = getelementptr inbounds float, float* %tmp8278, i64 1
+ %tmp8280 = getelementptr inbounds float, float* %tmp8279, i64 1
+ %tmp8281 = getelementptr inbounds float, float* %tmp8280, i64 1
+ %tmp8282 = getelementptr inbounds float, float* %tmp8281, i64 1
+ %tmp8283 = getelementptr inbounds float, float* %tmp8282, i64 1
+ %tmp8284 = getelementptr inbounds float, float* %tmp8283, i64 1
+ %tmp8285 = getelementptr inbounds float, float* %tmp8284, i64 1
+ %tmp8286 = getelementptr inbounds float, float* %tmp8285, i64 1
+ %tmp8287 = getelementptr inbounds float, float* %tmp8286, i64 1
+ %tmp8288 = getelementptr inbounds float, float* %tmp8287, i64 1
+ %tmp8289 = getelementptr inbounds float, float* %tmp8288, i64 1
+ %tmp8290 = getelementptr inbounds float, float* %tmp8289, i64 1
+ %tmp8291 = getelementptr inbounds float, float* %tmp8290, i64 1
+ %tmp8292 = getelementptr inbounds float, float* %tmp8291, i64 1
+ %tmp8293 = getelementptr inbounds float, float* %tmp8292, i64 1
+ %tmp8294 = getelementptr inbounds float, float* %tmp8293, i64 1
+ %tmp8295 = getelementptr inbounds float, float* %tmp8294, i64 1
+ %tmp8296 = getelementptr inbounds float, float* %tmp8295, i64 1
+ %tmp8297 = getelementptr inbounds float, float* %tmp8296, i64 1
+ %tmp8298 = getelementptr inbounds float, float* %tmp8297, i64 1
+ %tmp8299 = getelementptr inbounds float, float* %tmp8298, i64 1
+ %tmp8300 = getelementptr inbounds float, float* %tmp8299, i64 1
+ %tmp8301 = getelementptr inbounds float, float* %tmp8300, i64 1
+ %tmp8302 = getelementptr inbounds float, float* %tmp8301, i64 1
+ %tmp8303 = getelementptr inbounds float, float* %tmp8302, i64 1
+ %tmp8304 = getelementptr inbounds float, float* %tmp8303, i64 1
+ %tmp8305 = getelementptr inbounds float, float* %tmp8304, i64 1
+ %tmp8306 = getelementptr inbounds float, float* %tmp8305, i64 1
+ %tmp8307 = getelementptr inbounds float, float* %tmp8306, i64 1
+ %tmp8308 = getelementptr inbounds float, float* %tmp8307, i64 1
+ %tmp8309 = getelementptr inbounds float, float* %tmp8308, i64 1
+ %tmp8310 = getelementptr inbounds float, float* %tmp8309, i64 1
+ %tmp8311 = getelementptr inbounds float, float* %tmp8310, i64 1
+ %tmp8312 = getelementptr inbounds float, float* %tmp8311, i64 1
+ %tmp8313 = getelementptr inbounds float, float* %tmp8312, i64 1
+ %tmp8314 = getelementptr inbounds float, float* %tmp8313, i64 1
+ %tmp8315 = getelementptr inbounds float, float* %tmp8314, i64 1
+ %tmp8316 = getelementptr inbounds float, float* %tmp8315, i64 1
+ %tmp8317 = getelementptr inbounds float, float* %tmp8316, i64 1
+ %tmp8318 = getelementptr inbounds float, float* %tmp8317, i64 1
+ %tmp8319 = getelementptr inbounds float, float* %tmp8318, i64 1
+ %tmp8320 = getelementptr inbounds float, float* %tmp8319, i64 1
+ %tmp8321 = getelementptr inbounds float, float* %tmp8320, i64 1
+ %tmp8322 = getelementptr inbounds float, float* %tmp8321, i64 1
+ %tmp8323 = getelementptr inbounds float, float* %tmp8322, i64 1
+ %tmp8324 = getelementptr inbounds float, float* %tmp8323, i64 1
+ %tmp8325 = getelementptr inbounds float, float* %tmp8324, i64 1
+ %tmp8326 = getelementptr inbounds float, float* %tmp8325, i64 1
+ %tmp8327 = getelementptr inbounds float, float* %tmp8326, i64 1
+ %tmp8328 = getelementptr inbounds float, float* %tmp8327, i64 1
+ %tmp8329 = getelementptr inbounds float, float* %tmp8328, i64 1
+ %tmp8330 = getelementptr inbounds float, float* %tmp8329, i64 1
+ %tmp8331 = getelementptr inbounds float, float* %tmp8330, i64 1
+ %tmp8332 = getelementptr inbounds float, float* %tmp8331, i64 1
+ %tmp8333 = getelementptr inbounds float, float* %tmp8332, i64 1
+ %tmp8334 = getelementptr inbounds float, float* %tmp8333, i64 1
+ %tmp8335 = getelementptr inbounds float, float* %tmp8334, i64 1
+ %tmp8336 = getelementptr inbounds float, float* %tmp8335, i64 1
+ %tmp8337 = getelementptr inbounds float, float* %tmp8336, i64 1
+ %tmp8338 = getelementptr inbounds float, float* %tmp8337, i64 1
+ %tmp8339 = getelementptr inbounds float, float* %tmp8338, i64 1
+ %tmp8340 = getelementptr inbounds float, float* %tmp8339, i64 1
+ %tmp8341 = getelementptr inbounds float, float* %tmp8340, i64 1
+ %tmp8342 = getelementptr inbounds float, float* %tmp8341, i64 1
+ %tmp8343 = getelementptr inbounds float, float* %tmp8342, i64 1
+ %tmp8344 = getelementptr inbounds float, float* %tmp8343, i64 1
+ %tmp8345 = getelementptr inbounds float, float* %tmp8344, i64 1
+ %tmp8346 = getelementptr inbounds float, float* %tmp8345, i64 1
+ %tmp8347 = getelementptr inbounds float, float* %tmp8346, i64 1
+ %tmp8348 = getelementptr inbounds float, float* %tmp8347, i64 1
+ %tmp8349 = getelementptr inbounds float, float* %tmp8348, i64 1
+ %tmp8350 = getelementptr inbounds float, float* %tmp8349, i64 1
+ %tmp8351 = getelementptr inbounds float, float* %tmp8350, i64 1
+ %tmp8352 = getelementptr inbounds float, float* %tmp8351, i64 1
+ %tmp8353 = getelementptr inbounds float, float* %tmp8352, i64 1
+ %tmp8354 = getelementptr inbounds float, float* %tmp8353, i64 1
+ %tmp8355 = getelementptr inbounds float, float* %tmp8354, i64 1
+ %tmp8356 = getelementptr inbounds float, float* %tmp8355, i64 1
+ %tmp8357 = getelementptr inbounds float, float* %tmp8356, i64 1
+ %tmp8358 = getelementptr inbounds float, float* %tmp8357, i64 1
+ %tmp8359 = getelementptr inbounds float, float* %tmp8358, i64 1
+ %tmp8360 = getelementptr inbounds float, float* %tmp8359, i64 1
+ %tmp8361 = getelementptr inbounds float, float* %tmp8360, i64 1
+ %tmp8362 = getelementptr inbounds float, float* %tmp8361, i64 1
+ %tmp8363 = getelementptr inbounds float, float* %tmp8362, i64 1
+ %tmp8364 = getelementptr inbounds float, float* %tmp8363, i64 1
+ %tmp8365 = getelementptr inbounds float, float* %tmp8364, i64 1
+ %tmp8366 = getelementptr inbounds float, float* %tmp8365, i64 1
+ %tmp8367 = getelementptr inbounds float, float* %tmp8366, i64 1
+ %tmp8368 = getelementptr inbounds float, float* %tmp8367, i64 1
+ %tmp8369 = getelementptr inbounds float, float* %tmp8368, i64 1
+ %tmp8370 = getelementptr inbounds float, float* %tmp8369, i64 1
+ %tmp8371 = getelementptr inbounds float, float* %tmp8370, i64 1
+ %tmp8372 = getelementptr inbounds float, float* %tmp8371, i64 1
+ %tmp8373 = getelementptr inbounds float, float* %tmp8372, i64 1
+ %tmp8374 = getelementptr inbounds float, float* %tmp8373, i64 1
+ %tmp8375 = getelementptr inbounds float, float* %tmp8374, i64 1
+ %tmp8376 = getelementptr inbounds float, float* %tmp8375, i64 1
+ %tmp8377 = getelementptr inbounds float, float* %tmp8376, i64 1
+ %tmp8378 = getelementptr inbounds float, float* %tmp8377, i64 1
+ %tmp8379 = getelementptr inbounds float, float* %tmp8378, i64 1
+ %tmp8380 = getelementptr inbounds float, float* %tmp8379, i64 1
+ %tmp8381 = getelementptr inbounds float, float* %tmp8380, i64 1
+ %tmp8382 = getelementptr inbounds float, float* %tmp8381, i64 1
+ %tmp8383 = getelementptr inbounds float, float* %tmp8382, i64 1
+ %tmp8384 = getelementptr inbounds float, float* %tmp8383, i64 1
+ %tmp8385 = getelementptr inbounds float, float* %tmp8384, i64 1
+ %tmp8386 = getelementptr inbounds float, float* %tmp8385, i64 1
+ %tmp8387 = getelementptr inbounds float, float* %tmp8386, i64 1
+ %tmp8388 = getelementptr inbounds float, float* %tmp8387, i64 1
+ %tmp8389 = getelementptr inbounds float, float* %tmp8388, i64 1
+ %tmp8390 = getelementptr inbounds float, float* %tmp8389, i64 1
+ %tmp8391 = getelementptr inbounds float, float* %tmp8390, i64 1
+ %tmp8392 = getelementptr inbounds float, float* %tmp8391, i64 1
+ %tmp8393 = getelementptr inbounds float, float* %tmp8392, i64 1
+ %tmp8394 = getelementptr inbounds float, float* %tmp8393, i64 1
+ %tmp8395 = getelementptr inbounds float, float* %tmp8394, i64 1
+ %tmp8396 = getelementptr inbounds float, float* %tmp8395, i64 1
+ %tmp8397 = getelementptr inbounds float, float* %tmp8396, i64 1
+ %tmp8398 = getelementptr inbounds float, float* %tmp8397, i64 1
+ %tmp8399 = getelementptr inbounds float, float* %tmp8398, i64 1
+ %tmp8400 = getelementptr inbounds float, float* %tmp8399, i64 1
+ %tmp8401 = getelementptr inbounds float, float* %tmp8400, i64 1
+ %tmp8402 = getelementptr inbounds float, float* %tmp8401, i64 1
+ %tmp8403 = getelementptr inbounds float, float* %tmp8402, i64 1
+ %tmp8404 = getelementptr inbounds float, float* %tmp8403, i64 1
+ %tmp8405 = getelementptr inbounds float, float* %tmp8404, i64 1
+ %tmp8406 = getelementptr inbounds float, float* %tmp8405, i64 1
+ %tmp8407 = getelementptr inbounds float, float* %tmp8406, i64 1
+ %tmp8408 = getelementptr inbounds float, float* %tmp8407, i64 1
+ %tmp8409 = getelementptr inbounds float, float* %tmp8408, i64 1
+ %tmp8410 = getelementptr inbounds float, float* %tmp8409, i64 1
+ %tmp8411 = getelementptr inbounds float, float* %tmp8410, i64 1
+ %tmp8412 = getelementptr inbounds float, float* %tmp8411, i64 1
+ %tmp8413 = getelementptr inbounds float, float* %tmp8412, i64 1
+ %tmp8414 = getelementptr inbounds float, float* %tmp8413, i64 1
+ %tmp8415 = getelementptr inbounds float, float* %tmp8414, i64 1
+ %tmp8416 = getelementptr inbounds float, float* %tmp8415, i64 1
+ %tmp8417 = getelementptr inbounds float, float* %tmp8416, i64 1
+ %tmp8418 = getelementptr inbounds float, float* %tmp8417, i64 1
+ %tmp8419 = getelementptr inbounds float, float* %tmp8418, i64 1
+ %tmp8420 = getelementptr inbounds float, float* %tmp8419, i64 1
+ %tmp8421 = getelementptr inbounds float, float* %tmp8420, i64 1
+ %tmp8422 = getelementptr inbounds float, float* %tmp8421, i64 1
+ %tmp8423 = getelementptr inbounds float, float* %tmp8422, i64 1
+ %tmp8424 = getelementptr inbounds float, float* %tmp8423, i64 1
+ %tmp8425 = getelementptr inbounds float, float* %tmp8424, i64 1
+ %tmp8426 = getelementptr inbounds float, float* %tmp8425, i64 1
+ %tmp8427 = getelementptr inbounds float, float* %tmp8426, i64 1
+ %tmp8428 = getelementptr inbounds float, float* %tmp8427, i64 1
+ %tmp8429 = getelementptr inbounds float, float* %tmp8428, i64 1
+ %tmp8430 = getelementptr inbounds float, float* %tmp8429, i64 1
+ %tmp8431 = getelementptr inbounds float, float* %tmp8430, i64 1
+ %tmp8432 = getelementptr inbounds float, float* %tmp8431, i64 1
+ %tmp8433 = getelementptr inbounds float, float* %tmp8432, i64 1
+ %tmp8434 = getelementptr inbounds float, float* %tmp8433, i64 1
+ %tmp8435 = getelementptr inbounds float, float* %tmp8434, i64 1
+ %tmp8436 = getelementptr inbounds float, float* %tmp8435, i64 1
+ %tmp8437 = getelementptr inbounds float, float* %tmp8436, i64 1
+ %tmp8438 = getelementptr inbounds float, float* %tmp8437, i64 1
+ %tmp8439 = getelementptr inbounds float, float* %tmp8438, i64 1
+ %tmp8440 = getelementptr inbounds float, float* %tmp8439, i64 1
+ %tmp8441 = getelementptr inbounds float, float* %tmp8440, i64 1
+ %tmp8442 = getelementptr inbounds float, float* %tmp8441, i64 1
+ %tmp8443 = getelementptr inbounds float, float* %tmp8442, i64 1
+ %tmp8444 = getelementptr inbounds float, float* %tmp8443, i64 1
+ %tmp8445 = getelementptr inbounds float, float* %tmp8444, i64 1
+ %tmp8446 = getelementptr inbounds float, float* %tmp8445, i64 1
+ %tmp8447 = getelementptr inbounds float, float* %tmp8446, i64 1
+ %tmp8448 = getelementptr inbounds float, float* %tmp8447, i64 1
+ %tmp8449 = getelementptr inbounds float, float* %tmp8448, i64 1
+ %tmp8450 = getelementptr inbounds float, float* %tmp8449, i64 1
+ %tmp8451 = getelementptr inbounds float, float* %tmp8450, i64 1
+ %tmp8452 = getelementptr inbounds float, float* %tmp8451, i64 1
+ %tmp8453 = getelementptr inbounds float, float* %tmp8452, i64 1
+ %tmp8454 = getelementptr inbounds float, float* %tmp8453, i64 1
+ %tmp8455 = getelementptr inbounds float, float* %tmp8454, i64 1
+ %tmp8456 = getelementptr inbounds float, float* %tmp8455, i64 1
+ %tmp8457 = getelementptr inbounds float, float* %tmp8456, i64 1
+ %tmp8458 = getelementptr inbounds float, float* %tmp8457, i64 1
+ %tmp8459 = getelementptr inbounds float, float* %tmp8458, i64 1
+ %tmp8460 = getelementptr inbounds float, float* %tmp8459, i64 1
+ %tmp8461 = getelementptr inbounds float, float* %tmp8460, i64 1
+ %tmp8462 = getelementptr inbounds float, float* %tmp8461, i64 1
+ %tmp8463 = getelementptr inbounds float, float* %tmp8462, i64 1
+ %tmp8464 = getelementptr inbounds float, float* %tmp8463, i64 1
+ %tmp8465 = getelementptr inbounds float, float* %tmp8464, i64 1
+ %tmp8466 = getelementptr inbounds float, float* %tmp8465, i64 1
+ %tmp8467 = getelementptr inbounds float, float* %tmp8466, i64 1
+ %tmp8468 = getelementptr inbounds float, float* %tmp8467, i64 1
+ %tmp8469 = getelementptr inbounds float, float* %tmp8468, i64 1
+ %tmp8470 = getelementptr inbounds float, float* %tmp8469, i64 1
+ %tmp8471 = getelementptr inbounds float, float* %tmp8470, i64 1
+ %tmp8472 = getelementptr inbounds float, float* %tmp8471, i64 1
+ %tmp8473 = getelementptr inbounds float, float* %tmp8472, i64 1
+ %tmp8474 = getelementptr inbounds float, float* %tmp8473, i64 1
+ %tmp8475 = getelementptr inbounds float, float* %tmp8474, i64 1
+ %tmp8476 = getelementptr inbounds float, float* %tmp8475, i64 1
+ %tmp8477 = getelementptr inbounds float, float* %tmp8476, i64 1
+ %tmp8478 = getelementptr inbounds float, float* %tmp8477, i64 1
+ %tmp8479 = getelementptr inbounds float, float* %tmp8478, i64 1
+ %tmp8480 = getelementptr inbounds float, float* %tmp8479, i64 1
+ %tmp8481 = getelementptr inbounds float, float* %tmp8480, i64 1
+ %tmp8482 = getelementptr inbounds float, float* %tmp8481, i64 1
+ %tmp8483 = getelementptr inbounds float, float* %tmp8482, i64 1
+ %tmp8484 = getelementptr inbounds float, float* %tmp8483, i64 1
+ %tmp8485 = getelementptr inbounds float, float* %tmp8484, i64 1
+ %tmp8486 = getelementptr inbounds float, float* %tmp8485, i64 1
+ %tmp8487 = getelementptr inbounds float, float* %tmp8486, i64 1
+ %tmp8488 = getelementptr inbounds float, float* %tmp8487, i64 1
+ %tmp8489 = getelementptr inbounds float, float* %tmp8488, i64 1
+ %tmp8490 = getelementptr inbounds float, float* %tmp8489, i64 1
+ %tmp8491 = getelementptr inbounds float, float* %tmp8490, i64 1
+ %tmp8492 = getelementptr inbounds float, float* %tmp8491, i64 1
+ %tmp8493 = getelementptr inbounds float, float* %tmp8492, i64 1
+ %tmp8494 = getelementptr inbounds float, float* %tmp8493, i64 1
+ %tmp8495 = getelementptr inbounds float, float* %tmp8494, i64 1
+ %tmp8496 = getelementptr inbounds float, float* %tmp8495, i64 1
+ %tmp8497 = getelementptr inbounds float, float* %tmp8496, i64 1
+ %tmp8498 = getelementptr inbounds float, float* %tmp8497, i64 1
+ %tmp8499 = getelementptr inbounds float, float* %tmp8498, i64 1
+ %tmp8500 = getelementptr inbounds float, float* %tmp8499, i64 1
+ %tmp8501 = getelementptr inbounds float, float* %tmp8500, i64 1
+ %tmp8502 = getelementptr inbounds float, float* %tmp8501, i64 1
+ %tmp8503 = getelementptr inbounds float, float* %tmp8502, i64 1
+ %tmp8504 = getelementptr inbounds float, float* %tmp8503, i64 1
+ %tmp8505 = getelementptr inbounds float, float* %tmp8504, i64 1
+ %tmp8506 = getelementptr inbounds float, float* %tmp8505, i64 1
+ %tmp8507 = getelementptr inbounds float, float* %tmp8506, i64 1
+ %tmp8508 = getelementptr inbounds float, float* %tmp8507, i64 1
+ %tmp8509 = getelementptr inbounds float, float* %tmp8508, i64 1
+ %tmp8510 = getelementptr inbounds float, float* %tmp8509, i64 1
+ %tmp8511 = getelementptr inbounds float, float* %tmp8510, i64 1
+ %tmp8512 = getelementptr inbounds float, float* %tmp8511, i64 1
+ %tmp8513 = getelementptr inbounds float, float* %tmp8512, i64 1
+ %tmp8514 = getelementptr inbounds float, float* %tmp8513, i64 1
+ %tmp8515 = getelementptr inbounds float, float* %tmp8514, i64 1
+ %tmp8516 = getelementptr inbounds float, float* %tmp8515, i64 1
+ %tmp8517 = getelementptr inbounds float, float* %tmp8516, i64 1
+ %tmp8518 = getelementptr inbounds float, float* %tmp8517, i64 1
+ %tmp8519 = getelementptr inbounds float, float* %tmp8518, i64 1
+ %tmp8520 = getelementptr inbounds float, float* %tmp8519, i64 1
+ %tmp8521 = getelementptr inbounds float, float* %tmp8520, i64 1
+ %tmp8522 = getelementptr inbounds float, float* %tmp8521, i64 1
+ %tmp8523 = getelementptr inbounds float, float* %tmp8522, i64 1
+ %tmp8524 = getelementptr inbounds float, float* %tmp8523, i64 1
+ %tmp8525 = getelementptr inbounds float, float* %tmp8524, i64 1
+ %tmp8526 = getelementptr inbounds float, float* %tmp8525, i64 1
+ %tmp8527 = getelementptr inbounds float, float* %tmp8526, i64 1
+ %tmp8528 = getelementptr inbounds float, float* %tmp8527, i64 1
+ %tmp8529 = getelementptr inbounds float, float* %tmp8528, i64 1
+ %tmp8530 = getelementptr inbounds float, float* %tmp8529, i64 1
+ %tmp8531 = getelementptr inbounds float, float* %tmp8530, i64 1
+ %tmp8532 = getelementptr inbounds float, float* %tmp8531, i64 1
+ %tmp8533 = getelementptr inbounds float, float* %tmp8532, i64 1
+ %tmp8534 = getelementptr inbounds float, float* %tmp8533, i64 1
+ %tmp8535 = getelementptr inbounds float, float* %tmp8534, i64 1
+ %tmp8536 = getelementptr inbounds float, float* %tmp8535, i64 1
+ %tmp8537 = getelementptr inbounds float, float* %tmp8536, i64 1
+ %tmp8538 = getelementptr inbounds float, float* %tmp8537, i64 1
+ %tmp8539 = getelementptr inbounds float, float* %tmp8538, i64 1
+ %tmp8540 = getelementptr inbounds float, float* %tmp8539, i64 1
+ %tmp8541 = getelementptr inbounds float, float* %tmp8540, i64 1
+ %tmp8542 = getelementptr inbounds float, float* %tmp8541, i64 1
+ %tmp8543 = getelementptr inbounds float, float* %tmp8542, i64 1
+ %tmp8544 = getelementptr inbounds float, float* %tmp8543, i64 1
+ %tmp8545 = getelementptr inbounds float, float* %tmp8544, i64 1
+ %tmp8546 = getelementptr inbounds float, float* %tmp8545, i64 1
+ %tmp8547 = getelementptr inbounds float, float* %tmp8546, i64 1
+ %tmp8548 = getelementptr inbounds float, float* %tmp8547, i64 1
+ %tmp8549 = getelementptr inbounds float, float* %tmp8548, i64 1
+ %tmp8550 = getelementptr inbounds float, float* %tmp8549, i64 1
+ %tmp8551 = getelementptr inbounds float, float* %tmp8550, i64 1
+ %tmp8552 = getelementptr inbounds float, float* %tmp8551, i64 1
+ %tmp8553 = getelementptr inbounds float, float* %tmp8552, i64 1
+ %tmp8554 = getelementptr inbounds float, float* %tmp8553, i64 1
+ %tmp8555 = getelementptr inbounds float, float* %tmp8554, i64 1
+ %tmp8556 = getelementptr inbounds float, float* %tmp8555, i64 1
+ %tmp8557 = getelementptr inbounds float, float* %tmp8556, i64 1
+ %tmp8558 = getelementptr inbounds float, float* %tmp8557, i64 1
+ %tmp8559 = getelementptr inbounds float, float* %tmp8558, i64 1
+ %tmp8560 = getelementptr inbounds float, float* %tmp8559, i64 1
+ %tmp8561 = getelementptr inbounds float, float* %tmp8560, i64 1
+ %tmp8562 = getelementptr inbounds float, float* %tmp8561, i64 1
+ %tmp8563 = getelementptr inbounds float, float* %tmp8562, i64 1
+ %tmp8564 = getelementptr inbounds float, float* %tmp8563, i64 1
+ %tmp8565 = getelementptr inbounds float, float* %tmp8564, i64 1
+ %tmp8566 = getelementptr inbounds float, float* %tmp8565, i64 1
+ %tmp8567 = getelementptr inbounds float, float* %tmp8566, i64 1
+ %tmp8568 = getelementptr inbounds float, float* %tmp8567, i64 1
+ %tmp8569 = getelementptr inbounds float, float* %tmp8568, i64 1
+ %tmp8570 = getelementptr inbounds float, float* %tmp8569, i64 1
+ %tmp8571 = getelementptr inbounds float, float* %tmp8570, i64 1
+ %tmp8572 = getelementptr inbounds float, float* %tmp8571, i64 1
+ %tmp8573 = getelementptr inbounds float, float* %tmp8572, i64 1
+ %tmp8574 = getelementptr inbounds float, float* %tmp8573, i64 1
+ %tmp8575 = getelementptr inbounds float, float* %tmp8574, i64 1
+ %tmp8576 = getelementptr inbounds float, float* %tmp8575, i64 1
+ %tmp8577 = getelementptr inbounds float, float* %tmp8576, i64 1
+ %tmp8578 = getelementptr inbounds float, float* %tmp8577, i64 1
+ %tmp8579 = getelementptr inbounds float, float* %tmp8578, i64 1
+ %tmp8580 = getelementptr inbounds float, float* %tmp8579, i64 1
+ %tmp8581 = getelementptr inbounds float, float* %tmp8580, i64 1
+ %tmp8582 = getelementptr inbounds float, float* %tmp8581, i64 1
+ %tmp8583 = getelementptr inbounds float, float* %tmp8582, i64 1
+ %tmp8584 = getelementptr inbounds float, float* %tmp8583, i64 1
+ %tmp8585 = getelementptr inbounds float, float* %tmp8584, i64 1
+ %tmp8586 = getelementptr inbounds float, float* %tmp8585, i64 1
+ %tmp8587 = getelementptr inbounds float, float* %tmp8586, i64 1
+ %tmp8588 = getelementptr inbounds float, float* %tmp8587, i64 1
+ %tmp8589 = getelementptr inbounds float, float* %tmp8588, i64 1
+ %tmp8590 = getelementptr inbounds float, float* %tmp8589, i64 1
+ %tmp8591 = getelementptr inbounds float, float* %tmp8590, i64 1
+ %tmp8592 = getelementptr inbounds float, float* %tmp8591, i64 1
+ %tmp8593 = getelementptr inbounds float, float* %tmp8592, i64 1
+ %tmp8594 = getelementptr inbounds float, float* %tmp8593, i64 1
+ %tmp8595 = getelementptr inbounds float, float* %tmp8594, i64 1
+ %tmp8596 = getelementptr inbounds float, float* %tmp8595, i64 1
+ %tmp8597 = getelementptr inbounds float, float* %tmp8596, i64 1
+ %tmp8598 = getelementptr inbounds float, float* %tmp8597, i64 1
+ %tmp8599 = getelementptr inbounds float, float* %tmp8598, i64 1
+ %tmp8600 = getelementptr inbounds float, float* %tmp8599, i64 1
+ %tmp8601 = getelementptr inbounds float, float* %tmp8600, i64 1
+ %tmp8602 = getelementptr inbounds float, float* %tmp8601, i64 1
+ %tmp8603 = getelementptr inbounds float, float* %tmp8602, i64 1
+ %tmp8604 = getelementptr inbounds float, float* %tmp8603, i64 1
+ %tmp8605 = getelementptr inbounds float, float* %tmp8604, i64 1
+ %tmp8606 = getelementptr inbounds float, float* %tmp8605, i64 1
+ %tmp8607 = getelementptr inbounds float, float* %tmp8606, i64 1
+ %tmp8608 = getelementptr inbounds float, float* %tmp8607, i64 1
+ %tmp8609 = getelementptr inbounds float, float* %tmp8608, i64 1
+ %tmp8610 = getelementptr inbounds float, float* %tmp8609, i64 1
+ %tmp8611 = getelementptr inbounds float, float* %tmp8610, i64 1
+ %tmp8612 = getelementptr inbounds float, float* %tmp8611, i64 1
+ %tmp8613 = getelementptr inbounds float, float* %tmp8612, i64 1
+ %tmp8614 = getelementptr inbounds float, float* %tmp8613, i64 1
+ %tmp8615 = getelementptr inbounds float, float* %tmp8614, i64 1
+ %tmp8616 = getelementptr inbounds float, float* %tmp8615, i64 1
+ %tmp8617 = getelementptr inbounds float, float* %tmp8616, i64 1
+ %tmp8618 = getelementptr inbounds float, float* %tmp8617, i64 1
+ %tmp8619 = getelementptr inbounds float, float* %tmp8618, i64 1
+ %tmp8620 = getelementptr inbounds float, float* %tmp8619, i64 1
+ %tmp8621 = getelementptr inbounds float, float* %tmp8620, i64 1
+ %tmp8622 = getelementptr inbounds float, float* %tmp8621, i64 1
+ %tmp8623 = getelementptr inbounds float, float* %tmp8622, i64 1
+ %tmp8624 = getelementptr inbounds float, float* %tmp8623, i64 1
+ %tmp8625 = getelementptr inbounds float, float* %tmp8624, i64 1
+ %tmp8626 = getelementptr inbounds float, float* %tmp8625, i64 1
+ %tmp8627 = getelementptr inbounds float, float* %tmp8626, i64 1
+ %tmp8628 = getelementptr inbounds float, float* %tmp8627, i64 1
+ %tmp8629 = getelementptr inbounds float, float* %tmp8628, i64 1
+ %tmp8630 = getelementptr inbounds float, float* %tmp8629, i64 1
+ %tmp8631 = getelementptr inbounds float, float* %tmp8630, i64 1
+ %tmp8632 = getelementptr inbounds float, float* %tmp8631, i64 1
+ %tmp8633 = getelementptr inbounds float, float* %tmp8632, i64 1
+ %tmp8634 = getelementptr inbounds float, float* %tmp8633, i64 1
+ %tmp8635 = getelementptr inbounds float, float* %tmp8634, i64 1
+ %tmp8636 = getelementptr inbounds float, float* %tmp8635, i64 1
+ %tmp8637 = getelementptr inbounds float, float* %tmp8636, i64 1
+ %tmp8638 = getelementptr inbounds float, float* %tmp8637, i64 1
+ %tmp8639 = getelementptr inbounds float, float* %tmp8638, i64 1
+ %tmp8640 = getelementptr inbounds float, float* %tmp8639, i64 1
+ %tmp8641 = getelementptr inbounds float, float* %tmp8640, i64 1
+ %tmp8642 = getelementptr inbounds float, float* %tmp8641, i64 1
+ %tmp8643 = getelementptr inbounds float, float* %tmp8642, i64 1
+ %tmp8644 = getelementptr inbounds float, float* %tmp8643, i64 1
+ %tmp8645 = getelementptr inbounds float, float* %tmp8644, i64 1
+ %tmp8646 = getelementptr inbounds float, float* %tmp8645, i64 1
+ %tmp8647 = getelementptr inbounds float, float* %tmp8646, i64 1
+ %tmp8648 = getelementptr inbounds float, float* %tmp8647, i64 1
+ %tmp8649 = getelementptr inbounds float, float* %tmp8648, i64 1
+ %tmp8650 = getelementptr inbounds float, float* %tmp8649, i64 1
+ %tmp8651 = getelementptr inbounds float, float* %tmp8650, i64 1
+ %tmp8652 = getelementptr inbounds float, float* %tmp8651, i64 1
+ %tmp8653 = getelementptr inbounds float, float* %tmp8652, i64 1
+ %tmp8654 = getelementptr inbounds float, float* %tmp8653, i64 1
+ %tmp8655 = getelementptr inbounds float, float* %tmp8654, i64 1
+ %tmp8656 = getelementptr inbounds float, float* %tmp8655, i64 1
+ %tmp8657 = getelementptr inbounds float, float* %tmp8656, i64 1
+ %tmp8658 = getelementptr inbounds float, float* %tmp8657, i64 1
+ %tmp8659 = getelementptr inbounds float, float* %tmp8658, i64 1
+ %tmp8660 = getelementptr inbounds float, float* %tmp8659, i64 1
+ %tmp8661 = getelementptr inbounds float, float* %tmp8660, i64 1
+ %tmp8662 = getelementptr inbounds float, float* %tmp8661, i64 1
+ %tmp8663 = getelementptr inbounds float, float* %tmp8662, i64 1
+ %tmp8664 = getelementptr inbounds float, float* %tmp8663, i64 1
+ %tmp8665 = getelementptr inbounds float, float* %tmp8664, i64 1
+ %tmp8666 = getelementptr inbounds float, float* %tmp8665, i64 1
+ %tmp8667 = getelementptr inbounds float, float* %tmp8666, i64 1
+ %tmp8668 = getelementptr inbounds float, float* %tmp8667, i64 1
+ %tmp8669 = getelementptr inbounds float, float* %tmp8668, i64 1
+ %tmp8670 = getelementptr inbounds float, float* %tmp8669, i64 1
+ %tmp8671 = getelementptr inbounds float, float* %tmp8670, i64 1
+ %tmp8672 = getelementptr inbounds float, float* %tmp8671, i64 1
+ %tmp8673 = getelementptr inbounds float, float* %tmp8672, i64 1
+ %tmp8674 = getelementptr inbounds float, float* %tmp8673, i64 1
+ %tmp8675 = getelementptr inbounds float, float* %tmp8674, i64 1
+ %tmp8676 = getelementptr inbounds float, float* %tmp8675, i64 1
+ %tmp8677 = getelementptr inbounds float, float* %tmp8676, i64 1
+ %tmp8678 = getelementptr inbounds float, float* %tmp8677, i64 1
+ %tmp8679 = getelementptr inbounds float, float* %tmp8678, i64 1
+ %tmp8680 = getelementptr inbounds float, float* %tmp8679, i64 1
+ %tmp8681 = getelementptr inbounds float, float* %tmp8680, i64 1
+ %tmp8682 = getelementptr inbounds float, float* %tmp8681, i64 1
+ %tmp8683 = getelementptr inbounds float, float* %tmp8682, i64 1
+ %tmp8684 = getelementptr inbounds float, float* %tmp8683, i64 1
+ %tmp8685 = getelementptr inbounds float, float* %tmp8684, i64 1
+ %tmp8686 = getelementptr inbounds float, float* %tmp8685, i64 1
+ %tmp8687 = getelementptr inbounds float, float* %tmp8686, i64 1
+ %tmp8688 = getelementptr inbounds float, float* %tmp8687, i64 1
+ %tmp8689 = getelementptr inbounds float, float* %tmp8688, i64 1
+ %tmp8690 = getelementptr inbounds float, float* %tmp8689, i64 1
+ %tmp8691 = getelementptr inbounds float, float* %tmp8690, i64 1
+ %tmp8692 = getelementptr inbounds float, float* %tmp8691, i64 1
+ %tmp8693 = getelementptr inbounds float, float* %tmp8692, i64 1
+ %tmp8694 = getelementptr inbounds float, float* %tmp8693, i64 1
+ %tmp8695 = getelementptr inbounds float, float* %tmp8694, i64 1
+ %tmp8696 = getelementptr inbounds float, float* %tmp8695, i64 1
+ %tmp8697 = getelementptr inbounds float, float* %tmp8696, i64 1
+ %tmp8698 = getelementptr inbounds float, float* %tmp8697, i64 1
+ %tmp8699 = getelementptr inbounds float, float* %tmp8698, i64 1
+ %tmp8700 = getelementptr inbounds float, float* %tmp8699, i64 1
+ %tmp8701 = getelementptr inbounds float, float* %tmp8700, i64 1
+ %tmp8702 = getelementptr inbounds float, float* %tmp8701, i64 1
+ %tmp8703 = getelementptr inbounds float, float* %tmp8702, i64 1
+ %tmp8704 = getelementptr inbounds float, float* %tmp8703, i64 1
+ %tmp8705 = getelementptr inbounds float, float* %tmp8704, i64 1
+ %tmp8706 = getelementptr inbounds float, float* %tmp8705, i64 1
+ %tmp8707 = getelementptr inbounds float, float* %tmp8706, i64 1
+ %tmp8708 = getelementptr inbounds float, float* %tmp8707, i64 1
+ %tmp8709 = getelementptr inbounds float, float* %tmp8708, i64 1
+ %tmp8710 = getelementptr inbounds float, float* %tmp8709, i64 1
+ %tmp8711 = getelementptr inbounds float, float* %tmp8710, i64 1
+ %tmp8712 = getelementptr inbounds float, float* %tmp8711, i64 1
+ %tmp8713 = getelementptr inbounds float, float* %tmp8712, i64 1
+ %tmp8714 = getelementptr inbounds float, float* %tmp8713, i64 1
+ %tmp8715 = getelementptr inbounds float, float* %tmp8714, i64 1
+ %tmp8716 = getelementptr inbounds float, float* %tmp8715, i64 1
+ %tmp8717 = getelementptr inbounds float, float* %tmp8716, i64 1
+ %tmp8718 = getelementptr inbounds float, float* %tmp8717, i64 1
+ %tmp8719 = getelementptr inbounds float, float* %tmp8718, i64 1
+ %tmp8720 = getelementptr inbounds float, float* %tmp8719, i64 1
+ %tmp8721 = getelementptr inbounds float, float* %tmp8720, i64 1
+ %tmp8722 = getelementptr inbounds float, float* %tmp8721, i64 1
+ %tmp8723 = getelementptr inbounds float, float* %tmp8722, i64 1
+ %tmp8724 = getelementptr inbounds float, float* %tmp8723, i64 1
+ %tmp8725 = getelementptr inbounds float, float* %tmp8724, i64 1
+ %tmp8726 = getelementptr inbounds float, float* %tmp8725, i64 1
+ %tmp8727 = getelementptr inbounds float, float* %tmp8726, i64 1
+ %tmp8728 = getelementptr inbounds float, float* %tmp8727, i64 1
+ %tmp8729 = getelementptr inbounds float, float* %tmp8728, i64 1
+ %tmp8730 = getelementptr inbounds float, float* %tmp8729, i64 1
+ %tmp8731 = getelementptr inbounds float, float* %tmp8730, i64 1
+ %tmp8732 = getelementptr inbounds float, float* %tmp8731, i64 1
+ %tmp8733 = getelementptr inbounds float, float* %tmp8732, i64 1
+ %tmp8734 = getelementptr inbounds float, float* %tmp8733, i64 1
+ %tmp8735 = getelementptr inbounds float, float* %tmp8734, i64 1
+ %tmp8736 = getelementptr inbounds float, float* %tmp8735, i64 1
+ %tmp8737 = getelementptr inbounds float, float* %tmp8736, i64 1
+ %tmp8738 = getelementptr inbounds float, float* %tmp8737, i64 1
+ %tmp8739 = getelementptr inbounds float, float* %tmp8738, i64 1
+ %tmp8740 = getelementptr inbounds float, float* %tmp8739, i64 1
+ %tmp8741 = getelementptr inbounds float, float* %tmp8740, i64 1
+ %tmp8742 = getelementptr inbounds float, float* %tmp8741, i64 1
+ %tmp8743 = getelementptr inbounds float, float* %tmp8742, i64 1
+ %tmp8744 = getelementptr inbounds float, float* %tmp8743, i64 1
+ %tmp8745 = getelementptr inbounds float, float* %tmp8744, i64 1
+ %tmp8746 = getelementptr inbounds float, float* %tmp8745, i64 1
+ %tmp8747 = getelementptr inbounds float, float* %tmp8746, i64 1
+ %tmp8748 = getelementptr inbounds float, float* %tmp8747, i64 1
+ %tmp8749 = getelementptr inbounds float, float* %tmp8748, i64 1
+ %tmp8750 = getelementptr inbounds float, float* %tmp8749, i64 1
+ %tmp8751 = getelementptr inbounds float, float* %tmp8750, i64 1
+ %tmp8752 = getelementptr inbounds float, float* %tmp8751, i64 1
+ %tmp8753 = getelementptr inbounds float, float* %tmp8752, i64 1
+ %tmp8754 = getelementptr inbounds float, float* %tmp8753, i64 1
+ %tmp8755 = getelementptr inbounds float, float* %tmp8754, i64 1
+ %tmp8756 = getelementptr inbounds float, float* %tmp8755, i64 1
+ %tmp8757 = getelementptr inbounds float, float* %tmp8756, i64 1
+ %tmp8758 = getelementptr inbounds float, float* %tmp8757, i64 1
+ %tmp8759 = getelementptr inbounds float, float* %tmp8758, i64 1
+ %tmp8760 = getelementptr inbounds float, float* %tmp8759, i64 1
+ %tmp8761 = getelementptr inbounds float, float* %tmp8760, i64 1
+ %tmp8762 = getelementptr inbounds float, float* %tmp8761, i64 1
+ %tmp8763 = getelementptr inbounds float, float* %tmp8762, i64 1
+ %tmp8764 = getelementptr inbounds float, float* %tmp8763, i64 1
+ %tmp8765 = getelementptr inbounds float, float* %tmp8764, i64 1
+ %tmp8766 = getelementptr inbounds float, float* %tmp8765, i64 1
+ %tmp8767 = getelementptr inbounds float, float* %tmp8766, i64 1
+ %tmp8768 = getelementptr inbounds float, float* %tmp8767, i64 1
+ %tmp8769 = getelementptr inbounds float, float* %tmp8768, i64 1
+ %tmp8770 = getelementptr inbounds float, float* %tmp8769, i64 1
+ %tmp8771 = getelementptr inbounds float, float* %tmp8770, i64 1
+ %tmp8772 = getelementptr inbounds float, float* %tmp8771, i64 1
+ %tmp8773 = getelementptr inbounds float, float* %tmp8772, i64 1
+ %tmp8774 = getelementptr inbounds float, float* %tmp8773, i64 1
+ %tmp8775 = getelementptr inbounds float, float* %tmp8774, i64 1
+ %tmp8776 = getelementptr inbounds float, float* %tmp8775, i64 1
+ %tmp8777 = getelementptr inbounds float, float* %tmp8776, i64 1
+ %tmp8778 = getelementptr inbounds float, float* %tmp8777, i64 1
+ %tmp8779 = getelementptr inbounds float, float* %tmp8778, i64 1
+ %tmp8780 = getelementptr inbounds float, float* %tmp8779, i64 1
+ %tmp8781 = getelementptr inbounds float, float* %tmp8780, i64 1
+ %tmp8782 = getelementptr inbounds float, float* %tmp8781, i64 1
+ %tmp8783 = getelementptr inbounds float, float* %tmp8782, i64 1
+ %tmp8784 = getelementptr inbounds float, float* %tmp8783, i64 1
+ %tmp8785 = getelementptr inbounds float, float* %tmp8784, i64 1
+ %tmp8786 = getelementptr inbounds float, float* %tmp8785, i64 1
+ %tmp8787 = getelementptr inbounds float, float* %tmp8786, i64 1
+ %tmp8788 = getelementptr inbounds float, float* %tmp8787, i64 1
+ %tmp8789 = getelementptr inbounds float, float* %tmp8788, i64 1
+ %tmp8790 = getelementptr inbounds float, float* %tmp8789, i64 1
+ %tmp8791 = getelementptr inbounds float, float* %tmp8790, i64 1
+ %tmp8792 = getelementptr inbounds float, float* %tmp8791, i64 1
+ %tmp8793 = getelementptr inbounds float, float* %tmp8792, i64 1
+ %tmp8794 = getelementptr inbounds float, float* %tmp8793, i64 1
+ %tmp8795 = getelementptr inbounds float, float* %tmp8794, i64 1
+ %tmp8796 = getelementptr inbounds float, float* %tmp8795, i64 1
+ %tmp8797 = getelementptr inbounds float, float* %tmp8796, i64 1
+ %tmp8798 = getelementptr inbounds float, float* %tmp8797, i64 1
+ %tmp8799 = getelementptr inbounds float, float* %tmp8798, i64 1
+ %tmp8800 = getelementptr inbounds float, float* %tmp8799, i64 1
+ %tmp8801 = getelementptr inbounds float, float* %tmp8800, i64 1
+ %tmp8802 = getelementptr inbounds float, float* %tmp8801, i64 1
+ %tmp8803 = getelementptr inbounds float, float* %tmp8802, i64 1
+ %tmp8804 = getelementptr inbounds float, float* %tmp8803, i64 1
+ %tmp8805 = getelementptr inbounds float, float* %tmp8804, i64 1
+ %tmp8806 = getelementptr inbounds float, float* %tmp8805, i64 1
+ %tmp8807 = getelementptr inbounds float, float* %tmp8806, i64 1
+ %tmp8808 = getelementptr inbounds float, float* %tmp8807, i64 1
+ %tmp8809 = getelementptr inbounds float, float* %tmp8808, i64 1
+ %tmp8810 = getelementptr inbounds float, float* %tmp8809, i64 1
+ %tmp8811 = getelementptr inbounds float, float* %tmp8810, i64 1
+ %tmp8812 = getelementptr inbounds float, float* %tmp8811, i64 1
+ %tmp8813 = getelementptr inbounds float, float* %tmp8812, i64 1
+ %tmp8814 = getelementptr inbounds float, float* %tmp8813, i64 1
+ %tmp8815 = getelementptr inbounds float, float* %tmp8814, i64 1
+ %tmp8816 = getelementptr inbounds float, float* %tmp8815, i64 1
+ %tmp8817 = getelementptr inbounds float, float* %tmp8816, i64 1
+ %tmp8818 = getelementptr inbounds float, float* %tmp8817, i64 1
+ %tmp8819 = getelementptr inbounds float, float* %tmp8818, i64 1
+ %tmp8820 = getelementptr inbounds float, float* %tmp8819, i64 1
+ %tmp8821 = getelementptr inbounds float, float* %tmp8820, i64 1
+ %tmp8822 = getelementptr inbounds float, float* %tmp8821, i64 1
+ %tmp8823 = getelementptr inbounds float, float* %tmp8822, i64 1
+ %tmp8824 = getelementptr inbounds float, float* %tmp8823, i64 1
+ %tmp8825 = getelementptr inbounds float, float* %tmp8824, i64 1
+ %tmp8826 = getelementptr inbounds float, float* %tmp8825, i64 1
+ %tmp8827 = getelementptr inbounds float, float* %tmp8826, i64 1
+ %tmp8828 = getelementptr inbounds float, float* %tmp8827, i64 1
+ %tmp8829 = getelementptr inbounds float, float* %tmp8828, i64 1
+ %tmp8830 = getelementptr inbounds float, float* %tmp8829, i64 1
+ %tmp8831 = getelementptr inbounds float, float* %tmp8830, i64 1
+ %tmp8832 = getelementptr inbounds float, float* %tmp8831, i64 1
+ %tmp8833 = getelementptr inbounds float, float* %tmp8832, i64 1
+ %tmp8834 = getelementptr inbounds float, float* %tmp8833, i64 1
+ %tmp8835 = getelementptr inbounds float, float* %tmp8834, i64 1
+ %tmp8836 = getelementptr inbounds float, float* %tmp8835, i64 1
+ %tmp8837 = getelementptr inbounds float, float* %tmp8836, i64 1
+ %tmp8838 = getelementptr inbounds float, float* %tmp8837, i64 1
+ %tmp8839 = getelementptr inbounds float, float* %tmp8838, i64 1
+ %tmp8840 = getelementptr inbounds float, float* %tmp8839, i64 1
+ %tmp8841 = getelementptr inbounds float, float* %tmp8840, i64 1
+ %tmp8842 = getelementptr inbounds float, float* %tmp8841, i64 1
+ %tmp8843 = getelementptr inbounds float, float* %tmp8842, i64 1
+ %tmp8844 = getelementptr inbounds float, float* %tmp8843, i64 1
+ %tmp8845 = getelementptr inbounds float, float* %tmp8844, i64 1
+ %tmp8846 = getelementptr inbounds float, float* %tmp8845, i64 1
+ %tmp8847 = getelementptr inbounds float, float* %tmp8846, i64 1
+ %tmp8848 = getelementptr inbounds float, float* %tmp8847, i64 1
+ %tmp8849 = getelementptr inbounds float, float* %tmp8848, i64 1
+ %tmp8850 = getelementptr inbounds float, float* %tmp8849, i64 1
+ %tmp8851 = getelementptr inbounds float, float* %tmp8850, i64 1
+ %tmp8852 = getelementptr inbounds float, float* %tmp8851, i64 1
+ %tmp8853 = getelementptr inbounds float, float* %tmp8852, i64 1
+ %tmp8854 = getelementptr inbounds float, float* %tmp8853, i64 1
+ %tmp8855 = getelementptr inbounds float, float* %tmp8854, i64 1
+ %tmp8856 = getelementptr inbounds float, float* %tmp8855, i64 1
+ %tmp8857 = getelementptr inbounds float, float* %tmp8856, i64 1
+ %tmp8858 = getelementptr inbounds float, float* %tmp8857, i64 1
+ %tmp8859 = getelementptr inbounds float, float* %tmp8858, i64 1
+ %tmp8860 = getelementptr inbounds float, float* %tmp8859, i64 1
+ %tmp8861 = getelementptr inbounds float, float* %tmp8860, i64 1
+ %tmp8862 = getelementptr inbounds float, float* %tmp8861, i64 1
+ %tmp8863 = getelementptr inbounds float, float* %tmp8862, i64 1
+ %tmp8864 = getelementptr inbounds float, float* %tmp8863, i64 1
+ %tmp8865 = getelementptr inbounds float, float* %tmp8864, i64 1
+ %tmp8866 = getelementptr inbounds float, float* %tmp8865, i64 1
+ %tmp8867 = getelementptr inbounds float, float* %tmp8866, i64 1
+ %tmp8868 = getelementptr inbounds float, float* %tmp8867, i64 1
+ %tmp8869 = getelementptr inbounds float, float* %tmp8868, i64 1
+ %tmp8870 = getelementptr inbounds float, float* %tmp8869, i64 1
+ %tmp8871 = getelementptr inbounds float, float* %tmp8870, i64 1
+ %tmp8872 = getelementptr inbounds float, float* %tmp8871, i64 1
+ %tmp8873 = getelementptr inbounds float, float* %tmp8872, i64 1
+ %tmp8874 = getelementptr inbounds float, float* %tmp8873, i64 1
+ %tmp8875 = getelementptr inbounds float, float* %tmp8874, i64 1
+ %tmp8876 = getelementptr inbounds float, float* %tmp8875, i64 1
+ %tmp8877 = getelementptr inbounds float, float* %tmp8876, i64 1
+ %tmp8878 = getelementptr inbounds float, float* %tmp8877, i64 1
+ %tmp8879 = getelementptr inbounds float, float* %tmp8878, i64 1
+ %tmp8880 = getelementptr inbounds float, float* %tmp8879, i64 1
+ %tmp8881 = getelementptr inbounds float, float* %tmp8880, i64 1
+ %tmp8882 = getelementptr inbounds float, float* %tmp8881, i64 1
+ %tmp8883 = getelementptr inbounds float, float* %tmp8882, i64 1
+ %tmp8884 = getelementptr inbounds float, float* %tmp8883, i64 1
+ %tmp8885 = getelementptr inbounds float, float* %tmp8884, i64 1
+ %tmp8886 = getelementptr inbounds float, float* %tmp8885, i64 1
+ %tmp8887 = getelementptr inbounds float, float* %tmp8886, i64 1
+ %tmp8888 = getelementptr inbounds float, float* %tmp8887, i64 1
+ %tmp8889 = getelementptr inbounds float, float* %tmp8888, i64 1
+ %tmp8890 = getelementptr inbounds float, float* %tmp8889, i64 1
+ %tmp8891 = getelementptr inbounds float, float* %tmp8890, i64 1
+ %tmp8892 = getelementptr inbounds float, float* %tmp8891, i64 1
+ %tmp8893 = getelementptr inbounds float, float* %tmp8892, i64 1
+ %tmp8894 = getelementptr inbounds float, float* %tmp8893, i64 1
+ %tmp8895 = getelementptr inbounds float, float* %tmp8894, i64 1
+ %tmp8896 = getelementptr inbounds float, float* %tmp8895, i64 1
+ %tmp8897 = getelementptr inbounds float, float* %tmp8896, i64 1
+ %tmp8898 = getelementptr inbounds float, float* %tmp8897, i64 1
+ %tmp8899 = getelementptr inbounds float, float* %tmp8898, i64 1
+ %tmp8900 = getelementptr inbounds float, float* %tmp8899, i64 1
+ %tmp8901 = getelementptr inbounds float, float* %tmp8900, i64 1
+ %tmp8902 = getelementptr inbounds float, float* %tmp8901, i64 1
+ %tmp8903 = getelementptr inbounds float, float* %tmp8902, i64 1
+ %tmp8904 = getelementptr inbounds float, float* %tmp8903, i64 1
+ %tmp8905 = getelementptr inbounds float, float* %tmp8904, i64 1
+ %tmp8906 = getelementptr inbounds float, float* %tmp8905, i64 1
+ %tmp8907 = getelementptr inbounds float, float* %tmp8906, i64 1
+ %tmp8908 = getelementptr inbounds float, float* %tmp8907, i64 1
+ %tmp8909 = getelementptr inbounds float, float* %tmp8908, i64 1
+ %tmp8910 = getelementptr inbounds float, float* %tmp8909, i64 1
+ %tmp8911 = getelementptr inbounds float, float* %tmp8910, i64 1
+ %tmp8912 = getelementptr inbounds float, float* %tmp8911, i64 1
+ %tmp8913 = getelementptr inbounds float, float* %tmp8912, i64 1
+ %tmp8914 = getelementptr inbounds float, float* %tmp8913, i64 1
+ %tmp8915 = getelementptr inbounds float, float* %tmp8914, i64 1
+ %tmp8916 = getelementptr inbounds float, float* %tmp8915, i64 1
+ %tmp8917 = getelementptr inbounds float, float* %tmp8916, i64 1
+ %tmp8918 = getelementptr inbounds float, float* %tmp8917, i64 1
+ %tmp8919 = getelementptr inbounds float, float* %tmp8918, i64 1
+ %tmp8920 = getelementptr inbounds float, float* %tmp8919, i64 1
+ %tmp8921 = getelementptr inbounds float, float* %tmp8920, i64 1
+ %tmp8922 = getelementptr inbounds float, float* %tmp8921, i64 1
+ %tmp8923 = getelementptr inbounds float, float* %tmp8922, i64 1
+ %tmp8924 = getelementptr inbounds float, float* %tmp8923, i64 1
+ %tmp8925 = getelementptr inbounds float, float* %tmp8924, i64 1
+ %tmp8926 = getelementptr inbounds float, float* %tmp8925, i64 1
+ %tmp8927 = getelementptr inbounds float, float* %tmp8926, i64 1
+ %tmp8928 = getelementptr inbounds float, float* %tmp8927, i64 1
+ %tmp8929 = getelementptr inbounds float, float* %tmp8928, i64 1
+ %tmp8930 = getelementptr inbounds float, float* %tmp8929, i64 1
+ %tmp8931 = getelementptr inbounds float, float* %tmp8930, i64 1
+ %tmp8932 = getelementptr inbounds float, float* %tmp8931, i64 1
+ %tmp8933 = getelementptr inbounds float, float* %tmp8932, i64 1
+ %tmp8934 = getelementptr inbounds float, float* %tmp8933, i64 1
+ %tmp8935 = getelementptr inbounds float, float* %tmp8934, i64 1
+ %tmp8936 = getelementptr inbounds float, float* %tmp8935, i64 1
+ %tmp8937 = getelementptr inbounds float, float* %tmp8936, i64 1
+ %tmp8938 = getelementptr inbounds float, float* %tmp8937, i64 1
+ %tmp8939 = getelementptr inbounds float, float* %tmp8938, i64 1
+ %tmp8940 = getelementptr inbounds float, float* %tmp8939, i64 1
+ %tmp8941 = getelementptr inbounds float, float* %tmp8940, i64 1
+ %tmp8942 = getelementptr inbounds float, float* %tmp8941, i64 1
+ %tmp8943 = getelementptr inbounds float, float* %tmp8942, i64 1
+ %tmp8944 = getelementptr inbounds float, float* %tmp8943, i64 1
+ %tmp8945 = getelementptr inbounds float, float* %tmp8944, i64 1
+ %tmp8946 = getelementptr inbounds float, float* %tmp8945, i64 1
+ %tmp8947 = getelementptr inbounds float, float* %tmp8946, i64 1
+ %tmp8948 = getelementptr inbounds float, float* %tmp8947, i64 1
+ %tmp8949 = getelementptr inbounds float, float* %tmp8948, i64 1
+ %tmp8950 = getelementptr inbounds float, float* %tmp8949, i64 1
+ %tmp8951 = getelementptr inbounds float, float* %tmp8950, i64 1
+ %tmp8952 = getelementptr inbounds float, float* %tmp8951, i64 1
+ %tmp8953 = getelementptr inbounds float, float* %tmp8952, i64 1
+ %tmp8954 = getelementptr inbounds float, float* %tmp8953, i64 1
+ %tmp8955 = getelementptr inbounds float, float* %tmp8954, i64 1
+ %tmp8956 = getelementptr inbounds float, float* %tmp8955, i64 1
+ %tmp8957 = getelementptr inbounds float, float* %tmp8956, i64 1
+ %tmp8958 = getelementptr inbounds float, float* %tmp8957, i64 1
+ %tmp8959 = getelementptr inbounds float, float* %tmp8958, i64 1
+ %tmp8960 = getelementptr inbounds float, float* %tmp8959, i64 1
+ %tmp8961 = getelementptr inbounds float, float* %tmp8960, i64 1
+ %tmp8962 = getelementptr inbounds float, float* %tmp8961, i64 1
+ %tmp8963 = getelementptr inbounds float, float* %tmp8962, i64 1
+ %tmp8964 = getelementptr inbounds float, float* %tmp8963, i64 1
+ %tmp8965 = getelementptr inbounds float, float* %tmp8964, i64 1
+ %tmp8966 = getelementptr inbounds float, float* %tmp8965, i64 1
+ %tmp8967 = getelementptr inbounds float, float* %tmp8966, i64 1
+ %tmp8968 = getelementptr inbounds float, float* %tmp8967, i64 1
+ %tmp8969 = getelementptr inbounds float, float* %tmp8968, i64 1
+ %tmp8970 = getelementptr inbounds float, float* %tmp8969, i64 1
+ %tmp8971 = getelementptr inbounds float, float* %tmp8970, i64 1
+ %tmp8972 = getelementptr inbounds float, float* %tmp8971, i64 1
+ %tmp8973 = getelementptr inbounds float, float* %tmp8972, i64 1
+ %tmp8974 = getelementptr inbounds float, float* %tmp8973, i64 1
+ %tmp8975 = getelementptr inbounds float, float* %tmp8974, i64 1
+ %tmp8976 = getelementptr inbounds float, float* %tmp8975, i64 1
+ %tmp8977 = getelementptr inbounds float, float* %tmp8976, i64 1
+ %tmp8978 = getelementptr inbounds float, float* %tmp8977, i64 1
+ %tmp8979 = getelementptr inbounds float, float* %tmp8978, i64 1
+ %tmp8980 = getelementptr inbounds float, float* %tmp8979, i64 1
+ %tmp8981 = getelementptr inbounds float, float* %tmp8980, i64 1
+ %tmp8982 = getelementptr inbounds float, float* %tmp8981, i64 1
+ %tmp8983 = getelementptr inbounds float, float* %tmp8982, i64 1
+ %tmp8984 = getelementptr inbounds float, float* %tmp8983, i64 1
+ %tmp8985 = getelementptr inbounds float, float* %tmp8984, i64 1
+ %tmp8986 = getelementptr inbounds float, float* %tmp8985, i64 1
+ %tmp8987 = getelementptr inbounds float, float* %tmp8986, i64 1
+ %tmp8988 = getelementptr inbounds float, float* %tmp8987, i64 1
+ %tmp8989 = getelementptr inbounds float, float* %tmp8988, i64 1
+ %tmp8990 = getelementptr inbounds float, float* %tmp8989, i64 1
+ %tmp8991 = getelementptr inbounds float, float* %tmp8990, i64 1
+ %tmp8992 = getelementptr inbounds float, float* %tmp8991, i64 1
+ %tmp8993 = getelementptr inbounds float, float* %tmp8992, i64 1
+ %tmp8994 = getelementptr inbounds float, float* %tmp8993, i64 1
+ %tmp8995 = getelementptr inbounds float, float* %tmp8994, i64 1
+ %tmp8996 = getelementptr inbounds float, float* %tmp8995, i64 1
+ %tmp8997 = getelementptr inbounds float, float* %tmp8996, i64 1
+ %tmp8998 = getelementptr inbounds float, float* %tmp8997, i64 1
+ %tmp8999 = getelementptr inbounds float, float* %tmp8998, i64 1
+ %tmp9000 = getelementptr inbounds float, float* %tmp8999, i64 1
+ %tmp9001 = getelementptr inbounds float, float* %tmp9000, i64 1
+ %tmp9002 = getelementptr inbounds float, float* %tmp9001, i64 1
+ %tmp9003 = getelementptr inbounds float, float* %tmp9002, i64 1
+ %tmp9004 = getelementptr inbounds float, float* %tmp9003, i64 1
+ %tmp9005 = getelementptr inbounds float, float* %tmp9004, i64 1
+ %tmp9006 = getelementptr inbounds float, float* %tmp9005, i64 1
+ %tmp9007 = getelementptr inbounds float, float* %tmp9006, i64 1
+ %tmp9008 = getelementptr inbounds float, float* %tmp9007, i64 1
+ %tmp9009 = getelementptr inbounds float, float* %tmp9008, i64 1
+ %tmp9010 = getelementptr inbounds float, float* %tmp9009, i64 1
+ %tmp9011 = getelementptr inbounds float, float* %tmp9010, i64 1
+ %tmp9012 = getelementptr inbounds float, float* %tmp9011, i64 1
+ %tmp9013 = getelementptr inbounds float, float* %tmp9012, i64 1
+ %tmp9014 = getelementptr inbounds float, float* %tmp9013, i64 1
+ %tmp9015 = getelementptr inbounds float, float* %tmp9014, i64 1
+ %tmp9016 = getelementptr inbounds float, float* %tmp9015, i64 1
+ %tmp9017 = getelementptr inbounds float, float* %tmp9016, i64 1
+ %tmp9018 = getelementptr inbounds float, float* %tmp9017, i64 1
+ %tmp9019 = getelementptr inbounds float, float* %tmp9018, i64 1
+ %tmp9020 = getelementptr inbounds float, float* %tmp9019, i64 1
+ %tmp9021 = getelementptr inbounds float, float* %tmp9020, i64 1
+ %tmp9022 = getelementptr inbounds float, float* %tmp9021, i64 1
+ %tmp9023 = getelementptr inbounds float, float* %tmp9022, i64 1
+ %tmp9024 = getelementptr inbounds float, float* %tmp9023, i64 1
+ %tmp9025 = getelementptr inbounds float, float* %tmp9024, i64 1
+ %tmp9026 = getelementptr inbounds float, float* %tmp9025, i64 1
+ %tmp9027 = getelementptr inbounds float, float* %tmp9026, i64 1
+ %tmp9028 = getelementptr inbounds float, float* %tmp9027, i64 1
+ %tmp9029 = getelementptr inbounds float, float* %tmp9028, i64 1
+ %tmp9030 = getelementptr inbounds float, float* %tmp9029, i64 1
+ %tmp9031 = getelementptr inbounds float, float* %tmp9030, i64 1
+ %tmp9032 = getelementptr inbounds float, float* %tmp9031, i64 1
+ %tmp9033 = getelementptr inbounds float, float* %tmp9032, i64 1
+ %tmp9034 = getelementptr inbounds float, float* %tmp9033, i64 1
+ %tmp9035 = getelementptr inbounds float, float* %tmp9034, i64 1
+ %tmp9036 = getelementptr inbounds float, float* %tmp9035, i64 1
+ %tmp9037 = getelementptr inbounds float, float* %tmp9036, i64 1
+ %tmp9038 = getelementptr inbounds float, float* %tmp9037, i64 1
+ %tmp9039 = getelementptr inbounds float, float* %tmp9038, i64 1
+ %tmp9040 = getelementptr inbounds float, float* %tmp9039, i64 1
+ %tmp9041 = getelementptr inbounds float, float* %tmp9040, i64 1
+ %tmp9042 = getelementptr inbounds float, float* %tmp9041, i64 1
+ %tmp9043 = getelementptr inbounds float, float* %tmp9042, i64 1
+ %tmp9044 = getelementptr inbounds float, float* %tmp9043, i64 1
+ %tmp9045 = getelementptr inbounds float, float* %tmp9044, i64 1
+ %tmp9046 = getelementptr inbounds float, float* %tmp9045, i64 1
+ %tmp9047 = getelementptr inbounds float, float* %tmp9046, i64 1
+ %tmp9048 = getelementptr inbounds float, float* %tmp9047, i64 1
+ %tmp9049 = getelementptr inbounds float, float* %tmp9048, i64 1
+ %tmp9050 = getelementptr inbounds float, float* %tmp9049, i64 1
+ %tmp9051 = getelementptr inbounds float, float* %tmp9050, i64 1
+ %tmp9052 = getelementptr inbounds float, float* %tmp9051, i64 1
+ %tmp9053 = getelementptr inbounds float, float* %tmp9052, i64 1
+ %tmp9054 = getelementptr inbounds float, float* %tmp9053, i64 1
+ %tmp9055 = getelementptr inbounds float, float* %tmp9054, i64 1
+ %tmp9056 = getelementptr inbounds float, float* %tmp9055, i64 1
+ %tmp9057 = getelementptr inbounds float, float* %tmp9056, i64 1
+ %tmp9058 = getelementptr inbounds float, float* %tmp9057, i64 1
+ %tmp9059 = getelementptr inbounds float, float* %tmp9058, i64 1
+ %tmp9060 = getelementptr inbounds float, float* %tmp9059, i64 1
+ %tmp9061 = getelementptr inbounds float, float* %tmp9060, i64 1
+ %tmp9062 = getelementptr inbounds float, float* %tmp9061, i64 1
+ %tmp9063 = getelementptr inbounds float, float* %tmp9062, i64 1
+ %tmp9064 = getelementptr inbounds float, float* %tmp9063, i64 1
+ %tmp9065 = getelementptr inbounds float, float* %tmp9064, i64 1
+ %tmp9066 = getelementptr inbounds float, float* %tmp9065, i64 1
+ %tmp9067 = getelementptr inbounds float, float* %tmp9066, i64 1
+ %tmp9068 = getelementptr inbounds float, float* %tmp9067, i64 1
+ %tmp9069 = getelementptr inbounds float, float* %tmp9068, i64 1
+ %tmp9070 = getelementptr inbounds float, float* %tmp9069, i64 1
+ %tmp9071 = getelementptr inbounds float, float* %tmp9070, i64 1
+ %tmp9072 = getelementptr inbounds float, float* %tmp9071, i64 1
+ %tmp9073 = getelementptr inbounds float, float* %tmp9072, i64 1
+ %tmp9074 = getelementptr inbounds float, float* %tmp9073, i64 1
+ %tmp9075 = getelementptr inbounds float, float* %tmp9074, i64 1
+ %tmp9076 = getelementptr inbounds float, float* %tmp9075, i64 1
+ %tmp9077 = getelementptr inbounds float, float* %tmp9076, i64 1
+ %tmp9078 = getelementptr inbounds float, float* %tmp9077, i64 1
+ %tmp9079 = getelementptr inbounds float, float* %tmp9078, i64 1
+ %tmp9080 = getelementptr inbounds float, float* %tmp9079, i64 1
+ %tmp9081 = getelementptr inbounds float, float* %tmp9080, i64 1
+ %tmp9082 = getelementptr inbounds float, float* %tmp9081, i64 1
+ %tmp9083 = getelementptr inbounds float, float* %tmp9082, i64 1
+ %tmp9084 = getelementptr inbounds float, float* %tmp9083, i64 1
+ %tmp9085 = getelementptr inbounds float, float* %tmp9084, i64 1
+ %tmp9086 = getelementptr inbounds float, float* %tmp9085, i64 1
+ %tmp9087 = getelementptr inbounds float, float* %tmp9086, i64 1
+ %tmp9088 = getelementptr inbounds float, float* %tmp9087, i64 1
+ %tmp9089 = getelementptr inbounds float, float* %tmp9088, i64 1
+ %tmp9090 = getelementptr inbounds float, float* %tmp9089, i64 1
+ %tmp9091 = getelementptr inbounds float, float* %tmp9090, i64 1
+ %tmp9092 = getelementptr inbounds float, float* %tmp9091, i64 1
+ %tmp9093 = getelementptr inbounds float, float* %tmp9092, i64 1
+ %tmp9094 = getelementptr inbounds float, float* %tmp9093, i64 1
+ %tmp9095 = getelementptr inbounds float, float* %tmp9094, i64 1
+ %tmp9096 = getelementptr inbounds float, float* %tmp9095, i64 1
+ %tmp9097 = getelementptr inbounds float, float* %tmp9096, i64 1
+ %tmp9098 = getelementptr inbounds float, float* %tmp9097, i64 1
+ %tmp9099 = getelementptr inbounds float, float* %tmp9098, i64 1
+ %tmp9100 = getelementptr inbounds float, float* %tmp9099, i64 1
+ %tmp9101 = getelementptr inbounds float, float* %tmp9100, i64 1
+ %tmp9102 = getelementptr inbounds float, float* %tmp9101, i64 1
+ %tmp9103 = getelementptr inbounds float, float* %tmp9102, i64 1
+ %tmp9104 = getelementptr inbounds float, float* %tmp9103, i64 1
+ %tmp9105 = getelementptr inbounds float, float* %tmp9104, i64 1
+ %tmp9106 = getelementptr inbounds float, float* %tmp9105, i64 1
+ %tmp9107 = getelementptr inbounds float, float* %tmp9106, i64 1
+ %tmp9108 = getelementptr inbounds float, float* %tmp9107, i64 1
+ %tmp9109 = getelementptr inbounds float, float* %tmp9108, i64 1
+ %tmp9110 = getelementptr inbounds float, float* %tmp9109, i64 1
+ %tmp9111 = getelementptr inbounds float, float* %tmp9110, i64 1
+ %tmp9112 = getelementptr inbounds float, float* %tmp9111, i64 1
+ %tmp9113 = getelementptr inbounds float, float* %tmp9112, i64 1
+ %tmp9114 = getelementptr inbounds float, float* %tmp9113, i64 1
+ %tmp9115 = getelementptr inbounds float, float* %tmp9114, i64 1
+ %tmp9116 = getelementptr inbounds float, float* %tmp9115, i64 1
+ %tmp9117 = getelementptr inbounds float, float* %tmp9116, i64 1
+ %tmp9118 = getelementptr inbounds float, float* %tmp9117, i64 1
+ %tmp9119 = getelementptr inbounds float, float* %tmp9118, i64 1
+ %tmp9120 = getelementptr inbounds float, float* %tmp9119, i64 1
+ %tmp9121 = getelementptr inbounds float, float* %tmp9120, i64 1
+ %tmp9122 = getelementptr inbounds float, float* %tmp9121, i64 1
+ %tmp9123 = getelementptr inbounds float, float* %tmp9122, i64 1
+ %tmp9124 = getelementptr inbounds float, float* %tmp9123, i64 1
+ %tmp9125 = getelementptr inbounds float, float* %tmp9124, i64 1
+ %tmp9126 = getelementptr inbounds float, float* %tmp9125, i64 1
+ %tmp9127 = getelementptr inbounds float, float* %tmp9126, i64 1
+ %tmp9128 = getelementptr inbounds float, float* %tmp9127, i64 1
+ %tmp9129 = getelementptr inbounds float, float* %tmp9128, i64 1
+ %tmp9130 = getelementptr inbounds float, float* %tmp9129, i64 1
+ %tmp9131 = getelementptr inbounds float, float* %tmp9130, i64 1
+ %tmp9132 = getelementptr inbounds float, float* %tmp9131, i64 1
+ %tmp9133 = getelementptr inbounds float, float* %tmp9132, i64 1
+ %tmp9134 = getelementptr inbounds float, float* %tmp9133, i64 1
+ %tmp9135 = getelementptr inbounds float, float* %tmp9134, i64 1
+ %tmp9136 = getelementptr inbounds float, float* %tmp9135, i64 1
+ %tmp9137 = getelementptr inbounds float, float* %tmp9136, i64 1
+ %tmp9138 = getelementptr inbounds float, float* %tmp9137, i64 1
+ %tmp9139 = getelementptr inbounds float, float* %tmp9138, i64 1
+ %tmp9140 = getelementptr inbounds float, float* %tmp9139, i64 1
+ %tmp9141 = getelementptr inbounds float, float* %tmp9140, i64 1
+ %tmp9142 = getelementptr inbounds float, float* %tmp9141, i64 1
+ %tmp9143 = getelementptr inbounds float, float* %tmp9142, i64 1
+ %tmp9144 = getelementptr inbounds float, float* %tmp9143, i64 1
+ %tmp9145 = getelementptr inbounds float, float* %tmp9144, i64 1
+ %tmp9146 = getelementptr inbounds float, float* %tmp9145, i64 1
+ %tmp9147 = getelementptr inbounds float, float* %tmp9146, i64 1
+ %tmp9148 = getelementptr inbounds float, float* %tmp9147, i64 1
+ %tmp9149 = getelementptr inbounds float, float* %tmp9148, i64 1
+ %tmp9150 = getelementptr inbounds float, float* %tmp9149, i64 1
+ %tmp9151 = getelementptr inbounds float, float* %tmp9150, i64 1
+ %tmp9152 = getelementptr inbounds float, float* %tmp9151, i64 1
+ %tmp9153 = getelementptr inbounds float, float* %tmp9152, i64 1
+ %tmp9154 = getelementptr inbounds float, float* %tmp9153, i64 1
+ %tmp9155 = getelementptr inbounds float, float* %tmp9154, i64 1
+ %tmp9156 = getelementptr inbounds float, float* %tmp9155, i64 1
+ %tmp9157 = getelementptr inbounds float, float* %tmp9156, i64 1
+ %tmp9158 = getelementptr inbounds float, float* %tmp9157, i64 1
+ %tmp9159 = getelementptr inbounds float, float* %tmp9158, i64 1
+ %tmp9160 = getelementptr inbounds float, float* %tmp9159, i64 1
+ %tmp9161 = getelementptr inbounds float, float* %tmp9160, i64 1
+ %tmp9162 = getelementptr inbounds float, float* %tmp9161, i64 1
+ %tmp9163 = getelementptr inbounds float, float* %tmp9162, i64 1
+ %tmp9164 = getelementptr inbounds float, float* %tmp9163, i64 1
+ %tmp9165 = getelementptr inbounds float, float* %tmp9164, i64 1
+ %tmp9166 = getelementptr inbounds float, float* %tmp9165, i64 1
+ %tmp9167 = getelementptr inbounds float, float* %tmp9166, i64 1
+ %tmp9168 = getelementptr inbounds float, float* %tmp9167, i64 1
+ %tmp9169 = getelementptr inbounds float, float* %tmp9168, i64 1
+ %tmp9170 = getelementptr inbounds float, float* %tmp9169, i64 1
+ %tmp9171 = getelementptr inbounds float, float* %tmp9170, i64 1
+ %tmp9172 = getelementptr inbounds float, float* %tmp9171, i64 1
+ %tmp9173 = getelementptr inbounds float, float* %tmp9172, i64 1
+ %tmp9174 = getelementptr inbounds float, float* %tmp9173, i64 1
+ %tmp9175 = getelementptr inbounds float, float* %tmp9174, i64 1
+ %tmp9176 = getelementptr inbounds float, float* %tmp9175, i64 1
+ %tmp9177 = getelementptr inbounds float, float* %tmp9176, i64 1
+ %tmp9178 = getelementptr inbounds float, float* %tmp9177, i64 1
+ %tmp9179 = getelementptr inbounds float, float* %tmp9178, i64 1
+ %tmp9180 = getelementptr inbounds float, float* %tmp9179, i64 1
+ %tmp9181 = getelementptr inbounds float, float* %tmp9180, i64 1
+ %tmp9182 = getelementptr inbounds float, float* %tmp9181, i64 1
+ %tmp9183 = getelementptr inbounds float, float* %tmp9182, i64 1
+ %tmp9184 = getelementptr inbounds float, float* %tmp9183, i64 1
+ %tmp9185 = getelementptr inbounds float, float* %tmp9184, i64 1
+ %tmp9186 = getelementptr inbounds float, float* %tmp9185, i64 1
+ %tmp9187 = getelementptr inbounds float, float* %tmp9186, i64 1
+ %tmp9188 = getelementptr inbounds float, float* %tmp9187, i64 1
+ %tmp9189 = getelementptr inbounds float, float* %tmp9188, i64 1
+ %tmp9190 = getelementptr inbounds float, float* %tmp9189, i64 1
+ %tmp9191 = getelementptr inbounds float, float* %tmp9190, i64 1
+ %tmp9192 = getelementptr inbounds float, float* %tmp9191, i64 1
+ %tmp9193 = getelementptr inbounds float, float* %tmp9192, i64 1
+ %tmp9194 = getelementptr inbounds float, float* %tmp9193, i64 1
+ %tmp9195 = getelementptr inbounds float, float* %tmp9194, i64 1
+ %tmp9196 = getelementptr inbounds float, float* %tmp9195, i64 1
+ %tmp9197 = getelementptr inbounds float, float* %tmp9196, i64 1
+ %tmp9198 = getelementptr inbounds float, float* %tmp9197, i64 1
+ %tmp9199 = getelementptr inbounds float, float* %tmp9198, i64 1
+ %tmp9200 = getelementptr inbounds float, float* %tmp9199, i64 1
+ %tmp9201 = getelementptr inbounds float, float* %tmp9200, i64 1
+ %tmp9202 = getelementptr inbounds float, float* %tmp9201, i64 1
+ %tmp9203 = getelementptr inbounds float, float* %tmp9202, i64 1
+ %tmp9204 = getelementptr inbounds float, float* %tmp9203, i64 1
+ %tmp9205 = getelementptr inbounds float, float* %tmp9204, i64 1
+ %tmp9206 = getelementptr inbounds float, float* %tmp9205, i64 1
+ %tmp9207 = getelementptr inbounds float, float* %tmp9206, i64 1
+ %tmp9208 = getelementptr inbounds float, float* %tmp9207, i64 1
+ %tmp9209 = getelementptr inbounds float, float* %tmp9208, i64 1
+ %tmp9210 = getelementptr inbounds float, float* %tmp9209, i64 1
+ %tmp9211 = getelementptr inbounds float, float* %tmp9210, i64 1
+ %tmp9212 = getelementptr inbounds float, float* %tmp9211, i64 1
+ %tmp9213 = getelementptr inbounds float, float* %tmp9212, i64 1
+ %tmp9214 = getelementptr inbounds float, float* %tmp9213, i64 1
+ %tmp9215 = getelementptr inbounds float, float* %tmp9214, i64 1
+ %tmp9216 = getelementptr inbounds float, float* %tmp9215, i64 1
+ %tmp9217 = getelementptr inbounds float, float* %tmp9216, i64 1
+ %tmp9218 = getelementptr inbounds float, float* %tmp9217, i64 1
+ %tmp9219 = getelementptr inbounds float, float* %tmp9218, i64 1
+ %tmp9220 = getelementptr inbounds float, float* %tmp9219, i64 1
+ %tmp9221 = getelementptr inbounds float, float* %tmp9220, i64 1
+ %tmp9222 = getelementptr inbounds float, float* %tmp9221, i64 1
+ %tmp9223 = getelementptr inbounds float, float* %tmp9222, i64 1
+ %tmp9224 = getelementptr inbounds float, float* %tmp9223, i64 1
+ %tmp9225 = getelementptr inbounds float, float* %tmp9224, i64 1
+ %tmp9226 = getelementptr inbounds float, float* %tmp9225, i64 1
+ %tmp9227 = getelementptr inbounds float, float* %tmp9226, i64 1
+ %tmp9228 = getelementptr inbounds float, float* %tmp9227, i64 1
+ %tmp9229 = getelementptr inbounds float, float* %tmp9228, i64 1
+ %tmp9230 = getelementptr inbounds float, float* %tmp9229, i64 1
+ %tmp9231 = getelementptr inbounds float, float* %tmp9230, i64 1
+ %tmp9232 = getelementptr inbounds float, float* %tmp9231, i64 1
+ %tmp9233 = getelementptr inbounds float, float* %tmp9232, i64 1
+ %tmp9234 = getelementptr inbounds float, float* %tmp9233, i64 1
+ %tmp9235 = getelementptr inbounds float, float* %tmp9234, i64 1
+ %tmp9236 = getelementptr inbounds float, float* %tmp9235, i64 1
+ %tmp9237 = getelementptr inbounds float, float* %tmp9236, i64 1
+ %tmp9238 = getelementptr inbounds float, float* %tmp9237, i64 1
+ %tmp9239 = getelementptr inbounds float, float* %tmp9238, i64 1
+ %tmp9240 = getelementptr inbounds float, float* %tmp9239, i64 1
+ %tmp9241 = getelementptr inbounds float, float* %tmp9240, i64 1
+ %tmp9242 = getelementptr inbounds float, float* %tmp9241, i64 1
+ %tmp9243 = getelementptr inbounds float, float* %tmp9242, i64 1
+ %tmp9244 = getelementptr inbounds float, float* %tmp9243, i64 1
+ %tmp9245 = getelementptr inbounds float, float* %tmp9244, i64 1
+ %tmp9246 = getelementptr inbounds float, float* %tmp9245, i64 1
+ %tmp9247 = getelementptr inbounds float, float* %tmp9246, i64 1
+ %tmp9248 = getelementptr inbounds float, float* %tmp9247, i64 1
+ %tmp9249 = getelementptr inbounds float, float* %tmp9248, i64 1
+ %tmp9250 = getelementptr inbounds float, float* %tmp9249, i64 1
+ %tmp9251 = getelementptr inbounds float, float* %tmp9250, i64 1
+ %tmp9252 = getelementptr inbounds float, float* %tmp9251, i64 1
+ %tmp9253 = getelementptr inbounds float, float* %tmp9252, i64 1
+ %tmp9254 = getelementptr inbounds float, float* %tmp9253, i64 1
+ %tmp9255 = getelementptr inbounds float, float* %tmp9254, i64 1
+ %tmp9256 = getelementptr inbounds float, float* %tmp9255, i64 1
+ %tmp9257 = getelementptr inbounds float, float* %tmp9256, i64 1
+ %tmp9258 = getelementptr inbounds float, float* %tmp9257, i64 1
+ %tmp9259 = getelementptr inbounds float, float* %tmp9258, i64 1
+ %tmp9260 = getelementptr inbounds float, float* %tmp9259, i64 1
+ %tmp9261 = getelementptr inbounds float, float* %tmp9260, i64 1
+ %tmp9262 = getelementptr inbounds float, float* %tmp9261, i64 1
+ %tmp9263 = getelementptr inbounds float, float* %tmp9262, i64 1
+ %tmp9264 = getelementptr inbounds float, float* %tmp9263, i64 1
+ %tmp9265 = getelementptr inbounds float, float* %tmp9264, i64 1
+ %tmp9266 = getelementptr inbounds float, float* %tmp9265, i64 1
+ %tmp9267 = getelementptr inbounds float, float* %tmp9266, i64 1
+ %tmp9268 = getelementptr inbounds float, float* %tmp9267, i64 1
+ %tmp9269 = getelementptr inbounds float, float* %tmp9268, i64 1
+ %tmp9270 = getelementptr inbounds float, float* %tmp9269, i64 1
+ %tmp9271 = getelementptr inbounds float, float* %tmp9270, i64 1
+ %tmp9272 = getelementptr inbounds float, float* %tmp9271, i64 1
+ %tmp9273 = getelementptr inbounds float, float* %tmp9272, i64 1
+ %tmp9274 = getelementptr inbounds float, float* %tmp9273, i64 1
+ %tmp9275 = getelementptr inbounds float, float* %tmp9274, i64 1
+ %tmp9276 = getelementptr inbounds float, float* %tmp9275, i64 1
+ %tmp9277 = getelementptr inbounds float, float* %tmp9276, i64 1
+ %tmp9278 = getelementptr inbounds float, float* %tmp9277, i64 1
+ %tmp9279 = getelementptr inbounds float, float* %tmp9278, i64 1
+ %tmp9280 = getelementptr inbounds float, float* %tmp9279, i64 1
+ %tmp9281 = getelementptr inbounds float, float* %tmp9280, i64 1
+ %tmp9282 = getelementptr inbounds float, float* %tmp9281, i64 1
+ %tmp9283 = getelementptr inbounds float, float* %tmp9282, i64 1
+ %tmp9284 = getelementptr inbounds float, float* %tmp9283, i64 1
+ %tmp9285 = getelementptr inbounds float, float* %tmp9284, i64 1
+ %tmp9286 = getelementptr inbounds float, float* %tmp9285, i64 1
+ %tmp9287 = getelementptr inbounds float, float* %tmp9286, i64 1
+ %tmp9288 = getelementptr inbounds float, float* %tmp9287, i64 1
+ %tmp9289 = getelementptr inbounds float, float* %tmp9288, i64 1
+ %tmp9290 = getelementptr inbounds float, float* %tmp9289, i64 1
+ %tmp9291 = getelementptr inbounds float, float* %tmp9290, i64 1
+ %tmp9292 = getelementptr inbounds float, float* %tmp9291, i64 1
+ %tmp9293 = getelementptr inbounds float, float* %tmp9292, i64 1
+ %tmp9294 = getelementptr inbounds float, float* %tmp9293, i64 1
+ %tmp9295 = getelementptr inbounds float, float* %tmp9294, i64 1
+ %tmp9296 = getelementptr inbounds float, float* %tmp9295, i64 1
+ %tmp9297 = getelementptr inbounds float, float* %tmp9296, i64 1
+ %tmp9298 = getelementptr inbounds float, float* %tmp9297, i64 1
+ %tmp9299 = getelementptr inbounds float, float* %tmp9298, i64 1
+ %tmp9300 = getelementptr inbounds float, float* %tmp9299, i64 1
+ %tmp9301 = getelementptr inbounds float, float* %tmp9300, i64 1
+ %tmp9302 = getelementptr inbounds float, float* %tmp9301, i64 1
+ %tmp9303 = getelementptr inbounds float, float* %tmp9302, i64 1
+ %tmp9304 = getelementptr inbounds float, float* %tmp9303, i64 1
+ %tmp9305 = getelementptr inbounds float, float* %tmp9304, i64 1
+ %tmp9306 = getelementptr inbounds float, float* %tmp9305, i64 1
+ %tmp9307 = getelementptr inbounds float, float* %tmp9306, i64 1
+ %tmp9308 = getelementptr inbounds float, float* %tmp9307, i64 1
+ %tmp9309 = getelementptr inbounds float, float* %tmp9308, i64 1
+ %tmp9310 = getelementptr inbounds float, float* %tmp9309, i64 1
+ %tmp9311 = getelementptr inbounds float, float* %tmp9310, i64 1
+ %tmp9312 = getelementptr inbounds float, float* %tmp9311, i64 1
+ %tmp9313 = getelementptr inbounds float, float* %tmp9312, i64 1
+ %tmp9314 = getelementptr inbounds float, float* %tmp9313, i64 1
+ %tmp9315 = getelementptr inbounds float, float* %tmp9314, i64 1
+ %tmp9316 = getelementptr inbounds float, float* %tmp9315, i64 1
+ %tmp9317 = getelementptr inbounds float, float* %tmp9316, i64 1
+ %tmp9318 = getelementptr inbounds float, float* %tmp9317, i64 1
+ %tmp9319 = getelementptr inbounds float, float* %tmp9318, i64 1
+ %tmp9320 = getelementptr inbounds float, float* %tmp9319, i64 1
+ %tmp9321 = getelementptr inbounds float, float* %tmp9320, i64 1
+ %tmp9322 = getelementptr inbounds float, float* %tmp9321, i64 1
+ %tmp9323 = getelementptr inbounds float, float* %tmp9322, i64 1
+ %tmp9324 = getelementptr inbounds float, float* %tmp9323, i64 1
+ %tmp9325 = getelementptr inbounds float, float* %tmp9324, i64 1
+ %tmp9326 = getelementptr inbounds float, float* %tmp9325, i64 1
+ %tmp9327 = getelementptr inbounds float, float* %tmp9326, i64 1
+ %tmp9328 = getelementptr inbounds float, float* %tmp9327, i64 1
+ %tmp9329 = getelementptr inbounds float, float* %tmp9328, i64 1
+ %tmp9330 = getelementptr inbounds float, float* %tmp9329, i64 1
+ %tmp9331 = getelementptr inbounds float, float* %tmp9330, i64 1
+ %tmp9332 = getelementptr inbounds float, float* %tmp9331, i64 1
+ %tmp9333 = getelementptr inbounds float, float* %tmp9332, i64 1
+ %tmp9334 = getelementptr inbounds float, float* %tmp9333, i64 1
+ %tmp9335 = getelementptr inbounds float, float* %tmp9334, i64 1
+ %tmp9336 = getelementptr inbounds float, float* %tmp9335, i64 1
+ %tmp9337 = getelementptr inbounds float, float* %tmp9336, i64 1
+ %tmp9338 = getelementptr inbounds float, float* %tmp9337, i64 1
+ %tmp9339 = getelementptr inbounds float, float* %tmp9338, i64 1
+ %tmp9340 = getelementptr inbounds float, float* %tmp9339, i64 1
+ %tmp9341 = getelementptr inbounds float, float* %tmp9340, i64 1
+ %tmp9342 = getelementptr inbounds float, float* %tmp9341, i64 1
+ %tmp9343 = getelementptr inbounds float, float* %tmp9342, i64 1
+ %tmp9344 = getelementptr inbounds float, float* %tmp9343, i64 1
+ %tmp9345 = getelementptr inbounds float, float* %tmp9344, i64 1
+ %tmp9346 = getelementptr inbounds float, float* %tmp9345, i64 1
+ %tmp9347 = getelementptr inbounds float, float* %tmp9346, i64 1
+ %tmp9348 = getelementptr inbounds float, float* %tmp9347, i64 1
+ %tmp9349 = getelementptr inbounds float, float* %tmp9348, i64 1
+ %tmp9350 = getelementptr inbounds float, float* %tmp9349, i64 1
+ %tmp9351 = getelementptr inbounds float, float* %tmp9350, i64 1
+ %tmp9352 = getelementptr inbounds float, float* %tmp9351, i64 1
+ %tmp9353 = getelementptr inbounds float, float* %tmp9352, i64 1
+ %tmp9354 = getelementptr inbounds float, float* %tmp9353, i64 1
+ %tmp9355 = getelementptr inbounds float, float* %tmp9354, i64 1
+ %tmp9356 = getelementptr inbounds float, float* %tmp9355, i64 1
+ %tmp9357 = getelementptr inbounds float, float* %tmp9356, i64 1
+ %tmp9358 = getelementptr inbounds float, float* %tmp9357, i64 1
+ %tmp9359 = getelementptr inbounds float, float* %tmp9358, i64 1
+ %tmp9360 = getelementptr inbounds float, float* %tmp9359, i64 1
+ %tmp9361 = getelementptr inbounds float, float* %tmp9360, i64 1
+ %tmp9362 = getelementptr inbounds float, float* %tmp9361, i64 1
+ %tmp9363 = getelementptr inbounds float, float* %tmp9362, i64 1
+ %tmp9364 = getelementptr inbounds float, float* %tmp9363, i64 1
+ %tmp9365 = getelementptr inbounds float, float* %tmp9364, i64 1
+ %tmp9366 = getelementptr inbounds float, float* %tmp9365, i64 1
+ %tmp9367 = getelementptr inbounds float, float* %tmp9366, i64 1
+ %tmp9368 = getelementptr inbounds float, float* %tmp9367, i64 1
+ %tmp9369 = getelementptr inbounds float, float* %tmp9368, i64 1
+ %tmp9370 = getelementptr inbounds float, float* %tmp9369, i64 1
+ %tmp9371 = getelementptr inbounds float, float* %tmp9370, i64 1
+ %tmp9372 = getelementptr inbounds float, float* %tmp9371, i64 1
+ %tmp9373 = getelementptr inbounds float, float* %tmp9372, i64 1
+ %tmp9374 = getelementptr inbounds float, float* %tmp9373, i64 1
+ %tmp9375 = getelementptr inbounds float, float* %tmp9374, i64 1
+ %tmp9376 = getelementptr inbounds float, float* %tmp9375, i64 1
+ %tmp9377 = getelementptr inbounds float, float* %tmp9376, i64 1
+ %tmp9378 = getelementptr inbounds float, float* %tmp9377, i64 1
+ %tmp9379 = getelementptr inbounds float, float* %tmp9378, i64 1
+ %tmp9380 = getelementptr inbounds float, float* %tmp9379, i64 1
+ %tmp9381 = getelementptr inbounds float, float* %tmp9380, i64 1
+ %tmp9382 = getelementptr inbounds float, float* %tmp9381, i64 1
+ %tmp9383 = getelementptr inbounds float, float* %tmp9382, i64 1
+ %tmp9384 = getelementptr inbounds float, float* %tmp9383, i64 1
+ %tmp9385 = getelementptr inbounds float, float* %tmp9384, i64 1
+ %tmp9386 = getelementptr inbounds float, float* %tmp9385, i64 1
+ %tmp9387 = getelementptr inbounds float, float* %tmp9386, i64 1
+ %tmp9388 = getelementptr inbounds float, float* %tmp9387, i64 1
+ %tmp9389 = getelementptr inbounds float, float* %tmp9388, i64 1
+ %tmp9390 = getelementptr inbounds float, float* %tmp9389, i64 1
+ %tmp9391 = getelementptr inbounds float, float* %tmp9390, i64 1
+ %tmp9392 = getelementptr inbounds float, float* %tmp9391, i64 1
+ %tmp9393 = getelementptr inbounds float, float* %tmp9392, i64 1
+ %tmp9394 = getelementptr inbounds float, float* %tmp9393, i64 1
+ %tmp9395 = getelementptr inbounds float, float* %tmp9394, i64 1
+ %tmp9396 = getelementptr inbounds float, float* %tmp9395, i64 1
+ %tmp9397 = getelementptr inbounds float, float* %tmp9396, i64 1
+ %tmp9398 = getelementptr inbounds float, float* %tmp9397, i64 1
+ %tmp9399 = getelementptr inbounds float, float* %tmp9398, i64 1
+ %tmp9400 = getelementptr inbounds float, float* %tmp9399, i64 1
+ %tmp9401 = getelementptr inbounds float, float* %tmp9400, i64 1
+ %tmp9402 = getelementptr inbounds float, float* %tmp9401, i64 1
+ %tmp9403 = getelementptr inbounds float, float* %tmp9402, i64 1
+ %tmp9404 = getelementptr inbounds float, float* %tmp9403, i64 1
+ %tmp9405 = getelementptr inbounds float, float* %tmp9404, i64 1
+ %tmp9406 = getelementptr inbounds float, float* %tmp9405, i64 1
+ %tmp9407 = getelementptr inbounds float, float* %tmp9406, i64 1
+ %tmp9408 = getelementptr inbounds float, float* %tmp9407, i64 1
+ %tmp9409 = getelementptr inbounds float, float* %tmp9408, i64 1
+ %tmp9410 = getelementptr inbounds float, float* %tmp9409, i64 1
+ %tmp9411 = getelementptr inbounds float, float* %tmp9410, i64 1
+ %tmp9412 = getelementptr inbounds float, float* %tmp9411, i64 1
+ %tmp9413 = getelementptr inbounds float, float* %tmp9412, i64 1
+ %tmp9414 = getelementptr inbounds float, float* %tmp9413, i64 1
+ %tmp9415 = getelementptr inbounds float, float* %tmp9414, i64 1
+ %tmp9416 = getelementptr inbounds float, float* %tmp9415, i64 1
+ %tmp9417 = getelementptr inbounds float, float* %tmp9416, i64 1
+ %tmp9418 = getelementptr inbounds float, float* %tmp9417, i64 1
+ %tmp9419 = getelementptr inbounds float, float* %tmp9418, i64 1
+ %tmp9420 = getelementptr inbounds float, float* %tmp9419, i64 1
+ %tmp9421 = getelementptr inbounds float, float* %tmp9420, i64 1
+ %tmp9422 = getelementptr inbounds float, float* %tmp9421, i64 1
+ %tmp9423 = getelementptr inbounds float, float* %tmp9422, i64 1
+ %tmp9424 = getelementptr inbounds float, float* %tmp9423, i64 1
+ %tmp9425 = getelementptr inbounds float, float* %tmp9424, i64 1
+ %tmp9426 = getelementptr inbounds float, float* %tmp9425, i64 1
+ %tmp9427 = getelementptr inbounds float, float* %tmp9426, i64 1
+ %tmp9428 = getelementptr inbounds float, float* %tmp9427, i64 1
+ %tmp9429 = getelementptr inbounds float, float* %tmp9428, i64 1
+ %tmp9430 = getelementptr inbounds float, float* %tmp9429, i64 1
+ %tmp9431 = getelementptr inbounds float, float* %tmp9430, i64 1
+ %tmp9432 = getelementptr inbounds float, float* %tmp9431, i64 1
+ %tmp9433 = getelementptr inbounds float, float* %tmp9432, i64 1
+ %tmp9434 = getelementptr inbounds float, float* %tmp9433, i64 1
+ %tmp9435 = getelementptr inbounds float, float* %tmp9434, i64 1
+ %tmp9436 = getelementptr inbounds float, float* %tmp9435, i64 1
+ %tmp9437 = getelementptr inbounds float, float* %tmp9436, i64 1
+ %tmp9438 = getelementptr inbounds float, float* %tmp9437, i64 1
+ %tmp9439 = getelementptr inbounds float, float* %tmp9438, i64 1
+ %tmp9440 = getelementptr inbounds float, float* %tmp9439, i64 1
+ %tmp9441 = getelementptr inbounds float, float* %tmp9440, i64 1
+ %tmp9442 = getelementptr inbounds float, float* %tmp9441, i64 1
+ %tmp9443 = getelementptr inbounds float, float* %tmp9442, i64 1
+ %tmp9444 = getelementptr inbounds float, float* %tmp9443, i64 1
+ %tmp9445 = getelementptr inbounds float, float* %tmp9444, i64 1
+ %tmp9446 = getelementptr inbounds float, float* %tmp9445, i64 1
+ %tmp9447 = getelementptr inbounds float, float* %tmp9446, i64 1
+ %tmp9448 = getelementptr inbounds float, float* %tmp9447, i64 1
+ %tmp9449 = getelementptr inbounds float, float* %tmp9448, i64 1
+ %tmp9450 = getelementptr inbounds float, float* %tmp9449, i64 1
+ %tmp9451 = getelementptr inbounds float, float* %tmp9450, i64 1
+ %tmp9452 = getelementptr inbounds float, float* %tmp9451, i64 1
+ %tmp9453 = getelementptr inbounds float, float* %tmp9452, i64 1
+ %tmp9454 = getelementptr inbounds float, float* %tmp9453, i64 1
+ %tmp9455 = getelementptr inbounds float, float* %tmp9454, i64 1
+ %tmp9456 = getelementptr inbounds float, float* %tmp9455, i64 1
+ %tmp9457 = getelementptr inbounds float, float* %tmp9456, i64 1
+ %tmp9458 = getelementptr inbounds float, float* %tmp9457, i64 1
+ %tmp9459 = getelementptr inbounds float, float* %tmp9458, i64 1
+ %tmp9460 = getelementptr inbounds float, float* %tmp9459, i64 1
+ %tmp9461 = getelementptr inbounds float, float* %tmp9460, i64 1
+ %tmp9462 = getelementptr inbounds float, float* %tmp9461, i64 1
+ %tmp9463 = getelementptr inbounds float, float* %tmp9462, i64 1
+ %tmp9464 = getelementptr inbounds float, float* %tmp9463, i64 1
+ %tmp9465 = getelementptr inbounds float, float* %tmp9464, i64 1
+ %tmp9466 = getelementptr inbounds float, float* %tmp9465, i64 1
+ %tmp9467 = getelementptr inbounds float, float* %tmp9466, i64 1
+ %tmp9468 = getelementptr inbounds float, float* %tmp9467, i64 1
+ %tmp9469 = getelementptr inbounds float, float* %tmp9468, i64 1
+ %tmp9470 = getelementptr inbounds float, float* %tmp9469, i64 1
+ %tmp9471 = getelementptr inbounds float, float* %tmp9470, i64 1
+ %tmp9472 = getelementptr inbounds float, float* %tmp9471, i64 1
+ %tmp9473 = getelementptr inbounds float, float* %tmp9472, i64 1
+ %tmp9474 = getelementptr inbounds float, float* %tmp9473, i64 1
+ %tmp9475 = getelementptr inbounds float, float* %tmp9474, i64 1
+ %tmp9476 = getelementptr inbounds float, float* %tmp9475, i64 1
+ %tmp9477 = getelementptr inbounds float, float* %tmp9476, i64 1
+ %tmp9478 = getelementptr inbounds float, float* %tmp9477, i64 1
+ %tmp9479 = getelementptr inbounds float, float* %tmp9478, i64 1
+ %tmp9480 = getelementptr inbounds float, float* %tmp9479, i64 1
+ %tmp9481 = getelementptr inbounds float, float* %tmp9480, i64 1
+ %tmp9482 = getelementptr inbounds float, float* %tmp9481, i64 1
+ %tmp9483 = getelementptr inbounds float, float* %tmp9482, i64 1
+ %tmp9484 = getelementptr inbounds float, float* %tmp9483, i64 1
+ %tmp9485 = getelementptr inbounds float, float* %tmp9484, i64 1
+ %tmp9486 = getelementptr inbounds float, float* %tmp9485, i64 1
+ %tmp9487 = getelementptr inbounds float, float* %tmp9486, i64 1
+ %tmp9488 = getelementptr inbounds float, float* %tmp9487, i64 1
+ %tmp9489 = getelementptr inbounds float, float* %tmp9488, i64 1
+ %tmp9490 = getelementptr inbounds float, float* %tmp9489, i64 1
+ %tmp9491 = getelementptr inbounds float, float* %tmp9490, i64 1
+ %tmp9492 = getelementptr inbounds float, float* %tmp9491, i64 1
+ %tmp9493 = getelementptr inbounds float, float* %tmp9492, i64 1
+ %tmp9494 = getelementptr inbounds float, float* %tmp9493, i64 1
+ %tmp9495 = getelementptr inbounds float, float* %tmp9494, i64 1
+ %tmp9496 = getelementptr inbounds float, float* %tmp9495, i64 1
+ %tmp9497 = getelementptr inbounds float, float* %tmp9496, i64 1
+ %tmp9498 = getelementptr inbounds float, float* %tmp9497, i64 1
+ %tmp9499 = getelementptr inbounds float, float* %tmp9498, i64 1
+ %tmp9500 = getelementptr inbounds float, float* %tmp9499, i64 1
+ %tmp9501 = getelementptr inbounds float, float* %tmp9500, i64 1
+ %tmp9502 = getelementptr inbounds float, float* %tmp9501, i64 1
+ %tmp9503 = getelementptr inbounds float, float* %tmp9502, i64 1
+ %tmp9504 = getelementptr inbounds float, float* %tmp9503, i64 1
+ %tmp9505 = getelementptr inbounds float, float* %tmp9504, i64 1
+ %tmp9506 = getelementptr inbounds float, float* %tmp9505, i64 1
+ %tmp9507 = getelementptr inbounds float, float* %tmp9506, i64 1
+ %tmp9508 = getelementptr inbounds float, float* %tmp9507, i64 1
+ %tmp9509 = getelementptr inbounds float, float* %tmp9508, i64 1
+ %tmp9510 = getelementptr inbounds float, float* %tmp9509, i64 1
+ %tmp9511 = getelementptr inbounds float, float* %tmp9510, i64 1
+ %tmp9512 = getelementptr inbounds float, float* %tmp9511, i64 1
+ %tmp9513 = getelementptr inbounds float, float* %tmp9512, i64 1
+ %tmp9514 = getelementptr inbounds float, float* %tmp9513, i64 1
+ %tmp9515 = getelementptr inbounds float, float* %tmp9514, i64 1
+ %tmp9516 = getelementptr inbounds float, float* %tmp9515, i64 1
+ %tmp9517 = getelementptr inbounds float, float* %tmp9516, i64 1
+ %tmp9518 = getelementptr inbounds float, float* %tmp9517, i64 1
+ %tmp9519 = getelementptr inbounds float, float* %tmp9518, i64 1
+ %tmp9520 = getelementptr inbounds float, float* %tmp9519, i64 1
+ %tmp9521 = getelementptr inbounds float, float* %tmp9520, i64 1
+ %tmp9522 = getelementptr inbounds float, float* %tmp9521, i64 1
+ %tmp9523 = getelementptr inbounds float, float* %tmp9522, i64 1
+ %tmp9524 = getelementptr inbounds float, float* %tmp9523, i64 1
+ %tmp9525 = getelementptr inbounds float, float* %tmp9524, i64 1
+ %tmp9526 = getelementptr inbounds float, float* %tmp9525, i64 1
+ %tmp9527 = getelementptr inbounds float, float* %tmp9526, i64 1
+ %tmp9528 = getelementptr inbounds float, float* %tmp9527, i64 1
+ %tmp9529 = getelementptr inbounds float, float* %tmp9528, i64 1
+ %tmp9530 = getelementptr inbounds float, float* %tmp9529, i64 1
+ %tmp9531 = getelementptr inbounds float, float* %tmp9530, i64 1
+ %tmp9532 = getelementptr inbounds float, float* %tmp9531, i64 1
+ %tmp9533 = getelementptr inbounds float, float* %tmp9532, i64 1
+ %tmp9534 = getelementptr inbounds float, float* %tmp9533, i64 1
+ %tmp9535 = getelementptr inbounds float, float* %tmp9534, i64 1
+ %tmp9536 = getelementptr inbounds float, float* %tmp9535, i64 1
+ %tmp9537 = getelementptr inbounds float, float* %tmp9536, i64 1
+ %tmp9538 = getelementptr inbounds float, float* %tmp9537, i64 1
+ %tmp9539 = getelementptr inbounds float, float* %tmp9538, i64 1
+ %tmp9540 = getelementptr inbounds float, float* %tmp9539, i64 1
+ %tmp9541 = getelementptr inbounds float, float* %tmp9540, i64 1
+ %tmp9542 = getelementptr inbounds float, float* %tmp9541, i64 1
+ %tmp9543 = getelementptr inbounds float, float* %tmp9542, i64 1
+ %tmp9544 = getelementptr inbounds float, float* %tmp9543, i64 1
+ %tmp9545 = getelementptr inbounds float, float* %tmp9544, i64 1
+ %tmp9546 = getelementptr inbounds float, float* %tmp9545, i64 1
+ %tmp9547 = getelementptr inbounds float, float* %tmp9546, i64 1
+ %tmp9548 = getelementptr inbounds float, float* %tmp9547, i64 1
+ %tmp9549 = getelementptr inbounds float, float* %tmp9548, i64 1
+ %tmp9550 = getelementptr inbounds float, float* %tmp9549, i64 1
+ %tmp9551 = getelementptr inbounds float, float* %tmp9550, i64 1
+ %tmp9552 = getelementptr inbounds float, float* %tmp9551, i64 1
+ %tmp9553 = getelementptr inbounds float, float* %tmp9552, i64 1
+ %tmp9554 = getelementptr inbounds float, float* %tmp9553, i64 1
+ %tmp9555 = getelementptr inbounds float, float* %tmp9554, i64 1
+ %tmp9556 = getelementptr inbounds float, float* %tmp9555, i64 1
+ %tmp9557 = getelementptr inbounds float, float* %tmp9556, i64 1
+ %tmp9558 = getelementptr inbounds float, float* %tmp9557, i64 1
+ %tmp9559 = getelementptr inbounds float, float* %tmp9558, i64 1
+ %tmp9560 = getelementptr inbounds float, float* %tmp9559, i64 1
+ %tmp9561 = getelementptr inbounds float, float* %tmp9560, i64 1
+ %tmp9562 = getelementptr inbounds float, float* %tmp9561, i64 1
+ %tmp9563 = getelementptr inbounds float, float* %tmp9562, i64 1
+ %tmp9564 = getelementptr inbounds float, float* %tmp9563, i64 1
+ %tmp9565 = getelementptr inbounds float, float* %tmp9564, i64 1
+ %tmp9566 = getelementptr inbounds float, float* %tmp9565, i64 1
+ %tmp9567 = getelementptr inbounds float, float* %tmp9566, i64 1
+ %tmp9568 = getelementptr inbounds float, float* %tmp9567, i64 1
+ %tmp9569 = getelementptr inbounds float, float* %tmp9568, i64 1
+ %tmp9570 = getelementptr inbounds float, float* %tmp9569, i64 1
+ %tmp9571 = getelementptr inbounds float, float* %tmp9570, i64 1
+ %tmp9572 = getelementptr inbounds float, float* %tmp9571, i64 1
+ %tmp9573 = getelementptr inbounds float, float* %tmp9572, i64 1
+ %tmp9574 = getelementptr inbounds float, float* %tmp9573, i64 1
+ %tmp9575 = getelementptr inbounds float, float* %tmp9574, i64 1
+ %tmp9576 = getelementptr inbounds float, float* %tmp9575, i64 1
+ %tmp9577 = getelementptr inbounds float, float* %tmp9576, i64 1
+ %tmp9578 = getelementptr inbounds float, float* %tmp9577, i64 1
+ %tmp9579 = getelementptr inbounds float, float* %tmp9578, i64 1
+ %tmp9580 = getelementptr inbounds float, float* %tmp9579, i64 1
+ %tmp9581 = getelementptr inbounds float, float* %tmp9580, i64 1
+ %tmp9582 = getelementptr inbounds float, float* %tmp9581, i64 1
+ %tmp9583 = getelementptr inbounds float, float* %tmp9582, i64 1
+ %tmp9584 = getelementptr inbounds float, float* %tmp9583, i64 1
+ %tmp9585 = getelementptr inbounds float, float* %tmp9584, i64 1
+ %tmp9586 = getelementptr inbounds float, float* %tmp9585, i64 1
+ %tmp9587 = getelementptr inbounds float, float* %tmp9586, i64 1
+ %tmp9588 = getelementptr inbounds float, float* %tmp9587, i64 1
+ %tmp9589 = getelementptr inbounds float, float* %tmp9588, i64 1
+ %tmp9590 = getelementptr inbounds float, float* %tmp9589, i64 1
+ %tmp9591 = getelementptr inbounds float, float* %tmp9590, i64 1
+ %tmp9592 = getelementptr inbounds float, float* %tmp9591, i64 1
+ %tmp9593 = getelementptr inbounds float, float* %tmp9592, i64 1
+ %tmp9594 = getelementptr inbounds float, float* %tmp9593, i64 1
+ %tmp9595 = getelementptr inbounds float, float* %tmp9594, i64 1
+ %tmp9596 = getelementptr inbounds float, float* %tmp9595, i64 1
+ %tmp9597 = getelementptr inbounds float, float* %tmp9596, i64 1
+ %tmp9598 = getelementptr inbounds float, float* %tmp9597, i64 1
+ %tmp9599 = getelementptr inbounds float, float* %tmp9598, i64 1
+ %tmp9600 = getelementptr inbounds float, float* %tmp9599, i64 1
+ %tmp9601 = getelementptr inbounds float, float* %tmp9600, i64 1
+ %tmp9602 = getelementptr inbounds float, float* %tmp9601, i64 1
+ %tmp9603 = getelementptr inbounds float, float* %tmp9602, i64 1
+ %tmp9604 = getelementptr inbounds float, float* %tmp9603, i64 1
+ %tmp9605 = getelementptr inbounds float, float* %tmp9604, i64 1
+ %tmp9606 = getelementptr inbounds float, float* %tmp9605, i64 1
+ %tmp9607 = getelementptr inbounds float, float* %tmp9606, i64 1
+ %tmp9608 = getelementptr inbounds float, float* %tmp9607, i64 1
+ %tmp9609 = getelementptr inbounds float, float* %tmp9608, i64 1
+ %tmp9610 = getelementptr inbounds float, float* %tmp9609, i64 1
+ %tmp9611 = getelementptr inbounds float, float* %tmp9610, i64 1
+ %tmp9612 = getelementptr inbounds float, float* %tmp9611, i64 1
+ %tmp9613 = getelementptr inbounds float, float* %tmp9612, i64 1
+ %tmp9614 = getelementptr inbounds float, float* %tmp9613, i64 1
+ %tmp9615 = getelementptr inbounds float, float* %tmp9614, i64 1
+ %tmp9616 = getelementptr inbounds float, float* %tmp9615, i64 1
+ %tmp9617 = getelementptr inbounds float, float* %tmp9616, i64 1
+ %tmp9618 = getelementptr inbounds float, float* %tmp9617, i64 1
+ %tmp9619 = getelementptr inbounds float, float* %tmp9618, i64 1
+ %tmp9620 = getelementptr inbounds float, float* %tmp9619, i64 1
+ %tmp9621 = getelementptr inbounds float, float* %tmp9620, i64 1
+ %tmp9622 = getelementptr inbounds float, float* %tmp9621, i64 1
+ %tmp9623 = getelementptr inbounds float, float* %tmp9622, i64 1
+ %tmp9624 = getelementptr inbounds float, float* %tmp9623, i64 1
+ %tmp9625 = getelementptr inbounds float, float* %tmp9624, i64 1
+ %tmp9626 = getelementptr inbounds float, float* %tmp9625, i64 1
+ %tmp9627 = getelementptr inbounds float, float* %tmp9626, i64 1
+ %tmp9628 = getelementptr inbounds float, float* %tmp9627, i64 1
+ %tmp9629 = getelementptr inbounds float, float* %tmp9628, i64 1
+ %tmp9630 = getelementptr inbounds float, float* %tmp9629, i64 1
+ %tmp9631 = getelementptr inbounds float, float* %tmp9630, i64 1
+ %tmp9632 = getelementptr inbounds float, float* %tmp9631, i64 1
+ %tmp9633 = getelementptr inbounds float, float* %tmp9632, i64 1
+ %tmp9634 = getelementptr inbounds float, float* %tmp9633, i64 1
+ %tmp9635 = getelementptr inbounds float, float* %tmp9634, i64 1
+ %tmp9636 = getelementptr inbounds float, float* %tmp9635, i64 1
+ %tmp9637 = getelementptr inbounds float, float* %tmp9636, i64 1
+ %tmp9638 = getelementptr inbounds float, float* %tmp9637, i64 1
+ %tmp9639 = getelementptr inbounds float, float* %tmp9638, i64 1
+ %tmp9640 = getelementptr inbounds float, float* %tmp9639, i64 1
+ %tmp9641 = getelementptr inbounds float, float* %tmp9640, i64 1
+ %tmp9642 = getelementptr inbounds float, float* %tmp9641, i64 1
+ %tmp9643 = getelementptr inbounds float, float* %tmp9642, i64 1
+ %tmp9644 = getelementptr inbounds float, float* %tmp9643, i64 1
+ %tmp9645 = getelementptr inbounds float, float* %tmp9644, i64 1
+ %tmp9646 = getelementptr inbounds float, float* %tmp9645, i64 1
+ %tmp9647 = getelementptr inbounds float, float* %tmp9646, i64 1
+ %tmp9648 = getelementptr inbounds float, float* %tmp9647, i64 1
+ %tmp9649 = getelementptr inbounds float, float* %tmp9648, i64 1
+ %tmp9650 = getelementptr inbounds float, float* %tmp9649, i64 1
+ %tmp9651 = getelementptr inbounds float, float* %tmp9650, i64 1
+ %tmp9652 = getelementptr inbounds float, float* %tmp9651, i64 1
+ %tmp9653 = getelementptr inbounds float, float* %tmp9652, i64 1
+ %tmp9654 = getelementptr inbounds float, float* %tmp9653, i64 1
+ %tmp9655 = getelementptr inbounds float, float* %tmp9654, i64 1
+ %tmp9656 = getelementptr inbounds float, float* %tmp9655, i64 1
+ %tmp9657 = getelementptr inbounds float, float* %tmp9656, i64 1
+ %tmp9658 = getelementptr inbounds float, float* %tmp9657, i64 1
+ %tmp9659 = getelementptr inbounds float, float* %tmp9658, i64 1
+ %tmp9660 = getelementptr inbounds float, float* %tmp9659, i64 1
+ %tmp9661 = getelementptr inbounds float, float* %tmp9660, i64 1
+ %tmp9662 = getelementptr inbounds float, float* %tmp9661, i64 1
+ %tmp9663 = getelementptr inbounds float, float* %tmp9662, i64 1
+ %tmp9664 = getelementptr inbounds float, float* %tmp9663, i64 1
+ %tmp9665 = getelementptr inbounds float, float* %tmp9664, i64 1
+ %tmp9666 = getelementptr inbounds float, float* %tmp9665, i64 1
+ %tmp9667 = getelementptr inbounds float, float* %tmp9666, i64 1
+ %tmp9668 = getelementptr inbounds float, float* %tmp9667, i64 1
+ %tmp9669 = getelementptr inbounds float, float* %tmp9668, i64 1
+ %tmp9670 = getelementptr inbounds float, float* %tmp9669, i64 1
+ %tmp9671 = getelementptr inbounds float, float* %tmp9670, i64 1
+ %tmp9672 = getelementptr inbounds float, float* %tmp9671, i64 1
+ %tmp9673 = getelementptr inbounds float, float* %tmp9672, i64 1
+ %tmp9674 = getelementptr inbounds float, float* %tmp9673, i64 1
+ %tmp9675 = getelementptr inbounds float, float* %tmp9674, i64 1
+ %tmp9676 = getelementptr inbounds float, float* %tmp9675, i64 1
+ %tmp9677 = getelementptr inbounds float, float* %tmp9676, i64 1
+ %tmp9678 = getelementptr inbounds float, float* %tmp9677, i64 1
+ %tmp9679 = getelementptr inbounds float, float* %tmp9678, i64 1
+ %tmp9680 = getelementptr inbounds float, float* %tmp9679, i64 1
+ %tmp9681 = getelementptr inbounds float, float* %tmp9680, i64 1
+ %tmp9682 = getelementptr inbounds float, float* %tmp9681, i64 1
+ %tmp9683 = getelementptr inbounds float, float* %tmp9682, i64 1
+ %tmp9684 = getelementptr inbounds float, float* %tmp9683, i64 1
+ %tmp9685 = getelementptr inbounds float, float* %tmp9684, i64 1
+ %tmp9686 = getelementptr inbounds float, float* %tmp9685, i64 1
+ %tmp9687 = getelementptr inbounds float, float* %tmp9686, i64 1
+ %tmp9688 = getelementptr inbounds float, float* %tmp9687, i64 1
+ %tmp9689 = getelementptr inbounds float, float* %tmp9688, i64 1
+ %tmp9690 = getelementptr inbounds float, float* %tmp9689, i64 1
+ %tmp9691 = getelementptr inbounds float, float* %tmp9690, i64 1
+ %tmp9692 = getelementptr inbounds float, float* %tmp9691, i64 1
+ %tmp9693 = getelementptr inbounds float, float* %tmp9692, i64 1
+ %tmp9694 = getelementptr inbounds float, float* %tmp9693, i64 1
+ %tmp9695 = getelementptr inbounds float, float* %tmp9694, i64 1
+ %tmp9696 = getelementptr inbounds float, float* %tmp9695, i64 1
+ %tmp9697 = getelementptr inbounds float, float* %tmp9696, i64 1
+ %tmp9698 = getelementptr inbounds float, float* %tmp9697, i64 1
+ %tmp9699 = getelementptr inbounds float, float* %tmp9698, i64 1
+ %tmp9700 = getelementptr inbounds float, float* %tmp9699, i64 1
+ %tmp9701 = getelementptr inbounds float, float* %tmp9700, i64 1
+ %tmp9702 = getelementptr inbounds float, float* %tmp9701, i64 1
+ %tmp9703 = getelementptr inbounds float, float* %tmp9702, i64 1
+ %tmp9704 = getelementptr inbounds float, float* %tmp9703, i64 1
+ %tmp9705 = getelementptr inbounds float, float* %tmp9704, i64 1
+ %tmp9706 = getelementptr inbounds float, float* %tmp9705, i64 1
+ %tmp9707 = getelementptr inbounds float, float* %tmp9706, i64 1
+ %tmp9708 = getelementptr inbounds float, float* %tmp9707, i64 1
+ %tmp9709 = getelementptr inbounds float, float* %tmp9708, i64 1
+ %tmp9710 = getelementptr inbounds float, float* %tmp9709, i64 1
+ %tmp9711 = getelementptr inbounds float, float* %tmp9710, i64 1
+ %tmp9712 = getelementptr inbounds float, float* %tmp9711, i64 1
+ %tmp9713 = getelementptr inbounds float, float* %tmp9712, i64 1
+ %tmp9714 = getelementptr inbounds float, float* %tmp9713, i64 1
+ %tmp9715 = getelementptr inbounds float, float* %tmp9714, i64 1
+ %tmp9716 = getelementptr inbounds float, float* %tmp9715, i64 1
+ %tmp9717 = getelementptr inbounds float, float* %tmp9716, i64 1
+ %tmp9718 = getelementptr inbounds float, float* %tmp9717, i64 1
+ %tmp9719 = getelementptr inbounds float, float* %tmp9718, i64 1
+ %tmp9720 = getelementptr inbounds float, float* %tmp9719, i64 1
+ %tmp9721 = getelementptr inbounds float, float* %tmp9720, i64 1
+ %tmp9722 = getelementptr inbounds float, float* %tmp9721, i64 1
+ %tmp9723 = getelementptr inbounds float, float* %tmp9722, i64 1
+ %tmp9724 = getelementptr inbounds float, float* %tmp9723, i64 1
+ %tmp9725 = getelementptr inbounds float, float* %tmp9724, i64 1
+ %tmp9726 = getelementptr inbounds float, float* %tmp9725, i64 1
+ %tmp9727 = getelementptr inbounds float, float* %tmp9726, i64 1
+ %tmp9728 = getelementptr inbounds float, float* %tmp9727, i64 1
+ %tmp9729 = getelementptr inbounds float, float* %tmp9728, i64 1
+ %tmp9730 = getelementptr inbounds float, float* %tmp9729, i64 1
+ %tmp9731 = getelementptr inbounds float, float* %tmp9730, i64 1
+ %tmp9732 = getelementptr inbounds float, float* %tmp9731, i64 1
+ %tmp9733 = getelementptr inbounds float, float* %tmp9732, i64 1
+ %tmp9734 = getelementptr inbounds float, float* %tmp9733, i64 1
+ %tmp9735 = getelementptr inbounds float, float* %tmp9734, i64 1
+ %tmp9736 = getelementptr inbounds float, float* %tmp9735, i64 1
+ %tmp9737 = getelementptr inbounds float, float* %tmp9736, i64 1
+ %tmp9738 = getelementptr inbounds float, float* %tmp9737, i64 1
+ %tmp9739 = getelementptr inbounds float, float* %tmp9738, i64 1
+ %tmp9740 = getelementptr inbounds float, float* %tmp9739, i64 1
+ %tmp9741 = getelementptr inbounds float, float* %tmp9740, i64 1
+ %tmp9742 = getelementptr inbounds float, float* %tmp9741, i64 1
+ %tmp9743 = getelementptr inbounds float, float* %tmp9742, i64 1
+ %tmp9744 = getelementptr inbounds float, float* %tmp9743, i64 1
+ %tmp9745 = getelementptr inbounds float, float* %tmp9744, i64 1
+ %tmp9746 = getelementptr inbounds float, float* %tmp9745, i64 1
+ %tmp9747 = getelementptr inbounds float, float* %tmp9746, i64 1
+ %tmp9748 = getelementptr inbounds float, float* %tmp9747, i64 1
+ %tmp9749 = getelementptr inbounds float, float* %tmp9748, i64 1
+ %tmp9750 = getelementptr inbounds float, float* %tmp9749, i64 1
+ %tmp9751 = getelementptr inbounds float, float* %tmp9750, i64 1
+ %tmp9752 = getelementptr inbounds float, float* %tmp9751, i64 1
+ %tmp9753 = getelementptr inbounds float, float* %tmp9752, i64 1
+ %tmp9754 = getelementptr inbounds float, float* %tmp9753, i64 1
+ %tmp9755 = getelementptr inbounds float, float* %tmp9754, i64 1
+ %tmp9756 = getelementptr inbounds float, float* %tmp9755, i64 1
+ %tmp9757 = getelementptr inbounds float, float* %tmp9756, i64 1
+ %tmp9758 = getelementptr inbounds float, float* %tmp9757, i64 1
+ %tmp9759 = getelementptr inbounds float, float* %tmp9758, i64 1
+ %tmp9760 = getelementptr inbounds float, float* %tmp9759, i64 1
+ %tmp9761 = getelementptr inbounds float, float* %tmp9760, i64 1
+ %tmp9762 = getelementptr inbounds float, float* %tmp9761, i64 1
+ %tmp9763 = getelementptr inbounds float, float* %tmp9762, i64 1
+ %tmp9764 = getelementptr inbounds float, float* %tmp9763, i64 1
+ %tmp9765 = getelementptr inbounds float, float* %tmp9764, i64 1
+ %tmp9766 = getelementptr inbounds float, float* %tmp9765, i64 1
+ %tmp9767 = getelementptr inbounds float, float* %tmp9766, i64 1
+ %tmp9768 = getelementptr inbounds float, float* %tmp9767, i64 1
+ %tmp9769 = getelementptr inbounds float, float* %tmp9768, i64 1
+ %tmp9770 = getelementptr inbounds float, float* %tmp9769, i64 1
+ %tmp9771 = getelementptr inbounds float, float* %tmp9770, i64 1
+ %tmp9772 = getelementptr inbounds float, float* %tmp9771, i64 1
+ %tmp9773 = getelementptr inbounds float, float* %tmp9772, i64 1
+ %tmp9774 = getelementptr inbounds float, float* %tmp9773, i64 1
+ %tmp9775 = getelementptr inbounds float, float* %tmp9774, i64 1
+ %tmp9776 = getelementptr inbounds float, float* %tmp9775, i64 1
+ %tmp9777 = getelementptr inbounds float, float* %tmp9776, i64 1
+ %tmp9778 = getelementptr inbounds float, float* %tmp9777, i64 1
+ %tmp9779 = getelementptr inbounds float, float* %tmp9778, i64 1
+ %tmp9780 = getelementptr inbounds float, float* %tmp9779, i64 1
+ %tmp9781 = getelementptr inbounds float, float* %tmp9780, i64 1
+ %tmp9782 = getelementptr inbounds float, float* %tmp9781, i64 1
+ %tmp9783 = getelementptr inbounds float, float* %tmp9782, i64 1
+ %tmp9784 = getelementptr inbounds float, float* %tmp9783, i64 1
+ %tmp9785 = getelementptr inbounds float, float* %tmp9784, i64 1
+ %tmp9786 = getelementptr inbounds float, float* %tmp9785, i64 1
+ %tmp9787 = getelementptr inbounds float, float* %tmp9786, i64 1
+ %tmp9788 = getelementptr inbounds float, float* %tmp9787, i64 1
+ %tmp9789 = getelementptr inbounds float, float* %tmp9788, i64 1
+ %tmp9790 = getelementptr inbounds float, float* %tmp9789, i64 1
+ %tmp9791 = getelementptr inbounds float, float* %tmp9790, i64 1
+ %tmp9792 = getelementptr inbounds float, float* %tmp9791, i64 1
+ %tmp9793 = getelementptr inbounds float, float* %tmp9792, i64 1
+ %tmp9794 = getelementptr inbounds float, float* %tmp9793, i64 1
+ %tmp9795 = getelementptr inbounds float, float* %tmp9794, i64 1
+ %tmp9796 = getelementptr inbounds float, float* %tmp9795, i64 1
+ %tmp9797 = getelementptr inbounds float, float* %tmp9796, i64 1
+ %tmp9798 = getelementptr inbounds float, float* %tmp9797, i64 1
+ %tmp9799 = getelementptr inbounds float, float* %tmp9798, i64 1
+ %tmp9800 = getelementptr inbounds float, float* %tmp9799, i64 1
+ %tmp9801 = getelementptr inbounds float, float* %tmp9800, i64 1
+ %tmp9802 = getelementptr inbounds float, float* %tmp9801, i64 1
+ %tmp9803 = getelementptr inbounds float, float* %tmp9802, i64 1
+ %tmp9804 = getelementptr inbounds float, float* %tmp9803, i64 1
+ %tmp9805 = getelementptr inbounds float, float* %tmp9804, i64 1
+ %tmp9806 = getelementptr inbounds float, float* %tmp9805, i64 1
+ %tmp9807 = getelementptr inbounds float, float* %tmp9806, i64 1
+ %tmp9808 = getelementptr inbounds float, float* %tmp9807, i64 1
+ %tmp9809 = getelementptr inbounds float, float* %tmp9808, i64 1
+ %tmp9810 = getelementptr inbounds float, float* %tmp9809, i64 1
+ %tmp9811 = getelementptr inbounds float, float* %tmp9810, i64 1
+ %tmp9812 = getelementptr inbounds float, float* %tmp9811, i64 1
+ %tmp9813 = getelementptr inbounds float, float* %tmp9812, i64 1
+ %tmp9814 = getelementptr inbounds float, float* %tmp9813, i64 1
+ %tmp9815 = getelementptr inbounds float, float* %tmp9814, i64 1
+ %tmp9816 = getelementptr inbounds float, float* %tmp9815, i64 1
+ %tmp9817 = getelementptr inbounds float, float* %tmp9816, i64 1
+ %tmp9818 = getelementptr inbounds float, float* %tmp9817, i64 1
+ %tmp9819 = getelementptr inbounds float, float* %tmp9818, i64 1
+ %tmp9820 = getelementptr inbounds float, float* %tmp9819, i64 1
+ %tmp9821 = getelementptr inbounds float, float* %tmp9820, i64 1
+ %tmp9822 = getelementptr inbounds float, float* %tmp9821, i64 1
+ %tmp9823 = getelementptr inbounds float, float* %tmp9822, i64 1
+ %tmp9824 = getelementptr inbounds float, float* %tmp9823, i64 1
+ %tmp9825 = getelementptr inbounds float, float* %tmp9824, i64 1
+ %tmp9826 = getelementptr inbounds float, float* %tmp9825, i64 1
+ %tmp9827 = getelementptr inbounds float, float* %tmp9826, i64 1
+ %tmp9828 = getelementptr inbounds float, float* %tmp9827, i64 1
+ %tmp9829 = getelementptr inbounds float, float* %tmp9828, i64 1
+ %tmp9830 = getelementptr inbounds float, float* %tmp9829, i64 1
+ %tmp9831 = getelementptr inbounds float, float* %tmp9830, i64 1
+ %tmp9832 = getelementptr inbounds float, float* %tmp9831, i64 1
+ %tmp9833 = getelementptr inbounds float, float* %tmp9832, i64 1
+ %tmp9834 = getelementptr inbounds float, float* %tmp9833, i64 1
+ %tmp9835 = getelementptr inbounds float, float* %tmp9834, i64 1
+ %tmp9836 = getelementptr inbounds float, float* %tmp9835, i64 1
+ %tmp9837 = getelementptr inbounds float, float* %tmp9836, i64 1
+ %tmp9838 = getelementptr inbounds float, float* %tmp9837, i64 1
+ %tmp9839 = getelementptr inbounds float, float* %tmp9838, i64 1
+ %tmp9840 = getelementptr inbounds float, float* %tmp9839, i64 1
+ %tmp9841 = getelementptr inbounds float, float* %tmp9840, i64 1
+ %tmp9842 = getelementptr inbounds float, float* %tmp9841, i64 1
+ %tmp9843 = getelementptr inbounds float, float* %tmp9842, i64 1
+ %tmp9844 = getelementptr inbounds float, float* %tmp9843, i64 1
+ %tmp9845 = getelementptr inbounds float, float* %tmp9844, i64 1
+ %tmp9846 = getelementptr inbounds float, float* %tmp9845, i64 1
+ %tmp9847 = getelementptr inbounds float, float* %tmp9846, i64 1
+ %tmp9848 = getelementptr inbounds float, float* %tmp9847, i64 1
+ %tmp9849 = getelementptr inbounds float, float* %tmp9848, i64 1
+ %tmp9850 = getelementptr inbounds float, float* %tmp9849, i64 1
+ %tmp9851 = getelementptr inbounds float, float* %tmp9850, i64 1
+ %tmp9852 = getelementptr inbounds float, float* %tmp9851, i64 1
+ %tmp9853 = getelementptr inbounds float, float* %tmp9852, i64 1
+ %tmp9854 = getelementptr inbounds float, float* %tmp9853, i64 1
+ %tmp9855 = getelementptr inbounds float, float* %tmp9854, i64 1
+ %tmp9856 = getelementptr inbounds float, float* %tmp9855, i64 1
+ %tmp9857 = getelementptr inbounds float, float* %tmp9856, i64 1
+ %tmp9858 = getelementptr inbounds float, float* %tmp9857, i64 1
+ %tmp9859 = getelementptr inbounds float, float* %tmp9858, i64 1
+ %tmp9860 = getelementptr inbounds float, float* %tmp9859, i64 1
+ %tmp9861 = getelementptr inbounds float, float* %tmp9860, i64 1
+ %tmp9862 = getelementptr inbounds float, float* %tmp9861, i64 1
+ %tmp9863 = getelementptr inbounds float, float* %tmp9862, i64 1
+ %tmp9864 = getelementptr inbounds float, float* %tmp9863, i64 1
+ %tmp9865 = getelementptr inbounds float, float* %tmp9864, i64 1
+ %tmp9866 = getelementptr inbounds float, float* %tmp9865, i64 1
+ %tmp9867 = getelementptr inbounds float, float* %tmp9866, i64 1
+ %tmp9868 = getelementptr inbounds float, float* %tmp9867, i64 1
+ %tmp9869 = getelementptr inbounds float, float* %tmp9868, i64 1
+ %tmp9870 = getelementptr inbounds float, float* %tmp9869, i64 1
+ %tmp9871 = getelementptr inbounds float, float* %tmp9870, i64 1
+ %tmp9872 = getelementptr inbounds float, float* %tmp9871, i64 1
+ %tmp9873 = getelementptr inbounds float, float* %tmp9872, i64 1
+ %tmp9874 = getelementptr inbounds float, float* %tmp9873, i64 1
+ %tmp9875 = getelementptr inbounds float, float* %tmp9874, i64 1
+ %tmp9876 = getelementptr inbounds float, float* %tmp9875, i64 1
+ %tmp9877 = getelementptr inbounds float, float* %tmp9876, i64 1
+ %tmp9878 = getelementptr inbounds float, float* %tmp9877, i64 1
+ %tmp9879 = getelementptr inbounds float, float* %tmp9878, i64 1
+ %tmp9880 = getelementptr inbounds float, float* %tmp9879, i64 1
+ %tmp9881 = getelementptr inbounds float, float* %tmp9880, i64 1
+ %tmp9882 = getelementptr inbounds float, float* %tmp9881, i64 1
+ %tmp9883 = getelementptr inbounds float, float* %tmp9882, i64 1
+ %tmp9884 = getelementptr inbounds float, float* %tmp9883, i64 1
+ %tmp9885 = getelementptr inbounds float, float* %tmp9884, i64 1
+ %tmp9886 = getelementptr inbounds float, float* %tmp9885, i64 1
+ %tmp9887 = getelementptr inbounds float, float* %tmp9886, i64 1
+ %tmp9888 = getelementptr inbounds float, float* %tmp9887, i64 1
+ %tmp9889 = getelementptr inbounds float, float* %tmp9888, i64 1
+ %tmp9890 = getelementptr inbounds float, float* %tmp9889, i64 1
+ %tmp9891 = getelementptr inbounds float, float* %tmp9890, i64 1
+ %tmp9892 = getelementptr inbounds float, float* %tmp9891, i64 1
+ %tmp9893 = getelementptr inbounds float, float* %tmp9892, i64 1
+ %tmp9894 = getelementptr inbounds float, float* %tmp9893, i64 1
+ %tmp9895 = getelementptr inbounds float, float* %tmp9894, i64 1
+ %tmp9896 = getelementptr inbounds float, float* %tmp9895, i64 1
+ %tmp9897 = getelementptr inbounds float, float* %tmp9896, i64 1
+ %tmp9898 = getelementptr inbounds float, float* %tmp9897, i64 1
+ %tmp9899 = getelementptr inbounds float, float* %tmp9898, i64 1
+ %tmp9900 = getelementptr inbounds float, float* %tmp9899, i64 1
+ %tmp9901 = getelementptr inbounds float, float* %tmp9900, i64 1
+ %tmp9902 = getelementptr inbounds float, float* %tmp9901, i64 1
+ %tmp9903 = getelementptr inbounds float, float* %tmp9902, i64 1
+ %tmp9904 = getelementptr inbounds float, float* %tmp9903, i64 1
+ %tmp9905 = getelementptr inbounds float, float* %tmp9904, i64 1
+ %tmp9906 = getelementptr inbounds float, float* %tmp9905, i64 1
+ %tmp9907 = getelementptr inbounds float, float* %tmp9906, i64 1
+ %tmp9908 = getelementptr inbounds float, float* %tmp9907, i64 1
+ %tmp9909 = getelementptr inbounds float, float* %tmp9908, i64 1
+ %tmp9910 = getelementptr inbounds float, float* %tmp9909, i64 1
+ %tmp9911 = getelementptr inbounds float, float* %tmp9910, i64 1
+ %tmp9912 = getelementptr inbounds float, float* %tmp9911, i64 1
+ %tmp9913 = getelementptr inbounds float, float* %tmp9912, i64 1
+ %tmp9914 = getelementptr inbounds float, float* %tmp9913, i64 1
+ %tmp9915 = getelementptr inbounds float, float* %tmp9914, i64 1
+ %tmp9916 = getelementptr inbounds float, float* %tmp9915, i64 1
+ %tmp9917 = getelementptr inbounds float, float* %tmp9916, i64 1
+ %tmp9918 = getelementptr inbounds float, float* %tmp9917, i64 1
+ %tmp9919 = getelementptr inbounds float, float* %tmp9918, i64 1
+ %tmp9920 = getelementptr inbounds float, float* %tmp9919, i64 1
+ %tmp9921 = getelementptr inbounds float, float* %tmp9920, i64 1
+ %tmp9922 = getelementptr inbounds float, float* %tmp9921, i64 1
+ %tmp9923 = getelementptr inbounds float, float* %tmp9922, i64 1
+ %tmp9924 = getelementptr inbounds float, float* %tmp9923, i64 1
+ %tmp9925 = getelementptr inbounds float, float* %tmp9924, i64 1
+ %tmp9926 = getelementptr inbounds float, float* %tmp9925, i64 1
+ %tmp9927 = getelementptr inbounds float, float* %tmp9926, i64 1
+ %tmp9928 = getelementptr inbounds float, float* %tmp9927, i64 1
+ %tmp9929 = getelementptr inbounds float, float* %tmp9928, i64 1
+ %tmp9930 = getelementptr inbounds float, float* %tmp9929, i64 1
+ %tmp9931 = getelementptr inbounds float, float* %tmp9930, i64 1
+ %tmp9932 = getelementptr inbounds float, float* %tmp9931, i64 1
+ %tmp9933 = getelementptr inbounds float, float* %tmp9932, i64 1
+ %tmp9934 = getelementptr inbounds float, float* %tmp9933, i64 1
+ %tmp9935 = getelementptr inbounds float, float* %tmp9934, i64 1
+ %tmp9936 = getelementptr inbounds float, float* %tmp9935, i64 1
+ %tmp9937 = getelementptr inbounds float, float* %tmp9936, i64 1
+ %tmp9938 = getelementptr inbounds float, float* %tmp9937, i64 1
+ %tmp9939 = getelementptr inbounds float, float* %tmp9938, i64 1
+ %tmp9940 = getelementptr inbounds float, float* %tmp9939, i64 1
+ %tmp9941 = getelementptr inbounds float, float* %tmp9940, i64 1
+ %tmp9942 = getelementptr inbounds float, float* %tmp9941, i64 1
+ %tmp9943 = getelementptr inbounds float, float* %tmp9942, i64 1
+ %tmp9944 = getelementptr inbounds float, float* %tmp9943, i64 1
+ %tmp9945 = getelementptr inbounds float, float* %tmp9944, i64 1
+ %tmp9946 = getelementptr inbounds float, float* %tmp9945, i64 1
+ %tmp9947 = getelementptr inbounds float, float* %tmp9946, i64 1
+ %tmp9948 = getelementptr inbounds float, float* %tmp9947, i64 1
+ %tmp9949 = getelementptr inbounds float, float* %tmp9948, i64 1
+ %tmp9950 = getelementptr inbounds float, float* %tmp9949, i64 1
+ %tmp9951 = getelementptr inbounds float, float* %tmp9950, i64 1
+ %tmp9952 = getelementptr inbounds float, float* %tmp9951, i64 1
+ %tmp9953 = getelementptr inbounds float, float* %tmp9952, i64 1
+ %tmp9954 = getelementptr inbounds float, float* %tmp9953, i64 1
+ %tmp9955 = getelementptr inbounds float, float* %tmp9954, i64 1
+ %tmp9956 = getelementptr inbounds float, float* %tmp9955, i64 1
+ %tmp9957 = getelementptr inbounds float, float* %tmp9956, i64 1
+ %tmp9958 = getelementptr inbounds float, float* %tmp9957, i64 1
+ %tmp9959 = getelementptr inbounds float, float* %tmp9958, i64 1
+ %tmp9960 = getelementptr inbounds float, float* %tmp9959, i64 1
+ %tmp9961 = getelementptr inbounds float, float* %tmp9960, i64 1
+ %tmp9962 = getelementptr inbounds float, float* %tmp9961, i64 1
+ %tmp9963 = getelementptr inbounds float, float* %tmp9962, i64 1
+ %tmp9964 = getelementptr inbounds float, float* %tmp9963, i64 1
+ %tmp9965 = getelementptr inbounds float, float* %tmp9964, i64 1
+ %tmp9966 = getelementptr inbounds float, float* %tmp9965, i64 1
+ %tmp9967 = getelementptr inbounds float, float* %tmp9966, i64 1
+ %tmp9968 = getelementptr inbounds float, float* %tmp9967, i64 1
+ %tmp9969 = getelementptr inbounds float, float* %tmp9968, i64 1
+ %tmp9970 = getelementptr inbounds float, float* %tmp9969, i64 1
+ %tmp9971 = getelementptr inbounds float, float* %tmp9970, i64 1
+ %tmp9972 = getelementptr inbounds float, float* %tmp9971, i64 1
+ %tmp9973 = getelementptr inbounds float, float* %tmp9972, i64 1
+ %tmp9974 = getelementptr inbounds float, float* %tmp9973, i64 1
+ %tmp9975 = getelementptr inbounds float, float* %tmp9974, i64 1
+ %tmp9976 = getelementptr inbounds float, float* %tmp9975, i64 1
+ %tmp9977 = getelementptr inbounds float, float* %tmp9976, i64 1
+ %tmp9978 = getelementptr inbounds float, float* %tmp9977, i64 1
+ %tmp9979 = getelementptr inbounds float, float* %tmp9978, i64 1
+ %tmp9980 = getelementptr inbounds float, float* %tmp9979, i64 1
+ %tmp9981 = getelementptr inbounds float, float* %tmp9980, i64 1
+ %tmp9982 = getelementptr inbounds float, float* %tmp9981, i64 1
+ %tmp9983 = getelementptr inbounds float, float* %tmp9982, i64 1
+ %tmp9984 = getelementptr inbounds float, float* %tmp9983, i64 1
+ %tmp9985 = getelementptr inbounds float, float* %tmp9984, i64 1
+ %tmp9986 = getelementptr inbounds float, float* %tmp9985, i64 1
+ %tmp9987 = getelementptr inbounds float, float* %tmp9986, i64 1
+ %tmp9988 = getelementptr inbounds float, float* %tmp9987, i64 1
+ %tmp9989 = getelementptr inbounds float, float* %tmp9988, i64 1
+ %tmp9990 = getelementptr inbounds float, float* %tmp9989, i64 1
+ %tmp9991 = getelementptr inbounds float, float* %tmp9990, i64 1
+ %tmp9992 = getelementptr inbounds float, float* %tmp9991, i64 1
+ %tmp9993 = getelementptr inbounds float, float* %tmp9992, i64 1
+ %tmp9994 = getelementptr inbounds float, float* %tmp9993, i64 1
+ %tmp9995 = getelementptr inbounds float, float* %tmp9994, i64 1
+ %tmp9996 = getelementptr inbounds float, float* %tmp9995, i64 1
+ %tmp9997 = getelementptr inbounds float, float* %tmp9996, i64 1
+ %tmp9998 = getelementptr inbounds float, float* %tmp9997, i64 1
+ %tmp9999 = getelementptr inbounds float, float* %tmp9998, i64 1
+ %tmp10000 = getelementptr inbounds float, float* %tmp9999, i64 1
+ %tmp10001 = getelementptr inbounds float, float* %tmp10000, i64 1
+ %tmp10002 = getelementptr inbounds float, float* %tmp10001, i64 1
+ %tmp10003 = getelementptr inbounds float, float* %tmp10002, i64 1
+ %tmp10004 = getelementptr inbounds float, float* %tmp10003, i64 1
+ %tmp10005 = getelementptr inbounds float, float* %tmp10004, i64 1
+ %tmp10006 = getelementptr inbounds float, float* %tmp10005, i64 1
+ %tmp10007 = getelementptr inbounds float, float* %tmp10006, i64 1
+ %tmp10008 = getelementptr inbounds float, float* %tmp10007, i64 1
+ %tmp10009 = getelementptr inbounds float, float* %tmp10008, i64 1
+ %tmp10010 = getelementptr inbounds float, float* %tmp10009, i64 1
+ %tmp10011 = getelementptr inbounds float, float* %tmp10010, i64 1
+ %tmp10012 = getelementptr inbounds float, float* %tmp10011, i64 1
+ %tmp10013 = getelementptr inbounds float, float* %tmp10012, i64 1
+ %tmp10014 = getelementptr inbounds float, float* %tmp10013, i64 1
+ %tmp10015 = getelementptr inbounds float, float* %tmp10014, i64 1
+ %tmp10016 = getelementptr inbounds float, float* %tmp10015, i64 1
+ %tmp10017 = getelementptr inbounds float, float* %tmp10016, i64 1
+ %tmp10018 = getelementptr inbounds float, float* %tmp10017, i64 1
+ %tmp10019 = getelementptr inbounds float, float* %tmp10018, i64 1
+ %tmp10020 = getelementptr inbounds float, float* %tmp10019, i64 1
+ %tmp10021 = getelementptr inbounds float, float* %tmp10020, i64 1
+ %tmp10022 = getelementptr inbounds float, float* %tmp10021, i64 1
+ %tmp10023 = getelementptr inbounds float, float* %tmp10022, i64 1
+ %tmp10024 = getelementptr inbounds float, float* %tmp10023, i64 1
+ %tmp10025 = getelementptr inbounds float, float* %tmp10024, i64 1
+ %tmp10026 = getelementptr inbounds float, float* %tmp10025, i64 1
+ %tmp10027 = getelementptr inbounds float, float* %tmp10026, i64 1
+ %tmp10028 = getelementptr inbounds float, float* %tmp10027, i64 1
+ %tmp10029 = getelementptr inbounds float, float* %tmp10028, i64 1
+ %tmp10030 = getelementptr inbounds float, float* %tmp10029, i64 1
+ %tmp10031 = getelementptr inbounds float, float* %tmp10030, i64 1
+ %tmp10032 = getelementptr inbounds float, float* %tmp10031, i64 1
+ %tmp10033 = getelementptr inbounds float, float* %tmp10032, i64 1
+ %tmp10034 = getelementptr inbounds float, float* %tmp10033, i64 1
+ %tmp10035 = getelementptr inbounds float, float* %tmp10034, i64 1
+ %tmp10036 = getelementptr inbounds float, float* %tmp10035, i64 1
+ %tmp10037 = getelementptr inbounds float, float* %tmp10036, i64 1
+ %tmp10038 = getelementptr inbounds float, float* %tmp10037, i64 1
+ %tmp10039 = getelementptr inbounds float, float* %tmp10038, i64 1
+ %tmp10040 = getelementptr inbounds float, float* %tmp10039, i64 1
+ %tmp10041 = getelementptr inbounds float, float* %tmp10040, i64 1
+ %tmp10042 = getelementptr inbounds float, float* %tmp10041, i64 1
+ %tmp10043 = getelementptr inbounds float, float* %tmp10042, i64 1
+ %tmp10044 = getelementptr inbounds float, float* %tmp10043, i64 1
+ %tmp10045 = getelementptr inbounds float, float* %tmp10044, i64 1
+ %tmp10046 = getelementptr inbounds float, float* %tmp10045, i64 1
+ %tmp10047 = getelementptr inbounds float, float* %tmp10046, i64 1
+ %tmp10048 = getelementptr inbounds float, float* %tmp10047, i64 1
+ %tmp10049 = getelementptr inbounds float, float* %tmp10048, i64 1
+ %tmp10050 = getelementptr inbounds float, float* %tmp10049, i64 1
+ %tmp10051 = getelementptr inbounds float, float* %tmp10050, i64 1
+ %tmp10052 = getelementptr inbounds float, float* %tmp10051, i64 1
+ %tmp10053 = getelementptr inbounds float, float* %tmp10052, i64 1
+ %tmp10054 = getelementptr inbounds float, float* %tmp10053, i64 1
+ %tmp10055 = getelementptr inbounds float, float* %tmp10054, i64 1
+ %tmp10056 = getelementptr inbounds float, float* %tmp10055, i64 1
+ %tmp10057 = getelementptr inbounds float, float* %tmp10056, i64 1
+ %tmp10058 = getelementptr inbounds float, float* %tmp10057, i64 1
+ %tmp10059 = getelementptr inbounds float, float* %tmp10058, i64 1
+ %tmp10060 = getelementptr inbounds float, float* %tmp10059, i64 1
+ %tmp10061 = getelementptr inbounds float, float* %tmp10060, i64 1
+ %tmp10062 = getelementptr inbounds float, float* %tmp10061, i64 1
+ %tmp10063 = getelementptr inbounds float, float* %tmp10062, i64 1
+ %tmp10064 = getelementptr inbounds float, float* %tmp10063, i64 1
+ %tmp10065 = getelementptr inbounds float, float* %tmp10064, i64 1
+ %tmp10066 = getelementptr inbounds float, float* %tmp10065, i64 1
+ %tmp10067 = getelementptr inbounds float, float* %tmp10066, i64 1
+ %tmp10068 = getelementptr inbounds float, float* %tmp10067, i64 1
+ %tmp10069 = getelementptr inbounds float, float* %tmp10068, i64 1
+ %tmp10070 = getelementptr inbounds float, float* %tmp10069, i64 1
+ %tmp10071 = getelementptr inbounds float, float* %tmp10070, i64 1
+ %tmp10072 = getelementptr inbounds float, float* %tmp10071, i64 1
+ %tmp10073 = getelementptr inbounds float, float* %tmp10072, i64 1
+ %tmp10074 = getelementptr inbounds float, float* %tmp10073, i64 1
+ %tmp10075 = getelementptr inbounds float, float* %tmp10074, i64 1
+ %tmp10076 = getelementptr inbounds float, float* %tmp10075, i64 1
+ %tmp10077 = getelementptr inbounds float, float* %tmp10076, i64 1
+ %tmp10078 = getelementptr inbounds float, float* %tmp10077, i64 1
+ %tmp10079 = getelementptr inbounds float, float* %tmp10078, i64 1
+ %tmp10080 = getelementptr inbounds float, float* %tmp10079, i64 1
+ %tmp10081 = getelementptr inbounds float, float* %tmp10080, i64 1
+ %tmp10082 = getelementptr inbounds float, float* %tmp10081, i64 1
+ %tmp10083 = getelementptr inbounds float, float* %tmp10082, i64 1
+ %tmp10084 = getelementptr inbounds float, float* %tmp10083, i64 1
+ %tmp10085 = getelementptr inbounds float, float* %tmp10084, i64 1
+ %tmp10086 = getelementptr inbounds float, float* %tmp10085, i64 1
+ %tmp10087 = getelementptr inbounds float, float* %tmp10086, i64 1
+ %tmp10088 = getelementptr inbounds float, float* %tmp10087, i64 1
+ %tmp10089 = getelementptr inbounds float, float* %tmp10088, i64 1
+ %tmp10090 = getelementptr inbounds float, float* %tmp10089, i64 1
+ %tmp10091 = getelementptr inbounds float, float* %tmp10090, i64 1
+ %tmp10092 = getelementptr inbounds float, float* %tmp10091, i64 1
+ %tmp10093 = getelementptr inbounds float, float* %tmp10092, i64 1
+ %tmp10094 = getelementptr inbounds float, float* %tmp10093, i64 1
+ %tmp10095 = getelementptr inbounds float, float* %tmp10094, i64 1
+ %tmp10096 = getelementptr inbounds float, float* %tmp10095, i64 1
+ %tmp10097 = getelementptr inbounds float, float* %tmp10096, i64 1
+ %tmp10098 = getelementptr inbounds float, float* %tmp10097, i64 1
+ %tmp10099 = getelementptr inbounds float, float* %tmp10098, i64 1
+ %tmp10100 = getelementptr inbounds float, float* %tmp10099, i64 1
+ %tmp10101 = getelementptr inbounds float, float* %tmp10100, i64 1
+ %tmp10102 = getelementptr inbounds float, float* %tmp10101, i64 1
+ %tmp10103 = getelementptr inbounds float, float* %tmp10102, i64 1
+ %tmp10104 = getelementptr inbounds float, float* %tmp10103, i64 1
+ %tmp10105 = getelementptr inbounds float, float* %tmp10104, i64 1
+ %tmp10106 = getelementptr inbounds float, float* %tmp10105, i64 1
+ %tmp10107 = getelementptr inbounds float, float* %tmp10106, i64 1
+ %tmp10108 = getelementptr inbounds float, float* %tmp10107, i64 1
+ %tmp10109 = getelementptr inbounds float, float* %tmp10108, i64 1
+ %tmp10110 = getelementptr inbounds float, float* %tmp10109, i64 1
+ %tmp10111 = getelementptr inbounds float, float* %tmp10110, i64 1
+ %tmp10112 = getelementptr inbounds float, float* %tmp10111, i64 1
+ %tmp10113 = getelementptr inbounds float, float* %tmp10112, i64 1
+ %tmp10114 = getelementptr inbounds float, float* %tmp10113, i64 1
+ %tmp10115 = getelementptr inbounds float, float* %tmp10114, i64 1
+ %tmp10116 = getelementptr inbounds float, float* %tmp10115, i64 1
+ %tmp10117 = getelementptr inbounds float, float* %tmp10116, i64 1
+ %tmp10118 = getelementptr inbounds float, float* %tmp10117, i64 1
+ %tmp10119 = getelementptr inbounds float, float* %tmp10118, i64 1
+ %tmp10120 = getelementptr inbounds float, float* %tmp10119, i64 1
+ %tmp10121 = getelementptr inbounds float, float* %tmp10120, i64 1
+ %tmp10122 = getelementptr inbounds float, float* %tmp10121, i64 1
+ %tmp10123 = getelementptr inbounds float, float* %tmp10122, i64 1
+ %tmp10124 = getelementptr inbounds float, float* %tmp10123, i64 1
+ %tmp10125 = getelementptr inbounds float, float* %tmp10124, i64 1
+ %tmp10126 = getelementptr inbounds float, float* %tmp10125, i64 1
+ %tmp10127 = getelementptr inbounds float, float* %tmp10126, i64 1
+ %tmp10128 = getelementptr inbounds float, float* %tmp10127, i64 1
+ %tmp10129 = getelementptr inbounds float, float* %tmp10128, i64 1
+ %tmp10130 = getelementptr inbounds float, float* %tmp10129, i64 1
+ %tmp10131 = getelementptr inbounds float, float* %tmp10130, i64 1
+ %tmp10132 = getelementptr inbounds float, float* %tmp10131, i64 1
+ %tmp10133 = getelementptr inbounds float, float* %tmp10132, i64 1
+ %tmp10134 = getelementptr inbounds float, float* %tmp10133, i64 1
+ %tmp10135 = getelementptr inbounds float, float* %tmp10134, i64 1
+ %tmp10136 = getelementptr inbounds float, float* %tmp10135, i64 1
+ %tmp10137 = getelementptr inbounds float, float* %tmp10136, i64 1
+ %tmp10138 = getelementptr inbounds float, float* %tmp10137, i64 1
+ %tmp10139 = getelementptr inbounds float, float* %tmp10138, i64 1
+ %tmp10140 = getelementptr inbounds float, float* %tmp10139, i64 1
+ %tmp10141 = getelementptr inbounds float, float* %tmp10140, i64 1
+ %tmp10142 = getelementptr inbounds float, float* %tmp10141, i64 1
+ %tmp10143 = getelementptr inbounds float, float* %tmp10142, i64 1
+ %tmp10144 = getelementptr inbounds float, float* %tmp10143, i64 1
+ %tmp10145 = getelementptr inbounds float, float* %tmp10144, i64 1
+ %tmp10146 = getelementptr inbounds float, float* %tmp10145, i64 1
+ %tmp10147 = getelementptr inbounds float, float* %tmp10146, i64 1
+ %tmp10148 = getelementptr inbounds float, float* %tmp10147, i64 1
+ %tmp10149 = getelementptr inbounds float, float* %tmp10148, i64 1
+ %tmp10150 = getelementptr inbounds float, float* %tmp10149, i64 1
+ %tmp10151 = getelementptr inbounds float, float* %tmp10150, i64 1
+ %tmp10152 = getelementptr inbounds float, float* %tmp10151, i64 1
+ %tmp10153 = getelementptr inbounds float, float* %tmp10152, i64 1
+ %tmp10154 = getelementptr inbounds float, float* %tmp10153, i64 1
+ %tmp10155 = getelementptr inbounds float, float* %tmp10154, i64 1
+ %tmp10156 = getelementptr inbounds float, float* %tmp10155, i64 1
+ %tmp10157 = getelementptr inbounds float, float* %tmp10156, i64 1
+ %tmp10158 = getelementptr inbounds float, float* %tmp10157, i64 1
+ %tmp10159 = getelementptr inbounds float, float* %tmp10158, i64 1
+ %tmp10160 = getelementptr inbounds float, float* %tmp10159, i64 1
+ %tmp10161 = getelementptr inbounds float, float* %tmp10160, i64 1
+ %tmp10162 = getelementptr inbounds float, float* %tmp10161, i64 1
+ %tmp10163 = getelementptr inbounds float, float* %tmp10162, i64 1
+ %tmp10164 = getelementptr inbounds float, float* %tmp10163, i64 1
+ %tmp10165 = getelementptr inbounds float, float* %tmp10164, i64 1
+ %tmp10166 = getelementptr inbounds float, float* %tmp10165, i64 1
+ %tmp10167 = getelementptr inbounds float, float* %tmp10166, i64 1
+ %tmp10168 = getelementptr inbounds float, float* %tmp10167, i64 1
+ %tmp10169 = getelementptr inbounds float, float* %tmp10168, i64 1
+ %tmp10170 = getelementptr inbounds float, float* %tmp10169, i64 1
+ %tmp10171 = getelementptr inbounds float, float* %tmp10170, i64 1
+ %tmp10172 = getelementptr inbounds float, float* %tmp10171, i64 1
+ %tmp10173 = getelementptr inbounds float, float* %tmp10172, i64 1
+ %tmp10174 = getelementptr inbounds float, float* %tmp10173, i64 1
+ %tmp10175 = getelementptr inbounds float, float* %tmp10174, i64 1
+ %tmp10176 = getelementptr inbounds float, float* %tmp10175, i64 1
+ %tmp10177 = getelementptr inbounds float, float* %tmp10176, i64 1
+ %tmp10178 = getelementptr inbounds float, float* %tmp10177, i64 1
+ %tmp10179 = getelementptr inbounds float, float* %tmp10178, i64 1
+ %tmp10180 = getelementptr inbounds float, float* %tmp10179, i64 1
+ %tmp10181 = getelementptr inbounds float, float* %tmp10180, i64 1
+ %tmp10182 = getelementptr inbounds float, float* %tmp10181, i64 1
+ %tmp10183 = getelementptr inbounds float, float* %tmp10182, i64 1
+ %tmp10184 = getelementptr inbounds float, float* %tmp10183, i64 1
+ %tmp10185 = getelementptr inbounds float, float* %tmp10184, i64 1
+ %tmp10186 = getelementptr inbounds float, float* %tmp10185, i64 1
+ %tmp10187 = getelementptr inbounds float, float* %tmp10186, i64 1
+ %tmp10188 = getelementptr inbounds float, float* %tmp10187, i64 1
+ %tmp10189 = getelementptr inbounds float, float* %tmp10188, i64 1
+ %tmp10190 = getelementptr inbounds float, float* %tmp10189, i64 1
+ %tmp10191 = getelementptr inbounds float, float* %tmp10190, i64 1
+ %tmp10192 = getelementptr inbounds float, float* %tmp10191, i64 1
+ %tmp10193 = getelementptr inbounds float, float* %tmp10192, i64 1
+ %tmp10194 = getelementptr inbounds float, float* %tmp10193, i64 1
+ %tmp10195 = getelementptr inbounds float, float* %tmp10194, i64 1
+ %tmp10196 = getelementptr inbounds float, float* %tmp10195, i64 1
+ %tmp10197 = getelementptr inbounds float, float* %tmp10196, i64 1
+ %tmp10198 = getelementptr inbounds float, float* %tmp10197, i64 1
+ %tmp10199 = getelementptr inbounds float, float* %tmp10198, i64 1
+ %tmp10200 = getelementptr inbounds float, float* %tmp10199, i64 1
+ %tmp10201 = getelementptr inbounds float, float* %tmp10200, i64 1
+ %tmp10202 = getelementptr inbounds float, float* %tmp10201, i64 1
+ %tmp10203 = getelementptr inbounds float, float* %tmp10202, i64 1
+ %tmp10204 = getelementptr inbounds float, float* %tmp10203, i64 1
+ %tmp10205 = getelementptr inbounds float, float* %tmp10204, i64 1
+ %tmp10206 = getelementptr inbounds float, float* %tmp10205, i64 1
+ %tmp10207 = getelementptr inbounds float, float* %tmp10206, i64 1
+ %tmp10208 = getelementptr inbounds float, float* %tmp10207, i64 1
+ %tmp10209 = getelementptr inbounds float, float* %tmp10208, i64 1
+ %tmp10210 = getelementptr inbounds float, float* %tmp10209, i64 1
+ %tmp10211 = getelementptr inbounds float, float* %tmp10210, i64 1
+ %tmp10212 = getelementptr inbounds float, float* %tmp10211, i64 1
+ %tmp10213 = getelementptr inbounds float, float* %tmp10212, i64 1
+ %tmp10214 = getelementptr inbounds float, float* %tmp10213, i64 1
+ %tmp10215 = getelementptr inbounds float, float* %tmp10214, i64 1
+ %tmp10216 = getelementptr inbounds float, float* %tmp10215, i64 1
+ %tmp10217 = getelementptr inbounds float, float* %tmp10216, i64 1
+ %tmp10218 = getelementptr inbounds float, float* %tmp10217, i64 1
+ %tmp10219 = getelementptr inbounds float, float* %tmp10218, i64 1
+ %tmp10220 = getelementptr inbounds float, float* %tmp10219, i64 1
+ %tmp10221 = getelementptr inbounds float, float* %tmp10220, i64 1
+ %tmp10222 = getelementptr inbounds float, float* %tmp10221, i64 1
+ %tmp10223 = getelementptr inbounds float, float* %tmp10222, i64 1
+ %tmp10224 = getelementptr inbounds float, float* %tmp10223, i64 1
+ %tmp10225 = getelementptr inbounds float, float* %tmp10224, i64 1
+ %tmp10226 = getelementptr inbounds float, float* %tmp10225, i64 1
+ %tmp10227 = getelementptr inbounds float, float* %tmp10226, i64 1
+ %tmp10228 = getelementptr inbounds float, float* %tmp10227, i64 1
+ %tmp10229 = getelementptr inbounds float, float* %tmp10228, i64 1
+ %tmp10230 = getelementptr inbounds float, float* %tmp10229, i64 1
+ %tmp10231 = getelementptr inbounds float, float* %tmp10230, i64 1
+ %tmp10232 = getelementptr inbounds float, float* %tmp10231, i64 1
+ %tmp10233 = getelementptr inbounds float, float* %tmp10232, i64 1
+ %tmp10234 = getelementptr inbounds float, float* %tmp10233, i64 1
+ %tmp10235 = getelementptr inbounds float, float* %tmp10234, i64 1
+ %tmp10236 = getelementptr inbounds float, float* %tmp10235, i64 1
+ %tmp10237 = getelementptr inbounds float, float* %tmp10236, i64 1
+ %tmp10238 = getelementptr inbounds float, float* %tmp10237, i64 1
+ %tmp10239 = getelementptr inbounds float, float* %tmp10238, i64 1
+ %tmp10240 = getelementptr inbounds float, float* %tmp10239, i64 1
+ %tmp10241 = getelementptr inbounds float, float* %tmp10240, i64 1
+ %tmp10242 = getelementptr inbounds float, float* %tmp10241, i64 1
+ %tmp10243 = getelementptr inbounds float, float* %tmp10242, i64 1
+ %tmp10244 = getelementptr inbounds float, float* %tmp10243, i64 1
+ %tmp10245 = getelementptr inbounds float, float* %tmp10244, i64 1
+ %tmp10246 = getelementptr inbounds float, float* %tmp10245, i64 1
+ %tmp10247 = getelementptr inbounds float, float* %tmp10246, i64 1
+ %tmp10248 = getelementptr inbounds float, float* %tmp10247, i64 1
+ %tmp10249 = getelementptr inbounds float, float* %tmp10248, i64 1
+ %tmp10250 = getelementptr inbounds float, float* %tmp10249, i64 1
+ %tmp10251 = getelementptr inbounds float, float* %tmp10250, i64 1
+ %tmp10252 = getelementptr inbounds float, float* %tmp10251, i64 1
+ %tmp10253 = getelementptr inbounds float, float* %tmp10252, i64 1
+ %tmp10254 = getelementptr inbounds float, float* %tmp10253, i64 1
+ %tmp10255 = getelementptr inbounds float, float* %tmp10254, i64 1
+ %tmp10256 = getelementptr inbounds float, float* %tmp10255, i64 1
+ %tmp10257 = getelementptr inbounds float, float* %tmp10256, i64 1
+ %tmp10258 = getelementptr inbounds float, float* %tmp10257, i64 1
+ %tmp10259 = getelementptr inbounds float, float* %tmp10258, i64 1
+ %tmp10260 = getelementptr inbounds float, float* %tmp10259, i64 1
+ %tmp10261 = getelementptr inbounds float, float* %tmp10260, i64 1
+ %tmp10262 = getelementptr inbounds float, float* %tmp10261, i64 1
+ %tmp10263 = getelementptr inbounds float, float* %tmp10262, i64 1
+ %tmp10264 = getelementptr inbounds float, float* %tmp10263, i64 1
+ %tmp10265 = getelementptr inbounds float, float* %tmp10264, i64 1
+ %tmp10266 = getelementptr inbounds float, float* %tmp10265, i64 1
+ %tmp10267 = getelementptr inbounds float, float* %tmp10266, i64 1
+ %tmp10268 = getelementptr inbounds float, float* %tmp10267, i64 1
+ %tmp10269 = getelementptr inbounds float, float* %tmp10268, i64 1
+ %tmp10270 = getelementptr inbounds float, float* %tmp10269, i64 1
+ %tmp10271 = getelementptr inbounds float, float* %tmp10270, i64 1
+ %tmp10272 = getelementptr inbounds float, float* %tmp10271, i64 1
+ %tmp10273 = getelementptr inbounds float, float* %tmp10272, i64 1
+ %tmp10274 = getelementptr inbounds float, float* %tmp10273, i64 1
+ %tmp10275 = getelementptr inbounds float, float* %tmp10274, i64 1
+ %tmp10276 = getelementptr inbounds float, float* %tmp10275, i64 1
+ %tmp10277 = getelementptr inbounds float, float* %tmp10276, i64 1
+ %tmp10278 = getelementptr inbounds float, float* %tmp10277, i64 1
+ %tmp10279 = getelementptr inbounds float, float* %tmp10278, i64 1
+ %tmp10280 = getelementptr inbounds float, float* %tmp10279, i64 1
+ %tmp10281 = getelementptr inbounds float, float* %tmp10280, i64 1
+ %tmp10282 = getelementptr inbounds float, float* %tmp10281, i64 1
+ %tmp10283 = getelementptr inbounds float, float* %tmp10282, i64 1
+ %tmp10284 = getelementptr inbounds float, float* %tmp10283, i64 1
+ %tmp10285 = getelementptr inbounds float, float* %tmp10284, i64 1
+ %tmp10286 = getelementptr inbounds float, float* %tmp10285, i64 1
+ %tmp10287 = getelementptr inbounds float, float* %tmp10286, i64 1
+ %tmp10288 = getelementptr inbounds float, float* %tmp10287, i64 1
+ %tmp10289 = getelementptr inbounds float, float* %tmp10288, i64 1
+ %tmp10290 = getelementptr inbounds float, float* %tmp10289, i64 1
+ %tmp10291 = getelementptr inbounds float, float* %tmp10290, i64 1
+ %tmp10292 = getelementptr inbounds float, float* %tmp10291, i64 1
+ %tmp10293 = getelementptr inbounds float, float* %tmp10292, i64 1
+ %tmp10294 = getelementptr inbounds float, float* %tmp10293, i64 1
+ %tmp10295 = getelementptr inbounds float, float* %tmp10294, i64 1
+ %tmp10296 = getelementptr inbounds float, float* %tmp10295, i64 1
+ %tmp10297 = getelementptr inbounds float, float* %tmp10296, i64 1
+ %tmp10298 = getelementptr inbounds float, float* %tmp10297, i64 1
+ %tmp10299 = getelementptr inbounds float, float* %tmp10298, i64 1
+ %tmp10300 = getelementptr inbounds float, float* %tmp10299, i64 1
+ %tmp10301 = getelementptr inbounds float, float* %tmp10300, i64 1
+ %tmp10302 = getelementptr inbounds float, float* %tmp10301, i64 1
+ %tmp10303 = getelementptr inbounds float, float* %tmp10302, i64 1
+ %tmp10304 = getelementptr inbounds float, float* %tmp10303, i64 1
+ %tmp10305 = getelementptr inbounds float, float* %tmp10304, i64 1
+ %tmp10306 = getelementptr inbounds float, float* %tmp10305, i64 1
+ %tmp10307 = getelementptr inbounds float, float* %tmp10306, i64 1
+ %tmp10308 = getelementptr inbounds float, float* %tmp10307, i64 1
+ %tmp10309 = getelementptr inbounds float, float* %tmp10308, i64 1
+ %tmp10310 = getelementptr inbounds float, float* %tmp10309, i64 1
+ %tmp10311 = getelementptr inbounds float, float* %tmp10310, i64 1
+ %tmp10312 = getelementptr inbounds float, float* %tmp10311, i64 1
+ %tmp10313 = getelementptr inbounds float, float* %tmp10312, i64 1
+ %tmp10314 = getelementptr inbounds float, float* %tmp10313, i64 1
+ %tmp10315 = getelementptr inbounds float, float* %tmp10314, i64 1
+ %tmp10316 = getelementptr inbounds float, float* %tmp10315, i64 1
+ %tmp10317 = getelementptr inbounds float, float* %tmp10316, i64 1
+ %tmp10318 = getelementptr inbounds float, float* %tmp10317, i64 1
+ %tmp10319 = getelementptr inbounds float, float* %tmp10318, i64 1
+ %tmp10320 = getelementptr inbounds float, float* %tmp10319, i64 1
+ %tmp10321 = getelementptr inbounds float, float* %tmp10320, i64 1
+ %tmp10322 = getelementptr inbounds float, float* %tmp10321, i64 1
+ %tmp10323 = getelementptr inbounds float, float* %tmp10322, i64 1
+ %tmp10324 = getelementptr inbounds float, float* %tmp10323, i64 1
+ %tmp10325 = getelementptr inbounds float, float* %tmp10324, i64 1
+ %tmp10326 = getelementptr inbounds float, float* %tmp10325, i64 1
+ %tmp10327 = getelementptr inbounds float, float* %tmp10326, i64 1
+ %tmp10328 = getelementptr inbounds float, float* %tmp10327, i64 1
+ %tmp10329 = getelementptr inbounds float, float* %tmp10328, i64 1
+ %tmp10330 = getelementptr inbounds float, float* %tmp10329, i64 1
+ %tmp10331 = getelementptr inbounds float, float* %tmp10330, i64 1
+ %tmp10332 = getelementptr inbounds float, float* %tmp10331, i64 1
+ %tmp10333 = getelementptr inbounds float, float* %tmp10332, i64 1
+ %tmp10334 = getelementptr inbounds float, float* %tmp10333, i64 1
+ %tmp10335 = getelementptr inbounds float, float* %tmp10334, i64 1
+ %tmp10336 = getelementptr inbounds float, float* %tmp10335, i64 1
+ %tmp10337 = getelementptr inbounds float, float* %tmp10336, i64 1
+ %tmp10338 = getelementptr inbounds float, float* %tmp10337, i64 1
+ %tmp10339 = getelementptr inbounds float, float* %tmp10338, i64 1
+ %tmp10340 = getelementptr inbounds float, float* %tmp10339, i64 1
+ %tmp10341 = getelementptr inbounds float, float* %tmp10340, i64 1
+ %tmp10342 = getelementptr inbounds float, float* %tmp10341, i64 1
+ %tmp10343 = getelementptr inbounds float, float* %tmp10342, i64 1
+ %tmp10344 = getelementptr inbounds float, float* %tmp10343, i64 1
+ %tmp10345 = getelementptr inbounds float, float* %tmp10344, i64 1
+ %tmp10346 = getelementptr inbounds float, float* %tmp10345, i64 1
+ %tmp10347 = getelementptr inbounds float, float* %tmp10346, i64 1
+ %tmp10348 = getelementptr inbounds float, float* %tmp10347, i64 1
+ %tmp10349 = getelementptr inbounds float, float* %tmp10348, i64 1
+ %tmp10350 = getelementptr inbounds float, float* %tmp10349, i64 1
+ %tmp10351 = getelementptr inbounds float, float* %tmp10350, i64 1
+ %tmp10352 = getelementptr inbounds float, float* %tmp10351, i64 1
+ %tmp10353 = getelementptr inbounds float, float* %tmp10352, i64 1
+ %tmp10354 = getelementptr inbounds float, float* %tmp10353, i64 1
+ %tmp10355 = getelementptr inbounds float, float* %tmp10354, i64 1
+ %tmp10356 = getelementptr inbounds float, float* %tmp10355, i64 1
+ %tmp10357 = getelementptr inbounds float, float* %tmp10356, i64 1
+ %tmp10358 = getelementptr inbounds float, float* %tmp10357, i64 1
+ %tmp10359 = getelementptr inbounds float, float* %tmp10358, i64 1
+ %tmp10360 = getelementptr inbounds float, float* %tmp10359, i64 1
+ %tmp10361 = getelementptr inbounds float, float* %tmp10360, i64 1
+ %tmp10362 = getelementptr inbounds float, float* %tmp10361, i64 1
+ %tmp10363 = getelementptr inbounds float, float* %tmp10362, i64 1
+ %tmp10364 = getelementptr inbounds float, float* %tmp10363, i64 1
+ %tmp10365 = getelementptr inbounds float, float* %tmp10364, i64 1
+ %tmp10366 = getelementptr inbounds float, float* %tmp10365, i64 1
+ %tmp10367 = getelementptr inbounds float, float* %tmp10366, i64 1
+ %tmp10368 = getelementptr inbounds float, float* %tmp10367, i64 1
+ %tmp10369 = getelementptr inbounds float, float* %tmp10368, i64 1
+ %tmp10370 = getelementptr inbounds float, float* %tmp10369, i64 1
+ %tmp10371 = getelementptr inbounds float, float* %tmp10370, i64 1
+ %tmp10372 = getelementptr inbounds float, float* %tmp10371, i64 1
+ %tmp10373 = getelementptr inbounds float, float* %tmp10372, i64 1
+ %tmp10374 = getelementptr inbounds float, float* %tmp10373, i64 1
+ %tmp10375 = getelementptr inbounds float, float* %tmp10374, i64 1
+ %tmp10376 = getelementptr inbounds float, float* %tmp10375, i64 1
+ %tmp10377 = getelementptr inbounds float, float* %tmp10376, i64 1
+ %tmp10378 = getelementptr inbounds float, float* %tmp10377, i64 1
+ %tmp10379 = getelementptr inbounds float, float* %tmp10378, i64 1
+ %tmp10380 = getelementptr inbounds float, float* %tmp10379, i64 1
+ %tmp10381 = getelementptr inbounds float, float* %tmp10380, i64 1
+ %tmp10382 = getelementptr inbounds float, float* %tmp10381, i64 1
+ %tmp10383 = getelementptr inbounds float, float* %tmp10382, i64 1
+ %tmp10384 = getelementptr inbounds float, float* %tmp10383, i64 1
+ %tmp10385 = getelementptr inbounds float, float* %tmp10384, i64 1
+ %tmp10386 = getelementptr inbounds float, float* %tmp10385, i64 1
+ %tmp10387 = getelementptr inbounds float, float* %tmp10386, i64 1
+ %tmp10388 = getelementptr inbounds float, float* %tmp10387, i64 1
+ %tmp10389 = getelementptr inbounds float, float* %tmp10388, i64 1
+ %tmp10390 = getelementptr inbounds float, float* %tmp10389, i64 1
+ %tmp10391 = getelementptr inbounds float, float* %tmp10390, i64 1
+ %tmp10392 = getelementptr inbounds float, float* %tmp10391, i64 1
+ %tmp10393 = getelementptr inbounds float, float* %tmp10392, i64 1
+ %tmp10394 = getelementptr inbounds float, float* %tmp10393, i64 1
+ %tmp10395 = getelementptr inbounds float, float* %tmp10394, i64 1
+ %tmp10396 = getelementptr inbounds float, float* %tmp10395, i64 1
+ %tmp10397 = getelementptr inbounds float, float* %tmp10396, i64 1
+ %tmp10398 = getelementptr inbounds float, float* %tmp10397, i64 1
+ %tmp10399 = getelementptr inbounds float, float* %tmp10398, i64 1
+ %tmp10400 = getelementptr inbounds float, float* %tmp10399, i64 1
+ %tmp10401 = getelementptr inbounds float, float* %tmp10400, i64 1
+ %tmp10402 = getelementptr inbounds float, float* %tmp10401, i64 1
+ %tmp10403 = getelementptr inbounds float, float* %tmp10402, i64 1
+ %tmp10404 = getelementptr inbounds float, float* %tmp10403, i64 1
+ %tmp10405 = getelementptr inbounds float, float* %tmp10404, i64 1
+ %tmp10406 = getelementptr inbounds float, float* %tmp10405, i64 1
+ %tmp10407 = getelementptr inbounds float, float* %tmp10406, i64 1
+ %tmp10408 = getelementptr inbounds float, float* %tmp10407, i64 1
+ %tmp10409 = getelementptr inbounds float, float* %tmp10408, i64 1
+ %tmp10410 = getelementptr inbounds float, float* %tmp10409, i64 1
+ %tmp10411 = getelementptr inbounds float, float* %tmp10410, i64 1
+ %tmp10412 = getelementptr inbounds float, float* %tmp10411, i64 1
+ %tmp10413 = getelementptr inbounds float, float* %tmp10412, i64 1
+ %tmp10414 = getelementptr inbounds float, float* %tmp10413, i64 1
+ %tmp10415 = getelementptr inbounds float, float* %tmp10414, i64 1
+ %tmp10416 = getelementptr inbounds float, float* %tmp10415, i64 1
+ %tmp10417 = getelementptr inbounds float, float* %tmp10416, i64 1
+ %tmp10418 = getelementptr inbounds float, float* %tmp10417, i64 1
+ %tmp10419 = getelementptr inbounds float, float* %tmp10418, i64 1
+ %tmp10420 = getelementptr inbounds float, float* %tmp10419, i64 1
+ %tmp10421 = getelementptr inbounds float, float* %tmp10420, i64 1
+ %tmp10422 = getelementptr inbounds float, float* %tmp10421, i64 1
+ %tmp10423 = getelementptr inbounds float, float* %tmp10422, i64 1
+ %tmp10424 = getelementptr inbounds float, float* %tmp10423, i64 1
+ %tmp10425 = getelementptr inbounds float, float* %tmp10424, i64 1
+ %tmp10426 = getelementptr inbounds float, float* %tmp10425, i64 1
+ %tmp10427 = getelementptr inbounds float, float* %tmp10426, i64 1
+ %tmp10428 = getelementptr inbounds float, float* %tmp10427, i64 1
+ %tmp10429 = getelementptr inbounds float, float* %tmp10428, i64 1
+ %tmp10430 = getelementptr inbounds float, float* %tmp10429, i64 1
+ %tmp10431 = getelementptr inbounds float, float* %tmp10430, i64 1
+ %tmp10432 = getelementptr inbounds float, float* %tmp10431, i64 1
+ %tmp10433 = getelementptr inbounds float, float* %tmp10432, i64 1
+ %tmp10434 = getelementptr inbounds float, float* %tmp10433, i64 1
+ %tmp10435 = getelementptr inbounds float, float* %tmp10434, i64 1
+ %tmp10436 = getelementptr inbounds float, float* %tmp10435, i64 1
+ %tmp10437 = getelementptr inbounds float, float* %tmp10436, i64 1
+ %tmp10438 = getelementptr inbounds float, float* %tmp10437, i64 1
+ %tmp10439 = getelementptr inbounds float, float* %tmp10438, i64 1
+ %tmp10440 = getelementptr inbounds float, float* %tmp10439, i64 1
+ %tmp10441 = getelementptr inbounds float, float* %tmp10440, i64 1
+ %tmp10442 = getelementptr inbounds float, float* %tmp10441, i64 1
+ %tmp10443 = getelementptr inbounds float, float* %tmp10442, i64 1
+ %tmp10444 = getelementptr inbounds float, float* %tmp10443, i64 1
+ %tmp10445 = getelementptr inbounds float, float* %tmp10444, i64 1
+ %tmp10446 = getelementptr inbounds float, float* %tmp10445, i64 1
+ %tmp10447 = getelementptr inbounds float, float* %tmp10446, i64 1
+ %tmp10448 = getelementptr inbounds float, float* %tmp10447, i64 1
+ %tmp10449 = getelementptr inbounds float, float* %tmp10448, i64 1
+ %tmp10450 = getelementptr inbounds float, float* %tmp10449, i64 1
+ %tmp10451 = getelementptr inbounds float, float* %tmp10450, i64 1
+ %tmp10452 = getelementptr inbounds float, float* %tmp10451, i64 1
+ %tmp10453 = getelementptr inbounds float, float* %tmp10452, i64 1
+ %tmp10454 = getelementptr inbounds float, float* %tmp10453, i64 1
+ %tmp10455 = getelementptr inbounds float, float* %tmp10454, i64 1
+ %tmp10456 = getelementptr inbounds float, float* %tmp10455, i64 1
+ %tmp10457 = getelementptr inbounds float, float* %tmp10456, i64 1
+ %tmp10458 = getelementptr inbounds float, float* %tmp10457, i64 1
+ %tmp10459 = getelementptr inbounds float, float* %tmp10458, i64 1
+ %tmp10460 = getelementptr inbounds float, float* %tmp10459, i64 1
+ %tmp10461 = getelementptr inbounds float, float* %tmp10460, i64 1
+ %tmp10462 = getelementptr inbounds float, float* %tmp10461, i64 1
+ %tmp10463 = getelementptr inbounds float, float* %tmp10462, i64 1
+ %tmp10464 = getelementptr inbounds float, float* %tmp10463, i64 1
+ %tmp10465 = getelementptr inbounds float, float* %tmp10464, i64 1
+ %tmp10466 = getelementptr inbounds float, float* %tmp10465, i64 1
+ %tmp10467 = getelementptr inbounds float, float* %tmp10466, i64 1
+ %tmp10468 = getelementptr inbounds float, float* %tmp10467, i64 1
+ %tmp10469 = getelementptr inbounds float, float* %tmp10468, i64 1
+ %tmp10470 = getelementptr inbounds float, float* %tmp10469, i64 1
+ %tmp10471 = getelementptr inbounds float, float* %tmp10470, i64 1
+ %tmp10472 = getelementptr inbounds float, float* %tmp10471, i64 1
+ %tmp10473 = getelementptr inbounds float, float* %tmp10472, i64 1
+ %tmp10474 = getelementptr inbounds float, float* %tmp10473, i64 1
+ %tmp10475 = getelementptr inbounds float, float* %tmp10474, i64 1
+ %tmp10476 = getelementptr inbounds float, float* %tmp10475, i64 1
+ %tmp10477 = getelementptr inbounds float, float* %tmp10476, i64 1
+ %tmp10478 = getelementptr inbounds float, float* %tmp10477, i64 1
+ %tmp10479 = getelementptr inbounds float, float* %tmp10478, i64 1
+ %tmp10480 = getelementptr inbounds float, float* %tmp10479, i64 1
+ %tmp10481 = getelementptr inbounds float, float* %tmp10480, i64 1
+ %tmp10482 = getelementptr inbounds float, float* %tmp10481, i64 1
+ %tmp10483 = getelementptr inbounds float, float* %tmp10482, i64 1
+ %tmp10484 = getelementptr inbounds float, float* %tmp10483, i64 1
+ %tmp10485 = getelementptr inbounds float, float* %tmp10484, i64 1
+ %tmp10486 = getelementptr inbounds float, float* %tmp10485, i64 1
+ %tmp10487 = getelementptr inbounds float, float* %tmp10486, i64 1
+ %tmp10488 = getelementptr inbounds float, float* %tmp10487, i64 1
+ %tmp10489 = getelementptr inbounds float, float* %tmp10488, i64 1
+ %tmp10490 = getelementptr inbounds float, float* %tmp10489, i64 1
+ %tmp10491 = getelementptr inbounds float, float* %tmp10490, i64 1
+ %tmp10492 = getelementptr inbounds float, float* %tmp10491, i64 1
+ %tmp10493 = getelementptr inbounds float, float* %tmp10492, i64 1
+ %tmp10494 = getelementptr inbounds float, float* %tmp10493, i64 1
+ %tmp10495 = getelementptr inbounds float, float* %tmp10494, i64 1
+ %tmp10496 = getelementptr inbounds float, float* %tmp10495, i64 1
+ %tmp10497 = getelementptr inbounds float, float* %tmp10496, i64 1
+ %tmp10498 = getelementptr inbounds float, float* %tmp10497, i64 1
+ %tmp10499 = getelementptr inbounds float, float* %tmp10498, i64 1
+ %tmp10500 = getelementptr inbounds float, float* %tmp10499, i64 1
+ %tmp10501 = getelementptr inbounds float, float* %tmp10500, i64 1
+ %tmp10502 = getelementptr inbounds float, float* %tmp10501, i64 1
+ %tmp10503 = getelementptr inbounds float, float* %tmp10502, i64 1
+ %tmp10504 = getelementptr inbounds float, float* %tmp10503, i64 1
+ %tmp10505 = getelementptr inbounds float, float* %tmp10504, i64 1
+ %tmp10506 = getelementptr inbounds float, float* %tmp10505, i64 1
+ %tmp10507 = getelementptr inbounds float, float* %tmp10506, i64 1
+ %tmp10508 = getelementptr inbounds float, float* %tmp10507, i64 1
+ %tmp10509 = getelementptr inbounds float, float* %tmp10508, i64 1
+ %tmp10510 = getelementptr inbounds float, float* %tmp10509, i64 1
+ %tmp10511 = getelementptr inbounds float, float* %tmp10510, i64 1
+ %tmp10512 = getelementptr inbounds float, float* %tmp10511, i64 1
+ %tmp10513 = getelementptr inbounds float, float* %tmp10512, i64 1
+ %tmp10514 = getelementptr inbounds float, float* %tmp10513, i64 1
+ %tmp10515 = getelementptr inbounds float, float* %tmp10514, i64 1
+ %tmp10516 = getelementptr inbounds float, float* %tmp10515, i64 1
+ %tmp10517 = getelementptr inbounds float, float* %tmp10516, i64 1
+ %tmp10518 = getelementptr inbounds float, float* %tmp10517, i64 1
+ %tmp10519 = getelementptr inbounds float, float* %tmp10518, i64 1
+ %tmp10520 = getelementptr inbounds float, float* %tmp10519, i64 1
+ %tmp10521 = getelementptr inbounds float, float* %tmp10520, i64 1
+ %tmp10522 = getelementptr inbounds float, float* %tmp10521, i64 1
+ %tmp10523 = getelementptr inbounds float, float* %tmp10522, i64 1
+ %tmp10524 = getelementptr inbounds float, float* %tmp10523, i64 1
+ %tmp10525 = getelementptr inbounds float, float* %tmp10524, i64 1
+ %tmp10526 = getelementptr inbounds float, float* %tmp10525, i64 1
+ %tmp10527 = getelementptr inbounds float, float* %tmp10526, i64 1
+ %tmp10528 = getelementptr inbounds float, float* %tmp10527, i64 1
+ %tmp10529 = getelementptr inbounds float, float* %tmp10528, i64 1
+ %tmp10530 = getelementptr inbounds float, float* %tmp10529, i64 1
+ %tmp10531 = getelementptr inbounds float, float* %tmp10530, i64 1
+ %tmp10532 = getelementptr inbounds float, float* %tmp10531, i64 1
+ %tmp10533 = getelementptr inbounds float, float* %tmp10532, i64 1
+ %tmp10534 = getelementptr inbounds float, float* %tmp10533, i64 1
+ %tmp10535 = getelementptr inbounds float, float* %tmp10534, i64 1
+ %tmp10536 = getelementptr inbounds float, float* %tmp10535, i64 1
+ %tmp10537 = getelementptr inbounds float, float* %tmp10536, i64 1
+ %tmp10538 = getelementptr inbounds float, float* %tmp10537, i64 1
+ %tmp10539 = getelementptr inbounds float, float* %tmp10538, i64 1
+ %tmp10540 = getelementptr inbounds float, float* %tmp10539, i64 1
+ %tmp10541 = getelementptr inbounds float, float* %tmp10540, i64 1
+ %tmp10542 = getelementptr inbounds float, float* %tmp10541, i64 1
+ %tmp10543 = getelementptr inbounds float, float* %tmp10542, i64 1
+ %tmp10544 = getelementptr inbounds float, float* %tmp10543, i64 1
+ %tmp10545 = getelementptr inbounds float, float* %tmp10544, i64 1
+ %tmp10546 = getelementptr inbounds float, float* %tmp10545, i64 1
+ %tmp10547 = getelementptr inbounds float, float* %tmp10546, i64 1
+ %tmp10548 = getelementptr inbounds float, float* %tmp10547, i64 1
+ %tmp10549 = getelementptr inbounds float, float* %tmp10548, i64 1
+ %tmp10550 = getelementptr inbounds float, float* %tmp10549, i64 1
+ %tmp10551 = getelementptr inbounds float, float* %tmp10550, i64 1
+ %tmp10552 = getelementptr inbounds float, float* %tmp10551, i64 1
+ %tmp10553 = getelementptr inbounds float, float* %tmp10552, i64 1
+ %tmp10554 = getelementptr inbounds float, float* %tmp10553, i64 1
+ %tmp10555 = getelementptr inbounds float, float* %tmp10554, i64 1
+ %tmp10556 = getelementptr inbounds float, float* %tmp10555, i64 1
+ %tmp10557 = getelementptr inbounds float, float* %tmp10556, i64 1
+ %tmp10558 = getelementptr inbounds float, float* %tmp10557, i64 1
+ %tmp10559 = getelementptr inbounds float, float* %tmp10558, i64 1
+ %tmp10560 = getelementptr inbounds float, float* %tmp10559, i64 1
+ %tmp10561 = getelementptr inbounds float, float* %tmp10560, i64 1
+ %tmp10562 = getelementptr inbounds float, float* %tmp10561, i64 1
+ %tmp10563 = getelementptr inbounds float, float* %tmp10562, i64 1
+ %tmp10564 = getelementptr inbounds float, float* %tmp10563, i64 1
+ %tmp10565 = getelementptr inbounds float, float* %tmp10564, i64 1
+ %tmp10566 = getelementptr inbounds float, float* %tmp10565, i64 1
+ %tmp10567 = getelementptr inbounds float, float* %tmp10566, i64 1
+ %tmp10568 = getelementptr inbounds float, float* %tmp10567, i64 1
+ %tmp10569 = getelementptr inbounds float, float* %tmp10568, i64 1
+ %tmp10570 = getelementptr inbounds float, float* %tmp10569, i64 1
+ %tmp10571 = getelementptr inbounds float, float* %tmp10570, i64 1
+ %tmp10572 = getelementptr inbounds float, float* %tmp10571, i64 1
+ %tmp10573 = getelementptr inbounds float, float* %tmp10572, i64 1
+ %tmp10574 = getelementptr inbounds float, float* %tmp10573, i64 1
+ %tmp10575 = getelementptr inbounds float, float* %tmp10574, i64 1
+ %tmp10576 = getelementptr inbounds float, float* %tmp10575, i64 1
+ %tmp10577 = getelementptr inbounds float, float* %tmp10576, i64 1
+ %tmp10578 = getelementptr inbounds float, float* %tmp10577, i64 1
+ %tmp10579 = getelementptr inbounds float, float* %tmp10578, i64 1
+ %tmp10580 = getelementptr inbounds float, float* %tmp10579, i64 1
+ %tmp10581 = getelementptr inbounds float, float* %tmp10580, i64 1
+ %tmp10582 = getelementptr inbounds float, float* %tmp10581, i64 1
+ %tmp10583 = getelementptr inbounds float, float* %tmp10582, i64 1
+ %tmp10584 = getelementptr inbounds float, float* %tmp10583, i64 1
+ %tmp10585 = getelementptr inbounds float, float* %tmp10584, i64 1
+ %tmp10586 = getelementptr inbounds float, float* %tmp10585, i64 1
+ %tmp10587 = getelementptr inbounds float, float* %tmp10586, i64 1
+ %tmp10588 = getelementptr inbounds float, float* %tmp10587, i64 1
+ %tmp10589 = getelementptr inbounds float, float* %tmp10588, i64 1
+ %tmp10590 = getelementptr inbounds float, float* %tmp10589, i64 1
+ %tmp10591 = getelementptr inbounds float, float* %tmp10590, i64 1
+ %tmp10592 = getelementptr inbounds float, float* %tmp10591, i64 1
+ %tmp10593 = getelementptr inbounds float, float* %tmp10592, i64 1
+ %tmp10594 = getelementptr inbounds float, float* %tmp10593, i64 1
+ %tmp10595 = getelementptr inbounds float, float* %tmp10594, i64 1
+ %tmp10596 = getelementptr inbounds float, float* %tmp10595, i64 1
+ %tmp10597 = getelementptr inbounds float, float* %tmp10596, i64 1
+ %tmp10598 = getelementptr inbounds float, float* %tmp10597, i64 1
+ %tmp10599 = getelementptr inbounds float, float* %tmp10598, i64 1
+ %tmp10600 = getelementptr inbounds float, float* %tmp10599, i64 1
+ %tmp10601 = getelementptr inbounds float, float* %tmp10600, i64 1
+ %tmp10602 = getelementptr inbounds float, float* %tmp10601, i64 1
+ %tmp10603 = getelementptr inbounds float, float* %tmp10602, i64 1
+ %tmp10604 = getelementptr inbounds float, float* %tmp10603, i64 1
+ %tmp10605 = getelementptr inbounds float, float* %tmp10604, i64 1
+ %tmp10606 = getelementptr inbounds float, float* %tmp10605, i64 1
+ %tmp10607 = getelementptr inbounds float, float* %tmp10606, i64 1
+ %tmp10608 = getelementptr inbounds float, float* %tmp10607, i64 1
+ %tmp10609 = getelementptr inbounds float, float* %tmp10608, i64 1
+ %tmp10610 = getelementptr inbounds float, float* %tmp10609, i64 1
+ %tmp10611 = getelementptr inbounds float, float* %tmp10610, i64 1
+ %tmp10612 = getelementptr inbounds float, float* %tmp10611, i64 1
+ %tmp10613 = getelementptr inbounds float, float* %tmp10612, i64 1
+ %tmp10614 = getelementptr inbounds float, float* %tmp10613, i64 1
+ %tmp10615 = getelementptr inbounds float, float* %tmp10614, i64 1
+ %tmp10616 = getelementptr inbounds float, float* %tmp10615, i64 1
+ %tmp10617 = getelementptr inbounds float, float* %tmp10616, i64 1
+ %tmp10618 = getelementptr inbounds float, float* %tmp10617, i64 1
+ %tmp10619 = getelementptr inbounds float, float* %tmp10618, i64 1
+ %tmp10620 = getelementptr inbounds float, float* %tmp10619, i64 1
+ %tmp10621 = getelementptr inbounds float, float* %tmp10620, i64 1
+ %tmp10622 = getelementptr inbounds float, float* %tmp10621, i64 1
+ %tmp10623 = getelementptr inbounds float, float* %tmp10622, i64 1
+ %tmp10624 = getelementptr inbounds float, float* %tmp10623, i64 1
+ %tmp10625 = getelementptr inbounds float, float* %tmp10624, i64 1
+ %tmp10626 = getelementptr inbounds float, float* %tmp10625, i64 1
+ %tmp10627 = getelementptr inbounds float, float* %tmp10626, i64 1
+ %tmp10628 = getelementptr inbounds float, float* %tmp10627, i64 1
+ %tmp10629 = getelementptr inbounds float, float* %tmp10628, i64 1
+ %tmp10630 = getelementptr inbounds float, float* %tmp10629, i64 1
+ %tmp10631 = getelementptr inbounds float, float* %tmp10630, i64 1
+ %tmp10632 = getelementptr inbounds float, float* %tmp10631, i64 1
+ %tmp10633 = getelementptr inbounds float, float* %tmp10632, i64 1
+ %tmp10634 = getelementptr inbounds float, float* %tmp10633, i64 1
+ %tmp10635 = getelementptr inbounds float, float* %tmp10634, i64 1
+ %tmp10636 = getelementptr inbounds float, float* %tmp10635, i64 1
+ %tmp10637 = getelementptr inbounds float, float* %tmp10636, i64 1
+ %tmp10638 = getelementptr inbounds float, float* %tmp10637, i64 1
+ %tmp10639 = getelementptr inbounds float, float* %tmp10638, i64 1
+ %tmp10640 = getelementptr inbounds float, float* %tmp10639, i64 1
+ %tmp10641 = getelementptr inbounds float, float* %tmp10640, i64 1
+ %tmp10642 = getelementptr inbounds float, float* %tmp10641, i64 1
+ %tmp10643 = getelementptr inbounds float, float* %tmp10642, i64 1
+ %tmp10644 = getelementptr inbounds float, float* %tmp10643, i64 1
+ %tmp10645 = getelementptr inbounds float, float* %tmp10644, i64 1
+ %tmp10646 = getelementptr inbounds float, float* %tmp10645, i64 1
+ %tmp10647 = getelementptr inbounds float, float* %tmp10646, i64 1
+ %tmp10648 = getelementptr inbounds float, float* %tmp10647, i64 1
+ %tmp10649 = getelementptr inbounds float, float* %tmp10648, i64 1
+ %tmp10650 = getelementptr inbounds float, float* %tmp10649, i64 1
+ %tmp10651 = getelementptr inbounds float, float* %tmp10650, i64 1
+ %tmp10652 = getelementptr inbounds float, float* %tmp10651, i64 1
+ %tmp10653 = getelementptr inbounds float, float* %tmp10652, i64 1
+ %tmp10654 = getelementptr inbounds float, float* %tmp10653, i64 1
+ %tmp10655 = getelementptr inbounds float, float* %tmp10654, i64 1
+ %tmp10656 = getelementptr inbounds float, float* %tmp10655, i64 1
+ %tmp10657 = getelementptr inbounds float, float* %tmp10656, i64 1
+ %tmp10658 = getelementptr inbounds float, float* %tmp10657, i64 1
+ %tmp10659 = getelementptr inbounds float, float* %tmp10658, i64 1
+ %tmp10660 = getelementptr inbounds float, float* %tmp10659, i64 1
+ %tmp10661 = getelementptr inbounds float, float* %tmp10660, i64 1
+ %tmp10662 = getelementptr inbounds float, float* %tmp10661, i64 1
+ %tmp10663 = getelementptr inbounds float, float* %tmp10662, i64 1
+ %tmp10664 = getelementptr inbounds float, float* %tmp10663, i64 1
+ %tmp10665 = getelementptr inbounds float, float* %tmp10664, i64 1
+ %tmp10666 = getelementptr inbounds float, float* %tmp10665, i64 1
+ %tmp10667 = getelementptr inbounds float, float* %tmp10666, i64 1
+ %tmp10668 = getelementptr inbounds float, float* %tmp10667, i64 1
+ %tmp10669 = getelementptr inbounds float, float* %tmp10668, i64 1
+ %tmp10670 = getelementptr inbounds float, float* %tmp10669, i64 1
+ %tmp10671 = getelementptr inbounds float, float* %tmp10670, i64 1
+ %tmp10672 = getelementptr inbounds float, float* %tmp10671, i64 1
+ %tmp10673 = getelementptr inbounds float, float* %tmp10672, i64 1
+ %tmp10674 = getelementptr inbounds float, float* %tmp10673, i64 1
+ %tmp10675 = getelementptr inbounds float, float* %tmp10674, i64 1
+ %tmp10676 = getelementptr inbounds float, float* %tmp10675, i64 1
+ %tmp10677 = getelementptr inbounds float, float* %tmp10676, i64 1
+ %tmp10678 = getelementptr inbounds float, float* %tmp10677, i64 1
+ %tmp10679 = getelementptr inbounds float, float* %tmp10678, i64 1
+ %tmp10680 = getelementptr inbounds float, float* %tmp10679, i64 1
+ %tmp10681 = getelementptr inbounds float, float* %tmp10680, i64 1
+ %tmp10682 = getelementptr inbounds float, float* %tmp10681, i64 1
+ %tmp10683 = getelementptr inbounds float, float* %tmp10682, i64 1
+ %tmp10684 = getelementptr inbounds float, float* %tmp10683, i64 1
+ %tmp10685 = getelementptr inbounds float, float* %tmp10684, i64 1
+ %tmp10686 = getelementptr inbounds float, float* %tmp10685, i64 1
+ %tmp10687 = getelementptr inbounds float, float* %tmp10686, i64 1
+ %tmp10688 = getelementptr inbounds float, float* %tmp10687, i64 1
+ %tmp10689 = getelementptr inbounds float, float* %tmp10688, i64 1
+ %tmp10690 = getelementptr inbounds float, float* %tmp10689, i64 1
+ %tmp10691 = getelementptr inbounds float, float* %tmp10690, i64 1
+ %tmp10692 = getelementptr inbounds float, float* %tmp10691, i64 1
+ %tmp10693 = getelementptr inbounds float, float* %tmp10692, i64 1
+ %tmp10694 = getelementptr inbounds float, float* %tmp10693, i64 1
+ %tmp10695 = getelementptr inbounds float, float* %tmp10694, i64 1
+ %tmp10696 = getelementptr inbounds float, float* %tmp10695, i64 1
+ %tmp10697 = getelementptr inbounds float, float* %tmp10696, i64 1
+ %tmp10698 = getelementptr inbounds float, float* %tmp10697, i64 1
+ %tmp10699 = getelementptr inbounds float, float* %tmp10698, i64 1
+ %tmp10700 = getelementptr inbounds float, float* %tmp10699, i64 1
+ %tmp10701 = getelementptr inbounds float, float* %tmp10700, i64 1
+ %tmp10702 = getelementptr inbounds float, float* %tmp10701, i64 1
+ %tmp10703 = getelementptr inbounds float, float* %tmp10702, i64 1
+ %tmp10704 = getelementptr inbounds float, float* %tmp10703, i64 1
+ %tmp10705 = getelementptr inbounds float, float* %tmp10704, i64 1
+ %tmp10706 = getelementptr inbounds float, float* %tmp10705, i64 1
+ %tmp10707 = getelementptr inbounds float, float* %tmp10706, i64 1
+ %tmp10708 = getelementptr inbounds float, float* %tmp10707, i64 1
+ %tmp10709 = getelementptr inbounds float, float* %tmp10708, i64 1
+ %tmp10710 = getelementptr inbounds float, float* %tmp10709, i64 1
+ %tmp10711 = getelementptr inbounds float, float* %tmp10710, i64 1
+ %tmp10712 = getelementptr inbounds float, float* %tmp10711, i64 1
+ %tmp10713 = getelementptr inbounds float, float* %tmp10712, i64 1
+ %tmp10714 = getelementptr inbounds float, float* %tmp10713, i64 1
+ %tmp10715 = getelementptr inbounds float, float* %tmp10714, i64 1
+ %tmp10716 = getelementptr inbounds float, float* %tmp10715, i64 1
+ %tmp10717 = getelementptr inbounds float, float* %tmp10716, i64 1
+ %tmp10718 = getelementptr inbounds float, float* %tmp10717, i64 1
+ %tmp10719 = getelementptr inbounds float, float* %tmp10718, i64 1
+ %tmp10720 = getelementptr inbounds float, float* %tmp10719, i64 1
+ %tmp10721 = getelementptr inbounds float, float* %tmp10720, i64 1
+ %tmp10722 = getelementptr inbounds float, float* %tmp10721, i64 1
+ %tmp10723 = getelementptr inbounds float, float* %tmp10722, i64 1
+ %tmp10724 = getelementptr inbounds float, float* %tmp10723, i64 1
+ %tmp10725 = getelementptr inbounds float, float* %tmp10724, i64 1
+ %tmp10726 = getelementptr inbounds float, float* %tmp10725, i64 1
+ %tmp10727 = getelementptr inbounds float, float* %tmp10726, i64 1
+ %tmp10728 = getelementptr inbounds float, float* %tmp10727, i64 1
+ %tmp10729 = getelementptr inbounds float, float* %tmp10728, i64 1
+ %tmp10730 = getelementptr inbounds float, float* %tmp10729, i64 1
+ %tmp10731 = getelementptr inbounds float, float* %tmp10730, i64 1
+ %tmp10732 = getelementptr inbounds float, float* %tmp10731, i64 1
+ %tmp10733 = getelementptr inbounds float, float* %tmp10732, i64 1
+ %tmp10734 = getelementptr inbounds float, float* %tmp10733, i64 1
+ %tmp10735 = getelementptr inbounds float, float* %tmp10734, i64 1
+ %tmp10736 = getelementptr inbounds float, float* %tmp10735, i64 1
+ %tmp10737 = getelementptr inbounds float, float* %tmp10736, i64 1
+ %tmp10738 = getelementptr inbounds float, float* %tmp10737, i64 1
+ %tmp10739 = getelementptr inbounds float, float* %tmp10738, i64 1
+ %tmp10740 = getelementptr inbounds float, float* %tmp10739, i64 1
+ %tmp10741 = getelementptr inbounds float, float* %tmp10740, i64 1
+ %tmp10742 = getelementptr inbounds float, float* %tmp10741, i64 1
+ %tmp10743 = getelementptr inbounds float, float* %tmp10742, i64 1
+ %tmp10744 = getelementptr inbounds float, float* %tmp10743, i64 1
+ %tmp10745 = getelementptr inbounds float, float* %tmp10744, i64 1
+ %tmp10746 = getelementptr inbounds float, float* %tmp10745, i64 1
+ %tmp10747 = getelementptr inbounds float, float* %tmp10746, i64 1
+ %tmp10748 = getelementptr inbounds float, float* %tmp10747, i64 1
+ %tmp10749 = getelementptr inbounds float, float* %tmp10748, i64 1
+ %tmp10750 = getelementptr inbounds float, float* %tmp10749, i64 1
+ %tmp10751 = getelementptr inbounds float, float* %tmp10750, i64 1
+ %tmp10752 = getelementptr inbounds float, float* %tmp10751, i64 1
+ %tmp10753 = getelementptr inbounds float, float* %tmp10752, i64 1
+ %tmp10754 = getelementptr inbounds float, float* %tmp10753, i64 1
+ %tmp10755 = getelementptr inbounds float, float* %tmp10754, i64 1
+ %tmp10756 = getelementptr inbounds float, float* %tmp10755, i64 1
+ %tmp10757 = getelementptr inbounds float, float* %tmp10756, i64 1
+ %tmp10758 = getelementptr inbounds float, float* %tmp10757, i64 1
+ %tmp10759 = getelementptr inbounds float, float* %tmp10758, i64 1
+ %tmp10760 = getelementptr inbounds float, float* %tmp10759, i64 1
+ %tmp10761 = getelementptr inbounds float, float* %tmp10760, i64 1
+ %tmp10762 = getelementptr inbounds float, float* %tmp10761, i64 1
+ %tmp10763 = getelementptr inbounds float, float* %tmp10762, i64 1
+ %tmp10764 = getelementptr inbounds float, float* %tmp10763, i64 1
+ %tmp10765 = getelementptr inbounds float, float* %tmp10764, i64 1
+ %tmp10766 = getelementptr inbounds float, float* %tmp10765, i64 1
+ %tmp10767 = getelementptr inbounds float, float* %tmp10766, i64 1
+ %tmp10768 = getelementptr inbounds float, float* %tmp10767, i64 1
+ %tmp10769 = getelementptr inbounds float, float* %tmp10768, i64 1
+ %tmp10770 = getelementptr inbounds float, float* %tmp10769, i64 1
+ %tmp10771 = getelementptr inbounds float, float* %tmp10770, i64 1
+ %tmp10772 = getelementptr inbounds float, float* %tmp10771, i64 1
+ %tmp10773 = getelementptr inbounds float, float* %tmp10772, i64 1
+ %tmp10774 = getelementptr inbounds float, float* %tmp10773, i64 1
+ %tmp10775 = getelementptr inbounds float, float* %tmp10774, i64 1
+ %tmp10776 = getelementptr inbounds float, float* %tmp10775, i64 1
+ %tmp10777 = getelementptr inbounds float, float* %tmp10776, i64 1
+ %tmp10778 = getelementptr inbounds float, float* %tmp10777, i64 1
+ %tmp10779 = getelementptr inbounds float, float* %tmp10778, i64 1
+ %tmp10780 = getelementptr inbounds float, float* %tmp10779, i64 1
+ %tmp10781 = getelementptr inbounds float, float* %tmp10780, i64 1
+ %tmp10782 = getelementptr inbounds float, float* %tmp10781, i64 1
+ %tmp10783 = getelementptr inbounds float, float* %tmp10782, i64 1
+ %tmp10784 = getelementptr inbounds float, float* %tmp10783, i64 1
+ %tmp10785 = getelementptr inbounds float, float* %tmp10784, i64 1
+ %tmp10786 = getelementptr inbounds float, float* %tmp10785, i64 1
+ %tmp10787 = getelementptr inbounds float, float* %tmp10786, i64 1
+ %tmp10788 = getelementptr inbounds float, float* %tmp10787, i64 1
+ %tmp10789 = getelementptr inbounds float, float* %tmp10788, i64 1
+ %tmp10790 = getelementptr inbounds float, float* %tmp10789, i64 1
+ %tmp10791 = getelementptr inbounds float, float* %tmp10790, i64 1
+ %tmp10792 = getelementptr inbounds float, float* %tmp10791, i64 1
+ %tmp10793 = getelementptr inbounds float, float* %tmp10792, i64 1
+ %tmp10794 = getelementptr inbounds float, float* %tmp10793, i64 1
+ %tmp10795 = getelementptr inbounds float, float* %tmp10794, i64 1
+ %tmp10796 = getelementptr inbounds float, float* %tmp10795, i64 1
+ %tmp10797 = getelementptr inbounds float, float* %tmp10796, i64 1
+ %tmp10798 = getelementptr inbounds float, float* %tmp10797, i64 1
+ %tmp10799 = getelementptr inbounds float, float* %tmp10798, i64 1
+ %tmp10800 = getelementptr inbounds float, float* %tmp10799, i64 1
+ %tmp10801 = getelementptr inbounds float, float* %tmp10800, i64 1
+ %tmp10802 = getelementptr inbounds float, float* %tmp10801, i64 1
+ %tmp10803 = getelementptr inbounds float, float* %tmp10802, i64 1
+ %tmp10804 = getelementptr inbounds float, float* %tmp10803, i64 1
+ %tmp10805 = getelementptr inbounds float, float* %tmp10804, i64 1
+ %tmp10806 = getelementptr inbounds float, float* %tmp10805, i64 1
+ %tmp10807 = getelementptr inbounds float, float* %tmp10806, i64 1
+ %tmp10808 = getelementptr inbounds float, float* %tmp10807, i64 1
+ %tmp10809 = getelementptr inbounds float, float* %tmp10808, i64 1
+ %tmp10810 = getelementptr inbounds float, float* %tmp10809, i64 1
+ %tmp10811 = getelementptr inbounds float, float* %tmp10810, i64 1
+ %tmp10812 = getelementptr inbounds float, float* %tmp10811, i64 1
+ %tmp10813 = getelementptr inbounds float, float* %tmp10812, i64 1
+ %tmp10814 = getelementptr inbounds float, float* %tmp10813, i64 1
+ %tmp10815 = getelementptr inbounds float, float* %tmp10814, i64 1
+ %tmp10816 = getelementptr inbounds float, float* %tmp10815, i64 1
+ %tmp10817 = getelementptr inbounds float, float* %tmp10816, i64 1
+ %tmp10818 = getelementptr inbounds float, float* %tmp10817, i64 1
+ %tmp10819 = getelementptr inbounds float, float* %tmp10818, i64 1
+ %tmp10820 = getelementptr inbounds float, float* %tmp10819, i64 1
+ %tmp10821 = getelementptr inbounds float, float* %tmp10820, i64 1
+ %tmp10822 = getelementptr inbounds float, float* %tmp10821, i64 1
+ %tmp10823 = getelementptr inbounds float, float* %tmp10822, i64 1
+ %tmp10824 = getelementptr inbounds float, float* %tmp10823, i64 1
+ %tmp10825 = getelementptr inbounds float, float* %tmp10824, i64 1
+ %tmp10826 = getelementptr inbounds float, float* %tmp10825, i64 1
+ %tmp10827 = getelementptr inbounds float, float* %tmp10826, i64 1
+ %tmp10828 = getelementptr inbounds float, float* %tmp10827, i64 1
+ %tmp10829 = getelementptr inbounds float, float* %tmp10828, i64 1
+ %tmp10830 = getelementptr inbounds float, float* %tmp10829, i64 1
+ %tmp10831 = getelementptr inbounds float, float* %tmp10830, i64 1
+ %tmp10832 = getelementptr inbounds float, float* %tmp10831, i64 1
+ %tmp10833 = getelementptr inbounds float, float* %tmp10832, i64 1
+ %tmp10834 = getelementptr inbounds float, float* %tmp10833, i64 1
+ %tmp10835 = getelementptr inbounds float, float* %tmp10834, i64 1
+ %tmp10836 = getelementptr inbounds float, float* %tmp10835, i64 1
+ %tmp10837 = getelementptr inbounds float, float* %tmp10836, i64 1
+ %tmp10838 = getelementptr inbounds float, float* %tmp10837, i64 1
+ %tmp10839 = getelementptr inbounds float, float* %tmp10838, i64 1
+ %tmp10840 = getelementptr inbounds float, float* %tmp10839, i64 1
+ %tmp10841 = getelementptr inbounds float, float* %tmp10840, i64 1
+ %tmp10842 = getelementptr inbounds float, float* %tmp10841, i64 1
+ %tmp10843 = getelementptr inbounds float, float* %tmp10842, i64 1
+ %tmp10844 = getelementptr inbounds float, float* %tmp10843, i64 1
+ %tmp10845 = getelementptr inbounds float, float* %tmp10844, i64 1
+ %tmp10846 = getelementptr inbounds float, float* %tmp10845, i64 1
+ %tmp10847 = getelementptr inbounds float, float* %tmp10846, i64 1
+ %tmp10848 = getelementptr inbounds float, float* %tmp10847, i64 1
+ %tmp10849 = getelementptr inbounds float, float* %tmp10848, i64 1
+ %tmp10850 = getelementptr inbounds float, float* %tmp10849, i64 1
+ %tmp10851 = getelementptr inbounds float, float* %tmp10850, i64 1
+ %tmp10852 = getelementptr inbounds float, float* %tmp10851, i64 1
+ %tmp10853 = getelementptr inbounds float, float* %tmp10852, i64 1
+ %tmp10854 = getelementptr inbounds float, float* %tmp10853, i64 1
+ %tmp10855 = getelementptr inbounds float, float* %tmp10854, i64 1
+ %tmp10856 = getelementptr inbounds float, float* %tmp10855, i64 1
+ %tmp10857 = getelementptr inbounds float, float* %tmp10856, i64 1
+ %tmp10858 = getelementptr inbounds float, float* %tmp10857, i64 1
+ %tmp10859 = getelementptr inbounds float, float* %tmp10858, i64 1
+ %tmp10860 = getelementptr inbounds float, float* %tmp10859, i64 1
+ %tmp10861 = getelementptr inbounds float, float* %tmp10860, i64 1
+ %tmp10862 = getelementptr inbounds float, float* %tmp10861, i64 1
+ %tmp10863 = getelementptr inbounds float, float* %tmp10862, i64 1
+ %tmp10864 = getelementptr inbounds float, float* %tmp10863, i64 1
+ %tmp10865 = getelementptr inbounds float, float* %tmp10864, i64 1
+ %tmp10866 = getelementptr inbounds float, float* %tmp10865, i64 1
+ %tmp10867 = getelementptr inbounds float, float* %tmp10866, i64 1
+ %tmp10868 = getelementptr inbounds float, float* %tmp10867, i64 1
+ %tmp10869 = getelementptr inbounds float, float* %tmp10868, i64 1
+ %tmp10870 = getelementptr inbounds float, float* %tmp10869, i64 1
+ %tmp10871 = getelementptr inbounds float, float* %tmp10870, i64 1
+ %tmp10872 = getelementptr inbounds float, float* %tmp10871, i64 1
+ %tmp10873 = getelementptr inbounds float, float* %tmp10872, i64 1
+ %tmp10874 = getelementptr inbounds float, float* %tmp10873, i64 1
+ %tmp10875 = getelementptr inbounds float, float* %tmp10874, i64 1
+ %tmp10876 = getelementptr inbounds float, float* %tmp10875, i64 1
+ %tmp10877 = getelementptr inbounds float, float* %tmp10876, i64 1
+ %tmp10878 = getelementptr inbounds float, float* %tmp10877, i64 1
+ %tmp10879 = getelementptr inbounds float, float* %tmp10878, i64 1
+ %tmp10880 = getelementptr inbounds float, float* %tmp10879, i64 1
+ %tmp10881 = getelementptr inbounds float, float* %tmp10880, i64 1
+ %tmp10882 = getelementptr inbounds float, float* %tmp10881, i64 1
+ %tmp10883 = getelementptr inbounds float, float* %tmp10882, i64 1
+ %tmp10884 = getelementptr inbounds float, float* %tmp10883, i64 1
+ %tmp10885 = getelementptr inbounds float, float* %tmp10884, i64 1
+ %tmp10886 = getelementptr inbounds float, float* %tmp10885, i64 1
+ %tmp10887 = getelementptr inbounds float, float* %tmp10886, i64 1
+ %tmp10888 = getelementptr inbounds float, float* %tmp10887, i64 1
+ %tmp10889 = getelementptr inbounds float, float* %tmp10888, i64 1
+ %tmp10890 = getelementptr inbounds float, float* %tmp10889, i64 1
+ %tmp10891 = getelementptr inbounds float, float* %tmp10890, i64 1
+ %tmp10892 = getelementptr inbounds float, float* %tmp10891, i64 1
+ %tmp10893 = getelementptr inbounds float, float* %tmp10892, i64 1
+ %tmp10894 = getelementptr inbounds float, float* %tmp10893, i64 1
+ %tmp10895 = getelementptr inbounds float, float* %tmp10894, i64 1
+ %tmp10896 = getelementptr inbounds float, float* %tmp10895, i64 1
+ %tmp10897 = getelementptr inbounds float, float* %tmp10896, i64 1
+ %tmp10898 = getelementptr inbounds float, float* %tmp10897, i64 1
+ %tmp10899 = getelementptr inbounds float, float* %tmp10898, i64 1
+ %tmp10900 = getelementptr inbounds float, float* %tmp10899, i64 1
+ %tmp10901 = getelementptr inbounds float, float* %tmp10900, i64 1
+ %tmp10902 = getelementptr inbounds float, float* %tmp10901, i64 1
+ %tmp10903 = getelementptr inbounds float, float* %tmp10902, i64 1
+ %tmp10904 = getelementptr inbounds float, float* %tmp10903, i64 1
+ %tmp10905 = getelementptr inbounds float, float* %tmp10904, i64 1
+ %tmp10906 = getelementptr inbounds float, float* %tmp10905, i64 1
+ %tmp10907 = getelementptr inbounds float, float* %tmp10906, i64 1
+ %tmp10908 = getelementptr inbounds float, float* %tmp10907, i64 1
+ %tmp10909 = getelementptr inbounds float, float* %tmp10908, i64 1
+ %tmp10910 = getelementptr inbounds float, float* %tmp10909, i64 1
+ %tmp10911 = getelementptr inbounds float, float* %tmp10910, i64 1
+ %tmp10912 = getelementptr inbounds float, float* %tmp10911, i64 1
+ %tmp10913 = getelementptr inbounds float, float* %tmp10912, i64 1
+ %tmp10914 = getelementptr inbounds float, float* %tmp10913, i64 1
+ %tmp10915 = getelementptr inbounds float, float* %tmp10914, i64 1
+ %tmp10916 = getelementptr inbounds float, float* %tmp10915, i64 1
+ %tmp10917 = getelementptr inbounds float, float* %tmp10916, i64 1
+ %tmp10918 = getelementptr inbounds float, float* %tmp10917, i64 1
+ %tmp10919 = getelementptr inbounds float, float* %tmp10918, i64 1
+ %tmp10920 = getelementptr inbounds float, float* %tmp10919, i64 1
+ %tmp10921 = getelementptr inbounds float, float* %tmp10920, i64 1
+ %tmp10922 = getelementptr inbounds float, float* %tmp10921, i64 1
+ %tmp10923 = getelementptr inbounds float, float* %tmp10922, i64 1
+ %tmp10924 = getelementptr inbounds float, float* %tmp10923, i64 1
+ %tmp10925 = getelementptr inbounds float, float* %tmp10924, i64 1
+ %tmp10926 = getelementptr inbounds float, float* %tmp10925, i64 1
+ %tmp10927 = getelementptr inbounds float, float* %tmp10926, i64 1
+ %tmp10928 = getelementptr inbounds float, float* %tmp10927, i64 1
+ %tmp10929 = getelementptr inbounds float, float* %tmp10928, i64 1
+ %tmp10930 = getelementptr inbounds float, float* %tmp10929, i64 1
+ %tmp10931 = getelementptr inbounds float, float* %tmp10930, i64 1
+ %tmp10932 = getelementptr inbounds float, float* %tmp10931, i64 1
+ %tmp10933 = getelementptr inbounds float, float* %tmp10932, i64 1
+ %tmp10934 = getelementptr inbounds float, float* %tmp10933, i64 1
+ %tmp10935 = getelementptr inbounds float, float* %tmp10934, i64 1
+ %tmp10936 = getelementptr inbounds float, float* %tmp10935, i64 1
+ %tmp10937 = getelementptr inbounds float, float* %tmp10936, i64 1
+ %tmp10938 = getelementptr inbounds float, float* %tmp10937, i64 1
+ %tmp10939 = getelementptr inbounds float, float* %tmp10938, i64 1
+ %tmp10940 = getelementptr inbounds float, float* %tmp10939, i64 1
+ %tmp10941 = getelementptr inbounds float, float* %tmp10940, i64 1
+ %tmp10942 = getelementptr inbounds float, float* %tmp10941, i64 1
+ %tmp10943 = getelementptr inbounds float, float* %tmp10942, i64 1
+ %tmp10944 = getelementptr inbounds float, float* %tmp10943, i64 1
+ %tmp10945 = getelementptr inbounds float, float* %tmp10944, i64 1
+ %tmp10946 = getelementptr inbounds float, float* %tmp10945, i64 1
+ %tmp10947 = getelementptr inbounds float, float* %tmp10946, i64 1
+ %tmp10948 = getelementptr inbounds float, float* %tmp10947, i64 1
+ %tmp10949 = getelementptr inbounds float, float* %tmp10948, i64 1
+ %tmp10950 = getelementptr inbounds float, float* %tmp10949, i64 1
+ %tmp10951 = getelementptr inbounds float, float* %tmp10950, i64 1
+ %tmp10952 = getelementptr inbounds float, float* %tmp10951, i64 1
+ %tmp10953 = getelementptr inbounds float, float* %tmp10952, i64 1
+ %tmp10954 = getelementptr inbounds float, float* %tmp10953, i64 1
+ %tmp10955 = getelementptr inbounds float, float* %tmp10954, i64 1
+ %tmp10956 = getelementptr inbounds float, float* %tmp10955, i64 1
+ %tmp10957 = getelementptr inbounds float, float* %tmp10956, i64 1
+ %tmp10958 = getelementptr inbounds float, float* %tmp10957, i64 1
+ %tmp10959 = getelementptr inbounds float, float* %tmp10958, i64 1
+ %tmp10960 = getelementptr inbounds float, float* %tmp10959, i64 1
+ %tmp10961 = getelementptr inbounds float, float* %tmp10960, i64 1
+ %tmp10962 = getelementptr inbounds float, float* %tmp10961, i64 1
+ %tmp10963 = getelementptr inbounds float, float* %tmp10962, i64 1
+ %tmp10964 = getelementptr inbounds float, float* %tmp10963, i64 1
+ %tmp10965 = getelementptr inbounds float, float* %tmp10964, i64 1
+ %tmp10966 = getelementptr inbounds float, float* %tmp10965, i64 1
+ %tmp10967 = getelementptr inbounds float, float* %tmp10966, i64 1
+ %tmp10968 = getelementptr inbounds float, float* %tmp10967, i64 1
+ %tmp10969 = getelementptr inbounds float, float* %tmp10968, i64 1
+ %tmp10970 = getelementptr inbounds float, float* %tmp10969, i64 1
+ %tmp10971 = getelementptr inbounds float, float* %tmp10970, i64 1
+ %tmp10972 = getelementptr inbounds float, float* %tmp10971, i64 1
+ %tmp10973 = getelementptr inbounds float, float* %tmp10972, i64 1
+ %tmp10974 = getelementptr inbounds float, float* %tmp10973, i64 1
+ %tmp10975 = getelementptr inbounds float, float* %tmp10974, i64 1
+ %tmp10976 = getelementptr inbounds float, float* %tmp10975, i64 1
+ %tmp10977 = getelementptr inbounds float, float* %tmp10976, i64 1
+ %tmp10978 = getelementptr inbounds float, float* %tmp10977, i64 1
+ %tmp10979 = getelementptr inbounds float, float* %tmp10978, i64 1
+ %tmp10980 = getelementptr inbounds float, float* %tmp10979, i64 1
+ %tmp10981 = getelementptr inbounds float, float* %tmp10980, i64 1
+ %tmp10982 = getelementptr inbounds float, float* %tmp10981, i64 1
+ %tmp10983 = getelementptr inbounds float, float* %tmp10982, i64 1
+ %tmp10984 = getelementptr inbounds float, float* %tmp10983, i64 1
+ %tmp10985 = getelementptr inbounds float, float* %tmp10984, i64 1
+ %tmp10986 = getelementptr inbounds float, float* %tmp10985, i64 1
+ %tmp10987 = getelementptr inbounds float, float* %tmp10986, i64 1
+ %tmp10988 = getelementptr inbounds float, float* %tmp10987, i64 1
+ %tmp10989 = getelementptr inbounds float, float* %tmp10988, i64 1
+ %tmp10990 = getelementptr inbounds float, float* %tmp10989, i64 1
+ %tmp10991 = getelementptr inbounds float, float* %tmp10990, i64 1
+ %tmp10992 = getelementptr inbounds float, float* %tmp10991, i64 1
+ %tmp10993 = getelementptr inbounds float, float* %tmp10992, i64 1
+ %tmp10994 = getelementptr inbounds float, float* %tmp10993, i64 1
+ %tmp10995 = getelementptr inbounds float, float* %tmp10994, i64 1
+ %tmp10996 = getelementptr inbounds float, float* %tmp10995, i64 1
+ %tmp10997 = getelementptr inbounds float, float* %tmp10996, i64 1
+ %tmp10998 = getelementptr inbounds float, float* %tmp10997, i64 1
+ %tmp10999 = getelementptr inbounds float, float* %tmp10998, i64 1
+ %tmp11000 = getelementptr inbounds float, float* %tmp10999, i64 1
+ %tmp11001 = getelementptr inbounds float, float* %tmp11000, i64 1
+ %tmp11002 = getelementptr inbounds float, float* %tmp11001, i64 1
+ %tmp11003 = getelementptr inbounds float, float* %tmp11002, i64 1
+ %tmp11004 = getelementptr inbounds float, float* %tmp11003, i64 1
+ %tmp11005 = getelementptr inbounds float, float* %tmp11004, i64 1
+ %tmp11006 = getelementptr inbounds float, float* %tmp11005, i64 1
+ %tmp11007 = getelementptr inbounds float, float* %tmp11006, i64 1
+ %tmp11008 = getelementptr inbounds float, float* %tmp11007, i64 1
+ %tmp11009 = getelementptr inbounds float, float* %tmp11008, i64 1
+ %tmp11010 = getelementptr inbounds float, float* %tmp11009, i64 1
+ %tmp11011 = getelementptr inbounds float, float* %tmp11010, i64 1
+ %tmp11012 = getelementptr inbounds float, float* %tmp11011, i64 1
+ %tmp11013 = getelementptr inbounds float, float* %tmp11012, i64 1
+ %tmp11014 = getelementptr inbounds float, float* %tmp11013, i64 1
+ %tmp11015 = getelementptr inbounds float, float* %tmp11014, i64 1
+ %tmp11016 = getelementptr inbounds float, float* %tmp11015, i64 1
+ %tmp11017 = getelementptr inbounds float, float* %tmp11016, i64 1
+ %tmp11018 = getelementptr inbounds float, float* %tmp11017, i64 1
+ %tmp11019 = getelementptr inbounds float, float* %tmp11018, i64 1
+ %tmp11020 = getelementptr inbounds float, float* %tmp11019, i64 1
+ %tmp11021 = getelementptr inbounds float, float* %tmp11020, i64 1
+ %tmp11022 = getelementptr inbounds float, float* %tmp11021, i64 1
+ %tmp11023 = getelementptr inbounds float, float* %tmp11022, i64 1
+ %tmp11024 = getelementptr inbounds float, float* %tmp11023, i64 1
+ %tmp11025 = getelementptr inbounds float, float* %tmp11024, i64 1
+ %tmp11026 = getelementptr inbounds float, float* %tmp11025, i64 1
+ %tmp11027 = getelementptr inbounds float, float* %tmp11026, i64 1
+ %tmp11028 = getelementptr inbounds float, float* %tmp11027, i64 1
+ %tmp11029 = getelementptr inbounds float, float* %tmp11028, i64 1
+ %tmp11030 = getelementptr inbounds float, float* %tmp11029, i64 1
+ %tmp11031 = getelementptr inbounds float, float* %tmp11030, i64 1
+ %tmp11032 = getelementptr inbounds float, float* %tmp11031, i64 1
+ %tmp11033 = getelementptr inbounds float, float* %tmp11032, i64 1
+ %tmp11034 = getelementptr inbounds float, float* %tmp11033, i64 1
+ %tmp11035 = getelementptr inbounds float, float* %tmp11034, i64 1
+ %tmp11036 = getelementptr inbounds float, float* %tmp11035, i64 1
+ %tmp11037 = getelementptr inbounds float, float* %tmp11036, i64 1
+ %tmp11038 = getelementptr inbounds float, float* %tmp11037, i64 1
+ %tmp11039 = getelementptr inbounds float, float* %tmp11038, i64 1
+ %tmp11040 = getelementptr inbounds float, float* %tmp11039, i64 1
+ %tmp11041 = getelementptr inbounds float, float* %tmp11040, i64 1
+ %tmp11042 = getelementptr inbounds float, float* %tmp11041, i64 1
+ %tmp11043 = getelementptr inbounds float, float* %tmp11042, i64 1
+ %tmp11044 = getelementptr inbounds float, float* %tmp11043, i64 1
+ %tmp11045 = getelementptr inbounds float, float* %tmp11044, i64 1
+ %tmp11046 = getelementptr inbounds float, float* %tmp11045, i64 1
+ %tmp11047 = getelementptr inbounds float, float* %tmp11046, i64 1
+ %tmp11048 = getelementptr inbounds float, float* %tmp11047, i64 1
+ %tmp11049 = getelementptr inbounds float, float* %tmp11048, i64 1
+ %tmp11050 = getelementptr inbounds float, float* %tmp11049, i64 1
+ %tmp11051 = getelementptr inbounds float, float* %tmp11050, i64 1
+ %tmp11052 = getelementptr inbounds float, float* %tmp11051, i64 1
+ %tmp11053 = getelementptr inbounds float, float* %tmp11052, i64 1
+ %tmp11054 = getelementptr inbounds float, float* %tmp11053, i64 1
+ %tmp11055 = getelementptr inbounds float, float* %tmp11054, i64 1
+ %tmp11056 = getelementptr inbounds float, float* %tmp11055, i64 1
+ %tmp11057 = getelementptr inbounds float, float* %tmp11056, i64 1
+ %tmp11058 = getelementptr inbounds float, float* %tmp11057, i64 1
+ %tmp11059 = getelementptr inbounds float, float* %tmp11058, i64 1
+ %tmp11060 = getelementptr inbounds float, float* %tmp11059, i64 1
+ %tmp11061 = getelementptr inbounds float, float* %tmp11060, i64 1
+ %tmp11062 = getelementptr inbounds float, float* %tmp11061, i64 1
+ %tmp11063 = getelementptr inbounds float, float* %tmp11062, i64 1
+ %tmp11064 = getelementptr inbounds float, float* %tmp11063, i64 1
+ %tmp11065 = getelementptr inbounds float, float* %tmp11064, i64 1
+ %tmp11066 = getelementptr inbounds float, float* %tmp11065, i64 1
+ %tmp11067 = getelementptr inbounds float, float* %tmp11066, i64 1
+ %tmp11068 = getelementptr inbounds float, float* %tmp11067, i64 1
+ %tmp11069 = getelementptr inbounds float, float* %tmp11068, i64 1
+ %tmp11070 = getelementptr inbounds float, float* %tmp11069, i64 1
+ %tmp11071 = getelementptr inbounds float, float* %tmp11070, i64 1
+ %tmp11072 = getelementptr inbounds float, float* %tmp11071, i64 1
+ %tmp11073 = getelementptr inbounds float, float* %tmp11072, i64 1
+ %tmp11074 = getelementptr inbounds float, float* %tmp11073, i64 1
+ %tmp11075 = getelementptr inbounds float, float* %tmp11074, i64 1
+ %tmp11076 = getelementptr inbounds float, float* %tmp11075, i64 1
+ %tmp11077 = getelementptr inbounds float, float* %tmp11076, i64 1
+ %tmp11078 = getelementptr inbounds float, float* %tmp11077, i64 1
+ %tmp11079 = getelementptr inbounds float, float* %tmp11078, i64 1
+ %tmp11080 = getelementptr inbounds float, float* %tmp11079, i64 1
+ %tmp11081 = getelementptr inbounds float, float* %tmp11080, i64 1
+ %tmp11082 = getelementptr inbounds float, float* %tmp11081, i64 1
+ %tmp11083 = getelementptr inbounds float, float* %tmp11082, i64 1
+ %tmp11084 = getelementptr inbounds float, float* %tmp11083, i64 1
+ %tmp11085 = getelementptr inbounds float, float* %tmp11084, i64 1
+ %tmp11086 = getelementptr inbounds float, float* %tmp11085, i64 1
+ %tmp11087 = getelementptr inbounds float, float* %tmp11086, i64 1
+ %tmp11088 = getelementptr inbounds float, float* %tmp11087, i64 1
+ %tmp11089 = getelementptr inbounds float, float* %tmp11088, i64 1
+ %tmp11090 = getelementptr inbounds float, float* %tmp11089, i64 1
+ %tmp11091 = getelementptr inbounds float, float* %tmp11090, i64 1
+ %tmp11092 = getelementptr inbounds float, float* %tmp11091, i64 1
+ %tmp11093 = getelementptr inbounds float, float* %tmp11092, i64 1
+ %tmp11094 = getelementptr inbounds float, float* %tmp11093, i64 1
+ %tmp11095 = getelementptr inbounds float, float* %tmp11094, i64 1
+ %tmp11096 = getelementptr inbounds float, float* %tmp11095, i64 1
+ %tmp11097 = getelementptr inbounds float, float* %tmp11096, i64 1
+ %tmp11098 = getelementptr inbounds float, float* %tmp11097, i64 1
+ %tmp11099 = getelementptr inbounds float, float* %tmp11098, i64 1
+ %tmp11100 = getelementptr inbounds float, float* %tmp11099, i64 1
+ %tmp11101 = getelementptr inbounds float, float* %tmp11100, i64 1
+ %tmp11102 = getelementptr inbounds float, float* %tmp11101, i64 1
+ %tmp11103 = getelementptr inbounds float, float* %tmp11102, i64 1
+ %tmp11104 = getelementptr inbounds float, float* %tmp11103, i64 1
+ %tmp11105 = getelementptr inbounds float, float* %tmp11104, i64 1
+ %tmp11106 = getelementptr inbounds float, float* %tmp11105, i64 1
+ %tmp11107 = getelementptr inbounds float, float* %tmp11106, i64 1
+ %tmp11108 = getelementptr inbounds float, float* %tmp11107, i64 1
+ %tmp11109 = getelementptr inbounds float, float* %tmp11108, i64 1
+ %tmp11110 = getelementptr inbounds float, float* %tmp11109, i64 1
+ %tmp11111 = getelementptr inbounds float, float* %tmp11110, i64 1
+ %tmp11112 = getelementptr inbounds float, float* %tmp11111, i64 1
+ %tmp11113 = getelementptr inbounds float, float* %tmp11112, i64 1
+ %tmp11114 = getelementptr inbounds float, float* %tmp11113, i64 1
+ %tmp11115 = getelementptr inbounds float, float* %tmp11114, i64 1
+ %tmp11116 = getelementptr inbounds float, float* %tmp11115, i64 1
+ %tmp11117 = getelementptr inbounds float, float* %tmp11116, i64 1
+ %tmp11118 = getelementptr inbounds float, float* %tmp11117, i64 1
+ %tmp11119 = getelementptr inbounds float, float* %tmp11118, i64 1
+ %tmp11120 = getelementptr inbounds float, float* %tmp11119, i64 1
+ %tmp11121 = getelementptr inbounds float, float* %tmp11120, i64 1
+ %tmp11122 = getelementptr inbounds float, float* %tmp11121, i64 1
+ %tmp11123 = getelementptr inbounds float, float* %tmp11122, i64 1
+ %tmp11124 = getelementptr inbounds float, float* %tmp11123, i64 1
+ %tmp11125 = getelementptr inbounds float, float* %tmp11124, i64 1
+ %tmp11126 = getelementptr inbounds float, float* %tmp11125, i64 1
+ %tmp11127 = getelementptr inbounds float, float* %tmp11126, i64 1
+ %tmp11128 = getelementptr inbounds float, float* %tmp11127, i64 1
+ %tmp11129 = getelementptr inbounds float, float* %tmp11128, i64 1
+ %tmp11130 = getelementptr inbounds float, float* %tmp11129, i64 1
+ %tmp11131 = getelementptr inbounds float, float* %tmp11130, i64 1
+ %tmp11132 = getelementptr inbounds float, float* %tmp11131, i64 1
+ %tmp11133 = getelementptr inbounds float, float* %tmp11132, i64 1
+ %tmp11134 = getelementptr inbounds float, float* %tmp11133, i64 1
+ %tmp11135 = getelementptr inbounds float, float* %tmp11134, i64 1
+ %tmp11136 = getelementptr inbounds float, float* %tmp11135, i64 1
+ %tmp11137 = getelementptr inbounds float, float* %tmp11136, i64 1
+ %tmp11138 = getelementptr inbounds float, float* %tmp11137, i64 1
+ %tmp11139 = getelementptr inbounds float, float* %tmp11138, i64 1
+ %tmp11140 = getelementptr inbounds float, float* %tmp11139, i64 1
+ %tmp11141 = getelementptr inbounds float, float* %tmp11140, i64 1
+ %tmp11142 = getelementptr inbounds float, float* %tmp11141, i64 1
+ %tmp11143 = getelementptr inbounds float, float* %tmp11142, i64 1
+ %tmp11144 = getelementptr inbounds float, float* %tmp11143, i64 1
+ %tmp11145 = getelementptr inbounds float, float* %tmp11144, i64 1
+ %tmp11146 = getelementptr inbounds float, float* %tmp11145, i64 1
+ %tmp11147 = getelementptr inbounds float, float* %tmp11146, i64 1
+ %tmp11148 = getelementptr inbounds float, float* %tmp11147, i64 1
+ %tmp11149 = getelementptr inbounds float, float* %tmp11148, i64 1
+ %tmp11150 = getelementptr inbounds float, float* %tmp11149, i64 1
+ %tmp11151 = getelementptr inbounds float, float* %tmp11150, i64 1
+ %tmp11152 = getelementptr inbounds float, float* %tmp11151, i64 1
+ %tmp11153 = getelementptr inbounds float, float* %tmp11152, i64 1
+ %tmp11154 = getelementptr inbounds float, float* %tmp11153, i64 1
+ %tmp11155 = getelementptr inbounds float, float* %tmp11154, i64 1
+ %tmp11156 = getelementptr inbounds float, float* %tmp11155, i64 1
+ %tmp11157 = getelementptr inbounds float, float* %tmp11156, i64 1
+ %tmp11158 = getelementptr inbounds float, float* %tmp11157, i64 1
+ %tmp11159 = getelementptr inbounds float, float* %tmp11158, i64 1
+ %tmp11160 = getelementptr inbounds float, float* %tmp11159, i64 1
+ %tmp11161 = getelementptr inbounds float, float* %tmp11160, i64 1
+ %tmp11162 = getelementptr inbounds float, float* %tmp11161, i64 1
+ %tmp11163 = getelementptr inbounds float, float* %tmp11162, i64 1
+ %tmp11164 = getelementptr inbounds float, float* %tmp11163, i64 1
+ %tmp11165 = getelementptr inbounds float, float* %tmp11164, i64 1
+ %tmp11166 = getelementptr inbounds float, float* %tmp11165, i64 1
+ %tmp11167 = getelementptr inbounds float, float* %tmp11166, i64 1
+ %tmp11168 = getelementptr inbounds float, float* %tmp11167, i64 1
+ %tmp11169 = getelementptr inbounds float, float* %tmp11168, i64 1
+ %tmp11170 = getelementptr inbounds float, float* %tmp11169, i64 1
+ %tmp11171 = getelementptr inbounds float, float* %tmp11170, i64 1
+ %tmp11172 = getelementptr inbounds float, float* %tmp11171, i64 1
+ %tmp11173 = getelementptr inbounds float, float* %tmp11172, i64 1
+ %tmp11174 = getelementptr inbounds float, float* %tmp11173, i64 1
+ %tmp11175 = getelementptr inbounds float, float* %tmp11174, i64 1
+ %tmp11176 = getelementptr inbounds float, float* %tmp11175, i64 1
+ %tmp11177 = getelementptr inbounds float, float* %tmp11176, i64 1
+ %tmp11178 = getelementptr inbounds float, float* %tmp11177, i64 1
+ %tmp11179 = getelementptr inbounds float, float* %tmp11178, i64 1
+ %tmp11180 = getelementptr inbounds float, float* %tmp11179, i64 1
+ %tmp11181 = getelementptr inbounds float, float* %tmp11180, i64 1
+ %tmp11182 = getelementptr inbounds float, float* %tmp11181, i64 1
+ %tmp11183 = getelementptr inbounds float, float* %tmp11182, i64 1
+ %tmp11184 = getelementptr inbounds float, float* %tmp11183, i64 1
+ %tmp11185 = getelementptr inbounds float, float* %tmp11184, i64 1
+ %tmp11186 = getelementptr inbounds float, float* %tmp11185, i64 1
+ %tmp11187 = getelementptr inbounds float, float* %tmp11186, i64 1
+ %tmp11188 = getelementptr inbounds float, float* %tmp11187, i64 1
+ %tmp11189 = getelementptr inbounds float, float* %tmp11188, i64 1
+ %tmp11190 = getelementptr inbounds float, float* %tmp11189, i64 1
+ %tmp11191 = getelementptr inbounds float, float* %tmp11190, i64 1
+ %tmp11192 = getelementptr inbounds float, float* %tmp11191, i64 1
+ %tmp11193 = getelementptr inbounds float, float* %tmp11192, i64 1
+ %tmp11194 = getelementptr inbounds float, float* %tmp11193, i64 1
+ %tmp11195 = getelementptr inbounds float, float* %tmp11194, i64 1
+ %tmp11196 = getelementptr inbounds float, float* %tmp11195, i64 1
+ %tmp11197 = getelementptr inbounds float, float* %tmp11196, i64 1
+ %tmp11198 = getelementptr inbounds float, float* %tmp11197, i64 1
+ %tmp11199 = getelementptr inbounds float, float* %tmp11198, i64 1
+ %tmp11200 = getelementptr inbounds float, float* %tmp11199, i64 1
+ %tmp11201 = getelementptr inbounds float, float* %tmp11200, i64 1
+ %tmp11202 = getelementptr inbounds float, float* %tmp11201, i64 1
+ %tmp11203 = getelementptr inbounds float, float* %tmp11202, i64 1
+ %tmp11204 = getelementptr inbounds float, float* %tmp11203, i64 1
+ %tmp11205 = getelementptr inbounds float, float* %tmp11204, i64 1
+ %tmp11206 = getelementptr inbounds float, float* %tmp11205, i64 1
+ %tmp11207 = getelementptr inbounds float, float* %tmp11206, i64 1
+ %tmp11208 = getelementptr inbounds float, float* %tmp11207, i64 1
+ %tmp11209 = getelementptr inbounds float, float* %tmp11208, i64 1
+ %tmp11210 = getelementptr inbounds float, float* %tmp11209, i64 1
+ %tmp11211 = getelementptr inbounds float, float* %tmp11210, i64 1
+ %tmp11212 = getelementptr inbounds float, float* %tmp11211, i64 1
+ %tmp11213 = getelementptr inbounds float, float* %tmp11212, i64 1
+ %tmp11214 = getelementptr inbounds float, float* %tmp11213, i64 1
+ %tmp11215 = getelementptr inbounds float, float* %tmp11214, i64 1
+ %tmp11216 = getelementptr inbounds float, float* %tmp11215, i64 1
+ %tmp11217 = getelementptr inbounds float, float* %tmp11216, i64 1
+ %tmp11218 = getelementptr inbounds float, float* %tmp11217, i64 1
+ %tmp11219 = getelementptr inbounds float, float* %tmp11218, i64 1
+ %tmp11220 = getelementptr inbounds float, float* %tmp11219, i64 1
+ %tmp11221 = getelementptr inbounds float, float* %tmp11220, i64 1
+ %tmp11222 = getelementptr inbounds float, float* %tmp11221, i64 1
+ %tmp11223 = getelementptr inbounds float, float* %tmp11222, i64 1
+ %tmp11224 = getelementptr inbounds float, float* %tmp11223, i64 1
+ %tmp11225 = getelementptr inbounds float, float* %tmp11224, i64 1
+ %tmp11226 = getelementptr inbounds float, float* %tmp11225, i64 1
+ %tmp11227 = getelementptr inbounds float, float* %tmp11226, i64 1
+ %tmp11228 = getelementptr inbounds float, float* %tmp11227, i64 1
+ %tmp11229 = getelementptr inbounds float, float* %tmp11228, i64 1
+ %tmp11230 = getelementptr inbounds float, float* %tmp11229, i64 1
+ %tmp11231 = getelementptr inbounds float, float* %tmp11230, i64 1
+ %tmp11232 = getelementptr inbounds float, float* %tmp11231, i64 1
+ %tmp11233 = getelementptr inbounds float, float* %tmp11232, i64 1
+ %tmp11234 = getelementptr inbounds float, float* %tmp11233, i64 1
+ %tmp11235 = getelementptr inbounds float, float* %tmp11234, i64 1
+ %tmp11236 = getelementptr inbounds float, float* %tmp11235, i64 1
+ %tmp11237 = getelementptr inbounds float, float* %tmp11236, i64 1
+ %tmp11238 = getelementptr inbounds float, float* %tmp11237, i64 1
+ %tmp11239 = getelementptr inbounds float, float* %tmp11238, i64 1
+ %tmp11240 = getelementptr inbounds float, float* %tmp11239, i64 1
+ %tmp11241 = getelementptr inbounds float, float* %tmp11240, i64 1
+ %tmp11242 = getelementptr inbounds float, float* %tmp11241, i64 1
+ %tmp11243 = getelementptr inbounds float, float* %tmp11242, i64 1
+ %tmp11244 = getelementptr inbounds float, float* %tmp11243, i64 1
+ %tmp11245 = getelementptr inbounds float, float* %tmp11244, i64 1
+ %tmp11246 = getelementptr inbounds float, float* %tmp11245, i64 1
+ %tmp11247 = getelementptr inbounds float, float* %tmp11246, i64 1
+ %tmp11248 = getelementptr inbounds float, float* %tmp11247, i64 1
+ %tmp11249 = getelementptr inbounds float, float* %tmp11248, i64 1
+ %tmp11250 = getelementptr inbounds float, float* %tmp11249, i64 1
+ %tmp11251 = getelementptr inbounds float, float* %tmp11250, i64 1
+ %tmp11252 = getelementptr inbounds float, float* %tmp11251, i64 1
+ %tmp11253 = getelementptr inbounds float, float* %tmp11252, i64 1
+ %tmp11254 = getelementptr inbounds float, float* %tmp11253, i64 1
+ %tmp11255 = getelementptr inbounds float, float* %tmp11254, i64 1
+ %tmp11256 = getelementptr inbounds float, float* %tmp11255, i64 1
+ %tmp11257 = getelementptr inbounds float, float* %tmp11256, i64 1
+ %tmp11258 = getelementptr inbounds float, float* %tmp11257, i64 1
+ %tmp11259 = getelementptr inbounds float, float* %tmp11258, i64 1
+ %tmp11260 = getelementptr inbounds float, float* %tmp11259, i64 1
+ %tmp11261 = getelementptr inbounds float, float* %tmp11260, i64 1
+ %tmp11262 = getelementptr inbounds float, float* %tmp11261, i64 1
+ %tmp11263 = getelementptr inbounds float, float* %tmp11262, i64 1
+ %tmp11264 = getelementptr inbounds float, float* %tmp11263, i64 1
+ %tmp11265 = getelementptr inbounds float, float* %tmp11264, i64 1
+ %tmp11266 = getelementptr inbounds float, float* %tmp11265, i64 1
+ %tmp11267 = getelementptr inbounds float, float* %tmp11266, i64 1
+ %tmp11268 = getelementptr inbounds float, float* %tmp11267, i64 1
+ %tmp11269 = getelementptr inbounds float, float* %tmp11268, i64 1
+ %tmp11270 = getelementptr inbounds float, float* %tmp11269, i64 1
+ %tmp11271 = getelementptr inbounds float, float* %tmp11270, i64 1
+ %tmp11272 = getelementptr inbounds float, float* %tmp11271, i64 1
+ %tmp11273 = getelementptr inbounds float, float* %tmp11272, i64 1
+ %tmp11274 = getelementptr inbounds float, float* %tmp11273, i64 1
+ %tmp11275 = getelementptr inbounds float, float* %tmp11274, i64 1
+ %tmp11276 = getelementptr inbounds float, float* %tmp11275, i64 1
+ %tmp11277 = getelementptr inbounds float, float* %tmp11276, i64 1
+ %tmp11278 = getelementptr inbounds float, float* %tmp11277, i64 1
+ %tmp11279 = getelementptr inbounds float, float* %tmp11278, i64 1
+ %tmp11280 = getelementptr inbounds float, float* %tmp11279, i64 1
+ %tmp11281 = getelementptr inbounds float, float* %tmp11280, i64 1
+ %tmp11282 = getelementptr inbounds float, float* %tmp11281, i64 1
+ %tmp11283 = getelementptr inbounds float, float* %tmp11282, i64 1
+ %tmp11284 = getelementptr inbounds float, float* %tmp11283, i64 1
+ %tmp11285 = getelementptr inbounds float, float* %tmp11284, i64 1
+ %tmp11286 = getelementptr inbounds float, float* %tmp11285, i64 1
+ %tmp11287 = getelementptr inbounds float, float* %tmp11286, i64 1
+ %tmp11288 = getelementptr inbounds float, float* %tmp11287, i64 1
+ %tmp11289 = getelementptr inbounds float, float* %tmp11288, i64 1
+ %tmp11290 = getelementptr inbounds float, float* %tmp11289, i64 1
+ %tmp11291 = getelementptr inbounds float, float* %tmp11290, i64 1
+ %tmp11292 = getelementptr inbounds float, float* %tmp11291, i64 1
+ %tmp11293 = getelementptr inbounds float, float* %tmp11292, i64 1
+ %tmp11294 = getelementptr inbounds float, float* %tmp11293, i64 1
+ %tmp11295 = getelementptr inbounds float, float* %tmp11294, i64 1
+ %tmp11296 = getelementptr inbounds float, float* %tmp11295, i64 1
+ %tmp11297 = getelementptr inbounds float, float* %tmp11296, i64 1
+ %tmp11298 = getelementptr inbounds float, float* %tmp11297, i64 1
+ %tmp11299 = getelementptr inbounds float, float* %tmp11298, i64 1
+ %tmp11300 = getelementptr inbounds float, float* %tmp11299, i64 1
+ %tmp11301 = getelementptr inbounds float, float* %tmp11300, i64 1
+ %tmp11302 = getelementptr inbounds float, float* %tmp11301, i64 1
+ %tmp11303 = getelementptr inbounds float, float* %tmp11302, i64 1
+ %tmp11304 = getelementptr inbounds float, float* %tmp11303, i64 1
+ %tmp11305 = getelementptr inbounds float, float* %tmp11304, i64 1
+ %tmp11306 = getelementptr inbounds float, float* %tmp11305, i64 1
+ %tmp11307 = getelementptr inbounds float, float* %tmp11306, i64 1
+ %tmp11308 = getelementptr inbounds float, float* %tmp11307, i64 1
+ %tmp11309 = getelementptr inbounds float, float* %tmp11308, i64 1
+ %tmp11310 = getelementptr inbounds float, float* %tmp11309, i64 1
+ %tmp11311 = getelementptr inbounds float, float* %tmp11310, i64 1
+ %tmp11312 = getelementptr inbounds float, float* %tmp11311, i64 1
+ %tmp11313 = getelementptr inbounds float, float* %tmp11312, i64 1
+ %tmp11314 = getelementptr inbounds float, float* %tmp11313, i64 1
+ %tmp11315 = getelementptr inbounds float, float* %tmp11314, i64 1
+ %tmp11316 = getelementptr inbounds float, float* %tmp11315, i64 1
+ %tmp11317 = getelementptr inbounds float, float* %tmp11316, i64 1
+ %tmp11318 = getelementptr inbounds float, float* %tmp11317, i64 1
+ %tmp11319 = getelementptr inbounds float, float* %tmp11318, i64 1
+ %tmp11320 = getelementptr inbounds float, float* %tmp11319, i64 1
+ %tmp11321 = getelementptr inbounds float, float* %tmp11320, i64 1
+ %tmp11322 = getelementptr inbounds float, float* %tmp11321, i64 1
+ %tmp11323 = getelementptr inbounds float, float* %tmp11322, i64 1
+ %tmp11324 = getelementptr inbounds float, float* %tmp11323, i64 1
+ %tmp11325 = getelementptr inbounds float, float* %tmp11324, i64 1
+ %tmp11326 = getelementptr inbounds float, float* %tmp11325, i64 1
+ %tmp11327 = getelementptr inbounds float, float* %tmp11326, i64 1
+ %tmp11328 = getelementptr inbounds float, float* %tmp11327, i64 1
+ %tmp11329 = getelementptr inbounds float, float* %tmp11328, i64 1
+ %tmp11330 = getelementptr inbounds float, float* %tmp11329, i64 1
+ %tmp11331 = getelementptr inbounds float, float* %tmp11330, i64 1
+ %tmp11332 = getelementptr inbounds float, float* %tmp11331, i64 1
+ %tmp11333 = getelementptr inbounds float, float* %tmp11332, i64 1
+ %tmp11334 = getelementptr inbounds float, float* %tmp11333, i64 1
+ %tmp11335 = getelementptr inbounds float, float* %tmp11334, i64 1
+ %tmp11336 = getelementptr inbounds float, float* %tmp11335, i64 1
+ %tmp11337 = getelementptr inbounds float, float* %tmp11336, i64 1
+ %tmp11338 = getelementptr inbounds float, float* %tmp11337, i64 1
+ %tmp11339 = getelementptr inbounds float, float* %tmp11338, i64 1
+ %tmp11340 = getelementptr inbounds float, float* %tmp11339, i64 1
+ %tmp11341 = getelementptr inbounds float, float* %tmp11340, i64 1
+ %tmp11342 = getelementptr inbounds float, float* %tmp11341, i64 1
+ %tmp11343 = getelementptr inbounds float, float* %tmp11342, i64 1
+ %tmp11344 = getelementptr inbounds float, float* %tmp11343, i64 1
+ %tmp11345 = getelementptr inbounds float, float* %tmp11344, i64 1
+ %tmp11346 = getelementptr inbounds float, float* %tmp11345, i64 1
+ %tmp11347 = getelementptr inbounds float, float* %tmp11346, i64 1
+ %tmp11348 = getelementptr inbounds float, float* %tmp11347, i64 1
+ %tmp11349 = getelementptr inbounds float, float* %tmp11348, i64 1
+ %tmp11350 = getelementptr inbounds float, float* %tmp11349, i64 1
+ %tmp11351 = getelementptr inbounds float, float* %tmp11350, i64 1
+ %tmp11352 = getelementptr inbounds float, float* %tmp11351, i64 1
+ %tmp11353 = getelementptr inbounds float, float* %tmp11352, i64 1
+ %tmp11354 = getelementptr inbounds float, float* %tmp11353, i64 1
+ %tmp11355 = getelementptr inbounds float, float* %tmp11354, i64 1
+ %tmp11356 = getelementptr inbounds float, float* %tmp11355, i64 1
+ %tmp11357 = getelementptr inbounds float, float* %tmp11356, i64 1
+ %tmp11358 = getelementptr inbounds float, float* %tmp11357, i64 1
+ %tmp11359 = getelementptr inbounds float, float* %tmp11358, i64 1
+ %tmp11360 = getelementptr inbounds float, float* %tmp11359, i64 1
+ %tmp11361 = getelementptr inbounds float, float* %tmp11360, i64 1
+ %tmp11362 = getelementptr inbounds float, float* %tmp11361, i64 1
+ %tmp11363 = getelementptr inbounds float, float* %tmp11362, i64 1
+ %tmp11364 = getelementptr inbounds float, float* %tmp11363, i64 1
+ %tmp11365 = getelementptr inbounds float, float* %tmp11364, i64 1
+ %tmp11366 = getelementptr inbounds float, float* %tmp11365, i64 1
+ %tmp11367 = getelementptr inbounds float, float* %tmp11366, i64 1
+ %tmp11368 = getelementptr inbounds float, float* %tmp11367, i64 1
+ %tmp11369 = getelementptr inbounds float, float* %tmp11368, i64 1
+ %tmp11370 = getelementptr inbounds float, float* %tmp11369, i64 1
+ %tmp11371 = getelementptr inbounds float, float* %tmp11370, i64 1
+ %tmp11372 = getelementptr inbounds float, float* %tmp11371, i64 1
+ %tmp11373 = getelementptr inbounds float, float* %tmp11372, i64 1
+ %tmp11374 = getelementptr inbounds float, float* %tmp11373, i64 1
+ %tmp11375 = getelementptr inbounds float, float* %tmp11374, i64 1
+ %tmp11376 = getelementptr inbounds float, float* %tmp11375, i64 1
+ %tmp11377 = getelementptr inbounds float, float* %tmp11376, i64 1
+ %tmp11378 = getelementptr inbounds float, float* %tmp11377, i64 1
+ %tmp11379 = getelementptr inbounds float, float* %tmp11378, i64 1
+ %tmp11380 = getelementptr inbounds float, float* %tmp11379, i64 1
+ %tmp11381 = getelementptr inbounds float, float* %tmp11380, i64 1
+ %tmp11382 = getelementptr inbounds float, float* %tmp11381, i64 1
+ %tmp11383 = getelementptr inbounds float, float* %tmp11382, i64 1
+ %tmp11384 = getelementptr inbounds float, float* %tmp11383, i64 1
+ %tmp11385 = getelementptr inbounds float, float* %tmp11384, i64 1
+ %tmp11386 = getelementptr inbounds float, float* %tmp11385, i64 1
+ %tmp11387 = getelementptr inbounds float, float* %tmp11386, i64 1
+ %tmp11388 = getelementptr inbounds float, float* %tmp11387, i64 1
+ %tmp11389 = getelementptr inbounds float, float* %tmp11388, i64 1
+ %tmp11390 = getelementptr inbounds float, float* %tmp11389, i64 1
+ %tmp11391 = getelementptr inbounds float, float* %tmp11390, i64 1
+ %tmp11392 = getelementptr inbounds float, float* %tmp11391, i64 1
+ %tmp11393 = getelementptr inbounds float, float* %tmp11392, i64 1
+ %tmp11394 = getelementptr inbounds float, float* %tmp11393, i64 1
+ %tmp11395 = getelementptr inbounds float, float* %tmp11394, i64 1
+ %tmp11396 = getelementptr inbounds float, float* %tmp11395, i64 1
+ %tmp11397 = getelementptr inbounds float, float* %tmp11396, i64 1
+ %tmp11398 = getelementptr inbounds float, float* %tmp11397, i64 1
+ %tmp11399 = getelementptr inbounds float, float* %tmp11398, i64 1
+ %tmp11400 = getelementptr inbounds float, float* %tmp11399, i64 1
+ %tmp11401 = getelementptr inbounds float, float* %tmp11400, i64 1
+ %tmp11402 = getelementptr inbounds float, float* %tmp11401, i64 1
+ %tmp11403 = getelementptr inbounds float, float* %tmp11402, i64 1
+ %tmp11404 = getelementptr inbounds float, float* %tmp11403, i64 1
+ %tmp11405 = getelementptr inbounds float, float* %tmp11404, i64 1
+ %tmp11406 = getelementptr inbounds float, float* %tmp11405, i64 1
+ %tmp11407 = getelementptr inbounds float, float* %tmp11406, i64 1
+ %tmp11408 = getelementptr inbounds float, float* %tmp11407, i64 1
+ %tmp11409 = getelementptr inbounds float, float* %tmp11408, i64 1
+ %tmp11410 = getelementptr inbounds float, float* %tmp11409, i64 1
+ %tmp11411 = getelementptr inbounds float, float* %tmp11410, i64 1
+ %tmp11412 = getelementptr inbounds float, float* %tmp11411, i64 1
+ %tmp11413 = getelementptr inbounds float, float* %tmp11412, i64 1
+ %tmp11414 = getelementptr inbounds float, float* %tmp11413, i64 1
+ %tmp11415 = getelementptr inbounds float, float* %tmp11414, i64 1
+ %tmp11416 = getelementptr inbounds float, float* %tmp11415, i64 1
+ %tmp11417 = getelementptr inbounds float, float* %tmp11416, i64 1
+ %tmp11418 = getelementptr inbounds float, float* %tmp11417, i64 1
+ %tmp11419 = getelementptr inbounds float, float* %tmp11418, i64 1
+ %tmp11420 = getelementptr inbounds float, float* %tmp11419, i64 1
+ %tmp11421 = getelementptr inbounds float, float* %tmp11420, i64 1
+ %tmp11422 = getelementptr inbounds float, float* %tmp11421, i64 1
+ %tmp11423 = getelementptr inbounds float, float* %tmp11422, i64 1
+ %tmp11424 = getelementptr inbounds float, float* %tmp11423, i64 1
+ %tmp11425 = getelementptr inbounds float, float* %tmp11424, i64 1
+ %tmp11426 = getelementptr inbounds float, float* %tmp11425, i64 1
+ %tmp11427 = getelementptr inbounds float, float* %tmp11426, i64 1
+ %tmp11428 = getelementptr inbounds float, float* %tmp11427, i64 1
+ %tmp11429 = getelementptr inbounds float, float* %tmp11428, i64 1
+ %tmp11430 = getelementptr inbounds float, float* %tmp11429, i64 1
+ %tmp11431 = getelementptr inbounds float, float* %tmp11430, i64 1
+ %tmp11432 = getelementptr inbounds float, float* %tmp11431, i64 1
+ %tmp11433 = getelementptr inbounds float, float* %tmp11432, i64 1
+ %tmp11434 = getelementptr inbounds float, float* %tmp11433, i64 1
+ %tmp11435 = getelementptr inbounds float, float* %tmp11434, i64 1
+ %tmp11436 = getelementptr inbounds float, float* %tmp11435, i64 1
+ %tmp11437 = getelementptr inbounds float, float* %tmp11436, i64 1
+ %tmp11438 = getelementptr inbounds float, float* %tmp11437, i64 1
+ %tmp11439 = getelementptr inbounds float, float* %tmp11438, i64 1
+ %tmp11440 = getelementptr inbounds float, float* %tmp11439, i64 1
+ %tmp11441 = getelementptr inbounds float, float* %tmp11440, i64 1
+ %tmp11442 = getelementptr inbounds float, float* %tmp11441, i64 1
+ %tmp11443 = getelementptr inbounds float, float* %tmp11442, i64 1
+ %tmp11444 = getelementptr inbounds float, float* %tmp11443, i64 1
+ %tmp11445 = getelementptr inbounds float, float* %tmp11444, i64 1
+ %tmp11446 = getelementptr inbounds float, float* %tmp11445, i64 1
+ %tmp11447 = getelementptr inbounds float, float* %tmp11446, i64 1
+ %tmp11448 = getelementptr inbounds float, float* %tmp11447, i64 1
+ %tmp11449 = getelementptr inbounds float, float* %tmp11448, i64 1
+ %tmp11450 = getelementptr inbounds float, float* %tmp11449, i64 1
+ %tmp11451 = getelementptr inbounds float, float* %tmp11450, i64 1
+ %tmp11452 = getelementptr inbounds float, float* %tmp11451, i64 1
+ %tmp11453 = getelementptr inbounds float, float* %tmp11452, i64 1
+ %tmp11454 = getelementptr inbounds float, float* %tmp11453, i64 1
+ %tmp11455 = getelementptr inbounds float, float* %tmp11454, i64 1
+ %tmp11456 = getelementptr inbounds float, float* %tmp11455, i64 1
+ %tmp11457 = getelementptr inbounds float, float* %tmp11456, i64 1
+ %tmp11458 = getelementptr inbounds float, float* %tmp11457, i64 1
+ %tmp11459 = getelementptr inbounds float, float* %tmp11458, i64 1
+ %tmp11460 = getelementptr inbounds float, float* %tmp11459, i64 1
+ %tmp11461 = getelementptr inbounds float, float* %tmp11460, i64 1
+ %tmp11462 = getelementptr inbounds float, float* %tmp11461, i64 1
+ %tmp11463 = getelementptr inbounds float, float* %tmp11462, i64 1
+ %tmp11464 = getelementptr inbounds float, float* %tmp11463, i64 1
+ %tmp11465 = getelementptr inbounds float, float* %tmp11464, i64 1
+ %tmp11466 = getelementptr inbounds float, float* %tmp11465, i64 1
+ %tmp11467 = getelementptr inbounds float, float* %tmp11466, i64 1
+ %tmp11468 = getelementptr inbounds float, float* %tmp11467, i64 1
+ %tmp11469 = getelementptr inbounds float, float* %tmp11468, i64 1
+ %tmp11470 = getelementptr inbounds float, float* %tmp11469, i64 1
+ %tmp11471 = getelementptr inbounds float, float* %tmp11470, i64 1
+ %tmp11472 = getelementptr inbounds float, float* %tmp11471, i64 1
+ %tmp11473 = getelementptr inbounds float, float* %tmp11472, i64 1
+ %tmp11474 = getelementptr inbounds float, float* %tmp11473, i64 1
+ %tmp11475 = getelementptr inbounds float, float* %tmp11474, i64 1
+ %tmp11476 = getelementptr inbounds float, float* %tmp11475, i64 1
+ %tmp11477 = getelementptr inbounds float, float* %tmp11476, i64 1
+ %tmp11478 = getelementptr inbounds float, float* %tmp11477, i64 1
+ %tmp11479 = getelementptr inbounds float, float* %tmp11478, i64 1
+ %tmp11480 = getelementptr inbounds float, float* %tmp11479, i64 1
+ %tmp11481 = getelementptr inbounds float, float* %tmp11480, i64 1
+ %tmp11482 = getelementptr inbounds float, float* %tmp11481, i64 1
+ %tmp11483 = getelementptr inbounds float, float* %tmp11482, i64 1
+ %tmp11484 = getelementptr inbounds float, float* %tmp11483, i64 1
+ %tmp11485 = getelementptr inbounds float, float* %tmp11484, i64 1
+ %tmp11486 = getelementptr inbounds float, float* %tmp11485, i64 1
+ %tmp11487 = getelementptr inbounds float, float* %tmp11486, i64 1
+ %tmp11488 = getelementptr inbounds float, float* %tmp11487, i64 1
+ %tmp11489 = getelementptr inbounds float, float* %tmp11488, i64 1
+ %tmp11490 = getelementptr inbounds float, float* %tmp11489, i64 1
+ %tmp11491 = getelementptr inbounds float, float* %tmp11490, i64 1
+ %tmp11492 = getelementptr inbounds float, float* %tmp11491, i64 1
+ %tmp11493 = getelementptr inbounds float, float* %tmp11492, i64 1
+ %tmp11494 = getelementptr inbounds float, float* %tmp11493, i64 1
+ %tmp11495 = getelementptr inbounds float, float* %tmp11494, i64 1
+ %tmp11496 = getelementptr inbounds float, float* %tmp11495, i64 1
+ %tmp11497 = getelementptr inbounds float, float* %tmp11496, i64 1
+ %tmp11498 = getelementptr inbounds float, float* %tmp11497, i64 1
+ %tmp11499 = getelementptr inbounds float, float* %tmp11498, i64 1
+ %tmp11500 = getelementptr inbounds float, float* %tmp11499, i64 1
+ %tmp11501 = getelementptr inbounds float, float* %tmp11500, i64 1
+ %tmp11502 = getelementptr inbounds float, float* %tmp11501, i64 1
+ %tmp11503 = getelementptr inbounds float, float* %tmp11502, i64 1
+ %tmp11504 = getelementptr inbounds float, float* %tmp11503, i64 1
+ %tmp11505 = getelementptr inbounds float, float* %tmp11504, i64 1
+ %tmp11506 = getelementptr inbounds float, float* %tmp11505, i64 1
+ %tmp11507 = getelementptr inbounds float, float* %tmp11506, i64 1
+ %tmp11508 = getelementptr inbounds float, float* %tmp11507, i64 1
+ %tmp11509 = getelementptr inbounds float, float* %tmp11508, i64 1
+ %tmp11510 = getelementptr inbounds float, float* %tmp11509, i64 1
+ %tmp11511 = getelementptr inbounds float, float* %tmp11510, i64 1
+ %tmp11512 = getelementptr inbounds float, float* %tmp11511, i64 1
+ %tmp11513 = getelementptr inbounds float, float* %tmp11512, i64 1
+ %tmp11514 = getelementptr inbounds float, float* %tmp11513, i64 1
+ %tmp11515 = getelementptr inbounds float, float* %tmp11514, i64 1
+ %tmp11516 = getelementptr inbounds float, float* %tmp11515, i64 1
+ %tmp11517 = getelementptr inbounds float, float* %tmp11516, i64 1
+ %tmp11518 = getelementptr inbounds float, float* %tmp11517, i64 1
+ %tmp11519 = getelementptr inbounds float, float* %tmp11518, i64 1
+ %tmp11520 = getelementptr inbounds float, float* %tmp11519, i64 1
+ %tmp11521 = getelementptr inbounds float, float* %tmp11520, i64 1
+ %tmp11522 = getelementptr inbounds float, float* %tmp11521, i64 1
+ %tmp11523 = getelementptr inbounds float, float* %tmp11522, i64 1
+ %tmp11524 = getelementptr inbounds float, float* %tmp11523, i64 1
+ %tmp11525 = getelementptr inbounds float, float* %tmp11524, i64 1
+ %tmp11526 = getelementptr inbounds float, float* %tmp11525, i64 1
+ %tmp11527 = getelementptr inbounds float, float* %tmp11526, i64 1
+ %tmp11528 = getelementptr inbounds float, float* %tmp11527, i64 1
+ %tmp11529 = getelementptr inbounds float, float* %tmp11528, i64 1
+ %tmp11530 = getelementptr inbounds float, float* %tmp11529, i64 1
+ %tmp11531 = getelementptr inbounds float, float* %tmp11530, i64 1
+ %tmp11532 = getelementptr inbounds float, float* %tmp11531, i64 1
+ %tmp11533 = getelementptr inbounds float, float* %tmp11532, i64 1
+ %tmp11534 = getelementptr inbounds float, float* %tmp11533, i64 1
+ %tmp11535 = getelementptr inbounds float, float* %tmp11534, i64 1
+ %tmp11536 = getelementptr inbounds float, float* %tmp11535, i64 1
+ %tmp11537 = getelementptr inbounds float, float* %tmp11536, i64 1
+ %tmp11538 = getelementptr inbounds float, float* %tmp11537, i64 1
+ %tmp11539 = getelementptr inbounds float, float* %tmp11538, i64 1
+ %tmp11540 = getelementptr inbounds float, float* %tmp11539, i64 1
+ %tmp11541 = getelementptr inbounds float, float* %tmp11540, i64 1
+ %tmp11542 = getelementptr inbounds float, float* %tmp11541, i64 1
+ %tmp11543 = getelementptr inbounds float, float* %tmp11542, i64 1
+ %tmp11544 = getelementptr inbounds float, float* %tmp11543, i64 1
+ %tmp11545 = getelementptr inbounds float, float* %tmp11544, i64 1
+ %tmp11546 = getelementptr inbounds float, float* %tmp11545, i64 1
+ %tmp11547 = getelementptr inbounds float, float* %tmp11546, i64 1
+ %tmp11548 = getelementptr inbounds float, float* %tmp11547, i64 1
+ %tmp11549 = getelementptr inbounds float, float* %tmp11548, i64 1
+ %tmp11550 = getelementptr inbounds float, float* %tmp11549, i64 1
+ %tmp11551 = getelementptr inbounds float, float* %tmp11550, i64 1
+ %tmp11552 = getelementptr inbounds float, float* %tmp11551, i64 1
+ %tmp11553 = getelementptr inbounds float, float* %tmp11552, i64 1
+ %tmp11554 = getelementptr inbounds float, float* %tmp11553, i64 1
+ %tmp11555 = getelementptr inbounds float, float* %tmp11554, i64 1
+ %tmp11556 = getelementptr inbounds float, float* %tmp11555, i64 1
+ %tmp11557 = getelementptr inbounds float, float* %tmp11556, i64 1
+ %tmp11558 = getelementptr inbounds float, float* %tmp11557, i64 1
+ %tmp11559 = getelementptr inbounds float, float* %tmp11558, i64 1
+ %tmp11560 = getelementptr inbounds float, float* %tmp11559, i64 1
+ %tmp11561 = getelementptr inbounds float, float* %tmp11560, i64 1
+ %tmp11562 = getelementptr inbounds float, float* %tmp11561, i64 1
+ %tmp11563 = getelementptr inbounds float, float* %tmp11562, i64 1
+ %tmp11564 = getelementptr inbounds float, float* %tmp11563, i64 1
+ %tmp11565 = getelementptr inbounds float, float* %tmp11564, i64 1
+ %tmp11566 = getelementptr inbounds float, float* %tmp11565, i64 1
+ %tmp11567 = getelementptr inbounds float, float* %tmp11566, i64 1
+ %tmp11568 = getelementptr inbounds float, float* %tmp11567, i64 1
+ %tmp11569 = getelementptr inbounds float, float* %tmp11568, i64 1
+ %tmp11570 = getelementptr inbounds float, float* %tmp11569, i64 1
+ %tmp11571 = getelementptr inbounds float, float* %tmp11570, i64 1
+ %tmp11572 = getelementptr inbounds float, float* %tmp11571, i64 1
+ %tmp11573 = getelementptr inbounds float, float* %tmp11572, i64 1
+ %tmp11574 = getelementptr inbounds float, float* %tmp11573, i64 1
+ %tmp11575 = getelementptr inbounds float, float* %tmp11574, i64 1
+ %tmp11576 = getelementptr inbounds float, float* %tmp11575, i64 1
+ %tmp11577 = getelementptr inbounds float, float* %tmp11576, i64 1
+ %tmp11578 = getelementptr inbounds float, float* %tmp11577, i64 1
+ %tmp11579 = getelementptr inbounds float, float* %tmp11578, i64 1
+ %tmp11580 = getelementptr inbounds float, float* %tmp11579, i64 1
+ %tmp11581 = getelementptr inbounds float, float* %tmp11580, i64 1
+ %tmp11582 = getelementptr inbounds float, float* %tmp11581, i64 1
+ %tmp11583 = getelementptr inbounds float, float* %tmp11582, i64 1
+ %tmp11584 = getelementptr inbounds float, float* %tmp11583, i64 1
+ %tmp11585 = getelementptr inbounds float, float* %tmp11584, i64 1
+ %tmp11586 = getelementptr inbounds float, float* %tmp11585, i64 1
+ %tmp11587 = getelementptr inbounds float, float* %tmp11586, i64 1
+ %tmp11588 = getelementptr inbounds float, float* %tmp11587, i64 1
+ %tmp11589 = getelementptr inbounds float, float* %tmp11588, i64 1
+ %tmp11590 = getelementptr inbounds float, float* %tmp11589, i64 1
+ %tmp11591 = getelementptr inbounds float, float* %tmp11590, i64 1
+ %tmp11592 = getelementptr inbounds float, float* %tmp11591, i64 1
+ %tmp11593 = getelementptr inbounds float, float* %tmp11592, i64 1
+ %tmp11594 = getelementptr inbounds float, float* %tmp11593, i64 1
+ %tmp11595 = getelementptr inbounds float, float* %tmp11594, i64 1
+ %tmp11596 = getelementptr inbounds float, float* %tmp11595, i64 1
+ %tmp11597 = getelementptr inbounds float, float* %tmp11596, i64 1
+ %tmp11598 = getelementptr inbounds float, float* %tmp11597, i64 1
+ %tmp11599 = getelementptr inbounds float, float* %tmp11598, i64 1
+ %tmp11600 = getelementptr inbounds float, float* %tmp11599, i64 1
+ %tmp11601 = getelementptr inbounds float, float* %tmp11600, i64 1
+ %tmp11602 = getelementptr inbounds float, float* %tmp11601, i64 1
+ %tmp11603 = getelementptr inbounds float, float* %tmp11602, i64 1
+ %tmp11604 = getelementptr inbounds float, float* %tmp11603, i64 1
+ %tmp11605 = getelementptr inbounds float, float* %tmp11604, i64 1
+ %tmp11606 = getelementptr inbounds float, float* %tmp11605, i64 1
+ %tmp11607 = getelementptr inbounds float, float* %tmp11606, i64 1
+ %tmp11608 = getelementptr inbounds float, float* %tmp11607, i64 1
+ %tmp11609 = getelementptr inbounds float, float* %tmp11608, i64 1
+ %tmp11610 = getelementptr inbounds float, float* %tmp11609, i64 1
+ %tmp11611 = getelementptr inbounds float, float* %tmp11610, i64 1
+ %tmp11612 = getelementptr inbounds float, float* %tmp11611, i64 1
+ %tmp11613 = getelementptr inbounds float, float* %tmp11612, i64 1
+ %tmp11614 = getelementptr inbounds float, float* %tmp11613, i64 1
+ %tmp11615 = getelementptr inbounds float, float* %tmp11614, i64 1
+ %tmp11616 = getelementptr inbounds float, float* %tmp11615, i64 1
+ %tmp11617 = getelementptr inbounds float, float* %tmp11616, i64 1
+ %tmp11618 = getelementptr inbounds float, float* %tmp11617, i64 1
+ %tmp11619 = getelementptr inbounds float, float* %tmp11618, i64 1
+ %tmp11620 = getelementptr inbounds float, float* %tmp11619, i64 1
+ %tmp11621 = getelementptr inbounds float, float* %tmp11620, i64 1
+ %tmp11622 = getelementptr inbounds float, float* %tmp11621, i64 1
+ %tmp11623 = getelementptr inbounds float, float* %tmp11622, i64 1
+ %tmp11624 = getelementptr inbounds float, float* %tmp11623, i64 1
+ %tmp11625 = getelementptr inbounds float, float* %tmp11624, i64 1
+ %tmp11626 = getelementptr inbounds float, float* %tmp11625, i64 1
+ %tmp11627 = getelementptr inbounds float, float* %tmp11626, i64 1
+ %tmp11628 = getelementptr inbounds float, float* %tmp11627, i64 1
+ %tmp11629 = getelementptr inbounds float, float* %tmp11628, i64 1
+ %tmp11630 = getelementptr inbounds float, float* %tmp11629, i64 1
+ %tmp11631 = getelementptr inbounds float, float* %tmp11630, i64 1
+ %tmp11632 = getelementptr inbounds float, float* %tmp11631, i64 1
+ %tmp11633 = getelementptr inbounds float, float* %tmp11632, i64 1
+ %tmp11634 = getelementptr inbounds float, float* %tmp11633, i64 1
+ %tmp11635 = getelementptr inbounds float, float* %tmp11634, i64 1
+ %tmp11636 = getelementptr inbounds float, float* %tmp11635, i64 1
+ %tmp11637 = getelementptr inbounds float, float* %tmp11636, i64 1
+ %tmp11638 = getelementptr inbounds float, float* %tmp11637, i64 1
+ %tmp11639 = getelementptr inbounds float, float* %tmp11638, i64 1
+ %tmp11640 = getelementptr inbounds float, float* %tmp11639, i64 1
+ %tmp11641 = getelementptr inbounds float, float* %tmp11640, i64 1
+ %tmp11642 = getelementptr inbounds float, float* %tmp11641, i64 1
+ %tmp11643 = getelementptr inbounds float, float* %tmp11642, i64 1
+ %tmp11644 = getelementptr inbounds float, float* %tmp11643, i64 1
+ %tmp11645 = getelementptr inbounds float, float* %tmp11644, i64 1
+ %tmp11646 = getelementptr inbounds float, float* %tmp11645, i64 1
+ %tmp11647 = getelementptr inbounds float, float* %tmp11646, i64 1
+ %tmp11648 = getelementptr inbounds float, float* %tmp11647, i64 1
+ %tmp11649 = getelementptr inbounds float, float* %tmp11648, i64 1
+ %tmp11650 = getelementptr inbounds float, float* %tmp11649, i64 1
+ %tmp11651 = getelementptr inbounds float, float* %tmp11650, i64 1
+ %tmp11652 = getelementptr inbounds float, float* %tmp11651, i64 1
+ %tmp11653 = getelementptr inbounds float, float* %tmp11652, i64 1
+ %tmp11654 = getelementptr inbounds float, float* %tmp11653, i64 1
+ %tmp11655 = getelementptr inbounds float, float* %tmp11654, i64 1
+ %tmp11656 = getelementptr inbounds float, float* %tmp11655, i64 1
+ %tmp11657 = getelementptr inbounds float, float* %tmp11656, i64 1
+ %tmp11658 = getelementptr inbounds float, float* %tmp11657, i64 1
+ %tmp11659 = getelementptr inbounds float, float* %tmp11658, i64 1
+ %tmp11660 = getelementptr inbounds float, float* %tmp11659, i64 1
+ %tmp11661 = getelementptr inbounds float, float* %tmp11660, i64 1
+ %tmp11662 = getelementptr inbounds float, float* %tmp11661, i64 1
+ %tmp11663 = getelementptr inbounds float, float* %tmp11662, i64 1
+ %tmp11664 = getelementptr inbounds float, float* %tmp11663, i64 1
+ %tmp11665 = getelementptr inbounds float, float* %tmp11664, i64 1
+ %tmp11666 = getelementptr inbounds float, float* %tmp11665, i64 1
+ %tmp11667 = getelementptr inbounds float, float* %tmp11666, i64 1
+ %tmp11668 = getelementptr inbounds float, float* %tmp11667, i64 1
+ %tmp11669 = getelementptr inbounds float, float* %tmp11668, i64 1
+ %tmp11670 = getelementptr inbounds float, float* %tmp11669, i64 1
+ %tmp11671 = getelementptr inbounds float, float* %tmp11670, i64 1
+ %tmp11672 = getelementptr inbounds float, float* %tmp11671, i64 1
+ %tmp11673 = getelementptr inbounds float, float* %tmp11672, i64 1
+ %tmp11674 = getelementptr inbounds float, float* %tmp11673, i64 1
+ %tmp11675 = getelementptr inbounds float, float* %tmp11674, i64 1
+ %tmp11676 = getelementptr inbounds float, float* %tmp11675, i64 1
+ %tmp11677 = getelementptr inbounds float, float* %tmp11676, i64 1
+ %tmp11678 = getelementptr inbounds float, float* %tmp11677, i64 1
+ %tmp11679 = getelementptr inbounds float, float* %tmp11678, i64 1
+ %tmp11680 = getelementptr inbounds float, float* %tmp11679, i64 1
+ %tmp11681 = getelementptr inbounds float, float* %tmp11680, i64 1
+ %tmp11682 = getelementptr inbounds float, float* %tmp11681, i64 1
+ %tmp11683 = getelementptr inbounds float, float* %tmp11682, i64 1
+ %tmp11684 = getelementptr inbounds float, float* %tmp11683, i64 1
+ %tmp11685 = getelementptr inbounds float, float* %tmp11684, i64 1
+ %tmp11686 = getelementptr inbounds float, float* %tmp11685, i64 1
+ %tmp11687 = getelementptr inbounds float, float* %tmp11686, i64 1
+ %tmp11688 = getelementptr inbounds float, float* %tmp11687, i64 1
+ %tmp11689 = getelementptr inbounds float, float* %tmp11688, i64 1
+ %tmp11690 = getelementptr inbounds float, float* %tmp11689, i64 1
+ %tmp11691 = getelementptr inbounds float, float* %tmp11690, i64 1
+ %tmp11692 = getelementptr inbounds float, float* %tmp11691, i64 1
+ %tmp11693 = getelementptr inbounds float, float* %tmp11692, i64 1
+ %tmp11694 = getelementptr inbounds float, float* %tmp11693, i64 1
+ %tmp11695 = getelementptr inbounds float, float* %tmp11694, i64 1
+ %tmp11696 = getelementptr inbounds float, float* %tmp11695, i64 1
+ %tmp11697 = getelementptr inbounds float, float* %tmp11696, i64 1
+ %tmp11698 = getelementptr inbounds float, float* %tmp11697, i64 1
+ %tmp11699 = getelementptr inbounds float, float* %tmp11698, i64 1
+ %tmp11700 = getelementptr inbounds float, float* %tmp11699, i64 1
+ %tmp11701 = getelementptr inbounds float, float* %tmp11700, i64 1
+ %tmp11702 = getelementptr inbounds float, float* %tmp11701, i64 1
+ %tmp11703 = getelementptr inbounds float, float* %tmp11702, i64 1
+ %tmp11704 = getelementptr inbounds float, float* %tmp11703, i64 1
+ %tmp11705 = getelementptr inbounds float, float* %tmp11704, i64 1
+ %tmp11706 = getelementptr inbounds float, float* %tmp11705, i64 1
+ %tmp11707 = getelementptr inbounds float, float* %tmp11706, i64 1
+ %tmp11708 = getelementptr inbounds float, float* %tmp11707, i64 1
+ %tmp11709 = getelementptr inbounds float, float* %tmp11708, i64 1
+ %tmp11710 = getelementptr inbounds float, float* %tmp11709, i64 1
+ %tmp11711 = getelementptr inbounds float, float* %tmp11710, i64 1
+ %tmp11712 = getelementptr inbounds float, float* %tmp11711, i64 1
+ %tmp11713 = getelementptr inbounds float, float* %tmp11712, i64 1
+ %tmp11714 = getelementptr inbounds float, float* %tmp11713, i64 1
+ %tmp11715 = getelementptr inbounds float, float* %tmp11714, i64 1
+ %tmp11716 = getelementptr inbounds float, float* %tmp11715, i64 1
+ %tmp11717 = getelementptr inbounds float, float* %tmp11716, i64 1
+ %tmp11718 = getelementptr inbounds float, float* %tmp11717, i64 1
+ %tmp11719 = getelementptr inbounds float, float* %tmp11718, i64 1
+ %tmp11720 = getelementptr inbounds float, float* %tmp11719, i64 1
+ %tmp11721 = getelementptr inbounds float, float* %tmp11720, i64 1
+ %tmp11722 = getelementptr inbounds float, float* %tmp11721, i64 1
+ %tmp11723 = getelementptr inbounds float, float* %tmp11722, i64 1
+ %tmp11724 = getelementptr inbounds float, float* %tmp11723, i64 1
+ %tmp11725 = getelementptr inbounds float, float* %tmp11724, i64 1
+ %tmp11726 = getelementptr inbounds float, float* %tmp11725, i64 1
+ %tmp11727 = getelementptr inbounds float, float* %tmp11726, i64 1
+ %tmp11728 = getelementptr inbounds float, float* %tmp11727, i64 1
+ %tmp11729 = getelementptr inbounds float, float* %tmp11728, i64 1
+ %tmp11730 = getelementptr inbounds float, float* %tmp11729, i64 1
+ %tmp11731 = getelementptr inbounds float, float* %tmp11730, i64 1
+ %tmp11732 = getelementptr inbounds float, float* %tmp11731, i64 1
+ %tmp11733 = getelementptr inbounds float, float* %tmp11732, i64 1
+ %tmp11734 = getelementptr inbounds float, float* %tmp11733, i64 1
+ %tmp11735 = getelementptr inbounds float, float* %tmp11734, i64 1
+ %tmp11736 = getelementptr inbounds float, float* %tmp11735, i64 1
+ %tmp11737 = getelementptr inbounds float, float* %tmp11736, i64 1
+ %tmp11738 = getelementptr inbounds float, float* %tmp11737, i64 1
+ %tmp11739 = getelementptr inbounds float, float* %tmp11738, i64 1
+ %tmp11740 = getelementptr inbounds float, float* %tmp11739, i64 1
+ %tmp11741 = getelementptr inbounds float, float* %tmp11740, i64 1
+ %tmp11742 = getelementptr inbounds float, float* %tmp11741, i64 1
+ %tmp11743 = getelementptr inbounds float, float* %tmp11742, i64 1
+ %tmp11744 = getelementptr inbounds float, float* %tmp11743, i64 1
+ %tmp11745 = getelementptr inbounds float, float* %tmp11744, i64 1
+ %tmp11746 = getelementptr inbounds float, float* %tmp11745, i64 1
+ %tmp11747 = getelementptr inbounds float, float* %tmp11746, i64 1
+ %tmp11748 = getelementptr inbounds float, float* %tmp11747, i64 1
+ %tmp11749 = getelementptr inbounds float, float* %tmp11748, i64 1
+ %tmp11750 = getelementptr inbounds float, float* %tmp11749, i64 1
+ %tmp11751 = getelementptr inbounds float, float* %tmp11750, i64 1
+ %tmp11752 = getelementptr inbounds float, float* %tmp11751, i64 1
+ %tmp11753 = getelementptr inbounds float, float* %tmp11752, i64 1
+ %tmp11754 = getelementptr inbounds float, float* %tmp11753, i64 1
+ %tmp11755 = getelementptr inbounds float, float* %tmp11754, i64 1
+ %tmp11756 = getelementptr inbounds float, float* %tmp11755, i64 1
+ %tmp11757 = getelementptr inbounds float, float* %tmp11756, i64 1
+ %tmp11758 = getelementptr inbounds float, float* %tmp11757, i64 1
+ %tmp11759 = getelementptr inbounds float, float* %tmp11758, i64 1
+ %tmp11760 = getelementptr inbounds float, float* %tmp11759, i64 1
+ %tmp11761 = getelementptr inbounds float, float* %tmp11760, i64 1
+ %tmp11762 = getelementptr inbounds float, float* %tmp11761, i64 1
+ %tmp11763 = getelementptr inbounds float, float* %tmp11762, i64 1
+ %tmp11764 = getelementptr inbounds float, float* %tmp11763, i64 1
+ %tmp11765 = getelementptr inbounds float, float* %tmp11764, i64 1
+ %tmp11766 = getelementptr inbounds float, float* %tmp11765, i64 1
+ %tmp11767 = getelementptr inbounds float, float* %tmp11766, i64 1
+ %tmp11768 = getelementptr inbounds float, float* %tmp11767, i64 1
+ %tmp11769 = getelementptr inbounds float, float* %tmp11768, i64 1
+ %tmp11770 = getelementptr inbounds float, float* %tmp11769, i64 1
+ %tmp11771 = getelementptr inbounds float, float* %tmp11770, i64 1
+ %tmp11772 = getelementptr inbounds float, float* %tmp11771, i64 1
+ %tmp11773 = getelementptr inbounds float, float* %tmp11772, i64 1
+ %tmp11774 = getelementptr inbounds float, float* %tmp11773, i64 1
+ %tmp11775 = getelementptr inbounds float, float* %tmp11774, i64 1
+ %tmp11776 = getelementptr inbounds float, float* %tmp11775, i64 1
+ %tmp11777 = getelementptr inbounds float, float* %tmp11776, i64 1
+ %tmp11778 = getelementptr inbounds float, float* %tmp11777, i64 1
+ %tmp11779 = getelementptr inbounds float, float* %tmp11778, i64 1
+ %tmp11780 = getelementptr inbounds float, float* %tmp11779, i64 1
+ %tmp11781 = getelementptr inbounds float, float* %tmp11780, i64 1
+ %tmp11782 = getelementptr inbounds float, float* %tmp11781, i64 1
+ %tmp11783 = getelementptr inbounds float, float* %tmp11782, i64 1
+ %tmp11784 = getelementptr inbounds float, float* %tmp11783, i64 1
+ %tmp11785 = getelementptr inbounds float, float* %tmp11784, i64 1
+ %tmp11786 = getelementptr inbounds float, float* %tmp11785, i64 1
+ %tmp11787 = getelementptr inbounds float, float* %tmp11786, i64 1
+ %tmp11788 = getelementptr inbounds float, float* %tmp11787, i64 1
+ %tmp11789 = getelementptr inbounds float, float* %tmp11788, i64 1
+ %tmp11790 = getelementptr inbounds float, float* %tmp11789, i64 1
+ %tmp11791 = getelementptr inbounds float, float* %tmp11790, i64 1
+ %tmp11792 = getelementptr inbounds float, float* %tmp11791, i64 1
+ %tmp11793 = getelementptr inbounds float, float* %tmp11792, i64 1
+ %tmp11794 = getelementptr inbounds float, float* %tmp11793, i64 1
+ %tmp11795 = getelementptr inbounds float, float* %tmp11794, i64 1
+ %tmp11796 = getelementptr inbounds float, float* %tmp11795, i64 1
+ %tmp11797 = getelementptr inbounds float, float* %tmp11796, i64 1
+ %tmp11798 = getelementptr inbounds float, float* %tmp11797, i64 1
+ %tmp11799 = getelementptr inbounds float, float* %tmp11798, i64 1
+ %tmp11800 = getelementptr inbounds float, float* %tmp11799, i64 1
+ %tmp11801 = getelementptr inbounds float, float* %tmp11800, i64 1
+ %tmp11802 = getelementptr inbounds float, float* %tmp11801, i64 1
+ %tmp11803 = getelementptr inbounds float, float* %tmp11802, i64 1
+ %tmp11804 = getelementptr inbounds float, float* %tmp11803, i64 1
+ %tmp11805 = getelementptr inbounds float, float* %tmp11804, i64 1
+ %tmp11806 = getelementptr inbounds float, float* %tmp11805, i64 1
+ %tmp11807 = getelementptr inbounds float, float* %tmp11806, i64 1
+ %tmp11808 = getelementptr inbounds float, float* %tmp11807, i64 1
+ %tmp11809 = getelementptr inbounds float, float* %tmp11808, i64 1
+ %tmp11810 = getelementptr inbounds float, float* %tmp11809, i64 1
+ %tmp11811 = getelementptr inbounds float, float* %tmp11810, i64 1
+ %tmp11812 = getelementptr inbounds float, float* %tmp11811, i64 1
+ %tmp11813 = getelementptr inbounds float, float* %tmp11812, i64 1
+ %tmp11814 = getelementptr inbounds float, float* %tmp11813, i64 1
+ %tmp11815 = getelementptr inbounds float, float* %tmp11814, i64 1
+ %tmp11816 = getelementptr inbounds float, float* %tmp11815, i64 1
+ %tmp11817 = getelementptr inbounds float, float* %tmp11816, i64 1
+ %tmp11818 = getelementptr inbounds float, float* %tmp11817, i64 1
+ %tmp11819 = getelementptr inbounds float, float* %tmp11818, i64 1
+ %tmp11820 = getelementptr inbounds float, float* %tmp11819, i64 1
+ %tmp11821 = getelementptr inbounds float, float* %tmp11820, i64 1
+ %tmp11822 = getelementptr inbounds float, float* %tmp11821, i64 1
+ %tmp11823 = getelementptr inbounds float, float* %tmp11822, i64 1
+ %tmp11824 = getelementptr inbounds float, float* %tmp11823, i64 1
+ %tmp11825 = getelementptr inbounds float, float* %tmp11824, i64 1
+ %tmp11826 = getelementptr inbounds float, float* %tmp11825, i64 1
+ %tmp11827 = getelementptr inbounds float, float* %tmp11826, i64 1
+ %tmp11828 = getelementptr inbounds float, float* %tmp11827, i64 1
+ %tmp11829 = getelementptr inbounds float, float* %tmp11828, i64 1
+ %tmp11830 = getelementptr inbounds float, float* %tmp11829, i64 1
+ %tmp11831 = getelementptr inbounds float, float* %tmp11830, i64 1
+ %tmp11832 = getelementptr inbounds float, float* %tmp11831, i64 1
+ %tmp11833 = getelementptr inbounds float, float* %tmp11832, i64 1
+ %tmp11834 = getelementptr inbounds float, float* %tmp11833, i64 1
+ %tmp11835 = getelementptr inbounds float, float* %tmp11834, i64 1
+ %tmp11836 = getelementptr inbounds float, float* %tmp11835, i64 1
+ %tmp11837 = getelementptr inbounds float, float* %tmp11836, i64 1
+ %tmp11838 = getelementptr inbounds float, float* %tmp11837, i64 1
+ %tmp11839 = getelementptr inbounds float, float* %tmp11838, i64 1
+ %tmp11840 = getelementptr inbounds float, float* %tmp11839, i64 1
+ %tmp11841 = getelementptr inbounds float, float* %tmp11840, i64 1
+ %tmp11842 = getelementptr inbounds float, float* %tmp11841, i64 1
+ %tmp11843 = getelementptr inbounds float, float* %tmp11842, i64 1
+ %tmp11844 = getelementptr inbounds float, float* %tmp11843, i64 1
+ %tmp11845 = getelementptr inbounds float, float* %tmp11844, i64 1
+ %tmp11846 = getelementptr inbounds float, float* %tmp11845, i64 1
+ %tmp11847 = getelementptr inbounds float, float* %tmp11846, i64 1
+ %tmp11848 = getelementptr inbounds float, float* %tmp11847, i64 1
+ %tmp11849 = getelementptr inbounds float, float* %tmp11848, i64 1
+ %tmp11850 = getelementptr inbounds float, float* %tmp11849, i64 1
+ %tmp11851 = getelementptr inbounds float, float* %tmp11850, i64 1
+ %tmp11852 = getelementptr inbounds float, float* %tmp11851, i64 1
+ %tmp11853 = getelementptr inbounds float, float* %tmp11852, i64 1
+ %tmp11854 = getelementptr inbounds float, float* %tmp11853, i64 1
+ %tmp11855 = getelementptr inbounds float, float* %tmp11854, i64 1
+ %tmp11856 = getelementptr inbounds float, float* %tmp11855, i64 1
+ %tmp11857 = getelementptr inbounds float, float* %tmp11856, i64 1
+ %tmp11858 = getelementptr inbounds float, float* %tmp11857, i64 1
+ %tmp11859 = getelementptr inbounds float, float* %tmp11858, i64 1
+ %tmp11860 = getelementptr inbounds float, float* %tmp11859, i64 1
+ %tmp11861 = getelementptr inbounds float, float* %tmp11860, i64 1
+ %tmp11862 = getelementptr inbounds float, float* %tmp11861, i64 1
+ %tmp11863 = getelementptr inbounds float, float* %tmp11862, i64 1
+ %tmp11864 = getelementptr inbounds float, float* %tmp11863, i64 1
+ %tmp11865 = getelementptr inbounds float, float* %tmp11864, i64 1
+ %tmp11866 = getelementptr inbounds float, float* %tmp11865, i64 1
+ %tmp11867 = getelementptr inbounds float, float* %tmp11866, i64 1
+ %tmp11868 = getelementptr inbounds float, float* %tmp11867, i64 1
+ %tmp11869 = getelementptr inbounds float, float* %tmp11868, i64 1
+ %tmp11870 = getelementptr inbounds float, float* %tmp11869, i64 1
+ %tmp11871 = getelementptr inbounds float, float* %tmp11870, i64 1
+ %tmp11872 = getelementptr inbounds float, float* %tmp11871, i64 1
+ %tmp11873 = getelementptr inbounds float, float* %tmp11872, i64 1
+ %tmp11874 = getelementptr inbounds float, float* %tmp11873, i64 1
+ %tmp11875 = getelementptr inbounds float, float* %tmp11874, i64 1
+ %tmp11876 = getelementptr inbounds float, float* %tmp11875, i64 1
+ %tmp11877 = getelementptr inbounds float, float* %tmp11876, i64 1
+ %tmp11878 = getelementptr inbounds float, float* %tmp11877, i64 1
+ %tmp11879 = getelementptr inbounds float, float* %tmp11878, i64 1
+ %tmp11880 = getelementptr inbounds float, float* %tmp11879, i64 1
+ %tmp11881 = getelementptr inbounds float, float* %tmp11880, i64 1
+ %tmp11882 = getelementptr inbounds float, float* %tmp11881, i64 1
+ %tmp11883 = getelementptr inbounds float, float* %tmp11882, i64 1
+ %tmp11884 = getelementptr inbounds float, float* %tmp11883, i64 1
+ %tmp11885 = getelementptr inbounds float, float* %tmp11884, i64 1
+ %tmp11886 = getelementptr inbounds float, float* %tmp11885, i64 1
+ %tmp11887 = getelementptr inbounds float, float* %tmp11886, i64 1
+ %tmp11888 = getelementptr inbounds float, float* %tmp11887, i64 1
+ %tmp11889 = getelementptr inbounds float, float* %tmp11888, i64 1
+ %tmp11890 = getelementptr inbounds float, float* %tmp11889, i64 1
+ %tmp11891 = getelementptr inbounds float, float* %tmp11890, i64 1
+ %tmp11892 = getelementptr inbounds float, float* %tmp11891, i64 1
+ %tmp11893 = getelementptr inbounds float, float* %tmp11892, i64 1
+ %tmp11894 = getelementptr inbounds float, float* %tmp11893, i64 1
+ %tmp11895 = getelementptr inbounds float, float* %tmp11894, i64 1
+ %tmp11896 = getelementptr inbounds float, float* %tmp11895, i64 1
+ %tmp11897 = getelementptr inbounds float, float* %tmp11896, i64 1
+ %tmp11898 = getelementptr inbounds float, float* %tmp11897, i64 1
+ %tmp11899 = getelementptr inbounds float, float* %tmp11898, i64 1
+ %tmp11900 = getelementptr inbounds float, float* %tmp11899, i64 1
+ %tmp11901 = getelementptr inbounds float, float* %tmp11900, i64 1
+ %tmp11902 = getelementptr inbounds float, float* %tmp11901, i64 1
+ %tmp11903 = getelementptr inbounds float, float* %tmp11902, i64 1
+ %tmp11904 = getelementptr inbounds float, float* %tmp11903, i64 1
+ %tmp11905 = getelementptr inbounds float, float* %tmp11904, i64 1
+ %tmp11906 = getelementptr inbounds float, float* %tmp11905, i64 1
+ %tmp11907 = getelementptr inbounds float, float* %tmp11906, i64 1
+ %tmp11908 = getelementptr inbounds float, float* %tmp11907, i64 1
+ %tmp11909 = getelementptr inbounds float, float* %tmp11908, i64 1
+ %tmp11910 = getelementptr inbounds float, float* %tmp11909, i64 1
+ %tmp11911 = getelementptr inbounds float, float* %tmp11910, i64 1
+ %tmp11912 = getelementptr inbounds float, float* %tmp11911, i64 1
+ %tmp11913 = getelementptr inbounds float, float* %tmp11912, i64 1
+ %tmp11914 = getelementptr inbounds float, float* %tmp11913, i64 1
+ %tmp11915 = getelementptr inbounds float, float* %tmp11914, i64 1
+ %tmp11916 = getelementptr inbounds float, float* %tmp11915, i64 1
+ %tmp11917 = getelementptr inbounds float, float* %tmp11916, i64 1
+ %tmp11918 = getelementptr inbounds float, float* %tmp11917, i64 1
+ %tmp11919 = getelementptr inbounds float, float* %tmp11918, i64 1
+ %tmp11920 = getelementptr inbounds float, float* %tmp11919, i64 1
+ %tmp11921 = getelementptr inbounds float, float* %tmp11920, i64 1
+ %tmp11922 = getelementptr inbounds float, float* %tmp11921, i64 1
+ %tmp11923 = getelementptr inbounds float, float* %tmp11922, i64 1
+ %tmp11924 = getelementptr inbounds float, float* %tmp11923, i64 1
+ %tmp11925 = getelementptr inbounds float, float* %tmp11924, i64 1
+ %tmp11926 = getelementptr inbounds float, float* %tmp11925, i64 1
+ %tmp11927 = getelementptr inbounds float, float* %tmp11926, i64 1
+ %tmp11928 = getelementptr inbounds float, float* %tmp11927, i64 1
+ %tmp11929 = getelementptr inbounds float, float* %tmp11928, i64 1
+ %tmp11930 = getelementptr inbounds float, float* %tmp11929, i64 1
+ %tmp11931 = getelementptr inbounds float, float* %tmp11930, i64 1
+ %tmp11932 = getelementptr inbounds float, float* %tmp11931, i64 1
+ %tmp11933 = getelementptr inbounds float, float* %tmp11932, i64 1
+ %tmp11934 = getelementptr inbounds float, float* %tmp11933, i64 1
+ %tmp11935 = getelementptr inbounds float, float* %tmp11934, i64 1
+ %tmp11936 = getelementptr inbounds float, float* %tmp11935, i64 1
+ %tmp11937 = getelementptr inbounds float, float* %tmp11936, i64 1
+ %tmp11938 = getelementptr inbounds float, float* %tmp11937, i64 1
+ %tmp11939 = getelementptr inbounds float, float* %tmp11938, i64 1
+ %tmp11940 = getelementptr inbounds float, float* %tmp11939, i64 1
+ %tmp11941 = getelementptr inbounds float, float* %tmp11940, i64 1
+ %tmp11942 = getelementptr inbounds float, float* %tmp11941, i64 1
+ %tmp11943 = getelementptr inbounds float, float* %tmp11942, i64 1
+ %tmp11944 = getelementptr inbounds float, float* %tmp11943, i64 1
+ %tmp11945 = getelementptr inbounds float, float* %tmp11944, i64 1
+ %tmp11946 = getelementptr inbounds float, float* %tmp11945, i64 1
+ %tmp11947 = getelementptr inbounds float, float* %tmp11946, i64 1
+ %tmp11948 = getelementptr inbounds float, float* %tmp11947, i64 1
+ %tmp11949 = getelementptr inbounds float, float* %tmp11948, i64 1
+ %tmp11950 = getelementptr inbounds float, float* %tmp11949, i64 1
+ %tmp11951 = getelementptr inbounds float, float* %tmp11950, i64 1
+ %tmp11952 = getelementptr inbounds float, float* %tmp11951, i64 1
+ %tmp11953 = getelementptr inbounds float, float* %tmp11952, i64 1
+ %tmp11954 = getelementptr inbounds float, float* %tmp11953, i64 1
+ %tmp11955 = getelementptr inbounds float, float* %tmp11954, i64 1
+ %tmp11956 = getelementptr inbounds float, float* %tmp11955, i64 1
+ %tmp11957 = getelementptr inbounds float, float* %tmp11956, i64 1
+ %tmp11958 = getelementptr inbounds float, float* %tmp11957, i64 1
+ %tmp11959 = getelementptr inbounds float, float* %tmp11958, i64 1
+ %tmp11960 = getelementptr inbounds float, float* %tmp11959, i64 1
+ %tmp11961 = getelementptr inbounds float, float* %tmp11960, i64 1
+ %tmp11962 = getelementptr inbounds float, float* %tmp11961, i64 1
+ %tmp11963 = getelementptr inbounds float, float* %tmp11962, i64 1
+ %tmp11964 = getelementptr inbounds float, float* %tmp11963, i64 1
+ %tmp11965 = getelementptr inbounds float, float* %tmp11964, i64 1
+ %tmp11966 = getelementptr inbounds float, float* %tmp11965, i64 1
+ %tmp11967 = getelementptr inbounds float, float* %tmp11966, i64 1
+ %tmp11968 = getelementptr inbounds float, float* %tmp11967, i64 1
+ %tmp11969 = getelementptr inbounds float, float* %tmp11968, i64 1
+ %tmp11970 = getelementptr inbounds float, float* %tmp11969, i64 1
+ %tmp11971 = getelementptr inbounds float, float* %tmp11970, i64 1
+ %tmp11972 = getelementptr inbounds float, float* %tmp11971, i64 1
+ %tmp11973 = getelementptr inbounds float, float* %tmp11972, i64 1
+ %tmp11974 = getelementptr inbounds float, float* %tmp11973, i64 1
+ %tmp11975 = getelementptr inbounds float, float* %tmp11974, i64 1
+ %tmp11976 = getelementptr inbounds float, float* %tmp11975, i64 1
+ %tmp11977 = getelementptr inbounds float, float* %tmp11976, i64 1
+ %tmp11978 = getelementptr inbounds float, float* %tmp11977, i64 1
+ %tmp11979 = getelementptr inbounds float, float* %tmp11978, i64 1
+ %tmp11980 = getelementptr inbounds float, float* %tmp11979, i64 1
+ %tmp11981 = getelementptr inbounds float, float* %tmp11980, i64 1
+ %tmp11982 = getelementptr inbounds float, float* %tmp11981, i64 1
+ %tmp11983 = getelementptr inbounds float, float* %tmp11982, i64 1
+ %tmp11984 = getelementptr inbounds float, float* %tmp11983, i64 1
+ %tmp11985 = getelementptr inbounds float, float* %tmp11984, i64 1
+ %tmp11986 = getelementptr inbounds float, float* %tmp11985, i64 1
+ %tmp11987 = getelementptr inbounds float, float* %tmp11986, i64 1
+ %tmp11988 = getelementptr inbounds float, float* %tmp11987, i64 1
+ %tmp11989 = getelementptr inbounds float, float* %tmp11988, i64 1
+ %tmp11990 = getelementptr inbounds float, float* %tmp11989, i64 1
+ %tmp11991 = getelementptr inbounds float, float* %tmp11990, i64 1
+ %tmp11992 = getelementptr inbounds float, float* %tmp11991, i64 1
+ %tmp11993 = getelementptr inbounds float, float* %tmp11992, i64 1
+ %tmp11994 = getelementptr inbounds float, float* %tmp11993, i64 1
+ %tmp11995 = getelementptr inbounds float, float* %tmp11994, i64 1
+ %tmp11996 = getelementptr inbounds float, float* %tmp11995, i64 1
+ %tmp11997 = getelementptr inbounds float, float* %tmp11996, i64 1
+ %tmp11998 = getelementptr inbounds float, float* %tmp11997, i64 1
+ %tmp11999 = getelementptr inbounds float, float* %tmp11998, i64 1
+ %tmp12000 = getelementptr inbounds float, float* %tmp11999, i64 1
+ %tmp12001 = getelementptr inbounds float, float* %tmp12000, i64 1
+ %tmp12002 = getelementptr inbounds float, float* %tmp12001, i64 1
+ %tmp12003 = getelementptr inbounds float, float* %tmp12002, i64 1
+ %tmp12004 = getelementptr inbounds float, float* %tmp12003, i64 1
+ %tmp12005 = getelementptr inbounds float, float* %tmp12004, i64 1
+ %tmp12006 = getelementptr inbounds float, float* %tmp12005, i64 1
+ %tmp12007 = getelementptr inbounds float, float* %tmp12006, i64 1
+ %tmp12008 = getelementptr inbounds float, float* %tmp12007, i64 1
+ %tmp12009 = getelementptr inbounds float, float* %tmp12008, i64 1
+ %tmp12010 = getelementptr inbounds float, float* %tmp12009, i64 1
+ %tmp12011 = getelementptr inbounds float, float* %tmp12010, i64 1
+ %tmp12012 = getelementptr inbounds float, float* %tmp12011, i64 1
+ %tmp12013 = getelementptr inbounds float, float* %tmp12012, i64 1
+ %tmp12014 = getelementptr inbounds float, float* %tmp12013, i64 1
+ %tmp12015 = getelementptr inbounds float, float* %tmp12014, i64 1
+ %tmp12016 = getelementptr inbounds float, float* %tmp12015, i64 1
+ %tmp12017 = getelementptr inbounds float, float* %tmp12016, i64 1
+ %tmp12018 = getelementptr inbounds float, float* %tmp12017, i64 1
+ %tmp12019 = getelementptr inbounds float, float* %tmp12018, i64 1
+ %tmp12020 = getelementptr inbounds float, float* %tmp12019, i64 1
+ %tmp12021 = getelementptr inbounds float, float* %tmp12020, i64 1
+ %tmp12022 = getelementptr inbounds float, float* %tmp12021, i64 1
+ %tmp12023 = getelementptr inbounds float, float* %tmp12022, i64 1
+ %tmp12024 = getelementptr inbounds float, float* %tmp12023, i64 1
+ %tmp12025 = getelementptr inbounds float, float* %tmp12024, i64 1
+ %tmp12026 = getelementptr inbounds float, float* %tmp12025, i64 1
+ %tmp12027 = getelementptr inbounds float, float* %tmp12026, i64 1
+ %tmp12028 = getelementptr inbounds float, float* %tmp12027, i64 1
+ %tmp12029 = getelementptr inbounds float, float* %tmp12028, i64 1
+ %tmp12030 = getelementptr inbounds float, float* %tmp12029, i64 1
+ %tmp12031 = getelementptr inbounds float, float* %tmp12030, i64 1
+ %tmp12032 = getelementptr inbounds float, float* %tmp12031, i64 1
+ %tmp12033 = getelementptr inbounds float, float* %tmp12032, i64 1
+ %tmp12034 = getelementptr inbounds float, float* %tmp12033, i64 1
+ %tmp12035 = getelementptr inbounds float, float* %tmp12034, i64 1
+ %tmp12036 = getelementptr inbounds float, float* %tmp12035, i64 1
+ %tmp12037 = getelementptr inbounds float, float* %tmp12036, i64 1
+ %tmp12038 = getelementptr inbounds float, float* %tmp12037, i64 1
+ %tmp12039 = getelementptr inbounds float, float* %tmp12038, i64 1
+ %tmp12040 = getelementptr inbounds float, float* %tmp12039, i64 1
+ %tmp12041 = getelementptr inbounds float, float* %tmp12040, i64 1
+ %tmp12042 = getelementptr inbounds float, float* %tmp12041, i64 1
+ %tmp12043 = getelementptr inbounds float, float* %tmp12042, i64 1
+ %tmp12044 = getelementptr inbounds float, float* %tmp12043, i64 1
+ %tmp12045 = getelementptr inbounds float, float* %tmp12044, i64 1
+ %tmp12046 = getelementptr inbounds float, float* %tmp12045, i64 1
+ %tmp12047 = getelementptr inbounds float, float* %tmp12046, i64 1
+ %tmp12048 = getelementptr inbounds float, float* %tmp12047, i64 1
+ %tmp12049 = getelementptr inbounds float, float* %tmp12048, i64 1
+ %tmp12050 = getelementptr inbounds float, float* %tmp12049, i64 1
+ %tmp12051 = getelementptr inbounds float, float* %tmp12050, i64 1
+ %tmp12052 = getelementptr inbounds float, float* %tmp12051, i64 1
+ %tmp12053 = getelementptr inbounds float, float* %tmp12052, i64 1
+ %tmp12054 = getelementptr inbounds float, float* %tmp12053, i64 1
+ %tmp12055 = getelementptr inbounds float, float* %tmp12054, i64 1
+ %tmp12056 = getelementptr inbounds float, float* %tmp12055, i64 1
+ %tmp12057 = getelementptr inbounds float, float* %tmp12056, i64 1
+ %tmp12058 = getelementptr inbounds float, float* %tmp12057, i64 1
+ %tmp12059 = getelementptr inbounds float, float* %tmp12058, i64 1
+ %tmp12060 = getelementptr inbounds float, float* %tmp12059, i64 1
+ %tmp12061 = getelementptr inbounds float, float* %tmp12060, i64 1
+ %tmp12062 = getelementptr inbounds float, float* %tmp12061, i64 1
+ %tmp12063 = getelementptr inbounds float, float* %tmp12062, i64 1
+ %tmp12064 = getelementptr inbounds float, float* %tmp12063, i64 1
+ %tmp12065 = getelementptr inbounds float, float* %tmp12064, i64 1
+ %tmp12066 = getelementptr inbounds float, float* %tmp12065, i64 1
+ %tmp12067 = getelementptr inbounds float, float* %tmp12066, i64 1
+ %tmp12068 = getelementptr inbounds float, float* %tmp12067, i64 1
+ %tmp12069 = getelementptr inbounds float, float* %tmp12068, i64 1
+ %tmp12070 = getelementptr inbounds float, float* %tmp12069, i64 1
+ %tmp12071 = getelementptr inbounds float, float* %tmp12070, i64 1
+ %tmp12072 = getelementptr inbounds float, float* %tmp12071, i64 1
+ %tmp12073 = getelementptr inbounds float, float* %tmp12072, i64 1
+ %tmp12074 = getelementptr inbounds float, float* %tmp12073, i64 1
+ %tmp12075 = getelementptr inbounds float, float* %tmp12074, i64 1
+ %tmp12076 = getelementptr inbounds float, float* %tmp12075, i64 1
+ %tmp12077 = getelementptr inbounds float, float* %tmp12076, i64 1
+ %tmp12078 = getelementptr inbounds float, float* %tmp12077, i64 1
+ %tmp12079 = getelementptr inbounds float, float* %tmp12078, i64 1
+ %tmp12080 = getelementptr inbounds float, float* %tmp12079, i64 1
+ %tmp12081 = getelementptr inbounds float, float* %tmp12080, i64 1
+ %tmp12082 = getelementptr inbounds float, float* %tmp12081, i64 1
+ %tmp12083 = getelementptr inbounds float, float* %tmp12082, i64 1
+ %tmp12084 = getelementptr inbounds float, float* %tmp12083, i64 1
+ %tmp12085 = getelementptr inbounds float, float* %tmp12084, i64 1
+ %tmp12086 = getelementptr inbounds float, float* %tmp12085, i64 1
+ %tmp12087 = getelementptr inbounds float, float* %tmp12086, i64 1
+ %tmp12088 = getelementptr inbounds float, float* %tmp12087, i64 1
+ %tmp12089 = getelementptr inbounds float, float* %tmp12088, i64 1
+ %tmp12090 = getelementptr inbounds float, float* %tmp12089, i64 1
+ %tmp12091 = getelementptr inbounds float, float* %tmp12090, i64 1
+ %tmp12092 = getelementptr inbounds float, float* %tmp12091, i64 1
+ %tmp12093 = getelementptr inbounds float, float* %tmp12092, i64 1
+ %tmp12094 = getelementptr inbounds float, float* %tmp12093, i64 1
+ %tmp12095 = getelementptr inbounds float, float* %tmp12094, i64 1
+ %tmp12096 = getelementptr inbounds float, float* %tmp12095, i64 1
+ %tmp12097 = getelementptr inbounds float, float* %tmp12096, i64 1
+ %tmp12098 = getelementptr inbounds float, float* %tmp12097, i64 1
+ %tmp12099 = getelementptr inbounds float, float* %tmp12098, i64 1
+ %tmp12100 = getelementptr inbounds float, float* %tmp12099, i64 1
+ %tmp12101 = getelementptr inbounds float, float* %tmp12100, i64 1
+ %tmp12102 = getelementptr inbounds float, float* %tmp12101, i64 1
+ %tmp12103 = getelementptr inbounds float, float* %tmp12102, i64 1
+ %tmp12104 = getelementptr inbounds float, float* %tmp12103, i64 1
+ %tmp12105 = getelementptr inbounds float, float* %tmp12104, i64 1
+ %tmp12106 = getelementptr inbounds float, float* %tmp12105, i64 1
+ %tmp12107 = getelementptr inbounds float, float* %tmp12106, i64 1
+ %tmp12108 = getelementptr inbounds float, float* %tmp12107, i64 1
+ %tmp12109 = getelementptr inbounds float, float* %tmp12108, i64 1
+ %tmp12110 = getelementptr inbounds float, float* %tmp12109, i64 1
+ %tmp12111 = getelementptr inbounds float, float* %tmp12110, i64 1
+ %tmp12112 = getelementptr inbounds float, float* %tmp12111, i64 1
+ %tmp12113 = getelementptr inbounds float, float* %tmp12112, i64 1
+ %tmp12114 = getelementptr inbounds float, float* %tmp12113, i64 1
+ %tmp12115 = getelementptr inbounds float, float* %tmp12114, i64 1
+ %tmp12116 = getelementptr inbounds float, float* %tmp12115, i64 1
+ %tmp12117 = getelementptr inbounds float, float* %tmp12116, i64 1
+ %tmp12118 = getelementptr inbounds float, float* %tmp12117, i64 1
+ %tmp12119 = getelementptr inbounds float, float* %tmp12118, i64 1
+ %tmp12120 = getelementptr inbounds float, float* %tmp12119, i64 1
+ %tmp12121 = getelementptr inbounds float, float* %tmp12120, i64 1
+ %tmp12122 = getelementptr inbounds float, float* %tmp12121, i64 1
+ %tmp12123 = getelementptr inbounds float, float* %tmp12122, i64 1
+ %tmp12124 = getelementptr inbounds float, float* %tmp12123, i64 1
+ %tmp12125 = getelementptr inbounds float, float* %tmp12124, i64 1
+ %tmp12126 = getelementptr inbounds float, float* %tmp12125, i64 1
+ %tmp12127 = getelementptr inbounds float, float* %tmp12126, i64 1
+ %tmp12128 = getelementptr inbounds float, float* %tmp12127, i64 1
+ %tmp12129 = getelementptr inbounds float, float* %tmp12128, i64 1
+ %tmp12130 = getelementptr inbounds float, float* %tmp12129, i64 1
+ %tmp12131 = getelementptr inbounds float, float* %tmp12130, i64 1
+ %tmp12132 = getelementptr inbounds float, float* %tmp12131, i64 1
+ %tmp12133 = getelementptr inbounds float, float* %tmp12132, i64 1
+ %tmp12134 = getelementptr inbounds float, float* %tmp12133, i64 1
+ %tmp12135 = getelementptr inbounds float, float* %tmp12134, i64 1
+ %tmp12136 = getelementptr inbounds float, float* %tmp12135, i64 1
+ %tmp12137 = getelementptr inbounds float, float* %tmp12136, i64 1
+ %tmp12138 = getelementptr inbounds float, float* %tmp12137, i64 1
+ %tmp12139 = getelementptr inbounds float, float* %tmp12138, i64 1
+ %tmp12140 = getelementptr inbounds float, float* %tmp12139, i64 1
+ %tmp12141 = getelementptr inbounds float, float* %tmp12140, i64 1
+ %tmp12142 = getelementptr inbounds float, float* %tmp12141, i64 1
+ %tmp12143 = getelementptr inbounds float, float* %tmp12142, i64 1
+ %tmp12144 = getelementptr inbounds float, float* %tmp12143, i64 1
+ %tmp12145 = getelementptr inbounds float, float* %tmp12144, i64 1
+ %tmp12146 = getelementptr inbounds float, float* %tmp12145, i64 1
+ %tmp12147 = getelementptr inbounds float, float* %tmp12146, i64 1
+ %tmp12148 = getelementptr inbounds float, float* %tmp12147, i64 1
+ %tmp12149 = getelementptr inbounds float, float* %tmp12148, i64 1
+ %tmp12150 = getelementptr inbounds float, float* %tmp12149, i64 1
+ %tmp12151 = getelementptr inbounds float, float* %tmp12150, i64 1
+ %tmp12152 = getelementptr inbounds float, float* %tmp12151, i64 1
+ %tmp12153 = getelementptr inbounds float, float* %tmp12152, i64 1
+ %tmp12154 = getelementptr inbounds float, float* %tmp12153, i64 1
+ %tmp12155 = getelementptr inbounds float, float* %tmp12154, i64 1
+ %tmp12156 = getelementptr inbounds float, float* %tmp12155, i64 1
+ %tmp12157 = getelementptr inbounds float, float* %tmp12156, i64 1
+ %tmp12158 = getelementptr inbounds float, float* %tmp12157, i64 1
+ %tmp12159 = getelementptr inbounds float, float* %tmp12158, i64 1
+ %tmp12160 = getelementptr inbounds float, float* %tmp12159, i64 1
+ %tmp12161 = getelementptr inbounds float, float* %tmp12160, i64 1
+ %tmp12162 = getelementptr inbounds float, float* %tmp12161, i64 1
+ %tmp12163 = getelementptr inbounds float, float* %tmp12162, i64 1
+ %tmp12164 = getelementptr inbounds float, float* %tmp12163, i64 1
+ %tmp12165 = getelementptr inbounds float, float* %tmp12164, i64 1
+ %tmp12166 = getelementptr inbounds float, float* %tmp12165, i64 1
+ %tmp12167 = getelementptr inbounds float, float* %tmp12166, i64 1
+ %tmp12168 = getelementptr inbounds float, float* %tmp12167, i64 1
+ %tmp12169 = getelementptr inbounds float, float* %tmp12168, i64 1
+ %tmp12170 = getelementptr inbounds float, float* %tmp12169, i64 1
+ %tmp12171 = getelementptr inbounds float, float* %tmp12170, i64 1
+ %tmp12172 = getelementptr inbounds float, float* %tmp12171, i64 1
+ %tmp12173 = getelementptr inbounds float, float* %tmp12172, i64 1
+ %tmp12174 = getelementptr inbounds float, float* %tmp12173, i64 1
+ %tmp12175 = getelementptr inbounds float, float* %tmp12174, i64 1
+ %tmp12176 = getelementptr inbounds float, float* %tmp12175, i64 1
+ %tmp12177 = getelementptr inbounds float, float* %tmp12176, i64 1
+ %tmp12178 = getelementptr inbounds float, float* %tmp12177, i64 1
+ %tmp12179 = getelementptr inbounds float, float* %tmp12178, i64 1
+ %tmp12180 = getelementptr inbounds float, float* %tmp12179, i64 1
+ %tmp12181 = getelementptr inbounds float, float* %tmp12180, i64 1
+ %tmp12182 = getelementptr inbounds float, float* %tmp12181, i64 1
+ %tmp12183 = getelementptr inbounds float, float* %tmp12182, i64 1
+ %tmp12184 = getelementptr inbounds float, float* %tmp12183, i64 1
+ %tmp12185 = getelementptr inbounds float, float* %tmp12184, i64 1
+ %tmp12186 = getelementptr inbounds float, float* %tmp12185, i64 1
+ %tmp12187 = getelementptr inbounds float, float* %tmp12186, i64 1
+ %tmp12188 = getelementptr inbounds float, float* %tmp12187, i64 1
+ %tmp12189 = getelementptr inbounds float, float* %tmp12188, i64 1
+ %tmp12190 = getelementptr inbounds float, float* %tmp12189, i64 1
+ %tmp12191 = getelementptr inbounds float, float* %tmp12190, i64 1
+ %tmp12192 = getelementptr inbounds float, float* %tmp12191, i64 1
+ %tmp12193 = getelementptr inbounds float, float* %tmp12192, i64 1
+ %tmp12194 = getelementptr inbounds float, float* %tmp12193, i64 1
+ %tmp12195 = getelementptr inbounds float, float* %tmp12194, i64 1
+ %tmp12196 = getelementptr inbounds float, float* %tmp12195, i64 1
+ %tmp12197 = getelementptr inbounds float, float* %tmp12196, i64 1
+ %tmp12198 = getelementptr inbounds float, float* %tmp12197, i64 1
+ %tmp12199 = getelementptr inbounds float, float* %tmp12198, i64 1
+ %tmp12200 = getelementptr inbounds float, float* %tmp12199, i64 1
+ %tmp12201 = getelementptr inbounds float, float* %tmp12200, i64 1
+ %tmp12202 = getelementptr inbounds float, float* %tmp12201, i64 1
+ %tmp12203 = getelementptr inbounds float, float* %tmp12202, i64 1
+ %tmp12204 = getelementptr inbounds float, float* %tmp12203, i64 1
+ %tmp12205 = getelementptr inbounds float, float* %tmp12204, i64 1
+ %tmp12206 = getelementptr inbounds float, float* %tmp12205, i64 1
+ %tmp12207 = getelementptr inbounds float, float* %tmp12206, i64 1
+ %tmp12208 = getelementptr inbounds float, float* %tmp12207, i64 1
+ %tmp12209 = getelementptr inbounds float, float* %tmp12208, i64 1
+ %tmp12210 = getelementptr inbounds float, float* %tmp12209, i64 1
+ %tmp12211 = getelementptr inbounds float, float* %tmp12210, i64 1
+ %tmp12212 = getelementptr inbounds float, float* %tmp12211, i64 1
+ %tmp12213 = getelementptr inbounds float, float* %tmp12212, i64 1
+ %tmp12214 = getelementptr inbounds float, float* %tmp12213, i64 1
+ %tmp12215 = getelementptr inbounds float, float* %tmp12214, i64 1
+ %tmp12216 = getelementptr inbounds float, float* %tmp12215, i64 1
+ %tmp12217 = getelementptr inbounds float, float* %tmp12216, i64 1
+ %tmp12218 = getelementptr inbounds float, float* %tmp12217, i64 1
+ %tmp12219 = getelementptr inbounds float, float* %tmp12218, i64 1
+ %tmp12220 = getelementptr inbounds float, float* %tmp12219, i64 1
+ %tmp12221 = getelementptr inbounds float, float* %tmp12220, i64 1
+ %tmp12222 = getelementptr inbounds float, float* %tmp12221, i64 1
+ %tmp12223 = getelementptr inbounds float, float* %tmp12222, i64 1
+ %tmp12224 = getelementptr inbounds float, float* %tmp12223, i64 1
+ %tmp12225 = getelementptr inbounds float, float* %tmp12224, i64 1
+ %tmp12226 = getelementptr inbounds float, float* %tmp12225, i64 1
+ %tmp12227 = getelementptr inbounds float, float* %tmp12226, i64 1
+ %tmp12228 = getelementptr inbounds float, float* %tmp12227, i64 1
+ %tmp12229 = getelementptr inbounds float, float* %tmp12228, i64 1
+ %tmp12230 = getelementptr inbounds float, float* %tmp12229, i64 1
+ %tmp12231 = getelementptr inbounds float, float* %tmp12230, i64 1
+ %tmp12232 = getelementptr inbounds float, float* %tmp12231, i64 1
+ %tmp12233 = getelementptr inbounds float, float* %tmp12232, i64 1
+ %tmp12234 = getelementptr inbounds float, float* %tmp12233, i64 1
+ %tmp12235 = getelementptr inbounds float, float* %tmp12234, i64 1
+ %tmp12236 = getelementptr inbounds float, float* %tmp12235, i64 1
+ %tmp12237 = getelementptr inbounds float, float* %tmp12236, i64 1
+ %tmp12238 = getelementptr inbounds float, float* %tmp12237, i64 1
+ %tmp12239 = getelementptr inbounds float, float* %tmp12238, i64 1
+ %tmp12240 = getelementptr inbounds float, float* %tmp12239, i64 1
+ %tmp12241 = getelementptr inbounds float, float* %tmp12240, i64 1
+ %tmp12242 = getelementptr inbounds float, float* %tmp12241, i64 1
+ %tmp12243 = getelementptr inbounds float, float* %tmp12242, i64 1
+ %tmp12244 = getelementptr inbounds float, float* %tmp12243, i64 1
+ %tmp12245 = getelementptr inbounds float, float* %tmp12244, i64 1
+ %tmp12246 = getelementptr inbounds float, float* %tmp12245, i64 1
+ %tmp12247 = getelementptr inbounds float, float* %tmp12246, i64 1
+ %tmp12248 = getelementptr inbounds float, float* %tmp12247, i64 1
+ %tmp12249 = getelementptr inbounds float, float* %tmp12248, i64 1
+ %tmp12250 = getelementptr inbounds float, float* %tmp12249, i64 1
+ %tmp12251 = getelementptr inbounds float, float* %tmp12250, i64 1
+ %tmp12252 = getelementptr inbounds float, float* %tmp12251, i64 1
+ %tmp12253 = getelementptr inbounds float, float* %tmp12252, i64 1
+ %tmp12254 = getelementptr inbounds float, float* %tmp12253, i64 1
+ %tmp12255 = getelementptr inbounds float, float* %tmp12254, i64 1
+ %tmp12256 = getelementptr inbounds float, float* %tmp12255, i64 1
+ %tmp12257 = getelementptr inbounds float, float* %tmp12256, i64 1
+ %tmp12258 = getelementptr inbounds float, float* %tmp12257, i64 1
+ %tmp12259 = getelementptr inbounds float, float* %tmp12258, i64 1
+ %tmp12260 = getelementptr inbounds float, float* %tmp12259, i64 1
+ %tmp12261 = getelementptr inbounds float, float* %tmp12260, i64 1
+ %tmp12262 = getelementptr inbounds float, float* %tmp12261, i64 1
+ %tmp12263 = getelementptr inbounds float, float* %tmp12262, i64 1
+ %tmp12264 = getelementptr inbounds float, float* %tmp12263, i64 1
+ %tmp12265 = getelementptr inbounds float, float* %tmp12264, i64 1
+ %tmp12266 = getelementptr inbounds float, float* %tmp12265, i64 1
+ %tmp12267 = getelementptr inbounds float, float* %tmp12266, i64 1
+ %tmp12268 = getelementptr inbounds float, float* %tmp12267, i64 1
+ %tmp12269 = getelementptr inbounds float, float* %tmp12268, i64 1
+ %tmp12270 = getelementptr inbounds float, float* %tmp12269, i64 1
+ %tmp12271 = getelementptr inbounds float, float* %tmp12270, i64 1
+ %tmp12272 = getelementptr inbounds float, float* %tmp12271, i64 1
+ %tmp12273 = getelementptr inbounds float, float* %tmp12272, i64 1
+ %tmp12274 = getelementptr inbounds float, float* %tmp12273, i64 1
+ %tmp12275 = getelementptr inbounds float, float* %tmp12274, i64 1
+ %tmp12276 = getelementptr inbounds float, float* %tmp12275, i64 1
+ %tmp12277 = getelementptr inbounds float, float* %tmp12276, i64 1
+ %tmp12278 = getelementptr inbounds float, float* %tmp12277, i64 1
+ %tmp12279 = getelementptr inbounds float, float* %tmp12278, i64 1
+ %tmp12280 = getelementptr inbounds float, float* %tmp12279, i64 1
+ %tmp12281 = getelementptr inbounds float, float* %tmp12280, i64 1
+ %tmp12282 = getelementptr inbounds float, float* %tmp12281, i64 1
+ %tmp12283 = getelementptr inbounds float, float* %tmp12282, i64 1
+ %tmp12284 = getelementptr inbounds float, float* %tmp12283, i64 1
+ %tmp12285 = getelementptr inbounds float, float* %tmp12284, i64 1
+ %tmp12286 = getelementptr inbounds float, float* %tmp12285, i64 1
+ %tmp12287 = getelementptr inbounds float, float* %tmp12286, i64 1
+ %tmp12288 = getelementptr inbounds float, float* %tmp12287, i64 1
+ %tmp12289 = getelementptr inbounds float, float* %tmp12288, i64 1
+ %tmp12290 = getelementptr inbounds float, float* %tmp12289, i64 1
+ %tmp12291 = getelementptr inbounds float, float* %tmp12290, i64 1
+ %tmp12292 = getelementptr inbounds float, float* %tmp12291, i64 1
+ %tmp12293 = getelementptr inbounds float, float* %tmp12292, i64 1
+ %tmp12294 = getelementptr inbounds float, float* %tmp12293, i64 1
+ %tmp12295 = getelementptr inbounds float, float* %tmp12294, i64 1
+ %tmp12296 = getelementptr inbounds float, float* %tmp12295, i64 1
+ %tmp12297 = getelementptr inbounds float, float* %tmp12296, i64 1
+ %tmp12298 = getelementptr inbounds float, float* %tmp12297, i64 1
+ %tmp12299 = getelementptr inbounds float, float* %tmp12298, i64 1
+ %tmp12300 = getelementptr inbounds float, float* %tmp12299, i64 1
+ %tmp12301 = getelementptr inbounds float, float* %tmp12300, i64 1
+ %tmp12302 = getelementptr inbounds float, float* %tmp12301, i64 1
+ %tmp12303 = getelementptr inbounds float, float* %tmp12302, i64 1
+ %tmp12304 = getelementptr inbounds float, float* %tmp12303, i64 1
+ %tmp12305 = getelementptr inbounds float, float* %tmp12304, i64 1
+ %tmp12306 = getelementptr inbounds float, float* %tmp12305, i64 1
+ %tmp12307 = getelementptr inbounds float, float* %tmp12306, i64 1
+ %tmp12308 = getelementptr inbounds float, float* %tmp12307, i64 1
+ %tmp12309 = getelementptr inbounds float, float* %tmp12308, i64 1
+ %tmp12310 = getelementptr inbounds float, float* %tmp12309, i64 1
+ %tmp12311 = getelementptr inbounds float, float* %tmp12310, i64 1
+ %tmp12312 = getelementptr inbounds float, float* %tmp12311, i64 1
+ %tmp12313 = getelementptr inbounds float, float* %tmp12312, i64 1
+ %tmp12314 = getelementptr inbounds float, float* %tmp12313, i64 1
+ %tmp12315 = getelementptr inbounds float, float* %tmp12314, i64 1
+ %tmp12316 = getelementptr inbounds float, float* %tmp12315, i64 1
+ %tmp12317 = getelementptr inbounds float, float* %tmp12316, i64 1
+ %tmp12318 = getelementptr inbounds float, float* %tmp12317, i64 1
+ %tmp12319 = getelementptr inbounds float, float* %tmp12318, i64 1
+ %tmp12320 = getelementptr inbounds float, float* %tmp12319, i64 1
+ %tmp12321 = getelementptr inbounds float, float* %tmp12320, i64 1
+ %tmp12322 = getelementptr inbounds float, float* %tmp12321, i64 1
+ %tmp12323 = getelementptr inbounds float, float* %tmp12322, i64 1
+ %tmp12324 = getelementptr inbounds float, float* %tmp12323, i64 1
+ %tmp12325 = getelementptr inbounds float, float* %tmp12324, i64 1
+ %tmp12326 = getelementptr inbounds float, float* %tmp12325, i64 1
+ %tmp12327 = getelementptr inbounds float, float* %tmp12326, i64 1
+ %tmp12328 = getelementptr inbounds float, float* %tmp12327, i64 1
+ %tmp12329 = getelementptr inbounds float, float* %tmp12328, i64 1
+ %tmp12330 = getelementptr inbounds float, float* %tmp12329, i64 1
+ %tmp12331 = getelementptr inbounds float, float* %tmp12330, i64 1
+ %tmp12332 = getelementptr inbounds float, float* %tmp12331, i64 1
+ %tmp12333 = getelementptr inbounds float, float* %tmp12332, i64 1
+ %tmp12334 = getelementptr inbounds float, float* %tmp12333, i64 1
+ %tmp12335 = getelementptr inbounds float, float* %tmp12334, i64 1
+ %tmp12336 = getelementptr inbounds float, float* %tmp12335, i64 1
+ %tmp12337 = getelementptr inbounds float, float* %tmp12336, i64 1
+ %tmp12338 = getelementptr inbounds float, float* %tmp12337, i64 1
+ %tmp12339 = getelementptr inbounds float, float* %tmp12338, i64 1
+ %tmp12340 = getelementptr inbounds float, float* %tmp12339, i64 1
+ %tmp12341 = getelementptr inbounds float, float* %tmp12340, i64 1
+ %tmp12342 = getelementptr inbounds float, float* %tmp12341, i64 1
+ %tmp12343 = getelementptr inbounds float, float* %tmp12342, i64 1
+ %tmp12344 = getelementptr inbounds float, float* %tmp12343, i64 1
+ %tmp12345 = getelementptr inbounds float, float* %tmp12344, i64 1
+ %tmp12346 = getelementptr inbounds float, float* %tmp12345, i64 1
+ %tmp12347 = getelementptr inbounds float, float* %tmp12346, i64 1
+ %tmp12348 = getelementptr inbounds float, float* %tmp12347, i64 1
+ %tmp12349 = getelementptr inbounds float, float* %tmp12348, i64 1
+ %tmp12350 = getelementptr inbounds float, float* %tmp12349, i64 1
+ %tmp12351 = getelementptr inbounds float, float* %tmp12350, i64 1
+ %tmp12352 = getelementptr inbounds float, float* %tmp12351, i64 1
+ %tmp12353 = getelementptr inbounds float, float* %tmp12352, i64 1
+ %tmp12354 = getelementptr inbounds float, float* %tmp12353, i64 1
+ %tmp12355 = getelementptr inbounds float, float* %tmp12354, i64 1
+ %tmp12356 = getelementptr inbounds float, float* %tmp12355, i64 1
+ %tmp12357 = getelementptr inbounds float, float* %tmp12356, i64 1
+ %tmp12358 = getelementptr inbounds float, float* %tmp12357, i64 1
+ %tmp12359 = getelementptr inbounds float, float* %tmp12358, i64 1
+ %tmp12360 = getelementptr inbounds float, float* %tmp12359, i64 1
+ %tmp12361 = getelementptr inbounds float, float* %tmp12360, i64 1
+ %tmp12362 = getelementptr inbounds float, float* %tmp12361, i64 1
+ %tmp12363 = getelementptr inbounds float, float* %tmp12362, i64 1
+ %tmp12364 = getelementptr inbounds float, float* %tmp12363, i64 1
+ %tmp12365 = getelementptr inbounds float, float* %tmp12364, i64 1
+ %tmp12366 = getelementptr inbounds float, float* %tmp12365, i64 1
+ %tmp12367 = getelementptr inbounds float, float* %tmp12366, i64 1
+ %tmp12368 = getelementptr inbounds float, float* %tmp12367, i64 1
+ %tmp12369 = getelementptr inbounds float, float* %tmp12368, i64 1
+ %tmp12370 = getelementptr inbounds float, float* %tmp12369, i64 1
+ %tmp12371 = getelementptr inbounds float, float* %tmp12370, i64 1
+ %tmp12372 = getelementptr inbounds float, float* %tmp12371, i64 1
+ %tmp12373 = getelementptr inbounds float, float* %tmp12372, i64 1
+ %tmp12374 = getelementptr inbounds float, float* %tmp12373, i64 1
+ %tmp12375 = getelementptr inbounds float, float* %tmp12374, i64 1
+ %tmp12376 = getelementptr inbounds float, float* %tmp12375, i64 1
+ %tmp12377 = getelementptr inbounds float, float* %tmp12376, i64 1
+ %tmp12378 = getelementptr inbounds float, float* %tmp12377, i64 1
+ %tmp12379 = getelementptr inbounds float, float* %tmp12378, i64 1
+ %tmp12380 = getelementptr inbounds float, float* %tmp12379, i64 1
+ %tmp12381 = getelementptr inbounds float, float* %tmp12380, i64 1
+ %tmp12382 = getelementptr inbounds float, float* %tmp12381, i64 1
+ %tmp12383 = getelementptr inbounds float, float* %tmp12382, i64 1
+ %tmp12384 = getelementptr inbounds float, float* %tmp12383, i64 1
+ %tmp12385 = getelementptr inbounds float, float* %tmp12384, i64 1
+ %tmp12386 = getelementptr inbounds float, float* %tmp12385, i64 1
+ %tmp12387 = getelementptr inbounds float, float* %tmp12386, i64 1
+ %tmp12388 = getelementptr inbounds float, float* %tmp12387, i64 1
+ %tmp12389 = getelementptr inbounds float, float* %tmp12388, i64 1
+ %tmp12390 = getelementptr inbounds float, float* %tmp12389, i64 1
+ %tmp12391 = getelementptr inbounds float, float* %tmp12390, i64 1
+ %tmp12392 = getelementptr inbounds float, float* %tmp12391, i64 1
+ %tmp12393 = getelementptr inbounds float, float* %tmp12392, i64 1
+ %tmp12394 = getelementptr inbounds float, float* %tmp12393, i64 1
+ %tmp12395 = getelementptr inbounds float, float* %tmp12394, i64 1
+ %tmp12396 = getelementptr inbounds float, float* %tmp12395, i64 1
+ %tmp12397 = getelementptr inbounds float, float* %tmp12396, i64 1
+ %tmp12398 = getelementptr inbounds float, float* %tmp12397, i64 1
+ %tmp12399 = getelementptr inbounds float, float* %tmp12398, i64 1
+ %tmp12400 = getelementptr inbounds float, float* %tmp12399, i64 1
+ %tmp12401 = getelementptr inbounds float, float* %tmp12400, i64 1
+ %tmp12402 = getelementptr inbounds float, float* %tmp12401, i64 1
+ %tmp12403 = getelementptr inbounds float, float* %tmp12402, i64 1
+ %tmp12404 = getelementptr inbounds float, float* %tmp12403, i64 1
+ %tmp12405 = getelementptr inbounds float, float* %tmp12404, i64 1
+ %tmp12406 = getelementptr inbounds float, float* %tmp12405, i64 1
+ %tmp12407 = getelementptr inbounds float, float* %tmp12406, i64 1
+ %tmp12408 = getelementptr inbounds float, float* %tmp12407, i64 1
+ %tmp12409 = getelementptr inbounds float, float* %tmp12408, i64 1
+ %tmp12410 = getelementptr inbounds float, float* %tmp12409, i64 1
+ %tmp12411 = getelementptr inbounds float, float* %tmp12410, i64 1
+ %tmp12412 = getelementptr inbounds float, float* %tmp12411, i64 1
+ %tmp12413 = getelementptr inbounds float, float* %tmp12412, i64 1
+ %tmp12414 = getelementptr inbounds float, float* %tmp12413, i64 1
+ %tmp12415 = getelementptr inbounds float, float* %tmp12414, i64 1
+ %tmp12416 = getelementptr inbounds float, float* %tmp12415, i64 1
+ %tmp12417 = getelementptr inbounds float, float* %tmp12416, i64 1
+ %tmp12418 = getelementptr inbounds float, float* %tmp12417, i64 1
+ %tmp12419 = getelementptr inbounds float, float* %tmp12418, i64 1
+ %tmp12420 = getelementptr inbounds float, float* %tmp12419, i64 1
+ %tmp12421 = getelementptr inbounds float, float* %tmp12420, i64 1
+ %tmp12422 = getelementptr inbounds float, float* %tmp12421, i64 1
+ %tmp12423 = getelementptr inbounds float, float* %tmp12422, i64 1
+ %tmp12424 = getelementptr inbounds float, float* %tmp12423, i64 1
+ %tmp12425 = getelementptr inbounds float, float* %tmp12424, i64 1
+ %tmp12426 = getelementptr inbounds float, float* %tmp12425, i64 1
+ %tmp12427 = getelementptr inbounds float, float* %tmp12426, i64 1
+ %tmp12428 = getelementptr inbounds float, float* %tmp12427, i64 1
+ %tmp12429 = getelementptr inbounds float, float* %tmp12428, i64 1
+ %tmp12430 = getelementptr inbounds float, float* %tmp12429, i64 1
+ %tmp12431 = getelementptr inbounds float, float* %tmp12430, i64 1
+ %tmp12432 = getelementptr inbounds float, float* %tmp12431, i64 1
+ %tmp12433 = getelementptr inbounds float, float* %tmp12432, i64 1
+ %tmp12434 = getelementptr inbounds float, float* %tmp12433, i64 1
+ %tmp12435 = getelementptr inbounds float, float* %tmp12434, i64 1
+ %tmp12436 = getelementptr inbounds float, float* %tmp12435, i64 1
+ %tmp12437 = getelementptr inbounds float, float* %tmp12436, i64 1
+ %tmp12438 = getelementptr inbounds float, float* %tmp12437, i64 1
+ %tmp12439 = getelementptr inbounds float, float* %tmp12438, i64 1
+ %tmp12440 = getelementptr inbounds float, float* %tmp12439, i64 1
+ %tmp12441 = getelementptr inbounds float, float* %tmp12440, i64 1
+ %tmp12442 = getelementptr inbounds float, float* %tmp12441, i64 1
+ %tmp12443 = getelementptr inbounds float, float* %tmp12442, i64 1
+ %tmp12444 = getelementptr inbounds float, float* %tmp12443, i64 1
+ %tmp12445 = getelementptr inbounds float, float* %tmp12444, i64 1
+ %tmp12446 = getelementptr inbounds float, float* %tmp12445, i64 1
+ %tmp12447 = getelementptr inbounds float, float* %tmp12446, i64 1
+ %tmp12448 = getelementptr inbounds float, float* %tmp12447, i64 1
+ %tmp12449 = getelementptr inbounds float, float* %tmp12448, i64 1
+ %tmp12450 = getelementptr inbounds float, float* %tmp12449, i64 1
+ %tmp12451 = getelementptr inbounds float, float* %tmp12450, i64 1
+ %tmp12452 = getelementptr inbounds float, float* %tmp12451, i64 1
+ %tmp12453 = getelementptr inbounds float, float* %tmp12452, i64 1
+ %tmp12454 = getelementptr inbounds float, float* %tmp12453, i64 1
+ %tmp12455 = getelementptr inbounds float, float* %tmp12454, i64 1
+ %tmp12456 = getelementptr inbounds float, float* %tmp12455, i64 1
+ %tmp12457 = getelementptr inbounds float, float* %tmp12456, i64 1
+ %tmp12458 = getelementptr inbounds float, float* %tmp12457, i64 1
+ %tmp12459 = getelementptr inbounds float, float* %tmp12458, i64 1
+ %tmp12460 = getelementptr inbounds float, float* %tmp12459, i64 1
+ %tmp12461 = getelementptr inbounds float, float* %tmp12460, i64 1
+ %tmp12462 = getelementptr inbounds float, float* %tmp12461, i64 1
+ %tmp12463 = getelementptr inbounds float, float* %tmp12462, i64 1
+ %tmp12464 = getelementptr inbounds float, float* %tmp12463, i64 1
+ %tmp12465 = getelementptr inbounds float, float* %tmp12464, i64 1
+ %tmp12466 = getelementptr inbounds float, float* %tmp12465, i64 1
+ %tmp12467 = getelementptr inbounds float, float* %tmp12466, i64 1
+ %tmp12468 = getelementptr inbounds float, float* %tmp12467, i64 1
+ %tmp12469 = getelementptr inbounds float, float* %tmp12468, i64 1
+ %tmp12470 = getelementptr inbounds float, float* %tmp12469, i64 1
+ %tmp12471 = getelementptr inbounds float, float* %tmp12470, i64 1
+ %tmp12472 = getelementptr inbounds float, float* %tmp12471, i64 1
+ %tmp12473 = getelementptr inbounds float, float* %tmp12472, i64 1
+ %tmp12474 = getelementptr inbounds float, float* %tmp12473, i64 1
+ %tmp12475 = getelementptr inbounds float, float* %tmp12474, i64 1
+ %tmp12476 = getelementptr inbounds float, float* %tmp12475, i64 1
+ %tmp12477 = getelementptr inbounds float, float* %tmp12476, i64 1
+ %tmp12478 = getelementptr inbounds float, float* %tmp12477, i64 1
+ %tmp12479 = getelementptr inbounds float, float* %tmp12478, i64 1
+ %tmp12480 = getelementptr inbounds float, float* %tmp12479, i64 1
+ %tmp12481 = getelementptr inbounds float, float* %tmp12480, i64 1
+ %tmp12482 = getelementptr inbounds float, float* %tmp12481, i64 1
+ %tmp12483 = getelementptr inbounds float, float* %tmp12482, i64 1
+ %tmp12484 = getelementptr inbounds float, float* %tmp12483, i64 1
+ %tmp12485 = getelementptr inbounds float, float* %tmp12484, i64 1
+ %tmp12486 = getelementptr inbounds float, float* %tmp12485, i64 1
+ %tmp12487 = getelementptr inbounds float, float* %tmp12486, i64 1
+ %tmp12488 = getelementptr inbounds float, float* %tmp12487, i64 1
+ %tmp12489 = getelementptr inbounds float, float* %tmp12488, i64 1
+ %tmp12490 = getelementptr inbounds float, float* %tmp12489, i64 1
+ %tmp12491 = getelementptr inbounds float, float* %tmp12490, i64 1
+ %tmp12492 = getelementptr inbounds float, float* %tmp12491, i64 1
+ %tmp12493 = getelementptr inbounds float, float* %tmp12492, i64 1
+ %tmp12494 = getelementptr inbounds float, float* %tmp12493, i64 1
+ %tmp12495 = getelementptr inbounds float, float* %tmp12494, i64 1
+ %tmp12496 = getelementptr inbounds float, float* %tmp12495, i64 1
+ %tmp12497 = getelementptr inbounds float, float* %tmp12496, i64 1
+ %tmp12498 = getelementptr inbounds float, float* %tmp12497, i64 1
+ %tmp12499 = getelementptr inbounds float, float* %tmp12498, i64 1
+ %tmp12500 = getelementptr inbounds float, float* %tmp12499, i64 1
+ %tmp12501 = getelementptr inbounds float, float* %tmp12500, i64 1
+ %tmp12502 = getelementptr inbounds float, float* %tmp12501, i64 1
+ %tmp12503 = getelementptr inbounds float, float* %tmp12502, i64 1
+ %tmp12504 = getelementptr inbounds float, float* %tmp12503, i64 1
+ %tmp12505 = getelementptr inbounds float, float* %tmp12504, i64 1
+ %tmp12506 = getelementptr inbounds float, float* %tmp12505, i64 1
+ %tmp12507 = getelementptr inbounds float, float* %tmp12506, i64 1
+ %tmp12508 = getelementptr inbounds float, float* %tmp12507, i64 1
+ %tmp12509 = getelementptr inbounds float, float* %tmp12508, i64 1
+ %tmp12510 = getelementptr inbounds float, float* %tmp12509, i64 1
+ %tmp12511 = getelementptr inbounds float, float* %tmp12510, i64 1
+ %tmp12512 = getelementptr inbounds float, float* %tmp12511, i64 1
+ %tmp12513 = getelementptr inbounds float, float* %tmp12512, i64 1
+ %tmp12514 = getelementptr inbounds float, float* %tmp12513, i64 1
+ %tmp12515 = getelementptr inbounds float, float* %tmp12514, i64 1
+ %tmp12516 = getelementptr inbounds float, float* %tmp12515, i64 1
+ %tmp12517 = getelementptr inbounds float, float* %tmp12516, i64 1
+ %tmp12518 = getelementptr inbounds float, float* %tmp12517, i64 1
+ %tmp12519 = getelementptr inbounds float, float* %tmp12518, i64 1
+ %tmp12520 = getelementptr inbounds float, float* %tmp12519, i64 1
+ %tmp12521 = getelementptr inbounds float, float* %tmp12520, i64 1
+ %tmp12522 = getelementptr inbounds float, float* %tmp12521, i64 1
+ %tmp12523 = getelementptr inbounds float, float* %tmp12522, i64 1
+ %tmp12524 = getelementptr inbounds float, float* %tmp12523, i64 1
+ %tmp12525 = getelementptr inbounds float, float* %tmp12524, i64 1
+ %tmp12526 = getelementptr inbounds float, float* %tmp12525, i64 1
+ %tmp12527 = getelementptr inbounds float, float* %tmp12526, i64 1
+ %tmp12528 = getelementptr inbounds float, float* %tmp12527, i64 1
+ %tmp12529 = getelementptr inbounds float, float* %tmp12528, i64 1
+ %tmp12530 = getelementptr inbounds float, float* %tmp12529, i64 1
+ %tmp12531 = getelementptr inbounds float, float* %tmp12530, i64 1
+ %tmp12532 = getelementptr inbounds float, float* %tmp12531, i64 1
+ %tmp12533 = getelementptr inbounds float, float* %tmp12532, i64 1
+ %tmp12534 = getelementptr inbounds float, float* %tmp12533, i64 1
+ %tmp12535 = getelementptr inbounds float, float* %tmp12534, i64 1
+ %tmp12536 = getelementptr inbounds float, float* %tmp12535, i64 1
+ %tmp12537 = getelementptr inbounds float, float* %tmp12536, i64 1
+ %tmp12538 = getelementptr inbounds float, float* %tmp12537, i64 1
+ %tmp12539 = getelementptr inbounds float, float* %tmp12538, i64 1
+ %tmp12540 = getelementptr inbounds float, float* %tmp12539, i64 1
+ %tmp12541 = getelementptr inbounds float, float* %tmp12540, i64 1
+ %tmp12542 = getelementptr inbounds float, float* %tmp12541, i64 1
+ %tmp12543 = getelementptr inbounds float, float* %tmp12542, i64 1
+ %tmp12544 = getelementptr inbounds float, float* %tmp12543, i64 1
+ %tmp12545 = getelementptr inbounds float, float* %tmp12544, i64 1
+ %tmp12546 = getelementptr inbounds float, float* %tmp12545, i64 1
+ %tmp12547 = getelementptr inbounds float, float* %tmp12546, i64 1
+ %tmp12548 = getelementptr inbounds float, float* %tmp12547, i64 1
+ %tmp12549 = getelementptr inbounds float, float* %tmp12548, i64 1
+ %tmp12550 = getelementptr inbounds float, float* %tmp12549, i64 1
+ %tmp12551 = getelementptr inbounds float, float* %tmp12550, i64 1
+ %tmp12552 = getelementptr inbounds float, float* %tmp12551, i64 1
+ %tmp12553 = getelementptr inbounds float, float* %tmp12552, i64 1
+ %tmp12554 = getelementptr inbounds float, float* %tmp12553, i64 1
+ %tmp12555 = getelementptr inbounds float, float* %tmp12554, i64 1
+ %tmp12556 = getelementptr inbounds float, float* %tmp12555, i64 1
+ %tmp12557 = getelementptr inbounds float, float* %tmp12556, i64 1
+ %tmp12558 = getelementptr inbounds float, float* %tmp12557, i64 1
+ %tmp12559 = getelementptr inbounds float, float* %tmp12558, i64 1
+ %tmp12560 = getelementptr inbounds float, float* %tmp12559, i64 1
+ %tmp12561 = getelementptr inbounds float, float* %tmp12560, i64 1
+ %tmp12562 = getelementptr inbounds float, float* %tmp12561, i64 1
+ %tmp12563 = getelementptr inbounds float, float* %tmp12562, i64 1
+ %tmp12564 = getelementptr inbounds float, float* %tmp12563, i64 1
+ %tmp12565 = getelementptr inbounds float, float* %tmp12564, i64 1
+ %tmp12566 = getelementptr inbounds float, float* %tmp12565, i64 1
+ %tmp12567 = getelementptr inbounds float, float* %tmp12566, i64 1
+ %tmp12568 = getelementptr inbounds float, float* %tmp12567, i64 1
+ %tmp12569 = getelementptr inbounds float, float* %tmp12568, i64 1
+ %tmp12570 = getelementptr inbounds float, float* %tmp12569, i64 1
+ %tmp12571 = getelementptr inbounds float, float* %tmp12570, i64 1
+ %tmp12572 = getelementptr inbounds float, float* %tmp12571, i64 1
+ %tmp12573 = getelementptr inbounds float, float* %tmp12572, i64 1
+ %tmp12574 = getelementptr inbounds float, float* %tmp12573, i64 1
+ %tmp12575 = getelementptr inbounds float, float* %tmp12574, i64 1
+ %tmp12576 = getelementptr inbounds float, float* %tmp12575, i64 1
+ %tmp12577 = getelementptr inbounds float, float* %tmp12576, i64 1
+ %tmp12578 = getelementptr inbounds float, float* %tmp12577, i64 1
+ %tmp12579 = getelementptr inbounds float, float* %tmp12578, i64 1
+ %tmp12580 = getelementptr inbounds float, float* %tmp12579, i64 1
+ %tmp12581 = getelementptr inbounds float, float* %tmp12580, i64 1
+ %tmp12582 = getelementptr inbounds float, float* %tmp12581, i64 1
+ %tmp12583 = getelementptr inbounds float, float* %tmp12582, i64 1
+ %tmp12584 = getelementptr inbounds float, float* %tmp12583, i64 1
+ %tmp12585 = getelementptr inbounds float, float* %tmp12584, i64 1
+ %tmp12586 = getelementptr inbounds float, float* %tmp12585, i64 1
+ %tmp12587 = getelementptr inbounds float, float* %tmp12586, i64 1
+ %tmp12588 = getelementptr inbounds float, float* %tmp12587, i64 1
+ %tmp12589 = getelementptr inbounds float, float* %tmp12588, i64 1
+ %tmp12590 = getelementptr inbounds float, float* %tmp12589, i64 1
+ %tmp12591 = getelementptr inbounds float, float* %tmp12590, i64 1
+ %tmp12592 = getelementptr inbounds float, float* %tmp12591, i64 1
+ %tmp12593 = getelementptr inbounds float, float* %tmp12592, i64 1
+ %tmp12594 = getelementptr inbounds float, float* %tmp12593, i64 1
+ %tmp12595 = getelementptr inbounds float, float* %tmp12594, i64 1
+ %tmp12596 = getelementptr inbounds float, float* %tmp12595, i64 1
+ %tmp12597 = getelementptr inbounds float, float* %tmp12596, i64 1
+ %tmp12598 = getelementptr inbounds float, float* %tmp12597, i64 1
+ %tmp12599 = getelementptr inbounds float, float* %tmp12598, i64 1
+ %tmp12600 = getelementptr inbounds float, float* %tmp12599, i64 1
+ %tmp12601 = getelementptr inbounds float, float* %tmp12600, i64 1
+ %tmp12602 = getelementptr inbounds float, float* %tmp12601, i64 1
+ %tmp12603 = getelementptr inbounds float, float* %tmp12602, i64 1
+ %tmp12604 = getelementptr inbounds float, float* %tmp12603, i64 1
+ %tmp12605 = getelementptr inbounds float, float* %tmp12604, i64 1
+ %tmp12606 = getelementptr inbounds float, float* %tmp12605, i64 1
+ %tmp12607 = getelementptr inbounds float, float* %tmp12606, i64 1
+ %tmp12608 = getelementptr inbounds float, float* %tmp12607, i64 1
+ %tmp12609 = getelementptr inbounds float, float* %tmp12608, i64 1
+ %tmp12610 = getelementptr inbounds float, float* %tmp12609, i64 1
+ %tmp12611 = getelementptr inbounds float, float* %tmp12610, i64 1
+ %tmp12612 = getelementptr inbounds float, float* %tmp12611, i64 1
+ %tmp12613 = getelementptr inbounds float, float* %tmp12612, i64 1
+ %tmp12614 = getelementptr inbounds float, float* %tmp12613, i64 1
+ %tmp12615 = getelementptr inbounds float, float* %tmp12614, i64 1
+ %tmp12616 = getelementptr inbounds float, float* %tmp12615, i64 1
+ %tmp12617 = getelementptr inbounds float, float* %tmp12616, i64 1
+ %tmp12618 = getelementptr inbounds float, float* %tmp12617, i64 1
+ %tmp12619 = getelementptr inbounds float, float* %tmp12618, i64 1
+ %tmp12620 = getelementptr inbounds float, float* %tmp12619, i64 1
+ %tmp12621 = getelementptr inbounds float, float* %tmp12620, i64 1
+ %tmp12622 = getelementptr inbounds float, float* %tmp12621, i64 1
+ %tmp12623 = getelementptr inbounds float, float* %tmp12622, i64 1
+ %tmp12624 = getelementptr inbounds float, float* %tmp12623, i64 1
+ %tmp12625 = getelementptr inbounds float, float* %tmp12624, i64 1
+ %tmp12626 = getelementptr inbounds float, float* %tmp12625, i64 1
+ %tmp12627 = getelementptr inbounds float, float* %tmp12626, i64 1
+ %tmp12628 = getelementptr inbounds float, float* %tmp12627, i64 1
+ %tmp12629 = getelementptr inbounds float, float* %tmp12628, i64 1
+ %tmp12630 = getelementptr inbounds float, float* %tmp12629, i64 1
+ %tmp12631 = getelementptr inbounds float, float* %tmp12630, i64 1
+ %tmp12632 = getelementptr inbounds float, float* %tmp12631, i64 1
+ %tmp12633 = getelementptr inbounds float, float* %tmp12632, i64 1
+ %tmp12634 = getelementptr inbounds float, float* %tmp12633, i64 1
+ %tmp12635 = getelementptr inbounds float, float* %tmp12634, i64 1
+ %tmp12636 = getelementptr inbounds float, float* %tmp12635, i64 1
+ %tmp12637 = getelementptr inbounds float, float* %tmp12636, i64 1
+ %tmp12638 = getelementptr inbounds float, float* %tmp12637, i64 1
+ %tmp12639 = getelementptr inbounds float, float* %tmp12638, i64 1
+ %tmp12640 = getelementptr inbounds float, float* %tmp12639, i64 1
+ %tmp12641 = getelementptr inbounds float, float* %tmp12640, i64 1
+ %tmp12642 = getelementptr inbounds float, float* %tmp12641, i64 1
+ %tmp12643 = getelementptr inbounds float, float* %tmp12642, i64 1
+ %tmp12644 = getelementptr inbounds float, float* %tmp12643, i64 1
+ %tmp12645 = getelementptr inbounds float, float* %tmp12644, i64 1
+ %tmp12646 = getelementptr inbounds float, float* %tmp12645, i64 1
+ %tmp12647 = getelementptr inbounds float, float* %tmp12646, i64 1
+ %tmp12648 = getelementptr inbounds float, float* %tmp12647, i64 1
+ %tmp12649 = getelementptr inbounds float, float* %tmp12648, i64 1
+ %tmp12650 = getelementptr inbounds float, float* %tmp12649, i64 1
+ %tmp12651 = getelementptr inbounds float, float* %tmp12650, i64 1
+ %tmp12652 = getelementptr inbounds float, float* %tmp12651, i64 1
+ %tmp12653 = getelementptr inbounds float, float* %tmp12652, i64 1
+ %tmp12654 = getelementptr inbounds float, float* %tmp12653, i64 1
+ %tmp12655 = getelementptr inbounds float, float* %tmp12654, i64 1
+ %tmp12656 = getelementptr inbounds float, float* %tmp12655, i64 1
+ %tmp12657 = getelementptr inbounds float, float* %tmp12656, i64 1
+ %tmp12658 = getelementptr inbounds float, float* %tmp12657, i64 1
+ %tmp12659 = getelementptr inbounds float, float* %tmp12658, i64 1
+ %tmp12660 = getelementptr inbounds float, float* %tmp12659, i64 1
+ %tmp12661 = getelementptr inbounds float, float* %tmp12660, i64 1
+ %tmp12662 = getelementptr inbounds float, float* %tmp12661, i64 1
+ %tmp12663 = getelementptr inbounds float, float* %tmp12662, i64 1
+ %tmp12664 = getelementptr inbounds float, float* %tmp12663, i64 1
+ %tmp12665 = getelementptr inbounds float, float* %tmp12664, i64 1
+ %tmp12666 = getelementptr inbounds float, float* %tmp12665, i64 1
+ %tmp12667 = getelementptr inbounds float, float* %tmp12666, i64 1
+ %tmp12668 = getelementptr inbounds float, float* %tmp12667, i64 1
+ %tmp12669 = getelementptr inbounds float, float* %tmp12668, i64 1
+ %tmp12670 = getelementptr inbounds float, float* %tmp12669, i64 1
+ %tmp12671 = getelementptr inbounds float, float* %tmp12670, i64 1
+ %tmp12672 = getelementptr inbounds float, float* %tmp12671, i64 1
+ %tmp12673 = getelementptr inbounds float, float* %tmp12672, i64 1
+ %tmp12674 = getelementptr inbounds float, float* %tmp12673, i64 1
+ %tmp12675 = getelementptr inbounds float, float* %tmp12674, i64 1
+ %tmp12676 = getelementptr inbounds float, float* %tmp12675, i64 1
+ %tmp12677 = getelementptr inbounds float, float* %tmp12676, i64 1
+ %tmp12678 = getelementptr inbounds float, float* %tmp12677, i64 1
+ %tmp12679 = getelementptr inbounds float, float* %tmp12678, i64 1
+ %tmp12680 = getelementptr inbounds float, float* %tmp12679, i64 1
+ %tmp12681 = getelementptr inbounds float, float* %tmp12680, i64 1
+ %tmp12682 = getelementptr inbounds float, float* %tmp12681, i64 1
+ %tmp12683 = getelementptr inbounds float, float* %tmp12682, i64 1
+ %tmp12684 = getelementptr inbounds float, float* %tmp12683, i64 1
+ %tmp12685 = getelementptr inbounds float, float* %tmp12684, i64 1
+ %tmp12686 = getelementptr inbounds float, float* %tmp12685, i64 1
+ %tmp12687 = getelementptr inbounds float, float* %tmp12686, i64 1
+ %tmp12688 = getelementptr inbounds float, float* %tmp12687, i64 1
+ %tmp12689 = getelementptr inbounds float, float* %tmp12688, i64 1
+ %tmp12690 = getelementptr inbounds float, float* %tmp12689, i64 1
+ %tmp12691 = getelementptr inbounds float, float* %tmp12690, i64 1
+ %tmp12692 = getelementptr inbounds float, float* %tmp12691, i64 1
+ %tmp12693 = getelementptr inbounds float, float* %tmp12692, i64 1
+ %tmp12694 = getelementptr inbounds float, float* %tmp12693, i64 1
+ %tmp12695 = getelementptr inbounds float, float* %tmp12694, i64 1
+ %tmp12696 = getelementptr inbounds float, float* %tmp12695, i64 1
+ %tmp12697 = getelementptr inbounds float, float* %tmp12696, i64 1
+ %tmp12698 = getelementptr inbounds float, float* %tmp12697, i64 1
+ %tmp12699 = getelementptr inbounds float, float* %tmp12698, i64 1
+ %tmp12700 = getelementptr inbounds float, float* %tmp12699, i64 1
+ %tmp12701 = getelementptr inbounds float, float* %tmp12700, i64 1
+ %tmp12702 = getelementptr inbounds float, float* %tmp12701, i64 1
+ %tmp12703 = getelementptr inbounds float, float* %tmp12702, i64 1
+ %tmp12704 = getelementptr inbounds float, float* %tmp12703, i64 1
+ %tmp12705 = getelementptr inbounds float, float* %tmp12704, i64 1
+ %tmp12706 = getelementptr inbounds float, float* %tmp12705, i64 1
+ %tmp12707 = getelementptr inbounds float, float* %tmp12706, i64 1
+ %tmp12708 = getelementptr inbounds float, float* %tmp12707, i64 1
+ %tmp12709 = getelementptr inbounds float, float* %tmp12708, i64 1
+ %tmp12710 = getelementptr inbounds float, float* %tmp12709, i64 1
+ %tmp12711 = getelementptr inbounds float, float* %tmp12710, i64 1
+ %tmp12712 = getelementptr inbounds float, float* %tmp12711, i64 1
+ %tmp12713 = getelementptr inbounds float, float* %tmp12712, i64 1
+ %tmp12714 = getelementptr inbounds float, float* %tmp12713, i64 1
+ %tmp12715 = getelementptr inbounds float, float* %tmp12714, i64 1
+ %tmp12716 = getelementptr inbounds float, float* %tmp12715, i64 1
+ %tmp12717 = getelementptr inbounds float, float* %tmp12716, i64 1
+ %tmp12718 = getelementptr inbounds float, float* %tmp12717, i64 1
+ %tmp12719 = getelementptr inbounds float, float* %tmp12718, i64 1
+ %tmp12720 = getelementptr inbounds float, float* %tmp12719, i64 1
+ %tmp12721 = getelementptr inbounds float, float* %tmp12720, i64 1
+ %tmp12722 = getelementptr inbounds float, float* %tmp12721, i64 1
+ %tmp12723 = getelementptr inbounds float, float* %tmp12722, i64 1
+ %tmp12724 = getelementptr inbounds float, float* %tmp12723, i64 1
+ %tmp12725 = getelementptr inbounds float, float* %tmp12724, i64 1
+ %tmp12726 = getelementptr inbounds float, float* %tmp12725, i64 1
+ %tmp12727 = getelementptr inbounds float, float* %tmp12726, i64 1
+ %tmp12728 = getelementptr inbounds float, float* %tmp12727, i64 1
+ %tmp12729 = getelementptr inbounds float, float* %tmp12728, i64 1
+ %tmp12730 = getelementptr inbounds float, float* %tmp12729, i64 1
+ %tmp12731 = getelementptr inbounds float, float* %tmp12730, i64 1
+ %tmp12732 = getelementptr inbounds float, float* %tmp12731, i64 1
+ %tmp12733 = getelementptr inbounds float, float* %tmp12732, i64 1
+ %tmp12734 = getelementptr inbounds float, float* %tmp12733, i64 1
+ %tmp12735 = getelementptr inbounds float, float* %tmp12734, i64 1
+ %tmp12736 = getelementptr inbounds float, float* %tmp12735, i64 1
+ %tmp12737 = getelementptr inbounds float, float* %tmp12736, i64 1
+ %tmp12738 = getelementptr inbounds float, float* %tmp12737, i64 1
+ %tmp12739 = getelementptr inbounds float, float* %tmp12738, i64 1
+ %tmp12740 = getelementptr inbounds float, float* %tmp12739, i64 1
+ %tmp12741 = getelementptr inbounds float, float* %tmp12740, i64 1
+ %tmp12742 = getelementptr inbounds float, float* %tmp12741, i64 1
+ %tmp12743 = getelementptr inbounds float, float* %tmp12742, i64 1
+ %tmp12744 = getelementptr inbounds float, float* %tmp12743, i64 1
+ %tmp12745 = getelementptr inbounds float, float* %tmp12744, i64 1
+ %tmp12746 = getelementptr inbounds float, float* %tmp12745, i64 1
+ %tmp12747 = getelementptr inbounds float, float* %tmp12746, i64 1
+ %tmp12748 = getelementptr inbounds float, float* %tmp12747, i64 1
+ %tmp12749 = getelementptr inbounds float, float* %tmp12748, i64 1
+ %tmp12750 = getelementptr inbounds float, float* %tmp12749, i64 1
+ %tmp12751 = getelementptr inbounds float, float* %tmp12750, i64 1
+ %tmp12752 = getelementptr inbounds float, float* %tmp12751, i64 1
+ %tmp12753 = getelementptr inbounds float, float* %tmp12752, i64 1
+ %tmp12754 = getelementptr inbounds float, float* %tmp12753, i64 1
+ %tmp12755 = getelementptr inbounds float, float* %tmp12754, i64 1
+ %tmp12756 = getelementptr inbounds float, float* %tmp12755, i64 1
+ %tmp12757 = getelementptr inbounds float, float* %tmp12756, i64 1
+ %tmp12758 = getelementptr inbounds float, float* %tmp12757, i64 1
+ %tmp12759 = getelementptr inbounds float, float* %tmp12758, i64 1
+ %tmp12760 = getelementptr inbounds float, float* %tmp12759, i64 1
+ %tmp12761 = getelementptr inbounds float, float* %tmp12760, i64 1
+ %tmp12762 = getelementptr inbounds float, float* %tmp12761, i64 1
+ %tmp12763 = getelementptr inbounds float, float* %tmp12762, i64 1
+ %tmp12764 = getelementptr inbounds float, float* %tmp12763, i64 1
+ %tmp12765 = getelementptr inbounds float, float* %tmp12764, i64 1
+ %tmp12766 = getelementptr inbounds float, float* %tmp12765, i64 1
+ %tmp12767 = getelementptr inbounds float, float* %tmp12766, i64 1
+ %tmp12768 = getelementptr inbounds float, float* %tmp12767, i64 1
+ %tmp12769 = getelementptr inbounds float, float* %tmp12768, i64 1
+ %tmp12770 = getelementptr inbounds float, float* %tmp12769, i64 1
+ %tmp12771 = getelementptr inbounds float, float* %tmp12770, i64 1
+ %tmp12772 = getelementptr inbounds float, float* %tmp12771, i64 1
+ %tmp12773 = getelementptr inbounds float, float* %tmp12772, i64 1
+ %tmp12774 = getelementptr inbounds float, float* %tmp12773, i64 1
+ %tmp12775 = getelementptr inbounds float, float* %tmp12774, i64 1
+ %tmp12776 = getelementptr inbounds float, float* %tmp12775, i64 1
+ %tmp12777 = getelementptr inbounds float, float* %tmp12776, i64 1
+ %tmp12778 = getelementptr inbounds float, float* %tmp12777, i64 1
+ %tmp12779 = getelementptr inbounds float, float* %tmp12778, i64 1
+ %tmp12780 = getelementptr inbounds float, float* %tmp12779, i64 1
+ %tmp12781 = getelementptr inbounds float, float* %tmp12780, i64 1
+ %tmp12782 = getelementptr inbounds float, float* %tmp12781, i64 1
+ %tmp12783 = getelementptr inbounds float, float* %tmp12782, i64 1
+ %tmp12784 = getelementptr inbounds float, float* %tmp12783, i64 1
+ %tmp12785 = getelementptr inbounds float, float* %tmp12784, i64 1
+ %tmp12786 = getelementptr inbounds float, float* %tmp12785, i64 1
+ %tmp12787 = getelementptr inbounds float, float* %tmp12786, i64 1
+ %tmp12788 = getelementptr inbounds float, float* %tmp12787, i64 1
+ %tmp12789 = getelementptr inbounds float, float* %tmp12788, i64 1
+ %tmp12790 = getelementptr inbounds float, float* %tmp12789, i64 1
+ %tmp12791 = getelementptr inbounds float, float* %tmp12790, i64 1
+ %tmp12792 = getelementptr inbounds float, float* %tmp12791, i64 1
+ %tmp12793 = getelementptr inbounds float, float* %tmp12792, i64 1
+ %tmp12794 = getelementptr inbounds float, float* %tmp12793, i64 1
+ %tmp12795 = getelementptr inbounds float, float* %tmp12794, i64 1
+ %tmp12796 = getelementptr inbounds float, float* %tmp12795, i64 1
+ %tmp12797 = getelementptr inbounds float, float* %tmp12796, i64 1
+ %tmp12798 = getelementptr inbounds float, float* %tmp12797, i64 1
+ %tmp12799 = getelementptr inbounds float, float* %tmp12798, i64 1
+ %tmp12800 = getelementptr inbounds float, float* %tmp12799, i64 1
+ %tmp12801 = getelementptr inbounds float, float* %tmp12800, i64 1
+ %tmp12802 = getelementptr inbounds float, float* %tmp12801, i64 1
+ %tmp12803 = getelementptr inbounds float, float* %tmp12802, i64 1
+ %tmp12804 = getelementptr inbounds float, float* %tmp12803, i64 1
+ %tmp12805 = getelementptr inbounds float, float* %tmp12804, i64 1
+ %tmp12806 = getelementptr inbounds float, float* %tmp12805, i64 1
+ %tmp12807 = getelementptr inbounds float, float* %tmp12806, i64 1
+ %tmp12808 = getelementptr inbounds float, float* %tmp12807, i64 1
+ %tmp12809 = getelementptr inbounds float, float* %tmp12808, i64 1
+ %tmp12810 = getelementptr inbounds float, float* %tmp12809, i64 1
+ %tmp12811 = getelementptr inbounds float, float* %tmp12810, i64 1
+ %tmp12812 = getelementptr inbounds float, float* %tmp12811, i64 1
+ %tmp12813 = getelementptr inbounds float, float* %tmp12812, i64 1
+ %tmp12814 = getelementptr inbounds float, float* %tmp12813, i64 1
+ %tmp12815 = getelementptr inbounds float, float* %tmp12814, i64 1
+ %tmp12816 = getelementptr inbounds float, float* %tmp12815, i64 1
+ %tmp12817 = getelementptr inbounds float, float* %tmp12816, i64 1
+ %tmp12818 = getelementptr inbounds float, float* %tmp12817, i64 1
+ %tmp12819 = getelementptr inbounds float, float* %tmp12818, i64 1
+ %tmp12820 = getelementptr inbounds float, float* %tmp12819, i64 1
+ %tmp12821 = getelementptr inbounds float, float* %tmp12820, i64 1
+ %tmp12822 = getelementptr inbounds float, float* %tmp12821, i64 1
+ %tmp12823 = getelementptr inbounds float, float* %tmp12822, i64 1
+ %tmp12824 = getelementptr inbounds float, float* %tmp12823, i64 1
+ %tmp12825 = getelementptr inbounds float, float* %tmp12824, i64 1
+ %tmp12826 = getelementptr inbounds float, float* %tmp12825, i64 1
+ %tmp12827 = getelementptr inbounds float, float* %tmp12826, i64 1
+ %tmp12828 = getelementptr inbounds float, float* %tmp12827, i64 1
+ %tmp12829 = getelementptr inbounds float, float* %tmp12828, i64 1
+ %tmp12830 = getelementptr inbounds float, float* %tmp12829, i64 1
+ %tmp12831 = getelementptr inbounds float, float* %tmp12830, i64 1
+ %tmp12832 = getelementptr inbounds float, float* %tmp12831, i64 1
+ %tmp12833 = getelementptr inbounds float, float* %tmp12832, i64 1
+ %tmp12834 = getelementptr inbounds float, float* %tmp12833, i64 1
+ %tmp12835 = getelementptr inbounds float, float* %tmp12834, i64 1
+ %tmp12836 = getelementptr inbounds float, float* %tmp12835, i64 1
+ %tmp12837 = getelementptr inbounds float, float* %tmp12836, i64 1
+ %tmp12838 = getelementptr inbounds float, float* %tmp12837, i64 1
+ %tmp12839 = getelementptr inbounds float, float* %tmp12838, i64 1
+ %tmp12840 = getelementptr inbounds float, float* %tmp12839, i64 1
+ %tmp12841 = getelementptr inbounds float, float* %tmp12840, i64 1
+ %tmp12842 = getelementptr inbounds float, float* %tmp12841, i64 1
+ %tmp12843 = getelementptr inbounds float, float* %tmp12842, i64 1
+ %tmp12844 = getelementptr inbounds float, float* %tmp12843, i64 1
+ %tmp12845 = getelementptr inbounds float, float* %tmp12844, i64 1
+ %tmp12846 = getelementptr inbounds float, float* %tmp12845, i64 1
+ %tmp12847 = getelementptr inbounds float, float* %tmp12846, i64 1
+ %tmp12848 = getelementptr inbounds float, float* %tmp12847, i64 1
+ %tmp12849 = getelementptr inbounds float, float* %tmp12848, i64 1
+ %tmp12850 = getelementptr inbounds float, float* %tmp12849, i64 1
+ %tmp12851 = getelementptr inbounds float, float* %tmp12850, i64 1
+ %tmp12852 = getelementptr inbounds float, float* %tmp12851, i64 1
+ %tmp12853 = getelementptr inbounds float, float* %tmp12852, i64 1
+ %tmp12854 = getelementptr inbounds float, float* %tmp12853, i64 1
+ %tmp12855 = getelementptr inbounds float, float* %tmp12854, i64 1
+ %tmp12856 = getelementptr inbounds float, float* %tmp12855, i64 1
+ %tmp12857 = getelementptr inbounds float, float* %tmp12856, i64 1
+ %tmp12858 = getelementptr inbounds float, float* %tmp12857, i64 1
+ %tmp12859 = getelementptr inbounds float, float* %tmp12858, i64 1
+ %tmp12860 = getelementptr inbounds float, float* %tmp12859, i64 1
+ %tmp12861 = getelementptr inbounds float, float* %tmp12860, i64 1
+ %tmp12862 = getelementptr inbounds float, float* %tmp12861, i64 1
+ %tmp12863 = getelementptr inbounds float, float* %tmp12862, i64 1
+ %tmp12864 = getelementptr inbounds float, float* %tmp12863, i64 1
+ %tmp12865 = getelementptr inbounds float, float* %tmp12864, i64 1
+ %tmp12866 = getelementptr inbounds float, float* %tmp12865, i64 1
+ %tmp12867 = getelementptr inbounds float, float* %tmp12866, i64 1
+ %tmp12868 = getelementptr inbounds float, float* %tmp12867, i64 1
+ %tmp12869 = getelementptr inbounds float, float* %tmp12868, i64 1
+ %tmp12870 = getelementptr inbounds float, float* %tmp12869, i64 1
+ %tmp12871 = getelementptr inbounds float, float* %tmp12870, i64 1
+ %tmp12872 = getelementptr inbounds float, float* %tmp12871, i64 1
+ %tmp12873 = getelementptr inbounds float, float* %tmp12872, i64 1
+ %tmp12874 = getelementptr inbounds float, float* %tmp12873, i64 1
+ %tmp12875 = getelementptr inbounds float, float* %tmp12874, i64 1
+ %tmp12876 = getelementptr inbounds float, float* %tmp12875, i64 1
+ %tmp12877 = getelementptr inbounds float, float* %tmp12876, i64 1
+ %tmp12878 = getelementptr inbounds float, float* %tmp12877, i64 1
+ %tmp12879 = getelementptr inbounds float, float* %tmp12878, i64 1
+ %tmp12880 = getelementptr inbounds float, float* %tmp12879, i64 1
+ %tmp12881 = getelementptr inbounds float, float* %tmp12880, i64 1
+ %tmp12882 = getelementptr inbounds float, float* %tmp12881, i64 1
+ %tmp12883 = getelementptr inbounds float, float* %tmp12882, i64 1
+ %tmp12884 = getelementptr inbounds float, float* %tmp12883, i64 1
+ %tmp12885 = getelementptr inbounds float, float* %tmp12884, i64 1
+ %tmp12886 = getelementptr inbounds float, float* %tmp12885, i64 1
+ %tmp12887 = getelementptr inbounds float, float* %tmp12886, i64 1
+ %tmp12888 = getelementptr inbounds float, float* %tmp12887, i64 1
+ %tmp12889 = getelementptr inbounds float, float* %tmp12888, i64 1
+ %tmp12890 = getelementptr inbounds float, float* %tmp12889, i64 1
+ %tmp12891 = getelementptr inbounds float, float* %tmp12890, i64 1
+ %tmp12892 = getelementptr inbounds float, float* %tmp12891, i64 1
+ %tmp12893 = getelementptr inbounds float, float* %tmp12892, i64 1
+ %tmp12894 = getelementptr inbounds float, float* %tmp12893, i64 1
+ %tmp12895 = getelementptr inbounds float, float* %tmp12894, i64 1
+ %tmp12896 = getelementptr inbounds float, float* %tmp12895, i64 1
+ %tmp12897 = getelementptr inbounds float, float* %tmp12896, i64 1
+ %tmp12898 = getelementptr inbounds float, float* %tmp12897, i64 1
+ %tmp12899 = getelementptr inbounds float, float* %tmp12898, i64 1
+ %tmp12900 = getelementptr inbounds float, float* %tmp12899, i64 1
+ %tmp12901 = getelementptr inbounds float, float* %tmp12900, i64 1
+ %tmp12902 = getelementptr inbounds float, float* %tmp12901, i64 1
+ %tmp12903 = getelementptr inbounds float, float* %tmp12902, i64 1
+ %tmp12904 = getelementptr inbounds float, float* %tmp12903, i64 1
+ %tmp12905 = getelementptr inbounds float, float* %tmp12904, i64 1
+ %tmp12906 = getelementptr inbounds float, float* %tmp12905, i64 1
+ %tmp12907 = getelementptr inbounds float, float* %tmp12906, i64 1
+ %tmp12908 = getelementptr inbounds float, float* %tmp12907, i64 1
+ %tmp12909 = getelementptr inbounds float, float* %tmp12908, i64 1
+ %tmp12910 = getelementptr inbounds float, float* %tmp12909, i64 1
+ %tmp12911 = getelementptr inbounds float, float* %tmp12910, i64 1
+ %tmp12912 = getelementptr inbounds float, float* %tmp12911, i64 1
+ %tmp12913 = getelementptr inbounds float, float* %tmp12912, i64 1
+ %tmp12914 = getelementptr inbounds float, float* %tmp12913, i64 1
+ %tmp12915 = getelementptr inbounds float, float* %tmp12914, i64 1
+ %tmp12916 = getelementptr inbounds float, float* %tmp12915, i64 1
+ %tmp12917 = getelementptr inbounds float, float* %tmp12916, i64 1
+ %tmp12918 = getelementptr inbounds float, float* %tmp12917, i64 1
+ %tmp12919 = getelementptr inbounds float, float* %tmp12918, i64 1
+ %tmp12920 = getelementptr inbounds float, float* %tmp12919, i64 1
+ %tmp12921 = getelementptr inbounds float, float* %tmp12920, i64 1
+ %tmp12922 = getelementptr inbounds float, float* %tmp12921, i64 1
+ %tmp12923 = getelementptr inbounds float, float* %tmp12922, i64 1
+ %tmp12924 = getelementptr inbounds float, float* %tmp12923, i64 1
+ %tmp12925 = getelementptr inbounds float, float* %tmp12924, i64 1
+ %tmp12926 = getelementptr inbounds float, float* %tmp12925, i64 1
+ %tmp12927 = getelementptr inbounds float, float* %tmp12926, i64 1
+ %tmp12928 = getelementptr inbounds float, float* %tmp12927, i64 1
+ %tmp12929 = getelementptr inbounds float, float* %tmp12928, i64 1
+ %tmp12930 = getelementptr inbounds float, float* %tmp12929, i64 1
+ %tmp12931 = getelementptr inbounds float, float* %tmp12930, i64 1
+ %tmp12932 = getelementptr inbounds float, float* %tmp12931, i64 1
+ %tmp12933 = getelementptr inbounds float, float* %tmp12932, i64 1
+ %tmp12934 = getelementptr inbounds float, float* %tmp12933, i64 1
+ %tmp12935 = getelementptr inbounds float, float* %tmp12934, i64 1
+ %tmp12936 = getelementptr inbounds float, float* %tmp12935, i64 1
+ %tmp12937 = getelementptr inbounds float, float* %tmp12936, i64 1
+ %tmp12938 = getelementptr inbounds float, float* %tmp12937, i64 1
+ %tmp12939 = getelementptr inbounds float, float* %tmp12938, i64 1
+ %tmp12940 = getelementptr inbounds float, float* %tmp12939, i64 1
+ %tmp12941 = getelementptr inbounds float, float* %tmp12940, i64 1
+ %tmp12942 = getelementptr inbounds float, float* %tmp12941, i64 1
+ %tmp12943 = getelementptr inbounds float, float* %tmp12942, i64 1
+ %tmp12944 = getelementptr inbounds float, float* %tmp12943, i64 1
+ %tmp12945 = getelementptr inbounds float, float* %tmp12944, i64 1
+ %tmp12946 = getelementptr inbounds float, float* %tmp12945, i64 1
+ %tmp12947 = getelementptr inbounds float, float* %tmp12946, i64 1
+ %tmp12948 = getelementptr inbounds float, float* %tmp12947, i64 1
+ %tmp12949 = getelementptr inbounds float, float* %tmp12948, i64 1
+ %tmp12950 = getelementptr inbounds float, float* %tmp12949, i64 1
+ %tmp12951 = getelementptr inbounds float, float* %tmp12950, i64 1
+ %tmp12952 = getelementptr inbounds float, float* %tmp12951, i64 1
+ %tmp12953 = getelementptr inbounds float, float* %tmp12952, i64 1
+ %tmp12954 = getelementptr inbounds float, float* %tmp12953, i64 1
+ %tmp12955 = getelementptr inbounds float, float* %tmp12954, i64 1
+ %tmp12956 = getelementptr inbounds float, float* %tmp12955, i64 1
+ %tmp12957 = getelementptr inbounds float, float* %tmp12956, i64 1
+ %tmp12958 = getelementptr inbounds float, float* %tmp12957, i64 1
+ %tmp12959 = getelementptr inbounds float, float* %tmp12958, i64 1
+ %tmp12960 = getelementptr inbounds float, float* %tmp12959, i64 1
+ %tmp12961 = getelementptr inbounds float, float* %tmp12960, i64 1
+ %tmp12962 = getelementptr inbounds float, float* %tmp12961, i64 1
+ %tmp12963 = getelementptr inbounds float, float* %tmp12962, i64 1
+ %tmp12964 = getelementptr inbounds float, float* %tmp12963, i64 1
+ %tmp12965 = getelementptr inbounds float, float* %tmp12964, i64 1
+ %tmp12966 = getelementptr inbounds float, float* %tmp12965, i64 1
+ %tmp12967 = getelementptr inbounds float, float* %tmp12966, i64 1
+ %tmp12968 = getelementptr inbounds float, float* %tmp12967, i64 1
+ %tmp12969 = getelementptr inbounds float, float* %tmp12968, i64 1
+ %tmp12970 = getelementptr inbounds float, float* %tmp12969, i64 1
+ %tmp12971 = getelementptr inbounds float, float* %tmp12970, i64 1
+ %tmp12972 = getelementptr inbounds float, float* %tmp12971, i64 1
+ %tmp12973 = getelementptr inbounds float, float* %tmp12972, i64 1
+ %tmp12974 = getelementptr inbounds float, float* %tmp12973, i64 1
+ %tmp12975 = getelementptr inbounds float, float* %tmp12974, i64 1
+ %tmp12976 = getelementptr inbounds float, float* %tmp12975, i64 1
+ %tmp12977 = getelementptr inbounds float, float* %tmp12976, i64 1
+ %tmp12978 = getelementptr inbounds float, float* %tmp12977, i64 1
+ %tmp12979 = getelementptr inbounds float, float* %tmp12978, i64 1
+ %tmp12980 = getelementptr inbounds float, float* %tmp12979, i64 1
+ %tmp12981 = getelementptr inbounds float, float* %tmp12980, i64 1
+ %tmp12982 = getelementptr inbounds float, float* %tmp12981, i64 1
+ %tmp12983 = getelementptr inbounds float, float* %tmp12982, i64 1
+ %tmp12984 = getelementptr inbounds float, float* %tmp12983, i64 1
+ %tmp12985 = getelementptr inbounds float, float* %tmp12984, i64 1
+ %tmp12986 = getelementptr inbounds float, float* %tmp12985, i64 1
+ %tmp12987 = getelementptr inbounds float, float* %tmp12986, i64 1
+ %tmp12988 = getelementptr inbounds float, float* %tmp12987, i64 1
+ %tmp12989 = getelementptr inbounds float, float* %tmp12988, i64 1
+ %tmp12990 = getelementptr inbounds float, float* %tmp12989, i64 1
+ %tmp12991 = getelementptr inbounds float, float* %tmp12990, i64 1
+ %tmp12992 = getelementptr inbounds float, float* %tmp12991, i64 1
+ %tmp12993 = getelementptr inbounds float, float* %tmp12992, i64 1
+ %tmp12994 = getelementptr inbounds float, float* %tmp12993, i64 1
+ %tmp12995 = getelementptr inbounds float, float* %tmp12994, i64 1
+ %tmp12996 = getelementptr inbounds float, float* %tmp12995, i64 1
+ %tmp12997 = getelementptr inbounds float, float* %tmp12996, i64 1
+ %tmp12998 = getelementptr inbounds float, float* %tmp12997, i64 1
+ %tmp12999 = getelementptr inbounds float, float* %tmp12998, i64 1
+ %tmp13000 = getelementptr inbounds float, float* %tmp12999, i64 1
+ %tmp13001 = getelementptr inbounds float, float* %tmp13000, i64 1
+ %tmp13002 = getelementptr inbounds float, float* %tmp13001, i64 1
+ %tmp13003 = getelementptr inbounds float, float* %tmp13002, i64 1
+ %tmp13004 = getelementptr inbounds float, float* %tmp13003, i64 1
+ %tmp13005 = getelementptr inbounds float, float* %tmp13004, i64 1
+ %tmp13006 = getelementptr inbounds float, float* %tmp13005, i64 1
+ %tmp13007 = getelementptr inbounds float, float* %tmp13006, i64 1
+ %tmp13008 = getelementptr inbounds float, float* %tmp13007, i64 1
+ %tmp13009 = getelementptr inbounds float, float* %tmp13008, i64 1
+ %tmp13010 = getelementptr inbounds float, float* %tmp13009, i64 1
+ %tmp13011 = getelementptr inbounds float, float* %tmp13010, i64 1
+ %tmp13012 = getelementptr inbounds float, float* %tmp13011, i64 1
+ %tmp13013 = getelementptr inbounds float, float* %tmp13012, i64 1
+ %tmp13014 = getelementptr inbounds float, float* %tmp13013, i64 1
+ %tmp13015 = getelementptr inbounds float, float* %tmp13014, i64 1
+ %tmp13016 = getelementptr inbounds float, float* %tmp13015, i64 1
+ %tmp13017 = getelementptr inbounds float, float* %tmp13016, i64 1
+ %tmp13018 = getelementptr inbounds float, float* %tmp13017, i64 1
+ %tmp13019 = getelementptr inbounds float, float* %tmp13018, i64 1
+ %tmp13020 = getelementptr inbounds float, float* %tmp13019, i64 1
+ %tmp13021 = getelementptr inbounds float, float* %tmp13020, i64 1
+ %tmp13022 = getelementptr inbounds float, float* %tmp13021, i64 1
+ %tmp13023 = getelementptr inbounds float, float* %tmp13022, i64 1
+ %tmp13024 = getelementptr inbounds float, float* %tmp13023, i64 1
+ %tmp13025 = getelementptr inbounds float, float* %tmp13024, i64 1
+ %tmp13026 = getelementptr inbounds float, float* %tmp13025, i64 1
+ %tmp13027 = getelementptr inbounds float, float* %tmp13026, i64 1
+ %tmp13028 = getelementptr inbounds float, float* %tmp13027, i64 1
+ %tmp13029 = getelementptr inbounds float, float* %tmp13028, i64 1
+ %tmp13030 = getelementptr inbounds float, float* %tmp13029, i64 1
+ %tmp13031 = getelementptr inbounds float, float* %tmp13030, i64 1
+ %tmp13032 = getelementptr inbounds float, float* %tmp13031, i64 1
+ %tmp13033 = getelementptr inbounds float, float* %tmp13032, i64 1
+ %tmp13034 = getelementptr inbounds float, float* %tmp13033, i64 1
+ %tmp13035 = getelementptr inbounds float, float* %tmp13034, i64 1
+ %tmp13036 = getelementptr inbounds float, float* %tmp13035, i64 1
+ %tmp13037 = getelementptr inbounds float, float* %tmp13036, i64 1
+ %tmp13038 = getelementptr inbounds float, float* %tmp13037, i64 1
+ %tmp13039 = getelementptr inbounds float, float* %tmp13038, i64 1
+ %tmp13040 = getelementptr inbounds float, float* %tmp13039, i64 1
+ %tmp13041 = getelementptr inbounds float, float* %tmp13040, i64 1
+ %tmp13042 = getelementptr inbounds float, float* %tmp13041, i64 1
+ %tmp13043 = getelementptr inbounds float, float* %tmp13042, i64 1
+ %tmp13044 = getelementptr inbounds float, float* %tmp13043, i64 1
+ %tmp13045 = getelementptr inbounds float, float* %tmp13044, i64 1
+ %tmp13046 = getelementptr inbounds float, float* %tmp13045, i64 1
+ %tmp13047 = getelementptr inbounds float, float* %tmp13046, i64 1
+ %tmp13048 = getelementptr inbounds float, float* %tmp13047, i64 1
+ %tmp13049 = getelementptr inbounds float, float* %tmp13048, i64 1
+ %tmp13050 = getelementptr inbounds float, float* %tmp13049, i64 1
+ %tmp13051 = getelementptr inbounds float, float* %tmp13050, i64 1
+ %tmp13052 = getelementptr inbounds float, float* %tmp13051, i64 1
+ %tmp13053 = getelementptr inbounds float, float* %tmp13052, i64 1
+ %tmp13054 = getelementptr inbounds float, float* %tmp13053, i64 1
+ %tmp13055 = getelementptr inbounds float, float* %tmp13054, i64 1
+ %tmp13056 = getelementptr inbounds float, float* %tmp13055, i64 1
+ %tmp13057 = getelementptr inbounds float, float* %tmp13056, i64 1
+ %tmp13058 = getelementptr inbounds float, float* %tmp13057, i64 1
+ %tmp13059 = getelementptr inbounds float, float* %tmp13058, i64 1
+ %tmp13060 = getelementptr inbounds float, float* %tmp13059, i64 1
+ %tmp13061 = getelementptr inbounds float, float* %tmp13060, i64 1
+ %tmp13062 = getelementptr inbounds float, float* %tmp13061, i64 1
+ %tmp13063 = getelementptr inbounds float, float* %tmp13062, i64 1
+ %tmp13064 = getelementptr inbounds float, float* %tmp13063, i64 1
+ %tmp13065 = getelementptr inbounds float, float* %tmp13064, i64 1
+ %tmp13066 = getelementptr inbounds float, float* %tmp13065, i64 1
+ %tmp13067 = getelementptr inbounds float, float* %tmp13066, i64 1
+ %tmp13068 = getelementptr inbounds float, float* %tmp13067, i64 1
+ %tmp13069 = getelementptr inbounds float, float* %tmp13068, i64 1
+ %tmp13070 = getelementptr inbounds float, float* %tmp13069, i64 1
+ %tmp13071 = getelementptr inbounds float, float* %tmp13070, i64 1
+ %tmp13072 = getelementptr inbounds float, float* %tmp13071, i64 1
+ %tmp13073 = getelementptr inbounds float, float* %tmp13072, i64 1
+ %tmp13074 = getelementptr inbounds float, float* %tmp13073, i64 1
+ %tmp13075 = getelementptr inbounds float, float* %tmp13074, i64 1
+ %tmp13076 = getelementptr inbounds float, float* %tmp13075, i64 1
+ %tmp13077 = getelementptr inbounds float, float* %tmp13076, i64 1
+ %tmp13078 = getelementptr inbounds float, float* %tmp13077, i64 1
+ %tmp13079 = getelementptr inbounds float, float* %tmp13078, i64 1
+ %tmp13080 = getelementptr inbounds float, float* %tmp13079, i64 1
+ %tmp13081 = getelementptr inbounds float, float* %tmp13080, i64 1
+ %tmp13082 = getelementptr inbounds float, float* %tmp13081, i64 1
+ %tmp13083 = getelementptr inbounds float, float* %tmp13082, i64 1
+ %tmp13084 = getelementptr inbounds float, float* %tmp13083, i64 1
+ %tmp13085 = getelementptr inbounds float, float* %tmp13084, i64 1
+ %tmp13086 = getelementptr inbounds float, float* %tmp13085, i64 1
+ %tmp13087 = getelementptr inbounds float, float* %tmp13086, i64 1
+ %tmp13088 = getelementptr inbounds float, float* %tmp13087, i64 1
+ %tmp13089 = getelementptr inbounds float, float* %tmp13088, i64 1
+ %tmp13090 = getelementptr inbounds float, float* %tmp13089, i64 1
+ %tmp13091 = getelementptr inbounds float, float* %tmp13090, i64 1
+ %tmp13092 = getelementptr inbounds float, float* %tmp13091, i64 1
+ %tmp13093 = getelementptr inbounds float, float* %tmp13092, i64 1
+ %tmp13094 = getelementptr inbounds float, float* %tmp13093, i64 1
+ %tmp13095 = getelementptr inbounds float, float* %tmp13094, i64 1
+ %tmp13096 = getelementptr inbounds float, float* %tmp13095, i64 1
+ %tmp13097 = getelementptr inbounds float, float* %tmp13096, i64 1
+ %tmp13098 = getelementptr inbounds float, float* %tmp13097, i64 1
+ %tmp13099 = getelementptr inbounds float, float* %tmp13098, i64 1
+ %tmp13100 = getelementptr inbounds float, float* %tmp13099, i64 1
+ %tmp13101 = getelementptr inbounds float, float* %tmp13100, i64 1
+ %tmp13102 = getelementptr inbounds float, float* %tmp13101, i64 1
+ %tmp13103 = getelementptr inbounds float, float* %tmp13102, i64 1
+ %tmp13104 = getelementptr inbounds float, float* %tmp13103, i64 1
+ %tmp13105 = getelementptr inbounds float, float* %tmp13104, i64 1
+ %tmp13106 = getelementptr inbounds float, float* %tmp13105, i64 1
+ %tmp13107 = getelementptr inbounds float, float* %tmp13106, i64 1
+ %tmp13108 = getelementptr inbounds float, float* %tmp13107, i64 1
+ %tmp13109 = getelementptr inbounds float, float* %tmp13108, i64 1
+ %tmp13110 = getelementptr inbounds float, float* %tmp13109, i64 1
+ %tmp13111 = getelementptr inbounds float, float* %tmp13110, i64 1
+ %tmp13112 = getelementptr inbounds float, float* %tmp13111, i64 1
+ %tmp13113 = getelementptr inbounds float, float* %tmp13112, i64 1
+ %tmp13114 = getelementptr inbounds float, float* %tmp13113, i64 1
+ %tmp13115 = getelementptr inbounds float, float* %tmp13114, i64 1
+ %tmp13116 = getelementptr inbounds float, float* %tmp13115, i64 1
+ %tmp13117 = getelementptr inbounds float, float* %tmp13116, i64 1
+ %tmp13118 = getelementptr inbounds float, float* %tmp13117, i64 1
+ %tmp13119 = getelementptr inbounds float, float* %tmp13118, i64 1
+ %tmp13120 = getelementptr inbounds float, float* %tmp13119, i64 1
+ %tmp13121 = getelementptr inbounds float, float* %tmp13120, i64 1
+ %tmp13122 = getelementptr inbounds float, float* %tmp13121, i64 1
+ %tmp13123 = getelementptr inbounds float, float* %tmp13122, i64 1
+ %tmp13124 = getelementptr inbounds float, float* %tmp13123, i64 1
+ %tmp13125 = getelementptr inbounds float, float* %tmp13124, i64 1
+ %tmp13126 = getelementptr inbounds float, float* %tmp13125, i64 1
+ %tmp13127 = getelementptr inbounds float, float* %tmp13126, i64 1
+ %tmp13128 = getelementptr inbounds float, float* %tmp13127, i64 1
+ %tmp13129 = getelementptr inbounds float, float* %tmp13128, i64 1
+ %tmp13130 = getelementptr inbounds float, float* %tmp13129, i64 1
+ %tmp13131 = getelementptr inbounds float, float* %tmp13130, i64 1
+ %tmp13132 = getelementptr inbounds float, float* %tmp13131, i64 1
+ %tmp13133 = getelementptr inbounds float, float* %tmp13132, i64 1
+ %tmp13134 = getelementptr inbounds float, float* %tmp13133, i64 1
+ %tmp13135 = getelementptr inbounds float, float* %tmp13134, i64 1
+ %tmp13136 = getelementptr inbounds float, float* %tmp13135, i64 1
+ %tmp13137 = getelementptr inbounds float, float* %tmp13136, i64 1
+ %tmp13138 = getelementptr inbounds float, float* %tmp13137, i64 1
+ %tmp13139 = getelementptr inbounds float, float* %tmp13138, i64 1
+ %tmp13140 = getelementptr inbounds float, float* %tmp13139, i64 1
+ %tmp13141 = getelementptr inbounds float, float* %tmp13140, i64 1
+ %tmp13142 = getelementptr inbounds float, float* %tmp13141, i64 1
+ %tmp13143 = getelementptr inbounds float, float* %tmp13142, i64 1
+ %tmp13144 = getelementptr inbounds float, float* %tmp13143, i64 1
+ %tmp13145 = getelementptr inbounds float, float* %tmp13144, i64 1
+ %tmp13146 = getelementptr inbounds float, float* %tmp13145, i64 1
+ %tmp13147 = getelementptr inbounds float, float* %tmp13146, i64 1
+ %tmp13148 = getelementptr inbounds float, float* %tmp13147, i64 1
+ %tmp13149 = getelementptr inbounds float, float* %tmp13148, i64 1
+ %tmp13150 = getelementptr inbounds float, float* %tmp13149, i64 1
+ %tmp13151 = getelementptr inbounds float, float* %tmp13150, i64 1
+ %tmp13152 = getelementptr inbounds float, float* %tmp13151, i64 1
+ %tmp13153 = getelementptr inbounds float, float* %tmp13152, i64 1
+ %tmp13154 = getelementptr inbounds float, float* %tmp13153, i64 1
+ %tmp13155 = getelementptr inbounds float, float* %tmp13154, i64 1
+ %tmp13156 = getelementptr inbounds float, float* %tmp13155, i64 1
+ %tmp13157 = getelementptr inbounds float, float* %tmp13156, i64 1
+ %tmp13158 = getelementptr inbounds float, float* %tmp13157, i64 1
+ %tmp13159 = getelementptr inbounds float, float* %tmp13158, i64 1
+ %tmp13160 = getelementptr inbounds float, float* %tmp13159, i64 1
+ %tmp13161 = getelementptr inbounds float, float* %tmp13160, i64 1
+ %tmp13162 = getelementptr inbounds float, float* %tmp13161, i64 1
+ %tmp13163 = getelementptr inbounds float, float* %tmp13162, i64 1
+ %tmp13164 = getelementptr inbounds float, float* %tmp13163, i64 1
+ %tmp13165 = getelementptr inbounds float, float* %tmp13164, i64 1
+ %tmp13166 = getelementptr inbounds float, float* %tmp13165, i64 1
+ %tmp13167 = getelementptr inbounds float, float* %tmp13166, i64 1
+ %tmp13168 = getelementptr inbounds float, float* %tmp13167, i64 1
+ %tmp13169 = getelementptr inbounds float, float* %tmp13168, i64 1
+ %tmp13170 = getelementptr inbounds float, float* %tmp13169, i64 1
+ %tmp13171 = getelementptr inbounds float, float* %tmp13170, i64 1
+ %tmp13172 = getelementptr inbounds float, float* %tmp13171, i64 1
+ %tmp13173 = getelementptr inbounds float, float* %tmp13172, i64 1
+ %tmp13174 = getelementptr inbounds float, float* %tmp13173, i64 1
+ %tmp13175 = getelementptr inbounds float, float* %tmp13174, i64 1
+ %tmp13176 = getelementptr inbounds float, float* %tmp13175, i64 1
+ %tmp13177 = getelementptr inbounds float, float* %tmp13176, i64 1
+ %tmp13178 = getelementptr inbounds float, float* %tmp13177, i64 1
+ %tmp13179 = getelementptr inbounds float, float* %tmp13178, i64 1
+ %tmp13180 = getelementptr inbounds float, float* %tmp13179, i64 1
+ %tmp13181 = getelementptr inbounds float, float* %tmp13180, i64 1
+ %tmp13182 = getelementptr inbounds float, float* %tmp13181, i64 1
+ %tmp13183 = getelementptr inbounds float, float* %tmp13182, i64 1
+ %tmp13184 = getelementptr inbounds float, float* %tmp13183, i64 1
+ %tmp13185 = getelementptr inbounds float, float* %tmp13184, i64 1
+ %tmp13186 = getelementptr inbounds float, float* %tmp13185, i64 1
+ %tmp13187 = getelementptr inbounds float, float* %tmp13186, i64 1
+ %tmp13188 = getelementptr inbounds float, float* %tmp13187, i64 1
+ %tmp13189 = getelementptr inbounds float, float* %tmp13188, i64 1
+ %tmp13190 = getelementptr inbounds float, float* %tmp13189, i64 1
+ %tmp13191 = getelementptr inbounds float, float* %tmp13190, i64 1
+ %tmp13192 = getelementptr inbounds float, float* %tmp13191, i64 1
+ %tmp13193 = getelementptr inbounds float, float* %tmp13192, i64 1
+ %tmp13194 = getelementptr inbounds float, float* %tmp13193, i64 1
+ %tmp13195 = getelementptr inbounds float, float* %tmp13194, i64 1
+ %tmp13196 = getelementptr inbounds float, float* %tmp13195, i64 1
+ %tmp13197 = getelementptr inbounds float, float* %tmp13196, i64 1
+ %tmp13198 = getelementptr inbounds float, float* %tmp13197, i64 1
+ %tmp13199 = getelementptr inbounds float, float* %tmp13198, i64 1
+ %tmp13200 = getelementptr inbounds float, float* %tmp13199, i64 1
+ %tmp13201 = getelementptr inbounds float, float* %tmp13200, i64 1
+ %tmp13202 = getelementptr inbounds float, float* %tmp13201, i64 1
+ %tmp13203 = getelementptr inbounds float, float* %tmp13202, i64 1
+ %tmp13204 = getelementptr inbounds float, float* %tmp13203, i64 1
+ %tmp13205 = getelementptr inbounds float, float* %tmp13204, i64 1
+ %tmp13206 = getelementptr inbounds float, float* %tmp13205, i64 1
+ %tmp13207 = getelementptr inbounds float, float* %tmp13206, i64 1
+ %tmp13208 = getelementptr inbounds float, float* %tmp13207, i64 1
+ %tmp13209 = getelementptr inbounds float, float* %tmp13208, i64 1
+ %tmp13210 = getelementptr inbounds float, float* %tmp13209, i64 1
+ %tmp13211 = getelementptr inbounds float, float* %tmp13210, i64 1
+ %tmp13212 = getelementptr inbounds float, float* %tmp13211, i64 1
+ %tmp13213 = getelementptr inbounds float, float* %tmp13212, i64 1
+ %tmp13214 = getelementptr inbounds float, float* %tmp13213, i64 1
+ %tmp13215 = getelementptr inbounds float, float* %tmp13214, i64 1
+ %tmp13216 = getelementptr inbounds float, float* %tmp13215, i64 1
+ %tmp13217 = getelementptr inbounds float, float* %tmp13216, i64 1
+ %tmp13218 = getelementptr inbounds float, float* %tmp13217, i64 1
+ %tmp13219 = getelementptr inbounds float, float* %tmp13218, i64 1
+ %tmp13220 = getelementptr inbounds float, float* %tmp13219, i64 1
+ %tmp13221 = getelementptr inbounds float, float* %tmp13220, i64 1
+ %tmp13222 = getelementptr inbounds float, float* %tmp13221, i64 1
+ %tmp13223 = getelementptr inbounds float, float* %tmp13222, i64 1
+ %tmp13224 = getelementptr inbounds float, float* %tmp13223, i64 1
+ %tmp13225 = getelementptr inbounds float, float* %tmp13224, i64 1
+ %tmp13226 = getelementptr inbounds float, float* %tmp13225, i64 1
+ %tmp13227 = getelementptr inbounds float, float* %tmp13226, i64 1
+ %tmp13228 = getelementptr inbounds float, float* %tmp13227, i64 1
+ %tmp13229 = getelementptr inbounds float, float* %tmp13228, i64 1
+ %tmp13230 = getelementptr inbounds float, float* %tmp13229, i64 1
+ %tmp13231 = getelementptr inbounds float, float* %tmp13230, i64 1
+ %tmp13232 = getelementptr inbounds float, float* %tmp13231, i64 1
+ %tmp13233 = getelementptr inbounds float, float* %tmp13232, i64 1
+ %tmp13234 = getelementptr inbounds float, float* %tmp13233, i64 1
+ %tmp13235 = getelementptr inbounds float, float* %tmp13234, i64 1
+ %tmp13236 = getelementptr inbounds float, float* %tmp13235, i64 1
+ %tmp13237 = getelementptr inbounds float, float* %tmp13236, i64 1
+ %tmp13238 = getelementptr inbounds float, float* %tmp13237, i64 1
+ %tmp13239 = getelementptr inbounds float, float* %tmp13238, i64 1
+ %tmp13240 = getelementptr inbounds float, float* %tmp13239, i64 1
+ %tmp13241 = getelementptr inbounds float, float* %tmp13240, i64 1
+ %tmp13242 = getelementptr inbounds float, float* %tmp13241, i64 1
+ %tmp13243 = getelementptr inbounds float, float* %tmp13242, i64 1
+ %tmp13244 = getelementptr inbounds float, float* %tmp13243, i64 1
+ %tmp13245 = getelementptr inbounds float, float* %tmp13244, i64 1
+ %tmp13246 = getelementptr inbounds float, float* %tmp13245, i64 1
+ %tmp13247 = getelementptr inbounds float, float* %tmp13246, i64 1
+ %tmp13248 = getelementptr inbounds float, float* %tmp13247, i64 1
+ %tmp13249 = getelementptr inbounds float, float* %tmp13248, i64 1
+ %tmp13250 = getelementptr inbounds float, float* %tmp13249, i64 1
+ %tmp13251 = getelementptr inbounds float, float* %tmp13250, i64 1
+ %tmp13252 = getelementptr inbounds float, float* %tmp13251, i64 1
+ %tmp13253 = getelementptr inbounds float, float* %tmp13252, i64 1
+ %tmp13254 = getelementptr inbounds float, float* %tmp13253, i64 1
+ %tmp13255 = getelementptr inbounds float, float* %tmp13254, i64 1
+ %tmp13256 = getelementptr inbounds float, float* %tmp13255, i64 1
+ %tmp13257 = getelementptr inbounds float, float* %tmp13256, i64 1
+ %tmp13258 = getelementptr inbounds float, float* %tmp13257, i64 1
+ %tmp13259 = getelementptr inbounds float, float* %tmp13258, i64 1
+ %tmp13260 = getelementptr inbounds float, float* %tmp13259, i64 1
+ %tmp13261 = getelementptr inbounds float, float* %tmp13260, i64 1
+ %tmp13262 = getelementptr inbounds float, float* %tmp13261, i64 1
+ %tmp13263 = getelementptr inbounds float, float* %tmp13262, i64 1
+ %tmp13264 = getelementptr inbounds float, float* %tmp13263, i64 1
+ %tmp13265 = getelementptr inbounds float, float* %tmp13264, i64 1
+ %tmp13266 = getelementptr inbounds float, float* %tmp13265, i64 1
+ %tmp13267 = getelementptr inbounds float, float* %tmp13266, i64 1
+ %tmp13268 = getelementptr inbounds float, float* %tmp13267, i64 1
+ %tmp13269 = getelementptr inbounds float, float* %tmp13268, i64 1
+ %tmp13270 = getelementptr inbounds float, float* %tmp13269, i64 1
+ %tmp13271 = getelementptr inbounds float, float* %tmp13270, i64 1
+ %tmp13272 = getelementptr inbounds float, float* %tmp13271, i64 1
+ %tmp13273 = getelementptr inbounds float, float* %tmp13272, i64 1
+ %tmp13274 = getelementptr inbounds float, float* %tmp13273, i64 1
+ %tmp13275 = getelementptr inbounds float, float* %tmp13274, i64 1
+ %tmp13276 = getelementptr inbounds float, float* %tmp13275, i64 1
+ %tmp13277 = getelementptr inbounds float, float* %tmp13276, i64 1
+ %tmp13278 = getelementptr inbounds float, float* %tmp13277, i64 1
+ %tmp13279 = getelementptr inbounds float, float* %tmp13278, i64 1
+ %tmp13280 = getelementptr inbounds float, float* %tmp13279, i64 1
+ %tmp13281 = getelementptr inbounds float, float* %tmp13280, i64 1
+ %tmp13282 = getelementptr inbounds float, float* %tmp13281, i64 1
+ %tmp13283 = getelementptr inbounds float, float* %tmp13282, i64 1
+ %tmp13284 = getelementptr inbounds float, float* %tmp13283, i64 1
+ %tmp13285 = getelementptr inbounds float, float* %tmp13284, i64 1
+ %tmp13286 = getelementptr inbounds float, float* %tmp13285, i64 1
+ %tmp13287 = getelementptr inbounds float, float* %tmp13286, i64 1
+ %tmp13288 = getelementptr inbounds float, float* %tmp13287, i64 1
+ %tmp13289 = getelementptr inbounds float, float* %tmp13288, i64 1
+ %tmp13290 = getelementptr inbounds float, float* %tmp13289, i64 1
+ %tmp13291 = getelementptr inbounds float, float* %tmp13290, i64 1
+ %tmp13292 = getelementptr inbounds float, float* %tmp13291, i64 1
+ %tmp13293 = getelementptr inbounds float, float* %tmp13292, i64 1
+ %tmp13294 = getelementptr inbounds float, float* %tmp13293, i64 1
+ %tmp13295 = getelementptr inbounds float, float* %tmp13294, i64 1
+ %tmp13296 = getelementptr inbounds float, float* %tmp13295, i64 1
+ %tmp13297 = getelementptr inbounds float, float* %tmp13296, i64 1
+ %tmp13298 = getelementptr inbounds float, float* %tmp13297, i64 1
+ %tmp13299 = getelementptr inbounds float, float* %tmp13298, i64 1
+ %tmp13300 = getelementptr inbounds float, float* %tmp13299, i64 1
+ %tmp13301 = getelementptr inbounds float, float* %tmp13300, i64 1
+ %tmp13302 = getelementptr inbounds float, float* %tmp13301, i64 1
+ %tmp13303 = getelementptr inbounds float, float* %tmp13302, i64 1
+ %tmp13304 = getelementptr inbounds float, float* %tmp13303, i64 1
+ %tmp13305 = getelementptr inbounds float, float* %tmp13304, i64 1
+ %tmp13306 = getelementptr inbounds float, float* %tmp13305, i64 1
+ %tmp13307 = getelementptr inbounds float, float* %tmp13306, i64 1
+ %tmp13308 = getelementptr inbounds float, float* %tmp13307, i64 1
+ %tmp13309 = getelementptr inbounds float, float* %tmp13308, i64 1
+ %tmp13310 = getelementptr inbounds float, float* %tmp13309, i64 1
+ %tmp13311 = getelementptr inbounds float, float* %tmp13310, i64 1
+ %tmp13312 = getelementptr inbounds float, float* %tmp13311, i64 1
+ %tmp13313 = getelementptr inbounds float, float* %tmp13312, i64 1
+ %tmp13314 = getelementptr inbounds float, float* %tmp13313, i64 1
+ %tmp13315 = getelementptr inbounds float, float* %tmp13314, i64 1
+ %tmp13316 = getelementptr inbounds float, float* %tmp13315, i64 1
+ %tmp13317 = getelementptr inbounds float, float* %tmp13316, i64 1
+ %tmp13318 = getelementptr inbounds float, float* %tmp13317, i64 1
+ %tmp13319 = getelementptr inbounds float, float* %tmp13318, i64 1
+ %tmp13320 = getelementptr inbounds float, float* %tmp13319, i64 1
+ %tmp13321 = getelementptr inbounds float, float* %tmp13320, i64 1
+ %tmp13322 = getelementptr inbounds float, float* %tmp13321, i64 1
+ %tmp13323 = getelementptr inbounds float, float* %tmp13322, i64 1
+ %tmp13324 = getelementptr inbounds float, float* %tmp13323, i64 1
+ %tmp13325 = getelementptr inbounds float, float* %tmp13324, i64 1
+ %tmp13326 = getelementptr inbounds float, float* %tmp13325, i64 1
+ %tmp13327 = getelementptr inbounds float, float* %tmp13326, i64 1
+ %tmp13328 = getelementptr inbounds float, float* %tmp13327, i64 1
+ %tmp13329 = getelementptr inbounds float, float* %tmp13328, i64 1
+ %tmp13330 = getelementptr inbounds float, float* %tmp13329, i64 1
+ %tmp13331 = getelementptr inbounds float, float* %tmp13330, i64 1
+ %tmp13332 = getelementptr inbounds float, float* %tmp13331, i64 1
+ %tmp13333 = getelementptr inbounds float, float* %tmp13332, i64 1
+ %tmp13334 = getelementptr inbounds float, float* %tmp13333, i64 1
+ %tmp13335 = getelementptr inbounds float, float* %tmp13334, i64 1
+ %tmp13336 = getelementptr inbounds float, float* %tmp13335, i64 1
+ %tmp13337 = getelementptr inbounds float, float* %tmp13336, i64 1
+ %tmp13338 = getelementptr inbounds float, float* %tmp13337, i64 1
+ %tmp13339 = getelementptr inbounds float, float* %tmp13338, i64 1
+ %tmp13340 = getelementptr inbounds float, float* %tmp13339, i64 1
+ %tmp13341 = getelementptr inbounds float, float* %tmp13340, i64 1
+ %tmp13342 = getelementptr inbounds float, float* %tmp13341, i64 1
+ %tmp13343 = getelementptr inbounds float, float* %tmp13342, i64 1
+ %tmp13344 = getelementptr inbounds float, float* %tmp13343, i64 1
+ %tmp13345 = getelementptr inbounds float, float* %tmp13344, i64 1
+ %tmp13346 = getelementptr inbounds float, float* %tmp13345, i64 1
+ %tmp13347 = getelementptr inbounds float, float* %tmp13346, i64 1
+ %tmp13348 = getelementptr inbounds float, float* %tmp13347, i64 1
+ %tmp13349 = getelementptr inbounds float, float* %tmp13348, i64 1
+ %tmp13350 = getelementptr inbounds float, float* %tmp13349, i64 1
+ %tmp13351 = getelementptr inbounds float, float* %tmp13350, i64 1
+ %tmp13352 = getelementptr inbounds float, float* %tmp13351, i64 1
+ %tmp13353 = getelementptr inbounds float, float* %tmp13352, i64 1
+ %tmp13354 = getelementptr inbounds float, float* %tmp13353, i64 1
+ %tmp13355 = getelementptr inbounds float, float* %tmp13354, i64 1
+ %tmp13356 = getelementptr inbounds float, float* %tmp13355, i64 1
+ %tmp13357 = getelementptr inbounds float, float* %tmp13356, i64 1
+ %tmp13358 = getelementptr inbounds float, float* %tmp13357, i64 1
+ %tmp13359 = getelementptr inbounds float, float* %tmp13358, i64 1
+ %tmp13360 = getelementptr inbounds float, float* %tmp13359, i64 1
+ %tmp13361 = getelementptr inbounds float, float* %tmp13360, i64 1
+ %tmp13362 = getelementptr inbounds float, float* %tmp13361, i64 1
+ %tmp13363 = getelementptr inbounds float, float* %tmp13362, i64 1
+ %tmp13364 = getelementptr inbounds float, float* %tmp13363, i64 1
+ %tmp13365 = getelementptr inbounds float, float* %tmp13364, i64 1
+ %tmp13366 = getelementptr inbounds float, float* %tmp13365, i64 1
+ %tmp13367 = getelementptr inbounds float, float* %tmp13366, i64 1
+ %tmp13368 = getelementptr inbounds float, float* %tmp13367, i64 1
+ %tmp13369 = getelementptr inbounds float, float* %tmp13368, i64 1
+ %tmp13370 = getelementptr inbounds float, float* %tmp13369, i64 1
+ %tmp13371 = getelementptr inbounds float, float* %tmp13370, i64 1
+ %tmp13372 = getelementptr inbounds float, float* %tmp13371, i64 1
+ %tmp13373 = getelementptr inbounds float, float* %tmp13372, i64 1
+ %tmp13374 = getelementptr inbounds float, float* %tmp13373, i64 1
+ %tmp13375 = getelementptr inbounds float, float* %tmp13374, i64 1
+ %tmp13376 = getelementptr inbounds float, float* %tmp13375, i64 1
+ %tmp13377 = getelementptr inbounds float, float* %tmp13376, i64 1
+ %tmp13378 = getelementptr inbounds float, float* %tmp13377, i64 1
+ %tmp13379 = getelementptr inbounds float, float* %tmp13378, i64 1
+ %tmp13380 = getelementptr inbounds float, float* %tmp13379, i64 1
+ %tmp13381 = getelementptr inbounds float, float* %tmp13380, i64 1
+ %tmp13382 = getelementptr inbounds float, float* %tmp13381, i64 1
+ %tmp13383 = getelementptr inbounds float, float* %tmp13382, i64 1
+ %tmp13384 = getelementptr inbounds float, float* %tmp13383, i64 1
+ %tmp13385 = getelementptr inbounds float, float* %tmp13384, i64 1
+ %tmp13386 = getelementptr inbounds float, float* %tmp13385, i64 1
+ %tmp13387 = getelementptr inbounds float, float* %tmp13386, i64 1
+ %tmp13388 = getelementptr inbounds float, float* %tmp13387, i64 1
+ %tmp13389 = getelementptr inbounds float, float* %tmp13388, i64 1
+ %tmp13390 = getelementptr inbounds float, float* %tmp13389, i64 1
+ %tmp13391 = getelementptr inbounds float, float* %tmp13390, i64 1
+ %tmp13392 = getelementptr inbounds float, float* %tmp13391, i64 1
+ %tmp13393 = getelementptr inbounds float, float* %tmp13392, i64 1
+ %tmp13394 = getelementptr inbounds float, float* %tmp13393, i64 1
+ %tmp13395 = getelementptr inbounds float, float* %tmp13394, i64 1
+ %tmp13396 = getelementptr inbounds float, float* %tmp13395, i64 1
+ %tmp13397 = getelementptr inbounds float, float* %tmp13396, i64 1
+ %tmp13398 = getelementptr inbounds float, float* %tmp13397, i64 1
+ %tmp13399 = getelementptr inbounds float, float* %tmp13398, i64 1
+ %tmp13400 = getelementptr inbounds float, float* %tmp13399, i64 1
+ %tmp13401 = getelementptr inbounds float, float* %tmp13400, i64 1
+ %tmp13402 = getelementptr inbounds float, float* %tmp13401, i64 1
+ %tmp13403 = getelementptr inbounds float, float* %tmp13402, i64 1
+ %tmp13404 = getelementptr inbounds float, float* %tmp13403, i64 1
+ %tmp13405 = getelementptr inbounds float, float* %tmp13404, i64 1
+ %tmp13406 = getelementptr inbounds float, float* %tmp13405, i64 1
+ %tmp13407 = getelementptr inbounds float, float* %tmp13406, i64 1
+ %tmp13408 = getelementptr inbounds float, float* %tmp13407, i64 1
+ %tmp13409 = getelementptr inbounds float, float* %tmp13408, i64 1
+ %tmp13410 = getelementptr inbounds float, float* %tmp13409, i64 1
+ %tmp13411 = getelementptr inbounds float, float* %tmp13410, i64 1
+ %tmp13412 = getelementptr inbounds float, float* %tmp13411, i64 1
+ %tmp13413 = getelementptr inbounds float, float* %tmp13412, i64 1
+ %tmp13414 = getelementptr inbounds float, float* %tmp13413, i64 1
+ %tmp13415 = getelementptr inbounds float, float* %tmp13414, i64 1
+ %tmp13416 = getelementptr inbounds float, float* %tmp13415, i64 1
+ %tmp13417 = getelementptr inbounds float, float* %tmp13416, i64 1
+ %tmp13418 = getelementptr inbounds float, float* %tmp13417, i64 1
+ %tmp13419 = getelementptr inbounds float, float* %tmp13418, i64 1
+ %tmp13420 = getelementptr inbounds float, float* %tmp13419, i64 1
+ %tmp13421 = getelementptr inbounds float, float* %tmp13420, i64 1
+ %tmp13422 = getelementptr inbounds float, float* %tmp13421, i64 1
+ %tmp13423 = getelementptr inbounds float, float* %tmp13422, i64 1
+ %tmp13424 = getelementptr inbounds float, float* %tmp13423, i64 1
+ %tmp13425 = getelementptr inbounds float, float* %tmp13424, i64 1
+ %tmp13426 = getelementptr inbounds float, float* %tmp13425, i64 1
+ %tmp13427 = getelementptr inbounds float, float* %tmp13426, i64 1
+ %tmp13428 = getelementptr inbounds float, float* %tmp13427, i64 1
+ %tmp13429 = getelementptr inbounds float, float* %tmp13428, i64 1
+ %tmp13430 = getelementptr inbounds float, float* %tmp13429, i64 1
+ %tmp13431 = getelementptr inbounds float, float* %tmp13430, i64 1
+ %tmp13432 = getelementptr inbounds float, float* %tmp13431, i64 1
+ %tmp13433 = getelementptr inbounds float, float* %tmp13432, i64 1
+ %tmp13434 = getelementptr inbounds float, float* %tmp13433, i64 1
+ %tmp13435 = getelementptr inbounds float, float* %tmp13434, i64 1
+ %tmp13436 = getelementptr inbounds float, float* %tmp13435, i64 1
+ %tmp13437 = getelementptr inbounds float, float* %tmp13436, i64 1
+ %tmp13438 = getelementptr inbounds float, float* %tmp13437, i64 1
+ %tmp13439 = getelementptr inbounds float, float* %tmp13438, i64 1
+ %tmp13440 = getelementptr inbounds float, float* %tmp13439, i64 1
+ %tmp13441 = getelementptr inbounds float, float* %tmp13440, i64 1
+ %tmp13442 = getelementptr inbounds float, float* %tmp13441, i64 1
+ %tmp13443 = getelementptr inbounds float, float* %tmp13442, i64 1
+ %tmp13444 = getelementptr inbounds float, float* %tmp13443, i64 1
+ %tmp13445 = getelementptr inbounds float, float* %tmp13444, i64 1
+ %tmp13446 = getelementptr inbounds float, float* %tmp13445, i64 1
+ %tmp13447 = getelementptr inbounds float, float* %tmp13446, i64 1
+ %tmp13448 = getelementptr inbounds float, float* %tmp13447, i64 1
+ %tmp13449 = getelementptr inbounds float, float* %tmp13448, i64 1
+ %tmp13450 = getelementptr inbounds float, float* %tmp13449, i64 1
+ %tmp13451 = getelementptr inbounds float, float* %tmp13450, i64 1
+ %tmp13452 = getelementptr inbounds float, float* %tmp13451, i64 1
+ %tmp13453 = getelementptr inbounds float, float* %tmp13452, i64 1
+ %tmp13454 = getelementptr inbounds float, float* %tmp13453, i64 1
+ %tmp13455 = getelementptr inbounds float, float* %tmp13454, i64 1
+ %tmp13456 = getelementptr inbounds float, float* %tmp13455, i64 1
+ %tmp13457 = getelementptr inbounds float, float* %tmp13456, i64 1
+ %tmp13458 = getelementptr inbounds float, float* %tmp13457, i64 1
+ %tmp13459 = getelementptr inbounds float, float* %tmp13458, i64 1
+ %tmp13460 = getelementptr inbounds float, float* %tmp13459, i64 1
+ %tmp13461 = getelementptr inbounds float, float* %tmp13460, i64 1
+ %tmp13462 = getelementptr inbounds float, float* %tmp13461, i64 1
+ %tmp13463 = getelementptr inbounds float, float* %tmp13462, i64 1
+ %tmp13464 = getelementptr inbounds float, float* %tmp13463, i64 1
+ %tmp13465 = getelementptr inbounds float, float* %tmp13464, i64 1
+ %tmp13466 = getelementptr inbounds float, float* %tmp13465, i64 1
+ %tmp13467 = getelementptr inbounds float, float* %tmp13466, i64 1
+ %tmp13468 = getelementptr inbounds float, float* %tmp13467, i64 1
+ %tmp13469 = getelementptr inbounds float, float* %tmp13468, i64 1
+ %tmp13470 = getelementptr inbounds float, float* %tmp13469, i64 1
+ %tmp13471 = getelementptr inbounds float, float* %tmp13470, i64 1
+ %tmp13472 = getelementptr inbounds float, float* %tmp13471, i64 1
+ %tmp13473 = getelementptr inbounds float, float* %tmp13472, i64 1
+ %tmp13474 = getelementptr inbounds float, float* %tmp13473, i64 1
+ %tmp13475 = getelementptr inbounds float, float* %tmp13474, i64 1
+ %tmp13476 = getelementptr inbounds float, float* %tmp13475, i64 1
+ %tmp13477 = getelementptr inbounds float, float* %tmp13476, i64 1
+ %tmp13478 = getelementptr inbounds float, float* %tmp13477, i64 1
+ %tmp13479 = getelementptr inbounds float, float* %tmp13478, i64 1
+ %tmp13480 = getelementptr inbounds float, float* %tmp13479, i64 1
+ %tmp13481 = getelementptr inbounds float, float* %tmp13480, i64 1
+ %tmp13482 = getelementptr inbounds float, float* %tmp13481, i64 1
+ %tmp13483 = getelementptr inbounds float, float* %tmp13482, i64 1
+ %tmp13484 = getelementptr inbounds float, float* %tmp13483, i64 1
+ %tmp13485 = getelementptr inbounds float, float* %tmp13484, i64 1
+ %tmp13486 = getelementptr inbounds float, float* %tmp13485, i64 1
+ %tmp13487 = getelementptr inbounds float, float* %tmp13486, i64 1
+ %tmp13488 = getelementptr inbounds float, float* %tmp13487, i64 1
+ %tmp13489 = getelementptr inbounds float, float* %tmp13488, i64 1
+ %tmp13490 = getelementptr inbounds float, float* %tmp13489, i64 1
+ %tmp13491 = getelementptr inbounds float, float* %tmp13490, i64 1
+ %tmp13492 = getelementptr inbounds float, float* %tmp13491, i64 1
+ %tmp13493 = getelementptr inbounds float, float* %tmp13492, i64 1
+ %tmp13494 = getelementptr inbounds float, float* %tmp13493, i64 1
+ %tmp13495 = getelementptr inbounds float, float* %tmp13494, i64 1
+ %tmp13496 = getelementptr inbounds float, float* %tmp13495, i64 1
+ %tmp13497 = getelementptr inbounds float, float* %tmp13496, i64 1
+ %tmp13498 = getelementptr inbounds float, float* %tmp13497, i64 1
+ %tmp13499 = getelementptr inbounds float, float* %tmp13498, i64 1
+ %tmp13500 = getelementptr inbounds float, float* %tmp13499, i64 1
+ %tmp13501 = getelementptr inbounds float, float* %tmp13500, i64 1
+ %tmp13502 = getelementptr inbounds float, float* %tmp13501, i64 1
+ %tmp13503 = getelementptr inbounds float, float* %tmp13502, i64 1
+ %tmp13504 = getelementptr inbounds float, float* %tmp13503, i64 1
+ %tmp13505 = getelementptr inbounds float, float* %tmp13504, i64 1
+ %tmp13506 = getelementptr inbounds float, float* %tmp13505, i64 1
+ %tmp13507 = getelementptr inbounds float, float* %tmp13506, i64 1
+ %tmp13508 = getelementptr inbounds float, float* %tmp13507, i64 1
+ %tmp13509 = getelementptr inbounds float, float* %tmp13508, i64 1
+ %tmp13510 = getelementptr inbounds float, float* %tmp13509, i64 1
+ %tmp13511 = getelementptr inbounds float, float* %tmp13510, i64 1
+ %tmp13512 = getelementptr inbounds float, float* %tmp13511, i64 1
+ %tmp13513 = getelementptr inbounds float, float* %tmp13512, i64 1
+ %tmp13514 = getelementptr inbounds float, float* %tmp13513, i64 1
+ %tmp13515 = getelementptr inbounds float, float* %tmp13514, i64 1
+ %tmp13516 = getelementptr inbounds float, float* %tmp13515, i64 1
+ %tmp13517 = getelementptr inbounds float, float* %tmp13516, i64 1
+ %tmp13518 = getelementptr inbounds float, float* %tmp13517, i64 1
+ %tmp13519 = getelementptr inbounds float, float* %tmp13518, i64 1
+ %tmp13520 = getelementptr inbounds float, float* %tmp13519, i64 1
+ %tmp13521 = getelementptr inbounds float, float* %tmp13520, i64 1
+ %tmp13522 = getelementptr inbounds float, float* %tmp13521, i64 1
+ %tmp13523 = getelementptr inbounds float, float* %tmp13522, i64 1
+ %tmp13524 = getelementptr inbounds float, float* %tmp13523, i64 1
+ %tmp13525 = getelementptr inbounds float, float* %tmp13524, i64 1
+ %tmp13526 = getelementptr inbounds float, float* %tmp13525, i64 1
+ %tmp13527 = getelementptr inbounds float, float* %tmp13526, i64 1
+ %tmp13528 = getelementptr inbounds float, float* %tmp13527, i64 1
+ %tmp13529 = getelementptr inbounds float, float* %tmp13528, i64 1
+ %tmp13530 = getelementptr inbounds float, float* %tmp13529, i64 1
+ %tmp13531 = getelementptr inbounds float, float* %tmp13530, i64 1
+ %tmp13532 = getelementptr inbounds float, float* %tmp13531, i64 1
+ %tmp13533 = getelementptr inbounds float, float* %tmp13532, i64 1
+ %tmp13534 = getelementptr inbounds float, float* %tmp13533, i64 1
+ %tmp13535 = getelementptr inbounds float, float* %tmp13534, i64 1
+ %tmp13536 = getelementptr inbounds float, float* %tmp13535, i64 1
+ %tmp13537 = getelementptr inbounds float, float* %tmp13536, i64 1
+ %tmp13538 = getelementptr inbounds float, float* %tmp13537, i64 1
+ %tmp13539 = getelementptr inbounds float, float* %tmp13538, i64 1
+ %tmp13540 = getelementptr inbounds float, float* %tmp13539, i64 1
+ %tmp13541 = getelementptr inbounds float, float* %tmp13540, i64 1
+ %tmp13542 = getelementptr inbounds float, float* %tmp13541, i64 1
+ %tmp13543 = getelementptr inbounds float, float* %tmp13542, i64 1
+ %tmp13544 = getelementptr inbounds float, float* %tmp13543, i64 1
+ %tmp13545 = getelementptr inbounds float, float* %tmp13544, i64 1
+ %tmp13546 = getelementptr inbounds float, float* %tmp13545, i64 1
+ %tmp13547 = getelementptr inbounds float, float* %tmp13546, i64 1
+ %tmp13548 = getelementptr inbounds float, float* %tmp13547, i64 1
+ %tmp13549 = getelementptr inbounds float, float* %tmp13548, i64 1
+ %tmp13550 = getelementptr inbounds float, float* %tmp13549, i64 1
+ %tmp13551 = getelementptr inbounds float, float* %tmp13550, i64 1
+ %tmp13552 = getelementptr inbounds float, float* %tmp13551, i64 1
+ %tmp13553 = getelementptr inbounds float, float* %tmp13552, i64 1
+ %tmp13554 = getelementptr inbounds float, float* %tmp13553, i64 1
+ %tmp13555 = getelementptr inbounds float, float* %tmp13554, i64 1
+ %tmp13556 = getelementptr inbounds float, float* %tmp13555, i64 1
+ %tmp13557 = getelementptr inbounds float, float* %tmp13556, i64 1
+ %tmp13558 = getelementptr inbounds float, float* %tmp13557, i64 1
+ %tmp13559 = getelementptr inbounds float, float* %tmp13558, i64 1
+ %tmp13560 = getelementptr inbounds float, float* %tmp13559, i64 1
+ %tmp13561 = getelementptr inbounds float, float* %tmp13560, i64 1
+ %tmp13562 = getelementptr inbounds float, float* %tmp13561, i64 1
+ %tmp13563 = getelementptr inbounds float, float* %tmp13562, i64 1
+ %tmp13564 = getelementptr inbounds float, float* %tmp13563, i64 1
+ %tmp13565 = getelementptr inbounds float, float* %tmp13564, i64 1
+ %tmp13566 = getelementptr inbounds float, float* %tmp13565, i64 1
+ %tmp13567 = getelementptr inbounds float, float* %tmp13566, i64 1
+ %tmp13568 = getelementptr inbounds float, float* %tmp13567, i64 1
+ %tmp13569 = getelementptr inbounds float, float* %tmp13568, i64 1
+ %tmp13570 = getelementptr inbounds float, float* %tmp13569, i64 1
+ %tmp13571 = getelementptr inbounds float, float* %tmp13570, i64 1
+ %tmp13572 = getelementptr inbounds float, float* %tmp13571, i64 1
+ %tmp13573 = getelementptr inbounds float, float* %tmp13572, i64 1
+ %tmp13574 = getelementptr inbounds float, float* %tmp13573, i64 1
+ %tmp13575 = getelementptr inbounds float, float* %tmp13574, i64 1
+ %tmp13576 = getelementptr inbounds float, float* %tmp13575, i64 1
+ %tmp13577 = getelementptr inbounds float, float* %tmp13576, i64 1
+ %tmp13578 = getelementptr inbounds float, float* %tmp13577, i64 1
+ %tmp13579 = getelementptr inbounds float, float* %tmp13578, i64 1
+ %tmp13580 = getelementptr inbounds float, float* %tmp13579, i64 1
+ %tmp13581 = getelementptr inbounds float, float* %tmp13580, i64 1
+ %tmp13582 = getelementptr inbounds float, float* %tmp13581, i64 1
+ %tmp13583 = getelementptr inbounds float, float* %tmp13582, i64 1
+ %tmp13584 = getelementptr inbounds float, float* %tmp13583, i64 1
+ %tmp13585 = getelementptr inbounds float, float* %tmp13584, i64 1
+ %tmp13586 = getelementptr inbounds float, float* %tmp13585, i64 1
+ %tmp13587 = getelementptr inbounds float, float* %tmp13586, i64 1
+ %tmp13588 = getelementptr inbounds float, float* %tmp13587, i64 1
+ %tmp13589 = getelementptr inbounds float, float* %tmp13588, i64 1
+ %tmp13590 = getelementptr inbounds float, float* %tmp13589, i64 1
+ %tmp13591 = getelementptr inbounds float, float* %tmp13590, i64 1
+ %tmp13592 = getelementptr inbounds float, float* %tmp13591, i64 1
+ %tmp13593 = getelementptr inbounds float, float* %tmp13592, i64 1
+ %tmp13594 = getelementptr inbounds float, float* %tmp13593, i64 1
+ %tmp13595 = getelementptr inbounds float, float* %tmp13594, i64 1
+ %tmp13596 = getelementptr inbounds float, float* %tmp13595, i64 1
+ %tmp13597 = getelementptr inbounds float, float* %tmp13596, i64 1
+ %tmp13598 = getelementptr inbounds float, float* %tmp13597, i64 1
+ %tmp13599 = getelementptr inbounds float, float* %tmp13598, i64 1
+ %tmp13600 = getelementptr inbounds float, float* %tmp13599, i64 1
+ %tmp13601 = getelementptr inbounds float, float* %tmp13600, i64 1
+ %tmp13602 = getelementptr inbounds float, float* %tmp13601, i64 1
+ %tmp13603 = getelementptr inbounds float, float* %tmp13602, i64 1
+ %tmp13604 = getelementptr inbounds float, float* %tmp13603, i64 1
+ %tmp13605 = getelementptr inbounds float, float* %tmp13604, i64 1
+ %tmp13606 = getelementptr inbounds float, float* %tmp13605, i64 1
+ %tmp13607 = getelementptr inbounds float, float* %tmp13606, i64 1
+ %tmp13608 = getelementptr inbounds float, float* %tmp13607, i64 1
+ %tmp13609 = getelementptr inbounds float, float* %tmp13608, i64 1
+ %tmp13610 = getelementptr inbounds float, float* %tmp13609, i64 1
+ %tmp13611 = getelementptr inbounds float, float* %tmp13610, i64 1
+ %tmp13612 = getelementptr inbounds float, float* %tmp13611, i64 1
+ %tmp13613 = getelementptr inbounds float, float* %tmp13612, i64 1
+ %tmp13614 = getelementptr inbounds float, float* %tmp13613, i64 1
+ %tmp13615 = getelementptr inbounds float, float* %tmp13614, i64 1
+ %tmp13616 = getelementptr inbounds float, float* %tmp13615, i64 1
+ %tmp13617 = getelementptr inbounds float, float* %tmp13616, i64 1
+ %tmp13618 = getelementptr inbounds float, float* %tmp13617, i64 1
+ %tmp13619 = getelementptr inbounds float, float* %tmp13618, i64 1
+ %tmp13620 = getelementptr inbounds float, float* %tmp13619, i64 1
+ %tmp13621 = getelementptr inbounds float, float* %tmp13620, i64 1
+ %tmp13622 = getelementptr inbounds float, float* %tmp13621, i64 1
+ %tmp13623 = getelementptr inbounds float, float* %tmp13622, i64 1
+ %tmp13624 = getelementptr inbounds float, float* %tmp13623, i64 1
+ %tmp13625 = getelementptr inbounds float, float* %tmp13624, i64 1
+ %tmp13626 = getelementptr inbounds float, float* %tmp13625, i64 1
+ %tmp13627 = getelementptr inbounds float, float* %tmp13626, i64 1
+ %tmp13628 = getelementptr inbounds float, float* %tmp13627, i64 1
+ %tmp13629 = getelementptr inbounds float, float* %tmp13628, i64 1
+ %tmp13630 = getelementptr inbounds float, float* %tmp13629, i64 1
+ %tmp13631 = getelementptr inbounds float, float* %tmp13630, i64 1
+ %tmp13632 = getelementptr inbounds float, float* %tmp13631, i64 1
+ %tmp13633 = getelementptr inbounds float, float* %tmp13632, i64 1
+ %tmp13634 = getelementptr inbounds float, float* %tmp13633, i64 1
+ %tmp13635 = getelementptr inbounds float, float* %tmp13634, i64 1
+ %tmp13636 = getelementptr inbounds float, float* %tmp13635, i64 1
+ %tmp13637 = getelementptr inbounds float, float* %tmp13636, i64 1
+ %tmp13638 = getelementptr inbounds float, float* %tmp13637, i64 1
+ %tmp13639 = getelementptr inbounds float, float* %tmp13638, i64 1
+ %tmp13640 = getelementptr inbounds float, float* %tmp13639, i64 1
+ %tmp13641 = getelementptr inbounds float, float* %tmp13640, i64 1
+ %tmp13642 = getelementptr inbounds float, float* %tmp13641, i64 1
+ %tmp13643 = getelementptr inbounds float, float* %tmp13642, i64 1
+ %tmp13644 = getelementptr inbounds float, float* %tmp13643, i64 1
+ %tmp13645 = getelementptr inbounds float, float* %tmp13644, i64 1
+ %tmp13646 = getelementptr inbounds float, float* %tmp13645, i64 1
+ %tmp13647 = getelementptr inbounds float, float* %tmp13646, i64 1
+ %tmp13648 = getelementptr inbounds float, float* %tmp13647, i64 1
+ %tmp13649 = getelementptr inbounds float, float* %tmp13648, i64 1
+ %tmp13650 = getelementptr inbounds float, float* %tmp13649, i64 1
+ %tmp13651 = getelementptr inbounds float, float* %tmp13650, i64 1
+ %tmp13652 = getelementptr inbounds float, float* %tmp13651, i64 1
+ %tmp13653 = getelementptr inbounds float, float* %tmp13652, i64 1
+ %tmp13654 = getelementptr inbounds float, float* %tmp13653, i64 1
+ %tmp13655 = getelementptr inbounds float, float* %tmp13654, i64 1
+ %tmp13656 = getelementptr inbounds float, float* %tmp13655, i64 1
+ %tmp13657 = getelementptr inbounds float, float* %tmp13656, i64 1
+ %tmp13658 = getelementptr inbounds float, float* %tmp13657, i64 1
+ %tmp13659 = getelementptr inbounds float, float* %tmp13658, i64 1
+ %tmp13660 = getelementptr inbounds float, float* %tmp13659, i64 1
+ %tmp13661 = getelementptr inbounds float, float* %tmp13660, i64 1
+ %tmp13662 = getelementptr inbounds float, float* %tmp13661, i64 1
+ %tmp13663 = getelementptr inbounds float, float* %tmp13662, i64 1
+ %tmp13664 = getelementptr inbounds float, float* %tmp13663, i64 1
+ %tmp13665 = getelementptr inbounds float, float* %tmp13664, i64 1
+ %tmp13666 = getelementptr inbounds float, float* %tmp13665, i64 1
+ %tmp13667 = getelementptr inbounds float, float* %tmp13666, i64 1
+ %tmp13668 = getelementptr inbounds float, float* %tmp13667, i64 1
+ %tmp13669 = getelementptr inbounds float, float* %tmp13668, i64 1
+ %tmp13670 = getelementptr inbounds float, float* %tmp13669, i64 1
+ %tmp13671 = getelementptr inbounds float, float* %tmp13670, i64 1
+ %tmp13672 = getelementptr inbounds float, float* %tmp13671, i64 1
+ %tmp13673 = getelementptr inbounds float, float* %tmp13672, i64 1
+ %tmp13674 = getelementptr inbounds float, float* %tmp13673, i64 1
+ %tmp13675 = getelementptr inbounds float, float* %tmp13674, i64 1
+ %tmp13676 = getelementptr inbounds float, float* %tmp13675, i64 1
+ %tmp13677 = getelementptr inbounds float, float* %tmp13676, i64 1
+ %tmp13678 = getelementptr inbounds float, float* %tmp13677, i64 1
+ %tmp13679 = getelementptr inbounds float, float* %tmp13678, i64 1
+ %tmp13680 = getelementptr inbounds float, float* %tmp13679, i64 1
+ %tmp13681 = getelementptr inbounds float, float* %tmp13680, i64 1
+ %tmp13682 = getelementptr inbounds float, float* %tmp13681, i64 1
+ %tmp13683 = getelementptr inbounds float, float* %tmp13682, i64 1
+ %tmp13684 = getelementptr inbounds float, float* %tmp13683, i64 1
+ %tmp13685 = getelementptr inbounds float, float* %tmp13684, i64 1
+ %tmp13686 = getelementptr inbounds float, float* %tmp13685, i64 1
+ %tmp13687 = getelementptr inbounds float, float* %tmp13686, i64 1
+ %tmp13688 = getelementptr inbounds float, float* %tmp13687, i64 1
+ %tmp13689 = getelementptr inbounds float, float* %tmp13688, i64 1
+ %tmp13690 = getelementptr inbounds float, float* %tmp13689, i64 1
+ %tmp13691 = getelementptr inbounds float, float* %tmp13690, i64 1
+ %tmp13692 = getelementptr inbounds float, float* %tmp13691, i64 1
+ %tmp13693 = getelementptr inbounds float, float* %tmp13692, i64 1
+ %tmp13694 = getelementptr inbounds float, float* %tmp13693, i64 1
+ %tmp13695 = getelementptr inbounds float, float* %tmp13694, i64 1
+ %tmp13696 = getelementptr inbounds float, float* %tmp13695, i64 1
+ %tmp13697 = getelementptr inbounds float, float* %tmp13696, i64 1
+ %tmp13698 = getelementptr inbounds float, float* %tmp13697, i64 1
+ %tmp13699 = getelementptr inbounds float, float* %tmp13698, i64 1
+ %tmp13700 = getelementptr inbounds float, float* %tmp13699, i64 1
+ %tmp13701 = getelementptr inbounds float, float* %tmp13700, i64 1
+ %tmp13702 = getelementptr inbounds float, float* %tmp13701, i64 1
+ %tmp13703 = getelementptr inbounds float, float* %tmp13702, i64 1
+ %tmp13704 = getelementptr inbounds float, float* %tmp13703, i64 1
+ %tmp13705 = getelementptr inbounds float, float* %tmp13704, i64 1
+ %tmp13706 = getelementptr inbounds float, float* %tmp13705, i64 1
+ %tmp13707 = getelementptr inbounds float, float* %tmp13706, i64 1
+ %tmp13708 = getelementptr inbounds float, float* %tmp13707, i64 1
+ %tmp13709 = getelementptr inbounds float, float* %tmp13708, i64 1
+ %tmp13710 = getelementptr inbounds float, float* %tmp13709, i64 1
+ %tmp13711 = getelementptr inbounds float, float* %tmp13710, i64 1
+ %tmp13712 = getelementptr inbounds float, float* %tmp13711, i64 1
+ %tmp13713 = getelementptr inbounds float, float* %tmp13712, i64 1
+ %tmp13714 = getelementptr inbounds float, float* %tmp13713, i64 1
+ %tmp13715 = getelementptr inbounds float, float* %tmp13714, i64 1
+ %tmp13716 = getelementptr inbounds float, float* %tmp13715, i64 1
+ %tmp13717 = getelementptr inbounds float, float* %tmp13716, i64 1
+ %tmp13718 = getelementptr inbounds float, float* %tmp13717, i64 1
+ %tmp13719 = getelementptr inbounds float, float* %tmp13718, i64 1
+ %tmp13720 = getelementptr inbounds float, float* %tmp13719, i64 1
+ %tmp13721 = getelementptr inbounds float, float* %tmp13720, i64 1
+ %tmp13722 = getelementptr inbounds float, float* %tmp13721, i64 1
+ %tmp13723 = getelementptr inbounds float, float* %tmp13722, i64 1
+ %tmp13724 = getelementptr inbounds float, float* %tmp13723, i64 1
+ %tmp13725 = getelementptr inbounds float, float* %tmp13724, i64 1
+ %tmp13726 = getelementptr inbounds float, float* %tmp13725, i64 1
+ %tmp13727 = getelementptr inbounds float, float* %tmp13726, i64 1
+ %tmp13728 = getelementptr inbounds float, float* %tmp13727, i64 1
+ %tmp13729 = getelementptr inbounds float, float* %tmp13728, i64 1
+ %tmp13730 = getelementptr inbounds float, float* %tmp13729, i64 1
+ %tmp13731 = getelementptr inbounds float, float* %tmp13730, i64 1
+ %tmp13732 = getelementptr inbounds float, float* %tmp13731, i64 1
+ %tmp13733 = getelementptr inbounds float, float* %tmp13732, i64 1
+ %tmp13734 = getelementptr inbounds float, float* %tmp13733, i64 1
+ %tmp13735 = getelementptr inbounds float, float* %tmp13734, i64 1
+ %tmp13736 = getelementptr inbounds float, float* %tmp13735, i64 1
+ %tmp13737 = getelementptr inbounds float, float* %tmp13736, i64 1
+ %tmp13738 = getelementptr inbounds float, float* %tmp13737, i64 1
+ %tmp13739 = getelementptr inbounds float, float* %tmp13738, i64 1
+ %tmp13740 = getelementptr inbounds float, float* %tmp13739, i64 1
+ %tmp13741 = getelementptr inbounds float, float* %tmp13740, i64 1
+ %tmp13742 = getelementptr inbounds float, float* %tmp13741, i64 1
+ %tmp13743 = getelementptr inbounds float, float* %tmp13742, i64 1
+ %tmp13744 = getelementptr inbounds float, float* %tmp13743, i64 1
+ %tmp13745 = getelementptr inbounds float, float* %tmp13744, i64 1
+ %tmp13746 = getelementptr inbounds float, float* %tmp13745, i64 1
+ %tmp13747 = getelementptr inbounds float, float* %tmp13746, i64 1
+ %tmp13748 = getelementptr inbounds float, float* %tmp13747, i64 1
+ %tmp13749 = getelementptr inbounds float, float* %tmp13748, i64 1
+ %tmp13750 = getelementptr inbounds float, float* %tmp13749, i64 1
+ %tmp13751 = getelementptr inbounds float, float* %tmp13750, i64 1
+ %tmp13752 = getelementptr inbounds float, float* %tmp13751, i64 1
+ %tmp13753 = getelementptr inbounds float, float* %tmp13752, i64 1
+ %tmp13754 = getelementptr inbounds float, float* %tmp13753, i64 1
+ %tmp13755 = getelementptr inbounds float, float* %tmp13754, i64 1
+ %tmp13756 = getelementptr inbounds float, float* %tmp13755, i64 1
+ %tmp13757 = getelementptr inbounds float, float* %tmp13756, i64 1
+ %tmp13758 = getelementptr inbounds float, float* %tmp13757, i64 1
+ %tmp13759 = getelementptr inbounds float, float* %tmp13758, i64 1
+ %tmp13760 = getelementptr inbounds float, float* %tmp13759, i64 1
+ %tmp13761 = getelementptr inbounds float, float* %tmp13760, i64 1
+ %tmp13762 = getelementptr inbounds float, float* %tmp13761, i64 1
+ %tmp13763 = getelementptr inbounds float, float* %tmp13762, i64 1
+ %tmp13764 = getelementptr inbounds float, float* %tmp13763, i64 1
+ %tmp13765 = getelementptr inbounds float, float* %tmp13764, i64 1
+ %tmp13766 = getelementptr inbounds float, float* %tmp13765, i64 1
+ %tmp13767 = getelementptr inbounds float, float* %tmp13766, i64 1
+ %tmp13768 = getelementptr inbounds float, float* %tmp13767, i64 1
+ %tmp13769 = getelementptr inbounds float, float* %tmp13768, i64 1
+ %tmp13770 = getelementptr inbounds float, float* %tmp13769, i64 1
+ %tmp13771 = getelementptr inbounds float, float* %tmp13770, i64 1
+ %tmp13772 = getelementptr inbounds float, float* %tmp13771, i64 1
+ %tmp13773 = getelementptr inbounds float, float* %tmp13772, i64 1
+ %tmp13774 = getelementptr inbounds float, float* %tmp13773, i64 1
+ %tmp13775 = getelementptr inbounds float, float* %tmp13774, i64 1
+ %tmp13776 = getelementptr inbounds float, float* %tmp13775, i64 1
+ %tmp13777 = getelementptr inbounds float, float* %tmp13776, i64 1
+ %tmp13778 = getelementptr inbounds float, float* %tmp13777, i64 1
+ %tmp13779 = getelementptr inbounds float, float* %tmp13778, i64 1
+ %tmp13780 = getelementptr inbounds float, float* %tmp13779, i64 1
+ %tmp13781 = getelementptr inbounds float, float* %tmp13780, i64 1
+ %tmp13782 = getelementptr inbounds float, float* %tmp13781, i64 1
+ %tmp13783 = getelementptr inbounds float, float* %tmp13782, i64 1
+ %tmp13784 = getelementptr inbounds float, float* %tmp13783, i64 1
+ %tmp13785 = getelementptr inbounds float, float* %tmp13784, i64 1
+ %tmp13786 = getelementptr inbounds float, float* %tmp13785, i64 1
+ %tmp13787 = getelementptr inbounds float, float* %tmp13786, i64 1
+ %tmp13788 = getelementptr inbounds float, float* %tmp13787, i64 1
+ %tmp13789 = getelementptr inbounds float, float* %tmp13788, i64 1
+ %tmp13790 = getelementptr inbounds float, float* %tmp13789, i64 1
+ %tmp13791 = getelementptr inbounds float, float* %tmp13790, i64 1
+ %tmp13792 = getelementptr inbounds float, float* %tmp13791, i64 1
+ %tmp13793 = getelementptr inbounds float, float* %tmp13792, i64 1
+ %tmp13794 = getelementptr inbounds float, float* %tmp13793, i64 1
+ %tmp13795 = getelementptr inbounds float, float* %tmp13794, i64 1
+ %tmp13796 = getelementptr inbounds float, float* %tmp13795, i64 1
+ %tmp13797 = getelementptr inbounds float, float* %tmp13796, i64 1
+ %tmp13798 = getelementptr inbounds float, float* %tmp13797, i64 1
+ %tmp13799 = getelementptr inbounds float, float* %tmp13798, i64 1
+ %tmp13800 = getelementptr inbounds float, float* %tmp13799, i64 1
+ %tmp13801 = getelementptr inbounds float, float* %tmp13800, i64 1
+ %tmp13802 = getelementptr inbounds float, float* %tmp13801, i64 1
+ %tmp13803 = getelementptr inbounds float, float* %tmp13802, i64 1
+ %tmp13804 = getelementptr inbounds float, float* %tmp13803, i64 1
+ %tmp13805 = getelementptr inbounds float, float* %tmp13804, i64 1
+ %tmp13806 = getelementptr inbounds float, float* %tmp13805, i64 1
+ %tmp13807 = getelementptr inbounds float, float* %tmp13806, i64 1
+ %tmp13808 = getelementptr inbounds float, float* %tmp13807, i64 1
+ %tmp13809 = getelementptr inbounds float, float* %tmp13808, i64 1
+ %tmp13810 = getelementptr inbounds float, float* %tmp13809, i64 1
+ %tmp13811 = getelementptr inbounds float, float* %tmp13810, i64 1
+ %tmp13812 = getelementptr inbounds float, float* %tmp13811, i64 1
+ %tmp13813 = getelementptr inbounds float, float* %tmp13812, i64 1
+ %tmp13814 = getelementptr inbounds float, float* %tmp13813, i64 1
+ %tmp13815 = getelementptr inbounds float, float* %tmp13814, i64 1
+ %tmp13816 = getelementptr inbounds float, float* %tmp13815, i64 1
+ %tmp13817 = getelementptr inbounds float, float* %tmp13816, i64 1
+ %tmp13818 = getelementptr inbounds float, float* %tmp13817, i64 1
+ %tmp13819 = getelementptr inbounds float, float* %tmp13818, i64 1
+ %tmp13820 = getelementptr inbounds float, float* %tmp13819, i64 1
+ %tmp13821 = getelementptr inbounds float, float* %tmp13820, i64 1
+ %tmp13822 = getelementptr inbounds float, float* %tmp13821, i64 1
+ %tmp13823 = getelementptr inbounds float, float* %tmp13822, i64 1
+ %tmp13824 = getelementptr inbounds float, float* %tmp13823, i64 1
+ %tmp13825 = getelementptr inbounds float, float* %tmp13824, i64 1
+ %tmp13826 = getelementptr inbounds float, float* %tmp13825, i64 1
+ %tmp13827 = getelementptr inbounds float, float* %tmp13826, i64 1
+ %tmp13828 = getelementptr inbounds float, float* %tmp13827, i64 1
+ %tmp13829 = getelementptr inbounds float, float* %tmp13828, i64 1
+ %tmp13830 = getelementptr inbounds float, float* %tmp13829, i64 1
+ %tmp13831 = getelementptr inbounds float, float* %tmp13830, i64 1
+ %tmp13832 = getelementptr inbounds float, float* %tmp13831, i64 1
+ %tmp13833 = getelementptr inbounds float, float* %tmp13832, i64 1
+ %tmp13834 = getelementptr inbounds float, float* %tmp13833, i64 1
+ %tmp13835 = getelementptr inbounds float, float* %tmp13834, i64 1
+ %tmp13836 = getelementptr inbounds float, float* %tmp13835, i64 1
+ %tmp13837 = getelementptr inbounds float, float* %tmp13836, i64 1
+ %tmp13838 = getelementptr inbounds float, float* %tmp13837, i64 1
+ %tmp13839 = getelementptr inbounds float, float* %tmp13838, i64 1
+ %tmp13840 = getelementptr inbounds float, float* %tmp13839, i64 1
+ %tmp13841 = getelementptr inbounds float, float* %tmp13840, i64 1
+ %tmp13842 = getelementptr inbounds float, float* %tmp13841, i64 1
+ %tmp13843 = getelementptr inbounds float, float* %tmp13842, i64 1
+ %tmp13844 = getelementptr inbounds float, float* %tmp13843, i64 1
+ %tmp13845 = getelementptr inbounds float, float* %tmp13844, i64 1
+ %tmp13846 = getelementptr inbounds float, float* %tmp13845, i64 1
+ %tmp13847 = getelementptr inbounds float, float* %tmp13846, i64 1
+ %tmp13848 = getelementptr inbounds float, float* %tmp13847, i64 1
+ %tmp13849 = getelementptr inbounds float, float* %tmp13848, i64 1
+ %tmp13850 = getelementptr inbounds float, float* %tmp13849, i64 1
+ %tmp13851 = getelementptr inbounds float, float* %tmp13850, i64 1
+ %tmp13852 = getelementptr inbounds float, float* %tmp13851, i64 1
+ %tmp13853 = getelementptr inbounds float, float* %tmp13852, i64 1
+ %tmp13854 = getelementptr inbounds float, float* %tmp13853, i64 1
+ %tmp13855 = getelementptr inbounds float, float* %tmp13854, i64 1
+ %tmp13856 = getelementptr inbounds float, float* %tmp13855, i64 1
+ %tmp13857 = getelementptr inbounds float, float* %tmp13856, i64 1
+ %tmp13858 = getelementptr inbounds float, float* %tmp13857, i64 1
+ %tmp13859 = getelementptr inbounds float, float* %tmp13858, i64 1
+ %tmp13860 = getelementptr inbounds float, float* %tmp13859, i64 1
+ %tmp13861 = getelementptr inbounds float, float* %tmp13860, i64 1
+ %tmp13862 = getelementptr inbounds float, float* %tmp13861, i64 1
+ %tmp13863 = getelementptr inbounds float, float* %tmp13862, i64 1
+ %tmp13864 = getelementptr inbounds float, float* %tmp13863, i64 1
+ %tmp13865 = getelementptr inbounds float, float* %tmp13864, i64 1
+ %tmp13866 = getelementptr inbounds float, float* %tmp13865, i64 1
+ %tmp13867 = getelementptr inbounds float, float* %tmp13866, i64 1
+ %tmp13868 = getelementptr inbounds float, float* %tmp13867, i64 1
+ %tmp13869 = getelementptr inbounds float, float* %tmp13868, i64 1
+ %tmp13870 = getelementptr inbounds float, float* %tmp13869, i64 1
+ %tmp13871 = getelementptr inbounds float, float* %tmp13870, i64 1
+ %tmp13872 = getelementptr inbounds float, float* %tmp13871, i64 1
+ %tmp13873 = getelementptr inbounds float, float* %tmp13872, i64 1
+ %tmp13874 = getelementptr inbounds float, float* %tmp13873, i64 1
+ %tmp13875 = getelementptr inbounds float, float* %tmp13874, i64 1
+ %tmp13876 = getelementptr inbounds float, float* %tmp13875, i64 1
+ %tmp13877 = getelementptr inbounds float, float* %tmp13876, i64 1
+ %tmp13878 = getelementptr inbounds float, float* %tmp13877, i64 1
+ %tmp13879 = getelementptr inbounds float, float* %tmp13878, i64 1
+ %tmp13880 = getelementptr inbounds float, float* %tmp13879, i64 1
+ %tmp13881 = getelementptr inbounds float, float* %tmp13880, i64 1
+ %tmp13882 = getelementptr inbounds float, float* %tmp13881, i64 1
+ %tmp13883 = getelementptr inbounds float, float* %tmp13882, i64 1
+ %tmp13884 = getelementptr inbounds float, float* %tmp13883, i64 1
+ %tmp13885 = getelementptr inbounds float, float* %tmp13884, i64 1
+ %tmp13886 = getelementptr inbounds float, float* %tmp13885, i64 1
+ %tmp13887 = getelementptr inbounds float, float* %tmp13886, i64 1
+ %tmp13888 = getelementptr inbounds float, float* %tmp13887, i64 1
+ %tmp13889 = getelementptr inbounds float, float* %tmp13888, i64 1
+ %tmp13890 = getelementptr inbounds float, float* %tmp13889, i64 1
+ %tmp13891 = getelementptr inbounds float, float* %tmp13890, i64 1
+ %tmp13892 = getelementptr inbounds float, float* %tmp13891, i64 1
+ %tmp13893 = getelementptr inbounds float, float* %tmp13892, i64 1
+ %tmp13894 = getelementptr inbounds float, float* %tmp13893, i64 1
+ %tmp13895 = getelementptr inbounds float, float* %tmp13894, i64 1
+ %tmp13896 = getelementptr inbounds float, float* %tmp13895, i64 1
+ %tmp13897 = getelementptr inbounds float, float* %tmp13896, i64 1
+ %tmp13898 = getelementptr inbounds float, float* %tmp13897, i64 1
+ %tmp13899 = getelementptr inbounds float, float* %tmp13898, i64 1
+ %tmp13900 = getelementptr inbounds float, float* %tmp13899, i64 1
+ %tmp13901 = getelementptr inbounds float, float* %tmp13900, i64 1
+ %tmp13902 = getelementptr inbounds float, float* %tmp13901, i64 1
+ %tmp13903 = getelementptr inbounds float, float* %tmp13902, i64 1
+ %tmp13904 = getelementptr inbounds float, float* %tmp13903, i64 1
+ %tmp13905 = getelementptr inbounds float, float* %tmp13904, i64 1
+ %tmp13906 = getelementptr inbounds float, float* %tmp13905, i64 1
+ %tmp13907 = getelementptr inbounds float, float* %tmp13906, i64 1
+ %tmp13908 = getelementptr inbounds float, float* %tmp13907, i64 1
+ %tmp13909 = getelementptr inbounds float, float* %tmp13908, i64 1
+ %tmp13910 = getelementptr inbounds float, float* %tmp13909, i64 1
+ %tmp13911 = getelementptr inbounds float, float* %tmp13910, i64 1
+ %tmp13912 = getelementptr inbounds float, float* %tmp13911, i64 1
+ %tmp13913 = getelementptr inbounds float, float* %tmp13912, i64 1
+ %tmp13914 = getelementptr inbounds float, float* %tmp13913, i64 1
+ %tmp13915 = getelementptr inbounds float, float* %tmp13914, i64 1
+ %tmp13916 = getelementptr inbounds float, float* %tmp13915, i64 1
+ %tmp13917 = getelementptr inbounds float, float* %tmp13916, i64 1
+ %tmp13918 = getelementptr inbounds float, float* %tmp13917, i64 1
+ %tmp13919 = getelementptr inbounds float, float* %tmp13918, i64 1
+ %tmp13920 = getelementptr inbounds float, float* %tmp13919, i64 1
+ %tmp13921 = getelementptr inbounds float, float* %tmp13920, i64 1
+ %tmp13922 = getelementptr inbounds float, float* %tmp13921, i64 1
+ %tmp13923 = getelementptr inbounds float, float* %tmp13922, i64 1
+ %tmp13924 = getelementptr inbounds float, float* %tmp13923, i64 1
+ %tmp13925 = getelementptr inbounds float, float* %tmp13924, i64 1
+ %tmp13926 = getelementptr inbounds float, float* %tmp13925, i64 1
+ %tmp13927 = getelementptr inbounds float, float* %tmp13926, i64 1
+ %tmp13928 = getelementptr inbounds float, float* %tmp13927, i64 1
+ %tmp13929 = getelementptr inbounds float, float* %tmp13928, i64 1
+ %tmp13930 = getelementptr inbounds float, float* %tmp13929, i64 1
+ %tmp13931 = getelementptr inbounds float, float* %tmp13930, i64 1
+ %tmp13932 = getelementptr inbounds float, float* %tmp13931, i64 1
+ %tmp13933 = getelementptr inbounds float, float* %tmp13932, i64 1
+ %tmp13934 = getelementptr inbounds float, float* %tmp13933, i64 1
+ %tmp13935 = getelementptr inbounds float, float* %tmp13934, i64 1
+ %tmp13936 = getelementptr inbounds float, float* %tmp13935, i64 1
+ %tmp13937 = getelementptr inbounds float, float* %tmp13936, i64 1
+ %tmp13938 = getelementptr inbounds float, float* %tmp13937, i64 1
+ %tmp13939 = getelementptr inbounds float, float* %tmp13938, i64 1
+ %tmp13940 = getelementptr inbounds float, float* %tmp13939, i64 1
+ %tmp13941 = getelementptr inbounds float, float* %tmp13940, i64 1
+ %tmp13942 = getelementptr inbounds float, float* %tmp13941, i64 1
+ %tmp13943 = getelementptr inbounds float, float* %tmp13942, i64 1
+ %tmp13944 = getelementptr inbounds float, float* %tmp13943, i64 1
+ %tmp13945 = getelementptr inbounds float, float* %tmp13944, i64 1
+ %tmp13946 = getelementptr inbounds float, float* %tmp13945, i64 1
+ %tmp13947 = getelementptr inbounds float, float* %tmp13946, i64 1
+ %tmp13948 = getelementptr inbounds float, float* %tmp13947, i64 1
+ %tmp13949 = getelementptr inbounds float, float* %tmp13948, i64 1
+ %tmp13950 = getelementptr inbounds float, float* %tmp13949, i64 1
+ %tmp13951 = getelementptr inbounds float, float* %tmp13950, i64 1
+ %tmp13952 = getelementptr inbounds float, float* %tmp13951, i64 1
+ %tmp13953 = getelementptr inbounds float, float* %tmp13952, i64 1
+ %tmp13954 = getelementptr inbounds float, float* %tmp13953, i64 1
+ %tmp13955 = getelementptr inbounds float, float* %tmp13954, i64 1
+ %tmp13956 = getelementptr inbounds float, float* %tmp13955, i64 1
+ %tmp13957 = getelementptr inbounds float, float* %tmp13956, i64 1
+ %tmp13958 = getelementptr inbounds float, float* %tmp13957, i64 1
+ %tmp13959 = getelementptr inbounds float, float* %tmp13958, i64 1
+ %tmp13960 = getelementptr inbounds float, float* %tmp13959, i64 1
+ %tmp13961 = getelementptr inbounds float, float* %tmp13960, i64 1
+ %tmp13962 = getelementptr inbounds float, float* %tmp13961, i64 1
+ %tmp13963 = getelementptr inbounds float, float* %tmp13962, i64 1
+ %tmp13964 = getelementptr inbounds float, float* %tmp13963, i64 1
+ %tmp13965 = getelementptr inbounds float, float* %tmp13964, i64 1
+ %tmp13966 = getelementptr inbounds float, float* %tmp13965, i64 1
+ %tmp13967 = getelementptr inbounds float, float* %tmp13966, i64 1
+ %tmp13968 = getelementptr inbounds float, float* %tmp13967, i64 1
+ %tmp13969 = getelementptr inbounds float, float* %tmp13968, i64 1
+ %tmp13970 = getelementptr inbounds float, float* %tmp13969, i64 1
+ %tmp13971 = getelementptr inbounds float, float* %tmp13970, i64 1
+ %tmp13972 = getelementptr inbounds float, float* %tmp13971, i64 1
+ %tmp13973 = getelementptr inbounds float, float* %tmp13972, i64 1
+ %tmp13974 = getelementptr inbounds float, float* %tmp13973, i64 1
+ %tmp13975 = getelementptr inbounds float, float* %tmp13974, i64 1
+ %tmp13976 = getelementptr inbounds float, float* %tmp13975, i64 1
+ %tmp13977 = getelementptr inbounds float, float* %tmp13976, i64 1
+ %tmp13978 = getelementptr inbounds float, float* %tmp13977, i64 1
+ %tmp13979 = getelementptr inbounds float, float* %tmp13978, i64 1
+ %tmp13980 = getelementptr inbounds float, float* %tmp13979, i64 1
+ %tmp13981 = getelementptr inbounds float, float* %tmp13980, i64 1
+ %tmp13982 = getelementptr inbounds float, float* %tmp13981, i64 1
+ %tmp13983 = getelementptr inbounds float, float* %tmp13982, i64 1
+ %tmp13984 = getelementptr inbounds float, float* %tmp13983, i64 1
+ %tmp13985 = getelementptr inbounds float, float* %tmp13984, i64 1
+ %tmp13986 = getelementptr inbounds float, float* %tmp13985, i64 1
+ %tmp13987 = getelementptr inbounds float, float* %tmp13986, i64 1
+ %tmp13988 = getelementptr inbounds float, float* %tmp13987, i64 1
+ %tmp13989 = getelementptr inbounds float, float* %tmp13988, i64 1
+ %tmp13990 = getelementptr inbounds float, float* %tmp13989, i64 1
+ %tmp13991 = getelementptr inbounds float, float* %tmp13990, i64 1
+ %tmp13992 = getelementptr inbounds float, float* %tmp13991, i64 1
+ %tmp13993 = getelementptr inbounds float, float* %tmp13992, i64 1
+ %tmp13994 = getelementptr inbounds float, float* %tmp13993, i64 1
+ %tmp13995 = getelementptr inbounds float, float* %tmp13994, i64 1
+ %tmp13996 = getelementptr inbounds float, float* %tmp13995, i64 1
+ %tmp13997 = getelementptr inbounds float, float* %tmp13996, i64 1
+ %tmp13998 = getelementptr inbounds float, float* %tmp13997, i64 1
+ %tmp13999 = getelementptr inbounds float, float* %tmp13998, i64 1
+ %tmp14000 = getelementptr inbounds float, float* %tmp13999, i64 1
+ %tmp14001 = getelementptr inbounds float, float* %tmp14000, i64 1
+ %tmp14002 = getelementptr inbounds float, float* %tmp14001, i64 1
+ %tmp14003 = getelementptr inbounds float, float* %tmp14002, i64 1
+ %tmp14004 = getelementptr inbounds float, float* %tmp14003, i64 1
+ %tmp14005 = getelementptr inbounds float, float* %tmp14004, i64 1
+ %tmp14006 = getelementptr inbounds float, float* %tmp14005, i64 1
+ %tmp14007 = getelementptr inbounds float, float* %tmp14006, i64 1
+ %tmp14008 = getelementptr inbounds float, float* %tmp14007, i64 1
+ %tmp14009 = getelementptr inbounds float, float* %tmp14008, i64 1
+ %tmp14010 = getelementptr inbounds float, float* %tmp14009, i64 1
+ %tmp14011 = getelementptr inbounds float, float* %tmp14010, i64 1
+ %tmp14012 = getelementptr inbounds float, float* %tmp14011, i64 1
+ %tmp14013 = getelementptr inbounds float, float* %tmp14012, i64 1
+ %tmp14014 = getelementptr inbounds float, float* %tmp14013, i64 1
+ %tmp14015 = getelementptr inbounds float, float* %tmp14014, i64 1
+ %tmp14016 = getelementptr inbounds float, float* %tmp14015, i64 1
+ %tmp14017 = getelementptr inbounds float, float* %tmp14016, i64 1
+ %tmp14018 = getelementptr inbounds float, float* %tmp14017, i64 1
+ %tmp14019 = getelementptr inbounds float, float* %tmp14018, i64 1
+ %tmp14020 = getelementptr inbounds float, float* %tmp14019, i64 1
+ %tmp14021 = getelementptr inbounds float, float* %tmp14020, i64 1
+ %tmp14022 = getelementptr inbounds float, float* %tmp14021, i64 1
+ %tmp14023 = getelementptr inbounds float, float* %tmp14022, i64 1
+ %tmp14024 = getelementptr inbounds float, float* %tmp14023, i64 1
+ %tmp14025 = getelementptr inbounds float, float* %tmp14024, i64 1
+ %tmp14026 = getelementptr inbounds float, float* %tmp14025, i64 1
+ %tmp14027 = getelementptr inbounds float, float* %tmp14026, i64 1
+ %tmp14028 = getelementptr inbounds float, float* %tmp14027, i64 1
+ %tmp14029 = getelementptr inbounds float, float* %tmp14028, i64 1
+ %tmp14030 = getelementptr inbounds float, float* %tmp14029, i64 1
+ %tmp14031 = getelementptr inbounds float, float* %tmp14030, i64 1
+ %tmp14032 = getelementptr inbounds float, float* %tmp14031, i64 1
+ %tmp14033 = getelementptr inbounds float, float* %tmp14032, i64 1
+ %tmp14034 = getelementptr inbounds float, float* %tmp14033, i64 1
+ %tmp14035 = getelementptr inbounds float, float* %tmp14034, i64 1
+ %tmp14036 = getelementptr inbounds float, float* %tmp14035, i64 1
+ %tmp14037 = getelementptr inbounds float, float* %tmp14036, i64 1
+ %tmp14038 = getelementptr inbounds float, float* %tmp14037, i64 1
+ %tmp14039 = getelementptr inbounds float, float* %tmp14038, i64 1
+ %tmp14040 = getelementptr inbounds float, float* %tmp14039, i64 1
+ %tmp14041 = getelementptr inbounds float, float* %tmp14040, i64 1
+ %tmp14042 = getelementptr inbounds float, float* %tmp14041, i64 1
+ %tmp14043 = getelementptr inbounds float, float* %tmp14042, i64 1
+ %tmp14044 = getelementptr inbounds float, float* %tmp14043, i64 1
+ %tmp14045 = getelementptr inbounds float, float* %tmp14044, i64 1
+ %tmp14046 = getelementptr inbounds float, float* %tmp14045, i64 1
+ %tmp14047 = getelementptr inbounds float, float* %tmp14046, i64 1
+ %tmp14048 = getelementptr inbounds float, float* %tmp14047, i64 1
+ %tmp14049 = getelementptr inbounds float, float* %tmp14048, i64 1
+ %tmp14050 = getelementptr inbounds float, float* %tmp14049, i64 1
+ %tmp14051 = getelementptr inbounds float, float* %tmp14050, i64 1
+ %tmp14052 = getelementptr inbounds float, float* %tmp14051, i64 1
+ %tmp14053 = getelementptr inbounds float, float* %tmp14052, i64 1
+ %tmp14054 = getelementptr inbounds float, float* %tmp14053, i64 1
+ %tmp14055 = getelementptr inbounds float, float* %tmp14054, i64 1
+ %tmp14056 = getelementptr inbounds float, float* %tmp14055, i64 1
+ %tmp14057 = getelementptr inbounds float, float* %tmp14056, i64 1
+ %tmp14058 = getelementptr inbounds float, float* %tmp14057, i64 1
+ %tmp14059 = getelementptr inbounds float, float* %tmp14058, i64 1
+ %tmp14060 = getelementptr inbounds float, float* %tmp14059, i64 1
+ %tmp14061 = getelementptr inbounds float, float* %tmp14060, i64 1
+ %tmp14062 = getelementptr inbounds float, float* %tmp14061, i64 1
+ %tmp14063 = getelementptr inbounds float, float* %tmp14062, i64 1
+ %tmp14064 = getelementptr inbounds float, float* %tmp14063, i64 1
+ %tmp14065 = getelementptr inbounds float, float* %tmp14064, i64 1
+ %tmp14066 = getelementptr inbounds float, float* %tmp14065, i64 1
+ %tmp14067 = getelementptr inbounds float, float* %tmp14066, i64 1
+ %tmp14068 = getelementptr inbounds float, float* %tmp14067, i64 1
+ %tmp14069 = getelementptr inbounds float, float* %tmp14068, i64 1
+ %tmp14070 = getelementptr inbounds float, float* %tmp14069, i64 1
+ %tmp14071 = getelementptr inbounds float, float* %tmp14070, i64 1
+ %tmp14072 = getelementptr inbounds float, float* %tmp14071, i64 1
+ %tmp14073 = getelementptr inbounds float, float* %tmp14072, i64 1
+ %tmp14074 = getelementptr inbounds float, float* %tmp14073, i64 1
+ %tmp14075 = getelementptr inbounds float, float* %tmp14074, i64 1
+ %tmp14076 = getelementptr inbounds float, float* %tmp14075, i64 1
+ %tmp14077 = getelementptr inbounds float, float* %tmp14076, i64 1
+ %tmp14078 = getelementptr inbounds float, float* %tmp14077, i64 1
+ %tmp14079 = getelementptr inbounds float, float* %tmp14078, i64 1
+ %tmp14080 = getelementptr inbounds float, float* %tmp14079, i64 1
+ %tmp14081 = getelementptr inbounds float, float* %tmp14080, i64 1
+ %tmp14082 = getelementptr inbounds float, float* %tmp14081, i64 1
+ %tmp14083 = getelementptr inbounds float, float* %tmp14082, i64 1
+ %tmp14084 = getelementptr inbounds float, float* %tmp14083, i64 1
+ %tmp14085 = getelementptr inbounds float, float* %tmp14084, i64 1
+ %tmp14086 = getelementptr inbounds float, float* %tmp14085, i64 1
+ %tmp14087 = getelementptr inbounds float, float* %tmp14086, i64 1
+ %tmp14088 = getelementptr inbounds float, float* %tmp14087, i64 1
+ %tmp14089 = getelementptr inbounds float, float* %tmp14088, i64 1
+ %tmp14090 = getelementptr inbounds float, float* %tmp14089, i64 1
+ %tmp14091 = getelementptr inbounds float, float* %tmp14090, i64 1
+ %tmp14092 = getelementptr inbounds float, float* %tmp14091, i64 1
+ %tmp14093 = getelementptr inbounds float, float* %tmp14092, i64 1
+ %tmp14094 = getelementptr inbounds float, float* %tmp14093, i64 1
+ %tmp14095 = getelementptr inbounds float, float* %tmp14094, i64 1
+ %tmp14096 = getelementptr inbounds float, float* %tmp14095, i64 1
+ %tmp14097 = getelementptr inbounds float, float* %tmp14096, i64 1
+ %tmp14098 = getelementptr inbounds float, float* %tmp14097, i64 1
+ %tmp14099 = getelementptr inbounds float, float* %tmp14098, i64 1
+ %tmp14100 = getelementptr inbounds float, float* %tmp14099, i64 1
+ %tmp14101 = getelementptr inbounds float, float* %tmp14100, i64 1
+ %tmp14102 = getelementptr inbounds float, float* %tmp14101, i64 1
+ %tmp14103 = getelementptr inbounds float, float* %tmp14102, i64 1
+ %tmp14104 = getelementptr inbounds float, float* %tmp14103, i64 1
+ %tmp14105 = getelementptr inbounds float, float* %tmp14104, i64 1
+ %tmp14106 = getelementptr inbounds float, float* %tmp14105, i64 1
+ %tmp14107 = getelementptr inbounds float, float* %tmp14106, i64 1
+ %tmp14108 = getelementptr inbounds float, float* %tmp14107, i64 1
+ %tmp14109 = getelementptr inbounds float, float* %tmp14108, i64 1
+ %tmp14110 = getelementptr inbounds float, float* %tmp14109, i64 1
+ %tmp14111 = getelementptr inbounds float, float* %tmp14110, i64 1
+ %tmp14112 = getelementptr inbounds float, float* %tmp14111, i64 1
+ %tmp14113 = getelementptr inbounds float, float* %tmp14112, i64 1
+ %tmp14114 = getelementptr inbounds float, float* %tmp14113, i64 1
+ %tmp14115 = getelementptr inbounds float, float* %tmp14114, i64 1
+ %tmp14116 = getelementptr inbounds float, float* %tmp14115, i64 1
+ %tmp14117 = getelementptr inbounds float, float* %tmp14116, i64 1
+ %tmp14118 = getelementptr inbounds float, float* %tmp14117, i64 1
+ %tmp14119 = getelementptr inbounds float, float* %tmp14118, i64 1
+ %tmp14120 = getelementptr inbounds float, float* %tmp14119, i64 1
+ %tmp14121 = getelementptr inbounds float, float* %tmp14120, i64 1
+ %tmp14122 = getelementptr inbounds float, float* %tmp14121, i64 1
+ %tmp14123 = getelementptr inbounds float, float* %tmp14122, i64 1
+ %tmp14124 = getelementptr inbounds float, float* %tmp14123, i64 1
+ %tmp14125 = getelementptr inbounds float, float* %tmp14124, i64 1
+ %tmp14126 = getelementptr inbounds float, float* %tmp14125, i64 1
+ %tmp14127 = getelementptr inbounds float, float* %tmp14126, i64 1
+ %tmp14128 = getelementptr inbounds float, float* %tmp14127, i64 1
+ %tmp14129 = getelementptr inbounds float, float* %tmp14128, i64 1
+ %tmp14130 = getelementptr inbounds float, float* %tmp14129, i64 1
+ %tmp14131 = getelementptr inbounds float, float* %tmp14130, i64 1
+ %tmp14132 = getelementptr inbounds float, float* %tmp14131, i64 1
+ %tmp14133 = getelementptr inbounds float, float* %tmp14132, i64 1
+ %tmp14134 = getelementptr inbounds float, float* %tmp14133, i64 1
+ %tmp14135 = getelementptr inbounds float, float* %tmp14134, i64 1
+ %tmp14136 = getelementptr inbounds float, float* %tmp14135, i64 1
+ %tmp14137 = getelementptr inbounds float, float* %tmp14136, i64 1
+ %tmp14138 = getelementptr inbounds float, float* %tmp14137, i64 1
+ %tmp14139 = getelementptr inbounds float, float* %tmp14138, i64 1
+ %tmp14140 = getelementptr inbounds float, float* %tmp14139, i64 1
+ %tmp14141 = getelementptr inbounds float, float* %tmp14140, i64 1
+ %tmp14142 = getelementptr inbounds float, float* %tmp14141, i64 1
+ %tmp14143 = getelementptr inbounds float, float* %tmp14142, i64 1
+ %tmp14144 = getelementptr inbounds float, float* %tmp14143, i64 1
+ %tmp14145 = getelementptr inbounds float, float* %tmp14144, i64 1
+ %tmp14146 = getelementptr inbounds float, float* %tmp14145, i64 1
+ %tmp14147 = getelementptr inbounds float, float* %tmp14146, i64 1
+ %tmp14148 = getelementptr inbounds float, float* %tmp14147, i64 1
+ %tmp14149 = getelementptr inbounds float, float* %tmp14148, i64 1
+ %tmp14150 = getelementptr inbounds float, float* %tmp14149, i64 1
+ %tmp14151 = getelementptr inbounds float, float* %tmp14150, i64 1
+ %tmp14152 = getelementptr inbounds float, float* %tmp14151, i64 1
+ %tmp14153 = getelementptr inbounds float, float* %tmp14152, i64 1
+ %tmp14154 = getelementptr inbounds float, float* %tmp14153, i64 1
+ %tmp14155 = getelementptr inbounds float, float* %tmp14154, i64 1
+ %tmp14156 = getelementptr inbounds float, float* %tmp14155, i64 1
+ %tmp14157 = getelementptr inbounds float, float* %tmp14156, i64 1
+ %tmp14158 = getelementptr inbounds float, float* %tmp14157, i64 1
+ %tmp14159 = getelementptr inbounds float, float* %tmp14158, i64 1
+ %tmp14160 = getelementptr inbounds float, float* %tmp14159, i64 1
+ %tmp14161 = getelementptr inbounds float, float* %tmp14160, i64 1
+ %tmp14162 = getelementptr inbounds float, float* %tmp14161, i64 1
+ %tmp14163 = getelementptr inbounds float, float* %tmp14162, i64 1
+ %tmp14164 = getelementptr inbounds float, float* %tmp14163, i64 1
+ %tmp14165 = getelementptr inbounds float, float* %tmp14164, i64 1
+ %tmp14166 = getelementptr inbounds float, float* %tmp14165, i64 1
+ %tmp14167 = getelementptr inbounds float, float* %tmp14166, i64 1
+ %tmp14168 = getelementptr inbounds float, float* %tmp14167, i64 1
+ %tmp14169 = getelementptr inbounds float, float* %tmp14168, i64 1
+ %tmp14170 = getelementptr inbounds float, float* %tmp14169, i64 1
+ %tmp14171 = getelementptr inbounds float, float* %tmp14170, i64 1
+ %tmp14172 = getelementptr inbounds float, float* %tmp14171, i64 1
+ %tmp14173 = getelementptr inbounds float, float* %tmp14172, i64 1
+ %tmp14174 = getelementptr inbounds float, float* %tmp14173, i64 1
+ %tmp14175 = getelementptr inbounds float, float* %tmp14174, i64 1
+ %tmp14176 = getelementptr inbounds float, float* %tmp14175, i64 1
+ %tmp14177 = getelementptr inbounds float, float* %tmp14176, i64 1
+ %tmp14178 = getelementptr inbounds float, float* %tmp14177, i64 1
+ %tmp14179 = getelementptr inbounds float, float* %tmp14178, i64 1
+ %tmp14180 = getelementptr inbounds float, float* %tmp14179, i64 1
+ %tmp14181 = getelementptr inbounds float, float* %tmp14180, i64 1
+ %tmp14182 = getelementptr inbounds float, float* %tmp14181, i64 1
+ %tmp14183 = getelementptr inbounds float, float* %tmp14182, i64 1
+ %tmp14184 = getelementptr inbounds float, float* %tmp14183, i64 1
+ %tmp14185 = getelementptr inbounds float, float* %tmp14184, i64 1
+ %tmp14186 = getelementptr inbounds float, float* %tmp14185, i64 1
+ %tmp14187 = getelementptr inbounds float, float* %tmp14186, i64 1
+ %tmp14188 = getelementptr inbounds float, float* %tmp14187, i64 1
+ %tmp14189 = getelementptr inbounds float, float* %tmp14188, i64 1
+ %tmp14190 = getelementptr inbounds float, float* %tmp14189, i64 1
+ %tmp14191 = getelementptr inbounds float, float* %tmp14190, i64 1
+ %tmp14192 = getelementptr inbounds float, float* %tmp14191, i64 1
+ %tmp14193 = getelementptr inbounds float, float* %tmp14192, i64 1
+ %tmp14194 = getelementptr inbounds float, float* %tmp14193, i64 1
+ %tmp14195 = getelementptr inbounds float, float* %tmp14194, i64 1
+ %tmp14196 = getelementptr inbounds float, float* %tmp14195, i64 1
+ %tmp14197 = getelementptr inbounds float, float* %tmp14196, i64 1
+ %tmp14198 = getelementptr inbounds float, float* %tmp14197, i64 1
+ %tmp14199 = getelementptr inbounds float, float* %tmp14198, i64 1
+ %tmp14200 = getelementptr inbounds float, float* %tmp14199, i64 1
+ %tmp14201 = getelementptr inbounds float, float* %tmp14200, i64 1
+ %tmp14202 = getelementptr inbounds float, float* %tmp14201, i64 1
+ %tmp14203 = getelementptr inbounds float, float* %tmp14202, i64 1
+ %tmp14204 = getelementptr inbounds float, float* %tmp14203, i64 1
+ %tmp14205 = getelementptr inbounds float, float* %tmp14204, i64 1
+ %tmp14206 = getelementptr inbounds float, float* %tmp14205, i64 1
+ %tmp14207 = getelementptr inbounds float, float* %tmp14206, i64 1
+ %tmp14208 = getelementptr inbounds float, float* %tmp14207, i64 1
+ %tmp14209 = getelementptr inbounds float, float* %tmp14208, i64 1
+ %tmp14210 = getelementptr inbounds float, float* %tmp14209, i64 1
+ %tmp14211 = getelementptr inbounds float, float* %tmp14210, i64 1
+ %tmp14212 = getelementptr inbounds float, float* %tmp14211, i64 1
+ %tmp14213 = getelementptr inbounds float, float* %tmp14212, i64 1
+ %tmp14214 = getelementptr inbounds float, float* %tmp14213, i64 1
+ %tmp14215 = getelementptr inbounds float, float* %tmp14214, i64 1
+ %tmp14216 = getelementptr inbounds float, float* %tmp14215, i64 1
+ %tmp14217 = getelementptr inbounds float, float* %tmp14216, i64 1
+ %tmp14218 = getelementptr inbounds float, float* %tmp14217, i64 1
+ %tmp14219 = getelementptr inbounds float, float* %tmp14218, i64 1
+ %tmp14220 = getelementptr inbounds float, float* %tmp14219, i64 1
+ %tmp14221 = getelementptr inbounds float, float* %tmp14220, i64 1
+ %tmp14222 = getelementptr inbounds float, float* %tmp14221, i64 1
+ %tmp14223 = getelementptr inbounds float, float* %tmp14222, i64 1
+ %tmp14224 = getelementptr inbounds float, float* %tmp14223, i64 1
+ %tmp14225 = getelementptr inbounds float, float* %tmp14224, i64 1
+ %tmp14226 = getelementptr inbounds float, float* %tmp14225, i64 1
+ %tmp14227 = getelementptr inbounds float, float* %tmp14226, i64 1
+ %tmp14228 = getelementptr inbounds float, float* %tmp14227, i64 1
+ %tmp14229 = getelementptr inbounds float, float* %tmp14228, i64 1
+ %tmp14230 = getelementptr inbounds float, float* %tmp14229, i64 1
+ %tmp14231 = getelementptr inbounds float, float* %tmp14230, i64 1
+ %tmp14232 = getelementptr inbounds float, float* %tmp14231, i64 1
+ %tmp14233 = getelementptr inbounds float, float* %tmp14232, i64 1
+ %tmp14234 = getelementptr inbounds float, float* %tmp14233, i64 1
+ %tmp14235 = getelementptr inbounds float, float* %tmp14234, i64 1
+ %tmp14236 = getelementptr inbounds float, float* %tmp14235, i64 1
+ %tmp14237 = getelementptr inbounds float, float* %tmp14236, i64 1
+ %tmp14238 = getelementptr inbounds float, float* %tmp14237, i64 1
+ %tmp14239 = getelementptr inbounds float, float* %tmp14238, i64 1
+ %tmp14240 = getelementptr inbounds float, float* %tmp14239, i64 1
+ %tmp14241 = getelementptr inbounds float, float* %tmp14240, i64 1
+ %tmp14242 = getelementptr inbounds float, float* %tmp14241, i64 1
+ %tmp14243 = getelementptr inbounds float, float* %tmp14242, i64 1
+ %tmp14244 = getelementptr inbounds float, float* %tmp14243, i64 1
+ %tmp14245 = getelementptr inbounds float, float* %tmp14244, i64 1
+ %tmp14246 = getelementptr inbounds float, float* %tmp14245, i64 1
+ %tmp14247 = getelementptr inbounds float, float* %tmp14246, i64 1
+ %tmp14248 = getelementptr inbounds float, float* %tmp14247, i64 1
+ %tmp14249 = getelementptr inbounds float, float* %tmp14248, i64 1
+ %tmp14250 = getelementptr inbounds float, float* %tmp14249, i64 1
+ %tmp14251 = getelementptr inbounds float, float* %tmp14250, i64 1
+ %tmp14252 = getelementptr inbounds float, float* %tmp14251, i64 1
+ %tmp14253 = getelementptr inbounds float, float* %tmp14252, i64 1
+ %tmp14254 = getelementptr inbounds float, float* %tmp14253, i64 1
+ %tmp14255 = getelementptr inbounds float, float* %tmp14254, i64 1
+ %tmp14256 = getelementptr inbounds float, float* %tmp14255, i64 1
+ %tmp14257 = getelementptr inbounds float, float* %tmp14256, i64 1
+ %tmp14258 = getelementptr inbounds float, float* %tmp14257, i64 1
+ %tmp14259 = getelementptr inbounds float, float* %tmp14258, i64 1
+ %tmp14260 = getelementptr inbounds float, float* %tmp14259, i64 1
+ %tmp14261 = getelementptr inbounds float, float* %tmp14260, i64 1
+ %tmp14262 = getelementptr inbounds float, float* %tmp14261, i64 1
+ %tmp14263 = getelementptr inbounds float, float* %tmp14262, i64 1
+ %tmp14264 = getelementptr inbounds float, float* %tmp14263, i64 1
+ %tmp14265 = getelementptr inbounds float, float* %tmp14264, i64 1
+ %tmp14266 = getelementptr inbounds float, float* %tmp14265, i64 1
+ %tmp14267 = getelementptr inbounds float, float* %tmp14266, i64 1
+ %tmp14268 = getelementptr inbounds float, float* %tmp14267, i64 1
+ %tmp14269 = getelementptr inbounds float, float* %tmp14268, i64 1
+ %tmp14270 = getelementptr inbounds float, float* %tmp14269, i64 1
+ %tmp14271 = getelementptr inbounds float, float* %tmp14270, i64 1
+ %tmp14272 = getelementptr inbounds float, float* %tmp14271, i64 1
+ %tmp14273 = getelementptr inbounds float, float* %tmp14272, i64 1
+ %tmp14274 = getelementptr inbounds float, float* %tmp14273, i64 1
+ %tmp14275 = getelementptr inbounds float, float* %tmp14274, i64 1
+ %tmp14276 = getelementptr inbounds float, float* %tmp14275, i64 1
+ %tmp14277 = getelementptr inbounds float, float* %tmp14276, i64 1
+ %tmp14278 = getelementptr inbounds float, float* %tmp14277, i64 1
+ %tmp14279 = getelementptr inbounds float, float* %tmp14278, i64 1
+ %tmp14280 = getelementptr inbounds float, float* %tmp14279, i64 1
+ %tmp14281 = getelementptr inbounds float, float* %tmp14280, i64 1
+ %tmp14282 = getelementptr inbounds float, float* %tmp14281, i64 1
+ %tmp14283 = getelementptr inbounds float, float* %tmp14282, i64 1
+ %tmp14284 = getelementptr inbounds float, float* %tmp14283, i64 1
+ %tmp14285 = getelementptr inbounds float, float* %tmp14284, i64 1
+ %tmp14286 = getelementptr inbounds float, float* %tmp14285, i64 1
+ %tmp14287 = getelementptr inbounds float, float* %tmp14286, i64 1
+ %tmp14288 = getelementptr inbounds float, float* %tmp14287, i64 1
+ %tmp14289 = getelementptr inbounds float, float* %tmp14288, i64 1
+ %tmp14290 = getelementptr inbounds float, float* %tmp14289, i64 1
+ %tmp14291 = getelementptr inbounds float, float* %tmp14290, i64 1
+ %tmp14292 = getelementptr inbounds float, float* %tmp14291, i64 1
+ %tmp14293 = getelementptr inbounds float, float* %tmp14292, i64 1
+ %tmp14294 = getelementptr inbounds float, float* %tmp14293, i64 1
+ %tmp14295 = getelementptr inbounds float, float* %tmp14294, i64 1
+ %tmp14296 = getelementptr inbounds float, float* %tmp14295, i64 1
+ %tmp14297 = getelementptr inbounds float, float* %tmp14296, i64 1
+ %tmp14298 = getelementptr inbounds float, float* %tmp14297, i64 1
+ %tmp14299 = getelementptr inbounds float, float* %tmp14298, i64 1
+ %tmp14300 = getelementptr inbounds float, float* %tmp14299, i64 1
+ %tmp14301 = getelementptr inbounds float, float* %tmp14300, i64 1
+ %tmp14302 = getelementptr inbounds float, float* %tmp14301, i64 1
+ %tmp14303 = getelementptr inbounds float, float* %tmp14302, i64 1
+ %tmp14304 = getelementptr inbounds float, float* %tmp14303, i64 1
+ %tmp14305 = getelementptr inbounds float, float* %tmp14304, i64 1
+ %tmp14306 = getelementptr inbounds float, float* %tmp14305, i64 1
+ %tmp14307 = getelementptr inbounds float, float* %tmp14306, i64 1
+ %tmp14308 = getelementptr inbounds float, float* %tmp14307, i64 1
+ %tmp14309 = getelementptr inbounds float, float* %tmp14308, i64 1
+ %tmp14310 = getelementptr inbounds float, float* %tmp14309, i64 1
+ %tmp14311 = getelementptr inbounds float, float* %tmp14310, i64 1
+ %tmp14312 = getelementptr inbounds float, float* %tmp14311, i64 1
+ %tmp14313 = getelementptr inbounds float, float* %tmp14312, i64 1
+ %tmp14314 = getelementptr inbounds float, float* %tmp14313, i64 1
+ %tmp14315 = getelementptr inbounds float, float* %tmp14314, i64 1
+ %tmp14316 = getelementptr inbounds float, float* %tmp14315, i64 1
+ %tmp14317 = getelementptr inbounds float, float* %tmp14316, i64 1
+ %tmp14318 = getelementptr inbounds float, float* %tmp14317, i64 1
+ %tmp14319 = getelementptr inbounds float, float* %tmp14318, i64 1
+ %tmp14320 = getelementptr inbounds float, float* %tmp14319, i64 1
+ %tmp14321 = getelementptr inbounds float, float* %tmp14320, i64 1
+ %tmp14322 = getelementptr inbounds float, float* %tmp14321, i64 1
+ %tmp14323 = getelementptr inbounds float, float* %tmp14322, i64 1
+ %tmp14324 = getelementptr inbounds float, float* %tmp14323, i64 1
+ %tmp14325 = getelementptr inbounds float, float* %tmp14324, i64 1
+ %tmp14326 = getelementptr inbounds float, float* %tmp14325, i64 1
+ %tmp14327 = getelementptr inbounds float, float* %tmp14326, i64 1
+ %tmp14328 = getelementptr inbounds float, float* %tmp14327, i64 1
+ %tmp14329 = getelementptr inbounds float, float* %tmp14328, i64 1
+ %tmp14330 = getelementptr inbounds float, float* %tmp14329, i64 1
+ %tmp14331 = getelementptr inbounds float, float* %tmp14330, i64 1
+ %tmp14332 = getelementptr inbounds float, float* %tmp14331, i64 1
+ %tmp14333 = getelementptr inbounds float, float* %tmp14332, i64 1
+ %tmp14334 = getelementptr inbounds float, float* %tmp14333, i64 1
+ %tmp14335 = getelementptr inbounds float, float* %tmp14334, i64 1
+ %tmp14336 = getelementptr inbounds float, float* %tmp14335, i64 1
+ %tmp14337 = getelementptr inbounds float, float* %tmp14336, i64 1
+ %tmp14338 = getelementptr inbounds float, float* %tmp14337, i64 1
+ %tmp14339 = getelementptr inbounds float, float* %tmp14338, i64 1
+ %tmp14340 = getelementptr inbounds float, float* %tmp14339, i64 1
+ %tmp14341 = getelementptr inbounds float, float* %tmp14340, i64 1
+ %tmp14342 = getelementptr inbounds float, float* %tmp14341, i64 1
+ %tmp14343 = getelementptr inbounds float, float* %tmp14342, i64 1
+ %tmp14344 = getelementptr inbounds float, float* %tmp14343, i64 1
+ %tmp14345 = getelementptr inbounds float, float* %tmp14344, i64 1
+ %tmp14346 = getelementptr inbounds float, float* %tmp14345, i64 1
+ %tmp14347 = getelementptr inbounds float, float* %tmp14346, i64 1
+ %tmp14348 = getelementptr inbounds float, float* %tmp14347, i64 1
+ %tmp14349 = getelementptr inbounds float, float* %tmp14348, i64 1
+ %tmp14350 = getelementptr inbounds float, float* %tmp14349, i64 1
+ %tmp14351 = getelementptr inbounds float, float* %tmp14350, i64 1
+ %tmp14352 = getelementptr inbounds float, float* %tmp14351, i64 1
+ %tmp14353 = getelementptr inbounds float, float* %tmp14352, i64 1
+ %tmp14354 = getelementptr inbounds float, float* %tmp14353, i64 1
+ %tmp14355 = getelementptr inbounds float, float* %tmp14354, i64 1
+ %tmp14356 = getelementptr inbounds float, float* %tmp14355, i64 1
+ %tmp14357 = getelementptr inbounds float, float* %tmp14356, i64 1
+ %tmp14358 = getelementptr inbounds float, float* %tmp14357, i64 1
+ %tmp14359 = getelementptr inbounds float, float* %tmp14358, i64 1
+ %tmp14360 = getelementptr inbounds float, float* %tmp14359, i64 1
+ %tmp14361 = getelementptr inbounds float, float* %tmp14360, i64 1
+ %tmp14362 = getelementptr inbounds float, float* %tmp14361, i64 1
+ %tmp14363 = getelementptr inbounds float, float* %tmp14362, i64 1
+ %tmp14364 = getelementptr inbounds float, float* %tmp14363, i64 1
+ %tmp14365 = getelementptr inbounds float, float* %tmp14364, i64 1
+ %tmp14366 = getelementptr inbounds float, float* %tmp14365, i64 1
+ %tmp14367 = getelementptr inbounds float, float* %tmp14366, i64 1
+ %tmp14368 = getelementptr inbounds float, float* %tmp14367, i64 1
+ %tmp14369 = getelementptr inbounds float, float* %tmp14368, i64 1
+ %tmp14370 = getelementptr inbounds float, float* %tmp14369, i64 1
+ %tmp14371 = getelementptr inbounds float, float* %tmp14370, i64 1
+ %tmp14372 = getelementptr inbounds float, float* %tmp14371, i64 1
+ %tmp14373 = getelementptr inbounds float, float* %tmp14372, i64 1
+ %tmp14374 = getelementptr inbounds float, float* %tmp14373, i64 1
+ %tmp14375 = getelementptr inbounds float, float* %tmp14374, i64 1
+ %tmp14376 = getelementptr inbounds float, float* %tmp14375, i64 1
+ %tmp14377 = getelementptr inbounds float, float* %tmp14376, i64 1
+ %tmp14378 = getelementptr inbounds float, float* %tmp14377, i64 1
+ %tmp14379 = getelementptr inbounds float, float* %tmp14378, i64 1
+ %tmp14380 = getelementptr inbounds float, float* %tmp14379, i64 1
+ %tmp14381 = getelementptr inbounds float, float* %tmp14380, i64 1
+ %tmp14382 = getelementptr inbounds float, float* %tmp14381, i64 1
+ %tmp14383 = getelementptr inbounds float, float* %tmp14382, i64 1
+ %tmp14384 = getelementptr inbounds float, float* %tmp14383, i64 1
+ %tmp14385 = getelementptr inbounds float, float* %tmp14384, i64 1
+ %tmp14386 = getelementptr inbounds float, float* %tmp14385, i64 1
+ %tmp14387 = getelementptr inbounds float, float* %tmp14386, i64 1
+ %tmp14388 = getelementptr inbounds float, float* %tmp14387, i64 1
+ %tmp14389 = getelementptr inbounds float, float* %tmp14388, i64 1
+ %tmp14390 = getelementptr inbounds float, float* %tmp14389, i64 1
+ %tmp14391 = getelementptr inbounds float, float* %tmp14390, i64 1
+ %tmp14392 = getelementptr inbounds float, float* %tmp14391, i64 1
+ %tmp14393 = getelementptr inbounds float, float* %tmp14392, i64 1
+ %tmp14394 = getelementptr inbounds float, float* %tmp14393, i64 1
+ %tmp14395 = getelementptr inbounds float, float* %tmp14394, i64 1
+ %tmp14396 = getelementptr inbounds float, float* %tmp14395, i64 1
+ %tmp14397 = getelementptr inbounds float, float* %tmp14396, i64 1
+ %tmp14398 = getelementptr inbounds float, float* %tmp14397, i64 1
+ %tmp14399 = getelementptr inbounds float, float* %tmp14398, i64 1
+ %tmp14400 = getelementptr inbounds float, float* %tmp14399, i64 1
+ %tmp14401 = getelementptr inbounds float, float* %tmp14400, i64 1
+ %tmp14402 = getelementptr inbounds float, float* %tmp14401, i64 1
+ %tmp14403 = getelementptr inbounds float, float* %tmp14402, i64 1
+ %tmp14404 = getelementptr inbounds float, float* %tmp14403, i64 1
+ %tmp14405 = getelementptr inbounds float, float* %tmp14404, i64 1
+ %tmp14406 = getelementptr inbounds float, float* %tmp14405, i64 1
+ %tmp14407 = getelementptr inbounds float, float* %tmp14406, i64 1
+ %tmp14408 = getelementptr inbounds float, float* %tmp14407, i64 1
+ %tmp14409 = getelementptr inbounds float, float* %tmp14408, i64 1
+ %tmp14410 = getelementptr inbounds float, float* %tmp14409, i64 1
+ %tmp14411 = getelementptr inbounds float, float* %tmp14410, i64 1
+ %tmp14412 = getelementptr inbounds float, float* %tmp14411, i64 1
+ %tmp14413 = getelementptr inbounds float, float* %tmp14412, i64 1
+ %tmp14414 = getelementptr inbounds float, float* %tmp14413, i64 1
+ %tmp14415 = getelementptr inbounds float, float* %tmp14414, i64 1
+ %tmp14416 = getelementptr inbounds float, float* %tmp14415, i64 1
+ %tmp14417 = getelementptr inbounds float, float* %tmp14416, i64 1
+ %tmp14418 = getelementptr inbounds float, float* %tmp14417, i64 1
+ %tmp14419 = getelementptr inbounds float, float* %tmp14418, i64 1
+ %tmp14420 = getelementptr inbounds float, float* %tmp14419, i64 1
+ %tmp14421 = getelementptr inbounds float, float* %tmp14420, i64 1
+ %tmp14422 = getelementptr inbounds float, float* %tmp14421, i64 1
+ %tmp14423 = getelementptr inbounds float, float* %tmp14422, i64 1
+ %tmp14424 = getelementptr inbounds float, float* %tmp14423, i64 1
+ %tmp14425 = getelementptr inbounds float, float* %tmp14424, i64 1
+ %tmp14426 = getelementptr inbounds float, float* %tmp14425, i64 1
+ %tmp14427 = getelementptr inbounds float, float* %tmp14426, i64 1
+ %tmp14428 = getelementptr inbounds float, float* %tmp14427, i64 1
+ %tmp14429 = getelementptr inbounds float, float* %tmp14428, i64 1
+ %tmp14430 = getelementptr inbounds float, float* %tmp14429, i64 1
+ %tmp14431 = getelementptr inbounds float, float* %tmp14430, i64 1
+ %tmp14432 = getelementptr inbounds float, float* %tmp14431, i64 1
+ %tmp14433 = getelementptr inbounds float, float* %tmp14432, i64 1
+ %tmp14434 = getelementptr inbounds float, float* %tmp14433, i64 1
+ %tmp14435 = getelementptr inbounds float, float* %tmp14434, i64 1
+ %tmp14436 = getelementptr inbounds float, float* %tmp14435, i64 1
+ %tmp14437 = getelementptr inbounds float, float* %tmp14436, i64 1
+ %tmp14438 = getelementptr inbounds float, float* %tmp14437, i64 1
+ %tmp14439 = getelementptr inbounds float, float* %tmp14438, i64 1
+ %tmp14440 = getelementptr inbounds float, float* %tmp14439, i64 1
+ %tmp14441 = getelementptr inbounds float, float* %tmp14440, i64 1
+ %tmp14442 = getelementptr inbounds float, float* %tmp14441, i64 1
+ %tmp14443 = getelementptr inbounds float, float* %tmp14442, i64 1
+ %tmp14444 = getelementptr inbounds float, float* %tmp14443, i64 1
+ %tmp14445 = getelementptr inbounds float, float* %tmp14444, i64 1
+ %tmp14446 = getelementptr inbounds float, float* %tmp14445, i64 1
+ %tmp14447 = getelementptr inbounds float, float* %tmp14446, i64 1
+ %tmp14448 = getelementptr inbounds float, float* %tmp14447, i64 1
+ %tmp14449 = getelementptr inbounds float, float* %tmp14448, i64 1
+ %tmp14450 = getelementptr inbounds float, float* %tmp14449, i64 1
+ %tmp14451 = getelementptr inbounds float, float* %tmp14450, i64 1
+ %tmp14452 = getelementptr inbounds float, float* %tmp14451, i64 1
+ %tmp14453 = getelementptr inbounds float, float* %tmp14452, i64 1
+ %tmp14454 = getelementptr inbounds float, float* %tmp14453, i64 1
+ %tmp14455 = getelementptr inbounds float, float* %tmp14454, i64 1
+ %tmp14456 = getelementptr inbounds float, float* %tmp14455, i64 1
+ %tmp14457 = getelementptr inbounds float, float* %tmp14456, i64 1
+ %tmp14458 = getelementptr inbounds float, float* %tmp14457, i64 1
+ %tmp14459 = getelementptr inbounds float, float* %tmp14458, i64 1
+ %tmp14460 = getelementptr inbounds float, float* %tmp14459, i64 1
+ %tmp14461 = getelementptr inbounds float, float* %tmp14460, i64 1
+ %tmp14462 = getelementptr inbounds float, float* %tmp14461, i64 1
+ %tmp14463 = getelementptr inbounds float, float* %tmp14462, i64 1
+ %tmp14464 = getelementptr inbounds float, float* %tmp14463, i64 1
+ %tmp14465 = getelementptr inbounds float, float* %tmp14464, i64 1
+ %tmp14466 = getelementptr inbounds float, float* %tmp14465, i64 1
+ %tmp14467 = getelementptr inbounds float, float* %tmp14466, i64 1
+ %tmp14468 = getelementptr inbounds float, float* %tmp14467, i64 1
+ %tmp14469 = getelementptr inbounds float, float* %tmp14468, i64 1
+ %tmp14470 = getelementptr inbounds float, float* %tmp14469, i64 1
+ %tmp14471 = getelementptr inbounds float, float* %tmp14470, i64 1
+ %tmp14472 = getelementptr inbounds float, float* %tmp14471, i64 1
+ %tmp14473 = getelementptr inbounds float, float* %tmp14472, i64 1
+ %tmp14474 = getelementptr inbounds float, float* %tmp14473, i64 1
+ %tmp14475 = getelementptr inbounds float, float* %tmp14474, i64 1
+ %tmp14476 = getelementptr inbounds float, float* %tmp14475, i64 1
+ %tmp14477 = getelementptr inbounds float, float* %tmp14476, i64 1
+ %tmp14478 = getelementptr inbounds float, float* %tmp14477, i64 1
+ %tmp14479 = getelementptr inbounds float, float* %tmp14478, i64 1
+ %tmp14480 = getelementptr inbounds float, float* %tmp14479, i64 1
+ %tmp14481 = getelementptr inbounds float, float* %tmp14480, i64 1
+ %tmp14482 = getelementptr inbounds float, float* %tmp14481, i64 1
+ %tmp14483 = getelementptr inbounds float, float* %tmp14482, i64 1
+ %tmp14484 = getelementptr inbounds float, float* %tmp14483, i64 1
+ %tmp14485 = getelementptr inbounds float, float* %tmp14484, i64 1
+ %tmp14486 = getelementptr inbounds float, float* %tmp14485, i64 1
+ %tmp14487 = getelementptr inbounds float, float* %tmp14486, i64 1
+ %tmp14488 = getelementptr inbounds float, float* %tmp14487, i64 1
+ %tmp14489 = getelementptr inbounds float, float* %tmp14488, i64 1
+ %tmp14490 = getelementptr inbounds float, float* %tmp14489, i64 1
+ %tmp14491 = getelementptr inbounds float, float* %tmp14490, i64 1
+ %tmp14492 = getelementptr inbounds float, float* %tmp14491, i64 1
+ %tmp14493 = getelementptr inbounds float, float* %tmp14492, i64 1
+ %tmp14494 = getelementptr inbounds float, float* %tmp14493, i64 1
+ %tmp14495 = getelementptr inbounds float, float* %tmp14494, i64 1
+ %tmp14496 = getelementptr inbounds float, float* %tmp14495, i64 1
+ %tmp14497 = getelementptr inbounds float, float* %tmp14496, i64 1
+ %tmp14498 = getelementptr inbounds float, float* %tmp14497, i64 1
+ %tmp14499 = getelementptr inbounds float, float* %tmp14498, i64 1
+ %tmp14500 = getelementptr inbounds float, float* %tmp14499, i64 1
+ %tmp14501 = getelementptr inbounds float, float* %tmp14500, i64 1
+ %tmp14502 = getelementptr inbounds float, float* %tmp14501, i64 1
+ %tmp14503 = getelementptr inbounds float, float* %tmp14502, i64 1
+ %tmp14504 = getelementptr inbounds float, float* %tmp14503, i64 1
+ %tmp14505 = getelementptr inbounds float, float* %tmp14504, i64 1
+ %tmp14506 = getelementptr inbounds float, float* %tmp14505, i64 1
+ %tmp14507 = getelementptr inbounds float, float* %tmp14506, i64 1
+ %tmp14508 = getelementptr inbounds float, float* %tmp14507, i64 1
+ %tmp14509 = getelementptr inbounds float, float* %tmp14508, i64 1
+ %tmp14510 = getelementptr inbounds float, float* %tmp14509, i64 1
+ %tmp14511 = getelementptr inbounds float, float* %tmp14510, i64 1
+ %tmp14512 = getelementptr inbounds float, float* %tmp14511, i64 1
+ %tmp14513 = getelementptr inbounds float, float* %tmp14512, i64 1
+ %tmp14514 = getelementptr inbounds float, float* %tmp14513, i64 1
+ %tmp14515 = getelementptr inbounds float, float* %tmp14514, i64 1
+ %tmp14516 = getelementptr inbounds float, float* %tmp14515, i64 1
+ %tmp14517 = getelementptr inbounds float, float* %tmp14516, i64 1
+ %tmp14518 = getelementptr inbounds float, float* %tmp14517, i64 1
+ %tmp14519 = getelementptr inbounds float, float* %tmp14518, i64 1
+ %tmp14520 = getelementptr inbounds float, float* %tmp14519, i64 1
+ %tmp14521 = getelementptr inbounds float, float* %tmp14520, i64 1
+ %tmp14522 = getelementptr inbounds float, float* %tmp14521, i64 1
+ %tmp14523 = getelementptr inbounds float, float* %tmp14522, i64 1
+ %tmp14524 = getelementptr inbounds float, float* %tmp14523, i64 1
+ %tmp14525 = getelementptr inbounds float, float* %tmp14524, i64 1
+ %tmp14526 = getelementptr inbounds float, float* %tmp14525, i64 1
+ %tmp14527 = getelementptr inbounds float, float* %tmp14526, i64 1
+ %tmp14528 = getelementptr inbounds float, float* %tmp14527, i64 1
+ %tmp14529 = getelementptr inbounds float, float* %tmp14528, i64 1
+ %tmp14530 = getelementptr inbounds float, float* %tmp14529, i64 1
+ %tmp14531 = getelementptr inbounds float, float* %tmp14530, i64 1
+ %tmp14532 = getelementptr inbounds float, float* %tmp14531, i64 1
+ %tmp14533 = getelementptr inbounds float, float* %tmp14532, i64 1
+ %tmp14534 = getelementptr inbounds float, float* %tmp14533, i64 1
+ %tmp14535 = getelementptr inbounds float, float* %tmp14534, i64 1
+ %tmp14536 = getelementptr inbounds float, float* %tmp14535, i64 1
+ %tmp14537 = getelementptr inbounds float, float* %tmp14536, i64 1
+ %tmp14538 = getelementptr inbounds float, float* %tmp14537, i64 1
+ %tmp14539 = getelementptr inbounds float, float* %tmp14538, i64 1
+ %tmp14540 = getelementptr inbounds float, float* %tmp14539, i64 1
+ %tmp14541 = getelementptr inbounds float, float* %tmp14540, i64 1
+ %tmp14542 = getelementptr inbounds float, float* %tmp14541, i64 1
+ %tmp14543 = getelementptr inbounds float, float* %tmp14542, i64 1
+ %tmp14544 = getelementptr inbounds float, float* %tmp14543, i64 1
+ %tmp14545 = getelementptr inbounds float, float* %tmp14544, i64 1
+ %tmp14546 = getelementptr inbounds float, float* %tmp14545, i64 1
+ %tmp14547 = getelementptr inbounds float, float* %tmp14546, i64 1
+ %tmp14548 = getelementptr inbounds float, float* %tmp14547, i64 1
+ %tmp14549 = getelementptr inbounds float, float* %tmp14548, i64 1
+ %tmp14550 = getelementptr inbounds float, float* %tmp14549, i64 1
+ %tmp14551 = getelementptr inbounds float, float* %tmp14550, i64 1
+ %tmp14552 = getelementptr inbounds float, float* %tmp14551, i64 1
+ %tmp14553 = getelementptr inbounds float, float* %tmp14552, i64 1
+ %tmp14554 = getelementptr inbounds float, float* %tmp14553, i64 1
+ %tmp14555 = getelementptr inbounds float, float* %tmp14554, i64 1
+ %tmp14556 = getelementptr inbounds float, float* %tmp14555, i64 1
+ %tmp14557 = getelementptr inbounds float, float* %tmp14556, i64 1
+ %tmp14558 = getelementptr inbounds float, float* %tmp14557, i64 1
+ %tmp14559 = getelementptr inbounds float, float* %tmp14558, i64 1
+ %tmp14560 = getelementptr inbounds float, float* %tmp14559, i64 1
+ %tmp14561 = getelementptr inbounds float, float* %tmp14560, i64 1
+ %tmp14562 = getelementptr inbounds float, float* %tmp14561, i64 1
+ %tmp14563 = getelementptr inbounds float, float* %tmp14562, i64 1
+ %tmp14564 = getelementptr inbounds float, float* %tmp14563, i64 1
+ %tmp14565 = getelementptr inbounds float, float* %tmp14564, i64 1
+ %tmp14566 = getelementptr inbounds float, float* %tmp14565, i64 1
+ %tmp14567 = getelementptr inbounds float, float* %tmp14566, i64 1
+ %tmp14568 = getelementptr inbounds float, float* %tmp14567, i64 1
+ %tmp14569 = getelementptr inbounds float, float* %tmp14568, i64 1
+ %tmp14570 = getelementptr inbounds float, float* %tmp14569, i64 1
+ %tmp14571 = getelementptr inbounds float, float* %tmp14570, i64 1
+ %tmp14572 = getelementptr inbounds float, float* %tmp14571, i64 1
+ %tmp14573 = getelementptr inbounds float, float* %tmp14572, i64 1
+ %tmp14574 = getelementptr inbounds float, float* %tmp14573, i64 1
+ %tmp14575 = getelementptr inbounds float, float* %tmp14574, i64 1
+ %tmp14576 = getelementptr inbounds float, float* %tmp14575, i64 1
+ %tmp14577 = getelementptr inbounds float, float* %tmp14576, i64 1
+ %tmp14578 = getelementptr inbounds float, float* %tmp14577, i64 1
+ %tmp14579 = getelementptr inbounds float, float* %tmp14578, i64 1
+ %tmp14580 = getelementptr inbounds float, float* %tmp14579, i64 1
+ %tmp14581 = getelementptr inbounds float, float* %tmp14580, i64 1
+ %tmp14582 = getelementptr inbounds float, float* %tmp14581, i64 1
+ %tmp14583 = getelementptr inbounds float, float* %tmp14582, i64 1
+ %tmp14584 = getelementptr inbounds float, float* %tmp14583, i64 1
+ %tmp14585 = getelementptr inbounds float, float* %tmp14584, i64 1
+ %tmp14586 = getelementptr inbounds float, float* %tmp14585, i64 1
+ %tmp14587 = getelementptr inbounds float, float* %tmp14586, i64 1
+ %tmp14588 = getelementptr inbounds float, float* %tmp14587, i64 1
+ %tmp14589 = getelementptr inbounds float, float* %tmp14588, i64 1
+ %tmp14590 = getelementptr inbounds float, float* %tmp14589, i64 1
+ %tmp14591 = getelementptr inbounds float, float* %tmp14590, i64 1
+ %tmp14592 = getelementptr inbounds float, float* %tmp14591, i64 1
+ %tmp14593 = getelementptr inbounds float, float* %tmp14592, i64 1
+ %tmp14594 = getelementptr inbounds float, float* %tmp14593, i64 1
+ %tmp14595 = getelementptr inbounds float, float* %tmp14594, i64 1
+ %tmp14596 = getelementptr inbounds float, float* %tmp14595, i64 1
+ %tmp14597 = getelementptr inbounds float, float* %tmp14596, i64 1
+ %tmp14598 = getelementptr inbounds float, float* %tmp14597, i64 1
+ %tmp14599 = getelementptr inbounds float, float* %tmp14598, i64 1
+ %tmp14600 = getelementptr inbounds float, float* %tmp14599, i64 1
+ %tmp14601 = getelementptr inbounds float, float* %tmp14600, i64 1
+ %tmp14602 = getelementptr inbounds float, float* %tmp14601, i64 1
+ %tmp14603 = getelementptr inbounds float, float* %tmp14602, i64 1
+ %tmp14604 = getelementptr inbounds float, float* %tmp14603, i64 1
+ %tmp14605 = getelementptr inbounds float, float* %tmp14604, i64 1
+ %tmp14606 = getelementptr inbounds float, float* %tmp14605, i64 1
+ %tmp14607 = getelementptr inbounds float, float* %tmp14606, i64 1
+ %tmp14608 = getelementptr inbounds float, float* %tmp14607, i64 1
+ %tmp14609 = getelementptr inbounds float, float* %tmp14608, i64 1
+ %tmp14610 = getelementptr inbounds float, float* %tmp14609, i64 1
+ %tmp14611 = getelementptr inbounds float, float* %tmp14610, i64 1
+ %tmp14612 = getelementptr inbounds float, float* %tmp14611, i64 1
+ %tmp14613 = getelementptr inbounds float, float* %tmp14612, i64 1
+ %tmp14614 = getelementptr inbounds float, float* %tmp14613, i64 1
+ %tmp14615 = getelementptr inbounds float, float* %tmp14614, i64 1
+ %tmp14616 = getelementptr inbounds float, float* %tmp14615, i64 1
+ %tmp14617 = getelementptr inbounds float, float* %tmp14616, i64 1
+ %tmp14618 = getelementptr inbounds float, float* %tmp14617, i64 1
+ %tmp14619 = getelementptr inbounds float, float* %tmp14618, i64 1
+ %tmp14620 = getelementptr inbounds float, float* %tmp14619, i64 1
+ %tmp14621 = getelementptr inbounds float, float* %tmp14620, i64 1
+ %tmp14622 = getelementptr inbounds float, float* %tmp14621, i64 1
+ %tmp14623 = getelementptr inbounds float, float* %tmp14622, i64 1
+ %tmp14624 = getelementptr inbounds float, float* %tmp14623, i64 1
+ %tmp14625 = getelementptr inbounds float, float* %tmp14624, i64 1
+ %tmp14626 = getelementptr inbounds float, float* %tmp14625, i64 1
+ %tmp14627 = getelementptr inbounds float, float* %tmp14626, i64 1
+ %tmp14628 = getelementptr inbounds float, float* %tmp14627, i64 1
+ %tmp14629 = getelementptr inbounds float, float* %tmp14628, i64 1
+ %tmp14630 = getelementptr inbounds float, float* %tmp14629, i64 1
+ %tmp14631 = getelementptr inbounds float, float* %tmp14630, i64 1
+ %tmp14632 = getelementptr inbounds float, float* %tmp14631, i64 1
+ %tmp14633 = getelementptr inbounds float, float* %tmp14632, i64 1
+ %tmp14634 = getelementptr inbounds float, float* %tmp14633, i64 1
+ %tmp14635 = getelementptr inbounds float, float* %tmp14634, i64 1
+ %tmp14636 = getelementptr inbounds float, float* %tmp14635, i64 1
+ %tmp14637 = getelementptr inbounds float, float* %tmp14636, i64 1
+ %tmp14638 = getelementptr inbounds float, float* %tmp14637, i64 1
+ %tmp14639 = getelementptr inbounds float, float* %tmp14638, i64 1
+ %tmp14640 = getelementptr inbounds float, float* %tmp14639, i64 1
+ %tmp14641 = getelementptr inbounds float, float* %tmp14640, i64 1
+ %tmp14642 = getelementptr inbounds float, float* %tmp14641, i64 1
+ %tmp14643 = getelementptr inbounds float, float* %tmp14642, i64 1
+ %tmp14644 = getelementptr inbounds float, float* %tmp14643, i64 1
+ %tmp14645 = getelementptr inbounds float, float* %tmp14644, i64 1
+ %tmp14646 = getelementptr inbounds float, float* %tmp14645, i64 1
+ %tmp14647 = getelementptr inbounds float, float* %tmp14646, i64 1
+ %tmp14648 = getelementptr inbounds float, float* %tmp14647, i64 1
+ %tmp14649 = getelementptr inbounds float, float* %tmp14648, i64 1
+ %tmp14650 = getelementptr inbounds float, float* %tmp14649, i64 1
+ %tmp14651 = getelementptr inbounds float, float* %tmp14650, i64 1
+ %tmp14652 = getelementptr inbounds float, float* %tmp14651, i64 1
+ %tmp14653 = getelementptr inbounds float, float* %tmp14652, i64 1
+ %tmp14654 = getelementptr inbounds float, float* %tmp14653, i64 1
+ %tmp14655 = getelementptr inbounds float, float* %tmp14654, i64 1
+ %tmp14656 = getelementptr inbounds float, float* %tmp14655, i64 1
+ %tmp14657 = getelementptr inbounds float, float* %tmp14656, i64 1
+ %tmp14658 = getelementptr inbounds float, float* %tmp14657, i64 1
+ %tmp14659 = getelementptr inbounds float, float* %tmp14658, i64 1
+ %tmp14660 = getelementptr inbounds float, float* %tmp14659, i64 1
+ %tmp14661 = getelementptr inbounds float, float* %tmp14660, i64 1
+ %tmp14662 = getelementptr inbounds float, float* %tmp14661, i64 1
+ %tmp14663 = getelementptr inbounds float, float* %tmp14662, i64 1
+ %tmp14664 = getelementptr inbounds float, float* %tmp14663, i64 1
+ %tmp14665 = getelementptr inbounds float, float* %tmp14664, i64 1
+ %tmp14666 = getelementptr inbounds float, float* %tmp14665, i64 1
+ %tmp14667 = getelementptr inbounds float, float* %tmp14666, i64 1
+ %tmp14668 = getelementptr inbounds float, float* %tmp14667, i64 1
+ %tmp14669 = getelementptr inbounds float, float* %tmp14668, i64 1
+ %tmp14670 = getelementptr inbounds float, float* %tmp14669, i64 1
+ %tmp14671 = getelementptr inbounds float, float* %tmp14670, i64 1
+ %tmp14672 = getelementptr inbounds float, float* %tmp14671, i64 1
+ %tmp14673 = getelementptr inbounds float, float* %tmp14672, i64 1
+ %tmp14674 = getelementptr inbounds float, float* %tmp14673, i64 1
+ %tmp14675 = getelementptr inbounds float, float* %tmp14674, i64 1
+ %tmp14676 = getelementptr inbounds float, float* %tmp14675, i64 1
+ %tmp14677 = getelementptr inbounds float, float* %tmp14676, i64 1
+ %tmp14678 = getelementptr inbounds float, float* %tmp14677, i64 1
+ %tmp14679 = getelementptr inbounds float, float* %tmp14678, i64 1
+ %tmp14680 = getelementptr inbounds float, float* %tmp14679, i64 1
+ %tmp14681 = getelementptr inbounds float, float* %tmp14680, i64 1
+ %tmp14682 = getelementptr inbounds float, float* %tmp14681, i64 1
+ %tmp14683 = getelementptr inbounds float, float* %tmp14682, i64 1
+ %tmp14684 = getelementptr inbounds float, float* %tmp14683, i64 1
+ %tmp14685 = getelementptr inbounds float, float* %tmp14684, i64 1
+ %tmp14686 = getelementptr inbounds float, float* %tmp14685, i64 1
+ %tmp14687 = getelementptr inbounds float, float* %tmp14686, i64 1
+ %tmp14688 = getelementptr inbounds float, float* %tmp14687, i64 1
+ %tmp14689 = getelementptr inbounds float, float* %tmp14688, i64 1
+ %tmp14690 = getelementptr inbounds float, float* %tmp14689, i64 1
+ %tmp14691 = getelementptr inbounds float, float* %tmp14690, i64 1
+ %tmp14692 = getelementptr inbounds float, float* %tmp14691, i64 1
+ %tmp14693 = getelementptr inbounds float, float* %tmp14692, i64 1
+ %tmp14694 = getelementptr inbounds float, float* %tmp14693, i64 1
+ %tmp14695 = getelementptr inbounds float, float* %tmp14694, i64 1
+ %tmp14696 = getelementptr inbounds float, float* %tmp14695, i64 1
+ %tmp14697 = getelementptr inbounds float, float* %tmp14696, i64 1
+ %tmp14698 = getelementptr inbounds float, float* %tmp14697, i64 1
+ %tmp14699 = getelementptr inbounds float, float* %tmp14698, i64 1
+ %tmp14700 = getelementptr inbounds float, float* %tmp14699, i64 1
+ %tmp14701 = getelementptr inbounds float, float* %tmp14700, i64 1
+ %tmp14702 = getelementptr inbounds float, float* %tmp14701, i64 1
+ %tmp14703 = getelementptr inbounds float, float* %tmp14702, i64 1
+ %tmp14704 = getelementptr inbounds float, float* %tmp14703, i64 1
+ %tmp14705 = getelementptr inbounds float, float* %tmp14704, i64 1
+ %tmp14706 = getelementptr inbounds float, float* %tmp14705, i64 1
+ %tmp14707 = getelementptr inbounds float, float* %tmp14706, i64 1
+ %tmp14708 = getelementptr inbounds float, float* %tmp14707, i64 1
+ %tmp14709 = getelementptr inbounds float, float* %tmp14708, i64 1
+ %tmp14710 = getelementptr inbounds float, float* %tmp14709, i64 1
+ %tmp14711 = getelementptr inbounds float, float* %tmp14710, i64 1
+ %tmp14712 = getelementptr inbounds float, float* %tmp14711, i64 1
+ %tmp14713 = getelementptr inbounds float, float* %tmp14712, i64 1
+ %tmp14714 = getelementptr inbounds float, float* %tmp14713, i64 1
+ %tmp14715 = getelementptr inbounds float, float* %tmp14714, i64 1
+ %tmp14716 = getelementptr inbounds float, float* %tmp14715, i64 1
+ %tmp14717 = getelementptr inbounds float, float* %tmp14716, i64 1
+ %tmp14718 = getelementptr inbounds float, float* %tmp14717, i64 1
+ %tmp14719 = getelementptr inbounds float, float* %tmp14718, i64 1
+ %tmp14720 = getelementptr inbounds float, float* %tmp14719, i64 1
+ %tmp14721 = getelementptr inbounds float, float* %tmp14720, i64 1
+ %tmp14722 = getelementptr inbounds float, float* %tmp14721, i64 1
+ %tmp14723 = getelementptr inbounds float, float* %tmp14722, i64 1
+ %tmp14724 = getelementptr inbounds float, float* %tmp14723, i64 1
+ %tmp14725 = getelementptr inbounds float, float* %tmp14724, i64 1
+ %tmp14726 = getelementptr inbounds float, float* %tmp14725, i64 1
+ %tmp14727 = getelementptr inbounds float, float* %tmp14726, i64 1
+ %tmp14728 = getelementptr inbounds float, float* %tmp14727, i64 1
+ %tmp14729 = getelementptr inbounds float, float* %tmp14728, i64 1
+ %tmp14730 = getelementptr inbounds float, float* %tmp14729, i64 1
+ %tmp14731 = getelementptr inbounds float, float* %tmp14730, i64 1
+ %tmp14732 = getelementptr inbounds float, float* %tmp14731, i64 1
+ %tmp14733 = getelementptr inbounds float, float* %tmp14732, i64 1
+ %tmp14734 = getelementptr inbounds float, float* %tmp14733, i64 1
+ %tmp14735 = getelementptr inbounds float, float* %tmp14734, i64 1
+ %tmp14736 = getelementptr inbounds float, float* %tmp14735, i64 1
+ %tmp14737 = getelementptr inbounds float, float* %tmp14736, i64 1
+ %tmp14738 = getelementptr inbounds float, float* %tmp14737, i64 1
+ %tmp14739 = getelementptr inbounds float, float* %tmp14738, i64 1
+ %tmp14740 = getelementptr inbounds float, float* %tmp14739, i64 1
+ %tmp14741 = getelementptr inbounds float, float* %tmp14740, i64 1
+ %tmp14742 = getelementptr inbounds float, float* %tmp14741, i64 1
+ %tmp14743 = getelementptr inbounds float, float* %tmp14742, i64 1
+ %tmp14744 = getelementptr inbounds float, float* %tmp14743, i64 1
+ %tmp14745 = getelementptr inbounds float, float* %tmp14744, i64 1
+ %tmp14746 = getelementptr inbounds float, float* %tmp14745, i64 1
+ %tmp14747 = getelementptr inbounds float, float* %tmp14746, i64 1
+ %tmp14748 = getelementptr inbounds float, float* %tmp14747, i64 1
+ %tmp14749 = getelementptr inbounds float, float* %tmp14748, i64 1
+ %tmp14750 = getelementptr inbounds float, float* %tmp14749, i64 1
+ %tmp14751 = getelementptr inbounds float, float* %tmp14750, i64 1
+ %tmp14752 = getelementptr inbounds float, float* %tmp14751, i64 1
+ %tmp14753 = getelementptr inbounds float, float* %tmp14752, i64 1
+ %tmp14754 = getelementptr inbounds float, float* %tmp14753, i64 1
+ %tmp14755 = getelementptr inbounds float, float* %tmp14754, i64 1
+ %tmp14756 = getelementptr inbounds float, float* %tmp14755, i64 1
+ %tmp14757 = getelementptr inbounds float, float* %tmp14756, i64 1
+ %tmp14758 = getelementptr inbounds float, float* %tmp14757, i64 1
+ %tmp14759 = getelementptr inbounds float, float* %tmp14758, i64 1
+ %tmp14760 = getelementptr inbounds float, float* %tmp14759, i64 1
+ %tmp14761 = getelementptr inbounds float, float* %tmp14760, i64 1
+ %tmp14762 = getelementptr inbounds float, float* %tmp14761, i64 1
+ %tmp14763 = getelementptr inbounds float, float* %tmp14762, i64 1
+ %tmp14764 = getelementptr inbounds float, float* %tmp14763, i64 1
+ %tmp14765 = getelementptr inbounds float, float* %tmp14764, i64 1
+ %tmp14766 = getelementptr inbounds float, float* %tmp14765, i64 1
+ %tmp14767 = getelementptr inbounds float, float* %tmp14766, i64 1
+ %tmp14768 = getelementptr inbounds float, float* %tmp14767, i64 1
+ %tmp14769 = getelementptr inbounds float, float* %tmp14768, i64 1
+ %tmp14770 = getelementptr inbounds float, float* %tmp14769, i64 1
+ %tmp14771 = getelementptr inbounds float, float* %tmp14770, i64 1
+ %tmp14772 = getelementptr inbounds float, float* %tmp14771, i64 1
+ %tmp14773 = getelementptr inbounds float, float* %tmp14772, i64 1
+ %tmp14774 = getelementptr inbounds float, float* %tmp14773, i64 1
+ %tmp14775 = getelementptr inbounds float, float* %tmp14774, i64 1
+ %tmp14776 = getelementptr inbounds float, float* %tmp14775, i64 1
+ %tmp14777 = getelementptr inbounds float, float* %tmp14776, i64 1
+ %tmp14778 = getelementptr inbounds float, float* %tmp14777, i64 1
+ %tmp14779 = getelementptr inbounds float, float* %tmp14778, i64 1
+ %tmp14780 = getelementptr inbounds float, float* %tmp14779, i64 1
+ %tmp14781 = getelementptr inbounds float, float* %tmp14780, i64 1
+ %tmp14782 = getelementptr inbounds float, float* %tmp14781, i64 1
+ %tmp14783 = getelementptr inbounds float, float* %tmp14782, i64 1
+ %tmp14784 = getelementptr inbounds float, float* %tmp14783, i64 1
+ %tmp14785 = getelementptr inbounds float, float* %tmp14784, i64 1
+ %tmp14786 = getelementptr inbounds float, float* %tmp14785, i64 1
+ %tmp14787 = getelementptr inbounds float, float* %tmp14786, i64 1
+ %tmp14788 = getelementptr inbounds float, float* %tmp14787, i64 1
+ %tmp14789 = getelementptr inbounds float, float* %tmp14788, i64 1
+ %tmp14790 = getelementptr inbounds float, float* %tmp14789, i64 1
+ %tmp14791 = getelementptr inbounds float, float* %tmp14790, i64 1
+ %tmp14792 = getelementptr inbounds float, float* %tmp14791, i64 1
+ %tmp14793 = getelementptr inbounds float, float* %tmp14792, i64 1
+ %tmp14794 = getelementptr inbounds float, float* %tmp14793, i64 1
+ %tmp14795 = getelementptr inbounds float, float* %tmp14794, i64 1
+ %tmp14796 = getelementptr inbounds float, float* %tmp14795, i64 1
+ %tmp14797 = getelementptr inbounds float, float* %tmp14796, i64 1
+ %tmp14798 = getelementptr inbounds float, float* %tmp14797, i64 1
+ %tmp14799 = getelementptr inbounds float, float* %tmp14798, i64 1
+ %tmp14800 = getelementptr inbounds float, float* %tmp14799, i64 1
+ %tmp14801 = getelementptr inbounds float, float* %tmp14800, i64 1
+ %tmp14802 = getelementptr inbounds float, float* %tmp14801, i64 1
+ %tmp14803 = getelementptr inbounds float, float* %tmp14802, i64 1
+ %tmp14804 = getelementptr inbounds float, float* %tmp14803, i64 1
+ %tmp14805 = getelementptr inbounds float, float* %tmp14804, i64 1
+ %tmp14806 = getelementptr inbounds float, float* %tmp14805, i64 1
+ %tmp14807 = getelementptr inbounds float, float* %tmp14806, i64 1
+ %tmp14808 = getelementptr inbounds float, float* %tmp14807, i64 1
+ %tmp14809 = getelementptr inbounds float, float* %tmp14808, i64 1
+ %tmp14810 = getelementptr inbounds float, float* %tmp14809, i64 1
+ %tmp14811 = getelementptr inbounds float, float* %tmp14810, i64 1
+ %tmp14812 = getelementptr inbounds float, float* %tmp14811, i64 1
+ %tmp14813 = getelementptr inbounds float, float* %tmp14812, i64 1
+ %tmp14814 = getelementptr inbounds float, float* %tmp14813, i64 1
+ %tmp14815 = getelementptr inbounds float, float* %tmp14814, i64 1
+ %tmp14816 = getelementptr inbounds float, float* %tmp14815, i64 1
+ %tmp14817 = getelementptr inbounds float, float* %tmp14816, i64 1
+ %tmp14818 = getelementptr inbounds float, float* %tmp14817, i64 1
+ %tmp14819 = getelementptr inbounds float, float* %tmp14818, i64 1
+ %tmp14820 = getelementptr inbounds float, float* %tmp14819, i64 1
+ %tmp14821 = getelementptr inbounds float, float* %tmp14820, i64 1
+ %tmp14822 = getelementptr inbounds float, float* %tmp14821, i64 1
+ %tmp14823 = getelementptr inbounds float, float* %tmp14822, i64 1
+ %tmp14824 = getelementptr inbounds float, float* %tmp14823, i64 1
+ %tmp14825 = getelementptr inbounds float, float* %tmp14824, i64 1
+ %tmp14826 = getelementptr inbounds float, float* %tmp14825, i64 1
+ %tmp14827 = getelementptr inbounds float, float* %tmp14826, i64 1
+ %tmp14828 = getelementptr inbounds float, float* %tmp14827, i64 1
+ %tmp14829 = getelementptr inbounds float, float* %tmp14828, i64 1
+ %tmp14830 = getelementptr inbounds float, float* %tmp14829, i64 1
+ %tmp14831 = getelementptr inbounds float, float* %tmp14830, i64 1
+ %tmp14832 = getelementptr inbounds float, float* %tmp14831, i64 1
+ %tmp14833 = getelementptr inbounds float, float* %tmp14832, i64 1
+ %tmp14834 = getelementptr inbounds float, float* %tmp14833, i64 1
+ %tmp14835 = getelementptr inbounds float, float* %tmp14834, i64 1
+ %tmp14836 = getelementptr inbounds float, float* %tmp14835, i64 1
+ %tmp14837 = getelementptr inbounds float, float* %tmp14836, i64 1
+ %tmp14838 = getelementptr inbounds float, float* %tmp14837, i64 1
+ %tmp14839 = getelementptr inbounds float, float* %tmp14838, i64 1
+ %tmp14840 = getelementptr inbounds float, float* %tmp14839, i64 1
+ %tmp14841 = getelementptr inbounds float, float* %tmp14840, i64 1
+ %tmp14842 = getelementptr inbounds float, float* %tmp14841, i64 1
+ %tmp14843 = getelementptr inbounds float, float* %tmp14842, i64 1
+ %tmp14844 = getelementptr inbounds float, float* %tmp14843, i64 1
+ %tmp14845 = getelementptr inbounds float, float* %tmp14844, i64 1
+ %tmp14846 = getelementptr inbounds float, float* %tmp14845, i64 1
+ %tmp14847 = getelementptr inbounds float, float* %tmp14846, i64 1
+ %tmp14848 = getelementptr inbounds float, float* %tmp14847, i64 1
+ %tmp14849 = getelementptr inbounds float, float* %tmp14848, i64 1
+ %tmp14850 = getelementptr inbounds float, float* %tmp14849, i64 1
+ %tmp14851 = getelementptr inbounds float, float* %tmp14850, i64 1
+ %tmp14852 = getelementptr inbounds float, float* %tmp14851, i64 1
+ %tmp14853 = getelementptr inbounds float, float* %tmp14852, i64 1
+ %tmp14854 = getelementptr inbounds float, float* %tmp14853, i64 1
+ %tmp14855 = getelementptr inbounds float, float* %tmp14854, i64 1
+ %tmp14856 = getelementptr inbounds float, float* %tmp14855, i64 1
+ %tmp14857 = getelementptr inbounds float, float* %tmp14856, i64 1
+ %tmp14858 = getelementptr inbounds float, float* %tmp14857, i64 1
+ %tmp14859 = getelementptr inbounds float, float* %tmp14858, i64 1
+ %tmp14860 = getelementptr inbounds float, float* %tmp14859, i64 1
+ %tmp14861 = getelementptr inbounds float, float* %tmp14860, i64 1
+ %tmp14862 = getelementptr inbounds float, float* %tmp14861, i64 1
+ %tmp14863 = getelementptr inbounds float, float* %tmp14862, i64 1
+ %tmp14864 = getelementptr inbounds float, float* %tmp14863, i64 1
+ %tmp14865 = getelementptr inbounds float, float* %tmp14864, i64 1
+ %tmp14866 = getelementptr inbounds float, float* %tmp14865, i64 1
+ %tmp14867 = getelementptr inbounds float, float* %tmp14866, i64 1
+ %tmp14868 = getelementptr inbounds float, float* %tmp14867, i64 1
+ %tmp14869 = getelementptr inbounds float, float* %tmp14868, i64 1
+ %tmp14870 = getelementptr inbounds float, float* %tmp14869, i64 1
+ %tmp14871 = getelementptr inbounds float, float* %tmp14870, i64 1
+ %tmp14872 = getelementptr inbounds float, float* %tmp14871, i64 1
+ %tmp14873 = getelementptr inbounds float, float* %tmp14872, i64 1
+ %tmp14874 = getelementptr inbounds float, float* %tmp14873, i64 1
+ %tmp14875 = getelementptr inbounds float, float* %tmp14874, i64 1
+ %tmp14876 = getelementptr inbounds float, float* %tmp14875, i64 1
+ %tmp14877 = getelementptr inbounds float, float* %tmp14876, i64 1
+ %tmp14878 = getelementptr inbounds float, float* %tmp14877, i64 1
+ %tmp14879 = getelementptr inbounds float, float* %tmp14878, i64 1
+ %tmp14880 = getelementptr inbounds float, float* %tmp14879, i64 1
+ %tmp14881 = getelementptr inbounds float, float* %tmp14880, i64 1
+ %tmp14882 = getelementptr inbounds float, float* %tmp14881, i64 1
+ %tmp14883 = getelementptr inbounds float, float* %tmp14882, i64 1
+ %tmp14884 = getelementptr inbounds float, float* %tmp14883, i64 1
+ %tmp14885 = getelementptr inbounds float, float* %tmp14884, i64 1
+ %tmp14886 = getelementptr inbounds float, float* %tmp14885, i64 1
+ %tmp14887 = getelementptr inbounds float, float* %tmp14886, i64 1
+ %tmp14888 = getelementptr inbounds float, float* %tmp14887, i64 1
+ %tmp14889 = getelementptr inbounds float, float* %tmp14888, i64 1
+ %tmp14890 = getelementptr inbounds float, float* %tmp14889, i64 1
+ %tmp14891 = getelementptr inbounds float, float* %tmp14890, i64 1
+ %tmp14892 = getelementptr inbounds float, float* %tmp14891, i64 1
+ %tmp14893 = getelementptr inbounds float, float* %tmp14892, i64 1
+ %tmp14894 = getelementptr inbounds float, float* %tmp14893, i64 1
+ %tmp14895 = getelementptr inbounds float, float* %tmp14894, i64 1
+ %tmp14896 = getelementptr inbounds float, float* %tmp14895, i64 1
+ %tmp14897 = getelementptr inbounds float, float* %tmp14896, i64 1
+ %tmp14898 = getelementptr inbounds float, float* %tmp14897, i64 1
+ %tmp14899 = getelementptr inbounds float, float* %tmp14898, i64 1
+ %tmp14900 = getelementptr inbounds float, float* %tmp14899, i64 1
+ %tmp14901 = getelementptr inbounds float, float* %tmp14900, i64 1
+ %tmp14902 = getelementptr inbounds float, float* %tmp14901, i64 1
+ %tmp14903 = getelementptr inbounds float, float* %tmp14902, i64 1
+ %tmp14904 = getelementptr inbounds float, float* %tmp14903, i64 1
+ %tmp14905 = getelementptr inbounds float, float* %tmp14904, i64 1
+ %tmp14906 = getelementptr inbounds float, float* %tmp14905, i64 1
+ %tmp14907 = getelementptr inbounds float, float* %tmp14906, i64 1
+ %tmp14908 = getelementptr inbounds float, float* %tmp14907, i64 1
+ %tmp14909 = getelementptr inbounds float, float* %tmp14908, i64 1
+ %tmp14910 = getelementptr inbounds float, float* %tmp14909, i64 1
+ %tmp14911 = getelementptr inbounds float, float* %tmp14910, i64 1
+ %tmp14912 = getelementptr inbounds float, float* %tmp14911, i64 1
+ %tmp14913 = getelementptr inbounds float, float* %tmp14912, i64 1
+ %tmp14914 = getelementptr inbounds float, float* %tmp14913, i64 1
+ %tmp14915 = getelementptr inbounds float, float* %tmp14914, i64 1
+ %tmp14916 = getelementptr inbounds float, float* %tmp14915, i64 1
+ %tmp14917 = getelementptr inbounds float, float* %tmp14916, i64 1
+ %tmp14918 = getelementptr inbounds float, float* %tmp14917, i64 1
+ %tmp14919 = getelementptr inbounds float, float* %tmp14918, i64 1
+ %tmp14920 = getelementptr inbounds float, float* %tmp14919, i64 1
+ %tmp14921 = getelementptr inbounds float, float* %tmp14920, i64 1
+ %tmp14922 = getelementptr inbounds float, float* %tmp14921, i64 1
+ %tmp14923 = getelementptr inbounds float, float* %tmp14922, i64 1
+ %tmp14924 = getelementptr inbounds float, float* %tmp14923, i64 1
+ %tmp14925 = getelementptr inbounds float, float* %tmp14924, i64 1
+ %tmp14926 = getelementptr inbounds float, float* %tmp14925, i64 1
+ %tmp14927 = getelementptr inbounds float, float* %tmp14926, i64 1
+ %tmp14928 = getelementptr inbounds float, float* %tmp14927, i64 1
+ %tmp14929 = getelementptr inbounds float, float* %tmp14928, i64 1
+ %tmp14930 = getelementptr inbounds float, float* %tmp14929, i64 1
+ %tmp14931 = getelementptr inbounds float, float* %tmp14930, i64 1
+ %tmp14932 = getelementptr inbounds float, float* %tmp14931, i64 1
+ %tmp14933 = getelementptr inbounds float, float* %tmp14932, i64 1
+ %tmp14934 = getelementptr inbounds float, float* %tmp14933, i64 1
+ %tmp14935 = getelementptr inbounds float, float* %tmp14934, i64 1
+ %tmp14936 = getelementptr inbounds float, float* %tmp14935, i64 1
+ %tmp14937 = getelementptr inbounds float, float* %tmp14936, i64 1
+ %tmp14938 = getelementptr inbounds float, float* %tmp14937, i64 1
+ %tmp14939 = getelementptr inbounds float, float* %tmp14938, i64 1
+ %tmp14940 = getelementptr inbounds float, float* %tmp14939, i64 1
+ %tmp14941 = getelementptr inbounds float, float* %tmp14940, i64 1
+ %tmp14942 = getelementptr inbounds float, float* %tmp14941, i64 1
+ %tmp14943 = getelementptr inbounds float, float* %tmp14942, i64 1
+ %tmp14944 = getelementptr inbounds float, float* %tmp14943, i64 1
+ %tmp14945 = getelementptr inbounds float, float* %tmp14944, i64 1
+ %tmp14946 = getelementptr inbounds float, float* %tmp14945, i64 1
+ %tmp14947 = getelementptr inbounds float, float* %tmp14946, i64 1
+ %tmp14948 = getelementptr inbounds float, float* %tmp14947, i64 1
+ %tmp14949 = getelementptr inbounds float, float* %tmp14948, i64 1
+ %tmp14950 = getelementptr inbounds float, float* %tmp14949, i64 1
+ %tmp14951 = getelementptr inbounds float, float* %tmp14950, i64 1
+ %tmp14952 = getelementptr inbounds float, float* %tmp14951, i64 1
+ %tmp14953 = getelementptr inbounds float, float* %tmp14952, i64 1
+ %tmp14954 = getelementptr inbounds float, float* %tmp14953, i64 1
+ %tmp14955 = getelementptr inbounds float, float* %tmp14954, i64 1
+ %tmp14956 = getelementptr inbounds float, float* %tmp14955, i64 1
+ %tmp14957 = getelementptr inbounds float, float* %tmp14956, i64 1
+ %tmp14958 = getelementptr inbounds float, float* %tmp14957, i64 1
+ %tmp14959 = getelementptr inbounds float, float* %tmp14958, i64 1
+ %tmp14960 = getelementptr inbounds float, float* %tmp14959, i64 1
+ %tmp14961 = getelementptr inbounds float, float* %tmp14960, i64 1
+ %tmp14962 = getelementptr inbounds float, float* %tmp14961, i64 1
+ %tmp14963 = getelementptr inbounds float, float* %tmp14962, i64 1
+ %tmp14964 = getelementptr inbounds float, float* %tmp14963, i64 1
+ %tmp14965 = getelementptr inbounds float, float* %tmp14964, i64 1
+ %tmp14966 = getelementptr inbounds float, float* %tmp14965, i64 1
+ %tmp14967 = getelementptr inbounds float, float* %tmp14966, i64 1
+ %tmp14968 = getelementptr inbounds float, float* %tmp14967, i64 1
+ %tmp14969 = getelementptr inbounds float, float* %tmp14968, i64 1
+ %tmp14970 = getelementptr inbounds float, float* %tmp14969, i64 1
+ %tmp14971 = getelementptr inbounds float, float* %tmp14970, i64 1
+ %tmp14972 = getelementptr inbounds float, float* %tmp14971, i64 1
+ %tmp14973 = getelementptr inbounds float, float* %tmp14972, i64 1
+ %tmp14974 = getelementptr inbounds float, float* %tmp14973, i64 1
+ %tmp14975 = getelementptr inbounds float, float* %tmp14974, i64 1
+ %tmp14976 = getelementptr inbounds float, float* %tmp14975, i64 1
+ %tmp14977 = getelementptr inbounds float, float* %tmp14976, i64 1
+ %tmp14978 = getelementptr inbounds float, float* %tmp14977, i64 1
+ %tmp14979 = getelementptr inbounds float, float* %tmp14978, i64 1
+ %tmp14980 = getelementptr inbounds float, float* %tmp14979, i64 1
+ %tmp14981 = getelementptr inbounds float, float* %tmp14980, i64 1
+ %tmp14982 = getelementptr inbounds float, float* %tmp14981, i64 1
+ %tmp14983 = getelementptr inbounds float, float* %tmp14982, i64 1
+ %tmp14984 = getelementptr inbounds float, float* %tmp14983, i64 1
+ %tmp14985 = getelementptr inbounds float, float* %tmp14984, i64 1
+ %tmp14986 = getelementptr inbounds float, float* %tmp14985, i64 1
+ %tmp14987 = getelementptr inbounds float, float* %tmp14986, i64 1
+ %tmp14988 = getelementptr inbounds float, float* %tmp14987, i64 1
+ %tmp14989 = getelementptr inbounds float, float* %tmp14988, i64 1
+ %tmp14990 = getelementptr inbounds float, float* %tmp14989, i64 1
+ %tmp14991 = getelementptr inbounds float, float* %tmp14990, i64 1
+ %tmp14992 = getelementptr inbounds float, float* %tmp14991, i64 1
+ %tmp14993 = getelementptr inbounds float, float* %tmp14992, i64 1
+ %tmp14994 = getelementptr inbounds float, float* %tmp14993, i64 1
+ %tmp14995 = getelementptr inbounds float, float* %tmp14994, i64 1
+ %tmp14996 = getelementptr inbounds float, float* %tmp14995, i64 1
+ %tmp14997 = getelementptr inbounds float, float* %tmp14996, i64 1
+ %tmp14998 = getelementptr inbounds float, float* %tmp14997, i64 1
+ %tmp14999 = getelementptr inbounds float, float* %tmp14998, i64 1
+ %tmp15000 = getelementptr inbounds float, float* %tmp14999, i64 1
+ %tmp15001 = getelementptr inbounds float, float* %tmp15000, i64 1
+ %tmp15002 = getelementptr inbounds float, float* %tmp15001, i64 1
+ %tmp15003 = getelementptr inbounds float, float* %tmp15002, i64 1
+ %tmp15004 = getelementptr inbounds float, float* %tmp15003, i64 1
+ %tmp15005 = getelementptr inbounds float, float* %tmp15004, i64 1
+ %tmp15006 = getelementptr inbounds float, float* %tmp15005, i64 1
+ %tmp15007 = getelementptr inbounds float, float* %tmp15006, i64 1
+ %tmp15008 = getelementptr inbounds float, float* %tmp15007, i64 1
+ %tmp15009 = getelementptr inbounds float, float* %tmp15008, i64 1
+ %tmp15010 = getelementptr inbounds float, float* %tmp15009, i64 1
+ %tmp15011 = getelementptr inbounds float, float* %tmp15010, i64 1
+ %tmp15012 = getelementptr inbounds float, float* %tmp15011, i64 1
+ %tmp15013 = getelementptr inbounds float, float* %tmp15012, i64 1
+ %tmp15014 = getelementptr inbounds float, float* %tmp15013, i64 1
+ %tmp15015 = getelementptr inbounds float, float* %tmp15014, i64 1
+ %tmp15016 = getelementptr inbounds float, float* %tmp15015, i64 1
+ %tmp15017 = getelementptr inbounds float, float* %tmp15016, i64 1
+ %tmp15018 = getelementptr inbounds float, float* %tmp15017, i64 1
+ %tmp15019 = getelementptr inbounds float, float* %tmp15018, i64 1
+ %tmp15020 = getelementptr inbounds float, float* %tmp15019, i64 1
+ %tmp15021 = getelementptr inbounds float, float* %tmp15020, i64 1
+ %tmp15022 = getelementptr inbounds float, float* %tmp15021, i64 1
+ %tmp15023 = getelementptr inbounds float, float* %tmp15022, i64 1
+ %tmp15024 = getelementptr inbounds float, float* %tmp15023, i64 1
+ %tmp15025 = getelementptr inbounds float, float* %tmp15024, i64 1
+ %tmp15026 = getelementptr inbounds float, float* %tmp15025, i64 1
+ %tmp15027 = getelementptr inbounds float, float* %tmp15026, i64 1
+ %tmp15028 = getelementptr inbounds float, float* %tmp15027, i64 1
+ %tmp15029 = getelementptr inbounds float, float* %tmp15028, i64 1
+ %tmp15030 = getelementptr inbounds float, float* %tmp15029, i64 1
+ %tmp15031 = getelementptr inbounds float, float* %tmp15030, i64 1
+ %tmp15032 = getelementptr inbounds float, float* %tmp15031, i64 1
+ %tmp15033 = getelementptr inbounds float, float* %tmp15032, i64 1
+ %tmp15034 = getelementptr inbounds float, float* %tmp15033, i64 1
+ %tmp15035 = getelementptr inbounds float, float* %tmp15034, i64 1
+ %tmp15036 = getelementptr inbounds float, float* %tmp15035, i64 1
+ %tmp15037 = getelementptr inbounds float, float* %tmp15036, i64 1
+ %tmp15038 = getelementptr inbounds float, float* %tmp15037, i64 1
+ %tmp15039 = getelementptr inbounds float, float* %tmp15038, i64 1
+ %tmp15040 = getelementptr inbounds float, float* %tmp15039, i64 1
+ %tmp15041 = getelementptr inbounds float, float* %tmp15040, i64 1
+ %tmp15042 = getelementptr inbounds float, float* %tmp15041, i64 1
+ %tmp15043 = getelementptr inbounds float, float* %tmp15042, i64 1
+ %tmp15044 = getelementptr inbounds float, float* %tmp15043, i64 1
+ %tmp15045 = getelementptr inbounds float, float* %tmp15044, i64 1
+ %tmp15046 = getelementptr inbounds float, float* %tmp15045, i64 1
+ %tmp15047 = getelementptr inbounds float, float* %tmp15046, i64 1
+ %tmp15048 = getelementptr inbounds float, float* %tmp15047, i64 1
+ %tmp15049 = getelementptr inbounds float, float* %tmp15048, i64 1
+ %tmp15050 = getelementptr inbounds float, float* %tmp15049, i64 1
+ %tmp15051 = getelementptr inbounds float, float* %tmp15050, i64 1
+ %tmp15052 = getelementptr inbounds float, float* %tmp15051, i64 1
+ %tmp15053 = getelementptr inbounds float, float* %tmp15052, i64 1
+ %tmp15054 = getelementptr inbounds float, float* %tmp15053, i64 1
+ %tmp15055 = getelementptr inbounds float, float* %tmp15054, i64 1
+ %tmp15056 = getelementptr inbounds float, float* %tmp15055, i64 1
+ %tmp15057 = getelementptr inbounds float, float* %tmp15056, i64 1
+ %tmp15058 = getelementptr inbounds float, float* %tmp15057, i64 1
+ %tmp15059 = getelementptr inbounds float, float* %tmp15058, i64 1
+ %tmp15060 = getelementptr inbounds float, float* %tmp15059, i64 1
+ %tmp15061 = getelementptr inbounds float, float* %tmp15060, i64 1
+ %tmp15062 = getelementptr inbounds float, float* %tmp15061, i64 1
+ %tmp15063 = getelementptr inbounds float, float* %tmp15062, i64 1
+ %tmp15064 = getelementptr inbounds float, float* %tmp15063, i64 1
+ %tmp15065 = getelementptr inbounds float, float* %tmp15064, i64 1
+ %tmp15066 = getelementptr inbounds float, float* %tmp15065, i64 1
+ %tmp15067 = getelementptr inbounds float, float* %tmp15066, i64 1
+ %tmp15068 = getelementptr inbounds float, float* %tmp15067, i64 1
+ %tmp15069 = getelementptr inbounds float, float* %tmp15068, i64 1
+ %tmp15070 = getelementptr inbounds float, float* %tmp15069, i64 1
+ %tmp15071 = getelementptr inbounds float, float* %tmp15070, i64 1
+ %tmp15072 = getelementptr inbounds float, float* %tmp15071, i64 1
+ %tmp15073 = getelementptr inbounds float, float* %tmp15072, i64 1
+ %tmp15074 = getelementptr inbounds float, float* %tmp15073, i64 1
+ %tmp15075 = getelementptr inbounds float, float* %tmp15074, i64 1
+ %tmp15076 = getelementptr inbounds float, float* %tmp15075, i64 1
+ %tmp15077 = getelementptr inbounds float, float* %tmp15076, i64 1
+ %tmp15078 = getelementptr inbounds float, float* %tmp15077, i64 1
+ %tmp15079 = getelementptr inbounds float, float* %tmp15078, i64 1
+ %tmp15080 = getelementptr inbounds float, float* %tmp15079, i64 1
+ %tmp15081 = getelementptr inbounds float, float* %tmp15080, i64 1
+ %tmp15082 = getelementptr inbounds float, float* %tmp15081, i64 1
+ %tmp15083 = getelementptr inbounds float, float* %tmp15082, i64 1
+ %tmp15084 = getelementptr inbounds float, float* %tmp15083, i64 1
+ %tmp15085 = getelementptr inbounds float, float* %tmp15084, i64 1
+ %tmp15086 = getelementptr inbounds float, float* %tmp15085, i64 1
+ %tmp15087 = getelementptr inbounds float, float* %tmp15086, i64 1
+ %tmp15088 = getelementptr inbounds float, float* %tmp15087, i64 1
+ %tmp15089 = getelementptr inbounds float, float* %tmp15088, i64 1
+ %tmp15090 = getelementptr inbounds float, float* %tmp15089, i64 1
+ %tmp15091 = getelementptr inbounds float, float* %tmp15090, i64 1
+ %tmp15092 = getelementptr inbounds float, float* %tmp15091, i64 1
+ %tmp15093 = getelementptr inbounds float, float* %tmp15092, i64 1
+ %tmp15094 = getelementptr inbounds float, float* %tmp15093, i64 1
+ %tmp15095 = getelementptr inbounds float, float* %tmp15094, i64 1
+ %tmp15096 = getelementptr inbounds float, float* %tmp15095, i64 1
+ %tmp15097 = getelementptr inbounds float, float* %tmp15096, i64 1
+ %tmp15098 = getelementptr inbounds float, float* %tmp15097, i64 1
+ %tmp15099 = getelementptr inbounds float, float* %tmp15098, i64 1
+ %tmp15100 = getelementptr inbounds float, float* %tmp15099, i64 1
+ %tmp15101 = getelementptr inbounds float, float* %tmp15100, i64 1
+ %tmp15102 = getelementptr inbounds float, float* %tmp15101, i64 1
+ %tmp15103 = getelementptr inbounds float, float* %tmp15102, i64 1
+ %tmp15104 = getelementptr inbounds float, float* %tmp15103, i64 1
+ %tmp15105 = getelementptr inbounds float, float* %tmp15104, i64 1
+ %tmp15106 = getelementptr inbounds float, float* %tmp15105, i64 1
+ %tmp15107 = getelementptr inbounds float, float* %tmp15106, i64 1
+ %tmp15108 = getelementptr inbounds float, float* %tmp15107, i64 1
+ %tmp15109 = getelementptr inbounds float, float* %tmp15108, i64 1
+ %tmp15110 = getelementptr inbounds float, float* %tmp15109, i64 1
+ %tmp15111 = getelementptr inbounds float, float* %tmp15110, i64 1
+ %tmp15112 = getelementptr inbounds float, float* %tmp15111, i64 1
+ %tmp15113 = getelementptr inbounds float, float* %tmp15112, i64 1
+ %tmp15114 = getelementptr inbounds float, float* %tmp15113, i64 1
+ %tmp15115 = getelementptr inbounds float, float* %tmp15114, i64 1
+ %tmp15116 = getelementptr inbounds float, float* %tmp15115, i64 1
+ %tmp15117 = getelementptr inbounds float, float* %tmp15116, i64 1
+ %tmp15118 = getelementptr inbounds float, float* %tmp15117, i64 1
+ %tmp15119 = getelementptr inbounds float, float* %tmp15118, i64 1
+ %tmp15120 = getelementptr inbounds float, float* %tmp15119, i64 1
+ %tmp15121 = getelementptr inbounds float, float* %tmp15120, i64 1
+ %tmp15122 = getelementptr inbounds float, float* %tmp15121, i64 1
+ %tmp15123 = getelementptr inbounds float, float* %tmp15122, i64 1
+ %tmp15124 = getelementptr inbounds float, float* %tmp15123, i64 1
+ %tmp15125 = getelementptr inbounds float, float* %tmp15124, i64 1
+ %tmp15126 = getelementptr inbounds float, float* %tmp15125, i64 1
+ %tmp15127 = getelementptr inbounds float, float* %tmp15126, i64 1
+ %tmp15128 = getelementptr inbounds float, float* %tmp15127, i64 1
+ %tmp15129 = getelementptr inbounds float, float* %tmp15128, i64 1
+ %tmp15130 = getelementptr inbounds float, float* %tmp15129, i64 1
+ %tmp15131 = getelementptr inbounds float, float* %tmp15130, i64 1
+ %tmp15132 = getelementptr inbounds float, float* %tmp15131, i64 1
+ %tmp15133 = getelementptr inbounds float, float* %tmp15132, i64 1
+ %tmp15134 = getelementptr inbounds float, float* %tmp15133, i64 1
+ %tmp15135 = getelementptr inbounds float, float* %tmp15134, i64 1
+ %tmp15136 = getelementptr inbounds float, float* %tmp15135, i64 1
+ %tmp15137 = getelementptr inbounds float, float* %tmp15136, i64 1
+ %tmp15138 = getelementptr inbounds float, float* %tmp15137, i64 1
+ %tmp15139 = getelementptr inbounds float, float* %tmp15138, i64 1
+ %tmp15140 = getelementptr inbounds float, float* %tmp15139, i64 1
+ %tmp15141 = getelementptr inbounds float, float* %tmp15140, i64 1
+ %tmp15142 = getelementptr inbounds float, float* %tmp15141, i64 1
+ %tmp15143 = getelementptr inbounds float, float* %tmp15142, i64 1
+ %tmp15144 = getelementptr inbounds float, float* %tmp15143, i64 1
+ %tmp15145 = getelementptr inbounds float, float* %tmp15144, i64 1
+ %tmp15146 = getelementptr inbounds float, float* %tmp15145, i64 1
+ %tmp15147 = getelementptr inbounds float, float* %tmp15146, i64 1
+ %tmp15148 = getelementptr inbounds float, float* %tmp15147, i64 1
+ %tmp15149 = getelementptr inbounds float, float* %tmp15148, i64 1
+ %tmp15150 = getelementptr inbounds float, float* %tmp15149, i64 1
+ %tmp15151 = getelementptr inbounds float, float* %tmp15150, i64 1
+ %tmp15152 = getelementptr inbounds float, float* %tmp15151, i64 1
+ %tmp15153 = getelementptr inbounds float, float* %tmp15152, i64 1
+ %tmp15154 = getelementptr inbounds float, float* %tmp15153, i64 1
+ %tmp15155 = getelementptr inbounds float, float* %tmp15154, i64 1
+ %tmp15156 = getelementptr inbounds float, float* %tmp15155, i64 1
+ %tmp15157 = getelementptr inbounds float, float* %tmp15156, i64 1
+ %tmp15158 = getelementptr inbounds float, float* %tmp15157, i64 1
+ %tmp15159 = getelementptr inbounds float, float* %tmp15158, i64 1
+ %tmp15160 = getelementptr inbounds float, float* %tmp15159, i64 1
+ %tmp15161 = getelementptr inbounds float, float* %tmp15160, i64 1
+ %tmp15162 = getelementptr inbounds float, float* %tmp15161, i64 1
+ %tmp15163 = getelementptr inbounds float, float* %tmp15162, i64 1
+ %tmp15164 = getelementptr inbounds float, float* %tmp15163, i64 1
+ %tmp15165 = getelementptr inbounds float, float* %tmp15164, i64 1
+ %tmp15166 = getelementptr inbounds float, float* %tmp15165, i64 1
+ %tmp15167 = getelementptr inbounds float, float* %tmp15166, i64 1
+ %tmp15168 = getelementptr inbounds float, float* %tmp15167, i64 1
+ %tmp15169 = getelementptr inbounds float, float* %tmp15168, i64 1
+ %tmp15170 = getelementptr inbounds float, float* %tmp15169, i64 1
+ %tmp15171 = getelementptr inbounds float, float* %tmp15170, i64 1
+ %tmp15172 = getelementptr inbounds float, float* %tmp15171, i64 1
+ %tmp15173 = getelementptr inbounds float, float* %tmp15172, i64 1
+ %tmp15174 = getelementptr inbounds float, float* %tmp15173, i64 1
+ %tmp15175 = getelementptr inbounds float, float* %tmp15174, i64 1
+ %tmp15176 = getelementptr inbounds float, float* %tmp15175, i64 1
+ %tmp15177 = getelementptr inbounds float, float* %tmp15176, i64 1
+ %tmp15178 = getelementptr inbounds float, float* %tmp15177, i64 1
+ %tmp15179 = getelementptr inbounds float, float* %tmp15178, i64 1
+ %tmp15180 = getelementptr inbounds float, float* %tmp15179, i64 1
+ %tmp15181 = getelementptr inbounds float, float* %tmp15180, i64 1
+ %tmp15182 = getelementptr inbounds float, float* %tmp15181, i64 1
+ %tmp15183 = getelementptr inbounds float, float* %tmp15182, i64 1
+ %tmp15184 = getelementptr inbounds float, float* %tmp15183, i64 1
+ %tmp15185 = getelementptr inbounds float, float* %tmp15184, i64 1
+ %tmp15186 = getelementptr inbounds float, float* %tmp15185, i64 1
+ %tmp15187 = getelementptr inbounds float, float* %tmp15186, i64 1
+ %tmp15188 = getelementptr inbounds float, float* %tmp15187, i64 1
+ %tmp15189 = getelementptr inbounds float, float* %tmp15188, i64 1
+ %tmp15190 = getelementptr inbounds float, float* %tmp15189, i64 1
+ %tmp15191 = getelementptr inbounds float, float* %tmp15190, i64 1
+ %tmp15192 = getelementptr inbounds float, float* %tmp15191, i64 1
+ %tmp15193 = getelementptr inbounds float, float* %tmp15192, i64 1
+ %tmp15194 = getelementptr inbounds float, float* %tmp15193, i64 1
+ %tmp15195 = getelementptr inbounds float, float* %tmp15194, i64 1
+ %tmp15196 = getelementptr inbounds float, float* %tmp15195, i64 1
+ %tmp15197 = getelementptr inbounds float, float* %tmp15196, i64 1
+ %tmp15198 = getelementptr inbounds float, float* %tmp15197, i64 1
+ %tmp15199 = getelementptr inbounds float, float* %tmp15198, i64 1
+ %tmp15200 = getelementptr inbounds float, float* %tmp15199, i64 1
+ %tmp15201 = getelementptr inbounds float, float* %tmp15200, i64 1
+ %tmp15202 = getelementptr inbounds float, float* %tmp15201, i64 1
+ %tmp15203 = getelementptr inbounds float, float* %tmp15202, i64 1
+ %tmp15204 = getelementptr inbounds float, float* %tmp15203, i64 1
+ %tmp15205 = getelementptr inbounds float, float* %tmp15204, i64 1
+ %tmp15206 = getelementptr inbounds float, float* %tmp15205, i64 1
+ %tmp15207 = getelementptr inbounds float, float* %tmp15206, i64 1
+ %tmp15208 = getelementptr inbounds float, float* %tmp15207, i64 1
+ %tmp15209 = getelementptr inbounds float, float* %tmp15208, i64 1
+ %tmp15210 = getelementptr inbounds float, float* %tmp15209, i64 1
+ %tmp15211 = getelementptr inbounds float, float* %tmp15210, i64 1
+ %tmp15212 = getelementptr inbounds float, float* %tmp15211, i64 1
+ %tmp15213 = getelementptr inbounds float, float* %tmp15212, i64 1
+ %tmp15214 = getelementptr inbounds float, float* %tmp15213, i64 1
+ %tmp15215 = getelementptr inbounds float, float* %tmp15214, i64 1
+ %tmp15216 = getelementptr inbounds float, float* %tmp15215, i64 1
+ %tmp15217 = getelementptr inbounds float, float* %tmp15216, i64 1
+ %tmp15218 = getelementptr inbounds float, float* %tmp15217, i64 1
+ %tmp15219 = getelementptr inbounds float, float* %tmp15218, i64 1
+ %tmp15220 = getelementptr inbounds float, float* %tmp15219, i64 1
+ %tmp15221 = getelementptr inbounds float, float* %tmp15220, i64 1
+ %tmp15222 = getelementptr inbounds float, float* %tmp15221, i64 1
+ %tmp15223 = getelementptr inbounds float, float* %tmp15222, i64 1
+ %tmp15224 = getelementptr inbounds float, float* %tmp15223, i64 1
+ %tmp15225 = getelementptr inbounds float, float* %tmp15224, i64 1
+ %tmp15226 = getelementptr inbounds float, float* %tmp15225, i64 1
+ %tmp15227 = getelementptr inbounds float, float* %tmp15226, i64 1
+ %tmp15228 = getelementptr inbounds float, float* %tmp15227, i64 1
+ %tmp15229 = getelementptr inbounds float, float* %tmp15228, i64 1
+ %tmp15230 = getelementptr inbounds float, float* %tmp15229, i64 1
+ %tmp15231 = getelementptr inbounds float, float* %tmp15230, i64 1
+ %tmp15232 = getelementptr inbounds float, float* %tmp15231, i64 1
+ %tmp15233 = getelementptr inbounds float, float* %tmp15232, i64 1
+ %tmp15234 = getelementptr inbounds float, float* %tmp15233, i64 1
+ %tmp15235 = getelementptr inbounds float, float* %tmp15234, i64 1
+ %tmp15236 = getelementptr inbounds float, float* %tmp15235, i64 1
+ %tmp15237 = getelementptr inbounds float, float* %tmp15236, i64 1
+ %tmp15238 = getelementptr inbounds float, float* %tmp15237, i64 1
+ %tmp15239 = getelementptr inbounds float, float* %tmp15238, i64 1
+ %tmp15240 = getelementptr inbounds float, float* %tmp15239, i64 1
+ %tmp15241 = getelementptr inbounds float, float* %tmp15240, i64 1
+ %tmp15242 = getelementptr inbounds float, float* %tmp15241, i64 1
+ %tmp15243 = getelementptr inbounds float, float* %tmp15242, i64 1
+ %tmp15244 = getelementptr inbounds float, float* %tmp15243, i64 1
+ %tmp15245 = getelementptr inbounds float, float* %tmp15244, i64 1
+ %tmp15246 = getelementptr inbounds float, float* %tmp15245, i64 1
+ %tmp15247 = getelementptr inbounds float, float* %tmp15246, i64 1
+ %tmp15248 = getelementptr inbounds float, float* %tmp15247, i64 1
+ %tmp15249 = getelementptr inbounds float, float* %tmp15248, i64 1
+ %tmp15250 = getelementptr inbounds float, float* %tmp15249, i64 1
+ %tmp15251 = getelementptr inbounds float, float* %tmp15250, i64 1
+ %tmp15252 = getelementptr inbounds float, float* %tmp15251, i64 1
+ %tmp15253 = getelementptr inbounds float, float* %tmp15252, i64 1
+ %tmp15254 = getelementptr inbounds float, float* %tmp15253, i64 1
+ %tmp15255 = getelementptr inbounds float, float* %tmp15254, i64 1
+ %tmp15256 = getelementptr inbounds float, float* %tmp15255, i64 1
+ %tmp15257 = getelementptr inbounds float, float* %tmp15256, i64 1
+ %tmp15258 = getelementptr inbounds float, float* %tmp15257, i64 1
+ %tmp15259 = getelementptr inbounds float, float* %tmp15258, i64 1
+ %tmp15260 = getelementptr inbounds float, float* %tmp15259, i64 1
+ %tmp15261 = getelementptr inbounds float, float* %tmp15260, i64 1
+ %tmp15262 = getelementptr inbounds float, float* %tmp15261, i64 1
+ %tmp15263 = getelementptr inbounds float, float* %tmp15262, i64 1
+ %tmp15264 = getelementptr inbounds float, float* %tmp15263, i64 1
+ %tmp15265 = getelementptr inbounds float, float* %tmp15264, i64 1
+ %tmp15266 = getelementptr inbounds float, float* %tmp15265, i64 1
+ %tmp15267 = getelementptr inbounds float, float* %tmp15266, i64 1
+ %tmp15268 = getelementptr inbounds float, float* %tmp15267, i64 1
+ %tmp15269 = getelementptr inbounds float, float* %tmp15268, i64 1
+ %tmp15270 = getelementptr inbounds float, float* %tmp15269, i64 1
+ %tmp15271 = getelementptr inbounds float, float* %tmp15270, i64 1
+ %tmp15272 = getelementptr inbounds float, float* %tmp15271, i64 1
+ %tmp15273 = getelementptr inbounds float, float* %tmp15272, i64 1
+ %tmp15274 = getelementptr inbounds float, float* %tmp15273, i64 1
+ %tmp15275 = getelementptr inbounds float, float* %tmp15274, i64 1
+ %tmp15276 = getelementptr inbounds float, float* %tmp15275, i64 1
+ %tmp15277 = getelementptr inbounds float, float* %tmp15276, i64 1
+ %tmp15278 = getelementptr inbounds float, float* %tmp15277, i64 1
+ %tmp15279 = getelementptr inbounds float, float* %tmp15278, i64 1
+ %tmp15280 = getelementptr inbounds float, float* %tmp15279, i64 1
+ %tmp15281 = getelementptr inbounds float, float* %tmp15280, i64 1
+ %tmp15282 = getelementptr inbounds float, float* %tmp15281, i64 1
+ %tmp15283 = getelementptr inbounds float, float* %tmp15282, i64 1
+ %tmp15284 = getelementptr inbounds float, float* %tmp15283, i64 1
+ %tmp15285 = getelementptr inbounds float, float* %tmp15284, i64 1
+ %tmp15286 = getelementptr inbounds float, float* %tmp15285, i64 1
+ %tmp15287 = getelementptr inbounds float, float* %tmp15286, i64 1
+ %tmp15288 = getelementptr inbounds float, float* %tmp15287, i64 1
+ %tmp15289 = getelementptr inbounds float, float* %tmp15288, i64 1
+ %tmp15290 = getelementptr inbounds float, float* %tmp15289, i64 1
+ %tmp15291 = getelementptr inbounds float, float* %tmp15290, i64 1
+ %tmp15292 = getelementptr inbounds float, float* %tmp15291, i64 1
+ %tmp15293 = getelementptr inbounds float, float* %tmp15292, i64 1
+ %tmp15294 = getelementptr inbounds float, float* %tmp15293, i64 1
+ %tmp15295 = getelementptr inbounds float, float* %tmp15294, i64 1
+ %tmp15296 = getelementptr inbounds float, float* %tmp15295, i64 1
+ %tmp15297 = getelementptr inbounds float, float* %tmp15296, i64 1
+ %tmp15298 = getelementptr inbounds float, float* %tmp15297, i64 1
+ %tmp15299 = getelementptr inbounds float, float* %tmp15298, i64 1
+ %tmp15300 = getelementptr inbounds float, float* %tmp15299, i64 1
+ %tmp15301 = getelementptr inbounds float, float* %tmp15300, i64 1
+ %tmp15302 = getelementptr inbounds float, float* %tmp15301, i64 1
+ %tmp15303 = getelementptr inbounds float, float* %tmp15302, i64 1
+ %tmp15304 = getelementptr inbounds float, float* %tmp15303, i64 1
+ %tmp15305 = getelementptr inbounds float, float* %tmp15304, i64 1
+ %tmp15306 = getelementptr inbounds float, float* %tmp15305, i64 1
+ %tmp15307 = getelementptr inbounds float, float* %tmp15306, i64 1
+ %tmp15308 = getelementptr inbounds float, float* %tmp15307, i64 1
+ %tmp15309 = getelementptr inbounds float, float* %tmp15308, i64 1
+ %tmp15310 = getelementptr inbounds float, float* %tmp15309, i64 1
+ %tmp15311 = getelementptr inbounds float, float* %tmp15310, i64 1
+ %tmp15312 = getelementptr inbounds float, float* %tmp15311, i64 1
+ %tmp15313 = getelementptr inbounds float, float* %tmp15312, i64 1
+ %tmp15314 = getelementptr inbounds float, float* %tmp15313, i64 1
+ %tmp15315 = getelementptr inbounds float, float* %tmp15314, i64 1
+ %tmp15316 = getelementptr inbounds float, float* %tmp15315, i64 1
+ %tmp15317 = getelementptr inbounds float, float* %tmp15316, i64 1
+ %tmp15318 = getelementptr inbounds float, float* %tmp15317, i64 1
+ %tmp15319 = getelementptr inbounds float, float* %tmp15318, i64 1
+ %tmp15320 = getelementptr inbounds float, float* %tmp15319, i64 1
+ %tmp15321 = getelementptr inbounds float, float* %tmp15320, i64 1
+ %tmp15322 = getelementptr inbounds float, float* %tmp15321, i64 1
+ %tmp15323 = getelementptr inbounds float, float* %tmp15322, i64 1
+ %tmp15324 = getelementptr inbounds float, float* %tmp15323, i64 1
+ %tmp15325 = getelementptr inbounds float, float* %tmp15324, i64 1
+ %tmp15326 = getelementptr inbounds float, float* %tmp15325, i64 1
+ %tmp15327 = getelementptr inbounds float, float* %tmp15326, i64 1
+ %tmp15328 = getelementptr inbounds float, float* %tmp15327, i64 1
+ %tmp15329 = getelementptr inbounds float, float* %tmp15328, i64 1
+ %tmp15330 = getelementptr inbounds float, float* %tmp15329, i64 1
+ %tmp15331 = getelementptr inbounds float, float* %tmp15330, i64 1
+ %tmp15332 = getelementptr inbounds float, float* %tmp15331, i64 1
+ %tmp15333 = getelementptr inbounds float, float* %tmp15332, i64 1
+ %tmp15334 = getelementptr inbounds float, float* %tmp15333, i64 1
+ %tmp15335 = getelementptr inbounds float, float* %tmp15334, i64 1
+ %tmp15336 = getelementptr inbounds float, float* %tmp15335, i64 1
+ %tmp15337 = getelementptr inbounds float, float* %tmp15336, i64 1
+ %tmp15338 = getelementptr inbounds float, float* %tmp15337, i64 1
+ %tmp15339 = getelementptr inbounds float, float* %tmp15338, i64 1
+ %tmp15340 = getelementptr inbounds float, float* %tmp15339, i64 1
+ %tmp15341 = getelementptr inbounds float, float* %tmp15340, i64 1
+ %tmp15342 = getelementptr inbounds float, float* %tmp15341, i64 1
+ %tmp15343 = getelementptr inbounds float, float* %tmp15342, i64 1
+ %tmp15344 = getelementptr inbounds float, float* %tmp15343, i64 1
+ %tmp15345 = getelementptr inbounds float, float* %tmp15344, i64 1
+ %tmp15346 = getelementptr inbounds float, float* %tmp15345, i64 1
+ %tmp15347 = getelementptr inbounds float, float* %tmp15346, i64 1
+ %tmp15348 = getelementptr inbounds float, float* %tmp15347, i64 1
+ %tmp15349 = getelementptr inbounds float, float* %tmp15348, i64 1
+ %tmp15350 = getelementptr inbounds float, float* %tmp15349, i64 1
+ %tmp15351 = getelementptr inbounds float, float* %tmp15350, i64 1
+ %tmp15352 = getelementptr inbounds float, float* %tmp15351, i64 1
+ %tmp15353 = getelementptr inbounds float, float* %tmp15352, i64 1
+ %tmp15354 = getelementptr inbounds float, float* %tmp15353, i64 1
+ %tmp15355 = getelementptr inbounds float, float* %tmp15354, i64 1
+ %tmp15356 = getelementptr inbounds float, float* %tmp15355, i64 1
+ %tmp15357 = getelementptr inbounds float, float* %tmp15356, i64 1
+ %tmp15358 = getelementptr inbounds float, float* %tmp15357, i64 1
+ %tmp15359 = getelementptr inbounds float, float* %tmp15358, i64 1
+ %tmp15360 = getelementptr inbounds float, float* %tmp15359, i64 1
+ %tmp15361 = getelementptr inbounds float, float* %tmp15360, i64 1
+ %tmp15362 = getelementptr inbounds float, float* %tmp15361, i64 1
+ %tmp15363 = getelementptr inbounds float, float* %tmp15362, i64 1
+ %tmp15364 = getelementptr inbounds float, float* %tmp15363, i64 1
+ %tmp15365 = getelementptr inbounds float, float* %tmp15364, i64 1
+ %tmp15366 = getelementptr inbounds float, float* %tmp15365, i64 1
+ %tmp15367 = getelementptr inbounds float, float* %tmp15366, i64 1
+ %tmp15368 = getelementptr inbounds float, float* %tmp15367, i64 1
+ %tmp15369 = getelementptr inbounds float, float* %tmp15368, i64 1
+ %tmp15370 = getelementptr inbounds float, float* %tmp15369, i64 1
+ %tmp15371 = getelementptr inbounds float, float* %tmp15370, i64 1
+ %tmp15372 = getelementptr inbounds float, float* %tmp15371, i64 1
+ %tmp15373 = getelementptr inbounds float, float* %tmp15372, i64 1
+ %tmp15374 = getelementptr inbounds float, float* %tmp15373, i64 1
+ %tmp15375 = getelementptr inbounds float, float* %tmp15374, i64 1
+ %tmp15376 = getelementptr inbounds float, float* %tmp15375, i64 1
+ %tmp15377 = getelementptr inbounds float, float* %tmp15376, i64 1
+ %tmp15378 = getelementptr inbounds float, float* %tmp15377, i64 1
+ %tmp15379 = getelementptr inbounds float, float* %tmp15378, i64 1
+ %tmp15380 = getelementptr inbounds float, float* %tmp15379, i64 1
+ %tmp15381 = getelementptr inbounds float, float* %tmp15380, i64 1
+ %tmp15382 = getelementptr inbounds float, float* %tmp15381, i64 1
+ %tmp15383 = getelementptr inbounds float, float* %tmp15382, i64 1
+ %tmp15384 = getelementptr inbounds float, float* %tmp15383, i64 1
+ %tmp15385 = getelementptr inbounds float, float* %tmp15384, i64 1
+ %tmp15386 = getelementptr inbounds float, float* %tmp15385, i64 1
+ %tmp15387 = getelementptr inbounds float, float* %tmp15386, i64 1
+ %tmp15388 = getelementptr inbounds float, float* %tmp15387, i64 1
+ %tmp15389 = getelementptr inbounds float, float* %tmp15388, i64 1
+ %tmp15390 = getelementptr inbounds float, float* %tmp15389, i64 1
+ %tmp15391 = getelementptr inbounds float, float* %tmp15390, i64 1
+ %tmp15392 = getelementptr inbounds float, float* %tmp15391, i64 1
+ %tmp15393 = getelementptr inbounds float, float* %tmp15392, i64 1
+ %tmp15394 = getelementptr inbounds float, float* %tmp15393, i64 1
+ %tmp15395 = getelementptr inbounds float, float* %tmp15394, i64 1
+ %tmp15396 = getelementptr inbounds float, float* %tmp15395, i64 1
+ %tmp15397 = getelementptr inbounds float, float* %tmp15396, i64 1
+ %tmp15398 = getelementptr inbounds float, float* %tmp15397, i64 1
+ %tmp15399 = getelementptr inbounds float, float* %tmp15398, i64 1
+ %tmp15400 = getelementptr inbounds float, float* %tmp15399, i64 1
+ %tmp15401 = getelementptr inbounds float, float* %tmp15400, i64 1
+ %tmp15402 = getelementptr inbounds float, float* %tmp15401, i64 1
+ %tmp15403 = getelementptr inbounds float, float* %tmp15402, i64 1
+ %tmp15404 = getelementptr inbounds float, float* %tmp15403, i64 1
+ %tmp15405 = getelementptr inbounds float, float* %tmp15404, i64 1
+ %tmp15406 = getelementptr inbounds float, float* %tmp15405, i64 1
+ %tmp15407 = getelementptr inbounds float, float* %tmp15406, i64 1
+ %tmp15408 = getelementptr inbounds float, float* %tmp15407, i64 1
+ %tmp15409 = getelementptr inbounds float, float* %tmp15408, i64 1
+ %tmp15410 = getelementptr inbounds float, float* %tmp15409, i64 1
+ %tmp15411 = getelementptr inbounds float, float* %tmp15410, i64 1
+ %tmp15412 = getelementptr inbounds float, float* %tmp15411, i64 1
+ %tmp15413 = getelementptr inbounds float, float* %tmp15412, i64 1
+ %tmp15414 = getelementptr inbounds float, float* %tmp15413, i64 1
+ %tmp15415 = getelementptr inbounds float, float* %tmp15414, i64 1
+ %tmp15416 = getelementptr inbounds float, float* %tmp15415, i64 1
+ %tmp15417 = getelementptr inbounds float, float* %tmp15416, i64 1
+ %tmp15418 = getelementptr inbounds float, float* %tmp15417, i64 1
+ %tmp15419 = getelementptr inbounds float, float* %tmp15418, i64 1
+ %tmp15420 = getelementptr inbounds float, float* %tmp15419, i64 1
+ %tmp15421 = getelementptr inbounds float, float* %tmp15420, i64 1
+ %tmp15422 = getelementptr inbounds float, float* %tmp15421, i64 1
+ %tmp15423 = getelementptr inbounds float, float* %tmp15422, i64 1
+ %tmp15424 = getelementptr inbounds float, float* %tmp15423, i64 1
+ %tmp15425 = getelementptr inbounds float, float* %tmp15424, i64 1
+ %tmp15426 = getelementptr inbounds float, float* %tmp15425, i64 1
+ %tmp15427 = getelementptr inbounds float, float* %tmp15426, i64 1
+ %tmp15428 = getelementptr inbounds float, float* %tmp15427, i64 1
+ %tmp15429 = getelementptr inbounds float, float* %tmp15428, i64 1
+ %tmp15430 = getelementptr inbounds float, float* %tmp15429, i64 1
+ %tmp15431 = getelementptr inbounds float, float* %tmp15430, i64 1
+ %tmp15432 = getelementptr inbounds float, float* %tmp15431, i64 1
+ %tmp15433 = getelementptr inbounds float, float* %tmp15432, i64 1
+ %tmp15434 = getelementptr inbounds float, float* %tmp15433, i64 1
+ %tmp15435 = getelementptr inbounds float, float* %tmp15434, i64 1
+ %tmp15436 = getelementptr inbounds float, float* %tmp15435, i64 1
+ %tmp15437 = getelementptr inbounds float, float* %tmp15436, i64 1
+ %tmp15438 = getelementptr inbounds float, float* %tmp15437, i64 1
+ %tmp15439 = getelementptr inbounds float, float* %tmp15438, i64 1
+ %tmp15440 = getelementptr inbounds float, float* %tmp15439, i64 1
+ %tmp15441 = getelementptr inbounds float, float* %tmp15440, i64 1
+ %tmp15442 = getelementptr inbounds float, float* %tmp15441, i64 1
+ %tmp15443 = getelementptr inbounds float, float* %tmp15442, i64 1
+ %tmp15444 = getelementptr inbounds float, float* %tmp15443, i64 1
+ %tmp15445 = getelementptr inbounds float, float* %tmp15444, i64 1
+ %tmp15446 = getelementptr inbounds float, float* %tmp15445, i64 1
+ %tmp15447 = getelementptr inbounds float, float* %tmp15446, i64 1
+ %tmp15448 = getelementptr inbounds float, float* %tmp15447, i64 1
+ %tmp15449 = getelementptr inbounds float, float* %tmp15448, i64 1
+ %tmp15450 = getelementptr inbounds float, float* %tmp15449, i64 1
+ %tmp15451 = getelementptr inbounds float, float* %tmp15450, i64 1
+ %tmp15452 = getelementptr inbounds float, float* %tmp15451, i64 1
+ %tmp15453 = getelementptr inbounds float, float* %tmp15452, i64 1
+ %tmp15454 = getelementptr inbounds float, float* %tmp15453, i64 1
+ %tmp15455 = getelementptr inbounds float, float* %tmp15454, i64 1
+ %tmp15456 = getelementptr inbounds float, float* %tmp15455, i64 1
+ %tmp15457 = getelementptr inbounds float, float* %tmp15456, i64 1
+ %tmp15458 = getelementptr inbounds float, float* %tmp15457, i64 1
+ %tmp15459 = getelementptr inbounds float, float* %tmp15458, i64 1
+ %tmp15460 = getelementptr inbounds float, float* %tmp15459, i64 1
+ %tmp15461 = getelementptr inbounds float, float* %tmp15460, i64 1
+ %tmp15462 = getelementptr inbounds float, float* %tmp15461, i64 1
+ %tmp15463 = getelementptr inbounds float, float* %tmp15462, i64 1
+ %tmp15464 = getelementptr inbounds float, float* %tmp15463, i64 1
+ %tmp15465 = getelementptr inbounds float, float* %tmp15464, i64 1
+ %tmp15466 = getelementptr inbounds float, float* %tmp15465, i64 1
+ %tmp15467 = getelementptr inbounds float, float* %tmp15466, i64 1
+ %tmp15468 = getelementptr inbounds float, float* %tmp15467, i64 1
+ %tmp15469 = getelementptr inbounds float, float* %tmp15468, i64 1
+ %tmp15470 = getelementptr inbounds float, float* %tmp15469, i64 1
+ %tmp15471 = getelementptr inbounds float, float* %tmp15470, i64 1
+ %tmp15472 = getelementptr inbounds float, float* %tmp15471, i64 1
+ %tmp15473 = getelementptr inbounds float, float* %tmp15472, i64 1
+ %tmp15474 = getelementptr inbounds float, float* %tmp15473, i64 1
+ %tmp15475 = getelementptr inbounds float, float* %tmp15474, i64 1
+ %tmp15476 = getelementptr inbounds float, float* %tmp15475, i64 1
+ %tmp15477 = getelementptr inbounds float, float* %tmp15476, i64 1
+ %tmp15478 = getelementptr inbounds float, float* %tmp15477, i64 1
+ %tmp15479 = getelementptr inbounds float, float* %tmp15478, i64 1
+ %tmp15480 = getelementptr inbounds float, float* %tmp15479, i64 1
+ %tmp15481 = getelementptr inbounds float, float* %tmp15480, i64 1
+ %tmp15482 = getelementptr inbounds float, float* %tmp15481, i64 1
+ %tmp15483 = getelementptr inbounds float, float* %tmp15482, i64 1
+ %tmp15484 = getelementptr inbounds float, float* %tmp15483, i64 1
+ %tmp15485 = getelementptr inbounds float, float* %tmp15484, i64 1
+ %tmp15486 = getelementptr inbounds float, float* %tmp15485, i64 1
+ %tmp15487 = getelementptr inbounds float, float* %tmp15486, i64 1
+ %tmp15488 = getelementptr inbounds float, float* %tmp15487, i64 1
+ %tmp15489 = getelementptr inbounds float, float* %tmp15488, i64 1
+ %tmp15490 = getelementptr inbounds float, float* %tmp15489, i64 1
+ %tmp15491 = getelementptr inbounds float, float* %tmp15490, i64 1
+ %tmp15492 = getelementptr inbounds float, float* %tmp15491, i64 1
+ %tmp15493 = getelementptr inbounds float, float* %tmp15492, i64 1
+ %tmp15494 = getelementptr inbounds float, float* %tmp15493, i64 1
+ %tmp15495 = getelementptr inbounds float, float* %tmp15494, i64 1
+ %tmp15496 = getelementptr inbounds float, float* %tmp15495, i64 1
+ %tmp15497 = getelementptr inbounds float, float* %tmp15496, i64 1
+ %tmp15498 = getelementptr inbounds float, float* %tmp15497, i64 1
+ %tmp15499 = getelementptr inbounds float, float* %tmp15498, i64 1
+ %tmp15500 = getelementptr inbounds float, float* %tmp15499, i64 1
+ %tmp15501 = getelementptr inbounds float, float* %tmp15500, i64 1
+ %tmp15502 = getelementptr inbounds float, float* %tmp15501, i64 1
+ %tmp15503 = getelementptr inbounds float, float* %tmp15502, i64 1
+ %tmp15504 = getelementptr inbounds float, float* %tmp15503, i64 1
+ %tmp15505 = getelementptr inbounds float, float* %tmp15504, i64 1
+ %tmp15506 = getelementptr inbounds float, float* %tmp15505, i64 1
+ %tmp15507 = getelementptr inbounds float, float* %tmp15506, i64 1
+ %tmp15508 = getelementptr inbounds float, float* %tmp15507, i64 1
+ %tmp15509 = getelementptr inbounds float, float* %tmp15508, i64 1
+ %tmp15510 = getelementptr inbounds float, float* %tmp15509, i64 1
+ %tmp15511 = getelementptr inbounds float, float* %tmp15510, i64 1
+ %tmp15512 = getelementptr inbounds float, float* %tmp15511, i64 1
+ %tmp15513 = getelementptr inbounds float, float* %tmp15512, i64 1
+ %tmp15514 = getelementptr inbounds float, float* %tmp15513, i64 1
+ %tmp15515 = getelementptr inbounds float, float* %tmp15514, i64 1
+ %tmp15516 = getelementptr inbounds float, float* %tmp15515, i64 1
+ %tmp15517 = getelementptr inbounds float, float* %tmp15516, i64 1
+ %tmp15518 = getelementptr inbounds float, float* %tmp15517, i64 1
+ %tmp15519 = getelementptr inbounds float, float* %tmp15518, i64 1
+ %tmp15520 = getelementptr inbounds float, float* %tmp15519, i64 1
+ %tmp15521 = getelementptr inbounds float, float* %tmp15520, i64 1
+ %tmp15522 = getelementptr inbounds float, float* %tmp15521, i64 1
+ %tmp15523 = getelementptr inbounds float, float* %tmp15522, i64 1
+ %tmp15524 = getelementptr inbounds float, float* %tmp15523, i64 1
+ %tmp15525 = getelementptr inbounds float, float* %tmp15524, i64 1
+ %tmp15526 = getelementptr inbounds float, float* %tmp15525, i64 1
+ %tmp15527 = getelementptr inbounds float, float* %tmp15526, i64 1
+ %tmp15528 = getelementptr inbounds float, float* %tmp15527, i64 1
+ %tmp15529 = getelementptr inbounds float, float* %tmp15528, i64 1
+ %tmp15530 = getelementptr inbounds float, float* %tmp15529, i64 1
+ %tmp15531 = getelementptr inbounds float, float* %tmp15530, i64 1
+ %tmp15532 = getelementptr inbounds float, float* %tmp15531, i64 1
+ %tmp15533 = getelementptr inbounds float, float* %tmp15532, i64 1
+ %tmp15534 = getelementptr inbounds float, float* %tmp15533, i64 1
+ %tmp15535 = getelementptr inbounds float, float* %tmp15534, i64 1
+ %tmp15536 = getelementptr inbounds float, float* %tmp15535, i64 1
+ %tmp15537 = getelementptr inbounds float, float* %tmp15536, i64 1
+ %tmp15538 = getelementptr inbounds float, float* %tmp15537, i64 1
+ %tmp15539 = getelementptr inbounds float, float* %tmp15538, i64 1
+ %tmp15540 = getelementptr inbounds float, float* %tmp15539, i64 1
+ %tmp15541 = getelementptr inbounds float, float* %tmp15540, i64 1
+ %tmp15542 = getelementptr inbounds float, float* %tmp15541, i64 1
+ %tmp15543 = getelementptr inbounds float, float* %tmp15542, i64 1
+ %tmp15544 = getelementptr inbounds float, float* %tmp15543, i64 1
+ %tmp15545 = getelementptr inbounds float, float* %tmp15544, i64 1
+ %tmp15546 = getelementptr inbounds float, float* %tmp15545, i64 1
+ %tmp15547 = getelementptr inbounds float, float* %tmp15546, i64 1
+ %tmp15548 = getelementptr inbounds float, float* %tmp15547, i64 1
+ %tmp15549 = getelementptr inbounds float, float* %tmp15548, i64 1
+ %tmp15550 = getelementptr inbounds float, float* %tmp15549, i64 1
+ %tmp15551 = getelementptr inbounds float, float* %tmp15550, i64 1
+ %tmp15552 = getelementptr inbounds float, float* %tmp15551, i64 1
+ %tmp15553 = getelementptr inbounds float, float* %tmp15552, i64 1
+ %tmp15554 = getelementptr inbounds float, float* %tmp15553, i64 1
+ %tmp15555 = getelementptr inbounds float, float* %tmp15554, i64 1
+ %tmp15556 = getelementptr inbounds float, float* %tmp15555, i64 1
+ %tmp15557 = getelementptr inbounds float, float* %tmp15556, i64 1
+ %tmp15558 = getelementptr inbounds float, float* %tmp15557, i64 1
+ %tmp15559 = getelementptr inbounds float, float* %tmp15558, i64 1
+ %tmp15560 = getelementptr inbounds float, float* %tmp15559, i64 1
+ %tmp15561 = getelementptr inbounds float, float* %tmp15560, i64 1
+ %tmp15562 = getelementptr inbounds float, float* %tmp15561, i64 1
+ %tmp15563 = getelementptr inbounds float, float* %tmp15562, i64 1
+ %tmp15564 = getelementptr inbounds float, float* %tmp15563, i64 1
+ %tmp15565 = getelementptr inbounds float, float* %tmp15564, i64 1
+ %tmp15566 = getelementptr inbounds float, float* %tmp15565, i64 1
+ %tmp15567 = getelementptr inbounds float, float* %tmp15566, i64 1
+ %tmp15568 = getelementptr inbounds float, float* %tmp15567, i64 1
+ %tmp15569 = getelementptr inbounds float, float* %tmp15568, i64 1
+ %tmp15570 = getelementptr inbounds float, float* %tmp15569, i64 1
+ %tmp15571 = getelementptr inbounds float, float* %tmp15570, i64 1
+ %tmp15572 = getelementptr inbounds float, float* %tmp15571, i64 1
+ %tmp15573 = getelementptr inbounds float, float* %tmp15572, i64 1
+ %tmp15574 = getelementptr inbounds float, float* %tmp15573, i64 1
+ %tmp15575 = getelementptr inbounds float, float* %tmp15574, i64 1
+ %tmp15576 = getelementptr inbounds float, float* %tmp15575, i64 1
+ %tmp15577 = getelementptr inbounds float, float* %tmp15576, i64 1
+ %tmp15578 = getelementptr inbounds float, float* %tmp15577, i64 1
+ %tmp15579 = getelementptr inbounds float, float* %tmp15578, i64 1
+ %tmp15580 = getelementptr inbounds float, float* %tmp15579, i64 1
+ %tmp15581 = getelementptr inbounds float, float* %tmp15580, i64 1
+ %tmp15582 = getelementptr inbounds float, float* %tmp15581, i64 1
+ %tmp15583 = getelementptr inbounds float, float* %tmp15582, i64 1
+ %tmp15584 = getelementptr inbounds float, float* %tmp15583, i64 1
+ %tmp15585 = getelementptr inbounds float, float* %tmp15584, i64 1
+ %tmp15586 = getelementptr inbounds float, float* %tmp15585, i64 1
+ %tmp15587 = getelementptr inbounds float, float* %tmp15586, i64 1
+ %tmp15588 = getelementptr inbounds float, float* %tmp15587, i64 1
+ %tmp15589 = getelementptr inbounds float, float* %tmp15588, i64 1
+ %tmp15590 = getelementptr inbounds float, float* %tmp15589, i64 1
+ %tmp15591 = getelementptr inbounds float, float* %tmp15590, i64 1
+ %tmp15592 = getelementptr inbounds float, float* %tmp15591, i64 1
+ %tmp15593 = getelementptr inbounds float, float* %tmp15592, i64 1
+ %tmp15594 = getelementptr inbounds float, float* %tmp15593, i64 1
+ %tmp15595 = getelementptr inbounds float, float* %tmp15594, i64 1
+ %tmp15596 = getelementptr inbounds float, float* %tmp15595, i64 1
+ %tmp15597 = getelementptr inbounds float, float* %tmp15596, i64 1
+ %tmp15598 = getelementptr inbounds float, float* %tmp15597, i64 1
+ %tmp15599 = getelementptr inbounds float, float* %tmp15598, i64 1
+ %tmp15600 = getelementptr inbounds float, float* %tmp15599, i64 1
+ %tmp15601 = getelementptr inbounds float, float* %tmp15600, i64 1
+ %tmp15602 = getelementptr inbounds float, float* %tmp15601, i64 1
+ %tmp15603 = getelementptr inbounds float, float* %tmp15602, i64 1
+ %tmp15604 = getelementptr inbounds float, float* %tmp15603, i64 1
+ %tmp15605 = getelementptr inbounds float, float* %tmp15604, i64 1
+ %tmp15606 = getelementptr inbounds float, float* %tmp15605, i64 1
+ %tmp15607 = getelementptr inbounds float, float* %tmp15606, i64 1
+ %tmp15608 = getelementptr inbounds float, float* %tmp15607, i64 1
+ %tmp15609 = getelementptr inbounds float, float* %tmp15608, i64 1
+ %tmp15610 = getelementptr inbounds float, float* %tmp15609, i64 1
+ %tmp15611 = getelementptr inbounds float, float* %tmp15610, i64 1
+ %tmp15612 = getelementptr inbounds float, float* %tmp15611, i64 1
+ %tmp15613 = getelementptr inbounds float, float* %tmp15612, i64 1
+ %tmp15614 = getelementptr inbounds float, float* %tmp15613, i64 1
+ %tmp15615 = getelementptr inbounds float, float* %tmp15614, i64 1
+ %tmp15616 = getelementptr inbounds float, float* %tmp15615, i64 1
+ %tmp15617 = getelementptr inbounds float, float* %tmp15616, i64 1
+ %tmp15618 = getelementptr inbounds float, float* %tmp15617, i64 1
+ %tmp15619 = getelementptr inbounds float, float* %tmp15618, i64 1
+ %tmp15620 = getelementptr inbounds float, float* %tmp15619, i64 1
+ %tmp15621 = getelementptr inbounds float, float* %tmp15620, i64 1
+ %tmp15622 = getelementptr inbounds float, float* %tmp15621, i64 1
+ %tmp15623 = getelementptr inbounds float, float* %tmp15622, i64 1
+ %tmp15624 = getelementptr inbounds float, float* %tmp15623, i64 1
+ %tmp15625 = getelementptr inbounds float, float* %tmp15624, i64 1
+ %tmp15626 = getelementptr inbounds float, float* %tmp15625, i64 1
+ %tmp15627 = getelementptr inbounds float, float* %tmp15626, i64 1
+ %tmp15628 = getelementptr inbounds float, float* %tmp15627, i64 1
+ %tmp15629 = getelementptr inbounds float, float* %tmp15628, i64 1
+ %tmp15630 = getelementptr inbounds float, float* %tmp15629, i64 1
+ %tmp15631 = getelementptr inbounds float, float* %tmp15630, i64 1
+ %tmp15632 = getelementptr inbounds float, float* %tmp15631, i64 1
+ %tmp15633 = getelementptr inbounds float, float* %tmp15632, i64 1
+ %tmp15634 = getelementptr inbounds float, float* %tmp15633, i64 1
+ %tmp15635 = getelementptr inbounds float, float* %tmp15634, i64 1
+ %tmp15636 = getelementptr inbounds float, float* %tmp15635, i64 1
+ %tmp15637 = getelementptr inbounds float, float* %tmp15636, i64 1
+ %tmp15638 = getelementptr inbounds float, float* %tmp15637, i64 1
+ %tmp15639 = getelementptr inbounds float, float* %tmp15638, i64 1
+ %tmp15640 = getelementptr inbounds float, float* %tmp15639, i64 1
+ %tmp15641 = getelementptr inbounds float, float* %tmp15640, i64 1
+ %tmp15642 = getelementptr inbounds float, float* %tmp15641, i64 1
+ %tmp15643 = getelementptr inbounds float, float* %tmp15642, i64 1
+ %tmp15644 = getelementptr inbounds float, float* %tmp15643, i64 1
+ %tmp15645 = getelementptr inbounds float, float* %tmp15644, i64 1
+ %tmp15646 = getelementptr inbounds float, float* %tmp15645, i64 1
+ %tmp15647 = getelementptr inbounds float, float* %tmp15646, i64 1
+ %tmp15648 = getelementptr inbounds float, float* %tmp15647, i64 1
+ %tmp15649 = getelementptr inbounds float, float* %tmp15648, i64 1
+ %tmp15650 = getelementptr inbounds float, float* %tmp15649, i64 1
+ %tmp15651 = getelementptr inbounds float, float* %tmp15650, i64 1
+ %tmp15652 = getelementptr inbounds float, float* %tmp15651, i64 1
+ %tmp15653 = getelementptr inbounds float, float* %tmp15652, i64 1
+ %tmp15654 = getelementptr inbounds float, float* %tmp15653, i64 1
+ %tmp15655 = getelementptr inbounds float, float* %tmp15654, i64 1
+ %tmp15656 = getelementptr inbounds float, float* %tmp15655, i64 1
+ %tmp15657 = getelementptr inbounds float, float* %tmp15656, i64 1
+ %tmp15658 = getelementptr inbounds float, float* %tmp15657, i64 1
+ %tmp15659 = getelementptr inbounds float, float* %tmp15658, i64 1
+ %tmp15660 = getelementptr inbounds float, float* %tmp15659, i64 1
+ %tmp15661 = getelementptr inbounds float, float* %tmp15660, i64 1
+ %tmp15662 = getelementptr inbounds float, float* %tmp15661, i64 1
+ %tmp15663 = getelementptr inbounds float, float* %tmp15662, i64 1
+ %tmp15664 = getelementptr inbounds float, float* %tmp15663, i64 1
+ %tmp15665 = getelementptr inbounds float, float* %tmp15664, i64 1
+ %tmp15666 = getelementptr inbounds float, float* %tmp15665, i64 1
+ %tmp15667 = getelementptr inbounds float, float* %tmp15666, i64 1
+ %tmp15668 = getelementptr inbounds float, float* %tmp15667, i64 1
+ %tmp15669 = getelementptr inbounds float, float* %tmp15668, i64 1
+ %tmp15670 = getelementptr inbounds float, float* %tmp15669, i64 1
+ %tmp15671 = getelementptr inbounds float, float* %tmp15670, i64 1
+ %tmp15672 = getelementptr inbounds float, float* %tmp15671, i64 1
+ %tmp15673 = getelementptr inbounds float, float* %tmp15672, i64 1
+ %tmp15674 = getelementptr inbounds float, float* %tmp15673, i64 1
+ %tmp15675 = getelementptr inbounds float, float* %tmp15674, i64 1
+ %tmp15676 = getelementptr inbounds float, float* %tmp15675, i64 1
+ %tmp15677 = getelementptr inbounds float, float* %tmp15676, i64 1
+ %tmp15678 = getelementptr inbounds float, float* %tmp15677, i64 1
+ %tmp15679 = getelementptr inbounds float, float* %tmp15678, i64 1
+ %tmp15680 = getelementptr inbounds float, float* %tmp15679, i64 1
+ %tmp15681 = getelementptr inbounds float, float* %tmp15680, i64 1
+ %tmp15682 = getelementptr inbounds float, float* %tmp15681, i64 1
+ %tmp15683 = getelementptr inbounds float, float* %tmp15682, i64 1
+ %tmp15684 = getelementptr inbounds float, float* %tmp15683, i64 1
+ %tmp15685 = getelementptr inbounds float, float* %tmp15684, i64 1
+ %tmp15686 = getelementptr inbounds float, float* %tmp15685, i64 1
+ %tmp15687 = getelementptr inbounds float, float* %tmp15686, i64 1
+ %tmp15688 = getelementptr inbounds float, float* %tmp15687, i64 1
+ %tmp15689 = getelementptr inbounds float, float* %tmp15688, i64 1
+ %tmp15690 = getelementptr inbounds float, float* %tmp15689, i64 1
+ %tmp15691 = getelementptr inbounds float, float* %tmp15690, i64 1
+ %tmp15692 = getelementptr inbounds float, float* %tmp15691, i64 1
+ %tmp15693 = getelementptr inbounds float, float* %tmp15692, i64 1
+ %tmp15694 = getelementptr inbounds float, float* %tmp15693, i64 1
+ %tmp15695 = getelementptr inbounds float, float* %tmp15694, i64 1
+ %tmp15696 = getelementptr inbounds float, float* %tmp15695, i64 1
+ %tmp15697 = getelementptr inbounds float, float* %tmp15696, i64 1
+ %tmp15698 = getelementptr inbounds float, float* %tmp15697, i64 1
+ %tmp15699 = getelementptr inbounds float, float* %tmp15698, i64 1
+ %tmp15700 = getelementptr inbounds float, float* %tmp15699, i64 1
+ %tmp15701 = getelementptr inbounds float, float* %tmp15700, i64 1
+ %tmp15702 = getelementptr inbounds float, float* %tmp15701, i64 1
+ %tmp15703 = getelementptr inbounds float, float* %tmp15702, i64 1
+ %tmp15704 = getelementptr inbounds float, float* %tmp15703, i64 1
+ %tmp15705 = getelementptr inbounds float, float* %tmp15704, i64 1
+ %tmp15706 = getelementptr inbounds float, float* %tmp15705, i64 1
+ %tmp15707 = getelementptr inbounds float, float* %tmp15706, i64 1
+ %tmp15708 = getelementptr inbounds float, float* %tmp15707, i64 1
+ %tmp15709 = getelementptr inbounds float, float* %tmp15708, i64 1
+ %tmp15710 = getelementptr inbounds float, float* %tmp15709, i64 1
+ %tmp15711 = getelementptr inbounds float, float* %tmp15710, i64 1
+ %tmp15712 = getelementptr inbounds float, float* %tmp15711, i64 1
+ %tmp15713 = getelementptr inbounds float, float* %tmp15712, i64 1
+ %tmp15714 = getelementptr inbounds float, float* %tmp15713, i64 1
+ %tmp15715 = getelementptr inbounds float, float* %tmp15714, i64 1
+ %tmp15716 = getelementptr inbounds float, float* %tmp15715, i64 1
+ %tmp15717 = getelementptr inbounds float, float* %tmp15716, i64 1
+ %tmp15718 = getelementptr inbounds float, float* %tmp15717, i64 1
+ %tmp15719 = getelementptr inbounds float, float* %tmp15718, i64 1
+ %tmp15720 = getelementptr inbounds float, float* %tmp15719, i64 1
+ %tmp15721 = getelementptr inbounds float, float* %tmp15720, i64 1
+ %tmp15722 = getelementptr inbounds float, float* %tmp15721, i64 1
+ %tmp15723 = getelementptr inbounds float, float* %tmp15722, i64 1
+ %tmp15724 = getelementptr inbounds float, float* %tmp15723, i64 1
+ %tmp15725 = getelementptr inbounds float, float* %tmp15724, i64 1
+ %tmp15726 = getelementptr inbounds float, float* %tmp15725, i64 1
+ %tmp15727 = getelementptr inbounds float, float* %tmp15726, i64 1
+ %tmp15728 = getelementptr inbounds float, float* %tmp15727, i64 1
+ %tmp15729 = getelementptr inbounds float, float* %tmp15728, i64 1
+ %tmp15730 = getelementptr inbounds float, float* %tmp15729, i64 1
+ %tmp15731 = getelementptr inbounds float, float* %tmp15730, i64 1
+ %tmp15732 = getelementptr inbounds float, float* %tmp15731, i64 1
+ %tmp15733 = getelementptr inbounds float, float* %tmp15732, i64 1
+ %tmp15734 = getelementptr inbounds float, float* %tmp15733, i64 1
+ %tmp15735 = getelementptr inbounds float, float* %tmp15734, i64 1
+ %tmp15736 = getelementptr inbounds float, float* %tmp15735, i64 1
+ %tmp15737 = getelementptr inbounds float, float* %tmp15736, i64 1
+ %tmp15738 = getelementptr inbounds float, float* %tmp15737, i64 1
+ %tmp15739 = getelementptr inbounds float, float* %tmp15738, i64 1
+ %tmp15740 = getelementptr inbounds float, float* %tmp15739, i64 1
+ %tmp15741 = getelementptr inbounds float, float* %tmp15740, i64 1
+ %tmp15742 = getelementptr inbounds float, float* %tmp15741, i64 1
+ %tmp15743 = getelementptr inbounds float, float* %tmp15742, i64 1
+ %tmp15744 = getelementptr inbounds float, float* %tmp15743, i64 1
+ %tmp15745 = getelementptr inbounds float, float* %tmp15744, i64 1
+ %tmp15746 = getelementptr inbounds float, float* %tmp15745, i64 1
+ %tmp15747 = getelementptr inbounds float, float* %tmp15746, i64 1
+ %tmp15748 = getelementptr inbounds float, float* %tmp15747, i64 1
+ %tmp15749 = getelementptr inbounds float, float* %tmp15748, i64 1
+ %tmp15750 = getelementptr inbounds float, float* %tmp15749, i64 1
+ %tmp15751 = getelementptr inbounds float, float* %tmp15750, i64 1
+ %tmp15752 = getelementptr inbounds float, float* %tmp15751, i64 1
+ %tmp15753 = getelementptr inbounds float, float* %tmp15752, i64 1
+ %tmp15754 = getelementptr inbounds float, float* %tmp15753, i64 1
+ %tmp15755 = getelementptr inbounds float, float* %tmp15754, i64 1
+ %tmp15756 = getelementptr inbounds float, float* %tmp15755, i64 1
+ %tmp15757 = getelementptr inbounds float, float* %tmp15756, i64 1
+ %tmp15758 = getelementptr inbounds float, float* %tmp15757, i64 1
+ %tmp15759 = getelementptr inbounds float, float* %tmp15758, i64 1
+ %tmp15760 = getelementptr inbounds float, float* %tmp15759, i64 1
+ %tmp15761 = getelementptr inbounds float, float* %tmp15760, i64 1
+ %tmp15762 = getelementptr inbounds float, float* %tmp15761, i64 1
+ %tmp15763 = getelementptr inbounds float, float* %tmp15762, i64 1
+ %tmp15764 = getelementptr inbounds float, float* %tmp15763, i64 1
+ %tmp15765 = getelementptr inbounds float, float* %tmp15764, i64 1
+ %tmp15766 = getelementptr inbounds float, float* %tmp15765, i64 1
+ %tmp15767 = getelementptr inbounds float, float* %tmp15766, i64 1
+ %tmp15768 = getelementptr inbounds float, float* %tmp15767, i64 1
+ %tmp15769 = getelementptr inbounds float, float* %tmp15768, i64 1
+ %tmp15770 = getelementptr inbounds float, float* %tmp15769, i64 1
+ %tmp15771 = getelementptr inbounds float, float* %tmp15770, i64 1
+ %tmp15772 = getelementptr inbounds float, float* %tmp15771, i64 1
+ %tmp15773 = getelementptr inbounds float, float* %tmp15772, i64 1
+ %tmp15774 = getelementptr inbounds float, float* %tmp15773, i64 1
+ %tmp15775 = getelementptr inbounds float, float* %tmp15774, i64 1
+ %tmp15776 = getelementptr inbounds float, float* %tmp15775, i64 1
+ %tmp15777 = getelementptr inbounds float, float* %tmp15776, i64 1
+ %tmp15778 = getelementptr inbounds float, float* %tmp15777, i64 1
+ %tmp15779 = getelementptr inbounds float, float* %tmp15778, i64 1
+ %tmp15780 = getelementptr inbounds float, float* %tmp15779, i64 1
+ %tmp15781 = getelementptr inbounds float, float* %tmp15780, i64 1
+ %tmp15782 = getelementptr inbounds float, float* %tmp15781, i64 1
+ %tmp15783 = getelementptr inbounds float, float* %tmp15782, i64 1
+ %tmp15784 = getelementptr inbounds float, float* %tmp15783, i64 1
+ %tmp15785 = getelementptr inbounds float, float* %tmp15784, i64 1
+ %tmp15786 = getelementptr inbounds float, float* %tmp15785, i64 1
+ %tmp15787 = getelementptr inbounds float, float* %tmp15786, i64 1
+ %tmp15788 = getelementptr inbounds float, float* %tmp15787, i64 1
+ %tmp15789 = getelementptr inbounds float, float* %tmp15788, i64 1
+ %tmp15790 = getelementptr inbounds float, float* %tmp15789, i64 1
+ %tmp15791 = getelementptr inbounds float, float* %tmp15790, i64 1
+ %tmp15792 = getelementptr inbounds float, float* %tmp15791, i64 1
+ %tmp15793 = getelementptr inbounds float, float* %tmp15792, i64 1
+ %tmp15794 = getelementptr inbounds float, float* %tmp15793, i64 1
+ %tmp15795 = getelementptr inbounds float, float* %tmp15794, i64 1
+ %tmp15796 = getelementptr inbounds float, float* %tmp15795, i64 1
+ %tmp15797 = getelementptr inbounds float, float* %tmp15796, i64 1
+ %tmp15798 = getelementptr inbounds float, float* %tmp15797, i64 1
+ %tmp15799 = getelementptr inbounds float, float* %tmp15798, i64 1
+ %tmp15800 = getelementptr inbounds float, float* %tmp15799, i64 1
+ %tmp15801 = getelementptr inbounds float, float* %tmp15800, i64 1
+ %tmp15802 = getelementptr inbounds float, float* %tmp15801, i64 1
+ %tmp15803 = getelementptr inbounds float, float* %tmp15802, i64 1
+ %tmp15804 = getelementptr inbounds float, float* %tmp15803, i64 1
+ %tmp15805 = getelementptr inbounds float, float* %tmp15804, i64 1
+ %tmp15806 = getelementptr inbounds float, float* %tmp15805, i64 1
+ %tmp15807 = getelementptr inbounds float, float* %tmp15806, i64 1
+ %tmp15808 = getelementptr inbounds float, float* %tmp15807, i64 1
+ %tmp15809 = getelementptr inbounds float, float* %tmp15808, i64 1
+ %tmp15810 = getelementptr inbounds float, float* %tmp15809, i64 1
+ %tmp15811 = getelementptr inbounds float, float* %tmp15810, i64 1
+ %tmp15812 = getelementptr inbounds float, float* %tmp15811, i64 1
+ %tmp15813 = getelementptr inbounds float, float* %tmp15812, i64 1
+ %tmp15814 = getelementptr inbounds float, float* %tmp15813, i64 1
+ %tmp15815 = getelementptr inbounds float, float* %tmp15814, i64 1
+ %tmp15816 = getelementptr inbounds float, float* %tmp15815, i64 1
+ %tmp15817 = getelementptr inbounds float, float* %tmp15816, i64 1
+ %tmp15818 = getelementptr inbounds float, float* %tmp15817, i64 1
+ %tmp15819 = getelementptr inbounds float, float* %tmp15818, i64 1
+ %tmp15820 = getelementptr inbounds float, float* %tmp15819, i64 1
+ %tmp15821 = getelementptr inbounds float, float* %tmp15820, i64 1
+ %tmp15822 = getelementptr inbounds float, float* %tmp15821, i64 1
+ %tmp15823 = getelementptr inbounds float, float* %tmp15822, i64 1
+ %tmp15824 = getelementptr inbounds float, float* %tmp15823, i64 1
+ %tmp15825 = getelementptr inbounds float, float* %tmp15824, i64 1
+ %tmp15826 = getelementptr inbounds float, float* %tmp15825, i64 1
+ %tmp15827 = getelementptr inbounds float, float* %tmp15826, i64 1
+ %tmp15828 = getelementptr inbounds float, float* %tmp15827, i64 1
+ %tmp15829 = getelementptr inbounds float, float* %tmp15828, i64 1
+ %tmp15830 = getelementptr inbounds float, float* %tmp15829, i64 1
+ %tmp15831 = getelementptr inbounds float, float* %tmp15830, i64 1
+ %tmp15832 = getelementptr inbounds float, float* %tmp15831, i64 1
+ %tmp15833 = getelementptr inbounds float, float* %tmp15832, i64 1
+ %tmp15834 = getelementptr inbounds float, float* %tmp15833, i64 1
+ %tmp15835 = getelementptr inbounds float, float* %tmp15834, i64 1
+ %tmp15836 = getelementptr inbounds float, float* %tmp15835, i64 1
+ %tmp15837 = getelementptr inbounds float, float* %tmp15836, i64 1
+ %tmp15838 = getelementptr inbounds float, float* %tmp15837, i64 1
+ %tmp15839 = getelementptr inbounds float, float* %tmp15838, i64 1
+ %tmp15840 = getelementptr inbounds float, float* %tmp15839, i64 1
+ %tmp15841 = getelementptr inbounds float, float* %tmp15840, i64 1
+ %tmp15842 = getelementptr inbounds float, float* %tmp15841, i64 1
+ %tmp15843 = getelementptr inbounds float, float* %tmp15842, i64 1
+ %tmp15844 = getelementptr inbounds float, float* %tmp15843, i64 1
+ %tmp15845 = getelementptr inbounds float, float* %tmp15844, i64 1
+ %tmp15846 = getelementptr inbounds float, float* %tmp15845, i64 1
+ %tmp15847 = getelementptr inbounds float, float* %tmp15846, i64 1
+ %tmp15848 = getelementptr inbounds float, float* %tmp15847, i64 1
+ %tmp15849 = getelementptr inbounds float, float* %tmp15848, i64 1
+ %tmp15850 = getelementptr inbounds float, float* %tmp15849, i64 1
+ %tmp15851 = getelementptr inbounds float, float* %tmp15850, i64 1
+ %tmp15852 = getelementptr inbounds float, float* %tmp15851, i64 1
+ %tmp15853 = getelementptr inbounds float, float* %tmp15852, i64 1
+ %tmp15854 = getelementptr inbounds float, float* %tmp15853, i64 1
+ %tmp15855 = getelementptr inbounds float, float* %tmp15854, i64 1
+ %tmp15856 = getelementptr inbounds float, float* %tmp15855, i64 1
+ %tmp15857 = getelementptr inbounds float, float* %tmp15856, i64 1
+ %tmp15858 = getelementptr inbounds float, float* %tmp15857, i64 1
+ %tmp15859 = getelementptr inbounds float, float* %tmp15858, i64 1
+ %tmp15860 = getelementptr inbounds float, float* %tmp15859, i64 1
+ %tmp15861 = getelementptr inbounds float, float* %tmp15860, i64 1
+ %tmp15862 = getelementptr inbounds float, float* %tmp15861, i64 1
+ %tmp15863 = getelementptr inbounds float, float* %tmp15862, i64 1
+ %tmp15864 = getelementptr inbounds float, float* %tmp15863, i64 1
+ %tmp15865 = getelementptr inbounds float, float* %tmp15864, i64 1
+ %tmp15866 = getelementptr inbounds float, float* %tmp15865, i64 1
+ %tmp15867 = getelementptr inbounds float, float* %tmp15866, i64 1
+ %tmp15868 = getelementptr inbounds float, float* %tmp15867, i64 1
+ %tmp15869 = getelementptr inbounds float, float* %tmp15868, i64 1
+ %tmp15870 = getelementptr inbounds float, float* %tmp15869, i64 1
+ %tmp15871 = getelementptr inbounds float, float* %tmp15870, i64 1
+ %tmp15872 = getelementptr inbounds float, float* %tmp15871, i64 1
+ %tmp15873 = getelementptr inbounds float, float* %tmp15872, i64 1
+ %tmp15874 = getelementptr inbounds float, float* %tmp15873, i64 1
+ %tmp15875 = getelementptr inbounds float, float* %tmp15874, i64 1
+ %tmp15876 = getelementptr inbounds float, float* %tmp15875, i64 1
+ %tmp15877 = getelementptr inbounds float, float* %tmp15876, i64 1
+ %tmp15878 = getelementptr inbounds float, float* %tmp15877, i64 1
+ %tmp15879 = getelementptr inbounds float, float* %tmp15878, i64 1
+ %tmp15880 = getelementptr inbounds float, float* %tmp15879, i64 1
+ %tmp15881 = getelementptr inbounds float, float* %tmp15880, i64 1
+ %tmp15882 = getelementptr inbounds float, float* %tmp15881, i64 1
+ %tmp15883 = getelementptr inbounds float, float* %tmp15882, i64 1
+ %tmp15884 = getelementptr inbounds float, float* %tmp15883, i64 1
+ %tmp15885 = getelementptr inbounds float, float* %tmp15884, i64 1
+ %tmp15886 = getelementptr inbounds float, float* %tmp15885, i64 1
+ %tmp15887 = getelementptr inbounds float, float* %tmp15886, i64 1
+ %tmp15888 = getelementptr inbounds float, float* %tmp15887, i64 1
+ %tmp15889 = getelementptr inbounds float, float* %tmp15888, i64 1
+ %tmp15890 = getelementptr inbounds float, float* %tmp15889, i64 1
+ %tmp15891 = getelementptr inbounds float, float* %tmp15890, i64 1
+ %tmp15892 = getelementptr inbounds float, float* %tmp15891, i64 1
+ %tmp15893 = getelementptr inbounds float, float* %tmp15892, i64 1
+ %tmp15894 = getelementptr inbounds float, float* %tmp15893, i64 1
+ %tmp15895 = getelementptr inbounds float, float* %tmp15894, i64 1
+ %tmp15896 = getelementptr inbounds float, float* %tmp15895, i64 1
+ %tmp15897 = getelementptr inbounds float, float* %tmp15896, i64 1
+ %tmp15898 = getelementptr inbounds float, float* %tmp15897, i64 1
+ %tmp15899 = getelementptr inbounds float, float* %tmp15898, i64 1
+ %tmp15900 = getelementptr inbounds float, float* %tmp15899, i64 1
+ %tmp15901 = getelementptr inbounds float, float* %tmp15900, i64 1
+ %tmp15902 = getelementptr inbounds float, float* %tmp15901, i64 1
+ %tmp15903 = getelementptr inbounds float, float* %tmp15902, i64 1
+ %tmp15904 = getelementptr inbounds float, float* %tmp15903, i64 1
+ %tmp15905 = getelementptr inbounds float, float* %tmp15904, i64 1
+ %tmp15906 = getelementptr inbounds float, float* %tmp15905, i64 1
+ %tmp15907 = getelementptr inbounds float, float* %tmp15906, i64 1
+ %tmp15908 = getelementptr inbounds float, float* %tmp15907, i64 1
+ %tmp15909 = getelementptr inbounds float, float* %tmp15908, i64 1
+ %tmp15910 = getelementptr inbounds float, float* %tmp15909, i64 1
+ %tmp15911 = getelementptr inbounds float, float* %tmp15910, i64 1
+ %tmp15912 = getelementptr inbounds float, float* %tmp15911, i64 1
+ %tmp15913 = getelementptr inbounds float, float* %tmp15912, i64 1
+ %tmp15914 = getelementptr inbounds float, float* %tmp15913, i64 1
+ %tmp15915 = getelementptr inbounds float, float* %tmp15914, i64 1
+ %tmp15916 = getelementptr inbounds float, float* %tmp15915, i64 1
+ %tmp15917 = getelementptr inbounds float, float* %tmp15916, i64 1
+ %tmp15918 = getelementptr inbounds float, float* %tmp15917, i64 1
+ %tmp15919 = getelementptr inbounds float, float* %tmp15918, i64 1
+ %tmp15920 = getelementptr inbounds float, float* %tmp15919, i64 1
+ %tmp15921 = getelementptr inbounds float, float* %tmp15920, i64 1
+ %tmp15922 = getelementptr inbounds float, float* %tmp15921, i64 1
+ %tmp15923 = getelementptr inbounds float, float* %tmp15922, i64 1
+ %tmp15924 = getelementptr inbounds float, float* %tmp15923, i64 1
+ %tmp15925 = getelementptr inbounds float, float* %tmp15924, i64 1
+ %tmp15926 = getelementptr inbounds float, float* %tmp15925, i64 1
+ %tmp15927 = getelementptr inbounds float, float* %tmp15926, i64 1
+ %tmp15928 = getelementptr inbounds float, float* %tmp15927, i64 1
+ %tmp15929 = getelementptr inbounds float, float* %tmp15928, i64 1
+ %tmp15930 = getelementptr inbounds float, float* %tmp15929, i64 1
+ %tmp15931 = getelementptr inbounds float, float* %tmp15930, i64 1
+ %tmp15932 = getelementptr inbounds float, float* %tmp15931, i64 1
+ %tmp15933 = getelementptr inbounds float, float* %tmp15932, i64 1
+ %tmp15934 = getelementptr inbounds float, float* %tmp15933, i64 1
+ %tmp15935 = getelementptr inbounds float, float* %tmp15934, i64 1
+ %tmp15936 = getelementptr inbounds float, float* %tmp15935, i64 1
+ %tmp15937 = getelementptr inbounds float, float* %tmp15936, i64 1
+ %tmp15938 = getelementptr inbounds float, float* %tmp15937, i64 1
+ %tmp15939 = getelementptr inbounds float, float* %tmp15938, i64 1
+ %tmp15940 = getelementptr inbounds float, float* %tmp15939, i64 1
+ %tmp15941 = getelementptr inbounds float, float* %tmp15940, i64 1
+ %tmp15942 = getelementptr inbounds float, float* %tmp15941, i64 1
+ %tmp15943 = getelementptr inbounds float, float* %tmp15942, i64 1
+ %tmp15944 = getelementptr inbounds float, float* %tmp15943, i64 1
+ %tmp15945 = getelementptr inbounds float, float* %tmp15944, i64 1
+ %tmp15946 = getelementptr inbounds float, float* %tmp15945, i64 1
+ %tmp15947 = getelementptr inbounds float, float* %tmp15946, i64 1
+ %tmp15948 = getelementptr inbounds float, float* %tmp15947, i64 1
+ %tmp15949 = getelementptr inbounds float, float* %tmp15948, i64 1
+ %tmp15950 = getelementptr inbounds float, float* %tmp15949, i64 1
+ %tmp15951 = getelementptr inbounds float, float* %tmp15950, i64 1
+ %tmp15952 = getelementptr inbounds float, float* %tmp15951, i64 1
+ %tmp15953 = getelementptr inbounds float, float* %tmp15952, i64 1
+ %tmp15954 = getelementptr inbounds float, float* %tmp15953, i64 1
+ %tmp15955 = getelementptr inbounds float, float* %tmp15954, i64 1
+ %tmp15956 = getelementptr inbounds float, float* %tmp15955, i64 1
+ %tmp15957 = getelementptr inbounds float, float* %tmp15956, i64 1
+ %tmp15958 = getelementptr inbounds float, float* %tmp15957, i64 1
+ %tmp15959 = getelementptr inbounds float, float* %tmp15958, i64 1
+ %tmp15960 = getelementptr inbounds float, float* %tmp15959, i64 1
+ %tmp15961 = getelementptr inbounds float, float* %tmp15960, i64 1
+ %tmp15962 = getelementptr inbounds float, float* %tmp15961, i64 1
+ %tmp15963 = getelementptr inbounds float, float* %tmp15962, i64 1
+ %tmp15964 = getelementptr inbounds float, float* %tmp15963, i64 1
+ %tmp15965 = getelementptr inbounds float, float* %tmp15964, i64 1
+ %tmp15966 = getelementptr inbounds float, float* %tmp15965, i64 1
+ %tmp15967 = getelementptr inbounds float, float* %tmp15966, i64 1
+ %tmp15968 = getelementptr inbounds float, float* %tmp15967, i64 1
+ %tmp15969 = getelementptr inbounds float, float* %tmp15968, i64 1
+ %tmp15970 = getelementptr inbounds float, float* %tmp15969, i64 1
+ %tmp15971 = getelementptr inbounds float, float* %tmp15970, i64 1
+ %tmp15972 = getelementptr inbounds float, float* %tmp15971, i64 1
+ %tmp15973 = getelementptr inbounds float, float* %tmp15972, i64 1
+ %tmp15974 = getelementptr inbounds float, float* %tmp15973, i64 1
+ %tmp15975 = getelementptr inbounds float, float* %tmp15974, i64 1
+ %tmp15976 = getelementptr inbounds float, float* %tmp15975, i64 1
+ %tmp15977 = getelementptr inbounds float, float* %tmp15976, i64 1
+ %tmp15978 = getelementptr inbounds float, float* %tmp15977, i64 1
+ %tmp15979 = getelementptr inbounds float, float* %tmp15978, i64 1
+ %tmp15980 = getelementptr inbounds float, float* %tmp15979, i64 1
+ %tmp15981 = getelementptr inbounds float, float* %tmp15980, i64 1
+ %tmp15982 = getelementptr inbounds float, float* %tmp15981, i64 1
+ %tmp15983 = getelementptr inbounds float, float* %tmp15982, i64 1
+ %tmp15984 = getelementptr inbounds float, float* %tmp15983, i64 1
+ %tmp15985 = getelementptr inbounds float, float* %tmp15984, i64 1
+ %tmp15986 = getelementptr inbounds float, float* %tmp15985, i64 1
+ %tmp15987 = getelementptr inbounds float, float* %tmp15986, i64 1
+ %tmp15988 = getelementptr inbounds float, float* %tmp15987, i64 1
+ %tmp15989 = getelementptr inbounds float, float* %tmp15988, i64 1
+ %tmp15990 = getelementptr inbounds float, float* %tmp15989, i64 1
+ %tmp15991 = getelementptr inbounds float, float* %tmp15990, i64 1
+ %tmp15992 = getelementptr inbounds float, float* %tmp15991, i64 1
+ %tmp15993 = getelementptr inbounds float, float* %tmp15992, i64 1
+ %tmp15994 = getelementptr inbounds float, float* %tmp15993, i64 1
+ %tmp15995 = getelementptr inbounds float, float* %tmp15994, i64 1
+ %tmp15996 = getelementptr inbounds float, float* %tmp15995, i64 1
+ %tmp15997 = getelementptr inbounds float, float* %tmp15996, i64 1
+ %tmp15998 = getelementptr inbounds float, float* %tmp15997, i64 1
+ %tmp15999 = getelementptr inbounds float, float* %tmp15998, i64 1
+ %tmp16000 = getelementptr inbounds float, float* %tmp15999, i64 1
+ %tmp16001 = getelementptr inbounds float, float* %tmp16000, i64 1
+ %tmp16002 = getelementptr inbounds float, float* %tmp16001, i64 1
+ %tmp16003 = getelementptr inbounds float, float* %tmp16002, i64 1
+ %tmp16004 = getelementptr inbounds float, float* %tmp16003, i64 1
+ %tmp16005 = getelementptr inbounds float, float* %tmp16004, i64 1
+ %tmp16006 = getelementptr inbounds float, float* %tmp16005, i64 1
+ %tmp16007 = getelementptr inbounds float, float* %tmp16006, i64 1
+ %tmp16008 = getelementptr inbounds float, float* %tmp16007, i64 1
+ %tmp16009 = getelementptr inbounds float, float* %tmp16008, i64 1
+ %tmp16010 = getelementptr inbounds float, float* %tmp16009, i64 1
+ %tmp16011 = getelementptr inbounds float, float* %tmp16010, i64 1
+ %tmp16012 = getelementptr inbounds float, float* %tmp16011, i64 1
+ %tmp16013 = getelementptr inbounds float, float* %tmp16012, i64 1
+ %tmp16014 = getelementptr inbounds float, float* %tmp16013, i64 1
+ %tmp16015 = getelementptr inbounds float, float* %tmp16014, i64 1
+ %tmp16016 = getelementptr inbounds float, float* %tmp16015, i64 1
+ %tmp16017 = getelementptr inbounds float, float* %tmp16016, i64 1
+ %tmp16018 = getelementptr inbounds float, float* %tmp16017, i64 1
+ %tmp16019 = getelementptr inbounds float, float* %tmp16018, i64 1
+ %tmp16020 = getelementptr inbounds float, float* %tmp16019, i64 1
+ %tmp16021 = getelementptr inbounds float, float* %tmp16020, i64 1
+ %tmp16022 = getelementptr inbounds float, float* %tmp16021, i64 1
+ %tmp16023 = getelementptr inbounds float, float* %tmp16022, i64 1
+ %tmp16024 = getelementptr inbounds float, float* %tmp16023, i64 1
+ %tmp16025 = getelementptr inbounds float, float* %tmp16024, i64 1
+ %tmp16026 = getelementptr inbounds float, float* %tmp16025, i64 1
+ %tmp16027 = getelementptr inbounds float, float* %tmp16026, i64 1
+ %tmp16028 = getelementptr inbounds float, float* %tmp16027, i64 1
+ %tmp16029 = getelementptr inbounds float, float* %tmp16028, i64 1
+ %tmp16030 = getelementptr inbounds float, float* %tmp16029, i64 1
+ %tmp16031 = getelementptr inbounds float, float* %tmp16030, i64 1
+ %tmp16032 = getelementptr inbounds float, float* %tmp16031, i64 1
+ %tmp16033 = getelementptr inbounds float, float* %tmp16032, i64 1
+ %tmp16034 = getelementptr inbounds float, float* %tmp16033, i64 1
+ %tmp16035 = getelementptr inbounds float, float* %tmp16034, i64 1
+ %tmp16036 = getelementptr inbounds float, float* %tmp16035, i64 1
+ %tmp16037 = getelementptr inbounds float, float* %tmp16036, i64 1
+ %tmp16038 = getelementptr inbounds float, float* %tmp16037, i64 1
+ %tmp16039 = getelementptr inbounds float, float* %tmp16038, i64 1
+ %tmp16040 = getelementptr inbounds float, float* %tmp16039, i64 1
+ %tmp16041 = getelementptr inbounds float, float* %tmp16040, i64 1
+ %tmp16042 = getelementptr inbounds float, float* %tmp16041, i64 1
+ %tmp16043 = getelementptr inbounds float, float* %tmp16042, i64 1
+ %tmp16044 = getelementptr inbounds float, float* %tmp16043, i64 1
+ %tmp16045 = getelementptr inbounds float, float* %tmp16044, i64 1
+ %tmp16046 = getelementptr inbounds float, float* %tmp16045, i64 1
+ %tmp16047 = getelementptr inbounds float, float* %tmp16046, i64 1
+ %tmp16048 = getelementptr inbounds float, float* %tmp16047, i64 1
+ %tmp16049 = getelementptr inbounds float, float* %tmp16048, i64 1
+ %tmp16050 = getelementptr inbounds float, float* %tmp16049, i64 1
+ %tmp16051 = getelementptr inbounds float, float* %tmp16050, i64 1
+ %tmp16052 = getelementptr inbounds float, float* %tmp16051, i64 1
+ %tmp16053 = getelementptr inbounds float, float* %tmp16052, i64 1
+ %tmp16054 = getelementptr inbounds float, float* %tmp16053, i64 1
+ %tmp16055 = getelementptr inbounds float, float* %tmp16054, i64 1
+ %tmp16056 = getelementptr inbounds float, float* %tmp16055, i64 1
+ %tmp16057 = getelementptr inbounds float, float* %tmp16056, i64 1
+ %tmp16058 = getelementptr inbounds float, float* %tmp16057, i64 1
+ %tmp16059 = getelementptr inbounds float, float* %tmp16058, i64 1
+ %tmp16060 = getelementptr inbounds float, float* %tmp16059, i64 1
+ %tmp16061 = getelementptr inbounds float, float* %tmp16060, i64 1
+ %tmp16062 = getelementptr inbounds float, float* %tmp16061, i64 1
+ %tmp16063 = getelementptr inbounds float, float* %tmp16062, i64 1
+ %tmp16064 = getelementptr inbounds float, float* %tmp16063, i64 1
+ %tmp16065 = getelementptr inbounds float, float* %tmp16064, i64 1
+ %tmp16066 = getelementptr inbounds float, float* %tmp16065, i64 1
+ %tmp16067 = getelementptr inbounds float, float* %tmp16066, i64 1
+ %tmp16068 = getelementptr inbounds float, float* %tmp16067, i64 1
+ %tmp16069 = getelementptr inbounds float, float* %tmp16068, i64 1
+ %tmp16070 = getelementptr inbounds float, float* %tmp16069, i64 1
+ %tmp16071 = getelementptr inbounds float, float* %tmp16070, i64 1
+ %tmp16072 = getelementptr inbounds float, float* %tmp16071, i64 1
+ %tmp16073 = getelementptr inbounds float, float* %tmp16072, i64 1
+ %tmp16074 = getelementptr inbounds float, float* %tmp16073, i64 1
+ %tmp16075 = getelementptr inbounds float, float* %tmp16074, i64 1
+ %tmp16076 = getelementptr inbounds float, float* %tmp16075, i64 1
+ %tmp16077 = getelementptr inbounds float, float* %tmp16076, i64 1
+ %tmp16078 = getelementptr inbounds float, float* %tmp16077, i64 1
+ %tmp16079 = getelementptr inbounds float, float* %tmp16078, i64 1
+ %tmp16080 = getelementptr inbounds float, float* %tmp16079, i64 1
+ %tmp16081 = getelementptr inbounds float, float* %tmp16080, i64 1
+ %tmp16082 = getelementptr inbounds float, float* %tmp16081, i64 1
+ %tmp16083 = getelementptr inbounds float, float* %tmp16082, i64 1
+ %tmp16084 = getelementptr inbounds float, float* %tmp16083, i64 1
+ %tmp16085 = getelementptr inbounds float, float* %tmp16084, i64 1
+ %tmp16086 = getelementptr inbounds float, float* %tmp16085, i64 1
+ %tmp16087 = getelementptr inbounds float, float* %tmp16086, i64 1
+ %tmp16088 = getelementptr inbounds float, float* %tmp16087, i64 1
+ %tmp16089 = getelementptr inbounds float, float* %tmp16088, i64 1
+ %tmp16090 = getelementptr inbounds float, float* %tmp16089, i64 1
+ %tmp16091 = getelementptr inbounds float, float* %tmp16090, i64 1
+ %tmp16092 = getelementptr inbounds float, float* %tmp16091, i64 1
+ %tmp16093 = getelementptr inbounds float, float* %tmp16092, i64 1
+ %tmp16094 = getelementptr inbounds float, float* %tmp16093, i64 1
+ %tmp16095 = getelementptr inbounds float, float* %tmp16094, i64 1
+ %tmp16096 = getelementptr inbounds float, float* %tmp16095, i64 1
+ %tmp16097 = getelementptr inbounds float, float* %tmp16096, i64 1
+ %tmp16098 = getelementptr inbounds float, float* %tmp16097, i64 1
+ %tmp16099 = getelementptr inbounds float, float* %tmp16098, i64 1
+ %tmp16100 = getelementptr inbounds float, float* %tmp16099, i64 1
+ %tmp16101 = getelementptr inbounds float, float* %tmp16100, i64 1
+ %tmp16102 = getelementptr inbounds float, float* %tmp16101, i64 1
+ %tmp16103 = getelementptr inbounds float, float* %tmp16102, i64 1
+ %tmp16104 = getelementptr inbounds float, float* %tmp16103, i64 1
+ %tmp16105 = getelementptr inbounds float, float* %tmp16104, i64 1
+ %tmp16106 = getelementptr inbounds float, float* %tmp16105, i64 1
+ %tmp16107 = getelementptr inbounds float, float* %tmp16106, i64 1
+ %tmp16108 = getelementptr inbounds float, float* %tmp16107, i64 1
+ %tmp16109 = getelementptr inbounds float, float* %tmp16108, i64 1
+ %tmp16110 = getelementptr inbounds float, float* %tmp16109, i64 1
+ %tmp16111 = getelementptr inbounds float, float* %tmp16110, i64 1
+ %tmp16112 = getelementptr inbounds float, float* %tmp16111, i64 1
+ %tmp16113 = getelementptr inbounds float, float* %tmp16112, i64 1
+ %tmp16114 = getelementptr inbounds float, float* %tmp16113, i64 1
+ %tmp16115 = getelementptr inbounds float, float* %tmp16114, i64 1
+ %tmp16116 = getelementptr inbounds float, float* %tmp16115, i64 1
+ %tmp16117 = getelementptr inbounds float, float* %tmp16116, i64 1
+ %tmp16118 = getelementptr inbounds float, float* %tmp16117, i64 1
+ %tmp16119 = getelementptr inbounds float, float* %tmp16118, i64 1
+ %tmp16120 = getelementptr inbounds float, float* %tmp16119, i64 1
+ %tmp16121 = getelementptr inbounds float, float* %tmp16120, i64 1
+ %tmp16122 = getelementptr inbounds float, float* %tmp16121, i64 1
+ %tmp16123 = getelementptr inbounds float, float* %tmp16122, i64 1
+ %tmp16124 = getelementptr inbounds float, float* %tmp16123, i64 1
+ %tmp16125 = getelementptr inbounds float, float* %tmp16124, i64 1
+ %tmp16126 = getelementptr inbounds float, float* %tmp16125, i64 1
+ %tmp16127 = getelementptr inbounds float, float* %tmp16126, i64 1
+ %tmp16128 = getelementptr inbounds float, float* %tmp16127, i64 1
+ %tmp16129 = getelementptr inbounds float, float* %tmp16128, i64 1
+ %tmp16130 = getelementptr inbounds float, float* %tmp16129, i64 1
+ %tmp16131 = getelementptr inbounds float, float* %tmp16130, i64 1
+ %tmp16132 = getelementptr inbounds float, float* %tmp16131, i64 1
+ %tmp16133 = getelementptr inbounds float, float* %tmp16132, i64 1
+ %tmp16134 = getelementptr inbounds float, float* %tmp16133, i64 1
+ %tmp16135 = getelementptr inbounds float, float* %tmp16134, i64 1
+ %tmp16136 = getelementptr inbounds float, float* %tmp16135, i64 1
+ %tmp16137 = getelementptr inbounds float, float* %tmp16136, i64 1
+ %tmp16138 = getelementptr inbounds float, float* %tmp16137, i64 1
+ %tmp16139 = getelementptr inbounds float, float* %tmp16138, i64 1
+ %tmp16140 = getelementptr inbounds float, float* %tmp16139, i64 1
+ %tmp16141 = getelementptr inbounds float, float* %tmp16140, i64 1
+ %tmp16142 = getelementptr inbounds float, float* %tmp16141, i64 1
+ %tmp16143 = getelementptr inbounds float, float* %tmp16142, i64 1
+ %tmp16144 = getelementptr inbounds float, float* %tmp16143, i64 1
+ %tmp16145 = getelementptr inbounds float, float* %tmp16144, i64 1
+ %tmp16146 = getelementptr inbounds float, float* %tmp16145, i64 1
+ %tmp16147 = getelementptr inbounds float, float* %tmp16146, i64 1
+ %tmp16148 = getelementptr inbounds float, float* %tmp16147, i64 1
+ %tmp16149 = getelementptr inbounds float, float* %tmp16148, i64 1
+ %tmp16150 = getelementptr inbounds float, float* %tmp16149, i64 1
+ %tmp16151 = getelementptr inbounds float, float* %tmp16150, i64 1
+ %tmp16152 = getelementptr inbounds float, float* %tmp16151, i64 1
+ %tmp16153 = getelementptr inbounds float, float* %tmp16152, i64 1
+ %tmp16154 = getelementptr inbounds float, float* %tmp16153, i64 1
+ %tmp16155 = getelementptr inbounds float, float* %tmp16154, i64 1
+ %tmp16156 = getelementptr inbounds float, float* %tmp16155, i64 1
+ %tmp16157 = getelementptr inbounds float, float* %tmp16156, i64 1
+ %tmp16158 = getelementptr inbounds float, float* %tmp16157, i64 1
+ %tmp16159 = getelementptr inbounds float, float* %tmp16158, i64 1
+ %tmp16160 = getelementptr inbounds float, float* %tmp16159, i64 1
+ %tmp16161 = getelementptr inbounds float, float* %tmp16160, i64 1
+ %tmp16162 = getelementptr inbounds float, float* %tmp16161, i64 1
+ %tmp16163 = getelementptr inbounds float, float* %tmp16162, i64 1
+ %tmp16164 = getelementptr inbounds float, float* %tmp16163, i64 1
+ %tmp16165 = getelementptr inbounds float, float* %tmp16164, i64 1
+ %tmp16166 = getelementptr inbounds float, float* %tmp16165, i64 1
+ %tmp16167 = getelementptr inbounds float, float* %tmp16166, i64 1
+ %tmp16168 = getelementptr inbounds float, float* %tmp16167, i64 1
+ %tmp16169 = getelementptr inbounds float, float* %tmp16168, i64 1
+ %tmp16170 = getelementptr inbounds float, float* %tmp16169, i64 1
+ %tmp16171 = getelementptr inbounds float, float* %tmp16170, i64 1
+ %tmp16172 = getelementptr inbounds float, float* %tmp16171, i64 1
+ %tmp16173 = getelementptr inbounds float, float* %tmp16172, i64 1
+ %tmp16174 = getelementptr inbounds float, float* %tmp16173, i64 1
+ %tmp16175 = getelementptr inbounds float, float* %tmp16174, i64 1
+ %tmp16176 = getelementptr inbounds float, float* %tmp16175, i64 1
+ %tmp16177 = getelementptr inbounds float, float* %tmp16176, i64 1
+ %tmp16178 = getelementptr inbounds float, float* %tmp16177, i64 1
+ %tmp16179 = getelementptr inbounds float, float* %tmp16178, i64 1
+ %tmp16180 = getelementptr inbounds float, float* %tmp16179, i64 1
+ %tmp16181 = getelementptr inbounds float, float* %tmp16180, i64 1
+ %tmp16182 = getelementptr inbounds float, float* %tmp16181, i64 1
+ %tmp16183 = getelementptr inbounds float, float* %tmp16182, i64 1
+ %tmp16184 = getelementptr inbounds float, float* %tmp16183, i64 1
+ %tmp16185 = getelementptr inbounds float, float* %tmp16184, i64 1
+ %tmp16186 = getelementptr inbounds float, float* %tmp16185, i64 1
+ %tmp16187 = getelementptr inbounds float, float* %tmp16186, i64 1
+ %tmp16188 = getelementptr inbounds float, float* %tmp16187, i64 1
+ %tmp16189 = getelementptr inbounds float, float* %tmp16188, i64 1
+ %tmp16190 = getelementptr inbounds float, float* %tmp16189, i64 1
+ %tmp16191 = getelementptr inbounds float, float* %tmp16190, i64 1
+ %tmp16192 = getelementptr inbounds float, float* %tmp16191, i64 1
+ %tmp16193 = getelementptr inbounds float, float* %tmp16192, i64 1
+ %tmp16194 = getelementptr inbounds float, float* %tmp16193, i64 1
+ %tmp16195 = getelementptr inbounds float, float* %tmp16194, i64 1
+ %tmp16196 = getelementptr inbounds float, float* %tmp16195, i64 1
+ %tmp16197 = getelementptr inbounds float, float* %tmp16196, i64 1
+ %tmp16198 = getelementptr inbounds float, float* %tmp16197, i64 1
+ %tmp16199 = getelementptr inbounds float, float* %tmp16198, i64 1
+ %tmp16200 = getelementptr inbounds float, float* %tmp16199, i64 1
+ %tmp16201 = getelementptr inbounds float, float* %tmp16200, i64 1
+ %tmp16202 = getelementptr inbounds float, float* %tmp16201, i64 1
+ %tmp16203 = getelementptr inbounds float, float* %tmp16202, i64 1
+ %tmp16204 = getelementptr inbounds float, float* %tmp16203, i64 1
+ %tmp16205 = getelementptr inbounds float, float* %tmp16204, i64 1
+ %tmp16206 = getelementptr inbounds float, float* %tmp16205, i64 1
+ %tmp16207 = getelementptr inbounds float, float* %tmp16206, i64 1
+ %tmp16208 = getelementptr inbounds float, float* %tmp16207, i64 1
+ %tmp16209 = getelementptr inbounds float, float* %tmp16208, i64 1
+ %tmp16210 = getelementptr inbounds float, float* %tmp16209, i64 1
+ %tmp16211 = getelementptr inbounds float, float* %tmp16210, i64 1
+ %tmp16212 = getelementptr inbounds float, float* %tmp16211, i64 1
+ %tmp16213 = getelementptr inbounds float, float* %tmp16212, i64 1
+ %tmp16214 = getelementptr inbounds float, float* %tmp16213, i64 1
+ %tmp16215 = getelementptr inbounds float, float* %tmp16214, i64 1
+ %tmp16216 = getelementptr inbounds float, float* %tmp16215, i64 1
+ %tmp16217 = getelementptr inbounds float, float* %tmp16216, i64 1
+ %tmp16218 = getelementptr inbounds float, float* %tmp16217, i64 1
+ %tmp16219 = getelementptr inbounds float, float* %tmp16218, i64 1
+ %tmp16220 = getelementptr inbounds float, float* %tmp16219, i64 1
+ %tmp16221 = getelementptr inbounds float, float* %tmp16220, i64 1
+ %tmp16222 = getelementptr inbounds float, float* %tmp16221, i64 1
+ %tmp16223 = getelementptr inbounds float, float* %tmp16222, i64 1
+ %tmp16224 = getelementptr inbounds float, float* %tmp16223, i64 1
+ %tmp16225 = getelementptr inbounds float, float* %tmp16224, i64 1
+ %tmp16226 = getelementptr inbounds float, float* %tmp16225, i64 1
+ %tmp16227 = getelementptr inbounds float, float* %tmp16226, i64 1
+ %tmp16228 = getelementptr inbounds float, float* %tmp16227, i64 1
+ %tmp16229 = getelementptr inbounds float, float* %tmp16228, i64 1
+ %tmp16230 = getelementptr inbounds float, float* %tmp16229, i64 1
+ %tmp16231 = getelementptr inbounds float, float* %tmp16230, i64 1
+ %tmp16232 = getelementptr inbounds float, float* %tmp16231, i64 1
+ %tmp16233 = getelementptr inbounds float, float* %tmp16232, i64 1
+ %tmp16234 = getelementptr inbounds float, float* %tmp16233, i64 1
+ %tmp16235 = getelementptr inbounds float, float* %tmp16234, i64 1
+ %tmp16236 = getelementptr inbounds float, float* %tmp16235, i64 1
+ %tmp16237 = getelementptr inbounds float, float* %tmp16236, i64 1
+ %tmp16238 = getelementptr inbounds float, float* %tmp16237, i64 1
+ %tmp16239 = getelementptr inbounds float, float* %tmp16238, i64 1
+ %tmp16240 = getelementptr inbounds float, float* %tmp16239, i64 1
+ %tmp16241 = getelementptr inbounds float, float* %tmp16240, i64 1
+ %tmp16242 = getelementptr inbounds float, float* %tmp16241, i64 1
+ %tmp16243 = getelementptr inbounds float, float* %tmp16242, i64 1
+ %tmp16244 = getelementptr inbounds float, float* %tmp16243, i64 1
+ %tmp16245 = getelementptr inbounds float, float* %tmp16244, i64 1
+ %tmp16246 = getelementptr inbounds float, float* %tmp16245, i64 1
+ %tmp16247 = getelementptr inbounds float, float* %tmp16246, i64 1
+ %tmp16248 = getelementptr inbounds float, float* %tmp16247, i64 1
+ %tmp16249 = getelementptr inbounds float, float* %tmp16248, i64 1
+ %tmp16250 = getelementptr inbounds float, float* %tmp16249, i64 1
+ %tmp16251 = getelementptr inbounds float, float* %tmp16250, i64 1
+ %tmp16252 = getelementptr inbounds float, float* %tmp16251, i64 1
+ %tmp16253 = getelementptr inbounds float, float* %tmp16252, i64 1
+ %tmp16254 = getelementptr inbounds float, float* %tmp16253, i64 1
+ %tmp16255 = getelementptr inbounds float, float* %tmp16254, i64 1
+ %tmp16256 = getelementptr inbounds float, float* %tmp16255, i64 1
+ %tmp16257 = getelementptr inbounds float, float* %tmp16256, i64 1
+ %tmp16258 = getelementptr inbounds float, float* %tmp16257, i64 1
+ %tmp16259 = getelementptr inbounds float, float* %tmp16258, i64 1
+ %tmp16260 = getelementptr inbounds float, float* %tmp16259, i64 1
+ %tmp16261 = getelementptr inbounds float, float* %tmp16260, i64 1
+ %tmp16262 = getelementptr inbounds float, float* %tmp16261, i64 1
+ %tmp16263 = getelementptr inbounds float, float* %tmp16262, i64 1
+ %tmp16264 = getelementptr inbounds float, float* %tmp16263, i64 1
+ %tmp16265 = getelementptr inbounds float, float* %tmp16264, i64 1
+ %tmp16266 = getelementptr inbounds float, float* %tmp16265, i64 1
+ %tmp16267 = getelementptr inbounds float, float* %tmp16266, i64 1
+ %tmp16268 = getelementptr inbounds float, float* %tmp16267, i64 1
+ %tmp16269 = getelementptr inbounds float, float* %tmp16268, i64 1
+ %tmp16270 = getelementptr inbounds float, float* %tmp16269, i64 1
+ %tmp16271 = getelementptr inbounds float, float* %tmp16270, i64 1
+ %tmp16272 = getelementptr inbounds float, float* %tmp16271, i64 1
+ %tmp16273 = getelementptr inbounds float, float* %tmp16272, i64 1
+ %tmp16274 = getelementptr inbounds float, float* %tmp16273, i64 1
+ %tmp16275 = getelementptr inbounds float, float* %tmp16274, i64 1
+ %tmp16276 = getelementptr inbounds float, float* %tmp16275, i64 1
+ %tmp16277 = getelementptr inbounds float, float* %tmp16276, i64 1
+ %tmp16278 = getelementptr inbounds float, float* %tmp16277, i64 1
+ %tmp16279 = getelementptr inbounds float, float* %tmp16278, i64 1
+ %tmp16280 = getelementptr inbounds float, float* %tmp16279, i64 1
+ %tmp16281 = getelementptr inbounds float, float* %tmp16280, i64 1
+ %tmp16282 = getelementptr inbounds float, float* %tmp16281, i64 1
+ %tmp16283 = getelementptr inbounds float, float* %tmp16282, i64 1
+ %tmp16284 = getelementptr inbounds float, float* %tmp16283, i64 1
+ %tmp16285 = getelementptr inbounds float, float* %tmp16284, i64 1
+ %tmp16286 = getelementptr inbounds float, float* %tmp16285, i64 1
+ %tmp16287 = getelementptr inbounds float, float* %tmp16286, i64 1
+ %tmp16288 = getelementptr inbounds float, float* %tmp16287, i64 1
+ %tmp16289 = getelementptr inbounds float, float* %tmp16288, i64 1
+ %tmp16290 = getelementptr inbounds float, float* %tmp16289, i64 1
+ %tmp16291 = getelementptr inbounds float, float* %tmp16290, i64 1
+ %tmp16292 = getelementptr inbounds float, float* %tmp16291, i64 1
+ %tmp16293 = getelementptr inbounds float, float* %tmp16292, i64 1
+ %tmp16294 = getelementptr inbounds float, float* %tmp16293, i64 1
+ %tmp16295 = getelementptr inbounds float, float* %tmp16294, i64 1
+ %tmp16296 = getelementptr inbounds float, float* %tmp16295, i64 1
+ %tmp16297 = getelementptr inbounds float, float* %tmp16296, i64 1
+ %tmp16298 = getelementptr inbounds float, float* %tmp16297, i64 1
+ %tmp16299 = getelementptr inbounds float, float* %tmp16298, i64 1
+ %tmp16300 = getelementptr inbounds float, float* %tmp16299, i64 1
+ %tmp16301 = getelementptr inbounds float, float* %tmp16300, i64 1
+ %tmp16302 = getelementptr inbounds float, float* %tmp16301, i64 1
+ %tmp16303 = getelementptr inbounds float, float* %tmp16302, i64 1
+ %tmp16304 = getelementptr inbounds float, float* %tmp16303, i64 1
+ %tmp16305 = getelementptr inbounds float, float* %tmp16304, i64 1
+ %tmp16306 = getelementptr inbounds float, float* %tmp16305, i64 1
+ %tmp16307 = getelementptr inbounds float, float* %tmp16306, i64 1
+ %tmp16308 = getelementptr inbounds float, float* %tmp16307, i64 1
+ %tmp16309 = getelementptr inbounds float, float* %tmp16308, i64 1
+ %tmp16310 = getelementptr inbounds float, float* %tmp16309, i64 1
+ %tmp16311 = getelementptr inbounds float, float* %tmp16310, i64 1
+ %tmp16312 = getelementptr inbounds float, float* %tmp16311, i64 1
+ %tmp16313 = getelementptr inbounds float, float* %tmp16312, i64 1
+ %tmp16314 = getelementptr inbounds float, float* %tmp16313, i64 1
+ %tmp16315 = getelementptr inbounds float, float* %tmp16314, i64 1
+ %tmp16316 = getelementptr inbounds float, float* %tmp16315, i64 1
+ %tmp16317 = getelementptr inbounds float, float* %tmp16316, i64 1
+ %tmp16318 = getelementptr inbounds float, float* %tmp16317, i64 1
+ %tmp16319 = getelementptr inbounds float, float* %tmp16318, i64 1
+ %tmp16320 = getelementptr inbounds float, float* %tmp16319, i64 1
+ %tmp16321 = getelementptr inbounds float, float* %tmp16320, i64 1
+ %tmp16322 = getelementptr inbounds float, float* %tmp16321, i64 1
+ %tmp16323 = getelementptr inbounds float, float* %tmp16322, i64 1
+ %tmp16324 = getelementptr inbounds float, float* %tmp16323, i64 1
+ %tmp16325 = getelementptr inbounds float, float* %tmp16324, i64 1
+ %tmp16326 = getelementptr inbounds float, float* %tmp16325, i64 1
+ %tmp16327 = getelementptr inbounds float, float* %tmp16326, i64 1
+ %tmp16328 = getelementptr inbounds float, float* %tmp16327, i64 1
+ %tmp16329 = getelementptr inbounds float, float* %tmp16328, i64 1
+ %tmp16330 = getelementptr inbounds float, float* %tmp16329, i64 1
+ %tmp16331 = getelementptr inbounds float, float* %tmp16330, i64 1
+ %tmp16332 = getelementptr inbounds float, float* %tmp16331, i64 1
+ %tmp16333 = getelementptr inbounds float, float* %tmp16332, i64 1
+ %tmp16334 = getelementptr inbounds float, float* %tmp16333, i64 1
+ %tmp16335 = getelementptr inbounds float, float* %tmp16334, i64 1
+ %tmp16336 = getelementptr inbounds float, float* %tmp16335, i64 1
+ %tmp16337 = getelementptr inbounds float, float* %tmp16336, i64 1
+ %tmp16338 = getelementptr inbounds float, float* %tmp16337, i64 1
+ %tmp16339 = getelementptr inbounds float, float* %tmp16338, i64 1
+ %tmp16340 = getelementptr inbounds float, float* %tmp16339, i64 1
+ %tmp16341 = getelementptr inbounds float, float* %tmp16340, i64 1
+ %tmp16342 = getelementptr inbounds float, float* %tmp16341, i64 1
+ %tmp16343 = getelementptr inbounds float, float* %tmp16342, i64 1
+ %tmp16344 = getelementptr inbounds float, float* %tmp16343, i64 1
+ %tmp16345 = getelementptr inbounds float, float* %tmp16344, i64 1
+ %tmp16346 = getelementptr inbounds float, float* %tmp16345, i64 1
+ %tmp16347 = getelementptr inbounds float, float* %tmp16346, i64 1
+ %tmp16348 = getelementptr inbounds float, float* %tmp16347, i64 1
+ %tmp16349 = getelementptr inbounds float, float* %tmp16348, i64 1
+ %tmp16350 = getelementptr inbounds float, float* %tmp16349, i64 1
+ %tmp16351 = getelementptr inbounds float, float* %tmp16350, i64 1
+ %tmp16352 = getelementptr inbounds float, float* %tmp16351, i64 1
+ %tmp16353 = getelementptr inbounds float, float* %tmp16352, i64 1
+ %tmp16354 = getelementptr inbounds float, float* %tmp16353, i64 1
+ %tmp16355 = getelementptr inbounds float, float* %tmp16354, i64 1
+ %tmp16356 = getelementptr inbounds float, float* %tmp16355, i64 1
+ %tmp16357 = getelementptr inbounds float, float* %tmp16356, i64 1
+ %tmp16358 = getelementptr inbounds float, float* %tmp16357, i64 1
+ %tmp16359 = getelementptr inbounds float, float* %tmp16358, i64 1
+ %tmp16360 = getelementptr inbounds float, float* %tmp16359, i64 1
+ %tmp16361 = getelementptr inbounds float, float* %tmp16360, i64 1
+ %tmp16362 = getelementptr inbounds float, float* %tmp16361, i64 1
+ %tmp16363 = getelementptr inbounds float, float* %tmp16362, i64 1
+ %tmp16364 = getelementptr inbounds float, float* %tmp16363, i64 1
+ %tmp16365 = getelementptr inbounds float, float* %tmp16364, i64 1
+ %tmp16366 = getelementptr inbounds float, float* %tmp16365, i64 1
+ %tmp16367 = getelementptr inbounds float, float* %tmp16366, i64 1
+ %tmp16368 = getelementptr inbounds float, float* %tmp16367, i64 1
+ %tmp16369 = getelementptr inbounds float, float* %tmp16368, i64 1
+ %tmp16370 = getelementptr inbounds float, float* %tmp16369, i64 1
+ %tmp16371 = getelementptr inbounds float, float* %tmp16370, i64 1
+ %tmp16372 = getelementptr inbounds float, float* %tmp16371, i64 1
+ %tmp16373 = getelementptr inbounds float, float* %tmp16372, i64 1
+ %tmp16374 = getelementptr inbounds float, float* %tmp16373, i64 1
+ %tmp16375 = getelementptr inbounds float, float* %tmp16374, i64 1
+ %tmp16376 = getelementptr inbounds float, float* %tmp16375, i64 1
+ %tmp16377 = getelementptr inbounds float, float* %tmp16376, i64 1
+ %tmp16378 = getelementptr inbounds float, float* %tmp16377, i64 1
+ %tmp16379 = getelementptr inbounds float, float* %tmp16378, i64 1
+ %tmp16380 = getelementptr inbounds float, float* %tmp16379, i64 1
+ %tmp16381 = getelementptr inbounds float, float* %tmp16380, i64 1
+ %tmp16382 = getelementptr inbounds float, float* %tmp16381, i64 1
+ %tmp16383 = getelementptr inbounds float, float* %tmp16382, i64 1
+ %tmp16384 = getelementptr inbounds float, float* %tmp16383, i64 1
+ %tmp16385 = getelementptr inbounds float, float* %tmp16384, i64 1
+ %tmp16386 = getelementptr inbounds float, float* %tmp16385, i64 1
+ %tmp16387 = getelementptr inbounds float, float* %tmp16386, i64 1
+ %tmp16388 = getelementptr inbounds float, float* %tmp16387, i64 1
+ %tmp16389 = getelementptr inbounds float, float* %tmp16388, i64 1
+ %tmp16390 = getelementptr inbounds float, float* %tmp16389, i64 1
+ %tmp16391 = getelementptr inbounds float, float* %tmp16390, i64 1
+ %tmp16392 = getelementptr inbounds float, float* %tmp16391, i64 1
+ %tmp16393 = getelementptr inbounds float, float* %tmp16392, i64 1
+ %tmp16394 = getelementptr inbounds float, float* %tmp16393, i64 1
+ %tmp16395 = getelementptr inbounds float, float* %tmp16394, i64 1
+ %tmp16396 = getelementptr inbounds float, float* %tmp16395, i64 1
+ %tmp16397 = getelementptr inbounds float, float* %tmp16396, i64 1
+ %tmp16398 = getelementptr inbounds float, float* %tmp16397, i64 1
+ %tmp16399 = getelementptr inbounds float, float* %tmp16398, i64 1
+ %tmp16400 = getelementptr inbounds float, float* %tmp16399, i64 1
+ %tmp16401 = getelementptr inbounds float, float* %tmp16400, i64 1
+ %tmp16402 = getelementptr inbounds float, float* %tmp16401, i64 1
+ %tmp16403 = getelementptr inbounds float, float* %tmp16402, i64 1
+ %tmp16404 = getelementptr inbounds float, float* %tmp16403, i64 1
+ %tmp16405 = getelementptr inbounds float, float* %tmp16404, i64 1
+ %tmp16406 = getelementptr inbounds float, float* %tmp16405, i64 1
+ %tmp16407 = getelementptr inbounds float, float* %tmp16406, i64 1
+ %tmp16408 = getelementptr inbounds float, float* %tmp16407, i64 1
+ %tmp16409 = getelementptr inbounds float, float* %tmp16408, i64 1
+ %tmp16410 = getelementptr inbounds float, float* %tmp16409, i64 1
+ %tmp16411 = getelementptr inbounds float, float* %tmp16410, i64 1
+ %tmp16412 = getelementptr inbounds float, float* %tmp16411, i64 1
+ %tmp16413 = getelementptr inbounds float, float* %tmp16412, i64 1
+ %tmp16414 = getelementptr inbounds float, float* %tmp16413, i64 1
+ %tmp16415 = getelementptr inbounds float, float* %tmp16414, i64 1
+ %tmp16416 = getelementptr inbounds float, float* %tmp16415, i64 1
+ %tmp16417 = getelementptr inbounds float, float* %tmp16416, i64 1
+ %tmp16418 = getelementptr inbounds float, float* %tmp16417, i64 1
+ %tmp16419 = getelementptr inbounds float, float* %tmp16418, i64 1
+ %tmp16420 = getelementptr inbounds float, float* %tmp16419, i64 1
+ %tmp16421 = getelementptr inbounds float, float* %tmp16420, i64 1
+ %tmp16422 = getelementptr inbounds float, float* %tmp16421, i64 1
+ %tmp16423 = getelementptr inbounds float, float* %tmp16422, i64 1
+ %tmp16424 = getelementptr inbounds float, float* %tmp16423, i64 1
+ %tmp16425 = getelementptr inbounds float, float* %tmp16424, i64 1
+ %tmp16426 = getelementptr inbounds float, float* %tmp16425, i64 1
+ %tmp16427 = getelementptr inbounds float, float* %tmp16426, i64 1
+ %tmp16428 = getelementptr inbounds float, float* %tmp16427, i64 1
+ %tmp16429 = getelementptr inbounds float, float* %tmp16428, i64 1
+ %tmp16430 = getelementptr inbounds float, float* %tmp16429, i64 1
+ %tmp16431 = getelementptr inbounds float, float* %tmp16430, i64 1
+ %tmp16432 = getelementptr inbounds float, float* %tmp16431, i64 1
+ %tmp16433 = getelementptr inbounds float, float* %tmp16432, i64 1
+ %tmp16434 = getelementptr inbounds float, float* %tmp16433, i64 1
+ %tmp16435 = getelementptr inbounds float, float* %tmp16434, i64 1
+ %tmp16436 = getelementptr inbounds float, float* %tmp16435, i64 1
+ %tmp16437 = getelementptr inbounds float, float* %tmp16436, i64 1
+ %tmp16438 = getelementptr inbounds float, float* %tmp16437, i64 1
+ %tmp16439 = getelementptr inbounds float, float* %tmp16438, i64 1
+ %tmp16440 = getelementptr inbounds float, float* %tmp16439, i64 1
+ %tmp16441 = getelementptr inbounds float, float* %tmp16440, i64 1
+ %tmp16442 = getelementptr inbounds float, float* %tmp16441, i64 1
+ %tmp16443 = getelementptr inbounds float, float* %tmp16442, i64 1
+ %tmp16444 = getelementptr inbounds float, float* %tmp16443, i64 1
+ %tmp16445 = getelementptr inbounds float, float* %tmp16444, i64 1
+ %tmp16446 = getelementptr inbounds float, float* %tmp16445, i64 1
+ %tmp16447 = getelementptr inbounds float, float* %tmp16446, i64 1
+ %tmp16448 = getelementptr inbounds float, float* %tmp16447, i64 1
+ %tmp16449 = getelementptr inbounds float, float* %tmp16448, i64 1
+ %tmp16450 = getelementptr inbounds float, float* %tmp16449, i64 1
+ %tmp16451 = getelementptr inbounds float, float* %tmp16450, i64 1
+ %tmp16452 = getelementptr inbounds float, float* %tmp16451, i64 1
+ %tmp16453 = getelementptr inbounds float, float* %tmp16452, i64 1
+ %tmp16454 = getelementptr inbounds float, float* %tmp16453, i64 1
+ %tmp16455 = getelementptr inbounds float, float* %tmp16454, i64 1
+ %tmp16456 = getelementptr inbounds float, float* %tmp16455, i64 1
+ %tmp16457 = getelementptr inbounds float, float* %tmp16456, i64 1
+ %tmp16458 = getelementptr inbounds float, float* %tmp16457, i64 1
+ %tmp16459 = getelementptr inbounds float, float* %tmp16458, i64 1
+ %tmp16460 = getelementptr inbounds float, float* %tmp16459, i64 1
+ %tmp16461 = getelementptr inbounds float, float* %tmp16460, i64 1
+ %tmp16462 = getelementptr inbounds float, float* %tmp16461, i64 1
+ %tmp16463 = getelementptr inbounds float, float* %tmp16462, i64 1
+ %tmp16464 = getelementptr inbounds float, float* %tmp16463, i64 1
+ %tmp16465 = getelementptr inbounds float, float* %tmp16464, i64 1
+ %tmp16466 = getelementptr inbounds float, float* %tmp16465, i64 1
+ %tmp16467 = getelementptr inbounds float, float* %tmp16466, i64 1
+ %tmp16468 = getelementptr inbounds float, float* %tmp16467, i64 1
+ %tmp16469 = getelementptr inbounds float, float* %tmp16468, i64 1
+ %tmp16470 = getelementptr inbounds float, float* %tmp16469, i64 1
+ %tmp16471 = getelementptr inbounds float, float* %tmp16470, i64 1
+ %tmp16472 = getelementptr inbounds float, float* %tmp16471, i64 1
+ %tmp16473 = getelementptr inbounds float, float* %tmp16472, i64 1
+ %tmp16474 = getelementptr inbounds float, float* %tmp16473, i64 1
+ %tmp16475 = getelementptr inbounds float, float* %tmp16474, i64 1
+ %tmp16476 = getelementptr inbounds float, float* %tmp16475, i64 1
+ %tmp16477 = getelementptr inbounds float, float* %tmp16476, i64 1
+ %tmp16478 = getelementptr inbounds float, float* %tmp16477, i64 1
+ %tmp16479 = getelementptr inbounds float, float* %tmp16478, i64 1
+ %tmp16480 = getelementptr inbounds float, float* %tmp16479, i64 1
+ %tmp16481 = getelementptr inbounds float, float* %tmp16480, i64 1
+ %tmp16482 = getelementptr inbounds float, float* %tmp16481, i64 1
+ %tmp16483 = getelementptr inbounds float, float* %tmp16482, i64 1
+ %tmp16484 = getelementptr inbounds float, float* %tmp16483, i64 1
+ %tmp16485 = getelementptr inbounds float, float* %tmp16484, i64 1
+ %tmp16486 = getelementptr inbounds float, float* %tmp16485, i64 1
+ %tmp16487 = getelementptr inbounds float, float* %tmp16486, i64 1
+ %tmp16488 = getelementptr inbounds float, float* %tmp16487, i64 1
+ %tmp16489 = getelementptr inbounds float, float* %tmp16488, i64 1
+ %tmp16490 = getelementptr inbounds float, float* %tmp16489, i64 1
+ %tmp16491 = getelementptr inbounds float, float* %tmp16490, i64 1
+ %tmp16492 = getelementptr inbounds float, float* %tmp16491, i64 1
+ %tmp16493 = getelementptr inbounds float, float* %tmp16492, i64 1
+ %tmp16494 = getelementptr inbounds float, float* %tmp16493, i64 1
+ %tmp16495 = getelementptr inbounds float, float* %tmp16494, i64 1
+ %tmp16496 = getelementptr inbounds float, float* %tmp16495, i64 1
+ %tmp16497 = getelementptr inbounds float, float* %tmp16496, i64 1
+ %tmp16498 = getelementptr inbounds float, float* %tmp16497, i64 1
+ %tmp16499 = getelementptr inbounds float, float* %tmp16498, i64 1
+ %tmp16500 = getelementptr inbounds float, float* %tmp16499, i64 1
+ %tmp16501 = getelementptr inbounds float, float* %tmp16500, i64 1
+ %tmp16502 = getelementptr inbounds float, float* %tmp16501, i64 1
+ %tmp16503 = getelementptr inbounds float, float* %tmp16502, i64 1
+ %tmp16504 = getelementptr inbounds float, float* %tmp16503, i64 1
+ %tmp16505 = getelementptr inbounds float, float* %tmp16504, i64 1
+ %tmp16506 = getelementptr inbounds float, float* %tmp16505, i64 1
+ %tmp16507 = getelementptr inbounds float, float* %tmp16506, i64 1
+ %tmp16508 = getelementptr inbounds float, float* %tmp16507, i64 1
+ %tmp16509 = getelementptr inbounds float, float* %tmp16508, i64 1
+ %tmp16510 = getelementptr inbounds float, float* %tmp16509, i64 1
+ %tmp16511 = getelementptr inbounds float, float* %tmp16510, i64 1
+ %tmp16512 = getelementptr inbounds float, float* %tmp16511, i64 1
+ %tmp16513 = getelementptr inbounds float, float* %tmp16512, i64 1
+ %tmp16514 = getelementptr inbounds float, float* %tmp16513, i64 1
+ %tmp16515 = getelementptr inbounds float, float* %tmp16514, i64 1
+ %tmp16516 = getelementptr inbounds float, float* %tmp16515, i64 1
+ %tmp16517 = getelementptr inbounds float, float* %tmp16516, i64 1
+ %tmp16518 = getelementptr inbounds float, float* %tmp16517, i64 1
+ %tmp16519 = getelementptr inbounds float, float* %tmp16518, i64 1
+ %tmp16520 = getelementptr inbounds float, float* %tmp16519, i64 1
+ %tmp16521 = getelementptr inbounds float, float* %tmp16520, i64 1
+ %tmp16522 = getelementptr inbounds float, float* %tmp16521, i64 1
+ %tmp16523 = getelementptr inbounds float, float* %tmp16522, i64 1
+ %tmp16524 = getelementptr inbounds float, float* %tmp16523, i64 1
+ %tmp16525 = getelementptr inbounds float, float* %tmp16524, i64 1
+ %tmp16526 = getelementptr inbounds float, float* %tmp16525, i64 1
+ %tmp16527 = getelementptr inbounds float, float* %tmp16526, i64 1
+ %tmp16528 = getelementptr inbounds float, float* %tmp16527, i64 1
+ %tmp16529 = getelementptr inbounds float, float* %tmp16528, i64 1
+ %tmp16530 = getelementptr inbounds float, float* %tmp16529, i64 1
+ %tmp16531 = getelementptr inbounds float, float* %tmp16530, i64 1
+ %tmp16532 = getelementptr inbounds float, float* %tmp16531, i64 1
+ %tmp16533 = getelementptr inbounds float, float* %tmp16532, i64 1
+ %tmp16534 = getelementptr inbounds float, float* %tmp16533, i64 1
+ %tmp16535 = getelementptr inbounds float, float* %tmp16534, i64 1
+ %tmp16536 = getelementptr inbounds float, float* %tmp16535, i64 1
+ %tmp16537 = getelementptr inbounds float, float* %tmp16536, i64 1
+ %tmp16538 = getelementptr inbounds float, float* %tmp16537, i64 1
+ %tmp16539 = getelementptr inbounds float, float* %tmp16538, i64 1
+ %tmp16540 = getelementptr inbounds float, float* %tmp16539, i64 1
+ %tmp16541 = getelementptr inbounds float, float* %tmp16540, i64 1
+ %tmp16542 = getelementptr inbounds float, float* %tmp16541, i64 1
+ %tmp16543 = getelementptr inbounds float, float* %tmp16542, i64 1
+ %tmp16544 = getelementptr inbounds float, float* %tmp16543, i64 1
+ %tmp16545 = getelementptr inbounds float, float* %tmp16544, i64 1
+ %tmp16546 = getelementptr inbounds float, float* %tmp16545, i64 1
+ %tmp16547 = getelementptr inbounds float, float* %tmp16546, i64 1
+ %tmp16548 = getelementptr inbounds float, float* %tmp16547, i64 1
+ %tmp16549 = getelementptr inbounds float, float* %tmp16548, i64 1
+ %tmp16550 = getelementptr inbounds float, float* %tmp16549, i64 1
+ %tmp16551 = getelementptr inbounds float, float* %tmp16550, i64 1
+ %tmp16552 = getelementptr inbounds float, float* %tmp16551, i64 1
+ %tmp16553 = getelementptr inbounds float, float* %tmp16552, i64 1
+ %tmp16554 = getelementptr inbounds float, float* %tmp16553, i64 1
+ %tmp16555 = getelementptr inbounds float, float* %tmp16554, i64 1
+ %tmp16556 = getelementptr inbounds float, float* %tmp16555, i64 1
+ %tmp16557 = getelementptr inbounds float, float* %tmp16556, i64 1
+ %tmp16558 = getelementptr inbounds float, float* %tmp16557, i64 1
+ %tmp16559 = getelementptr inbounds float, float* %tmp16558, i64 1
+ %tmp16560 = getelementptr inbounds float, float* %tmp16559, i64 1
+ %tmp16561 = getelementptr inbounds float, float* %tmp16560, i64 1
+ %tmp16562 = getelementptr inbounds float, float* %tmp16561, i64 1
+ %tmp16563 = getelementptr inbounds float, float* %tmp16562, i64 1
+ %tmp16564 = getelementptr inbounds float, float* %tmp16563, i64 1
+ %tmp16565 = getelementptr inbounds float, float* %tmp16564, i64 1
+ %tmp16566 = getelementptr inbounds float, float* %tmp16565, i64 1
+ %tmp16567 = getelementptr inbounds float, float* %tmp16566, i64 1
+ %tmp16568 = getelementptr inbounds float, float* %tmp16567, i64 1
+ %tmp16569 = getelementptr inbounds float, float* %tmp16568, i64 1
+ %tmp16570 = getelementptr inbounds float, float* %tmp16569, i64 1
+ %tmp16571 = getelementptr inbounds float, float* %tmp16570, i64 1
+ %tmp16572 = getelementptr inbounds float, float* %tmp16571, i64 1
+ %tmp16573 = getelementptr inbounds float, float* %tmp16572, i64 1
+ %tmp16574 = getelementptr inbounds float, float* %tmp16573, i64 1
+ %tmp16575 = getelementptr inbounds float, float* %tmp16574, i64 1
+ %tmp16576 = getelementptr inbounds float, float* %tmp16575, i64 1
+ %tmp16577 = getelementptr inbounds float, float* %tmp16576, i64 1
+ %tmp16578 = getelementptr inbounds float, float* %tmp16577, i64 1
+ %tmp16579 = getelementptr inbounds float, float* %tmp16578, i64 1
+ %tmp16580 = getelementptr inbounds float, float* %tmp16579, i64 1
+ %tmp16581 = getelementptr inbounds float, float* %tmp16580, i64 1
+ %tmp16582 = getelementptr inbounds float, float* %tmp16581, i64 1
+ %tmp16583 = getelementptr inbounds float, float* %tmp16582, i64 1
+ %tmp16584 = getelementptr inbounds float, float* %tmp16583, i64 1
+ %tmp16585 = getelementptr inbounds float, float* %tmp16584, i64 1
+ %tmp16586 = getelementptr inbounds float, float* %tmp16585, i64 1
+ %tmp16587 = getelementptr inbounds float, float* %tmp16586, i64 1
+ %tmp16588 = getelementptr inbounds float, float* %tmp16587, i64 1
+ %tmp16589 = getelementptr inbounds float, float* %tmp16588, i64 1
+ %tmp16590 = getelementptr inbounds float, float* %tmp16589, i64 1
+ %tmp16591 = getelementptr inbounds float, float* %tmp16590, i64 1
+ %tmp16592 = getelementptr inbounds float, float* %tmp16591, i64 1
+ %tmp16593 = getelementptr inbounds float, float* %tmp16592, i64 1
+ %tmp16594 = getelementptr inbounds float, float* %tmp16593, i64 1
+ %tmp16595 = getelementptr inbounds float, float* %tmp16594, i64 1
+ %tmp16596 = getelementptr inbounds float, float* %tmp16595, i64 1
+ %tmp16597 = getelementptr inbounds float, float* %tmp16596, i64 1
+ %tmp16598 = getelementptr inbounds float, float* %tmp16597, i64 1
+ %tmp16599 = getelementptr inbounds float, float* %tmp16598, i64 1
+ %tmp16600 = getelementptr inbounds float, float* %tmp16599, i64 1
+ %tmp16601 = getelementptr inbounds float, float* %tmp16600, i64 1
+ %tmp16602 = getelementptr inbounds float, float* %tmp16601, i64 1
+ %tmp16603 = getelementptr inbounds float, float* %tmp16602, i64 1
+ %tmp16604 = getelementptr inbounds float, float* %tmp16603, i64 1
+ %tmp16605 = getelementptr inbounds float, float* %tmp16604, i64 1
+ %tmp16606 = getelementptr inbounds float, float* %tmp16605, i64 1
+ %tmp16607 = getelementptr inbounds float, float* %tmp16606, i64 1
+ %tmp16608 = getelementptr inbounds float, float* %tmp16607, i64 1
+ %tmp16609 = getelementptr inbounds float, float* %tmp16608, i64 1
+ %tmp16610 = getelementptr inbounds float, float* %tmp16609, i64 1
+ %tmp16611 = getelementptr inbounds float, float* %tmp16610, i64 1
+ %tmp16612 = getelementptr inbounds float, float* %tmp16611, i64 1
+ %tmp16613 = getelementptr inbounds float, float* %tmp16612, i64 1
+ %tmp16614 = getelementptr inbounds float, float* %tmp16613, i64 1
+ %tmp16615 = getelementptr inbounds float, float* %tmp16614, i64 1
+ %tmp16616 = getelementptr inbounds float, float* %tmp16615, i64 1
+ %tmp16617 = getelementptr inbounds float, float* %tmp16616, i64 1
+ %tmp16618 = getelementptr inbounds float, float* %tmp16617, i64 1
+ %tmp16619 = getelementptr inbounds float, float* %tmp16618, i64 1
+ %tmp16620 = getelementptr inbounds float, float* %tmp16619, i64 1
+ %tmp16621 = getelementptr inbounds float, float* %tmp16620, i64 1
+ %tmp16622 = getelementptr inbounds float, float* %tmp16621, i64 1
+ %tmp16623 = getelementptr inbounds float, float* %tmp16622, i64 1
+ %tmp16624 = getelementptr inbounds float, float* %tmp16623, i64 1
+ %tmp16625 = getelementptr inbounds float, float* %tmp16624, i64 1
+ %tmp16626 = getelementptr inbounds float, float* %tmp16625, i64 1
+ %tmp16627 = getelementptr inbounds float, float* %tmp16626, i64 1
+ %tmp16628 = getelementptr inbounds float, float* %tmp16627, i64 1
+ %tmp16629 = getelementptr inbounds float, float* %tmp16628, i64 1
+ %tmp16630 = getelementptr inbounds float, float* %tmp16629, i64 1
+ %tmp16631 = getelementptr inbounds float, float* %tmp16630, i64 1
+ %tmp16632 = getelementptr inbounds float, float* %tmp16631, i64 1
+ %tmp16633 = getelementptr inbounds float, float* %tmp16632, i64 1
+ %tmp16634 = getelementptr inbounds float, float* %tmp16633, i64 1
+ %tmp16635 = getelementptr inbounds float, float* %tmp16634, i64 1
+ %tmp16636 = getelementptr inbounds float, float* %tmp16635, i64 1
+ %tmp16637 = getelementptr inbounds float, float* %tmp16636, i64 1
+ %tmp16638 = getelementptr inbounds float, float* %tmp16637, i64 1
+ %tmp16639 = getelementptr inbounds float, float* %tmp16638, i64 1
+ %tmp16640 = getelementptr inbounds float, float* %tmp16639, i64 1
+ %tmp16641 = getelementptr inbounds float, float* %tmp16640, i64 1
+ %tmp16642 = getelementptr inbounds float, float* %tmp16641, i64 1
+ %tmp16643 = getelementptr inbounds float, float* %tmp16642, i64 1
+ %tmp16644 = getelementptr inbounds float, float* %tmp16643, i64 1
+ %tmp16645 = getelementptr inbounds float, float* %tmp16644, i64 1
+ %tmp16646 = getelementptr inbounds float, float* %tmp16645, i64 1
+ %tmp16647 = getelementptr inbounds float, float* %tmp16646, i64 1
+ %tmp16648 = getelementptr inbounds float, float* %tmp16647, i64 1
+ %tmp16649 = getelementptr inbounds float, float* %tmp16648, i64 1
+ %tmp16650 = getelementptr inbounds float, float* %tmp16649, i64 1
+ %tmp16651 = getelementptr inbounds float, float* %tmp16650, i64 1
+ %tmp16652 = getelementptr inbounds float, float* %tmp16651, i64 1
+ %tmp16653 = getelementptr inbounds float, float* %tmp16652, i64 1
+ %tmp16654 = getelementptr inbounds float, float* %tmp16653, i64 1
+ %tmp16655 = getelementptr inbounds float, float* %tmp16654, i64 1
+ %tmp16656 = getelementptr inbounds float, float* %tmp16655, i64 1
+ %tmp16657 = getelementptr inbounds float, float* %tmp16656, i64 1
+ %tmp16658 = getelementptr inbounds float, float* %tmp16657, i64 1
+ %tmp16659 = getelementptr inbounds float, float* %tmp16658, i64 1
+ %tmp16660 = getelementptr inbounds float, float* %tmp16659, i64 1
+ %tmp16661 = getelementptr inbounds float, float* %tmp16660, i64 1
+ %tmp16662 = getelementptr inbounds float, float* %tmp16661, i64 1
+ %tmp16663 = getelementptr inbounds float, float* %tmp16662, i64 1
+ %tmp16664 = getelementptr inbounds float, float* %tmp16663, i64 1
+ %tmp16665 = getelementptr inbounds float, float* %tmp16664, i64 1
+ %tmp16666 = getelementptr inbounds float, float* %tmp16665, i64 1
+ %tmp16667 = getelementptr inbounds float, float* %tmp16666, i64 1
+ %tmp16668 = getelementptr inbounds float, float* %tmp16667, i64 1
+ %tmp16669 = getelementptr inbounds float, float* %tmp16668, i64 1
+ %tmp16670 = getelementptr inbounds float, float* %tmp16669, i64 1
+ %tmp16671 = getelementptr inbounds float, float* %tmp16670, i64 1
+ %tmp16672 = getelementptr inbounds float, float* %tmp16671, i64 1
+ %tmp16673 = getelementptr inbounds float, float* %tmp16672, i64 1
+ %tmp16674 = getelementptr inbounds float, float* %tmp16673, i64 1
+ %tmp16675 = getelementptr inbounds float, float* %tmp16674, i64 1
+ %tmp16676 = getelementptr inbounds float, float* %tmp16675, i64 1
+ %tmp16677 = getelementptr inbounds float, float* %tmp16676, i64 1
+ %tmp16678 = getelementptr inbounds float, float* %tmp16677, i64 1
+ %tmp16679 = getelementptr inbounds float, float* %tmp16678, i64 1
+ %tmp16680 = getelementptr inbounds float, float* %tmp16679, i64 1
+ %tmp16681 = getelementptr inbounds float, float* %tmp16680, i64 1
+ %tmp16682 = getelementptr inbounds float, float* %tmp16681, i64 1
+ %tmp16683 = getelementptr inbounds float, float* %tmp16682, i64 1
+ %tmp16684 = getelementptr inbounds float, float* %tmp16683, i64 1
+ %tmp16685 = getelementptr inbounds float, float* %tmp16684, i64 1
+ %tmp16686 = getelementptr inbounds float, float* %tmp16685, i64 1
+ %tmp16687 = getelementptr inbounds float, float* %tmp16686, i64 1
+ %tmp16688 = getelementptr inbounds float, float* %tmp16687, i64 1
+ %tmp16689 = getelementptr inbounds float, float* %tmp16688, i64 1
+ %tmp16690 = getelementptr inbounds float, float* %tmp16689, i64 1
+ %tmp16691 = getelementptr inbounds float, float* %tmp16690, i64 1
+ %tmp16692 = getelementptr inbounds float, float* %tmp16691, i64 1
+ %tmp16693 = getelementptr inbounds float, float* %tmp16692, i64 1
+ %tmp16694 = getelementptr inbounds float, float* %tmp16693, i64 1
+ %tmp16695 = getelementptr inbounds float, float* %tmp16694, i64 1
+ %tmp16696 = getelementptr inbounds float, float* %tmp16695, i64 1
+ %tmp16697 = getelementptr inbounds float, float* %tmp16696, i64 1
+ %tmp16698 = getelementptr inbounds float, float* %tmp16697, i64 1
+ %tmp16699 = getelementptr inbounds float, float* %tmp16698, i64 1
+ %tmp16700 = getelementptr inbounds float, float* %tmp16699, i64 1
+ %tmp16701 = getelementptr inbounds float, float* %tmp16700, i64 1
+ %tmp16702 = getelementptr inbounds float, float* %tmp16701, i64 1
+ %tmp16703 = getelementptr inbounds float, float* %tmp16702, i64 1
+ %tmp16704 = getelementptr inbounds float, float* %tmp16703, i64 1
+ %tmp16705 = getelementptr inbounds float, float* %tmp16704, i64 1
+ %tmp16706 = getelementptr inbounds float, float* %tmp16705, i64 1
+ %tmp16707 = getelementptr inbounds float, float* %tmp16706, i64 1
+ %tmp16708 = getelementptr inbounds float, float* %tmp16707, i64 1
+ %tmp16709 = getelementptr inbounds float, float* %tmp16708, i64 1
+ %tmp16710 = getelementptr inbounds float, float* %tmp16709, i64 1
+ %tmp16711 = getelementptr inbounds float, float* %tmp16710, i64 1
+ %tmp16712 = getelementptr inbounds float, float* %tmp16711, i64 1
+ %tmp16713 = getelementptr inbounds float, float* %tmp16712, i64 1
+ %tmp16714 = getelementptr inbounds float, float* %tmp16713, i64 1
+ %tmp16715 = getelementptr inbounds float, float* %tmp16714, i64 1
+ %tmp16716 = getelementptr inbounds float, float* %tmp16715, i64 1
+ %tmp16717 = getelementptr inbounds float, float* %tmp16716, i64 1
+ %tmp16718 = getelementptr inbounds float, float* %tmp16717, i64 1
+ %tmp16719 = getelementptr inbounds float, float* %tmp16718, i64 1
+ %tmp16720 = getelementptr inbounds float, float* %tmp16719, i64 1
+ %tmp16721 = getelementptr inbounds float, float* %tmp16720, i64 1
+ %tmp16722 = getelementptr inbounds float, float* %tmp16721, i64 1
+ %tmp16723 = getelementptr inbounds float, float* %tmp16722, i64 1
+ %tmp16724 = getelementptr inbounds float, float* %tmp16723, i64 1
+ %tmp16725 = getelementptr inbounds float, float* %tmp16724, i64 1
+ %tmp16726 = getelementptr inbounds float, float* %tmp16725, i64 1
+ %tmp16727 = getelementptr inbounds float, float* %tmp16726, i64 1
+ %tmp16728 = getelementptr inbounds float, float* %tmp16727, i64 1
+ %tmp16729 = getelementptr inbounds float, float* %tmp16728, i64 1
+ %tmp16730 = getelementptr inbounds float, float* %tmp16729, i64 1
+ %tmp16731 = getelementptr inbounds float, float* %tmp16730, i64 1
+ %tmp16732 = getelementptr inbounds float, float* %tmp16731, i64 1
+ %tmp16733 = getelementptr inbounds float, float* %tmp16732, i64 1
+ %tmp16734 = getelementptr inbounds float, float* %tmp16733, i64 1
+ %tmp16735 = getelementptr inbounds float, float* %tmp16734, i64 1
+ %tmp16736 = getelementptr inbounds float, float* %tmp16735, i64 1
+ %tmp16737 = getelementptr inbounds float, float* %tmp16736, i64 1
+ %tmp16738 = getelementptr inbounds float, float* %tmp16737, i64 1
+ %tmp16739 = getelementptr inbounds float, float* %tmp16738, i64 1
+ %tmp16740 = getelementptr inbounds float, float* %tmp16739, i64 1
+ %tmp16741 = getelementptr inbounds float, float* %tmp16740, i64 1
+ %tmp16742 = getelementptr inbounds float, float* %tmp16741, i64 1
+ %tmp16743 = getelementptr inbounds float, float* %tmp16742, i64 1
+ %tmp16744 = getelementptr inbounds float, float* %tmp16743, i64 1
+ %tmp16745 = getelementptr inbounds float, float* %tmp16744, i64 1
+ %tmp16746 = getelementptr inbounds float, float* %tmp16745, i64 1
+ %tmp16747 = getelementptr inbounds float, float* %tmp16746, i64 1
+ %tmp16748 = getelementptr inbounds float, float* %tmp16747, i64 1
+ %tmp16749 = getelementptr inbounds float, float* %tmp16748, i64 1
+ %tmp16750 = getelementptr inbounds float, float* %tmp16749, i64 1
+ %tmp16751 = getelementptr inbounds float, float* %tmp16750, i64 1
+ %tmp16752 = getelementptr inbounds float, float* %tmp16751, i64 1
+ %tmp16753 = getelementptr inbounds float, float* %tmp16752, i64 1
+ %tmp16754 = getelementptr inbounds float, float* %tmp16753, i64 1
+ %tmp16755 = getelementptr inbounds float, float* %tmp16754, i64 1
+ %tmp16756 = getelementptr inbounds float, float* %tmp16755, i64 1
+ %tmp16757 = getelementptr inbounds float, float* %tmp16756, i64 1
+ %tmp16758 = getelementptr inbounds float, float* %tmp16757, i64 1
+ %tmp16759 = getelementptr inbounds float, float* %tmp16758, i64 1
+ %tmp16760 = getelementptr inbounds float, float* %tmp16759, i64 1
+ %tmp16761 = getelementptr inbounds float, float* %tmp16760, i64 1
+ %tmp16762 = getelementptr inbounds float, float* %tmp16761, i64 1
+ %tmp16763 = getelementptr inbounds float, float* %tmp16762, i64 1
+ %tmp16764 = getelementptr inbounds float, float* %tmp16763, i64 1
+ %tmp16765 = getelementptr inbounds float, float* %tmp16764, i64 1
+ %tmp16766 = getelementptr inbounds float, float* %tmp16765, i64 1
+ %tmp16767 = getelementptr inbounds float, float* %tmp16766, i64 1
+ %tmp16768 = getelementptr inbounds float, float* %tmp16767, i64 1
+ %tmp16769 = getelementptr inbounds float, float* %tmp16768, i64 1
+ %tmp16770 = getelementptr inbounds float, float* %tmp16769, i64 1
+ %tmp16771 = getelementptr inbounds float, float* %tmp16770, i64 1
+ %tmp16772 = getelementptr inbounds float, float* %tmp16771, i64 1
+ %tmp16773 = getelementptr inbounds float, float* %tmp16772, i64 1
+ %tmp16774 = getelementptr inbounds float, float* %tmp16773, i64 1
+ %tmp16775 = getelementptr inbounds float, float* %tmp16774, i64 1
+ %tmp16776 = getelementptr inbounds float, float* %tmp16775, i64 1
+ %tmp16777 = getelementptr inbounds float, float* %tmp16776, i64 1
+ %tmp16778 = getelementptr inbounds float, float* %tmp16777, i64 1
+ %tmp16779 = getelementptr inbounds float, float* %tmp16778, i64 1
+ %tmp16780 = getelementptr inbounds float, float* %tmp16779, i64 1
+ %tmp16781 = getelementptr inbounds float, float* %tmp16780, i64 1
+ %tmp16782 = getelementptr inbounds float, float* %tmp16781, i64 1
+ %tmp16783 = getelementptr inbounds float, float* %tmp16782, i64 1
+ %tmp16784 = getelementptr inbounds float, float* %tmp16783, i64 1
+ %tmp16785 = getelementptr inbounds float, float* %tmp16784, i64 1
+ %tmp16786 = getelementptr inbounds float, float* %tmp16785, i64 1
+ %tmp16787 = getelementptr inbounds float, float* %tmp16786, i64 1
+ %tmp16788 = getelementptr inbounds float, float* %tmp16787, i64 1
+ %tmp16789 = getelementptr inbounds float, float* %tmp16788, i64 1
+ %tmp16790 = getelementptr inbounds float, float* %tmp16789, i64 1
+ %tmp16791 = getelementptr inbounds float, float* %tmp16790, i64 1
+ %tmp16792 = getelementptr inbounds float, float* %tmp16791, i64 1
+ %tmp16793 = getelementptr inbounds float, float* %tmp16792, i64 1
+ %tmp16794 = getelementptr inbounds float, float* %tmp16793, i64 1
+ %tmp16795 = getelementptr inbounds float, float* %tmp16794, i64 1
+ %tmp16796 = getelementptr inbounds float, float* %tmp16795, i64 1
+ %tmp16797 = getelementptr inbounds float, float* %tmp16796, i64 1
+ %tmp16798 = getelementptr inbounds float, float* %tmp16797, i64 1
+ %tmp16799 = getelementptr inbounds float, float* %tmp16798, i64 1
+ %tmp16800 = getelementptr inbounds float, float* %tmp16799, i64 1
+ %tmp16801 = getelementptr inbounds float, float* %tmp16800, i64 1
+ %tmp16802 = getelementptr inbounds float, float* %tmp16801, i64 1
+ %tmp16803 = getelementptr inbounds float, float* %tmp16802, i64 1
+ %tmp16804 = getelementptr inbounds float, float* %tmp16803, i64 1
+ %tmp16805 = getelementptr inbounds float, float* %tmp16804, i64 1
+ %tmp16806 = getelementptr inbounds float, float* %tmp16805, i64 1
+ %tmp16807 = getelementptr inbounds float, float* %tmp16806, i64 1
+ %tmp16808 = getelementptr inbounds float, float* %tmp16807, i64 1
+ %tmp16809 = getelementptr inbounds float, float* %tmp16808, i64 1
+ %tmp16810 = getelementptr inbounds float, float* %tmp16809, i64 1
+ %tmp16811 = getelementptr inbounds float, float* %tmp16810, i64 1
+ %tmp16812 = getelementptr inbounds float, float* %tmp16811, i64 1
+ %tmp16813 = getelementptr inbounds float, float* %tmp16812, i64 1
+ %tmp16814 = getelementptr inbounds float, float* %tmp16813, i64 1
+ %tmp16815 = getelementptr inbounds float, float* %tmp16814, i64 1
+ %tmp16816 = getelementptr inbounds float, float* %tmp16815, i64 1
+ %tmp16817 = getelementptr inbounds float, float* %tmp16816, i64 1
+ %tmp16818 = getelementptr inbounds float, float* %tmp16817, i64 1
+ %tmp16819 = getelementptr inbounds float, float* %tmp16818, i64 1
+ %tmp16820 = getelementptr inbounds float, float* %tmp16819, i64 1
+ %tmp16821 = getelementptr inbounds float, float* %tmp16820, i64 1
+ %tmp16822 = getelementptr inbounds float, float* %tmp16821, i64 1
+ %tmp16823 = getelementptr inbounds float, float* %tmp16822, i64 1
+ %tmp16824 = getelementptr inbounds float, float* %tmp16823, i64 1
+ %tmp16825 = getelementptr inbounds float, float* %tmp16824, i64 1
+ %tmp16826 = getelementptr inbounds float, float* %tmp16825, i64 1
+ %tmp16827 = getelementptr inbounds float, float* %tmp16826, i64 1
+ %tmp16828 = getelementptr inbounds float, float* %tmp16827, i64 1
+ %tmp16829 = getelementptr inbounds float, float* %tmp16828, i64 1
+ %tmp16830 = getelementptr inbounds float, float* %tmp16829, i64 1
+ %tmp16831 = getelementptr inbounds float, float* %tmp16830, i64 1
+ %tmp16832 = getelementptr inbounds float, float* %tmp16831, i64 1
+ %tmp16833 = getelementptr inbounds float, float* %tmp16832, i64 1
+ %tmp16834 = getelementptr inbounds float, float* %tmp16833, i64 1
+ %tmp16835 = getelementptr inbounds float, float* %tmp16834, i64 1
+ %tmp16836 = getelementptr inbounds float, float* %tmp16835, i64 1
+ %tmp16837 = getelementptr inbounds float, float* %tmp16836, i64 1
+ %tmp16838 = getelementptr inbounds float, float* %tmp16837, i64 1
+ %tmp16839 = getelementptr inbounds float, float* %tmp16838, i64 1
+ %tmp16840 = getelementptr inbounds float, float* %tmp16839, i64 1
+ %tmp16841 = getelementptr inbounds float, float* %tmp16840, i64 1
+ %tmp16842 = getelementptr inbounds float, float* %tmp16841, i64 1
+ %tmp16843 = getelementptr inbounds float, float* %tmp16842, i64 1
+ %tmp16844 = getelementptr inbounds float, float* %tmp16843, i64 1
+ %tmp16845 = getelementptr inbounds float, float* %tmp16844, i64 1
+ %tmp16846 = getelementptr inbounds float, float* %tmp16845, i64 1
+ %tmp16847 = getelementptr inbounds float, float* %tmp16846, i64 1
+ %tmp16848 = getelementptr inbounds float, float* %tmp16847, i64 1
+ %tmp16849 = getelementptr inbounds float, float* %tmp16848, i64 1
+ %tmp16850 = getelementptr inbounds float, float* %tmp16849, i64 1
+ %tmp16851 = getelementptr inbounds float, float* %tmp16850, i64 1
+ %tmp16852 = getelementptr inbounds float, float* %tmp16851, i64 1
+ %tmp16853 = getelementptr inbounds float, float* %tmp16852, i64 1
+ %tmp16854 = getelementptr inbounds float, float* %tmp16853, i64 1
+ %tmp16855 = getelementptr inbounds float, float* %tmp16854, i64 1
+ %tmp16856 = getelementptr inbounds float, float* %tmp16855, i64 1
+ %tmp16857 = getelementptr inbounds float, float* %tmp16856, i64 1
+ %tmp16858 = getelementptr inbounds float, float* %tmp16857, i64 1
+ %tmp16859 = getelementptr inbounds float, float* %tmp16858, i64 1
+ %tmp16860 = getelementptr inbounds float, float* %tmp16859, i64 1
+ %tmp16861 = getelementptr inbounds float, float* %tmp16860, i64 1
+ %tmp16862 = getelementptr inbounds float, float* %tmp16861, i64 1
+ %tmp16863 = getelementptr inbounds float, float* %tmp16862, i64 1
+ %tmp16864 = getelementptr inbounds float, float* %tmp16863, i64 1
+ %tmp16865 = getelementptr inbounds float, float* %tmp16864, i64 1
+ %tmp16866 = getelementptr inbounds float, float* %tmp16865, i64 1
+ %tmp16867 = getelementptr inbounds float, float* %tmp16866, i64 1
+ %tmp16868 = getelementptr inbounds float, float* %tmp16867, i64 1
+ %tmp16869 = getelementptr inbounds float, float* %tmp16868, i64 1
+ %tmp16870 = getelementptr inbounds float, float* %tmp16869, i64 1
+ %tmp16871 = getelementptr inbounds float, float* %tmp16870, i64 1
+ %tmp16872 = getelementptr inbounds float, float* %tmp16871, i64 1
+ %tmp16873 = getelementptr inbounds float, float* %tmp16872, i64 1
+ %tmp16874 = getelementptr inbounds float, float* %tmp16873, i64 1
+ %tmp16875 = getelementptr inbounds float, float* %tmp16874, i64 1
+ %tmp16876 = getelementptr inbounds float, float* %tmp16875, i64 1
+ %tmp16877 = getelementptr inbounds float, float* %tmp16876, i64 1
+ %tmp16878 = getelementptr inbounds float, float* %tmp16877, i64 1
+ %tmp16879 = getelementptr inbounds float, float* %tmp16878, i64 1
+ %tmp16880 = getelementptr inbounds float, float* %tmp16879, i64 1
+ %tmp16881 = getelementptr inbounds float, float* %tmp16880, i64 1
+ %tmp16882 = getelementptr inbounds float, float* %tmp16881, i64 1
+ %tmp16883 = getelementptr inbounds float, float* %tmp16882, i64 1
+ %tmp16884 = getelementptr inbounds float, float* %tmp16883, i64 1
+ %tmp16885 = getelementptr inbounds float, float* %tmp16884, i64 1
+ %tmp16886 = getelementptr inbounds float, float* %tmp16885, i64 1
+ %tmp16887 = getelementptr inbounds float, float* %tmp16886, i64 1
+ %tmp16888 = getelementptr inbounds float, float* %tmp16887, i64 1
+ %tmp16889 = getelementptr inbounds float, float* %tmp16888, i64 1
+ %tmp16890 = getelementptr inbounds float, float* %tmp16889, i64 1
+ %tmp16891 = getelementptr inbounds float, float* %tmp16890, i64 1
+ %tmp16892 = getelementptr inbounds float, float* %tmp16891, i64 1
+ %tmp16893 = getelementptr inbounds float, float* %tmp16892, i64 1
+ %tmp16894 = getelementptr inbounds float, float* %tmp16893, i64 1
+ %tmp16895 = getelementptr inbounds float, float* %tmp16894, i64 1
+ %tmp16896 = getelementptr inbounds float, float* %tmp16895, i64 1
+ %tmp16897 = getelementptr inbounds float, float* %tmp16896, i64 1
+ %tmp16898 = getelementptr inbounds float, float* %tmp16897, i64 1
+ %tmp16899 = getelementptr inbounds float, float* %tmp16898, i64 1
+ %tmp16900 = getelementptr inbounds float, float* %tmp16899, i64 1
+ %tmp16901 = getelementptr inbounds float, float* %tmp16900, i64 1
+ %tmp16902 = getelementptr inbounds float, float* %tmp16901, i64 1
+ %tmp16903 = getelementptr inbounds float, float* %tmp16902, i64 1
+ %tmp16904 = getelementptr inbounds float, float* %tmp16903, i64 1
+ %tmp16905 = getelementptr inbounds float, float* %tmp16904, i64 1
+ %tmp16906 = getelementptr inbounds float, float* %tmp16905, i64 1
+ %tmp16907 = getelementptr inbounds float, float* %tmp16906, i64 1
+ %tmp16908 = getelementptr inbounds float, float* %tmp16907, i64 1
+ %tmp16909 = getelementptr inbounds float, float* %tmp16908, i64 1
+ %tmp16910 = getelementptr inbounds float, float* %tmp16909, i64 1
+ %tmp16911 = getelementptr inbounds float, float* %tmp16910, i64 1
+ %tmp16912 = getelementptr inbounds float, float* %tmp16911, i64 1
+ %tmp16913 = getelementptr inbounds float, float* %tmp16912, i64 1
+ %tmp16914 = getelementptr inbounds float, float* %tmp16913, i64 1
+ %tmp16915 = getelementptr inbounds float, float* %tmp16914, i64 1
+ %tmp16916 = getelementptr inbounds float, float* %tmp16915, i64 1
+ %tmp16917 = getelementptr inbounds float, float* %tmp16916, i64 1
+ %tmp16918 = getelementptr inbounds float, float* %tmp16917, i64 1
+ %tmp16919 = getelementptr inbounds float, float* %tmp16918, i64 1
+ %tmp16920 = getelementptr inbounds float, float* %tmp16919, i64 1
+ %tmp16921 = getelementptr inbounds float, float* %tmp16920, i64 1
+ %tmp16922 = getelementptr inbounds float, float* %tmp16921, i64 1
+ %tmp16923 = getelementptr inbounds float, float* %tmp16922, i64 1
+ %tmp16924 = getelementptr inbounds float, float* %tmp16923, i64 1
+ %tmp16925 = getelementptr inbounds float, float* %tmp16924, i64 1
+ %tmp16926 = getelementptr inbounds float, float* %tmp16925, i64 1
+ %tmp16927 = getelementptr inbounds float, float* %tmp16926, i64 1
+ %tmp16928 = getelementptr inbounds float, float* %tmp16927, i64 1
+ %tmp16929 = getelementptr inbounds float, float* %tmp16928, i64 1
+ %tmp16930 = getelementptr inbounds float, float* %tmp16929, i64 1
+ %tmp16931 = getelementptr inbounds float, float* %tmp16930, i64 1
+ %tmp16932 = getelementptr inbounds float, float* %tmp16931, i64 1
+ %tmp16933 = getelementptr inbounds float, float* %tmp16932, i64 1
+ %tmp16934 = getelementptr inbounds float, float* %tmp16933, i64 1
+ %tmp16935 = getelementptr inbounds float, float* %tmp16934, i64 1
+ %tmp16936 = getelementptr inbounds float, float* %tmp16935, i64 1
+ %tmp16937 = getelementptr inbounds float, float* %tmp16936, i64 1
+ %tmp16938 = getelementptr inbounds float, float* %tmp16937, i64 1
+ %tmp16939 = getelementptr inbounds float, float* %tmp16938, i64 1
+ %tmp16940 = getelementptr inbounds float, float* %tmp16939, i64 1
+ %tmp16941 = getelementptr inbounds float, float* %tmp16940, i64 1
+ %tmp16942 = getelementptr inbounds float, float* %tmp16941, i64 1
+ %tmp16943 = getelementptr inbounds float, float* %tmp16942, i64 1
+ %tmp16944 = getelementptr inbounds float, float* %tmp16943, i64 1
+ %tmp16945 = getelementptr inbounds float, float* %tmp16944, i64 1
+ %tmp16946 = getelementptr inbounds float, float* %tmp16945, i64 1
+ %tmp16947 = getelementptr inbounds float, float* %tmp16946, i64 1
+ %tmp16948 = getelementptr inbounds float, float* %tmp16947, i64 1
+ %tmp16949 = getelementptr inbounds float, float* %tmp16948, i64 1
+ %tmp16950 = getelementptr inbounds float, float* %tmp16949, i64 1
+ %tmp16951 = getelementptr inbounds float, float* %tmp16950, i64 1
+ %tmp16952 = getelementptr inbounds float, float* %tmp16951, i64 1
+ %tmp16953 = getelementptr inbounds float, float* %tmp16952, i64 1
+ %tmp16954 = getelementptr inbounds float, float* %tmp16953, i64 1
+ %tmp16955 = getelementptr inbounds float, float* %tmp16954, i64 1
+ %tmp16956 = getelementptr inbounds float, float* %tmp16955, i64 1
+ %tmp16957 = getelementptr inbounds float, float* %tmp16956, i64 1
+ %tmp16958 = getelementptr inbounds float, float* %tmp16957, i64 1
+ %tmp16959 = getelementptr inbounds float, float* %tmp16958, i64 1
+ %tmp16960 = getelementptr inbounds float, float* %tmp16959, i64 1
+ %tmp16961 = getelementptr inbounds float, float* %tmp16960, i64 1
+ %tmp16962 = getelementptr inbounds float, float* %tmp16961, i64 1
+ %tmp16963 = getelementptr inbounds float, float* %tmp16962, i64 1
+ %tmp16964 = getelementptr inbounds float, float* %tmp16963, i64 1
+ %tmp16965 = getelementptr inbounds float, float* %tmp16964, i64 1
+ %tmp16966 = getelementptr inbounds float, float* %tmp16965, i64 1
+ %tmp16967 = getelementptr inbounds float, float* %tmp16966, i64 1
+ %tmp16968 = getelementptr inbounds float, float* %tmp16967, i64 1
+ %tmp16969 = getelementptr inbounds float, float* %tmp16968, i64 1
+ %tmp16970 = getelementptr inbounds float, float* %tmp16969, i64 1
+ %tmp16971 = getelementptr inbounds float, float* %tmp16970, i64 1
+ %tmp16972 = getelementptr inbounds float, float* %tmp16971, i64 1
+ %tmp16973 = getelementptr inbounds float, float* %tmp16972, i64 1
+ %tmp16974 = getelementptr inbounds float, float* %tmp16973, i64 1
+ %tmp16975 = getelementptr inbounds float, float* %tmp16974, i64 1
+ %tmp16976 = getelementptr inbounds float, float* %tmp16975, i64 1
+ %tmp16977 = getelementptr inbounds float, float* %tmp16976, i64 1
+ %tmp16978 = getelementptr inbounds float, float* %tmp16977, i64 1
+ %tmp16979 = getelementptr inbounds float, float* %tmp16978, i64 1
+ %tmp16980 = getelementptr inbounds float, float* %tmp16979, i64 1
+ %tmp16981 = getelementptr inbounds float, float* %tmp16980, i64 1
+ %tmp16982 = getelementptr inbounds float, float* %tmp16981, i64 1
+ %tmp16983 = getelementptr inbounds float, float* %tmp16982, i64 1
+ %tmp16984 = getelementptr inbounds float, float* %tmp16983, i64 1
+ %tmp16985 = getelementptr inbounds float, float* %tmp16984, i64 1
+ %tmp16986 = getelementptr inbounds float, float* %tmp16985, i64 1
+ %tmp16987 = getelementptr inbounds float, float* %tmp16986, i64 1
+ %tmp16988 = getelementptr inbounds float, float* %tmp16987, i64 1
+ %tmp16989 = getelementptr inbounds float, float* %tmp16988, i64 1
+ %tmp16990 = getelementptr inbounds float, float* %tmp16989, i64 1
+ %tmp16991 = getelementptr inbounds float, float* %tmp16990, i64 1
+ %tmp16992 = getelementptr inbounds float, float* %tmp16991, i64 1
+ %tmp16993 = getelementptr inbounds float, float* %tmp16992, i64 1
+ %tmp16994 = getelementptr inbounds float, float* %tmp16993, i64 1
+ %tmp16995 = getelementptr inbounds float, float* %tmp16994, i64 1
+ %tmp16996 = getelementptr inbounds float, float* %tmp16995, i64 1
+ %tmp16997 = getelementptr inbounds float, float* %tmp16996, i64 1
+ %tmp16998 = getelementptr inbounds float, float* %tmp16997, i64 1
+ %tmp16999 = getelementptr inbounds float, float* %tmp16998, i64 1
+ %tmp17000 = getelementptr inbounds float, float* %tmp16999, i64 1
+ %tmp17001 = getelementptr inbounds float, float* %tmp17000, i64 1
+ %tmp17002 = getelementptr inbounds float, float* %tmp17001, i64 1
+ %tmp17003 = getelementptr inbounds float, float* %tmp17002, i64 1
+ %tmp17004 = getelementptr inbounds float, float* %tmp17003, i64 1
+ %tmp17005 = getelementptr inbounds float, float* %tmp17004, i64 1
+ %tmp17006 = getelementptr inbounds float, float* %tmp17005, i64 1
+ %tmp17007 = getelementptr inbounds float, float* %tmp17006, i64 1
+ %tmp17008 = getelementptr inbounds float, float* %tmp17007, i64 1
+ %tmp17009 = getelementptr inbounds float, float* %tmp17008, i64 1
+ %tmp17010 = getelementptr inbounds float, float* %tmp17009, i64 1
+ %tmp17011 = getelementptr inbounds float, float* %tmp17010, i64 1
+ %tmp17012 = getelementptr inbounds float, float* %tmp17011, i64 1
+ %tmp17013 = getelementptr inbounds float, float* %tmp17012, i64 1
+ %tmp17014 = getelementptr inbounds float, float* %tmp17013, i64 1
+ %tmp17015 = getelementptr inbounds float, float* %tmp17014, i64 1
+ %tmp17016 = getelementptr inbounds float, float* %tmp17015, i64 1
+ %tmp17017 = getelementptr inbounds float, float* %tmp17016, i64 1
+ %tmp17018 = getelementptr inbounds float, float* %tmp17017, i64 1
+ %tmp17019 = getelementptr inbounds float, float* %tmp17018, i64 1
+ %tmp17020 = getelementptr inbounds float, float* %tmp17019, i64 1
+ %tmp17021 = getelementptr inbounds float, float* %tmp17020, i64 1
+ %tmp17022 = getelementptr inbounds float, float* %tmp17021, i64 1
+ %tmp17023 = getelementptr inbounds float, float* %tmp17022, i64 1
+ %tmp17024 = getelementptr inbounds float, float* %tmp17023, i64 1
+ %tmp17025 = getelementptr inbounds float, float* %tmp17024, i64 1
+ %tmp17026 = getelementptr inbounds float, float* %tmp17025, i64 1
+ %tmp17027 = getelementptr inbounds float, float* %tmp17026, i64 1
+ %tmp17028 = getelementptr inbounds float, float* %tmp17027, i64 1
+ %tmp17029 = getelementptr inbounds float, float* %tmp17028, i64 1
+ %tmp17030 = getelementptr inbounds float, float* %tmp17029, i64 1
+ %tmp17031 = getelementptr inbounds float, float* %tmp17030, i64 1
+ %tmp17032 = getelementptr inbounds float, float* %tmp17031, i64 1
+ %tmp17033 = getelementptr inbounds float, float* %tmp17032, i64 1
+ %tmp17034 = getelementptr inbounds float, float* %tmp17033, i64 1
+ %tmp17035 = getelementptr inbounds float, float* %tmp17034, i64 1
+ %tmp17036 = getelementptr inbounds float, float* %tmp17035, i64 1
+ %tmp17037 = getelementptr inbounds float, float* %tmp17036, i64 1
+ %tmp17038 = getelementptr inbounds float, float* %tmp17037, i64 1
+ %tmp17039 = getelementptr inbounds float, float* %tmp17038, i64 1
+ %tmp17040 = getelementptr inbounds float, float* %tmp17039, i64 1
+ %tmp17041 = getelementptr inbounds float, float* %tmp17040, i64 1
+ %tmp17042 = getelementptr inbounds float, float* %tmp17041, i64 1
+ %tmp17043 = getelementptr inbounds float, float* %tmp17042, i64 1
+ %tmp17044 = getelementptr inbounds float, float* %tmp17043, i64 1
+ %tmp17045 = getelementptr inbounds float, float* %tmp17044, i64 1
+ %tmp17046 = getelementptr inbounds float, float* %tmp17045, i64 1
+ %tmp17047 = getelementptr inbounds float, float* %tmp17046, i64 1
+ %tmp17048 = getelementptr inbounds float, float* %tmp17047, i64 1
+ %tmp17049 = getelementptr inbounds float, float* %tmp17048, i64 1
+ %tmp17050 = getelementptr inbounds float, float* %tmp17049, i64 1
+ %tmp17051 = getelementptr inbounds float, float* %tmp17050, i64 1
+ %tmp17052 = getelementptr inbounds float, float* %tmp17051, i64 1
+ %tmp17053 = getelementptr inbounds float, float* %tmp17052, i64 1
+ %tmp17054 = getelementptr inbounds float, float* %tmp17053, i64 1
+ %tmp17055 = getelementptr inbounds float, float* %tmp17054, i64 1
+ %tmp17056 = getelementptr inbounds float, float* %tmp17055, i64 1
+ %tmp17057 = getelementptr inbounds float, float* %tmp17056, i64 1
+ %tmp17058 = getelementptr inbounds float, float* %tmp17057, i64 1
+ %tmp17059 = getelementptr inbounds float, float* %tmp17058, i64 1
+ %tmp17060 = getelementptr inbounds float, float* %tmp17059, i64 1
+ %tmp17061 = getelementptr inbounds float, float* %tmp17060, i64 1
+ %tmp17062 = getelementptr inbounds float, float* %tmp17061, i64 1
+ %tmp17063 = getelementptr inbounds float, float* %tmp17062, i64 1
+ %tmp17064 = getelementptr inbounds float, float* %tmp17063, i64 1
+ %tmp17065 = getelementptr inbounds float, float* %tmp17064, i64 1
+ %tmp17066 = getelementptr inbounds float, float* %tmp17065, i64 1
+ %tmp17067 = getelementptr inbounds float, float* %tmp17066, i64 1
+ %tmp17068 = getelementptr inbounds float, float* %tmp17067, i64 1
+ %tmp17069 = getelementptr inbounds float, float* %tmp17068, i64 1
+ %tmp17070 = getelementptr inbounds float, float* %tmp17069, i64 1
+ %tmp17071 = getelementptr inbounds float, float* %tmp17070, i64 1
+ %tmp17072 = getelementptr inbounds float, float* %tmp17071, i64 1
+ %tmp17073 = getelementptr inbounds float, float* %tmp17072, i64 1
+ %tmp17074 = getelementptr inbounds float, float* %tmp17073, i64 1
+ %tmp17075 = getelementptr inbounds float, float* %tmp17074, i64 1
+ %tmp17076 = getelementptr inbounds float, float* %tmp17075, i64 1
+ %tmp17077 = getelementptr inbounds float, float* %tmp17076, i64 1
+ %tmp17078 = getelementptr inbounds float, float* %tmp17077, i64 1
+ %tmp17079 = getelementptr inbounds float, float* %tmp17078, i64 1
+ %tmp17080 = getelementptr inbounds float, float* %tmp17079, i64 1
+ %tmp17081 = getelementptr inbounds float, float* %tmp17080, i64 1
+ %tmp17082 = getelementptr inbounds float, float* %tmp17081, i64 1
+ %tmp17083 = getelementptr inbounds float, float* %tmp17082, i64 1
+ %tmp17084 = getelementptr inbounds float, float* %tmp17083, i64 1
+ %tmp17085 = getelementptr inbounds float, float* %tmp17084, i64 1
+ %tmp17086 = getelementptr inbounds float, float* %tmp17085, i64 1
+ %tmp17087 = getelementptr inbounds float, float* %tmp17086, i64 1
+ %tmp17088 = getelementptr inbounds float, float* %tmp17087, i64 1
+ %tmp17089 = getelementptr inbounds float, float* %tmp17088, i64 1
+ %tmp17090 = getelementptr inbounds float, float* %tmp17089, i64 1
+ %tmp17091 = getelementptr inbounds float, float* %tmp17090, i64 1
+ %tmp17092 = getelementptr inbounds float, float* %tmp17091, i64 1
+ %tmp17093 = getelementptr inbounds float, float* %tmp17092, i64 1
+ %tmp17094 = getelementptr inbounds float, float* %tmp17093, i64 1
+ %tmp17095 = getelementptr inbounds float, float* %tmp17094, i64 1
+ %tmp17096 = getelementptr inbounds float, float* %tmp17095, i64 1
+ %tmp17097 = getelementptr inbounds float, float* %tmp17096, i64 1
+ %tmp17098 = getelementptr inbounds float, float* %tmp17097, i64 1
+ %tmp17099 = getelementptr inbounds float, float* %tmp17098, i64 1
+ %tmp17100 = getelementptr inbounds float, float* %tmp17099, i64 1
+ %tmp17101 = getelementptr inbounds float, float* %tmp17100, i64 1
+ %tmp17102 = getelementptr inbounds float, float* %tmp17101, i64 1
+ %tmp17103 = getelementptr inbounds float, float* %tmp17102, i64 1
+ %tmp17104 = getelementptr inbounds float, float* %tmp17103, i64 1
+ %tmp17105 = getelementptr inbounds float, float* %tmp17104, i64 1
+ %tmp17106 = getelementptr inbounds float, float* %tmp17105, i64 1
+ %tmp17107 = getelementptr inbounds float, float* %tmp17106, i64 1
+ %tmp17108 = getelementptr inbounds float, float* %tmp17107, i64 1
+ %tmp17109 = getelementptr inbounds float, float* %tmp17108, i64 1
+ %tmp17110 = getelementptr inbounds float, float* %tmp17109, i64 1
+ %tmp17111 = getelementptr inbounds float, float* %tmp17110, i64 1
+ %tmp17112 = getelementptr inbounds float, float* %tmp17111, i64 1
+ %tmp17113 = getelementptr inbounds float, float* %tmp17112, i64 1
+ %tmp17114 = getelementptr inbounds float, float* %tmp17113, i64 1
+ %tmp17115 = getelementptr inbounds float, float* %tmp17114, i64 1
+ %tmp17116 = getelementptr inbounds float, float* %tmp17115, i64 1
+ %tmp17117 = getelementptr inbounds float, float* %tmp17116, i64 1
+ %tmp17118 = getelementptr inbounds float, float* %tmp17117, i64 1
+ %tmp17119 = getelementptr inbounds float, float* %tmp17118, i64 1
+ %tmp17120 = getelementptr inbounds float, float* %tmp17119, i64 1
+ %tmp17121 = getelementptr inbounds float, float* %tmp17120, i64 1
+ %tmp17122 = getelementptr inbounds float, float* %tmp17121, i64 1
+ %tmp17123 = getelementptr inbounds float, float* %tmp17122, i64 1
+ %tmp17124 = getelementptr inbounds float, float* %tmp17123, i64 1
+ %tmp17125 = getelementptr inbounds float, float* %tmp17124, i64 1
+ %tmp17126 = getelementptr inbounds float, float* %tmp17125, i64 1
+ %tmp17127 = getelementptr inbounds float, float* %tmp17126, i64 1
+ %tmp17128 = getelementptr inbounds float, float* %tmp17127, i64 1
+ %tmp17129 = getelementptr inbounds float, float* %tmp17128, i64 1
+ %tmp17130 = getelementptr inbounds float, float* %tmp17129, i64 1
+ %tmp17131 = getelementptr inbounds float, float* %tmp17130, i64 1
+ %tmp17132 = getelementptr inbounds float, float* %tmp17131, i64 1
+ %tmp17133 = getelementptr inbounds float, float* %tmp17132, i64 1
+ %tmp17134 = getelementptr inbounds float, float* %tmp17133, i64 1
+ %tmp17135 = getelementptr inbounds float, float* %tmp17134, i64 1
+ %tmp17136 = getelementptr inbounds float, float* %tmp17135, i64 1
+ %tmp17137 = getelementptr inbounds float, float* %tmp17136, i64 1
+ %tmp17138 = getelementptr inbounds float, float* %tmp17137, i64 1
+ %tmp17139 = getelementptr inbounds float, float* %tmp17138, i64 1
+ %tmp17140 = getelementptr inbounds float, float* %tmp17139, i64 1
+ %tmp17141 = getelementptr inbounds float, float* %tmp17140, i64 1
+ %tmp17142 = getelementptr inbounds float, float* %tmp17141, i64 1
+ %tmp17143 = getelementptr inbounds float, float* %tmp17142, i64 1
+ %tmp17144 = getelementptr inbounds float, float* %tmp17143, i64 1
+ %tmp17145 = getelementptr inbounds float, float* %tmp17144, i64 1
+ %tmp17146 = getelementptr inbounds float, float* %tmp17145, i64 1
+ %tmp17147 = getelementptr inbounds float, float* %tmp17146, i64 1
+ %tmp17148 = getelementptr inbounds float, float* %tmp17147, i64 1
+ %tmp17149 = getelementptr inbounds float, float* %tmp17148, i64 1
+ %tmp17150 = getelementptr inbounds float, float* %tmp17149, i64 1
+ %tmp17151 = getelementptr inbounds float, float* %tmp17150, i64 1
+ %tmp17152 = getelementptr inbounds float, float* %tmp17151, i64 1
+ %tmp17153 = getelementptr inbounds float, float* %tmp17152, i64 1
+ %tmp17154 = getelementptr inbounds float, float* %tmp17153, i64 1
+ %tmp17155 = getelementptr inbounds float, float* %tmp17154, i64 1
+ %tmp17156 = getelementptr inbounds float, float* %tmp17155, i64 1
+ %tmp17157 = getelementptr inbounds float, float* %tmp17156, i64 1
+ %tmp17158 = getelementptr inbounds float, float* %tmp17157, i64 1
+ %tmp17159 = getelementptr inbounds float, float* %tmp17158, i64 1
+ %tmp17160 = getelementptr inbounds float, float* %tmp17159, i64 1
+ %tmp17161 = getelementptr inbounds float, float* %tmp17160, i64 1
+ %tmp17162 = getelementptr inbounds float, float* %tmp17161, i64 1
+ %tmp17163 = getelementptr inbounds float, float* %tmp17162, i64 1
+ %tmp17164 = getelementptr inbounds float, float* %tmp17163, i64 1
+ %tmp17165 = getelementptr inbounds float, float* %tmp17164, i64 1
+ %tmp17166 = getelementptr inbounds float, float* %tmp17165, i64 1
+ %tmp17167 = getelementptr inbounds float, float* %tmp17166, i64 1
+ %tmp17168 = getelementptr inbounds float, float* %tmp17167, i64 1
+ %tmp17169 = getelementptr inbounds float, float* %tmp17168, i64 1
+ %tmp17170 = getelementptr inbounds float, float* %tmp17169, i64 1
+ %tmp17171 = getelementptr inbounds float, float* %tmp17170, i64 1
+ %tmp17172 = getelementptr inbounds float, float* %tmp17171, i64 1
+ %tmp17173 = getelementptr inbounds float, float* %tmp17172, i64 1
+ %tmp17174 = getelementptr inbounds float, float* %tmp17173, i64 1
+ %tmp17175 = getelementptr inbounds float, float* %tmp17174, i64 1
+ %tmp17176 = getelementptr inbounds float, float* %tmp17175, i64 1
+ %tmp17177 = getelementptr inbounds float, float* %tmp17176, i64 1
+ %tmp17178 = getelementptr inbounds float, float* %tmp17177, i64 1
+ %tmp17179 = getelementptr inbounds float, float* %tmp17178, i64 1
+ %tmp17180 = getelementptr inbounds float, float* %tmp17179, i64 1
+ %tmp17181 = getelementptr inbounds float, float* %tmp17180, i64 1
+ %tmp17182 = getelementptr inbounds float, float* %tmp17181, i64 1
+ %tmp17183 = getelementptr inbounds float, float* %tmp17182, i64 1
+ %tmp17184 = getelementptr inbounds float, float* %tmp17183, i64 1
+ %tmp17185 = getelementptr inbounds float, float* %tmp17184, i64 1
+ %tmp17186 = getelementptr inbounds float, float* %tmp17185, i64 1
+ %tmp17187 = getelementptr inbounds float, float* %tmp17186, i64 1
+ %tmp17188 = getelementptr inbounds float, float* %tmp17187, i64 1
+ %tmp17189 = getelementptr inbounds float, float* %tmp17188, i64 1
+ %tmp17190 = getelementptr inbounds float, float* %tmp17189, i64 1
+ %tmp17191 = getelementptr inbounds float, float* %tmp17190, i64 1
+ %tmp17192 = getelementptr inbounds float, float* %tmp17191, i64 1
+ %tmp17193 = getelementptr inbounds float, float* %tmp17192, i64 1
+ %tmp17194 = getelementptr inbounds float, float* %tmp17193, i64 1
+ %tmp17195 = getelementptr inbounds float, float* %tmp17194, i64 1
+ %tmp17196 = getelementptr inbounds float, float* %tmp17195, i64 1
+ %tmp17197 = getelementptr inbounds float, float* %tmp17196, i64 1
+ %tmp17198 = getelementptr inbounds float, float* %tmp17197, i64 1
+ %tmp17199 = getelementptr inbounds float, float* %tmp17198, i64 1
+ %tmp17200 = getelementptr inbounds float, float* %tmp17199, i64 1
+ %tmp17201 = getelementptr inbounds float, float* %tmp17200, i64 1
+ %tmp17202 = getelementptr inbounds float, float* %tmp17201, i64 1
+ %tmp17203 = getelementptr inbounds float, float* %tmp17202, i64 1
+ %tmp17204 = getelementptr inbounds float, float* %tmp17203, i64 1
+ %tmp17205 = getelementptr inbounds float, float* %tmp17204, i64 1
+ %tmp17206 = getelementptr inbounds float, float* %tmp17205, i64 1
+ %tmp17207 = getelementptr inbounds float, float* %tmp17206, i64 1
+ %tmp17208 = getelementptr inbounds float, float* %tmp17207, i64 1
+ %tmp17209 = getelementptr inbounds float, float* %tmp17208, i64 1
+ %tmp17210 = getelementptr inbounds float, float* %tmp17209, i64 1
+ %tmp17211 = getelementptr inbounds float, float* %tmp17210, i64 1
+ %tmp17212 = getelementptr inbounds float, float* %tmp17211, i64 1
+ %tmp17213 = getelementptr inbounds float, float* %tmp17212, i64 1
+ %tmp17214 = getelementptr inbounds float, float* %tmp17213, i64 1
+ %tmp17215 = getelementptr inbounds float, float* %tmp17214, i64 1
+ %tmp17216 = getelementptr inbounds float, float* %tmp17215, i64 1
+ %tmp17217 = getelementptr inbounds float, float* %tmp17216, i64 1
+ %tmp17218 = getelementptr inbounds float, float* %tmp17217, i64 1
+ %tmp17219 = getelementptr inbounds float, float* %tmp17218, i64 1
+ %tmp17220 = getelementptr inbounds float, float* %tmp17219, i64 1
+ %tmp17221 = getelementptr inbounds float, float* %tmp17220, i64 1
+ %tmp17222 = getelementptr inbounds float, float* %tmp17221, i64 1
+ %tmp17223 = getelementptr inbounds float, float* %tmp17222, i64 1
+ %tmp17224 = getelementptr inbounds float, float* %tmp17223, i64 1
+ %tmp17225 = getelementptr inbounds float, float* %tmp17224, i64 1
+ %tmp17226 = getelementptr inbounds float, float* %tmp17225, i64 1
+ %tmp17227 = getelementptr inbounds float, float* %tmp17226, i64 1
+ %tmp17228 = getelementptr inbounds float, float* %tmp17227, i64 1
+ %tmp17229 = getelementptr inbounds float, float* %tmp17228, i64 1
+ %tmp17230 = getelementptr inbounds float, float* %tmp17229, i64 1
+ %tmp17231 = getelementptr inbounds float, float* %tmp17230, i64 1
+ %tmp17232 = getelementptr inbounds float, float* %tmp17231, i64 1
+ %tmp17233 = getelementptr inbounds float, float* %tmp17232, i64 1
+ %tmp17234 = getelementptr inbounds float, float* %tmp17233, i64 1
+ %tmp17235 = getelementptr inbounds float, float* %tmp17234, i64 1
+ %tmp17236 = getelementptr inbounds float, float* %tmp17235, i64 1
+ %tmp17237 = getelementptr inbounds float, float* %tmp17236, i64 1
+ %tmp17238 = getelementptr inbounds float, float* %tmp17237, i64 1
+ %tmp17239 = getelementptr inbounds float, float* %tmp17238, i64 1
+ %tmp17240 = getelementptr inbounds float, float* %tmp17239, i64 1
+ %tmp17241 = getelementptr inbounds float, float* %tmp17240, i64 1
+ %tmp17242 = getelementptr inbounds float, float* %tmp17241, i64 1
+ %tmp17243 = getelementptr inbounds float, float* %tmp17242, i64 1
+ %tmp17244 = getelementptr inbounds float, float* %tmp17243, i64 1
+ %tmp17245 = getelementptr inbounds float, float* %tmp17244, i64 1
+ %tmp17246 = getelementptr inbounds float, float* %tmp17245, i64 1
+ %tmp17247 = getelementptr inbounds float, float* %tmp17246, i64 1
+ %tmp17248 = getelementptr inbounds float, float* %tmp17247, i64 1
+ %tmp17249 = getelementptr inbounds float, float* %tmp17248, i64 1
+ %tmp17250 = getelementptr inbounds float, float* %tmp17249, i64 1
+ %tmp17251 = getelementptr inbounds float, float* %tmp17250, i64 1
+ %tmp17252 = getelementptr inbounds float, float* %tmp17251, i64 1
+ %tmp17253 = getelementptr inbounds float, float* %tmp17252, i64 1
+ %tmp17254 = getelementptr inbounds float, float* %tmp17253, i64 1
+ %tmp17255 = getelementptr inbounds float, float* %tmp17254, i64 1
+ %tmp17256 = getelementptr inbounds float, float* %tmp17255, i64 1
+ %tmp17257 = getelementptr inbounds float, float* %tmp17256, i64 1
+ %tmp17258 = getelementptr inbounds float, float* %tmp17257, i64 1
+ %tmp17259 = getelementptr inbounds float, float* %tmp17258, i64 1
+ %tmp17260 = getelementptr inbounds float, float* %tmp17259, i64 1
+ %tmp17261 = getelementptr inbounds float, float* %tmp17260, i64 1
+ %tmp17262 = getelementptr inbounds float, float* %tmp17261, i64 1
+ %tmp17263 = getelementptr inbounds float, float* %tmp17262, i64 1
+ %tmp17264 = getelementptr inbounds float, float* %tmp17263, i64 1
+ %tmp17265 = getelementptr inbounds float, float* %tmp17264, i64 1
+ %tmp17266 = getelementptr inbounds float, float* %tmp17265, i64 1
+ %tmp17267 = getelementptr inbounds float, float* %tmp17266, i64 1
+ %tmp17268 = getelementptr inbounds float, float* %tmp17267, i64 1
+ %tmp17269 = getelementptr inbounds float, float* %tmp17268, i64 1
+ %tmp17270 = getelementptr inbounds float, float* %tmp17269, i64 1
+ %tmp17271 = getelementptr inbounds float, float* %tmp17270, i64 1
+ %tmp17272 = getelementptr inbounds float, float* %tmp17271, i64 1
+ %tmp17273 = getelementptr inbounds float, float* %tmp17272, i64 1
+ %tmp17274 = getelementptr inbounds float, float* %tmp17273, i64 1
+ %tmp17275 = getelementptr inbounds float, float* %tmp17274, i64 1
+ %tmp17276 = getelementptr inbounds float, float* %tmp17275, i64 1
+ %tmp17277 = getelementptr inbounds float, float* %tmp17276, i64 1
+ %tmp17278 = getelementptr inbounds float, float* %tmp17277, i64 1
+ %tmp17279 = getelementptr inbounds float, float* %tmp17278, i64 1
+ %tmp17280 = getelementptr inbounds float, float* %tmp17279, i64 1
+ %tmp17281 = getelementptr inbounds float, float* %tmp17280, i64 1
+ %tmp17282 = getelementptr inbounds float, float* %tmp17281, i64 1
+ %tmp17283 = getelementptr inbounds float, float* %tmp17282, i64 1
+ %tmp17284 = getelementptr inbounds float, float* %tmp17283, i64 1
+ %tmp17285 = getelementptr inbounds float, float* %tmp17284, i64 1
+ %tmp17286 = getelementptr inbounds float, float* %tmp17285, i64 1
+ %tmp17287 = getelementptr inbounds float, float* %tmp17286, i64 1
+ %tmp17288 = getelementptr inbounds float, float* %tmp17287, i64 1
+ %tmp17289 = getelementptr inbounds float, float* %tmp17288, i64 1
+ %tmp17290 = getelementptr inbounds float, float* %tmp17289, i64 1
+ %tmp17291 = getelementptr inbounds float, float* %tmp17290, i64 1
+ %tmp17292 = getelementptr inbounds float, float* %tmp17291, i64 1
+ %tmp17293 = getelementptr inbounds float, float* %tmp17292, i64 1
+ %tmp17294 = getelementptr inbounds float, float* %tmp17293, i64 1
+ %tmp17295 = getelementptr inbounds float, float* %tmp17294, i64 1
+ %tmp17296 = getelementptr inbounds float, float* %tmp17295, i64 1
+ %tmp17297 = getelementptr inbounds float, float* %tmp17296, i64 1
+ %tmp17298 = getelementptr inbounds float, float* %tmp17297, i64 1
+ %tmp17299 = getelementptr inbounds float, float* %tmp17298, i64 1
+ %tmp17300 = getelementptr inbounds float, float* %tmp17299, i64 1
+ %tmp17301 = getelementptr inbounds float, float* %tmp17300, i64 1
+ %tmp17302 = getelementptr inbounds float, float* %tmp17301, i64 1
+ %tmp17303 = getelementptr inbounds float, float* %tmp17302, i64 1
+ %tmp17304 = getelementptr inbounds float, float* %tmp17303, i64 1
+ %tmp17305 = getelementptr inbounds float, float* %tmp17304, i64 1
+ %tmp17306 = getelementptr inbounds float, float* %tmp17305, i64 1
+ %tmp17307 = getelementptr inbounds float, float* %tmp17306, i64 1
+ %tmp17308 = getelementptr inbounds float, float* %tmp17307, i64 1
+ %tmp17309 = getelementptr inbounds float, float* %tmp17308, i64 1
+ %tmp17310 = getelementptr inbounds float, float* %tmp17309, i64 1
+ %tmp17311 = getelementptr inbounds float, float* %tmp17310, i64 1
+ %tmp17312 = getelementptr inbounds float, float* %tmp17311, i64 1
+ %tmp17313 = getelementptr inbounds float, float* %tmp17312, i64 1
+ %tmp17314 = getelementptr inbounds float, float* %tmp17313, i64 1
+ %tmp17315 = getelementptr inbounds float, float* %tmp17314, i64 1
+ %tmp17316 = getelementptr inbounds float, float* %tmp17315, i64 1
+ %tmp17317 = getelementptr inbounds float, float* %tmp17316, i64 1
+ %tmp17318 = getelementptr inbounds float, float* %tmp17317, i64 1
+ %tmp17319 = getelementptr inbounds float, float* %tmp17318, i64 1
+ %tmp17320 = getelementptr inbounds float, float* %tmp17319, i64 1
+ %tmp17321 = getelementptr inbounds float, float* %tmp17320, i64 1
+ %tmp17322 = getelementptr inbounds float, float* %tmp17321, i64 1
+ %tmp17323 = getelementptr inbounds float, float* %tmp17322, i64 1
+ %tmp17324 = getelementptr inbounds float, float* %tmp17323, i64 1
+ %tmp17325 = getelementptr inbounds float, float* %tmp17324, i64 1
+ %tmp17326 = getelementptr inbounds float, float* %tmp17325, i64 1
+ %tmp17327 = getelementptr inbounds float, float* %tmp17326, i64 1
+ %tmp17328 = getelementptr inbounds float, float* %tmp17327, i64 1
+ %tmp17329 = getelementptr inbounds float, float* %tmp17328, i64 1
+ %tmp17330 = getelementptr inbounds float, float* %tmp17329, i64 1
+ %tmp17331 = getelementptr inbounds float, float* %tmp17330, i64 1
+ %tmp17332 = getelementptr inbounds float, float* %tmp17331, i64 1
+ %tmp17333 = getelementptr inbounds float, float* %tmp17332, i64 1
+ %tmp17334 = getelementptr inbounds float, float* %tmp17333, i64 1
+ %tmp17335 = getelementptr inbounds float, float* %tmp17334, i64 1
+ %tmp17336 = getelementptr inbounds float, float* %tmp17335, i64 1
+ %tmp17337 = getelementptr inbounds float, float* %tmp17336, i64 1
+ %tmp17338 = getelementptr inbounds float, float* %tmp17337, i64 1
+ %tmp17339 = getelementptr inbounds float, float* %tmp17338, i64 1
+ %tmp17340 = getelementptr inbounds float, float* %tmp17339, i64 1
+ %tmp17341 = getelementptr inbounds float, float* %tmp17340, i64 1
+ %tmp17342 = getelementptr inbounds float, float* %tmp17341, i64 1
+ %tmp17343 = getelementptr inbounds float, float* %tmp17342, i64 1
+ %tmp17344 = getelementptr inbounds float, float* %tmp17343, i64 1
+ %tmp17345 = getelementptr inbounds float, float* %tmp17344, i64 1
+ %tmp17346 = getelementptr inbounds float, float* %tmp17345, i64 1
+ %tmp17347 = getelementptr inbounds float, float* %tmp17346, i64 1
+ %tmp17348 = getelementptr inbounds float, float* %tmp17347, i64 1
+ %tmp17349 = getelementptr inbounds float, float* %tmp17348, i64 1
+ %tmp17350 = getelementptr inbounds float, float* %tmp17349, i64 1
+ %tmp17351 = getelementptr inbounds float, float* %tmp17350, i64 1
+ %tmp17352 = getelementptr inbounds float, float* %tmp17351, i64 1
+ %tmp17353 = getelementptr inbounds float, float* %tmp17352, i64 1
+ %tmp17354 = getelementptr inbounds float, float* %tmp17353, i64 1
+ %tmp17355 = getelementptr inbounds float, float* %tmp17354, i64 1
+ %tmp17356 = getelementptr inbounds float, float* %tmp17355, i64 1
+ %tmp17357 = getelementptr inbounds float, float* %tmp17356, i64 1
+ %tmp17358 = getelementptr inbounds float, float* %tmp17357, i64 1
+ %tmp17359 = getelementptr inbounds float, float* %tmp17358, i64 1
+ %tmp17360 = getelementptr inbounds float, float* %tmp17359, i64 1
+ %tmp17361 = getelementptr inbounds float, float* %tmp17360, i64 1
+ %tmp17362 = getelementptr inbounds float, float* %tmp17361, i64 1
+ %tmp17363 = getelementptr inbounds float, float* %tmp17362, i64 1
+ %tmp17364 = getelementptr inbounds float, float* %tmp17363, i64 1
+ %tmp17365 = getelementptr inbounds float, float* %tmp17364, i64 1
+ %tmp17366 = getelementptr inbounds float, float* %tmp17365, i64 1
+ %tmp17367 = getelementptr inbounds float, float* %tmp17366, i64 1
+ %tmp17368 = getelementptr inbounds float, float* %tmp17367, i64 1
+ %tmp17369 = getelementptr inbounds float, float* %tmp17368, i64 1
+ %tmp17370 = getelementptr inbounds float, float* %tmp17369, i64 1
+ %tmp17371 = getelementptr inbounds float, float* %tmp17370, i64 1
+ %tmp17372 = getelementptr inbounds float, float* %tmp17371, i64 1
+ %tmp17373 = getelementptr inbounds float, float* %tmp17372, i64 1
+ %tmp17374 = getelementptr inbounds float, float* %tmp17373, i64 1
+ %tmp17375 = getelementptr inbounds float, float* %tmp17374, i64 1
+ %tmp17376 = getelementptr inbounds float, float* %tmp17375, i64 1
+ %tmp17377 = getelementptr inbounds float, float* %tmp17376, i64 1
+ %tmp17378 = getelementptr inbounds float, float* %tmp17377, i64 1
+ %tmp17379 = getelementptr inbounds float, float* %tmp17378, i64 1
+ %tmp17380 = getelementptr inbounds float, float* %tmp17379, i64 1
+ %tmp17381 = getelementptr inbounds float, float* %tmp17380, i64 1
+ %tmp17382 = getelementptr inbounds float, float* %tmp17381, i64 1
+ %tmp17383 = getelementptr inbounds float, float* %tmp17382, i64 1
+ %tmp17384 = getelementptr inbounds float, float* %tmp17383, i64 1
+ %tmp17385 = getelementptr inbounds float, float* %tmp17384, i64 1
+ %tmp17386 = getelementptr inbounds float, float* %tmp17385, i64 1
+ %tmp17387 = getelementptr inbounds float, float* %tmp17386, i64 1
+ %tmp17388 = getelementptr inbounds float, float* %tmp17387, i64 1
+ %tmp17389 = getelementptr inbounds float, float* %tmp17388, i64 1
+ %tmp17390 = getelementptr inbounds float, float* %tmp17389, i64 1
+ %tmp17391 = getelementptr inbounds float, float* %tmp17390, i64 1
+ %tmp17392 = getelementptr inbounds float, float* %tmp17391, i64 1
+ %tmp17393 = getelementptr inbounds float, float* %tmp17392, i64 1
+ %tmp17394 = getelementptr inbounds float, float* %tmp17393, i64 1
+ %tmp17395 = getelementptr inbounds float, float* %tmp17394, i64 1
+ %tmp17396 = getelementptr inbounds float, float* %tmp17395, i64 1
+ %tmp17397 = getelementptr inbounds float, float* %tmp17396, i64 1
+ %tmp17398 = getelementptr inbounds float, float* %tmp17397, i64 1
+ %tmp17399 = getelementptr inbounds float, float* %tmp17398, i64 1
+ %tmp17400 = getelementptr inbounds float, float* %tmp17399, i64 1
+ %tmp17401 = getelementptr inbounds float, float* %tmp17400, i64 1
+ %tmp17402 = getelementptr inbounds float, float* %tmp17401, i64 1
+ %tmp17403 = getelementptr inbounds float, float* %tmp17402, i64 1
+ %tmp17404 = getelementptr inbounds float, float* %tmp17403, i64 1
+ %tmp17405 = getelementptr inbounds float, float* %tmp17404, i64 1
+ %tmp17406 = getelementptr inbounds float, float* %tmp17405, i64 1
+ %tmp17407 = getelementptr inbounds float, float* %tmp17406, i64 1
+ %tmp17408 = getelementptr inbounds float, float* %tmp17407, i64 1
+ %tmp17409 = getelementptr inbounds float, float* %tmp17408, i64 1
+ %tmp17410 = getelementptr inbounds float, float* %tmp17409, i64 1
+ %tmp17411 = getelementptr inbounds float, float* %tmp17410, i64 1
+ %tmp17412 = getelementptr inbounds float, float* %tmp17411, i64 1
+ %tmp17413 = getelementptr inbounds float, float* %tmp17412, i64 1
+ %tmp17414 = getelementptr inbounds float, float* %tmp17413, i64 1
+ %tmp17415 = getelementptr inbounds float, float* %tmp17414, i64 1
+ %tmp17416 = getelementptr inbounds float, float* %tmp17415, i64 1
+ %tmp17417 = getelementptr inbounds float, float* %tmp17416, i64 1
+ %tmp17418 = getelementptr inbounds float, float* %tmp17417, i64 1
+ %tmp17419 = getelementptr inbounds float, float* %tmp17418, i64 1
+ %tmp17420 = getelementptr inbounds float, float* %tmp17419, i64 1
+ %tmp17421 = getelementptr inbounds float, float* %tmp17420, i64 1
+ %tmp17422 = getelementptr inbounds float, float* %tmp17421, i64 1
+ %tmp17423 = getelementptr inbounds float, float* %tmp17422, i64 1
+ %tmp17424 = getelementptr inbounds float, float* %tmp17423, i64 1
+ %tmp17425 = getelementptr inbounds float, float* %tmp17424, i64 1
+ %tmp17426 = getelementptr inbounds float, float* %tmp17425, i64 1
+ %tmp17427 = getelementptr inbounds float, float* %tmp17426, i64 1
+ %tmp17428 = getelementptr inbounds float, float* %tmp17427, i64 1
+ %tmp17429 = getelementptr inbounds float, float* %tmp17428, i64 1
+ %tmp17430 = getelementptr inbounds float, float* %tmp17429, i64 1
+ %tmp17431 = getelementptr inbounds float, float* %tmp17430, i64 1
+ %tmp17432 = getelementptr inbounds float, float* %tmp17431, i64 1
+ %tmp17433 = getelementptr inbounds float, float* %tmp17432, i64 1
+ %tmp17434 = getelementptr inbounds float, float* %tmp17433, i64 1
+ %tmp17435 = getelementptr inbounds float, float* %tmp17434, i64 1
+ %tmp17436 = getelementptr inbounds float, float* %tmp17435, i64 1
+ %tmp17437 = getelementptr inbounds float, float* %tmp17436, i64 1
+ %tmp17438 = getelementptr inbounds float, float* %tmp17437, i64 1
+ %tmp17439 = getelementptr inbounds float, float* %tmp17438, i64 1
+ %tmp17440 = getelementptr inbounds float, float* %tmp17439, i64 1
+ %tmp17441 = getelementptr inbounds float, float* %tmp17440, i64 1
+ %tmp17442 = getelementptr inbounds float, float* %tmp17441, i64 1
+ %tmp17443 = getelementptr inbounds float, float* %tmp17442, i64 1
+ %tmp17444 = getelementptr inbounds float, float* %tmp17443, i64 1
+ %tmp17445 = getelementptr inbounds float, float* %tmp17444, i64 1
+ %tmp17446 = getelementptr inbounds float, float* %tmp17445, i64 1
+ %tmp17447 = getelementptr inbounds float, float* %tmp17446, i64 1
+ %tmp17448 = getelementptr inbounds float, float* %tmp17447, i64 1
+ %tmp17449 = getelementptr inbounds float, float* %tmp17448, i64 1
+ %tmp17450 = getelementptr inbounds float, float* %tmp17449, i64 1
+ %tmp17451 = getelementptr inbounds float, float* %tmp17450, i64 1
+ %tmp17452 = getelementptr inbounds float, float* %tmp17451, i64 1
+ %tmp17453 = getelementptr inbounds float, float* %tmp17452, i64 1
+ %tmp17454 = getelementptr inbounds float, float* %tmp17453, i64 1
+ %tmp17455 = getelementptr inbounds float, float* %tmp17454, i64 1
+ %tmp17456 = getelementptr inbounds float, float* %tmp17455, i64 1
+ %tmp17457 = getelementptr inbounds float, float* %tmp17456, i64 1
+ %tmp17458 = getelementptr inbounds float, float* %tmp17457, i64 1
+ %tmp17459 = getelementptr inbounds float, float* %tmp17458, i64 1
+ %tmp17460 = getelementptr inbounds float, float* %tmp17459, i64 1
+ %tmp17461 = getelementptr inbounds float, float* %tmp17460, i64 1
+ %tmp17462 = getelementptr inbounds float, float* %tmp17461, i64 1
+ %tmp17463 = getelementptr inbounds float, float* %tmp17462, i64 1
+ %tmp17464 = getelementptr inbounds float, float* %tmp17463, i64 1
+ %tmp17465 = getelementptr inbounds float, float* %tmp17464, i64 1
+ %tmp17466 = getelementptr inbounds float, float* %tmp17465, i64 1
+ %tmp17467 = getelementptr inbounds float, float* %tmp17466, i64 1
+ %tmp17468 = getelementptr inbounds float, float* %tmp17467, i64 1
+ %tmp17469 = getelementptr inbounds float, float* %tmp17468, i64 1
+ %tmp17470 = getelementptr inbounds float, float* %tmp17469, i64 1
+ %tmp17471 = getelementptr inbounds float, float* %tmp17470, i64 1
+ %tmp17472 = getelementptr inbounds float, float* %tmp17471, i64 1
+ %tmp17473 = getelementptr inbounds float, float* %tmp17472, i64 1
+ %tmp17474 = getelementptr inbounds float, float* %tmp17473, i64 1
+ %tmp17475 = getelementptr inbounds float, float* %tmp17474, i64 1
+ %tmp17476 = getelementptr inbounds float, float* %tmp17475, i64 1
+ %tmp17477 = getelementptr inbounds float, float* %tmp17476, i64 1
+ %tmp17478 = getelementptr inbounds float, float* %tmp17477, i64 1
+ %tmp17479 = getelementptr inbounds float, float* %tmp17478, i64 1
+ %tmp17480 = getelementptr inbounds float, float* %tmp17479, i64 1
+ %tmp17481 = getelementptr inbounds float, float* %tmp17480, i64 1
+ %tmp17482 = getelementptr inbounds float, float* %tmp17481, i64 1
+ %tmp17483 = getelementptr inbounds float, float* %tmp17482, i64 1
+ %tmp17484 = getelementptr inbounds float, float* %tmp17483, i64 1
+ %tmp17485 = getelementptr inbounds float, float* %tmp17484, i64 1
+ %tmp17486 = getelementptr inbounds float, float* %tmp17485, i64 1
+ %tmp17487 = getelementptr inbounds float, float* %tmp17486, i64 1
+ %tmp17488 = getelementptr inbounds float, float* %tmp17487, i64 1
+ %tmp17489 = getelementptr inbounds float, float* %tmp17488, i64 1
+ %tmp17490 = getelementptr inbounds float, float* %tmp17489, i64 1
+ %tmp17491 = getelementptr inbounds float, float* %tmp17490, i64 1
+ %tmp17492 = getelementptr inbounds float, float* %tmp17491, i64 1
+ %tmp17493 = getelementptr inbounds float, float* %tmp17492, i64 1
+ %tmp17494 = getelementptr inbounds float, float* %tmp17493, i64 1
+ %tmp17495 = getelementptr inbounds float, float* %tmp17494, i64 1
+ %tmp17496 = getelementptr inbounds float, float* %tmp17495, i64 1
+ %tmp17497 = getelementptr inbounds float, float* %tmp17496, i64 1
+ %tmp17498 = getelementptr inbounds float, float* %tmp17497, i64 1
+ %tmp17499 = getelementptr inbounds float, float* %tmp17498, i64 1
+ %tmp17500 = getelementptr inbounds float, float* %tmp17499, i64 1
+ %tmp17501 = getelementptr inbounds float, float* %tmp17500, i64 1
+ %tmp17502 = getelementptr inbounds float, float* %tmp17501, i64 1
+ %tmp17503 = getelementptr inbounds float, float* %tmp17502, i64 1
+ %tmp17504 = getelementptr inbounds float, float* %tmp17503, i64 1
+ %tmp17505 = getelementptr inbounds float, float* %tmp17504, i64 1
+ %tmp17506 = getelementptr inbounds float, float* %tmp17505, i64 1
+ %tmp17507 = getelementptr inbounds float, float* %tmp17506, i64 1
+ %tmp17508 = getelementptr inbounds float, float* %tmp17507, i64 1
+ %tmp17509 = getelementptr inbounds float, float* %tmp17508, i64 1
+ %tmp17510 = getelementptr inbounds float, float* %tmp17509, i64 1
+ %tmp17511 = getelementptr inbounds float, float* %tmp17510, i64 1
+ %tmp17512 = getelementptr inbounds float, float* %tmp17511, i64 1
+ %tmp17513 = getelementptr inbounds float, float* %tmp17512, i64 1
+ %tmp17514 = getelementptr inbounds float, float* %tmp17513, i64 1
+ %tmp17515 = getelementptr inbounds float, float* %tmp17514, i64 1
+ %tmp17516 = getelementptr inbounds float, float* %tmp17515, i64 1
+ %tmp17517 = getelementptr inbounds float, float* %tmp17516, i64 1
+ %tmp17518 = getelementptr inbounds float, float* %tmp17517, i64 1
+ %tmp17519 = getelementptr inbounds float, float* %tmp17518, i64 1
+ %tmp17520 = getelementptr inbounds float, float* %tmp17519, i64 1
+ %tmp17521 = getelementptr inbounds float, float* %tmp17520, i64 1
+ %tmp17522 = getelementptr inbounds float, float* %tmp17521, i64 1
+ %tmp17523 = getelementptr inbounds float, float* %tmp17522, i64 1
+ %tmp17524 = getelementptr inbounds float, float* %tmp17523, i64 1
+ %tmp17525 = getelementptr inbounds float, float* %tmp17524, i64 1
+ %tmp17526 = getelementptr inbounds float, float* %tmp17525, i64 1
+ %tmp17527 = getelementptr inbounds float, float* %tmp17526, i64 1
+ %tmp17528 = getelementptr inbounds float, float* %tmp17527, i64 1
+ %tmp17529 = getelementptr inbounds float, float* %tmp17528, i64 1
+ %tmp17530 = getelementptr inbounds float, float* %tmp17529, i64 1
+ %tmp17531 = getelementptr inbounds float, float* %tmp17530, i64 1
+ %tmp17532 = getelementptr inbounds float, float* %tmp17531, i64 1
+ %tmp17533 = getelementptr inbounds float, float* %tmp17532, i64 1
+ %tmp17534 = getelementptr inbounds float, float* %tmp17533, i64 1
+ %tmp17535 = getelementptr inbounds float, float* %tmp17534, i64 1
+ %tmp17536 = getelementptr inbounds float, float* %tmp17535, i64 1
+ %tmp17537 = getelementptr inbounds float, float* %tmp17536, i64 1
+ %tmp17538 = getelementptr inbounds float, float* %tmp17537, i64 1
+ %tmp17539 = getelementptr inbounds float, float* %tmp17538, i64 1
+ %tmp17540 = getelementptr inbounds float, float* %tmp17539, i64 1
+ %tmp17541 = getelementptr inbounds float, float* %tmp17540, i64 1
+ %tmp17542 = getelementptr inbounds float, float* %tmp17541, i64 1
+ %tmp17543 = getelementptr inbounds float, float* %tmp17542, i64 1
+ %tmp17544 = getelementptr inbounds float, float* %tmp17543, i64 1
+ %tmp17545 = getelementptr inbounds float, float* %tmp17544, i64 1
+ %tmp17546 = getelementptr inbounds float, float* %tmp17545, i64 1
+ %tmp17547 = getelementptr inbounds float, float* %tmp17546, i64 1
+ %tmp17548 = getelementptr inbounds float, float* %tmp17547, i64 1
+ %tmp17549 = getelementptr inbounds float, float* %tmp17548, i64 1
+ %tmp17550 = getelementptr inbounds float, float* %tmp17549, i64 1
+ %tmp17551 = getelementptr inbounds float, float* %tmp17550, i64 1
+ %tmp17552 = getelementptr inbounds float, float* %tmp17551, i64 1
+ %tmp17553 = getelementptr inbounds float, float* %tmp17552, i64 1
+ %tmp17554 = getelementptr inbounds float, float* %tmp17553, i64 1
+ %tmp17555 = getelementptr inbounds float, float* %tmp17554, i64 1
+ %tmp17556 = getelementptr inbounds float, float* %tmp17555, i64 1
+ %tmp17557 = getelementptr inbounds float, float* %tmp17556, i64 1
+ %tmp17558 = getelementptr inbounds float, float* %tmp17557, i64 1
+ %tmp17559 = getelementptr inbounds float, float* %tmp17558, i64 1
+ %tmp17560 = getelementptr inbounds float, float* %tmp17559, i64 1
+ %tmp17561 = getelementptr inbounds float, float* %tmp17560, i64 1
+ %tmp17562 = getelementptr inbounds float, float* %tmp17561, i64 1
+ %tmp17563 = getelementptr inbounds float, float* %tmp17562, i64 1
+ %tmp17564 = getelementptr inbounds float, float* %tmp17563, i64 1
+ %tmp17565 = getelementptr inbounds float, float* %tmp17564, i64 1
+ %tmp17566 = getelementptr inbounds float, float* %tmp17565, i64 1
+ %tmp17567 = getelementptr inbounds float, float* %tmp17566, i64 1
+ %tmp17568 = getelementptr inbounds float, float* %tmp17567, i64 1
+ %tmp17569 = getelementptr inbounds float, float* %tmp17568, i64 1
+ %tmp17570 = getelementptr inbounds float, float* %tmp17569, i64 1
+ %tmp17571 = getelementptr inbounds float, float* %tmp17570, i64 1
+ %tmp17572 = getelementptr inbounds float, float* %tmp17571, i64 1
+ %tmp17573 = getelementptr inbounds float, float* %tmp17572, i64 1
+ %tmp17574 = getelementptr inbounds float, float* %tmp17573, i64 1
+ %tmp17575 = getelementptr inbounds float, float* %tmp17574, i64 1
+ %tmp17576 = getelementptr inbounds float, float* %tmp17575, i64 1
+ %tmp17577 = getelementptr inbounds float, float* %tmp17576, i64 1
+ %tmp17578 = getelementptr inbounds float, float* %tmp17577, i64 1
+ %tmp17579 = getelementptr inbounds float, float* %tmp17578, i64 1
+ %tmp17580 = getelementptr inbounds float, float* %tmp17579, i64 1
+ %tmp17581 = getelementptr inbounds float, float* %tmp17580, i64 1
+ %tmp17582 = getelementptr inbounds float, float* %tmp17581, i64 1
+ %tmp17583 = getelementptr inbounds float, float* %tmp17582, i64 1
+ %tmp17584 = getelementptr inbounds float, float* %tmp17583, i64 1
+ %tmp17585 = getelementptr inbounds float, float* %tmp17584, i64 1
+ %tmp17586 = getelementptr inbounds float, float* %tmp17585, i64 1
+ %tmp17587 = getelementptr inbounds float, float* %tmp17586, i64 1
+ %tmp17588 = getelementptr inbounds float, float* %tmp17587, i64 1
+ %tmp17589 = getelementptr inbounds float, float* %tmp17588, i64 1
+ %tmp17590 = getelementptr inbounds float, float* %tmp17589, i64 1
+ %tmp17591 = getelementptr inbounds float, float* %tmp17590, i64 1
+ %tmp17592 = getelementptr inbounds float, float* %tmp17591, i64 1
+ %tmp17593 = getelementptr inbounds float, float* %tmp17592, i64 1
+ %tmp17594 = getelementptr inbounds float, float* %tmp17593, i64 1
+ %tmp17595 = getelementptr inbounds float, float* %tmp17594, i64 1
+ %tmp17596 = getelementptr inbounds float, float* %tmp17595, i64 1
+ %tmp17597 = getelementptr inbounds float, float* %tmp17596, i64 1
+ %tmp17598 = getelementptr inbounds float, float* %tmp17597, i64 1
+ %tmp17599 = getelementptr inbounds float, float* %tmp17598, i64 1
+ %tmp17600 = getelementptr inbounds float, float* %tmp17599, i64 1
+ %tmp17601 = getelementptr inbounds float, float* %tmp17600, i64 1
+ %tmp17602 = getelementptr inbounds float, float* %tmp17601, i64 1
+ %tmp17603 = getelementptr inbounds float, float* %tmp17602, i64 1
+ %tmp17604 = getelementptr inbounds float, float* %tmp17603, i64 1
+ %tmp17605 = getelementptr inbounds float, float* %tmp17604, i64 1
+ %tmp17606 = getelementptr inbounds float, float* %tmp17605, i64 1
+ %tmp17607 = getelementptr inbounds float, float* %tmp17606, i64 1
+ %tmp17608 = getelementptr inbounds float, float* %tmp17607, i64 1
+ %tmp17609 = getelementptr inbounds float, float* %tmp17608, i64 1
+ %tmp17610 = getelementptr inbounds float, float* %tmp17609, i64 1
+ %tmp17611 = getelementptr inbounds float, float* %tmp17610, i64 1
+ %tmp17612 = getelementptr inbounds float, float* %tmp17611, i64 1
+ %tmp17613 = getelementptr inbounds float, float* %tmp17612, i64 1
+ %tmp17614 = getelementptr inbounds float, float* %tmp17613, i64 1
+ %tmp17615 = getelementptr inbounds float, float* %tmp17614, i64 1
+ %tmp17616 = getelementptr inbounds float, float* %tmp17615, i64 1
+ %tmp17617 = getelementptr inbounds float, float* %tmp17616, i64 1
+ %tmp17618 = getelementptr inbounds float, float* %tmp17617, i64 1
+ %tmp17619 = getelementptr inbounds float, float* %tmp17618, i64 1
+ %tmp17620 = getelementptr inbounds float, float* %tmp17619, i64 1
+ %tmp17621 = getelementptr inbounds float, float* %tmp17620, i64 1
+ %tmp17622 = getelementptr inbounds float, float* %tmp17621, i64 1
+ %tmp17623 = getelementptr inbounds float, float* %tmp17622, i64 1
+ %tmp17624 = getelementptr inbounds float, float* %tmp17623, i64 1
+ %tmp17625 = getelementptr inbounds float, float* %tmp17624, i64 1
+ %tmp17626 = getelementptr inbounds float, float* %tmp17625, i64 1
+ %tmp17627 = getelementptr inbounds float, float* %tmp17626, i64 1
+ %tmp17628 = getelementptr inbounds float, float* %tmp17627, i64 1
+ %tmp17629 = getelementptr inbounds float, float* %tmp17628, i64 1
+ %tmp17630 = getelementptr inbounds float, float* %tmp17629, i64 1
+ %tmp17631 = getelementptr inbounds float, float* %tmp17630, i64 1
+ %tmp17632 = getelementptr inbounds float, float* %tmp17631, i64 1
+ %tmp17633 = getelementptr inbounds float, float* %tmp17632, i64 1
+ %tmp17634 = getelementptr inbounds float, float* %tmp17633, i64 1
+ %tmp17635 = getelementptr inbounds float, float* %tmp17634, i64 1
+ %tmp17636 = getelementptr inbounds float, float* %tmp17635, i64 1
+ %tmp17637 = getelementptr inbounds float, float* %tmp17636, i64 1
+ %tmp17638 = getelementptr inbounds float, float* %tmp17637, i64 1
+ %tmp17639 = getelementptr inbounds float, float* %tmp17638, i64 1
+ %tmp17640 = getelementptr inbounds float, float* %tmp17639, i64 1
+ %tmp17641 = getelementptr inbounds float, float* %tmp17640, i64 1
+ %tmp17642 = getelementptr inbounds float, float* %tmp17641, i64 1
+ %tmp17643 = getelementptr inbounds float, float* %tmp17642, i64 1
+ %tmp17644 = getelementptr inbounds float, float* %tmp17643, i64 1
+ %tmp17645 = getelementptr inbounds float, float* %tmp17644, i64 1
+ %tmp17646 = getelementptr inbounds float, float* %tmp17645, i64 1
+ %tmp17647 = getelementptr inbounds float, float* %tmp17646, i64 1
+ %tmp17648 = getelementptr inbounds float, float* %tmp17647, i64 1
+ %tmp17649 = getelementptr inbounds float, float* %tmp17648, i64 1
+ %tmp17650 = getelementptr inbounds float, float* %tmp17649, i64 1
+ %tmp17651 = getelementptr inbounds float, float* %tmp17650, i64 1
+ %tmp17652 = getelementptr inbounds float, float* %tmp17651, i64 1
+ %tmp17653 = getelementptr inbounds float, float* %tmp17652, i64 1
+ %tmp17654 = getelementptr inbounds float, float* %tmp17653, i64 1
+ %tmp17655 = getelementptr inbounds float, float* %tmp17654, i64 1
+ %tmp17656 = getelementptr inbounds float, float* %tmp17655, i64 1
+ %tmp17657 = getelementptr inbounds float, float* %tmp17656, i64 1
+ %tmp17658 = getelementptr inbounds float, float* %tmp17657, i64 1
+ %tmp17659 = getelementptr inbounds float, float* %tmp17658, i64 1
+ %tmp17660 = getelementptr inbounds float, float* %tmp17659, i64 1
+ %tmp17661 = getelementptr inbounds float, float* %tmp17660, i64 1
+ %tmp17662 = getelementptr inbounds float, float* %tmp17661, i64 1
+ %tmp17663 = getelementptr inbounds float, float* %tmp17662, i64 1
+ %tmp17664 = getelementptr inbounds float, float* %tmp17663, i64 1
+ %tmp17665 = getelementptr inbounds float, float* %tmp17664, i64 1
+ %tmp17666 = getelementptr inbounds float, float* %tmp17665, i64 1
+ %tmp17667 = getelementptr inbounds float, float* %tmp17666, i64 1
+ %tmp17668 = getelementptr inbounds float, float* %tmp17667, i64 1
+ %tmp17669 = getelementptr inbounds float, float* %tmp17668, i64 1
+ %tmp17670 = getelementptr inbounds float, float* %tmp17669, i64 1
+ %tmp17671 = getelementptr inbounds float, float* %tmp17670, i64 1
+ %tmp17672 = getelementptr inbounds float, float* %tmp17671, i64 1
+ %tmp17673 = getelementptr inbounds float, float* %tmp17672, i64 1
+ %tmp17674 = getelementptr inbounds float, float* %tmp17673, i64 1
+ %tmp17675 = getelementptr inbounds float, float* %tmp17674, i64 1
+ %tmp17676 = getelementptr inbounds float, float* %tmp17675, i64 1
+ %tmp17677 = getelementptr inbounds float, float* %tmp17676, i64 1
+ %tmp17678 = getelementptr inbounds float, float* %tmp17677, i64 1
+ %tmp17679 = getelementptr inbounds float, float* %tmp17678, i64 1
+ %tmp17680 = getelementptr inbounds float, float* %tmp17679, i64 1
+ %tmp17681 = getelementptr inbounds float, float* %tmp17680, i64 1
+ %tmp17682 = getelementptr inbounds float, float* %tmp17681, i64 1
+ %tmp17683 = getelementptr inbounds float, float* %tmp17682, i64 1
+ %tmp17684 = getelementptr inbounds float, float* %tmp17683, i64 1
+ %tmp17685 = getelementptr inbounds float, float* %tmp17684, i64 1
+ %tmp17686 = getelementptr inbounds float, float* %tmp17685, i64 1
+ %tmp17687 = getelementptr inbounds float, float* %tmp17686, i64 1
+ %tmp17688 = getelementptr inbounds float, float* %tmp17687, i64 1
+ %tmp17689 = getelementptr inbounds float, float* %tmp17688, i64 1
+ %tmp17690 = getelementptr inbounds float, float* %tmp17689, i64 1
+ %tmp17691 = getelementptr inbounds float, float* %tmp17690, i64 1
+ %tmp17692 = getelementptr inbounds float, float* %tmp17691, i64 1
+ %tmp17693 = getelementptr inbounds float, float* %tmp17692, i64 1
+ %tmp17694 = getelementptr inbounds float, float* %tmp17693, i64 1
+ %tmp17695 = getelementptr inbounds float, float* %tmp17694, i64 1
+ %tmp17696 = getelementptr inbounds float, float* %tmp17695, i64 1
+ %tmp17697 = getelementptr inbounds float, float* %tmp17696, i64 1
+ %tmp17698 = getelementptr inbounds float, float* %tmp17697, i64 1
+ %tmp17699 = getelementptr inbounds float, float* %tmp17698, i64 1
+ %tmp17700 = getelementptr inbounds float, float* %tmp17699, i64 1
+ %tmp17701 = getelementptr inbounds float, float* %tmp17700, i64 1
+ %tmp17702 = getelementptr inbounds float, float* %tmp17701, i64 1
+ %tmp17703 = getelementptr inbounds float, float* %tmp17702, i64 1
+ %tmp17704 = getelementptr inbounds float, float* %tmp17703, i64 1
+ %tmp17705 = getelementptr inbounds float, float* %tmp17704, i64 1
+ %tmp17706 = getelementptr inbounds float, float* %tmp17705, i64 1
+ %tmp17707 = getelementptr inbounds float, float* %tmp17706, i64 1
+ %tmp17708 = getelementptr inbounds float, float* %tmp17707, i64 1
+ %tmp17709 = getelementptr inbounds float, float* %tmp17708, i64 1
+ %tmp17710 = getelementptr inbounds float, float* %tmp17709, i64 1
+ %tmp17711 = getelementptr inbounds float, float* %tmp17710, i64 1
+ %tmp17712 = getelementptr inbounds float, float* %tmp17711, i64 1
+ %tmp17713 = getelementptr inbounds float, float* %tmp17712, i64 1
+ %tmp17714 = getelementptr inbounds float, float* %tmp17713, i64 1
+ %tmp17715 = getelementptr inbounds float, float* %tmp17714, i64 1
+ %tmp17716 = getelementptr inbounds float, float* %tmp17715, i64 1
+ %tmp17717 = getelementptr inbounds float, float* %tmp17716, i64 1
+ %tmp17718 = getelementptr inbounds float, float* %tmp17717, i64 1
+ %tmp17719 = getelementptr inbounds float, float* %tmp17718, i64 1
+ %tmp17720 = getelementptr inbounds float, float* %tmp17719, i64 1
+ %tmp17721 = getelementptr inbounds float, float* %tmp17720, i64 1
+ %tmp17722 = getelementptr inbounds float, float* %tmp17721, i64 1
+ %tmp17723 = getelementptr inbounds float, float* %tmp17722, i64 1
+ %tmp17724 = getelementptr inbounds float, float* %tmp17723, i64 1
+ %tmp17725 = getelementptr inbounds float, float* %tmp17724, i64 1
+ %tmp17726 = getelementptr inbounds float, float* %tmp17725, i64 1
+ %tmp17727 = getelementptr inbounds float, float* %tmp17726, i64 1
+ %tmp17728 = getelementptr inbounds float, float* %tmp17727, i64 1
+ %tmp17729 = getelementptr inbounds float, float* %tmp17728, i64 1
+ %tmp17730 = getelementptr inbounds float, float* %tmp17729, i64 1
+ %tmp17731 = getelementptr inbounds float, float* %tmp17730, i64 1
+ %tmp17732 = getelementptr inbounds float, float* %tmp17731, i64 1
+ %tmp17733 = getelementptr inbounds float, float* %tmp17732, i64 1
+ %tmp17734 = getelementptr inbounds float, float* %tmp17733, i64 1
+ %tmp17735 = getelementptr inbounds float, float* %tmp17734, i64 1
+ %tmp17736 = getelementptr inbounds float, float* %tmp17735, i64 1
+ %tmp17737 = getelementptr inbounds float, float* %tmp17736, i64 1
+ %tmp17738 = getelementptr inbounds float, float* %tmp17737, i64 1
+ %tmp17739 = getelementptr inbounds float, float* %tmp17738, i64 1
+ %tmp17740 = getelementptr inbounds float, float* %tmp17739, i64 1
+ %tmp17741 = getelementptr inbounds float, float* %tmp17740, i64 1
+ %tmp17742 = getelementptr inbounds float, float* %tmp17741, i64 1
+ %tmp17743 = getelementptr inbounds float, float* %tmp17742, i64 1
+ %tmp17744 = getelementptr inbounds float, float* %tmp17743, i64 1
+ %tmp17745 = getelementptr inbounds float, float* %tmp17744, i64 1
+ %tmp17746 = getelementptr inbounds float, float* %tmp17745, i64 1
+ %tmp17747 = getelementptr inbounds float, float* %tmp17746, i64 1
+ %tmp17748 = getelementptr inbounds float, float* %tmp17747, i64 1
+ %tmp17749 = getelementptr inbounds float, float* %tmp17748, i64 1
+ %tmp17750 = getelementptr inbounds float, float* %tmp17749, i64 1
+ %tmp17751 = getelementptr inbounds float, float* %tmp17750, i64 1
+ %tmp17752 = getelementptr inbounds float, float* %tmp17751, i64 1
+ %tmp17753 = getelementptr inbounds float, float* %tmp17752, i64 1
+ %tmp17754 = getelementptr inbounds float, float* %tmp17753, i64 1
+ %tmp17755 = getelementptr inbounds float, float* %tmp17754, i64 1
+ %tmp17756 = getelementptr inbounds float, float* %tmp17755, i64 1
+ %tmp17757 = getelementptr inbounds float, float* %tmp17756, i64 1
+ %tmp17758 = getelementptr inbounds float, float* %tmp17757, i64 1
+ %tmp17759 = getelementptr inbounds float, float* %tmp17758, i64 1
+ %tmp17760 = getelementptr inbounds float, float* %tmp17759, i64 1
+ %tmp17761 = getelementptr inbounds float, float* %tmp17760, i64 1
+ %tmp17762 = getelementptr inbounds float, float* %tmp17761, i64 1
+ %tmp17763 = getelementptr inbounds float, float* %tmp17762, i64 1
+ %tmp17764 = getelementptr inbounds float, float* %tmp17763, i64 1
+ %tmp17765 = getelementptr inbounds float, float* %tmp17764, i64 1
+ %tmp17766 = getelementptr inbounds float, float* %tmp17765, i64 1
+ %tmp17767 = getelementptr inbounds float, float* %tmp17766, i64 1
+ %tmp17768 = getelementptr inbounds float, float* %tmp17767, i64 1
+ %tmp17769 = getelementptr inbounds float, float* %tmp17768, i64 1
+ %tmp17770 = getelementptr inbounds float, float* %tmp17769, i64 1
+ %tmp17771 = getelementptr inbounds float, float* %tmp17770, i64 1
+ %tmp17772 = getelementptr inbounds float, float* %tmp17771, i64 1
+ %tmp17773 = getelementptr inbounds float, float* %tmp17772, i64 1
+ %tmp17774 = getelementptr inbounds float, float* %tmp17773, i64 1
+ %tmp17775 = getelementptr inbounds float, float* %tmp17774, i64 1
+ %tmp17776 = getelementptr inbounds float, float* %tmp17775, i64 1
+ %tmp17777 = getelementptr inbounds float, float* %tmp17776, i64 1
+ %tmp17778 = getelementptr inbounds float, float* %tmp17777, i64 1
+ %tmp17779 = getelementptr inbounds float, float* %tmp17778, i64 1
+ %tmp17780 = getelementptr inbounds float, float* %tmp17779, i64 1
+ %tmp17781 = getelementptr inbounds float, float* %tmp17780, i64 1
+ %tmp17782 = getelementptr inbounds float, float* %tmp17781, i64 1
+ %tmp17783 = getelementptr inbounds float, float* %tmp17782, i64 1
+ %tmp17784 = getelementptr inbounds float, float* %tmp17783, i64 1
+ %tmp17785 = getelementptr inbounds float, float* %tmp17784, i64 1
+ %tmp17786 = getelementptr inbounds float, float* %tmp17785, i64 1
+ %tmp17787 = getelementptr inbounds float, float* %tmp17786, i64 1
+ %tmp17788 = getelementptr inbounds float, float* %tmp17787, i64 1
+ %tmp17789 = getelementptr inbounds float, float* %tmp17788, i64 1
+ %tmp17790 = getelementptr inbounds float, float* %tmp17789, i64 1
+ %tmp17791 = getelementptr inbounds float, float* %tmp17790, i64 1
+ %tmp17792 = getelementptr inbounds float, float* %tmp17791, i64 1
+ %tmp17793 = getelementptr inbounds float, float* %tmp17792, i64 1
+ %tmp17794 = getelementptr inbounds float, float* %tmp17793, i64 1
+ %tmp17795 = getelementptr inbounds float, float* %tmp17794, i64 1
+ %tmp17796 = getelementptr inbounds float, float* %tmp17795, i64 1
+ %tmp17797 = getelementptr inbounds float, float* %tmp17796, i64 1
+ %tmp17798 = getelementptr inbounds float, float* %tmp17797, i64 1
+ %tmp17799 = getelementptr inbounds float, float* %tmp17798, i64 1
+ %tmp17800 = getelementptr inbounds float, float* %tmp17799, i64 1
+ %tmp17801 = getelementptr inbounds float, float* %tmp17800, i64 1
+ %tmp17802 = getelementptr inbounds float, float* %tmp17801, i64 1
+ %tmp17803 = getelementptr inbounds float, float* %tmp17802, i64 1
+ %tmp17804 = getelementptr inbounds float, float* %tmp17803, i64 1
+ %tmp17805 = getelementptr inbounds float, float* %tmp17804, i64 1
+ %tmp17806 = getelementptr inbounds float, float* %tmp17805, i64 1
+ %tmp17807 = getelementptr inbounds float, float* %tmp17806, i64 1
+ %tmp17808 = getelementptr inbounds float, float* %tmp17807, i64 1
+ %tmp17809 = getelementptr inbounds float, float* %tmp17808, i64 1
+ %tmp17810 = getelementptr inbounds float, float* %tmp17809, i64 1
+ %tmp17811 = getelementptr inbounds float, float* %tmp17810, i64 1
+ %tmp17812 = getelementptr inbounds float, float* %tmp17811, i64 1
+ %tmp17813 = getelementptr inbounds float, float* %tmp17812, i64 1
+ %tmp17814 = getelementptr inbounds float, float* %tmp17813, i64 1
+ %tmp17815 = getelementptr inbounds float, float* %tmp17814, i64 1
+ %tmp17816 = getelementptr inbounds float, float* %tmp17815, i64 1
+ %tmp17817 = getelementptr inbounds float, float* %tmp17816, i64 1
+ %tmp17818 = getelementptr inbounds float, float* %tmp17817, i64 1
+ %tmp17819 = getelementptr inbounds float, float* %tmp17818, i64 1
+ %tmp17820 = getelementptr inbounds float, float* %tmp17819, i64 1
+ %tmp17821 = getelementptr inbounds float, float* %tmp17820, i64 1
+ %tmp17822 = getelementptr inbounds float, float* %tmp17821, i64 1
+ %tmp17823 = getelementptr inbounds float, float* %tmp17822, i64 1
+ %tmp17824 = getelementptr inbounds float, float* %tmp17823, i64 1
+ %tmp17825 = getelementptr inbounds float, float* %tmp17824, i64 1
+ %tmp17826 = getelementptr inbounds float, float* %tmp17825, i64 1
+ %tmp17827 = getelementptr inbounds float, float* %tmp17826, i64 1
+ %tmp17828 = getelementptr inbounds float, float* %tmp17827, i64 1
+ %tmp17829 = getelementptr inbounds float, float* %tmp17828, i64 1
+ %tmp17830 = getelementptr inbounds float, float* %tmp17829, i64 1
+ %tmp17831 = getelementptr inbounds float, float* %tmp17830, i64 1
+ %tmp17832 = getelementptr inbounds float, float* %tmp17831, i64 1
+ %tmp17833 = getelementptr inbounds float, float* %tmp17832, i64 1
+ %tmp17834 = getelementptr inbounds float, float* %tmp17833, i64 1
+ %tmp17835 = getelementptr inbounds float, float* %tmp17834, i64 1
+ %tmp17836 = getelementptr inbounds float, float* %tmp17835, i64 1
+ %tmp17837 = getelementptr inbounds float, float* %tmp17836, i64 1
+ %tmp17838 = getelementptr inbounds float, float* %tmp17837, i64 1
+ %tmp17839 = getelementptr inbounds float, float* %tmp17838, i64 1
+ %tmp17840 = getelementptr inbounds float, float* %tmp17839, i64 1
+ %tmp17841 = getelementptr inbounds float, float* %tmp17840, i64 1
+ %tmp17842 = getelementptr inbounds float, float* %tmp17841, i64 1
+ %tmp17843 = getelementptr inbounds float, float* %tmp17842, i64 1
+ %tmp17844 = getelementptr inbounds float, float* %tmp17843, i64 1
+ %tmp17845 = getelementptr inbounds float, float* %tmp17844, i64 1
+ %tmp17846 = getelementptr inbounds float, float* %tmp17845, i64 1
+ %tmp17847 = getelementptr inbounds float, float* %tmp17846, i64 1
+ %tmp17848 = getelementptr inbounds float, float* %tmp17847, i64 1
+ %tmp17849 = getelementptr inbounds float, float* %tmp17848, i64 1
+ %tmp17850 = getelementptr inbounds float, float* %tmp17849, i64 1
+ %tmp17851 = getelementptr inbounds float, float* %tmp17850, i64 1
+ %tmp17852 = getelementptr inbounds float, float* %tmp17851, i64 1
+ %tmp17853 = getelementptr inbounds float, float* %tmp17852, i64 1
+ %tmp17854 = getelementptr inbounds float, float* %tmp17853, i64 1
+ %tmp17855 = getelementptr inbounds float, float* %tmp17854, i64 1
+ %tmp17856 = getelementptr inbounds float, float* %tmp17855, i64 1
+ %tmp17857 = getelementptr inbounds float, float* %tmp17856, i64 1
+ %tmp17858 = getelementptr inbounds float, float* %tmp17857, i64 1
+ %tmp17859 = getelementptr inbounds float, float* %tmp17858, i64 1
+ %tmp17860 = getelementptr inbounds float, float* %tmp17859, i64 1
+ %tmp17861 = getelementptr inbounds float, float* %tmp17860, i64 1
+ %tmp17862 = getelementptr inbounds float, float* %tmp17861, i64 1
+ %tmp17863 = getelementptr inbounds float, float* %tmp17862, i64 1
+ %tmp17864 = getelementptr inbounds float, float* %tmp17863, i64 1
+ %tmp17865 = getelementptr inbounds float, float* %tmp17864, i64 1
+ %tmp17866 = getelementptr inbounds float, float* %tmp17865, i64 1
+ %tmp17867 = getelementptr inbounds float, float* %tmp17866, i64 1
+ %tmp17868 = getelementptr inbounds float, float* %tmp17867, i64 1
+ %tmp17869 = getelementptr inbounds float, float* %tmp17868, i64 1
+ %tmp17870 = getelementptr inbounds float, float* %tmp17869, i64 1
+ %tmp17871 = getelementptr inbounds float, float* %tmp17870, i64 1
+ %tmp17872 = getelementptr inbounds float, float* %tmp17871, i64 1
+ %tmp17873 = getelementptr inbounds float, float* %tmp17872, i64 1
+ %tmp17874 = getelementptr inbounds float, float* %tmp17873, i64 1
+ %tmp17875 = getelementptr inbounds float, float* %tmp17874, i64 1
+ %tmp17876 = getelementptr inbounds float, float* %tmp17875, i64 1
+ %tmp17877 = getelementptr inbounds float, float* %tmp17876, i64 1
+ %tmp17878 = getelementptr inbounds float, float* %tmp17877, i64 1
+ %tmp17879 = getelementptr inbounds float, float* %tmp17878, i64 1
+ %tmp17880 = getelementptr inbounds float, float* %tmp17879, i64 1
+ %tmp17881 = getelementptr inbounds float, float* %tmp17880, i64 1
+ %tmp17882 = getelementptr inbounds float, float* %tmp17881, i64 1
+ %tmp17883 = getelementptr inbounds float, float* %tmp17882, i64 1
+ %tmp17884 = getelementptr inbounds float, float* %tmp17883, i64 1
+ %tmp17885 = getelementptr inbounds float, float* %tmp17884, i64 1
+ %tmp17886 = getelementptr inbounds float, float* %tmp17885, i64 1
+ %tmp17887 = getelementptr inbounds float, float* %tmp17886, i64 1
+ %tmp17888 = getelementptr inbounds float, float* %tmp17887, i64 1
+ %tmp17889 = getelementptr inbounds float, float* %tmp17888, i64 1
+ %tmp17890 = getelementptr inbounds float, float* %tmp17889, i64 1
+ %tmp17891 = getelementptr inbounds float, float* %tmp17890, i64 1
+ %tmp17892 = getelementptr inbounds float, float* %tmp17891, i64 1
+ %tmp17893 = getelementptr inbounds float, float* %tmp17892, i64 1
+ %tmp17894 = getelementptr inbounds float, float* %tmp17893, i64 1
+ %tmp17895 = getelementptr inbounds float, float* %tmp17894, i64 1
+ %tmp17896 = getelementptr inbounds float, float* %tmp17895, i64 1
+ %tmp17897 = getelementptr inbounds float, float* %tmp17896, i64 1
+ %tmp17898 = getelementptr inbounds float, float* %tmp17897, i64 1
+ %tmp17899 = getelementptr inbounds float, float* %tmp17898, i64 1
+ %tmp17900 = getelementptr inbounds float, float* %tmp17899, i64 1
+ %tmp17901 = getelementptr inbounds float, float* %tmp17900, i64 1
+ %tmp17902 = getelementptr inbounds float, float* %tmp17901, i64 1
+ %tmp17903 = getelementptr inbounds float, float* %tmp17902, i64 1
+ %tmp17904 = getelementptr inbounds float, float* %tmp17903, i64 1
+ %tmp17905 = getelementptr inbounds float, float* %tmp17904, i64 1
+ %tmp17906 = getelementptr inbounds float, float* %tmp17905, i64 1
+ %tmp17907 = getelementptr inbounds float, float* %tmp17906, i64 1
+ %tmp17908 = getelementptr inbounds float, float* %tmp17907, i64 1
+ %tmp17909 = getelementptr inbounds float, float* %tmp17908, i64 1
+ %tmp17910 = getelementptr inbounds float, float* %tmp17909, i64 1
+ %tmp17911 = getelementptr inbounds float, float* %tmp17910, i64 1
+ %tmp17912 = getelementptr inbounds float, float* %tmp17911, i64 1
+ %tmp17913 = getelementptr inbounds float, float* %tmp17912, i64 1
+ %tmp17914 = getelementptr inbounds float, float* %tmp17913, i64 1
+ %tmp17915 = getelementptr inbounds float, float* %tmp17914, i64 1
+ %tmp17916 = getelementptr inbounds float, float* %tmp17915, i64 1
+ %tmp17917 = getelementptr inbounds float, float* %tmp17916, i64 1
+ %tmp17918 = getelementptr inbounds float, float* %tmp17917, i64 1
+ %tmp17919 = getelementptr inbounds float, float* %tmp17918, i64 1
+ %tmp17920 = getelementptr inbounds float, float* %tmp17919, i64 1
+ %tmp17921 = getelementptr inbounds float, float* %tmp17920, i64 1
+ %tmp17922 = getelementptr inbounds float, float* %tmp17921, i64 1
+ %tmp17923 = getelementptr inbounds float, float* %tmp17922, i64 1
+ %tmp17924 = getelementptr inbounds float, float* %tmp17923, i64 1
+ %tmp17925 = getelementptr inbounds float, float* %tmp17924, i64 1
+ %tmp17926 = getelementptr inbounds float, float* %tmp17925, i64 1
+ %tmp17927 = getelementptr inbounds float, float* %tmp17926, i64 1
+ %tmp17928 = getelementptr inbounds float, float* %tmp17927, i64 1
+ %tmp17929 = getelementptr inbounds float, float* %tmp17928, i64 1
+ %tmp17930 = getelementptr inbounds float, float* %tmp17929, i64 1
+ %tmp17931 = getelementptr inbounds float, float* %tmp17930, i64 1
+ %tmp17932 = getelementptr inbounds float, float* %tmp17931, i64 1
+ %tmp17933 = getelementptr inbounds float, float* %tmp17932, i64 1
+ %tmp17934 = getelementptr inbounds float, float* %tmp17933, i64 1
+ %tmp17935 = getelementptr inbounds float, float* %tmp17934, i64 1
+ %tmp17936 = getelementptr inbounds float, float* %tmp17935, i64 1
+ %tmp17937 = getelementptr inbounds float, float* %tmp17936, i64 1
+ %tmp17938 = getelementptr inbounds float, float* %tmp17937, i64 1
+ %tmp17939 = getelementptr inbounds float, float* %tmp17938, i64 1
+ %tmp17940 = getelementptr inbounds float, float* %tmp17939, i64 1
+ %tmp17941 = getelementptr inbounds float, float* %tmp17940, i64 1
+ %tmp17942 = getelementptr inbounds float, float* %tmp17941, i64 1
+ %tmp17943 = getelementptr inbounds float, float* %tmp17942, i64 1
+ %tmp17944 = getelementptr inbounds float, float* %tmp17943, i64 1
+ %tmp17945 = getelementptr inbounds float, float* %tmp17944, i64 1
+ %tmp17946 = getelementptr inbounds float, float* %tmp17945, i64 1
+ %tmp17947 = getelementptr inbounds float, float* %tmp17946, i64 1
+ %tmp17948 = getelementptr inbounds float, float* %tmp17947, i64 1
+ %tmp17949 = getelementptr inbounds float, float* %tmp17948, i64 1
+ %tmp17950 = getelementptr inbounds float, float* %tmp17949, i64 1
+ %tmp17951 = getelementptr inbounds float, float* %tmp17950, i64 1
+ %tmp17952 = getelementptr inbounds float, float* %tmp17951, i64 1
+ %tmp17953 = getelementptr inbounds float, float* %tmp17952, i64 1
+ %tmp17954 = getelementptr inbounds float, float* %tmp17953, i64 1
+ %tmp17955 = getelementptr inbounds float, float* %tmp17954, i64 1
+ %tmp17956 = getelementptr inbounds float, float* %tmp17955, i64 1
+ %tmp17957 = getelementptr inbounds float, float* %tmp17956, i64 1
+ %tmp17958 = getelementptr inbounds float, float* %tmp17957, i64 1
+ %tmp17959 = getelementptr inbounds float, float* %tmp17958, i64 1
+ %tmp17960 = getelementptr inbounds float, float* %tmp17959, i64 1
+ %tmp17961 = getelementptr inbounds float, float* %tmp17960, i64 1
+ %tmp17962 = getelementptr inbounds float, float* %tmp17961, i64 1
+ %tmp17963 = getelementptr inbounds float, float* %tmp17962, i64 1
+ %tmp17964 = getelementptr inbounds float, float* %tmp17963, i64 1
+ %tmp17965 = getelementptr inbounds float, float* %tmp17964, i64 1
+ %tmp17966 = getelementptr inbounds float, float* %tmp17965, i64 1
+ %tmp17967 = getelementptr inbounds float, float* %tmp17966, i64 1
+ %tmp17968 = getelementptr inbounds float, float* %tmp17967, i64 1
+ %tmp17969 = getelementptr inbounds float, float* %tmp17968, i64 1
+ %tmp17970 = getelementptr inbounds float, float* %tmp17969, i64 1
+ %tmp17971 = getelementptr inbounds float, float* %tmp17970, i64 1
+ %tmp17972 = getelementptr inbounds float, float* %tmp17971, i64 1
+ %tmp17973 = getelementptr inbounds float, float* %tmp17972, i64 1
+ %tmp17974 = getelementptr inbounds float, float* %tmp17973, i64 1
+ %tmp17975 = getelementptr inbounds float, float* %tmp17974, i64 1
+ %tmp17976 = getelementptr inbounds float, float* %tmp17975, i64 1
+ %tmp17977 = getelementptr inbounds float, float* %tmp17976, i64 1
+ %tmp17978 = getelementptr inbounds float, float* %tmp17977, i64 1
+ %tmp17979 = getelementptr inbounds float, float* %tmp17978, i64 1
+ %tmp17980 = getelementptr inbounds float, float* %tmp17979, i64 1
+ %tmp17981 = getelementptr inbounds float, float* %tmp17980, i64 1
+ %tmp17982 = getelementptr inbounds float, float* %tmp17981, i64 1
+ %tmp17983 = getelementptr inbounds float, float* %tmp17982, i64 1
+ %tmp17984 = getelementptr inbounds float, float* %tmp17983, i64 1
+ %tmp17985 = getelementptr inbounds float, float* %tmp17984, i64 1
+ %tmp17986 = getelementptr inbounds float, float* %tmp17985, i64 1
+ %tmp17987 = getelementptr inbounds float, float* %tmp17986, i64 1
+ %tmp17988 = getelementptr inbounds float, float* %tmp17987, i64 1
+ %tmp17989 = getelementptr inbounds float, float* %tmp17988, i64 1
+ %tmp17990 = getelementptr inbounds float, float* %tmp17989, i64 1
+ %tmp17991 = getelementptr inbounds float, float* %tmp17990, i64 1
+ %tmp17992 = getelementptr inbounds float, float* %tmp17991, i64 1
+ %tmp17993 = getelementptr inbounds float, float* %tmp17992, i64 1
+ %tmp17994 = getelementptr inbounds float, float* %tmp17993, i64 1
+ %tmp17995 = getelementptr inbounds float, float* %tmp17994, i64 1
+ %tmp17996 = getelementptr inbounds float, float* %tmp17995, i64 1
+ %tmp17997 = getelementptr inbounds float, float* %tmp17996, i64 1
+ %tmp17998 = getelementptr inbounds float, float* %tmp17997, i64 1
+ %tmp17999 = getelementptr inbounds float, float* %tmp17998, i64 1
+ %tmp18000 = getelementptr inbounds float, float* %tmp17999, i64 1
+ %tmp18001 = getelementptr inbounds float, float* %tmp18000, i64 1
+ %tmp18002 = getelementptr inbounds float, float* %tmp18001, i64 1
+ %tmp18003 = getelementptr inbounds float, float* %tmp18002, i64 1
+ %tmp18004 = getelementptr inbounds float, float* %tmp18003, i64 1
+ %tmp18005 = getelementptr inbounds float, float* %tmp18004, i64 1
+ %tmp18006 = getelementptr inbounds float, float* %tmp18005, i64 1
+ %tmp18007 = getelementptr inbounds float, float* %tmp18006, i64 1
+ %tmp18008 = getelementptr inbounds float, float* %tmp18007, i64 1
+ %tmp18009 = getelementptr inbounds float, float* %tmp18008, i64 1
+ %tmp18010 = getelementptr inbounds float, float* %tmp18009, i64 1
+ %tmp18011 = getelementptr inbounds float, float* %tmp18010, i64 1
+ %tmp18012 = getelementptr inbounds float, float* %tmp18011, i64 1
+ %tmp18013 = getelementptr inbounds float, float* %tmp18012, i64 1
+ %tmp18014 = getelementptr inbounds float, float* %tmp18013, i64 1
+ %tmp18015 = getelementptr inbounds float, float* %tmp18014, i64 1
+ %tmp18016 = getelementptr inbounds float, float* %tmp18015, i64 1
+ %tmp18017 = getelementptr inbounds float, float* %tmp18016, i64 1
+ %tmp18018 = getelementptr inbounds float, float* %tmp18017, i64 1
+ %tmp18019 = getelementptr inbounds float, float* %tmp18018, i64 1
+ %tmp18020 = getelementptr inbounds float, float* %tmp18019, i64 1
+ %tmp18021 = getelementptr inbounds float, float* %tmp18020, i64 1
+ %tmp18022 = getelementptr inbounds float, float* %tmp18021, i64 1
+ %tmp18023 = getelementptr inbounds float, float* %tmp18022, i64 1
+ %tmp18024 = getelementptr inbounds float, float* %tmp18023, i64 1
+ %tmp18025 = getelementptr inbounds float, float* %tmp18024, i64 1
+ %tmp18026 = getelementptr inbounds float, float* %tmp18025, i64 1
+ %tmp18027 = getelementptr inbounds float, float* %tmp18026, i64 1
+ %tmp18028 = getelementptr inbounds float, float* %tmp18027, i64 1
+ %tmp18029 = getelementptr inbounds float, float* %tmp18028, i64 1
+ %tmp18030 = getelementptr inbounds float, float* %tmp18029, i64 1
+ %tmp18031 = getelementptr inbounds float, float* %tmp18030, i64 1
+ %tmp18032 = getelementptr inbounds float, float* %tmp18031, i64 1
+ %tmp18033 = getelementptr inbounds float, float* %tmp18032, i64 1
+ %tmp18034 = getelementptr inbounds float, float* %tmp18033, i64 1
+ %tmp18035 = getelementptr inbounds float, float* %tmp18034, i64 1
+ %tmp18036 = getelementptr inbounds float, float* %tmp18035, i64 1
+ %tmp18037 = getelementptr inbounds float, float* %tmp18036, i64 1
+ %tmp18038 = getelementptr inbounds float, float* %tmp18037, i64 1
+ %tmp18039 = getelementptr inbounds float, float* %tmp18038, i64 1
+ %tmp18040 = getelementptr inbounds float, float* %tmp18039, i64 1
+ %tmp18041 = getelementptr inbounds float, float* %tmp18040, i64 1
+ %tmp18042 = getelementptr inbounds float, float* %tmp18041, i64 1
+ %tmp18043 = getelementptr inbounds float, float* %tmp18042, i64 1
+ %tmp18044 = getelementptr inbounds float, float* %tmp18043, i64 1
+ %tmp18045 = getelementptr inbounds float, float* %tmp18044, i64 1
+ %tmp18046 = getelementptr inbounds float, float* %tmp18045, i64 1
+ %tmp18047 = getelementptr inbounds float, float* %tmp18046, i64 1
+ %tmp18048 = getelementptr inbounds float, float* %tmp18047, i64 1
+ %tmp18049 = getelementptr inbounds float, float* %tmp18048, i64 1
+ %tmp18050 = getelementptr inbounds float, float* %tmp18049, i64 1
+ %tmp18051 = getelementptr inbounds float, float* %tmp18050, i64 1
+ %tmp18052 = getelementptr inbounds float, float* %tmp18051, i64 1
+ %tmp18053 = getelementptr inbounds float, float* %tmp18052, i64 1
+ %tmp18054 = getelementptr inbounds float, float* %tmp18053, i64 1
+ %tmp18055 = getelementptr inbounds float, float* %tmp18054, i64 1
+ %tmp18056 = getelementptr inbounds float, float* %tmp18055, i64 1
+ %tmp18057 = getelementptr inbounds float, float* %tmp18056, i64 1
+ %tmp18058 = getelementptr inbounds float, float* %tmp18057, i64 1
+ %tmp18059 = getelementptr inbounds float, float* %tmp18058, i64 1
+ %tmp18060 = getelementptr inbounds float, float* %tmp18059, i64 1
+ %tmp18061 = getelementptr inbounds float, float* %tmp18060, i64 1
+ %tmp18062 = getelementptr inbounds float, float* %tmp18061, i64 1
+ %tmp18063 = getelementptr inbounds float, float* %tmp18062, i64 1
+ %tmp18064 = getelementptr inbounds float, float* %tmp18063, i64 1
+ %tmp18065 = getelementptr inbounds float, float* %tmp18064, i64 1
+ %tmp18066 = getelementptr inbounds float, float* %tmp18065, i64 1
+ %tmp18067 = getelementptr inbounds float, float* %tmp18066, i64 1
+ %tmp18068 = getelementptr inbounds float, float* %tmp18067, i64 1
+ %tmp18069 = getelementptr inbounds float, float* %tmp18068, i64 1
+ %tmp18070 = getelementptr inbounds float, float* %tmp18069, i64 1
+ %tmp18071 = getelementptr inbounds float, float* %tmp18070, i64 1
+ %tmp18072 = getelementptr inbounds float, float* %tmp18071, i64 1
+ %tmp18073 = getelementptr inbounds float, float* %tmp18072, i64 1
+ %tmp18074 = getelementptr inbounds float, float* %tmp18073, i64 1
+ %tmp18075 = getelementptr inbounds float, float* %tmp18074, i64 1
+ %tmp18076 = getelementptr inbounds float, float* %tmp18075, i64 1
+ %tmp18077 = getelementptr inbounds float, float* %tmp18076, i64 1
+ %tmp18078 = getelementptr inbounds float, float* %tmp18077, i64 1
+ %tmp18079 = getelementptr inbounds float, float* %tmp18078, i64 1
+ %tmp18080 = getelementptr inbounds float, float* %tmp18079, i64 1
+ %tmp18081 = getelementptr inbounds float, float* %tmp18080, i64 1
+ %tmp18082 = getelementptr inbounds float, float* %tmp18081, i64 1
+ %tmp18083 = getelementptr inbounds float, float* %tmp18082, i64 1
+ %tmp18084 = getelementptr inbounds float, float* %tmp18083, i64 1
+ %tmp18085 = getelementptr inbounds float, float* %tmp18084, i64 1
+ %tmp18086 = getelementptr inbounds float, float* %tmp18085, i64 1
+ %tmp18087 = getelementptr inbounds float, float* %tmp18086, i64 1
+ %tmp18088 = getelementptr inbounds float, float* %tmp18087, i64 1
+ %tmp18089 = getelementptr inbounds float, float* %tmp18088, i64 1
+ %tmp18090 = getelementptr inbounds float, float* %tmp18089, i64 1
+ %tmp18091 = getelementptr inbounds float, float* %tmp18090, i64 1
+ %tmp18092 = getelementptr inbounds float, float* %tmp18091, i64 1
+ %tmp18093 = getelementptr inbounds float, float* %tmp18092, i64 1
+ %tmp18094 = getelementptr inbounds float, float* %tmp18093, i64 1
+ %tmp18095 = getelementptr inbounds float, float* %tmp18094, i64 1
+ %tmp18096 = getelementptr inbounds float, float* %tmp18095, i64 1
+ %tmp18097 = getelementptr inbounds float, float* %tmp18096, i64 1
+ %tmp18098 = getelementptr inbounds float, float* %tmp18097, i64 1
+ %tmp18099 = getelementptr inbounds float, float* %tmp18098, i64 1
+ %tmp18100 = getelementptr inbounds float, float* %tmp18099, i64 1
+ %tmp18101 = getelementptr inbounds float, float* %tmp18100, i64 1
+ %tmp18102 = getelementptr inbounds float, float* %tmp18101, i64 1
+ %tmp18103 = getelementptr inbounds float, float* %tmp18102, i64 1
+ %tmp18104 = getelementptr inbounds float, float* %tmp18103, i64 1
+ %tmp18105 = getelementptr inbounds float, float* %tmp18104, i64 1
+ %tmp18106 = getelementptr inbounds float, float* %tmp18105, i64 1
+ %tmp18107 = getelementptr inbounds float, float* %tmp18106, i64 1
+ %tmp18108 = getelementptr inbounds float, float* %tmp18107, i64 1
+ %tmp18109 = getelementptr inbounds float, float* %tmp18108, i64 1
+ %tmp18110 = getelementptr inbounds float, float* %tmp18109, i64 1
+ %tmp18111 = getelementptr inbounds float, float* %tmp18110, i64 1
+ %tmp18112 = getelementptr inbounds float, float* %tmp18111, i64 1
+ %tmp18113 = getelementptr inbounds float, float* %tmp18112, i64 1
+ %tmp18114 = getelementptr inbounds float, float* %tmp18113, i64 1
+ %tmp18115 = getelementptr inbounds float, float* %tmp18114, i64 1
+ %tmp18116 = getelementptr inbounds float, float* %tmp18115, i64 1
+ %tmp18117 = getelementptr inbounds float, float* %tmp18116, i64 1
+ %tmp18118 = getelementptr inbounds float, float* %tmp18117, i64 1
+ %tmp18119 = getelementptr inbounds float, float* %tmp18118, i64 1
+ %tmp18120 = getelementptr inbounds float, float* %tmp18119, i64 1
+ %tmp18121 = getelementptr inbounds float, float* %tmp18120, i64 1
+ %tmp18122 = getelementptr inbounds float, float* %tmp18121, i64 1
+ %tmp18123 = getelementptr inbounds float, float* %tmp18122, i64 1
+ %tmp18124 = getelementptr inbounds float, float* %tmp18123, i64 1
+ %tmp18125 = getelementptr inbounds float, float* %tmp18124, i64 1
+ %tmp18126 = getelementptr inbounds float, float* %tmp18125, i64 1
+ %tmp18127 = getelementptr inbounds float, float* %tmp18126, i64 1
+ %tmp18128 = getelementptr inbounds float, float* %tmp18127, i64 1
+ %tmp18129 = getelementptr inbounds float, float* %tmp18128, i64 1
+ %tmp18130 = getelementptr inbounds float, float* %tmp18129, i64 1
+ %tmp18131 = getelementptr inbounds float, float* %tmp18130, i64 1
+ %tmp18132 = getelementptr inbounds float, float* %tmp18131, i64 1
+ %tmp18133 = getelementptr inbounds float, float* %tmp18132, i64 1
+ %tmp18134 = getelementptr inbounds float, float* %tmp18133, i64 1
+ %tmp18135 = getelementptr inbounds float, float* %tmp18134, i64 1
+ %tmp18136 = getelementptr inbounds float, float* %tmp18135, i64 1
+ %tmp18137 = getelementptr inbounds float, float* %tmp18136, i64 1
+ %tmp18138 = getelementptr inbounds float, float* %tmp18137, i64 1
+ %tmp18139 = getelementptr inbounds float, float* %tmp18138, i64 1
+ %tmp18140 = getelementptr inbounds float, float* %tmp18139, i64 1
+ %tmp18141 = getelementptr inbounds float, float* %tmp18140, i64 1
+ %tmp18142 = getelementptr inbounds float, float* %tmp18141, i64 1
+ %tmp18143 = getelementptr inbounds float, float* %tmp18142, i64 1
+ %tmp18144 = getelementptr inbounds float, float* %tmp18143, i64 1
+ %tmp18145 = getelementptr inbounds float, float* %tmp18144, i64 1
+ %tmp18146 = getelementptr inbounds float, float* %tmp18145, i64 1
+ %tmp18147 = getelementptr inbounds float, float* %tmp18146, i64 1
+ %tmp18148 = getelementptr inbounds float, float* %tmp18147, i64 1
+ %tmp18149 = getelementptr inbounds float, float* %tmp18148, i64 1
+ %tmp18150 = getelementptr inbounds float, float* %tmp18149, i64 1
+ %tmp18151 = getelementptr inbounds float, float* %tmp18150, i64 1
+ %tmp18152 = getelementptr inbounds float, float* %tmp18151, i64 1
+ %tmp18153 = getelementptr inbounds float, float* %tmp18152, i64 1
+ %tmp18154 = getelementptr inbounds float, float* %tmp18153, i64 1
+ %tmp18155 = getelementptr inbounds float, float* %tmp18154, i64 1
+ %tmp18156 = getelementptr inbounds float, float* %tmp18155, i64 1
+ %tmp18157 = getelementptr inbounds float, float* %tmp18156, i64 1
+ %tmp18158 = getelementptr inbounds float, float* %tmp18157, i64 1
+ %tmp18159 = getelementptr inbounds float, float* %tmp18158, i64 1
+ %tmp18160 = getelementptr inbounds float, float* %tmp18159, i64 1
+ %tmp18161 = getelementptr inbounds float, float* %tmp18160, i64 1
+ %tmp18162 = getelementptr inbounds float, float* %tmp18161, i64 1
+ %tmp18163 = getelementptr inbounds float, float* %tmp18162, i64 1
+ %tmp18164 = getelementptr inbounds float, float* %tmp18163, i64 1
+ %tmp18165 = getelementptr inbounds float, float* %tmp18164, i64 1
+ %tmp18166 = getelementptr inbounds float, float* %tmp18165, i64 1
+ %tmp18167 = getelementptr inbounds float, float* %tmp18166, i64 1
+ %tmp18168 = getelementptr inbounds float, float* %tmp18167, i64 1
+ %tmp18169 = getelementptr inbounds float, float* %tmp18168, i64 1
+ %tmp18170 = getelementptr inbounds float, float* %tmp18169, i64 1
+ %tmp18171 = getelementptr inbounds float, float* %tmp18170, i64 1
+ %tmp18172 = getelementptr inbounds float, float* %tmp18171, i64 1
+ %tmp18173 = getelementptr inbounds float, float* %tmp18172, i64 1
+ %tmp18174 = getelementptr inbounds float, float* %tmp18173, i64 1
+ %tmp18175 = getelementptr inbounds float, float* %tmp18174, i64 1
+ %tmp18176 = getelementptr inbounds float, float* %tmp18175, i64 1
+ %tmp18177 = getelementptr inbounds float, float* %tmp18176, i64 1
+ %tmp18178 = getelementptr inbounds float, float* %tmp18177, i64 1
+ %tmp18179 = getelementptr inbounds float, float* %tmp18178, i64 1
+ %tmp18180 = getelementptr inbounds float, float* %tmp18179, i64 1
+ %tmp18181 = getelementptr inbounds float, float* %tmp18180, i64 1
+ %tmp18182 = getelementptr inbounds float, float* %tmp18181, i64 1
+ %tmp18183 = getelementptr inbounds float, float* %tmp18182, i64 1
+ %tmp18184 = getelementptr inbounds float, float* %tmp18183, i64 1
+ %tmp18185 = getelementptr inbounds float, float* %tmp18184, i64 1
+ %tmp18186 = getelementptr inbounds float, float* %tmp18185, i64 1
+ %tmp18187 = getelementptr inbounds float, float* %tmp18186, i64 1
+ %tmp18188 = getelementptr inbounds float, float* %tmp18187, i64 1
+ %tmp18189 = getelementptr inbounds float, float* %tmp18188, i64 1
+ %tmp18190 = getelementptr inbounds float, float* %tmp18189, i64 1
+ %tmp18191 = getelementptr inbounds float, float* %tmp18190, i64 1
+ %tmp18192 = getelementptr inbounds float, float* %tmp18191, i64 1
+ %tmp18193 = getelementptr inbounds float, float* %tmp18192, i64 1
+ %tmp18194 = getelementptr inbounds float, float* %tmp18193, i64 1
+ %tmp18195 = getelementptr inbounds float, float* %tmp18194, i64 1
+ %tmp18196 = getelementptr inbounds float, float* %tmp18195, i64 1
+ %tmp18197 = getelementptr inbounds float, float* %tmp18196, i64 1
+ %tmp18198 = getelementptr inbounds float, float* %tmp18197, i64 1
+ %tmp18199 = getelementptr inbounds float, float* %tmp18198, i64 1
+ %tmp18200 = getelementptr inbounds float, float* %tmp18199, i64 1
+ %tmp18201 = getelementptr inbounds float, float* %tmp18200, i64 1
+ %tmp18202 = getelementptr inbounds float, float* %tmp18201, i64 1
+ %tmp18203 = getelementptr inbounds float, float* %tmp18202, i64 1
+ %tmp18204 = getelementptr inbounds float, float* %tmp18203, i64 1
+ %tmp18205 = getelementptr inbounds float, float* %tmp18204, i64 1
+ %tmp18206 = getelementptr inbounds float, float* %tmp18205, i64 1
+ %tmp18207 = getelementptr inbounds float, float* %tmp18206, i64 1
+ %tmp18208 = getelementptr inbounds float, float* %tmp18207, i64 1
+ %tmp18209 = getelementptr inbounds float, float* %tmp18208, i64 1
+ %tmp18210 = getelementptr inbounds float, float* %tmp18209, i64 1
+ %tmp18211 = getelementptr inbounds float, float* %tmp18210, i64 1
+ %tmp18212 = getelementptr inbounds float, float* %tmp18211, i64 1
+ %tmp18213 = getelementptr inbounds float, float* %tmp18212, i64 1
+ %tmp18214 = getelementptr inbounds float, float* %tmp18213, i64 1
+ %tmp18215 = getelementptr inbounds float, float* %tmp18214, i64 1
+ %tmp18216 = getelementptr inbounds float, float* %tmp18215, i64 1
+ %tmp18217 = getelementptr inbounds float, float* %tmp18216, i64 1
+ %tmp18218 = getelementptr inbounds float, float* %tmp18217, i64 1
+ %tmp18219 = getelementptr inbounds float, float* %tmp18218, i64 1
+ %tmp18220 = getelementptr inbounds float, float* %tmp18219, i64 1
+ %tmp18221 = getelementptr inbounds float, float* %tmp18220, i64 1
+ %tmp18222 = getelementptr inbounds float, float* %tmp18221, i64 1
+ %tmp18223 = getelementptr inbounds float, float* %tmp18222, i64 1
+ %tmp18224 = getelementptr inbounds float, float* %tmp18223, i64 1
+ %tmp18225 = getelementptr inbounds float, float* %tmp18224, i64 1
+ %tmp18226 = getelementptr inbounds float, float* %tmp18225, i64 1
+ %tmp18227 = getelementptr inbounds float, float* %tmp18226, i64 1
+ %tmp18228 = getelementptr inbounds float, float* %tmp18227, i64 1
+ %tmp18229 = getelementptr inbounds float, float* %tmp18228, i64 1
+ %tmp18230 = getelementptr inbounds float, float* %tmp18229, i64 1
+ %tmp18231 = getelementptr inbounds float, float* %tmp18230, i64 1
+ %tmp18232 = getelementptr inbounds float, float* %tmp18231, i64 1
+ %tmp18233 = getelementptr inbounds float, float* %tmp18232, i64 1
+ %tmp18234 = getelementptr inbounds float, float* %tmp18233, i64 1
+ %tmp18235 = getelementptr inbounds float, float* %tmp18234, i64 1
+ %tmp18236 = getelementptr inbounds float, float* %tmp18235, i64 1
+ %tmp18237 = getelementptr inbounds float, float* %tmp18236, i64 1
+ %tmp18238 = getelementptr inbounds float, float* %tmp18237, i64 1
+ %tmp18239 = getelementptr inbounds float, float* %tmp18238, i64 1
+ %tmp18240 = getelementptr inbounds float, float* %tmp18239, i64 1
+ %tmp18241 = getelementptr inbounds float, float* %tmp18240, i64 1
+ %tmp18242 = getelementptr inbounds float, float* %tmp18241, i64 1
+ %tmp18243 = getelementptr inbounds float, float* %tmp18242, i64 1
+ %tmp18244 = getelementptr inbounds float, float* %tmp18243, i64 1
+ %tmp18245 = getelementptr inbounds float, float* %tmp18244, i64 1
+ %tmp18246 = getelementptr inbounds float, float* %tmp18245, i64 1
+ %tmp18247 = getelementptr inbounds float, float* %tmp18246, i64 1
+ %tmp18248 = getelementptr inbounds float, float* %tmp18247, i64 1
+ %tmp18249 = getelementptr inbounds float, float* %tmp18248, i64 1
+ %tmp18250 = getelementptr inbounds float, float* %tmp18249, i64 1
+ %tmp18251 = getelementptr inbounds float, float* %tmp18250, i64 1
+ %tmp18252 = getelementptr inbounds float, float* %tmp18251, i64 1
+ %tmp18253 = getelementptr inbounds float, float* %tmp18252, i64 1
+ %tmp18254 = getelementptr inbounds float, float* %tmp18253, i64 1
+ %tmp18255 = getelementptr inbounds float, float* %tmp18254, i64 1
+ %tmp18256 = getelementptr inbounds float, float* %tmp18255, i64 1
+ %tmp18257 = getelementptr inbounds float, float* %tmp18256, i64 1
+ %tmp18258 = getelementptr inbounds float, float* %tmp18257, i64 1
+ %tmp18259 = getelementptr inbounds float, float* %tmp18258, i64 1
+ %tmp18260 = getelementptr inbounds float, float* %tmp18259, i64 1
+ %tmp18261 = getelementptr inbounds float, float* %tmp18260, i64 1
+ %tmp18262 = getelementptr inbounds float, float* %tmp18261, i64 1
+ %tmp18263 = getelementptr inbounds float, float* %tmp18262, i64 1
+ %tmp18264 = getelementptr inbounds float, float* %tmp18263, i64 1
+ %tmp18265 = getelementptr inbounds float, float* %tmp18264, i64 1
+ %tmp18266 = getelementptr inbounds float, float* %tmp18265, i64 1
+ %tmp18267 = getelementptr inbounds float, float* %tmp18266, i64 1
+ %tmp18268 = getelementptr inbounds float, float* %tmp18267, i64 1
+ %tmp18269 = getelementptr inbounds float, float* %tmp18268, i64 1
+ %tmp18270 = getelementptr inbounds float, float* %tmp18269, i64 1
+ %tmp18271 = getelementptr inbounds float, float* %tmp18270, i64 1
+ %tmp18272 = getelementptr inbounds float, float* %tmp18271, i64 1
+ %tmp18273 = getelementptr inbounds float, float* %tmp18272, i64 1
+ %tmp18274 = getelementptr inbounds float, float* %tmp18273, i64 1
+ %tmp18275 = getelementptr inbounds float, float* %tmp18274, i64 1
+ %tmp18276 = getelementptr inbounds float, float* %tmp18275, i64 1
+ %tmp18277 = getelementptr inbounds float, float* %tmp18276, i64 1
+ %tmp18278 = getelementptr inbounds float, float* %tmp18277, i64 1
+ %tmp18279 = getelementptr inbounds float, float* %tmp18278, i64 1
+ %tmp18280 = getelementptr inbounds float, float* %tmp18279, i64 1
+ %tmp18281 = getelementptr inbounds float, float* %tmp18280, i64 1
+ %tmp18282 = getelementptr inbounds float, float* %tmp18281, i64 1
+ %tmp18283 = getelementptr inbounds float, float* %tmp18282, i64 1
+ %tmp18284 = getelementptr inbounds float, float* %tmp18283, i64 1
+ %tmp18285 = getelementptr inbounds float, float* %tmp18284, i64 1
+ %tmp18286 = getelementptr inbounds float, float* %tmp18285, i64 1
+ %tmp18287 = getelementptr inbounds float, float* %tmp18286, i64 1
+ %tmp18288 = getelementptr inbounds float, float* %tmp18287, i64 1
+ %tmp18289 = getelementptr inbounds float, float* %tmp18288, i64 1
+ %tmp18290 = getelementptr inbounds float, float* %tmp18289, i64 1
+ %tmp18291 = getelementptr inbounds float, float* %tmp18290, i64 1
+ %tmp18292 = getelementptr inbounds float, float* %tmp18291, i64 1
+ %tmp18293 = getelementptr inbounds float, float* %tmp18292, i64 1
+ %tmp18294 = getelementptr inbounds float, float* %tmp18293, i64 1
+ %tmp18295 = getelementptr inbounds float, float* %tmp18294, i64 1
+ %tmp18296 = getelementptr inbounds float, float* %tmp18295, i64 1
+ %tmp18297 = getelementptr inbounds float, float* %tmp18296, i64 1
+ %tmp18298 = getelementptr inbounds float, float* %tmp18297, i64 1
+ %tmp18299 = getelementptr inbounds float, float* %tmp18298, i64 1
+ %tmp18300 = getelementptr inbounds float, float* %tmp18299, i64 1
+ %tmp18301 = getelementptr inbounds float, float* %tmp18300, i64 1
+ %tmp18302 = getelementptr inbounds float, float* %tmp18301, i64 1
+ %tmp18303 = getelementptr inbounds float, float* %tmp18302, i64 1
+ %tmp18304 = getelementptr inbounds float, float* %tmp18303, i64 1
+ %tmp18305 = getelementptr inbounds float, float* %tmp18304, i64 1
+ %tmp18306 = getelementptr inbounds float, float* %tmp18305, i64 1
+ %tmp18307 = getelementptr inbounds float, float* %tmp18306, i64 1
+ %tmp18308 = getelementptr inbounds float, float* %tmp18307, i64 1
+ %tmp18309 = getelementptr inbounds float, float* %tmp18308, i64 1
+ %tmp18310 = getelementptr inbounds float, float* %tmp18309, i64 1
+ %tmp18311 = getelementptr inbounds float, float* %tmp18310, i64 1
+ %tmp18312 = getelementptr inbounds float, float* %tmp18311, i64 1
+ %tmp18313 = getelementptr inbounds float, float* %tmp18312, i64 1
+ %tmp18314 = getelementptr inbounds float, float* %tmp18313, i64 1
+ %tmp18315 = getelementptr inbounds float, float* %tmp18314, i64 1
+ %tmp18316 = getelementptr inbounds float, float* %tmp18315, i64 1
+ %tmp18317 = getelementptr inbounds float, float* %tmp18316, i64 1
+ %tmp18318 = getelementptr inbounds float, float* %tmp18317, i64 1
+ %tmp18319 = getelementptr inbounds float, float* %tmp18318, i64 1
+ %tmp18320 = getelementptr inbounds float, float* %tmp18319, i64 1
+ %tmp18321 = getelementptr inbounds float, float* %tmp18320, i64 1
+ %tmp18322 = getelementptr inbounds float, float* %tmp18321, i64 1
+ %tmp18323 = getelementptr inbounds float, float* %tmp18322, i64 1
+ %tmp18324 = getelementptr inbounds float, float* %tmp18323, i64 1
+ %tmp18325 = getelementptr inbounds float, float* %tmp18324, i64 1
+ %tmp18326 = getelementptr inbounds float, float* %tmp18325, i64 1
+ %tmp18327 = getelementptr inbounds float, float* %tmp18326, i64 1
+ %tmp18328 = getelementptr inbounds float, float* %tmp18327, i64 1
+ %tmp18329 = getelementptr inbounds float, float* %tmp18328, i64 1
+ %tmp18330 = getelementptr inbounds float, float* %tmp18329, i64 1
+ %tmp18331 = getelementptr inbounds float, float* %tmp18330, i64 1
+ %tmp18332 = getelementptr inbounds float, float* %tmp18331, i64 1
+ %tmp18333 = getelementptr inbounds float, float* %tmp18332, i64 1
+ %tmp18334 = getelementptr inbounds float, float* %tmp18333, i64 1
+ %tmp18335 = getelementptr inbounds float, float* %tmp18334, i64 1
+ %tmp18336 = getelementptr inbounds float, float* %tmp18335, i64 1
+ %tmp18337 = getelementptr inbounds float, float* %tmp18336, i64 1
+ %tmp18338 = getelementptr inbounds float, float* %tmp18337, i64 1
+ %tmp18339 = getelementptr inbounds float, float* %tmp18338, i64 1
+ %tmp18340 = getelementptr inbounds float, float* %tmp18339, i64 1
+ %tmp18341 = getelementptr inbounds float, float* %tmp18340, i64 1
+ %tmp18342 = getelementptr inbounds float, float* %tmp18341, i64 1
+ %tmp18343 = getelementptr inbounds float, float* %tmp18342, i64 1
+ %tmp18344 = getelementptr inbounds float, float* %tmp18343, i64 1
+ %tmp18345 = getelementptr inbounds float, float* %tmp18344, i64 1
+ %tmp18346 = getelementptr inbounds float, float* %tmp18345, i64 1
+ %tmp18347 = getelementptr inbounds float, float* %tmp18346, i64 1
+ %tmp18348 = getelementptr inbounds float, float* %tmp18347, i64 1
+ %tmp18349 = getelementptr inbounds float, float* %tmp18348, i64 1
+ %tmp18350 = getelementptr inbounds float, float* %tmp18349, i64 1
+ %tmp18351 = getelementptr inbounds float, float* %tmp18350, i64 1
+ %tmp18352 = getelementptr inbounds float, float* %tmp18351, i64 1
+ %tmp18353 = getelementptr inbounds float, float* %tmp18352, i64 1
+ %tmp18354 = getelementptr inbounds float, float* %tmp18353, i64 1
+ %tmp18355 = getelementptr inbounds float, float* %tmp18354, i64 1
+ %tmp18356 = getelementptr inbounds float, float* %tmp18355, i64 1
+ %tmp18357 = getelementptr inbounds float, float* %tmp18356, i64 1
+ %tmp18358 = getelementptr inbounds float, float* %tmp18357, i64 1
+ %tmp18359 = getelementptr inbounds float, float* %tmp18358, i64 1
+ %tmp18360 = getelementptr inbounds float, float* %tmp18359, i64 1
+ %tmp18361 = getelementptr inbounds float, float* %tmp18360, i64 1
+ %tmp18362 = getelementptr inbounds float, float* %tmp18361, i64 1
+ %tmp18363 = getelementptr inbounds float, float* %tmp18362, i64 1
+ %tmp18364 = getelementptr inbounds float, float* %tmp18363, i64 1
+ %tmp18365 = getelementptr inbounds float, float* %tmp18364, i64 1
+ %tmp18366 = getelementptr inbounds float, float* %tmp18365, i64 1
+ %tmp18367 = getelementptr inbounds float, float* %tmp18366, i64 1
+ %tmp18368 = getelementptr inbounds float, float* %tmp18367, i64 1
+ %tmp18369 = getelementptr inbounds float, float* %tmp18368, i64 1
+ %tmp18370 = getelementptr inbounds float, float* %tmp18369, i64 1
+ %tmp18371 = getelementptr inbounds float, float* %tmp18370, i64 1
+ %tmp18372 = getelementptr inbounds float, float* %tmp18371, i64 1
+ %tmp18373 = getelementptr inbounds float, float* %tmp18372, i64 1
+ %tmp18374 = getelementptr inbounds float, float* %tmp18373, i64 1
+ %tmp18375 = getelementptr inbounds float, float* %tmp18374, i64 1
+ %tmp18376 = getelementptr inbounds float, float* %tmp18375, i64 1
+ %tmp18377 = getelementptr inbounds float, float* %tmp18376, i64 1
+ %tmp18378 = getelementptr inbounds float, float* %tmp18377, i64 1
+ %tmp18379 = getelementptr inbounds float, float* %tmp18378, i64 1
+ %tmp18380 = getelementptr inbounds float, float* %tmp18379, i64 1
+ %tmp18381 = getelementptr inbounds float, float* %tmp18380, i64 1
+ %tmp18382 = getelementptr inbounds float, float* %tmp18381, i64 1
+ %tmp18383 = getelementptr inbounds float, float* %tmp18382, i64 1
+ %tmp18384 = getelementptr inbounds float, float* %tmp18383, i64 1
+ %tmp18385 = getelementptr inbounds float, float* %tmp18384, i64 1
+ %tmp18386 = getelementptr inbounds float, float* %tmp18385, i64 1
+ %tmp18387 = getelementptr inbounds float, float* %tmp18386, i64 1
+ %tmp18388 = getelementptr inbounds float, float* %tmp18387, i64 1
+ %tmp18389 = getelementptr inbounds float, float* %tmp18388, i64 1
+ %tmp18390 = getelementptr inbounds float, float* %tmp18389, i64 1
+ %tmp18391 = getelementptr inbounds float, float* %tmp18390, i64 1
+ %tmp18392 = getelementptr inbounds float, float* %tmp18391, i64 1
+ %tmp18393 = getelementptr inbounds float, float* %tmp18392, i64 1
+ %tmp18394 = getelementptr inbounds float, float* %tmp18393, i64 1
+ %tmp18395 = getelementptr inbounds float, float* %tmp18394, i64 1
+ %tmp18396 = getelementptr inbounds float, float* %tmp18395, i64 1
+ %tmp18397 = getelementptr inbounds float, float* %tmp18396, i64 1
+ %tmp18398 = getelementptr inbounds float, float* %tmp18397, i64 1
+ %tmp18399 = getelementptr inbounds float, float* %tmp18398, i64 1
+ %tmp18400 = getelementptr inbounds float, float* %tmp18399, i64 1
+ %tmp18401 = getelementptr inbounds float, float* %tmp18400, i64 1
+ %tmp18402 = getelementptr inbounds float, float* %tmp18401, i64 1
+ %tmp18403 = getelementptr inbounds float, float* %tmp18402, i64 1
+ %tmp18404 = getelementptr inbounds float, float* %tmp18403, i64 1
+ %tmp18405 = getelementptr inbounds float, float* %tmp18404, i64 1
+ %tmp18406 = getelementptr inbounds float, float* %tmp18405, i64 1
+ %tmp18407 = getelementptr inbounds float, float* %tmp18406, i64 1
+ %tmp18408 = getelementptr inbounds float, float* %tmp18407, i64 1
+ %tmp18409 = getelementptr inbounds float, float* %tmp18408, i64 1
+ %tmp18410 = getelementptr inbounds float, float* %tmp18409, i64 1
+ %tmp18411 = getelementptr inbounds float, float* %tmp18410, i64 1
+ %tmp18412 = getelementptr inbounds float, float* %tmp18411, i64 1
+ %tmp18413 = getelementptr inbounds float, float* %tmp18412, i64 1
+ %tmp18414 = getelementptr inbounds float, float* %tmp18413, i64 1
+ %tmp18415 = getelementptr inbounds float, float* %tmp18414, i64 1
+ %tmp18416 = getelementptr inbounds float, float* %tmp18415, i64 1
+ %tmp18417 = getelementptr inbounds float, float* %tmp18416, i64 1
+ %tmp18418 = getelementptr inbounds float, float* %tmp18417, i64 1
+ %tmp18419 = getelementptr inbounds float, float* %tmp18418, i64 1
+ %tmp18420 = getelementptr inbounds float, float* %tmp18419, i64 1
+ %tmp18421 = getelementptr inbounds float, float* %tmp18420, i64 1
+ %tmp18422 = getelementptr inbounds float, float* %tmp18421, i64 1
+ %tmp18423 = getelementptr inbounds float, float* %tmp18422, i64 1
+ %tmp18424 = getelementptr inbounds float, float* %tmp18423, i64 1
+ %tmp18425 = getelementptr inbounds float, float* %tmp18424, i64 1
+ %tmp18426 = getelementptr inbounds float, float* %tmp18425, i64 1
+ %tmp18427 = getelementptr inbounds float, float* %tmp18426, i64 1
+ %tmp18428 = getelementptr inbounds float, float* %tmp18427, i64 1
+ %tmp18429 = getelementptr inbounds float, float* %tmp18428, i64 1
+ %tmp18430 = getelementptr inbounds float, float* %tmp18429, i64 1
+ %tmp18431 = getelementptr inbounds float, float* %tmp18430, i64 1
+ %tmp18432 = getelementptr inbounds float, float* %tmp18431, i64 1
+ %tmp18433 = getelementptr inbounds float, float* %tmp18432, i64 1
+ %tmp18434 = getelementptr inbounds float, float* %tmp18433, i64 1
+ %tmp18435 = getelementptr inbounds float, float* %tmp18434, i64 1
+ %tmp18436 = getelementptr inbounds float, float* %tmp18435, i64 1
+ %tmp18437 = getelementptr inbounds float, float* %tmp18436, i64 1
+ %tmp18438 = getelementptr inbounds float, float* %tmp18437, i64 1
+ %tmp18439 = getelementptr inbounds float, float* %tmp18438, i64 1
+ %tmp18440 = getelementptr inbounds float, float* %tmp18439, i64 1
+ %tmp18441 = getelementptr inbounds float, float* %tmp18440, i64 1
+ %tmp18442 = getelementptr inbounds float, float* %tmp18441, i64 1
+ %tmp18443 = getelementptr inbounds float, float* %tmp18442, i64 1
+ %tmp18444 = getelementptr inbounds float, float* %tmp18443, i64 1
+ %tmp18445 = getelementptr inbounds float, float* %tmp18444, i64 1
+ %tmp18446 = getelementptr inbounds float, float* %tmp18445, i64 1
+ %tmp18447 = getelementptr inbounds float, float* %tmp18446, i64 1
+ %tmp18448 = getelementptr inbounds float, float* %tmp18447, i64 1
+ %tmp18449 = getelementptr inbounds float, float* %tmp18448, i64 1
+ %tmp18450 = getelementptr inbounds float, float* %tmp18449, i64 1
+ %tmp18451 = getelementptr inbounds float, float* %tmp18450, i64 1
+ %tmp18452 = getelementptr inbounds float, float* %tmp18451, i64 1
+ %tmp18453 = getelementptr inbounds float, float* %tmp18452, i64 1
+ %tmp18454 = getelementptr inbounds float, float* %tmp18453, i64 1
+ %tmp18455 = getelementptr inbounds float, float* %tmp18454, i64 1
+ %tmp18456 = getelementptr inbounds float, float* %tmp18455, i64 1
+ %tmp18457 = getelementptr inbounds float, float* %tmp18456, i64 1
+ %tmp18458 = getelementptr inbounds float, float* %tmp18457, i64 1
+ %tmp18459 = getelementptr inbounds float, float* %tmp18458, i64 1
+ %tmp18460 = getelementptr inbounds float, float* %tmp18459, i64 1
+ %tmp18461 = getelementptr inbounds float, float* %tmp18460, i64 1
+ %tmp18462 = getelementptr inbounds float, float* %tmp18461, i64 1
+ %tmp18463 = getelementptr inbounds float, float* %tmp18462, i64 1
+ %tmp18464 = getelementptr inbounds float, float* %tmp18463, i64 1
+ %tmp18465 = getelementptr inbounds float, float* %tmp18464, i64 1
+ %tmp18466 = getelementptr inbounds float, float* %tmp18465, i64 1
+ %tmp18467 = getelementptr inbounds float, float* %tmp18466, i64 1
+ %tmp18468 = getelementptr inbounds float, float* %tmp18467, i64 1
+ %tmp18469 = getelementptr inbounds float, float* %tmp18468, i64 1
+ %tmp18470 = getelementptr inbounds float, float* %tmp18469, i64 1
+ %tmp18471 = getelementptr inbounds float, float* %tmp18470, i64 1
+ %tmp18472 = getelementptr inbounds float, float* %tmp18471, i64 1
+ %tmp18473 = getelementptr inbounds float, float* %tmp18472, i64 1
+ %tmp18474 = getelementptr inbounds float, float* %tmp18473, i64 1
+ %tmp18475 = getelementptr inbounds float, float* %tmp18474, i64 1
+ %tmp18476 = getelementptr inbounds float, float* %tmp18475, i64 1
+ %tmp18477 = getelementptr inbounds float, float* %tmp18476, i64 1
+ %tmp18478 = getelementptr inbounds float, float* %tmp18477, i64 1
+ %tmp18479 = getelementptr inbounds float, float* %tmp18478, i64 1
+ %tmp18480 = getelementptr inbounds float, float* %tmp18479, i64 1
+ %tmp18481 = getelementptr inbounds float, float* %tmp18480, i64 1
+ %tmp18482 = getelementptr inbounds float, float* %tmp18481, i64 1
+ %tmp18483 = getelementptr inbounds float, float* %tmp18482, i64 1
+ %tmp18484 = getelementptr inbounds float, float* %tmp18483, i64 1
+ %tmp18485 = getelementptr inbounds float, float* %tmp18484, i64 1
+ %tmp18486 = getelementptr inbounds float, float* %tmp18485, i64 1
+ %tmp18487 = getelementptr inbounds float, float* %tmp18486, i64 1
+ %tmp18488 = getelementptr inbounds float, float* %tmp18487, i64 1
+ %tmp18489 = getelementptr inbounds float, float* %tmp18488, i64 1
+ %tmp18490 = getelementptr inbounds float, float* %tmp18489, i64 1
+ %tmp18491 = getelementptr inbounds float, float* %tmp18490, i64 1
+ %tmp18492 = getelementptr inbounds float, float* %tmp18491, i64 1
+ %tmp18493 = getelementptr inbounds float, float* %tmp18492, i64 1
+ %tmp18494 = getelementptr inbounds float, float* %tmp18493, i64 1
+ %tmp18495 = getelementptr inbounds float, float* %tmp18494, i64 1
+ %tmp18496 = getelementptr inbounds float, float* %tmp18495, i64 1
+ %tmp18497 = getelementptr inbounds float, float* %tmp18496, i64 1
+ %tmp18498 = getelementptr inbounds float, float* %tmp18497, i64 1
+ %tmp18499 = getelementptr inbounds float, float* %tmp18498, i64 1
+ %tmp18500 = getelementptr inbounds float, float* %tmp18499, i64 1
+ %tmp18501 = getelementptr inbounds float, float* %tmp18500, i64 1
+ %tmp18502 = getelementptr inbounds float, float* %tmp18501, i64 1
+ %tmp18503 = getelementptr inbounds float, float* %tmp18502, i64 1
+ %tmp18504 = getelementptr inbounds float, float* %tmp18503, i64 1
+ %tmp18505 = getelementptr inbounds float, float* %tmp18504, i64 1
+ %tmp18506 = getelementptr inbounds float, float* %tmp18505, i64 1
+ %tmp18507 = getelementptr inbounds float, float* %tmp18506, i64 1
+ %tmp18508 = getelementptr inbounds float, float* %tmp18507, i64 1
+ %tmp18509 = getelementptr inbounds float, float* %tmp18508, i64 1
+ %tmp18510 = getelementptr inbounds float, float* %tmp18509, i64 1
+ %tmp18511 = getelementptr inbounds float, float* %tmp18510, i64 1
+ %tmp18512 = getelementptr inbounds float, float* %tmp18511, i64 1
+ %tmp18513 = getelementptr inbounds float, float* %tmp18512, i64 1
+ %tmp18514 = getelementptr inbounds float, float* %tmp18513, i64 1
+ %tmp18515 = getelementptr inbounds float, float* %tmp18514, i64 1
+ %tmp18516 = getelementptr inbounds float, float* %tmp18515, i64 1
+ %tmp18517 = getelementptr inbounds float, float* %tmp18516, i64 1
+ %tmp18518 = getelementptr inbounds float, float* %tmp18517, i64 1
+ %tmp18519 = getelementptr inbounds float, float* %tmp18518, i64 1
+ %tmp18520 = getelementptr inbounds float, float* %tmp18519, i64 1
+ %tmp18521 = getelementptr inbounds float, float* %tmp18520, i64 1
+ %tmp18522 = getelementptr inbounds float, float* %tmp18521, i64 1
+ %tmp18523 = getelementptr inbounds float, float* %tmp18522, i64 1
+ %tmp18524 = getelementptr inbounds float, float* %tmp18523, i64 1
+ %tmp18525 = getelementptr inbounds float, float* %tmp18524, i64 1
+ %tmp18526 = getelementptr inbounds float, float* %tmp18525, i64 1
+ %tmp18527 = getelementptr inbounds float, float* %tmp18526, i64 1
+ %tmp18528 = getelementptr inbounds float, float* %tmp18527, i64 1
+ %tmp18529 = getelementptr inbounds float, float* %tmp18528, i64 1
+ %tmp18530 = getelementptr inbounds float, float* %tmp18529, i64 1
+ %tmp18531 = getelementptr inbounds float, float* %tmp18530, i64 1
+ %tmp18532 = getelementptr inbounds float, float* %tmp18531, i64 1
+ %tmp18533 = getelementptr inbounds float, float* %tmp18532, i64 1
+ %tmp18534 = getelementptr inbounds float, float* %tmp18533, i64 1
+ %tmp18535 = getelementptr inbounds float, float* %tmp18534, i64 1
+ %tmp18536 = getelementptr inbounds float, float* %tmp18535, i64 1
+ %tmp18537 = getelementptr inbounds float, float* %tmp18536, i64 1
+ %tmp18538 = getelementptr inbounds float, float* %tmp18537, i64 1
+ %tmp18539 = getelementptr inbounds float, float* %tmp18538, i64 1
+ %tmp18540 = getelementptr inbounds float, float* %tmp18539, i64 1
+ %tmp18541 = getelementptr inbounds float, float* %tmp18540, i64 1
+ %tmp18542 = getelementptr inbounds float, float* %tmp18541, i64 1
+ %tmp18543 = getelementptr inbounds float, float* %tmp18542, i64 1
+ %tmp18544 = getelementptr inbounds float, float* %tmp18543, i64 1
+ %tmp18545 = getelementptr inbounds float, float* %tmp18544, i64 1
+ %tmp18546 = getelementptr inbounds float, float* %tmp18545, i64 1
+ %tmp18547 = getelementptr inbounds float, float* %tmp18546, i64 1
+ %tmp18548 = getelementptr inbounds float, float* %tmp18547, i64 1
+ %tmp18549 = getelementptr inbounds float, float* %tmp18548, i64 1
+ %tmp18550 = getelementptr inbounds float, float* %tmp18549, i64 1
+ %tmp18551 = getelementptr inbounds float, float* %tmp18550, i64 1
+ %tmp18552 = getelementptr inbounds float, float* %tmp18551, i64 1
+ %tmp18553 = getelementptr inbounds float, float* %tmp18552, i64 1
+ %tmp18554 = getelementptr inbounds float, float* %tmp18553, i64 1
+ %tmp18555 = getelementptr inbounds float, float* %tmp18554, i64 1
+ %tmp18556 = getelementptr inbounds float, float* %tmp18555, i64 1
+ %tmp18557 = getelementptr inbounds float, float* %tmp18556, i64 1
+ %tmp18558 = getelementptr inbounds float, float* %tmp18557, i64 1
+ %tmp18559 = getelementptr inbounds float, float* %tmp18558, i64 1
+ %tmp18560 = getelementptr inbounds float, float* %tmp18559, i64 1
+ %tmp18561 = getelementptr inbounds float, float* %tmp18560, i64 1
+ %tmp18562 = getelementptr inbounds float, float* %tmp18561, i64 1
+ %tmp18563 = getelementptr inbounds float, float* %tmp18562, i64 1
+ %tmp18564 = getelementptr inbounds float, float* %tmp18563, i64 1
+ %tmp18565 = getelementptr inbounds float, float* %tmp18564, i64 1
+ %tmp18566 = getelementptr inbounds float, float* %tmp18565, i64 1
+ %tmp18567 = getelementptr inbounds float, float* %tmp18566, i64 1
+ %tmp18568 = getelementptr inbounds float, float* %tmp18567, i64 1
+ %tmp18569 = getelementptr inbounds float, float* %tmp18568, i64 1
+ %tmp18570 = getelementptr inbounds float, float* %tmp18569, i64 1
+ %tmp18571 = getelementptr inbounds float, float* %tmp18570, i64 1
+ %tmp18572 = getelementptr inbounds float, float* %tmp18571, i64 1
+ %tmp18573 = getelementptr inbounds float, float* %tmp18572, i64 1
+ %tmp18574 = getelementptr inbounds float, float* %tmp18573, i64 1
+ %tmp18575 = getelementptr inbounds float, float* %tmp18574, i64 1
+ %tmp18576 = getelementptr inbounds float, float* %tmp18575, i64 1
+ %tmp18577 = getelementptr inbounds float, float* %tmp18576, i64 1
+ %tmp18578 = getelementptr inbounds float, float* %tmp18577, i64 1
+ %tmp18579 = getelementptr inbounds float, float* %tmp18578, i64 1
+ %tmp18580 = getelementptr inbounds float, float* %tmp18579, i64 1
+ %tmp18581 = getelementptr inbounds float, float* %tmp18580, i64 1
+ %tmp18582 = getelementptr inbounds float, float* %tmp18581, i64 1
+ %tmp18583 = getelementptr inbounds float, float* %tmp18582, i64 1
+ %tmp18584 = getelementptr inbounds float, float* %tmp18583, i64 1
+ %tmp18585 = getelementptr inbounds float, float* %tmp18584, i64 1
+ %tmp18586 = getelementptr inbounds float, float* %tmp18585, i64 1
+ %tmp18587 = getelementptr inbounds float, float* %tmp18586, i64 1
+ %tmp18588 = getelementptr inbounds float, float* %tmp18587, i64 1
+ %tmp18589 = getelementptr inbounds float, float* %tmp18588, i64 1
+ %tmp18590 = getelementptr inbounds float, float* %tmp18589, i64 1
+ %tmp18591 = getelementptr inbounds float, float* %tmp18590, i64 1
+ %tmp18592 = getelementptr inbounds float, float* %tmp18591, i64 1
+ %tmp18593 = getelementptr inbounds float, float* %tmp18592, i64 1
+ %tmp18594 = getelementptr inbounds float, float* %tmp18593, i64 1
+ %tmp18595 = getelementptr inbounds float, float* %tmp18594, i64 1
+ %tmp18596 = getelementptr inbounds float, float* %tmp18595, i64 1
+ %tmp18597 = getelementptr inbounds float, float* %tmp18596, i64 1
+ %tmp18598 = getelementptr inbounds float, float* %tmp18597, i64 1
+ %tmp18599 = getelementptr inbounds float, float* %tmp18598, i64 1
+ %tmp18600 = getelementptr inbounds float, float* %tmp18599, i64 1
+ %tmp18601 = getelementptr inbounds float, float* %tmp18600, i64 1
+ %tmp18602 = getelementptr inbounds float, float* %tmp18601, i64 1
+ %tmp18603 = getelementptr inbounds float, float* %tmp18602, i64 1
+ %tmp18604 = getelementptr inbounds float, float* %tmp18603, i64 1
+ %tmp18605 = getelementptr inbounds float, float* %tmp18604, i64 1
+ %tmp18606 = getelementptr inbounds float, float* %tmp18605, i64 1
+ %tmp18607 = getelementptr inbounds float, float* %tmp18606, i64 1
+ %tmp18608 = getelementptr inbounds float, float* %tmp18607, i64 1
+ %tmp18609 = getelementptr inbounds float, float* %tmp18608, i64 1
+ %tmp18610 = getelementptr inbounds float, float* %tmp18609, i64 1
+ %tmp18611 = getelementptr inbounds float, float* %tmp18610, i64 1
+ %tmp18612 = getelementptr inbounds float, float* %tmp18611, i64 1
+ %tmp18613 = getelementptr inbounds float, float* %tmp18612, i64 1
+ %tmp18614 = getelementptr inbounds float, float* %tmp18613, i64 1
+ %tmp18615 = getelementptr inbounds float, float* %tmp18614, i64 1
+ %tmp18616 = getelementptr inbounds float, float* %tmp18615, i64 1
+ %tmp18617 = getelementptr inbounds float, float* %tmp18616, i64 1
+ %tmp18618 = getelementptr inbounds float, float* %tmp18617, i64 1
+ %tmp18619 = getelementptr inbounds float, float* %tmp18618, i64 1
+ %tmp18620 = getelementptr inbounds float, float* %tmp18619, i64 1
+ %tmp18621 = getelementptr inbounds float, float* %tmp18620, i64 1
+ %tmp18622 = getelementptr inbounds float, float* %tmp18621, i64 1
+ %tmp18623 = getelementptr inbounds float, float* %tmp18622, i64 1
+ %tmp18624 = getelementptr inbounds float, float* %tmp18623, i64 1
+ %tmp18625 = getelementptr inbounds float, float* %tmp18624, i64 1
+ %tmp18626 = getelementptr inbounds float, float* %tmp18625, i64 1
+ %tmp18627 = getelementptr inbounds float, float* %tmp18626, i64 1
+ %tmp18628 = getelementptr inbounds float, float* %tmp18627, i64 1
+ %tmp18629 = getelementptr inbounds float, float* %tmp18628, i64 1
+ %tmp18630 = getelementptr inbounds float, float* %tmp18629, i64 1
+ %tmp18631 = getelementptr inbounds float, float* %tmp18630, i64 1
+ %tmp18632 = getelementptr inbounds float, float* %tmp18631, i64 1
+ %tmp18633 = getelementptr inbounds float, float* %tmp18632, i64 1
+ %tmp18634 = getelementptr inbounds float, float* %tmp18633, i64 1
+ %tmp18635 = getelementptr inbounds float, float* %tmp18634, i64 1
+ %tmp18636 = getelementptr inbounds float, float* %tmp18635, i64 1
+ %tmp18637 = getelementptr inbounds float, float* %tmp18636, i64 1
+ %tmp18638 = getelementptr inbounds float, float* %tmp18637, i64 1
+ %tmp18639 = getelementptr inbounds float, float* %tmp18638, i64 1
+ %tmp18640 = getelementptr inbounds float, float* %tmp18639, i64 1
+ %tmp18641 = getelementptr inbounds float, float* %tmp18640, i64 1
+ %tmp18642 = getelementptr inbounds float, float* %tmp18641, i64 1
+ %tmp18643 = getelementptr inbounds float, float* %tmp18642, i64 1
+ %tmp18644 = getelementptr inbounds float, float* %tmp18643, i64 1
+ %tmp18645 = getelementptr inbounds float, float* %tmp18644, i64 1
+ %tmp18646 = getelementptr inbounds float, float* %tmp18645, i64 1
+ %tmp18647 = getelementptr inbounds float, float* %tmp18646, i64 1
+ %tmp18648 = getelementptr inbounds float, float* %tmp18647, i64 1
+ %tmp18649 = getelementptr inbounds float, float* %tmp18648, i64 1
+ %tmp18650 = getelementptr inbounds float, float* %tmp18649, i64 1
+ %tmp18651 = getelementptr inbounds float, float* %tmp18650, i64 1
+ %tmp18652 = getelementptr inbounds float, float* %tmp18651, i64 1
+ %tmp18653 = getelementptr inbounds float, float* %tmp18652, i64 1
+ %tmp18654 = getelementptr inbounds float, float* %tmp18653, i64 1
+ %tmp18655 = getelementptr inbounds float, float* %tmp18654, i64 1
+ %tmp18656 = getelementptr inbounds float, float* %tmp18655, i64 1
+ %tmp18657 = getelementptr inbounds float, float* %tmp18656, i64 1
+ %tmp18658 = getelementptr inbounds float, float* %tmp18657, i64 1
+ %tmp18659 = getelementptr inbounds float, float* %tmp18658, i64 1
+ %tmp18660 = getelementptr inbounds float, float* %tmp18659, i64 1
+ %tmp18661 = getelementptr inbounds float, float* %tmp18660, i64 1
+ %tmp18662 = getelementptr inbounds float, float* %tmp18661, i64 1
+ %tmp18663 = getelementptr inbounds float, float* %tmp18662, i64 1
+ %tmp18664 = getelementptr inbounds float, float* %tmp18663, i64 1
+ %tmp18665 = getelementptr inbounds float, float* %tmp18664, i64 1
+ %tmp18666 = getelementptr inbounds float, float* %tmp18665, i64 1
+ %tmp18667 = getelementptr inbounds float, float* %tmp18666, i64 1
+ %tmp18668 = getelementptr inbounds float, float* %tmp18667, i64 1
+ %tmp18669 = getelementptr inbounds float, float* %tmp18668, i64 1
+ %tmp18670 = getelementptr inbounds float, float* %tmp18669, i64 1
+ %tmp18671 = getelementptr inbounds float, float* %tmp18670, i64 1
+ %tmp18672 = getelementptr inbounds float, float* %tmp18671, i64 1
+ %tmp18673 = getelementptr inbounds float, float* %tmp18672, i64 1
+ %tmp18674 = getelementptr inbounds float, float* %tmp18673, i64 1
+ %tmp18675 = getelementptr inbounds float, float* %tmp18674, i64 1
+ %tmp18676 = getelementptr inbounds float, float* %tmp18675, i64 1
+ %tmp18677 = getelementptr inbounds float, float* %tmp18676, i64 1
+ %tmp18678 = getelementptr inbounds float, float* %tmp18677, i64 1
+ %tmp18679 = getelementptr inbounds float, float* %tmp18678, i64 1
+ %tmp18680 = getelementptr inbounds float, float* %tmp18679, i64 1
+ %tmp18681 = getelementptr inbounds float, float* %tmp18680, i64 1
+ %tmp18682 = getelementptr inbounds float, float* %tmp18681, i64 1
+ %tmp18683 = getelementptr inbounds float, float* %tmp18682, i64 1
+ %tmp18684 = getelementptr inbounds float, float* %tmp18683, i64 1
+ %tmp18685 = getelementptr inbounds float, float* %tmp18684, i64 1
+ %tmp18686 = getelementptr inbounds float, float* %tmp18685, i64 1
+ %tmp18687 = getelementptr inbounds float, float* %tmp18686, i64 1
+ %tmp18688 = getelementptr inbounds float, float* %tmp18687, i64 1
+ %tmp18689 = getelementptr inbounds float, float* %tmp18688, i64 1
+ %tmp18690 = getelementptr inbounds float, float* %tmp18689, i64 1
+ %tmp18691 = getelementptr inbounds float, float* %tmp18690, i64 1
+ %tmp18692 = getelementptr inbounds float, float* %tmp18691, i64 1
+ %tmp18693 = getelementptr inbounds float, float* %tmp18692, i64 1
+ %tmp18694 = getelementptr inbounds float, float* %tmp18693, i64 1
+ %tmp18695 = getelementptr inbounds float, float* %tmp18694, i64 1
+ %tmp18696 = getelementptr inbounds float, float* %tmp18695, i64 1
+ %tmp18697 = getelementptr inbounds float, float* %tmp18696, i64 1
+ %tmp18698 = getelementptr inbounds float, float* %tmp18697, i64 1
+ %tmp18699 = getelementptr inbounds float, float* %tmp18698, i64 1
+ %tmp18700 = getelementptr inbounds float, float* %tmp18699, i64 1
+ %tmp18701 = getelementptr inbounds float, float* %tmp18700, i64 1
+ %tmp18702 = getelementptr inbounds float, float* %tmp18701, i64 1
+ %tmp18703 = getelementptr inbounds float, float* %tmp18702, i64 1
+ %tmp18704 = getelementptr inbounds float, float* %tmp18703, i64 1
+ %tmp18705 = getelementptr inbounds float, float* %tmp18704, i64 1
+ %tmp18706 = getelementptr inbounds float, float* %tmp18705, i64 1
+ %tmp18707 = getelementptr inbounds float, float* %tmp18706, i64 1
+ %tmp18708 = getelementptr inbounds float, float* %tmp18707, i64 1
+ %tmp18709 = getelementptr inbounds float, float* %tmp18708, i64 1
+ %tmp18710 = getelementptr inbounds float, float* %tmp18709, i64 1
+ %tmp18711 = getelementptr inbounds float, float* %tmp18710, i64 1
+ %tmp18712 = getelementptr inbounds float, float* %tmp18711, i64 1
+ %tmp18713 = getelementptr inbounds float, float* %tmp18712, i64 1
+ %tmp18714 = getelementptr inbounds float, float* %tmp18713, i64 1
+ %tmp18715 = getelementptr inbounds float, float* %tmp18714, i64 1
+ %tmp18716 = getelementptr inbounds float, float* %tmp18715, i64 1
+ %tmp18717 = getelementptr inbounds float, float* %tmp18716, i64 1
+ %tmp18718 = getelementptr inbounds float, float* %tmp18717, i64 1
+ %tmp18719 = getelementptr inbounds float, float* %tmp18718, i64 1
+ %tmp18720 = getelementptr inbounds float, float* %tmp18719, i64 1
+ %tmp18721 = getelementptr inbounds float, float* %tmp18720, i64 1
+ %tmp18722 = getelementptr inbounds float, float* %tmp18721, i64 1
+ %tmp18723 = getelementptr inbounds float, float* %tmp18722, i64 1
+ %tmp18724 = getelementptr inbounds float, float* %tmp18723, i64 1
+ %tmp18725 = getelementptr inbounds float, float* %tmp18724, i64 1
+ %tmp18726 = getelementptr inbounds float, float* %tmp18725, i64 1
+ %tmp18727 = getelementptr inbounds float, float* %tmp18726, i64 1
+ %tmp18728 = getelementptr inbounds float, float* %tmp18727, i64 1
+ %tmp18729 = getelementptr inbounds float, float* %tmp18728, i64 1
+ %tmp18730 = getelementptr inbounds float, float* %tmp18729, i64 1
+ %tmp18731 = getelementptr inbounds float, float* %tmp18730, i64 1
+ %tmp18732 = getelementptr inbounds float, float* %tmp18731, i64 1
+ %tmp18733 = getelementptr inbounds float, float* %tmp18732, i64 1
+ %tmp18734 = getelementptr inbounds float, float* %tmp18733, i64 1
+ %tmp18735 = getelementptr inbounds float, float* %tmp18734, i64 1
+ %tmp18736 = getelementptr inbounds float, float* %tmp18735, i64 1
+ %tmp18737 = getelementptr inbounds float, float* %tmp18736, i64 1
+ %tmp18738 = getelementptr inbounds float, float* %tmp18737, i64 1
+ %tmp18739 = getelementptr inbounds float, float* %tmp18738, i64 1
+ %tmp18740 = getelementptr inbounds float, float* %tmp18739, i64 1
+ %tmp18741 = getelementptr inbounds float, float* %tmp18740, i64 1
+ %tmp18742 = getelementptr inbounds float, float* %tmp18741, i64 1
+ %tmp18743 = getelementptr inbounds float, float* %tmp18742, i64 1
+ %tmp18744 = getelementptr inbounds float, float* %tmp18743, i64 1
+ %tmp18745 = getelementptr inbounds float, float* %tmp18744, i64 1
+ %tmp18746 = getelementptr inbounds float, float* %tmp18745, i64 1
+ %tmp18747 = getelementptr inbounds float, float* %tmp18746, i64 1
+ %tmp18748 = getelementptr inbounds float, float* %tmp18747, i64 1
+ %tmp18749 = getelementptr inbounds float, float* %tmp18748, i64 1
+ %tmp18750 = getelementptr inbounds float, float* %tmp18749, i64 1
+ %tmp18751 = getelementptr inbounds float, float* %tmp18750, i64 1
+ %tmp18752 = getelementptr inbounds float, float* %tmp18751, i64 1
+ %tmp18753 = getelementptr inbounds float, float* %tmp18752, i64 1
+ %tmp18754 = getelementptr inbounds float, float* %tmp18753, i64 1
+ %tmp18755 = getelementptr inbounds float, float* %tmp18754, i64 1
+ %tmp18756 = getelementptr inbounds float, float* %tmp18755, i64 1
+ %tmp18757 = getelementptr inbounds float, float* %tmp18756, i64 1
+ %tmp18758 = getelementptr inbounds float, float* %tmp18757, i64 1
+ %tmp18759 = getelementptr inbounds float, float* %tmp18758, i64 1
+ %tmp18760 = getelementptr inbounds float, float* %tmp18759, i64 1
+ %tmp18761 = getelementptr inbounds float, float* %tmp18760, i64 1
+ %tmp18762 = getelementptr inbounds float, float* %tmp18761, i64 1
+ %tmp18763 = getelementptr inbounds float, float* %tmp18762, i64 1
+ %tmp18764 = getelementptr inbounds float, float* %tmp18763, i64 1
+ %tmp18765 = getelementptr inbounds float, float* %tmp18764, i64 1
+ %tmp18766 = getelementptr inbounds float, float* %tmp18765, i64 1
+ %tmp18767 = getelementptr inbounds float, float* %tmp18766, i64 1
+ %tmp18768 = getelementptr inbounds float, float* %tmp18767, i64 1
+ %tmp18769 = getelementptr inbounds float, float* %tmp18768, i64 1
+ %tmp18770 = getelementptr inbounds float, float* %tmp18769, i64 1
+ %tmp18771 = getelementptr inbounds float, float* %tmp18770, i64 1
+ %tmp18772 = getelementptr inbounds float, float* %tmp18771, i64 1
+ %tmp18773 = getelementptr inbounds float, float* %tmp18772, i64 1
+ %tmp18774 = getelementptr inbounds float, float* %tmp18773, i64 1
+ %tmp18775 = getelementptr inbounds float, float* %tmp18774, i64 1
+ %tmp18776 = getelementptr inbounds float, float* %tmp18775, i64 1
+ %tmp18777 = getelementptr inbounds float, float* %tmp18776, i64 1
+ %tmp18778 = getelementptr inbounds float, float* %tmp18777, i64 1
+ %tmp18779 = getelementptr inbounds float, float* %tmp18778, i64 1
+ %tmp18780 = getelementptr inbounds float, float* %tmp18779, i64 1
+ %tmp18781 = getelementptr inbounds float, float* %tmp18780, i64 1
+ %tmp18782 = getelementptr inbounds float, float* %tmp18781, i64 1
+ %tmp18783 = getelementptr inbounds float, float* %tmp18782, i64 1
+ %tmp18784 = getelementptr inbounds float, float* %tmp18783, i64 1
+ %tmp18785 = getelementptr inbounds float, float* %tmp18784, i64 1
+ %tmp18786 = getelementptr inbounds float, float* %tmp18785, i64 1
+ %tmp18787 = getelementptr inbounds float, float* %tmp18786, i64 1
+ %tmp18788 = getelementptr inbounds float, float* %tmp18787, i64 1
+ %tmp18789 = getelementptr inbounds float, float* %tmp18788, i64 1
+ %tmp18790 = getelementptr inbounds float, float* %tmp18789, i64 1
+ %tmp18791 = getelementptr inbounds float, float* %tmp18790, i64 1
+ %tmp18792 = getelementptr inbounds float, float* %tmp18791, i64 1
+ %tmp18793 = getelementptr inbounds float, float* %tmp18792, i64 1
+ %tmp18794 = getelementptr inbounds float, float* %tmp18793, i64 1
+ %tmp18795 = getelementptr inbounds float, float* %tmp18794, i64 1
+ %tmp18796 = getelementptr inbounds float, float* %tmp18795, i64 1
+ %tmp18797 = getelementptr inbounds float, float* %tmp18796, i64 1
+ %tmp18798 = getelementptr inbounds float, float* %tmp18797, i64 1
+ %tmp18799 = getelementptr inbounds float, float* %tmp18798, i64 1
+ %tmp18800 = getelementptr inbounds float, float* %tmp18799, i64 1
+ %tmp18801 = getelementptr inbounds float, float* %tmp18800, i64 1
+ %tmp18802 = getelementptr inbounds float, float* %tmp18801, i64 1
+ %tmp18803 = getelementptr inbounds float, float* %tmp18802, i64 1
+ %tmp18804 = getelementptr inbounds float, float* %tmp18803, i64 1
+ %tmp18805 = getelementptr inbounds float, float* %tmp18804, i64 1
+ %tmp18806 = getelementptr inbounds float, float* %tmp18805, i64 1
+ %tmp18807 = getelementptr inbounds float, float* %tmp18806, i64 1
+ %tmp18808 = getelementptr inbounds float, float* %tmp18807, i64 1
+ %tmp18809 = getelementptr inbounds float, float* %tmp18808, i64 1
+ %tmp18810 = getelementptr inbounds float, float* %tmp18809, i64 1
+ %tmp18811 = getelementptr inbounds float, float* %tmp18810, i64 1
+ %tmp18812 = getelementptr inbounds float, float* %tmp18811, i64 1
+ %tmp18813 = getelementptr inbounds float, float* %tmp18812, i64 1
+ %tmp18814 = getelementptr inbounds float, float* %tmp18813, i64 1
+ %tmp18815 = getelementptr inbounds float, float* %tmp18814, i64 1
+ %tmp18816 = getelementptr inbounds float, float* %tmp18815, i64 1
+ %tmp18817 = getelementptr inbounds float, float* %tmp18816, i64 1
+ %tmp18818 = getelementptr inbounds float, float* %tmp18817, i64 1
+ %tmp18819 = getelementptr inbounds float, float* %tmp18818, i64 1
+ %tmp18820 = getelementptr inbounds float, float* %tmp18819, i64 1
+ %tmp18821 = getelementptr inbounds float, float* %tmp18820, i64 1
+ %tmp18822 = getelementptr inbounds float, float* %tmp18821, i64 1
+ %tmp18823 = getelementptr inbounds float, float* %tmp18822, i64 1
+ %tmp18824 = getelementptr inbounds float, float* %tmp18823, i64 1
+ %tmp18825 = getelementptr inbounds float, float* %tmp18824, i64 1
+ %tmp18826 = getelementptr inbounds float, float* %tmp18825, i64 1
+ %tmp18827 = getelementptr inbounds float, float* %tmp18826, i64 1
+ %tmp18828 = getelementptr inbounds float, float* %tmp18827, i64 1
+ %tmp18829 = getelementptr inbounds float, float* %tmp18828, i64 1
+ %tmp18830 = getelementptr inbounds float, float* %tmp18829, i64 1
+ %tmp18831 = getelementptr inbounds float, float* %tmp18830, i64 1
+ %tmp18832 = getelementptr inbounds float, float* %tmp18831, i64 1
+ %tmp18833 = getelementptr inbounds float, float* %tmp18832, i64 1
+ %tmp18834 = getelementptr inbounds float, float* %tmp18833, i64 1
+ %tmp18835 = getelementptr inbounds float, float* %tmp18834, i64 1
+ %tmp18836 = getelementptr inbounds float, float* %tmp18835, i64 1
+ %tmp18837 = getelementptr inbounds float, float* %tmp18836, i64 1
+ %tmp18838 = getelementptr inbounds float, float* %tmp18837, i64 1
+ %tmp18839 = getelementptr inbounds float, float* %tmp18838, i64 1
+ %tmp18840 = getelementptr inbounds float, float* %tmp18839, i64 1
+ %tmp18841 = getelementptr inbounds float, float* %tmp18840, i64 1
+ %tmp18842 = getelementptr inbounds float, float* %tmp18841, i64 1
+ %tmp18843 = getelementptr inbounds float, float* %tmp18842, i64 1
+ %tmp18844 = getelementptr inbounds float, float* %tmp18843, i64 1
+ %tmp18845 = getelementptr inbounds float, float* %tmp18844, i64 1
+ %tmp18846 = getelementptr inbounds float, float* %tmp18845, i64 1
+ %tmp18847 = getelementptr inbounds float, float* %tmp18846, i64 1
+ %tmp18848 = getelementptr inbounds float, float* %tmp18847, i64 1
+ %tmp18849 = getelementptr inbounds float, float* %tmp18848, i64 1
+ %tmp18850 = getelementptr inbounds float, float* %tmp18849, i64 1
+ %tmp18851 = getelementptr inbounds float, float* %tmp18850, i64 1
+ %tmp18852 = getelementptr inbounds float, float* %tmp18851, i64 1
+ %tmp18853 = getelementptr inbounds float, float* %tmp18852, i64 1
+ %tmp18854 = getelementptr inbounds float, float* %tmp18853, i64 1
+ %tmp18855 = getelementptr inbounds float, float* %tmp18854, i64 1
+ %tmp18856 = getelementptr inbounds float, float* %tmp18855, i64 1
+ %tmp18857 = getelementptr inbounds float, float* %tmp18856, i64 1
+ %tmp18858 = getelementptr inbounds float, float* %tmp18857, i64 1
+ %tmp18859 = getelementptr inbounds float, float* %tmp18858, i64 1
+ %tmp18860 = getelementptr inbounds float, float* %tmp18859, i64 1
+ %tmp18861 = getelementptr inbounds float, float* %tmp18860, i64 1
+ %tmp18862 = getelementptr inbounds float, float* %tmp18861, i64 1
+ %tmp18863 = getelementptr inbounds float, float* %tmp18862, i64 1
+ %tmp18864 = getelementptr inbounds float, float* %tmp18863, i64 1
+ %tmp18865 = getelementptr inbounds float, float* %tmp18864, i64 1
+ %tmp18866 = getelementptr inbounds float, float* %tmp18865, i64 1
+ %tmp18867 = getelementptr inbounds float, float* %tmp18866, i64 1
+ %tmp18868 = getelementptr inbounds float, float* %tmp18867, i64 1
+ %tmp18869 = getelementptr inbounds float, float* %tmp18868, i64 1
+ %tmp18870 = getelementptr inbounds float, float* %tmp18869, i64 1
+ %tmp18871 = getelementptr inbounds float, float* %tmp18870, i64 1
+ %tmp18872 = getelementptr inbounds float, float* %tmp18871, i64 1
+ %tmp18873 = getelementptr inbounds float, float* %tmp18872, i64 1
+ %tmp18874 = getelementptr inbounds float, float* %tmp18873, i64 1
+ %tmp18875 = getelementptr inbounds float, float* %tmp18874, i64 1
+ %tmp18876 = getelementptr inbounds float, float* %tmp18875, i64 1
+ %tmp18877 = getelementptr inbounds float, float* %tmp18876, i64 1
+ %tmp18878 = getelementptr inbounds float, float* %tmp18877, i64 1
+ %tmp18879 = getelementptr inbounds float, float* %tmp18878, i64 1
+ %tmp18880 = getelementptr inbounds float, float* %tmp18879, i64 1
+ %tmp18881 = getelementptr inbounds float, float* %tmp18880, i64 1
+ %tmp18882 = getelementptr inbounds float, float* %tmp18881, i64 1
+ %tmp18883 = getelementptr inbounds float, float* %tmp18882, i64 1
+ %tmp18884 = getelementptr inbounds float, float* %tmp18883, i64 1
+ %tmp18885 = getelementptr inbounds float, float* %tmp18884, i64 1
+ %tmp18886 = getelementptr inbounds float, float* %tmp18885, i64 1
+ %tmp18887 = getelementptr inbounds float, float* %tmp18886, i64 1
+ %tmp18888 = getelementptr inbounds float, float* %tmp18887, i64 1
+ %tmp18889 = getelementptr inbounds float, float* %tmp18888, i64 1
+ %tmp18890 = getelementptr inbounds float, float* %tmp18889, i64 1
+ %tmp18891 = getelementptr inbounds float, float* %tmp18890, i64 1
+ %tmp18892 = getelementptr inbounds float, float* %tmp18891, i64 1
+ %tmp18893 = getelementptr inbounds float, float* %tmp18892, i64 1
+ %tmp18894 = getelementptr inbounds float, float* %tmp18893, i64 1
+ %tmp18895 = getelementptr inbounds float, float* %tmp18894, i64 1
+ %tmp18896 = getelementptr inbounds float, float* %tmp18895, i64 1
+ %tmp18897 = getelementptr inbounds float, float* %tmp18896, i64 1
+ %tmp18898 = getelementptr inbounds float, float* %tmp18897, i64 1
+ %tmp18899 = getelementptr inbounds float, float* %tmp18898, i64 1
+ %tmp18900 = getelementptr inbounds float, float* %tmp18899, i64 1
+ %tmp18901 = getelementptr inbounds float, float* %tmp18900, i64 1
+ %tmp18902 = getelementptr inbounds float, float* %tmp18901, i64 1
+ %tmp18903 = getelementptr inbounds float, float* %tmp18902, i64 1
+ %tmp18904 = getelementptr inbounds float, float* %tmp18903, i64 1
+ %tmp18905 = getelementptr inbounds float, float* %tmp18904, i64 1
+ %tmp18906 = getelementptr inbounds float, float* %tmp18905, i64 1
+ %tmp18907 = getelementptr inbounds float, float* %tmp18906, i64 1
+ %tmp18908 = getelementptr inbounds float, float* %tmp18907, i64 1
+ %tmp18909 = getelementptr inbounds float, float* %tmp18908, i64 1
+ %tmp18910 = getelementptr inbounds float, float* %tmp18909, i64 1
+ %tmp18911 = getelementptr inbounds float, float* %tmp18910, i64 1
+ %tmp18912 = getelementptr inbounds float, float* %tmp18911, i64 1
+ %tmp18913 = getelementptr inbounds float, float* %tmp18912, i64 1
+ %tmp18914 = getelementptr inbounds float, float* %tmp18913, i64 1
+ %tmp18915 = getelementptr inbounds float, float* %tmp18914, i64 1
+ %tmp18916 = getelementptr inbounds float, float* %tmp18915, i64 1
+ %tmp18917 = getelementptr inbounds float, float* %tmp18916, i64 1
+ %tmp18918 = getelementptr inbounds float, float* %tmp18917, i64 1
+ %tmp18919 = getelementptr inbounds float, float* %tmp18918, i64 1
+ %tmp18920 = getelementptr inbounds float, float* %tmp18919, i64 1
+ %tmp18921 = getelementptr inbounds float, float* %tmp18920, i64 1
+ %tmp18922 = getelementptr inbounds float, float* %tmp18921, i64 1
+ %tmp18923 = getelementptr inbounds float, float* %tmp18922, i64 1
+ %tmp18924 = getelementptr inbounds float, float* %tmp18923, i64 1
+ %tmp18925 = getelementptr inbounds float, float* %tmp18924, i64 1
+ %tmp18926 = getelementptr inbounds float, float* %tmp18925, i64 1
+ %tmp18927 = getelementptr inbounds float, float* %tmp18926, i64 1
+ %tmp18928 = getelementptr inbounds float, float* %tmp18927, i64 1
+ %tmp18929 = getelementptr inbounds float, float* %tmp18928, i64 1
+ %tmp18930 = getelementptr inbounds float, float* %tmp18929, i64 1
+ %tmp18931 = getelementptr inbounds float, float* %tmp18930, i64 1
+ %tmp18932 = getelementptr inbounds float, float* %tmp18931, i64 1
+ %tmp18933 = getelementptr inbounds float, float* %tmp18932, i64 1
+ %tmp18934 = getelementptr inbounds float, float* %tmp18933, i64 1
+ %tmp18935 = getelementptr inbounds float, float* %tmp18934, i64 1
+ %tmp18936 = getelementptr inbounds float, float* %tmp18935, i64 1
+ %tmp18937 = getelementptr inbounds float, float* %tmp18936, i64 1
+ %tmp18938 = getelementptr inbounds float, float* %tmp18937, i64 1
+ %tmp18939 = getelementptr inbounds float, float* %tmp18938, i64 1
+ %tmp18940 = getelementptr inbounds float, float* %tmp18939, i64 1
+ %tmp18941 = getelementptr inbounds float, float* %tmp18940, i64 1
+ %tmp18942 = getelementptr inbounds float, float* %tmp18941, i64 1
+ %tmp18943 = getelementptr inbounds float, float* %tmp18942, i64 1
+ %tmp18944 = getelementptr inbounds float, float* %tmp18943, i64 1
+ %tmp18945 = getelementptr inbounds float, float* %tmp18944, i64 1
+ %tmp18946 = getelementptr inbounds float, float* %tmp18945, i64 1
+ %tmp18947 = getelementptr inbounds float, float* %tmp18946, i64 1
+ %tmp18948 = getelementptr inbounds float, float* %tmp18947, i64 1
+ %tmp18949 = getelementptr inbounds float, float* %tmp18948, i64 1
+ %tmp18950 = getelementptr inbounds float, float* %tmp18949, i64 1
+ %tmp18951 = getelementptr inbounds float, float* %tmp18950, i64 1
+ %tmp18952 = getelementptr inbounds float, float* %tmp18951, i64 1
+ %tmp18953 = getelementptr inbounds float, float* %tmp18952, i64 1
+ %tmp18954 = getelementptr inbounds float, float* %tmp18953, i64 1
+ %tmp18955 = getelementptr inbounds float, float* %tmp18954, i64 1
+ %tmp18956 = getelementptr inbounds float, float* %tmp18955, i64 1
+ %tmp18957 = getelementptr inbounds float, float* %tmp18956, i64 1
+ %tmp18958 = getelementptr inbounds float, float* %tmp18957, i64 1
+ %tmp18959 = getelementptr inbounds float, float* %tmp18958, i64 1
+ %tmp18960 = getelementptr inbounds float, float* %tmp18959, i64 1
+ %tmp18961 = getelementptr inbounds float, float* %tmp18960, i64 1
+ %tmp18962 = getelementptr inbounds float, float* %tmp18961, i64 1
+ %tmp18963 = getelementptr inbounds float, float* %tmp18962, i64 1
+ %tmp18964 = getelementptr inbounds float, float* %tmp18963, i64 1
+ %tmp18965 = getelementptr inbounds float, float* %tmp18964, i64 1
+ %tmp18966 = getelementptr inbounds float, float* %tmp18965, i64 1
+ %tmp18967 = getelementptr inbounds float, float* %tmp18966, i64 1
+ %tmp18968 = getelementptr inbounds float, float* %tmp18967, i64 1
+ %tmp18969 = getelementptr inbounds float, float* %tmp18968, i64 1
+ %tmp18970 = getelementptr inbounds float, float* %tmp18969, i64 1
+ %tmp18971 = getelementptr inbounds float, float* %tmp18970, i64 1
+ %tmp18972 = getelementptr inbounds float, float* %tmp18971, i64 1
+ %tmp18973 = getelementptr inbounds float, float* %tmp18972, i64 1
+ %tmp18974 = getelementptr inbounds float, float* %tmp18973, i64 1
+ %tmp18975 = getelementptr inbounds float, float* %tmp18974, i64 1
+ %tmp18976 = getelementptr inbounds float, float* %tmp18975, i64 1
+ %tmp18977 = getelementptr inbounds float, float* %tmp18976, i64 1
+ %tmp18978 = getelementptr inbounds float, float* %tmp18977, i64 1
+ %tmp18979 = getelementptr inbounds float, float* %tmp18978, i64 1
+ %tmp18980 = getelementptr inbounds float, float* %tmp18979, i64 1
+ %tmp18981 = getelementptr inbounds float, float* %tmp18980, i64 1
+ %tmp18982 = getelementptr inbounds float, float* %tmp18981, i64 1
+ %tmp18983 = getelementptr inbounds float, float* %tmp18982, i64 1
+ %tmp18984 = getelementptr inbounds float, float* %tmp18983, i64 1
+ %tmp18985 = getelementptr inbounds float, float* %tmp18984, i64 1
+ %tmp18986 = getelementptr inbounds float, float* %tmp18985, i64 1
+ %tmp18987 = getelementptr inbounds float, float* %tmp18986, i64 1
+ %tmp18988 = getelementptr inbounds float, float* %tmp18987, i64 1
+ %tmp18989 = getelementptr inbounds float, float* %tmp18988, i64 1
+ %tmp18990 = getelementptr inbounds float, float* %tmp18989, i64 1
+ %tmp18991 = getelementptr inbounds float, float* %tmp18990, i64 1
+ %tmp18992 = getelementptr inbounds float, float* %tmp18991, i64 1
+ %tmp18993 = getelementptr inbounds float, float* %tmp18992, i64 1
+ %tmp18994 = getelementptr inbounds float, float* %tmp18993, i64 1
+ %tmp18995 = getelementptr inbounds float, float* %tmp18994, i64 1
+ %tmp18996 = getelementptr inbounds float, float* %tmp18995, i64 1
+ %tmp18997 = getelementptr inbounds float, float* %tmp18996, i64 1
+ %tmp18998 = getelementptr inbounds float, float* %tmp18997, i64 1
+ %tmp18999 = getelementptr inbounds float, float* %tmp18998, i64 1
+ %tmp19000 = getelementptr inbounds float, float* %tmp18999, i64 1
+ %tmp19001 = getelementptr inbounds float, float* %tmp19000, i64 1
+ %tmp19002 = getelementptr inbounds float, float* %tmp19001, i64 1
+ %tmp19003 = getelementptr inbounds float, float* %tmp19002, i64 1
+ %tmp19004 = getelementptr inbounds float, float* %tmp19003, i64 1
+ %tmp19005 = getelementptr inbounds float, float* %tmp19004, i64 1
+ %tmp19006 = getelementptr inbounds float, float* %tmp19005, i64 1
+ %tmp19007 = getelementptr inbounds float, float* %tmp19006, i64 1
+ %tmp19008 = getelementptr inbounds float, float* %tmp19007, i64 1
+ %tmp19009 = getelementptr inbounds float, float* %tmp19008, i64 1
+ %tmp19010 = getelementptr inbounds float, float* %tmp19009, i64 1
+ %tmp19011 = getelementptr inbounds float, float* %tmp19010, i64 1
+ %tmp19012 = getelementptr inbounds float, float* %tmp19011, i64 1
+ %tmp19013 = getelementptr inbounds float, float* %tmp19012, i64 1
+ %tmp19014 = getelementptr inbounds float, float* %tmp19013, i64 1
+ %tmp19015 = getelementptr inbounds float, float* %tmp19014, i64 1
+ %tmp19016 = getelementptr inbounds float, float* %tmp19015, i64 1
+ %tmp19017 = getelementptr inbounds float, float* %tmp19016, i64 1
+ %tmp19018 = getelementptr inbounds float, float* %tmp19017, i64 1
+ %tmp19019 = getelementptr inbounds float, float* %tmp19018, i64 1
+ %tmp19020 = getelementptr inbounds float, float* %tmp19019, i64 1
+ %tmp19021 = getelementptr inbounds float, float* %tmp19020, i64 1
+ %tmp19022 = getelementptr inbounds float, float* %tmp19021, i64 1
+ %tmp19023 = getelementptr inbounds float, float* %tmp19022, i64 1
+ %tmp19024 = getelementptr inbounds float, float* %tmp19023, i64 1
+ %tmp19025 = getelementptr inbounds float, float* %tmp19024, i64 1
+ %tmp19026 = getelementptr inbounds float, float* %tmp19025, i64 1
+ %tmp19027 = getelementptr inbounds float, float* %tmp19026, i64 1
+ %tmp19028 = getelementptr inbounds float, float* %tmp19027, i64 1
+ %tmp19029 = getelementptr inbounds float, float* %tmp19028, i64 1
+ %tmp19030 = getelementptr inbounds float, float* %tmp19029, i64 1
+ %tmp19031 = getelementptr inbounds float, float* %tmp19030, i64 1
+ %tmp19032 = getelementptr inbounds float, float* %tmp19031, i64 1
+ %tmp19033 = getelementptr inbounds float, float* %tmp19032, i64 1
+ %tmp19034 = getelementptr inbounds float, float* %tmp19033, i64 1
+ %tmp19035 = getelementptr inbounds float, float* %tmp19034, i64 1
+ %tmp19036 = getelementptr inbounds float, float* %tmp19035, i64 1
+ %tmp19037 = getelementptr inbounds float, float* %tmp19036, i64 1
+ %tmp19038 = getelementptr inbounds float, float* %tmp19037, i64 1
+ %tmp19039 = getelementptr inbounds float, float* %tmp19038, i64 1
+ %tmp19040 = getelementptr inbounds float, float* %tmp19039, i64 1
+ %tmp19041 = getelementptr inbounds float, float* %tmp19040, i64 1
+ %tmp19042 = getelementptr inbounds float, float* %tmp19041, i64 1
+ %tmp19043 = getelementptr inbounds float, float* %tmp19042, i64 1
+ %tmp19044 = getelementptr inbounds float, float* %tmp19043, i64 1
+ %tmp19045 = getelementptr inbounds float, float* %tmp19044, i64 1
+ %tmp19046 = getelementptr inbounds float, float* %tmp19045, i64 1
+ %tmp19047 = getelementptr inbounds float, float* %tmp19046, i64 1
+ %tmp19048 = getelementptr inbounds float, float* %tmp19047, i64 1
+ %tmp19049 = getelementptr inbounds float, float* %tmp19048, i64 1
+ %tmp19050 = getelementptr inbounds float, float* %tmp19049, i64 1
+ %tmp19051 = getelementptr inbounds float, float* %tmp19050, i64 1
+ %tmp19052 = getelementptr inbounds float, float* %tmp19051, i64 1
+ %tmp19053 = getelementptr inbounds float, float* %tmp19052, i64 1
+ %tmp19054 = getelementptr inbounds float, float* %tmp19053, i64 1
+ %tmp19055 = getelementptr inbounds float, float* %tmp19054, i64 1
+ %tmp19056 = getelementptr inbounds float, float* %tmp19055, i64 1
+ %tmp19057 = getelementptr inbounds float, float* %tmp19056, i64 1
+ %tmp19058 = getelementptr inbounds float, float* %tmp19057, i64 1
+ %tmp19059 = getelementptr inbounds float, float* %tmp19058, i64 1
+ %tmp19060 = getelementptr inbounds float, float* %tmp19059, i64 1
+ %tmp19061 = getelementptr inbounds float, float* %tmp19060, i64 1
+ %tmp19062 = getelementptr inbounds float, float* %tmp19061, i64 1
+ %tmp19063 = getelementptr inbounds float, float* %tmp19062, i64 1
+ %tmp19064 = getelementptr inbounds float, float* %tmp19063, i64 1
+ %tmp19065 = getelementptr inbounds float, float* %tmp19064, i64 1
+ %tmp19066 = getelementptr inbounds float, float* %tmp19065, i64 1
+ %tmp19067 = getelementptr inbounds float, float* %tmp19066, i64 1
+ %tmp19068 = getelementptr inbounds float, float* %tmp19067, i64 1
+ %tmp19069 = getelementptr inbounds float, float* %tmp19068, i64 1
+ %tmp19070 = getelementptr inbounds float, float* %tmp19069, i64 1
+ %tmp19071 = getelementptr inbounds float, float* %tmp19070, i64 1
+ %tmp19072 = getelementptr inbounds float, float* %tmp19071, i64 1
+ %tmp19073 = getelementptr inbounds float, float* %tmp19072, i64 1
+ %tmp19074 = getelementptr inbounds float, float* %tmp19073, i64 1
+ %tmp19075 = getelementptr inbounds float, float* %tmp19074, i64 1
+ %tmp19076 = getelementptr inbounds float, float* %tmp19075, i64 1
+ %tmp19077 = getelementptr inbounds float, float* %tmp19076, i64 1
+ %tmp19078 = getelementptr inbounds float, float* %tmp19077, i64 1
+ %tmp19079 = getelementptr inbounds float, float* %tmp19078, i64 1
+ %tmp19080 = getelementptr inbounds float, float* %tmp19079, i64 1
+ %tmp19081 = getelementptr inbounds float, float* %tmp19080, i64 1
+ %tmp19082 = getelementptr inbounds float, float* %tmp19081, i64 1
+ %tmp19083 = getelementptr inbounds float, float* %tmp19082, i64 1
+ %tmp19084 = getelementptr inbounds float, float* %tmp19083, i64 1
+ %tmp19085 = getelementptr inbounds float, float* %tmp19084, i64 1
+ %tmp19086 = getelementptr inbounds float, float* %tmp19085, i64 1
+ %tmp19087 = getelementptr inbounds float, float* %tmp19086, i64 1
+ %tmp19088 = getelementptr inbounds float, float* %tmp19087, i64 1
+ %tmp19089 = getelementptr inbounds float, float* %tmp19088, i64 1
+ %tmp19090 = getelementptr inbounds float, float* %tmp19089, i64 1
+ %tmp19091 = getelementptr inbounds float, float* %tmp19090, i64 1
+ %tmp19092 = getelementptr inbounds float, float* %tmp19091, i64 1
+ %tmp19093 = getelementptr inbounds float, float* %tmp19092, i64 1
+ %tmp19094 = getelementptr inbounds float, float* %tmp19093, i64 1
+ %tmp19095 = getelementptr inbounds float, float* %tmp19094, i64 1
+ %tmp19096 = getelementptr inbounds float, float* %tmp19095, i64 1
+ %tmp19097 = getelementptr inbounds float, float* %tmp19096, i64 1
+ %tmp19098 = getelementptr inbounds float, float* %tmp19097, i64 1
+ %tmp19099 = getelementptr inbounds float, float* %tmp19098, i64 1
+ %tmp19100 = getelementptr inbounds float, float* %tmp19099, i64 1
+ %tmp19101 = getelementptr inbounds float, float* %tmp19100, i64 1
+ %tmp19102 = getelementptr inbounds float, float* %tmp19101, i64 1
+ %tmp19103 = getelementptr inbounds float, float* %tmp19102, i64 1
+ %tmp19104 = getelementptr inbounds float, float* %tmp19103, i64 1
+ %tmp19105 = getelementptr inbounds float, float* %tmp19104, i64 1
+ %tmp19106 = getelementptr inbounds float, float* %tmp19105, i64 1
+ %tmp19107 = getelementptr inbounds float, float* %tmp19106, i64 1
+ %tmp19108 = getelementptr inbounds float, float* %tmp19107, i64 1
+ %tmp19109 = getelementptr inbounds float, float* %tmp19108, i64 1
+ %tmp19110 = getelementptr inbounds float, float* %tmp19109, i64 1
+ %tmp19111 = getelementptr inbounds float, float* %tmp19110, i64 1
+ %tmp19112 = getelementptr inbounds float, float* %tmp19111, i64 1
+ %tmp19113 = getelementptr inbounds float, float* %tmp19112, i64 1
+ %tmp19114 = getelementptr inbounds float, float* %tmp19113, i64 1
+ %tmp19115 = getelementptr inbounds float, float* %tmp19114, i64 1
+ %tmp19116 = getelementptr inbounds float, float* %tmp19115, i64 1
+ %tmp19117 = getelementptr inbounds float, float* %tmp19116, i64 1
+ %tmp19118 = getelementptr inbounds float, float* %tmp19117, i64 1
+ %tmp19119 = getelementptr inbounds float, float* %tmp19118, i64 1
+ %tmp19120 = getelementptr inbounds float, float* %tmp19119, i64 1
+ %tmp19121 = getelementptr inbounds float, float* %tmp19120, i64 1
+ %tmp19122 = getelementptr inbounds float, float* %tmp19121, i64 1
+ %tmp19123 = getelementptr inbounds float, float* %tmp19122, i64 1
+ %tmp19124 = getelementptr inbounds float, float* %tmp19123, i64 1
+ %tmp19125 = getelementptr inbounds float, float* %tmp19124, i64 1
+ %tmp19126 = getelementptr inbounds float, float* %tmp19125, i64 1
+ %tmp19127 = getelementptr inbounds float, float* %tmp19126, i64 1
+ %tmp19128 = getelementptr inbounds float, float* %tmp19127, i64 1
+ %tmp19129 = getelementptr inbounds float, float* %tmp19128, i64 1
+ %tmp19130 = getelementptr inbounds float, float* %tmp19129, i64 1
+ %tmp19131 = getelementptr inbounds float, float* %tmp19130, i64 1
+ %tmp19132 = getelementptr inbounds float, float* %tmp19131, i64 1
+ %tmp19133 = getelementptr inbounds float, float* %tmp19132, i64 1
+ %tmp19134 = getelementptr inbounds float, float* %tmp19133, i64 1
+ %tmp19135 = getelementptr inbounds float, float* %tmp19134, i64 1
+ %tmp19136 = getelementptr inbounds float, float* %tmp19135, i64 1
+ %tmp19137 = getelementptr inbounds float, float* %tmp19136, i64 1
+ %tmp19138 = getelementptr inbounds float, float* %tmp19137, i64 1
+ %tmp19139 = getelementptr inbounds float, float* %tmp19138, i64 1
+ %tmp19140 = getelementptr inbounds float, float* %tmp19139, i64 1
+ %tmp19141 = getelementptr inbounds float, float* %tmp19140, i64 1
+ %tmp19142 = getelementptr inbounds float, float* %tmp19141, i64 1
+ %tmp19143 = getelementptr inbounds float, float* %tmp19142, i64 1
+ %tmp19144 = getelementptr inbounds float, float* %tmp19143, i64 1
+ %tmp19145 = getelementptr inbounds float, float* %tmp19144, i64 1
+ %tmp19146 = getelementptr inbounds float, float* %tmp19145, i64 1
+ %tmp19147 = getelementptr inbounds float, float* %tmp19146, i64 1
+ %tmp19148 = getelementptr inbounds float, float* %tmp19147, i64 1
+ %tmp19149 = getelementptr inbounds float, float* %tmp19148, i64 1
+ %tmp19150 = getelementptr inbounds float, float* %tmp19149, i64 1
+ %tmp19151 = getelementptr inbounds float, float* %tmp19150, i64 1
+ %tmp19152 = getelementptr inbounds float, float* %tmp19151, i64 1
+ %tmp19153 = getelementptr inbounds float, float* %tmp19152, i64 1
+ %tmp19154 = getelementptr inbounds float, float* %tmp19153, i64 1
+ %tmp19155 = getelementptr inbounds float, float* %tmp19154, i64 1
+ %tmp19156 = getelementptr inbounds float, float* %tmp19155, i64 1
+ %tmp19157 = getelementptr inbounds float, float* %tmp19156, i64 1
+ %tmp19158 = getelementptr inbounds float, float* %tmp19157, i64 1
+ %tmp19159 = getelementptr inbounds float, float* %tmp19158, i64 1
+ %tmp19160 = getelementptr inbounds float, float* %tmp19159, i64 1
+ %tmp19161 = getelementptr inbounds float, float* %tmp19160, i64 1
+ %tmp19162 = getelementptr inbounds float, float* %tmp19161, i64 1
+ %tmp19163 = getelementptr inbounds float, float* %tmp19162, i64 1
+ %tmp19164 = getelementptr inbounds float, float* %tmp19163, i64 1
+ %tmp19165 = getelementptr inbounds float, float* %tmp19164, i64 1
+ %tmp19166 = getelementptr inbounds float, float* %tmp19165, i64 1
+ %tmp19167 = getelementptr inbounds float, float* %tmp19166, i64 1
+ %tmp19168 = getelementptr inbounds float, float* %tmp19167, i64 1
+ %tmp19169 = getelementptr inbounds float, float* %tmp19168, i64 1
+ %tmp19170 = getelementptr inbounds float, float* %tmp19169, i64 1
+ %tmp19171 = getelementptr inbounds float, float* %tmp19170, i64 1
+ %tmp19172 = getelementptr inbounds float, float* %tmp19171, i64 1
+ %tmp19173 = getelementptr inbounds float, float* %tmp19172, i64 1
+ %tmp19174 = getelementptr inbounds float, float* %tmp19173, i64 1
+ %tmp19175 = getelementptr inbounds float, float* %tmp19174, i64 1
+ %tmp19176 = getelementptr inbounds float, float* %tmp19175, i64 1
+ %tmp19177 = getelementptr inbounds float, float* %tmp19176, i64 1
+ %tmp19178 = getelementptr inbounds float, float* %tmp19177, i64 1
+ %tmp19179 = getelementptr inbounds float, float* %tmp19178, i64 1
+ %tmp19180 = getelementptr inbounds float, float* %tmp19179, i64 1
+ %tmp19181 = getelementptr inbounds float, float* %tmp19180, i64 1
+ %tmp19182 = getelementptr inbounds float, float* %tmp19181, i64 1
+ %tmp19183 = getelementptr inbounds float, float* %tmp19182, i64 1
+ %tmp19184 = getelementptr inbounds float, float* %tmp19183, i64 1
+ %tmp19185 = getelementptr inbounds float, float* %tmp19184, i64 1
+ %tmp19186 = getelementptr inbounds float, float* %tmp19185, i64 1
+ %tmp19187 = getelementptr inbounds float, float* %tmp19186, i64 1
+ %tmp19188 = getelementptr inbounds float, float* %tmp19187, i64 1
+ %tmp19189 = getelementptr inbounds float, float* %tmp19188, i64 1
+ %tmp19190 = getelementptr inbounds float, float* %tmp19189, i64 1
+ %tmp19191 = getelementptr inbounds float, float* %tmp19190, i64 1
+ %tmp19192 = getelementptr inbounds float, float* %tmp19191, i64 1
+ %tmp19193 = getelementptr inbounds float, float* %tmp19192, i64 1
+ %tmp19194 = getelementptr inbounds float, float* %tmp19193, i64 1
+ %tmp19195 = getelementptr inbounds float, float* %tmp19194, i64 1
+ %tmp19196 = getelementptr inbounds float, float* %tmp19195, i64 1
+ %tmp19197 = getelementptr inbounds float, float* %tmp19196, i64 1
+ %tmp19198 = getelementptr inbounds float, float* %tmp19197, i64 1
+ %tmp19199 = getelementptr inbounds float, float* %tmp19198, i64 1
+ %tmp19200 = getelementptr inbounds float, float* %tmp19199, i64 1
+ %tmp19201 = getelementptr inbounds float, float* %tmp19200, i64 1
+ %tmp19202 = getelementptr inbounds float, float* %tmp19201, i64 1
+ %tmp19203 = getelementptr inbounds float, float* %tmp19202, i64 1
+ %tmp19204 = getelementptr inbounds float, float* %tmp19203, i64 1
+ %tmp19205 = getelementptr inbounds float, float* %tmp19204, i64 1
+ %tmp19206 = getelementptr inbounds float, float* %tmp19205, i64 1
+ %tmp19207 = getelementptr inbounds float, float* %tmp19206, i64 1
+ %tmp19208 = getelementptr inbounds float, float* %tmp19207, i64 1
+ %tmp19209 = getelementptr inbounds float, float* %tmp19208, i64 1
+ %tmp19210 = getelementptr inbounds float, float* %tmp19209, i64 1
+ %tmp19211 = getelementptr inbounds float, float* %tmp19210, i64 1
+ %tmp19212 = getelementptr inbounds float, float* %tmp19211, i64 1
+ %tmp19213 = getelementptr inbounds float, float* %tmp19212, i64 1
+ %tmp19214 = getelementptr inbounds float, float* %tmp19213, i64 1
+ %tmp19215 = getelementptr inbounds float, float* %tmp19214, i64 1
+ %tmp19216 = getelementptr inbounds float, float* %tmp19215, i64 1
+ %tmp19217 = getelementptr inbounds float, float* %tmp19216, i64 1
+ %tmp19218 = getelementptr inbounds float, float* %tmp19217, i64 1
+ %tmp19219 = getelementptr inbounds float, float* %tmp19218, i64 1
+ %tmp19220 = getelementptr inbounds float, float* %tmp19219, i64 1
+ %tmp19221 = getelementptr inbounds float, float* %tmp19220, i64 1
+ %tmp19222 = getelementptr inbounds float, float* %tmp19221, i64 1
+ %tmp19223 = getelementptr inbounds float, float* %tmp19222, i64 1
+ %tmp19224 = getelementptr inbounds float, float* %tmp19223, i64 1
+ %tmp19225 = getelementptr inbounds float, float* %tmp19224, i64 1
+ %tmp19226 = getelementptr inbounds float, float* %tmp19225, i64 1
+ %tmp19227 = getelementptr inbounds float, float* %tmp19226, i64 1
+ %tmp19228 = getelementptr inbounds float, float* %tmp19227, i64 1
+ %tmp19229 = getelementptr inbounds float, float* %tmp19228, i64 1
+ %tmp19230 = getelementptr inbounds float, float* %tmp19229, i64 1
+ %tmp19231 = getelementptr inbounds float, float* %tmp19230, i64 1
+ %tmp19232 = getelementptr inbounds float, float* %tmp19231, i64 1
+ %tmp19233 = getelementptr inbounds float, float* %tmp19232, i64 1
+ %tmp19234 = getelementptr inbounds float, float* %tmp19233, i64 1
+ %tmp19235 = getelementptr inbounds float, float* %tmp19234, i64 1
+ %tmp19236 = getelementptr inbounds float, float* %tmp19235, i64 1
+ %tmp19237 = getelementptr inbounds float, float* %tmp19236, i64 1
+ %tmp19238 = getelementptr inbounds float, float* %tmp19237, i64 1
+ %tmp19239 = getelementptr inbounds float, float* %tmp19238, i64 1
+ %tmp19240 = getelementptr inbounds float, float* %tmp19239, i64 1
+ %tmp19241 = getelementptr inbounds float, float* %tmp19240, i64 1
+ %tmp19242 = getelementptr inbounds float, float* %tmp19241, i64 1
+ %tmp19243 = getelementptr inbounds float, float* %tmp19242, i64 1
+ %tmp19244 = getelementptr inbounds float, float* %tmp19243, i64 1
+ %tmp19245 = getelementptr inbounds float, float* %tmp19244, i64 1
+ %tmp19246 = getelementptr inbounds float, float* %tmp19245, i64 1
+ %tmp19247 = getelementptr inbounds float, float* %tmp19246, i64 1
+ %tmp19248 = getelementptr inbounds float, float* %tmp19247, i64 1
+ %tmp19249 = getelementptr inbounds float, float* %tmp19248, i64 1
+ %tmp19250 = getelementptr inbounds float, float* %tmp19249, i64 1
+ %tmp19251 = getelementptr inbounds float, float* %tmp19250, i64 1
+ %tmp19252 = getelementptr inbounds float, float* %tmp19251, i64 1
+ %tmp19253 = getelementptr inbounds float, float* %tmp19252, i64 1
+ %tmp19254 = getelementptr inbounds float, float* %tmp19253, i64 1
+ %tmp19255 = getelementptr inbounds float, float* %tmp19254, i64 1
+ %tmp19256 = getelementptr inbounds float, float* %tmp19255, i64 1
+ %tmp19257 = getelementptr inbounds float, float* %tmp19256, i64 1
+ %tmp19258 = getelementptr inbounds float, float* %tmp19257, i64 1
+ %tmp19259 = getelementptr inbounds float, float* %tmp19258, i64 1
+ %tmp19260 = getelementptr inbounds float, float* %tmp19259, i64 1
+ %tmp19261 = getelementptr inbounds float, float* %tmp19260, i64 1
+ %tmp19262 = getelementptr inbounds float, float* %tmp19261, i64 1
+ %tmp19263 = getelementptr inbounds float, float* %tmp19262, i64 1
+ %tmp19264 = getelementptr inbounds float, float* %tmp19263, i64 1
+ %tmp19265 = getelementptr inbounds float, float* %tmp19264, i64 1
+ %tmp19266 = getelementptr inbounds float, float* %tmp19265, i64 1
+ %tmp19267 = getelementptr inbounds float, float* %tmp19266, i64 1
+ %tmp19268 = getelementptr inbounds float, float* %tmp19267, i64 1
+ %tmp19269 = getelementptr inbounds float, float* %tmp19268, i64 1
+ %tmp19270 = getelementptr inbounds float, float* %tmp19269, i64 1
+ %tmp19271 = getelementptr inbounds float, float* %tmp19270, i64 1
+ %tmp19272 = getelementptr inbounds float, float* %tmp19271, i64 1
+ %tmp19273 = getelementptr inbounds float, float* %tmp19272, i64 1
+ %tmp19274 = getelementptr inbounds float, float* %tmp19273, i64 1
+ %tmp19275 = getelementptr inbounds float, float* %tmp19274, i64 1
+ %tmp19276 = getelementptr inbounds float, float* %tmp19275, i64 1
+ %tmp19277 = getelementptr inbounds float, float* %tmp19276, i64 1
+ %tmp19278 = getelementptr inbounds float, float* %tmp19277, i64 1
+ %tmp19279 = getelementptr inbounds float, float* %tmp19278, i64 1
+ %tmp19280 = getelementptr inbounds float, float* %tmp19279, i64 1
+ %tmp19281 = getelementptr inbounds float, float* %tmp19280, i64 1
+ %tmp19282 = getelementptr inbounds float, float* %tmp19281, i64 1
+ %tmp19283 = getelementptr inbounds float, float* %tmp19282, i64 1
+ %tmp19284 = getelementptr inbounds float, float* %tmp19283, i64 1
+ %tmp19285 = getelementptr inbounds float, float* %tmp19284, i64 1
+ %tmp19286 = getelementptr inbounds float, float* %tmp19285, i64 1
+ %tmp19287 = getelementptr inbounds float, float* %tmp19286, i64 1
+ %tmp19288 = getelementptr inbounds float, float* %tmp19287, i64 1
+ %tmp19289 = getelementptr inbounds float, float* %tmp19288, i64 1
+ %tmp19290 = getelementptr inbounds float, float* %tmp19289, i64 1
+ %tmp19291 = getelementptr inbounds float, float* %tmp19290, i64 1
+ %tmp19292 = getelementptr inbounds float, float* %tmp19291, i64 1
+ %tmp19293 = getelementptr inbounds float, float* %tmp19292, i64 1
+ %tmp19294 = getelementptr inbounds float, float* %tmp19293, i64 1
+ %tmp19295 = getelementptr inbounds float, float* %tmp19294, i64 1
+ %tmp19296 = getelementptr inbounds float, float* %tmp19295, i64 1
+ %tmp19297 = getelementptr inbounds float, float* %tmp19296, i64 1
+ %tmp19298 = getelementptr inbounds float, float* %tmp19297, i64 1
+ %tmp19299 = getelementptr inbounds float, float* %tmp19298, i64 1
+ %tmp19300 = getelementptr inbounds float, float* %tmp19299, i64 1
+ %tmp19301 = getelementptr inbounds float, float* %tmp19300, i64 1
+ %tmp19302 = getelementptr inbounds float, float* %tmp19301, i64 1
+ %tmp19303 = getelementptr inbounds float, float* %tmp19302, i64 1
+ %tmp19304 = getelementptr inbounds float, float* %tmp19303, i64 1
+ %tmp19305 = getelementptr inbounds float, float* %tmp19304, i64 1
+ %tmp19306 = getelementptr inbounds float, float* %tmp19305, i64 1
+ %tmp19307 = getelementptr inbounds float, float* %tmp19306, i64 1
+ %tmp19308 = getelementptr inbounds float, float* %tmp19307, i64 1
+ %tmp19309 = getelementptr inbounds float, float* %tmp19308, i64 1
+ %tmp19310 = getelementptr inbounds float, float* %tmp19309, i64 1
+ %tmp19311 = getelementptr inbounds float, float* %tmp19310, i64 1
+ %tmp19312 = getelementptr inbounds float, float* %tmp19311, i64 1
+ %tmp19313 = getelementptr inbounds float, float* %tmp19312, i64 1
+ %tmp19314 = getelementptr inbounds float, float* %tmp19313, i64 1
+ %tmp19315 = getelementptr inbounds float, float* %tmp19314, i64 1
+ %tmp19316 = getelementptr inbounds float, float* %tmp19315, i64 1
+ %tmp19317 = getelementptr inbounds float, float* %tmp19316, i64 1
+ %tmp19318 = getelementptr inbounds float, float* %tmp19317, i64 1
+ %tmp19319 = getelementptr inbounds float, float* %tmp19318, i64 1
+ %tmp19320 = getelementptr inbounds float, float* %tmp19319, i64 1
+ %tmp19321 = getelementptr inbounds float, float* %tmp19320, i64 1
+ %tmp19322 = getelementptr inbounds float, float* %tmp19321, i64 1
+ %tmp19323 = getelementptr inbounds float, float* %tmp19322, i64 1
+ %tmp19324 = getelementptr inbounds float, float* %tmp19323, i64 1
+ %tmp19325 = getelementptr inbounds float, float* %tmp19324, i64 1
+ %tmp19326 = getelementptr inbounds float, float* %tmp19325, i64 1
+ %tmp19327 = getelementptr inbounds float, float* %tmp19326, i64 1
+ %tmp19328 = getelementptr inbounds float, float* %tmp19327, i64 1
+ %tmp19329 = getelementptr inbounds float, float* %tmp19328, i64 1
+ %tmp19330 = getelementptr inbounds float, float* %tmp19329, i64 1
+ %tmp19331 = getelementptr inbounds float, float* %tmp19330, i64 1
+ %tmp19332 = getelementptr inbounds float, float* %tmp19331, i64 1
+ %tmp19333 = getelementptr inbounds float, float* %tmp19332, i64 1
+ %tmp19334 = getelementptr inbounds float, float* %tmp19333, i64 1
+ %tmp19335 = getelementptr inbounds float, float* %tmp19334, i64 1
+ %tmp19336 = getelementptr inbounds float, float* %tmp19335, i64 1
+ %tmp19337 = getelementptr inbounds float, float* %tmp19336, i64 1
+ %tmp19338 = getelementptr inbounds float, float* %tmp19337, i64 1
+ %tmp19339 = getelementptr inbounds float, float* %tmp19338, i64 1
+ %tmp19340 = getelementptr inbounds float, float* %tmp19339, i64 1
+ %tmp19341 = getelementptr inbounds float, float* %tmp19340, i64 1
+ %tmp19342 = getelementptr inbounds float, float* %tmp19341, i64 1
+ %tmp19343 = getelementptr inbounds float, float* %tmp19342, i64 1
+ %tmp19344 = getelementptr inbounds float, float* %tmp19343, i64 1
+ %tmp19345 = getelementptr inbounds float, float* %tmp19344, i64 1
+ %tmp19346 = getelementptr inbounds float, float* %tmp19345, i64 1
+ %tmp19347 = getelementptr inbounds float, float* %tmp19346, i64 1
+ %tmp19348 = getelementptr inbounds float, float* %tmp19347, i64 1
+ %tmp19349 = getelementptr inbounds float, float* %tmp19348, i64 1
+ %tmp19350 = getelementptr inbounds float, float* %tmp19349, i64 1
+ %tmp19351 = getelementptr inbounds float, float* %tmp19350, i64 1
+ %tmp19352 = getelementptr inbounds float, float* %tmp19351, i64 1
+ %tmp19353 = getelementptr inbounds float, float* %tmp19352, i64 1
+ %tmp19354 = getelementptr inbounds float, float* %tmp19353, i64 1
+ %tmp19355 = getelementptr inbounds float, float* %tmp19354, i64 1
+ %tmp19356 = getelementptr inbounds float, float* %tmp19355, i64 1
+ %tmp19357 = getelementptr inbounds float, float* %tmp19356, i64 1
+ %tmp19358 = getelementptr inbounds float, float* %tmp19357, i64 1
+ %tmp19359 = getelementptr inbounds float, float* %tmp19358, i64 1
+ %tmp19360 = getelementptr inbounds float, float* %tmp19359, i64 1
+ %tmp19361 = getelementptr inbounds float, float* %tmp19360, i64 1
+ %tmp19362 = getelementptr inbounds float, float* %tmp19361, i64 1
+ %tmp19363 = getelementptr inbounds float, float* %tmp19362, i64 1
+ %tmp19364 = getelementptr inbounds float, float* %tmp19363, i64 1
+ %tmp19365 = getelementptr inbounds float, float* %tmp19364, i64 1
+ %tmp19366 = getelementptr inbounds float, float* %tmp19365, i64 1
+ %tmp19367 = getelementptr inbounds float, float* %tmp19366, i64 1
+ %tmp19368 = getelementptr inbounds float, float* %tmp19367, i64 1
+ %tmp19369 = getelementptr inbounds float, float* %tmp19368, i64 1
+ %tmp19370 = getelementptr inbounds float, float* %tmp19369, i64 1
+ %tmp19371 = getelementptr inbounds float, float* %tmp19370, i64 1
+ %tmp19372 = getelementptr inbounds float, float* %tmp19371, i64 1
+ %tmp19373 = getelementptr inbounds float, float* %tmp19372, i64 1
+ %tmp19374 = getelementptr inbounds float, float* %tmp19373, i64 1
+ %tmp19375 = getelementptr inbounds float, float* %tmp19374, i64 1
+ %tmp19376 = getelementptr inbounds float, float* %tmp19375, i64 1
+ %tmp19377 = getelementptr inbounds float, float* %tmp19376, i64 1
+ %tmp19378 = getelementptr inbounds float, float* %tmp19377, i64 1
+ %tmp19379 = getelementptr inbounds float, float* %tmp19378, i64 1
+ %tmp19380 = getelementptr inbounds float, float* %tmp19379, i64 1
+ %tmp19381 = getelementptr inbounds float, float* %tmp19380, i64 1
+ %tmp19382 = getelementptr inbounds float, float* %tmp19381, i64 1
+ %tmp19383 = getelementptr inbounds float, float* %tmp19382, i64 1
+ %tmp19384 = getelementptr inbounds float, float* %tmp19383, i64 1
+ %tmp19385 = getelementptr inbounds float, float* %tmp19384, i64 1
+ %tmp19386 = getelementptr inbounds float, float* %tmp19385, i64 1
+ %tmp19387 = getelementptr inbounds float, float* %tmp19386, i64 1
+ %tmp19388 = getelementptr inbounds float, float* %tmp19387, i64 1
+ %tmp19389 = getelementptr inbounds float, float* %tmp19388, i64 1
+ %tmp19390 = getelementptr inbounds float, float* %tmp19389, i64 1
+ %tmp19391 = getelementptr inbounds float, float* %tmp19390, i64 1
+ %tmp19392 = getelementptr inbounds float, float* %tmp19391, i64 1
+ %tmp19393 = getelementptr inbounds float, float* %tmp19392, i64 1
+ %tmp19394 = getelementptr inbounds float, float* %tmp19393, i64 1
+ %tmp19395 = getelementptr inbounds float, float* %tmp19394, i64 1
+ %tmp19396 = getelementptr inbounds float, float* %tmp19395, i64 1
+ %tmp19397 = getelementptr inbounds float, float* %tmp19396, i64 1
+ %tmp19398 = getelementptr inbounds float, float* %tmp19397, i64 1
+ %tmp19399 = getelementptr inbounds float, float* %tmp19398, i64 1
+ %tmp19400 = getelementptr inbounds float, float* %tmp19399, i64 1
+ %tmp19401 = getelementptr inbounds float, float* %tmp19400, i64 1
+ %tmp19402 = getelementptr inbounds float, float* %tmp19401, i64 1
+ %tmp19403 = getelementptr inbounds float, float* %tmp19402, i64 1
+ %tmp19404 = getelementptr inbounds float, float* %tmp19403, i64 1
+ %tmp19405 = getelementptr inbounds float, float* %tmp19404, i64 1
+ %tmp19406 = getelementptr inbounds float, float* %tmp19405, i64 1
+ %tmp19407 = getelementptr inbounds float, float* %tmp19406, i64 1
+ %tmp19408 = getelementptr inbounds float, float* %tmp19407, i64 1
+ %tmp19409 = getelementptr inbounds float, float* %tmp19408, i64 1
+ %tmp19410 = getelementptr inbounds float, float* %tmp19409, i64 1
+ %tmp19411 = getelementptr inbounds float, float* %tmp19410, i64 1
+ %tmp19412 = getelementptr inbounds float, float* %tmp19411, i64 1
+ %tmp19413 = getelementptr inbounds float, float* %tmp19412, i64 1
+ %tmp19414 = getelementptr inbounds float, float* %tmp19413, i64 1
+ %tmp19415 = getelementptr inbounds float, float* %tmp19414, i64 1
+ %tmp19416 = getelementptr inbounds float, float* %tmp19415, i64 1
+ %tmp19417 = getelementptr inbounds float, float* %tmp19416, i64 1
+ %tmp19418 = getelementptr inbounds float, float* %tmp19417, i64 1
+ %tmp19419 = getelementptr inbounds float, float* %tmp19418, i64 1
+ %tmp19420 = getelementptr inbounds float, float* %tmp19419, i64 1
+ %tmp19421 = getelementptr inbounds float, float* %tmp19420, i64 1
+ %tmp19422 = getelementptr inbounds float, float* %tmp19421, i64 1
+ %tmp19423 = getelementptr inbounds float, float* %tmp19422, i64 1
+ %tmp19424 = getelementptr inbounds float, float* %tmp19423, i64 1
+ %tmp19425 = getelementptr inbounds float, float* %tmp19424, i64 1
+ %tmp19426 = getelementptr inbounds float, float* %tmp19425, i64 1
+ %tmp19427 = getelementptr inbounds float, float* %tmp19426, i64 1
+ %tmp19428 = getelementptr inbounds float, float* %tmp19427, i64 1
+ %tmp19429 = getelementptr inbounds float, float* %tmp19428, i64 1
+ %tmp19430 = getelementptr inbounds float, float* %tmp19429, i64 1
+ %tmp19431 = getelementptr inbounds float, float* %tmp19430, i64 1
+ %tmp19432 = getelementptr inbounds float, float* %tmp19431, i64 1
+ %tmp19433 = getelementptr inbounds float, float* %tmp19432, i64 1
+ %tmp19434 = getelementptr inbounds float, float* %tmp19433, i64 1
+ %tmp19435 = getelementptr inbounds float, float* %tmp19434, i64 1
+ %tmp19436 = getelementptr inbounds float, float* %tmp19435, i64 1
+ %tmp19437 = getelementptr inbounds float, float* %tmp19436, i64 1
+ %tmp19438 = getelementptr inbounds float, float* %tmp19437, i64 1
+ %tmp19439 = getelementptr inbounds float, float* %tmp19438, i64 1
+ %tmp19440 = getelementptr inbounds float, float* %tmp19439, i64 1
+ %tmp19441 = getelementptr inbounds float, float* %tmp19440, i64 1
+ %tmp19442 = getelementptr inbounds float, float* %tmp19441, i64 1
+ %tmp19443 = getelementptr inbounds float, float* %tmp19442, i64 1
+ %tmp19444 = getelementptr inbounds float, float* %tmp19443, i64 1
+ %tmp19445 = getelementptr inbounds float, float* %tmp19444, i64 1
+ %tmp19446 = getelementptr inbounds float, float* %tmp19445, i64 1
+ %tmp19447 = getelementptr inbounds float, float* %tmp19446, i64 1
+ %tmp19448 = getelementptr inbounds float, float* %tmp19447, i64 1
+ %tmp19449 = getelementptr inbounds float, float* %tmp19448, i64 1
+ %tmp19450 = getelementptr inbounds float, float* %tmp19449, i64 1
+ %tmp19451 = getelementptr inbounds float, float* %tmp19450, i64 1
+ %tmp19452 = getelementptr inbounds float, float* %tmp19451, i64 1
+ %tmp19453 = getelementptr inbounds float, float* %tmp19452, i64 1
+ %tmp19454 = getelementptr inbounds float, float* %tmp19453, i64 1
+ %tmp19455 = getelementptr inbounds float, float* %tmp19454, i64 1
+ %tmp19456 = getelementptr inbounds float, float* %tmp19455, i64 1
+ %tmp19457 = getelementptr inbounds float, float* %tmp19456, i64 1
+ %tmp19458 = getelementptr inbounds float, float* %tmp19457, i64 1
+ %tmp19459 = getelementptr inbounds float, float* %tmp19458, i64 1
+ %tmp19460 = getelementptr inbounds float, float* %tmp19459, i64 1
+ %tmp19461 = getelementptr inbounds float, float* %tmp19460, i64 1
+ %tmp19462 = getelementptr inbounds float, float* %tmp19461, i64 1
+ %tmp19463 = getelementptr inbounds float, float* %tmp19462, i64 1
+ %tmp19464 = getelementptr inbounds float, float* %tmp19463, i64 1
+ %tmp19465 = getelementptr inbounds float, float* %tmp19464, i64 1
+ %tmp19466 = getelementptr inbounds float, float* %tmp19465, i64 1
+ %tmp19467 = getelementptr inbounds float, float* %tmp19466, i64 1
+ %tmp19468 = getelementptr inbounds float, float* %tmp19467, i64 1
+ %tmp19469 = getelementptr inbounds float, float* %tmp19468, i64 1
+ %tmp19470 = getelementptr inbounds float, float* %tmp19469, i64 1
+ %tmp19471 = getelementptr inbounds float, float* %tmp19470, i64 1
+ %tmp19472 = getelementptr inbounds float, float* %tmp19471, i64 1
+ %tmp19473 = getelementptr inbounds float, float* %tmp19472, i64 1
+ %tmp19474 = getelementptr inbounds float, float* %tmp19473, i64 1
+ %tmp19475 = getelementptr inbounds float, float* %tmp19474, i64 1
+ %tmp19476 = getelementptr inbounds float, float* %tmp19475, i64 1
+ %tmp19477 = getelementptr inbounds float, float* %tmp19476, i64 1
+ %tmp19478 = getelementptr inbounds float, float* %tmp19477, i64 1
+ %tmp19479 = getelementptr inbounds float, float* %tmp19478, i64 1
+ %tmp19480 = getelementptr inbounds float, float* %tmp19479, i64 1
+ %tmp19481 = getelementptr inbounds float, float* %tmp19480, i64 1
+ %tmp19482 = getelementptr inbounds float, float* %tmp19481, i64 1
+ %tmp19483 = getelementptr inbounds float, float* %tmp19482, i64 1
+ %tmp19484 = getelementptr inbounds float, float* %tmp19483, i64 1
+ %tmp19485 = getelementptr inbounds float, float* %tmp19484, i64 1
+ %tmp19486 = getelementptr inbounds float, float* %tmp19485, i64 1
+ %tmp19487 = getelementptr inbounds float, float* %tmp19486, i64 1
+ %tmp19488 = getelementptr inbounds float, float* %tmp19487, i64 1
+ %tmp19489 = getelementptr inbounds float, float* %tmp19488, i64 1
+ %tmp19490 = getelementptr inbounds float, float* %tmp19489, i64 1
+ %tmp19491 = getelementptr inbounds float, float* %tmp19490, i64 1
+ %tmp19492 = getelementptr inbounds float, float* %tmp19491, i64 1
+ %tmp19493 = getelementptr inbounds float, float* %tmp19492, i64 1
+ %tmp19494 = getelementptr inbounds float, float* %tmp19493, i64 1
+ %tmp19495 = getelementptr inbounds float, float* %tmp19494, i64 1
+ %tmp19496 = getelementptr inbounds float, float* %tmp19495, i64 1
+ %tmp19497 = getelementptr inbounds float, float* %tmp19496, i64 1
+ %tmp19498 = getelementptr inbounds float, float* %tmp19497, i64 1
+ %tmp19499 = getelementptr inbounds float, float* %tmp19498, i64 1
+ %tmp19500 = getelementptr inbounds float, float* %tmp19499, i64 1
+ %tmp19501 = getelementptr inbounds float, float* %tmp19500, i64 1
+ %tmp19502 = getelementptr inbounds float, float* %tmp19501, i64 1
+ %tmp19503 = getelementptr inbounds float, float* %tmp19502, i64 1
+ %tmp19504 = getelementptr inbounds float, float* %tmp19503, i64 1
+ %tmp19505 = getelementptr inbounds float, float* %tmp19504, i64 1
+ %tmp19506 = getelementptr inbounds float, float* %tmp19505, i64 1
+ %tmp19507 = getelementptr inbounds float, float* %tmp19506, i64 1
+ %tmp19508 = getelementptr inbounds float, float* %tmp19507, i64 1
+ %tmp19509 = getelementptr inbounds float, float* %tmp19508, i64 1
+ %tmp19510 = getelementptr inbounds float, float* %tmp19509, i64 1
+ %tmp19511 = getelementptr inbounds float, float* %tmp19510, i64 1
+ %tmp19512 = getelementptr inbounds float, float* %tmp19511, i64 1
+ %tmp19513 = getelementptr inbounds float, float* %tmp19512, i64 1
+ %tmp19514 = getelementptr inbounds float, float* %tmp19513, i64 1
+ %tmp19515 = getelementptr inbounds float, float* %tmp19514, i64 1
+ %tmp19516 = getelementptr inbounds float, float* %tmp19515, i64 1
+ %tmp19517 = getelementptr inbounds float, float* %tmp19516, i64 1
+ %tmp19518 = getelementptr inbounds float, float* %tmp19517, i64 1
+ %tmp19519 = getelementptr inbounds float, float* %tmp19518, i64 1
+ %tmp19520 = getelementptr inbounds float, float* %tmp19519, i64 1
+ %tmp19521 = getelementptr inbounds float, float* %tmp19520, i64 1
+ %tmp19522 = getelementptr inbounds float, float* %tmp19521, i64 1
+ %tmp19523 = getelementptr inbounds float, float* %tmp19522, i64 1
+ %tmp19524 = getelementptr inbounds float, float* %tmp19523, i64 1
+ %tmp19525 = getelementptr inbounds float, float* %tmp19524, i64 1
+ %tmp19526 = getelementptr inbounds float, float* %tmp19525, i64 1
+ %tmp19527 = getelementptr inbounds float, float* %tmp19526, i64 1
+ %tmp19528 = getelementptr inbounds float, float* %tmp19527, i64 1
+ %tmp19529 = getelementptr inbounds float, float* %tmp19528, i64 1
+ %tmp19530 = getelementptr inbounds float, float* %tmp19529, i64 1
+ %tmp19531 = getelementptr inbounds float, float* %tmp19530, i64 1
+ %tmp19532 = getelementptr inbounds float, float* %tmp19531, i64 1
+ %tmp19533 = getelementptr inbounds float, float* %tmp19532, i64 1
+ %tmp19534 = getelementptr inbounds float, float* %tmp19533, i64 1
+ %tmp19535 = getelementptr inbounds float, float* %tmp19534, i64 1
+ %tmp19536 = getelementptr inbounds float, float* %tmp19535, i64 1
+ %tmp19537 = getelementptr inbounds float, float* %tmp19536, i64 1
+ %tmp19538 = getelementptr inbounds float, float* %tmp19537, i64 1
+ %tmp19539 = getelementptr inbounds float, float* %tmp19538, i64 1
+ %tmp19540 = getelementptr inbounds float, float* %tmp19539, i64 1
+ %tmp19541 = getelementptr inbounds float, float* %tmp19540, i64 1
+ %tmp19542 = getelementptr inbounds float, float* %tmp19541, i64 1
+ %tmp19543 = getelementptr inbounds float, float* %tmp19542, i64 1
+ %tmp19544 = getelementptr inbounds float, float* %tmp19543, i64 1
+ %tmp19545 = getelementptr inbounds float, float* %tmp19544, i64 1
+ %tmp19546 = getelementptr inbounds float, float* %tmp19545, i64 1
+ %tmp19547 = getelementptr inbounds float, float* %tmp19546, i64 1
+ %tmp19548 = getelementptr inbounds float, float* %tmp19547, i64 1
+ %tmp19549 = getelementptr inbounds float, float* %tmp19548, i64 1
+ %tmp19550 = getelementptr inbounds float, float* %tmp19549, i64 1
+ %tmp19551 = getelementptr inbounds float, float* %tmp19550, i64 1
+ %tmp19552 = getelementptr inbounds float, float* %tmp19551, i64 1
+ %tmp19553 = getelementptr inbounds float, float* %tmp19552, i64 1
+ %tmp19554 = getelementptr inbounds float, float* %tmp19553, i64 1
+ %tmp19555 = getelementptr inbounds float, float* %tmp19554, i64 1
+ %tmp19556 = getelementptr inbounds float, float* %tmp19555, i64 1
+ %tmp19557 = getelementptr inbounds float, float* %tmp19556, i64 1
+ %tmp19558 = getelementptr inbounds float, float* %tmp19557, i64 1
+ %tmp19559 = getelementptr inbounds float, float* %tmp19558, i64 1
+ %tmp19560 = getelementptr inbounds float, float* %tmp19559, i64 1
+ %tmp19561 = getelementptr inbounds float, float* %tmp19560, i64 1
+ %tmp19562 = getelementptr inbounds float, float* %tmp19561, i64 1
+ %tmp19563 = getelementptr inbounds float, float* %tmp19562, i64 1
+ %tmp19564 = getelementptr inbounds float, float* %tmp19563, i64 1
+ %tmp19565 = getelementptr inbounds float, float* %tmp19564, i64 1
+ %tmp19566 = getelementptr inbounds float, float* %tmp19565, i64 1
+ %tmp19567 = getelementptr inbounds float, float* %tmp19566, i64 1
+ %tmp19568 = getelementptr inbounds float, float* %tmp19567, i64 1
+ %tmp19569 = getelementptr inbounds float, float* %tmp19568, i64 1
+ %tmp19570 = getelementptr inbounds float, float* %tmp19569, i64 1
+ %tmp19571 = getelementptr inbounds float, float* %tmp19570, i64 1
+ %tmp19572 = getelementptr inbounds float, float* %tmp19571, i64 1
+ %tmp19573 = getelementptr inbounds float, float* %tmp19572, i64 1
+ %tmp19574 = getelementptr inbounds float, float* %tmp19573, i64 1
+ %tmp19575 = getelementptr inbounds float, float* %tmp19574, i64 1
+ %tmp19576 = getelementptr inbounds float, float* %tmp19575, i64 1
+ %tmp19577 = getelementptr inbounds float, float* %tmp19576, i64 1
+ %tmp19578 = getelementptr inbounds float, float* %tmp19577, i64 1
+ %tmp19579 = getelementptr inbounds float, float* %tmp19578, i64 1
+ %tmp19580 = getelementptr inbounds float, float* %tmp19579, i64 1
+ %tmp19581 = getelementptr inbounds float, float* %tmp19580, i64 1
+ %tmp19582 = getelementptr inbounds float, float* %tmp19581, i64 1
+ %tmp19583 = getelementptr inbounds float, float* %tmp19582, i64 1
+ %tmp19584 = getelementptr inbounds float, float* %tmp19583, i64 1
+ %tmp19585 = getelementptr inbounds float, float* %tmp19584, i64 1
+ %tmp19586 = getelementptr inbounds float, float* %tmp19585, i64 1
+ %tmp19587 = getelementptr inbounds float, float* %tmp19586, i64 1
+ %tmp19588 = getelementptr inbounds float, float* %tmp19587, i64 1
+ %tmp19589 = getelementptr inbounds float, float* %tmp19588, i64 1
+ %tmp19590 = getelementptr inbounds float, float* %tmp19589, i64 1
+ %tmp19591 = getelementptr inbounds float, float* %tmp19590, i64 1
+ %tmp19592 = getelementptr inbounds float, float* %tmp19591, i64 1
+ %tmp19593 = getelementptr inbounds float, float* %tmp19592, i64 1
+ %tmp19594 = getelementptr inbounds float, float* %tmp19593, i64 1
+ %tmp19595 = getelementptr inbounds float, float* %tmp19594, i64 1
+ %tmp19596 = getelementptr inbounds float, float* %tmp19595, i64 1
+ %tmp19597 = getelementptr inbounds float, float* %tmp19596, i64 1
+ %tmp19598 = getelementptr inbounds float, float* %tmp19597, i64 1
+ %tmp19599 = getelementptr inbounds float, float* %tmp19598, i64 1
+ %tmp19600 = getelementptr inbounds float, float* %tmp19599, i64 1
+ %tmp19601 = getelementptr inbounds float, float* %tmp19600, i64 1
+ %tmp19602 = getelementptr inbounds float, float* %tmp19601, i64 1
+ %tmp19603 = getelementptr inbounds float, float* %tmp19602, i64 1
+ %tmp19604 = getelementptr inbounds float, float* %tmp19603, i64 1
+ %tmp19605 = getelementptr inbounds float, float* %tmp19604, i64 1
+ %tmp19606 = getelementptr inbounds float, float* %tmp19605, i64 1
+ %tmp19607 = getelementptr inbounds float, float* %tmp19606, i64 1
+ %tmp19608 = getelementptr inbounds float, float* %tmp19607, i64 1
+ %tmp19609 = getelementptr inbounds float, float* %tmp19608, i64 1
+ %tmp19610 = getelementptr inbounds float, float* %tmp19609, i64 1
+ %tmp19611 = getelementptr inbounds float, float* %tmp19610, i64 1
+ %tmp19612 = getelementptr inbounds float, float* %tmp19611, i64 1
+ %tmp19613 = getelementptr inbounds float, float* %tmp19612, i64 1
+ %tmp19614 = getelementptr inbounds float, float* %tmp19613, i64 1
+ %tmp19615 = getelementptr inbounds float, float* %tmp19614, i64 1
+ %tmp19616 = getelementptr inbounds float, float* %tmp19615, i64 1
+ %tmp19617 = getelementptr inbounds float, float* %tmp19616, i64 1
+ %tmp19618 = getelementptr inbounds float, float* %tmp19617, i64 1
+ %tmp19619 = getelementptr inbounds float, float* %tmp19618, i64 1
+ %tmp19620 = getelementptr inbounds float, float* %tmp19619, i64 1
+ %tmp19621 = getelementptr inbounds float, float* %tmp19620, i64 1
+ %tmp19622 = getelementptr inbounds float, float* %tmp19621, i64 1
+ %tmp19623 = getelementptr inbounds float, float* %tmp19622, i64 1
+ %tmp19624 = getelementptr inbounds float, float* %tmp19623, i64 1
+ %tmp19625 = getelementptr inbounds float, float* %tmp19624, i64 1
+ %tmp19626 = getelementptr inbounds float, float* %tmp19625, i64 1
+ %tmp19627 = getelementptr inbounds float, float* %tmp19626, i64 1
+ %tmp19628 = getelementptr inbounds float, float* %tmp19627, i64 1
+ %tmp19629 = getelementptr inbounds float, float* %tmp19628, i64 1
+ %tmp19630 = getelementptr inbounds float, float* %tmp19629, i64 1
+ %tmp19631 = getelementptr inbounds float, float* %tmp19630, i64 1
+ %tmp19632 = getelementptr inbounds float, float* %tmp19631, i64 1
+ %tmp19633 = getelementptr inbounds float, float* %tmp19632, i64 1
+ %tmp19634 = getelementptr inbounds float, float* %tmp19633, i64 1
+ %tmp19635 = getelementptr inbounds float, float* %tmp19634, i64 1
+ %tmp19636 = getelementptr inbounds float, float* %tmp19635, i64 1
+ %tmp19637 = getelementptr inbounds float, float* %tmp19636, i64 1
+ %tmp19638 = getelementptr inbounds float, float* %tmp19637, i64 1
+ %tmp19639 = getelementptr inbounds float, float* %tmp19638, i64 1
+ %tmp19640 = getelementptr inbounds float, float* %tmp19639, i64 1
+ %tmp19641 = getelementptr inbounds float, float* %tmp19640, i64 1
+ %tmp19642 = getelementptr inbounds float, float* %tmp19641, i64 1
+ %tmp19643 = getelementptr inbounds float, float* %tmp19642, i64 1
+ %tmp19644 = getelementptr inbounds float, float* %tmp19643, i64 1
+ %tmp19645 = getelementptr inbounds float, float* %tmp19644, i64 1
+ %tmp19646 = getelementptr inbounds float, float* %tmp19645, i64 1
+ %tmp19647 = getelementptr inbounds float, float* %tmp19646, i64 1
+ %tmp19648 = getelementptr inbounds float, float* %tmp19647, i64 1
+ %tmp19649 = getelementptr inbounds float, float* %tmp19648, i64 1
+ %tmp19650 = getelementptr inbounds float, float* %tmp19649, i64 1
+ %tmp19651 = getelementptr inbounds float, float* %tmp19650, i64 1
+ %tmp19652 = getelementptr inbounds float, float* %tmp19651, i64 1
+ %tmp19653 = getelementptr inbounds float, float* %tmp19652, i64 1
+ %tmp19654 = getelementptr inbounds float, float* %tmp19653, i64 1
+ %tmp19655 = getelementptr inbounds float, float* %tmp19654, i64 1
+ %tmp19656 = getelementptr inbounds float, float* %tmp19655, i64 1
+ %tmp19657 = getelementptr inbounds float, float* %tmp19656, i64 1
+ %tmp19658 = getelementptr inbounds float, float* %tmp19657, i64 1
+ %tmp19659 = getelementptr inbounds float, float* %tmp19658, i64 1
+ %tmp19660 = getelementptr inbounds float, float* %tmp19659, i64 1
+ %tmp19661 = getelementptr inbounds float, float* %tmp19660, i64 1
+ %tmp19662 = getelementptr inbounds float, float* %tmp19661, i64 1
+ %tmp19663 = getelementptr inbounds float, float* %tmp19662, i64 1
+ %tmp19664 = getelementptr inbounds float, float* %tmp19663, i64 1
+ %tmp19665 = getelementptr inbounds float, float* %tmp19664, i64 1
+ %tmp19666 = getelementptr inbounds float, float* %tmp19665, i64 1
+ %tmp19667 = getelementptr inbounds float, float* %tmp19666, i64 1
+ %tmp19668 = getelementptr inbounds float, float* %tmp19667, i64 1
+ %tmp19669 = getelementptr inbounds float, float* %tmp19668, i64 1
+ %tmp19670 = getelementptr inbounds float, float* %tmp19669, i64 1
+ %tmp19671 = getelementptr inbounds float, float* %tmp19670, i64 1
+ %tmp19672 = getelementptr inbounds float, float* %tmp19671, i64 1
+ %tmp19673 = getelementptr inbounds float, float* %tmp19672, i64 1
+ %tmp19674 = getelementptr inbounds float, float* %tmp19673, i64 1
+ %tmp19675 = getelementptr inbounds float, float* %tmp19674, i64 1
+ %tmp19676 = getelementptr inbounds float, float* %tmp19675, i64 1
+ %tmp19677 = getelementptr inbounds float, float* %tmp19676, i64 1
+ %tmp19678 = getelementptr inbounds float, float* %tmp19677, i64 1
+ %tmp19679 = getelementptr inbounds float, float* %tmp19678, i64 1
+ %tmp19680 = getelementptr inbounds float, float* %tmp19679, i64 1
+ %tmp19681 = getelementptr inbounds float, float* %tmp19680, i64 1
+ %tmp19682 = getelementptr inbounds float, float* %tmp19681, i64 1
+ %tmp19683 = getelementptr inbounds float, float* %tmp19682, i64 1
+ %tmp19684 = getelementptr inbounds float, float* %tmp19683, i64 1
+ %tmp19685 = getelementptr inbounds float, float* %tmp19684, i64 1
+ %tmp19686 = getelementptr inbounds float, float* %tmp19685, i64 1
+ %tmp19687 = getelementptr inbounds float, float* %tmp19686, i64 1
+ %tmp19688 = getelementptr inbounds float, float* %tmp19687, i64 1
+ %tmp19689 = getelementptr inbounds float, float* %tmp19688, i64 1
+ %tmp19690 = getelementptr inbounds float, float* %tmp19689, i64 1
+ %tmp19691 = getelementptr inbounds float, float* %tmp19690, i64 1
+ %tmp19692 = getelementptr inbounds float, float* %tmp19691, i64 1
+ %tmp19693 = getelementptr inbounds float, float* %tmp19692, i64 1
+ %tmp19694 = getelementptr inbounds float, float* %tmp19693, i64 1
+ %tmp19695 = getelementptr inbounds float, float* %tmp19694, i64 1
+ %tmp19696 = getelementptr inbounds float, float* %tmp19695, i64 1
+ %tmp19697 = getelementptr inbounds float, float* %tmp19696, i64 1
+ %tmp19698 = getelementptr inbounds float, float* %tmp19697, i64 1
+ %tmp19699 = getelementptr inbounds float, float* %tmp19698, i64 1
+ %tmp19700 = getelementptr inbounds float, float* %tmp19699, i64 1
+ %tmp19701 = getelementptr inbounds float, float* %tmp19700, i64 1
+ %tmp19702 = getelementptr inbounds float, float* %tmp19701, i64 1
+ %tmp19703 = getelementptr inbounds float, float* %tmp19702, i64 1
+ %tmp19704 = getelementptr inbounds float, float* %tmp19703, i64 1
+ %tmp19705 = getelementptr inbounds float, float* %tmp19704, i64 1
+ %tmp19706 = getelementptr inbounds float, float* %tmp19705, i64 1
+ %tmp19707 = getelementptr inbounds float, float* %tmp19706, i64 1
+ %tmp19708 = getelementptr inbounds float, float* %tmp19707, i64 1
+ %tmp19709 = getelementptr inbounds float, float* %tmp19708, i64 1
+ %tmp19710 = getelementptr inbounds float, float* %tmp19709, i64 1
+ %tmp19711 = getelementptr inbounds float, float* %tmp19710, i64 1
+ %tmp19712 = getelementptr inbounds float, float* %tmp19711, i64 1
+ %tmp19713 = getelementptr inbounds float, float* %tmp19712, i64 1
+ %tmp19714 = getelementptr inbounds float, float* %tmp19713, i64 1
+ %tmp19715 = getelementptr inbounds float, float* %tmp19714, i64 1
+ %tmp19716 = getelementptr inbounds float, float* %tmp19715, i64 1
+ %tmp19717 = getelementptr inbounds float, float* %tmp19716, i64 1
+ %tmp19718 = getelementptr inbounds float, float* %tmp19717, i64 1
+ %tmp19719 = getelementptr inbounds float, float* %tmp19718, i64 1
+ %tmp19720 = getelementptr inbounds float, float* %tmp19719, i64 1
+ %tmp19721 = getelementptr inbounds float, float* %tmp19720, i64 1
+ %tmp19722 = getelementptr inbounds float, float* %tmp19721, i64 1
+ %tmp19723 = getelementptr inbounds float, float* %tmp19722, i64 1
+ %tmp19724 = getelementptr inbounds float, float* %tmp19723, i64 1
+ %tmp19725 = getelementptr inbounds float, float* %tmp19724, i64 1
+ %tmp19726 = getelementptr inbounds float, float* %tmp19725, i64 1
+ %tmp19727 = getelementptr inbounds float, float* %tmp19726, i64 1
+ %tmp19728 = getelementptr inbounds float, float* %tmp19727, i64 1
+ %tmp19729 = getelementptr inbounds float, float* %tmp19728, i64 1
+ %tmp19730 = getelementptr inbounds float, float* %tmp19729, i64 1
+ %tmp19731 = getelementptr inbounds float, float* %tmp19730, i64 1
+ %tmp19732 = getelementptr inbounds float, float* %tmp19731, i64 1
+ %tmp19733 = getelementptr inbounds float, float* %tmp19732, i64 1
+ %tmp19734 = getelementptr inbounds float, float* %tmp19733, i64 1
+ %tmp19735 = getelementptr inbounds float, float* %tmp19734, i64 1
+ %tmp19736 = getelementptr inbounds float, float* %tmp19735, i64 1
+ %tmp19737 = getelementptr inbounds float, float* %tmp19736, i64 1
+ %tmp19738 = getelementptr inbounds float, float* %tmp19737, i64 1
+ %tmp19739 = getelementptr inbounds float, float* %tmp19738, i64 1
+ %tmp19740 = getelementptr inbounds float, float* %tmp19739, i64 1
+ %tmp19741 = getelementptr inbounds float, float* %tmp19740, i64 1
+ %tmp19742 = getelementptr inbounds float, float* %tmp19741, i64 1
+ %tmp19743 = getelementptr inbounds float, float* %tmp19742, i64 1
+ %tmp19744 = getelementptr inbounds float, float* %tmp19743, i64 1
+ %tmp19745 = getelementptr inbounds float, float* %tmp19744, i64 1
+ %tmp19746 = getelementptr inbounds float, float* %tmp19745, i64 1
+ %tmp19747 = getelementptr inbounds float, float* %tmp19746, i64 1
+ %tmp19748 = getelementptr inbounds float, float* %tmp19747, i64 1
+ %tmp19749 = getelementptr inbounds float, float* %tmp19748, i64 1
+ %tmp19750 = getelementptr inbounds float, float* %tmp19749, i64 1
+ %tmp19751 = getelementptr inbounds float, float* %tmp19750, i64 1
+ %tmp19752 = getelementptr inbounds float, float* %tmp19751, i64 1
+ %tmp19753 = getelementptr inbounds float, float* %tmp19752, i64 1
+ %tmp19754 = getelementptr inbounds float, float* %tmp19753, i64 1
+ %tmp19755 = getelementptr inbounds float, float* %tmp19754, i64 1
+ %tmp19756 = getelementptr inbounds float, float* %tmp19755, i64 1
+ %tmp19757 = getelementptr inbounds float, float* %tmp19756, i64 1
+ %tmp19758 = getelementptr inbounds float, float* %tmp19757, i64 1
+ %tmp19759 = getelementptr inbounds float, float* %tmp19758, i64 1
+ %tmp19760 = getelementptr inbounds float, float* %tmp19759, i64 1
+ %tmp19761 = getelementptr inbounds float, float* %tmp19760, i64 1
+ %tmp19762 = getelementptr inbounds float, float* %tmp19761, i64 1
+ %tmp19763 = getelementptr inbounds float, float* %tmp19762, i64 1
+ %tmp19764 = getelementptr inbounds float, float* %tmp19763, i64 1
+ %tmp19765 = getelementptr inbounds float, float* %tmp19764, i64 1
+ %tmp19766 = getelementptr inbounds float, float* %tmp19765, i64 1
+ %tmp19767 = getelementptr inbounds float, float* %tmp19766, i64 1
+ %tmp19768 = getelementptr inbounds float, float* %tmp19767, i64 1
+ %tmp19769 = getelementptr inbounds float, float* %tmp19768, i64 1
+ %tmp19770 = getelementptr inbounds float, float* %tmp19769, i64 1
+ %tmp19771 = getelementptr inbounds float, float* %tmp19770, i64 1
+ %tmp19772 = getelementptr inbounds float, float* %tmp19771, i64 1
+ %tmp19773 = getelementptr inbounds float, float* %tmp19772, i64 1
+ %tmp19774 = getelementptr inbounds float, float* %tmp19773, i64 1
+ %tmp19775 = getelementptr inbounds float, float* %tmp19774, i64 1
+ %tmp19776 = getelementptr inbounds float, float* %tmp19775, i64 1
+ %tmp19777 = getelementptr inbounds float, float* %tmp19776, i64 1
+ %tmp19778 = getelementptr inbounds float, float* %tmp19777, i64 1
+ %tmp19779 = getelementptr inbounds float, float* %tmp19778, i64 1
+ %tmp19780 = getelementptr inbounds float, float* %tmp19779, i64 1
+ %tmp19781 = getelementptr inbounds float, float* %tmp19780, i64 1
+ %tmp19782 = getelementptr inbounds float, float* %tmp19781, i64 1
+ %tmp19783 = getelementptr inbounds float, float* %tmp19782, i64 1
+ %tmp19784 = getelementptr inbounds float, float* %tmp19783, i64 1
+ %tmp19785 = getelementptr inbounds float, float* %tmp19784, i64 1
+ %tmp19786 = getelementptr inbounds float, float* %tmp19785, i64 1
+ %tmp19787 = getelementptr inbounds float, float* %tmp19786, i64 1
+ %tmp19788 = getelementptr inbounds float, float* %tmp19787, i64 1
+ %tmp19789 = getelementptr inbounds float, float* %tmp19788, i64 1
+ %tmp19790 = getelementptr inbounds float, float* %tmp19789, i64 1
+ %tmp19791 = getelementptr inbounds float, float* %tmp19790, i64 1
+ %tmp19792 = getelementptr inbounds float, float* %tmp19791, i64 1
+ %tmp19793 = getelementptr inbounds float, float* %tmp19792, i64 1
+ %tmp19794 = getelementptr inbounds float, float* %tmp19793, i64 1
+ %tmp19795 = getelementptr inbounds float, float* %tmp19794, i64 1
+ %tmp19796 = getelementptr inbounds float, float* %tmp19795, i64 1
+ %tmp19797 = getelementptr inbounds float, float* %tmp19796, i64 1
+ %tmp19798 = getelementptr inbounds float, float* %tmp19797, i64 1
+ %tmp19799 = getelementptr inbounds float, float* %tmp19798, i64 1
+ %tmp19800 = getelementptr inbounds float, float* %tmp19799, i64 1
+ %tmp19801 = getelementptr inbounds float, float* %tmp19800, i64 1
+ %tmp19802 = getelementptr inbounds float, float* %tmp19801, i64 1
+ %tmp19803 = getelementptr inbounds float, float* %tmp19802, i64 1
+ %tmp19804 = getelementptr inbounds float, float* %tmp19803, i64 1
+ %tmp19805 = getelementptr inbounds float, float* %tmp19804, i64 1
+ %tmp19806 = getelementptr inbounds float, float* %tmp19805, i64 1
+ %tmp19807 = getelementptr inbounds float, float* %tmp19806, i64 1
+ %tmp19808 = getelementptr inbounds float, float* %tmp19807, i64 1
+ %tmp19809 = getelementptr inbounds float, float* %tmp19808, i64 1
+ %tmp19810 = getelementptr inbounds float, float* %tmp19809, i64 1
+ %tmp19811 = getelementptr inbounds float, float* %tmp19810, i64 1
+ %tmp19812 = getelementptr inbounds float, float* %tmp19811, i64 1
+ %tmp19813 = getelementptr inbounds float, float* %tmp19812, i64 1
+ %tmp19814 = getelementptr inbounds float, float* %tmp19813, i64 1
+ %tmp19815 = getelementptr inbounds float, float* %tmp19814, i64 1
+ %tmp19816 = getelementptr inbounds float, float* %tmp19815, i64 1
+ %tmp19817 = getelementptr inbounds float, float* %tmp19816, i64 1
+ %tmp19818 = getelementptr inbounds float, float* %tmp19817, i64 1
+ %tmp19819 = getelementptr inbounds float, float* %tmp19818, i64 1
+ %tmp19820 = getelementptr inbounds float, float* %tmp19819, i64 1
+ %tmp19821 = getelementptr inbounds float, float* %tmp19820, i64 1
+ %tmp19822 = getelementptr inbounds float, float* %tmp19821, i64 1
+ %tmp19823 = getelementptr inbounds float, float* %tmp19822, i64 1
+ %tmp19824 = getelementptr inbounds float, float* %tmp19823, i64 1
+ %tmp19825 = getelementptr inbounds float, float* %tmp19824, i64 1
+ %tmp19826 = getelementptr inbounds float, float* %tmp19825, i64 1
+ %tmp19827 = getelementptr inbounds float, float* %tmp19826, i64 1
+ %tmp19828 = getelementptr inbounds float, float* %tmp19827, i64 1
+ %tmp19829 = getelementptr inbounds float, float* %tmp19828, i64 1
+ %tmp19830 = getelementptr inbounds float, float* %tmp19829, i64 1
+ %tmp19831 = getelementptr inbounds float, float* %tmp19830, i64 1
+ %tmp19832 = getelementptr inbounds float, float* %tmp19831, i64 1
+ %tmp19833 = getelementptr inbounds float, float* %tmp19832, i64 1
+ %tmp19834 = getelementptr inbounds float, float* %tmp19833, i64 1
+ %tmp19835 = getelementptr inbounds float, float* %tmp19834, i64 1
+ %tmp19836 = getelementptr inbounds float, float* %tmp19835, i64 1
+ %tmp19837 = getelementptr inbounds float, float* %tmp19836, i64 1
+ %tmp19838 = getelementptr inbounds float, float* %tmp19837, i64 1
+ %tmp19839 = getelementptr inbounds float, float* %tmp19838, i64 1
+ %tmp19840 = getelementptr inbounds float, float* %tmp19839, i64 1
+ %tmp19841 = getelementptr inbounds float, float* %tmp19840, i64 1
+ %tmp19842 = getelementptr inbounds float, float* %tmp19841, i64 1
+ %tmp19843 = getelementptr inbounds float, float* %tmp19842, i64 1
+ %tmp19844 = getelementptr inbounds float, float* %tmp19843, i64 1
+ %tmp19845 = getelementptr inbounds float, float* %tmp19844, i64 1
+ %tmp19846 = getelementptr inbounds float, float* %tmp19845, i64 1
+ %tmp19847 = getelementptr inbounds float, float* %tmp19846, i64 1
+ %tmp19848 = getelementptr inbounds float, float* %tmp19847, i64 1
+ %tmp19849 = getelementptr inbounds float, float* %tmp19848, i64 1
+ %tmp19850 = getelementptr inbounds float, float* %tmp19849, i64 1
+ %tmp19851 = getelementptr inbounds float, float* %tmp19850, i64 1
+ %tmp19852 = getelementptr inbounds float, float* %tmp19851, i64 1
+ %tmp19853 = getelementptr inbounds float, float* %tmp19852, i64 1
+ %tmp19854 = getelementptr inbounds float, float* %tmp19853, i64 1
+ %tmp19855 = getelementptr inbounds float, float* %tmp19854, i64 1
+ %tmp19856 = getelementptr inbounds float, float* %tmp19855, i64 1
+ %tmp19857 = getelementptr inbounds float, float* %tmp19856, i64 1
+ %tmp19858 = getelementptr inbounds float, float* %tmp19857, i64 1
+ %tmp19859 = getelementptr inbounds float, float* %tmp19858, i64 1
+ %tmp19860 = getelementptr inbounds float, float* %tmp19859, i64 1
+ %tmp19861 = getelementptr inbounds float, float* %tmp19860, i64 1
+ %tmp19862 = getelementptr inbounds float, float* %tmp19861, i64 1
+ %tmp19863 = getelementptr inbounds float, float* %tmp19862, i64 1
+ %tmp19864 = getelementptr inbounds float, float* %tmp19863, i64 1
+ %tmp19865 = getelementptr inbounds float, float* %tmp19864, i64 1
+ %tmp19866 = getelementptr inbounds float, float* %tmp19865, i64 1
+ %tmp19867 = getelementptr inbounds float, float* %tmp19866, i64 1
+ %tmp19868 = getelementptr inbounds float, float* %tmp19867, i64 1
+ %tmp19869 = getelementptr inbounds float, float* %tmp19868, i64 1
+ %tmp19870 = getelementptr inbounds float, float* %tmp19869, i64 1
+ %tmp19871 = getelementptr inbounds float, float* %tmp19870, i64 1
+ %tmp19872 = getelementptr inbounds float, float* %tmp19871, i64 1
+ %tmp19873 = getelementptr inbounds float, float* %tmp19872, i64 1
+ %tmp19874 = getelementptr inbounds float, float* %tmp19873, i64 1
+ %tmp19875 = getelementptr inbounds float, float* %tmp19874, i64 1
+ %tmp19876 = getelementptr inbounds float, float* %tmp19875, i64 1
+ %tmp19877 = getelementptr inbounds float, float* %tmp19876, i64 1
+ %tmp19878 = getelementptr inbounds float, float* %tmp19877, i64 1
+ %tmp19879 = getelementptr inbounds float, float* %tmp19878, i64 1
+ %tmp19880 = getelementptr inbounds float, float* %tmp19879, i64 1
+ %tmp19881 = getelementptr inbounds float, float* %tmp19880, i64 1
+ %tmp19882 = getelementptr inbounds float, float* %tmp19881, i64 1
+ %tmp19883 = getelementptr inbounds float, float* %tmp19882, i64 1
+ %tmp19884 = getelementptr inbounds float, float* %tmp19883, i64 1
+ %tmp19885 = getelementptr inbounds float, float* %tmp19884, i64 1
+ %tmp19886 = getelementptr inbounds float, float* %tmp19885, i64 1
+ %tmp19887 = getelementptr inbounds float, float* %tmp19886, i64 1
+ %tmp19888 = getelementptr inbounds float, float* %tmp19887, i64 1
+ %tmp19889 = getelementptr inbounds float, float* %tmp19888, i64 1
+ %tmp19890 = getelementptr inbounds float, float* %tmp19889, i64 1
+ %tmp19891 = getelementptr inbounds float, float* %tmp19890, i64 1
+ %tmp19892 = getelementptr inbounds float, float* %tmp19891, i64 1
+ %tmp19893 = getelementptr inbounds float, float* %tmp19892, i64 1
+ %tmp19894 = getelementptr inbounds float, float* %tmp19893, i64 1
+ %tmp19895 = getelementptr inbounds float, float* %tmp19894, i64 1
+ %tmp19896 = getelementptr inbounds float, float* %tmp19895, i64 1
+ %tmp19897 = getelementptr inbounds float, float* %tmp19896, i64 1
+ %tmp19898 = getelementptr inbounds float, float* %tmp19897, i64 1
+ %tmp19899 = getelementptr inbounds float, float* %tmp19898, i64 1
+ %tmp19900 = getelementptr inbounds float, float* %tmp19899, i64 1
+ %tmp19901 = getelementptr inbounds float, float* %tmp19900, i64 1
+ %tmp19902 = getelementptr inbounds float, float* %tmp19901, i64 1
+ %tmp19903 = getelementptr inbounds float, float* %tmp19902, i64 1
+ %tmp19904 = getelementptr inbounds float, float* %tmp19903, i64 1
+ %tmp19905 = getelementptr inbounds float, float* %tmp19904, i64 1
+ %tmp19906 = getelementptr inbounds float, float* %tmp19905, i64 1
+ %tmp19907 = getelementptr inbounds float, float* %tmp19906, i64 1
+ %tmp19908 = getelementptr inbounds float, float* %tmp19907, i64 1
+ %tmp19909 = getelementptr inbounds float, float* %tmp19908, i64 1
+ %tmp19910 = getelementptr inbounds float, float* %tmp19909, i64 1
+ %tmp19911 = getelementptr inbounds float, float* %tmp19910, i64 1
+ %tmp19912 = getelementptr inbounds float, float* %tmp19911, i64 1
+ %tmp19913 = getelementptr inbounds float, float* %tmp19912, i64 1
+ %tmp19914 = getelementptr inbounds float, float* %tmp19913, i64 1
+ %tmp19915 = getelementptr inbounds float, float* %tmp19914, i64 1
+ %tmp19916 = getelementptr inbounds float, float* %tmp19915, i64 1
+ %tmp19917 = getelementptr inbounds float, float* %tmp19916, i64 1
+ %tmp19918 = getelementptr inbounds float, float* %tmp19917, i64 1
+ %tmp19919 = getelementptr inbounds float, float* %tmp19918, i64 1
+ %tmp19920 = getelementptr inbounds float, float* %tmp19919, i64 1
+ %tmp19921 = getelementptr inbounds float, float* %tmp19920, i64 1
+ %tmp19922 = getelementptr inbounds float, float* %tmp19921, i64 1
+ %tmp19923 = getelementptr inbounds float, float* %tmp19922, i64 1
+ %tmp19924 = getelementptr inbounds float, float* %tmp19923, i64 1
+ %tmp19925 = getelementptr inbounds float, float* %tmp19924, i64 1
+ %tmp19926 = getelementptr inbounds float, float* %tmp19925, i64 1
+ %tmp19927 = getelementptr inbounds float, float* %tmp19926, i64 1
+ %tmp19928 = getelementptr inbounds float, float* %tmp19927, i64 1
+ %tmp19929 = getelementptr inbounds float, float* %tmp19928, i64 1
+ %tmp19930 = getelementptr inbounds float, float* %tmp19929, i64 1
+ %tmp19931 = getelementptr inbounds float, float* %tmp19930, i64 1
+ %tmp19932 = getelementptr inbounds float, float* %tmp19931, i64 1
+ %tmp19933 = getelementptr inbounds float, float* %tmp19932, i64 1
+ %tmp19934 = getelementptr inbounds float, float* %tmp19933, i64 1
+ %tmp19935 = getelementptr inbounds float, float* %tmp19934, i64 1
+ %tmp19936 = getelementptr inbounds float, float* %tmp19935, i64 1
+ %tmp19937 = getelementptr inbounds float, float* %tmp19936, i64 1
+ %tmp19938 = getelementptr inbounds float, float* %tmp19937, i64 1
+ %tmp19939 = getelementptr inbounds float, float* %tmp19938, i64 1
+ %tmp19940 = getelementptr inbounds float, float* %tmp19939, i64 1
+ %tmp19941 = getelementptr inbounds float, float* %tmp19940, i64 1
+ %tmp19942 = getelementptr inbounds float, float* %tmp19941, i64 1
+ %tmp19943 = getelementptr inbounds float, float* %tmp19942, i64 1
+ %tmp19944 = getelementptr inbounds float, float* %tmp19943, i64 1
+ %tmp19945 = getelementptr inbounds float, float* %tmp19944, i64 1
+ %tmp19946 = getelementptr inbounds float, float* %tmp19945, i64 1
+ %tmp19947 = getelementptr inbounds float, float* %tmp19946, i64 1
+ %tmp19948 = getelementptr inbounds float, float* %tmp19947, i64 1
+ %tmp19949 = getelementptr inbounds float, float* %tmp19948, i64 1
+ %tmp19950 = getelementptr inbounds float, float* %tmp19949, i64 1
+ %tmp19951 = getelementptr inbounds float, float* %tmp19950, i64 1
+ %tmp19952 = getelementptr inbounds float, float* %tmp19951, i64 1
+ %tmp19953 = getelementptr inbounds float, float* %tmp19952, i64 1
+ %tmp19954 = getelementptr inbounds float, float* %tmp19953, i64 1
+ %tmp19955 = getelementptr inbounds float, float* %tmp19954, i64 1
+ %tmp19956 = getelementptr inbounds float, float* %tmp19955, i64 1
+ %tmp19957 = getelementptr inbounds float, float* %tmp19956, i64 1
+ %tmp19958 = getelementptr inbounds float, float* %tmp19957, i64 1
+ %tmp19959 = getelementptr inbounds float, float* %tmp19958, i64 1
+ %tmp19960 = getelementptr inbounds float, float* %tmp19959, i64 1
+ %tmp19961 = getelementptr inbounds float, float* %tmp19960, i64 1
+ %tmp19962 = getelementptr inbounds float, float* %tmp19961, i64 1
+ %tmp19963 = getelementptr inbounds float, float* %tmp19962, i64 1
+ %tmp19964 = getelementptr inbounds float, float* %tmp19963, i64 1
+ %tmp19965 = getelementptr inbounds float, float* %tmp19964, i64 1
+ %tmp19966 = getelementptr inbounds float, float* %tmp19965, i64 1
+ %tmp19967 = getelementptr inbounds float, float* %tmp19966, i64 1
+ %tmp19968 = getelementptr inbounds float, float* %tmp19967, i64 1
+ %tmp19969 = getelementptr inbounds float, float* %tmp19968, i64 1
+ %tmp19970 = getelementptr inbounds float, float* %tmp19969, i64 1
+ %tmp19971 = getelementptr inbounds float, float* %tmp19970, i64 1
+ %tmp19972 = getelementptr inbounds float, float* %tmp19971, i64 1
+ %tmp19973 = getelementptr inbounds float, float* %tmp19972, i64 1
+ %tmp19974 = getelementptr inbounds float, float* %tmp19973, i64 1
+ %tmp19975 = getelementptr inbounds float, float* %tmp19974, i64 1
+ %tmp19976 = getelementptr inbounds float, float* %tmp19975, i64 1
+ %tmp19977 = getelementptr inbounds float, float* %tmp19976, i64 1
+ %tmp19978 = getelementptr inbounds float, float* %tmp19977, i64 1
+ %tmp19979 = getelementptr inbounds float, float* %tmp19978, i64 1
+ %tmp19980 = getelementptr inbounds float, float* %tmp19979, i64 1
+ %tmp19981 = getelementptr inbounds float, float* %tmp19980, i64 1
+ %tmp19982 = getelementptr inbounds float, float* %tmp19981, i64 1
+ %tmp19983 = getelementptr inbounds float, float* %tmp19982, i64 1
+ %tmp19984 = getelementptr inbounds float, float* %tmp19983, i64 1
+ %tmp19985 = getelementptr inbounds float, float* %tmp19984, i64 1
+ %tmp19986 = getelementptr inbounds float, float* %tmp19985, i64 1
+ %tmp19987 = getelementptr inbounds float, float* %tmp19986, i64 1
+ %tmp19988 = getelementptr inbounds float, float* %tmp19987, i64 1
+ %tmp19989 = getelementptr inbounds float, float* %tmp19988, i64 1
+ %tmp19990 = getelementptr inbounds float, float* %tmp19989, i64 1
+ %tmp19991 = getelementptr inbounds float, float* %tmp19990, i64 1
+ %tmp19992 = getelementptr inbounds float, float* %tmp19991, i64 1
+ %tmp19993 = getelementptr inbounds float, float* %tmp19992, i64 1
+ %tmp19994 = getelementptr inbounds float, float* %tmp19993, i64 1
+ %tmp19995 = getelementptr inbounds float, float* %tmp19994, i64 1
+ %tmp19996 = getelementptr inbounds float, float* %tmp19995, i64 1
+ %tmp19997 = getelementptr inbounds float, float* %tmp19996, i64 1
+ %tmp19998 = getelementptr inbounds float, float* %tmp19997, i64 1
+ %tmp19999 = getelementptr inbounds float, float* %tmp19998, i64 1
+ %tmp20000 = getelementptr inbounds float, float* %tmp19999, i64 1
+ %tmp20001 = getelementptr inbounds float, float* %tmp20000, i64 1
+ %tmp20002 = getelementptr inbounds float, float* %tmp20001, i64 1
+ %tmp20003 = getelementptr inbounds float, float* %tmp20002, i64 1
+ %tmp20004 = getelementptr inbounds float, float* %tmp20003, i64 1
+ %tmp20005 = getelementptr inbounds float, float* %tmp20004, i64 1
+ %tmp20006 = getelementptr inbounds float, float* %tmp20005, i64 1
+ %tmp20007 = getelementptr inbounds float, float* %tmp20006, i64 1
+ %tmp20008 = getelementptr inbounds float, float* %tmp20007, i64 1
+ %tmp20009 = getelementptr inbounds float, float* %tmp20008, i64 1
+ %tmp20010 = getelementptr inbounds float, float* %tmp20009, i64 1
+ %tmp20011 = getelementptr inbounds float, float* %tmp20010, i64 1
+ %tmp20012 = getelementptr inbounds float, float* %tmp20011, i64 1
+ %tmp20013 = getelementptr inbounds float, float* %tmp20012, i64 1
+ %tmp20014 = getelementptr inbounds float, float* %tmp20013, i64 1
+ %tmp20015 = getelementptr inbounds float, float* %tmp20014, i64 1
+ %tmp20016 = getelementptr inbounds float, float* %tmp20015, i64 1
+ %tmp20017 = getelementptr inbounds float, float* %tmp20016, i64 1
+ %tmp20018 = getelementptr inbounds float, float* %tmp20017, i64 1
+ %tmp20019 = getelementptr inbounds float, float* %tmp20018, i64 1
+ %tmp20020 = getelementptr inbounds float, float* %tmp20019, i64 1
+ %tmp20021 = getelementptr inbounds float, float* %tmp20020, i64 1
+ %tmp20022 = getelementptr inbounds float, float* %tmp20021, i64 1
+ %tmp20023 = getelementptr inbounds float, float* %tmp20022, i64 1
+ %tmp20024 = getelementptr inbounds float, float* %tmp20023, i64 1
+ %tmp20025 = getelementptr inbounds float, float* %tmp20024, i64 1
+ %tmp20026 = getelementptr inbounds float, float* %tmp20025, i64 1
+ %tmp20027 = getelementptr inbounds float, float* %tmp20026, i64 1
+ %tmp20028 = getelementptr inbounds float, float* %tmp20027, i64 1
+ %tmp20029 = getelementptr inbounds float, float* %tmp20028, i64 1
+ %tmp20030 = getelementptr inbounds float, float* %tmp20029, i64 1
+ %tmp20031 = getelementptr inbounds float, float* %tmp20030, i64 1
+ %tmp20032 = getelementptr inbounds float, float* %tmp20031, i64 1
+ %tmp20033 = getelementptr inbounds float, float* %tmp20032, i64 1
+ %tmp20034 = getelementptr inbounds float, float* %tmp20033, i64 1
+ %tmp20035 = getelementptr inbounds float, float* %tmp20034, i64 1
+ %tmp20036 = getelementptr inbounds float, float* %tmp20035, i64 1
+ %tmp20037 = getelementptr inbounds float, float* %tmp20036, i64 1
+ %tmp20038 = getelementptr inbounds float, float* %tmp20037, i64 1
+ %tmp20039 = getelementptr inbounds float, float* %tmp20038, i64 1
+ %tmp20040 = getelementptr inbounds float, float* %tmp20039, i64 1
+ %tmp20041 = getelementptr inbounds float, float* %tmp20040, i64 1
+ %tmp20042 = getelementptr inbounds float, float* %tmp20041, i64 1
+ %tmp20043 = getelementptr inbounds float, float* %tmp20042, i64 1
+ %tmp20044 = getelementptr inbounds float, float* %tmp20043, i64 1
+ %tmp20045 = getelementptr inbounds float, float* %tmp20044, i64 1
+ %tmp20046 = getelementptr inbounds float, float* %tmp20045, i64 1
+ %tmp20047 = getelementptr inbounds float, float* %tmp20046, i64 1
+ %tmp20048 = getelementptr inbounds float, float* %tmp20047, i64 1
+ %tmp20049 = getelementptr inbounds float, float* %tmp20048, i64 1
+ %tmp20050 = getelementptr inbounds float, float* %tmp20049, i64 1
+ %tmp20051 = getelementptr inbounds float, float* %tmp20050, i64 1
+ %tmp20052 = getelementptr inbounds float, float* %tmp20051, i64 1
+ %tmp20053 = getelementptr inbounds float, float* %tmp20052, i64 1
+ %tmp20054 = getelementptr inbounds float, float* %tmp20053, i64 1
+ %tmp20055 = getelementptr inbounds float, float* %tmp20054, i64 1
+ %tmp20056 = getelementptr inbounds float, float* %tmp20055, i64 1
+ %tmp20057 = getelementptr inbounds float, float* %tmp20056, i64 1
+ %tmp20058 = getelementptr inbounds float, float* %tmp20057, i64 1
+ %tmp20059 = getelementptr inbounds float, float* %tmp20058, i64 1
+ %tmp20060 = getelementptr inbounds float, float* %tmp20059, i64 1
+ %tmp20061 = getelementptr inbounds float, float* %tmp20060, i64 1
+ %tmp20062 = getelementptr inbounds float, float* %tmp20061, i64 1
+ %tmp20063 = getelementptr inbounds float, float* %tmp20062, i64 1
+ %tmp20064 = getelementptr inbounds float, float* %tmp20063, i64 1
+ %tmp20065 = getelementptr inbounds float, float* %tmp20064, i64 1
+ %tmp20066 = getelementptr inbounds float, float* %tmp20065, i64 1
+ %tmp20067 = getelementptr inbounds float, float* %tmp20066, i64 1
+ %tmp20068 = getelementptr inbounds float, float* %tmp20067, i64 1
+ %tmp20069 = getelementptr inbounds float, float* %tmp20068, i64 1
+ %tmp20070 = getelementptr inbounds float, float* %tmp20069, i64 1
+ %tmp20071 = getelementptr inbounds float, float* %tmp20070, i64 1
+ %tmp20072 = getelementptr inbounds float, float* %tmp20071, i64 1
+ %tmp20073 = getelementptr inbounds float, float* %tmp20072, i64 1
+ %tmp20074 = getelementptr inbounds float, float* %tmp20073, i64 1
+ %tmp20075 = getelementptr inbounds float, float* %tmp20074, i64 1
+ %tmp20076 = getelementptr inbounds float, float* %tmp20075, i64 1
+ %tmp20077 = getelementptr inbounds float, float* %tmp20076, i64 1
+ %tmp20078 = getelementptr inbounds float, float* %tmp20077, i64 1
+ %tmp20079 = getelementptr inbounds float, float* %tmp20078, i64 1
+ %tmp20080 = getelementptr inbounds float, float* %tmp20079, i64 1
+ %tmp20081 = getelementptr inbounds float, float* %tmp20080, i64 1
+ %tmp20082 = getelementptr inbounds float, float* %tmp20081, i64 1
+ %tmp20083 = getelementptr inbounds float, float* %tmp20082, i64 1
+ %tmp20084 = getelementptr inbounds float, float* %tmp20083, i64 1
+ %tmp20085 = getelementptr inbounds float, float* %tmp20084, i64 1
+ %tmp20086 = getelementptr inbounds float, float* %tmp20085, i64 1
+ %tmp20087 = getelementptr inbounds float, float* %tmp20086, i64 1
+ %tmp20088 = getelementptr inbounds float, float* %tmp20087, i64 1
+ %tmp20089 = getelementptr inbounds float, float* %tmp20088, i64 1
+ %tmp20090 = getelementptr inbounds float, float* %tmp20089, i64 1
+ %tmp20091 = getelementptr inbounds float, float* %tmp20090, i64 1
+ %tmp20092 = getelementptr inbounds float, float* %tmp20091, i64 1
+ %tmp20093 = getelementptr inbounds float, float* %tmp20092, i64 1
+ %tmp20094 = getelementptr inbounds float, float* %tmp20093, i64 1
+ %tmp20095 = getelementptr inbounds float, float* %tmp20094, i64 1
+ %tmp20096 = getelementptr inbounds float, float* %tmp20095, i64 1
+ %tmp20097 = getelementptr inbounds float, float* %tmp20096, i64 1
+ %tmp20098 = getelementptr inbounds float, float* %tmp20097, i64 1
+ %tmp20099 = getelementptr inbounds float, float* %tmp20098, i64 1
+ %tmp20100 = getelementptr inbounds float, float* %tmp20099, i64 1
+ %tmp20101 = getelementptr inbounds float, float* %tmp20100, i64 1
+ %tmp20102 = getelementptr inbounds float, float* %tmp20101, i64 1
+ %tmp20103 = getelementptr inbounds float, float* %tmp20102, i64 1
+ %tmp20104 = getelementptr inbounds float, float* %tmp20103, i64 1
+ %tmp20105 = getelementptr inbounds float, float* %tmp20104, i64 1
+ %tmp20106 = getelementptr inbounds float, float* %tmp20105, i64 1
+ %tmp20107 = getelementptr inbounds float, float* %tmp20106, i64 1
+ %tmp20108 = getelementptr inbounds float, float* %tmp20107, i64 1
+ %tmp20109 = getelementptr inbounds float, float* %tmp20108, i64 1
+ %tmp20110 = getelementptr inbounds float, float* %tmp20109, i64 1
+ %tmp20111 = getelementptr inbounds float, float* %tmp20110, i64 1
+ %tmp20112 = getelementptr inbounds float, float* %tmp20111, i64 1
+ %tmp20113 = getelementptr inbounds float, float* %tmp20112, i64 1
+ %tmp20114 = getelementptr inbounds float, float* %tmp20113, i64 1
+ %tmp20115 = getelementptr inbounds float, float* %tmp20114, i64 1
+ %tmp20116 = getelementptr inbounds float, float* %tmp20115, i64 1
+ %tmp20117 = getelementptr inbounds float, float* %tmp20116, i64 1
+ %tmp20118 = getelementptr inbounds float, float* %tmp20117, i64 1
+ %tmp20119 = getelementptr inbounds float, float* %tmp20118, i64 1
+ %tmp20120 = getelementptr inbounds float, float* %tmp20119, i64 1
+ %tmp20121 = getelementptr inbounds float, float* %tmp20120, i64 1
+ %tmp20122 = getelementptr inbounds float, float* %tmp20121, i64 1
+ %tmp20123 = getelementptr inbounds float, float* %tmp20122, i64 1
+ %tmp20124 = getelementptr inbounds float, float* %tmp20123, i64 1
+ %tmp20125 = getelementptr inbounds float, float* %tmp20124, i64 1
+ %tmp20126 = getelementptr inbounds float, float* %tmp20125, i64 1
+ %tmp20127 = getelementptr inbounds float, float* %tmp20126, i64 1
+ %tmp20128 = getelementptr inbounds float, float* %tmp20127, i64 1
+ %tmp20129 = getelementptr inbounds float, float* %tmp20128, i64 1
+ %tmp20130 = getelementptr inbounds float, float* %tmp20129, i64 1
+ %tmp20131 = getelementptr inbounds float, float* %tmp20130, i64 1
+ %tmp20132 = getelementptr inbounds float, float* %tmp20131, i64 1
+ %tmp20133 = getelementptr inbounds float, float* %tmp20132, i64 1
+ %tmp20134 = getelementptr inbounds float, float* %tmp20133, i64 1
+ %tmp20135 = getelementptr inbounds float, float* %tmp20134, i64 1
+ %tmp20136 = getelementptr inbounds float, float* %tmp20135, i64 1
+ %tmp20137 = getelementptr inbounds float, float* %tmp20136, i64 1
+ %tmp20138 = getelementptr inbounds float, float* %tmp20137, i64 1
+ %tmp20139 = getelementptr inbounds float, float* %tmp20138, i64 1
+ %tmp20140 = getelementptr inbounds float, float* %tmp20139, i64 1
+ %tmp20141 = getelementptr inbounds float, float* %tmp20140, i64 1
+ %tmp20142 = getelementptr inbounds float, float* %tmp20141, i64 1
+ %tmp20143 = getelementptr inbounds float, float* %tmp20142, i64 1
+ %tmp20144 = getelementptr inbounds float, float* %tmp20143, i64 1
+ %tmp20145 = getelementptr inbounds float, float* %tmp20144, i64 1
+ %tmp20146 = getelementptr inbounds float, float* %tmp20145, i64 1
+ %tmp20147 = getelementptr inbounds float, float* %tmp20146, i64 1
+ %tmp20148 = getelementptr inbounds float, float* %tmp20147, i64 1
+ %tmp20149 = getelementptr inbounds float, float* %tmp20148, i64 1
+ %tmp20150 = getelementptr inbounds float, float* %tmp20149, i64 1
+ %tmp20151 = getelementptr inbounds float, float* %tmp20150, i64 1
+ %tmp20152 = getelementptr inbounds float, float* %tmp20151, i64 1
+ %tmp20153 = getelementptr inbounds float, float* %tmp20152, i64 1
+ %tmp20154 = getelementptr inbounds float, float* %tmp20153, i64 1
+ %tmp20155 = getelementptr inbounds float, float* %tmp20154, i64 1
+ %tmp20156 = getelementptr inbounds float, float* %tmp20155, i64 1
+ %tmp20157 = getelementptr inbounds float, float* %tmp20156, i64 1
+ %tmp20158 = getelementptr inbounds float, float* %tmp20157, i64 1
+ %tmp20159 = getelementptr inbounds float, float* %tmp20158, i64 1
+ %tmp20160 = getelementptr inbounds float, float* %tmp20159, i64 1
+ %tmp20161 = getelementptr inbounds float, float* %tmp20160, i64 1
+ %tmp20162 = getelementptr inbounds float, float* %tmp20161, i64 1
+ %tmp20163 = getelementptr inbounds float, float* %tmp20162, i64 1
+ %tmp20164 = getelementptr inbounds float, float* %tmp20163, i64 1
+ %tmp20165 = getelementptr inbounds float, float* %tmp20164, i64 1
+ %tmp20166 = getelementptr inbounds float, float* %tmp20165, i64 1
+ %tmp20167 = getelementptr inbounds float, float* %tmp20166, i64 1
+ %tmp20168 = getelementptr inbounds float, float* %tmp20167, i64 1
+ %tmp20169 = getelementptr inbounds float, float* %tmp20168, i64 1
+ %tmp20170 = getelementptr inbounds float, float* %tmp20169, i64 1
+ %tmp20171 = getelementptr inbounds float, float* %tmp20170, i64 1
+ %tmp20172 = getelementptr inbounds float, float* %tmp20171, i64 1
+ %tmp20173 = getelementptr inbounds float, float* %tmp20172, i64 1
+ %tmp20174 = getelementptr inbounds float, float* %tmp20173, i64 1
+ %tmp20175 = getelementptr inbounds float, float* %tmp20174, i64 1
+ %tmp20176 = getelementptr inbounds float, float* %tmp20175, i64 1
+ %tmp20177 = getelementptr inbounds float, float* %tmp20176, i64 1
+ %tmp20178 = getelementptr inbounds float, float* %tmp20177, i64 1
+ %tmp20179 = getelementptr inbounds float, float* %tmp20178, i64 1
+ %tmp20180 = getelementptr inbounds float, float* %tmp20179, i64 1
+ %tmp20181 = getelementptr inbounds float, float* %tmp20180, i64 1
+ %tmp20182 = getelementptr inbounds float, float* %tmp20181, i64 1
+ %tmp20183 = getelementptr inbounds float, float* %tmp20182, i64 1
+ %tmp20184 = getelementptr inbounds float, float* %tmp20183, i64 1
+ %tmp20185 = getelementptr inbounds float, float* %tmp20184, i64 1
+ %tmp20186 = getelementptr inbounds float, float* %tmp20185, i64 1
+ %tmp20187 = getelementptr inbounds float, float* %tmp20186, i64 1
+ %tmp20188 = getelementptr inbounds float, float* %tmp20187, i64 1
+ %tmp20189 = getelementptr inbounds float, float* %tmp20188, i64 1
+ %tmp20190 = getelementptr inbounds float, float* %tmp20189, i64 1
+ %tmp20191 = getelementptr inbounds float, float* %tmp20190, i64 1
+ %tmp20192 = getelementptr inbounds float, float* %tmp20191, i64 1
+ %tmp20193 = getelementptr inbounds float, float* %tmp20192, i64 1
+ %tmp20194 = getelementptr inbounds float, float* %tmp20193, i64 1
+ %tmp20195 = getelementptr inbounds float, float* %tmp20194, i64 1
+ %tmp20196 = getelementptr inbounds float, float* %tmp20195, i64 1
+ %tmp20197 = getelementptr inbounds float, float* %tmp20196, i64 1
+ %tmp20198 = getelementptr inbounds float, float* %tmp20197, i64 1
+ %tmp20199 = getelementptr inbounds float, float* %tmp20198, i64 1
+ %tmp20200 = getelementptr inbounds float, float* %tmp20199, i64 1
+ %tmp20201 = getelementptr inbounds float, float* %tmp20200, i64 1
+ %tmp20202 = getelementptr inbounds float, float* %tmp20201, i64 1
+ %tmp20203 = getelementptr inbounds float, float* %tmp20202, i64 1
+ %tmp20204 = getelementptr inbounds float, float* %tmp20203, i64 1
+ %tmp20205 = getelementptr inbounds float, float* %tmp20204, i64 1
+ %tmp20206 = getelementptr inbounds float, float* %tmp20205, i64 1
+ %tmp20207 = getelementptr inbounds float, float* %tmp20206, i64 1
+ %tmp20208 = getelementptr inbounds float, float* %tmp20207, i64 1
+ %tmp20209 = getelementptr inbounds float, float* %tmp20208, i64 1
+ %tmp20210 = getelementptr inbounds float, float* %tmp20209, i64 1
+ %tmp20211 = getelementptr inbounds float, float* %tmp20210, i64 1
+ %tmp20212 = getelementptr inbounds float, float* %tmp20211, i64 1
+ %tmp20213 = getelementptr inbounds float, float* %tmp20212, i64 1
+ %tmp20214 = getelementptr inbounds float, float* %tmp20213, i64 1
+ %tmp20215 = getelementptr inbounds float, float* %tmp20214, i64 1
+ %tmp20216 = getelementptr inbounds float, float* %tmp20215, i64 1
+ %tmp20217 = getelementptr inbounds float, float* %tmp20216, i64 1
+ %tmp20218 = getelementptr inbounds float, float* %tmp20217, i64 1
+ %tmp20219 = getelementptr inbounds float, float* %tmp20218, i64 1
+ %tmp20220 = getelementptr inbounds float, float* %tmp20219, i64 1
+ %tmp20221 = getelementptr inbounds float, float* %tmp20220, i64 1
+ %tmp20222 = getelementptr inbounds float, float* %tmp20221, i64 1
+ %tmp20223 = getelementptr inbounds float, float* %tmp20222, i64 1
+ %tmp20224 = getelementptr inbounds float, float* %tmp20223, i64 1
+ %tmp20225 = getelementptr inbounds float, float* %tmp20224, i64 1
+ %tmp20226 = getelementptr inbounds float, float* %tmp20225, i64 1
+ %tmp20227 = getelementptr inbounds float, float* %tmp20226, i64 1
+ %tmp20228 = getelementptr inbounds float, float* %tmp20227, i64 1
+ %tmp20229 = getelementptr inbounds float, float* %tmp20228, i64 1
+ %tmp20230 = getelementptr inbounds float, float* %tmp20229, i64 1
+ %tmp20231 = getelementptr inbounds float, float* %tmp20230, i64 1
+ %tmp20232 = getelementptr inbounds float, float* %tmp20231, i64 1
+ %tmp20233 = getelementptr inbounds float, float* %tmp20232, i64 1
+ %tmp20234 = getelementptr inbounds float, float* %tmp20233, i64 1
+ %tmp20235 = getelementptr inbounds float, float* %tmp20234, i64 1
+ %tmp20236 = getelementptr inbounds float, float* %tmp20235, i64 1
+ %tmp20237 = getelementptr inbounds float, float* %tmp20236, i64 1
+ %tmp20238 = getelementptr inbounds float, float* %tmp20237, i64 1
+ %tmp20239 = getelementptr inbounds float, float* %tmp20238, i64 1
+ %tmp20240 = getelementptr inbounds float, float* %tmp20239, i64 1
+ %tmp20241 = getelementptr inbounds float, float* %tmp20240, i64 1
+ %tmp20242 = getelementptr inbounds float, float* %tmp20241, i64 1
+ %tmp20243 = getelementptr inbounds float, float* %tmp20242, i64 1
+ %tmp20244 = getelementptr inbounds float, float* %tmp20243, i64 1
+ %tmp20245 = getelementptr inbounds float, float* %tmp20244, i64 1
+ %tmp20246 = getelementptr inbounds float, float* %tmp20245, i64 1
+ %tmp20247 = getelementptr inbounds float, float* %tmp20246, i64 1
+ %tmp20248 = getelementptr inbounds float, float* %tmp20247, i64 1
+ %tmp20249 = getelementptr inbounds float, float* %tmp20248, i64 1
+ %tmp20250 = getelementptr inbounds float, float* %tmp20249, i64 1
+ %tmp20251 = getelementptr inbounds float, float* %tmp20250, i64 1
+ %tmp20252 = getelementptr inbounds float, float* %tmp20251, i64 1
+ %tmp20253 = getelementptr inbounds float, float* %tmp20252, i64 1
+ %tmp20254 = getelementptr inbounds float, float* %tmp20253, i64 1
+ %tmp20255 = getelementptr inbounds float, float* %tmp20254, i64 1
+ %tmp20256 = getelementptr inbounds float, float* %tmp20255, i64 1
+ %tmp20257 = getelementptr inbounds float, float* %tmp20256, i64 1
+ %tmp20258 = getelementptr inbounds float, float* %tmp20257, i64 1
+ %tmp20259 = getelementptr inbounds float, float* %tmp20258, i64 1
+ %tmp20260 = getelementptr inbounds float, float* %tmp20259, i64 1
+ %tmp20261 = getelementptr inbounds float, float* %tmp20260, i64 1
+ %tmp20262 = getelementptr inbounds float, float* %tmp20261, i64 1
+ %tmp20263 = getelementptr inbounds float, float* %tmp20262, i64 1
+ %tmp20264 = getelementptr inbounds float, float* %tmp20263, i64 1
+ %tmp20265 = getelementptr inbounds float, float* %tmp20264, i64 1
+ %tmp20266 = getelementptr inbounds float, float* %tmp20265, i64 1
+ %tmp20267 = getelementptr inbounds float, float* %tmp20266, i64 1
+ %tmp20268 = getelementptr inbounds float, float* %tmp20267, i64 1
+ %tmp20269 = getelementptr inbounds float, float* %tmp20268, i64 1
+ %tmp20270 = getelementptr inbounds float, float* %tmp20269, i64 1
+ %tmp20271 = getelementptr inbounds float, float* %tmp20270, i64 1
+ %tmp20272 = getelementptr inbounds float, float* %tmp20271, i64 1
+ %tmp20273 = getelementptr inbounds float, float* %tmp20272, i64 1
+ %tmp20274 = getelementptr inbounds float, float* %tmp20273, i64 1
+ %tmp20275 = getelementptr inbounds float, float* %tmp20274, i64 1
+ %tmp20276 = getelementptr inbounds float, float* %tmp20275, i64 1
+ %tmp20277 = getelementptr inbounds float, float* %tmp20276, i64 1
+ %tmp20278 = getelementptr inbounds float, float* %tmp20277, i64 1
+ %tmp20279 = getelementptr inbounds float, float* %tmp20278, i64 1
+ %tmp20280 = getelementptr inbounds float, float* %tmp20279, i64 1
+ %tmp20281 = getelementptr inbounds float, float* %tmp20280, i64 1
+ %tmp20282 = getelementptr inbounds float, float* %tmp20281, i64 1
+ %tmp20283 = getelementptr inbounds float, float* %tmp20282, i64 1
+ %tmp20284 = getelementptr inbounds float, float* %tmp20283, i64 1
+ %tmp20285 = getelementptr inbounds float, float* %tmp20284, i64 1
+ %tmp20286 = getelementptr inbounds float, float* %tmp20285, i64 1
+ %tmp20287 = getelementptr inbounds float, float* %tmp20286, i64 1
+ %tmp20288 = getelementptr inbounds float, float* %tmp20287, i64 1
+ %tmp20289 = getelementptr inbounds float, float* %tmp20288, i64 1
+ %tmp20290 = getelementptr inbounds float, float* %tmp20289, i64 1
+ %tmp20291 = getelementptr inbounds float, float* %tmp20290, i64 1
+ %tmp20292 = getelementptr inbounds float, float* %tmp20291, i64 1
+ %tmp20293 = getelementptr inbounds float, float* %tmp20292, i64 1
+ %tmp20294 = getelementptr inbounds float, float* %tmp20293, i64 1
+ %tmp20295 = getelementptr inbounds float, float* %tmp20294, i64 1
+ %tmp20296 = getelementptr inbounds float, float* %tmp20295, i64 1
+ %tmp20297 = getelementptr inbounds float, float* %tmp20296, i64 1
+ %tmp20298 = getelementptr inbounds float, float* %tmp20297, i64 1
+ %tmp20299 = getelementptr inbounds float, float* %tmp20298, i64 1
+ %tmp20300 = getelementptr inbounds float, float* %tmp20299, i64 1
+ %tmp20301 = getelementptr inbounds float, float* %tmp20300, i64 1
+ %tmp20302 = getelementptr inbounds float, float* %tmp20301, i64 1
+ %tmp20303 = getelementptr inbounds float, float* %tmp20302, i64 1
+ %tmp20304 = getelementptr inbounds float, float* %tmp20303, i64 1
+ %tmp20305 = getelementptr inbounds float, float* %tmp20304, i64 1
+ %tmp20306 = getelementptr inbounds float, float* %tmp20305, i64 1
+ %tmp20307 = getelementptr inbounds float, float* %tmp20306, i64 1
+ %tmp20308 = getelementptr inbounds float, float* %tmp20307, i64 1
+ %tmp20309 = getelementptr inbounds float, float* %tmp20308, i64 1
+ %tmp20310 = getelementptr inbounds float, float* %tmp20309, i64 1
+ %tmp20311 = getelementptr inbounds float, float* %tmp20310, i64 1
+ %tmp20312 = getelementptr inbounds float, float* %tmp20311, i64 1
+ %tmp20313 = getelementptr inbounds float, float* %tmp20312, i64 1
+ %tmp20314 = getelementptr inbounds float, float* %tmp20313, i64 1
+ %tmp20315 = getelementptr inbounds float, float* %tmp20314, i64 1
+ %tmp20316 = getelementptr inbounds float, float* %tmp20315, i64 1
+ %tmp20317 = getelementptr inbounds float, float* %tmp20316, i64 1
+ %tmp20318 = getelementptr inbounds float, float* %tmp20317, i64 1
+ %tmp20319 = getelementptr inbounds float, float* %tmp20318, i64 1
+ %tmp20320 = getelementptr inbounds float, float* %tmp20319, i64 1
+ %tmp20321 = getelementptr inbounds float, float* %tmp20320, i64 1
+ %tmp20322 = getelementptr inbounds float, float* %tmp20321, i64 1
+ %tmp20323 = getelementptr inbounds float, float* %tmp20322, i64 1
+ %tmp20324 = getelementptr inbounds float, float* %tmp20323, i64 1
+ %tmp20325 = getelementptr inbounds float, float* %tmp20324, i64 1
+ %tmp20326 = getelementptr inbounds float, float* %tmp20325, i64 1
+ %tmp20327 = getelementptr inbounds float, float* %tmp20326, i64 1
+ %tmp20328 = getelementptr inbounds float, float* %tmp20327, i64 1
+ %tmp20329 = getelementptr inbounds float, float* %tmp20328, i64 1
+ %tmp20330 = getelementptr inbounds float, float* %tmp20329, i64 1
+ %tmp20331 = getelementptr inbounds float, float* %tmp20330, i64 1
+ %tmp20332 = getelementptr inbounds float, float* %tmp20331, i64 1
+ %tmp20333 = getelementptr inbounds float, float* %tmp20332, i64 1
+ %tmp20334 = getelementptr inbounds float, float* %tmp20333, i64 1
+ %tmp20335 = getelementptr inbounds float, float* %tmp20334, i64 1
+ %tmp20336 = getelementptr inbounds float, float* %tmp20335, i64 1
+ %tmp20337 = getelementptr inbounds float, float* %tmp20336, i64 1
+ %tmp20338 = getelementptr inbounds float, float* %tmp20337, i64 1
+ %tmp20339 = getelementptr inbounds float, float* %tmp20338, i64 1
+ %tmp20340 = getelementptr inbounds float, float* %tmp20339, i64 1
+ %tmp20341 = getelementptr inbounds float, float* %tmp20340, i64 1
+ %tmp20342 = getelementptr inbounds float, float* %tmp20341, i64 1
+ %tmp20343 = getelementptr inbounds float, float* %tmp20342, i64 1
+ %tmp20344 = getelementptr inbounds float, float* %tmp20343, i64 1
+ %tmp20345 = getelementptr inbounds float, float* %tmp20344, i64 1
+ %tmp20346 = getelementptr inbounds float, float* %tmp20345, i64 1
+ %tmp20347 = getelementptr inbounds float, float* %tmp20346, i64 1
+ %tmp20348 = getelementptr inbounds float, float* %tmp20347, i64 1
+ %tmp20349 = getelementptr inbounds float, float* %tmp20348, i64 1
+ %tmp20350 = getelementptr inbounds float, float* %tmp20349, i64 1
+ %tmp20351 = getelementptr inbounds float, float* %tmp20350, i64 1
+ %tmp20352 = getelementptr inbounds float, float* %tmp20351, i64 1
+ %tmp20353 = getelementptr inbounds float, float* %tmp20352, i64 1
+ %tmp20354 = getelementptr inbounds float, float* %tmp20353, i64 1
+ %tmp20355 = getelementptr inbounds float, float* %tmp20354, i64 1
+ %tmp20356 = getelementptr inbounds float, float* %tmp20355, i64 1
+ %tmp20357 = getelementptr inbounds float, float* %tmp20356, i64 1
+ %tmp20358 = getelementptr inbounds float, float* %tmp20357, i64 1
+ %tmp20359 = getelementptr inbounds float, float* %tmp20358, i64 1
+ %tmp20360 = getelementptr inbounds float, float* %tmp20359, i64 1
+ %tmp20361 = getelementptr inbounds float, float* %tmp20360, i64 1
+ %tmp20362 = getelementptr inbounds float, float* %tmp20361, i64 1
+ %tmp20363 = getelementptr inbounds float, float* %tmp20362, i64 1
+ %tmp20364 = getelementptr inbounds float, float* %tmp20363, i64 1
+ %tmp20365 = getelementptr inbounds float, float* %tmp20364, i64 1
+ %tmp20366 = getelementptr inbounds float, float* %tmp20365, i64 1
+ %tmp20367 = getelementptr inbounds float, float* %tmp20366, i64 1
+ %tmp20368 = getelementptr inbounds float, float* %tmp20367, i64 1
+ %tmp20369 = getelementptr inbounds float, float* %tmp20368, i64 1
+ %tmp20370 = getelementptr inbounds float, float* %tmp20369, i64 1
+ %tmp20371 = getelementptr inbounds float, float* %tmp20370, i64 1
+ %tmp20372 = getelementptr inbounds float, float* %tmp20371, i64 1
+ %tmp20373 = getelementptr inbounds float, float* %tmp20372, i64 1
+ %tmp20374 = getelementptr inbounds float, float* %tmp20373, i64 1
+ %tmp20375 = getelementptr inbounds float, float* %tmp20374, i64 1
+ %tmp20376 = getelementptr inbounds float, float* %tmp20375, i64 1
+ %tmp20377 = getelementptr inbounds float, float* %tmp20376, i64 1
+ %tmp20378 = getelementptr inbounds float, float* %tmp20377, i64 1
+ %tmp20379 = getelementptr inbounds float, float* %tmp20378, i64 1
+ %tmp20380 = getelementptr inbounds float, float* %tmp20379, i64 1
+ %tmp20381 = getelementptr inbounds float, float* %tmp20380, i64 1
+ %tmp20382 = getelementptr inbounds float, float* %tmp20381, i64 1
+ %tmp20383 = getelementptr inbounds float, float* %tmp20382, i64 1
+ %tmp20384 = getelementptr inbounds float, float* %tmp20383, i64 1
+ %tmp20385 = getelementptr inbounds float, float* %tmp20384, i64 1
+ %tmp20386 = getelementptr inbounds float, float* %tmp20385, i64 1
+ %tmp20387 = getelementptr inbounds float, float* %tmp20386, i64 1
+ %tmp20388 = getelementptr inbounds float, float* %tmp20387, i64 1
+ %tmp20389 = getelementptr inbounds float, float* %tmp20388, i64 1
+ %tmp20390 = getelementptr inbounds float, float* %tmp20389, i64 1
+ %tmp20391 = getelementptr inbounds float, float* %tmp20390, i64 1
+ %tmp20392 = getelementptr inbounds float, float* %tmp20391, i64 1
+ %tmp20393 = getelementptr inbounds float, float* %tmp20392, i64 1
+ %tmp20394 = getelementptr inbounds float, float* %tmp20393, i64 1
+ %tmp20395 = getelementptr inbounds float, float* %tmp20394, i64 1
+ %tmp20396 = getelementptr inbounds float, float* %tmp20395, i64 1
+ %tmp20397 = getelementptr inbounds float, float* %tmp20396, i64 1
+ %tmp20398 = getelementptr inbounds float, float* %tmp20397, i64 1
+ %tmp20399 = getelementptr inbounds float, float* %tmp20398, i64 1
+ %tmp20400 = getelementptr inbounds float, float* %tmp20399, i64 1
+ %tmp20401 = getelementptr inbounds float, float* %tmp20400, i64 1
+ %tmp20402 = getelementptr inbounds float, float* %tmp20401, i64 1
+ %tmp20403 = getelementptr inbounds float, float* %tmp20402, i64 1
+ %tmp20404 = getelementptr inbounds float, float* %tmp20403, i64 1
+ %tmp20405 = getelementptr inbounds float, float* %tmp20404, i64 1
+ %tmp20406 = getelementptr inbounds float, float* %tmp20405, i64 1
+ %tmp20407 = getelementptr inbounds float, float* %tmp20406, i64 1
+ %tmp20408 = getelementptr inbounds float, float* %tmp20407, i64 1
+ %tmp20409 = getelementptr inbounds float, float* %tmp20408, i64 1
+ %tmp20410 = getelementptr inbounds float, float* %tmp20409, i64 1
+ %tmp20411 = getelementptr inbounds float, float* %tmp20410, i64 1
+ %tmp20412 = getelementptr inbounds float, float* %tmp20411, i64 1
+ %tmp20413 = getelementptr inbounds float, float* %tmp20412, i64 1
+ %tmp20414 = getelementptr inbounds float, float* %tmp20413, i64 1
+ %tmp20415 = getelementptr inbounds float, float* %tmp20414, i64 1
+ %tmp20416 = getelementptr inbounds float, float* %tmp20415, i64 1
+ %tmp20417 = getelementptr inbounds float, float* %tmp20416, i64 1
+ %tmp20418 = getelementptr inbounds float, float* %tmp20417, i64 1
+ %tmp20419 = getelementptr inbounds float, float* %tmp20418, i64 1
+ %tmp20420 = getelementptr inbounds float, float* %tmp20419, i64 1
+ %tmp20421 = getelementptr inbounds float, float* %tmp20420, i64 1
+ %tmp20422 = getelementptr inbounds float, float* %tmp20421, i64 1
+ %tmp20423 = getelementptr inbounds float, float* %tmp20422, i64 1
+ %tmp20424 = getelementptr inbounds float, float* %tmp20423, i64 1
+ %tmp20425 = getelementptr inbounds float, float* %tmp20424, i64 1
+ %tmp20426 = getelementptr inbounds float, float* %tmp20425, i64 1
+ %tmp20427 = getelementptr inbounds float, float* %tmp20426, i64 1
+ %tmp20428 = getelementptr inbounds float, float* %tmp20427, i64 1
+ %tmp20429 = getelementptr inbounds float, float* %tmp20428, i64 1
+ %tmp20430 = getelementptr inbounds float, float* %tmp20429, i64 1
+ %tmp20431 = getelementptr inbounds float, float* %tmp20430, i64 1
+ %tmp20432 = getelementptr inbounds float, float* %tmp20431, i64 1
+ %tmp20433 = getelementptr inbounds float, float* %tmp20432, i64 1
+ %tmp20434 = getelementptr inbounds float, float* %tmp20433, i64 1
+ %tmp20435 = getelementptr inbounds float, float* %tmp20434, i64 1
+ %tmp20436 = getelementptr inbounds float, float* %tmp20435, i64 1
+ %tmp20437 = getelementptr inbounds float, float* %tmp20436, i64 1
+ %tmp20438 = getelementptr inbounds float, float* %tmp20437, i64 1
+ %tmp20439 = getelementptr inbounds float, float* %tmp20438, i64 1
+ %tmp20440 = getelementptr inbounds float, float* %tmp20439, i64 1
+ %tmp20441 = getelementptr inbounds float, float* %tmp20440, i64 1
+ %tmp20442 = getelementptr inbounds float, float* %tmp20441, i64 1
+ %tmp20443 = getelementptr inbounds float, float* %tmp20442, i64 1
+ %tmp20444 = getelementptr inbounds float, float* %tmp20443, i64 1
+ %tmp20445 = getelementptr inbounds float, float* %tmp20444, i64 1
+ %tmp20446 = getelementptr inbounds float, float* %tmp20445, i64 1
+ %tmp20447 = getelementptr inbounds float, float* %tmp20446, i64 1
+ %tmp20448 = getelementptr inbounds float, float* %tmp20447, i64 1
+ %tmp20449 = getelementptr inbounds float, float* %tmp20448, i64 1
+ %tmp20450 = getelementptr inbounds float, float* %tmp20449, i64 1
+ %tmp20451 = getelementptr inbounds float, float* %tmp20450, i64 1
+ %tmp20452 = getelementptr inbounds float, float* %tmp20451, i64 1
+ %tmp20453 = getelementptr inbounds float, float* %tmp20452, i64 1
+ %tmp20454 = getelementptr inbounds float, float* %tmp20453, i64 1
+ %tmp20455 = getelementptr inbounds float, float* %tmp20454, i64 1
+ %tmp20456 = getelementptr inbounds float, float* %tmp20455, i64 1
+ %tmp20457 = getelementptr inbounds float, float* %tmp20456, i64 1
+ %tmp20458 = getelementptr inbounds float, float* %tmp20457, i64 1
+ %tmp20459 = getelementptr inbounds float, float* %tmp20458, i64 1
+ %tmp20460 = getelementptr inbounds float, float* %tmp20459, i64 1
+ %tmp20461 = getelementptr inbounds float, float* %tmp20460, i64 1
+ %tmp20462 = getelementptr inbounds float, float* %tmp20461, i64 1
+ %tmp20463 = getelementptr inbounds float, float* %tmp20462, i64 1
+ %tmp20464 = getelementptr inbounds float, float* %tmp20463, i64 1
+ %tmp20465 = getelementptr inbounds float, float* %tmp20464, i64 1
+ %tmp20466 = getelementptr inbounds float, float* %tmp20465, i64 1
+ %tmp20467 = getelementptr inbounds float, float* %tmp20466, i64 1
+ %tmp20468 = getelementptr inbounds float, float* %tmp20467, i64 1
+ %tmp20469 = getelementptr inbounds float, float* %tmp20468, i64 1
+ %tmp20470 = getelementptr inbounds float, float* %tmp20469, i64 1
+ %tmp20471 = getelementptr inbounds float, float* %tmp20470, i64 1
+ %tmp20472 = getelementptr inbounds float, float* %tmp20471, i64 1
+ %tmp20473 = getelementptr inbounds float, float* %tmp20472, i64 1
+ %tmp20474 = getelementptr inbounds float, float* %tmp20473, i64 1
+ %tmp20475 = getelementptr inbounds float, float* %tmp20474, i64 1
+ %tmp20476 = getelementptr inbounds float, float* %tmp20475, i64 1
+ %tmp20477 = getelementptr inbounds float, float* %tmp20476, i64 1
+ %tmp20478 = getelementptr inbounds float, float* %tmp20477, i64 1
+ %tmp20479 = getelementptr inbounds float, float* %tmp20478, i64 1
+ %tmp20480 = getelementptr inbounds float, float* %tmp20479, i64 1
+ %tmp20481 = getelementptr inbounds float, float* %tmp20480, i64 1
+ %tmp20482 = getelementptr inbounds float, float* %tmp20481, i64 1
+ %tmp20483 = getelementptr inbounds float, float* %tmp20482, i64 1
+ %tmp20484 = getelementptr inbounds float, float* %tmp20483, i64 1
+ %tmp20485 = getelementptr inbounds float, float* %tmp20484, i64 1
+ %tmp20486 = getelementptr inbounds float, float* %tmp20485, i64 1
+ %tmp20487 = getelementptr inbounds float, float* %tmp20486, i64 1
+ %tmp20488 = getelementptr inbounds float, float* %tmp20487, i64 1
+ %tmp20489 = getelementptr inbounds float, float* %tmp20488, i64 1
+ %tmp20490 = getelementptr inbounds float, float* %tmp20489, i64 1
+ %tmp20491 = getelementptr inbounds float, float* %tmp20490, i64 1
+ %tmp20492 = getelementptr inbounds float, float* %tmp20491, i64 1
+ %tmp20493 = getelementptr inbounds float, float* %tmp20492, i64 1
+ %tmp20494 = getelementptr inbounds float, float* %tmp20493, i64 1
+ %tmp20495 = getelementptr inbounds float, float* %tmp20494, i64 1
+ %tmp20496 = getelementptr inbounds float, float* %tmp20495, i64 1
+ %tmp20497 = getelementptr inbounds float, float* %tmp20496, i64 1
+ %tmp20498 = getelementptr inbounds float, float* %tmp20497, i64 1
+ %tmp20499 = getelementptr inbounds float, float* %tmp20498, i64 1
+ %tmp20500 = getelementptr inbounds float, float* %tmp20499, i64 1
+ %tmp20501 = getelementptr inbounds float, float* %tmp20500, i64 1
+ %tmp20502 = getelementptr inbounds float, float* %tmp20501, i64 1
+ %tmp20503 = getelementptr inbounds float, float* %tmp20502, i64 1
+ %tmp20504 = getelementptr inbounds float, float* %tmp20503, i64 1
+ %tmp20505 = getelementptr inbounds float, float* %tmp20504, i64 1
+ %tmp20506 = getelementptr inbounds float, float* %tmp20505, i64 1
+ %tmp20507 = getelementptr inbounds float, float* %tmp20506, i64 1
+ %tmp20508 = getelementptr inbounds float, float* %tmp20507, i64 1
+ %tmp20509 = getelementptr inbounds float, float* %tmp20508, i64 1
+ %tmp20510 = getelementptr inbounds float, float* %tmp20509, i64 1
+ %tmp20511 = getelementptr inbounds float, float* %tmp20510, i64 1
+ %tmp20512 = getelementptr inbounds float, float* %tmp20511, i64 1
+ %tmp20513 = getelementptr inbounds float, float* %tmp20512, i64 1
+ %tmp20514 = getelementptr inbounds float, float* %tmp20513, i64 1
+ %tmp20515 = getelementptr inbounds float, float* %tmp20514, i64 1
+ %tmp20516 = getelementptr inbounds float, float* %tmp20515, i64 1
+ %tmp20517 = getelementptr inbounds float, float* %tmp20516, i64 1
+ %tmp20518 = getelementptr inbounds float, float* %tmp20517, i64 1
+ %tmp20519 = getelementptr inbounds float, float* %tmp20518, i64 1
+ %tmp20520 = getelementptr inbounds float, float* %tmp20519, i64 1
+ %tmp20521 = getelementptr inbounds float, float* %tmp20520, i64 1
+ %tmp20522 = getelementptr inbounds float, float* %tmp20521, i64 1
+ %tmp20523 = getelementptr inbounds float, float* %tmp20522, i64 1
+ %tmp20524 = getelementptr inbounds float, float* %tmp20523, i64 1
+ %tmp20525 = getelementptr inbounds float, float* %tmp20524, i64 1
+ %tmp20526 = getelementptr inbounds float, float* %tmp20525, i64 1
+ %tmp20527 = getelementptr inbounds float, float* %tmp20526, i64 1
+ %tmp20528 = getelementptr inbounds float, float* %tmp20527, i64 1
+ %tmp20529 = getelementptr inbounds float, float* %tmp20528, i64 1
+ %tmp20530 = getelementptr inbounds float, float* %tmp20529, i64 1
+ %tmp20531 = getelementptr inbounds float, float* %tmp20530, i64 1
+ %tmp20532 = getelementptr inbounds float, float* %tmp20531, i64 1
+ %tmp20533 = getelementptr inbounds float, float* %tmp20532, i64 1
+ %tmp20534 = getelementptr inbounds float, float* %tmp20533, i64 1
+ %tmp20535 = getelementptr inbounds float, float* %tmp20534, i64 1
+ %tmp20536 = getelementptr inbounds float, float* %tmp20535, i64 1
+ %tmp20537 = getelementptr inbounds float, float* %tmp20536, i64 1
+ %tmp20538 = getelementptr inbounds float, float* %tmp20537, i64 1
+ %tmp20539 = getelementptr inbounds float, float* %tmp20538, i64 1
+ %tmp20540 = getelementptr inbounds float, float* %tmp20539, i64 1
+ %tmp20541 = getelementptr inbounds float, float* %tmp20540, i64 1
+ %tmp20542 = getelementptr inbounds float, float* %tmp20541, i64 1
+ %tmp20543 = getelementptr inbounds float, float* %tmp20542, i64 1
+ %tmp20544 = getelementptr inbounds float, float* %tmp20543, i64 1
+ %tmp20545 = getelementptr inbounds float, float* %tmp20544, i64 1
+ %tmp20546 = getelementptr inbounds float, float* %tmp20545, i64 1
+ %tmp20547 = getelementptr inbounds float, float* %tmp20546, i64 1
+ %tmp20548 = getelementptr inbounds float, float* %tmp20547, i64 1
+ %tmp20549 = getelementptr inbounds float, float* %tmp20548, i64 1
+ %tmp20550 = getelementptr inbounds float, float* %tmp20549, i64 1
+ %tmp20551 = getelementptr inbounds float, float* %tmp20550, i64 1
+ %tmp20552 = getelementptr inbounds float, float* %tmp20551, i64 1
+ %tmp20553 = getelementptr inbounds float, float* %tmp20552, i64 1
+ %tmp20554 = getelementptr inbounds float, float* %tmp20553, i64 1
+ %tmp20555 = getelementptr inbounds float, float* %tmp20554, i64 1
+ %tmp20556 = getelementptr inbounds float, float* %tmp20555, i64 1
+ %tmp20557 = getelementptr inbounds float, float* %tmp20556, i64 1
+ %tmp20558 = getelementptr inbounds float, float* %tmp20557, i64 1
+ %tmp20559 = getelementptr inbounds float, float* %tmp20558, i64 1
+ %tmp20560 = getelementptr inbounds float, float* %tmp20559, i64 1
+ %tmp20561 = getelementptr inbounds float, float* %tmp20560, i64 1
+ %tmp20562 = getelementptr inbounds float, float* %tmp20561, i64 1
+ %tmp20563 = getelementptr inbounds float, float* %tmp20562, i64 1
+ %tmp20564 = getelementptr inbounds float, float* %tmp20563, i64 1
+ %tmp20565 = getelementptr inbounds float, float* %tmp20564, i64 1
+ %tmp20566 = getelementptr inbounds float, float* %tmp20565, i64 1
+ %tmp20567 = getelementptr inbounds float, float* %tmp20566, i64 1
+ %tmp20568 = getelementptr inbounds float, float* %tmp20567, i64 1
+ %tmp20569 = getelementptr inbounds float, float* %tmp20568, i64 1
+ %tmp20570 = getelementptr inbounds float, float* %tmp20569, i64 1
+ %tmp20571 = getelementptr inbounds float, float* %tmp20570, i64 1
+ %tmp20572 = getelementptr inbounds float, float* %tmp20571, i64 1
+ %tmp20573 = getelementptr inbounds float, float* %tmp20572, i64 1
+ %tmp20574 = getelementptr inbounds float, float* %tmp20573, i64 1
+ %tmp20575 = getelementptr inbounds float, float* %tmp20574, i64 1
+ %tmp20576 = getelementptr inbounds float, float* %tmp20575, i64 1
+ %tmp20577 = getelementptr inbounds float, float* %tmp20576, i64 1
+ %tmp20578 = getelementptr inbounds float, float* %tmp20577, i64 1
+ %tmp20579 = getelementptr inbounds float, float* %tmp20578, i64 1
+ %tmp20580 = getelementptr inbounds float, float* %tmp20579, i64 1
+ %tmp20581 = getelementptr inbounds float, float* %tmp20580, i64 1
+ %tmp20582 = getelementptr inbounds float, float* %tmp20581, i64 1
+ %tmp20583 = getelementptr inbounds float, float* %tmp20582, i64 1
+ %tmp20584 = getelementptr inbounds float, float* %tmp20583, i64 1
+ %tmp20585 = getelementptr inbounds float, float* %tmp20584, i64 1
+ %tmp20586 = getelementptr inbounds float, float* %tmp20585, i64 1
+ %tmp20587 = getelementptr inbounds float, float* %tmp20586, i64 1
+ %tmp20588 = getelementptr inbounds float, float* %tmp20587, i64 1
+ %tmp20589 = getelementptr inbounds float, float* %tmp20588, i64 1
+ %tmp20590 = getelementptr inbounds float, float* %tmp20589, i64 1
+ %tmp20591 = getelementptr inbounds float, float* %tmp20590, i64 1
+ %tmp20592 = getelementptr inbounds float, float* %tmp20591, i64 1
+ %tmp20593 = getelementptr inbounds float, float* %tmp20592, i64 1
+ %tmp20594 = getelementptr inbounds float, float* %tmp20593, i64 1
+ %tmp20595 = getelementptr inbounds float, float* %tmp20594, i64 1
+ %tmp20596 = getelementptr inbounds float, float* %tmp20595, i64 1
+ %tmp20597 = getelementptr inbounds float, float* %tmp20596, i64 1
+ %tmp20598 = getelementptr inbounds float, float* %tmp20597, i64 1
+ %tmp20599 = getelementptr inbounds float, float* %tmp20598, i64 1
+ %tmp20600 = getelementptr inbounds float, float* %tmp20599, i64 1
+ %tmp20601 = getelementptr inbounds float, float* %tmp20600, i64 1
+ %tmp20602 = getelementptr inbounds float, float* %tmp20601, i64 1
+ %tmp20603 = getelementptr inbounds float, float* %tmp20602, i64 1
+ %tmp20604 = getelementptr inbounds float, float* %tmp20603, i64 1
+ %tmp20605 = getelementptr inbounds float, float* %tmp20604, i64 1
+ %tmp20606 = getelementptr inbounds float, float* %tmp20605, i64 1
+ %tmp20607 = getelementptr inbounds float, float* %tmp20606, i64 1
+ %tmp20608 = getelementptr inbounds float, float* %tmp20607, i64 1
+ %tmp20609 = getelementptr inbounds float, float* %tmp20608, i64 1
+ %tmp20610 = getelementptr inbounds float, float* %tmp20609, i64 1
+ %tmp20611 = getelementptr inbounds float, float* %tmp20610, i64 1
+ %tmp20612 = getelementptr inbounds float, float* %tmp20611, i64 1
+ %tmp20613 = getelementptr inbounds float, float* %tmp20612, i64 1
+ %tmp20614 = getelementptr inbounds float, float* %tmp20613, i64 1
+ %tmp20615 = getelementptr inbounds float, float* %tmp20614, i64 1
+ %tmp20616 = getelementptr inbounds float, float* %tmp20615, i64 1
+ %tmp20617 = getelementptr inbounds float, float* %tmp20616, i64 1
+ %tmp20618 = getelementptr inbounds float, float* %tmp20617, i64 1
+ %tmp20619 = getelementptr inbounds float, float* %tmp20618, i64 1
+ %tmp20620 = getelementptr inbounds float, float* %tmp20619, i64 1
+ %tmp20621 = getelementptr inbounds float, float* %tmp20620, i64 1
+ %tmp20622 = getelementptr inbounds float, float* %tmp20621, i64 1
+ %tmp20623 = getelementptr inbounds float, float* %tmp20622, i64 1
+ %tmp20624 = getelementptr inbounds float, float* %tmp20623, i64 1
+ %tmp20625 = getelementptr inbounds float, float* %tmp20624, i64 1
+ %tmp20626 = getelementptr inbounds float, float* %tmp20625, i64 1
+ %tmp20627 = getelementptr inbounds float, float* %tmp20626, i64 1
+ %tmp20628 = getelementptr inbounds float, float* %tmp20627, i64 1
+ %tmp20629 = getelementptr inbounds float, float* %tmp20628, i64 1
+ %tmp20630 = getelementptr inbounds float, float* %tmp20629, i64 1
+ %tmp20631 = getelementptr inbounds float, float* %tmp20630, i64 1
+ %tmp20632 = getelementptr inbounds float, float* %tmp20631, i64 1
+ %tmp20633 = getelementptr inbounds float, float* %tmp20632, i64 1
+ %tmp20634 = getelementptr inbounds float, float* %tmp20633, i64 1
+ %tmp20635 = getelementptr inbounds float, float* %tmp20634, i64 1
+ %tmp20636 = getelementptr inbounds float, float* %tmp20635, i64 1
+ %tmp20637 = getelementptr inbounds float, float* %tmp20636, i64 1
+ %tmp20638 = getelementptr inbounds float, float* %tmp20637, i64 1
+ %tmp20639 = getelementptr inbounds float, float* %tmp20638, i64 1
+ %tmp20640 = getelementptr inbounds float, float* %tmp20639, i64 1
+ %tmp20641 = getelementptr inbounds float, float* %tmp20640, i64 1
+ %tmp20642 = getelementptr inbounds float, float* %tmp20641, i64 1
+ %tmp20643 = getelementptr inbounds float, float* %tmp20642, i64 1
+ %tmp20644 = getelementptr inbounds float, float* %tmp20643, i64 1
+ %tmp20645 = getelementptr inbounds float, float* %tmp20644, i64 1
+ %tmp20646 = getelementptr inbounds float, float* %tmp20645, i64 1
+ %tmp20647 = getelementptr inbounds float, float* %tmp20646, i64 1
+ %tmp20648 = getelementptr inbounds float, float* %tmp20647, i64 1
+ %tmp20649 = getelementptr inbounds float, float* %tmp20648, i64 1
+ %tmp20650 = getelementptr inbounds float, float* %tmp20649, i64 1
+ %tmp20651 = getelementptr inbounds float, float* %tmp20650, i64 1
+ %tmp20652 = getelementptr inbounds float, float* %tmp20651, i64 1
+ %tmp20653 = getelementptr inbounds float, float* %tmp20652, i64 1
+ %tmp20654 = getelementptr inbounds float, float* %tmp20653, i64 1
+ %tmp20655 = getelementptr inbounds float, float* %tmp20654, i64 1
+ %tmp20656 = getelementptr inbounds float, float* %tmp20655, i64 1
+ %tmp20657 = getelementptr inbounds float, float* %tmp20656, i64 1
+ %tmp20658 = getelementptr inbounds float, float* %tmp20657, i64 1
+ %tmp20659 = getelementptr inbounds float, float* %tmp20658, i64 1
+ %tmp20660 = getelementptr inbounds float, float* %tmp20659, i64 1
+ %tmp20661 = getelementptr inbounds float, float* %tmp20660, i64 1
+ %tmp20662 = getelementptr inbounds float, float* %tmp20661, i64 1
+ %tmp20663 = getelementptr inbounds float, float* %tmp20662, i64 1
+ %tmp20664 = getelementptr inbounds float, float* %tmp20663, i64 1
+ %tmp20665 = getelementptr inbounds float, float* %tmp20664, i64 1
+ %tmp20666 = getelementptr inbounds float, float* %tmp20665, i64 1
+ %tmp20667 = getelementptr inbounds float, float* %tmp20666, i64 1
+ %tmp20668 = getelementptr inbounds float, float* %tmp20667, i64 1
+ %tmp20669 = getelementptr inbounds float, float* %tmp20668, i64 1
+ %tmp20670 = getelementptr inbounds float, float* %tmp20669, i64 1
+ %tmp20671 = getelementptr inbounds float, float* %tmp20670, i64 1
+ %tmp20672 = getelementptr inbounds float, float* %tmp20671, i64 1
+ %tmp20673 = getelementptr inbounds float, float* %tmp20672, i64 1
+ %tmp20674 = getelementptr inbounds float, float* %tmp20673, i64 1
+ %tmp20675 = getelementptr inbounds float, float* %tmp20674, i64 1
+ %tmp20676 = getelementptr inbounds float, float* %tmp20675, i64 1
+ %tmp20677 = getelementptr inbounds float, float* %tmp20676, i64 1
+ %tmp20678 = getelementptr inbounds float, float* %tmp20677, i64 1
+ %tmp20679 = getelementptr inbounds float, float* %tmp20678, i64 1
+ %tmp20680 = getelementptr inbounds float, float* %tmp20679, i64 1
+ %tmp20681 = getelementptr inbounds float, float* %tmp20680, i64 1
+ %tmp20682 = getelementptr inbounds float, float* %tmp20681, i64 1
+ %tmp20683 = getelementptr inbounds float, float* %tmp20682, i64 1
+ %tmp20684 = getelementptr inbounds float, float* %tmp20683, i64 1
+ %tmp20685 = getelementptr inbounds float, float* %tmp20684, i64 1
+ %tmp20686 = getelementptr inbounds float, float* %tmp20685, i64 1
+ %tmp20687 = getelementptr inbounds float, float* %tmp20686, i64 1
+ %tmp20688 = getelementptr inbounds float, float* %tmp20687, i64 1
+ %tmp20689 = getelementptr inbounds float, float* %tmp20688, i64 1
+ %tmp20690 = getelementptr inbounds float, float* %tmp20689, i64 1
+ %tmp20691 = getelementptr inbounds float, float* %tmp20690, i64 1
+ %tmp20692 = getelementptr inbounds float, float* %tmp20691, i64 1
+ %tmp20693 = getelementptr inbounds float, float* %tmp20692, i64 1
+ %tmp20694 = getelementptr inbounds float, float* %tmp20693, i64 1
+ %tmp20695 = getelementptr inbounds float, float* %tmp20694, i64 1
+ %tmp20696 = getelementptr inbounds float, float* %tmp20695, i64 1
+ %tmp20697 = getelementptr inbounds float, float* %tmp20696, i64 1
+ %tmp20698 = getelementptr inbounds float, float* %tmp20697, i64 1
+ %tmp20699 = getelementptr inbounds float, float* %tmp20698, i64 1
+ %tmp20700 = getelementptr inbounds float, float* %tmp20699, i64 1
+ %tmp20701 = getelementptr inbounds float, float* %tmp20700, i64 1
+ %tmp20702 = getelementptr inbounds float, float* %tmp20701, i64 1
+ %tmp20703 = getelementptr inbounds float, float* %tmp20702, i64 1
+ %tmp20704 = getelementptr inbounds float, float* %tmp20703, i64 1
+ %tmp20705 = getelementptr inbounds float, float* %tmp20704, i64 1
+ %tmp20706 = getelementptr inbounds float, float* %tmp20705, i64 1
+ %tmp20707 = getelementptr inbounds float, float* %tmp20706, i64 1
+ %tmp20708 = getelementptr inbounds float, float* %tmp20707, i64 1
+ %tmp20709 = getelementptr inbounds float, float* %tmp20708, i64 1
+ %tmp20710 = getelementptr inbounds float, float* %tmp20709, i64 1
+ %tmp20711 = getelementptr inbounds float, float* %tmp20710, i64 1
+ %tmp20712 = getelementptr inbounds float, float* %tmp20711, i64 1
+ %tmp20713 = getelementptr inbounds float, float* %tmp20712, i64 1
+ %tmp20714 = getelementptr inbounds float, float* %tmp20713, i64 1
+ %tmp20715 = getelementptr inbounds float, float* %tmp20714, i64 1
+ %tmp20716 = getelementptr inbounds float, float* %tmp20715, i64 1
+ %tmp20717 = getelementptr inbounds float, float* %tmp20716, i64 1
+ %tmp20718 = getelementptr inbounds float, float* %tmp20717, i64 1
+ %tmp20719 = getelementptr inbounds float, float* %tmp20718, i64 1
+ %tmp20720 = getelementptr inbounds float, float* %tmp20719, i64 1
+ %tmp20721 = getelementptr inbounds float, float* %tmp20720, i64 1
+ %tmp20722 = getelementptr inbounds float, float* %tmp20721, i64 1
+ %tmp20723 = getelementptr inbounds float, float* %tmp20722, i64 1
+ %tmp20724 = getelementptr inbounds float, float* %tmp20723, i64 1
+ %tmp20725 = getelementptr inbounds float, float* %tmp20724, i64 1
+ %tmp20726 = getelementptr inbounds float, float* %tmp20725, i64 1
+ %tmp20727 = getelementptr inbounds float, float* %tmp20726, i64 1
+ %tmp20728 = getelementptr inbounds float, float* %tmp20727, i64 1
+ %tmp20729 = getelementptr inbounds float, float* %tmp20728, i64 1
+ %tmp20730 = getelementptr inbounds float, float* %tmp20729, i64 1
+ %tmp20731 = getelementptr inbounds float, float* %tmp20730, i64 1
+ %tmp20732 = getelementptr inbounds float, float* %tmp20731, i64 1
+ %tmp20733 = getelementptr inbounds float, float* %tmp20732, i64 1
+ %tmp20734 = getelementptr inbounds float, float* %tmp20733, i64 1
+ %tmp20735 = getelementptr inbounds float, float* %tmp20734, i64 1
+ %tmp20736 = getelementptr inbounds float, float* %tmp20735, i64 1
+ %tmp20737 = getelementptr inbounds float, float* %tmp20736, i64 1
+ %tmp20738 = getelementptr inbounds float, float* %tmp20737, i64 1
+ %tmp20739 = getelementptr inbounds float, float* %tmp20738, i64 1
+ %tmp20740 = getelementptr inbounds float, float* %tmp20739, i64 1
+ %tmp20741 = getelementptr inbounds float, float* %tmp20740, i64 1
+ %tmp20742 = getelementptr inbounds float, float* %tmp20741, i64 1
+ %tmp20743 = getelementptr inbounds float, float* %tmp20742, i64 1
+ %tmp20744 = getelementptr inbounds float, float* %tmp20743, i64 1
+ %tmp20745 = getelementptr inbounds float, float* %tmp20744, i64 1
+ %tmp20746 = getelementptr inbounds float, float* %tmp20745, i64 1
+ %tmp20747 = getelementptr inbounds float, float* %tmp20746, i64 1
+ %tmp20748 = getelementptr inbounds float, float* %tmp20747, i64 1
+ %tmp20749 = getelementptr inbounds float, float* %tmp20748, i64 1
+ %tmp20750 = getelementptr inbounds float, float* %tmp20749, i64 1
+ %tmp20751 = getelementptr inbounds float, float* %tmp20750, i64 1
+ %tmp20752 = getelementptr inbounds float, float* %tmp20751, i64 1
+ %tmp20753 = getelementptr inbounds float, float* %tmp20752, i64 1
+ %tmp20754 = getelementptr inbounds float, float* %tmp20753, i64 1
+ %tmp20755 = getelementptr inbounds float, float* %tmp20754, i64 1
+ %tmp20756 = getelementptr inbounds float, float* %tmp20755, i64 1
+ %tmp20757 = getelementptr inbounds float, float* %tmp20756, i64 1
+ %tmp20758 = getelementptr inbounds float, float* %tmp20757, i64 1
+ %tmp20759 = getelementptr inbounds float, float* %tmp20758, i64 1
+ %tmp20760 = getelementptr inbounds float, float* %tmp20759, i64 1
+ %tmp20761 = getelementptr inbounds float, float* %tmp20760, i64 1
+ %tmp20762 = getelementptr inbounds float, float* %tmp20761, i64 1
+ %tmp20763 = getelementptr inbounds float, float* %tmp20762, i64 1
+ %tmp20764 = getelementptr inbounds float, float* %tmp20763, i64 1
+ %tmp20765 = getelementptr inbounds float, float* %tmp20764, i64 1
+ %tmp20766 = getelementptr inbounds float, float* %tmp20765, i64 1
+ %tmp20767 = getelementptr inbounds float, float* %tmp20766, i64 1
+ %tmp20768 = getelementptr inbounds float, float* %tmp20767, i64 1
+ %tmp20769 = getelementptr inbounds float, float* %tmp20768, i64 1
+ %tmp20770 = getelementptr inbounds float, float* %tmp20769, i64 1
+ %tmp20771 = getelementptr inbounds float, float* %tmp20770, i64 1
+ %tmp20772 = getelementptr inbounds float, float* %tmp20771, i64 1
+ %tmp20773 = getelementptr inbounds float, float* %tmp20772, i64 1
+ %tmp20774 = getelementptr inbounds float, float* %tmp20773, i64 1
+ %tmp20775 = getelementptr inbounds float, float* %tmp20774, i64 1
+ %tmp20776 = getelementptr inbounds float, float* %tmp20775, i64 1
+ %tmp20777 = getelementptr inbounds float, float* %tmp20776, i64 1
+ %tmp20778 = getelementptr inbounds float, float* %tmp20777, i64 1
+ %tmp20779 = getelementptr inbounds float, float* %tmp20778, i64 1
+ %tmp20780 = getelementptr inbounds float, float* %tmp20779, i64 1
+ %tmp20781 = getelementptr inbounds float, float* %tmp20780, i64 1
+ %tmp20782 = getelementptr inbounds float, float* %tmp20781, i64 1
+ %tmp20783 = getelementptr inbounds float, float* %tmp20782, i64 1
+ %tmp20784 = getelementptr inbounds float, float* %tmp20783, i64 1
+ %tmp20785 = getelementptr inbounds float, float* %tmp20784, i64 1
+ %tmp20786 = getelementptr inbounds float, float* %tmp20785, i64 1
+ %tmp20787 = getelementptr inbounds float, float* %tmp20786, i64 1
+ %tmp20788 = getelementptr inbounds float, float* %tmp20787, i64 1
+ %tmp20789 = getelementptr inbounds float, float* %tmp20788, i64 1
+ %tmp20790 = getelementptr inbounds float, float* %tmp20789, i64 1
+ %tmp20791 = getelementptr inbounds float, float* %tmp20790, i64 1
+ %tmp20792 = getelementptr inbounds float, float* %tmp20791, i64 1
+ %tmp20793 = getelementptr inbounds float, float* %tmp20792, i64 1
+ %tmp20794 = getelementptr inbounds float, float* %tmp20793, i64 1
+ %tmp20795 = getelementptr inbounds float, float* %tmp20794, i64 1
+ %tmp20796 = getelementptr inbounds float, float* %tmp20795, i64 1
+ %tmp20797 = getelementptr inbounds float, float* %tmp20796, i64 1
+ %tmp20798 = getelementptr inbounds float, float* %tmp20797, i64 1
+ %tmp20799 = getelementptr inbounds float, float* %tmp20798, i64 1
+ %tmp20800 = getelementptr inbounds float, float* %tmp20799, i64 1
+ %tmp20801 = getelementptr inbounds float, float* %tmp20800, i64 1
+ %tmp20802 = getelementptr inbounds float, float* %tmp20801, i64 1
+ %tmp20803 = getelementptr inbounds float, float* %tmp20802, i64 1
+ %tmp20804 = getelementptr inbounds float, float* %tmp20803, i64 1
+ %tmp20805 = getelementptr inbounds float, float* %tmp20804, i64 1
+ %tmp20806 = getelementptr inbounds float, float* %tmp20805, i64 1
+ %tmp20807 = getelementptr inbounds float, float* %tmp20806, i64 1
+ %tmp20808 = getelementptr inbounds float, float* %tmp20807, i64 1
+ %tmp20809 = getelementptr inbounds float, float* %tmp20808, i64 1
+ %tmp20810 = getelementptr inbounds float, float* %tmp20809, i64 1
+ %tmp20811 = getelementptr inbounds float, float* %tmp20810, i64 1
+ %tmp20812 = getelementptr inbounds float, float* %tmp20811, i64 1
+ %tmp20813 = getelementptr inbounds float, float* %tmp20812, i64 1
+ %tmp20814 = getelementptr inbounds float, float* %tmp20813, i64 1
+ %tmp20815 = getelementptr inbounds float, float* %tmp20814, i64 1
+ %tmp20816 = getelementptr inbounds float, float* %tmp20815, i64 1
+ %tmp20817 = getelementptr inbounds float, float* %tmp20816, i64 1
+ %tmp20818 = getelementptr inbounds float, float* %tmp20817, i64 1
+ %tmp20819 = getelementptr inbounds float, float* %tmp20818, i64 1
+ %tmp20820 = getelementptr inbounds float, float* %tmp20819, i64 1
+ %tmp20821 = getelementptr inbounds float, float* %tmp20820, i64 1
+ %tmp20822 = getelementptr inbounds float, float* %tmp20821, i64 1
+ %tmp20823 = getelementptr inbounds float, float* %tmp20822, i64 1
+ %tmp20824 = getelementptr inbounds float, float* %tmp20823, i64 1
+ %tmp20825 = getelementptr inbounds float, float* %tmp20824, i64 1
+ %tmp20826 = getelementptr inbounds float, float* %tmp20825, i64 1
+ %tmp20827 = getelementptr inbounds float, float* %tmp20826, i64 1
+ %tmp20828 = getelementptr inbounds float, float* %tmp20827, i64 1
+ %tmp20829 = getelementptr inbounds float, float* %tmp20828, i64 1
+ %tmp20830 = getelementptr inbounds float, float* %tmp20829, i64 1
+ %tmp20831 = getelementptr inbounds float, float* %tmp20830, i64 1
+ %tmp20832 = getelementptr inbounds float, float* %tmp20831, i64 1
+ %tmp20833 = getelementptr inbounds float, float* %tmp20832, i64 1
+ %tmp20834 = getelementptr inbounds float, float* %tmp20833, i64 1
+ %tmp20835 = getelementptr inbounds float, float* %tmp20834, i64 1
+ %tmp20836 = getelementptr inbounds float, float* %tmp20835, i64 1
+ %tmp20837 = getelementptr inbounds float, float* %tmp20836, i64 1
+ %tmp20838 = getelementptr inbounds float, float* %tmp20837, i64 1
+ %tmp20839 = getelementptr inbounds float, float* %tmp20838, i64 1
+ %tmp20840 = getelementptr inbounds float, float* %tmp20839, i64 1
+ %tmp20841 = getelementptr inbounds float, float* %tmp20840, i64 1
+ %tmp20842 = getelementptr inbounds float, float* %tmp20841, i64 1
+ %tmp20843 = getelementptr inbounds float, float* %tmp20842, i64 1
+ %tmp20844 = getelementptr inbounds float, float* %tmp20843, i64 1
+ %tmp20845 = getelementptr inbounds float, float* %tmp20844, i64 1
+ %tmp20846 = getelementptr inbounds float, float* %tmp20845, i64 1
+ %tmp20847 = getelementptr inbounds float, float* %tmp20846, i64 1
+ %tmp20848 = getelementptr inbounds float, float* %tmp20847, i64 1
+ %tmp20849 = getelementptr inbounds float, float* %tmp20848, i64 1
+ %tmp20850 = getelementptr inbounds float, float* %tmp20849, i64 1
+ %tmp20851 = getelementptr inbounds float, float* %tmp20850, i64 1
+ %tmp20852 = getelementptr inbounds float, float* %tmp20851, i64 1
+ %tmp20853 = getelementptr inbounds float, float* %tmp20852, i64 1
+ %tmp20854 = getelementptr inbounds float, float* %tmp20853, i64 1
+ %tmp20855 = getelementptr inbounds float, float* %tmp20854, i64 1
+ %tmp20856 = getelementptr inbounds float, float* %tmp20855, i64 1
+ %tmp20857 = getelementptr inbounds float, float* %tmp20856, i64 1
+ %tmp20858 = getelementptr inbounds float, float* %tmp20857, i64 1
+ %tmp20859 = getelementptr inbounds float, float* %tmp20858, i64 1
+ %tmp20860 = getelementptr inbounds float, float* %tmp20859, i64 1
+ %tmp20861 = getelementptr inbounds float, float* %tmp20860, i64 1
+ %tmp20862 = getelementptr inbounds float, float* %tmp20861, i64 1
+ %tmp20863 = getelementptr inbounds float, float* %tmp20862, i64 1
+ %tmp20864 = getelementptr inbounds float, float* %tmp20863, i64 1
+ %tmp20865 = getelementptr inbounds float, float* %tmp20864, i64 1
+ %tmp20866 = getelementptr inbounds float, float* %tmp20865, i64 1
+ %tmp20867 = getelementptr inbounds float, float* %tmp20866, i64 1
+ %tmp20868 = getelementptr inbounds float, float* %tmp20867, i64 1
+ %tmp20869 = getelementptr inbounds float, float* %tmp20868, i64 1
+ %tmp20870 = getelementptr inbounds float, float* %tmp20869, i64 1
+ %tmp20871 = getelementptr inbounds float, float* %tmp20870, i64 1
+ %tmp20872 = getelementptr inbounds float, float* %tmp20871, i64 1
+ %tmp20873 = getelementptr inbounds float, float* %tmp20872, i64 1
+ %tmp20874 = getelementptr inbounds float, float* %tmp20873, i64 1
+ %tmp20875 = getelementptr inbounds float, float* %tmp20874, i64 1
+ %tmp20876 = getelementptr inbounds float, float* %tmp20875, i64 1
+ %tmp20877 = getelementptr inbounds float, float* %tmp20876, i64 1
+ %tmp20878 = getelementptr inbounds float, float* %tmp20877, i64 1
+ %tmp20879 = getelementptr inbounds float, float* %tmp20878, i64 1
+ %tmp20880 = getelementptr inbounds float, float* %tmp20879, i64 1
+ %tmp20881 = getelementptr inbounds float, float* %tmp20880, i64 1
+ %tmp20882 = getelementptr inbounds float, float* %tmp20881, i64 1
+ %tmp20883 = getelementptr inbounds float, float* %tmp20882, i64 1
+ %tmp20884 = getelementptr inbounds float, float* %tmp20883, i64 1
+ %tmp20885 = getelementptr inbounds float, float* %tmp20884, i64 1
+ %tmp20886 = getelementptr inbounds float, float* %tmp20885, i64 1
+ %tmp20887 = getelementptr inbounds float, float* %tmp20886, i64 1
+ %tmp20888 = getelementptr inbounds float, float* %tmp20887, i64 1
+ %tmp20889 = getelementptr inbounds float, float* %tmp20888, i64 1
+ %tmp20890 = getelementptr inbounds float, float* %tmp20889, i64 1
+ %tmp20891 = getelementptr inbounds float, float* %tmp20890, i64 1
+ %tmp20892 = getelementptr inbounds float, float* %tmp20891, i64 1
+ %tmp20893 = getelementptr inbounds float, float* %tmp20892, i64 1
+ %tmp20894 = getelementptr inbounds float, float* %tmp20893, i64 1
+ %tmp20895 = getelementptr inbounds float, float* %tmp20894, i64 1
+ %tmp20896 = getelementptr inbounds float, float* %tmp20895, i64 1
+ %tmp20897 = getelementptr inbounds float, float* %tmp20896, i64 1
+ %tmp20898 = getelementptr inbounds float, float* %tmp20897, i64 1
+ %tmp20899 = getelementptr inbounds float, float* %tmp20898, i64 1
+ %tmp20900 = getelementptr inbounds float, float* %tmp20899, i64 1
+ %tmp20901 = getelementptr inbounds float, float* %tmp20900, i64 1
+ %tmp20902 = getelementptr inbounds float, float* %tmp20901, i64 1
+ %tmp20903 = getelementptr inbounds float, float* %tmp20902, i64 1
+ %tmp20904 = getelementptr inbounds float, float* %tmp20903, i64 1
+ %tmp20905 = getelementptr inbounds float, float* %tmp20904, i64 1
+ %tmp20906 = getelementptr inbounds float, float* %tmp20905, i64 1
+ %tmp20907 = getelementptr inbounds float, float* %tmp20906, i64 1
+ %tmp20908 = getelementptr inbounds float, float* %tmp20907, i64 1
+ %tmp20909 = getelementptr inbounds float, float* %tmp20908, i64 1
+ %tmp20910 = getelementptr inbounds float, float* %tmp20909, i64 1
+ %tmp20911 = getelementptr inbounds float, float* %tmp20910, i64 1
+ %tmp20912 = getelementptr inbounds float, float* %tmp20911, i64 1
+ %tmp20913 = getelementptr inbounds float, float* %tmp20912, i64 1
+ %tmp20914 = getelementptr inbounds float, float* %tmp20913, i64 1
+ %tmp20915 = getelementptr inbounds float, float* %tmp20914, i64 1
+ %tmp20916 = getelementptr inbounds float, float* %tmp20915, i64 1
+ %tmp20917 = getelementptr inbounds float, float* %tmp20916, i64 1
+ %tmp20918 = getelementptr inbounds float, float* %tmp20917, i64 1
+ %tmp20919 = getelementptr inbounds float, float* %tmp20918, i64 1
+ %tmp20920 = getelementptr inbounds float, float* %tmp20919, i64 1
+ %tmp20921 = getelementptr inbounds float, float* %tmp20920, i64 1
+ %tmp20922 = getelementptr inbounds float, float* %tmp20921, i64 1
+ %tmp20923 = getelementptr inbounds float, float* %tmp20922, i64 1
+ %tmp20924 = getelementptr inbounds float, float* %tmp20923, i64 1
+ %tmp20925 = getelementptr inbounds float, float* %tmp20924, i64 1
+ %tmp20926 = getelementptr inbounds float, float* %tmp20925, i64 1
+ %tmp20927 = getelementptr inbounds float, float* %tmp20926, i64 1
+ %tmp20928 = getelementptr inbounds float, float* %tmp20927, i64 1
+ %tmp20929 = getelementptr inbounds float, float* %tmp20928, i64 1
+ %tmp20930 = getelementptr inbounds float, float* %tmp20929, i64 1
+ %tmp20931 = getelementptr inbounds float, float* %tmp20930, i64 1
+ %tmp20932 = getelementptr inbounds float, float* %tmp20931, i64 1
+ %tmp20933 = getelementptr inbounds float, float* %tmp20932, i64 1
+ %tmp20934 = getelementptr inbounds float, float* %tmp20933, i64 1
+ %tmp20935 = getelementptr inbounds float, float* %tmp20934, i64 1
+ %tmp20936 = getelementptr inbounds float, float* %tmp20935, i64 1
+ %tmp20937 = getelementptr inbounds float, float* %tmp20936, i64 1
+ %tmp20938 = getelementptr inbounds float, float* %tmp20937, i64 1
+ %tmp20939 = getelementptr inbounds float, float* %tmp20938, i64 1
+ %tmp20940 = getelementptr inbounds float, float* %tmp20939, i64 1
+ %tmp20941 = getelementptr inbounds float, float* %tmp20940, i64 1
+ %tmp20942 = getelementptr inbounds float, float* %tmp20941, i64 1
+ %tmp20943 = getelementptr inbounds float, float* %tmp20942, i64 1
+ %tmp20944 = getelementptr inbounds float, float* %tmp20943, i64 1
+ %tmp20945 = getelementptr inbounds float, float* %tmp20944, i64 1
+ %tmp20946 = getelementptr inbounds float, float* %tmp20945, i64 1
+ %tmp20947 = getelementptr inbounds float, float* %tmp20946, i64 1
+ %tmp20948 = getelementptr inbounds float, float* %tmp20947, i64 1
+ %tmp20949 = getelementptr inbounds float, float* %tmp20948, i64 1
+ %tmp20950 = getelementptr inbounds float, float* %tmp20949, i64 1
+ %tmp20951 = getelementptr inbounds float, float* %tmp20950, i64 1
+ %tmp20952 = getelementptr inbounds float, float* %tmp20951, i64 1
+ %tmp20953 = getelementptr inbounds float, float* %tmp20952, i64 1
+ %tmp20954 = getelementptr inbounds float, float* %tmp20953, i64 1
+ %tmp20955 = getelementptr inbounds float, float* %tmp20954, i64 1
+ %tmp20956 = getelementptr inbounds float, float* %tmp20955, i64 1
+ %tmp20957 = getelementptr inbounds float, float* %tmp20956, i64 1
+ %tmp20958 = getelementptr inbounds float, float* %tmp20957, i64 1
+ %tmp20959 = getelementptr inbounds float, float* %tmp20958, i64 1
+ %tmp20960 = getelementptr inbounds float, float* %tmp20959, i64 1
+ %tmp20961 = getelementptr inbounds float, float* %tmp20960, i64 1
+ %tmp20962 = getelementptr inbounds float, float* %tmp20961, i64 1
+ %tmp20963 = getelementptr inbounds float, float* %tmp20962, i64 1
+ %tmp20964 = getelementptr inbounds float, float* %tmp20963, i64 1
+ %tmp20965 = getelementptr inbounds float, float* %tmp20964, i64 1
+ %tmp20966 = getelementptr inbounds float, float* %tmp20965, i64 1
+ %tmp20967 = getelementptr inbounds float, float* %tmp20966, i64 1
+ %tmp20968 = getelementptr inbounds float, float* %tmp20967, i64 1
+ %tmp20969 = getelementptr inbounds float, float* %tmp20968, i64 1
+ %tmp20970 = getelementptr inbounds float, float* %tmp20969, i64 1
+ %tmp20971 = getelementptr inbounds float, float* %tmp20970, i64 1
+ %tmp20972 = getelementptr inbounds float, float* %tmp20971, i64 1
+ %tmp20973 = getelementptr inbounds float, float* %tmp20972, i64 1
+ %tmp20974 = getelementptr inbounds float, float* %tmp20973, i64 1
+ %tmp20975 = getelementptr inbounds float, float* %tmp20974, i64 1
+ %tmp20976 = getelementptr inbounds float, float* %tmp20975, i64 1
+ %tmp20977 = getelementptr inbounds float, float* %tmp20976, i64 1
+ %tmp20978 = getelementptr inbounds float, float* %tmp20977, i64 1
+ %tmp20979 = getelementptr inbounds float, float* %tmp20978, i64 1
+ %tmp20980 = getelementptr inbounds float, float* %tmp20979, i64 1
+ %tmp20981 = getelementptr inbounds float, float* %tmp20980, i64 1
+ %tmp20982 = getelementptr inbounds float, float* %tmp20981, i64 1
+ %tmp20983 = getelementptr inbounds float, float* %tmp20982, i64 1
+ %tmp20984 = getelementptr inbounds float, float* %tmp20983, i64 1
+ %tmp20985 = getelementptr inbounds float, float* %tmp20984, i64 1
+ %tmp20986 = getelementptr inbounds float, float* %tmp20985, i64 1
+ %tmp20987 = getelementptr inbounds float, float* %tmp20986, i64 1
+ %tmp20988 = getelementptr inbounds float, float* %tmp20987, i64 1
+ %tmp20989 = getelementptr inbounds float, float* %tmp20988, i64 1
+ %tmp20990 = getelementptr inbounds float, float* %tmp20989, i64 1
+ %tmp20991 = getelementptr inbounds float, float* %tmp20990, i64 1
+ %tmp20992 = getelementptr inbounds float, float* %tmp20991, i64 1
+ %tmp20993 = getelementptr inbounds float, float* %tmp20992, i64 1
+ %tmp20994 = getelementptr inbounds float, float* %tmp20993, i64 1
+ %tmp20995 = getelementptr inbounds float, float* %tmp20994, i64 1
+ %tmp20996 = getelementptr inbounds float, float* %tmp20995, i64 1
+ %tmp20997 = getelementptr inbounds float, float* %tmp20996, i64 1
+ %tmp20998 = getelementptr inbounds float, float* %tmp20997, i64 1
+ %tmp20999 = getelementptr inbounds float, float* %tmp20998, i64 1
+ %tmp21000 = getelementptr inbounds float, float* %tmp20999, i64 1
+ %tmp21001 = getelementptr inbounds float, float* %tmp21000, i64 1
+ %tmp21002 = getelementptr inbounds float, float* %tmp21001, i64 1
+ %tmp21003 = getelementptr inbounds float, float* %tmp21002, i64 1
+ %tmp21004 = getelementptr inbounds float, float* %tmp21003, i64 1
+ %tmp21005 = getelementptr inbounds float, float* %tmp21004, i64 1
+ %tmp21006 = getelementptr inbounds float, float* %tmp21005, i64 1
+ %tmp21007 = getelementptr inbounds float, float* %tmp21006, i64 1
+ %tmp21008 = getelementptr inbounds float, float* %tmp21007, i64 1
+ %tmp21009 = getelementptr inbounds float, float* %tmp21008, i64 1
+ %tmp21010 = getelementptr inbounds float, float* %tmp21009, i64 1
+ %tmp21011 = getelementptr inbounds float, float* %tmp21010, i64 1
+ %tmp21012 = getelementptr inbounds float, float* %tmp21011, i64 1
+ %tmp21013 = getelementptr inbounds float, float* %tmp21012, i64 1
+ %tmp21014 = getelementptr inbounds float, float* %tmp21013, i64 1
+ %tmp21015 = getelementptr inbounds float, float* %tmp21014, i64 1
+ %tmp21016 = getelementptr inbounds float, float* %tmp21015, i64 1
+ %tmp21017 = getelementptr inbounds float, float* %tmp21016, i64 1
+ %tmp21018 = getelementptr inbounds float, float* %tmp21017, i64 1
+ %tmp21019 = getelementptr inbounds float, float* %tmp21018, i64 1
+ %tmp21020 = getelementptr inbounds float, float* %tmp21019, i64 1
+ %tmp21021 = getelementptr inbounds float, float* %tmp21020, i64 1
+ %tmp21022 = getelementptr inbounds float, float* %tmp21021, i64 1
+ %tmp21023 = getelementptr inbounds float, float* %tmp21022, i64 1
+ %tmp21024 = getelementptr inbounds float, float* %tmp21023, i64 1
+ %tmp21025 = getelementptr inbounds float, float* %tmp21024, i64 1
+ %tmp21026 = getelementptr inbounds float, float* %tmp21025, i64 1
+ %tmp21027 = getelementptr inbounds float, float* %tmp21026, i64 1
+ %tmp21028 = getelementptr inbounds float, float* %tmp21027, i64 1
+ %tmp21029 = getelementptr inbounds float, float* %tmp21028, i64 1
+ %tmp21030 = getelementptr inbounds float, float* %tmp21029, i64 1
+ %tmp21031 = getelementptr inbounds float, float* %tmp21030, i64 1
+ %tmp21032 = getelementptr inbounds float, float* %tmp21031, i64 1
+ %tmp21033 = getelementptr inbounds float, float* %tmp21032, i64 1
+ %tmp21034 = getelementptr inbounds float, float* %tmp21033, i64 1
+ %tmp21035 = getelementptr inbounds float, float* %tmp21034, i64 1
+ %tmp21036 = getelementptr inbounds float, float* %tmp21035, i64 1
+ %tmp21037 = getelementptr inbounds float, float* %tmp21036, i64 1
+ %tmp21038 = getelementptr inbounds float, float* %tmp21037, i64 1
+ %tmp21039 = getelementptr inbounds float, float* %tmp21038, i64 1
+ %tmp21040 = getelementptr inbounds float, float* %tmp21039, i64 1
+ %tmp21041 = getelementptr inbounds float, float* %tmp21040, i64 1
+ %tmp21042 = getelementptr inbounds float, float* %tmp21041, i64 1
+ %tmp21043 = getelementptr inbounds float, float* %tmp21042, i64 1
+ %tmp21044 = getelementptr inbounds float, float* %tmp21043, i64 1
+ %tmp21045 = getelementptr inbounds float, float* %tmp21044, i64 1
+ %tmp21046 = getelementptr inbounds float, float* %tmp21045, i64 1
+ %tmp21047 = getelementptr inbounds float, float* %tmp21046, i64 1
+ %tmp21048 = getelementptr inbounds float, float* %tmp21047, i64 1
+ %tmp21049 = getelementptr inbounds float, float* %tmp21048, i64 1
+ %tmp21050 = getelementptr inbounds float, float* %tmp21049, i64 1
+ %tmp21051 = getelementptr inbounds float, float* %tmp21050, i64 1
+ %tmp21052 = getelementptr inbounds float, float* %tmp21051, i64 1
+ %tmp21053 = getelementptr inbounds float, float* %tmp21052, i64 1
+ %tmp21054 = getelementptr inbounds float, float* %tmp21053, i64 1
+ %tmp21055 = getelementptr inbounds float, float* %tmp21054, i64 1
+ %tmp21056 = getelementptr inbounds float, float* %tmp21055, i64 1
+ %tmp21057 = getelementptr inbounds float, float* %tmp21056, i64 1
+ %tmp21058 = getelementptr inbounds float, float* %tmp21057, i64 1
+ %tmp21059 = getelementptr inbounds float, float* %tmp21058, i64 1
+ %tmp21060 = getelementptr inbounds float, float* %tmp21059, i64 1
+ %tmp21061 = getelementptr inbounds float, float* %tmp21060, i64 1
+ %tmp21062 = getelementptr inbounds float, float* %tmp21061, i64 1
+ %tmp21063 = getelementptr inbounds float, float* %tmp21062, i64 1
+ %tmp21064 = getelementptr inbounds float, float* %tmp21063, i64 1
+ %tmp21065 = getelementptr inbounds float, float* %tmp21064, i64 1
+ %tmp21066 = getelementptr inbounds float, float* %tmp21065, i64 1
+ %tmp21067 = getelementptr inbounds float, float* %tmp21066, i64 1
+ %tmp21068 = getelementptr inbounds float, float* %tmp21067, i64 1
+ %tmp21069 = getelementptr inbounds float, float* %tmp21068, i64 1
+ %tmp21070 = getelementptr inbounds float, float* %tmp21069, i64 1
+ %tmp21071 = getelementptr inbounds float, float* %tmp21070, i64 1
+ %tmp21072 = getelementptr inbounds float, float* %tmp21071, i64 1
+ %tmp21073 = getelementptr inbounds float, float* %tmp21072, i64 1
+ %tmp21074 = getelementptr inbounds float, float* %tmp21073, i64 1
+ %tmp21075 = getelementptr inbounds float, float* %tmp21074, i64 1
+ %tmp21076 = getelementptr inbounds float, float* %tmp21075, i64 1
+ %tmp21077 = getelementptr inbounds float, float* %tmp21076, i64 1
+ %tmp21078 = getelementptr inbounds float, float* %tmp21077, i64 1
+ %tmp21079 = getelementptr inbounds float, float* %tmp21078, i64 1
+ %tmp21080 = getelementptr inbounds float, float* %tmp21079, i64 1
+ %tmp21081 = getelementptr inbounds float, float* %tmp21080, i64 1
+ %tmp21082 = getelementptr inbounds float, float* %tmp21081, i64 1
+ %tmp21083 = getelementptr inbounds float, float* %tmp21082, i64 1
+ %tmp21084 = getelementptr inbounds float, float* %tmp21083, i64 1
+ %tmp21085 = getelementptr inbounds float, float* %tmp21084, i64 1
+ %tmp21086 = getelementptr inbounds float, float* %tmp21085, i64 1
+ %tmp21087 = getelementptr inbounds float, float* %tmp21086, i64 1
+ %tmp21088 = getelementptr inbounds float, float* %tmp21087, i64 1
+ %tmp21089 = getelementptr inbounds float, float* %tmp21088, i64 1
+ %tmp21090 = getelementptr inbounds float, float* %tmp21089, i64 1
+ %tmp21091 = getelementptr inbounds float, float* %tmp21090, i64 1
+ %tmp21092 = getelementptr inbounds float, float* %tmp21091, i64 1
+ %tmp21093 = getelementptr inbounds float, float* %tmp21092, i64 1
+ %tmp21094 = getelementptr inbounds float, float* %tmp21093, i64 1
+ %tmp21095 = getelementptr inbounds float, float* %tmp21094, i64 1
+ %tmp21096 = getelementptr inbounds float, float* %tmp21095, i64 1
+ %tmp21097 = getelementptr inbounds float, float* %tmp21096, i64 1
+ %tmp21098 = getelementptr inbounds float, float* %tmp21097, i64 1
+ %tmp21099 = getelementptr inbounds float, float* %tmp21098, i64 1
+ %tmp21100 = getelementptr inbounds float, float* %tmp21099, i64 1
+ %tmp21101 = getelementptr inbounds float, float* %tmp21100, i64 1
+ %tmp21102 = getelementptr inbounds float, float* %tmp21101, i64 1
+ %tmp21103 = getelementptr inbounds float, float* %tmp21102, i64 1
+ %tmp21104 = getelementptr inbounds float, float* %tmp21103, i64 1
+ %tmp21105 = getelementptr inbounds float, float* %tmp21104, i64 1
+ %tmp21106 = getelementptr inbounds float, float* %tmp21105, i64 1
+ %tmp21107 = getelementptr inbounds float, float* %tmp21106, i64 1
+ %tmp21108 = getelementptr inbounds float, float* %tmp21107, i64 1
+ %tmp21109 = getelementptr inbounds float, float* %tmp21108, i64 1
+ %tmp21110 = getelementptr inbounds float, float* %tmp21109, i64 1
+ %tmp21111 = getelementptr inbounds float, float* %tmp21110, i64 1
+ %tmp21112 = getelementptr inbounds float, float* %tmp21111, i64 1
+ %tmp21113 = getelementptr inbounds float, float* %tmp21112, i64 1
+ %tmp21114 = getelementptr inbounds float, float* %tmp21113, i64 1
+ %tmp21115 = getelementptr inbounds float, float* %tmp21114, i64 1
+ %tmp21116 = getelementptr inbounds float, float* %tmp21115, i64 1
+ %tmp21117 = getelementptr inbounds float, float* %tmp21116, i64 1
+ %tmp21118 = getelementptr inbounds float, float* %tmp21117, i64 1
+ %tmp21119 = getelementptr inbounds float, float* %tmp21118, i64 1
+ %tmp21120 = getelementptr inbounds float, float* %tmp21119, i64 1
+ %tmp21121 = getelementptr inbounds float, float* %tmp21120, i64 1
+ %tmp21122 = getelementptr inbounds float, float* %tmp21121, i64 1
+ %tmp21123 = getelementptr inbounds float, float* %tmp21122, i64 1
+ %tmp21124 = getelementptr inbounds float, float* %tmp21123, i64 1
+ %tmp21125 = getelementptr inbounds float, float* %tmp21124, i64 1
+ %tmp21126 = getelementptr inbounds float, float* %tmp21125, i64 1
+ %tmp21127 = getelementptr inbounds float, float* %tmp21126, i64 1
+ %tmp21128 = getelementptr inbounds float, float* %tmp21127, i64 1
+ %tmp21129 = getelementptr inbounds float, float* %tmp21128, i64 1
+ %tmp21130 = getelementptr inbounds float, float* %tmp21129, i64 1
+ %tmp21131 = getelementptr inbounds float, float* %tmp21130, i64 1
+ %tmp21132 = getelementptr inbounds float, float* %tmp21131, i64 1
+ %tmp21133 = getelementptr inbounds float, float* %tmp21132, i64 1
+ %tmp21134 = getelementptr inbounds float, float* %tmp21133, i64 1
+ %tmp21135 = getelementptr inbounds float, float* %tmp21134, i64 1
+ %tmp21136 = getelementptr inbounds float, float* %tmp21135, i64 1
+ %tmp21137 = getelementptr inbounds float, float* %tmp21136, i64 1
+ %tmp21138 = getelementptr inbounds float, float* %tmp21137, i64 1
+ %tmp21139 = getelementptr inbounds float, float* %tmp21138, i64 1
+ %tmp21140 = getelementptr inbounds float, float* %tmp21139, i64 1
+ %tmp21141 = getelementptr inbounds float, float* %tmp21140, i64 1
+ %tmp21142 = getelementptr inbounds float, float* %tmp21141, i64 1
+ %tmp21143 = getelementptr inbounds float, float* %tmp21142, i64 1
+ %tmp21144 = getelementptr inbounds float, float* %tmp21143, i64 1
+ %tmp21145 = getelementptr inbounds float, float* %tmp21144, i64 1
+ %tmp21146 = getelementptr inbounds float, float* %tmp21145, i64 1
+ %tmp21147 = getelementptr inbounds float, float* %tmp21146, i64 1
+ %tmp21148 = getelementptr inbounds float, float* %tmp21147, i64 1
+ %tmp21149 = getelementptr inbounds float, float* %tmp21148, i64 1
+ %tmp21150 = getelementptr inbounds float, float* %tmp21149, i64 1
+ %tmp21151 = getelementptr inbounds float, float* %tmp21150, i64 1
+ %tmp21152 = getelementptr inbounds float, float* %tmp21151, i64 1
+ %tmp21153 = getelementptr inbounds float, float* %tmp21152, i64 1
+ %tmp21154 = getelementptr inbounds float, float* %tmp21153, i64 1
+ %tmp21155 = getelementptr inbounds float, float* %tmp21154, i64 1
+ %tmp21156 = getelementptr inbounds float, float* %tmp21155, i64 1
+ %tmp21157 = getelementptr inbounds float, float* %tmp21156, i64 1
+ %tmp21158 = getelementptr inbounds float, float* %tmp21157, i64 1
+ %tmp21159 = getelementptr inbounds float, float* %tmp21158, i64 1
+ %tmp21160 = getelementptr inbounds float, float* %tmp21159, i64 1
+ %tmp21161 = getelementptr inbounds float, float* %tmp21160, i64 1
+ %tmp21162 = getelementptr inbounds float, float* %tmp21161, i64 1
+ %tmp21163 = getelementptr inbounds float, float* %tmp21162, i64 1
+ %tmp21164 = getelementptr inbounds float, float* %tmp21163, i64 1
+ %tmp21165 = getelementptr inbounds float, float* %tmp21164, i64 1
+ %tmp21166 = getelementptr inbounds float, float* %tmp21165, i64 1
+ %tmp21167 = getelementptr inbounds float, float* %tmp21166, i64 1
+ %tmp21168 = getelementptr inbounds float, float* %tmp21167, i64 1
+ %tmp21169 = getelementptr inbounds float, float* %tmp21168, i64 1
+ %tmp21170 = getelementptr inbounds float, float* %tmp21169, i64 1
+ %tmp21171 = getelementptr inbounds float, float* %tmp21170, i64 1
+ %tmp21172 = getelementptr inbounds float, float* %tmp21171, i64 1
+ %tmp21173 = getelementptr inbounds float, float* %tmp21172, i64 1
+ %tmp21174 = getelementptr inbounds float, float* %tmp21173, i64 1
+ %tmp21175 = getelementptr inbounds float, float* %tmp21174, i64 1
+ %tmp21176 = getelementptr inbounds float, float* %tmp21175, i64 1
+ %tmp21177 = getelementptr inbounds float, float* %tmp21176, i64 1
+ %tmp21178 = getelementptr inbounds float, float* %tmp21177, i64 1
+ %tmp21179 = getelementptr inbounds float, float* %tmp21178, i64 1
+ %tmp21180 = getelementptr inbounds float, float* %tmp21179, i64 1
+ %tmp21181 = getelementptr inbounds float, float* %tmp21180, i64 1
+ %tmp21182 = getelementptr inbounds float, float* %tmp21181, i64 1
+ %tmp21183 = getelementptr inbounds float, float* %tmp21182, i64 1
+ %tmp21184 = getelementptr inbounds float, float* %tmp21183, i64 1
+ %tmp21185 = getelementptr inbounds float, float* %tmp21184, i64 1
+ %tmp21186 = getelementptr inbounds float, float* %tmp21185, i64 1
+ %tmp21187 = getelementptr inbounds float, float* %tmp21186, i64 1
+ %tmp21188 = getelementptr inbounds float, float* %tmp21187, i64 1
+ %tmp21189 = getelementptr inbounds float, float* %tmp21188, i64 1
+ %tmp21190 = getelementptr inbounds float, float* %tmp21189, i64 1
+ %tmp21191 = getelementptr inbounds float, float* %tmp21190, i64 1
+ %tmp21192 = getelementptr inbounds float, float* %tmp21191, i64 1
+ %tmp21193 = getelementptr inbounds float, float* %tmp21192, i64 1
+ %tmp21194 = getelementptr inbounds float, float* %tmp21193, i64 1
+ %tmp21195 = getelementptr inbounds float, float* %tmp21194, i64 1
+ %tmp21196 = getelementptr inbounds float, float* %tmp21195, i64 1
+ %tmp21197 = getelementptr inbounds float, float* %tmp21196, i64 1
+ %tmp21198 = getelementptr inbounds float, float* %tmp21197, i64 1
+ %tmp21199 = getelementptr inbounds float, float* %tmp21198, i64 1
+ %tmp21200 = getelementptr inbounds float, float* %tmp21199, i64 1
+ %tmp21201 = getelementptr inbounds float, float* %tmp21200, i64 1
+ %tmp21202 = getelementptr inbounds float, float* %tmp21201, i64 1
+ %tmp21203 = getelementptr inbounds float, float* %tmp21202, i64 1
+ %tmp21204 = getelementptr inbounds float, float* %tmp21203, i64 1
+ %tmp21205 = getelementptr inbounds float, float* %tmp21204, i64 1
+ %tmp21206 = getelementptr inbounds float, float* %tmp21205, i64 1
+ %tmp21207 = getelementptr inbounds float, float* %tmp21206, i64 1
+ %tmp21208 = getelementptr inbounds float, float* %tmp21207, i64 1
+ %tmp21209 = getelementptr inbounds float, float* %tmp21208, i64 1
+ %tmp21210 = getelementptr inbounds float, float* %tmp21209, i64 1
+ %tmp21211 = getelementptr inbounds float, float* %tmp21210, i64 1
+ %tmp21212 = getelementptr inbounds float, float* %tmp21211, i64 1
+ %tmp21213 = getelementptr inbounds float, float* %tmp21212, i64 1
+ %tmp21214 = getelementptr inbounds float, float* %tmp21213, i64 1
+ %tmp21215 = getelementptr inbounds float, float* %tmp21214, i64 1
+ %tmp21216 = getelementptr inbounds float, float* %tmp21215, i64 1
+ %tmp21217 = getelementptr inbounds float, float* %tmp21216, i64 1
+ %tmp21218 = getelementptr inbounds float, float* %tmp21217, i64 1
+ %tmp21219 = getelementptr inbounds float, float* %tmp21218, i64 1
+ %tmp21220 = getelementptr inbounds float, float* %tmp21219, i64 1
+ %tmp21221 = getelementptr inbounds float, float* %tmp21220, i64 1
+ %tmp21222 = getelementptr inbounds float, float* %tmp21221, i64 1
+ %tmp21223 = getelementptr inbounds float, float* %tmp21222, i64 1
+ %tmp21224 = getelementptr inbounds float, float* %tmp21223, i64 1
+ %tmp21225 = getelementptr inbounds float, float* %tmp21224, i64 1
+ %tmp21226 = getelementptr inbounds float, float* %tmp21225, i64 1
+ %tmp21227 = getelementptr inbounds float, float* %tmp21226, i64 1
+ %tmp21228 = getelementptr inbounds float, float* %tmp21227, i64 1
+ %tmp21229 = getelementptr inbounds float, float* %tmp21228, i64 1
+ %tmp21230 = getelementptr inbounds float, float* %tmp21229, i64 1
+ %tmp21231 = getelementptr inbounds float, float* %tmp21230, i64 1
+ %tmp21232 = getelementptr inbounds float, float* %tmp21231, i64 1
+ %tmp21233 = getelementptr inbounds float, float* %tmp21232, i64 1
+ %tmp21234 = getelementptr inbounds float, float* %tmp21233, i64 1
+ %tmp21235 = getelementptr inbounds float, float* %tmp21234, i64 1
+ %tmp21236 = getelementptr inbounds float, float* %tmp21235, i64 1
+ %tmp21237 = getelementptr inbounds float, float* %tmp21236, i64 1
+ %tmp21238 = getelementptr inbounds float, float* %tmp21237, i64 1
+ %tmp21239 = getelementptr inbounds float, float* %tmp21238, i64 1
+ %tmp21240 = getelementptr inbounds float, float* %tmp21239, i64 1
+ %tmp21241 = getelementptr inbounds float, float* %tmp21240, i64 1
+ %tmp21242 = getelementptr inbounds float, float* %tmp21241, i64 1
+ %tmp21243 = getelementptr inbounds float, float* %tmp21242, i64 1
+ %tmp21244 = getelementptr inbounds float, float* %tmp21243, i64 1
+ %tmp21245 = getelementptr inbounds float, float* %tmp21244, i64 1
+ %tmp21246 = getelementptr inbounds float, float* %tmp21245, i64 1
+ %tmp21247 = getelementptr inbounds float, float* %tmp21246, i64 1
+ %tmp21248 = getelementptr inbounds float, float* %tmp21247, i64 1
+ %tmp21249 = getelementptr inbounds float, float* %tmp21248, i64 1
+ %tmp21250 = getelementptr inbounds float, float* %tmp21249, i64 1
+ %tmp21251 = getelementptr inbounds float, float* %tmp21250, i64 1
+ %tmp21252 = getelementptr inbounds float, float* %tmp21251, i64 1
+ %tmp21253 = getelementptr inbounds float, float* %tmp21252, i64 1
+ %tmp21254 = getelementptr inbounds float, float* %tmp21253, i64 1
+ %tmp21255 = getelementptr inbounds float, float* %tmp21254, i64 1
+ %tmp21256 = getelementptr inbounds float, float* %tmp21255, i64 1
+ %tmp21257 = getelementptr inbounds float, float* %tmp21256, i64 1
+ %tmp21258 = getelementptr inbounds float, float* %tmp21257, i64 1
+ %tmp21259 = getelementptr inbounds float, float* %tmp21258, i64 1
+ %tmp21260 = getelementptr inbounds float, float* %tmp21259, i64 1
+ %tmp21261 = getelementptr inbounds float, float* %tmp21260, i64 1
+ %tmp21262 = getelementptr inbounds float, float* %tmp21261, i64 1
+ %tmp21263 = getelementptr inbounds float, float* %tmp21262, i64 1
+ %tmp21264 = getelementptr inbounds float, float* %tmp21263, i64 1
+ %tmp21265 = getelementptr inbounds float, float* %tmp21264, i64 1
+ %tmp21266 = getelementptr inbounds float, float* %tmp21265, i64 1
+ %tmp21267 = getelementptr inbounds float, float* %tmp21266, i64 1
+ %tmp21268 = getelementptr inbounds float, float* %tmp21267, i64 1
+ %tmp21269 = getelementptr inbounds float, float* %tmp21268, i64 1
+ %tmp21270 = getelementptr inbounds float, float* %tmp21269, i64 1
+ %tmp21271 = getelementptr inbounds float, float* %tmp21270, i64 1
+ %tmp21272 = getelementptr inbounds float, float* %tmp21271, i64 1
+ %tmp21273 = getelementptr inbounds float, float* %tmp21272, i64 1
+ %tmp21274 = getelementptr inbounds float, float* %tmp21273, i64 1
+ %tmp21275 = getelementptr inbounds float, float* %tmp21274, i64 1
+ %tmp21276 = getelementptr inbounds float, float* %tmp21275, i64 1
+ %tmp21277 = getelementptr inbounds float, float* %tmp21276, i64 1
+ %tmp21278 = getelementptr inbounds float, float* %tmp21277, i64 1
+ %tmp21279 = getelementptr inbounds float, float* %tmp21278, i64 1
+ %tmp21280 = getelementptr inbounds float, float* %tmp21279, i64 1
+ %tmp21281 = getelementptr inbounds float, float* %tmp21280, i64 1
+ %tmp21282 = getelementptr inbounds float, float* %tmp21281, i64 1
+ %tmp21283 = getelementptr inbounds float, float* %tmp21282, i64 1
+ %tmp21284 = getelementptr inbounds float, float* %tmp21283, i64 1
+ %tmp21285 = getelementptr inbounds float, float* %tmp21284, i64 1
+ %tmp21286 = getelementptr inbounds float, float* %tmp21285, i64 1
+ %tmp21287 = getelementptr inbounds float, float* %tmp21286, i64 1
+ %tmp21288 = getelementptr inbounds float, float* %tmp21287, i64 1
+ %tmp21289 = getelementptr inbounds float, float* %tmp21288, i64 1
+ %tmp21290 = getelementptr inbounds float, float* %tmp21289, i64 1
+ %tmp21291 = getelementptr inbounds float, float* %tmp21290, i64 1
+ %tmp21292 = getelementptr inbounds float, float* %tmp21291, i64 1
+ %tmp21293 = getelementptr inbounds float, float* %tmp21292, i64 1
+ %tmp21294 = getelementptr inbounds float, float* %tmp21293, i64 1
+ %tmp21295 = getelementptr inbounds float, float* %tmp21294, i64 1
+ %tmp21296 = getelementptr inbounds float, float* %tmp21295, i64 1
+ %tmp21297 = getelementptr inbounds float, float* %tmp21296, i64 1
+ %tmp21298 = getelementptr inbounds float, float* %tmp21297, i64 1
+ %tmp21299 = getelementptr inbounds float, float* %tmp21298, i64 1
+ %tmp21300 = getelementptr inbounds float, float* %tmp21299, i64 1
+ %tmp21301 = getelementptr inbounds float, float* %tmp21300, i64 1
+ %tmp21302 = getelementptr inbounds float, float* %tmp21301, i64 1
+ %tmp21303 = getelementptr inbounds float, float* %tmp21302, i64 1
+ %tmp21304 = getelementptr inbounds float, float* %tmp21303, i64 1
+ %tmp21305 = getelementptr inbounds float, float* %tmp21304, i64 1
+ %tmp21306 = getelementptr inbounds float, float* %tmp21305, i64 1
+ %tmp21307 = getelementptr inbounds float, float* %tmp21306, i64 1
+ %tmp21308 = getelementptr inbounds float, float* %tmp21307, i64 1
+ %tmp21309 = getelementptr inbounds float, float* %tmp21308, i64 1
+ %tmp21310 = getelementptr inbounds float, float* %tmp21309, i64 1
+ %tmp21311 = getelementptr inbounds float, float* %tmp21310, i64 1
+ %tmp21312 = getelementptr inbounds float, float* %tmp21311, i64 1
+ %tmp21313 = getelementptr inbounds float, float* %tmp21312, i64 1
+ %tmp21314 = getelementptr inbounds float, float* %tmp21313, i64 1
+ %tmp21315 = getelementptr inbounds float, float* %tmp21314, i64 1
+ %tmp21316 = getelementptr inbounds float, float* %tmp21315, i64 1
+ %tmp21317 = getelementptr inbounds float, float* %tmp21316, i64 1
+ %tmp21318 = getelementptr inbounds float, float* %tmp21317, i64 1
+ %tmp21319 = getelementptr inbounds float, float* %tmp21318, i64 1
+ %tmp21320 = getelementptr inbounds float, float* %tmp21319, i64 1
+ %tmp21321 = getelementptr inbounds float, float* %tmp21320, i64 1
+ %tmp21322 = getelementptr inbounds float, float* %tmp21321, i64 1
+ %tmp21323 = getelementptr inbounds float, float* %tmp21322, i64 1
+ %tmp21324 = getelementptr inbounds float, float* %tmp21323, i64 1
+ %tmp21325 = getelementptr inbounds float, float* %tmp21324, i64 1
+ %tmp21326 = getelementptr inbounds float, float* %tmp21325, i64 1
+ %tmp21327 = getelementptr inbounds float, float* %tmp21326, i64 1
+ %tmp21328 = getelementptr inbounds float, float* %tmp21327, i64 1
+ %tmp21329 = getelementptr inbounds float, float* %tmp21328, i64 1
+ %tmp21330 = getelementptr inbounds float, float* %tmp21329, i64 1
+ %tmp21331 = getelementptr inbounds float, float* %tmp21330, i64 1
+ %tmp21332 = getelementptr inbounds float, float* %tmp21331, i64 1
+ %tmp21333 = getelementptr inbounds float, float* %tmp21332, i64 1
+ %tmp21334 = getelementptr inbounds float, float* %tmp21333, i64 1
+ %tmp21335 = getelementptr inbounds float, float* %tmp21334, i64 1
+ %tmp21336 = getelementptr inbounds float, float* %tmp21335, i64 1
+ %tmp21337 = getelementptr inbounds float, float* %tmp21336, i64 1
+ %tmp21338 = getelementptr inbounds float, float* %tmp21337, i64 1
+ %tmp21339 = getelementptr inbounds float, float* %tmp21338, i64 1
+ %tmp21340 = getelementptr inbounds float, float* %tmp21339, i64 1
+ %tmp21341 = getelementptr inbounds float, float* %tmp21340, i64 1
+ %tmp21342 = getelementptr inbounds float, float* %tmp21341, i64 1
+ %tmp21343 = getelementptr inbounds float, float* %tmp21342, i64 1
+ %tmp21344 = getelementptr inbounds float, float* %tmp21343, i64 1
+ %tmp21345 = getelementptr inbounds float, float* %tmp21344, i64 1
+ %tmp21346 = getelementptr inbounds float, float* %tmp21345, i64 1
+ %tmp21347 = getelementptr inbounds float, float* %tmp21346, i64 1
+ %tmp21348 = getelementptr inbounds float, float* %tmp21347, i64 1
+ %tmp21349 = getelementptr inbounds float, float* %tmp21348, i64 1
+ %tmp21350 = getelementptr inbounds float, float* %tmp21349, i64 1
+ %tmp21351 = getelementptr inbounds float, float* %tmp21350, i64 1
+ %tmp21352 = getelementptr inbounds float, float* %tmp21351, i64 1
+ %tmp21353 = getelementptr inbounds float, float* %tmp21352, i64 1
+ %tmp21354 = getelementptr inbounds float, float* %tmp21353, i64 1
+ %tmp21355 = getelementptr inbounds float, float* %tmp21354, i64 1
+ %tmp21356 = getelementptr inbounds float, float* %tmp21355, i64 1
+ %tmp21357 = getelementptr inbounds float, float* %tmp21356, i64 1
+ %tmp21358 = getelementptr inbounds float, float* %tmp21357, i64 1
+ %tmp21359 = getelementptr inbounds float, float* %tmp21358, i64 1
+ %tmp21360 = getelementptr inbounds float, float* %tmp21359, i64 1
+ %tmp21361 = getelementptr inbounds float, float* %tmp21360, i64 1
+ %tmp21362 = getelementptr inbounds float, float* %tmp21361, i64 1
+ %tmp21363 = getelementptr inbounds float, float* %tmp21362, i64 1
+ %tmp21364 = getelementptr inbounds float, float* %tmp21363, i64 1
+ %tmp21365 = getelementptr inbounds float, float* %tmp21364, i64 1
+ %tmp21366 = getelementptr inbounds float, float* %tmp21365, i64 1
+ %tmp21367 = getelementptr inbounds float, float* %tmp21366, i64 1
+ %tmp21368 = getelementptr inbounds float, float* %tmp21367, i64 1
+ %tmp21369 = getelementptr inbounds float, float* %tmp21368, i64 1
+ %tmp21370 = getelementptr inbounds float, float* %tmp21369, i64 1
+ %tmp21371 = getelementptr inbounds float, float* %tmp21370, i64 1
+ %tmp21372 = getelementptr inbounds float, float* %tmp21371, i64 1
+ %tmp21373 = getelementptr inbounds float, float* %tmp21372, i64 1
+ %tmp21374 = getelementptr inbounds float, float* %tmp21373, i64 1
+ %tmp21375 = getelementptr inbounds float, float* %tmp21374, i64 1
+ %tmp21376 = getelementptr inbounds float, float* %tmp21375, i64 1
+ %tmp21377 = getelementptr inbounds float, float* %tmp21376, i64 1
+ %tmp21378 = getelementptr inbounds float, float* %tmp21377, i64 1
+ %tmp21379 = getelementptr inbounds float, float* %tmp21378, i64 1
+ %tmp21380 = getelementptr inbounds float, float* %tmp21379, i64 1
+ %tmp21381 = getelementptr inbounds float, float* %tmp21380, i64 1
+ %tmp21382 = getelementptr inbounds float, float* %tmp21381, i64 1
+ %tmp21383 = getelementptr inbounds float, float* %tmp21382, i64 1
+ %tmp21384 = getelementptr inbounds float, float* %tmp21383, i64 1
+ %tmp21385 = getelementptr inbounds float, float* %tmp21384, i64 1
+ %tmp21386 = getelementptr inbounds float, float* %tmp21385, i64 1
+ %tmp21387 = getelementptr inbounds float, float* %tmp21386, i64 1
+ %tmp21388 = getelementptr inbounds float, float* %tmp21387, i64 1
+ %tmp21389 = getelementptr inbounds float, float* %tmp21388, i64 1
+ %tmp21390 = getelementptr inbounds float, float* %tmp21389, i64 1
+ %tmp21391 = getelementptr inbounds float, float* %tmp21390, i64 1
+ %tmp21392 = getelementptr inbounds float, float* %tmp21391, i64 1
+ %tmp21393 = getelementptr inbounds float, float* %tmp21392, i64 1
+ %tmp21394 = getelementptr inbounds float, float* %tmp21393, i64 1
+ %tmp21395 = getelementptr inbounds float, float* %tmp21394, i64 1
+ %tmp21396 = getelementptr inbounds float, float* %tmp21395, i64 1
+ %tmp21397 = getelementptr inbounds float, float* %tmp21396, i64 1
+ %tmp21398 = getelementptr inbounds float, float* %tmp21397, i64 1
+ %tmp21399 = getelementptr inbounds float, float* %tmp21398, i64 1
+ %tmp21400 = getelementptr inbounds float, float* %tmp21399, i64 1
+ %tmp21401 = getelementptr inbounds float, float* %tmp21400, i64 1
+ %tmp21402 = getelementptr inbounds float, float* %tmp21401, i64 1
+ %tmp21403 = getelementptr inbounds float, float* %tmp21402, i64 1
+ %tmp21404 = getelementptr inbounds float, float* %tmp21403, i64 1
+ %tmp21405 = getelementptr inbounds float, float* %tmp21404, i64 1
+ %tmp21406 = getelementptr inbounds float, float* %tmp21405, i64 1
+ %tmp21407 = getelementptr inbounds float, float* %tmp21406, i64 1
+ %tmp21408 = getelementptr inbounds float, float* %tmp21407, i64 1
+ %tmp21409 = getelementptr inbounds float, float* %tmp21408, i64 1
+ %tmp21410 = getelementptr inbounds float, float* %tmp21409, i64 1
+ %tmp21411 = getelementptr inbounds float, float* %tmp21410, i64 1
+ %tmp21412 = getelementptr inbounds float, float* %tmp21411, i64 1
+ %tmp21413 = getelementptr inbounds float, float* %tmp21412, i64 1
+ %tmp21414 = getelementptr inbounds float, float* %tmp21413, i64 1
+ %tmp21415 = getelementptr inbounds float, float* %tmp21414, i64 1
+ %tmp21416 = getelementptr inbounds float, float* %tmp21415, i64 1
+ %tmp21417 = getelementptr inbounds float, float* %tmp21416, i64 1
+ %tmp21418 = getelementptr inbounds float, float* %tmp21417, i64 1
+ %tmp21419 = getelementptr inbounds float, float* %tmp21418, i64 1
+ %tmp21420 = getelementptr inbounds float, float* %tmp21419, i64 1
+ %tmp21421 = getelementptr inbounds float, float* %tmp21420, i64 1
+ %tmp21422 = getelementptr inbounds float, float* %tmp21421, i64 1
+ %tmp21423 = getelementptr inbounds float, float* %tmp21422, i64 1
+ %tmp21424 = getelementptr inbounds float, float* %tmp21423, i64 1
+ %tmp21425 = getelementptr inbounds float, float* %tmp21424, i64 1
+ %tmp21426 = getelementptr inbounds float, float* %tmp21425, i64 1
+ %tmp21427 = getelementptr inbounds float, float* %tmp21426, i64 1
+ %tmp21428 = getelementptr inbounds float, float* %tmp21427, i64 1
+ %tmp21429 = getelementptr inbounds float, float* %tmp21428, i64 1
+ %tmp21430 = getelementptr inbounds float, float* %tmp21429, i64 1
+ %tmp21431 = getelementptr inbounds float, float* %tmp21430, i64 1
+ %tmp21432 = getelementptr inbounds float, float* %tmp21431, i64 1
+ %tmp21433 = getelementptr inbounds float, float* %tmp21432, i64 1
+ %tmp21434 = getelementptr inbounds float, float* %tmp21433, i64 1
+ %tmp21435 = getelementptr inbounds float, float* %tmp21434, i64 1
+ %tmp21436 = getelementptr inbounds float, float* %tmp21435, i64 1
+ %tmp21437 = getelementptr inbounds float, float* %tmp21436, i64 1
+ %tmp21438 = getelementptr inbounds float, float* %tmp21437, i64 1
+ %tmp21439 = getelementptr inbounds float, float* %tmp21438, i64 1
+ %tmp21440 = getelementptr inbounds float, float* %tmp21439, i64 1
+ %tmp21441 = getelementptr inbounds float, float* %tmp21440, i64 1
+ %tmp21442 = getelementptr inbounds float, float* %tmp21441, i64 1
+ %tmp21443 = getelementptr inbounds float, float* %tmp21442, i64 1
+ %tmp21444 = getelementptr inbounds float, float* %tmp21443, i64 1
+ %tmp21445 = getelementptr inbounds float, float* %tmp21444, i64 1
+ %tmp21446 = getelementptr inbounds float, float* %tmp21445, i64 1
+ %tmp21447 = getelementptr inbounds float, float* %tmp21446, i64 1
+ %tmp21448 = getelementptr inbounds float, float* %tmp21447, i64 1
+ %tmp21449 = getelementptr inbounds float, float* %tmp21448, i64 1
+ %tmp21450 = getelementptr inbounds float, float* %tmp21449, i64 1
+ %tmp21451 = getelementptr inbounds float, float* %tmp21450, i64 1
+ %tmp21452 = getelementptr inbounds float, float* %tmp21451, i64 1
+ %tmp21453 = getelementptr inbounds float, float* %tmp21452, i64 1
+ %tmp21454 = getelementptr inbounds float, float* %tmp21453, i64 1
+ %tmp21455 = getelementptr inbounds float, float* %tmp21454, i64 1
+ %tmp21456 = getelementptr inbounds float, float* %tmp21455, i64 1
+ %tmp21457 = getelementptr inbounds float, float* %tmp21456, i64 1
+ %tmp21458 = getelementptr inbounds float, float* %tmp21457, i64 1
+ %tmp21459 = getelementptr inbounds float, float* %tmp21458, i64 1
+ %tmp21460 = getelementptr inbounds float, float* %tmp21459, i64 1
+ %tmp21461 = getelementptr inbounds float, float* %tmp21460, i64 1
+ %tmp21462 = getelementptr inbounds float, float* %tmp21461, i64 1
+ %tmp21463 = getelementptr inbounds float, float* %tmp21462, i64 1
+ %tmp21464 = getelementptr inbounds float, float* %tmp21463, i64 1
+ %tmp21465 = getelementptr inbounds float, float* %tmp21464, i64 1
+ %tmp21466 = getelementptr inbounds float, float* %tmp21465, i64 1
+ %tmp21467 = getelementptr inbounds float, float* %tmp21466, i64 1
+ %tmp21468 = getelementptr inbounds float, float* %tmp21467, i64 1
+ %tmp21469 = getelementptr inbounds float, float* %tmp21468, i64 1
+ %tmp21470 = getelementptr inbounds float, float* %tmp21469, i64 1
+ %tmp21471 = getelementptr inbounds float, float* %tmp21470, i64 1
+ %tmp21472 = getelementptr inbounds float, float* %tmp21471, i64 1
+ %tmp21473 = getelementptr inbounds float, float* %tmp21472, i64 1
+ %tmp21474 = getelementptr inbounds float, float* %tmp21473, i64 1
+ %tmp21475 = getelementptr inbounds float, float* %tmp21474, i64 1
+ %tmp21476 = getelementptr inbounds float, float* %tmp21475, i64 1
+ %tmp21477 = getelementptr inbounds float, float* %tmp21476, i64 1
+ %tmp21478 = getelementptr inbounds float, float* %tmp21477, i64 1
+ %tmp21479 = getelementptr inbounds float, float* %tmp21478, i64 1
+ %tmp21480 = getelementptr inbounds float, float* %tmp21479, i64 1
+ %tmp21481 = getelementptr inbounds float, float* %tmp21480, i64 1
+ %tmp21482 = getelementptr inbounds float, float* %tmp21481, i64 1
+ %tmp21483 = getelementptr inbounds float, float* %tmp21482, i64 1
+ %tmp21484 = getelementptr inbounds float, float* %tmp21483, i64 1
+ %tmp21485 = getelementptr inbounds float, float* %tmp21484, i64 1
+ %tmp21486 = getelementptr inbounds float, float* %tmp21485, i64 1
+ %tmp21487 = getelementptr inbounds float, float* %tmp21486, i64 1
+ %tmp21488 = getelementptr inbounds float, float* %tmp21487, i64 1
+ %tmp21489 = getelementptr inbounds float, float* %tmp21488, i64 1
+ %tmp21490 = getelementptr inbounds float, float* %tmp21489, i64 1
+ %tmp21491 = getelementptr inbounds float, float* %tmp21490, i64 1
+ %tmp21492 = getelementptr inbounds float, float* %tmp21491, i64 1
+ %tmp21493 = getelementptr inbounds float, float* %tmp21492, i64 1
+ %tmp21494 = getelementptr inbounds float, float* %tmp21493, i64 1
+ %tmp21495 = getelementptr inbounds float, float* %tmp21494, i64 1
+ %tmp21496 = getelementptr inbounds float, float* %tmp21495, i64 1
+ %tmp21497 = getelementptr inbounds float, float* %tmp21496, i64 1
+ %tmp21498 = getelementptr inbounds float, float* %tmp21497, i64 1
+ %tmp21499 = getelementptr inbounds float, float* %tmp21498, i64 1
+ %tmp21500 = getelementptr inbounds float, float* %tmp21499, i64 1
+ %tmp21501 = getelementptr inbounds float, float* %tmp21500, i64 1
+ %tmp21502 = getelementptr inbounds float, float* %tmp21501, i64 1
+ %tmp21503 = getelementptr inbounds float, float* %tmp21502, i64 1
+ %tmp21504 = getelementptr inbounds float, float* %tmp21503, i64 1
+ %tmp21505 = getelementptr inbounds float, float* %tmp21504, i64 1
+ %tmp21506 = getelementptr inbounds float, float* %tmp21505, i64 1
+ %tmp21507 = getelementptr inbounds float, float* %tmp21506, i64 1
+ %tmp21508 = getelementptr inbounds float, float* %tmp21507, i64 1
+ %tmp21509 = getelementptr inbounds float, float* %tmp21508, i64 1
+ %tmp21510 = getelementptr inbounds float, float* %tmp21509, i64 1
+ %tmp21511 = getelementptr inbounds float, float* %tmp21510, i64 1
+ %tmp21512 = getelementptr inbounds float, float* %tmp21511, i64 1
+ %tmp21513 = getelementptr inbounds float, float* %tmp21512, i64 1
+ %tmp21514 = getelementptr inbounds float, float* %tmp21513, i64 1
+ %tmp21515 = getelementptr inbounds float, float* %tmp21514, i64 1
+ %tmp21516 = getelementptr inbounds float, float* %tmp21515, i64 1
+ %tmp21517 = getelementptr inbounds float, float* %tmp21516, i64 1
+ %tmp21518 = getelementptr inbounds float, float* %tmp21517, i64 1
+ %tmp21519 = getelementptr inbounds float, float* %tmp21518, i64 1
+ %tmp21520 = getelementptr inbounds float, float* %tmp21519, i64 1
+ %tmp21521 = getelementptr inbounds float, float* %tmp21520, i64 1
+ %tmp21522 = getelementptr inbounds float, float* %tmp21521, i64 1
+ %tmp21523 = getelementptr inbounds float, float* %tmp21522, i64 1
+ %tmp21524 = getelementptr inbounds float, float* %tmp21523, i64 1
+ %tmp21525 = getelementptr inbounds float, float* %tmp21524, i64 1
+ %tmp21526 = getelementptr inbounds float, float* %tmp21525, i64 1
+ %tmp21527 = getelementptr inbounds float, float* %tmp21526, i64 1
+ %tmp21528 = getelementptr inbounds float, float* %tmp21527, i64 1
+ %tmp21529 = getelementptr inbounds float, float* %tmp21528, i64 1
+ %tmp21530 = getelementptr inbounds float, float* %tmp21529, i64 1
+ %tmp21531 = getelementptr inbounds float, float* %tmp21530, i64 1
+ %tmp21532 = getelementptr inbounds float, float* %tmp21531, i64 1
+ %tmp21533 = getelementptr inbounds float, float* %tmp21532, i64 1
+ %tmp21534 = getelementptr inbounds float, float* %tmp21533, i64 1
+ %tmp21535 = getelementptr inbounds float, float* %tmp21534, i64 1
+ %tmp21536 = getelementptr inbounds float, float* %tmp21535, i64 1
+ %tmp21537 = getelementptr inbounds float, float* %tmp21536, i64 1
+ %tmp21538 = getelementptr inbounds float, float* %tmp21537, i64 1
+ %tmp21539 = getelementptr inbounds float, float* %tmp21538, i64 1
+ %tmp21540 = getelementptr inbounds float, float* %tmp21539, i64 1
+ %tmp21541 = getelementptr inbounds float, float* %tmp21540, i64 1
+ %tmp21542 = getelementptr inbounds float, float* %tmp21541, i64 1
+ %tmp21543 = getelementptr inbounds float, float* %tmp21542, i64 1
+ %tmp21544 = getelementptr inbounds float, float* %tmp21543, i64 1
+ %tmp21545 = getelementptr inbounds float, float* %tmp21544, i64 1
+ %tmp21546 = getelementptr inbounds float, float* %tmp21545, i64 1
+ %tmp21547 = getelementptr inbounds float, float* %tmp21546, i64 1
+ %tmp21548 = getelementptr inbounds float, float* %tmp21547, i64 1
+ %tmp21549 = getelementptr inbounds float, float* %tmp21548, i64 1
+ %tmp21550 = getelementptr inbounds float, float* %tmp21549, i64 1
+ %tmp21551 = getelementptr inbounds float, float* %tmp21550, i64 1
+ %tmp21552 = getelementptr inbounds float, float* %tmp21551, i64 1
+ %tmp21553 = getelementptr inbounds float, float* %tmp21552, i64 1
+ %tmp21554 = getelementptr inbounds float, float* %tmp21553, i64 1
+ %tmp21555 = getelementptr inbounds float, float* %tmp21554, i64 1
+ %tmp21556 = getelementptr inbounds float, float* %tmp21555, i64 1
+ %tmp21557 = getelementptr inbounds float, float* %tmp21556, i64 1
+ %tmp21558 = getelementptr inbounds float, float* %tmp21557, i64 1
+ %tmp21559 = getelementptr inbounds float, float* %tmp21558, i64 1
+ %tmp21560 = getelementptr inbounds float, float* %tmp21559, i64 1
+ %tmp21561 = getelementptr inbounds float, float* %tmp21560, i64 1
+ %tmp21562 = getelementptr inbounds float, float* %tmp21561, i64 1
+ %tmp21563 = getelementptr inbounds float, float* %tmp21562, i64 1
+ %tmp21564 = getelementptr inbounds float, float* %tmp21563, i64 1
+ %tmp21565 = getelementptr inbounds float, float* %tmp21564, i64 1
+ %tmp21566 = getelementptr inbounds float, float* %tmp21565, i64 1
+ %tmp21567 = getelementptr inbounds float, float* %tmp21566, i64 1
+ %tmp21568 = getelementptr inbounds float, float* %tmp21567, i64 1
+ %tmp21569 = getelementptr inbounds float, float* %tmp21568, i64 1
+ %tmp21570 = getelementptr inbounds float, float* %tmp21569, i64 1
+ %tmp21571 = getelementptr inbounds float, float* %tmp21570, i64 1
+ %tmp21572 = getelementptr inbounds float, float* %tmp21571, i64 1
+ %tmp21573 = getelementptr inbounds float, float* %tmp21572, i64 1
+ %tmp21574 = getelementptr inbounds float, float* %tmp21573, i64 1
+ %tmp21575 = getelementptr inbounds float, float* %tmp21574, i64 1
+ %tmp21576 = getelementptr inbounds float, float* %tmp21575, i64 1
+ %tmp21577 = getelementptr inbounds float, float* %tmp21576, i64 1
+ %tmp21578 = getelementptr inbounds float, float* %tmp21577, i64 1
+ %tmp21579 = getelementptr inbounds float, float* %tmp21578, i64 1
+ %tmp21580 = getelementptr inbounds float, float* %tmp21579, i64 1
+ %tmp21581 = getelementptr inbounds float, float* %tmp21580, i64 1
+ %tmp21582 = getelementptr inbounds float, float* %tmp21581, i64 1
+ %tmp21583 = getelementptr inbounds float, float* %tmp21582, i64 1
+ %tmp21584 = getelementptr inbounds float, float* %tmp21583, i64 1
+ %tmp21585 = getelementptr inbounds float, float* %tmp21584, i64 1
+ %tmp21586 = getelementptr inbounds float, float* %tmp21585, i64 1
+ %tmp21587 = getelementptr inbounds float, float* %tmp21586, i64 1
+ %tmp21588 = getelementptr inbounds float, float* %tmp21587, i64 1
+ %tmp21589 = getelementptr inbounds float, float* %tmp21588, i64 1
+ %tmp21590 = getelementptr inbounds float, float* %tmp21589, i64 1
+ %tmp21591 = getelementptr inbounds float, float* %tmp21590, i64 1
+ %tmp21592 = getelementptr inbounds float, float* %tmp21591, i64 1
+ %tmp21593 = getelementptr inbounds float, float* %tmp21592, i64 1
+ %tmp21594 = getelementptr inbounds float, float* %tmp21593, i64 1
+ %tmp21595 = getelementptr inbounds float, float* %tmp21594, i64 1
+ %tmp21596 = getelementptr inbounds float, float* %tmp21595, i64 1
+ %tmp21597 = getelementptr inbounds float, float* %tmp21596, i64 1
+ %tmp21598 = getelementptr inbounds float, float* %tmp21597, i64 1
+ %tmp21599 = getelementptr inbounds float, float* %tmp21598, i64 1
+ %tmp21600 = getelementptr inbounds float, float* %tmp21599, i64 1
+ %tmp21601 = getelementptr inbounds float, float* %tmp21600, i64 1
+ %tmp21602 = getelementptr inbounds float, float* %tmp21601, i64 1
+ %tmp21603 = getelementptr inbounds float, float* %tmp21602, i64 1
+ %tmp21604 = getelementptr inbounds float, float* %tmp21603, i64 1
+ %tmp21605 = getelementptr inbounds float, float* %tmp21604, i64 1
+ %tmp21606 = getelementptr inbounds float, float* %tmp21605, i64 1
+ %tmp21607 = getelementptr inbounds float, float* %tmp21606, i64 1
+ %tmp21608 = getelementptr inbounds float, float* %tmp21607, i64 1
+ %tmp21609 = getelementptr inbounds float, float* %tmp21608, i64 1
+ %tmp21610 = getelementptr inbounds float, float* %tmp21609, i64 1
+ %tmp21611 = getelementptr inbounds float, float* %tmp21610, i64 1
+ %tmp21612 = getelementptr inbounds float, float* %tmp21611, i64 1
+ %tmp21613 = getelementptr inbounds float, float* %tmp21612, i64 1
+ %tmp21614 = getelementptr inbounds float, float* %tmp21613, i64 1
+ %tmp21615 = getelementptr inbounds float, float* %tmp21614, i64 1
+ %tmp21616 = getelementptr inbounds float, float* %tmp21615, i64 1
+ %tmp21617 = getelementptr inbounds float, float* %tmp21616, i64 1
+ %tmp21618 = getelementptr inbounds float, float* %tmp21617, i64 1
+ %tmp21619 = getelementptr inbounds float, float* %tmp21618, i64 1
+ %tmp21620 = getelementptr inbounds float, float* %tmp21619, i64 1
+ %tmp21621 = getelementptr inbounds float, float* %tmp21620, i64 1
+ %tmp21622 = getelementptr inbounds float, float* %tmp21621, i64 1
+ %tmp21623 = getelementptr inbounds float, float* %tmp21622, i64 1
+ %tmp21624 = getelementptr inbounds float, float* %tmp21623, i64 1
+ %tmp21625 = getelementptr inbounds float, float* %tmp21624, i64 1
+ %tmp21626 = getelementptr inbounds float, float* %tmp21625, i64 1
+ %tmp21627 = getelementptr inbounds float, float* %tmp21626, i64 1
+ %tmp21628 = getelementptr inbounds float, float* %tmp21627, i64 1
+ %tmp21629 = getelementptr inbounds float, float* %tmp21628, i64 1
+ %tmp21630 = getelementptr inbounds float, float* %tmp21629, i64 1
+ %tmp21631 = getelementptr inbounds float, float* %tmp21630, i64 1
+ %tmp21632 = getelementptr inbounds float, float* %tmp21631, i64 1
+ %tmp21633 = getelementptr inbounds float, float* %tmp21632, i64 1
+ %tmp21634 = getelementptr inbounds float, float* %tmp21633, i64 1
+ %tmp21635 = getelementptr inbounds float, float* %tmp21634, i64 1
+ %tmp21636 = getelementptr inbounds float, float* %tmp21635, i64 1
+ %tmp21637 = getelementptr inbounds float, float* %tmp21636, i64 1
+ %tmp21638 = getelementptr inbounds float, float* %tmp21637, i64 1
+ %tmp21639 = getelementptr inbounds float, float* %tmp21638, i64 1
+ %tmp21640 = getelementptr inbounds float, float* %tmp21639, i64 1
+ %tmp21641 = getelementptr inbounds float, float* %tmp21640, i64 1
+ %tmp21642 = getelementptr inbounds float, float* %tmp21641, i64 1
+ %tmp21643 = getelementptr inbounds float, float* %tmp21642, i64 1
+ %tmp21644 = getelementptr inbounds float, float* %tmp21643, i64 1
+ %tmp21645 = getelementptr inbounds float, float* %tmp21644, i64 1
+ %tmp21646 = getelementptr inbounds float, float* %tmp21645, i64 1
+ %tmp21647 = getelementptr inbounds float, float* %tmp21646, i64 1
+ %tmp21648 = getelementptr inbounds float, float* %tmp21647, i64 1
+ %tmp21649 = getelementptr inbounds float, float* %tmp21648, i64 1
+ %tmp21650 = getelementptr inbounds float, float* %tmp21649, i64 1
+ %tmp21651 = getelementptr inbounds float, float* %tmp21650, i64 1
+ %tmp21652 = getelementptr inbounds float, float* %tmp21651, i64 1
+ %tmp21653 = getelementptr inbounds float, float* %tmp21652, i64 1
+ %tmp21654 = getelementptr inbounds float, float* %tmp21653, i64 1
+ %tmp21655 = getelementptr inbounds float, float* %tmp21654, i64 1
+ %tmp21656 = getelementptr inbounds float, float* %tmp21655, i64 1
+ %tmp21657 = getelementptr inbounds float, float* %tmp21656, i64 1
+ %tmp21658 = getelementptr inbounds float, float* %tmp21657, i64 1
+ %tmp21659 = getelementptr inbounds float, float* %tmp21658, i64 1
+ %tmp21660 = getelementptr inbounds float, float* %tmp21659, i64 1
+ %tmp21661 = getelementptr inbounds float, float* %tmp21660, i64 1
+ %tmp21662 = getelementptr inbounds float, float* %tmp21661, i64 1
+ %tmp21663 = getelementptr inbounds float, float* %tmp21662, i64 1
+ %tmp21664 = getelementptr inbounds float, float* %tmp21663, i64 1
+ %tmp21665 = getelementptr inbounds float, float* %tmp21664, i64 1
+ %tmp21666 = getelementptr inbounds float, float* %tmp21665, i64 1
+ %tmp21667 = getelementptr inbounds float, float* %tmp21666, i64 1
+ %tmp21668 = getelementptr inbounds float, float* %tmp21667, i64 1
+ %tmp21669 = getelementptr inbounds float, float* %tmp21668, i64 1
+ %tmp21670 = getelementptr inbounds float, float* %tmp21669, i64 1
+ %tmp21671 = getelementptr inbounds float, float* %tmp21670, i64 1
+ %tmp21672 = getelementptr inbounds float, float* %tmp21671, i64 1
+ %tmp21673 = getelementptr inbounds float, float* %tmp21672, i64 1
+ %tmp21674 = getelementptr inbounds float, float* %tmp21673, i64 1
+ %tmp21675 = getelementptr inbounds float, float* %tmp21674, i64 1
+ %tmp21676 = getelementptr inbounds float, float* %tmp21675, i64 1
+ %tmp21677 = getelementptr inbounds float, float* %tmp21676, i64 1
+ %tmp21678 = getelementptr inbounds float, float* %tmp21677, i64 1
+ %tmp21679 = getelementptr inbounds float, float* %tmp21678, i64 1
+ %tmp21680 = getelementptr inbounds float, float* %tmp21679, i64 1
+ %tmp21681 = getelementptr inbounds float, float* %tmp21680, i64 1
+ %tmp21682 = getelementptr inbounds float, float* %tmp21681, i64 1
+ %tmp21683 = getelementptr inbounds float, float* %tmp21682, i64 1
+ %tmp21684 = getelementptr inbounds float, float* %tmp21683, i64 1
+ %tmp21685 = getelementptr inbounds float, float* %tmp21684, i64 1
+ %tmp21686 = getelementptr inbounds float, float* %tmp21685, i64 1
+ %tmp21687 = getelementptr inbounds float, float* %tmp21686, i64 1
+ %tmp21688 = getelementptr inbounds float, float* %tmp21687, i64 1
+ %tmp21689 = getelementptr inbounds float, float* %tmp21688, i64 1
+ %tmp21690 = getelementptr inbounds float, float* %tmp21689, i64 1
+ %tmp21691 = getelementptr inbounds float, float* %tmp21690, i64 1
+ %tmp21692 = getelementptr inbounds float, float* %tmp21691, i64 1
+ %tmp21693 = getelementptr inbounds float, float* %tmp21692, i64 1
+ %tmp21694 = getelementptr inbounds float, float* %tmp21693, i64 1
+ %tmp21695 = getelementptr inbounds float, float* %tmp21694, i64 1
+ %tmp21696 = getelementptr inbounds float, float* %tmp21695, i64 1
+ %tmp21697 = getelementptr inbounds float, float* %tmp21696, i64 1
+ %tmp21698 = getelementptr inbounds float, float* %tmp21697, i64 1
+ %tmp21699 = getelementptr inbounds float, float* %tmp21698, i64 1
+ %tmp21700 = getelementptr inbounds float, float* %tmp21699, i64 1
+ %tmp21701 = getelementptr inbounds float, float* %tmp21700, i64 1
+ %tmp21702 = getelementptr inbounds float, float* %tmp21701, i64 1
+ %tmp21703 = getelementptr inbounds float, float* %tmp21702, i64 1
+ %tmp21704 = getelementptr inbounds float, float* %tmp21703, i64 1
+ %tmp21705 = getelementptr inbounds float, float* %tmp21704, i64 1
+ %tmp21706 = getelementptr inbounds float, float* %tmp21705, i64 1
+ %tmp21707 = getelementptr inbounds float, float* %tmp21706, i64 1
+ %tmp21708 = getelementptr inbounds float, float* %tmp21707, i64 1
+ %tmp21709 = getelementptr inbounds float, float* %tmp21708, i64 1
+ %tmp21710 = getelementptr inbounds float, float* %tmp21709, i64 1
+ %tmp21711 = getelementptr inbounds float, float* %tmp21710, i64 1
+ %tmp21712 = getelementptr inbounds float, float* %tmp21711, i64 1
+ %tmp21713 = getelementptr inbounds float, float* %tmp21712, i64 1
+ %tmp21714 = getelementptr inbounds float, float* %tmp21713, i64 1
+ %tmp21715 = getelementptr inbounds float, float* %tmp21714, i64 1
+ %tmp21716 = getelementptr inbounds float, float* %tmp21715, i64 1
+ %tmp21717 = getelementptr inbounds float, float* %tmp21716, i64 1
+ %tmp21718 = getelementptr inbounds float, float* %tmp21717, i64 1
+ %tmp21719 = getelementptr inbounds float, float* %tmp21718, i64 1
+ %tmp21720 = getelementptr inbounds float, float* %tmp21719, i64 1
+ %tmp21721 = getelementptr inbounds float, float* %tmp21720, i64 1
+ %tmp21722 = getelementptr inbounds float, float* %tmp21721, i64 1
+ %tmp21723 = getelementptr inbounds float, float* %tmp21722, i64 1
+ %tmp21724 = getelementptr inbounds float, float* %tmp21723, i64 1
+ %tmp21725 = getelementptr inbounds float, float* %tmp21724, i64 1
+ %tmp21726 = getelementptr inbounds float, float* %tmp21725, i64 1
+ %tmp21727 = getelementptr inbounds float, float* %tmp21726, i64 1
+ %tmp21728 = getelementptr inbounds float, float* %tmp21727, i64 1
+ %tmp21729 = getelementptr inbounds float, float* %tmp21728, i64 1
+ %tmp21730 = getelementptr inbounds float, float* %tmp21729, i64 1
+ %tmp21731 = getelementptr inbounds float, float* %tmp21730, i64 1
+ %tmp21732 = getelementptr inbounds float, float* %tmp21731, i64 1
+ %tmp21733 = getelementptr inbounds float, float* %tmp21732, i64 1
+ %tmp21734 = getelementptr inbounds float, float* %tmp21733, i64 1
+ %tmp21735 = getelementptr inbounds float, float* %tmp21734, i64 1
+ %tmp21736 = getelementptr inbounds float, float* %tmp21735, i64 1
+ %tmp21737 = getelementptr inbounds float, float* %tmp21736, i64 1
+ %tmp21738 = getelementptr inbounds float, float* %tmp21737, i64 1
+ %tmp21739 = getelementptr inbounds float, float* %tmp21738, i64 1
+ %tmp21740 = getelementptr inbounds float, float* %tmp21739, i64 1
+ %tmp21741 = getelementptr inbounds float, float* %tmp21740, i64 1
+ %tmp21742 = getelementptr inbounds float, float* %tmp21741, i64 1
+ %tmp21743 = getelementptr inbounds float, float* %tmp21742, i64 1
+ %tmp21744 = getelementptr inbounds float, float* %tmp21743, i64 1
+ %tmp21745 = getelementptr inbounds float, float* %tmp21744, i64 1
+ %tmp21746 = getelementptr inbounds float, float* %tmp21745, i64 1
+ %tmp21747 = getelementptr inbounds float, float* %tmp21746, i64 1
+ %tmp21748 = getelementptr inbounds float, float* %tmp21747, i64 1
+ %tmp21749 = getelementptr inbounds float, float* %tmp21748, i64 1
+ %tmp21750 = getelementptr inbounds float, float* %tmp21749, i64 1
+ %tmp21751 = getelementptr inbounds float, float* %tmp21750, i64 1
+ %tmp21752 = getelementptr inbounds float, float* %tmp21751, i64 1
+ %tmp21753 = getelementptr inbounds float, float* %tmp21752, i64 1
+ %tmp21754 = getelementptr inbounds float, float* %tmp21753, i64 1
+ %tmp21755 = getelementptr inbounds float, float* %tmp21754, i64 1
+ %tmp21756 = getelementptr inbounds float, float* %tmp21755, i64 1
+ %tmp21757 = getelementptr inbounds float, float* %tmp21756, i64 1
+ %tmp21758 = getelementptr inbounds float, float* %tmp21757, i64 1
+ %tmp21759 = getelementptr inbounds float, float* %tmp21758, i64 1
+ %tmp21760 = getelementptr inbounds float, float* %tmp21759, i64 1
+ %tmp21761 = getelementptr inbounds float, float* %tmp21760, i64 1
+ %tmp21762 = getelementptr inbounds float, float* %tmp21761, i64 1
+ %tmp21763 = getelementptr inbounds float, float* %tmp21762, i64 1
+ %tmp21764 = getelementptr inbounds float, float* %tmp21763, i64 1
+ %tmp21765 = getelementptr inbounds float, float* %tmp21764, i64 1
+ %tmp21766 = getelementptr inbounds float, float* %tmp21765, i64 1
+ %tmp21767 = getelementptr inbounds float, float* %tmp21766, i64 1
+ %tmp21768 = getelementptr inbounds float, float* %tmp21767, i64 1
+ %tmp21769 = getelementptr inbounds float, float* %tmp21768, i64 1
+ %tmp21770 = getelementptr inbounds float, float* %tmp21769, i64 1
+ %tmp21771 = getelementptr inbounds float, float* %tmp21770, i64 1
+ %tmp21772 = getelementptr inbounds float, float* %tmp21771, i64 1
+ %tmp21773 = getelementptr inbounds float, float* %tmp21772, i64 1
+ %tmp21774 = getelementptr inbounds float, float* %tmp21773, i64 1
+ %tmp21775 = getelementptr inbounds float, float* %tmp21774, i64 1
+ %tmp21776 = getelementptr inbounds float, float* %tmp21775, i64 1
+ %tmp21777 = getelementptr inbounds float, float* %tmp21776, i64 1
+ %tmp21778 = getelementptr inbounds float, float* %tmp21777, i64 1
+ %tmp21779 = getelementptr inbounds float, float* %tmp21778, i64 1
+ %tmp21780 = getelementptr inbounds float, float* %tmp21779, i64 1
+ %tmp21781 = getelementptr inbounds float, float* %tmp21780, i64 1
+ %tmp21782 = getelementptr inbounds float, float* %tmp21781, i64 1
+ %tmp21783 = getelementptr inbounds float, float* %tmp21782, i64 1
+ %tmp21784 = getelementptr inbounds float, float* %tmp21783, i64 1
+ %tmp21785 = getelementptr inbounds float, float* %tmp21784, i64 1
+ %tmp21786 = getelementptr inbounds float, float* %tmp21785, i64 1
+ %tmp21787 = getelementptr inbounds float, float* %tmp21786, i64 1
+ %tmp21788 = getelementptr inbounds float, float* %tmp21787, i64 1
+ %tmp21789 = getelementptr inbounds float, float* %tmp21788, i64 1
+ %tmp21790 = getelementptr inbounds float, float* %tmp21789, i64 1
+ %tmp21791 = getelementptr inbounds float, float* %tmp21790, i64 1
+ %tmp21792 = getelementptr inbounds float, float* %tmp21791, i64 1
+ %tmp21793 = getelementptr inbounds float, float* %tmp21792, i64 1
+ %tmp21794 = getelementptr inbounds float, float* %tmp21793, i64 1
+ %tmp21795 = getelementptr inbounds float, float* %tmp21794, i64 1
+ %tmp21796 = getelementptr inbounds float, float* %tmp21795, i64 1
+ %tmp21797 = getelementptr inbounds float, float* %tmp21796, i64 1
+ %tmp21798 = getelementptr inbounds float, float* %tmp21797, i64 1
+ %tmp21799 = getelementptr inbounds float, float* %tmp21798, i64 1
+ %tmp21800 = getelementptr inbounds float, float* %tmp21799, i64 1
+ %tmp21801 = getelementptr inbounds float, float* %tmp21800, i64 1
+ %tmp21802 = getelementptr inbounds float, float* %tmp21801, i64 1
+ %tmp21803 = getelementptr inbounds float, float* %tmp21802, i64 1
+ %tmp21804 = getelementptr inbounds float, float* %tmp21803, i64 1
+ %tmp21805 = getelementptr inbounds float, float* %tmp21804, i64 1
+ %tmp21806 = getelementptr inbounds float, float* %tmp21805, i64 1
+ %tmp21807 = getelementptr inbounds float, float* %tmp21806, i64 1
+ %tmp21808 = getelementptr inbounds float, float* %tmp21807, i64 1
+ %tmp21809 = getelementptr inbounds float, float* %tmp21808, i64 1
+ %tmp21810 = getelementptr inbounds float, float* %tmp21809, i64 1
+ %tmp21811 = getelementptr inbounds float, float* %tmp21810, i64 1
+ %tmp21812 = getelementptr inbounds float, float* %tmp21811, i64 1
+ %tmp21813 = getelementptr inbounds float, float* %tmp21812, i64 1
+ %tmp21814 = getelementptr inbounds float, float* %tmp21813, i64 1
+ %tmp21815 = getelementptr inbounds float, float* %tmp21814, i64 1
+ %tmp21816 = getelementptr inbounds float, float* %tmp21815, i64 1
+ %tmp21817 = getelementptr inbounds float, float* %tmp21816, i64 1
+ %tmp21818 = getelementptr inbounds float, float* %tmp21817, i64 1
+ %tmp21819 = getelementptr inbounds float, float* %tmp21818, i64 1
+ %tmp21820 = getelementptr inbounds float, float* %tmp21819, i64 1
+ %tmp21821 = getelementptr inbounds float, float* %tmp21820, i64 1
+ %tmp21822 = getelementptr inbounds float, float* %tmp21821, i64 1
+ %tmp21823 = getelementptr inbounds float, float* %tmp21822, i64 1
+ %tmp21824 = getelementptr inbounds float, float* %tmp21823, i64 1
+ %tmp21825 = getelementptr inbounds float, float* %tmp21824, i64 1
+ %tmp21826 = getelementptr inbounds float, float* %tmp21825, i64 1
+ %tmp21827 = getelementptr inbounds float, float* %tmp21826, i64 1
+ %tmp21828 = getelementptr inbounds float, float* %tmp21827, i64 1
+ %tmp21829 = getelementptr inbounds float, float* %tmp21828, i64 1
+ %tmp21830 = getelementptr inbounds float, float* %tmp21829, i64 1
+ %tmp21831 = getelementptr inbounds float, float* %tmp21830, i64 1
+ %tmp21832 = getelementptr inbounds float, float* %tmp21831, i64 1
+ %tmp21833 = getelementptr inbounds float, float* %tmp21832, i64 1
+ %tmp21834 = getelementptr inbounds float, float* %tmp21833, i64 1
+ %tmp21835 = getelementptr inbounds float, float* %tmp21834, i64 1
+ %tmp21836 = getelementptr inbounds float, float* %tmp21835, i64 1
+ %tmp21837 = getelementptr inbounds float, float* %tmp21836, i64 1
+ %tmp21838 = getelementptr inbounds float, float* %tmp21837, i64 1
+ %tmp21839 = getelementptr inbounds float, float* %tmp21838, i64 1
+ %tmp21840 = getelementptr inbounds float, float* %tmp21839, i64 1
+ %tmp21841 = getelementptr inbounds float, float* %tmp21840, i64 1
+ %tmp21842 = getelementptr inbounds float, float* %tmp21841, i64 1
+ %tmp21843 = getelementptr inbounds float, float* %tmp21842, i64 1
+ %tmp21844 = getelementptr inbounds float, float* %tmp21843, i64 1
+ %tmp21845 = getelementptr inbounds float, float* %tmp21844, i64 1
+ %tmp21846 = getelementptr inbounds float, float* %tmp21845, i64 1
+ %tmp21847 = getelementptr inbounds float, float* %tmp21846, i64 1
+ %tmp21848 = getelementptr inbounds float, float* %tmp21847, i64 1
+ %tmp21849 = getelementptr inbounds float, float* %tmp21848, i64 1
+ %tmp21850 = getelementptr inbounds float, float* %tmp21849, i64 1
+ %tmp21851 = getelementptr inbounds float, float* %tmp21850, i64 1
+ %tmp21852 = getelementptr inbounds float, float* %tmp21851, i64 1
+ %tmp21853 = getelementptr inbounds float, float* %tmp21852, i64 1
+ %tmp21854 = getelementptr inbounds float, float* %tmp21853, i64 1
+ %tmp21855 = getelementptr inbounds float, float* %tmp21854, i64 1
+ %tmp21856 = getelementptr inbounds float, float* %tmp21855, i64 1
+ %tmp21857 = getelementptr inbounds float, float* %tmp21856, i64 1
+ %tmp21858 = getelementptr inbounds float, float* %tmp21857, i64 1
+ %tmp21859 = getelementptr inbounds float, float* %tmp21858, i64 1
+ %tmp21860 = getelementptr inbounds float, float* %tmp21859, i64 1
+ %tmp21861 = getelementptr inbounds float, float* %tmp21860, i64 1
+ %tmp21862 = getelementptr inbounds float, float* %tmp21861, i64 1
+ %tmp21863 = getelementptr inbounds float, float* %tmp21862, i64 1
+ %tmp21864 = getelementptr inbounds float, float* %tmp21863, i64 1
+ %tmp21865 = getelementptr inbounds float, float* %tmp21864, i64 1
+ %tmp21866 = getelementptr inbounds float, float* %tmp21865, i64 1
+ %tmp21867 = getelementptr inbounds float, float* %tmp21866, i64 1
+ %tmp21868 = getelementptr inbounds float, float* %tmp21867, i64 1
+ %tmp21869 = getelementptr inbounds float, float* %tmp21868, i64 1
+ %tmp21870 = getelementptr inbounds float, float* %tmp21869, i64 1
+ %tmp21871 = getelementptr inbounds float, float* %tmp21870, i64 1
+ %tmp21872 = getelementptr inbounds float, float* %tmp21871, i64 1
+ %tmp21873 = getelementptr inbounds float, float* %tmp21872, i64 1
+ %tmp21874 = getelementptr inbounds float, float* %tmp21873, i64 1
+ %tmp21875 = getelementptr inbounds float, float* %tmp21874, i64 1
+ %tmp21876 = getelementptr inbounds float, float* %tmp21875, i64 1
+ %tmp21877 = getelementptr inbounds float, float* %tmp21876, i64 1
+ %tmp21878 = getelementptr inbounds float, float* %tmp21877, i64 1
+ %tmp21879 = getelementptr inbounds float, float* %tmp21878, i64 1
+ %tmp21880 = getelementptr inbounds float, float* %tmp21879, i64 1
+ %tmp21881 = getelementptr inbounds float, float* %tmp21880, i64 1
+ %tmp21882 = getelementptr inbounds float, float* %tmp21881, i64 1
+ %tmp21883 = getelementptr inbounds float, float* %tmp21882, i64 1
+ %tmp21884 = getelementptr inbounds float, float* %tmp21883, i64 1
+ %tmp21885 = getelementptr inbounds float, float* %tmp21884, i64 1
+ %tmp21886 = getelementptr inbounds float, float* %tmp21885, i64 1
+ %tmp21887 = getelementptr inbounds float, float* %tmp21886, i64 1
+ %tmp21888 = getelementptr inbounds float, float* %tmp21887, i64 1
+ %tmp21889 = getelementptr inbounds float, float* %tmp21888, i64 1
+ %tmp21890 = getelementptr inbounds float, float* %tmp21889, i64 1
+ %tmp21891 = getelementptr inbounds float, float* %tmp21890, i64 1
+ %tmp21892 = getelementptr inbounds float, float* %tmp21891, i64 1
+ %tmp21893 = getelementptr inbounds float, float* %tmp21892, i64 1
+ %tmp21894 = getelementptr inbounds float, float* %tmp21893, i64 1
+ %tmp21895 = getelementptr inbounds float, float* %tmp21894, i64 1
+ %tmp21896 = getelementptr inbounds float, float* %tmp21895, i64 1
+ %tmp21897 = getelementptr inbounds float, float* %tmp21896, i64 1
+ %tmp21898 = getelementptr inbounds float, float* %tmp21897, i64 1
+ %tmp21899 = getelementptr inbounds float, float* %tmp21898, i64 1
+ %tmp21900 = getelementptr inbounds float, float* %tmp21899, i64 1
+ %tmp21901 = getelementptr inbounds float, float* %tmp21900, i64 1
+ %tmp21902 = getelementptr inbounds float, float* %tmp21901, i64 1
+ %tmp21903 = getelementptr inbounds float, float* %tmp21902, i64 1
+ %tmp21904 = getelementptr inbounds float, float* %tmp21903, i64 1
+ %tmp21905 = getelementptr inbounds float, float* %tmp21904, i64 1
+ %tmp21906 = getelementptr inbounds float, float* %tmp21905, i64 1
+ %tmp21907 = getelementptr inbounds float, float* %tmp21906, i64 1
+ %tmp21908 = getelementptr inbounds float, float* %tmp21907, i64 1
+ %tmp21909 = getelementptr inbounds float, float* %tmp21908, i64 1
+ %tmp21910 = getelementptr inbounds float, float* %tmp21909, i64 1
+ %tmp21911 = getelementptr inbounds float, float* %tmp21910, i64 1
+ %tmp21912 = getelementptr inbounds float, float* %tmp21911, i64 1
+ %tmp21913 = getelementptr inbounds float, float* %tmp21912, i64 1
+ %tmp21914 = getelementptr inbounds float, float* %tmp21913, i64 1
+ %tmp21915 = getelementptr inbounds float, float* %tmp21914, i64 1
+ %tmp21916 = getelementptr inbounds float, float* %tmp21915, i64 1
+ %tmp21917 = getelementptr inbounds float, float* %tmp21916, i64 1
+ %tmp21918 = getelementptr inbounds float, float* %tmp21917, i64 1
+ %tmp21919 = getelementptr inbounds float, float* %tmp21918, i64 1
+ %tmp21920 = getelementptr inbounds float, float* %tmp21919, i64 1
+ %tmp21921 = getelementptr inbounds float, float* %tmp21920, i64 1
+ %tmp21922 = getelementptr inbounds float, float* %tmp21921, i64 1
+ %tmp21923 = getelementptr inbounds float, float* %tmp21922, i64 1
+ %tmp21924 = getelementptr inbounds float, float* %tmp21923, i64 1
+ %tmp21925 = getelementptr inbounds float, float* %tmp21924, i64 1
+ %tmp21926 = getelementptr inbounds float, float* %tmp21925, i64 1
+ %tmp21927 = getelementptr inbounds float, float* %tmp21926, i64 1
+ %tmp21928 = getelementptr inbounds float, float* %tmp21927, i64 1
+ %tmp21929 = getelementptr inbounds float, float* %tmp21928, i64 1
+ %tmp21930 = getelementptr inbounds float, float* %tmp21929, i64 1
+ %tmp21931 = getelementptr inbounds float, float* %tmp21930, i64 1
+ %tmp21932 = getelementptr inbounds float, float* %tmp21931, i64 1
+ %tmp21933 = getelementptr inbounds float, float* %tmp21932, i64 1
+ %tmp21934 = getelementptr inbounds float, float* %tmp21933, i64 1
+ %tmp21935 = getelementptr inbounds float, float* %tmp21934, i64 1
+ %tmp21936 = getelementptr inbounds float, float* %tmp21935, i64 1
+ %tmp21937 = getelementptr inbounds float, float* %tmp21936, i64 1
+ %tmp21938 = getelementptr inbounds float, float* %tmp21937, i64 1
+ %tmp21939 = getelementptr inbounds float, float* %tmp21938, i64 1
+ %tmp21940 = getelementptr inbounds float, float* %tmp21939, i64 1
+ %tmp21941 = getelementptr inbounds float, float* %tmp21940, i64 1
+ %tmp21942 = getelementptr inbounds float, float* %tmp21941, i64 1
+ %tmp21943 = getelementptr inbounds float, float* %tmp21942, i64 1
+ %tmp21944 = getelementptr inbounds float, float* %tmp21943, i64 1
+ %tmp21945 = getelementptr inbounds float, float* %tmp21944, i64 1
+ %tmp21946 = getelementptr inbounds float, float* %tmp21945, i64 1
+ %tmp21947 = getelementptr inbounds float, float* %tmp21946, i64 1
+ %tmp21948 = getelementptr inbounds float, float* %tmp21947, i64 1
+ %tmp21949 = getelementptr inbounds float, float* %tmp21948, i64 1
+ %tmp21950 = getelementptr inbounds float, float* %tmp21949, i64 1
+ %tmp21951 = getelementptr inbounds float, float* %tmp21950, i64 1
+ %tmp21952 = getelementptr inbounds float, float* %tmp21951, i64 1
+ %tmp21953 = getelementptr inbounds float, float* %tmp21952, i64 1
+ %tmp21954 = getelementptr inbounds float, float* %tmp21953, i64 1
+ %tmp21955 = getelementptr inbounds float, float* %tmp21954, i64 1
+ %tmp21956 = getelementptr inbounds float, float* %tmp21955, i64 1
+ %tmp21957 = getelementptr inbounds float, float* %tmp21956, i64 1
+ %tmp21958 = getelementptr inbounds float, float* %tmp21957, i64 1
+ %tmp21959 = getelementptr inbounds float, float* %tmp21958, i64 1
+ %tmp21960 = getelementptr inbounds float, float* %tmp21959, i64 1
+ %tmp21961 = getelementptr inbounds float, float* %tmp21960, i64 1
+ %tmp21962 = getelementptr inbounds float, float* %tmp21961, i64 1
+ %tmp21963 = getelementptr inbounds float, float* %tmp21962, i64 1
+ %tmp21964 = getelementptr inbounds float, float* %tmp21963, i64 1
+ %tmp21965 = getelementptr inbounds float, float* %tmp21964, i64 1
+ %tmp21966 = getelementptr inbounds float, float* %tmp21965, i64 1
+ %tmp21967 = getelementptr inbounds float, float* %tmp21966, i64 1
+ %tmp21968 = getelementptr inbounds float, float* %tmp21967, i64 1
+ %tmp21969 = getelementptr inbounds float, float* %tmp21968, i64 1
+ %tmp21970 = getelementptr inbounds float, float* %tmp21969, i64 1
+ %tmp21971 = getelementptr inbounds float, float* %tmp21970, i64 1
+ %tmp21972 = getelementptr inbounds float, float* %tmp21971, i64 1
+ %tmp21973 = getelementptr inbounds float, float* %tmp21972, i64 1
+ %tmp21974 = getelementptr inbounds float, float* %tmp21973, i64 1
+ %tmp21975 = getelementptr inbounds float, float* %tmp21974, i64 1
+ %tmp21976 = getelementptr inbounds float, float* %tmp21975, i64 1
+ %tmp21977 = getelementptr inbounds float, float* %tmp21976, i64 1
+ %tmp21978 = getelementptr inbounds float, float* %tmp21977, i64 1
+ %tmp21979 = getelementptr inbounds float, float* %tmp21978, i64 1
+ %tmp21980 = getelementptr inbounds float, float* %tmp21979, i64 1
+ %tmp21981 = getelementptr inbounds float, float* %tmp21980, i64 1
+ %tmp21982 = getelementptr inbounds float, float* %tmp21981, i64 1
+ %tmp21983 = getelementptr inbounds float, float* %tmp21982, i64 1
+ %tmp21984 = getelementptr inbounds float, float* %tmp21983, i64 1
+ %tmp21985 = getelementptr inbounds float, float* %tmp21984, i64 1
+ %tmp21986 = getelementptr inbounds float, float* %tmp21985, i64 1
+ %tmp21987 = getelementptr inbounds float, float* %tmp21986, i64 1
+ %tmp21988 = getelementptr inbounds float, float* %tmp21987, i64 1
+ %tmp21989 = getelementptr inbounds float, float* %tmp21988, i64 1
+ %tmp21990 = getelementptr inbounds float, float* %tmp21989, i64 1
+ %tmp21991 = getelementptr inbounds float, float* %tmp21990, i64 1
+ %tmp21992 = getelementptr inbounds float, float* %tmp21991, i64 1
+ %tmp21993 = getelementptr inbounds float, float* %tmp21992, i64 1
+ %tmp21994 = getelementptr inbounds float, float* %tmp21993, i64 1
+ %tmp21995 = getelementptr inbounds float, float* %tmp21994, i64 1
+ %tmp21996 = getelementptr inbounds float, float* %tmp21995, i64 1
+ %tmp21997 = getelementptr inbounds float, float* %tmp21996, i64 1
+ %tmp21998 = getelementptr inbounds float, float* %tmp21997, i64 1
+ %tmp21999 = getelementptr inbounds float, float* %tmp21998, i64 1
+ %tmp22000 = getelementptr inbounds float, float* %tmp21999, i64 1
+ %tmp22001 = getelementptr inbounds float, float* %tmp22000, i64 1
+ %tmp22002 = getelementptr inbounds float, float* %tmp22001, i64 1
+ %tmp22003 = getelementptr inbounds float, float* %tmp22002, i64 1
+ %tmp22004 = getelementptr inbounds float, float* %tmp22003, i64 1
+ %tmp22005 = getelementptr inbounds float, float* %tmp22004, i64 1
+ %tmp22006 = getelementptr inbounds float, float* %tmp22005, i64 1
+ %tmp22007 = getelementptr inbounds float, float* %tmp22006, i64 1
+ %tmp22008 = getelementptr inbounds float, float* %tmp22007, i64 1
+ %tmp22009 = getelementptr inbounds float, float* %tmp22008, i64 1
+ %tmp22010 = getelementptr inbounds float, float* %tmp22009, i64 1
+ %tmp22011 = getelementptr inbounds float, float* %tmp22010, i64 1
+ %tmp22012 = getelementptr inbounds float, float* %tmp22011, i64 1
+ %tmp22013 = getelementptr inbounds float, float* %tmp22012, i64 1
+ %tmp22014 = getelementptr inbounds float, float* %tmp22013, i64 1
+ %tmp22015 = getelementptr inbounds float, float* %tmp22014, i64 1
+ %tmp22016 = getelementptr inbounds float, float* %tmp22015, i64 1
+ %tmp22017 = getelementptr inbounds float, float* %tmp22016, i64 1
+ %tmp22018 = getelementptr inbounds float, float* %tmp22017, i64 1
+ %tmp22019 = getelementptr inbounds float, float* %tmp22018, i64 1
+ %tmp22020 = getelementptr inbounds float, float* %tmp22019, i64 1
+ %tmp22021 = getelementptr inbounds float, float* %tmp22020, i64 1
+ %tmp22022 = getelementptr inbounds float, float* %tmp22021, i64 1
+ %tmp22023 = getelementptr inbounds float, float* %tmp22022, i64 1
+ %tmp22024 = getelementptr inbounds float, float* %tmp22023, i64 1
+ %tmp22025 = getelementptr inbounds float, float* %tmp22024, i64 1
+ %tmp22026 = getelementptr inbounds float, float* %tmp22025, i64 1
+ %tmp22027 = getelementptr inbounds float, float* %tmp22026, i64 1
+ %tmp22028 = getelementptr inbounds float, float* %tmp22027, i64 1
+ %tmp22029 = getelementptr inbounds float, float* %tmp22028, i64 1
+ %tmp22030 = getelementptr inbounds float, float* %tmp22029, i64 1
+ %tmp22031 = getelementptr inbounds float, float* %tmp22030, i64 1
+ %tmp22032 = getelementptr inbounds float, float* %tmp22031, i64 1
+ %tmp22033 = getelementptr inbounds float, float* %tmp22032, i64 1
+ %tmp22034 = getelementptr inbounds float, float* %tmp22033, i64 1
+ %tmp22035 = getelementptr inbounds float, float* %tmp22034, i64 1
+ %tmp22036 = getelementptr inbounds float, float* %tmp22035, i64 1
+ %tmp22037 = getelementptr inbounds float, float* %tmp22036, i64 1
+ %tmp22038 = getelementptr inbounds float, float* %tmp22037, i64 1
+ %tmp22039 = getelementptr inbounds float, float* %tmp22038, i64 1
+ %tmp22040 = getelementptr inbounds float, float* %tmp22039, i64 1
+ %tmp22041 = getelementptr inbounds float, float* %tmp22040, i64 1
+ %tmp22042 = getelementptr inbounds float, float* %tmp22041, i64 1
+ %tmp22043 = getelementptr inbounds float, float* %tmp22042, i64 1
+ %tmp22044 = getelementptr inbounds float, float* %tmp22043, i64 1
+ %tmp22045 = getelementptr inbounds float, float* %tmp22044, i64 1
+ %tmp22046 = getelementptr inbounds float, float* %tmp22045, i64 1
+ %tmp22047 = getelementptr inbounds float, float* %tmp22046, i64 1
+ %tmp22048 = getelementptr inbounds float, float* %tmp22047, i64 1
+ %tmp22049 = getelementptr inbounds float, float* %tmp22048, i64 1
+ %tmp22050 = getelementptr inbounds float, float* %tmp22049, i64 1
+ %tmp22051 = getelementptr inbounds float, float* %tmp22050, i64 1
+ %tmp22052 = getelementptr inbounds float, float* %tmp22051, i64 1
+ %tmp22053 = getelementptr inbounds float, float* %tmp22052, i64 1
+ %tmp22054 = getelementptr inbounds float, float* %tmp22053, i64 1
+ %tmp22055 = getelementptr inbounds float, float* %tmp22054, i64 1
+ %tmp22056 = getelementptr inbounds float, float* %tmp22055, i64 1
+ %tmp22057 = getelementptr inbounds float, float* %tmp22056, i64 1
+ %tmp22058 = getelementptr inbounds float, float* %tmp22057, i64 1
+ %tmp22059 = getelementptr inbounds float, float* %tmp22058, i64 1
+ %tmp22060 = getelementptr inbounds float, float* %tmp22059, i64 1
+ %tmp22061 = getelementptr inbounds float, float* %tmp22060, i64 1
+ %tmp22062 = getelementptr inbounds float, float* %tmp22061, i64 1
+ %tmp22063 = getelementptr inbounds float, float* %tmp22062, i64 1
+ %tmp22064 = getelementptr inbounds float, float* %tmp22063, i64 1
+ %tmp22065 = getelementptr inbounds float, float* %tmp22064, i64 1
+ %tmp22066 = getelementptr inbounds float, float* %tmp22065, i64 1
+ %tmp22067 = getelementptr inbounds float, float* %tmp22066, i64 1
+ %tmp22068 = getelementptr inbounds float, float* %tmp22067, i64 1
+ %tmp22069 = getelementptr inbounds float, float* %tmp22068, i64 1
+ %tmp22070 = getelementptr inbounds float, float* %tmp22069, i64 1
+ %tmp22071 = getelementptr inbounds float, float* %tmp22070, i64 1
+ %tmp22072 = getelementptr inbounds float, float* %tmp22071, i64 1
+ %tmp22073 = getelementptr inbounds float, float* %tmp22072, i64 1
+ %tmp22074 = getelementptr inbounds float, float* %tmp22073, i64 1
+ %tmp22075 = getelementptr inbounds float, float* %tmp22074, i64 1
+ %tmp22076 = getelementptr inbounds float, float* %tmp22075, i64 1
+ %tmp22077 = getelementptr inbounds float, float* %tmp22076, i64 1
+ %tmp22078 = getelementptr inbounds float, float* %tmp22077, i64 1
+ %tmp22079 = getelementptr inbounds float, float* %tmp22078, i64 1
+ %tmp22080 = getelementptr inbounds float, float* %tmp22079, i64 1
+ %tmp22081 = getelementptr inbounds float, float* %tmp22080, i64 1
+ %tmp22082 = getelementptr inbounds float, float* %tmp22081, i64 1
+ %tmp22083 = getelementptr inbounds float, float* %tmp22082, i64 1
+ %tmp22084 = getelementptr inbounds float, float* %tmp22083, i64 1
+ %tmp22085 = getelementptr inbounds float, float* %tmp22084, i64 1
+ %tmp22086 = getelementptr inbounds float, float* %tmp22085, i64 1
+ %tmp22087 = getelementptr inbounds float, float* %tmp22086, i64 1
+ %tmp22088 = getelementptr inbounds float, float* %tmp22087, i64 1
+ %tmp22089 = getelementptr inbounds float, float* %tmp22088, i64 1
+ %tmp22090 = getelementptr inbounds float, float* %tmp22089, i64 1
+ %tmp22091 = getelementptr inbounds float, float* %tmp22090, i64 1
+ %tmp22092 = getelementptr inbounds float, float* %tmp22091, i64 1
+ %tmp22093 = getelementptr inbounds float, float* %tmp22092, i64 1
+ %tmp22094 = getelementptr inbounds float, float* %tmp22093, i64 1
+ %tmp22095 = getelementptr inbounds float, float* %tmp22094, i64 1
+ %tmp22096 = getelementptr inbounds float, float* %tmp22095, i64 1
+ %tmp22097 = getelementptr inbounds float, float* %tmp22096, i64 1
+ %tmp22098 = getelementptr inbounds float, float* %tmp22097, i64 1
+ %tmp22099 = getelementptr inbounds float, float* %tmp22098, i64 1
+ %tmp22100 = getelementptr inbounds float, float* %tmp22099, i64 1
+ %tmp22101 = getelementptr inbounds float, float* %tmp22100, i64 1
+ %tmp22102 = getelementptr inbounds float, float* %tmp22101, i64 1
+ %tmp22103 = getelementptr inbounds float, float* %tmp22102, i64 1
+ %tmp22104 = getelementptr inbounds float, float* %tmp22103, i64 1
+ %tmp22105 = getelementptr inbounds float, float* %tmp22104, i64 1
+ %tmp22106 = getelementptr inbounds float, float* %tmp22105, i64 1
+ %tmp22107 = getelementptr inbounds float, float* %tmp22106, i64 1
+ %tmp22108 = getelementptr inbounds float, float* %tmp22107, i64 1
+ %tmp22109 = getelementptr inbounds float, float* %tmp22108, i64 1
+ %tmp22110 = getelementptr inbounds float, float* %tmp22109, i64 1
+ %tmp22111 = getelementptr inbounds float, float* %tmp22110, i64 1
+ %tmp22112 = getelementptr inbounds float, float* %tmp22111, i64 1
+ %tmp22113 = getelementptr inbounds float, float* %tmp22112, i64 1
+ %tmp22114 = getelementptr inbounds float, float* %tmp22113, i64 1
+ %tmp22115 = getelementptr inbounds float, float* %tmp22114, i64 1
+ %tmp22116 = getelementptr inbounds float, float* %tmp22115, i64 1
+ %tmp22117 = getelementptr inbounds float, float* %tmp22116, i64 1
+ %tmp22118 = getelementptr inbounds float, float* %tmp22117, i64 1
+ %tmp22119 = getelementptr inbounds float, float* %tmp22118, i64 1
+ %tmp22120 = getelementptr inbounds float, float* %tmp22119, i64 1
+ %tmp22121 = getelementptr inbounds float, float* %tmp22120, i64 1
+ %tmp22122 = getelementptr inbounds float, float* %tmp22121, i64 1
+ %tmp22123 = getelementptr inbounds float, float* %tmp22122, i64 1
+ %tmp22124 = getelementptr inbounds float, float* %tmp22123, i64 1
+ %tmp22125 = getelementptr inbounds float, float* %tmp22124, i64 1
+ %tmp22126 = getelementptr inbounds float, float* %tmp22125, i64 1
+ %tmp22127 = getelementptr inbounds float, float* %tmp22126, i64 1
+ %tmp22128 = getelementptr inbounds float, float* %tmp22127, i64 1
+ %tmp22129 = getelementptr inbounds float, float* %tmp22128, i64 1
+ %tmp22130 = getelementptr inbounds float, float* %tmp22129, i64 1
+ %tmp22131 = getelementptr inbounds float, float* %tmp22130, i64 1
+ %tmp22132 = getelementptr inbounds float, float* %tmp22131, i64 1
+ %tmp22133 = getelementptr inbounds float, float* %tmp22132, i64 1
+ %tmp22134 = getelementptr inbounds float, float* %tmp22133, i64 1
+ %tmp22135 = getelementptr inbounds float, float* %tmp22134, i64 1
+ %tmp22136 = getelementptr inbounds float, float* %tmp22135, i64 1
+ %tmp22137 = getelementptr inbounds float, float* %tmp22136, i64 1
+ %tmp22138 = getelementptr inbounds float, float* %tmp22137, i64 1
+ %tmp22139 = getelementptr inbounds float, float* %tmp22138, i64 1
+ %tmp22140 = getelementptr inbounds float, float* %tmp22139, i64 1
+ %tmp22141 = getelementptr inbounds float, float* %tmp22140, i64 1
+ %tmp22142 = getelementptr inbounds float, float* %tmp22141, i64 1
+ %tmp22143 = getelementptr inbounds float, float* %tmp22142, i64 1
+ %tmp22144 = getelementptr inbounds float, float* %tmp22143, i64 1
+ %tmp22145 = getelementptr inbounds float, float* %tmp22144, i64 1
+ %tmp22146 = getelementptr inbounds float, float* %tmp22145, i64 1
+ %tmp22147 = getelementptr inbounds float, float* %tmp22146, i64 1
+ %tmp22148 = getelementptr inbounds float, float* %tmp22147, i64 1
+ %tmp22149 = getelementptr inbounds float, float* %tmp22148, i64 1
+ %tmp22150 = getelementptr inbounds float, float* %tmp22149, i64 1
+ %tmp22151 = getelementptr inbounds float, float* %tmp22150, i64 1
+ %tmp22152 = getelementptr inbounds float, float* %tmp22151, i64 1
+ %tmp22153 = getelementptr inbounds float, float* %tmp22152, i64 1
+ %tmp22154 = getelementptr inbounds float, float* %tmp22153, i64 1
+ %tmp22155 = getelementptr inbounds float, float* %tmp22154, i64 1
+ %tmp22156 = getelementptr inbounds float, float* %tmp22155, i64 1
+ %tmp22157 = getelementptr inbounds float, float* %tmp22156, i64 1
+ %tmp22158 = getelementptr inbounds float, float* %tmp22157, i64 1
+ %tmp22159 = getelementptr inbounds float, float* %tmp22158, i64 1
+ %tmp22160 = getelementptr inbounds float, float* %tmp22159, i64 1
+ %tmp22161 = getelementptr inbounds float, float* %tmp22160, i64 1
+ %tmp22162 = getelementptr inbounds float, float* %tmp22161, i64 1
+ %tmp22163 = getelementptr inbounds float, float* %tmp22162, i64 1
+ %tmp22164 = getelementptr inbounds float, float* %tmp22163, i64 1
+ %tmp22165 = getelementptr inbounds float, float* %tmp22164, i64 1
+ %tmp22166 = getelementptr inbounds float, float* %tmp22165, i64 1
+ %tmp22167 = getelementptr inbounds float, float* %tmp22166, i64 1
+ %tmp22168 = getelementptr inbounds float, float* %tmp22167, i64 1
+ %tmp22169 = getelementptr inbounds float, float* %tmp22168, i64 1
+ %tmp22170 = getelementptr inbounds float, float* %tmp22169, i64 1
+ %tmp22171 = getelementptr inbounds float, float* %tmp22170, i64 1
+ %tmp22172 = getelementptr inbounds float, float* %tmp22171, i64 1
+ %tmp22173 = getelementptr inbounds float, float* %tmp22172, i64 1
+ %tmp22174 = getelementptr inbounds float, float* %tmp22173, i64 1
+ %tmp22175 = getelementptr inbounds float, float* %tmp22174, i64 1
+ %tmp22176 = getelementptr inbounds float, float* %tmp22175, i64 1
+ %tmp22177 = getelementptr inbounds float, float* %tmp22176, i64 1
+ %tmp22178 = getelementptr inbounds float, float* %tmp22177, i64 1
+ %tmp22179 = getelementptr inbounds float, float* %tmp22178, i64 1
+ %tmp22180 = getelementptr inbounds float, float* %tmp22179, i64 1
+ %tmp22181 = getelementptr inbounds float, float* %tmp22180, i64 1
+ %tmp22182 = getelementptr inbounds float, float* %tmp22181, i64 1
+ %tmp22183 = getelementptr inbounds float, float* %tmp22182, i64 1
+ %tmp22184 = getelementptr inbounds float, float* %tmp22183, i64 1
+ %tmp22185 = getelementptr inbounds float, float* %tmp22184, i64 1
+ %tmp22186 = getelementptr inbounds float, float* %tmp22185, i64 1
+ %tmp22187 = getelementptr inbounds float, float* %tmp22186, i64 1
+ %tmp22188 = getelementptr inbounds float, float* %tmp22187, i64 1
+ %tmp22189 = getelementptr inbounds float, float* %tmp22188, i64 1
+ %tmp22190 = getelementptr inbounds float, float* %tmp22189, i64 1
+ %tmp22191 = getelementptr inbounds float, float* %tmp22190, i64 1
+ %tmp22192 = getelementptr inbounds float, float* %tmp22191, i64 1
+ %tmp22193 = getelementptr inbounds float, float* %tmp22192, i64 1
+ %tmp22194 = getelementptr inbounds float, float* %tmp22193, i64 1
+ %tmp22195 = getelementptr inbounds float, float* %tmp22194, i64 1
+ %tmp22196 = getelementptr inbounds float, float* %tmp22195, i64 1
+ %tmp22197 = getelementptr inbounds float, float* %tmp22196, i64 1
+ %tmp22198 = getelementptr inbounds float, float* %tmp22197, i64 1
+ %tmp22199 = getelementptr inbounds float, float* %tmp22198, i64 1
+ %tmp22200 = getelementptr inbounds float, float* %tmp22199, i64 1
+ %tmp22201 = getelementptr inbounds float, float* %tmp22200, i64 1
+ %tmp22202 = getelementptr inbounds float, float* %tmp22201, i64 1
+ %tmp22203 = getelementptr inbounds float, float* %tmp22202, i64 1
+ %tmp22204 = getelementptr inbounds float, float* %tmp22203, i64 1
+ %tmp22205 = getelementptr inbounds float, float* %tmp22204, i64 1
+ %tmp22206 = getelementptr inbounds float, float* %tmp22205, i64 1
+ %tmp22207 = getelementptr inbounds float, float* %tmp22206, i64 1
+ %tmp22208 = getelementptr inbounds float, float* %tmp22207, i64 1
+ %tmp22209 = getelementptr inbounds float, float* %tmp22208, i64 1
+ %tmp22210 = getelementptr inbounds float, float* %tmp22209, i64 1
+ %tmp22211 = getelementptr inbounds float, float* %tmp22210, i64 1
+ %tmp22212 = getelementptr inbounds float, float* %tmp22211, i64 1
+ %tmp22213 = getelementptr inbounds float, float* %tmp22212, i64 1
+ %tmp22214 = getelementptr inbounds float, float* %tmp22213, i64 1
+ %tmp22215 = getelementptr inbounds float, float* %tmp22214, i64 1
+ %tmp22216 = getelementptr inbounds float, float* %tmp22215, i64 1
+ %tmp22217 = getelementptr inbounds float, float* %tmp22216, i64 1
+ %tmp22218 = getelementptr inbounds float, float* %tmp22217, i64 1
+ %tmp22219 = getelementptr inbounds float, float* %tmp22218, i64 1
+ %tmp22220 = getelementptr inbounds float, float* %tmp22219, i64 1
+ %tmp22221 = getelementptr inbounds float, float* %tmp22220, i64 1
+ %tmp22222 = getelementptr inbounds float, float* %tmp22221, i64 1
+ %tmp22223 = getelementptr inbounds float, float* %tmp22222, i64 1
+ %tmp22224 = getelementptr inbounds float, float* %tmp22223, i64 1
+ %tmp22225 = getelementptr inbounds float, float* %tmp22224, i64 1
+ %tmp22226 = getelementptr inbounds float, float* %tmp22225, i64 1
+ %tmp22227 = getelementptr inbounds float, float* %tmp22226, i64 1
+ %tmp22228 = getelementptr inbounds float, float* %tmp22227, i64 1
+ %tmp22229 = getelementptr inbounds float, float* %tmp22228, i64 1
+ %tmp22230 = getelementptr inbounds float, float* %tmp22229, i64 1
+ %tmp22231 = getelementptr inbounds float, float* %tmp22230, i64 1
+ %tmp22232 = getelementptr inbounds float, float* %tmp22231, i64 1
+ %tmp22233 = getelementptr inbounds float, float* %tmp22232, i64 1
+ %tmp22234 = getelementptr inbounds float, float* %tmp22233, i64 1
+ %tmp22235 = getelementptr inbounds float, float* %tmp22234, i64 1
+ %tmp22236 = getelementptr inbounds float, float* %tmp22235, i64 1
+ %tmp22237 = getelementptr inbounds float, float* %tmp22236, i64 1
+ %tmp22238 = getelementptr inbounds float, float* %tmp22237, i64 1
+ %tmp22239 = getelementptr inbounds float, float* %tmp22238, i64 1
+ %tmp22240 = getelementptr inbounds float, float* %tmp22239, i64 1
+ %tmp22241 = getelementptr inbounds float, float* %tmp22240, i64 1
+ %tmp22242 = getelementptr inbounds float, float* %tmp22241, i64 1
+ %tmp22243 = getelementptr inbounds float, float* %tmp22242, i64 1
+ %tmp22244 = getelementptr inbounds float, float* %tmp22243, i64 1
+ %tmp22245 = getelementptr inbounds float, float* %tmp22244, i64 1
+ %tmp22246 = getelementptr inbounds float, float* %tmp22245, i64 1
+ %tmp22247 = getelementptr inbounds float, float* %tmp22246, i64 1
+ %tmp22248 = getelementptr inbounds float, float* %tmp22247, i64 1
+ %tmp22249 = getelementptr inbounds float, float* %tmp22248, i64 1
+ %tmp22250 = getelementptr inbounds float, float* %tmp22249, i64 1
+ %tmp22251 = getelementptr inbounds float, float* %tmp22250, i64 1
+ %tmp22252 = getelementptr inbounds float, float* %tmp22251, i64 1
+ %tmp22253 = getelementptr inbounds float, float* %tmp22252, i64 1
+ %tmp22254 = getelementptr inbounds float, float* %tmp22253, i64 1
+ %tmp22255 = getelementptr inbounds float, float* %tmp22254, i64 1
+ %tmp22256 = getelementptr inbounds float, float* %tmp22255, i64 1
+ %tmp22257 = getelementptr inbounds float, float* %tmp22256, i64 1
+ %tmp22258 = getelementptr inbounds float, float* %tmp22257, i64 1
+ %tmp22259 = getelementptr inbounds float, float* %tmp22258, i64 1
+ %tmp22260 = getelementptr inbounds float, float* %tmp22259, i64 1
+ %tmp22261 = getelementptr inbounds float, float* %tmp22260, i64 1
+ %tmp22262 = getelementptr inbounds float, float* %tmp22261, i64 1
+ %tmp22263 = getelementptr inbounds float, float* %tmp22262, i64 1
+ %tmp22264 = getelementptr inbounds float, float* %tmp22263, i64 1
+ %tmp22265 = getelementptr inbounds float, float* %tmp22264, i64 1
+ %tmp22266 = getelementptr inbounds float, float* %tmp22265, i64 1
+ %tmp22267 = getelementptr inbounds float, float* %tmp22266, i64 1
+ %tmp22268 = getelementptr inbounds float, float* %tmp22267, i64 1
+ %tmp22269 = getelementptr inbounds float, float* %tmp22268, i64 1
+ %tmp22270 = getelementptr inbounds float, float* %tmp22269, i64 1
+ %tmp22271 = getelementptr inbounds float, float* %tmp22270, i64 1
+ %tmp22272 = getelementptr inbounds float, float* %tmp22271, i64 1
+ %tmp22273 = getelementptr inbounds float, float* %tmp22272, i64 1
+ %tmp22274 = getelementptr inbounds float, float* %tmp22273, i64 1
+ %tmp22275 = getelementptr inbounds float, float* %tmp22274, i64 1
+ %tmp22276 = getelementptr inbounds float, float* %tmp22275, i64 1
+ %tmp22277 = getelementptr inbounds float, float* %tmp22276, i64 1
+ %tmp22278 = getelementptr inbounds float, float* %tmp22277, i64 1
+ %tmp22279 = getelementptr inbounds float, float* %tmp22278, i64 1
+ %tmp22280 = getelementptr inbounds float, float* %tmp22279, i64 1
+ %tmp22281 = getelementptr inbounds float, float* %tmp22280, i64 1
+ %tmp22282 = getelementptr inbounds float, float* %tmp22281, i64 1
+ %tmp22283 = getelementptr inbounds float, float* %tmp22282, i64 1
+ %tmp22284 = getelementptr inbounds float, float* %tmp22283, i64 1
+ %tmp22285 = getelementptr inbounds float, float* %tmp22284, i64 1
+ %tmp22286 = getelementptr inbounds float, float* %tmp22285, i64 1
+ %tmp22287 = getelementptr inbounds float, float* %tmp22286, i64 1
+ %tmp22288 = getelementptr inbounds float, float* %tmp22287, i64 1
+ %tmp22289 = getelementptr inbounds float, float* %tmp22288, i64 1
+ %tmp22290 = getelementptr inbounds float, float* %tmp22289, i64 1
+ %tmp22291 = getelementptr inbounds float, float* %tmp22290, i64 1
+ %tmp22292 = getelementptr inbounds float, float* %tmp22291, i64 1
+ %tmp22293 = getelementptr inbounds float, float* %tmp22292, i64 1
+ %tmp22294 = getelementptr inbounds float, float* %tmp22293, i64 1
+ %tmp22295 = getelementptr inbounds float, float* %tmp22294, i64 1
+ %tmp22296 = getelementptr inbounds float, float* %tmp22295, i64 1
+ %tmp22297 = getelementptr inbounds float, float* %tmp22296, i64 1
+ %tmp22298 = getelementptr inbounds float, float* %tmp22297, i64 1
+ %tmp22299 = getelementptr inbounds float, float* %tmp22298, i64 1
+ %tmp22300 = getelementptr inbounds float, float* %tmp22299, i64 1
+ %tmp22301 = getelementptr inbounds float, float* %tmp22300, i64 1
+ %tmp22302 = getelementptr inbounds float, float* %tmp22301, i64 1
+ %tmp22303 = getelementptr inbounds float, float* %tmp22302, i64 1
+ %tmp22304 = getelementptr inbounds float, float* %tmp22303, i64 1
+ %tmp22305 = getelementptr inbounds float, float* %tmp22304, i64 1
+ %tmp22306 = getelementptr inbounds float, float* %tmp22305, i64 1
+ %tmp22307 = getelementptr inbounds float, float* %tmp22306, i64 1
+ %tmp22308 = getelementptr inbounds float, float* %tmp22307, i64 1
+ %tmp22309 = getelementptr inbounds float, float* %tmp22308, i64 1
+ %tmp22310 = getelementptr inbounds float, float* %tmp22309, i64 1
+ %tmp22311 = getelementptr inbounds float, float* %tmp22310, i64 1
+ %tmp22312 = getelementptr inbounds float, float* %tmp22311, i64 1
+ %tmp22313 = getelementptr inbounds float, float* %tmp22312, i64 1
+ %tmp22314 = getelementptr inbounds float, float* %tmp22313, i64 1
+ %tmp22315 = getelementptr inbounds float, float* %tmp22314, i64 1
+ %tmp22316 = getelementptr inbounds float, float* %tmp22315, i64 1
+ %tmp22317 = getelementptr inbounds float, float* %tmp22316, i64 1
+ %tmp22318 = getelementptr inbounds float, float* %tmp22317, i64 1
+ %tmp22319 = getelementptr inbounds float, float* %tmp22318, i64 1
+ %tmp22320 = getelementptr inbounds float, float* %tmp22319, i64 1
+ %tmp22321 = getelementptr inbounds float, float* %tmp22320, i64 1
+ %tmp22322 = getelementptr inbounds float, float* %tmp22321, i64 1
+ %tmp22323 = getelementptr inbounds float, float* %tmp22322, i64 1
+ %tmp22324 = getelementptr inbounds float, float* %tmp22323, i64 1
+ %tmp22325 = getelementptr inbounds float, float* %tmp22324, i64 1
+ %tmp22326 = getelementptr inbounds float, float* %tmp22325, i64 1
+ %tmp22327 = getelementptr inbounds float, float* %tmp22326, i64 1
+ %tmp22328 = getelementptr inbounds float, float* %tmp22327, i64 1
+ %tmp22329 = getelementptr inbounds float, float* %tmp22328, i64 1
+ %tmp22330 = getelementptr inbounds float, float* %tmp22329, i64 1
+ %tmp22331 = getelementptr inbounds float, float* %tmp22330, i64 1
+ %tmp22332 = getelementptr inbounds float, float* %tmp22331, i64 1
+ %tmp22333 = getelementptr inbounds float, float* %tmp22332, i64 1
+ %tmp22334 = getelementptr inbounds float, float* %tmp22333, i64 1
+ %tmp22335 = getelementptr inbounds float, float* %tmp22334, i64 1
+ %tmp22336 = getelementptr inbounds float, float* %tmp22335, i64 1
+ %tmp22337 = getelementptr inbounds float, float* %tmp22336, i64 1
+ %tmp22338 = getelementptr inbounds float, float* %tmp22337, i64 1
+ %tmp22339 = getelementptr inbounds float, float* %tmp22338, i64 1
+ %tmp22340 = getelementptr inbounds float, float* %tmp22339, i64 1
+ %tmp22341 = getelementptr inbounds float, float* %tmp22340, i64 1
+ %tmp22342 = getelementptr inbounds float, float* %tmp22341, i64 1
+ %tmp22343 = getelementptr inbounds float, float* %tmp22342, i64 1
+ %tmp22344 = getelementptr inbounds float, float* %tmp22343, i64 1
+ %tmp22345 = getelementptr inbounds float, float* %tmp22344, i64 1
+ %tmp22346 = getelementptr inbounds float, float* %tmp22345, i64 1
+ %tmp22347 = getelementptr inbounds float, float* %tmp22346, i64 1
+ %tmp22348 = getelementptr inbounds float, float* %tmp22347, i64 1
+ %tmp22349 = getelementptr inbounds float, float* %tmp22348, i64 1
+ %tmp22350 = getelementptr inbounds float, float* %tmp22349, i64 1
+ %tmp22351 = getelementptr inbounds float, float* %tmp22350, i64 1
+ %tmp22352 = getelementptr inbounds float, float* %tmp22351, i64 1
+ %tmp22353 = getelementptr inbounds float, float* %tmp22352, i64 1
+ %tmp22354 = getelementptr inbounds float, float* %tmp22353, i64 1
+ %tmp22355 = getelementptr inbounds float, float* %tmp22354, i64 1
+ %tmp22356 = getelementptr inbounds float, float* %tmp22355, i64 1
+ %tmp22357 = getelementptr inbounds float, float* %tmp22356, i64 1
+ %tmp22358 = getelementptr inbounds float, float* %tmp22357, i64 1
+ %tmp22359 = getelementptr inbounds float, float* %tmp22358, i64 1
+ %tmp22360 = getelementptr inbounds float, float* %tmp22359, i64 1
+ %tmp22361 = getelementptr inbounds float, float* %tmp22360, i64 1
+ %tmp22362 = getelementptr inbounds float, float* %tmp22361, i64 1
+ %tmp22363 = getelementptr inbounds float, float* %tmp22362, i64 1
+ %tmp22364 = getelementptr inbounds float, float* %tmp22363, i64 1
+ %tmp22365 = getelementptr inbounds float, float* %tmp22364, i64 1
+ %tmp22366 = getelementptr inbounds float, float* %tmp22365, i64 1
+ %tmp22367 = getelementptr inbounds float, float* %tmp22366, i64 1
+ %tmp22368 = getelementptr inbounds float, float* %tmp22367, i64 1
+ %tmp22369 = getelementptr inbounds float, float* %tmp22368, i64 1
+ %tmp22370 = getelementptr inbounds float, float* %tmp22369, i64 1
+ %tmp22371 = getelementptr inbounds float, float* %tmp22370, i64 1
+ %tmp22372 = getelementptr inbounds float, float* %tmp22371, i64 1
+ %tmp22373 = getelementptr inbounds float, float* %tmp22372, i64 1
+ %tmp22374 = getelementptr inbounds float, float* %tmp22373, i64 1
+ %tmp22375 = getelementptr inbounds float, float* %tmp22374, i64 1
+ %tmp22376 = getelementptr inbounds float, float* %tmp22375, i64 1
+ %tmp22377 = getelementptr inbounds float, float* %tmp22376, i64 1
+ %tmp22378 = getelementptr inbounds float, float* %tmp22377, i64 1
+ %tmp22379 = getelementptr inbounds float, float* %tmp22378, i64 1
+ %tmp22380 = getelementptr inbounds float, float* %tmp22379, i64 1
+ %tmp22381 = getelementptr inbounds float, float* %tmp22380, i64 1
+ %tmp22382 = getelementptr inbounds float, float* %tmp22381, i64 1
+ %tmp22383 = getelementptr inbounds float, float* %tmp22382, i64 1
+ %tmp22384 = getelementptr inbounds float, float* %tmp22383, i64 1
+ %tmp22385 = getelementptr inbounds float, float* %tmp22384, i64 1
+ %tmp22386 = getelementptr inbounds float, float* %tmp22385, i64 1
+ %tmp22387 = getelementptr inbounds float, float* %tmp22386, i64 1
+ %tmp22388 = getelementptr inbounds float, float* %tmp22387, i64 1
+ %tmp22389 = getelementptr inbounds float, float* %tmp22388, i64 1
+ %tmp22390 = getelementptr inbounds float, float* %tmp22389, i64 1
+ %tmp22391 = getelementptr inbounds float, float* %tmp22390, i64 1
+ %tmp22392 = getelementptr inbounds float, float* %tmp22391, i64 1
+ %tmp22393 = getelementptr inbounds float, float* %tmp22392, i64 1
+ %tmp22394 = getelementptr inbounds float, float* %tmp22393, i64 1
+ %tmp22395 = getelementptr inbounds float, float* %tmp22394, i64 1
+ %tmp22396 = getelementptr inbounds float, float* %tmp22395, i64 1
+ %tmp22397 = getelementptr inbounds float, float* %tmp22396, i64 1
+ %tmp22398 = getelementptr inbounds float, float* %tmp22397, i64 1
+ %tmp22399 = getelementptr inbounds float, float* %tmp22398, i64 1
+ %tmp22400 = getelementptr inbounds float, float* %tmp22399, i64 1
+ %tmp22401 = getelementptr inbounds float, float* %tmp22400, i64 1
+ %tmp22402 = getelementptr inbounds float, float* %tmp22401, i64 1
+ %tmp22403 = getelementptr inbounds float, float* %tmp22402, i64 1
+ %tmp22404 = getelementptr inbounds float, float* %tmp22403, i64 1
+ %tmp22405 = getelementptr inbounds float, float* %tmp22404, i64 1
+ %tmp22406 = getelementptr inbounds float, float* %tmp22405, i64 1
+ %tmp22407 = getelementptr inbounds float, float* %tmp22406, i64 1
+ %tmp22408 = getelementptr inbounds float, float* %tmp22407, i64 1
+ %tmp22409 = getelementptr inbounds float, float* %tmp22408, i64 1
+ %tmp22410 = getelementptr inbounds float, float* %tmp22409, i64 1
+ %tmp22411 = getelementptr inbounds float, float* %tmp22410, i64 1
+ %tmp22412 = getelementptr inbounds float, float* %tmp22411, i64 1
+ %tmp22413 = getelementptr inbounds float, float* %tmp22412, i64 1
+ %tmp22414 = getelementptr inbounds float, float* %tmp22413, i64 1
+ %tmp22415 = getelementptr inbounds float, float* %tmp22414, i64 1
+ %tmp22416 = getelementptr inbounds float, float* %tmp22415, i64 1
+ %tmp22417 = getelementptr inbounds float, float* %tmp22416, i64 1
+ %tmp22418 = getelementptr inbounds float, float* %tmp22417, i64 1
+ %tmp22419 = getelementptr inbounds float, float* %tmp22418, i64 1
+ %tmp22420 = getelementptr inbounds float, float* %tmp22419, i64 1
+ %tmp22421 = getelementptr inbounds float, float* %tmp22420, i64 1
+ %tmp22422 = getelementptr inbounds float, float* %tmp22421, i64 1
+ %tmp22423 = getelementptr inbounds float, float* %tmp22422, i64 1
+ %tmp22424 = getelementptr inbounds float, float* %tmp22423, i64 1
+ %tmp22425 = getelementptr inbounds float, float* %tmp22424, i64 1
+ %tmp22426 = getelementptr inbounds float, float* %tmp22425, i64 1
+ %tmp22427 = getelementptr inbounds float, float* %tmp22426, i64 1
+ %tmp22428 = getelementptr inbounds float, float* %tmp22427, i64 1
+ %tmp22429 = getelementptr inbounds float, float* %tmp22428, i64 1
+ %tmp22430 = getelementptr inbounds float, float* %tmp22429, i64 1
+ %tmp22431 = getelementptr inbounds float, float* %tmp22430, i64 1
+ %tmp22432 = getelementptr inbounds float, float* %tmp22431, i64 1
+ %tmp22433 = getelementptr inbounds float, float* %tmp22432, i64 1
+ %tmp22434 = getelementptr inbounds float, float* %tmp22433, i64 1
+ %tmp22435 = getelementptr inbounds float, float* %tmp22434, i64 1
+ %tmp22436 = getelementptr inbounds float, float* %tmp22435, i64 1
+ %tmp22437 = getelementptr inbounds float, float* %tmp22436, i64 1
+ %tmp22438 = getelementptr inbounds float, float* %tmp22437, i64 1
+ %tmp22439 = getelementptr inbounds float, float* %tmp22438, i64 1
+ %tmp22440 = getelementptr inbounds float, float* %tmp22439, i64 1
+ %tmp22441 = getelementptr inbounds float, float* %tmp22440, i64 1
+ %tmp22442 = getelementptr inbounds float, float* %tmp22441, i64 1
+ %tmp22443 = getelementptr inbounds float, float* %tmp22442, i64 1
+ %tmp22444 = getelementptr inbounds float, float* %tmp22443, i64 1
+ %tmp22445 = getelementptr inbounds float, float* %tmp22444, i64 1
+ %tmp22446 = getelementptr inbounds float, float* %tmp22445, i64 1
+ %tmp22447 = getelementptr inbounds float, float* %tmp22446, i64 1
+ %tmp22448 = getelementptr inbounds float, float* %tmp22447, i64 1
+ %tmp22449 = getelementptr inbounds float, float* %tmp22448, i64 1
+ %tmp22450 = getelementptr inbounds float, float* %tmp22449, i64 1
+ %tmp22451 = getelementptr inbounds float, float* %tmp22450, i64 1
+ %tmp22452 = getelementptr inbounds float, float* %tmp22451, i64 1
+ %tmp22453 = getelementptr inbounds float, float* %tmp22452, i64 1
+ %tmp22454 = getelementptr inbounds float, float* %tmp22453, i64 1
+ %tmp22455 = getelementptr inbounds float, float* %tmp22454, i64 1
+ %tmp22456 = getelementptr inbounds float, float* %tmp22455, i64 1
+ %tmp22457 = getelementptr inbounds float, float* %tmp22456, i64 1
+ %tmp22458 = getelementptr inbounds float, float* %tmp22457, i64 1
+ %tmp22459 = getelementptr inbounds float, float* %tmp22458, i64 1
+ %tmp22460 = getelementptr inbounds float, float* %tmp22459, i64 1
+ %tmp22461 = getelementptr inbounds float, float* %tmp22460, i64 1
+ %tmp22462 = getelementptr inbounds float, float* %tmp22461, i64 1
+ %tmp22463 = getelementptr inbounds float, float* %tmp22462, i64 1
+ %tmp22464 = getelementptr inbounds float, float* %tmp22463, i64 1
+ %tmp22465 = getelementptr inbounds float, float* %tmp22464, i64 1
+ %tmp22466 = getelementptr inbounds float, float* %tmp22465, i64 1
+ %tmp22467 = getelementptr inbounds float, float* %tmp22466, i64 1
+ %tmp22468 = getelementptr inbounds float, float* %tmp22467, i64 1
+ %tmp22469 = getelementptr inbounds float, float* %tmp22468, i64 1
+ %tmp22470 = getelementptr inbounds float, float* %tmp22469, i64 1
+ %tmp22471 = getelementptr inbounds float, float* %tmp22470, i64 1
+ %tmp22472 = getelementptr inbounds float, float* %tmp22471, i64 1
+ %tmp22473 = getelementptr inbounds float, float* %tmp22472, i64 1
+ %tmp22474 = getelementptr inbounds float, float* %tmp22473, i64 1
+ %tmp22475 = getelementptr inbounds float, float* %tmp22474, i64 1
+ %tmp22476 = getelementptr inbounds float, float* %tmp22475, i64 1
+ %tmp22477 = getelementptr inbounds float, float* %tmp22476, i64 1
+ %tmp22478 = getelementptr inbounds float, float* %tmp22477, i64 1
+ %tmp22479 = getelementptr inbounds float, float* %tmp22478, i64 1
+ %tmp22480 = getelementptr inbounds float, float* %tmp22479, i64 1
+ %tmp22481 = getelementptr inbounds float, float* %tmp22480, i64 1
+ %tmp22482 = getelementptr inbounds float, float* %tmp22481, i64 1
+ %tmp22483 = getelementptr inbounds float, float* %tmp22482, i64 1
+ %tmp22484 = getelementptr inbounds float, float* %tmp22483, i64 1
+ %tmp22485 = getelementptr inbounds float, float* %tmp22484, i64 1
+ %tmp22486 = getelementptr inbounds float, float* %tmp22485, i64 1
+ %tmp22487 = getelementptr inbounds float, float* %tmp22486, i64 1
+ %tmp22488 = getelementptr inbounds float, float* %tmp22487, i64 1
+ %tmp22489 = getelementptr inbounds float, float* %tmp22488, i64 1
+ %tmp22490 = getelementptr inbounds float, float* %tmp22489, i64 1
+ %tmp22491 = getelementptr inbounds float, float* %tmp22490, i64 1
+ %tmp22492 = getelementptr inbounds float, float* %tmp22491, i64 1
+ %tmp22493 = getelementptr inbounds float, float* %tmp22492, i64 1
+ %tmp22494 = getelementptr inbounds float, float* %tmp22493, i64 1
+ %tmp22495 = getelementptr inbounds float, float* %tmp22494, i64 1
+ %tmp22496 = getelementptr inbounds float, float* %tmp22495, i64 1
+ %tmp22497 = getelementptr inbounds float, float* %tmp22496, i64 1
+ %tmp22498 = getelementptr inbounds float, float* %tmp22497, i64 1
+ %tmp22499 = getelementptr inbounds float, float* %tmp22498, i64 1
+ %tmp22500 = getelementptr inbounds float, float* %tmp22499, i64 1
+ %tmp22501 = getelementptr inbounds float, float* %tmp22500, i64 1
+ %tmp22502 = getelementptr inbounds float, float* %tmp22501, i64 1
+ %tmp22503 = getelementptr inbounds float, float* %tmp22502, i64 1
+ %tmp22504 = getelementptr inbounds float, float* %tmp22503, i64 1
+ %tmp22505 = getelementptr inbounds float, float* %tmp22504, i64 1
+ %tmp22506 = getelementptr inbounds float, float* %tmp22505, i64 1
+ %tmp22507 = getelementptr inbounds float, float* %tmp22506, i64 1
+ %tmp22508 = getelementptr inbounds float, float* %tmp22507, i64 1
+ %tmp22509 = getelementptr inbounds float, float* %tmp22508, i64 1
+ %tmp22510 = getelementptr inbounds float, float* %tmp22509, i64 1
+ %tmp22511 = getelementptr inbounds float, float* %tmp22510, i64 1
+ %tmp22512 = getelementptr inbounds float, float* %tmp22511, i64 1
+ %tmp22513 = getelementptr inbounds float, float* %tmp22512, i64 1
+ %tmp22514 = getelementptr inbounds float, float* %tmp22513, i64 1
+ %tmp22515 = getelementptr inbounds float, float* %tmp22514, i64 1
+ %tmp22516 = getelementptr inbounds float, float* %tmp22515, i64 1
+ %tmp22517 = getelementptr inbounds float, float* %tmp22516, i64 1
+ %tmp22518 = getelementptr inbounds float, float* %tmp22517, i64 1
+ %tmp22519 = getelementptr inbounds float, float* %tmp22518, i64 1
+ %tmp22520 = getelementptr inbounds float, float* %tmp22519, i64 1
+ %tmp22521 = getelementptr inbounds float, float* %tmp22520, i64 1
+ %tmp22522 = getelementptr inbounds float, float* %tmp22521, i64 1
+ %tmp22523 = getelementptr inbounds float, float* %tmp22522, i64 1
+ %tmp22524 = getelementptr inbounds float, float* %tmp22523, i64 1
+ %tmp22525 = getelementptr inbounds float, float* %tmp22524, i64 1
+ %tmp22526 = getelementptr inbounds float, float* %tmp22525, i64 1
+ %tmp22527 = getelementptr inbounds float, float* %tmp22526, i64 1
+ %tmp22528 = getelementptr inbounds float, float* %tmp22527, i64 1
+ %tmp22529 = getelementptr inbounds float, float* %tmp22528, i64 1
+ %tmp22530 = getelementptr inbounds float, float* %tmp22529, i64 1
+ %tmp22531 = getelementptr inbounds float, float* %tmp22530, i64 1
+ %tmp22532 = getelementptr inbounds float, float* %tmp22531, i64 1
+ %tmp22533 = getelementptr inbounds float, float* %tmp22532, i64 1
+ %tmp22534 = getelementptr inbounds float, float* %tmp22533, i64 1
+ %tmp22535 = getelementptr inbounds float, float* %tmp22534, i64 1
+ %tmp22536 = getelementptr inbounds float, float* %tmp22535, i64 1
+ %tmp22537 = getelementptr inbounds float, float* %tmp22536, i64 1
+ %tmp22538 = getelementptr inbounds float, float* %tmp22537, i64 1
+ %tmp22539 = getelementptr inbounds float, float* %tmp22538, i64 1
+ %tmp22540 = getelementptr inbounds float, float* %tmp22539, i64 1
+ %tmp22541 = getelementptr inbounds float, float* %tmp22540, i64 1
+ %tmp22542 = getelementptr inbounds float, float* %tmp22541, i64 1
+ %tmp22543 = getelementptr inbounds float, float* %tmp22542, i64 1
+ %tmp22544 = getelementptr inbounds float, float* %tmp22543, i64 1
+ %tmp22545 = getelementptr inbounds float, float* %tmp22544, i64 1
+ %tmp22546 = getelementptr inbounds float, float* %tmp22545, i64 1
+ %tmp22547 = getelementptr inbounds float, float* %tmp22546, i64 1
+ %tmp22548 = getelementptr inbounds float, float* %tmp22547, i64 1
+ %tmp22549 = getelementptr inbounds float, float* %tmp22548, i64 1
+ %tmp22550 = getelementptr inbounds float, float* %tmp22549, i64 1
+ %tmp22551 = getelementptr inbounds float, float* %tmp22550, i64 1
+ %tmp22552 = getelementptr inbounds float, float* %tmp22551, i64 1
+ %tmp22553 = getelementptr inbounds float, float* %tmp22552, i64 1
+ %tmp22554 = getelementptr inbounds float, float* %tmp22553, i64 1
+ %tmp22555 = getelementptr inbounds float, float* %tmp22554, i64 1
+ %tmp22556 = getelementptr inbounds float, float* %tmp22555, i64 1
+ %tmp22557 = getelementptr inbounds float, float* %tmp22556, i64 1
+ %tmp22558 = getelementptr inbounds float, float* %tmp22557, i64 1
+ %tmp22559 = getelementptr inbounds float, float* %tmp22558, i64 1
+ %tmp22560 = getelementptr inbounds float, float* %tmp22559, i64 1
+ %tmp22561 = getelementptr inbounds float, float* %tmp22560, i64 1
+ %tmp22562 = getelementptr inbounds float, float* %tmp22561, i64 1
+ %tmp22563 = getelementptr inbounds float, float* %tmp22562, i64 1
+ %tmp22564 = getelementptr inbounds float, float* %tmp22563, i64 1
+ %tmp22565 = getelementptr inbounds float, float* %tmp22564, i64 1
+ %tmp22566 = getelementptr inbounds float, float* %tmp22565, i64 1
+ %tmp22567 = getelementptr inbounds float, float* %tmp22566, i64 1
+ %tmp22568 = getelementptr inbounds float, float* %tmp22567, i64 1
+ %tmp22569 = getelementptr inbounds float, float* %tmp22568, i64 1
+ %tmp22570 = getelementptr inbounds float, float* %tmp22569, i64 1
+ %tmp22571 = getelementptr inbounds float, float* %tmp22570, i64 1
+ %tmp22572 = getelementptr inbounds float, float* %tmp22571, i64 1
+ %tmp22573 = getelementptr inbounds float, float* %tmp22572, i64 1
+ %tmp22574 = getelementptr inbounds float, float* %tmp22573, i64 1
+ %tmp22575 = getelementptr inbounds float, float* %tmp22574, i64 1
+ %tmp22576 = getelementptr inbounds float, float* %tmp22575, i64 1
+ %tmp22577 = getelementptr inbounds float, float* %tmp22576, i64 1
+ %tmp22578 = getelementptr inbounds float, float* %tmp22577, i64 1
+ %tmp22579 = getelementptr inbounds float, float* %tmp22578, i64 1
+ %tmp22580 = getelementptr inbounds float, float* %tmp22579, i64 1
+ %tmp22581 = getelementptr inbounds float, float* %tmp22580, i64 1
+ %tmp22582 = getelementptr inbounds float, float* %tmp22581, i64 1
+ %tmp22583 = getelementptr inbounds float, float* %tmp22582, i64 1
+ %tmp22584 = getelementptr inbounds float, float* %tmp22583, i64 1
+ %tmp22585 = getelementptr inbounds float, float* %tmp22584, i64 1
+ %tmp22586 = getelementptr inbounds float, float* %tmp22585, i64 1
+ %tmp22587 = getelementptr inbounds float, float* %tmp22586, i64 1
+ %tmp22588 = getelementptr inbounds float, float* %tmp22587, i64 1
+ %tmp22589 = getelementptr inbounds float, float* %tmp22588, i64 1
+ %tmp22590 = getelementptr inbounds float, float* %tmp22589, i64 1
+ %tmp22591 = getelementptr inbounds float, float* %tmp22590, i64 1
+ %tmp22592 = getelementptr inbounds float, float* %tmp22591, i64 1
+ %tmp22593 = getelementptr inbounds float, float* %tmp22592, i64 1
+ %tmp22594 = getelementptr inbounds float, float* %tmp22593, i64 1
+ %tmp22595 = getelementptr inbounds float, float* %tmp22594, i64 1
+ %tmp22596 = getelementptr inbounds float, float* %tmp22595, i64 1
+ %tmp22597 = getelementptr inbounds float, float* %tmp22596, i64 1
+ %tmp22598 = getelementptr inbounds float, float* %tmp22597, i64 1
+ %tmp22599 = getelementptr inbounds float, float* %tmp22598, i64 1
+ %tmp22600 = getelementptr inbounds float, float* %tmp22599, i64 1
+ %tmp22601 = getelementptr inbounds float, float* %tmp22600, i64 1
+ %tmp22602 = getelementptr inbounds float, float* %tmp22601, i64 1
+ %tmp22603 = getelementptr inbounds float, float* %tmp22602, i64 1
+ %tmp22604 = getelementptr inbounds float, float* %tmp22603, i64 1
+ %tmp22605 = getelementptr inbounds float, float* %tmp22604, i64 1
+ %tmp22606 = getelementptr inbounds float, float* %tmp22605, i64 1
+ %tmp22607 = getelementptr inbounds float, float* %tmp22606, i64 1
+ %tmp22608 = getelementptr inbounds float, float* %tmp22607, i64 1
+ %tmp22609 = getelementptr inbounds float, float* %tmp22608, i64 1
+ %tmp22610 = getelementptr inbounds float, float* %tmp22609, i64 1
+ %tmp22611 = getelementptr inbounds float, float* %tmp22610, i64 1
+ %tmp22612 = getelementptr inbounds float, float* %tmp22611, i64 1
+ %tmp22613 = getelementptr inbounds float, float* %tmp22612, i64 1
+ %tmp22614 = getelementptr inbounds float, float* %tmp22613, i64 1
+ %tmp22615 = getelementptr inbounds float, float* %tmp22614, i64 1
+ %tmp22616 = getelementptr inbounds float, float* %tmp22615, i64 1
+ %tmp22617 = getelementptr inbounds float, float* %tmp22616, i64 1
+ %tmp22618 = getelementptr inbounds float, float* %tmp22617, i64 1
+ %tmp22619 = getelementptr inbounds float, float* %tmp22618, i64 1
+ %tmp22620 = getelementptr inbounds float, float* %tmp22619, i64 1
+ %tmp22621 = getelementptr inbounds float, float* %tmp22620, i64 1
+ %tmp22622 = getelementptr inbounds float, float* %tmp22621, i64 1
+ %tmp22623 = getelementptr inbounds float, float* %tmp22622, i64 1
+ %tmp22624 = getelementptr inbounds float, float* %tmp22623, i64 1
+ %tmp22625 = getelementptr inbounds float, float* %tmp22624, i64 1
+ %tmp22626 = getelementptr inbounds float, float* %tmp22625, i64 1
+ %tmp22627 = getelementptr inbounds float, float* %tmp22626, i64 1
+ %tmp22628 = getelementptr inbounds float, float* %tmp22627, i64 1
+ %tmp22629 = getelementptr inbounds float, float* %tmp22628, i64 1
+ %tmp22630 = getelementptr inbounds float, float* %tmp22629, i64 1
+ %tmp22631 = getelementptr inbounds float, float* %tmp22630, i64 1
+ %tmp22632 = getelementptr inbounds float, float* %tmp22631, i64 1
+ %tmp22633 = getelementptr inbounds float, float* %tmp22632, i64 1
+ %tmp22634 = getelementptr inbounds float, float* %tmp22633, i64 1
+ %tmp22635 = getelementptr inbounds float, float* %tmp22634, i64 1
+ %tmp22636 = getelementptr inbounds float, float* %tmp22635, i64 1
+ %tmp22637 = getelementptr inbounds float, float* %tmp22636, i64 1
+ %tmp22638 = getelementptr inbounds float, float* %tmp22637, i64 1
+ %tmp22639 = getelementptr inbounds float, float* %tmp22638, i64 1
+ %tmp22640 = getelementptr inbounds float, float* %tmp22639, i64 1
+ %tmp22641 = getelementptr inbounds float, float* %tmp22640, i64 1
+ %tmp22642 = getelementptr inbounds float, float* %tmp22641, i64 1
+ %tmp22643 = getelementptr inbounds float, float* %tmp22642, i64 1
+ %tmp22644 = getelementptr inbounds float, float* %tmp22643, i64 1
+ %tmp22645 = getelementptr inbounds float, float* %tmp22644, i64 1
+ %tmp22646 = getelementptr inbounds float, float* %tmp22645, i64 1
+ %tmp22647 = getelementptr inbounds float, float* %tmp22646, i64 1
+ %tmp22648 = getelementptr inbounds float, float* %tmp22647, i64 1
+ %tmp22649 = getelementptr inbounds float, float* %tmp22648, i64 1
+ %tmp22650 = getelementptr inbounds float, float* %tmp22649, i64 1
+ %tmp22651 = getelementptr inbounds float, float* %tmp22650, i64 1
+ %tmp22652 = getelementptr inbounds float, float* %tmp22651, i64 1
+ %tmp22653 = getelementptr inbounds float, float* %tmp22652, i64 1
+ %tmp22654 = getelementptr inbounds float, float* %tmp22653, i64 1
+ %tmp22655 = getelementptr inbounds float, float* %tmp22654, i64 1
+ %tmp22656 = getelementptr inbounds float, float* %tmp22655, i64 1
+ %tmp22657 = getelementptr inbounds float, float* %tmp22656, i64 1
+ %tmp22658 = getelementptr inbounds float, float* %tmp22657, i64 1
+ %tmp22659 = getelementptr inbounds float, float* %tmp22658, i64 1
+ %tmp22660 = getelementptr inbounds float, float* %tmp22659, i64 1
+ %tmp22661 = getelementptr inbounds float, float* %tmp22660, i64 1
+ %tmp22662 = getelementptr inbounds float, float* %tmp22661, i64 1
+ %tmp22663 = getelementptr inbounds float, float* %tmp22662, i64 1
+ %tmp22664 = getelementptr inbounds float, float* %tmp22663, i64 1
+ %tmp22665 = getelementptr inbounds float, float* %tmp22664, i64 1
+ %tmp22666 = getelementptr inbounds float, float* %tmp22665, i64 1
+ %tmp22667 = getelementptr inbounds float, float* %tmp22666, i64 1
+ %tmp22668 = getelementptr inbounds float, float* %tmp22667, i64 1
+ %tmp22669 = getelementptr inbounds float, float* %tmp22668, i64 1
+ %tmp22670 = getelementptr inbounds float, float* %tmp22669, i64 1
+ %tmp22671 = getelementptr inbounds float, float* %tmp22670, i64 1
+ %tmp22672 = getelementptr inbounds float, float* %tmp22671, i64 1
+ %tmp22673 = getelementptr inbounds float, float* %tmp22672, i64 1
+ %tmp22674 = getelementptr inbounds float, float* %tmp22673, i64 1
+ %tmp22675 = getelementptr inbounds float, float* %tmp22674, i64 1
+ %tmp22676 = getelementptr inbounds float, float* %tmp22675, i64 1
+ %tmp22677 = getelementptr inbounds float, float* %tmp22676, i64 1
+ %tmp22678 = getelementptr inbounds float, float* %tmp22677, i64 1
+ %tmp22679 = getelementptr inbounds float, float* %tmp22678, i64 1
+ %tmp22680 = getelementptr inbounds float, float* %tmp22679, i64 1
+ %tmp22681 = getelementptr inbounds float, float* %tmp22680, i64 1
+ %tmp22682 = getelementptr inbounds float, float* %tmp22681, i64 1
+ %tmp22683 = getelementptr inbounds float, float* %tmp22682, i64 1
+ %tmp22684 = getelementptr inbounds float, float* %tmp22683, i64 1
+ %tmp22685 = getelementptr inbounds float, float* %tmp22684, i64 1
+ %tmp22686 = getelementptr inbounds float, float* %tmp22685, i64 1
+ %tmp22687 = getelementptr inbounds float, float* %tmp22686, i64 1
+ %tmp22688 = getelementptr inbounds float, float* %tmp22687, i64 1
+ %tmp22689 = getelementptr inbounds float, float* %tmp22688, i64 1
+ %tmp22690 = getelementptr inbounds float, float* %tmp22689, i64 1
+ %tmp22691 = getelementptr inbounds float, float* %tmp22690, i64 1
+ %tmp22692 = getelementptr inbounds float, float* %tmp22691, i64 1
+ %tmp22693 = getelementptr inbounds float, float* %tmp22692, i64 1
+ %tmp22694 = getelementptr inbounds float, float* %tmp22693, i64 1
+ %tmp22695 = getelementptr inbounds float, float* %tmp22694, i64 1
+ %tmp22696 = getelementptr inbounds float, float* %tmp22695, i64 1
+ %tmp22697 = getelementptr inbounds float, float* %tmp22696, i64 1
+ %tmp22698 = getelementptr inbounds float, float* %tmp22697, i64 1
+ %tmp22699 = getelementptr inbounds float, float* %tmp22698, i64 1
+ %tmp22700 = getelementptr inbounds float, float* %tmp22699, i64 1
+ %tmp22701 = getelementptr inbounds float, float* %tmp22700, i64 1
+ %tmp22702 = getelementptr inbounds float, float* %tmp22701, i64 1
+ %tmp22703 = getelementptr inbounds float, float* %tmp22702, i64 1
+ %tmp22704 = getelementptr inbounds float, float* %tmp22703, i64 1
+ %tmp22705 = getelementptr inbounds float, float* %tmp22704, i64 1
+ %tmp22706 = getelementptr inbounds float, float* %tmp22705, i64 1
+ %tmp22707 = getelementptr inbounds float, float* %tmp22706, i64 1
+ %tmp22708 = getelementptr inbounds float, float* %tmp22707, i64 1
+ %tmp22709 = getelementptr inbounds float, float* %tmp22708, i64 1
+ %tmp22710 = getelementptr inbounds float, float* %tmp22709, i64 1
+ %tmp22711 = getelementptr inbounds float, float* %tmp22710, i64 1
+ %tmp22712 = getelementptr inbounds float, float* %tmp22711, i64 1
+ %tmp22713 = getelementptr inbounds float, float* %tmp22712, i64 1
+ %tmp22714 = getelementptr inbounds float, float* %tmp22713, i64 1
+ %tmp22715 = getelementptr inbounds float, float* %tmp22714, i64 1
+ %tmp22716 = getelementptr inbounds float, float* %tmp22715, i64 1
+ %tmp22717 = getelementptr inbounds float, float* %tmp22716, i64 1
+ %tmp22718 = getelementptr inbounds float, float* %tmp22717, i64 1
+ %tmp22719 = getelementptr inbounds float, float* %tmp22718, i64 1
+ %tmp22720 = getelementptr inbounds float, float* %tmp22719, i64 1
+ %tmp22721 = getelementptr inbounds float, float* %tmp22720, i64 1
+ %tmp22722 = getelementptr inbounds float, float* %tmp22721, i64 1
+ %tmp22723 = getelementptr inbounds float, float* %tmp22722, i64 1
+ %tmp22724 = getelementptr inbounds float, float* %tmp22723, i64 1
+ %tmp22725 = getelementptr inbounds float, float* %tmp22724, i64 1
+ %tmp22726 = getelementptr inbounds float, float* %tmp22725, i64 1
+ %tmp22727 = getelementptr inbounds float, float* %tmp22726, i64 1
+ %tmp22728 = getelementptr inbounds float, float* %tmp22727, i64 1
+ %tmp22729 = getelementptr inbounds float, float* %tmp22728, i64 1
+ %tmp22730 = getelementptr inbounds float, float* %tmp22729, i64 1
+ %tmp22731 = getelementptr inbounds float, float* %tmp22730, i64 1
+ %tmp22732 = getelementptr inbounds float, float* %tmp22731, i64 1
+ %tmp22733 = getelementptr inbounds float, float* %tmp22732, i64 1
+ %tmp22734 = getelementptr inbounds float, float* %tmp22733, i64 1
+ %tmp22735 = getelementptr inbounds float, float* %tmp22734, i64 1
+ %tmp22736 = getelementptr inbounds float, float* %tmp22735, i64 1
+ %tmp22737 = getelementptr inbounds float, float* %tmp22736, i64 1
+ %tmp22738 = getelementptr inbounds float, float* %tmp22737, i64 1
+ %tmp22739 = getelementptr inbounds float, float* %tmp22738, i64 1
+ %tmp22740 = getelementptr inbounds float, float* %tmp22739, i64 1
+ %tmp22741 = getelementptr inbounds float, float* %tmp22740, i64 1
+ %tmp22742 = getelementptr inbounds float, float* %tmp22741, i64 1
+ %tmp22743 = getelementptr inbounds float, float* %tmp22742, i64 1
+ %tmp22744 = getelementptr inbounds float, float* %tmp22743, i64 1
+ %tmp22745 = getelementptr inbounds float, float* %tmp22744, i64 1
+ %tmp22746 = getelementptr inbounds float, float* %tmp22745, i64 1
+ %tmp22747 = getelementptr inbounds float, float* %tmp22746, i64 1
+ %tmp22748 = getelementptr inbounds float, float* %tmp22747, i64 1
+ %tmp22749 = getelementptr inbounds float, float* %tmp22748, i64 1
+ %tmp22750 = getelementptr inbounds float, float* %tmp22749, i64 1
+ %tmp22751 = getelementptr inbounds float, float* %tmp22750, i64 1
+ %tmp22752 = getelementptr inbounds float, float* %tmp22751, i64 1
+ %tmp22753 = getelementptr inbounds float, float* %tmp22752, i64 1
+ %tmp22754 = getelementptr inbounds float, float* %tmp22753, i64 1
+ %tmp22755 = getelementptr inbounds float, float* %tmp22754, i64 1
+ %tmp22756 = getelementptr inbounds float, float* %tmp22755, i64 1
+ %tmp22757 = getelementptr inbounds float, float* %tmp22756, i64 1
+ %tmp22758 = getelementptr inbounds float, float* %tmp22757, i64 1
+ %tmp22759 = getelementptr inbounds float, float* %tmp22758, i64 1
+ %tmp22760 = getelementptr inbounds float, float* %tmp22759, i64 1
+ %tmp22761 = getelementptr inbounds float, float* %tmp22760, i64 1
+ %tmp22762 = getelementptr inbounds float, float* %tmp22761, i64 1
+ %tmp22763 = getelementptr inbounds float, float* %tmp22762, i64 1
+ %tmp22764 = getelementptr inbounds float, float* %tmp22763, i64 1
+ %tmp22765 = getelementptr inbounds float, float* %tmp22764, i64 1
+ %tmp22766 = getelementptr inbounds float, float* %tmp22765, i64 1
+ %tmp22767 = getelementptr inbounds float, float* %tmp22766, i64 1
+ %tmp22768 = getelementptr inbounds float, float* %tmp22767, i64 1
+ %tmp22769 = getelementptr inbounds float, float* %tmp22768, i64 1
+ %tmp22770 = getelementptr inbounds float, float* %tmp22769, i64 1
+ %tmp22771 = getelementptr inbounds float, float* %tmp22770, i64 1
+ %tmp22772 = getelementptr inbounds float, float* %tmp22771, i64 1
+ %tmp22773 = getelementptr inbounds float, float* %tmp22772, i64 1
+ %tmp22774 = getelementptr inbounds float, float* %tmp22773, i64 1
+ %tmp22775 = getelementptr inbounds float, float* %tmp22774, i64 1
+ %tmp22776 = getelementptr inbounds float, float* %tmp22775, i64 1
+ %tmp22777 = getelementptr inbounds float, float* %tmp22776, i64 1
+ %tmp22778 = getelementptr inbounds float, float* %tmp22777, i64 1
+ %tmp22779 = getelementptr inbounds float, float* %tmp22778, i64 1
+ %tmp22780 = getelementptr inbounds float, float* %tmp22779, i64 1
+ %tmp22781 = getelementptr inbounds float, float* %tmp22780, i64 1
+ %tmp22782 = getelementptr inbounds float, float* %tmp22781, i64 1
+ %tmp22783 = getelementptr inbounds float, float* %tmp22782, i64 1
+ %tmp22784 = getelementptr inbounds float, float* %tmp22783, i64 1
+ %tmp22785 = getelementptr inbounds float, float* %tmp22784, i64 1
+ %tmp22786 = getelementptr inbounds float, float* %tmp22785, i64 1
+ %tmp22787 = getelementptr inbounds float, float* %tmp22786, i64 1
+ %tmp22788 = getelementptr inbounds float, float* %tmp22787, i64 1
+ %tmp22789 = getelementptr inbounds float, float* %tmp22788, i64 1
+ %tmp22790 = getelementptr inbounds float, float* %tmp22789, i64 1
+ %tmp22791 = getelementptr inbounds float, float* %tmp22790, i64 1
+ %tmp22792 = getelementptr inbounds float, float* %tmp22791, i64 1
+ %tmp22793 = getelementptr inbounds float, float* %tmp22792, i64 1
+ %tmp22794 = getelementptr inbounds float, float* %tmp22793, i64 1
+ %tmp22795 = getelementptr inbounds float, float* %tmp22794, i64 1
+ %tmp22796 = getelementptr inbounds float, float* %tmp22795, i64 1
+ %tmp22797 = getelementptr inbounds float, float* %tmp22796, i64 1
+ %tmp22798 = getelementptr inbounds float, float* %tmp22797, i64 1
+ %tmp22799 = getelementptr inbounds float, float* %tmp22798, i64 1
+ %tmp22800 = getelementptr inbounds float, float* %tmp22799, i64 1
+ %tmp22801 = getelementptr inbounds float, float* %tmp22800, i64 1
+ %tmp22802 = getelementptr inbounds float, float* %tmp22801, i64 1
+ %tmp22803 = getelementptr inbounds float, float* %tmp22802, i64 1
+ %tmp22804 = getelementptr inbounds float, float* %tmp22803, i64 1
+ %tmp22805 = getelementptr inbounds float, float* %tmp22804, i64 1
+ %tmp22806 = getelementptr inbounds float, float* %tmp22805, i64 1
+ %tmp22807 = getelementptr inbounds float, float* %tmp22806, i64 1
+ %tmp22808 = getelementptr inbounds float, float* %tmp22807, i64 1
+ %tmp22809 = getelementptr inbounds float, float* %tmp22808, i64 1
+ %tmp22810 = getelementptr inbounds float, float* %tmp22809, i64 1
+ %tmp22811 = getelementptr inbounds float, float* %tmp22810, i64 1
+ %tmp22812 = getelementptr inbounds float, float* %tmp22811, i64 1
+ %tmp22813 = getelementptr inbounds float, float* %tmp22812, i64 1
+ %tmp22814 = getelementptr inbounds float, float* %tmp22813, i64 1
+ %tmp22815 = getelementptr inbounds float, float* %tmp22814, i64 1
+ %tmp22816 = getelementptr inbounds float, float* %tmp22815, i64 1
+ %tmp22817 = getelementptr inbounds float, float* %tmp22816, i64 1
+ %tmp22818 = getelementptr inbounds float, float* %tmp22817, i64 1
+ %tmp22819 = getelementptr inbounds float, float* %tmp22818, i64 1
+ %tmp22820 = getelementptr inbounds float, float* %tmp22819, i64 1
+ %tmp22821 = getelementptr inbounds float, float* %tmp22820, i64 1
+ %tmp22822 = getelementptr inbounds float, float* %tmp22821, i64 1
+ %tmp22823 = getelementptr inbounds float, float* %tmp22822, i64 1
+ %tmp22824 = getelementptr inbounds float, float* %tmp22823, i64 1
+ %tmp22825 = getelementptr inbounds float, float* %tmp22824, i64 1
+ %tmp22826 = getelementptr inbounds float, float* %tmp22825, i64 1
+ %tmp22827 = getelementptr inbounds float, float* %tmp22826, i64 1
+ %tmp22828 = getelementptr inbounds float, float* %tmp22827, i64 1
+ %tmp22829 = getelementptr inbounds float, float* %tmp22828, i64 1
+ %tmp22830 = getelementptr inbounds float, float* %tmp22829, i64 1
+ %tmp22831 = getelementptr inbounds float, float* %tmp22830, i64 1
+ %tmp22832 = getelementptr inbounds float, float* %tmp22831, i64 1
+ %tmp22833 = getelementptr inbounds float, float* %tmp22832, i64 1
+ %tmp22834 = getelementptr inbounds float, float* %tmp22833, i64 1
+ %tmp22835 = getelementptr inbounds float, float* %tmp22834, i64 1
+ %tmp22836 = getelementptr inbounds float, float* %tmp22835, i64 1
+ %tmp22837 = getelementptr inbounds float, float* %tmp22836, i64 1
+ %tmp22838 = getelementptr inbounds float, float* %tmp22837, i64 1
+ %tmp22839 = getelementptr inbounds float, float* %tmp22838, i64 1
+ %tmp22840 = getelementptr inbounds float, float* %tmp22839, i64 1
+ %tmp22841 = getelementptr inbounds float, float* %tmp22840, i64 1
+ %tmp22842 = getelementptr inbounds float, float* %tmp22841, i64 1
+ %tmp22843 = getelementptr inbounds float, float* %tmp22842, i64 1
+ %tmp22844 = getelementptr inbounds float, float* %tmp22843, i64 1
+ %tmp22845 = getelementptr inbounds float, float* %tmp22844, i64 1
+ %tmp22846 = getelementptr inbounds float, float* %tmp22845, i64 1
+ %tmp22847 = getelementptr inbounds float, float* %tmp22846, i64 1
+ %tmp22848 = getelementptr inbounds float, float* %tmp22847, i64 1
+ %tmp22849 = getelementptr inbounds float, float* %tmp22848, i64 1
+ %tmp22850 = getelementptr inbounds float, float* %tmp22849, i64 1
+ %tmp22851 = getelementptr inbounds float, float* %tmp22850, i64 1
+ %tmp22852 = getelementptr inbounds float, float* %tmp22851, i64 1
+ %tmp22853 = getelementptr inbounds float, float* %tmp22852, i64 1
+ %tmp22854 = getelementptr inbounds float, float* %tmp22853, i64 1
+ %tmp22855 = getelementptr inbounds float, float* %tmp22854, i64 1
+ %tmp22856 = getelementptr inbounds float, float* %tmp22855, i64 1
+ %tmp22857 = getelementptr inbounds float, float* %tmp22856, i64 1
+ %tmp22858 = getelementptr inbounds float, float* %tmp22857, i64 1
+ %tmp22859 = getelementptr inbounds float, float* %tmp22858, i64 1
+ %tmp22860 = getelementptr inbounds float, float* %tmp22859, i64 1
+ %tmp22861 = getelementptr inbounds float, float* %tmp22860, i64 1
+ %tmp22862 = getelementptr inbounds float, float* %tmp22861, i64 1
+ %tmp22863 = getelementptr inbounds float, float* %tmp22862, i64 1
+ %tmp22864 = getelementptr inbounds float, float* %tmp22863, i64 1
+ %tmp22865 = getelementptr inbounds float, float* %tmp22864, i64 1
+ %tmp22866 = getelementptr inbounds float, float* %tmp22865, i64 1
+ %tmp22867 = getelementptr inbounds float, float* %tmp22866, i64 1
+ %tmp22868 = getelementptr inbounds float, float* %tmp22867, i64 1
+ %tmp22869 = getelementptr inbounds float, float* %tmp22868, i64 1
+ %tmp22870 = getelementptr inbounds float, float* %tmp22869, i64 1
+ %tmp22871 = getelementptr inbounds float, float* %tmp22870, i64 1
+ %tmp22872 = getelementptr inbounds float, float* %tmp22871, i64 1
+ %tmp22873 = getelementptr inbounds float, float* %tmp22872, i64 1
+ %tmp22874 = getelementptr inbounds float, float* %tmp22873, i64 1
+ %tmp22875 = getelementptr inbounds float, float* %tmp22874, i64 1
+ %tmp22876 = getelementptr inbounds float, float* %tmp22875, i64 1
+ %tmp22877 = getelementptr inbounds float, float* %tmp22876, i64 1
+ %tmp22878 = getelementptr inbounds float, float* %tmp22877, i64 1
+ %tmp22879 = getelementptr inbounds float, float* %tmp22878, i64 1
+ %tmp22880 = getelementptr inbounds float, float* %tmp22879, i64 1
+ %tmp22881 = getelementptr inbounds float, float* %tmp22880, i64 1
+ %tmp22882 = getelementptr inbounds float, float* %tmp22881, i64 1
+ %tmp22883 = getelementptr inbounds float, float* %tmp22882, i64 1
+ %tmp22884 = getelementptr inbounds float, float* %tmp22883, i64 1
+ %tmp22885 = getelementptr inbounds float, float* %tmp22884, i64 1
+ %tmp22886 = getelementptr inbounds float, float* %tmp22885, i64 1
+ %tmp22887 = getelementptr inbounds float, float* %tmp22886, i64 1
+ %tmp22888 = getelementptr inbounds float, float* %tmp22887, i64 1
+ %tmp22889 = getelementptr inbounds float, float* %tmp22888, i64 1
+ %tmp22890 = getelementptr inbounds float, float* %tmp22889, i64 1
+ %tmp22891 = getelementptr inbounds float, float* %tmp22890, i64 1
+ %tmp22892 = getelementptr inbounds float, float* %tmp22891, i64 1
+ %tmp22893 = getelementptr inbounds float, float* %tmp22892, i64 1
+ %tmp22894 = getelementptr inbounds float, float* %tmp22893, i64 1
+ %tmp22895 = getelementptr inbounds float, float* %tmp22894, i64 1
+ %tmp22896 = getelementptr inbounds float, float* %tmp22895, i64 1
+ %tmp22897 = getelementptr inbounds float, float* %tmp22896, i64 1
+ %tmp22898 = getelementptr inbounds float, float* %tmp22897, i64 1
+ %tmp22899 = getelementptr inbounds float, float* %tmp22898, i64 1
+ %tmp22900 = getelementptr inbounds float, float* %tmp22899, i64 1
+ %tmp22901 = getelementptr inbounds float, float* %tmp22900, i64 1
+ %tmp22902 = getelementptr inbounds float, float* %tmp22901, i64 1
+ %tmp22903 = getelementptr inbounds float, float* %tmp22902, i64 1
+ %tmp22904 = getelementptr inbounds float, float* %tmp22903, i64 1
+ %tmp22905 = getelementptr inbounds float, float* %tmp22904, i64 1
+ %tmp22906 = getelementptr inbounds float, float* %tmp22905, i64 1
+ %tmp22907 = getelementptr inbounds float, float* %tmp22906, i64 1
+ %tmp22908 = getelementptr inbounds float, float* %tmp22907, i64 1
+ %tmp22909 = getelementptr inbounds float, float* %tmp22908, i64 1
+ %tmp22910 = getelementptr inbounds float, float* %tmp22909, i64 1
+ %tmp22911 = getelementptr inbounds float, float* %tmp22910, i64 1
+ %tmp22912 = getelementptr inbounds float, float* %tmp22911, i64 1
+ %tmp22913 = getelementptr inbounds float, float* %tmp22912, i64 1
+ %tmp22914 = getelementptr inbounds float, float* %tmp22913, i64 1
+ %tmp22915 = getelementptr inbounds float, float* %tmp22914, i64 1
+ %tmp22916 = getelementptr inbounds float, float* %tmp22915, i64 1
+ %tmp22917 = getelementptr inbounds float, float* %tmp22916, i64 1
+ %tmp22918 = getelementptr inbounds float, float* %tmp22917, i64 1
+ %tmp22919 = getelementptr inbounds float, float* %tmp22918, i64 1
+ %tmp22920 = getelementptr inbounds float, float* %tmp22919, i64 1
+ %tmp22921 = getelementptr inbounds float, float* %tmp22920, i64 1
+ %tmp22922 = getelementptr inbounds float, float* %tmp22921, i64 1
+ %tmp22923 = getelementptr inbounds float, float* %tmp22922, i64 1
+ %tmp22924 = getelementptr inbounds float, float* %tmp22923, i64 1
+ %tmp22925 = getelementptr inbounds float, float* %tmp22924, i64 1
+ %tmp22926 = getelementptr inbounds float, float* %tmp22925, i64 1
+ %tmp22927 = getelementptr inbounds float, float* %tmp22926, i64 1
+ %tmp22928 = getelementptr inbounds float, float* %tmp22927, i64 1
+ %tmp22929 = getelementptr inbounds float, float* %tmp22928, i64 1
+ %tmp22930 = getelementptr inbounds float, float* %tmp22929, i64 1
+ %tmp22931 = getelementptr inbounds float, float* %tmp22930, i64 1
+ %tmp22932 = getelementptr inbounds float, float* %tmp22931, i64 1
+ %tmp22933 = getelementptr inbounds float, float* %tmp22932, i64 1
+ %tmp22934 = getelementptr inbounds float, float* %tmp22933, i64 1
+ %tmp22935 = getelementptr inbounds float, float* %tmp22934, i64 1
+ %tmp22936 = getelementptr inbounds float, float* %tmp22935, i64 1
+ %tmp22937 = getelementptr inbounds float, float* %tmp22936, i64 1
+ %tmp22938 = getelementptr inbounds float, float* %tmp22937, i64 1
+ %tmp22939 = getelementptr inbounds float, float* %tmp22938, i64 1
+ %tmp22940 = getelementptr inbounds float, float* %tmp22939, i64 1
+ %tmp22941 = getelementptr inbounds float, float* %tmp22940, i64 1
+ %tmp22942 = getelementptr inbounds float, float* %tmp22941, i64 1
+ %tmp22943 = getelementptr inbounds float, float* %tmp22942, i64 1
+ %tmp22944 = getelementptr inbounds float, float* %tmp22943, i64 1
+ %tmp22945 = getelementptr inbounds float, float* %tmp22944, i64 1
+ %tmp22946 = getelementptr inbounds float, float* %tmp22945, i64 1
+ %tmp22947 = getelementptr inbounds float, float* %tmp22946, i64 1
+ %tmp22948 = getelementptr inbounds float, float* %tmp22947, i64 1
+ %tmp22949 = getelementptr inbounds float, float* %tmp22948, i64 1
+ %tmp22950 = getelementptr inbounds float, float* %tmp22949, i64 1
+ %tmp22951 = getelementptr inbounds float, float* %tmp22950, i64 1
+ %tmp22952 = getelementptr inbounds float, float* %tmp22951, i64 1
+ %tmp22953 = getelementptr inbounds float, float* %tmp22952, i64 1
+ %tmp22954 = getelementptr inbounds float, float* %tmp22953, i64 1
+ %tmp22955 = getelementptr inbounds float, float* %tmp22954, i64 1
+ %tmp22956 = getelementptr inbounds float, float* %tmp22955, i64 1
+ %tmp22957 = getelementptr inbounds float, float* %tmp22956, i64 1
+ %tmp22958 = getelementptr inbounds float, float* %tmp22957, i64 1
+ %tmp22959 = getelementptr inbounds float, float* %tmp22958, i64 1
+ %tmp22960 = getelementptr inbounds float, float* %tmp22959, i64 1
+ %tmp22961 = getelementptr inbounds float, float* %tmp22960, i64 1
+ %tmp22962 = getelementptr inbounds float, float* %tmp22961, i64 1
+ %tmp22963 = getelementptr inbounds float, float* %tmp22962, i64 1
+ %tmp22964 = getelementptr inbounds float, float* %tmp22963, i64 1
+ %tmp22965 = getelementptr inbounds float, float* %tmp22964, i64 1
+ %tmp22966 = getelementptr inbounds float, float* %tmp22965, i64 1
+ %tmp22967 = getelementptr inbounds float, float* %tmp22966, i64 1
+ %tmp22968 = getelementptr inbounds float, float* %tmp22967, i64 1
+ %tmp22969 = getelementptr inbounds float, float* %tmp22968, i64 1
+ %tmp22970 = getelementptr inbounds float, float* %tmp22969, i64 1
+ %tmp22971 = getelementptr inbounds float, float* %tmp22970, i64 1
+ %tmp22972 = getelementptr inbounds float, float* %tmp22971, i64 1
+ %tmp22973 = getelementptr inbounds float, float* %tmp22972, i64 1
+ %tmp22974 = getelementptr inbounds float, float* %tmp22973, i64 1
+ %tmp22975 = getelementptr inbounds float, float* %tmp22974, i64 1
+ %tmp22976 = getelementptr inbounds float, float* %tmp22975, i64 1
+ %tmp22977 = getelementptr inbounds float, float* %tmp22976, i64 1
+ %tmp22978 = getelementptr inbounds float, float* %tmp22977, i64 1
+ %tmp22979 = getelementptr inbounds float, float* %tmp22978, i64 1
+ %tmp22980 = getelementptr inbounds float, float* %tmp22979, i64 1
+ %tmp22981 = getelementptr inbounds float, float* %tmp22980, i64 1
+ %tmp22982 = getelementptr inbounds float, float* %tmp22981, i64 1
+ %tmp22983 = getelementptr inbounds float, float* %tmp22982, i64 1
+ %tmp22984 = getelementptr inbounds float, float* %tmp22983, i64 1
+ %tmp22985 = getelementptr inbounds float, float* %tmp22984, i64 1
+ %tmp22986 = getelementptr inbounds float, float* %tmp22985, i64 1
+ %tmp22987 = getelementptr inbounds float, float* %tmp22986, i64 1
+ %tmp22988 = getelementptr inbounds float, float* %tmp22987, i64 1
+ %tmp22989 = getelementptr inbounds float, float* %tmp22988, i64 1
+ %tmp22990 = getelementptr inbounds float, float* %tmp22989, i64 1
+ %tmp22991 = getelementptr inbounds float, float* %tmp22990, i64 1
+ %tmp22992 = getelementptr inbounds float, float* %tmp22991, i64 1
+ %tmp22993 = getelementptr inbounds float, float* %tmp22992, i64 1
+ %tmp22994 = getelementptr inbounds float, float* %tmp22993, i64 1
+ %tmp22995 = getelementptr inbounds float, float* %tmp22994, i64 1
+ %tmp22996 = getelementptr inbounds float, float* %tmp22995, i64 1
+ %tmp22997 = getelementptr inbounds float, float* %tmp22996, i64 1
+ %tmp22998 = getelementptr inbounds float, float* %tmp22997, i64 1
+ %tmp22999 = getelementptr inbounds float, float* %tmp22998, i64 1
+ %tmp23000 = getelementptr inbounds float, float* %tmp22999, i64 1
+ %tmp23001 = getelementptr inbounds float, float* %tmp23000, i64 1
+ %tmp23002 = getelementptr inbounds float, float* %tmp23001, i64 1
+ %tmp23003 = getelementptr inbounds float, float* %tmp23002, i64 1
+ %tmp23004 = getelementptr inbounds float, float* %tmp23003, i64 1
+ %tmp23005 = getelementptr inbounds float, float* %tmp23004, i64 1
+ %tmp23006 = getelementptr inbounds float, float* %tmp23005, i64 1
+ %tmp23007 = getelementptr inbounds float, float* %tmp23006, i64 1
+ %tmp23008 = getelementptr inbounds float, float* %tmp23007, i64 1
+ %tmp23009 = getelementptr inbounds float, float* %tmp23008, i64 1
+ %tmp23010 = getelementptr inbounds float, float* %tmp23009, i64 1
+ %tmp23011 = getelementptr inbounds float, float* %tmp23010, i64 1
+ %tmp23012 = getelementptr inbounds float, float* %tmp23011, i64 1
+ %tmp23013 = getelementptr inbounds float, float* %tmp23012, i64 1
+ %tmp23014 = getelementptr inbounds float, float* %tmp23013, i64 1
+ %tmp23015 = getelementptr inbounds float, float* %tmp23014, i64 1
+ %tmp23016 = getelementptr inbounds float, float* %tmp23015, i64 1
+ %tmp23017 = getelementptr inbounds float, float* %tmp23016, i64 1
+ %tmp23018 = getelementptr inbounds float, float* %tmp23017, i64 1
+ %tmp23019 = getelementptr inbounds float, float* %tmp23018, i64 1
+ %tmp23020 = getelementptr inbounds float, float* %tmp23019, i64 1
+ %tmp23021 = getelementptr inbounds float, float* %tmp23020, i64 1
+ %tmp23022 = getelementptr inbounds float, float* %tmp23021, i64 1
+ %tmp23023 = getelementptr inbounds float, float* %tmp23022, i64 1
+ %tmp23024 = getelementptr inbounds float, float* %tmp23023, i64 1
+ %tmp23025 = getelementptr inbounds float, float* %tmp23024, i64 1
+ %tmp23026 = getelementptr inbounds float, float* %tmp23025, i64 1
+ %tmp23027 = getelementptr inbounds float, float* %tmp23026, i64 1
+ %tmp23028 = getelementptr inbounds float, float* %tmp23027, i64 1
+ %tmp23029 = getelementptr inbounds float, float* %tmp23028, i64 1
+ %tmp23030 = getelementptr inbounds float, float* %tmp23029, i64 1
+ %tmp23031 = getelementptr inbounds float, float* %tmp23030, i64 1
+ %tmp23032 = getelementptr inbounds float, float* %tmp23031, i64 1
+ %tmp23033 = getelementptr inbounds float, float* %tmp23032, i64 1
+ %tmp23034 = getelementptr inbounds float, float* %tmp23033, i64 1
+ %tmp23035 = getelementptr inbounds float, float* %tmp23034, i64 1
+ %tmp23036 = getelementptr inbounds float, float* %tmp23035, i64 1
+ %tmp23037 = getelementptr inbounds float, float* %tmp23036, i64 1
+ %tmp23038 = getelementptr inbounds float, float* %tmp23037, i64 1
+ %tmp23039 = getelementptr inbounds float, float* %tmp23038, i64 1
+ %tmp23040 = getelementptr inbounds float, float* %tmp23039, i64 1
+ %tmp23041 = getelementptr inbounds float, float* %tmp23040, i64 1
+ %tmp23042 = getelementptr inbounds float, float* %tmp23041, i64 1
+ %tmp23043 = getelementptr inbounds float, float* %tmp23042, i64 1
+ %tmp23044 = getelementptr inbounds float, float* %tmp23043, i64 1
+ %tmp23045 = getelementptr inbounds float, float* %tmp23044, i64 1
+ %tmp23046 = getelementptr inbounds float, float* %tmp23045, i64 1
+ %tmp23047 = getelementptr inbounds float, float* %tmp23046, i64 1
+ %tmp23048 = getelementptr inbounds float, float* %tmp23047, i64 1
+ %tmp23049 = getelementptr inbounds float, float* %tmp23048, i64 1
+ %tmp23050 = getelementptr inbounds float, float* %tmp23049, i64 1
+ %tmp23051 = getelementptr inbounds float, float* %tmp23050, i64 1
+ %tmp23052 = getelementptr inbounds float, float* %tmp23051, i64 1
+ %tmp23053 = getelementptr inbounds float, float* %tmp23052, i64 1
+ %tmp23054 = getelementptr inbounds float, float* %tmp23053, i64 1
+ %tmp23055 = getelementptr inbounds float, float* %tmp23054, i64 1
+ %tmp23056 = getelementptr inbounds float, float* %tmp23055, i64 1
+ %tmp23057 = getelementptr inbounds float, float* %tmp23056, i64 1
+ %tmp23058 = getelementptr inbounds float, float* %tmp23057, i64 1
+ %tmp23059 = getelementptr inbounds float, float* %tmp23058, i64 1
+ %tmp23060 = getelementptr inbounds float, float* %tmp23059, i64 1
+ %tmp23061 = getelementptr inbounds float, float* %tmp23060, i64 1
+ %tmp23062 = getelementptr inbounds float, float* %tmp23061, i64 1
+ %tmp23063 = getelementptr inbounds float, float* %tmp23062, i64 1
+ %tmp23064 = getelementptr inbounds float, float* %tmp23063, i64 1
+ %tmp23065 = getelementptr inbounds float, float* %tmp23064, i64 1
+ %tmp23066 = getelementptr inbounds float, float* %tmp23065, i64 1
+ %tmp23067 = getelementptr inbounds float, float* %tmp23066, i64 1
+ %tmp23068 = getelementptr inbounds float, float* %tmp23067, i64 1
+ %tmp23069 = getelementptr inbounds float, float* %tmp23068, i64 1
+ %tmp23070 = getelementptr inbounds float, float* %tmp23069, i64 1
+ %tmp23071 = getelementptr inbounds float, float* %tmp23070, i64 1
+ %tmp23072 = getelementptr inbounds float, float* %tmp23071, i64 1
+ %tmp23073 = getelementptr inbounds float, float* %tmp23072, i64 1
+ %tmp23074 = getelementptr inbounds float, float* %tmp23073, i64 1
+ %tmp23075 = getelementptr inbounds float, float* %tmp23074, i64 1
+ %tmp23076 = getelementptr inbounds float, float* %tmp23075, i64 1
+ %tmp23077 = getelementptr inbounds float, float* %tmp23076, i64 1
+ %tmp23078 = getelementptr inbounds float, float* %tmp23077, i64 1
+ %tmp23079 = getelementptr inbounds float, float* %tmp23078, i64 1
+ %tmp23080 = getelementptr inbounds float, float* %tmp23079, i64 1
+ %tmp23081 = getelementptr inbounds float, float* %tmp23080, i64 1
+ %tmp23082 = getelementptr inbounds float, float* %tmp23081, i64 1
+ %tmp23083 = getelementptr inbounds float, float* %tmp23082, i64 1
+ %tmp23084 = getelementptr inbounds float, float* %tmp23083, i64 1
+ %tmp23085 = getelementptr inbounds float, float* %tmp23084, i64 1
+ %tmp23086 = getelementptr inbounds float, float* %tmp23085, i64 1
+ %tmp23087 = getelementptr inbounds float, float* %tmp23086, i64 1
+ %tmp23088 = getelementptr inbounds float, float* %tmp23087, i64 1
+ %tmp23089 = getelementptr inbounds float, float* %tmp23088, i64 1
+ %tmp23090 = getelementptr inbounds float, float* %tmp23089, i64 1
+ %tmp23091 = getelementptr inbounds float, float* %tmp23090, i64 1
+ %tmp23092 = getelementptr inbounds float, float* %tmp23091, i64 1
+ %tmp23093 = getelementptr inbounds float, float* %tmp23092, i64 1
+ %tmp23094 = getelementptr inbounds float, float* %tmp23093, i64 1
+ %tmp23095 = getelementptr inbounds float, float* %tmp23094, i64 1
+ %tmp23096 = getelementptr inbounds float, float* %tmp23095, i64 1
+ %tmp23097 = getelementptr inbounds float, float* %tmp23096, i64 1
+ %tmp23098 = getelementptr inbounds float, float* %tmp23097, i64 1
+ %tmp23099 = getelementptr inbounds float, float* %tmp23098, i64 1
+ %tmp23100 = getelementptr inbounds float, float* %tmp23099, i64 1
+ %tmp23101 = getelementptr inbounds float, float* %tmp23100, i64 1
+ %tmp23102 = getelementptr inbounds float, float* %tmp23101, i64 1
+ %tmp23103 = getelementptr inbounds float, float* %tmp23102, i64 1
+ %tmp23104 = getelementptr inbounds float, float* %tmp23103, i64 1
+ %tmp23105 = getelementptr inbounds float, float* %tmp23104, i64 1
+ %tmp23106 = getelementptr inbounds float, float* %tmp23105, i64 1
+ %tmp23107 = getelementptr inbounds float, float* %tmp23106, i64 1
+ %tmp23108 = getelementptr inbounds float, float* %tmp23107, i64 1
+ %tmp23109 = getelementptr inbounds float, float* %tmp23108, i64 1
+ %tmp23110 = getelementptr inbounds float, float* %tmp23109, i64 1
+ %tmp23111 = getelementptr inbounds float, float* %tmp23110, i64 1
+ %tmp23112 = getelementptr inbounds float, float* %tmp23111, i64 1
+ %tmp23113 = getelementptr inbounds float, float* %tmp23112, i64 1
+ %tmp23114 = getelementptr inbounds float, float* %tmp23113, i64 1
+ %tmp23115 = getelementptr inbounds float, float* %tmp23114, i64 1
+ %tmp23116 = getelementptr inbounds float, float* %tmp23115, i64 1
+ %tmp23117 = getelementptr inbounds float, float* %tmp23116, i64 1
+ %tmp23118 = getelementptr inbounds float, float* %tmp23117, i64 1
+ %tmp23119 = getelementptr inbounds float, float* %tmp23118, i64 1
+ %tmp23120 = getelementptr inbounds float, float* %tmp23119, i64 1
+ %tmp23121 = getelementptr inbounds float, float* %tmp23120, i64 1
+ %tmp23122 = getelementptr inbounds float, float* %tmp23121, i64 1
+ %tmp23123 = getelementptr inbounds float, float* %tmp23122, i64 1
+ %tmp23124 = getelementptr inbounds float, float* %tmp23123, i64 1
+ %tmp23125 = getelementptr inbounds float, float* %tmp23124, i64 1
+ %tmp23126 = getelementptr inbounds float, float* %tmp23125, i64 1
+ %tmp23127 = getelementptr inbounds float, float* %tmp23126, i64 1
+ %tmp23128 = getelementptr inbounds float, float* %tmp23127, i64 1
+ %tmp23129 = getelementptr inbounds float, float* %tmp23128, i64 1
+ %tmp23130 = getelementptr inbounds float, float* %tmp23129, i64 1
+ %tmp23131 = getelementptr inbounds float, float* %tmp23130, i64 1
+ %tmp23132 = getelementptr inbounds float, float* %tmp23131, i64 1
+ %tmp23133 = getelementptr inbounds float, float* %tmp23132, i64 1
+ %tmp23134 = getelementptr inbounds float, float* %tmp23133, i64 1
+ %tmp23135 = getelementptr inbounds float, float* %tmp23134, i64 1
+ %tmp23136 = getelementptr inbounds float, float* %tmp23135, i64 1
+ %tmp23137 = getelementptr inbounds float, float* %tmp23136, i64 1
+ %tmp23138 = getelementptr inbounds float, float* %tmp23137, i64 1
+ %tmp23139 = getelementptr inbounds float, float* %tmp23138, i64 1
+ %tmp23140 = getelementptr inbounds float, float* %tmp23139, i64 1
+ %tmp23141 = getelementptr inbounds float, float* %tmp23140, i64 1
+ %tmp23142 = getelementptr inbounds float, float* %tmp23141, i64 1
+ %tmp23143 = getelementptr inbounds float, float* %tmp23142, i64 1
+ %tmp23144 = getelementptr inbounds float, float* %tmp23143, i64 1
+ %tmp23145 = getelementptr inbounds float, float* %tmp23144, i64 1
+ %tmp23146 = getelementptr inbounds float, float* %tmp23145, i64 1
+ %tmp23147 = getelementptr inbounds float, float* %tmp23146, i64 1
+ %tmp23148 = getelementptr inbounds float, float* %tmp23147, i64 1
+ %tmp23149 = getelementptr inbounds float, float* %tmp23148, i64 1
+ %tmp23150 = getelementptr inbounds float, float* %tmp23149, i64 1
+ %tmp23151 = getelementptr inbounds float, float* %tmp23150, i64 1
+ %tmp23152 = getelementptr inbounds float, float* %tmp23151, i64 1
+ %tmp23153 = getelementptr inbounds float, float* %tmp23152, i64 1
+ %tmp23154 = getelementptr inbounds float, float* %tmp23153, i64 1
+ %tmp23155 = getelementptr inbounds float, float* %tmp23154, i64 1
+ %tmp23156 = getelementptr inbounds float, float* %tmp23155, i64 1
+ %tmp23157 = getelementptr inbounds float, float* %tmp23156, i64 1
+ %tmp23158 = getelementptr inbounds float, float* %tmp23157, i64 1
+ %tmp23159 = getelementptr inbounds float, float* %tmp23158, i64 1
+ %tmp23160 = getelementptr inbounds float, float* %tmp23159, i64 1
+ %tmp23161 = getelementptr inbounds float, float* %tmp23160, i64 1
+ %tmp23162 = getelementptr inbounds float, float* %tmp23161, i64 1
+ %tmp23163 = getelementptr inbounds float, float* %tmp23162, i64 1
+ %tmp23164 = getelementptr inbounds float, float* %tmp23163, i64 1
+ %tmp23165 = getelementptr inbounds float, float* %tmp23164, i64 1
+ %tmp23166 = getelementptr inbounds float, float* %tmp23165, i64 1
+ %tmp23167 = getelementptr inbounds float, float* %tmp23166, i64 1
+ %tmp23168 = getelementptr inbounds float, float* %tmp23167, i64 1
+ %tmp23169 = getelementptr inbounds float, float* %tmp23168, i64 1
+ %tmp23170 = getelementptr inbounds float, float* %tmp23169, i64 1
+ %tmp23171 = getelementptr inbounds float, float* %tmp23170, i64 1
+ %tmp23172 = getelementptr inbounds float, float* %tmp23171, i64 1
+ %tmp23173 = getelementptr inbounds float, float* %tmp23172, i64 1
+ %tmp23174 = getelementptr inbounds float, float* %tmp23173, i64 1
+ %tmp23175 = getelementptr inbounds float, float* %tmp23174, i64 1
+ %tmp23176 = getelementptr inbounds float, float* %tmp23175, i64 1
+ %tmp23177 = getelementptr inbounds float, float* %tmp23176, i64 1
+ %tmp23178 = getelementptr inbounds float, float* %tmp23177, i64 1
+ %tmp23179 = getelementptr inbounds float, float* %tmp23178, i64 1
+ %tmp23180 = getelementptr inbounds float, float* %tmp23179, i64 1
+ %tmp23181 = getelementptr inbounds float, float* %tmp23180, i64 1
+ %tmp23182 = getelementptr inbounds float, float* %tmp23181, i64 1
+ %tmp23183 = getelementptr inbounds float, float* %tmp23182, i64 1
+ %tmp23184 = getelementptr inbounds float, float* %tmp23183, i64 1
+ %tmp23185 = getelementptr inbounds float, float* %tmp23184, i64 1
+ %tmp23186 = getelementptr inbounds float, float* %tmp23185, i64 1
+ %tmp23187 = getelementptr inbounds float, float* %tmp23186, i64 1
+ %tmp23188 = getelementptr inbounds float, float* %tmp23187, i64 1
+ %tmp23189 = getelementptr inbounds float, float* %tmp23188, i64 1
+ %tmp23190 = getelementptr inbounds float, float* %tmp23189, i64 1
+ %tmp23191 = getelementptr inbounds float, float* %tmp23190, i64 1
+ %tmp23192 = getelementptr inbounds float, float* %tmp23191, i64 1
+ %tmp23193 = getelementptr inbounds float, float* %tmp23192, i64 1
+ %tmp23194 = getelementptr inbounds float, float* %tmp23193, i64 1
+ %tmp23195 = getelementptr inbounds float, float* %tmp23194, i64 1
+ %tmp23196 = getelementptr inbounds float, float* %tmp23195, i64 1
+ %tmp23197 = getelementptr inbounds float, float* %tmp23196, i64 1
+ %tmp23198 = getelementptr inbounds float, float* %tmp23197, i64 1
+ %tmp23199 = getelementptr inbounds float, float* %tmp23198, i64 1
+ %tmp23200 = getelementptr inbounds float, float* %tmp23199, i64 1
+ %tmp23201 = getelementptr inbounds float, float* %tmp23200, i64 1
+ %tmp23202 = getelementptr inbounds float, float* %tmp23201, i64 1
+ %tmp23203 = getelementptr inbounds float, float* %tmp23202, i64 1
+ %tmp23204 = getelementptr inbounds float, float* %tmp23203, i64 1
+ %tmp23205 = getelementptr inbounds float, float* %tmp23204, i64 1
+ %tmp23206 = getelementptr inbounds float, float* %tmp23205, i64 1
+ %tmp23207 = getelementptr inbounds float, float* %tmp23206, i64 1
+ %tmp23208 = getelementptr inbounds float, float* %tmp23207, i64 1
+ %tmp23209 = getelementptr inbounds float, float* %tmp23208, i64 1
+ %tmp23210 = getelementptr inbounds float, float* %tmp23209, i64 1
+ %tmp23211 = getelementptr inbounds float, float* %tmp23210, i64 1
+ %tmp23212 = getelementptr inbounds float, float* %tmp23211, i64 1
+ %tmp23213 = getelementptr inbounds float, float* %tmp23212, i64 1
+ %tmp23214 = getelementptr inbounds float, float* %tmp23213, i64 1
+ %tmp23215 = getelementptr inbounds float, float* %tmp23214, i64 1
+ %tmp23216 = getelementptr inbounds float, float* %tmp23215, i64 1
+ %tmp23217 = getelementptr inbounds float, float* %tmp23216, i64 1
+ %tmp23218 = getelementptr inbounds float, float* %tmp23217, i64 1
+ %tmp23219 = getelementptr inbounds float, float* %tmp23218, i64 1
+ %tmp23220 = getelementptr inbounds float, float* %tmp23219, i64 1
+ %tmp23221 = getelementptr inbounds float, float* %tmp23220, i64 1
+ %tmp23222 = getelementptr inbounds float, float* %tmp23221, i64 1
+ %tmp23223 = getelementptr inbounds float, float* %tmp23222, i64 1
+ %tmp23224 = getelementptr inbounds float, float* %tmp23223, i64 1
+ %tmp23225 = getelementptr inbounds float, float* %tmp23224, i64 1
+ %tmp23226 = getelementptr inbounds float, float* %tmp23225, i64 1
+ %tmp23227 = getelementptr inbounds float, float* %tmp23226, i64 1
+ %tmp23228 = getelementptr inbounds float, float* %tmp23227, i64 1
+ %tmp23229 = getelementptr inbounds float, float* %tmp23228, i64 1
+ %tmp23230 = getelementptr inbounds float, float* %tmp23229, i64 1
+ %tmp23231 = getelementptr inbounds float, float* %tmp23230, i64 1
+ %tmp23232 = getelementptr inbounds float, float* %tmp23231, i64 1
+ %tmp23233 = getelementptr inbounds float, float* %tmp23232, i64 1
+ %tmp23234 = getelementptr inbounds float, float* %tmp23233, i64 1
+ %tmp23235 = getelementptr inbounds float, float* %tmp23234, i64 1
+ %tmp23236 = getelementptr inbounds float, float* %tmp23235, i64 1
+ %tmp23237 = getelementptr inbounds float, float* %tmp23236, i64 1
+ %tmp23238 = getelementptr inbounds float, float* %tmp23237, i64 1
+ %tmp23239 = getelementptr inbounds float, float* %tmp23238, i64 1
+ %tmp23240 = getelementptr inbounds float, float* %tmp23239, i64 1
+ %tmp23241 = getelementptr inbounds float, float* %tmp23240, i64 1
+ %tmp23242 = getelementptr inbounds float, float* %tmp23241, i64 1
+ %tmp23243 = getelementptr inbounds float, float* %tmp23242, i64 1
+ %tmp23244 = getelementptr inbounds float, float* %tmp23243, i64 1
+ %tmp23245 = getelementptr inbounds float, float* %tmp23244, i64 1
+ %tmp23246 = getelementptr inbounds float, float* %tmp23245, i64 1
+ %tmp23247 = getelementptr inbounds float, float* %tmp23246, i64 1
+ %tmp23248 = getelementptr inbounds float, float* %tmp23247, i64 1
+ %tmp23249 = getelementptr inbounds float, float* %tmp23248, i64 1
+ %tmp23250 = getelementptr inbounds float, float* %tmp23249, i64 1
+ %tmp23251 = getelementptr inbounds float, float* %tmp23250, i64 1
+ %tmp23252 = getelementptr inbounds float, float* %tmp23251, i64 1
+ %tmp23253 = getelementptr inbounds float, float* %tmp23252, i64 1
+ %tmp23254 = getelementptr inbounds float, float* %tmp23253, i64 1
+ %tmp23255 = getelementptr inbounds float, float* %tmp23254, i64 1
+ %tmp23256 = getelementptr inbounds float, float* %tmp23255, i64 1
+ %tmp23257 = getelementptr inbounds float, float* %tmp23256, i64 1
+ %tmp23258 = getelementptr inbounds float, float* %tmp23257, i64 1
+ %tmp23259 = getelementptr inbounds float, float* %tmp23258, i64 1
+ %tmp23260 = getelementptr inbounds float, float* %tmp23259, i64 1
+ %tmp23261 = getelementptr inbounds float, float* %tmp23260, i64 1
+ %tmp23262 = getelementptr inbounds float, float* %tmp23261, i64 1
+ %tmp23263 = getelementptr inbounds float, float* %tmp23262, i64 1
+ %tmp23264 = getelementptr inbounds float, float* %tmp23263, i64 1
+ %tmp23265 = getelementptr inbounds float, float* %tmp23264, i64 1
+ %tmp23266 = getelementptr inbounds float, float* %tmp23265, i64 1
+ %tmp23267 = getelementptr inbounds float, float* %tmp23266, i64 1
+ %tmp23268 = getelementptr inbounds float, float* %tmp23267, i64 1
+ %tmp23269 = getelementptr inbounds float, float* %tmp23268, i64 1
+ %tmp23270 = getelementptr inbounds float, float* %tmp23269, i64 1
+ %tmp23271 = getelementptr inbounds float, float* %tmp23270, i64 1
+ %tmp23272 = getelementptr inbounds float, float* %tmp23271, i64 1
+ %tmp23273 = getelementptr inbounds float, float* %tmp23272, i64 1
+ %tmp23274 = getelementptr inbounds float, float* %tmp23273, i64 1
+ %tmp23275 = getelementptr inbounds float, float* %tmp23274, i64 1
+ %tmp23276 = getelementptr inbounds float, float* %tmp23275, i64 1
+ %tmp23277 = getelementptr inbounds float, float* %tmp23276, i64 1
+ %tmp23278 = getelementptr inbounds float, float* %tmp23277, i64 1
+ %tmp23279 = getelementptr inbounds float, float* %tmp23278, i64 1
+ %tmp23280 = getelementptr inbounds float, float* %tmp23279, i64 1
+ %tmp23281 = getelementptr inbounds float, float* %tmp23280, i64 1
+ %tmp23282 = getelementptr inbounds float, float* %tmp23281, i64 1
+ %tmp23283 = getelementptr inbounds float, float* %tmp23282, i64 1
+ %tmp23284 = getelementptr inbounds float, float* %tmp23283, i64 1
+ %tmp23285 = getelementptr inbounds float, float* %tmp23284, i64 1
+ %tmp23286 = getelementptr inbounds float, float* %tmp23285, i64 1
+ %tmp23287 = getelementptr inbounds float, float* %tmp23286, i64 1
+ %tmp23288 = getelementptr inbounds float, float* %tmp23287, i64 1
+ %tmp23289 = getelementptr inbounds float, float* %tmp23288, i64 1
+ %tmp23290 = getelementptr inbounds float, float* %tmp23289, i64 1
+ %tmp23291 = getelementptr inbounds float, float* %tmp23290, i64 1
+ %tmp23292 = getelementptr inbounds float, float* %tmp23291, i64 1
+ %tmp23293 = getelementptr inbounds float, float* %tmp23292, i64 1
+ %tmp23294 = getelementptr inbounds float, float* %tmp23293, i64 1
+ %tmp23295 = getelementptr inbounds float, float* %tmp23294, i64 1
+ %tmp23296 = getelementptr inbounds float, float* %tmp23295, i64 1
+ %tmp23297 = getelementptr inbounds float, float* %tmp23296, i64 1
+ %tmp23298 = getelementptr inbounds float, float* %tmp23297, i64 1
+ %tmp23299 = getelementptr inbounds float, float* %tmp23298, i64 1
+ %tmp23300 = getelementptr inbounds float, float* %tmp23299, i64 1
+ %tmp23301 = getelementptr inbounds float, float* %tmp23300, i64 1
+ %tmp23302 = getelementptr inbounds float, float* %tmp23301, i64 1
+ %tmp23303 = getelementptr inbounds float, float* %tmp23302, i64 1
+ %tmp23304 = getelementptr inbounds float, float* %tmp23303, i64 1
+ %tmp23305 = getelementptr inbounds float, float* %tmp23304, i64 1
+ %tmp23306 = getelementptr inbounds float, float* %tmp23305, i64 1
+ %tmp23307 = getelementptr inbounds float, float* %tmp23306, i64 1
+ %tmp23308 = getelementptr inbounds float, float* %tmp23307, i64 1
+ %tmp23309 = getelementptr inbounds float, float* %tmp23308, i64 1
+ %tmp23310 = getelementptr inbounds float, float* %tmp23309, i64 1
+ %tmp23311 = getelementptr inbounds float, float* %tmp23310, i64 1
+ %tmp23312 = getelementptr inbounds float, float* %tmp23311, i64 1
+ %tmp23313 = getelementptr inbounds float, float* %tmp23312, i64 1
+ %tmp23314 = getelementptr inbounds float, float* %tmp23313, i64 1
+ %tmp23315 = getelementptr inbounds float, float* %tmp23314, i64 1
+ %tmp23316 = getelementptr inbounds float, float* %tmp23315, i64 1
+ %tmp23317 = getelementptr inbounds float, float* %tmp23316, i64 1
+ %tmp23318 = getelementptr inbounds float, float* %tmp23317, i64 1
+ %tmp23319 = getelementptr inbounds float, float* %tmp23318, i64 1
+ %tmp23320 = getelementptr inbounds float, float* %tmp23319, i64 1
+ %tmp23321 = getelementptr inbounds float, float* %tmp23320, i64 1
+ %tmp23322 = getelementptr inbounds float, float* %tmp23321, i64 1
+ %tmp23323 = getelementptr inbounds float, float* %tmp23322, i64 1
+ %tmp23324 = getelementptr inbounds float, float* %tmp23323, i64 1
+ %tmp23325 = getelementptr inbounds float, float* %tmp23324, i64 1
+ %tmp23326 = getelementptr inbounds float, float* %tmp23325, i64 1
+ %tmp23327 = getelementptr inbounds float, float* %tmp23326, i64 1
+ %tmp23328 = getelementptr inbounds float, float* %tmp23327, i64 1
+ %tmp23329 = getelementptr inbounds float, float* %tmp23328, i64 1
+ %tmp23330 = getelementptr inbounds float, float* %tmp23329, i64 1
+ %tmp23331 = getelementptr inbounds float, float* %tmp23330, i64 1
+ %tmp23332 = getelementptr inbounds float, float* %tmp23331, i64 1
+ %tmp23333 = getelementptr inbounds float, float* %tmp23332, i64 1
+ %tmp23334 = getelementptr inbounds float, float* %tmp23333, i64 1
+ %tmp23335 = getelementptr inbounds float, float* %tmp23334, i64 1
+ %tmp23336 = getelementptr inbounds float, float* %tmp23335, i64 1
+ %tmp23337 = getelementptr inbounds float, float* %tmp23336, i64 1
+ %tmp23338 = getelementptr inbounds float, float* %tmp23337, i64 1
+ %tmp23339 = getelementptr inbounds float, float* %tmp23338, i64 1
+ %tmp23340 = getelementptr inbounds float, float* %tmp23339, i64 1
+ %tmp23341 = getelementptr inbounds float, float* %tmp23340, i64 1
+ %tmp23342 = getelementptr inbounds float, float* %tmp23341, i64 1
+ %tmp23343 = getelementptr inbounds float, float* %tmp23342, i64 1
+ %tmp23344 = getelementptr inbounds float, float* %tmp23343, i64 1
+ %tmp23345 = getelementptr inbounds float, float* %tmp23344, i64 1
+ %tmp23346 = getelementptr inbounds float, float* %tmp23345, i64 1
+ %tmp23347 = getelementptr inbounds float, float* %tmp23346, i64 1
+ %tmp23348 = getelementptr inbounds float, float* %tmp23347, i64 1
+ %tmp23349 = getelementptr inbounds float, float* %tmp23348, i64 1
+ %tmp23350 = getelementptr inbounds float, float* %tmp23349, i64 1
+ %tmp23351 = getelementptr inbounds float, float* %tmp23350, i64 1
+ %tmp23352 = getelementptr inbounds float, float* %tmp23351, i64 1
+ %tmp23353 = getelementptr inbounds float, float* %tmp23352, i64 1
+ %tmp23354 = getelementptr inbounds float, float* %tmp23353, i64 1
+ %tmp23355 = getelementptr inbounds float, float* %tmp23354, i64 1
+ %tmp23356 = getelementptr inbounds float, float* %tmp23355, i64 1
+ %tmp23357 = getelementptr inbounds float, float* %tmp23356, i64 1
+ %tmp23358 = getelementptr inbounds float, float* %tmp23357, i64 1
+ %tmp23359 = getelementptr inbounds float, float* %tmp23358, i64 1
+ %tmp23360 = getelementptr inbounds float, float* %tmp23359, i64 1
+ %tmp23361 = getelementptr inbounds float, float* %tmp23360, i64 1
+ %tmp23362 = getelementptr inbounds float, float* %tmp23361, i64 1
+ %tmp23363 = getelementptr inbounds float, float* %tmp23362, i64 1
+ %tmp23364 = getelementptr inbounds float, float* %tmp23363, i64 1
+ %tmp23365 = getelementptr inbounds float, float* %tmp23364, i64 1
+ %tmp23366 = getelementptr inbounds float, float* %tmp23365, i64 1
+ %tmp23367 = getelementptr inbounds float, float* %tmp23366, i64 1
+ %tmp23368 = getelementptr inbounds float, float* %tmp23367, i64 1
+ %tmp23369 = getelementptr inbounds float, float* %tmp23368, i64 1
+ %tmp23370 = getelementptr inbounds float, float* %tmp23369, i64 1
+ %tmp23371 = getelementptr inbounds float, float* %tmp23370, i64 1
+ %tmp23372 = getelementptr inbounds float, float* %tmp23371, i64 1
+ %tmp23373 = getelementptr inbounds float, float* %tmp23372, i64 1
+ %tmp23374 = getelementptr inbounds float, float* %tmp23373, i64 1
+ %tmp23375 = getelementptr inbounds float, float* %tmp23374, i64 1
+ %tmp23376 = getelementptr inbounds float, float* %tmp23375, i64 1
+ %tmp23377 = getelementptr inbounds float, float* %tmp23376, i64 1
+ %tmp23378 = getelementptr inbounds float, float* %tmp23377, i64 1
+ %tmp23379 = getelementptr inbounds float, float* %tmp23378, i64 1
+ %tmp23380 = getelementptr inbounds float, float* %tmp23379, i64 1
+ %tmp23381 = getelementptr inbounds float, float* %tmp23380, i64 1
+ %tmp23382 = getelementptr inbounds float, float* %tmp23381, i64 1
+ %tmp23383 = getelementptr inbounds float, float* %tmp23382, i64 1
+ %tmp23384 = getelementptr inbounds float, float* %tmp23383, i64 1
+ %tmp23385 = getelementptr inbounds float, float* %tmp23384, i64 1
+ %tmp23386 = getelementptr inbounds float, float* %tmp23385, i64 1
+ %tmp23387 = getelementptr inbounds float, float* %tmp23386, i64 1
+ %tmp23388 = getelementptr inbounds float, float* %tmp23387, i64 1
+ %tmp23389 = getelementptr inbounds float, float* %tmp23388, i64 1
+ %tmp23390 = getelementptr inbounds float, float* %tmp23389, i64 1
+ %tmp23391 = getelementptr inbounds float, float* %tmp23390, i64 1
+ %tmp23392 = getelementptr inbounds float, float* %tmp23391, i64 1
+ %tmp23393 = getelementptr inbounds float, float* %tmp23392, i64 1
+ %tmp23394 = getelementptr inbounds float, float* %tmp23393, i64 1
+ %tmp23395 = getelementptr inbounds float, float* %tmp23394, i64 1
+ %tmp23396 = getelementptr inbounds float, float* %tmp23395, i64 1
+ %tmp23397 = getelementptr inbounds float, float* %tmp23396, i64 1
+ %tmp23398 = getelementptr inbounds float, float* %tmp23397, i64 1
+ %tmp23399 = getelementptr inbounds float, float* %tmp23398, i64 1
+ %tmp23400 = getelementptr inbounds float, float* %tmp23399, i64 1
+ %tmp23401 = getelementptr inbounds float, float* %tmp23400, i64 1
+ %tmp23402 = getelementptr inbounds float, float* %tmp23401, i64 1
+ %tmp23403 = getelementptr inbounds float, float* %tmp23402, i64 1
+ %tmp23404 = getelementptr inbounds float, float* %tmp23403, i64 1
+ %tmp23405 = getelementptr inbounds float, float* %tmp23404, i64 1
+ %tmp23406 = getelementptr inbounds float, float* %tmp23405, i64 1
+ %tmp23407 = getelementptr inbounds float, float* %tmp23406, i64 1
+ %tmp23408 = getelementptr inbounds float, float* %tmp23407, i64 1
+ %tmp23409 = getelementptr inbounds float, float* %tmp23408, i64 1
+ %tmp23410 = getelementptr inbounds float, float* %tmp23409, i64 1
+ %tmp23411 = getelementptr inbounds float, float* %tmp23410, i64 1
+ %tmp23412 = getelementptr inbounds float, float* %tmp23411, i64 1
+ %tmp23413 = getelementptr inbounds float, float* %tmp23412, i64 1
+ %tmp23414 = getelementptr inbounds float, float* %tmp23413, i64 1
+ %tmp23415 = getelementptr inbounds float, float* %tmp23414, i64 1
+ %tmp23416 = getelementptr inbounds float, float* %tmp23415, i64 1
+ %tmp23417 = getelementptr inbounds float, float* %tmp23416, i64 1
+ %tmp23418 = getelementptr inbounds float, float* %tmp23417, i64 1
+ %tmp23419 = getelementptr inbounds float, float* %tmp23418, i64 1
+ %tmp23420 = getelementptr inbounds float, float* %tmp23419, i64 1
+ %tmp23421 = getelementptr inbounds float, float* %tmp23420, i64 1
+ %tmp23422 = getelementptr inbounds float, float* %tmp23421, i64 1
+ %tmp23423 = getelementptr inbounds float, float* %tmp23422, i64 1
+ %tmp23424 = getelementptr inbounds float, float* %tmp23423, i64 1
+ %tmp23425 = getelementptr inbounds float, float* %tmp23424, i64 1
+ %tmp23426 = getelementptr inbounds float, float* %tmp23425, i64 1
+ %tmp23427 = getelementptr inbounds float, float* %tmp23426, i64 1
+ %tmp23428 = getelementptr inbounds float, float* %tmp23427, i64 1
+ %tmp23429 = getelementptr inbounds float, float* %tmp23428, i64 1
+ %tmp23430 = getelementptr inbounds float, float* %tmp23429, i64 1
+ %tmp23431 = getelementptr inbounds float, float* %tmp23430, i64 1
+ %tmp23432 = getelementptr inbounds float, float* %tmp23431, i64 1
+ %tmp23433 = getelementptr inbounds float, float* %tmp23432, i64 1
+ %tmp23434 = getelementptr inbounds float, float* %tmp23433, i64 1
+ %tmp23435 = getelementptr inbounds float, float* %tmp23434, i64 1
+ %tmp23436 = getelementptr inbounds float, float* %tmp23435, i64 1
+ %tmp23437 = getelementptr inbounds float, float* %tmp23436, i64 1
+ %tmp23438 = getelementptr inbounds float, float* %tmp23437, i64 1
+ %tmp23439 = getelementptr inbounds float, float* %tmp23438, i64 1
+ %tmp23440 = getelementptr inbounds float, float* %tmp23439, i64 1
+ %tmp23441 = getelementptr inbounds float, float* %tmp23440, i64 1
+ %tmp23442 = getelementptr inbounds float, float* %tmp23441, i64 1
+ %tmp23443 = getelementptr inbounds float, float* %tmp23442, i64 1
+ %tmp23444 = getelementptr inbounds float, float* %tmp23443, i64 1
+ %tmp23445 = getelementptr inbounds float, float* %tmp23444, i64 1
+ %tmp23446 = getelementptr inbounds float, float* %tmp23445, i64 1
+ %tmp23447 = getelementptr inbounds float, float* %tmp23446, i64 1
+ %tmp23448 = getelementptr inbounds float, float* %tmp23447, i64 1
+ %tmp23449 = getelementptr inbounds float, float* %tmp23448, i64 1
+ %tmp23450 = getelementptr inbounds float, float* %tmp23449, i64 1
+ %tmp23451 = getelementptr inbounds float, float* %tmp23450, i64 1
+ %tmp23452 = getelementptr inbounds float, float* %tmp23451, i64 1
+ %tmp23453 = getelementptr inbounds float, float* %tmp23452, i64 1
+ %tmp23454 = getelementptr inbounds float, float* %tmp23453, i64 1
+ %tmp23455 = getelementptr inbounds float, float* %tmp23454, i64 1
+ %tmp23456 = getelementptr inbounds float, float* %tmp23455, i64 1
+ %tmp23457 = getelementptr inbounds float, float* %tmp23456, i64 1
+ %tmp23458 = getelementptr inbounds float, float* %tmp23457, i64 1
+ %tmp23459 = getelementptr inbounds float, float* %tmp23458, i64 1
+ %tmp23460 = getelementptr inbounds float, float* %tmp23459, i64 1
+ %tmp23461 = getelementptr inbounds float, float* %tmp23460, i64 1
+ %tmp23462 = getelementptr inbounds float, float* %tmp23461, i64 1
+ %tmp23463 = getelementptr inbounds float, float* %tmp23462, i64 1
+ %tmp23464 = getelementptr inbounds float, float* %tmp23463, i64 1
+ %tmp23465 = getelementptr inbounds float, float* %tmp23464, i64 1
+ %tmp23466 = getelementptr inbounds float, float* %tmp23465, i64 1
+ %tmp23467 = getelementptr inbounds float, float* %tmp23466, i64 1
+ %tmp23468 = getelementptr inbounds float, float* %tmp23467, i64 1
+ %tmp23469 = getelementptr inbounds float, float* %tmp23468, i64 1
+ %tmp23470 = getelementptr inbounds float, float* %tmp23469, i64 1
+ %tmp23471 = getelementptr inbounds float, float* %tmp23470, i64 1
+ %tmp23472 = getelementptr inbounds float, float* %tmp23471, i64 1
+ %tmp23473 = getelementptr inbounds float, float* %tmp23472, i64 1
+ %tmp23474 = getelementptr inbounds float, float* %tmp23473, i64 1
+ %tmp23475 = getelementptr inbounds float, float* %tmp23474, i64 1
+ %tmp23476 = getelementptr inbounds float, float* %tmp23475, i64 1
+ %tmp23477 = getelementptr inbounds float, float* %tmp23476, i64 1
+ %tmp23478 = getelementptr inbounds float, float* %tmp23477, i64 1
+ %tmp23479 = getelementptr inbounds float, float* %tmp23478, i64 1
+ %tmp23480 = getelementptr inbounds float, float* %tmp23479, i64 1
+ %tmp23481 = getelementptr inbounds float, float* %tmp23480, i64 1
+ %tmp23482 = getelementptr inbounds float, float* %tmp23481, i64 1
+ %tmp23483 = getelementptr inbounds float, float* %tmp23482, i64 1
+ %tmp23484 = getelementptr inbounds float, float* %tmp23483, i64 1
+ %tmp23485 = getelementptr inbounds float, float* %tmp23484, i64 1
+ %tmp23486 = getelementptr inbounds float, float* %tmp23485, i64 1
+ %tmp23487 = getelementptr inbounds float, float* %tmp23486, i64 1
+ %tmp23488 = getelementptr inbounds float, float* %tmp23487, i64 1
+ %tmp23489 = getelementptr inbounds float, float* %tmp23488, i64 1
+ %tmp23490 = getelementptr inbounds float, float* %tmp23489, i64 1
+ %tmp23491 = getelementptr inbounds float, float* %tmp23490, i64 1
+ %tmp23492 = getelementptr inbounds float, float* %tmp23491, i64 1
+ %tmp23493 = getelementptr inbounds float, float* %tmp23492, i64 1
+ %tmp23494 = getelementptr inbounds float, float* %tmp23493, i64 1
+ %tmp23495 = getelementptr inbounds float, float* %tmp23494, i64 1
+ %tmp23496 = getelementptr inbounds float, float* %tmp23495, i64 1
+ %tmp23497 = getelementptr inbounds float, float* %tmp23496, i64 1
+ %tmp23498 = getelementptr inbounds float, float* %tmp23497, i64 1
+ %tmp23499 = getelementptr inbounds float, float* %tmp23498, i64 1
+ %tmp23500 = getelementptr inbounds float, float* %tmp23499, i64 1
+ %tmp23501 = getelementptr inbounds float, float* %tmp23500, i64 1
+ %tmp23502 = getelementptr inbounds float, float* %tmp23501, i64 1
+ %tmp23503 = getelementptr inbounds float, float* %tmp23502, i64 1
+ %tmp23504 = getelementptr inbounds float, float* %tmp23503, i64 1
+ %tmp23505 = getelementptr inbounds float, float* %tmp23504, i64 1
+ %tmp23506 = getelementptr inbounds float, float* %tmp23505, i64 1
+ %tmp23507 = getelementptr inbounds float, float* %tmp23506, i64 1
+ %tmp23508 = getelementptr inbounds float, float* %tmp23507, i64 1
+ %tmp23509 = getelementptr inbounds float, float* %tmp23508, i64 1
+ %tmp23510 = getelementptr inbounds float, float* %tmp23509, i64 1
+ %tmp23511 = getelementptr inbounds float, float* %tmp23510, i64 1
+ %tmp23512 = getelementptr inbounds float, float* %tmp23511, i64 1
+ %tmp23513 = getelementptr inbounds float, float* %tmp23512, i64 1
+ %tmp23514 = getelementptr inbounds float, float* %tmp23513, i64 1
+ %tmp23515 = getelementptr inbounds float, float* %tmp23514, i64 1
+ %tmp23516 = getelementptr inbounds float, float* %tmp23515, i64 1
+ %tmp23517 = getelementptr inbounds float, float* %tmp23516, i64 1
+ %tmp23518 = getelementptr inbounds float, float* %tmp23517, i64 1
+ %tmp23519 = getelementptr inbounds float, float* %tmp23518, i64 1
+ %tmp23520 = getelementptr inbounds float, float* %tmp23519, i64 1
+ %tmp23521 = getelementptr inbounds float, float* %tmp23520, i64 1
+ %tmp23522 = getelementptr inbounds float, float* %tmp23521, i64 1
+ %tmp23523 = getelementptr inbounds float, float* %tmp23522, i64 1
+ %tmp23524 = getelementptr inbounds float, float* %tmp23523, i64 1
+ %tmp23525 = getelementptr inbounds float, float* %tmp23524, i64 1
+ %tmp23526 = getelementptr inbounds float, float* %tmp23525, i64 1
+ %tmp23527 = getelementptr inbounds float, float* %tmp23526, i64 1
+ %tmp23528 = getelementptr inbounds float, float* %tmp23527, i64 1
+ %tmp23529 = getelementptr inbounds float, float* %tmp23528, i64 1
+ %tmp23530 = getelementptr inbounds float, float* %tmp23529, i64 1
+ %tmp23531 = getelementptr inbounds float, float* %tmp23530, i64 1
+ %tmp23532 = getelementptr inbounds float, float* %tmp23531, i64 1
+ %tmp23533 = getelementptr inbounds float, float* %tmp23532, i64 1
+ %tmp23534 = getelementptr inbounds float, float* %tmp23533, i64 1
+ %tmp23535 = getelementptr inbounds float, float* %tmp23534, i64 1
+ %tmp23536 = getelementptr inbounds float, float* %tmp23535, i64 1
+ %tmp23537 = getelementptr inbounds float, float* %tmp23536, i64 1
+ %tmp23538 = getelementptr inbounds float, float* %tmp23537, i64 1
+ %tmp23539 = getelementptr inbounds float, float* %tmp23538, i64 1
+ %tmp23540 = getelementptr inbounds float, float* %tmp23539, i64 1
+ %tmp23541 = getelementptr inbounds float, float* %tmp23540, i64 1
+ %tmp23542 = getelementptr inbounds float, float* %tmp23541, i64 1
+ %tmp23543 = getelementptr inbounds float, float* %tmp23542, i64 1
+ %tmp23544 = getelementptr inbounds float, float* %tmp23543, i64 1
+ %tmp23545 = getelementptr inbounds float, float* %tmp23544, i64 1
+ %tmp23546 = getelementptr inbounds float, float* %tmp23545, i64 1
+ %tmp23547 = getelementptr inbounds float, float* %tmp23546, i64 1
+ %tmp23548 = getelementptr inbounds float, float* %tmp23547, i64 1
+ %tmp23549 = getelementptr inbounds float, float* %tmp23548, i64 1
+ %tmp23550 = getelementptr inbounds float, float* %tmp23549, i64 1
+ %tmp23551 = getelementptr inbounds float, float* %tmp23550, i64 1
+ %tmp23552 = getelementptr inbounds float, float* %tmp23551, i64 1
+ %tmp23553 = getelementptr inbounds float, float* %tmp23552, i64 1
+ %tmp23554 = getelementptr inbounds float, float* %tmp23553, i64 1
+ %tmp23555 = getelementptr inbounds float, float* %tmp23554, i64 1
+ %tmp23556 = getelementptr inbounds float, float* %tmp23555, i64 1
+ %tmp23557 = getelementptr inbounds float, float* %tmp23556, i64 1
+ %tmp23558 = getelementptr inbounds float, float* %tmp23557, i64 1
+ %tmp23559 = getelementptr inbounds float, float* %tmp23558, i64 1
+ %tmp23560 = getelementptr inbounds float, float* %tmp23559, i64 1
+ %tmp23561 = getelementptr inbounds float, float* %tmp23560, i64 1
+ %tmp23562 = getelementptr inbounds float, float* %tmp23561, i64 1
+ %tmp23563 = getelementptr inbounds float, float* %tmp23562, i64 1
+ %tmp23564 = getelementptr inbounds float, float* %tmp23563, i64 1
+ %tmp23565 = getelementptr inbounds float, float* %tmp23564, i64 1
+ %tmp23566 = getelementptr inbounds float, float* %tmp23565, i64 1
+ %tmp23567 = getelementptr inbounds float, float* %tmp23566, i64 1
+ %tmp23568 = getelementptr inbounds float, float* %tmp23567, i64 1
+ %tmp23569 = getelementptr inbounds float, float* %tmp23568, i64 1
+ %tmp23570 = getelementptr inbounds float, float* %tmp23569, i64 1
+ %tmp23571 = getelementptr inbounds float, float* %tmp23570, i64 1
+ %tmp23572 = getelementptr inbounds float, float* %tmp23571, i64 1
+ %tmp23573 = getelementptr inbounds float, float* %tmp23572, i64 1
+ %tmp23574 = getelementptr inbounds float, float* %tmp23573, i64 1
+ %tmp23575 = getelementptr inbounds float, float* %tmp23574, i64 1
+ %tmp23576 = getelementptr inbounds float, float* %tmp23575, i64 1
+ %tmp23577 = getelementptr inbounds float, float* %tmp23576, i64 1
+ %tmp23578 = getelementptr inbounds float, float* %tmp23577, i64 1
+ %tmp23579 = getelementptr inbounds float, float* %tmp23578, i64 1
+ %tmp23580 = getelementptr inbounds float, float* %tmp23579, i64 1
+ %tmp23581 = getelementptr inbounds float, float* %tmp23580, i64 1
+ %tmp23582 = getelementptr inbounds float, float* %tmp23581, i64 1
+ %tmp23583 = getelementptr inbounds float, float* %tmp23582, i64 1
+ %tmp23584 = getelementptr inbounds float, float* %tmp23583, i64 1
+ %tmp23585 = getelementptr inbounds float, float* %tmp23584, i64 1
+ %tmp23586 = getelementptr inbounds float, float* %tmp23585, i64 1
+ %tmp23587 = getelementptr inbounds float, float* %tmp23586, i64 1
+ %tmp23588 = getelementptr inbounds float, float* %tmp23587, i64 1
+ %tmp23589 = getelementptr inbounds float, float* %tmp23588, i64 1
+ %tmp23590 = getelementptr inbounds float, float* %tmp23589, i64 1
+ %tmp23591 = getelementptr inbounds float, float* %tmp23590, i64 1
+ %tmp23592 = getelementptr inbounds float, float* %tmp23591, i64 1
+ %tmp23593 = getelementptr inbounds float, float* %tmp23592, i64 1
+ %tmp23594 = getelementptr inbounds float, float* %tmp23593, i64 1
+ %tmp23595 = getelementptr inbounds float, float* %tmp23594, i64 1
+ %tmp23596 = getelementptr inbounds float, float* %tmp23595, i64 1
+ %tmp23597 = getelementptr inbounds float, float* %tmp23596, i64 1
+ %tmp23598 = getelementptr inbounds float, float* %tmp23597, i64 1
+ %tmp23599 = getelementptr inbounds float, float* %tmp23598, i64 1
+ %tmp23600 = getelementptr inbounds float, float* %tmp23599, i64 1
+ %tmp23601 = getelementptr inbounds float, float* %tmp23600, i64 1
+ %tmp23602 = getelementptr inbounds float, float* %tmp23601, i64 1
+ %tmp23603 = getelementptr inbounds float, float* %tmp23602, i64 1
+ %tmp23604 = getelementptr inbounds float, float* %tmp23603, i64 1
+ %tmp23605 = getelementptr inbounds float, float* %tmp23604, i64 1
+ %tmp23606 = getelementptr inbounds float, float* %tmp23605, i64 1
+ %tmp23607 = getelementptr inbounds float, float* %tmp23606, i64 1
+ %tmp23608 = getelementptr inbounds float, float* %tmp23607, i64 1
+ %tmp23609 = getelementptr inbounds float, float* %tmp23608, i64 1
+ %tmp23610 = getelementptr inbounds float, float* %tmp23609, i64 1
+ %tmp23611 = getelementptr inbounds float, float* %tmp23610, i64 1
+ %tmp23612 = getelementptr inbounds float, float* %tmp23611, i64 1
+ %tmp23613 = getelementptr inbounds float, float* %tmp23612, i64 1
+ %tmp23614 = getelementptr inbounds float, float* %tmp23613, i64 1
+ %tmp23615 = getelementptr inbounds float, float* %tmp23614, i64 1
+ %tmp23616 = getelementptr inbounds float, float* %tmp23615, i64 1
+ %tmp23617 = getelementptr inbounds float, float* %tmp23616, i64 1
+ %tmp23618 = getelementptr inbounds float, float* %tmp23617, i64 1
+ %tmp23619 = getelementptr inbounds float, float* %tmp23618, i64 1
+ %tmp23620 = getelementptr inbounds float, float* %tmp23619, i64 1
+ %tmp23621 = getelementptr inbounds float, float* %tmp23620, i64 1
+ %tmp23622 = getelementptr inbounds float, float* %tmp23621, i64 1
+ %tmp23623 = getelementptr inbounds float, float* %tmp23622, i64 1
+ %tmp23624 = getelementptr inbounds float, float* %tmp23623, i64 1
+ %tmp23625 = getelementptr inbounds float, float* %tmp23624, i64 1
+ %tmp23626 = getelementptr inbounds float, float* %tmp23625, i64 1
+ %tmp23627 = getelementptr inbounds float, float* %tmp23626, i64 1
+ %tmp23628 = getelementptr inbounds float, float* %tmp23627, i64 1
+ %tmp23629 = getelementptr inbounds float, float* %tmp23628, i64 1
+ %tmp23630 = getelementptr inbounds float, float* %tmp23629, i64 1
+ %tmp23631 = getelementptr inbounds float, float* %tmp23630, i64 1
+ %tmp23632 = getelementptr inbounds float, float* %tmp23631, i64 1
+ %tmp23633 = getelementptr inbounds float, float* %tmp23632, i64 1
+ %tmp23634 = getelementptr inbounds float, float* %tmp23633, i64 1
+ %tmp23635 = getelementptr inbounds float, float* %tmp23634, i64 1
+ %tmp23636 = getelementptr inbounds float, float* %tmp23635, i64 1
+ %tmp23637 = getelementptr inbounds float, float* %tmp23636, i64 1
+ %tmp23638 = getelementptr inbounds float, float* %tmp23637, i64 1
+ %tmp23639 = getelementptr inbounds float, float* %tmp23638, i64 1
+ %tmp23640 = getelementptr inbounds float, float* %tmp23639, i64 1
+ %tmp23641 = getelementptr inbounds float, float* %tmp23640, i64 1
+ %tmp23642 = getelementptr inbounds float, float* %tmp23641, i64 1
+ %tmp23643 = getelementptr inbounds float, float* %tmp23642, i64 1
+ %tmp23644 = getelementptr inbounds float, float* %tmp23643, i64 1
+ %tmp23645 = getelementptr inbounds float, float* %tmp23644, i64 1
+ %tmp23646 = getelementptr inbounds float, float* %tmp23645, i64 1
+ %tmp23647 = getelementptr inbounds float, float* %tmp23646, i64 1
+ %tmp23648 = getelementptr inbounds float, float* %tmp23647, i64 1
+ %tmp23649 = getelementptr inbounds float, float* %tmp23648, i64 1
+ %tmp23650 = getelementptr inbounds float, float* %tmp23649, i64 1
+ %tmp23651 = getelementptr inbounds float, float* %tmp23650, i64 1
+ %tmp23652 = getelementptr inbounds float, float* %tmp23651, i64 1
+ %tmp23653 = getelementptr inbounds float, float* %tmp23652, i64 1
+ %tmp23654 = getelementptr inbounds float, float* %tmp23653, i64 1
+ %tmp23655 = getelementptr inbounds float, float* %tmp23654, i64 1
+ %tmp23656 = getelementptr inbounds float, float* %tmp23655, i64 1
+ %tmp23657 = getelementptr inbounds float, float* %tmp23656, i64 1
+ %tmp23658 = getelementptr inbounds float, float* %tmp23657, i64 1
+ %tmp23659 = getelementptr inbounds float, float* %tmp23658, i64 1
+ %tmp23660 = getelementptr inbounds float, float* %tmp23659, i64 1
+ %tmp23661 = getelementptr inbounds float, float* %tmp23660, i64 1
+ %tmp23662 = getelementptr inbounds float, float* %tmp23661, i64 1
+ %tmp23663 = getelementptr inbounds float, float* %tmp23662, i64 1
+ %tmp23664 = getelementptr inbounds float, float* %tmp23663, i64 1
+ %tmp23665 = getelementptr inbounds float, float* %tmp23664, i64 1
+ %tmp23666 = getelementptr inbounds float, float* %tmp23665, i64 1
+ %tmp23667 = getelementptr inbounds float, float* %tmp23666, i64 1
+ %tmp23668 = getelementptr inbounds float, float* %tmp23667, i64 1
+ %tmp23669 = getelementptr inbounds float, float* %tmp23668, i64 1
+ %tmp23670 = getelementptr inbounds float, float* %tmp23669, i64 1
+ %tmp23671 = getelementptr inbounds float, float* %tmp23670, i64 1
+ %tmp23672 = getelementptr inbounds float, float* %tmp23671, i64 1
+ %tmp23673 = getelementptr inbounds float, float* %tmp23672, i64 1
+ %tmp23674 = getelementptr inbounds float, float* %tmp23673, i64 1
+ %tmp23675 = getelementptr inbounds float, float* %tmp23674, i64 1
+ %tmp23676 = getelementptr inbounds float, float* %tmp23675, i64 1
+ %tmp23677 = getelementptr inbounds float, float* %tmp23676, i64 1
+ %tmp23678 = getelementptr inbounds float, float* %tmp23677, i64 1
+ %tmp23679 = getelementptr inbounds float, float* %tmp23678, i64 1
+ %tmp23680 = getelementptr inbounds float, float* %tmp23679, i64 1
+ %tmp23681 = getelementptr inbounds float, float* %tmp23680, i64 1
+ %tmp23682 = getelementptr inbounds float, float* %tmp23681, i64 1
+ %tmp23683 = getelementptr inbounds float, float* %tmp23682, i64 1
+ %tmp23684 = getelementptr inbounds float, float* %tmp23683, i64 1
+ %tmp23685 = getelementptr inbounds float, float* %tmp23684, i64 1
+ %tmp23686 = getelementptr inbounds float, float* %tmp23685, i64 1
+ %tmp23687 = getelementptr inbounds float, float* %tmp23686, i64 1
+ %tmp23688 = getelementptr inbounds float, float* %tmp23687, i64 1
+ %tmp23689 = getelementptr inbounds float, float* %tmp23688, i64 1
+ %tmp23690 = getelementptr inbounds float, float* %tmp23689, i64 1
+ %tmp23691 = getelementptr inbounds float, float* %tmp23690, i64 1
+ %tmp23692 = getelementptr inbounds float, float* %tmp23691, i64 1
+ %tmp23693 = getelementptr inbounds float, float* %tmp23692, i64 1
+ %tmp23694 = getelementptr inbounds float, float* %tmp23693, i64 1
+ %tmp23695 = getelementptr inbounds float, float* %tmp23694, i64 1
+ %tmp23696 = getelementptr inbounds float, float* %tmp23695, i64 1
+ %tmp23697 = getelementptr inbounds float, float* %tmp23696, i64 1
+ %tmp23698 = getelementptr inbounds float, float* %tmp23697, i64 1
+ %tmp23699 = getelementptr inbounds float, float* %tmp23698, i64 1
+ %tmp23700 = getelementptr inbounds float, float* %tmp23699, i64 1
+ %tmp23701 = getelementptr inbounds float, float* %tmp23700, i64 1
+ %tmp23702 = getelementptr inbounds float, float* %tmp23701, i64 1
+ %tmp23703 = getelementptr inbounds float, float* %tmp23702, i64 1
+ %tmp23704 = getelementptr inbounds float, float* %tmp23703, i64 1
+ %tmp23705 = getelementptr inbounds float, float* %tmp23704, i64 1
+ %tmp23706 = getelementptr inbounds float, float* %tmp23705, i64 1
+ %tmp23707 = getelementptr inbounds float, float* %tmp23706, i64 1
+ %tmp23708 = getelementptr inbounds float, float* %tmp23707, i64 1
+ %tmp23709 = getelementptr inbounds float, float* %tmp23708, i64 1
+ %tmp23710 = getelementptr inbounds float, float* %tmp23709, i64 1
+ %tmp23711 = getelementptr inbounds float, float* %tmp23710, i64 1
+ %tmp23712 = getelementptr inbounds float, float* %tmp23711, i64 1
+ %tmp23713 = getelementptr inbounds float, float* %tmp23712, i64 1
+ %tmp23714 = getelementptr inbounds float, float* %tmp23713, i64 1
+ %tmp23715 = getelementptr inbounds float, float* %tmp23714, i64 1
+ %tmp23716 = getelementptr inbounds float, float* %tmp23715, i64 1
+ %tmp23717 = getelementptr inbounds float, float* %tmp23716, i64 1
+ %tmp23718 = getelementptr inbounds float, float* %tmp23717, i64 1
+ %tmp23719 = getelementptr inbounds float, float* %tmp23718, i64 1
+ %tmp23720 = getelementptr inbounds float, float* %tmp23719, i64 1
+ %tmp23721 = getelementptr inbounds float, float* %tmp23720, i64 1
+ %tmp23722 = getelementptr inbounds float, float* %tmp23721, i64 1
+ %tmp23723 = getelementptr inbounds float, float* %tmp23722, i64 1
+ %tmp23724 = getelementptr inbounds float, float* %tmp23723, i64 1
+ %tmp23725 = getelementptr inbounds float, float* %tmp23724, i64 1
+ %tmp23726 = getelementptr inbounds float, float* %tmp23725, i64 1
+ %tmp23727 = getelementptr inbounds float, float* %tmp23726, i64 1
+ %tmp23728 = getelementptr inbounds float, float* %tmp23727, i64 1
+ %tmp23729 = getelementptr inbounds float, float* %tmp23728, i64 1
+ %tmp23730 = getelementptr inbounds float, float* %tmp23729, i64 1
+ %tmp23731 = getelementptr inbounds float, float* %tmp23730, i64 1
+ %tmp23732 = getelementptr inbounds float, float* %tmp23731, i64 1
+ %tmp23733 = getelementptr inbounds float, float* %tmp23732, i64 1
+ %tmp23734 = getelementptr inbounds float, float* %tmp23733, i64 1
+ %tmp23735 = getelementptr inbounds float, float* %tmp23734, i64 1
+ %tmp23736 = getelementptr inbounds float, float* %tmp23735, i64 1
+ %tmp23737 = getelementptr inbounds float, float* %tmp23736, i64 1
+ %tmp23738 = getelementptr inbounds float, float* %tmp23737, i64 1
+ %tmp23739 = getelementptr inbounds float, float* %tmp23738, i64 1
+ %tmp23740 = getelementptr inbounds float, float* %tmp23739, i64 1
+ %tmp23741 = getelementptr inbounds float, float* %tmp23740, i64 1
+ %tmp23742 = getelementptr inbounds float, float* %tmp23741, i64 1
+ %tmp23743 = getelementptr inbounds float, float* %tmp23742, i64 1
+ %tmp23744 = getelementptr inbounds float, float* %tmp23743, i64 1
+ %tmp23745 = getelementptr inbounds float, float* %tmp23744, i64 1
+ %tmp23746 = getelementptr inbounds float, float* %tmp23745, i64 1
+ %tmp23747 = getelementptr inbounds float, float* %tmp23746, i64 1
+ %tmp23748 = getelementptr inbounds float, float* %tmp23747, i64 1
+ %tmp23749 = getelementptr inbounds float, float* %tmp23748, i64 1
+ %tmp23750 = getelementptr inbounds float, float* %tmp23749, i64 1
+ %tmp23751 = getelementptr inbounds float, float* %tmp23750, i64 1
+ %tmp23752 = getelementptr inbounds float, float* %tmp23751, i64 1
+ %tmp23753 = getelementptr inbounds float, float* %tmp23752, i64 1
+ %tmp23754 = getelementptr inbounds float, float* %tmp23753, i64 1
+ %tmp23755 = getelementptr inbounds float, float* %tmp23754, i64 1
+ %tmp23756 = getelementptr inbounds float, float* %tmp23755, i64 1
+ %tmp23757 = getelementptr inbounds float, float* %tmp23756, i64 1
+ %tmp23758 = getelementptr inbounds float, float* %tmp23757, i64 1
+ %tmp23759 = getelementptr inbounds float, float* %tmp23758, i64 1
+ %tmp23760 = getelementptr inbounds float, float* %tmp23759, i64 1
+ %tmp23761 = getelementptr inbounds float, float* %tmp23760, i64 1
+ %tmp23762 = getelementptr inbounds float, float* %tmp23761, i64 1
+ %tmp23763 = getelementptr inbounds float, float* %tmp23762, i64 1
+ %tmp23764 = getelementptr inbounds float, float* %tmp23763, i64 1
+ %tmp23765 = getelementptr inbounds float, float* %tmp23764, i64 1
+ %tmp23766 = getelementptr inbounds float, float* %tmp23765, i64 1
+ %tmp23767 = getelementptr inbounds float, float* %tmp23766, i64 1
+ %tmp23768 = getelementptr inbounds float, float* %tmp23767, i64 1
+ %tmp23769 = getelementptr inbounds float, float* %tmp23768, i64 1
+ %tmp23770 = getelementptr inbounds float, float* %tmp23769, i64 1
+ %tmp23771 = getelementptr inbounds float, float* %tmp23770, i64 1
+ %tmp23772 = getelementptr inbounds float, float* %tmp23771, i64 1
+ %tmp23773 = getelementptr inbounds float, float* %tmp23772, i64 1
+ %tmp23774 = getelementptr inbounds float, float* %tmp23773, i64 1
+ %tmp23775 = getelementptr inbounds float, float* %tmp23774, i64 1
+ %tmp23776 = getelementptr inbounds float, float* %tmp23775, i64 1
+ %tmp23777 = getelementptr inbounds float, float* %tmp23776, i64 1
+ %tmp23778 = getelementptr inbounds float, float* %tmp23777, i64 1
+ %tmp23779 = getelementptr inbounds float, float* %tmp23778, i64 1
+ %tmp23780 = getelementptr inbounds float, float* %tmp23779, i64 1
+ %tmp23781 = getelementptr inbounds float, float* %tmp23780, i64 1
+ %tmp23782 = getelementptr inbounds float, float* %tmp23781, i64 1
+ %tmp23783 = getelementptr inbounds float, float* %tmp23782, i64 1
+ %tmp23784 = getelementptr inbounds float, float* %tmp23783, i64 1
+ %tmp23785 = getelementptr inbounds float, float* %tmp23784, i64 1
+ %tmp23786 = getelementptr inbounds float, float* %tmp23785, i64 1
+ %tmp23787 = getelementptr inbounds float, float* %tmp23786, i64 1
+ %tmp23788 = getelementptr inbounds float, float* %tmp23787, i64 1
+ %tmp23789 = getelementptr inbounds float, float* %tmp23788, i64 1
+ %tmp23790 = getelementptr inbounds float, float* %tmp23789, i64 1
+ %tmp23791 = getelementptr inbounds float, float* %tmp23790, i64 1
+ %tmp23792 = getelementptr inbounds float, float* %tmp23791, i64 1
+ %tmp23793 = getelementptr inbounds float, float* %tmp23792, i64 1
+ %tmp23794 = getelementptr inbounds float, float* %tmp23793, i64 1
+ %tmp23795 = getelementptr inbounds float, float* %tmp23794, i64 1
+ %tmp23796 = getelementptr inbounds float, float* %tmp23795, i64 1
+ %tmp23797 = getelementptr inbounds float, float* %tmp23796, i64 1
+ %tmp23798 = getelementptr inbounds float, float* %tmp23797, i64 1
+ %tmp23799 = getelementptr inbounds float, float* %tmp23798, i64 1
+ %tmp23800 = getelementptr inbounds float, float* %tmp23799, i64 1
+ %tmp23801 = getelementptr inbounds float, float* %tmp23800, i64 1
+ %tmp23802 = getelementptr inbounds float, float* %tmp23801, i64 1
+ %tmp23803 = getelementptr inbounds float, float* %tmp23802, i64 1
+ %tmp23804 = getelementptr inbounds float, float* %tmp23803, i64 1
+ %tmp23805 = getelementptr inbounds float, float* %tmp23804, i64 1
+ %tmp23806 = getelementptr inbounds float, float* %tmp23805, i64 1
+ %tmp23807 = getelementptr inbounds float, float* %tmp23806, i64 1
+ %tmp23808 = getelementptr inbounds float, float* %tmp23807, i64 1
+ %tmp23809 = getelementptr inbounds float, float* %tmp23808, i64 1
+ %tmp23810 = getelementptr inbounds float, float* %tmp23809, i64 1
+ %tmp23811 = getelementptr inbounds float, float* %tmp23810, i64 1
+ %tmp23812 = getelementptr inbounds float, float* %tmp23811, i64 1
+ %tmp23813 = getelementptr inbounds float, float* %tmp23812, i64 1
+ %tmp23814 = getelementptr inbounds float, float* %tmp23813, i64 1
+ %tmp23815 = getelementptr inbounds float, float* %tmp23814, i64 1
+ %tmp23816 = getelementptr inbounds float, float* %tmp23815, i64 1
+ %tmp23817 = getelementptr inbounds float, float* %tmp23816, i64 1
+ %tmp23818 = getelementptr inbounds float, float* %tmp23817, i64 1
+ %tmp23819 = getelementptr inbounds float, float* %tmp23818, i64 1
+ %tmp23820 = getelementptr inbounds float, float* %tmp23819, i64 1
+ %tmp23821 = getelementptr inbounds float, float* %tmp23820, i64 1
+ %tmp23822 = getelementptr inbounds float, float* %tmp23821, i64 1
+ %tmp23823 = getelementptr inbounds float, float* %tmp23822, i64 1
+ %tmp23824 = getelementptr inbounds float, float* %tmp23823, i64 1
+ %tmp23825 = getelementptr inbounds float, float* %tmp23824, i64 1
+ %tmp23826 = getelementptr inbounds float, float* %tmp23825, i64 1
+ %tmp23827 = getelementptr inbounds float, float* %tmp23826, i64 1
+ %tmp23828 = getelementptr inbounds float, float* %tmp23827, i64 1
+ %tmp23829 = getelementptr inbounds float, float* %tmp23828, i64 1
+ %tmp23830 = getelementptr inbounds float, float* %tmp23829, i64 1
+ %tmp23831 = getelementptr inbounds float, float* %tmp23830, i64 1
+ %tmp23832 = getelementptr inbounds float, float* %tmp23831, i64 1
+ %tmp23833 = getelementptr inbounds float, float* %tmp23832, i64 1
+ %tmp23834 = getelementptr inbounds float, float* %tmp23833, i64 1
+ %tmp23835 = getelementptr inbounds float, float* %tmp23834, i64 1
+ %tmp23836 = getelementptr inbounds float, float* %tmp23835, i64 1
+ %tmp23837 = getelementptr inbounds float, float* %tmp23836, i64 1
+ %tmp23838 = getelementptr inbounds float, float* %tmp23837, i64 1
+ %tmp23839 = getelementptr inbounds float, float* %tmp23838, i64 1
+ %tmp23840 = getelementptr inbounds float, float* %tmp23839, i64 1
+ %tmp23841 = getelementptr inbounds float, float* %tmp23840, i64 1
+ %tmp23842 = getelementptr inbounds float, float* %tmp23841, i64 1
+ %tmp23843 = getelementptr inbounds float, float* %tmp23842, i64 1
+ %tmp23844 = getelementptr inbounds float, float* %tmp23843, i64 1
+ %tmp23845 = getelementptr inbounds float, float* %tmp23844, i64 1
+ %tmp23846 = getelementptr inbounds float, float* %tmp23845, i64 1
+ %tmp23847 = getelementptr inbounds float, float* %tmp23846, i64 1
+ %tmp23848 = getelementptr inbounds float, float* %tmp23847, i64 1
+ %tmp23849 = getelementptr inbounds float, float* %tmp23848, i64 1
+ %tmp23850 = getelementptr inbounds float, float* %tmp23849, i64 1
+ %tmp23851 = getelementptr inbounds float, float* %tmp23850, i64 1
+ %tmp23852 = getelementptr inbounds float, float* %tmp23851, i64 1
+ %tmp23853 = getelementptr inbounds float, float* %tmp23852, i64 1
+ %tmp23854 = getelementptr inbounds float, float* %tmp23853, i64 1
+ %tmp23855 = getelementptr inbounds float, float* %tmp23854, i64 1
+ %tmp23856 = getelementptr inbounds float, float* %tmp23855, i64 1
+ %tmp23857 = getelementptr inbounds float, float* %tmp23856, i64 1
+ %tmp23858 = getelementptr inbounds float, float* %tmp23857, i64 1
+ %tmp23859 = getelementptr inbounds float, float* %tmp23858, i64 1
+ %tmp23860 = getelementptr inbounds float, float* %tmp23859, i64 1
+ %tmp23861 = getelementptr inbounds float, float* %tmp23860, i64 1
+ %tmp23862 = getelementptr inbounds float, float* %tmp23861, i64 1
+ %tmp23863 = getelementptr inbounds float, float* %tmp23862, i64 1
+ %tmp23864 = getelementptr inbounds float, float* %tmp23863, i64 1
+ %tmp23865 = getelementptr inbounds float, float* %tmp23864, i64 1
+ %tmp23866 = getelementptr inbounds float, float* %tmp23865, i64 1
+ %tmp23867 = getelementptr inbounds float, float* %tmp23866, i64 1
+ %tmp23868 = getelementptr inbounds float, float* %tmp23867, i64 1
+ %tmp23869 = getelementptr inbounds float, float* %tmp23868, i64 1
+ %tmp23870 = getelementptr inbounds float, float* %tmp23869, i64 1
+ %tmp23871 = getelementptr inbounds float, float* %tmp23870, i64 1
+ %tmp23872 = getelementptr inbounds float, float* %tmp23871, i64 1
+ %tmp23873 = getelementptr inbounds float, float* %tmp23872, i64 1
+ %tmp23874 = getelementptr inbounds float, float* %tmp23873, i64 1
+ %tmp23875 = getelementptr inbounds float, float* %tmp23874, i64 1
+ %tmp23876 = getelementptr inbounds float, float* %tmp23875, i64 1
+ %tmp23877 = getelementptr inbounds float, float* %tmp23876, i64 1
+ %tmp23878 = getelementptr inbounds float, float* %tmp23877, i64 1
+ %tmp23879 = getelementptr inbounds float, float* %tmp23878, i64 1
+ %tmp23880 = getelementptr inbounds float, float* %tmp23879, i64 1
+ %tmp23881 = getelementptr inbounds float, float* %tmp23880, i64 1
+ %tmp23882 = getelementptr inbounds float, float* %tmp23881, i64 1
+ %tmp23883 = getelementptr inbounds float, float* %tmp23882, i64 1
+ %tmp23884 = getelementptr inbounds float, float* %tmp23883, i64 1
+ %tmp23885 = getelementptr inbounds float, float* %tmp23884, i64 1
+ %tmp23886 = getelementptr inbounds float, float* %tmp23885, i64 1
+ %tmp23887 = getelementptr inbounds float, float* %tmp23886, i64 1
+ %tmp23888 = getelementptr inbounds float, float* %tmp23887, i64 1
+ %tmp23889 = getelementptr inbounds float, float* %tmp23888, i64 1
+ %tmp23890 = getelementptr inbounds float, float* %tmp23889, i64 1
+ %tmp23891 = getelementptr inbounds float, float* %tmp23890, i64 1
+ %tmp23892 = getelementptr inbounds float, float* %tmp23891, i64 1
+ %tmp23893 = getelementptr inbounds float, float* %tmp23892, i64 1
+ %tmp23894 = getelementptr inbounds float, float* %tmp23893, i64 1
+ %tmp23895 = getelementptr inbounds float, float* %tmp23894, i64 1
+ %tmp23896 = getelementptr inbounds float, float* %tmp23895, i64 1
+ %tmp23897 = getelementptr inbounds float, float* %tmp23896, i64 1
+ %tmp23898 = getelementptr inbounds float, float* %tmp23897, i64 1
+ %tmp23899 = getelementptr inbounds float, float* %tmp23898, i64 1
+ %tmp23900 = getelementptr inbounds float, float* %tmp23899, i64 1
+ %tmp23901 = getelementptr inbounds float, float* %tmp23900, i64 1
+ %tmp23902 = getelementptr inbounds float, float* %tmp23901, i64 1
+ %tmp23903 = getelementptr inbounds float, float* %tmp23902, i64 1
+ %tmp23904 = getelementptr inbounds float, float* %tmp23903, i64 1
+ %tmp23905 = getelementptr inbounds float, float* %tmp23904, i64 1
+ %tmp23906 = getelementptr inbounds float, float* %tmp23905, i64 1
+ %tmp23907 = getelementptr inbounds float, float* %tmp23906, i64 1
+ %tmp23908 = getelementptr inbounds float, float* %tmp23907, i64 1
+ %tmp23909 = getelementptr inbounds float, float* %tmp23908, i64 1
+ %tmp23910 = getelementptr inbounds float, float* %tmp23909, i64 1
+ %tmp23911 = getelementptr inbounds float, float* %tmp23910, i64 1
+ %tmp23912 = getelementptr inbounds float, float* %tmp23911, i64 1
+ %tmp23913 = getelementptr inbounds float, float* %tmp23912, i64 1
+ %tmp23914 = getelementptr inbounds float, float* %tmp23913, i64 1
+ %tmp23915 = getelementptr inbounds float, float* %tmp23914, i64 1
+ %tmp23916 = getelementptr inbounds float, float* %tmp23915, i64 1
+ %tmp23917 = getelementptr inbounds float, float* %tmp23916, i64 1
+ %tmp23918 = getelementptr inbounds float, float* %tmp23917, i64 1
+ %tmp23919 = getelementptr inbounds float, float* %tmp23918, i64 1
+ %tmp23920 = getelementptr inbounds float, float* %tmp23919, i64 1
+ %tmp23921 = getelementptr inbounds float, float* %tmp23920, i64 1
+ %tmp23922 = getelementptr inbounds float, float* %tmp23921, i64 1
+ %tmp23923 = getelementptr inbounds float, float* %tmp23922, i64 1
+ %tmp23924 = getelementptr inbounds float, float* %tmp23923, i64 1
+ %tmp23925 = getelementptr inbounds float, float* %tmp23924, i64 1
+ %tmp23926 = getelementptr inbounds float, float* %tmp23925, i64 1
+ %tmp23927 = getelementptr inbounds float, float* %tmp23926, i64 1
+ %tmp23928 = getelementptr inbounds float, float* %tmp23927, i64 1
+ %tmp23929 = getelementptr inbounds float, float* %tmp23928, i64 1
+ %tmp23930 = getelementptr inbounds float, float* %tmp23929, i64 1
+ %tmp23931 = getelementptr inbounds float, float* %tmp23930, i64 1
+ %tmp23932 = getelementptr inbounds float, float* %tmp23931, i64 1
+ %tmp23933 = getelementptr inbounds float, float* %tmp23932, i64 1
+ %tmp23934 = getelementptr inbounds float, float* %tmp23933, i64 1
+ %tmp23935 = getelementptr inbounds float, float* %tmp23934, i64 1
+ %tmp23936 = getelementptr inbounds float, float* %tmp23935, i64 1
+ %tmp23937 = getelementptr inbounds float, float* %tmp23936, i64 1
+ %tmp23938 = getelementptr inbounds float, float* %tmp23937, i64 1
+ %tmp23939 = getelementptr inbounds float, float* %tmp23938, i64 1
+ %tmp23940 = getelementptr inbounds float, float* %tmp23939, i64 1
+ %tmp23941 = getelementptr inbounds float, float* %tmp23940, i64 1
+ %tmp23942 = getelementptr inbounds float, float* %tmp23941, i64 1
+ %tmp23943 = getelementptr inbounds float, float* %tmp23942, i64 1
+ %tmp23944 = getelementptr inbounds float, float* %tmp23943, i64 1
+ %tmp23945 = getelementptr inbounds float, float* %tmp23944, i64 1
+ %tmp23946 = getelementptr inbounds float, float* %tmp23945, i64 1
+ %tmp23947 = getelementptr inbounds float, float* %tmp23946, i64 1
+ %tmp23948 = getelementptr inbounds float, float* %tmp23947, i64 1
+ %tmp23949 = getelementptr inbounds float, float* %tmp23948, i64 1
+ %tmp23950 = getelementptr inbounds float, float* %tmp23949, i64 1
+ %tmp23951 = getelementptr inbounds float, float* %tmp23950, i64 1
+ %tmp23952 = getelementptr inbounds float, float* %tmp23951, i64 1
+ %tmp23953 = getelementptr inbounds float, float* %tmp23952, i64 1
+ %tmp23954 = getelementptr inbounds float, float* %tmp23953, i64 1
+ %tmp23955 = getelementptr inbounds float, float* %tmp23954, i64 1
+ %tmp23956 = getelementptr inbounds float, float* %tmp23955, i64 1
+ %tmp23957 = getelementptr inbounds float, float* %tmp23956, i64 1
+ %tmp23958 = getelementptr inbounds float, float* %tmp23957, i64 1
+ %tmp23959 = getelementptr inbounds float, float* %tmp23958, i64 1
+ %tmp23960 = getelementptr inbounds float, float* %tmp23959, i64 1
+ %tmp23961 = getelementptr inbounds float, float* %tmp23960, i64 1
+ %tmp23962 = getelementptr inbounds float, float* %tmp23961, i64 1
+ %tmp23963 = getelementptr inbounds float, float* %tmp23962, i64 1
+ %tmp23964 = getelementptr inbounds float, float* %tmp23963, i64 1
+ %tmp23965 = getelementptr inbounds float, float* %tmp23964, i64 1
+ %tmp23966 = getelementptr inbounds float, float* %tmp23965, i64 1
+ %tmp23967 = getelementptr inbounds float, float* %tmp23966, i64 1
+ %tmp23968 = getelementptr inbounds float, float* %tmp23967, i64 1
+ %tmp23969 = getelementptr inbounds float, float* %tmp23968, i64 1
+ %tmp23970 = getelementptr inbounds float, float* %tmp23969, i64 1
+ %tmp23971 = getelementptr inbounds float, float* %tmp23970, i64 1
+ %tmp23972 = getelementptr inbounds float, float* %tmp23971, i64 1
+ %tmp23973 = getelementptr inbounds float, float* %tmp23972, i64 1
+ %tmp23974 = getelementptr inbounds float, float* %tmp23973, i64 1
+ %tmp23975 = getelementptr inbounds float, float* %tmp23974, i64 1
+ %tmp23976 = getelementptr inbounds float, float* %tmp23975, i64 1
+ %tmp23977 = getelementptr inbounds float, float* %tmp23976, i64 1
+ %tmp23978 = getelementptr inbounds float, float* %tmp23977, i64 1
+ %tmp23979 = getelementptr inbounds float, float* %tmp23978, i64 1
+ %tmp23980 = getelementptr inbounds float, float* %tmp23979, i64 1
+ %tmp23981 = getelementptr inbounds float, float* %tmp23980, i64 1
+ %tmp23982 = getelementptr inbounds float, float* %tmp23981, i64 1
+ %tmp23983 = getelementptr inbounds float, float* %tmp23982, i64 1
+ %tmp23984 = getelementptr inbounds float, float* %tmp23983, i64 1
+ %tmp23985 = getelementptr inbounds float, float* %tmp23984, i64 1
+ %tmp23986 = getelementptr inbounds float, float* %tmp23985, i64 1
+ %tmp23987 = getelementptr inbounds float, float* %tmp23986, i64 1
+ %tmp23988 = getelementptr inbounds float, float* %tmp23987, i64 1
+ %tmp23989 = getelementptr inbounds float, float* %tmp23988, i64 1
+ %tmp23990 = getelementptr inbounds float, float* %tmp23989, i64 1
+ %tmp23991 = getelementptr inbounds float, float* %tmp23990, i64 1
+ %tmp23992 = getelementptr inbounds float, float* %tmp23991, i64 1
+ %tmp23993 = getelementptr inbounds float, float* %tmp23992, i64 1
+ %tmp23994 = getelementptr inbounds float, float* %tmp23993, i64 1
+ %tmp23995 = getelementptr inbounds float, float* %tmp23994, i64 1
+ %tmp23996 = getelementptr inbounds float, float* %tmp23995, i64 1
+ %tmp23997 = getelementptr inbounds float, float* %tmp23996, i64 1
+ %tmp23998 = getelementptr inbounds float, float* %tmp23997, i64 1
+ %tmp23999 = getelementptr inbounds float, float* %tmp23998, i64 1
+ %tmp24000 = getelementptr inbounds float, float* %tmp23999, i64 1
+ %tmp24001 = getelementptr inbounds float, float* %tmp24000, i64 1
+ %tmp24002 = getelementptr inbounds float, float* %tmp24001, i64 1
+ %tmp24003 = getelementptr inbounds float, float* %tmp24002, i64 1
+ %tmp24004 = getelementptr inbounds float, float* %tmp24003, i64 1
+ %tmp24005 = getelementptr inbounds float, float* %tmp24004, i64 1
+ %tmp24006 = getelementptr inbounds float, float* %tmp24005, i64 1
+ %tmp24007 = getelementptr inbounds float, float* %tmp24006, i64 1
+ %tmp24008 = getelementptr inbounds float, float* %tmp24007, i64 1
+ %tmp24009 = getelementptr inbounds float, float* %tmp24008, i64 1
+ %tmp24010 = getelementptr inbounds float, float* %tmp24009, i64 1
+ %tmp24011 = getelementptr inbounds float, float* %tmp24010, i64 1
+ %tmp24012 = getelementptr inbounds float, float* %tmp24011, i64 1
+ %tmp24013 = getelementptr inbounds float, float* %tmp24012, i64 1
+ %tmp24014 = getelementptr inbounds float, float* %tmp24013, i64 1
+ %tmp24015 = getelementptr inbounds float, float* %tmp24014, i64 1
+ %tmp24016 = getelementptr inbounds float, float* %tmp24015, i64 1
+ %tmp24017 = getelementptr inbounds float, float* %tmp24016, i64 1
+ %tmp24018 = getelementptr inbounds float, float* %tmp24017, i64 1
+ %tmp24019 = getelementptr inbounds float, float* %tmp24018, i64 1
+ %tmp24020 = getelementptr inbounds float, float* %tmp24019, i64 1
+ %tmp24021 = getelementptr inbounds float, float* %tmp24020, i64 1
+ %tmp24022 = getelementptr inbounds float, float* %tmp24021, i64 1
+ %tmp24023 = getelementptr inbounds float, float* %tmp24022, i64 1
+ %tmp24024 = getelementptr inbounds float, float* %tmp24023, i64 1
+ %tmp24025 = getelementptr inbounds float, float* %tmp24024, i64 1
+ %tmp24026 = getelementptr inbounds float, float* %tmp24025, i64 1
+ %tmp24027 = getelementptr inbounds float, float* %tmp24026, i64 1
+ %tmp24028 = getelementptr inbounds float, float* %tmp24027, i64 1
+ %tmp24029 = getelementptr inbounds float, float* %tmp24028, i64 1
+ %tmp24030 = getelementptr inbounds float, float* %tmp24029, i64 1
+ %tmp24031 = getelementptr inbounds float, float* %tmp24030, i64 1
+ %tmp24032 = getelementptr inbounds float, float* %tmp24031, i64 1
+ %tmp24033 = getelementptr inbounds float, float* %tmp24032, i64 1
+ %tmp24034 = getelementptr inbounds float, float* %tmp24033, i64 1
+ %tmp24035 = getelementptr inbounds float, float* %tmp24034, i64 1
+ %tmp24036 = getelementptr inbounds float, float* %tmp24035, i64 1
+ %tmp24037 = getelementptr inbounds float, float* %tmp24036, i64 1
+ %tmp24038 = getelementptr inbounds float, float* %tmp24037, i64 1
+ %tmp24039 = getelementptr inbounds float, float* %tmp24038, i64 1
+ %tmp24040 = getelementptr inbounds float, float* %tmp24039, i64 1
+ %tmp24041 = getelementptr inbounds float, float* %tmp24040, i64 1
+ %tmp24042 = getelementptr inbounds float, float* %tmp24041, i64 1
+ %tmp24043 = getelementptr inbounds float, float* %tmp24042, i64 1
+ %tmp24044 = getelementptr inbounds float, float* %tmp24043, i64 1
+ %tmp24045 = getelementptr inbounds float, float* %tmp24044, i64 1
+ %tmp24046 = getelementptr inbounds float, float* %tmp24045, i64 1
+ %tmp24047 = getelementptr inbounds float, float* %tmp24046, i64 1
+ %tmp24048 = getelementptr inbounds float, float* %tmp24047, i64 1
+ %tmp24049 = getelementptr inbounds float, float* %tmp24048, i64 1
+ %tmp24050 = getelementptr inbounds float, float* %tmp24049, i64 1
+ %tmp24051 = getelementptr inbounds float, float* %tmp24050, i64 1
+ %tmp24052 = getelementptr inbounds float, float* %tmp24051, i64 1
+ %tmp24053 = getelementptr inbounds float, float* %tmp24052, i64 1
+ %tmp24054 = getelementptr inbounds float, float* %tmp24053, i64 1
+ %tmp24055 = getelementptr inbounds float, float* %tmp24054, i64 1
+ %tmp24056 = getelementptr inbounds float, float* %tmp24055, i64 1
+ %tmp24057 = getelementptr inbounds float, float* %tmp24056, i64 1
+ %tmp24058 = getelementptr inbounds float, float* %tmp24057, i64 1
+ %tmp24059 = getelementptr inbounds float, float* %tmp24058, i64 1
+ %tmp24060 = getelementptr inbounds float, float* %tmp24059, i64 1
+ %tmp24061 = getelementptr inbounds float, float* %tmp24060, i64 1
+ %tmp24062 = getelementptr inbounds float, float* %tmp24061, i64 1
+ %tmp24063 = getelementptr inbounds float, float* %tmp24062, i64 1
+ %tmp24064 = getelementptr inbounds float, float* %tmp24063, i64 1
+ %tmp24065 = getelementptr inbounds float, float* %tmp24064, i64 1
+ %tmp24066 = getelementptr inbounds float, float* %tmp24065, i64 1
+ %tmp24067 = getelementptr inbounds float, float* %tmp24066, i64 1
+ %tmp24068 = getelementptr inbounds float, float* %tmp24067, i64 1
+ %tmp24069 = getelementptr inbounds float, float* %tmp24068, i64 1
+ %tmp24070 = getelementptr inbounds float, float* %tmp24069, i64 1
+ %tmp24071 = getelementptr inbounds float, float* %tmp24070, i64 1
+ %tmp24072 = getelementptr inbounds float, float* %tmp24071, i64 1
+ %tmp24073 = getelementptr inbounds float, float* %tmp24072, i64 1
+ %tmp24074 = getelementptr inbounds float, float* %tmp24073, i64 1
+ %tmp24075 = getelementptr inbounds float, float* %tmp24074, i64 1
+ %tmp24076 = getelementptr inbounds float, float* %tmp24075, i64 1
+ %tmp24077 = getelementptr inbounds float, float* %tmp24076, i64 1
+ %tmp24078 = getelementptr inbounds float, float* %tmp24077, i64 1
+ %tmp24079 = getelementptr inbounds float, float* %tmp24078, i64 1
+ %tmp24080 = getelementptr inbounds float, float* %tmp24079, i64 1
+ %tmp24081 = getelementptr inbounds float, float* %tmp24080, i64 1
+ %tmp24082 = getelementptr inbounds float, float* %tmp24081, i64 1
+ %tmp24083 = getelementptr inbounds float, float* %tmp24082, i64 1
+ %tmp24084 = getelementptr inbounds float, float* %tmp24083, i64 1
+ %tmp24085 = getelementptr inbounds float, float* %tmp24084, i64 1
+ %tmp24086 = getelementptr inbounds float, float* %tmp24085, i64 1
+ %tmp24087 = getelementptr inbounds float, float* %tmp24086, i64 1
+ %tmp24088 = getelementptr inbounds float, float* %tmp24087, i64 1
+ %tmp24089 = getelementptr inbounds float, float* %tmp24088, i64 1
+ %tmp24090 = getelementptr inbounds float, float* %tmp24089, i64 1
+ %tmp24091 = getelementptr inbounds float, float* %tmp24090, i64 1
+ %tmp24092 = getelementptr inbounds float, float* %tmp24091, i64 1
+ %tmp24093 = getelementptr inbounds float, float* %tmp24092, i64 1
+ %tmp24094 = getelementptr inbounds float, float* %tmp24093, i64 1
+ %tmp24095 = getelementptr inbounds float, float* %tmp24094, i64 1
+ %tmp24096 = getelementptr inbounds float, float* %tmp24095, i64 1
+ %tmp24097 = getelementptr inbounds float, float* %tmp24096, i64 1
+ %tmp24098 = getelementptr inbounds float, float* %tmp24097, i64 1
+ %tmp24099 = getelementptr inbounds float, float* %tmp24098, i64 1
+ %tmp24100 = getelementptr inbounds float, float* %tmp24099, i64 1
+ %tmp24101 = getelementptr inbounds float, float* %tmp24100, i64 1
+ %tmp24102 = getelementptr inbounds float, float* %tmp24101, i64 1
+ %tmp24103 = getelementptr inbounds float, float* %tmp24102, i64 1
+ %tmp24104 = getelementptr inbounds float, float* %tmp24103, i64 1
+ %tmp24105 = getelementptr inbounds float, float* %tmp24104, i64 1
+ %tmp24106 = getelementptr inbounds float, float* %tmp24105, i64 1
+ %tmp24107 = getelementptr inbounds float, float* %tmp24106, i64 1
+ %tmp24108 = getelementptr inbounds float, float* %tmp24107, i64 1
+ %tmp24109 = getelementptr inbounds float, float* %tmp24108, i64 1
+ %tmp24110 = getelementptr inbounds float, float* %tmp24109, i64 1
+ %tmp24111 = getelementptr inbounds float, float* %tmp24110, i64 1
+ %tmp24112 = getelementptr inbounds float, float* %tmp24111, i64 1
+ %tmp24113 = getelementptr inbounds float, float* %tmp24112, i64 1
+ %tmp24114 = getelementptr inbounds float, float* %tmp24113, i64 1
+ %tmp24115 = getelementptr inbounds float, float* %tmp24114, i64 1
+ %tmp24116 = getelementptr inbounds float, float* %tmp24115, i64 1
+ %tmp24117 = getelementptr inbounds float, float* %tmp24116, i64 1
+ %tmp24118 = getelementptr inbounds float, float* %tmp24117, i64 1
+ %tmp24119 = getelementptr inbounds float, float* %tmp24118, i64 1
+ %tmp24120 = getelementptr inbounds float, float* %tmp24119, i64 1
+ %tmp24121 = getelementptr inbounds float, float* %tmp24120, i64 1
+ %tmp24122 = getelementptr inbounds float, float* %tmp24121, i64 1
+ %tmp24123 = getelementptr inbounds float, float* %tmp24122, i64 1
+ %tmp24124 = getelementptr inbounds float, float* %tmp24123, i64 1
+ %tmp24125 = getelementptr inbounds float, float* %tmp24124, i64 1
+ %tmp24126 = getelementptr inbounds float, float* %tmp24125, i64 1
+ %tmp24127 = getelementptr inbounds float, float* %tmp24126, i64 1
+ %tmp24128 = getelementptr inbounds float, float* %tmp24127, i64 1
+ %tmp24129 = getelementptr inbounds float, float* %tmp24128, i64 1
+ %tmp24130 = getelementptr inbounds float, float* %tmp24129, i64 1
+ %tmp24131 = getelementptr inbounds float, float* %tmp24130, i64 1
+ %tmp24132 = getelementptr inbounds float, float* %tmp24131, i64 1
+ %tmp24133 = getelementptr inbounds float, float* %tmp24132, i64 1
+ %tmp24134 = getelementptr inbounds float, float* %tmp24133, i64 1
+ %tmp24135 = getelementptr inbounds float, float* %tmp24134, i64 1
+ %tmp24136 = getelementptr inbounds float, float* %tmp24135, i64 1
+ %tmp24137 = getelementptr inbounds float, float* %tmp24136, i64 1
+ %tmp24138 = getelementptr inbounds float, float* %tmp24137, i64 1
+ %tmp24139 = getelementptr inbounds float, float* %tmp24138, i64 1
+ %tmp24140 = getelementptr inbounds float, float* %tmp24139, i64 1
+ %tmp24141 = getelementptr inbounds float, float* %tmp24140, i64 1
+ %tmp24142 = getelementptr inbounds float, float* %tmp24141, i64 1
+ %tmp24143 = getelementptr inbounds float, float* %tmp24142, i64 1
+ %tmp24144 = getelementptr inbounds float, float* %tmp24143, i64 1
+ %tmp24145 = getelementptr inbounds float, float* %tmp24144, i64 1
+ %tmp24146 = getelementptr inbounds float, float* %tmp24145, i64 1
+ %tmp24147 = getelementptr inbounds float, float* %tmp24146, i64 1
+ %tmp24148 = getelementptr inbounds float, float* %tmp24147, i64 1
+ %tmp24149 = getelementptr inbounds float, float* %tmp24148, i64 1
+ %tmp24150 = getelementptr inbounds float, float* %tmp24149, i64 1
+ %tmp24151 = getelementptr inbounds float, float* %tmp24150, i64 1
+ %tmp24152 = getelementptr inbounds float, float* %tmp24151, i64 1
+ %tmp24153 = getelementptr inbounds float, float* %tmp24152, i64 1
+ %tmp24154 = getelementptr inbounds float, float* %tmp24153, i64 1
+ %tmp24155 = getelementptr inbounds float, float* %tmp24154, i64 1
+ %tmp24156 = getelementptr inbounds float, float* %tmp24155, i64 1
+ %tmp24157 = getelementptr inbounds float, float* %tmp24156, i64 1
+ %tmp24158 = getelementptr inbounds float, float* %tmp24157, i64 1
+ %tmp24159 = getelementptr inbounds float, float* %tmp24158, i64 1
+ %tmp24160 = getelementptr inbounds float, float* %tmp24159, i64 1
+ %tmp24161 = getelementptr inbounds float, float* %tmp24160, i64 1
+ %tmp24162 = getelementptr inbounds float, float* %tmp24161, i64 1
+ %tmp24163 = getelementptr inbounds float, float* %tmp24162, i64 1
+ %tmp24164 = getelementptr inbounds float, float* %tmp24163, i64 1
+ %tmp24165 = getelementptr inbounds float, float* %tmp24164, i64 1
+ %tmp24166 = getelementptr inbounds float, float* %tmp24165, i64 1
+ %tmp24167 = getelementptr inbounds float, float* %tmp24166, i64 1
+ %tmp24168 = getelementptr inbounds float, float* %tmp24167, i64 1
+ %tmp24169 = getelementptr inbounds float, float* %tmp24168, i64 1
+ %tmp24170 = getelementptr inbounds float, float* %tmp24169, i64 1
+ %tmp24171 = getelementptr inbounds float, float* %tmp24170, i64 1
+ %tmp24172 = getelementptr inbounds float, float* %tmp24171, i64 1
+ %tmp24173 = getelementptr inbounds float, float* %tmp24172, i64 1
+ %tmp24174 = getelementptr inbounds float, float* %tmp24173, i64 1
+ %tmp24175 = getelementptr inbounds float, float* %tmp24174, i64 1
+ %tmp24176 = getelementptr inbounds float, float* %tmp24175, i64 1
+ %tmp24177 = getelementptr inbounds float, float* %tmp24176, i64 1
+ %tmp24178 = getelementptr inbounds float, float* %tmp24177, i64 1
+ %tmp24179 = getelementptr inbounds float, float* %tmp24178, i64 1
+ %tmp24180 = getelementptr inbounds float, float* %tmp24179, i64 1
+ %tmp24181 = getelementptr inbounds float, float* %tmp24180, i64 1
+ %tmp24182 = getelementptr inbounds float, float* %tmp24181, i64 1
+ %tmp24183 = getelementptr inbounds float, float* %tmp24182, i64 1
+ %tmp24184 = getelementptr inbounds float, float* %tmp24183, i64 1
+ %tmp24185 = getelementptr inbounds float, float* %tmp24184, i64 1
+ %tmp24186 = getelementptr inbounds float, float* %tmp24185, i64 1
+ %tmp24187 = getelementptr inbounds float, float* %tmp24186, i64 1
+ %tmp24188 = getelementptr inbounds float, float* %tmp24187, i64 1
+ %tmp24189 = getelementptr inbounds float, float* %tmp24188, i64 1
+ %tmp24190 = getelementptr inbounds float, float* %tmp24189, i64 1
+ %tmp24191 = getelementptr inbounds float, float* %tmp24190, i64 1
+ %tmp24192 = getelementptr inbounds float, float* %tmp24191, i64 1
+ %tmp24193 = getelementptr inbounds float, float* %tmp24192, i64 1
+ %tmp24194 = getelementptr inbounds float, float* %tmp24193, i64 1
+ %tmp24195 = getelementptr inbounds float, float* %tmp24194, i64 1
+ %tmp24196 = getelementptr inbounds float, float* %tmp24195, i64 1
+ %tmp24197 = getelementptr inbounds float, float* %tmp24196, i64 1
+ %tmp24198 = getelementptr inbounds float, float* %tmp24197, i64 1
+ %tmp24199 = getelementptr inbounds float, float* %tmp24198, i64 1
+ %tmp24200 = getelementptr inbounds float, float* %tmp24199, i64 1
+ %tmp24201 = getelementptr inbounds float, float* %tmp24200, i64 1
+ %tmp24202 = getelementptr inbounds float, float* %tmp24201, i64 1
+ %tmp24203 = getelementptr inbounds float, float* %tmp24202, i64 1
+ %tmp24204 = getelementptr inbounds float, float* %tmp24203, i64 1
+ %tmp24205 = getelementptr inbounds float, float* %tmp24204, i64 1
+ %tmp24206 = getelementptr inbounds float, float* %tmp24205, i64 1
+ %tmp24207 = getelementptr inbounds float, float* %tmp24206, i64 1
+ %tmp24208 = getelementptr inbounds float, float* %tmp24207, i64 1
+ %tmp24209 = getelementptr inbounds float, float* %tmp24208, i64 1
+ %tmp24210 = getelementptr inbounds float, float* %tmp24209, i64 1
+ %tmp24211 = getelementptr inbounds float, float* %tmp24210, i64 1
+ %tmp24212 = getelementptr inbounds float, float* %tmp24211, i64 1
+ %tmp24213 = getelementptr inbounds float, float* %tmp24212, i64 1
+ %tmp24214 = getelementptr inbounds float, float* %tmp24213, i64 1
+ %tmp24215 = getelementptr inbounds float, float* %tmp24214, i64 1
+ %tmp24216 = getelementptr inbounds float, float* %tmp24215, i64 1
+ %tmp24217 = getelementptr inbounds float, float* %tmp24216, i64 1
+ %tmp24218 = getelementptr inbounds float, float* %tmp24217, i64 1
+ %tmp24219 = getelementptr inbounds float, float* %tmp24218, i64 1
+ %tmp24220 = getelementptr inbounds float, float* %tmp24219, i64 1
+ %tmp24221 = getelementptr inbounds float, float* %tmp24220, i64 1
+ %tmp24222 = getelementptr inbounds float, float* %tmp24221, i64 1
+ %tmp24223 = getelementptr inbounds float, float* %tmp24222, i64 1
+ %tmp24224 = getelementptr inbounds float, float* %tmp24223, i64 1
+ %tmp24225 = getelementptr inbounds float, float* %tmp24224, i64 1
+ %tmp24226 = getelementptr inbounds float, float* %tmp24225, i64 1
+ %tmp24227 = getelementptr inbounds float, float* %tmp24226, i64 1
+ %tmp24228 = getelementptr inbounds float, float* %tmp24227, i64 1
+ %tmp24229 = getelementptr inbounds float, float* %tmp24228, i64 1
+ %tmp24230 = getelementptr inbounds float, float* %tmp24229, i64 1
+ %tmp24231 = getelementptr inbounds float, float* %tmp24230, i64 1
+ %tmp24232 = getelementptr inbounds float, float* %tmp24231, i64 1
+ %tmp24233 = getelementptr inbounds float, float* %tmp24232, i64 1
+ %tmp24234 = getelementptr inbounds float, float* %tmp24233, i64 1
+ %tmp24235 = getelementptr inbounds float, float* %tmp24234, i64 1
+ %tmp24236 = getelementptr inbounds float, float* %tmp24235, i64 1
+ %tmp24237 = getelementptr inbounds float, float* %tmp24236, i64 1
+ %tmp24238 = getelementptr inbounds float, float* %tmp24237, i64 1
+ %tmp24239 = getelementptr inbounds float, float* %tmp24238, i64 1
+ %tmp24240 = getelementptr inbounds float, float* %tmp24239, i64 1
+ %tmp24241 = getelementptr inbounds float, float* %tmp24240, i64 1
+ %tmp24242 = getelementptr inbounds float, float* %tmp24241, i64 1
+ %tmp24243 = getelementptr inbounds float, float* %tmp24242, i64 1
+ %tmp24244 = getelementptr inbounds float, float* %tmp24243, i64 1
+ %tmp24245 = getelementptr inbounds float, float* %tmp24244, i64 1
+ %tmp24246 = getelementptr inbounds float, float* %tmp24245, i64 1
+ %tmp24247 = getelementptr inbounds float, float* %tmp24246, i64 1
+ %tmp24248 = getelementptr inbounds float, float* %tmp24247, i64 1
+ %tmp24249 = getelementptr inbounds float, float* %tmp24248, i64 1
+ %tmp24250 = getelementptr inbounds float, float* %tmp24249, i64 1
+ %tmp24251 = getelementptr inbounds float, float* %tmp24250, i64 1
+ %tmp24252 = getelementptr inbounds float, float* %tmp24251, i64 1
+ %tmp24253 = getelementptr inbounds float, float* %tmp24252, i64 1
+ %tmp24254 = getelementptr inbounds float, float* %tmp24253, i64 1
+ %tmp24255 = getelementptr inbounds float, float* %tmp24254, i64 1
+ %tmp24256 = getelementptr inbounds float, float* %tmp24255, i64 1
+ %tmp24257 = getelementptr inbounds float, float* %tmp24256, i64 1
+ %tmp24258 = getelementptr inbounds float, float* %tmp24257, i64 1
+ %tmp24259 = getelementptr inbounds float, float* %tmp24258, i64 1
+ %tmp24260 = getelementptr inbounds float, float* %tmp24259, i64 1
+ %tmp24261 = getelementptr inbounds float, float* %tmp24260, i64 1
+ %tmp24262 = getelementptr inbounds float, float* %tmp24261, i64 1
+ %tmp24263 = getelementptr inbounds float, float* %tmp24262, i64 1
+ %tmp24264 = getelementptr inbounds float, float* %tmp24263, i64 1
+ %tmp24265 = getelementptr inbounds float, float* %tmp24264, i64 1
+ %tmp24266 = getelementptr inbounds float, float* %tmp24265, i64 1
+ %tmp24267 = getelementptr inbounds float, float* %tmp24266, i64 1
+ %tmp24268 = getelementptr inbounds float, float* %tmp24267, i64 1
+ %tmp24269 = getelementptr inbounds float, float* %tmp24268, i64 1
+ %tmp24270 = getelementptr inbounds float, float* %tmp24269, i64 1
+ %tmp24271 = getelementptr inbounds float, float* %tmp24270, i64 1
+ %tmp24272 = getelementptr inbounds float, float* %tmp24271, i64 1
+ %tmp24273 = getelementptr inbounds float, float* %tmp24272, i64 1
+ %tmp24274 = getelementptr inbounds float, float* %tmp24273, i64 1
+ %tmp24275 = getelementptr inbounds float, float* %tmp24274, i64 1
+ %tmp24276 = getelementptr inbounds float, float* %tmp24275, i64 1
+ %tmp24277 = getelementptr inbounds float, float* %tmp24276, i64 1
+ %tmp24278 = getelementptr inbounds float, float* %tmp24277, i64 1
+ %tmp24279 = getelementptr inbounds float, float* %tmp24278, i64 1
+ %tmp24280 = getelementptr inbounds float, float* %tmp24279, i64 1
+ %tmp24281 = getelementptr inbounds float, float* %tmp24280, i64 1
+ %tmp24282 = getelementptr inbounds float, float* %tmp24281, i64 1
+ %tmp24283 = getelementptr inbounds float, float* %tmp24282, i64 1
+ %tmp24284 = getelementptr inbounds float, float* %tmp24283, i64 1
+ %tmp24285 = getelementptr inbounds float, float* %tmp24284, i64 1
+ %tmp24286 = getelementptr inbounds float, float* %tmp24285, i64 1
+ %tmp24287 = getelementptr inbounds float, float* %tmp24286, i64 1
+ %tmp24288 = getelementptr inbounds float, float* %tmp24287, i64 1
+ %tmp24289 = getelementptr inbounds float, float* %tmp24288, i64 1
+ %tmp24290 = getelementptr inbounds float, float* %tmp24289, i64 1
+ %tmp24291 = getelementptr inbounds float, float* %tmp24290, i64 1
+ %tmp24292 = getelementptr inbounds float, float* %tmp24291, i64 1
+ %tmp24293 = getelementptr inbounds float, float* %tmp24292, i64 1
+ %tmp24294 = getelementptr inbounds float, float* %tmp24293, i64 1
+ %tmp24295 = getelementptr inbounds float, float* %tmp24294, i64 1
+ %tmp24296 = getelementptr inbounds float, float* %tmp24295, i64 1
+ %tmp24297 = getelementptr inbounds float, float* %tmp24296, i64 1
+ %tmp24298 = getelementptr inbounds float, float* %tmp24297, i64 1
+ %tmp24299 = getelementptr inbounds float, float* %tmp24298, i64 1
+ %tmp24300 = getelementptr inbounds float, float* %tmp24299, i64 1
+ %tmp24301 = getelementptr inbounds float, float* %tmp24300, i64 1
+ %tmp24302 = getelementptr inbounds float, float* %tmp24301, i64 1
+ %tmp24303 = getelementptr inbounds float, float* %tmp24302, i64 1
+ %tmp24304 = getelementptr inbounds float, float* %tmp24303, i64 1
+ %tmp24305 = getelementptr inbounds float, float* %tmp24304, i64 1
+ %tmp24306 = getelementptr inbounds float, float* %tmp24305, i64 1
+ %tmp24307 = getelementptr inbounds float, float* %tmp24306, i64 1
+ %tmp24308 = getelementptr inbounds float, float* %tmp24307, i64 1
+ %tmp24309 = getelementptr inbounds float, float* %tmp24308, i64 1
+ %tmp24310 = getelementptr inbounds float, float* %tmp24309, i64 1
+ %tmp24311 = getelementptr inbounds float, float* %tmp24310, i64 1
+ %tmp24312 = getelementptr inbounds float, float* %tmp24311, i64 1
+ %tmp24313 = getelementptr inbounds float, float* %tmp24312, i64 1
+ %tmp24314 = getelementptr inbounds float, float* %tmp24313, i64 1
+ %tmp24315 = getelementptr inbounds float, float* %tmp24314, i64 1
+ %tmp24316 = getelementptr inbounds float, float* %tmp24315, i64 1
+ %tmp24317 = getelementptr inbounds float, float* %tmp24316, i64 1
+ %tmp24318 = getelementptr inbounds float, float* %tmp24317, i64 1
+ %tmp24319 = getelementptr inbounds float, float* %tmp24318, i64 1
+ %tmp24320 = getelementptr inbounds float, float* %tmp24319, i64 1
+ %tmp24321 = getelementptr inbounds float, float* %tmp24320, i64 1
+ %tmp24322 = getelementptr inbounds float, float* %tmp24321, i64 1
+ %tmp24323 = getelementptr inbounds float, float* %tmp24322, i64 1
+ %tmp24324 = getelementptr inbounds float, float* %tmp24323, i64 1
+ %tmp24325 = getelementptr inbounds float, float* %tmp24324, i64 1
+ %tmp24326 = getelementptr inbounds float, float* %tmp24325, i64 1
+ %tmp24327 = getelementptr inbounds float, float* %tmp24326, i64 1
+ %tmp24328 = getelementptr inbounds float, float* %tmp24327, i64 1
+ %tmp24329 = getelementptr inbounds float, float* %tmp24328, i64 1
+ %tmp24330 = getelementptr inbounds float, float* %tmp24329, i64 1
+ %tmp24331 = getelementptr inbounds float, float* %tmp24330, i64 1
+ %tmp24332 = getelementptr inbounds float, float* %tmp24331, i64 1
+ %tmp24333 = getelementptr inbounds float, float* %tmp24332, i64 1
+ %tmp24334 = getelementptr inbounds float, float* %tmp24333, i64 1
+ %tmp24335 = getelementptr inbounds float, float* %tmp24334, i64 1
+ %tmp24336 = getelementptr inbounds float, float* %tmp24335, i64 1
+ %tmp24337 = getelementptr inbounds float, float* %tmp24336, i64 1
+ %tmp24338 = getelementptr inbounds float, float* %tmp24337, i64 1
+ %tmp24339 = getelementptr inbounds float, float* %tmp24338, i64 1
+ %tmp24340 = getelementptr inbounds float, float* %tmp24339, i64 1
+ %tmp24341 = getelementptr inbounds float, float* %tmp24340, i64 1
+ %tmp24342 = getelementptr inbounds float, float* %tmp24341, i64 1
+ %tmp24343 = getelementptr inbounds float, float* %tmp24342, i64 1
+ %tmp24344 = getelementptr inbounds float, float* %tmp24343, i64 1
+ %tmp24345 = getelementptr inbounds float, float* %tmp24344, i64 1
+ %tmp24346 = getelementptr inbounds float, float* %tmp24345, i64 1
+ %tmp24347 = getelementptr inbounds float, float* %tmp24346, i64 1
+ %tmp24348 = getelementptr inbounds float, float* %tmp24347, i64 1
+ %tmp24349 = getelementptr inbounds float, float* %tmp24348, i64 1
+ %tmp24350 = getelementptr inbounds float, float* %tmp24349, i64 1
+ %tmp24351 = getelementptr inbounds float, float* %tmp24350, i64 1
+ %tmp24352 = getelementptr inbounds float, float* %tmp24351, i64 1
+ %tmp24353 = getelementptr inbounds float, float* %tmp24352, i64 1
+ %tmp24354 = getelementptr inbounds float, float* %tmp24353, i64 1
+ %tmp24355 = getelementptr inbounds float, float* %tmp24354, i64 1
+ %tmp24356 = getelementptr inbounds float, float* %tmp24355, i64 1
+ %tmp24357 = getelementptr inbounds float, float* %tmp24356, i64 1
+ %tmp24358 = getelementptr inbounds float, float* %tmp24357, i64 1
+ %tmp24359 = getelementptr inbounds float, float* %tmp24358, i64 1
+ %tmp24360 = getelementptr inbounds float, float* %tmp24359, i64 1
+ %tmp24361 = getelementptr inbounds float, float* %tmp24360, i64 1
+ %tmp24362 = getelementptr inbounds float, float* %tmp24361, i64 1
+ %tmp24363 = getelementptr inbounds float, float* %tmp24362, i64 1
+ %tmp24364 = getelementptr inbounds float, float* %tmp24363, i64 1
+ %tmp24365 = getelementptr inbounds float, float* %tmp24364, i64 1
+ %tmp24366 = getelementptr inbounds float, float* %tmp24365, i64 1
+ %tmp24367 = getelementptr inbounds float, float* %tmp24366, i64 1
+ %tmp24368 = getelementptr inbounds float, float* %tmp24367, i64 1
+ %tmp24369 = getelementptr inbounds float, float* %tmp24368, i64 1
+ %tmp24370 = getelementptr inbounds float, float* %tmp24369, i64 1
+ %tmp24371 = getelementptr inbounds float, float* %tmp24370, i64 1
+ %tmp24372 = getelementptr inbounds float, float* %tmp24371, i64 1
+ %tmp24373 = getelementptr inbounds float, float* %tmp24372, i64 1
+ %tmp24374 = getelementptr inbounds float, float* %tmp24373, i64 1
+ %tmp24375 = getelementptr inbounds float, float* %tmp24374, i64 1
+ %tmp24376 = getelementptr inbounds float, float* %tmp24375, i64 1
+ %tmp24377 = getelementptr inbounds float, float* %tmp24376, i64 1
+ %tmp24378 = getelementptr inbounds float, float* %tmp24377, i64 1
+ %tmp24379 = getelementptr inbounds float, float* %tmp24378, i64 1
+ %tmp24380 = getelementptr inbounds float, float* %tmp24379, i64 1
+ %tmp24381 = getelementptr inbounds float, float* %tmp24380, i64 1
+ %tmp24382 = getelementptr inbounds float, float* %tmp24381, i64 1
+ %tmp24383 = getelementptr inbounds float, float* %tmp24382, i64 1
+ %tmp24384 = getelementptr inbounds float, float* %tmp24383, i64 1
+ %tmp24385 = getelementptr inbounds float, float* %tmp24384, i64 1
+ %tmp24386 = getelementptr inbounds float, float* %tmp24385, i64 1
+ %tmp24387 = getelementptr inbounds float, float* %tmp24386, i64 1
+ %tmp24388 = getelementptr inbounds float, float* %tmp24387, i64 1
+ %tmp24389 = getelementptr inbounds float, float* %tmp24388, i64 1
+ %tmp24390 = getelementptr inbounds float, float* %tmp24389, i64 1
+ %tmp24391 = getelementptr inbounds float, float* %tmp24390, i64 1
+ %tmp24392 = getelementptr inbounds float, float* %tmp24391, i64 1
+ %tmp24393 = getelementptr inbounds float, float* %tmp24392, i64 1
+ %tmp24394 = getelementptr inbounds float, float* %tmp24393, i64 1
+ %tmp24395 = getelementptr inbounds float, float* %tmp24394, i64 1
+ %tmp24396 = getelementptr inbounds float, float* %tmp24395, i64 1
+ %tmp24397 = getelementptr inbounds float, float* %tmp24396, i64 1
+ %tmp24398 = getelementptr inbounds float, float* %tmp24397, i64 1
+ %tmp24399 = getelementptr inbounds float, float* %tmp24398, i64 1
+ %tmp24400 = getelementptr inbounds float, float* %tmp24399, i64 1
+ %tmp24401 = getelementptr inbounds float, float* %tmp24400, i64 1
+ %tmp24402 = getelementptr inbounds float, float* %tmp24401, i64 1
+ %tmp24403 = getelementptr inbounds float, float* %tmp24402, i64 1
+ %tmp24404 = getelementptr inbounds float, float* %tmp24403, i64 1
+ %tmp24405 = getelementptr inbounds float, float* %tmp24404, i64 1
+ %tmp24406 = getelementptr inbounds float, float* %tmp24405, i64 1
+ %tmp24407 = getelementptr inbounds float, float* %tmp24406, i64 1
+ %tmp24408 = getelementptr inbounds float, float* %tmp24407, i64 1
+ %tmp24409 = getelementptr inbounds float, float* %tmp24408, i64 1
+ %tmp24410 = getelementptr inbounds float, float* %tmp24409, i64 1
+ %tmp24411 = getelementptr inbounds float, float* %tmp24410, i64 1
+ %tmp24412 = getelementptr inbounds float, float* %tmp24411, i64 1
+ %tmp24413 = getelementptr inbounds float, float* %tmp24412, i64 1
+ %tmp24414 = getelementptr inbounds float, float* %tmp24413, i64 1
+ %tmp24415 = getelementptr inbounds float, float* %tmp24414, i64 1
+ %tmp24416 = getelementptr inbounds float, float* %tmp24415, i64 1
+ %tmp24417 = getelementptr inbounds float, float* %tmp24416, i64 1
+ %tmp24418 = getelementptr inbounds float, float* %tmp24417, i64 1
+ %tmp24419 = getelementptr inbounds float, float* %tmp24418, i64 1
+ %tmp24420 = getelementptr inbounds float, float* %tmp24419, i64 1
+ %tmp24421 = getelementptr inbounds float, float* %tmp24420, i64 1
+ %tmp24422 = getelementptr inbounds float, float* %tmp24421, i64 1
+ %tmp24423 = getelementptr inbounds float, float* %tmp24422, i64 1
+ %tmp24424 = getelementptr inbounds float, float* %tmp24423, i64 1
+ %tmp24425 = getelementptr inbounds float, float* %tmp24424, i64 1
+ %tmp24426 = getelementptr inbounds float, float* %tmp24425, i64 1
+ %tmp24427 = getelementptr inbounds float, float* %tmp24426, i64 1
+ %tmp24428 = getelementptr inbounds float, float* %tmp24427, i64 1
+ %tmp24429 = getelementptr inbounds float, float* %tmp24428, i64 1
+ %tmp24430 = getelementptr inbounds float, float* %tmp24429, i64 1
+ %tmp24431 = getelementptr inbounds float, float* %tmp24430, i64 1
+ %tmp24432 = getelementptr inbounds float, float* %tmp24431, i64 1
+ %tmp24433 = getelementptr inbounds float, float* %tmp24432, i64 1
+ %tmp24434 = getelementptr inbounds float, float* %tmp24433, i64 1
+ %tmp24435 = getelementptr inbounds float, float* %tmp24434, i64 1
+ %tmp24436 = getelementptr inbounds float, float* %tmp24435, i64 1
+ %tmp24437 = getelementptr inbounds float, float* %tmp24436, i64 1
+ %tmp24438 = getelementptr inbounds float, float* %tmp24437, i64 1
+ %tmp24439 = getelementptr inbounds float, float* %tmp24438, i64 1
+ %tmp24440 = getelementptr inbounds float, float* %tmp24439, i64 1
+ %tmp24441 = getelementptr inbounds float, float* %tmp24440, i64 1
+ %tmp24442 = getelementptr inbounds float, float* %tmp24441, i64 1
+ %tmp24443 = getelementptr inbounds float, float* %tmp24442, i64 1
+ %tmp24444 = getelementptr inbounds float, float* %tmp24443, i64 1
+ %tmp24445 = getelementptr inbounds float, float* %tmp24444, i64 1
+ %tmp24446 = getelementptr inbounds float, float* %tmp24445, i64 1
+ %tmp24447 = getelementptr inbounds float, float* %tmp24446, i64 1
+ %tmp24448 = getelementptr inbounds float, float* %tmp24447, i64 1
+ %tmp24449 = getelementptr inbounds float, float* %tmp24448, i64 1
+ %tmp24450 = getelementptr inbounds float, float* %tmp24449, i64 1
+ %tmp24451 = getelementptr inbounds float, float* %tmp24450, i64 1
+ %tmp24452 = getelementptr inbounds float, float* %tmp24451, i64 1
+ %tmp24453 = getelementptr inbounds float, float* %tmp24452, i64 1
+ %tmp24454 = getelementptr inbounds float, float* %tmp24453, i64 1
+ %tmp24455 = getelementptr inbounds float, float* %tmp24454, i64 1
+ %tmp24456 = getelementptr inbounds float, float* %tmp24455, i64 1
+ %tmp24457 = getelementptr inbounds float, float* %tmp24456, i64 1
+ %tmp24458 = getelementptr inbounds float, float* %tmp24457, i64 1
+ %tmp24459 = getelementptr inbounds float, float* %tmp24458, i64 1
+ %tmp24460 = getelementptr inbounds float, float* %tmp24459, i64 1
+ %tmp24461 = getelementptr inbounds float, float* %tmp24460, i64 1
+ %tmp24462 = getelementptr inbounds float, float* %tmp24461, i64 1
+ %tmp24463 = getelementptr inbounds float, float* %tmp24462, i64 1
+ %tmp24464 = getelementptr inbounds float, float* %tmp24463, i64 1
+ %tmp24465 = getelementptr inbounds float, float* %tmp24464, i64 1
+ %tmp24466 = getelementptr inbounds float, float* %tmp24465, i64 1
+ %tmp24467 = getelementptr inbounds float, float* %tmp24466, i64 1
+ %tmp24468 = getelementptr inbounds float, float* %tmp24467, i64 1
+ %tmp24469 = getelementptr inbounds float, float* %tmp24468, i64 1
+ %tmp24470 = getelementptr inbounds float, float* %tmp24469, i64 1
+ %tmp24471 = getelementptr inbounds float, float* %tmp24470, i64 1
+ %tmp24472 = getelementptr inbounds float, float* %tmp24471, i64 1
+ %tmp24473 = getelementptr inbounds float, float* %tmp24472, i64 1
+ %tmp24474 = getelementptr inbounds float, float* %tmp24473, i64 1
+ %tmp24475 = getelementptr inbounds float, float* %tmp24474, i64 1
+ %tmp24476 = getelementptr inbounds float, float* %tmp24475, i64 1
+ %tmp24477 = getelementptr inbounds float, float* %tmp24476, i64 1
+ %tmp24478 = getelementptr inbounds float, float* %tmp24477, i64 1
+ %tmp24479 = getelementptr inbounds float, float* %tmp24478, i64 1
+ %tmp24480 = getelementptr inbounds float, float* %tmp24479, i64 1
+ %tmp24481 = getelementptr inbounds float, float* %tmp24480, i64 1
+ %tmp24482 = getelementptr inbounds float, float* %tmp24481, i64 1
+ %tmp24483 = getelementptr inbounds float, float* %tmp24482, i64 1
+ %tmp24484 = getelementptr inbounds float, float* %tmp24483, i64 1
+ %tmp24485 = getelementptr inbounds float, float* %tmp24484, i64 1
+ %tmp24486 = getelementptr inbounds float, float* %tmp24485, i64 1
+ %tmp24487 = getelementptr inbounds float, float* %tmp24486, i64 1
+ %tmp24488 = getelementptr inbounds float, float* %tmp24487, i64 1
+ %tmp24489 = getelementptr inbounds float, float* %tmp24488, i64 1
+ %tmp24490 = getelementptr inbounds float, float* %tmp24489, i64 1
+ %tmp24491 = getelementptr inbounds float, float* %tmp24490, i64 1
+ %tmp24492 = getelementptr inbounds float, float* %tmp24491, i64 1
+ %tmp24493 = getelementptr inbounds float, float* %tmp24492, i64 1
+ %tmp24494 = getelementptr inbounds float, float* %tmp24493, i64 1
+ %tmp24495 = getelementptr inbounds float, float* %tmp24494, i64 1
+ %tmp24496 = getelementptr inbounds float, float* %tmp24495, i64 1
+ %tmp24497 = getelementptr inbounds float, float* %tmp24496, i64 1
+ %tmp24498 = getelementptr inbounds float, float* %tmp24497, i64 1
+ %tmp24499 = getelementptr inbounds float, float* %tmp24498, i64 1
+ %tmp24500 = getelementptr inbounds float, float* %tmp24499, i64 1
+ %tmp24501 = getelementptr inbounds float, float* %tmp24500, i64 1
+ %tmp24502 = getelementptr inbounds float, float* %tmp24501, i64 1
+ %tmp24503 = getelementptr inbounds float, float* %tmp24502, i64 1
+ %tmp24504 = getelementptr inbounds float, float* %tmp24503, i64 1
+ %tmp24505 = getelementptr inbounds float, float* %tmp24504, i64 1
+ %tmp24506 = getelementptr inbounds float, float* %tmp24505, i64 1
+ %tmp24507 = getelementptr inbounds float, float* %tmp24506, i64 1
+ %tmp24508 = getelementptr inbounds float, float* %tmp24507, i64 1
+ %tmp24509 = getelementptr inbounds float, float* %tmp24508, i64 1
+ %tmp24510 = getelementptr inbounds float, float* %tmp24509, i64 1
+ %tmp24511 = getelementptr inbounds float, float* %tmp24510, i64 1
+ %tmp24512 = getelementptr inbounds float, float* %tmp24511, i64 1
+ %tmp24513 = getelementptr inbounds float, float* %tmp24512, i64 1
+ %tmp24514 = getelementptr inbounds float, float* %tmp24513, i64 1
+ %tmp24515 = getelementptr inbounds float, float* %tmp24514, i64 1
+ %tmp24516 = getelementptr inbounds float, float* %tmp24515, i64 1
+ %tmp24517 = getelementptr inbounds float, float* %tmp24516, i64 1
+ %tmp24518 = getelementptr inbounds float, float* %tmp24517, i64 1
+ %tmp24519 = getelementptr inbounds float, float* %tmp24518, i64 1
+ %tmp24520 = getelementptr inbounds float, float* %tmp24519, i64 1
+ %tmp24521 = getelementptr inbounds float, float* %tmp24520, i64 1
+ %tmp24522 = getelementptr inbounds float, float* %tmp24521, i64 1
+ %tmp24523 = getelementptr inbounds float, float* %tmp24522, i64 1
+ %tmp24524 = getelementptr inbounds float, float* %tmp24523, i64 1
+ %tmp24525 = getelementptr inbounds float, float* %tmp24524, i64 1
+ %tmp24526 = getelementptr inbounds float, float* %tmp24525, i64 1
+ %tmp24527 = getelementptr inbounds float, float* %tmp24526, i64 1
+ %tmp24528 = getelementptr inbounds float, float* %tmp24527, i64 1
+ %tmp24529 = getelementptr inbounds float, float* %tmp24528, i64 1
+ %tmp24530 = getelementptr inbounds float, float* %tmp24529, i64 1
+ %tmp24531 = getelementptr inbounds float, float* %tmp24530, i64 1
+ %tmp24532 = getelementptr inbounds float, float* %tmp24531, i64 1
+ %tmp24533 = getelementptr inbounds float, float* %tmp24532, i64 1
+ %tmp24534 = getelementptr inbounds float, float* %tmp24533, i64 1
+ %tmp24535 = getelementptr inbounds float, float* %tmp24534, i64 1
+ %tmp24536 = getelementptr inbounds float, float* %tmp24535, i64 1
+ %tmp24537 = getelementptr inbounds float, float* %tmp24536, i64 1
+ %tmp24538 = getelementptr inbounds float, float* %tmp24537, i64 1
+ %tmp24539 = getelementptr inbounds float, float* %tmp24538, i64 1
+ %tmp24540 = getelementptr inbounds float, float* %tmp24539, i64 1
+ %tmp24541 = getelementptr inbounds float, float* %tmp24540, i64 1
+ %tmp24542 = getelementptr inbounds float, float* %tmp24541, i64 1
+ %tmp24543 = getelementptr inbounds float, float* %tmp24542, i64 1
+ %tmp24544 = getelementptr inbounds float, float* %tmp24543, i64 1
+ %tmp24545 = getelementptr inbounds float, float* %tmp24544, i64 1
+ %tmp24546 = getelementptr inbounds float, float* %tmp24545, i64 1
+ %tmp24547 = getelementptr inbounds float, float* %tmp24546, i64 1
+ %tmp24548 = getelementptr inbounds float, float* %tmp24547, i64 1
+ %tmp24549 = getelementptr inbounds float, float* %tmp24548, i64 1
+ %tmp24550 = getelementptr inbounds float, float* %tmp24549, i64 1
+ %tmp24551 = getelementptr inbounds float, float* %tmp24550, i64 1
+ %tmp24552 = getelementptr inbounds float, float* %tmp24551, i64 1
+ %tmp24553 = getelementptr inbounds float, float* %tmp24552, i64 1
+ %tmp24554 = getelementptr inbounds float, float* %tmp24553, i64 1
+ %tmp24555 = getelementptr inbounds float, float* %tmp24554, i64 1
+ %tmp24556 = getelementptr inbounds float, float* %tmp24555, i64 1
+ %tmp24557 = getelementptr inbounds float, float* %tmp24556, i64 1
+ %tmp24558 = getelementptr inbounds float, float* %tmp24557, i64 1
+ %tmp24559 = getelementptr inbounds float, float* %tmp24558, i64 1
+ %tmp24560 = getelementptr inbounds float, float* %tmp24559, i64 1
+ %tmp24561 = getelementptr inbounds float, float* %tmp24560, i64 1
+ %tmp24562 = getelementptr inbounds float, float* %tmp24561, i64 1
+ %tmp24563 = getelementptr inbounds float, float* %tmp24562, i64 1
+ %tmp24564 = getelementptr inbounds float, float* %tmp24563, i64 1
+ %tmp24565 = getelementptr inbounds float, float* %tmp24564, i64 1
+ %tmp24566 = getelementptr inbounds float, float* %tmp24565, i64 1
+ %tmp24567 = getelementptr inbounds float, float* %tmp24566, i64 1
+ %tmp24568 = getelementptr inbounds float, float* %tmp24567, i64 1
+ %tmp24569 = getelementptr inbounds float, float* %tmp24568, i64 1
+ %tmp24570 = getelementptr inbounds float, float* %tmp24569, i64 1
+ %tmp24571 = getelementptr inbounds float, float* %tmp24570, i64 1
+ %tmp24572 = getelementptr inbounds float, float* %tmp24571, i64 1
+ %tmp24573 = getelementptr inbounds float, float* %tmp24572, i64 1
+ %tmp24574 = getelementptr inbounds float, float* %tmp24573, i64 1
+ %tmp24575 = getelementptr inbounds float, float* %tmp24574, i64 1
+ %tmp24576 = getelementptr inbounds float, float* %tmp24575, i64 1
+ %tmp24577 = getelementptr inbounds float, float* %tmp24576, i64 1
+ %tmp24578 = getelementptr inbounds float, float* %tmp24577, i64 1
+ %tmp24579 = getelementptr inbounds float, float* %tmp24578, i64 1
+ %tmp24580 = getelementptr inbounds float, float* %tmp24579, i64 1
+ %tmp24581 = getelementptr inbounds float, float* %tmp24580, i64 1
+ %tmp24582 = getelementptr inbounds float, float* %tmp24581, i64 1
+ %tmp24583 = getelementptr inbounds float, float* %tmp24582, i64 1
+ %tmp24584 = getelementptr inbounds float, float* %tmp24583, i64 1
+ %tmp24585 = getelementptr inbounds float, float* %tmp24584, i64 1
+ %tmp24586 = getelementptr inbounds float, float* %tmp24585, i64 1
+ %tmp24587 = getelementptr inbounds float, float* %tmp24586, i64 1
+ %tmp24588 = getelementptr inbounds float, float* %tmp24587, i64 1
+ %tmp24589 = getelementptr inbounds float, float* %tmp24588, i64 1
+ %tmp24590 = getelementptr inbounds float, float* %tmp24589, i64 1
+ %tmp24591 = getelementptr inbounds float, float* %tmp24590, i64 1
+ %tmp24592 = getelementptr inbounds float, float* %tmp24591, i64 1
+ %tmp24593 = getelementptr inbounds float, float* %tmp24592, i64 1
+ %tmp24594 = getelementptr inbounds float, float* %tmp24593, i64 1
+ %tmp24595 = getelementptr inbounds float, float* %tmp24594, i64 1
+ %tmp24596 = getelementptr inbounds float, float* %tmp24595, i64 1
+ %tmp24597 = getelementptr inbounds float, float* %tmp24596, i64 1
+ %tmp24598 = getelementptr inbounds float, float* %tmp24597, i64 1
+ %tmp24599 = getelementptr inbounds float, float* %tmp24598, i64 1
+ %tmp24600 = getelementptr inbounds float, float* %tmp24599, i64 1
+ %tmp24601 = getelementptr inbounds float, float* %tmp24600, i64 1
+ %tmp24602 = getelementptr inbounds float, float* %tmp24601, i64 1
+ %tmp24603 = getelementptr inbounds float, float* %tmp24602, i64 1
+ %tmp24604 = getelementptr inbounds float, float* %tmp24603, i64 1
+ %tmp24605 = getelementptr inbounds float, float* %tmp24604, i64 1
+ %tmp24606 = getelementptr inbounds float, float* %tmp24605, i64 1
+ %tmp24607 = getelementptr inbounds float, float* %tmp24606, i64 1
+ %tmp24608 = getelementptr inbounds float, float* %tmp24607, i64 1
+ %tmp24609 = getelementptr inbounds float, float* %tmp24608, i64 1
+ %tmp24610 = getelementptr inbounds float, float* %tmp24609, i64 1
+ %tmp24611 = getelementptr inbounds float, float* %tmp24610, i64 1
+ %tmp24612 = getelementptr inbounds float, float* %tmp24611, i64 1
+ %tmp24613 = getelementptr inbounds float, float* %tmp24612, i64 1
+ %tmp24614 = getelementptr inbounds float, float* %tmp24613, i64 1
+ %tmp24615 = getelementptr inbounds float, float* %tmp24614, i64 1
+ %tmp24616 = getelementptr inbounds float, float* %tmp24615, i64 1
+ %tmp24617 = getelementptr inbounds float, float* %tmp24616, i64 1
+ %tmp24618 = getelementptr inbounds float, float* %tmp24617, i64 1
+ %tmp24619 = getelementptr inbounds float, float* %tmp24618, i64 1
+ %tmp24620 = getelementptr inbounds float, float* %tmp24619, i64 1
+ %tmp24621 = getelementptr inbounds float, float* %tmp24620, i64 1
+ %tmp24622 = getelementptr inbounds float, float* %tmp24621, i64 1
+ %tmp24623 = getelementptr inbounds float, float* %tmp24622, i64 1
+ %tmp24624 = getelementptr inbounds float, float* %tmp24623, i64 1
+ %tmp24625 = getelementptr inbounds float, float* %tmp24624, i64 1
+ %tmp24626 = getelementptr inbounds float, float* %tmp24625, i64 1
+ %tmp24627 = getelementptr inbounds float, float* %tmp24626, i64 1
+ %tmp24628 = getelementptr inbounds float, float* %tmp24627, i64 1
+ %tmp24629 = getelementptr inbounds float, float* %tmp24628, i64 1
+ %tmp24630 = getelementptr inbounds float, float* %tmp24629, i64 1
+ %tmp24631 = getelementptr inbounds float, float* %tmp24630, i64 1
+ %tmp24632 = getelementptr inbounds float, float* %tmp24631, i64 1
+ %tmp24633 = getelementptr inbounds float, float* %tmp24632, i64 1
+ %tmp24634 = getelementptr inbounds float, float* %tmp24633, i64 1
+ %tmp24635 = getelementptr inbounds float, float* %tmp24634, i64 1
+ %tmp24636 = getelementptr inbounds float, float* %tmp24635, i64 1
+ %tmp24637 = getelementptr inbounds float, float* %tmp24636, i64 1
+ %tmp24638 = getelementptr inbounds float, float* %tmp24637, i64 1
+ %tmp24639 = getelementptr inbounds float, float* %tmp24638, i64 1
+ %tmp24640 = getelementptr inbounds float, float* %tmp24639, i64 1
+ %tmp24641 = getelementptr inbounds float, float* %tmp24640, i64 1
+ %tmp24642 = getelementptr inbounds float, float* %tmp24641, i64 1
+ %tmp24643 = getelementptr inbounds float, float* %tmp24642, i64 1
+ %tmp24644 = getelementptr inbounds float, float* %tmp24643, i64 1
+ %tmp24645 = getelementptr inbounds float, float* %tmp24644, i64 1
+ %tmp24646 = getelementptr inbounds float, float* %tmp24645, i64 1
+ %tmp24647 = getelementptr inbounds float, float* %tmp24646, i64 1
+ %tmp24648 = getelementptr inbounds float, float* %tmp24647, i64 1
+ %tmp24649 = getelementptr inbounds float, float* %tmp24648, i64 1
+ %tmp24650 = getelementptr inbounds float, float* %tmp24649, i64 1
+ %tmp24651 = getelementptr inbounds float, float* %tmp24650, i64 1
+ %tmp24652 = getelementptr inbounds float, float* %tmp24651, i64 1
+ %tmp24653 = getelementptr inbounds float, float* %tmp24652, i64 1
+ %tmp24654 = getelementptr inbounds float, float* %tmp24653, i64 1
+ %tmp24655 = getelementptr inbounds float, float* %tmp24654, i64 1
+ %tmp24656 = getelementptr inbounds float, float* %tmp24655, i64 1
+ %tmp24657 = getelementptr inbounds float, float* %tmp24656, i64 1
+ %tmp24658 = getelementptr inbounds float, float* %tmp24657, i64 1
+ %tmp24659 = getelementptr inbounds float, float* %tmp24658, i64 1
+ %tmp24660 = getelementptr inbounds float, float* %tmp24659, i64 1
+ %tmp24661 = getelementptr inbounds float, float* %tmp24660, i64 1
+ %tmp24662 = getelementptr inbounds float, float* %tmp24661, i64 1
+ %tmp24663 = getelementptr inbounds float, float* %tmp24662, i64 1
+ %tmp24664 = getelementptr inbounds float, float* %tmp24663, i64 1
+ %tmp24665 = getelementptr inbounds float, float* %tmp24664, i64 1
+ %tmp24666 = getelementptr inbounds float, float* %tmp24665, i64 1
+ %tmp24667 = getelementptr inbounds float, float* %tmp24666, i64 1
+ %tmp24668 = getelementptr inbounds float, float* %tmp24667, i64 1
+ %tmp24669 = getelementptr inbounds float, float* %tmp24668, i64 1
+ %tmp24670 = getelementptr inbounds float, float* %tmp24669, i64 1
+ %tmp24671 = getelementptr inbounds float, float* %tmp24670, i64 1
+ %tmp24672 = getelementptr inbounds float, float* %tmp24671, i64 1
+ %tmp24673 = getelementptr inbounds float, float* %tmp24672, i64 1
+ %tmp24674 = getelementptr inbounds float, float* %tmp24673, i64 1
+ %tmp24675 = getelementptr inbounds float, float* %tmp24674, i64 1
+ %tmp24676 = getelementptr inbounds float, float* %tmp24675, i64 1
+ %tmp24677 = getelementptr inbounds float, float* %tmp24676, i64 1
+ %tmp24678 = getelementptr inbounds float, float* %tmp24677, i64 1
+ %tmp24679 = getelementptr inbounds float, float* %tmp24678, i64 1
+ %tmp24680 = getelementptr inbounds float, float* %tmp24679, i64 1
+ %tmp24681 = getelementptr inbounds float, float* %tmp24680, i64 1
+ %tmp24682 = getelementptr inbounds float, float* %tmp24681, i64 1
+ %tmp24683 = getelementptr inbounds float, float* %tmp24682, i64 1
+ %tmp24684 = getelementptr inbounds float, float* %tmp24683, i64 1
+ %tmp24685 = getelementptr inbounds float, float* %tmp24684, i64 1
+ %tmp24686 = getelementptr inbounds float, float* %tmp24685, i64 1
+ %tmp24687 = getelementptr inbounds float, float* %tmp24686, i64 1
+ %tmp24688 = getelementptr inbounds float, float* %tmp24687, i64 1
+ %tmp24689 = getelementptr inbounds float, float* %tmp24688, i64 1
+ %tmp24690 = getelementptr inbounds float, float* %tmp24689, i64 1
+ %tmp24691 = getelementptr inbounds float, float* %tmp24690, i64 1
+ %tmp24692 = getelementptr inbounds float, float* %tmp24691, i64 1
+ %tmp24693 = getelementptr inbounds float, float* %tmp24692, i64 1
+ %tmp24694 = getelementptr inbounds float, float* %tmp24693, i64 1
+ %tmp24695 = getelementptr inbounds float, float* %tmp24694, i64 1
+ %tmp24696 = getelementptr inbounds float, float* %tmp24695, i64 1
+ %tmp24697 = getelementptr inbounds float, float* %tmp24696, i64 1
+ %tmp24698 = getelementptr inbounds float, float* %tmp24697, i64 1
+ %tmp24699 = getelementptr inbounds float, float* %tmp24698, i64 1
+ %tmp24700 = getelementptr inbounds float, float* %tmp24699, i64 1
+ %tmp24701 = getelementptr inbounds float, float* %tmp24700, i64 1
+ %tmp24702 = getelementptr inbounds float, float* %tmp24701, i64 1
+ %tmp24703 = getelementptr inbounds float, float* %tmp24702, i64 1
+ %tmp24704 = getelementptr inbounds float, float* %tmp24703, i64 1
+ %tmp24705 = getelementptr inbounds float, float* %tmp24704, i64 1
+ %tmp24706 = getelementptr inbounds float, float* %tmp24705, i64 1
+ %tmp24707 = getelementptr inbounds float, float* %tmp24706, i64 1
+ %tmp24708 = getelementptr inbounds float, float* %tmp24707, i64 1
+ %tmp24709 = getelementptr inbounds float, float* %tmp24708, i64 1
+ %tmp24710 = getelementptr inbounds float, float* %tmp24709, i64 1
+ %tmp24711 = getelementptr inbounds float, float* %tmp24710, i64 1
+ %tmp24712 = getelementptr inbounds float, float* %tmp24711, i64 1
+ %tmp24713 = getelementptr inbounds float, float* %tmp24712, i64 1
+ %tmp24714 = getelementptr inbounds float, float* %tmp24713, i64 1
+ %tmp24715 = getelementptr inbounds float, float* %tmp24714, i64 1
+ %tmp24716 = getelementptr inbounds float, float* %tmp24715, i64 1
+ %tmp24717 = getelementptr inbounds float, float* %tmp24716, i64 1
+ %tmp24718 = getelementptr inbounds float, float* %tmp24717, i64 1
+ %tmp24719 = getelementptr inbounds float, float* %tmp24718, i64 1
+ %tmp24720 = getelementptr inbounds float, float* %tmp24719, i64 1
+ %tmp24721 = getelementptr inbounds float, float* %tmp24720, i64 1
+ %tmp24722 = getelementptr inbounds float, float* %tmp24721, i64 1
+ %tmp24723 = getelementptr inbounds float, float* %tmp24722, i64 1
+ %tmp24724 = getelementptr inbounds float, float* %tmp24723, i64 1
+ %tmp24725 = getelementptr inbounds float, float* %tmp24724, i64 1
+ %tmp24726 = getelementptr inbounds float, float* %tmp24725, i64 1
+ %tmp24727 = getelementptr inbounds float, float* %tmp24726, i64 1
+ %tmp24728 = getelementptr inbounds float, float* %tmp24727, i64 1
+ %tmp24729 = getelementptr inbounds float, float* %tmp24728, i64 1
+ %tmp24730 = getelementptr inbounds float, float* %tmp24729, i64 1
+ %tmp24731 = getelementptr inbounds float, float* %tmp24730, i64 1
+ %tmp24732 = getelementptr inbounds float, float* %tmp24731, i64 1
+ %tmp24733 = getelementptr inbounds float, float* %tmp24732, i64 1
+ %tmp24734 = getelementptr inbounds float, float* %tmp24733, i64 1
+ %tmp24735 = getelementptr inbounds float, float* %tmp24734, i64 1
+ %tmp24736 = getelementptr inbounds float, float* %tmp24735, i64 1
+ %tmp24737 = getelementptr inbounds float, float* %tmp24736, i64 1
+ %tmp24738 = getelementptr inbounds float, float* %tmp24737, i64 1
+ %tmp24739 = getelementptr inbounds float, float* %tmp24738, i64 1
+ %tmp24740 = getelementptr inbounds float, float* %tmp24739, i64 1
+ %tmp24741 = getelementptr inbounds float, float* %tmp24740, i64 1
+ %tmp24742 = getelementptr inbounds float, float* %tmp24741, i64 1
+ %tmp24743 = getelementptr inbounds float, float* %tmp24742, i64 1
+ %tmp24744 = getelementptr inbounds float, float* %tmp24743, i64 1
+ %tmp24745 = getelementptr inbounds float, float* %tmp24744, i64 1
+ %tmp24746 = getelementptr inbounds float, float* %tmp24745, i64 1
+ %tmp24747 = getelementptr inbounds float, float* %tmp24746, i64 1
+ %tmp24748 = getelementptr inbounds float, float* %tmp24747, i64 1
+ %tmp24749 = getelementptr inbounds float, float* %tmp24748, i64 1
+ %tmp24750 = getelementptr inbounds float, float* %tmp24749, i64 1
+ %tmp24751 = getelementptr inbounds float, float* %tmp24750, i64 1
+ %tmp24752 = getelementptr inbounds float, float* %tmp24751, i64 1
+ %tmp24753 = getelementptr inbounds float, float* %tmp24752, i64 1
+ %tmp24754 = getelementptr inbounds float, float* %tmp24753, i64 1
+ %tmp24755 = getelementptr inbounds float, float* %tmp24754, i64 1
+ %tmp24756 = getelementptr inbounds float, float* %tmp24755, i64 1
+ %tmp24757 = getelementptr inbounds float, float* %tmp24756, i64 1
+ %tmp24758 = getelementptr inbounds float, float* %tmp24757, i64 1
+ %tmp24759 = getelementptr inbounds float, float* %tmp24758, i64 1
+ %tmp24760 = getelementptr inbounds float, float* %tmp24759, i64 1
+ %tmp24761 = getelementptr inbounds float, float* %tmp24760, i64 1
+ %tmp24762 = getelementptr inbounds float, float* %tmp24761, i64 1
+ %tmp24763 = getelementptr inbounds float, float* %tmp24762, i64 1
+ %tmp24764 = getelementptr inbounds float, float* %tmp24763, i64 1
+ %tmp24765 = getelementptr inbounds float, float* %tmp24764, i64 1
+ %tmp24766 = getelementptr inbounds float, float* %tmp24765, i64 1
+ %tmp24767 = getelementptr inbounds float, float* %tmp24766, i64 1
+ %tmp24768 = getelementptr inbounds float, float* %tmp24767, i64 1
+ %tmp24769 = getelementptr inbounds float, float* %tmp24768, i64 1
+ %tmp24770 = getelementptr inbounds float, float* %tmp24769, i64 1
+ %tmp24771 = getelementptr inbounds float, float* %tmp24770, i64 1
+ %tmp24772 = getelementptr inbounds float, float* %tmp24771, i64 1
+ %tmp24773 = getelementptr inbounds float, float* %tmp24772, i64 1
+ %tmp24774 = getelementptr inbounds float, float* %tmp24773, i64 1
+ %tmp24775 = getelementptr inbounds float, float* %tmp24774, i64 1
+ %tmp24776 = getelementptr inbounds float, float* %tmp24775, i64 1
+ %tmp24777 = getelementptr inbounds float, float* %tmp24776, i64 1
+ %tmp24778 = getelementptr inbounds float, float* %tmp24777, i64 1
+ %tmp24779 = getelementptr inbounds float, float* %tmp24778, i64 1
+ %tmp24780 = getelementptr inbounds float, float* %tmp24779, i64 1
+ %tmp24781 = getelementptr inbounds float, float* %tmp24780, i64 1
+ %tmp24782 = getelementptr inbounds float, float* %tmp24781, i64 1
+ %tmp24783 = getelementptr inbounds float, float* %tmp24782, i64 1
+ %tmp24784 = getelementptr inbounds float, float* %tmp24783, i64 1
+ %tmp24785 = getelementptr inbounds float, float* %tmp24784, i64 1
+ %tmp24786 = getelementptr inbounds float, float* %tmp24785, i64 1
+ %tmp24787 = getelementptr inbounds float, float* %tmp24786, i64 1
+ %tmp24788 = getelementptr inbounds float, float* %tmp24787, i64 1
+ %tmp24789 = getelementptr inbounds float, float* %tmp24788, i64 1
+ %tmp24790 = getelementptr inbounds float, float* %tmp24789, i64 1
+ %tmp24791 = getelementptr inbounds float, float* %tmp24790, i64 1
+ %tmp24792 = getelementptr inbounds float, float* %tmp24791, i64 1
+ %tmp24793 = getelementptr inbounds float, float* %tmp24792, i64 1
+ %tmp24794 = getelementptr inbounds float, float* %tmp24793, i64 1
+ %tmp24795 = getelementptr inbounds float, float* %tmp24794, i64 1
+ %tmp24796 = getelementptr inbounds float, float* %tmp24795, i64 1
+ %tmp24797 = getelementptr inbounds float, float* %tmp24796, i64 1
+ %tmp24798 = getelementptr inbounds float, float* %tmp24797, i64 1
+ %tmp24799 = getelementptr inbounds float, float* %tmp24798, i64 1
+ %tmp24800 = getelementptr inbounds float, float* %tmp24799, i64 1
+ %tmp24801 = getelementptr inbounds float, float* %tmp24800, i64 1
+ %tmp24802 = getelementptr inbounds float, float* %tmp24801, i64 1
+ %tmp24803 = getelementptr inbounds float, float* %tmp24802, i64 1
+ %tmp24804 = getelementptr inbounds float, float* %tmp24803, i64 1
+ %tmp24805 = getelementptr inbounds float, float* %tmp24804, i64 1
+ %tmp24806 = getelementptr inbounds float, float* %tmp24805, i64 1
+ %tmp24807 = getelementptr inbounds float, float* %tmp24806, i64 1
+ %tmp24808 = getelementptr inbounds float, float* %tmp24807, i64 1
+ %tmp24809 = getelementptr inbounds float, float* %tmp24808, i64 1
+ %tmp24810 = getelementptr inbounds float, float* %tmp24809, i64 1
+ %tmp24811 = getelementptr inbounds float, float* %tmp24810, i64 1
+ %tmp24812 = getelementptr inbounds float, float* %tmp24811, i64 1
+ %tmp24813 = getelementptr inbounds float, float* %tmp24812, i64 1
+ %tmp24814 = getelementptr inbounds float, float* %tmp24813, i64 1
+ %tmp24815 = getelementptr inbounds float, float* %tmp24814, i64 1
+ %tmp24816 = getelementptr inbounds float, float* %tmp24815, i64 1
+ %tmp24817 = getelementptr inbounds float, float* %tmp24816, i64 1
+ %tmp24818 = getelementptr inbounds float, float* %tmp24817, i64 1
+ %tmp24819 = getelementptr inbounds float, float* %tmp24818, i64 1
+ %tmp24820 = getelementptr inbounds float, float* %tmp24819, i64 1
+ %tmp24821 = getelementptr inbounds float, float* %tmp24820, i64 1
+ %tmp24822 = getelementptr inbounds float, float* %tmp24821, i64 1
+ %tmp24823 = getelementptr inbounds float, float* %tmp24822, i64 1
+ %tmp24824 = getelementptr inbounds float, float* %tmp24823, i64 1
+ %tmp24825 = getelementptr inbounds float, float* %tmp24824, i64 1
+ %tmp24826 = getelementptr inbounds float, float* %tmp24825, i64 1
+ %tmp24827 = getelementptr inbounds float, float* %tmp24826, i64 1
+ %tmp24828 = getelementptr inbounds float, float* %tmp24827, i64 1
+ %tmp24829 = getelementptr inbounds float, float* %tmp24828, i64 1
+ %tmp24830 = getelementptr inbounds float, float* %tmp24829, i64 1
+ %tmp24831 = getelementptr inbounds float, float* %tmp24830, i64 1
+ %tmp24832 = getelementptr inbounds float, float* %tmp24831, i64 1
+ %tmp24833 = getelementptr inbounds float, float* %tmp24832, i64 1
+ %tmp24834 = getelementptr inbounds float, float* %tmp24833, i64 1
+ %tmp24835 = getelementptr inbounds float, float* %tmp24834, i64 1
+ %tmp24836 = getelementptr inbounds float, float* %tmp24835, i64 1
+ %tmp24837 = getelementptr inbounds float, float* %tmp24836, i64 1
+ %tmp24838 = getelementptr inbounds float, float* %tmp24837, i64 1
+ %tmp24839 = getelementptr inbounds float, float* %tmp24838, i64 1
+ %tmp24840 = getelementptr inbounds float, float* %tmp24839, i64 1
+ %tmp24841 = getelementptr inbounds float, float* %tmp24840, i64 1
+ %tmp24842 = getelementptr inbounds float, float* %tmp24841, i64 1
+ %tmp24843 = getelementptr inbounds float, float* %tmp24842, i64 1
+ %tmp24844 = getelementptr inbounds float, float* %tmp24843, i64 1
+ %tmp24845 = getelementptr inbounds float, float* %tmp24844, i64 1
+ %tmp24846 = getelementptr inbounds float, float* %tmp24845, i64 1
+ %tmp24847 = getelementptr inbounds float, float* %tmp24846, i64 1
+ %tmp24848 = getelementptr inbounds float, float* %tmp24847, i64 1
+ %tmp24849 = getelementptr inbounds float, float* %tmp24848, i64 1
+ %tmp24850 = getelementptr inbounds float, float* %tmp24849, i64 1
+ %tmp24851 = getelementptr inbounds float, float* %tmp24850, i64 1
+ %tmp24852 = getelementptr inbounds float, float* %tmp24851, i64 1
+ %tmp24853 = getelementptr inbounds float, float* %tmp24852, i64 1
+ %tmp24854 = getelementptr inbounds float, float* %tmp24853, i64 1
+ %tmp24855 = getelementptr inbounds float, float* %tmp24854, i64 1
+ %tmp24856 = getelementptr inbounds float, float* %tmp24855, i64 1
+ %tmp24857 = getelementptr inbounds float, float* %tmp24856, i64 1
+ %tmp24858 = getelementptr inbounds float, float* %tmp24857, i64 1
+ %tmp24859 = getelementptr inbounds float, float* %tmp24858, i64 1
+ %tmp24860 = getelementptr inbounds float, float* %tmp24859, i64 1
+ %tmp24861 = getelementptr inbounds float, float* %tmp24860, i64 1
+ %tmp24862 = getelementptr inbounds float, float* %tmp24861, i64 1
+ %tmp24863 = getelementptr inbounds float, float* %tmp24862, i64 1
+ %tmp24864 = getelementptr inbounds float, float* %tmp24863, i64 1
+ %tmp24865 = getelementptr inbounds float, float* %tmp24864, i64 1
+ %tmp24866 = getelementptr inbounds float, float* %tmp24865, i64 1
+ %tmp24867 = getelementptr inbounds float, float* %tmp24866, i64 1
+ %tmp24868 = getelementptr inbounds float, float* %tmp24867, i64 1
+ %tmp24869 = getelementptr inbounds float, float* %tmp24868, i64 1
+ %tmp24870 = getelementptr inbounds float, float* %tmp24869, i64 1
+ %tmp24871 = getelementptr inbounds float, float* %tmp24870, i64 1
+ %tmp24872 = getelementptr inbounds float, float* %tmp24871, i64 1
+ %tmp24873 = getelementptr inbounds float, float* %tmp24872, i64 1
+ %tmp24874 = getelementptr inbounds float, float* %tmp24873, i64 1
+ %tmp24875 = getelementptr inbounds float, float* %tmp24874, i64 1
+ %tmp24876 = getelementptr inbounds float, float* %tmp24875, i64 1
+ %tmp24877 = getelementptr inbounds float, float* %tmp24876, i64 1
+ %tmp24878 = getelementptr inbounds float, float* %tmp24877, i64 1
+ %tmp24879 = getelementptr inbounds float, float* %tmp24878, i64 1
+ %tmp24880 = getelementptr inbounds float, float* %tmp24879, i64 1
+ %tmp24881 = getelementptr inbounds float, float* %tmp24880, i64 1
+ %tmp24882 = getelementptr inbounds float, float* %tmp24881, i64 1
+ %tmp24883 = getelementptr inbounds float, float* %tmp24882, i64 1
+ %tmp24884 = getelementptr inbounds float, float* %tmp24883, i64 1
+ %tmp24885 = getelementptr inbounds float, float* %tmp24884, i64 1
+ %tmp24886 = getelementptr inbounds float, float* %tmp24885, i64 1
+ %tmp24887 = getelementptr inbounds float, float* %tmp24886, i64 1
+ %tmp24888 = getelementptr inbounds float, float* %tmp24887, i64 1
+ %tmp24889 = getelementptr inbounds float, float* %tmp24888, i64 1
+ %tmp24890 = getelementptr inbounds float, float* %tmp24889, i64 1
+ %tmp24891 = getelementptr inbounds float, float* %tmp24890, i64 1
+ %tmp24892 = getelementptr inbounds float, float* %tmp24891, i64 1
+ %tmp24893 = getelementptr inbounds float, float* %tmp24892, i64 1
+ %tmp24894 = getelementptr inbounds float, float* %tmp24893, i64 1
+ %tmp24895 = getelementptr inbounds float, float* %tmp24894, i64 1
+ %tmp24896 = getelementptr inbounds float, float* %tmp24895, i64 1
+ %tmp24897 = getelementptr inbounds float, float* %tmp24896, i64 1
+ %tmp24898 = getelementptr inbounds float, float* %tmp24897, i64 1
+ %tmp24899 = getelementptr inbounds float, float* %tmp24898, i64 1
+ %tmp24900 = getelementptr inbounds float, float* %tmp24899, i64 1
+ %tmp24901 = getelementptr inbounds float, float* %tmp24900, i64 1
+ %tmp24902 = getelementptr inbounds float, float* %tmp24901, i64 1
+ %tmp24903 = getelementptr inbounds float, float* %tmp24902, i64 1
+ %tmp24904 = getelementptr inbounds float, float* %tmp24903, i64 1
+ %tmp24905 = getelementptr inbounds float, float* %tmp24904, i64 1
+ %tmp24906 = getelementptr inbounds float, float* %tmp24905, i64 1
+ %tmp24907 = getelementptr inbounds float, float* %tmp24906, i64 1
+ %tmp24908 = getelementptr inbounds float, float* %tmp24907, i64 1
+ %tmp24909 = getelementptr inbounds float, float* %tmp24908, i64 1
+ %tmp24910 = getelementptr inbounds float, float* %tmp24909, i64 1
+ %tmp24911 = getelementptr inbounds float, float* %tmp24910, i64 1
+ %tmp24912 = getelementptr inbounds float, float* %tmp24911, i64 1
+ %tmp24913 = getelementptr inbounds float, float* %tmp24912, i64 1
+ %tmp24914 = getelementptr inbounds float, float* %tmp24913, i64 1
+ %tmp24915 = getelementptr inbounds float, float* %tmp24914, i64 1
+ %tmp24916 = getelementptr inbounds float, float* %tmp24915, i64 1
+ %tmp24917 = getelementptr inbounds float, float* %tmp24916, i64 1
+ %tmp24918 = getelementptr inbounds float, float* %tmp24917, i64 1
+ %tmp24919 = getelementptr inbounds float, float* %tmp24918, i64 1
+ %tmp24920 = getelementptr inbounds float, float* %tmp24919, i64 1
+ %tmp24921 = getelementptr inbounds float, float* %tmp24920, i64 1
+ %tmp24922 = getelementptr inbounds float, float* %tmp24921, i64 1
+ %tmp24923 = getelementptr inbounds float, float* %tmp24922, i64 1
+ %tmp24924 = getelementptr inbounds float, float* %tmp24923, i64 1
+ %tmp24925 = getelementptr inbounds float, float* %tmp24924, i64 1
+ %tmp24926 = getelementptr inbounds float, float* %tmp24925, i64 1
+ %tmp24927 = getelementptr inbounds float, float* %tmp24926, i64 1
+ %tmp24928 = getelementptr inbounds float, float* %tmp24927, i64 1
+ %tmp24929 = getelementptr inbounds float, float* %tmp24928, i64 1
+ %tmp24930 = getelementptr inbounds float, float* %tmp24929, i64 1
+ %tmp24931 = getelementptr inbounds float, float* %tmp24930, i64 1
+ %tmp24932 = getelementptr inbounds float, float* %tmp24931, i64 1
+ %tmp24933 = getelementptr inbounds float, float* %tmp24932, i64 1
+ %tmp24934 = getelementptr inbounds float, float* %tmp24933, i64 1
+ %tmp24935 = getelementptr inbounds float, float* %tmp24934, i64 1
+ %tmp24936 = getelementptr inbounds float, float* %tmp24935, i64 1
+ %tmp24937 = getelementptr inbounds float, float* %tmp24936, i64 1
+ %tmp24938 = getelementptr inbounds float, float* %tmp24937, i64 1
+ %tmp24939 = getelementptr inbounds float, float* %tmp24938, i64 1
+ %tmp24940 = getelementptr inbounds float, float* %tmp24939, i64 1
+ %tmp24941 = getelementptr inbounds float, float* %tmp24940, i64 1
+ %tmp24942 = getelementptr inbounds float, float* %tmp24941, i64 1
+ %tmp24943 = getelementptr inbounds float, float* %tmp24942, i64 1
+ %tmp24944 = getelementptr inbounds float, float* %tmp24943, i64 1
+ %tmp24945 = getelementptr inbounds float, float* %tmp24944, i64 1
+ %tmp24946 = getelementptr inbounds float, float* %tmp24945, i64 1
store float 0x3F43FD0D00000000, float* %tmp24946
- %tmp24947 = getelementptr inbounds float* undef, i64 1
- %tmp24948 = getelementptr inbounds float* undef, i64 1
- %tmp24949 = getelementptr inbounds float* undef, i64 1
- %tmp24950 = getelementptr inbounds float* undef, i64 1
- %tmp24951 = getelementptr inbounds float* %tmp24950, i64 1
- %tmp24952 = getelementptr inbounds float* undef, i64 1
- %tmp24953 = getelementptr inbounds float* undef, i64 1
- %tmp24954 = getelementptr inbounds float* undef, i64 1
- %tmp24955 = getelementptr inbounds float* undef, i64 1
- %tmp24956 = getelementptr inbounds float* undef, i64 1
- %tmp24957 = getelementptr inbounds float* undef, i64 1
- %tmp24958 = getelementptr inbounds float* %tmp24957, i64 1
- %tmp24959 = getelementptr inbounds float* undef, i64 1
- %tmp24960 = getelementptr inbounds float* undef, i64 1
- %tmp24961 = getelementptr inbounds float* undef, i64 1
- %tmp24962 = getelementptr inbounds float* undef, i64 1
- %tmp24963 = getelementptr inbounds float* undef, i64 1
- %tmp24964 = getelementptr inbounds float* undef, i64 1
- %tmp24965 = getelementptr inbounds float* undef, i64 1
- %tmp24966 = getelementptr inbounds float* %tmp24965, i64 1
- %tmp24967 = getelementptr inbounds float* undef, i64 1
- %tmp24968 = getelementptr inbounds float* undef, i64 1
- %tmp24969 = getelementptr inbounds float* undef, i64 1
- %tmp24970 = getelementptr inbounds float* undef, i64 1
- %tmp24971 = getelementptr inbounds float* %tmp24970, i64 1
- %tmp24972 = getelementptr inbounds float* %tmp24971, i64 1
- %tmp24973 = getelementptr inbounds float* %tmp24972, i64 1
- %tmp24974 = getelementptr inbounds float* undef, i64 1
- %tmp24975 = getelementptr inbounds float* undef, i64 1
- %tmp24976 = getelementptr inbounds float* %tmp24975, i64 1
- %tmp24977 = getelementptr inbounds float* undef, i64 1
- %tmp24978 = getelementptr inbounds float* undef, i64 1
- %tmp24979 = getelementptr inbounds float* undef, i64 1
- %tmp24980 = getelementptr inbounds float* undef, i64 1
- %tmp24981 = getelementptr inbounds float* undef, i64 1
- %tmp24982 = getelementptr inbounds float* undef, i64 1
- %tmp24983 = getelementptr inbounds float* %tmp24982, i64 1
- %tmp24984 = getelementptr inbounds float* undef, i64 1
- %tmp24985 = getelementptr inbounds float* %tmp24984, i64 1
- %tmp24986 = getelementptr inbounds float* undef, i64 1
- %tmp24987 = getelementptr inbounds float* %tmp24986, i64 1
- %tmp24988 = getelementptr inbounds float* %tmp24987, i64 1
- %tmp24989 = getelementptr inbounds float* undef, i64 1
- %tmp24990 = getelementptr inbounds float* undef, i64 1
- %tmp24991 = getelementptr inbounds float* %tmp24990, i64 1
- %tmp24992 = getelementptr inbounds float* undef, i64 1
- %tmp24993 = getelementptr inbounds float* %tmp24992, i64 1
- %tmp24994 = getelementptr inbounds float* %tmp24993, i64 1
- %tmp24995 = getelementptr inbounds float* undef, i64 1
- %tmp24996 = getelementptr inbounds float* undef, i64 1
- %tmp24997 = getelementptr inbounds float* undef, i64 1
- %tmp24998 = getelementptr inbounds float* undef, i64 1
- %tmp24999 = getelementptr inbounds float* undef, i64 1
- %tmp25000 = getelementptr inbounds float* undef, i64 1
- %tmp25001 = getelementptr inbounds float* undef, i64 1
- %tmp25002 = getelementptr inbounds float* undef, i64 1
- %tmp25003 = getelementptr inbounds float* undef, i64 1
- %tmp25004 = getelementptr inbounds float* undef, i64 1
- %tmp25005 = getelementptr inbounds float* undef, i64 1
- %tmp25006 = getelementptr inbounds float* undef, i64 1
- %tmp25007 = getelementptr inbounds float* undef, i64 1
- %tmp25008 = getelementptr inbounds float* undef, i64 1
- %tmp25009 = getelementptr inbounds float* undef, i64 1
- %tmp25010 = getelementptr inbounds float* undef, i64 1
- %tmp25011 = getelementptr inbounds float* undef, i64 1
- %tmp25012 = getelementptr inbounds float* %tmp25011, i64 1
- %tmp25013 = getelementptr inbounds float* undef, i64 1
- %tmp25014 = getelementptr inbounds float* undef, i64 1
- %tmp25015 = getelementptr inbounds float* undef, i64 1
- %tmp25016 = getelementptr inbounds float* undef, i64 1
- %tmp25017 = getelementptr inbounds float* %tmp25016, i64 1
- %tmp25018 = getelementptr inbounds float* undef, i64 1
- %tmp25019 = getelementptr inbounds float* undef, i64 1
- %tmp25020 = getelementptr inbounds float* undef, i64 1
- %tmp25021 = getelementptr inbounds float* undef, i64 1
- %tmp25022 = getelementptr inbounds float* undef, i64 1
- %tmp25023 = getelementptr inbounds float* %tmp25022, i64 1
- %tmp25024 = getelementptr inbounds float* %tmp25023, i64 1
- %tmp25025 = getelementptr inbounds float* undef, i64 1
- %tmp25026 = getelementptr inbounds float* undef, i64 1
- %tmp25027 = getelementptr inbounds float* undef, i64 1
- %tmp25028 = getelementptr inbounds float* undef, i64 1
- %tmp25029 = getelementptr inbounds float* undef, i64 1
- %tmp25030 = getelementptr inbounds float* undef, i64 1
- %tmp25031 = getelementptr inbounds float* undef, i64 1
- %tmp25032 = getelementptr inbounds float* undef, i64 1
- %tmp25033 = getelementptr inbounds float* undef, i64 1
- %tmp25034 = getelementptr inbounds float* undef, i64 1
- %tmp25035 = getelementptr inbounds float* %tmp25034, i64 1
- %tmp25036 = getelementptr inbounds float* undef, i64 1
- %tmp25037 = getelementptr inbounds float* undef, i64 1
- %tmp25038 = getelementptr inbounds float* %tmp25037, i64 1
- %tmp25039 = getelementptr inbounds float* undef, i64 1
- %tmp25040 = getelementptr inbounds float* undef, i64 1
- %tmp25041 = getelementptr inbounds float* undef, i64 1
- %tmp25042 = getelementptr inbounds float* undef, i64 1
- %tmp25043 = getelementptr inbounds float* undef, i64 1
- %tmp25044 = getelementptr inbounds float* undef, i64 1
- %tmp25045 = getelementptr inbounds float* %tmp25044, i64 1
- %tmp25046 = getelementptr inbounds float* undef, i64 1
- %tmp25047 = getelementptr inbounds float* %tmp25046, i64 1
- %tmp25048 = getelementptr inbounds float* undef, i64 1
- %tmp25049 = getelementptr inbounds float* %tmp25048, i64 1
- %tmp25050 = getelementptr inbounds float* %tmp25049, i64 1
- %tmp25051 = getelementptr inbounds float* undef, i64 1
- %tmp25052 = getelementptr inbounds float* undef, i64 1
- %tmp25053 = getelementptr inbounds float* undef, i64 1
- %tmp25054 = getelementptr inbounds float* undef, i64 1
- %tmp25055 = getelementptr inbounds float* undef, i64 1
- %tmp25056 = getelementptr inbounds float* undef, i64 1
- %tmp25057 = getelementptr inbounds float* undef, i64 1
- %tmp25058 = getelementptr inbounds float* undef, i64 1
- %tmp25059 = getelementptr inbounds float* undef, i64 1
- %tmp25060 = getelementptr inbounds float* undef, i64 1
- %tmp25061 = getelementptr inbounds float* undef, i64 1
- %tmp25062 = getelementptr inbounds float* undef, i64 1
- %tmp25063 = getelementptr inbounds float* undef, i64 1
- %tmp25064 = getelementptr inbounds float* undef, i64 1
- %tmp25065 = getelementptr inbounds float* undef, i64 1
- %tmp25066 = getelementptr inbounds float* undef, i64 1
- %tmp25067 = getelementptr inbounds float* %tmp25066, i64 1
- %tmp25068 = getelementptr inbounds float* undef, i64 1
- %tmp25069 = getelementptr inbounds float* %tmp25068, i64 1
- %tmp25070 = getelementptr inbounds float* undef, i64 1
- %tmp25071 = getelementptr inbounds float* undef, i64 1
- %tmp25072 = getelementptr inbounds float* undef, i64 1
- %tmp25073 = getelementptr inbounds float* undef, i64 1
- %tmp25074 = getelementptr inbounds float* undef, i64 1
- %tmp25075 = getelementptr inbounds float* %tmp25074, i64 1
- %tmp25076 = getelementptr inbounds float* undef, i64 1
- %tmp25077 = getelementptr inbounds float* undef, i64 1
- %tmp25078 = getelementptr inbounds float* undef, i64 1
- %tmp25079 = getelementptr inbounds float* undef, i64 1
- %tmp25080 = getelementptr inbounds float* undef, i64 1
- %tmp25081 = getelementptr inbounds float* undef, i64 1
- %tmp25082 = getelementptr inbounds float* undef, i64 1
- %tmp25083 = getelementptr inbounds float* undef, i64 1
- %tmp25084 = getelementptr inbounds float* undef, i64 1
- %tmp25085 = getelementptr inbounds float* undef, i64 1
- %tmp25086 = getelementptr inbounds float* undef, i64 1
- %tmp25087 = getelementptr inbounds float* undef, i64 1
- %tmp25088 = getelementptr inbounds float* undef, i64 1
- %tmp25089 = getelementptr inbounds float* undef, i64 1
- %tmp25090 = getelementptr inbounds float* undef, i64 1
- %tmp25091 = getelementptr inbounds float* undef, i64 1
- %tmp25092 = getelementptr inbounds float* undef, i64 1
- %tmp25093 = getelementptr inbounds float* undef, i64 1
- %tmp25094 = getelementptr inbounds float* undef, i64 1
- %tmp25095 = getelementptr inbounds float* %tmp25094, i64 1
- %tmp25096 = getelementptr inbounds float* undef, i64 1
- %tmp25097 = getelementptr inbounds float* %tmp25096, i64 1
- %tmp25098 = getelementptr inbounds float* %tmp25097, i64 1
- %tmp25099 = getelementptr inbounds float* undef, i64 1
- %tmp25100 = getelementptr inbounds float* undef, i64 1
- %tmp25101 = getelementptr inbounds float* undef, i64 1
- %tmp25102 = getelementptr inbounds float* undef, i64 1
- %tmp25103 = getelementptr inbounds float* undef, i64 1
- %tmp25104 = getelementptr inbounds float* undef, i64 1
- %tmp25105 = getelementptr inbounds float* undef, i64 1
- %tmp25106 = getelementptr inbounds float* undef, i64 1
- %tmp25107 = getelementptr inbounds float* %tmp25106, i64 1
- %tmp25108 = getelementptr inbounds float* undef, i64 1
- %tmp25109 = getelementptr inbounds float* undef, i64 1
- %tmp25110 = getelementptr inbounds float* undef, i64 1
- %tmp25111 = getelementptr inbounds float* undef, i64 1
- %tmp25112 = getelementptr inbounds float* undef, i64 1
- %tmp25113 = getelementptr inbounds float* undef, i64 1
- %tmp25114 = getelementptr inbounds float* undef, i64 1
- %tmp25115 = getelementptr inbounds float* undef, i64 1
- %tmp25116 = getelementptr inbounds float* undef, i64 1
- %tmp25117 = getelementptr inbounds float* undef, i64 1
- %tmp25118 = getelementptr inbounds float* undef, i64 1
- %tmp25119 = getelementptr inbounds float* undef, i64 1
- %tmp25120 = getelementptr inbounds float* undef, i64 1
- %tmp25121 = getelementptr inbounds float* undef, i64 1
- %tmp25122 = getelementptr inbounds float* %tmp25121, i64 1
- %tmp25123 = getelementptr inbounds float* undef, i64 1
- %tmp25124 = getelementptr inbounds float* undef, i64 1
- %tmp25125 = getelementptr inbounds float* undef, i64 1
- %tmp25126 = getelementptr inbounds float* undef, i64 1
- %tmp25127 = getelementptr inbounds float* undef, i64 1
- %tmp25128 = getelementptr inbounds float* undef, i64 1
- %tmp25129 = getelementptr inbounds float* undef, i64 1
- %tmp25130 = getelementptr inbounds float* undef, i64 1
- %tmp25131 = getelementptr inbounds float* undef, i64 1
- %tmp25132 = getelementptr inbounds float* undef, i64 1
- %tmp25133 = getelementptr inbounds float* undef, i64 1
- %tmp25134 = getelementptr inbounds float* undef, i64 1
- %tmp25135 = getelementptr inbounds float* undef, i64 1
- %tmp25136 = getelementptr inbounds float* undef, i64 1
- %tmp25137 = getelementptr inbounds float* undef, i64 1
- %tmp25138 = getelementptr inbounds float* undef, i64 1
- %tmp25139 = getelementptr inbounds float* undef, i64 1
- %tmp25140 = getelementptr inbounds float* undef, i64 1
- %tmp25141 = getelementptr inbounds float* undef, i64 1
- %tmp25142 = getelementptr inbounds float* undef, i64 1
- %tmp25143 = getelementptr inbounds float* undef, i64 1
- %tmp25144 = getelementptr inbounds float* undef, i64 1
- %tmp25145 = getelementptr inbounds float* undef, i64 1
- %tmp25146 = getelementptr inbounds float* %tmp25145, i64 1
- %tmp25147 = getelementptr inbounds float* undef, i64 1
- %tmp25148 = getelementptr inbounds float* %tmp25147, i64 1
- %tmp25149 = getelementptr inbounds float* undef, i64 1
- %tmp25150 = getelementptr inbounds float* undef, i64 1
- %tmp25151 = getelementptr inbounds float* undef, i64 1
- %tmp25152 = getelementptr inbounds float* undef, i64 1
- %tmp25153 = getelementptr inbounds float* %tmp25152, i64 1
- %tmp25154 = getelementptr inbounds float* undef, i64 1
- %tmp25155 = getelementptr inbounds float* undef, i64 1
- %tmp25156 = getelementptr inbounds float* undef, i64 1
- %tmp25157 = getelementptr inbounds float* undef, i64 1
- %tmp25158 = getelementptr inbounds float* undef, i64 1
- %tmp25159 = getelementptr inbounds float* undef, i64 1
- %tmp25160 = getelementptr inbounds float* undef, i64 1
- %tmp25161 = getelementptr inbounds float* undef, i64 1
- %tmp25162 = getelementptr inbounds float* %tmp25161, i64 1
- %tmp25163 = getelementptr inbounds float* undef, i64 1
- %tmp25164 = getelementptr inbounds float* undef, i64 1
- %tmp25165 = getelementptr inbounds float* undef, i64 1
- %tmp25166 = getelementptr inbounds float* undef, i64 1
- %tmp25167 = getelementptr inbounds float* undef, i64 1
- %tmp25168 = getelementptr inbounds float* undef, i64 1
- %tmp25169 = getelementptr inbounds float* undef, i64 1
- %tmp25170 = getelementptr inbounds float* %tmp25169, i64 1
- %tmp25171 = getelementptr inbounds float* undef, i64 1
- %tmp25172 = getelementptr inbounds float* undef, i64 1
- %tmp25173 = getelementptr inbounds float* undef, i64 1
- %tmp25174 = getelementptr inbounds float* undef, i64 1
- %tmp25175 = getelementptr inbounds float* %tmp25174, i64 1
- %tmp25176 = getelementptr inbounds float* undef, i64 1
- %tmp25177 = getelementptr inbounds float* undef, i64 1
- %tmp25178 = getelementptr inbounds float* %tmp25177, i64 1
- %tmp25179 = getelementptr inbounds float* undef, i64 1
- %tmp25180 = getelementptr inbounds float* undef, i64 1
- %tmp25181 = getelementptr inbounds float* undef, i64 1
- %tmp25182 = getelementptr inbounds float* undef, i64 1
- %tmp25183 = getelementptr inbounds float* undef, i64 1
- %tmp25184 = getelementptr inbounds float* undef, i64 1
- %tmp25185 = getelementptr inbounds float* undef, i64 1
- %tmp25186 = getelementptr inbounds float* undef, i64 1
- %tmp25187 = getelementptr inbounds float* %tmp25186, i64 1
- %tmp25188 = getelementptr inbounds float* %tmp25187, i64 1
- %tmp25189 = getelementptr inbounds float* undef, i64 1
- %tmp25190 = getelementptr inbounds float* undef, i64 1
- %tmp25191 = getelementptr inbounds float* undef, i64 1
- %tmp25192 = getelementptr inbounds float* %tmp25191, i64 1
- %tmp25193 = getelementptr inbounds float* undef, i64 1
- %tmp25194 = getelementptr inbounds float* undef, i64 1
- %tmp25195 = getelementptr inbounds float* undef, i64 1
- %tmp25196 = getelementptr inbounds float* undef, i64 1
- %tmp25197 = getelementptr inbounds float* undef, i64 1
- %tmp25198 = getelementptr inbounds float* undef, i64 1
- %tmp25199 = getelementptr inbounds float* undef, i64 1
- %tmp25200 = getelementptr inbounds float* undef, i64 1
- %tmp25201 = getelementptr inbounds float* %tmp25200, i64 1
- %tmp25202 = getelementptr inbounds float* undef, i64 1
- %tmp25203 = getelementptr inbounds float* undef, i64 1
- %tmp25204 = getelementptr inbounds float* undef, i64 1
- %tmp25205 = getelementptr inbounds float* undef, i64 1
- %tmp25206 = getelementptr inbounds float* undef, i64 1
- %tmp25207 = getelementptr inbounds float* undef, i64 1
- %tmp25208 = getelementptr inbounds float* undef, i64 1
- %tmp25209 = getelementptr inbounds float* undef, i64 1
- %tmp25210 = getelementptr inbounds float* undef, i64 1
- %tmp25211 = getelementptr inbounds float* undef, i64 1
- %tmp25212 = getelementptr inbounds float* undef, i64 1
- %tmp25213 = getelementptr inbounds float* undef, i64 1
- %tmp25214 = getelementptr inbounds float* undef, i64 1
- %tmp25215 = getelementptr inbounds float* undef, i64 1
- %tmp25216 = getelementptr inbounds float* undef, i64 1
- %tmp25217 = getelementptr inbounds float* undef, i64 1
- %tmp25218 = getelementptr inbounds float* undef, i64 1
- %tmp25219 = getelementptr inbounds float* undef, i64 1
- %tmp25220 = getelementptr inbounds float* undef, i64 1
- %tmp25221 = getelementptr inbounds float* undef, i64 1
- %tmp25222 = getelementptr inbounds float* undef, i64 1
- %tmp25223 = getelementptr inbounds float* undef, i64 1
- %tmp25224 = getelementptr inbounds float* undef, i64 1
- %tmp25225 = getelementptr inbounds float* undef, i64 1
- %tmp25226 = getelementptr inbounds float* undef, i64 1
- %tmp25227 = getelementptr inbounds float* undef, i64 1
- %tmp25228 = getelementptr inbounds float* undef, i64 1
- %tmp25229 = getelementptr inbounds float* undef, i64 1
- %tmp25230 = getelementptr inbounds float* %tmp25229, i64 1
- %tmp25231 = getelementptr inbounds float* undef, i64 1
- %tmp25232 = getelementptr inbounds float* undef, i64 1
- %tmp25233 = getelementptr inbounds float* undef, i64 1
- %tmp25234 = getelementptr inbounds float* undef, i64 1
- %tmp25235 = getelementptr inbounds float* %tmp25234, i64 1
- %tmp25236 = getelementptr inbounds float* undef, i64 1
- %tmp25237 = getelementptr inbounds float* %tmp25236, i64 1
- %tmp25238 = getelementptr inbounds float* undef, i64 1
- %tmp25239 = getelementptr inbounds float* undef, i64 1
- %tmp25240 = getelementptr inbounds float* undef, i64 1
- %tmp25241 = getelementptr inbounds float* undef, i64 1
- %tmp25242 = getelementptr inbounds float* undef, i64 1
- %tmp25243 = getelementptr inbounds float* undef, i64 1
- %tmp25244 = getelementptr inbounds float* undef, i64 1
- %tmp25245 = getelementptr inbounds float* undef, i64 1
- %tmp25246 = getelementptr inbounds float* undef, i64 1
- %tmp25247 = getelementptr inbounds float* undef, i64 1
- %tmp25248 = getelementptr inbounds float* %tmp25247, i64 1
- %tmp25249 = getelementptr inbounds float* undef, i64 1
- %tmp25250 = getelementptr inbounds float* undef, i64 1
- %tmp25251 = getelementptr inbounds float* undef, i64 1
- %tmp25252 = getelementptr inbounds float* undef, i64 1
- %tmp25253 = getelementptr inbounds float* undef, i64 1
- %tmp25254 = getelementptr inbounds float* undef, i64 1
- %tmp25255 = getelementptr inbounds float* undef, i64 1
- %tmp25256 = getelementptr inbounds float* undef, i64 1
- %tmp25257 = getelementptr inbounds float* undef, i64 1
- %tmp25258 = getelementptr inbounds float* undef, i64 1
- %tmp25259 = getelementptr inbounds float* undef, i64 1
- %tmp25260 = getelementptr inbounds float* undef, i64 1
- %tmp25261 = getelementptr inbounds float* undef, i64 1
- %tmp25262 = getelementptr inbounds float* undef, i64 1
- %tmp25263 = getelementptr inbounds float* undef, i64 1
- %tmp25264 = getelementptr inbounds float* undef, i64 1
- %tmp25265 = getelementptr inbounds float* undef, i64 1
- %tmp25266 = getelementptr inbounds float* undef, i64 1
- %tmp25267 = getelementptr inbounds float* undef, i64 1
- %tmp25268 = getelementptr inbounds float* undef, i64 1
- %tmp25269 = getelementptr inbounds float* undef, i64 1
+ %tmp24947 = getelementptr inbounds float, float* undef, i64 1
+ %tmp24948 = getelementptr inbounds float, float* undef, i64 1
+ %tmp24949 = getelementptr inbounds float, float* undef, i64 1
+ %tmp24950 = getelementptr inbounds float, float* undef, i64 1
+ %tmp24951 = getelementptr inbounds float, float* %tmp24950, i64 1
+ %tmp24952 = getelementptr inbounds float, float* undef, i64 1
+ %tmp24953 = getelementptr inbounds float, float* undef, i64 1
+ %tmp24954 = getelementptr inbounds float, float* undef, i64 1
+ %tmp24955 = getelementptr inbounds float, float* undef, i64 1
+ %tmp24956 = getelementptr inbounds float, float* undef, i64 1
+ %tmp24957 = getelementptr inbounds float, float* undef, i64 1
+ %tmp24958 = getelementptr inbounds float, float* %tmp24957, i64 1
+ %tmp24959 = getelementptr inbounds float, float* undef, i64 1
+ %tmp24960 = getelementptr inbounds float, float* undef, i64 1
+ %tmp24961 = getelementptr inbounds float, float* undef, i64 1
+ %tmp24962 = getelementptr inbounds float, float* undef, i64 1
+ %tmp24963 = getelementptr inbounds float, float* undef, i64 1
+ %tmp24964 = getelementptr inbounds float, float* undef, i64 1
+ %tmp24965 = getelementptr inbounds float, float* undef, i64 1
+ %tmp24966 = getelementptr inbounds float, float* %tmp24965, i64 1
+ %tmp24967 = getelementptr inbounds float, float* undef, i64 1
+ %tmp24968 = getelementptr inbounds float, float* undef, i64 1
+ %tmp24969 = getelementptr inbounds float, float* undef, i64 1
+ %tmp24970 = getelementptr inbounds float, float* undef, i64 1
+ %tmp24971 = getelementptr inbounds float, float* %tmp24970, i64 1
+ %tmp24972 = getelementptr inbounds float, float* %tmp24971, i64 1
+ %tmp24973 = getelementptr inbounds float, float* %tmp24972, i64 1
+ %tmp24974 = getelementptr inbounds float, float* undef, i64 1
+ %tmp24975 = getelementptr inbounds float, float* undef, i64 1
+ %tmp24976 = getelementptr inbounds float, float* %tmp24975, i64 1
+ %tmp24977 = getelementptr inbounds float, float* undef, i64 1
+ %tmp24978 = getelementptr inbounds float, float* undef, i64 1
+ %tmp24979 = getelementptr inbounds float, float* undef, i64 1
+ %tmp24980 = getelementptr inbounds float, float* undef, i64 1
+ %tmp24981 = getelementptr inbounds float, float* undef, i64 1
+ %tmp24982 = getelementptr inbounds float, float* undef, i64 1
+ %tmp24983 = getelementptr inbounds float, float* %tmp24982, i64 1
+ %tmp24984 = getelementptr inbounds float, float* undef, i64 1
+ %tmp24985 = getelementptr inbounds float, float* %tmp24984, i64 1
+ %tmp24986 = getelementptr inbounds float, float* undef, i64 1
+ %tmp24987 = getelementptr inbounds float, float* %tmp24986, i64 1
+ %tmp24988 = getelementptr inbounds float, float* %tmp24987, i64 1
+ %tmp24989 = getelementptr inbounds float, float* undef, i64 1
+ %tmp24990 = getelementptr inbounds float, float* undef, i64 1
+ %tmp24991 = getelementptr inbounds float, float* %tmp24990, i64 1
+ %tmp24992 = getelementptr inbounds float, float* undef, i64 1
+ %tmp24993 = getelementptr inbounds float, float* %tmp24992, i64 1
+ %tmp24994 = getelementptr inbounds float, float* %tmp24993, i64 1
+ %tmp24995 = getelementptr inbounds float, float* undef, i64 1
+ %tmp24996 = getelementptr inbounds float, float* undef, i64 1
+ %tmp24997 = getelementptr inbounds float, float* undef, i64 1
+ %tmp24998 = getelementptr inbounds float, float* undef, i64 1
+ %tmp24999 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25000 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25001 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25002 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25003 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25004 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25005 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25006 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25007 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25008 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25009 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25010 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25011 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25012 = getelementptr inbounds float, float* %tmp25011, i64 1
+ %tmp25013 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25014 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25015 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25016 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25017 = getelementptr inbounds float, float* %tmp25016, i64 1
+ %tmp25018 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25019 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25020 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25021 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25022 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25023 = getelementptr inbounds float, float* %tmp25022, i64 1
+ %tmp25024 = getelementptr inbounds float, float* %tmp25023, i64 1
+ %tmp25025 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25026 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25027 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25028 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25029 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25030 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25031 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25032 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25033 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25034 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25035 = getelementptr inbounds float, float* %tmp25034, i64 1
+ %tmp25036 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25037 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25038 = getelementptr inbounds float, float* %tmp25037, i64 1
+ %tmp25039 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25040 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25041 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25042 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25043 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25044 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25045 = getelementptr inbounds float, float* %tmp25044, i64 1
+ %tmp25046 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25047 = getelementptr inbounds float, float* %tmp25046, i64 1
+ %tmp25048 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25049 = getelementptr inbounds float, float* %tmp25048, i64 1
+ %tmp25050 = getelementptr inbounds float, float* %tmp25049, i64 1
+ %tmp25051 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25052 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25053 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25054 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25055 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25056 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25057 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25058 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25059 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25060 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25061 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25062 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25063 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25064 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25065 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25066 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25067 = getelementptr inbounds float, float* %tmp25066, i64 1
+ %tmp25068 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25069 = getelementptr inbounds float, float* %tmp25068, i64 1
+ %tmp25070 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25071 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25072 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25073 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25074 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25075 = getelementptr inbounds float, float* %tmp25074, i64 1
+ %tmp25076 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25077 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25078 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25079 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25080 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25081 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25082 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25083 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25084 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25085 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25086 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25087 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25088 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25089 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25090 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25091 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25092 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25093 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25094 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25095 = getelementptr inbounds float, float* %tmp25094, i64 1
+ %tmp25096 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25097 = getelementptr inbounds float, float* %tmp25096, i64 1
+ %tmp25098 = getelementptr inbounds float, float* %tmp25097, i64 1
+ %tmp25099 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25100 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25101 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25102 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25103 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25104 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25105 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25106 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25107 = getelementptr inbounds float, float* %tmp25106, i64 1
+ %tmp25108 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25109 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25110 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25111 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25112 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25113 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25114 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25115 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25116 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25117 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25118 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25119 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25120 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25121 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25122 = getelementptr inbounds float, float* %tmp25121, i64 1
+ %tmp25123 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25124 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25125 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25126 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25127 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25128 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25129 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25130 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25131 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25132 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25133 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25134 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25135 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25136 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25137 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25138 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25139 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25140 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25141 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25142 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25143 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25144 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25145 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25146 = getelementptr inbounds float, float* %tmp25145, i64 1
+ %tmp25147 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25148 = getelementptr inbounds float, float* %tmp25147, i64 1
+ %tmp25149 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25150 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25151 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25152 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25153 = getelementptr inbounds float, float* %tmp25152, i64 1
+ %tmp25154 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25155 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25156 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25157 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25158 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25159 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25160 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25161 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25162 = getelementptr inbounds float, float* %tmp25161, i64 1
+ %tmp25163 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25164 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25165 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25166 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25167 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25168 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25169 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25170 = getelementptr inbounds float, float* %tmp25169, i64 1
+ %tmp25171 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25172 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25173 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25174 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25175 = getelementptr inbounds float, float* %tmp25174, i64 1
+ %tmp25176 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25177 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25178 = getelementptr inbounds float, float* %tmp25177, i64 1
+ %tmp25179 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25180 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25181 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25182 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25183 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25184 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25185 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25186 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25187 = getelementptr inbounds float, float* %tmp25186, i64 1
+ %tmp25188 = getelementptr inbounds float, float* %tmp25187, i64 1
+ %tmp25189 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25190 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25191 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25192 = getelementptr inbounds float, float* %tmp25191, i64 1
+ %tmp25193 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25194 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25195 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25196 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25197 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25198 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25199 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25200 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25201 = getelementptr inbounds float, float* %tmp25200, i64 1
+ %tmp25202 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25203 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25204 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25205 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25206 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25207 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25208 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25209 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25210 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25211 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25212 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25213 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25214 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25215 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25216 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25217 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25218 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25219 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25220 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25221 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25222 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25223 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25224 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25225 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25226 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25227 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25228 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25229 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25230 = getelementptr inbounds float, float* %tmp25229, i64 1
+ %tmp25231 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25232 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25233 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25234 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25235 = getelementptr inbounds float, float* %tmp25234, i64 1
+ %tmp25236 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25237 = getelementptr inbounds float, float* %tmp25236, i64 1
+ %tmp25238 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25239 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25240 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25241 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25242 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25243 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25244 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25245 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25246 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25247 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25248 = getelementptr inbounds float, float* %tmp25247, i64 1
+ %tmp25249 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25250 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25251 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25252 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25253 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25254 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25255 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25256 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25257 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25258 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25259 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25260 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25261 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25262 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25263 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25264 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25265 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25266 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25267 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25268 = getelementptr inbounds float, float* undef, i64 1
+ %tmp25269 = getelementptr inbounds float, float* undef, i64 1
br i1 undef, label %bb25270, label %bb25271
bb25270: ; preds = %bb2
@@ -25487,11 +25487,11 @@ bb25332: ; preds = %bb25330, %bb25305
br i1 undef, label %bb25333, label %bb25357
bb25333: ; preds = %bb25332
- invoke void (...)* @printf()
+ invoke void (...) @printf()
to label %bb25334 unwind label %bb25324
bb25334: ; preds = %bb25333
- invoke void (...)* @printf(i32 undef)
+ invoke void (...) @printf(i32 undef)
to label %bb25335 unwind label %bb25324
bb25335: ; preds = %bb25334
@@ -25501,7 +25501,7 @@ bb25336: ; preds = %bb25338, %bb25335
br i1 undef, label %bb25337, label %bb25339
bb25337: ; preds = %bb25336
- invoke void (...)* @printf(i32 undef, double undef)
+ invoke void (...) @printf(i32 undef, double undef)
to label %bb25338 unwind label %bb25324
bb25338: ; preds = %bb25337
@@ -25517,11 +25517,11 @@ bb25341: ; preds = %bb25340
br label %bb25340
bb25342: ; preds = %bb25340
- invoke void (...)* @printf()
+ invoke void (...) @printf()
to label %bb25343 unwind label %bb25324
bb25343: ; preds = %bb25342
- invoke void (...)* @printf(double undef, double undef)
+ invoke void (...) @printf(double undef, double undef)
to label %bb25344 unwind label %bb25324
bb25344: ; preds = %bb25343
@@ -25547,15 +25547,15 @@ bb25350: ; preds = %bb25349
br label %bb25349
bb25351: ; preds = %bb25349
- invoke void (...)* @printf()
+ invoke void (...) @printf()
to label %bb25352 unwind label %bb25355
bb25352: ; preds = %bb25351
- invoke void (...)* @printf(double undef)
+ invoke void (...) @printf(double undef)
to label %bb25353 unwind label %bb25355
bb25353: ; preds = %bb25352
- invoke void (...)* @printf()
+ invoke void (...) @printf()
to label %bb25354 unwind label %bb25355
bb25354: ; preds = %bb25353
@@ -25567,7 +25567,7 @@ bb25355: ; preds = %bb25353, %bb25352,
br label %bb25359
bb25357: ; preds = %bb25332
- invoke void (...)* @printf()
+ invoke void (...) @printf()
to label %bb25358 unwind label %bb25324
bb25358: ; preds = %bb25357, %bb25354
diff --git a/test/CodeGen/X86/large-gep-scale.ll b/test/CodeGen/X86/large-gep-scale.ll
index 143294e8b07f..8e6e4d23a818 100644
--- a/test/CodeGen/X86/large-gep-scale.ll
+++ b/test/CodeGen/X86/large-gep-scale.ll
@@ -7,6 +7,6 @@
; CHECK: shll $2, %edx
define fastcc i32* @_ada_smkr([2147483647 x i32]* %u, i32 %t) nounwind {
- %x = getelementptr [2147483647 x i32]* %u, i32 %t, i32 0
+ %x = getelementptr [2147483647 x i32], [2147483647 x i32]* %u, i32 %t, i32 0
ret i32* %x
}
diff --git a/test/CodeGen/X86/ldzero.ll b/test/CodeGen/X86/ldzero.ll
index dab04bc353c6..3befa8a99b34 100644
--- a/test/CodeGen/X86/ldzero.ll
+++ b/test/CodeGen/X86/ldzero.ll
@@ -11,15 +11,15 @@ entry:
%d = alloca double, align 8 ; <double*> [#uses=2]
%"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
store double 0.000000e+00, double* %d, align 8
- %tmp1 = load double* %d, align 8 ; <double> [#uses=1]
+ %tmp1 = load double, double* %d, align 8 ; <double> [#uses=1]
%tmp12 = fpext double %tmp1 to x86_fp80 ; <x86_fp80> [#uses=1]
store x86_fp80 %tmp12, x86_fp80* %tmp, align 16
- %tmp3 = load x86_fp80* %tmp, align 16 ; <x86_fp80> [#uses=1]
+ %tmp3 = load x86_fp80, x86_fp80* %tmp, align 16 ; <x86_fp80> [#uses=1]
store x86_fp80 %tmp3, x86_fp80* %retval, align 16
br label %return
return: ; preds = %entry
- %retval4 = load x86_fp80* %retval ; <x86_fp80> [#uses=1]
+ %retval4 = load x86_fp80, x86_fp80* %retval ; <x86_fp80> [#uses=1]
ret x86_fp80 %retval4
}
@@ -30,14 +30,14 @@ entry:
%ld = alloca x86_fp80, align 16 ; <x86_fp80*> [#uses=2]
%"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
store x86_fp80 0xK00000000000000000000, x86_fp80* %ld, align 16
- %tmp1 = load x86_fp80* %ld, align 16 ; <x86_fp80> [#uses=1]
+ %tmp1 = load x86_fp80, x86_fp80* %ld, align 16 ; <x86_fp80> [#uses=1]
%tmp12 = fptrunc x86_fp80 %tmp1 to double ; <double> [#uses=1]
store double %tmp12, double* %tmp, align 8
- %tmp3 = load double* %tmp, align 8 ; <double> [#uses=1]
+ %tmp3 = load double, double* %tmp, align 8 ; <double> [#uses=1]
store double %tmp3, double* %retval, align 8
br label %return
return: ; preds = %entry
- %retval4 = load double* %retval ; <double> [#uses=1]
+ %retval4 = load double, double* %retval ; <double> [#uses=1]
ret double %retval4
}
diff --git a/test/CodeGen/X86/lea-5.ll b/test/CodeGen/X86/lea-5.ll
index 50d3aaf4c594..b89c199e7197 100644
--- a/test/CodeGen/X86/lea-5.ll
+++ b/test/CodeGen/X86/lea-5.ll
@@ -14,11 +14,11 @@ entry:
while.cond: ; preds = %while.cond, %entry
%d.addr.0 = phi i32 [ %d, %entry ], [ %inc, %while.cond ]
- %arrayidx = getelementptr inbounds [8 x i32]* %a, i32 0, i32 %d.addr.0
+ %arrayidx = getelementptr inbounds [8 x i32], [8 x i32]* %a, i32 0, i32 %d.addr.0
; CHECK: leaq -40(%rsp,%r{{[^,]*}},4), %rax
; X32: leal -40(%rsp,%r{{[^,]*}},4), %eax
- %0 = load i32* %arrayidx, align 4
+ %0 = load i32, i32* %arrayidx, align 4
%cmp1 = icmp eq i32 %0, 0
%inc = add nsw i32 %d.addr.0, 1
@@ -41,11 +41,11 @@ entry:
while.cond: ; preds = %while.cond, %entry
%d.addr.0 = phi i32 [ %d, %entry ], [ %inc, %while.cond ]
- %arrayidx = getelementptr inbounds [8 x i32]* %a, i32 0, i32 %d.addr.0
+ %arrayidx = getelementptr inbounds [8 x i32], [8 x i32]* %a, i32 0, i32 %d.addr.0
; CHECK: leaq (%rsp,%r{{[^,]*}},4), %rax
; X32: leal (%rsp,%r{{[^,]*}},4), %eax
- %0 = load i32* %arrayidx, align 4
+ %0 = load i32, i32* %arrayidx, align 4
%cmp1 = icmp eq i32 %0, 0
%inc = add nsw i32 %d.addr.0, 1
diff --git a/test/CodeGen/X86/lea-recursion.ll b/test/CodeGen/X86/lea-recursion.ll
index 9480600312ce..55bcd7819c37 100644
--- a/test/CodeGen/X86/lea-recursion.ll
+++ b/test/CodeGen/X86/lea-recursion.ll
@@ -13,34 +13,34 @@
define void @foo() {
entry:
- %tmp4 = load i32* getelementptr ([1000 x i32]* @g0, i32 0, i32 0) ; <i32> [#uses=1]
- %tmp8 = load i32* getelementptr ([1000 x i32]* @g1, i32 0, i32 0) ; <i32> [#uses=1]
+ %tmp4 = load i32, i32* getelementptr ([1000 x i32], [1000 x i32]* @g0, i32 0, i32 0) ; <i32> [#uses=1]
+ %tmp8 = load i32, i32* getelementptr ([1000 x i32], [1000 x i32]* @g1, i32 0, i32 0) ; <i32> [#uses=1]
%tmp9 = add i32 %tmp4, 1 ; <i32> [#uses=1]
%tmp10 = add i32 %tmp9, %tmp8 ; <i32> [#uses=2]
- store i32 %tmp10, i32* getelementptr ([1000 x i32]* @g0, i32 0, i32 1)
- %tmp8.1 = load i32* getelementptr ([1000 x i32]* @g1, i32 0, i32 1) ; <i32> [#uses=1]
+ store i32 %tmp10, i32* getelementptr ([1000 x i32], [1000 x i32]* @g0, i32 0, i32 1)
+ %tmp8.1 = load i32, i32* getelementptr ([1000 x i32], [1000 x i32]* @g1, i32 0, i32 1) ; <i32> [#uses=1]
%tmp9.1 = add i32 %tmp10, 1 ; <i32> [#uses=1]
%tmp10.1 = add i32 %tmp9.1, %tmp8.1 ; <i32> [#uses=2]
- store i32 %tmp10.1, i32* getelementptr ([1000 x i32]* @g0, i32 0, i32 2)
- %tmp8.2 = load i32* getelementptr ([1000 x i32]* @g1, i32 0, i32 2) ; <i32> [#uses=1]
+ store i32 %tmp10.1, i32* getelementptr ([1000 x i32], [1000 x i32]* @g0, i32 0, i32 2)
+ %tmp8.2 = load i32, i32* getelementptr ([1000 x i32], [1000 x i32]* @g1, i32 0, i32 2) ; <i32> [#uses=1]
%tmp9.2 = add i32 %tmp10.1, 1 ; <i32> [#uses=1]
%tmp10.2 = add i32 %tmp9.2, %tmp8.2 ; <i32> [#uses=2]
- store i32 %tmp10.2, i32* getelementptr ([1000 x i32]* @g0, i32 0, i32 3)
- %tmp8.3 = load i32* getelementptr ([1000 x i32]* @g1, i32 0, i32 3) ; <i32> [#uses=1]
+ store i32 %tmp10.2, i32* getelementptr ([1000 x i32], [1000 x i32]* @g0, i32 0, i32 3)
+ %tmp8.3 = load i32, i32* getelementptr ([1000 x i32], [1000 x i32]* @g1, i32 0, i32 3) ; <i32> [#uses=1]
%tmp9.3 = add i32 %tmp10.2, 1 ; <i32> [#uses=1]
%tmp10.3 = add i32 %tmp9.3, %tmp8.3 ; <i32> [#uses=2]
- store i32 %tmp10.3, i32* getelementptr ([1000 x i32]* @g0, i32 0, i32 4)
- %tmp8.4 = load i32* getelementptr ([1000 x i32]* @g1, i32 0, i32 4) ; <i32> [#uses=1]
+ store i32 %tmp10.3, i32* getelementptr ([1000 x i32], [1000 x i32]* @g0, i32 0, i32 4)
+ %tmp8.4 = load i32, i32* getelementptr ([1000 x i32], [1000 x i32]* @g1, i32 0, i32 4) ; <i32> [#uses=1]
%tmp9.4 = add i32 %tmp10.3, 1 ; <i32> [#uses=1]
%tmp10.4 = add i32 %tmp9.4, %tmp8.4 ; <i32> [#uses=2]
- store i32 %tmp10.4, i32* getelementptr ([1000 x i32]* @g0, i32 0, i32 5)
- %tmp8.5 = load i32* getelementptr ([1000 x i32]* @g1, i32 0, i32 5) ; <i32> [#uses=1]
+ store i32 %tmp10.4, i32* getelementptr ([1000 x i32], [1000 x i32]* @g0, i32 0, i32 5)
+ %tmp8.5 = load i32, i32* getelementptr ([1000 x i32], [1000 x i32]* @g1, i32 0, i32 5) ; <i32> [#uses=1]
%tmp9.5 = add i32 %tmp10.4, 1 ; <i32> [#uses=1]
%tmp10.5 = add i32 %tmp9.5, %tmp8.5 ; <i32> [#uses=2]
- store i32 %tmp10.5, i32* getelementptr ([1000 x i32]* @g0, i32 0, i32 6)
- %tmp8.6 = load i32* getelementptr ([1000 x i32]* @g1, i32 0, i32 6) ; <i32> [#uses=1]
+ store i32 %tmp10.5, i32* getelementptr ([1000 x i32], [1000 x i32]* @g0, i32 0, i32 6)
+ %tmp8.6 = load i32, i32* getelementptr ([1000 x i32], [1000 x i32]* @g1, i32 0, i32 6) ; <i32> [#uses=1]
%tmp9.6 = add i32 %tmp10.5, 1 ; <i32> [#uses=1]
%tmp10.6 = add i32 %tmp9.6, %tmp8.6 ; <i32> [#uses=1]
- store i32 %tmp10.6, i32* getelementptr ([1000 x i32]* @g0, i32 0, i32 7)
+ store i32 %tmp10.6, i32* getelementptr ([1000 x i32], [1000 x i32]* @g0, i32 0, i32 7)
ret void
}
diff --git a/test/CodeGen/X86/leaf-fp-elim.ll b/test/CodeGen/X86/leaf-fp-elim.ll
index 1bb3c7519146..dfbfbf7d11cb 100644
--- a/test/CodeGen/X86/leaf-fp-elim.ll
+++ b/test/CodeGen/X86/leaf-fp-elim.ll
@@ -19,7 +19,7 @@ entry:
br i1 %0, label %return, label %bb
bb: ; preds = %entry
- tail call void asm "mov $1, $0", "=*m,{cx},~{dirflag},~{fpsr},~{flags}"(i8** @msg, i8* getelementptr inbounds ([2 x i8]* @.str, i64 0, i64 0)) nounwind
+ tail call void asm "mov $1, $0", "=*m,{cx},~{dirflag},~{fpsr},~{flags}"(i8** @msg, i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str, i64 0, i64 0)) nounwind
tail call void @llvm.trap()
unreachable
diff --git a/test/CodeGen/X86/legalize-shift-64.ll b/test/CodeGen/X86/legalize-shift-64.ll
index 64460bb91186..fb8f7b6a6027 100644
--- a/test/CodeGen/X86/legalize-shift-64.ll
+++ b/test/CodeGen/X86/legalize-shift-64.ll
@@ -71,7 +71,7 @@ define i32 @test6() {
%t = alloca i64, align 8
store i32 1, i32* %x, align 4
store i64 1, i64* %t, align 8 ;; DEAD
- %load = load i32* %x, align 4
+ %load = load i32, i32* %x, align 4
%shl = shl i32 %load, 8
%add = add i32 %shl, -224
%sh_prom = zext i32 %add to i64
diff --git a/test/CodeGen/X86/legalize-sub-zero-2.ll b/test/CodeGen/X86/legalize-sub-zero-2.ll
index f02ca715aeeb..78d0e731fcfd 100644
--- a/test/CodeGen/X86/legalize-sub-zero-2.ll
+++ b/test/CodeGen/X86/legalize-sub-zero-2.ll
@@ -33,7 +33,7 @@ bb80: ; preds = %bb71
unreachable
bb92: ; preds = %bb71
- %1 = getelementptr inbounds i8* undef, i32 %.sum745
+ %1 = getelementptr inbounds i8, i8* undef, i32 %.sum745
unreachable
bb348: ; preds = %bb27
diff --git a/test/CodeGen/X86/licm-nested.ll b/test/CodeGen/X86/licm-nested.ll
index 083ae0875e39..42e6d12ec1e0 100644
--- a/test/CodeGen/X86/licm-nested.ll
+++ b/test/CodeGen/X86/licm-nested.ll
@@ -13,8 +13,8 @@ entry:
br i1 %cmp, label %while.cond.preheader, label %bb.nph53
while.cond.preheader: ; preds = %entry
- %arrayidx = getelementptr inbounds i8** %argv, i64 1 ; <i8**> [#uses=1]
- %tmp2 = load i8** %arrayidx ; <i8*> [#uses=1]
+ %arrayidx = getelementptr inbounds i8*, i8** %argv, i64 1 ; <i8**> [#uses=1]
+ %tmp2 = load i8*, i8** %arrayidx ; <i8*> [#uses=1]
%call = tail call i32 @atoi(i8* %tmp2) nounwind ; <i32> [#uses=2]
%tobool51 = icmp eq i32 %call, 0 ; <i1> [#uses=1]
br i1 %tobool51, label %while.end, label %bb.nph53
@@ -35,7 +35,7 @@ bb.nph: ; preds = %while.cond.loopexit
for.body: ; preds = %for.body, %bb.nph
%indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ] ; <i64> [#uses=2]
%tmp = add i64 %indvar, 2 ; <i64> [#uses=1]
- %arrayidx10 = getelementptr [8193 x i8]* @main.flags, i64 0, i64 %tmp ; <i8*> [#uses=1]
+ %arrayidx10 = getelementptr [8193 x i8], [8193 x i8]* @main.flags, i64 0, i64 %tmp ; <i8*> [#uses=1]
store i8 1, i8* %arrayidx10
%indvar.next = add i64 %indvar, 1 ; <i64> [#uses=2]
%exitcond = icmp eq i64 %indvar.next, 8191 ; <i1> [#uses=1]
@@ -49,8 +49,8 @@ for.body15: ; preds = %for.body, %for.inc3
%tmp71 = add i64 %tmp70, 6 ; <i64> [#uses=1]
%tmp73 = shl i64 %indvar57, 1 ; <i64> [#uses=1]
%add = add i64 %tmp73, 4 ; <i64> [#uses=2]
- %arrayidx17 = getelementptr [8193 x i8]* @main.flags, i64 0, i64 %tmp68 ; <i8*> [#uses=1]
- %tmp18 = load i8* %arrayidx17 ; <i8> [#uses=1]
+ %arrayidx17 = getelementptr [8193 x i8], [8193 x i8]* @main.flags, i64 0, i64 %tmp68 ; <i8*> [#uses=1]
+ %tmp18 = load i8, i8* %arrayidx17 ; <i8> [#uses=1]
%tobool19 = icmp eq i8 %tmp18, 0 ; <i1> [#uses=1]
br i1 %tobool19, label %for.inc35, label %if.then
@@ -62,7 +62,7 @@ for.body25: ; preds = %if.then, %for.body2
%indvar55 = phi i64 [ %indvar.next56, %for.body25 ], [ 0, %if.then ] ; <i64> [#uses=2]
%tmp60 = mul i64 %tmp68, %indvar55 ; <i64> [#uses=2]
%tmp75 = add i64 %add, %tmp60 ; <i64> [#uses=1]
- %arrayidx27 = getelementptr [8193 x i8]* @main.flags, i64 0, i64 %tmp75 ; <i8*> [#uses=1]
+ %arrayidx27 = getelementptr [8193 x i8], [8193 x i8]* @main.flags, i64 0, i64 %tmp75 ; <i8*> [#uses=1]
store i8 0, i8* %arrayidx27
%add31 = add i64 %tmp71, %tmp60 ; <i64> [#uses=1]
%cmp24 = icmp slt i64 %add31, 8193 ; <i1> [#uses=1]
@@ -81,7 +81,7 @@ for.inc35: ; preds = %for.body15, %for.en
while.end: ; preds = %while.cond.loopexit, %while.cond.preheader
%count.0.lcssa = phi i32 [ 0, %while.cond.preheader ], [ %count.1, %while.cond.loopexit ] ; <i32> [#uses=1]
- %call40 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([11 x i8]* @.str, i64 0, i64 0), i32 %count.0.lcssa) nounwind ; <i32> [#uses=0]
+ %call40 = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0), i32 %count.0.lcssa) nounwind ; <i32> [#uses=0]
ret i32 0
}
diff --git a/test/CodeGen/X86/licm-regpressure.ll b/test/CodeGen/X86/licm-regpressure.ll
new file mode 100644
index 000000000000..0ab655419c88
--- /dev/null
+++ b/test/CodeGen/X86/licm-regpressure.ll
@@ -0,0 +1,39 @@
+; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s
+; This test currently fails as MachineLICM does not compute register pressure
+; correctly. More details: llvm.org/PR23143
+; XFAIL: *
+
+; MachineLICM should take register pressure into account.
+; CHECK-NOT: Spill
+
+%struct.A = type { i32, i32, i32, i32, i32, i32, i32 }
+
+define void @test(i1 %b, %struct.A* %a) nounwind {
+entry:
+ br label %loop-header
+
+loop-header:
+ br label %loop-body
+
+loop-body:
+ %0 = getelementptr inbounds %struct.A, %struct.A* %a, i64 0, i32 0
+ %1 = getelementptr inbounds %struct.A, %struct.A* %a, i64 0, i32 1
+ %2 = getelementptr inbounds %struct.A, %struct.A* %a, i64 0, i32 2
+ %3 = getelementptr inbounds %struct.A, %struct.A* %a, i64 0, i32 3
+ %4 = getelementptr inbounds %struct.A, %struct.A* %a, i64 0, i32 4
+ %5 = getelementptr inbounds %struct.A, %struct.A* %a, i64 0, i32 5
+ %6 = getelementptr inbounds %struct.A, %struct.A* %a, i64 0, i32 6
+ call void @assign(i32* %0)
+ call void @assign(i32* %1)
+ call void @assign(i32* %2)
+ call void @assign(i32* %3)
+ call void @assign(i32* %4)
+ call void @assign(i32* %5)
+ call void @assign(i32* %6)
+ br i1 %b, label %loop-body, label %loop-exit
+
+loop-exit:
+ ret void
+}
+
+declare void @assign(i32*)
diff --git a/test/CodeGen/X86/licm-symbol.ll b/test/CodeGen/X86/licm-symbol.ll
index c3d1938e9dbd..0f115ddbb6c2 100644
--- a/test/CodeGen/X86/licm-symbol.ll
+++ b/test/CodeGen/X86/licm-symbol.ll
@@ -29,11 +29,11 @@ bb151: ; preds = %bb59, %bb56, %bb14
br i1 undef, label %bb56, label %bb59
bb56: ; preds = %bb151
- %t0 = call i32 (%struct.FILE*)* @fprintf(%struct.FILE* getelementptr inbounds ([0 x %struct.FILE]* @__sF, i32 0, i32 2)) nounwind
+ %t0 = call i32 (%struct.FILE*) @fprintf(%struct.FILE* getelementptr inbounds ([0 x %struct.FILE], [0 x %struct.FILE]* @__sF, i32 0, i32 2)) nounwind
br label %bb151
bb59: ; preds = %bb151
- %t1 = call i32 (%struct.FILE*)* @fprintf(%struct.FILE* getelementptr inbounds ([0 x %struct.FILE]* @__sF, i32 0, i32 2)) nounwind
+ %t1 = call i32 (%struct.FILE*) @fprintf(%struct.FILE* getelementptr inbounds ([0 x %struct.FILE], [0 x %struct.FILE]* @__sF, i32 0, i32 2)) nounwind
br label %bb151
}
diff --git a/test/CodeGen/X86/liveness-local-regalloc.ll b/test/CodeGen/X86/liveness-local-regalloc.ll
index 721f545985d7..0954f9d5dd47 100644
--- a/test/CodeGen/X86/liveness-local-regalloc.ll
+++ b/test/CodeGen/X86/liveness-local-regalloc.ll
@@ -49,7 +49,7 @@ bb6: ; preds = %.loopexit
bb8: ; preds = %.loopexit
%tmp9 = sext i32 %.04 to i64 ; <i64> [#uses=1]
- %tmp10 = getelementptr inbounds %0* %arg, i64 0, i32 11, i64 %tmp9 ; <i8*> [#uses=1]
+ %tmp10 = getelementptr inbounds %0, %0* %arg, i64 0, i32 11, i64 %tmp9 ; <i8*> [#uses=1]
store i8 0, i8* %tmp10, align 1
ret void
@@ -71,7 +71,7 @@ BB:
%A2 = alloca <2 x i8>
%A1 = alloca i1
%A = alloca i32
- %L = load i8* %0
+ %L = load i8, i8* %0
store i8 -37, i8* %0
%E = extractelement <4 x i64> zeroinitializer, i32 2
%Shuff = shufflevector <4 x i64> zeroinitializer, <4 x i64> zeroinitializer, <4 x i32> <i32 5, i32 7, i32 1, i32 3>
diff --git a/test/CodeGen/X86/llc-override-mcpu-mattr.ll b/test/CodeGen/X86/llc-override-mcpu-mattr.ll
new file mode 100644
index 000000000000..19a5ed591867
--- /dev/null
+++ b/test/CodeGen/X86/llc-override-mcpu-mattr.ll
@@ -0,0 +1,21 @@
+; RUN: llc < %s -march x86-64 -mcpu=broadwell | FileCheck %s
+; RUN: llc < %s -march x86-64 -mattr=+avx2 | FileCheck %s
+
+; Check that llc can override function attributes target-cpu and target-features
+; using command line options -mcpu and -mattr.
+
+; CHECK: vpsadbw (%r{{si|dx}}), %ymm{{[0-9]+}}, %ymm{{[0-9]+}}
+
+define <4 x i64> @foo1(<4 x i64>* %s1, <4 x i64>* %s2) #0 {
+entry:
+ %ps1 = load <4 x i64>, <4 x i64>* %s1
+ %ps2 = load <4 x i64>, <4 x i64>* %s2
+ %0 = bitcast <4 x i64> %ps1 to <32 x i8>
+ %1 = bitcast <4 x i64> %ps2 to <32 x i8>
+ %2 = tail call <4 x i64> @llvm.x86.avx2.psad.bw(<32 x i8> %0, <32 x i8> %1)
+ ret <4 x i64> %2
+}
+
+declare <4 x i64> @llvm.x86.avx2.psad.bw(<32 x i8>, <32 x i8>)
+
+attributes #0 = { "target-cpu"="core2" "target-features"="+ssse3,+cx16,+sse4.2,+sse4.1,+sse,+sse2,+sse3,+avx,+popcnt" }
diff --git a/test/CodeGen/X86/load-slice.ll b/test/CodeGen/X86/load-slice.ll
index 49eb13160bbc..2f90f819d47e 100644
--- a/test/CodeGen/X86/load-slice.ll
+++ b/test/CodeGen/X86/load-slice.ll
@@ -28,7 +28,7 @@
; Swap Imm and Real.
; STRESS-NEXT: vinsertps $16, [[RES_Imm]], [[RES_Real]], [[RES_Vec:%xmm[0-9]+]]
; Put the results back into out[out_start].
-; STRESS-NEXT: vmovq [[RES_Vec]], ([[BASE]])
+; STRESS-NEXT: vmovlps [[RES_Vec]], ([[BASE]])
;
; Same for REGULAR, we eliminate register bank copy with each slices.
; REGULAR-LABEL: t1:
@@ -43,25 +43,25 @@
; Swap Imm and Real.
; REGULAR-NEXT: vinsertps $16, [[RES_Imm]], [[RES_Real]], [[RES_Vec:%xmm[0-9]+]]
; Put the results back into out[out_start].
-; REGULAR-NEXT: vmovq [[RES_Vec]], ([[BASE]])
+; REGULAR-NEXT: vmovlps [[RES_Vec]], ([[BASE]])
define void @t1(%class.Complex* nocapture %out, i64 %out_start) {
entry:
- %arrayidx = getelementptr inbounds %class.Complex* %out, i64 %out_start
+ %arrayidx = getelementptr inbounds %class.Complex, %class.Complex* %out, i64 %out_start
%tmp = bitcast %class.Complex* %arrayidx to i64*
- %tmp1 = load i64* %tmp, align 8
+ %tmp1 = load i64, i64* %tmp, align 8
%t0.sroa.0.0.extract.trunc = trunc i64 %tmp1 to i32
%tmp2 = bitcast i32 %t0.sroa.0.0.extract.trunc to float
%t0.sroa.2.0.extract.shift = lshr i64 %tmp1, 32
%t0.sroa.2.0.extract.trunc = trunc i64 %t0.sroa.2.0.extract.shift to i32
%tmp3 = bitcast i32 %t0.sroa.2.0.extract.trunc to float
%add = add i64 %out_start, 8
- %arrayidx2 = getelementptr inbounds %class.Complex* %out, i64 %add
- %i.i = getelementptr inbounds %class.Complex* %arrayidx2, i64 0, i32 0
- %tmp4 = load float* %i.i, align 4
+ %arrayidx2 = getelementptr inbounds %class.Complex, %class.Complex* %out, i64 %add
+ %i.i = getelementptr inbounds %class.Complex, %class.Complex* %arrayidx2, i64 0, i32 0
+ %tmp4 = load float, float* %i.i, align 4
%add.i = fadd float %tmp4, %tmp2
%retval.sroa.0.0.vec.insert.i = insertelement <2 x float> undef, float %add.i, i32 0
- %r.i = getelementptr inbounds %class.Complex* %arrayidx2, i64 0, i32 1
- %tmp5 = load float* %r.i, align 4
+ %r.i = getelementptr inbounds %class.Complex, %class.Complex* %arrayidx2, i64 0, i32 1
+ %tmp5 = load float, float* %r.i, align 4
%add5.i = fadd float %tmp5, %tmp3
%retval.sroa.0.4.vec.insert.i = insertelement <2 x float> %retval.sroa.0.0.vec.insert.i, float %add5.i, i32 1
%ref.tmp.sroa.0.0.cast = bitcast %class.Complex* %arrayidx to <2 x float>*
@@ -100,9 +100,9 @@ declare void @llvm.lifetime.end(i64, i8* nocapture)
; REGULAR-LABEL: t2:
; REGULAR: shrq $48
define i32 @t2(%class.Complex* nocapture %out, i64 %out_start) {
- %arrayidx = getelementptr inbounds %class.Complex* %out, i64 %out_start
+ %arrayidx = getelementptr inbounds %class.Complex, %class.Complex* %out, i64 %out_start
%bitcast = bitcast %class.Complex* %arrayidx to i64*
- %chunk64 = load i64* %bitcast, align 8
+ %chunk64 = load i64, i64* %bitcast, align 8
%slice32_low = trunc i64 %chunk64 to i32
%shift48 = lshr i64 %chunk64, 48
%slice32_high = trunc i64 %shift48 to i32
@@ -125,9 +125,9 @@ define i32 @t2(%class.Complex* nocapture %out, i64 %out_start) {
; REGULAR: shrq $48
; REGULAR: shrq $32
define i32 @t3(%class.Complex* nocapture %out, i64 %out_start) {
- %arrayidx = getelementptr inbounds %class.Complex* %out, i64 %out_start
+ %arrayidx = getelementptr inbounds %class.Complex, %class.Complex* %out, i64 %out_start
%bitcast = bitcast %class.Complex* %arrayidx to i64*
- %chunk64 = load i64* %bitcast, align 8
+ %chunk64 = load i64, i64* %bitcast, align 8
%slice32_low = trunc i64 %chunk64 to i32
%shift48 = lshr i64 %chunk64, 48
%slice32_high = trunc i64 %shift48 to i32
diff --git a/test/CodeGen/X86/logical-load-fold.ll b/test/CodeGen/X86/logical-load-fold.ll
new file mode 100644
index 000000000000..5aac2d7910cd
--- /dev/null
+++ b/test/CodeGen/X86/logical-load-fold.ll
@@ -0,0 +1,53 @@
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=sse2,sse-unaligned-mem | FileCheck %s --check-prefix=SSE2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx | FileCheck %s --check-prefix=AVX
+
+; Although we have the ability to fold an unaligned load with AVX
+; and under special conditions with some SSE implementations, we
+; cannot fold the load under any circumstances in these test
+; cases because they are not 16-byte loads. The load must be
+; executed as a scalar ('movs*') with a zero extension to
+; 128-bits and then used in the packed logical ('andp*') op.
+; PR22371 - http://llvm.org/bugs/show_bug.cgi?id=22371
+
+define double @load_double_no_fold(double %x, double %y) {
+; SSE2-LABEL: load_double_no_fold:
+; SSE2: BB#0:
+; SSE2-NEXT: cmplesd %xmm0, %xmm1
+; SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; SSE2-NEXT: andpd %xmm1, %xmm0
+; SSE2-NEXT: retq
+;
+; AVX-LABEL: load_double_no_fold:
+; AVX: BB#0:
+; AVX-NEXT: vcmplesd %xmm0, %xmm1, %xmm0
+; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
+; AVX-NEXT: vandpd %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+
+ %cmp = fcmp oge double %x, %y
+ %zext = zext i1 %cmp to i32
+ %conv = sitofp i32 %zext to double
+ ret double %conv
+}
+
+define float @load_float_no_fold(float %x, float %y) {
+; SSE2-LABEL: load_float_no_fold:
+; SSE2: BB#0:
+; SSE2-NEXT: cmpless %xmm0, %xmm1
+; SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; SSE2-NEXT: andps %xmm1, %xmm0
+; SSE2-NEXT: retq
+;
+; AVX-LABEL: load_float_no_fold:
+; AVX: BB#0:
+; AVX-NEXT: vcmpless %xmm0, %xmm1, %xmm0
+; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; AVX-NEXT: vandps %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+
+ %cmp = fcmp oge float %x, %y
+ %zext = zext i1 %cmp to i32
+ %conv = sitofp i32 %zext to float
+ ret float %conv
+}
+
diff --git a/test/CodeGen/X86/longlong-deadload.ll b/test/CodeGen/X86/longlong-deadload.ll
index 73e10127c065..3adaf49e372b 100644
--- a/test/CodeGen/X86/longlong-deadload.ll
+++ b/test/CodeGen/X86/longlong-deadload.ll
@@ -6,7 +6,7 @@ define void @test(i64* %P) nounwind {
; CHECK: movl 4(%esp), %[[REGISTER:.*]]
; CHECK-NOT: 4(%[[REGISTER]])
; CHECK: ret
- %tmp1 = load i64* %P, align 8 ; <i64> [#uses=1]
+ %tmp1 = load i64, i64* %P, align 8 ; <i64> [#uses=1]
%tmp2 = xor i64 %tmp1, 1 ; <i64> [#uses=1]
store i64 %tmp2, i64* %P, align 8
ret void
diff --git a/test/CodeGen/X86/loop-hoist.ll b/test/CodeGen/X86/loop-hoist.ll
index c103e29f3bf3..8de240c0a97f 100644
--- a/test/CodeGen/X86/loop-hoist.ll
+++ b/test/CodeGen/X86/loop-hoist.ll
@@ -16,7 +16,7 @@ entry:
cond_true: ; preds = %cond_true, %entry
%indvar = phi i32 [ %x, %entry ], [ %indvar.next, %cond_true ] ; <i32> [#uses=2]
%i.0.0 = bitcast i32 %indvar to i32 ; <i32> [#uses=2]
- %tmp = getelementptr [0 x i32]* @Arr, i32 0, i32 %i.0.0 ; <i32*> [#uses=1]
+ %tmp = getelementptr [0 x i32], [0 x i32]* @Arr, i32 0, i32 %i.0.0 ; <i32*> [#uses=1]
store i32 %i.0.0, i32* %tmp
%indvar.next = add i32 %indvar, 1 ; <i32> [#uses=2]
%exitcond = icmp eq i32 %indvar.next, %N ; <i1> [#uses=1]
diff --git a/test/CodeGen/X86/loop-strength-reduce-2.ll b/test/CodeGen/X86/loop-strength-reduce-2.ll
index b546462b684f..062819021415 100644
--- a/test/CodeGen/X86/loop-strength-reduce-2.ll
+++ b/test/CodeGen/X86/loop-strength-reduce-2.ll
@@ -31,10 +31,10 @@ cond_true: ; preds = %cond_true, %entry
%indvar = phi i32 [ 0, %entry ], [ %indvar.next, %cond_true ] ; <i32> [#uses=2]
%i.0.0 = bitcast i32 %indvar to i32 ; <i32> [#uses=2]
%tmp2 = add i32 %i.0.0, 1 ; <i32> [#uses=1]
- %tmp = getelementptr [16 x [16 x i32]]* @A, i32 0, i32 %row, i32 %tmp2 ; <i32*> [#uses=1]
+ %tmp = getelementptr [16 x [16 x i32]], [16 x [16 x i32]]* @A, i32 0, i32 %row, i32 %tmp2 ; <i32*> [#uses=1]
store i32 4, i32* %tmp
%tmp5.upgrd.1 = add i32 %i.0.0, 2 ; <i32> [#uses=1]
- %tmp7 = getelementptr [16 x [16 x i32]]* @A, i32 0, i32 %row, i32 %tmp5.upgrd.1 ; <i32*> [#uses=1]
+ %tmp7 = getelementptr [16 x [16 x i32]], [16 x [16 x i32]]* @A, i32 0, i32 %row, i32 %tmp5.upgrd.1 ; <i32*> [#uses=1]
store i32 5, i32* %tmp7
%indvar.next = add i32 %indvar, 1 ; <i32> [#uses=2]
%exitcond = icmp eq i32 %indvar.next, %N ; <i1> [#uses=1]
diff --git a/test/CodeGen/X86/loop-strength-reduce-3.ll b/test/CodeGen/X86/loop-strength-reduce-3.ll
index b1c9fb9c0772..9340c5c26713 100644
--- a/test/CodeGen/X86/loop-strength-reduce-3.ll
+++ b/test/CodeGen/X86/loop-strength-reduce-3.ll
@@ -19,10 +19,10 @@ cond_true: ; preds = %cond_true, %entry
%indvar = phi i32 [ 0, %entry ], [ %indvar.next, %cond_true ] ; <i32> [#uses=2]
%i.0.0 = bitcast i32 %indvar to i32 ; <i32> [#uses=2]
%tmp2 = add i32 %i.0.0, 1 ; <i32> [#uses=1]
- %tmp = getelementptr [16 x [16 x i32]]* @A, i32 0, i32 %row, i32 %tmp2 ; <i32*> [#uses=1]
+ %tmp = getelementptr [16 x [16 x i32]], [16 x [16 x i32]]* @A, i32 0, i32 %row, i32 %tmp2 ; <i32*> [#uses=1]
store i32 4, i32* %tmp
%tmp5.upgrd.1 = add i32 %i.0.0, 2 ; <i32> [#uses=1]
- %tmp7 = getelementptr [16 x [16 x i32]]* @A, i32 0, i32 %row, i32 %tmp5.upgrd.1 ; <i32*> [#uses=1]
+ %tmp7 = getelementptr [16 x [16 x i32]], [16 x [16 x i32]]* @A, i32 0, i32 %row, i32 %tmp5.upgrd.1 ; <i32*> [#uses=1]
store i32 5, i32* %tmp7
%indvar.next = add i32 %indvar, 1 ; <i32> [#uses=2]
%exitcond = icmp eq i32 %indvar.next, %N ; <i1> [#uses=1]
diff --git a/test/CodeGen/X86/loop-strength-reduce.ll b/test/CodeGen/X86/loop-strength-reduce.ll
index 42c6ac4983db..2f80e0bb78bd 100644
--- a/test/CodeGen/X86/loop-strength-reduce.ll
+++ b/test/CodeGen/X86/loop-strength-reduce.ll
@@ -19,10 +19,10 @@ cond_true: ; preds = %cond_true, %entry
%indvar = phi i32 [ 0, %entry ], [ %indvar.next, %cond_true ] ; <i32> [#uses=2]
%i.0.0 = bitcast i32 %indvar to i32 ; <i32> [#uses=2]
%tmp2 = add i32 %i.0.0, 1 ; <i32> [#uses=1]
- %tmp = getelementptr [16 x [16 x i32]]* @A, i32 0, i32 %row, i32 %tmp2 ; <i32*> [#uses=1]
+ %tmp = getelementptr [16 x [16 x i32]], [16 x [16 x i32]]* @A, i32 0, i32 %row, i32 %tmp2 ; <i32*> [#uses=1]
store i32 4, i32* %tmp
%tmp5.upgrd.1 = add i32 %i.0.0, 2 ; <i32> [#uses=1]
- %tmp7 = getelementptr [16 x [16 x i32]]* @A, i32 0, i32 %row, i32 %tmp5.upgrd.1 ; <i32*> [#uses=1]
+ %tmp7 = getelementptr [16 x [16 x i32]], [16 x [16 x i32]]* @A, i32 0, i32 %row, i32 %tmp5.upgrd.1 ; <i32*> [#uses=1]
store i32 5, i32* %tmp7
%indvar.next = add i32 %indvar, 1 ; <i32> [#uses=2]
%exitcond = icmp eq i32 %indvar.next, %N ; <i1> [#uses=1]
diff --git a/test/CodeGen/X86/loop-strength-reduce2.ll b/test/CodeGen/X86/loop-strength-reduce2.ll
index 689ee1cf23f4..a83503c7fe7b 100644
--- a/test/CodeGen/X86/loop-strength-reduce2.ll
+++ b/test/CodeGen/X86/loop-strength-reduce2.ll
@@ -18,7 +18,7 @@ bb: ; preds = %bb, %entry
%tmp.16 = add i32 %tmp.15, %tmp. ; <i32> [#uses=2]
%k_addr.0.0 = bitcast i32 %tmp.16 to i32 ; <i32> [#uses=1]
%gep.upgrd.1 = zext i32 %tmp.16 to i64 ; <i64> [#uses=1]
- %tmp = getelementptr [8193 x i8]* @flags2, i32 0, i64 %gep.upgrd.1 ; <i8*> [#uses=1]
+ %tmp = getelementptr [8193 x i8], [8193 x i8]* @flags2, i32 0, i64 %gep.upgrd.1 ; <i8*> [#uses=1]
store i8 0, i8* %tmp
%k_addr.0 = add i32 %k_addr.0.0, %i ; <i32> [#uses=1]
%tmp.upgrd.2 = icmp sgt i32 %k_addr.0, 8192 ; <i1> [#uses=1]
diff --git a/test/CodeGen/X86/loop-strength-reduce4.ll b/test/CodeGen/X86/loop-strength-reduce4.ll
index 32e78790abdb..786534b00d39 100644
--- a/test/CodeGen/X86/loop-strength-reduce4.ll
+++ b/test/CodeGen/X86/loop-strength-reduce4.ll
@@ -26,31 +26,31 @@ bb: ; preds = %bb, %entry
%indvar = phi i32 [ 0, %entry ], [ %indvar.next, %bb ] ; <i32> [#uses=2]
%t.063.0 = phi i32 [ 0, %entry ], [ %tmp47, %bb ] ; <i32> [#uses=1]
%j.065.0 = shl i32 %indvar, 2 ; <i32> [#uses=4]
- %tmp3 = getelementptr [0 x i32]* @state, i32 0, i32 %j.065.0 ; <i32*> [#uses=2]
- %tmp4 = load i32* %tmp3, align 4 ; <i32> [#uses=1]
- %tmp6 = getelementptr [0 x i32]* @S, i32 0, i32 %t.063.0 ; <i32*> [#uses=1]
- %tmp7 = load i32* %tmp6, align 4 ; <i32> [#uses=1]
+ %tmp3 = getelementptr [0 x i32], [0 x i32]* @state, i32 0, i32 %j.065.0 ; <i32*> [#uses=2]
+ %tmp4 = load i32, i32* %tmp3, align 4 ; <i32> [#uses=1]
+ %tmp6 = getelementptr [0 x i32], [0 x i32]* @S, i32 0, i32 %t.063.0 ; <i32*> [#uses=1]
+ %tmp7 = load i32, i32* %tmp6, align 4 ; <i32> [#uses=1]
%tmp8 = xor i32 %tmp7, %tmp4 ; <i32> [#uses=2]
store i32 %tmp8, i32* %tmp3, align 4
%tmp1378 = or i32 %j.065.0, 1 ; <i32> [#uses=1]
- %tmp16 = getelementptr [0 x i32]* @state, i32 0, i32 %tmp1378 ; <i32*> [#uses=2]
- %tmp17 = load i32* %tmp16, align 4 ; <i32> [#uses=1]
- %tmp19 = getelementptr [0 x i32]* @S, i32 0, i32 %tmp8 ; <i32*> [#uses=1]
- %tmp20 = load i32* %tmp19, align 4 ; <i32> [#uses=1]
+ %tmp16 = getelementptr [0 x i32], [0 x i32]* @state, i32 0, i32 %tmp1378 ; <i32*> [#uses=2]
+ %tmp17 = load i32, i32* %tmp16, align 4 ; <i32> [#uses=1]
+ %tmp19 = getelementptr [0 x i32], [0 x i32]* @S, i32 0, i32 %tmp8 ; <i32*> [#uses=1]
+ %tmp20 = load i32, i32* %tmp19, align 4 ; <i32> [#uses=1]
%tmp21 = xor i32 %tmp20, %tmp17 ; <i32> [#uses=2]
store i32 %tmp21, i32* %tmp16, align 4
%tmp2680 = or i32 %j.065.0, 2 ; <i32> [#uses=1]
- %tmp29 = getelementptr [0 x i32]* @state, i32 0, i32 %tmp2680 ; <i32*> [#uses=2]
- %tmp30 = load i32* %tmp29, align 4 ; <i32> [#uses=1]
- %tmp32 = getelementptr [0 x i32]* @S, i32 0, i32 %tmp21 ; <i32*> [#uses=1]
- %tmp33 = load i32* %tmp32, align 4 ; <i32> [#uses=1]
+ %tmp29 = getelementptr [0 x i32], [0 x i32]* @state, i32 0, i32 %tmp2680 ; <i32*> [#uses=2]
+ %tmp30 = load i32, i32* %tmp29, align 4 ; <i32> [#uses=1]
+ %tmp32 = getelementptr [0 x i32], [0 x i32]* @S, i32 0, i32 %tmp21 ; <i32*> [#uses=1]
+ %tmp33 = load i32, i32* %tmp32, align 4 ; <i32> [#uses=1]
%tmp34 = xor i32 %tmp33, %tmp30 ; <i32> [#uses=2]
store i32 %tmp34, i32* %tmp29, align 4
%tmp3982 = or i32 %j.065.0, 3 ; <i32> [#uses=1]
- %tmp42 = getelementptr [0 x i32]* @state, i32 0, i32 %tmp3982 ; <i32*> [#uses=2]
- %tmp43 = load i32* %tmp42, align 4 ; <i32> [#uses=1]
- %tmp45 = getelementptr [0 x i32]* @S, i32 0, i32 %tmp34 ; <i32*> [#uses=1]
- %tmp46 = load i32* %tmp45, align 4 ; <i32> [#uses=1]
+ %tmp42 = getelementptr [0 x i32], [0 x i32]* @state, i32 0, i32 %tmp3982 ; <i32*> [#uses=2]
+ %tmp43 = load i32, i32* %tmp42, align 4 ; <i32> [#uses=1]
+ %tmp45 = getelementptr [0 x i32], [0 x i32]* @S, i32 0, i32 %tmp34 ; <i32*> [#uses=1]
+ %tmp46 = load i32, i32* %tmp45, align 4 ; <i32> [#uses=1]
%tmp47 = xor i32 %tmp46, %tmp43 ; <i32> [#uses=3]
store i32 %tmp47, i32* %tmp42, align 4
%indvar.next = add i32 %indvar, 1 ; <i32> [#uses=2]
diff --git a/test/CodeGen/X86/loop-strength-reduce7.ll b/test/CodeGen/X86/loop-strength-reduce7.ll
index 4b565a67fb2d..92ec485e7752 100644
--- a/test/CodeGen/X86/loop-strength-reduce7.ll
+++ b/test/CodeGen/X86/loop-strength-reduce7.ll
@@ -27,12 +27,12 @@ bb28.i37: ; preds = %bb33.i47, %bb5
bb29.i38: ; preds = %bb33.i47, %bb28.i37
%indvar32.i = phi i32 [ %indvar.next33.i, %bb33.i47 ], [ 0, %bb28.i37 ] ; <i32> [#uses=2]
%sfb.314.i = add i32 %indvar32.i, 0 ; <i32> [#uses=3]
- %1 = getelementptr [4 x [21 x double]]* null, i32 0, i32 %0, i32 %sfb.314.i ; <double*> [#uses=1]
- %2 = load double* %1, align 8 ; <double> [#uses=0]
+ %1 = getelementptr [4 x [21 x double]], [4 x [21 x double]]* null, i32 0, i32 %0, i32 %sfb.314.i ; <double*> [#uses=1]
+ %2 = load double, double* %1, align 8 ; <double> [#uses=0]
br i1 false, label %bb30.i41, label %bb33.i47
bb30.i41: ; preds = %bb29.i38
- %3 = getelementptr %struct.III_scalefac_t* null, i32 0, i32 1, i32 %sfb.314.i, i32 %i.1.reg2mem.0.i ; <i32*> [#uses=1]
+ %3 = getelementptr %struct.III_scalefac_t, %struct.III_scalefac_t* null, i32 0, i32 1, i32 %sfb.314.i, i32 %i.1.reg2mem.0.i ; <i32*> [#uses=1]
store i32 0, i32* %3, align 4
br label %bb33.i47
diff --git a/test/CodeGen/X86/loop-strength-reduce8.ll b/test/CodeGen/X86/loop-strength-reduce8.ll
index c36047c451ae..716e1478c9ab 100644
--- a/test/CodeGen/X86/loop-strength-reduce8.ll
+++ b/test/CodeGen/X86/loop-strength-reduce8.ll
@@ -53,23 +53,23 @@ entry:
%p1 = bitcast i8** %p to i8* ; <i8*> [#uses=2]
call void @llvm.va_start(i8* %p1)
%0 = call fastcc %struct.tree_node* @make_node(i32 %code) nounwind ; <%struct.tree_node*> [#uses=2]
- %1 = getelementptr [256 x i32]* @tree_code_length, i32 0, i32 %code ; <i32*> [#uses=1]
- %2 = load i32* %1, align 4 ; <i32> [#uses=2]
- %3 = load i32* @lineno, align 4 ; <i32> [#uses=1]
+ %1 = getelementptr [256 x i32], [256 x i32]* @tree_code_length, i32 0, i32 %code ; <i32*> [#uses=1]
+ %2 = load i32, i32* %1, align 4 ; <i32> [#uses=2]
+ %3 = load i32, i32* @lineno, align 4 ; <i32> [#uses=1]
%4 = bitcast %struct.tree_node* %0 to %struct.tree_exp* ; <%struct.tree_exp*> [#uses=2]
- %5 = getelementptr %struct.tree_exp* %4, i32 0, i32 1 ; <i32*> [#uses=1]
+ %5 = getelementptr %struct.tree_exp, %struct.tree_exp* %4, i32 0, i32 1 ; <i32*> [#uses=1]
store i32 %3, i32* %5, align 4
%6 = icmp sgt i32 %2, 0 ; <i1> [#uses=1]
br i1 %6, label %bb, label %bb3
bb: ; preds = %bb, %entry
%i.01 = phi i32 [ %indvar.next, %bb ], [ 0, %entry ] ; <i32> [#uses=2]
- %7 = load i8** %p, align 4 ; <i8*> [#uses=2]
- %8 = getelementptr i8* %7, i32 4 ; <i8*> [#uses=1]
+ %7 = load i8*, i8** %p, align 4 ; <i8*> [#uses=2]
+ %8 = getelementptr i8, i8* %7, i32 4 ; <i8*> [#uses=1]
store i8* %8, i8** %p, align 4
%9 = bitcast i8* %7 to %struct.tree_node** ; <%struct.tree_node**> [#uses=1]
- %10 = load %struct.tree_node** %9, align 4 ; <%struct.tree_node*> [#uses=1]
- %11 = getelementptr %struct.tree_exp* %4, i32 0, i32 2, i32 %i.01 ; <%struct.tree_node**> [#uses=1]
+ %10 = load %struct.tree_node*, %struct.tree_node** %9, align 4 ; <%struct.tree_node*> [#uses=1]
+ %11 = getelementptr %struct.tree_exp, %struct.tree_exp* %4, i32 0, i32 2, i32 %i.01 ; <%struct.tree_node**> [#uses=1]
store %struct.tree_node* %10, %struct.tree_node** %11, align 4
%indvar.next = add i32 %i.01, 1 ; <i32> [#uses=2]
%exitcond = icmp eq i32 %indvar.next, %2 ; <i1> [#uses=1]
diff --git a/test/CodeGen/X86/lower-vec-shift-2.ll b/test/CodeGen/X86/lower-vec-shift-2.ll
index 770775d32427..fb8fbba71fca 100644
--- a/test/CodeGen/X86/lower-vec-shift-2.ll
+++ b/test/CodeGen/X86/lower-vec-shift-2.ll
@@ -1,19 +1,20 @@
-; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+sse2 < %s | FileCheck %s -check-prefix=SSE2
-; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+avx < %s | FileCheck %s -check-prefix=AVX
+; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+sse2 < %s | FileCheck %s --check-prefix=SSE2
+; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+avx < %s | FileCheck %s --check-prefix=AVX
define <8 x i16> @test1(<8 x i16> %A, <8 x i16> %B) {
; SSE2-LABEL: test1:
-; SSE2: # BB#0
-; SSE2-NEXT: movd %xmm1, %eax
-; SSE2-NEXT: movzwl %ax, %eax
-; SSE2-NEXT: movd %eax, %xmm1
-; SSE2-NEXT: psllw %xmm1, %xmm0
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movd %xmm1, %eax
+; SSE2-NEXT: movzwl %ax, %eax
+; SSE2-NEXT: movd %eax, %xmm1
+; SSE2-NEXT: psllw %xmm1, %xmm0
; SSE2-NEXT: retq
+;
; AVX-LABEL: test1:
-; AVX: # BB#0
-; AVX-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; AVX-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3,4,5,6,7]
-; AVX-NEXT: vpsllw %xmm1, %xmm0, %xmm0
+; AVX: # BB#0: # %entry
+; AVX-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; AVX-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3,4,5,6,7]
+; AVX-NEXT: vpsllw %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
entry:
%vecinit14 = shufflevector <8 x i16> %B, <8 x i16> undef, <8 x i32> zeroinitializer
@@ -23,16 +24,17 @@ entry:
define <4 x i32> @test2(<4 x i32> %A, <4 x i32> %B) {
; SSE2-LABEL: test2:
-; SSE2: # BB#0
-; SSE2-NEXT: xorps %xmm2, %xmm2
-; SSE2-NEXT: movss %xmm1, %xmm2
-; SSE2-NEXT: pslld %xmm2, %xmm0
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: xorps %xmm2, %xmm2
+; SSE2-NEXT: movss {{.*#+}} xmm2 = xmm1[0],xmm2[1,2,3]
+; SSE2-NEXT: pslld %xmm2, %xmm0
; SSE2-NEXT: retq
+;
; AVX-LABEL: test2:
-; AVX: # BB#0
-; AVX-NEXT: vpxor %xmm2, %xmm2
-; AVX-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3,4,5,6,7]
-; AVX-NEXT: vpslld %xmm1, %xmm0, %xmm0
+; AVX: # BB#0: # %entry
+; AVX-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; AVX-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3,4,5,6,7]
+; AVX-NEXT: vpslld %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
entry:
%vecinit6 = shufflevector <4 x i32> %B, <4 x i32> undef, <4 x i32> zeroinitializer
@@ -42,12 +44,13 @@ entry:
define <2 x i64> @test3(<2 x i64> %A, <2 x i64> %B) {
; SSE2-LABEL: test3:
-; SSE2: # BB#0
-; SSE2-NEXT: psllq %xmm1, %xmm0
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: psllq %xmm1, %xmm0
; SSE2-NEXT: retq
+;
; AVX-LABEL: test3:
-; AVX: # BB#0
-; AVX-NEXT: vpsllq %xmm1, %xmm0, %xmm0
+; AVX: # BB#0: # %entry
+; AVX-NEXT: vpsllq %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
entry:
%vecinit2 = shufflevector <2 x i64> %B, <2 x i64> undef, <2 x i32> zeroinitializer
@@ -57,17 +60,18 @@ entry:
define <8 x i16> @test4(<8 x i16> %A, <8 x i16> %B) {
; SSE2-LABEL: test4:
-; SSE2: # BB#0
-; SSE2-NEXT: movd %xmm1, %eax
-; SSE2-NEXT: movzwl %ax, %eax
-; SSE2-NEXT: movd %eax, %xmm1
-; SSE2-NEXT: psrlw %xmm1, %xmm0
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movd %xmm1, %eax
+; SSE2-NEXT: movzwl %ax, %eax
+; SSE2-NEXT: movd %eax, %xmm1
+; SSE2-NEXT: psrlw %xmm1, %xmm0
; SSE2-NEXT: retq
+;
; AVX-LABEL: test4:
-; AVX: # BB#0
-; AVX-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; AVX-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3,4,5,6,7]
-; AVX-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
+; AVX: # BB#0: # %entry
+; AVX-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; AVX-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3,4,5,6,7]
+; AVX-NEXT: vpsrlw %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
entry:
%vecinit14 = shufflevector <8 x i16> %B, <8 x i16> undef, <8 x i32> zeroinitializer
@@ -77,16 +81,17 @@ entry:
define <4 x i32> @test5(<4 x i32> %A, <4 x i32> %B) {
; SSE2-LABEL: test5:
-; SSE2: # BB#0
-; SSE2-NEXT: xorps %xmm2, %xmm2
-; SSE2-NEXT: movss %xmm1, %xmm2
-; SSE2-NEXT: psrld %xmm2, %xmm0
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: xorps %xmm2, %xmm2
+; SSE2-NEXT: movss {{.*#+}} xmm2 = xmm1[0],xmm2[1,2,3]
+; SSE2-NEXT: psrld %xmm2, %xmm0
; SSE2-NEXT: retq
+;
; AVX-LABEL: test5:
-; AVX: # BB#0
-; AVX-NEXT: vpxor %xmm2, %xmm2
-; AVX-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3,4,5,6,7]
-; AVX-NEXT: vpsrld %xmm1, %xmm0, %xmm0
+; AVX: # BB#0: # %entry
+; AVX-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; AVX-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3,4,5,6,7]
+; AVX-NEXT: vpsrld %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
entry:
%vecinit6 = shufflevector <4 x i32> %B, <4 x i32> undef, <4 x i32> zeroinitializer
@@ -96,12 +101,13 @@ entry:
define <2 x i64> @test6(<2 x i64> %A, <2 x i64> %B) {
; SSE2-LABEL: test6:
-; SSE2: # BB#0
-; SSE2-NEXT: psrlq %xmm1, %xmm0
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: psrlq %xmm1, %xmm0
; SSE2-NEXT: retq
+;
; AVX-LABEL: test6:
-; AVX: # BB#0
-; AVX-NEXT: vpsrlq %xmm1, %xmm0, %xmm0
+; AVX: # BB#0: # %entry
+; AVX-NEXT: vpsrlq %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
entry:
%vecinit2 = shufflevector <2 x i64> %B, <2 x i64> undef, <2 x i32> zeroinitializer
@@ -111,17 +117,18 @@ entry:
define <8 x i16> @test7(<8 x i16> %A, <8 x i16> %B) {
; SSE2-LABEL: test7:
-; SSE2: # BB#0
-; SSE2-NEXT: movd %xmm1, %eax
-; SSE2-NEXT: movzwl %ax, %eax
-; SSE2-NEXT: movd %eax, %xmm1
-; SSE2-NEXT: psraw %xmm1, %xmm0
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movd %xmm1, %eax
+; SSE2-NEXT: movzwl %ax, %eax
+; SSE2-NEXT: movd %eax, %xmm1
+; SSE2-NEXT: psraw %xmm1, %xmm0
; SSE2-NEXT: retq
+;
; AVX-LABEL: test7:
-; AVX: # BB#0
-; AVX-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; AVX-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3,4,5,6,7]
-; AVX-NEXT: vpsraw %xmm1, %xmm0, %xmm0
+; AVX: # BB#0: # %entry
+; AVX-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; AVX-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3,4,5,6,7]
+; AVX-NEXT: vpsraw %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
entry:
%vecinit14 = shufflevector <8 x i16> %B, <8 x i16> undef, <8 x i32> zeroinitializer
@@ -131,16 +138,17 @@ entry:
define <4 x i32> @test8(<4 x i32> %A, <4 x i32> %B) {
; SSE2-LABEL: test8:
-; SSE2: # BB#0
-; SSE2-NEXT: xorps %xmm2, %xmm2
-; SSE2-NEXT: movss %xmm1, %xmm2
-; SSE2-NEXT: psrad %xmm2, %xmm0
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: xorps %xmm2, %xmm2
+; SSE2-NEXT: movss {{.*#+}} xmm2 = xmm1[0],xmm2[1,2,3]
+; SSE2-NEXT: psrad %xmm2, %xmm0
; SSE2-NEXT: retq
+;
; AVX-LABEL: test8:
-; AVX: # BB#0
-; AVX-NEXT: vpxor %xmm2, %xmm2
-; AVX-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3,4,5,6,7]
-; AVX-NEXT: vpsrad %xmm1, %xmm0, %xmm0
+; AVX: # BB#0: # %entry
+; AVX-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; AVX-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3,4,5,6,7]
+; AVX-NEXT: vpsrad %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
entry:
%vecinit6 = shufflevector <4 x i32> %B, <4 x i32> undef, <4 x i32> zeroinitializer
diff --git a/test/CodeGen/X86/lsr-delayed-fold.ll b/test/CodeGen/X86/lsr-delayed-fold.ll
index 8ed97e447fee..eaa52dec2835 100644
--- a/test/CodeGen/X86/lsr-delayed-fold.ll
+++ b/test/CodeGen/X86/lsr-delayed-fold.ll
@@ -42,7 +42,7 @@ for.cond: ; preds = %lbl_264, %for.inc,
lbl_264: ; preds = %if.end, %lbl_264.preheader
%g_263.tmp.0 = phi i8 [ %g_263.tmp.1, %for.cond ] ; <i8> [#uses=1]
- %tmp7 = load i16* undef ; <i16> [#uses=1]
+ %tmp7 = load i16, i16* undef ; <i16> [#uses=1]
%conv8 = trunc i16 %tmp7 to i8 ; <i8> [#uses=1]
%mul.i = mul i8 %p_95.addr.0, %p_95.addr.0 ; <i8> [#uses=1]
%mul.i18 = mul i8 %mul.i, %conv8 ; <i8> [#uses=1]
@@ -98,8 +98,8 @@ lor.lhs.false: ; preds = %for.body
%add106 = trunc i64 %tmp43 to i32 ; <i32> [#uses=1]
%add112 = trunc i64 %tmp45 to i32 ; <i32> [#uses=1]
%add118 = trunc i64 %tmp47 to i32 ; <i32> [#uses=1]
- %tmp10 = getelementptr %struct.Bu* %bu, i64 %indvar, i32 2 ; <i32*> [#uses=1]
- %tmp11 = load i32* %tmp10 ; <i32> [#uses=0]
+ %tmp10 = getelementptr %struct.Bu, %struct.Bu* %bu, i64 %indvar, i32 2 ; <i32*> [#uses=1]
+ %tmp11 = load i32, i32* %tmp10 ; <i32> [#uses=0]
tail call void undef(i32 %add22)
tail call void undef(i32 %add28)
tail call void undef(i32 %add34)
diff --git a/test/CodeGen/X86/lsr-i386.ll b/test/CodeGen/X86/lsr-i386.ll
index 02baf2072e3b..9338939fafd0 100644
--- a/test/CodeGen/X86/lsr-i386.ll
+++ b/test/CodeGen/X86/lsr-i386.ll
@@ -22,7 +22,7 @@ entry:
bb1: ; preds = %bb6, %bb
%indvar11 = phi i32 [ %indvar.next12, %bb6 ], [ 0, %entry ] ; <i32> [#uses=2]
%tmp21 = add i32 %indvar11, 1 ; <i32> [#uses=1]
- %t = load i32* getelementptr inbounds (%struct.anon* @mp2grad_, i32 0, i32 1)
+ %t = load i32, i32* getelementptr inbounds (%struct.anon, %struct.anon* @mp2grad_, i32 0, i32 1)
%tmp15 = mul i32 %n, %t ; <i32> [#uses=1]
%tmp16 = add i32 %tmp21, %tmp15 ; <i32> [#uses=1]
%tmp17 = shl i32 %tmp16, 3 ; <i32> [#uses=1]
@@ -32,7 +32,7 @@ bb1: ; preds = %bb6, %bb
bb2: ; preds = %bb2, %bb2.preheader
%indvar = phi i32 [ 0, %bb1 ], [ %indvar.next, %bb2 ] ; <i32> [#uses=2]
%tmp19 = add i32 %tmp18, %indvar ; <i32> [#uses=1]
- %scevgep = getelementptr %struct.anon* @mp2grad_, i32 0, i32 0, i32 %tmp19 ; <i32*> [#uses=1]
+ %scevgep = getelementptr %struct.anon, %struct.anon* @mp2grad_, i32 0, i32 0, i32 %tmp19 ; <i32*> [#uses=1]
store i32 0, i32* %scevgep
%indvar.next = add i32 %indvar, 1 ; <i32> [#uses=1]
%c = icmp ne i32 %indvar.next, %m
diff --git a/test/CodeGen/X86/lsr-interesting-step.ll b/test/CodeGen/X86/lsr-interesting-step.ll
index 8ea3c53de41e..fe8337e2981a 100644
--- a/test/CodeGen/X86/lsr-interesting-step.ll
+++ b/test/CodeGen/X86/lsr-interesting-step.ll
@@ -27,7 +27,7 @@ bb10: ; preds = %bb7
bb11: ; preds = %bb10, %bb11
%tmp12 = phi i64 [ %tmp14, %bb11 ], [ 2, %bb10 ] ; <i64> [#uses=2]
- %tmp13 = getelementptr inbounds [8192 x i8]* @flags, i64 0, i64 %tmp12 ; <i8*> [#uses=1]
+ %tmp13 = getelementptr inbounds [8192 x i8], [8192 x i8]* @flags, i64 0, i64 %tmp12 ; <i8*> [#uses=1]
store i8 0, i8* %tmp13, align 1
%tmp14 = add nsw i64 %tmp12, %tmp8 ; <i64> [#uses=2]
%tmp15 = icmp slt i64 %tmp14, 8192 ; <i1> [#uses=1]
diff --git a/test/CodeGen/X86/lsr-loop-exit-cond.ll b/test/CodeGen/X86/lsr-loop-exit-cond.ll
index e7d74a924075..2e3929be31c5 100644
--- a/test/CodeGen/X86/lsr-loop-exit-cond.ll
+++ b/test/CodeGen/X86/lsr-loop-exit-cond.ll
@@ -17,9 +17,9 @@
define void @t(i8* nocapture %in, i8* nocapture %out, i32* nocapture %rk, i32 %r) nounwind {
entry:
- %0 = load i32* %rk, align 4 ; <i32> [#uses=1]
- %1 = getelementptr i32* %rk, i64 1 ; <i32*> [#uses=1]
- %2 = load i32* %1, align 4 ; <i32> [#uses=1]
+ %0 = load i32, i32* %rk, align 4 ; <i32> [#uses=1]
+ %1 = getelementptr i32, i32* %rk, i64 1 ; <i32*> [#uses=1]
+ %2 = load i32, i32* %1, align 4 ; <i32> [#uses=1]
%tmp15 = add i32 %r, -1 ; <i32> [#uses=1]
%tmp.16 = zext i32 %tmp15 to i64 ; <i64> [#uses=2]
br label %bb
@@ -32,64 +32,64 @@ bb: ; preds = %bb1, %entry
%rk26 = bitcast i32* %rk to i8* ; <i8*> [#uses=6]
%3 = lshr i32 %s0.0, 24 ; <i32> [#uses=1]
%4 = zext i32 %3 to i64 ; <i64> [#uses=1]
- %5 = getelementptr [256 x i32]* @Te0, i64 0, i64 %4 ; <i32*> [#uses=1]
- %6 = load i32* %5, align 4 ; <i32> [#uses=1]
+ %5 = getelementptr [256 x i32], [256 x i32]* @Te0, i64 0, i64 %4 ; <i32*> [#uses=1]
+ %6 = load i32, i32* %5, align 4 ; <i32> [#uses=1]
%7 = lshr i32 %s1.0, 16 ; <i32> [#uses=1]
%8 = and i32 %7, 255 ; <i32> [#uses=1]
%9 = zext i32 %8 to i64 ; <i64> [#uses=1]
- %10 = getelementptr [256 x i32]* @Te1, i64 0, i64 %9 ; <i32*> [#uses=1]
- %11 = load i32* %10, align 4 ; <i32> [#uses=1]
+ %10 = getelementptr [256 x i32], [256 x i32]* @Te1, i64 0, i64 %9 ; <i32*> [#uses=1]
+ %11 = load i32, i32* %10, align 4 ; <i32> [#uses=1]
%ctg2.sum2728 = or i64 %tmp18, 8 ; <i64> [#uses=1]
- %12 = getelementptr i8* %rk26, i64 %ctg2.sum2728 ; <i8*> [#uses=1]
+ %12 = getelementptr i8, i8* %rk26, i64 %ctg2.sum2728 ; <i8*> [#uses=1]
%13 = bitcast i8* %12 to i32* ; <i32*> [#uses=1]
- %14 = load i32* %13, align 4 ; <i32> [#uses=1]
+ %14 = load i32, i32* %13, align 4 ; <i32> [#uses=1]
%15 = xor i32 %11, %6 ; <i32> [#uses=1]
%16 = xor i32 %15, %14 ; <i32> [#uses=3]
%17 = lshr i32 %s1.0, 24 ; <i32> [#uses=1]
%18 = zext i32 %17 to i64 ; <i64> [#uses=1]
- %19 = getelementptr [256 x i32]* @Te0, i64 0, i64 %18 ; <i32*> [#uses=1]
- %20 = load i32* %19, align 4 ; <i32> [#uses=1]
+ %19 = getelementptr [256 x i32], [256 x i32]* @Te0, i64 0, i64 %18 ; <i32*> [#uses=1]
+ %20 = load i32, i32* %19, align 4 ; <i32> [#uses=1]
%21 = and i32 %s0.0, 255 ; <i32> [#uses=1]
%22 = zext i32 %21 to i64 ; <i64> [#uses=1]
- %23 = getelementptr [256 x i32]* @Te3, i64 0, i64 %22 ; <i32*> [#uses=1]
- %24 = load i32* %23, align 4 ; <i32> [#uses=1]
+ %23 = getelementptr [256 x i32], [256 x i32]* @Te3, i64 0, i64 %22 ; <i32*> [#uses=1]
+ %24 = load i32, i32* %23, align 4 ; <i32> [#uses=1]
%ctg2.sum2930 = or i64 %tmp18, 12 ; <i64> [#uses=1]
- %25 = getelementptr i8* %rk26, i64 %ctg2.sum2930 ; <i8*> [#uses=1]
+ %25 = getelementptr i8, i8* %rk26, i64 %ctg2.sum2930 ; <i8*> [#uses=1]
%26 = bitcast i8* %25 to i32* ; <i32*> [#uses=1]
- %27 = load i32* %26, align 4 ; <i32> [#uses=1]
+ %27 = load i32, i32* %26, align 4 ; <i32> [#uses=1]
%28 = xor i32 %24, %20 ; <i32> [#uses=1]
%29 = xor i32 %28, %27 ; <i32> [#uses=4]
%30 = lshr i32 %16, 24 ; <i32> [#uses=1]
%31 = zext i32 %30 to i64 ; <i64> [#uses=1]
- %32 = getelementptr [256 x i32]* @Te0, i64 0, i64 %31 ; <i32*> [#uses=1]
- %33 = load i32* %32, align 4 ; <i32> [#uses=2]
+ %32 = getelementptr [256 x i32], [256 x i32]* @Te0, i64 0, i64 %31 ; <i32*> [#uses=1]
+ %33 = load i32, i32* %32, align 4 ; <i32> [#uses=2]
%exitcond = icmp eq i64 %indvar, %tmp.16 ; <i1> [#uses=1]
br i1 %exitcond, label %bb2, label %bb1
bb1: ; preds = %bb
%ctg2.sum31 = add i64 %tmp18, 16 ; <i64> [#uses=1]
- %34 = getelementptr i8* %rk26, i64 %ctg2.sum31 ; <i8*> [#uses=1]
+ %34 = getelementptr i8, i8* %rk26, i64 %ctg2.sum31 ; <i8*> [#uses=1]
%35 = bitcast i8* %34 to i32* ; <i32*> [#uses=1]
%36 = lshr i32 %29, 16 ; <i32> [#uses=1]
%37 = and i32 %36, 255 ; <i32> [#uses=1]
%38 = zext i32 %37 to i64 ; <i64> [#uses=1]
- %39 = getelementptr [256 x i32]* @Te1, i64 0, i64 %38 ; <i32*> [#uses=1]
- %40 = load i32* %39, align 4 ; <i32> [#uses=1]
- %41 = load i32* %35, align 4 ; <i32> [#uses=1]
+ %39 = getelementptr [256 x i32], [256 x i32]* @Te1, i64 0, i64 %38 ; <i32*> [#uses=1]
+ %40 = load i32, i32* %39, align 4 ; <i32> [#uses=1]
+ %41 = load i32, i32* %35, align 4 ; <i32> [#uses=1]
%42 = xor i32 %40, %33 ; <i32> [#uses=1]
%43 = xor i32 %42, %41 ; <i32> [#uses=1]
%44 = lshr i32 %29, 24 ; <i32> [#uses=1]
%45 = zext i32 %44 to i64 ; <i64> [#uses=1]
- %46 = getelementptr [256 x i32]* @Te0, i64 0, i64 %45 ; <i32*> [#uses=1]
- %47 = load i32* %46, align 4 ; <i32> [#uses=1]
+ %46 = getelementptr [256 x i32], [256 x i32]* @Te0, i64 0, i64 %45 ; <i32*> [#uses=1]
+ %47 = load i32, i32* %46, align 4 ; <i32> [#uses=1]
%48 = and i32 %16, 255 ; <i32> [#uses=1]
%49 = zext i32 %48 to i64 ; <i64> [#uses=1]
- %50 = getelementptr [256 x i32]* @Te3, i64 0, i64 %49 ; <i32*> [#uses=1]
- %51 = load i32* %50, align 4 ; <i32> [#uses=1]
+ %50 = getelementptr [256 x i32], [256 x i32]* @Te3, i64 0, i64 %49 ; <i32*> [#uses=1]
+ %51 = load i32, i32* %50, align 4 ; <i32> [#uses=1]
%ctg2.sum32 = add i64 %tmp18, 20 ; <i64> [#uses=1]
- %52 = getelementptr i8* %rk26, i64 %ctg2.sum32 ; <i8*> [#uses=1]
+ %52 = getelementptr i8, i8* %rk26, i64 %ctg2.sum32 ; <i8*> [#uses=1]
%53 = bitcast i8* %52 to i32* ; <i32*> [#uses=1]
- %54 = load i32* %53, align 4 ; <i32> [#uses=1]
+ %54 = load i32, i32* %53, align 4 ; <i32> [#uses=1]
%55 = xor i32 %51, %47 ; <i32> [#uses=1]
%56 = xor i32 %55, %54 ; <i32> [#uses=1]
%indvar.next = add i64 %indvar, 1 ; <i64> [#uses=1]
@@ -98,48 +98,48 @@ bb1: ; preds = %bb
bb2: ; preds = %bb
%tmp10 = shl i64 %tmp.16, 4 ; <i64> [#uses=2]
%ctg2.sum = add i64 %tmp10, 16 ; <i64> [#uses=1]
- %tmp1213 = getelementptr i8* %rk26, i64 %ctg2.sum ; <i8*> [#uses=1]
+ %tmp1213 = getelementptr i8, i8* %rk26, i64 %ctg2.sum ; <i8*> [#uses=1]
%57 = bitcast i8* %tmp1213 to i32* ; <i32*> [#uses=1]
%58 = and i32 %33, -16777216 ; <i32> [#uses=1]
%59 = lshr i32 %29, 16 ; <i32> [#uses=1]
%60 = and i32 %59, 255 ; <i32> [#uses=1]
%61 = zext i32 %60 to i64 ; <i64> [#uses=1]
- %62 = getelementptr [256 x i32]* @Te1, i64 0, i64 %61 ; <i32*> [#uses=1]
- %63 = load i32* %62, align 4 ; <i32> [#uses=1]
+ %62 = getelementptr [256 x i32], [256 x i32]* @Te1, i64 0, i64 %61 ; <i32*> [#uses=1]
+ %63 = load i32, i32* %62, align 4 ; <i32> [#uses=1]
%64 = and i32 %63, 16711680 ; <i32> [#uses=1]
%65 = or i32 %64, %58 ; <i32> [#uses=1]
- %66 = load i32* %57, align 4 ; <i32> [#uses=1]
+ %66 = load i32, i32* %57, align 4 ; <i32> [#uses=1]
%67 = xor i32 %65, %66 ; <i32> [#uses=2]
%68 = lshr i32 %29, 8 ; <i32> [#uses=1]
%69 = zext i32 %68 to i64 ; <i64> [#uses=1]
- %70 = getelementptr [256 x i32]* @Te0, i64 0, i64 %69 ; <i32*> [#uses=1]
- %71 = load i32* %70, align 4 ; <i32> [#uses=1]
+ %70 = getelementptr [256 x i32], [256 x i32]* @Te0, i64 0, i64 %69 ; <i32*> [#uses=1]
+ %71 = load i32, i32* %70, align 4 ; <i32> [#uses=1]
%72 = and i32 %71, -16777216 ; <i32> [#uses=1]
%73 = and i32 %16, 255 ; <i32> [#uses=1]
%74 = zext i32 %73 to i64 ; <i64> [#uses=1]
- %75 = getelementptr [256 x i32]* @Te1, i64 0, i64 %74 ; <i32*> [#uses=1]
- %76 = load i32* %75, align 4 ; <i32> [#uses=1]
+ %75 = getelementptr [256 x i32], [256 x i32]* @Te1, i64 0, i64 %74 ; <i32*> [#uses=1]
+ %76 = load i32, i32* %75, align 4 ; <i32> [#uses=1]
%77 = and i32 %76, 16711680 ; <i32> [#uses=1]
%78 = or i32 %77, %72 ; <i32> [#uses=1]
%ctg2.sum25 = add i64 %tmp10, 20 ; <i64> [#uses=1]
- %79 = getelementptr i8* %rk26, i64 %ctg2.sum25 ; <i8*> [#uses=1]
+ %79 = getelementptr i8, i8* %rk26, i64 %ctg2.sum25 ; <i8*> [#uses=1]
%80 = bitcast i8* %79 to i32* ; <i32*> [#uses=1]
- %81 = load i32* %80, align 4 ; <i32> [#uses=1]
+ %81 = load i32, i32* %80, align 4 ; <i32> [#uses=1]
%82 = xor i32 %78, %81 ; <i32> [#uses=2]
%83 = lshr i32 %67, 24 ; <i32> [#uses=1]
%84 = trunc i32 %83 to i8 ; <i8> [#uses=1]
store i8 %84, i8* %out, align 1
%85 = lshr i32 %67, 16 ; <i32> [#uses=1]
%86 = trunc i32 %85 to i8 ; <i8> [#uses=1]
- %87 = getelementptr i8* %out, i64 1 ; <i8*> [#uses=1]
+ %87 = getelementptr i8, i8* %out, i64 1 ; <i8*> [#uses=1]
store i8 %86, i8* %87, align 1
- %88 = getelementptr i8* %out, i64 4 ; <i8*> [#uses=1]
+ %88 = getelementptr i8, i8* %out, i64 4 ; <i8*> [#uses=1]
%89 = lshr i32 %82, 24 ; <i32> [#uses=1]
%90 = trunc i32 %89 to i8 ; <i8> [#uses=1]
store i8 %90, i8* %88, align 1
%91 = lshr i32 %82, 16 ; <i32> [#uses=1]
%92 = trunc i32 %91 to i8 ; <i8> [#uses=1]
- %93 = getelementptr i8* %out, i64 5 ; <i8*> [#uses=1]
+ %93 = getelementptr i8, i8* %out, i64 5 ; <i8*> [#uses=1]
store i8 %92, i8* %93, align 1
ret void
}
@@ -175,8 +175,8 @@ for.body: ; preds = %for.body.lr.ph, %fo
%indvars.iv = phi i64 [ %0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ]
%bi.06 = phi i32 [ 0, %for.body.lr.ph ], [ %i.addr.0.bi.0, %for.body ]
%b.05 = phi i32 [ 0, %for.body.lr.ph ], [ %.b.0, %for.body ]
- %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv
- %1 = load i32* %arrayidx, align 4
+ %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
+ %1 = load i32, i32* %arrayidx, align 4
%cmp1 = icmp ugt i32 %1, %b.05
%.b.0 = select i1 %cmp1, i32 %1, i32 %b.05
%2 = trunc i64 %indvars.iv to i32
diff --git a/test/CodeGen/X86/lsr-normalization.ll b/test/CodeGen/X86/lsr-normalization.ll
index 2775558b0cfd..09c892c9fc88 100644
--- a/test/CodeGen/X86/lsr-normalization.ll
+++ b/test/CodeGen/X86/lsr-normalization.ll
@@ -22,12 +22,12 @@ bb:
%tmp = alloca %0, align 8 ; <%0*> [#uses=11]
%tmp2 = bitcast %0* %tmp to i8* ; <i8*> [#uses=1]
call void @llvm.memset.p0i8.i64(i8* %tmp2, i8 0, i64 16, i32 8, i1 false) nounwind
- %tmp3 = getelementptr inbounds %0* %tmp, i64 0, i32 0 ; <%0**> [#uses=3]
+ %tmp3 = getelementptr inbounds %0, %0* %tmp, i64 0, i32 0 ; <%0**> [#uses=3]
store %0* %tmp, %0** %tmp3
- %tmp4 = getelementptr inbounds %0* %tmp, i64 0, i32 1 ; <%0**> [#uses=1]
+ %tmp4 = getelementptr inbounds %0, %0* %tmp, i64 0, i32 1 ; <%0**> [#uses=1]
store %0* %tmp, %0** %tmp4
%tmp5 = call noalias i8* @_Znwm(i64 24) nounwind ; <i8*> [#uses=2]
- %tmp6 = getelementptr inbounds i8* %tmp5, i64 16 ; <i8*> [#uses=2]
+ %tmp6 = getelementptr inbounds i8, i8* %tmp5, i64 16 ; <i8*> [#uses=2]
%tmp7 = icmp eq i8* %tmp6, null ; <i1> [#uses=1]
br i1 %tmp7, label %bb10, label %bb8
@@ -39,19 +39,19 @@ bb8: ; preds = %bb
bb10: ; preds = %bb8, %bb
%tmp11 = bitcast i8* %tmp5 to %0* ; <%0*> [#uses=1]
call void @_ZNSt15_List_node_base4hookEPS_(%0* %tmp11, %0* %tmp) nounwind
- %tmp12 = load %0** %tmp3 ; <%0*> [#uses=3]
+ %tmp12 = load %0*, %0** %tmp3 ; <%0*> [#uses=3]
%tmp13 = icmp eq %0* %tmp12, %tmp ; <i1> [#uses=1]
br i1 %tmp13, label %bb14, label %bb16
bb14: ; preds = %bb10
- %tmp15 = call i32 @puts(i8* getelementptr inbounds ([5 x i8]* @1, i64 0, i64 0))
+ %tmp15 = call i32 @puts(i8* getelementptr inbounds ([5 x i8], [5 x i8]* @1, i64 0, i64 0))
br label %bb35
bb16: ; preds = %bb16, %bb10
%tmp17 = phi i64 [ %tmp22, %bb16 ], [ 0, %bb10 ] ; <i64> [#uses=1]
%tmp18 = phi %0* [ %tmp20, %bb16 ], [ %tmp12, %bb10 ] ; <%0*> [#uses=1]
- %tmp19 = getelementptr inbounds %0* %tmp18, i64 0, i32 0 ; <%0**> [#uses=1]
- %tmp20 = load %0** %tmp19 ; <%0*> [#uses=2]
+ %tmp19 = getelementptr inbounds %0, %0* %tmp18, i64 0, i32 0 ; <%0**> [#uses=1]
+ %tmp20 = load %0*, %0** %tmp19 ; <%0*> [#uses=2]
%tmp21 = icmp eq %0* %tmp20, %tmp ; <i1> [#uses=1]
%tmp22 = add i64 %tmp17, 1 ; <i64> [#uses=2]
br i1 %tmp21, label %bb23, label %bb16
@@ -63,26 +63,26 @@ bb23: ; preds = %bb16
bb25: ; preds = %bb25, %bb23
%tmp26 = phi i64 [ %tmp31, %bb25 ], [ 0, %bb23 ] ; <i64> [#uses=1]
%tmp27 = phi %0* [ %tmp29, %bb25 ], [ %tmp12, %bb23 ] ; <%0*> [#uses=1]
- %tmp28 = getelementptr inbounds %0* %tmp27, i64 0, i32 0 ; <%0**> [#uses=1]
- %tmp29 = load %0** %tmp28 ; <%0*> [#uses=2]
+ %tmp28 = getelementptr inbounds %0, %0* %tmp27, i64 0, i32 0 ; <%0**> [#uses=1]
+ %tmp29 = load %0*, %0** %tmp28 ; <%0*> [#uses=2]
%tmp30 = icmp eq %0* %tmp29, %tmp ; <i1> [#uses=1]
%tmp31 = add i64 %tmp26, 1 ; <i64> [#uses=2]
br i1 %tmp30, label %bb32, label %bb25
bb32: ; preds = %bb25
%tmp33 = mul i64 %tmp31, %tmp24 ; <i64> [#uses=1]
- %tmp34 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([13 x i8]* @0, i64 0, i64 0), i64 %tmp33) nounwind
+ %tmp34 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([13 x i8], [13 x i8]* @0, i64 0, i64 0), i64 %tmp33) nounwind
br label %bb35
bb35: ; preds = %bb32, %bb14
- %tmp36 = load %0** %tmp3 ; <%0*> [#uses=2]
+ %tmp36 = load %0*, %0** %tmp3 ; <%0*> [#uses=2]
%tmp37 = icmp eq %0* %tmp36, %tmp ; <i1> [#uses=1]
br i1 %tmp37, label %bb44, label %bb38
bb38: ; preds = %bb38, %bb35
%tmp39 = phi %0* [ %tmp41, %bb38 ], [ %tmp36, %bb35 ] ; <%0*> [#uses=2]
- %tmp40 = getelementptr inbounds %0* %tmp39, i64 0, i32 0 ; <%0**> [#uses=1]
- %tmp41 = load %0** %tmp40 ; <%0*> [#uses=2]
+ %tmp40 = getelementptr inbounds %0, %0* %tmp39, i64 0, i32 0 ; <%0**> [#uses=1]
+ %tmp41 = load %0*, %0** %tmp40 ; <%0*> [#uses=2]
%tmp42 = bitcast %0* %tmp39 to i8* ; <i8*> [#uses=1]
call void @_ZdlPv(i8* %tmp42) nounwind
%tmp43 = icmp eq %0* %tmp41, %tmp ; <i1> [#uses=1]
diff --git a/test/CodeGen/X86/lsr-quadratic-expand.ll b/test/CodeGen/X86/lsr-quadratic-expand.ll
index 2bbb47092904..29a8da2ef3aa 100644
--- a/test/CodeGen/X86/lsr-quadratic-expand.ll
+++ b/test/CodeGen/X86/lsr-quadratic-expand.ll
@@ -13,7 +13,7 @@ bb: ; preds = %bb, %entry
%z5 = add nsw i32 %z4, %z2
%z6 = trunc i32 %z5 to i16
call fastcc void @dw210x_op_rw(i16 zeroext %z6)
- %z7 = getelementptr i8* null, i64 %z
+ %z7 = getelementptr i8, i8* null, i64 %z
store i8 undef, i8* %z7, align 1
%z8 = add nsw i32 %z2, 1
br label %bb
diff --git a/test/CodeGen/X86/lsr-redundant-addressing.ll b/test/CodeGen/X86/lsr-redundant-addressing.ll
index cb0ac8b67a7e..31a1859e3b27 100644
--- a/test/CodeGen/X86/lsr-redundant-addressing.ll
+++ b/test/CodeGen/X86/lsr-redundant-addressing.ll
@@ -22,8 +22,8 @@ bb:
bb38: ; preds = %bb200, %bb
%tmp39 = phi i64 [ %tmp201, %bb200 ], [ 0, %bb ]
%tmp40 = sub i64 0, %tmp39
- %tmp47 = getelementptr [5 x %0]* @pgm, i64 0, i64 %tmp40, i32 0
- %tmp34 = load i32* %tmp47, align 16
+ %tmp47 = getelementptr [5 x %0], [5 x %0]* @pgm, i64 0, i64 %tmp40, i32 0
+ %tmp34 = load i32, i32* %tmp47, align 16
%tmp203 = icmp slt i32 %tmp34, 12
br i1 %tmp203, label %bb215, label %bb200
@@ -32,20 +32,20 @@ bb200: ; preds = %bb38
br label %bb38
bb215: ; preds = %bb38
- %tmp50 = getelementptr [5 x %0]* @pgm, i64 0, i64 %tmp40, i32 1, i64 2
- %tmp49 = getelementptr [5 x %0]* @pgm, i64 0, i64 %tmp40, i32 1, i64 1
- %tmp48 = getelementptr [5 x %0]* @pgm, i64 0, i64 %tmp40, i32 1, i64 0
+ %tmp50 = getelementptr [5 x %0], [5 x %0]* @pgm, i64 0, i64 %tmp40, i32 1, i64 2
+ %tmp49 = getelementptr [5 x %0], [5 x %0]* @pgm, i64 0, i64 %tmp40, i32 1, i64 1
+ %tmp48 = getelementptr [5 x %0], [5 x %0]* @pgm, i64 0, i64 %tmp40, i32 1, i64 0
%tmp216 = add nsw i32 %tmp34, 1
store i32 %tmp216, i32* %tmp47, align 16
%tmp217 = sext i32 %tmp216 to i64
- %tmp218 = getelementptr inbounds [13 x %1]* @isa, i64 0, i64 %tmp217, i32 3, i64 0
- %tmp219 = load i32* %tmp218, align 8
+ %tmp218 = getelementptr inbounds [13 x %1], [13 x %1]* @isa, i64 0, i64 %tmp217, i32 3, i64 0
+ %tmp219 = load i32, i32* %tmp218, align 8
store i32 %tmp219, i32* %tmp48, align 4
- %tmp220 = getelementptr inbounds [13 x %1]* @isa, i64 0, i64 %tmp217, i32 3, i64 1
- %tmp221 = load i32* %tmp220, align 4
+ %tmp220 = getelementptr inbounds [13 x %1], [13 x %1]* @isa, i64 0, i64 %tmp217, i32 3, i64 1
+ %tmp221 = load i32, i32* %tmp220, align 4
store i32 %tmp221, i32* %tmp49, align 4
- %tmp222 = getelementptr inbounds [13 x %1]* @isa, i64 0, i64 %tmp217, i32 3, i64 2
- %tmp223 = load i32* %tmp222, align 8
+ %tmp222 = getelementptr inbounds [13 x %1], [13 x %1]* @isa, i64 0, i64 %tmp217, i32 3, i64 2
+ %tmp223 = load i32, i32* %tmp222, align 8
store i32 %tmp223, i32* %tmp50, align 4
ret void
}
diff --git a/test/CodeGen/X86/lsr-reuse-trunc.ll b/test/CodeGen/X86/lsr-reuse-trunc.ll
index 276dab72f7cc..7f73b6b9d1ee 100644
--- a/test/CodeGen/X86/lsr-reuse-trunc.ll
+++ b/test/CodeGen/X86/lsr-reuse-trunc.ll
@@ -14,18 +14,18 @@
define void @vvfloorf(float* nocapture %y, float* nocapture %x, i32* nocapture %n) nounwind {
entry:
- %0 = load i32* %n, align 4
+ %0 = load i32, i32* %n, align 4
%1 = icmp sgt i32 %0, 0
br i1 %1, label %bb, label %return
bb:
%indvar = phi i64 [ %indvar.next, %bb ], [ 0, %entry ]
%tmp = shl i64 %indvar, 2
- %scevgep = getelementptr float* %y, i64 %tmp
+ %scevgep = getelementptr float, float* %y, i64 %tmp
%scevgep9 = bitcast float* %scevgep to <4 x float>*
- %scevgep10 = getelementptr float* %x, i64 %tmp
+ %scevgep10 = getelementptr float, float* %x, i64 %tmp
%scevgep1011 = bitcast float* %scevgep10 to <4 x float>*
- %2 = load <4 x float>* %scevgep1011, align 16
+ %2 = load <4 x float>, <4 x float>* %scevgep1011, align 16
%3 = bitcast <4 x float> %2 to <4 x i32>
%4 = and <4 x i32> %3, <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647>
%5 = bitcast <4 x i32> %4 to <4 x float>
@@ -48,7 +48,7 @@ bb:
store <4 x float> %19, <4 x float>* %scevgep9, align 16
%tmp12 = add i64 %tmp, 4
%tmp13 = trunc i64 %tmp12 to i32
- %20 = load i32* %n, align 4
+ %20 = load i32, i32* %n, align 4
%21 = icmp sgt i32 %20, %tmp13
%indvar.next = add i64 %indvar, 1
br i1 %21, label %bb, label %return
diff --git a/test/CodeGen/X86/lsr-reuse.ll b/test/CodeGen/X86/lsr-reuse.ll
index 40c041ab6b09..dd1e40f6a1ec 100644
--- a/test/CodeGen/X86/lsr-reuse.ll
+++ b/test/CodeGen/X86/lsr-reuse.ll
@@ -25,11 +25,11 @@ entry:
loop:
%i = phi i64 [ %i.next, %loop ], [ 0, %entry ]
- %Ai = getelementptr inbounds double* %A, i64 %i
- %Bi = getelementptr inbounds double* %B, i64 %i
- %Ci = getelementptr inbounds double* %C, i64 %i
- %t1 = load double* %Bi
- %t2 = load double* %Ci
+ %Ai = getelementptr inbounds double, double* %A, i64 %i
+ %Bi = getelementptr inbounds double, double* %B, i64 %i
+ %Ci = getelementptr inbounds double, double* %C, i64 %i
+ %t1 = load double, double* %Bi
+ %t2 = load double, double* %Ci
%m = fmul double %t1, %t2
store double %m, double* %Ai
%i.next = add nsw i64 %i, 1
@@ -70,19 +70,19 @@ entry:
loop:
%i = phi i64 [ %i.next, %loop ], [ 0, %entry ]
- %Ai = getelementptr inbounds double* %A, i64 %i
- %Bi = getelementptr inbounds double* %B, i64 %i
- %Ci = getelementptr inbounds double* %C, i64 %i
- %t1 = load double* %Bi
- %t2 = load double* %Ci
+ %Ai = getelementptr inbounds double, double* %A, i64 %i
+ %Bi = getelementptr inbounds double, double* %B, i64 %i
+ %Ci = getelementptr inbounds double, double* %C, i64 %i
+ %t1 = load double, double* %Bi
+ %t2 = load double, double* %Ci
%m = fmul double %t1, %t2
store double %m, double* %Ai
%j = add i64 %i, 256
- %Aj = getelementptr inbounds double* %A, i64 %j
- %Bj = getelementptr inbounds double* %B, i64 %j
- %Cj = getelementptr inbounds double* %C, i64 %j
- %t3 = load double* %Bj
- %t4 = load double* %Cj
+ %Aj = getelementptr inbounds double, double* %A, i64 %j
+ %Bj = getelementptr inbounds double, double* %B, i64 %j
+ %Cj = getelementptr inbounds double, double* %C, i64 %j
+ %t3 = load double, double* %Bj
+ %t4 = load double, double* %Cj
%o = fdiv double %t3, %t4
store double %o, double* %Aj
%i.next = add nsw i64 %i, 1
@@ -116,19 +116,19 @@ entry:
loop:
%i = phi i64 [ %i.next, %loop ], [ 0, %entry ]
- %Ai = getelementptr inbounds double* %A, i64 %i
- %Bi = getelementptr inbounds double* %B, i64 %i
- %Ci = getelementptr inbounds double* %C, i64 %i
- %t1 = load double* %Bi
- %t2 = load double* %Ci
+ %Ai = getelementptr inbounds double, double* %A, i64 %i
+ %Bi = getelementptr inbounds double, double* %B, i64 %i
+ %Ci = getelementptr inbounds double, double* %C, i64 %i
+ %t1 = load double, double* %Bi
+ %t2 = load double, double* %Ci
%m = fmul double %t1, %t2
store double %m, double* %Ai
%j = sub i64 %i, 256
- %Aj = getelementptr inbounds double* %A, i64 %j
- %Bj = getelementptr inbounds double* %B, i64 %j
- %Cj = getelementptr inbounds double* %C, i64 %j
- %t3 = load double* %Bj
- %t4 = load double* %Cj
+ %Aj = getelementptr inbounds double, double* %A, i64 %j
+ %Bj = getelementptr inbounds double, double* %B, i64 %j
+ %Cj = getelementptr inbounds double, double* %C, i64 %j
+ %t3 = load double, double* %Bj
+ %t4 = load double, double* %Cj
%o = fdiv double %t3, %t4
store double %o, double* %Aj
%i.next = add nsw i64 %i, 1
@@ -162,19 +162,19 @@ entry:
loop:
%i = phi i64 [ %i.next, %loop ], [ 0, %entry ]
%k = add i64 %i, 256
- %Ak = getelementptr inbounds double* %A, i64 %k
- %Bk = getelementptr inbounds double* %B, i64 %k
- %Ck = getelementptr inbounds double* %C, i64 %k
- %t1 = load double* %Bk
- %t2 = load double* %Ck
+ %Ak = getelementptr inbounds double, double* %A, i64 %k
+ %Bk = getelementptr inbounds double, double* %B, i64 %k
+ %Ck = getelementptr inbounds double, double* %C, i64 %k
+ %t1 = load double, double* %Bk
+ %t2 = load double, double* %Ck
%m = fmul double %t1, %t2
store double %m, double* %Ak
%j = sub i64 %i, 256
- %Aj = getelementptr inbounds double* %A, i64 %j
- %Bj = getelementptr inbounds double* %B, i64 %j
- %Cj = getelementptr inbounds double* %C, i64 %j
- %t3 = load double* %Bj
- %t4 = load double* %Cj
+ %Aj = getelementptr inbounds double, double* %A, i64 %j
+ %Bj = getelementptr inbounds double, double* %B, i64 %j
+ %Cj = getelementptr inbounds double, double* %C, i64 %j
+ %t3 = load double, double* %Bj
+ %t4 = load double, double* %Cj
%o = fdiv double %t3, %t4
store double %o, double* %Aj
%i.next = add nsw i64 %i, 1
@@ -205,11 +205,11 @@ entry:
loop:
%i = phi i64 [ %i.next, %loop ], [ 0, %entry ]
- %Ai = getelementptr inbounds double* %A, i64 %i
- %Bi = getelementptr inbounds double* %B, i64 %i
- %Ci = getelementptr inbounds double* %C, i64 %i
- %t1 = load double* %Bi
- %t2 = load double* %Ci
+ %Ai = getelementptr inbounds double, double* %A, i64 %i
+ %Bi = getelementptr inbounds double, double* %B, i64 %i
+ %Ci = getelementptr inbounds double, double* %C, i64 %i
+ %t1 = load double, double* %Bi
+ %t2 = load double, double* %Ci
%m = fmul double %t1, %t2
store double %m, double* %Ai
%i.next = add nsw i64 %i, 1
@@ -240,11 +240,11 @@ entry:
loop:
%i = phi i64 [ %i.next, %loop ], [ 0, %entry ]
- %Ai = getelementptr inbounds double* %A, i64 %i
- %Bi = getelementptr inbounds double* %B, i64 %i
- %Ci = getelementptr inbounds double* %C, i64 %i
- %t1 = load double* %Bi
- %t2 = load double* %Ci
+ %Ai = getelementptr inbounds double, double* %A, i64 %i
+ %Bi = getelementptr inbounds double, double* %B, i64 %i
+ %Ci = getelementptr inbounds double, double* %C, i64 %i
+ %t1 = load double, double* %Bi
+ %t2 = load double, double* %Ci
%m = fmul double %t1, %t2
store double %m, double* %Ai
%i.next = add nsw i64 %i, 1
@@ -280,20 +280,20 @@ entry:
loop:
%i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
%i5 = add i64 %i, 5
- %Ai = getelementptr double* %A, i64 %i5
- %t2 = load double* %Ai
- %Bi = getelementptr double* %B, i64 %i5
- %t4 = load double* %Bi
+ %Ai = getelementptr double, double* %A, i64 %i5
+ %t2 = load double, double* %Ai
+ %Bi = getelementptr double, double* %B, i64 %i5
+ %t4 = load double, double* %Bi
%t5 = fadd double %t2, %t4
- %Ci = getelementptr double* %C, i64 %i5
+ %Ci = getelementptr double, double* %C, i64 %i5
store double %t5, double* %Ci
%i10 = add i64 %i, 10
- %Ai10 = getelementptr double* %A, i64 %i10
- %t9 = load double* %Ai10
- %Bi10 = getelementptr double* %B, i64 %i10
- %t11 = load double* %Bi10
+ %Ai10 = getelementptr double, double* %A, i64 %i10
+ %t9 = load double, double* %Ai10
+ %Bi10 = getelementptr double, double* %B, i64 %i10
+ %t11 = load double, double* %Bi10
%t12 = fsub double %t9, %t11
- %Ci10 = getelementptr double* %C, i64 %i10
+ %Ci10 = getelementptr double, double* %C, i64 %i10
store double %t12, double* %Ci10
%i.next = add i64 %i, 1
%exitcond = icmp eq i64 %i.next, 5000
@@ -327,20 +327,20 @@ entry:
loop:
%i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
%i5 = add i64 %i, 5
- %Ai = getelementptr double* %A, i64 %i5
- %t2 = load double* %Ai
- %Bi = getelementptr double* %B, i64 %i5
- %t4 = load double* %Bi
+ %Ai = getelementptr double, double* %A, i64 %i5
+ %t2 = load double, double* %Ai
+ %Bi = getelementptr double, double* %B, i64 %i5
+ %t4 = load double, double* %Bi
%t5 = fadd double %t2, %t4
- %Ci = getelementptr double* %C, i64 %i5
+ %Ci = getelementptr double, double* %C, i64 %i5
store double %t5, double* %Ci
%i10 = add i64 %i, 10
- %Ai10 = getelementptr double* %A, i64 %i10
- %t9 = load double* %Ai10
- %Bi10 = getelementptr double* %B, i64 %i10
- %t11 = load double* %Bi10
+ %Ai10 = getelementptr double, double* %A, i64 %i10
+ %t9 = load double, double* %Ai10
+ %Bi10 = getelementptr double, double* %B, i64 %i10
+ %t11 = load double, double* %Bi10
%t12 = fsub double %t9, %t11
- %Ci10 = getelementptr double* %C, i64 %i10
+ %Ci10 = getelementptr double, double* %C, i64 %i10
store double %t12, double* %Ci10
%i.next = add i64 %i, 1
%exitcond = icmp eq i64 %i.next, %n
@@ -372,11 +372,11 @@ entry:
loop:
%i = phi i64 [ %i.next, %loop ], [ 0, %entry ]
call void @use(i64 %i)
- %Ai = getelementptr inbounds double* %A, i64 %i
- %Bi = getelementptr inbounds double* %B, i64 %i
- %Ci = getelementptr inbounds double* %C, i64 %i
- %t1 = load double* %Bi
- %t2 = load double* %Ci
+ %Ai = getelementptr inbounds double, double* %A, i64 %i
+ %Bi = getelementptr inbounds double, double* %B, i64 %i
+ %Ci = getelementptr inbounds double, double* %C, i64 %i
+ %t1 = load double, double* %Bi
+ %t2 = load double, double* %Ci
%m = fmul double %t1, %t2
store double %m, double* %Ai
%i.next = add nsw i64 %i, 1
@@ -413,8 +413,8 @@ bb.nph14: ; preds = %entry
bb: ; preds = %bb3, %bb.nph14
%indvar16 = phi i64 [ 0, %bb.nph14 ], [ %indvar.next17, %bb3 ] ; <i64> [#uses=3]
%s.113 = phi i32 [ 0, %bb.nph14 ], [ %s.0.lcssa, %bb3 ] ; <i32> [#uses=2]
- %scevgep2526 = getelementptr [123123 x %struct.anon]* @bars, i64 0, i64 %indvar16, i32 0 ; <i32*> [#uses=1]
- %1 = load i32* %scevgep2526, align 4 ; <i32> [#uses=2]
+ %scevgep2526 = getelementptr [123123 x %struct.anon], [123123 x %struct.anon]* @bars, i64 0, i64 %indvar16, i32 0 ; <i32*> [#uses=1]
+ %1 = load i32, i32* %scevgep2526, align 4 ; <i32> [#uses=2]
%2 = icmp sgt i32 %1, 0 ; <i1> [#uses=1]
br i1 %2, label %bb.nph, label %bb3
@@ -425,8 +425,8 @@ bb.nph: ; preds = %bb
bb1: ; preds = %bb.nph, %bb1
%indvar = phi i64 [ 0, %bb.nph ], [ %tmp19, %bb1 ] ; <i64> [#uses=2]
%s.07 = phi i32 [ %s.113, %bb.nph ], [ %4, %bb1 ] ; <i32> [#uses=1]
- %c.08 = getelementptr [123123 x %struct.anon]* @bars, i64 0, i64 %indvar16, i32 1, i64 %indvar ; <i32*> [#uses=1]
- %3 = load i32* %c.08, align 4 ; <i32> [#uses=1]
+ %c.08 = getelementptr [123123 x %struct.anon], [123123 x %struct.anon]* @bars, i64 0, i64 %indvar16, i32 1, i64 %indvar ; <i32*> [#uses=1]
+ %3 = load i32, i32* %c.08, align 4 ; <i32> [#uses=1]
%4 = add nsw i32 %3, %s.07 ; <i32> [#uses=2]
%tmp19 = add i64 %indvar, 1 ; <i64> [#uses=2]
%5 = icmp sgt i64 %tmp23, %tmp19 ; <i1> [#uses=1]
@@ -493,7 +493,7 @@ define void @test(float* %arg, i64 %arg1, float* nocapture %arg2, float* nocaptu
bb:
%t = alloca float, align 4 ; <float*> [#uses=3]
%t7 = alloca float, align 4 ; <float*> [#uses=2]
- %t8 = load float* %arg3 ; <float> [#uses=8]
+ %t8 = load float, float* %arg3 ; <float> [#uses=8]
%t9 = ptrtoint float* %arg to i64 ; <i64> [#uses=1]
%t10 = ptrtoint float* %arg4 to i64 ; <i64> [#uses=1]
%t11 = xor i64 %t10, %t9 ; <i64> [#uses=1]
@@ -507,15 +507,15 @@ bb:
br i1 %t18, label %bb19, label %bb213
bb19: ; preds = %bb
- %t20 = load float* %arg2 ; <float> [#uses=1]
+ %t20 = load float, float* %arg2 ; <float> [#uses=1]
br label %bb21
bb21: ; preds = %bb32, %bb19
%t22 = phi i64 [ %t36, %bb32 ], [ 0, %bb19 ] ; <i64> [#uses=21]
%t23 = phi float [ %t35, %bb32 ], [ %t20, %bb19 ] ; <float> [#uses=6]
%t24 = sub i64 %arg6, %t22 ; <i64> [#uses=4]
- %t25 = getelementptr float* %arg4, i64 %t22 ; <float*> [#uses=4]
- %t26 = getelementptr float* %arg, i64 %t22 ; <float*> [#uses=3]
+ %t25 = getelementptr float, float* %arg4, i64 %t22 ; <float*> [#uses=4]
+ %t26 = getelementptr float, float* %arg, i64 %t22 ; <float*> [#uses=3]
%t27 = icmp sgt i64 %t24, 0 ; <i1> [#uses=1]
br i1 %t27, label %bb28, label %bb37
@@ -526,7 +526,7 @@ bb28: ; preds = %bb21
br i1 %t31, label %bb37, label %bb32
bb32: ; preds = %bb28
- %t33 = load float* %t26 ; <float> [#uses=1]
+ %t33 = load float, float* %t26 ; <float> [#uses=1]
%t34 = fmul float %t23, %t33 ; <float> [#uses=1]
store float %t34, float* %t25
%t35 = fadd float %t23, %t8 ; <float> [#uses=1]
@@ -561,8 +561,8 @@ bb57: ; preds = %bb37
br i1 %t56, label %bb61, label %bb112
bb58: ; preds = %bb68
- %t59 = getelementptr float* %arg, i64 %t78 ; <float*> [#uses=1]
- %t60 = getelementptr float* %arg4, i64 %t78 ; <float*> [#uses=1]
+ %t59 = getelementptr float, float* %arg, i64 %t78 ; <float*> [#uses=1]
+ %t60 = getelementptr float, float* %arg4, i64 %t78 ; <float*> [#uses=1]
br label %bb112
bb61: ; preds = %bb57
@@ -582,32 +582,32 @@ bb68: ; preds = %bb68, %bb61
%t73 = phi <4 x float> [ %t52, %bb61 ], [ %t109, %bb68 ] ; <<4 x float>> [#uses=2]
%t74 = shl i64 %t69, 4 ; <i64> [#uses=5]
%t75 = add i64 %t22, %t74 ; <i64> [#uses=2]
- %t76 = getelementptr float* %arg, i64 %t75 ; <float*> [#uses=1]
+ %t76 = getelementptr float, float* %arg, i64 %t75 ; <float*> [#uses=1]
%t77 = bitcast float* %t76 to <4 x float>* ; <<4 x float>*> [#uses=1]
%t78 = add i64 %t62, %t74 ; <i64> [#uses=2]
%t79 = add i64 %t63, %t74 ; <i64> [#uses=2]
- %t80 = getelementptr float* %arg, i64 %t79 ; <float*> [#uses=1]
+ %t80 = getelementptr float, float* %arg, i64 %t79 ; <float*> [#uses=1]
%t81 = bitcast float* %t80 to <4 x float>* ; <<4 x float>*> [#uses=1]
%t82 = add i64 %t64, %t74 ; <i64> [#uses=2]
- %t83 = getelementptr float* %arg, i64 %t82 ; <float*> [#uses=1]
+ %t83 = getelementptr float, float* %arg, i64 %t82 ; <float*> [#uses=1]
%t84 = bitcast float* %t83 to <4 x float>* ; <<4 x float>*> [#uses=1]
%t85 = add i64 %t65, %t74 ; <i64> [#uses=2]
- %t86 = getelementptr float* %arg, i64 %t85 ; <float*> [#uses=1]
+ %t86 = getelementptr float, float* %arg, i64 %t85 ; <float*> [#uses=1]
%t87 = bitcast float* %t86 to <4 x float>* ; <<4 x float>*> [#uses=1]
- %t88 = getelementptr float* %arg4, i64 %t75 ; <float*> [#uses=1]
+ %t88 = getelementptr float, float* %arg4, i64 %t75 ; <float*> [#uses=1]
%t89 = bitcast float* %t88 to <4 x float>* ; <<4 x float>*> [#uses=1]
- %t90 = getelementptr float* %arg4, i64 %t79 ; <float*> [#uses=1]
+ %t90 = getelementptr float, float* %arg4, i64 %t79 ; <float*> [#uses=1]
%t91 = bitcast float* %t90 to <4 x float>* ; <<4 x float>*> [#uses=1]
- %t92 = getelementptr float* %arg4, i64 %t82 ; <float*> [#uses=1]
+ %t92 = getelementptr float, float* %arg4, i64 %t82 ; <float*> [#uses=1]
%t93 = bitcast float* %t92 to <4 x float>* ; <<4 x float>*> [#uses=1]
- %t94 = getelementptr float* %arg4, i64 %t85 ; <float*> [#uses=1]
+ %t94 = getelementptr float, float* %arg4, i64 %t85 ; <float*> [#uses=1]
%t95 = bitcast float* %t94 to <4 x float>* ; <<4 x float>*> [#uses=1]
%t96 = mul i64 %t69, -16 ; <i64> [#uses=1]
%t97 = add i64 %t67, %t96 ; <i64> [#uses=2]
- %t98 = load <4 x float>* %t77 ; <<4 x float>> [#uses=1]
- %t99 = load <4 x float>* %t81 ; <<4 x float>> [#uses=1]
- %t100 = load <4 x float>* %t84 ; <<4 x float>> [#uses=1]
- %t101 = load <4 x float>* %t87 ; <<4 x float>> [#uses=1]
+ %t98 = load <4 x float>, <4 x float>* %t77 ; <<4 x float>> [#uses=1]
+ %t99 = load <4 x float>, <4 x float>* %t81 ; <<4 x float>> [#uses=1]
+ %t100 = load <4 x float>, <4 x float>* %t84 ; <<4 x float>> [#uses=1]
+ %t101 = load <4 x float>, <4 x float>* %t87 ; <<4 x float>> [#uses=1]
%t102 = fmul <4 x float> %t98, %t71 ; <<4 x float>> [#uses=1]
%t103 = fadd <4 x float> %t71, %t55 ; <<4 x float>> [#uses=2]
%t104 = fmul <4 x float> %t99, %t73 ; <<4 x float>> [#uses=1]
@@ -636,15 +636,15 @@ bb118: ; preds = %bb37
br i1 %t56, label %bb122, label %bb194
bb119: ; preds = %bb137
- %t120 = getelementptr float* %arg, i64 %t145 ; <float*> [#uses=1]
- %t121 = getelementptr float* %arg4, i64 %t145 ; <float*> [#uses=1]
+ %t120 = getelementptr float, float* %arg, i64 %t145 ; <float*> [#uses=1]
+ %t121 = getelementptr float, float* %arg4, i64 %t145 ; <float*> [#uses=1]
br label %bb194
bb122: ; preds = %bb118
%t123 = add i64 %t22, -1 ; <i64> [#uses=1]
- %t124 = getelementptr inbounds float* %arg, i64 %t123 ; <float*> [#uses=1]
+ %t124 = getelementptr inbounds float, float* %arg, i64 %t123 ; <float*> [#uses=1]
%t125 = bitcast float* %t124 to <4 x float>* ; <<4 x float>*> [#uses=1]
- %t126 = load <4 x float>* %t125 ; <<4 x float>> [#uses=1]
+ %t126 = load <4 x float>, <4 x float>* %t125 ; <<4 x float>> [#uses=1]
%t127 = add i64 %t22, 16 ; <i64> [#uses=1]
%t128 = add i64 %t22, 3 ; <i64> [#uses=1]
%t129 = add i64 %t22, 7 ; <i64> [#uses=1]
@@ -667,35 +667,35 @@ bb137: ; preds = %bb137, %bb122
%t144 = shl i64 %t138, 4 ; <i64> [#uses=9]
%t145 = add i64 %t127, %t144 ; <i64> [#uses=2]
%t146 = add i64 %t128, %t144 ; <i64> [#uses=1]
- %t147 = getelementptr float* %arg, i64 %t146 ; <float*> [#uses=1]
+ %t147 = getelementptr float, float* %arg, i64 %t146 ; <float*> [#uses=1]
%t148 = bitcast float* %t147 to <4 x float>* ; <<4 x float>*> [#uses=1]
%t149 = add i64 %t129, %t144 ; <i64> [#uses=1]
- %t150 = getelementptr float* %arg, i64 %t149 ; <float*> [#uses=1]
+ %t150 = getelementptr float, float* %arg, i64 %t149 ; <float*> [#uses=1]
%t151 = bitcast float* %t150 to <4 x float>* ; <<4 x float>*> [#uses=1]
%t152 = add i64 %t130, %t144 ; <i64> [#uses=1]
- %t153 = getelementptr float* %arg, i64 %t152 ; <float*> [#uses=1]
+ %t153 = getelementptr float, float* %arg, i64 %t152 ; <float*> [#uses=1]
%t154 = bitcast float* %t153 to <4 x float>* ; <<4 x float>*> [#uses=1]
%t155 = add i64 %t131, %t144 ; <i64> [#uses=1]
- %t156 = getelementptr float* %arg, i64 %t155 ; <float*> [#uses=1]
+ %t156 = getelementptr float, float* %arg, i64 %t155 ; <float*> [#uses=1]
%t157 = bitcast float* %t156 to <4 x float>* ; <<4 x float>*> [#uses=1]
%t158 = add i64 %t22, %t144 ; <i64> [#uses=1]
- %t159 = getelementptr float* %arg4, i64 %t158 ; <float*> [#uses=1]
+ %t159 = getelementptr float, float* %arg4, i64 %t158 ; <float*> [#uses=1]
%t160 = bitcast float* %t159 to <4 x float>* ; <<4 x float>*> [#uses=1]
%t161 = add i64 %t132, %t144 ; <i64> [#uses=1]
- %t162 = getelementptr float* %arg4, i64 %t161 ; <float*> [#uses=1]
+ %t162 = getelementptr float, float* %arg4, i64 %t161 ; <float*> [#uses=1]
%t163 = bitcast float* %t162 to <4 x float>* ; <<4 x float>*> [#uses=1]
%t164 = add i64 %t133, %t144 ; <i64> [#uses=1]
- %t165 = getelementptr float* %arg4, i64 %t164 ; <float*> [#uses=1]
+ %t165 = getelementptr float, float* %arg4, i64 %t164 ; <float*> [#uses=1]
%t166 = bitcast float* %t165 to <4 x float>* ; <<4 x float>*> [#uses=1]
%t167 = add i64 %t134, %t144 ; <i64> [#uses=1]
- %t168 = getelementptr float* %arg4, i64 %t167 ; <float*> [#uses=1]
+ %t168 = getelementptr float, float* %arg4, i64 %t167 ; <float*> [#uses=1]
%t169 = bitcast float* %t168 to <4 x float>* ; <<4 x float>*> [#uses=1]
%t170 = mul i64 %t138, -16 ; <i64> [#uses=1]
%t171 = add i64 %t136, %t170 ; <i64> [#uses=2]
- %t172 = load <4 x float>* %t148 ; <<4 x float>> [#uses=2]
- %t173 = load <4 x float>* %t151 ; <<4 x float>> [#uses=2]
- %t174 = load <4 x float>* %t154 ; <<4 x float>> [#uses=2]
- %t175 = load <4 x float>* %t157 ; <<4 x float>> [#uses=2]
+ %t172 = load <4 x float>, <4 x float>* %t148 ; <<4 x float>> [#uses=2]
+ %t173 = load <4 x float>, <4 x float>* %t151 ; <<4 x float>> [#uses=2]
+ %t174 = load <4 x float>, <4 x float>* %t154 ; <<4 x float>> [#uses=2]
+ %t175 = load <4 x float>, <4 x float>* %t157 ; <<4 x float>> [#uses=2]
%t176 = shufflevector <4 x float> %t143, <4 x float> %t172, <4 x i32> <i32 4, i32 1, i32 2, i32 3> ; <<4 x float>> [#uses=1]
%t177 = shufflevector <4 x float> %t176, <4 x float> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 0> ; <<4 x float>> [#uses=1]
%t178 = shufflevector <4 x float> %t172, <4 x float> %t173, <4 x i32> <i32 4, i32 1, i32 2, i32 3> ; <<4 x float>> [#uses=1]
@@ -732,9 +732,9 @@ bb194: ; preds = %bb119, %bb118, %bb1
bb201: ; preds = %bb201, %bb194
%t202 = phi i64 [ %t209, %bb201 ], [ 0, %bb194 ] ; <i64> [#uses=3]
%t203 = phi float [ %t208, %bb201 ], [ %t199, %bb194 ] ; <float> [#uses=2]
- %t204 = getelementptr float* %t198, i64 %t202 ; <float*> [#uses=1]
- %t205 = getelementptr float* %t197, i64 %t202 ; <float*> [#uses=1]
- %t206 = load float* %t204 ; <float> [#uses=1]
+ %t204 = getelementptr float, float* %t198, i64 %t202 ; <float*> [#uses=1]
+ %t205 = getelementptr float, float* %t197, i64 %t202 ; <float*> [#uses=1]
+ %t206 = load float, float* %t204 ; <float> [#uses=1]
%t207 = fmul float %t203, %t206 ; <float> [#uses=1]
store float %t207, float* %t205
%t208 = fadd float %t203, %t8 ; <float> [#uses=2]
diff --git a/test/CodeGen/X86/lsr-static-addr.ll b/test/CodeGen/X86/lsr-static-addr.ll
index 1bac790f57f9..1765ed7871d8 100644
--- a/test/CodeGen/X86/lsr-static-addr.ll
+++ b/test/CodeGen/X86/lsr-static-addr.ll
@@ -29,8 +29,8 @@ entry:
for.body:
%i.06 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
- %arrayidx = getelementptr [0 x double]* @A, i64 0, i64 %i.06
- %tmp3 = load double* %arrayidx, align 8
+ %arrayidx = getelementptr [0 x double], [0 x double]* @A, i64 0, i64 %i.06
+ %tmp3 = load double, double* %arrayidx, align 8
%mul = fmul double %tmp3, 2.300000e+00
store double %mul, double* %arrayidx, align 8
%inc = add nsw i64 %i.06, 1
diff --git a/test/CodeGen/X86/lsr-wrap.ll b/test/CodeGen/X86/lsr-wrap.ll
index d605e4f14fe4..adf954477791 100644
--- a/test/CodeGen/X86/lsr-wrap.ll
+++ b/test/CodeGen/X86/lsr-wrap.ll
@@ -20,7 +20,7 @@ bb: ; preds = %bb, %entry
%indvar = phi i16 [ 0, %entry ], [ %indvar.next, %bb ] ; <i16> [#uses=2]
%tmp = sub i16 0, %indvar ; <i16> [#uses=1]
%tmp27 = trunc i16 %tmp to i8 ; <i8> [#uses=1]
- %tmp1 = load i32* @g_19, align 4 ; <i32> [#uses=2]
+ %tmp1 = load i32, i32* @g_19, align 4 ; <i32> [#uses=2]
%tmp2 = add i32 %tmp1, 1 ; <i32> [#uses=1]
store i32 %tmp2, i32* @g_19, align 4
%tmp3 = trunc i32 %tmp1 to i8 ; <i8> [#uses=1]
diff --git a/test/CodeGen/X86/lzcnt-tzcnt.ll b/test/CodeGen/X86/lzcnt-tzcnt.ll
index e98764a0d787..aa9ae2b7b100 100644
--- a/test/CodeGen/X86/lzcnt-tzcnt.ll
+++ b/test/CodeGen/X86/lzcnt-tzcnt.ll
@@ -106,7 +106,7 @@ define i64 @test9_ctlz(i64 %v) {
define i16 @test10_ctlz(i16* %ptr) {
- %v = load i16* %ptr
+ %v = load i16, i16* %ptr
%cnt = tail call i16 @llvm.ctlz.i16(i16 %v, i1 true)
%tobool = icmp eq i16 %v, 0
%cond = select i1 %tobool, i16 16, i16 %cnt
@@ -119,7 +119,7 @@ define i16 @test10_ctlz(i16* %ptr) {
define i32 @test11_ctlz(i32* %ptr) {
- %v = load i32* %ptr
+ %v = load i32, i32* %ptr
%cnt = tail call i32 @llvm.ctlz.i32(i32 %v, i1 true)
%tobool = icmp eq i32 %v, 0
%cond = select i1 %tobool, i32 32, i32 %cnt
@@ -132,7 +132,7 @@ define i32 @test11_ctlz(i32* %ptr) {
define i64 @test12_ctlz(i64* %ptr) {
- %v = load i64* %ptr
+ %v = load i64, i64* %ptr
%cnt = tail call i64 @llvm.ctlz.i64(i64 %v, i1 true)
%tobool = icmp eq i64 %v, 0
%cond = select i1 %tobool, i64 64, i64 %cnt
@@ -145,7 +145,7 @@ define i64 @test12_ctlz(i64* %ptr) {
define i16 @test13_ctlz(i16* %ptr) {
- %v = load i16* %ptr
+ %v = load i16, i16* %ptr
%cnt = tail call i16 @llvm.ctlz.i16(i16 %v, i1 true)
%tobool = icmp eq i16 0, %v
%cond = select i1 %tobool, i16 16, i16 %cnt
@@ -158,7 +158,7 @@ define i16 @test13_ctlz(i16* %ptr) {
define i32 @test14_ctlz(i32* %ptr) {
- %v = load i32* %ptr
+ %v = load i32, i32* %ptr
%cnt = tail call i32 @llvm.ctlz.i32(i32 %v, i1 true)
%tobool = icmp eq i32 0, %v
%cond = select i1 %tobool, i32 32, i32 %cnt
@@ -171,7 +171,7 @@ define i32 @test14_ctlz(i32* %ptr) {
define i64 @test15_ctlz(i64* %ptr) {
- %v = load i64* %ptr
+ %v = load i64, i64* %ptr
%cnt = tail call i64 @llvm.ctlz.i64(i64 %v, i1 true)
%tobool = icmp eq i64 0, %v
%cond = select i1 %tobool, i64 64, i64 %cnt
@@ -184,7 +184,7 @@ define i64 @test15_ctlz(i64* %ptr) {
define i16 @test16_ctlz(i16* %ptr) {
- %v = load i16* %ptr
+ %v = load i16, i16* %ptr
%cnt = tail call i16 @llvm.ctlz.i16(i16 %v, i1 true)
%tobool = icmp eq i16 0, %v
%cond = select i1 %tobool, i16 %cnt, i16 16
@@ -197,7 +197,7 @@ define i16 @test16_ctlz(i16* %ptr) {
define i32 @test17_ctlz(i32* %ptr) {
- %v = load i32* %ptr
+ %v = load i32, i32* %ptr
%cnt = tail call i32 @llvm.ctlz.i32(i32 %v, i1 true)
%tobool = icmp eq i32 0, %v
%cond = select i1 %tobool, i32 %cnt, i32 32
@@ -210,7 +210,7 @@ define i32 @test17_ctlz(i32* %ptr) {
define i64 @test18_ctlz(i64* %ptr) {
- %v = load i64* %ptr
+ %v = load i64, i64* %ptr
%cnt = tail call i64 @llvm.ctlz.i64(i64 %v, i1 true)
%tobool = icmp eq i64 0, %v
%cond = select i1 %tobool, i64 %cnt, i64 64
@@ -322,7 +322,7 @@ define i64 @test9_cttz(i64 %v) {
define i16 @test10_cttz(i16* %ptr) {
- %v = load i16* %ptr
+ %v = load i16, i16* %ptr
%cnt = tail call i16 @llvm.cttz.i16(i16 %v, i1 true)
%tobool = icmp eq i16 %v, 0
%cond = select i1 %tobool, i16 16, i16 %cnt
@@ -335,7 +335,7 @@ define i16 @test10_cttz(i16* %ptr) {
define i32 @test11_cttz(i32* %ptr) {
- %v = load i32* %ptr
+ %v = load i32, i32* %ptr
%cnt = tail call i32 @llvm.cttz.i32(i32 %v, i1 true)
%tobool = icmp eq i32 %v, 0
%cond = select i1 %tobool, i32 32, i32 %cnt
@@ -348,7 +348,7 @@ define i32 @test11_cttz(i32* %ptr) {
define i64 @test12_cttz(i64* %ptr) {
- %v = load i64* %ptr
+ %v = load i64, i64* %ptr
%cnt = tail call i64 @llvm.cttz.i64(i64 %v, i1 true)
%tobool = icmp eq i64 %v, 0
%cond = select i1 %tobool, i64 64, i64 %cnt
@@ -361,7 +361,7 @@ define i64 @test12_cttz(i64* %ptr) {
define i16 @test13_cttz(i16* %ptr) {
- %v = load i16* %ptr
+ %v = load i16, i16* %ptr
%cnt = tail call i16 @llvm.cttz.i16(i16 %v, i1 true)
%tobool = icmp eq i16 0, %v
%cond = select i1 %tobool, i16 16, i16 %cnt
@@ -374,7 +374,7 @@ define i16 @test13_cttz(i16* %ptr) {
define i32 @test14_cttz(i32* %ptr) {
- %v = load i32* %ptr
+ %v = load i32, i32* %ptr
%cnt = tail call i32 @llvm.cttz.i32(i32 %v, i1 true)
%tobool = icmp eq i32 0, %v
%cond = select i1 %tobool, i32 32, i32 %cnt
@@ -387,7 +387,7 @@ define i32 @test14_cttz(i32* %ptr) {
define i64 @test15_cttz(i64* %ptr) {
- %v = load i64* %ptr
+ %v = load i64, i64* %ptr
%cnt = tail call i64 @llvm.cttz.i64(i64 %v, i1 true)
%tobool = icmp eq i64 0, %v
%cond = select i1 %tobool, i64 64, i64 %cnt
@@ -400,7 +400,7 @@ define i64 @test15_cttz(i64* %ptr) {
define i16 @test16_cttz(i16* %ptr) {
- %v = load i16* %ptr
+ %v = load i16, i16* %ptr
%cnt = tail call i16 @llvm.cttz.i16(i16 %v, i1 true)
%tobool = icmp eq i16 0, %v
%cond = select i1 %tobool, i16 %cnt, i16 16
@@ -413,7 +413,7 @@ define i16 @test16_cttz(i16* %ptr) {
define i32 @test17_cttz(i32* %ptr) {
- %v = load i32* %ptr
+ %v = load i32, i32* %ptr
%cnt = tail call i32 @llvm.cttz.i32(i32 %v, i1 true)
%tobool = icmp eq i32 0, %v
%cond = select i1 %tobool, i32 %cnt, i32 32
@@ -426,7 +426,7 @@ define i32 @test17_cttz(i32* %ptr) {
define i64 @test18_cttz(i64* %ptr) {
- %v = load i64* %ptr
+ %v = load i64, i64* %ptr
%cnt = tail call i64 @llvm.cttz.i64(i64 %v, i1 true)
%tobool = icmp eq i64 0, %v
%cond = select i1 %tobool, i64 %cnt, i64 64
diff --git a/test/CodeGen/X86/machine-cse.ll b/test/CodeGen/X86/machine-cse.ll
index 409147b1d1f1..c6876d29dfc2 100644
--- a/test/CodeGen/X86/machine-cse.ll
+++ b/test/CodeGen/X86/machine-cse.ll
@@ -11,7 +11,7 @@ entry:
; CHECK-LABEL: t:
; CHECK: leaq (%rax,%rax,4)
%0 = zext i32 %base to i64
- %1 = getelementptr inbounds %struct.s2* null, i64 %0
+ %1 = getelementptr inbounds %struct.s2, %struct.s2* null, i64 %0
br i1 undef, label %bb1, label %bb2
bb1:
@@ -19,7 +19,7 @@ bb1:
; CHECK-NOT: shlq $9
; CHECK-NOT: leaq
; CHECK: call
- %2 = getelementptr inbounds %struct.s2* null, i64 %0, i32 0
+ %2 = getelementptr inbounds %struct.s2, %struct.s2* null, i64 %0, i32 0
call void @bar(i32* %2) nounwind
unreachable
@@ -62,7 +62,7 @@ if.end34: ; preds = %sw.bb
; CHECK: %if.end34
; CHECK: leal
; CHECK-NOT: imull
- tail call void (...)* @printf(i32 %test_case, i32 %mul20) nounwind
+ tail call void (...) @printf(i32 %test_case, i32 %mul20) nounwind
%tmp = mul i32 %scale, %test_case
%tmp752 = mul i32 %tmp, 3
%tmp753 = zext i32 %tmp752 to i64
@@ -126,7 +126,7 @@ do.body:
br i1 %cmp3, label %return, label %do.cond
do.cond:
- %incdec.ptr = getelementptr inbounds i8* %p.0, i64 1
+ %incdec.ptr = getelementptr inbounds i8, i8* %p.0, i64 1
%dec = add i64 %n.addr.0, -1
%cmp6 = icmp eq i64 %dec, 0
br i1 %cmp6, label %return, label %do.body
@@ -147,7 +147,7 @@ define i32 @t2() {
br i1 %c, label %a, label %b
a:
- %l = load i32* @t2_global
+ %l = load i32, i32* @t2_global
ret i32 %l
b:
diff --git a/test/CodeGen/X86/masked-iv-safe.ll b/test/CodeGen/X86/masked-iv-safe.ll
index 9ddc84708d5b..8c0a4d4f1752 100644
--- a/test/CodeGen/X86/masked-iv-safe.ll
+++ b/test/CodeGen/X86/masked-iv-safe.ll
@@ -15,17 +15,17 @@ entry:
loop:
%indvar = phi i64 [ 0, %entry ], [ %indvar.next, %loop ]
%indvar.i8 = and i64 %indvar, 255
- %t0 = getelementptr double* %d, i64 %indvar.i8
- %t1 = load double* %t0
+ %t0 = getelementptr double, double* %d, i64 %indvar.i8
+ %t1 = load double, double* %t0
%t2 = fmul double %t1, 0.1
store double %t2, double* %t0
%indvar.i24 = and i64 %indvar, 16777215
- %t3 = getelementptr double* %d, i64 %indvar.i24
- %t4 = load double* %t3
+ %t3 = getelementptr double, double* %d, i64 %indvar.i24
+ %t4 = load double, double* %t3
%t5 = fmul double %t4, 2.3
store double %t5, double* %t3
- %t6 = getelementptr double* %d, i64 %indvar
- %t7 = load double* %t6
+ %t6 = getelementptr double, double* %d, i64 %indvar
+ %t7 = load double, double* %t6
%t8 = fmul double %t7, 4.5
store double %t8, double* %t6
%indvar.next = add i64 %indvar, 1
@@ -48,17 +48,17 @@ entry:
loop:
%indvar = phi i64 [ 10, %entry ], [ %indvar.next, %loop ]
%indvar.i8 = and i64 %indvar, 255
- %t0 = getelementptr double* %d, i64 %indvar.i8
- %t1 = load double* %t0
+ %t0 = getelementptr double, double* %d, i64 %indvar.i8
+ %t1 = load double, double* %t0
%t2 = fmul double %t1, 0.1
store double %t2, double* %t0
%indvar.i24 = and i64 %indvar, 16777215
- %t3 = getelementptr double* %d, i64 %indvar.i24
- %t4 = load double* %t3
+ %t3 = getelementptr double, double* %d, i64 %indvar.i24
+ %t4 = load double, double* %t3
%t5 = fmul double %t4, 2.3
store double %t5, double* %t3
- %t6 = getelementptr double* %d, i64 %indvar
- %t7 = load double* %t6
+ %t6 = getelementptr double, double* %d, i64 %indvar
+ %t7 = load double, double* %t6
%t8 = fmul double %t7, 4.5
store double %t8, double* %t6
%indvar.next = sub i64 %indvar, 1
@@ -82,18 +82,18 @@ loop:
%indvar = phi i64 [ 0, %entry ], [ %indvar.next, %loop ]
%s0 = shl i64 %indvar, 8
%indvar.i8 = ashr i64 %s0, 8
- %t0 = getelementptr double* %d, i64 %indvar.i8
- %t1 = load double* %t0
+ %t0 = getelementptr double, double* %d, i64 %indvar.i8
+ %t1 = load double, double* %t0
%t2 = fmul double %t1, 0.1
store double %t2, double* %t0
%s1 = shl i64 %indvar, 24
%indvar.i24 = ashr i64 %s1, 24
- %t3 = getelementptr double* %d, i64 %indvar.i24
- %t4 = load double* %t3
+ %t3 = getelementptr double, double* %d, i64 %indvar.i24
+ %t4 = load double, double* %t3
%t5 = fmul double %t4, 2.3
store double %t5, double* %t3
- %t6 = getelementptr double* %d, i64 %indvar
- %t7 = load double* %t6
+ %t6 = getelementptr double, double* %d, i64 %indvar
+ %t7 = load double, double* %t6
%t8 = fmul double %t7, 4.5
store double %t8, double* %t6
%indvar.next = add i64 %indvar, 1
@@ -117,18 +117,18 @@ loop:
%indvar = phi i64 [ 10, %entry ], [ %indvar.next, %loop ]
%s0 = shl i64 %indvar, 8
%indvar.i8 = ashr i64 %s0, 8
- %t0 = getelementptr double* %d, i64 %indvar.i8
- %t1 = load double* %t0
+ %t0 = getelementptr double, double* %d, i64 %indvar.i8
+ %t1 = load double, double* %t0
%t2 = fmul double %t1, 0.1
store double %t2, double* %t0
%s1 = shl i64 %indvar, 24
%indvar.i24 = ashr i64 %s1, 24
- %t3 = getelementptr double* %d, i64 %indvar.i24
- %t4 = load double* %t3
+ %t3 = getelementptr double, double* %d, i64 %indvar.i24
+ %t4 = load double, double* %t3
%t5 = fmul double %t4, 2.3
store double %t5, double* %t3
- %t6 = getelementptr double* %d, i64 %indvar
- %t7 = load double* %t6
+ %t6 = getelementptr double, double* %d, i64 %indvar
+ %t7 = load double, double* %t6
%t8 = fmul double %t7, 4.5
store double %t8, double* %t6
%indvar.next = sub i64 %indvar, 1
@@ -151,17 +151,17 @@ entry:
loop:
%indvar = phi i64 [ 18446744073709551615, %entry ], [ %indvar.next, %loop ]
%indvar.i8 = and i64 %indvar, 255
- %t0 = getelementptr double* %d, i64 %indvar.i8
- %t1 = load double* %t0
+ %t0 = getelementptr double, double* %d, i64 %indvar.i8
+ %t1 = load double, double* %t0
%t2 = fmul double %t1, 0.1
store double %t2, double* %t0
%indvar.i24 = and i64 %indvar, 16777215
- %t3 = getelementptr double* %d, i64 %indvar.i24
- %t4 = load double* %t3
+ %t3 = getelementptr double, double* %d, i64 %indvar.i24
+ %t4 = load double, double* %t3
%t5 = fmul double %t4, 2.3
store double %t5, double* %t3
- %t6 = getelementptr double* %d, i64 %indvar
- %t7 = load double* %t6
+ %t6 = getelementptr double, double* %d, i64 %indvar
+ %t7 = load double, double* %t6
%t8 = fmul double %t7, 4.5
store double %t8, double* %t6
%indvar.next = add i64 %indvar, 1
@@ -184,17 +184,17 @@ entry:
loop:
%indvar = phi i64 [ 0, %entry ], [ %indvar.next, %loop ]
%indvar.i8 = and i64 %indvar, 255
- %t0 = getelementptr double* %d, i64 %indvar.i8
- %t1 = load double* %t0
+ %t0 = getelementptr double, double* %d, i64 %indvar.i8
+ %t1 = load double, double* %t0
%t2 = fmul double %t1, 0.1
store double %t2, double* %t0
%indvar.i24 = and i64 %indvar, 16777215
- %t3 = getelementptr double* %d, i64 %indvar.i24
- %t4 = load double* %t3
+ %t3 = getelementptr double, double* %d, i64 %indvar.i24
+ %t4 = load double, double* %t3
%t5 = fdiv double %t4, 2.3
store double %t5, double* %t3
- %t6 = getelementptr double* %d, i64 %indvar
- %t7 = load double* %t6
+ %t6 = getelementptr double, double* %d, i64 %indvar
+ %t7 = load double, double* %t6
%t8 = fmul double %t7, 4.5
store double %t8, double* %t6
%indvar.next = sub i64 %indvar, 1
@@ -218,18 +218,18 @@ loop:
%indvar = phi i64 [ 18446744073709551615, %entry ], [ %indvar.next, %loop ]
%s0 = shl i64 %indvar, 8
%indvar.i8 = ashr i64 %s0, 8
- %t0 = getelementptr double* %d, i64 %indvar.i8
- %t1 = load double* %t0
+ %t0 = getelementptr double, double* %d, i64 %indvar.i8
+ %t1 = load double, double* %t0
%t2 = fmul double %t1, 0.1
store double %t2, double* %t0
%s1 = shl i64 %indvar, 24
%indvar.i24 = ashr i64 %s1, 24
- %t3 = getelementptr double* %d, i64 %indvar.i24
- %t4 = load double* %t3
+ %t3 = getelementptr double, double* %d, i64 %indvar.i24
+ %t4 = load double, double* %t3
%t5 = fdiv double %t4, 2.3
store double %t5, double* %t3
- %t6 = getelementptr double* %d, i64 %indvar
- %t7 = load double* %t6
+ %t6 = getelementptr double, double* %d, i64 %indvar
+ %t7 = load double, double* %t6
%t8 = fmul double %t7, 4.5
store double %t8, double* %t6
%indvar.next = add i64 %indvar, 1
@@ -253,18 +253,18 @@ loop:
%indvar = phi i64 [ 0, %entry ], [ %indvar.next, %loop ]
%s0 = shl i64 %indvar, 8
%indvar.i8 = ashr i64 %s0, 8
- %t0 = getelementptr double* %d, i64 %indvar.i8
- %t1 = load double* %t0
+ %t0 = getelementptr double, double* %d, i64 %indvar.i8
+ %t1 = load double, double* %t0
%t2 = fmul double %t1, 0.1
store double %t2, double* %t0
%s1 = shl i64 %indvar, 24
%indvar.i24 = ashr i64 %s1, 24
- %t3 = getelementptr double* %d, i64 %indvar.i24
- %t4 = load double* %t3
+ %t3 = getelementptr double, double* %d, i64 %indvar.i24
+ %t4 = load double, double* %t3
%t5 = fdiv double %t4, 2.3
store double %t5, double* %t3
- %t6 = getelementptr double* %d, i64 %indvar
- %t7 = load double* %t6
+ %t6 = getelementptr double, double* %d, i64 %indvar
+ %t7 = load double, double* %t6
%t8 = fmul double %t7, 4.5
store double %t8, double* %t6
%indvar.next = sub i64 %indvar, 1
diff --git a/test/CodeGen/X86/masked-iv-unsafe.ll b/test/CodeGen/X86/masked-iv-unsafe.ll
index f23c02019548..974a1cfb90d1 100644
--- a/test/CodeGen/X86/masked-iv-unsafe.ll
+++ b/test/CodeGen/X86/masked-iv-unsafe.ll
@@ -13,17 +13,17 @@ entry:
loop:
%indvar = phi i64 [ 10, %entry ], [ %indvar.next, %loop ]
%indvar.i8 = and i64 %indvar, 255
- %t0 = getelementptr double* %d, i64 %indvar.i8
- %t1 = load double* %t0
+ %t0 = getelementptr double, double* %d, i64 %indvar.i8
+ %t1 = load double, double* %t0
%t2 = fmul double %t1, 0.1
store double %t2, double* %t0
%indvar.i24 = and i64 %indvar, 16777215
- %t3 = getelementptr double* %d, i64 %indvar.i24
- %t4 = load double* %t3
+ %t3 = getelementptr double, double* %d, i64 %indvar.i24
+ %t4 = load double, double* %t3
%t5 = fmul double %t4, 2.3
store double %t5, double* %t3
- %t6 = getelementptr double* %d, i64 %indvar
- %t7 = load double* %t6
+ %t6 = getelementptr double, double* %d, i64 %indvar
+ %t7 = load double, double* %t6
%t8 = fmul double %t7, 4.5
store double %t8, double* %t6
%indvar.next = add i64 %indvar, 1
@@ -41,17 +41,17 @@ entry:
loop:
%indvar = phi i64 [ 10, %entry ], [ %indvar.next, %loop ]
%indvar.i8 = and i64 %indvar, 255
- %t0 = getelementptr double* %d, i64 %indvar.i8
- %t1 = load double* %t0
+ %t0 = getelementptr double, double* %d, i64 %indvar.i8
+ %t1 = load double, double* %t0
%t2 = fmul double %t1, 0.1
store double %t2, double* %t0
%indvar.i24 = and i64 %indvar, 16777215
- %t3 = getelementptr double* %d, i64 %indvar.i24
- %t4 = load double* %t3
+ %t3 = getelementptr double, double* %d, i64 %indvar.i24
+ %t4 = load double, double* %t3
%t5 = fmul double %t4, 2.3
store double %t5, double* %t3
- %t6 = getelementptr double* %d, i64 %indvar
- %t7 = load double* %t6
+ %t6 = getelementptr double, double* %d, i64 %indvar
+ %t7 = load double, double* %t6
%t8 = fmul double %t7, 4.5
store double %t8, double* %t6
%indvar.next = sub i64 %indvar, 1
@@ -70,18 +70,18 @@ loop:
%indvar = phi i64 [ 10, %entry ], [ %indvar.next, %loop ]
%s0 = shl i64 %indvar, 8
%indvar.i8 = ashr i64 %s0, 8
- %t0 = getelementptr double* %d, i64 %indvar.i8
- %t1 = load double* %t0
+ %t0 = getelementptr double, double* %d, i64 %indvar.i8
+ %t1 = load double, double* %t0
%t2 = fmul double %t1, 0.1
store double %t2, double* %t0
%s1 = shl i64 %indvar, 24
%indvar.i24 = ashr i64 %s1, 24
- %t3 = getelementptr double* %d, i64 %indvar.i24
- %t4 = load double* %t3
+ %t3 = getelementptr double, double* %d, i64 %indvar.i24
+ %t4 = load double, double* %t3
%t5 = fmul double %t4, 2.3
store double %t5, double* %t3
- %t6 = getelementptr double* %d, i64 %indvar
- %t7 = load double* %t6
+ %t6 = getelementptr double, double* %d, i64 %indvar
+ %t7 = load double, double* %t6
%t8 = fmul double %t7, 4.5
store double %t8, double* %t6
%indvar.next = add i64 %indvar, 1
@@ -100,18 +100,18 @@ loop:
%indvar = phi i64 [ 10, %entry ], [ %indvar.next, %loop ]
%s0 = shl i64 %indvar, 8
%indvar.i8 = ashr i64 %s0, 8
- %t0 = getelementptr double* %d, i64 %indvar.i8
- %t1 = load double* %t0
+ %t0 = getelementptr double, double* %d, i64 %indvar.i8
+ %t1 = load double, double* %t0
%t2 = fmul double %t1, 0.1
store double %t2, double* %t0
%s1 = shl i64 %indvar, 24
%indvar.i24 = ashr i64 %s1, 24
- %t3 = getelementptr double* %d, i64 %indvar.i24
- %t4 = load double* %t3
+ %t3 = getelementptr double, double* %d, i64 %indvar.i24
+ %t4 = load double, double* %t3
%t5 = fmul double %t4, 2.3
store double %t5, double* %t3
- %t6 = getelementptr double* %d, i64 %indvar
- %t7 = load double* %t6
+ %t6 = getelementptr double, double* %d, i64 %indvar
+ %t7 = load double, double* %t6
%t8 = fmul double %t7, 4.5
store double %t8, double* %t6
%indvar.next = sub i64 %indvar, 1
@@ -129,17 +129,17 @@ entry:
loop:
%indvar = phi i64 [ 0, %entry ], [ %indvar.next, %loop ]
%indvar.i8 = and i64 %indvar, 255
- %t0 = getelementptr double* %d, i64 %indvar.i8
- %t1 = load double* %t0
+ %t0 = getelementptr double, double* %d, i64 %indvar.i8
+ %t1 = load double, double* %t0
%t2 = fmul double %t1, 0.1
store double %t2, double* %t0
%indvar.i24 = and i64 %indvar, 16777215
- %t3 = getelementptr double* %d, i64 %indvar.i24
- %t4 = load double* %t3
+ %t3 = getelementptr double, double* %d, i64 %indvar.i24
+ %t4 = load double, double* %t3
%t5 = fmul double %t4, 2.3
store double %t5, double* %t3
- %t6 = getelementptr double* %d, i64 %indvar
- %t7 = load double* %t6
+ %t6 = getelementptr double, double* %d, i64 %indvar
+ %t7 = load double, double* %t6
%t8 = fmul double %t7, 4.5
store double %t8, double* %t6
%indvar.next = add i64 %indvar, 1
@@ -157,17 +157,17 @@ entry:
loop:
%indvar = phi i64 [ %n, %entry ], [ %indvar.next, %loop ]
%indvar.i8 = and i64 %indvar, 255
- %t0 = getelementptr double* %d, i64 %indvar.i8
- %t1 = load double* %t0
+ %t0 = getelementptr double, double* %d, i64 %indvar.i8
+ %t1 = load double, double* %t0
%t2 = fmul double %t1, 0.1
store double %t2, double* %t0
%indvar.i24 = and i64 %indvar, 16777215
- %t3 = getelementptr double* %d, i64 %indvar.i24
- %t4 = load double* %t3
+ %t3 = getelementptr double, double* %d, i64 %indvar.i24
+ %t4 = load double, double* %t3
%t5 = fmul double %t4, 2.3
store double %t5, double* %t3
- %t6 = getelementptr double* %d, i64 %indvar
- %t7 = load double* %t6
+ %t6 = getelementptr double, double* %d, i64 %indvar
+ %t7 = load double, double* %t6
%t8 = fmul double %t7, 4.5
store double %t8, double* %t6
%indvar.next = sub i64 %indvar, 1
@@ -186,18 +186,18 @@ loop:
%indvar = phi i64 [ 0, %entry ], [ %indvar.next, %loop ]
%s0 = shl i64 %indvar, 8
%indvar.i8 = ashr i64 %s0, 8
- %t0 = getelementptr double* %d, i64 %indvar.i8
- %t1 = load double* %t0
+ %t0 = getelementptr double, double* %d, i64 %indvar.i8
+ %t1 = load double, double* %t0
%t2 = fmul double %t1, 0.1
store double %t2, double* %t0
%s1 = shl i64 %indvar, 24
%indvar.i24 = ashr i64 %s1, 24
- %t3 = getelementptr double* %d, i64 %indvar.i24
- %t4 = load double* %t3
+ %t3 = getelementptr double, double* %d, i64 %indvar.i24
+ %t4 = load double, double* %t3
%t5 = fmul double %t4, 2.3
store double %t5, double* %t3
- %t6 = getelementptr double* %d, i64 %indvar
- %t7 = load double* %t6
+ %t6 = getelementptr double, double* %d, i64 %indvar
+ %t7 = load double, double* %t6
%t8 = fmul double %t7, 4.5
store double %t8, double* %t6
%indvar.next = add i64 %indvar, 1
@@ -216,18 +216,18 @@ loop:
%indvar = phi i64 [ %n, %entry ], [ %indvar.next, %loop ]
%s0 = shl i64 %indvar, 8
%indvar.i8 = ashr i64 %s0, 8
- %t0 = getelementptr double* %d, i64 %indvar.i8
- %t1 = load double* %t0
+ %t0 = getelementptr double, double* %d, i64 %indvar.i8
+ %t1 = load double, double* %t0
%t2 = fmul double %t1, 0.1
store double %t2, double* %t0
%s1 = shl i64 %indvar, 24
%indvar.i24 = ashr i64 %s1, 24
- %t3 = getelementptr double* %d, i64 %indvar.i24
- %t4 = load double* %t3
+ %t3 = getelementptr double, double* %d, i64 %indvar.i24
+ %t4 = load double, double* %t3
%t5 = fmul double %t4, 2.3
store double %t5, double* %t3
- %t6 = getelementptr double* %d, i64 %indvar
- %t7 = load double* %t6
+ %t6 = getelementptr double, double* %d, i64 %indvar
+ %t7 = load double, double* %t6
%t8 = fmul double %t7, 4.5
store double %t8, double* %t6
%indvar.next = sub i64 %indvar, 1
@@ -245,17 +245,17 @@ entry:
loop:
%indvar = phi i64 [ 0, %entry ], [ %indvar.next, %loop ]
%indvar.i8 = and i64 %indvar, 255
- %t0 = getelementptr double* %d, i64 %indvar.i8
- %t1 = load double* %t0
+ %t0 = getelementptr double, double* %d, i64 %indvar.i8
+ %t1 = load double, double* %t0
%t2 = fmul double %t1, 0.1
store double %t2, double* %t0
%indvar.i24 = and i64 %indvar, 16777215
- %t3 = getelementptr double* %d, i64 %indvar.i24
- %t4 = load double* %t3
+ %t3 = getelementptr double, double* %d, i64 %indvar.i24
+ %t4 = load double, double* %t3
%t5 = fmul double %t4, 2.3
store double %t5, double* %t3
- %t6 = getelementptr double* %d, i64 %indvar
- %t7 = load double* %t6
+ %t6 = getelementptr double, double* %d, i64 %indvar
+ %t7 = load double, double* %t6
%t8 = fmul double %t7, 4.5
store double %t8, double* %t6
%indvar.next = sub i64 %indvar, 1
@@ -273,17 +273,17 @@ entry:
loop:
%indvar = phi i64 [ 0, %entry ], [ %indvar.next, %loop ]
%indvar.i8 = and i64 %indvar, 255
- %t0 = getelementptr double* %d, i64 %indvar.i8
- %t1 = load double* %t0
+ %t0 = getelementptr double, double* %d, i64 %indvar.i8
+ %t1 = load double, double* %t0
%t2 = fmul double %t1, 0.1
store double %t2, double* %t0
%indvar.i24 = and i64 %indvar, 16777215
- %t3 = getelementptr double* %d, i64 %indvar.i24
- %t4 = load double* %t3
+ %t3 = getelementptr double, double* %d, i64 %indvar.i24
+ %t4 = load double, double* %t3
%t5 = fmul double %t4, 2.3
store double %t5, double* %t3
- %t6 = getelementptr double* %d, i64 %indvar
- %t7 = load double* %t6
+ %t6 = getelementptr double, double* %d, i64 %indvar
+ %t7 = load double, double* %t6
%t8 = fmul double %t7, 4.5
store double %t8, double* %t6
%indvar.next = add i64 %indvar, 3
@@ -301,17 +301,17 @@ entry:
loop:
%indvar = phi i64 [ 10, %entry ], [ %indvar.next, %loop ]
%indvar.i8 = and i64 %indvar, 255
- %t0 = getelementptr double* %d, i64 %indvar.i8
- %t1 = load double* %t0
+ %t0 = getelementptr double, double* %d, i64 %indvar.i8
+ %t1 = load double, double* %t0
%t2 = fmul double %t1, 0.1
store double %t2, double* %t0
%indvar.i24 = and i64 %indvar, 16777215
- %t3 = getelementptr double* %d, i64 %indvar.i24
- %t4 = load double* %t3
+ %t3 = getelementptr double, double* %d, i64 %indvar.i24
+ %t4 = load double, double* %t3
%t5 = fmul double %t4, 2.3
store double %t5, double* %t3
- %t6 = getelementptr double* %d, i64 %indvar
- %t7 = load double* %t6
+ %t6 = getelementptr double, double* %d, i64 %indvar
+ %t7 = load double, double* %t6
%t8 = fmul double %t7, 4.5
store double %t8, double* %t6
%indvar.next = sub i64 %indvar, 3
@@ -330,18 +330,18 @@ loop:
%indvar = phi i64 [ 0, %entry ], [ %indvar.next, %loop ]
%s0 = shl i64 %indvar, 8
%indvar.i8 = ashr i64 %s0, 8
- %t0 = getelementptr double* %d, i64 %indvar.i8
- %t1 = load double* %t0
+ %t0 = getelementptr double, double* %d, i64 %indvar.i8
+ %t1 = load double, double* %t0
%t2 = fmul double %t1, 0.1
store double %t2, double* %t0
%s1 = shl i64 %indvar, 24
%indvar.i24 = ashr i64 %s1, 24
- %t3 = getelementptr double* %d, i64 %indvar.i24
- %t4 = load double* %t3
+ %t3 = getelementptr double, double* %d, i64 %indvar.i24
+ %t4 = load double, double* %t3
%t5 = fmul double %t4, 2.3
store double %t5, double* %t3
- %t6 = getelementptr double* %d, i64 %indvar
- %t7 = load double* %t6
+ %t6 = getelementptr double, double* %d, i64 %indvar
+ %t7 = load double, double* %t6
%t8 = fmul double %t7, 4.5
store double %t8, double* %t6
%indvar.next = add i64 %indvar, 3
@@ -360,18 +360,18 @@ loop:
%indvar = phi i64 [ 10, %entry ], [ %indvar.next, %loop ]
%s0 = shl i64 %indvar, 8
%indvar.i8 = ashr i64 %s0, 8
- %t0 = getelementptr double* %d, i64 %indvar.i8
- %t1 = load double* %t0
+ %t0 = getelementptr double, double* %d, i64 %indvar.i8
+ %t1 = load double, double* %t0
%t2 = fmul double %t1, 0.1
store double %t2, double* %t0
%s1 = shl i64 %indvar, 24
%indvar.i24 = ashr i64 %s1, 24
- %t3 = getelementptr double* %d, i64 %indvar.i24
- %t4 = load double* %t3
+ %t3 = getelementptr double, double* %d, i64 %indvar.i24
+ %t4 = load double, double* %t3
%t5 = fmul double %t4, 2.3
store double %t5, double* %t3
- %t6 = getelementptr double* %d, i64 %indvar
- %t7 = load double* %t6
+ %t6 = getelementptr double, double* %d, i64 %indvar
+ %t7 = load double, double* %t6
%t8 = fmul double %t7, 4.5
store double %t8, double* %t6
%indvar.next = sub i64 %indvar, 3
diff --git a/test/CodeGen/X86/masked_gather_scatter.ll b/test/CodeGen/X86/masked_gather_scatter.ll
new file mode 100644
index 000000000000..de16e5ddc06b
--- /dev/null
+++ b/test/CodeGen/X86/masked_gather_scatter.ll
@@ -0,0 +1,142 @@
+; RUN: llc -mtriple=x86_64-apple-darwin -mcpu=knl < %s | FileCheck %s -check-prefix=KNL
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; KNL-LABEL: test1
+; KNL: kxnorw %k1, %k1, %k1
+; KNL: vgatherdps (%rdi,%zmm0,4), %zmm1 {%k1}
+define <16 x float> @test1(float* %base, <16 x i32> %ind) {
+
+ %broadcast.splatinsert = insertelement <16 x float*> undef, float* %base, i32 0
+ %broadcast.splat = shufflevector <16 x float*> %broadcast.splatinsert, <16 x float*> undef, <16 x i32> zeroinitializer
+
+ %sext_ind = sext <16 x i32> %ind to <16 x i64>
+ %gep.random = getelementptr float, <16 x float*> %broadcast.splat, <16 x i64> %sext_ind
+
+ %res = call <16 x float> @llvm.masked.gather.v16f32(<16 x float*> %gep.random, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
+ ret <16 x float>%res
+}
+
+declare <16 x i32> @llvm.masked.gather.v16i32(<16 x i32*>, i32, <16 x i1>, <16 x i32>)
+declare <16 x float> @llvm.masked.gather.v16f32(<16 x float*>, i32, <16 x i1>, <16 x float>)
+declare <8 x i32> @llvm.masked.gather.v8i32(<8 x i32*> , i32, <8 x i1> , <8 x i32> )
+
+; KNL-LABEL: test2
+; KNL: kmovw %esi, %k1
+; KNL: vgatherdps (%rdi,%zmm0,4), %zmm1 {%k1}
+define <16 x float> @test2(float* %base, <16 x i32> %ind, i16 %mask) {
+
+ %broadcast.splatinsert = insertelement <16 x float*> undef, float* %base, i32 0
+ %broadcast.splat = shufflevector <16 x float*> %broadcast.splatinsert, <16 x float*> undef, <16 x i32> zeroinitializer
+
+ %sext_ind = sext <16 x i32> %ind to <16 x i64>
+ %gep.random = getelementptr float, <16 x float*> %broadcast.splat, <16 x i64> %sext_ind
+ %imask = bitcast i16 %mask to <16 x i1>
+ %res = call <16 x float> @llvm.masked.gather.v16f32(<16 x float*> %gep.random, i32 4, <16 x i1> %imask, <16 x float>undef)
+ ret <16 x float> %res
+}
+
+; KNL-LABEL: test3
+; KNL: kmovw %esi, %k1
+; KNL: vpgatherdd (%rdi,%zmm0,4), %zmm1 {%k1}
+define <16 x i32> @test3(i32* %base, <16 x i32> %ind, i16 %mask) {
+
+ %broadcast.splatinsert = insertelement <16 x i32*> undef, i32* %base, i32 0
+ %broadcast.splat = shufflevector <16 x i32*> %broadcast.splatinsert, <16 x i32*> undef, <16 x i32> zeroinitializer
+
+ %sext_ind = sext <16 x i32> %ind to <16 x i64>
+ %gep.random = getelementptr i32, <16 x i32*> %broadcast.splat, <16 x i64> %sext_ind
+ %imask = bitcast i16 %mask to <16 x i1>
+ %res = call <16 x i32> @llvm.masked.gather.v16i32(<16 x i32*> %gep.random, i32 4, <16 x i1> %imask, <16 x i32>undef)
+ ret <16 x i32> %res
+}
+
+; KNL-LABEL: test4
+; KNL: kmovw %esi, %k1
+; KNL: kmovw
+; KNL: vpgatherdd
+; KNL: vpgatherdd
+
+define <16 x i32> @test4(i32* %base, <16 x i32> %ind, i16 %mask) {
+
+ %broadcast.splatinsert = insertelement <16 x i32*> undef, i32* %base, i32 0
+ %broadcast.splat = shufflevector <16 x i32*> %broadcast.splatinsert, <16 x i32*> undef, <16 x i32> zeroinitializer
+
+ %gep.random = getelementptr i32, <16 x i32*> %broadcast.splat, <16 x i32> %ind
+ %imask = bitcast i16 %mask to <16 x i1>
+ %gt1 = call <16 x i32> @llvm.masked.gather.v16i32(<16 x i32*> %gep.random, i32 4, <16 x i1> %imask, <16 x i32>undef)
+ %gt2 = call <16 x i32> @llvm.masked.gather.v16i32(<16 x i32*> %gep.random, i32 4, <16 x i1> %imask, <16 x i32>%gt1)
+ %res = add <16 x i32> %gt1, %gt2
+ ret <16 x i32> %res
+}
+
+; KNL-LABEL: test5
+; KNL: kmovw %k1, %k2
+; KNL: vpscatterdd {{.*}}%k2
+; KNL: vpscatterdd {{.*}}%k1
+
+define void @test5(i32* %base, <16 x i32> %ind, i16 %mask, <16 x i32>%val) {
+
+ %broadcast.splatinsert = insertelement <16 x i32*> undef, i32* %base, i32 0
+ %broadcast.splat = shufflevector <16 x i32*> %broadcast.splatinsert, <16 x i32*> undef, <16 x i32> zeroinitializer
+
+ %gep.random = getelementptr i32, <16 x i32*> %broadcast.splat, <16 x i32> %ind
+ %imask = bitcast i16 %mask to <16 x i1>
+ call void @llvm.masked.scatter.v16i32(<16 x i32>%val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask)
+ call void @llvm.masked.scatter.v16i32(<16 x i32>%val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask)
+ ret void
+}
+
+declare void @llvm.masked.scatter.v8i32(<8 x i32> , <8 x i32*> , i32 , <8 x i1> )
+declare void @llvm.masked.scatter.v16i32(<16 x i32> , <16 x i32*> , i32 , <16 x i1> )
+
+; KNL-LABEL: test6
+; KNL: kxnorw %k1, %k1, %k1
+; KNL: kxnorw %k2, %k2, %k2
+; KNL: vpgatherqd (,%zmm{{.*}}), %ymm{{.*}} {%k2}
+; KNL: vpscatterqd %ymm{{.*}}, (,%zmm{{.*}}) {%k1}
+define <8 x i32> @test6(<8 x i32>%a1, <8 x i32*> %ptr) {
+
+ %a = call <8 x i32> @llvm.masked.gather.v8i32(<8 x i32*> %ptr, i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> undef)
+
+ call void @llvm.masked.scatter.v8i32(<8 x i32> %a1, <8 x i32*> %ptr, i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
+ ret <8 x i32>%a
+}
+
+; In this case the index should be promoted to <8 x i64> for KNL
+; KNL-LABEL: test7
+; KNL: vpmovsxdq %ymm0, %zmm0
+; KNL: kmovw %k1, %k2
+; KNL: vpgatherqd {{.*}} {%k2}
+; KNL: vpgatherqd {{.*}} {%k1}
+define <8 x i32> @test7(i32* %base, <8 x i32> %ind, i8 %mask) {
+
+ %broadcast.splatinsert = insertelement <8 x i32*> undef, i32* %base, i32 0
+ %broadcast.splat = shufflevector <8 x i32*> %broadcast.splatinsert, <8 x i32*> undef, <8 x i32> zeroinitializer
+
+ %gep.random = getelementptr i32, <8 x i32*> %broadcast.splat, <8 x i32> %ind
+ %imask = bitcast i8 %mask to <8 x i1>
+ %gt1 = call <8 x i32> @llvm.masked.gather.v8i32(<8 x i32*> %gep.random, i32 4, <8 x i1> %imask, <8 x i32>undef)
+ %gt2 = call <8 x i32> @llvm.masked.gather.v8i32(<8 x i32*> %gep.random, i32 4, <8 x i1> %imask, <8 x i32>%gt1)
+ %res = add <8 x i32> %gt1, %gt2
+ ret <8 x i32> %res
+}
+
+; No uniform base in this case, index <8 x i64> contains addresses,
+; each gather call will be split into two
+; KNL-LABEL: test8
+; KNL: kshiftrw $8, %k1, %k2
+; KNL: vpgatherqd
+; KNL: vpgatherqd
+; KNL: vinserti64x4
+; KNL: vpgatherqd
+; KNL: vpgatherqd
+; KNL: vinserti64x4
+define <16 x i32> @test8(<16 x i32*> %ptr.random, <16 x i32> %ind, i16 %mask) {
+ %imask = bitcast i16 %mask to <16 x i1>
+ %gt1 = call <16 x i32> @llvm.masked.gather.v16i32(<16 x i32*> %ptr.random, i32 4, <16 x i1> %imask, <16 x i32>undef)
+ %gt2 = call <16 x i32> @llvm.masked.gather.v16i32(<16 x i32*> %ptr.random, i32 4, <16 x i1> %imask, <16 x i32>%gt1)
+ %res = add <16 x i32> %gt1, %gt2
+ ret <16 x i32> %res
+}
diff --git a/test/CodeGen/X86/masked_memop.ll b/test/CodeGen/X86/masked_memop.ll
index b5ff630f497a..6c16e634a59f 100644
--- a/test/CodeGen/X86/masked_memop.ll
+++ b/test/CodeGen/X86/masked_memop.ll
@@ -1,13 +1,14 @@
; RUN: llc -mtriple=x86_64-apple-darwin -mcpu=knl < %s | FileCheck %s -check-prefix=AVX512
; RUN: llc -mtriple=x86_64-apple-darwin -mcpu=core-avx2 < %s | FileCheck %s -check-prefix=AVX2
; RUN: opt -mtriple=x86_64-apple-darwin -codegenprepare -mcpu=corei7-avx -S < %s | FileCheck %s -check-prefix=AVX_SCALAR
+; RUN: llc -mtriple=x86_64-apple-darwin -mcpu=skx < %s | FileCheck %s -check-prefix=SKX
; AVX512-LABEL: test1
; AVX512: vmovdqu32 (%rdi), %zmm0 {%k1} {z}
; AVX2-LABEL: test1
-; AVX2: vpmaskmovd 32(%rdi)
-; AVX2: vpmaskmovd (%rdi)
+; AVX2: vpmaskmovd {{.*}}(%rdi)
+; AVX2: vpmaskmovd {{.*}}(%rdi)
; AVX2-NOT: blend
; AVX_SCALAR-LABEL: test1
@@ -82,6 +83,9 @@ define <8 x double> @test5(<8 x i32> %trigger, <8 x double>* %addr, <8 x double>
; AVX2-LABEL: test6
; AVX2: vmaskmovpd
; AVX2: vblendvpd
+
+; SKX-LABEL: test6
+; SKX: vmovupd {{.*}}{%k1}
define <2 x double> @test6(<2 x i64> %trigger, <2 x double>* %addr, <2 x double> %dst) {
%mask = icmp eq <2 x i64> %trigger, zeroinitializer
%res = call <2 x double> @llvm.masked.load.v2f64(<2 x double>* %addr, i32 4, <2 x i1>%mask, <2 x double>%dst)
@@ -91,6 +95,9 @@ define <2 x double> @test6(<2 x i64> %trigger, <2 x double>* %addr, <2 x double>
; AVX2-LABEL: test7
; AVX2: vmaskmovps {{.*}}(%rdi)
; AVX2: blend
+
+; SKX-LABEL: test7
+; SKX: vmovups (%rdi){{.*}}{%k1}
define <4 x float> @test7(<4 x i32> %trigger, <4 x float>* %addr, <4 x float> %dst) {
%mask = icmp eq <4 x i32> %trigger, zeroinitializer
%res = call <4 x float> @llvm.masked.load.v4f32(<4 x float>* %addr, i32 4, <4 x i1>%mask, <4 x float>%dst)
@@ -100,6 +107,9 @@ define <4 x float> @test7(<4 x i32> %trigger, <4 x float>* %addr, <4 x float> %d
; AVX2-LABEL: test8
; AVX2: vpmaskmovd {{.*}}(%rdi)
; AVX2: blend
+
+; SKX-LABEL: test8
+; SKX: vmovdqu32 (%rdi){{.*}}{%k1}
define <4 x i32> @test8(<4 x i32> %trigger, <4 x i32>* %addr, <4 x i32> %dst) {
%mask = icmp eq <4 x i32> %trigger, zeroinitializer
%res = call <4 x i32> @llvm.masked.load.v4i32(<4 x i32>* %addr, i32 4, <4 x i1>%mask, <4 x i32>%dst)
@@ -108,6 +118,9 @@ define <4 x i32> @test8(<4 x i32> %trigger, <4 x i32>* %addr, <4 x i32> %dst) {
; AVX2-LABEL: test9
; AVX2: vpmaskmovd %xmm
+
+; SKX-LABEL: test9
+; SKX: vmovdqu32 %xmm{{.*}}{%k1}
define void @test9(<4 x i32> %trigger, <4 x i32>* %addr, <4 x i32> %val) {
%mask = icmp eq <4 x i32> %trigger, zeroinitializer
call void @llvm.masked.store.v4i32(<4 x i32>%val, <4 x i32>* %addr, i32 4, <4 x i1>%mask)
@@ -117,23 +130,32 @@ define void @test9(<4 x i32> %trigger, <4 x i32>* %addr, <4 x i32> %val) {
; AVX2-LABEL: test10
; AVX2: vmaskmovpd (%rdi), %ymm
; AVX2: blend
+
+; SKX-LABEL: test10
+; SKX: vmovapd {{.*}}{%k1}
define <4 x double> @test10(<4 x i32> %trigger, <4 x double>* %addr, <4 x double> %dst) {
%mask = icmp eq <4 x i32> %trigger, zeroinitializer
- %res = call <4 x double> @llvm.masked.load.v4f64(<4 x double>* %addr, i32 4, <4 x i1>%mask, <4 x double>%dst)
+ %res = call <4 x double> @llvm.masked.load.v4f64(<4 x double>* %addr, i32 32, <4 x i1>%mask, <4 x double>%dst)
ret <4 x double> %res
}
; AVX2-LABEL: test11
; AVX2: vmaskmovps
; AVX2: vblendvps
+
+; SKX-LABEL: test11
+; SKX: vmovaps {{.*}}{%k1}
define <8 x float> @test11(<8 x i32> %trigger, <8 x float>* %addr, <8 x float> %dst) {
%mask = icmp eq <8 x i32> %trigger, zeroinitializer
- %res = call <8 x float> @llvm.masked.load.v8f32(<8 x float>* %addr, i32 4, <8 x i1>%mask, <8 x float>%dst)
+ %res = call <8 x float> @llvm.masked.load.v8f32(<8 x float>* %addr, i32 32, <8 x i1>%mask, <8 x float>%dst)
ret <8 x float> %res
}
; AVX2-LABEL: test12
; AVX2: vpmaskmovd %ymm
+
+; SKX-LABEL: test12
+; SKX: vmovdqu32 {{.*}}{%k1}
define void @test12(<8 x i32> %trigger, <8 x i32>* %addr, <8 x i32> %val) {
%mask = icmp eq <8 x i32> %trigger, zeroinitializer
call void @llvm.masked.store.v8i32(<8 x i32>%val, <8 x i32>* %addr, i32 4, <8 x i1>%mask)
@@ -150,8 +172,15 @@ define void @test13(<16 x i32> %trigger, <16 x float>* %addr, <16 x float> %val)
}
; AVX2-LABEL: test14
-; AVX2: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
+; AVX2: vpshufd
+; AVX2: vmovq
; AVX2: vmaskmovps
+
+; SKX-LABEL: test14
+; SKX: kshiftl
+; SKX: kshiftr
+; SKX: vmovups {{.*}}{%k1}
+
define void @test14(<2 x i32> %trigger, <2 x float>* %addr, <2 x float> %val) {
%mask = icmp eq <2 x i32> %trigger, zeroinitializer
call void @llvm.masked.store.v2f32(<2 x float>%val, <2 x float>* %addr, i32 4, <2 x i1>%mask)
@@ -160,6 +189,11 @@ define void @test14(<2 x i32> %trigger, <2 x float>* %addr, <2 x float> %val) {
; AVX2-LABEL: test15
; AVX2: vpmaskmovd
+
+; SKX-LABEL: test15
+; SKX: kshiftl
+; SKX: kshiftr
+; SKX: vmovdqu32 {{.*}}{%k1}
define void @test15(<2 x i32> %trigger, <2 x i32>* %addr, <2 x i32> %val) {
%mask = icmp eq <2 x i32> %trigger, zeroinitializer
call void @llvm.masked.store.v2i32(<2 x i32>%val, <2 x i32>* %addr, i32 4, <2 x i1>%mask)
@@ -169,6 +203,11 @@ define void @test15(<2 x i32> %trigger, <2 x i32>* %addr, <2 x i32> %val) {
; AVX2-LABEL: test16
; AVX2: vmaskmovps
; AVX2: vblendvps
+
+; SKX-LABEL: test16
+; SKX: kshiftl
+; SKX: kshiftr
+; SKX: vmovups {{.*}}{%k1}
define <2 x float> @test16(<2 x i32> %trigger, <2 x float>* %addr, <2 x float> %dst) {
%mask = icmp eq <2 x i32> %trigger, zeroinitializer
%res = call <2 x float> @llvm.masked.load.v2f32(<2 x float>* %addr, i32 4, <2 x i1>%mask, <2 x float>%dst)
@@ -179,6 +218,11 @@ define <2 x float> @test16(<2 x i32> %trigger, <2 x float>* %addr, <2 x float> %
; AVX2: vpmaskmovd
; AVX2: vblendvps
; AVX2: vpmovsxdq
+
+; SKX-LABEL: test17
+; SKX: kshiftl
+; SKX: kshiftr
+; SKX: vmovdqu32 {{.*}}{%k1}
define <2 x i32> @test17(<2 x i32> %trigger, <2 x i32>* %addr, <2 x i32> %dst) {
%mask = icmp eq <2 x i32> %trigger, zeroinitializer
%res = call <2 x i32> @llvm.masked.load.v2i32(<2 x i32>* %addr, i32 4, <2 x i1>%mask, <2 x i32>%dst)
diff --git a/test/CodeGen/X86/mcinst-lowering.ll b/test/CodeGen/X86/mcinst-lowering.ll
index a82cfc431ba4..51b2895f1c78 100644
--- a/test/CodeGen/X86/mcinst-lowering.ll
+++ b/test/CodeGen/X86/mcinst-lowering.ll
@@ -5,7 +5,7 @@ target triple = "x86_64-apple-darwin10.0.0"
define i32 @f0(i32* nocapture %x) nounwind readonly ssp {
entry:
- %tmp1 = load i32* %x ; <i32> [#uses=2]
+ %tmp1 = load i32, i32* %x ; <i32> [#uses=2]
%tobool = icmp eq i32 %tmp1, 0 ; <i1> [#uses=1]
br i1 %tobool, label %if.end, label %return
diff --git a/test/CodeGen/X86/mem-intrin-base-reg.ll b/test/CodeGen/X86/mem-intrin-base-reg.ll
index 9a6de3dd1d92..9bace29e185f 100644
--- a/test/CodeGen/X86/mem-intrin-base-reg.ll
+++ b/test/CodeGen/X86/mem-intrin-base-reg.ll
@@ -24,9 +24,9 @@ no_vectors:
ret i32 0
spill_vectors:
- %vp1 = getelementptr <4 x i32>* %vp0, i32 1
- %v0 = load <4 x i32>* %vp0
- %v1 = load <4 x i32>* %vp1
+ %vp1 = getelementptr <4 x i32>, <4 x i32>* %vp0, i32 1
+ %v0 = load <4 x i32>, <4 x i32>* %vp0
+ %v1 = load <4 x i32>, <4 x i32>* %vp1
%vicmp = icmp slt <4 x i32> %v0, %v1
%icmp = extractelement <4 x i1> %vicmp, i32 0
call void @escape_vla_and_icmp(i8* null, i1 zeroext %icmp)
@@ -49,9 +49,9 @@ no_vectors:
ret i32 0
spill_vectors:
- %vp1 = getelementptr <4 x i32>* %vp0, i32 1
- %v0 = load <4 x i32>* %vp0
- %v1 = load <4 x i32>* %vp1
+ %vp1 = getelementptr <4 x i32>, <4 x i32>* %vp0, i32 1
+ %v0 = load <4 x i32>, <4 x i32>* %vp0
+ %v1 = load <4 x i32>, <4 x i32>* %vp1
%vicmp = icmp slt <4 x i32> %v0, %v1
%icmp = extractelement <4 x i1> %vicmp, i32 0
%vla = alloca i8, i32 %n
@@ -77,9 +77,9 @@ no_vectors:
ret i32 0
spill_vectors:
- %vp1 = getelementptr <4 x i32>* %vp0, i32 1
- %v0 = load <4 x i32>* %vp0
- %v1 = load <4 x i32>* %vp1
+ %vp1 = getelementptr <4 x i32>, <4 x i32>* %vp0, i32 1
+ %v0 = load <4 x i32>, <4 x i32>* %vp0
+ %v1 = load <4 x i32>, <4 x i32>* %vp1
%vicmp = icmp slt <4 x i32> %v0, %v1
%icmp = extractelement <4 x i1> %vicmp, i32 0
%vla = alloca i8, i32 %n
diff --git a/test/CodeGen/X86/mem-promote-integers.ll b/test/CodeGen/X86/mem-promote-integers.ll
index ea38b95a864e..3023cf2e900e 100644
--- a/test/CodeGen/X86/mem-promote-integers.ll
+++ b/test/CodeGen/X86/mem-promote-integers.ll
@@ -5,7 +5,7 @@
; RUN: llc -march=x86-64 < %s > /dev/null
define <1 x i8> @test_1xi8(<1 x i8> %x, <1 x i8>* %b) {
- %bb = load <1 x i8>* %b
+ %bb = load <1 x i8>, <1 x i8>* %b
%tt = xor <1 x i8> %x, %bb
store <1 x i8> %tt, <1 x i8>* %b
br label %next
@@ -16,7 +16,7 @@ next:
define <1 x i16> @test_1xi16(<1 x i16> %x, <1 x i16>* %b) {
- %bb = load <1 x i16>* %b
+ %bb = load <1 x i16>, <1 x i16>* %b
%tt = xor <1 x i16> %x, %bb
store <1 x i16> %tt, <1 x i16>* %b
br label %next
@@ -27,7 +27,7 @@ next:
define <1 x i32> @test_1xi32(<1 x i32> %x, <1 x i32>* %b) {
- %bb = load <1 x i32>* %b
+ %bb = load <1 x i32>, <1 x i32>* %b
%tt = xor <1 x i32> %x, %bb
store <1 x i32> %tt, <1 x i32>* %b
br label %next
@@ -38,7 +38,7 @@ next:
define <1 x i64> @test_1xi64(<1 x i64> %x, <1 x i64>* %b) {
- %bb = load <1 x i64>* %b
+ %bb = load <1 x i64>, <1 x i64>* %b
%tt = xor <1 x i64> %x, %bb
store <1 x i64> %tt, <1 x i64>* %b
br label %next
@@ -49,7 +49,7 @@ next:
define <1 x i128> @test_1xi128(<1 x i128> %x, <1 x i128>* %b) {
- %bb = load <1 x i128>* %b
+ %bb = load <1 x i128>, <1 x i128>* %b
%tt = xor <1 x i128> %x, %bb
store <1 x i128> %tt, <1 x i128>* %b
br label %next
@@ -60,7 +60,7 @@ next:
define <1 x i256> @test_1xi256(<1 x i256> %x, <1 x i256>* %b) {
- %bb = load <1 x i256>* %b
+ %bb = load <1 x i256>, <1 x i256>* %b
%tt = xor <1 x i256> %x, %bb
store <1 x i256> %tt, <1 x i256>* %b
br label %next
@@ -71,7 +71,7 @@ next:
define <1 x i512> @test_1xi512(<1 x i512> %x, <1 x i512>* %b) {
- %bb = load <1 x i512>* %b
+ %bb = load <1 x i512>, <1 x i512>* %b
%tt = xor <1 x i512> %x, %bb
store <1 x i512> %tt, <1 x i512>* %b
br label %next
@@ -82,7 +82,7 @@ next:
define <2 x i8> @test_2xi8(<2 x i8> %x, <2 x i8>* %b) {
- %bb = load <2 x i8>* %b
+ %bb = load <2 x i8>, <2 x i8>* %b
%tt = xor <2 x i8> %x, %bb
store <2 x i8> %tt, <2 x i8>* %b
br label %next
@@ -93,7 +93,7 @@ next:
define <2 x i16> @test_2xi16(<2 x i16> %x, <2 x i16>* %b) {
- %bb = load <2 x i16>* %b
+ %bb = load <2 x i16>, <2 x i16>* %b
%tt = xor <2 x i16> %x, %bb
store <2 x i16> %tt, <2 x i16>* %b
br label %next
@@ -104,7 +104,7 @@ next:
define <2 x i32> @test_2xi32(<2 x i32> %x, <2 x i32>* %b) {
- %bb = load <2 x i32>* %b
+ %bb = load <2 x i32>, <2 x i32>* %b
%tt = xor <2 x i32> %x, %bb
store <2 x i32> %tt, <2 x i32>* %b
br label %next
@@ -115,7 +115,7 @@ next:
define <2 x i64> @test_2xi64(<2 x i64> %x, <2 x i64>* %b) {
- %bb = load <2 x i64>* %b
+ %bb = load <2 x i64>, <2 x i64>* %b
%tt = xor <2 x i64> %x, %bb
store <2 x i64> %tt, <2 x i64>* %b
br label %next
@@ -126,7 +126,7 @@ next:
define <2 x i128> @test_2xi128(<2 x i128> %x, <2 x i128>* %b) {
- %bb = load <2 x i128>* %b
+ %bb = load <2 x i128>, <2 x i128>* %b
%tt = xor <2 x i128> %x, %bb
store <2 x i128> %tt, <2 x i128>* %b
br label %next
@@ -137,7 +137,7 @@ next:
define <2 x i256> @test_2xi256(<2 x i256> %x, <2 x i256>* %b) {
- %bb = load <2 x i256>* %b
+ %bb = load <2 x i256>, <2 x i256>* %b
%tt = xor <2 x i256> %x, %bb
store <2 x i256> %tt, <2 x i256>* %b
br label %next
@@ -148,7 +148,7 @@ next:
define <2 x i512> @test_2xi512(<2 x i512> %x, <2 x i512>* %b) {
- %bb = load <2 x i512>* %b
+ %bb = load <2 x i512>, <2 x i512>* %b
%tt = xor <2 x i512> %x, %bb
store <2 x i512> %tt, <2 x i512>* %b
br label %next
@@ -159,7 +159,7 @@ next:
define <3 x i8> @test_3xi8(<3 x i8> %x, <3 x i8>* %b) {
- %bb = load <3 x i8>* %b
+ %bb = load <3 x i8>, <3 x i8>* %b
%tt = xor <3 x i8> %x, %bb
store <3 x i8> %tt, <3 x i8>* %b
br label %next
@@ -170,7 +170,7 @@ next:
define <3 x i16> @test_3xi16(<3 x i16> %x, <3 x i16>* %b) {
- %bb = load <3 x i16>* %b
+ %bb = load <3 x i16>, <3 x i16>* %b
%tt = xor <3 x i16> %x, %bb
store <3 x i16> %tt, <3 x i16>* %b
br label %next
@@ -181,7 +181,7 @@ next:
define <3 x i32> @test_3xi32(<3 x i32> %x, <3 x i32>* %b) {
- %bb = load <3 x i32>* %b
+ %bb = load <3 x i32>, <3 x i32>* %b
%tt = xor <3 x i32> %x, %bb
store <3 x i32> %tt, <3 x i32>* %b
br label %next
@@ -192,7 +192,7 @@ next:
define <3 x i64> @test_3xi64(<3 x i64> %x, <3 x i64>* %b) {
- %bb = load <3 x i64>* %b
+ %bb = load <3 x i64>, <3 x i64>* %b
%tt = xor <3 x i64> %x, %bb
store <3 x i64> %tt, <3 x i64>* %b
br label %next
@@ -203,7 +203,7 @@ next:
define <3 x i128> @test_3xi128(<3 x i128> %x, <3 x i128>* %b) {
- %bb = load <3 x i128>* %b
+ %bb = load <3 x i128>, <3 x i128>* %b
%tt = xor <3 x i128> %x, %bb
store <3 x i128> %tt, <3 x i128>* %b
br label %next
@@ -214,7 +214,7 @@ next:
define <3 x i256> @test_3xi256(<3 x i256> %x, <3 x i256>* %b) {
- %bb = load <3 x i256>* %b
+ %bb = load <3 x i256>, <3 x i256>* %b
%tt = xor <3 x i256> %x, %bb
store <3 x i256> %tt, <3 x i256>* %b
br label %next
@@ -225,7 +225,7 @@ next:
define <3 x i512> @test_3xi512(<3 x i512> %x, <3 x i512>* %b) {
- %bb = load <3 x i512>* %b
+ %bb = load <3 x i512>, <3 x i512>* %b
%tt = xor <3 x i512> %x, %bb
store <3 x i512> %tt, <3 x i512>* %b
br label %next
@@ -236,7 +236,7 @@ next:
define <4 x i8> @test_4xi8(<4 x i8> %x, <4 x i8>* %b) {
- %bb = load <4 x i8>* %b
+ %bb = load <4 x i8>, <4 x i8>* %b
%tt = xor <4 x i8> %x, %bb
store <4 x i8> %tt, <4 x i8>* %b
br label %next
@@ -247,7 +247,7 @@ next:
define <4 x i16> @test_4xi16(<4 x i16> %x, <4 x i16>* %b) {
- %bb = load <4 x i16>* %b
+ %bb = load <4 x i16>, <4 x i16>* %b
%tt = xor <4 x i16> %x, %bb
store <4 x i16> %tt, <4 x i16>* %b
br label %next
@@ -258,7 +258,7 @@ next:
define <4 x i32> @test_4xi32(<4 x i32> %x, <4 x i32>* %b) {
- %bb = load <4 x i32>* %b
+ %bb = load <4 x i32>, <4 x i32>* %b
%tt = xor <4 x i32> %x, %bb
store <4 x i32> %tt, <4 x i32>* %b
br label %next
@@ -269,7 +269,7 @@ next:
define <4 x i64> @test_4xi64(<4 x i64> %x, <4 x i64>* %b) {
- %bb = load <4 x i64>* %b
+ %bb = load <4 x i64>, <4 x i64>* %b
%tt = xor <4 x i64> %x, %bb
store <4 x i64> %tt, <4 x i64>* %b
br label %next
@@ -280,7 +280,7 @@ next:
define <4 x i128> @test_4xi128(<4 x i128> %x, <4 x i128>* %b) {
- %bb = load <4 x i128>* %b
+ %bb = load <4 x i128>, <4 x i128>* %b
%tt = xor <4 x i128> %x, %bb
store <4 x i128> %tt, <4 x i128>* %b
br label %next
@@ -291,7 +291,7 @@ next:
define <4 x i256> @test_4xi256(<4 x i256> %x, <4 x i256>* %b) {
- %bb = load <4 x i256>* %b
+ %bb = load <4 x i256>, <4 x i256>* %b
%tt = xor <4 x i256> %x, %bb
store <4 x i256> %tt, <4 x i256>* %b
br label %next
@@ -302,7 +302,7 @@ next:
define <4 x i512> @test_4xi512(<4 x i512> %x, <4 x i512>* %b) {
- %bb = load <4 x i512>* %b
+ %bb = load <4 x i512>, <4 x i512>* %b
%tt = xor <4 x i512> %x, %bb
store <4 x i512> %tt, <4 x i512>* %b
br label %next
@@ -313,7 +313,7 @@ next:
define <5 x i8> @test_5xi8(<5 x i8> %x, <5 x i8>* %b) {
- %bb = load <5 x i8>* %b
+ %bb = load <5 x i8>, <5 x i8>* %b
%tt = xor <5 x i8> %x, %bb
store <5 x i8> %tt, <5 x i8>* %b
br label %next
@@ -324,7 +324,7 @@ next:
define <5 x i16> @test_5xi16(<5 x i16> %x, <5 x i16>* %b) {
- %bb = load <5 x i16>* %b
+ %bb = load <5 x i16>, <5 x i16>* %b
%tt = xor <5 x i16> %x, %bb
store <5 x i16> %tt, <5 x i16>* %b
br label %next
@@ -335,7 +335,7 @@ next:
define <5 x i32> @test_5xi32(<5 x i32> %x, <5 x i32>* %b) {
- %bb = load <5 x i32>* %b
+ %bb = load <5 x i32>, <5 x i32>* %b
%tt = xor <5 x i32> %x, %bb
store <5 x i32> %tt, <5 x i32>* %b
br label %next
@@ -346,7 +346,7 @@ next:
define <5 x i64> @test_5xi64(<5 x i64> %x, <5 x i64>* %b) {
- %bb = load <5 x i64>* %b
+ %bb = load <5 x i64>, <5 x i64>* %b
%tt = xor <5 x i64> %x, %bb
store <5 x i64> %tt, <5 x i64>* %b
br label %next
@@ -357,7 +357,7 @@ next:
define <5 x i128> @test_5xi128(<5 x i128> %x, <5 x i128>* %b) {
- %bb = load <5 x i128>* %b
+ %bb = load <5 x i128>, <5 x i128>* %b
%tt = xor <5 x i128> %x, %bb
store <5 x i128> %tt, <5 x i128>* %b
br label %next
@@ -368,7 +368,7 @@ next:
define <5 x i256> @test_5xi256(<5 x i256> %x, <5 x i256>* %b) {
- %bb = load <5 x i256>* %b
+ %bb = load <5 x i256>, <5 x i256>* %b
%tt = xor <5 x i256> %x, %bb
store <5 x i256> %tt, <5 x i256>* %b
br label %next
@@ -379,7 +379,7 @@ next:
define <5 x i512> @test_5xi512(<5 x i512> %x, <5 x i512>* %b) {
- %bb = load <5 x i512>* %b
+ %bb = load <5 x i512>, <5 x i512>* %b
%tt = xor <5 x i512> %x, %bb
store <5 x i512> %tt, <5 x i512>* %b
br label %next
diff --git a/test/CodeGen/X86/memcmp.ll b/test/CodeGen/X86/memcmp.ll
index 0a534926c6cd..e5f1f526b467 100644
--- a/test/CodeGen/X86/memcmp.ll
+++ b/test/CodeGen/X86/memcmp.ll
@@ -11,7 +11,7 @@ declare i32 @memcmp(...)
define void @memcmp2(i8* %X, i8* %Y, i32* nocapture %P) nounwind {
entry:
- %0 = tail call i32 (...)* @memcmp(i8* %X, i8* %Y, i32 2) nounwind ; <i32> [#uses=1]
+ %0 = tail call i32 (...) @memcmp(i8* %X, i8* %Y, i32 2) nounwind ; <i32> [#uses=1]
%1 = icmp eq i32 %0, 0 ; <i1> [#uses=1]
br i1 %1, label %return, label %bb
@@ -31,7 +31,7 @@ return: ; preds = %entry
define void @memcmp2a(i8* %X, i32* nocapture %P) nounwind {
entry:
- %0 = tail call i32 (...)* @memcmp(i8* %X, i8* getelementptr inbounds ([23 x i8]* @.str, i32 0, i32 1), i32 2) nounwind ; <i32> [#uses=1]
+ %0 = tail call i32 (...) @memcmp(i8* %X, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @.str, i32 0, i32 1), i32 2) nounwind ; <i32> [#uses=1]
%1 = icmp eq i32 %0, 0 ; <i1> [#uses=1]
br i1 %1, label %return, label %bb
@@ -49,7 +49,7 @@ return: ; preds = %entry
define void @memcmp4(i8* %X, i8* %Y, i32* nocapture %P) nounwind {
entry:
- %0 = tail call i32 (...)* @memcmp(i8* %X, i8* %Y, i32 4) nounwind ; <i32> [#uses=1]
+ %0 = tail call i32 (...) @memcmp(i8* %X, i8* %Y, i32 4) nounwind ; <i32> [#uses=1]
%1 = icmp eq i32 %0, 0 ; <i1> [#uses=1]
br i1 %1, label %return, label %bb
@@ -66,7 +66,7 @@ return: ; preds = %entry
define void @memcmp4a(i8* %X, i32* nocapture %P) nounwind {
entry:
- %0 = tail call i32 (...)* @memcmp(i8* %X, i8* getelementptr inbounds ([23 x i8]* @.str, i32 0, i32 1), i32 4) nounwind ; <i32> [#uses=1]
+ %0 = tail call i32 (...) @memcmp(i8* %X, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @.str, i32 0, i32 1), i32 4) nounwind ; <i32> [#uses=1]
%1 = icmp eq i32 %0, 0 ; <i1> [#uses=1]
br i1 %1, label %return, label %bb
@@ -82,7 +82,7 @@ return: ; preds = %entry
define void @memcmp8(i8* %X, i8* %Y, i32* nocapture %P) nounwind {
entry:
- %0 = tail call i32 (...)* @memcmp(i8* %X, i8* %Y, i32 8) nounwind ; <i32> [#uses=1]
+ %0 = tail call i32 (...) @memcmp(i8* %X, i8* %Y, i32 8) nounwind ; <i32> [#uses=1]
%1 = icmp eq i32 %0, 0 ; <i1> [#uses=1]
br i1 %1, label %return, label %bb
@@ -99,7 +99,7 @@ return: ; preds = %entry
define void @memcmp8a(i8* %X, i32* nocapture %P) nounwind {
entry:
- %0 = tail call i32 (...)* @memcmp(i8* %X, i8* getelementptr inbounds ([23 x i8]* @.str, i32 0, i32 0), i32 8) nounwind ; <i32> [#uses=1]
+ %0 = tail call i32 (...) @memcmp(i8* %X, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @.str, i32 0, i32 0), i32 8) nounwind ; <i32> [#uses=1]
%1 = icmp eq i32 %0, 0 ; <i1> [#uses=1]
br i1 %1, label %return, label %bb
diff --git a/test/CodeGen/X86/memcpy-2.ll b/test/CodeGen/X86/memcpy-2.ll
index 6ae7807810e9..0111c0d433f1 100644
--- a/test/CodeGen/X86/memcpy-2.ll
+++ b/test/CodeGen/X86/memcpy-2.ll
@@ -46,7 +46,7 @@ entry:
; X86-64: movq $0
%tmp1 = alloca [25 x i8]
%tmp2 = bitcast [25 x i8]* %tmp1 to i8*
- call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp2, i8* getelementptr inbounds ([25 x i8]* @.str, i32 0, i32 0), i32 25, i32 1, i1 false)
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp2, i8* getelementptr inbounds ([25 x i8], [25 x i8]* @.str, i32 0, i32 0), i32 25, i32 1, i1 false)
unreachable
}
@@ -188,7 +188,7 @@ entry:
; X86-64: movl $2021161080
%tmp1 = alloca [30 x i8]
%tmp2 = bitcast [30 x i8]* %tmp1 to i8*
- call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp2, i8* getelementptr inbounds ([30 x i8]* @.str2, i32 0, i32 0), i32 30, i32 1, i1 false)
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp2, i8* getelementptr inbounds ([30 x i8], [30 x i8]* @.str2, i32 0, i32 0), i32 30, i32 1, i1 false)
unreachable
}
diff --git a/test/CodeGen/X86/memcpy.ll b/test/CodeGen/X86/memcpy.ll
index 88b6cfd2295f..f582571252b5 100644
--- a/test/CodeGen/X86/memcpy.ll
+++ b/test/CodeGen/X86/memcpy.ll
@@ -84,7 +84,7 @@ entry:
define void @test5(i8* nocapture %C) nounwind uwtable ssp {
entry:
- tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([30 x i8]* @.str, i64 0, i64 0), i64 16, i32 1, i1 false)
+ tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([30 x i8], [30 x i8]* @.str, i64 0, i64 0), i64 16, i32 1, i1 false)
ret void
; DARWIN-LABEL: test5:
@@ -101,7 +101,7 @@ entry:
; DARWIN: test6
; DARWIN: movw $0, 8
; DARWIN: movq $120, 0
- tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* null, i8* getelementptr inbounds ([2 x i8]* @.str2, i64 0, i64 0), i64 10, i32 1, i1 false)
+ tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* null, i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str2, i64 0, i64 0), i64 10, i32 1, i1 false)
ret void
}
diff --git a/test/CodeGen/X86/memset-3.ll b/test/CodeGen/X86/memset-3.ll
index 29febfab29af..455e6756013f 100644
--- a/test/CodeGen/X86/memset-3.ll
+++ b/test/CodeGen/X86/memset-3.ll
@@ -4,7 +4,7 @@
define void @t() nounwind ssp {
entry:
%buf = alloca [512 x i8], align 1
- %ptr = getelementptr inbounds [512 x i8]* %buf, i32 0, i32 0
+ %ptr = getelementptr inbounds [512 x i8], [512 x i8]* %buf, i32 0, i32 0
call void @llvm.memset.p0i8.i32(i8* %ptr, i8 undef, i32 512, i32 1, i1 false)
unreachable
}
diff --git a/test/CodeGen/X86/memset.ll b/test/CodeGen/X86/memset.ll
index 0d479f0abe7b..96a22e885675 100644
--- a/test/CodeGen/X86/memset.ll
+++ b/test/CodeGen/X86/memset.ll
@@ -7,7 +7,7 @@
define void @t() nounwind {
entry:
%up_mvd = alloca [8 x %struct.x] ; <[8 x %struct.x]*> [#uses=2]
- %up_mvd116 = getelementptr [8 x %struct.x]* %up_mvd, i32 0, i32 0 ; <%struct.x*> [#uses=1]
+ %up_mvd116 = getelementptr [8 x %struct.x], [8 x %struct.x]* %up_mvd, i32 0, i32 0 ; <%struct.x*> [#uses=1]
%tmp110117 = bitcast [8 x %struct.x]* %up_mvd to i8* ; <i8*> [#uses=1]
call void @llvm.memset.p0i8.i64(i8* %tmp110117, i8 0, i64 32, i32 8, i1 false)
diff --git a/test/CodeGen/X86/merge-consecutive-stores-i1.ll b/test/CodeGen/X86/merge-consecutive-stores-i1.ll
new file mode 100644
index 000000000000..a7f5c2142271
--- /dev/null
+++ b/test/CodeGen/X86/merge-consecutive-stores-i1.ll
@@ -0,0 +1,15 @@
+; RUN: llc -march=x86-64 < %s
+
+; Ensure that MergeConsecutiveStores doesn't crash when dealing with
+; i1 operands.
+
+%struct.X = type { i1, i1 }
+
+@b = common global %struct.X zeroinitializer, align 4
+
+define void @foo() {
+entry:
+ store i1 0, i1* getelementptr inbounds (%struct.X, %struct.X* @b, i64 0, i32 0), align 4
+ store i1 0, i1* getelementptr inbounds (%struct.X, %struct.X* @b, i64 0, i32 1), align 1
+ ret void
+}
diff --git a/test/CodeGen/X86/merge_store.ll b/test/CodeGen/X86/merge_store.ll
index f98963d8e90e..2701f369bcde 100644
--- a/test/CodeGen/X86/merge_store.ll
+++ b/test/CodeGen/X86/merge_store.ll
@@ -10,16 +10,16 @@ entry:
for.body:
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
- %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv
+ %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
store i32 1, i32* %arrayidx, align 4
%0 = or i64 %indvars.iv, 1
- %arrayidx2 = getelementptr inbounds i32* %a, i64 %0
+ %arrayidx2 = getelementptr inbounds i32, i32* %a, i64 %0
store i32 1, i32* %arrayidx2, align 4
%1 = or i64 %indvars.iv, 2
- %arrayidx5 = getelementptr inbounds i32* %a, i64 %1
+ %arrayidx5 = getelementptr inbounds i32, i32* %a, i64 %1
store i32 1, i32* %arrayidx5, align 4
%2 = or i64 %indvars.iv, 3
- %arrayidx8 = getelementptr inbounds i32* %a, i64 %2
+ %arrayidx8 = getelementptr inbounds i32, i32* %a, i64 %2
store i32 1, i32* %arrayidx8, align 4
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 4
%3 = trunc i64 %indvars.iv.next to i32
diff --git a/test/CodeGen/X86/mingw-alloca.ll b/test/CodeGen/X86/mingw-alloca.ll
index 72b69400ffa1..cbad4fbfa2ea 100644
--- a/test/CodeGen/X86/mingw-alloca.ll
+++ b/test/CodeGen/X86/mingw-alloca.ll
@@ -29,7 +29,7 @@ entry:
; ELF: calll _alloca
; ELF: movl 8028(%esp), %eax
%A2 = alloca [2000 x i32], align 16 ; <[2000 x i32]*> [#uses=1]
- %A2.sub = getelementptr [2000 x i32]* %A2, i32 0, i32 0 ; <i32*> [#uses=1]
+ %A2.sub = getelementptr [2000 x i32], [2000 x i32]* %A2, i32 0, i32 0 ; <i32*> [#uses=1]
call void @bar2( i32* %A2.sub, i32 %N )
ret void
}
diff --git a/test/CodeGen/X86/misaligned-memset.ll b/test/CodeGen/X86/misaligned-memset.ll
index 21f8bf2bf29e..ef8e0e81ad77 100644
--- a/test/CodeGen/X86/misaligned-memset.ll
+++ b/test/CodeGen/X86/misaligned-memset.ll
@@ -7,8 +7,8 @@ define i32 @main() nounwind ssp {
entry:
%retval = alloca i32, align 4
store i32 0, i32* %retval
- call void @llvm.memset.p0i8.i64(i8* bitcast (i64* getelementptr inbounds ([3 x i64]* @a, i32 0, i64 1) to i8*), i8 0, i64 16, i32 1, i1 false)
- %0 = load i32* %retval
+ call void @llvm.memset.p0i8.i64(i8* bitcast (i64* getelementptr inbounds ([3 x i64], [3 x i64]* @a, i32 0, i64 1) to i8*), i8 0, i64 16, i32 1, i1 false)
+ %0 = load i32, i32* %retval
ret i32 %0
}
diff --git a/test/CodeGen/X86/misched-aa-colored.ll b/test/CodeGen/X86/misched-aa-colored.ll
index 52a5e5d25d11..ef7b98ac9c69 100644
--- a/test/CodeGen/X86/misched-aa-colored.ll
+++ b/test/CodeGen/X86/misched-aa-colored.ll
@@ -155,13 +155,13 @@ entry:
%ref.tmp.i = alloca %"struct.std::pair.112.119.719.1079.2039.2159.2399.4199", align 8
%Op.i = alloca %"class.llvm::SDValue.3.603.963.1923.2043.2283.4083", align 8
%0 = bitcast %"struct.std::pair.112.119.719.1079.2039.2159.2399.4199"* %ref.tmp.i to i8*
- %retval.sroa.0.0.idx.i36 = getelementptr inbounds %"struct.std::pair.112.119.719.1079.2039.2159.2399.4199"* %ref.tmp.i, i64 0, i32 1, i32 0, i32 0
- %retval.sroa.0.0.copyload.i37 = load i32* %retval.sroa.0.0.idx.i36, align 8
+ %retval.sroa.0.0.idx.i36 = getelementptr inbounds %"struct.std::pair.112.119.719.1079.2039.2159.2399.4199", %"struct.std::pair.112.119.719.1079.2039.2159.2399.4199"* %ref.tmp.i, i64 0, i32 1, i32 0, i32 0
+ %retval.sroa.0.0.copyload.i37 = load i32, i32* %retval.sroa.0.0.idx.i36, align 8
call void @llvm.lifetime.end(i64 24, i8* %0) #1
- %agg.tmp8.sroa.2.0.copyload = load i32* undef, align 8
+ %agg.tmp8.sroa.2.0.copyload = load i32, i32* undef, align 8
%1 = bitcast %"class.llvm::SDValue.3.603.963.1923.2043.2283.4083"* %Op.i to i8*
call void @llvm.lifetime.start(i64 16, i8* %1) #1
- %2 = getelementptr %"class.llvm::SDValue.3.603.963.1923.2043.2283.4083"* %Op.i, i64 0, i32 1
+ %2 = getelementptr %"class.llvm::SDValue.3.603.963.1923.2043.2283.4083", %"class.llvm::SDValue.3.603.963.1923.2043.2283.4083"* %Op.i, i64 0, i32 1
store i32 %agg.tmp8.sroa.2.0.copyload, i32* %2, align 8
; CHECK: movl (%rax), %eax
diff --git a/test/CodeGen/X86/misched-aa-mmos.ll b/test/CodeGen/X86/misched-aa-mmos.ll
index 343e26f54725..c457a5eb4130 100644
--- a/test/CodeGen/X86/misched-aa-mmos.ll
+++ b/test/CodeGen/X86/misched-aa-mmos.ll
@@ -18,13 +18,13 @@ entry:
br i1 undef, label %if.else56, label %cond.end.i
cond.end.i:
- %significand.i18.i = getelementptr inbounds %c1* %temp_rhs, i64 0, i32 1
- %exponent.i = getelementptr inbounds %c1* %temp_rhs, i64 0, i32 2
- %0 = load i16* %exponent.i, align 8
+ %significand.i18.i = getelementptr inbounds %c1, %c1* %temp_rhs, i64 0, i32 1
+ %exponent.i = getelementptr inbounds %c1, %c1* %temp_rhs, i64 0, i32 2
+ %0 = load i16, i16* %exponent.i, align 8
%sub.i = add i16 %0, -1
store i16 %sub.i, i16* %exponent.i, align 8
%parts.i.i = bitcast %u1* %significand.i18.i to i64**
- %1 = load i64** %parts.i.i, align 8
+ %1 = load i64*, i64** %parts.i.i, align 8
%call5.i = call zeroext i1 @bar(i64* %1, i32 undef) #1
unreachable
diff --git a/test/CodeGen/X86/misched-balance.ll b/test/CodeGen/X86/misched-balance.ll
index 1900802ac9b3..ca3b57992a2e 100644
--- a/test/CodeGen/X86/misched-balance.ll
+++ b/test/CodeGen/X86/misched-balance.ll
@@ -48,65 +48,65 @@ entry:
; CHECK-LABEL: %end
for.body:
%indvars.iv42.i = phi i64 [ %indvars.iv.next43.i, %for.body ], [ 0, %entry ]
- %tmp57 = load i32* %tmp56, align 4
- %arrayidx12.us.i61 = getelementptr inbounds i32* %pre, i64 %indvars.iv42.i
- %tmp58 = load i32* %arrayidx12.us.i61, align 4
+ %tmp57 = load i32, i32* %tmp56, align 4
+ %arrayidx12.us.i61 = getelementptr inbounds i32, i32* %pre, i64 %indvars.iv42.i
+ %tmp58 = load i32, i32* %arrayidx12.us.i61, align 4
%mul.us.i = mul nsw i32 %tmp58, %tmp57
- %arrayidx8.us.i.1 = getelementptr inbounds i32* %tmp56, i64 1
- %tmp59 = load i32* %arrayidx8.us.i.1, align 4
- %arrayidx12.us.i61.1 = getelementptr inbounds i32* %pre94, i64 %indvars.iv42.i
- %tmp60 = load i32* %arrayidx12.us.i61.1, align 4
+ %arrayidx8.us.i.1 = getelementptr inbounds i32, i32* %tmp56, i64 1
+ %tmp59 = load i32, i32* %arrayidx8.us.i.1, align 4
+ %arrayidx12.us.i61.1 = getelementptr inbounds i32, i32* %pre94, i64 %indvars.iv42.i
+ %tmp60 = load i32, i32* %arrayidx12.us.i61.1, align 4
%mul.us.i.1 = mul nsw i32 %tmp60, %tmp59
%add.us.i.1 = add nsw i32 %mul.us.i.1, %mul.us.i
- %arrayidx8.us.i.2 = getelementptr inbounds i32* %tmp56, i64 2
- %tmp61 = load i32* %arrayidx8.us.i.2, align 4
- %arrayidx12.us.i61.2 = getelementptr inbounds i32* %pre95, i64 %indvars.iv42.i
- %tmp62 = load i32* %arrayidx12.us.i61.2, align 4
+ %arrayidx8.us.i.2 = getelementptr inbounds i32, i32* %tmp56, i64 2
+ %tmp61 = load i32, i32* %arrayidx8.us.i.2, align 4
+ %arrayidx12.us.i61.2 = getelementptr inbounds i32, i32* %pre95, i64 %indvars.iv42.i
+ %tmp62 = load i32, i32* %arrayidx12.us.i61.2, align 4
%mul.us.i.2 = mul nsw i32 %tmp62, %tmp61
%add.us.i.2 = add nsw i32 %mul.us.i.2, %add.us.i.1
- %arrayidx8.us.i.3 = getelementptr inbounds i32* %tmp56, i64 3
- %tmp63 = load i32* %arrayidx8.us.i.3, align 4
- %arrayidx12.us.i61.3 = getelementptr inbounds i32* %pre96, i64 %indvars.iv42.i
- %tmp64 = load i32* %arrayidx12.us.i61.3, align 4
+ %arrayidx8.us.i.3 = getelementptr inbounds i32, i32* %tmp56, i64 3
+ %tmp63 = load i32, i32* %arrayidx8.us.i.3, align 4
+ %arrayidx12.us.i61.3 = getelementptr inbounds i32, i32* %pre96, i64 %indvars.iv42.i
+ %tmp64 = load i32, i32* %arrayidx12.us.i61.3, align 4
%mul.us.i.3 = mul nsw i32 %tmp64, %tmp63
%add.us.i.3 = add nsw i32 %mul.us.i.3, %add.us.i.2
- %arrayidx8.us.i.4 = getelementptr inbounds i32* %tmp56, i64 4
- %tmp65 = load i32* %arrayidx8.us.i.4, align 4
- %arrayidx12.us.i61.4 = getelementptr inbounds i32* %pre97, i64 %indvars.iv42.i
- %tmp66 = load i32* %arrayidx12.us.i61.4, align 4
+ %arrayidx8.us.i.4 = getelementptr inbounds i32, i32* %tmp56, i64 4
+ %tmp65 = load i32, i32* %arrayidx8.us.i.4, align 4
+ %arrayidx12.us.i61.4 = getelementptr inbounds i32, i32* %pre97, i64 %indvars.iv42.i
+ %tmp66 = load i32, i32* %arrayidx12.us.i61.4, align 4
%mul.us.i.4 = mul nsw i32 %tmp66, %tmp65
%add.us.i.4 = add nsw i32 %mul.us.i.4, %add.us.i.3
- %arrayidx8.us.i.5 = getelementptr inbounds i32* %tmp56, i64 5
- %tmp67 = load i32* %arrayidx8.us.i.5, align 4
- %arrayidx12.us.i61.5 = getelementptr inbounds i32* %pre98, i64 %indvars.iv42.i
- %tmp68 = load i32* %arrayidx12.us.i61.5, align 4
+ %arrayidx8.us.i.5 = getelementptr inbounds i32, i32* %tmp56, i64 5
+ %tmp67 = load i32, i32* %arrayidx8.us.i.5, align 4
+ %arrayidx12.us.i61.5 = getelementptr inbounds i32, i32* %pre98, i64 %indvars.iv42.i
+ %tmp68 = load i32, i32* %arrayidx12.us.i61.5, align 4
%mul.us.i.5 = mul nsw i32 %tmp68, %tmp67
%add.us.i.5 = add nsw i32 %mul.us.i.5, %add.us.i.4
- %arrayidx8.us.i.6 = getelementptr inbounds i32* %tmp56, i64 6
- %tmp69 = load i32* %arrayidx8.us.i.6, align 4
- %arrayidx12.us.i61.6 = getelementptr inbounds i32* %pre99, i64 %indvars.iv42.i
- %tmp70 = load i32* %arrayidx12.us.i61.6, align 4
+ %arrayidx8.us.i.6 = getelementptr inbounds i32, i32* %tmp56, i64 6
+ %tmp69 = load i32, i32* %arrayidx8.us.i.6, align 4
+ %arrayidx12.us.i61.6 = getelementptr inbounds i32, i32* %pre99, i64 %indvars.iv42.i
+ %tmp70 = load i32, i32* %arrayidx12.us.i61.6, align 4
%mul.us.i.6 = mul nsw i32 %tmp70, %tmp69
%add.us.i.6 = add nsw i32 %mul.us.i.6, %add.us.i.5
- %arrayidx8.us.i.7 = getelementptr inbounds i32* %tmp56, i64 7
- %tmp71 = load i32* %arrayidx8.us.i.7, align 4
- %arrayidx12.us.i61.7 = getelementptr inbounds i32* %pre100, i64 %indvars.iv42.i
- %tmp72 = load i32* %arrayidx12.us.i61.7, align 4
+ %arrayidx8.us.i.7 = getelementptr inbounds i32, i32* %tmp56, i64 7
+ %tmp71 = load i32, i32* %arrayidx8.us.i.7, align 4
+ %arrayidx12.us.i61.7 = getelementptr inbounds i32, i32* %pre100, i64 %indvars.iv42.i
+ %tmp72 = load i32, i32* %arrayidx12.us.i61.7, align 4
%mul.us.i.7 = mul nsw i32 %tmp72, %tmp71
%add.us.i.7 = add nsw i32 %mul.us.i.7, %add.us.i.6
- %arrayidx8.us.i.8 = getelementptr inbounds i32* %tmp56, i64 8
- %tmp73 = load i32* %arrayidx8.us.i.8, align 4
- %arrayidx12.us.i61.8 = getelementptr inbounds i32* %pre101, i64 %indvars.iv42.i
- %tmp74 = load i32* %arrayidx12.us.i61.8, align 4
+ %arrayidx8.us.i.8 = getelementptr inbounds i32, i32* %tmp56, i64 8
+ %tmp73 = load i32, i32* %arrayidx8.us.i.8, align 4
+ %arrayidx12.us.i61.8 = getelementptr inbounds i32, i32* %pre101, i64 %indvars.iv42.i
+ %tmp74 = load i32, i32* %arrayidx12.us.i61.8, align 4
%mul.us.i.8 = mul nsw i32 %tmp74, %tmp73
%add.us.i.8 = add nsw i32 %mul.us.i.8, %add.us.i.7
- %arrayidx8.us.i.9 = getelementptr inbounds i32* %tmp56, i64 9
- %tmp75 = load i32* %arrayidx8.us.i.9, align 4
- %arrayidx12.us.i61.9 = getelementptr inbounds i32* %pre102, i64 %indvars.iv42.i
- %tmp76 = load i32* %arrayidx12.us.i61.9, align 4
+ %arrayidx8.us.i.9 = getelementptr inbounds i32, i32* %tmp56, i64 9
+ %tmp75 = load i32, i32* %arrayidx8.us.i.9, align 4
+ %arrayidx12.us.i61.9 = getelementptr inbounds i32, i32* %pre102, i64 %indvars.iv42.i
+ %tmp76 = load i32, i32* %arrayidx12.us.i61.9, align 4
%mul.us.i.9 = mul nsw i32 %tmp76, %tmp75
%add.us.i.9 = add nsw i32 %mul.us.i.9, %add.us.i.8
- %arrayidx16.us.i = getelementptr inbounds i32* %tmp55, i64 %indvars.iv42.i
+ %arrayidx16.us.i = getelementptr inbounds i32, i32* %tmp55, i64 %indvars.iv42.i
store i32 %add.us.i.9, i32* %arrayidx16.us.i, align 4
%indvars.iv.next43.i = add i64 %indvars.iv42.i, 1
%lftr.wideiv = trunc i64 %indvars.iv.next43.i to i32
@@ -159,46 +159,46 @@ entry:
br label %for.body
for.body:
%indvars.iv42.i = phi i64 [ %indvars.iv.next43.i, %for.body ], [ 0, %entry ]
- %tmp57 = load i32* %tmp56, align 4
- %arrayidx12.us.i61 = getelementptr inbounds i32* %pre, i64 %indvars.iv42.i
- %tmp58 = load i32* %arrayidx12.us.i61, align 4
- %arrayidx8.us.i.1 = getelementptr inbounds i32* %tmp56, i64 1
- %tmp59 = load i32* %arrayidx8.us.i.1, align 4
- %arrayidx12.us.i61.1 = getelementptr inbounds i32* %pre94, i64 %indvars.iv42.i
- %tmp60 = load i32* %arrayidx12.us.i61.1, align 4
- %arrayidx8.us.i.2 = getelementptr inbounds i32* %tmp56, i64 2
- %tmp61 = load i32* %arrayidx8.us.i.2, align 4
- %arrayidx12.us.i61.2 = getelementptr inbounds i32* %pre95, i64 %indvars.iv42.i
- %tmp62 = load i32* %arrayidx12.us.i61.2, align 4
- %arrayidx8.us.i.3 = getelementptr inbounds i32* %tmp56, i64 3
- %tmp63 = load i32* %arrayidx8.us.i.3, align 4
- %arrayidx12.us.i61.3 = getelementptr inbounds i32* %pre96, i64 %indvars.iv42.i
- %tmp64 = load i32* %arrayidx12.us.i61.3, align 4
- %arrayidx8.us.i.4 = getelementptr inbounds i32* %tmp56, i64 4
- %tmp65 = load i32* %arrayidx8.us.i.4, align 4
- %arrayidx12.us.i61.4 = getelementptr inbounds i32* %pre97, i64 %indvars.iv42.i
- %tmp66 = load i32* %arrayidx12.us.i61.4, align 4
- %arrayidx8.us.i.5 = getelementptr inbounds i32* %tmp56, i64 5
- %tmp67 = load i32* %arrayidx8.us.i.5, align 4
- %arrayidx12.us.i61.5 = getelementptr inbounds i32* %pre98, i64 %indvars.iv42.i
- %tmp68 = load i32* %arrayidx12.us.i61.5, align 4
- %arrayidx8.us.i.6 = getelementptr inbounds i32* %tmp56, i64 6
- %tmp69 = load i32* %arrayidx8.us.i.6, align 4
- %arrayidx12.us.i61.6 = getelementptr inbounds i32* %pre99, i64 %indvars.iv42.i
- %tmp70 = load i32* %arrayidx12.us.i61.6, align 4
+ %tmp57 = load i32, i32* %tmp56, align 4
+ %arrayidx12.us.i61 = getelementptr inbounds i32, i32* %pre, i64 %indvars.iv42.i
+ %tmp58 = load i32, i32* %arrayidx12.us.i61, align 4
+ %arrayidx8.us.i.1 = getelementptr inbounds i32, i32* %tmp56, i64 1
+ %tmp59 = load i32, i32* %arrayidx8.us.i.1, align 4
+ %arrayidx12.us.i61.1 = getelementptr inbounds i32, i32* %pre94, i64 %indvars.iv42.i
+ %tmp60 = load i32, i32* %arrayidx12.us.i61.1, align 4
+ %arrayidx8.us.i.2 = getelementptr inbounds i32, i32* %tmp56, i64 2
+ %tmp61 = load i32, i32* %arrayidx8.us.i.2, align 4
+ %arrayidx12.us.i61.2 = getelementptr inbounds i32, i32* %pre95, i64 %indvars.iv42.i
+ %tmp62 = load i32, i32* %arrayidx12.us.i61.2, align 4
+ %arrayidx8.us.i.3 = getelementptr inbounds i32, i32* %tmp56, i64 3
+ %tmp63 = load i32, i32* %arrayidx8.us.i.3, align 4
+ %arrayidx12.us.i61.3 = getelementptr inbounds i32, i32* %pre96, i64 %indvars.iv42.i
+ %tmp64 = load i32, i32* %arrayidx12.us.i61.3, align 4
+ %arrayidx8.us.i.4 = getelementptr inbounds i32, i32* %tmp56, i64 4
+ %tmp65 = load i32, i32* %arrayidx8.us.i.4, align 4
+ %arrayidx12.us.i61.4 = getelementptr inbounds i32, i32* %pre97, i64 %indvars.iv42.i
+ %tmp66 = load i32, i32* %arrayidx12.us.i61.4, align 4
+ %arrayidx8.us.i.5 = getelementptr inbounds i32, i32* %tmp56, i64 5
+ %tmp67 = load i32, i32* %arrayidx8.us.i.5, align 4
+ %arrayidx12.us.i61.5 = getelementptr inbounds i32, i32* %pre98, i64 %indvars.iv42.i
+ %tmp68 = load i32, i32* %arrayidx12.us.i61.5, align 4
+ %arrayidx8.us.i.6 = getelementptr inbounds i32, i32* %tmp56, i64 6
+ %tmp69 = load i32, i32* %arrayidx8.us.i.6, align 4
+ %arrayidx12.us.i61.6 = getelementptr inbounds i32, i32* %pre99, i64 %indvars.iv42.i
+ %tmp70 = load i32, i32* %arrayidx12.us.i61.6, align 4
%mul.us.i = mul nsw i32 %tmp58, %tmp57
- %arrayidx8.us.i.7 = getelementptr inbounds i32* %tmp56, i64 7
- %tmp71 = load i32* %arrayidx8.us.i.7, align 4
- %arrayidx12.us.i61.7 = getelementptr inbounds i32* %pre100, i64 %indvars.iv42.i
- %tmp72 = load i32* %arrayidx12.us.i61.7, align 4
- %arrayidx8.us.i.8 = getelementptr inbounds i32* %tmp56, i64 8
- %tmp73 = load i32* %arrayidx8.us.i.8, align 4
- %arrayidx12.us.i61.8 = getelementptr inbounds i32* %pre101, i64 %indvars.iv42.i
- %tmp74 = load i32* %arrayidx12.us.i61.8, align 4
- %arrayidx8.us.i.9 = getelementptr inbounds i32* %tmp56, i64 9
- %tmp75 = load i32* %arrayidx8.us.i.9, align 4
- %arrayidx12.us.i61.9 = getelementptr inbounds i32* %pre102, i64 %indvars.iv42.i
- %tmp76 = load i32* %arrayidx12.us.i61.9, align 4
+ %arrayidx8.us.i.7 = getelementptr inbounds i32, i32* %tmp56, i64 7
+ %tmp71 = load i32, i32* %arrayidx8.us.i.7, align 4
+ %arrayidx12.us.i61.7 = getelementptr inbounds i32, i32* %pre100, i64 %indvars.iv42.i
+ %tmp72 = load i32, i32* %arrayidx12.us.i61.7, align 4
+ %arrayidx8.us.i.8 = getelementptr inbounds i32, i32* %tmp56, i64 8
+ %tmp73 = load i32, i32* %arrayidx8.us.i.8, align 4
+ %arrayidx12.us.i61.8 = getelementptr inbounds i32, i32* %pre101, i64 %indvars.iv42.i
+ %tmp74 = load i32, i32* %arrayidx12.us.i61.8, align 4
+ %arrayidx8.us.i.9 = getelementptr inbounds i32, i32* %tmp56, i64 9
+ %tmp75 = load i32, i32* %arrayidx8.us.i.9, align 4
+ %arrayidx12.us.i61.9 = getelementptr inbounds i32, i32* %pre102, i64 %indvars.iv42.i
+ %tmp76 = load i32, i32* %arrayidx12.us.i61.9, align 4
%mul.us.i.1 = mul nsw i32 %tmp60, %tmp59
%add.us.i.1 = add nsw i32 %mul.us.i.1, %mul.us.i
%mul.us.i.2 = mul nsw i32 %tmp62, %tmp61
@@ -217,7 +217,7 @@ for.body:
%add.us.i.8 = add nsw i32 %mul.us.i.8, %add.us.i.7
%mul.us.i.9 = mul nsw i32 %tmp76, %tmp75
%add.us.i.9 = add nsw i32 %mul.us.i.9, %add.us.i.8
- %arrayidx16.us.i = getelementptr inbounds i32* %tmp55, i64 %indvars.iv42.i
+ %arrayidx16.us.i = getelementptr inbounds i32, i32* %tmp55, i64 %indvars.iv42.i
store i32 %add.us.i.9, i32* %arrayidx16.us.i, align 4
%indvars.iv.next43.i = add i64 %indvars.iv42.i, 1
%lftr.wideiv = trunc i64 %indvars.iv.next43.i to i32
@@ -243,20 +243,20 @@ end:
@d = external global i32, align 4
define i32 @encpc1() nounwind {
entry:
- %l1 = load i32* @a, align 16
+ %l1 = load i32, i32* @a, align 16
%conv = shl i32 %l1, 8
%s5 = lshr i32 %l1, 8
%add = or i32 %conv, %s5
store i32 %add, i32* @b
- %l6 = load i32* @a
- %l7 = load i32* @c
+ %l6 = load i32, i32* @a
+ %l7 = load i32, i32* @c
%add.i = add i32 %l7, %l6
%idxprom.i = zext i32 %l7 to i64
- %arrayidx.i = getelementptr inbounds i32* @d, i64 %idxprom.i
- %l8 = load i32* %arrayidx.i
+ %arrayidx.i = getelementptr inbounds i32, i32* @d, i64 %idxprom.i
+ %l8 = load i32, i32* %arrayidx.i
store i32 346, i32* @c
store i32 20021, i32* @d
- %l9 = load i32* @a
+ %l9 = load i32, i32* @a
store i32 %l8, i32* @a
store i32 %l9, i32* @b
store i32 %add.i, i32* @c
diff --git a/test/CodeGen/X86/misched-code-difference-with-debug.ll b/test/CodeGen/X86/misched-code-difference-with-debug.ll
index fb2a986e561b..0f1f382c49a8 100644
--- a/test/CodeGen/X86/misched-code-difference-with-debug.ll
+++ b/test/CodeGen/X86/misched-code-difference-with-debug.ll
@@ -32,11 +32,11 @@ declare i32 @test_function(%class.C*, i8 signext, i8 signext, i8 signext, ...)
define void @test_without_debug() {
entry:
%c = alloca %class.C, align 1
- %0 = load i8* @argc, align 1
+ %0 = load i8, i8* @argc, align 1
%conv = sext i8 %0 to i32
- %call = call i32 (%class.C*, i8, i8, i8, ...)* @test_function(%class.C* %c, i8 signext 0, i8 signext %0, i8 signext 0, i32 %conv)
- %1 = load i8* @argc, align 1
- %call2 = call i32 (%class.C*, i8, i8, i8, ...)* @test_function(%class.C* %c, i8 signext 0, i8 signext %1, i8 signext 0, i32 %conv)
+ %call = call i32 (%class.C*, i8, i8, i8, ...) @test_function(%class.C* %c, i8 signext 0, i8 signext %0, i8 signext 0, i32 %conv)
+ %1 = load i8, i8* @argc, align 1
+ %call2 = call i32 (%class.C*, i8, i8, i8, ...) @test_function(%class.C* %c, i8 signext 0, i8 signext %1, i8 signext 0, i32 %conv)
ret void
}
@@ -46,14 +46,14 @@ entry:
define void @test_with_debug() {
entry:
%c = alloca %class.C, align 1
- %0 = load i8* @argc, align 1
- tail call void @llvm.dbg.value(metadata i8 %0, i64 0, metadata !19, metadata !29)
+ %0 = load i8, i8* @argc, align 1
+ tail call void @llvm.dbg.value(metadata i8 %0, i64 0, metadata !19, metadata !29), !dbg !DILocation(scope: !13)
%conv = sext i8 %0 to i32
- tail call void @llvm.dbg.value(metadata %class.C* %c, i64 0, metadata !18, metadata !29)
- %call = call i32 (%class.C*, i8, i8, i8, ...)* @test_function(%class.C* %c, i8 signext 0, i8 signext %0, i8 signext 0, i32 %conv)
- %1 = load i8* @argc, align 1
- call void @llvm.dbg.value(metadata %class.C* %c, i64 0, metadata !18, metadata !29)
- %call2 = call i32 (%class.C*, i8, i8, i8, ...)* @test_function(%class.C* %c, i8 signext 0, i8 signext %1, i8 signext 0, i32 %conv)
+ tail call void @llvm.dbg.value(metadata %class.C* %c, i64 0, metadata !18, metadata !29), !dbg !DILocation(scope: !13)
+ %call = call i32 (%class.C*, i8, i8, i8, ...) @test_function(%class.C* %c, i8 signext 0, i8 signext %0, i8 signext 0, i32 %conv)
+ %1 = load i8, i8* @argc, align 1
+ call void @llvm.dbg.value(metadata %class.C* %c, i64 0, metadata !18, metadata !29), !dbg !DILocation(scope: !13)
+ %call2 = call i32 (%class.C*, i8, i8, i8, ...) @test_function(%class.C* %c, i8 signext 0, i8 signext %1, i8 signext 0, i32 %conv)
ret void
}
@@ -62,29 +62,29 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata)
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!22, !23}
-!0 = !{!"", !1, !2, !3, !12, !20, !2} ; [ DW_TAG_compile_unit ] [test.cpp] [DW_LANG_C_plus_plus]
-!1 = !{!"test.cpp", !""}
+!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, enums: !2, retainedTypes: !3, subprograms: !12, globals: !20, imports: !2)
+!1 = !DIFile(filename: "test.cpp", directory: "")
!2 = !{}
!3 = !{!4}
-!4 = !{!"0x2\00C\002\008\008\000\000\000", !1, null, null, !5, null, null, !"_ZTS1C"} ; [ DW_TAG_class_type ] [C] [line 2, size 8, align 8, offset 0] [def] [from ]
+!4 = !DICompositeType(tag: DW_TAG_class_type, name: "C", line: 2, size: 8, align: 8, file: !1, elements: !5, identifier: "_ZTS1C")
!5 = !{!6}
-!6 = !{!"", !1, !"_ZTS1C", !7, null, null, null, null, null} ; [ DW_TAG_subprogram ] [line 4] [public] [test]
-!7 = !{!"", null, null, null, !8, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!6 = !DISubprogram(name: "test", file: !1, scope: !"_ZTS1C", type: !7)
+!7 = !DISubroutineType(types: !8)
!8 = !{!9, !10, !11, !11, !11, null}
-!9 = !{!"", null, null} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
-!10 = !{!"", null, null, !"_ZTS1C"} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from _ZTS1C]
-!11 = !{!"0x24\00char\000\008\008\000\000\006", null, null} ; [ DW_TAG_base_type ] [char] [line 0, size 8, align 8, offset 0, enc DW_ATE_signed_char]
+!9 = !DIBasicType(encoding: DW_ATE_signed, size: 32, align: 32, name: "int")
+!10 = !DIDerivedType(baseType: !"_ZTS1C", tag: DW_TAG_pointer_type, size: 64, align: 64, flags: DIFlagArtificial)
+!11 = !DIBasicType(tag: DW_TAG_base_type, name: "char", size: 8, align: 8, encoding: DW_ATE_signed_char)
!12 = !{!13}
-!13 = !{!"0x2e\00test_with_debug\00test_with_debug\00test_with_debug\006\000\001\000\000\00256\001\006", !1, !14, !15, null, void ()* @test_with_debug, null, null, !17} ; [ DW_TAG_subprogram ] [line 6] [def] [test_with_debug]
-!14 = !{!"0x29", !1}
-!15 = !{!"0x15\00\000\000\000\000\000\000", null, null, null, !16, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!13 = !DISubprogram(name: "test_with_debug", linkageName: "test_with_debug", line: 6, isLocal: false, isDefinition: true, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 6, file: !1, scope: !14, type: !15, function: void ()* @test_with_debug, variables: !17)
+!14 = !DIFile(filename: "test.cpp", directory: "")
+!15 = !DISubroutineType(types: !16)
!16 = !{null}
!17 = !{!18, !19}
-!18 = !{!"0x100\00c\007\000", !13, !14, !"_ZTS1C"} ; [ DW_TAG_auto_variable ] [c] [line 7]
-!19 = !{!"0x100\00lc\008\000", !13, !14, !11} ; [ DW_TAG_auto_variable ] [lc] [line 8]
+!18 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "c", line: 7, scope: !13, file: !14, type: !"_ZTS1C")
+!19 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "lc", line: 8, scope: !13, file: !14, type: !11)
!20 = !{!21}
-!21 = !{!"0x34\00argc\00argc\00\001\000\001", null, !14, !11, i8* @argc, null} ; [ DW_TAG_variable ] [argc] [line 1] [def]
+!21 = !DIGlobalVariable(name: "argc", line: 1, isLocal: false, isDefinition: true, scope: null, file: !14, type: !11, variable: i8* @argc)
!22 = !{i32 2, !"Dwarf Version", i32 4}
-!23 = !{i32 2, !"Debug Info Version", i32 2}
-!25 = !MDLocation(line: 8, column: 3, scope: !13)
-!29 = !{!"0x102"} ; [ DW_TAG_expression ]
+!23 = !{i32 2, !"Debug Info Version", i32 3}
+!25 = !DILocation(line: 8, column: 3, scope: !13)
+!29 = !DIExpression()
diff --git a/test/CodeGen/X86/misched-crash.ll b/test/CodeGen/X86/misched-crash.ll
index 21c3fa3510d6..fa7de1a843ea 100644
--- a/test/CodeGen/X86/misched-crash.ll
+++ b/test/CodeGen/X86/misched-crash.ll
@@ -9,7 +9,7 @@ entry:
%cmp = icmp ult i64 %_x1, %_x2
%cond = select i1 %cmp, i64 %_x1, i64 %_x2
%cond10 = select i1 %cmp, i64 %_x2, i64 %_x1
- %0 = load i64* null, align 8
+ %0 = load i64, i64* null, align 8
%cmp16 = icmp ult i64 %cond, %0
%cmp23 = icmp ugt i64 %cond10, 0
br i1 %cmp16, label %land.lhs.true21, label %return
@@ -26,8 +26,8 @@ if.then24: ; preds = %land.lhs.true21
for.body34.i: ; preds = %for.inc39.i, %if.then24
%index.178.i = phi i64 [ %add21.i, %if.then24 ], [ %inc41.i, %for.inc39.i ]
- %arrayidx35.i = getelementptr inbounds i8* %plane, i64 %index.178.i
- %1 = load i8* %arrayidx35.i, align 1
+ %arrayidx35.i = getelementptr inbounds i8, i8* %plane, i64 %index.178.i
+ %1 = load i8, i8* %arrayidx35.i, align 1
%tobool36.i = icmp eq i8 %1, 0
br i1 %tobool36.i, label %for.inc39.i, label %return
diff --git a/test/CodeGen/X86/misched-fusion.ll b/test/CodeGen/X86/misched-fusion.ll
index 859d92d6978b..0975faacb9ed 100644
--- a/test/CodeGen/X86/misched-fusion.ll
+++ b/test/CodeGen/X86/misched-fusion.ll
@@ -16,11 +16,11 @@ loop:
loop1:
%cond = icmp eq i32* %var, null
- %next.load = load i32** %next.ptr
+ %next.load = load i32*, i32** %next.ptr
br i1 %cond, label %loop, label %loop2
loop2: ; preds = %loop1
- %gep = getelementptr inbounds i32** %next.ptr, i32 1
+ %gep = getelementptr inbounds i32*, i32** %next.ptr, i32 1
store i32* %next.load, i32** undef
br label %loop
}
@@ -42,12 +42,12 @@ loop:
loop1:
%var2 = sub i32 %var, 1
%cond = icmp eq i32 %var2, 0
- %next.load = load i32** %next.ptr
- %next.var = load i32* %next.load
+ %next.load = load i32*, i32** %next.ptr
+ %next.var = load i32, i32* %next.load
br i1 %cond, label %loop, label %loop2
loop2:
- %gep = getelementptr inbounds i32** %next.ptr, i32 1
+ %gep = getelementptr inbounds i32*, i32** %next.ptr, i32 1
store i32* %next.load, i32** undef
br label %loop
}
@@ -70,12 +70,12 @@ loop2a: ; preds = %loop1, %body, %entr
loop1: ; preds = %loop2a, %loop2b
%var2 = sub i32 %var, 1
%cond = icmp slt i32 %var2, 0
- %next.load = load i32** %next.ptr
- %next.var = load i32* %next.load
+ %next.load = load i32*, i32** %next.ptr
+ %next.var = load i32, i32* %next.load
br i1 %cond, label %loop2a, label %loop2b
loop2b: ; preds = %loop1
- %gep = getelementptr inbounds i32** %next.ptr, i32 1
+ %gep = getelementptr inbounds i32*, i32** %next.ptr, i32 1
store i32* %next.load, i32** undef
br label %loop2a
}
@@ -97,12 +97,12 @@ loop2a: ; preds = %loop1, %body, %entr
loop1: ; preds = %loop2a, %loop2b
%var2 = sub i32 %var, 1
%cond = icmp ult i32 %var2, %n
- %next.load = load i32** %next.ptr
- %next.var = load i32* %next.load
+ %next.load = load i32*, i32** %next.ptr
+ %next.var = load i32, i32* %next.load
br i1 %cond, label %loop2a, label %loop2b
loop2b: ; preds = %loop1
- %gep = getelementptr inbounds i32** %next.ptr, i32 1
+ %gep = getelementptr inbounds i32*, i32** %next.ptr, i32 1
store i32* %next.load, i32** undef
br label %loop2a
}
diff --git a/test/CodeGen/X86/misched-matmul.ll b/test/CodeGen/X86/misched-matmul.ll
index 5454b7cf780a..384344691f9b 100644
--- a/test/CodeGen/X86/misched-matmul.ll
+++ b/test/CodeGen/X86/misched-matmul.ll
@@ -14,87 +14,87 @@
define void @wrap_mul4(double* nocapture %Out, [4 x double]* nocapture %A, [4 x double]* nocapture %B) #0 {
entry:
- %arrayidx1.i = getelementptr inbounds [4 x double]* %A, i64 0, i64 0
- %0 = load double* %arrayidx1.i, align 8
- %arrayidx3.i = getelementptr inbounds [4 x double]* %B, i64 0, i64 0
- %1 = load double* %arrayidx3.i, align 8
+ %arrayidx1.i = getelementptr inbounds [4 x double], [4 x double]* %A, i64 0, i64 0
+ %0 = load double, double* %arrayidx1.i, align 8
+ %arrayidx3.i = getelementptr inbounds [4 x double], [4 x double]* %B, i64 0, i64 0
+ %1 = load double, double* %arrayidx3.i, align 8
%mul.i = fmul double %0, %1
- %arrayidx5.i = getelementptr inbounds [4 x double]* %A, i64 0, i64 1
- %2 = load double* %arrayidx5.i, align 8
- %arrayidx7.i = getelementptr inbounds [4 x double]* %B, i64 1, i64 0
- %3 = load double* %arrayidx7.i, align 8
+ %arrayidx5.i = getelementptr inbounds [4 x double], [4 x double]* %A, i64 0, i64 1
+ %2 = load double, double* %arrayidx5.i, align 8
+ %arrayidx7.i = getelementptr inbounds [4 x double], [4 x double]* %B, i64 1, i64 0
+ %3 = load double, double* %arrayidx7.i, align 8
%mul8.i = fmul double %2, %3
%add.i = fadd double %mul.i, %mul8.i
- %arrayidx10.i = getelementptr inbounds [4 x double]* %A, i64 0, i64 2
- %4 = load double* %arrayidx10.i, align 8
- %arrayidx12.i = getelementptr inbounds [4 x double]* %B, i64 2, i64 0
- %5 = load double* %arrayidx12.i, align 8
+ %arrayidx10.i = getelementptr inbounds [4 x double], [4 x double]* %A, i64 0, i64 2
+ %4 = load double, double* %arrayidx10.i, align 8
+ %arrayidx12.i = getelementptr inbounds [4 x double], [4 x double]* %B, i64 2, i64 0
+ %5 = load double, double* %arrayidx12.i, align 8
%mul13.i = fmul double %4, %5
%add14.i = fadd double %add.i, %mul13.i
- %arrayidx16.i = getelementptr inbounds [4 x double]* %A, i64 0, i64 3
- %6 = load double* %arrayidx16.i, align 8
- %arrayidx18.i = getelementptr inbounds [4 x double]* %B, i64 3, i64 0
- %7 = load double* %arrayidx18.i, align 8
+ %arrayidx16.i = getelementptr inbounds [4 x double], [4 x double]* %A, i64 0, i64 3
+ %6 = load double, double* %arrayidx16.i, align 8
+ %arrayidx18.i = getelementptr inbounds [4 x double], [4 x double]* %B, i64 3, i64 0
+ %7 = load double, double* %arrayidx18.i, align 8
%mul19.i = fmul double %6, %7
%add20.i = fadd double %add14.i, %mul19.i
- %arrayidx25.i = getelementptr inbounds [4 x double]* %B, i64 0, i64 1
- %8 = load double* %arrayidx25.i, align 8
+ %arrayidx25.i = getelementptr inbounds [4 x double], [4 x double]* %B, i64 0, i64 1
+ %8 = load double, double* %arrayidx25.i, align 8
%mul26.i = fmul double %0, %8
- %arrayidx30.i = getelementptr inbounds [4 x double]* %B, i64 1, i64 1
- %9 = load double* %arrayidx30.i, align 8
+ %arrayidx30.i = getelementptr inbounds [4 x double], [4 x double]* %B, i64 1, i64 1
+ %9 = load double, double* %arrayidx30.i, align 8
%mul31.i = fmul double %2, %9
%add32.i = fadd double %mul26.i, %mul31.i
- %arrayidx36.i = getelementptr inbounds [4 x double]* %B, i64 2, i64 1
- %10 = load double* %arrayidx36.i, align 8
+ %arrayidx36.i = getelementptr inbounds [4 x double], [4 x double]* %B, i64 2, i64 1
+ %10 = load double, double* %arrayidx36.i, align 8
%mul37.i = fmul double %4, %10
%add38.i = fadd double %add32.i, %mul37.i
- %arrayidx42.i = getelementptr inbounds [4 x double]* %B, i64 3, i64 1
- %11 = load double* %arrayidx42.i, align 8
+ %arrayidx42.i = getelementptr inbounds [4 x double], [4 x double]* %B, i64 3, i64 1
+ %11 = load double, double* %arrayidx42.i, align 8
%mul43.i = fmul double %6, %11
%add44.i = fadd double %add38.i, %mul43.i
- %arrayidx49.i = getelementptr inbounds [4 x double]* %B, i64 0, i64 2
- %12 = load double* %arrayidx49.i, align 8
+ %arrayidx49.i = getelementptr inbounds [4 x double], [4 x double]* %B, i64 0, i64 2
+ %12 = load double, double* %arrayidx49.i, align 8
%mul50.i = fmul double %0, %12
- %arrayidx54.i = getelementptr inbounds [4 x double]* %B, i64 1, i64 2
- %13 = load double* %arrayidx54.i, align 8
+ %arrayidx54.i = getelementptr inbounds [4 x double], [4 x double]* %B, i64 1, i64 2
+ %13 = load double, double* %arrayidx54.i, align 8
%mul55.i = fmul double %2, %13
%add56.i = fadd double %mul50.i, %mul55.i
- %arrayidx60.i = getelementptr inbounds [4 x double]* %B, i64 2, i64 2
- %14 = load double* %arrayidx60.i, align 8
+ %arrayidx60.i = getelementptr inbounds [4 x double], [4 x double]* %B, i64 2, i64 2
+ %14 = load double, double* %arrayidx60.i, align 8
%mul61.i = fmul double %4, %14
%add62.i = fadd double %add56.i, %mul61.i
- %arrayidx66.i = getelementptr inbounds [4 x double]* %B, i64 3, i64 2
- %15 = load double* %arrayidx66.i, align 8
+ %arrayidx66.i = getelementptr inbounds [4 x double], [4 x double]* %B, i64 3, i64 2
+ %15 = load double, double* %arrayidx66.i, align 8
%mul67.i = fmul double %6, %15
%add68.i = fadd double %add62.i, %mul67.i
- %arrayidx73.i = getelementptr inbounds [4 x double]* %B, i64 0, i64 3
- %16 = load double* %arrayidx73.i, align 8
+ %arrayidx73.i = getelementptr inbounds [4 x double], [4 x double]* %B, i64 0, i64 3
+ %16 = load double, double* %arrayidx73.i, align 8
%mul74.i = fmul double %0, %16
- %arrayidx78.i = getelementptr inbounds [4 x double]* %B, i64 1, i64 3
- %17 = load double* %arrayidx78.i, align 8
+ %arrayidx78.i = getelementptr inbounds [4 x double], [4 x double]* %B, i64 1, i64 3
+ %17 = load double, double* %arrayidx78.i, align 8
%mul79.i = fmul double %2, %17
%add80.i = fadd double %mul74.i, %mul79.i
- %arrayidx84.i = getelementptr inbounds [4 x double]* %B, i64 2, i64 3
- %18 = load double* %arrayidx84.i, align 8
+ %arrayidx84.i = getelementptr inbounds [4 x double], [4 x double]* %B, i64 2, i64 3
+ %18 = load double, double* %arrayidx84.i, align 8
%mul85.i = fmul double %4, %18
%add86.i = fadd double %add80.i, %mul85.i
- %arrayidx90.i = getelementptr inbounds [4 x double]* %B, i64 3, i64 3
- %19 = load double* %arrayidx90.i, align 8
+ %arrayidx90.i = getelementptr inbounds [4 x double], [4 x double]* %B, i64 3, i64 3
+ %19 = load double, double* %arrayidx90.i, align 8
%mul91.i = fmul double %6, %19
%add92.i = fadd double %add86.i, %mul91.i
- %arrayidx95.i = getelementptr inbounds [4 x double]* %A, i64 1, i64 0
- %20 = load double* %arrayidx95.i, align 8
+ %arrayidx95.i = getelementptr inbounds [4 x double], [4 x double]* %A, i64 1, i64 0
+ %20 = load double, double* %arrayidx95.i, align 8
%mul98.i = fmul double %1, %20
- %arrayidx100.i = getelementptr inbounds [4 x double]* %A, i64 1, i64 1
- %21 = load double* %arrayidx100.i, align 8
+ %arrayidx100.i = getelementptr inbounds [4 x double], [4 x double]* %A, i64 1, i64 1
+ %21 = load double, double* %arrayidx100.i, align 8
%mul103.i = fmul double %3, %21
%add104.i = fadd double %mul98.i, %mul103.i
- %arrayidx106.i = getelementptr inbounds [4 x double]* %A, i64 1, i64 2
- %22 = load double* %arrayidx106.i, align 8
+ %arrayidx106.i = getelementptr inbounds [4 x double], [4 x double]* %A, i64 1, i64 2
+ %22 = load double, double* %arrayidx106.i, align 8
%mul109.i = fmul double %5, %22
%add110.i = fadd double %add104.i, %mul109.i
- %arrayidx112.i = getelementptr inbounds [4 x double]* %A, i64 1, i64 3
- %23 = load double* %arrayidx112.i, align 8
+ %arrayidx112.i = getelementptr inbounds [4 x double], [4 x double]* %A, i64 1, i64 3
+ %23 = load double, double* %arrayidx112.i, align 8
%mul115.i = fmul double %7, %23
%add116.i = fadd double %add110.i, %mul115.i
%mul122.i = fmul double %8, %20
@@ -118,19 +118,19 @@ entry:
%add182.i = fadd double %add176.i, %mul181.i
%mul187.i = fmul double %19, %23
%add188.i = fadd double %add182.i, %mul187.i
- %arrayidx191.i = getelementptr inbounds [4 x double]* %A, i64 2, i64 0
- %24 = load double* %arrayidx191.i, align 8
+ %arrayidx191.i = getelementptr inbounds [4 x double], [4 x double]* %A, i64 2, i64 0
+ %24 = load double, double* %arrayidx191.i, align 8
%mul194.i = fmul double %1, %24
- %arrayidx196.i = getelementptr inbounds [4 x double]* %A, i64 2, i64 1
- %25 = load double* %arrayidx196.i, align 8
+ %arrayidx196.i = getelementptr inbounds [4 x double], [4 x double]* %A, i64 2, i64 1
+ %25 = load double, double* %arrayidx196.i, align 8
%mul199.i = fmul double %3, %25
%add200.i = fadd double %mul194.i, %mul199.i
- %arrayidx202.i = getelementptr inbounds [4 x double]* %A, i64 2, i64 2
- %26 = load double* %arrayidx202.i, align 8
+ %arrayidx202.i = getelementptr inbounds [4 x double], [4 x double]* %A, i64 2, i64 2
+ %26 = load double, double* %arrayidx202.i, align 8
%mul205.i = fmul double %5, %26
%add206.i = fadd double %add200.i, %mul205.i
- %arrayidx208.i = getelementptr inbounds [4 x double]* %A, i64 2, i64 3
- %27 = load double* %arrayidx208.i, align 8
+ %arrayidx208.i = getelementptr inbounds [4 x double], [4 x double]* %A, i64 2, i64 3
+ %27 = load double, double* %arrayidx208.i, align 8
%mul211.i = fmul double %7, %27
%add212.i = fadd double %add206.i, %mul211.i
%mul218.i = fmul double %8, %24
@@ -154,19 +154,19 @@ entry:
%add278.i = fadd double %add272.i, %mul277.i
%mul283.i = fmul double %19, %27
%add284.i = fadd double %add278.i, %mul283.i
- %arrayidx287.i = getelementptr inbounds [4 x double]* %A, i64 3, i64 0
- %28 = load double* %arrayidx287.i, align 8
+ %arrayidx287.i = getelementptr inbounds [4 x double], [4 x double]* %A, i64 3, i64 0
+ %28 = load double, double* %arrayidx287.i, align 8
%mul290.i = fmul double %1, %28
- %arrayidx292.i = getelementptr inbounds [4 x double]* %A, i64 3, i64 1
- %29 = load double* %arrayidx292.i, align 8
+ %arrayidx292.i = getelementptr inbounds [4 x double], [4 x double]* %A, i64 3, i64 1
+ %29 = load double, double* %arrayidx292.i, align 8
%mul295.i = fmul double %3, %29
%add296.i = fadd double %mul290.i, %mul295.i
- %arrayidx298.i = getelementptr inbounds [4 x double]* %A, i64 3, i64 2
- %30 = load double* %arrayidx298.i, align 8
+ %arrayidx298.i = getelementptr inbounds [4 x double], [4 x double]* %A, i64 3, i64 2
+ %30 = load double, double* %arrayidx298.i, align 8
%mul301.i = fmul double %5, %30
%add302.i = fadd double %add296.i, %mul301.i
- %arrayidx304.i = getelementptr inbounds [4 x double]* %A, i64 3, i64 3
- %31 = load double* %arrayidx304.i, align 8
+ %arrayidx304.i = getelementptr inbounds [4 x double], [4 x double]* %A, i64 3, i64 3
+ %31 = load double, double* %arrayidx304.i, align 8
%mul307.i = fmul double %7, %31
%add308.i = fadd double %add302.i, %mul307.i
%mul314.i = fmul double %8, %28
@@ -191,35 +191,35 @@ entry:
%mul379.i = fmul double %19, %31
%add380.i = fadd double %add374.i, %mul379.i
store double %add20.i, double* %Out, align 8
- %Res.i.sroa.1.8.idx2 = getelementptr inbounds double* %Out, i64 1
+ %Res.i.sroa.1.8.idx2 = getelementptr inbounds double, double* %Out, i64 1
store double %add44.i, double* %Res.i.sroa.1.8.idx2, align 8
- %Res.i.sroa.2.16.idx4 = getelementptr inbounds double* %Out, i64 2
+ %Res.i.sroa.2.16.idx4 = getelementptr inbounds double, double* %Out, i64 2
store double %add68.i, double* %Res.i.sroa.2.16.idx4, align 8
- %Res.i.sroa.3.24.idx6 = getelementptr inbounds double* %Out, i64 3
+ %Res.i.sroa.3.24.idx6 = getelementptr inbounds double, double* %Out, i64 3
store double %add92.i, double* %Res.i.sroa.3.24.idx6, align 8
- %Res.i.sroa.4.32.idx8 = getelementptr inbounds double* %Out, i64 4
+ %Res.i.sroa.4.32.idx8 = getelementptr inbounds double, double* %Out, i64 4
store double %add116.i, double* %Res.i.sroa.4.32.idx8, align 8
- %Res.i.sroa.5.40.idx10 = getelementptr inbounds double* %Out, i64 5
+ %Res.i.sroa.5.40.idx10 = getelementptr inbounds double, double* %Out, i64 5
store double %add140.i, double* %Res.i.sroa.5.40.idx10, align 8
- %Res.i.sroa.6.48.idx12 = getelementptr inbounds double* %Out, i64 6
+ %Res.i.sroa.6.48.idx12 = getelementptr inbounds double, double* %Out, i64 6
store double %add164.i, double* %Res.i.sroa.6.48.idx12, align 8
- %Res.i.sroa.7.56.idx14 = getelementptr inbounds double* %Out, i64 7
+ %Res.i.sroa.7.56.idx14 = getelementptr inbounds double, double* %Out, i64 7
store double %add188.i, double* %Res.i.sroa.7.56.idx14, align 8
- %Res.i.sroa.8.64.idx16 = getelementptr inbounds double* %Out, i64 8
+ %Res.i.sroa.8.64.idx16 = getelementptr inbounds double, double* %Out, i64 8
store double %add212.i, double* %Res.i.sroa.8.64.idx16, align 8
- %Res.i.sroa.9.72.idx18 = getelementptr inbounds double* %Out, i64 9
+ %Res.i.sroa.9.72.idx18 = getelementptr inbounds double, double* %Out, i64 9
store double %add236.i, double* %Res.i.sroa.9.72.idx18, align 8
- %Res.i.sroa.10.80.idx20 = getelementptr inbounds double* %Out, i64 10
+ %Res.i.sroa.10.80.idx20 = getelementptr inbounds double, double* %Out, i64 10
store double %add260.i, double* %Res.i.sroa.10.80.idx20, align 8
- %Res.i.sroa.11.88.idx22 = getelementptr inbounds double* %Out, i64 11
+ %Res.i.sroa.11.88.idx22 = getelementptr inbounds double, double* %Out, i64 11
store double %add284.i, double* %Res.i.sroa.11.88.idx22, align 8
- %Res.i.sroa.12.96.idx24 = getelementptr inbounds double* %Out, i64 12
+ %Res.i.sroa.12.96.idx24 = getelementptr inbounds double, double* %Out, i64 12
store double %add308.i, double* %Res.i.sroa.12.96.idx24, align 8
- %Res.i.sroa.13.104.idx26 = getelementptr inbounds double* %Out, i64 13
+ %Res.i.sroa.13.104.idx26 = getelementptr inbounds double, double* %Out, i64 13
store double %add332.i, double* %Res.i.sroa.13.104.idx26, align 8
- %Res.i.sroa.14.112.idx28 = getelementptr inbounds double* %Out, i64 14
+ %Res.i.sroa.14.112.idx28 = getelementptr inbounds double, double* %Out, i64 14
store double %add356.i, double* %Res.i.sroa.14.112.idx28, align 8
- %Res.i.sroa.15.120.idx30 = getelementptr inbounds double* %Out, i64 15
+ %Res.i.sroa.15.120.idx30 = getelementptr inbounds double, double* %Out, i64 15
store double %add380.i, double* %Res.i.sroa.15.120.idx30, align 8
ret void
}
diff --git a/test/CodeGen/X86/misched-matrix.ll b/test/CodeGen/X86/misched-matrix.ll
index 23b561f6e5db..e62a1d04dad6 100644
--- a/test/CodeGen/X86/misched-matrix.ll
+++ b/test/CodeGen/X86/misched-matrix.ll
@@ -93,58 +93,58 @@ entry:
for.body: ; preds = %for.body, %entry
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
- %arrayidx8 = getelementptr inbounds [4 x i32]* %m1, i64 %indvars.iv, i64 0
- %tmp = load i32* %arrayidx8, align 4
- %arrayidx12 = getelementptr inbounds [4 x i32]* %m2, i64 0, i64 0
- %tmp1 = load i32* %arrayidx12, align 4
- %arrayidx8.1 = getelementptr inbounds [4 x i32]* %m1, i64 %indvars.iv, i64 1
- %tmp2 = load i32* %arrayidx8.1, align 4
- %arrayidx12.1 = getelementptr inbounds [4 x i32]* %m2, i64 1, i64 0
- %tmp3 = load i32* %arrayidx12.1, align 4
- %arrayidx8.2 = getelementptr inbounds [4 x i32]* %m1, i64 %indvars.iv, i64 2
- %tmp4 = load i32* %arrayidx8.2, align 4
- %arrayidx12.2 = getelementptr inbounds [4 x i32]* %m2, i64 2, i64 0
- %tmp5 = load i32* %arrayidx12.2, align 4
- %arrayidx8.3 = getelementptr inbounds [4 x i32]* %m1, i64 %indvars.iv, i64 3
- %tmp6 = load i32* %arrayidx8.3, align 4
- %arrayidx12.3 = getelementptr inbounds [4 x i32]* %m2, i64 3, i64 0
- %tmp8 = load i32* %arrayidx8, align 4
- %arrayidx12.137 = getelementptr inbounds [4 x i32]* %m2, i64 0, i64 1
- %tmp9 = load i32* %arrayidx12.137, align 4
- %tmp10 = load i32* %arrayidx8.1, align 4
- %arrayidx12.1.1 = getelementptr inbounds [4 x i32]* %m2, i64 1, i64 1
- %tmp11 = load i32* %arrayidx12.1.1, align 4
- %tmp12 = load i32* %arrayidx8.2, align 4
- %arrayidx12.2.1 = getelementptr inbounds [4 x i32]* %m2, i64 2, i64 1
- %tmp13 = load i32* %arrayidx12.2.1, align 4
- %tmp14 = load i32* %arrayidx8.3, align 4
- %arrayidx12.3.1 = getelementptr inbounds [4 x i32]* %m2, i64 3, i64 1
- %tmp15 = load i32* %arrayidx12.3.1, align 4
- %tmp16 = load i32* %arrayidx8, align 4
- %arrayidx12.239 = getelementptr inbounds [4 x i32]* %m2, i64 0, i64 2
- %tmp17 = load i32* %arrayidx12.239, align 4
- %tmp18 = load i32* %arrayidx8.1, align 4
- %arrayidx12.1.2 = getelementptr inbounds [4 x i32]* %m2, i64 1, i64 2
- %tmp19 = load i32* %arrayidx12.1.2, align 4
- %tmp20 = load i32* %arrayidx8.2, align 4
- %arrayidx12.2.2 = getelementptr inbounds [4 x i32]* %m2, i64 2, i64 2
- %tmp21 = load i32* %arrayidx12.2.2, align 4
- %tmp22 = load i32* %arrayidx8.3, align 4
- %arrayidx12.3.2 = getelementptr inbounds [4 x i32]* %m2, i64 3, i64 2
- %tmp23 = load i32* %arrayidx12.3.2, align 4
- %tmp24 = load i32* %arrayidx8, align 4
- %arrayidx12.341 = getelementptr inbounds [4 x i32]* %m2, i64 0, i64 3
- %tmp25 = load i32* %arrayidx12.341, align 4
- %tmp26 = load i32* %arrayidx8.1, align 4
- %arrayidx12.1.3 = getelementptr inbounds [4 x i32]* %m2, i64 1, i64 3
- %tmp27 = load i32* %arrayidx12.1.3, align 4
- %tmp28 = load i32* %arrayidx8.2, align 4
- %arrayidx12.2.3 = getelementptr inbounds [4 x i32]* %m2, i64 2, i64 3
- %tmp29 = load i32* %arrayidx12.2.3, align 4
- %tmp30 = load i32* %arrayidx8.3, align 4
- %arrayidx12.3.3 = getelementptr inbounds [4 x i32]* %m2, i64 3, i64 3
- %tmp31 = load i32* %arrayidx12.3.3, align 4
- %tmp7 = load i32* %arrayidx12.3, align 4
+ %arrayidx8 = getelementptr inbounds [4 x i32], [4 x i32]* %m1, i64 %indvars.iv, i64 0
+ %tmp = load i32, i32* %arrayidx8, align 4
+ %arrayidx12 = getelementptr inbounds [4 x i32], [4 x i32]* %m2, i64 0, i64 0
+ %tmp1 = load i32, i32* %arrayidx12, align 4
+ %arrayidx8.1 = getelementptr inbounds [4 x i32], [4 x i32]* %m1, i64 %indvars.iv, i64 1
+ %tmp2 = load i32, i32* %arrayidx8.1, align 4
+ %arrayidx12.1 = getelementptr inbounds [4 x i32], [4 x i32]* %m2, i64 1, i64 0
+ %tmp3 = load i32, i32* %arrayidx12.1, align 4
+ %arrayidx8.2 = getelementptr inbounds [4 x i32], [4 x i32]* %m1, i64 %indvars.iv, i64 2
+ %tmp4 = load i32, i32* %arrayidx8.2, align 4
+ %arrayidx12.2 = getelementptr inbounds [4 x i32], [4 x i32]* %m2, i64 2, i64 0
+ %tmp5 = load i32, i32* %arrayidx12.2, align 4
+ %arrayidx8.3 = getelementptr inbounds [4 x i32], [4 x i32]* %m1, i64 %indvars.iv, i64 3
+ %tmp6 = load i32, i32* %arrayidx8.3, align 4
+ %arrayidx12.3 = getelementptr inbounds [4 x i32], [4 x i32]* %m2, i64 3, i64 0
+ %tmp8 = load i32, i32* %arrayidx8, align 4
+ %arrayidx12.137 = getelementptr inbounds [4 x i32], [4 x i32]* %m2, i64 0, i64 1
+ %tmp9 = load i32, i32* %arrayidx12.137, align 4
+ %tmp10 = load i32, i32* %arrayidx8.1, align 4
+ %arrayidx12.1.1 = getelementptr inbounds [4 x i32], [4 x i32]* %m2, i64 1, i64 1
+ %tmp11 = load i32, i32* %arrayidx12.1.1, align 4
+ %tmp12 = load i32, i32* %arrayidx8.2, align 4
+ %arrayidx12.2.1 = getelementptr inbounds [4 x i32], [4 x i32]* %m2, i64 2, i64 1
+ %tmp13 = load i32, i32* %arrayidx12.2.1, align 4
+ %tmp14 = load i32, i32* %arrayidx8.3, align 4
+ %arrayidx12.3.1 = getelementptr inbounds [4 x i32], [4 x i32]* %m2, i64 3, i64 1
+ %tmp15 = load i32, i32* %arrayidx12.3.1, align 4
+ %tmp16 = load i32, i32* %arrayidx8, align 4
+ %arrayidx12.239 = getelementptr inbounds [4 x i32], [4 x i32]* %m2, i64 0, i64 2
+ %tmp17 = load i32, i32* %arrayidx12.239, align 4
+ %tmp18 = load i32, i32* %arrayidx8.1, align 4
+ %arrayidx12.1.2 = getelementptr inbounds [4 x i32], [4 x i32]* %m2, i64 1, i64 2
+ %tmp19 = load i32, i32* %arrayidx12.1.2, align 4
+ %tmp20 = load i32, i32* %arrayidx8.2, align 4
+ %arrayidx12.2.2 = getelementptr inbounds [4 x i32], [4 x i32]* %m2, i64 2, i64 2
+ %tmp21 = load i32, i32* %arrayidx12.2.2, align 4
+ %tmp22 = load i32, i32* %arrayidx8.3, align 4
+ %arrayidx12.3.2 = getelementptr inbounds [4 x i32], [4 x i32]* %m2, i64 3, i64 2
+ %tmp23 = load i32, i32* %arrayidx12.3.2, align 4
+ %tmp24 = load i32, i32* %arrayidx8, align 4
+ %arrayidx12.341 = getelementptr inbounds [4 x i32], [4 x i32]* %m2, i64 0, i64 3
+ %tmp25 = load i32, i32* %arrayidx12.341, align 4
+ %tmp26 = load i32, i32* %arrayidx8.1, align 4
+ %arrayidx12.1.3 = getelementptr inbounds [4 x i32], [4 x i32]* %m2, i64 1, i64 3
+ %tmp27 = load i32, i32* %arrayidx12.1.3, align 4
+ %tmp28 = load i32, i32* %arrayidx8.2, align 4
+ %arrayidx12.2.3 = getelementptr inbounds [4 x i32], [4 x i32]* %m2, i64 2, i64 3
+ %tmp29 = load i32, i32* %arrayidx12.2.3, align 4
+ %tmp30 = load i32, i32* %arrayidx8.3, align 4
+ %arrayidx12.3.3 = getelementptr inbounds [4 x i32], [4 x i32]* %m2, i64 3, i64 3
+ %tmp31 = load i32, i32* %arrayidx12.3.3, align 4
+ %tmp7 = load i32, i32* %arrayidx12.3, align 4
%mul = mul nsw i32 %tmp1, %tmp
%mul.1 = mul nsw i32 %tmp3, %tmp2
%mul.2 = mul nsw i32 %tmp5, %tmp4
@@ -173,13 +173,13 @@ for.body: ; preds = %for.body, %entry
%add.1.3 = add nsw i32 %mul.1.3, %mul.342
%add.2.3 = add nsw i32 %mul.2.3, %add.1.3
%add.3.3 = add nsw i32 %mul.3.3, %add.2.3
- %arrayidx16 = getelementptr inbounds [4 x i32]* %m3, i64 %indvars.iv, i64 0
+ %arrayidx16 = getelementptr inbounds [4 x i32], [4 x i32]* %m3, i64 %indvars.iv, i64 0
store i32 %add.3, i32* %arrayidx16, align 4
- %arrayidx16.1 = getelementptr inbounds [4 x i32]* %m3, i64 %indvars.iv, i64 1
+ %arrayidx16.1 = getelementptr inbounds [4 x i32], [4 x i32]* %m3, i64 %indvars.iv, i64 1
store i32 %add.3.1, i32* %arrayidx16.1, align 4
- %arrayidx16.2 = getelementptr inbounds [4 x i32]* %m3, i64 %indvars.iv, i64 2
+ %arrayidx16.2 = getelementptr inbounds [4 x i32], [4 x i32]* %m3, i64 %indvars.iv, i64 2
store i32 %add.3.2, i32* %arrayidx16.2, align 4
- %arrayidx16.3 = getelementptr inbounds [4 x i32]* %m3, i64 %indvars.iv, i64 3
+ %arrayidx16.3 = getelementptr inbounds [4 x i32], [4 x i32]* %m3, i64 %indvars.iv, i64 3
store i32 %add.3.3, i32* %arrayidx16.3, align 4
%indvars.iv.next = add i64 %indvars.iv, 1
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
diff --git a/test/CodeGen/X86/misched-new.ll b/test/CodeGen/X86/misched-new.ll
index 89e45b7cfc21..410a7f320643 100644
--- a/test/CodeGen/X86/misched-new.ll
+++ b/test/CodeGen/X86/misched-new.ll
@@ -90,12 +90,12 @@ define void @hasundef() unnamed_addr uwtable ssp align 2 {
; TOPDOWN: movzbl %al
; TOPDOWN: ret
define void @testSubregTracking() nounwind uwtable ssp align 2 {
- %tmp = load i8* undef, align 1
+ %tmp = load i8, i8* undef, align 1
%tmp6 = sub i8 0, %tmp
- %tmp7 = load i8* undef, align 1
+ %tmp7 = load i8, i8* undef, align 1
%tmp8 = udiv i8 %tmp6, %tmp7
%tmp9 = zext i8 %tmp8 to i64
- %tmp10 = load i8* undef, align 1
+ %tmp10 = load i8, i8* undef, align 1
%tmp11 = zext i8 %tmp10 to i64
%tmp12 = mul i64 %tmp11, %tmp9
%tmp13 = urem i8 %tmp6, %tmp7
diff --git a/test/CodeGen/X86/mmx-arg-passing-x86-64.ll b/test/CodeGen/X86/mmx-arg-passing-x86-64.ll
new file mode 100644
index 000000000000..2727e3eb0280
--- /dev/null
+++ b/test/CodeGen/X86/mmx-arg-passing-x86-64.ll
@@ -0,0 +1,54 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+mmx,+sse2 | FileCheck %s --check-prefix=X86-64
+;
+; On Darwin x86-64, v8i8, v4i16, v2i32 values are passed in XMM[0-7].
+; On Darwin x86-64, v1i64 values are passed in 64-bit GPRs.
+
+@g_v8qi = external global <8 x i8>
+
+define void @t3() nounwind {
+; X86-64-LABEL: t3:
+; X86-64: ## BB#0:
+; X86-64-NEXT: movq _g_v8qi@{{.*}}(%rip), %rax
+; X86-64-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
+; X86-64-NEXT: movb $1, %al
+; X86-64-NEXT: jmp _pass_v8qi ## TAILCALL
+ %tmp3 = load <8 x i8>, <8 x i8>* @g_v8qi, align 8
+ %tmp3a = bitcast <8 x i8> %tmp3 to x86_mmx
+ %tmp4 = tail call i32 (...) @pass_v8qi( x86_mmx %tmp3a ) nounwind
+ ret void
+}
+
+define void @t4(x86_mmx %v1, x86_mmx %v2) nounwind {
+; X86-64-LABEL: t4:
+; X86-64: ## BB#0:
+; X86-64-NEXT: movdq2q %xmm1, %mm0
+; X86-64-NEXT: movq %mm0, -{{[0-9]+}}(%rsp)
+; X86-64-NEXT: movdq2q %xmm0, %mm0
+; X86-64-NEXT: movq %mm0, -{{[0-9]+}}(%rsp)
+; X86-64-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
+; X86-64-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
+; X86-64-NEXT: paddb %xmm1, %xmm0
+; X86-64-NEXT: movb $1, %al
+; X86-64-NEXT: jmp _pass_v8qi ## TAILCALL
+ %v1a = bitcast x86_mmx %v1 to <8 x i8>
+ %v2b = bitcast x86_mmx %v2 to <8 x i8>
+ %tmp3 = add <8 x i8> %v1a, %v2b
+ %tmp3a = bitcast <8 x i8> %tmp3 to x86_mmx
+ %tmp4 = tail call i32 (...) @pass_v8qi( x86_mmx %tmp3a ) nounwind
+ ret void
+}
+
+define void @t5() nounwind {
+; X86-64-LABEL: t5:
+; X86-64: ## BB#0:
+; X86-64-NEXT: pushq %rax
+; X86-64-NEXT: xorl %edi, %edi
+; X86-64-NEXT: callq _pass_v1di
+; X86-64-NEXT: popq %rax
+; X86-64-NEXT: retq
+ call void @pass_v1di( <1 x i64> zeroinitializer )
+ ret void
+}
+
+declare i32 @pass_v8qi(...)
+declare void @pass_v1di(<1 x i64>)
diff --git a/test/CodeGen/X86/mmx-arg-passing.ll b/test/CodeGen/X86/mmx-arg-passing.ll
index 3a0fb95711e5..4e0031076200 100644
--- a/test/CodeGen/X86/mmx-arg-passing.ll
+++ b/test/CodeGen/X86/mmx-arg-passing.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+mmx | FileCheck %s -check-prefix=X86-32
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+mmx,+sse2 | FileCheck %s -check-prefix=X86-64
+; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+mmx | FileCheck %s --check-prefix=X86-32
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+mmx,+sse2 | FileCheck %s --check-prefix=X86-64
;
; On Darwin x86-32, v8i8, v4i16, v2i32 values are passed in MM[0-2].
; On Darwin x86-32, v1i64 values are passed in memory. In this example, they
@@ -10,29 +10,40 @@
@u1 = external global x86_mmx
define void @t1(x86_mmx %v1) nounwind {
- store x86_mmx %v1, x86_mmx* @u1, align 8
- ret void
-
; X86-32-LABEL: t1:
-; X86-32: movq %mm0
-
+; X86-32: ## BB#0:
+; X86-32-NEXT: movl L_u1$non_lazy_ptr, %eax
+; X86-32-NEXT: movq %mm0, (%eax)
+; X86-32-NEXT: retl
+;
; X86-64-LABEL: t1:
-; X86-64: movdq2q %xmm0
-; X86-64: movq %mm0
+; X86-64: ## BB#0:
+; X86-64-NEXT: movdq2q %xmm0, %mm0
+; X86-64-NEXT: movq _u1@{{.*}}(%rip), %rax
+; X86-64-NEXT: movq %mm0, (%rax)
+; X86-64-NEXT: retq
+ store x86_mmx %v1, x86_mmx* @u1, align 8
+ ret void
}
@u2 = external global x86_mmx
define void @t2(<1 x i64> %v1) nounwind {
+; X86-32-LABEL: t2:
+; X86-32: ## BB#0:
+; X86-32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-32-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-32-NEXT: movl L_u2$non_lazy_ptr, %edx
+; X86-32-NEXT: movl %ecx, 4(%edx)
+; X86-32-NEXT: movl %eax, (%edx)
+; X86-32-NEXT: retl
+;
+; X86-64-LABEL: t2:
+; X86-64: ## BB#0:
+; X86-64-NEXT: movq _u2@{{.*}}(%rip), %rax
+; X86-64-NEXT: movq %rdi, (%rax)
+; X86-64-NEXT: retq
%tmp = bitcast <1 x i64> %v1 to x86_mmx
store x86_mmx %tmp, x86_mmx* @u2, align 8
ret void
-
-; X86-32-LABEL: t2:
-; X86-32: movl 4(%esp)
-; X86-32: movl 8(%esp)
-
-; X86-64-LABEL: t2:
-; X86-64: movq %rdi
}
-
diff --git a/test/CodeGen/X86/mmx-arg-passing2.ll b/test/CodeGen/X86/mmx-arg-passing2.ll
deleted file mode 100644
index c132d311b94b..000000000000
--- a/test/CodeGen/X86/mmx-arg-passing2.ll
+++ /dev/null
@@ -1,28 +0,0 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+mmx,+sse2 | grep movdq2q | count 2
-; Since the add is not an MMX add, we don't have a movq2dq any more.
-
-@g_v8qi = external global <8 x i8>
-
-define void @t1() nounwind {
- %tmp3 = load <8 x i8>* @g_v8qi, align 8
- %tmp3a = bitcast <8 x i8> %tmp3 to x86_mmx
- %tmp4 = tail call i32 (...)* @pass_v8qi( x86_mmx %tmp3a ) nounwind
- ret void
-}
-
-define void @t2(x86_mmx %v1, x86_mmx %v2) nounwind {
- %v1a = bitcast x86_mmx %v1 to <8 x i8>
- %v2b = bitcast x86_mmx %v2 to <8 x i8>
- %tmp3 = add <8 x i8> %v1a, %v2b
- %tmp3a = bitcast <8 x i8> %tmp3 to x86_mmx
- %tmp4 = tail call i32 (...)* @pass_v8qi( x86_mmx %tmp3a ) nounwind
- ret void
-}
-
-define void @t3() nounwind {
- call void @pass_v1di( <1 x i64> zeroinitializer )
- ret void
-}
-
-declare i32 @pass_v8qi(...)
-declare void @pass_v1di(<1 x i64>)
diff --git a/test/CodeGen/X86/mmx-arith.ll b/test/CodeGen/X86/mmx-arith.ll
index 681748732401..114d2535d603 100644
--- a/test/CodeGen/X86/mmx-arith.ll
+++ b/test/CodeGen/X86/mmx-arith.ll
@@ -1,309 +1,308 @@
-; RUN: llc < %s -march=x86 -mattr=+mmx
+; RUN: llc < %s -march=x86 -mattr=+mmx,+sse2 | FileCheck -check-prefix=X32 %s
+; RUN: llc < %s -march=x86-64 -mattr=+mmx,+sse2 | FileCheck -check-prefix=X64 %s
;; A basic sanity check to make sure that MMX arithmetic actually compiles.
;; First is a straight translation of the original with bitcasts as needed.
-define void @foo(x86_mmx* %A, x86_mmx* %B) {
+; X32-LABEL: test0
+; X64-LABEL: test0
+define void @test0(x86_mmx* %A, x86_mmx* %B) {
entry:
- %tmp1 = load x86_mmx* %A ; <x86_mmx> [#uses=1]
- %tmp3 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
- %tmp1a = bitcast x86_mmx %tmp1 to <8 x i8>
- %tmp3a = bitcast x86_mmx %tmp3 to <8 x i8>
- %tmp4 = add <8 x i8> %tmp1a, %tmp3a ; <<8 x i8>> [#uses=2]
- %tmp4a = bitcast <8 x i8> %tmp4 to x86_mmx
- store x86_mmx %tmp4a, x86_mmx* %A
- %tmp7 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
- %tmp12 = tail call x86_mmx @llvm.x86.mmx.padds.b( x86_mmx %tmp4a, x86_mmx %tmp7 ) ; <x86_mmx> [#uses=2]
- store x86_mmx %tmp12, x86_mmx* %A
- %tmp16 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
- %tmp21 = tail call x86_mmx @llvm.x86.mmx.paddus.b( x86_mmx %tmp12, x86_mmx %tmp16 ) ; <x86_mmx> [#uses=2]
- store x86_mmx %tmp21, x86_mmx* %A
- %tmp27 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
- %tmp21a = bitcast x86_mmx %tmp21 to <8 x i8>
- %tmp27a = bitcast x86_mmx %tmp27 to <8 x i8>
- %tmp28 = sub <8 x i8> %tmp21a, %tmp27a ; <<8 x i8>> [#uses=2]
- %tmp28a = bitcast <8 x i8> %tmp28 to x86_mmx
- store x86_mmx %tmp28a, x86_mmx* %A
- %tmp31 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
- %tmp36 = tail call x86_mmx @llvm.x86.mmx.psubs.b( x86_mmx %tmp28a, x86_mmx %tmp31 ) ; <x86_mmx> [#uses=2]
- store x86_mmx %tmp36, x86_mmx* %A
- %tmp40 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
- %tmp45 = tail call x86_mmx @llvm.x86.mmx.psubus.b( x86_mmx %tmp36, x86_mmx %tmp40 ) ; <x86_mmx> [#uses=2]
- store x86_mmx %tmp45, x86_mmx* %A
- %tmp51 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
- %tmp45a = bitcast x86_mmx %tmp45 to <8 x i8>
- %tmp51a = bitcast x86_mmx %tmp51 to <8 x i8>
- %tmp52 = mul <8 x i8> %tmp45a, %tmp51a ; <<8 x i8>> [#uses=2]
- %tmp52a = bitcast <8 x i8> %tmp52 to x86_mmx
- store x86_mmx %tmp52a, x86_mmx* %A
- %tmp57 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
- %tmp57a = bitcast x86_mmx %tmp57 to <8 x i8>
- %tmp58 = and <8 x i8> %tmp52, %tmp57a ; <<8 x i8>> [#uses=2]
- %tmp58a = bitcast <8 x i8> %tmp58 to x86_mmx
- store x86_mmx %tmp58a, x86_mmx* %A
- %tmp63 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
- %tmp63a = bitcast x86_mmx %tmp63 to <8 x i8>
- %tmp64 = or <8 x i8> %tmp58, %tmp63a ; <<8 x i8>> [#uses=2]
- %tmp64a = bitcast <8 x i8> %tmp64 to x86_mmx
- store x86_mmx %tmp64a, x86_mmx* %A
- %tmp69 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
- %tmp69a = bitcast x86_mmx %tmp69 to <8 x i8>
- %tmp64b = bitcast x86_mmx %tmp64a to <8 x i8>
- %tmp70 = xor <8 x i8> %tmp64b, %tmp69a ; <<8 x i8>> [#uses=1]
- %tmp70a = bitcast <8 x i8> %tmp70 to x86_mmx
- store x86_mmx %tmp70a, x86_mmx* %A
- tail call void @llvm.x86.mmx.emms( )
- ret void
+ %tmp1 = load x86_mmx, x86_mmx* %A
+ %tmp3 = load x86_mmx, x86_mmx* %B
+ %tmp1a = bitcast x86_mmx %tmp1 to <8 x i8>
+ %tmp3a = bitcast x86_mmx %tmp3 to <8 x i8>
+ %tmp4 = add <8 x i8> %tmp1a, %tmp3a
+ %tmp4a = bitcast <8 x i8> %tmp4 to x86_mmx
+ store x86_mmx %tmp4a, x86_mmx* %A
+ %tmp7 = load x86_mmx, x86_mmx* %B
+ %tmp12 = tail call x86_mmx @llvm.x86.mmx.padds.b(x86_mmx %tmp4a, x86_mmx %tmp7)
+ store x86_mmx %tmp12, x86_mmx* %A
+ %tmp16 = load x86_mmx, x86_mmx* %B
+ %tmp21 = tail call x86_mmx @llvm.x86.mmx.paddus.b(x86_mmx %tmp12, x86_mmx %tmp16)
+ store x86_mmx %tmp21, x86_mmx* %A
+ %tmp27 = load x86_mmx, x86_mmx* %B
+ %tmp21a = bitcast x86_mmx %tmp21 to <8 x i8>
+ %tmp27a = bitcast x86_mmx %tmp27 to <8 x i8>
+ %tmp28 = sub <8 x i8> %tmp21a, %tmp27a
+ %tmp28a = bitcast <8 x i8> %tmp28 to x86_mmx
+ store x86_mmx %tmp28a, x86_mmx* %A
+ %tmp31 = load x86_mmx, x86_mmx* %B
+ %tmp36 = tail call x86_mmx @llvm.x86.mmx.psubs.b(x86_mmx %tmp28a, x86_mmx %tmp31)
+ store x86_mmx %tmp36, x86_mmx* %A
+ %tmp40 = load x86_mmx, x86_mmx* %B
+ %tmp45 = tail call x86_mmx @llvm.x86.mmx.psubus.b(x86_mmx %tmp36, x86_mmx %tmp40)
+ store x86_mmx %tmp45, x86_mmx* %A
+ %tmp51 = load x86_mmx, x86_mmx* %B
+ %tmp45a = bitcast x86_mmx %tmp45 to <8 x i8>
+ %tmp51a = bitcast x86_mmx %tmp51 to <8 x i8>
+ %tmp52 = mul <8 x i8> %tmp45a, %tmp51a
+ %tmp52a = bitcast <8 x i8> %tmp52 to x86_mmx
+ store x86_mmx %tmp52a, x86_mmx* %A
+ %tmp57 = load x86_mmx, x86_mmx* %B
+ %tmp57a = bitcast x86_mmx %tmp57 to <8 x i8>
+ %tmp58 = and <8 x i8> %tmp52, %tmp57a
+ %tmp58a = bitcast <8 x i8> %tmp58 to x86_mmx
+ store x86_mmx %tmp58a, x86_mmx* %A
+ %tmp63 = load x86_mmx, x86_mmx* %B
+ %tmp63a = bitcast x86_mmx %tmp63 to <8 x i8>
+ %tmp64 = or <8 x i8> %tmp58, %tmp63a
+ %tmp64a = bitcast <8 x i8> %tmp64 to x86_mmx
+ store x86_mmx %tmp64a, x86_mmx* %A
+ %tmp69 = load x86_mmx, x86_mmx* %B
+ %tmp69a = bitcast x86_mmx %tmp69 to <8 x i8>
+ %tmp64b = bitcast x86_mmx %tmp64a to <8 x i8>
+ %tmp70 = xor <8 x i8> %tmp64b, %tmp69a
+ %tmp70a = bitcast <8 x i8> %tmp70 to x86_mmx
+ store x86_mmx %tmp70a, x86_mmx* %A
+ tail call void @llvm.x86.mmx.emms()
+ ret void
}
-define void @baz(x86_mmx* %A, x86_mmx* %B) {
+; X32-LABEL: test1
+; X64-LABEL: test1
+define void @test1(x86_mmx* %A, x86_mmx* %B) {
entry:
- %tmp1 = load x86_mmx* %A ; <x86_mmx> [#uses=1]
- %tmp3 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
- %tmp1a = bitcast x86_mmx %tmp1 to <2 x i32>
- %tmp3a = bitcast x86_mmx %tmp3 to <2 x i32>
- %tmp4 = add <2 x i32> %tmp1a, %tmp3a ; <<2 x i32>> [#uses=2]
- %tmp4a = bitcast <2 x i32> %tmp4 to x86_mmx
- store x86_mmx %tmp4a, x86_mmx* %A
- %tmp9 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
- %tmp9a = bitcast x86_mmx %tmp9 to <2 x i32>
- %tmp10 = sub <2 x i32> %tmp4, %tmp9a ; <<2 x i32>> [#uses=2]
- %tmp10a = bitcast <2 x i32> %tmp4 to x86_mmx
- store x86_mmx %tmp10a, x86_mmx* %A
- %tmp15 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
- %tmp10b = bitcast x86_mmx %tmp10a to <2 x i32>
- %tmp15a = bitcast x86_mmx %tmp15 to <2 x i32>
- %tmp16 = mul <2 x i32> %tmp10b, %tmp15a ; <<2 x i32>> [#uses=2]
- %tmp16a = bitcast <2 x i32> %tmp16 to x86_mmx
- store x86_mmx %tmp16a, x86_mmx* %A
- %tmp21 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
- %tmp16b = bitcast x86_mmx %tmp16a to <2 x i32>
- %tmp21a = bitcast x86_mmx %tmp21 to <2 x i32>
- %tmp22 = and <2 x i32> %tmp16b, %tmp21a ; <<2 x i32>> [#uses=2]
- %tmp22a = bitcast <2 x i32> %tmp22 to x86_mmx
- store x86_mmx %tmp22a, x86_mmx* %A
- %tmp27 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
- %tmp22b = bitcast x86_mmx %tmp22a to <2 x i32>
- %tmp27a = bitcast x86_mmx %tmp27 to <2 x i32>
- %tmp28 = or <2 x i32> %tmp22b, %tmp27a ; <<2 x i32>> [#uses=2]
- %tmp28a = bitcast <2 x i32> %tmp28 to x86_mmx
- store x86_mmx %tmp28a, x86_mmx* %A
- %tmp33 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
- %tmp28b = bitcast x86_mmx %tmp28a to <2 x i32>
- %tmp33a = bitcast x86_mmx %tmp33 to <2 x i32>
- %tmp34 = xor <2 x i32> %tmp28b, %tmp33a ; <<2 x i32>> [#uses=1]
- %tmp34a = bitcast <2 x i32> %tmp34 to x86_mmx
- store x86_mmx %tmp34a, x86_mmx* %A
- tail call void @llvm.x86.mmx.emms( )
- ret void
+ %tmp1 = load x86_mmx, x86_mmx* %A
+ %tmp3 = load x86_mmx, x86_mmx* %B
+ %tmp1a = bitcast x86_mmx %tmp1 to <2 x i32>
+ %tmp3a = bitcast x86_mmx %tmp3 to <2 x i32>
+ %tmp4 = add <2 x i32> %tmp1a, %tmp3a
+ %tmp4a = bitcast <2 x i32> %tmp4 to x86_mmx
+ store x86_mmx %tmp4a, x86_mmx* %A
+ %tmp9 = load x86_mmx, x86_mmx* %B
+ %tmp9a = bitcast x86_mmx %tmp9 to <2 x i32>
+ %tmp10 = sub <2 x i32> %tmp4, %tmp9a
+ %tmp10a = bitcast <2 x i32> %tmp4 to x86_mmx
+ store x86_mmx %tmp10a, x86_mmx* %A
+ %tmp15 = load x86_mmx, x86_mmx* %B
+ %tmp10b = bitcast x86_mmx %tmp10a to <2 x i32>
+ %tmp15a = bitcast x86_mmx %tmp15 to <2 x i32>
+ %tmp16 = mul <2 x i32> %tmp10b, %tmp15a
+ %tmp16a = bitcast <2 x i32> %tmp16 to x86_mmx
+ store x86_mmx %tmp16a, x86_mmx* %A
+ %tmp21 = load x86_mmx, x86_mmx* %B
+ %tmp16b = bitcast x86_mmx %tmp16a to <2 x i32>
+ %tmp21a = bitcast x86_mmx %tmp21 to <2 x i32>
+ %tmp22 = and <2 x i32> %tmp16b, %tmp21a
+ %tmp22a = bitcast <2 x i32> %tmp22 to x86_mmx
+ store x86_mmx %tmp22a, x86_mmx* %A
+ %tmp27 = load x86_mmx, x86_mmx* %B
+ %tmp22b = bitcast x86_mmx %tmp22a to <2 x i32>
+ %tmp27a = bitcast x86_mmx %tmp27 to <2 x i32>
+ %tmp28 = or <2 x i32> %tmp22b, %tmp27a
+ %tmp28a = bitcast <2 x i32> %tmp28 to x86_mmx
+ store x86_mmx %tmp28a, x86_mmx* %A
+ %tmp33 = load x86_mmx, x86_mmx* %B
+ %tmp28b = bitcast x86_mmx %tmp28a to <2 x i32>
+ %tmp33a = bitcast x86_mmx %tmp33 to <2 x i32>
+ %tmp34 = xor <2 x i32> %tmp28b, %tmp33a
+ %tmp34a = bitcast <2 x i32> %tmp34 to x86_mmx
+ store x86_mmx %tmp34a, x86_mmx* %A
+ tail call void @llvm.x86.mmx.emms( )
+ ret void
}
-define void @bar(x86_mmx* %A, x86_mmx* %B) {
+; X32-LABEL: test2
+; X64-LABEL: test2
+define void @test2(x86_mmx* %A, x86_mmx* %B) {
entry:
- %tmp1 = load x86_mmx* %A ; <x86_mmx> [#uses=1]
- %tmp3 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
- %tmp1a = bitcast x86_mmx %tmp1 to <4 x i16>
- %tmp3a = bitcast x86_mmx %tmp3 to <4 x i16>
- %tmp4 = add <4 x i16> %tmp1a, %tmp3a ; <<4 x i16>> [#uses=2]
- %tmp4a = bitcast <4 x i16> %tmp4 to x86_mmx
- store x86_mmx %tmp4a, x86_mmx* %A
- %tmp7 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
- %tmp12 = tail call x86_mmx @llvm.x86.mmx.padds.w( x86_mmx %tmp4a, x86_mmx %tmp7 ) ; <x86_mmx> [#uses=2]
- store x86_mmx %tmp12, x86_mmx* %A
- %tmp16 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
- %tmp21 = tail call x86_mmx @llvm.x86.mmx.paddus.w( x86_mmx %tmp12, x86_mmx %tmp16 ) ; <x86_mmx> [#uses=2]
- store x86_mmx %tmp21, x86_mmx* %A
- %tmp27 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
- %tmp21a = bitcast x86_mmx %tmp21 to <4 x i16>
- %tmp27a = bitcast x86_mmx %tmp27 to <4 x i16>
- %tmp28 = sub <4 x i16> %tmp21a, %tmp27a ; <<4 x i16>> [#uses=2]
- %tmp28a = bitcast <4 x i16> %tmp28 to x86_mmx
- store x86_mmx %tmp28a, x86_mmx* %A
- %tmp31 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
- %tmp36 = tail call x86_mmx @llvm.x86.mmx.psubs.w( x86_mmx %tmp28a, x86_mmx %tmp31 ) ; <x86_mmx> [#uses=2]
- store x86_mmx %tmp36, x86_mmx* %A
- %tmp40 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
- %tmp45 = tail call x86_mmx @llvm.x86.mmx.psubus.w( x86_mmx %tmp36, x86_mmx %tmp40 ) ; <x86_mmx> [#uses=2]
- store x86_mmx %tmp45, x86_mmx* %A
- %tmp51 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
- %tmp45a = bitcast x86_mmx %tmp45 to <4 x i16>
- %tmp51a = bitcast x86_mmx %tmp51 to <4 x i16>
- %tmp52 = mul <4 x i16> %tmp45a, %tmp51a ; <<4 x i16>> [#uses=2]
- %tmp52a = bitcast <4 x i16> %tmp52 to x86_mmx
- store x86_mmx %tmp52a, x86_mmx* %A
- %tmp55 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
- %tmp60 = tail call x86_mmx @llvm.x86.mmx.pmulh.w( x86_mmx %tmp52a, x86_mmx %tmp55 ) ; <x86_mmx> [#uses=2]
- store x86_mmx %tmp60, x86_mmx* %A
- %tmp64 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
- %tmp69 = tail call x86_mmx @llvm.x86.mmx.pmadd.wd( x86_mmx %tmp60, x86_mmx %tmp64 ) ; <x86_mmx> [#uses=1]
- %tmp70 = bitcast x86_mmx %tmp69 to x86_mmx ; <x86_mmx> [#uses=2]
- store x86_mmx %tmp70, x86_mmx* %A
- %tmp75 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
- %tmp70a = bitcast x86_mmx %tmp70 to <4 x i16>
- %tmp75a = bitcast x86_mmx %tmp75 to <4 x i16>
- %tmp76 = and <4 x i16> %tmp70a, %tmp75a ; <<4 x i16>> [#uses=2]
- %tmp76a = bitcast <4 x i16> %tmp76 to x86_mmx
- store x86_mmx %tmp76a, x86_mmx* %A
- %tmp81 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
- %tmp76b = bitcast x86_mmx %tmp76a to <4 x i16>
- %tmp81a = bitcast x86_mmx %tmp81 to <4 x i16>
- %tmp82 = or <4 x i16> %tmp76b, %tmp81a ; <<4 x i16>> [#uses=2]
- %tmp82a = bitcast <4 x i16> %tmp82 to x86_mmx
- store x86_mmx %tmp82a, x86_mmx* %A
- %tmp87 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
- %tmp82b = bitcast x86_mmx %tmp82a to <4 x i16>
- %tmp87a = bitcast x86_mmx %tmp87 to <4 x i16>
- %tmp88 = xor <4 x i16> %tmp82b, %tmp87a ; <<4 x i16>> [#uses=1]
- %tmp88a = bitcast <4 x i16> %tmp88 to x86_mmx
- store x86_mmx %tmp88a, x86_mmx* %A
- tail call void @llvm.x86.mmx.emms( )
- ret void
+ %tmp1 = load x86_mmx, x86_mmx* %A
+ %tmp3 = load x86_mmx, x86_mmx* %B
+ %tmp1a = bitcast x86_mmx %tmp1 to <4 x i16>
+ %tmp3a = bitcast x86_mmx %tmp3 to <4 x i16>
+ %tmp4 = add <4 x i16> %tmp1a, %tmp3a
+ %tmp4a = bitcast <4 x i16> %tmp4 to x86_mmx
+ store x86_mmx %tmp4a, x86_mmx* %A
+ %tmp7 = load x86_mmx, x86_mmx* %B
+ %tmp12 = tail call x86_mmx @llvm.x86.mmx.padds.w(x86_mmx %tmp4a, x86_mmx %tmp7)
+ store x86_mmx %tmp12, x86_mmx* %A
+ %tmp16 = load x86_mmx, x86_mmx* %B
+ %tmp21 = tail call x86_mmx @llvm.x86.mmx.paddus.w(x86_mmx %tmp12, x86_mmx %tmp16)
+ store x86_mmx %tmp21, x86_mmx* %A
+ %tmp27 = load x86_mmx, x86_mmx* %B
+ %tmp21a = bitcast x86_mmx %tmp21 to <4 x i16>
+ %tmp27a = bitcast x86_mmx %tmp27 to <4 x i16>
+ %tmp28 = sub <4 x i16> %tmp21a, %tmp27a
+ %tmp28a = bitcast <4 x i16> %tmp28 to x86_mmx
+ store x86_mmx %tmp28a, x86_mmx* %A
+ %tmp31 = load x86_mmx, x86_mmx* %B
+ %tmp36 = tail call x86_mmx @llvm.x86.mmx.psubs.w(x86_mmx %tmp28a, x86_mmx %tmp31)
+ store x86_mmx %tmp36, x86_mmx* %A
+ %tmp40 = load x86_mmx, x86_mmx* %B
+ %tmp45 = tail call x86_mmx @llvm.x86.mmx.psubus.w(x86_mmx %tmp36, x86_mmx %tmp40)
+ store x86_mmx %tmp45, x86_mmx* %A
+ %tmp51 = load x86_mmx, x86_mmx* %B
+ %tmp45a = bitcast x86_mmx %tmp45 to <4 x i16>
+ %tmp51a = bitcast x86_mmx %tmp51 to <4 x i16>
+ %tmp52 = mul <4 x i16> %tmp45a, %tmp51a
+ %tmp52a = bitcast <4 x i16> %tmp52 to x86_mmx
+ store x86_mmx %tmp52a, x86_mmx* %A
+ %tmp55 = load x86_mmx, x86_mmx* %B
+ %tmp60 = tail call x86_mmx @llvm.x86.mmx.pmulh.w(x86_mmx %tmp52a, x86_mmx %tmp55)
+ store x86_mmx %tmp60, x86_mmx* %A
+ %tmp64 = load x86_mmx, x86_mmx* %B
+ %tmp69 = tail call x86_mmx @llvm.x86.mmx.pmadd.wd(x86_mmx %tmp60, x86_mmx %tmp64)
+ %tmp70 = bitcast x86_mmx %tmp69 to x86_mmx
+ store x86_mmx %tmp70, x86_mmx* %A
+ %tmp75 = load x86_mmx, x86_mmx* %B
+ %tmp70a = bitcast x86_mmx %tmp70 to <4 x i16>
+ %tmp75a = bitcast x86_mmx %tmp75 to <4 x i16>
+ %tmp76 = and <4 x i16> %tmp70a, %tmp75a
+ %tmp76a = bitcast <4 x i16> %tmp76 to x86_mmx
+ store x86_mmx %tmp76a, x86_mmx* %A
+ %tmp81 = load x86_mmx, x86_mmx* %B
+ %tmp76b = bitcast x86_mmx %tmp76a to <4 x i16>
+ %tmp81a = bitcast x86_mmx %tmp81 to <4 x i16>
+ %tmp82 = or <4 x i16> %tmp76b, %tmp81a
+ %tmp82a = bitcast <4 x i16> %tmp82 to x86_mmx
+ store x86_mmx %tmp82a, x86_mmx* %A
+ %tmp87 = load x86_mmx, x86_mmx* %B
+ %tmp82b = bitcast x86_mmx %tmp82a to <4 x i16>
+ %tmp87a = bitcast x86_mmx %tmp87 to <4 x i16>
+ %tmp88 = xor <4 x i16> %tmp82b, %tmp87a
+ %tmp88a = bitcast <4 x i16> %tmp88 to x86_mmx
+ store x86_mmx %tmp88a, x86_mmx* %A
+ tail call void @llvm.x86.mmx.emms( )
+ ret void
}
-;; The following is modified to use MMX intrinsics everywhere they work.
+; X32-LABEL: test3
+define <1 x i64> @test3(<1 x i64>* %a, <1 x i64>* %b, i32 %count) nounwind {
+entry:
+ %tmp2942 = icmp eq i32 %count, 0
+ br i1 %tmp2942, label %bb31, label %bb26
+
+bb26:
+; X32: addl
+; X32: adcl
+ %i.037.0 = phi i32 [ 0, %entry ], [ %tmp25, %bb26 ]
+ %sum.035.0 = phi <1 x i64> [ zeroinitializer, %entry ], [ %tmp22, %bb26 ]
+ %tmp13 = getelementptr <1 x i64>, <1 x i64>* %b, i32 %i.037.0
+ %tmp14 = load <1 x i64>, <1 x i64>* %tmp13
+ %tmp18 = getelementptr <1 x i64>, <1 x i64>* %a, i32 %i.037.0
+ %tmp19 = load <1 x i64>, <1 x i64>* %tmp18
+ %tmp21 = add <1 x i64> %tmp19, %tmp14
+ %tmp22 = add <1 x i64> %tmp21, %sum.035.0
+ %tmp25 = add i32 %i.037.0, 1
+ %tmp29 = icmp ult i32 %tmp25, %count
+ br i1 %tmp29, label %bb26, label %bb31
+
+bb31:
+ %sum.035.1 = phi <1 x i64> [ zeroinitializer, %entry ], [ %tmp22, %bb26 ]
+ ret <1 x i64> %sum.035.1
+}
-define void @fooa(x86_mmx* %A, x86_mmx* %B) {
+; There are no MMX operations here, so we use XMM or i64.
+; X64-LABEL: ti8
+define void @ti8(double %a, double %b) nounwind {
entry:
- %tmp1 = load x86_mmx* %A ; <x86_mmx> [#uses=1]
- %tmp3 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
- %tmp4 = tail call x86_mmx @llvm.x86.mmx.padd.b( x86_mmx %tmp1, x86_mmx %tmp3 ) ; <x86_mmx> [#uses=2]
- store x86_mmx %tmp4, x86_mmx* %A
- %tmp7 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
- %tmp12 = tail call x86_mmx @llvm.x86.mmx.padds.b( x86_mmx %tmp4, x86_mmx %tmp7 ) ; <x86_mmx> [#uses=2]
- store x86_mmx %tmp12, x86_mmx* %A
- %tmp16 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
- %tmp21 = tail call x86_mmx @llvm.x86.mmx.paddus.b( x86_mmx %tmp12, x86_mmx %tmp16 ) ; <x86_mmx> [#uses=2]
- store x86_mmx %tmp21, x86_mmx* %A
- %tmp27 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
- %tmp28 = tail call x86_mmx @llvm.x86.mmx.psub.b( x86_mmx %tmp21, x86_mmx %tmp27 ) ; <x86_mmx> [#uses=2]
- store x86_mmx %tmp28, x86_mmx* %A
- %tmp31 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
- %tmp36 = tail call x86_mmx @llvm.x86.mmx.psubs.b( x86_mmx %tmp28, x86_mmx %tmp31 ) ; <x86_mmx> [#uses=2]
- store x86_mmx %tmp36, x86_mmx* %A
- %tmp40 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
- %tmp45 = tail call x86_mmx @llvm.x86.mmx.psubus.b( x86_mmx %tmp36, x86_mmx %tmp40 ) ; <x86_mmx> [#uses=2]
- store x86_mmx %tmp45, x86_mmx* %A
- %tmp51 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
- %tmp51a = bitcast x86_mmx %tmp51 to i64
- %tmp51aa = bitcast i64 %tmp51a to <8 x i8>
- %tmp51b = bitcast x86_mmx %tmp45 to <8 x i8>
- %tmp52 = mul <8 x i8> %tmp51b, %tmp51aa ; <x86_mmx> [#uses=2]
- %tmp52a = bitcast <8 x i8> %tmp52 to i64
- %tmp52aa = bitcast i64 %tmp52a to x86_mmx
- store x86_mmx %tmp52aa, x86_mmx* %A
- %tmp57 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
- %tmp58 = tail call x86_mmx @llvm.x86.mmx.pand( x86_mmx %tmp51, x86_mmx %tmp57 ) ; <x86_mmx> [#uses=2]
- store x86_mmx %tmp58, x86_mmx* %A
- %tmp63 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
- %tmp64 = tail call x86_mmx @llvm.x86.mmx.por( x86_mmx %tmp58, x86_mmx %tmp63 ) ; <x86_mmx> [#uses=2]
- store x86_mmx %tmp64, x86_mmx* %A
- %tmp69 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
- %tmp70 = tail call x86_mmx @llvm.x86.mmx.pxor( x86_mmx %tmp64, x86_mmx %tmp69 ) ; <x86_mmx> [#uses=2]
- store x86_mmx %tmp70, x86_mmx* %A
- tail call void @llvm.x86.mmx.emms( )
- ret void
+ %tmp1 = bitcast double %a to <8 x i8>
+ %tmp2 = bitcast double %b to <8 x i8>
+ %tmp3 = add <8 x i8> %tmp1, %tmp2
+; X64: paddb
+ store <8 x i8> %tmp3, <8 x i8>* null
+ ret void
}
-define void @baza(x86_mmx* %A, x86_mmx* %B) {
+; X64-LABEL: ti16
+define void @ti16(double %a, double %b) nounwind {
entry:
- %tmp1 = load x86_mmx* %A ; <x86_mmx> [#uses=1]
- %tmp3 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
- %tmp4 = tail call x86_mmx @llvm.x86.mmx.padd.d( x86_mmx %tmp1, x86_mmx %tmp3 ) ; <x86_mmx> [#uses=2]
- store x86_mmx %tmp4, x86_mmx* %A
- %tmp9 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
- %tmp10 = tail call x86_mmx @llvm.x86.mmx.psub.d( x86_mmx %tmp4, x86_mmx %tmp9 ) ; <x86_mmx> [#uses=2]
- store x86_mmx %tmp10, x86_mmx* %A
- %tmp15 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
- %tmp10a = bitcast x86_mmx %tmp10 to <2 x i32>
- %tmp15a = bitcast x86_mmx %tmp15 to <2 x i32>
- %tmp16 = mul <2 x i32> %tmp10a, %tmp15a ; <x86_mmx> [#uses=2]
- %tmp16a = bitcast <2 x i32> %tmp16 to x86_mmx
- store x86_mmx %tmp16a, x86_mmx* %A
- %tmp21 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
- %tmp22 = tail call x86_mmx @llvm.x86.mmx.pand( x86_mmx %tmp16a, x86_mmx %tmp21 ) ; <x86_mmx> [#uses=2]
- store x86_mmx %tmp22, x86_mmx* %A
- %tmp27 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
- %tmp28 = tail call x86_mmx @llvm.x86.mmx.por( x86_mmx %tmp22, x86_mmx %tmp27 ) ; <x86_mmx> [#uses=2]
- store x86_mmx %tmp28, x86_mmx* %A
- %tmp33 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
- %tmp34 = tail call x86_mmx @llvm.x86.mmx.pxor( x86_mmx %tmp28, x86_mmx %tmp33 ) ; <x86_mmx> [#uses=2]
- store x86_mmx %tmp34, x86_mmx* %A
- tail call void @llvm.x86.mmx.emms( )
- ret void
+ %tmp1 = bitcast double %a to <4 x i16>
+ %tmp2 = bitcast double %b to <4 x i16>
+ %tmp3 = add <4 x i16> %tmp1, %tmp2
+; X64: paddw
+ store <4 x i16> %tmp3, <4 x i16>* null
+ ret void
}
-define void @bara(x86_mmx* %A, x86_mmx* %B) {
+; X64-LABEL: ti32
+define void @ti32(double %a, double %b) nounwind {
entry:
- %tmp1 = load x86_mmx* %A ; <x86_mmx> [#uses=1]
- %tmp3 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
- %tmp4 = tail call x86_mmx @llvm.x86.mmx.padd.w( x86_mmx %tmp1, x86_mmx %tmp3 ) ; <x86_mmx> [#uses=2]
- store x86_mmx %tmp4, x86_mmx* %A
- %tmp7 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
- %tmp12 = tail call x86_mmx @llvm.x86.mmx.padds.w( x86_mmx %tmp4, x86_mmx %tmp7 ) ; <x86_mmx> [#uses=2]
- store x86_mmx %tmp12, x86_mmx* %A
- %tmp16 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
- %tmp21 = tail call x86_mmx @llvm.x86.mmx.paddus.w( x86_mmx %tmp12, x86_mmx %tmp16 ) ; <x86_mmx> [#uses=2]
- store x86_mmx %tmp21, x86_mmx* %A
- %tmp27 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
- %tmp28 = tail call x86_mmx @llvm.x86.mmx.psub.w( x86_mmx %tmp21, x86_mmx %tmp27 ) ; <x86_mmx> [#uses=2]
- store x86_mmx %tmp28, x86_mmx* %A
- %tmp31 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
- %tmp36 = tail call x86_mmx @llvm.x86.mmx.psubs.w( x86_mmx %tmp28, x86_mmx %tmp31 ) ; <x86_mmx> [#uses=2]
- store x86_mmx %tmp36, x86_mmx* %A
- %tmp40 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
- %tmp45 = tail call x86_mmx @llvm.x86.mmx.psubus.w( x86_mmx %tmp36, x86_mmx %tmp40 ) ; <x86_mmx> [#uses=2]
- store x86_mmx %tmp45, x86_mmx* %A
- %tmp51 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
- %tmp52 = tail call x86_mmx @llvm.x86.mmx.pmull.w( x86_mmx %tmp45, x86_mmx %tmp51 ) ; <x86_mmx> [#uses=2]
- store x86_mmx %tmp52, x86_mmx* %A
- %tmp55 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
- %tmp60 = tail call x86_mmx @llvm.x86.mmx.pmulh.w( x86_mmx %tmp52, x86_mmx %tmp55 ) ; <x86_mmx> [#uses=2]
- store x86_mmx %tmp60, x86_mmx* %A
- %tmp64 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
- %tmp69 = tail call x86_mmx @llvm.x86.mmx.pmadd.wd( x86_mmx %tmp60, x86_mmx %tmp64 ) ; <x86_mmx> [#uses=1]
- %tmp70 = bitcast x86_mmx %tmp69 to x86_mmx ; <x86_mmx> [#uses=2]
- store x86_mmx %tmp70, x86_mmx* %A
- %tmp75 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
- %tmp76 = tail call x86_mmx @llvm.x86.mmx.pand( x86_mmx %tmp70, x86_mmx %tmp75 ) ; <x86_mmx> [#uses=2]
- store x86_mmx %tmp76, x86_mmx* %A
- %tmp81 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
- %tmp82 = tail call x86_mmx @llvm.x86.mmx.por( x86_mmx %tmp76, x86_mmx %tmp81 ) ; <x86_mmx> [#uses=2]
- store x86_mmx %tmp82, x86_mmx* %A
- %tmp87 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
- %tmp88 = tail call x86_mmx @llvm.x86.mmx.pxor( x86_mmx %tmp82, x86_mmx %tmp87 ) ; <x86_mmx> [#uses=2]
- store x86_mmx %tmp88, x86_mmx* %A
- tail call void @llvm.x86.mmx.emms( )
- ret void
+ %tmp1 = bitcast double %a to <2 x i32>
+ %tmp2 = bitcast double %b to <2 x i32>
+ %tmp3 = add <2 x i32> %tmp1, %tmp2
+; X64: paddd
+ store <2 x i32> %tmp3, <2 x i32>* null
+ ret void
}
-declare x86_mmx @llvm.x86.mmx.paddus.b(x86_mmx, x86_mmx)
+; X64-LABEL: ti64
+define void @ti64(double %a, double %b) nounwind {
+entry:
+ %tmp1 = bitcast double %a to <1 x i64>
+ %tmp2 = bitcast double %b to <1 x i64>
+ %tmp3 = add <1 x i64> %tmp1, %tmp2
+; X64: addq
+ store <1 x i64> %tmp3, <1 x i64>* null
+ ret void
+}
-declare x86_mmx @llvm.x86.mmx.psubus.b(x86_mmx, x86_mmx)
+; MMX intrinsics calls get us MMX instructions.
+; X64-LABEL: ti8a
+define void @ti8a(double %a, double %b) nounwind {
+entry:
+ %tmp1 = bitcast double %a to x86_mmx
+; X64: movdq2q
+ %tmp2 = bitcast double %b to x86_mmx
+; X64: movdq2q
+ %tmp3 = tail call x86_mmx @llvm.x86.mmx.padd.b(x86_mmx %tmp1, x86_mmx %tmp2)
+ store x86_mmx %tmp3, x86_mmx* null
+ ret void
+}
-declare x86_mmx @llvm.x86.mmx.paddus.w(x86_mmx, x86_mmx)
+; X64-LABEL: ti16a
+define void @ti16a(double %a, double %b) nounwind {
+entry:
+ %tmp1 = bitcast double %a to x86_mmx
+; X64: movdq2q
+ %tmp2 = bitcast double %b to x86_mmx
+; X64: movdq2q
+ %tmp3 = tail call x86_mmx @llvm.x86.mmx.padd.w(x86_mmx %tmp1, x86_mmx %tmp2)
+ store x86_mmx %tmp3, x86_mmx* null
+ ret void
+}
-declare x86_mmx @llvm.x86.mmx.psubus.w(x86_mmx, x86_mmx)
+; X64-LABEL: ti32a
+define void @ti32a(double %a, double %b) nounwind {
+entry:
+ %tmp1 = bitcast double %a to x86_mmx
+; X64: movdq2q
+ %tmp2 = bitcast double %b to x86_mmx
+; X64: movdq2q
+ %tmp3 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %tmp1, x86_mmx %tmp2)
+ store x86_mmx %tmp3, x86_mmx* null
+ ret void
+}
-declare x86_mmx @llvm.x86.mmx.pmulh.w(x86_mmx, x86_mmx)
+; X64-LABEL: ti64a
+define void @ti64a(double %a, double %b) nounwind {
+entry:
+ %tmp1 = bitcast double %a to x86_mmx
+; X64: movdq2q
+ %tmp2 = bitcast double %b to x86_mmx
+; X64: movdq2q
+ %tmp3 = tail call x86_mmx @llvm.x86.mmx.padd.q(x86_mmx %tmp1, x86_mmx %tmp2)
+ store x86_mmx %tmp3, x86_mmx* null
+ ret void
+}
+
+declare x86_mmx @llvm.x86.mmx.padd.b(x86_mmx, x86_mmx)
+declare x86_mmx @llvm.x86.mmx.padd.w(x86_mmx, x86_mmx)
+declare x86_mmx @llvm.x86.mmx.padd.d(x86_mmx, x86_mmx)
+declare x86_mmx @llvm.x86.mmx.padd.q(x86_mmx, x86_mmx)
+declare x86_mmx @llvm.x86.mmx.paddus.b(x86_mmx, x86_mmx)
+declare x86_mmx @llvm.x86.mmx.psubus.b(x86_mmx, x86_mmx)
+declare x86_mmx @llvm.x86.mmx.paddus.w(x86_mmx, x86_mmx)
+declare x86_mmx @llvm.x86.mmx.psubus.w(x86_mmx, x86_mmx)
+declare x86_mmx @llvm.x86.mmx.pmulh.w(x86_mmx, x86_mmx)
declare x86_mmx @llvm.x86.mmx.pmadd.wd(x86_mmx, x86_mmx)
declare void @llvm.x86.mmx.emms()
-declare x86_mmx @llvm.x86.mmx.padd.b(x86_mmx, x86_mmx)
-declare x86_mmx @llvm.x86.mmx.padd.w(x86_mmx, x86_mmx)
-declare x86_mmx @llvm.x86.mmx.padd.d(x86_mmx, x86_mmx)
declare x86_mmx @llvm.x86.mmx.padds.b(x86_mmx, x86_mmx)
declare x86_mmx @llvm.x86.mmx.padds.w(x86_mmx, x86_mmx)
-declare x86_mmx @llvm.x86.mmx.padds.d(x86_mmx, x86_mmx)
declare x86_mmx @llvm.x86.mmx.psubs.b(x86_mmx, x86_mmx)
declare x86_mmx @llvm.x86.mmx.psubs.w(x86_mmx, x86_mmx)
-declare x86_mmx @llvm.x86.mmx.psubs.d(x86_mmx, x86_mmx)
-declare x86_mmx @llvm.x86.mmx.psub.b(x86_mmx, x86_mmx)
-declare x86_mmx @llvm.x86.mmx.psub.w(x86_mmx, x86_mmx)
-declare x86_mmx @llvm.x86.mmx.psub.d(x86_mmx, x86_mmx)
-declare x86_mmx @llvm.x86.mmx.pmull.w(x86_mmx, x86_mmx)
-declare x86_mmx @llvm.x86.mmx.pand(x86_mmx, x86_mmx)
-declare x86_mmx @llvm.x86.mmx.por(x86_mmx, x86_mmx)
-declare x86_mmx @llvm.x86.mmx.pxor(x86_mmx, x86_mmx)
diff --git a/test/CodeGen/X86/mmx-bitcast-to-i64.ll b/test/CodeGen/X86/mmx-bitcast-to-i64.ll
deleted file mode 100644
index 8b1840abf615..000000000000
--- a/test/CodeGen/X86/mmx-bitcast-to-i64.ll
+++ /dev/null
@@ -1,31 +0,0 @@
-; RUN: llc < %s -march=x86-64 | grep movd | count 4
-
-define i64 @foo(x86_mmx* %p) {
- %t = load x86_mmx* %p
- %u = tail call x86_mmx @llvm.x86.mmx.padd.q(x86_mmx %t, x86_mmx %t)
- %s = bitcast x86_mmx %u to i64
- ret i64 %s
-}
-define i64 @goo(x86_mmx* %p) {
- %t = load x86_mmx* %p
- %u = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %t, x86_mmx %t)
- %s = bitcast x86_mmx %u to i64
- ret i64 %s
-}
-define i64 @hoo(x86_mmx* %p) {
- %t = load x86_mmx* %p
- %u = tail call x86_mmx @llvm.x86.mmx.padd.w(x86_mmx %t, x86_mmx %t)
- %s = bitcast x86_mmx %u to i64
- ret i64 %s
-}
-define i64 @ioo(x86_mmx* %p) {
- %t = load x86_mmx* %p
- %u = tail call x86_mmx @llvm.x86.mmx.padd.b(x86_mmx %t, x86_mmx %t)
- %s = bitcast x86_mmx %u to i64
- ret i64 %s
-}
-
-declare x86_mmx @llvm.x86.mmx.padd.b(x86_mmx, x86_mmx)
-declare x86_mmx @llvm.x86.mmx.padd.w(x86_mmx, x86_mmx)
-declare x86_mmx @llvm.x86.mmx.padd.d(x86_mmx, x86_mmx)
-declare x86_mmx @llvm.x86.mmx.padd.q(x86_mmx, x86_mmx)
diff --git a/test/CodeGen/X86/mmx-bitcast.ll b/test/CodeGen/X86/mmx-bitcast.ll
new file mode 100644
index 000000000000..00c803917f75
--- /dev/null
+++ b/test/CodeGen/X86/mmx-bitcast.ll
@@ -0,0 +1,108 @@
+; RUN: llc < %s -mtriple=x86_64-darwin -mattr=+mmx,+sse2 | FileCheck %s
+
+define i64 @t0(x86_mmx* %p) {
+; CHECK-LABEL: t0:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movq
+; CHECK-NEXT: paddq %mm0, %mm0
+; CHECK-NEXT: movd %mm0, %rax
+; CHECK-NEXT: retq
+ %t = load x86_mmx, x86_mmx* %p
+ %u = tail call x86_mmx @llvm.x86.mmx.padd.q(x86_mmx %t, x86_mmx %t)
+ %s = bitcast x86_mmx %u to i64
+ ret i64 %s
+}
+
+define i64 @t1(x86_mmx* %p) {
+; CHECK-LABEL: t1:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movq
+; CHECK-NEXT: paddd %mm0, %mm0
+; CHECK-NEXT: movd %mm0, %rax
+; CHECK-NEXT: retq
+ %t = load x86_mmx, x86_mmx* %p
+ %u = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %t, x86_mmx %t)
+ %s = bitcast x86_mmx %u to i64
+ ret i64 %s
+}
+
+define i64 @t2(x86_mmx* %p) {
+; CHECK-LABEL: t2:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movq
+; CHECK-NEXT: paddw %mm0, %mm0
+; CHECK-NEXT: movd %mm0, %rax
+; CHECK-NEXT: retq
+ %t = load x86_mmx, x86_mmx* %p
+ %u = tail call x86_mmx @llvm.x86.mmx.padd.w(x86_mmx %t, x86_mmx %t)
+ %s = bitcast x86_mmx %u to i64
+ ret i64 %s
+}
+
+define i64 @t3(x86_mmx* %p) {
+; CHECK-LABEL: t3:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movq
+; CHECK-NEXT: paddb %mm0, %mm0
+; CHECK-NEXT: movd %mm0, %rax
+; CHECK-NEXT: retq
+ %t = load x86_mmx, x86_mmx* %p
+ %u = tail call x86_mmx @llvm.x86.mmx.padd.b(x86_mmx %t, x86_mmx %t)
+ %s = bitcast x86_mmx %u to i64
+ ret i64 %s
+}
+
+@R = external global x86_mmx
+
+define void @t4(<1 x i64> %A, <1 x i64> %B) {
+; CHECK-LABEL: t4:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: movd
+; CHECK-NEXT: movd
+; CHECK: retq
+entry:
+ %tmp2 = bitcast <1 x i64> %A to x86_mmx
+ %tmp3 = bitcast <1 x i64> %B to x86_mmx
+ %tmp7 = tail call x86_mmx @llvm.x86.mmx.paddus.w(x86_mmx %tmp2, x86_mmx %tmp3)
+ store x86_mmx %tmp7, x86_mmx* @R
+ tail call void @llvm.x86.mmx.emms()
+ ret void
+}
+
+define i64 @t5(i32 %a, i32 %b) nounwind readnone {
+; CHECK-LABEL: t5:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movd
+; CHECK-NEXT: movd
+; CHECK-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; CHECK-NEXT: movd %xmm1, %rax
+; CHECK-NEXT: retq
+ %v0 = insertelement <2 x i32> undef, i32 %a, i32 0
+ %v1 = insertelement <2 x i32> %v0, i32 %b, i32 1
+ %conv = bitcast <2 x i32> %v1 to i64
+ ret i64 %conv
+}
+
+declare x86_mmx @llvm.x86.mmx.pslli.q(x86_mmx, i32)
+
+define <1 x i64> @t6(i64 %t) {
+; CHECK-LABEL: t6:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movd
+; CHECK-NEXT: psllq $48, %mm0
+; CHECK-NEXT: movd %mm0, %rax
+; CHECK-NEXT: retq
+ %t1 = insertelement <1 x i64> undef, i64 %t, i32 0
+ %t0 = bitcast <1 x i64> %t1 to x86_mmx
+ %t2 = tail call x86_mmx @llvm.x86.mmx.pslli.q(x86_mmx %t0, i32 48)
+ %t3 = bitcast x86_mmx %t2 to <1 x i64>
+ ret <1 x i64> %t3
+}
+
+declare x86_mmx @llvm.x86.mmx.paddus.w(x86_mmx, x86_mmx)
+declare x86_mmx @llvm.x86.mmx.padd.b(x86_mmx, x86_mmx)
+declare x86_mmx @llvm.x86.mmx.padd.w(x86_mmx, x86_mmx)
+declare x86_mmx @llvm.x86.mmx.padd.d(x86_mmx, x86_mmx)
+declare x86_mmx @llvm.x86.mmx.padd.q(x86_mmx, x86_mmx)
+declare void @llvm.x86.mmx.emms()
+
diff --git a/test/CodeGen/X86/mmx-copy-gprs.ll b/test/CodeGen/X86/mmx-copy-gprs.ll
index 377875565bf8..6d39713833e8 100644
--- a/test/CodeGen/X86/mmx-copy-gprs.ll
+++ b/test/CodeGen/X86/mmx-copy-gprs.ll
@@ -11,7 +11,7 @@
define void @foo(<1 x i64>* %x, <1 x i64>* %y) nounwind {
entry:
- %tmp1 = load <1 x i64>* %y, align 8 ; <<1 x i64>> [#uses=1]
+ %tmp1 = load <1 x i64>, <1 x i64>* %y, align 8 ; <<1 x i64>> [#uses=1]
store <1 x i64> %tmp1, <1 x i64>* %x, align 8
ret void
}
diff --git a/test/CodeGen/X86/mmx-emms.ll b/test/CodeGen/X86/mmx-emms.ll
deleted file mode 100644
index 5ff2588da699..000000000000
--- a/test/CodeGen/X86/mmx-emms.ll
+++ /dev/null
@@ -1,11 +0,0 @@
-; RUN: llc < %s -march=x86 -mattr=+mmx | grep emms
-define void @foo() {
-entry:
- call void @llvm.x86.mmx.emms( )
- br label %return
-
-return: ; preds = %entry
- ret void
-}
-
-declare void @llvm.x86.mmx.emms()
diff --git a/test/CodeGen/X86/mmx-fold-load.ll b/test/CodeGen/X86/mmx-fold-load.ll
new file mode 100644
index 000000000000..2b9d30f59fd5
--- /dev/null
+++ b/test/CodeGen/X86/mmx-fold-load.ll
@@ -0,0 +1,282 @@
+; RUN: llc < %s -march=x86-64 -mattr=+mmx,+sse2 | FileCheck %s
+
+define i64 @t0(<1 x i64>* %a, i32* %b) {
+; CHECK-LABEL: t0:
+; CHECK: # BB#0:{{.*}} %entry
+; CHECK: movq (%[[REG1:[a-z]+]]), %mm0
+; CHECK-NEXT: psllq (%[[REG2:[a-z]+]]), %mm0
+; CHECK-NEXT: movd %mm0, %rax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <1 x i64>* %a to x86_mmx*
+ %1 = load x86_mmx, x86_mmx* %0, align 8
+ %2 = load i32, i32* %b, align 4
+ %3 = tail call x86_mmx @llvm.x86.mmx.pslli.q(x86_mmx %1, i32 %2)
+ %4 = bitcast x86_mmx %3 to i64
+ ret i64 %4
+}
+declare x86_mmx @llvm.x86.mmx.pslli.q(x86_mmx, i32)
+
+define i64 @t1(<1 x i64>* %a, i32* %b) {
+; CHECK-LABEL: t1:
+; CHECK: # BB#0:{{.*}} %entry
+; CHECK: movq (%[[REG1]]), %mm0
+; CHECK-NEXT: psrlq (%[[REG2]]), %mm0
+; CHECK-NEXT: movd %mm0, %rax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <1 x i64>* %a to x86_mmx*
+ %1 = load x86_mmx, x86_mmx* %0, align 8
+ %2 = load i32, i32* %b, align 4
+ %3 = tail call x86_mmx @llvm.x86.mmx.psrli.q(x86_mmx %1, i32 %2)
+ %4 = bitcast x86_mmx %3 to i64
+ ret i64 %4
+}
+declare x86_mmx @llvm.x86.mmx.psrli.q(x86_mmx, i32)
+
+define i64 @t2(<1 x i64>* %a, i32* %b) {
+; CHECK-LABEL: t2:
+; CHECK: # BB#0:{{.*}} %entry
+; CHECK: movq (%[[REG1]]), %mm0
+; CHECK-NEXT: psllw (%[[REG2]]), %mm0
+; CHECK-NEXT: movd %mm0, %rax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <1 x i64>* %a to x86_mmx*
+ %1 = load x86_mmx, x86_mmx* %0, align 8
+ %2 = load i32, i32* %b, align 4
+ %3 = tail call x86_mmx @llvm.x86.mmx.pslli.w(x86_mmx %1, i32 %2)
+ %4 = bitcast x86_mmx %3 to i64
+ ret i64 %4
+}
+declare x86_mmx @llvm.x86.mmx.pslli.w(x86_mmx, i32)
+
+define i64 @t3(<1 x i64>* %a, i32* %b) {
+; CHECK-LABEL: t3:
+; CHECK: # BB#0:{{.*}} %entry
+; CHECK: movq (%[[REG1]]), %mm0
+; CHECK-NEXT: psrlw (%[[REG2]]), %mm0
+; CHECK-NEXT: movd %mm0, %rax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <1 x i64>* %a to x86_mmx*
+ %1 = load x86_mmx, x86_mmx* %0, align 8
+ %2 = load i32, i32* %b, align 4
+ %3 = tail call x86_mmx @llvm.x86.mmx.psrli.w(x86_mmx %1, i32 %2)
+ %4 = bitcast x86_mmx %3 to i64
+ ret i64 %4
+}
+declare x86_mmx @llvm.x86.mmx.psrli.w(x86_mmx, i32)
+
+define i64 @t4(<1 x i64>* %a, i32* %b) {
+; CHECK-LABEL: t4:
+; CHECK: # BB#0:{{.*}} %entry
+; CHECK: movq (%[[REG1]]), %mm0
+; CHECK-NEXT: pslld (%[[REG2]]), %mm0
+; CHECK-NEXT: movd %mm0, %rax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <1 x i64>* %a to x86_mmx*
+ %1 = load x86_mmx, x86_mmx* %0, align 8
+ %2 = load i32, i32* %b, align 4
+ %3 = tail call x86_mmx @llvm.x86.mmx.pslli.d(x86_mmx %1, i32 %2)
+ %4 = bitcast x86_mmx %3 to i64
+ ret i64 %4
+}
+declare x86_mmx @llvm.x86.mmx.pslli.d(x86_mmx, i32)
+
+define i64 @t5(<1 x i64>* %a, i32* %b) {
+; CHECK-LABEL: t5:
+; CHECK: # BB#0:{{.*}} %entry
+; CHECK: movq (%[[REG1]]), %mm0
+; CHECK-NEXT: psrld (%[[REG2]]), %mm0
+; CHECK-NEXT: movd %mm0, %rax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <1 x i64>* %a to x86_mmx*
+ %1 = load x86_mmx, x86_mmx* %0, align 8
+ %2 = load i32, i32* %b, align 4
+ %3 = tail call x86_mmx @llvm.x86.mmx.psrli.d(x86_mmx %1, i32 %2)
+ %4 = bitcast x86_mmx %3 to i64
+ ret i64 %4
+}
+declare x86_mmx @llvm.x86.mmx.psrli.d(x86_mmx, i32)
+
+define i64 @t6(<1 x i64>* %a, i32* %b) {
+; CHECK-LABEL: t6:
+; CHECK: # BB#0:{{.*}} %entry
+; CHECK: movq (%[[REG1]]), %mm0
+; CHECK-NEXT: psraw (%[[REG2]]), %mm0
+; CHECK-NEXT: movd %mm0, %rax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <1 x i64>* %a to x86_mmx*
+ %1 = load x86_mmx, x86_mmx* %0, align 8
+ %2 = load i32, i32* %b, align 4
+ %3 = tail call x86_mmx @llvm.x86.mmx.psrai.w(x86_mmx %1, i32 %2)
+ %4 = bitcast x86_mmx %3 to i64
+ ret i64 %4
+}
+declare x86_mmx @llvm.x86.mmx.psrai.w(x86_mmx, i32)
+
+define i64 @t7(<1 x i64>* %a, i32* %b) {
+; CHECK-LABEL: t7:
+; CHECK: # BB#0:{{.*}} %entry
+; CHECK: movq (%[[REG1]]), %mm0
+; CHECK-NEXT: psrad (%[[REG2]]), %mm0
+; CHECK-NEXT: movd %mm0, %rax
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <1 x i64>* %a to x86_mmx*
+ %1 = load x86_mmx, x86_mmx* %0, align 8
+ %2 = load i32, i32* %b, align 4
+ %3 = tail call x86_mmx @llvm.x86.mmx.psrai.d(x86_mmx %1, i32 %2)
+ %4 = bitcast x86_mmx %3 to i64
+ ret i64 %4
+}
+declare x86_mmx @llvm.x86.mmx.psrai.d(x86_mmx, i32)
+
+define i64 @tt0(x86_mmx %t, x86_mmx* %q) {
+; CHECK-LABEL: tt0:
+; CHECK: # BB#0:{{.*}} %entry
+; CHECK: paddb (%[[REG3:[a-z]+]]), %mm0
+; CHECK-NEXT: movd %mm0, %rax
+; CHECK-NEXT: emms
+; CHECK-NEXT: retq
+entry:
+ %v = load x86_mmx, x86_mmx* %q
+ %u = tail call x86_mmx @llvm.x86.mmx.padd.b(x86_mmx %t, x86_mmx %v)
+ %s = bitcast x86_mmx %u to i64
+ call void @llvm.x86.mmx.emms()
+ ret i64 %s
+}
+declare x86_mmx @llvm.x86.mmx.padd.b(x86_mmx, x86_mmx)
+declare void @llvm.x86.mmx.emms()
+
+define i64 @tt1(x86_mmx %t, x86_mmx* %q) {
+; CHECK-LABEL: tt1:
+; CHECK: # BB#0:{{.*}} %entry
+; CHECK: paddw (%[[REG3]]), %mm0
+; CHECK-NEXT: movd %mm0, %rax
+; CHECK-NEXT: emms
+; CHECK-NEXT: retq
+entry:
+ %v = load x86_mmx, x86_mmx* %q
+ %u = tail call x86_mmx @llvm.x86.mmx.padd.w(x86_mmx %t, x86_mmx %v)
+ %s = bitcast x86_mmx %u to i64
+ call void @llvm.x86.mmx.emms()
+ ret i64 %s
+}
+declare x86_mmx @llvm.x86.mmx.padd.w(x86_mmx, x86_mmx)
+
+define i64 @tt2(x86_mmx %t, x86_mmx* %q) {
+; CHECK-LABEL: tt2:
+; CHECK: # BB#0:{{.*}} %entry
+; CHECK: paddd (%[[REG3]]), %mm0
+; CHECK-NEXT: movd %mm0, %rax
+; CHECK-NEXT: emms
+; CHECK-NEXT: retq
+entry:
+ %v = load x86_mmx, x86_mmx* %q
+ %u = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %t, x86_mmx %v)
+ %s = bitcast x86_mmx %u to i64
+ call void @llvm.x86.mmx.emms()
+ ret i64 %s
+}
+declare x86_mmx @llvm.x86.mmx.padd.d(x86_mmx, x86_mmx)
+
+define i64 @tt3(x86_mmx %t, x86_mmx* %q) {
+; CHECK-LABEL: tt3:
+; CHECK: # BB#0:{{.*}} %entry
+; CHECK: paddq (%[[REG3]]), %mm0
+; CHECK-NEXT: movd %mm0, %rax
+; CHECK-NEXT: emms
+; CHECK-NEXT: retq
+entry:
+ %v = load x86_mmx, x86_mmx* %q
+ %u = tail call x86_mmx @llvm.x86.mmx.padd.q(x86_mmx %t, x86_mmx %v)
+ %s = bitcast x86_mmx %u to i64
+ call void @llvm.x86.mmx.emms()
+ ret i64 %s
+}
+declare x86_mmx @llvm.x86.mmx.padd.q(x86_mmx, x86_mmx)
+
+define i64 @tt4(x86_mmx %t, x86_mmx* %q) {
+; CHECK-LABEL: tt4:
+; CHECK: # BB#0:{{.*}} %entry
+; CHECK: paddusb (%[[REG3]]), %mm0
+; CHECK-NEXT: movd %mm0, %rax
+; CHECK-NEXT: emms
+; CHECK-NEXT: retq
+entry:
+ %v = load x86_mmx, x86_mmx* %q
+ %u = tail call x86_mmx @llvm.x86.mmx.paddus.b(x86_mmx %t, x86_mmx %v)
+ %s = bitcast x86_mmx %u to i64
+ call void @llvm.x86.mmx.emms()
+ ret i64 %s
+}
+declare x86_mmx @llvm.x86.mmx.paddus.b(x86_mmx, x86_mmx)
+
+define i64 @tt5(x86_mmx %t, x86_mmx* %q) {
+; CHECK-LABEL: tt5:
+; CHECK: # BB#0:{{.*}} %entry
+; CHECK: paddusw (%[[REG3]]), %mm0
+; CHECK-NEXT: movd %mm0, %rax
+; CHECK-NEXT: emms
+; CHECK-NEXT: retq
+entry:
+ %v = load x86_mmx, x86_mmx* %q
+ %u = tail call x86_mmx @llvm.x86.mmx.paddus.w(x86_mmx %t, x86_mmx %v)
+ %s = bitcast x86_mmx %u to i64
+ call void @llvm.x86.mmx.emms()
+ ret i64 %s
+}
+declare x86_mmx @llvm.x86.mmx.paddus.w(x86_mmx, x86_mmx)
+
+define i64 @tt6(x86_mmx %t, x86_mmx* %q) {
+; CHECK-LABEL: tt6:
+; CHECK: # BB#0:{{.*}} %entry
+; CHECK: psrlw (%[[REG3]]), %mm0
+; CHECK-NEXT: movd %mm0, %rax
+; CHECK-NEXT: emms
+; CHECK-NEXT: retq
+entry:
+ %v = load x86_mmx, x86_mmx* %q
+ %u = tail call x86_mmx @llvm.x86.mmx.psrl.w(x86_mmx %t, x86_mmx %v)
+ %s = bitcast x86_mmx %u to i64
+ call void @llvm.x86.mmx.emms()
+ ret i64 %s
+}
+declare x86_mmx @llvm.x86.mmx.psrl.w(x86_mmx, x86_mmx)
+
+define i64 @tt7(x86_mmx %t, x86_mmx* %q) {
+; CHECK-LABEL: tt7:
+; CHECK: # BB#0:{{.*}} %entry
+; CHECK: psrld (%[[REG3]]), %mm0
+; CHECK-NEXT: movd %mm0, %rax
+; CHECK-NEXT: emms
+; CHECK-NEXT: retq
+entry:
+ %v = load x86_mmx, x86_mmx* %q
+ %u = tail call x86_mmx @llvm.x86.mmx.psrl.d(x86_mmx %t, x86_mmx %v)
+ %s = bitcast x86_mmx %u to i64
+ call void @llvm.x86.mmx.emms()
+ ret i64 %s
+}
+declare x86_mmx @llvm.x86.mmx.psrl.d(x86_mmx, x86_mmx)
+
+define i64 @tt8(x86_mmx %t, x86_mmx* %q) {
+; CHECK-LABEL: tt8:
+; CHECK: # BB#0:{{.*}} %entry
+; CHECK: psrlq (%[[REG3]]), %mm0
+; CHECK-NEXT: movd %mm0, %rax
+; CHECK-NEXT: emms
+; CHECK-NEXT: retq
+entry:
+ %v = load x86_mmx, x86_mmx* %q
+ %u = tail call x86_mmx @llvm.x86.mmx.psrl.q(x86_mmx %t, x86_mmx %v)
+ %s = bitcast x86_mmx %u to i64
+ call void @llvm.x86.mmx.emms()
+ ret i64 %s
+}
+declare x86_mmx @llvm.x86.mmx.psrl.q(x86_mmx, x86_mmx)
diff --git a/test/CodeGen/X86/mmx-insert-element.ll b/test/CodeGen/X86/mmx-insert-element.ll
deleted file mode 100644
index 348dac8d4d59..000000000000
--- a/test/CodeGen/X86/mmx-insert-element.ll
+++ /dev/null
@@ -1,9 +0,0 @@
-; RUN: llc < %s -march=x86 -mattr=+mmx,+sse2 | grep movq
-; RUN: llc < %s -march=x86 -mattr=+mmx,+sse2 | grep pshufd
-; This is not an MMX operation; promoted to XMM.
-
-define x86_mmx @qux(i32 %A) nounwind {
- %tmp3 = insertelement <2 x i32> < i32 0, i32 undef >, i32 %A, i32 1 ; <<2 x i32>> [#uses=1]
- %tmp4 = bitcast <2 x i32> %tmp3 to x86_mmx
- ret x86_mmx %tmp4
-}
diff --git a/test/CodeGen/X86/mmx-builtins.ll b/test/CodeGen/X86/mmx-intrinsics.ll
index aabdd53b09d6..39d481b16e7a 100644
--- a/test/CodeGen/X86/mmx-builtins.ll
+++ b/test/CodeGen/X86/mmx-intrinsics.ll
@@ -1347,3 +1347,12 @@ define <4 x float> @test89(<4 x float> %a, x86_mmx %b) nounwind {
}
declare <4 x float> @llvm.x86.sse.cvtpi2ps(<4 x float>, x86_mmx) nounwind readnone
+
+; CHECK-LABEL: test90
+define void @test90() {
+; CHECK: emms
+ call void @llvm.x86.mmx.emms()
+ ret void
+}
+
+declare void @llvm.x86.mmx.emms()
diff --git a/test/CodeGen/X86/mmx-pinsrw.ll b/test/CodeGen/X86/mmx-pinsrw.ll
deleted file mode 100644
index 33dd2eb81cfa..000000000000
--- a/test/CodeGen/X86/mmx-pinsrw.ll
+++ /dev/null
@@ -1,17 +0,0 @@
-; RUN: llc < %s -mtriple=x86_64-linux -mcpu=corei7 | FileCheck %s
-; PR2562
-
-; CHECK: pinsr
-
-external global i16 ; <i16*>:0 [#uses=1]
-external global <4 x i16> ; <<4 x i16>*>:1 [#uses=2]
-
-declare void @abort()
-
-define void @""() {
- load i16* @0 ; <i16>:1 [#uses=1]
- load <4 x i16>* @1 ; <<4 x i16>>:2 [#uses=1]
- insertelement <4 x i16> %2, i16 %1, i32 0 ; <<4 x i16>>:3 [#uses=1]
- store <4 x i16> %3, <4 x i16>* @1
- ret void
-}
diff --git a/test/CodeGen/X86/mmx-punpckhdq.ll b/test/CodeGen/X86/mmx-punpckhdq.ll
deleted file mode 100644
index 9e8f5bf53363..000000000000
--- a/test/CodeGen/X86/mmx-punpckhdq.ll
+++ /dev/null
@@ -1,31 +0,0 @@
-; RUN: llc < %s -march=x86 -mattr=+mmx,+sse4.2 -mtriple=x86_64-apple-darwin10 | FileCheck %s
-; There are no MMX operations in bork; promoted to XMM.
-
-define void @bork(<1 x i64>* %x) {
-; CHECK: bork
-; CHECK: movlpd
-entry:
- %tmp2 = load <1 x i64>* %x ; <<1 x i64>> [#uses=1]
- %tmp6 = bitcast <1 x i64> %tmp2 to <2 x i32> ; <<2 x i32>> [#uses=1]
- %tmp9 = shufflevector <2 x i32> %tmp6, <2 x i32> undef, <2 x i32> < i32 1, i32 1 > ; <<2 x i32>> [#uses=1]
- %tmp10 = bitcast <2 x i32> %tmp9 to <1 x i64> ; <<1 x i64>> [#uses=1]
- store <1 x i64> %tmp10, <1 x i64>* %x
- tail call void @llvm.x86.mmx.emms( )
- ret void
-}
-
-; pork uses MMX.
-
-define void @pork(x86_mmx* %x) {
-; CHECK: pork
-; CHECK: punpckhdq
-entry:
- %tmp2 = load x86_mmx* %x ; <x86_mmx> [#uses=1]
- %tmp9 = tail call x86_mmx @llvm.x86.mmx.punpckhdq (x86_mmx %tmp2, x86_mmx %tmp2)
- store x86_mmx %tmp9, x86_mmx* %x
- tail call void @llvm.x86.mmx.emms( )
- ret void
-}
-
-declare x86_mmx @llvm.x86.mmx.punpckhdq(x86_mmx, x86_mmx)
-declare void @llvm.x86.mmx.emms()
diff --git a/test/CodeGen/X86/mmx-s2v.ll b/test/CodeGen/X86/mmx-s2v.ll
deleted file mode 100644
index c98023c0f417..000000000000
--- a/test/CodeGen/X86/mmx-s2v.ll
+++ /dev/null
@@ -1,15 +0,0 @@
-; RUN: llc < %s -march=x86 -mattr=+mmx
-; PR2574
-
-define void @entry(i32 %m_task_id, i32 %start_x, i32 %end_x) {; <label>:0
- br i1 true, label %bb.nph, label %._crit_edge
-
-bb.nph: ; preds = %bb.nph, %0
- %t2206f2.0 = phi <2 x float> [ %2, %bb.nph ], [ undef, %0 ] ; <<2 x float>> [#uses=1]
- insertelement <2 x float> %t2206f2.0, float 0.000000e+00, i32 0 ; <<2 x float>>:1 [#uses=1]
- insertelement <2 x float> %1, float 0.000000e+00, i32 1 ; <<2 x float>>:2 [#uses=1]
- br label %bb.nph
-
-._crit_edge: ; preds = %0
- ret void
-}
diff --git a/test/CodeGen/X86/mmx-shift.ll b/test/CodeGen/X86/mmx-shift.ll
deleted file mode 100644
index c7c6e75a5071..000000000000
--- a/test/CodeGen/X86/mmx-shift.ll
+++ /dev/null
@@ -1,39 +0,0 @@
-; RUN: llc < %s -march=x86 -mattr=+mmx | FileCheck %s
-; RUN: llc < %s -march=x86-64 -mattr=+mmx | FileCheck %s
-
-define i64 @t1(<1 x i64> %mm1) nounwind {
-entry:
- %tmp = bitcast <1 x i64> %mm1 to x86_mmx
- %tmp6 = tail call x86_mmx @llvm.x86.mmx.pslli.q( x86_mmx %tmp, i32 32 ) ; <x86_mmx> [#uses=1]
- %retval1112 = bitcast x86_mmx %tmp6 to i64
- ret i64 %retval1112
-
-; CHECK-LABEL: t1:
-; CHECK: psllq $32
-}
-
-declare x86_mmx @llvm.x86.mmx.pslli.q(x86_mmx, i32) nounwind readnone
-
-define i64 @t2(x86_mmx %mm1, x86_mmx %mm2) nounwind {
-entry:
- %tmp7 = tail call x86_mmx @llvm.x86.mmx.psra.d( x86_mmx %mm1, x86_mmx %mm2 ) nounwind readnone ; <x86_mmx> [#uses=1]
- %retval1112 = bitcast x86_mmx %tmp7 to i64
- ret i64 %retval1112
-
-; CHECK-LABEL: t2:
-; CHECK: psrad
-}
-
-declare x86_mmx @llvm.x86.mmx.psra.d(x86_mmx, x86_mmx) nounwind readnone
-
-define i64 @t3(x86_mmx %mm1, i32 %bits) nounwind {
-entry:
- %tmp8 = tail call x86_mmx @llvm.x86.mmx.psrli.w( x86_mmx %mm1, i32 %bits ) nounwind readnone ; <x86_mmx> [#uses=1]
- %retval1314 = bitcast x86_mmx %tmp8 to i64
- ret i64 %retval1314
-
-; CHECK-LABEL: t3:
-; CHECK: psrlw
-}
-
-declare x86_mmx @llvm.x86.mmx.psrli.w(x86_mmx, i32) nounwind readnone
diff --git a/test/CodeGen/X86/mmx-shuffle.ll b/test/CodeGen/X86/mmx-shuffle.ll
deleted file mode 100644
index 869f32b89fb7..000000000000
--- a/test/CodeGen/X86/mmx-shuffle.ll
+++ /dev/null
@@ -1,31 +0,0 @@
-; RUN: llc < %s -mcpu=yonah
-; PR1427
-
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
-target triple = "i686-pc-linux-gnu"
- %struct.DrawHelper = type { void (i32, %struct.QT_FT_Span*, i8*)*, void (i32, %struct.QT_FT_Span*, i8*)*, void (%struct.QRasterBuffer*, i32, i32, i32, i8*, i32, i32, i32)*, void (%struct.QRasterBuffer*, i32, i32, i32, i8*, i32, i32, i32)*, void (%struct.QRasterBuffer*, i32, i32, i32, i32, i32)* }
- %struct.QBasicAtomic = type { i32 }
- %struct.QClipData = type { i32, %"struct.QClipData::ClipLine"*, i32, i32, %struct.QT_FT_Span*, i32, i32, i32, i32 }
- %"struct.QClipData::ClipLine" = type { i32, %struct.QT_FT_Span* }
- %struct.QRasterBuffer = type { %struct.QRect, %struct.QRegion, %struct.QClipData*, %struct.QClipData*, i8, i32, i32, %struct.DrawHelper*, i32, i32, i32, i8* }
- %struct.QRect = type { i32, i32, i32, i32 }
- %struct.QRegion = type { %"struct.QRegion::QRegionData"* }
- %"struct.QRegion::QRegionData" = type { %struct.QBasicAtomic, %struct._XRegion*, i8*, %struct.QRegionPrivate* }
- %struct.QRegionPrivate = type opaque
- %struct.QT_FT_Span = type { i16, i16, i16, i8 }
- %struct._XRegion = type opaque
-
-define void @_Z19qt_bitmapblit16_sseP13QRasterBufferiijPKhiii(%struct.QRasterBuffer* %rasterBuffer, i32 %x, i32 %y, i32 %color, i8* %src, i32 %width, i32 %height, i32 %stride) {
-entry:
- %tmp528 = bitcast <8 x i8> zeroinitializer to <2 x i32> ; <<2 x i32>> [#uses=1]
- %tmp529 = and <2 x i32> %tmp528, bitcast (<4 x i16> < i16 -32640, i16 16448, i16 8224, i16 4112 > to <2 x i32>) ; <<2 x i32>> [#uses=1]
- %tmp542 = bitcast <2 x i32> %tmp529 to <4 x i16> ; <<4 x i16>> [#uses=1]
- %tmp543 = add <4 x i16> %tmp542, < i16 0, i16 16448, i16 24672, i16 28784 > ; <<4 x i16>> [#uses=1]
- %tmp555 = bitcast <4 x i16> %tmp543 to <8 x i8> ; <<8 x i8>> [#uses=1]
- %tmp556 = bitcast <8 x i8> %tmp555 to x86_mmx
- %tmp557 = bitcast <8 x i8> zeroinitializer to x86_mmx
- tail call void @llvm.x86.mmx.maskmovq( x86_mmx %tmp557, x86_mmx %tmp556, i8* null )
- ret void
-}
-
-declare void @llvm.x86.mmx.maskmovq(x86_mmx, x86_mmx, i8*)
diff --git a/test/CodeGen/X86/movbe.ll b/test/CodeGen/X86/movbe.ll
index e248410b2020..49e765de2e77 100644
--- a/test/CodeGen/X86/movbe.ll
+++ b/test/CodeGen/X86/movbe.ll
@@ -16,7 +16,7 @@ define void @test1(i16* nocapture %x, i16 %y) nounwind {
}
define i16 @test2(i16* %x) nounwind {
- %load = load i16* %x, align 2
+ %load = load i16, i16* %x, align 2
%bswap = call i16 @llvm.bswap.i16(i16 %load)
ret i16 %bswap
; CHECK-LABEL: test2:
@@ -36,7 +36,7 @@ define void @test3(i32* nocapture %x, i32 %y) nounwind {
}
define i32 @test4(i32* %x) nounwind {
- %load = load i32* %x, align 4
+ %load = load i32, i32* %x, align 4
%bswap = call i32 @llvm.bswap.i32(i32 %load)
ret i32 %bswap
; CHECK-LABEL: test4:
@@ -56,7 +56,7 @@ define void @test5(i64* %x, i64 %y) nounwind {
}
define i64 @test6(i64* %x) nounwind {
- %load = load i64* %x, align 8
+ %load = load i64, i64* %x, align 8
%bswap = call i64 @llvm.bswap.i64(i64 %load)
ret i64 %bswap
; CHECK-LABEL: test6:
diff --git a/test/CodeGen/X86/movfs.ll b/test/CodeGen/X86/movfs.ll
index 823e98689e7d..75b2404ec56e 100644
--- a/test/CodeGen/X86/movfs.ll
+++ b/test/CodeGen/X86/movfs.ll
@@ -2,7 +2,7 @@
define i32 @foo() nounwind readonly {
entry:
- %tmp = load i32* addrspace(257)* getelementptr (i32* addrspace(257)* inttoptr (i32 72 to i32* addrspace(257)*), i32 31) ; <i32*> [#uses=1]
- %tmp1 = load i32* %tmp ; <i32> [#uses=1]
+ %tmp = load i32*, i32* addrspace(257)* getelementptr (i32*, i32* addrspace(257)* inttoptr (i32 72 to i32* addrspace(257)*), i32 31) ; <i32*> [#uses=1]
+ %tmp1 = load i32, i32* %tmp ; <i32> [#uses=1]
ret i32 %tmp1
}
diff --git a/test/CodeGen/X86/movgs.ll b/test/CodeGen/X86/movgs.ll
index 96c5dbb8ea98..07d497b9f0a9 100644
--- a/test/CodeGen/X86/movgs.ll
+++ b/test/CodeGen/X86/movgs.ll
@@ -15,8 +15,8 @@ define i32 @test1() nounwind readonly {
; X64-NEXT: movl (%rax), %eax
; X64-NEXT: retq
entry:
- %tmp = load i32* addrspace(256)* getelementptr (i32* addrspace(256)* inttoptr (i32 72 to i32* addrspace(256)*), i32 31) ; <i32*> [#uses=1]
- %tmp1 = load i32* %tmp ; <i32> [#uses=1]
+ %tmp = load i32*, i32* addrspace(256)* getelementptr (i32*, i32* addrspace(256)* inttoptr (i32 72 to i32* addrspace(256)*), i32 31) ; <i32*> [#uses=1]
+ %tmp1 = load i32, i32* %tmp ; <i32> [#uses=1]
ret i32 %tmp1
}
@@ -39,7 +39,7 @@ define i64 @test2(void (i8*)* addrspace(256)* %tmp8) nounwind {
; X64-NEXT: {{(addq.*%rsp|popq)}}
; X64-NEXT: retq
entry:
- %tmp9 = load void (i8*)* addrspace(256)* %tmp8, align 8
+ %tmp9 = load void (i8*)*, void (i8*)* addrspace(256)* %tmp8, align 8
tail call void %tmp9(i8* undef) nounwind optsize
ret i64 0
}
@@ -56,7 +56,7 @@ define <2 x i64> @pmovsxwd_1(i64 addrspace(256)* %p) nounwind readonly {
; X64-NEXT: pmovsxwd %gs:(%{{(rcx|rdi)}}), %xmm0
; X64-NEXT: retq
entry:
- %0 = load i64 addrspace(256)* %p
+ %0 = load i64, i64 addrspace(256)* %p
%tmp2 = insertelement <2 x i64> zeroinitializer, i64 %0, i32 0
%1 = bitcast <2 x i64> %tmp2 to <8 x i16>
%2 = tail call <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16> %1) nounwind readnone
@@ -83,10 +83,10 @@ define i32 @test_no_cse() nounwind readonly {
; X64-NEXT: addl (%rcx), %eax
; X64-NEXT: retq
entry:
- %tmp = load i32* addrspace(256)* getelementptr (i32* addrspace(256)* inttoptr (i32 72 to i32* addrspace(256)*), i32 31) ; <i32*> [#uses=1]
- %tmp1 = load i32* %tmp ; <i32> [#uses=1]
- %tmp2 = load i32* addrspace(257)* getelementptr (i32* addrspace(257)* inttoptr (i32 72 to i32* addrspace(257)*), i32 31) ; <i32*> [#uses=1]
- %tmp3 = load i32* %tmp2 ; <i32> [#uses=1]
+ %tmp = load i32*, i32* addrspace(256)* getelementptr (i32*, i32* addrspace(256)* inttoptr (i32 72 to i32* addrspace(256)*), i32 31) ; <i32*> [#uses=1]
+ %tmp1 = load i32, i32* %tmp ; <i32> [#uses=1]
+ %tmp2 = load i32*, i32* addrspace(257)* getelementptr (i32*, i32* addrspace(257)* inttoptr (i32 72 to i32* addrspace(257)*), i32 31) ; <i32*> [#uses=1]
+ %tmp3 = load i32, i32* %tmp2 ; <i32> [#uses=1]
%tmp4 = add i32 %tmp1, %tmp3
ret i32 %tmp4
}
diff --git a/test/CodeGen/X86/movmsk.ll b/test/CodeGen/X86/movmsk.ll
index 25206621077b..a7ebebca4b72 100644
--- a/test/CodeGen/X86/movmsk.ll
+++ b/test/CodeGen/X86/movmsk.ll
@@ -13,7 +13,7 @@ entry:
%0 = bitcast double* %__x.addr.i to i8*
%1 = bitcast %0* %__u.i to i8*
store double %d1, double* %__x.addr.i, align 8
- %__f.i = getelementptr inbounds %0* %__u.i, i64 0, i32 0
+ %__f.i = getelementptr inbounds %0, %0* %__u.i, i64 0, i32 0
store double %d1, double* %__f.i, align 8
%tmp = bitcast double %d1 to i64
; CHECK-NOT: shr
@@ -32,7 +32,7 @@ entry:
%0 = bitcast double* %__x.addr.i to i8*
%1 = bitcast %0* %__u.i to i8*
store double %add, double* %__x.addr.i, align 8
- %__f.i = getelementptr inbounds %0* %__u.i, i64 0, i32 0
+ %__f.i = getelementptr inbounds %0, %0* %__u.i, i64 0, i32 0
store double %add, double* %__f.i, align 8
%tmp = bitcast double %add to i64
; CHECK-NOT: shr
@@ -50,7 +50,7 @@ entry:
%0 = bitcast float* %__x.addr.i to i8*
%1 = bitcast %union.anon* %__u.i to i8*
store float %f1, float* %__x.addr.i, align 4
- %__f.i = getelementptr inbounds %union.anon* %__u.i, i64 0, i32 0
+ %__f.i = getelementptr inbounds %union.anon, %union.anon* %__u.i, i64 0, i32 0
store float %f1, float* %__f.i, align 4
%2 = bitcast float %f1 to i32
; CHECK-NOT: shr
@@ -68,7 +68,7 @@ entry:
%0 = bitcast float* %__x.addr.i to i8*
%1 = bitcast %union.anon* %__u.i to i8*
store float %add, float* %__x.addr.i, align 4
- %__f.i = getelementptr inbounds %union.anon* %__u.i, i64 0, i32 0
+ %__f.i = getelementptr inbounds %union.anon, %union.anon* %__u.i, i64 0, i32 0
store float %add, float* %__f.i, align 4
%2 = bitcast float %add to i32
; CHECK-NOT: shr
@@ -104,8 +104,8 @@ entry:
; CHECK-NOT: movslq
%0 = tail call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %x) nounwind
%idxprom = sext i32 %0 to i64
- %arrayidx = getelementptr inbounds i32* %indexTable, i64 %idxprom
- %1 = load i32* %arrayidx, align 4
+ %arrayidx = getelementptr inbounds i32, i32* %indexTable, i64 %idxprom
+ %1 = load i32, i32* %arrayidx, align 4
ret i32 %1
}
@@ -117,8 +117,8 @@ entry:
%0 = bitcast <4 x float> %x to <2 x double>
%1 = tail call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> %0) nounwind
%idxprom = sext i32 %1 to i64
- %arrayidx = getelementptr inbounds i32* %indexTable, i64 %idxprom
- %2 = load i32* %arrayidx, align 4
+ %arrayidx = getelementptr inbounds i32, i32* %indexTable, i64 %idxprom
+ %2 = load i32, i32* %arrayidx, align 4
ret i32 %2
}
diff --git a/test/CodeGen/X86/movtopush.ll b/test/CodeGen/X86/movtopush.ll
index cb48ed747be4..f89e52457f35 100644
--- a/test/CodeGen/X86/movtopush.ll
+++ b/test/CodeGen/X86/movtopush.ll
@@ -1,24 +1,68 @@
; RUN: llc < %s -mtriple=i686-windows | FileCheck %s -check-prefix=NORMAL
+; RUN: llc < %s -mtriple=x86_64-windows | FileCheck %s -check-prefix=X64
; RUN: llc < %s -mtriple=i686-windows -force-align-stack -stack-alignment=32 | FileCheck %s -check-prefix=ALIGNED
+
declare void @good(i32 %a, i32 %b, i32 %c, i32 %d)
declare void @inreg(i32 %a, i32 inreg %b, i32 %c, i32 %d)
+declare void @oneparam(i32 %a)
+declare void @eightparams(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %h)
+
; Here, we should have a reserved frame, so we don't expect pushes
-; NORMAL-LABEL: test1
+; NORMAL-LABEL: test1:
; NORMAL: subl $16, %esp
; NORMAL-NEXT: movl $4, 12(%esp)
; NORMAL-NEXT: movl $3, 8(%esp)
; NORMAL-NEXT: movl $2, 4(%esp)
; NORMAL-NEXT: movl $1, (%esp)
; NORMAL-NEXT: call
+; NORMAL-NEXT: addl $16, %esp
define void @test1() {
entry:
call void @good(i32 1, i32 2, i32 3, i32 4)
ret void
}
-; Here, we expect a sequence of 4 immediate pushes
-; NORMAL-LABEL: test2
+; We're optimizing for code size, so we should get pushes for x86,
+; even though there is a reserved call frame.
+; Make sure we don't touch x86-64
+; NORMAL-LABEL: test1b:
+; NORMAL-NOT: subl {{.*}} %esp
+; NORMAL: pushl $4
+; NORMAL-NEXT: pushl $3
+; NORMAL-NEXT: pushl $2
+; NORMAL-NEXT: pushl $1
+; NORMAL-NEXT: call
+; NORMAL-NEXT: addl $16, %esp
+; X64-LABEL: test1b:
+; X64: movl $1, %ecx
+; X64-NEXT: movl $2, %edx
+; X64-NEXT: movl $3, %r8d
+; X64-NEXT: movl $4, %r9d
+; X64-NEXT: callq good
+define void @test1b() optsize {
+entry:
+ call void @good(i32 1, i32 2, i32 3, i32 4)
+ ret void
+}
+
+; Same as above, but for minsize
+; NORMAL-LABEL: test1c:
+; NORMAL-NOT: subl {{.*}} %esp
+; NORMAL: pushl $4
+; NORMAL-NEXT: pushl $3
+; NORMAL-NEXT: pushl $2
+; NORMAL-NEXT: pushl $1
+; NORMAL-NEXT: call
+; NORMAL-NEXT: addl $16, %esp
+define void @test1c() minsize {
+entry:
+ call void @good(i32 1, i32 2, i32 3, i32 4)
+ ret void
+}
+
+; If we don't have a reserved frame, we should have pushes
+; NORMAL-LABEL: test2:
; NORMAL-NOT: subl {{.*}} %esp
; NORMAL: pushl $4
; NORMAL-NEXT: pushl $3
@@ -34,53 +78,54 @@ entry:
; Again, we expect a sequence of 4 immediate pushes
; Checks that we generate the right pushes for >8bit immediates
-; NORMAL-LABEL: test2b
+; NORMAL-LABEL: test2b:
; NORMAL-NOT: subl {{.*}} %esp
; NORMAL: pushl $4096
; NORMAL-NEXT: pushl $3072
; NORMAL-NEXT: pushl $2048
; NORMAL-NEXT: pushl $1024
; NORMAL-NEXT: call
-define void @test2b(i32 %k) {
+; NORMAL-NEXT: addl $16, %esp
+define void @test2b() optsize {
entry:
- %a = alloca i32, i32 %k
call void @good(i32 1024, i32 2048, i32 3072, i32 4096)
ret void
}
; The first push should push a register
-; NORMAL-LABEL: test3
+; NORMAL-LABEL: test3:
; NORMAL-NOT: subl {{.*}} %esp
; NORMAL: pushl $4
; NORMAL-NEXT: pushl $3
; NORMAL-NEXT: pushl $2
; NORMAL-NEXT: pushl %e{{..}}
; NORMAL-NEXT: call
-define void @test3(i32 %k) {
+; NORMAL-NEXT: addl $16, %esp
+define void @test3(i32 %k) optsize {
entry:
- %a = alloca i32, i32 %k
- call void @good(i32 %k, i32 2, i32 3, i32 4)
+ %f = add i32 %k, 1
+ call void @good(i32 %f, i32 2, i32 3, i32 4)
ret void
}
; We don't support weird calling conventions
-; NORMAL-LABEL: test4
+; NORMAL-LABEL: test4:
; NORMAL: subl $12, %esp
; NORMAL-NEXT: movl $4, 8(%esp)
; NORMAL-NEXT: movl $3, 4(%esp)
; NORMAL-NEXT: movl $1, (%esp)
; NORMAL-NEXT: movl $2, %eax
; NORMAL-NEXT: call
-define void @test4(i32 %k) {
+; NORMAL-NEXT: addl $12, %esp
+define void @test4() optsize {
entry:
- %a = alloca i32, i32 %k
call void @inreg(i32 1, i32 2, i32 3, i32 4)
ret void
}
-; Check that additional alignment is added when the pushes
-; don't add up to the required alignment.
-; ALIGNED-LABEL: test5
+; When there is no reserved call frame, check that additional alignment
+; is added when the pushes don't add up to the required alignment.
+; ALIGNED-LABEL: test5:
; ALIGNED: subl $16, %esp
; ALIGNED-NEXT: pushl $4
; ALIGNED-NEXT: pushl $3
@@ -94,10 +139,38 @@ entry:
ret void
}
+; When the alignment adds up, do the transformation
+; ALIGNED-LABEL: test5b:
+; ALIGNED: pushl $8
+; ALIGNED-NEXT: pushl $7
+; ALIGNED-NEXT: pushl $6
+; ALIGNED-NEXT: pushl $5
+; ALIGNED-NEXT: pushl $4
+; ALIGNED-NEXT: pushl $3
+; ALIGNED-NEXT: pushl $2
+; ALIGNED-NEXT: pushl $1
+; ALIGNED-NEXT: call
+define void @test5b() optsize {
+entry:
+ call void @eightparams(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8)
+ ret void
+}
+
+; When compensating for the alignment isn't worth it,
+; don't use pushes.
+; ALIGNED-LABEL: test5c:
+; ALIGNED: movl $1, (%esp)
+; ALIGNED-NEXT: call
+define void @test5c() optsize {
+entry:
+ call void @oneparam(i32 1)
+ ret void
+}
+
; Check that pushing the addresses of globals (Or generally, things that
; aren't exactly immediates) isn't broken.
; Fixes PR21878.
-; NORMAL-LABEL: test6
+; NORMAL-LABEL: test6:
; NORMAL: pushl $_ext
; NORMAL-NEXT: call
declare void @f(i8*)
@@ -110,3 +183,164 @@ bb:
alloca i32
ret void
}
+
+; Check that we fold simple cases into the push
+; NORMAL-LABEL: test7:
+; NORMAL-NOT: subl {{.*}} %esp
+; NORMAL: movl 4(%esp), [[EAX:%e..]]
+; NORMAL-NEXT: pushl $4
+; NORMAL-NEXT: pushl ([[EAX]])
+; NORMAL-NEXT: pushl $2
+; NORMAL-NEXT: pushl $1
+; NORMAL-NEXT: call
+; NORMAL-NEXT: addl $16, %esp
+define void @test7(i32* %ptr) optsize {
+entry:
+ %val = load i32, i32* %ptr
+ call void @good(i32 1, i32 2, i32 %val, i32 4)
+ ret void
+}
+
+; Fold stack-relative loads into the push, with correct offset
+; In particular, at the second push, %b was at 12(%esp) and
+; %a was at 8(%esp), but the second push bumped %esp, so %a
+; is now at 12(%esp)
+; NORMAL-LABEL: test8:
+; NORMAL: pushl $4
+; NORMAL-NEXT: pushl 12(%esp)
+; NORMAL-NEXT: pushl 12(%esp)
+; NORMAL-NEXT: pushl $1
+; NORMAL-NEXT: call
+; NORMAL-NEXT: addl $16, %esp
+define void @test8(i32 %a, i32 %b) optsize {
+entry:
+ call void @good(i32 1, i32 %a, i32 %b, i32 4)
+ ret void
+}
+
+; If one call is using push instructions, and the other isn't
+; (because it has frame-index references), then we must resolve
+; these references correctly.
+; NORMAL-LABEL: test9:
+; NORMAL-NOT: leal (%esp),
+; NORMAL: pushl $4
+; NORMAL-NEXT: pushl $3
+; NORMAL-NEXT: pushl $2
+; NORMAL-NEXT: pushl $1
+; NORMAL-NEXT: call
+; NORMAL-NEXT: addl $16, %esp
+; NORMAL-NEXT: subl $16, %esp
+; NORMAL-NEXT: leal 16(%esp), [[EAX:%e..]]
+; NORMAL-NEXT: movl [[EAX]], 12(%esp)
+; NORMAL-NEXT: movl $7, 8(%esp)
+; NORMAL-NEXT: movl $6, 4(%esp)
+; NORMAL-NEXT: movl $5, (%esp)
+; NORMAL-NEXT: call
+; NORMAL-NEXT: addl $16, %esp
+define void @test9() optsize {
+entry:
+ %p = alloca i32, align 4
+ call void @good(i32 1, i32 2, i32 3, i32 4)
+ %0 = ptrtoint i32* %p to i32
+ call void @good(i32 5, i32 6, i32 7, i32 %0)
+ ret void
+}
+
+; We can end up with an indirect call which gets reloaded on the spot.
+; Make sure we reference the correct stack slot - we spill into (%esp)
+; and reload from 16(%esp) due to the pushes.
+; NORMAL-LABEL: test10:
+; NORMAL: movl $_good, [[ALLOC:.*]]
+; NORMAL-NEXT: movl [[ALLOC]], [[EAX:%e..]]
+; NORMAL-NEXT: movl [[EAX]], (%esp) # 4-byte Spill
+; NORMAL: nop
+; NORMAL: pushl $4
+; NORMAL-NEXT: pushl $3
+; NORMAL-NEXT: pushl $2
+; NORMAL-NEXT: pushl $1
+; NORMAL-NEXT: calll *16(%esp)
+; NORMAL-NEXT: addl $16, %esp
+define void @test10() optsize {
+ %stack_fptr = alloca void (i32, i32, i32, i32)*
+ store void (i32, i32, i32, i32)* @good, void (i32, i32, i32, i32)** %stack_fptr
+ %good_ptr = load volatile void (i32, i32, i32, i32)*, void (i32, i32, i32, i32)** %stack_fptr
+ call void asm sideeffect "nop", "~{ax},~{bx},~{cx},~{dx},~{bp},~{si},~{di}"()
+ call void (i32, i32, i32, i32) %good_ptr(i32 1, i32 2, i32 3, i32 4)
+ ret void
+}
+
+; We can't fold the load from the global into the push because of
+; interference from the store
+; NORMAL-LABEL: test11:
+; NORMAL: movl _the_global, [[EAX:%e..]]
+; NORMAL-NEXT: movl $42, _the_global
+; NORMAL-NEXT: pushl $4
+; NORMAL-NEXT: pushl $3
+; NORMAL-NEXT: pushl $2
+; NORMAL-NEXT: pushl [[EAX]]
+; NORMAL-NEXT: call
+; NORMAL-NEXT: addl $16, %esp
+@the_global = external global i32
+define void @test11() optsize {
+ %myload = load i32, i32* @the_global
+ store i32 42, i32* @the_global
+ call void @good(i32 %myload, i32 2, i32 3, i32 4)
+ ret void
+}
+
+; Converting one mov into a push isn't worth it when
+; doing so forces too much overhead for other calls.
+; NORMAL-LABEL: test12:
+; NORMAL: subl $16, %esp
+; NORMAL-NEXT: movl $4, 8(%esp)
+; NORMAL-NEXT: movl $3, 4(%esp)
+; NORMAL-NEXT: movl $1, (%esp)
+; NORMAL-NEXT: movl $2, %eax
+; NORMAL-NEXT: calll _inreg
+; NORMAL-NEXT: movl $8, 12(%esp)
+; NORMAL-NEXT: movl $7, 8(%esp)
+; NORMAL-NEXT: movl $6, 4(%esp)
+; NORMAL-NEXT: movl $5, (%esp)
+; NORMAL-NEXT: calll _good
+; NORMAL-NEXT: movl $12, 8(%esp)
+; NORMAL-NEXT: movl $11, 4(%esp)
+; NORMAL-NEXT: movl $9, (%esp)
+; NORMAL-NEXT: movl $10, %eax
+; NORMAL-NEXT: calll _inreg
+; NORMAL-NEXT: addl $16, %esp
+define void @test12() optsize {
+entry:
+ call void @inreg(i32 1, i32 2, i32 3, i32 4)
+ call void @good(i32 5, i32 6, i32 7, i32 8)
+ call void @inreg(i32 9, i32 10, i32 11, i32 12)
+ ret void
+}
+
+; But if the gains outweigh the overhead, we should do it
+; NORMAL-LABEL: test12b:
+; NORMAL: pushl $4
+; NORMAL-NEXT: pushl $3
+; NORMAL-NEXT: pushl $2
+; NORMAL-NEXT: pushl $1
+; NORMAL-NEXT: calll _good
+; NORMAL-NEXT: addl $16, %esp
+; NORMAL-NEXT: subl $12, %esp
+; NORMAL-NEXT: movl $8, 8(%esp)
+; NORMAL-NEXT: movl $7, 4(%esp)
+; NORMAL-NEXT: movl $5, (%esp)
+; NORMAL-NEXT: movl $6, %eax
+; NORMAL-NEXT: calll _inreg
+; NORMAL-NEXT: addl $12, %esp
+; NORMAL-NEXT: pushl $12
+; NORMAL-NEXT: pushl $11
+; NORMAL-NEXT: pushl $10
+; NORMAL-NEXT: pushl $9
+; NORMAL-NEXT: calll _good
+; NORMAL-NEXT: addl $16, %esp
+define void @test12b() optsize {
+entry:
+ call void @good(i32 1, i32 2, i32 3, i32 4)
+ call void @inreg(i32 5, i32 6, i32 7, i32 8)
+ call void @good(i32 9, i32 10, i32 11, i32 12)
+ ret void
+}
diff --git a/test/CodeGen/X86/ms-inline-asm.ll b/test/CodeGen/X86/ms-inline-asm.ll
index f0bdbba50ef3..428eb1b7190b 100644
--- a/test/CodeGen/X86/ms-inline-asm.ll
+++ b/test/CodeGen/X86/ms-inline-asm.ll
@@ -44,13 +44,13 @@ entry:
define i32 @t18() nounwind {
entry:
%foo = alloca %struct.t18_type, align 4
- %a = getelementptr inbounds %struct.t18_type* %foo, i32 0, i32 0
+ %a = getelementptr inbounds %struct.t18_type, %struct.t18_type* %foo, i32 0, i32 0
store i32 1, i32* %a, align 4
- %b = getelementptr inbounds %struct.t18_type* %foo, i32 0, i32 1
+ %b = getelementptr inbounds %struct.t18_type, %struct.t18_type* %foo, i32 0, i32 1
store i32 2, i32* %b, align 4
call void asm sideeffect inteldialect "lea ebx, foo\0A\09mov eax, [ebx].0\0A\09mov [ebx].4, ecx", "~{eax},~{dirflag},~{fpsr},~{flags}"() nounwind
- %b1 = getelementptr inbounds %struct.t18_type* %foo, i32 0, i32 1
- %0 = load i32* %b1, align 4
+ %b1 = getelementptr inbounds %struct.t18_type, %struct.t18_type* %foo, i32 0, i32 1
+ %0 = load i32, i32* %b1, align 4
ret i32 %0
; CHECK: t18
; CHECK: {{## InlineAsm Start|#APP}}
@@ -87,7 +87,7 @@ entry:
%res = alloca i32*, align 4
call void asm sideeffect inteldialect "lea edi, dword ptr $0", "*m,~{edi},~{dirflag},~{fpsr},~{flags}"([2 x i32]* @results) nounwind
call void asm sideeffect inteldialect "mov dword ptr $0, edi", "=*m,~{dirflag},~{fpsr},~{flags}"(i32** %res) nounwind
- %0 = load i32** %res, align 4
+ %0 = load i32*, i32** %res, align 4
ret i32* %0
; CHECK-LABEL: t30:
; CHECK: {{## InlineAsm Start|#APP}}
@@ -111,7 +111,7 @@ entry:
%val = alloca i32, align 64
store i32 -1, i32* %val, align 64
call void asm sideeffect inteldialect "mov dword ptr $0, esp", "=*m,~{dirflag},~{fpsr},~{flags}"(i32* %val)
- %sp = load i32* %val, align 64
+ %sp = load i32, i32* %val, align 64
ret i32 %sp
; CHECK-LABEL: t31:
; CHECK: pushl %ebp
diff --git a/test/CodeGen/X86/mul128_sext_loop.ll b/test/CodeGen/X86/mul128_sext_loop.ll
index a516f03cbc3e..efb7e02720b4 100644
--- a/test/CodeGen/X86/mul128_sext_loop.ll
+++ b/test/CodeGen/X86/mul128_sext_loop.ll
@@ -14,8 +14,8 @@ define void @test(i64* nocapture %arr, i64 %arrsize, i64 %factor) nounwind uwtab
; CHECK-NOT: mul
%carry.02 = phi i128 [ 0, %.lr.ph ], [ %10, %3 ]
%i.01 = phi i64 [ 0, %.lr.ph ], [ %11, %3 ]
- %4 = getelementptr inbounds i64* %arr, i64 %i.01
- %5 = load i64* %4, align 8
+ %4 = getelementptr inbounds i64, i64* %arr, i64 %i.01
+ %5 = load i64, i64* %4, align 8
%6 = sext i64 %5 to i128
%7 = mul nsw i128 %6, %2
%8 = add nsw i128 %7, %carry.02
diff --git a/test/CodeGen/X86/muloti.ll b/test/CodeGen/X86/muloti.ll
index 2f0986e831e2..6c6198e400fc 100644
--- a/test/CodeGen/X86/muloti.ll
+++ b/test/CodeGen/X86/muloti.ll
@@ -41,21 +41,21 @@ entry:
%coerce1 = alloca i128, align 16
%b.addr = alloca i128, align 16
%0 = bitcast i128* %coerce to %0*
- %1 = getelementptr %0* %0, i32 0, i32 0
+ %1 = getelementptr %0, %0* %0, i32 0, i32 0
store i64 %a.coerce0, i64* %1
- %2 = getelementptr %0* %0, i32 0, i32 1
+ %2 = getelementptr %0, %0* %0, i32 0, i32 1
store i64 %a.coerce1, i64* %2
- %a = load i128* %coerce, align 16
+ %a = load i128, i128* %coerce, align 16
store i128 %a, i128* %a.addr, align 16
%3 = bitcast i128* %coerce1 to %0*
- %4 = getelementptr %0* %3, i32 0, i32 0
+ %4 = getelementptr %0, %0* %3, i32 0, i32 0
store i64 %b.coerce0, i64* %4
- %5 = getelementptr %0* %3, i32 0, i32 1
+ %5 = getelementptr %0, %0* %3, i32 0, i32 1
store i64 %b.coerce1, i64* %5
- %b = load i128* %coerce1, align 16
+ %b = load i128, i128* %coerce1, align 16
store i128 %b, i128* %b.addr, align 16
- %tmp = load i128* %a.addr, align 16
- %tmp2 = load i128* %b.addr, align 16
+ %tmp = load i128, i128* %a.addr, align 16
+ %tmp2 = load i128, i128* %b.addr, align 16
%6 = call %1 @llvm.umul.with.overflow.i128(i128 %tmp, i128 %tmp2)
; CHECK: cmov
; CHECK: divti3
@@ -70,7 +70,7 @@ overflow: ; preds = %entry
nooverflow: ; preds = %entry
store i128 %7, i128* %retval
%9 = bitcast i128* %retval to %0*
- %10 = load %0* %9, align 1
+ %10 = load %0, %0* %9, align 1
ret %0 %10
}
diff --git a/test/CodeGen/X86/mult-alt-generic-i686.ll b/test/CodeGen/X86/mult-alt-generic-i686.ll
index 54bc3a42f035..9ebdf55d0e03 100644
--- a/test/CodeGen/X86/mult-alt-generic-i686.ll
+++ b/test/CodeGen/X86/mult-alt-generic-i686.ll
@@ -33,10 +33,10 @@ entry:
%in1 = alloca i32, align 4
store i32 0, i32* %out0, align 4
store i32 1, i32* %in1, align 4
- %tmp = load i32* %in1, align 4
+ %tmp = load i32, i32* %in1, align 4
%0 = call i32 asm "foo $1,$0", "=r,<r,~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
store i32 %0, i32* %out0, align 4
- %tmp1 = load i32* %in1, align 4
+ %tmp1 = load i32, i32* %in1, align 4
%1 = call i32 asm "foo $1,$0", "=r,r<,~{dirflag},~{fpsr},~{flags}"(i32 %tmp1) nounwind
store i32 %1, i32* %out0, align 4
ret void
@@ -48,10 +48,10 @@ entry:
%in1 = alloca i32, align 4
store i32 0, i32* %out0, align 4
store i32 1, i32* %in1, align 4
- %tmp = load i32* %in1, align 4
+ %tmp = load i32, i32* %in1, align 4
%0 = call i32 asm "foo $1,$0", "=r,>r,~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
store i32 %0, i32* %out0, align 4
- %tmp1 = load i32* %in1, align 4
+ %tmp1 = load i32, i32* %in1, align 4
%1 = call i32 asm "foo $1,$0", "=r,r>,~{dirflag},~{fpsr},~{flags}"(i32 %tmp1) nounwind
store i32 %1, i32* %out0, align 4
ret void
@@ -63,7 +63,7 @@ entry:
%in1 = alloca i32, align 4
store i32 0, i32* %out0, align 4
store i32 1, i32* %in1, align 4
- %tmp = load i32* %in1, align 4
+ %tmp = load i32, i32* %in1, align 4
%0 = call i32 asm "foo $1,$0", "=r,r,~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
store i32 %0, i32* %out0, align 4
ret void
@@ -120,10 +120,10 @@ entry:
%in1 = alloca i32, align 4
store i32 0, i32* %out0, align 4
store i32 1, i32* %in1, align 4
- %tmp = load i32* %in1, align 4
+ %tmp = load i32, i32* %in1, align 4
%0 = call i32 asm "foo $1,$0", "=r,imr,~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
store i32 %0, i32* %out0, align 4
- %tmp1 = load i32* @min1, align 4
+ %tmp1 = load i32, i32* @min1, align 4
%1 = call i32 asm "foo $1,$0", "=r,imr,~{dirflag},~{fpsr},~{flags}"(i32 %tmp1) nounwind
store i32 %1, i32* %out0, align 4
%2 = call i32 asm "foo $1,$0", "=r,imr,~{dirflag},~{fpsr},~{flags}"(i32 1) nounwind
@@ -137,15 +137,15 @@ entry:
%in1 = alloca i32, align 4
store i32 0, i32* %out0, align 4
store i32 1, i32* %in1, align 4
- %tmp = load i32* %in1, align 4
+ %tmp = load i32, i32* %in1, align 4
%0 = call i32 asm "foo $1,$0", "=r,X,~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
store i32 %0, i32* %out0, align 4
- %tmp1 = load i32* @min1, align 4
+ %tmp1 = load i32, i32* @min1, align 4
%1 = call i32 asm "foo $1,$0", "=r,X,~{dirflag},~{fpsr},~{flags}"(i32 %tmp1) nounwind
store i32 %1, i32* %out0, align 4
%2 = call i32 asm "foo $1,$0", "=r,X,~{dirflag},~{fpsr},~{flags}"(i32 1) nounwind
store i32 %2, i32* %out0, align 4
- %3 = call i32 asm "foo $1,$0", "=r,X,~{dirflag},~{fpsr},~{flags}"(i32* getelementptr inbounds ([2 x i32]* @marray, i32 0, i32 0)) nounwind
+ %3 = call i32 asm "foo $1,$0", "=r,X,~{dirflag},~{fpsr},~{flags}"(i32* getelementptr inbounds ([2 x i32], [2 x i32]* @marray, i32 0, i32 0)) nounwind
store i32 %3, i32* %out0, align 4
%4 = call i32 asm "foo $1,$0", "=r,X,~{dirflag},~{fpsr},~{flags}"(double 1.000000e+001) nounwind
store i32 %4, i32* %out0, align 4
@@ -158,14 +158,14 @@ define void @single_p() nounwind {
entry:
%out0 = alloca i32, align 4
store i32 0, i32* %out0, align 4
- %0 = call i32 asm "foo $1,$0", "=r,im,~{dirflag},~{fpsr},~{flags}"(i32* getelementptr inbounds ([2 x i32]* @marray, i32 0, i32 0)) nounwind
+ %0 = call i32 asm "foo $1,$0", "=r,im,~{dirflag},~{fpsr},~{flags}"(i32* getelementptr inbounds ([2 x i32], [2 x i32]* @marray, i32 0, i32 0)) nounwind
store i32 %0, i32* %out0, align 4
ret void
}
define void @multi_m() nounwind {
entry:
- %tmp = load i32* @min1, align 4
+ %tmp = load i32, i32* @min1, align 4
call void asm "foo $1,$0", "=*m|r,m|r,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32 %tmp) nounwind
ret void
}
@@ -190,10 +190,10 @@ entry:
%in1 = alloca i32, align 4
store i32 0, i32* %out0, align 4
store i32 1, i32* %in1, align 4
- %tmp = load i32* %in1, align 4
+ %tmp = load i32, i32* %in1, align 4
%0 = call i32 asm "foo $1,$0", "=r|r,r|<r,~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
store i32 %0, i32* %out0, align 4
- %tmp1 = load i32* %in1, align 4
+ %tmp1 = load i32, i32* %in1, align 4
%1 = call i32 asm "foo $1,$0", "=r|r,r|r<,~{dirflag},~{fpsr},~{flags}"(i32 %tmp1) nounwind
store i32 %1, i32* %out0, align 4
ret void
@@ -205,10 +205,10 @@ entry:
%in1 = alloca i32, align 4
store i32 0, i32* %out0, align 4
store i32 1, i32* %in1, align 4
- %tmp = load i32* %in1, align 4
+ %tmp = load i32, i32* %in1, align 4
%0 = call i32 asm "foo $1,$0", "=r|r,r|>r,~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
store i32 %0, i32* %out0, align 4
- %tmp1 = load i32* %in1, align 4
+ %tmp1 = load i32, i32* %in1, align 4
%1 = call i32 asm "foo $1,$0", "=r|r,r|r>,~{dirflag},~{fpsr},~{flags}"(i32 %tmp1) nounwind
store i32 %1, i32* %out0, align 4
ret void
@@ -220,7 +220,7 @@ entry:
%in1 = alloca i32, align 4
store i32 0, i32* %out0, align 4
store i32 1, i32* %in1, align 4
- %tmp = load i32* %in1, align 4
+ %tmp = load i32, i32* %in1, align 4
%0 = call i32 asm "foo $1,$0", "=r|r,r|m,~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
store i32 %0, i32* %out0, align 4
ret void
@@ -277,10 +277,10 @@ entry:
%in1 = alloca i32, align 4
store i32 0, i32* %out0, align 4
store i32 1, i32* %in1, align 4
- %tmp = load i32* %in1, align 4
+ %tmp = load i32, i32* %in1, align 4
%0 = call i32 asm "foo $1,$0", "=r|r,r|imr,~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
store i32 %0, i32* %out0, align 4
- %tmp1 = load i32* @min1, align 4
+ %tmp1 = load i32, i32* @min1, align 4
%1 = call i32 asm "foo $1,$0", "=r|r,r|imr,~{dirflag},~{fpsr},~{flags}"(i32 %tmp1) nounwind
store i32 %1, i32* %out0, align 4
%2 = call i32 asm "foo $1,$0", "=r|r,r|imr,~{dirflag},~{fpsr},~{flags}"(i32 1) nounwind
@@ -294,15 +294,15 @@ entry:
%in1 = alloca i32, align 4
store i32 0, i32* %out0, align 4
store i32 1, i32* %in1, align 4
- %tmp = load i32* %in1, align 4
+ %tmp = load i32, i32* %in1, align 4
%0 = call i32 asm "foo $1,$0", "=r|r,r|X,~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
store i32 %0, i32* %out0, align 4
- %tmp1 = load i32* @min1, align 4
+ %tmp1 = load i32, i32* @min1, align 4
%1 = call i32 asm "foo $1,$0", "=r|r,r|X,~{dirflag},~{fpsr},~{flags}"(i32 %tmp1) nounwind
store i32 %1, i32* %out0, align 4
%2 = call i32 asm "foo $1,$0", "=r|r,r|X,~{dirflag},~{fpsr},~{flags}"(i32 1) nounwind
store i32 %2, i32* %out0, align 4
- %3 = call i32 asm "foo $1,$0", "=r|r,r|X,~{dirflag},~{fpsr},~{flags}"(i32* getelementptr inbounds ([2 x i32]* @marray, i32 0, i32 0)) nounwind
+ %3 = call i32 asm "foo $1,$0", "=r|r,r|X,~{dirflag},~{fpsr},~{flags}"(i32* getelementptr inbounds ([2 x i32], [2 x i32]* @marray, i32 0, i32 0)) nounwind
store i32 %3, i32* %out0, align 4
%4 = call i32 asm "foo $1,$0", "=r|r,r|X,~{dirflag},~{fpsr},~{flags}"(double 1.000000e+001) nounwind
store i32 %4, i32* %out0, align 4
@@ -315,7 +315,7 @@ define void @multi_p() nounwind {
entry:
%out0 = alloca i32, align 4
store i32 0, i32* %out0, align 4
- %0 = call i32 asm "foo $1,$0", "=r|r,r|im,~{dirflag},~{fpsr},~{flags}"(i32* getelementptr inbounds ([2 x i32]* @marray, i32 0, i32 0)) nounwind
+ %0 = call i32 asm "foo $1,$0", "=r|r,r|im,~{dirflag},~{fpsr},~{flags}"(i32* getelementptr inbounds ([2 x i32], [2 x i32]* @marray, i32 0, i32 0)) nounwind
store i32 %0, i32* %out0, align 4
ret void
}
diff --git a/test/CodeGen/X86/mult-alt-generic-x86_64.ll b/test/CodeGen/X86/mult-alt-generic-x86_64.ll
index 84a9c8140943..a87655e5eef3 100644
--- a/test/CodeGen/X86/mult-alt-generic-x86_64.ll
+++ b/test/CodeGen/X86/mult-alt-generic-x86_64.ll
@@ -33,10 +33,10 @@ entry:
%in1 = alloca i32, align 4
store i32 0, i32* %out0, align 4
store i32 1, i32* %in1, align 4
- %tmp = load i32* %in1, align 4
+ %tmp = load i32, i32* %in1, align 4
%0 = call i32 asm "foo $1,$0", "=r,<r,~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
store i32 %0, i32* %out0, align 4
- %tmp1 = load i32* %in1, align 4
+ %tmp1 = load i32, i32* %in1, align 4
%1 = call i32 asm "foo $1,$0", "=r,r<,~{dirflag},~{fpsr},~{flags}"(i32 %tmp1) nounwind
store i32 %1, i32* %out0, align 4
ret void
@@ -48,10 +48,10 @@ entry:
%in1 = alloca i32, align 4
store i32 0, i32* %out0, align 4
store i32 1, i32* %in1, align 4
- %tmp = load i32* %in1, align 4
+ %tmp = load i32, i32* %in1, align 4
%0 = call i32 asm "foo $1,$0", "=r,>r,~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
store i32 %0, i32* %out0, align 4
- %tmp1 = load i32* %in1, align 4
+ %tmp1 = load i32, i32* %in1, align 4
%1 = call i32 asm "foo $1,$0", "=r,r>,~{dirflag},~{fpsr},~{flags}"(i32 %tmp1) nounwind
store i32 %1, i32* %out0, align 4
ret void
@@ -63,7 +63,7 @@ entry:
%in1 = alloca i32, align 4
store i32 0, i32* %out0, align 4
store i32 1, i32* %in1, align 4
- %tmp = load i32* %in1, align 4
+ %tmp = load i32, i32* %in1, align 4
%0 = call i32 asm "foo $1,$0", "=r,r,~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
store i32 %0, i32* %out0, align 4
ret void
@@ -120,10 +120,10 @@ entry:
%in1 = alloca i32, align 4
store i32 0, i32* %out0, align 4
store i32 1, i32* %in1, align 4
- %tmp = load i32* %in1, align 4
+ %tmp = load i32, i32* %in1, align 4
%0 = call i32 asm "foo $1,$0", "=r,imr,~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
store i32 %0, i32* %out0, align 4
- %tmp1 = load i32* @min1, align 4
+ %tmp1 = load i32, i32* @min1, align 4
%1 = call i32 asm "foo $1,$0", "=r,imr,~{dirflag},~{fpsr},~{flags}"(i32 %tmp1) nounwind
store i32 %1, i32* %out0, align 4
%2 = call i32 asm "foo $1,$0", "=r,imr,~{dirflag},~{fpsr},~{flags}"(i32 1) nounwind
@@ -137,15 +137,15 @@ entry:
%in1 = alloca i32, align 4
store i32 0, i32* %out0, align 4
store i32 1, i32* %in1, align 4
- %tmp = load i32* %in1, align 4
+ %tmp = load i32, i32* %in1, align 4
%0 = call i32 asm "foo $1,$0", "=r,X,~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
store i32 %0, i32* %out0, align 4
- %tmp1 = load i32* @min1, align 4
+ %tmp1 = load i32, i32* @min1, align 4
%1 = call i32 asm "foo $1,$0", "=r,X,~{dirflag},~{fpsr},~{flags}"(i32 %tmp1) nounwind
store i32 %1, i32* %out0, align 4
%2 = call i32 asm "foo $1,$0", "=r,X,~{dirflag},~{fpsr},~{flags}"(i32 1) nounwind
store i32 %2, i32* %out0, align 4
- %3 = call i32 asm "foo $1,$0", "=r,X,~{dirflag},~{fpsr},~{flags}"(i32* getelementptr inbounds ([2 x i32]* @marray, i32 0, i32 0)) nounwind
+ %3 = call i32 asm "foo $1,$0", "=r,X,~{dirflag},~{fpsr},~{flags}"(i32* getelementptr inbounds ([2 x i32], [2 x i32]* @marray, i32 0, i32 0)) nounwind
store i32 %3, i32* %out0, align 4
%4 = call i32 asm "foo $1,$0", "=r,X,~{dirflag},~{fpsr},~{flags}"(double 1.000000e+001) nounwind
store i32 %4, i32* %out0, align 4
@@ -158,14 +158,14 @@ define void @single_p() nounwind {
entry:
%out0 = alloca i32, align 4
store i32 0, i32* %out0, align 4
- %0 = call i32 asm "foo $1,$0", "=r,im,~{dirflag},~{fpsr},~{flags}"(i32* getelementptr inbounds ([2 x i32]* @marray, i32 0, i32 0)) nounwind
+ %0 = call i32 asm "foo $1,$0", "=r,im,~{dirflag},~{fpsr},~{flags}"(i32* getelementptr inbounds ([2 x i32], [2 x i32]* @marray, i32 0, i32 0)) nounwind
store i32 %0, i32* %out0, align 4
ret void
}
define void @multi_m() nounwind {
entry:
- %tmp = load i32* @min1, align 4
+ %tmp = load i32, i32* @min1, align 4
call void asm "foo $1,$0", "=*m|r,m|r,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32 %tmp) nounwind
ret void
}
@@ -190,10 +190,10 @@ entry:
%in1 = alloca i32, align 4
store i32 0, i32* %out0, align 4
store i32 1, i32* %in1, align 4
- %tmp = load i32* %in1, align 4
+ %tmp = load i32, i32* %in1, align 4
%0 = call i32 asm "foo $1,$0", "=r|r,r|<r,~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
store i32 %0, i32* %out0, align 4
- %tmp1 = load i32* %in1, align 4
+ %tmp1 = load i32, i32* %in1, align 4
%1 = call i32 asm "foo $1,$0", "=r|r,r|r<,~{dirflag},~{fpsr},~{flags}"(i32 %tmp1) nounwind
store i32 %1, i32* %out0, align 4
ret void
@@ -205,10 +205,10 @@ entry:
%in1 = alloca i32, align 4
store i32 0, i32* %out0, align 4
store i32 1, i32* %in1, align 4
- %tmp = load i32* %in1, align 4
+ %tmp = load i32, i32* %in1, align 4
%0 = call i32 asm "foo $1,$0", "=r|r,r|>r,~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
store i32 %0, i32* %out0, align 4
- %tmp1 = load i32* %in1, align 4
+ %tmp1 = load i32, i32* %in1, align 4
%1 = call i32 asm "foo $1,$0", "=r|r,r|r>,~{dirflag},~{fpsr},~{flags}"(i32 %tmp1) nounwind
store i32 %1, i32* %out0, align 4
ret void
@@ -220,7 +220,7 @@ entry:
%in1 = alloca i32, align 4
store i32 0, i32* %out0, align 4
store i32 1, i32* %in1, align 4
- %tmp = load i32* %in1, align 4
+ %tmp = load i32, i32* %in1, align 4
%0 = call i32 asm "foo $1,$0", "=r|r,r|m,~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
store i32 %0, i32* %out0, align 4
ret void
@@ -277,10 +277,10 @@ entry:
%in1 = alloca i32, align 4
store i32 0, i32* %out0, align 4
store i32 1, i32* %in1, align 4
- %tmp = load i32* %in1, align 4
+ %tmp = load i32, i32* %in1, align 4
%0 = call i32 asm "foo $1,$0", "=r|r,r|imr,~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
store i32 %0, i32* %out0, align 4
- %tmp1 = load i32* @min1, align 4
+ %tmp1 = load i32, i32* @min1, align 4
%1 = call i32 asm "foo $1,$0", "=r|r,r|imr,~{dirflag},~{fpsr},~{flags}"(i32 %tmp1) nounwind
store i32 %1, i32* %out0, align 4
%2 = call i32 asm "foo $1,$0", "=r|r,r|imr,~{dirflag},~{fpsr},~{flags}"(i32 1) nounwind
@@ -294,15 +294,15 @@ entry:
%in1 = alloca i32, align 4
store i32 0, i32* %out0, align 4
store i32 1, i32* %in1, align 4
- %tmp = load i32* %in1, align 4
+ %tmp = load i32, i32* %in1, align 4
%0 = call i32 asm "foo $1,$0", "=r|r,r|X,~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
store i32 %0, i32* %out0, align 4
- %tmp1 = load i32* @min1, align 4
+ %tmp1 = load i32, i32* @min1, align 4
%1 = call i32 asm "foo $1,$0", "=r|r,r|X,~{dirflag},~{fpsr},~{flags}"(i32 %tmp1) nounwind
store i32 %1, i32* %out0, align 4
%2 = call i32 asm "foo $1,$0", "=r|r,r|X,~{dirflag},~{fpsr},~{flags}"(i32 1) nounwind
store i32 %2, i32* %out0, align 4
- %3 = call i32 asm "foo $1,$0", "=r|r,r|X,~{dirflag},~{fpsr},~{flags}"(i32* getelementptr inbounds ([2 x i32]* @marray, i32 0, i32 0)) nounwind
+ %3 = call i32 asm "foo $1,$0", "=r|r,r|X,~{dirflag},~{fpsr},~{flags}"(i32* getelementptr inbounds ([2 x i32], [2 x i32]* @marray, i32 0, i32 0)) nounwind
store i32 %3, i32* %out0, align 4
%4 = call i32 asm "foo $1,$0", "=r|r,r|X,~{dirflag},~{fpsr},~{flags}"(double 1.000000e+001) nounwind
store i32 %4, i32* %out0, align 4
@@ -315,7 +315,7 @@ define void @multi_p() nounwind {
entry:
%out0 = alloca i32, align 4
store i32 0, i32* %out0, align 4
- %0 = call i32 asm "foo $1,$0", "=r|r,r|im,~{dirflag},~{fpsr},~{flags}"(i32* getelementptr inbounds ([2 x i32]* @marray, i32 0, i32 0)) nounwind
+ %0 = call i32 asm "foo $1,$0", "=r|r,r|im,~{dirflag},~{fpsr},~{flags}"(i32* getelementptr inbounds ([2 x i32], [2 x i32]* @marray, i32 0, i32 0)) nounwind
store i32 %0, i32* %out0, align 4
ret void
}
diff --git a/test/CodeGen/X86/mult-alt-x86.ll b/test/CodeGen/X86/mult-alt-x86.ll
index cb2219a6ed75..5174f85adb9f 100644
--- a/test/CodeGen/X86/mult-alt-x86.ll
+++ b/test/CodeGen/X86/mult-alt-x86.ll
@@ -11,7 +11,7 @@ target triple = "i686-pc-win32"
define void @single_R() nounwind {
entry:
- %tmp = load i32* @min1, align 4
+ %tmp = load i32, i32* @min1, align 4
%0 = call i32 asm "foo $1,$0", "=R,R,~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
store i32 %0, i32* @mout0, align 4
ret void
@@ -19,7 +19,7 @@ entry:
define void @single_q() nounwind {
entry:
- %tmp = load i32* @min1, align 4
+ %tmp = load i32, i32* @min1, align 4
%0 = call i32 asm "foo $1,$0", "=q,q,~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
store i32 %0, i32* @mout0, align 4
ret void
@@ -27,7 +27,7 @@ entry:
define void @single_Q() nounwind {
entry:
- %tmp = load i32* @min1, align 4
+ %tmp = load i32, i32* @min1, align 4
%0 = call i32 asm "foo $1,$0", "=Q,Q,~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
store i32 %0, i32* @mout0, align 4
ret void
@@ -35,7 +35,7 @@ entry:
define void @single_a() nounwind {
entry:
- %tmp = load i32* @min1, align 4
+ %tmp = load i32, i32* @min1, align 4
%0 = call i32 asm "foo $1,$0", "={ax},{ax},~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
store i32 %0, i32* @mout0, align 4
ret void
@@ -43,7 +43,7 @@ entry:
define void @single_b() nounwind {
entry:
- %tmp = load i32* @min1, align 4
+ %tmp = load i32, i32* @min1, align 4
%0 = call i32 asm "foo $1,$0", "={bx},{bx},~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
store i32 %0, i32* @mout0, align 4
ret void
@@ -51,7 +51,7 @@ entry:
define void @single_c() nounwind {
entry:
- %tmp = load i32* @min1, align 4
+ %tmp = load i32, i32* @min1, align 4
%0 = call i32 asm "foo $1,$0", "={cx},{cx},~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
store i32 %0, i32* @mout0, align 4
ret void
@@ -59,7 +59,7 @@ entry:
define void @single_d() nounwind {
entry:
- %tmp = load i32* @min1, align 4
+ %tmp = load i32, i32* @min1, align 4
%0 = call i32 asm "foo $1,$0", "={dx},{dx},~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
store i32 %0, i32* @mout0, align 4
ret void
@@ -67,7 +67,7 @@ entry:
define void @single_S() nounwind {
entry:
- %tmp = load i32* @min1, align 4
+ %tmp = load i32, i32* @min1, align 4
%0 = call i32 asm "foo $1,$0", "={si},{si},~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
store i32 %0, i32* @mout0, align 4
ret void
@@ -75,7 +75,7 @@ entry:
define void @single_D() nounwind {
entry:
- %tmp = load i32* @min1, align 4
+ %tmp = load i32, i32* @min1, align 4
%0 = call i32 asm "foo $1,$0", "={di},{di},~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
store i32 %0, i32* @mout0, align 4
ret void
@@ -83,7 +83,7 @@ entry:
define void @single_A() nounwind {
entry:
- %tmp = load i32* @min1, align 4
+ %tmp = load i32, i32* @min1, align 4
%0 = call i32 asm "foo $1,$0", "=A,A,~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
store i32 %0, i32* @mout0, align 4
ret void
@@ -106,7 +106,7 @@ entry:
define void @single_y() nounwind {
entry:
- %tmp = load double* @din1, align 8
+ %tmp = load double, double* @din1, align 8
%0 = call double asm "foo $1,$0", "=y,y,~{dirflag},~{fpsr},~{flags}"(double %tmp) nounwind
store double %0, double* @dout0, align 8
ret void
@@ -114,7 +114,7 @@ entry:
define void @single_x() nounwind {
entry:
- %tmp = load double* @din1, align 8
+ %tmp = load double, double* @din1, align 8
%0 = call double asm "foo $1,$0", "=x,x,~{dirflag},~{fpsr},~{flags}"(double %tmp) nounwind
store double %0, double* @dout0, align 8
ret void
@@ -191,70 +191,70 @@ entry:
define void @multi_R() nounwind {
entry:
- %tmp = load i32* @min1, align 4
+ %tmp = load i32, i32* @min1, align 4
call void asm "foo $1,$0", "=*r|R|m,r|R|m,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32 %tmp) nounwind
ret void
}
define void @multi_q() nounwind {
entry:
- %tmp = load i32* @min1, align 4
+ %tmp = load i32, i32* @min1, align 4
call void asm "foo $1,$0", "=*r|q|m,r|q|m,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32 %tmp) nounwind
ret void
}
define void @multi_Q() nounwind {
entry:
- %tmp = load i32* @min1, align 4
+ %tmp = load i32, i32* @min1, align 4
call void asm "foo $1,$0", "=*r|Q|m,r|Q|m,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32 %tmp) nounwind
ret void
}
define void @multi_a() nounwind {
entry:
- %tmp = load i32* @min1, align 4
+ %tmp = load i32, i32* @min1, align 4
call void asm "foo $1,$0", "=*r|{ax}|m,r|{ax}|m,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32 %tmp) nounwind
ret void
}
define void @multi_b() nounwind {
entry:
- %tmp = load i32* @min1, align 4
+ %tmp = load i32, i32* @min1, align 4
call void asm "foo $1,$0", "=*r|{bx}|m,r|{bx}|m,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32 %tmp) nounwind
ret void
}
define void @multi_c() nounwind {
entry:
- %tmp = load i32* @min1, align 4
+ %tmp = load i32, i32* @min1, align 4
call void asm "foo $1,$0", "=*r|{cx}|m,r|{cx}|m,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32 %tmp) nounwind
ret void
}
define void @multi_d() nounwind {
entry:
- %tmp = load i32* @min1, align 4
+ %tmp = load i32, i32* @min1, align 4
call void asm "foo $1,$0", "=*r|{dx}|m,r|{dx},~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32 %tmp) nounwind
ret void
}
define void @multi_S() nounwind {
entry:
- %tmp = load i32* @min1, align 4
+ %tmp = load i32, i32* @min1, align 4
call void asm "foo $1,$0", "=*r|{si}|m,r|{si}|m,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32 %tmp) nounwind
ret void
}
define void @multi_D() nounwind {
entry:
- %tmp = load i32* @min1, align 4
+ %tmp = load i32, i32* @min1, align 4
call void asm "foo $1,$0", "=*r|{di}|m,r|{di}|m,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32 %tmp) nounwind
ret void
}
define void @multi_A() nounwind {
entry:
- %tmp = load i32* @min1, align 4
+ %tmp = load i32, i32* @min1, align 4
call void asm "foo $1,$0", "=*r|A|m,r|A|m,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32 %tmp) nounwind
ret void
}
@@ -276,14 +276,14 @@ entry:
define void @multi_y() nounwind {
entry:
- %tmp = load double* @din1, align 8
+ %tmp = load double, double* @din1, align 8
call void asm "foo $1,$0", "=*r|y|m,r|y|m,~{dirflag},~{fpsr},~{flags}"(double* @dout0, double %tmp) nounwind
ret void
}
define void @multi_x() nounwind {
entry:
- %tmp = load double* @din1, align 8
+ %tmp = load double, double* @din1, align 8
call void asm "foo $1,$0", "=*r|x|m,r|x|m,~{dirflag},~{fpsr},~{flags}"(double* @dout0, double %tmp) nounwind
ret void
}
diff --git a/test/CodeGen/X86/multiple-loop-post-inc.ll b/test/CodeGen/X86/multiple-loop-post-inc.ll
index 4edc1ff0b3fa..be778da57332 100644
--- a/test/CodeGen/X86/multiple-loop-post-inc.ll
+++ b/test/CodeGen/X86/multiple-loop-post-inc.ll
@@ -19,7 +19,7 @@ define void @foo(float* %I, i64 %IS, float* nocapture %Start, float* nocapture %
entry:
%times4 = alloca float, align 4 ; <float*> [#uses=3]
%timesN = alloca float, align 4 ; <float*> [#uses=2]
- %0 = load float* %Step, align 4 ; <float> [#uses=8]
+ %0 = load float, float* %Step, align 4 ; <float> [#uses=8]
%1 = ptrtoint float* %I to i64 ; <i64> [#uses=1]
%2 = ptrtoint float* %O to i64 ; <i64> [#uses=1]
%tmp = xor i64 %2, %1 ; <i64> [#uses=1]
@@ -34,11 +34,11 @@ entry:
br i1 %9, label %bb, label %return
bb: ; preds = %entry
- %10 = load float* %Start, align 4 ; <float> [#uses=1]
+ %10 = load float, float* %Start, align 4 ; <float> [#uses=1]
br label %bb2
bb1: ; preds = %bb3
- %11 = load float* %I_addr.0, align 4 ; <float> [#uses=1]
+ %11 = load float, float* %I_addr.0, align 4 ; <float> [#uses=1]
%12 = fmul float %11, %x.0 ; <float> [#uses=1]
store float %12, float* %O_addr.0, align 4
%13 = fadd float %x.0, %0 ; <float> [#uses=1]
@@ -49,8 +49,8 @@ bb2: ; preds = %bb1, %bb
%14 = phi i64 [ %indvar.next53, %bb1 ], [ 0, %bb ] ; <i64> [#uses=21]
%x.0 = phi float [ %13, %bb1 ], [ %10, %bb ] ; <float> [#uses=6]
%N_addr.0 = sub i64 %N, %14 ; <i64> [#uses=4]
- %O_addr.0 = getelementptr float* %O, i64 %14 ; <float*> [#uses=4]
- %I_addr.0 = getelementptr float* %I, i64 %14 ; <float*> [#uses=3]
+ %O_addr.0 = getelementptr float, float* %O, i64 %14 ; <float*> [#uses=4]
+ %I_addr.0 = getelementptr float, float* %I, i64 %14 ; <float*> [#uses=3]
%15 = icmp slt i64 %N_addr.0, 1 ; <i1> [#uses=1]
br i1 %15, label %bb4, label %bb3
@@ -105,32 +105,32 @@ bb5: ; preds = %bb.nph43, %bb5
%vX1.036 = phi <4 x float> [ %32, %bb.nph43 ], [ %47, %bb5 ] ; <<4 x float>> [#uses=2]
%tmp104 = shl i64 %indvar102, 4 ; <i64> [#uses=5]
%tmp105 = add i64 %14, %tmp104 ; <i64> [#uses=2]
- %scevgep106 = getelementptr float* %I, i64 %tmp105 ; <float*> [#uses=1]
+ %scevgep106 = getelementptr float, float* %I, i64 %tmp105 ; <float*> [#uses=1]
%scevgep106107 = bitcast float* %scevgep106 to <4 x float>* ; <<4 x float>*> [#uses=1]
%tmp109 = add i64 %tmp108, %tmp104 ; <i64> [#uses=2]
%tmp112 = add i64 %tmp111, %tmp104 ; <i64> [#uses=2]
- %scevgep113 = getelementptr float* %I, i64 %tmp112 ; <float*> [#uses=1]
+ %scevgep113 = getelementptr float, float* %I, i64 %tmp112 ; <float*> [#uses=1]
%scevgep113114 = bitcast float* %scevgep113 to <4 x float>* ; <<4 x float>*> [#uses=1]
%tmp116 = add i64 %tmp115, %tmp104 ; <i64> [#uses=2]
- %scevgep117 = getelementptr float* %I, i64 %tmp116 ; <float*> [#uses=1]
+ %scevgep117 = getelementptr float, float* %I, i64 %tmp116 ; <float*> [#uses=1]
%scevgep117118 = bitcast float* %scevgep117 to <4 x float>* ; <<4 x float>*> [#uses=1]
%tmp120 = add i64 %tmp119, %tmp104 ; <i64> [#uses=2]
- %scevgep121 = getelementptr float* %I, i64 %tmp120 ; <float*> [#uses=1]
+ %scevgep121 = getelementptr float, float* %I, i64 %tmp120 ; <float*> [#uses=1]
%scevgep121122 = bitcast float* %scevgep121 to <4 x float>* ; <<4 x float>*> [#uses=1]
- %scevgep123 = getelementptr float* %O, i64 %tmp105 ; <float*> [#uses=1]
+ %scevgep123 = getelementptr float, float* %O, i64 %tmp105 ; <float*> [#uses=1]
%scevgep123124 = bitcast float* %scevgep123 to <4 x float>* ; <<4 x float>*> [#uses=1]
- %scevgep126 = getelementptr float* %O, i64 %tmp112 ; <float*> [#uses=1]
+ %scevgep126 = getelementptr float, float* %O, i64 %tmp112 ; <float*> [#uses=1]
%scevgep126127 = bitcast float* %scevgep126 to <4 x float>* ; <<4 x float>*> [#uses=1]
- %scevgep128 = getelementptr float* %O, i64 %tmp116 ; <float*> [#uses=1]
+ %scevgep128 = getelementptr float, float* %O, i64 %tmp116 ; <float*> [#uses=1]
%scevgep128129 = bitcast float* %scevgep128 to <4 x float>* ; <<4 x float>*> [#uses=1]
- %scevgep130 = getelementptr float* %O, i64 %tmp120 ; <float*> [#uses=1]
+ %scevgep130 = getelementptr float, float* %O, i64 %tmp120 ; <float*> [#uses=1]
%scevgep130131 = bitcast float* %scevgep130 to <4 x float>* ; <<4 x float>*> [#uses=1]
%tmp132 = mul i64 %indvar102, -16 ; <i64> [#uses=1]
%tmp136 = add i64 %tmp135, %tmp132 ; <i64> [#uses=2]
- %36 = load <4 x float>* %scevgep106107, align 16 ; <<4 x float>> [#uses=1]
- %37 = load <4 x float>* %scevgep113114, align 16 ; <<4 x float>> [#uses=1]
- %38 = load <4 x float>* %scevgep117118, align 16 ; <<4 x float>> [#uses=1]
- %39 = load <4 x float>* %scevgep121122, align 16 ; <<4 x float>> [#uses=1]
+ %36 = load <4 x float>, <4 x float>* %scevgep106107, align 16 ; <<4 x float>> [#uses=1]
+ %37 = load <4 x float>, <4 x float>* %scevgep113114, align 16 ; <<4 x float>> [#uses=1]
+ %38 = load <4 x float>, <4 x float>* %scevgep117118, align 16 ; <<4 x float>> [#uses=1]
+ %39 = load <4 x float>, <4 x float>* %scevgep121122, align 16 ; <<4 x float>> [#uses=1]
%40 = fmul <4 x float> %36, %vX0.039 ; <<4 x float>> [#uses=1]
%41 = fadd <4 x float> %vX0.039, %asmtmp.i18 ; <<4 x float>> [#uses=2]
%42 = fmul <4 x float> %37, %vX1.036 ; <<4 x float>> [#uses=1]
@@ -149,8 +149,8 @@ bb5: ; preds = %bb.nph43, %bb5
bb6.bb7_crit_edge: ; preds = %bb5
call void asm sideeffect "# Stop.", "~{dirflag},~{fpsr},~{flags}"() nounwind
- %scevgep110 = getelementptr float* %I, i64 %tmp109 ; <float*> [#uses=1]
- %scevgep125 = getelementptr float* %O, i64 %tmp109 ; <float*> [#uses=1]
+ %scevgep110 = getelementptr float, float* %I, i64 %tmp109 ; <float*> [#uses=1]
+ %scevgep125 = getelementptr float, float* %O, i64 %tmp109 ; <float*> [#uses=1]
br label %bb7
bb7: ; preds = %bb6.bb7_crit_edge, %bb6.preheader
@@ -166,9 +166,9 @@ bb8: ; preds = %bb4
bb.nph: ; preds = %bb8
%I_addr.0.sum = add i64 %14, -1 ; <i64> [#uses=1]
- %49 = getelementptr inbounds float* %I, i64 %I_addr.0.sum ; <float*> [#uses=1]
+ %49 = getelementptr inbounds float, float* %I, i64 %I_addr.0.sum ; <float*> [#uses=1]
%50 = bitcast float* %49 to <4 x float>* ; <<4 x float>*> [#uses=1]
- %51 = load <4 x float>* %50, align 16 ; <<4 x float>> [#uses=1]
+ %51 = load <4 x float>, <4 x float>* %50, align 16 ; <<4 x float>> [#uses=1]
%tmp54 = add i64 %14, 16 ; <i64> [#uses=1]
%tmp56 = add i64 %14, 3 ; <i64> [#uses=1]
%tmp60 = add i64 %14, 7 ; <i64> [#uses=1]
@@ -191,35 +191,35 @@ bb9: ; preds = %bb.nph, %bb9
%tmp51 = shl i64 %indvar, 4 ; <i64> [#uses=9]
%tmp55 = add i64 %tmp54, %tmp51 ; <i64> [#uses=2]
%tmp57 = add i64 %tmp56, %tmp51 ; <i64> [#uses=1]
- %scevgep58 = getelementptr float* %I, i64 %tmp57 ; <float*> [#uses=1]
+ %scevgep58 = getelementptr float, float* %I, i64 %tmp57 ; <float*> [#uses=1]
%scevgep5859 = bitcast float* %scevgep58 to <4 x float>* ; <<4 x float>*> [#uses=1]
%tmp61 = add i64 %tmp60, %tmp51 ; <i64> [#uses=1]
- %scevgep62 = getelementptr float* %I, i64 %tmp61 ; <float*> [#uses=1]
+ %scevgep62 = getelementptr float, float* %I, i64 %tmp61 ; <float*> [#uses=1]
%scevgep6263 = bitcast float* %scevgep62 to <4 x float>* ; <<4 x float>*> [#uses=1]
%tmp65 = add i64 %tmp64, %tmp51 ; <i64> [#uses=1]
- %scevgep66 = getelementptr float* %I, i64 %tmp65 ; <float*> [#uses=1]
+ %scevgep66 = getelementptr float, float* %I, i64 %tmp65 ; <float*> [#uses=1]
%scevgep6667 = bitcast float* %scevgep66 to <4 x float>* ; <<4 x float>*> [#uses=1]
%tmp69 = add i64 %tmp68, %tmp51 ; <i64> [#uses=1]
- %scevgep70 = getelementptr float* %I, i64 %tmp69 ; <float*> [#uses=1]
+ %scevgep70 = getelementptr float, float* %I, i64 %tmp69 ; <float*> [#uses=1]
%scevgep7071 = bitcast float* %scevgep70 to <4 x float>* ; <<4 x float>*> [#uses=1]
%tmp72 = add i64 %14, %tmp51 ; <i64> [#uses=1]
- %scevgep73 = getelementptr float* %O, i64 %tmp72 ; <float*> [#uses=1]
+ %scevgep73 = getelementptr float, float* %O, i64 %tmp72 ; <float*> [#uses=1]
%scevgep7374 = bitcast float* %scevgep73 to <4 x float>* ; <<4 x float>*> [#uses=1]
%tmp77 = add i64 %tmp76, %tmp51 ; <i64> [#uses=1]
- %scevgep78 = getelementptr float* %O, i64 %tmp77 ; <float*> [#uses=1]
+ %scevgep78 = getelementptr float, float* %O, i64 %tmp77 ; <float*> [#uses=1]
%scevgep7879 = bitcast float* %scevgep78 to <4 x float>* ; <<4 x float>*> [#uses=1]
%tmp81 = add i64 %tmp80, %tmp51 ; <i64> [#uses=1]
- %scevgep82 = getelementptr float* %O, i64 %tmp81 ; <float*> [#uses=1]
+ %scevgep82 = getelementptr float, float* %O, i64 %tmp81 ; <float*> [#uses=1]
%scevgep8283 = bitcast float* %scevgep82 to <4 x float>* ; <<4 x float>*> [#uses=1]
%tmp85 = add i64 %tmp84, %tmp51 ; <i64> [#uses=1]
- %scevgep86 = getelementptr float* %O, i64 %tmp85 ; <float*> [#uses=1]
+ %scevgep86 = getelementptr float, float* %O, i64 %tmp85 ; <float*> [#uses=1]
%scevgep8687 = bitcast float* %scevgep86 to <4 x float>* ; <<4 x float>*> [#uses=1]
%tmp88 = mul i64 %indvar, -16 ; <i64> [#uses=1]
%tmp92 = add i64 %tmp91, %tmp88 ; <i64> [#uses=2]
- %52 = load <4 x float>* %scevgep5859, align 16 ; <<4 x float>> [#uses=2]
- %53 = load <4 x float>* %scevgep6263, align 16 ; <<4 x float>> [#uses=2]
- %54 = load <4 x float>* %scevgep6667, align 16 ; <<4 x float>> [#uses=2]
- %55 = load <4 x float>* %scevgep7071, align 16 ; <<4 x float>> [#uses=2]
+ %52 = load <4 x float>, <4 x float>* %scevgep5859, align 16 ; <<4 x float>> [#uses=2]
+ %53 = load <4 x float>, <4 x float>* %scevgep6263, align 16 ; <<4 x float>> [#uses=2]
+ %54 = load <4 x float>, <4 x float>* %scevgep6667, align 16 ; <<4 x float>> [#uses=2]
+ %55 = load <4 x float>, <4 x float>* %scevgep7071, align 16 ; <<4 x float>> [#uses=2]
%56 = shufflevector <4 x float> %vI0.019, <4 x float> %52, <4 x i32> <i32 4, i32 1, i32 2, i32 3> ; <<4 x float>> [#uses=1]
%57 = shufflevector <4 x float> %56, <4 x float> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 0> ; <<4 x float>> [#uses=1]
%58 = shufflevector <4 x float> %52, <4 x float> %53, <4 x i32> <i32 4, i32 1, i32 2, i32 3> ; <<4 x float>> [#uses=1]
@@ -245,8 +245,8 @@ bb9: ; preds = %bb.nph, %bb9
br i1 %72, label %bb9, label %bb10.bb11.loopexit_crit_edge
bb10.bb11.loopexit_crit_edge: ; preds = %bb9
- %scevgep = getelementptr float* %I, i64 %tmp55 ; <float*> [#uses=1]
- %scevgep75 = getelementptr float* %O, i64 %tmp55 ; <float*> [#uses=1]
+ %scevgep = getelementptr float, float* %I, i64 %tmp55 ; <float*> [#uses=1]
+ %scevgep75 = getelementptr float, float* %O, i64 %tmp55 ; <float*> [#uses=1]
br label %bb11
bb11: ; preds = %bb8, %bb10.bb11.loopexit_crit_edge, %bb7
@@ -261,9 +261,9 @@ bb11: ; preds = %bb8, %bb10.bb11.loo
bb12: ; preds = %bb11, %bb12
%indvar94 = phi i64 [ %indvar.next95, %bb12 ], [ 0, %bb11 ] ; <i64> [#uses=3]
%x.130 = phi float [ %77, %bb12 ], [ %73, %bb11 ] ; <float> [#uses=2]
- %I_addr.433 = getelementptr float* %I_addr.2, i64 %indvar94 ; <float*> [#uses=1]
- %O_addr.432 = getelementptr float* %O_addr.2, i64 %indvar94 ; <float*> [#uses=1]
- %75 = load float* %I_addr.433, align 4 ; <float> [#uses=1]
+ %I_addr.433 = getelementptr float, float* %I_addr.2, i64 %indvar94 ; <float*> [#uses=1]
+ %O_addr.432 = getelementptr float, float* %O_addr.2, i64 %indvar94 ; <float*> [#uses=1]
+ %75 = load float, float* %I_addr.433, align 4 ; <float> [#uses=1]
%76 = fmul float %75, %x.130 ; <float> [#uses=1]
store float %76, float* %O_addr.432, align 4
%77 = fadd float %x.130, %0 ; <float> [#uses=2]
@@ -293,7 +293,7 @@ outer: ; preds = %bb1, %entry
inner: ; preds = %bb0, %if.end275
%i8 = phi i32 [ %a, %outer ], [ %indvar.next159, %bb0 ] ; <i32> [#uses=2]
- %t338 = load i32* undef ; <i32> [#uses=1]
+ %t338 = load i32, i32* undef ; <i32> [#uses=1]
%t191 = mul i32 %i8, %t338 ; <i32> [#uses=1]
%t179 = add i32 %i6, %t191 ; <i32> [#uses=1]
br label %bb0
diff --git a/test/CodeGen/X86/mulx32.ll b/test/CodeGen/X86/mulx32.ll
index b75ac009e76d..42ef2eb6f647 100644
--- a/test/CodeGen/X86/mulx32.ll
+++ b/test/CodeGen/X86/mulx32.ll
@@ -11,7 +11,7 @@ define i64 @f1(i32 %a, i32 %b) {
}
define i64 @f2(i32 %a, i32* %p) {
- %b = load i32* %p
+ %b = load i32, i32* %p
%x = zext i32 %a to i64
%y = zext i32 %b to i64
%r = mul i64 %x, %y
diff --git a/test/CodeGen/X86/mulx64.ll b/test/CodeGen/X86/mulx64.ll
index d5730282a137..808c02290b7c 100644
--- a/test/CodeGen/X86/mulx64.ll
+++ b/test/CodeGen/X86/mulx64.ll
@@ -11,7 +11,7 @@ define i128 @f1(i64 %a, i64 %b) {
}
define i128 @f2(i64 %a, i64* %p) {
- %b = load i64* %p
+ %b = load i64, i64* %p
%x = zext i64 %a to i128
%y = zext i64 %b to i128
%r = mul i128 %x, %y
diff --git a/test/CodeGen/X86/musttail-fastcall.ll b/test/CodeGen/X86/musttail-fastcall.ll
index c7e5ffcfa877..a95e0ff12c34 100644
--- a/test/CodeGen/X86/musttail-fastcall.ll
+++ b/test/CodeGen/X86/musttail-fastcall.ll
@@ -9,13 +9,13 @@
declare void @puts(i8*)
define i32 @call_fast_thunk() {
- %r = call x86_fastcallcc i32 (...)* @fast_thunk(i32 inreg 1, i32 inreg 2, i32 3)
+ %r = call x86_fastcallcc i32 (...) @fast_thunk(i32 inreg 1, i32 inreg 2, i32 3)
ret i32 %r
}
define x86_fastcallcc i32 @fast_thunk(...) {
- call void @puts(i8* getelementptr ([4 x i8]* @asdf, i32 0, i32 0))
- %r = musttail call x86_fastcallcc i32 (...)* bitcast (i32 (i32, i32, i32)* @fast_target to i32 (...)*) (...)
+ call void @puts(i8* getelementptr ([4 x i8], [4 x i8]* @asdf, i32 0, i32 0))
+ %r = musttail call x86_fastcallcc i32 (...) bitcast (i32 (i32, i32, i32)* @fast_target to i32 (...)*) (...)
ret i32 %r
}
@@ -38,13 +38,13 @@ define x86_fastcallcc i32 @fast_target(i32 inreg %a, i32 inreg %b, i32 %c) {
; Repeat the test for vectorcall, which has XMM registers.
define i32 @call_vector_thunk() {
- %r = call x86_vectorcallcc i32 (...)* @vector_thunk(i32 inreg 1, i32 inreg 2, i32 3)
+ %r = call x86_vectorcallcc i32 (...) @vector_thunk(i32 inreg 1, i32 inreg 2, i32 3)
ret i32 %r
}
define x86_vectorcallcc i32 @vector_thunk(...) {
- call void @puts(i8* getelementptr ([4 x i8]* @asdf, i32 0, i32 0))
- %r = musttail call x86_vectorcallcc i32 (...)* bitcast (i32 (i32, i32, i32)* @vector_target to i32 (...)*) (...)
+ call void @puts(i8* getelementptr ([4 x i8], [4 x i8]* @asdf, i32 0, i32 0))
+ %r = musttail call x86_vectorcallcc i32 (...) bitcast (i32 (i32, i32, i32)* @vector_target to i32 (...)*) (...)
ret i32 %r
}
diff --git a/test/CodeGen/X86/musttail-indirect.ll b/test/CodeGen/X86/musttail-indirect.ll
index 9d21b5ea5d52..7bb71c3fb038 100644
--- a/test/CodeGen/X86/musttail-indirect.ll
+++ b/test/CodeGen/X86/musttail-indirect.ll
@@ -31,8 +31,8 @@
define x86_thiscallcc i32 @f_thunk(%struct.B* %this, i32) {
entry:
%1 = bitcast %struct.B* %this to i32 (%struct.B*, i32)***
- %vtable = load i32 (%struct.B*, i32)*** %1
- %2 = load i32 (%struct.B*, i32)** %vtable
+ %vtable = load i32 (%struct.B*, i32)**, i32 (%struct.B*, i32)*** %1
+ %2 = load i32 (%struct.B*, i32)*, i32 (%struct.B*, i32)** %vtable
%3 = musttail call x86_thiscallcc i32 %2(%struct.B* %this, i32 %0)
ret i32 %3
}
@@ -45,9 +45,9 @@ entry:
define x86_thiscallcc i32 @g_thunk(%struct.B* %this, <{ %struct.A, i32, %struct.A }>* inalloca) {
entry:
%1 = bitcast %struct.B* %this to i32 (%struct.B*, <{ %struct.A, i32, %struct.A }>*)***
- %vtable = load i32 (%struct.B*, <{ %struct.A, i32, %struct.A }>*)*** %1
- %vfn = getelementptr inbounds i32 (%struct.B*, <{ %struct.A, i32, %struct.A }>*)** %vtable, i32 1
- %2 = load i32 (%struct.B*, <{ %struct.A, i32, %struct.A }>*)** %vfn
+ %vtable = load i32 (%struct.B*, <{ %struct.A, i32, %struct.A }>*)**, i32 (%struct.B*, <{ %struct.A, i32, %struct.A }>*)*** %1
+ %vfn = getelementptr inbounds i32 (%struct.B*, <{ %struct.A, i32, %struct.A }>*)*, i32 (%struct.B*, <{ %struct.A, i32, %struct.A }>*)** %vtable, i32 1
+ %2 = load i32 (%struct.B*, <{ %struct.A, i32, %struct.A }>*)*, i32 (%struct.B*, <{ %struct.A, i32, %struct.A }>*)** %vfn
%3 = musttail call x86_thiscallcc i32 %2(%struct.B* %this, <{ %struct.A, i32, %struct.A }>* inalloca %0)
ret i32 %3
}
@@ -59,9 +59,9 @@ entry:
define x86_thiscallcc void @h_thunk(%struct.B* %this, <{ %struct.A, i32, %struct.A }>* inalloca) {
entry:
%1 = bitcast %struct.B* %this to void (%struct.B*, <{ %struct.A, i32, %struct.A }>*)***
- %vtable = load void (%struct.B*, <{ %struct.A, i32, %struct.A }>*)*** %1
- %vfn = getelementptr inbounds void (%struct.B*, <{ %struct.A, i32, %struct.A }>*)** %vtable, i32 2
- %2 = load void (%struct.B*, <{ %struct.A, i32, %struct.A }>*)** %vfn
+ %vtable = load void (%struct.B*, <{ %struct.A, i32, %struct.A }>*)**, void (%struct.B*, <{ %struct.A, i32, %struct.A }>*)*** %1
+ %vfn = getelementptr inbounds void (%struct.B*, <{ %struct.A, i32, %struct.A }>*)*, void (%struct.B*, <{ %struct.A, i32, %struct.A }>*)** %vtable, i32 2
+ %2 = load void (%struct.B*, <{ %struct.A, i32, %struct.A }>*)*, void (%struct.B*, <{ %struct.A, i32, %struct.A }>*)** %vfn
musttail call x86_thiscallcc void %2(%struct.B* %this, <{ %struct.A, i32, %struct.A }>* inalloca %0)
ret void
}
@@ -73,9 +73,9 @@ entry:
define x86_thiscallcc %struct.A* @i_thunk(%struct.B* %this, <{ %struct.A*, %struct.A, i32, %struct.A }>* inalloca) {
entry:
%1 = bitcast %struct.B* %this to %struct.A* (%struct.B*, <{ %struct.A*, %struct.A, i32, %struct.A }>*)***
- %vtable = load %struct.A* (%struct.B*, <{ %struct.A*, %struct.A, i32, %struct.A }>*)*** %1
- %vfn = getelementptr inbounds %struct.A* (%struct.B*, <{ %struct.A*, %struct.A, i32, %struct.A }>*)** %vtable, i32 3
- %2 = load %struct.A* (%struct.B*, <{ %struct.A*, %struct.A, i32, %struct.A }>*)** %vfn
+ %vtable = load %struct.A* (%struct.B*, <{ %struct.A*, %struct.A, i32, %struct.A }>*)**, %struct.A* (%struct.B*, <{ %struct.A*, %struct.A, i32, %struct.A }>*)*** %1
+ %vfn = getelementptr inbounds %struct.A* (%struct.B*, <{ %struct.A*, %struct.A, i32, %struct.A }>*)*, %struct.A* (%struct.B*, <{ %struct.A*, %struct.A, i32, %struct.A }>*)** %vtable, i32 3
+ %2 = load %struct.A* (%struct.B*, <{ %struct.A*, %struct.A, i32, %struct.A }>*)*, %struct.A* (%struct.B*, <{ %struct.A*, %struct.A, i32, %struct.A }>*)** %vfn
%3 = musttail call x86_thiscallcc %struct.A* %2(%struct.B* %this, <{ %struct.A*, %struct.A, i32, %struct.A }>* inalloca %0)
ret %struct.A* %3
}
@@ -86,9 +86,9 @@ entry:
define x86_thiscallcc void @j_thunk(%struct.A* noalias sret %agg.result, %struct.B* %this, i32) {
entry:
%1 = bitcast %struct.B* %this to void (%struct.A*, %struct.B*, i32)***
- %vtable = load void (%struct.A*, %struct.B*, i32)*** %1
- %vfn = getelementptr inbounds void (%struct.A*, %struct.B*, i32)** %vtable, i32 4
- %2 = load void (%struct.A*, %struct.B*, i32)** %vfn
+ %vtable = load void (%struct.A*, %struct.B*, i32)**, void (%struct.A*, %struct.B*, i32)*** %1
+ %vfn = getelementptr inbounds void (%struct.A*, %struct.B*, i32)*, void (%struct.A*, %struct.B*, i32)** %vtable, i32 4
+ %2 = load void (%struct.A*, %struct.B*, i32)*, void (%struct.A*, %struct.B*, i32)** %vfn
musttail call x86_thiscallcc void %2(%struct.A* sret %agg.result, %struct.B* %this, i32 %0)
ret void
}
@@ -99,12 +99,12 @@ entry:
; CHECK-NOT: ret
define x86_stdcallcc i32 @stdcall_thunk(<{ %struct.B*, %struct.A }>* inalloca) {
entry:
- %this_ptr = getelementptr inbounds <{ %struct.B*, %struct.A }>* %0, i32 0, i32 0
- %this = load %struct.B** %this_ptr
+ %this_ptr = getelementptr inbounds <{ %struct.B*, %struct.A }>, <{ %struct.B*, %struct.A }>* %0, i32 0, i32 0
+ %this = load %struct.B*, %struct.B** %this_ptr
%1 = bitcast %struct.B* %this to i32 (<{ %struct.B*, %struct.A }>*)***
- %vtable = load i32 (<{ %struct.B*, %struct.A }>*)*** %1
- %vfn = getelementptr inbounds i32 (<{ %struct.B*, %struct.A }>*)** %vtable, i32 1
- %2 = load i32 (<{ %struct.B*, %struct.A }>*)** %vfn
+ %vtable = load i32 (<{ %struct.B*, %struct.A }>*)**, i32 (<{ %struct.B*, %struct.A }>*)*** %1
+ %vfn = getelementptr inbounds i32 (<{ %struct.B*, %struct.A }>*)*, i32 (<{ %struct.B*, %struct.A }>*)** %vtable, i32 1
+ %2 = load i32 (<{ %struct.B*, %struct.A }>*)*, i32 (<{ %struct.B*, %struct.A }>*)** %vfn
%3 = musttail call x86_stdcallcc i32 %2(<{ %struct.B*, %struct.A }>* inalloca %0)
ret i32 %3
}
@@ -116,9 +116,9 @@ entry:
define x86_fastcallcc i32 @fastcall_thunk(%struct.B* inreg %this, <{ %struct.A }>* inalloca) {
entry:
%1 = bitcast %struct.B* %this to i32 (%struct.B*, <{ %struct.A }>*)***
- %vtable = load i32 (%struct.B*, <{ %struct.A }>*)*** %1
- %vfn = getelementptr inbounds i32 (%struct.B*, <{ %struct.A }>*)** %vtable, i32 1
- %2 = load i32 (%struct.B*, <{ %struct.A }>*)** %vfn
+ %vtable = load i32 (%struct.B*, <{ %struct.A }>*)**, i32 (%struct.B*, <{ %struct.A }>*)*** %1
+ %vfn = getelementptr inbounds i32 (%struct.B*, <{ %struct.A }>*)*, i32 (%struct.B*, <{ %struct.A }>*)** %vtable, i32 1
+ %2 = load i32 (%struct.B*, <{ %struct.A }>*)*, i32 (%struct.B*, <{ %struct.A }>*)** %vfn
%3 = musttail call x86_fastcallcc i32 %2(%struct.B* inreg %this, <{ %struct.A }>* inalloca %0)
ret i32 %3
}
diff --git a/test/CodeGen/X86/musttail-thiscall.ll b/test/CodeGen/X86/musttail-thiscall.ll
index 8ea12482e504..1402f10b091a 100644
--- a/test/CodeGen/X86/musttail-thiscall.ll
+++ b/test/CodeGen/X86/musttail-thiscall.ll
@@ -4,7 +4,7 @@
; CHECK-LABEL: t1:
; CHECK: jmp {{_?}}t1_callee
define x86_thiscallcc void @t1(i8* %this) {
- %adj = getelementptr i8* %this, i32 4
+ %adj = getelementptr i8, i8* %this, i32 4
musttail call x86_thiscallcc void @t1_callee(i8* %adj)
ret void
}
@@ -13,7 +13,7 @@ declare x86_thiscallcc void @t1_callee(i8* %this)
; CHECK-LABEL: t2:
; CHECK: jmp {{_?}}t2_callee
define x86_thiscallcc i32 @t2(i8* %this, i32 %a) {
- %adj = getelementptr i8* %this, i32 4
+ %adj = getelementptr i8, i8* %this, i32 4
%rv = musttail call x86_thiscallcc i32 @t2_callee(i8* %adj, i32 %a)
ret i32 %rv
}
@@ -22,8 +22,8 @@ declare x86_thiscallcc i32 @t2_callee(i8* %this, i32 %a)
; CHECK-LABEL: t3:
; CHECK: jmp {{_?}}t3_callee
define x86_thiscallcc i8* @t3(i8* %this, <{ i8*, i32 }>* inalloca %args) {
- %adj = getelementptr i8* %this, i32 4
- %a_ptr = getelementptr <{ i8*, i32 }>* %args, i32 0, i32 1
+ %adj = getelementptr i8, i8* %this, i32 4
+ %a_ptr = getelementptr <{ i8*, i32 }>, <{ i8*, i32 }>* %args, i32 0, i32 1
store i32 0, i32* %a_ptr
%rv = musttail call x86_thiscallcc i8* @t3_callee(i8* %adj, <{ i8*, i32 }>* inalloca %args)
ret i8* %rv
diff --git a/test/CodeGen/X86/musttail-varargs.ll b/test/CodeGen/X86/musttail-varargs.ll
index 7f105a13a6a0..3613f4c08cce 100644
--- a/test/CodeGen/X86/musttail-varargs.ll
+++ b/test/CodeGen/X86/musttail-varargs.ll
@@ -16,8 +16,8 @@ define void @f_thunk(i8* %this, ...) {
%ap_i8 = bitcast [4 x i8*]* %ap to i8*
call void @llvm.va_start(i8* %ap_i8)
- %fptr = call void(i8*, ...)*(i8*)* @get_f(i8* %this)
- musttail call void (i8*, ...)* %fptr(i8* %this, ...)
+ %fptr = call void(i8*, ...)*(i8*) @get_f(i8* %this)
+ musttail call void (i8*, ...) %fptr(i8* %this, ...)
ret void
}
@@ -84,7 +84,7 @@ define void @f_thunk(i8* %this, ...) {
define void @g_thunk(i8* %fptr_i8, ...) {
%fptr = bitcast i8* %fptr_i8 to void (i8*, ...)*
- musttail call void (i8*, ...)* %fptr(i8* %fptr_i8, ...)
+ musttail call void (i8*, ...) %fptr(i8* %fptr_i8, ...)
ret void
}
@@ -106,23 +106,23 @@ define void @g_thunk(i8* %fptr_i8, ...) {
@g = external global i32
define void @h_thunk(%struct.Foo* %this, ...) {
- %cond_p = getelementptr %struct.Foo* %this, i32 0, i32 0
- %cond = load i1* %cond_p
+ %cond_p = getelementptr %struct.Foo, %struct.Foo* %this, i32 0, i32 0
+ %cond = load i1, i1* %cond_p
br i1 %cond, label %then, label %else
then:
- %a_p = getelementptr %struct.Foo* %this, i32 0, i32 1
- %a_i8 = load i8** %a_p
+ %a_p = getelementptr %struct.Foo, %struct.Foo* %this, i32 0, i32 1
+ %a_i8 = load i8*, i8** %a_p
%a = bitcast i8* %a_i8 to void (%struct.Foo*, ...)*
- musttail call void (%struct.Foo*, ...)* %a(%struct.Foo* %this, ...)
+ musttail call void (%struct.Foo*, ...) %a(%struct.Foo* %this, ...)
ret void
else:
- %b_p = getelementptr %struct.Foo* %this, i32 0, i32 2
- %b_i8 = load i8** %b_p
+ %b_p = getelementptr %struct.Foo, %struct.Foo* %this, i32 0, i32 2
+ %b_i8 = load i8*, i8** %b_p
%b = bitcast i8* %b_i8 to void (%struct.Foo*, ...)*
store i32 42, i32* @g
- musttail call void (%struct.Foo*, ...)* %b(%struct.Foo* %this, ...)
+ musttail call void (%struct.Foo*, ...) %b(%struct.Foo* %this, ...)
ret void
}
diff --git a/test/CodeGen/X86/nancvt.ll b/test/CodeGen/X86/nancvt.ll
index 8a665fa79cff..9222f6b76822 100644
--- a/test/CodeGen/X86/nancvt.ll
+++ b/test/CodeGen/X86/nancvt.ll
@@ -29,38 +29,38 @@ entry:
br label %bb23
bb: ; preds = %bb23
- %tmp = load i32* %i, align 4 ; <i32> [#uses=1]
- %tmp1 = getelementptr [3 x i32]* @fnan, i32 0, i32 %tmp ; <i32*> [#uses=1]
- %tmp2 = load i32* %tmp1, align 4 ; <i32> [#uses=1]
- %tmp3 = getelementptr %struct..0anon* %uf, i32 0, i32 0 ; <float*> [#uses=1]
+ %tmp = load i32, i32* %i, align 4 ; <i32> [#uses=1]
+ %tmp1 = getelementptr [3 x i32], [3 x i32]* @fnan, i32 0, i32 %tmp ; <i32*> [#uses=1]
+ %tmp2 = load i32, i32* %tmp1, align 4 ; <i32> [#uses=1]
+ %tmp3 = getelementptr %struct..0anon, %struct..0anon* %uf, i32 0, i32 0 ; <float*> [#uses=1]
%tmp34 = bitcast float* %tmp3 to i32* ; <i32*> [#uses=1]
store i32 %tmp2, i32* %tmp34, align 4
- %tmp5 = getelementptr %struct..0anon* %uf, i32 0, i32 0 ; <float*> [#uses=1]
- %tmp6 = load float* %tmp5, align 4 ; <float> [#uses=1]
+ %tmp5 = getelementptr %struct..0anon, %struct..0anon* %uf, i32 0, i32 0 ; <float*> [#uses=1]
+ %tmp6 = load float, float* %tmp5, align 4 ; <float> [#uses=1]
%tmp67 = fpext float %tmp6 to double ; <double> [#uses=1]
- %tmp8 = getelementptr %struct..1anon* %ud, i32 0, i32 0 ; <double*> [#uses=1]
+ %tmp8 = getelementptr %struct..1anon, %struct..1anon* %ud, i32 0, i32 0 ; <double*> [#uses=1]
store double %tmp67, double* %tmp8, align 8
- %tmp9 = getelementptr %struct..1anon* %ud, i32 0, i32 0 ; <double*> [#uses=1]
+ %tmp9 = getelementptr %struct..1anon, %struct..1anon* %ud, i32 0, i32 0 ; <double*> [#uses=1]
%tmp910 = bitcast double* %tmp9 to i64* ; <i64*> [#uses=1]
- %tmp11 = load i64* %tmp910, align 8 ; <i64> [#uses=1]
+ %tmp11 = load i64, i64* %tmp910, align 8 ; <i64> [#uses=1]
%tmp1112 = trunc i64 %tmp11 to i32 ; <i32> [#uses=1]
%tmp13 = and i32 %tmp1112, -1 ; <i32> [#uses=1]
- %tmp14 = getelementptr %struct..1anon* %ud, i32 0, i32 0 ; <double*> [#uses=1]
+ %tmp14 = getelementptr %struct..1anon, %struct..1anon* %ud, i32 0, i32 0 ; <double*> [#uses=1]
%tmp1415 = bitcast double* %tmp14 to i64* ; <i64*> [#uses=1]
- %tmp16 = load i64* %tmp1415, align 8 ; <i64> [#uses=1]
+ %tmp16 = load i64, i64* %tmp1415, align 8 ; <i64> [#uses=1]
%.cast = zext i32 32 to i64 ; <i64> [#uses=1]
%tmp17 = ashr i64 %tmp16, %.cast ; <i64> [#uses=1]
%tmp1718 = trunc i64 %tmp17 to i32 ; <i32> [#uses=1]
- %tmp19 = getelementptr [10 x i8]* @.str, i32 0, i32 0 ; <i8*> [#uses=1]
+ %tmp19 = getelementptr [10 x i8], [10 x i8]* @.str, i32 0, i32 0 ; <i8*> [#uses=1]
store volatile i32 %tmp1718, i32* @var
store volatile i32 %tmp13, i32* @var
- %tmp21 = load i32* %i, align 4 ; <i32> [#uses=1]
+ %tmp21 = load i32, i32* %i, align 4 ; <i32> [#uses=1]
%tmp22 = add i32 %tmp21, 1 ; <i32> [#uses=1]
store i32 %tmp22, i32* %i, align 4
br label %bb23
bb23: ; preds = %bb, %entry
- %tmp24 = load i32* %i, align 4 ; <i32> [#uses=1]
+ %tmp24 = load i32, i32* %i, align 4 ; <i32> [#uses=1]
%tmp25 = icmp sle i32 %tmp24, 2 ; <i1> [#uses=1]
%tmp2526 = zext i1 %tmp25 to i8 ; <i8> [#uses=1]
%toBool = icmp ne i8 %tmp2526, 0 ; <i1> [#uses=1]
@@ -71,29 +71,29 @@ bb27: ; preds = %bb23
br label %bb46
bb28: ; preds = %bb46
- %tmp29 = load i32* %i, align 4 ; <i32> [#uses=1]
- %tmp30 = getelementptr [3 x i64]* @dnan, i32 0, i32 %tmp29 ; <i64*> [#uses=1]
- %tmp31 = load i64* %tmp30, align 8 ; <i64> [#uses=1]
- %tmp32 = getelementptr %struct..1anon* %ud, i32 0, i32 0 ; <double*> [#uses=1]
+ %tmp29 = load i32, i32* %i, align 4 ; <i32> [#uses=1]
+ %tmp30 = getelementptr [3 x i64], [3 x i64]* @dnan, i32 0, i32 %tmp29 ; <i64*> [#uses=1]
+ %tmp31 = load i64, i64* %tmp30, align 8 ; <i64> [#uses=1]
+ %tmp32 = getelementptr %struct..1anon, %struct..1anon* %ud, i32 0, i32 0 ; <double*> [#uses=1]
%tmp3233 = bitcast double* %tmp32 to i64* ; <i64*> [#uses=1]
store i64 %tmp31, i64* %tmp3233, align 8
- %tmp35 = getelementptr %struct..1anon* %ud, i32 0, i32 0 ; <double*> [#uses=1]
- %tmp36 = load double* %tmp35, align 8 ; <double> [#uses=1]
+ %tmp35 = getelementptr %struct..1anon, %struct..1anon* %ud, i32 0, i32 0 ; <double*> [#uses=1]
+ %tmp36 = load double, double* %tmp35, align 8 ; <double> [#uses=1]
%tmp3637 = fptrunc double %tmp36 to float ; <float> [#uses=1]
- %tmp38 = getelementptr %struct..0anon* %uf, i32 0, i32 0 ; <float*> [#uses=1]
+ %tmp38 = getelementptr %struct..0anon, %struct..0anon* %uf, i32 0, i32 0 ; <float*> [#uses=1]
store float %tmp3637, float* %tmp38, align 4
- %tmp39 = getelementptr %struct..0anon* %uf, i32 0, i32 0 ; <float*> [#uses=1]
+ %tmp39 = getelementptr %struct..0anon, %struct..0anon* %uf, i32 0, i32 0 ; <float*> [#uses=1]
%tmp3940 = bitcast float* %tmp39 to i32* ; <i32*> [#uses=1]
- %tmp41 = load i32* %tmp3940, align 4 ; <i32> [#uses=1]
- %tmp42 = getelementptr [6 x i8]* @.str1, i32 0, i32 0 ; <i8*> [#uses=1]
+ %tmp41 = load i32, i32* %tmp3940, align 4 ; <i32> [#uses=1]
+ %tmp42 = getelementptr [6 x i8], [6 x i8]* @.str1, i32 0, i32 0 ; <i8*> [#uses=1]
store volatile i32 %tmp41, i32* @var
- %tmp44 = load i32* %i, align 4 ; <i32> [#uses=1]
+ %tmp44 = load i32, i32* %i, align 4 ; <i32> [#uses=1]
%tmp45 = add i32 %tmp44, 1 ; <i32> [#uses=1]
store i32 %tmp45, i32* %i, align 4
br label %bb46
bb46: ; preds = %bb28, %bb27
- %tmp47 = load i32* %i, align 4 ; <i32> [#uses=1]
+ %tmp47 = load i32, i32* %i, align 4 ; <i32> [#uses=1]
%tmp48 = icmp sle i32 %tmp47, 2 ; <i1> [#uses=1]
%tmp4849 = zext i1 %tmp48 to i8 ; <i8> [#uses=1]
%toBool50 = icmp ne i8 %tmp4849, 0 ; <i1> [#uses=1]
@@ -104,38 +104,38 @@ bb51: ; preds = %bb46
br label %bb78
bb52: ; preds = %bb78
- %tmp53 = load i32* %i, align 4 ; <i32> [#uses=1]
- %tmp54 = getelementptr [3 x i32]* @fsnan, i32 0, i32 %tmp53 ; <i32*> [#uses=1]
- %tmp55 = load i32* %tmp54, align 4 ; <i32> [#uses=1]
- %tmp56 = getelementptr %struct..0anon* %uf, i32 0, i32 0 ; <float*> [#uses=1]
+ %tmp53 = load i32, i32* %i, align 4 ; <i32> [#uses=1]
+ %tmp54 = getelementptr [3 x i32], [3 x i32]* @fsnan, i32 0, i32 %tmp53 ; <i32*> [#uses=1]
+ %tmp55 = load i32, i32* %tmp54, align 4 ; <i32> [#uses=1]
+ %tmp56 = getelementptr %struct..0anon, %struct..0anon* %uf, i32 0, i32 0 ; <float*> [#uses=1]
%tmp5657 = bitcast float* %tmp56 to i32* ; <i32*> [#uses=1]
store i32 %tmp55, i32* %tmp5657, align 4
- %tmp58 = getelementptr %struct..0anon* %uf, i32 0, i32 0 ; <float*> [#uses=1]
- %tmp59 = load float* %tmp58, align 4 ; <float> [#uses=1]
+ %tmp58 = getelementptr %struct..0anon, %struct..0anon* %uf, i32 0, i32 0 ; <float*> [#uses=1]
+ %tmp59 = load float, float* %tmp58, align 4 ; <float> [#uses=1]
%tmp5960 = fpext float %tmp59 to double ; <double> [#uses=1]
- %tmp61 = getelementptr %struct..1anon* %ud, i32 0, i32 0 ; <double*> [#uses=1]
+ %tmp61 = getelementptr %struct..1anon, %struct..1anon* %ud, i32 0, i32 0 ; <double*> [#uses=1]
store double %tmp5960, double* %tmp61, align 8
- %tmp62 = getelementptr %struct..1anon* %ud, i32 0, i32 0 ; <double*> [#uses=1]
+ %tmp62 = getelementptr %struct..1anon, %struct..1anon* %ud, i32 0, i32 0 ; <double*> [#uses=1]
%tmp6263 = bitcast double* %tmp62 to i64* ; <i64*> [#uses=1]
- %tmp64 = load i64* %tmp6263, align 8 ; <i64> [#uses=1]
+ %tmp64 = load i64, i64* %tmp6263, align 8 ; <i64> [#uses=1]
%tmp6465 = trunc i64 %tmp64 to i32 ; <i32> [#uses=1]
%tmp66 = and i32 %tmp6465, -1 ; <i32> [#uses=1]
- %tmp68 = getelementptr %struct..1anon* %ud, i32 0, i32 0 ; <double*> [#uses=1]
+ %tmp68 = getelementptr %struct..1anon, %struct..1anon* %ud, i32 0, i32 0 ; <double*> [#uses=1]
%tmp6869 = bitcast double* %tmp68 to i64* ; <i64*> [#uses=1]
- %tmp70 = load i64* %tmp6869, align 8 ; <i64> [#uses=1]
+ %tmp70 = load i64, i64* %tmp6869, align 8 ; <i64> [#uses=1]
%.cast71 = zext i32 32 to i64 ; <i64> [#uses=1]
%tmp72 = ashr i64 %tmp70, %.cast71 ; <i64> [#uses=1]
%tmp7273 = trunc i64 %tmp72 to i32 ; <i32> [#uses=1]
- %tmp74 = getelementptr [10 x i8]* @.str, i32 0, i32 0 ; <i8*> [#uses=1]
+ %tmp74 = getelementptr [10 x i8], [10 x i8]* @.str, i32 0, i32 0 ; <i8*> [#uses=1]
store volatile i32 %tmp7273, i32* @var
store volatile i32 %tmp66, i32* @var
- %tmp76 = load i32* %i, align 4 ; <i32> [#uses=1]
+ %tmp76 = load i32, i32* %i, align 4 ; <i32> [#uses=1]
%tmp77 = add i32 %tmp76, 1 ; <i32> [#uses=1]
store i32 %tmp77, i32* %i, align 4
br label %bb78
bb78: ; preds = %bb52, %bb51
- %tmp79 = load i32* %i, align 4 ; <i32> [#uses=1]
+ %tmp79 = load i32, i32* %i, align 4 ; <i32> [#uses=1]
%tmp80 = icmp sle i32 %tmp79, 2 ; <i1> [#uses=1]
%tmp8081 = zext i1 %tmp80 to i8 ; <i8> [#uses=1]
%toBool82 = icmp ne i8 %tmp8081, 0 ; <i1> [#uses=1]
@@ -146,29 +146,29 @@ bb83: ; preds = %bb78
br label %bb101
bb84: ; preds = %bb101
- %tmp85 = load i32* %i, align 4 ; <i32> [#uses=1]
- %tmp86 = getelementptr [3 x i64]* @dsnan, i32 0, i32 %tmp85 ; <i64*> [#uses=1]
- %tmp87 = load i64* %tmp86, align 8 ; <i64> [#uses=1]
- %tmp88 = getelementptr %struct..1anon* %ud, i32 0, i32 0 ; <double*> [#uses=1]
+ %tmp85 = load i32, i32* %i, align 4 ; <i32> [#uses=1]
+ %tmp86 = getelementptr [3 x i64], [3 x i64]* @dsnan, i32 0, i32 %tmp85 ; <i64*> [#uses=1]
+ %tmp87 = load i64, i64* %tmp86, align 8 ; <i64> [#uses=1]
+ %tmp88 = getelementptr %struct..1anon, %struct..1anon* %ud, i32 0, i32 0 ; <double*> [#uses=1]
%tmp8889 = bitcast double* %tmp88 to i64* ; <i64*> [#uses=1]
store i64 %tmp87, i64* %tmp8889, align 8
- %tmp90 = getelementptr %struct..1anon* %ud, i32 0, i32 0 ; <double*> [#uses=1]
- %tmp91 = load double* %tmp90, align 8 ; <double> [#uses=1]
+ %tmp90 = getelementptr %struct..1anon, %struct..1anon* %ud, i32 0, i32 0 ; <double*> [#uses=1]
+ %tmp91 = load double, double* %tmp90, align 8 ; <double> [#uses=1]
%tmp9192 = fptrunc double %tmp91 to float ; <float> [#uses=1]
- %tmp93 = getelementptr %struct..0anon* %uf, i32 0, i32 0 ; <float*> [#uses=1]
+ %tmp93 = getelementptr %struct..0anon, %struct..0anon* %uf, i32 0, i32 0 ; <float*> [#uses=1]
store float %tmp9192, float* %tmp93, align 4
- %tmp94 = getelementptr %struct..0anon* %uf, i32 0, i32 0 ; <float*> [#uses=1]
+ %tmp94 = getelementptr %struct..0anon, %struct..0anon* %uf, i32 0, i32 0 ; <float*> [#uses=1]
%tmp9495 = bitcast float* %tmp94 to i32* ; <i32*> [#uses=1]
- %tmp96 = load i32* %tmp9495, align 4 ; <i32> [#uses=1]
- %tmp97 = getelementptr [6 x i8]* @.str1, i32 0, i32 0 ; <i8*> [#uses=1]
+ %tmp96 = load i32, i32* %tmp9495, align 4 ; <i32> [#uses=1]
+ %tmp97 = getelementptr [6 x i8], [6 x i8]* @.str1, i32 0, i32 0 ; <i8*> [#uses=1]
store volatile i32 %tmp96, i32* @var
- %tmp99 = load i32* %i, align 4 ; <i32> [#uses=1]
+ %tmp99 = load i32, i32* %i, align 4 ; <i32> [#uses=1]
%tmp100 = add i32 %tmp99, 1 ; <i32> [#uses=1]
store i32 %tmp100, i32* %i, align 4
br label %bb101
bb101: ; preds = %bb84, %bb83
- %tmp102 = load i32* %i, align 4 ; <i32> [#uses=1]
+ %tmp102 = load i32, i32* %i, align 4 ; <i32> [#uses=1]
%tmp103 = icmp sle i32 %tmp102, 2 ; <i1> [#uses=1]
%tmp103104 = zext i1 %tmp103 to i8 ; <i8> [#uses=1]
%toBool105 = icmp ne i8 %tmp103104, 0 ; <i1> [#uses=1]
@@ -178,6 +178,6 @@ bb106: ; preds = %bb101
br label %return
return: ; preds = %bb106
- %retval107 = load i32* %retval ; <i32> [#uses=1]
+ %retval107 = load i32, i32* %retval ; <i32> [#uses=1]
ret i32 %retval107
}
diff --git a/test/CodeGen/X86/narrow-shl-cst.ll b/test/CodeGen/X86/narrow-shl-cst.ll
index 40b976014a77..c9e9a3d2a976 100644
--- a/test/CodeGen/X86/narrow-shl-cst.ll
+++ b/test/CodeGen/X86/narrow-shl-cst.ll
@@ -99,3 +99,26 @@ define i64 @test11(i64 %x) nounwind {
; CHECK: xorq $-65536
; CHECK: shlq $33
}
+
+; PR23098
+define i32 @test12(i32 %x, i32* %y) nounwind {
+ %and = shl i32 %x, 1
+ %shl = and i32 %and, 255
+ store i32 %shl, i32* %y
+ ret i32 %shl
+; CHECK-LABEL: test12:
+; CHECK: andl $127
+; CHECK-NEXT: addl
+; CHECK-NOT: shl
+}
+
+define i64 @test13(i64 %x, i64* %y) nounwind {
+ %and = shl i64 %x, 1
+ %shl = and i64 %and, 255
+ store i64 %shl, i64* %y
+ ret i64 %shl
+; CHECK-LABEL: test13:
+; CHECK: andq $127
+; CHECK-NEXT: addq
+; CHECK-NOT: shl
+}
diff --git a/test/CodeGen/X86/narrow-shl-load.ll b/test/CodeGen/X86/narrow-shl-load.ll
index 5175bfc2bcb1..9dc0d749cb23 100644
--- a/test/CodeGen/X86/narrow-shl-load.ll
+++ b/test/CodeGen/X86/narrow-shl-load.ll
@@ -11,7 +11,7 @@ bb.nph:
br label %while.cond
while.cond: ; preds = %while.cond, %bb.nph
- %tmp6 = load i32* undef, align 4
+ %tmp6 = load i32, i32* undef, align 4
%and = or i64 undef, undef
%conv11 = zext i32 undef to i64
%conv14 = zext i32 %tmp6 to i64
@@ -20,7 +20,7 @@ while.cond: ; preds = %while.cond, %bb.nph
%and17 = or i64 %shl15.masked, %conv11
%add = add i64 %and17, 1
%xor = xor i64 %add, %and
- %tmp20 = load i64* undef, align 8
+ %tmp20 = load i64, i64* undef, align 8
%add21 = add i64 %xor, %tmp20
%conv22 = trunc i64 %add21 to i32
store i32 %conv22, i32* undef, align 4
@@ -34,7 +34,7 @@ while.end: ; preds = %while.cond
; PR8757
define i32 @test3(i32 *%P) nounwind ssp {
store volatile i32 128, i32* %P
- %tmp4.pre = load i32* %P
+ %tmp4.pre = load i32, i32* %P
%phitmp = trunc i32 %tmp4.pre to i16
%phitmp13 = shl i16 %phitmp, 8
%phitmp14 = ashr i16 %phitmp13, 8
diff --git a/test/CodeGen/X86/narrow_op-1.ll b/test/CodeGen/X86/narrow_op-1.ll
index 89ae3f1a3353..592ff94c57ba 100644
--- a/test/CodeGen/X86/narrow_op-1.ll
+++ b/test/CodeGen/X86/narrow_op-1.ll
@@ -5,9 +5,9 @@
define void @t1() nounwind optsize ssp {
entry:
- %0 = load i32* bitcast (i16* getelementptr (%struct.bf* @bfi, i32 0, i32 1) to i32*), align 8
+ %0 = load i32, i32* bitcast (i16* getelementptr (%struct.bf, %struct.bf* @bfi, i32 0, i32 1) to i32*), align 8
%1 = or i32 %0, 65536
- store i32 %1, i32* bitcast (i16* getelementptr (%struct.bf* @bfi, i32 0, i32 1) to i32*), align 8
+ store i32 %1, i32* bitcast (i16* getelementptr (%struct.bf, %struct.bf* @bfi, i32 0, i32 1) to i32*), align 8
ret void
; CHECK-LABEL: t1:
@@ -17,9 +17,9 @@ entry:
define void @t2() nounwind optsize ssp {
entry:
- %0 = load i32* bitcast (i16* getelementptr (%struct.bf* @bfi, i32 0, i32 1) to i32*), align 8
+ %0 = load i32, i32* bitcast (i16* getelementptr (%struct.bf, %struct.bf* @bfi, i32 0, i32 1) to i32*), align 8
%1 = or i32 %0, 16842752
- store i32 %1, i32* bitcast (i16* getelementptr (%struct.bf* @bfi, i32 0, i32 1) to i32*), align 8
+ store i32 %1, i32* bitcast (i16* getelementptr (%struct.bf, %struct.bf* @bfi, i32 0, i32 1) to i32*), align 8
ret void
; CHECK-LABEL: t2:
diff --git a/test/CodeGen/X86/negate-add-zero.ll b/test/CodeGen/X86/negate-add-zero.ll
index c961bd091b95..06341dc7ba53 100644
--- a/test/CodeGen/X86/negate-add-zero.ll
+++ b/test/CodeGen/X86/negate-add-zero.ll
@@ -486,8 +486,6 @@ declare void @_ZN7CDSListIP9HingeNodeEC1Eii(%"struct.CDSList<HingeNode*>"*, i32,
declare i8* @_Znwm(i32)
-declare i32 @llvm.eh.typeid.for.i32(i8*) nounwind
-
declare void @_ZdlPv(i8*) nounwind
declare i32 @__gxx_personality_v0(...)
@@ -827,39 +825,39 @@ declare void @_ZN11MatrixTools9transposeI11FixedMatrixIdLi6ELi6ELi0ELi0EEEENT_13
declare void @_ZN21HNodeTranslateRotate311toCartesianEv(%struct.HNodeTranslateRotate3*)
define linkonce void @_ZN21HNodeTranslateRotate36setVelERK9CDSVectorIdLi1EN3CDS12DefaultAllocEE(%struct.HNodeTranslateRotate3* %this, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"* %velv) {
- %1 = getelementptr double* null, i32 -1 ; <double*> [#uses=1]
- %2 = load double* %1, align 8 ; <double> [#uses=1]
- %3 = load double* null, align 8 ; <double> [#uses=2]
- %4 = load double* null, align 8 ; <double> [#uses=2]
- %5 = load double* null, align 8 ; <double> [#uses=3]
- %6 = getelementptr %struct.HNodeTranslateRotate3* %this, i32 0, i32 2, i32 0, i32 0, i32 0, i32 0 ; <double*> [#uses=0]
- %7 = getelementptr %struct.HNodeTranslateRotate3* %this, i32 0, i32 2, i32 0, i32 0, i32 0, i32 1 ; <double*> [#uses=0]
- %8 = getelementptr %struct.HNodeTranslateRotate3* %this, i32 0, i32 2, i32 0, i32 0, i32 0, i32 2 ; <double*> [#uses=0]
- %9 = getelementptr %struct.HNodeTranslateRotate3* %this, i32 0, i32 2, i32 0, i32 0, i32 0, i32 3 ; <double*> [#uses=0]
- %10 = load double* null, align 8 ; <double> [#uses=2]
+ %1 = getelementptr double, double* null, i32 -1 ; <double*> [#uses=1]
+ %2 = load double, double* %1, align 8 ; <double> [#uses=1]
+ %3 = load double, double* null, align 8 ; <double> [#uses=2]
+ %4 = load double, double* null, align 8 ; <double> [#uses=2]
+ %5 = load double, double* null, align 8 ; <double> [#uses=3]
+ %6 = getelementptr %struct.HNodeTranslateRotate3, %struct.HNodeTranslateRotate3* %this, i32 0, i32 2, i32 0, i32 0, i32 0, i32 0 ; <double*> [#uses=0]
+ %7 = getelementptr %struct.HNodeTranslateRotate3, %struct.HNodeTranslateRotate3* %this, i32 0, i32 2, i32 0, i32 0, i32 0, i32 1 ; <double*> [#uses=0]
+ %8 = getelementptr %struct.HNodeTranslateRotate3, %struct.HNodeTranslateRotate3* %this, i32 0, i32 2, i32 0, i32 0, i32 0, i32 2 ; <double*> [#uses=0]
+ %9 = getelementptr %struct.HNodeTranslateRotate3, %struct.HNodeTranslateRotate3* %this, i32 0, i32 2, i32 0, i32 0, i32 0, i32 3 ; <double*> [#uses=0]
+ %10 = load double, double* null, align 8 ; <double> [#uses=2]
%11 = fsub double -0.000000e+00, %10 ; <double> [#uses=1]
- %12 = load double* null, align 8 ; <double> [#uses=2]
- %13 = getelementptr %struct.HNodeTranslateRotate3* %this, i32 0, i32 1, i32 0, i32 0, i32 0, i32 3 ; <double*> [#uses=1]
- %14 = load double* %13, align 8 ; <double> [#uses=2]
+ %12 = load double, double* null, align 8 ; <double> [#uses=2]
+ %13 = getelementptr %struct.HNodeTranslateRotate3, %struct.HNodeTranslateRotate3* %this, i32 0, i32 1, i32 0, i32 0, i32 0, i32 3 ; <double*> [#uses=1]
+ %14 = load double, double* %13, align 8 ; <double> [#uses=2]
%15 = fsub double -0.000000e+00, %14 ; <double> [#uses=1]
- %16 = getelementptr %struct.HNodeTranslateRotate3* %this, i32 0, i32 1, i32 0, i32 0, i32 0, i32 2 ; <double*> [#uses=1]
- %17 = load double* %16, align 8 ; <double> [#uses=2]
+ %16 = getelementptr %struct.HNodeTranslateRotate3, %struct.HNodeTranslateRotate3* %this, i32 0, i32 1, i32 0, i32 0, i32 0, i32 2 ; <double*> [#uses=1]
+ %17 = load double, double* %16, align 8 ; <double> [#uses=2]
%18 = fsub double -0.000000e+00, %17 ; <double> [#uses=1]
- %19 = getelementptr %"struct.FixedMatrix<double,2,6,0,0>"* null, i32 0, i32 0, i32 0, i32 0 ; <double*> [#uses=0]
- %20 = getelementptr %"struct.FixedMatrix<double,2,6,0,0>"* null, i32 0, i32 0, i32 0, i32 3 ; <double*> [#uses=0]
- %21 = getelementptr %"struct.FixedMatrix<double,2,6,0,0>"* null, i32 0, i32 0, i32 0, i32 6 ; <double*> [#uses=0]
- %22 = getelementptr %"struct.FixedMatrix<double,2,6,0,0>"* null, i32 0, i32 0, i32 0, i32 9 ; <double*> [#uses=0]
- %23 = getelementptr %"struct.FixedMatrix<double,2,6,0,0>"* null, i32 0, i32 0, i32 0, i32 1 ; <double*> [#uses=0]
- %24 = getelementptr %"struct.FixedMatrix<double,2,6,0,0>"* null, i32 0, i32 0, i32 0, i32 4 ; <double*> [#uses=0]
- %25 = getelementptr %"struct.FixedMatrix<double,2,6,0,0>"* null, i32 0, i32 0, i32 0, i32 7 ; <double*> [#uses=0]
- %26 = getelementptr %"struct.FixedMatrix<double,2,6,0,0>"* null, i32 0, i32 0, i32 0, i32 10 ; <double*> [#uses=0]
- %27 = getelementptr %"struct.FixedMatrix<double,2,6,0,0>"* null, i32 0, i32 0, i32 0, i32 2 ; <double*> [#uses=0]
- %28 = getelementptr %"struct.FixedMatrix<double,2,6,0,0>"* null, i32 0, i32 0, i32 0, i32 5 ; <double*> [#uses=0]
- %29 = getelementptr %"struct.FixedMatrix<double,2,6,0,0>"* null, i32 0, i32 0, i32 0, i32 8 ; <double*> [#uses=0]
- %30 = getelementptr %"struct.FixedMatrix<double,2,6,0,0>"* null, i32 0, i32 0, i32 0, i32 11 ; <double*> [#uses=0]
- %31 = getelementptr %"struct.FixedMatrix<double,1,3,0,0>"* null, i32 0, i32 0, i32 0, i32 0 ; <double*> [#uses=0]
- %32 = getelementptr %"struct.FixedMatrix<double,1,3,0,0>"* null, i32 0, i32 0, i32 0, i32 1 ; <double*> [#uses=1]
- %33 = getelementptr %"struct.FixedMatrix<double,1,3,0,0>"* null, i32 0, i32 0, i32 0, i32 2 ; <double*> [#uses=1]
+ %19 = getelementptr %"struct.FixedMatrix<double,2,6,0,0>", %"struct.FixedMatrix<double,2,6,0,0>"* null, i32 0, i32 0, i32 0, i32 0 ; <double*> [#uses=0]
+ %20 = getelementptr %"struct.FixedMatrix<double,2,6,0,0>", %"struct.FixedMatrix<double,2,6,0,0>"* null, i32 0, i32 0, i32 0, i32 3 ; <double*> [#uses=0]
+ %21 = getelementptr %"struct.FixedMatrix<double,2,6,0,0>", %"struct.FixedMatrix<double,2,6,0,0>"* null, i32 0, i32 0, i32 0, i32 6 ; <double*> [#uses=0]
+ %22 = getelementptr %"struct.FixedMatrix<double,2,6,0,0>", %"struct.FixedMatrix<double,2,6,0,0>"* null, i32 0, i32 0, i32 0, i32 9 ; <double*> [#uses=0]
+ %23 = getelementptr %"struct.FixedMatrix<double,2,6,0,0>", %"struct.FixedMatrix<double,2,6,0,0>"* null, i32 0, i32 0, i32 0, i32 1 ; <double*> [#uses=0]
+ %24 = getelementptr %"struct.FixedMatrix<double,2,6,0,0>", %"struct.FixedMatrix<double,2,6,0,0>"* null, i32 0, i32 0, i32 0, i32 4 ; <double*> [#uses=0]
+ %25 = getelementptr %"struct.FixedMatrix<double,2,6,0,0>", %"struct.FixedMatrix<double,2,6,0,0>"* null, i32 0, i32 0, i32 0, i32 7 ; <double*> [#uses=0]
+ %26 = getelementptr %"struct.FixedMatrix<double,2,6,0,0>", %"struct.FixedMatrix<double,2,6,0,0>"* null, i32 0, i32 0, i32 0, i32 10 ; <double*> [#uses=0]
+ %27 = getelementptr %"struct.FixedMatrix<double,2,6,0,0>", %"struct.FixedMatrix<double,2,6,0,0>"* null, i32 0, i32 0, i32 0, i32 2 ; <double*> [#uses=0]
+ %28 = getelementptr %"struct.FixedMatrix<double,2,6,0,0>", %"struct.FixedMatrix<double,2,6,0,0>"* null, i32 0, i32 0, i32 0, i32 5 ; <double*> [#uses=0]
+ %29 = getelementptr %"struct.FixedMatrix<double,2,6,0,0>", %"struct.FixedMatrix<double,2,6,0,0>"* null, i32 0, i32 0, i32 0, i32 8 ; <double*> [#uses=0]
+ %30 = getelementptr %"struct.FixedMatrix<double,2,6,0,0>", %"struct.FixedMatrix<double,2,6,0,0>"* null, i32 0, i32 0, i32 0, i32 11 ; <double*> [#uses=0]
+ %31 = getelementptr %"struct.FixedMatrix<double,1,3,0,0>", %"struct.FixedMatrix<double,1,3,0,0>"* null, i32 0, i32 0, i32 0, i32 0 ; <double*> [#uses=0]
+ %32 = getelementptr %"struct.FixedMatrix<double,1,3,0,0>", %"struct.FixedMatrix<double,1,3,0,0>"* null, i32 0, i32 0, i32 0, i32 1 ; <double*> [#uses=1]
+ %33 = getelementptr %"struct.FixedMatrix<double,1,3,0,0>", %"struct.FixedMatrix<double,1,3,0,0>"* null, i32 0, i32 0, i32 0, i32 2 ; <double*> [#uses=1]
%34 = fmul double %17, %5 ; <double> [#uses=1]
%35 = fadd double 0.000000e+00, %34 ; <double> [#uses=1]
%36 = fadd double 0.000000e+00, 0.000000e+00 ; <double> [#uses=1]
@@ -882,17 +880,17 @@ define linkonce void @_ZN21HNodeTranslateRotate36setVelERK9CDSVectorIdLi1EN3CDS1
%51 = fmul double %35, 2.000000e+00 ; <double> [#uses=1]
%52 = fmul double %42, 2.000000e+00 ; <double> [#uses=1]
%53 = fmul double %50, 2.000000e+00 ; <double> [#uses=1]
- %54 = getelementptr %struct.HNodeTranslateRotate3* %this, i32 0, i32 0, i32 10, i32 0, i32 0, i32 0 ; <double*> [#uses=1]
+ %54 = getelementptr %struct.HNodeTranslateRotate3, %struct.HNodeTranslateRotate3* %this, i32 0, i32 0, i32 10, i32 0, i32 0, i32 0 ; <double*> [#uses=1]
store double %51, double* %54, align 8
- %55 = getelementptr %struct.HNodeTranslateRotate3* %this, i32 0, i32 0, i32 10, i32 0, i32 0, i32 1 ; <double*> [#uses=1]
+ %55 = getelementptr %struct.HNodeTranslateRotate3, %struct.HNodeTranslateRotate3* %this, i32 0, i32 0, i32 10, i32 0, i32 0, i32 1 ; <double*> [#uses=1]
store double %52, double* %55, align 8
- %56 = getelementptr %struct.HNodeTranslateRotate3* %this, i32 0, i32 0, i32 10, i32 0, i32 0, i32 2 ; <double*> [#uses=1]
+ %56 = getelementptr %struct.HNodeTranslateRotate3, %struct.HNodeTranslateRotate3* %this, i32 0, i32 0, i32 10, i32 0, i32 0, i32 2 ; <double*> [#uses=1]
store double %53, double* %56, align 8
- %57 = getelementptr %"struct.SubVector<CDSVector<double, 1, CDS::DefaultAlloc> >"* null, i32 0, i32 0 ; <%"struct.CDSVector<double,0,CDS::DefaultAlloc>"**> [#uses=1]
+ %57 = getelementptr %"struct.SubVector<CDSVector<double, 1, CDS::DefaultAlloc> >", %"struct.SubVector<CDSVector<double, 1, CDS::DefaultAlloc> >"* null, i32 0, i32 0 ; <%"struct.CDSVector<double,0,CDS::DefaultAlloc>"**> [#uses=1]
store %"struct.CDSVector<double,0,CDS::DefaultAlloc>"* %velv, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"** %57, align 8
- %58 = getelementptr %"struct.SubVector<CDSVector<double, 1, CDS::DefaultAlloc> >"* null, i32 0, i32 1 ; <i32*> [#uses=1]
+ %58 = getelementptr %"struct.SubVector<CDSVector<double, 1, CDS::DefaultAlloc> >", %"struct.SubVector<CDSVector<double, 1, CDS::DefaultAlloc> >"* null, i32 0, i32 1 ; <i32*> [#uses=1]
store i32 4, i32* %58, align 4
- %59 = getelementptr %"struct.SubVector<CDSVector<double, 1, CDS::DefaultAlloc> >"* null, i32 0, i32 2 ; <i32*> [#uses=1]
+ %59 = getelementptr %"struct.SubVector<CDSVector<double, 1, CDS::DefaultAlloc> >", %"struct.SubVector<CDSVector<double, 1, CDS::DefaultAlloc> >"* null, i32 0, i32 2 ; <i32*> [#uses=1]
store i32 3, i32* %59, align 8
unreachable
}
diff --git a/test/CodeGen/X86/negative-subscript.ll b/test/CodeGen/X86/negative-subscript.ll
index 28f7d6b2dbae..f69157551b7a 100644
--- a/test/CodeGen/X86/negative-subscript.ll
+++ b/test/CodeGen/X86/negative-subscript.ll
@@ -5,6 +5,6 @@
define i32 @main() nounwind {
entry:
- store i8* bitcast (i8** getelementptr ([255 x i8*]* @a, i32 0, i32 -2147483624) to i8*), i8** getelementptr ([255 x i8*]* @a, i32 0, i32 16), align 32
+ store i8* bitcast (i8** getelementptr ([255 x i8*], [255 x i8*]* @a, i32 0, i32 -2147483624) to i8*), i8** getelementptr ([255 x i8*], [255 x i8*]* @a, i32 0, i32 16), align 32
ret i32 0
}
diff --git a/test/CodeGen/X86/no-cmov.ll b/test/CodeGen/X86/no-cmov.ll
index e13edf26cad7..8fc0f7075c0e 100644
--- a/test/CodeGen/X86/no-cmov.ll
+++ b/test/CodeGen/X86/no-cmov.ll
@@ -2,7 +2,7 @@
define i32 @test1(i32 %g, i32* %j) {
%tobool = icmp eq i32 %g, 0
- %cmp = load i32* %j, align 4
+ %cmp = load i32, i32* %j, align 4
%retval.0 = select i1 %tobool, i32 1, i32 %cmp
ret i32 %retval.0
diff --git a/test/CodeGen/X86/non-unique-sections.ll b/test/CodeGen/X86/non-unique-sections.ll
new file mode 100644
index 000000000000..e588b9dda98d
--- /dev/null
+++ b/test/CodeGen/X86/non-unique-sections.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -mtriple=x86_64-pc-linux -function-sections -unique-section-names=false | FileCheck %s
+
+; CHECK: .section .text,"ax",@progbits,unique
+; CHECK-NOT: section
+; CHECK: f:
+define void @f() {
+ ret void
+}
+
+; CHECK: .section .text,"ax",@progbits,unique
+; CHECK-NOT: section
+; CHECK: g:
+define void @g() {
+ ret void
+}
diff --git a/test/CodeGen/X86/nontemporal-2.ll b/test/CodeGen/X86/nontemporal-2.ll
index f62f3725d7d8..8c08b3c163c0 100644
--- a/test/CodeGen/X86/nontemporal-2.ll
+++ b/test/CodeGen/X86/nontemporal-2.ll
@@ -1,31 +1,303 @@
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=corei7 | FileCheck %s -check-prefix=CHECK -check-prefix=SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=corei7-avx | FileCheck %s -check-prefix=CHECK -check-prefix=AVX
-
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=core-avx2 | FileCheck %s -check-prefix=CHECK -check-prefix=AVX2
; Make sure that we generate non-temporal stores for the test cases below.
+; We use xorps for zeroing, so domain information isn't available anymore.
-define void @test1(<4 x float>* %dst) {
-; CHECK-LABEL: test1:
+define void @test_zero_v4f32(<4 x float>* %dst) {
+; CHECK-LABEL: test_zero_v4f32:
; SSE: movntps
; AVX: vmovntps
store <4 x float> zeroinitializer, <4 x float>* %dst, align 16, !nontemporal !1
ret void
}
-define void @test2(<4 x i32>* %dst) {
-; CHECK-LABEL: test2:
+define void @test_zero_v4i32(<4 x i32>* %dst) {
+; CHECK-LABEL: test_zero_v4i32:
; SSE: movntps
; AVX: vmovntps
store <4 x i32> zeroinitializer, <4 x i32>* %dst, align 16, !nontemporal !1
ret void
}
-define void @test3(<2 x double>* %dst) {
-; CHECK-LABEL: test3:
+define void @test_zero_v2f64(<2 x double>* %dst) {
+; CHECK-LABEL: test_zero_v2f64:
; SSE: movntps
; AVX: vmovntps
store <2 x double> zeroinitializer, <2 x double>* %dst, align 16, !nontemporal !1
ret void
}
+define void @test_zero_v2i64(<2 x i64>* %dst) {
+; CHECK-LABEL: test_zero_v2i64:
+; SSE: movntps
+; AVX: vmovntps
+ store <2 x i64> zeroinitializer, <2 x i64>* %dst, align 16, !nontemporal !1
+ ret void
+}
+
+define void @test_zero_v8i16(<8 x i16>* %dst) {
+; CHECK-LABEL: test_zero_v8i16:
+; SSE: movntps
+; AVX: vmovntps
+ store <8 x i16> zeroinitializer, <8 x i16>* %dst, align 16, !nontemporal !1
+ ret void
+}
+
+define void @test_zero_v16i8(<16 x i8>* %dst) {
+; CHECK-LABEL: test_zero_v16i8:
+; SSE: movntps
+; AVX: vmovntps
+ store <16 x i8> zeroinitializer, <16 x i8>* %dst, align 16, !nontemporal !1
+ ret void
+}
+
+; And now YMM versions.
+
+define void @test_zero_v8f32(<8 x float>* %dst) {
+; CHECK-LABEL: test_zero_v8f32:
+; AVX: vmovntps %ymm
+ store <8 x float> zeroinitializer, <8 x float>* %dst, align 32, !nontemporal !1
+ ret void
+}
+
+define void @test_zero_v8i32(<8 x i32>* %dst) {
+; CHECK-LABEL: test_zero_v8i32:
+; AVX2: vmovntps %ymm
+ store <8 x i32> zeroinitializer, <8 x i32>* %dst, align 32, !nontemporal !1
+ ret void
+}
+
+define void @test_zero_v4f64(<4 x double>* %dst) {
+; CHECK-LABEL: test_zero_v4f64:
+; AVX: vmovntps %ymm
+ store <4 x double> zeroinitializer, <4 x double>* %dst, align 32, !nontemporal !1
+ ret void
+}
+
+define void @test_zero_v4i64(<4 x i64>* %dst) {
+; CHECK-LABEL: test_zero_v4i64:
+; AVX2: vmovntps %ymm
+ store <4 x i64> zeroinitializer, <4 x i64>* %dst, align 32, !nontemporal !1
+ ret void
+}
+
+define void @test_zero_v16i16(<16 x i16>* %dst) {
+; CHECK-LABEL: test_zero_v16i16:
+; AVX2: vmovntps %ymm
+ store <16 x i16> zeroinitializer, <16 x i16>* %dst, align 32, !nontemporal !1
+ ret void
+}
+
+define void @test_zero_v32i8(<32 x i8>* %dst) {
+; CHECK-LABEL: test_zero_v32i8:
+; AVX2: vmovntps %ymm
+ store <32 x i8> zeroinitializer, <32 x i8>* %dst, align 32, !nontemporal !1
+ ret void
+}
+
+
+; Check that we also handle arguments. Here the type survives longer.
+
+define void @test_arg_v4f32(<4 x float> %arg, <4 x float>* %dst) {
+; CHECK-LABEL: test_arg_v4f32:
+; SSE: movntps
+; AVX: vmovntps
+ store <4 x float> %arg, <4 x float>* %dst, align 16, !nontemporal !1
+ ret void
+}
+
+define void @test_arg_v4i32(<4 x i32> %arg, <4 x i32>* %dst) {
+; CHECK-LABEL: test_arg_v4i32:
+; SSE: movntps
+; AVX: vmovntps
+ store <4 x i32> %arg, <4 x i32>* %dst, align 16, !nontemporal !1
+ ret void
+}
+
+define void @test_arg_v2f64(<2 x double> %arg, <2 x double>* %dst) {
+; CHECK-LABEL: test_arg_v2f64:
+; SSE: movntps
+; AVX: vmovntps
+ store <2 x double> %arg, <2 x double>* %dst, align 16, !nontemporal !1
+ ret void
+}
+
+define void @test_arg_v2i64(<2 x i64> %arg, <2 x i64>* %dst) {
+; CHECK-LABEL: test_arg_v2i64:
+; SSE: movntps
+; AVX: vmovntps
+ store <2 x i64> %arg, <2 x i64>* %dst, align 16, !nontemporal !1
+ ret void
+}
+
+define void @test_arg_v8i16(<8 x i16> %arg, <8 x i16>* %dst) {
+; CHECK-LABEL: test_arg_v8i16:
+; SSE: movntps
+; AVX: vmovntps
+ store <8 x i16> %arg, <8 x i16>* %dst, align 16, !nontemporal !1
+ ret void
+}
+
+define void @test_arg_v16i8(<16 x i8> %arg, <16 x i8>* %dst) {
+; CHECK-LABEL: test_arg_v16i8:
+; SSE: movntps
+; AVX: vmovntps
+ store <16 x i8> %arg, <16 x i8>* %dst, align 16, !nontemporal !1
+ ret void
+}
+
+; And now YMM versions.
+
+define void @test_arg_v8f32(<8 x float> %arg, <8 x float>* %dst) {
+; CHECK-LABEL: test_arg_v8f32:
+; AVX: vmovntps %ymm
+ store <8 x float> %arg, <8 x float>* %dst, align 32, !nontemporal !1
+ ret void
+}
+
+define void @test_arg_v8i32(<8 x i32> %arg, <8 x i32>* %dst) {
+; CHECK-LABEL: test_arg_v8i32:
+; AVX2: vmovntps %ymm
+ store <8 x i32> %arg, <8 x i32>* %dst, align 32, !nontemporal !1
+ ret void
+}
+
+define void @test_arg_v4f64(<4 x double> %arg, <4 x double>* %dst) {
+; CHECK-LABEL: test_arg_v4f64:
+; AVX: vmovntps %ymm
+ store <4 x double> %arg, <4 x double>* %dst, align 32, !nontemporal !1
+ ret void
+}
+
+define void @test_arg_v4i64(<4 x i64> %arg, <4 x i64>* %dst) {
+; CHECK-LABEL: test_arg_v4i64:
+; AVX2: vmovntps %ymm
+ store <4 x i64> %arg, <4 x i64>* %dst, align 32, !nontemporal !1
+ ret void
+}
+
+define void @test_arg_v16i16(<16 x i16> %arg, <16 x i16>* %dst) {
+; CHECK-LABEL: test_arg_v16i16:
+; AVX2: vmovntps %ymm
+ store <16 x i16> %arg, <16 x i16>* %dst, align 32, !nontemporal !1
+ ret void
+}
+
+define void @test_arg_v32i8(<32 x i8> %arg, <32 x i8>* %dst) {
+; CHECK-LABEL: test_arg_v32i8:
+; AVX2: vmovntps %ymm
+ store <32 x i8> %arg, <32 x i8>* %dst, align 32, !nontemporal !1
+ ret void
+}
+
+
+; Now check that if the execution domain is trivially visible, we use it.
+; We use an add to make the type survive all the way to the MOVNT.
+
+define void @test_op_v4f32(<4 x float> %a, <4 x float> %b, <4 x float>* %dst) {
+; CHECK-LABEL: test_op_v4f32:
+; SSE: movntps
+; AVX: vmovntps
+ %r = fadd <4 x float> %a, %b
+ store <4 x float> %r, <4 x float>* %dst, align 16, !nontemporal !1
+ ret void
+}
+
+define void @test_op_v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32>* %dst) {
+; CHECK-LABEL: test_op_v4i32:
+; SSE: movntdq
+; AVX: vmovntdq
+ %r = add <4 x i32> %a, %b
+ store <4 x i32> %r, <4 x i32>* %dst, align 16, !nontemporal !1
+ ret void
+}
+
+define void @test_op_v2f64(<2 x double> %a, <2 x double> %b, <2 x double>* %dst) {
+; CHECK-LABEL: test_op_v2f64:
+; SSE: movntpd
+; AVX: vmovntpd
+ %r = fadd <2 x double> %a, %b
+ store <2 x double> %r, <2 x double>* %dst, align 16, !nontemporal !1
+ ret void
+}
+
+define void @test_op_v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64>* %dst) {
+; CHECK-LABEL: test_op_v2i64:
+; SSE: movntdq
+; AVX: vmovntdq
+ %r = add <2 x i64> %a, %b
+ store <2 x i64> %r, <2 x i64>* %dst, align 16, !nontemporal !1
+ ret void
+}
+
+define void @test_op_v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16>* %dst) {
+; CHECK-LABEL: test_op_v8i16:
+; SSE: movntdq
+; AVX: vmovntdq
+ %r = add <8 x i16> %a, %b
+ store <8 x i16> %r, <8 x i16>* %dst, align 16, !nontemporal !1
+ ret void
+}
+
+define void @test_op_v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8>* %dst) {
+; CHECK-LABEL: test_op_v16i8:
+; SSE: movntdq
+; AVX: vmovntdq
+ %r = add <16 x i8> %a, %b
+ store <16 x i8> %r, <16 x i8>* %dst, align 16, !nontemporal !1
+ ret void
+}
+
+; And now YMM versions.
+
+define void @test_op_v8f32(<8 x float> %a, <8 x float> %b, <8 x float>* %dst) {
+; CHECK-LABEL: test_op_v8f32:
+; AVX: vmovntps %ymm
+ %r = fadd <8 x float> %a, %b
+ store <8 x float> %r, <8 x float>* %dst, align 32, !nontemporal !1
+ ret void
+}
+
+define void @test_op_v8i32(<8 x i32> %a, <8 x i32> %b, <8 x i32>* %dst) {
+; CHECK-LABEL: test_op_v8i32:
+; AVX2: vmovntdq %ymm
+ %r = add <8 x i32> %a, %b
+ store <8 x i32> %r, <8 x i32>* %dst, align 32, !nontemporal !1
+ ret void
+}
+
+define void @test_op_v4f64(<4 x double> %a, <4 x double> %b, <4 x double>* %dst) {
+; CHECK-LABEL: test_op_v4f64:
+; AVX: vmovntpd %ymm
+ %r = fadd <4 x double> %a, %b
+ store <4 x double> %r, <4 x double>* %dst, align 32, !nontemporal !1
+ ret void
+}
+
+define void @test_op_v4i64(<4 x i64> %a, <4 x i64> %b, <4 x i64>* %dst) {
+; CHECK-LABEL: test_op_v4i64:
+; AVX2: vmovntdq %ymm
+ %r = add <4 x i64> %a, %b
+ store <4 x i64> %r, <4 x i64>* %dst, align 32, !nontemporal !1
+ ret void
+}
+
+define void @test_op_v16i16(<16 x i16> %a, <16 x i16> %b, <16 x i16>* %dst) {
+; CHECK-LABEL: test_op_v16i16:
+; AVX2: vmovntdq %ymm
+ %r = add <16 x i16> %a, %b
+ store <16 x i16> %r, <16 x i16>* %dst, align 32, !nontemporal !1
+ ret void
+}
+
+define void @test_op_v32i8(<32 x i8> %a, <32 x i8> %b, <32 x i8>* %dst) {
+; CHECK-LABEL: test_op_v32i8:
+; AVX2: vmovntdq %ymm
+ %r = add <32 x i8> %a, %b
+ store <32 x i8> %r, <32 x i8>* %dst, align 32, !nontemporal !1
+ ret void
+}
+
!1 = !{i32 1}
diff --git a/test/CodeGen/X86/norex-subreg.ll b/test/CodeGen/X86/norex-subreg.ll
index fb41dede287f..dd47af9ae9ab 100644
--- a/test/CodeGen/X86/norex-subreg.ll
+++ b/test/CodeGen/X86/norex-subreg.ll
@@ -15,17 +15,17 @@ target triple = "x86_64-apple-macosx10.7"
define void @f() nounwind uwtable ssp {
entry:
- %0 = load i32* undef, align 4
+ %0 = load i32, i32* undef, align 4
%add = add i32 0, %0
%conv1 = trunc i32 %add to i16
%bf.value = and i16 %conv1, 255
%1 = and i16 %bf.value, 255
%2 = shl i16 %1, 8
- %3 = load i16* undef, align 1
+ %3 = load i16, i16* undef, align 1
%4 = and i16 %3, 255
%5 = or i16 %4, %2
store i16 %5, i16* undef, align 1
- %6 = load i16* undef, align 1
+ %6 = load i16, i16* undef, align 1
%7 = lshr i16 %6, 8
%bf.clear2 = and i16 %7, 255
%conv3 = zext i16 %bf.clear2 to i32
diff --git a/test/CodeGen/X86/nosse-error1.ll b/test/CodeGen/X86/nosse-error1.ll
index 291379eeaec9..7617d59f4a05 100644
--- a/test/CodeGen/X86/nosse-error1.ll
+++ b/test/CodeGen/X86/nosse-error1.ll
@@ -12,16 +12,16 @@ target triple = "x86_64-unknown-linux-gnu"
define void @test() nounwind {
entry:
- %0 = load float* @f, align 4 ; <float> [#uses=1]
+ %0 = load float, float* @f, align 4 ; <float> [#uses=1]
%1 = tail call float @foo1(float %0) nounwind ; <float> [#uses=1]
store float %1, float* @f, align 4
- %2 = load double* @d, align 8 ; <double> [#uses=1]
+ %2 = load double, double* @d, align 8 ; <double> [#uses=1]
%3 = tail call double @foo2(double %2) nounwind ; <double> [#uses=1]
store double %3, double* @d, align 8
- %4 = load float* @f, align 4 ; <float> [#uses=1]
+ %4 = load float, float* @f, align 4 ; <float> [#uses=1]
%5 = tail call float @foo3(float %4) nounwind ; <float> [#uses=1]
store float %5, float* @f, align 4
- %6 = load double* @d, align 8 ; <double> [#uses=1]
+ %6 = load double, double* @d, align 8 ; <double> [#uses=1]
%7 = tail call double @foo4(double %6) nounwind ; <double> [#uses=1]
store double %7, double* @d, align 8
ret void
diff --git a/test/CodeGen/X86/nosse-error2.ll b/test/CodeGen/X86/nosse-error2.ll
index a7cee2dd8211..3da80aae686f 100644
--- a/test/CodeGen/X86/nosse-error2.ll
+++ b/test/CodeGen/X86/nosse-error2.ll
@@ -12,16 +12,16 @@ target triple = "i386-unknown-linux-gnu"
define void @test() nounwind {
entry:
- %0 = load float* @f, align 4 ; <float> [#uses=1]
+ %0 = load float, float* @f, align 4 ; <float> [#uses=1]
%1 = tail call inreg float @foo1(float inreg %0) nounwind ; <float> [#uses=1]
store float %1, float* @f, align 4
- %2 = load double* @d, align 8 ; <double> [#uses=1]
+ %2 = load double, double* @d, align 8 ; <double> [#uses=1]
%3 = tail call inreg double @foo2(double inreg %2) nounwind ; <double> [#uses=1]
store double %3, double* @d, align 8
- %4 = load float* @f, align 4 ; <float> [#uses=1]
+ %4 = load float, float* @f, align 4 ; <float> [#uses=1]
%5 = tail call inreg float @foo3(float inreg %4) nounwind ; <float> [#uses=1]
store float %5, float* @f, align 4
- %6 = load double* @d, align 8 ; <double> [#uses=1]
+ %6 = load double, double* @d, align 8 ; <double> [#uses=1]
%7 = tail call inreg double @foo4(double inreg %6) nounwind ; <double> [#uses=1]
store double %7, double* @d, align 8
ret void
diff --git a/test/CodeGen/X86/nosse-varargs.ll b/test/CodeGen/X86/nosse-varargs.ll
index e6da0ab5e371..8a81d0e71953 100644
--- a/test/CodeGen/X86/nosse-varargs.ll
+++ b/test/CodeGen/X86/nosse-varargs.ll
@@ -1,42 +1,43 @@
-; RUN: llvm-as < %s > %t
-; RUN: llc -march=x86-64 -mattr=-sse < %t | not grep xmm
-; RUN: llc -march=x86-64 < %t | grep xmm
+; RUN: llc < %s -march=x86-64 -mattr=-sse | FileCheck %s -check-prefix=NOSSE
+; RUN: llc < %s -march=x86-64 | FileCheck %s -check-prefix=YESSSE
; PR3403
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
target triple = "x86_64-unknown-linux-gnu"
%struct.__va_list_tag = type { i32, i32, i8*, i8* }
+; NOSSE-NOT: xmm
+; YESSSE: xmm
define i32 @foo(float %a, i8* nocapture %fmt, ...) nounwind {
entry:
%ap = alloca [1 x %struct.__va_list_tag], align 8 ; <[1 x %struct.__va_list_tag]*> [#uses=4]
%ap12 = bitcast [1 x %struct.__va_list_tag]* %ap to i8* ; <i8*> [#uses=2]
call void @llvm.va_start(i8* %ap12)
- %0 = getelementptr [1 x %struct.__va_list_tag]* %ap, i64 0, i64 0, i32 0 ; <i32*> [#uses=2]
- %1 = load i32* %0, align 8 ; <i32> [#uses=3]
+ %0 = getelementptr [1 x %struct.__va_list_tag], [1 x %struct.__va_list_tag]* %ap, i64 0, i64 0, i32 0 ; <i32*> [#uses=2]
+ %1 = load i32, i32* %0, align 8 ; <i32> [#uses=3]
%2 = icmp ult i32 %1, 48 ; <i1> [#uses=1]
br i1 %2, label %bb, label %bb3
bb: ; preds = %entry
- %3 = getelementptr [1 x %struct.__va_list_tag]* %ap, i64 0, i64 0, i32 3 ; <i8**> [#uses=1]
- %4 = load i8** %3, align 8 ; <i8*> [#uses=1]
+ %3 = getelementptr [1 x %struct.__va_list_tag], [1 x %struct.__va_list_tag]* %ap, i64 0, i64 0, i32 3 ; <i8**> [#uses=1]
+ %4 = load i8*, i8** %3, align 8 ; <i8*> [#uses=1]
%5 = inttoptr i32 %1 to i8* ; <i8*> [#uses=1]
%6 = ptrtoint i8* %5 to i64 ; <i64> [#uses=1]
- %ctg2 = getelementptr i8* %4, i64 %6 ; <i8*> [#uses=1]
+ %ctg2 = getelementptr i8, i8* %4, i64 %6 ; <i8*> [#uses=1]
%7 = add i32 %1, 8 ; <i32> [#uses=1]
store i32 %7, i32* %0, align 8
br label %bb4
bb3: ; preds = %entry
- %8 = getelementptr [1 x %struct.__va_list_tag]* %ap, i64 0, i64 0, i32 2 ; <i8**> [#uses=2]
- %9 = load i8** %8, align 8 ; <i8*> [#uses=2]
- %10 = getelementptr i8* %9, i64 8 ; <i8*> [#uses=1]
+ %8 = getelementptr [1 x %struct.__va_list_tag], [1 x %struct.__va_list_tag]* %ap, i64 0, i64 0, i32 2 ; <i8**> [#uses=2]
+ %9 = load i8*, i8** %8, align 8 ; <i8*> [#uses=2]
+ %10 = getelementptr i8, i8* %9, i64 8 ; <i8*> [#uses=1]
store i8* %10, i8** %8, align 8
br label %bb4
bb4: ; preds = %bb3, %bb
%addr.0.0 = phi i8* [ %ctg2, %bb ], [ %9, %bb3 ] ; <i8*> [#uses=1]
%11 = bitcast i8* %addr.0.0 to i32* ; <i32*> [#uses=1]
- %12 = load i32* %11, align 4 ; <i32> [#uses=1]
+ %12 = load i32, i32* %11, align 4 ; <i32> [#uses=1]
call void @llvm.va_end(i8* %ap12)
ret i32 %12
}
diff --git a/test/CodeGen/X86/null-streamer.ll b/test/CodeGen/X86/null-streamer.ll
index f6eb0e15aabb..3f5abfd40f29 100644
--- a/test/CodeGen/X86/null-streamer.ll
+++ b/test/CodeGen/X86/null-streamer.ll
@@ -14,16 +14,15 @@ define void @f1() {
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!11, !13}
-!0 = !{!"0x11\004\00 \001\00\000\00\000", !1, !2, !2, !3, !9, !2} ; [ DW_TAG_compile_unit ]
-!1 = !{!"", !""}
+!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: " ", isOptimized: true, emissionKind: 0, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !9, imports: !2)
+!1 = !DIFile(filename: "file.c", directory: "")
!2 = !{}
!3 = !{!4}
-!4 = !{!"0x2e\00\00\00\002\000\001\000\006\00256\001\002", !1, !5, !6, null, i32 ()* null, null, null, !2} ; [ DW_TAG_subprogram ]
-!5 = !{!"0x29", !1} ; [ DW_TAG_file_type ]
-!6 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !7, null, null, null} ; [ DW_TAG_subroutine_type ]
+!4 = !DISubprogram(name: "", line: 2, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 2, file: !1, scope: !1, type: !6, function: i32 ()* null, variables: !2)
+!6 = !DISubroutineType(types: !7)
!7 = !{!8}
-!8 = !{!"0x24\00\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ]
+!8 = !DIBasicType(tag: DW_TAG_base_type, size: 32, align: 32, encoding: DW_ATE_signed)
!9 = !{!10}
-!10 = !{!"0x34\00i\00i\00_ZL1i\001\001\001", null, !5, !8, null, null} ; [ DW_TAG_variable ]
+!10 = !DIGlobalVariable(name: "i", linkageName: "_ZL1i", line: 1, isLocal: true, isDefinition: true, scope: null, file: !1, type: !8)
!11 = !{i32 2, !"Dwarf Version", i32 3}
-!13 = !{i32 1, !"Debug Info Version", i32 2}
+!13 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/CodeGen/X86/object-size.ll b/test/CodeGen/X86/object-size.ll
index 0610f0b6de2e..9f42af4aea95 100644
--- a/test/CodeGen/X86/object-size.ll
+++ b/test/CodeGen/X86/object-size.ll
@@ -9,7 +9,7 @@ target triple = "x86_64-apple-darwin10.0"
define void @bar() nounwind ssp {
entry:
- %tmp = load i8** @p ; <i8*> [#uses=1]
+ %tmp = load i8*, i8** @p ; <i8*> [#uses=1]
%0 = call i64 @llvm.objectsize.i64.p0i8(i8* %tmp, i1 0) ; <i64> [#uses=1]
%cmp = icmp ne i64 %0, -1 ; <i1> [#uses=1]
; CHECK: movq $-1, [[RAX:%r..]]
@@ -17,15 +17,15 @@ entry:
br i1 %cmp, label %cond.true, label %cond.false
cond.true: ; preds = %entry
- %tmp1 = load i8** @p ; <i8*> [#uses=1]
- %tmp2 = load i8** @p ; <i8*> [#uses=1]
+ %tmp1 = load i8*, i8** @p ; <i8*> [#uses=1]
+ %tmp2 = load i8*, i8** @p ; <i8*> [#uses=1]
%1 = call i64 @llvm.objectsize.i64.p0i8(i8* %tmp2, i1 1) ; <i64> [#uses=1]
- %call = call i8* @__strcpy_chk(i8* %tmp1, i8* getelementptr inbounds ([3 x i8]* @.str, i32 0, i32 0), i64 %1) ssp ; <i8*> [#uses=1]
+ %call = call i8* @__strcpy_chk(i8* %tmp1, i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str, i32 0, i32 0), i64 %1) ssp ; <i8*> [#uses=1]
br label %cond.end
cond.false: ; preds = %entry
- %tmp3 = load i8** @p ; <i8*> [#uses=1]
- %call4 = call i8* @__inline_strcpy_chk(i8* %tmp3, i8* getelementptr inbounds ([3 x i8]* @.str, i32 0, i32 0)) ssp ; <i8*> [#uses=1]
+ %tmp3 = load i8*, i8** @p ; <i8*> [#uses=1]
+ %call4 = call i8* @__inline_strcpy_chk(i8* %tmp3, i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str, i32 0, i32 0)) ssp ; <i8*> [#uses=1]
br label %cond.end
cond.end: ; preds = %cond.false, %cond.true
@@ -44,12 +44,12 @@ entry:
%__src.addr = alloca i8* ; <i8**> [#uses=2]
store i8* %__dest, i8** %__dest.addr
store i8* %__src, i8** %__src.addr
- %tmp = load i8** %__dest.addr ; <i8*> [#uses=1]
- %tmp1 = load i8** %__src.addr ; <i8*> [#uses=1]
- %tmp2 = load i8** %__dest.addr ; <i8*> [#uses=1]
+ %tmp = load i8*, i8** %__dest.addr ; <i8*> [#uses=1]
+ %tmp1 = load i8*, i8** %__src.addr ; <i8*> [#uses=1]
+ %tmp2 = load i8*, i8** %__dest.addr ; <i8*> [#uses=1]
%0 = call i64 @llvm.objectsize.i64.p0i8(i8* %tmp2, i1 1) ; <i64> [#uses=1]
%call = call i8* @__strcpy_chk(i8* %tmp, i8* %tmp1, i64 %0) ssp ; <i8*> [#uses=1]
store i8* %call, i8** %retval
- %1 = load i8** %retval ; <i8*> [#uses=1]
+ %1 = load i8*, i8** %retval ; <i8*> [#uses=1]
ret i8* %1
}
diff --git a/test/CodeGen/X86/odr_comdat.ll b/test/CodeGen/X86/odr_comdat.ll
deleted file mode 100644
index 547334c045a3..000000000000
--- a/test/CodeGen/X86/odr_comdat.ll
+++ /dev/null
@@ -1,16 +0,0 @@
-; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s -check-prefix=X86LINUX
-
-; Checking that a comdat group gets generated correctly for a static member
-; of instantiated C++ templates.
-; see http://sourcery.mentor.com/public/cxx-abi/abi.html#vague-itemplate
-; section 5.2.6 Instantiated templates
-; "Any static member data object is emitted in a COMDAT identified by its mangled
-; name, in any object file with a reference to its name symbol."
-
-; Case 1: variable is not explicitly initialized, and ends up in a .bss section
-; X86LINUX: .section .bss._ZN1CIiE1iE,"aGw",@nobits,_ZN1CIiE1iE,comdat
-@_ZN1CIiE1iE = weak_odr global i32 0, align 4
-
-; Case 2: variable is explicitly initialized, and ends up in a .data section
-; X86LINUX: .section .data._ZN1CIiE1jE,"aGw",@progbits,_ZN1CIiE1jE,comdat
-@_ZN1CIiE1jE = weak_odr global i32 12, align 4
diff --git a/test/CodeGen/X86/opaque-constant-asm.ll b/test/CodeGen/X86/opaque-constant-asm.ll
index dd1cc8ec4839..f5daed8869b9 100644
--- a/test/CodeGen/X86/opaque-constant-asm.ll
+++ b/test/CodeGen/X86/opaque-constant-asm.ll
@@ -8,6 +8,6 @@
define void @test() {
; CHECK: #ASM $16
- call void asm sideeffect "#ASM $0", "n"(i32 ptrtoint (i32* getelementptr inbounds (%struct2* bitcast (%union.anon* getelementptr inbounds (%struct1* null, i32 0, i32 1) to %struct2*), i32 0, i32 2) to i32))
+ call void asm sideeffect "#ASM $0", "n"(i32 ptrtoint (i32* getelementptr inbounds (%struct2, %struct2* bitcast (%union.anon* getelementptr inbounds (%struct1, %struct1* null, i32 0, i32 1) to %struct2*), i32 0, i32 2) to i32))
ret void
}
diff --git a/test/CodeGen/X86/opt-ext-uses.ll b/test/CodeGen/X86/opt-ext-uses.ll
index 72fb38b27dfe..5d05ad9c4544 100644
--- a/test/CodeGen/X86/opt-ext-uses.ll
+++ b/test/CodeGen/X86/opt-ext-uses.ll
@@ -2,7 +2,7 @@
define signext i16 @t() {
entry:
- %tmp180 = load i16* null, align 2 ; <i16> [#uses=3]
+ %tmp180 = load i16, i16* null, align 2 ; <i16> [#uses=3]
%tmp180181 = sext i16 %tmp180 to i32 ; <i32> [#uses=1]
%tmp182 = add i16 %tmp180, 10
%tmp185 = icmp slt i16 %tmp182, 0 ; <i1> [#uses=1]
diff --git a/test/CodeGen/X86/optimize-max-0.ll b/test/CodeGen/X86/optimize-max-0.ll
index 981a16a44971..006592aaade2 100644
--- a/test/CodeGen/X86/optimize-max-0.ll
+++ b/test/CodeGen/X86/optimize-max-0.ll
@@ -32,9 +32,9 @@ bb6: ; preds = %bb7, %bb.nph7
%7 = add i32 %x.06, %4
%8 = shl i32 %x.06, 1
%9 = add i32 %6, %8
- %10 = getelementptr i8* %r, i32 %9
- %11 = load i8* %10, align 1
- %12 = getelementptr i8* %j, i32 %7
+ %10 = getelementptr i8, i8* %r, i32 %9
+ %11 = load i8, i8* %10, align 1
+ %12 = getelementptr i8, i8* %j, i32 %7
store i8 %11, i8* %12, align 1
br label %bb7
@@ -103,18 +103,18 @@ bb14: ; preds = %bb15, %bb.nph3
%x.12 = phi i32 [ 0, %bb.nph3 ], [ %indvar.next, %bb15 ]
%25 = shl i32 %x.12, 2
%26 = add i32 %25, %21
- %27 = getelementptr i8* %r, i32 %26
- %28 = load i8* %27, align 1
+ %27 = getelementptr i8, i8* %r, i32 %26
+ %28 = load i8, i8* %27, align 1
%.sum = add i32 %22, %x.12
- %29 = getelementptr i8* %j, i32 %.sum
+ %29 = getelementptr i8, i8* %j, i32 %.sum
store i8 %28, i8* %29, align 1
%30 = shl i32 %x.12, 2
%31 = or i32 %30, 2
%32 = add i32 %31, %21
- %33 = getelementptr i8* %r, i32 %32
- %34 = load i8* %33, align 1
+ %33 = getelementptr i8, i8* %r, i32 %32
+ %34 = load i8, i8* %33, align 1
%.sum6 = add i32 %23, %x.12
- %35 = getelementptr i8* %j, i32 %.sum6
+ %35 = getelementptr i8, i8* %j, i32 %.sum6
store i8 %34, i8* %35, align 1
br label %bb15
@@ -169,10 +169,10 @@ bb23: ; preds = %bb24, %bb.nph
%y.21 = phi i32 [ 0, %bb.nph ], [ %indvar.next5, %bb24 ]
%45 = mul i32 %y.21, %42
%.sum1 = add i32 %45, %43
- %46 = getelementptr i8* %r, i32 %.sum1
+ %46 = getelementptr i8, i8* %r, i32 %.sum1
%47 = mul i32 %y.21, %w
%.sum5 = add i32 %47, %.sum3
- %48 = getelementptr i8* %j, i32 %.sum5
+ %48 = getelementptr i8, i8* %j, i32 %.sum5
tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %48, i8* %46, i32 %w, i32 1, i1 false)
br label %bb24
@@ -187,7 +187,7 @@ bb24.bb26_crit_edge: ; preds = %bb24
bb26: ; preds = %bb24.bb26_crit_edge, %bb22
%49 = mul i32 %x, %w
%.sum4 = add i32 %.sum3, %49
- %50 = getelementptr i8* %j, i32 %.sum4
+ %50 = getelementptr i8, i8* %j, i32 %.sum4
%51 = mul i32 %x, %w
%52 = sdiv i32 %51, 2
tail call void @llvm.memset.p0i8.i32(i8* %50, i8 -128, i32 %52, i32 1, i1 false)
@@ -205,9 +205,9 @@ bb.nph11: ; preds = %bb29
bb30: ; preds = %bb31, %bb.nph11
%y.310 = phi i32 [ 0, %bb.nph11 ], [ %indvar.next13, %bb31 ]
%56 = mul i32 %y.310, %54
- %57 = getelementptr i8* %r, i32 %56
+ %57 = getelementptr i8, i8* %r, i32 %56
%58 = mul i32 %y.310, %w
- %59 = getelementptr i8* %j, i32 %58
+ %59 = getelementptr i8, i8* %j, i32 %58
tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %59, i8* %57, i32 %w, i32 1, i1 false)
br label %bb31
@@ -221,7 +221,7 @@ bb31.bb33_crit_edge: ; preds = %bb31
bb33: ; preds = %bb31.bb33_crit_edge, %bb29
%60 = mul i32 %x, %w
- %61 = getelementptr i8* %j, i32 %60
+ %61 = getelementptr i8, i8* %j, i32 %60
%62 = mul i32 %x, %w
%63 = sdiv i32 %62, 2
tail call void @llvm.memset.p0i8.i32(i8* %61, i8 -128, i32 %63, i32 1, i1 false)
@@ -257,9 +257,9 @@ bb6: ; preds = %bb7, %bb.nph7
%7 = add i32 %x.06, %4
%8 = shl i32 %x.06, 1
%9 = add i32 %6, %8
- %10 = getelementptr i8* %r, i32 %9
- %11 = load i8* %10, align 1
- %12 = getelementptr i8* %j, i32 %7
+ %10 = getelementptr i8, i8* %r, i32 %9
+ %11 = load i8, i8* %10, align 1
+ %12 = getelementptr i8, i8* %j, i32 %7
store i8 %11, i8* %12, align 1
br label %bb7
@@ -328,18 +328,18 @@ bb14: ; preds = %bb15, %bb.nph3
%x.12 = phi i32 [ 0, %bb.nph3 ], [ %indvar.next, %bb15 ]
%25 = shl i32 %x.12, 2
%26 = add i32 %25, %21
- %27 = getelementptr i8* %r, i32 %26
- %28 = load i8* %27, align 1
+ %27 = getelementptr i8, i8* %r, i32 %26
+ %28 = load i8, i8* %27, align 1
%.sum = add i32 %22, %x.12
- %29 = getelementptr i8* %j, i32 %.sum
+ %29 = getelementptr i8, i8* %j, i32 %.sum
store i8 %28, i8* %29, align 1
%30 = shl i32 %x.12, 2
%31 = or i32 %30, 2
%32 = add i32 %31, %21
- %33 = getelementptr i8* %r, i32 %32
- %34 = load i8* %33, align 1
+ %33 = getelementptr i8, i8* %r, i32 %32
+ %34 = load i8, i8* %33, align 1
%.sum6 = add i32 %23, %x.12
- %35 = getelementptr i8* %j, i32 %.sum6
+ %35 = getelementptr i8, i8* %j, i32 %.sum6
store i8 %34, i8* %35, align 1
br label %bb15
@@ -394,10 +394,10 @@ bb23: ; preds = %bb24, %bb.nph
%y.21 = phi i32 [ 0, %bb.nph ], [ %indvar.next5, %bb24 ]
%45 = mul i32 %y.21, %42
%.sum1 = add i32 %45, %43
- %46 = getelementptr i8* %r, i32 %.sum1
+ %46 = getelementptr i8, i8* %r, i32 %.sum1
%47 = mul i32 %y.21, %w
%.sum5 = add i32 %47, %.sum3
- %48 = getelementptr i8* %j, i32 %.sum5
+ %48 = getelementptr i8, i8* %j, i32 %.sum5
tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %48, i8* %46, i32 %w, i32 1, i1 false)
br label %bb24
@@ -412,7 +412,7 @@ bb24.bb26_crit_edge: ; preds = %bb24
bb26: ; preds = %bb24.bb26_crit_edge, %bb22
%49 = mul i32 %x, %w
%.sum4 = add i32 %.sum3, %49
- %50 = getelementptr i8* %j, i32 %.sum4
+ %50 = getelementptr i8, i8* %j, i32 %.sum4
%51 = mul i32 %x, %w
%52 = udiv i32 %51, 2
tail call void @llvm.memset.p0i8.i32(i8* %50, i8 -128, i32 %52, i32 1, i1 false)
@@ -430,9 +430,9 @@ bb.nph11: ; preds = %bb29
bb30: ; preds = %bb31, %bb.nph11
%y.310 = phi i32 [ 0, %bb.nph11 ], [ %indvar.next13, %bb31 ]
%56 = mul i32 %y.310, %54
- %57 = getelementptr i8* %r, i32 %56
+ %57 = getelementptr i8, i8* %r, i32 %56
%58 = mul i32 %y.310, %w
- %59 = getelementptr i8* %j, i32 %58
+ %59 = getelementptr i8, i8* %j, i32 %58
tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %59, i8* %57, i32 %w, i32 1, i1 false)
br label %bb31
@@ -446,7 +446,7 @@ bb31.bb33_crit_edge: ; preds = %bb31
bb33: ; preds = %bb31.bb33_crit_edge, %bb29
%60 = mul i32 %x, %w
- %61 = getelementptr i8* %j, i32 %60
+ %61 = getelementptr i8, i8* %j, i32 %60
%62 = mul i32 %x, %w
%63 = udiv i32 %62, 2
tail call void @llvm.memset.p0i8.i32(i8* %61, i8 -128, i32 %63, i32 1, i1 false)
diff --git a/test/CodeGen/X86/optimize-max-1.ll b/test/CodeGen/X86/optimize-max-1.ll
index ad6c24dce009..11e2f9a93a57 100644
--- a/test/CodeGen/X86/optimize-max-1.ll
+++ b/test/CodeGen/X86/optimize-max-1.ll
@@ -13,7 +13,7 @@ entry:
bb: ; preds = %bb, %entry
%i.0 = phi i64 [ 0, %entry ], [ %0, %bb ] ; <i64> [#uses=2]
- %scevgep = getelementptr double* %p, i64 %i.0 ; <double*> [#uses=1]
+ %scevgep = getelementptr double, double* %p, i64 %i.0 ; <double*> [#uses=1]
store double 0.000000e+00, double* %scevgep, align 8
%0 = add i64 %i.0, 1 ; <i64> [#uses=2]
%exitcond = icmp eq i64 %0, %smax ; <i1> [#uses=1]
@@ -31,7 +31,7 @@ entry:
bb: ; preds = %bb, %entry
%i.0 = phi i64 [ 0, %entry ], [ %0, %bb ] ; <i64> [#uses=2]
- %scevgep = getelementptr double* %p, i64 %i.0 ; <double*> [#uses=1]
+ %scevgep = getelementptr double, double* %p, i64 %i.0 ; <double*> [#uses=1]
store double 0.000000e+00, double* %scevgep, align 8
%0 = add i64 %i.0, 1 ; <i64> [#uses=2]
%exitcond = icmp eq i64 %0, %smax ; <i1> [#uses=1]
@@ -49,7 +49,7 @@ entry:
bb: ; preds = %bb, %entry
%i.0 = phi i64 [ 0, %entry ], [ %0, %bb ] ; <i64> [#uses=2]
- %scevgep = getelementptr double* %p, i64 %i.0 ; <double*> [#uses=1]
+ %scevgep = getelementptr double, double* %p, i64 %i.0 ; <double*> [#uses=1]
store double 0.000000e+00, double* %scevgep, align 8
%0 = add i64 %i.0, 1 ; <i64> [#uses=2]
%exitcond = icmp eq i64 %0, %umax ; <i1> [#uses=1]
@@ -67,7 +67,7 @@ entry:
bb: ; preds = %bb, %entry
%i.0 = phi i64 [ 0, %entry ], [ %0, %bb ] ; <i64> [#uses=2]
- %scevgep = getelementptr double* %p, i64 %i.0 ; <double*> [#uses=1]
+ %scevgep = getelementptr double, double* %p, i64 %i.0 ; <double*> [#uses=1]
store double 0.000000e+00, double* %scevgep, align 8
%0 = add i64 %i.0, 1 ; <i64> [#uses=2]
%exitcond = icmp eq i64 %0, %umax ; <i1> [#uses=1]
diff --git a/test/CodeGen/X86/optimize-max-2.ll b/test/CodeGen/X86/optimize-max-2.ll
index 10ab831c1e3d..45b542e2267c 100644
--- a/test/CodeGen/X86/optimize-max-2.ll
+++ b/test/CodeGen/X86/optimize-max-2.ll
@@ -19,8 +19,8 @@ entry:
bb4: ; preds = %bb4, %entry
%i.07 = phi i64 [ 0, %entry ], [ %2, %bb4 ] ; <i64> [#uses=2]
- %scevgep = getelementptr double* %p, i64 %i.07 ; <double*> [#uses=2]
- %0 = load double* %scevgep, align 8 ; <double> [#uses=1]
+ %scevgep = getelementptr double, double* %p, i64 %i.07 ; <double*> [#uses=2]
+ %0 = load double, double* %scevgep, align 8 ; <double> [#uses=1]
%1 = fmul double %0, 2.000000e+00 ; <double> [#uses=1]
store double %1, double* %scevgep, align 8
%2 = add i64 %i.07, 1 ; <i64> [#uses=2]
diff --git a/test/CodeGen/X86/optimize-max-3.ll b/test/CodeGen/X86/optimize-max-3.ll
index 1b653736ad3b..71885efbd31f 100644
--- a/test/CodeGen/X86/optimize-max-3.ll
+++ b/test/CodeGen/X86/optimize-max-3.ll
@@ -20,8 +20,8 @@ for.body.preheader: ; preds = %entry
for.body: ; preds = %for.body.preheader, %for.body
%i = phi i64 [ %i.next, %for.body ], [ 0, %for.body.preheader ] ; <i64> [#uses=2]
- %arrayidx = getelementptr double* %p, i64 %i ; <double*> [#uses=2]
- %t4 = load double* %arrayidx ; <double> [#uses=1]
+ %arrayidx = getelementptr double, double* %p, i64 %i ; <double*> [#uses=2]
+ %t4 = load double, double* %arrayidx ; <double> [#uses=1]
%mul = fmul double %t4, 2.200000e+00 ; <double> [#uses=1]
store double %mul, double* %arrayidx
%i.next = add nsw i64 %i, 1 ; <i64> [#uses=2]
diff --git a/test/CodeGen/X86/or-address.ll b/test/CodeGen/X86/or-address.ll
index 6bea864027bd..16b0aa60b689 100644
--- a/test/CodeGen/X86/or-address.ll
+++ b/test/CodeGen/X86/or-address.ll
@@ -26,16 +26,16 @@ bb: ; preds = %bb, %bb.nph
%4 = add i8 %3, %iftmp.0.0 ; <i8> [#uses=1]
%5 = shl i8 %4, 2 ; <i8> [#uses=1]
%6 = zext i8 %5 to i64 ; <i64> [#uses=4]
- %7 = getelementptr inbounds i32* %array, i64 %6 ; <i32*> [#uses=1]
+ %7 = getelementptr inbounds i32, i32* %array, i64 %6 ; <i32*> [#uses=1]
store i32 %r0, i32* %7, align 4
%8 = or i64 %6, 2 ; <i64> [#uses=1]
- %9 = getelementptr inbounds i32* %array, i64 %8 ; <i32*> [#uses=1]
+ %9 = getelementptr inbounds i32, i32* %array, i64 %8 ; <i32*> [#uses=1]
store i32 %r0, i32* %9, align 4
%10 = or i64 %6, 1 ; <i64> [#uses=1]
- %11 = getelementptr inbounds i32* %array, i64 %10 ; <i32*> [#uses=1]
+ %11 = getelementptr inbounds i32, i32* %array, i64 %10 ; <i32*> [#uses=1]
store i32 %r0, i32* %11, align 4
%12 = or i64 %6, 3 ; <i64> [#uses=1]
- %13 = getelementptr inbounds i32* %array, i64 %12 ; <i32*> [#uses=1]
+ %13 = getelementptr inbounds i32, i32* %array, i64 %12 ; <i32*> [#uses=1]
store i32 %r0, i32* %13, align 4
%14 = add nsw i8 %j.010, 1 ; <i8> [#uses=2]
%15 = add i8 %iftmp.0.0, 1 ; <i8> [#uses=1]
@@ -69,16 +69,16 @@ for.body: ; preds = %for.body, %bb.nph
%mul22 = shl i8 %inc.k.addr.1, 4 ; <i8> [#uses=1]
%add23 = add i8 %mul22, %mul ; <i8> [#uses=1]
%idxprom = zext i8 %add23 to i64 ; <i64> [#uses=4]
- %arrayidx = getelementptr inbounds i32* %array, i64 %idxprom ; <i32*> [#uses=1]
+ %arrayidx = getelementptr inbounds i32, i32* %array, i64 %idxprom ; <i32*> [#uses=1]
store i32 %r0, i32* %arrayidx
%add3356 = or i64 %idxprom, 2 ; <i64> [#uses=1]
- %arrayidx36 = getelementptr inbounds i32* %array, i64 %add3356 ; <i32*> [#uses=1]
+ %arrayidx36 = getelementptr inbounds i32, i32* %array, i64 %add3356 ; <i32*> [#uses=1]
store i32 %r0, i32* %arrayidx36
%add4058 = or i64 %idxprom, 1 ; <i64> [#uses=1]
- %arrayidx43 = getelementptr inbounds i32* %array, i64 %add4058 ; <i32*> [#uses=1]
+ %arrayidx43 = getelementptr inbounds i32, i32* %array, i64 %add4058 ; <i32*> [#uses=1]
store i32 %r0, i32* %arrayidx43
%add4760 = or i64 %idxprom, 3 ; <i64> [#uses=1]
- %arrayidx50 = getelementptr inbounds i32* %array, i64 %add4760 ; <i32*> [#uses=1]
+ %arrayidx50 = getelementptr inbounds i32, i32* %array, i64 %add4760 ; <i32*> [#uses=1]
store i32 %r0, i32* %arrayidx50
%inc52 = add nsw i8 %j.065, 1 ; <i8> [#uses=2]
%add = add i8 %cond, 1 ; <i8> [#uses=1]
diff --git a/test/CodeGen/X86/or-branch.ll b/test/CodeGen/X86/or-branch.ll
index 9ebf8901b77c..ae3ed3f8344a 100644
--- a/test/CodeGen/X86/or-branch.ll
+++ b/test/CodeGen/X86/or-branch.ll
@@ -2,14 +2,14 @@
define void @foo(i32 %X, i32 %Y, i32 %Z) nounwind {
entry:
- %tmp = tail call i32 (...)* @bar( ) ; <i32> [#uses=0]
+ %tmp = tail call i32 (...) @bar( ) ; <i32> [#uses=0]
%tmp.upgrd.1 = icmp eq i32 %X, 0 ; <i1> [#uses=1]
%tmp3 = icmp slt i32 %Y, 5 ; <i1> [#uses=1]
%tmp4 = or i1 %tmp3, %tmp.upgrd.1 ; <i1> [#uses=1]
br i1 %tmp4, label %cond_true, label %UnifiedReturnBlock
cond_true: ; preds = %entry
- %tmp5 = tail call i32 (...)* @bar( ) ; <i32> [#uses=0]
+ %tmp5 = tail call i32 (...) @bar( ) ; <i32> [#uses=0]
ret void
UnifiedReturnBlock: ; preds = %entry
diff --git a/test/CodeGen/X86/packed_struct.ll b/test/CodeGen/X86/packed_struct.ll
index da6e8f8745fe..c9aeb7deb4f3 100644
--- a/test/CodeGen/X86/packed_struct.ll
+++ b/test/CodeGen/X86/packed_struct.ll
@@ -17,9 +17,9 @@ target triple = "i686-pc-linux-gnu"
define i32 @foo() nounwind {
entry:
- %tmp = load i32* getelementptr (%struct.anon* @foos, i32 0, i32 1) ; <i32> [#uses=1]
- %tmp3 = load i32* getelementptr (%struct.anon* @foos, i32 0, i32 2) ; <i32> [#uses=1]
- %tmp6 = load i32* getelementptr (%struct.anon* @foos, i32 0, i32 3) ; <i32> [#uses=1]
+ %tmp = load i32, i32* getelementptr (%struct.anon, %struct.anon* @foos, i32 0, i32 1) ; <i32> [#uses=1]
+ %tmp3 = load i32, i32* getelementptr (%struct.anon, %struct.anon* @foos, i32 0, i32 2) ; <i32> [#uses=1]
+ %tmp6 = load i32, i32* getelementptr (%struct.anon, %struct.anon* @foos, i32 0, i32 3) ; <i32> [#uses=1]
%tmp4 = add i32 %tmp3, %tmp ; <i32> [#uses=1]
%tmp7 = add i32 %tmp4, %tmp6 ; <i32> [#uses=1]
ret i32 %tmp7
@@ -27,8 +27,8 @@ entry:
define i8 @bar() nounwind {
entry:
- %tmp = load i8* getelementptr ([4 x <{ i32, i8 }>]* @bara, i32 0, i32 0, i32 1) ; <i8> [#uses=1]
- %tmp4 = load i8* getelementptr ([4 x <{ i32, i8 }>]* @bara, i32 0, i32 3, i32 1) ; <i8> [#uses=1]
+ %tmp = load i8, i8* getelementptr ([4 x <{ i32, i8 }>], [4 x <{ i32, i8 }>]* @bara, i32 0, i32 0, i32 1) ; <i8> [#uses=1]
+ %tmp4 = load i8, i8* getelementptr ([4 x <{ i32, i8 }>], [4 x <{ i32, i8 }>]* @bara, i32 0, i32 3, i32 1) ; <i8> [#uses=1]
%tmp5 = add i8 %tmp4, %tmp ; <i8> [#uses=1]
ret i8 %tmp5
}
diff --git a/test/CodeGen/X86/palignr-2.ll b/test/CodeGen/X86/palignr-2.ll
deleted file mode 100644
index 4df9a2284cb7..000000000000
--- a/test/CodeGen/X86/palignr-2.ll
+++ /dev/null
@@ -1,28 +0,0 @@
-; RUN: llc < %s -march=x86 -mattr=+ssse3 | FileCheck %s
-; rdar://7341330
-
-@a = global [4 x i32] [i32 4, i32 5, i32 6, i32 7], align 16 ; <[4 x i32]*> [#uses=1]
-@c = common global [4 x i32] zeroinitializer, align 16 ; <[4 x i32]*> [#uses=1]
-@b = global [4 x i32] [i32 0, i32 1, i32 2, i32 3], align 16 ; <[4 x i32]*> [#uses=1]
-
-define void @t1(<2 x i64> %a, <2 x i64> %b) nounwind ssp {
-entry:
-; CHECK-LABEL: t1:
-; palignr $3, %xmm1, %xmm0
- %0 = tail call <2 x i64> @llvm.x86.ssse3.palign.r.128(<2 x i64> %a, <2 x i64> %b, i8 24) nounwind readnone
- store <2 x i64> %0, <2 x i64>* bitcast ([4 x i32]* @c to <2 x i64>*), align 16
- ret void
-}
-
-declare <2 x i64> @llvm.x86.ssse3.palign.r.128(<2 x i64>, <2 x i64>, i8) nounwind readnone
-
-define void @t2() nounwind ssp {
-entry:
-; CHECK-LABEL: t2:
-; palignr $4, _b, %xmm0
- %0 = load <2 x i64>* bitcast ([4 x i32]* @b to <2 x i64>*), align 16 ; <<2 x i64>> [#uses=1]
- %1 = load <2 x i64>* bitcast ([4 x i32]* @a to <2 x i64>*), align 16 ; <<2 x i64>> [#uses=1]
- %2 = tail call <2 x i64> @llvm.x86.ssse3.palign.r.128(<2 x i64> %1, <2 x i64> %0, i8 32) nounwind readnone
- store <2 x i64> %2, <2 x i64>* bitcast ([4 x i32]* @c to <2 x i64>*), align 16
- ret void
-}
diff --git a/test/CodeGen/X86/palignr.ll b/test/CodeGen/X86/palignr.ll
index 3efcc2e41215..dfa2cedf45a2 100644
--- a/test/CodeGen/X86/palignr.ll
+++ b/test/CodeGen/X86/palignr.ll
@@ -40,7 +40,9 @@ define <4 x i32> @test3(<4 x i32> %A, <4 x i32> %B) nounwind {
;
; CHECK-YONAH-LABEL: test3:
; CHECK-YONAH: # BB#0:
-; CHECK-YONAH-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,2],xmm1[2,0]
+; CHECK-YONAH-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
+; CHECK-YONAH-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,2,2,3]
+; CHECK-YONAH-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; CHECK-YONAH-NEXT: retl
%C = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> < i32 1, i32 2, i32 undef, i32 4 >
ret <4 x i32> %C
diff --git a/test/CodeGen/X86/patchpoint-invoke.ll b/test/CodeGen/X86/patchpoint-invoke.ll
index 192cacc908ab..98e9eb3b6a44 100644
--- a/test/CodeGen/X86/patchpoint-invoke.ll
+++ b/test/CodeGen/X86/patchpoint-invoke.ll
@@ -5,8 +5,8 @@
define i64 @patchpoint_invoke(i64 %p1, i64 %p2) {
entry:
; CHECK-LABEL: patchpoint_invoke:
+; CHECK-NEXT: [[FUNC_BEGIN:.L.*]]:
; CHECK-NEXT: .cfi_startproc
-; CHECK: [[FUNC_BEGIN:.L.*]]:
; CHECK: .cfi_lsda 3, [[EXCEPTION_LABEL:.L[^ ]*]]
; CHECK: pushq %rbp
@@ -18,7 +18,7 @@ entry:
; CHECK-NEXT: [[PP_END:.L.*]]:
; CHECK: ret
%resolveCall = inttoptr i64 -559038736 to i8*
- %result = invoke i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 2, i32 15, i8* %resolveCall, i32 1, i64 %p1, i64 %p2)
+ %result = invoke i64 (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.i64(i64 2, i32 15, i8* %resolveCall, i32 1, i64 %p1, i64 %p2)
to label %success unwind label %threw
success:
diff --git a/test/CodeGen/X86/patchpoint-webkit_jscc.ll b/test/CodeGen/X86/patchpoint-webkit_jscc.ll
index 5e76bf8d4e60..5c39438b22d6 100644
--- a/test/CodeGen/X86/patchpoint-webkit_jscc.ll
+++ b/test/CodeGen/X86/patchpoint-webkit_jscc.ll
@@ -1,5 +1,5 @@
; RUN: llc -mtriple=x86_64-apple-darwin -mcpu=corei7 < %s | FileCheck %s
-; RUN: llc -mtriple=x86_64-apple-darwin -mcpu=corei7 -fast-isel -fast-isel-abort < %s | FileCheck %s --check-prefix=FAST
+; RUN: llc -mtriple=x86_64-apple-darwin -mcpu=corei7 -fast-isel -fast-isel-abort=1 < %s | FileCheck %s --check-prefix=FAST
; Test the webkit_jscc calling convention.
; One argument will be passed in register, the other will be pushed on the stack.
@@ -25,9 +25,9 @@ entry:
; FAST: movq %rax, (%rsp)
; FAST: callq
%resolveCall2 = inttoptr i64 -559038736 to i8*
- %result = tail call webkit_jscc i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 5, i32 15, i8* %resolveCall2, i32 2, i64 %p4, i64 %p2)
+ %result = tail call webkit_jscc i64 (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.i64(i64 5, i32 15, i8* %resolveCall2, i32 2, i64 %p4, i64 %p2)
%resolveCall3 = inttoptr i64 -559038737 to i8*
- tail call webkit_jscc void (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i64 6, i32 15, i8* %resolveCall3, i32 2, i64 %p4, i64 %result)
+ tail call webkit_jscc void (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.void(i64 6, i32 15, i8* %resolveCall3, i32 2, i64 %p4, i64 %result)
ret void
}
@@ -51,7 +51,7 @@ entry:
; FAST-NEXT: movabsq $-559038736, %r11
; FAST-NEXT: callq *%r11
%call = inttoptr i64 -559038736 to i8*
- %result = call webkit_jscc i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 7, i32 15, i8* %call, i32 6, i64 %callee, i64 2, i64 undef, i32 4, i32 undef, i64 6)
+ %result = call webkit_jscc i64 (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.i64(i64 7, i32 15, i8* %call, i32 6, i64 %callee, i64 2, i64 undef, i32 4, i32 undef, i64 6)
ret i64 %result
}
@@ -79,7 +79,7 @@ entry:
; FAST-NEXT: movabsq $-559038736, %r11
; FAST-NEXT: callq *%r11
%call = inttoptr i64 -559038736 to i8*
- %result = call webkit_jscc i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 7, i32 15, i8* %call, i32 10, i64 %callee, i64 2, i64 undef, i32 4, i32 undef, i64 6, i32 undef, i32 8, i32 undef, i64 10)
+ %result = call webkit_jscc i64 (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.i64(i64 7, i32 15, i8* %call, i32 10, i64 %callee, i64 2, i64 undef, i32 4, i32 undef, i64 6, i32 undef, i32 8, i32 undef, i64 10)
ret i64 %result
}
diff --git a/test/CodeGen/X86/patchpoint.ll b/test/CodeGen/X86/patchpoint.ll
index 07148f0329a2..82b15c36c2b2 100644
--- a/test/CodeGen/X86/patchpoint.ll
+++ b/test/CodeGen/X86/patchpoint.ll
@@ -1,5 +1,5 @@
; RUN: llc -mtriple=x86_64-apple-darwin -mcpu=corei7 < %s | FileCheck %s
-; RUN: llc -mtriple=x86_64-apple-darwin -mcpu=corei7 -fast-isel -fast-isel-abort < %s | FileCheck %s
+; RUN: llc -mtriple=x86_64-apple-darwin -mcpu=corei7 -fast-isel -fast-isel-abort=1 < %s | FileCheck %s
; Trivial patchpoint codegen
;
@@ -15,12 +15,28 @@ entry:
; CHECK: movq %[[REG]], %rax
; CHECK: ret
%resolveCall2 = inttoptr i64 -559038736 to i8*
- %result = tail call i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 2, i32 15, i8* %resolveCall2, i32 4, i64 %p1, i64 %p2, i64 %p3, i64 %p4)
+ %result = tail call i64 (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.i64(i64 2, i32 15, i8* %resolveCall2, i32 4, i64 %p1, i64 %p2, i64 %p3, i64 %p4)
%resolveCall3 = inttoptr i64 -559038737 to i8*
- tail call void (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i64 3, i32 15, i8* %resolveCall3, i32 2, i64 %p1, i64 %result)
+ tail call void (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.void(i64 3, i32 15, i8* %resolveCall3, i32 2, i64 %p1, i64 %result)
ret i64 %result
}
+; Trivial symbolic patchpoint codegen.
+;
+
+declare i64 @foo(i64 %p1, i64 %p2)
+define i64 @trivial_symbolic_patchpoint_codegen(i64 %p1, i64 %p2) {
+entry:
+; CHECK-LABEL: trivial_symbolic_patchpoint_codegen:
+; CHECK: movabsq $_foo, %r11
+; CHECK-NEXT: callq *%r11
+; CHECK-NEXT: xchgw %ax, %ax
+; CHECK: retq
+ %result = tail call i64 (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.i64(i64 9, i32 15, i8* bitcast (i64 (i64, i64)* @foo to i8*), i32 2, i64 %p1, i64 %p2)
+ ret i64 %result
+}
+
+
; Caller frame metadata with stackmaps. This should not be optimized
; as a leaf function.
;
@@ -35,7 +51,7 @@ entry:
store i64 11, i64* %metadata
store i64 12, i64* %metadata
store i64 13, i64* %metadata
- call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 4, i32 0, i64* %metadata)
+ call void (i64, i32, ...) @llvm.experimental.stackmap(i64 4, i32 0, i64* %metadata)
ret void
}
@@ -47,15 +63,15 @@ define i64 @testLowerConstant(i64 %arg, i64 %tmp2, i64 %tmp10, i64* %tmp33, i64
entry:
%tmp80 = add i64 %tmp79, -16
%tmp81 = inttoptr i64 %tmp80 to i64*
- %tmp82 = load i64* %tmp81, align 8
- tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 14, i32 5, i64 %arg, i64 %tmp2, i64 %tmp10, i64 %tmp82)
- tail call void (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i64 15, i32 30, i8* null, i32 3, i64 %arg, i64 %tmp10, i64 %tmp82)
- %tmp83 = load i64* %tmp33, align 8
+ %tmp82 = load i64, i64* %tmp81, align 8
+ tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 14, i32 5, i64 %arg, i64 %tmp2, i64 %tmp10, i64 %tmp82)
+ tail call void (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.void(i64 15, i32 30, i8* null, i32 3, i64 %arg, i64 %tmp10, i64 %tmp82)
+ %tmp83 = load i64, i64* %tmp33, align 8
%tmp84 = add i64 %tmp83, -24
%tmp85 = inttoptr i64 %tmp84 to i64*
- %tmp86 = load i64* %tmp85, align 8
- tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 17, i32 5, i64 %arg, i64 %tmp10, i64 %tmp86)
- tail call void (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i64 18, i32 30, i8* null, i32 3, i64 %arg, i64 %tmp10, i64 %tmp86)
+ %tmp86 = load i64, i64* %tmp85, align 8
+ tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 17, i32 5, i64 %arg, i64 %tmp10, i64 %tmp86)
+ tail call void (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.void(i64 18, i32 30, i8* null, i32 3, i64 %arg, i64 %tmp10, i64 %tmp86)
ret i64 10
}
@@ -67,7 +83,7 @@ entry:
; CHECK: nopl 8(%rax,%rax)
; CHECK-NEXT: popq
; CHECK-NEXT: ret
- %result = tail call i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 5, i32 5, i8* null, i32 2, i64 %p1, i64 %p2)
+ %result = tail call i64 (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.i64(i64 5, i32 5, i8* null, i32 2, i64 %p1, i64 %p2)
ret void
}
@@ -78,7 +94,7 @@ entry:
; CHECK: movabsq $6153737369414576827, %r11
; CHECK-NEXT: callq *%r11
%resolveCall2 = inttoptr i64 6153737369414576827 to i8*
- %result = tail call i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 2, i32 15, i8* %resolveCall2, i32 0)
+ %result = tail call i64 (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.i64(i64 2, i32 15, i8* %resolveCall2, i32 0)
ret i64 %result
}
diff --git a/test/CodeGen/X86/peep-test-0.ll b/test/CodeGen/X86/peep-test-0.ll
index e521d8e37854..1772f008b94e 100644
--- a/test/CodeGen/X86/peep-test-0.ll
+++ b/test/CodeGen/X86/peep-test-0.ll
@@ -9,8 +9,8 @@ entry:
bb:
%indvar = phi i64 [ %n, %entry ], [ %indvar.next, %bb ]
%i.03 = add i64 %indvar, %n
- %0 = getelementptr double* %d, i64 %i.03
- %1 = load double* %0, align 8
+ %0 = getelementptr double, double* %d, i64 %i.03
+ %1 = load double, double* %0, align 8
%2 = fmul double %1, 3.000000e+00
store double %2, double* %0, align 8
%indvar.next = add i64 %indvar, 1
diff --git a/test/CodeGen/X86/peep-test-1.ll b/test/CodeGen/X86/peep-test-1.ll
index f83f0f6aa6ff..7448da3894da 100644
--- a/test/CodeGen/X86/peep-test-1.ll
+++ b/test/CodeGen/X86/peep-test-1.ll
@@ -9,8 +9,8 @@ define void @foo(i32 %n, double* nocapture %p) nounwind {
bb:
%indvar = phi i32 [ 0, %0 ], [ %indvar.next, %bb ]
%i.03 = sub i32 %n, %indvar
- %1 = getelementptr double* %p, i32 %i.03
- %2 = load double* %1, align 4
+ %1 = getelementptr double, double* %p, i32 %i.03
+ %2 = load double, double* %1, align 4
%3 = fmul double %2, 2.930000e+00
store double %3, double* %1, align 4
%4 = add i32 %i.03, -1
diff --git a/test/CodeGen/X86/peephole-fold-movsd.ll b/test/CodeGen/X86/peephole-fold-movsd.ll
index 09d9328815da..818040a6f02c 100644
--- a/test/CodeGen/X86/peephole-fold-movsd.ll
+++ b/test/CodeGen/X86/peephole-fold-movsd.ll
@@ -17,10 +17,10 @@ define void @foo1(double %a.coerce0, double %a.coerce1, double %b.coerce0, doubl
%1 = alloca <2 x double>, align 16
%tmpcast = bitcast <2 x double>* %1 to %struct.S1*
call void @foo3(%struct.S1* %tmpcast) #2
- %p2 = getelementptr inbounds %struct.S1* %tmpcast, i64 0, i32 0
- %2 = load double* %p2, align 16
- %p3 = getelementptr inbounds %struct.S1* %tmpcast, i64 0, i32 1
- %3 = load double* %p3, align 8
+ %p2 = getelementptr inbounds %struct.S1, %struct.S1* %tmpcast, i64 0, i32 0
+ %2 = load double, double* %p2, align 16
+ %p3 = getelementptr inbounds %struct.S1, %struct.S1* %tmpcast, i64 0, i32 1
+ %3 = load double, double* %p3, align 8
%4 = insertelement <2 x double> undef, double %2, i32 0
%5 = insertelement <2 x double> %4, double 0.000000e+00, i32 1
%6 = insertelement <2 x double> undef, double %3, i32 1
diff --git a/test/CodeGen/X86/peephole-multiple-folds.ll b/test/CodeGen/X86/peephole-multiple-folds.ll
index a6cec66c73c9..9fcc1a20798b 100644
--- a/test/CodeGen/X86/peephole-multiple-folds.ll
+++ b/test/CodeGen/X86/peephole-multiple-folds.ll
@@ -13,8 +13,8 @@ loopbody:
; CHECK: vfmadd231ps ({{%rsi|%rdx}}),
%vsum1 = phi <8 x float> [ %vsum1.next, %loopbody ], [ zeroinitializer, %entry ]
%vsum2 = phi <8 x float> [ %vsum2.next, %loopbody ], [ zeroinitializer, %entry ]
- %m1 = load <8 x float>* %p1, align 1
- %m2 = load <8 x float>* %p2, align 1
+ %m1 = load <8 x float>, <8 x float>* %p1, align 1
+ %m2 = load <8 x float>, <8 x float>* %p2, align 1
%vsum1.next = tail call <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float> %m1, <8 x float> zeroinitializer, <8 x float> %vsum1)
%vsum2.next = tail call <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float> %m2, <8 x float> zeroinitializer, <8 x float> %vsum2)
%vsum1.next.1 = extractelement <8 x float> %vsum1.next, i32 0
diff --git a/test/CodeGen/X86/phi-bit-propagation.ll b/test/CodeGen/X86/phi-bit-propagation.ll
index 94c97229b092..37f3f096556f 100644
--- a/test/CodeGen/X86/phi-bit-propagation.ll
+++ b/test/CodeGen/X86/phi-bit-propagation.ll
@@ -14,11 +14,11 @@ for.cond: ; preds = %for.inc, %entry
br i1 %cmp, label %return, label %for.body
for.body: ; preds = %for.cond
- %arrayidx = getelementptr inbounds i32* %b, i64 %conv
- %tmp5 = load i32* %arrayidx, align 4
+ %arrayidx = getelementptr inbounds i32, i32* %b, i64 %conv
+ %tmp5 = load i32, i32* %arrayidx, align 4
%conv6 = zext i32 %tmp5 to i64
%rem.i.i.i.i = and i64 %conv6, 63
- %tmp3.i = load i64* %tmp.i.i.i.i, align 8
+ %tmp3.i = load i64, i64* %tmp.i.i.i.i, align 8
%shl.i.i = shl i64 1, %rem.i.i.i.i
%and.i = and i64 %shl.i.i, %tmp3.i
%cmp.i = icmp eq i64 %and.i, 0
diff --git a/test/CodeGen/X86/phielim-split.ll b/test/CodeGen/X86/phielim-split.ll
index aa477359d60e..423ef0486ac7 100644
--- a/test/CodeGen/X86/phielim-split.ll
+++ b/test/CodeGen/X86/phielim-split.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -verify-machineinstrs | FileCheck %s
+; RUN: llc < %s -verify-machineinstrs -no-phi-elim-live-out-early-exit | FileCheck %s
target triple = "x86_64-apple-macosx10.8.0"
; The critical edge from for.cond to if.end2 should be split to avoid injecting
@@ -18,8 +18,8 @@ entry:
for.cond: ; preds = %entry, %for.cond
%p.addr.0 = phi i8* [ %incdec.ptr, %for.cond ], [ %p, %entry ]
- %incdec.ptr = getelementptr inbounds i8* %p.addr.0, i64 1
- %0 = load i8* %p.addr.0, align 1
+ %incdec.ptr = getelementptr inbounds i8, i8* %p.addr.0, i64 1
+ %0 = load i8, i8* %p.addr.0, align 1
%tobool = icmp eq i8 %0, 0
br i1 %tobool, label %for.cond, label %if.end2
@@ -28,3 +28,40 @@ if.end2: ; preds = %for.cond, %entry
%add = add nsw i32 %r.0, %b
ret i32 %add
}
+
+; CHECK: split_live_out
+; CHECK: %while.body
+; CHECK: cmp
+; CHECK-NEXT: ja
+define i8* @split_live_out(i32 %value, i8* %target) nounwind uwtable readonly ssp {
+entry:
+ %cmp10 = icmp ugt i32 %value, 127
+ br i1 %cmp10, label %while.body.preheader, label %while.end
+
+while.body.preheader: ; preds = %entry
+ br label %while.body
+
+while.body: ; preds = %while.body.preheader, %while.body
+ %target.addr.012 = phi i8* [ %incdec.ptr, %while.body ], [ %target, %while.body.preheader ]
+ %value.addr.011 = phi i32 [ %shr, %while.body ], [ %value, %while.body.preheader ]
+ %or = or i32 %value.addr.011, 128
+ %conv = trunc i32 %or to i8
+ store i8 %conv, i8* %target.addr.012, align 1
+ %shr = lshr i32 %value.addr.011, 7
+ %incdec.ptr = getelementptr inbounds i8, i8* %target.addr.012, i64 1
+ %cmp = icmp ugt i32 %value.addr.011, 16383
+ br i1 %cmp, label %while.body, label %while.end.loopexit
+
+while.end.loopexit: ; preds = %while.body
+ %incdec.ptr.lcssa = phi i8* [ %incdec.ptr, %while.body ]
+ %shr.lcssa = phi i32 [ %shr, %while.body ]
+ br label %while.end
+
+while.end: ; preds = %while.end.loopexit, %entry
+ %target.addr.0.lcssa = phi i8* [ %target, %entry ], [ %incdec.ptr.lcssa, %while.end.loopexit ]
+ %value.addr.0.lcssa = phi i32 [ %value, %entry ], [ %shr.lcssa, %while.end.loopexit ]
+ %conv1 = trunc i32 %value.addr.0.lcssa to i8
+ store i8 %conv1, i8* %target.addr.0.lcssa, align 1
+ %incdec.ptr3 = getelementptr inbounds i8, i8* %target.addr.0.lcssa, i64 1
+ ret i8* %incdec.ptr3
+}
diff --git a/test/CodeGen/X86/phys-reg-local-regalloc.ll b/test/CodeGen/X86/phys-reg-local-regalloc.ll
index 37eca1ce0a72..a0adba0f8338 100644
--- a/test/CodeGen/X86/phys-reg-local-regalloc.ll
+++ b/test/CodeGen/X86/phys-reg-local-regalloc.ll
@@ -50,15 +50,15 @@ entry:
store i32 %asmtmp2, i32* %"%eax"
%3 = call i32 asm "", "={ax}"() nounwind ; <i32> [#uses=1]
call void asm sideeffect alignstack "movl $0, $1", "{eax},*m,~{dirflag},~{fpsr},~{flags},~{memory}"(i32 %3, i32* %result) nounwind
- %4 = load i32* %result, align 4 ; <i32> [#uses=1]
- %5 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([12 x i8]* @.str, i32 0, i32 0), i32 %4) nounwind ; <i32> [#uses=0]
+ %4 = load i32, i32* %result, align 4 ; <i32> [#uses=1]
+ %5 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i32 0, i32 0), i32 %4) nounwind ; <i32> [#uses=0]
store i32 0, i32* %0, align 4
- %6 = load i32* %0, align 4 ; <i32> [#uses=1]
+ %6 = load i32, i32* %0, align 4 ; <i32> [#uses=1]
store i32 %6, i32* %retval, align 4
br label %return
return: ; preds = %entry
- %retval3 = load i32* %retval ; <i32> [#uses=1]
+ %retval3 = load i32, i32* %retval ; <i32> [#uses=1]
ret i32 %retval3
}
diff --git a/test/CodeGen/X86/phys_subreg_coalesce-2.ll b/test/CodeGen/X86/phys_subreg_coalesce-2.ll
index 02c519fb3e47..8ee97ae07e65 100644
--- a/test/CodeGen/X86/phys_subreg_coalesce-2.ll
+++ b/test/CodeGen/X86/phys_subreg_coalesce-2.ll
@@ -1,4 +1,5 @@
; RUN: llc < %s -march=x86 | FileCheck %s
+; RUN: llc -no-phi-elim-live-out-early-exit -terminal-rule < %s -march=x86 | FileCheck %s
; PR2659
define i32 @binomial(i32 %n, i32 %k) nounwind {
diff --git a/test/CodeGen/X86/phys_subreg_coalesce-3.ll b/test/CodeGen/X86/phys_subreg_coalesce-3.ll
index 12a3adfdfe98..74e3d1291c05 100644
--- a/test/CodeGen/X86/phys_subreg_coalesce-3.ll
+++ b/test/CodeGen/X86/phys_subreg_coalesce-3.ll
@@ -25,11 +25,11 @@ bb: ; preds = %bb, %bb.nph
%indvar = phi i32 [ 0, %bb.nph ], [ %indvar.next, %bb ] ; <i32> [#uses=3]
%j.06 = sub i32 %j.03, %indvar ; <i32> [#uses=1]
%tmp11 = sub i32 %tmp10, %indvar ; <i32> [#uses=1]
- %scevgep = getelementptr i32* %ptr, i32 %tmp11 ; <i32*> [#uses=1]
- %1 = load i32* %scevgep, align 4 ; <i32> [#uses=1]
+ %scevgep = getelementptr i32, i32* %ptr, i32 %tmp11 ; <i32*> [#uses=1]
+ %1 = load i32, i32* %scevgep, align 4 ; <i32> [#uses=1]
%2 = ashr i32 %j.06, %shifts ; <i32> [#uses=1]
%3 = and i32 %2, 65535 ; <i32> [#uses=1]
- %4 = getelementptr inbounds i32* %quadrant, i32 %1 ; <i32*> [#uses=1]
+ %4 = getelementptr inbounds i32, i32* %quadrant, i32 %1 ; <i32*> [#uses=1]
store i32 %3, i32* %4, align 4
%indvar.next = add i32 %indvar, 1 ; <i32> [#uses=2]
%exitcond = icmp eq i32 %indvar.next, %bbSize ; <i1> [#uses=1]
diff --git a/test/CodeGen/X86/pic.ll b/test/CodeGen/X86/pic.ll
index da1e2248065f..d543deb804d1 100644
--- a/test/CodeGen/X86/pic.ll
+++ b/test/CodeGen/X86/pic.ll
@@ -7,7 +7,7 @@
define void @test0() nounwind {
entry:
store i32* @dst, i32** @ptr
- %tmp.s = load i32* @src
+ %tmp.s = load i32, i32* @src
store i32 %tmp.s, i32* @dst
ret void
@@ -29,7 +29,7 @@ entry:
define void @test1() nounwind {
entry:
store i32* @dst2, i32** @ptr2
- %tmp.s = load i32* @src2
+ %tmp.s = load i32, i32* @src2
store i32 %tmp.s, i32* @dst2
ret void
@@ -69,10 +69,10 @@ entry:
define void @test3() nounwind {
entry:
- %tmp = call void(...)*(...)* @afoo()
+ %tmp = call void(...)*(...) @afoo()
store void(...)* %tmp, void(...)** @pfoo
- %tmp1 = load void(...)** @pfoo
- call void(...)* %tmp1()
+ %tmp1 = load void(...)*, void(...)** @pfoo
+ call void(...) %tmp1()
ret void
; LINUX-LABEL: test3:
; LINUX: calll .L3$pb
@@ -88,7 +88,7 @@ declare void(...)* @afoo(...)
define void @test4() nounwind {
entry:
- call void(...)* @foo()
+ call void(...) @foo()
ret void
; LINUX-LABEL: test4:
; LINUX: calll .L4$pb
@@ -107,7 +107,7 @@ declare void @foo(...)
define void @test5() nounwind {
entry:
store i32* @dst6, i32** @ptr6
- %tmp.s = load i32* @src6
+ %tmp.s = load i32, i32* @src6
store i32 %tmp.s, i32* @dst6
ret void
@@ -146,43 +146,43 @@ define void @test7(i32 %n.u) nounwind {
entry:
switch i32 %n.u, label %bb12 [i32 1, label %bb i32 2, label %bb6 i32 4, label %bb7 i32 5, label %bb8 i32 6, label %bb10 i32 7, label %bb1 i32 8, label %bb3 i32 9, label %bb4 i32 10, label %bb9 i32 11, label %bb2 i32 12, label %bb5 i32 13, label %bb11 ]
bb:
- tail call void(...)* @foo1()
+ tail call void(...) @foo1()
ret void
bb1:
- tail call void(...)* @foo2()
+ tail call void(...) @foo2()
ret void
bb2:
- tail call void(...)* @foo6()
+ tail call void(...) @foo6()
ret void
bb3:
- tail call void(...)* @foo3()
+ tail call void(...) @foo3()
ret void
bb4:
- tail call void(...)* @foo4()
+ tail call void(...) @foo4()
ret void
bb5:
- tail call void(...)* @foo5()
+ tail call void(...) @foo5()
ret void
bb6:
- tail call void(...)* @foo1()
+ tail call void(...) @foo1()
ret void
bb7:
- tail call void(...)* @foo2()
+ tail call void(...) @foo2()
ret void
bb8:
- tail call void(...)* @foo6()
+ tail call void(...) @foo6()
ret void
bb9:
- tail call void(...)* @foo3()
+ tail call void(...) @foo3()
ret void
bb10:
- tail call void(...)* @foo4()
+ tail call void(...) @foo4()
ret void
bb11:
- tail call void(...)* @foo5()
+ tail call void(...) @foo5()
ret void
bb12:
- tail call void(...)* @foo6()
+ tail call void(...) @foo6()
ret void
; LINUX-LABEL: test7:
diff --git a/test/CodeGen/X86/pic_jumptable.ll b/test/CodeGen/X86/pic_jumptable.ll
index bdd885935842..8c1992a24ece 100644
--- a/test/CodeGen/X86/pic_jumptable.ll
+++ b/test/CodeGen/X86/pic_jumptable.ll
@@ -10,7 +10,7 @@
declare void @_Z3bari(i32)
-; CHECK-LINUX: .text._Z3fooILi1EEvi,"axG",@progbits,_Z3fooILi1EEvi,comdat
+; CHECK-LINUX: _Z3fooILi1EEvi:
define linkonce void @_Z3fooILi1EEvi(i32 %Y) nounwind {
entry:
; CHECK: L0$pb
@@ -31,7 +31,7 @@ entry:
%Y_addr = alloca i32 ; <i32*> [#uses=2]
%"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
store i32 %Y, i32* %Y_addr
- %tmp = load i32* %Y_addr ; <i32> [#uses=1]
+ %tmp = load i32, i32* %Y_addr ; <i32> [#uses=1]
switch i32 %tmp, label %bb10 [
i32 0, label %bb3
i32 1, label %bb
@@ -55,13 +55,15 @@ entry:
]
bb: ; preds = %entry, %entry, %entry, %entry, %entry, %entry, %entry, %entry, %entry, %entry
+ call void @_Z3bari( i32 0 )
br label %bb1
bb1: ; preds = %bb, %entry
+ call void @_Z3bari( i32 1 )
br label %bb2
bb2: ; preds = %bb1, %entry
- call void @_Z3bari( i32 1 )
+ call void @_Z3bari( i32 2 )
br label %bb11
bb3: ; preds = %entry
diff --git a/test/CodeGen/X86/pmovext.ll b/test/CodeGen/X86/pmovext.ll
index f0e468f53cb3..6c76949fb78b 100644
--- a/test/CodeGen/X86/pmovext.ll
+++ b/test/CodeGen/X86/pmovext.ll
@@ -8,7 +8,7 @@
;CHECK-NEXT: ret
define void @intrin_pmov(i16* noalias %dest, i8* noalias %src) nounwind uwtable ssp {
%1 = bitcast i8* %src to <2 x i64>*
- %2 = load <2 x i64>* %1, align 16
+ %2 = load <2 x i64>, <2 x i64>* %1, align 16
%3 = bitcast <2 x i64> %2 to <16 x i8>
%4 = tail call <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8> %3) nounwind
%5 = bitcast i16* %dest to i8*
diff --git a/test/CodeGen/X86/pmovsx-inreg.ll b/test/CodeGen/X86/pmovsx-inreg.ll
index 07979f61ddd8..2897d6bd814e 100644
--- a/test/CodeGen/X86/pmovsx-inreg.ll
+++ b/test/CodeGen/X86/pmovsx-inreg.ll
@@ -6,7 +6,7 @@
; These tests inject a store into the chain to test the inreg versions of pmovsx
define void @test1(<2 x i8>* %in, <2 x i64>* %out) nounwind {
- %wide.load35 = load <2 x i8>* %in, align 1
+ %wide.load35 = load <2 x i8>, <2 x i8>* %in, align 1
%sext = sext <2 x i8> %wide.load35 to <2 x i64>
store <2 x i64> zeroinitializer, <2 x i64>* undef, align 8
store <2 x i64> %sext, <2 x i64>* %out, align 8
@@ -23,7 +23,7 @@ define void @test1(<2 x i8>* %in, <2 x i64>* %out) nounwind {
}
define void @test2(<4 x i8>* %in, <4 x i64>* %out) nounwind {
- %wide.load35 = load <4 x i8>* %in, align 1
+ %wide.load35 = load <4 x i8>, <4 x i8>* %in, align 1
%sext = sext <4 x i8> %wide.load35 to <4 x i64>
store <4 x i64> zeroinitializer, <4 x i64>* undef, align 8
store <4 x i64> %sext, <4 x i64>* %out, align 8
@@ -34,7 +34,7 @@ define void @test2(<4 x i8>* %in, <4 x i64>* %out) nounwind {
}
define void @test3(<4 x i8>* %in, <4 x i32>* %out) nounwind {
- %wide.load35 = load <4 x i8>* %in, align 1
+ %wide.load35 = load <4 x i8>, <4 x i8>* %in, align 1
%sext = sext <4 x i8> %wide.load35 to <4 x i32>
store <4 x i32> zeroinitializer, <4 x i32>* undef, align 8
store <4 x i32> %sext, <4 x i32>* %out, align 8
@@ -51,7 +51,7 @@ define void @test3(<4 x i8>* %in, <4 x i32>* %out) nounwind {
}
define void @test4(<8 x i8>* %in, <8 x i32>* %out) nounwind {
- %wide.load35 = load <8 x i8>* %in, align 1
+ %wide.load35 = load <8 x i8>, <8 x i8>* %in, align 1
%sext = sext <8 x i8> %wide.load35 to <8 x i32>
store <8 x i32> zeroinitializer, <8 x i32>* undef, align 8
store <8 x i32> %sext, <8 x i32>* %out, align 8
@@ -62,7 +62,7 @@ define void @test4(<8 x i8>* %in, <8 x i32>* %out) nounwind {
}
define void @test5(<8 x i8>* %in, <8 x i16>* %out) nounwind {
- %wide.load35 = load <8 x i8>* %in, align 1
+ %wide.load35 = load <8 x i8>, <8 x i8>* %in, align 1
%sext = sext <8 x i8> %wide.load35 to <8 x i16>
store <8 x i16> zeroinitializer, <8 x i16>* undef, align 8
store <8 x i16> %sext, <8 x i16>* %out, align 8
@@ -79,7 +79,7 @@ define void @test5(<8 x i8>* %in, <8 x i16>* %out) nounwind {
}
define void @test6(<16 x i8>* %in, <16 x i16>* %out) nounwind {
- %wide.load35 = load <16 x i8>* %in, align 1
+ %wide.load35 = load <16 x i8>, <16 x i8>* %in, align 1
%sext = sext <16 x i8> %wide.load35 to <16 x i16>
store <16 x i16> zeroinitializer, <16 x i16>* undef, align 8
store <16 x i16> %sext, <16 x i16>* %out, align 8
@@ -90,7 +90,7 @@ define void @test6(<16 x i8>* %in, <16 x i16>* %out) nounwind {
}
define void @test7(<2 x i16>* %in, <2 x i64>* %out) nounwind {
- %wide.load35 = load <2 x i16>* %in, align 1
+ %wide.load35 = load <2 x i16>, <2 x i16>* %in, align 1
%sext = sext <2 x i16> %wide.load35 to <2 x i64>
store <2 x i64> zeroinitializer, <2 x i64>* undef, align 8
store <2 x i64> %sext, <2 x i64>* %out, align 8
@@ -108,7 +108,7 @@ define void @test7(<2 x i16>* %in, <2 x i64>* %out) nounwind {
}
define void @test8(<4 x i16>* %in, <4 x i64>* %out) nounwind {
- %wide.load35 = load <4 x i16>* %in, align 1
+ %wide.load35 = load <4 x i16>, <4 x i16>* %in, align 1
%sext = sext <4 x i16> %wide.load35 to <4 x i64>
store <4 x i64> zeroinitializer, <4 x i64>* undef, align 8
store <4 x i64> %sext, <4 x i64>* %out, align 8
@@ -119,7 +119,7 @@ define void @test8(<4 x i16>* %in, <4 x i64>* %out) nounwind {
}
define void @test9(<4 x i16>* %in, <4 x i32>* %out) nounwind {
- %wide.load35 = load <4 x i16>* %in, align 1
+ %wide.load35 = load <4 x i16>, <4 x i16>* %in, align 1
%sext = sext <4 x i16> %wide.load35 to <4 x i32>
store <4 x i32> zeroinitializer, <4 x i32>* undef, align 8
store <4 x i32> %sext, <4 x i32>* %out, align 8
@@ -136,7 +136,7 @@ define void @test9(<4 x i16>* %in, <4 x i32>* %out) nounwind {
}
define void @test10(<8 x i16>* %in, <8 x i32>* %out) nounwind {
- %wide.load35 = load <8 x i16>* %in, align 1
+ %wide.load35 = load <8 x i16>, <8 x i16>* %in, align 1
%sext = sext <8 x i16> %wide.load35 to <8 x i32>
store <8 x i32> zeroinitializer, <8 x i32>* undef, align 8
store <8 x i32> %sext, <8 x i32>* %out, align 8
@@ -147,7 +147,7 @@ define void @test10(<8 x i16>* %in, <8 x i32>* %out) nounwind {
}
define void @test11(<2 x i32>* %in, <2 x i64>* %out) nounwind {
- %wide.load35 = load <2 x i32>* %in, align 1
+ %wide.load35 = load <2 x i32>, <2 x i32>* %in, align 1
%sext = sext <2 x i32> %wide.load35 to <2 x i64>
store <2 x i64> zeroinitializer, <2 x i64>* undef, align 8
store <2 x i64> %sext, <2 x i64>* %out, align 8
@@ -164,7 +164,7 @@ define void @test11(<2 x i32>* %in, <2 x i64>* %out) nounwind {
}
define void @test12(<4 x i32>* %in, <4 x i64>* %out) nounwind {
- %wide.load35 = load <4 x i32>* %in, align 1
+ %wide.load35 = load <4 x i32>, <4 x i32>* %in, align 1
%sext = sext <4 x i32> %wide.load35 to <4 x i64>
store <4 x i64> zeroinitializer, <4 x i64>* undef, align 8
store <4 x i64> %sext, <4 x i64>* %out, align 8
diff --git a/test/CodeGen/X86/pmul.ll b/test/CodeGen/X86/pmul.ll
index 8937d6afa0ae..21463b8539dc 100644
--- a/test/CodeGen/X86/pmul.ll
+++ b/test/CodeGen/X86/pmul.ll
@@ -1,18 +1,68 @@
; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=ALL --check-prefix=SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=SSE41
+define <16 x i8> @mul8c(<16 x i8> %i) nounwind {
+; SSE2-LABEL: mul8c:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117]
+; SSE2-NEXT: psraw $8, %xmm1
+; SSE2-NEXT: movdqa %xmm0, %xmm2
+; SSE2-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
+; SSE2-NEXT: psraw $8, %xmm2
+; SSE2-NEXT: pmullw %xmm1, %xmm2
+; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255]
+; SSE2-NEXT: pand %xmm3, %xmm2
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSE2-NEXT: psraw $8, %xmm0
+; SSE2-NEXT: pmullw %xmm1, %xmm0
+; SSE2-NEXT: pand %xmm3, %xmm0
+; SSE2-NEXT: packuswb %xmm2, %xmm0
+; SSE2-NEXT: retq
+;
+; SSE41-LABEL: mul8c:
+; SSE41: # BB#0: # %entry
+; SSE41-NEXT: pmovsxbw %xmm0, %xmm1
+; SSE41-NEXT: pmovsxbw {{.*}}(%rip), %xmm2
+; SSE41-NEXT: pmullw %xmm2, %xmm1
+; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255]
+; SSE41-NEXT: pand %xmm3, %xmm1
+; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; SSE41-NEXT: pmovsxbw %xmm0, %xmm0
+; SSE41-NEXT: pmullw %xmm2, %xmm0
+; SSE41-NEXT: pand %xmm3, %xmm0
+; SSE41-NEXT: packuswb %xmm0, %xmm1
+; SSE41-NEXT: movdqa %xmm1, %xmm0
+; SSE41-NEXT: retq
+entry:
+ %A = mul <16 x i8> %i, < i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117 >
+ ret <16 x i8> %A
+}
+
+define <8 x i16> @mul16c(<8 x i16> %i) nounwind {
+; ALL-LABEL: mul16c:
+; ALL: # BB#0: # %entry
+; ALL-NEXT: pmullw {{.*}}(%rip), %xmm0
+; ALL-NEXT: retq
+entry:
+ %A = mul <8 x i16> %i, < i16 117, i16 117, i16 117, i16 117, i16 117, i16 117, i16 117, i16 117 >
+ ret <8 x i16> %A
+}
+
define <4 x i32> @a(<4 x i32> %i) nounwind {
; SSE2-LABEL: a:
-; SSE2: movdqa {{.*}}, %[[X1:xmm[0-9]+]]
-; SSE2-NEXT: pshufd {{.*}} # [[X2:xmm[0-9]+]] = xmm0[1,1,3,3]
-; SSE2-NEXT: pmuludq %[[X1]], %xmm0
-; SSE2-NEXT: pmuludq %[[X1]], %[[X2]]
-; SSE2-NEXT: shufps {{.*}} # xmm0 = xmm0[0,2],[[X2]][0,2]
-; SSE2-NEXT: shufps {{.*}} # xmm0 = xmm0[0,2,1,3]
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [117,117,117,117]
+; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
+; SSE2-NEXT: pmuludq %xmm1, %xmm0
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; SSE2-NEXT: pmuludq %xmm1, %xmm2
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
+; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT: retq
;
; SSE41-LABEL: a:
-; SSE41: pmulld
+; SSE41: # BB#0: # %entry
+; SSE41-NEXT: pmulld {{.*}}(%rip), %xmm0
; SSE41-NEXT: retq
entry:
%A = mul <4 x i32> %i, < i32 117, i32 117, i32 117, i32 117 >
@@ -21,26 +71,91 @@ entry:
define <2 x i64> @b(<2 x i64> %i) nounwind {
; ALL-LABEL: b:
-; ALL: pmuludq
-; ALL: pmuludq
-; ALL: pmuludq
+; ALL: # BB#0: # %entry
+; ALL-NEXT: movdqa {{.*#+}} xmm1 = [117,117]
+; ALL-NEXT: movdqa %xmm0, %xmm2
+; ALL-NEXT: pmuludq %xmm1, %xmm2
+; ALL-NEXT: pxor %xmm3, %xmm3
+; ALL-NEXT: pmuludq %xmm0, %xmm3
+; ALL-NEXT: psllq $32, %xmm3
+; ALL-NEXT: paddq %xmm3, %xmm2
+; ALL-NEXT: psrlq $32, %xmm0
+; ALL-NEXT: pmuludq %xmm1, %xmm0
+; ALL-NEXT: psllq $32, %xmm0
+; ALL-NEXT: paddq %xmm2, %xmm0
+; ALL-NEXT: retq
entry:
%A = mul <2 x i64> %i, < i64 117, i64 117 >
ret <2 x i64> %A
}
+define <16 x i8> @mul8(<16 x i8> %i, <16 x i8> %j) nounwind {
+; SSE2-LABEL: mul8:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movdqa %xmm1, %xmm2
+; SSE2-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
+; SSE2-NEXT: psraw $8, %xmm2
+; SSE2-NEXT: movdqa %xmm0, %xmm3
+; SSE2-NEXT: punpckhbw {{.*#+}} xmm3 = xmm3[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
+; SSE2-NEXT: psraw $8, %xmm3
+; SSE2-NEXT: pmullw %xmm2, %xmm3
+; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
+; SSE2-NEXT: pand %xmm2, %xmm3
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSE2-NEXT: psraw $8, %xmm1
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSE2-NEXT: psraw $8, %xmm0
+; SSE2-NEXT: pmullw %xmm1, %xmm0
+; SSE2-NEXT: pand %xmm2, %xmm0
+; SSE2-NEXT: packuswb %xmm3, %xmm0
+; SSE2-NEXT: retq
+;
+; SSE41-LABEL: mul8:
+; SSE41: # BB#0: # %entry
+; SSE41-NEXT: pmovsxbw %xmm1, %xmm3
+; SSE41-NEXT: pmovsxbw %xmm0, %xmm2
+; SSE41-NEXT: pmullw %xmm3, %xmm2
+; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255]
+; SSE41-NEXT: pand %xmm3, %xmm2
+; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
+; SSE41-NEXT: pmovsxbw %xmm1, %xmm1
+; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; SSE41-NEXT: pmovsxbw %xmm0, %xmm0
+; SSE41-NEXT: pmullw %xmm1, %xmm0
+; SSE41-NEXT: pand %xmm3, %xmm0
+; SSE41-NEXT: packuswb %xmm0, %xmm2
+; SSE41-NEXT: movdqa %xmm2, %xmm0
+; SSE41-NEXT: retq
+entry:
+ %A = mul <16 x i8> %i, %j
+ ret <16 x i8> %A
+}
+
+define <8 x i16> @mul16(<8 x i16> %i, <8 x i16> %j) nounwind {
+; ALL-LABEL: mul16:
+; ALL: # BB#0: # %entry
+; ALL-NEXT: pmullw %xmm1, %xmm0
+; ALL-NEXT: retq
+entry:
+ %A = mul <8 x i16> %i, %j
+ ret <8 x i16> %A
+}
+
define <4 x i32> @c(<4 x i32> %i, <4 x i32> %j) nounwind {
; SSE2-LABEL: c:
-; SSE2: pshufd {{.*}} # [[X2:xmm[0-9]+]] = xmm0[1,1,3,3]
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; SSE2-NEXT: pmuludq %xmm1, %xmm0
-; SSE2-NEXT: pshufd {{.*}} # xmm1 = xmm1[1,1,3,3]
-; SSE2-NEXT: pmuludq %[[X2]], %xmm1
-; SSE2-NEXT: shufps {{.*}} # xmm0 = xmm0[0,2],xmm1[0,2]
-; SSE2-NEXT: shufps {{.*}} # xmm0 = xmm0[0,2,1,3]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
+; SSE2-NEXT: pmuludq %xmm2, %xmm1
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
+; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT: retq
;
; SSE41-LABEL: c:
-; SSE41: pmulld
+; SSE41: # BB#0: # %entry
+; SSE41-NEXT: pmulld %xmm1, %xmm0
; SSE41-NEXT: retq
entry:
%A = mul <4 x i32> %i, %j
@@ -49,9 +164,19 @@ entry:
define <2 x i64> @d(<2 x i64> %i, <2 x i64> %j) nounwind {
; ALL-LABEL: d:
-; ALL: pmuludq
-; ALL: pmuludq
-; ALL: pmuludq
+; ALL: # BB#0: # %entry
+; ALL-NEXT: movdqa %xmm0, %xmm2
+; ALL-NEXT: pmuludq %xmm1, %xmm2
+; ALL-NEXT: movdqa %xmm1, %xmm3
+; ALL-NEXT: psrlq $32, %xmm3
+; ALL-NEXT: pmuludq %xmm0, %xmm3
+; ALL-NEXT: psllq $32, %xmm3
+; ALL-NEXT: paddq %xmm3, %xmm2
+; ALL-NEXT: psrlq $32, %xmm0
+; ALL-NEXT: pmuludq %xmm1, %xmm0
+; ALL-NEXT: psllq $32, %xmm0
+; ALL-NEXT: paddq %xmm2, %xmm0
+; ALL-NEXT: retq
entry:
%A = mul <2 x i64> %i, %j
ret <2 x i64> %A
@@ -61,20 +186,32 @@ declare void @foo()
define <4 x i32> @e(<4 x i32> %i, <4 x i32> %j) nounwind {
; SSE2-LABEL: e:
-; SSE2: movdqa {{[0-9]*}}(%rsp), %xmm0
-; SSE2-NEXT: pshufd {{.*}} # [[X1:xmm[0-9]+]] = xmm0[1,1,3,3]
-; SSE2-NEXT: movdqa {{[0-9]*}}(%rsp), %[[X2:xmm[0-9]+]]
-; SSE2-NEXT: pmuludq %[[X2]], %xmm0
-; SSE2-NEXT: pshufd {{.*}} # [[X2]] = [[X2]][1,1,3,3]
-; SSE2-NEXT: pmuludq %[[X1]], %[[X2]]
-; SSE2-NEXT: shufps {{.*}} # xmm0 = xmm0[0,2],[[X2]][0,2]
-; SSE2-NEXT: shufps {{.*}} # xmm0 = xmm0[0,2,1,3]
-; SSE2-NEXT: addq ${{[0-9]+}}, %rsp
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: subq $40, %rsp
+; SSE2-NEXT: movaps %xmm1, {{[0-9]+}}(%rsp) # 16-byte Spill
+; SSE2-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
+; SSE2-NEXT: callq foo
+; SSE2-NEXT: movdqa (%rsp), %xmm0 # 16-byte Reload
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
+; SSE2-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm2 # 16-byte Reload
+; SSE2-NEXT: pmuludq %xmm2, %xmm0
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; SSE2-NEXT: pmuludq %xmm1, %xmm2
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
+; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE2-NEXT: addq $40, %rsp
; SSE2-NEXT: retq
;
; SSE41-LABEL: e:
-; SSE41: pmulld {{[0-9]+}}(%rsp), %xmm
-; SSE41-NEXT: addq ${{[0-9]+}}, %rsp
+; SSE41: # BB#0: # %entry
+; SSE41-NEXT: subq $40, %rsp
+; SSE41-NEXT: movaps %xmm1, {{[0-9]+}}(%rsp) # 16-byte Spill
+; SSE41-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
+; SSE41-NEXT: callq foo
+; SSE41-NEXT: movdqa (%rsp), %xmm0 # 16-byte Reload
+; SSE41-NEXT: pmulld {{[0-9]+}}(%rsp), %xmm0 # 16-byte Folded Reload
+; SSE41-NEXT: addq $40, %rsp
; SSE41-NEXT: retq
entry:
; Use a call to force spills.
@@ -85,9 +222,26 @@ entry:
define <2 x i64> @f(<2 x i64> %i, <2 x i64> %j) nounwind {
; ALL-LABEL: f:
-; ALL: pmuludq
-; ALL: pmuludq
-; ALL: pmuludq
+; ALL: # BB#0: # %entry
+; ALL-NEXT: subq $40, %rsp
+; ALL-NEXT: movaps %xmm1, {{[0-9]+}}(%rsp) # 16-byte Spill
+; ALL-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
+; ALL-NEXT: callq foo
+; ALL-NEXT: movdqa (%rsp), %xmm0 # 16-byte Reload
+; ALL-NEXT: movdqa %xmm0, %xmm2
+; ALL-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm3 # 16-byte Reload
+; ALL-NEXT: pmuludq %xmm3, %xmm2
+; ALL-NEXT: movdqa %xmm3, %xmm1
+; ALL-NEXT: psrlq $32, %xmm1
+; ALL-NEXT: pmuludq %xmm0, %xmm1
+; ALL-NEXT: psllq $32, %xmm1
+; ALL-NEXT: paddq %xmm1, %xmm2
+; ALL-NEXT: psrlq $32, %xmm0
+; ALL-NEXT: pmuludq %xmm3, %xmm0
+; ALL-NEXT: psllq $32, %xmm0
+; ALL-NEXT: paddq %xmm2, %xmm0
+; ALL-NEXT: addq $40, %rsp
+; ALL-NEXT: retq
entry:
; Use a call to force spills.
call void @foo()
diff --git a/test/CodeGen/X86/pmulld.ll b/test/CodeGen/X86/pmulld.ll
index 3db0f73954d7..3fe3ebc9763c 100644
--- a/test/CodeGen/X86/pmulld.ll
+++ b/test/CodeGen/X86/pmulld.ll
@@ -20,7 +20,7 @@ define <4 x i32> @test1a(<4 x i32> %A, <4 x i32> *%Bp) nounwind {
; WIN64-NEXT: movdqa (%rcx), %xmm0
; WIN64-NEXT: pmulld (%rdx), %xmm0
- %B = load <4 x i32>* %Bp
+ %B = load <4 x i32>, <4 x i32>* %Bp
%C = mul <4 x i32> %A, %B
ret <4 x i32> %C
}
diff --git a/test/CodeGen/X86/pointer-vector.ll b/test/CodeGen/X86/pointer-vector.ll
index 0ee99875264f..48c8b2376bd5 100644
--- a/test/CodeGen/X86/pointer-vector.ll
+++ b/test/CodeGen/X86/pointer-vector.ll
@@ -31,7 +31,7 @@ entry:
;CHECK: LOAD0
define <4 x i8*> @LOAD0(<4 x i8*>* %p) nounwind {
entry:
- %G = load <4 x i8*>* %p
+ %G = load <4 x i8*>, <4 x i8*>* %p
;CHECK: movaps
ret <4 x i8*> %G
;CHECK: ret
@@ -40,7 +40,7 @@ entry:
;CHECK: LOAD1
define <4 x i8*> @LOAD1(<4 x i8*>* %p) nounwind {
entry:
- %G = load <4 x i8*>* %p
+ %G = load <4 x i8*>, <4 x i8*>* %p
;CHECK: movdqa
;CHECK: pshufd
;CHECK: movdqa
@@ -55,11 +55,11 @@ define <4 x i8*> @LOAD2(<4 x i8*>* %p) nounwind {
entry:
%I = alloca <4 x i8*>
;CHECK: sub
- %G = load <4 x i8*>* %p
+ %G = load <4 x i8*>, <4 x i8*>* %p
;CHECK: movaps
store <4 x i8*> %G, <4 x i8*>* %I
;CHECK: movaps
- %Z = load <4 x i8*>* %I
+ %Z = load <4 x i8*>, <4 x i8*>* %I
ret <4 x i8*> %Z
;CHECK: add
;CHECK: ret
@@ -68,7 +68,7 @@ entry:
;CHECK: INT2PTR0
define <4 x i32> @INT2PTR0(<4 x i8*>* %p) nounwind {
entry:
- %G = load <4 x i8*>* %p
+ %G = load <4 x i8*>, <4 x i8*>* %p
;CHECK: movl
;CHECK: movaps
%K = ptrtoint <4 x i8*> %G to <4 x i32>
@@ -79,10 +79,9 @@ entry:
;CHECK: INT2PTR1
define <4 x i32*> @INT2PTR1(<4 x i8>* %p) nounwind {
entry:
- %G = load <4 x i8>* %p
+ %G = load <4 x i8>, <4 x i8>* %p
;CHECK: movl
-;CHECK: pmovzxbd
-;CHECK: pand
+;CHECK: pmovzxbd (%
%K = inttoptr <4 x i8> %G to <4 x i32*>
;CHECK: ret
ret <4 x i32*> %K
@@ -91,7 +90,7 @@ entry:
;CHECK: BITCAST0
define <4 x i32*> @BITCAST0(<4 x i8*>* %p) nounwind {
entry:
- %G = load <4 x i8*>* %p
+ %G = load <4 x i8*>, <4 x i8*>* %p
;CHECK: movl
%T = bitcast <4 x i8*> %G to <4 x i32*>
;CHECK: movaps
@@ -102,7 +101,7 @@ entry:
;CHECK: BITCAST1
define <2 x i32*> @BITCAST1(<2 x i8*>* %p) nounwind {
entry:
- %G = load <2 x i8*>* %p
+ %G = load <2 x i8*>, <2 x i8*>* %p
;CHECK: movl
;CHECK: pmovzxdq
%T = bitcast <2 x i8*> %G to <2 x i32*>
@@ -113,8 +112,8 @@ entry:
;CHECK: ICMP0
define <4 x i32> @ICMP0(<4 x i8*>* %p0, <4 x i8*>* %p1) nounwind {
entry:
- %g0 = load <4 x i8*>* %p0
- %g1 = load <4 x i8*>* %p1
+ %g0 = load <4 x i8*>, <4 x i8*>* %p0
+ %g1 = load <4 x i8*>, <4 x i8*>* %p1
%k = icmp sgt <4 x i8*> %g0, %g1
;CHECK: pcmpgtd
%j = select <4 x i1> %k, <4 x i32> <i32 0, i32 1, i32 2, i32 4>, <4 x i32> <i32 9, i32 8, i32 7, i32 6>
@@ -125,8 +124,8 @@ entry:
;CHECK: ICMP1
define <4 x i32> @ICMP1(<4 x i8*>* %p0, <4 x i8*>* %p1) nounwind {
entry:
- %g0 = load <4 x i8*>* %p0
- %g1 = load <4 x i8*>* %p1
+ %g0 = load <4 x i8*>, <4 x i8*>* %p0
+ %g1 = load <4 x i8*>, <4 x i8*>* %p1
%k = icmp eq <4 x i8*> %g0, %g1
;CHECK: pcmpeqd
%j = select <4 x i1> %k, <4 x i32> <i32 0, i32 1, i32 2, i32 4>, <4 x i32> <i32 9, i32 8, i32 7, i32 6>
diff --git a/test/CodeGen/X86/postra-licm.ll b/test/CodeGen/X86/postra-licm.ll
index 946b8362122d..5c93160125e7 100644
--- a/test/CodeGen/X86/postra-licm.ll
+++ b/test/CodeGen/X86/postra-licm.ll
@@ -30,7 +30,7 @@ bb.i: ; preds = %bb3
unreachable
bb.nph41: ; preds = %bb3
- %0 = call %struct.FILE* @"\01_fopen$UNIX2003"(i8* undef, i8* getelementptr inbounds ([2 x i8]* @.str12, i32 0, i32 0)) nounwind ; <%struct.FILE*> [#uses=3]
+ %0 = call %struct.FILE* @"\01_fopen$UNIX2003"(i8* undef, i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str12, i32 0, i32 0)) nounwind ; <%struct.FILE*> [#uses=3]
br i1 undef, label %bb4, label %bb5.preheader
bb5.preheader: ; preds = %bb.nph41
@@ -85,7 +85,7 @@ bb28: ; preds = %bb28, %bb26.prehead
br label %bb28
bb30: ; preds = %bb26.preheader
- %5 = call i32 @strcmp(i8* undef, i8* getelementptr inbounds ([7 x i8]* @.str19, i32 0, i32 0)) nounwind readonly ; <i32> [#uses=0]
+ %5 = call i32 @strcmp(i8* undef, i8* getelementptr inbounds ([7 x i8], [7 x i8]* @.str19, i32 0, i32 0)) nounwind readonly ; <i32> [#uses=0]
br i1 undef, label %bb34, label %bb70
bb32.loopexit: ; preds = %bb45
@@ -101,19 +101,19 @@ bb35: ; preds = %bb34
unreachable
bb39.preheader: ; preds = %bb34
- %7 = getelementptr inbounds %struct.epoch_t* undef, i32 %indvar54, i32 3 ; <i32*> [#uses=1]
- %8 = getelementptr inbounds %struct.epoch_t* undef, i32 %indvar54, i32 2 ; <i32*> [#uses=0]
+ %7 = getelementptr inbounds %struct.epoch_t, %struct.epoch_t* undef, i32 %indvar54, i32 3 ; <i32*> [#uses=1]
+ %8 = getelementptr inbounds %struct.epoch_t, %struct.epoch_t* undef, i32 %indvar54, i32 2 ; <i32*> [#uses=0]
br i1 false, label %bb42, label %bb45
bb42: ; preds = %bb39.preheader
unreachable
bb45: ; preds = %bb39.preheader
- %9 = call i32 @strcmp(i8* undef, i8* getelementptr inbounds ([4 x i8]* @.str24, i32 0, i32 0)) nounwind readonly ; <i32> [#uses=0]
+ %9 = call i32 @strcmp(i8* undef, i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str24, i32 0, i32 0)) nounwind readonly ; <i32> [#uses=0]
br i1 false, label %bb47, label %bb32.loopexit
bb47: ; preds = %bb45
- %10 = load i32* %7, align 4 ; <i32> [#uses=0]
+ %10 = load i32, i32* %7, align 4 ; <i32> [#uses=0]
unreachable
bb70: ; preds = %bb32.loopexit, %bb30
@@ -162,19 +162,19 @@ bb.nph: ; preds = %entry
bb: ; preds = %bb, %bb.nph
%tmp9 = mul i64 undef, undef ; <i64> [#uses=2]
%tmp12 = add i64 %tmp11, %tmp9 ; <i64> [#uses=1]
- %scevgep13 = getelementptr i8* %bufp, i64 %tmp12 ; <i8*> [#uses=1]
+ %scevgep13 = getelementptr i8, i8* %bufp, i64 %tmp12 ; <i8*> [#uses=1]
%tmp15 = add i64 %tmp14, %tmp9 ; <i64> [#uses=1]
- %scevgep16 = getelementptr i8* %bufp, i64 %tmp15 ; <i8*> [#uses=1]
- %0 = load i8* undef, align 1 ; <i8> [#uses=1]
+ %scevgep16 = getelementptr i8, i8* %bufp, i64 %tmp15 ; <i8*> [#uses=1]
+ %0 = load i8, i8* undef, align 1 ; <i8> [#uses=1]
%1 = zext i8 %0 to i32 ; <i32> [#uses=1]
- %2 = getelementptr inbounds [16 x i16]* @map_4_to_16, i64 0, i64 0 ; <i16*> [#uses=1]
- %3 = load i16* %2, align 2 ; <i16> [#uses=1]
+ %2 = getelementptr inbounds [16 x i16], [16 x i16]* @map_4_to_16, i64 0, i64 0 ; <i16*> [#uses=1]
+ %3 = load i16, i16* %2, align 2 ; <i16> [#uses=1]
%4 = trunc i16 %3 to i8 ; <i8> [#uses=1]
store i8 %4, i8* undef, align 1
%5 = and i32 %1, 15 ; <i32> [#uses=1]
%6 = zext i32 %5 to i64 ; <i64> [#uses=1]
- %7 = getelementptr inbounds [16 x i16]* @map_4_to_16, i64 0, i64 %6 ; <i16*> [#uses=1]
- %8 = load i16* %7, align 2 ; <i16> [#uses=2]
+ %7 = getelementptr inbounds [16 x i16], [16 x i16]* @map_4_to_16, i64 0, i64 %6 ; <i16*> [#uses=1]
+ %8 = load i16, i16* %7, align 2 ; <i16> [#uses=2]
%9 = lshr i16 %8, 8 ; <i16> [#uses=1]
%10 = trunc i16 %9 to i8 ; <i8> [#uses=1]
store i8 %10, i8* %scevgep13, align 1
diff --git a/test/CodeGen/X86/pr10475.ll b/test/CodeGen/X86/pr10475.ll
index 3efc39ee9f1f..d81fce8bc4f2 100644
--- a/test/CodeGen/X86/pr10475.ll
+++ b/test/CodeGen/X86/pr10475.ll
@@ -10,7 +10,7 @@ CF79: ; preds = %CF79, %BB
br i1 undef, label %CF79, label %CF84.critedge.critedge
CF84.critedge.critedge: ; preds = %CF79
- %L35 = load <8 x i32>* undef
+ %L35 = load <8 x i32>, <8 x i32>* undef
br label %CF85
CF85: ; preds = %CF85, %CF84.critedge.critedge
diff --git a/test/CodeGen/X86/pr10525.ll b/test/CodeGen/X86/pr10525.ll
index 30ce2979e8e1..436d89caabe0 100644
--- a/test/CodeGen/X86/pr10525.ll
+++ b/test/CodeGen/X86/pr10525.ll
@@ -4,7 +4,7 @@
define void @autogen_163411_5000() {
BB:
- %L = load <2 x i64>* undef
+ %L = load <2 x i64>, <2 x i64>* undef
%Shuff11 = shufflevector <2 x i64> %L, <2 x i64> %L, <2 x i32> <i32 2, i32 0>
%I51 = insertelement <2 x i64> undef, i64 undef, i32 0
%Shuff152 = shufflevector <2 x i64> %I51, <2 x i64> %Shuff11, <2 x i32> <i32 1, i32 3>
diff --git a/test/CodeGen/X86/pr11334.ll b/test/CodeGen/X86/pr11334.ll
index 0bdb0ec7cf44..6da4697c3b21 100644
--- a/test/CodeGen/X86/pr11334.ll
+++ b/test/CodeGen/X86/pr11334.ll
@@ -57,7 +57,7 @@ entry:
define void @test_vector_creation() nounwind {
%1 = insertelement <4 x double> undef, double 0.000000e+00, i32 2
- %2 = load double addrspace(1)* null
+ %2 = load double, double addrspace(1)* null
%3 = insertelement <4 x double> %1, double %2, i32 3
store <4 x double> %3, <4 x double>* undef
ret void
diff --git a/test/CodeGen/X86/pr12360.ll b/test/CodeGen/X86/pr12360.ll
index 673403624589..3e762da545d2 100644
--- a/test/CodeGen/X86/pr12360.ll
+++ b/test/CodeGen/X86/pr12360.ll
@@ -6,7 +6,7 @@ define zeroext i1 @f1(i8* %x) {
; CHECK-NEXT: ret
entry:
- %0 = load i8* %x, align 1, !range !0
+ %0 = load i8, i8* %x, align 1, !range !0
%tobool = trunc i8 %0 to i1
ret i1 %tobool
}
@@ -17,7 +17,7 @@ define zeroext i1 @f2(i8* %x) {
; CHECK-NEXT: ret
entry:
- %0 = load i8* %x, align 1, !range !0
+ %0 = load i8, i8* %x, align 1, !range !0
%tobool = icmp ne i8 %0, 0
ret i1 %tobool
}
diff --git a/test/CodeGen/X86/pr12889.ll b/test/CodeGen/X86/pr12889.ll
index 428e9b760b70..8234fcc67e08 100644
--- a/test/CodeGen/X86/pr12889.ll
+++ b/test/CodeGen/X86/pr12889.ll
@@ -6,7 +6,7 @@ target triple = "x86_64-unknown-linux-gnu"
define void @func() nounwind uwtable {
entry:
- %0 = load i8* @c0, align 1
+ %0 = load i8, i8* @c0, align 1
%tobool = icmp ne i8 %0, 0
%conv = zext i1 %tobool to i8
%storemerge = shl nuw nsw i8 %conv, %conv
diff --git a/test/CodeGen/X86/pr13209.ll b/test/CodeGen/X86/pr13209.ll
index 8e5eca2b2c21..0d5196fc7c8a 100644
--- a/test/CodeGen/X86/pr13209.ll
+++ b/test/CodeGen/X86/pr13209.ll
@@ -11,37 +11,37 @@ indirectgoto.preheader:
%frombool.i5915.ph = phi i8 [ undef, %if.end51 ], [ %frombool.i5917, %jit_return ]
br label %indirectgoto
do.end165:
- %tmp92 = load i8** %x, align 8
+ %tmp92 = load i8*, i8** %x, align 8
br label %indirectgoto
do.end209:
- %tmp104 = load i8** %x, align 8
+ %tmp104 = load i8*, i8** %x, align 8
br label %indirectgoto
do.end220:
- %tmp107 = load i8** %x, align 8
+ %tmp107 = load i8*, i8** %x, align 8
br label %indirectgoto
do.end231:
- %tmp110 = load i8** %x, align 8
+ %tmp110 = load i8*, i8** %x, align 8
br label %indirectgoto
do.end242:
- %tmp113 = load i8** %x, align 8
+ %tmp113 = load i8*, i8** %x, align 8
br label %indirectgoto
do.end253:
- %tmp116 = load i8** %x, align 8
+ %tmp116 = load i8*, i8** %x, align 8
br label %indirectgoto
do.end286:
- %tmp125 = load i8** %x, align 8
+ %tmp125 = load i8*, i8** %x, align 8
br label %indirectgoto
do.end297:
- %tmp128 = load i8** %x, align 8
+ %tmp128 = load i8*, i8** %x, align 8
br label %indirectgoto
do.end308:
- %tmp131 = load i8** %x, align 8
+ %tmp131 = load i8*, i8** %x, align 8
br label %indirectgoto
do.end429:
- %tmp164 = load i8** %x, align 8
+ %tmp164 = load i8*, i8** %x, align 8
br label %indirectgoto
do.end440:
- %tmp167 = load i8** %x, align 8
+ %tmp167 = load i8*, i8** %x, align 8
br label %indirectgoto
do.body482:
br i1 false, label %indirectgoto, label %do.body495
@@ -55,16 +55,16 @@ inline_return:
jit_return:
br label %indirectgoto.preheader
L_JSOP_UINT24:
- %tmp864 = load i8** %x, align 8
+ %tmp864 = load i8*, i8** %x, align 8
br label %indirectgoto
L_JSOP_THROWING:
- %tmp1201 = load i8** %x, align 8
+ %tmp1201 = load i8*, i8** %x, align 8
br label %indirectgoto
do.body4936:
- %tmp1240 = load i8** %x, align 8
+ %tmp1240 = load i8*, i8** %x, align 8
br label %indirectgoto
do.body5184:
- %tmp1340 = load i8** %x, align 8
+ %tmp1340 = load i8*, i8** %x, align 8
br label %indirectgoto
if.end5571:
br label %inline_return
diff --git a/test/CodeGen/X86/pr13458.ll b/test/CodeGen/X86/pr13458.ll
index 55548b3c3b45..426fd67e6b43 100644
--- a/test/CodeGen/X86/pr13458.ll
+++ b/test/CodeGen/X86/pr13458.ll
@@ -9,6 +9,6 @@ target triple = "x86_64-apple-darwin11.4.2"
define void @MergeStats() nounwind {
allocas:
- %r.i.i720 = atomicrmw max i64* getelementptr inbounds (%v8_uniform_Stats.0.2.4.10* @globalStats, i64 0, i32 30), i64 0 seq_cst
+ %r.i.i720 = atomicrmw max i64* getelementptr inbounds (%v8_uniform_Stats.0.2.4.10, %v8_uniform_Stats.0.2.4.10* @globalStats, i64 0, i32 30), i64 0 seq_cst
ret void
}
diff --git a/test/CodeGen/X86/pr13859.ll b/test/CodeGen/X86/pr13859.ll
index 719721dfd87b..1ebc79647ee1 100644
--- a/test/CodeGen/X86/pr13859.ll
+++ b/test/CodeGen/X86/pr13859.ll
@@ -7,7 +7,7 @@ entry:
%aMyAlloca = alloca i32, align 32
%dest = alloca <1 x i64>, align 32
- %a32 = load i32* %aMyAlloca, align 4
+ %a32 = load i32, i32* %aMyAlloca, align 4
%aconv = trunc i32 %a32 to i16
%a36 = insertelement <4 x i16> undef, i16 %aconv, i32 0
%a37 = insertelement <4 x i16> %a36, i16 %aconv, i32 1
diff --git a/test/CodeGen/X86/pr13899.ll b/test/CodeGen/X86/pr13899.ll
index bc81e34d67e3..abfb918ff0f5 100644
--- a/test/CodeGen/X86/pr13899.ll
+++ b/test/CodeGen/X86/pr13899.ll
@@ -26,25 +26,25 @@ declare <8 x float> @bar64(<8 x float> %i0, <8 x float> %i1,
<8 x float> %i8, <8 x float> %i9)
define <8 x float> @foo64(<8 x float>* %p) {
- %1 = load <8 x float>* %p
- %idx1 = getelementptr inbounds <8 x float>* %p, i64 1
- %2 = load <8 x float>* %idx1
- %idx2 = getelementptr inbounds <8 x float>* %p, i64 2
- %3 = load <8 x float>* %idx2
- %idx3 = getelementptr inbounds <8 x float>* %p, i64 3
- %4 = load <8 x float>* %idx3
- %idx4 = getelementptr inbounds <8 x float>* %p, i64 4
- %5 = load <8 x float>* %idx4
- %idx5 = getelementptr inbounds <8 x float>* %p, i64 5
- %6 = load <8 x float>* %idx5
- %idx6 = getelementptr inbounds <8 x float>* %p, i64 6
- %7 = load <8 x float>* %idx6
- %idx7 = getelementptr inbounds <8 x float>* %p, i64 7
- %8 = load <8 x float>* %idx7
- %idx8 = getelementptr inbounds <8 x float>* %p, i64 8
- %9 = load <8 x float>* %idx8
- %idx9 = getelementptr inbounds <8 x float>* %p, i64 9
- %10 = load <8 x float>* %idx9
+ %1 = load <8 x float>, <8 x float>* %p
+ %idx1 = getelementptr inbounds <8 x float>, <8 x float>* %p, i64 1
+ %2 = load <8 x float>, <8 x float>* %idx1
+ %idx2 = getelementptr inbounds <8 x float>, <8 x float>* %p, i64 2
+ %3 = load <8 x float>, <8 x float>* %idx2
+ %idx3 = getelementptr inbounds <8 x float>, <8 x float>* %p, i64 3
+ %4 = load <8 x float>, <8 x float>* %idx3
+ %idx4 = getelementptr inbounds <8 x float>, <8 x float>* %p, i64 4
+ %5 = load <8 x float>, <8 x float>* %idx4
+ %idx5 = getelementptr inbounds <8 x float>, <8 x float>* %p, i64 5
+ %6 = load <8 x float>, <8 x float>* %idx5
+ %idx6 = getelementptr inbounds <8 x float>, <8 x float>* %p, i64 6
+ %7 = load <8 x float>, <8 x float>* %idx6
+ %idx7 = getelementptr inbounds <8 x float>, <8 x float>* %p, i64 7
+ %8 = load <8 x float>, <8 x float>* %idx7
+ %idx8 = getelementptr inbounds <8 x float>, <8 x float>* %p, i64 8
+ %9 = load <8 x float>, <8 x float>* %idx8
+ %idx9 = getelementptr inbounds <8 x float>, <8 x float>* %p, i64 9
+ %10 = load <8 x float>, <8 x float>* %idx9
%r = tail call <8 x float> @bar64(<8 x float> %1, <8 x float> %2,
<8 x float> %3, <8 x float> %4,
<8 x float> %5, <8 x float> %6,
diff --git a/test/CodeGen/X86/pr14161.ll b/test/CodeGen/X86/pr14161.ll
index c2bb8d3df8f3..95c71405bc9e 100644
--- a/test/CodeGen/X86/pr14161.ll
+++ b/test/CodeGen/X86/pr14161.ll
@@ -10,7 +10,7 @@ define <2 x i16> @good(<4 x i32>*, <4 x i8>*) {
; CHECK-NEXT: pmovzxwq %xmm0, %xmm0
; CHECK-NEXT: retq
entry:
- %2 = load <4 x i32>* %0, align 16
+ %2 = load <4 x i32>, <4 x i32>* %0, align 16
%3 = call <4 x i32> @llvm.x86.sse41.pminud(<4 x i32> %2, <4 x i32> <i32 127, i32 127, i32 127, i32 127>)
%4 = extractelement <4 x i32> %3, i32 0
%5 = extractelement <4 x i32> %3, i32 1
@@ -26,12 +26,11 @@ define <2 x i16> @bad(<4 x i32>*, <4 x i8>*) {
; CHECK: # BB#0: # %entry
; CHECK-NEXT: movdqa (%rdi), %xmm0
; CHECK-NEXT: pminud {{.*}}(%rip), %xmm0
-; CHECK-NEXT: pextrd $1, %xmm0, %eax
-; CHECK-NEXT: movd %eax, %xmm0
-; CHECK-NEXT: pmovzxwq %xmm0, %xmm0
+; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
+; CHECK-NEXT: pmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; CHECK-NEXT: retq
entry:
- %2 = load <4 x i32>* %0, align 16
+ %2 = load <4 x i32>, <4 x i32>* %0, align 16
%3 = call <4 x i32> @llvm.x86.sse41.pminud(<4 x i32> %2, <4 x i32> <i32 127, i32 127, i32 127, i32 127>)
%4 = extractelement <4 x i32> %3, i32 0
%5 = extractelement <4 x i32> %3, i32 1
diff --git a/test/CodeGen/X86/pr14333.ll b/test/CodeGen/X86/pr14333.ll
index 86c12ef6b547..89779302d7f2 100644
--- a/test/CodeGen/X86/pr14333.ll
+++ b/test/CodeGen/X86/pr14333.ll
@@ -1,9 +1,9 @@
; RUN: llc -mtriple=x86_64-unknown-unknown < %s
%foo = type { i64, i64 }
define void @bar(%foo* %zed) {
- %tmp = getelementptr inbounds %foo* %zed, i64 0, i32 0
+ %tmp = getelementptr inbounds %foo, %foo* %zed, i64 0, i32 0
store i64 0, i64* %tmp, align 8
- %tmp2 = getelementptr inbounds %foo* %zed, i64 0, i32 1
+ %tmp2 = getelementptr inbounds %foo, %foo* %zed, i64 0, i32 1
store i64 0, i64* %tmp2, align 8
%tmp3 = bitcast %foo* %zed to i8*
call void @llvm.memset.p0i8.i64(i8* %tmp3, i8 0, i64 16, i32 8, i1 false)
diff --git a/test/CodeGen/X86/pr14562.ll b/test/CodeGen/X86/pr14562.ll
index e66f1752a30f..31674546423e 100644
--- a/test/CodeGen/X86/pr14562.ll
+++ b/test/CodeGen/X86/pr14562.ll
@@ -3,7 +3,7 @@
@temp1 = global i64 -77129852189294865, align 8
define void @foo() nounwind {
- %x = load i64* @temp1, align 8
+ %x = load i64, i64* @temp1, align 8
%s = shl i64 %x, 32
%t = trunc i64 %s to i32
%z = zext i32 %t to i64
diff --git a/test/CodeGen/X86/pr1489.ll b/test/CodeGen/X86/pr1489.ll
index c9e24bfb13fa..13ced2a32745 100644
--- a/test/CodeGen/X86/pr1489.ll
+++ b/test/CodeGen/X86/pr1489.ll
@@ -48,7 +48,7 @@ entry:
%tmp1 = tail call i32 @bar( ) ; <i32> [#uses=1]
%tmp2 = tail call i32 @foo( ) ; <i32> [#uses=1]
%tmp3 = tail call i32 @quux( ) ; <i32> [#uses=1]
- %tmp5 = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([13 x i8]* @.str, i32 0, i32 0), i32 %tmp3, i32 %tmp2, i32 %tmp1, i32 %tmp ) ; <i32> [#uses=0]
+ %tmp5 = tail call i32 (i8*, ...) @printf( i8* getelementptr ([13 x i8], [13 x i8]* @.str, i32 0, i32 0), i32 %tmp3, i32 %tmp2, i32 %tmp1, i32 %tmp ) ; <i32> [#uses=0]
ret i32 undef
}
diff --git a/test/CodeGen/X86/pr1505b.ll b/test/CodeGen/X86/pr1505b.ll
index c348fec54674..c1363457c8c4 100644
--- a/test/CodeGen/X86/pr1505b.ll
+++ b/test/CodeGen/X86/pr1505b.ll
@@ -33,7 +33,7 @@ declare i32 @__cxa_atexit(void (i8*)*, i8*, i8*)
define i32 @main() {
entry:
; CHECK: flds
- %tmp6 = load volatile float* @a ; <float> [#uses=1]
+ %tmp6 = load volatile float, float* @a ; <float> [#uses=1]
; CHECK: fstps (%esp)
; CHECK: tanf
%tmp9 = tail call float @tanf( float %tmp6 ) ; <float> [#uses=1]
@@ -41,14 +41,14 @@ entry:
; CHECK: fstp
; CHECK: fldl
- %tmp12 = load volatile double* @b ; <double> [#uses=1]
+ %tmp12 = load volatile double, double* @b ; <double> [#uses=1]
; CHECK: fstpl (%esp)
; CHECK: tan
%tmp13 = tail call double @tan( double %tmp12 ) ; <double> [#uses=1]
; Spill returned value:
; CHECK: fstp
%tmp1314 = fptrunc double %tmp13 to float ; <float> [#uses=1]
- %tmp16 = tail call %"struct.std::basic_ostream<char,std::char_traits<char> >"* @_ZStlsISt11char_traitsIcEERSt13basic_ostreamIcT_ES5_PKc( %"struct.std::basic_ostream<char,std::char_traits<char> >"* @_ZSt4cout, i8* getelementptr ([12 x i8]* @.str, i32 0, i32 0) ) ; <%"struct.std::basic_ostream<char,std::char_traits<char> >"*> [#uses=1]
+ %tmp16 = tail call %"struct.std::basic_ostream<char,std::char_traits<char> >"* @_ZStlsISt11char_traitsIcEERSt13basic_ostreamIcT_ES5_PKc( %"struct.std::basic_ostream<char,std::char_traits<char> >"* @_ZSt4cout, i8* getelementptr ([12 x i8], [12 x i8]* @.str, i32 0, i32 0) ) ; <%"struct.std::basic_ostream<char,std::char_traits<char> >"*> [#uses=1]
%tmp1920 = fpext float %tmp9 to double ; <double> [#uses=1]
; reload:
; CHECK: fld
@@ -58,7 +58,7 @@ entry:
%tmp30 = tail call %"struct.std::basic_ostream<char,std::char_traits<char> >"* @_ZSt4endlIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_( %"struct.std::basic_ostream<char,std::char_traits<char> >"* %tmp22 ) ; <%"struct.std::basic_ostream<char,std::char_traits<char> >"*> [#uses=0]
; reload:
; CHECK: ZStlsISt11char_traitsIcEERSt13basic_ostreamIcT_ES5_PKc
- %tmp34 = tail call %"struct.std::basic_ostream<char,std::char_traits<char> >"* @_ZStlsISt11char_traitsIcEERSt13basic_ostreamIcT_ES5_PKc( %"struct.std::basic_ostream<char,std::char_traits<char> >"* @_ZSt4cout, i8* getelementptr ([13 x i8]* @.str1, i32 0, i32 0) ) ; <%"struct.std::basic_ostream<char,std::char_traits<char> >"*> [#uses=1]
+ %tmp34 = tail call %"struct.std::basic_ostream<char,std::char_traits<char> >"* @_ZStlsISt11char_traitsIcEERSt13basic_ostreamIcT_ES5_PKc( %"struct.std::basic_ostream<char,std::char_traits<char> >"* @_ZSt4cout, i8* getelementptr ([13 x i8], [13 x i8]* @.str1, i32 0, i32 0) ) ; <%"struct.std::basic_ostream<char,std::char_traits<char> >"*> [#uses=1]
%tmp3940 = fpext float %tmp1314 to double ; <double> [#uses=1]
; CHECK: fld
; CHECK: fstpl
diff --git a/test/CodeGen/X86/pr15267.ll b/test/CodeGen/X86/pr15267.ll
index 90df9905fe1a..95d7deb34170 100644
--- a/test/CodeGen/X86/pr15267.ll
+++ b/test/CodeGen/X86/pr15267.ll
@@ -1,7 +1,7 @@
; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=corei7-avx | FileCheck %s
define <4 x i3> @test1(<4 x i3>* %in) nounwind {
- %ret = load <4 x i3>* %in, align 1
+ %ret = load <4 x i3>, <4 x i3>* %in, align 1
ret <4 x i3> %ret
}
; CHECK-LABEL: test1
@@ -20,7 +20,7 @@ define <4 x i3> @test1(<4 x i3>* %in) nounwind {
; CHECK: ret
define <4 x i1> @test2(<4 x i1>* %in) nounwind {
- %ret = load <4 x i1>* %in, align 1
+ %ret = load <4 x i1>, <4 x i1>* %in, align 1
ret <4 x i1> %ret
}
@@ -40,7 +40,7 @@ define <4 x i1> @test2(<4 x i1>* %in) nounwind {
; CHECK: ret
define <4 x i64> @test3(<4 x i1>* %in) nounwind {
- %wide.load35 = load <4 x i1>* %in, align 1
+ %wide.load35 = load <4 x i1>, <4 x i1>* %in, align 1
%sext = sext <4 x i1> %wide.load35 to <4 x i64>
ret <4 x i64> %sext
}
@@ -50,25 +50,27 @@ define <4 x i64> @test3(<4 x i1>* %in) nounwind {
; CHECK: movq
; CHECK: shlq
; CHECK: sarq
-; CHECK: vmovq
; CHECK: movq
; CHECK: shlq
; CHECK: sarq
-; CHECK: vmovq
-; CHECK: vpunpcklqdq
+; CHECK: vmovd
+; CHECK: vpinsrd
; CHECK: movq
; CHECK: shlq
; CHECK: sarq
-; CHECK: vmovq
+; CHECK: vpinsrd
; CHECK: shlq
; CHECK: sarq
-; CHECK: vmovq
-; CHECK: vpunpcklqdq
+; CHECK: vpinsrd
+; CHECK: vpmovsxdq
+; CHECK: vmovd
+; CHECK: vpinsrd
+; CHECK: vpmovsxdq
; CHECK: vinsertf128
; CHECK: ret
define <16 x i4> @test4(<16 x i4>* %in) nounwind {
- %ret = load <16 x i4>* %in, align 1
+ %ret = load <16 x i4>, <16 x i4>* %in, align 1
ret <16 x i4> %ret
}
diff --git a/test/CodeGen/X86/pr15309.ll b/test/CodeGen/X86/pr15309.ll
index 6dbbc72a7b7a..e9d9b9e54c13 100644
--- a/test/CodeGen/X86/pr15309.ll
+++ b/test/CodeGen/X86/pr15309.ll
@@ -2,10 +2,10 @@
define void @test_convert_float2_ulong2(<2 x i64>* nocapture %src, <2 x float>* nocapture %dest) noinline {
L.entry:
- %0 = getelementptr <2 x i64>* %src, i32 10
- %1 = load <2 x i64>* %0, align 16
+ %0 = getelementptr <2 x i64>, <2 x i64>* %src, i32 10
+ %1 = load <2 x i64>, <2 x i64>* %0, align 16
%2 = uitofp <2 x i64> %1 to <2 x float>
- %3 = getelementptr <2 x float>* %dest, i32 10
+ %3 = getelementptr <2 x float>, <2 x float>* %dest, i32 10
store <2 x float> %2, <2 x float>* %3, align 8
ret void
}
diff --git a/test/CodeGen/X86/pr18023.ll b/test/CodeGen/X86/pr18023.ll
index 4c6f8cfce732..c7ea20c281ba 100644
--- a/test/CodeGen/X86/pr18023.ll
+++ b/test/CodeGen/X86/pr18023.ll
@@ -13,18 +13,18 @@
@.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1
define void @func() {
- store i32 1, i32* getelementptr inbounds ([3 x i32]* @a, i64 0, i64 1), align 4
- store i32 0, i32* getelementptr inbounds ([3 x i32]* @a, i64 0, i64 0), align 4
- %1 = load volatile i32* @b, align 4
- store i32 1, i32* getelementptr inbounds ([3 x i32]* @a, i64 0, i64 1), align 4
- store i32 0, i32* getelementptr inbounds ([3 x i32]* @a, i64 0, i64 1), align 4
- %2 = load volatile i32* @b, align 4
- store i32 1, i32* getelementptr inbounds ([3 x i32]* @a, i64 0, i64 1), align 4
- store i32 0, i32* getelementptr inbounds ([3 x i32]* @a, i64 0, i64 2), align 4
- %3 = load volatile i32* @b, align 4
+ store i32 1, i32* getelementptr inbounds ([3 x i32], [3 x i32]* @a, i64 0, i64 1), align 4
+ store i32 0, i32* getelementptr inbounds ([3 x i32], [3 x i32]* @a, i64 0, i64 0), align 4
+ %1 = load volatile i32, i32* @b, align 4
+ store i32 1, i32* getelementptr inbounds ([3 x i32], [3 x i32]* @a, i64 0, i64 1), align 4
+ store i32 0, i32* getelementptr inbounds ([3 x i32], [3 x i32]* @a, i64 0, i64 1), align 4
+ %2 = load volatile i32, i32* @b, align 4
+ store i32 1, i32* getelementptr inbounds ([3 x i32], [3 x i32]* @a, i64 0, i64 1), align 4
+ store i32 0, i32* getelementptr inbounds ([3 x i32], [3 x i32]* @a, i64 0, i64 2), align 4
+ %3 = load volatile i32, i32* @b, align 4
store i32 3, i32* @c, align 4
- %4 = load i32* getelementptr inbounds ([3 x i32]* @a, i64 0, i64 1), align 4
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32 %4)
+ %4 = load i32, i32* getelementptr inbounds ([3 x i32], [3 x i32]* @a, i64 0, i64 1), align 4
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), i32 %4)
ret void
}
diff --git a/test/CodeGen/X86/pr18162.ll b/test/CodeGen/X86/pr18162.ll
index 523e47db5eee..7912db863edb 100644
--- a/test/CodeGen/X86/pr18162.ll
+++ b/test/CodeGen/X86/pr18162.ll
@@ -13,15 +13,15 @@ define { i64, <2 x float> } @Foo(%"Iterator"* %this) {
entry:
%retval = alloca i32
%this.addr = alloca %"Iterator"*
- %this1 = load %"Iterator"** %this.addr
- %bundle_ = getelementptr inbounds %"Iterator"* %this1, i32 0, i32 0
- %0 = load i32** %bundle_
+ %this1 = load %"Iterator"*, %"Iterator"** %this.addr
+ %bundle_ = getelementptr inbounds %"Iterator", %"Iterator"* %this1, i32 0, i32 0
+ %0 = load i32*, i32** %bundle_
%1 = call { i64, <2 x float> } @Call()
%2 = call { i64, <2 x float> }* @CallPtr()
- %3 = getelementptr { i64, <2 x float> }* %2, i32 0, i32 1
+ %3 = getelementptr { i64, <2 x float> }, { i64, <2 x float> }* %2, i32 0, i32 1
%4 = extractvalue { i64, <2 x float> } %1, 1
store <2 x float> %4, <2 x float>* %3
- %5 = load { i64, <2 x float> }* %2
+ %5 = load { i64, <2 x float> }, { i64, <2 x float> }* %2
ret { i64, <2 x float> } %5
}
diff --git a/test/CodeGen/X86/pr18846.ll b/test/CodeGen/X86/pr18846.ll
index c65bc79d6813..02c33fed8d6b 100644
--- a/test/CodeGen/X86/pr18846.ll
+++ b/test/CodeGen/X86/pr18846.ll
@@ -21,31 +21,31 @@ for.body29: ; preds = %for.body29, %entry
br i1 undef, label %for.body29, label %for.body65
for.body65: ; preds = %for.body29
- %0 = load float* undef, align 4, !tbaa !1
+ %0 = load float, float* undef, align 4, !tbaa !1
%vecinit7.i4448 = insertelement <8 x float> undef, float %0, i32 7
- %1 = load float* null, align 4, !tbaa !1
+ %1 = load float, float* null, align 4, !tbaa !1
%vecinit7.i4304 = insertelement <8 x float> undef, float %1, i32 7
- %2 = load float* undef, align 4, !tbaa !1
+ %2 = load float, float* undef, align 4, !tbaa !1
%vecinit7.i4196 = insertelement <8 x float> undef, float %2, i32 7
%3 = or i64 0, 16
%add.ptr111.sum4096 = add i64 %3, 0
- %4 = load <8 x float>* null, align 16, !tbaa !5
- %add.ptr162 = getelementptr inbounds [65536 x float]* null, i64 0, i64 %add.ptr111.sum4096
+ %4 = load <8 x float>, <8 x float>* null, align 16, !tbaa !5
+ %add.ptr162 = getelementptr inbounds [65536 x float], [65536 x float]* null, i64 0, i64 %add.ptr111.sum4096
%__v.i4158 = bitcast float* %add.ptr162 to <8 x float>*
- %5 = load <8 x float>* %__v.i4158, align 16, !tbaa !5
+ %5 = load <8 x float>, <8 x float>* %__v.i4158, align 16, !tbaa !5
%add.ptr158.sum40975066 = or i64 %add.ptr111.sum4096, 8
- %add.ptr183 = getelementptr inbounds [65536 x float]* null, i64 0, i64 %add.ptr158.sum40975066
+ %add.ptr183 = getelementptr inbounds [65536 x float], [65536 x float]* null, i64 0, i64 %add.ptr158.sum40975066
%__v.i4162 = bitcast float* %add.ptr183 to <8 x float>*
- %6 = load <8 x float>* %__v.i4162, align 16, !tbaa !5
+ %6 = load <8 x float>, <8 x float>* %__v.i4162, align 16, !tbaa !5
%add.ptr200.sum40995067 = or i64 undef, 8
- %add.ptr225 = getelementptr inbounds [65536 x float]* null, i64 0, i64 %add.ptr200.sum40995067
+ %add.ptr225 = getelementptr inbounds [65536 x float], [65536 x float]* null, i64 0, i64 %add.ptr200.sum40995067
%__v.i4167 = bitcast float* %add.ptr225 to <8 x float>*
- %7 = load <8 x float>* %__v.i4167, align 4, !tbaa !5
- %8 = load <8 x float>* undef, align 16, !tbaa !5
+ %7 = load <8 x float>, <8 x float>* %__v.i4167, align 4, !tbaa !5
+ %8 = load <8 x float>, <8 x float>* undef, align 16, !tbaa !5
%add.ptr242.sum41015068 = or i64 0, 8
- %add.ptr267 = getelementptr inbounds [65536 x float]* null, i64 0, i64 %add.ptr242.sum41015068
+ %add.ptr267 = getelementptr inbounds [65536 x float], [65536 x float]* null, i64 0, i64 %add.ptr242.sum41015068
%__v.i4171 = bitcast float* %add.ptr267 to <8 x float>*
- %9 = load <8 x float>* %__v.i4171, align 4, !tbaa !5
+ %9 = load <8 x float>, <8 x float>* %__v.i4171, align 4, !tbaa !5
%mul.i4690 = fmul <8 x float> %7, undef
%add.i4665 = fadd <8 x float> undef, undef
%mul.i4616 = fmul <8 x float> %8, undef
@@ -56,8 +56,8 @@ for.body65: ; preds = %for.body29
%mul.i4578 = fmul <8 x float> %9, undef
%add.i4577 = fadd <8 x float> %add.i4593, %mul.i4578
call void @llvm.x86.avx.storeu.ps.256(i8* undef, <8 x float> %add.i4577) #1
- %10 = load <8 x float>* null, align 16, !tbaa !5
- %11 = load <8 x float>* undef, align 16, !tbaa !5
+ %10 = load <8 x float>, <8 x float>* null, align 16, !tbaa !5
+ %11 = load <8 x float>, <8 x float>* undef, align 16, !tbaa !5
%mul.i4564 = fmul <8 x float> %4, undef
%add.i4563 = fadd <8 x float> %10, %mul.i4564
%mul.i4560 = fmul <8 x float> %5, undef
@@ -107,7 +107,7 @@ for.body65: ; preds = %for.body29
call void @llvm.x86.avx.storeu.ps.256(i8* undef, <8 x float> %add.i4293) #1
call void @llvm.x86.avx.storeu.ps.256(i8* undef, <8 x float> %add.i4291) #1
call void @llvm.x86.avx.storeu.ps.256(i8* undef, <8 x float> %add.i4289) #1
- %12 = load <8 x float>* undef, align 16, !tbaa !5
+ %12 = load <8 x float>, <8 x float>* undef, align 16, !tbaa !5
%mul.i4274 = fmul <8 x float> undef, undef
%add.i4273 = fadd <8 x float> %12, %mul.i4274
%mul.i4258 = fmul <8 x float> %7, undef
diff --git a/test/CodeGen/X86/pr20020.ll b/test/CodeGen/X86/pr20020.ll
index 83dae369dd75..56c12faaf707 100644
--- a/test/CodeGen/X86/pr20020.ll
+++ b/test/CodeGen/X86/pr20020.ll
@@ -17,7 +17,7 @@ entry:
br i1 %cmp4, label %for.body.preheader, label %for.end38
for.body.preheader: ; preds = %entry
- %gep = getelementptr %struct.planet* %bodies, i64 1, i32 1
+ %gep = getelementptr %struct.planet, %struct.planet* %bodies, i64 1, i32 1
%gep13 = bitcast double* %gep to %struct.planet*
%0 = add i32 %nbodies, -1
br label %for.body
@@ -32,33 +32,33 @@ for.body: ; preds = %for.body.preheader,
br i1 %cmp22, label %for.body3.lr.ph, label %for.inc20
for.body3.lr.ph: ; preds = %for.body
- %x = getelementptr inbounds %struct.planet* %bodies, i64 %iv9, i32 0
- %y = getelementptr inbounds %struct.planet* %bodies, i64 %iv9, i32 1
- %vx = getelementptr inbounds %struct.planet* %bodies, i64 %iv9, i32 2
+ %x = getelementptr inbounds %struct.planet, %struct.planet* %bodies, i64 %iv9, i32 0
+ %y = getelementptr inbounds %struct.planet, %struct.planet* %bodies, i64 %iv9, i32 1
+ %vx = getelementptr inbounds %struct.planet, %struct.planet* %bodies, i64 %iv9, i32 2
br label %for.body3
for.body3: ; preds = %for.body3, %for.body3.lr.ph
%iv20 = phi i32 [ %iv.next21, %for.body3 ], [ %iv19, %for.body3.lr.ph ]
%iv15 = phi %struct.planet* [ %gep16, %for.body3 ], [ %iv, %for.body3.lr.ph ]
%iv1517 = bitcast %struct.planet* %iv15 to double*
- %2 = load double* %x, align 8
- %gep18 = getelementptr double* %iv1517, i64 -1
- %3 = load double* %gep18, align 8
+ %2 = load double, double* %x, align 8
+ %gep18 = getelementptr double, double* %iv1517, i64 -1
+ %3 = load double, double* %gep18, align 8
%sub = fsub double %2, %3
- %4 = load double* %y, align 8
- %5 = load double* %iv1517, align 8
+ %4 = load double, double* %y, align 8
+ %5 = load double, double* %iv1517, align 8
%sub8 = fsub double %4, %5
%add10 = fadd double %sub, %sub8
%call = tail call double @sqrt(double %sub8) #2
store double %add10, double* %vx, align 8
- %gep16 = getelementptr %struct.planet* %iv15, i64 1
+ %gep16 = getelementptr %struct.planet, %struct.planet* %iv15, i64 1
%iv.next21 = add i32 %iv20, -1
%exitcond = icmp eq i32 %iv.next21, 0
br i1 %exitcond, label %for.inc20, label %for.body3
for.inc20: ; preds = %for.body3, %for.body
%lftr.wideiv11 = trunc i64 %iv.next10 to i32
- %gep14 = getelementptr %struct.planet* %iv, i64 1
+ %gep14 = getelementptr %struct.planet, %struct.planet* %iv, i64 1
%iv.next = add i32 %iv19, -1
%exitcond12 = icmp eq i32 %lftr.wideiv11, %nbodies
br i1 %exitcond12, label %for.end38, label %for.body
diff --git a/test/CodeGen/X86/pr21099.ll b/test/CodeGen/X86/pr21099.ll
index 07292c125eea..cd8205dbc815 100644
--- a/test/CodeGen/X86/pr21099.ll
+++ b/test/CodeGen/X86/pr21099.ll
@@ -2,8 +2,7 @@
define void @pr21099(i64* %p) {
; CHECK-LABEL: pr21099
-; CHECK: lock
-; CHECK-NEXT: addq $-2147483648
+; CHECK: lock addq $-2147483648
; This number is INT32_MIN: 0x80000000UL
%1 = atomicrmw add i64* %p, i64 -2147483648 seq_cst
ret void
diff --git a/test/CodeGen/X86/pr2177.ll b/test/CodeGen/X86/pr2177.ll
index e941bf7fdabe..8260a7f0614d 100644
--- a/test/CodeGen/X86/pr2177.ll
+++ b/test/CodeGen/X86/pr2177.ll
@@ -22,10 +22,10 @@ bb10: ; preds = %bb5
bb54: ; preds = %bb5
ret void
bb118: ; preds = %bb5, %bb5, %bb5, %bb5
- %tmp125 = load i8** null, align 8 ; <i8*> [#uses=1]
+ %tmp125 = load i8*, i8** null, align 8 ; <i8*> [#uses=1]
%tmp125126 = bitcast i8* %tmp125 to %struct.S2259* ; <%struct.S2259*> [#uses=1]
- %tmp128 = getelementptr %struct.S2259* %tmp125126, i32 0, i32 0 ; <<4 x i16>*> [#uses=1]
- %tmp129 = load <4 x i16>* %tmp128, align 8 ; <<4 x i16>> [#uses=1]
+ %tmp128 = getelementptr %struct.S2259, %struct.S2259* %tmp125126, i32 0, i32 0 ; <<4 x i16>*> [#uses=1]
+ %tmp129 = load <4 x i16>, <4 x i16>* %tmp128, align 8 ; <<4 x i16>> [#uses=1]
store <4 x i16> %tmp129, <4 x i16>* null, align 8
ret void
bb155: ; preds = %bb5
diff --git a/test/CodeGen/X86/pr21792.ll b/test/CodeGen/X86/pr21792.ll
new file mode 100644
index 000000000000..59866c090543
--- /dev/null
+++ b/test/CodeGen/X86/pr21792.ll
@@ -0,0 +1,41 @@
+; RUN: llc -mtriple=x86_64-linux -mcpu=corei7 < %s | FileCheck %s
+; This fixes a missing case in the MI scheduler's constrainLocalCopy exposed by
+; PR21792
+
+@stuff = external constant [256 x double], align 16
+
+define void @func(<4 x float> %vx) {
+entry:
+ %tmp2 = bitcast <4 x float> %vx to <2 x i64>
+ %and.i = and <2 x i64> %tmp2, <i64 8727373547504, i64 8727373547504>
+ %tmp3 = bitcast <2 x i64> %and.i to <4 x i32>
+ %index.sroa.0.0.vec.extract = extractelement <4 x i32> %tmp3, i32 0
+ %idx.ext = sext i32 %index.sroa.0.0.vec.extract to i64
+ %add.ptr = getelementptr inbounds i8, i8* bitcast ([256 x double]* @stuff to i8*), i64 %idx.ext
+ %tmp4 = bitcast i8* %add.ptr to double*
+ %index.sroa.0.4.vec.extract = extractelement <4 x i32> %tmp3, i32 1
+ %idx.ext5 = sext i32 %index.sroa.0.4.vec.extract to i64
+ %add.ptr6 = getelementptr inbounds i8, i8* bitcast ([256 x double]* @stuff to i8*), i64 %idx.ext5
+ %tmp5 = bitcast i8* %add.ptr6 to double*
+ %index.sroa.0.8.vec.extract = extractelement <4 x i32> %tmp3, i32 2
+ %idx.ext14 = sext i32 %index.sroa.0.8.vec.extract to i64
+ %add.ptr15 = getelementptr inbounds i8, i8* bitcast ([256 x double]* @stuff to i8*), i64 %idx.ext14
+ %tmp6 = bitcast i8* %add.ptr15 to double*
+ %index.sroa.0.12.vec.extract = extractelement <4 x i32> %tmp3, i32 3
+ %idx.ext19 = sext i32 %index.sroa.0.12.vec.extract to i64
+ %add.ptr20 = getelementptr inbounds i8, i8* bitcast ([256 x double]* @stuff to i8*), i64 %idx.ext19
+ %tmp7 = bitcast i8* %add.ptr20 to double*
+ %add.ptr46 = getelementptr inbounds i8, i8* bitcast (double* getelementptr inbounds ([256 x double], [256 x double]* @stuff, i64 0, i64 1) to i8*), i64 %idx.ext
+ %tmp16 = bitcast i8* %add.ptr46 to double*
+ %add.ptr51 = getelementptr inbounds i8, i8* bitcast (double* getelementptr inbounds ([256 x double], [256 x double]* @stuff, i64 0, i64 1) to i8*), i64 %idx.ext5
+ %tmp17 = bitcast i8* %add.ptr51 to double*
+ call void @toto(double* %tmp4, double* %tmp5, double* %tmp6, double* %tmp7, double* %tmp16, double* %tmp17)
+ ret void
+; CHECK-LABEL: func:
+; CHECK: pextrq $1, %xmm0,
+; CHECK-NEXT: movd %xmm0, %r[[AX:..]]
+; CHECK-NEXT: movslq %e[[AX]],
+; CHECK-NEXT: sarq $32, %r[[AX]]
+}
+
+declare void @toto(double*, double*, double*, double*, double*, double*)
diff --git a/test/CodeGen/X86/pr2182.ll b/test/CodeGen/X86/pr2182.ll
index 94429b265d97..0cf3acf23b3c 100644
--- a/test/CodeGen/X86/pr2182.ll
+++ b/test/CodeGen/X86/pr2182.ll
@@ -15,16 +15,16 @@ define void @loop_2() nounwind {
; CHECK-NEXT: addl $3, (%{{.*}})
; CHECK-NEXT: ret
- %tmp = load volatile i32* @x, align 4 ; <i32> [#uses=1]
+ %tmp = load volatile i32, i32* @x, align 4 ; <i32> [#uses=1]
%tmp1 = add i32 %tmp, 3 ; <i32> [#uses=1]
store volatile i32 %tmp1, i32* @x, align 4
- %tmp.1 = load volatile i32* @x, align 4 ; <i32> [#uses=1]
+ %tmp.1 = load volatile i32, i32* @x, align 4 ; <i32> [#uses=1]
%tmp1.1 = add i32 %tmp.1, 3 ; <i32> [#uses=1]
store volatile i32 %tmp1.1, i32* @x, align 4
- %tmp.2 = load volatile i32* @x, align 4 ; <i32> [#uses=1]
+ %tmp.2 = load volatile i32, i32* @x, align 4 ; <i32> [#uses=1]
%tmp1.2 = add i32 %tmp.2, 3 ; <i32> [#uses=1]
store volatile i32 %tmp1.2, i32* @x, align 4
- %tmp.3 = load volatile i32* @x, align 4 ; <i32> [#uses=1]
+ %tmp.3 = load volatile i32, i32* @x, align 4 ; <i32> [#uses=1]
%tmp1.3 = add i32 %tmp.3, 3 ; <i32> [#uses=1]
store volatile i32 %tmp1.3, i32* @x, align 4
ret void
diff --git a/test/CodeGen/X86/pr22774.ll b/test/CodeGen/X86/pr22774.ll
index 426fcc43e308..8a80e9d6d625 100644
--- a/test/CodeGen/X86/pr22774.ll
+++ b/test/CodeGen/X86/pr22774.ll
@@ -1,17 +1,14 @@
-; RUN: llc -mattr=avx %s -o - | FileCheck %s
+; RUN: llc -mtriple=x86_64-pc-linux-gnu -mattr=+avx < %s | FileCheck %s
-target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
-target triple = "x86_64-pc-linux-gnu"
-
@in = global <4 x i64> <i64 -1, i64 -1, i64 -1, i64 -1>, align 32
@out = global <2 x i64> zeroinitializer, align 16
define i32 @_Z3foov() {
entry:
-; CHECK: {{vmovdqa|vmovaps}} in(%rip), %ymm0
+; CHECK: vmovdqa in(%rip), %ymm0
; CHECK-NEXT: vmovq %xmm0, %xmm0
-; CHECK-NEXT: {{vmovdqa|vmovaps}} %xmm0, out(%rip)
- %0 = load <4 x i64>* @in, align 32
+; CHECK-NEXT: vmovdqa %xmm0, out(%rip)
+ %0 = load <4 x i64>, <4 x i64>* @in, align 32
%vecext = extractelement <4 x i64> %0, i32 0
%vecinit = insertelement <2 x i64> undef, i64 %vecext, i32 0
%vecinit1 = insertelement <2 x i64> %vecinit, i64 0, i32 1
diff --git a/test/CodeGen/X86/pr23103.ll b/test/CodeGen/X86/pr23103.ll
new file mode 100644
index 000000000000..e0508effac05
--- /dev/null
+++ b/test/CodeGen/X86/pr23103.ll
@@ -0,0 +1,21 @@
+; RUN: llc -verify-machineinstrs -mtriple=x86_64-unknown-unknown -mcpu=generic -mattr=+avx < %s | FileCheck %s
+
+; When commuting a VADDSDrr instruction, verify that the 'IsUndef' flag is
+; correctly propagated to the operands of the resulting instruction.
+; Test for PR23103.
+
+declare zeroext i1 @foo(<1 x double>)
+
+define <1 x double> @pr23103(<1 x double>* align 8 %Vp) {
+; CHECK-LABEL: pr23103:
+; CHECK: vmovsd (%rdi), %xmm0
+; CHECK-NEXT: vmovsd %xmm0, {{.*}}(%rsp) {{.*#+}} 8-byte Spill
+; CHECK-NEXT: callq foo
+; CHECK-NEXT: vaddsd {{.*}}(%rsp), %xmm0, %xmm0 {{.*#+}} 8-byte Folded Reload
+; CHECK: retq
+entry:
+ %V = load <1 x double>, <1 x double>* %Vp, align 8
+ %call = call zeroext i1 @foo(<1 x double> %V)
+ %fadd = fadd <1 x double> %V, undef
+ ret <1 x double> %fadd
+}
diff --git a/test/CodeGen/X86/pr23246.ll b/test/CodeGen/X86/pr23246.ll
new file mode 100644
index 000000000000..6eb24a6b1634
--- /dev/null
+++ b/test/CodeGen/X86/pr23246.ll
@@ -0,0 +1,19 @@
+; RUN: llc < %s -mtriple x86_64-unknown-unknown | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
+; PR23246
+; We're really only interested in doing something sane with the shuffle.
+
+; CHECK-LABEL: test:
+; CHECK: movq2dq %mm0, %xmm0
+; CHECK-NEXT: pshufd {{.*}} xmm0 = xmm0[0,1,0,1]
+; CHECK-NEXT: retq
+define <2 x i64> @test(x86_mmx %a) #0 {
+entry:
+ %b = bitcast x86_mmx %a to <1 x i64>
+ %s = shufflevector <1 x i64> %b, <1 x i64> undef, <2 x i32> <i32 undef, i32 0>
+ ret <2 x i64> %s
+}
+
+attributes #0 = { nounwind }
diff --git a/test/CodeGen/X86/pr2326.ll b/test/CodeGen/X86/pr2326.ll
index f82dcb5d678f..88c7bb586701 100644
--- a/test/CodeGen/X86/pr2326.ll
+++ b/test/CodeGen/X86/pr2326.ll
@@ -4,12 +4,12 @@
define i32 @func_59(i32 %p_60) nounwind {
entry:
%l_108 = alloca i32 ; <i32*> [#uses=2]
- %tmp15 = load i32* null, align 4 ; <i32> [#uses=1]
- %tmp16 = load i32* %l_108, align 4 ; <i32> [#uses=1]
+ %tmp15 = load i32, i32* null, align 4 ; <i32> [#uses=1]
+ %tmp16 = load i32, i32* %l_108, align 4 ; <i32> [#uses=1]
%tmp17 = icmp eq i32 %tmp15, %tmp16 ; <i1> [#uses=1]
%tmp1718 = zext i1 %tmp17 to i8 ; <i8> [#uses=1]
- %tmp19 = load i32* null, align 4 ; <i32> [#uses=1]
- %tmp20 = load i32* %l_108, align 4 ; <i32> [#uses=1]
+ %tmp19 = load i32, i32* null, align 4 ; <i32> [#uses=1]
+ %tmp20 = load i32, i32* %l_108, align 4 ; <i32> [#uses=1]
%tmp21 = icmp ule i32 %tmp19, %tmp20 ; <i1> [#uses=1]
%tmp2122 = zext i1 %tmp21 to i8 ; <i8> [#uses=1]
%toBool23 = icmp ne i8 %tmp1718, 0 ; <i1> [#uses=1]
@@ -17,7 +17,7 @@ entry:
%tmp25 = and i1 %toBool23, %toBool24 ; <i1> [#uses=1]
%tmp2526 = zext i1 %tmp25 to i8 ; <i8> [#uses=1]
%tmp252627 = zext i8 %tmp2526 to i32 ; <i32> [#uses=1]
- %tmp29 = call i32 (...)* @func_15( i32 %tmp252627, i32 0 ) nounwind ; <i32> [#uses=0]
+ %tmp29 = call i32 (...) @func_15( i32 %tmp252627, i32 0 ) nounwind ; <i32> [#uses=0]
unreachable
}
diff --git a/test/CodeGen/X86/pr23273.ll b/test/CodeGen/X86/pr23273.ll
new file mode 100644
index 000000000000..2702eb820f2f
--- /dev/null
+++ b/test/CodeGen/X86/pr23273.ll
@@ -0,0 +1,17 @@
+; RUN: llc -mtriple=i386-unknown-unknown -mcpu=generic -march=x86 -mattr=-sse2 -fast-isel < %s
+
+; Verify that the backend doesn't crash during fast-isel with an assertion
+; failure when selecting an int-to-double conversion. The fast selection routine
+; for SINT_TO_FP wrongly assumed that the target had at least SSE2.
+
+@a = common global i32 0, align 4
+
+define i32 @pr23273() {
+entry:
+ %0 = load i32, i32* @a, align 4
+ %conv = sitofp i32 %0 to double
+ %call = call i32 @fn1(double %conv)
+ ret i32 0
+}
+
+declare i32 @fn1(double) #1
diff --git a/test/CodeGen/X86/pr2656.ll b/test/CodeGen/X86/pr2656.ll
index 1122d2d57114..9a162d77ef48 100644
--- a/test/CodeGen/X86/pr2656.ll
+++ b/test/CodeGen/X86/pr2656.ll
@@ -11,15 +11,15 @@ target triple = "i686-apple-darwin9.4.0"
define void @foo(%struct.anon* byval %p) nounwind {
entry:
- %tmp = getelementptr %struct.anon* %p, i32 0, i32 0 ; <float*> [#uses=1]
- %tmp1 = load float* %tmp ; <float> [#uses=1]
- %tmp2 = getelementptr %struct.anon* %p, i32 0, i32 1 ; <float*> [#uses=1]
- %tmp3 = load float* %tmp2 ; <float> [#uses=1]
+ %tmp = getelementptr %struct.anon, %struct.anon* %p, i32 0, i32 0 ; <float*> [#uses=1]
+ %tmp1 = load float, float* %tmp ; <float> [#uses=1]
+ %tmp2 = getelementptr %struct.anon, %struct.anon* %p, i32 0, i32 1 ; <float*> [#uses=1]
+ %tmp3 = load float, float* %tmp2 ; <float> [#uses=1]
%neg = fsub float -0.000000e+00, %tmp1 ; <float> [#uses=1]
%conv = fpext float %neg to double ; <double> [#uses=1]
%neg4 = fsub float -0.000000e+00, %tmp3 ; <float> [#uses=1]
%conv5 = fpext float %neg4 to double ; <double> [#uses=1]
- %call = call i32 (...)* @printf( i8* getelementptr ([17 x i8]* @.str, i32 0, i32 0), double %conv, double %conv5 ) ; <i32> [#uses=0]
+ %call = call i32 (...) @printf( i8* getelementptr ([17 x i8], [17 x i8]* @.str, i32 0, i32 0), double %conv, double %conv5 ) ; <i32> [#uses=0]
ret void
}
diff --git a/test/CodeGen/X86/pr2849.ll b/test/CodeGen/X86/pr2849.ll
index 0fec4813e109..c3fd101ab6db 100644
--- a/test/CodeGen/X86/pr2849.ll
+++ b/test/CodeGen/X86/pr2849.ll
@@ -13,24 +13,24 @@ target triple = "x86_64-unknown-linux-gnu"
define void @obshow() {
entry:
- %tmp = load %struct.HashEntry** @hash_table_begin, align 8
+ %tmp = load %struct.HashEntry*, %struct.HashEntry** @hash_table_begin, align 8
br i1 false, label %xlygetvalue.exit, label %xlygetvalue.exit
xlygetvalue.exit:
%storemerge.in.i = phi %struct.NODE** [ null, %entry ], [ null, %entry ]
- %storemerge.i = load %struct.NODE** %storemerge.in.i
+ %storemerge.i = load %struct.NODE*, %struct.NODE** %storemerge.in.i
%tmp1 = ptrtoint %struct.NODE** %storemerge.in.i to i64
%tmp2 = lshr i64 %tmp1, 3
%tmp3 = and i64 %tmp2, 2147483647
- %tmp4 = getelementptr %struct.HashEntry* %tmp, i64 %tmp3, i32 0, i32 1
- %tmp7 = load i8** %tmp4, align 8
- %tmp8 = getelementptr %struct.NODE* %storemerge.i, i64 0, i32 2
+ %tmp4 = getelementptr %struct.HashEntry, %struct.HashEntry* %tmp, i64 %tmp3, i32 0, i32 1
+ %tmp7 = load i8*, i8** %tmp4, align 8
+ %tmp8 = getelementptr %struct.NODE, %struct.NODE* %storemerge.i, i64 0, i32 2
%tmp9 = bitcast %struct.anon* %tmp8 to %struct.NODE***
- %tmp11 = load %struct.NODE*** %tmp9, align 8
+ %tmp11 = load %struct.NODE**, %struct.NODE*** %tmp9, align 8
%tmp12 = ptrtoint %struct.NODE** %tmp11 to i64
%tmp13 = lshr i64 %tmp12, 3
%tmp14 = and i64 %tmp13, 2147483647
- %tmp15 = getelementptr %struct.HashEntry* %tmp, i64 %tmp14, i32 0, i32 1
+ %tmp15 = getelementptr %struct.HashEntry, %struct.HashEntry* %tmp, i64 %tmp14, i32 0, i32 1
call fastcc void @xlprint(i8** %tmp4, i8* %tmp7, i8** %tmp15)
ret void
}
diff --git a/test/CodeGen/X86/pr2924.ll b/test/CodeGen/X86/pr2924.ll
index b9e8dc1740d9..14e9fc49a2aa 100644
--- a/test/CodeGen/X86/pr2924.ll
+++ b/test/CodeGen/X86/pr2924.ll
@@ -7,18 +7,18 @@ target triple = "i686-pc-linux-gnu"
define x86_stdcallcc { i32, i8* } @_D3std6string7toupperFAaZAa({ i32, i8* } %s) {
entry_std.string.toupper:
- %tmp58 = load i32* null
+ %tmp58 = load i32, i32* null
%tmp59 = icmp eq i32 %tmp58, 0
- %r.val = load { i32, i8* }* null, align 8
+ %r.val = load { i32, i8* }, { i32, i8* }* null, align 8
%condtmp.0 = select i1 %tmp59, { i32, i8* } undef, { i32, i8* } %r.val
ret { i32, i8* } %condtmp.0
}
define { } @empty({ } %s) {
entry_std.string.toupper:
- %tmp58 = load i32* null
+ %tmp58 = load i32, i32* null
%tmp59 = icmp eq i32 %tmp58, 0
- %r.val = load { }* null, align 8
+ %r.val = load { }, { }* null, align 8
%condtmp.0 = select i1 %tmp59, { } undef, { } %r.val
ret { } %condtmp.0
}
diff --git a/test/CodeGen/X86/pr2982.ll b/test/CodeGen/X86/pr2982.ll
index 3f9a5953153b..b7902b8cc3a0 100644
--- a/test/CodeGen/X86/pr2982.ll
+++ b/test/CodeGen/X86/pr2982.ll
@@ -12,15 +12,15 @@ declare i32 @rshift_u_u(...)
define void @bar() nounwind {
entry:
- %0 = load i32* @g_279, align 4 ; <i32> [#uses=1]
+ %0 = load i32, i32* @g_279, align 4 ; <i32> [#uses=1]
%1 = shl i32 %0, 1 ; <i32> [#uses=1]
%2 = and i32 %1, 2 ; <i32> [#uses=1]
- %3 = load i32* @g_265, align 4 ; <i32> [#uses=1]
- %4 = load i8* @g_3, align 1 ; <i8> [#uses=1]
+ %3 = load i32, i32* @g_265, align 4 ; <i32> [#uses=1]
+ %4 = load i8, i8* @g_3, align 1 ; <i8> [#uses=1]
%5 = sext i8 %4 to i32 ; <i32> [#uses=1]
%6 = add i32 %2, %3 ; <i32> [#uses=1]
%7 = add i32 %6, %5 ; <i32> [#uses=1]
- %8 = tail call i32 (...)* @rshift_u_u(i32 %7, i32 0) nounwind
+ %8 = tail call i32 (...) @rshift_u_u(i32 %7, i32 0) nounwind
; <i32> [#uses=0]
ret void
}
diff --git a/test/CodeGen/X86/pr3154.ll b/test/CodeGen/X86/pr3154.ll
index 18df97c72302..5da88403802e 100644
--- a/test/CodeGen/X86/pr3154.ll
+++ b/test/CodeGen/X86/pr3154.ll
@@ -8,7 +8,7 @@ entry:
%0 = add i32 %len, 2 ; <i32> [#uses=1]
%1 = add i32 %0, %lag ; <i32> [#uses=1]
%2 = alloca double, i32 %1 ; <double*> [#uses=2]
- %3 = getelementptr double* %2, i32 %lag ; <double*> [#uses=2]
+ %3 = getelementptr double, double* %2, i32 %lag ; <double*> [#uses=2]
%4 = ptrtoint double* %3 to i32 ; <i32> [#uses=1]
%5 = and i32 %4, 8 ; <i32> [#uses=1]
%6 = icmp eq i32 %5, 0 ; <i1> [#uses=1]
@@ -16,7 +16,7 @@ entry:
bb: ; preds = %entry
%.sum = add i32 %lag, 1 ; <i32> [#uses=1]
- %7 = getelementptr double* %2, i32 %.sum ; <double*> [#uses=1]
+ %7 = getelementptr double, double* %2, i32 %.sum ; <double*> [#uses=1]
br label %bb19
bb19: ; preds = %bb, %entry
@@ -32,8 +32,8 @@ bb19: ; preds = %bb, %entry
call void asm sideeffect "movsd $0, %xmm7 \0A\09movapd ff_pd_1, %xmm6 \0A\09movapd ff_pd_2, %xmm5 \0A\09movlhps %xmm7, %xmm7 \0A\09subpd %xmm5, %xmm7 \0A\09addsd %xmm6, %xmm7 \0A\09", "*m,~{dirflag},~{fpsr},~{flags}"(double* %c) nounwind
%15 = and i32 %len, 1 ; <i32> [#uses=1]
%toBool = icmp eq i32 %15, 0 ; <i1> [#uses=1]
- %16 = getelementptr double* %data15.0, i32 %11 ; <double*> [#uses=2]
- %17 = getelementptr i32* %data, i32 %11 ; <i32*> [#uses=2]
+ %16 = getelementptr double, double* %data15.0, i32 %11 ; <double*> [#uses=2]
+ %17 = getelementptr i32, i32* %data, i32 %11 ; <i32*> [#uses=2]
br i1 %toBool, label %bb22, label %bb20
bb20: ; preds = %bb19
@@ -51,14 +51,14 @@ bb28.preheader: ; preds = %bb22, %bb20
bb27: ; preds = %bb27, %bb28.preheader
%j4.042 = phi i32 [ 0, %bb28.preheader ], [ %indvar.next45, %bb27 ] ; <i32> [#uses=2]
%19 = sub i32 %j4.042, %lag ; <i32> [#uses=1]
- %20 = getelementptr double* %data15.0, i32 %19 ; <double*> [#uses=1]
+ %20 = getelementptr double, double* %data15.0, i32 %19 ; <double*> [#uses=1]
store double 0.000000e+00, double* %20, align 8
%indvar.next45 = add i32 %j4.042, 1 ; <i32> [#uses=2]
%exitcond = icmp eq i32 %indvar.next45, %lag ; <i1> [#uses=1]
br i1 %exitcond, label %bb29, label %bb27
bb29: ; preds = %bb27, %bb28.preheader
- %21 = getelementptr double* %data15.0, i32 %len ; <double*> [#uses=3]
+ %21 = getelementptr double, double* %data15.0, i32 %len ; <double*> [#uses=3]
store double 0.000000e+00, double* %21, align 8
br i1 %18, label %bb.nph, label %bb37
@@ -77,18 +77,18 @@ bb30: ; preds = %bb35, %bb.nph
bb31: ; preds = %bb30
%26 = add i32 %j4.141, 2 ; <i32> [#uses=2]
%.sum38 = sub i32 %len, %j4.141 ; <i32> [#uses=1]
- %27 = getelementptr double* %data15.0, i32 %.sum38 ; <double*> [#uses=1]
- %28 = getelementptr double* %autoc, i32 %j4.141 ; <double*> [#uses=1]
- %29 = getelementptr double* %autoc, i32 %25 ; <double*> [#uses=1]
- %30 = getelementptr double* %autoc, i32 %26 ; <double*> [#uses=1]
+ %27 = getelementptr double, double* %data15.0, i32 %.sum38 ; <double*> [#uses=1]
+ %28 = getelementptr double, double* %autoc, i32 %j4.141 ; <double*> [#uses=1]
+ %29 = getelementptr double, double* %autoc, i32 %25 ; <double*> [#uses=1]
+ %30 = getelementptr double, double* %autoc, i32 %26 ; <double*> [#uses=1]
%asmtmp32 = call i32 asm sideeffect "movsd ff_pd_1, %xmm0 \0A\09movsd ff_pd_1, %xmm1 \0A\09movsd ff_pd_1, %xmm2 \0A\091: \0A\09movapd ($4,$0), %xmm3 \0A\09movupd -8($5,$0), %xmm4 \0A\09movapd ($5,$0), %xmm5 \0A\09mulpd %xmm3, %xmm4 \0A\09mulpd %xmm3, %xmm5 \0A\09mulpd -16($5,$0), %xmm3 \0A\09addpd %xmm4, %xmm1 \0A\09addpd %xmm5, %xmm0 \0A\09addpd %xmm3, %xmm2 \0A\09add $$16, $0 \0A\09jl 1b \0A\09movhlps %xmm0, %xmm3 \0A\09movhlps %xmm1, %xmm4 \0A\09movhlps %xmm2, %xmm5 \0A\09addsd %xmm3, %xmm0 \0A\09addsd %xmm4, %xmm1 \0A\09addsd %xmm5, %xmm2 \0A\09movsd %xmm0, $1 \0A\09movsd %xmm1, $2 \0A\09movsd %xmm2, $3 \0A\09", "=&r,=*m,=*m,=*m,r,r,0,~{dirflag},~{fpsr},~{flags}"(double* %28, double* %29, double* %30, double* %21, double* %27, i32 %22) nounwind ; <i32> [#uses=0]
br label %bb35
bb33: ; preds = %bb30
%.sum39 = sub i32 %len, %j4.141 ; <i32> [#uses=1]
- %31 = getelementptr double* %data15.0, i32 %.sum39 ; <double*> [#uses=1]
- %32 = getelementptr double* %autoc, i32 %j4.141 ; <double*> [#uses=1]
- %33 = getelementptr double* %autoc, i32 %25 ; <double*> [#uses=1]
+ %31 = getelementptr double, double* %data15.0, i32 %.sum39 ; <double*> [#uses=1]
+ %32 = getelementptr double, double* %autoc, i32 %j4.141 ; <double*> [#uses=1]
+ %33 = getelementptr double, double* %autoc, i32 %25 ; <double*> [#uses=1]
%asmtmp34 = call i32 asm sideeffect "movsd ff_pd_1, %xmm0 \0A\09movsd ff_pd_1, %xmm1 \0A\091: \0A\09movapd ($3,$0), %xmm3 \0A\09movupd -8($4,$0), %xmm4 \0A\09mulpd %xmm3, %xmm4 \0A\09mulpd ($4,$0), %xmm3 \0A\09addpd %xmm4, %xmm1 \0A\09addpd %xmm3, %xmm0 \0A\09add $$16, $0 \0A\09jl 1b \0A\09movhlps %xmm0, %xmm3 \0A\09movhlps %xmm1, %xmm4 \0A\09addsd %xmm3, %xmm0 \0A\09addsd %xmm4, %xmm1 \0A\09movsd %xmm0, $1 \0A\09movsd %xmm1, $2 \0A\09", "=&r,=*m,=*m,r,r,0,~{dirflag},~{fpsr},~{flags}"(double* %32, double* %33, double* %21, double* %31, i32 %22) nounwind ; <i32> [#uses=0]
%.pre = add i32 %j4.141, 2 ; <i32> [#uses=1]
br label %bb35
diff --git a/test/CodeGen/X86/pr3216.ll b/test/CodeGen/X86/pr3216.ll
index a4a48210d358..23dcf5693cd0 100644
--- a/test/CodeGen/X86/pr3216.ll
+++ b/test/CodeGen/X86/pr3216.ll
@@ -8,7 +8,7 @@ define i32 @main() nounwind {
; CHECK: sar{{.}} $5
; CHECK: ret
- %tmp = load i8* @foo
+ %tmp = load i8, i8* @foo
%bf.lo = lshr i8 %tmp, 5
%bf.lo.cleared = and i8 %bf.lo, 7
%1 = shl i8 %bf.lo.cleared, 5
diff --git a/test/CodeGen/X86/pr3241.ll b/test/CodeGen/X86/pr3241.ll
index 2f7917b77c39..f89634d5b82a 100644
--- a/test/CodeGen/X86/pr3241.ll
+++ b/test/CodeGen/X86/pr3241.ll
@@ -9,7 +9,7 @@ entry:
%t1 = call i32 @safe_add_macro_uint32_t_u_u() nounwind
%t2 = icmp sgt i32 %t1, 0
%t3 = zext i1 %t2 to i32
- %t4 = load i32* @g_620, align 4
+ %t4 = load i32, i32* @g_620, align 4
%t5 = icmp eq i32 %t3, %t4
%t6 = xor i32 %p_21, 1
%t7 = call i32 @func_55(i32 %t6) nounwind
diff --git a/test/CodeGen/X86/pr3244.ll b/test/CodeGen/X86/pr3244.ll
index 2598c2f976b2..c6419d8ce768 100644
--- a/test/CodeGen/X86/pr3244.ll
+++ b/test/CodeGen/X86/pr3244.ll
@@ -6,11 +6,11 @@
define i32 @func_42(i32 %p_43, i32 %p_44, i32 %p_45, i32 %p_46) nounwind {
entry:
- %0 = load i16* @g_62, align 2 ; <i16> [#uses=1]
- %1 = load i32* @g_487, align 4 ; <i32> [#uses=1]
+ %0 = load i16, i16* @g_62, align 2 ; <i16> [#uses=1]
+ %1 = load i32, i32* @g_487, align 4 ; <i32> [#uses=1]
%2 = trunc i16 %0 to i8 ; <i8> [#uses=1]
%3 = trunc i32 %1 to i8 ; <i8> [#uses=1]
- %4 = tail call i32 (...)* @func_7(i64 -4455561449541442965, i32 1)
+ %4 = tail call i32 (...) @func_7(i64 -4455561449541442965, i32 1)
nounwind ; <i32> [#uses=1]
%5 = trunc i32 %4 to i8 ; <i8> [#uses=1]
%6 = mul i8 %3, %2 ; <i8> [#uses=1]
diff --git a/test/CodeGen/X86/pr3250.ll b/test/CodeGen/X86/pr3250.ll
index cccbf54bcc6b..4ab989eaf77f 100644
--- a/test/CodeGen/X86/pr3250.ll
+++ b/test/CodeGen/X86/pr3250.ll
@@ -5,7 +5,7 @@ declare i32 @safe_sub_func_short_u_u(i16 signext, i16 signext) nounwind
define i32 @func_106(i32 %p_107) nounwind {
entry:
- %0 = tail call i32 (...)* @safe_div_(i32 %p_107, i32 1) nounwind
+ %0 = tail call i32 (...) @safe_div_(i32 %p_107, i32 1) nounwind
; <i32> [#uses=1]
%1 = lshr i32 %0, -9 ; <i32> [#uses=1]
%2 = trunc i32 %1 to i16 ; <i16> [#uses=1]
diff --git a/test/CodeGen/X86/pr3317.ll b/test/CodeGen/X86/pr3317.ll
index d83daf01d3b0..cab8ae6b73fd 100644
--- a/test/CodeGen/X86/pr3317.ll
+++ b/test/CodeGen/X86/pr3317.ll
@@ -20,8 +20,8 @@ declare void @jnjvmNullPointerException()
define i32 @JnJVM_java_rmi_activation_ActivationGroupID_hashCode__(%JavaObject* nocapture) nounwind {
start:
- %1 = getelementptr %JavaObject* %0, i64 1, i32 1 ; <%JavaCommonClass**> [#uses=1]
- %2 = load %JavaCommonClass** %1 ; <%JavaCommonClass*> [#uses=4]
+ %1 = getelementptr %JavaObject, %JavaObject* %0, i64 1, i32 1 ; <%JavaCommonClass**> [#uses=1]
+ %2 = load %JavaCommonClass*, %JavaCommonClass** %1 ; <%JavaCommonClass*> [#uses=4]
%3 = icmp eq %JavaCommonClass* %2, null ; <i1> [#uses=1]
br i1 %3, label %verifyNullExit1, label %verifyNullCont2
@@ -31,14 +31,14 @@ verifyNullExit1: ; preds = %start
verifyNullCont2: ; preds = %start
%4 = bitcast %JavaCommonClass* %2 to { %JavaObject, i16, i32, i64 }* ; <{ %JavaObject, i16, i32, i64 }*> [#uses=1]
- %5 = getelementptr { %JavaObject, i16, i32, i64 }* %4, i64 0, i32 2 ; <i32*> [#uses=1]
- %6 = load i32* %5 ; <i32> [#uses=1]
- %7 = getelementptr %JavaCommonClass* %2, i64 0, i32 4 ; <%JavaClass***> [#uses=1]
+ %5 = getelementptr { %JavaObject, i16, i32, i64 }, { %JavaObject, i16, i32, i64 }* %4, i64 0, i32 2 ; <i32*> [#uses=1]
+ %6 = load i32, i32* %5 ; <i32> [#uses=1]
+ %7 = getelementptr %JavaCommonClass, %JavaCommonClass* %2, i64 0, i32 4 ; <%JavaClass***> [#uses=1]
%8 = bitcast %JavaClass*** %7 to i64* ; <i64*> [#uses=1]
- %9 = load i64* %8 ; <i64> [#uses=1]
+ %9 = load i64, i64* %8 ; <i64> [#uses=1]
%10 = trunc i64 %9 to i32 ; <i32> [#uses=1]
- %11 = getelementptr %JavaCommonClass* %2, i64 0, i32 3 ; <i16*> [#uses=1]
- %12 = load i16* %11 ; <i16> [#uses=1]
+ %11 = getelementptr %JavaCommonClass, %JavaCommonClass* %2, i64 0, i32 3 ; <i16*> [#uses=1]
+ %12 = load i16, i16* %11 ; <i16> [#uses=1]
%13 = sext i16 %12 to i32 ; <i32> [#uses=1]
%14 = xor i32 %10, %6 ; <i32> [#uses=1]
%15 = xor i32 %14, %13 ; <i32> [#uses=1]
diff --git a/test/CodeGen/X86/pr3366.ll b/test/CodeGen/X86/pr3366.ll
index 1127b6093215..b89a69ab7d41 100644
--- a/test/CodeGen/X86/pr3366.ll
+++ b/test/CodeGen/X86/pr3366.ll
@@ -3,7 +3,7 @@
define void @_ada_c34002a() nounwind {
entry:
- %0 = load i8* null, align 1
+ %0 = load i8, i8* null, align 1
%1 = sdiv i8 90, %0
%2 = icmp ne i8 %1, 3
%3 = zext i1 %2 to i8
diff --git a/test/CodeGen/X86/pr3457.ll b/test/CodeGen/X86/pr3457.ll
index 7264bcd12c42..d4c0020cb856 100644
--- a/test/CodeGen/X86/pr3457.ll
+++ b/test/CodeGen/X86/pr3457.ll
@@ -4,8 +4,8 @@
define void @foo(double* nocapture %P) nounwind {
entry:
- %0 = tail call double (...)* @test() nounwind ; <double> [#uses=2]
- %1 = tail call double (...)* @test() nounwind ; <double> [#uses=2]
+ %0 = tail call double (...) @test() nounwind ; <double> [#uses=2]
+ %1 = tail call double (...) @test() nounwind ; <double> [#uses=2]
%2 = fmul double %0, %0 ; <double> [#uses=1]
%3 = fmul double %1, %1 ; <double> [#uses=1]
%4 = fadd double %2, %3 ; <double> [#uses=1]
diff --git a/test/CodeGen/X86/pr3522.ll b/test/CodeGen/X86/pr3522.ll
index 9f8dc0370668..867f2828d4d9 100644
--- a/test/CodeGen/X86/pr3522.ll
+++ b/test/CodeGen/X86/pr3522.ll
@@ -9,7 +9,7 @@ define void @_ada_c34018a() {
entry:
%0 = tail call i32 @report__ident_int(i32 90) ; <i32> [#uses=1]
%1 = trunc i32 %0 to i8 ; <i8> [#uses=1]
- invoke void @__gnat_rcheck_12(i8* getelementptr ([13 x i8]* @.str, i32 0, i32 0), i32 32) noreturn
+ invoke void @__gnat_rcheck_12(i8* getelementptr ([13 x i8], [13 x i8]* @.str, i32 0, i32 0), i32 32) noreturn
to label %invcont unwind label %lpad
invcont: ; preds = %entry
diff --git a/test/CodeGen/X86/pr5145.ll b/test/CodeGen/X86/pr5145.ll
index 32a797ba138a..4dee5f8d7d2a 100644
--- a/test/CodeGen/X86/pr5145.ll
+++ b/test/CodeGen/X86/pr5145.ll
@@ -7,29 +7,25 @@ define void @atomic_maxmin_i8() {
; CHECK: [[LABEL1:\.?LBB[0-9]+_[0-9]+]]:
; CHECK: movsbl
; CHECK: cmpl
-; CHECK: lock
-; CHECK-NEXT: cmpxchgb
+; CHECK: lock cmpxchgb
; CHECK: jne [[LABEL1]]
%2 = atomicrmw min i8* @sc8, i8 6 acquire
; CHECK: [[LABEL3:\.?LBB[0-9]+_[0-9]+]]:
; CHECK: movsbl
; CHECK: cmpl
-; CHECK: lock
-; CHECK-NEXT: cmpxchgb
+; CHECK: lock cmpxchgb
; CHECK: jne [[LABEL3]]
%3 = atomicrmw umax i8* @sc8, i8 7 acquire
; CHECK: [[LABEL5:\.?LBB[0-9]+_[0-9]+]]:
; CHECK: movzbl
; CHECK: cmpl
-; CHECK: lock
-; CHECK-NEXT: cmpxchgb
+; CHECK: lock cmpxchgb
; CHECK: jne [[LABEL5]]
%4 = atomicrmw umin i8* @sc8, i8 8 acquire
; CHECK: [[LABEL7:\.?LBB[0-9]+_[0-9]+]]:
; CHECK: movzbl
; CHECK: cmpl
-; CHECK: lock
-; CHECK-NEXT: cmpxchgb
+; CHECK: lock cmpxchgb
; CHECK: jne [[LABEL7]]
ret void
}
diff --git a/test/CodeGen/X86/pr9127.ll b/test/CodeGen/X86/pr9127.ll
index ba92c77e22bd..33f9ace33ff5 100644
--- a/test/CodeGen/X86/pr9127.ll
+++ b/test/CodeGen/X86/pr9127.ll
@@ -3,7 +3,7 @@
define i8 @foobar(double %d, double* %x) {
entry:
- %tmp2 = load double* %x, align 8
+ %tmp2 = load double, double* %x, align 8
%cmp = fcmp oeq double %tmp2, %d
%conv3 = zext i1 %cmp to i8
ret i8 %conv3
diff --git a/test/CodeGen/X86/pre-ra-sched.ll b/test/CodeGen/X86/pre-ra-sched.ll
index bb4c1269b7cf..f8e196ba5808 100644
--- a/test/CodeGen/X86/pre-ra-sched.ll
+++ b/test/CodeGen/X86/pre-ra-sched.ll
@@ -14,33 +14,33 @@
; CHECK-NOT: Repushing
; CHECK: *** Final schedule
define i32 @test(i8* %pin) #0 {
- %g0 = getelementptr inbounds i8* %pin, i64 0
- %l0 = load i8* %g0, align 1
+ %g0 = getelementptr inbounds i8, i8* %pin, i64 0
+ %l0 = load i8, i8* %g0, align 1
- %g1a = getelementptr inbounds i8* %pin, i64 1
- %l1a = load i8* %g1a, align 1
+ %g1a = getelementptr inbounds i8, i8* %pin, i64 1
+ %l1a = load i8, i8* %g1a, align 1
%z1a = zext i8 %l1a to i32
- %g1b = getelementptr inbounds i8* %pin, i64 2
- %l1b = load i8* %g1b, align 1
+ %g1b = getelementptr inbounds i8, i8* %pin, i64 2
+ %l1b = load i8, i8* %g1b, align 1
%z1b = zext i8 %l1b to i32
%c1 = icmp ne i8 %l0, 0
%x1 = xor i32 %z1a, %z1b
%s1 = select i1 %c1, i32 %z1a, i32 %x1
- %g2a = getelementptr inbounds i8* %pin, i64 3
- %l2a = load i8* %g2a, align 1
+ %g2a = getelementptr inbounds i8, i8* %pin, i64 3
+ %l2a = load i8, i8* %g2a, align 1
%z2a = zext i8 %l2a to i32
- %g2b = getelementptr inbounds i8* %pin, i64 4
- %l2b = load i8* %g2b, align 1
+ %g2b = getelementptr inbounds i8, i8* %pin, i64 4
+ %l2b = load i8, i8* %g2b, align 1
%z2b = zext i8 %l2b to i32
%x2 = xor i32 %z2a, %z2b
%s2 = select i1 %c1, i32 %z2a, i32 %x2
- %g3a = getelementptr inbounds i8* %pin, i64 5
- %l3a = load i8* %g3a, align 1
+ %g3a = getelementptr inbounds i8, i8* %pin, i64 5
+ %l3a = load i8, i8* %g3a, align 1
%z3a = zext i8 %l3a to i32
- %g3b = getelementptr inbounds i8* %pin, i64 6
- %l3b = load i8* %g3b, align 1
+ %g3b = getelementptr inbounds i8, i8* %pin, i64 6
+ %l3b = load i8, i8* %g3b, align 1
%z3b = zext i8 %l3b to i32
%x3 = xor i32 %z3a, %z3b
%s3 = select i1 %c1, i32 %z3a, i32 %x3
diff --git a/test/CodeGen/X86/private-2.ll b/test/CodeGen/X86/private-2.ll
index cf2d74119374..21b6b3aff2d2 100644
--- a/test/CodeGen/X86/private-2.ll
+++ b/test/CodeGen/X86/private-2.ll
@@ -9,7 +9,7 @@
define internal i32* @"\01-[Example1 whatever]"() nounwind optsize ssp {
entry:
- %0 = getelementptr %struct.A* @"_ZZ20-[Example1 whatever]E4C.91", i64 0, i32 0 ; <i32**> [#uses=1]
- %1 = load i32** %0, align 8 ; <i32*> [#uses=1]
+ %0 = getelementptr %struct.A, %struct.A* @"_ZZ20-[Example1 whatever]E4C.91", i64 0, i32 0 ; <i32**> [#uses=1]
+ %1 = load i32*, i32** %0, align 8 ; <i32*> [#uses=1]
ret i32* %1
}
diff --git a/test/CodeGen/X86/private.ll b/test/CodeGen/X86/private.ll
index c02d19319a49..4b936d2323ec 100644
--- a/test/CodeGen/X86/private.ll
+++ b/test/CodeGen/X86/private.ll
@@ -10,7 +10,7 @@ define private void @foo() {
define i32 @bar() {
call void @foo()
- %1 = load i32* @baz, align 4
+ %1 = load i32, i32* @baz, align 4
ret i32 %1
; CHECK-LABEL: bar:
diff --git a/test/CodeGen/X86/promote-assert-zext.ll b/test/CodeGen/X86/promote-assert-zext.ll
index b582806c96a4..506748312052 100644
--- a/test/CodeGen/X86/promote-assert-zext.ll
+++ b/test/CodeGen/X86/promote-assert-zext.ll
@@ -11,7 +11,7 @@ target triple = "x86_64-apple-darwin11"
define i64 @_ZL5matchPKtPKhiR9MatchData(i8* %tmp13) nounwind {
entry:
- %tmp14 = load i8* %tmp13, align 1
+ %tmp14 = load i8, i8* %tmp13, align 1
%tmp17 = zext i8 %tmp14 to i16
br label %bb341
diff --git a/test/CodeGen/X86/promote-trunc.ll b/test/CodeGen/X86/promote-trunc.ll
index 40a58b073924..a20557a1fef2 100644
--- a/test/CodeGen/X86/promote-trunc.ll
+++ b/test/CodeGen/X86/promote-trunc.ll
@@ -1,9 +1,9 @@
; RUN: llc < %s -march=x86-64
define<4 x i8> @func_8_64() {
- %F = load <4 x i64>* undef
+ %F = load <4 x i64>, <4 x i64>* undef
%G = trunc <4 x i64> %F to <4 x i8>
- %H = load <4 x i64>* undef
+ %H = load <4 x i64>, <4 x i64>* undef
%Y = trunc <4 x i64> %H to <4 x i8>
%T = add <4 x i8> %Y, %G
ret <4 x i8> %T
diff --git a/test/CodeGen/X86/promote.ll b/test/CodeGen/X86/promote.ll
index 283f48cd37b4..38cdc14b380f 100644
--- a/test/CodeGen/X86/promote.ll
+++ b/test/CodeGen/X86/promote.ll
@@ -9,7 +9,7 @@ define i32 @mul_f(<4 x i8>* %A) {
entry:
; CHECK: pmul
; CHECK-NOT: mulb
- %0 = load <4 x i8>* %A, align 8
+ %0 = load <4 x i8>, <4 x i8>* %A, align 8
%mul = mul <4 x i8> %0, %0
store <4 x i8> %mul, <4 x i8>* undef
ret i32 0
@@ -23,7 +23,7 @@ entry:
; CHECK: pmovzxbd
; CHECK: paddd
; CHECK: pshufb
- %0 = load <4 x i8>* %A, align 8
+ %0 = load <4 x i8>, <4 x i8>* %A, align 8
%add = add <4 x i8> %0, %0
store <4 x i8> %add, <4 x i8>* undef
ret i32 0
diff --git a/test/CodeGen/X86/pshufb-mask-comments.ll b/test/CodeGen/X86/pshufb-mask-comments.ll
index ca5a02ce8d3a..105a035be592 100644
--- a/test/CodeGen/X86/pshufb-mask-comments.ll
+++ b/test/CodeGen/X86/pshufb-mask-comments.ll
@@ -41,10 +41,10 @@ define <16 x i8> @test5() {
; CHECK-LABEL: test5
; CHECK: pshufb {{.*}}
store <2 x i64> <i64 1, i64 0>, <2 x i64>* undef, align 16
- %l = load <2 x i64>* undef, align 16
+ %l = load <2 x i64>, <2 x i64>* undef, align 16
%shuffle = shufflevector <2 x i64> %l, <2 x i64> undef, <2 x i32> zeroinitializer
store <2 x i64> %shuffle, <2 x i64>* undef, align 16
- %1 = load <16 x i8>* undef, align 16
+ %1 = load <16 x i8>, <16 x i8>* undef, align 16
%2 = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> undef, <16 x i8> %1)
ret <16 x i8> %2
}
diff --git a/test/CodeGen/X86/psubus.ll b/test/CodeGen/X86/psubus.ll
index aff4afbd2e35..4b83b55997e2 100644
--- a/test/CodeGen/X86/psubus.ll
+++ b/test/CodeGen/X86/psubus.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mcpu=core2 < %s | FileCheck %s -check-prefix=SSE2
+; RUN: llc -mcpu=core2 < %s | FileCheck %s -check-prefix=SSSE3
; RUN: llc -mcpu=corei7-avx < %s | FileCheck %s -check-prefix=AVX1
; RUN: llc -mcpu=core-avx2 < %s | FileCheck %s -check-prefix=AVX2
@@ -7,334 +7,344 @@ target triple = "x86_64-apple-macosx10.8.0"
define void @test1(i16* nocapture %head) nounwind {
vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %0 = getelementptr inbounds i16* %head, i64 %index
+ %0 = getelementptr inbounds i16, i16* %head, i64 0
%1 = bitcast i16* %0 to <8 x i16>*
- %2 = load <8 x i16>* %1, align 2
+ %2 = load <8 x i16>, <8 x i16>* %1, align 2
%3 = icmp slt <8 x i16> %2, zeroinitializer
%4 = xor <8 x i16> %2, <i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768>
%5 = select <8 x i1> %3, <8 x i16> %4, <8 x i16> zeroinitializer
store <8 x i16> %5, <8 x i16>* %1, align 2
- %index.next = add i64 %index, 8
- %6 = icmp eq i64 %index.next, 16384
- br i1 %6, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
ret void
-; SSE2: @test1
-; SSE2: psubusw LCPI0_0(%rip), %xmm0
+; SSSE3: @test1
+; SSSE3: # BB#0:
+; SSSE3-NEXT: movdqu (%rdi), %xmm0
+; SSSE3-NEXT: psubusw LCPI0_0(%rip), %xmm0
+; SSSE3-NEXT: movdqu %xmm0, (%rdi)
+; SSSE3-NEXT: retq
; AVX1: @test1
-; AVX1: vpsubusw LCPI0_0(%rip), %xmm0, %xmm0
+; AVX1: # BB#0:
+; AVX1-NEXT: vmovdqu (%rdi), %xmm0
+; AVX1-NEXT: vpsubusw LCPI0_0(%rip), %xmm0, %xmm0
+; AVX1-NEXT: vmovdqu %xmm0, (%rdi)
+; AVX1-NEXT: retq
; AVX2: @test1
-; AVX2: vpsubusw LCPI0_0(%rip), %xmm0, %xmm0
+; AVX2: # BB#0:
+; AVX2-NEXT: vmovdqu (%rdi), %xmm0
+; AVX2-NEXT: vpsubusw LCPI0_0(%rip), %xmm0, %xmm0
+; AVX2-NEXT: vmovdqu %xmm0, (%rdi)
+; AVX2-NEXT: retq
}
define void @test2(i16* nocapture %head) nounwind {
vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %0 = getelementptr inbounds i16* %head, i64 %index
+ %0 = getelementptr inbounds i16, i16* %head, i64 0
%1 = bitcast i16* %0 to <8 x i16>*
- %2 = load <8 x i16>* %1, align 2
+ %2 = load <8 x i16>, <8 x i16>* %1, align 2
%3 = icmp ugt <8 x i16> %2, <i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766>
%4 = add <8 x i16> %2, <i16 -32767, i16 -32767, i16 -32767, i16 -32767, i16 -32767, i16 -32767, i16 -32767, i16 -32767>
%5 = select <8 x i1> %3, <8 x i16> %4, <8 x i16> zeroinitializer
store <8 x i16> %5, <8 x i16>* %1, align 2
- %index.next = add i64 %index, 8
- %6 = icmp eq i64 %index.next, 16384
- br i1 %6, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
ret void
-; SSE2: @test2
-; SSE2: psubusw LCPI1_0(%rip), %xmm0
+; SSSE3: @test2
+; SSSE3: # BB#0:
+; SSSE3-NEXT: movdqu (%rdi), %xmm0
+; SSSE3-NEXT: psubusw LCPI1_0(%rip), %xmm0
+; SSSE3-NEXT: movdqu %xmm0, (%rdi)
+; SSSE3-NEXT: retq
; AVX1: @test2
-; AVX1: vpsubusw LCPI1_0(%rip), %xmm0, %xmm0
+; AVX1: # BB#0:
+; AVX1-NEXT: vmovdqu (%rdi), %xmm0
+; AVX1-NEXT: vpsubusw LCPI1_0(%rip), %xmm0, %xmm0
+; AVX1-NEXT: vmovdqu %xmm0, (%rdi)
+; AVX1-NEXT: retq
; AVX2: @test2
-; AVX2: vpsubusw LCPI1_0(%rip), %xmm0, %xmm0
+; AVX2: # BB#0:
+; AVX2-NEXT: vmovdqu (%rdi), %xmm0
+; AVX2-NEXT: vpsubusw LCPI1_0(%rip), %xmm0, %xmm0
+; AVX2-NEXT: vmovdqu %xmm0, (%rdi)
+; AVX2-NEXT: retq
}
define void @test3(i16* nocapture %head, i16 zeroext %w) nounwind {
vector.ph:
%0 = insertelement <8 x i16> undef, i16 %w, i32 0
%broadcast15 = shufflevector <8 x i16> %0, <8 x i16> undef, <8 x i32> zeroinitializer
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %1 = getelementptr inbounds i16* %head, i64 %index
+ %1 = getelementptr inbounds i16, i16* %head, i64 0
%2 = bitcast i16* %1 to <8 x i16>*
- %3 = load <8 x i16>* %2, align 2
+ %3 = load <8 x i16>, <8 x i16>* %2, align 2
%4 = icmp ult <8 x i16> %3, %broadcast15
%5 = sub <8 x i16> %3, %broadcast15
%6 = select <8 x i1> %4, <8 x i16> zeroinitializer, <8 x i16> %5
store <8 x i16> %6, <8 x i16>* %2, align 2
- %index.next = add i64 %index, 8
- %7 = icmp eq i64 %index.next, 16384
- br i1 %7, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
ret void
-; SSE2: @test3
-; SSE2: psubusw %xmm0, %xmm1
+; SSSE3: @test3
+; SSSE3: # BB#0:
+; SSSE3-NEXT: movd %esi, %xmm0
+; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
+; SSSE3-NEXT: movdqu (%rdi), %xmm1
+; SSSE3-NEXT: psubusw %xmm0, %xmm1
+; SSSE3-NEXT: movdqu %xmm1, (%rdi)
+; SSSE3-NEXT: retq
; AVX1: @test3
-; AVX1: vpsubusw %xmm0, %xmm1, %xmm1
+; AVX1: # BB#0:
+; AVX1-NEXT: vmovd %esi, %xmm0
+; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
+; AVX1-NEXT: vmovdqu (%rdi), %xmm1
+; AVX1-NEXT: vpsubusw %xmm0, %xmm1, %xmm0
+; AVX1-NEXT: vmovdqu %xmm0, (%rdi)
+; AVX1-NEXT: retq
; AVX2: @test3
-; AVX2: vpsubusw %xmm0, %xmm1, %xmm1
+; AVX2: # BB#0:
+; AVX2-NEXT: vmovd %esi, %xmm0
+; AVX2-NEXT: vpbroadcastw %xmm0, %xmm0
+; AVX2-NEXT: vmovdqu (%rdi), %xmm1
+; AVX2-NEXT: vpsubusw %xmm0, %xmm1, %xmm0
+; AVX2-NEXT: vmovdqu %xmm0, (%rdi)
+; AVX2-NEXT: retq
}
define void @test4(i8* nocapture %head) nounwind {
vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %0 = getelementptr inbounds i8* %head, i64 %index
+ %0 = getelementptr inbounds i8, i8* %head, i64 0
%1 = bitcast i8* %0 to <16 x i8>*
- %2 = load <16 x i8>* %1, align 1
+ %2 = load <16 x i8>, <16 x i8>* %1, align 1
%3 = icmp slt <16 x i8> %2, zeroinitializer
%4 = xor <16 x i8> %2, <i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128>
%5 = select <16 x i1> %3, <16 x i8> %4, <16 x i8> zeroinitializer
store <16 x i8> %5, <16 x i8>* %1, align 1
- %index.next = add i64 %index, 16
- %6 = icmp eq i64 %index.next, 16384
- br i1 %6, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
ret void
-; SSE2: @test4
-; SSE2: psubusb LCPI3_0(%rip), %xmm0
+; SSSE3: @test4
+; SSSE3: # BB#0:
+; SSSE3-NEXT: movdqu (%rdi), %xmm0
+; SSSE3-NEXT: psubusb LCPI3_0(%rip), %xmm0
+; SSSE3-NEXT: movdqu %xmm0, (%rdi)
+; SSSE3-NEXT: retq
; AVX1: @test4
-; AVX1: vpsubusb LCPI3_0(%rip), %xmm0, %xmm0
+; AVX1: # BB#0:
+; AVX1-NEXT: vmovdqu (%rdi), %xmm0
+; AVX1-NEXT: vpsubusb LCPI3_0(%rip), %xmm0, %xmm0
+; AVX1-NEXT: vmovdqu %xmm0, (%rdi)
+; AVX1-NEXT: retq
; AVX2: @test4
-; AVX2: vpsubusb LCPI3_0(%rip), %xmm0, %xmm0
+; AVX2: # BB#0:
+; AVX2-NEXT: vmovdqu (%rdi), %xmm0
+; AVX2-NEXT: vpsubusb LCPI3_0(%rip), %xmm0, %xmm0
+; AVX2-NEXT: vmovdqu %xmm0, (%rdi)
+; AVX2-NEXT: retq
}
define void @test5(i8* nocapture %head) nounwind {
vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %0 = getelementptr inbounds i8* %head, i64 %index
+ %0 = getelementptr inbounds i8, i8* %head, i64 0
%1 = bitcast i8* %0 to <16 x i8>*
- %2 = load <16 x i8>* %1, align 1
+ %2 = load <16 x i8>, <16 x i8>* %1, align 1
%3 = icmp ugt <16 x i8> %2, <i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126>
%4 = add <16 x i8> %2, <i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127>
%5 = select <16 x i1> %3, <16 x i8> %4, <16 x i8> zeroinitializer
store <16 x i8> %5, <16 x i8>* %1, align 1
- %index.next = add i64 %index, 16
- %6 = icmp eq i64 %index.next, 16384
- br i1 %6, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
ret void
-; SSE2: @test5
-; SSE2: psubusb LCPI4_0(%rip), %xmm0
+; SSSE3: @test5
+; SSSE3: # BB#0:
+; SSSE3-NEXT: movdqu (%rdi), %xmm0
+; SSSE3-NEXT: psubusb LCPI4_0(%rip), %xmm0
+; SSSE3-NEXT: movdqu %xmm0, (%rdi)
+; SSSE3-NEXT: retq
; AVX1: @test5
-; AVX1: vpsubusb LCPI4_0(%rip), %xmm0, %xmm0
+; AVX1: # BB#0:
+; AVX1-NEXT: vmovdqu (%rdi), %xmm0
+; AVX1-NEXT: vpsubusb LCPI4_0(%rip), %xmm0
+; AVX1-NEXT: vmovdqu %xmm0, (%rdi)
+; AVX1-NEXT: retq
; AVX2: @test5
-; AVX2: vpsubusb LCPI4_0(%rip), %xmm0, %xmm0
+; AVX2: # BB#0:
+; AVX2-NEXT: vmovdqu (%rdi), %xmm0
+; AVX2-NEXT: vpsubusb LCPI4_0(%rip), %xmm0
+; AVX2-NEXT: vmovdqu %xmm0, (%rdi)
+; AVX2-NEXT: retq
}
define void @test6(i8* nocapture %head, i8 zeroext %w) nounwind {
vector.ph:
%0 = insertelement <16 x i8> undef, i8 %w, i32 0
%broadcast15 = shufflevector <16 x i8> %0, <16 x i8> undef, <16 x i32> zeroinitializer
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %1 = getelementptr inbounds i8* %head, i64 %index
+ %1 = getelementptr inbounds i8, i8* %head, i64 0
%2 = bitcast i8* %1 to <16 x i8>*
- %3 = load <16 x i8>* %2, align 1
+ %3 = load <16 x i8>, <16 x i8>* %2, align 1
%4 = icmp ult <16 x i8> %3, %broadcast15
%5 = sub <16 x i8> %3, %broadcast15
%6 = select <16 x i1> %4, <16 x i8> zeroinitializer, <16 x i8> %5
store <16 x i8> %6, <16 x i8>* %2, align 1
- %index.next = add i64 %index, 16
- %7 = icmp eq i64 %index.next, 16384
- br i1 %7, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
ret void
-; SSE2: @test6
-; SSE2: psubusb %xmm0, %xmm1
+; SSSE3: @test6
+; SSSE3: # BB#0:
+; SSSE3-NEXT: movd %esi, %xmm0
+; SSSE3-NEXT: pxor %xmm1, %xmm1
+; SSSE3-NEXT: pshufb %xmm1, %xmm0
+; SSSE3-NEXT: movdqu (%rdi), %xmm1
+; SSSE3-NEXT: psubusb %xmm0, %xmm1
+; SSSE3-NEXT: movdqu %xmm1, (%rdi)
+; SSSE3-NEXT: retq
; AVX1: @test6
-; AVX1: vpsubusb %xmm0, %xmm1, %xmm1
+; AVX1: # BB#0:
+; AVX1-NEXT: vmovd %esi, %xmm0
+; AVX1-NEXT: vpxor %xmm1, %xmm1
+; AVX1-NEXT: vpshufb %xmm1, %xmm0
+; AVX1-NEXT: vmovdqu (%rdi), %xmm1
+; AVX1-NEXT: vpsubusb %xmm0, %xmm1, %xmm0
+; AVX1-NEXT: vmovdqu %xmm0, (%rdi)
+; AVX1-NEXT: retq
; AVX2: @test6
-; AVX2: vpsubusb %xmm0, %xmm1, %xmm1
+; AVX2: # BB#0:
+; AVX2-NEXT: vmovd %esi, %xmm0
+; AVX2-NEXT: vpbroadcastb %xmm0, %xmm0
+; AVX2-NEXT: vmovdqu (%rdi), %xmm1
+; AVX2-NEXT: vpsubusb %xmm0, %xmm1, %xmm0
+; AVX2-NEXT: vmovdqu %xmm0, (%rdi)
+; AVX2-NEXT: retq
}
define void @test7(i16* nocapture %head) nounwind {
vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %0 = getelementptr inbounds i16* %head, i64 %index
+ %0 = getelementptr inbounds i16, i16* %head, i64 0
%1 = bitcast i16* %0 to <16 x i16>*
- %2 = load <16 x i16>* %1, align 2
+ %2 = load <16 x i16>, <16 x i16>* %1, align 2
%3 = icmp slt <16 x i16> %2, zeroinitializer
%4 = xor <16 x i16> %2, <i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768>
%5 = select <16 x i1> %3, <16 x i16> %4, <16 x i16> zeroinitializer
store <16 x i16> %5, <16 x i16>* %1, align 2
- %index.next = add i64 %index, 8
- %6 = icmp eq i64 %index.next, 16384
- br i1 %6, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
ret void
; AVX2: @test7
-; AVX2: vpsubusw LCPI6_0(%rip), %ymm0, %ymm0
+; AVX2: # BB#0:
+; AVX2-NEXT: vmovdqu (%rdi), %ymm0
+; AVX2-NEXT: vpsubusw LCPI6_0(%rip), %ymm0, %ymm0
+; AVX2-NEXT: vmovdqu %ymm0, (%rdi)
+; AVX2-NEXT: vzeroupper
+; AVX2-NEXT: retq
}
define void @test8(i16* nocapture %head) nounwind {
vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %0 = getelementptr inbounds i16* %head, i64 %index
+ %0 = getelementptr inbounds i16, i16* %head, i64 0
%1 = bitcast i16* %0 to <16 x i16>*
- %2 = load <16 x i16>* %1, align 2
+ %2 = load <16 x i16>, <16 x i16>* %1, align 2
%3 = icmp ugt <16 x i16> %2, <i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766>
%4 = add <16 x i16> %2, <i16 -32767, i16 -32767, i16 -32767, i16 -32767, i16 -32767, i16 -32767, i16 -32767, i16 -32767, i16 -32767, i16 -32767, i16 -32767, i16 -32767, i16 -32767, i16 -32767, i16 -32767, i16 -32767>
%5 = select <16 x i1> %3, <16 x i16> %4, <16 x i16> zeroinitializer
store <16 x i16> %5, <16 x i16>* %1, align 2
- %index.next = add i64 %index, 8
- %6 = icmp eq i64 %index.next, 16384
- br i1 %6, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
ret void
; AVX2: @test8
-; AVX2: vpsubusw LCPI7_0(%rip), %ymm0, %ymm0
+; AVX2: # BB#0:
+; AVX2-NEXT: vmovdqu (%rdi), %ymm0
+; AVX2-NEXT: vpsubusw LCPI7_0(%rip), %ymm0, %ymm0
+; AVX2-NEXT: vmovdqu %ymm0, (%rdi)
+; AVX2-NEXT: vzeroupper
+; AVX2-NEXT: retq
}
define void @test9(i16* nocapture %head, i16 zeroext %w) nounwind {
vector.ph:
%0 = insertelement <16 x i16> undef, i16 %w, i32 0
%broadcast15 = shufflevector <16 x i16> %0, <16 x i16> undef, <16 x i32> zeroinitializer
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %1 = getelementptr inbounds i16* %head, i64 %index
+ %1 = getelementptr inbounds i16, i16* %head, i64 0
%2 = bitcast i16* %1 to <16 x i16>*
- %3 = load <16 x i16>* %2, align 2
+ %3 = load <16 x i16>, <16 x i16>* %2, align 2
%4 = icmp ult <16 x i16> %3, %broadcast15
%5 = sub <16 x i16> %3, %broadcast15
%6 = select <16 x i1> %4, <16 x i16> zeroinitializer, <16 x i16> %5
store <16 x i16> %6, <16 x i16>* %2, align 2
- %index.next = add i64 %index, 8
- %7 = icmp eq i64 %index.next, 16384
- br i1 %7, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
ret void
-
; AVX2: @test9
-; AVX2: vpsubusw %ymm0, %ymm1, %ymm1
+; AVX2: # BB#0:
+; AVX2-NEXT: vmovd %esi, %xmm0
+; AVX2-NEXT: vpbroadcastw %xmm0, %ymm0
+; AVX2-NEXT: vmovdqu (%rdi), %ymm1
+; AVX2-NEXT: vpsubusw %ymm0, %ymm1, %ymm0
+; AVX2-NEXT: vmovdqu %ymm0, (%rdi)
+; AVX2-NEXT: vzeroupper
+; AVX2-NEXT: retq
}
define void @test10(i8* nocapture %head) nounwind {
vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %0 = getelementptr inbounds i8* %head, i64 %index
+ %0 = getelementptr inbounds i8, i8* %head, i64 0
%1 = bitcast i8* %0 to <32 x i8>*
- %2 = load <32 x i8>* %1, align 1
+ %2 = load <32 x i8>, <32 x i8>* %1, align 1
%3 = icmp slt <32 x i8> %2, zeroinitializer
%4 = xor <32 x i8> %2, <i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128>
%5 = select <32 x i1> %3, <32 x i8> %4, <32 x i8> zeroinitializer
store <32 x i8> %5, <32 x i8>* %1, align 1
- %index.next = add i64 %index, 16
- %6 = icmp eq i64 %index.next, 16384
- br i1 %6, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
ret void
-
; AVX2: @test10
-; AVX2: vpsubusb LCPI9_0(%rip), %ymm0, %ymm0
+; AVX2: # BB#0:
+; AVX2-NEXT: vmovdqu (%rdi), %ymm0
+; AVX2-NEXT: vpsubusb LCPI9_0(%rip), %ymm0, %ymm0
+; AVX2-NEXT: vmovdqu %ymm0, (%rdi)
+; AVX2-NEXT: vzeroupper
+; AVX2-NEXT: retq
}
define void @test11(i8* nocapture %head) nounwind {
vector.ph:
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %0 = getelementptr inbounds i8* %head, i64 %index
+ %0 = getelementptr inbounds i8, i8* %head, i64 0
%1 = bitcast i8* %0 to <32 x i8>*
- %2 = load <32 x i8>* %1, align 1
+ %2 = load <32 x i8>, <32 x i8>* %1, align 1
%3 = icmp ugt <32 x i8> %2, <i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126>
%4 = add <32 x i8> %2, <i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127>
%5 = select <32 x i1> %3, <32 x i8> %4, <32 x i8> zeroinitializer
store <32 x i8> %5, <32 x i8>* %1, align 1
- %index.next = add i64 %index, 16
- %6 = icmp eq i64 %index.next, 16384
- br i1 %6, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
ret void
; AVX2: @test11
-; AVX2: vpsubusb LCPI10_0(%rip), %ymm0, %ymm0
+; AVX2: # BB#0:
+; AVX2-NEXT: vmovdqu (%rdi), %ymm0
+; AVX2-NEXT: vpsubusb LCPI10_0(%rip), %ymm0, %ymm0
+; AVX2-NEXT: vmovdqu %ymm0, (%rdi)
+; AVX2-NEXT: vzeroupper
+; AVX2-NEXT: retq
}
define void @test12(i8* nocapture %head, i8 zeroext %w) nounwind {
vector.ph:
%0 = insertelement <32 x i8> undef, i8 %w, i32 0
%broadcast15 = shufflevector <32 x i8> %0, <32 x i8> undef, <32 x i32> zeroinitializer
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %1 = getelementptr inbounds i8* %head, i64 %index
+ %1 = getelementptr inbounds i8, i8* %head, i64 0
%2 = bitcast i8* %1 to <32 x i8>*
- %3 = load <32 x i8>* %2, align 1
+ %3 = load <32 x i8>, <32 x i8>* %2, align 1
%4 = icmp ult <32 x i8> %3, %broadcast15
%5 = sub <32 x i8> %3, %broadcast15
%6 = select <32 x i1> %4, <32 x i8> zeroinitializer, <32 x i8> %5
store <32 x i8> %6, <32 x i8>* %2, align 1
- %index.next = add i64 %index, 16
- %7 = icmp eq i64 %index.next, 16384
- br i1 %7, label %for.end, label %vector.body
-
-for.end: ; preds = %vector.body
ret void
; AVX2: @test12
-; AVX2: vpsubusb %ymm0, %ymm1, %ymm1
+; AVX2: # BB#0:
+; AVX2-NEXT: vmovd %esi, %xmm0
+; AVX2-NEXT: vpbroadcastb %xmm0, %ymm0
+; AVX2-NEXT: vmovdqu (%rdi), %ymm1
+; AVX2-NEXT: vpsubusb %ymm0, %ymm1, %ymm0
+; AVX2-NEXT: vmovdqu %ymm0, (%rdi)
+; AVX2-NEXT: vzeroupper
+; AVX2-NEXT: retq
}
diff --git a/test/CodeGen/X86/ptrtoint-constexpr.ll b/test/CodeGen/X86/ptrtoint-constexpr.ll
index d1cb34bec8a0..095201572bf5 100644
--- a/test/CodeGen/X86/ptrtoint-constexpr.ll
+++ b/test/CodeGen/X86/ptrtoint-constexpr.ll
@@ -11,4 +11,4 @@
; CHECK: x:
; CHECK: .quad ((0+1)&4294967295)*3
-@x = global i64 mul (i64 3, i64 ptrtoint (i2* getelementptr (i2* null, i64 1) to i64))
+@x = global i64 mul (i64 3, i64 ptrtoint (i2* getelementptr (i2, i2* null, i64 1) to i64))
diff --git a/test/CodeGen/X86/ragreedy-bug.ll b/test/CodeGen/X86/ragreedy-bug.ll
index 83ac274bba19..e8426317f13d 100644
--- a/test/CodeGen/X86/ragreedy-bug.ll
+++ b/test/CodeGen/X86/ragreedy-bug.ll
@@ -30,31 +30,31 @@ declare i32 @__maskrune(i32, i64) #7
define fastcc i32 @prune_match(%struct.Connector_struct* nocapture readonly %a, %struct.Connector_struct* nocapture readonly %b) #9 {
entry:
%label56 = bitcast %struct.Connector_struct* %a to i16*
- %0 = load i16* %label56, align 2
+ %0 = load i16, i16* %label56, align 2
%label157 = bitcast %struct.Connector_struct* %b to i16*
- %1 = load i16* %label157, align 2
+ %1 = load i16, i16* %label157, align 2
%cmp = icmp eq i16 %0, %1
br i1 %cmp, label %if.end, label %return, !prof !988
if.end:
- %priority = getelementptr inbounds %struct.Connector_struct* %a, i64 0, i32 2
- %2 = load i8* %priority, align 1
- %priority5 = getelementptr inbounds %struct.Connector_struct* %b, i64 0, i32 2
- %3 = load i8* %priority5, align 1
- %string = getelementptr inbounds %struct.Connector_struct* %a, i64 0, i32 5
- %4 = load i8** %string, align 8
- %string7 = getelementptr inbounds %struct.Connector_struct* %b, i64 0, i32 5
- %5 = load i8** %string7, align 8
+ %priority = getelementptr inbounds %struct.Connector_struct, %struct.Connector_struct* %a, i64 0, i32 2
+ %2 = load i8, i8* %priority, align 1
+ %priority5 = getelementptr inbounds %struct.Connector_struct, %struct.Connector_struct* %b, i64 0, i32 2
+ %3 = load i8, i8* %priority5, align 1
+ %string = getelementptr inbounds %struct.Connector_struct, %struct.Connector_struct* %a, i64 0, i32 5
+ %4 = load i8*, i8** %string, align 8
+ %string7 = getelementptr inbounds %struct.Connector_struct, %struct.Connector_struct* %b, i64 0, i32 5
+ %5 = load i8*, i8** %string7, align 8
br label %while.cond
while.cond:
%lsr.iv27 = phi i64 [ %lsr.iv.next28, %if.end17 ], [ 0, %if.end ]
- %scevgep55 = getelementptr i8* %4, i64 %lsr.iv27
- %6 = load i8* %scevgep55, align 1
+ %scevgep55 = getelementptr i8, i8* %4, i64 %lsr.iv27
+ %6 = load i8, i8* %scevgep55, align 1
%idxprom.i.i = sext i8 %6 to i64
%isascii.i.i224 = icmp sgt i8 %6, -1
br i1 %isascii.i.i224, label %cond.true.i.i, label %cond.false.i.i, !prof !181
cond.true.i.i:
- %arrayidx.i.i = getelementptr inbounds %struct._RuneLocale* @_DefaultRuneLocale, i64 0, i32 5, i64 %idxprom.i.i
- %7 = load i32* %arrayidx.i.i, align 4
+ %arrayidx.i.i = getelementptr inbounds %struct._RuneLocale, %struct._RuneLocale* @_DefaultRuneLocale, i64 0, i32 5, i64 %idxprom.i.i
+ %7 = load i32, i32* %arrayidx.i.i, align 4
%and.i.i = and i32 %7, 32768
br label %isupper.exit
cond.false.i.i:
@@ -70,13 +70,13 @@ lor.rhs:
%sunkaddr = ptrtoint i8* %5 to i64
%sunkaddr58 = add i64 %sunkaddr, %lsr.iv27
%sunkaddr59 = inttoptr i64 %sunkaddr58 to i8*
- %9 = load i8* %sunkaddr59, align 1
+ %9 = load i8, i8* %sunkaddr59, align 1
%idxprom.i.i214 = sext i8 %9 to i64
%isascii.i.i213225 = icmp sgt i8 %9, -1
br i1 %isascii.i.i213225, label %cond.true.i.i217, label %cond.false.i.i219, !prof !181
cond.true.i.i217:
- %arrayidx.i.i215 = getelementptr inbounds %struct._RuneLocale* @_DefaultRuneLocale, i64 0, i32 5, i64 %idxprom.i.i214
- %10 = load i32* %arrayidx.i.i215, align 4
+ %arrayidx.i.i215 = getelementptr inbounds %struct._RuneLocale, %struct._RuneLocale* @_DefaultRuneLocale, i64 0, i32 5, i64 %idxprom.i.i214
+ %10 = load i32, i32* %arrayidx.i.i215, align 4
%and.i.i216 = and i32 %10, 32768
br label %isupper.exit223
cond.false.i.i219:
@@ -92,11 +92,11 @@ while.body:
%sunkaddr60 = ptrtoint i8* %4 to i64
%sunkaddr61 = add i64 %sunkaddr60, %lsr.iv27
%sunkaddr62 = inttoptr i64 %sunkaddr61 to i8*
- %12 = load i8* %sunkaddr62, align 1
+ %12 = load i8, i8* %sunkaddr62, align 1
%sunkaddr63 = ptrtoint i8* %5 to i64
%sunkaddr64 = add i64 %sunkaddr63, %lsr.iv27
%sunkaddr65 = inttoptr i64 %sunkaddr64 to i8*
- %13 = load i8* %sunkaddr65, align 1
+ %13 = load i8, i8* %sunkaddr65, align 1
%cmp14 = icmp eq i8 %12, %13
br i1 %cmp14, label %if.end17, label %return, !prof !991
if.end17:
@@ -110,13 +110,13 @@ if.then23:
%sunkaddr66 = ptrtoint %struct.Connector_struct* %a to i64
%sunkaddr67 = add i64 %sunkaddr66, 16
%sunkaddr68 = inttoptr i64 %sunkaddr67 to i8**
- %16 = load i8** %sunkaddr68, align 8
- %17 = load i8* %16, align 1
+ %16 = load i8*, i8** %sunkaddr68, align 8
+ %17 = load i8, i8* %16, align 1
%cmp26 = icmp eq i8 %17, 83
%sunkaddr69 = ptrtoint i8* %4 to i64
%sunkaddr70 = add i64 %sunkaddr69, %lsr.iv27
%sunkaddr71 = inttoptr i64 %sunkaddr70 to i8*
- %18 = load i8* %sunkaddr71, align 1
+ %18 = load i8, i8* %sunkaddr71, align 1
br i1 %cmp26, label %land.lhs.true28, label %while.cond59.preheader, !prof !993
land.lhs.true28:
switch i8 %18, label %land.rhs.preheader [
@@ -127,24 +127,24 @@ land.lhs.true35:
%sunkaddr72 = ptrtoint i8* %5 to i64
%sunkaddr73 = add i64 %sunkaddr72, %lsr.iv27
%sunkaddr74 = inttoptr i64 %sunkaddr73 to i8*
- %19 = load i8* %sunkaddr74, align 1
+ %19 = load i8, i8* %sunkaddr74, align 1
switch i8 %19, label %land.rhs.preheader [
i8 112, label %land.lhs.true43
], !prof !995
land.lhs.true43:
%20 = ptrtoint i8* %16 to i64
%21 = sub i64 0, %20
- %scevgep52 = getelementptr i8* %4, i64 %21
- %scevgep53 = getelementptr i8* %scevgep52, i64 %lsr.iv27
- %scevgep54 = getelementptr i8* %scevgep53, i64 -1
+ %scevgep52 = getelementptr i8, i8* %4, i64 %21
+ %scevgep53 = getelementptr i8, i8* %scevgep52, i64 %lsr.iv27
+ %scevgep54 = getelementptr i8, i8* %scevgep53, i64 -1
%cmp45 = icmp eq i8* %scevgep54, null
br i1 %cmp45, label %return, label %lor.lhs.false47, !prof !996
lor.lhs.false47:
%22 = ptrtoint i8* %16 to i64
%23 = sub i64 0, %22
- %scevgep47 = getelementptr i8* %4, i64 %23
- %scevgep48 = getelementptr i8* %scevgep47, i64 %lsr.iv27
- %scevgep49 = getelementptr i8* %scevgep48, i64 -2
+ %scevgep47 = getelementptr i8, i8* %4, i64 %23
+ %scevgep48 = getelementptr i8, i8* %scevgep47, i64 %lsr.iv27
+ %scevgep49 = getelementptr i8, i8* %scevgep48, i64 -2
%cmp50 = icmp eq i8* %scevgep49, null
br i1 %cmp50, label %land.lhs.true52, label %while.cond59.preheader, !prof !997
land.lhs.true52:
@@ -152,7 +152,7 @@ land.lhs.true52:
%sunkaddr76 = add i64 %sunkaddr75, %lsr.iv27
%sunkaddr77 = add i64 %sunkaddr76, -1
%sunkaddr78 = inttoptr i64 %sunkaddr77 to i8*
- %24 = load i8* %sunkaddr78, align 1
+ %24 = load i8, i8* %sunkaddr78, align 1
%cmp55 = icmp eq i8 %24, 73
%cmp61233 = icmp eq i8 %18, 0
%or.cond265 = or i1 %cmp55, %cmp61233
@@ -161,14 +161,14 @@ while.cond59.preheader:
%cmp61233.old = icmp eq i8 %18, 0
br i1 %cmp61233.old, label %return, label %land.rhs.preheader, !prof !999
land.rhs.preheader:
- %scevgep33 = getelementptr i8* %5, i64 %lsr.iv27
- %scevgep43 = getelementptr i8* %4, i64 %lsr.iv27
+ %scevgep33 = getelementptr i8, i8* %5, i64 %lsr.iv27
+ %scevgep43 = getelementptr i8, i8* %4, i64 %lsr.iv27
br label %land.rhs
land.rhs:
%lsr.iv = phi i64 [ 0, %land.rhs.preheader ], [ %lsr.iv.next, %if.then83 ]
%25 = phi i8 [ %27, %if.then83 ], [ %18, %land.rhs.preheader ]
- %scevgep34 = getelementptr i8* %scevgep33, i64 %lsr.iv
- %26 = load i8* %scevgep34, align 1
+ %scevgep34 = getelementptr i8, i8* %scevgep33, i64 %lsr.iv
+ %26 = load i8, i8* %scevgep34, align 1
%cmp64 = icmp eq i8 %26, 0
br i1 %cmp64, label %return, label %while.body66, !prof !1000
while.body66:
@@ -182,9 +182,9 @@ lor.lhs.false74:
%or.cond208 = or i1 %cmp77, %cmp81
br i1 %or.cond208, label %return, label %if.then83, !prof !1002
if.then83:
- %scevgep44 = getelementptr i8* %scevgep43, i64 %lsr.iv
- %scevgep45 = getelementptr i8* %scevgep44, i64 1
- %27 = load i8* %scevgep45, align 1
+ %scevgep44 = getelementptr i8, i8* %scevgep43, i64 %lsr.iv
+ %scevgep45 = getelementptr i8, i8* %scevgep44, i64 1
+ %27 = load i8, i8* %scevgep45, align 1
%cmp61 = icmp eq i8 %27, 0
%lsr.iv.next = add i64 %lsr.iv, 1
br i1 %cmp61, label %return, label %land.rhs, !prof !999
@@ -197,18 +197,18 @@ while.cond95.preheader:
%sunkaddr79 = ptrtoint i8* %4 to i64
%sunkaddr80 = add i64 %sunkaddr79, %lsr.iv27
%sunkaddr81 = inttoptr i64 %sunkaddr80 to i8*
- %28 = load i8* %sunkaddr81, align 1
+ %28 = load i8, i8* %sunkaddr81, align 1
%cmp97238 = icmp eq i8 %28, 0
br i1 %cmp97238, label %return, label %land.rhs99.preheader, !prof !1004
land.rhs99.preheader:
- %scevgep31 = getelementptr i8* %5, i64 %lsr.iv27
- %scevgep40 = getelementptr i8* %4, i64 %lsr.iv27
+ %scevgep31 = getelementptr i8, i8* %5, i64 %lsr.iv27
+ %scevgep40 = getelementptr i8, i8* %4, i64 %lsr.iv27
br label %land.rhs99
land.rhs99:
%lsr.iv17 = phi i64 [ 0, %land.rhs99.preheader ], [ %lsr.iv.next18, %if.then117 ]
%29 = phi i8 [ %31, %if.then117 ], [ %28, %land.rhs99.preheader ]
- %scevgep32 = getelementptr i8* %scevgep31, i64 %lsr.iv17
- %30 = load i8* %scevgep32, align 1
+ %scevgep32 = getelementptr i8, i8* %scevgep31, i64 %lsr.iv17
+ %30 = load i8, i8* %scevgep32, align 1
%cmp101 = icmp eq i8 %30, 0
br i1 %cmp101, label %return, label %while.body104, !prof !1005
while.body104:
@@ -219,9 +219,9 @@ while.body104:
%or.cond210 = or i1 %or.cond209, %cmp115
br i1 %or.cond210, label %if.then117, label %return, !prof !1006
if.then117:
- %scevgep41 = getelementptr i8* %scevgep40, i64 %lsr.iv17
- %scevgep42 = getelementptr i8* %scevgep41, i64 1
- %31 = load i8* %scevgep42, align 1
+ %scevgep41 = getelementptr i8, i8* %scevgep40, i64 %lsr.iv17
+ %scevgep42 = getelementptr i8, i8* %scevgep41, i64 1
+ %31 = load i8, i8* %scevgep42, align 1
%cmp97 = icmp eq i8 %31, 0
%lsr.iv.next18 = add i64 %lsr.iv17, 1
br i1 %cmp97, label %return, label %land.rhs99, !prof !1004
@@ -234,18 +234,18 @@ while.cond130.preheader:
%sunkaddr82 = ptrtoint i8* %4 to i64
%sunkaddr83 = add i64 %sunkaddr82, %lsr.iv27
%sunkaddr84 = inttoptr i64 %sunkaddr83 to i8*
- %32 = load i8* %sunkaddr84, align 1
+ %32 = load i8, i8* %sunkaddr84, align 1
%cmp132244 = icmp eq i8 %32, 0
br i1 %cmp132244, label %return, label %land.rhs134.preheader, !prof !1008
land.rhs134.preheader:
- %scevgep29 = getelementptr i8* %5, i64 %lsr.iv27
- %scevgep37 = getelementptr i8* %4, i64 %lsr.iv27
+ %scevgep29 = getelementptr i8, i8* %5, i64 %lsr.iv27
+ %scevgep37 = getelementptr i8, i8* %4, i64 %lsr.iv27
br label %land.rhs134
land.rhs134:
%lsr.iv22 = phi i64 [ 0, %land.rhs134.preheader ], [ %lsr.iv.next23, %if.then152 ]
%33 = phi i8 [ %35, %if.then152 ], [ %32, %land.rhs134.preheader ]
- %scevgep30 = getelementptr i8* %scevgep29, i64 %lsr.iv22
- %34 = load i8* %scevgep30, align 1
+ %scevgep30 = getelementptr i8, i8* %scevgep29, i64 %lsr.iv22
+ %34 = load i8, i8* %scevgep30, align 1
%cmp136 = icmp eq i8 %34, 0
br i1 %cmp136, label %return, label %while.body139, !prof !1009
while.body139:
@@ -256,9 +256,9 @@ while.body139:
%or.cond212 = or i1 %or.cond211, %cmp150
br i1 %or.cond212, label %if.then152, label %return, !prof !1010
if.then152:
- %scevgep38 = getelementptr i8* %scevgep37, i64 %lsr.iv22
- %scevgep39 = getelementptr i8* %scevgep38, i64 1
- %35 = load i8* %scevgep39, align 1
+ %scevgep38 = getelementptr i8, i8* %scevgep37, i64 %lsr.iv22
+ %scevgep39 = getelementptr i8, i8* %scevgep38, i64 1
+ %35 = load i8, i8* %scevgep39, align 1
%cmp132 = icmp eq i8 %35, 0
%lsr.iv.next23 = add i64 %lsr.iv22, 1
br i1 %cmp132, label %return, label %land.rhs134, !prof !1008
diff --git a/test/CodeGen/X86/ragreedy-hoist-spill.ll b/test/CodeGen/X86/ragreedy-hoist-spill.ll
index 57afb4152db5..e7dda5349568 100644
--- a/test/CodeGen/X86/ragreedy-hoist-spill.ll
+++ b/test/CodeGen/X86/ragreedy-hoist-spill.ll
@@ -18,7 +18,7 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) {
entry:
%sub.ptr.rhs.cast646 = ptrtoint i8* %line to i64
%old = alloca [512 x i8], align 16
- %0 = getelementptr inbounds [512 x i8]* %old, i64 0, i64 0
+ %0 = getelementptr inbounds [512 x i8], [512 x i8]* %old, i64 0, i64 0
switch i64 %fid, label %if.then [
i64 2, label %if.end
i64 0, label %if.end
@@ -30,7 +30,7 @@ if.then:
if.end:
switch i64 undef, label %if.end25 [
i64 0, label %if.then4
- i64 1, label %land.lhs.true14
+ i64 1, label %if.end25
]
if.then4:
@@ -58,7 +58,7 @@ if.then.i2712:
unreachable
SyTime.exit2720:
- %add.ptr = getelementptr [512 x i8]* %old, i64 0, i64 512
+ %add.ptr = getelementptr [512 x i8], [512 x i8]* %old, i64 0, i64 512
%cmp293427 = icmp ult i8* %0, %add.ptr
br i1 %cmp293427, label %for.body.lr.ph, label %while.body.preheader
@@ -67,8 +67,8 @@ for.body.lr.ph:
br label %while.body.preheader
while.body.preheader:
- %add.ptr1603 = getelementptr [512 x i8]* null, i64 0, i64 512
- %echo.i3101 = getelementptr [16 x %struct.TMP.1]* @syBuf, i64 0, i64 %fid, i32 1
+ %add.ptr1603 = getelementptr [512 x i8], [512 x i8]* null, i64 0, i64 512
+ %echo.i3101 = getelementptr [16 x %struct.TMP.1], [16 x %struct.TMP.1]* @syBuf, i64 0, i64 %fid, i32 1
%1 = xor i64 %sub.ptr.rhs.cast646, -1
br label %do.body
@@ -210,7 +210,7 @@ land.lhs.true504:
br i1 undef, label %do.body479.backedge, label %if.end517
do.body479.backedge:
- %incdec.ptr480 = getelementptr i8* %incdec.ptr4803316, i64 1
+ %incdec.ptr480 = getelementptr i8, i8* %incdec.ptr4803316, i64 1
%cmp483 = icmp eq i8 undef, 0
br i1 %cmp483, label %if.end517, label %do.body479.backedge.land.rhs485_crit_edge
@@ -228,7 +228,7 @@ if.end517:
]
if.then532:
- store i8 0, i8* getelementptr inbounds ([512 x i8]* @SyFgets.yank, i64 0, i64 0), align 16, !tbaa !5
+ store i8 0, i8* getelementptr inbounds ([512 x i8], [512 x i8]* @SyFgets.yank, i64 0, i64 0), align 16, !tbaa !5
br label %for.cond534
for.cond534:
@@ -245,7 +245,7 @@ for.end552:
%s.2.lcssa = phi i8* [ undef, %for.cond542.preheader ], [ %q.4, %for.body545 ]
%sub.ptr.lhs.cast553 = ptrtoint i8* %s.2.lcssa to i64
%sub.ptr.sub555 = sub i64 %sub.ptr.lhs.cast553, 0
- %arrayidx556 = getelementptr i8* null, i64 %sub.ptr.sub555
+ %arrayidx556 = getelementptr i8, i8* null, i64 %sub.ptr.sub555
store i8 0, i8* %arrayidx556, align 1, !tbaa !5
br label %while.cond197.backedge
@@ -340,7 +340,7 @@ while.cond1683.preheader:
while.body1679:
%oldc.43406 = phi i32 [ %inc, %syEchoch.exit3070 ], [ %oldc.1.lcssa, %for.body1664.lr.ph ]
- %4 = load %struct.TMP.2** %echo.i3101, align 8, !tbaa !6
+ %4 = load %struct.TMP.2*, %struct.TMP.2** %echo.i3101, align 8, !tbaa !6
%call.i3062 = call i32 @fileno(%struct.TMP.2* %4)
br i1 undef, label %if.then.i3069, label %syEchoch.exit3070
@@ -360,10 +360,10 @@ while.end1693:
unreachable
for.body1723:
- %q.303203 = phi i8* [ getelementptr inbounds ([8192 x i8]* @syHistory, i64 0, i64 8189), %if.then1477 ], [ %incdec.ptr1730, %for.body1723 ]
- %add.ptr1728 = getelementptr i8* %q.303203, i64 %idx.neg1727
- %5 = load i8* %add.ptr1728, align 1, !tbaa !5
- %incdec.ptr1730 = getelementptr i8* %q.303203, i64 -1
+ %q.303203 = phi i8* [ getelementptr inbounds ([8192 x i8], [8192 x i8]* @syHistory, i64 0, i64 8189), %if.then1477 ], [ %incdec.ptr1730, %for.body1723 ]
+ %add.ptr1728 = getelementptr i8, i8* %q.303203, i64 %idx.neg1727
+ %5 = load i8, i8* %add.ptr1728, align 1, !tbaa !5
+ %incdec.ptr1730 = getelementptr i8, i8* %q.303203, i64 -1
br label %for.body1723
cleanup:
diff --git a/test/CodeGen/X86/ragreedy-last-chance-recoloring.ll b/test/CodeGen/X86/ragreedy-last-chance-recoloring.ll
index 49d58f437c21..f32875581f55 100644
--- a/test/CodeGen/X86/ragreedy-last-chance-recoloring.ll
+++ b/test/CodeGen/X86/ragreedy-last-chance-recoloring.ll
@@ -36,27 +36,27 @@ bb85: ; preds = %bb222, %bb85, %bb
indirectbr i8* undef, [label %bb439, label %bb85]
bb206: ; preds = %bb
- %tmp = getelementptr [499 x i32]* @fp_dh_36985b17790d59a27994eaab5dcb00ee, i32 0, i32 undef
- %tmp207 = load i32* %tmp
+ %tmp = getelementptr [499 x i32], [499 x i32]* @fp_dh_36985b17790d59a27994eaab5dcb00ee, i32 0, i32 undef
+ %tmp207 = load i32, i32* %tmp
%tmp208 = add i32 %tmp207, 1
%tmp209 = inttoptr i32 %tmp208 to i8*
indirectbr i8* %tmp209, [label %bb213]
bb213: ; preds = %bb206
- %tmp214 = load i32* @fp_dh_18716afa4a5354de0a302c8edb3b0ee1, align 4
- %tmp215 = load i8** @fp_dh_20a33cdeefab8f4c8887e82766cb9dcb, align 4
+ %tmp214 = load i32, i32* @fp_dh_18716afa4a5354de0a302c8edb3b0ee1, align 4
+ %tmp215 = load i8*, i8** @fp_dh_20a33cdeefab8f4c8887e82766cb9dcb, align 4
%tmp216 = urem i32 -717428541, %tmp214
- %tmp217 = getelementptr i8* %tmp215, i32 %tmp216
+ %tmp217 = getelementptr i8, i8* %tmp215, i32 %tmp216
%tmp218 = bitcast i8* %tmp217 to i32*
- %tmp219 = load i32* %tmp218, align 4
+ %tmp219 = load i32, i32* %tmp218, align 4
store i32 %tmp219, i32* undef, align 4
%tmp220 = select i1 false, i32 359373646, i32 1677237955
%tmp221 = add i32 %tmp220, 0
indirectbr i8* undef, [label %bb432, label %bb222]
bb222: ; preds = %bb213
- %tmp224 = load i32* undef, align 4
- %tmp225 = load i32* undef, align 4
+ %tmp224 = load i32, i32* undef, align 4
+ %tmp225 = load i32, i32* undef, align 4
%tmp226 = xor i32 %tmp225, %tmp224
%tmp227 = shl i32 %tmp226, 1
%tmp228 = and i32 %tmp227, -2048880334
@@ -65,13 +65,13 @@ bb222: ; preds = %bb213
%tmp231 = xor i32 %tmp230, 1059356227
%tmp232 = mul i32 %tmp231, 1603744721
%tmp233 = urem i32 %tmp232, 259
- %tmp234 = getelementptr [259 x i8]* bitcast (i8* getelementptr inbounds ([5419648 x i8]* @fp_dh_9d93c897906e39883c58b034c8e786b2, i32 0, i32 2039075) to [259 x i8]*), i32 0, i32 %tmp233
- %tmp235 = load i8* %tmp234, align 1
+ %tmp234 = getelementptr [259 x i8], [259 x i8]* bitcast (i8* getelementptr inbounds ([5419648 x i8], [5419648 x i8]* @fp_dh_9d93c897906e39883c58b034c8e786b2, i32 0, i32 2039075) to [259 x i8]*), i32 0, i32 %tmp233
+ %tmp235 = load i8, i8* %tmp234, align 1
%tmp236 = add i32 %tmp233, 2
- %tmp237 = getelementptr [264 x i8]* bitcast (i8* getelementptr inbounds ([5419648 x i8]* @fp_dh_9d93c897906e39883c58b034c8e786b2, i32 0, i32 3388166) to [264 x i8]*), i32 0, i32 %tmp236
- %tmp238 = load i8* %tmp237, align 1
- %tmp239 = getelementptr [265 x i8]* bitcast (i8* getelementptr inbounds ([5419648 x i8]* @fp_dh_9d93c897906e39883c58b034c8e786b2, i32 0, i32 1325165) to [265 x i8]*), i32 0, i32 0
- %tmp240 = load i8* %tmp239, align 1
+ %tmp237 = getelementptr [264 x i8], [264 x i8]* bitcast (i8* getelementptr inbounds ([5419648 x i8], [5419648 x i8]* @fp_dh_9d93c897906e39883c58b034c8e786b2, i32 0, i32 3388166) to [264 x i8]*), i32 0, i32 %tmp236
+ %tmp238 = load i8, i8* %tmp237, align 1
+ %tmp239 = getelementptr [265 x i8], [265 x i8]* bitcast (i8* getelementptr inbounds ([5419648 x i8], [5419648 x i8]* @fp_dh_9d93c897906e39883c58b034c8e786b2, i32 0, i32 1325165) to [265 x i8]*), i32 0, i32 0
+ %tmp240 = load i8, i8* %tmp239, align 1
%tmp241 = add i32 %tmp233, 6
%tmp242 = trunc i32 %tmp241 to i8
%tmp243 = mul i8 %tmp242, -3
@@ -80,7 +80,7 @@ bb222: ; preds = %bb213
%tmp246 = and i8 %tmp245, 6
%tmp247 = sub i8 0, %tmp246
%tmp248 = add i8 %tmp244, %tmp247
- %tmp249 = load i8* undef, align 1
+ %tmp249 = load i8, i8* undef, align 1
%tmp250 = xor i8 %tmp235, 17
%tmp251 = xor i8 %tmp250, %tmp238
%tmp252 = xor i8 %tmp251, %tmp240
@@ -88,13 +88,13 @@ bb222: ; preds = %bb213
%tmp254 = xor i8 %tmp253, %tmp248
%tmp255 = zext i8 %tmp254 to i16
%tmp256 = shl nuw i16 %tmp255, 8
- %tmp257 = load i8* null, align 1
- %tmp258 = load i32* @fp_dh_18716afa4a5354de0a302c8edb3b0ee1, align 4
- %tmp259 = load i8** @fp_dh_20a33cdeefab8f4c8887e82766cb9dcb, align 4
+ %tmp257 = load i8, i8* null, align 1
+ %tmp258 = load i32, i32* @fp_dh_18716afa4a5354de0a302c8edb3b0ee1, align 4
+ %tmp259 = load i8*, i8** @fp_dh_20a33cdeefab8f4c8887e82766cb9dcb, align 4
%tmp260 = urem i32 -717428541, %tmp258
- %tmp261 = getelementptr i8* %tmp259, i32 %tmp260
+ %tmp261 = getelementptr i8, i8* %tmp259, i32 %tmp260
%tmp262 = bitcast i8* %tmp261 to i32*
- %tmp263 = load i32* %tmp262, align 4
+ %tmp263 = load i32, i32* %tmp262, align 4
%tmp264 = xor i32 %tmp263, 0
%tmp265 = shl i32 %tmp264, 1
%tmp266 = and i32 %tmp265, -1312119832
@@ -104,8 +104,8 @@ bb222: ; preds = %bb213
%tmp270 = mul i32 %tmp269, 1603744721
%tmp271 = urem i32 %tmp270, 259
%tmp274 = add i32 %tmp271, 3
- %tmp275 = getelementptr [265 x i8]* bitcast (i8* getelementptr inbounds ([5419648 x i8]* @fp_dh_9d93c897906e39883c58b034c8e786b2, i32 0, i32 1325165) to [265 x i8]*), i32 0, i32 %tmp274
- %tmp276 = load i8* %tmp275, align 1
+ %tmp275 = getelementptr [265 x i8], [265 x i8]* bitcast (i8* getelementptr inbounds ([5419648 x i8], [5419648 x i8]* @fp_dh_9d93c897906e39883c58b034c8e786b2, i32 0, i32 1325165) to [265 x i8]*), i32 0, i32 %tmp274
+ %tmp276 = load i8, i8* %tmp275, align 1
%tmp277 = add i32 %tmp271, 6
%tmp278 = trunc i32 %tmp277 to i8
%tmp279 = mul i8 %tmp278, -3
@@ -161,8 +161,8 @@ bb222: ; preds = %bb213
%tmp334 = add i32 %tmp327, -1456704142
%tmp335 = zext i1 %tmp333 to i32
%tmp336 = add i32 %tmp334, %tmp335
- %tmp337 = getelementptr [499 x i32]* @fp_dh_36985b17790d59a27994eaab5dcb00ee, i32 0, i32 %tmp336
- %tmp338 = load i32* %tmp337
+ %tmp337 = getelementptr [499 x i32], [499 x i32]* @fp_dh_36985b17790d59a27994eaab5dcb00ee, i32 0, i32 %tmp336
+ %tmp338 = load i32, i32* %tmp337
%tmp339 = add i32 %tmp338, 1
%tmp340 = inttoptr i32 %tmp339 to i8*
indirectbr i8* %tmp340, [label %bb85, label %bb439]
@@ -170,8 +170,8 @@ bb222: ; preds = %bb213
bb432: ; preds = %bb432, %bb213
%tmp433 = phi i32 [ %tmp221, %bb213 ], [ %tmp433, %bb432 ]
%tmp434 = add i32 %tmp433, 1022523279
- %tmp435 = getelementptr [499 x i32]* @fp_dh_36985b17790d59a27994eaab5dcb00ee, i32 0, i32 %tmp434
- %tmp436 = load i32* %tmp435
+ %tmp435 = getelementptr [499 x i32], [499 x i32]* @fp_dh_36985b17790d59a27994eaab5dcb00ee, i32 0, i32 %tmp434
+ %tmp436 = load i32, i32* %tmp435
%tmp437 = add i32 %tmp436, 1
%tmp438 = inttoptr i32 %tmp437 to i8*
indirectbr i8* %tmp438, [label %bb432]
diff --git a/test/CodeGen/X86/rd-mod-wr-eflags.ll b/test/CodeGen/X86/rd-mod-wr-eflags.ll
index 5089bd761a80..972372151bcf 100644
--- a/test/CodeGen/X86/rd-mod-wr-eflags.ll
+++ b/test/CodeGen/X86/rd-mod-wr-eflags.ll
@@ -7,8 +7,8 @@ define void @_Z7releaseP3obj(%struct.obj* nocapture %o) nounwind uwtable ssp {
entry:
; CHECK: decq (%{{rdi|rcx}})
; CHECK-NEXT: je
- %refcnt = getelementptr inbounds %struct.obj* %o, i64 0, i32 0
- %0 = load i64* %refcnt, align 8
+ %refcnt = getelementptr inbounds %struct.obj, %struct.obj* %o, i64 0, i32 0
+ %0 = load i64, i64* %refcnt, align 8
%dec = add i64 %0, -1
store i64 %dec, i64* %refcnt, align 8
%tobool = icmp eq i64 %dec, 0
@@ -33,13 +33,13 @@ define i32 @test() nounwind uwtable ssp {
entry:
; CHECK: decq
; CHECK-NOT: decq
-%0 = load i64* @c, align 8
+%0 = load i64, i64* @c, align 8
%dec.i = add nsw i64 %0, -1
store i64 %dec.i, i64* @c, align 8
%tobool.i = icmp ne i64 %dec.i, 0
%lor.ext.i = zext i1 %tobool.i to i32
store i32 %lor.ext.i, i32* @a, align 4
-%call = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([5 x i8]* @.str, i64 0, i64 0), i64 %dec.i) nounwind
+%call = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i64 0, i64 0), i64 %dec.i) nounwind
ret i32 0
}
@@ -47,13 +47,13 @@ ret i32 0
define i32 @test2() nounwind uwtable ssp {
entry:
; CHECK-NOT: decq ({{.*}})
-%0 = load i64* @c, align 8
+%0 = load i64, i64* @c, align 8
%dec.i = add nsw i64 %0, -1
store i64 %dec.i, i64* @c, align 8
%tobool.i = icmp ne i64 %0, 0
%lor.ext.i = zext i1 %tobool.i to i32
store i32 %lor.ext.i, i32* @a, align 4
-%call = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([5 x i8]* @.str, i64 0, i64 0), i64 %dec.i) nounwind
+%call = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i64 0, i64 0), i64 %dec.i) nounwind
ret i32 0
}
@@ -69,9 +69,9 @@ declare void @other(%struct.obj2* ) nounwind;
define void @example_dec(%struct.obj2* %o) nounwind uwtable ssp {
; 64 bit dec
entry:
- %s64 = getelementptr inbounds %struct.obj2* %o, i64 0, i32 0
+ %s64 = getelementptr inbounds %struct.obj2, %struct.obj2* %o, i64 0, i32 0
; CHECK-NOT: load
- %0 = load i64* %s64, align 8
+ %0 = load i64, i64* %s64, align 8
; CHECK: decq ({{.*}})
%dec = add i64 %0, -1
store i64 %dec, i64* %s64, align 8
@@ -80,9 +80,9 @@ entry:
; 32 bit dec
if.end:
- %s32 = getelementptr inbounds %struct.obj2* %o, i64 0, i32 1
+ %s32 = getelementptr inbounds %struct.obj2, %struct.obj2* %o, i64 0, i32 1
; CHECK-NOT: load
- %1 = load i32* %s32, align 4
+ %1 = load i32, i32* %s32, align 4
; CHECK: decl {{[0-9][0-9]*}}({{.*}})
%dec1 = add i32 %1, -1
store i32 %dec1, i32* %s32, align 4
@@ -91,9 +91,9 @@ if.end:
; 16 bit dec
if.end1:
- %s16 = getelementptr inbounds %struct.obj2* %o, i64 0, i32 2
+ %s16 = getelementptr inbounds %struct.obj2, %struct.obj2* %o, i64 0, i32 2
; CHECK-NOT: load
- %2 = load i16* %s16, align 2
+ %2 = load i16, i16* %s16, align 2
; CHECK: decw {{[0-9][0-9]*}}({{.*}})
%dec2 = add i16 %2, -1
store i16 %dec2, i16* %s16, align 2
@@ -102,9 +102,9 @@ if.end1:
; 8 bit dec
if.end2:
- %s8 = getelementptr inbounds %struct.obj2* %o, i64 0, i32 3
+ %s8 = getelementptr inbounds %struct.obj2, %struct.obj2* %o, i64 0, i32 3
; CHECK-NOT: load
- %3 = load i8* %s8
+ %3 = load i8, i8* %s8
; CHECK: decb {{[0-9][0-9]*}}({{.*}})
%dec3 = add i8 %3, -1
store i8 %dec3, i8* %s8
@@ -123,9 +123,9 @@ return: ; preds = %if.end4, %if.end, %
define void @example_inc(%struct.obj2* %o) nounwind uwtable ssp {
; 64 bit inc
entry:
- %s64 = getelementptr inbounds %struct.obj2* %o, i64 0, i32 0
+ %s64 = getelementptr inbounds %struct.obj2, %struct.obj2* %o, i64 0, i32 0
; CHECK-NOT: load
- %0 = load i64* %s64, align 8
+ %0 = load i64, i64* %s64, align 8
; CHECK: incq ({{.*}})
%inc = add i64 %0, 1
store i64 %inc, i64* %s64, align 8
@@ -134,9 +134,9 @@ entry:
; 32 bit inc
if.end:
- %s32 = getelementptr inbounds %struct.obj2* %o, i64 0, i32 1
+ %s32 = getelementptr inbounds %struct.obj2, %struct.obj2* %o, i64 0, i32 1
; CHECK-NOT: load
- %1 = load i32* %s32, align 4
+ %1 = load i32, i32* %s32, align 4
; CHECK: incl {{[0-9][0-9]*}}({{.*}})
%inc1 = add i32 %1, 1
store i32 %inc1, i32* %s32, align 4
@@ -145,9 +145,9 @@ if.end:
; 16 bit inc
if.end1:
- %s16 = getelementptr inbounds %struct.obj2* %o, i64 0, i32 2
+ %s16 = getelementptr inbounds %struct.obj2, %struct.obj2* %o, i64 0, i32 2
; CHECK-NOT: load
- %2 = load i16* %s16, align 2
+ %2 = load i16, i16* %s16, align 2
; CHECK: incw {{[0-9][0-9]*}}({{.*}})
%inc2 = add i16 %2, 1
store i16 %inc2, i16* %s16, align 2
@@ -156,9 +156,9 @@ if.end1:
; 8 bit inc
if.end2:
- %s8 = getelementptr inbounds %struct.obj2* %o, i64 0, i32 3
+ %s8 = getelementptr inbounds %struct.obj2, %struct.obj2* %o, i64 0, i32 3
; CHECK-NOT: load
- %3 = load i8* %s8
+ %3 = load i8, i8* %s8
; CHECK: incb {{[0-9][0-9]*}}({{.*}})
%inc3 = add i8 %3, 1
store i8 %inc3, i8* %s8
@@ -181,9 +181,9 @@ define void @test3() nounwind ssp {
entry:
; CHECK-LABEL: test3:
; CHECK: decq 16(%rax)
- %0 = load i64** @foo, align 8
- %arrayidx = getelementptr inbounds i64* %0, i64 2
- %1 = load i64* %arrayidx, align 8
+ %0 = load i64*, i64** @foo, align 8
+ %arrayidx = getelementptr inbounds i64, i64* %0, i64 2
+ %1 = load i64, i64* %arrayidx, align 8
%dec = add i64 %1, -1
store i64 %dec, i64* %arrayidx, align 8
%cmp = icmp eq i64 %dec, 0
@@ -209,8 +209,8 @@ declare void @baz()
define void @test4() nounwind uwtable ssp {
entry:
- %0 = load i32* @x, align 4
- %1 = load i32* @y, align 4
+ %0 = load i32, i32* @x, align 4
+ %1 = load i32, i32* @y, align 4
%dec = add nsw i32 %1, -1
store i32 %dec, i32* @y, align 4
%tobool.i = icmp ne i32 %dec, 0
diff --git a/test/CodeGen/X86/rdrand.ll b/test/CodeGen/X86/rdrand.ll
index 48182d029eb2..107cde05a0e6 100644
--- a/test/CodeGen/X86/rdrand.ll
+++ b/test/CodeGen/X86/rdrand.ll
@@ -69,7 +69,7 @@ while.body: ; preds = %entry, %while.body
%p.addr.03 = phi i32* [ %incdec.ptr, %while.body ], [ %p, %entry ]
%n.addr.02 = phi i32 [ %dec, %while.body ], [ %n, %entry ]
%dec = add nsw i32 %n.addr.02, -1
- %incdec.ptr = getelementptr inbounds i32* %p.addr.03, i64 1
+ %incdec.ptr = getelementptr inbounds i32, i32* %p.addr.03, i64 1
%rand = tail call { i32, i32 } @llvm.x86.rdrand.32() nounwind
%v1 = extractvalue { i32, i32 } %rand, 0
store i32 %v1, i32* %p.addr.03, align 4
diff --git a/test/CodeGen/X86/recip-fastmath.ll b/test/CodeGen/X86/recip-fastmath.ll
index 83b86accdb38..fcd077092dab 100644
--- a/test/CodeGen/X86/recip-fastmath.ll
+++ b/test/CodeGen/X86/recip-fastmath.ll
@@ -1,6 +1,6 @@
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=core2 | FileCheck %s
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=btver2 | FileCheck %s --check-prefix=BTVER2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+use-recip-est,+avx -x86-recip-refinement-steps=2 | FileCheck %s --check-prefix=REFINE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=sse2 | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx,use-recip-est | FileCheck %s --check-prefix=RECIP
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx,use-recip-est -x86-recip-refinement-steps=2 | FileCheck %s --check-prefix=REFINE
; If the target's divss/divps instructions are substantially
; slower than rcpss/rcpps with a Newton-Raphson refinement,
@@ -20,13 +20,13 @@ define float @reciprocal_estimate(float %x) #0 {
; CHECK-NEXT: movaps
; CHECK-NEXT: retq
-; BTVER2-LABEL: reciprocal_estimate:
-; BTVER2: vrcpss
-; BTVER2: vmulss
-; BTVER2: vsubss
-; BTVER2: vmulss
-; BTVER2: vaddss
-; BTVER2-NEXT: retq
+; RECIP-LABEL: reciprocal_estimate:
+; RECIP: vrcpss
+; RECIP: vmulss
+; RECIP: vsubss
+; RECIP: vmulss
+; RECIP: vaddss
+; RECIP-NEXT: retq
; REFINE-LABEL: reciprocal_estimate:
; REFINE: vrcpss
@@ -51,13 +51,13 @@ define <4 x float> @reciprocal_estimate_v4f32(<4 x float> %x) #0 {
; CHECK-NEXT: movaps
; CHECK-NEXT: retq
-; BTVER2-LABEL: reciprocal_estimate_v4f32:
-; BTVER2: vrcpps
-; BTVER2: vmulps
-; BTVER2: vsubps
-; BTVER2: vmulps
-; BTVER2: vaddps
-; BTVER2-NEXT: retq
+; RECIP-LABEL: reciprocal_estimate_v4f32:
+; RECIP: vrcpps
+; RECIP: vmulps
+; RECIP: vsubps
+; RECIP: vmulps
+; RECIP: vaddps
+; RECIP-NEXT: retq
; REFINE-LABEL: reciprocal_estimate_v4f32:
; REFINE: vrcpps
@@ -85,13 +85,13 @@ define <8 x float> @reciprocal_estimate_v8f32(<8 x float> %x) #0 {
; CHECK-NEXT: movaps
; CHECK-NEXT: retq
-; BTVER2-LABEL: reciprocal_estimate_v8f32:
-; BTVER2: vrcpps
-; BTVER2: vmulps
-; BTVER2: vsubps
-; BTVER2: vmulps
-; BTVER2: vaddps
-; BTVER2-NEXT: retq
+; RECIP-LABEL: reciprocal_estimate_v8f32:
+; RECIP: vrcpps
+; RECIP: vmulps
+; RECIP: vsubps
+; RECIP: vmulps
+; RECIP: vaddps
+; RECIP-NEXT: retq
; REFINE-LABEL: reciprocal_estimate_v8f32:
; REFINE: vrcpps
diff --git a/test/CodeGen/X86/regalloc-reconcile-broken-hints.ll b/test/CodeGen/X86/regalloc-reconcile-broken-hints.ll
index 00679428ca63..016b0d13fc4a 100644
--- a/test/CodeGen/X86/regalloc-reconcile-broken-hints.ll
+++ b/test/CodeGen/X86/regalloc-reconcile-broken-hints.ll
@@ -43,10 +43,10 @@ declare noalias i32* @make_data()
define %struct._list* @make_list(i32* nocapture readonly %data, i32* nocapture %value, i32* nocapture %all) {
entry:
%call = tail call i8* @malloc(i64 16)
- %next = getelementptr inbounds i8* %call, i64 8
+ %next = getelementptr inbounds i8, i8* %call, i64 8
%tmp = bitcast i8* %next to %struct._list**
%tmp2 = bitcast i8* %call to %struct._list*
- %.pre78 = load i32* @ncol, align 4
+ %.pre78 = load i32, i32* @ncol, align 4
br label %for.cond1.preheader
for.cond1.preheader: ; preds = %for.inc32, %entry
@@ -60,7 +60,7 @@ for.body3: ; preds = %if.end31, %for.cond
%row.172 = phi i32 [ %row.3, %if.end31 ], [ 0, %for.cond1.preheader ]
%col.071 = phi i32 [ %inc, %if.end31 ], [ 0, %for.cond1.preheader ]
%call4 = tail call i32* @make_data()
- %tmp5 = load i32* @ncol, align 4
+ %tmp5 = load i32, i32* @ncol, align 4
%tobool14.i = icmp eq i32 %tmp5, 0
br i1 %tobool14.i, label %while.cond.i, label %while.body.lr.ph.i
@@ -83,10 +83,10 @@ while.cond.i: ; preds = %land.rhs.i, %while.
br i1 %tobool.i66, label %if.else, label %land.rhs.i
land.rhs.i: ; preds = %while.cond.i
- %arrayidx.i67 = getelementptr inbounds i32* %call4, i64 %indvars.iv.next.i65
- %tmp11 = load i32* %arrayidx.i67, align 4
- %arrayidx2.i68 = getelementptr inbounds i32* %data, i64 %indvars.iv.next.i65
- %tmp12 = load i32* %arrayidx2.i68, align 4
+ %arrayidx.i67 = getelementptr inbounds i32, i32* %call4, i64 %indvars.iv.next.i65
+ %tmp11 = load i32, i32* %arrayidx.i67, align 4
+ %arrayidx2.i68 = getelementptr inbounds i32, i32* %data, i64 %indvars.iv.next.i65
+ %tmp12 = load i32, i32* %arrayidx2.i68, align 4
%cmp.i69 = icmp eq i32 %tmp11, %tmp12
br i1 %cmp.i69, label %while.cond.i, label %equal_data.exit
@@ -95,15 +95,15 @@ equal_data.exit: ; preds = %land.rhs.i
br i1 %cmp3.i, label %if.else, label %if.then
if.then: ; preds = %equal_data.exit
- %next7 = getelementptr inbounds %struct._list* %current.173, i64 0, i32 1
- %tmp14 = load %struct._list** %next7, align 8
- %next12 = getelementptr inbounds %struct._list* %tmp14, i64 0, i32 1
+ %next7 = getelementptr inbounds %struct._list, %struct._list* %current.173, i64 0, i32 1
+ %tmp14 = load %struct._list*, %struct._list** %next7, align 8
+ %next12 = getelementptr inbounds %struct._list, %struct._list* %tmp14, i64 0, i32 1
store %struct._list* null, %struct._list** %next12, align 8
- %tmp15 = load %struct._list** %next7, align 8
- %tmp16 = load i32* %value, align 4
+ %tmp15 = load %struct._list*, %struct._list** %next7, align 8
+ %tmp16 = load i32, i32* %value, align 4
%cmp14 = icmp eq i32 %tmp16, 1
%.tmp16 = select i1 %cmp14, i32 0, i32 %tmp16
- %tmp18 = load i32* %all, align 4
+ %tmp18 = load i32, i32* %all, align 4
%tmp19 = or i32 %tmp18, %.tmp16
%tmp20 = icmp eq i32 %tmp19, 0
br i1 %tmp20, label %if.then19, label %if.end31
@@ -123,12 +123,12 @@ if.end31: ; preds = %if.else, %if.then19
%row.3 = phi i32 [ %.row.172, %if.else ], [ %row.172, %if.then ], [ 0, %if.then19 ]
%current.2 = phi %struct._list* [ %current.173, %if.else ], [ %tmp15, %if.then ], [ %tmp15, %if.then19 ]
%inc = add nsw i32 %col.1, 1
- %tmp25 = load i32* @ncol, align 4
+ %tmp25 = load i32, i32* @ncol, align 4
%cmp2 = icmp eq i32 %inc, %tmp25
br i1 %cmp2, label %for.cond1.for.inc32_crit_edge, label %for.body3
for.cond1.for.inc32_crit_edge: ; preds = %if.end31
- %.pre79 = load i32* @nrow, align 4
+ %.pre79 = load i32, i32* @nrow, align 4
br label %for.inc32
for.inc32: ; preds = %for.cond1.for.inc32_crit_edge, %for.cond1.preheader
@@ -140,6 +140,6 @@ for.inc32: ; preds = %for.cond1.for.inc32
br i1 %cmp, label %for.end34, label %for.cond1.preheader
for.end34: ; preds = %for.inc32
- %.pre = load %struct._list** %tmp, align 8
+ %.pre = load %struct._list*, %struct._list** %tmp, align 8
ret %struct._list* %.pre
}
diff --git a/test/CodeGen/X86/regpressure.ll b/test/CodeGen/X86/regpressure.ll
index 1f756bee8a9d..8f352b8fbb5a 100644
--- a/test/CodeGen/X86/regpressure.ll
+++ b/test/CodeGen/X86/regpressure.ll
@@ -9,57 +9,57 @@
;; folded into the multiplies, 2 registers otherwise.
define i32 @regpressure1(i32* %P) {
- %A = load i32* %P ; <i32> [#uses=1]
- %Bp = getelementptr i32* %P, i32 1 ; <i32*> [#uses=1]
- %B = load i32* %Bp ; <i32> [#uses=1]
+ %A = load i32, i32* %P ; <i32> [#uses=1]
+ %Bp = getelementptr i32, i32* %P, i32 1 ; <i32*> [#uses=1]
+ %B = load i32, i32* %Bp ; <i32> [#uses=1]
%s1 = mul i32 %A, %B ; <i32> [#uses=1]
- %Cp = getelementptr i32* %P, i32 2 ; <i32*> [#uses=1]
- %C = load i32* %Cp ; <i32> [#uses=1]
+ %Cp = getelementptr i32, i32* %P, i32 2 ; <i32*> [#uses=1]
+ %C = load i32, i32* %Cp ; <i32> [#uses=1]
%s2 = mul i32 %s1, %C ; <i32> [#uses=1]
- %Dp = getelementptr i32* %P, i32 3 ; <i32*> [#uses=1]
- %D = load i32* %Dp ; <i32> [#uses=1]
+ %Dp = getelementptr i32, i32* %P, i32 3 ; <i32*> [#uses=1]
+ %D = load i32, i32* %Dp ; <i32> [#uses=1]
%s3 = mul i32 %s2, %D ; <i32> [#uses=1]
- %Ep = getelementptr i32* %P, i32 4 ; <i32*> [#uses=1]
- %E = load i32* %Ep ; <i32> [#uses=1]
+ %Ep = getelementptr i32, i32* %P, i32 4 ; <i32*> [#uses=1]
+ %E = load i32, i32* %Ep ; <i32> [#uses=1]
%s4 = mul i32 %s3, %E ; <i32> [#uses=1]
- %Fp = getelementptr i32* %P, i32 5 ; <i32*> [#uses=1]
- %F = load i32* %Fp ; <i32> [#uses=1]
+ %Fp = getelementptr i32, i32* %P, i32 5 ; <i32*> [#uses=1]
+ %F = load i32, i32* %Fp ; <i32> [#uses=1]
%s5 = mul i32 %s4, %F ; <i32> [#uses=1]
- %Gp = getelementptr i32* %P, i32 6 ; <i32*> [#uses=1]
- %G = load i32* %Gp ; <i32> [#uses=1]
+ %Gp = getelementptr i32, i32* %P, i32 6 ; <i32*> [#uses=1]
+ %G = load i32, i32* %Gp ; <i32> [#uses=1]
%s6 = mul i32 %s5, %G ; <i32> [#uses=1]
- %Hp = getelementptr i32* %P, i32 7 ; <i32*> [#uses=1]
- %H = load i32* %Hp ; <i32> [#uses=1]
+ %Hp = getelementptr i32, i32* %P, i32 7 ; <i32*> [#uses=1]
+ %H = load i32, i32* %Hp ; <i32> [#uses=1]
%s7 = mul i32 %s6, %H ; <i32> [#uses=1]
- %Ip = getelementptr i32* %P, i32 8 ; <i32*> [#uses=1]
- %I = load i32* %Ip ; <i32> [#uses=1]
+ %Ip = getelementptr i32, i32* %P, i32 8 ; <i32*> [#uses=1]
+ %I = load i32, i32* %Ip ; <i32> [#uses=1]
%s8 = mul i32 %s7, %I ; <i32> [#uses=1]
- %Jp = getelementptr i32* %P, i32 9 ; <i32*> [#uses=1]
- %J = load i32* %Jp ; <i32> [#uses=1]
+ %Jp = getelementptr i32, i32* %P, i32 9 ; <i32*> [#uses=1]
+ %J = load i32, i32* %Jp ; <i32> [#uses=1]
%s9 = mul i32 %s8, %J ; <i32> [#uses=1]
ret i32 %s9
}
define i32 @regpressure2(i32* %P) {
- %A = load i32* %P ; <i32> [#uses=1]
- %Bp = getelementptr i32* %P, i32 1 ; <i32*> [#uses=1]
- %B = load i32* %Bp ; <i32> [#uses=1]
- %Cp = getelementptr i32* %P, i32 2 ; <i32*> [#uses=1]
- %C = load i32* %Cp ; <i32> [#uses=1]
- %Dp = getelementptr i32* %P, i32 3 ; <i32*> [#uses=1]
- %D = load i32* %Dp ; <i32> [#uses=1]
- %Ep = getelementptr i32* %P, i32 4 ; <i32*> [#uses=1]
- %E = load i32* %Ep ; <i32> [#uses=1]
- %Fp = getelementptr i32* %P, i32 5 ; <i32*> [#uses=1]
- %F = load i32* %Fp ; <i32> [#uses=1]
- %Gp = getelementptr i32* %P, i32 6 ; <i32*> [#uses=1]
- %G = load i32* %Gp ; <i32> [#uses=1]
- %Hp = getelementptr i32* %P, i32 7 ; <i32*> [#uses=1]
- %H = load i32* %Hp ; <i32> [#uses=1]
- %Ip = getelementptr i32* %P, i32 8 ; <i32*> [#uses=1]
- %I = load i32* %Ip ; <i32> [#uses=1]
- %Jp = getelementptr i32* %P, i32 9 ; <i32*> [#uses=1]
- %J = load i32* %Jp ; <i32> [#uses=1]
+ %A = load i32, i32* %P ; <i32> [#uses=1]
+ %Bp = getelementptr i32, i32* %P, i32 1 ; <i32*> [#uses=1]
+ %B = load i32, i32* %Bp ; <i32> [#uses=1]
+ %Cp = getelementptr i32, i32* %P, i32 2 ; <i32*> [#uses=1]
+ %C = load i32, i32* %Cp ; <i32> [#uses=1]
+ %Dp = getelementptr i32, i32* %P, i32 3 ; <i32*> [#uses=1]
+ %D = load i32, i32* %Dp ; <i32> [#uses=1]
+ %Ep = getelementptr i32, i32* %P, i32 4 ; <i32*> [#uses=1]
+ %E = load i32, i32* %Ep ; <i32> [#uses=1]
+ %Fp = getelementptr i32, i32* %P, i32 5 ; <i32*> [#uses=1]
+ %F = load i32, i32* %Fp ; <i32> [#uses=1]
+ %Gp = getelementptr i32, i32* %P, i32 6 ; <i32*> [#uses=1]
+ %G = load i32, i32* %Gp ; <i32> [#uses=1]
+ %Hp = getelementptr i32, i32* %P, i32 7 ; <i32*> [#uses=1]
+ %H = load i32, i32* %Hp ; <i32> [#uses=1]
+ %Ip = getelementptr i32, i32* %P, i32 8 ; <i32*> [#uses=1]
+ %I = load i32, i32* %Ip ; <i32> [#uses=1]
+ %Jp = getelementptr i32, i32* %P, i32 9 ; <i32*> [#uses=1]
+ %J = load i32, i32* %Jp ; <i32> [#uses=1]
%s1 = mul i32 %A, %B ; <i32> [#uses=1]
%s2 = mul i32 %s1, %C ; <i32> [#uses=1]
%s3 = mul i32 %s2, %D ; <i32> [#uses=1]
@@ -73,25 +73,25 @@ define i32 @regpressure2(i32* %P) {
}
define i32 @regpressure3(i16* %P, i1 %Cond, i32* %Other) {
- %A = load i16* %P ; <i16> [#uses=1]
- %Bp = getelementptr i16* %P, i32 1 ; <i16*> [#uses=1]
- %B = load i16* %Bp ; <i16> [#uses=1]
- %Cp = getelementptr i16* %P, i32 2 ; <i16*> [#uses=1]
- %C = load i16* %Cp ; <i16> [#uses=1]
- %Dp = getelementptr i16* %P, i32 3 ; <i16*> [#uses=1]
- %D = load i16* %Dp ; <i16> [#uses=1]
- %Ep = getelementptr i16* %P, i32 4 ; <i16*> [#uses=1]
- %E = load i16* %Ep ; <i16> [#uses=1]
- %Fp = getelementptr i16* %P, i32 5 ; <i16*> [#uses=1]
- %F = load i16* %Fp ; <i16> [#uses=1]
- %Gp = getelementptr i16* %P, i32 6 ; <i16*> [#uses=1]
- %G = load i16* %Gp ; <i16> [#uses=1]
- %Hp = getelementptr i16* %P, i32 7 ; <i16*> [#uses=1]
- %H = load i16* %Hp ; <i16> [#uses=1]
- %Ip = getelementptr i16* %P, i32 8 ; <i16*> [#uses=1]
- %I = load i16* %Ip ; <i16> [#uses=1]
- %Jp = getelementptr i16* %P, i32 9 ; <i16*> [#uses=1]
- %J = load i16* %Jp ; <i16> [#uses=1]
+ %A = load i16, i16* %P ; <i16> [#uses=1]
+ %Bp = getelementptr i16, i16* %P, i32 1 ; <i16*> [#uses=1]
+ %B = load i16, i16* %Bp ; <i16> [#uses=1]
+ %Cp = getelementptr i16, i16* %P, i32 2 ; <i16*> [#uses=1]
+ %C = load i16, i16* %Cp ; <i16> [#uses=1]
+ %Dp = getelementptr i16, i16* %P, i32 3 ; <i16*> [#uses=1]
+ %D = load i16, i16* %Dp ; <i16> [#uses=1]
+ %Ep = getelementptr i16, i16* %P, i32 4 ; <i16*> [#uses=1]
+ %E = load i16, i16* %Ep ; <i16> [#uses=1]
+ %Fp = getelementptr i16, i16* %P, i32 5 ; <i16*> [#uses=1]
+ %F = load i16, i16* %Fp ; <i16> [#uses=1]
+ %Gp = getelementptr i16, i16* %P, i32 6 ; <i16*> [#uses=1]
+ %G = load i16, i16* %Gp ; <i16> [#uses=1]
+ %Hp = getelementptr i16, i16* %P, i32 7 ; <i16*> [#uses=1]
+ %H = load i16, i16* %Hp ; <i16> [#uses=1]
+ %Ip = getelementptr i16, i16* %P, i32 8 ; <i16*> [#uses=1]
+ %I = load i16, i16* %Ip ; <i16> [#uses=1]
+ %Jp = getelementptr i16, i16* %P, i32 9 ; <i16*> [#uses=1]
+ %J = load i16, i16* %Jp ; <i16> [#uses=1]
%A.upgrd.1 = sext i16 %A to i32 ; <i32> [#uses=1]
%B.upgrd.2 = sext i16 %B to i32 ; <i32> [#uses=1]
%D.upgrd.3 = sext i16 %D to i32 ; <i32> [#uses=1]
diff --git a/test/CodeGen/X86/remat-constant.ll b/test/CodeGen/X86/remat-constant.ll
index 3e813209d410..5a6826f38090 100644
--- a/test/CodeGen/X86/remat-constant.ll
+++ b/test/CodeGen/X86/remat-constant.ll
@@ -7,7 +7,7 @@ declare void @bar() nounwind
declare void @qux(float %f) nounwind
define void @foo() nounwind {
- %f = load float* @a
+ %f = load float, float* @a
call void @bar()
call void @qux(float %f)
call void @qux(float %f)
diff --git a/test/CodeGen/X86/remat-fold-load.ll b/test/CodeGen/X86/remat-fold-load.ll
index de77ad375672..3478033bfbf1 100644
--- a/test/CodeGen/X86/remat-fold-load.ll
+++ b/test/CodeGen/X86/remat-fold-load.ll
@@ -38,26 +38,26 @@ while.body12: ; preds = %if.end24, %while.bo
if.then.i.i.i.i71: ; preds = %while.body12
%call4.i.i.i.i68 = call noalias i8* @malloc(i32 undef) nounwind
- %tmp1 = getelementptr inbounds %type_a* %tmp, i32 0, i32 1, i32 0, i32 1
+ %tmp1 = getelementptr inbounds %type_a, %type_a* %tmp, i32 0, i32 1, i32 0, i32 1
%buf_6.i.i.i.i70 = bitcast %type_d* %tmp1 to i8**
- %tmp2 = load i8** %buf_6.i.i.i.i70, align 4
+ %tmp2 = load i8*, i8** %buf_6.i.i.i.i70, align 4
call void @llvm.memcpy.p0i8.p0i8.i32(i8* undef, i8* %tmp2, i32 undef, i32 1, i1 false) nounwind
unreachable
if.else.i.i.i.i74: ; preds = %while.body12
- %i_.i.i.i.i72 = getelementptr inbounds %type_a* %tmp, i32 0, i32 1, i32 0, i32 1, i32 0
- %tmp3 = load i64* %i_.i.i.i.i72, align 4
+ %i_.i.i.i.i72 = getelementptr inbounds %type_a, %type_a* %tmp, i32 0, i32 1, i32 0, i32 1, i32 0
+ %tmp3 = load i64, i64* %i_.i.i.i.i72, align 4
%tmp4 = zext i64 %tmp3 to i128
%tmp5 = shl nuw nsw i128 %tmp4, 32
%ins148 = or i128 %tmp5, %ins151
- %second3.i.i76 = getelementptr inbounds %type_a* %tmp, i32 0, i32 1, i32 1
- %tmp6 = load i32* %second3.i.i76, align 4
+ %second3.i.i76 = getelementptr inbounds %type_a, %type_a* %tmp, i32 0, i32 1, i32 1
+ %tmp6 = load i32, i32* %second3.i.i76, align 4
%tmp7 = zext i32 %tmp6 to i128
%tmp8 = shl nuw i128 %tmp7, 96
%mask144 = and i128 %ins148, 79228162495817593519834398720
- %tmp9 = load %type_e** undef, align 4
- %len_.i.i.i.i86 = getelementptr inbounds %type_e* %tmp9, i32 0, i32 0, i32 0
- %tmp10 = load i32* %len_.i.i.i.i86, align 4
+ %tmp9 = load %type_e*, %type_e** undef, align 4
+ %len_.i.i.i.i86 = getelementptr inbounds %type_e, %type_e* %tmp9, i32 0, i32 0, i32 0
+ %tmp10 = load i32, i32* %len_.i.i.i.i86, align 4
%tmp11 = zext i32 %tmp10 to i128
%ins135 = or i128 %tmp11, %ins135156160
%cmp.i.i.i.i.i88 = icmp sgt i32 %tmp10, 8
@@ -66,20 +66,20 @@ if.else.i.i.i.i74: ; preds = %while.body12
if.then.i.i.i.i92: ; preds = %if.else.i.i.i.i74
%call4.i.i.i.i89 = call noalias i8* @malloc(i32 %tmp10) nounwind
%ins126 = or i128 0, %ins135
- %tmp12 = getelementptr inbounds %type_e* %tmp9, i32 0, i32 0, i32 1
+ %tmp12 = getelementptr inbounds %type_e, %type_e* %tmp9, i32 0, i32 0, i32 1
%buf_6.i.i.i.i91 = bitcast %type_d* %tmp12 to i8**
- %tmp13 = load i8** %buf_6.i.i.i.i91, align 4
+ %tmp13 = load i8*, i8** %buf_6.i.i.i.i91, align 4
call void @llvm.memcpy.p0i8.p0i8.i32(i8* %call4.i.i.i.i89, i8* %tmp13, i32 %tmp10, i32 1, i1 false) nounwind
br label %A
if.else.i.i.i.i95: ; preds = %if.else.i.i.i.i74
- %i_.i.i.i.i93 = getelementptr inbounds %type_e* %tmp9, i32 0, i32 0, i32 1, i32 0
+ %i_.i.i.i.i93 = getelementptr inbounds %type_e, %type_e* %tmp9, i32 0, i32 0, i32 1, i32 0
br label %A
A: ; preds = %if.else.i.i.i.i95, %if.then.i.i.i.i92
%ins135157 = phi i128 [ %ins126, %if.then.i.i.i.i92 ], [ undef, %if.else.i.i.i.i95 ]
- %second3.i.i97 = getelementptr inbounds %type_e* %tmp9, i32 0, i32 1
- %tmp14 = load i64* %second3.i.i97, align 4
+ %second3.i.i97 = getelementptr inbounds %type_e, %type_e* %tmp9, i32 0, i32 1
+ %tmp14 = load i64, i64* %second3.i.i97, align 4
%tmp15 = trunc i64 %tmp14 to i32
%cmp.i99 = icmp sgt i32 %tmp6, %tmp15
%tmp16 = trunc i128 %ins135157 to i32
@@ -118,13 +118,13 @@ E: ; preds = %D
if.end24: ; preds = %E, %C
%phitmp = or i128 %tmp8, %mask144
%phitmp158 = or i128 undef, undef
- %tmp18 = load %type_a** undef, align 4
- %tmp19 = load %type_a** undef, align 4
+ %tmp18 = load %type_a*, %type_a** undef, align 4
+ %tmp19 = load %type_a*, %type_a** undef, align 4
%cmp.i49 = icmp eq %type_a* %tmp18, %tmp19
br i1 %cmp.i49, label %while.cond10.while.end26_crit_edge, label %while.body12
while.cond10.while.end26_crit_edge: ; preds = %if.end24
- %.pre = load %type_e** undef, align 4
+ %.pre = load %type_e*, %type_e** undef, align 4
br label %while.end26
while.end26: ; preds = %while.cond10.while.end26_crit_edge, %while.end.while.end26_crit_edge
diff --git a/test/CodeGen/X86/remat-invalid-liveness.ll b/test/CodeGen/X86/remat-invalid-liveness.ll
index d285e83b7981..c6b43b0dd3e4 100644
--- a/test/CodeGen/X86/remat-invalid-liveness.ll
+++ b/test/CodeGen/X86/remat-invalid-liveness.ll
@@ -40,7 +40,7 @@ entry:
sw.bb.i: ; preds = %entry
%call.i.i.i = tail call i32 undef(%struct.A* %ht, i8 zeroext 22, i32 undef, i32 0, %struct.D* undef)
- %bf.load.i.i = load i128* undef, align 4
+ %bf.load.i.i = load i128, i128* undef, align 4
%bf.lshr.i.i = lshr i128 %bf.load.i.i, %const72
%shl1.i.i = shl nuw nsw i128 %bf.lshr.i.i, 8
%shl.i.i = trunc i128 %shl1.i.i to i32
@@ -50,22 +50,22 @@ __XXX2.exit.i.i: ; preds = %sw.bb.i
%extract11.i.i.i = lshr i128 %bf.load.i.i, %const3
%extract.t12.i.i.i = trunc i128 %extract11.i.i.i to i32
%bf.cast7.i.i.i = and i32 %extract.t12.i.i.i, 3
- %arrayidx.i.i.i = getelementptr inbounds %struct.A* %ht, i32 0, i32 3, i32 %bf.cast7.i.i.i
+ %arrayidx.i.i.i = getelementptr inbounds %struct.A, %struct.A* %ht, i32 0, i32 3, i32 %bf.cast7.i.i.i
br label %cond.end12.i.i
cond.false10.i.i: ; preds = %sw.bb.i
- %arrayidx.i6.i.i = getelementptr inbounds %struct.A* %ht, i32 0, i32 3, i32 0
+ %arrayidx.i6.i.i = getelementptr inbounds %struct.A, %struct.A* %ht, i32 0, i32 3, i32 0
br label %cond.end12.i.i
cond.end12.i.i: ; preds = %cond.false10.i.i, %__XXX2.exit.i.i
%.sink.in.i.i = phi i8** [ %arrayidx.i.i.i, %__XXX2.exit.i.i ], [ %arrayidx.i6.i.i, %cond.false10.i.i ]
- %.sink.i.i = load i8** %.sink.in.i.i, align 4
+ %.sink.i.i = load i8*, i8** %.sink.in.i.i, align 4
%tmp = bitcast i8* %.sink.i.i to %union.E*
br i1 undef, label %for.body.i.i, label %if.end196
for.body.i.i: ; preds = %for.body.i.i, %cond.end12.i.i
- %weak.i.i = getelementptr inbounds %union.E* %tmp, i32 undef, i32 0
- %tmp1 = load i32* %weak.i.i, align 4
+ %weak.i.i = getelementptr inbounds %union.E, %union.E* %tmp, i32 undef, i32 0
+ %tmp1 = load i32, i32* %weak.i.i, align 4
%cmp36.i.i = icmp ne i32 %tmp1, %shl.i.i
%or.cond = and i1 %cmp36.i.i, false
br i1 %or.cond, label %for.body.i.i, label %if.end196
diff --git a/test/CodeGen/X86/remat-scalar-zero.ll b/test/CodeGen/X86/remat-scalar-zero.ll
index f6095a75561c..0f081937b241 100644
--- a/test/CodeGen/X86/remat-scalar-zero.ll
+++ b/test/CodeGen/X86/remat-scalar-zero.ll
@@ -10,88 +10,88 @@
define void @foo(double* nocapture %x, double* nocapture %y) nounwind {
entry:
- %tmp1 = load double* %x ; <double> [#uses=1]
- %arrayidx4 = getelementptr inbounds double* %x, i64 1 ; <double*> [#uses=1]
- %tmp5 = load double* %arrayidx4 ; <double> [#uses=1]
- %arrayidx8 = getelementptr inbounds double* %x, i64 2 ; <double*> [#uses=1]
- %tmp9 = load double* %arrayidx8 ; <double> [#uses=1]
- %arrayidx12 = getelementptr inbounds double* %x, i64 3 ; <double*> [#uses=1]
- %tmp13 = load double* %arrayidx12 ; <double> [#uses=1]
- %arrayidx16 = getelementptr inbounds double* %x, i64 4 ; <double*> [#uses=1]
- %tmp17 = load double* %arrayidx16 ; <double> [#uses=1]
- %arrayidx20 = getelementptr inbounds double* %x, i64 5 ; <double*> [#uses=1]
- %tmp21 = load double* %arrayidx20 ; <double> [#uses=1]
- %arrayidx24 = getelementptr inbounds double* %x, i64 6 ; <double*> [#uses=1]
- %tmp25 = load double* %arrayidx24 ; <double> [#uses=1]
- %arrayidx28 = getelementptr inbounds double* %x, i64 7 ; <double*> [#uses=1]
- %tmp29 = load double* %arrayidx28 ; <double> [#uses=1]
- %arrayidx32 = getelementptr inbounds double* %x, i64 8 ; <double*> [#uses=1]
- %tmp33 = load double* %arrayidx32 ; <double> [#uses=1]
- %arrayidx36 = getelementptr inbounds double* %x, i64 9 ; <double*> [#uses=1]
- %tmp37 = load double* %arrayidx36 ; <double> [#uses=1]
- %arrayidx40 = getelementptr inbounds double* %x, i64 10 ; <double*> [#uses=1]
- %tmp41 = load double* %arrayidx40 ; <double> [#uses=1]
- %arrayidx44 = getelementptr inbounds double* %x, i64 11 ; <double*> [#uses=1]
- %tmp45 = load double* %arrayidx44 ; <double> [#uses=1]
- %arrayidx48 = getelementptr inbounds double* %x, i64 12 ; <double*> [#uses=1]
- %tmp49 = load double* %arrayidx48 ; <double> [#uses=1]
- %arrayidx52 = getelementptr inbounds double* %x, i64 13 ; <double*> [#uses=1]
- %tmp53 = load double* %arrayidx52 ; <double> [#uses=1]
- %arrayidx56 = getelementptr inbounds double* %x, i64 14 ; <double*> [#uses=1]
- %tmp57 = load double* %arrayidx56 ; <double> [#uses=1]
- %arrayidx60 = getelementptr inbounds double* %x, i64 15 ; <double*> [#uses=1]
- %tmp61 = load double* %arrayidx60 ; <double> [#uses=1]
- %arrayidx64 = getelementptr inbounds double* %x, i64 16 ; <double*> [#uses=1]
- %tmp65 = load double* %arrayidx64 ; <double> [#uses=1]
+ %tmp1 = load double, double* %x ; <double> [#uses=1]
+ %arrayidx4 = getelementptr inbounds double, double* %x, i64 1 ; <double*> [#uses=1]
+ %tmp5 = load double, double* %arrayidx4 ; <double> [#uses=1]
+ %arrayidx8 = getelementptr inbounds double, double* %x, i64 2 ; <double*> [#uses=1]
+ %tmp9 = load double, double* %arrayidx8 ; <double> [#uses=1]
+ %arrayidx12 = getelementptr inbounds double, double* %x, i64 3 ; <double*> [#uses=1]
+ %tmp13 = load double, double* %arrayidx12 ; <double> [#uses=1]
+ %arrayidx16 = getelementptr inbounds double, double* %x, i64 4 ; <double*> [#uses=1]
+ %tmp17 = load double, double* %arrayidx16 ; <double> [#uses=1]
+ %arrayidx20 = getelementptr inbounds double, double* %x, i64 5 ; <double*> [#uses=1]
+ %tmp21 = load double, double* %arrayidx20 ; <double> [#uses=1]
+ %arrayidx24 = getelementptr inbounds double, double* %x, i64 6 ; <double*> [#uses=1]
+ %tmp25 = load double, double* %arrayidx24 ; <double> [#uses=1]
+ %arrayidx28 = getelementptr inbounds double, double* %x, i64 7 ; <double*> [#uses=1]
+ %tmp29 = load double, double* %arrayidx28 ; <double> [#uses=1]
+ %arrayidx32 = getelementptr inbounds double, double* %x, i64 8 ; <double*> [#uses=1]
+ %tmp33 = load double, double* %arrayidx32 ; <double> [#uses=1]
+ %arrayidx36 = getelementptr inbounds double, double* %x, i64 9 ; <double*> [#uses=1]
+ %tmp37 = load double, double* %arrayidx36 ; <double> [#uses=1]
+ %arrayidx40 = getelementptr inbounds double, double* %x, i64 10 ; <double*> [#uses=1]
+ %tmp41 = load double, double* %arrayidx40 ; <double> [#uses=1]
+ %arrayidx44 = getelementptr inbounds double, double* %x, i64 11 ; <double*> [#uses=1]
+ %tmp45 = load double, double* %arrayidx44 ; <double> [#uses=1]
+ %arrayidx48 = getelementptr inbounds double, double* %x, i64 12 ; <double*> [#uses=1]
+ %tmp49 = load double, double* %arrayidx48 ; <double> [#uses=1]
+ %arrayidx52 = getelementptr inbounds double, double* %x, i64 13 ; <double*> [#uses=1]
+ %tmp53 = load double, double* %arrayidx52 ; <double> [#uses=1]
+ %arrayidx56 = getelementptr inbounds double, double* %x, i64 14 ; <double*> [#uses=1]
+ %tmp57 = load double, double* %arrayidx56 ; <double> [#uses=1]
+ %arrayidx60 = getelementptr inbounds double, double* %x, i64 15 ; <double*> [#uses=1]
+ %tmp61 = load double, double* %arrayidx60 ; <double> [#uses=1]
+ %arrayidx64 = getelementptr inbounds double, double* %x, i64 16 ; <double*> [#uses=1]
+ %tmp65 = load double, double* %arrayidx64 ; <double> [#uses=1]
%div = fdiv double %tmp1, 0.000000e+00 ; <double> [#uses=1]
store double %div, double* %y
%div70 = fdiv double %tmp5, 2.000000e-01 ; <double> [#uses=1]
- %arrayidx72 = getelementptr inbounds double* %y, i64 1 ; <double*> [#uses=1]
+ %arrayidx72 = getelementptr inbounds double, double* %y, i64 1 ; <double*> [#uses=1]
store double %div70, double* %arrayidx72
%div74 = fdiv double %tmp9, 2.000000e-01 ; <double> [#uses=1]
- %arrayidx76 = getelementptr inbounds double* %y, i64 2 ; <double*> [#uses=1]
+ %arrayidx76 = getelementptr inbounds double, double* %y, i64 2 ; <double*> [#uses=1]
store double %div74, double* %arrayidx76
%div78 = fdiv double %tmp13, 2.000000e-01 ; <double> [#uses=1]
- %arrayidx80 = getelementptr inbounds double* %y, i64 3 ; <double*> [#uses=1]
+ %arrayidx80 = getelementptr inbounds double, double* %y, i64 3 ; <double*> [#uses=1]
store double %div78, double* %arrayidx80
%div82 = fdiv double %tmp17, 2.000000e-01 ; <double> [#uses=1]
- %arrayidx84 = getelementptr inbounds double* %y, i64 4 ; <double*> [#uses=1]
+ %arrayidx84 = getelementptr inbounds double, double* %y, i64 4 ; <double*> [#uses=1]
store double %div82, double* %arrayidx84
%div86 = fdiv double %tmp21, 2.000000e-01 ; <double> [#uses=1]
- %arrayidx88 = getelementptr inbounds double* %y, i64 5 ; <double*> [#uses=1]
+ %arrayidx88 = getelementptr inbounds double, double* %y, i64 5 ; <double*> [#uses=1]
store double %div86, double* %arrayidx88
%div90 = fdiv double %tmp25, 2.000000e-01 ; <double> [#uses=1]
- %arrayidx92 = getelementptr inbounds double* %y, i64 6 ; <double*> [#uses=1]
+ %arrayidx92 = getelementptr inbounds double, double* %y, i64 6 ; <double*> [#uses=1]
store double %div90, double* %arrayidx92
%div94 = fdiv double %tmp29, 2.000000e-01 ; <double> [#uses=1]
- %arrayidx96 = getelementptr inbounds double* %y, i64 7 ; <double*> [#uses=1]
+ %arrayidx96 = getelementptr inbounds double, double* %y, i64 7 ; <double*> [#uses=1]
store double %div94, double* %arrayidx96
%div98 = fdiv double %tmp33, 2.000000e-01 ; <double> [#uses=1]
- %arrayidx100 = getelementptr inbounds double* %y, i64 8 ; <double*> [#uses=1]
+ %arrayidx100 = getelementptr inbounds double, double* %y, i64 8 ; <double*> [#uses=1]
store double %div98, double* %arrayidx100
%div102 = fdiv double %tmp37, 2.000000e-01 ; <double> [#uses=1]
- %arrayidx104 = getelementptr inbounds double* %y, i64 9 ; <double*> [#uses=1]
+ %arrayidx104 = getelementptr inbounds double, double* %y, i64 9 ; <double*> [#uses=1]
store double %div102, double* %arrayidx104
%div106 = fdiv double %tmp41, 2.000000e-01 ; <double> [#uses=1]
- %arrayidx108 = getelementptr inbounds double* %y, i64 10 ; <double*> [#uses=1]
+ %arrayidx108 = getelementptr inbounds double, double* %y, i64 10 ; <double*> [#uses=1]
store double %div106, double* %arrayidx108
%div110 = fdiv double %tmp45, 2.000000e-01 ; <double> [#uses=1]
- %arrayidx112 = getelementptr inbounds double* %y, i64 11 ; <double*> [#uses=1]
+ %arrayidx112 = getelementptr inbounds double, double* %y, i64 11 ; <double*> [#uses=1]
store double %div110, double* %arrayidx112
%div114 = fdiv double %tmp49, 2.000000e-01 ; <double> [#uses=1]
- %arrayidx116 = getelementptr inbounds double* %y, i64 12 ; <double*> [#uses=1]
+ %arrayidx116 = getelementptr inbounds double, double* %y, i64 12 ; <double*> [#uses=1]
store double %div114, double* %arrayidx116
%div118 = fdiv double %tmp53, 2.000000e-01 ; <double> [#uses=1]
- %arrayidx120 = getelementptr inbounds double* %y, i64 13 ; <double*> [#uses=1]
+ %arrayidx120 = getelementptr inbounds double, double* %y, i64 13 ; <double*> [#uses=1]
store double %div118, double* %arrayidx120
%div122 = fdiv double %tmp57, 2.000000e-01 ; <double> [#uses=1]
- %arrayidx124 = getelementptr inbounds double* %y, i64 14 ; <double*> [#uses=1]
+ %arrayidx124 = getelementptr inbounds double, double* %y, i64 14 ; <double*> [#uses=1]
store double %div122, double* %arrayidx124
%div126 = fdiv double %tmp61, 2.000000e-01 ; <double> [#uses=1]
- %arrayidx128 = getelementptr inbounds double* %y, i64 15 ; <double*> [#uses=1]
+ %arrayidx128 = getelementptr inbounds double, double* %y, i64 15 ; <double*> [#uses=1]
store double %div126, double* %arrayidx128
%div130 = fdiv double %tmp65, 0.000000e+00 ; <double> [#uses=1]
- %arrayidx132 = getelementptr inbounds double* %y, i64 16 ; <double*> [#uses=1]
+ %arrayidx132 = getelementptr inbounds double, double* %y, i64 16 ; <double*> [#uses=1]
store double %div130, double* %arrayidx132
ret void
}
diff --git a/test/CodeGen/X86/reverse_branches.ll b/test/CodeGen/X86/reverse_branches.ll
index ee6333e61e88..83a172b8de17 100644
--- a/test/CodeGen/X86/reverse_branches.ll
+++ b/test/CodeGen/X86/reverse_branches.ll
@@ -30,15 +30,15 @@ for.cond1:
br i1 %cmp2, label %for.body3, label %for.inc9
for.body3:
- %arraydecay = getelementptr inbounds [1000 x [1001 x i8]]* %strs, i64 0, i64 %indvars.iv50, i64 0
+ %arraydecay = getelementptr inbounds [1000 x [1001 x i8]], [1000 x [1001 x i8]]* %strs, i64 0, i64 %indvars.iv50, i64 0
%call = call i8* @memchr(i8* %arraydecay, i32 120, i64 1000)
- %add.ptr = getelementptr inbounds [1000 x [1001 x i8]]* %strs, i64 0, i64 %indvars.iv50, i64 %indvars.iv50
+ %add.ptr = getelementptr inbounds [1000 x [1001 x i8]], [1000 x [1001 x i8]]* %strs, i64 0, i64 %indvars.iv50, i64 %indvars.iv50
%cmp7 = icmp eq i8* %call, %add.ptr
%indvars.iv.next51 = add i64 %indvars.iv50, 1
br i1 %cmp7, label %for.cond1, label %if.then
if.then:
- %puts = call i32 @puts(i8* getelementptr inbounds ([5 x i8]* @str4, i64 0, i64 0))
+ %puts = call i32 @puts(i8* getelementptr inbounds ([5 x i8], [5 x i8]* @str4, i64 0, i64 0))
call void @exit(i32 1) noreturn
unreachable
@@ -47,7 +47,7 @@ for.inc9:
br label %for.cond
for.end11:
- %puts42 = call i32 @puts(i8* getelementptr inbounds ([7 x i8]* @.str2, i64 0, i64 0))
+ %puts42 = call i32 @puts(i8* getelementptr inbounds ([7 x i8], [7 x i8]* @.str2, i64 0, i64 0))
br label %for.cond14
for.cond14:
@@ -62,30 +62,30 @@ for.cond18:
br i1 %cmp19, label %for.body20, label %for.inc38
for.body20:
- %arraydecay24 = getelementptr inbounds [1000 x [1001 x i8]]* %strs, i64 0, i64 %indvars.iv, i64 0
+ %arraydecay24 = getelementptr inbounds [1000 x [1001 x i8]], [1000 x [1001 x i8]]* %strs, i64 0, i64 %indvars.iv, i64 0
br label %do.body.i
do.body.i:
%n.addr.0.i = phi i64 [ %dec.i, %do.cond.i ], [ 1000, %for.body20 ]
%p.0.i = phi i8* [ %incdec.ptr.i, %do.cond.i ], [ %arraydecay24, %for.body20 ]
- %2 = load i8* %p.0.i, align 1
+ %2 = load i8, i8* %p.0.i, align 1
%cmp3.i = icmp eq i8 %2, 120
br i1 %cmp3.i, label %exit, label %do.cond.i
do.cond.i:
- %incdec.ptr.i = getelementptr inbounds i8* %p.0.i, i64 1
+ %incdec.ptr.i = getelementptr inbounds i8, i8* %p.0.i, i64 1
%dec.i = add i64 %n.addr.0.i, -1
%cmp5.i = icmp eq i64 %dec.i, 0
br i1 %cmp5.i, label %if.then32, label %do.body.i
exit:
- %add.ptr30 = getelementptr inbounds [1000 x [1001 x i8]]* %strs, i64 0, i64 %indvars.iv, i64 %indvars.iv
+ %add.ptr30 = getelementptr inbounds [1000 x [1001 x i8]], [1000 x [1001 x i8]]* %strs, i64 0, i64 %indvars.iv, i64 %indvars.iv
%cmp31 = icmp eq i8* %p.0.i, %add.ptr30
%indvars.iv.next = add i64 %indvars.iv, 1
br i1 %cmp31, label %for.cond18, label %if.then32
if.then32:
- %puts43 = call i32 @puts(i8* getelementptr inbounds ([5 x i8]* @str4, i64 0, i64 0))
+ %puts43 = call i32 @puts(i8* getelementptr inbounds ([5 x i8], [5 x i8]* @str4, i64 0, i64 0))
call void @exit(i32 1) noreturn
unreachable
@@ -94,7 +94,7 @@ for.inc38:
br label %for.cond14
for.end40:
- %puts44 = call i32 @puts(i8* getelementptr inbounds ([11 x i8]* @.str3, i64 0, i64 0))
+ %puts44 = call i32 @puts(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str3, i64 0, i64 0))
ret i32 0
}
diff --git a/test/CodeGen/X86/rip-rel-address.ll b/test/CodeGen/X86/rip-rel-address.ll
index 24ff07b4b219..b49d597d9f05 100644
--- a/test/CodeGen/X86/rip-rel-address.ll
+++ b/test/CodeGen/X86/rip-rel-address.ll
@@ -6,7 +6,7 @@
@a = internal global double 3.4
define double @foo() nounwind {
- %a = load double* @a
+ %a = load double, double* @a
ret double %a
; PIC64: movsd _a(%rip), %xmm0
diff --git a/test/CodeGen/X86/rip-rel-lea.ll b/test/CodeGen/X86/rip-rel-lea.ll
index 71dacf60caa1..6d78aabb5c2f 100644
--- a/test/CodeGen/X86/rip-rel-lea.ll
+++ b/test/CodeGen/X86/rip-rel-lea.ll
@@ -7,7 +7,7 @@
@a = internal global double 3.4
define double* @foo() nounwind {
- %a = getelementptr double* @a, i64 0
+ %a = getelementptr double, double* @a, i64 0
ret double* %a
; PIC64: leaq a(%rip)
diff --git a/test/CodeGen/X86/rot32.ll b/test/CodeGen/X86/rot32.ll
index 7bdd606e9cbb..5738f70fa47e 100644
--- a/test/CodeGen/X86/rot32.ll
+++ b/test/CodeGen/X86/rot32.ll
@@ -61,7 +61,7 @@ define i32 @xfoop(i32* %p) nounwind readnone {
entry:
; BMI2-LABEL: xfoop:
; BMI2: rorxl $25, ({{.+}}), %{{.+}}
- %x = load i32* %p
+ %x = load i32, i32* %p
%a = lshr i32 %x, 25
%b = shl i32 %x, 7
%c = or i32 %a, %b
@@ -94,7 +94,7 @@ define i32 @xunp(i32* %p) nounwind readnone {
entry:
; BMI2-LABEL: xunp:
; BMI2: rorxl $7, ({{.+}}), %{{.+}}
- %x = load i32* %p
+ %x = load i32, i32* %p
%a = lshr i32 %x, 7
%b = shl i32 %x, 25
%c = or i32 %a, %b
diff --git a/test/CodeGen/X86/rot64.ll b/test/CodeGen/X86/rot64.ll
index e19a35da1cd6..f77bde050c78 100644
--- a/test/CodeGen/X86/rot64.ll
+++ b/test/CodeGen/X86/rot64.ll
@@ -55,7 +55,7 @@ define i64 @xfoop(i64* %p) nounwind readnone {
entry:
; BMI2-LABEL: xfoop:
; BMI2: rorxq $57, ({{.+}}), %{{.+}}
- %x = load i64* %p
+ %x = load i64, i64* %p
%a = lshr i64 %x, 57
%b = shl i64 %x, 7
%c = or i64 %a, %b
@@ -84,7 +84,7 @@ define i64 @xunp(i64* %p) nounwind readnone {
entry:
; BMI2-LABEL: xunp:
; BMI2: rorxq $7, ({{.+}}), %{{.+}}
- %x = load i64* %p
+ %x = load i64, i64* %p
%a = lshr i64 %x, 7
%b = shl i64 %x, 57
%c = or i64 %a, %b
diff --git a/test/CodeGen/X86/rotate4.ll b/test/CodeGen/X86/rotate4.ll
index 5372612aeab8..56a7d3285056 100644
--- a/test/CodeGen/X86/rotate4.ll
+++ b/test/CodeGen/X86/rotate4.ll
@@ -68,7 +68,7 @@ define void @rotate_left_m32(i32 *%pa, i32 %b) {
; no store:
; CHECK-NOT: mov
entry:
- %a = load i32* %pa, align 16
+ %a = load i32, i32* %pa, align 16
%and = and i32 %b, 31
%shl = shl i32 %a, %and
%0 = sub i32 0, %b
@@ -86,7 +86,7 @@ define void @rotate_right_m32(i32 *%pa, i32 %b) {
; no store:
; CHECK-NOT: mov
entry:
- %a = load i32* %pa, align 16
+ %a = load i32, i32* %pa, align 16
%and = and i32 %b, 31
%shl = lshr i32 %a, %and
%0 = sub i32 0, %b
@@ -104,7 +104,7 @@ define void @rotate_left_m64(i64 *%pa, i64 %b) {
; no store:
; CHECK-NOT: mov
entry:
- %a = load i64* %pa, align 16
+ %a = load i64, i64* %pa, align 16
%and = and i64 %b, 63
%shl = shl i64 %a, %and
%0 = sub i64 0, %b
@@ -122,7 +122,7 @@ define void @rotate_right_m64(i64 *%pa, i64 %b) {
; no store:
; CHECK-NOT: mov
entry:
- %a = load i64* %pa, align 16
+ %a = load i64, i64* %pa, align 16
%and = and i64 %b, 63
%shl = lshr i64 %a, %and
%0 = sub i64 0, %b
diff --git a/test/CodeGen/X86/sandybridge-loads.ll b/test/CodeGen/X86/sandybridge-loads.ll
index b8c364e2961c..2e31154068fc 100644
--- a/test/CodeGen/X86/sandybridge-loads.ll
+++ b/test/CodeGen/X86/sandybridge-loads.ll
@@ -8,10 +8,10 @@
;CHECK: ret
define void @wideloads(<8 x float>* %a, <8 x float>* %b, <8 x float>* %c) nounwind uwtable noinline ssp {
- %v0 = load <8 x float>* %a, align 16 ; <---- unaligned!
- %v1 = load <8 x float>* %b, align 32 ; <---- aligned!
+ %v0 = load <8 x float>, <8 x float>* %a, align 16 ; <---- unaligned!
+ %v1 = load <8 x float>, <8 x float>* %b, align 32 ; <---- aligned!
%m0 = fcmp olt <8 x float> %v1, %v0
- %v2 = load <8 x float>* %c, align 32 ; <---- aligned!
+ %v2 = load <8 x float>, <8 x float>* %c, align 32 ; <---- aligned!
%m1 = fcmp olt <8 x float> %v2, %v0
%mand = and <8 x i1> %m1, %m0
%r = zext <8 x i1> %mand to <8 x i32>
@@ -30,8 +30,8 @@ define void @wideloads(<8 x float>* %a, <8 x float>* %b, <8 x float>* %c) nounwi
;CHECK: ret
define void @widestores(<8 x float>* %a, <8 x float>* %b, <8 x float>* %c) nounwind uwtable noinline ssp {
- %v0 = load <8 x float>* %a, align 32
- %v1 = load <8 x float>* %b, align 32
+ %v0 = load <8 x float>, <8 x float>* %a, align 32
+ %v1 = load <8 x float>, <8 x float>* %b, align 32
store <8 x float> %v0, <8 x float>* %b, align 32 ; <--- aligned
store <8 x float> %v1, <8 x float>* %a, align 16 ; <--- unaligned
ret void
diff --git a/test/CodeGen/X86/scalar-extract.ll b/test/CodeGen/X86/scalar-extract.ll
index 284583840933..b8ef5e74c436 100644
--- a/test/CodeGen/X86/scalar-extract.ll
+++ b/test/CodeGen/X86/scalar-extract.ll
@@ -6,7 +6,7 @@
define void @foo(<2 x i16>* %A, <2 x i16>* %B) {
entry:
- %tmp1 = load <2 x i16>* %A ; <<2 x i16>> [#uses=1]
+ %tmp1 = load <2 x i16>, <2 x i16>* %A ; <<2 x i16>> [#uses=1]
store <2 x i16> %tmp1, <2 x i16>* %B
ret void
}
diff --git a/test/CodeGen/X86/scalar_sse_minmax.ll b/test/CodeGen/X86/scalar_sse_minmax.ll
index bc4ab5d836c7..5ca3f85ce029 100644
--- a/test/CodeGen/X86/scalar_sse_minmax.ll
+++ b/test/CodeGen/X86/scalar_sse_minmax.ll
@@ -1,44 +1,53 @@
-; RUN: llc < %s -march=x86 -mattr=+sse,+sse2 | \
-; RUN: grep mins | count 3
-; RUN: llc < %s -march=x86 -mattr=+sse,+sse2 | \
-; RUN: grep maxs | count 2
-
-declare i1 @llvm.isunordered.f64(double, double)
-
-declare i1 @llvm.isunordered.f32(float, float)
+; RUN: llc < %s -march=x86 -mattr=+sse,+sse2 | FileCheck %s
define float @min1(float %x, float %y) {
- %tmp = fcmp olt float %x, %y ; <i1> [#uses=1]
- %retval = select i1 %tmp, float %x, float %y ; <float> [#uses=1]
+; CHECK-LABEL: min1
+; CHECK: mins
+ %tmp = fcmp olt float %x, %y
+ %retval = select i1 %tmp, float %x, float %y
ret float %retval
}
define double @min2(double %x, double %y) {
- %tmp = fcmp olt double %x, %y ; <i1> [#uses=1]
- %retval = select i1 %tmp, double %x, double %y ; <double> [#uses=1]
+; CHECK-LABEL: min2
+; CHECK: mins
+ %tmp = fcmp olt double %x, %y
+ %retval = select i1 %tmp, double %x, double %y
ret double %retval
}
+declare <4 x float> @llvm.x86.sse.min.ss(<4 x float>, <4 x float>)
+define <4 x float> @min3(float %x, float %y) {
+; CHECK-LABEL: min3
+; CHECK: mins
+ %vec0 = insertelement <4 x float> undef, float %x, i32 0
+ %vec1 = insertelement <4 x float> undef, float %y, i32 0
+ %retval = tail call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %vec0, <4 x float> %vec1)
+ ret <4 x float> %retval
+}
+
define float @max1(float %x, float %y) {
- %tmp = fcmp oge float %x, %y ; <i1> [#uses=1]
- %tmp2 = fcmp uno float %x, %y ; <i1> [#uses=1]
- %tmp3 = or i1 %tmp2, %tmp ; <i1> [#uses=1]
- %retval = select i1 %tmp3, float %x, float %y ; <float> [#uses=1]
+; CHECK-LABEL: max1
+; CHECK: maxs
+ %tmp = fcmp uge float %x, %y
+ %retval = select i1 %tmp, float %x, float %y
ret float %retval
}
define double @max2(double %x, double %y) {
- %tmp = fcmp oge double %x, %y ; <i1> [#uses=1]
- %tmp2 = fcmp uno double %x, %y ; <i1> [#uses=1]
- %tmp3 = or i1 %tmp2, %tmp ; <i1> [#uses=1]
- %retval = select i1 %tmp3, double %x, double %y ; <double> [#uses=1]
+; CHECK-LABEL: max2
+; CHECK: maxs
+ %tmp = fcmp uge double %x, %y
+ %retval = select i1 %tmp, double %x, double %y
ret double %retval
}
-define <4 x float> @min3(float %tmp37) {
- %tmp375 = insertelement <4 x float> undef, float %tmp37, i32 0 ; <<4 x float>> [#uses=1]
- %tmp48 = tail call <4 x float> @llvm.x86.sse.min.ss( <4 x float> %tmp375, <4 x float> < float 6.553500e+04, float undef, float undef, float undef > ) ; <<4 x float>> [#uses=1]
- ret <4 x float> %tmp48
+declare <4 x float> @llvm.x86.sse.max.ss(<4 x float>, <4 x float>)
+define <4 x float> @max3(float %x, float %y) {
+; CHECK-LABEL: max3
+; CHECK: maxs
+ %vec0 = insertelement <4 x float> undef, float %x, i32 0
+ %vec1 = insertelement <4 x float> undef, float %y, i32 0
+ %retval = tail call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %vec0, <4 x float> %vec1)
+ ret <4 x float> %retval
}
-
-declare <4 x float> @llvm.x86.sse.min.ss(<4 x float>, <4 x float>)
diff --git a/test/CodeGen/X86/scalar_widen_div.ll b/test/CodeGen/X86/scalar_widen_div.ll
index 5807d5babfff..1671f8f89108 100644
--- a/test/CodeGen/X86/scalar_widen_div.ll
+++ b/test/CodeGen/X86/scalar_widen_div.ll
@@ -17,17 +17,17 @@ entry:
store <2 x i32> addrspace(1)* %nsource, <2 x i32> addrspace(1)** %nsource.addr
store <2 x i32> addrspace(1)* %dsource, <2 x i32> addrspace(1)** %dsource.addr
store <2 x i32> addrspace(1)* %qdest, <2 x i32> addrspace(1)** %qdest.addr
- %tmp = load <2 x i32> addrspace(1)** %qdest.addr
- %tmp1 = load i32* %index
- %arrayidx = getelementptr <2 x i32> addrspace(1)* %tmp, i32 %tmp1
- %tmp2 = load <2 x i32> addrspace(1)** %nsource.addr
- %tmp3 = load i32* %index
- %arrayidx4 = getelementptr <2 x i32> addrspace(1)* %tmp2, i32 %tmp3
- %tmp5 = load <2 x i32> addrspace(1)* %arrayidx4
- %tmp6 = load <2 x i32> addrspace(1)** %dsource.addr
- %tmp7 = load i32* %index
- %arrayidx8 = getelementptr <2 x i32> addrspace(1)* %tmp6, i32 %tmp7
- %tmp9 = load <2 x i32> addrspace(1)* %arrayidx8
+ %tmp = load <2 x i32> addrspace(1)*, <2 x i32> addrspace(1)** %qdest.addr
+ %tmp1 = load i32, i32* %index
+ %arrayidx = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %tmp, i32 %tmp1
+ %tmp2 = load <2 x i32> addrspace(1)*, <2 x i32> addrspace(1)** %nsource.addr
+ %tmp3 = load i32, i32* %index
+ %arrayidx4 = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %tmp2, i32 %tmp3
+ %tmp5 = load <2 x i32>, <2 x i32> addrspace(1)* %arrayidx4
+ %tmp6 = load <2 x i32> addrspace(1)*, <2 x i32> addrspace(1)** %dsource.addr
+ %tmp7 = load i32, i32* %index
+ %arrayidx8 = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %tmp6, i32 %tmp7
+ %tmp9 = load <2 x i32>, <2 x i32> addrspace(1)* %arrayidx8
%tmp10 = sdiv <2 x i32> %tmp5, %tmp9
store <2 x i32> %tmp10, <2 x i32> addrspace(1)* %arrayidx
ret void
@@ -180,10 +180,10 @@ bb.nph:
for.body:
%i.014 = phi i32 [ 0, %bb.nph ], [ %inc, %for.body ]
- %arrayidx11 = getelementptr <3 x i32>* %dest, i32 %i.014
- %tmp4 = load <3 x i32>* %arrayidx11 ; <<3 x i32>> [#uses=1]
- %arrayidx7 = getelementptr inbounds <3 x i32>* %old, i32 %i.014
- %tmp8 = load <3 x i32>* %arrayidx7 ; <<3 x i32>> [#uses=1]
+ %arrayidx11 = getelementptr <3 x i32>, <3 x i32>* %dest, i32 %i.014
+ %tmp4 = load <3 x i32>, <3 x i32>* %arrayidx11 ; <<3 x i32>> [#uses=1]
+ %arrayidx7 = getelementptr inbounds <3 x i32>, <3 x i32>* %old, i32 %i.014
+ %tmp8 = load <3 x i32>, <3 x i32>* %arrayidx7 ; <<3 x i32>> [#uses=1]
%div = sdiv <3 x i32> %tmp4, %tmp8
store <3 x i32> %div, <3 x i32>* %arrayidx11
%inc = add nsw i32 %i.014, 1
diff --git a/test/CodeGen/X86/scalarize-bitcast.ll b/test/CodeGen/X86/scalarize-bitcast.ll
index f6b29ecfbb60..60650f46302f 100644
--- a/test/CodeGen/X86/scalarize-bitcast.ll
+++ b/test/CodeGen/X86/scalarize-bitcast.ll
@@ -6,7 +6,7 @@ target triple = "x86_64-pc-linux-gnu"
define void @mmxCombineMaskU(i32* nocapture %src, i32* nocapture %mask) nounwind {
entry:
- %tmp1 = load i32* %src ; <i32> [#uses=1]
+ %tmp1 = load i32, i32* %src ; <i32> [#uses=1]
%0 = insertelement <2 x i32> undef, i32 %tmp1, i32 0 ; <<2 x i32>> [#uses=1]
%1 = insertelement <2 x i32> %0, i32 0, i32 1 ; <<2 x i32>> [#uses=1]
%conv.i.i = bitcast <2 x i32> %1 to <1 x i64> ; <<1 x i64>> [#uses=1]
@@ -21,7 +21,7 @@ entry:
%tmp24.i = extractelement <1 x i64> %tmp10.i, i32 0 ; <i64> [#uses=1]
%tmp10 = bitcast i64 %tmp24.i to <1 x i64> ; <<1 x i64>> [#uses=1]
%tmp7 = extractelement <1 x i64> %tmp10, i32 0 ; <i64> [#uses=1]
- %call6 = tail call i32 (...)* @store8888(i64 %tmp7) ; <i32> [#uses=1]
+ %call6 = tail call i32 (...) @store8888(i64 %tmp7) ; <i32> [#uses=1]
store i32 %call6, i32* %src
ret void
}
diff --git a/test/CodeGen/X86/scev-interchange.ll b/test/CodeGen/X86/scev-interchange.ll
index 0e7047b4845d..e224c0858aff 100644
--- a/test/CodeGen/X86/scev-interchange.ll
+++ b/test/CodeGen/X86/scev-interchange.ll
@@ -296,7 +296,7 @@ bb9.i216.i: ; preds = %bb29.loopexit.i.i, %bb8.i.i
bb15.i.i: ; preds = %bb16.preheader.i.i, %bb15.i.i
%j1.0212.i.i = phi i32 [ %1, %bb15.i.i ], [ 0, %bb16.preheader.i.i ] ; <i32> [#uses=2]
- %tmp6.i.i195.i.i = load i32* undef, align 4 ; <i32> [#uses=1]
+ %tmp6.i.i195.i.i = load i32, i32* undef, align 4 ; <i32> [#uses=1]
%tmp231.i.i = mul i32 %0, %tmp6.i.i195.i.i ; <i32> [#uses=1]
%tmp13.i197.i.i = add i32 %j1.0212.i.i, %tmp231.i.i ; <i32> [#uses=0]
%1 = add i32 %j1.0212.i.i, 1 ; <i32> [#uses=1]
diff --git a/test/CodeGen/X86/segmented-stacks.ll b/test/CodeGen/X86/segmented-stacks.ll
index 3e47121a380e..55eaab91da50 100644
--- a/test/CodeGen/X86/segmented-stacks.ll
+++ b/test/CodeGen/X86/segmented-stacks.ll
@@ -11,16 +11,16 @@
; RUN: llc < %s -mcpu=generic -mtriple=x86_64-mingw32 -verify-machineinstrs | FileCheck %s -check-prefix=X64-MinGW
; We used to crash with filetype=obj
-; RUN: llc < %s -mcpu=generic -mtriple=i686-linux -filetype=obj
-; RUN: llc < %s -mcpu=generic -mtriple=x86_64-linux -filetype=obj
-; RUN: llc < %s -mcpu=generic -mtriple=x86_64-linux-gnux32 -filetype=obj
-; RUN: llc < %s -mcpu=generic -mtriple=i686-darwin -filetype=obj
-; RUN: llc < %s -mcpu=generic -mtriple=x86_64-darwin -filetype=obj
-; RUN: llc < %s -mcpu=generic -mtriple=i686-mingw32 -filetype=obj
-; RUN: llc < %s -mcpu=generic -mtriple=x86_64-freebsd -filetype=obj
-; RUN: llc < %s -mcpu=generic -mtriple=i686-dragonfly -filetype=obj
-; RUN: llc < %s -mcpu=generic -mtriple=x86_64-dragonfly -filetype=obj
-; RUN: llc < %s -mcpu=generic -mtriple=x86_64-mingw32 -filetype=obj
+; RUN: llc < %s -mcpu=generic -mtriple=i686-linux -filetype=obj -o /dev/null
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-linux -filetype=obj -o /dev/null
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-linux-gnux32 -filetype=obj -o /dev/null
+; RUN: llc < %s -mcpu=generic -mtriple=i686-darwin -filetype=obj -o /dev/null
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-darwin -filetype=obj -o /dev/null
+; RUN: llc < %s -mcpu=generic -mtriple=i686-mingw32 -filetype=obj -o /dev/null
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-freebsd -filetype=obj -o /dev/null
+; RUN: llc < %s -mcpu=generic -mtriple=i686-dragonfly -filetype=obj -o /dev/null
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-dragonfly -filetype=obj -o /dev/null
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-mingw32 -filetype=obj -o /dev/null
; RUN: not llc < %s -mcpu=generic -mtriple=x86_64-solaris 2> %t.log
; RUN: FileCheck %s -input-file=%t.log -check-prefix=X64-Solaris
@@ -152,7 +152,7 @@ define void @test_basic() #0 {
}
define i32 @test_nested(i32 * nest %closure, i32 %other) #0 {
- %addend = load i32 * %closure
+ %addend = load i32 , i32 * %closure
%result = add i32 %other, %addend
%mem = alloca i32, i32 10
call void @dummy_use (i32* %mem, i32 10)
diff --git a/test/CodeGen/X86/seh-catch-all.ll b/test/CodeGen/X86/seh-catch-all.ll
new file mode 100644
index 000000000000..51840134eda3
--- /dev/null
+++ b/test/CodeGen/X86/seh-catch-all.ll
@@ -0,0 +1,44 @@
+; RUN: llc -mtriple=x86_64-windows-msvc < %s | FileCheck %s
+
+@str = linkonce_odr unnamed_addr constant [27 x i8] c"GetExceptionCode(): 0x%lx\0A\00", align 1
+
+declare i32 @__C_specific_handler(...)
+declare void @crash()
+declare i32 @printf(i8* nocapture readonly, ...) nounwind
+
+define i32 @main() {
+entry:
+ invoke void @crash()
+ to label %__try.cont unwind label %lpad
+
+lpad:
+ %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__C_specific_handler to i8*)
+ catch i8* null
+ %1 = extractvalue { i8*, i32 } %0, 0
+ %2 = ptrtoint i8* %1 to i64
+ %3 = trunc i64 %2 to i32
+ call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([27 x i8], [27 x i8]* @str, i64 0, i64 0), i32 %3)
+ br label %__try.cont
+
+__try.cont:
+ ret i32 0
+
+eh.resume:
+ resume { i8*, i32 } %0
+}
+
+; Check that we can get the exception code from eax to the printf.
+
+; CHECK-LABEL: main:
+; CHECK: retq
+; CHECK: # Block address taken
+; CHECK: leaq str(%rip), %rcx
+; CHECK: movl %eax, %edx
+; CHECK: callq printf
+
+; CHECK: .seh_handlerdata
+; CHECK-NEXT: .long 1
+; CHECK-NEXT: .Ltmp{{[0-9]+}}@IMGREL
+; CHECK-NEXT: .Ltmp{{[0-9]+}}@IMGREL+1
+; CHECK-NEXT: 1
+; CHECK-NEXT: .Ltmp{{[0-9]+}}@IMGREL
diff --git a/test/CodeGen/X86/seh-except-finally.ll b/test/CodeGen/X86/seh-except-finally.ll
new file mode 100644
index 000000000000..c796f1ef2888
--- /dev/null
+++ b/test/CodeGen/X86/seh-except-finally.ll
@@ -0,0 +1,167 @@
+; RUN: llc < %s | FileCheck %s
+
+; Test case based on this source:
+; int puts(const char*);
+; __declspec(noinline) void crash() {
+; *(volatile int*)0 = 42;
+; }
+; int filt();
+; void use_both() {
+; __try {
+; __try {
+; crash();
+; } __finally {
+; puts("__finally");
+; }
+; } __except (filt()) {
+; puts("__except");
+; }
+; }
+
+target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-windows-msvc"
+
+$"\01??_C@_09KJEHOMHG@__finally?$AA@" = comdat any
+
+$"\01??_C@_08MLCMLGHM@__except?$AA@" = comdat any
+
+@"\01??_C@_09KJEHOMHG@__finally?$AA@" = linkonce_odr unnamed_addr constant [10 x i8] c"__finally\00", comdat, align 1
+@"\01??_C@_08MLCMLGHM@__except?$AA@" = linkonce_odr unnamed_addr constant [9 x i8] c"__except\00", comdat, align 1
+
+declare void @crash()
+
+declare i32 @filt()
+
+; Function Attrs: nounwind uwtable
+define void @use_both() #1 {
+entry:
+ %exn.slot = alloca i8*
+ %ehselector.slot = alloca i32
+ invoke void @crash() #5
+ to label %invoke.cont unwind label %lpad
+
+invoke.cont: ; preds = %entry
+ %0 = call i8* @llvm.frameaddress(i32 0)
+ invoke void @"\01?fin$0@0@use_both@@"(i1 zeroext false, i8* %0) #5
+ to label %invoke.cont2 unwind label %lpad1
+
+invoke.cont2: ; preds = %invoke.cont
+ br label %__try.cont
+
+lpad: ; preds = %entry
+ %1 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__C_specific_handler to i8*)
+ cleanup
+ catch i8* bitcast (i32 (i8*, i8*)* @"\01?filt$0@0@use_both@@" to i8*)
+ %2 = extractvalue { i8*, i32 } %1, 0
+ store i8* %2, i8** %exn.slot
+ %3 = extractvalue { i8*, i32 } %1, 1
+ store i32 %3, i32* %ehselector.slot
+ %4 = call i8* @llvm.frameaddress(i32 0)
+ invoke void @"\01?fin$0@0@use_both@@"(i1 zeroext true, i8* %4) #5
+ to label %invoke.cont3 unwind label %lpad1
+
+lpad1: ; preds = %lpad, %invoke.cont
+ %5 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__C_specific_handler to i8*)
+ catch i8* bitcast (i32 (i8*, i8*)* @"\01?filt$0@0@use_both@@" to i8*)
+ %6 = extractvalue { i8*, i32 } %5, 0
+ store i8* %6, i8** %exn.slot
+ %7 = extractvalue { i8*, i32 } %5, 1
+ store i32 %7, i32* %ehselector.slot
+ br label %catch.dispatch
+
+invoke.cont3: ; preds = %lpad
+ br label %catch.dispatch
+
+catch.dispatch: ; preds = %invoke.cont3, %lpad1
+ %sel = load i32, i32* %ehselector.slot
+ %8 = call i32 @llvm.eh.typeid.for(i8* bitcast (i32 (i8*, i8*)* @"\01?filt$0@0@use_both@@" to i8*)) #6
+ %matches = icmp eq i32 %sel, %8
+ br i1 %matches, label %__except, label %eh.resume
+
+__except: ; preds = %catch.dispatch
+ %call = call i32 @puts(i8* getelementptr inbounds ([9 x i8], [9 x i8]* @"\01??_C@_08MLCMLGHM@__except?$AA@", i32 0, i32 0))
+ br label %__try.cont
+
+__try.cont: ; preds = %__except, %invoke.cont2
+ ret void
+
+eh.resume: ; preds = %catch.dispatch
+ %exn = load i8*, i8** %exn.slot
+ %sel4 = load i32, i32* %ehselector.slot
+ %lpad.val = insertvalue { i8*, i32 } undef, i8* %exn, 0
+ %lpad.val5 = insertvalue { i8*, i32 } %lpad.val, i32 %sel4, 1
+ resume { i8*, i32 } %lpad.val5
+}
+
+; CHECK-LABEL: use_both:
+; CHECK: .Ltmp0
+; CHECK: callq crash
+; CHECK: .Ltmp1
+; CHECK: .Ltmp3
+; CHECK: callq "?fin$0@0@use_both@@"
+; CHECK: .Ltmp4
+; CHECK: retq
+;
+; CHECK: .seh_handlerdata
+; CHECK-NEXT: .long 3
+; CHECK-NEXT: .long .Ltmp0@IMGREL
+; CHECK-NEXT: .long .Ltmp1@IMGREL+1
+; CHECK-NEXT: .long "?fin$0@0@use_both@@"@IMGREL
+; CHECK-NEXT: .long 0
+; CHECK-NEXT: .long .Ltmp0@IMGREL
+; CHECK-NEXT: .long .Ltmp1@IMGREL+1
+; CHECK-NEXT: .long "?filt$0@0@use_both@@"@IMGREL
+; CHECK-NEXT: .long .Ltmp{{[0-9]+}}@IMGREL
+; CHECK-NEXT: .long .Ltmp3@IMGREL
+; CHECK-NEXT: .long .Ltmp4@IMGREL+1
+; CHECK-NEXT: .long "?filt$0@0@use_both@@"@IMGREL
+; CHECK-NEXT: .long .Ltmp{{[0-9]+}}@IMGREL
+
+; Function Attrs: noinline nounwind
+define internal i32 @"\01?filt$0@0@use_both@@"(i8* %exception_pointers, i8* %frame_pointer) #2 {
+entry:
+ %frame_pointer.addr = alloca i8*, align 8
+ %exception_pointers.addr = alloca i8*, align 8
+ %exn.slot = alloca i8*
+ store i8* %frame_pointer, i8** %frame_pointer.addr, align 8
+ store i8* %exception_pointers, i8** %exception_pointers.addr, align 8
+ %0 = load i8*, i8** %exception_pointers.addr
+ %1 = bitcast i8* %0 to { i32*, i8* }*
+ %2 = getelementptr inbounds { i32*, i8* }, { i32*, i8* }* %1, i32 0, i32 0
+ %3 = load i32*, i32** %2
+ %4 = load i32, i32* %3
+ %5 = zext i32 %4 to i64
+ %6 = inttoptr i64 %5 to i8*
+ store i8* %6, i8** %exn.slot
+ %call = call i32 @filt()
+ ret i32 %call
+}
+
+define internal void @"\01?fin$0@0@use_both@@"(i1 zeroext %abnormal_termination, i8* %frame_pointer) #3 {
+entry:
+ %frame_pointer.addr = alloca i8*, align 8
+ %abnormal_termination.addr = alloca i8, align 1
+ store i8* %frame_pointer, i8** %frame_pointer.addr, align 8
+ %frombool = zext i1 %abnormal_termination to i8
+ store i8 %frombool, i8* %abnormal_termination.addr, align 1
+ %call = call i32 @puts(i8* getelementptr inbounds ([10 x i8], [10 x i8]* @"\01??_C@_09KJEHOMHG@__finally?$AA@", i32 0, i32 0))
+ ret void
+}
+
+declare i32 @puts(i8*) #3
+
+declare i32 @__C_specific_handler(...)
+
+; Function Attrs: nounwind readnone
+declare i8* @llvm.frameaddress(i32) #4
+
+; Function Attrs: nounwind readnone
+declare i32 @llvm.eh.typeid.for(i8*) #4
+
+attributes #0 = { noinline nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { noinline nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #3 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #4 = { nounwind readnone }
+attributes #5 = { noinline }
+attributes #6 = { nounwind }
diff --git a/test/CodeGen/X86/seh-filter.ll b/test/CodeGen/X86/seh-filter.ll
new file mode 100644
index 000000000000..6a3a23edb1ae
--- /dev/null
+++ b/test/CodeGen/X86/seh-filter.ll
@@ -0,0 +1,21 @@
+; RUN: llc -O0 -mtriple=x86_64-windows-msvc < %s | FileCheck %s
+
+declare void @g()
+define void @f() {
+ invoke void @g() to label %return unwind label %lpad
+
+return:
+ ret void
+
+lpad:
+ %ehptrs = landingpad {i8*, i32} personality i8* bitcast (i32 (...)* @__C_specific_handler to i8*)
+ filter [0 x i8*] zeroinitializer
+ call void @__cxa_call_unexpected(i8* null)
+ unreachable
+}
+declare i32 @__C_specific_handler(...)
+declare void @__cxa_call_unexpected(i8*)
+
+; We don't emit entries for filters.
+; CHECK: .seh_handlerdata
+; CHECK: .long 0
diff --git a/test/CodeGen/X86/seh-finally.ll b/test/CodeGen/X86/seh-finally.ll
new file mode 100644
index 000000000000..91baed570f25
--- /dev/null
+++ b/test/CodeGen/X86/seh-finally.ll
@@ -0,0 +1,51 @@
+; RUN: llc -mtriple=x86_64-windows-msvc < %s | FileCheck %s
+
+@str_recovered = internal unnamed_addr constant [10 x i8] c"recovered\00", align 1
+
+declare void @crash()
+
+define i32 @main() {
+entry:
+ invoke void @crash()
+ to label %invoke.cont unwind label %lpad
+
+invoke.cont: ; preds = %entry
+ %call = call i32 @puts(i8* getelementptr inbounds ([10 x i8], [10 x i8]* @str_recovered, i64 0, i64 0))
+ call void @abort()
+ ret i32 0
+
+lpad: ; preds = %entry
+ %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__C_specific_handler to i8*)
+ cleanup
+ %1 = extractvalue { i8*, i32 } %0, 0
+ %2 = extractvalue { i8*, i32 } %0, 1
+ %call2 = invoke i32 @puts(i8* getelementptr inbounds ([10 x i8], [10 x i8]* @str_recovered, i64 0, i64 0))
+ to label %invoke.cont1 unwind label %terminate.lpad
+
+invoke.cont1: ; preds = %lpad
+ resume { i8*, i32 } %0
+
+terminate.lpad: ; preds = %lpad
+ %3 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__C_specific_handler to i8*)
+ catch i8* null
+ call void @abort()
+ unreachable
+}
+
+; CHECK-LABEL: main:
+; CHECK: .seh_handlerdata
+; CHECK-NEXT: .long 1
+; CHECK-NEXT: .long .Ltmp0@IMGREL
+; CHECK-NEXT: .long .Ltmp1@IMGREL
+; CHECK-NEXT: .long main.cleanup@IMGREL
+; CHECK-NEXT: .long 0
+
+; CHECK-LABEL: main.cleanup:
+; CHECK: callq puts
+; CHECK: retq
+
+declare i32 @__C_specific_handler(...)
+
+declare i32 @puts(i8*)
+
+declare void @abort()
diff --git a/test/CodeGen/X86/seh-safe-div.ll b/test/CodeGen/X86/seh-safe-div.ll
new file mode 100644
index 000000000000..80b15b601020
--- /dev/null
+++ b/test/CodeGen/X86/seh-safe-div.ll
@@ -0,0 +1,180 @@
+; RUN: llc -mtriple x86_64-pc-windows-msvc < %s | FileCheck %s
+
+; This test case is also intended to be run manually as a complete functional
+; test. It should link, print something, and exit zero rather than crashing.
+; It is the hypothetical lowering of a C source program that looks like:
+;
+; int safe_div(int *n, int *d) {
+; int r;
+; __try {
+; __try {
+; r = *n / *d;
+; } __except(GetExceptionCode() == EXCEPTION_ACCESS_VIOLATION) {
+; puts("EXCEPTION_ACCESS_VIOLATION");
+; r = -1;
+; }
+; } __except(GetExceptionCode() == EXCEPTION_INT_DIVIDE_BY_ZERO) {
+; puts("EXCEPTION_INT_DIVIDE_BY_ZERO");
+; r = -2;
+; }
+; return r;
+; }
+
+@str1 = internal constant [27 x i8] c"EXCEPTION_ACCESS_VIOLATION\00"
+@str2 = internal constant [29 x i8] c"EXCEPTION_INT_DIVIDE_BY_ZERO\00"
+
+define i32 @safe_div(i32* %n, i32* %d) {
+entry:
+ %r = alloca i32, align 4
+ invoke void @try_body(i32* %r, i32* %n, i32* %d)
+ to label %__try.cont unwind label %lpad
+
+lpad:
+ %vals = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__C_specific_handler to i8*)
+ catch i8* bitcast (i32 (i8*, i8*)* @safe_div_filt0 to i8*)
+ catch i8* bitcast (i32 (i8*, i8*)* @safe_div_filt1 to i8*)
+ %ehptr = extractvalue { i8*, i32 } %vals, 0
+ %sel = extractvalue { i8*, i32 } %vals, 1
+ %filt0_val = call i32 @llvm.eh.typeid.for(i8* bitcast (i32 (i8*, i8*)* @safe_div_filt0 to i8*))
+ %is_filt0 = icmp eq i32 %sel, %filt0_val
+ br i1 %is_filt0, label %handler0, label %eh.dispatch1
+
+eh.dispatch1:
+ %filt1_val = call i32 @llvm.eh.typeid.for(i8* bitcast (i32 (i8*, i8*)* @safe_div_filt1 to i8*))
+ %is_filt1 = icmp eq i32 %sel, %filt1_val
+ br i1 %is_filt1, label %handler1, label %eh.resume
+
+handler0:
+ call void @puts(i8* getelementptr ([27 x i8], [27 x i8]* @str1, i32 0, i32 0))
+ store i32 -1, i32* %r, align 4
+ br label %__try.cont
+
+handler1:
+ call void @puts(i8* getelementptr ([29 x i8], [29 x i8]* @str2, i32 0, i32 0))
+ store i32 -2, i32* %r, align 4
+ br label %__try.cont
+
+eh.resume:
+ resume { i8*, i32 } %vals
+
+__try.cont:
+ %safe_ret = load i32, i32* %r, align 4
+ ret i32 %safe_ret
+}
+
+; Normal path code
+
+; CHECK: {{^}}safe_div:
+; CHECK: .seh_proc safe_div
+; CHECK: .seh_handler __C_specific_handler, @unwind, @except
+; CHECK: .Ltmp0:
+; CHECK: leaq [[rloc:.*\(%rsp\)]], %rcx
+; CHECK: callq try_body
+; CHECK-NEXT: .Ltmp1
+; CHECK: [[cont_bb:\.LBB0_[0-9]+]]:
+; CHECK: movl [[rloc]], %eax
+; CHECK: retq
+
+; Landing pad code
+
+; CHECK: [[handler0:\.Ltmp[0-9]+]]: # Block address taken
+; CHECK: # %handler0
+; CHECK: callq puts
+; CHECK: movl $-1, [[rloc]]
+; CHECK: jmp [[cont_bb]]
+
+; CHECK: [[handler1:\.Ltmp[0-9]+]]: # Block address taken
+; CHECK: # %handler1
+; CHECK: callq puts
+; CHECK: movl $-2, [[rloc]]
+; CHECK: jmp [[cont_bb]]
+
+; CHECK: .seh_handlerdata
+; CHECK-NEXT: .long 2
+; CHECK-NEXT: .long .Ltmp0@IMGREL
+; CHECK-NEXT: .long .Ltmp1@IMGREL+1
+; CHECK-NEXT: .long safe_div_filt0@IMGREL
+; CHECK-NEXT: .long [[handler0]]@IMGREL
+; CHECK-NEXT: .long .Ltmp0@IMGREL
+; CHECK-NEXT: .long .Ltmp1@IMGREL+1
+; CHECK-NEXT: .long safe_div_filt1@IMGREL
+; CHECK-NEXT: .long [[handler1]]@IMGREL
+; CHECK: .text
+; CHECK: .seh_endproc
+
+
+define void @try_body(i32* %r, i32* %n, i32* %d) {
+entry:
+ %0 = load i32, i32* %n, align 4
+ %1 = load i32, i32* %d, align 4
+ %div = sdiv i32 %0, %1
+ store i32 %div, i32* %r, align 4
+ ret void
+}
+
+; The prototype of these filter functions is:
+; int filter(EXCEPTION_POINTERS *eh_ptrs, void *rbp);
+
+; The definition of EXCEPTION_POINTERS is:
+; typedef struct _EXCEPTION_POINTERS {
+; EXCEPTION_RECORD *ExceptionRecord;
+; CONTEXT *ContextRecord;
+; } EXCEPTION_POINTERS;
+
+; The definition of EXCEPTION_RECORD is:
+; typedef struct _EXCEPTION_RECORD {
+; DWORD ExceptionCode;
+; ...
+; } EXCEPTION_RECORD;
+
+; The exception code can be retrieved with two loads, one for the record
+; pointer and one for the code. The values of local variables can be
+; accessed via rbp, but that would require additional LLVM support that is
+; not yet implemented.
+
+define i32 @safe_div_filt0(i8* %eh_ptrs, i8* %rbp) {
+ %eh_ptrs_c = bitcast i8* %eh_ptrs to i32**
+ %eh_rec = load i32*, i32** %eh_ptrs_c
+ %eh_code = load i32, i32* %eh_rec
+ ; EXCEPTION_ACCESS_VIOLATION = 0xC0000005
+ %cmp = icmp eq i32 %eh_code, 3221225477
+ %filt.res = zext i1 %cmp to i32
+ ret i32 %filt.res
+}
+
+define i32 @safe_div_filt1(i8* %eh_ptrs, i8* %rbp) {
+ %eh_ptrs_c = bitcast i8* %eh_ptrs to i32**
+ %eh_rec = load i32*, i32** %eh_ptrs_c
+ %eh_code = load i32, i32* %eh_rec
+ ; EXCEPTION_INT_DIVIDE_BY_ZERO = 0xC0000094
+ %cmp = icmp eq i32 %eh_code, 3221225620
+ %filt.res = zext i1 %cmp to i32
+ ret i32 %filt.res
+}
+
+@str_result = internal constant [21 x i8] c"safe_div result: %d\0A\00"
+
+define i32 @main() {
+ %d.addr = alloca i32, align 4
+ %n.addr = alloca i32, align 4
+
+ store i32 10, i32* %n.addr, align 4
+ store i32 2, i32* %d.addr, align 4
+ %r1 = call i32 @safe_div(i32* %n.addr, i32* %d.addr)
+ call void (i8*, ...) @printf(i8* getelementptr ([21 x i8], [21 x i8]* @str_result, i32 0, i32 0), i32 %r1)
+
+ store i32 10, i32* %n.addr, align 4
+ store i32 0, i32* %d.addr, align 4
+ %r2 = call i32 @safe_div(i32* %n.addr, i32* %d.addr)
+ call void (i8*, ...) @printf(i8* getelementptr ([21 x i8], [21 x i8]* @str_result, i32 0, i32 0), i32 %r2)
+
+ %r3 = call i32 @safe_div(i32* %n.addr, i32* null)
+ call void (i8*, ...) @printf(i8* getelementptr ([21 x i8], [21 x i8]* @str_result, i32 0, i32 0), i32 %r3)
+ ret i32 0
+}
+
+declare i32 @__C_specific_handler(...)
+declare i32 @llvm.eh.typeid.for(i8*) readnone nounwind
+declare void @puts(i8*)
+declare void @printf(i8*, ...)
+declare void @abort()
diff --git a/test/CodeGen/X86/select-with-and-or.ll b/test/CodeGen/X86/select-with-and-or.ll
index 1ccf30bf2083..40af46bc0ff6 100644
--- a/test/CodeGen/X86/select-with-and-or.ll
+++ b/test/CodeGen/X86/select-with-and-or.ll
@@ -62,7 +62,7 @@ define <4 x i32> @test6(<4 x float> %a, <4 x float> %b, <4 x i32> %c) {
define <4 x i32> @test7(<4 x float> %a, <4 x float> %b, <4 x i32>* %p) {
%f = fcmp ult <4 x float> %a, %b
%s = sext <4 x i1> %f to <4 x i32>
- %l = load <4 x i32>* %p
+ %l = load <4 x i32>, <4 x i32>* %p
%r = and <4 x i32> %l, %s
ret <4 x i32> %r
; CHECK: test7
diff --git a/test/CodeGen/X86/select.ll b/test/CodeGen/X86/select.ll
index 7e6f15321415..a4e06b398446 100644
--- a/test/CodeGen/X86/select.ll
+++ b/test/CodeGen/X86/select.ll
@@ -5,8 +5,8 @@
%0 = type { i64, i32 }
define i32 @test1(%0* %p, %0* %q, i1 %r) nounwind {
- %t0 = load %0* %p
- %t1 = load %0* %q
+ %t0 = load %0, %0* %p
+ %t1 = load %0, %0* %q
%t4 = select i1 %r, %0 %t0, %0 %t1
%t5 = extractvalue %0 %t4, 1
ret i32 %t5
@@ -62,8 +62,8 @@ define signext i8 @test4(i8* nocapture %P, double %F) nounwind readonly {
entry:
%0 = fcmp olt double %F, 4.200000e+01 ; <i1> [#uses=1]
%iftmp.0.0 = select i1 %0, i32 4, i32 0 ; <i32> [#uses=1]
- %1 = getelementptr i8* %P, i32 %iftmp.0.0 ; <i8*> [#uses=1]
- %2 = load i8* %1, align 1 ; <i8> [#uses=1]
+ %1 = getelementptr i8, i8* %P, i32 %iftmp.0.0 ; <i8*> [#uses=1]
+ %2 = load i8, i8* %1, align 1 ; <i8> [#uses=1]
ret i8 %2
; CHECK-LABEL: test4:
; CHECK: movsbl ({{.*}},4), %eax
@@ -82,8 +82,8 @@ define void @test5(i1 %c, <2 x i16> %a, <2 x i16> %b, <2 x i16>* %p) nounwind {
}
define void @test6(i32 %C, <4 x float>* %A, <4 x float>* %B) nounwind {
- %tmp = load <4 x float>* %A ; <<4 x float>> [#uses=1]
- %tmp3 = load <4 x float>* %B ; <<4 x float>> [#uses=2]
+ %tmp = load <4 x float>, <4 x float>* %A ; <<4 x float>> [#uses=1]
+ %tmp3 = load <4 x float>, <4 x float>* %B ; <<4 x float>> [#uses=2]
%tmp9 = fmul <4 x float> %tmp3, %tmp3 ; <<4 x float>> [#uses=1]
%tmp.upgrd.1 = icmp eq i32 %C, 0 ; <i1> [#uses=1]
%iftmp.38.0 = select i1 %tmp.upgrd.1, <4 x float> %tmp9, <4 x float> %tmp ; <<4 x float>> [#uses=1]
diff --git a/test/CodeGen/X86/selectiondag-crash.ll b/test/CodeGen/X86/selectiondag-crash.ll
new file mode 100644
index 000000000000..99789021a737
--- /dev/null
+++ b/test/CodeGen/X86/selectiondag-crash.ll
@@ -0,0 +1,15 @@
+; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=corei7 < %s
+
+; Check that llc doesn't crash in the attempt to fold a shuffle with
+; a splat mask into a constant build_vector.
+
+define <8 x i8> @autogen_SD26299(i8) {
+BB:
+ %Shuff = shufflevector <8 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>, <8 x i32> zeroinitializer, <8 x i32> <i32 2, i32 undef, i32 6, i32 8, i32 undef, i32 12, i32 14, i32 0>
+ %Shuff14 = shufflevector <8 x i32> %Shuff, <8 x i32> %Shuff, <8 x i32> <i32 7, i32 9, i32 11, i32 undef, i32 undef, i32 1, i32 3, i32 5>
+ %Shuff35 = shufflevector <8 x i32> %Shuff14, <8 x i32> %Shuff, <8 x i32> <i32 undef, i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13>
+ %I42 = insertelement <8 x i32> %Shuff35, i32 88608, i32 0
+ %Shuff48 = shufflevector <8 x i32> %Shuff35, <8 x i32> %I42, <8 x i32> <i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 0, i32 2>
+ %Tr59 = trunc <8 x i32> %Shuff48 to <8 x i8>
+ ret <8 x i8> %Tr59
+}
diff --git a/test/CodeGen/X86/selectiondag-cse.ll b/test/CodeGen/X86/selectiondag-cse.ll
index a653a1c8ca3b..c9e58015339d 100644
--- a/test/CodeGen/X86/selectiondag-cse.ll
+++ b/test/CodeGen/X86/selectiondag-cse.ll
@@ -39,7 +39,7 @@ bb5:
%tmp13 = add i32 %tmp12, %tmp10
%tmp14 = lshr i32 %tmp13, 2
%tmp15 = trunc i32 %tmp14 to i16
- store i16 %tmp15, i16* getelementptr inbounds (%0* @images, i64 0, i32 47, i64 3, i64 0, i64 3), align 2
+ store i16 %tmp15, i16* getelementptr inbounds (%0, %0* @images, i64 0, i32 47, i64 3, i64 0, i64 3), align 2
%tmp16 = lshr i208 %tmp, 96
%tmp17 = trunc i208 %tmp16 to i32
%tmp18 = and i32 %tmp17, 65535
@@ -48,12 +48,12 @@ bb5:
%tmp21 = add i32 %tmp20, 0
%tmp22 = lshr i32 %tmp21, 2
%tmp23 = trunc i32 %tmp22 to i16
- store i16 %tmp23, i16* getelementptr inbounds (%0* @images, i64 0, i32 47, i64 3, i64 2, i64 3), align 2
+ store i16 %tmp23, i16* getelementptr inbounds (%0, %0* @images, i64 0, i32 47, i64 3, i64 2, i64 3), align 2
%tmp24 = add i32 %tmp6, %tmp9
%tmp25 = add i32 %tmp24, 0
%tmp26 = lshr i32 %tmp25, 2
%tmp27 = trunc i32 %tmp26 to i16
- store i16 %tmp27, i16* getelementptr inbounds (%0* @images, i64 0, i32 47, i64 7, i64 1, i64 2), align 4
+ store i16 %tmp27, i16* getelementptr inbounds (%0, %0* @images, i64 0, i32 47, i64 7, i64 1, i64 2), align 4
%tmp28 = lshr i208 %tmp, 80
%tmp29 = shl nuw nsw i208 %tmp28, 1
%tmp30 = trunc i208 %tmp29 to i32
@@ -61,7 +61,7 @@ bb5:
%tmp32 = add i32 %tmp12, %tmp31
%tmp33 = lshr i32 %tmp32, 2
%tmp34 = trunc i32 %tmp33 to i16
- store i16 %tmp34, i16* getelementptr inbounds (%0* @images, i64 0, i32 47, i64 7, i64 1, i64 3), align 2
+ store i16 %tmp34, i16* getelementptr inbounds (%0, %0* @images, i64 0, i32 47, i64 7, i64 1, i64 3), align 2
br label %bb35
bb35: ; preds = %bb5, %bb4
diff --git a/test/CodeGen/X86/setcc-narrowing.ll b/test/CodeGen/X86/setcc-narrowing.ll
index 25cb2c822c5b..bf5b45031a24 100644
--- a/test/CodeGen/X86/setcc-narrowing.ll
+++ b/test/CodeGen/X86/setcc-narrowing.ll
@@ -10,7 +10,7 @@ entry:
; CHECK-NEXT: setne %al
; CHECK-NEXT: movzbl %al, %eax
; CHECK-NEXT: ret
- %0 = load i64* @t1.global, align 8
+ %0 = load i64, i64* @t1.global, align 8
%and = and i64 4294967295, %0
%cmp = icmp sgt i64 %and, 0
%conv = zext i1 %cmp to i32
diff --git a/test/CodeGen/X86/setcc.ll b/test/CodeGen/X86/setcc.ll
index 2454af926aae..6f1ddbdc6aca 100644
--- a/test/CodeGen/X86/setcc.ll
+++ b/test/CodeGen/X86/setcc.ll
@@ -29,7 +29,7 @@ define i64 @t3(i64 %x) nounwind readnone ssp {
entry:
; CHECK-LABEL: t3:
; CHECK: sbbq %rax, %rax
-; CHECK: andq $64, %rax
+; CHECK: andl $64, %eax
%0 = icmp ult i64 %x, 18 ; <i1> [#uses=1]
%iftmp.2.0 = select i1 %0, i64 64, i64 0 ; <i64> [#uses=1]
ret i64 %iftmp.2.0
diff --git a/test/CodeGen/X86/sext-load.ll b/test/CodeGen/X86/sext-load.ll
index 2753e8766294..2ea6e012192e 100644
--- a/test/CodeGen/X86/sext-load.ll
+++ b/test/CodeGen/X86/sext-load.ll
@@ -21,9 +21,9 @@ entry:
; CHECK-NEXT: ret
define i32 @test2({i16, [6 x i8]}* %this) {
entry:
- %b48 = getelementptr inbounds { i16, [6 x i8] }* %this, i32 0, i32 1
+ %b48 = getelementptr inbounds { i16, [6 x i8] }, { i16, [6 x i8] }* %this, i32 0, i32 1
%cast = bitcast [6 x i8]* %b48 to i48*
- %bf.load = load i48* %cast, align 2
+ %bf.load = load i48, i48* %cast, align 2
%bf.ashr = ashr i48 %bf.load, 32
%bf.cast = trunc i48 %bf.ashr to i32
ret i32 %bf.cast
diff --git a/test/CodeGen/X86/sha.ll b/test/CodeGen/X86/sha.ll
index bf81e9938ec8..fe42637bc538 100644
--- a/test/CodeGen/X86/sha.ll
+++ b/test/CodeGen/X86/sha.ll
@@ -13,7 +13,7 @@ entry:
define <4 x i32> @test_sha1rnds4rm(<4 x i32> %a, <4 x i32>* %b) nounwind uwtable {
entry:
- %0 = load <4 x i32>* %b
+ %0 = load <4 x i32>, <4 x i32>* %b
%1 = tail call <4 x i32> @llvm.x86.sha1rnds4(<4 x i32> %a, <4 x i32> %0, i8 3)
ret <4 x i32> %1
; CHECK: test_sha1rnds4rm
@@ -32,7 +32,7 @@ entry:
define <4 x i32> @test_sha1nexterm(<4 x i32> %a, <4 x i32>* %b) nounwind uwtable {
entry:
- %0 = load <4 x i32>* %b
+ %0 = load <4 x i32>, <4 x i32>* %b
%1 = tail call <4 x i32> @llvm.x86.sha1nexte(<4 x i32> %a, <4 x i32> %0)
ret <4 x i32> %1
; CHECK: test_sha1nexterm
@@ -51,7 +51,7 @@ entry:
define <4 x i32> @test_sha1msg1rm(<4 x i32> %a, <4 x i32>* %b) nounwind uwtable {
entry:
- %0 = load <4 x i32>* %b
+ %0 = load <4 x i32>, <4 x i32>* %b
%1 = tail call <4 x i32> @llvm.x86.sha1msg1(<4 x i32> %a, <4 x i32> %0)
ret <4 x i32> %1
; CHECK: test_sha1msg1rm
@@ -70,7 +70,7 @@ entry:
define <4 x i32> @test_sha1msg2rm(<4 x i32> %a, <4 x i32>* %b) nounwind uwtable {
entry:
- %0 = load <4 x i32>* %b
+ %0 = load <4 x i32>, <4 x i32>* %b
%1 = tail call <4 x i32> @llvm.x86.sha1msg2(<4 x i32> %a, <4 x i32> %0)
ret <4 x i32> %1
; CHECK: test_sha1msg2rm
@@ -91,7 +91,7 @@ entry:
define <4 x i32> @test_sha256rnds2rm(<4 x i32> %a, <4 x i32>* %b, <4 x i32> %c) nounwind uwtable {
entry:
- %0 = load <4 x i32>* %b
+ %0 = load <4 x i32>, <4 x i32>* %b
%1 = tail call <4 x i32> @llvm.x86.sha256rnds2(<4 x i32> %a, <4 x i32> %0, <4 x i32> %c)
ret <4 x i32> %1
; CHECK: test_sha256rnds2rm
@@ -112,7 +112,7 @@ entry:
define <4 x i32> @test_sha256msg1rm(<4 x i32> %a, <4 x i32>* %b) nounwind uwtable {
entry:
- %0 = load <4 x i32>* %b
+ %0 = load <4 x i32>, <4 x i32>* %b
%1 = tail call <4 x i32> @llvm.x86.sha256msg1(<4 x i32> %a, <4 x i32> %0)
ret <4 x i32> %1
; CHECK: test_sha256msg1rm
@@ -131,7 +131,7 @@ entry:
define <4 x i32> @test_sha256msg2rm(<4 x i32> %a, <4 x i32>* %b) nounwind uwtable {
entry:
- %0 = load <4 x i32>* %b
+ %0 = load <4 x i32>, <4 x i32>* %b
%1 = tail call <4 x i32> @llvm.x86.sha256msg2(<4 x i32> %a, <4 x i32> %0)
ret <4 x i32> %1
; CHECK: test_sha256msg2rm
diff --git a/test/CodeGen/X86/shift-and.ll b/test/CodeGen/X86/shift-and.ll
index d487368431b0..edd43a35ce56 100644
--- a/test/CodeGen/X86/shift-and.ll
+++ b/test/CodeGen/X86/shift-and.ll
@@ -38,7 +38,7 @@ define void @t3(i16 %t) nounwind {
; X64-NOT: andl
; X64: sarw
%shamt = and i16 %t, 31
- %tmp = load i16* @X
+ %tmp = load i16, i16* @X
%tmp1 = ashr i16 %tmp, %shamt
store i16 %tmp1, i16* @X
ret void
@@ -71,7 +71,7 @@ entry:
; X64: decq
; X64: andq
%shr = lshr i64 %key, 3
- %0 = load i64* %val, align 8
+ %0 = load i64, i64* %val, align 8
%sub = add i64 %0, 2305843009213693951
%and = and i64 %sub, %shr
ret i64 %and
diff --git a/test/CodeGen/X86/shift-avx2-crash.ll b/test/CodeGen/X86/shift-avx2-crash.ll
new file mode 100755
index 000000000000..b23d6e3359ee
--- /dev/null
+++ b/test/CodeGen/X86/shift-avx2-crash.ll
@@ -0,0 +1,38 @@
+; RUN: llc < %s -mcpu=core-avx2 > /dev/null
+; This test crashed during variable shift creation on AVX2.
+
+target datalayout = "e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128"
+target triple = "i386-unknown-linux-gnu"
+
+define void @f_f(float* noalias nocapture %RET, float %aFOO, i32 %div) {
+allocas:
+ %__idiv_table_u32_offset10_offset_load.i = add i64 0, -2
+ br label %if_then18.i
+
+if_then18.i:
+ %aFOO_load_to_uint32 = fptoui float %aFOO to i32
+ %aFOO_load_to_uint32_broadcast_init = insertelement <8 x i32> undef, i32 %aFOO_load_to_uint32, i32 0
+ %aFOO_load_to_uint32_broadcast = shufflevector <8 x i32> %aFOO_load_to_uint32_broadcast_init, <8 x i32> undef, <8 x i32> zeroinitializer
+
+ %multiplier_load_broadcast_init.i = insertelement <8 x i64> undef, i64 2, i32 0
+ %multiplier_load_broadcast.i = shufflevector <8 x i64> %multiplier_load_broadcast_init.i, <8 x i64> undef, <8 x i32> zeroinitializer
+ %numerator_load_to_uint64.i = zext <8 x i32> %aFOO_load_to_uint32_broadcast to <8 x i64>
+
+ ; If '%__idiv_table_u32_offset10_offset_load.i' is replaced with '-2', or the 'if_then18.i' label is removed, the error disappears.
+ %add__shift_load21.i = add i64 %__idiv_table_u32_offset10_offset_load.i, 32
+ %add__shift_load21_broadcast_init.i = insertelement <8 x i64> undef, i64 %add__shift_load21.i, i32 0
+ %add__shift_load21_broadcast.i = shufflevector <8 x i64> %add__shift_load21_broadcast_init.i, <8 x i64> undef, <8 x i32> zeroinitializer
+
+ %mul_val_load_mult_load.i = mul <8 x i64> %numerator_load_to_uint64.i, %multiplier_load_broadcast.i
+ %bitop22.i = lshr <8 x i64> %mul_val_load_mult_load.i, %add__shift_load21_broadcast.i
+ %bitop22_to_uint32.i = trunc <8 x i64> %bitop22.i to <8 x i32>
+ br label %__fast_idiv___UM_vyuunu.exit
+
+
+__fast_idiv___UM_vyuunu.exit:
+ %calltmp_to_float = uitofp <8 x i32> %bitop22_to_uint32.i to <8 x float>
+ %ptrcast = bitcast float* %RET to <8 x float>*
+ store <8 x float> %calltmp_to_float, <8 x float>* %ptrcast, align 4
+ ret void
+}
+
diff --git a/test/CodeGen/X86/shift-bmi2.ll b/test/CodeGen/X86/shift-bmi2.ll
index 7615754a042a..63b6ec55fac8 100644
--- a/test/CodeGen/X86/shift-bmi2.ll
+++ b/test/CodeGen/X86/shift-bmi2.ll
@@ -27,7 +27,7 @@ entry:
define i32 @shl32p(i32* %p, i32 %shamt) nounwind uwtable readnone {
entry:
- %x = load i32* %p
+ %x = load i32, i32* %p
%shl = shl i32 %x, %shamt
; BMI2: shl32p
; Source order scheduling prevents folding, rdar:14208996.
@@ -41,7 +41,7 @@ entry:
define i32 @shl32pi(i32* %p) nounwind uwtable readnone {
entry:
- %x = load i32* %p
+ %x = load i32, i32* %p
%shl = shl i32 %x, 5
; BMI2: shl32pi
; BMI2-NOT: shlxl
@@ -72,7 +72,7 @@ entry:
define i64 @shl64p(i64* %p, i64 %shamt) nounwind uwtable readnone {
entry:
- %x = load i64* %p
+ %x = load i64, i64* %p
%shl = shl i64 %x, %shamt
; BMI264: shl64p
; BMI264: shlxq %{{.+}}, %{{.+}}, %{{.+}}
@@ -82,7 +82,7 @@ entry:
define i64 @shl64pi(i64* %p) nounwind uwtable readnone {
entry:
- %x = load i64* %p
+ %x = load i64, i64* %p
%shl = shl i64 %x, 7
; BMI264: shl64pi
; BMI264-NOT: shlxq
@@ -104,7 +104,7 @@ entry:
define i32 @lshr32p(i32* %p, i32 %shamt) nounwind uwtable readnone {
entry:
- %x = load i32* %p
+ %x = load i32, i32* %p
%shl = lshr i32 %x, %shamt
; BMI2: lshr32p
; Source order scheduling prevents folding, rdar:14208996.
@@ -127,7 +127,7 @@ entry:
define i64 @lshr64p(i64* %p, i64 %shamt) nounwind uwtable readnone {
entry:
- %x = load i64* %p
+ %x = load i64, i64* %p
%shl = lshr i64 %x, %shamt
; BMI264: lshr64p
; BMI264: shrxq %{{.+}}, %{{.+}}, %{{.+}}
@@ -149,7 +149,7 @@ entry:
define i32 @ashr32p(i32* %p, i32 %shamt) nounwind uwtable readnone {
entry:
- %x = load i32* %p
+ %x = load i32, i32* %p
%shl = ashr i32 %x, %shamt
; BMI2: ashr32p
; Source order scheduling prevents folding, rdar:14208996.
@@ -172,7 +172,7 @@ entry:
define i64 @ashr64p(i64* %p, i64 %shamt) nounwind uwtable readnone {
entry:
- %x = load i64* %p
+ %x = load i64, i64* %p
%shl = ashr i64 %x, %shamt
; BMI264: ashr64p
; BMI264: sarxq %{{.+}}, %{{.+}}, %{{.+}}
diff --git a/test/CodeGen/X86/shift-coalesce.ll b/test/CodeGen/X86/shift-coalesce.ll
index 5241042d0c55..dee7d373dcee 100644
--- a/test/CodeGen/X86/shift-coalesce.ll
+++ b/test/CodeGen/X86/shift-coalesce.ll
@@ -6,7 +6,7 @@
; PR687
define i64 @foo(i64 %x, i64* %X) {
- %tmp.1 = load i64* %X ; <i64> [#uses=1]
+ %tmp.1 = load i64, i64* %X ; <i64> [#uses=1]
%tmp.3 = trunc i64 %tmp.1 to i8 ; <i8> [#uses=1]
%shift.upgrd.1 = zext i8 %tmp.3 to i64 ; <i64> [#uses=1]
%tmp.4 = shl i64 %x, %shift.upgrd.1 ; <i64> [#uses=1]
diff --git a/test/CodeGen/X86/shift-codegen.ll b/test/CodeGen/X86/shift-codegen.ll
index 88b86100794a..7d52bdeb9e3a 100644
--- a/test/CodeGen/X86/shift-codegen.ll
+++ b/test/CodeGen/X86/shift-codegen.ll
@@ -15,9 +15,9 @@ define void @fn1() {
; CHECK-NOT: lea
; CHECK: ret
- %tmp = load i32* @Y ; <i32> [#uses=1]
+ %tmp = load i32, i32* @Y ; <i32> [#uses=1]
%tmp1 = shl i32 %tmp, 3 ; <i32> [#uses=1]
- %tmp2 = load i32* @X ; <i32> [#uses=1]
+ %tmp2 = load i32, i32* @X ; <i32> [#uses=1]
%tmp3 = or i32 %tmp1, %tmp2 ; <i32> [#uses=1]
store i32 %tmp3, i32* @X
ret void
diff --git a/test/CodeGen/X86/shift-combine.ll b/test/CodeGen/X86/shift-combine.ll
index 113dedb4a00c..ec62bcdcdba1 100644
--- a/test/CodeGen/X86/shift-combine.ll
+++ b/test/CodeGen/X86/shift-combine.ll
@@ -12,8 +12,8 @@ define i32 @test_lshr_and(i32 %x) {
entry:
%tmp2 = lshr i32 %x, 2
%tmp3 = and i32 %tmp2, 3
- %tmp4 = getelementptr [4 x i32]* @array, i32 0, i32 %tmp3
- %tmp5 = load i32* %tmp4, align 4
+ %tmp4 = getelementptr [4 x i32], [4 x i32]* @array, i32 0, i32 %tmp3
+ %tmp5 = load i32, i32* %tmp4, align 4
ret i32 %tmp5
}
diff --git a/test/CodeGen/X86/shift-folding.ll b/test/CodeGen/X86/shift-folding.ll
index ea9002c397b8..698878708977 100644
--- a/test/CodeGen/X86/shift-folding.ll
+++ b/test/CodeGen/X86/shift-folding.ll
@@ -9,7 +9,7 @@ define i32* @test1(i32* %P, i32 %X) {
entry:
%Y = lshr i32 %X, 2
%gep.upgrd.1 = zext i32 %Y to i64
- %P2 = getelementptr i32* %P, i64 %gep.upgrd.1
+ %P2 = getelementptr i32, i32* %P, i64 %gep.upgrd.1
ret i32* %P2
}
@@ -22,7 +22,7 @@ define i32* @test2(i32* %P, i32 %X) {
entry:
%Y = shl i32 %X, 2
%gep.upgrd.2 = zext i32 %Y to i64
- %P2 = getelementptr i32* %P, i64 %gep.upgrd.2
+ %P2 = getelementptr i32, i32* %P, i64 %gep.upgrd.2
ret i32* %P2
}
@@ -34,7 +34,7 @@ define i32* @test3(i32* %P, i32 %X) {
entry:
%Y = ashr i32 %X, 2
- %P2 = getelementptr i32* %P, i32 %Y
+ %P2 = getelementptr i32, i32* %P, i32 %Y
ret i32* %P2
}
@@ -44,7 +44,7 @@ define fastcc i32 @test4(i32* %d) {
; CHECK: ret
entry:
- %tmp4 = load i32* %d
+ %tmp4 = load i32, i32* %d
%tmp512 = lshr i32 %tmp4, 24
ret i32 %tmp512
}
@@ -62,8 +62,8 @@ entry:
%i.zext = zext i16 %i to i32
%index = lshr i32 %i.zext, 11
%index.zext = zext i32 %index to i64
- %val.ptr = getelementptr inbounds i32* %arr, i64 %index.zext
- %val = load i32* %val.ptr
+ %val.ptr = getelementptr inbounds i32, i32* %arr, i64 %index.zext
+ %val = load i32, i32* %val.ptr
%val.zext = zext i32 %val to i64
%sum = add i64 %val.zext, %index.zext
ret i64 %sum
diff --git a/test/CodeGen/X86/shift-i256.ll b/test/CodeGen/X86/shift-i256.ll
index d5f65a6ed18c..866e7e67fb0a 100644
--- a/test/CodeGen/X86/shift-i256.ll
+++ b/test/CodeGen/X86/shift-i256.ll
@@ -1,9 +1,21 @@
-; RUN: llc < %s -march=x86
-; RUN: llc < %s -march=x86-64
+; RUN: llc < %s -march=x86 | FileCheck %s
+; RUN: llc < %s -march=x86-64 -O0 | FileCheck %s -check-prefix=CHECK-X64
+; RUN: llc < %s -march=x86-64 -O2 | FileCheck %s -check-prefix=CHECK-X64
-define void @t(i256 %x, i256 %a, i256* nocapture %r) nounwind readnone {
+; CHECK-LABEL: shift1
+define void @shift1(i256 %x, i256 %a, i256* nocapture %r) nounwind readnone {
entry:
%0 = ashr i256 %x, %a
store i256 %0, i256* %r
ret void
}
+
+; CHECK-LABEL: shift2
+define i256 @shift2(i256 %c) nounwind
+{
+ %b = shl i256 1, %c ; %c must not be a constant
+ ; Special case when %c is 0:
+ ; CHECK-X64: testb [[REG:%r[0-9]+b]], [[REG]]
+ ; CHECK-X64: cmoveq
+ ret i256 %b
+}
diff --git a/test/CodeGen/X86/shift-one.ll b/test/CodeGen/X86/shift-one.ll
index 0f80f90c773e..1ff02eb53e93 100644
--- a/test/CodeGen/X86/shift-one.ll
+++ b/test/CodeGen/X86/shift-one.ll
@@ -3,7 +3,7 @@
@x = external global i32 ; <i32*> [#uses=1]
define i32 @test() {
- %tmp.0 = load i32* @x ; <i32> [#uses=1]
+ %tmp.0 = load i32, i32* @x ; <i32> [#uses=1]
%tmp.1 = shl i32 %tmp.0, 1 ; <i32> [#uses=1]
ret i32 %tmp.1
}
diff --git a/test/CodeGen/X86/shift-pair.ll b/test/CodeGen/X86/shift-pair.ll
index 24ba1fc77074..62e51f002f7d 100644
--- a/test/CodeGen/X86/shift-pair.ll
+++ b/test/CodeGen/X86/shift-pair.ll
@@ -3,7 +3,7 @@
define i64 @test(i64 %A) {
; CHECK: @test
; CHECK: shrq $54
-; CHECK: andq $1020
+; CHECK: andl $1020
; CHECK: ret
%B = lshr i64 %A, 56
%C = shl i64 %B, 2
diff --git a/test/CodeGen/X86/shift-parts.ll b/test/CodeGen/X86/shift-parts.ll
index 763da6397101..0b25a7595f2a 100644
--- a/test/CodeGen/X86/shift-parts.ll
+++ b/test/CodeGen/X86/shift-parts.ll
@@ -9,7 +9,7 @@
define i32 @int87(i32 %uint64p_8, i1 %cond) nounwind {
entry:
- %srcval4 = load i320* bitcast (%0* @g_144 to i320*), align 8 ; <i320> [#uses=1]
+ %srcval4 = load i320, i320* bitcast (%0* @g_144 to i320*), align 8 ; <i320> [#uses=1]
br label %for.cond
for.cond: ; preds = %for.cond, %entry
diff --git a/test/CodeGen/X86/shl-i64.ll b/test/CodeGen/X86/shl-i64.ll
index f00058a8a886..849912cc12e9 100644
--- a/test/CodeGen/X86/shl-i64.ll
+++ b/test/CodeGen/X86/shl-i64.ll
@@ -6,10 +6,10 @@
define void @test_cl(<4 x i64>* %dst, <4 x i64>* %src, i32 %idx) {
entry:
- %arrayidx = getelementptr inbounds <4 x i64> * %src, i32 %idx
- %0 = load <4 x i64> * %arrayidx, align 32
- %arrayidx1 = getelementptr inbounds <4 x i64> * %dst, i32 %idx
- %1 = load <4 x i64> * %arrayidx1, align 32
+ %arrayidx = getelementptr inbounds <4 x i64>, <4 x i64> * %src, i32 %idx
+ %0 = load <4 x i64> , <4 x i64> * %arrayidx, align 32
+ %arrayidx1 = getelementptr inbounds <4 x i64>, <4 x i64> * %dst, i32 %idx
+ %1 = load <4 x i64> , <4 x i64> * %arrayidx1, align 32
%2 = extractelement <4 x i64> %1, i32 0
%and = and i64 %2, 63
%3 = insertelement <4 x i64> undef, i64 %and, i32 0
diff --git a/test/CodeGen/X86/shl_undef.ll b/test/CodeGen/X86/shl_undef.ll
index 705af5b4e332..f59d014a2d85 100644
--- a/test/CodeGen/X86/shl_undef.ll
+++ b/test/CodeGen/X86/shl_undef.ll
@@ -18,7 +18,7 @@ entry:
%tmp0 = alloca i8
%tmp1 = alloca i32
store i8 1, i8* %tmp0
- %tmp921.i7845 = load i8* %a0, align 1
+ %tmp921.i7845 = load i8, i8* %a0, align 1
%tmp309 = xor i8 %tmp921.i7845, 104
%tmp592 = zext i8 %tmp309 to i32
%tmp862 = xor i32 1293461297, %tmp592
@@ -49,7 +49,7 @@ entry:
; shl undef, x -> 0
define i32 @foo1_undef(i32* %a0) nounwind {
entry:
- %tmp1 = load i32* %a0, align 1
+ %tmp1 = load i32, i32* %a0, align 1
%tmp2 = shl i32 undef, %tmp1;
ret i32 %tmp2
}
diff --git a/test/CodeGen/X86/shrink-compare.ll b/test/CodeGen/X86/shrink-compare.ll
index 4ddef4ca5351..0efa073cb191 100644
--- a/test/CodeGen/X86/shrink-compare.ll
+++ b/test/CodeGen/X86/shrink-compare.ll
@@ -4,7 +4,7 @@ declare void @bar()
define void @test1(i32* nocapture %X) nounwind minsize {
entry:
- %tmp1 = load i32* %X, align 4
+ %tmp1 = load i32, i32* %X, align 4
%and = and i32 %tmp1, 255
%cmp = icmp eq i32 %and, 47
br i1 %cmp, label %if.then, label %if.end
@@ -72,7 +72,7 @@ lor.end: ; preds = %lor.rhs, %entry
; PR16551
define void @test5(i32 %X) nounwind minsize {
entry:
- %bf.load = load i56* bitcast ({ i8, i8, i8, i8, i8, i8, i8, i8 }* @x to i56*), align 4
+ %bf.load = load i56, i56* bitcast ({ i8, i8, i8, i8, i8, i8, i8, i8 }* @x to i56*), align 4
%bf.lshr = lshr i56 %bf.load, 32
%bf.cast = trunc i56 %bf.lshr to i32
%cmp = icmp ne i32 %bf.cast, 1
diff --git a/test/CodeGen/X86/shuffle-combine-crash.ll b/test/CodeGen/X86/shuffle-combine-crash.ll
index 6ab7b97e6a7b..06fcaa973893 100644
--- a/test/CodeGen/X86/shuffle-combine-crash.ll
+++ b/test/CodeGen/X86/shuffle-combine-crash.ll
@@ -18,7 +18,7 @@ define void @sample_test() {
br i1 undef, label %5, label %1
; <label>:1 ; preds = %0
- %2 = load <4 x i8>* undef
+ %2 = load <4 x i8>, <4 x i8>* undef
%3 = shufflevector <4 x i8> %2, <4 x i8> undef, <4 x i32> <i32 2, i32 2, i32 0, i32 0>
%4 = shufflevector <4 x i8> %3, <4 x i8> undef, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
store <4 x i8> %4, <4 x i8>* undef
diff --git a/test/CodeGen/X86/sibcall-4.ll b/test/CodeGen/X86/sibcall-4.ll
index 2c7f51d28025..23b73c031c0b 100644
--- a/test/CodeGen/X86/sibcall-4.ll
+++ b/test/CodeGen/X86/sibcall-4.ll
@@ -5,8 +5,8 @@ define ghccc void @t(i32* %Base_Arg, i32* %Sp_Arg, i32* %Hp_Arg, i32 %R1_Arg) no
cm1:
; CHECK-LABEL: t:
; CHECK: jmpl *%eax
- %nm3 = getelementptr i32* %Sp_Arg, i32 1
- %nm9 = load i32* %Sp_Arg
+ %nm3 = getelementptr i32, i32* %Sp_Arg, i32 1
+ %nm9 = load i32, i32* %Sp_Arg
%nma = inttoptr i32 %nm9 to void (i32*, i32*, i32*, i32)*
tail call ghccc void %nma(i32* %Base_Arg, i32* %nm3, i32* %Hp_Arg, i32 %R1_Arg) nounwind
ret void
diff --git a/test/CodeGen/X86/sibcall-5.ll b/test/CodeGen/X86/sibcall-5.ll
index b065cce17b24..aab028bd17c8 100644
--- a/test/CodeGen/X86/sibcall-5.ll
+++ b/test/CodeGen/X86/sibcall-5.ll
@@ -46,7 +46,7 @@ define hidden { double, double } @foo2(%0* %self, i8* nocapture %_cmd) uwtable o
; X64_BAD: call
; X64_BAD: call
; X64_BAD: call
- %1 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_2", align 8, !invariant.load !0
+ %1 = load i8*, i8** @"\01L_OBJC_SELECTOR_REFERENCES_2", align 8, !invariant.load !0
%2 = bitcast %0* %self to i8*
%3 = tail call { double, double } bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to { double, double } (i8*, i8*)*)(i8* %2, i8* %1) optsize
%4 = extractvalue { double, double } %3, 0
diff --git a/test/CodeGen/X86/sibcall-win64.ll b/test/CodeGen/X86/sibcall-win64.ll
new file mode 100644
index 000000000000..f7038726f9ca
--- /dev/null
+++ b/test/CodeGen/X86/sibcall-win64.ll
@@ -0,0 +1,42 @@
+; RUN: llc < %s -mtriple=x86_64-pc-linux | FileCheck %s
+
+declare x86_64_win64cc void @win64_callee(i32)
+declare void @sysv_callee(i32)
+
+define void @sysv_caller(i32 %p1) {
+entry:
+ tail call x86_64_win64cc void @win64_callee(i32 %p1)
+ ret void
+}
+
+; CHECK-LABEL: sysv_caller:
+; CHECK: subq $40, %rsp
+; CHECK: callq win64_callee
+; CHECK: addq $40, %rsp
+; CHECK: retq
+
+define x86_64_win64cc void @win64_caller(i32 %p1) {
+entry:
+ tail call void @sysv_callee(i32 %p1)
+ ret void
+}
+
+; CHECK-LABEL: win64_caller:
+; CHECK: callq sysv_callee
+; CHECK: retq
+
+define void @sysv_matched(i32 %p1) {
+ tail call void @sysv_callee(i32 %p1)
+ ret void
+}
+
+; CHECK-LABEL: sysv_matched:
+; CHECK: jmp sysv_callee # TAILCALL
+
+define x86_64_win64cc void @win64_matched(i32 %p1) {
+ tail call x86_64_win64cc void @win64_callee(i32 %p1)
+ ret void
+}
+
+; CHECK-LABEL: win64_matched:
+; CHECK: jmp win64_callee # TAILCALL
diff --git a/test/CodeGen/X86/sibcall.ll b/test/CodeGen/X86/sibcall.ll
index 28fc626afd9d..b94960af65ab 100644
--- a/test/CodeGen/X86/sibcall.ll
+++ b/test/CodeGen/X86/sibcall.ll
@@ -1,5 +1,6 @@
; RUN: llc < %s -mtriple=i686-linux -mcpu=core2 -mattr=+sse2 -asm-verbose=false | FileCheck %s -check-prefix=32
; RUN: llc < %s -mtriple=x86_64-linux -mcpu=core2 -mattr=+sse2 -asm-verbose=false | FileCheck %s -check-prefix=64
+; RUN: llc < %s -mtriple=x86_64-linux-gnux32 -mcpu=core2 -mattr=+sse2 -asm-verbose=false | FileCheck %s -check-prefix=X32ABI
define void @t1(i32 %x) nounwind ssp {
entry:
@@ -8,6 +9,9 @@ entry:
; 64-LABEL: t1:
; 64: jmp {{_?}}foo
+
+; X32ABI-LABEL: t1:
+; X32ABI: jmp {{_?}}foo
tail call void @foo() nounwind
ret void
}
@@ -21,6 +25,9 @@ entry:
; 64-LABEL: t2:
; 64: jmp {{_?}}foo2
+
+; X32ABI-LABEL: t2:
+; X32ABI: jmp {{_?}}foo2
%0 = tail call i32 @foo2() nounwind
ret void
}
@@ -34,6 +41,9 @@ entry:
; 64-LABEL: t3:
; 64: jmp {{_?}}foo3
+
+; X32ABI-LABEL: t3:
+; X32ABI: jmp {{_?}}foo3
%0 = tail call i32 @foo3() nounwind
ret void
}
@@ -49,6 +59,10 @@ entry:
; 64-LABEL: t4:
; 64-NOT: call
; 64: jmpq *
+
+; X32ABI-LABEL: t4:
+; X32ABI-NOT: call
+; X32ABI: jmpq *
tail call void %x(i32 0) nounwind
ret void
}
@@ -62,6 +76,13 @@ entry:
; 64-LABEL: t5:
; 64-NOT: call
; 64: jmpq *%rdi
+
+; X32ABI-LABEL: t5:
+; X32ABI-NOT: call
+; FIXME: This isn't needed since the x32 psABI specifies that callers must
+; zero-extend pointers passed in registers.
+; X32ABI: movl %edi, %eax
+; X32ABI: jmpq *%rax
tail call void %x() nounwind
ret void
}
@@ -75,6 +96,10 @@ entry:
; 64-LABEL: t6:
; 64: jmp {{_?}}t6
; 64: jmp {{_?}}bar
+
+; X32ABI-LABEL: t6:
+; X32ABI: jmp {{_?}}t6
+; X32ABI: jmp {{_?}}bar
%0 = icmp slt i32 %x, 10
br i1 %0, label %bb, label %bb1
@@ -97,6 +122,9 @@ entry:
; 64-LABEL: t7:
; 64: jmp {{_?}}bar2
+
+; X32ABI-LABEL: t7:
+; X32ABI: jmp {{_?}}bar2
%0 = tail call i32 @bar2(i32 %a, i32 %b, i32 %c) nounwind
ret i32 %0
}
@@ -110,6 +138,9 @@ entry:
; 64-LABEL: t8:
; 64: jmp {{_?}}bar3
+
+; X32ABI-LABEL: t8:
+; X32ABI: jmp {{_?}}bar3
%0 = tail call signext i16 @bar3() nounwind ; <i16> [#uses=1]
ret i16 %0
}
@@ -123,6 +154,9 @@ entry:
; 64-LABEL: t9:
; 64: jmpq *
+
+; X32ABI-LABEL: t9:
+; X32ABI: jmpq *
%0 = bitcast i32 (i32)* %x to i16 (i32)*
%1 = tail call signext i16 %0(i32 0) nounwind
ret i16 %1
@@ -135,6 +169,9 @@ entry:
; 64-LABEL: t10:
; 64: callq
+
+; X32ABI-LABEL: t10:
+; X32ABI: callq
%0 = tail call i32 @foo4() noreturn nounwind
unreachable
}
@@ -153,9 +190,14 @@ define i32 @t11(i32 %x, i32 %y, i32 %z.0, i32 %z.1, i32 %z.2) nounwind ssp {
; 32: jmp {{_?}}foo5
; 64-LABEL: t11:
-; 64-NOT: subq ${{[0-9]+}}, %esp
-; 64-NOT: addq ${{[0-9]+}}, %esp
+; 64-NOT: subq ${{[0-9]+}}, %rsp
+; 64-NOT: addq ${{[0-9]+}}, %rsp
; 64: jmp {{_?}}foo5
+
+; X32ABI-LABEL: t11:
+; X32ABI-NOT: subl ${{[0-9]+}}, %esp
+; X32ABI-NOT: addl ${{[0-9]+}}, %esp
+; X32ABI: jmp {{_?}}foo5
entry:
%0 = icmp eq i32 %x, 0
br i1 %0, label %bb6, label %bb
@@ -179,9 +221,14 @@ define i32 @t12(i32 %x, i32 %y, %struct.t* byval align 4 %z) nounwind ssp {
; 32: jmp {{_?}}foo6
; 64-LABEL: t12:
-; 64-NOT: subq ${{[0-9]+}}, %esp
-; 64-NOT: addq ${{[0-9]+}}, %esp
+; 64-NOT: subq ${{[0-9]+}}, %rsp
+; 64-NOT: addq ${{[0-9]+}}, %rsp
; 64: jmp {{_?}}foo6
+
+; X32ABI-LABEL: t12:
+; X32ABI-NOT: subl ${{[0-9]+}}, %esp
+; X32ABI-NOT: addl ${{[0-9]+}}, %esp
+; X32ABI: jmp {{_?}}foo6
entry:
%0 = icmp eq i32 %x, 0
br i1 %0, label %bb2, label %bb
@@ -210,6 +257,11 @@ define %struct.ns* @t13(%struct.cp* %yy) nounwind ssp {
; 64-NOT: jmp
; 64: callq
; 64: ret
+
+; X32ABI-LABEL: t13:
+; X32ABI-NOT: jmp
+; X32ABI: callq
+; X32ABI: ret
entry:
%0 = tail call fastcc %struct.ns* @foo7(%struct.cp* byval align 4 %yy, i8 signext 0) nounwind
ret %struct.ns* %0
@@ -230,11 +282,16 @@ entry:
; 64: movq 32(%rdi)
; 64-NOT: movq 16(%rdi)
; 64: jmpq *16({{%rdi|%rax}})
- %0 = getelementptr inbounds %struct.__block_literal_2* %.block_descriptor, i64 0, i32 5 ; <void ()**> [#uses=1]
- %1 = load void ()** %0, align 8 ; <void ()*> [#uses=2]
+
+; X32ABI-LABEL: t14:
+; X32ABI: movl 20(%edi), %edi
+; X32ABI-NEXT: movl 12(%edi), %eax
+; X32ABI-NEXT: jmpq *%rax
+ %0 = getelementptr inbounds %struct.__block_literal_2, %struct.__block_literal_2* %.block_descriptor, i64 0, i32 5 ; <void ()**> [#uses=1]
+ %1 = load void ()*, void ()** %0, align 8 ; <void ()*> [#uses=2]
%2 = bitcast void ()* %1 to %struct.__block_literal_1* ; <%struct.__block_literal_1*> [#uses=1]
- %3 = getelementptr inbounds %struct.__block_literal_1* %2, i64 0, i32 3 ; <i8**> [#uses=1]
- %4 = load i8** %3, align 8 ; <i8*> [#uses=1]
+ %3 = getelementptr inbounds %struct.__block_literal_1, %struct.__block_literal_1* %2, i64 0, i32 3 ; <i8**> [#uses=1]
+ %4 = load i8*, i8** %3, align 8 ; <i8*> [#uses=1]
%5 = bitcast i8* %4 to void (i8*)* ; <void (i8*)*> [#uses=1]
%6 = bitcast void ()* %1 to i8* ; <i8*> [#uses=1]
tail call void %5(i8* %6) nounwind
@@ -252,6 +309,10 @@ define void @t15(%struct.foo* noalias sret %agg.result) nounwind {
; 64-LABEL: t15:
; 64: callq {{_?}}f
; 64: retq
+
+; X32ABI-LABEL: t15:
+; X32ABI: callq {{_?}}f
+; X32ABI: retq
tail call fastcc void @f(%struct.foo* noalias sret %agg.result) nounwind
ret void
}
@@ -266,6 +327,9 @@ entry:
; 64-LABEL: t16:
; 64: jmp {{_?}}bar4
+
+; X32ABI-LABEL: t16:
+; X32ABI: jmp {{_?}}bar4
%0 = tail call double @bar4() nounwind
ret void
}
@@ -281,7 +345,11 @@ entry:
; 64-LABEL: t17:
; 64: xorl %eax, %eax
; 64: jmp {{_?}}bar5
- tail call void (...)* @bar5() nounwind
+
+; X32ABI-LABEL: t17:
+; X32ABI: xorl %eax, %eax
+; X32ABI: jmp {{_?}}bar5
+ tail call void (...) @bar5() nounwind
ret void
}
@@ -297,7 +365,11 @@ entry:
; 64-LABEL: t18:
; 64: xorl %eax, %eax
; 64: jmp {{_?}}bar6
- %0 = tail call double (...)* @bar6() nounwind
+
+; X32ABI-LABEL: t18:
+; X32ABI: xorl %eax, %eax
+; X32ABI: jmp {{_?}}bar6
+ %0 = tail call double (...) @bar6() nounwind
ret void
}
@@ -308,6 +380,10 @@ entry:
; CHECK-LABEL: t19:
; CHECK: andl $-32
; CHECK: calll {{_?}}foo
+
+; X32ABI-LABEL: t19:
+; X32ABI: andl $-32
+; X32ABI: callq {{_?}}foo
tail call void @foo() nounwind
ret void
}
@@ -324,6 +400,9 @@ entry:
; 64-LABEL: t20:
; 64: jmp {{_?}}foo20
+
+; X32ABI-LABEL: t20:
+; X32ABI: jmp {{_?}}foo20
%0 = tail call fastcc double @foo20(double %x) nounwind
ret double %0
}
diff --git a/test/CodeGen/X86/simple-zext.ll b/test/CodeGen/X86/simple-zext.ll
index ccd8292bcdb3..b80c0bc6b10d 100644
--- a/test/CodeGen/X86/simple-zext.ll
+++ b/test/CodeGen/X86/simple-zext.ll
@@ -6,7 +6,7 @@
define void @load_zext(i32* nocapture %p){
entry:
- %0 = load i32* %p, align 4
+ %0 = load i32, i32* %p, align 4
%and = and i32 %0, 255
tail call void @use(i32 %and)
ret void
diff --git a/test/CodeGen/X86/sincos-opt.ll b/test/CodeGen/X86/sincos-opt.ll
index 1e34a2be10b3..9d02bcd9a6c7 100644
--- a/test/CodeGen/X86/sincos-opt.ll
+++ b/test/CodeGen/X86/sincos-opt.ll
@@ -15,9 +15,8 @@ entry:
; OSX_SINCOS-LABEL: test1:
; OSX_SINCOS: callq ___sincosf_stret
-; OSX_SINCOS: movaps %xmm0, %xmm1
-; OSX_SINCOS: shufps {{.*}} ## xmm1 = xmm1[1,1,2,3]
-; OSX_SINCOS: addss %xmm0, %xmm1
+; OSX_SINCOS: movshdup {{.*}} xmm1 = xmm0[1,1,3,3]
+; OSX_SINCOS: addss %xmm1, %xmm0
; OSX_NOOPT: test1
; OSX_NOOPT: callq _sinf
diff --git a/test/CodeGen/X86/sink-cheap-instructions.ll b/test/CodeGen/X86/sink-cheap-instructions.ll
new file mode 100644
index 000000000000..9b9a6865af93
--- /dev/null
+++ b/test/CodeGen/X86/sink-cheap-instructions.ll
@@ -0,0 +1,62 @@
+; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s -check-prefix=CHECK
+; RUN: llc < %s -mtriple=x86_64-linux -sink-insts-to-avoid-spills | FileCheck %s -check-prefix=SINK
+
+; Ensure that we sink copy-like instructions into loops to avoid register
+; spills.
+
+; CHECK: Spill
+; SINK-NOT: Spill
+
+%struct.A = type { i32, i32, i32, i32, i32, i32 }
+
+define void @_Z1fPhP1A(i8* nocapture readonly %input, %struct.A* %a) {
+ %1 = getelementptr inbounds %struct.A, %struct.A* %a, i64 0, i32 0
+ %2 = getelementptr inbounds %struct.A, %struct.A* %a, i64 0, i32 1
+ %3 = getelementptr inbounds %struct.A, %struct.A* %a, i64 0, i32 2
+ %4 = getelementptr inbounds %struct.A, %struct.A* %a, i64 0, i32 3
+ %5 = getelementptr inbounds %struct.A, %struct.A* %a, i64 0, i32 4
+ %6 = getelementptr inbounds %struct.A, %struct.A* %a, i64 0, i32 5
+ br label %.backedge
+
+.backedge:
+ %.0 = phi i8* [ %input, %0 ], [ %7, %.backedge.backedge ]
+ %7 = getelementptr inbounds i8, i8* %.0, i64 1
+ %8 = load i8, i8* %7, align 1
+ switch i8 %8, label %.backedge.backedge [
+ i8 0, label %9
+ i8 10, label %10
+ i8 20, label %11
+ i8 30, label %12
+ i8 40, label %13
+ i8 50, label %14
+ ]
+
+; <label>:9
+ tail call void @_Z6assignPj(i32* %1)
+ br label %.backedge.backedge
+
+; <label>:10
+ tail call void @_Z6assignPj(i32* %2)
+ br label %.backedge.backedge
+
+.backedge.backedge:
+ br label %.backedge
+
+; <label>:11
+ tail call void @_Z6assignPj(i32* %3)
+ br label %.backedge.backedge
+
+; <label>:12
+ tail call void @_Z6assignPj(i32* %4)
+ br label %.backedge.backedge
+
+; <label>:13
+ tail call void @_Z6assignPj(i32* %5)
+ br label %.backedge.backedge
+
+; <label>:14
+ tail call void @_Z6assignPj(i32* %6)
+ br label %.backedge.backedge
+}
+
+declare void @_Z6assignPj(i32*)
diff --git a/test/CodeGen/X86/sink-hoist.ll b/test/CodeGen/X86/sink-hoist.ll
index 455cf24bce1c..972fbdf48cb5 100644
--- a/test/CodeGen/X86/sink-hoist.ll
+++ b/test/CodeGen/X86/sink-hoist.ll
@@ -9,7 +9,7 @@
; CHECK-NEXT: testb $1, %dil
; CHECK-NEXT: jne
; CHECK-NEXT: divsd
-; CHECK-NEXT: movaps
+; CHECK-NEXT: movapd
; CHECK-NEXT: ret
; CHECK: divsd
@@ -28,7 +28,7 @@ define double @foo(double %x, double %y, i1 %c) nounwind {
; CHECK-NEXT: testb $1, %dil
; CHECK-NEXT: je
; CHECK: divsd
-; CHECK: movaps
+; CHECK: movapd
; CHECK: ret
define double @split(double %x, double %y, i1 %c) nounwind {
%a = fdiv double %x, 3.2
@@ -49,8 +49,8 @@ entry:
bb:
%i.03 = phi i64 [ 0, %entry ], [ %3, %bb ]
- %scevgep = getelementptr double* %p, i64 %i.03
- %1 = load double* %scevgep, align 8
+ %scevgep = getelementptr double, double* %p, i64 %i.03
+ %1 = load double, double* %scevgep, align 8
%2 = fdiv double 3.200000e+00, %1
store double %2, double* %scevgep, align 8
%3 = add nsw i64 %i.03, 1
@@ -104,7 +104,7 @@ entry:
bb: ; preds = %bb60
%i.0 = phi i32 [ 0, %bb60 ] ; <i32> [#uses=2]
%0 = bitcast float* %x_addr.0 to <4 x float>* ; <<4 x float>*> [#uses=1]
- %1 = load <4 x float>* %0, align 16 ; <<4 x float>> [#uses=4]
+ %1 = load <4 x float>, <4 x float>* %0, align 16 ; <<4 x float>> [#uses=4]
%tmp20 = bitcast <4 x float> %1 to <4 x i32> ; <<4 x i32>> [#uses=1]
%tmp22 = and <4 x i32> %tmp20, <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647> ; <<4 x i32>> [#uses=1]
%tmp23 = bitcast <4 x i32> %tmp22 to <4 x float> ; <<4 x float>> [#uses=1]
@@ -127,10 +127,10 @@ bb: ; preds = %bb60
%tmp58 = bitcast <4 x i32> %tmp57 to <4 x float> ; <<4 x float>> [#uses=1]
%4 = bitcast float* %y_addr.0 to <4 x float>* ; <<4 x float>*> [#uses=1]
store <4 x float> %tmp58, <4 x float>* %4, align 16
- %5 = getelementptr float* %x_addr.0, i64 4 ; <float*> [#uses=1]
- %6 = getelementptr float* %y_addr.0, i64 4 ; <float*> [#uses=1]
+ %5 = getelementptr float, float* %x_addr.0, i64 4 ; <float*> [#uses=1]
+ %6 = getelementptr float, float* %y_addr.0, i64 4 ; <float*> [#uses=1]
%7 = add i32 %i.0, 4 ; <i32> [#uses=1]
- %8 = load i32* %n, align 4 ; <i32> [#uses=1]
+ %8 = load i32, i32* %n, align 4 ; <i32> [#uses=1]
%9 = icmp sgt i32 %8, %7 ; <i1> [#uses=1]
br i1 %9, label %bb60, label %return
@@ -157,14 +157,14 @@ declare <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32>) nounwind readnone
define void @default_get_pch_validity() nounwind {
entry:
- %tmp4 = load i32* @cl_options_count, align 4 ; <i32> [#uses=1]
+ %tmp4 = load i32, i32* @cl_options_count, align 4 ; <i32> [#uses=1]
%tmp5 = icmp eq i32 %tmp4, 0 ; <i1> [#uses=1]
br i1 %tmp5, label %bb6, label %bb2
bb2: ; preds = %bb2, %entry
%i.019 = phi i64 [ 0, %entry ], [ %tmp25, %bb2 ] ; <i64> [#uses=1]
%tmp25 = add i64 %i.019, 1 ; <i64> [#uses=2]
- %tmp11 = load i32* @cl_options_count, align 4 ; <i32> [#uses=1]
+ %tmp11 = load i32, i32* @cl_options_count, align 4 ; <i32> [#uses=1]
%tmp12 = zext i32 %tmp11 to i64 ; <i64> [#uses=1]
%tmp13 = icmp ugt i64 %tmp12, %tmp25 ; <i1> [#uses=1]
br i1 %tmp13, label %bb2, label %bb6
diff --git a/test/CodeGen/X86/sink-out-of-loop.ll b/test/CodeGen/X86/sink-out-of-loop.ll
index 6757f315b6da..4bf829a02738 100644
--- a/test/CodeGen/X86/sink-out-of-loop.ll
+++ b/test/CodeGen/X86/sink-out-of-loop.ll
@@ -61,7 +61,7 @@ entry:
loop:
%i = phi i32 [ 0, %entry ], [ %i2, %loop ]
%j = mul i32 %i, %i
- %addr = getelementptr i32* %output, i32 %i
+ %addr = getelementptr i32, i32* %output, i32 %i
store i32 %i, i32* %addr
%i2 = add i32 %i, 1
%exit_cond = icmp sge i32 %i2, %n
diff --git a/test/CodeGen/X86/sjlj.ll b/test/CodeGen/X86/sjlj.ll
index 681db0094384..3fa3d30ea5cc 100644
--- a/test/CodeGen/X86/sjlj.ll
+++ b/test/CodeGen/X86/sjlj.ll
@@ -15,9 +15,9 @@ declare void @llvm.eh.sjlj.longjmp(i8*) nounwind
define i32 @sj0() nounwind {
%fp = tail call i8* @llvm.frameaddress(i32 0)
- store i8* %fp, i8** getelementptr inbounds ([5 x i8*]* @buf, i64 0, i64 0), align 16
+ store i8* %fp, i8** getelementptr inbounds ([5 x i8*], [5 x i8*]* @buf, i64 0, i64 0), align 16
%sp = tail call i8* @llvm.stacksave()
- store i8* %sp, i8** getelementptr inbounds ([5 x i8*]* @buf, i64 0, i64 2), align 16
+ store i8* %sp, i8** getelementptr inbounds ([5 x i8*], [5 x i8*]* @buf, i64 0, i64 2), align 16
%r = tail call i32 @llvm.eh.sjlj.setjmp(i8* bitcast ([5 x i8*]* @buf to i8*))
ret i32 %r
; X86: sj0
diff --git a/test/CodeGen/X86/slow-incdec.ll b/test/CodeGen/X86/slow-incdec.ll
index 323e3ae8c472..1857f61e6c29 100644
--- a/test/CodeGen/X86/slow-incdec.ll
+++ b/test/CodeGen/X86/slow-incdec.ll
@@ -28,8 +28,8 @@ for.cond: ; preds = %for.body
for.body: ; preds = %for.body.preheader, %for.cond
%i.06 = phi i32 [ %dec, %for.cond ], [ %s, %for.body.preheader ]
- %arrayidx = getelementptr inbounds i32* %a, i32 %i.06
- %0 = load i32* %arrayidx, align 4, !tbaa !1
+ %arrayidx = getelementptr inbounds i32, i32* %a, i32 %i.06
+ %0 = load i32, i32* %arrayidx, align 4, !tbaa !1
%cmp1 = icmp eq i32 %0, 0
;
%dec = add nsw i32 %i.06, -1
@@ -59,8 +59,8 @@ for.cond: ; preds = %for.body
for.body: ; preds = %for.body.preheader, %for.cond
%i.06 = phi i32 [ %inc, %for.cond ], [ %s, %for.body.preheader ]
- %arrayidx = getelementptr inbounds i32* %a, i32 %i.06
- %0 = load i32* %arrayidx, align 4, !tbaa !1
+ %arrayidx = getelementptr inbounds i32, i32* %a, i32 %i.06
+ %0 = load i32, i32* %arrayidx, align 4, !tbaa !1
%cmp1 = icmp eq i32 %0, 0
%inc = add nsw i32 %i.06, 1
br i1 %cmp1, label %for.end.loopexit, label %for.cond
diff --git a/test/CodeGen/X86/smul-with-overflow.ll b/test/CodeGen/X86/smul-with-overflow.ll
index cefbda64751b..2b21f4ff84e9 100644
--- a/test/CodeGen/X86/smul-with-overflow.ll
+++ b/test/CodeGen/X86/smul-with-overflow.ll
@@ -11,11 +11,11 @@ entry:
br i1 %obit, label %overflow, label %normal
normal:
- %t1 = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([4 x i8]* @ok, i32 0, i32 0), i32 %sum ) nounwind
+ %t1 = tail call i32 (i8*, ...) @printf( i8* getelementptr ([4 x i8], [4 x i8]* @ok, i32 0, i32 0), i32 %sum ) nounwind
ret i1 true
overflow:
- %t2 = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([4 x i8]* @no, i32 0, i32 0) ) nounwind
+ %t2 = tail call i32 (i8*, ...) @printf( i8* getelementptr ([4 x i8], [4 x i8]* @no, i32 0, i32 0) ) nounwind
ret i1 false
; CHECK-LABEL: test1:
; CHECK: imull
@@ -30,11 +30,11 @@ entry:
br i1 %obit, label %overflow, label %normal
overflow:
- %t2 = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([4 x i8]* @no, i32 0, i32 0) ) nounwind
+ %t2 = tail call i32 (i8*, ...) @printf( i8* getelementptr ([4 x i8], [4 x i8]* @no, i32 0, i32 0) ) nounwind
ret i1 false
normal:
- %t1 = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([4 x i8]* @ok, i32 0, i32 0), i32 %sum ) nounwind
+ %t1 = tail call i32 (i8*, ...) @printf( i8* getelementptr ([4 x i8], [4 x i8]* @ok, i32 0, i32 0), i32 %sum ) nounwind
ret i1 true
; CHECK-LABEL: test2:
; CHECK: imull
diff --git a/test/CodeGen/X86/soft-fp.ll b/test/CodeGen/X86/soft-fp.ll
index a52135dc9087..fa38d1044a48 100644
--- a/test/CodeGen/X86/soft-fp.ll
+++ b/test/CodeGen/X86/soft-fp.ll
@@ -1,14 +1,16 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 -soft-float | not grep xmm
-; RUN: llc < %s -march=x86-64 -mattr=+sse2 -soft-float | not grep xmm
+; RUN: llc < %s -march=x86 -mattr=+sse2,+soft-float | FileCheck %s
+; RUN: llc < %s -march=x86-64 -mattr=+sse2,+soft-float | FileCheck %s
- %struct.__va_list_tag = type { i32, i32, i8*, i8* }
+; CHECK-NOT: xmm{[0-9]+}
+
+%struct.__va_list_tag = type { i32, i32, i8*, i8* }
define i32 @t1(i32 %a, ...) nounwind {
entry:
%va = alloca [1 x %struct.__va_list_tag], align 8 ; <[1 x %struct.__va_list_tag]*> [#uses=2]
%va12 = bitcast [1 x %struct.__va_list_tag]* %va to i8* ; <i8*> [#uses=2]
call void @llvm.va_start(i8* %va12)
- %va3 = getelementptr [1 x %struct.__va_list_tag]* %va, i64 0, i64 0 ; <%struct.__va_list_tag*> [#uses=1]
+ %va3 = getelementptr [1 x %struct.__va_list_tag], [1 x %struct.__va_list_tag]* %va, i64 0, i64 0 ; <%struct.__va_list_tag*> [#uses=1]
call void @bar(%struct.__va_list_tag* %va3) nounwind
call void @llvm.va_end(i8* %va12)
ret i32 undef
diff --git a/test/CodeGen/X86/splat-const.ll b/test/CodeGen/X86/splat-const.ll
new file mode 100644
index 000000000000..19997b03ad5e
--- /dev/null
+++ b/test/CodeGen/X86/splat-const.ll
@@ -0,0 +1,40 @@
+; RUN: llc < %s -mcpu=penryn | FileCheck %s --check-prefix=SSE
+; RUN: llc < %s -mcpu=sandybridge | FileCheck %s --check-prefix=AVX
+; RUN: llc < %s -mcpu=haswell | FileCheck %s --check-prefix=AVX2
+; This checks that lowering for creation of constant vectors is sane and
+; doesn't use redundant shuffles. (fixes PR22276)
+target triple = "x86_64-unknown-unknown"
+
+define <4 x i32> @zero_vector() {
+; SSE-LABEL: zero_vector:
+; SSE: xorps %xmm0, %xmm0
+; SSE-NEXT: retq
+; AVX-LABEL: zero_vector:
+; AVX: vxorps %xmm0, %xmm0, %xmm0
+; AVX-NEXT: retq
+; AVX2-LABEL: zero_vector:
+; AVX2: vxorps %xmm0, %xmm0, %xmm0
+; AVX2-NEXT: retq
+ %zero = insertelement <4 x i32> undef, i32 0, i32 0
+ %splat = shufflevector <4 x i32> %zero, <4 x i32> undef, <4 x i32> zeroinitializer
+ ret <4 x i32> %splat
+}
+
+; Note that for the "const_vector" versions, lowering that uses a shuffle
+; instead of a load would be legitimate, if it's a single broadcast shuffle.
+; (as opposed to the previous mess)
+; However, this is not the current preferred lowering.
+define <4 x i32> @const_vector() {
+; SSE-LABEL: const_vector:
+; SSE: movaps {{.*}}, %xmm0 # xmm0 = [42,42,42,42]
+; SSE-NEXT: retq
+; AVX-LABEL: const_vector:
+; AVX: vmovaps {{.*}}, %xmm0 # xmm0 = [42,42,42,42]
+; AVX-NEXT: retq
+; AVX2-LABEL: const_vector:
+; AVX2: vbroadcastss {{[^%].*}}, %xmm0
+; AVX2-NEXT: retq
+ %const = insertelement <4 x i32> undef, i32 42, i32 0
+ %splat = shufflevector <4 x i32> %const, <4 x i32> undef, <4 x i32> zeroinitializer
+ ret <4 x i32> %splat
+}
diff --git a/test/CodeGen/X86/splat-for-size.ll b/test/CodeGen/X86/splat-for-size.ll
index c052ad2aa146..635aa821d78a 100644
--- a/test/CodeGen/X86/splat-for-size.ll
+++ b/test/CodeGen/X86/splat-for-size.ll
@@ -138,4 +138,18 @@ define <32 x i8> @splat_v32i8(<32 x i8> %x) #0 {
; CHECK: retq
}
+; PR23259: Verify that ISel doesn't crash with a 'fatal error in backend'
+; due to a missing AVX pattern to select a v2i64 X86ISD::BROADCAST of a
+; loadi64 with multiple uses.
+
+@A = common global <3 x i64> zeroinitializer, align 32
+
+define <8 x i64> @pr23259() #0 {
+entry:
+ %0 = load <4 x i64>, <4 x i64>* bitcast (<3 x i64>* @A to <4 x i64>*), align 32
+ %1 = shufflevector <4 x i64> %0, <4 x i64> undef, <3 x i32> <i32 undef, i32 undef, i32 2>
+ %shuffle = shufflevector <3 x i64> <i64 1, i64 undef, i64 undef>, <3 x i64> %1, <8 x i32> <i32 5, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ ret <8 x i64> %shuffle
+}
+
attributes #0 = { optsize }
diff --git a/test/CodeGen/X86/split-eh-lpad-edges.ll b/test/CodeGen/X86/split-eh-lpad-edges.ll
index 756a3ddc86bb..852214e7c248 100644
--- a/test/CodeGen/X86/split-eh-lpad-edges.ll
+++ b/test/CodeGen/X86/split-eh-lpad-edges.ll
@@ -16,7 +16,7 @@ entry:
to label %invcont unwind label %lpad ; <%struct.NSObject*> [#uses=1]
invcont: ; preds = %entry
- %1 = invoke %struct.NSObject* (%struct.NSObject*, %struct.objc_selector*, ...)* @objc_msgSend(%struct.NSObject* %0, %struct.objc_selector* null)
+ %1 = invoke %struct.NSObject* (%struct.NSObject*, %struct.objc_selector*, ...) @objc_msgSend(%struct.NSObject* %0, %struct.objc_selector* null)
to label %invcont26 unwind label %lpad ; <%struct.NSObject*> [#uses=0]
invcont26: ; preds = %invcont
diff --git a/test/CodeGen/X86/split-vector-bitcast.ll b/test/CodeGen/X86/split-vector-bitcast.ll
index fae15cfaf26e..8d80754b9a35 100644
--- a/test/CodeGen/X86/split-vector-bitcast.ll
+++ b/test/CodeGen/X86/split-vector-bitcast.ll
@@ -3,7 +3,7 @@
; PR10497 + another isel issue with sse2 disabled
; (This is primarily checking that this construct doesn't crash.)
define void @a(<2 x float>* %a, <2 x i32>* %b) {
- %cc = load <2 x float>* %a
+ %cc = load <2 x float>, <2 x float>* %a
%c = fadd <2 x float> %cc, %cc
%dd = bitcast <2 x float> %c to <2 x i32>
%d = add <2 x i32> %dd, %dd
diff --git a/test/CodeGen/X86/sqrt-fastmath.ll b/test/CodeGen/X86/sqrt-fastmath.ll
index 24b175eed7a3..4c6b521156e0 100644
--- a/test/CodeGen/X86/sqrt-fastmath.ll
+++ b/test/CodeGen/X86/sqrt-fastmath.ll
@@ -1,132 +1,141 @@
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=core2 | FileCheck %s
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=btver2 | FileCheck %s --check-prefix=BTVER2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=sse2 | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx,use-sqrt-est | FileCheck %s --check-prefix=ESTIMATE
-; generated using "clang -S -O2 -ffast-math -emit-llvm sqrt.c" from
-; #include <math.h>
-;
-; double fd(double d){
-; return sqrt(d);
-; }
-;
-; float ff(float f){
-; return sqrtf(f);
-; }
-;
-; long double fld(long double ld){
-; return sqrtl(ld);
-; }
-;
-; Tests conversion of sqrt function calls into sqrt instructions when
-; -ffast-math is in effect.
+declare double @__sqrt_finite(double) #0
+declare float @__sqrtf_finite(float) #0
+declare x86_fp80 @__sqrtl_finite(x86_fp80) #0
+declare float @llvm.sqrt.f32(float) #0
+declare <4 x float> @llvm.sqrt.v4f32(<4 x float>) #0
+declare <8 x float> @llvm.sqrt.v8f32(<8 x float>) #0
-; ModuleID = 'sqrt.c'
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
-target triple = "x86_64-unknown-linux-gnu"
-; Function Attrs: nounwind readnone uwtable
define double @fd(double %d) #0 {
-entry:
-; CHECK: sqrtsd
- %call = tail call double @__sqrt_finite(double %d) #2
+; CHECK-LABEL: fd:
+; CHECK: # BB#0:
+; CHECK-NEXT: sqrtsd %xmm0, %xmm0
+; CHECK-NEXT: retq
+;
+; ESTIMATE-LABEL: fd:
+; ESTIMATE: # BB#0:
+; ESTIMATE-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0
+; ESTIMATE-NEXT: retq
+ %call = tail call double @__sqrt_finite(double %d) #1
ret double %call
}
-; Function Attrs: nounwind readnone
-declare double @__sqrt_finite(double) #1
-; Function Attrs: nounwind readnone uwtable
define float @ff(float %f) #0 {
-entry:
-; CHECK: sqrtss
- %call = tail call float @__sqrtf_finite(float %f) #2
+; CHECK-LABEL: ff:
+; CHECK: # BB#0:
+; CHECK-NEXT: sqrtss %xmm0, %xmm0
+; CHECK-NEXT: retq
+;
+; ESTIMATE-LABEL: ff:
+; ESTIMATE: # BB#0:
+; ESTIMATE-NEXT: vrsqrtss %xmm0, %xmm0, %xmm1
+; ESTIMATE-NEXT: vmulss {{.*}}(%rip), %xmm1, %xmm2
+; ESTIMATE-NEXT: vmulss %xmm1, %xmm1, %xmm1
+; ESTIMATE-NEXT: vmulss %xmm0, %xmm1, %xmm1
+; ESTIMATE-NEXT: vaddss {{.*}}(%rip), %xmm1, %xmm1
+; ESTIMATE-NEXT: vmulss %xmm2, %xmm1, %xmm1
+; ESTIMATE-NEXT: vmulss %xmm1, %xmm0, %xmm1
+; ESTIMATE-NEXT: vxorps %xmm2, %xmm2, %xmm2
+; ESTIMATE-NEXT: vcmpeqss %xmm2, %xmm0, %xmm0
+; ESTIMATE-NEXT: vandnps %xmm1, %xmm0, %xmm0
+; ESTIMATE-NEXT: retq
+ %call = tail call float @__sqrtf_finite(float %f) #1
ret float %call
}
-; Function Attrs: nounwind readnone
-declare float @__sqrtf_finite(float) #1
-; Function Attrs: nounwind readnone uwtable
define x86_fp80 @fld(x86_fp80 %ld) #0 {
-entry:
-; CHECK: fsqrt
- %call = tail call x86_fp80 @__sqrtl_finite(x86_fp80 %ld) #2
+; CHECK-LABEL: fld:
+; CHECK: # BB#0:
+; CHECK-NEXT: fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT: fsqrt
+; CHECK-NEXT: retq
+;
+; ESTIMATE-LABEL: fld:
+; ESTIMATE: # BB#0:
+; ESTIMATE-NEXT: fldt {{[0-9]+}}(%rsp)
+; ESTIMATE-NEXT: fsqrt
+; ESTIMATE-NEXT: retq
+ %call = tail call x86_fp80 @__sqrtl_finite(x86_fp80 %ld) #1
ret x86_fp80 %call
}
-declare x86_fp80 @__sqrtl_finite(x86_fp80) #1
-
-declare float @llvm.sqrt.f32(float) #1
-declare <4 x float> @llvm.sqrt.v4f32(<4 x float>) #1
-declare <8 x float> @llvm.sqrt.v8f32(<8 x float>) #1
-; If the target's sqrtss and divss instructions are substantially
-; slower than rsqrtss with a Newton-Raphson refinement, we should
-; generate the estimate sequence.
define float @reciprocal_square_root(float %x) #0 {
+; CHECK-LABEL: reciprocal_square_root:
+; CHECK: # BB#0:
+; CHECK-NEXT: sqrtss %xmm0, %xmm1
+; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-NEXT: divss %xmm1, %xmm0
+; CHECK-NEXT: retq
+;
+; ESTIMATE-LABEL: reciprocal_square_root:
+; ESTIMATE: # BB#0:
+; ESTIMATE-NEXT: vrsqrtss %xmm0, %xmm0, %xmm1
+; ESTIMATE-NEXT: vmulss {{.*}}(%rip), %xmm1, %xmm2
+; ESTIMATE-NEXT: vmulss %xmm1, %xmm1, %xmm1
+; ESTIMATE-NEXT: vmulss %xmm0, %xmm1, %xmm0
+; ESTIMATE-NEXT: vaddss {{.*}}(%rip), %xmm0, %xmm0
+; ESTIMATE-NEXT: vmulss %xmm2, %xmm0, %xmm0
+; ESTIMATE-NEXT: retq
%sqrt = tail call float @llvm.sqrt.f32(float %x)
%div = fdiv fast float 1.0, %sqrt
ret float %div
-
-; CHECK-LABEL: reciprocal_square_root:
-; CHECK: sqrtss
-; CHECK-NEXT: movss
-; CHECK-NEXT: divss
-; CHECK-NEXT: retq
-; BTVER2-LABEL: reciprocal_square_root:
-; BTVER2: vrsqrtss
-; BTVER2-NEXT: vmulss
-; BTVER2-NEXT: vmulss
-; BTVER2-NEXT: vmulss
-; BTVER2-NEXT: vaddss
-; BTVER2-NEXT: vmulss
-; BTVER2-NEXT: retq
}
define <4 x float> @reciprocal_square_root_v4f32(<4 x float> %x) #0 {
+; CHECK-LABEL: reciprocal_square_root_v4f32:
+; CHECK: # BB#0:
+; CHECK-NEXT: sqrtps %xmm0, %xmm1
+; CHECK-NEXT: movaps {{.*#+}} xmm0 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
+; CHECK-NEXT: divps %xmm1, %xmm0
+; CHECK-NEXT: retq
+;
+; ESTIMATE-LABEL: reciprocal_square_root_v4f32:
+; ESTIMATE: # BB#0:
+; ESTIMATE-NEXT: vrsqrtps %xmm0, %xmm1
+; ESTIMATE-NEXT: vmulps %xmm1, %xmm1, %xmm2
+; ESTIMATE-NEXT: vmulps %xmm0, %xmm2, %xmm0
+; ESTIMATE-NEXT: vaddps {{.*}}(%rip), %xmm0, %xmm0
+; ESTIMATE-NEXT: vmulps {{.*}}(%rip), %xmm1, %xmm1
+; ESTIMATE-NEXT: vmulps %xmm1, %xmm0, %xmm0
+; ESTIMATE-NEXT: retq
%sqrt = tail call <4 x float> @llvm.sqrt.v4f32(<4 x float> %x)
%div = fdiv fast <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %sqrt
ret <4 x float> %div
-
-; CHECK-LABEL: reciprocal_square_root_v4f32:
-; CHECK: sqrtps
-; CHECK-NEXT: movaps
-; CHECK-NEXT: divps
-; CHECK-NEXT: retq
-; BTVER2-LABEL: reciprocal_square_root_v4f32:
-; BTVER2: vrsqrtps
-; BTVER2-NEXT: vmulps
-; BTVER2-NEXT: vmulps
-; BTVER2-NEXT: vmulps
-; BTVER2-NEXT: vaddps
-; BTVER2-NEXT: vmulps
-; BTVER2-NEXT: retq
}
define <8 x float> @reciprocal_square_root_v8f32(<8 x float> %x) #0 {
+; CHECK-LABEL: reciprocal_square_root_v8f32:
+; CHECK: # BB#0:
+; CHECK-NEXT: sqrtps %xmm1, %xmm2
+; CHECK-NEXT: sqrtps %xmm0, %xmm3
+; CHECK-NEXT: movaps {{.*#+}} xmm1 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
+; CHECK-NEXT: movaps %xmm1, %xmm0
+; CHECK-NEXT: divps %xmm3, %xmm0
+; CHECK-NEXT: divps %xmm2, %xmm1
+; CHECK-NEXT: retq
+;
+; ESTIMATE-LABEL: reciprocal_square_root_v8f32:
+; ESTIMATE: # BB#0:
+; ESTIMATE-NEXT: vrsqrtps %ymm0, %ymm1
+; ESTIMATE-NEXT: vmulps %ymm1, %ymm1, %ymm2
+; ESTIMATE-NEXT: vmulps %ymm0, %ymm2, %ymm0
+; ESTIMATE-NEXT: vaddps {{.*}}(%rip), %ymm0, %ymm0
+; ESTIMATE-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm1
+; ESTIMATE-NEXT: vmulps %ymm1, %ymm0, %ymm0
+; ESTIMATE-NEXT: retq
%sqrt = tail call <8 x float> @llvm.sqrt.v8f32(<8 x float> %x)
%div = fdiv fast <8 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %sqrt
ret <8 x float> %div
-
-; CHECK-LABEL: reciprocal_square_root_v8f32:
-; CHECK: sqrtps
-; CHECK-NEXT: sqrtps
-; CHECK-NEXT: movaps
-; CHECK-NEXT: movaps
-; CHECK-NEXT: divps
-; CHECK-NEXT: divps
-; CHECK-NEXT: retq
-; BTVER2-LABEL: reciprocal_square_root_v8f32:
-; BTVER2: vrsqrtps
-; BTVER2-NEXT: vmulps
-; BTVER2-NEXT: vmulps
-; BTVER2-NEXT: vmulps
-; BTVER2-NEXT: vaddps
-; BTVER2-NEXT: vmulps
-; BTVER2-NEXT: retq
}
-attributes #0 = { nounwind readnone uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "unsafe-fp-math"="true" "use-soft-float"="false" }
-attributes #1 = { nounwind readnone "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "unsafe-fp-math"="true" "use-soft-float"="false" }
-attributes #2 = { nounwind readnone }
+attributes #0 = { "unsafe-fp-math"="true" }
+attributes #1 = { nounwind readnone }
+
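
The ESTIMATE checks above spell out what the removed "estimate sequence" comment referred to: a hardware RSQRTSS estimate refined by one Newton-Raphson step, with sqrt(x) then recovered as x * (1/sqrt(x)). A minimal C sketch of that refinement, assuming the two constant-pool loads in the checked assembly are -0.5 and -3.0 (the function names here are illustrative, not part of the test):

    /* One Newton-Raphson step on a reciprocal-square-root estimate y0,
       factored the way the ESTIMATE assembly does:
       (-0.5 * y0) * (x * y0 * y0 - 3.0) == y0 * (1.5 - 0.5 * x * y0 * y0). */
    static float refine_rsqrt(float x, float y0) {
        return (-0.5f * y0) * (x * y0 * y0 - 3.0f);
    }

    /* sqrt(x) is then x * refine_rsqrt(x, y0); the vcmpeqss/vandnps pair in
       the ff() checks forces the result to 0.0 when x == 0.0, because
       0 * rsqrt(0) would otherwise be 0 * inf = NaN. */
    static float sqrt_via_rsqrt(float x, float y0) {
        float r = x * refine_rsqrt(x, y0);
        return (x == 0.0f) ? 0.0f : r;
    }
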
diff --git a/test/CodeGen/X86/sqrt.ll b/test/CodeGen/X86/sqrt.ll
index be7c6e867399..b22c041250fe 100644
--- a/test/CodeGen/X86/sqrt.ll
+++ b/test/CodeGen/X86/sqrt.ll
@@ -1,7 +1,7 @@
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=-avx,+sse2 | FileCheck %s --check-prefix=SSE2
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=-avx,+sse2 -fast-isel -fast-isel-abort | FileCheck %s --check-prefix=SSE2
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=-avx,+sse2 -fast-isel -fast-isel-abort=1 | FileCheck %s --check-prefix=SSE2
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=-avx2,+avx | FileCheck %s --check-prefix=AVX
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=-avx2,+avx -fast-isel -fast-isel-abort | FileCheck %s --check-prefix=AVX
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=-avx2,+avx -fast-isel -fast-isel-abort=1 | FileCheck %s --check-prefix=AVX
define float @test_sqrt_f32(float %a) {
; SSE2-LABEL: test_sqrt_f32
diff --git a/test/CodeGen/X86/sret-implicit.ll b/test/CodeGen/X86/sret-implicit.ll
new file mode 100644
index 000000000000..2a998fc6b6c7
--- /dev/null
+++ b/test/CodeGen/X86/sret-implicit.ll
@@ -0,0 +1,34 @@
+; RUN: llc -mtriple=x86_64-apple-darwin8 < %s | FileCheck %s --check-prefix=X64
+; RUN: llc -mtriple=x86_64-pc-linux < %s | FileCheck %s --check-prefix=X64
+; RUN: llc -mtriple=i686-pc-linux < %s | FileCheck %s --check-prefix=X86
+; RUN: llc -mtriple=x86_64-apple-darwin8 -terminal-rule < %s | FileCheck %s --check-prefix=X64
+; RUN: llc -mtriple=x86_64-pc-linux -terminal-rule < %s | FileCheck %s --check-prefix=X64
+
+define void @sret_void(i32* sret %p) {
+ store i32 0, i32* %p
+ ret void
+}
+
+; X64-LABEL: sret_void
+; X64-DAG: movl $0, (%rdi)
+; X64-DAG: movq %rdi, %rax
+; X64: retq
+
+; X86-LABEL: sret_void
+; X86: movl 4(%esp), %eax
+; X86: movl $0, (%eax)
+; X86: retl
+
+define i256 @sret_demoted() {
+ ret i256 0
+}
+
+; X64-LABEL: sret_demoted
+; X64-DAG: movq $0, (%rdi)
+; X64-DAG: movq %rdi, %rax
+; X64: retq
+
+; X86-LABEL: sret_demoted
+; X86: movl 4(%esp), %eax
+; X86: movl $0, (%eax)
+; X86: retl
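
The X64-DAG lines above check the SysV x86-64 convention that a callee taking an sret argument stores the result through the incoming pointer (%rdi) and returns that same pointer in %rax; the i256 case checks that an oversized integer return is demoted to the same convention. A hypothetical C analogue (the struct and function names are illustrative only):

    /* Returning a 32-byte aggregate by value is lowered to an implicit sret
       pointer: the caller passes it in %rdi, the callee stores through it and
       hands the pointer back in %rax. */
    struct big { long v[4]; };      /* 256 bits, like the demoted i256 return */

    struct big make_zero(void) {
        struct big b = {{0, 0, 0, 0}};
        return b;                   /* becomes stores through the sret pointer */
    }
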
diff --git a/test/CodeGen/X86/sse-align-0.ll b/test/CodeGen/X86/sse-align-0.ll
index 8ffd31247702..54c89ea411a4 100644
--- a/test/CodeGen/X86/sse-align-0.ll
+++ b/test/CodeGen/X86/sse-align-0.ll
@@ -2,12 +2,12 @@
; CHECK-NOT: mov
define <4 x float> @foo(<4 x float>* %p, <4 x float> %x) nounwind {
- %t = load <4 x float>* %p
+ %t = load <4 x float>, <4 x float>* %p
%z = fmul <4 x float> %t, %x
ret <4 x float> %z
}
define <2 x double> @bar(<2 x double>* %p, <2 x double> %x) nounwind {
- %t = load <2 x double>* %p
+ %t = load <2 x double>, <2 x double>* %p
%z = fmul <2 x double> %t, %x
ret <2 x double> %z
}
diff --git a/test/CodeGen/X86/sse-align-1.ll b/test/CodeGen/X86/sse-align-1.ll
index c7a5cd559120..1a6058c6114c 100644
--- a/test/CodeGen/X86/sse-align-1.ll
+++ b/test/CodeGen/X86/sse-align-1.ll
@@ -1,10 +1,10 @@
; RUN: llc < %s -march=x86-64 | grep movap | count 2
define <4 x float> @foo(<4 x float>* %p) nounwind {
- %t = load <4 x float>* %p
+ %t = load <4 x float>, <4 x float>* %p
ret <4 x float> %t
}
define <2 x double> @bar(<2 x double>* %p) nounwind {
- %t = load <2 x double>* %p
+ %t = load <2 x double>, <2 x double>* %p
ret <2 x double> %t
}
diff --git a/test/CodeGen/X86/sse-align-10.ll b/test/CodeGen/X86/sse-align-10.ll
index 0f9169712556..81bf55354cd2 100644
--- a/test/CodeGen/X86/sse-align-10.ll
+++ b/test/CodeGen/X86/sse-align-10.ll
@@ -1,6 +1,6 @@
; RUN: llc < %s -march=x86-64 | grep movups | count 1
define <2 x i64> @bar(<2 x i64>* %p) nounwind {
- %t = load <2 x i64>* %p, align 8
+ %t = load <2 x i64>, <2 x i64>* %p, align 8
ret <2 x i64> %t
}
diff --git a/test/CodeGen/X86/sse-align-12.ll b/test/CodeGen/X86/sse-align-12.ll
index 396da0f48956..9441cc0002fb 100644
--- a/test/CodeGen/X86/sse-align-12.ll
+++ b/test/CodeGen/X86/sse-align-12.ll
@@ -6,7 +6,7 @@ define <4 x float> @a(<4 x float>* %y) nounwind {
; CHECK-NEXT: movups (%rdi), %xmm0
; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,2,1,0]
; CHECK-NEXT: retq
- %x = load <4 x float>* %y, align 4
+ %x = load <4 x float>, <4 x float>* %y, align 4
%a = extractelement <4 x float> %x, i32 0
%b = extractelement <4 x float> %x, i32 1
%c = extractelement <4 x float> %x, i32 2
@@ -24,7 +24,7 @@ define <4 x float> @b(<4 x float>* %y, <4 x float> %z) nounwind {
; CHECK-NEXT: movups (%rdi), %xmm1
; CHECK-NEXT: unpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; CHECK-NEXT: retq
- %x = load <4 x float>* %y, align 4
+ %x = load <4 x float>, <4 x float>* %y, align 4
%a = extractelement <4 x float> %x, i32 2
%b = extractelement <4 x float> %x, i32 3
%c = extractelement <4 x float> %z, i32 2
@@ -42,7 +42,7 @@ define <2 x double> @c(<2 x double>* %y) nounwind {
; CHECK-NEXT: movupd (%rdi), %xmm0
; CHECK-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
; CHECK-NEXT: retq
- %x = load <2 x double>* %y, align 8
+ %x = load <2 x double>, <2 x double>* %y, align 8
%a = extractelement <2 x double> %x, i32 0
%c = extractelement <2 x double> %x, i32 1
%p = insertelement <2 x double> undef, double %c, i32 0
@@ -56,7 +56,7 @@ define <2 x double> @d(<2 x double>* %y, <2 x double> %z) nounwind {
; CHECK-NEXT: movupd (%rdi), %xmm1
; CHECK-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; CHECK-NEXT: retq
- %x = load <2 x double>* %y, align 8
+ %x = load <2 x double>, <2 x double>* %y, align 8
%a = extractelement <2 x double> %x, i32 1
%c = extractelement <2 x double> %z, i32 1
%p = insertelement <2 x double> undef, double %c, i32 0
diff --git a/test/CodeGen/X86/sse-align-2.ll b/test/CodeGen/X86/sse-align-2.ll
index 98e75b56e891..063cc9d2f563 100644
--- a/test/CodeGen/X86/sse-align-2.ll
+++ b/test/CodeGen/X86/sse-align-2.ll
@@ -1,7 +1,7 @@
; RUN: llc < %s -march=x86-64 -mcpu=penryn | FileCheck %s
define <4 x float> @foo(<4 x float>* %p, <4 x float> %x) nounwind {
- %t = load <4 x float>* %p, align 4
+ %t = load <4 x float>, <4 x float>* %p, align 4
%z = fmul <4 x float> %t, %x
ret <4 x float> %z
}
@@ -11,7 +11,7 @@ define <4 x float> @foo(<4 x float>* %p, <4 x float> %x) nounwind {
; CHECK: ret
define <2 x double> @bar(<2 x double>* %p, <2 x double> %x) nounwind {
- %t = load <2 x double>* %p, align 8
+ %t = load <2 x double>, <2 x double>* %p, align 8
%z = fmul <2 x double> %t, %x
ret <2 x double> %z
}
diff --git a/test/CodeGen/X86/sse-align-5.ll b/test/CodeGen/X86/sse-align-5.ll
index 21cd2311b916..a64b953220d5 100644
--- a/test/CodeGen/X86/sse-align-5.ll
+++ b/test/CodeGen/X86/sse-align-5.ll
@@ -1,6 +1,6 @@
; RUN: llc < %s -march=x86-64 | grep movaps | count 1
define <2 x i64> @bar(<2 x i64>* %p) nounwind {
- %t = load <2 x i64>* %p
+ %t = load <2 x i64>, <2 x i64>* %p
ret <2 x i64> %t
}
diff --git a/test/CodeGen/X86/sse-align-6.ll b/test/CodeGen/X86/sse-align-6.ll
index fcea1b102a20..01f225101b96 100644
--- a/test/CodeGen/X86/sse-align-6.ll
+++ b/test/CodeGen/X86/sse-align-6.ll
@@ -1,7 +1,7 @@
; RUN: llc < %s -march=x86-64 | grep movdqu | count 1
define <2 x i64> @bar(<2 x i64>* %p, <2 x i64> %x) nounwind {
- %t = load <2 x i64>* %p, align 8
+ %t = load <2 x i64>, <2 x i64>* %p, align 8
%z = mul <2 x i64> %t, %x
ret <2 x i64> %z
}
diff --git a/test/CodeGen/X86/sse-align-9.ll b/test/CodeGen/X86/sse-align-9.ll
index cb26b9535a81..182c91c69d93 100644
--- a/test/CodeGen/X86/sse-align-9.ll
+++ b/test/CodeGen/X86/sse-align-9.ll
@@ -1,10 +1,10 @@
; RUN: llc < %s -march=x86-64 | grep movup | count 2
define <4 x float> @foo(<4 x float>* %p) nounwind {
- %t = load <4 x float>* %p, align 4
+ %t = load <4 x float>, <4 x float>* %p, align 4
ret <4 x float> %t
}
define <2 x double> @bar(<2 x double>* %p) nounwind {
- %t = load <2 x double>* %p, align 8
+ %t = load <2 x double>, <2 x double>* %p, align 8
ret <2 x double> %t
}
diff --git a/test/CodeGen/X86/sse-domains.ll b/test/CodeGen/X86/sse-domains.ll
index 8cf522dd3c15..8016a246fa66 100644
--- a/test/CodeGen/X86/sse-domains.ll
+++ b/test/CodeGen/X86/sse-domains.ll
@@ -33,9 +33,9 @@ while.body:
%x.02 = phi <4 x i32> [ %add, %while.body ], [ zeroinitializer, %entry ]
%dec = add nsw i32 %n.addr.03, -1
%and = and <4 x i32> %x.02, <i32 127, i32 127, i32 127, i32 127>
- %incdec.ptr = getelementptr inbounds <4 x i32>* %p.addr.04, i64 1
+ %incdec.ptr = getelementptr inbounds <4 x i32>, <4 x i32>* %p.addr.04, i64 1
store <4 x i32> %and, <4 x i32>* %p.addr.04, align 16
- %0 = load <4 x i32>* %incdec.ptr, align 16
+ %0 = load <4 x i32>, <4 x i32>* %incdec.ptr, align 16
%add = shl <4 x i32> %0, <i32 1, i32 1, i32 1, i32 1>
%tobool = icmp eq i32 %dec, 0
br i1 %tobool, label %while.end, label %while.body
diff --git a/test/CodeGen/X86/sse-fcopysign.ll b/test/CodeGen/X86/sse-fcopysign.ll
index 0e0e4a9a86cf..25634b5472aa 100644
--- a/test/CodeGen/X86/sse-fcopysign.ll
+++ b/test/CodeGen/X86/sse-fcopysign.ll
@@ -1,16 +1,134 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 | not grep test
+; RUN: llc < %s -mtriple=i686-unknown -mattr=+sse2 | FileCheck %s --check-prefix=ALL --check-prefix=X32
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2 | FileCheck %s --check-prefix=ALL --check-prefix=X64
+
+;
+; Library Functions
+;
define float @tst1(float %a, float %b) {
- %tmp = tail call float @copysignf( float %b, float %a )
- ret float %tmp
+; X32-LABEL: @tst1
+; X32: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X32-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; X32-NEXT: movss %xmm1, 4(%esp)
+; X32-NEXT: movss %xmm0, (%esp)
+; X32-NEXT: calll copysignf
+; X32-NEXT: addl $8, %esp
+; X32-NEXT: retl
+;
+; X64-LABEL: @tst1
+; X64: movaps %xmm0, %xmm2
+; X64-NEXT: movaps %xmm1, %xmm0
+; X64-NEXT: movaps %xmm2, %xmm1
+; X64-NEXT: jmp copysignf
+ %tmp = tail call float @copysignf( float %b, float %a )
+ ret float %tmp
}
define double @tst2(double %a, float %b, float %c) {
- %tmp1 = fadd float %b, %c
- %tmp2 = fpext float %tmp1 to double
- %tmp = tail call double @copysign( double %a, double %tmp2 )
- ret double %tmp
+; X32-LABEL: @tst2
+; X32: movsd {{.*#+}} xmm0 = mem[0],zero
+; X32-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; X32-NEXT: addss 32(%esp), %xmm1
+; X32-NEXT: cvtss2sd %xmm1, %xmm1
+; X32-NEXT: movsd %xmm0, (%esp)
+; X32-NEXT: movsd %xmm1, 8(%esp)
+; X32-NEXT: calll copysign
+; X32-NEXT: addl $16, %esp
+; X32-NEXT: retl
+;
+; X64-LABEL: @tst2
+; X64: addss %xmm2, %xmm1
+; X64-NEXT: cvtss2sd %xmm1, %xmm1
+; X64-NEXT: jmp copysign
+ %tmp1 = fadd float %b, %c
+ %tmp2 = fpext float %tmp1 to double
+ %tmp = tail call double @copysign( double %a, double %tmp2 )
+ ret double %tmp
}
declare float @copysignf(float, float)
declare double @copysign(double, double)
+
+;
+; LLVM Intrinsic
+;
+
+define float @int1(float %a, float %b) {
+; X32-LABEL: @int1
+; X32: movss 12(%esp), %xmm0 {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X32-NEXT: movss 8(%esp), %xmm1 {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; X32-NEXT: andps .LCPI2_0, %xmm1
+; X32-NEXT: andps .LCPI2_1, %xmm0
+; X32-NEXT: orps %xmm1, %xmm0
+; X32-NEXT: movss %xmm0, (%esp)
+; X32-NEXT: flds (%esp)
+; X32-NEXT: popl %eax
+; X32-NEXT: retl
+;
+; X64-LABEL: @int1
+; X64: andps .LCPI2_0(%rip), %xmm0
+; X64-NEXT: andps .LCPI2_1(%rip), %xmm1
+; X64-NEXT: orps %xmm1, %xmm0
+; X64-NEXT: retq
+ %tmp = tail call float @llvm.copysign.f32( float %b, float %a )
+ ret float %tmp
+}
+
+define double @int2(double %a, float %b, float %c) {
+; X32-LABEL: @int2
+; X32: movsd 8(%ebp), %xmm0 {{.*#+}} xmm0 = mem[0],zero
+; X32-NEXT: movss 16(%ebp), %xmm1 {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; X32-NEXT: addss 20(%ebp), %xmm1
+; X32-NEXT: andpd .LCPI3_0, %xmm0
+; X32-NEXT: cvtss2sd %xmm1, %xmm1
+; X32-NEXT: andpd .LCPI3_1, %xmm1
+; X32-NEXT: orpd %xmm0, %xmm1
+; X32-NEXT: movsd %xmm1, (%esp)
+; X32-NEXT: fldl (%esp)
+; X32-NEXT: movl %ebp, %esp
+; X32-NEXT: popl %ebp
+; X32-NEXT: retl
+;
+; X64-LABEL: @int2
+; X64: addss %xmm2, %xmm1
+; X64-NEXT: andpd .LCPI3_0(%rip), %xmm0
+; X64-NEXT: cvtss2sd %xmm1, %xmm1
+; X64-NEXT: andpd .LCPI3_1(%rip), %xmm1
+; X64-NEXT: orpd %xmm1, %xmm0
+; X64-NEXT: retq
+ %tmp1 = fadd float %b, %c
+ %tmp2 = fpext float %tmp1 to double
+ %tmp = tail call double @llvm.copysign.f64( double %a, double %tmp2 )
+ ret double %tmp
+}
+
+define float @cst1() {
+; X32-LABEL: @cst1
+; X32: fld1
+; X32-NEXT: fchs
+; X32-NEXT: retl
+;
+; X64-LABEL: @cst1
+; X64: movss .LCPI4_0(%rip), %xmm0 {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X64-NEXT: retq
+ %tmp = tail call float @llvm.copysign.f32( float 1.0, float -2.0 )
+ ret float %tmp
+}
+
+define double @cst2() {
+; X32-LABEL: @cst2
+; X32: fldz
+; X32-NEXT: fchs
+; X32-NEXT: retl
+;
+; X64-LABEL: @cst2
+; X64: movsd .LCPI5_0(%rip), %xmm0 {{.*#+}} xmm0 = mem[0],zero
+; X64-NEXT: retq
+ %tmp1 = fadd float -1.0, -1.0
+ %tmp2 = fpext float %tmp1 to double
+ %tmp = tail call double @llvm.copysign.f64( double 0.0, double %tmp2 )
+ ret double %tmp
+}
+
+declare float @llvm.copysign.f32(float %Mag, float %Sgn)
+declare double @llvm.copysign.f64(double %Mag, double %Sgn)
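
The andps/orps (andpd/orpd for f64) pairs checked in int1 and int2 above are the usual bit-mask lowering of the copysign intrinsics: clear the sign bit of the magnitude, isolate the sign bit of the sign operand, then OR the two together. A small C sketch of that lowering, assuming the .LCPI constant-pool entries hold the 0x7fffffff and 0x80000000 masks (the helper name is illustrative):

    #include <stdint.h>
    #include <string.h>

    /* copysign(mag, sgn): keep every bit of mag except the sign, then OR in
       the sign bit of sgn; the two AND masks are what the .LCPI loads supply. */
    static float copysignf_bits(float mag, float sgn) {
        uint32_t m, s, r;
        memcpy(&m, &mag, sizeof m);
        memcpy(&s, &sgn, sizeof s);
        r = (m & 0x7fffffffu) | (s & 0x80000000u);
        memcpy(&mag, &r, sizeof mag);
        return mag;
    }
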
diff --git a/test/CodeGen/X86/sse-intel-ocl.ll b/test/CodeGen/X86/sse-intel-ocl.ll
index 188505072f05..b96ecc575021 100644
--- a/test/CodeGen/X86/sse-intel-ocl.ll
+++ b/test/CodeGen/X86/sse-intel-ocl.ll
@@ -36,7 +36,7 @@ define <16 x float> @testf16_inp(<16 x float> %a, <16 x float> %b) nounwind {
%y = alloca <16 x float>, align 16
%x = fadd <16 x float> %a, %b
%1 = call intel_ocl_bicc <16 x float> @func_float16_ptr(<16 x float> %x, <16 x float>* %y)
- %2 = load <16 x float>* %y, align 16
+ %2 = load <16 x float>, <16 x float>* %y, align 16
%3 = fadd <16 x float> %2, %1
ret <16 x float> %3
}
@@ -63,7 +63,7 @@ define <16 x float> @testf16_regs(<16 x float> %a, <16 x float> %b) nounwind {
%y = alloca <16 x float>, align 16
%x = fadd <16 x float> %a, %b
%1 = call intel_ocl_bicc <16 x float> @func_float16_ptr(<16 x float> %x, <16 x float>* %y)
- %2 = load <16 x float>* %y, align 16
+ %2 = load <16 x float>, <16 x float>* %y, align 16
%3 = fadd <16 x float> %1, %b
%4 = fadd <16 x float> %2, %3
ret <16 x float> %4
diff --git a/test/CodeGen/X86/sse-intrinsics-x86.ll b/test/CodeGen/X86/sse-intrinsics-x86.ll
index 65d44bfb5ba8..0857189be734 100644
--- a/test/CodeGen/X86/sse-intrinsics-x86.ll
+++ b/test/CodeGen/X86/sse-intrinsics-x86.ll
@@ -1,4 +1,5 @@
; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=-avx,+sse | FileCheck %s
+; RUN: llc < %s -mtriple=i386-apple-darwin -mcpu=knl | FileCheck %s
define <4 x float> @test_x86_sse_add_ss(<4 x float> %a0, <4 x float> %a1) {
; CHECK: addss
diff --git a/test/CodeGen/X86/sse-load-ret.ll b/test/CodeGen/X86/sse-load-ret.ll
index 1ebcb1a6fa64..8da45a786e7c 100644
--- a/test/CodeGen/X86/sse-load-ret.ll
+++ b/test/CodeGen/X86/sse-load-ret.ll
@@ -2,7 +2,7 @@
; RUN: llc < %s -march=x86 -mcpu=yonah | not grep xmm
define double @test1(double* %P) {
- %X = load double* %P ; <double> [#uses=1]
+ %X = load double, double* %P ; <double> [#uses=1]
ret double %X
}
diff --git a/test/CodeGen/X86/sse-minmax.ll b/test/CodeGen/X86/sse-minmax.ll
index 4dcb54ca4b0b..e4d0373299fb 100644
--- a/test/CodeGen/X86/sse-minmax.ll
+++ b/test/CodeGen/X86/sse-minmax.ll
@@ -805,7 +805,7 @@ define double @ule_inverse_y(double %x) nounwind {
; CHECK-LABEL: clampTo3k_a:
; CHECK-NEXT: movsd {{[^,]*}}, %xmm1
; CHECK-NEXT: minsd %xmm0, %xmm1
-; CHECK-NEXT: movaps %xmm1, %xmm0
+; CHECK-NEXT: movapd %xmm1, %xmm0
; CHECK-NEXT: ret
; UNSAFE-LABEL: clampTo3k_a:
; UNSAFE-NEXT: minsd {{[^,]*}}, %xmm0
@@ -813,7 +813,7 @@ define double @ule_inverse_y(double %x) nounwind {
; FINITE-LABEL: clampTo3k_a:
; FINITE-NEXT: movsd {{[^,]*}}, %xmm1
; FINITE-NEXT: minsd %xmm0, %xmm1
-; FINITE-NEXT: movaps %xmm1, %xmm0
+; FINITE-NEXT: movapd %xmm1, %xmm0
; FINITE-NEXT: ret
define double @clampTo3k_a(double %x) nounwind readnone {
entry:
@@ -831,7 +831,7 @@ entry:
; FINITE-LABEL: clampTo3k_b:
; FINITE-NEXT: movsd {{[^,]*}}, %xmm1
; FINITE-NEXT: minsd %xmm0, %xmm1
-; FINITE-NEXT: movaps %xmm1, %xmm0
+; FINITE-NEXT: movapd %xmm1, %xmm0
; FINITE-NEXT: ret
define double @clampTo3k_b(double %x) nounwind readnone {
entry:
@@ -843,7 +843,7 @@ entry:
; CHECK-LABEL: clampTo3k_c:
; CHECK-NEXT: movsd {{[^,]*}}, %xmm1
; CHECK-NEXT: maxsd %xmm0, %xmm1
-; CHECK-NEXT: movaps %xmm1, %xmm0
+; CHECK-NEXT: movapd %xmm1, %xmm0
; CHECK-NEXT: ret
; UNSAFE-LABEL: clampTo3k_c:
; UNSAFE-NEXT: maxsd {{[^,]*}}, %xmm0
@@ -851,7 +851,7 @@ entry:
; FINITE-LABEL: clampTo3k_c:
; FINITE-NEXT: movsd {{[^,]*}}, %xmm1
; FINITE-NEXT: maxsd %xmm0, %xmm1
-; FINITE-NEXT: movaps %xmm1, %xmm0
+; FINITE-NEXT: movapd %xmm1, %xmm0
; FINITE-NEXT: ret
define double @clampTo3k_c(double %x) nounwind readnone {
entry:
@@ -869,7 +869,7 @@ entry:
; FINITE-LABEL: clampTo3k_d:
; FINITE-NEXT: movsd {{[^,]*}}, %xmm1
; FINITE-NEXT: maxsd %xmm0, %xmm1
-; FINITE-NEXT: movaps %xmm1, %xmm0
+; FINITE-NEXT: movapd %xmm1, %xmm0
; FINITE-NEXT: ret
define double @clampTo3k_d(double %x) nounwind readnone {
entry:
@@ -881,7 +881,7 @@ entry:
; CHECK-LABEL: clampTo3k_e:
; CHECK-NEXT: movsd {{[^,]*}}, %xmm1
; CHECK-NEXT: maxsd %xmm0, %xmm1
-; CHECK-NEXT: movaps %xmm1, %xmm0
+; CHECK-NEXT: movapd %xmm1, %xmm0
; CHECK-NEXT: ret
; UNSAFE-LABEL: clampTo3k_e:
; UNSAFE-NEXT: maxsd {{[^,]*}}, %xmm0
@@ -889,7 +889,7 @@ entry:
; FINITE-LABEL: clampTo3k_e:
; FINITE-NEXT: movsd {{[^,]*}}, %xmm1
; FINITE-NEXT: maxsd %xmm0, %xmm1
-; FINITE-NEXT: movaps %xmm1, %xmm0
+; FINITE-NEXT: movapd %xmm1, %xmm0
; FINITE-NEXT: ret
define double @clampTo3k_e(double %x) nounwind readnone {
entry:
@@ -907,7 +907,7 @@ entry:
; FINITE-LABEL: clampTo3k_f:
; FINITE-NEXT: movsd {{[^,]*}}, %xmm1
; FINITE-NEXT: maxsd %xmm0, %xmm1
-; FINITE-NEXT: movaps %xmm1, %xmm0
+; FINITE-NEXT: movapd %xmm1, %xmm0
; FINITE-NEXT: ret
define double @clampTo3k_f(double %x) nounwind readnone {
entry:
@@ -919,7 +919,7 @@ entry:
; CHECK-LABEL: clampTo3k_g:
; CHECK-NEXT: movsd {{[^,]*}}, %xmm1
; CHECK-NEXT: minsd %xmm0, %xmm1
-; CHECK-NEXT: movaps %xmm1, %xmm0
+; CHECK-NEXT: movapd %xmm1, %xmm0
; CHECK-NEXT: ret
; UNSAFE-LABEL: clampTo3k_g:
; UNSAFE-NEXT: minsd {{[^,]*}}, %xmm0
@@ -927,7 +927,7 @@ entry:
; FINITE-LABEL: clampTo3k_g:
; FINITE-NEXT: movsd {{[^,]*}}, %xmm1
; FINITE-NEXT: minsd %xmm0, %xmm1
-; FINITE-NEXT: movaps %xmm1, %xmm0
+; FINITE-NEXT: movapd %xmm1, %xmm0
; FINITE-NEXT: ret
define double @clampTo3k_g(double %x) nounwind readnone {
entry:
@@ -945,7 +945,7 @@ entry:
; FINITE-LABEL: clampTo3k_h:
; FINITE-NEXT: movsd {{[^,]*}}, %xmm1
; FINITE-NEXT: minsd %xmm0, %xmm1
-; FINITE-NEXT: movaps %xmm1, %xmm0
+; FINITE-NEXT: movapd %xmm1, %xmm0
; FINITE-NEXT: ret
define double @clampTo3k_h(double %x) nounwind readnone {
entry:
diff --git a/test/CodeGen/X86/sse-scalar-fp-arith-unary.ll b/test/CodeGen/X86/sse-scalar-fp-arith-unary.ll
new file mode 100644
index 000000000000..fab4f90279e8
--- /dev/null
+++ b/test/CodeGen/X86/sse-scalar-fp-arith-unary.ll
@@ -0,0 +1,73 @@
+; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=sse2 < %s | FileCheck --check-prefix=SSE %s
+; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=sse4.1 < %s | FileCheck --check-prefix=SSE %s
+; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=avx < %s | FileCheck --check-prefix=AVX %s
+
+; PR21507 - https://llvm.org/bugs/show_bug.cgi?id=21507
+; Each function should be a single math op; no extra moves.
+
+
+define <4 x float> @recip(<4 x float> %x) {
+; SSE-LABEL: recip:
+; SSE: # BB#0:
+; SSE-NEXT: rcpss %xmm0, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: recip:
+; AVX: # BB#0:
+; AVX-NEXT: vrcpss %xmm0, %xmm0, %xmm0
+; AVX-NEXT: retq
+ %y = tail call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %x)
+ %shuf = shufflevector <4 x float> %y, <4 x float> %x, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
+ ret <4 x float> %shuf
+}
+
+define <4 x float> @recip_square_root(<4 x float> %x) {
+; SSE-LABEL: recip_square_root:
+; SSE: # BB#0:
+; SSE-NEXT: rsqrtss %xmm0, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: recip_square_root:
+; AVX: # BB#0:
+; AVX-NEXT: vrsqrtss %xmm0, %xmm0, %xmm0
+; AVX-NEXT: retq
+ %y = tail call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> %x)
+ %shuf = shufflevector <4 x float> %y, <4 x float> %x, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
+ ret <4 x float> %shuf
+}
+
+define <4 x float> @square_root(<4 x float> %x) {
+; SSE-LABEL: square_root:
+; SSE: # BB#0:
+; SSE-NEXT: sqrtss %xmm0, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: square_root:
+; AVX: # BB#0:
+; AVX-NEXT: vsqrtss %xmm0, %xmm0, %xmm0
+; AVX-NEXT: retq
+ %y = tail call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %x)
+ %shuf = shufflevector <4 x float> %y, <4 x float> %x, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
+ ret <4 x float> %shuf
+}
+
+define <2 x double> @square_root_double(<2 x double> %x) {
+; SSE-LABEL: square_root_double:
+; SSE: # BB#0:
+; SSE-NEXT: sqrtsd %xmm0, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: square_root_double:
+; AVX: # BB#0:
+; AVX-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0
+; AVX-NEXT: retq
+ %y = tail call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %x)
+ %shuf = shufflevector <2 x double> %y, <2 x double> %x, <2 x i32> <i32 0, i32 3>
+ ret <2 x double> %shuf
+}
+
+declare <4 x float> @llvm.x86.sse.rcp.ss(<4 x float>)
+declare <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float>)
+declare <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float>)
+declare <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double>)
+
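
The PR21507 tests above depend on the scalar SSE intrinsics already preserving the upper vector lanes, which is why the trailing shufflevector (lane 0 from the intrinsic result, lanes 1-3 from %x) should fold away to a single instruction. A short C-intrinsics sketch of the same observation, assuming the standard _mm_sqrt_ss semantics (the function name is illustrative):

    #include <xmmintrin.h>

    /* _mm_sqrt_ss computes sqrt of lane 0 and copies lanes 1-3 from its input
       unchanged, so re-selecting lanes 1-3 from x afterwards is a no-op:
       the whole pattern is one SQRTSS with no extra moves. */
    static __m128 square_root_lane0(__m128 x) {
        return _mm_sqrt_ss(x);
    }
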
diff --git a/test/CodeGen/X86/sse-scalar-fp-arith.ll b/test/CodeGen/X86/sse-scalar-fp-arith.ll
index b122ef67544c..7162d2715bce 100644
--- a/test/CodeGen/X86/sse-scalar-fp-arith.ll
+++ b/test/CodeGen/X86/sse-scalar-fp-arith.ll
@@ -76,6 +76,31 @@ define <4 x float> @test_div_ss(<4 x float> %a, <4 x float> %b) {
ret <4 x float> %3
}
+define <4 x float> @test_sqrt_ss(<4 x float> %a) {
+; SSE2-LABEL: test_sqrt_ss:
+; SSE2: # BB#0:
+; SSE2-NEXT: sqrtss %xmm0, %xmm1
+; SSE2-NEXT: movss %xmm1, %xmm0
+; SSE2-NEXT: retq
+;
+; SSE41-LABEL: test_sqrt_ss:
+; SSE41: # BB#0:
+; SSE41-NEXT: sqrtss %xmm0, %xmm1
+; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
+; SSE41-NEXT: retq
+;
+; AVX-LABEL: test_sqrt_ss:
+; AVX: # BB#0:
+; AVX-NEXT: vsqrtss %xmm0, %xmm0, %xmm1
+; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
+; AVX-NEXT: retq
+ %1 = extractelement <4 x float> %a, i32 0
+ %2 = call float @llvm.sqrt.f32(float %1)
+ %3 = insertelement <4 x float> %a, float %2, i32 0
+ ret <4 x float> %3
+}
+declare float @llvm.sqrt.f32(float)
+
define <2 x double> @test_add_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: test_add_sd:
; SSE: # BB#0:
@@ -144,6 +169,25 @@ define <2 x double> @test_div_sd(<2 x double> %a, <2 x double> %b) {
ret <2 x double> %3
}
+define <2 x double> @test_sqrt_sd(<2 x double> %a) {
+; SSE-LABEL: test_sqrt_sd:
+; SSE: # BB#0:
+; SSE-NEXT: sqrtsd %xmm0, %xmm1
+; SSE-NEXT: movsd %xmm1, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: test_sqrt_sd:
+; AVX: # BB#0:
+; AVX-NEXT: vsqrtsd %xmm0, %xmm0, %xmm1
+; AVX-NEXT: vmovsd %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+ %1 = extractelement <2 x double> %a, i32 0
+ %2 = call double @llvm.sqrt.f64(double %1)
+ %3 = insertelement <2 x double> %a, double %2, i32 0
+ ret <2 x double> %3
+}
+declare double @llvm.sqrt.f64(double)
+
define <4 x float> @test2_add_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: test2_add_ss:
; SSE: # BB#0:
@@ -220,7 +264,7 @@ define <2 x double> @test2_add_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: test2_add_sd:
; SSE: # BB#0:
; SSE-NEXT: addsd %xmm0, %xmm1
-; SSE-NEXT: movaps %xmm1, %xmm0
+; SSE-NEXT: movapd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: test2_add_sd:
@@ -238,7 +282,7 @@ define <2 x double> @test2_sub_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: test2_sub_sd:
; SSE: # BB#0:
; SSE-NEXT: subsd %xmm0, %xmm1
-; SSE-NEXT: movaps %xmm1, %xmm0
+; SSE-NEXT: movapd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: test2_sub_sd:
@@ -256,7 +300,7 @@ define <2 x double> @test2_mul_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: test2_mul_sd:
; SSE: # BB#0:
; SSE-NEXT: mulsd %xmm0, %xmm1
-; SSE-NEXT: movaps %xmm1, %xmm0
+; SSE-NEXT: movapd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: test2_mul_sd:
@@ -274,7 +318,7 @@ define <2 x double> @test2_div_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: test2_div_sd:
; SSE: # BB#0:
; SSE-NEXT: divsd %xmm0, %xmm1
-; SSE-NEXT: movaps %xmm1, %xmm0
+; SSE-NEXT: movapd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: test2_div_sd:
@@ -370,8 +414,155 @@ define <4 x float> @test_multiple_div_ss(<4 x float> %a, <4 x float> %b) {
ret <4 x float> %3
}
+; With SSE4.1 or greater, the shuffles in the following tests may
+; be lowered to X86Blendi nodes.
+
+define <4 x float> @blend_add_ss(<4 x float> %a, float %b) {
+; SSE-LABEL: blend_add_ss:
+; SSE: # BB#0:
+; SSE-NEXT: addss %xmm1, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: blend_add_ss:
+; AVX: # BB#0:
+; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+
+ %ext = extractelement <4 x float> %a, i32 0
+ %op = fadd float %b, %ext
+ %ins = insertelement <4 x float> undef, float %op, i32 0
+ %shuf = shufflevector <4 x float> %ins, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
+ ret <4 x float> %shuf
+}
+
+define <4 x float> @blend_sub_ss(<4 x float> %a, float %b) {
+; SSE-LABEL: blend_sub_ss:
+; SSE: # BB#0:
+; SSE-NEXT: subss %xmm1, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: blend_sub_ss:
+; AVX: # BB#0:
+; AVX-NEXT: vsubss %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+
+ %ext = extractelement <4 x float> %a, i32 0
+ %op = fsub float %ext, %b
+ %ins = insertelement <4 x float> undef, float %op, i32 0
+ %shuf = shufflevector <4 x float> %ins, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
+ ret <4 x float> %shuf
+}
+
+define <4 x float> @blend_mul_ss(<4 x float> %a, float %b) {
+; SSE-LABEL: blend_mul_ss:
+; SSE: # BB#0:
+; SSE-NEXT: mulss %xmm1, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: blend_mul_ss:
+; AVX: # BB#0:
+; AVX-NEXT: vmulss %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+
+ %ext = extractelement <4 x float> %a, i32 0
+ %op = fmul float %b, %ext
+ %ins = insertelement <4 x float> undef, float %op, i32 0
+ %shuf = shufflevector <4 x float> %ins, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
+ ret <4 x float> %shuf
+}
+
+define <4 x float> @blend_div_ss(<4 x float> %a, float %b) {
+; SSE-LABEL: blend_div_ss:
+; SSE: # BB#0:
+; SSE-NEXT: divss %xmm1, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: blend_div_ss:
+; AVX: # BB#0:
+; AVX-NEXT: vdivss %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+
+ %ext = extractelement <4 x float> %a, i32 0
+ %op = fdiv float %ext, %b
+ %ins = insertelement <4 x float> undef, float %op, i32 0
+ %shuf = shufflevector <4 x float> %ins, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
+ ret <4 x float> %shuf
+}
+
+define <2 x double> @blend_add_sd(<2 x double> %a, double %b) {
+; SSE-LABEL: blend_add_sd:
+; SSE: # BB#0:
+; SSE-NEXT: addsd %xmm1, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: blend_add_sd:
+; AVX: # BB#0:
+; AVX-NEXT: vaddsd %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+
+ %ext = extractelement <2 x double> %a, i32 0
+ %op = fadd double %b, %ext
+ %ins = insertelement <2 x double> undef, double %op, i32 0
+ %shuf = shufflevector <2 x double> %ins, <2 x double> %a, <2 x i32> <i32 0, i32 3>
+ ret <2 x double> %shuf
+}
+
+define <2 x double> @blend_sub_sd(<2 x double> %a, double %b) {
+; SSE-LABEL: blend_sub_sd:
+; SSE: # BB#0:
+; SSE-NEXT: subsd %xmm1, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: blend_sub_sd:
+; AVX: # BB#0:
+; AVX-NEXT: vsubsd %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+
+ %ext = extractelement <2 x double> %a, i32 0
+ %op = fsub double %ext, %b
+ %ins = insertelement <2 x double> undef, double %op, i32 0
+ %shuf = shufflevector <2 x double> %ins, <2 x double> %a, <2 x i32> <i32 0, i32 3>
+ ret <2 x double> %shuf
+}
+
+define <2 x double> @blend_mul_sd(<2 x double> %a, double %b) {
+; SSE-LABEL: blend_mul_sd:
+; SSE: # BB#0:
+; SSE-NEXT: mulsd %xmm1, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: blend_mul_sd:
+; AVX: # BB#0:
+; AVX-NEXT: vmulsd %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+
+ %ext = extractelement <2 x double> %a, i32 0
+ %op = fmul double %b, %ext
+ %ins = insertelement <2 x double> undef, double %op, i32 0
+ %shuf = shufflevector <2 x double> %ins, <2 x double> %a, <2 x i32> <i32 0, i32 3>
+ ret <2 x double> %shuf
+}
+
+define <2 x double> @blend_div_sd(<2 x double> %a, double %b) {
+; SSE-LABEL: blend_div_sd:
+; SSE: # BB#0:
+; SSE-NEXT: divsd %xmm1, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: blend_div_sd:
+; AVX: # BB#0:
+; AVX-NEXT: vdivsd %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+
+ %ext = extractelement <2 x double> %a, i32 0
+ %op = fdiv double %ext, %b
+ %ins = insertelement <2 x double> undef, double %op, i32 0
+ %shuf = shufflevector <2 x double> %ins, <2 x double> %a, <2 x i32> <i32 0, i32 3>
+ ret <2 x double> %shuf
+}
+
; Ensure that the backend selects SSE/AVX scalar fp instructions
-; from a packed fp instrution plus a vector insert.
+; from a packed fp instruction plus a vector insert.
define <4 x float> @insert_test_add_ss(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: insert_test_add_ss:
@@ -561,7 +752,7 @@ define <2 x double> @insert_test2_add_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test2_add_sd:
; SSE: # BB#0:
; SSE-NEXT: addsd %xmm0, %xmm1
-; SSE-NEXT: movaps %xmm1, %xmm0
+; SSE-NEXT: movapd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: insert_test2_add_sd:
@@ -577,7 +768,7 @@ define <2 x double> @insert_test2_sub_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test2_sub_sd:
; SSE: # BB#0:
; SSE-NEXT: subsd %xmm0, %xmm1
-; SSE-NEXT: movaps %xmm1, %xmm0
+; SSE-NEXT: movapd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: insert_test2_sub_sd:
@@ -593,7 +784,7 @@ define <2 x double> @insert_test2_mul_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test2_mul_sd:
; SSE: # BB#0:
; SSE-NEXT: mulsd %xmm0, %xmm1
-; SSE-NEXT: movaps %xmm1, %xmm0
+; SSE-NEXT: movapd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: insert_test2_mul_sd:
@@ -609,7 +800,7 @@ define <2 x double> @insert_test2_div_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test2_div_sd:
; SSE: # BB#0:
; SSE-NEXT: divsd %xmm0, %xmm1
-; SSE-NEXT: movaps %xmm1, %xmm0
+; SSE-NEXT: movapd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: insert_test2_div_sd:
@@ -809,7 +1000,7 @@ define <2 x double> @insert_test4_add_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test4_add_sd:
; SSE: # BB#0:
; SSE-NEXT: addsd %xmm0, %xmm1
-; SSE-NEXT: movaps %xmm1, %xmm0
+; SSE-NEXT: movapd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: insert_test4_add_sd:
@@ -825,7 +1016,7 @@ define <2 x double> @insert_test4_sub_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test4_sub_sd:
; SSE: # BB#0:
; SSE-NEXT: subsd %xmm0, %xmm1
-; SSE-NEXT: movaps %xmm1, %xmm0
+; SSE-NEXT: movapd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: insert_test4_sub_sd:
@@ -841,7 +1032,7 @@ define <2 x double> @insert_test4_mul_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test4_mul_sd:
; SSE: # BB#0:
; SSE-NEXT: mulsd %xmm0, %xmm1
-; SSE-NEXT: movaps %xmm1, %xmm0
+; SSE-NEXT: movapd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: insert_test4_mul_sd:
@@ -857,7 +1048,7 @@ define <2 x double> @insert_test4_div_sd(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: insert_test4_div_sd:
; SSE: # BB#0:
; SSE-NEXT: divsd %xmm0, %xmm1
-; SSE-NEXT: movaps %xmm1, %xmm0
+; SSE-NEXT: movapd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: insert_test4_div_sd:
diff --git a/test/CodeGen/X86/sse-unaligned-mem-feature.ll b/test/CodeGen/X86/sse-unaligned-mem-feature.ll
index 15f91ee04eaf..1c61a515f383 100644
--- a/test/CodeGen/X86/sse-unaligned-mem-feature.ll
+++ b/test/CodeGen/X86/sse-unaligned-mem-feature.ll
@@ -4,10 +4,10 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
target triple = "x86_64-unknown-linux-gnu"
define <4 x float> @foo(<4 x float>* %P, <4 x float> %In) nounwind {
- %A = load <4 x float>* %P, align 4
+ %A = load <4 x float>, <4 x float>* %P, align 4
%B = fadd <4 x float> %A, %In
ret <4 x float> %B
; CHECK-LABEL: @foo
-; CHECK: addps (
+; CHECK: addps (%eax), %xmm0
}
diff --git a/test/CodeGen/X86/sse-varargs.ll b/test/CodeGen/X86/sse-varargs.ll
index da38f0e148f6..7c3c78113def 100644
--- a/test/CodeGen/X86/sse-varargs.ll
+++ b/test/CodeGen/X86/sse-varargs.ll
@@ -2,7 +2,7 @@
define i32 @t() nounwind {
entry:
- tail call void (i32, ...)* @foo( i32 1, <4 x i32> < i32 10, i32 11, i32 12, i32 13 > ) nounwind
+ tail call void (i32, ...) @foo( i32 1, <4 x i32> < i32 10, i32 11, i32 12, i32 13 > ) nounwind
ret i32 0
}
diff --git a/test/CodeGen/X86/sse2-intrinsics-x86-upgrade.ll b/test/CodeGen/X86/sse2-intrinsics-x86-upgrade.ll
new file mode 100644
index 000000000000..b0412b96bdb2
--- /dev/null
+++ b/test/CodeGen/X86/sse2-intrinsics-x86-upgrade.ll
@@ -0,0 +1,31 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -march=x86 -mcpu=pentium4 -mattr=sse2 | FileCheck %s
+
+define <2 x i64> @test_x86_sse2_psll_dq_bs(<2 x i64> %a0) {
+ ; CHECK: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8]
+ %res = call <2 x i64> @llvm.x86.sse2.psll.dq.bs(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
+ ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.sse2.psll.dq.bs(<2 x i64>, i32) nounwind readnone
+
+
+define <2 x i64> @test_x86_sse2_psrl_dq_bs(<2 x i64> %a0) {
+ ; CHECK: psrldq {{.*#+}} xmm0 = xmm0[7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero
+ %res = call <2 x i64> @llvm.x86.sse2.psrl.dq.bs(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
+ ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.sse2.psrl.dq.bs(<2 x i64>, i32) nounwind readnone
+
+define <2 x i64> @test_x86_sse2_psll_dq(<2 x i64> %a0) {
+ ; CHECK: pslldq {{.*#+}} xmm0 = zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
+ %res = call <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64> %a0, i32 8) ; <<2 x i64>> [#uses=1]
+ ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64>, i32) nounwind readnone
+
+
+define <2 x i64> @test_x86_sse2_psrl_dq(<2 x i64> %a0) {
+ ; CHECK: psrldq {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero
+ %res = call <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64> %a0, i32 8) ; <<2 x i64>> [#uses=1]
+ ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64>, i32) nounwind readnone
diff --git a/test/CodeGen/X86/sse2-intrinsics-x86.ll b/test/CodeGen/X86/sse2-intrinsics-x86.ll
index ddb04211ec7b..53132a167fb8 100644
--- a/test/CodeGen/X86/sse2-intrinsics-x86.ll
+++ b/test/CodeGen/X86/sse2-intrinsics-x86.ll
@@ -1,4 +1,5 @@
-; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=-avx,+sse2 | FileCheck %s
+; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=-avx,+sse2 | FileCheck %s --check-prefix=CHECK --check-prefix=SSE
+; RUN: llc < %s -mtriple=i386-apple-darwin -mcpu=knl | FileCheck %s
define <2 x double> @test_x86_sse2_add_sd(<2 x double> %a0, <2 x double> %a1) {
; CHECK: addsd
@@ -142,7 +143,7 @@ declare i32 @llvm.x86.sse2.cvtsd2si(<2 x double>) nounwind readnone
define <4 x float> @test_x86_sse2_cvtsd2ss(<4 x float> %a0, <2 x double> %a1) {
; CHECK: cvtsd2ss
- ; CHECK-NOT: cvtsd2ss %xmm{{[0-9]+}}, %xmm{{[0-9]+}}, %xmm{{[0-9]+}}
+ ; SSE-NOT: cvtsd2ss %xmm{{[0-9]+}}, %xmm{{[0-9]+}}, %xmm{{[0-9]+}}
%res = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> %a0, <2 x double> %a1) ; <<4 x float>> [#uses=1]
ret <4 x float> %res
}
@@ -410,22 +411,6 @@ define <4 x i32> @test_x86_sse2_psll_d(<4 x i32> %a0, <4 x i32> %a1) {
declare <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32>, <4 x i32>) nounwind readnone
-define <2 x i64> @test_x86_sse2_psll_dq(<2 x i64> %a0) {
- ; CHECK: pslldq {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
- %res = call <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
- ret <2 x i64> %res
-}
-declare <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64>, i32) nounwind readnone
-
-
-define <2 x i64> @test_x86_sse2_psll_dq_bs(<2 x i64> %a0) {
- ; CHECK: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8]
- %res = call <2 x i64> @llvm.x86.sse2.psll.dq.bs(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
- ret <2 x i64> %res
-}
-declare <2 x i64> @llvm.x86.sse2.psll.dq.bs(<2 x i64>, i32) nounwind readnone
-
-
define <2 x i64> @test_x86_sse2_psll_q(<2 x i64> %a0, <2 x i64> %a1) {
; CHECK: psllq
%res = call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
@@ -506,22 +491,6 @@ define <4 x i32> @test_x86_sse2_psrl_d(<4 x i32> %a0, <4 x i32> %a1) {
declare <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32>, <4 x i32>) nounwind readnone
-define <2 x i64> @test_x86_sse2_psrl_dq(<2 x i64> %a0) {
- ; CHECK: psrldq {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
- %res = call <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
- ret <2 x i64> %res
-}
-declare <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64>, i32) nounwind readnone
-
-
-define <2 x i64> @test_x86_sse2_psrl_dq_bs(<2 x i64> %a0) {
- ; CHECK: psrldq {{.*#+}} xmm0 = xmm0[7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero
- %res = call <2 x i64> @llvm.x86.sse2.psrl.dq.bs(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
- ret <2 x i64> %res
-}
-declare <2 x i64> @llvm.x86.sse2.psrl.dq.bs(<2 x i64>, i32) nounwind readnone
-
-
define <2 x i64> @test_x86_sse2_psrl_q(<2 x i64> %a0, <2 x i64> %a1) {
; CHECK: psrlq
%res = call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
@@ -613,7 +582,7 @@ declare <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double>) nounwind readnone
define void @test_x86_sse2_storel_dq(i8* %a0, <4 x i32> %a1) {
; CHECK: test_x86_sse2_storel_dq
; CHECK: movl
- ; CHECK: movq
+ ; CHECK: movlps
call void @llvm.x86.sse2.storel.dq(i8* %a0, <4 x i32> %a1)
ret void
}
diff --git a/test/CodeGen/X86/sse2.ll b/test/CodeGen/X86/sse2.ll
index b7db6cb56ef2..d3ee3c6f0454 100644
--- a/test/CodeGen/X86/sse2.ll
+++ b/test/CodeGen/X86/sse2.ll
@@ -10,7 +10,7 @@ define void @test1(<2 x double>* %r, <2 x double>* %A, double %B) nounwind {
; CHECK-NEXT: movlpd {{[0-9]+}}(%esp), %xmm0
; CHECK-NEXT: movapd %xmm0, (%eax)
; CHECK-NEXT: retl
- %tmp3 = load <2 x double>* %A, align 16
+ %tmp3 = load <2 x double>, <2 x double>* %A, align 16
%tmp7 = insertelement <2 x double> undef, double %B, i32 0
%tmp9 = shufflevector <2 x double> %tmp3, <2 x double> %tmp7, <2 x i32> < i32 2, i32 1 >
store <2 x double> %tmp9, <2 x double>* %r, align 16
@@ -26,7 +26,7 @@ define void @test2(<2 x double>* %r, <2 x double>* %A, double %B) nounwind {
; CHECK-NEXT: movhpd {{[0-9]+}}(%esp), %xmm0
; CHECK-NEXT: movapd %xmm0, (%eax)
; CHECK-NEXT: retl
- %tmp3 = load <2 x double>* %A, align 16
+ %tmp3 = load <2 x double>, <2 x double>* %A, align 16
%tmp7 = insertelement <2 x double> undef, double %B, i32 0
%tmp9 = shufflevector <2 x double> %tmp3, <2 x double> %tmp7, <2 x i32> < i32 0, i32 2 >
store <2 x double> %tmp9, <2 x double>* %r, align 16
@@ -44,8 +44,8 @@ define void @test3(<4 x float>* %res, <4 x float>* %A, <4 x float>* %B) nounwind
; CHECK-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
; CHECK-NEXT: movaps %xmm0, (%eax)
; CHECK-NEXT: retl
- %tmp = load <4 x float>* %B ; <<4 x float>> [#uses=2]
- %tmp3 = load <4 x float>* %A ; <<4 x float>> [#uses=2]
+ %tmp = load <4 x float>, <4 x float>* %B ; <<4 x float>> [#uses=2]
+ %tmp3 = load <4 x float>, <4 x float>* %A ; <<4 x float>> [#uses=2]
%tmp.upgrd.1 = extractelement <4 x float> %tmp3, i32 0 ; <float> [#uses=1]
%tmp7 = extractelement <4 x float> %tmp, i32 0 ; <float> [#uses=1]
%tmp8 = extractelement <4 x float> %tmp3, i32 1 ; <float> [#uses=1]
@@ -75,14 +75,14 @@ define <4 x i32> @test5(i8** %ptr) nounwind {
; CHECK: ## BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl (%eax), %eax
-; CHECK-NEXT: movss (%eax), %xmm1
+; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; CHECK-NEXT: pxor %xmm0, %xmm0
; CHECK-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; CHECK-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; CHECK-NEXT: retl
- %tmp = load i8** %ptr ; <i8*> [#uses=1]
+ %tmp = load i8*, i8** %ptr ; <i8*> [#uses=1]
%tmp.upgrd.1 = bitcast i8* %tmp to float* ; <float*> [#uses=1]
- %tmp.upgrd.2 = load float* %tmp.upgrd.1 ; <float> [#uses=1]
+ %tmp.upgrd.2 = load float, float* %tmp.upgrd.1 ; <float> [#uses=1]
%tmp.upgrd.3 = insertelement <4 x float> undef, float %tmp.upgrd.2, i32 0 ; <<4 x float>> [#uses=1]
%tmp9 = insertelement <4 x float> %tmp.upgrd.3, float 0.000000e+00, i32 1 ; <<4 x float>> [#uses=1]
%tmp10 = insertelement <4 x float> %tmp9, float 0.000000e+00, i32 2 ; <<4 x float>> [#uses=1]
@@ -103,7 +103,7 @@ define void @test6(<4 x float>* %res, <4 x float>* %A) nounwind {
; CHECK-NEXT: movaps (%ecx), %xmm0
; CHECK-NEXT: movaps %xmm0, (%eax)
; CHECK-NEXT: retl
- %tmp1 = load <4 x float>* %A ; <<4 x float>> [#uses=1]
+ %tmp1 = load <4 x float>, <4 x float>* %A ; <<4 x float>> [#uses=1]
%tmp2 = shufflevector <4 x float> %tmp1, <4 x float> undef, <4 x i32> < i32 0, i32 5, i32 6, i32 7 > ; <<4 x float>> [#uses=1]
store <4 x float> %tmp2, <4 x float>* %res
ret void
@@ -129,10 +129,10 @@ define <2 x i64> @test8() nounwind {
; CHECK-NEXT: movl L_x$non_lazy_ptr, %eax
; CHECK-NEXT: movups (%eax), %xmm0
; CHECK-NEXT: retl
- %tmp = load i32* getelementptr ([4 x i32]* @x, i32 0, i32 0) ; <i32> [#uses=1]
- %tmp3 = load i32* getelementptr ([4 x i32]* @x, i32 0, i32 1) ; <i32> [#uses=1]
- %tmp5 = load i32* getelementptr ([4 x i32]* @x, i32 0, i32 2) ; <i32> [#uses=1]
- %tmp7 = load i32* getelementptr ([4 x i32]* @x, i32 0, i32 3) ; <i32> [#uses=1]
+ %tmp = load i32, i32* getelementptr ([4 x i32], [4 x i32]* @x, i32 0, i32 0) ; <i32> [#uses=1]
+ %tmp3 = load i32, i32* getelementptr ([4 x i32], [4 x i32]* @x, i32 0, i32 1) ; <i32> [#uses=1]
+ %tmp5 = load i32, i32* getelementptr ([4 x i32], [4 x i32]* @x, i32 0, i32 2) ; <i32> [#uses=1]
+ %tmp7 = load i32, i32* getelementptr ([4 x i32], [4 x i32]* @x, i32 0, i32 3) ; <i32> [#uses=1]
%tmp.upgrd.1 = insertelement <4 x i32> undef, i32 %tmp, i32 0 ; <<4 x i32>> [#uses=1]
%tmp13 = insertelement <4 x i32> %tmp.upgrd.1, i32 %tmp3, i32 1 ; <<4 x i32>> [#uses=1]
%tmp14 = insertelement <4 x i32> %tmp13, i32 %tmp5, i32 2 ; <<4 x i32>> [#uses=1]
@@ -179,14 +179,14 @@ define void @test12() nounwind {
; CHECK-LABEL: test12:
; CHECK: ## BB#0:
; CHECK-NEXT: movapd 0, %xmm0
-; CHECK-NEXT: movaps {{.*#+}} xmm1 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
-; CHECK-NEXT: movsd %xmm0, %xmm1
+; CHECK-NEXT: movapd {{.*#+}} xmm1 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
+; CHECK-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
; CHECK-NEXT: xorpd %xmm2, %xmm2
; CHECK-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm2[1]
; CHECK-NEXT: addps %xmm1, %xmm0
; CHECK-NEXT: movaps %xmm0, 0
; CHECK-NEXT: retl
- %tmp1 = load <4 x float>* null ; <<4 x float>> [#uses=2]
+ %tmp1 = load <4 x float>, <4 x float>* null ; <<4 x float>> [#uses=2]
%tmp2 = shufflevector <4 x float> %tmp1, <4 x float> < float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00 >, <4 x i32> < i32 0, i32 1, i32 6, i32 7 > ; <<4 x float>> [#uses=1]
%tmp3 = shufflevector <4 x float> %tmp1, <4 x float> zeroinitializer, <4 x i32> < i32 2, i32 3, i32 6, i32 7 > ; <<4 x float>> [#uses=1]
%tmp4 = fadd <4 x float> %tmp2, %tmp3 ; <<4 x float>> [#uses=1]
@@ -205,8 +205,8 @@ define void @test13(<4 x float>* %res, <4 x float>* %A, <4 x float>* %B, <4 x fl
; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
; CHECK-NEXT: movaps %xmm0, (%eax)
; CHECK-NEXT: retl
- %tmp3 = load <4 x float>* %B ; <<4 x float>> [#uses=1]
- %tmp5 = load <4 x float>* %C ; <<4 x float>> [#uses=1]
+ %tmp3 = load <4 x float>, <4 x float>* %B ; <<4 x float>> [#uses=1]
+ %tmp5 = load <4 x float>, <4 x float>* %C ; <<4 x float>> [#uses=1]
%tmp11 = shufflevector <4 x float> %tmp3, <4 x float> %tmp5, <4 x i32> < i32 1, i32 4, i32 1, i32 5 > ; <<4 x float>> [#uses=1]
store <4 x float> %tmp11, <4 x float>* %res
ret void
@@ -224,8 +224,8 @@ define <4 x float> @test14(<4 x float>* %x, <4 x float>* %y) nounwind {
; CHECK-NEXT: subps %xmm1, %xmm2
; CHECK-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; CHECK-NEXT: retl
- %tmp = load <4 x float>* %y ; <<4 x float>> [#uses=2]
- %tmp5 = load <4 x float>* %x ; <<4 x float>> [#uses=2]
+ %tmp = load <4 x float>, <4 x float>* %y ; <<4 x float>> [#uses=2]
+ %tmp5 = load <4 x float>, <4 x float>* %x ; <<4 x float>> [#uses=2]
%tmp9 = fadd <4 x float> %tmp5, %tmp ; <<4 x float>> [#uses=1]
%tmp21 = fsub <4 x float> %tmp5, %tmp ; <<4 x float>> [#uses=1]
%tmp27 = shufflevector <4 x float> %tmp9, <4 x float> %tmp21, <4 x i32> < i32 0, i32 1, i32 4, i32 5 > ; <<4 x float>> [#uses=1]
@@ -241,8 +241,8 @@ define <4 x float> @test15(<4 x float>* %x, <4 x float>* %y) nounwind {
; CHECK-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],mem[1]
; CHECK-NEXT: retl
entry:
- %tmp = load <4 x float>* %y ; <<4 x float>> [#uses=1]
- %tmp3 = load <4 x float>* %x ; <<4 x float>> [#uses=1]
+ %tmp = load <4 x float>, <4 x float>* %y ; <<4 x float>> [#uses=1]
+ %tmp3 = load <4 x float>, <4 x float>* %x ; <<4 x float>> [#uses=1]
%tmp4 = shufflevector <4 x float> %tmp3, <4 x float> %tmp, <4 x i32> < i32 2, i32 3, i32 6, i32 7 > ; <<4 x float>> [#uses=1]
ret <4 x float> %tmp4
}
@@ -256,8 +256,8 @@ define <2 x double> @test16(<4 x double> * nocapture %srcA, <2 x double>* nocap
; CHECK-NEXT: movapd 96(%eax), %xmm0
; CHECK-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0]
; CHECK-NEXT: retl
- %i5 = getelementptr inbounds <4 x double>* %srcA, i32 3
- %i6 = load <4 x double>* %i5, align 32
+ %i5 = getelementptr inbounds <4 x double>, <4 x double>* %srcA, i32 3
+ %i6 = load <4 x double>, <4 x double>* %i5, align 32
%i7 = shufflevector <4 x double> %i6, <4 x double> undef, <2 x i32> <i32 0, i32 2>
ret <2 x double> %i7
}
@@ -293,7 +293,7 @@ entry:
define <2 x i64> @test_insert_64_zext(<2 x i64> %i) {
; CHECK-LABEL: test_insert_64_zext:
; CHECK: ## BB#0:
-; CHECK-NEXT: movq %xmm0, %xmm0
+; CHECK-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
; CHECK-NEXT: retl
%1 = shufflevector <2 x i64> %i, <2 x i64> <i64 0, i64 undef>, <2 x i32> <i32 0, i32 2>
ret <2 x i64> %1
@@ -302,8 +302,7 @@ define <2 x i64> @test_insert_64_zext(<2 x i64> %i) {
define <4 x i32> @PR19721(<4 x i32> %i) {
; CHECK-LABEL: PR19721:
; CHECK: ## BB#0:
-; CHECK-NEXT: xorps %xmm1, %xmm1
-; CHECK-NEXT: movss %xmm1, %xmm0
+; CHECK-NEXT: andps LCPI19_0, %xmm0
; CHECK-NEXT: retl
%bc = bitcast <4 x i32> %i to i128
%insert = and i128 %bc, -4294967296
@@ -316,10 +315,11 @@ define <4 x i32> @test_mul(<4 x i32> %x, <4 x i32> %y) {
; CHECK: ## BB#0:
; CHECK-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; CHECK-NEXT: pmuludq %xmm1, %xmm0
+; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; CHECK-NEXT: pmuludq %xmm2, %xmm1
-; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
-; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
+; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
+; CHECK-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; CHECK-NEXT: retl
%m = mul <4 x i32> %x, %y
ret <4 x i32> %m
diff --git a/test/CodeGen/X86/sse3-avx-addsub-2.ll b/test/CodeGen/X86/sse3-avx-addsub-2.ll
index 5b2de28c0f5d..71efa3f8f105 100644
--- a/test/CodeGen/X86/sse3-avx-addsub-2.ll
+++ b/test/CodeGen/X86/sse3-avx-addsub-2.ll
@@ -315,4 +315,17 @@ define <4 x float> @test16(<4 x float> %A, <4 x float> %B) {
; CHECK-NOT: addsubps
; CHECK: ret
-
+define <2 x float> @test_v2f32(<2 x float> %v0, <2 x float> %v1) {
+ %v2 = extractelement <2 x float> %v0, i32 0
+ %v3 = extractelement <2 x float> %v1, i32 0
+ %v4 = extractelement <2 x float> %v0, i32 1
+ %v5 = extractelement <2 x float> %v1, i32 1
+ %sub = fsub float %v2, %v3
+ %add = fadd float %v5, %v4
+ %res0 = insertelement <2 x float> undef, float %sub, i32 0
+ %res1 = insertelement <2 x float> %res0, float %add, i32 1
+ ret <2 x float> %res1
+}
+; CHECK-LABEL: test_v2f32
+; CHECK: addsubps %xmm1, %xmm0
+; CHECK-NEXT: retq
diff --git a/test/CodeGen/X86/sse3-avx-addsub.ll b/test/CodeGen/X86/sse3-avx-addsub.ll
index 431588f90ab2..76141fc876ae 100644
--- a/test/CodeGen/X86/sse3-avx-addsub.ll
+++ b/test/CodeGen/X86/sse3-avx-addsub.ll
@@ -87,7 +87,7 @@ define <2 x double> @test4(<2 x double> %A, <2 x double> %B) #0 {
define <4 x float> @test1b(<4 x float> %A, <4 x float>* %B) {
- %1 = load <4 x float>* %B
+ %1 = load <4 x float>, <4 x float>* %B
%add = fadd <4 x float> %A, %1
%sub = fsub <4 x float> %A, %1
%vecinit6 = shufflevector <4 x float> %sub, <4 x float> %add, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
@@ -100,7 +100,7 @@ define <4 x float> @test1b(<4 x float> %A, <4 x float>* %B) {
define <8 x float> @test2b(<8 x float> %A, <8 x float>* %B) {
- %1 = load <8 x float>* %B
+ %1 = load <8 x float>, <8 x float>* %B
%add = fadd <8 x float> %A, %1
%sub = fsub <8 x float> %A, %1
%vecinit14 = shufflevector <8 x float> %sub, <8 x float> %add, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
@@ -115,7 +115,7 @@ define <8 x float> @test2b(<8 x float> %A, <8 x float>* %B) {
define <4 x double> @test3b(<4 x double> %A, <4 x double>* %B) {
- %1 = load <4 x double>* %B
+ %1 = load <4 x double>, <4 x double>* %B
%add = fadd <4 x double> %A, %1
%sub = fsub <4 x double> %A, %1
%vecinit6 = shufflevector <4 x double> %sub, <4 x double> %add, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
@@ -130,7 +130,7 @@ define <4 x double> @test3b(<4 x double> %A, <4 x double>* %B) {
define <2 x double> @test4b(<2 x double> %A, <2 x double>* %B) {
- %1 = load <2 x double>* %B
+ %1 = load <2 x double>, <2 x double>* %B
%sub = fsub <2 x double> %A, %1
%add = fadd <2 x double> %A, %1
%vecinit2 = shufflevector <2 x double> %sub, <2 x double> %add, <2 x i32> <i32 0, i32 3>
diff --git a/test/CodeGen/X86/sse3.ll b/test/CodeGen/X86/sse3.ll
index 0a5b0cab851c..c1cd91beaf53 100644
--- a/test/CodeGen/X86/sse3.ll
+++ b/test/CodeGen/X86/sse3.ll
@@ -14,7 +14,7 @@ define void @t0(<8 x i16>* %dest, <8 x i16>* %old) nounwind {
; X64-NEXT: movdqa %xmm0, (%rdi)
; X64-NEXT: retq
entry:
- %tmp3 = load <8 x i16>* %old
+ %tmp3 = load <8 x i16>, <8 x i16>* %old
%tmp6 = shufflevector <8 x i16> %tmp3,
<8 x i16> < i16 1, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef >,
<8 x i32> < i32 8, i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef >
@@ -25,17 +25,14 @@ entry:
define <8 x i16> @t1(<8 x i16>* %A, <8 x i16>* %B) nounwind {
; X64-LABEL: t1:
; X64: ## BB#0:
-; X64-NEXT: movdqa (%rdi), %xmm0
-; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
-; X64-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[2,1,2,3,4,5,6,7]
-; X64-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
-; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; X64-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,0,2,3,4,5,6,7]
-; X64-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X64-NEXT: movaps {{.*#+}} xmm0 = [0,65535,65535,65535,65535,65535,65535,65535]
+; X64-NEXT: movaps %xmm0, %xmm1
+; X64-NEXT: andnps (%rsi), %xmm1
+; X64-NEXT: andps (%rdi), %xmm0
+; X64-NEXT: orps %xmm1, %xmm0
; X64-NEXT: retq
- %tmp1 = load <8 x i16>* %A
- %tmp2 = load <8 x i16>* %B
+ %tmp1 = load <8 x i16>, <8 x i16>* %A
+ %tmp2 = load <8 x i16>, <8 x i16>* %B
%tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> < i32 8, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7 >
ret <8 x i16> %tmp3
@@ -44,11 +41,11 @@ define <8 x i16> @t1(<8 x i16>* %A, <8 x i16>* %B) nounwind {
define <8 x i16> @t2(<8 x i16> %A, <8 x i16> %B) nounwind {
; X64-LABEL: t2:
; X64: ## BB#0:
-; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
-; X64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,1,2,3]
-; X64-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,2,0,3,4,5,6,7]
-; X64-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; X64-NEXT: movdqa {{.*#+}} xmm2 = [0,65535,65535,0,65535,65535,65535,65535]
+; X64-NEXT: pand %xmm2, %xmm0
+; X64-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[1,1,2,1,4,5,6,7]
+; X64-NEXT: pandn %xmm1, %xmm2
+; X64-NEXT: por %xmm2, %xmm0
; X64-NEXT: retq
%tmp = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> < i32 9, i32 1, i32 2, i32 9, i32 4, i32 5, i32 6, i32 7 >
ret <8 x i16> %tmp
@@ -92,7 +89,7 @@ define <8 x i16> @t5(<8 x i16> %A, <8 x i16> %B) nounwind {
define <8 x i16> @t6(<8 x i16> %A, <8 x i16> %B) nounwind {
; X64-LABEL: t6:
; X64: ## BB#0:
-; X64-NEXT: movss %xmm1, %xmm0
+; X64-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; X64-NEXT: retq
%tmp = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> < i32 8, i32 9, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7 >
ret <8 x i16> %tmp
@@ -115,7 +112,7 @@ define void @t8(<2 x i64>* %res, <2 x i64>* %A) nounwind {
; X64-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,5,4,7]
; X64-NEXT: movdqa %xmm0, (%rdi)
; X64-NEXT: retq
- %tmp = load <2 x i64>* %A
+ %tmp = load <2 x i64>, <2 x i64>* %A
%tmp.upgrd.1 = bitcast <2 x i64> %tmp to <8 x i16>
%tmp0 = extractelement <8 x i16> %tmp.upgrd.1, i32 0
%tmp1 = extractelement <8 x i16> %tmp.upgrd.1, i32 1
@@ -145,9 +142,9 @@ define void @t9(<4 x float>* %r, <2 x i32>* %A) nounwind {
; X64-NEXT: movhpd (%rsi), %xmm0
; X64-NEXT: movapd %xmm0, (%rdi)
; X64-NEXT: retq
- %tmp = load <4 x float>* %r
+ %tmp = load <4 x float>, <4 x float>* %r
%tmp.upgrd.3 = bitcast <2 x i32>* %A to double*
- %tmp.upgrd.4 = load double* %tmp.upgrd.3
+ %tmp.upgrd.4 = load double, double* %tmp.upgrd.3
%tmp.upgrd.5 = insertelement <2 x double> undef, double %tmp.upgrd.4, i32 0
%tmp5 = insertelement <2 x double> %tmp.upgrd.5, double undef, i32 1
%tmp6 = bitcast <2 x double> %tmp5 to <4 x float>
@@ -181,7 +178,7 @@ define void @t10() nounwind {
; X64-NEXT: movq _g2@{{.*}}(%rip), %rax
; X64-NEXT: movq %xmm0, (%rax)
; X64-NEXT: retq
- load <4 x i32>* @g1, align 16
+ load <4 x i32>, <4 x i32>* @g1, align 16
bitcast <4 x i32> %1 to <8 x i16>
shufflevector <8 x i16> %2, <8 x i16> undef, <8 x i32> < i32 0, i32 2, i32 4, i32 6, i32 undef, i32 undef, i32 undef, i32 undef >
bitcast <8 x i16> %3 to <2 x i64>
@@ -195,8 +192,8 @@ define void @t10() nounwind {
define <8 x i16> @t11(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone {
; X64-LABEL: t11:
; X64: ## BB#0: ## %entry
+; X64-NEXT: psrld $16, %xmm0
; X64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; X64-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[2,1,2,3,4,5,6,7]
; X64-NEXT: retq
entry:
%tmp7 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> < i32 1, i32 8, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef , i32 undef >
@@ -232,8 +229,9 @@ entry:
define <8 x i16> @t14(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone {
; X64-LABEL: t14:
; X64: ## BB#0: ## %entry
-; X64-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
-; X64-NEXT: pshuflw {{.*#+}} xmm0 = xmm1[0,2,2,3,4,5,6,7]
+; X64-NEXT: psrlq $16, %xmm0
+; X64-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; X64-NEXT: movdqa %xmm1, %xmm0
; X64-NEXT: retq
entry:
%tmp9 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> < i32 8, i32 9, i32 undef, i32 undef, i32 undef, i32 2, i32 undef , i32 undef >
@@ -245,11 +243,8 @@ define <8 x i16> @t15(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone {
; X64-LABEL: t15:
; X64: ## BB#0: ## %entry
; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,3]
-; X64-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,1,2,3,4,5,6,7]
-; X64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,0,3]
-; X64-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,1,0,2,4,5,6,7]
-; X64-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,6,7]
+; X64-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,1,1,2,4,5,6,7]
+; X64-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X64-NEXT: retq
entry:
%tmp8 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> < i32 undef, i32 undef, i32 7, i32 2, i32 8, i32 undef, i32 undef , i32 undef >
@@ -262,15 +257,7 @@ define <16 x i8> @t16(<16 x i8> %T0) nounwind readnone {
; X64: ## BB#0: ## %entry
; X64-NEXT: movdqa {{.*#+}} xmm1 = [0,0,0,0,1,1,1,1,0,0,0,0,0,0,0,0]
; X64-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
-; X64-NEXT: pxor %xmm2, %xmm2
-; X64-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
-; X64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
-; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
-; X64-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
-; X64-NEXT: pshuflw {{.*#+}} xmm0 = xmm1[0,2,2,3,4,5,6,7]
-; X64-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,7,6,7]
-; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; X64-NEXT: packuswb %xmm0, %xmm0
+; X64-NEXT: movdqa %xmm1, %xmm0
; X64-NEXT: retq
entry:
%tmp8 = shufflevector <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 1, i8 1, i8 1, i8 1, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, <16 x i8> %T0, <16 x i32> < i32 0, i32 1, i32 16, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef , i32 undef >
@@ -282,13 +269,13 @@ entry:
define <4 x i32> @t17() nounwind {
; X64-LABEL: t17:
; X64: ## BB#0: ## %entry
-; X64-NEXT: movddup (%rax), %xmm0
+; X64-NEXT: movddup {{.*#+}} xmm0 = mem[0,0]
; X64-NEXT: andpd {{.*}}(%rip), %xmm0
; X64-NEXT: retq
entry:
- %tmp1 = load <4 x float>* undef, align 16
+ %tmp1 = load <4 x float>, <4 x float>* undef, align 16
%tmp2 = shufflevector <4 x float> %tmp1, <4 x float> undef, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
- %tmp3 = load <4 x float>* undef, align 16
+ %tmp3 = load <4 x float>, <4 x float>* undef, align 16
%tmp4 = shufflevector <4 x float> %tmp2, <4 x float> undef, <4 x i32> <i32 undef, i32 undef, i32 0, i32 1>
%tmp5 = bitcast <4 x float> %tmp3 to <4 x i32>
%tmp6 = shufflevector <4 x i32> %tmp5, <4 x i32> undef, <4 x i32> <i32 undef, i32 undef, i32 0, i32 1>
diff --git a/test/CodeGen/X86/sse41-intrinsics-x86.ll b/test/CodeGen/X86/sse41-intrinsics-x86.ll
index 5f25a16380de..771e4024336c 100644
--- a/test/CodeGen/X86/sse41-intrinsics-x86.ll
+++ b/test/CodeGen/X86/sse41-intrinsics-x86.ll
@@ -1,4 +1,5 @@
; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=-avx,+sse4.1 | FileCheck %s
+; RUN: llc < %s -mtriple=i386-apple-darwin -mcpu=knl | FileCheck %s
define <2 x double> @test_x86_sse41_blendpd(<2 x double> %a0, <2 x double> %a1) {
; CHECK: blendpd
diff --git a/test/CodeGen/X86/sse41-pmovxrm-intrinsics.ll b/test/CodeGen/X86/sse41-pmovxrm-intrinsics.ll
index 55faf4d32b36..a16e79277143 100644
--- a/test/CodeGen/X86/sse41-pmovxrm-intrinsics.ll
+++ b/test/CodeGen/X86/sse41-pmovxrm-intrinsics.ll
@@ -5,7 +5,7 @@ define <8 x i16> @test_llvm_x86_sse41_pmovsxbw(<16 x i8>* %a) {
; CHECK-LABEL: test_llvm_x86_sse41_pmovsxbw
; SSE41: pmovsxbw (%rdi), %xmm0
; AVX: vpmovsxbw (%rdi), %xmm0
- %1 = load <16 x i8>* %a, align 1
+ %1 = load <16 x i8>, <16 x i8>* %a, align 1
%2 = call <8 x i16> @llvm.x86.sse41.pmovsxbw(<16 x i8> %1)
ret <8 x i16> %2
}
@@ -14,7 +14,7 @@ define <4 x i32> @test_llvm_x86_sse41_pmovsxbd(<16 x i8>* %a) {
; CHECK-LABEL: test_llvm_x86_sse41_pmovsxbd
; SSE41: pmovsxbd (%rdi), %xmm0
; AVX: vpmovsxbd (%rdi), %xmm0
- %1 = load <16 x i8>* %a, align 1
+ %1 = load <16 x i8>, <16 x i8>* %a, align 1
%2 = call <4 x i32> @llvm.x86.sse41.pmovsxbd(<16 x i8> %1)
ret <4 x i32> %2
}
@@ -23,7 +23,7 @@ define <2 x i64> @test_llvm_x86_sse41_pmovsxbq(<16 x i8>* %a) {
; CHECK-LABEL: test_llvm_x86_sse41_pmovsxbq
; SSE41: pmovsxbq (%rdi), %xmm0
; AVX: vpmovsxbq (%rdi), %xmm0
- %1 = load <16 x i8>* %a, align 1
+ %1 = load <16 x i8>, <16 x i8>* %a, align 1
%2 = call <2 x i64> @llvm.x86.sse41.pmovsxbq(<16 x i8> %1)
ret <2 x i64> %2
}
@@ -32,7 +32,7 @@ define <4 x i32> @test_llvm_x86_sse41_pmovsxwd(<8 x i16>* %a) {
; CHECK-LABEL: test_llvm_x86_sse41_pmovsxwd
; SSE41: pmovsxwd (%rdi), %xmm0
; AVX: vpmovsxwd (%rdi), %xmm0
- %1 = load <8 x i16>* %a, align 1
+ %1 = load <8 x i16>, <8 x i16>* %a, align 1
%2 = call <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16> %1)
ret <4 x i32> %2
}
@@ -41,7 +41,7 @@ define <2 x i64> @test_llvm_x86_sse41_pmovsxwq(<8 x i16>* %a) {
; CHECK-LABEL: test_llvm_x86_sse41_pmovsxwq
; SSE41: pmovsxwq (%rdi), %xmm0
; AVX: vpmovsxwq (%rdi), %xmm0
- %1 = load <8 x i16>* %a, align 1
+ %1 = load <8 x i16>, <8 x i16>* %a, align 1
%2 = call <2 x i64> @llvm.x86.sse41.pmovsxwq(<8 x i16> %1)
ret <2 x i64> %2
}
@@ -50,7 +50,7 @@ define <2 x i64> @test_llvm_x86_sse41_pmovsxdq(<4 x i32>* %a) {
; CHECK-LABEL: test_llvm_x86_sse41_pmovsxdq
; SSE41: pmovsxdq (%rdi), %xmm0
; AVX: vpmovsxdq (%rdi), %xmm0
- %1 = load <4 x i32>* %a, align 1
+ %1 = load <4 x i32>, <4 x i32>* %a, align 1
%2 = call <2 x i64> @llvm.x86.sse41.pmovsxdq(<4 x i32> %1)
ret <2 x i64> %2
}
@@ -59,7 +59,7 @@ define <8 x i16> @test_llvm_x86_sse41_pmovzxbw(<16 x i8>* %a) {
; CHECK-LABEL: test_llvm_x86_sse41_pmovzxbw
; SSE41: pmovzxbw (%rdi), %xmm0
; AVX: vpmovzxbw (%rdi), %xmm0
- %1 = load <16 x i8>* %a, align 1
+ %1 = load <16 x i8>, <16 x i8>* %a, align 1
%2 = call <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8> %1)
ret <8 x i16> %2
}
@@ -68,7 +68,7 @@ define <4 x i32> @test_llvm_x86_sse41_pmovzxbd(<16 x i8>* %a) {
; CHECK-LABEL: test_llvm_x86_sse41_pmovzxbd
; SSE41: pmovzxbd (%rdi), %xmm0
; AVX: vpmovzxbd (%rdi), %xmm0
- %1 = load <16 x i8>* %a, align 1
+ %1 = load <16 x i8>, <16 x i8>* %a, align 1
%2 = call <4 x i32> @llvm.x86.sse41.pmovzxbd(<16 x i8> %1)
ret <4 x i32> %2
}
@@ -77,7 +77,7 @@ define <2 x i64> @test_llvm_x86_sse41_pmovzxbq(<16 x i8>* %a) {
; CHECK-LABEL: test_llvm_x86_sse41_pmovzxbq
; SSE41: pmovzxbq (%rdi), %xmm0
; AVX: vpmovzxbq (%rdi), %xmm0
- %1 = load <16 x i8>* %a, align 1
+ %1 = load <16 x i8>, <16 x i8>* %a, align 1
%2 = call <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8> %1)
ret <2 x i64> %2
}
@@ -86,7 +86,7 @@ define <4 x i32> @test_llvm_x86_sse41_pmovzxwd(<8 x i16>* %a) {
; CHECK-LABEL: test_llvm_x86_sse41_pmovzxwd
; SSE41: pmovzxwd (%rdi), %xmm0
; AVX: vpmovzxwd (%rdi), %xmm0
- %1 = load <8 x i16>* %a, align 1
+ %1 = load <8 x i16>, <8 x i16>* %a, align 1
%2 = call <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16> %1)
ret <4 x i32> %2
}
@@ -95,7 +95,7 @@ define <2 x i64> @test_llvm_x86_sse41_pmovzxwq(<8 x i16>* %a) {
; CHECK-LABEL: test_llvm_x86_sse41_pmovzxwq
; SSE41: pmovzxwq (%rdi), %xmm0
; AVX: vpmovzxwq (%rdi), %xmm0
- %1 = load <8 x i16>* %a, align 1
+ %1 = load <8 x i16>, <8 x i16>* %a, align 1
%2 = call <2 x i64> @llvm.x86.sse41.pmovzxwq(<8 x i16> %1)
ret <2 x i64> %2
}
@@ -104,7 +104,7 @@ define <2 x i64> @test_llvm_x86_sse41_pmovzxdq(<4 x i32>* %a) {
; CHECK-LABEL: test_llvm_x86_sse41_pmovzxdq
; SSE41: pmovzxdq (%rdi), %xmm0
; AVX: vpmovzxdq (%rdi), %xmm0
- %1 = load <4 x i32>* %a, align 1
+ %1 = load <4 x i32>, <4 x i32>* %a, align 1
%2 = call <2 x i64> @llvm.x86.sse41.pmovzxdq(<4 x i32> %1)
ret <2 x i64> %2
}
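; A minimal standalone sketch (illustrative only; @load_sketch is a
; hypothetical function, not part of this patch) of the explicit
; result-type 'load' syntax that the hunks above convert these tests to;
; the loaded type is now written before the pointer operand instead of
; being implied by it:
define <16 x i8> @load_sketch(<16 x i8>* %a) {
  ; previously written as:  %v = load <16 x i8>* %a, align 1
  %v = load <16 x i8>, <16 x i8>* %a, align 1
  ret <16 x i8> %v
}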
diff --git a/test/CodeGen/X86/sse41.ll b/test/CodeGen/X86/sse41.ll
index 9c0c2221cb7f..8532c012aa9b 100644
--- a/test/CodeGen/X86/sse41.ll
+++ b/test/CodeGen/X86/sse41.ll
@@ -43,7 +43,7 @@ define <2 x i64> @pmovsxbd_1(i32* %p) nounwind {
; X64-NEXT: pmovsxbd (%rdi), %xmm0
; X64-NEXT: retq
entry:
- %0 = load i32* %p, align 4
+ %0 = load i32, i32* %p, align 4
%1 = insertelement <4 x i32> undef, i32 %0, i32 0
%2 = insertelement <4 x i32> %1, i32 0, i32 1
%3 = insertelement <4 x i32> %2, i32 0, i32 2
@@ -66,7 +66,7 @@ define <2 x i64> @pmovsxwd_1(i64* %p) nounwind readonly {
; X64-NEXT: pmovsxwd (%rdi), %xmm0
; X64-NEXT: retq
entry:
- %0 = load i64* %p ; <i64> [#uses=1]
+ %0 = load i64, i64* %p ; <i64> [#uses=1]
%tmp2 = insertelement <2 x i64> zeroinitializer, i64 %0, i32 0 ; <<2 x i64>> [#uses=1]
%1 = bitcast <2 x i64> %tmp2 to <8 x i16> ; <<8 x i16>> [#uses=1]
%2 = tail call <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16> %1) nounwind readnone ; <<4 x i32>> [#uses=1]
@@ -78,16 +78,16 @@ define <2 x i64> @pmovzxbq_1() nounwind {
; X32-LABEL: pmovzxbq_1:
; X32: ## BB#0: ## %entry
; X32-NEXT: movl L_g16$non_lazy_ptr, %eax
-; X32-NEXT: pmovzxbq (%eax), %xmm0
+; X32-NEXT: pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
; X32-NEXT: retl
;
; X64-LABEL: pmovzxbq_1:
; X64: ## BB#0: ## %entry
; X64-NEXT: movq _g16@{{.*}}(%rip), %rax
-; X64-NEXT: pmovzxbq (%rax), %xmm0
+; X64-NEXT: pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
; X64-NEXT: retq
entry:
- %0 = load i16* @g16, align 2 ; <i16> [#uses=1]
+ %0 = load i16, i16* @g16, align 2 ; <i16> [#uses=1]
%1 = insertelement <8 x i16> undef, i16 %0, i32 0 ; <<8 x i16>> [#uses=1]
%2 = bitcast <8 x i16> %1 to <16 x i8> ; <<16 x i8>> [#uses=1]
%3 = tail call <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8> %2) nounwind readnone ; <<2 x i64>> [#uses=1]
@@ -199,28 +199,51 @@ define <4 x float> @insertps_1(<4 x float> %t1, <4 x float> %t2) nounwind {
declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i32) nounwind readnone
-define <4 x float> @insertps_2(<4 x float> %t1, float %t2) nounwind {
-; X32-LABEL: insertps_2:
+; When optimizing for speed, prefer blendps over insertps even if it means we have to
+; generate a separate movss to load the scalar operand.
+define <4 x float> @blendps_not_insertps_1(<4 x float> %t1, float %t2) nounwind {
+; X32-LABEL: blendps_not_insertps_1:
; X32: ## BB#0:
-; X32-NEXT: insertps $0, {{[0-9]+}}(%esp), %xmm0
+; X32-NEXT: movss {{.*#+}} xmm1
+; X32-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; X32-NEXT: retl
;
-; X64-LABEL: insertps_2:
+; X64-LABEL: blendps_not_insertps_1:
; X64: ## BB#0:
-; X64-NEXT: insertps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
+; X64-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; X64-NEXT: retq
%tmp1 = insertelement <4 x float> %t1, float %t2, i32 0
ret <4 x float> %tmp1
}
-define <4 x float> @insertps_3(<4 x float> %t1, <4 x float> %t2) nounwind {
-; X32-LABEL: insertps_3:
+
+; When optimizing for size, generate an insertps if there's a load fold opportunity.
+; The difference between i386 and x86-64 ABIs for the float operand means we should
+; generate an insertps for X32 but not for X64!
+define <4 x float> @insertps_or_blendps(<4 x float> %t1, float %t2) minsize nounwind {
+; X32-LABEL: insertps_or_blendps:
; X32: ## BB#0:
-; X32-NEXT: insertps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
+; X32-NEXT: insertps {{.*#+}} xmm0 = mem[0],xmm0[1,2,3]
; X32-NEXT: retl
;
-; X64-LABEL: insertps_3:
+; X64-LABEL: insertps_or_blendps:
; X64: ## BB#0:
-; X64-NEXT: insertps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
+; X64-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
+; X64-NEXT: retq
+ %tmp1 = insertelement <4 x float> %t1, float %t2, i32 0
+ ret <4 x float> %tmp1
+}
+
+; An insert into the low 32-bits of a vector from the low 32-bits of another vector
+; is always just a blendps because blendps is never more expensive than insertps.
+define <4 x float> @blendps_not_insertps_2(<4 x float> %t1, <4 x float> %t2) nounwind {
+; X32-LABEL: blendps_not_insertps_2:
+; X32: ## BB#0:
+; X32-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
+; X32-NEXT: retl
+;
+; X64-LABEL: blendps_not_insertps_2:
+; X64: ## BB#0:
+; X64-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; X64-NEXT: retq
%tmp2 = extractelement <4 x float> %t2, i32 0
%tmp1 = insertelement <4 x float> %t1, float %tmp2, i32 0
@@ -291,22 +314,20 @@ declare i32 @llvm.x86.sse41.ptestnzc(<2 x i64>, <2 x i64>) nounwind readnone
define <2 x float> @buildvector(<2 x float> %A, <2 x float> %B) nounwind {
; X32-LABEL: buildvector:
; X32: ## BB#0: ## %entry
-; X32-NEXT: movaps %xmm0, %xmm2
-; X32-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,1,2,3]
+; X32-NEXT: movshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
+; X32-NEXT: movshdup {{.*#+}} xmm3 = xmm1[1,1,3,3]
; X32-NEXT: addss %xmm1, %xmm0
-; X32-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,2,3]
-; X32-NEXT: addss %xmm2, %xmm1
-; X32-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
+; X32-NEXT: addss %xmm2, %xmm3
+; X32-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[2,3]
; X32-NEXT: retl
;
; X64-LABEL: buildvector:
; X64: ## BB#0: ## %entry
-; X64-NEXT: movaps %xmm0, %xmm2
-; X64-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,1,2,3]
+; X64-NEXT: movshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
+; X64-NEXT: movshdup {{.*#+}} xmm3 = xmm1[1,1,3,3]
; X64-NEXT: addss %xmm1, %xmm0
-; X64-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,2,3]
-; X64-NEXT: addss %xmm2, %xmm1
-; X64-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
+; X64-NEXT: addss %xmm2, %xmm3
+; X64-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[2,3]
; X64-NEXT: retq
entry:
%tmp7 = extractelement <2 x float> %A, i32 0
@@ -324,15 +345,15 @@ define <4 x float> @insertps_from_shufflevector_1(<4 x float> %a, <4 x float>* n
; X32-LABEL: insertps_from_shufflevector_1:
; X32: ## BB#0: ## %entry
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: insertps $48, (%eax), %xmm0
+; X32-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0]
; X32-NEXT: retl
;
; X64-LABEL: insertps_from_shufflevector_1:
; X64: ## BB#0: ## %entry
-; X64-NEXT: insertps $48, (%rdi), %xmm0
+; X64-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0]
; X64-NEXT: retq
entry:
- %0 = load <4 x float>* %pb, align 16
+ %0 = load <4 x float>, <4 x float>* %pb, align 16
%vecinit6 = shufflevector <4 x float> %a, <4 x float> %0, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
ret <4 x float> %vecinit6
}
@@ -358,15 +379,17 @@ define <4 x i32> @pinsrd_from_shufflevector_i32(<4 x i32> %a, <4 x i32>* nocaptu
; X32-LABEL: pinsrd_from_shufflevector_i32:
; X32: ## BB#0: ## %entry
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: insertps $48, (%eax), %xmm0
+; X32-NEXT: pshufd {{.*#+}} xmm1 = mem[0,1,2,0]
+; X32-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5],xmm1[6,7]
; X32-NEXT: retl
;
; X64-LABEL: pinsrd_from_shufflevector_i32:
; X64: ## BB#0: ## %entry
-; X64-NEXT: insertps $48, (%rdi), %xmm0
+; X64-NEXT: pshufd {{.*#+}} xmm1 = mem[0,1,2,0]
+; X64-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5],xmm1[6,7]
; X64-NEXT: retq
entry:
- %0 = load <4 x i32>* %pb, align 16
+ %0 = load <4 x i32>, <4 x i32>* %pb, align 16
%vecinit6 = shufflevector <4 x i32> %a, <4 x i32> %0, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
ret <4 x i32> %vecinit6
}
@@ -374,12 +397,14 @@ entry:
define <4 x i32> @insertps_from_shufflevector_i32_2(<4 x i32> %a, <4 x i32> %b) {
; X32-LABEL: insertps_from_shufflevector_i32_2:
; X32: ## BB#0: ## %entry
-; X32-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],xmm1[3],xmm0[2,3]
+; X32-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
+; X32-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7]
; X32-NEXT: retl
;
; X64-LABEL: insertps_from_shufflevector_i32_2:
; X64: ## BB#0: ## %entry
-; X64-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],xmm1[3],xmm0[2,3]
+; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
+; X64-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7]
; X64-NEXT: retq
entry:
%vecinit6 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 7, i32 2, i32 3>
@@ -390,14 +415,14 @@ define <4 x float> @insertps_from_load_ins_elt_undef(<4 x float> %a, float* %b)
; X32-LABEL: insertps_from_load_ins_elt_undef:
; X32: ## BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: insertps $16, (%eax), %xmm0
+; X32-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[2,3]
; X32-NEXT: retl
;
; X64-LABEL: insertps_from_load_ins_elt_undef:
; X64: ## BB#0:
-; X64-NEXT: insertps $16, (%rdi), %xmm0
+; X64-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[2,3]
; X64-NEXT: retq
- %1 = load float* %b, align 4
+ %1 = load float, float* %b, align 4
%2 = insertelement <4 x float> undef, float %1, i32 0
%result = shufflevector <4 x float> %a, <4 x float> %2, <4 x i32> <i32 0, i32 4, i32 2, i32 3>
ret <4 x float> %result
@@ -408,16 +433,18 @@ define <4 x i32> @insertps_from_load_ins_elt_undef_i32(<4 x i32> %a, i32* %b) {
; X32-LABEL: insertps_from_load_ins_elt_undef_i32:
; X32: ## BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: movd (%eax), %xmm1
-; X32-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3]
+; X32-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; X32-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,0,1]
+; X32-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5],xmm0[6,7]
; X32-NEXT: retl
;
; X64-LABEL: insertps_from_load_ins_elt_undef_i32:
; X64: ## BB#0:
-; X64-NEXT: movd (%rdi), %xmm1
-; X64-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3]
+; X64-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,0,1]
+; X64-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5],xmm0[6,7]
; X64-NEXT: retq
- %1 = load i32* %b, align 4
+ %1 = load i32, i32* %b, align 4
%2 = insertelement <4 x i32> undef, i32 %1, i32 0
%result = shufflevector <4 x i32> %a, <4 x i32> %2, <4 x i32> <i32 0, i32 1, i32 4, i32 3>
ret <4 x i32> %result
@@ -449,12 +476,12 @@ define <4 x float> @shuf_XYZ0(<4 x float> %x, <4 x float> %a) {
define <4 x float> @shuf_XY00(<4 x float> %x, <4 x float> %a) {
; X32-LABEL: shuf_XY00:
; X32: ## BB#0:
-; X32-NEXT: movq %xmm0, %xmm0
+; X32-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
; X32-NEXT: retl
;
; X64-LABEL: shuf_XY00:
; X64: ## BB#0:
-; X64-NEXT: movq %xmm0, %xmm0
+; X64-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
; X64-NEXT: retq
%vecext = extractelement <4 x float> %x, i32 0
%vecinit = insertelement <4 x float> undef, float %vecext, i32 0
@@ -547,18 +574,12 @@ define <4 x float> @shuf_X00A(<4 x float> %x, <4 x float> %a) {
define <4 x float> @shuf_X00X(<4 x float> %x, <4 x float> %a) {
; X32-LABEL: shuf_X00X:
; X32: ## BB#0:
-; X32-NEXT: xorps %xmm1, %xmm1
-; X32-NEXT: blendps {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
-; X32-NEXT: insertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm0[0]
-; X32-NEXT: movaps %xmm1, %xmm0
+; X32-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],zero,zero,xmm0[0]
; X32-NEXT: retl
;
; X64-LABEL: shuf_X00X:
; X64: ## BB#0:
-; X64-NEXT: xorps %xmm1, %xmm1
-; X64-NEXT: blendps {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
-; X64-NEXT: insertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm0[0]
-; X64-NEXT: movaps %xmm1, %xmm0
+; X64-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],zero,zero,xmm0[0]
; X64-NEXT: retq
%vecext = extractelement <4 x float> %x, i32 0
%vecinit = insertelement <4 x float> undef, float %vecext, i32 0
@@ -571,20 +592,14 @@ define <4 x float> @shuf_X00X(<4 x float> %x, <4 x float> %a) {
define <4 x float> @shuf_X0YC(<4 x float> %x, <4 x float> %a) {
; X32-LABEL: shuf_X0YC:
; X32: ## BB#0:
-; X32-NEXT: xorps %xmm2, %xmm2
-; X32-NEXT: blendps {{.*#+}} xmm2 = xmm0[0],xmm2[1],xmm0[2,3]
-; X32-NEXT: insertps {{.*#+}} xmm2 = xmm2[0,1],xmm0[1],zero
-; X32-NEXT: insertps {{.*#+}} xmm2 = xmm2[0,1,2],xmm1[2]
-; X32-NEXT: movaps %xmm2, %xmm0
+; X32-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
+; X32-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[2]
; X32-NEXT: retl
;
; X64-LABEL: shuf_X0YC:
; X64: ## BB#0:
-; X64-NEXT: xorps %xmm2, %xmm2
-; X64-NEXT: blendps {{.*#+}} xmm2 = xmm0[0],xmm2[1],xmm0[2,3]
-; X64-NEXT: insertps {{.*#+}} xmm2 = xmm2[0,1],xmm0[1],zero
-; X64-NEXT: insertps {{.*#+}} xmm2 = xmm2[0,1,2],xmm1[2]
-; X64-NEXT: movaps %xmm2, %xmm0
+; X64-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
+; X64-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[2]
; X64-NEXT: retq
%vecext = extractelement <4 x float> %x, i32 0
%vecinit = insertelement <4 x float> undef, float %vecext, i32 0
@@ -619,12 +634,12 @@ define <4 x i32> @i32_shuf_XYZ0(<4 x i32> %x, <4 x i32> %a) {
define <4 x i32> @i32_shuf_XY00(<4 x i32> %x, <4 x i32> %a) {
; X32-LABEL: i32_shuf_XY00:
; X32: ## BB#0:
-; X32-NEXT: movq %xmm0, %xmm0
+; X32-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
; X32-NEXT: retl
;
; X64-LABEL: i32_shuf_XY00:
; X64: ## BB#0:
-; X64-NEXT: movq %xmm0, %xmm0
+; X64-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
; X64-NEXT: retq
%vecext = extractelement <4 x i32> %x, i32 0
%vecinit = insertelement <4 x i32> undef, i32 %vecext, i32 0
@@ -638,12 +653,16 @@ define <4 x i32> @i32_shuf_XY00(<4 x i32> %x, <4 x i32> %a) {
define <4 x i32> @i32_shuf_XYY0(<4 x i32> %x, <4 x i32> %a) {
; X32-LABEL: i32_shuf_XYY0:
; X32: ## BB#0:
-; X32-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,1],zero
+; X32-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,1,1,3]
+; X32-NEXT: pxor %xmm0, %xmm0
+; X32-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5],xmm0[6,7]
; X32-NEXT: retl
;
; X64-LABEL: i32_shuf_XYY0:
; X64: ## BB#0:
-; X64-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,1],zero
+; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,1,1,3]
+; X64-NEXT: pxor %xmm0, %xmm0
+; X64-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5],xmm0[6,7]
; X64-NEXT: retq
%vecext = extractelement <4 x i32> %x, i32 0
%vecinit = insertelement <4 x i32> undef, i32 %vecext, i32 0
@@ -657,12 +676,16 @@ define <4 x i32> @i32_shuf_XYY0(<4 x i32> %x, <4 x i32> %a) {
define <4 x i32> @i32_shuf_XYW0(<4 x i32> %x, <4 x i32> %a) {
; X32-LABEL: i32_shuf_XYW0:
; X32: ## BB#0:
-; X32-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,3],zero
+; X32-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,1,3,3]
+; X32-NEXT: pxor %xmm0, %xmm0
+; X32-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5],xmm0[6,7]
; X32-NEXT: retl
;
; X64-LABEL: i32_shuf_XYW0:
; X64: ## BB#0:
-; X64-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,3],zero
+; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,1,3,3]
+; X64-NEXT: pxor %xmm0, %xmm0
+; X64-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5],xmm0[6,7]
; X64-NEXT: retq
%vecext = extractelement <4 x i32> %x, i32 0
%vecinit = insertelement <4 x i32> undef, i32 %vecext, i32 0
@@ -677,12 +700,16 @@ define <4 x i32> @i32_shuf_XYW0(<4 x i32> %x, <4 x i32> %a) {
define <4 x i32> @i32_shuf_W00W(<4 x i32> %x, <4 x i32> %a) {
; X32-LABEL: i32_shuf_W00W:
; X32: ## BB#0:
-; X32-NEXT: insertps {{.*#+}} xmm0 = xmm0[3],zero,zero,xmm0[3]
+; X32-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,1,2,3]
+; X32-NEXT: pxor %xmm0, %xmm0
+; X32-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3,4,5],xmm1[6,7]
; X32-NEXT: retl
;
; X64-LABEL: i32_shuf_W00W:
; X64: ## BB#0:
-; X64-NEXT: insertps {{.*#+}} xmm0 = xmm0[3],zero,zero,xmm0[3]
+; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,1,2,3]
+; X64-NEXT: pxor %xmm0, %xmm0
+; X64-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3,4,5],xmm1[6,7]
; X64-NEXT: retq
%vecext = extractelement <4 x i32> %x, i32 3
%vecinit = insertelement <4 x i32> undef, i32 %vecext, i32 0
@@ -697,14 +724,16 @@ define <4 x i32> @i32_shuf_X00A(<4 x i32> %x, <4 x i32> %a) {
; X32: ## BB#0:
; X32-NEXT: pxor %xmm2, %xmm2
; X32-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3,4,5,6,7]
-; X32-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0]
+; X32-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,2,0]
+; X32-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5],xmm1[6,7]
; X32-NEXT: retl
;
; X64-LABEL: i32_shuf_X00A:
; X64: ## BB#0:
; X64-NEXT: pxor %xmm2, %xmm2
; X64-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3,4,5,6,7]
-; X64-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0]
+; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,2,0]
+; X64-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5],xmm1[6,7]
; X64-NEXT: retq
%vecext = extractelement <4 x i32> %x, i32 0
%vecinit = insertelement <4 x i32> undef, i32 %vecext, i32 0
@@ -717,18 +746,16 @@ define <4 x i32> @i32_shuf_X00A(<4 x i32> %x, <4 x i32> %a) {
define <4 x i32> @i32_shuf_X00X(<4 x i32> %x, <4 x i32> %a) {
; X32-LABEL: i32_shuf_X00X:
; X32: ## BB#0:
-; X32-NEXT: pxor %xmm1, %xmm1
-; X32-NEXT: pblendw {{.*#+}} xmm1 = xmm0[0,1],xmm1[2,3,4,5,6,7]
-; X32-NEXT: insertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm0[0]
-; X32-NEXT: movaps %xmm1, %xmm0
+; X32-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,1,2,0]
+; X32-NEXT: pxor %xmm0, %xmm0
+; X32-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3,4,5],xmm1[6,7]
; X32-NEXT: retl
;
; X64-LABEL: i32_shuf_X00X:
; X64: ## BB#0:
-; X64-NEXT: pxor %xmm1, %xmm1
-; X64-NEXT: pblendw {{.*#+}} xmm1 = xmm0[0,1],xmm1[2,3,4,5,6,7]
-; X64-NEXT: insertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm0[0]
-; X64-NEXT: movaps %xmm1, %xmm0
+; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,1,2,0]
+; X64-NEXT: pxor %xmm0, %xmm0
+; X64-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3,4,5],xmm1[6,7]
; X64-NEXT: retq
%vecext = extractelement <4 x i32> %x, i32 0
%vecinit = insertelement <4 x i32> undef, i32 %vecext, i32 0
@@ -741,18 +768,16 @@ define <4 x i32> @i32_shuf_X00X(<4 x i32> %x, <4 x i32> %a) {
define <4 x i32> @i32_shuf_X0YC(<4 x i32> %x, <4 x i32> %a) {
; X32-LABEL: i32_shuf_X0YC:
; X32: ## BB#0:
-; X32-NEXT: pmovzxdq %xmm0, %xmm2
-; X32-NEXT: insertps {{.*#+}} xmm2 = xmm2[0,1],xmm0[1],zero
-; X32-NEXT: insertps {{.*#+}} xmm2 = xmm2[0,1,2],xmm1[2]
-; X32-NEXT: movaps %xmm2, %xmm0
+; X32-NEXT: pmovzxdq {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero
+; X32-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,1,2,2]
+; X32-NEXT: pblendw {{.*#+}} xmm0 = xmm2[0,1,2,3,4,5],xmm0[6,7]
; X32-NEXT: retl
;
; X64-LABEL: i32_shuf_X0YC:
; X64: ## BB#0:
-; X64-NEXT: pmovzxdq %xmm0, %xmm2
-; X64-NEXT: insertps {{.*#+}} xmm2 = xmm2[0,1],xmm0[1],zero
-; X64-NEXT: insertps {{.*#+}} xmm2 = xmm2[0,1,2],xmm1[2]
-; X64-NEXT: movaps %xmm2, %xmm0
+; X64-NEXT: pmovzxdq {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero
+; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,1,2,2]
+; X64-NEXT: pblendw {{.*#+}} xmm0 = xmm2[0,1,2,3,4,5],xmm0[6,7]
; X64-NEXT: retq
%vecext = extractelement <4 x i32> %x, i32 0
%vecinit = insertelement <4 x i32> undef, i32 %vecext, i32 0
@@ -814,14 +839,14 @@ define <4 x float> @insertps_from_vector_load(<4 x float> %a, <4 x float>* nocap
; X32-LABEL: insertps_from_vector_load:
; X32: ## BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: insertps $48, (%eax), %xmm0
+; X32-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0]
; X32-NEXT: retl
;
; X64-LABEL: insertps_from_vector_load:
; X64: ## BB#0:
-; X64-NEXT: insertps $48, (%rdi), %xmm0
+; X64-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0]
; X64-NEXT: retq
- %1 = load <4 x float>* %pb, align 16
+ %1 = load <4 x float>, <4 x float>* %pb, align 16
%2 = tail call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a, <4 x float> %1, i32 48)
ret <4 x float> %2
}
@@ -832,14 +857,14 @@ define <4 x float> @insertps_from_vector_load_offset(<4 x float> %a, <4 x float>
; X32-LABEL: insertps_from_vector_load_offset:
; X32: ## BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: insertps $96, 4(%eax), %xmm0
+; X32-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1],mem[1],xmm0[3]
; X32-NEXT: retl
;
; X64-LABEL: insertps_from_vector_load_offset:
; X64: ## BB#0:
-; X64-NEXT: insertps $96, 4(%rdi), %xmm0
+; X64-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1],mem[1],xmm0[3]
; X64-NEXT: retq
- %1 = load <4 x float>* %pb, align 16
+ %1 = load <4 x float>, <4 x float>* %pb, align 16
%2 = tail call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a, <4 x float> %1, i32 96)
ret <4 x float> %2
}
@@ -851,16 +876,16 @@ define <4 x float> @insertps_from_vector_load_offset_2(<4 x float> %a, <4 x floa
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: shll $4, %ecx
-; X32-NEXT: insertps $-64, 12(%eax,%ecx), %xmm0
+; X32-NEXT: insertps {{.*#+}} xmm0 = mem[3],xmm0[1,2,3]
; X32-NEXT: retl
;
; X64-LABEL: insertps_from_vector_load_offset_2:
; X64: ## BB#0:
; X64-NEXT: shlq $4, %rsi
-; X64-NEXT: insertps $-64, 12(%rdi,%rsi), %xmm0
+; X64-NEXT: insertps {{.*#+}} xmm0 = mem[3],xmm0[1,2,3]
; X64-NEXT: retq
- %1 = getelementptr inbounds <4 x float>* %pb, i64 %index
- %2 = load <4 x float>* %1, align 16
+ %1 = getelementptr inbounds <4 x float>, <4 x float>* %pb, i64 %index
+ %2 = load <4 x float>, <4 x float>* %1, align 16
%3 = tail call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a, <4 x float> %2, i32 192)
ret <4 x float> %3
}
@@ -870,19 +895,19 @@ define <4 x float> @insertps_from_broadcast_loadf32(<4 x float> %a, float* nocap
; X32: ## BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT: movss (%ecx,%eax,4), %xmm1
+; X32-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X32-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0,0,0]
; X32-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0]
; X32-NEXT: retl
;
; X64-LABEL: insertps_from_broadcast_loadf32:
; X64: ## BB#0:
-; X64-NEXT: movss (%rdi,%rsi,4), %xmm1
+; X64-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X64-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0,0,0]
; X64-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0]
; X64-NEXT: retq
- %1 = getelementptr inbounds float* %fb, i64 %index
- %2 = load float* %1, align 4
+ %1 = getelementptr inbounds float, float* %fb, i64 %index
+ %2 = load float, float* %1, align 4
%3 = insertelement <4 x float> undef, float %2, i32 0
%4 = insertelement <4 x float> %3, float %2, i32 1
%5 = insertelement <4 x float> %4, float %2, i32 2
@@ -906,7 +931,7 @@ define <4 x float> @insertps_from_broadcast_loadv4f32(<4 x float> %a, <4 x float
; X64-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0,0,0]
; X64-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0]
; X64-NEXT: retq
- %1 = load <4 x float>* %b, align 4
+ %1 = load <4 x float>, <4 x float>* %b, align 4
%2 = extractelement <4 x float> %1, i32 0
%3 = insertelement <4 x float> undef, float %2, i32 0
%4 = insertelement <4 x float> %3, float %2, i32 1
@@ -922,7 +947,7 @@ define <4 x float> @insertps_from_broadcast_multiple_use(<4 x float> %a, <4 x fl
; X32: ## BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT: movss (%ecx,%eax,4), %xmm4
+; X32-NEXT: movss {{.*#+}} xmm4 = mem[0],zero,zero,zero
; X32-NEXT: shufps {{.*#+}} xmm4 = xmm4[0,0,0,0]
; X32-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm4[0]
; X32-NEXT: insertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm4[0]
@@ -935,7 +960,7 @@ define <4 x float> @insertps_from_broadcast_multiple_use(<4 x float> %a, <4 x fl
;
; X64-LABEL: insertps_from_broadcast_multiple_use:
; X64: ## BB#0:
-; X64-NEXT: movss (%rdi,%rsi,4), %xmm4
+; X64-NEXT: movss {{.*#+}} xmm4 = mem[0],zero,zero,zero
; X64-NEXT: shufps {{.*#+}} xmm4 = xmm4[0,0,0,0]
; X64-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm4[0]
; X64-NEXT: insertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm4[0]
@@ -945,8 +970,8 @@ define <4 x float> @insertps_from_broadcast_multiple_use(<4 x float> %a, <4 x fl
; X64-NEXT: addps %xmm2, %xmm3
; X64-NEXT: addps %xmm3, %xmm0
; X64-NEXT: retq
- %1 = getelementptr inbounds float* %fb, i64 %index
- %2 = load float* %1, align 4
+ %1 = getelementptr inbounds float, float* %fb, i64 %index
+ %2 = load float, float* %1, align 4
%3 = insertelement <4 x float> undef, float %2, i32 0
%4 = insertelement <4 x float> %3, float %2, i32 1
%5 = insertelement <4 x float> %4, float %2, i32 2
@@ -965,18 +990,18 @@ define <4 x float> @insertps_with_undefs(<4 x float> %a, float* %b) {
; X32-LABEL: insertps_with_undefs:
; X32: ## BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: movss (%eax), %xmm1
-; X32-NEXT: insertps {{.*#+}} xmm1 = xmm1[0],zero,xmm0[0],xmm1[3]
-; X32-NEXT: movaps %xmm1, %xmm0
+; X32-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; X32-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; X32-NEXT: movapd %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: insertps_with_undefs:
; X64: ## BB#0:
-; X64-NEXT: movss (%rdi), %xmm1
-; X64-NEXT: insertps {{.*#+}} xmm1 = xmm1[0],zero,xmm0[0],xmm1[3]
-; X64-NEXT: movaps %xmm1, %xmm0
+; X64-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; X64-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; X64-NEXT: movapd %xmm1, %xmm0
; X64-NEXT: retq
- %1 = load float* %b, align 4
+ %1 = load float, float* %b, align 4
%2 = insertelement <4 x float> undef, float %1, i32 0
%result = shufflevector <4 x float> %a, <4 x float> %2, <4 x i32> <i32 4, i32 undef, i32 0, i32 7>
ret <4 x float> %result
@@ -988,38 +1013,35 @@ define <4 x float> @pr20087(<4 x float> %a, <4 x float> *%ptr) {
; X32-LABEL: pr20087:
; X32: ## BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: insertps $-78, 8(%eax), %xmm0
+; X32-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],zero,xmm0[2],mem[2]
; X32-NEXT: retl
;
; X64-LABEL: pr20087:
; X64: ## BB#0:
-; X64-NEXT: insertps $-78, 8(%rdi), %xmm0
+; X64-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],zero,xmm0[2],mem[2]
; X64-NEXT: retq
- %load = load <4 x float> *%ptr
+ %load = load <4 x float> , <4 x float> *%ptr
%ret = shufflevector <4 x float> %load, <4 x float> %a, <4 x i32> <i32 4, i32 undef, i32 6, i32 2>
ret <4 x float> %ret
}
; Edge case for insertps where we end up with a shuffle with mask=<0, 7, -1, -1>
-define void @insertps_pr20411(i32* noalias nocapture %RET) #1 {
+define void @insertps_pr20411(<4 x i32> %shuffle109, <4 x i32> %shuffle116, i32* noalias nocapture %RET) #1 {
; X32-LABEL: insertps_pr20411:
; X32: ## BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: pshufd {{.*#+}} xmm0 = mem[3,1,2,3]
-; X32-NEXT: insertps $-36, LCPI49_1+12, %xmm0
-; X32-NEXT: movups %xmm0, (%eax)
+; X32-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
+; X32-NEXT: pblendw {{.*#+}} xmm1 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7]
+; X32-NEXT: movdqu %xmm1, (%eax)
; X32-NEXT: retl
;
; X64-LABEL: insertps_pr20411:
; X64: ## BB#0:
-; X64-NEXT: pshufd {{.*#+}} xmm0 = mem[3,1,2,3]
-; X64-NEXT: insertps $-36, LCPI49_1+{{.*}}(%rip), %xmm0
-; X64-NEXT: movups %xmm0, (%rdi)
+; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
+; X64-NEXT: pblendw {{.*#+}} xmm1 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7]
+; X64-NEXT: movdqu %xmm1, (%rdi)
; X64-NEXT: retq
- %gather_load = shufflevector <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, <8 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
- %shuffle109 = shufflevector <4 x i32> <i32 4, i32 5, i32 6, i32 7>, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; 4 5 6 7
- %shuffle116 = shufflevector <8 x i32> %gather_load, <8 x i32> undef, <4 x i32> <i32 3, i32 undef, i32 undef, i32 undef> ; 3 x x x
- %shuffle117 = shufflevector <4 x i32> %shuffle109, <4 x i32> %shuffle116, <4 x i32> <i32 4, i32 3, i32 undef, i32 undef> ; 3 7 x x
+ %shuffle117 = shufflevector <4 x i32> %shuffle109, <4 x i32> %shuffle116, <4 x i32> <i32 0, i32 7, i32 undef, i32 undef>
%ptrcast = bitcast i32* %RET to <4 x i32>*
store <4 x i32> %shuffle117, <4 x i32>* %ptrcast, align 4
ret void
@@ -1027,12 +1049,12 @@ define void @insertps_pr20411(i32* noalias nocapture %RET) #1 {
define <4 x float> @insertps_4(<4 x float> %A, <4 x float> %B) {
; X32-LABEL: insertps_4:
-; X32: ## BB#0:
+; X32: ## BB#0: ## %entry
; X32-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],zero,xmm1[2],zero
; X32-NEXT: retl
;
; X64-LABEL: insertps_4:
-; X64: ## BB#0:
+; X64: ## BB#0: ## %entry
; X64-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],zero,xmm1[2],zero
; X64-NEXT: retq
entry:
@@ -1047,12 +1069,12 @@ entry:
define <4 x float> @insertps_5(<4 x float> %A, <4 x float> %B) {
; X32-LABEL: insertps_5:
-; X32: ## BB#0:
+; X32: ## BB#0: ## %entry
; X32-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],xmm1[1],zero,zero
; X32-NEXT: retl
;
; X64-LABEL: insertps_5:
-; X64: ## BB#0:
+; X64: ## BB#0: ## %entry
; X64-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],xmm1[1],zero,zero
; X64-NEXT: retq
entry:
@@ -1067,12 +1089,12 @@ entry:
define <4 x float> @insertps_6(<4 x float> %A, <4 x float> %B) {
; X32-LABEL: insertps_6:
-; X32: ## BB#0:
+; X32: ## BB#0: ## %entry
; X32-NEXT: insertps {{.*#+}} xmm0 = zero,xmm0[1],xmm1[2],zero
; X32-NEXT: retl
;
; X64-LABEL: insertps_6:
-; X64: ## BB#0:
+; X64: ## BB#0: ## %entry
; X64-NEXT: insertps {{.*#+}} xmm0 = zero,xmm0[1],xmm1[2],zero
; X64-NEXT: retq
entry:
@@ -1086,12 +1108,12 @@ entry:
define <4 x float> @insertps_7(<4 x float> %A, <4 x float> %B) {
; X32-LABEL: insertps_7:
-; X32: ## BB#0:
+; X32: ## BB#0: ## %entry
; X32-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],zero,xmm1[1],zero
; X32-NEXT: retl
;
; X64-LABEL: insertps_7:
-; X64: ## BB#0:
+; X64: ## BB#0: ## %entry
; X64-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],zero,xmm1[1],zero
; X64-NEXT: retq
entry:
@@ -1106,12 +1128,12 @@ entry:
define <4 x float> @insertps_8(<4 x float> %A, <4 x float> %B) {
; X32-LABEL: insertps_8:
-; X32: ## BB#0:
+; X32: ## BB#0: ## %entry
; X32-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
; X32-NEXT: retl
;
; X64-LABEL: insertps_8:
-; X64: ## BB#0:
+; X64: ## BB#0: ## %entry
; X64-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
; X64-NEXT: retq
entry:
@@ -1126,13 +1148,13 @@ entry:
define <4 x float> @insertps_9(<4 x float> %A, <4 x float> %B) {
; X32-LABEL: insertps_9:
-; X32: ## BB#0:
+; X32: ## BB#0: ## %entry
; X32-NEXT: insertps {{.*#+}} xmm1 = zero,xmm0[0],xmm1[2],zero
; X32-NEXT: movaps %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: insertps_9:
-; X64: ## BB#0:
+; X64: ## BB#0: ## %entry
; X64-NEXT: insertps {{.*#+}} xmm1 = zero,xmm0[0],xmm1[2],zero
; X64-NEXT: movaps %xmm1, %xmm0
; X64-NEXT: retq
@@ -1146,7 +1168,6 @@ entry:
}
define <4 x float> @insertps_10(<4 x float> %A)
-{
; X32-LABEL: insertps_10:
; X32: ## BB#0:
; X32-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],zero,xmm0[0],zero
@@ -1156,6 +1177,7 @@ define <4 x float> @insertps_10(<4 x float> %A)
; X64: ## BB#0:
; X64-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],zero,xmm0[0],zero
; X64-NEXT: retq
+{
%vecext = extractelement <4 x float> %A, i32 0
%vecbuild1 = insertelement <4 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>, float %vecext, i32 0
%vecbuild2 = insertelement <4 x float> %vecbuild1, float %vecext, i32 2
@@ -1164,13 +1186,13 @@ define <4 x float> @insertps_10(<4 x float> %A)
define <4 x float> @build_vector_to_shuffle_1(<4 x float> %A) {
; X32-LABEL: build_vector_to_shuffle_1:
-; X32: ## BB#0:
+; X32: ## BB#0: ## %entry
; X32-NEXT: xorps %xmm1, %xmm1
; X32-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2],xmm0[3]
; X32-NEXT: retl
;
; X64-LABEL: build_vector_to_shuffle_1:
-; X64: ## BB#0:
+; X64: ## BB#0: ## %entry
; X64-NEXT: xorps %xmm1, %xmm1
; X64-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2],xmm0[3]
; X64-NEXT: retq
@@ -1184,13 +1206,13 @@ entry:
define <4 x float> @build_vector_to_shuffle_2(<4 x float> %A) {
; X32-LABEL: build_vector_to_shuffle_2:
-; X32: ## BB#0:
+; X32: ## BB#0: ## %entry
; X32-NEXT: xorps %xmm1, %xmm1
; X32-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2,3]
; X32-NEXT: retl
;
; X64-LABEL: build_vector_to_shuffle_2:
-; X64: ## BB#0:
+; X64: ## BB#0: ## %entry
; X64-NEXT: xorps %xmm1, %xmm1
; X64-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2,3]
; X64-NEXT: retq
diff --git a/test/CodeGen/X86/sse42-intrinsics-x86.ll b/test/CodeGen/X86/sse42-intrinsics-x86.ll
index 5ca800982827..706c86b71a4a 100644
--- a/test/CodeGen/X86/sse42-intrinsics-x86.ll
+++ b/test/CodeGen/X86/sse42-intrinsics-x86.ll
@@ -16,8 +16,8 @@ define i32 @test_x86_sse42_pcmpestri128_load(<16 x i8>* %a0, <16 x i8>* %a2) {
; CHECK: movl $7
; CHECK: pcmpestri $7, (
; CHECK: movl
- %1 = load <16 x i8>* %a0
- %2 = load <16 x i8>* %a2
+ %1 = load <16 x i8>, <16 x i8>* %a0
+ %2 = load <16 x i8>, <16 x i8>* %a2
%res = call i32 @llvm.x86.sse42.pcmpestri128(<16 x i8> %1, i32 7, <16 x i8> %2, i32 7, i8 7) ; <i32> [#uses=1]
ret i32 %res
}
@@ -94,7 +94,7 @@ define <16 x i8> @test_x86_sse42_pcmpestrm128_load(<16 x i8> %a0, <16 x i8>* %a2
; CHECK: movl $7
; CHECK: pcmpestrm $7,
; CHECK-NOT: vmov
- %1 = load <16 x i8>* %a2
+ %1 = load <16 x i8>, <16 x i8>* %a2
%res = call <16 x i8> @llvm.x86.sse42.pcmpestrm128(<16 x i8> %a0, i32 7, <16 x i8> %1, i32 7, i8 7) ; <<16 x i8>> [#uses=1]
ret <16 x i8> %res
}
@@ -112,8 +112,8 @@ declare i32 @llvm.x86.sse42.pcmpistri128(<16 x i8>, <16 x i8>, i8) nounwind read
define i32 @test_x86_sse42_pcmpistri128_load(<16 x i8>* %a0, <16 x i8>* %a1) {
; CHECK: pcmpistri $7, (
; CHECK: movl
- %1 = load <16 x i8>* %a0
- %2 = load <16 x i8>* %a1
+ %1 = load <16 x i8>, <16 x i8>* %a0
+ %2 = load <16 x i8>, <16 x i8>* %a1
%res = call i32 @llvm.x86.sse42.pcmpistri128(<16 x i8> %1, <16 x i8> %2, i8 7) ; <i32> [#uses=1]
ret i32 %res
}
@@ -176,7 +176,7 @@ declare <16 x i8> @llvm.x86.sse42.pcmpistrm128(<16 x i8>, <16 x i8>, i8) nounwin
define <16 x i8> @test_x86_sse42_pcmpistrm128_load(<16 x i8> %a0, <16 x i8>* %a1) {
; CHECK: pcmpistrm $7, (
; CHECK-NOT: vmov
- %1 = load <16 x i8>* %a1
+ %1 = load <16 x i8>, <16 x i8>* %a1
%res = call <16 x i8> @llvm.x86.sse42.pcmpistrm128(<16 x i8> %a0, <16 x i8> %1, i8 7) ; <<16 x i8>> [#uses=1]
ret <16 x i8> %res
}
diff --git a/test/CodeGen/X86/sse4a.ll b/test/CodeGen/X86/sse4a.ll
index 165d47639d7a..f8fa125f98e7 100644
--- a/test/CodeGen/X86/sse4a.ll
+++ b/test/CodeGen/X86/sse4a.ll
@@ -1,4 +1,5 @@
; RUN: llc < %s -mtriple=i686-apple-darwin9 -mattr=sse4a | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown-linux -mattr=sse4a | FileCheck %s
define void @test1(i8* %p, <4 x float> %a) nounwind optsize ssp {
; CHECK-LABEL: test1:
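; A hedged sketch of how the two RUN lines added above could instead be
; checked with separate prefixes if the darwin and linux output ever
; diverged (X32/X64 are hypothetical prefix names here, mirroring the
; convention used in sse41.ll earlier in this diff):
; RUN: llc < %s -mtriple=i686-apple-darwin9 -mattr=sse4a | FileCheck %s -check-prefix=X32
; RUN: llc < %s -mtriple=x86_64-unknown-linux -mattr=sse4a | FileCheck %s -check-prefix=X64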
diff --git a/test/CodeGen/X86/sse_partial_update.ll b/test/CodeGen/X86/sse_partial_update.ll
index a88ab014641b..377c3b7d6ead 100644
--- a/test/CodeGen/X86/sse_partial_update.ll
+++ b/test/CodeGen/X86/sse_partial_update.ll
@@ -12,7 +12,7 @@ entry:
; CHECK-LABEL: rsqrtss:
; CHECK: rsqrtss %xmm0, %xmm0
; CHECK-NEXT: cvtss2sd %xmm0
-; CHECK-NEXT: shufps
+; CHECK-NEXT: movshdup
; CHECK-NEXT: cvtss2sd %xmm0
; CHECK-NEXT: movap
; CHECK-NEXT: jmp
@@ -33,7 +33,7 @@ entry:
; CHECK-LABEL: rcpss:
; CHECK: rcpss %xmm0, %xmm0
; CHECK-NEXT: cvtss2sd %xmm0
-; CHECK-NEXT: shufps
+; CHECK-NEXT: movshdup
; CHECK-NEXT: cvtss2sd %xmm0
; CHECK-NEXT: movap
; CHECK-NEXT: jmp
@@ -53,7 +53,7 @@ entry:
; CHECK-LABEL: sqrtss:
; CHECK: sqrtss %xmm0, %xmm0
; CHECK-NEXT: cvtss2sd %xmm0
-; CHECK-NEXT: shufps
+; CHECK-NEXT: movshdup
; CHECK-NEXT: cvtss2sd %xmm0
; CHECK-NEXT: movap
; CHECK-NEXT: jmp
@@ -67,3 +67,26 @@ entry:
ret void
}
declare <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float>) nounwind readnone
+
+define void @sqrtsd(<2 x double> %a) nounwind uwtable ssp {
+entry:
+; CHECK-LABEL: sqrtsd:
+; CHECK: sqrtsd %xmm0, %xmm0
+; CHECK-NEXT: cvtsd2ss %xmm0
+; CHECK-NEXT: shufpd
+; CHECK-NEXT: cvtsd2ss %xmm0
+; CHECK-NEXT: movap
+; CHECK-NEXT: jmp
+
+ %0 = tail call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %a) nounwind
+ %a0 = extractelement <2 x double> %0, i32 0
+ %conv = fptrunc double %a0 to float
+ %a1 = extractelement <2 x double> %0, i32 1
+ %conv3 = fptrunc double %a1 to float
+ tail call void @callee2(float %conv, float %conv3) nounwind
+ ret void
+}
+
+declare void @callee2(float, float)
+declare <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double>) nounwind readnone
+
diff --git a/test/CodeGen/X86/ssp-data-layout.ll b/test/CodeGen/X86/ssp-data-layout.ll
index e76ad7b871ba..4a63aceb7ccf 100644
--- a/test/CodeGen/X86/ssp-data-layout.ll
+++ b/test/CodeGen/X86/ssp-data-layout.ll
@@ -114,57 +114,57 @@ entry:
store i32 %call3, i32* %ptr, align 4
call void @end_addrof()
%call4 = call signext i16 @get_small_nonchar()
- %arrayidx = getelementptr inbounds [2 x i16]* %small2, i32 0, i64 0
+ %arrayidx = getelementptr inbounds [2 x i16], [2 x i16]* %small2, i32 0, i64 0
store i16 %call4, i16* %arrayidx, align 2
call void @end_small_nonchar()
%call5 = call i32 @get_large_nonchar()
- %arrayidx6 = getelementptr inbounds [8 x i32]* %large2, i32 0, i64 0
+ %arrayidx6 = getelementptr inbounds [8 x i32], [8 x i32]* %large2, i32 0, i64 0
store i32 %call5, i32* %arrayidx6, align 4
call void @end_large_nonchar()
%call7 = call signext i8 @get_small_char()
- %arrayidx8 = getelementptr inbounds [2 x i8]* %small, i32 0, i64 0
+ %arrayidx8 = getelementptr inbounds [2 x i8], [2 x i8]* %small, i32 0, i64 0
store i8 %call7, i8* %arrayidx8, align 1
call void @end_small_char()
%call9 = call signext i8 @get_large_char()
- %arrayidx10 = getelementptr inbounds [8 x i8]* %large, i32 0, i64 0
+ %arrayidx10 = getelementptr inbounds [8 x i8], [8 x i8]* %large, i32 0, i64 0
store i8 %call9, i8* %arrayidx10, align 1
call void @end_large_char()
%call11 = call signext i8 @get_struct_large_char()
- %foo = getelementptr inbounds %struct.struct_large_char* %a, i32 0, i32 0
- %arrayidx12 = getelementptr inbounds [8 x i8]* %foo, i32 0, i64 0
+ %foo = getelementptr inbounds %struct.struct_large_char, %struct.struct_large_char* %a, i32 0, i32 0
+ %arrayidx12 = getelementptr inbounds [8 x i8], [8 x i8]* %foo, i32 0, i64 0
store i8 %call11, i8* %arrayidx12, align 1
call void @end_struct_large_char()
%call13 = call signext i8 @get_struct_small_char()
- %foo14 = getelementptr inbounds %struct.struct_small_char* %b, i32 0, i32 0
- %arrayidx15 = getelementptr inbounds [2 x i8]* %foo14, i32 0, i64 0
+ %foo14 = getelementptr inbounds %struct.struct_small_char, %struct.struct_small_char* %b, i32 0, i32 0
+ %arrayidx15 = getelementptr inbounds [2 x i8], [2 x i8]* %foo14, i32 0, i64 0
store i8 %call13, i8* %arrayidx15, align 1
call void @end_struct_small_char()
%call16 = call i32 @get_struct_large_nonchar()
- %foo17 = getelementptr inbounds %struct.struct_large_nonchar* %c, i32 0, i32 0
- %arrayidx18 = getelementptr inbounds [8 x i32]* %foo17, i32 0, i64 0
+ %foo17 = getelementptr inbounds %struct.struct_large_nonchar, %struct.struct_large_nonchar* %c, i32 0, i32 0
+ %arrayidx18 = getelementptr inbounds [8 x i32], [8 x i32]* %foo17, i32 0, i64 0
store i32 %call16, i32* %arrayidx18, align 4
call void @end_struct_large_nonchar()
%call19 = call signext i16 @get_struct_small_nonchar()
- %foo20 = getelementptr inbounds %struct.struct_small_nonchar* %d, i32 0, i32 0
- %arrayidx21 = getelementptr inbounds [2 x i16]* %foo20, i32 0, i64 0
+ %foo20 = getelementptr inbounds %struct.struct_small_nonchar, %struct.struct_small_nonchar* %d, i32 0, i32 0
+ %arrayidx21 = getelementptr inbounds [2 x i16], [2 x i16]* %foo20, i32 0, i64 0
store i16 %call19, i16* %arrayidx21, align 2
call void @end_struct_small_nonchar()
- %arraydecay = getelementptr inbounds [8 x i8]* %large, i32 0, i32 0
- %arraydecay22 = getelementptr inbounds [2 x i8]* %small, i32 0, i32 0
- %arraydecay23 = getelementptr inbounds [8 x i32]* %large2, i32 0, i32 0
- %arraydecay24 = getelementptr inbounds [2 x i16]* %small2, i32 0, i32 0
- %0 = load i32* %x, align 4
- %1 = load i32* %y, align 4
- %2 = load i32* %z, align 4
- %coerce.dive = getelementptr %struct.struct_large_char* %a, i32 0, i32 0
+ %arraydecay = getelementptr inbounds [8 x i8], [8 x i8]* %large, i32 0, i32 0
+ %arraydecay22 = getelementptr inbounds [2 x i8], [2 x i8]* %small, i32 0, i32 0
+ %arraydecay23 = getelementptr inbounds [8 x i32], [8 x i32]* %large2, i32 0, i32 0
+ %arraydecay24 = getelementptr inbounds [2 x i16], [2 x i16]* %small2, i32 0, i32 0
+ %0 = load i32, i32* %x, align 4
+ %1 = load i32, i32* %y, align 4
+ %2 = load i32, i32* %z, align 4
+ %coerce.dive = getelementptr %struct.struct_large_char, %struct.struct_large_char* %a, i32 0, i32 0
%3 = bitcast [8 x i8]* %coerce.dive to i64*
- %4 = load i64* %3, align 1
- %coerce.dive25 = getelementptr %struct.struct_small_char* %b, i32 0, i32 0
+ %4 = load i64, i64* %3, align 1
+ %coerce.dive25 = getelementptr %struct.struct_small_char, %struct.struct_small_char* %b, i32 0, i32 0
%5 = bitcast [2 x i8]* %coerce.dive25 to i16*
- %6 = load i16* %5, align 1
- %coerce.dive26 = getelementptr %struct.struct_small_nonchar* %d, i32 0, i32 0
+ %6 = load i16, i16* %5, align 1
+ %coerce.dive26 = getelementptr %struct.struct_small_nonchar, %struct.struct_small_nonchar* %d, i32 0, i32 0
%7 = bitcast [2 x i16]* %coerce.dive26 to i32*
- %8 = load i32* %7, align 1
+ %8 = load i32, i32* %7, align 1
call void @takes_all(i64 %4, i16 %6, %struct.struct_large_nonchar* byval align 8 %c, i32 %8, i8* %arraydecay, i8* %arraydecay22, i32* %arraydecay23, i16* %arraydecay24, i32* %ptr, i32 %0, i32 %1, i32 %2)
ret void
}
@@ -258,57 +258,57 @@ entry:
store i32 %call3, i32* %ptr, align 4
call void @end_addrof()
%call4 = call signext i16 @get_small_nonchar()
- %arrayidx = getelementptr inbounds [2 x i16]* %small2, i32 0, i64 0
+ %arrayidx = getelementptr inbounds [2 x i16], [2 x i16]* %small2, i32 0, i64 0
store i16 %call4, i16* %arrayidx, align 2
call void @end_small_nonchar()
%call5 = call i32 @get_large_nonchar()
- %arrayidx6 = getelementptr inbounds [8 x i32]* %large2, i32 0, i64 0
+ %arrayidx6 = getelementptr inbounds [8 x i32], [8 x i32]* %large2, i32 0, i64 0
store i32 %call5, i32* %arrayidx6, align 4
call void @end_large_nonchar()
%call7 = call signext i8 @get_small_char()
- %arrayidx8 = getelementptr inbounds [2 x i8]* %small, i32 0, i64 0
+ %arrayidx8 = getelementptr inbounds [2 x i8], [2 x i8]* %small, i32 0, i64 0
store i8 %call7, i8* %arrayidx8, align 1
call void @end_small_char()
%call9 = call signext i8 @get_large_char()
- %arrayidx10 = getelementptr inbounds [8 x i8]* %large, i32 0, i64 0
+ %arrayidx10 = getelementptr inbounds [8 x i8], [8 x i8]* %large, i32 0, i64 0
store i8 %call9, i8* %arrayidx10, align 1
call void @end_large_char()
%call11 = call signext i8 @get_struct_large_char()
- %foo = getelementptr inbounds %struct.struct_large_char* %a, i32 0, i32 0
- %arrayidx12 = getelementptr inbounds [8 x i8]* %foo, i32 0, i64 0
+ %foo = getelementptr inbounds %struct.struct_large_char, %struct.struct_large_char* %a, i32 0, i32 0
+ %arrayidx12 = getelementptr inbounds [8 x i8], [8 x i8]* %foo, i32 0, i64 0
store i8 %call11, i8* %arrayidx12, align 1
call void @end_struct_large_char()
%call13 = call signext i8 @get_struct_small_char()
- %foo14 = getelementptr inbounds %struct.struct_small_char* %b, i32 0, i32 0
- %arrayidx15 = getelementptr inbounds [2 x i8]* %foo14, i32 0, i64 0
+ %foo14 = getelementptr inbounds %struct.struct_small_char, %struct.struct_small_char* %b, i32 0, i32 0
+ %arrayidx15 = getelementptr inbounds [2 x i8], [2 x i8]* %foo14, i32 0, i64 0
store i8 %call13, i8* %arrayidx15, align 1
call void @end_struct_small_char()
%call16 = call i32 @get_struct_large_nonchar()
- %foo17 = getelementptr inbounds %struct.struct_large_nonchar* %c, i32 0, i32 0
- %arrayidx18 = getelementptr inbounds [8 x i32]* %foo17, i32 0, i64 0
+ %foo17 = getelementptr inbounds %struct.struct_large_nonchar, %struct.struct_large_nonchar* %c, i32 0, i32 0
+ %arrayidx18 = getelementptr inbounds [8 x i32], [8 x i32]* %foo17, i32 0, i64 0
store i32 %call16, i32* %arrayidx18, align 4
call void @end_struct_large_nonchar()
%call19 = call signext i16 @get_struct_small_nonchar()
- %foo20 = getelementptr inbounds %struct.struct_small_nonchar* %d, i32 0, i32 0
- %arrayidx21 = getelementptr inbounds [2 x i16]* %foo20, i32 0, i64 0
+ %foo20 = getelementptr inbounds %struct.struct_small_nonchar, %struct.struct_small_nonchar* %d, i32 0, i32 0
+ %arrayidx21 = getelementptr inbounds [2 x i16], [2 x i16]* %foo20, i32 0, i64 0
store i16 %call19, i16* %arrayidx21, align 2
call void @end_struct_small_nonchar()
- %arraydecay = getelementptr inbounds [8 x i8]* %large, i32 0, i32 0
- %arraydecay22 = getelementptr inbounds [2 x i8]* %small, i32 0, i32 0
- %arraydecay23 = getelementptr inbounds [8 x i32]* %large2, i32 0, i32 0
- %arraydecay24 = getelementptr inbounds [2 x i16]* %small2, i32 0, i32 0
- %0 = load i32* %x, align 4
- %1 = load i32* %y, align 4
- %2 = load i32* %z, align 4
- %coerce.dive = getelementptr %struct.struct_large_char* %a, i32 0, i32 0
+ %arraydecay = getelementptr inbounds [8 x i8], [8 x i8]* %large, i32 0, i32 0
+ %arraydecay22 = getelementptr inbounds [2 x i8], [2 x i8]* %small, i32 0, i32 0
+ %arraydecay23 = getelementptr inbounds [8 x i32], [8 x i32]* %large2, i32 0, i32 0
+ %arraydecay24 = getelementptr inbounds [2 x i16], [2 x i16]* %small2, i32 0, i32 0
+ %0 = load i32, i32* %x, align 4
+ %1 = load i32, i32* %y, align 4
+ %2 = load i32, i32* %z, align 4
+ %coerce.dive = getelementptr %struct.struct_large_char, %struct.struct_large_char* %a, i32 0, i32 0
%3 = bitcast [8 x i8]* %coerce.dive to i64*
- %4 = load i64* %3, align 1
- %coerce.dive25 = getelementptr %struct.struct_small_char* %b, i32 0, i32 0
+ %4 = load i64, i64* %3, align 1
+ %coerce.dive25 = getelementptr %struct.struct_small_char, %struct.struct_small_char* %b, i32 0, i32 0
%5 = bitcast [2 x i8]* %coerce.dive25 to i16*
- %6 = load i16* %5, align 1
- %coerce.dive26 = getelementptr %struct.struct_small_nonchar* %d, i32 0, i32 0
+ %6 = load i16, i16* %5, align 1
+ %coerce.dive26 = getelementptr %struct.struct_small_nonchar, %struct.struct_small_nonchar* %d, i32 0, i32 0
%7 = bitcast [2 x i16]* %coerce.dive26 to i32*
- %8 = load i32* %7, align 1
+ %8 = load i32, i32* %7, align 1
call void @takes_all(i64 %4, i16 %6, %struct.struct_large_nonchar* byval align 8 %c, i32 %8, i8* %arraydecay, i8* %arraydecay22, i32* %arraydecay23, i16* %arraydecay24, i32* %ptr, i32 %0, i32 %1, i32 %2)
ret void
}
@@ -390,57 +390,57 @@ entry:
store i32 %call3, i32* %ptr, align 4
call void @end_addrof()
%call4 = call signext i16 @get_small_nonchar()
- %arrayidx = getelementptr inbounds [2 x i16]* %small2, i32 0, i64 0
+ %arrayidx = getelementptr inbounds [2 x i16], [2 x i16]* %small2, i32 0, i64 0
store i16 %call4, i16* %arrayidx, align 2
call void @end_small_nonchar()
%call5 = call i32 @get_large_nonchar()
- %arrayidx6 = getelementptr inbounds [8 x i32]* %large2, i32 0, i64 0
+ %arrayidx6 = getelementptr inbounds [8 x i32], [8 x i32]* %large2, i32 0, i64 0
store i32 %call5, i32* %arrayidx6, align 4
call void @end_large_nonchar()
%call7 = call signext i8 @get_small_char()
- %arrayidx8 = getelementptr inbounds [2 x i8]* %small, i32 0, i64 0
+ %arrayidx8 = getelementptr inbounds [2 x i8], [2 x i8]* %small, i32 0, i64 0
store i8 %call7, i8* %arrayidx8, align 1
call void @end_small_char()
%call9 = call signext i8 @get_large_char()
- %arrayidx10 = getelementptr inbounds [8 x i8]* %large, i32 0, i64 0
+ %arrayidx10 = getelementptr inbounds [8 x i8], [8 x i8]* %large, i32 0, i64 0
store i8 %call9, i8* %arrayidx10, align 1
call void @end_large_char()
%call11 = call signext i8 @get_struct_large_char()
- %foo = getelementptr inbounds %struct.struct_large_char* %a, i32 0, i32 0
- %arrayidx12 = getelementptr inbounds [8 x i8]* %foo, i32 0, i64 0
+ %foo = getelementptr inbounds %struct.struct_large_char, %struct.struct_large_char* %a, i32 0, i32 0
+ %arrayidx12 = getelementptr inbounds [8 x i8], [8 x i8]* %foo, i32 0, i64 0
store i8 %call11, i8* %arrayidx12, align 1
call void @end_struct_large_char()
%call13 = call signext i8 @get_struct_small_char()
- %foo14 = getelementptr inbounds %struct.struct_small_char* %b, i32 0, i32 0
- %arrayidx15 = getelementptr inbounds [2 x i8]* %foo14, i32 0, i64 0
+ %foo14 = getelementptr inbounds %struct.struct_small_char, %struct.struct_small_char* %b, i32 0, i32 0
+ %arrayidx15 = getelementptr inbounds [2 x i8], [2 x i8]* %foo14, i32 0, i64 0
store i8 %call13, i8* %arrayidx15, align 1
call void @end_struct_small_char()
%call16 = call i32 @get_struct_large_nonchar()
- %foo17 = getelementptr inbounds %struct.struct_large_nonchar* %c, i32 0, i32 0
- %arrayidx18 = getelementptr inbounds [8 x i32]* %foo17, i32 0, i64 0
+ %foo17 = getelementptr inbounds %struct.struct_large_nonchar, %struct.struct_large_nonchar* %c, i32 0, i32 0
+ %arrayidx18 = getelementptr inbounds [8 x i32], [8 x i32]* %foo17, i32 0, i64 0
store i32 %call16, i32* %arrayidx18, align 4
call void @end_struct_large_nonchar()
%call19 = call signext i16 @get_struct_small_nonchar()
- %foo20 = getelementptr inbounds %struct.struct_small_nonchar* %d, i32 0, i32 0
- %arrayidx21 = getelementptr inbounds [2 x i16]* %foo20, i32 0, i64 0
+ %foo20 = getelementptr inbounds %struct.struct_small_nonchar, %struct.struct_small_nonchar* %d, i32 0, i32 0
+ %arrayidx21 = getelementptr inbounds [2 x i16], [2 x i16]* %foo20, i32 0, i64 0
store i16 %call19, i16* %arrayidx21, align 2
call void @end_struct_small_nonchar()
- %arraydecay = getelementptr inbounds [8 x i8]* %large, i32 0, i32 0
- %arraydecay22 = getelementptr inbounds [2 x i8]* %small, i32 0, i32 0
- %arraydecay23 = getelementptr inbounds [8 x i32]* %large2, i32 0, i32 0
- %arraydecay24 = getelementptr inbounds [2 x i16]* %small2, i32 0, i32 0
- %0 = load i32* %x, align 4
- %1 = load i32* %y, align 4
- %2 = load i32* %z, align 4
- %coerce.dive = getelementptr %struct.struct_large_char* %a, i32 0, i32 0
+ %arraydecay = getelementptr inbounds [8 x i8], [8 x i8]* %large, i32 0, i32 0
+ %arraydecay22 = getelementptr inbounds [2 x i8], [2 x i8]* %small, i32 0, i32 0
+ %arraydecay23 = getelementptr inbounds [8 x i32], [8 x i32]* %large2, i32 0, i32 0
+ %arraydecay24 = getelementptr inbounds [2 x i16], [2 x i16]* %small2, i32 0, i32 0
+ %0 = load i32, i32* %x, align 4
+ %1 = load i32, i32* %y, align 4
+ %2 = load i32, i32* %z, align 4
+ %coerce.dive = getelementptr %struct.struct_large_char, %struct.struct_large_char* %a, i32 0, i32 0
%3 = bitcast [8 x i8]* %coerce.dive to i64*
- %4 = load i64* %3, align 1
- %coerce.dive25 = getelementptr %struct.struct_small_char* %b, i32 0, i32 0
+ %4 = load i64, i64* %3, align 1
+ %coerce.dive25 = getelementptr %struct.struct_small_char, %struct.struct_small_char* %b, i32 0, i32 0
%5 = bitcast [2 x i8]* %coerce.dive25 to i16*
- %6 = load i16* %5, align 1
- %coerce.dive26 = getelementptr %struct.struct_small_nonchar* %d, i32 0, i32 0
+ %6 = load i16, i16* %5, align 1
+ %coerce.dive26 = getelementptr %struct.struct_small_nonchar, %struct.struct_small_nonchar* %d, i32 0, i32 0
%7 = bitcast [2 x i16]* %coerce.dive26 to i32*
- %8 = load i32* %7, align 1
+ %8 = load i32, i32* %7, align 1
call void @takes_all(i64 %4, i16 %6, %struct.struct_large_nonchar* byval align 8 %c, i32 %8, i8* %arraydecay, i8* %arraydecay22, i32* %arraydecay23, i16* %arraydecay24, i32* %ptr, i32 %0, i32 %1, i32 %2)
ret void
}
@@ -461,11 +461,11 @@ entry:
store i32 %call, i32* %x, align 4
call void @end_scalar1()
%call1 = call signext i8 @get_large_char()
- %arrayidx = getelementptr inbounds [8 x i8]* %large, i32 0, i64 0
+ %arrayidx = getelementptr inbounds [8 x i8], [8 x i8]* %large, i32 0, i64 0
store i8 %call1, i8* %arrayidx, align 1
call void @end_large_char()
- %0 = load i32* %x, align 4
- %arraydecay = getelementptr inbounds [8 x i8]* %large, i32 0, i32 0
+ %0 = load i32, i32* %x, align 4
+ %arraydecay = getelementptr inbounds [8 x i8], [8 x i8]* %large, i32 0, i32 0
call void @takes_two(i32 %0, i8* %arraydecay)
ret void
}
diff --git a/test/CodeGen/X86/stack-align.ll b/test/CodeGen/X86/stack-align.ll
index eafb7c29fa0a..0cff95f266a9 100644
--- a/test/CodeGen/X86/stack-align.ll
+++ b/test/CodeGen/X86/stack-align.ll
@@ -1,7 +1,10 @@
; RUN: llc < %s -relocation-model=static -mcpu=yonah | FileCheck %s
-; The double argument is at 4(esp) which is 16-byte aligned, allowing us to
-; fold the load into the andpd.
+; The double argument is at 4(esp) which is 16-byte aligned, but we
+; are required to read in extra bytes of memory in order to fold the
+; load. Bad Things may happen when reading/processing undefined bytes,
+; so don't fold the load.
+; PR22371 / http://reviews.llvm.org/D7474
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i686-apple-darwin8"
@@ -9,28 +12,37 @@ target triple = "i686-apple-darwin8"
define void @test({ double, double }* byval %z, double* %P) nounwind {
entry:
- %tmp3 = load double* @G, align 16 ; <double> [#uses=1]
+ %tmp3 = load double, double* @G, align 16 ; <double> [#uses=1]
%tmp4 = tail call double @fabs( double %tmp3 ) readnone ; <double> [#uses=1]
store volatile double %tmp4, double* %P
- %tmp = getelementptr { double, double }* %z, i32 0, i32 0 ; <double*> [#uses=1]
- %tmp1 = load volatile double* %tmp, align 8 ; <double> [#uses=1]
+ %tmp = getelementptr { double, double }, { double, double }* %z, i32 0, i32 0 ; <double*> [#uses=1]
+ %tmp1 = load volatile double, double* %tmp, align 8 ; <double> [#uses=1]
%tmp2 = tail call double @fabs( double %tmp1 ) readnone ; <double> [#uses=1]
- ; CHECK: andpd{{.*}}4(%esp), %xmm
%tmp6 = fadd double %tmp4, %tmp2 ; <double> [#uses=1]
store volatile double %tmp6, double* %P, align 8
ret void
+
+; CHECK-LABEL: test:
+; CHECK: movsd {{.*}}G, %xmm{{.*}}
+; CHECK: andpd %xmm{{.*}}, %xmm{{.*}}
+; CHECK: movsd 4(%esp), %xmm{{.*}}
+; CHECK: andpd %xmm{{.*}}, %xmm{{.*}}
+
+
}
define void @test2() alignstack(16) nounwind {
entry:
- ; CHECK: andl{{.*}}$-16, %esp
+; CHECK-LABEL: test2:
+; CHECK: andl{{.*}}$-16, %esp
ret void
}
; Use a call to force a spill.
define <2 x double> @test3(<2 x double> %x, <2 x double> %y) alignstack(32) nounwind {
entry:
- ; CHECK: andl{{.*}}$-32, %esp
+; CHECK-LABEL: test3:
+; CHECK: andl{{.*}}$-32, %esp
call void @test2()
%A = fmul <2 x double> %x, %y
ret <2 x double> %A
diff --git a/test/CodeGen/X86/stack-folding-3dnow.ll b/test/CodeGen/X86/stack-folding-3dnow.ll
new file mode 100644
index 000000000000..955bf44999cb
--- /dev/null
+++ b/test/CodeGen/X86/stack-folding-3dnow.ll
@@ -0,0 +1,217 @@
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+3dnow | FileCheck %s
+
+define x86_mmx @stack_fold_pavgusb(x86_mmx %a, x86_mmx %b) {
+ ;CHECK-LABEL: stack_fold_pavgusb
+ ;CHECK: pavgusb {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.3dnow.pavgusb(x86_mmx %a, x86_mmx %b) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.3dnow.pavgusb(x86_mmx, x86_mmx) nounwind readnone
+
+define x86_mmx @stack_fold_pf2id(x86_mmx %a) {
+ ;CHECK-LABEL: stack_fold_pf2id
+ ;CHECK: pf2id {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm1},~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.3dnow.pf2id(x86_mmx %a) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.3dnow.pf2id(x86_mmx) nounwind readnone
+
+define x86_mmx @stack_fold_pf2iw(x86_mmx %a) {
+ ;CHECK-LABEL: stack_fold_pf2iw
+ ;CHECK: pf2iw {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm1},~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.3dnowa.pf2iw(x86_mmx %a) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.3dnowa.pf2iw(x86_mmx) nounwind readnone
+
+define x86_mmx @stack_fold_pfacc(x86_mmx %a, x86_mmx %b) {
+ ;CHECK-LABEL: stack_fold_pfacc
+ ;CHECK: pfacc {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.3dnow.pfacc(x86_mmx %a, x86_mmx %b) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.3dnow.pfacc(x86_mmx, x86_mmx) nounwind readnone
+
+define x86_mmx @stack_fold_pfadd(x86_mmx %a, x86_mmx %b) {
+ ;CHECK-LABEL: stack_fold_pfadd
+ ;CHECK: pfadd {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.3dnow.pfadd(x86_mmx %a, x86_mmx %b) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.3dnow.pfadd(x86_mmx, x86_mmx) nounwind readnone
+
+define x86_mmx @stack_fold_pfcmpeq(x86_mmx %a, x86_mmx %b) {
+ ;CHECK-LABEL: stack_fold_pfcmpeq
+ ;CHECK: pfcmpeq {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.3dnow.pfcmpeq(x86_mmx %a, x86_mmx %b) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.3dnow.pfcmpeq(x86_mmx, x86_mmx) nounwind readnone
+
+define x86_mmx @stack_fold_pfcmpge(x86_mmx %a, x86_mmx %b) {
+ ;CHECK-LABEL: stack_fold_pfcmpge
+ ;CHECK: pfcmpge {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.3dnow.pfcmpge(x86_mmx %a, x86_mmx %b) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.3dnow.pfcmpge(x86_mmx, x86_mmx) nounwind readnone
+
+define x86_mmx @stack_fold_pfcmpgt(x86_mmx %a, x86_mmx %b) {
+ ;CHECK-LABEL: stack_fold_pfcmpgt
+ ;CHECK: pfcmpgt {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.3dnow.pfcmpgt(x86_mmx %a, x86_mmx %b) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.3dnow.pfcmpgt(x86_mmx, x86_mmx) nounwind readnone
+
+define x86_mmx @stack_fold_pfmax(x86_mmx %a, x86_mmx %b) {
+ ;CHECK-LABEL: stack_fold_pfmax
+ ;CHECK: pfmax {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.3dnow.pfmax(x86_mmx %a, x86_mmx %b) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.3dnow.pfmax(x86_mmx, x86_mmx) nounwind readnone
+
+define x86_mmx @stack_fold_pfmin(x86_mmx %a, x86_mmx %b) {
+ ;CHECK-LABEL: stack_fold_pfmin
+ ;CHECK: pfmin {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.3dnow.pfmin(x86_mmx %a, x86_mmx %b) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.3dnow.pfmin(x86_mmx, x86_mmx) nounwind readnone
+
+define x86_mmx @stack_fold_pfmul(x86_mmx %a, x86_mmx %b) {
+ ;CHECK-LABEL: stack_fold_pfmul
+ ;CHECK: pfmul {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.3dnow.pfmul(x86_mmx %a, x86_mmx %b) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.3dnow.pfmul(x86_mmx, x86_mmx) nounwind readnone
+
+define x86_mmx @stack_fold_pfnacc(x86_mmx %a, x86_mmx %b) {
+ ;CHECK-LABEL: stack_fold_pfnacc
+ ;CHECK: pfnacc {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.3dnowa.pfnacc(x86_mmx %a, x86_mmx %b) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.3dnowa.pfnacc(x86_mmx, x86_mmx) nounwind readnone
+
+define x86_mmx @stack_fold_pfpnacc(x86_mmx %a, x86_mmx %b) {
+ ;CHECK-LABEL: stack_fold_pfpnacc
+ ;CHECK: pfpnacc {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.3dnowa.pfpnacc(x86_mmx %a, x86_mmx %b) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.3dnowa.pfpnacc(x86_mmx, x86_mmx) nounwind readnone
+
+define x86_mmx @stack_fold_pfrcp(x86_mmx %a) {
+ ;CHECK-LABEL: stack_fold_pfrcp
+ ;CHECK: pfrcp {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm1},~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.3dnow.pfrcp(x86_mmx %a) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.3dnow.pfrcp(x86_mmx) nounwind readnone
+
+define x86_mmx @stack_fold_pfrcpit1(x86_mmx %a, x86_mmx %b) {
+ ;CHECK-LABEL: stack_fold_pfrcpit1
+ ;CHECK: pfrcpit1 {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.3dnow.pfrcpit1(x86_mmx %a, x86_mmx %b) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.3dnow.pfrcpit1(x86_mmx, x86_mmx) nounwind readnone
+
+define x86_mmx @stack_fold_pfrcpit2(x86_mmx %a, x86_mmx %b) {
+ ;CHECK-LABEL: stack_fold_pfrcpit2
+ ;CHECK: pfrcpit2 {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.3dnow.pfrcpit2(x86_mmx %a, x86_mmx %b) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.3dnow.pfrcpit2(x86_mmx, x86_mmx) nounwind readnone
+
+define x86_mmx @stack_fold_pfrsqit1(x86_mmx %a, x86_mmx %b) {
+ ;CHECK-LABEL: stack_fold_pfrsqit1
+ ;CHECK: pfrsqit1 {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.3dnow.pfrsqit1(x86_mmx %a, x86_mmx %b) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.3dnow.pfrsqit1(x86_mmx, x86_mmx) nounwind readnone
+
+define x86_mmx @stack_fold_pfrsqrt(x86_mmx %a) {
+ ;CHECK-LABEL: stack_fold_pfrsqrt
+ ;CHECK: pfrsqrt {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm1},~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.3dnow.pfrsqrt(x86_mmx %a) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.3dnow.pfrsqrt(x86_mmx) nounwind readnone
+
+define x86_mmx @stack_fold_pfsub(x86_mmx %a, x86_mmx %b) {
+ ;CHECK-LABEL: stack_fold_pfsub
+ ;CHECK: pfsub {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.3dnow.pfsub(x86_mmx %a, x86_mmx %b) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.3dnow.pfsub(x86_mmx, x86_mmx) nounwind readnone
+
+define x86_mmx @stack_fold_pfsubr(x86_mmx %a, x86_mmx %b) {
+ ;CHECK-LABEL: stack_fold_pfsubr
+ ;CHECK: pfsubr {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.3dnow.pfsubr(x86_mmx %a, x86_mmx %b) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.3dnow.pfsubr(x86_mmx, x86_mmx) nounwind readnone
+
+define x86_mmx @stack_fold_pi2fd(x86_mmx %a) {
+ ;CHECK-LABEL: stack_fold_pi2fd
+ ;CHECK: pi2fd {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm1},~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.3dnow.pi2fd(x86_mmx %a) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.3dnow.pi2fd(x86_mmx) nounwind readnone
+
+define x86_mmx @stack_fold_pi2fw(x86_mmx %a) {
+ ;CHECK-LABEL: stack_fold_pi2fw
+ ;CHECK: pi2fw {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm1},~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.3dnowa.pi2fw(x86_mmx %a) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.3dnowa.pi2fw(x86_mmx) nounwind readnone
+
+define x86_mmx @stack_fold_pmulhrw(x86_mmx %a, x86_mmx %b) {
+ ;CHECK-LABEL: stack_fold_pmulhrw
+ ;CHECK: pmulhrw {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.3dnow.pmulhrw(x86_mmx %a, x86_mmx %b) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.3dnow.pmulhrw(x86_mmx, x86_mmx) nounwind readnone
+
+define x86_mmx @stack_fold_pswapd(x86_mmx %a) {
+ ;CHECK-LABEL: stack_fold_pswapd
+ ;CHECK: pswapd {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm1},~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.3dnowa.pswapd(x86_mmx %a) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.3dnowa.pswapd(x86_mmx) nounwind readnone
diff --git a/test/CodeGen/X86/stack-folding-fp-avx1.ll b/test/CodeGen/X86/stack-folding-fp-avx1.ll
new file mode 100644
index 000000000000..c7c1fc946386
--- /dev/null
+++ b/test/CodeGen/X86/stack-folding-fp-avx1.ll
@@ -0,0 +1,1827 @@
+; RUN: llc -O3 -disable-peephole -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx,+f16c < %s | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-unknown"
+
+; Stack reload folding tests.
+;
+; By including a nop call with side effects we can force a partial register spill of the
+; relevant registers and check that the reload is correctly folded into the instruction.
+
+define <2 x double> @stack_fold_addpd(<2 x double> %a0, <2 x double> %a1) {
+ ;CHECK-LABEL: stack_fold_addpd
+ ;CHECK: vaddpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = fadd <2 x double> %a0, %a1
+ ret <2 x double> %2
+}
+
+define <4 x double> @stack_fold_addpd_ymm(<4 x double> %a0, <4 x double> %a1) {
+ ;CHECK-LABEL: stack_fold_addpd_ymm
+ ;CHECK: vaddpd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = fadd <4 x double> %a0, %a1
+ ret <4 x double> %2
+}
+
+define <4 x float> @stack_fold_addps(<4 x float> %a0, <4 x float> %a1) {
+ ;CHECK-LABEL: stack_fold_addps
+ ;CHECK: vaddps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = fadd <4 x float> %a0, %a1
+ ret <4 x float> %2
+}
+
+define <8 x float> @stack_fold_addps_ymm(<8 x float> %a0, <8 x float> %a1) {
+ ;CHECK-LABEL: stack_fold_addps_ymm
+ ;CHECK: vaddps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = fadd <8 x float> %a0, %a1
+ ret <8 x float> %2
+}
+
+define double @stack_fold_addsd(double %a0, double %a1) {
+ ;CHECK-LABEL: stack_fold_addsd
+ ;CHECK: vaddsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = fadd double %a0, %a1
+ ret double %2
+}
+
+define <2 x double> @stack_fold_addsd_int(<2 x double> %a0, <2 x double> %a1) {
+ ;CHECK-LABEL: stack_fold_addsd_int
+ ;CHECK: vaddsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <2 x double> @llvm.x86.sse2.add.sd(<2 x double> %a0, <2 x double> %a1)
+ ret <2 x double> %2
+}
+declare <2 x double> @llvm.x86.sse2.add.sd(<2 x double>, <2 x double>) nounwind readnone
+
+define float @stack_fold_addss(float %a0, float %a1) {
+ ;CHECK-LABEL: stack_fold_addss
+ ;CHECK: vaddss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = fadd float %a0, %a1
+ ret float %2
+}
+
+define <4 x float> @stack_fold_addss_int(<4 x float> %a0, <4 x float> %a1) {
+ ;CHECK-LABEL: stack_fold_addss_int
+ ;CHECK: vaddss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <4 x float> @llvm.x86.sse.add.ss(<4 x float> %a0, <4 x float> %a1)
+ ret <4 x float> %2
+}
+declare <4 x float> @llvm.x86.sse.add.ss(<4 x float>, <4 x float>) nounwind readnone
+
+define <2 x double> @stack_fold_addsubpd(<2 x double> %a0, <2 x double> %a1) {
+ ;CHECK-LABEL: stack_fold_addsubpd
+ ;CHECK: vaddsubpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double> %a0, <2 x double> %a1)
+ ret <2 x double> %2
+}
+declare <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double>, <2 x double>) nounwind readnone
+
+define <4 x double> @stack_fold_addsubpd_ymm(<4 x double> %a0, <4 x double> %a1) {
+ ;CHECK-LABEL: stack_fold_addsubpd_ymm
+ ;CHECK: vaddsubpd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <4 x double> @llvm.x86.avx.addsub.pd.256(<4 x double> %a0, <4 x double> %a1)
+ ret <4 x double> %2
+}
+declare <4 x double> @llvm.x86.avx.addsub.pd.256(<4 x double>, <4 x double>) nounwind readnone
+
+define <4 x float> @stack_fold_addsubps(<4 x float> %a0, <4 x float> %a1) {
+ ;CHECK-LABEL: stack_fold_addsubps
+ ;CHECK: vaddsubps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float> %a0, <4 x float> %a1)
+ ret <4 x float> %2
+}
+declare <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float>, <4 x float>) nounwind readnone
+
+define <8 x float> @stack_fold_addsubps_ymm(<8 x float> %a0, <8 x float> %a1) {
+ ;CHECK-LABEL: stack_fold_addsubps_ymm
+ ;CHECK: vaddsubps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <8 x float> @llvm.x86.avx.addsub.ps.256(<8 x float> %a0, <8 x float> %a1)
+ ret <8 x float> %2
+}
+declare <8 x float> @llvm.x86.avx.addsub.ps.256(<8 x float>, <8 x float>) nounwind readnone
+
+define <2 x double> @stack_fold_andnpd(<2 x double> %a0, <2 x double> %a1) {
+ ;CHECK-LABEL: stack_fold_andnpd
+ ;CHECK: vandnpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = bitcast <2 x double> %a0 to <2 x i64>
+ %3 = bitcast <2 x double> %a1 to <2 x i64>
+ %4 = xor <2 x i64> %2, <i64 -1, i64 -1>
+ %5 = and <2 x i64> %4, %3
+ %6 = bitcast <2 x i64> %5 to <2 x double>
+ ; fadd forces execution domain
+ %7 = fadd <2 x double> %6, <double 0x0, double 0x0>
+ ret <2 x double> %7
+}
+
+define <4 x double> @stack_fold_andnpd_ymm(<4 x double> %a0, <4 x double> %a1) {
+ ;CHECK-LABEL: stack_fold_andnpd_ymm
+ ;CHECK: vandnpd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = bitcast <4 x double> %a0 to <4 x i64>
+ %3 = bitcast <4 x double> %a1 to <4 x i64>
+ %4 = xor <4 x i64> %2, <i64 -1, i64 -1, i64 -1, i64 -1>
+ %5 = and <4 x i64> %4, %3
+ %6 = bitcast <4 x i64> %5 to <4 x double>
+ ; fadd forces execution domain
+ %7 = fadd <4 x double> %6, <double 0x0, double 0x0, double 0x0, double 0x0>
+ ret <4 x double> %7
+}
+
+define <4 x float> @stack_fold_andnps(<4 x float> %a0, <4 x float> %a1) {
+ ;CHECK-LABEL: stack_fold_andnps
+ ;CHECK: vandnps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = bitcast <4 x float> %a0 to <2 x i64>
+ %3 = bitcast <4 x float> %a1 to <2 x i64>
+ %4 = xor <2 x i64> %2, <i64 -1, i64 -1>
+ %5 = and <2 x i64> %4, %3
+ %6 = bitcast <2 x i64> %5 to <4 x float>
+ ; fadd forces execution domain
+ %7 = fadd <4 x float> %6, <float 0x0, float 0x0, float 0x0, float 0x0>
+ ret <4 x float> %7
+}
+
+define <8 x float> @stack_fold_andnps_ymm(<8 x float> %a0, <8 x float> %a1) {
+ ;CHECK-LABEL: stack_fold_andnps_ymm
+ ;CHECK: vandnps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = bitcast <8 x float> %a0 to <4 x i64>
+ %3 = bitcast <8 x float> %a1 to <4 x i64>
+ %4 = xor <4 x i64> %2, <i64 -1, i64 -1, i64 -1, i64 -1>
+ %5 = and <4 x i64> %4, %3
+ %6 = bitcast <4 x i64> %5 to <8 x float>
+ ; fadd forces execution domain
+ %7 = fadd <8 x float> %6, <float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0>
+ ret <8 x float> %7
+}
+
+define <2 x double> @stack_fold_andpd(<2 x double> %a0, <2 x double> %a1) {
+ ;CHECK-LABEL: stack_fold_andpd
+ ;CHECK: vandpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = bitcast <2 x double> %a0 to <2 x i64>
+ %3 = bitcast <2 x double> %a1 to <2 x i64>
+ %4 = and <2 x i64> %2, %3
+ %5 = bitcast <2 x i64> %4 to <2 x double>
+ ; fadd forces execution domain
+ %6 = fadd <2 x double> %5, <double 0x0, double 0x0>
+ ret <2 x double> %6
+}
+
+define <4 x double> @stack_fold_andpd_ymm(<4 x double> %a0, <4 x double> %a1) {
+ ;CHECK-LABEL: stack_fold_andpd_ymm
+ ;CHECK: vandpd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = bitcast <4 x double> %a0 to <4 x i64>
+ %3 = bitcast <4 x double> %a1 to <4 x i64>
+ %4 = and <4 x i64> %2, %3
+ %5 = bitcast <4 x i64> %4 to <4 x double>
+ ; fadd forces execution domain
+ %6 = fadd <4 x double> %5, <double 0x0, double 0x0, double 0x0, double 0x0>
+ ret <4 x double> %6
+}
+
+define <4 x float> @stack_fold_andps(<4 x float> %a0, <4 x float> %a1) {
+ ;CHECK-LABEL: stack_fold_andps
+ ;CHECK: vandps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = bitcast <4 x float> %a0 to <2 x i64>
+ %3 = bitcast <4 x float> %a1 to <2 x i64>
+ %4 = and <2 x i64> %2, %3
+ %5 = bitcast <2 x i64> %4 to <4 x float>
+ ; fadd forces execution domain
+ %6 = fadd <4 x float> %5, <float 0x0, float 0x0, float 0x0, float 0x0>
+ ret <4 x float> %6
+}
+
+define <8 x float> @stack_fold_andps_ymm(<8 x float> %a0, <8 x float> %a1) {
+ ;CHECK-LABEL: stack_fold_andps_ymm
+ ;CHECK: vandps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = bitcast <8 x float> %a0 to <4 x i64>
+ %3 = bitcast <8 x float> %a1 to <4 x i64>
+ %4 = and <4 x i64> %2, %3
+ %5 = bitcast <4 x i64> %4 to <8 x float>
+ ; fadd forces execution domain
+ %6 = fadd <8 x float> %5, <float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0>
+ ret <8 x float> %6
+}
+
+define <2 x double> @stack_fold_blendpd(<2 x double> %a0, <2 x double> %a1) {
+ ;CHECK-LABEL: stack_fold_blendpd
+ ;CHECK: vblendpd $2, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = select <2 x i1> <i1 1, i1 0>, <2 x double> %a0, <2 x double> %a1
+ ret <2 x double> %2
+}
+
+define <4 x double> @stack_fold_blendpd_ymm(<4 x double> %a0, <4 x double> %a1) {
+ ;CHECK-LABEL: stack_fold_blendpd_ymm
+ ;CHECK: vblendpd $6, {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = select <4 x i1> <i1 1, i1 0, i1 0, i1 1>, <4 x double> %a0, <4 x double> %a1
+ ret <4 x double> %2
+}
+
+define <4 x float> @stack_fold_blendps(<4 x float> %a0, <4 x float> %a1) {
+ ;CHECK-LABEL: stack_fold_blendps
+ ;CHECK: vblendps $6, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = select <4 x i1> <i1 1, i1 0, i1 0, i1 1>, <4 x float> %a0, <4 x float> %a1
+ ret <4 x float> %2
+}
+
+define <8 x float> @stack_fold_blendps_ymm(<8 x float> %a0, <8 x float> %a1) {
+ ;CHECK-LABEL: stack_fold_blendps_ymm
+ ;CHECK: vblendps $102, {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = select <8 x i1> <i1 1, i1 0, i1 0, i1 1, i1 1, i1 0, i1 0, i1 1>, <8 x float> %a0, <8 x float> %a1
+ ret <8 x float> %2
+}
+
+define <2 x double> @stack_fold_blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x double> %c) {
+ ;CHECK-LABEL: stack_fold_blendvpd
+ ;CHECK: vblendvpd {{%xmm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <2 x double> @llvm.x86.sse41.blendvpd(<2 x double> %a1, <2 x double> %c, <2 x double> %a0)
+ ret <2 x double> %2
+}
+declare <2 x double> @llvm.x86.sse41.blendvpd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
+
+define <4 x double> @stack_fold_blendvpd_ymm(<4 x double> %a0, <4 x double> %a1, <4 x double> %c) {
+ ;CHECK-LABEL: stack_fold_blendvpd_ymm
+ ;CHECK: vblendvpd {{%ymm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double> %a1, <4 x double> %c, <4 x double> %a0)
+ ret <4 x double> %2
+}
+declare <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double>, <4 x double>, <4 x double>) nounwind readnone
+
+define <4 x float> @stack_fold_blendvps(<4 x float> %a0, <4 x float> %a1, <4 x float> %c) {
+ ;CHECK-LABEL: stack_fold_blendvps
+ ;CHECK: vblendvps {{%xmm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %a1, <4 x float> %c, <4 x float> %a0)
+ ret <4 x float> %2
+}
+declare <4 x float> @llvm.x86.sse41.blendvps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
+
+define <8 x float> @stack_fold_blendvps_ymm(<8 x float> %a0, <8 x float> %a1, <8 x float> %c) {
+ ;CHECK-LABEL: stack_fold_blendvps_ymm
+ ;CHECK: vblendvps {{%ymm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float> %a1, <8 x float> %c, <8 x float> %a0)
+ ret <8 x float> %2
+}
+declare <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float>, <8 x float>, <8 x float>) nounwind readnone
+
+define <2 x double> @stack_fold_cmppd(<2 x double> %a0, <2 x double> %a1) {
+ ;CHECK-LABEL: stack_fold_cmppd
+ ;CHECK: vcmpeqpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %a0, <2 x double> %a1, i8 0)
+ ret <2 x double> %2
+}
+declare <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double>, <2 x double>, i8) nounwind readnone
+
+define <4 x double> @stack_fold_cmppd_ymm(<4 x double> %a0, <4 x double> %a1) {
+ ;CHECK-LABEL: stack_fold_cmppd_ymm
+ ;CHECK: vcmpeqpd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <4 x double> @llvm.x86.avx.cmp.pd.256(<4 x double> %a0, <4 x double> %a1, i8 0)
+ ret <4 x double> %2
+}
+declare <4 x double> @llvm.x86.avx.cmp.pd.256(<4 x double>, <4 x double>, i8) nounwind readnone
+
+define <4 x float> @stack_fold_cmpps(<4 x float> %a0, <4 x float> %a1) {
+ ;CHECK-LABEL: stack_fold_cmpps
+ ;CHECK: vcmpeqps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <4 x float> @llvm.x86.sse.cmp.ps(<4 x float> %a0, <4 x float> %a1, i8 0)
+ ret <4 x float> %2
+}
+declare <4 x float> @llvm.x86.sse.cmp.ps(<4 x float>, <4 x float>, i8) nounwind readnone
+
+define <8 x float> @stack_fold_cmpps_ymm(<8 x float> %a0, <8 x float> %a1) {
+ ;CHECK-LABEL: stack_fold_cmpps_ymm
+ ;CHECK: vcmpeqps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a1, i8 0)
+ ret <8 x float> %2
+}
+declare <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone
+
+define i32 @stack_fold_cmpsd(double %a0, double %a1) {
+ ;CHECK-LABEL: stack_fold_cmpsd
+ ;CHECK: vcmpeqsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = fcmp oeq double %a0, %a1
+ %3 = zext i1 %2 to i32
+ ret i32 %3
+}
+
+define <2 x double> @stack_fold_cmpsd_int(<2 x double> %a0, <2 x double> %a1) {
+ ;CHECK-LABEL: stack_fold_cmpsd_int
+ ;CHECK: vcmpeqsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 0)
+ ret <2 x double> %2
+}
+declare <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double>, <2 x double>, i8) nounwind readnone
+
+define i32 @stack_fold_cmpss(float %a0, float %a1) {
+ ;CHECK-LABEL: stack_fold_cmpss
+ ;CHECK: vcmpeqss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = fcmp oeq float %a0, %a1
+ %3 = zext i1 %2 to i32
+ ret i32 %3
+}
+
+define <4 x float> @stack_fold_cmpss_int(<4 x float> %a0, <4 x float> %a1) {
+ ;CHECK-LABEL: stack_fold_cmpss_int
+ ;CHECK: vcmpeqss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a0, <4 x float> %a1, i8 0)
+ ret <4 x float> %2
+}
+declare <4 x float> @llvm.x86.sse.cmp.ss(<4 x float>, <4 x float>, i8) nounwind readnone
+
+; TODO stack_fold_comisd
+
+define i32 @stack_fold_comisd_int(<2 x double> %a0, <2 x double> %a1) {
+ ;CHECK-LABEL: stack_fold_comisd_int
+ ;CHECK: vcomisd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call i32 @llvm.x86.sse2.comieq.sd(<2 x double> %a0, <2 x double> %a1)
+ ret i32 %2
+}
+declare i32 @llvm.x86.sse2.comieq.sd(<2 x double>, <2 x double>) nounwind readnone
+
+; TODO stack_fold_comiss
+
+define i32 @stack_fold_comiss_int(<4 x float> %a0, <4 x float> %a1) {
+ ;CHECK-LABEL: stack_fold_comiss_int
+ ;CHECK: vcomiss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call i32 @llvm.x86.sse.comieq.ss(<4 x float> %a0, <4 x float> %a1)
+ ret i32 %2
+}
+declare i32 @llvm.x86.sse.comieq.ss(<4 x float>, <4 x float>) nounwind readnone
+
+define <2 x double> @stack_fold_cvtdq2pd(<4 x i32> %a0) {
+ ;CHECK-LABEL: stack_fold_cvtdq2pd
+ ;CHECK: vcvtdq2pd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32> %a0)
+ ret <2 x double> %2
+}
+declare <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32>) nounwind readnone
+
+define <4 x double> @stack_fold_cvtdq2pd_ymm(<4 x i32> %a0) {
+ ;CHECK-LABEL: stack_fold_cvtdq2pd_ymm
+ ;CHECK: vcvtdq2pd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <4 x double> @llvm.x86.avx.cvtdq2.pd.256(<4 x i32> %a0)
+ ret <4 x double> %2
+}
+declare <4 x double> @llvm.x86.avx.cvtdq2.pd.256(<4 x i32>) nounwind readnone
+
+define <4 x float> @stack_fold_cvtdq2ps(<4 x i32> %a0) {
+ ;CHECK-LABEL: stack_fold_cvtdq2ps
+ ;CHECK: vcvtdq2ps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = sitofp <4 x i32> %a0 to <4 x float>
+ ret <4 x float> %2
+}
+
+define <8 x float> @stack_fold_cvtdq2ps_ymm(<8 x i32> %a0) {
+ ;CHECK-LABEL: stack_fold_cvtdq2ps_ymm
+ ;CHECK: vcvtdq2ps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = sitofp <8 x i32> %a0 to <8 x float>
+ ret <8 x float> %2
+}
+
+define <4 x i32> @stack_fold_cvtpd2dq(<2 x double> %a0) {
+ ;CHECK-LABEL: stack_fold_cvtpd2dq
+ ;CHECK: vcvtpd2dqx {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> %a0)
+ ret <4 x i32> %2
+}
+declare <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double>) nounwind readnone
+
+define <4 x i32> @stack_fold_cvtpd2dq_ymm(<4 x double> %a0) {
+ ;CHECK-LABEL: stack_fold_cvtpd2dq_ymm
+ ;CHECK: vcvtpd2dqy {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <4 x i32> @llvm.x86.avx.cvt.pd2dq.256(<4 x double> %a0)
+ ret <4 x i32> %2
+}
+declare <4 x i32> @llvm.x86.avx.cvt.pd2dq.256(<4 x double>) nounwind readnone
+
+define <2 x float> @stack_fold_cvtpd2ps(<2 x double> %a0) {
+ ;CHECK-LABEL: stack_fold_cvtpd2ps
+ ;CHECK: vcvtpd2psx {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = fptrunc <2 x double> %a0 to <2 x float>
+ ret <2 x float> %2
+}
+
+define <4 x float> @stack_fold_cvtpd2ps_ymm(<4 x double> %a0) {
+ ;CHECK-LABEL: stack_fold_cvtpd2ps_ymm
+ ;CHECK: vcvtpd2psy {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = fptrunc <4 x double> %a0 to <4 x float>
+ ret <4 x float> %2
+}
+
+define <4 x float> @stack_fold_cvtph2ps(<8 x i16> %a0) {
+ ;CHECK-LABEL: stack_fold_cvtph2ps
+ ;CHECK: vcvtph2ps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <4 x float> @llvm.x86.vcvtph2ps.128(<8 x i16> %a0)
+ ret <4 x float> %2
+}
+declare <4 x float> @llvm.x86.vcvtph2ps.128(<8 x i16>) nounwind readonly
+
+define <8 x float> @stack_fold_cvtph2ps_ymm(<8 x i16> %a0) {
+ ;CHECK-LABEL: stack_fold_cvtph2ps_ymm
+ ;CHECK: vcvtph2ps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16> %a0)
+ ret <8 x float> %2
+}
+declare <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16>) nounwind readonly
+
+define <4 x i32> @stack_fold_cvtps2dq(<4 x float> %a0) {
+ ;CHECK-LABEL: stack_fold_cvtps2dq
+ ;CHECK: vcvtps2dq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float> %a0)
+ ret <4 x i32> %2
+}
+declare <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float>) nounwind readnone
+
+define <8 x i32> @stack_fold_cvtps2dq_ymm(<8 x float> %a0) {
+ ;CHECK-LABEL: stack_fold_cvtps2dq_ymm
+ ;CHECK: vcvtps2dq {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <8 x i32> @llvm.x86.avx.cvt.ps2dq.256(<8 x float> %a0)
+ ret <8 x i32> %2
+}
+declare <8 x i32> @llvm.x86.avx.cvt.ps2dq.256(<8 x float>) nounwind readnone
+
+define <2 x double> @stack_fold_cvtps2pd(<4 x float> %a0) {
+ ;CHECK-LABEL: stack_fold_cvtps2pd
+ ;CHECK: vcvtps2pd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float> %a0)
+ ret <2 x double> %2
+}
+declare <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float>) nounwind readnone
+
+define <4 x double> @stack_fold_cvtps2pd_ymm(<4 x float> %a0) {
+ ;CHECK-LABEL: stack_fold_cvtps2pd_ymm
+ ;CHECK: vcvtps2pd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <4 x double> @llvm.x86.avx.cvt.ps2.pd.256(<4 x float> %a0)
+ ret <4 x double> %2
+}
+declare <4 x double> @llvm.x86.avx.cvt.ps2.pd.256(<4 x float>) nounwind readnone
+
+define <8 x i16> @stack_fold_cvtps2ph(<4 x float> %a0) {
+ ;CHECK-LABEL: stack_fold_cvtps2ph
+ ;CHECK: vcvtps2ph $0, {{%xmm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp) {{.*#+}} 16-byte Folded Spill
+ %1 = call <8 x i16> @llvm.x86.vcvtps2ph.128(<4 x float> %a0, i32 0)
+ %2 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ ret <8 x i16> %1
+}
+declare <8 x i16> @llvm.x86.vcvtps2ph.128(<4 x float>, i32) nounwind readonly
+
+define <8 x i16> @stack_fold_cvtps2ph_ymm(<8 x float> %a0) {
+ ;CHECK-LABEL: stack_fold_cvtps2ph_ymm
+ ;CHECK: vcvtps2ph $0, {{%ymm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp) {{.*#+}} 16-byte Folded Spill
+ %1 = call <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float> %a0, i32 0)
+ %2 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ ret <8 x i16> %1
+}
+declare <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float>, i32) nounwind readonly
+
+; TODO stack_fold_cvtsd2si
+
+define i32 @stack_fold_cvtsd2si_int(<2 x double> %a0) {
+ ;CHECK-LABEL: stack_fold_cvtsd2si_int
+ ;CHECK: cvtsd2si {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> %a0)
+ ret i32 %2
+}
+declare i32 @llvm.x86.sse2.cvtsd2si(<2 x double>) nounwind readnone
+
+; TODO stack_fold_cvtsd2si64
+
+define i64 @stack_fold_cvtsd2si64_int(<2 x double> %a0) {
+ ;CHECK-LABEL: stack_fold_cvtsd2si64_int
+ ;CHECK: cvtsd2si {{-?[0-9]*}}(%rsp), %rax {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call i64 @llvm.x86.sse2.cvtsd2si64(<2 x double> %a0)
+ ret i64 %2
+}
+declare i64 @llvm.x86.sse2.cvtsd2si64(<2 x double>) nounwind readnone
+
+; TODO stack_fold_cvtsd2ss
+
+define <4 x float> @stack_fold_cvtsd2ss_int(<2 x double> %a0) {
+ ;CHECK-LABEL: stack_fold_cvtsd2ss_int
+ ;CHECK: cvtsd2ss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> <float 0x0, float 0x0, float 0x0, float 0x0>, <2 x double> %a0)
+ ret <4 x float> %2
+}
+declare <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float>, <2 x double>) nounwind readnone
+
+define double @stack_fold_cvtsi2sd(i32 %a0) {
+ ;CHECK-LABEL: stack_fold_cvtsi2sd
+ ;CHECK: cvtsi2sdl {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
+ %2 = sitofp i32 %a0 to double
+ ret double %2
+}
+
+define <2 x double> @stack_fold_cvtsi2sd_int(i32 %a0) {
+ ;CHECK-LABEL: stack_fold_cvtsi2sd_int
+ ;CHECK: cvtsi2sdl {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
+ %2 = call <2 x double> @llvm.x86.sse2.cvtsi2sd(<2 x double> <double 0x0, double 0x0>, i32 %a0)
+ ret <2 x double> %2
+}
+declare <2 x double> @llvm.x86.sse2.cvtsi2sd(<2 x double>, i32) nounwind readnone
+
+define double @stack_fold_cvtsi642sd(i64 %a0) {
+ ;CHECK-LABEL: stack_fold_cvtsi642sd
+ ;CHECK: cvtsi2sdq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
+ %2 = sitofp i64 %a0 to double
+ ret double %2
+}
+
+define <2 x double> @stack_fold_cvtsi642sd_int(i64 %a0) {
+ ;CHECK-LABEL: stack_fold_cvtsi642sd_int
+ ;CHECK: cvtsi2sdq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
+ %2 = call <2 x double> @llvm.x86.sse2.cvtsi642sd(<2 x double> <double 0x0, double 0x0>, i64 %a0)
+ ret <2 x double> %2
+}
+declare <2 x double> @llvm.x86.sse2.cvtsi642sd(<2 x double>, i64) nounwind readnone
+
+define float @stack_fold_cvtsi2ss(i32 %a0) {
+ ;CHECK-LABEL: stack_fold_cvtsi2ss
+ ;CHECK: cvtsi2ssl {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
+ %2 = sitofp i32 %a0 to float
+ ret float %2
+}
+
+define <4 x float> @stack_fold_cvtsi2ss_int(i32 %a0) {
+ ;CHECK-LABEL: stack_fold_cvtsi2ss_int
+ ;CHECK: cvtsi2ssl {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
+ %2 = call <4 x float> @llvm.x86.sse.cvtsi2ss(<4 x float> <float 0x0, float 0x0, float 0x0, float 0x0>, i32 %a0)
+ ret <4 x float> %2
+}
+declare <4 x float> @llvm.x86.sse.cvtsi2ss(<4 x float>, i32) nounwind readnone
+
+define float @stack_fold_cvtsi642ss(i64 %a0) {
+ ;CHECK-LABEL: stack_fold_cvtsi642ss
+ ;CHECK: cvtsi2ssq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
+ %2 = sitofp i64 %a0 to float
+ ret float %2
+}
+
+define <4 x float> @stack_fold_cvtsi642ss_int(i64 %a0) {
+ ;CHECK-LABEL: stack_fold_cvtsi642ss_int
+ ;CHECK: cvtsi2ssq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
+ %2 = call <4 x float> @llvm.x86.sse.cvtsi642ss(<4 x float> <float 0x0, float 0x0, float 0x0, float 0x0>, i64 %a0)
+ ret <4 x float> %2
+}
+declare <4 x float> @llvm.x86.sse.cvtsi642ss(<4 x float>, i64) nounwind readnone
+
+; TODO stack_fold_cvtss2sd
+
+define <2 x double> @stack_fold_cvtss2sd_int(<4 x float> %a0) {
+ ;CHECK-LABEL: stack_fold_cvtss2sd_int
+ ;CHECK: cvtss2sd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double> <double 0x0, double 0x0>, <4 x float> %a0)
+ ret <2 x double> %2
+}
+declare <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double>, <4 x float>) nounwind readnone
+
+; TODO stack_fold_cvtss2si
+
+define i32 @stack_fold_cvtss2si_int(<4 x float> %a0) {
+ ;CHECK-LABEL: stack_fold_cvtss2si_int
+ ;CHECK: vcvtss2si {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call i32 @llvm.x86.sse.cvtss2si(<4 x float> %a0)
+ ret i32 %2
+}
+declare i32 @llvm.x86.sse.cvtss2si(<4 x float>) nounwind readnone
+
+; TODO stack_fold_cvtss2si64
+
+define i64 @stack_fold_cvtss2si64_int(<4 x float> %a0) {
+ ;CHECK-LABEL: stack_fold_cvtss2si64_int
+ ;CHECK: vcvtss2si {{-?[0-9]*}}(%rsp), %rax {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call i64 @llvm.x86.sse.cvtss2si64(<4 x float> %a0)
+ ret i64 %2
+}
+declare i64 @llvm.x86.sse.cvtss2si64(<4 x float>) nounwind readnone
+
+define <4 x i32> @stack_fold_cvttpd2dq(<2 x double> %a0) {
+ ;CHECK-LABEL: stack_fold_cvttpd2dq
+ ;CHECK: vcvttpd2dqx {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> %a0)
+ ret <4 x i32> %2
+}
+declare <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double>) nounwind readnone
+
+define <4 x i32> @stack_fold_cvttpd2dq_ymm(<4 x double> %a0) {
+ ;CHECK-LABEL: stack_fold_cvttpd2dq_ymm
+ ;CHECK: vcvttpd2dqy {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = fptosi <4 x double> %a0 to <4 x i32>
+ ret <4 x i32> %2
+}
+
+define <4 x i32> @stack_fold_cvttps2dq(<4 x float> %a0) {
+ ;CHECK-LABEL: stack_fold_cvttps2dq
+ ;CHECK: vcvttps2dq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = fptosi <4 x float> %a0 to <4 x i32>
+ ret <4 x i32> %2
+}
+
+define <8 x i32> @stack_fold_cvttps2dq_ymm(<8 x float> %a0) {
+ ;CHECK-LABEL: stack_fold_cvttps2dq_ymm
+ ;CHECK: vcvttps2dq {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = fptosi <8 x float> %a0 to <8 x i32>
+ ret <8 x i32> %2
+}
+
+define i32 @stack_fold_cvttsd2si(double %a0) {
+ ;CHECK-LABEL: stack_fold_cvttsd2si
+ ;CHECK: vcvttsd2si {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 8-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = fptosi double %a0 to i32
+ ret i32 %2
+}
+
+define i32 @stack_fold_cvttsd2si_int(<2 x double> %a0) {
+ ;CHECK-LABEL: stack_fold_cvttsd2si_int
+ ;CHECK: vcvttsd2si {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> %a0)
+ ret i32 %2
+}
+declare i32 @llvm.x86.sse2.cvttsd2si(<2 x double>) nounwind readnone
+
+define i64 @stack_fold_cvttsd2si64(double %a0) {
+ ;CHECK-LABEL: stack_fold_cvttsd2si64
+ ;CHECK: vcvttsd2si {{-?[0-9]*}}(%rsp), %rax {{.*#+}} 8-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = fptosi double %a0 to i64
+ ret i64 %2
+}
+
+define i64 @stack_fold_cvttsd2si64_int(<2 x double> %a0) {
+ ;CHECK-LABEL: stack_fold_cvttsd2si64_int
+ ;CHECK: vcvttsd2si {{-?[0-9]*}}(%rsp), %rax {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call i64 @llvm.x86.sse2.cvttsd2si64(<2 x double> %a0)
+ ret i64 %2
+}
+declare i64 @llvm.x86.sse2.cvttsd2si64(<2 x double>) nounwind readnone
+
+define i32 @stack_fold_cvttss2si(float %a0) {
+ ;CHECK-LABEL: stack_fold_cvttss2si
+ ;CHECK: vcvttss2si {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 4-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = fptosi float %a0 to i32
+ ret i32 %2
+}
+
+define i32 @stack_fold_cvttss2si_int(<4 x float> %a0) {
+ ;CHECK-LABEL: stack_fold_cvttss2si_int
+ ;CHECK: vcvttss2si {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call i32 @llvm.x86.sse.cvttss2si(<4 x float> %a0)
+ ret i32 %2
+}
+declare i32 @llvm.x86.sse.cvttss2si(<4 x float>) nounwind readnone
+
+define i64 @stack_fold_cvttss2si64(float %a0) {
+ ;CHECK-LABEL: stack_fold_cvttss2si64
+ ;CHECK: vcvttss2si {{-?[0-9]*}}(%rsp), %rax {{.*#+}} 4-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = fptosi float %a0 to i64
+ ret i64 %2
+}
+
+define i64 @stack_fold_cvttss2si64_int(<4 x float> %a0) {
+ ;CHECK-LABEL: stack_fold_cvttss2si64_int
+ ;CHECK: vcvttss2si {{-?[0-9]*}}(%rsp), %rax {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call i64 @llvm.x86.sse.cvttss2si64(<4 x float> %a0)
+ ret i64 %2
+}
+declare i64 @llvm.x86.sse.cvttss2si64(<4 x float>) nounwind readnone
+
+define <2 x double> @stack_fold_divpd(<2 x double> %a0, <2 x double> %a1) {
+ ;CHECK-LABEL: stack_fold_divpd
+ ;CHECK: vdivpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = fdiv <2 x double> %a0, %a1
+ ret <2 x double> %2
+}
+
+define <4 x double> @stack_fold_divpd_ymm(<4 x double> %a0, <4 x double> %a1) {
+ ;CHECK-LABEL: stack_fold_divpd_ymm
+ ;CHECK: vdivpd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = fdiv <4 x double> %a0, %a1
+ ret <4 x double> %2
+}
+
+define <4 x float> @stack_fold_divps(<4 x float> %a0, <4 x float> %a1) {
+ ;CHECK-LABEL: stack_fold_divps
+ ;CHECK: vdivps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = fdiv <4 x float> %a0, %a1
+ ret <4 x float> %2
+}
+
+define <8 x float> @stack_fold_divps_ymm(<8 x float> %a0, <8 x float> %a1) {
+ ;CHECK-LABEL: stack_fold_divps_ymm
+ ;CHECK: vdivps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = fdiv <8 x float> %a0, %a1
+ ret <8 x float> %2
+}
+
+define double @stack_fold_divsd(double %a0, double %a1) {
+ ;CHECK-LABEL: stack_fold_divsd
+ ;CHECK: vdivsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = fdiv double %a0, %a1
+ ret double %2
+}
+
+define <2 x double> @stack_fold_divsd_int(<2 x double> %a0, <2 x double> %a1) {
+ ;CHECK-LABEL: stack_fold_divsd_int
+ ;CHECK: vdivsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <2 x double> @llvm.x86.sse2.div.sd(<2 x double> %a0, <2 x double> %a1)
+ ret <2 x double> %2
+}
+declare <2 x double> @llvm.x86.sse2.div.sd(<2 x double>, <2 x double>) nounwind readnone
+
+define float @stack_fold_divss(float %a0, float %a1) {
+ ;CHECK-LABEL: stack_fold_divss
+ ;CHECK: vdivss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = fdiv float %a0, %a1
+ ret float %2
+}
+
+define <4 x float> @stack_fold_divss_int(<4 x float> %a0, <4 x float> %a1) {
+ ;CHECK-LABEL: stack_fold_divss_int
+ ;CHECK: vdivss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <4 x float> @llvm.x86.sse.div.ss(<4 x float> %a0, <4 x float> %a1)
+ ret <4 x float> %2
+}
+declare <4 x float> @llvm.x86.sse.div.ss(<4 x float>, <4 x float>) nounwind readnone
+
+define <2 x double> @stack_fold_dppd(<2 x double> %a0, <2 x double> %a1) {
+ ;CHECK-LABEL: stack_fold_dppd
+ ;CHECK: vdppd $7, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <2 x double> @llvm.x86.sse41.dppd(<2 x double> %a0, <2 x double> %a1, i8 7)
+ ret <2 x double> %2
+}
+declare <2 x double> @llvm.x86.sse41.dppd(<2 x double>, <2 x double>, i8) nounwind readnone
+
+define <4 x float> @stack_fold_dpps(<4 x float> %a0, <4 x float> %a1) {
+ ;CHECK-LABEL: stack_fold_dpps
+ ;CHECK: vdpps $7, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <4 x float> @llvm.x86.sse41.dpps(<4 x float> %a0, <4 x float> %a1, i8 7)
+ ret <4 x float> %2
+}
+declare <4 x float> @llvm.x86.sse41.dpps(<4 x float>, <4 x float>, i8) nounwind readnone
+
+define <8 x float> @stack_fold_dpps_ymm(<8 x float> %a0, <8 x float> %a1) {
+ ;CHECK-LABEL: stack_fold_dpps_ymm
+ ;CHECK: vdpps $7, {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <8 x float> @llvm.x86.avx.dp.ps.256(<8 x float> %a0, <8 x float> %a1, i8 7)
+ ret <8 x float> %2
+}
+declare <8 x float> @llvm.x86.avx.dp.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone
+
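+; The two extract tests below check the opposite direction: the extracted value is
+; spilled to the stack across the asm (a Folded Spill), and stack_fold_extractps
+; then reloads it straight into a GPR.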
+define <4 x float> @stack_fold_extractf128(<8 x float> %a0, <8 x float> %a1) {
+ ;CHECK-LABEL: stack_fold_extractf128
+ ;CHECK: vextractf128 $1, {{%ymm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp) {{.*#+}} 16-byte Folded Spill
+ %1 = shufflevector <8 x float> %a0, <8 x float> %a1, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %2 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ ret <4 x float> %1
+}
+
+define i32 @stack_fold_extractps(<4 x float> %a0) {
+ ;CHECK-LABEL: stack_fold_extractps
+ ;CHECK: vextractps $1, {{%xmm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp) {{.*#+}} 4-byte Folded Spill
+ ;CHECK: movl {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 4-byte Reload
+ %1 = extractelement <4 x float> %a0, i32 1
+ %2 = bitcast float %1 to i32
+ %3 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
+ ret i32 %2
+}
+
+define <2 x double> @stack_fold_haddpd(<2 x double> %a0, <2 x double> %a1) {
+ ;CHECK-LABEL: stack_fold_haddpd
+ ;CHECK: vhaddpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double> %a0, <2 x double> %a1)
+ ret <2 x double> %2
+}
+declare <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double>, <2 x double>) nounwind readnone
+
+define <4 x double> @stack_fold_haddpd_ymm(<4 x double> %a0, <4 x double> %a1) {
+ ;CHECK-LABEL: stack_fold_haddpd_ymm
+ ;CHECK: vhaddpd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double> %a0, <4 x double> %a1)
+ ret <4 x double> %2
+}
+declare <4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double>, <4 x double>) nounwind readnone
+
+define <4 x float> @stack_fold_haddps(<4 x float> %a0, <4 x float> %a1) {
+ ;CHECK-LABEL: stack_fold_haddps
+ ;CHECK: vhaddps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float> %a0, <4 x float> %a1)
+ ret <4 x float> %2
+}
+declare <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float>, <4 x float>) nounwind readnone
+
+define <8 x float> @stack_fold_haddps_ymm(<8 x float> %a0, <8 x float> %a1) {
+ ;CHECK-LABEL: stack_fold_haddps_ymm
+ ;CHECK: vhaddps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float> %a0, <8 x float> %a1)
+ ret <8 x float> %2
+}
+declare <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float>, <8 x float>) nounwind readnone
+
+define <2 x double> @stack_fold_hsubpd(<2 x double> %a0, <2 x double> %a1) {
+ ;CHECK-LABEL: stack_fold_hsubpd
+ ;CHECK: vhsubpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double> %a0, <2 x double> %a1)
+ ret <2 x double> %2
+}
+declare <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double>, <2 x double>) nounwind readnone
+
+define <4 x double> @stack_fold_hsubpd_ymm(<4 x double> %a0, <4 x double> %a1) {
+ ;CHECK-LABEL: stack_fold_hsubpd_ymm
+ ;CHECK: vhsubpd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <4 x double> @llvm.x86.avx.hsub.pd.256(<4 x double> %a0, <4 x double> %a1)
+ ret <4 x double> %2
+}
+declare <4 x double> @llvm.x86.avx.hsub.pd.256(<4 x double>, <4 x double>) nounwind readnone
+
+define <4 x float> @stack_fold_hsubps(<4 x float> %a0, <4 x float> %a1) {
+ ;CHECK-LABEL: stack_fold_hsubps
+ ;CHECK: vhsubps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float> %a0, <4 x float> %a1)
+ ret <4 x float> %2
+}
+declare <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float>, <4 x float>) nounwind readnone
+
+define <8 x float> @stack_fold_hsubps_ymm(<8 x float> %a0, <8 x float> %a1) {
+ ;CHECK-LABEL: stack_fold_hsubps_ymm
+ ;CHECK: vhsubps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <8 x float> @llvm.x86.avx.hsub.ps.256(<8 x float> %a0, <8 x float> %a1)
+ ret <8 x float> %2
+}
+declare <8 x float> @llvm.x86.avx.hsub.ps.256(<8 x float>, <8 x float>) nounwind readnone
+
+define <8 x float> @stack_fold_insertf128(<4 x float> %a0, <4 x float> %a1) {
+ ;CHECK-LABEL: stack_fold_insertf128
+ ;CHECK: vinsertf128 $1, {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = shufflevector <4 x float> %a0, <4 x float> %a1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ ret <8 x float> %2
+}
+
+; TODO stack_fold_insertps
+
+define <2 x double> @stack_fold_maxpd(<2 x double> %a0, <2 x double> %a1) {
+ ;CHECK-LABEL: stack_fold_maxpd
+ ;CHECK: vmaxpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> %a0, <2 x double> %a1)
+ ret <2 x double> %2
+}
+declare <2 x double> @llvm.x86.sse2.max.pd(<2 x double>, <2 x double>) nounwind readnone
+
+define <4 x double> @stack_fold_maxpd_ymm(<4 x double> %a0, <4 x double> %a1) {
+ ;CHECK-LABEL: stack_fold_maxpd_ymm
+ ;CHECK: vmaxpd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <4 x double> @llvm.x86.avx.max.pd.256(<4 x double> %a0, <4 x double> %a1)
+ ret <4 x double> %2
+}
+declare <4 x double> @llvm.x86.avx.max.pd.256(<4 x double>, <4 x double>) nounwind readnone
+
+define <4 x float> @stack_fold_maxps(<4 x float> %a0, <4 x float> %a1) {
+ ;CHECK-LABEL: stack_fold_maxps
+ ;CHECK: vmaxps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %a0, <4 x float> %a1)
+ ret <4 x float> %2
+}
+declare <4 x float> @llvm.x86.sse.max.ps(<4 x float>, <4 x float>) nounwind readnone
+
+define <8 x float> @stack_fold_maxps_ymm(<8 x float> %a0, <8 x float> %a1) {
+ ;CHECK-LABEL: stack_fold_maxps_ymm
+ ;CHECK: vmaxps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <8 x float> @llvm.x86.avx.max.ps.256(<8 x float> %a0, <8 x float> %a1)
+ ret <8 x float> %2
+}
+declare <8 x float> @llvm.x86.avx.max.ps.256(<8 x float>, <8 x float>) nounwind readnone
+
+define double @stack_fold_maxsd(double %a0, double %a1) {
+ ;CHECK-LABEL: stack_fold_maxsd
+ ;CHECK: vmaxsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = fcmp ogt double %a0, %a1
+ %3 = select i1 %2, double %a0, double %a1
+ ret double %3
+}
+
+define <2 x double> @stack_fold_maxsd_int(<2 x double> %a0, <2 x double> %a1) {
+ ;CHECK-LABEL: stack_fold_maxsd_int
+ ;CHECK: vmaxsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> %a0, <2 x double> %a1)
+ ret <2 x double> %2
+}
+declare <2 x double> @llvm.x86.sse2.max.sd(<2 x double>, <2 x double>) nounwind readnone
+
+define float @stack_fold_maxss(float %a0, float %a1) {
+ ;CHECK-LABEL: stack_fold_maxss
+ ;CHECK: vmaxss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = fcmp ogt float %a0, %a1
+ %3 = select i1 %2, float %a0, float %a1
+ ret float %3
+}
+
+define <4 x float> @stack_fold_maxss_int(<4 x float> %a0, <4 x float> %a1) {
+ ;CHECK-LABEL: stack_fold_maxss_int
+ ;CHECK: vmaxss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %a0, <4 x float> %a1)
+ ret <4 x float> %2
+}
+declare <4 x float> @llvm.x86.sse.max.ss(<4 x float>, <4 x float>) nounwind readnone
+
+define <2 x double> @stack_fold_minpd(<2 x double> %a0, <2 x double> %a1) {
+ ;CHECK-LABEL: stack_fold_minpd
+ ;CHECK: vminpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> %a0, <2 x double> %a1)
+ ret <2 x double> %2
+}
+declare <2 x double> @llvm.x86.sse2.min.pd(<2 x double>, <2 x double>) nounwind readnone
+
+define <4 x double> @stack_fold_minpd_ymm(<4 x double> %a0, <4 x double> %a1) {
+ ;CHECK-LABEL: stack_fold_minpd_ymm
+ ;CHECK: vminpd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <4 x double> @llvm.x86.avx.min.pd.256(<4 x double> %a0, <4 x double> %a1)
+ ret <4 x double> %2
+}
+declare <4 x double> @llvm.x86.avx.min.pd.256(<4 x double>, <4 x double>) nounwind readnone
+
+define <4 x float> @stack_fold_minps(<4 x float> %a0, <4 x float> %a1) {
+ ;CHECK-LABEL: stack_fold_minps
+ ;CHECK: vminps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %a0, <4 x float> %a1)
+ ret <4 x float> %2
+}
+declare <4 x float> @llvm.x86.sse.min.ps(<4 x float>, <4 x float>) nounwind readnone
+
+define <8 x float> @stack_fold_minps_ymm(<8 x float> %a0, <8 x float> %a1) {
+ ;CHECK-LABEL: stack_fold_minps_ymm
+ ;CHECK: vminps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <8 x float> @llvm.x86.avx.min.ps.256(<8 x float> %a0, <8 x float> %a1)
+ ret <8 x float> %2
+}
+declare <8 x float> @llvm.x86.avx.min.ps.256(<8 x float>, <8 x float>) nounwind readnone
+
+define double @stack_fold_minsd(double %a0, double %a1) {
+ ;CHECK-LABEL: stack_fold_minsd
+ ;CHECK: vminsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = fcmp olt double %a0, %a1
+ %3 = select i1 %2, double %a0, double %a1
+ ret double %3
+}
+
+define <2 x double> @stack_fold_minsd_int(<2 x double> %a0, <2 x double> %a1) {
+ ;CHECK-LABEL: stack_fold_minsd_int
+ ;CHECK: vminsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> %a0, <2 x double> %a1)
+ ret <2 x double> %2
+}
+declare <2 x double> @llvm.x86.sse2.min.sd(<2 x double>, <2 x double>) nounwind readnone
+
+define float @stack_fold_minss(float %a0, float %a1) {
+ ;CHECK-LABEL: stack_fold_minss
+ ;CHECK: vminss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = fcmp olt float %a0, %a1
+ %3 = select i1 %2, float %a0, float %a1
+ ret float %3
+}
+
+define <4 x float> @stack_fold_minss_int(<4 x float> %a0, <4 x float> %a1) {
+ ;CHECK-LABEL: stack_fold_minss_int
+ ;CHECK: vminss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %a0, <4 x float> %a1)
+ ret <4 x float> %2
+}
+declare <4 x float> @llvm.x86.sse.min.ss(<4 x float>, <4 x float>) nounwind readnone
+
+define <2 x double> @stack_fold_movddup(<2 x double> %a0) {
+ ;CHECK-LABEL: stack_fold_movddup
+ ;CHECK: vmovddup {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = shufflevector <2 x double> %a0, <2 x double> undef, <2 x i32> <i32 0, i32 0>
+ ret <2 x double> %2
+}
+
+define <4 x double> @stack_fold_movddup_ymm(<4 x double> %a0) {
+ ;CHECK-LABEL: stack_fold_movddup_ymm
+ ;CHECK: vmovddup {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = shufflevector <4 x double> %a0, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
+ ret <4 x double> %2
+}
+
+; TODO stack_fold_movhpd (load / store)
+; TODO stack_fold_movhps (load / store)
+
+; TODO stack_fold_movlpd (load / store)
+; TODO stack_fold_movlps (load / store)
+
+define <4 x float> @stack_fold_movshdup(<4 x float> %a0) {
+ ;CHECK-LABEL: stack_fold_movshdup
+ ;CHECK: vmovshdup {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = shufflevector <4 x float> %a0, <4 x float> undef, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
+ ret <4 x float> %2
+}
+
+define <8 x float> @stack_fold_movshdup_ymm(<8 x float> %a0) {
+ ;CHECK-LABEL: stack_fold_movshdup_ymm
+ ;CHECK: vmovshdup {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = shufflevector <8 x float> %a0, <8 x float> undef, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7>
+ ret <8 x float> %2
+}
+
+define <4 x float> @stack_fold_movsldup(<4 x float> %a0) {
+ ;CHECK-LABEL: stack_fold_movsldup
+ ;CHECK: vmovsldup {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = shufflevector <4 x float> %a0, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
+ ret <4 x float> %2
+}
+
+define <8 x float> @stack_fold_movsldup_ymm(<8 x float> %a0) {
+ ;CHECK-LABEL: stack_fold_movsldup_ymm
+ ;CHECK: vmovsldup {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = shufflevector <8 x float> %a0, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
+ ret <8 x float> %2
+}
+
+define <2 x double> @stack_fold_mulpd(<2 x double> %a0, <2 x double> %a1) {
+ ;CHECK-LABEL: stack_fold_mulpd
+ ;CHECK: vmulpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = fmul <2 x double> %a0, %a1
+ ret <2 x double> %2
+}
+
+define <4 x double> @stack_fold_mulpd_ymm(<4 x double> %a0, <4 x double> %a1) {
+ ;CHECK-LABEL: stack_fold_mulpd_ymm
+ ;CHECK: vmulpd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = fmul <4 x double> %a0, %a1
+ ret <4 x double> %2
+}
+
+define <4 x float> @stack_fold_mulps(<4 x float> %a0, <4 x float> %a1) {
+ ;CHECK-LABEL: stack_fold_mulps
+ ;CHECK: vmulps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = fmul <4 x float> %a0, %a1
+ ret <4 x float> %2
+}
+
+define <8 x float> @stack_fold_mulps_ymm(<8 x float> %a0, <8 x float> %a1) {
+ ;CHECK-LABEL: stack_fold_mulps_ymm
+ ;CHECK: vmulps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = fmul <8 x float> %a0, %a1
+ ret <8 x float> %2
+}
+
+define double @stack_fold_mulsd(double %a0, double %a1) {
+ ;CHECK-LABEL: stack_fold_mulsd
+ ;CHECK: vmulsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = fmul double %a0, %a1
+ ret double %2
+}
+
+define <2 x double> @stack_fold_mulsd_int(<2 x double> %a0, <2 x double> %a1) {
+ ;CHECK-LABEL: stack_fold_mulsd_int
+ ;CHECK: vmulsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <2 x double> @llvm.x86.sse2.mul.sd(<2 x double> %a0, <2 x double> %a1)
+ ret <2 x double> %2
+}
+declare <2 x double> @llvm.x86.sse2.mul.sd(<2 x double>, <2 x double>) nounwind readnone
+
+define float @stack_fold_mulss(float %a0, float %a1) {
+ ;CHECK-LABEL: stack_fold_mulss
+ ;CHECK: vmulss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = fmul float %a0, %a1
+ ret float %2
+}
+
+define <4 x float> @stack_fold_mulss_int(<4 x float> %a0, <4 x float> %a1) {
+ ;CHECK-LABEL: stack_fold_mulss_int
+ ;CHECK: vmulss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <4 x float> @llvm.x86.sse.mul.ss(<4 x float> %a0, <4 x float> %a1)
+ ret <4 x float> %2
+}
+declare <4 x float> @llvm.x86.sse.mul.ss(<4 x float>, <4 x float>) nounwind readnone
+
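+; The or tests below perform the logical op on bitcast integer vectors and then add
+; 0.0 to the result; as the inline comments note, the fadd pins the value to the
+; floating-point execution domain so vorpd/vorps (which fold the reload here) is
+; selected instead of the integer por.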
+define <2 x double> @stack_fold_orpd(<2 x double> %a0, <2 x double> %a1) {
+ ;CHECK-LABEL: stack_fold_orpd
+ ;CHECK: vorpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = bitcast <2 x double> %a0 to <2 x i64>
+ %3 = bitcast <2 x double> %a1 to <2 x i64>
+ %4 = or <2 x i64> %2, %3
+ %5 = bitcast <2 x i64> %4 to <2 x double>
+ ; fadd forces execution domain
+ %6 = fadd <2 x double> %5, <double 0x0, double 0x0>
+ ret <2 x double> %6
+}
+
+define <4 x double> @stack_fold_orpd_ymm(<4 x double> %a0, <4 x double> %a1) {
+ ;CHECK-LABEL: stack_fold_orpd_ymm
+ ;CHECK: vorpd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = bitcast <4 x double> %a0 to <4 x i64>
+ %3 = bitcast <4 x double> %a1 to <4 x i64>
+ %4 = or <4 x i64> %2, %3
+ %5 = bitcast <4 x i64> %4 to <4 x double>
+ ; fadd forces execution domain
+ %6 = fadd <4 x double> %5, <double 0x0, double 0x0, double 0x0, double 0x0>
+ ret <4 x double> %6
+}
+
+define <4 x float> @stack_fold_orps(<4 x float> %a0, <4 x float> %a1) {
+ ;CHECK-LABEL: stack_fold_orps
+ ;CHECK: vorps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = bitcast <4 x float> %a0 to <2 x i64>
+ %3 = bitcast <4 x float> %a1 to <2 x i64>
+ %4 = or <2 x i64> %2, %3
+ %5 = bitcast <2 x i64> %4 to <4 x float>
+ ; fadd forces execution domain
+ %6 = fadd <4 x float> %5, <float 0x0, float 0x0, float 0x0, float 0x0>
+ ret <4 x float> %6
+}
+
+define <8 x float> @stack_fold_orps_ymm(<8 x float> %a0, <8 x float> %a1) {
+ ;CHECK-LABEL: stack_fold_orps_ymm
+ ;CHECK: vorps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = bitcast <8 x float> %a0 to <4 x i64>
+ %3 = bitcast <8 x float> %a1 to <4 x i64>
+ %4 = or <4 x i64> %2, %3
+ %5 = bitcast <4 x i64> %4 to <8 x float>
+ ; fadd forces execution domain
+ %6 = fadd <8 x float> %5, <float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0>
+ ret <8 x float> %6
+}
+
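+; vperm2f128 $33 (0x21): selector 1 in bits 1:0 takes the upper half of %a0 for the
+; low lane of the result, and selector 2 in bits 5:4 takes the lower half of %a1 for
+; the high lane, matching the <4,5,6,7,8,9,10,11> shuffle mask below.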
+define <8 x float> @stack_fold_perm2f128(<8 x float> %a0, <8 x float> %a1) {
+ ;CHECK-LABEL: stack_fold_perm2f128
+ ;CHECK: vperm2f128 $33, {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = shufflevector <8 x float> %a0, <8 x float> %a1, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
+ ret <8 x float> %2
+}
+
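+; The permilpd/permilps immediates encode the within-lane element selects of the
+; shuffle masks below: <1,0> -> $1 and <1,0,3,2> -> $5 (one bit per double element),
+; <3,2,1,0> -> $27 (two bits per float element, 0b00011011).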
+define <2 x double> @stack_fold_permilpd(<2 x double> %a0) {
+ ;CHECK-LABEL: stack_fold_permilpd
+ ;CHECK: vpermilpd $1, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = shufflevector <2 x double> %a0, <2 x double> undef, <2 x i32> <i32 1, i32 0>
+ ret <2 x double> %2
+}
+
+define <4 x double> @stack_fold_permilpd_ymm(<4 x double> %a0) {
+ ;CHECK-LABEL: stack_fold_permilpd_ymm
+ ;CHECK: vpermilpd $5, {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = shufflevector <4 x double> %a0, <4 x double> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
+ ret <4 x double> %2
+}
+
+define <2 x double> @stack_fold_permilpdvar(<2 x double> %a0, <2 x i64> %a1) {
+ ;CHECK-LABEL: stack_fold_permilpdvar
+ ;CHECK: vpermilpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %a0, <2 x i64> %a1)
+ ret <2 x double> %2
+}
+declare <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double>, <2 x i64>) nounwind readnone
+
+define <4 x double> @stack_fold_permilpdvar_ymm(<4 x double> %a0, <4 x i64> %a1) {
+ ;CHECK-LABEL: stack_fold_permilpdvar_ymm
+ ;CHECK: vpermilpd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %a0, <4 x i64> %a1)
+ ret <4 x double> %2
+}
+declare <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double>, <4 x i64>) nounwind readnone
+
+define <4 x float> @stack_fold_permilps(<4 x float> %a0) {
+ ;CHECK-LABEL: stack_fold_permilps
+ ;CHECK: vpermilps $27, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = shufflevector <4 x float> %a0, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+ ret <4 x float> %2
+}
+
+define <8 x float> @stack_fold_permilps_ymm(<8 x float> %a0) {
+ ;CHECK-LABEL: stack_fold_permilps_ymm
+ ;CHECK: vpermilps $27, {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = shufflevector <8 x float> %a0, <8 x float> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
+ ret <8 x float> %2
+}
+
+define <4 x float> @stack_fold_permilpsvar(<4 x float> %a0, <4 x i32> %a1) {
+ ;CHECK-LABEL: stack_fold_permilpsvar
+ ;CHECK: vpermilps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> %a1)
+ ret <4 x float> %2
+}
+declare <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float>, <4 x i32>) nounwind readnone
+
+define <8 x float> @stack_fold_permilpsvar_ymm(<8 x float> %a0, <8 x i32> %a1) {
+ ;CHECK-LABEL: stack_fold_permilpsvar_ymm
+ ;CHECK: vpermilps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %a0, <8 x i32> %a1)
+ ret <8 x float> %2
+}
+declare <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float>, <8 x i32>) nounwind readnone
+
+; TODO stack_fold_rcpps
+
+define <4 x float> @stack_fold_rcpps_int(<4 x float> %a0) {
+ ;CHECK-LABEL: stack_fold_rcpps_int
+ ;CHECK: vrcpps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <4 x float> @llvm.x86.sse.rcp.ps(<4 x float> %a0)
+ ret <4 x float> %2
+}
+declare <4 x float> @llvm.x86.sse.rcp.ps(<4 x float>) nounwind readnone
+
+; TODO stack_fold_rcpps_ymm
+
+define <8 x float> @stack_fold_rcpps_ymm_int(<8 x float> %a0) {
+ ;CHECK-LABEL: stack_fold_rcpps_ymm_int
+ ;CHECK: vrcpps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <8 x float> @llvm.x86.avx.rcp.ps.256(<8 x float> %a0)
+ ret <8 x float> %2
+}
+declare <8 x float> @llvm.x86.avx.rcp.ps.256(<8 x float>) nounwind readnone
+
+; TODO stack_fold_rcpss
+
+define <4 x float> @stack_fold_rcpss_int(<4 x float> %a0) {
+ ;CHECK-LABEL: stack_fold_rcpss_int
+ ;CHECK: vrcpss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %a0)
+ ret <4 x float> %2
+}
+declare <4 x float> @llvm.x86.sse.rcp.ss(<4 x float>) nounwind readnone
+
+define <2 x double> @stack_fold_roundpd(<2 x double> %a0) {
+ ;CHECK-LABEL: stack_fold_roundpd
+ ;CHECK: vroundpd $7, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <2 x double> @llvm.x86.sse41.round.pd(<2 x double> %a0, i32 7)
+ ret <2 x double> %2
+}
+declare <2 x double> @llvm.x86.sse41.round.pd(<2 x double>, i32) nounwind readnone
+
+define <4 x double> @stack_fold_roundpd_ymm(<4 x double> %a0) {
+ ;CHECK-LABEL: stack_fold_roundpd_ymm
+ ;CHECK: vroundpd $7, {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <4 x double> @llvm.x86.avx.round.pd.256(<4 x double> %a0, i32 7)
+ ret <4 x double> %2
+}
+declare <4 x double> @llvm.x86.avx.round.pd.256(<4 x double>, i32) nounwind readnone
+
+define <4 x float> @stack_fold_roundps(<4 x float> %a0) {
+ ;CHECK-LABEL: stack_fold_roundps
+ ;CHECK: vroundps $7, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <4 x float> @llvm.x86.sse41.round.ps(<4 x float> %a0, i32 7)
+ ret <4 x float> %2
+}
+declare <4 x float> @llvm.x86.sse41.round.ps(<4 x float>, i32) nounwind readnone
+
+define <8 x float> @stack_fold_roundps_ymm(<8 x float> %a0) {
+ ;CHECK-LABEL: stack_fold_roundps_ymm
+ ;CHECK: vroundps $7, {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <8 x float> @llvm.x86.avx.round.ps.256(<8 x float> %a0, i32 7)
+ ret <8 x float> %2
+}
+declare <8 x float> @llvm.x86.avx.round.ps.256(<8 x float>, i32) nounwind readnone
+
+; TODO stack_fold_roundsd
+
+; TODO stack_fold_roundsd_int
+declare <2 x double> @llvm.x86.sse41.round.sd(<2 x double>, <2 x double>, i32) nounwind readnone
+
+; TODO stack_fold_roundss
+
+; TODO stack_fold_roundss_int
+declare <4 x float> @llvm.x86.sse41.round.ss(<4 x float>, <4 x float>, i32) nounwind readnone
+
+; TODO stack_fold_rsqrtps
+
+define <4 x float> @stack_fold_rsqrtps_int(<4 x float> %a0) {
+ ;CHECK-LABEL: stack_fold_rsqrtps_int
+ ;CHECK: vrsqrtps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float> %a0)
+ ret <4 x float> %2
+}
+declare <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float>) nounwind readnone
+
+; TODO stack_fold_rsqrtps_ymm
+
+define <8 x float> @stack_fold_rsqrtps_ymm_int(<8 x float> %a0) {
+ ;CHECK-LABEL: stack_fold_rsqrtps_ymm_int
+ ;CHECK: vrsqrtps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <8 x float> @llvm.x86.avx.rsqrt.ps.256(<8 x float> %a0)
+ ret <8 x float> %2
+}
+declare <8 x float> @llvm.x86.avx.rsqrt.ps.256(<8 x float>) nounwind readnone
+
+; TODO stack_fold_rsqrtss
+
+define <4 x float> @stack_fold_rsqrtss_int(<4 x float> %a0) {
+ ;CHECK-LABEL: stack_fold_rsqrtss_int
+ ;CHECK: vrsqrtss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> %a0)
+ ret <4 x float> %2
+}
+declare <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float>) nounwind readnone
+
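+; shufpd uses one selector bit per result element and shufps two bits per element,
+; so the masks below encode as <1,2> -> $1, <1,4,3,6> -> $5, <0,2,4,7> -> $200
+; (0b11001000) and <0,1,9,10> with undef upper elements -> $148 (0b10010100).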
+define <2 x double> @stack_fold_shufpd(<2 x double> %a0, <2 x double> %a1) {
+ ;CHECK-LABEL: stack_fold_shufpd
+ ;CHECK: vshufpd $1, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32> <i32 1, i32 2>
+ ret <2 x double> %2
+}
+
+define <4 x double> @stack_fold_shufpd_ymm(<4 x double> %a0, <4 x double> %a1) {
+ ;CHECK-LABEL: stack_fold_shufpd_ymm
+ ;CHECK: vshufpd $5, {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = shufflevector <4 x double> %a0, <4 x double> %a1, <4 x i32> <i32 1, i32 4, i32 3, i32 6>
+ ret <4 x double> %2
+}
+
+define <4 x float> @stack_fold_shufps(<4 x float> %a0, <4 x float> %a1) {
+ ;CHECK-LABEL: stack_fold_shufps
+ ;CHECK: vshufps $200, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 0, i32 2, i32 4, i32 7>
+ ret <4 x float> %2
+}
+
+define <8 x float> @stack_fold_shufps_ymm(<8 x float> %a0, <8 x float> %a1) {
+ ;CHECK-LABEL: stack_fold_shufps_ymm
+ ;CHECK: vshufps $148, {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = shufflevector <8 x float> %a0, <8 x float> %a1, <8 x i32> <i32 0, i32 1, i32 9, i32 10, i32 undef, i32 undef, i32 undef, i32 undef>
+ ret <8 x float> %2
+}
+
+define <2 x double> @stack_fold_sqrtpd(<2 x double> %a0) {
+ ;CHECK-LABEL: stack_fold_sqrtpd
+ ;CHECK: vsqrtpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double> %a0)
+ ret <2 x double> %2
+}
+declare <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double>) nounwind readnone
+
+define <4 x double> @stack_fold_sqrtpd_ymm(<4 x double> %a0) {
+ ;CHECK-LABEL: stack_fold_sqrtpd_ymm
+ ;CHECK: vsqrtpd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <4 x double> @llvm.x86.avx.sqrt.pd.256(<4 x double> %a0)
+ ret <4 x double> %2
+}
+declare <4 x double> @llvm.x86.avx.sqrt.pd.256(<4 x double>) nounwind readnone
+
+define <4 x float> @stack_fold_sqrtps(<4 x float> %a0) {
+ ;CHECK-LABEL: stack_fold_sqrtps
+ ;CHECK: vsqrtps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float> %a0)
+ ret <4 x float> %2
+}
+declare <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float>) nounwind readnone
+
+define <8 x float> @stack_fold_sqrtps_ymm(<8 x float> %a0) {
+ ;CHECK-LABEL: stack_fold_sqrtps_ymm
+ ;CHECK: vsqrtps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <8 x float> @llvm.x86.avx.sqrt.ps.256(<8 x float> %a0)
+ ret <8 x float> %2
+}
+declare <8 x float> @llvm.x86.avx.sqrt.ps.256(<8 x float>) nounwind readnone
+
+define double @stack_fold_sqrtsd(double %a0) {
+ ;CHECK-LABEL: stack_fold_sqrtsd
+ ;CHECK: vsqrtsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call double @llvm.sqrt.f64(double %a0)
+ ret double %2
+}
+declare double @llvm.sqrt.f64(double) nounwind readnone
+
+define <2 x double> @stack_fold_sqrtsd_int(<2 x double> %a0) {
+ ;CHECK-LABEL: stack_fold_sqrtsd_int
+ ;CHECK: vsqrtsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %a0)
+ ret <2 x double> %2
+}
+declare <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double>) nounwind readnone
+
+define float @stack_fold_sqrtss(float %a0) {
+ ;CHECK-LABEL: stack_fold_sqrtss
+ ;CHECK: vsqrtss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call float @llvm.sqrt.f32(float %a0)
+ ret float %2
+}
+declare float @llvm.sqrt.f32(float) nounwind readnone
+
+define <4 x float> @stack_fold_sqrtss_int(<4 x float> %a0) {
+ ;CHECK-LABEL: stack_fold_sqrtss_int
+ ;CHECK: vsqrtss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %a0)
+ ret <4 x float> %2
+}
+declare <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float>) nounwind readnone
+
+define <2 x double> @stack_fold_subpd(<2 x double> %a0, <2 x double> %a1) {
+ ;CHECK-LABEL: stack_fold_subpd
+ ;CHECK: vsubpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = fsub <2 x double> %a0, %a1
+ ret <2 x double> %2
+}
+
+define <4 x double> @stack_fold_subpd_ymm(<4 x double> %a0, <4 x double> %a1) {
+ ;CHECK-LABEL: stack_fold_subpd_ymm
+ ;CHECK: vsubpd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = fsub <4 x double> %a0, %a1
+ ret <4 x double> %2
+}
+
+define <4 x float> @stack_fold_subps(<4 x float> %a0, <4 x float> %a1) {
+ ;CHECK-LABEL: stack_fold_subps
+ ;CHECK: vsubps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = fsub <4 x float> %a0, %a1
+ ret <4 x float> %2
+}
+
+define <8 x float> @stack_fold_subps_ymm(<8 x float> %a0, <8 x float> %a1) {
+ ;CHECK-LABEL: stack_fold_subps_ymm
+ ;CHECK: vsubps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = fsub <8 x float> %a0, %a1
+ ret <8 x float> %2
+}
+
+define double @stack_fold_subsd(double %a0, double %a1) {
+ ;CHECK-LABEL: stack_fold_subsd
+ ;CHECK: vsubsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = fsub double %a0, %a1
+ ret double %2
+}
+
+define <2 x double> @stack_fold_subsd_int(<2 x double> %a0, <2 x double> %a1) {
+ ;CHECK-LABEL: stack_fold_subsd_int
+ ;CHECK: vsubsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <2 x double> @llvm.x86.sse2.sub.sd(<2 x double> %a0, <2 x double> %a1)
+ ret <2 x double> %2
+}
+declare <2 x double> @llvm.x86.sse2.sub.sd(<2 x double>, <2 x double>) nounwind readnone
+
+define float @stack_fold_subss(float %a0, float %a1) {
+ ;CHECK-LABEL: stack_fold_subss
+ ;CHECK: vsubss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = fsub float %a0, %a1
+ ret float %2
+}
+
+define <4 x float> @stack_fold_subss_int(<4 x float> %a0, <4 x float> %a1) {
+ ;CHECK-LABEL: stack_fold_subss_int
+ ;CHECK: vsubss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <4 x float> @llvm.x86.sse.sub.ss(<4 x float> %a0, <4 x float> %a1)
+ ret <4 x float> %2
+}
+declare <4 x float> @llvm.x86.sse.sub.ss(<4 x float>, <4 x float>) nounwind readnone
+
+define i32 @stack_fold_testpd(<2 x double> %a0, <2 x double> %a1) {
+ ;CHECK-LABEL: stack_fold_testpd
+ ;CHECK: vtestpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call i32 @llvm.x86.avx.vtestc.pd(<2 x double> %a0, <2 x double> %a1)
+ ret i32 %2
+}
+declare i32 @llvm.x86.avx.vtestc.pd(<2 x double>, <2 x double>) nounwind readnone
+
+define i32 @stack_fold_testpd_ymm(<4 x double> %a0, <4 x double> %a1) {
+ ;CHECK-LABEL: stack_fold_testpd_ymm
+ ;CHECK: vtestpd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call i32 @llvm.x86.avx.vtestc.pd.256(<4 x double> %a0, <4 x double> %a1)
+ ret i32 %2
+}
+declare i32 @llvm.x86.avx.vtestc.pd.256(<4 x double>, <4 x double>) nounwind readnone
+
+define i32 @stack_fold_testps(<4 x float> %a0, <4 x float> %a1) {
+ ;CHECK-LABEL: stack_fold_testps
+ ;CHECK: vtestps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call i32 @llvm.x86.avx.vtestc.ps(<4 x float> %a0, <4 x float> %a1)
+ ret i32 %2
+}
+declare i32 @llvm.x86.avx.vtestc.ps(<4 x float>, <4 x float>) nounwind readnone
+
+define i32 @stack_fold_testps_ymm(<8 x float> %a0, <8 x float> %a1) {
+ ;CHECK-LABEL: stack_fold_testps_ymm
+ ;CHECK: vtestps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call i32 @llvm.x86.avx.vtestc.ps.256(<8 x float> %a0, <8 x float> %a1)
+ ret i32 %2
+}
+declare i32 @llvm.x86.avx.vtestc.ps.256(<8 x float>, <8 x float>) nounwind readnone
+
+define i32 @stack_fold_ucomisd(double %a0, double %a1) {
+ ;CHECK-LABEL: stack_fold_ucomisd
+ ;CHECK: vucomisd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = fcmp ueq double %a0, %a1
+ %3 = select i1 %2, i32 1, i32 -1
+ ret i32 %3
+}
+
+define i32 @stack_fold_ucomisd_int(<2 x double> %a0, <2 x double> %a1) {
+ ;CHECK-LABEL: stack_fold_ucomisd_int
+ ;CHECK: vucomisd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call i32 @llvm.x86.sse2.ucomieq.sd(<2 x double> %a0, <2 x double> %a1)
+ ret i32 %2
+}
+declare i32 @llvm.x86.sse2.ucomieq.sd(<2 x double>, <2 x double>) nounwind readnone
+
+define i32 @stack_fold_ucomiss(float %a0, float %a1) {
+ ;CHECK-LABEL: stack_fold_ucomiss
+ ;CHECK: vucomiss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = fcmp ueq float %a0, %a1
+ %3 = select i1 %2, i32 1, i32 -1
+ ret i32 %3
+}
+
+define i32 @stack_fold_ucomiss_int(<4 x float> %a0, <4 x float> %a1) {
+ ;CHECK-LABEL: stack_fold_ucomiss_int
+ ;CHECK: vucomiss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call i32 @llvm.x86.sse.ucomieq.ss(<4 x float> %a0, <4 x float> %a1)
+ ret i32 %2
+}
+declare i32 @llvm.x86.sse.ucomieq.ss(<4 x float>, <4 x float>) nounwind readnone
+
+define <2 x double> @stack_fold_unpckhpd(<2 x double> %a0, <2 x double> %a1) {
+ ;CHECK-LABEL: stack_fold_unpckhpd
+ ;CHECK: vunpckhpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32> <i32 1, i32 3>
+ ; fadd forces execution domain
+ %3 = fadd <2 x double> %2, <double 0x0, double 0x0>
+ ret <2 x double> %3
+}
+
+define <4 x double> @stack_fold_unpckhpd_ymm(<4 x double> %a0, <4 x double> %a1) {
+ ;CHECK-LABEL: stack_fold_unpckhpd_ymm
+ ;CHECK: vunpckhpd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = shufflevector <4 x double> %a0, <4 x double> %a1, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
+ ; fadd forces execution domain
+ %3 = fadd <4 x double> %2, <double 0x0, double 0x0, double 0x0, double 0x0>
+ ret <4 x double> %3
+}
+
+define <4 x float> @stack_fold_unpckhps(<4 x float> %a0, <4 x float> %a1) {
+ ;CHECK-LABEL: stack_fold_unpckhps
+ ;CHECK: vunpckhps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
+ ; fadd forces execution domain
+ %3 = fadd <4 x float> %2, <float 0x0, float 0x0, float 0x0, float 0x0>
+ ret <4 x float> %3
+}
+
+define <8 x float> @stack_fold_unpckhps_ymm(<8 x float> %a0, <8 x float> %a1) {
+ ;CHECK-LABEL: stack_fold_unpckhps_ymm
+ ;CHECK: vunpckhps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = shufflevector <8 x float> %a0, <8 x float> %a1, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
+ ; fadd forces execution domain
+ %3 = fadd <8 x float> %2, <float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0>
+ ret <8 x float> %3
+}
+
+define <2 x double> @stack_fold_unpcklpd(<2 x double> %a0, <2 x double> %a1) {
+ ;CHECK-LABEL: stack_fold_unpcklpd
+ ;CHECK: vunpcklpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32> <i32 0, i32 2>
+ ; fadd forces execution domain
+ %3 = fadd <2 x double> %2, <double 0x0, double 0x0>
+ ret <2 x double> %3
+}
+
+define <4 x double> @stack_fold_unpcklpd_ymm(<4 x double> %a0, <4 x double> %a1) {
+ ;CHECK-LABEL: stack_fold_unpcklpd_ymm
+ ;CHECK: vunpcklpd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = shufflevector <4 x double> %a0, <4 x double> %a1, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
+ ; fadd forces execution domain
+ %3 = fadd <4 x double> %2, <double 0x0, double 0x0, double 0x0, double 0x0>
+ ret <4 x double> %3
+}
+
+define <4 x float> @stack_fold_unpcklps(<4 x float> %a0, <4 x float> %a1) {
+ ;CHECK-LABEL: stack_fold_unpcklps
+ ;CHECK: vunpcklps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
+ ; fadd forces execution domain
+ %3 = fadd <4 x float> %2, <float 0x0, float 0x0, float 0x0, float 0x0>
+ ret <4 x float> %3
+}
+
+define <8 x float> @stack_fold_unpcklps_ymm(<8 x float> %a0, <8 x float> %a1) {
+ ;CHECK-LABEL: stack_fold_unpcklps_ymm
+ ;CHECK: vunpcklps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = shufflevector <8 x float> %a0, <8 x float> %a1, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
+ ; fadd forces execution domain
+ %3 = fadd <8 x float> %2, <float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0>
+ ret <8 x float> %3
+}
+
+define <2 x double> @stack_fold_xorpd(<2 x double> %a0, <2 x double> %a1) {
+ ;CHECK-LABEL: stack_fold_xorpd
+ ;CHECK: vxorpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = bitcast <2 x double> %a0 to <2 x i64>
+ %3 = bitcast <2 x double> %a1 to <2 x i64>
+ %4 = xor <2 x i64> %2, %3
+ %5 = bitcast <2 x i64> %4 to <2 x double>
+ ; fadd forces execution domain
+ %6 = fadd <2 x double> %5, <double 0x0, double 0x0>
+ ret <2 x double> %6
+}
+
+define <4 x double> @stack_fold_xorpd_ymm(<4 x double> %a0, <4 x double> %a1) {
+ ;CHECK-LABEL: stack_fold_xorpd_ymm
+ ;CHECK: vxorpd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = bitcast <4 x double> %a0 to <4 x i64>
+ %3 = bitcast <4 x double> %a1 to <4 x i64>
+ %4 = xor <4 x i64> %2, %3
+ %5 = bitcast <4 x i64> %4 to <4 x double>
+ ; fadd forces execution domain
+ %6 = fadd <4 x double> %5, <double 0x0, double 0x0, double 0x0, double 0x0>
+ ret <4 x double> %6
+}
+
+define <4 x float> @stack_fold_xorps(<4 x float> %a0, <4 x float> %a1) {
+ ;CHECK-LABEL: stack_fold_xorps
+ ;CHECK: vxorps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = bitcast <4 x float> %a0 to <2 x i64>
+ %3 = bitcast <4 x float> %a1 to <2 x i64>
+ %4 = xor <2 x i64> %2, %3
+ %5 = bitcast <2 x i64> %4 to <4 x float>
+ ; fadd forces execution domain
+ %6 = fadd <4 x float> %5, <float 0x0, float 0x0, float 0x0, float 0x0>
+ ret <4 x float> %6
+}
+
+define <8 x float> @stack_fold_xorps_ymm(<8 x float> %a0, <8 x float> %a1) {
+ ;CHECK-LABEL: stack_fold_xorps_ymm
+ ;CHECK: vxorps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = bitcast <8 x float> %a0 to <4 x i64>
+ %3 = bitcast <8 x float> %a1 to <4 x i64>
+ %4 = xor <4 x i64> %2, %3
+ %5 = bitcast <4 x i64> %4 to <8 x float>
+ ; fadd forces execution domain
+ %6 = fadd <8 x float> %5, <float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0>
+ ret <8 x float> %6
+}
diff --git a/test/CodeGen/X86/stack-folding-fp-sse42.ll b/test/CodeGen/X86/stack-folding-fp-sse42.ll
new file mode 100644
index 000000000000..95f0c3d3a188
--- /dev/null
+++ b/test/CodeGen/X86/stack-folding-fp-sse42.ll
@@ -0,0 +1,1097 @@
+; RUN: llc -O3 -disable-peephole -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse4.2 < %s | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-unknown"
+
+; Stack reload folding tests.
+;
+; By including a nop call with side effects, we can force a partial register spill of the
+; relevant registers and check that the reload is correctly folded into the instruction.
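+;
+; As an illustrative sketch only (the name @stack_fold_example is hypothetical, not one of
+; the tests below): the clobber list appears to be chosen so that the asm's "=x" result plus
+; the live arguments need more XMM registers than are left unclobbered, so at least one
+; argument is spilled around the call, and the CHECK line verifies that its reload is folded
+; into the instruction under test as a memory operand.
+;
+;   define <2 x double> @stack_fold_example(<2 x double> %a0, <2 x double> %a1) {
+;     ;CHECK-LABEL: stack_fold_example
+;     ;CHECK: addpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+;     %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+;     %2 = fadd <2 x double> %a0, %a1
+;     ret <2 x double> %2
+;   }
+;
+; The tests that follow all use this same shape, one function per instruction being checked.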
+
+define <2 x double> @stack_fold_addpd(<2 x double> %a0, <2 x double> %a1) {
+ ;CHECK-LABEL: stack_fold_addpd
+ ;CHECK: addpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = fadd <2 x double> %a0, %a1
+ ret <2 x double> %2
+}
+
+define <4 x float> @stack_fold_addps(<4 x float> %a0, <4 x float> %a1) {
+ ;CHECK-LABEL: stack_fold_addps
+ ;CHECK: addps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = fadd <4 x float> %a0, %a1
+ ret <4 x float> %2
+}
+
+define double @stack_fold_addsd(double %a0, double %a1) {
+ ;CHECK-LABEL: stack_fold_addsd
+ ;CHECK: addsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = fadd double %a0, %a1
+ ret double %2
+}
+
+define <2 x double> @stack_fold_addsd_int(<2 x double> %a0, <2 x double> %a1) {
+ ;CHECK-LABEL: stack_fold_addsd_int
+ ;CHECK: addsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <2 x double> @llvm.x86.sse2.add.sd(<2 x double> %a0, <2 x double> %a1)
+ ret <2 x double> %2
+}
+declare <2 x double> @llvm.x86.sse2.add.sd(<2 x double>, <2 x double>) nounwind readnone
+
+define float @stack_fold_addss(float %a0, float %a1) {
+ ;CHECK-LABEL: stack_fold_addss
+ ;CHECK: addss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = fadd float %a0, %a1
+ ret float %2
+}
+
+define <4 x float> @stack_fold_addss_int(<4 x float> %a0, <4 x float> %a1) {
+ ;CHECK-LABEL: stack_fold_addss_int
+ ;CHECK: addss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <4 x float> @llvm.x86.sse.add.ss(<4 x float> %a0, <4 x float> %a1)
+ ret <4 x float> %2
+}
+declare <4 x float> @llvm.x86.sse.add.ss(<4 x float>, <4 x float>) nounwind readnone
+
+define <2 x double> @stack_fold_addsubpd(<2 x double> %a0, <2 x double> %a1) {
+ ;CHECK-LABEL: stack_fold_addsubpd
+ ;CHECK: addsubpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double> %a0, <2 x double> %a1)
+ ret <2 x double> %2
+}
+declare <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double>, <2 x double>) nounwind readnone
+
+define <4 x float> @stack_fold_addsubps(<4 x float> %a0, <4 x float> %a1) {
+ ;CHECK-LABEL: stack_fold_addsubps
+ ;CHECK: addsubps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float> %a0, <4 x float> %a1)
+ ret <4 x float> %2
+}
+declare <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float>, <4 x float>) nounwind readnone
+
+define <2 x double> @stack_fold_andnpd(<2 x double> %a0, <2 x double> %a1) {
+ ;CHECK-LABEL: stack_fold_andnpd
+ ;CHECK: andnpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = bitcast <2 x double> %a0 to <2 x i64>
+ %3 = bitcast <2 x double> %a1 to <2 x i64>
+ %4 = xor <2 x i64> %2, <i64 -1, i64 -1>
+ %5 = and <2 x i64> %4, %3
+ %6 = bitcast <2 x i64> %5 to <2 x double>
+ ; fadd forces execution domain
+ %7 = fadd <2 x double> %6, <double 0x0, double 0x0>
+ ret <2 x double> %7
+}
+
+define <4 x float> @stack_fold_andnps(<4 x float> %a0, <4 x float> %a1) {
+ ;CHECK-LABEL: stack_fold_andnps
+ ;CHECK: andnps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = bitcast <4 x float> %a0 to <2 x i64>
+ %3 = bitcast <4 x float> %a1 to <2 x i64>
+ %4 = xor <2 x i64> %2, <i64 -1, i64 -1>
+ %5 = and <2 x i64> %4, %3
+ %6 = bitcast <2 x i64> %5 to <4 x float>
+ ; fadd forces execution domain
+ %7 = fadd <4 x float> %6, <float 0x0, float 0x0, float 0x0, float 0x0>
+ ret <4 x float> %7
+}
+
+define <2 x double> @stack_fold_andpd(<2 x double> %a0, <2 x double> %a1) {
+ ;CHECK-LABEL: stack_fold_andpd
+ ;CHECK: andpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = bitcast <2 x double> %a0 to <2 x i64>
+ %3 = bitcast <2 x double> %a1 to <2 x i64>
+ %4 = and <2 x i64> %2, %3
+ %5 = bitcast <2 x i64> %4 to <2 x double>
+ ; fadd forces execution domain
+ %6 = fadd <2 x double> %5, <double 0x0, double 0x0>
+ ret <2 x double> %6
+}
+
+define <4 x float> @stack_fold_andps(<4 x float> %a0, <4 x float> %a1) {
+ ;CHECK-LABEL: stack_fold_andps
+ ;CHECK: andps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = bitcast <4 x float> %a0 to <2 x i64>
+ %3 = bitcast <4 x float> %a1 to <2 x i64>
+ %4 = and <2 x i64> %2, %3
+ %5 = bitcast <2 x i64> %4 to <4 x float>
+ ; fadd forces execution domain
+ %6 = fadd <4 x float> %5, <float 0x0, float 0x0, float 0x0, float 0x0>
+ ret <4 x float> %6
+}
+
+define <2 x double> @stack_fold_blendpd(<2 x double> %a0, <2 x double> %a1) {
+ ;CHECK-LABEL: stack_fold_blendpd
+ ;CHECK: blendpd $2, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = select <2 x i1> <i1 1, i1 0>, <2 x double> %a0, <2 x double> %a1
+ ret <2 x double> %2
+}
+
+define <4 x float> @stack_fold_blendps(<4 x float> %a0, <4 x float> %a1) {
+ ;CHECK-LABEL: stack_fold_blendps
+ ;CHECK: blendps $6, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = select <4 x i1> <i1 1, i1 0, i1 0, i1 1>, <4 x float> %a0, <4 x float> %a1
+ ret <4 x float> %2
+}
+
+define <2 x double> @stack_fold_blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x double> %c) {
+ ;CHECK-LABEL: stack_fold_blendvpd
+ ;CHECK: blendvpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <2 x double> @llvm.x86.sse41.blendvpd(<2 x double> %a1, <2 x double> %c, <2 x double> %a0)
+ ret <2 x double> %2
+}
+declare <2 x double> @llvm.x86.sse41.blendvpd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
+
+define <4 x float> @stack_fold_blendvps(<4 x float> %a0, <4 x float> %a1, <4 x float> %c) {
+ ;CHECK-LABEL: stack_fold_blendvps
+ ;CHECK: blendvps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %a1, <4 x float> %c, <4 x float> %a0)
+ ret <4 x float> %2
+}
+declare <4 x float> @llvm.x86.sse41.blendvps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
+
+define <2 x double> @stack_fold_cmppd(<2 x double> %a0, <2 x double> %a1) {
+ ;CHECK-LABEL: stack_fold_cmppd
+ ;CHECK: cmpeqpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %a0, <2 x double> %a1, i8 0)
+ ret <2 x double> %2
+}
+declare <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double>, <2 x double>, i8) nounwind readnone
+
+define <4 x float> @stack_fold_cmpps(<4 x float> %a0, <4 x float> %a1) {
+ ;CHECK-LABEL: stack_fold_cmpps
+ ;CHECK: cmpeqps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <4 x float> @llvm.x86.sse.cmp.ps(<4 x float> %a0, <4 x float> %a1, i8 0)
+ ret <4 x float> %2
+}
+declare <4 x float> @llvm.x86.sse.cmp.ps(<4 x float>, <4 x float>, i8) nounwind readnone
+
+define i32 @stack_fold_cmpsd(double %a0, double %a1) {
+ ;CHECK-LABEL: stack_fold_cmpsd
+ ;CHECK: cmpeqsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = fcmp oeq double %a0, %a1
+ %3 = zext i1 %2 to i32
+ ret i32 %3
+}
+
+define <2 x double> @stack_fold_cmpsd_int(<2 x double> %a0, <2 x double> %a1) {
+ ;CHECK-LABEL: stack_fold_cmpsd_int
+ ;CHECK: cmpeqsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 0)
+ ret <2 x double> %2
+}
+declare <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double>, <2 x double>, i8) nounwind readnone
+
+define i32 @stack_fold_cmpss(float %a0, float %a1) {
+ ;CHECK-LABEL: stack_fold_cmpss
+ ;CHECK: cmpeqss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = fcmp oeq float %a0, %a1
+ %3 = zext i1 %2 to i32
+ ret i32 %3
+}
+
+define <4 x float> @stack_fold_cmpss_int(<4 x float> %a0, <4 x float> %a1) {
+ ;CHECK-LABEL: stack_fold_cmpss_int
+ ;CHECK: cmpeqss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a0, <4 x float> %a1, i8 0)
+ ret <4 x float> %2
+}
+declare <4 x float> @llvm.x86.sse.cmp.ss(<4 x float>, <4 x float>, i8) nounwind readnone
+
+; TODO stack_fold_comisd
+
+define i32 @stack_fold_comisd_int(<2 x double> %a0, <2 x double> %a1) {
+ ;CHECK-LABEL: stack_fold_comisd_int
+ ;CHECK: comisd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call i32 @llvm.x86.sse2.comieq.sd(<2 x double> %a0, <2 x double> %a1)
+ ret i32 %2
+}
+declare i32 @llvm.x86.sse2.comieq.sd(<2 x double>, <2 x double>) nounwind readnone
+
+; TODO stack_fold_comiss
+
+define i32 @stack_fold_comiss_int(<4 x float> %a0, <4 x float> %a1) {
+ ;CHECK-LABEL: stack_fold_comiss_int
+ ;CHECK: comiss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call i32 @llvm.x86.sse.comieq.ss(<4 x float> %a0, <4 x float> %a1)
+ ret i32 %2
+}
+declare i32 @llvm.x86.sse.comieq.ss(<4 x float>, <4 x float>) nounwind readnone
+
+define <2 x double> @stack_fold_cvtdq2pd(<4 x i32> %a0) {
+ ;CHECK-LABEL: stack_fold_cvtdq2pd
+ ;CHECK: cvtdq2pd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32> %a0)
+ ret <2 x double> %2
+}
+declare <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32>) nounwind readnone
+
+define <4 x float> @stack_fold_cvtdq2ps(<4 x i32> %a0) {
+ ;CHECK-LABEL: stack_fold_cvtdq2ps
+ ;CHECK: cvtdq2ps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = sitofp <4 x i32> %a0 to <4 x float>
+ ret <4 x float> %2
+}
+
+define <4 x i32> @stack_fold_cvtpd2dq(<2 x double> %a0) {
+ ;CHECK-LABEL: stack_fold_cvtpd2dq
+ ;CHECK: cvtpd2dq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> %a0)
+ ret <4 x i32> %2
+}
+declare <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double>) nounwind readnone
+
+define <2 x float> @stack_fold_cvtpd2ps(<2 x double> %a0) {
+ ;CHECK-LABEL: stack_fold_cvtpd2ps
+ ;CHECK: cvtpd2ps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = fptrunc <2 x double> %a0 to <2 x float>
+ ret <2 x float> %2
+}
+
+define <4 x i32> @stack_fold_cvtps2dq(<4 x float> %a0) {
+ ;CHECK-LABEL: stack_fold_cvtps2dq
+ ;CHECK: cvtps2dq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float> %a0)
+ ret <4 x i32> %2
+}
+declare <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float>) nounwind readnone
+
+define <2 x double> @stack_fold_cvtps2pd(<4 x float> %a0) {
+ ;CHECK-LABEL: stack_fold_cvtps2pd
+ ;CHECK: cvtps2pd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float> %a0)
+ ret <2 x double> %2
+}
+declare <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float>) nounwind readnone
+
+; TODO stack_fold_cvtsd2si
+
+define i32 @stack_fold_cvtsd2si_int(<2 x double> %a0) {
+ ;CHECK-LABEL: stack_fold_cvtsd2si_int
+ ;CHECK: cvtsd2si {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> %a0)
+ ret i32 %2
+}
+declare i32 @llvm.x86.sse2.cvtsd2si(<2 x double>) nounwind readnone
+
+; TODO stack_fold_cvtsd2si64
+
+define i64 @stack_fold_cvtsd2si64_int(<2 x double> %a0) {
+ ;CHECK-LABEL: stack_fold_cvtsd2si64_int
+ ;CHECK: cvtsd2siq {{-?[0-9]*}}(%rsp), %rax {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call i64 @llvm.x86.sse2.cvtsd2si64(<2 x double> %a0)
+ ret i64 %2
+}
+declare i64 @llvm.x86.sse2.cvtsd2si64(<2 x double>) nounwind readnone
+
+; TODO stack_fold_cvtsd2ss
+
+define <4 x float> @stack_fold_cvtsd2ss_int(<2 x double> %a0) optsize {
+ ;CHECK-LABEL: stack_fold_cvtsd2ss_int
+ ;CHECK: cvtsd2ss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> <float 0x0, float 0x0, float 0x0, float 0x0>, <2 x double> %a0)
+ ret <4 x float> %2
+}
+declare <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float>, <2 x double>) nounwind readnone
+
+define double @stack_fold_cvtsi2sd(i32 %a0) optsize {
+ ;CHECK-LABEL: stack_fold_cvtsi2sd
+ ;CHECK: cvtsi2sdl {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
+ %2 = sitofp i32 %a0 to double
+ ret double %2
+}
+
+define <2 x double> @stack_fold_cvtsi2sd_int(i32 %a0) {
+ ;CHECK-LABEL: stack_fold_cvtsi2sd_int
+ ;CHECK: cvtsi2sdl {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
+ %2 = call <2 x double> @llvm.x86.sse2.cvtsi2sd(<2 x double> <double 0x0, double 0x0>, i32 %a0)
+ ret <2 x double> %2
+}
+declare <2 x double> @llvm.x86.sse2.cvtsi2sd(<2 x double>, i32) nounwind readnone
+
+define double @stack_fold_cvtsi642sd(i64 %a0) optsize {
+ ;CHECK-LABEL: stack_fold_cvtsi642sd
+ ;CHECK: cvtsi2sdq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
+ %2 = sitofp i64 %a0 to double
+ ret double %2
+}
+
+define <2 x double> @stack_fold_cvtsi642sd_int(i64 %a0) {
+ ;CHECK-LABEL: stack_fold_cvtsi642sd_int
+ ;CHECK: cvtsi2sdq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
+ %2 = call <2 x double> @llvm.x86.sse2.cvtsi642sd(<2 x double> <double 0x0, double 0x0>, i64 %a0)
+ ret <2 x double> %2
+}
+declare <2 x double> @llvm.x86.sse2.cvtsi642sd(<2 x double>, i64) nounwind readnone
+
+define float @stack_fold_cvtsi2ss(i32 %a0) optsize {
+ ;CHECK-LABEL: stack_fold_cvtsi2ss
+ ;CHECK: cvtsi2ssl {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
+ %2 = sitofp i32 %a0 to float
+ ret float %2
+}
+
+define <4 x float> @stack_fold_cvtsi2ss_int(i32 %a0) {
+ ;CHECK-LABEL: stack_fold_cvtsi2ss_int
+ ;CHECK: cvtsi2ssl {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
+ %2 = call <4 x float> @llvm.x86.sse.cvtsi2ss(<4 x float> <float 0x0, float 0x0, float 0x0, float 0x0>, i32 %a0)
+ ret <4 x float> %2
+}
+declare <4 x float> @llvm.x86.sse.cvtsi2ss(<4 x float>, i32) nounwind readnone
+
+define float @stack_fold_cvtsi642ss(i64 %a0) optsize {
+ ;CHECK-LABEL: stack_fold_cvtsi642ss
+ ;CHECK: cvtsi2ssq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
+ %2 = sitofp i64 %a0 to float
+ ret float %2
+}
+
+define <4 x float> @stack_fold_cvtsi642ss_int(i64 %a0) {
+ ;CHECK-LABEL: stack_fold_cvtsi642ss_int
+ ;CHECK: cvtsi2ssq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
+ %2 = call <4 x float> @llvm.x86.sse.cvtsi642ss(<4 x float> <float 0x0, float 0x0, float 0x0, float 0x0>, i64 %a0)
+ ret <4 x float> %2
+}
+declare <4 x float> @llvm.x86.sse.cvtsi642ss(<4 x float>, i64) nounwind readnone
+
+define double @stack_fold_cvtss2sd(float %a0) optsize {
+ ;CHECK-LABEL: stack_fold_cvtss2sd
+ ;CHECK: cvtss2sd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = fpext float %a0 to double
+ ret double %2
+}
+
+define <2 x double> @stack_fold_cvtss2sd_int(<4 x float> %a0) optsize {
+ ;CHECK-LABEL: stack_fold_cvtss2sd_int
+ ;CHECK: cvtss2sd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double> <double 0x0, double 0x0>, <4 x float> %a0)
+ ret <2 x double> %2
+}
+declare <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double>, <4 x float>) nounwind readnone
+
+; TODO stack_fold_cvtss2si
+
+define i32 @stack_fold_cvtss2si_int(<4 x float> %a0) {
+ ;CHECK-LABEL: stack_fold_cvtss2si_int
+ ;CHECK: cvtss2si {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call i32 @llvm.x86.sse.cvtss2si(<4 x float> %a0)
+ ret i32 %2
+}
+declare i32 @llvm.x86.sse.cvtss2si(<4 x float>) nounwind readnone
+
+; TODO stack_fold_cvtss2si64
+
+define i64 @stack_fold_cvtss2si64_int(<4 x float> %a0) {
+ ;CHECK-LABEL: stack_fold_cvtss2si64_int
+ ;CHECK: cvtss2si {{-?[0-9]*}}(%rsp), %rax {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call i64 @llvm.x86.sse.cvtss2si64(<4 x float> %a0)
+ ret i64 %2
+}
+declare i64 @llvm.x86.sse.cvtss2si64(<4 x float>) nounwind readnone
+
+define <4 x i32> @stack_fold_cvttpd2dq(<2 x double> %a0) {
+ ;CHECK-LABEL: stack_fold_cvttpd2dq
+ ;CHECK: cvttpd2dq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> %a0)
+ ret <4 x i32> %2
+}
+declare <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double>) nounwind readnone
+
+define <4 x i32> @stack_fold_cvttps2dq(<4 x float> %a0) {
+ ;CHECK-LABEL: stack_fold_cvttps2dq
+ ;CHECK: cvttps2dq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = fptosi <4 x float> %a0 to <4 x i32>
+ ret <4 x i32> %2
+}
+
+define i32 @stack_fold_cvttsd2si(double %a0) {
+ ;CHECK-LABEL: stack_fold_cvttsd2si
+ ;CHECK: cvttsd2si {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 8-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = fptosi double %a0 to i32
+ ret i32 %2
+}
+
+define i32 @stack_fold_cvttsd2si_int(<2 x double> %a0) {
+ ;CHECK-LABEL: stack_fold_cvttsd2si_int
+ ;CHECK: cvttsd2si {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> %a0)
+ ret i32 %2
+}
+declare i32 @llvm.x86.sse2.cvttsd2si(<2 x double>) nounwind readnone
+
+define i64 @stack_fold_cvttsd2si64(double %a0) {
+ ;CHECK-LABEL: stack_fold_cvttsd2si64
+ ;CHECK: cvttsd2si {{-?[0-9]*}}(%rsp), %rax {{.*#+}} 8-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = fptosi double %a0 to i64
+ ret i64 %2
+}
+
+define i64 @stack_fold_cvttsd2si64_int(<2 x double> %a0) {
+ ;CHECK-LABEL: stack_fold_cvttsd2si64_int
+ ;CHECK: cvttsd2si {{-?[0-9]*}}(%rsp), %rax {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call i64 @llvm.x86.sse2.cvttsd2si64(<2 x double> %a0)
+ ret i64 %2
+}
+declare i64 @llvm.x86.sse2.cvttsd2si64(<2 x double>) nounwind readnone
+
+define i32 @stack_fold_cvttss2si(float %a0) {
+ ;CHECK-LABEL: stack_fold_cvttss2si
+ ;CHECK: cvttss2si {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 4-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = fptosi float %a0 to i32
+ ret i32 %2
+}
+
+define i32 @stack_fold_cvttss2si_int(<4 x float> %a0) {
+ ;CHECK-LABEL: stack_fold_cvttss2si_int
+ ;CHECK: cvttss2si {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call i32 @llvm.x86.sse.cvttss2si(<4 x float> %a0)
+ ret i32 %2
+}
+declare i32 @llvm.x86.sse.cvttss2si(<4 x float>) nounwind readnone
+
+define i64 @stack_fold_cvttss2si64(float %a0) {
+ ;CHECK-LABEL: stack_fold_cvttss2si64
+ ;CHECK: cvttss2si {{-?[0-9]*}}(%rsp), %rax {{.*#+}} 4-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = fptosi float %a0 to i64
+ ret i64 %2
+}
+
+define i64 @stack_fold_cvttss2si64_int(<4 x float> %a0) {
+ ;CHECK-LABEL: stack_fold_cvttss2si64_int
+ ;CHECK: cvttss2si {{-?[0-9]*}}(%rsp), %rax {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call i64 @llvm.x86.sse.cvttss2si64(<4 x float> %a0)
+ ret i64 %2
+}
+declare i64 @llvm.x86.sse.cvttss2si64(<4 x float>) nounwind readnone
+
+define <2 x double> @stack_fold_divpd(<2 x double> %a0, <2 x double> %a1) {
+ ;CHECK-LABEL: stack_fold_divpd
+ ;CHECK: divpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = fdiv <2 x double> %a0, %a1
+ ret <2 x double> %2
+}
+
+define <4 x float> @stack_fold_divps(<4 x float> %a0, <4 x float> %a1) {
+ ;CHECK-LABEL: stack_fold_divps
+ ;CHECK: divps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = fdiv <4 x float> %a0, %a1
+ ret <4 x float> %2
+}
+
+define double @stack_fold_divsd(double %a0, double %a1) {
+ ;CHECK-LABEL: stack_fold_divsd
+ ;CHECK: divsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = fdiv double %a0, %a1
+ ret double %2
+}
+
+define <2 x double> @stack_fold_divsd_int(<2 x double> %a0, <2 x double> %a1) {
+ ;CHECK-LABEL: stack_fold_divsd_int
+ ;CHECK: divsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <2 x double> @llvm.x86.sse2.div.sd(<2 x double> %a0, <2 x double> %a1)
+ ret <2 x double> %2
+}
+declare <2 x double> @llvm.x86.sse2.div.sd(<2 x double>, <2 x double>) nounwind readnone
+
+define float @stack_fold_divss(float %a0, float %a1) {
+ ;CHECK-LABEL: stack_fold_divss
+ ;CHECK: divss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = fdiv float %a0, %a1
+ ret float %2
+}
+
+define <4 x float> @stack_fold_divss_int(<4 x float> %a0, <4 x float> %a1) {
+ ;CHECK-LABEL: stack_fold_divss_int
+ ;CHECK: divss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <4 x float> @llvm.x86.sse.div.ss(<4 x float> %a0, <4 x float> %a1)
+ ret <4 x float> %2
+}
+declare <4 x float> @llvm.x86.sse.div.ss(<4 x float>, <4 x float>) nounwind readnone
+
+define <2 x double> @stack_fold_dppd(<2 x double> %a0, <2 x double> %a1) {
+ ;CHECK-LABEL: stack_fold_dppd
+ ;CHECK: dppd $7, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <2 x double> @llvm.x86.sse41.dppd(<2 x double> %a0, <2 x double> %a1, i8 7)
+ ret <2 x double> %2
+}
+declare <2 x double> @llvm.x86.sse41.dppd(<2 x double>, <2 x double>, i8) nounwind readnone
+
+define <4 x float> @stack_fold_dpps(<4 x float> %a0, <4 x float> %a1) {
+ ;CHECK-LABEL: stack_fold_dpps
+ ;CHECK: dpps $7, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <4 x float> @llvm.x86.sse41.dpps(<4 x float> %a0, <4 x float> %a1, i8 7)
+ ret <4 x float> %2
+}
+declare <4 x float> @llvm.x86.sse41.dpps(<4 x float>, <4 x float>, i8) nounwind readnone
+
+define i32 @stack_fold_extractps(<4 x float> %a0) {
+ ;CHECK-LABEL: stack_fold_extractps
+ ;CHECK: extractps $1, {{%xmm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp) {{.*#+}} 4-byte Folded Spill
+ ;CHECK: movl {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 4-byte Reload
+ %1 = extractelement <4 x float> %a0, i32 1
+ %2 = bitcast float %1 to i32
+ %3 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
+ ret i32 %2
+}
+
+define <2 x double> @stack_fold_haddpd(<2 x double> %a0, <2 x double> %a1) {
+ ;CHECK-LABEL: stack_fold_haddpd
+ ;CHECK: haddpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double> %a0, <2 x double> %a1)
+ ret <2 x double> %2
+}
+declare <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double>, <2 x double>) nounwind readnone
+
+define <4 x float> @stack_fold_haddps(<4 x float> %a0, <4 x float> %a1) {
+ ;CHECK-LABEL: stack_fold_haddps
+ ;CHECK: haddps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float> %a0, <4 x float> %a1)
+ ret <4 x float> %2
+}
+declare <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float>, <4 x float>) nounwind readnone
+
+define <2 x double> @stack_fold_hsubpd(<2 x double> %a0, <2 x double> %a1) {
+ ;CHECK-LABEL: stack_fold_hsubpd
+ ;CHECK: hsubpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double> %a0, <2 x double> %a1)
+ ret <2 x double> %2
+}
+declare <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double>, <2 x double>) nounwind readnone
+
+define <4 x float> @stack_fold_hsubps(<4 x float> %a0, <4 x float> %a1) {
+ ;CHECK-LABEL: stack_fold_hsubps
+ ;CHECK: hsubps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float> %a0, <4 x float> %a1)
+ ret <4 x float> %2
+}
+declare <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float>, <4 x float>) nounwind readnone
+
+; TODO stack_fold_insertps
+
+define <2 x double> @stack_fold_maxpd(<2 x double> %a0, <2 x double> %a1) {
+ ;CHECK-LABEL: stack_fold_maxpd
+ ;CHECK: maxpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> %a0, <2 x double> %a1)
+ ret <2 x double> %2
+}
+declare <2 x double> @llvm.x86.sse2.max.pd(<2 x double>, <2 x double>) nounwind readnone
+
+define <4 x float> @stack_fold_maxps(<4 x float> %a0, <4 x float> %a1) {
+ ;CHECK-LABEL: stack_fold_maxps
+ ;CHECK: maxps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %a0, <4 x float> %a1)
+ ret <4 x float> %2
+}
+declare <4 x float> @llvm.x86.sse.max.ps(<4 x float>, <4 x float>) nounwind readnone
+
+define double @stack_fold_maxsd(double %a0, double %a1) {
+ ;CHECK-LABEL: stack_fold_maxsd
+ ;CHECK: maxsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = fcmp ogt double %a0, %a1
+ %3 = select i1 %2, double %a0, double %a1
+ ret double %3
+}
+
+define <2 x double> @stack_fold_maxsd_int(<2 x double> %a0, <2 x double> %a1) {
+ ;CHECK-LABEL: stack_fold_maxsd_int
+ ;CHECK: maxsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> %a0, <2 x double> %a1)
+ ret <2 x double> %2
+}
+declare <2 x double> @llvm.x86.sse2.max.sd(<2 x double>, <2 x double>) nounwind readnone
+
+define float @stack_fold_maxss(float %a0, float %a1) {
+ ;CHECK-LABEL: stack_fold_maxss
+ ;CHECK: maxss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = fcmp ogt float %a0, %a1
+ %3 = select i1 %2, float %a0, float %a1
+ ret float %3
+}
+
+define <4 x float> @stack_fold_maxss_int(<4 x float> %a0, <4 x float> %a1) {
+ ;CHECK-LABEL: stack_fold_maxss_int
+ ;CHECK: maxss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %a0, <4 x float> %a1)
+ ret <4 x float> %2
+}
+declare <4 x float> @llvm.x86.sse.max.ss(<4 x float>, <4 x float>) nounwind readnone
+
+define <2 x double> @stack_fold_minpd(<2 x double> %a0, <2 x double> %a1) {
+ ;CHECK-LABEL: stack_fold_minpd
+ ;CHECK: minpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> %a0, <2 x double> %a1)
+ ret <2 x double> %2
+}
+declare <2 x double> @llvm.x86.sse2.min.pd(<2 x double>, <2 x double>) nounwind readnone
+
+define <4 x float> @stack_fold_minps(<4 x float> %a0, <4 x float> %a1) {
+ ;CHECK-LABEL: stack_fold_minps
+ ;CHECK: minps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %a0, <4 x float> %a1)
+ ret <4 x float> %2
+}
+declare <4 x float> @llvm.x86.sse.min.ps(<4 x float>, <4 x float>) nounwind readnone
+
+define double @stack_fold_minsd(double %a0, double %a1) {
+ ;CHECK-LABEL: stack_fold_minsd
+ ;CHECK: minsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = fcmp olt double %a0, %a1
+ %3 = select i1 %2, double %a0, double %a1
+ ret double %3
+}
+
+define <2 x double> @stack_fold_minsd_int(<2 x double> %a0, <2 x double> %a1) {
+ ;CHECK-LABEL: stack_fold_minsd_int
+ ;CHECK: minsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> %a0, <2 x double> %a1)
+ ret <2 x double> %2
+}
+declare <2 x double> @llvm.x86.sse2.min.sd(<2 x double>, <2 x double>) nounwind readnone
+
+define float @stack_fold_minss(float %a0, float %a1) {
+ ;CHECK-LABEL: stack_fold_minss
+ ;CHECK: minss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = fcmp olt float %a0, %a1
+ %3 = select i1 %2, float %a0, float %a1
+ ret float %3
+}
+
+define <4 x float> @stack_fold_minss_int(<4 x float> %a0, <4 x float> %a1) {
+ ;CHECK-LABEL: stack_fold_minss_int
+ ;CHECK: minss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %a0, <4 x float> %a1)
+ ret <4 x float> %2
+}
+declare <4 x float> @llvm.x86.sse.min.ss(<4 x float>, <4 x float>) nounwind readnone
+
+define <2 x double> @stack_fold_movddup(<2 x double> %a0) {
+ ;CHECK-LABEL: stack_fold_movddup
+ ;CHECK: movddup {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = shufflevector <2 x double> %a0, <2 x double> undef, <2 x i32> <i32 0, i32 0>
+ ret <2 x double> %2
+}
+; TODO stack_fold_movhpd (load / store)
+; TODO stack_fold_movhps (load / store)
+
+; TODO stack_fold_movlpd (load / store)
+; TODO stack_fold_movlps (load / store)
+
+define <4 x float> @stack_fold_movshdup(<4 x float> %a0) {
+ ;CHECK-LABEL: stack_fold_movshdup
+ ;CHECK: movshdup {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = shufflevector <4 x float> %a0, <4 x float> undef, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
+ ret <4 x float> %2
+}
+
+define <4 x float> @stack_fold_movsldup(<4 x float> %a0) {
+ ;CHECK-LABEL: stack_fold_movsldup
+ ;CHECK: movsldup {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = shufflevector <4 x float> %a0, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
+ ret <4 x float> %2
+}
+
+define <2 x double> @stack_fold_mulpd(<2 x double> %a0, <2 x double> %a1) {
+ ;CHECK-LABEL: stack_fold_mulpd
+ ;CHECK: mulpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = fmul <2 x double> %a0, %a1
+ ret <2 x double> %2
+}
+
+define <4 x float> @stack_fold_mulps(<4 x float> %a0, <4 x float> %a1) {
+ ;CHECK-LABEL: stack_fold_mulps
+ ;CHECK: mulps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = fmul <4 x float> %a0, %a1
+ ret <4 x float> %2
+}
+
+define double @stack_fold_mulsd(double %a0, double %a1) {
+ ;CHECK-LABEL: stack_fold_mulsd
+ ;CHECK: mulsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = fmul double %a0, %a1
+ ret double %2
+}
+
+define <2 x double> @stack_fold_mulsd_int(<2 x double> %a0, <2 x double> %a1) {
+ ;CHECK-LABEL: stack_fold_mulsd_int
+ ;CHECK: mulsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <2 x double> @llvm.x86.sse2.mul.sd(<2 x double> %a0, <2 x double> %a1)
+ ret <2 x double> %2
+}
+declare <2 x double> @llvm.x86.sse2.mul.sd(<2 x double>, <2 x double>) nounwind readnone
+
+define float @stack_fold_mulss(float %a0, float %a1) {
+ ;CHECK-LABEL: stack_fold_mulss
+ ;CHECK: mulss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = fmul float %a0, %a1
+ ret float %2
+}
+
+define <4 x float> @stack_fold_mulss_int(<4 x float> %a0, <4 x float> %a1) {
+ ;CHECK-LABEL: stack_fold_mulss_int
+ ;CHECK: mulss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <4 x float> @llvm.x86.sse.mul.ss(<4 x float> %a0, <4 x float> %a1)
+ ret <4 x float> %2
+}
+declare <4 x float> @llvm.x86.sse.mul.ss(<4 x float>, <4 x float>) nounwind readnone
+
+define <2 x double> @stack_fold_orpd(<2 x double> %a0, <2 x double> %a1) {
+ ;CHECK-LABEL: stack_fold_orpd
+ ;CHECK: orpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = bitcast <2 x double> %a0 to <2 x i64>
+ %3 = bitcast <2 x double> %a1 to <2 x i64>
+ %4 = or <2 x i64> %2, %3
+ %5 = bitcast <2 x i64> %4 to <2 x double>
+ ; fadd forces execution domain
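+ ; (without an FP-domain use the or could be emitted as an integer por, and the orpd fold under test would not be exercised)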
+ %6 = fadd <2 x double> %5, <double 0x0, double 0x0>
+ ret <2 x double> %6
+}
+
+define <4 x float> @stack_fold_orps(<4 x float> %a0, <4 x float> %a1) {
+ ;CHECK-LABEL: stack_fold_orps
+ ;CHECK: orps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = bitcast <4 x float> %a0 to <2 x i64>
+ %3 = bitcast <4 x float> %a1 to <2 x i64>
+ %4 = or <2 x i64> %2, %3
+ %5 = bitcast <2 x i64> %4 to <4 x float>
+ ; fadd forces execution domain
+ %6 = fadd <4 x float> %5, <float 0x0, float 0x0, float 0x0, float 0x0>
+ ret <4 x float> %6
+}
+
+; TODO stack_fold_rcpps
+
+define <4 x float> @stack_fold_rcpps_int(<4 x float> %a0) {
+ ;CHECK-LABEL: stack_fold_rcpps_int
+ ;CHECK: rcpps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <4 x float> @llvm.x86.sse.rcp.ps(<4 x float> %a0)
+ ret <4 x float> %2
+}
+declare <4 x float> @llvm.x86.sse.rcp.ps(<4 x float>) nounwind readnone
+
+; TODO stack_fold_rcpss
+; TODO stack_fold_rcpss_int
+
+define <2 x double> @stack_fold_roundpd(<2 x double> %a0) {
+ ;CHECK-LABEL: stack_fold_roundpd
+ ;CHECK: roundpd $7, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <2 x double> @llvm.x86.sse41.round.pd(<2 x double> %a0, i32 7)
+ ret <2 x double> %2
+}
+declare <2 x double> @llvm.x86.sse41.round.pd(<2 x double>, i32) nounwind readnone
+
+define <4 x float> @stack_fold_roundps(<4 x float> %a0) {
+ ;CHECK-LABEL: stack_fold_roundps
+ ;CHECK: roundps $7, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <4 x float> @llvm.x86.sse41.round.ps(<4 x float> %a0, i32 7)
+ ret <4 x float> %2
+}
+declare <4 x float> @llvm.x86.sse41.round.ps(<4 x float>, i32) nounwind readnone
+
+; TODO stack_fold_roundsd
+; TODO stack_fold_roundsd_int
+
+; TODO stack_fold_roundss
+; TODO stack_fold_roundss_int
+
+; TODO stack_fold_rsqrtps
+
+define <4 x float> @stack_fold_rsqrtps_int(<4 x float> %a0) {
+ ;CHECK-LABEL: stack_fold_rsqrtps_int
+ ;CHECK: rsqrtps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float> %a0)
+ ret <4 x float> %2
+}
+declare <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float>) nounwind readnone
+
+; TODO stack_fold_rsqrtss
+; TODO stack_fold_rsqrtss_int
+
+define <2 x double> @stack_fold_shufpd(<2 x double> %a0, <2 x double> %a1) {
+ ;CHECK-LABEL: stack_fold_shufpd
+ ;CHECK: shufpd $1, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32> <i32 1, i32 2>
+ ret <2 x double> %2
+}
+
+define <4 x float> @stack_fold_shufps(<4 x float> %a0, <4 x float> %a1) {
+ ;CHECK-LABEL: stack_fold_shufps
+ ;CHECK: shufps $200, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 0, i32 2, i32 4, i32 7>
+ ret <4 x float> %2
+}
+
+define <2 x double> @stack_fold_sqrtpd(<2 x double> %a0) {
+ ;CHECK-LABEL: stack_fold_sqrtpd
+ ;CHECK: sqrtpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double> %a0)
+ ret <2 x double> %2
+}
+declare <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double>) nounwind readnone
+
+define <4 x float> @stack_fold_sqrtps(<4 x float> %a0) {
+ ;CHECK-LABEL: stack_fold_sqrtps
+ ;CHECK: sqrtps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float> %a0)
+ ret <4 x float> %2
+}
+declare <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float>) nounwind readnone
+
+; TODO stack_fold_sqrtsd
+declare double @llvm.sqrt.f64(double) nounwind readnone
+
+; TODO stack_fold_sqrtsd_int
+declare <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double>) nounwind readnone
+
+; TODO stack_fold_sqrtss
+declare float @llvm.sqrt.f32(float) nounwind readnone
+
+; TODO stack_fold_sqrtss_int
+declare <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float>) nounwind readnone
+
+define <2 x double> @stack_fold_subpd(<2 x double> %a0, <2 x double> %a1) {
+ ;CHECK-LABEL: stack_fold_subpd
+ ;CHECK: subpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = fsub <2 x double> %a0, %a1
+ ret <2 x double> %2
+}
+
+define <4 x float> @stack_fold_subps(<4 x float> %a0, <4 x float> %a1) {
+ ;CHECK-LABEL: stack_fold_subps
+ ;CHECK: subps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = fsub <4 x float> %a0, %a1
+ ret <4 x float> %2
+}
+
+define double @stack_fold_subsd(double %a0, double %a1) {
+ ;CHECK-LABEL: stack_fold_subsd
+ ;CHECK: subsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = fsub double %a0, %a1
+ ret double %2
+}
+
+define <2 x double> @stack_fold_subsd_int(<2 x double> %a0, <2 x double> %a1) {
+ ;CHECK-LABEL: stack_fold_subsd_int
+ ;CHECK: subsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <2 x double> @llvm.x86.sse2.sub.sd(<2 x double> %a0, <2 x double> %a1)
+ ret <2 x double> %2
+}
+declare <2 x double> @llvm.x86.sse2.sub.sd(<2 x double>, <2 x double>) nounwind readnone
+
+define float @stack_fold_subss(float %a0, float %a1) {
+ ;CHECK-LABEL: stack_fold_subss
+ ;CHECK: subss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = fsub float %a0, %a1
+ ret float %2
+}
+
+define <4 x float> @stack_fold_subss_int(<4 x float> %a0, <4 x float> %a1) {
+ ;CHECK-LABEL: stack_fold_subss_int
+ ;CHECK: subss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <4 x float> @llvm.x86.sse.sub.ss(<4 x float> %a0, <4 x float> %a1)
+ ret <4 x float> %2
+}
+declare <4 x float> @llvm.x86.sse.sub.ss(<4 x float>, <4 x float>) nounwind readnone
+
+define i32 @stack_fold_ucomisd(double %a0, double %a1) {
+ ;CHECK-LABEL: stack_fold_ucomisd
+ ;CHECK: ucomisd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = fcmp ueq double %a0, %a1
+ %3 = select i1 %2, i32 1, i32 -1
+ ret i32 %3
+}
+
+define i32 @stack_fold_ucomisd_int(<2 x double> %a0, <2 x double> %a1) {
+ ;CHECK-LABEL: stack_fold_ucomisd_int
+ ;CHECK: ucomisd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call i32 @llvm.x86.sse2.ucomieq.sd(<2 x double> %a0, <2 x double> %a1)
+ ret i32 %2
+}
+declare i32 @llvm.x86.sse2.ucomieq.sd(<2 x double>, <2 x double>) nounwind readnone
+
+define i32 @stack_fold_ucomiss(float %a0, float %a1) {
+ ;CHECK-LABEL: stack_fold_ucomiss
+ ;CHECK: ucomiss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = fcmp ueq float %a0, %a1
+ %3 = select i1 %2, i32 1, i32 -1
+ ret i32 %3
+}
+
+define i32 @stack_fold_ucomiss_int(<4 x float> %a0, <4 x float> %a1) {
+ ;CHECK-LABEL: stack_fold_ucomiss_int
+ ;CHECK: ucomiss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call i32 @llvm.x86.sse.ucomieq.ss(<4 x float> %a0, <4 x float> %a1)
+ ret i32 %2
+}
+declare i32 @llvm.x86.sse.ucomieq.ss(<4 x float>, <4 x float>) nounwind readnone
+
+define <2 x double> @stack_fold_unpckhpd(<2 x double> %a0, <2 x double> %a1) {
+ ;CHECK-LABEL: stack_fold_unpckhpd
+ ;CHECK: unpckhpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32> <i32 1, i32 3>
+ ; fadd forces execution domain
+ %3 = fadd <2 x double> %2, <double 0x0, double 0x0>
+ ret <2 x double> %3
+}
+
+define <4 x float> @stack_fold_unpckhps(<4 x float> %a0, <4 x float> %a1) {
+ ;CHECK-LABEL: stack_fold_unpckhps
+ ;CHECK: unpckhps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
+ ; fadd forces execution domain
+ %3 = fadd <4 x float> %2, <float 0x0, float 0x0, float 0x0, float 0x0>
+ ret <4 x float> %3
+}
+
+define <2 x double> @stack_fold_unpcklpd(<2 x double> %a0, <2 x double> %a1) {
+ ;CHECK-LABEL: stack_fold_unpcklpd
+ ;CHECK: unpcklpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32> <i32 0, i32 2>
+ ; fadd forces execution domain
+ %3 = fadd <2 x double> %2, <double 0x0, double 0x0>
+ ret <2 x double> %3
+}
+
+define <4 x float> @stack_fold_unpcklps(<4 x float> %a0, <4 x float> %a1) {
+ ;CHECK-LABEL: stack_fold_unpcklps
+ ;CHECK: unpcklps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
+ ; fadd forces execution domain
+ %3 = fadd <4 x float> %2, <float 0x0, float 0x0, float 0x0, float 0x0>
+ ret <4 x float> %3
+}
+
+define <2 x double> @stack_fold_xorpd(<2 x double> %a0, <2 x double> %a1) {
+ ;CHECK-LABEL: stack_fold_xorpd
+ ;CHECK: xorpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = bitcast <2 x double> %a0 to <2 x i64>
+ %3 = bitcast <2 x double> %a1 to <2 x i64>
+ %4 = xor <2 x i64> %2, %3
+ %5 = bitcast <2 x i64> %4 to <2 x double>
+ ; fadd forces execution domain
+ %6 = fadd <2 x double> %5, <double 0x0, double 0x0>
+ ret <2 x double> %6
+}
+
+define <4 x float> @stack_fold_xorps(<4 x float> %a0, <4 x float> %a1) {
+ ;CHECK-LABEL: stack_fold_xorps
+ ;CHECK: xorps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = bitcast <4 x float> %a0 to <2 x i64>
+ %3 = bitcast <4 x float> %a1 to <2 x i64>
+ %4 = xor <2 x i64> %2, %3
+ %5 = bitcast <2 x i64> %4 to <4 x float>
+ ; fadd forces execution domain
+ %6 = fadd <4 x float> %5, <float 0x0, float 0x0, float 0x0, float 0x0>
+ ret <4 x float> %6
+}
diff --git a/test/CodeGen/X86/stack-folding-int-avx1.ll b/test/CodeGen/X86/stack-folding-int-avx1.ll
new file mode 100644
index 000000000000..fec297d5e9d4
--- /dev/null
+++ b/test/CodeGen/X86/stack-folding-int-avx1.ll
@@ -0,0 +1,1156 @@
+; RUN: llc -O3 -disable-peephole -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx,+aes,+pclmul < %s | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-unknown"
+
+; Stack reload folding tests.
+;
+; By including a nop call with side effects we can force a partial register spill of the
+; relevant registers and check that the reload is correctly folded into the instruction.
+
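+; For example, in stack_fold_aesdec below the "nop" asm clobbers xmm2-xmm15 and
+; returns its result in an xmm register, so at least one of the incoming vector
+; arguments has to be spilled to the stack around the call; the CHECK line then
+; requires the reload of that spill slot to be folded into the (%rsp) memory
+; operand of vaesdec instead of being done by a separate load. The remaining
+; tests in this file follow the same pattern.
+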
+define <2 x i64> @stack_fold_aesdec(<2 x i64> %a0, <2 x i64> %a1) {
+ ;CHECK-LABEL: stack_fold_aesdec
+ ;CHECK: vaesdec {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <2 x i64> @llvm.x86.aesni.aesdec(<2 x i64> %a0, <2 x i64> %a1)
+ ret <2 x i64> %2
+}
+declare <2 x i64> @llvm.x86.aesni.aesdec(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <2 x i64> @stack_fold_aesdeclast(<2 x i64> %a0, <2 x i64> %a1) {
+ ;CHECK-LABEL: stack_fold_aesdeclast
+ ;CHECK: vaesdeclast {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <2 x i64> @llvm.x86.aesni.aesdeclast(<2 x i64> %a0, <2 x i64> %a1)
+ ret <2 x i64> %2
+}
+declare <2 x i64> @llvm.x86.aesni.aesdeclast(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <2 x i64> @stack_fold_aesenc(<2 x i64> %a0, <2 x i64> %a1) {
+ ;CHECK-LABEL: stack_fold_aesenc
+ ;CHECK: vaesenc {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <2 x i64> @llvm.x86.aesni.aesenc(<2 x i64> %a0, <2 x i64> %a1)
+ ret <2 x i64> %2
+}
+declare <2 x i64> @llvm.x86.aesni.aesenc(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <2 x i64> @stack_fold_aesenclast(<2 x i64> %a0, <2 x i64> %a1) {
+ ;CHECK-LABEL: stack_fold_aesenclast
+ ;CHECK: vaesenclast {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <2 x i64> @llvm.x86.aesni.aesenclast(<2 x i64> %a0, <2 x i64> %a1)
+ ret <2 x i64> %2
+}
+declare <2 x i64> @llvm.x86.aesni.aesenclast(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <2 x i64> @stack_fold_aesimc(<2 x i64> %a0) {
+ ;CHECK-LABEL: stack_fold_aesimc
+ ;CHECK: vaesimc {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <2 x i64> @llvm.x86.aesni.aesimc(<2 x i64> %a0)
+ ret <2 x i64> %2
+}
+declare <2 x i64> @llvm.x86.aesni.aesimc(<2 x i64>) nounwind readnone
+
+define <2 x i64> @stack_fold_aeskeygenassist(<2 x i64> %a0) {
+ ;CHECK-LABEL: stack_fold_aeskeygenassist
+ ;CHECK: vaeskeygenassist $7, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <2 x i64> @llvm.x86.aesni.aeskeygenassist(<2 x i64> %a0, i8 7)
+ ret <2 x i64> %2
+}
+declare <2 x i64> @llvm.x86.aesni.aeskeygenassist(<2 x i64>, i8) nounwind readnone
+
+define <4 x i32> @stack_fold_movd_load(i32 %a0) {
+ ;CHECK-LABEL: stack_fold_movd_load
+ ;CHECK: movd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
+ %2 = insertelement <4 x i32> zeroinitializer, i32 %a0, i32 0
+ ; add forces execution domain
+ %3 = add <4 x i32> %2, <i32 1, i32 1, i32 1, i32 1>
+ ret <4 x i32> %3
+}
+
+define i32 @stack_fold_movd_store(<4 x i32> %a0) {
+ ;CHECK-LABEL: stack_fold_movd_store
+ ;CHECK: movd {{%xmm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp) {{.*#+}} 4-byte Folded Spill
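+ ; here it is the spill (store) that is folded: the scalar result is stored straight from the xmm register to the stack slot instead of going through a general-purpose register first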
+ ; add forces execution domain
+ %1 = add <4 x i32> %a0, <i32 1, i32 1, i32 1, i32 1>
+ %2 = extractelement <4 x i32> %1, i32 0
+ %3 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
+ ret i32 %2
+}
+
+define <2 x i64> @stack_fold_movq_load(<2 x i64> %a0) {
+ ;CHECK-LABEL: stack_fold_movq_load
+ ;CHECK: movq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = shufflevector <2 x i64> %a0, <2 x i64> zeroinitializer, <2 x i32> <i32 0, i32 2>
+ ; add forces execution domain
+ %3 = add <2 x i64> %2, <i64 1, i64 1>
+ ret <2 x i64> %3
+}
+
+define i64 @stack_fold_movq_store(<2 x i64> %a0) {
+ ;CHECK-LABEL: stack_fold_movq_store
+ ;CHECK: movq {{%xmm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp) {{.*#+}} 8-byte Folded Spill
+ ; add forces execution domain
+ %1 = add <2 x i64> %a0, <i64 1, i64 1>
+ %2 = extractelement <2 x i64> %1, i32 0
+ %3 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
+ ret i64 %2
+}
+
+define <8 x i16> @stack_fold_mpsadbw(<16 x i8> %a0, <16 x i8> %a1) {
+ ;CHECK-LABEL: stack_fold_mpsadbw
+ ;CHECK: vmpsadbw $7, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8> %a0, <16 x i8> %a1, i8 7)
+ ret <8 x i16> %2
+}
+declare <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8>, <16 x i8>, i8) nounwind readnone
+
+define <16 x i8> @stack_fold_pabsb(<16 x i8> %a0) {
+ ;CHECK-LABEL: stack_fold_pabsb
+ ;CHECK: vpabsb {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <16 x i8> @llvm.x86.ssse3.pabs.b.128(<16 x i8> %a0)
+ ret <16 x i8> %2
+}
+declare <16 x i8> @llvm.x86.ssse3.pabs.b.128(<16 x i8>) nounwind readnone
+
+define <4 x i32> @stack_fold_pabsd(<4 x i32> %a0) {
+ ;CHECK-LABEL: stack_fold_pabsd
+ ;CHECK: vpabsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <4 x i32> @llvm.x86.ssse3.pabs.d.128(<4 x i32> %a0)
+ ret <4 x i32> %2
+}
+declare <4 x i32> @llvm.x86.ssse3.pabs.d.128(<4 x i32>) nounwind readnone
+
+define <8 x i16> @stack_fold_pabsw(<8 x i16> %a0) {
+ ;CHECK-LABEL: stack_fold_pabsw
+ ;CHECK: vpabsw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <8 x i16> @llvm.x86.ssse3.pabs.w.128(<8 x i16> %a0)
+ ret <8 x i16> %2
+}
+declare <8 x i16> @llvm.x86.ssse3.pabs.w.128(<8 x i16>) nounwind readnone
+
+define <8 x i16> @stack_fold_packssdw(<4 x i32> %a0, <4 x i32> %a1) {
+ ;CHECK-LABEL: stack_fold_packssdw
+ ;CHECK: vpackssdw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %a0, <4 x i32> %a1)
+ ret <8 x i16> %2
+}
+declare <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <16 x i8> @stack_fold_packsswb(<8 x i16> %a0, <8 x i16> %a1) {
+ ;CHECK-LABEL: stack_fold_packsswb
+ ;CHECK: vpacksswb {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %a0, <8 x i16> %a1)
+ ret <16 x i8> %2
+}
+declare <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <8 x i16> @stack_fold_packusdw(<4 x i32> %a0, <4 x i32> %a1) {
+ ;CHECK-LABEL: stack_fold_packusdw
+ ;CHECK: vpackusdw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %a0, <4 x i32> %a1)
+ ret <8 x i16> %2
+}
+declare <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <16 x i8> @stack_fold_packuswb(<8 x i16> %a0, <8 x i16> %a1) {
+ ;CHECK-LABEL: stack_fold_packuswb
+ ;CHECK: vpackuswb {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %a0, <8 x i16> %a1)
+ ret <16 x i8> %2
+}
+declare <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <16 x i8> @stack_fold_paddb(<16 x i8> %a0, <16 x i8> %a1) {
+ ;CHECK-LABEL: stack_fold_paddb
+ ;CHECK: vpaddb {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = add <16 x i8> %a0, %a1
+ ret <16 x i8> %2
+}
+
+define <4 x i32> @stack_fold_paddd(<4 x i32> %a0, <4 x i32> %a1) {
+ ;CHECK-LABEL: stack_fold_paddd
+ ;CHECK: vpaddd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = add <4 x i32> %a0, %a1
+ ret <4 x i32> %2
+}
+
+define <2 x i64> @stack_fold_paddq(<2 x i64> %a0, <2 x i64> %a1) {
+ ;CHECK-LABEL: stack_fold_paddq
+ ;CHECK: vpaddq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = add <2 x i64> %a0, %a1
+ ret <2 x i64> %2
+}
+
+define <16 x i8> @stack_fold_paddsb(<16 x i8> %a0, <16 x i8> %a1) {
+ ;CHECK-LABEL: stack_fold_paddsb
+ ;CHECK: vpaddsb {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8> %a0, <16 x i8> %a1)
+ ret <16 x i8> %2
+}
+declare <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8>, <16 x i8>) nounwind readnone
+
+define <8 x i16> @stack_fold_paddsw(<8 x i16> %a0, <8 x i16> %a1) {
+ ;CHECK-LABEL: stack_fold_paddsw
+ ;CHECK: vpaddsw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %a0, <8 x i16> %a1)
+ ret <8 x i16> %2
+}
+declare <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <16 x i8> @stack_fold_paddusb(<16 x i8> %a0, <16 x i8> %a1) {
+ ;CHECK-LABEL: stack_fold_paddusb
+ ;CHECK: vpaddusb {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8> %a0, <16 x i8> %a1)
+ ret <16 x i8> %2
+}
+declare <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8>, <16 x i8>) nounwind readnone
+
+define <8 x i16> @stack_fold_paddusw(<8 x i16> %a0, <8 x i16> %a1) {
+ ;CHECK-LABEL: stack_fold_paddusw
+ ;CHECK: vpaddusw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16> %a0, <8 x i16> %a1)
+ ret <8 x i16> %2
+}
+declare <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <8 x i16> @stack_fold_paddw(<8 x i16> %a0, <8 x i16> %a1) {
+ ;CHECK-LABEL: stack_fold_paddw
+ ;CHECK: vpaddw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = add <8 x i16> %a0, %a1
+ ret <8 x i16> %2
+}
+
+define <16 x i8> @stack_fold_palignr(<16 x i8> %a0, <16 x i8> %a1) {
+ ;CHECK-LABEL: stack_fold_palignr
+ ;CHECK: vpalignr $1, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = shufflevector <16 x i8> %a1, <16 x i8> %a0, <16 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16>
+ ret <16 x i8> %2
+}
+
+define <16 x i8> @stack_fold_pand(<16 x i8> %a0, <16 x i8> %a1) {
+ ;CHECK-LABEL: stack_fold_pand
+ ;CHECK: vpand {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = and <16 x i8> %a0, %a1
+ ; add forces execution domain
+ %3 = add <16 x i8> %2, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+ ret <16 x i8> %3
+}
+
+define <16 x i8> @stack_fold_pandn(<16 x i8> %a0, <16 x i8> %a1) {
+ ;CHECK-LABEL: stack_fold_pandn
+ ;CHECK: vpandn {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = xor <16 x i8> %a0, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+ %3 = and <16 x i8> %2, %a1
+ ; add forces execution domain
+ %4 = add <16 x i8> %3, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+ ret <16 x i8> %4
+}
+
+define <16 x i8> @stack_fold_pavgb(<16 x i8> %a0, <16 x i8> %a1) {
+ ;CHECK-LABEL: stack_fold_pavgb
+ ;CHECK: vpavgb {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8> %a0, <16 x i8> %a1)
+ ret <16 x i8> %2
+}
+declare <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8>, <16 x i8>) nounwind readnone
+
+define <8 x i16> @stack_fold_pavgw(<8 x i16> %a0, <8 x i16> %a1) {
+ ;CHECK-LABEL: stack_fold_pavgw
+ ;CHECK: vpavgw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %a0, <8 x i16> %a1)
+ ret <8 x i16> %2
+}
+declare <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <16 x i8> @stack_fold_pblendvb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %c) {
+ ;CHECK-LABEL: stack_fold_pblendvb
+ ;CHECK: vpblendvb {{%xmm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %a1, <16 x i8> %c, <16 x i8> %a0)
+ ret <16 x i8> %2
+}
+declare <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone
+
+define <8 x i16> @stack_fold_pblendw(<8 x i16> %a0, <8 x i16> %a1) {
+ ;CHECK-LABEL: stack_fold_pblendw
+ ;CHECK: vpblendw $7, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16> %a0, <8 x i16> %a1, i8 7)
+ ret <8 x i16> %2
+}
+declare <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16>, <8 x i16>, i8) nounwind readnone
+
+define <2 x i64> @stack_fold_pclmulqdq(<2 x i64> %a0, <2 x i64> %a1) {
+ ;CHECK-LABEL: stack_fold_pclmulqdq
+ ;CHECK: vpclmulqdq $0, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <2 x i64> @llvm.x86.pclmulqdq(<2 x i64> %a0, <2 x i64> %a1, i8 0)
+ ret <2 x i64> %2
+}
+declare <2 x i64> @llvm.x86.pclmulqdq(<2 x i64>, <2 x i64>, i8) nounwind readnone
+
+define <16 x i8> @stack_fold_pcmpeqb(<16 x i8> %a0, <16 x i8> %a1) {
+ ;CHECK-LABEL: stack_fold_pcmpeqb
+ ;CHECK: vpcmpeqb {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = icmp eq <16 x i8> %a0, %a1
+ %3 = sext <16 x i1> %2 to <16 x i8>
+ ret <16 x i8> %3
+}
+
+define <4 x i32> @stack_fold_pcmpeqd(<4 x i32> %a0, <4 x i32> %a1) {
+ ;CHECK-LABEL: stack_fold_pcmpeqd
+ ;CHECK: vpcmpeqd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = icmp eq <4 x i32> %a0, %a1
+ %3 = sext <4 x i1> %2 to <4 x i32>
+ ret <4 x i32> %3
+}
+
+define <2 x i64> @stack_fold_pcmpeqq(<2 x i64> %a0, <2 x i64> %a1) {
+ ;CHECK-LABEL: stack_fold_pcmpeqq
+ ;CHECK: vpcmpeqq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = icmp eq <2 x i64> %a0, %a1
+ %3 = sext <2 x i1> %2 to <2 x i64>
+ ret <2 x i64> %3
+}
+
+define <8 x i16> @stack_fold_pcmpeqw(<8 x i16> %a0, <8 x i16> %a1) {
+ ;CHECK-LABEL: stack_fold_pcmpeqw
+ ;CHECK: vpcmpeqw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = icmp eq <8 x i16> %a0, %a1
+ %3 = sext <8 x i1> %2 to <8 x i16>
+ ret <8 x i16> %3
+}
+
+define i32 @stack_fold_pcmpestri(<16 x i8> %a0, <16 x i8> %a1) {
+ ;CHECK-LABEL: stack_fold_pcmpestri
+ ;CHECK: vpcmpestri $7, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{rax},~{flags}"()
+ %2 = call i32 @llvm.x86.sse42.pcmpestri128(<16 x i8> %a0, i32 7, <16 x i8> %a1, i32 7, i8 7)
+ ret i32 %2
+}
+declare i32 @llvm.x86.sse42.pcmpestri128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone
+
+define <16 x i8> @stack_fold_pcmpestrm(<16 x i8> %a0, <16 x i8> %a1) {
+ ;CHECK-LABEL: stack_fold_pcmpestrm
+ ;CHECK: vpcmpestrm $7, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{rax},~{flags}"()
+ %2 = call <16 x i8> @llvm.x86.sse42.pcmpestrm128(<16 x i8> %a0, i32 7, <16 x i8> %a1, i32 7, i8 7)
+ ret <16 x i8> %2
+}
+declare <16 x i8> @llvm.x86.sse42.pcmpestrm128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone
+
+define <16 x i8> @stack_fold_pcmpgtb(<16 x i8> %a0, <16 x i8> %a1) {
+ ;CHECK-LABEL: stack_fold_pcmpgtb
+ ;CHECK: vpcmpgtb {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = icmp sgt <16 x i8> %a0, %a1
+ %3 = sext <16 x i1> %2 to <16 x i8>
+ ret <16 x i8> %3
+}
+
+define <4 x i32> @stack_fold_pcmpgtd(<4 x i32> %a0, <4 x i32> %a1) {
+ ;CHECK-LABEL: stack_fold_pcmpgtd
+ ;CHECK: vpcmpgtd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = icmp sgt <4 x i32> %a0, %a1
+ %3 = sext <4 x i1> %2 to <4 x i32>
+ ret <4 x i32> %3
+}
+
+define <2 x i64> @stack_fold_pcmpgtq(<2 x i64> %a0, <2 x i64> %a1) {
+ ;CHECK-LABEL: stack_fold_pcmpgtq
+ ;CHECK: vpcmpgtq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = icmp sgt <2 x i64> %a0, %a1
+ %3 = sext <2 x i1> %2 to <2 x i64>
+ ret <2 x i64> %3
+}
+
+define <8 x i16> @stack_fold_pcmpgtw(<8 x i16> %a0, <8 x i16> %a1) {
+ ;CHECK-LABEL: stack_fold_pcmpgtw
+ ;CHECK: vpcmpgtw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = icmp sgt <8 x i16> %a0, %a1
+ %3 = sext <8 x i1> %2 to <8 x i16>
+ ret <8 x i16> %3
+}
+
+define i32 @stack_fold_pcmpistri(<16 x i8> %a0, <16 x i8> %a1) {
+ ;CHECK-LABEL: stack_fold_pcmpistri
+ ;CHECK: vpcmpistri $7, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call i32 @llvm.x86.sse42.pcmpistri128(<16 x i8> %a0, <16 x i8> %a1, i8 7)
+ ret i32 %2
+}
+declare i32 @llvm.x86.sse42.pcmpistri128(<16 x i8>, <16 x i8>, i8) nounwind readnone
+
+define <16 x i8> @stack_fold_pcmpistrm(<16 x i8> %a0, <16 x i8> %a1) {
+ ;CHECK-LABEL: stack_fold_pcmpistrm
+ ;CHECK: vpcmpistrm $7, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <16 x i8> @llvm.x86.sse42.pcmpistrm128(<16 x i8> %a0, <16 x i8> %a1, i8 7)
+ ret <16 x i8> %2
+}
+declare <16 x i8> @llvm.x86.sse42.pcmpistrm128(<16 x i8>, <16 x i8>, i8) nounwind readnone
+
+; TODO stack_fold_pextrb
+
+define i32 @stack_fold_pextrd(<4 x i32> %a0) {
+ ;CHECK-LABEL: stack_fold_pextrd
+ ;CHECK: pextrd $1, {{%xmm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp) {{.*#+}} 4-byte Folded Spill
+ ;CHECK: movl {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 4-byte Reload
+ %1 = extractelement <4 x i32> %a0, i32 1
+ %2 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
+ ret i32 %1
+}
+
+define i64 @stack_fold_pextrq(<2 x i64> %a0) {
+ ;CHECK-LABEL: stack_fold_pextrq
+ ;CHECK: pextrq $1, {{%xmm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp) {{.*#+}} 8-byte Folded Spill
+ ;CHECK: movq {{-?[0-9]*}}(%rsp), %rax {{.*#+}} 8-byte Reload
+ %1 = extractelement <2 x i64> %a0, i32 1
+ %2 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
+ ret i64 %1
+}
+
+; TODO stack_fold_pextrw
+
+define <4 x i32> @stack_fold_phaddd(<4 x i32> %a0, <4 x i32> %a1) {
+ ;CHECK-LABEL: stack_fold_phaddd
+ ;CHECK: vphaddd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32> %a0, <4 x i32> %a1)
+ ret <4 x i32> %2
+}
+declare <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <8 x i16> @stack_fold_phaddsw(<8 x i16> %a0, <8 x i16> %a1) {
+ ;CHECK-LABEL: stack_fold_phaddsw
+ ;CHECK: vphaddsw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <8 x i16> @llvm.x86.ssse3.phadd.sw.128(<8 x i16> %a0, <8 x i16> %a1)
+ ret <8 x i16> %2
+}
+declare <8 x i16> @llvm.x86.ssse3.phadd.sw.128(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <8 x i16> @stack_fold_phaddw(<8 x i16> %a0, <8 x i16> %a1) {
+ ;CHECK-LABEL: stack_fold_phaddw
+ ;CHECK: vphaddw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <8 x i16> @llvm.x86.ssse3.phadd.w.128(<8 x i16> %a0, <8 x i16> %a1)
+ ret <8 x i16> %2
+}
+declare <8 x i16> @llvm.x86.ssse3.phadd.w.128(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <8 x i16> @stack_fold_phminposuw(<8 x i16> %a0) {
+ ;CHECK-LABEL: stack_fold_phminposuw
+ ;CHECK: vphminposuw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <8 x i16> @llvm.x86.sse41.phminposuw(<8 x i16> %a0)
+ ret <8 x i16> %2
+}
+declare <8 x i16> @llvm.x86.sse41.phminposuw(<8 x i16>) nounwind readnone
+
+define <4 x i32> @stack_fold_phsubd(<4 x i32> %a0, <4 x i32> %a1) {
+ ;CHECK-LABEL: stack_fold_phsubd
+ ;CHECK: vphsubd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <4 x i32> @llvm.x86.ssse3.phsub.d.128(<4 x i32> %a0, <4 x i32> %a1)
+ ret <4 x i32> %2
+}
+declare <4 x i32> @llvm.x86.ssse3.phsub.d.128(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <8 x i16> @stack_fold_phsubsw(<8 x i16> %a0, <8 x i16> %a1) {
+ ;CHECK-LABEL: stack_fold_phsubsw
+ ;CHECK: vphsubsw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <8 x i16> @llvm.x86.ssse3.phsub.sw.128(<8 x i16> %a0, <8 x i16> %a1)
+ ret <8 x i16> %2
+}
+declare <8 x i16> @llvm.x86.ssse3.phsub.sw.128(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <8 x i16> @stack_fold_phsubw(<8 x i16> %a0, <8 x i16> %a1) {
+ ;CHECK-LABEL: stack_fold_phsubw
+ ;CHECK: vphsubw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <8 x i16> @llvm.x86.ssse3.phsub.w.128(<8 x i16> %a0, <8 x i16> %a1)
+ ret <8 x i16> %2
+}
+declare <8 x i16> @llvm.x86.ssse3.phsub.w.128(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <16 x i8> @stack_fold_pinsrb(<16 x i8> %a0, i8 %a1) {
+ ;CHECK-LABEL: stack_fold_pinsrb
+ ;CHECK: vpinsrb $1, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
+ %2 = insertelement <16 x i8> %a0, i8 %a1, i32 1
+ ret <16 x i8> %2
+}
+
+define <4 x i32> @stack_fold_pinsrd(<4 x i32> %a0, i32 %a1) {
+ ;CHECK-LABEL: stack_fold_pinsrd
+ ;CHECK: vpinsrd $1, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
+ %2 = insertelement <4 x i32> %a0, i32 %a1, i32 1
+ ret <4 x i32> %2
+}
+
+define <2 x i64> @stack_fold_pinsrq(<2 x i64> %a0, i64 %a1) {
+ ;CHECK-LABEL: stack_fold_pinsrq
+ ;CHECK: vpinsrq $1, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
+ %2 = insertelement <2 x i64> %a0, i64 %a1, i32 1
+ ret <2 x i64> %2
+}
+
+define <8 x i16> @stack_fold_pinsrw(<8 x i16> %a0, i16 %a1) {
+ ;CHECK-LABEL: stack_fold_pinsrw
+ ;CHECK: vpinsrw $1, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
+ %2 = insertelement <8 x i16> %a0, i16 %a1, i32 1
+ ret <8 x i16> %2
+}
+
+define <8 x i16> @stack_fold_pmaddubsw(<16 x i8> %a0, <16 x i8> %a1) {
+ ;CHECK-LABEL: stack_fold_pmaddubsw
+ ;CHECK: vpmaddubsw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8> %a0, <16 x i8> %a1)
+ ret <8 x i16> %2
+}
+declare <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8>, <16 x i8>) nounwind readnone
+
+define <4 x i32> @stack_fold_pmaddwd(<8 x i16> %a0, <8 x i16> %a1) {
+ ;CHECK-LABEL: stack_fold_pmaddwd
+ ;CHECK: vpmaddwd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %a0, <8 x i16> %a1)
+ ret <4 x i32> %2
+}
+declare <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <16 x i8> @stack_fold_pmaxsb(<16 x i8> %a0, <16 x i8> %a1) {
+ ;CHECK-LABEL: stack_fold_pmaxsb
+ ;CHECK: vpmaxsb {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <16 x i8> @llvm.x86.sse41.pmaxsb(<16 x i8> %a0, <16 x i8> %a1)
+ ret <16 x i8> %2
+}
+declare <16 x i8> @llvm.x86.sse41.pmaxsb(<16 x i8>, <16 x i8>) nounwind readnone
+
+define <4 x i32> @stack_fold_pmaxsd(<4 x i32> %a0, <4 x i32> %a1) {
+ ;CHECK-LABEL: stack_fold_pmaxsd
+ ;CHECK: vpmaxsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32> %a0, <4 x i32> %a1)
+ ret <4 x i32> %2
+}
+declare <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <8 x i16> @stack_fold_pmaxsw(<8 x i16> %a0, <8 x i16> %a1) {
+ ;CHECK-LABEL: stack_fold_pmaxsw
+ ;CHECK: vpmaxsw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16> %a0, <8 x i16> %a1)
+ ret <8 x i16> %2
+}
+declare <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <16 x i8> @stack_fold_pmaxub(<16 x i8> %a0, <16 x i8> %a1) {
+ ;CHECK-LABEL: stack_fold_pmaxub
+ ;CHECK: vpmaxub {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <16 x i8> @llvm.x86.sse2.pmaxu.b(<16 x i8> %a0, <16 x i8> %a1)
+ ret <16 x i8> %2
+}
+declare <16 x i8> @llvm.x86.sse2.pmaxu.b(<16 x i8>, <16 x i8>) nounwind readnone
+
+define <4 x i32> @stack_fold_pmaxud(<4 x i32> %a0, <4 x i32> %a1) {
+ ;CHECK-LABEL: stack_fold_pmaxud
+ ;CHECK: vpmaxud {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32> %a0, <4 x i32> %a1)
+ ret <4 x i32> %2
+}
+declare <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <8 x i16> @stack_fold_pmaxuw(<8 x i16> %a0, <8 x i16> %a1) {
+ ;CHECK-LABEL: stack_fold_pmaxuw
+ ;CHECK: vpmaxuw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <8 x i16> @llvm.x86.sse41.pmaxuw(<8 x i16> %a0, <8 x i16> %a1)
+ ret <8 x i16> %2
+}
+declare <8 x i16> @llvm.x86.sse41.pmaxuw(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <16 x i8> @stack_fold_pminsb(<16 x i8> %a0, <16 x i8> %a1) {
+ ;CHECK-LABEL: stack_fold_pminsb
+ ;CHECK: vpminsb {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <16 x i8> @llvm.x86.sse41.pminsb(<16 x i8> %a0, <16 x i8> %a1)
+ ret <16 x i8> %2
+}
+declare <16 x i8> @llvm.x86.sse41.pminsb(<16 x i8>, <16 x i8>) nounwind readnone
+
+define <4 x i32> @stack_fold_pminsd(<4 x i32> %a0, <4 x i32> %a1) {
+ ;CHECK-LABEL: stack_fold_pminsd
+ ;CHECK: vpminsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32> %a0, <4 x i32> %a1)
+ ret <4 x i32> %2
+}
+declare <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <8 x i16> @stack_fold_pminsw(<8 x i16> %a0, <8 x i16> %a1) {
+ ;CHECK-LABEL: stack_fold_pminsw
+ ;CHECK: vpminsw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <8 x i16> @llvm.x86.sse2.pmins.w(<8 x i16> %a0, <8 x i16> %a1)
+ ret <8 x i16> %2
+}
+declare <8 x i16> @llvm.x86.sse2.pmins.w(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <16 x i8> @stack_fold_pminub(<16 x i8> %a0, <16 x i8> %a1) {
+ ;CHECK-LABEL: stack_fold_pminub
+ ;CHECK: vpminub {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <16 x i8> @llvm.x86.sse2.pminu.b(<16 x i8> %a0, <16 x i8> %a1)
+ ret <16 x i8> %2
+}
+declare <16 x i8> @llvm.x86.sse2.pminu.b(<16 x i8>, <16 x i8>) nounwind readnone
+
+define <4 x i32> @stack_fold_pminud(<4 x i32> %a0, <4 x i32> %a1) {
+ ;CHECK-LABEL: stack_fold_pminud
+ ;CHECK: vpminud {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <4 x i32> @llvm.x86.sse41.pminud(<4 x i32> %a0, <4 x i32> %a1)
+ ret <4 x i32> %2
+}
+declare <4 x i32> @llvm.x86.sse41.pminud(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <8 x i16> @stack_fold_pminuw(<8 x i16> %a0, <8 x i16> %a1) {
+ ;CHECK-LABEL: stack_fold_pminuw
+ ;CHECK: vpminuw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <8 x i16> @llvm.x86.sse41.pminuw(<8 x i16> %a0, <8 x i16> %a1)
+ ret <8 x i16> %2
+}
+declare <8 x i16> @llvm.x86.sse41.pminuw(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <4 x i32> @stack_fold_pmovsxbd(<16 x i8> %a0) {
+ ;CHECK-LABEL: stack_fold_pmovsxbd
+ ;CHECK: vpmovsxbd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <4 x i32> @llvm.x86.sse41.pmovsxbd(<16 x i8> %a0)
+ ret <4 x i32> %2
+}
+declare <4 x i32> @llvm.x86.sse41.pmovsxbd(<16 x i8>) nounwind readnone
+
+define <2 x i64> @stack_fold_pmovsxbq(<16 x i8> %a0) {
+ ;CHECK-LABEL: stack_fold_pmovsxbq
+ ;CHECK: vpmovsxbq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <2 x i64> @llvm.x86.sse41.pmovsxbq(<16 x i8> %a0)
+ ret <2 x i64> %2
+}
+declare <2 x i64> @llvm.x86.sse41.pmovsxbq(<16 x i8>) nounwind readnone
+
+define <8 x i16> @stack_fold_pmovsxbw(<16 x i8> %a0) {
+ ;CHECK-LABEL: stack_fold_pmovsxbw
+ ;CHECK: vpmovsxbw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <8 x i16> @llvm.x86.sse41.pmovsxbw(<16 x i8> %a0)
+ ret <8 x i16> %2
+}
+declare <8 x i16> @llvm.x86.sse41.pmovsxbw(<16 x i8>) nounwind readnone
+
+define <2 x i64> @stack_fold_pmovsxdq(<4 x i32> %a0) {
+ ;CHECK-LABEL: stack_fold_pmovsxdq
+ ;CHECK: vpmovsxdq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <2 x i64> @llvm.x86.sse41.pmovsxdq(<4 x i32> %a0)
+ ret <2 x i64> %2
+}
+declare <2 x i64> @llvm.x86.sse41.pmovsxdq(<4 x i32>) nounwind readnone
+
+define <4 x i32> @stack_fold_pmovsxwd(<8 x i16> %a0) {
+ ;CHECK-LABEL: stack_fold_pmovsxwd
+ ;CHECK: vpmovsxwd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16> %a0)
+ ret <4 x i32> %2
+}
+declare <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16>) nounwind readnone
+
+define <2 x i64> @stack_fold_pmovsxwq(<8 x i16> %a0) {
+ ;CHECK-LABEL: stack_fold_pmovsxwq
+ ;CHECK: vpmovsxwq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <2 x i64> @llvm.x86.sse41.pmovsxwq(<8 x i16> %a0)
+ ret <2 x i64> %2
+}
+declare <2 x i64> @llvm.x86.sse41.pmovsxwq(<8 x i16>) nounwind readnone
+
+define <4 x i32> @stack_fold_pmovzxbd(<16 x i8> %a0) {
+ ;CHECK-LABEL: stack_fold_pmovzxbd
+ ;CHECK: vpmovzxbd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = shufflevector <16 x i8> %a0, <16 x i8> zeroinitializer, <16 x i32> <i32 0, i32 16, i32 17, i32 18, i32 1, i32 19, i32 20, i32 21, i32 2, i32 22, i32 23, i32 24, i32 3, i32 25, i32 26, i32 27>
+ %3 = bitcast <16 x i8> %2 to <4 x i32>
+ ret <4 x i32> %3
+}
+
+define <2 x i64> @stack_fold_pmovzxbq(<16 x i8> %a0) {
+ ;CHECK-LABEL: stack_fold_pmovzxbq
+ ;CHECK: vpmovzxbq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = shufflevector <16 x i8> %a0, <16 x i8> zeroinitializer, <16 x i32> <i32 0, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 1, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28>
+ %3 = bitcast <16 x i8> %2 to <2 x i64>
+ ret <2 x i64> %3
+}
+
+define <8 x i16> @stack_fold_pmovzxbw(<16 x i8> %a0) {
+ ;CHECK-LABEL: stack_fold_pmovzxbw
+ ;CHECK: vpmovzxbw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = shufflevector <16 x i8> %a0, <16 x i8> zeroinitializer, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
+ %3 = bitcast <16 x i8> %2 to <8 x i16>
+ ret <8 x i16> %3
+}
+
+define <2 x i64> @stack_fold_pmovzxdq(<4 x i32> %a0) {
+ ;CHECK-LABEL: stack_fold_pmovzxdq
+ ;CHECK: vpmovzxdq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = shufflevector <4 x i32> %a0, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
+ %3 = bitcast <4 x i32> %2 to <2 x i64>
+ ret <2 x i64> %3
+}
+
+define <4 x i32> @stack_fold_pmovzxwd(<8 x i16> %a0) {
+ ;CHECK-LABEL: stack_fold_pmovzxwd
+ ;CHECK: vpmovzxwd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = shufflevector <8 x i16> %a0, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
+ %3 = bitcast <8 x i16> %2 to <4 x i32>
+ ret <4 x i32> %3
+}
+
+define <2 x i64> @stack_fold_pmovzxwq(<8 x i16> %a0) {
+ ;CHECK-LABEL: stack_fold_pmovzxwq
+ ;CHECK: vpmovzxwq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = shufflevector <8 x i16> %a0, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 8, i32 9, i32 10, i32 1, i32 11, i32 12, i32 13>
+ %3 = bitcast <8 x i16> %2 to <2 x i64>
+ ret <2 x i64> %3
+}
+
+define <2 x i64> @stack_fold_pmuldq(<4 x i32> %a0, <4 x i32> %a1) {
+ ;CHECK-LABEL: stack_fold_pmuldq
+ ;CHECK: vpmuldq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32> %a0, <4 x i32> %a1)
+ ret <2 x i64> %2
+}
+declare <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <8 x i16> @stack_fold_pmulhrsw(<8 x i16> %a0, <8 x i16> %a1) {
+ ;CHECK-LABEL: stack_fold_pmulhrsw
+ ;CHECK: vpmulhrsw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16> %a0, <8 x i16> %a1)
+ ret <8 x i16> %2
+}
+declare <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <8 x i16> @stack_fold_pmulhuw(<8 x i16> %a0, <8 x i16> %a1) {
+ ;CHECK-LABEL: stack_fold_pmulhuw
+ ;CHECK: vpmulhuw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> %a0, <8 x i16> %a1)
+ ret <8 x i16> %2
+}
+declare <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <8 x i16> @stack_fold_pmulhw(<8 x i16> %a0, <8 x i16> %a1) {
+ ;CHECK-LABEL: stack_fold_pmulhw
+ ;CHECK: vpmulhw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> %a0, <8 x i16> %a1)
+ ret <8 x i16> %2
+}
+declare <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <4 x i32> @stack_fold_pmulld(<4 x i32> %a0, <4 x i32> %a1) {
+ ;CHECK-LABEL: stack_fold_pmulld
+ ;CHECK: vpmulld {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = mul <4 x i32> %a0, %a1
+ ret <4 x i32> %2
+}
+
+define <8 x i16> @stack_fold_pmullw(<8 x i16> %a0, <8 x i16> %a1) {
+ ;CHECK-LABEL: stack_fold_pmullw
+ ;CHECK: vpmullw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = mul <8 x i16> %a0, %a1
+ ret <8 x i16> %2
+}
+
+define <2 x i64> @stack_fold_pmuludq(<4 x i32> %a0, <4 x i32> %a1) {
+ ;CHECK-LABEL: stack_fold_pmuludq
+ ;CHECK: vpmuludq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32> %a0, <4 x i32> %a1)
+ ret <2 x i64> %2
+}
+declare <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <16 x i8> @stack_fold_por(<16 x i8> %a0, <16 x i8> %a1) {
+ ;CHECK-LABEL: stack_fold_por
+ ;CHECK: vpor {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = or <16 x i8> %a0, %a1
+ ; add forces execution domain
+ %3 = add <16 x i8> %2, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+ ret <16 x i8> %3
+}
+
+define <2 x i64> @stack_fold_psadbw(<16 x i8> %a0, <16 x i8> %a1) {
+ ;CHECK-LABEL: stack_fold_psadbw
+ ;CHECK: vpsadbw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8> %a0, <16 x i8> %a1)
+ ret <2 x i64> %2
+}
+declare <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8>, <16 x i8>) nounwind readnone
+
+define <16 x i8> @stack_fold_pshufb(<16 x i8> %a0, <16 x i8> %a1) {
+ ;CHECK-LABEL: stack_fold_pshufb
+ ;CHECK: vpshufb {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %a0, <16 x i8> %a1)
+ ret <16 x i8> %2
+}
+declare <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8>, <16 x i8>) nounwind readnone
+
+define <4 x i32> @stack_fold_pshufd(<4 x i32> %a0) {
+ ;CHECK-LABEL: stack_fold_pshufd
+ ;CHECK: vpshufd $27, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = shufflevector <4 x i32> %a0, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+ ret <4 x i32> %2
+}
+
+define <8 x i16> @stack_fold_pshufhw(<8 x i16> %a0) {
+ ;CHECK-LABEL: stack_fold_pshufhw
+ ;CHECK: vpshufhw $11, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = shufflevector <8 x i16> %a0, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 6, i32 4, i32 4>
+ ret <8 x i16> %2
+}
+
+define <8 x i16> @stack_fold_pshuflw(<8 x i16> %a0) {
+ ;CHECK-LABEL: stack_fold_pshuflw
+ ;CHECK: vpshuflw $27, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = shufflevector <8 x i16> %a0, <8 x i16> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 4, i32 5, i32 6, i32 7>
+ ret <8 x i16> %2
+}
+
+define <16 x i8> @stack_fold_psignb(<16 x i8> %a0, <16 x i8> %a1) {
+ ;CHECK-LABEL: stack_fold_psignb
+ ;CHECK: vpsignb {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <16 x i8> @llvm.x86.ssse3.psign.b.128(<16 x i8> %a0, <16 x i8> %a1)
+ ret <16 x i8> %2
+}
+declare <16 x i8> @llvm.x86.ssse3.psign.b.128(<16 x i8>, <16 x i8>) nounwind readnone
+
+define <4 x i32> @stack_fold_psignd(<4 x i32> %a0, <4 x i32> %a1) {
+ ;CHECK-LABEL: stack_fold_psignd
+ ;CHECK: vpsignd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <4 x i32> @llvm.x86.ssse3.psign.d.128(<4 x i32> %a0, <4 x i32> %a1)
+ ret <4 x i32> %2
+}
+declare <4 x i32> @llvm.x86.ssse3.psign.d.128(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <8 x i16> @stack_fold_psignw(<8 x i16> %a0, <8 x i16> %a1) {
+ ;CHECK-LABEL: stack_fold_psignw
+ ;CHECK: vpsignw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <8 x i16> @llvm.x86.ssse3.psign.w.128(<8 x i16> %a0, <8 x i16> %a1)
+ ret <8 x i16> %2
+}
+declare <8 x i16> @llvm.x86.ssse3.psign.w.128(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <4 x i32> @stack_fold_pslld(<4 x i32> %a0, <4 x i32> %a1) {
+ ;CHECK-LABEL: stack_fold_pslld
+ ;CHECK: vpslld {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %a0, <4 x i32> %a1)
+ ret <4 x i32> %2
+}
+declare <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <2 x i64> @stack_fold_psllq(<2 x i64> %a0, <2 x i64> %a1) {
+ ;CHECK-LABEL: stack_fold_psllq
+ ;CHECK: vpsllq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %a0, <2 x i64> %a1)
+ ret <2 x i64> %2
+}
+declare <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <8 x i16> @stack_fold_psllw(<8 x i16> %a0, <8 x i16> %a1) {
+ ;CHECK-LABEL: stack_fold_psllw
+ ;CHECK: vpsllw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %a0, <8 x i16> %a1)
+ ret <8 x i16> %2
+}
+declare <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <4 x i32> @stack_fold_psrad(<4 x i32> %a0, <4 x i32> %a1) {
+ ;CHECK-LABEL: stack_fold_psrad
+ ;CHECK: vpsrad {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %a0, <4 x i32> %a1)
+ ret <4 x i32> %2
+}
+declare <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <8 x i16> @stack_fold_psraw(<8 x i16> %a0, <8 x i16> %a1) {
+ ;CHECK-LABEL: stack_fold_psraw
+ ;CHECK: vpsraw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %a0, <8 x i16> %a1)
+ ret <8 x i16> %2
+}
+declare <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <4 x i32> @stack_fold_psrld(<4 x i32> %a0, <4 x i32> %a1) {
+ ;CHECK-LABEL: stack_fold_psrld
+ ;CHECK: vpsrld {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %a0, <4 x i32> %a1)
+ ret <4 x i32> %2
+}
+declare <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <2 x i64> @stack_fold_psrlq(<2 x i64> %a0, <2 x i64> %a1) {
+ ;CHECK-LABEL: stack_fold_psrlq
+ ;CHECK: vpsrlq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %a0, <2 x i64> %a1)
+ ret <2 x i64> %2
+}
+declare <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <8 x i16> @stack_fold_psrlw(<8 x i16> %a0, <8 x i16> %a1) {
+ ;CHECK-LABEL: stack_fold_psrlw
+ ;CHECK: vpsrlw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %a0, <8 x i16> %a1)
+ ret <8 x i16> %2
+}
+declare <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <16 x i8> @stack_fold_psubb(<16 x i8> %a0, <16 x i8> %a1) {
+ ;CHECK-LABEL: stack_fold_psubb
+ ;CHECK: vpsubb {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = sub <16 x i8> %a0, %a1
+ ret <16 x i8> %2
+}
+
+define <4 x i32> @stack_fold_psubd(<4 x i32> %a0, <4 x i32> %a1) {
+ ;CHECK-LABEL: stack_fold_psubd
+ ;CHECK: vpsubd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = sub <4 x i32> %a0, %a1
+ ret <4 x i32> %2
+}
+
+define <2 x i64> @stack_fold_psubq(<2 x i64> %a0, <2 x i64> %a1) {
+ ;CHECK-LABEL: stack_fold_psubq
+ ;CHECK: vpsubq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = sub <2 x i64> %a0, %a1
+ ret <2 x i64> %2
+}
+
+define <16 x i8> @stack_fold_psubsb(<16 x i8> %a0, <16 x i8> %a1) {
+ ;CHECK-LABEL: stack_fold_psubsb
+ ;CHECK: vpsubsb {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8> %a0, <16 x i8> %a1)
+ ret <16 x i8> %2
+}
+declare <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8>, <16 x i8>) nounwind readnone
+
+define <8 x i16> @stack_fold_psubsw(<8 x i16> %a0, <8 x i16> %a1) {
+ ;CHECK-LABEL: stack_fold_psubsw
+ ;CHECK: vpsubsw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> %a0, <8 x i16> %a1)
+ ret <8 x i16> %2
+}
+declare <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <16 x i8> @stack_fold_psubusb(<16 x i8> %a0, <16 x i8> %a1) {
+ ;CHECK-LABEL: stack_fold_psubusb
+ ;CHECK: vpsubusb {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8> %a0, <16 x i8> %a1)
+ ret <16 x i8> %2
+}
+declare <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8>, <16 x i8>) nounwind readnone
+
+define <8 x i16> @stack_fold_psubusw(<8 x i16> %a0, <8 x i16> %a1) {
+ ;CHECK-LABEL: stack_fold_psubusw
+ ;CHECK: vpsubusw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16> %a0, <8 x i16> %a1)
+ ret <8 x i16> %2
+}
+declare <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <8 x i16> @stack_fold_psubw(<8 x i16> %a0, <8 x i16> %a1) {
+ ;CHECK-LABEL: stack_fold_psubw
+ ;CHECK: vpsubw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = sub <8 x i16> %a0, %a1
+ ret <8 x i16> %2
+}
+
+define i32 @stack_fold_ptest(<2 x i64> %a0, <2 x i64> %a1) {
+ ;CHECK-LABEL: stack_fold_ptest
+ ;CHECK: vptest {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call i32 @llvm.x86.sse41.ptestc(<2 x i64> %a0, <2 x i64> %a1)
+ ret i32 %2
+}
+declare i32 @llvm.x86.sse41.ptestc(<2 x i64>, <2 x i64>) nounwind readnone
+
+define i32 @stack_fold_ptest_ymm(<4 x i64> %a0, <4 x i64> %a1) {
+ ;CHECK-LABEL: stack_fold_ptest_ymm
+ ;CHECK: vptest {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call i32 @llvm.x86.avx.ptestc.256(<4 x i64> %a0, <4 x i64> %a1)
+ ret i32 %2
+}
+declare i32 @llvm.x86.avx.ptestc.256(<4 x i64>, <4 x i64>) nounwind readnone
+
+define <16 x i8> @stack_fold_punpckhbw(<16 x i8> %a0, <16 x i8> %a1) {
+ ;CHECK-LABEL: stack_fold_punpckhbw
+ ;CHECK: vpunpckhbw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = shufflevector <16 x i8> %a0, <16 x i8> %a1, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
+ ret <16 x i8> %2
+}
+
+define <4 x i32> @stack_fold_punpckhdq(<4 x i32> %a0, <4 x i32> %a1) {
+ ;CHECK-LABEL: stack_fold_punpckhdq
+ ;CHECK: vpunpckhdq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = shufflevector <4 x i32> %a0, <4 x i32> %a1, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
+ ; add forces execution domain
+ %3 = add <4 x i32> %2, <i32 1, i32 1, i32 1, i32 1>
+ ret <4 x i32> %3
+}
+
+define <2 x i64> @stack_fold_punpckhqdq(<2 x i64> %a0, <2 x i64> %a1) {
+ ;CHECK-LABEL: stack_fold_punpckhqdq
+ ;CHECK: vpunpckhqdq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = shufflevector <2 x i64> %a0, <2 x i64> %a1, <2 x i32> <i32 1, i32 3>
+ ; add forces execution domain
+ %3 = add <2 x i64> %2, <i64 1, i64 1>
+ ret <2 x i64> %3
+}
+
+define <8 x i16> @stack_fold_punpckhwd(<8 x i16> %a0, <8 x i16> %a1) {
+ ;CHECK-LABEL: stack_fold_punpckhwd
+ ;CHECK: vpunpckhwd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = shufflevector <8 x i16> %a0, <8 x i16> %a1, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
+ ret <8 x i16> %2
+}
+
+define <16 x i8> @stack_fold_punpcklbw(<16 x i8> %a0, <16 x i8> %a1) {
+ ;CHECK-LABEL: stack_fold_punpcklbw
+ ;CHECK: vpunpcklbw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = shufflevector <16 x i8> %a0, <16 x i8> %a1, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
+ ret <16 x i8> %2
+}
+
+define <4 x i32> @stack_fold_punpckldq(<4 x i32> %a0, <4 x i32> %a1) {
+ ;CHECK-LABEL: stack_fold_punpckldq
+ ;CHECK: vpunpckldq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = shufflevector <4 x i32> %a0, <4 x i32> %a1, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
+ ; add forces execution domain
+ %3 = add <4 x i32> %2, <i32 1, i32 1, i32 1, i32 1>
+ ret <4 x i32> %3
+}
+
+define <2 x i64> @stack_fold_punpcklqdq(<2 x i64> %a0, <2 x i64> %a1) {
+ ;CHECK-LABEL: stack_fold_punpcklqdq
+ ;CHECK: vpunpcklqdq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = shufflevector <2 x i64> %a0, <2 x i64> %a1, <2 x i32> <i32 0, i32 2>
+ ; add forces execution domain
+ %3 = add <2 x i64> %2, <i64 1, i64 1>
+ ret <2 x i64> %3
+}
+
+define <8 x i16> @stack_fold_punpcklwd(<8 x i16> %a0, <8 x i16> %a1) {
+ ;CHECK-LABEL: stack_fold_punpcklwd
+ ;CHECK: vpunpcklwd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = shufflevector <8 x i16> %a0, <8 x i16> %a1, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
+ ret <8 x i16> %2
+}
+
+define <16 x i8> @stack_fold_pxor(<16 x i8> %a0, <16 x i8> %a1) {
+ ;CHECK-LABEL: stack_fold_pxor
+ ;CHECK: vpxor {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = xor <16 x i8> %a0, %a1
+ ; add forces execution domain
+ %3 = add <16 x i8> %2, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+ ret <16 x i8> %3
+}
diff --git a/test/CodeGen/X86/stack-folding-int-avx2.ll b/test/CodeGen/X86/stack-folding-int-avx2.ll
new file mode 100644
index 000000000000..e930d244638a
--- /dev/null
+++ b/test/CodeGen/X86/stack-folding-int-avx2.ll
@@ -0,0 +1,1211 @@
+; RUN: llc -O3 -disable-peephole -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx2 < %s | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-unknown"
+
+; Stack reload folding tests.
+;
+; By including a nop call with sideeffects we can force a partial register spill of the
+; relevant registers and check that the reload is correctly folded into the instruction.
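+;
+; Note: the "=x" output together with the near-complete xmm clobber list leaves the
+; register allocator no spare register in which to keep the incoming vector arguments
+; live across the asm call, which is what forces them onto the stack in the first place.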
+
+define <4 x double> @stack_fold_broadcastsd_ymm(<2 x double> %a0) {
+ ;CHECK-LABEL: stack_fold_broadcastsd_ymm
+ ;CHECK: vbroadcastsd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <4 x double> @llvm.x86.avx2.vbroadcast.sd.pd.256(<2 x double> %a0)
+ ; fadd forces execution domain
+ %3 = fadd <4 x double> %2, <double 0x0, double 0x0, double 0x0, double 0x0>
+ ret <4 x double> %3
+}
+declare <4 x double> @llvm.x86.avx2.vbroadcast.sd.pd.256(<2 x double>) nounwind readonly
+
+define <4 x float> @stack_fold_broadcastss(<4 x float> %a0) {
+ ;CHECK-LABEL: stack_fold_broadcastss
+ ;CHECK: vbroadcastss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <4 x float> @llvm.x86.avx2.vbroadcast.ss.ps(<4 x float> %a0)
+ ; fadd forces execution domain
+ %3 = fadd <4 x float> %2, <float 0x0, float 0x0, float 0x0, float 0x0>
+ ret <4 x float> %3
+}
+declare <4 x float> @llvm.x86.avx2.vbroadcast.ss.ps(<4 x float>) nounwind readonly
+
+define <8 x float> @stack_fold_broadcastss_ymm(<4 x float> %a0) {
+ ;CHECK-LABEL: stack_fold_broadcastss_ymm
+ ;CHECK: vbroadcastss {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <8 x float> @llvm.x86.avx2.vbroadcast.ss.ps.256(<4 x float> %a0)
+ ; fadd forces execution domain
+ %3 = fadd <8 x float> %2, <float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0>
+ ret <8 x float> %3
+}
+declare <8 x float> @llvm.x86.avx2.vbroadcast.ss.ps.256(<4 x float>) nounwind readonly
+
+define <4 x i32> @stack_fold_extracti128(<8 x i32> %a0, <8 x i32> %a1) {
+ ;CHECK-LABEL: stack_fold_extracti128
+ ;CHECK: vextracti128 $1, {{%ymm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp) {{.*#+}} 16-byte Folded Spill
+ ; add forces execution domain
+ %1 = add <8 x i32> %a0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+ %2 = shufflevector <8 x i32> %1, <8 x i32> %a1, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %3 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ ret <4 x i32> %2
+}
+
+define <8 x i32> @stack_fold_inserti128(<4 x i32> %a0, <4 x i32> %a1) {
+ ;CHECK-LABEL: stack_fold_inserti128
+ ;CHECK: vinserti128 $1, {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = shufflevector <4 x i32> %a0, <4 x i32> %a1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ ; add forces execution domain
+ %3 = add <8 x i32> %2, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+ ret <8 x i32> %3
+}
+
+define <16 x i16> @stack_fold_mpsadbw(<32 x i8> %a0, <32 x i8> %a1) {
+ ;CHECK-LABEL: stack_fold_mpsadbw
+ ;CHECK: vmpsadbw $7, {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <16 x i16> @llvm.x86.avx2.mpsadbw(<32 x i8> %a0, <32 x i8> %a1, i8 7)
+ ret <16 x i16> %2
+}
+declare <16 x i16> @llvm.x86.avx2.mpsadbw(<32 x i8>, <32 x i8>, i8) nounwind readnone
+
+define <32 x i8> @stack_fold_pabsb(<32 x i8> %a0) {
+ ;CHECK-LABEL: stack_fold_pabsb
+ ;CHECK: vpabsb {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <32 x i8> @llvm.x86.avx2.pabs.b(<32 x i8> %a0)
+ ret <32 x i8> %2
+}
+declare <32 x i8> @llvm.x86.avx2.pabs.b(<32 x i8>) nounwind readnone
+
+define <8 x i32> @stack_fold_pabsd(<8 x i32> %a0) {
+ ;CHECK-LABEL: stack_fold_pabsd
+ ;CHECK: vpabsd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <8 x i32> @llvm.x86.avx2.pabs.d(<8 x i32> %a0)
+ ret <8 x i32> %2
+}
+declare <8 x i32> @llvm.x86.avx2.pabs.d(<8 x i32>) nounwind readnone
+
+define <16 x i16> @stack_fold_pabsw(<16 x i16> %a0) {
+ ;CHECK-LABEL: stack_fold_pabsw
+ ;CHECK: vpabsw {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <16 x i16> @llvm.x86.avx2.pabs.w(<16 x i16> %a0)
+ ret <16 x i16> %2
+}
+declare <16 x i16> @llvm.x86.avx2.pabs.w(<16 x i16>) nounwind readnone
+
+define <16 x i16> @stack_fold_packssdw(<8 x i32> %a0, <8 x i32> %a1) {
+ ;CHECK-LABEL: stack_fold_packssdw
+ ;CHECK: vpackssdw {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> %a0, <8 x i32> %a1)
+ ret <16 x i16> %2
+}
+declare <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32>, <8 x i32>) nounwind readnone
+
+define <32 x i8> @stack_fold_packsswb(<16 x i16> %a0, <16 x i16> %a1) {
+ ;CHECK-LABEL: stack_fold_packsswb
+ ;CHECK: vpacksswb {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> %a0, <16 x i16> %a1)
+ ret <32 x i8> %2
+}
+declare <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16>, <16 x i16>) nounwind readnone
+
+define <16 x i16> @stack_fold_packusdw(<8 x i32> %a0, <8 x i32> %a1) {
+ ;CHECK-LABEL: stack_fold_packusdw
+ ;CHECK: vpackusdw {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> %a0, <8 x i32> %a1)
+ ret <16 x i16> %2
+}
+declare <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32>, <8 x i32>) nounwind readnone
+
+define <32 x i8> @stack_fold_packuswb(<16 x i16> %a0, <16 x i16> %a1) {
+ ;CHECK-LABEL: stack_fold_packuswb
+ ;CHECK: vpackuswb {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16> %a0, <16 x i16> %a1)
+ ret <32 x i8> %2
+}
+declare <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16>, <16 x i16>) nounwind readnone
+
+define <32 x i8> @stack_fold_paddb(<32 x i8> %a0, <32 x i8> %a1) {
+ ;CHECK-LABEL: stack_fold_paddb
+ ;CHECK: vpaddb {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = add <32 x i8> %a0, %a1
+ ret <32 x i8> %2
+}
+
+define <8 x i32> @stack_fold_paddd(<8 x i32> %a0, <8 x i32> %a1) {
+ ;CHECK-LABEL: stack_fold_paddd
+ ;CHECK: vpaddd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = add <8 x i32> %a0, %a1
+ ret <8 x i32> %2
+}
+
+define <4 x i64> @stack_fold_paddq(<4 x i64> %a0, <4 x i64> %a1) {
+ ;CHECK-LABEL: stack_fold_paddq
+ ;CHECK: vpaddq {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = add <4 x i64> %a0, %a1
+ ret <4 x i64> %2
+}
+
+define <32 x i8> @stack_fold_paddsb(<32 x i8> %a0, <32 x i8> %a1) {
+ ;CHECK-LABEL: stack_fold_paddsb
+ ;CHECK: vpaddsb {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <32 x i8> @llvm.x86.avx2.padds.b(<32 x i8> %a0, <32 x i8> %a1)
+ ret <32 x i8> %2
+}
+declare <32 x i8> @llvm.x86.avx2.padds.b(<32 x i8>, <32 x i8>) nounwind readnone
+
+define <16 x i16> @stack_fold_paddsw(<16 x i16> %a0, <16 x i16> %a1) {
+ ;CHECK-LABEL: stack_fold_paddsw
+ ;CHECK: vpaddsw {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <16 x i16> @llvm.x86.avx2.padds.w(<16 x i16> %a0, <16 x i16> %a1)
+ ret <16 x i16> %2
+}
+declare <16 x i16> @llvm.x86.avx2.padds.w(<16 x i16>, <16 x i16>) nounwind readnone
+
+define <32 x i8> @stack_fold_paddusb(<32 x i8> %a0, <32 x i8> %a1) {
+ ;CHECK-LABEL: stack_fold_paddusb
+ ;CHECK: vpaddusb {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <32 x i8> @llvm.x86.avx2.paddus.b(<32 x i8> %a0, <32 x i8> %a1)
+ ret <32 x i8> %2
+}
+declare <32 x i8> @llvm.x86.avx2.paddus.b(<32 x i8>, <32 x i8>) nounwind readnone
+
+define <16 x i16> @stack_fold_paddusw(<16 x i16> %a0, <16 x i16> %a1) {
+ ;CHECK-LABEL: stack_fold_paddusw
+ ;CHECK: vpaddusw {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <16 x i16> @llvm.x86.avx2.paddus.w(<16 x i16> %a0, <16 x i16> %a1)
+ ret <16 x i16> %2
+}
+declare <16 x i16> @llvm.x86.avx2.paddus.w(<16 x i16>, <16 x i16>) nounwind readnone
+
+define <16 x i16> @stack_fold_paddw(<16 x i16> %a0, <16 x i16> %a1) {
+ ;CHECK-LABEL: stack_fold_paddw
+ ;CHECK: vpaddw {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = add <16 x i16> %a0, %a1
+ ret <16 x i16> %2
+}
+
+define <32 x i8> @stack_fold_palignr(<32 x i8> %a0, <32 x i8> %a1) {
+ ;CHECK-LABEL: stack_fold_palignr
+ ;CHECK: vpalignr $1, {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = shufflevector <32 x i8> %a1, <32 x i8> %a0, <32 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 32, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 48>
+ ret <32 x i8> %2
+}
+
+define <32 x i8> @stack_fold_pand(<32 x i8> %a0, <32 x i8> %a1) {
+ ;CHECK-LABEL: stack_fold_pand
+ ;CHECK: vpand {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = and <32 x i8> %a0, %a1
+ ; add forces execution domain
+ %3 = add <32 x i8> %2, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+ ret <32 x i8> %3
+}
+
+define <32 x i8> @stack_fold_pandn(<32 x i8> %a0, <32 x i8> %a1) {
+ ;CHECK-LABEL: stack_fold_pandn
+ ;CHECK: vpandn {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = xor <32 x i8> %a0, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+ %3 = and <32 x i8> %2, %a1
+ ; add forces execution domain
+ %4 = add <32 x i8> %3, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+ ret <32 x i8> %4
+}
+
+define <32 x i8> @stack_fold_pavgb(<32 x i8> %a0, <32 x i8> %a1) {
+ ;CHECK-LABEL: stack_fold_pavgb
+ ;CHECK: vpavgb {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <32 x i8> @llvm.x86.avx2.pavg.b(<32 x i8> %a0, <32 x i8> %a1)
+ ret <32 x i8> %2
+}
+declare <32 x i8> @llvm.x86.avx2.pavg.b(<32 x i8>, <32 x i8>) nounwind readnone
+
+define <16 x i16> @stack_fold_pavgw(<16 x i16> %a0, <16 x i16> %a1) {
+ ;CHECK-LABEL: stack_fold_pavgw
+ ;CHECK: vpavgw {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <16 x i16> @llvm.x86.avx2.pavg.w(<16 x i16> %a0, <16 x i16> %a1)
+ ret <16 x i16> %2
+}
+declare <16 x i16> @llvm.x86.avx2.pavg.w(<16 x i16>, <16 x i16>) nounwind readnone
+
+define <4 x i32> @stack_fold_pblendd(<4 x i32> %a0, <4 x i32> %a1) {
+ ;CHECK-LABEL: stack_fold_pblendd
+ ;CHECK: vpblendd $7, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = shufflevector <4 x i32> %a0, <4 x i32> %a1, <4 x i32> <i32 4, i32 5, i32 6, i32 3>
+ ret <4 x i32> %2
+}
+
+define <8 x i32> @stack_fold_pblendd_ymm(<8 x i32> %a0, <8 x i32> %a1) {
+ ;CHECK-LABEL: stack_fold_pblendd_ymm
+ ;CHECK: vpblendd $7, {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = shufflevector <8 x i32> %a0, <8 x i32> %a1, <8 x i32> <i32 8, i32 9, i32 10, i32 3, i32 4, i32 5, i32 6, i32 7>
+ ret <8 x i32> %2
+}
+
+define <32 x i8> @stack_fold_pblendvb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> %c) {
+ ;CHECK-LABEL: stack_fold_pblendvb
+ ;CHECK: vpblendvb {{%ymm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8> %a1, <32 x i8> %c, <32 x i8> %a0)
+ ret <32 x i8> %2
+}
+declare <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8>, <32 x i8>, <32 x i8>) nounwind readnone
+
+define <16 x i16> @stack_fold_pblendw(<16 x i16> %a0, <16 x i16> %a1) {
+ ;CHECK-LABEL: stack_fold_pblendw
+ ;CHECK: vpblendw $7, {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <16 x i16> @llvm.x86.avx2.pblendw(<16 x i16> %a0, <16 x i16> %a1, i8 7)
+ ret <16 x i16> %2
+}
+declare <16 x i16> @llvm.x86.avx2.pblendw(<16 x i16>, <16 x i16>, i8) nounwind readnone
+
+define <16 x i8> @stack_fold_pbroadcastb(<16 x i8> %a0) {
+ ;CHECK-LABEL: stack_fold_pbroadcastb
+ ;CHECK: vpbroadcastb {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <16 x i8> @llvm.x86.avx2.pbroadcastb.128(<16 x i8> %a0)
+ ret <16 x i8> %2
+}
+declare <16 x i8> @llvm.x86.avx2.pbroadcastb.128(<16 x i8>) nounwind readonly
+
+define <32 x i8> @stack_fold_pbroadcastb_ymm(<16 x i8> %a0) {
+ ;CHECK-LABEL: stack_fold_pbroadcastb_ymm
+ ;CHECK: vpbroadcastb {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <32 x i8> @llvm.x86.avx2.pbroadcastb.256(<16 x i8> %a0)
+ ret <32 x i8> %2
+}
+declare <32 x i8> @llvm.x86.avx2.pbroadcastb.256(<16 x i8>) nounwind readonly
+
+define <4 x i32> @stack_fold_pbroadcastd(<4 x i32> %a0) {
+ ;CHECK-LABEL: stack_fold_pbroadcastd
+ ;CHECK: vpbroadcastd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <4 x i32> @llvm.x86.avx2.pbroadcastd.128(<4 x i32> %a0)
+ ; add forces execution domain
+ %3 = add <4 x i32> %2, <i32 1, i32 1, i32 1, i32 1>
+ ret <4 x i32> %3
+}
+declare <4 x i32> @llvm.x86.avx2.pbroadcastd.128(<4 x i32>) nounwind readonly
+
+define <8 x i32> @stack_fold_pbroadcastd_ymm(<4 x i32> %a0) {
+ ;CHECK-LABEL: stack_fold_pbroadcastd_ymm
+ ;CHECK: vpbroadcastd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <8 x i32> @llvm.x86.avx2.pbroadcastd.256(<4 x i32> %a0)
+ ; add forces execution domain
+ %3 = add <8 x i32> %2, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+ ret <8 x i32> %3
+}
+declare <8 x i32> @llvm.x86.avx2.pbroadcastd.256(<4 x i32>) nounwind readonly
+
+define <2 x i64> @stack_fold_pbroadcastq(<2 x i64> %a0) {
+ ;CHECK-LABEL: stack_fold_pbroadcastq
+ ;CHECK: vpbroadcastq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <2 x i64> @llvm.x86.avx2.pbroadcastq.128(<2 x i64> %a0)
+ ; add forces execution domain
+ %3 = add <2 x i64> %2, <i64 1, i64 1>
+ ret <2 x i64> %3
+}
+declare <2 x i64> @llvm.x86.avx2.pbroadcastq.128(<2 x i64>) nounwind readonly
+
+define <4 x i64> @stack_fold_pbroadcastq_ymm(<2 x i64> %a0) {
+ ;CHECK-LABEL: stack_fold_pbroadcastq_ymm
+ ;CHECK: vpbroadcastq {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <4 x i64> @llvm.x86.avx2.pbroadcastq.256(<2 x i64> %a0)
+ ; add forces execution domain
+ %3 = add <4 x i64> %2, <i64 1, i64 1, i64 1, i64 1>
+ ret <4 x i64> %3
+}
+declare <4 x i64> @llvm.x86.avx2.pbroadcastq.256(<2 x i64>) nounwind readonly
+
+define <8 x i16> @stack_fold_pbroadcastw(<8 x i16> %a0) {
+ ;CHECK-LABEL: stack_fold_pbroadcastw
+ ;CHECK: vpbroadcastw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <8 x i16> @llvm.x86.avx2.pbroadcastw.128(<8 x i16> %a0)
+ ret <8 x i16> %2
+}
+declare <8 x i16> @llvm.x86.avx2.pbroadcastw.128(<8 x i16>) nounwind readonly
+
+define <16 x i16> @stack_fold_pbroadcastw_ymm(<8 x i16> %a0) {
+ ;CHECK-LABEL: stack_fold_pbroadcastw_ymm
+ ;CHECK: vpbroadcastw {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <16 x i16> @llvm.x86.avx2.pbroadcastw.256(<8 x i16> %a0)
+ ret <16 x i16> %2
+}
+declare <16 x i16> @llvm.x86.avx2.pbroadcastw.256(<8 x i16>) nounwind readonly
+
+define <32 x i8> @stack_fold_pcmpeqb(<32 x i8> %a0, <32 x i8> %a1) {
+ ;CHECK-LABEL: stack_fold_pcmpeqb
+ ;CHECK: vpcmpeqb {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = icmp eq <32 x i8> %a0, %a1
+ %3 = sext <32 x i1> %2 to <32 x i8>
+ ret <32 x i8> %3
+}
+
+define <8 x i32> @stack_fold_pcmpeqd(<8 x i32> %a0, <8 x i32> %a1) {
+ ;CHECK-LABEL: stack_fold_pcmpeqd
+ ;CHECK: vpcmpeqd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = icmp eq <8 x i32> %a0, %a1
+ %3 = sext <8 x i1> %2 to <8 x i32>
+ ret <8 x i32> %3
+}
+
+define <4 x i64> @stack_fold_pcmpeqq(<4 x i64> %a0, <4 x i64> %a1) {
+ ;CHECK-LABEL: stack_fold_pcmpeqq
+ ;CHECK: vpcmpeqq {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = icmp eq <4 x i64> %a0, %a1
+ %3 = sext <4 x i1> %2 to <4 x i64>
+ ret <4 x i64> %3
+}
+
+define <16 x i16> @stack_fold_pcmpeqw(<16 x i16> %a0, <16 x i16> %a1) {
+ ;CHECK-LABEL: stack_fold_pcmpeqw
+ ;CHECK: vpcmpeqw {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = icmp eq <16 x i16> %a0, %a1
+ %3 = sext <16 x i1> %2 to <16 x i16>
+ ret <16 x i16> %3
+}
+
+define <32 x i8> @stack_fold_pcmpgtb(<32 x i8> %a0, <32 x i8> %a1) {
+ ;CHECK-LABEL: stack_fold_pcmpgtb
+ ;CHECK: vpcmpgtb {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = icmp sgt <32 x i8> %a0, %a1
+ %3 = sext <32 x i1> %2 to <32 x i8>
+ ret <32 x i8> %3
+}
+
+define <8 x i32> @stack_fold_pcmpgtd(<8 x i32> %a0, <8 x i32> %a1) {
+ ;CHECK-LABEL: stack_fold_pcmpgtd
+ ;CHECK: vpcmpgtd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = icmp sgt <8 x i32> %a0, %a1
+ %3 = sext <8 x i1> %2 to <8 x i32>
+ ret <8 x i32> %3
+}
+
+define <4 x i64> @stack_fold_pcmpgtq(<4 x i64> %a0, <4 x i64> %a1) {
+ ;CHECK-LABEL: stack_fold_pcmpgtq
+ ;CHECK: vpcmpgtq {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = icmp sgt <4 x i64> %a0, %a1
+ %3 = sext <4 x i1> %2 to <4 x i64>
+ ret <4 x i64> %3
+}
+
+define <16 x i16> @stack_fold_pcmpgtw(<16 x i16> %a0, <16 x i16> %a1) {
+ ;CHECK-LABEL: stack_fold_pcmpgtw
+ ;CHECK: vpcmpgtw {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = icmp sgt <16 x i16> %a0, %a1
+ %3 = sext <16 x i1> %2 to <16 x i16>
+ ret <16 x i16> %3
+}
+
+define <8 x i32> @stack_fold_perm2i128(<8 x i32> %a0, <8 x i32> %a1) {
+ ;CHECK-LABEL: stack_fold_perm2i128
+ ;CHECK: vperm2i128 $33, {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = shufflevector <8 x i32> %a0, <8 x i32> %a1, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
+ ; add forces execution domain
+ %3 = add <8 x i32> %2, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+ ret <8 x i32> %3
+}
+
+define <8 x i32> @stack_fold_permd(<8 x i32> %a0, <8 x i32> %a1) {
+ ;CHECK-LABEL: stack_fold_permd
+ ;CHECK: vpermd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a1, <8 x i32> %a0)
+ ret <8 x i32> %2
+}
+declare <8 x i32> @llvm.x86.avx2.permd(<8 x i32>, <8 x i32>) nounwind readonly
+
+define <4 x double> @stack_fold_permpd(<4 x double> %a0) {
+ ;CHECK-LABEL: stack_fold_permpd
+ ;CHECK: vpermpd $255, {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = shufflevector <4 x double> %a0, <4 x double> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+ ; fadd forces execution domain
+ %3 = fadd <4 x double> %2, <double 0x0, double 0x0, double 0x0, double 0x0>
+ ret <4 x double> %3
+}
+
+define <8 x float> @stack_fold_permps(<8 x float> %a0, <8 x float> %a1) {
+ ;CHECK-LABEL: stack_fold_permps
+ ;CHECK: vpermps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a1, <8 x float> %a0)
+ ret <8 x float> %2
+}
+declare <8 x float> @llvm.x86.avx2.permps(<8 x float>, <8 x float>) nounwind readonly
+
+define <4 x i64> @stack_fold_permq(<4 x i64> %a0) {
+ ;CHECK-LABEL: stack_fold_permq
+ ;CHECK: vpermq $255, {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = shufflevector <4 x i64> %a0, <4 x i64> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+ ; add forces execution domain
+ %3 = add <4 x i64> %2, <i64 1, i64 1, i64 1, i64 1>
+ ret <4 x i64> %3
+}
+
+define <8 x i32> @stack_fold_phaddd(<8 x i32> %a0, <8 x i32> %a1) {
+ ;CHECK-LABEL: stack_fold_phaddd
+ ;CHECK: vphaddd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <8 x i32> @llvm.x86.avx2.phadd.d(<8 x i32> %a0, <8 x i32> %a1)
+ ret <8 x i32> %2
+}
+declare <8 x i32> @llvm.x86.avx2.phadd.d(<8 x i32>, <8 x i32>) nounwind readnone
+
+define <16 x i16> @stack_fold_phaddsw(<16 x i16> %a0, <16 x i16> %a1) {
+ ;CHECK-LABEL: stack_fold_phaddsw
+ ;CHECK: vphaddsw {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <16 x i16> @llvm.x86.avx2.phadd.sw(<16 x i16> %a0, <16 x i16> %a1)
+ ret <16 x i16> %2
+}
+declare <16 x i16> @llvm.x86.avx2.phadd.sw(<16 x i16>, <16 x i16>) nounwind readnone
+
+define <16 x i16> @stack_fold_phaddw(<16 x i16> %a0, <16 x i16> %a1) {
+ ;CHECK-LABEL: stack_fold_phaddw
+ ;CHECK: vphaddw {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <16 x i16> @llvm.x86.avx2.phadd.w(<16 x i16> %a0, <16 x i16> %a1)
+ ret <16 x i16> %2
+}
+declare <16 x i16> @llvm.x86.avx2.phadd.w(<16 x i16>, <16 x i16>) nounwind readnone
+
+define <8 x i32> @stack_fold_phsubd(<8 x i32> %a0, <8 x i32> %a1) {
+ ;CHECK-LABEL: stack_fold_phsubd
+ ;CHECK: vphsubd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <8 x i32> @llvm.x86.avx2.phsub.d(<8 x i32> %a0, <8 x i32> %a1)
+ ret <8 x i32> %2
+}
+declare <8 x i32> @llvm.x86.avx2.phsub.d(<8 x i32>, <8 x i32>) nounwind readnone
+
+define <16 x i16> @stack_fold_phsubsw(<16 x i16> %a0, <16 x i16> %a1) {
+ ;CHECK-LABEL: stack_fold_phsubsw
+ ;CHECK: vphsubsw {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <16 x i16> @llvm.x86.avx2.phsub.sw(<16 x i16> %a0, <16 x i16> %a1)
+ ret <16 x i16> %2
+}
+declare <16 x i16> @llvm.x86.avx2.phsub.sw(<16 x i16>, <16 x i16>) nounwind readnone
+
+define <16 x i16> @stack_fold_phsubw(<16 x i16> %a0, <16 x i16> %a1) {
+ ;CHECK-LABEL: stack_fold_phsubw
+ ;CHECK: vphsubw {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <16 x i16> @llvm.x86.avx2.phsub.w(<16 x i16> %a0, <16 x i16> %a1)
+ ret <16 x i16> %2
+}
+declare <16 x i16> @llvm.x86.avx2.phsub.w(<16 x i16>, <16 x i16>) nounwind readnone
+
+define <16 x i16> @stack_fold_pmaddubsw(<32 x i8> %a0, <32 x i8> %a1) {
+ ;CHECK-LABEL: stack_fold_pmaddubsw
+ ;CHECK: vpmaddubsw {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8> %a0, <32 x i8> %a1)
+ ret <16 x i16> %2
+}
+declare <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8>, <32 x i8>) nounwind readnone
+
+define <8 x i32> @stack_fold_pmaddwd(<16 x i16> %a0, <16 x i16> %a1) {
+ ;CHECK-LABEL: stack_fold_pmaddwd
+ ;CHECK: vpmaddwd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16> %a0, <16 x i16> %a1)
+ ret <8 x i32> %2
+}
+declare <8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16>, <16 x i16>) nounwind readnone
+
+define <32 x i8> @stack_fold_pmaxsb(<32 x i8> %a0, <32 x i8> %a1) {
+ ;CHECK-LABEL: stack_fold_pmaxsb
+ ;CHECK: vpmaxsb {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <32 x i8> @llvm.x86.avx2.pmaxs.b(<32 x i8> %a0, <32 x i8> %a1)
+ ret <32 x i8> %2
+}
+declare <32 x i8> @llvm.x86.avx2.pmaxs.b(<32 x i8>, <32 x i8>) nounwind readnone
+
+define <8 x i32> @stack_fold_pmaxsd(<8 x i32> %a0, <8 x i32> %a1) {
+ ;CHECK-LABEL: stack_fold_pmaxsd
+ ;CHECK: vpmaxsd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <8 x i32> @llvm.x86.avx2.pmaxs.d(<8 x i32> %a0, <8 x i32> %a1)
+ ret <8 x i32> %2
+}
+declare <8 x i32> @llvm.x86.avx2.pmaxs.d(<8 x i32>, <8 x i32>) nounwind readnone
+
+define <16 x i16> @stack_fold_pmaxsw(<16 x i16> %a0, <16 x i16> %a1) {
+ ;CHECK-LABEL: stack_fold_pmaxsw
+ ;CHECK: vpmaxsw {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <16 x i16> @llvm.x86.avx2.pmaxs.w(<16 x i16> %a0, <16 x i16> %a1)
+ ret <16 x i16> %2
+}
+declare <16 x i16> @llvm.x86.avx2.pmaxs.w(<16 x i16>, <16 x i16>) nounwind readnone
+
+define <32 x i8> @stack_fold_pmaxub(<32 x i8> %a0, <32 x i8> %a1) {
+ ;CHECK-LABEL: stack_fold_pmaxub
+ ;CHECK: vpmaxub {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <32 x i8> @llvm.x86.avx2.pmaxu.b(<32 x i8> %a0, <32 x i8> %a1)
+ ret <32 x i8> %2
+}
+declare <32 x i8> @llvm.x86.avx2.pmaxu.b(<32 x i8>, <32 x i8>) nounwind readnone
+
+define <8 x i32> @stack_fold_pmaxud(<8 x i32> %a0, <8 x i32> %a1) {
+ ;CHECK-LABEL: stack_fold_pmaxud
+ ;CHECK: vpmaxud {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <8 x i32> @llvm.x86.avx2.pmaxu.d(<8 x i32> %a0, <8 x i32> %a1)
+ ret <8 x i32> %2
+}
+declare <8 x i32> @llvm.x86.avx2.pmaxu.d(<8 x i32>, <8 x i32>) nounwind readnone
+
+define <16 x i16> @stack_fold_pmaxuw(<16 x i16> %a0, <16 x i16> %a1) {
+ ;CHECK-LABEL: stack_fold_pmaxuw
+ ;CHECK: vpmaxuw {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <16 x i16> @llvm.x86.avx2.pmaxu.w(<16 x i16> %a0, <16 x i16> %a1)
+ ret <16 x i16> %2
+}
+declare <16 x i16> @llvm.x86.avx2.pmaxu.w(<16 x i16>, <16 x i16>) nounwind readnone
+
+define <32 x i8> @stack_fold_pminsb(<32 x i8> %a0, <32 x i8> %a1) {
+ ;CHECK-LABEL: stack_fold_pminsb
+ ;CHECK: vpminsb {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <32 x i8> @llvm.x86.avx2.pmins.b(<32 x i8> %a0, <32 x i8> %a1)
+ ret <32 x i8> %2
+}
+declare <32 x i8> @llvm.x86.avx2.pmins.b(<32 x i8>, <32 x i8>) nounwind readnone
+
+define <8 x i32> @stack_fold_pminsd(<8 x i32> %a0, <8 x i32> %a1) {
+ ;CHECK-LABEL: stack_fold_pminsd
+ ;CHECK: vpminsd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <8 x i32> @llvm.x86.avx2.pmins.d(<8 x i32> %a0, <8 x i32> %a1)
+ ret <8 x i32> %2
+}
+declare <8 x i32> @llvm.x86.avx2.pmins.d(<8 x i32>, <8 x i32>) nounwind readnone
+
+define <16 x i16> @stack_fold_pminsw(<16 x i16> %a0, <16 x i16> %a1) {
+ ;CHECK-LABEL: stack_fold_pminsw
+ ;CHECK: vpminsw {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <16 x i16> @llvm.x86.avx2.pmins.w(<16 x i16> %a0, <16 x i16> %a1)
+ ret <16 x i16> %2
+}
+declare <16 x i16> @llvm.x86.avx2.pmins.w(<16 x i16>, <16 x i16>) nounwind readnone
+
+define <32 x i8> @stack_fold_pminub(<32 x i8> %a0, <32 x i8> %a1) {
+ ;CHECK-LABEL: stack_fold_pminub
+ ;CHECK: vpminub {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <32 x i8> @llvm.x86.avx2.pminu.b(<32 x i8> %a0, <32 x i8> %a1)
+ ret <32 x i8> %2
+}
+declare <32 x i8> @llvm.x86.avx2.pminu.b(<32 x i8>, <32 x i8>) nounwind readnone
+
+define <8 x i32> @stack_fold_pminud(<8 x i32> %a0, <8 x i32> %a1) {
+ ;CHECK-LABEL: stack_fold_pminud
+ ;CHECK: vpminud {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <8 x i32> @llvm.x86.avx2.pminu.d(<8 x i32> %a0, <8 x i32> %a1)
+ ret <8 x i32> %2
+}
+declare <8 x i32> @llvm.x86.avx2.pminu.d(<8 x i32>, <8 x i32>) nounwind readnone
+
+define <16 x i16> @stack_fold_pminuw(<16 x i16> %a0, <16 x i16> %a1) {
+ ;CHECK-LABEL: stack_fold_pminuw
+ ;CHECK: vpminuw {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <16 x i16> @llvm.x86.avx2.pminu.w(<16 x i16> %a0, <16 x i16> %a1)
+ ret <16 x i16> %2
+}
+declare <16 x i16> @llvm.x86.avx2.pminu.w(<16 x i16>, <16 x i16>) nounwind readnone
+
+define <8 x i32> @stack_fold_pmovsxbd(<16 x i8> %a0) {
+ ;CHECK-LABEL: stack_fold_pmovsxbd
+ ;CHECK: vpmovsxbd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <8 x i32> @llvm.x86.avx2.pmovsxbd(<16 x i8> %a0)
+ ret <8 x i32> %2
+}
+declare <8 x i32> @llvm.x86.avx2.pmovsxbd(<16 x i8>) nounwind readnone
+
+define <4 x i64> @stack_fold_pmovsxbq(<16 x i8> %a0) {
+ ;CHECK-LABEL: stack_fold_pmovsxbq
+ ;CHECK: vpmovsxbq {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <4 x i64> @llvm.x86.avx2.pmovsxbq(<16 x i8> %a0)
+ ret <4 x i64> %2
+}
+declare <4 x i64> @llvm.x86.avx2.pmovsxbq(<16 x i8>) nounwind readnone
+
+define <16 x i16> @stack_fold_pmovsxbw(<16 x i8> %a0) {
+ ;CHECK-LABEL: stack_fold_pmovsxbw
+ ;CHECK: vpmovsxbw {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <16 x i16> @llvm.x86.avx2.pmovsxbw(<16 x i8> %a0)
+ ret <16 x i16> %2
+}
+declare <16 x i16> @llvm.x86.avx2.pmovsxbw(<16 x i8>) nounwind readnone
+
+define <4 x i64> @stack_fold_pmovsxdq(<4 x i32> %a0) {
+ ;CHECK-LABEL: stack_fold_pmovsxdq
+ ;CHECK: vpmovsxdq {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <4 x i64> @llvm.x86.avx2.pmovsxdq(<4 x i32> %a0)
+ ret <4 x i64> %2
+}
+declare <4 x i64> @llvm.x86.avx2.pmovsxdq(<4 x i32>) nounwind readnone
+
+define <8 x i32> @stack_fold_pmovsxwd(<8 x i16> %a0) {
+ ;CHECK-LABEL: stack_fold_pmovsxwd
+ ;CHECK: vpmovsxwd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <8 x i32> @llvm.x86.avx2.pmovsxwd(<8 x i16> %a0)
+ ret <8 x i32> %2
+}
+declare <8 x i32> @llvm.x86.avx2.pmovsxwd(<8 x i16>) nounwind readnone
+
+define <4 x i64> @stack_fold_pmovsxwq(<8 x i16> %a0) {
+ ;CHECK-LABEL: stack_fold_pmovsxwq
+ ;CHECK: vpmovsxwq {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <4 x i64> @llvm.x86.avx2.pmovsxwq(<8 x i16> %a0)
+ ret <4 x i64> %2
+}
+declare <4 x i64> @llvm.x86.avx2.pmovsxwq(<8 x i16>) nounwind readnone
+
+define <8 x i32> @stack_fold_pmovzxbd(<16 x i8> %a0) {
+ ;CHECK-LABEL: stack_fold_pmovzxbd
+ ;CHECK: vpmovzxbd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <8 x i32> @llvm.x86.avx2.pmovzxbd(<16 x i8> %a0)
+ ret <8 x i32> %2
+}
+declare <8 x i32> @llvm.x86.avx2.pmovzxbd(<16 x i8>) nounwind readnone
+
+define <4 x i64> @stack_fold_pmovzxbq(<16 x i8> %a0) {
+ ;CHECK-LABEL: stack_fold_pmovzxbq
+ ;CHECK: vpmovzxbq {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <4 x i64> @llvm.x86.avx2.pmovzxbq(<16 x i8> %a0)
+ ret <4 x i64> %2
+}
+declare <4 x i64> @llvm.x86.avx2.pmovzxbq(<16 x i8>) nounwind readnone
+
+define <16 x i16> @stack_fold_pmovzxbw(<16 x i8> %a0) {
+ ;CHECK-LABEL: stack_fold_pmovzxbw
+ ;CHECK: vpmovzxbw {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <16 x i16> @llvm.x86.avx2.pmovzxbw(<16 x i8> %a0)
+ ret <16 x i16> %2
+}
+declare <16 x i16> @llvm.x86.avx2.pmovzxbw(<16 x i8>) nounwind readnone
+
+define <4 x i64> @stack_fold_pmovzxdq(<4 x i32> %a0) {
+ ;CHECK-LABEL: stack_fold_pmovzxdq
+ ;CHECK: vpmovzxdq {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <4 x i64> @llvm.x86.avx2.pmovzxdq(<4 x i32> %a0)
+ ret <4 x i64> %2
+}
+declare <4 x i64> @llvm.x86.avx2.pmovzxdq(<4 x i32>) nounwind readnone
+
+define <8 x i32> @stack_fold_pmovzxwd(<8 x i16> %a0) {
+ ;CHECK-LABEL: stack_fold_pmovzxwd
+ ;CHECK: vpmovzxwd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <8 x i32> @llvm.x86.avx2.pmovzxwd(<8 x i16> %a0)
+ ret <8 x i32> %2
+}
+declare <8 x i32> @llvm.x86.avx2.pmovzxwd(<8 x i16>) nounwind readnone
+
+define <4 x i64> @stack_fold_pmovzxwq(<8 x i16> %a0) {
+ ;CHECK-LABEL: stack_fold_pmovzxwq
+ ;CHECK: vpmovzxwq {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <4 x i64> @llvm.x86.avx2.pmovzxwq(<8 x i16> %a0)
+ ret <4 x i64> %2
+}
+declare <4 x i64> @llvm.x86.avx2.pmovzxwq(<8 x i16>) nounwind readnone
+
+define <4 x i64> @stack_fold_pmuldq(<8 x i32> %a0, <8 x i32> %a1) {
+ ;CHECK-LABEL: stack_fold_pmuldq
+ ;CHECK: vpmuldq {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <4 x i64> @llvm.x86.avx2.pmul.dq(<8 x i32> %a0, <8 x i32> %a1)
+ ret <4 x i64> %2
+}
+declare <4 x i64> @llvm.x86.avx2.pmul.dq(<8 x i32>, <8 x i32>) nounwind readnone
+
+define <16 x i16> @stack_fold_pmulhrsw(<16 x i16> %a0, <16 x i16> %a1) {
+ ;CHECK-LABEL: stack_fold_pmulhrsw
+ ;CHECK: vpmulhrsw {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <16 x i16> @llvm.x86.avx2.pmul.hr.sw(<16 x i16> %a0, <16 x i16> %a1)
+ ret <16 x i16> %2
+}
+declare <16 x i16> @llvm.x86.avx2.pmul.hr.sw(<16 x i16>, <16 x i16>) nounwind readnone
+
+define <16 x i16> @stack_fold_pmulhuw(<16 x i16> %a0, <16 x i16> %a1) {
+ ;CHECK-LABEL: stack_fold_pmulhuw
+ ;CHECK: vpmulhuw {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <16 x i16> @llvm.x86.avx2.pmulhu.w(<16 x i16> %a0, <16 x i16> %a1)
+ ret <16 x i16> %2
+}
+declare <16 x i16> @llvm.x86.avx2.pmulhu.w(<16 x i16>, <16 x i16>) nounwind readnone
+
+define <16 x i16> @stack_fold_pmulhw(<16 x i16> %a0, <16 x i16> %a1) {
+ ;CHECK-LABEL: stack_fold_pmulhw
+ ;CHECK: vpmulhw {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <16 x i16> @llvm.x86.avx2.pmulh.w(<16 x i16> %a0, <16 x i16> %a1)
+ ret <16 x i16> %2
+}
+declare <16 x i16> @llvm.x86.avx2.pmulh.w(<16 x i16>, <16 x i16>) nounwind readnone
+
+define <8 x i32> @stack_fold_pmulld(<8 x i32> %a0, <8 x i32> %a1) {
+ ;CHECK-LABEL: stack_fold_pmulld
+ ;CHECK: vpmulld {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = mul <8 x i32> %a0, %a1
+ ret <8 x i32> %2
+}
+
+define <16 x i16> @stack_fold_pmullw(<16 x i16> %a0, <16 x i16> %a1) {
+ ;CHECK-LABEL: stack_fold_pmullw
+ ;CHECK: vpmullw {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = mul <16 x i16> %a0, %a1
+ ret <16 x i16> %2
+}
+
+define <4 x i64> @stack_fold_pmuludq(<8 x i32> %a0, <8 x i32> %a1) {
+ ;CHECK-LABEL: stack_fold_pmuludq
+ ;CHECK: vpmuludq {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <4 x i64> @llvm.x86.avx2.pmulu.dq(<8 x i32> %a0, <8 x i32> %a1)
+ ret <4 x i64> %2
+}
+declare <4 x i64> @llvm.x86.avx2.pmulu.dq(<8 x i32>, <8 x i32>) nounwind readnone
+
+define <32 x i8> @stack_fold_por(<32 x i8> %a0, <32 x i8> %a1) {
+ ;CHECK-LABEL: stack_fold_por
+ ;CHECK: vpor {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = or <32 x i8> %a0, %a1
+ ; add forces execution domain
+ %3 = add <32 x i8> %2, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+ ret <32 x i8> %3
+}
+
+define <4 x i64> @stack_fold_psadbw(<32 x i8> %a0, <32 x i8> %a1) {
+ ;CHECK-LABEL: stack_fold_psadbw
+ ;CHECK: vpsadbw {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <4 x i64> @llvm.x86.avx2.psad.bw(<32 x i8> %a0, <32 x i8> %a1)
+ ret <4 x i64> %2
+}
+declare <4 x i64> @llvm.x86.avx2.psad.bw(<32 x i8>, <32 x i8>) nounwind readnone
+
+define <32 x i8> @stack_fold_pshufb(<32 x i8> %a0, <32 x i8> %a1) {
+ ;CHECK-LABEL: stack_fold_pshufb
+ ;CHECK: vpshufb {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %a0, <32 x i8> %a1)
+ ret <32 x i8> %2
+}
+declare <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8>, <32 x i8>) nounwind readnone
+
+define <8 x i32> @stack_fold_pshufd(<8 x i32> %a0) {
+ ;CHECK-LABEL: stack_fold_pshufd
+ ;CHECK: vpshufd $27, {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = shufflevector <8 x i32> %a0, <8 x i32> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
+ ret <8 x i32> %2
+}
+
+; TODO stack_fold_pshufhw
+
+; TODO stack_fold_pshuflw
+
+define <32 x i8> @stack_fold_psignb(<32 x i8> %a0, <32 x i8> %a1) {
+ ;CHECK-LABEL: stack_fold_psignb
+ ;CHECK: vpsignb {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <32 x i8> @llvm.x86.avx2.psign.b(<32 x i8> %a0, <32 x i8> %a1)
+ ret <32 x i8> %2
+}
+declare <32 x i8> @llvm.x86.avx2.psign.b(<32 x i8>, <32 x i8>) nounwind readnone
+
+define <8 x i32> @stack_fold_psignd(<8 x i32> %a0, <8 x i32> %a1) {
+ ;CHECK-LABEL: stack_fold_psignd
+ ;CHECK: vpsignd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <8 x i32> @llvm.x86.avx2.psign.d(<8 x i32> %a0, <8 x i32> %a1)
+ ret <8 x i32> %2
+}
+declare <8 x i32> @llvm.x86.avx2.psign.d(<8 x i32>, <8 x i32>) nounwind readnone
+
+define <16 x i16> @stack_fold_psignw(<16 x i16> %a0, <16 x i16> %a1) {
+ ;CHECK-LABEL: stack_fold_psignw
+ ;CHECK: vpsignw {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <16 x i16> @llvm.x86.avx2.psign.w(<16 x i16> %a0, <16 x i16> %a1)
+ ret <16 x i16> %2
+}
+declare <16 x i16> @llvm.x86.avx2.psign.w(<16 x i16>, <16 x i16>) nounwind readnone
+
+define <8 x i32> @stack_fold_pslld(<8 x i32> %a0, <4 x i32> %a1) {
+ ;CHECK-LABEL: stack_fold_pslld
+ ;CHECK: vpslld {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %a0, <4 x i32> %a1)
+ ret <8 x i32> %2
+}
+declare <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32>, <4 x i32>) nounwind readnone
+
+define <4 x i64> @stack_fold_psllq(<4 x i64> %a0, <2 x i64> %a1) {
+ ;CHECK-LABEL: stack_fold_psllq
+ ;CHECK: vpsllq {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %a0, <2 x i64> %a1)
+ ret <4 x i64> %2
+}
+declare <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64>, <2 x i64>) nounwind readnone
+
+define <4 x i32> @stack_fold_psllvd(<4 x i32> %a0, <4 x i32> %a1) {
+ ;CHECK-LABEL: stack_fold_psllvd
+ ;CHECK: vpsllvd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> %a0, <4 x i32> %a1)
+ ret <4 x i32> %2
+}
+declare <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <8 x i32> @stack_fold_psllvd_ymm(<8 x i32> %a0, <8 x i32> %a1) {
+ ;CHECK-LABEL: stack_fold_psllvd_ymm
+ ;CHECK: vpsllvd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> %a0, <8 x i32> %a1)
+ ret <8 x i32> %2
+}
+declare <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32>, <8 x i32>) nounwind readnone
+
+define <2 x i64> @stack_fold_psllvq(<2 x i64> %a0, <2 x i64> %a1) {
+ ;CHECK-LABEL: stack_fold_psllvq
+ ;CHECK: vpsllvq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> %a0, <2 x i64> %a1)
+ ret <2 x i64> %2
+}
+declare <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <4 x i64> @stack_fold_psllvq_ymm(<4 x i64> %a0, <4 x i64> %a1) {
+ ;CHECK-LABEL: stack_fold_psllvq_ymm
+ ;CHECK: vpsllvq {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> %a0, <4 x i64> %a1)
+ ret <4 x i64> %2
+}
+declare <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64>, <4 x i64>) nounwind readnone
+
+define <16 x i16> @stack_fold_psllw(<16 x i16> %a0, <8 x i16> %a1) {
+ ;CHECK-LABEL: stack_fold_psllw
+ ;CHECK: vpsllw {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %a0, <8 x i16> %a1)
+ ret <16 x i16> %2
+}
+declare <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16>, <8 x i16>) nounwind readnone
+
+define <8 x i32> @stack_fold_psrad(<8 x i32> %a0, <4 x i32> %a1) {
+ ;CHECK-LABEL: stack_fold_psrad
+ ;CHECK: vpsrad {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %a0, <4 x i32> %a1)
+ ret <8 x i32> %2
+}
+declare <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32>, <4 x i32>) nounwind readnone
+
+define <4 x i32> @stack_fold_psravd(<4 x i32> %a0, <4 x i32> %a1) {
+ ;CHECK-LABEL: stack_fold_psravd
+ ;CHECK: vpsravd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> %a0, <4 x i32> %a1)
+ ret <4 x i32> %2
+}
+declare <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <8 x i32> @stack_fold_psravd_ymm(<8 x i32> %a0, <8 x i32> %a1) {
+ ;CHECK-LABEL: stack_fold_psravd_ymm
+ ;CHECK: vpsravd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> %a0, <8 x i32> %a1)
+ ret <8 x i32> %2
+}
+declare <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32>, <8 x i32>) nounwind readnone
+
+define <16 x i16> @stack_fold_psraw(<16 x i16> %a0, <8 x i16> %a1) {
+ ;CHECK-LABEL: stack_fold_psraw
+ ;CHECK: vpsraw {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %a0, <8 x i16> %a1)
+ ret <16 x i16> %2
+}
+declare <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16>, <8 x i16>) nounwind readnone
+
+define <8 x i32> @stack_fold_psrld(<8 x i32> %a0, <4 x i32> %a1) {
+ ;CHECK-LABEL: stack_fold_psrld
+ ;CHECK: vpsrld {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %a0, <4 x i32> %a1)
+ ret <8 x i32> %2
+}
+declare <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32>, <4 x i32>) nounwind readnone
+
+define <4 x i64> @stack_fold_psrlq(<4 x i64> %a0, <2 x i64> %a1) {
+ ;CHECK-LABEL: stack_fold_psrlq
+ ;CHECK: vpsrlq {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %a0, <2 x i64> %a1)
+ ret <4 x i64> %2
+}
+declare <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64>, <2 x i64>) nounwind readnone
+
+define <4 x i32> @stack_fold_psrlvd(<4 x i32> %a0, <4 x i32> %a1) {
+ ;CHECK-LABEL: stack_fold_psrlvd
+ ;CHECK: vpsrlvd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> %a0, <4 x i32> %a1)
+ ret <4 x i32> %2
+}
+declare <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <8 x i32> @stack_fold_psrlvd_ymm(<8 x i32> %a0, <8 x i32> %a1) {
+ ;CHECK-LABEL: stack_fold_psrlvd_ymm
+ ;CHECK: vpsrlvd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> %a0, <8 x i32> %a1)
+ ret <8 x i32> %2
+}
+declare <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32>, <8 x i32>) nounwind readnone
+
+define <2 x i64> @stack_fold_psrlvq(<2 x i64> %a0, <2 x i64> %a1) {
+ ;CHECK-LABEL: stack_fold_psrlvq
+ ;CHECK: vpsrlvq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> %a0, <2 x i64> %a1)
+ ret <2 x i64> %2
+}
+declare <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <4 x i64> @stack_fold_psrlvq_ymm(<4 x i64> %a0, <4 x i64> %a1) {
+ ;CHECK-LABEL: stack_fold_psrlvq_ymm
+ ;CHECK: vpsrlvq {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> %a0, <4 x i64> %a1)
+ ret <4 x i64> %2
+}
+declare <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64>, <4 x i64>) nounwind readnone
+
+define <16 x i16> @stack_fold_psrlw(<16 x i16> %a0, <8 x i16> %a1) {
+ ;CHECK-LABEL: stack_fold_psrlw
+ ;CHECK: vpsrlw {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %a0, <8 x i16> %a1)
+ ret <16 x i16> %2
+}
+declare <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16>, <8 x i16>) nounwind readnone
+
+define <32 x i8> @stack_fold_psubb(<32 x i8> %a0, <32 x i8> %a1) {
+ ;CHECK-LABEL: stack_fold_psubb
+ ;CHECK: vpsubb {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = sub <32 x i8> %a0, %a1
+ ret <32 x i8> %2
+}
+
+define <8 x i32> @stack_fold_psubd(<8 x i32> %a0, <8 x i32> %a1) {
+ ;CHECK-LABEL: stack_fold_psubd
+ ;CHECK: vpsubd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = sub <8 x i32> %a0, %a1
+ ret <8 x i32> %2
+}
+
+define <4 x i64> @stack_fold_psubq(<4 x i64> %a0, <4 x i64> %a1) {
+ ;CHECK-LABEL: stack_fold_psubq
+ ;CHECK: vpsubq {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = sub <4 x i64> %a0, %a1
+ ret <4 x i64> %2
+}
+
+define <32 x i8> @stack_fold_psubsb(<32 x i8> %a0, <32 x i8> %a1) {
+ ;CHECK-LABEL: stack_fold_psubsb
+ ;CHECK: vpsubsb {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <32 x i8> @llvm.x86.avx2.psubs.b(<32 x i8> %a0, <32 x i8> %a1)
+ ret <32 x i8> %2
+}
+declare <32 x i8> @llvm.x86.avx2.psubs.b(<32 x i8>, <32 x i8>) nounwind readnone
+
+define <16 x i16> @stack_fold_psubsw(<16 x i16> %a0, <16 x i16> %a1) {
+ ;CHECK-LABEL: stack_fold_psubsw
+ ;CHECK: vpsubsw {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <16 x i16> @llvm.x86.avx2.psubs.w(<16 x i16> %a0, <16 x i16> %a1)
+ ret <16 x i16> %2
+}
+declare <16 x i16> @llvm.x86.avx2.psubs.w(<16 x i16>, <16 x i16>) nounwind readnone
+
+define <32 x i8> @stack_fold_psubusb(<32 x i8> %a0, <32 x i8> %a1) {
+ ;CHECK-LABEL: stack_fold_psubusb
+ ;CHECK: vpsubusb {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <32 x i8> @llvm.x86.avx2.psubus.b(<32 x i8> %a0, <32 x i8> %a1)
+ ret <32 x i8> %2
+}
+declare <32 x i8> @llvm.x86.avx2.psubus.b(<32 x i8>, <32 x i8>) nounwind readnone
+
+define <16 x i16> @stack_fold_psubusw(<16 x i16> %a0, <16 x i16> %a1) {
+ ;CHECK-LABEL: stack_fold_psubusw
+ ;CHECK: vpsubusw {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <16 x i16> @llvm.x86.avx2.psubus.w(<16 x i16> %a0, <16 x i16> %a1)
+ ret <16 x i16> %2
+}
+declare <16 x i16> @llvm.x86.avx2.psubus.w(<16 x i16>, <16 x i16>) nounwind readnone
+
+define <16 x i16> @stack_fold_psubw(<16 x i16> %a0, <16 x i16> %a1) {
+ ;CHECK-LABEL: stack_fold_psubw
+ ;CHECK: vpsubw {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = sub <16 x i16> %a0, %a1
+ ret <16 x i16> %2
+}
+
+define <32 x i8> @stack_fold_punpckhbw(<32 x i8> %a0, <32 x i8> %a1) {
+ ;CHECK-LABEL: stack_fold_punpckhbw
+ ;CHECK: vpunpckhbw {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = shufflevector <32 x i8> %a0, <32 x i8> %a1, <32 x i32> <i32 8, i32 40, i32 9, i32 41, i32 10, i32 42, i32 11, i32 43, i32 12, i32 44, i32 13, i32 45, i32 14, i32 46, i32 15, i32 47, i32 24, i32 56, i32 25, i32 57, i32 26, i32 58, i32 27, i32 59, i32 28, i32 60, i32 29, i32 61, i32 30, i32 62, i32 31, i32 63>
+ ret <32 x i8> %2
+}
+
+define <8 x i32> @stack_fold_punpckhdq(<8 x i32> %a0, <8 x i32> %a1) {
+ ;CHECK-LABEL: stack_fold_punpckhdq
+ ;CHECK: vpunpckhdq {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = shufflevector <8 x i32> %a0, <8 x i32> %a1, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
+ ; add forces execution domain
+ %3 = add <8 x i32> %2, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+ ret <8 x i32> %3
+}
+
+define <4 x i64> @stack_fold_punpckhqdq(<4 x i64> %a0, <4 x i64> %a1) {
+ ;CHECK-LABEL: stack_fold_punpckhqdq
+ ;CHECK: vpunpckhqdq {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = shufflevector <4 x i64> %a0, <4 x i64> %a1, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
+ ; add forces execution domain
+ %3 = add <4 x i64> %2, <i64 1, i64 1, i64 1, i64 1>
+ ret <4 x i64> %3
+}
+
+define <16 x i16> @stack_fold_punpckhwd(<16 x i16> %a0, <16 x i16> %a1) {
+ ;CHECK-LABEL: stack_fold_punpckhwd
+ ;CHECK: vpunpckhwd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = shufflevector <16 x i16> %a0, <16 x i16> %a1, <16 x i32> <i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
+ ret <16 x i16> %2
+}
+
+define <32 x i8> @stack_fold_punpcklbw(<32 x i8> %a0, <32 x i8> %a1) {
+ ;CHECK-LABEL: stack_fold_punpcklbw
+ ;CHECK: vpunpcklbw {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = shufflevector <32 x i8> %a0, <32 x i8> %a1, <32 x i32> <i32 0, i32 32, i32 1, i32 33, i32 2, i32 34, i32 3, i32 35, i32 4, i32 36, i32 5, i32 37, i32 6, i32 38, i32 7, i32 39, i32 16, i32 48, i32 17, i32 49, i32 18, i32 50, i32 19, i32 51, i32 20, i32 52, i32 21, i32 53, i32 22, i32 54, i32 23, i32 55>
+ ret <32 x i8> %2
+}
+
+define <8 x i32> @stack_fold_punpckldq(<8 x i32> %a0, <8 x i32> %a1) {
+ ;CHECK-LABEL: stack_fold_punpckldq
+ ;CHECK: vpunpckldq {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = shufflevector <8 x i32> %a0, <8 x i32> %a1, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
+ ; add forces execution domain
+ %3 = add <8 x i32> %2, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+ ret <8 x i32> %3
+}
+
+define <4 x i64> @stack_fold_punpcklqdq(<4 x i64> %a0, <4 x i64> %a1) {
+ ;CHECK-LABEL: stack_fold_punpcklqdq
+ ;CHECK: vpunpcklqdq {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = shufflevector <4 x i64> %a0, <4 x i64> %a1, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
+ ; add forces execution domain
+ %3 = add <4 x i64> %2, <i64 1, i64 1, i64 1, i64 1>
+ ret <4 x i64> %3
+}
+
+define <16 x i16> @stack_fold_punpcklwd(<16 x i16> %a0, <16 x i16> %a1) {
+ ;CHECK-LABEL: stack_fold_punpcklwd
+ ;CHECK: vpunpcklwd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = shufflevector <16 x i16> %a0, <16 x i16> %a1, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27>
+ ret <16 x i16> %2
+}
+
+define <32 x i8> @stack_fold_pxor(<32 x i8> %a0, <32 x i8> %a1) {
+ ;CHECK-LABEL: stack_fold_pxor
+ ;CHECK: vpxor {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = xor <32 x i8> %a0, %a1
+ ; add forces execution domain
+ %3 = add <32 x i8> %2, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+ ret <32 x i8> %3
+}
diff --git a/test/CodeGen/X86/stack-folding-int-sse42.ll b/test/CodeGen/X86/stack-folding-int-sse42.ll
new file mode 100644
index 000000000000..e814ae6df501
--- /dev/null
+++ b/test/CodeGen/X86/stack-folding-int-sse42.ll
@@ -0,0 +1,1174 @@
+; RUN: llc -O3 -disable-peephole -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse4.2,+aes,+pclmul < %s | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-unknown"
+
+; Stack reload folding tests.
+;
+; By including a nop call with sideeffects we can force a partial register spill of the
+; relevant registers and check that the reload is correctly folded into the instruction.
+
+define <2 x i64> @stack_fold_aesdec(<2 x i64> %a0, <2 x i64> %a1) {
+ ;CHECK-LABEL: stack_fold_aesdec
+ ;CHECK: aesdec {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <2 x i64> @llvm.x86.aesni.aesdec(<2 x i64> %a0, <2 x i64> %a1)
+ ret <2 x i64> %2
+}
+declare <2 x i64> @llvm.x86.aesni.aesdec(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <2 x i64> @stack_fold_aesdeclast(<2 x i64> %a0, <2 x i64> %a1) {
+ ;CHECK-LABEL: stack_fold_aesdeclast
+ ;CHECK: aesdeclast {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <2 x i64> @llvm.x86.aesni.aesdeclast(<2 x i64> %a0, <2 x i64> %a1)
+ ret <2 x i64> %2
+}
+declare <2 x i64> @llvm.x86.aesni.aesdeclast(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <2 x i64> @stack_fold_aesenc(<2 x i64> %a0, <2 x i64> %a1) {
+ ;CHECK-LABEL: stack_fold_aesenc
+ ;CHECK: aesenc {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <2 x i64> @llvm.x86.aesni.aesenc(<2 x i64> %a0, <2 x i64> %a1)
+ ret <2 x i64> %2
+}
+declare <2 x i64> @llvm.x86.aesni.aesenc(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <2 x i64> @stack_fold_aesenclast(<2 x i64> %a0, <2 x i64> %a1) {
+ ;CHECK-LABEL: stack_fold_aesenclast
+ ;CHECK: aesenclast {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <2 x i64> @llvm.x86.aesni.aesenclast(<2 x i64> %a0, <2 x i64> %a1)
+ ret <2 x i64> %2
+}
+declare <2 x i64> @llvm.x86.aesni.aesenclast(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <2 x i64> @stack_fold_aesimc(<2 x i64> %a0) {
+ ;CHECK-LABEL: stack_fold_aesimc
+ ;CHECK: aesimc {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <2 x i64> @llvm.x86.aesni.aesimc(<2 x i64> %a0)
+ ret <2 x i64> %2
+}
+declare <2 x i64> @llvm.x86.aesni.aesimc(<2 x i64>) nounwind readnone
+
+define <2 x i64> @stack_fold_aeskeygenassist(<2 x i64> %a0) {
+ ;CHECK-LABEL: stack_fold_aeskeygenassist
+ ;CHECK: aeskeygenassist $7, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <2 x i64> @llvm.x86.aesni.aeskeygenassist(<2 x i64> %a0, i8 7)
+ ret <2 x i64> %2
+}
+declare <2 x i64> @llvm.x86.aesni.aeskeygenassist(<2 x i64>, i8) nounwind readnone
+
+;TODO stack_fold_crc32_32_8
+declare i32 @llvm.x86.sse42.crc32.32.8(i32, i8) nounwind
+
+;TODO stack_fold_crc32_32_16
+declare i32 @llvm.x86.sse42.crc32.32.16(i32, i16) nounwind
+
+define i32 @stack_fold_crc32_32_32(i32 %a0, i32 %a1) {
+ ;CHECK-LABEL: stack_fold_crc32_32_32
+ ;CHECK: crc32l {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 4-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
+ %2 = call i32 @llvm.x86.sse42.crc32.32.32(i32 %a0, i32 %a1)
+ ret i32 %2
+}
+declare i32 @llvm.x86.sse42.crc32.32.32(i32, i32) nounwind
+
+;TODO stack_fold_crc32_64_8
+declare i64 @llvm.x86.sse42.crc32.64.8(i64, i8) nounwind
+
+define i64 @stack_fold_crc32_64_64(i64 %a0, i64 %a1) {
+ ;CHECK-LABEL: stack_fold_crc32_64_64
+ ;CHECK: crc32q {{-?[0-9]*}}(%rsp), %rax {{.*#+}} 8-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
+ %2 = call i64 @llvm.x86.sse42.crc32.64.64(i64 %a0, i64 %a1)
+ ret i64 %2
+}
+declare i64 @llvm.x86.sse42.crc32.64.64(i64, i64) nounwind
+
+define <4 x i32> @stack_fold_movd_load(i32 %a0) {
+ ;CHECK-LABEL: stack_fold_movd_load
+ ;CHECK: movd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
+ %2 = insertelement <4 x i32> zeroinitializer, i32 %a0, i32 0
+ ; add forces execution domain
+ %3 = add <4 x i32> %2, <i32 1, i32 1, i32 1, i32 1>
+ ret <4 x i32> %3
+}
+
+define i32 @stack_fold_movd_store(<4 x i32> %a0) {
+ ;CHECK-LABEL: stack_fold_movd_store
+ ;CHECK: movd {{%xmm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp) {{.*#+}} 4-byte Folded Spill
+ ; add forces execution domain
+ %1 = add <4 x i32> %a0, <i32 1, i32 1, i32 1, i32 1>
+ %2 = extractelement <4 x i32> %1, i32 0
+ %3 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
+ ret i32 %2
+}
+
+define <2 x i64> @stack_fold_movq_load(<2 x i64> %a0) {
+ ;CHECK-LABEL: stack_fold_movq_load
+ ;CHECK: movq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = shufflevector <2 x i64> %a0, <2 x i64> zeroinitializer, <2 x i32> <i32 0, i32 2>
+ ; add forces execution domain
+ %3 = add <2 x i64> %2, <i64 1, i64 1>
+ ret <2 x i64> %3
+}
+
+define i64 @stack_fold_movq_store(<2 x i64> %a0) {
+ ;CHECK-LABEL: stack_fold_movq_store
+ ;CHECK: movq {{%xmm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp) {{.*#+}} 8-byte Folded Spill
+ ; add forces execution domain
+ %1 = add <2 x i64> %a0, <i64 1, i64 1>
+ %2 = extractelement <2 x i64> %1, i32 0
+ %3 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
+ ret i64 %2
+}
+
+define <8 x i16> @stack_fold_mpsadbw(<16 x i8> %a0, <16 x i8> %a1) {
+ ;CHECK-LABEL: stack_fold_mpsadbw
+ ;CHECK: mpsadbw $7, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8> %a0, <16 x i8> %a1, i8 7)
+ ret <8 x i16> %2
+}
+declare <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8>, <16 x i8>, i8) nounwind readnone
+
+define <16 x i8> @stack_fold_pabsb(<16 x i8> %a0) {
+ ;CHECK-LABEL: stack_fold_pabsb
+ ;CHECK: pabsb {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <16 x i8> @llvm.x86.ssse3.pabs.b.128(<16 x i8> %a0)
+ ret <16 x i8> %2
+}
+declare <16 x i8> @llvm.x86.ssse3.pabs.b.128(<16 x i8>) nounwind readnone
+
+define <4 x i32> @stack_fold_pabsd(<4 x i32> %a0) {
+ ;CHECK-LABEL: stack_fold_pabsd
+ ;CHECK: pabsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <4 x i32> @llvm.x86.ssse3.pabs.d.128(<4 x i32> %a0)
+ ret <4 x i32> %2
+}
+declare <4 x i32> @llvm.x86.ssse3.pabs.d.128(<4 x i32>) nounwind readnone
+
+define <8 x i16> @stack_fold_pabsw(<8 x i16> %a0) {
+ ;CHECK-LABEL: stack_fold_pabsw
+ ;CHECK: pabsw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <8 x i16> @llvm.x86.ssse3.pabs.w.128(<8 x i16> %a0)
+ ret <8 x i16> %2
+}
+declare <8 x i16> @llvm.x86.ssse3.pabs.w.128(<8 x i16>) nounwind readnone
+
+define <8 x i16> @stack_fold_packssdw(<4 x i32> %a0, <4 x i32> %a1) {
+ ;CHECK-LABEL: stack_fold_packssdw
+ ;CHECK: packssdw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %a0, <4 x i32> %a1)
+ ret <8 x i16> %2
+}
+declare <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <16 x i8> @stack_fold_packsswb(<8 x i16> %a0, <8 x i16> %a1) {
+ ;CHECK-LABEL: stack_fold_packsswb
+ ;CHECK: packsswb {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %a0, <8 x i16> %a1)
+ ret <16 x i8> %2
+}
+declare <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <8 x i16> @stack_fold_packusdw(<4 x i32> %a0, <4 x i32> %a1) {
+ ;CHECK-LABEL: stack_fold_packusdw
+ ;CHECK: packusdw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %a0, <4 x i32> %a1)
+ ret <8 x i16> %2
+}
+declare <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <16 x i8> @stack_fold_packuswb(<8 x i16> %a0, <8 x i16> %a1) {
+ ;CHECK-LABEL: stack_fold_packuswb
+ ;CHECK: packuswb {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %a0, <8 x i16> %a1)
+ ret <16 x i8> %2
+}
+declare <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <16 x i8> @stack_fold_paddb(<16 x i8> %a0, <16 x i8> %a1) {
+ ;CHECK-LABEL: stack_fold_paddb
+ ;CHECK: paddb {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = add <16 x i8> %a0, %a1
+ ret <16 x i8> %2
+}
+
+define <4 x i32> @stack_fold_paddd(<4 x i32> %a0, <4 x i32> %a1) {
+ ;CHECK-LABEL: stack_fold_paddd
+ ;CHECK: paddd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = add <4 x i32> %a0, %a1
+ ret <4 x i32> %2
+}
+
+define <2 x i64> @stack_fold_paddq(<2 x i64> %a0, <2 x i64> %a1) {
+ ;CHECK-LABEL: stack_fold_paddq
+ ;CHECK: paddq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = add <2 x i64> %a0, %a1
+ ret <2 x i64> %2
+}
+
+define <16 x i8> @stack_fold_paddsb(<16 x i8> %a0, <16 x i8> %a1) {
+ ;CHECK-LABEL: stack_fold_paddsb
+ ;CHECK: paddsb {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8> %a0, <16 x i8> %a1)
+ ret <16 x i8> %2
+}
+declare <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8>, <16 x i8>) nounwind readnone
+
+define <8 x i16> @stack_fold_paddsw(<8 x i16> %a0, <8 x i16> %a1) {
+ ;CHECK-LABEL: stack_fold_paddsw
+ ;CHECK: paddsw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %a0, <8 x i16> %a1)
+ ret <8 x i16> %2
+}
+declare <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <16 x i8> @stack_fold_paddusb(<16 x i8> %a0, <16 x i8> %a1) {
+ ;CHECK-LABEL: stack_fold_paddusb
+ ;CHECK: paddusb {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8> %a0, <16 x i8> %a1)
+ ret <16 x i8> %2
+}
+declare <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8>, <16 x i8>) nounwind readnone
+
+define <8 x i16> @stack_fold_paddusw(<8 x i16> %a0, <8 x i16> %a1) {
+ ;CHECK-LABEL: stack_fold_paddusw
+ ;CHECK: paddusw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16> %a0, <8 x i16> %a1)
+ ret <8 x i16> %2
+}
+declare <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <8 x i16> @stack_fold_paddw(<8 x i16> %a0, <8 x i16> %a1) {
+ ;CHECK-LABEL: stack_fold_paddw
+ ;CHECK: paddw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = add <8 x i16> %a0, %a1
+ ret <8 x i16> %2
+}
+
+define <16 x i8> @stack_fold_palignr(<16 x i8> %a0, <16 x i8> %a1) {
+ ;CHECK-LABEL: stack_fold_palignr
+ ;CHECK: palignr $1, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = shufflevector <16 x i8> %a1, <16 x i8> %a0, <16 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16>
+ ret <16 x i8> %2
+}
+
+define <16 x i8> @stack_fold_pand(<16 x i8> %a0, <16 x i8> %a1) {
+ ;CHECK-LABEL: stack_fold_pand
+ ;CHECK: pand {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = and <16 x i8> %a0, %a1
+ ; add forces execution domain
+ %3 = add <16 x i8> %2, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+ ret <16 x i8> %3
+}
+
+define <16 x i8> @stack_fold_pandn(<16 x i8> %a0, <16 x i8> %a1) {
+ ;CHECK-LABEL: stack_fold_pandn
+ ;CHECK: pandn {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = xor <16 x i8> %a0, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+ %3 = and <16 x i8> %2, %a1
+ ; add forces execution domain
+ %4 = add <16 x i8> %3, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+ ret <16 x i8> %4
+}
+
+define <16 x i8> @stack_fold_pavgb(<16 x i8> %a0, <16 x i8> %a1) {
+ ;CHECK-LABEL: stack_fold_pavgb
+ ;CHECK: pavgb {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8> %a0, <16 x i8> %a1)
+ ret <16 x i8> %2
+}
+declare <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8>, <16 x i8>) nounwind readnone
+
+define <8 x i16> @stack_fold_pavgw(<8 x i16> %a0, <8 x i16> %a1) {
+ ;CHECK-LABEL: stack_fold_pavgw
+ ;CHECK: pavgw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %a0, <8 x i16> %a1)
+ ret <8 x i16> %2
+}
+declare <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <16 x i8> @stack_fold_pblendvb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %c) {
+ ;CHECK-LABEL: stack_fold_pblendvb
+ ;CHECK: pblendvb {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %a1, <16 x i8> %c, <16 x i8> %a0)
+ ret <16 x i8> %2
+}
+declare <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone
+
+define <8 x i16> @stack_fold_pblendw(<8 x i16> %a0, <8 x i16> %a1) {
+ ;CHECK-LABEL: stack_fold_pblendw
+ ;CHECK: pblendw $7, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16> %a0, <8 x i16> %a1, i8 7)
+ ret <8 x i16> %2
+}
+declare <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16>, <8 x i16>, i8) nounwind readnone
+
+define <2 x i64> @stack_fold_pclmulqdq(<2 x i64> %a0, <2 x i64> %a1) {
+ ;CHECK-LABEL: stack_fold_pclmulqdq
+ ;CHECK: pclmulqdq $0, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <2 x i64> @llvm.x86.pclmulqdq(<2 x i64> %a0, <2 x i64> %a1, i8 0)
+ ret <2 x i64> %2
+}
+declare <2 x i64> @llvm.x86.pclmulqdq(<2 x i64>, <2 x i64>, i8) nounwind readnone
+
+define <16 x i8> @stack_fold_pcmpeqb(<16 x i8> %a0, <16 x i8> %a1) {
+ ;CHECK-LABEL: stack_fold_pcmpeqb
+ ;CHECK: pcmpeqb {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = icmp eq <16 x i8> %a0, %a1
+ %3 = sext <16 x i1> %2 to <16 x i8>
+ ret <16 x i8> %3
+}
+
+define <4 x i32> @stack_fold_pcmpeqd(<4 x i32> %a0, <4 x i32> %a1) {
+ ;CHECK-LABEL: stack_fold_pcmpeqd
+ ;CHECK: pcmpeqd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = icmp eq <4 x i32> %a0, %a1
+ %3 = sext <4 x i1> %2 to <4 x i32>
+ ret <4 x i32> %3
+}
+
+define <2 x i64> @stack_fold_pcmpeqq(<2 x i64> %a0, <2 x i64> %a1) {
+ ;CHECK-LABEL: stack_fold_pcmpeqq
+ ;CHECK: pcmpeqq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = icmp eq <2 x i64> %a0, %a1
+ %3 = sext <2 x i1> %2 to <2 x i64>
+ ret <2 x i64> %3
+}
+
+define <8 x i16> @stack_fold_pcmpeqw(<8 x i16> %a0, <8 x i16> %a1) {
+ ;CHECK-LABEL: stack_fold_pcmpeqw
+ ;CHECK: pcmpeqw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = icmp eq <8 x i16> %a0, %a1
+ %3 = sext <8 x i1> %2 to <8 x i16>
+ ret <8 x i16> %3
+}
+
+define i32 @stack_fold_pcmpestri(<16 x i8> %a0, <16 x i8> %a1) {
+ ;CHECK-LABEL: stack_fold_pcmpestri
+ ;CHECK: pcmpestri $7, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{rax},~{flags}"()
+ %2 = call i32 @llvm.x86.sse42.pcmpestri128(<16 x i8> %a0, i32 7, <16 x i8> %a1, i32 7, i8 7)
+ ret i32 %2
+}
+declare i32 @llvm.x86.sse42.pcmpestri128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone
+
+define <16 x i8> @stack_fold_pcmpestrm(<16 x i8> %a0, <16 x i8> %a1) {
+ ;CHECK-LABEL: stack_fold_pcmpestrm
+ ;CHECK: pcmpestrm $7, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{rax},~{flags}"()
+ %2 = call <16 x i8> @llvm.x86.sse42.pcmpestrm128(<16 x i8> %a0, i32 7, <16 x i8> %a1, i32 7, i8 7)
+ ret <16 x i8> %2
+}
+declare <16 x i8> @llvm.x86.sse42.pcmpestrm128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone
+
+define <16 x i8> @stack_fold_pcmpgtb(<16 x i8> %a0, <16 x i8> %a1) {
+ ;CHECK-LABEL: stack_fold_pcmpgtb
+ ;CHECK: pcmpgtb {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = icmp sgt <16 x i8> %a0, %a1
+ %3 = sext <16 x i1> %2 to <16 x i8>
+ ret <16 x i8> %3
+}
+
+define <4 x i32> @stack_fold_pcmpgtd(<4 x i32> %a0, <4 x i32> %a1) {
+ ;CHECK-LABEL: stack_fold_pcmpgtd
+ ;CHECK: pcmpgtd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = icmp sgt <4 x i32> %a0, %a1
+ %3 = sext <4 x i1> %2 to <4 x i32>
+ ret <4 x i32> %3
+}
+
+define <2 x i64> @stack_fold_pcmpgtq(<2 x i64> %a0, <2 x i64> %a1) {
+ ;CHECK-LABEL: stack_fold_pcmpgtq
+ ;CHECK: pcmpgtq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = icmp sgt <2 x i64> %a0, %a1
+ %3 = sext <2 x i1> %2 to <2 x i64>
+ ret <2 x i64> %3
+}
+
+define <8 x i16> @stack_fold_pcmpgtw(<8 x i16> %a0, <8 x i16> %a1) {
+ ;CHECK-LABEL: stack_fold_pcmpgtw
+ ;CHECK: pcmpgtw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = icmp sgt <8 x i16> %a0, %a1
+ %3 = sext <8 x i1> %2 to <8 x i16>
+ ret <8 x i16> %3
+}
+
+define i32 @stack_fold_pcmpistri(<16 x i8> %a0, <16 x i8> %a1) {
+ ;CHECK-LABEL: stack_fold_pcmpistri
+ ;CHECK: pcmpistri $7, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call i32 @llvm.x86.sse42.pcmpistri128(<16 x i8> %a0, <16 x i8> %a1, i8 7)
+ ret i32 %2
+}
+declare i32 @llvm.x86.sse42.pcmpistri128(<16 x i8>, <16 x i8>, i8) nounwind readnone
+
+define <16 x i8> @stack_fold_pcmpistrm(<16 x i8> %a0, <16 x i8> %a1) {
+ ;CHECK-LABEL: stack_fold_pcmpistrm
+ ;CHECK: pcmpistrm $7, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <16 x i8> @llvm.x86.sse42.pcmpistrm128(<16 x i8> %a0, <16 x i8> %a1, i8 7)
+ ret <16 x i8> %2
+}
+declare <16 x i8> @llvm.x86.sse42.pcmpistrm128(<16 x i8>, <16 x i8>, i8) nounwind readnone
+
+; TODO stack_fold_pextrb
+
+define i32 @stack_fold_pextrd(<4 x i32> %a0) {
+ ;CHECK-LABEL: stack_fold_pextrd
+ ;CHECK: pextrd $1, {{%xmm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp) {{.*#+}} 4-byte Folded Spill
+ ;CHECK: movl {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 4-byte Reload
+ %1 = extractelement <4 x i32> %a0, i32 1
+ %2 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
+ ret i32 %1
+}
+
+define i64 @stack_fold_pextrq(<2 x i64> %a0) {
+ ;CHECK-LABEL: stack_fold_pextrq
+ ;CHECK: pextrq $1, {{%xmm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp) {{.*#+}} 8-byte Folded Spill
+ ;CHECK: movq {{-?[0-9]*}}(%rsp), %rax {{.*#+}} 8-byte Reload
+ %1 = extractelement <2 x i64> %a0, i32 1
+ %2 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
+ ret i64 %1
+}
+
+; TODO stack_fold_pextrw
+
+define <4 x i32> @stack_fold_phaddd(<4 x i32> %a0, <4 x i32> %a1) {
+ ;CHECK-LABEL: stack_fold_phaddd
+ ;CHECK: phaddd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32> %a0, <4 x i32> %a1)
+ ret <4 x i32> %2
+}
+declare <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <8 x i16> @stack_fold_phaddsw(<8 x i16> %a0, <8 x i16> %a1) {
+ ;CHECK-LABEL: stack_fold_phaddsw
+ ;CHECK: phaddsw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <8 x i16> @llvm.x86.ssse3.phadd.sw.128(<8 x i16> %a0, <8 x i16> %a1)
+ ret <8 x i16> %2
+}
+declare <8 x i16> @llvm.x86.ssse3.phadd.sw.128(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <8 x i16> @stack_fold_phaddw(<8 x i16> %a0, <8 x i16> %a1) {
+ ;CHECK-LABEL: stack_fold_phaddw
+ ;CHECK: phaddw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <8 x i16> @llvm.x86.ssse3.phadd.w.128(<8 x i16> %a0, <8 x i16> %a1)
+ ret <8 x i16> %2
+}
+declare <8 x i16> @llvm.x86.ssse3.phadd.w.128(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <8 x i16> @stack_fold_phminposuw(<8 x i16> %a0) {
+ ;CHECK-LABEL: stack_fold_phminposuw
+ ;CHECK: phminposuw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <8 x i16> @llvm.x86.sse41.phminposuw(<8 x i16> %a0)
+ ret <8 x i16> %2
+}
+declare <8 x i16> @llvm.x86.sse41.phminposuw(<8 x i16>) nounwind readnone
+
+define <4 x i32> @stack_fold_phsubd(<4 x i32> %a0, <4 x i32> %a1) {
+ ;CHECK-LABEL: stack_fold_phsubd
+ ;CHECK: phsubd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <4 x i32> @llvm.x86.ssse3.phsub.d.128(<4 x i32> %a0, <4 x i32> %a1)
+ ret <4 x i32> %2
+}
+declare <4 x i32> @llvm.x86.ssse3.phsub.d.128(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <8 x i16> @stack_fold_phsubsw(<8 x i16> %a0, <8 x i16> %a1) {
+ ;CHECK-LABEL: stack_fold_phsubsw
+ ;CHECK: phsubsw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <8 x i16> @llvm.x86.ssse3.phsub.sw.128(<8 x i16> %a0, <8 x i16> %a1)
+ ret <8 x i16> %2
+}
+declare <8 x i16> @llvm.x86.ssse3.phsub.sw.128(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <8 x i16> @stack_fold_phsubw(<8 x i16> %a0, <8 x i16> %a1) {
+ ;CHECK-LABEL: stack_fold_phsubw
+ ;CHECK: phsubw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <8 x i16> @llvm.x86.ssse3.phsub.w.128(<8 x i16> %a0, <8 x i16> %a1)
+ ret <8 x i16> %2
+}
+declare <8 x i16> @llvm.x86.ssse3.phsub.w.128(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <16 x i8> @stack_fold_pinsrb(<16 x i8> %a0, i8 %a1) {
+ ;CHECK-LABEL: stack_fold_pinsrb
+ ;CHECK: pinsrb $1, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
+ %2 = insertelement <16 x i8> %a0, i8 %a1, i32 1
+ ret <16 x i8> %2
+}
+
+define <4 x i32> @stack_fold_pinsrd(<4 x i32> %a0, i32 %a1) {
+ ;CHECK-LABEL: stack_fold_pinsrd
+ ;CHECK: pinsrd $1, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
+ %2 = insertelement <4 x i32> %a0, i32 %a1, i32 1
+ ret <4 x i32> %2
+}
+
+define <2 x i64> @stack_fold_pinsrq(<2 x i64> %a0, i64 %a1) {
+ ;CHECK-LABEL: stack_fold_pinsrq
+ ;CHECK: pinsrq $1, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
+ %2 = insertelement <2 x i64> %a0, i64 %a1, i32 1
+ ret <2 x i64> %2
+}
+
+define <8 x i16> @stack_fold_pinsrw(<8 x i16> %a0, i16 %a1) {
+ ;CHECK-LABEL: stack_fold_pinsrw
+ ;CHECK: pinsrw $1, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
+ %2 = insertelement <8 x i16> %a0, i16 %a1, i32 1
+ ret <8 x i16> %2
+}
+
+define <8 x i16> @stack_fold_pmaddubsw(<16 x i8> %a0, <16 x i8> %a1) {
+ ;CHECK-LABEL: stack_fold_pmaddubsw
+ ;CHECK: pmaddubsw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8> %a0, <16 x i8> %a1)
+ ret <8 x i16> %2
+}
+declare <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8>, <16 x i8>) nounwind readnone
+
+define <4 x i32> @stack_fold_pmaddwd(<8 x i16> %a0, <8 x i16> %a1) {
+ ;CHECK-LABEL: stack_fold_pmaddwd
+ ;CHECK: pmaddwd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %a0, <8 x i16> %a1)
+ ret <4 x i32> %2
+}
+declare <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <16 x i8> @stack_fold_pmaxsb(<16 x i8> %a0, <16 x i8> %a1) {
+ ;CHECK-LABEL: stack_fold_pmaxsb
+ ;CHECK: pmaxsb {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <16 x i8> @llvm.x86.sse41.pmaxsb(<16 x i8> %a0, <16 x i8> %a1)
+ ret <16 x i8> %2
+}
+declare <16 x i8> @llvm.x86.sse41.pmaxsb(<16 x i8>, <16 x i8>) nounwind readnone
+
+define <4 x i32> @stack_fold_pmaxsd(<4 x i32> %a0, <4 x i32> %a1) {
+ ;CHECK-LABEL: stack_fold_pmaxsd
+ ;CHECK: pmaxsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32> %a0, <4 x i32> %a1)
+ ret <4 x i32> %2
+}
+declare <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <8 x i16> @stack_fold_pmaxsw(<8 x i16> %a0, <8 x i16> %a1) {
+ ;CHECK-LABEL: stack_fold_pmaxsw
+ ;CHECK: pmaxsw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16> %a0, <8 x i16> %a1)
+ ret <8 x i16> %2
+}
+declare <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <16 x i8> @stack_fold_pmaxub(<16 x i8> %a0, <16 x i8> %a1) {
+ ;CHECK-LABEL: stack_fold_pmaxub
+ ;CHECK: pmaxub {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <16 x i8> @llvm.x86.sse2.pmaxu.b(<16 x i8> %a0, <16 x i8> %a1)
+ ret <16 x i8> %2
+}
+declare <16 x i8> @llvm.x86.sse2.pmaxu.b(<16 x i8>, <16 x i8>) nounwind readnone
+
+define <4 x i32> @stack_fold_pmaxud(<4 x i32> %a0, <4 x i32> %a1) {
+ ;CHECK-LABEL: stack_fold_pmaxud
+ ;CHECK: pmaxud {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32> %a0, <4 x i32> %a1)
+ ret <4 x i32> %2
+}
+declare <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <8 x i16> @stack_fold_pmaxuw(<8 x i16> %a0, <8 x i16> %a1) {
+ ;CHECK-LABEL: stack_fold_pmaxuw
+ ;CHECK: pmaxuw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <8 x i16> @llvm.x86.sse41.pmaxuw(<8 x i16> %a0, <8 x i16> %a1)
+ ret <8 x i16> %2
+}
+declare <8 x i16> @llvm.x86.sse41.pmaxuw(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <16 x i8> @stack_fold_pminsb(<16 x i8> %a0, <16 x i8> %a1) {
+ ;CHECK-LABEL: stack_fold_pminsb
+ ;CHECK: pminsb {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <16 x i8> @llvm.x86.sse41.pminsb(<16 x i8> %a0, <16 x i8> %a1)
+ ret <16 x i8> %2
+}
+declare <16 x i8> @llvm.x86.sse41.pminsb(<16 x i8>, <16 x i8>) nounwind readnone
+
+define <4 x i32> @stack_fold_pminsd(<4 x i32> %a0, <4 x i32> %a1) {
+ ;CHECK-LABEL: stack_fold_pminsd
+ ;CHECK: pminsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32> %a0, <4 x i32> %a1)
+ ret <4 x i32> %2
+}
+declare <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <8 x i16> @stack_fold_pminsw(<8 x i16> %a0, <8 x i16> %a1) {
+ ;CHECK-LABEL: stack_fold_pminsw
+ ;CHECK: pminsw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <8 x i16> @llvm.x86.sse2.pmins.w(<8 x i16> %a0, <8 x i16> %a1)
+ ret <8 x i16> %2
+}
+declare <8 x i16> @llvm.x86.sse2.pmins.w(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <16 x i8> @stack_fold_pminub(<16 x i8> %a0, <16 x i8> %a1) {
+ ;CHECK-LABEL: stack_fold_pminub
+ ;CHECK: pminub {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <16 x i8> @llvm.x86.sse2.pminu.b(<16 x i8> %a0, <16 x i8> %a1)
+ ret <16 x i8> %2
+}
+declare <16 x i8> @llvm.x86.sse2.pminu.b(<16 x i8>, <16 x i8>) nounwind readnone
+
+define <4 x i32> @stack_fold_pminud(<4 x i32> %a0, <4 x i32> %a1) {
+ ;CHECK-LABEL: stack_fold_pminud
+ ;CHECK: pminud {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <4 x i32> @llvm.x86.sse41.pminud(<4 x i32> %a0, <4 x i32> %a1)
+ ret <4 x i32> %2
+}
+declare <4 x i32> @llvm.x86.sse41.pminud(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <8 x i16> @stack_fold_pminuw(<8 x i16> %a0, <8 x i16> %a1) {
+ ;CHECK-LABEL: stack_fold_pminuw
+ ;CHECK: pminuw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <8 x i16> @llvm.x86.sse41.pminuw(<8 x i16> %a0, <8 x i16> %a1)
+ ret <8 x i16> %2
+}
+declare <8 x i16> @llvm.x86.sse41.pminuw(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <4 x i32> @stack_fold_pmovsxbd(<16 x i8> %a0) {
+ ;CHECK-LABEL: stack_fold_pmovsxbd
+ ;CHECK: pmovsxbd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <4 x i32> @llvm.x86.sse41.pmovsxbd(<16 x i8> %a0)
+ ret <4 x i32> %2
+}
+declare <4 x i32> @llvm.x86.sse41.pmovsxbd(<16 x i8>) nounwind readnone
+
+define <2 x i64> @stack_fold_pmovsxbq(<16 x i8> %a0) {
+ ;CHECK-LABEL: stack_fold_pmovsxbq
+ ;CHECK: pmovsxbq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <2 x i64> @llvm.x86.sse41.pmovsxbq(<16 x i8> %a0)
+ ret <2 x i64> %2
+}
+declare <2 x i64> @llvm.x86.sse41.pmovsxbq(<16 x i8>) nounwind readnone
+
+define <8 x i16> @stack_fold_pmovsxbw(<16 x i8> %a0) {
+ ;CHECK-LABEL: stack_fold_pmovsxbw
+ ;CHECK: pmovsxbw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <8 x i16> @llvm.x86.sse41.pmovsxbw(<16 x i8> %a0)
+ ret <8 x i16> %2
+}
+declare <8 x i16> @llvm.x86.sse41.pmovsxbw(<16 x i8>) nounwind readnone
+
+define <2 x i64> @stack_fold_pmovsxdq(<4 x i32> %a0) {
+ ;CHECK-LABEL: stack_fold_pmovsxdq
+ ;CHECK: pmovsxdq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <2 x i64> @llvm.x86.sse41.pmovsxdq(<4 x i32> %a0)
+ ret <2 x i64> %2
+}
+declare <2 x i64> @llvm.x86.sse41.pmovsxdq(<4 x i32>) nounwind readnone
+
+define <4 x i32> @stack_fold_pmovsxwd(<8 x i16> %a0) {
+ ;CHECK-LABEL: stack_fold_pmovsxwd
+ ;CHECK: pmovsxwd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16> %a0)
+ ret <4 x i32> %2
+}
+declare <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16>) nounwind readnone
+
+define <2 x i64> @stack_fold_pmovsxwq(<8 x i16> %a0) {
+ ;CHECK-LABEL: stack_fold_pmovsxwq
+ ;CHECK: pmovsxwq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <2 x i64> @llvm.x86.sse41.pmovsxwq(<8 x i16> %a0)
+ ret <2 x i64> %2
+}
+declare <2 x i64> @llvm.x86.sse41.pmovsxwq(<8 x i16>) nounwind readnone
+
+define <4 x i32> @stack_fold_pmovzxbd(<16 x i8> %a0) {
+ ;CHECK-LABEL: stack_fold_pmovzxbd
+ ;CHECK: pmovzxbd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = shufflevector <16 x i8> %a0, <16 x i8> zeroinitializer, <16 x i32> <i32 0, i32 16, i32 17, i32 18, i32 1, i32 19, i32 20, i32 21, i32 2, i32 22, i32 23, i32 24, i32 3, i32 25, i32 26, i32 27>
+ %3 = bitcast <16 x i8> %2 to <4 x i32>
+ ret <4 x i32> %3
+}
+
+define <2 x i64> @stack_fold_pmovzxbq(<16 x i8> %a0) {
+ ;CHECK-LABEL: stack_fold_pmovzxbq
+ ;CHECK: pmovzxbq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = shufflevector <16 x i8> %a0, <16 x i8> zeroinitializer, <16 x i32> <i32 0, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 1, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28>
+ %3 = bitcast <16 x i8> %2 to <2 x i64>
+ ret <2 x i64> %3
+}
+
+define <8 x i16> @stack_fold_pmovzxbw(<16 x i8> %a0) {
+ ;CHECK-LABEL: stack_fold_pmovzxbw
+ ;CHECK: pmovzxbw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = shufflevector <16 x i8> %a0, <16 x i8> zeroinitializer, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
+ %3 = bitcast <16 x i8> %2 to <8 x i16>
+ ret <8 x i16> %3
+}
+
+define <2 x i64> @stack_fold_pmovzxdq(<4 x i32> %a0) {
+ ;CHECK-LABEL: stack_fold_pmovzxdq
+ ;CHECK: pmovzxdq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = shufflevector <4 x i32> %a0, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
+ %3 = bitcast <4 x i32> %2 to <2 x i64>
+ ret <2 x i64> %3
+}
+
+define <4 x i32> @stack_fold_pmovzxwd(<8 x i16> %a0) {
+ ;CHECK-LABEL: stack_fold_pmovzxwd
+ ;CHECK: pmovzxwd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = shufflevector <8 x i16> %a0, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
+ %3 = bitcast <8 x i16> %2 to <4 x i32>
+ ret <4 x i32> %3
+}
+
+define <2 x i64> @stack_fold_pmovzxwq(<8 x i16> %a0) {
+ ;CHECK-LABEL: stack_fold_pmovzxwq
+ ;CHECK: pmovzxwq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = shufflevector <8 x i16> %a0, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 8, i32 9, i32 10, i32 1, i32 11, i32 12, i32 13>
+ %3 = bitcast <8 x i16> %2 to <2 x i64>
+ ret <2 x i64> %3
+}
+
+define <2 x i64> @stack_fold_pmuldq(<4 x i32> %a0, <4 x i32> %a1) {
+ ;CHECK-LABEL: stack_fold_pmuldq
+ ;CHECK: pmuldq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32> %a0, <4 x i32> %a1)
+ ret <2 x i64> %2
+}
+declare <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <8 x i16> @stack_fold_pmulhrsw(<8 x i16> %a0, <8 x i16> %a1) {
+ ;CHECK-LABEL: stack_fold_pmulhrsw
+ ;CHECK: pmulhrsw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16> %a0, <8 x i16> %a1)
+ ret <8 x i16> %2
+}
+declare <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <8 x i16> @stack_fold_pmulhuw(<8 x i16> %a0, <8 x i16> %a1) {
+ ;CHECK-LABEL: stack_fold_pmulhuw
+ ;CHECK: pmulhuw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> %a0, <8 x i16> %a1)
+ ret <8 x i16> %2
+}
+declare <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <8 x i16> @stack_fold_pmulhw(<8 x i16> %a0, <8 x i16> %a1) {
+ ;CHECK-LABEL: stack_fold_pmulhw
+ ;CHECK: pmulhw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> %a0, <8 x i16> %a1)
+ ret <8 x i16> %2
+}
+declare <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <4 x i32> @stack_fold_pmulld(<4 x i32> %a0, <4 x i32> %a1) {
+ ;CHECK-LABEL: stack_fold_pmulld
+ ;CHECK: pmulld {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = mul <4 x i32> %a0, %a1
+ ret <4 x i32> %2
+}
+
+define <8 x i16> @stack_fold_pmullw(<8 x i16> %a0, <8 x i16> %a1) {
+ ;CHECK-LABEL: stack_fold_pmullw
+ ;CHECK: pmullw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = mul <8 x i16> %a0, %a1
+ ret <8 x i16> %2
+}
+
+define <2 x i64> @stack_fold_pmuludq(<4 x i32> %a0, <4 x i32> %a1) {
+ ;CHECK-LABEL: stack_fold_pmuludq
+ ;CHECK: pmuludq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32> %a0, <4 x i32> %a1)
+ ret <2 x i64> %2
+}
+declare <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <16 x i8> @stack_fold_por(<16 x i8> %a0, <16 x i8> %a1) {
+ ;CHECK-LABEL: stack_fold_por
+ ;CHECK: por {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = or <16 x i8> %a0, %a1
+ ; add forces execution domain
+ %3 = add <16 x i8> %2, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+ ret <16 x i8> %3
+}
+
+define <2 x i64> @stack_fold_psadbw(<16 x i8> %a0, <16 x i8> %a1) {
+ ;CHECK-LABEL: stack_fold_psadbw
+ ;CHECK: psadbw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8> %a0, <16 x i8> %a1)
+ ret <2 x i64> %2
+}
+declare <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8>, <16 x i8>) nounwind readnone
+
+define <16 x i8> @stack_fold_pshufb(<16 x i8> %a0, <16 x i8> %a1) {
+ ;CHECK-LABEL: stack_fold_pshufb
+ ;CHECK: pshufb {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %a0, <16 x i8> %a1)
+ ret <16 x i8> %2
+}
+declare <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8>, <16 x i8>) nounwind readnone
+
+define <4 x i32> @stack_fold_pshufd(<4 x i32> %a0) {
+ ;CHECK-LABEL: stack_fold_pshufd
+ ;CHECK: pshufd $27, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = shufflevector <4 x i32> %a0, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+ ret <4 x i32> %2
+}
+
+define <8 x i16> @stack_fold_pshufhw(<8 x i16> %a0) {
+ ;CHECK-LABEL: stack_fold_pshufhw
+ ;CHECK: pshufhw $11, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = shufflevector <8 x i16> %a0, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 6, i32 4, i32 4>
+ ret <8 x i16> %2
+}
+
+define <8 x i16> @stack_fold_pshuflw(<8 x i16> %a0) {
+ ;CHECK-LABEL: stack_fold_pshuflw
+ ;CHECK: pshuflw $27, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = shufflevector <8 x i16> %a0, <8 x i16> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 4, i32 5, i32 6, i32 7>
+ ret <8 x i16> %2
+}
+
+define <16 x i8> @stack_fold_psignb(<16 x i8> %a0, <16 x i8> %a1) {
+ ;CHECK-LABEL: stack_fold_psignb
+ ;CHECK: psignb {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <16 x i8> @llvm.x86.ssse3.psign.b.128(<16 x i8> %a0, <16 x i8> %a1)
+ ret <16 x i8> %2
+}
+declare <16 x i8> @llvm.x86.ssse3.psign.b.128(<16 x i8>, <16 x i8>) nounwind readnone
+
+define <4 x i32> @stack_fold_psignd(<4 x i32> %a0, <4 x i32> %a1) {
+ ;CHECK-LABEL: stack_fold_psignd
+ ;CHECK: psignd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <4 x i32> @llvm.x86.ssse3.psign.d.128(<4 x i32> %a0, <4 x i32> %a1)
+ ret <4 x i32> %2
+}
+declare <4 x i32> @llvm.x86.ssse3.psign.d.128(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <8 x i16> @stack_fold_psignw(<8 x i16> %a0, <8 x i16> %a1) {
+ ;CHECK-LABEL: stack_fold_psignw
+ ;CHECK: psignw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <8 x i16> @llvm.x86.ssse3.psign.w.128(<8 x i16> %a0, <8 x i16> %a1)
+ ret <8 x i16> %2
+}
+declare <8 x i16> @llvm.x86.ssse3.psign.w.128(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <4 x i32> @stack_fold_pslld(<4 x i32> %a0, <4 x i32> %a1) {
+ ;CHECK-LABEL: stack_fold_pslld
+ ;CHECK: pslld {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %a0, <4 x i32> %a1)
+ ret <4 x i32> %2
+}
+declare <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <2 x i64> @stack_fold_psllq(<2 x i64> %a0, <2 x i64> %a1) {
+ ;CHECK-LABEL: stack_fold_psllq
+ ;CHECK: psllq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %a0, <2 x i64> %a1)
+ ret <2 x i64> %2
+}
+declare <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <8 x i16> @stack_fold_psllw(<8 x i16> %a0, <8 x i16> %a1) {
+ ;CHECK-LABEL: stack_fold_psllw
+ ;CHECK: psllw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %a0, <8 x i16> %a1)
+ ret <8 x i16> %2
+}
+declare <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <4 x i32> @stack_fold_psrad(<4 x i32> %a0, <4 x i32> %a1) {
+ ;CHECK-LABEL: stack_fold_psrad
+ ;CHECK: psrad {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %a0, <4 x i32> %a1)
+ ret <4 x i32> %2
+}
+declare <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <8 x i16> @stack_fold_psraw(<8 x i16> %a0, <8 x i16> %a1) {
+ ;CHECK-LABEL: stack_fold_psraw
+ ;CHECK: psraw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %a0, <8 x i16> %a1)
+ ret <8 x i16> %2
+}
+declare <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <4 x i32> @stack_fold_psrld(<4 x i32> %a0, <4 x i32> %a1) {
+ ;CHECK-LABEL: stack_fold_psrld
+ ;CHECK: psrld {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %a0, <4 x i32> %a1)
+ ret <4 x i32> %2
+}
+declare <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <2 x i64> @stack_fold_psrlq(<2 x i64> %a0, <2 x i64> %a1) {
+ ;CHECK-LABEL: stack_fold_psrlq
+ ;CHECK: psrlq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %a0, <2 x i64> %a1)
+ ret <2 x i64> %2
+}
+declare <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <8 x i16> @stack_fold_psrlw(<8 x i16> %a0, <8 x i16> %a1) {
+ ;CHECK-LABEL: stack_fold_psrlw
+ ;CHECK: psrlw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %a0, <8 x i16> %a1)
+ ret <8 x i16> %2
+}
+declare <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <16 x i8> @stack_fold_psubb(<16 x i8> %a0, <16 x i8> %a1) {
+ ;CHECK-LABEL: stack_fold_psubb
+ ;CHECK: psubb {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = sub <16 x i8> %a0, %a1
+ ret <16 x i8> %2
+}
+
+define <4 x i32> @stack_fold_psubd(<4 x i32> %a0, <4 x i32> %a1) {
+ ;CHECK-LABEL: stack_fold_psubd
+ ;CHECK: psubd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = sub <4 x i32> %a0, %a1
+ ret <4 x i32> %2
+}
+
+define <2 x i64> @stack_fold_psubq(<2 x i64> %a0, <2 x i64> %a1) {
+ ;CHECK-LABEL: stack_fold_psubq
+ ;CHECK: psubq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = sub <2 x i64> %a0, %a1
+ ret <2 x i64> %2
+}
+
+define <16 x i8> @stack_fold_psubsb(<16 x i8> %a0, <16 x i8> %a1) {
+ ;CHECK-LABEL: stack_fold_psubsb
+ ;CHECK: psubsb {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8> %a0, <16 x i8> %a1)
+ ret <16 x i8> %2
+}
+declare <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8>, <16 x i8>) nounwind readnone
+
+define <8 x i16> @stack_fold_psubsw(<8 x i16> %a0, <8 x i16> %a1) {
+ ;CHECK-LABEL: stack_fold_psubsw
+ ;CHECK: psubsw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> %a0, <8 x i16> %a1)
+ ret <8 x i16> %2
+}
+declare <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <16 x i8> @stack_fold_psubusb(<16 x i8> %a0, <16 x i8> %a1) {
+ ;CHECK-LABEL: stack_fold_psubusb
+ ;CHECK: psubusb {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8> %a0, <16 x i8> %a1)
+ ret <16 x i8> %2
+}
+declare <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8>, <16 x i8>) nounwind readnone
+
+define <8 x i16> @stack_fold_psubusw(<8 x i16> %a0, <8 x i16> %a1) {
+ ;CHECK-LABEL: stack_fold_psubusw
+ ;CHECK: psubusw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16> %a0, <8 x i16> %a1)
+ ret <8 x i16> %2
+}
+declare <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <8 x i16> @stack_fold_psubw(<8 x i16> %a0, <8 x i16> %a1) {
+ ;CHECK-LABEL: stack_fold_psubw
+ ;CHECK: psubw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = sub <8 x i16> %a0, %a1
+ ret <8 x i16> %2
+}
+
+define i32 @stack_fold_ptest(<2 x i64> %a0, <2 x i64> %a1) {
+ ;CHECK-LABEL: stack_fold_ptest
+ ;CHECK: ptest {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call i32 @llvm.x86.sse41.ptestc(<2 x i64> %a0, <2 x i64> %a1)
+ ret i32 %2
+}
+declare i32 @llvm.x86.sse41.ptestc(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <16 x i8> @stack_fold_punpckhbw(<16 x i8> %a0, <16 x i8> %a1) {
+ ;CHECK-LABEL: stack_fold_punpckhbw
+ ;CHECK: punpckhbw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = shufflevector <16 x i8> %a0, <16 x i8> %a1, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
+ ret <16 x i8> %2
+}
+
+define <4 x i32> @stack_fold_punpckhdq(<4 x i32> %a0, <4 x i32> %a1) {
+ ;CHECK-LABEL: stack_fold_punpckhdq
+ ;CHECK: punpckhdq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = shufflevector <4 x i32> %a0, <4 x i32> %a1, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
+ ; add forces execution domain
+ %3 = add <4 x i32> %2, <i32 1, i32 1, i32 1, i32 1>
+ ret <4 x i32> %3
+}
+
+define <2 x i64> @stack_fold_punpckhqdq(<2 x i64> %a0, <2 x i64> %a1) {
+ ;CHECK-LABEL: stack_fold_punpckhqdq
+ ;CHECK: punpckhqdq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = shufflevector <2 x i64> %a0, <2 x i64> %a1, <2 x i32> <i32 1, i32 3>
+ ; add forces execution domain
+ %3 = add <2 x i64> %2, <i64 1, i64 1>
+ ret <2 x i64> %3
+}
+
+define <8 x i16> @stack_fold_punpckhwd(<8 x i16> %a0, <8 x i16> %a1) {
+ ;CHECK-LABEL: stack_fold_punpckhwd
+ ;CHECK: punpckhwd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = shufflevector <8 x i16> %a0, <8 x i16> %a1, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
+ ret <8 x i16> %2
+}
+
+define <16 x i8> @stack_fold_punpcklbw(<16 x i8> %a0, <16 x i8> %a1) {
+ ;CHECK-LABEL: stack_fold_punpcklbw
+ ;CHECK: punpcklbw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = shufflevector <16 x i8> %a0, <16 x i8> %a1, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
+ ret <16 x i8> %2
+}
+
+define <4 x i32> @stack_fold_punpckldq(<4 x i32> %a0, <4 x i32> %a1) {
+ ;CHECK-LABEL: stack_fold_punpckldq
+ ;CHECK: punpckldq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = shufflevector <4 x i32> %a0, <4 x i32> %a1, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
+ ; add forces execution domain
+ %3 = add <4 x i32> %2, <i32 1, i32 1, i32 1, i32 1>
+ ret <4 x i32> %3
+}
+
+define <2 x i64> @stack_fold_punpcklqdq(<2 x i64> %a0, <2 x i64> %a1) {
+ ;CHECK-LABEL: stack_fold_punpcklqdq
+ ;CHECK: punpcklqdq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = shufflevector <2 x i64> %a0, <2 x i64> %a1, <2 x i32> <i32 0, i32 2>
+ ; add forces execution domain
+ %3 = add <2 x i64> %2, <i64 1, i64 1>
+ ret <2 x i64> %3
+}
+
+define <8 x i16> @stack_fold_punpcklwd(<8 x i16> %a0, <8 x i16> %a1) {
+ ;CHECK-LABEL: stack_fold_punpcklwd
+ ;CHECK: punpcklwd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = shufflevector <8 x i16> %a0, <8 x i16> %a1, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
+ ret <8 x i16> %2
+}
+
+define <16 x i8> @stack_fold_pxor(<16 x i8> %a0, <16 x i8> %a1) {
+ ;CHECK-LABEL: stack_fold_pxor
+ ;CHECK: pxor {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = xor <16 x i8> %a0, %a1
+ ; add forces execution domain
+ %3 = add <16 x i8> %2, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+ ret <16 x i8> %3
+}
diff --git a/test/CodeGen/X86/stack-folding-mmx.ll b/test/CodeGen/X86/stack-folding-mmx.ll
new file mode 100644
index 000000000000..8a5d4e2770dc
--- /dev/null
+++ b/test/CodeGen/X86/stack-folding-mmx.ll
@@ -0,0 +1,566 @@
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+mmx,+sse2 | FileCheck %s
+
+define x86_mmx @stack_fold_cvtpd2pi(<2 x double> %a0) {
+ ;CHECK-LABEL: stack_fold_cvtpd2pi
+ ;CHECK: cvtpd2pi {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call x86_mmx @llvm.x86.sse.cvtpd2pi(<2 x double> %a0) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.sse.cvtpd2pi(<2 x double>) nounwind readnone
+
+define <2 x double> @stack_fold_cvtpi2pd(x86_mmx %a0) {
+ ;CHECK-LABEL: stack_fold_cvtpi2pd
+ ;CHECK: cvtpi2pd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm1},~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call <2 x double> @llvm.x86.sse.cvtpi2pd(x86_mmx %a0) nounwind readnone
+ ret <2 x double> %2
+}
+declare <2 x double> @llvm.x86.sse.cvtpi2pd(x86_mmx) nounwind readnone
+
+define <4 x float> @stack_fold_cvtpi2ps(<4 x float> %a0, x86_mmx %a1) {
+ ;CHECK-LABEL: stack_fold_cvtpi2ps
+ ;CHECK: cvtpi2ps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm1},~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call <4 x float> @llvm.x86.sse.cvtpi2ps(<4 x float> %a0, x86_mmx %a1) nounwind readnone
+ ret <4 x float> %2
+}
+declare <4 x float> @llvm.x86.sse.cvtpi2ps(<4 x float>, x86_mmx) nounwind readnone
+
+define x86_mmx @stack_fold_cvtps2pi(<4 x float> %a0) {
+ ;CHECK-LABEL: stack_fold_cvtps2pi
+ ;CHECK: cvtps2pi {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call x86_mmx @llvm.x86.sse.cvtps2pi(<4 x float> %a0) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.sse.cvtps2pi(<4 x float>) nounwind readnone
+
+define x86_mmx @stack_fold_cvttpd2pi(<2 x double> %a0) {
+ ;CHECK-LABEL: stack_fold_cvttpd2pi
+ ;CHECK: cvttpd2pi {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call x86_mmx @llvm.x86.sse.cvttpd2pi(<2 x double> %a0) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.sse.cvttpd2pi(<2 x double>) nounwind readnone
+
+define x86_mmx @stack_fold_cvttps2pi(<4 x float> %a0) {
+ ;CHECK-LABEL: stack_fold_cvttps2pi
+ ;CHECK: cvttps2pi {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call x86_mmx @llvm.x86.sse.cvttps2pi(<4 x float> %a0) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.sse.cvttps2pi(<4 x float>) nounwind readnone
+
+; TODO stack_fold_movd_load
+; TODO stack_fold_movd_store
+; TODO stack_fold_movq_load
+; TODO stack_fold_movq_store
+
+define x86_mmx @stack_fold_packssdw(x86_mmx %a, x86_mmx %b) {
+ ;CHECK-LABEL: stack_fold_packssdw
+ ;CHECK: packssdw {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.mmx.packssdw(x86_mmx %a, x86_mmx %b) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.mmx.packssdw(x86_mmx, x86_mmx) nounwind readnone
+
+define x86_mmx @stack_fold_packsswb(x86_mmx %a, x86_mmx %b) {
+ ;CHECK-LABEL: stack_fold_packsswb
+ ;CHECK: packsswb {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.mmx.packsswb(x86_mmx %a, x86_mmx %b) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.mmx.packsswb(x86_mmx, x86_mmx) nounwind readnone
+
+define x86_mmx @stack_fold_packuswb(x86_mmx %a, x86_mmx %b) {
+ ;CHECK-LABEL: stack_fold_packuswb
+ ;CHECK: packuswb {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.mmx.packuswb(x86_mmx %a, x86_mmx %b) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.mmx.packuswb(x86_mmx, x86_mmx) nounwind readnone
+
+define x86_mmx @stack_fold_paddb(x86_mmx %a, x86_mmx %b) {
+ ;CHECK-LABEL: stack_fold_paddb
+ ;CHECK: paddb {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.mmx.padd.b(x86_mmx %a, x86_mmx %b) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.mmx.padd.b(x86_mmx, x86_mmx) nounwind readnone
+
+define x86_mmx @stack_fold_paddd(x86_mmx %a, x86_mmx %b) {
+ ;CHECK-LABEL: stack_fold_paddd
+ ;CHECK: paddd {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %a, x86_mmx %b) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.mmx.padd.d(x86_mmx, x86_mmx) nounwind readnone
+
+define x86_mmx @stack_fold_paddq(x86_mmx %a, x86_mmx %b) {
+ ;CHECK-LABEL: stack_fold_paddq
+ ;CHECK: paddq {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.mmx.padd.q(x86_mmx %a, x86_mmx %b) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.mmx.padd.q(x86_mmx, x86_mmx) nounwind readnone
+
+define x86_mmx @stack_fold_paddsb(x86_mmx %a, x86_mmx %b) {
+ ;CHECK-LABEL: stack_fold_paddsb
+ ;CHECK: paddsb {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.mmx.padds.b(x86_mmx %a, x86_mmx %b) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.mmx.padds.b(x86_mmx, x86_mmx) nounwind readnone
+
+define x86_mmx @stack_fold_paddsw(x86_mmx %a, x86_mmx %b) {
+ ;CHECK-LABEL: stack_fold_paddsw
+ ;CHECK: paddsw {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.mmx.padds.w(x86_mmx %a, x86_mmx %b) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.mmx.padds.w(x86_mmx, x86_mmx) nounwind readnone
+
+define x86_mmx @stack_fold_paddusb(x86_mmx %a, x86_mmx %b) {
+ ;CHECK-LABEL: stack_fold_paddusb
+ ;CHECK: paddusb {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.mmx.paddus.b(x86_mmx %a, x86_mmx %b) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.mmx.paddus.b(x86_mmx, x86_mmx) nounwind readnone
+
+define x86_mmx @stack_fold_paddusw(x86_mmx %a, x86_mmx %b) {
+ ;CHECK-LABEL: stack_fold_paddusw
+ ;CHECK: paddusw {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.mmx.paddus.w(x86_mmx %a, x86_mmx %b) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.mmx.paddus.w(x86_mmx, x86_mmx) nounwind readnone
+
+define x86_mmx @stack_fold_paddw(x86_mmx %a, x86_mmx %b) {
+ ;CHECK-LABEL: stack_fold_paddw
+ ;CHECK: paddw {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.mmx.padd.w(x86_mmx %a, x86_mmx %b) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.mmx.padd.w(x86_mmx, x86_mmx) nounwind readnone
+
+define x86_mmx @stack_fold_pand(x86_mmx %a, x86_mmx %b) {
+ ;CHECK-LABEL: stack_fold_pand
+ ;CHECK: pand {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.mmx.pand(x86_mmx %a, x86_mmx %b) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.mmx.pand(x86_mmx, x86_mmx) nounwind readnone
+
+define x86_mmx @stack_fold_pandn(x86_mmx %a, x86_mmx %b) {
+ ;CHECK-LABEL: stack_fold_pandn
+ ;CHECK: pandn {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.mmx.pandn(x86_mmx %a, x86_mmx %b) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.mmx.pandn(x86_mmx, x86_mmx) nounwind readnone
+
+define x86_mmx @stack_fold_pavgb(x86_mmx %a, x86_mmx %b) {
+ ;CHECK-LABEL: stack_fold_pavgb
+ ;CHECK: pavgb {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.mmx.pavg.b(x86_mmx %a, x86_mmx %b) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.mmx.pavg.b(x86_mmx, x86_mmx) nounwind readnone
+
+define x86_mmx @stack_fold_pavgw(x86_mmx %a, x86_mmx %b) {
+ ;CHECK-LABEL: stack_fold_pavgw
+ ;CHECK: pavgw {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.mmx.pavg.w(x86_mmx %a, x86_mmx %b) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.mmx.pavg.w(x86_mmx, x86_mmx) nounwind readnone
+
+define x86_mmx @stack_fold_pcmpeqb(x86_mmx %a, x86_mmx %b) {
+ ;CHECK-LABEL: stack_fold_pcmpeqb
+ ;CHECK: pcmpeqb {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.mmx.pcmpeq.b(x86_mmx %a, x86_mmx %b) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.mmx.pcmpeq.b(x86_mmx, x86_mmx) nounwind readnone
+
+define x86_mmx @stack_fold_pcmpeqd(x86_mmx %a, x86_mmx %b) {
+ ;CHECK-LABEL: stack_fold_pcmpeqd
+ ;CHECK: pcmpeqd {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.mmx.pcmpeq.d(x86_mmx %a, x86_mmx %b) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.mmx.pcmpeq.d(x86_mmx, x86_mmx) nounwind readnone
+
+define x86_mmx @stack_fold_pcmpeqw(x86_mmx %a, x86_mmx %b) {
+ ;CHECK-LABEL: stack_fold_pcmpeqw
+ ;CHECK: pcmpeqw {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.mmx.pcmpeq.w(x86_mmx %a, x86_mmx %b) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.mmx.pcmpeq.w(x86_mmx, x86_mmx) nounwind readnone
+
+define x86_mmx @stack_fold_pcmpgtb(x86_mmx %a, x86_mmx %b) {
+ ;CHECK-LABEL: stack_fold_pcmpgtb
+ ;CHECK: pcmpgtb {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.mmx.pcmpgt.b(x86_mmx %a, x86_mmx %b) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.mmx.pcmpgt.b(x86_mmx, x86_mmx) nounwind readnone
+
+define x86_mmx @stack_fold_pcmpgtd(x86_mmx %a, x86_mmx %b) {
+ ;CHECK-LABEL: stack_fold_pcmpgtd
+ ;CHECK: pcmpgtd {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.mmx.pcmpgt.d(x86_mmx %a, x86_mmx %b) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.mmx.pcmpgt.d(x86_mmx, x86_mmx) nounwind readnone
+
+define x86_mmx @stack_fold_pcmpgtw(x86_mmx %a, x86_mmx %b) {
+ ;CHECK-LABEL: stack_fold_pcmpgtw
+ ;CHECK: pcmpgtw {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.mmx.pcmpgt.w(x86_mmx %a, x86_mmx %b) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.mmx.pcmpgt.w(x86_mmx, x86_mmx) nounwind readnone
+
+; TODO stack_fold_pinsrw
+
+define x86_mmx @stack_fold_pmaddwd(x86_mmx %a, x86_mmx %b) {
+ ;CHECK-LABEL: stack_fold_pmaddwd
+ ;CHECK: pmaddwd {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.mmx.pmadd.wd(x86_mmx %a, x86_mmx %b) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.mmx.pmadd.wd(x86_mmx, x86_mmx) nounwind readnone
+
+define x86_mmx @stack_fold_pmaxsw(x86_mmx %a, x86_mmx %b) {
+ ;CHECK-LABEL: stack_fold_pmaxsw
+ ;CHECK: pmaxsw {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.mmx.pmaxs.w(x86_mmx %a, x86_mmx %b) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.mmx.pmaxs.w(x86_mmx, x86_mmx) nounwind readnone
+
+define x86_mmx @stack_fold_pmaxub(x86_mmx %a, x86_mmx %b) {
+ ;CHECK-LABEL: stack_fold_pmaxub
+ ;CHECK: pmaxub {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.mmx.pmaxu.b(x86_mmx %a, x86_mmx %b) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.mmx.pmaxu.b(x86_mmx, x86_mmx) nounwind readnone
+
+define x86_mmx @stack_fold_pminsw(x86_mmx %a, x86_mmx %b) {
+ ;CHECK-LABEL: stack_fold_pminsw
+ ;CHECK: pminsw {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.mmx.pmins.w(x86_mmx %a, x86_mmx %b) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.mmx.pmins.w(x86_mmx, x86_mmx) nounwind readnone
+
+define x86_mmx @stack_fold_pminub(x86_mmx %a, x86_mmx %b) {
+ ;CHECK-LABEL: stack_fold_pminub
+ ;CHECK: pminub {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.mmx.pminu.b(x86_mmx %a, x86_mmx %b) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.mmx.pminu.b(x86_mmx, x86_mmx) nounwind readnone
+
+define x86_mmx @stack_fold_pmulhuw(x86_mmx %a, x86_mmx %b) {
+ ;CHECK-LABEL: stack_fold_pmulhuw
+ ;CHECK: pmulhuw {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.mmx.pmulhu.w(x86_mmx %a, x86_mmx %b) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.mmx.pmulhu.w(x86_mmx, x86_mmx) nounwind readnone
+
+define x86_mmx @stack_fold_pmulhw(x86_mmx %a, x86_mmx %b) {
+ ;CHECK-LABEL: stack_fold_pmulhw
+ ;CHECK: pmulhw {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.mmx.pmulh.w(x86_mmx %a, x86_mmx %b) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.mmx.pmulh.w(x86_mmx, x86_mmx) nounwind readnone
+
+define x86_mmx @stack_fold_pmullw(x86_mmx %a, x86_mmx %b) {
+ ;CHECK-LABEL: stack_fold_pmullw
+ ;CHECK: pmullw {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.mmx.pmull.w(x86_mmx %a, x86_mmx %b) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.mmx.pmull.w(x86_mmx, x86_mmx) nounwind readnone
+
+define x86_mmx @stack_fold_pmuludq(x86_mmx %a, x86_mmx %b) {
+ ;CHECK-LABEL: stack_fold_pmuludq
+ ;CHECK: pmuludq {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.mmx.pmulu.dq(x86_mmx %a, x86_mmx %b) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.mmx.pmulu.dq(x86_mmx, x86_mmx) nounwind readnone
+
+define x86_mmx @stack_fold_por(x86_mmx %a, x86_mmx %b) {
+ ;CHECK-LABEL: stack_fold_por
+ ;CHECK: por {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.mmx.por(x86_mmx %a, x86_mmx %b) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.mmx.por(x86_mmx, x86_mmx) nounwind readnone
+
+define x86_mmx @stack_fold_psadbw(x86_mmx %a, x86_mmx %b) {
+ ;CHECK-LABEL: stack_fold_psadbw
+ ;CHECK: psadbw {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.mmx.psad.bw(x86_mmx %a, x86_mmx %b) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.mmx.psad.bw(x86_mmx, x86_mmx) nounwind readnone
+
+define x86_mmx @stack_fold_pshufw(x86_mmx %a, x86_mmx %b) {
+ ;CHECK-LABEL: stack_fold_pshufw
+ ;CHECK: pshufw $1, {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm1},~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx %a, i8 1) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx, i8) nounwind readnone
+
+define x86_mmx @stack_fold_pslld(x86_mmx %a, x86_mmx %b) {
+ ;CHECK-LABEL: stack_fold_pslld
+ ;CHECK: pslld {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.mmx.psll.d(x86_mmx %a, x86_mmx %b) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.mmx.psll.d(x86_mmx, x86_mmx) nounwind readnone
+
+define x86_mmx @stack_fold_psllq(x86_mmx %a, x86_mmx %b) {
+ ;CHECK-LABEL: stack_fold_psllq
+ ;CHECK: psllq {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.mmx.psll.q(x86_mmx %a, x86_mmx %b) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.mmx.psll.q(x86_mmx, x86_mmx) nounwind readnone
+
+define x86_mmx @stack_fold_psllw(x86_mmx %a, x86_mmx %b) {
+ ;CHECK-LABEL: stack_fold_psllw
+ ;CHECK: psllw {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.mmx.psll.w(x86_mmx %a, x86_mmx %b) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.mmx.psll.w(x86_mmx, x86_mmx) nounwind readnone
+
+define x86_mmx @stack_fold_psrad(x86_mmx %a, x86_mmx %b) {
+ ;CHECK-LABEL: stack_fold_psrad
+ ;CHECK: psrad {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.mmx.psra.d(x86_mmx %a, x86_mmx %b) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.mmx.psra.d(x86_mmx, x86_mmx) nounwind readnone
+
+define x86_mmx @stack_fold_psraw(x86_mmx %a, x86_mmx %b) {
+ ;CHECK-LABEL: stack_fold_psraw
+ ;CHECK: psraw {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.mmx.psra.w(x86_mmx %a, x86_mmx %b) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.mmx.psra.w(x86_mmx, x86_mmx) nounwind readnone
+
+define x86_mmx @stack_fold_psrld(x86_mmx %a, x86_mmx %b) {
+ ;CHECK-LABEL: stack_fold_psrld
+ ;CHECK: psrld {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.mmx.psrl.d(x86_mmx %a, x86_mmx %b) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.mmx.psrl.d(x86_mmx, x86_mmx) nounwind readnone
+
+define x86_mmx @stack_fold_psrlq(x86_mmx %a, x86_mmx %b) {
+ ;CHECK-LABEL: stack_fold_psrlq
+ ;CHECK: psrlq {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.mmx.psrl.q(x86_mmx %a, x86_mmx %b) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.mmx.psrl.q(x86_mmx, x86_mmx) nounwind readnone
+
+define x86_mmx @stack_fold_psrlw(x86_mmx %a, x86_mmx %b) {
+ ;CHECK-LABEL: stack_fold_psrlw
+ ;CHECK: psrlw {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.mmx.psrl.w(x86_mmx %a, x86_mmx %b) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.mmx.psrl.w(x86_mmx, x86_mmx) nounwind readnone
+
+define x86_mmx @stack_fold_psubb(x86_mmx %a, x86_mmx %b) {
+ ;CHECK-LABEL: stack_fold_psubb
+ ;CHECK: psubb {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.mmx.psub.b(x86_mmx %a, x86_mmx %b) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.mmx.psub.b(x86_mmx, x86_mmx) nounwind readnone
+
+define x86_mmx @stack_fold_psubd(x86_mmx %a, x86_mmx %b) {
+ ;CHECK-LABEL: stack_fold_psubd
+ ;CHECK: psubd {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.mmx.psub.d(x86_mmx %a, x86_mmx %b) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.mmx.psub.d(x86_mmx, x86_mmx) nounwind readnone
+
+define x86_mmx @stack_fold_psubq(x86_mmx %a, x86_mmx %b) {
+ ;CHECK-LABEL: stack_fold_psubq
+ ;CHECK: psubq {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.mmx.psub.q(x86_mmx %a, x86_mmx %b) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.mmx.psub.q(x86_mmx, x86_mmx) nounwind readnone
+
+define x86_mmx @stack_fold_psubsb(x86_mmx %a, x86_mmx %b) {
+ ;CHECK-LABEL: stack_fold_psubsb
+ ;CHECK: psubsb {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.mmx.psubs.b(x86_mmx %a, x86_mmx %b) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.mmx.psubs.b(x86_mmx, x86_mmx) nounwind readnone
+
+define x86_mmx @stack_fold_psubsw(x86_mmx %a, x86_mmx %b) {
+ ;CHECK-LABEL: stack_fold_psubsw
+ ;CHECK: psubsw {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.mmx.psubs.w(x86_mmx %a, x86_mmx %b) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.mmx.psubs.w(x86_mmx, x86_mmx) nounwind readnone
+
+define x86_mmx @stack_fold_psubusb(x86_mmx %a, x86_mmx %b) {
+ ;CHECK-LABEL: stack_fold_psubusb
+ ;CHECK: psubusb {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.mmx.psubus.b(x86_mmx %a, x86_mmx %b) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.mmx.psubus.b(x86_mmx, x86_mmx) nounwind readnone
+
+define x86_mmx @stack_fold_psubusw(x86_mmx %a, x86_mmx %b) {
+ ;CHECK-LABEL: stack_fold_psubusw
+ ;CHECK: psubusw {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.mmx.psubus.w(x86_mmx %a, x86_mmx %b) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.mmx.psubus.w(x86_mmx, x86_mmx) nounwind readnone
+
+define x86_mmx @stack_fold_psubw(x86_mmx %a, x86_mmx %b) {
+ ;CHECK-LABEL: stack_fold_psubw
+ ;CHECK: psubw {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.mmx.psub.w(x86_mmx %a, x86_mmx %b) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.mmx.psub.w(x86_mmx, x86_mmx) nounwind readnone
+
+define x86_mmx @stack_fold_punpckhbw(x86_mmx %a, x86_mmx %b) {
+ ;CHECK-LABEL: stack_fold_punpckhbw
+ ;CHECK: punpckhbw {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.mmx.punpckhbw(x86_mmx %a, x86_mmx %b) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.mmx.punpckhbw(x86_mmx, x86_mmx) nounwind readnone
+
+define x86_mmx @stack_fold_punpckhdq(x86_mmx %a, x86_mmx %b) {
+ ;CHECK-LABEL: stack_fold_punpckhdq
+ ;CHECK: punpckhdq {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.mmx.punpckhdq(x86_mmx %a, x86_mmx %b) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.mmx.punpckhdq(x86_mmx, x86_mmx) nounwind readnone
+
+define x86_mmx @stack_fold_punpckhwd(x86_mmx %a, x86_mmx %b) {
+ ;CHECK-LABEL: stack_fold_punpckhwd
+ ;CHECK: punpckhwd {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.mmx.punpckhwd(x86_mmx %a, x86_mmx %b) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.mmx.punpckhwd(x86_mmx, x86_mmx) nounwind readnone
+
+define x86_mmx @stack_fold_punpcklbw(x86_mmx %a, x86_mmx %b) {
+ ;CHECK-LABEL: stack_fold_punpcklbw
+ ;CHECK: punpcklbw {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.mmx.punpcklbw(x86_mmx %a, x86_mmx %b) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.mmx.punpcklbw(x86_mmx, x86_mmx) nounwind readnone
+
+define x86_mmx @stack_fold_punpckldq(x86_mmx %a, x86_mmx %b) {
+ ;CHECK-LABEL: stack_fold_punpckldq
+ ;CHECK: punpckldq {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.mmx.punpckldq(x86_mmx %a, x86_mmx %b) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.mmx.punpckldq(x86_mmx, x86_mmx) nounwind readnone
+
+define x86_mmx @stack_fold_punpcklwd(x86_mmx %a, x86_mmx %b) {
+ ;CHECK-LABEL: stack_fold_punpcklwd
+ ;CHECK: punpcklwd {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.mmx.punpcklwd(x86_mmx %a, x86_mmx %b) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.mmx.punpcklwd(x86_mmx, x86_mmx) nounwind readnone
+
+define x86_mmx @stack_fold_pxor(x86_mmx %a, x86_mmx %b) {
+ ;CHECK-LABEL: stack_fold_pxor
+ ;CHECK: pxor {{-?[0-9]*}}(%rsp), {{%mm[0-7]}} {{.*#+}} 8-byte Folded Reload
+ %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"()
+ %2 = call x86_mmx @llvm.x86.mmx.pxor(x86_mmx %a, x86_mmx %b) nounwind readnone
+ ret x86_mmx %2
+}
+declare x86_mmx @llvm.x86.mmx.pxor(x86_mmx, x86_mmx) nounwind readnone
diff --git a/test/CodeGen/X86/stack-folding-xop.ll b/test/CodeGen/X86/stack-folding-xop.ll
new file mode 100644
index 000000000000..44a0d1dc6582
--- /dev/null
+++ b/test/CodeGen/X86/stack-folding-xop.ll
@@ -0,0 +1,718 @@
+; RUN: llc -O3 -disable-peephole -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx,+xop < %s | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-unknown"
+
+; Stack reload folding tests.
+;
+; By including a nop call with sideeffects we can force a partial register spill of the
+; relevant registers and check that the reload is correctly folded into the instruction.
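+;
+; As a rough sketch of the expected codegen (assumed shape only: the exact
+; spill slot, offsets, and register numbers vary from test to test), the
+; vfrczpd case below compiles to something like:
+;
+;   vmovaps %xmm0, -16(%rsp)  # spill %a0 while the asm clobbers xmm1-xmm15
+;   nop
+;   vfrczpd -16(%rsp), %xmm0  # reload folded directly into the instruction
+;
+; which is the pattern the "Folded Reload" CHECK lines match against.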
+
+define <2 x double> @stack_fold_vfrczpd(<2 x double> %a0) {
+ ;CHECK-LABEL: stack_fold_vfrczpd
+ ;CHECK: vfrczpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <2 x double> @llvm.x86.xop.vfrcz.pd(<2 x double> %a0)
+ ret <2 x double> %2
+}
+declare <2 x double> @llvm.x86.xop.vfrcz.pd(<2 x double>) nounwind readnone
+
+define <4 x double> @stack_fold_vfrczpd_ymm(<4 x double> %a0) {
+ ;CHECK-LABEL: stack_fold_vfrczpd_ymm
+ ;CHECK: vfrczpd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <4 x double> @llvm.x86.xop.vfrcz.pd.256(<4 x double> %a0)
+ ret <4 x double> %2
+}
+declare <4 x double> @llvm.x86.xop.vfrcz.pd.256(<4 x double>) nounwind readnone
+
+define <4 x float> @stack_fold_vfrczps(<4 x float> %a0) {
+ ;CHECK-LABEL: stack_fold_vfrczps
+ ;CHECK: vfrczps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <4 x float> @llvm.x86.xop.vfrcz.ps(<4 x float> %a0)
+ ret <4 x float> %2
+}
+declare <4 x float> @llvm.x86.xop.vfrcz.ps(<4 x float>) nounwind readnone
+
+define <8 x float> @stack_fold_vfrczps_ymm(<8 x float> %a0) {
+ ;CHECK-LABEL: stack_fold_vfrczps_ymm
+ ;CHECK: vfrczps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <8 x float> @llvm.x86.xop.vfrcz.ps.256(<8 x float> %a0)
+ ret <8 x float> %2
+}
+declare <8 x float> @llvm.x86.xop.vfrcz.ps.256(<8 x float>) nounwind readnone
+
+define <2 x double> @stack_fold_vfrczsd(<2 x double> %a0) {
+ ;CHECK-LABEL: stack_fold_vfrczsd
+ ;CHECK: vfrczsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double> %a0)
+ ret <2 x double> %2
+}
+declare <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double>) nounwind readnone
+
+define <4 x float> @stack_fold_vfrczss(<4 x float> %a0) {
+ ;CHECK-LABEL: stack_fold_vfrczss
+ ;CHECK: vfrczss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float> %a0)
+ ret <4 x float> %2
+}
+declare <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float>) nounwind readnone
+
+define <2 x i64> @stack_fold_vpcmov_rm(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2) {
+ ;CHECK-LABEL: stack_fold_vpcmov_rm
+ ;CHECK: vpcmov {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <2 x i64> @llvm.x86.xop.vpcmov(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2)
+ ret <2 x i64> %2
+}
+define <2 x i64> @stack_fold_vpcmov_mr(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2) {
+ ;CHECK-LABEL: stack_fold_vpcmov_mr
+ ;CHECK: vpcmov {{%xmm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <2 x i64> @llvm.x86.xop.vpcmov(<2 x i64> %a0, <2 x i64> %a2, <2 x i64> %a1)
+ ret <2 x i64> %2
+}
+declare <2 x i64> @llvm.x86.xop.vpcmov(<2 x i64>, <2 x i64>, <2 x i64>) nounwind readnone
+
+define <4 x i64> @stack_fold_vpcmov_rm_ymm(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> %a2) {
+ ;CHECK-LABEL: stack_fold_vpcmov_rm_ymm
+ ;CHECK: vpcmov {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <4 x i64> @llvm.x86.xop.vpcmov.256(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> %a2)
+ ret <4 x i64> %2
+}
+define <4 x i64> @stack_fold_vpcmov_mr_ymm(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> %a2) {
+ ;CHECK-LABEL: stack_fold_vpcmov_mr_ymm
+ ;CHECK: vpcmov {{%ymm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <4 x i64> @llvm.x86.xop.vpcmov.256(<4 x i64> %a0, <4 x i64> %a2, <4 x i64> %a1)
+ ret <4 x i64> %2
+}
+declare <4 x i64> @llvm.x86.xop.vpcmov.256(<4 x i64>, <4 x i64>, <4 x i64>) nounwind readnone
+
+define <16 x i8> @stack_fold_vpcomb(<16 x i8> %a0, <16 x i8> %a1) {
+ ;CHECK-LABEL: stack_fold_vpcomb
+ ;CHECK: vpcomltb {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <16 x i8> @llvm.x86.xop.vpcomb(<16 x i8> %a0, <16 x i8> %a1, i8 0)
+ ret <16 x i8> %2
+}
+declare <16 x i8> @llvm.x86.xop.vpcomb(<16 x i8>, <16 x i8>, i8) nounwind readnone
+
+define <4 x i32> @stack_fold_vpcomd(<4 x i32> %a0, <4 x i32> %a1) {
+ ;CHECK-LABEL: stack_fold_vpcomd
+ ;CHECK: vpcomltd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <4 x i32> @llvm.x86.xop.vpcomd(<4 x i32> %a0, <4 x i32> %a1, i8 0)
+ ret <4 x i32> %2
+}
+declare <4 x i32> @llvm.x86.xop.vpcomd(<4 x i32>, <4 x i32>, i8) nounwind readnone
+
+define <2 x i64> @stack_fold_vpcomq(<2 x i64> %a0, <2 x i64> %a1) {
+ ;CHECK-LABEL: stack_fold_vpcomq
+ ;CHECK: vpcomltq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <2 x i64> @llvm.x86.xop.vpcomq(<2 x i64> %a0, <2 x i64> %a1, i8 0)
+ ret <2 x i64> %2
+}
+declare <2 x i64> @llvm.x86.xop.vpcomq(<2 x i64>, <2 x i64>, i8) nounwind readnone
+
+define <16 x i8> @stack_fold_vpcomub(<16 x i8> %a0, <16 x i8> %a1) {
+ ;CHECK-LABEL: stack_fold_vpcomub
+ ;CHECK: vpcomltub {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <16 x i8> @llvm.x86.xop.vpcomub(<16 x i8> %a0, <16 x i8> %a1, i8 0)
+ ret <16 x i8> %2
+}
+declare <16 x i8> @llvm.x86.xop.vpcomub(<16 x i8>, <16 x i8>, i8) nounwind readnone
+
+define <4 x i32> @stack_fold_vpcomud(<4 x i32> %a0, <4 x i32> %a1) {
+ ;CHECK-LABEL: stack_fold_vpcomud
+ ;CHECK: vpcomltud {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <4 x i32> @llvm.x86.xop.vpcomud(<4 x i32> %a0, <4 x i32> %a1, i8 0)
+ ret <4 x i32> %2
+}
+declare <4 x i32> @llvm.x86.xop.vpcomud(<4 x i32>, <4 x i32>, i8) nounwind readnone
+
+define <2 x i64> @stack_fold_vpcomuq(<2 x i64> %a0, <2 x i64> %a1) {
+ ;CHECK-LABEL: stack_fold_vpcomuq
+ ;CHECK: vpcomltuq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <2 x i64> @llvm.x86.xop.vpcomuq(<2 x i64> %a0, <2 x i64> %a1, i8 0)
+ ret <2 x i64> %2
+}
+declare <2 x i64> @llvm.x86.xop.vpcomuq(<2 x i64>, <2 x i64>, i8) nounwind readnone
+
+define <8 x i16> @stack_fold_vpcomuw(<8 x i16> %a0, <8 x i16> %a1) {
+ ;CHECK-LABEL: stack_fold_vpcomuw
+ ;CHECK: vpcomltuw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <8 x i16> @llvm.x86.xop.vpcomuw(<8 x i16> %a0, <8 x i16> %a1, i8 0)
+ ret <8 x i16> %2
+}
+declare <8 x i16> @llvm.x86.xop.vpcomuw(<8 x i16>, <8 x i16>, i8) nounwind readnone
+
+define <8 x i16> @stack_fold_vpcomw(<8 x i16> %a0, <8 x i16> %a1) {
+ ;CHECK-LABEL: stack_fold_vpcomw
+ ;CHECK: vpcomltw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <8 x i16> @llvm.x86.xop.vpcomw(<8 x i16> %a0, <8 x i16> %a1, i8 0)
+ ret <8 x i16> %2
+}
+declare <8 x i16> @llvm.x86.xop.vpcomw(<8 x i16>, <8 x i16>, i8) nounwind readnone
+
+define <2 x double> @stack_fold_vpermil2pd_rm(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
+ ;CHECK-LABEL: stack_fold_vpermil2pd_rm
+ ;CHECK: vpermil2pd $0, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <2 x double> @llvm.x86.xop.vpermil2pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 0)
+ ret <2 x double> %2
+}
+define <2 x double> @stack_fold_vpermil2pd_mr(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
+ ;CHECK-LABEL: stack_fold_vpermil2pd_mr
+ ;CHECK: vpermil2pd $0, {{%xmm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <2 x double> @llvm.x86.xop.vpermil2pd(<2 x double> %a0, <2 x double> %a2, <2 x double> %a1, i8 0)
+ ret <2 x double> %2
+}
+declare <2 x double> @llvm.x86.xop.vpermil2pd(<2 x double>, <2 x double>, <2 x double>, i8) nounwind readnone
+
+define <4 x double> @stack_fold_vpermil2pd_rm_ymm(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) {
+ ;CHECK-LABEL: stack_fold_vpermil2pd_rm_ymm
+ ;CHECK: vpermil2pd $0, {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 0)
+ ret <4 x double> %2
+}
+define <4 x double> @stack_fold_vpermil2pd_mr_ymm(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) {
+ ;CHECK-LABEL: stack_fold_vpermil2pd_mr_ymm
+ ;CHECK: vpermil2pd $0, {{%ymm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double> %a0, <4 x double> %a2, <4 x double> %a1, i8 0)
+ ret <4 x double> %2
+}
+declare <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double>, <4 x double>, <4 x double>, i8) nounwind readnone
+
+define <4 x float> @stack_fold_vpermil2ps_rm(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
+ ;CHECK-LABEL: stack_fold_vpermil2ps_rm
+ ;CHECK: vpermil2ps $0, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <4 x float> @llvm.x86.xop.vpermil2ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 0)
+ ret <4 x float> %2
+}
+define <4 x float> @stack_fold_vpermil2ps_mr(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
+ ;CHECK-LABEL: stack_fold_vpermil2ps_mr
+ ;CHECK: vpermil2ps $0, {{%xmm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <4 x float> @llvm.x86.xop.vpermil2ps(<4 x float> %a0, <4 x float> %a2, <4 x float> %a1, i8 0)
+ ret <4 x float> %2
+}
+declare <4 x float> @llvm.x86.xop.vpermil2ps(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone
+
+define <8 x float> @stack_fold_vpermil2ps_rm_ymm(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) {
+ ;CHECK-LABEL: stack_fold_vpermil2ps_rm_ymm
+ ;CHECK: vpermil2ps $0, {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <8 x float> @llvm.x86.xop.vpermil2ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 0)
+ ret <8 x float> %2
+}
+define <8 x float> @stack_fold_vpermil2ps_mr_ymm(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) {
+ ;CHECK-LABEL: stack_fold_vpermil2ps_mr_ymm
+ ;CHECK: vpermil2ps $0, {{%ymm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <8 x float> @llvm.x86.xop.vpermil2ps.256(<8 x float> %a0, <8 x float> %a2, <8 x float> %a1, i8 0)
+ ret <8 x float> %2
+}
+declare <8 x float> @llvm.x86.xop.vpermil2ps.256(<8 x float>, <8 x float>, <8 x float>, i8) nounwind readnone
+
+define <4 x i32> @stack_fold_vphaddbd(<16 x i8> %a0) {
+ ;CHECK-LABEL: stack_fold_vphaddbd
+ ;CHECK: vphaddbd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <4 x i32> @llvm.x86.xop.vphaddbd(<16 x i8> %a0)
+ ret <4 x i32> %2
+}
+declare <4 x i32> @llvm.x86.xop.vphaddbd(<16 x i8>) nounwind readnone
+
+define <2 x i64> @stack_fold_vphaddbq(<16 x i8> %a0) {
+ ;CHECK-LABEL: stack_fold_vphaddbq
+ ;CHECK: vphaddbq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <2 x i64> @llvm.x86.xop.vphaddbq(<16 x i8> %a0)
+ ret <2 x i64> %2
+}
+declare <2 x i64> @llvm.x86.xop.vphaddbq(<16 x i8>) nounwind readnone
+
+define <8 x i16> @stack_fold_vphaddbw(<16 x i8> %a0) {
+ ;CHECK-LABEL: stack_fold_vphaddbw
+ ;CHECK: vphaddbw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <8 x i16> @llvm.x86.xop.vphaddbw(<16 x i8> %a0)
+ ret <8 x i16> %2
+}
+declare <8 x i16> @llvm.x86.xop.vphaddbw(<16 x i8>) nounwind readnone
+
+define <2 x i64> @stack_fold_vphadddq(<4 x i32> %a0) {
+ ;CHECK-LABEL: stack_fold_vphadddq
+ ;CHECK: vphadddq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <2 x i64> @llvm.x86.xop.vphadddq(<4 x i32> %a0)
+ ret <2 x i64> %2
+}
+declare <2 x i64> @llvm.x86.xop.vphadddq(<4 x i32>) nounwind readnone
+
+define <4 x i32> @stack_fold_vphaddubd(<16 x i8> %a0) {
+ ;CHECK-LABEL: stack_fold_vphaddubd
+ ;CHECK: vphaddubd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <4 x i32> @llvm.x86.xop.vphaddubd(<16 x i8> %a0)
+ ret <4 x i32> %2
+}
+declare <4 x i32> @llvm.x86.xop.vphaddubd(<16 x i8>) nounwind readnone
+
+define <2 x i64> @stack_fold_vphaddubq(<16 x i8> %a0) {
+ ;CHECK-LABEL: stack_fold_vphaddubq
+ ;CHECK: vphaddubq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <2 x i64> @llvm.x86.xop.vphaddubq(<16 x i8> %a0)
+ ret <2 x i64> %2
+}
+declare <2 x i64> @llvm.x86.xop.vphaddubq(<16 x i8>) nounwind readnone
+
+define <8 x i16> @stack_fold_vphaddubw(<16 x i8> %a0) {
+ ;CHECK-LABEL: stack_fold_vphaddubw
+ ;CHECK: vphaddubw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <8 x i16> @llvm.x86.xop.vphaddubw(<16 x i8> %a0)
+ ret <8 x i16> %2
+}
+declare <8 x i16> @llvm.x86.xop.vphaddubw(<16 x i8>) nounwind readnone
+
+define <2 x i64> @stack_fold_vphaddudq(<4 x i32> %a0) {
+ ;CHECK-LABEL: stack_fold_vphaddudq
+ ;CHECK: vphaddudq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <2 x i64> @llvm.x86.xop.vphaddudq(<4 x i32> %a0)
+ ret <2 x i64> %2
+}
+declare <2 x i64> @llvm.x86.xop.vphaddudq(<4 x i32>) nounwind readnone
+
+define <4 x i32> @stack_fold_vphadduwd(<8 x i16> %a0) {
+ ;CHECK-LABEL: stack_fold_vphadduwd
+ ;CHECK: vphadduwd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <4 x i32> @llvm.x86.xop.vphadduwd(<8 x i16> %a0)
+ ret <4 x i32> %2
+}
+declare <4 x i32> @llvm.x86.xop.vphadduwd(<8 x i16>) nounwind readnone
+
+define <2 x i64> @stack_fold_vphadduwq(<8 x i16> %a0) {
+ ;CHECK-LABEL: stack_fold_vphadduwq
+ ;CHECK: vphadduwq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <2 x i64> @llvm.x86.xop.vphadduwq(<8 x i16> %a0)
+ ret <2 x i64> %2
+}
+declare <2 x i64> @llvm.x86.xop.vphadduwq(<8 x i16>) nounwind readnone
+
+define <4 x i32> @stack_fold_vphaddwd(<8 x i16> %a0) {
+ ;CHECK-LABEL: stack_fold_vphaddwd
+ ;CHECK: vphaddwd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <4 x i32> @llvm.x86.xop.vphaddwd(<8 x i16> %a0)
+ ret <4 x i32> %2
+}
+declare <4 x i32> @llvm.x86.xop.vphaddwd(<8 x i16>) nounwind readnone
+
+define <2 x i64> @stack_fold_vphaddwq(<8 x i16> %a0) {
+ ;CHECK-LABEL: stack_fold_vphaddwq
+ ;CHECK: vphaddwq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <2 x i64> @llvm.x86.xop.vphaddwq(<8 x i16> %a0)
+ ret <2 x i64> %2
+}
+declare <2 x i64> @llvm.x86.xop.vphaddwq(<8 x i16>) nounwind readnone
+
+define <8 x i16> @stack_fold_vphsubbw(<16 x i8> %a0) {
+ ;CHECK-LABEL: stack_fold_vphsubbw
+ ;CHECK: vphsubbw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <8 x i16> @llvm.x86.xop.vphsubbw(<16 x i8> %a0)
+ ret <8 x i16> %2
+}
+declare <8 x i16> @llvm.x86.xop.vphsubbw(<16 x i8>) nounwind readnone
+
+define <2 x i64> @stack_fold_vphsubdq(<4 x i32> %a0) {
+ ;CHECK-LABEL: stack_fold_vphsubdq
+ ;CHECK: vphsubdq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <2 x i64> @llvm.x86.xop.vphsubdq(<4 x i32> %a0)
+ ret <2 x i64> %2
+}
+declare <2 x i64> @llvm.x86.xop.vphsubdq(<4 x i32>) nounwind readnone
+
+define <4 x i32> @stack_fold_vphsubwd(<8 x i16> %a0) {
+ ;CHECK-LABEL: stack_fold_vphsubwd
+ ;CHECK: vphsubwd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <4 x i32> @llvm.x86.xop.vphsubwd(<8 x i16> %a0)
+ ret <4 x i32> %2
+}
+declare <4 x i32> @llvm.x86.xop.vphsubwd(<8 x i16>) nounwind readnone
+
+define <4 x i32> @stack_fold_vpmacsdd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2) {
+ ;CHECK-LABEL: stack_fold_vpmacsdd
+ ;CHECK: vpmacsdd {{%xmm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <4 x i32> @llvm.x86.xop.vpmacsdd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2)
+ ret <4 x i32> %2
+}
+declare <4 x i32> @llvm.x86.xop.vpmacsdd(<4 x i32>, <4 x i32>, <4 x i32>) nounwind readnone
+
+define <2 x i64> @stack_fold_vpmacsdqh(<4 x i32> %a0, <4 x i32> %a1, <2 x i64> %a2) {
+ ;CHECK-LABEL: stack_fold_vpmacsdqh
+ ;CHECK: vpmacsdqh {{%xmm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <2 x i64> @llvm.x86.xop.vpmacsdqh(<4 x i32> %a0, <4 x i32> %a1, <2 x i64> %a2)
+ ret <2 x i64> %2
+}
+declare <2 x i64> @llvm.x86.xop.vpmacsdqh(<4 x i32>, <4 x i32>, <2 x i64>) nounwind readnone
+
+define <2 x i64> @stack_fold_vpmacsdql(<4 x i32> %a0, <4 x i32> %a1, <2 x i64> %a2) {
+ ;CHECK-LABEL: stack_fold_vpmacsdql
+ ;CHECK: vpmacsdql {{%xmm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <2 x i64> @llvm.x86.xop.vpmacsdql(<4 x i32> %a0, <4 x i32> %a1, <2 x i64> %a2)
+ ret <2 x i64> %2
+}
+declare <2 x i64> @llvm.x86.xop.vpmacsdql(<4 x i32>, <4 x i32>, <2 x i64>) nounwind readnone
+
+define <4 x i32> @stack_fold_vpmacssdd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2) {
+ ;CHECK-LABEL: stack_fold_vpmacssdd
+ ;CHECK: vpmacssdd {{%xmm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <4 x i32> @llvm.x86.xop.vpmacssdd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2)
+ ret <4 x i32> %2
+}
+declare <4 x i32> @llvm.x86.xop.vpmacssdd(<4 x i32>, <4 x i32>, <4 x i32>) nounwind readnone
+
+define <2 x i64> @stack_fold_vpmacssdqh(<4 x i32> %a0, <4 x i32> %a1, <2 x i64> %a2) {
+ ;CHECK-LABEL: stack_fold_vpmacssdqh
+ ;CHECK: vpmacssdqh {{%xmm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <2 x i64> @llvm.x86.xop.vpmacssdqh(<4 x i32> %a0, <4 x i32> %a1, <2 x i64> %a2)
+ ret <2 x i64> %2
+}
+declare <2 x i64> @llvm.x86.xop.vpmacssdqh(<4 x i32>, <4 x i32>, <2 x i64>) nounwind readnone
+
+define <2 x i64> @stack_fold_vpmacssdql(<4 x i32> %a0, <4 x i32> %a1, <2 x i64> %a2) {
+ ;CHECK-LABEL: stack_fold_vpmacssdql
+ ;CHECK: vpmacssdql {{%xmm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <2 x i64> @llvm.x86.xop.vpmacssdql(<4 x i32> %a0, <4 x i32> %a1, <2 x i64> %a2)
+ ret <2 x i64> %2
+}
+declare <2 x i64> @llvm.x86.xop.vpmacssdql(<4 x i32>, <4 x i32>, <2 x i64>) nounwind readnone
+
+define <4 x i32> @stack_fold_vpmacsswd(<8 x i16> %a0, <8 x i16> %a1, <4 x i32> %a2) {
+ ;CHECK-LABEL: stack_fold_vpmacsswd
+ ;CHECK: vpmacsswd {{%xmm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <4 x i32> @llvm.x86.xop.vpmacsswd(<8 x i16> %a0, <8 x i16> %a1, <4 x i32> %a2)
+ ret <4 x i32> %2
+}
+declare <4 x i32> @llvm.x86.xop.vpmacsswd(<8 x i16>, <8 x i16>, <4 x i32>) nounwind readnone
+
+define <8 x i16> @stack_fold_vpmacssww(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> %a2) {
+ ;CHECK-LABEL: stack_fold_vpmacssww
+ ;CHECK: vpmacssww {{%xmm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <8 x i16> @llvm.x86.xop.vpmacssww(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> %a2)
+ ret <8 x i16> %2
+}
+declare <8 x i16> @llvm.x86.xop.vpmacssww(<8 x i16>, <8 x i16>, <8 x i16>) nounwind readnone
+
+define <4 x i32> @stack_fold_vpmacswd(<8 x i16> %a0, <8 x i16> %a1, <4 x i32> %a2) {
+ ;CHECK-LABEL: stack_fold_vpmacswd
+ ;CHECK: vpmacswd {{%xmm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <4 x i32> @llvm.x86.xop.vpmacswd(<8 x i16> %a0, <8 x i16> %a1, <4 x i32> %a2)
+ ret <4 x i32> %2
+}
+declare <4 x i32> @llvm.x86.xop.vpmacswd(<8 x i16>, <8 x i16>, <4 x i32>) nounwind readnone
+
+define <8 x i16> @stack_fold_vpmacsww(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> %a2) {
+ ;CHECK-LABEL: stack_fold_vpmacsww
+ ;CHECK: vpmacsww {{%xmm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <8 x i16> @llvm.x86.xop.vpmacsww(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> %a2)
+ ret <8 x i16> %2
+}
+declare <8 x i16> @llvm.x86.xop.vpmacsww(<8 x i16>, <8 x i16>, <8 x i16>) nounwind readnone
+
+define <4 x i32> @stack_fold_vpmadcsswd(<8 x i16> %a0, <8 x i16> %a1, <4 x i32> %a2) {
+ ;CHECK-LABEL: stack_fold_vpmadcsswd
+ ;CHECK: vpmadcsswd {{%xmm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <4 x i32> @llvm.x86.xop.vpmadcsswd(<8 x i16> %a0, <8 x i16> %a1, <4 x i32> %a2)
+ ret <4 x i32> %2
+}
+declare <4 x i32> @llvm.x86.xop.vpmadcsswd(<8 x i16>, <8 x i16>, <4 x i32>) nounwind readnone
+
+define <4 x i32> @stack_fold_vpmadcswd(<8 x i16> %a0, <8 x i16> %a1, <4 x i32> %a2) {
+ ;CHECK-LABEL: stack_fold_vpmadcswd
+ ;CHECK: vpmadcswd {{%xmm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <4 x i32> @llvm.x86.xop.vpmadcswd(<8 x i16> %a0, <8 x i16> %a1, <4 x i32> %a2)
+ ret <4 x i32> %2
+}
+declare <4 x i32> @llvm.x86.xop.vpmadcswd(<8 x i16>, <8 x i16>, <4 x i32>) nounwind readnone
+
+define <16 x i8> @stack_fold_vpperm_rm(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2) {
+ ;CHECK-LABEL: stack_fold_vpperm_rm
+ ;CHECK: vpperm {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <16 x i8> @llvm.x86.xop.vpperm(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2)
+ ret <16 x i8> %2
+}
+define <16 x i8> @stack_fold_vpperm_mr(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2) {
+ ;CHECK-LABEL: stack_fold_vpperm_mr
+ ;CHECK: vpperm {{%xmm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <16 x i8> @llvm.x86.xop.vpperm(<16 x i8> %a0, <16 x i8> %a2, <16 x i8> %a1)
+ ret <16 x i8> %2
+}
+declare <16 x i8> @llvm.x86.xop.vpperm(<16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone
+
+define <16 x i8> @stack_fold_vprotb(<16 x i8> %a0) {
+ ;CHECK-LABEL: stack_fold_vprotb
+ ;CHECK: vprotb $7, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <16 x i8> @llvm.x86.xop.vprotbi(<16 x i8> %a0, i8 7)
+ ret <16 x i8> %2
+}
+declare <16 x i8> @llvm.x86.xop.vprotbi(<16 x i8>, i8) nounwind readnone
+
+define <16 x i8> @stack_fold_vprotb_rm(<16 x i8> %a0, <16 x i8> %a1) {
+ ;CHECK-LABEL: stack_fold_vprotb_rm
+ ;CHECK: vprotb {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <16 x i8> @llvm.x86.xop.vprotb(<16 x i8> %a0, <16 x i8> %a1)
+ ret <16 x i8> %2
+}
+define <16 x i8> @stack_fold_vprotb_mr(<16 x i8> %a0, <16 x i8> %a1) {
+ ;CHECK-LABEL: stack_fold_vprotb_mr
+ ;CHECK: vprotb {{%xmm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <16 x i8> @llvm.x86.xop.vprotb(<16 x i8> %a1, <16 x i8> %a0)
+ ret <16 x i8> %2
+}
+declare <16 x i8> @llvm.x86.xop.vprotb(<16 x i8>, <16 x i8>) nounwind readnone
+
+define <4 x i32> @stack_fold_vprotd(<4 x i32> %a0) {
+ ;CHECK-LABEL: stack_fold_vprotd
+ ;CHECK: vprotd $7, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <4 x i32> @llvm.x86.xop.vprotdi(<4 x i32> %a0, i8 7)
+ ret <4 x i32> %2
+}
+declare <4 x i32> @llvm.x86.xop.vprotdi(<4 x i32>, i8) nounwind readnone
+
+define <4 x i32> @stack_fold_vprotd_rm(<4 x i32> %a0, <4 x i32> %a1) {
+ ;CHECK-LABEL: stack_fold_vprotd_rm
+ ;CHECK: vprotd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <4 x i32> @llvm.x86.xop.vprotd(<4 x i32> %a0, <4 x i32> %a1)
+ ret <4 x i32> %2
+}
+define <4 x i32> @stack_fold_vprotd_mr(<4 x i32> %a0, <4 x i32> %a1) {
+ ;CHECK-LABEL: stack_fold_vprotd_mr
+ ;CHECK: vprotd {{%xmm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <4 x i32> @llvm.x86.xop.vprotd(<4 x i32> %a1, <4 x i32> %a0)
+ ret <4 x i32> %2
+}
+declare <4 x i32> @llvm.x86.xop.vprotd(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <2 x i64> @stack_fold_vprotq(<2 x i64> %a0) {
+ ;CHECK-LABEL: stack_fold_vprotq
+ ;CHECK: vprotq $7, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <2 x i64> @llvm.x86.xop.vprotqi(<2 x i64> %a0, i8 7)
+ ret <2 x i64> %2
+}
+declare <2 x i64> @llvm.x86.xop.vprotqi(<2 x i64>, i8) nounwind readnone
+
+define <2 x i64> @stack_fold_vprotq_rm(<2 x i64> %a0, <2 x i64> %a1) {
+ ;CHECK-LABEL: stack_fold_vprotq_rm
+ ;CHECK: vprotq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <2 x i64> @llvm.x86.xop.vprotq(<2 x i64> %a0, <2 x i64> %a1)
+ ret <2 x i64> %2
+}
+define <2 x i64> @stack_fold_vprotq_mr(<2 x i64> %a0, <2 x i64> %a1) {
+ ;CHECK-LABEL: stack_fold_vprotq_mr
+ ;CHECK: vprotq {{%xmm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <2 x i64> @llvm.x86.xop.vprotq(<2 x i64> %a1, <2 x i64> %a0)
+ ret <2 x i64> %2
+}
+declare <2 x i64> @llvm.x86.xop.vprotq(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <8 x i16> @stack_fold_vprotw(<8 x i16> %a0) {
+ ;CHECK-LABEL: stack_fold_vprotw
+ ;CHECK: vprotw $7, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <8 x i16> @llvm.x86.xop.vprotwi(<8 x i16> %a0, i8 7)
+ ret <8 x i16> %2
+}
+declare <8 x i16> @llvm.x86.xop.vprotwi(<8 x i16>, i8) nounwind readnone
+
+define <8 x i16> @stack_fold_vprotw_rm(<8 x i16> %a0, <8 x i16> %a1) {
+ ;CHECK-LABEL: stack_fold_vprotw_rm
+ ;CHECK: vprotw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <8 x i16> @llvm.x86.xop.vprotw(<8 x i16> %a0, <8 x i16> %a1)
+ ret <8 x i16> %2
+}
+define <8 x i16> @stack_fold_vprotw_mr(<8 x i16> %a0, <8 x i16> %a1) {
+ ;CHECK-LABEL: stack_fold_vprotw_mr
+ ;CHECK: vprotw {{%xmm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <8 x i16> @llvm.x86.xop.vprotw(<8 x i16> %a1, <8 x i16> %a0)
+ ret <8 x i16> %2
+}
+declare <8 x i16> @llvm.x86.xop.vprotw(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <16 x i8> @stack_fold_vpshab_rm(<16 x i8> %a0, <16 x i8> %a1) {
+ ;CHECK-LABEL: stack_fold_vpshab_rm
+ ;CHECK: vpshab {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <16 x i8> @llvm.x86.xop.vpshab(<16 x i8> %a0, <16 x i8> %a1)
+ ret <16 x i8> %2
+}
+define <16 x i8> @stack_fold_vpshab_mr(<16 x i8> %a0, <16 x i8> %a1) {
+ ;CHECK-LABEL: stack_fold_vpshab_mr
+ ;CHECK: vpshab {{%xmm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <16 x i8> @llvm.x86.xop.vpshab(<16 x i8> %a1, <16 x i8> %a0)
+ ret <16 x i8> %2
+}
+declare <16 x i8> @llvm.x86.xop.vpshab(<16 x i8>, <16 x i8>) nounwind readnone
+
+define <4 x i32> @stack_fold_vpshad_rm(<4 x i32> %a0, <4 x i32> %a1) {
+ ;CHECK-LABEL: stack_fold_vpshad_rm
+ ;CHECK: vpshad {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <4 x i32> @llvm.x86.xop.vpshad(<4 x i32> %a0, <4 x i32> %a1)
+ ret <4 x i32> %2
+}
+define <4 x i32> @stack_fold_vpshad_mr(<4 x i32> %a0, <4 x i32> %a1) {
+ ;CHECK-LABEL: stack_fold_vpshad_mr
+ ;CHECK: vpshad {{%xmm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <4 x i32> @llvm.x86.xop.vpshad(<4 x i32> %a1, <4 x i32> %a0)
+ ret <4 x i32> %2
+}
+declare <4 x i32> @llvm.x86.xop.vpshad(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <2 x i64> @stack_fold_vpshaq_rm(<2 x i64> %a0, <2 x i64> %a1) {
+ ;CHECK-LABEL: stack_fold_vpshaq_rm
+ ;CHECK: vpshaq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <2 x i64> @llvm.x86.xop.vpshaq(<2 x i64> %a0, <2 x i64> %a1)
+ ret <2 x i64> %2
+}
+define <2 x i64> @stack_fold_vpshaq_mr(<2 x i64> %a0, <2 x i64> %a1) {
+ ;CHECK-LABEL: stack_fold_vpshaq_mr
+ ;CHECK: vpshaq {{%xmm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <2 x i64> @llvm.x86.xop.vpshaq(<2 x i64> %a1, <2 x i64> %a0)
+ ret <2 x i64> %2
+}
+declare <2 x i64> @llvm.x86.xop.vpshaq(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <8 x i16> @stack_fold_vpshaw_rm(<8 x i16> %a0, <8 x i16> %a1) {
+ ;CHECK-LABEL: stack_fold_vpshaw_rm
+ ;CHECK: vpshaw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <8 x i16> @llvm.x86.xop.vpshaw(<8 x i16> %a0, <8 x i16> %a1)
+ ret <8 x i16> %2
+}
+define <8 x i16> @stack_fold_vpshaw_mr(<8 x i16> %a0, <8 x i16> %a1) {
+ ;CHECK-LABEL: stack_fold_vpshaw_mr
+ ;CHECK: vpshaw {{%xmm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <8 x i16> @llvm.x86.xop.vpshaw(<8 x i16> %a1, <8 x i16> %a0)
+ ret <8 x i16> %2
+}
+declare <8 x i16> @llvm.x86.xop.vpshaw(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <16 x i8> @stack_fold_vpshlb_rm(<16 x i8> %a0, <16 x i8> %a1) {
+ ;CHECK-LABEL: stack_fold_vpshlb_rm
+ ;CHECK: vpshlb {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <16 x i8> @llvm.x86.xop.vpshlb(<16 x i8> %a0, <16 x i8> %a1)
+ ret <16 x i8> %2
+}
+define <16 x i8> @stack_fold_vpshlb_mr(<16 x i8> %a0, <16 x i8> %a1) {
+ ;CHECK-LABEL: stack_fold_vpshlb_mr
+ ;CHECK: vpshlb {{%xmm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <16 x i8> @llvm.x86.xop.vpshlb(<16 x i8> %a1, <16 x i8> %a0)
+ ret <16 x i8> %2
+}
+declare <16 x i8> @llvm.x86.xop.vpshlb(<16 x i8>, <16 x i8>) nounwind readnone
+
+define <4 x i32> @stack_fold_vpshld_rm(<4 x i32> %a0, <4 x i32> %a1) {
+ ;CHECK-LABEL: stack_fold_vpshld_rm
+ ;CHECK: vpshld {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <4 x i32> @llvm.x86.xop.vpshld(<4 x i32> %a0, <4 x i32> %a1)
+ ret <4 x i32> %2
+}
+define <4 x i32> @stack_fold_vpshld_mr(<4 x i32> %a0, <4 x i32> %a1) {
+ ;CHECK-LABEL: stack_fold_vpshld_mr
+ ;CHECK: vpshld {{%xmm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <4 x i32> @llvm.x86.xop.vpshld(<4 x i32> %a1, <4 x i32> %a0)
+ ret <4 x i32> %2
+}
+declare <4 x i32> @llvm.x86.xop.vpshld(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <2 x i64> @stack_fold_vpshlq_rm(<2 x i64> %a0, <2 x i64> %a1) {
+ ;CHECK-LABEL: stack_fold_vpshlq_rm
+ ;CHECK: vpshlq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <2 x i64> @llvm.x86.xop.vpshlq(<2 x i64> %a0, <2 x i64> %a1)
+ ret <2 x i64> %2
+}
+define <2 x i64> @stack_fold_vpshlq_mr(<2 x i64> %a0, <2 x i64> %a1) {
+ ;CHECK-LABEL: stack_fold_vpshlq_mr
+ ;CHECK: vpshlq {{%xmm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <2 x i64> @llvm.x86.xop.vpshlq(<2 x i64> %a1, <2 x i64> %a0)
+ ret <2 x i64> %2
+}
+declare <2 x i64> @llvm.x86.xop.vpshlq(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <8 x i16> @stack_fold_vpshlw_rm(<8 x i16> %a0, <8 x i16> %a1) {
+ ;CHECK-LABEL: stack_fold_vpshlw_rm
+ ;CHECK: vpshlw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <8 x i16> @llvm.x86.xop.vpshlw(<8 x i16> %a0, <8 x i16> %a1)
+ ret <8 x i16> %2
+}
+define <8 x i16> @stack_fold_vpshlw_mr(<8 x i16> %a0, <8 x i16> %a1) {
+ ;CHECK-LABEL: stack_fold_vpshlw_mr
+ ;CHECK: vpshlw {{%xmm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %2 = call <8 x i16> @llvm.x86.xop.vpshlw(<8 x i16> %a1, <8 x i16> %a0)
+ ret <8 x i16> %2
+}
+declare <8 x i16> @llvm.x86.xop.vpshlw(<8 x i16>, <8 x i16>) nounwind readnone
diff --git a/test/CodeGen/X86/stack-protector-dbginfo.ll b/test/CodeGen/X86/stack-protector-dbginfo.ll
index a84b77eac5f6..3aba19464b9d 100644
--- a/test/CodeGen/X86/stack-protector-dbginfo.ll
+++ b/test/CodeGen/X86/stack-protector-dbginfo.ll
@@ -10,9 +10,9 @@
; Function Attrs: nounwind sspreq
define i32 @_Z18read_response_sizev() #0 {
entry:
- tail call void @llvm.dbg.value(metadata !22, i64 0, metadata !23, metadata !{!"0x102"}), !dbg !39
- %0 = load i64* getelementptr inbounds ({ i64, [56 x i8] }* @a, i32 0, i32 0), align 8, !dbg !40
- tail call void @llvm.dbg.value(metadata i32 undef, i64 0, metadata !64, metadata !{!"0x102"}), !dbg !71
+ tail call void @llvm.dbg.value(metadata !22, i64 0, metadata !23, metadata !DIExpression()), !dbg !39
+ %0 = load i64, i64* getelementptr inbounds ({ i64, [56 x i8] }, { i64, [56 x i8] }* @a, i32 0, i32 0), align 8, !dbg !40
+ tail call void @llvm.dbg.value(metadata i32 undef, i64 0, metadata !64, metadata !DIExpression()), !dbg !71
%1 = trunc i64 %0 to i32
ret i32 %1
}
@@ -25,73 +25,72 @@ attributes #0 = { sspreq }
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!21, !72}
-!0 = !{!"0x11\004\00clang version 3.4 \001\00\000\00\001", !1, !2, !5, !8, !20, !5} ; [ DW_TAG_compile_unit ] [/Users/matt/ryan_bug/<unknown>] [DW_LANG_C_plus_plus]
-!1 = !{!"<unknown>", !"/Users/matt/ryan_bug"}
+!0 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.4 ", isOptimized: true, emissionKind: 1, file: !1, enums: !2, retainedTypes: !5, subprograms: !8, globals: !20, imports: !5)
+!1 = !DIFile(filename: "<unknown>", directory: "/Users/matt/ryan_bug")
!2 = !{!3}
-!3 = !{!"0x4\00\0020\0032\0032\000\000\000", !1, !4, null, !6, null, null, null} ; [ DW_TAG_enumeration_type ] [line 20, size 32, align 32, offset 0] [def] [from ]
-!4 = !{!"0x13\00C\0019\008\008\000\000\000", !1, null, null, !5, null, null, null} ; [ DW_TAG_structure_type ] [C] [line 19, size 8, align 8, offset 0] [def] [from ]
+!3 = !DICompositeType(tag: DW_TAG_enumeration_type, line: 20, size: 32, align: 32, file: !1, scope: !4, elements: !6)
+!4 = !DICompositeType(tag: DW_TAG_structure_type, name: "C", line: 19, size: 8, align: 8, file: !1, elements: !5)
!5 = !{}
!6 = !{!7}
-!7 = !{!"0x28\00max_frame_size\000"} ; [ DW_TAG_enumerator ] [max_frame_size :: 0]
+!7 = !DIEnumerator(name: "max_frame_size", value: 0) ; [ DW_TAG_enumerator ] [max_frame_size :: 0]
!8 = !{!9, !24, !41, !65}
-!9 = !{!"0x2e\00read_response_size\00read_response_size\00_Z18read_response_sizev\0027\000\001\000\006\00256\001\0027", !1, !10, !11, null, i32 ()* @_Z18read_response_sizev, null, null, !14} ; [ DW_TAG_subprogram ] [line 27] [def] [read_response_size]
-!10 = !{!"0x29", !1} ; [ DW_TAG_file_type ] [/Users/matt/ryan_bug/<unknown>]
-!11 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !12, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!9 = !DISubprogram(name: "read_response_size", linkageName: "_Z18read_response_sizev", line: 27, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 27, file: !1, scope: !10, type: !11, function: i32 ()* @_Z18read_response_sizev, variables: !14)
+!10 = !DIFile(filename: "<unknown>", directory: "/Users/matt/ryan_bug")
+!11 = !DISubroutineType(types: !12)
!12 = !{!13}
-!13 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!13 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
!14 = !{!15, !19}
-!15 = !{!"0x100\00b\0028\000", !9, !10, !16} ; [ DW_TAG_auto_variable ] [b] [line 28]
-!16 = !{!"0x13\00B\0016\0032\0032\000\000\000", !1, null, null, !17, null, null} ; [ DW_TAG_structure_type ] [B] [line 16, size 32, align 32, offset 0] [def] [from ]
+!15 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "b", line: 28, scope: !9, file: !10, type: !16)
+!16 = !DICompositeType(tag: DW_TAG_structure_type, name: "B", line: 16, size: 32, align: 32, file: !1, elements: !17)
!17 = !{!18}
-!18 = !{!"0xd\00end_of_file\0017\0032\0032\000\000", !1, !16, !13} ; [ DW_TAG_member ] [end_of_file] [line 17, size 32, align 32, offset 0] [from int]
-!19 = !{!"0x100\00c\0029\000", !9, !10, !13} ; [ DW_TAG_auto_variable ] [c] [line 29]
+!18 = !DIDerivedType(tag: DW_TAG_member, name: "end_of_file", line: 17, size: 32, align: 32, file: !1, scope: !16, baseType: !13)
+!19 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "c", line: 29, scope: !9, file: !10, type: !13)
!20 = !{}
!21 = !{i32 2, !"Dwarf Version", i32 2}
-!22 = !{i64* getelementptr inbounds ({ i64, [56 x i8] }* @a, i32 0, i32 0)}
-!23 = !{!"0x101\00p2\0033554444\000", !24, !10, !32, !38} ; [ DW_TAG_arg_variable ] [p2] [line 12]
-!24 = !{!"0x2e\00min<unsigned long long>\00min<unsigned long long>\00_ZN3__13minIyEERKT_S3_RS1_\0012\000\001\000\006\00256\001\0012", !1, !25, !27, null, null, !33, null, !35} ; [ DW_TAG_subprogram ] [line 12] [def] [min<unsigned long long>]
-!25 = !{!"0x39\00__1\001", !26, null} ; [ DW_TAG_namespace ] [__1] [line 1]
-!26 = !{!"main.cpp", !"/Users/matt/ryan_bug"}
-!27 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !28, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!22 = !{i64* getelementptr inbounds ({ i64, [56 x i8] }, { i64, [56 x i8] }* @a, i32 0, i32 0)}
+!23 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "p2", line: 12, arg: 2, scope: !24, file: !10, type: !32)
+!24 = !DISubprogram(name: "min<unsigned long long>", linkageName: "_ZN3__13minIyEERKT_S3_RS1_", line: 12, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 12, file: !1, scope: !25, type: !27, templateParams: !33, variables: !35)
+!25 = !DINamespace(name: "__1", line: 1, file: !26, scope: null)
+!26 = !DIFile(filename: "main.cpp", directory: "/Users/matt/ryan_bug")
+!27 = !DISubroutineType(types: !28)
!28 = !{!29, !29, !32}
-!29 = !{!"0x10\00\000\000\000\000\000", null, null, !30} ; [ DW_TAG_reference_type ] [line 0, size 0, align 0, offset 0] [from ]
-!30 = !{!"0x26\00\000\000\000\000\000", null, null, !31} ; [ DW_TAG_const_type ] [line 0, size 0, align 0, offset 0] [from long long unsigned int]
-!31 = !{!"0x24\00long long unsigned int\000\0064\0064\000\000\007", null, null} ; [ DW_TAG_base_type ] [long long unsigned int] [line 0, size 64, align 64, offset 0, enc DW_ATE_unsigned]
-!32 = !{!"0x10\00\000\000\000\000\000", null, null, !31} ; [ DW_TAG_reference_type ] [line 0, size 0, align 0, offset 0] [from long long unsigned int]
+!29 = !DIDerivedType(tag: DW_TAG_reference_type, baseType: !30)
+!30 = !DIDerivedType(tag: DW_TAG_const_type, baseType: !31)
+!31 = !DIBasicType(tag: DW_TAG_base_type, name: "long long unsigned int", size: 64, align: 64, encoding: DW_ATE_unsigned)
+!32 = !DIDerivedType(tag: DW_TAG_reference_type, baseType: !31)
!33 = !{!34}
-!34 = !{!"0x2f\00_Tp\000\000", null, !31, null} ; [ DW_TAG_template_type_parameter ]
+!34 = !DITemplateTypeParameter(name: "_Tp", type: !31)
!35 = !{!36, !37}
-!36 = !{!"0x101\00p1\0016777228\000", !24, !10, !29} ; [ DW_TAG_arg_variable ] [p1] [line 12]
-!37 = !{!"0x101\00p2\0033554444\000", !24, !10, !32} ; [ DW_TAG_arg_variable ] [p2] [line 12]
-!38 = !MDLocation(line: 33, scope: !9)
-!39 = !MDLocation(line: 12, scope: !24, inlinedAt: !38)
-!40 = !MDLocation(line: 9, scope: !41, inlinedAt: !59)
-!41 = !{!"0x2e\00min<unsigned long long, __1::A>\00min<unsigned long long, __1::A>\00_ZN3__13minIyNS_1AEEERKT_S4_RS2_T0_\007\000\001\000\006\00256\001\008", !1, !25, !42, null, null, !53, null, !55} ; [ DW_TAG_subprogram ] [line 7] [def] [scope 8] [min<unsigned long long, __1::A>]
-!42 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !43, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!36 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "p1", line: 12, arg: 1, scope: !24, file: !10, type: !29)
+!37 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "p2", line: 12, arg: 2, scope: !24, file: !10, type: !32)
+!38 = !DILocation(line: 33, scope: !9)
+!39 = !DILocation(line: 12, scope: !24, inlinedAt: !38)
+!40 = !DILocation(line: 9, scope: !41, inlinedAt: !59)
+!41 = !DISubprogram(name: "min<unsigned long long, __1::A>", linkageName: "_ZN3__13minIyNS_1AEEERKT_S4_RS2_T0_", line: 7, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 8, file: !1, scope: !25, type: !42, templateParams: !53, variables: !55)
+!42 = !DISubroutineType(types: !43)
!43 = !{!29, !29, !32, !44}
-!44 = !{!"0x13\00A\000\008\008\000\000\000", !1, !25, null, !45, null, null, null} ; [ DW_TAG_structure_type ] [A] [line 0, size 8, align 8, offset 0] [def] [from ]
+!44 = !DICompositeType(tag: DW_TAG_structure_type, name: "A", size: 8, align: 8, file: !1, scope: !25, elements: !45)
!45 = !{!46}
-!46 = !{!"0x2e\00operator()\00operator()\00_ZN3__11AclERKiS2_\001\000\000\000\006\00256\001\001", !1, !44, !47, null, null, null, i32 0, !52} ; [ DW_TAG_subprogram ] [line 1] [operator()]
-!47 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !48, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!46 = !DISubprogram(name: "operator()", linkageName: "_ZN3__11AclERKiS2_", line: 1, isLocal: false, isDefinition: false, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 1, file: !1, scope: !44, type: !47)
+!47 = !DISubroutineType(types: !48)
!48 = !{!13, !49, !50, !50}
-!49 = !{!"0xf\00\000\0064\0064\000\001088", i32 0, null, !44} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from A]
-!50 = !{!"0x10\00\000\000\000\000\000", null, null, !51} ; [ DW_TAG_reference_type ] [line 0, size 0, align 0, offset 0] [from ]
-!51 = !{!"0x26\00\000\000\000\000\000", null, null, !13} ; [ DW_TAG_const_type ] [line 0, size 0, align 0, offset 0] [from int]
-!52 = !{i32 786468}
+!49 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, flags: DIFlagArtificial | DIFlagObjectPointer, baseType: !44)
+!50 = !DIDerivedType(tag: DW_TAG_reference_type, baseType: !51)
+!51 = !DIDerivedType(tag: DW_TAG_const_type, baseType: !13)
!53 = !{!34, !54}
-!54 = !{!"0x2f\00_Compare\000\000", null, !44, null} ; [ DW_TAG_template_type_parameter ]
+!54 = !DITemplateTypeParameter(name: "_Compare", type: !44)
!55 = !{!56, !57, !58}
-!56 = !{!"0x101\00p1\0016777223\000", !41, !10, !29} ; [ DW_TAG_arg_variable ] [p1] [line 7]
-!57 = !{!"0x101\00p2\0033554439\000", !41, !10, !32} ; [ DW_TAG_arg_variable ] [p2] [line 7]
-!58 = !{!"0x101\00p3\0050331656\000", !41, !10, !44} ; [ DW_TAG_arg_variable ] [p3] [line 8]
-!59 = !MDLocation(line: 13, scope: !24, inlinedAt: !38)
+!56 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "p1", line: 7, arg: 1, scope: !41, file: !10, type: !29)
+!57 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "p2", line: 7, arg: 2, scope: !41, file: !10, type: !32)
+!58 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "p3", line: 8, arg: 3, scope: !41, file: !10, type: !44)
+!59 = !DILocation(line: 13, scope: !24, inlinedAt: !38)
!63 = !{i32 undef}
-!64 = !{!"0x101\00p1\0033554433\000", !65, !10, !50, !40} ; [ DW_TAG_arg_variable ] [p1] [line 1]
-!65 = !{!"0x2e\00operator()\00operator()\00_ZN3__11AclERKiS2_\001\000\001\000\006\00256\001\002", !1, !25, !47, null, null, null, !46, !66} ; [ DW_TAG_subprogram ] [line 1] [def] [scope 2] [operator()]
+!64 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "p1", line: 1, arg: 2, scope: !65, file: !10, type: !50)
+!65 = !DISubprogram(name: "operator()", linkageName: "_ZN3__11AclERKiS2_", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 2, file: !1, scope: !25, type: !47, declaration: !46, variables: !66)
!66 = !{!67, !69, !70}
-!67 = !{!"0x101\00this\0016777216\001088", !65, null, !68} ; [ DW_TAG_arg_variable ] [this] [line 0]
-!68 = !{!"0xf\00\000\0064\0064\000\000", null, null, !44} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from A]
-!69 = !{!"0x101\00p1\0033554433\000", !65, !10, !50} ; [ DW_TAG_arg_variable ] [p1] [line 1]
-!70 = !{!"0x101\00\0050331650\000", !65, !10, !50} ; [ DW_TAG_arg_variable ] [line 2]
-!71 = !MDLocation(line: 1, scope: !65, inlinedAt: !40)
-!72 = !{i32 1, !"Debug Info Version", i32 2}
+!67 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "this", arg: 1, flags: DIFlagArtificial | DIFlagObjectPointer, scope: !65, type: !68)
+!68 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, baseType: !44)
+!69 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "p1", line: 1, arg: 2, scope: !65, file: !10, type: !50)
+!70 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "", line: 2, arg: 3, scope: !65, file: !10, type: !50)
+!71 = !DILocation(line: 1, scope: !65, inlinedAt: !40)
+!72 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/CodeGen/X86/stack-protector-vreg-to-vreg-copy.ll b/test/CodeGen/X86/stack-protector-vreg-to-vreg-copy.ll
index 7d499f9abd74..f3f9eebb26c4 100644
--- a/test/CodeGen/X86/stack-protector-vreg-to-vreg-copy.ll
+++ b/test/CodeGen/X86/stack-protector-vreg-to-vreg-copy.ll
@@ -28,7 +28,7 @@ declare void @g(i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32,
define void @do_something(i32 %i) #0 {
entry:
%data = alloca [8 x i8], align 1
- %0 = load i32* @state, align 4
+ %0 = load i32, i32* @state, align 4
%cmp = icmp eq i32 %0, 0
br i1 %cmp, label %if.then, label %if.else
@@ -38,7 +38,7 @@ if.then: ; preds = %entry
if.else: ; preds = %entry
tail call fastcc void @send_int(i32 %i)
- %arrayidx = getelementptr inbounds [8 x i8]* %data, i32 0, i32 0
+ %arrayidx = getelementptr inbounds [8 x i8], [8 x i8]* %data, i32 0, i32 0
call void @zero_char(i8* %arrayidx)
br label %if.end
diff --git a/test/CodeGen/X86/stack-protector-weight.ll b/test/CodeGen/X86/stack-protector-weight.ll
index c5bf49134e4b..4220a4c46a0a 100644
--- a/test/CodeGen/X86/stack-protector-weight.ll
+++ b/test/CodeGen/X86/stack-protector-weight.ll
@@ -18,11 +18,11 @@ entry:
%a = alloca [128 x i32], align 16
%0 = bitcast [128 x i32]* %a to i8*
call void @llvm.lifetime.start(i64 512, i8* %0)
- %arraydecay = getelementptr inbounds [128 x i32]* %a, i64 0, i64 0
+ %arraydecay = getelementptr inbounds [128 x i32], [128 x i32]* %a, i64 0, i64 0
call void @foo2(i32* %arraydecay)
%idxprom = sext i32 %n to i64
- %arrayidx = getelementptr inbounds [128 x i32]* %a, i64 0, i64 %idxprom
- %1 = load i32* %arrayidx, align 4
+ %arrayidx = getelementptr inbounds [128 x i32], [128 x i32]* %a, i64 0, i64 %idxprom
+ %1 = load i32, i32* %arrayidx, align 4
call void @llvm.lifetime.end(i64 512, i8* %0)
ret i32 %1
}
diff --git a/test/CodeGen/X86/stack-protector.ll b/test/CodeGen/X86/stack-protector.ll
index 4db0f9a3426b..acaba6dc17f8 100644
--- a/test/CodeGen/X86/stack-protector.ll
+++ b/test/CodeGen/X86/stack-protector.ll
@@ -43,11 +43,11 @@ entry:
%a.addr = alloca i8*, align 8
%buf = alloca [16 x i8], align 16
store i8* %a, i8** %a.addr, align 8
- %arraydecay = getelementptr inbounds [16 x i8]* %buf, i32 0, i32 0
- %0 = load i8** %a.addr, align 8
+ %arraydecay = getelementptr inbounds [16 x i8], [16 x i8]* %buf, i32 0, i32 0
+ %0 = load i8*, i8** %a.addr, align 8
%call = call i8* @strcpy(i8* %arraydecay, i8* %0)
- %arraydecay1 = getelementptr inbounds [16 x i8]* %buf, i32 0, i32 0
- %call2 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i8* %arraydecay1)
+ %arraydecay1 = getelementptr inbounds [16 x i8], [16 x i8]* %buf, i32 0, i32 0
+ %call2 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i8* %arraydecay1)
ret void
}
@@ -79,11 +79,11 @@ entry:
%a.addr = alloca i8*, align 8
%buf = alloca [16 x i8], align 16
store i8* %a, i8** %a.addr, align 8
- %arraydecay = getelementptr inbounds [16 x i8]* %buf, i32 0, i32 0
- %0 = load i8** %a.addr, align 8
+ %arraydecay = getelementptr inbounds [16 x i8], [16 x i8]* %buf, i32 0, i32 0
+ %0 = load i8*, i8** %a.addr, align 8
%call = call i8* @strcpy(i8* %arraydecay, i8* %0)
- %arraydecay1 = getelementptr inbounds [16 x i8]* %buf, i32 0, i32 0
- %call2 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i8* %arraydecay1)
+ %arraydecay1 = getelementptr inbounds [16 x i8], [16 x i8]* %buf, i32 0, i32 0
+ %call2 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i8* %arraydecay1)
ret void
}
@@ -111,11 +111,11 @@ entry:
%a.addr = alloca i8*, align 8
%buf = alloca [16 x i8], align 16
store i8* %a, i8** %a.addr, align 8
- %arraydecay = getelementptr inbounds [16 x i8]* %buf, i32 0, i32 0
- %0 = load i8** %a.addr, align 8
+ %arraydecay = getelementptr inbounds [16 x i8], [16 x i8]* %buf, i32 0, i32 0
+ %0 = load i8*, i8** %a.addr, align 8
%call = call i8* @strcpy(i8* %arraydecay, i8* %0)
- %arraydecay1 = getelementptr inbounds [16 x i8]* %buf, i32 0, i32 0
- %call2 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i8* %arraydecay1)
+ %arraydecay1 = getelementptr inbounds [16 x i8], [16 x i8]* %buf, i32 0, i32 0
+ %call2 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i8* %arraydecay1)
ret void
}
@@ -143,11 +143,11 @@ entry:
%a.addr = alloca i8*, align 8
%buf = alloca [16 x i8], align 16
store i8* %a, i8** %a.addr, align 8
- %arraydecay = getelementptr inbounds [16 x i8]* %buf, i32 0, i32 0
- %0 = load i8** %a.addr, align 8
+ %arraydecay = getelementptr inbounds [16 x i8], [16 x i8]* %buf, i32 0, i32 0
+ %0 = load i8*, i8** %a.addr, align 8
%call = call i8* @strcpy(i8* %arraydecay, i8* %0)
- %arraydecay1 = getelementptr inbounds [16 x i8]* %buf, i32 0, i32 0
- %call2 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i8* %arraydecay1)
+ %arraydecay1 = getelementptr inbounds [16 x i8], [16 x i8]* %buf, i32 0, i32 0
+ %call2 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i8* %arraydecay1)
ret void
}
@@ -174,13 +174,13 @@ entry:
%a.addr = alloca i8*, align 8
%b = alloca %struct.foo, align 1
store i8* %a, i8** %a.addr, align 8
- %buf = getelementptr inbounds %struct.foo* %b, i32 0, i32 0
- %arraydecay = getelementptr inbounds [16 x i8]* %buf, i32 0, i32 0
- %0 = load i8** %a.addr, align 8
+ %buf = getelementptr inbounds %struct.foo, %struct.foo* %b, i32 0, i32 0
+ %arraydecay = getelementptr inbounds [16 x i8], [16 x i8]* %buf, i32 0, i32 0
+ %0 = load i8*, i8** %a.addr, align 8
%call = call i8* @strcpy(i8* %arraydecay, i8* %0)
- %buf1 = getelementptr inbounds %struct.foo* %b, i32 0, i32 0
- %arraydecay2 = getelementptr inbounds [16 x i8]* %buf1, i32 0, i32 0
- %call3 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i8* %arraydecay2)
+ %buf1 = getelementptr inbounds %struct.foo, %struct.foo* %b, i32 0, i32 0
+ %arraydecay2 = getelementptr inbounds [16 x i8], [16 x i8]* %buf1, i32 0, i32 0
+ %call3 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i8* %arraydecay2)
ret void
}
@@ -208,13 +208,13 @@ entry:
%a.addr = alloca i8*, align 8
%b = alloca %struct.foo, align 1
store i8* %a, i8** %a.addr, align 8
- %buf = getelementptr inbounds %struct.foo* %b, i32 0, i32 0
- %arraydecay = getelementptr inbounds [16 x i8]* %buf, i32 0, i32 0
- %0 = load i8** %a.addr, align 8
+ %buf = getelementptr inbounds %struct.foo, %struct.foo* %b, i32 0, i32 0
+ %arraydecay = getelementptr inbounds [16 x i8], [16 x i8]* %buf, i32 0, i32 0
+ %0 = load i8*, i8** %a.addr, align 8
%call = call i8* @strcpy(i8* %arraydecay, i8* %0)
- %buf1 = getelementptr inbounds %struct.foo* %b, i32 0, i32 0
- %arraydecay2 = getelementptr inbounds [16 x i8]* %buf1, i32 0, i32 0
- %call3 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i8* %arraydecay2)
+ %buf1 = getelementptr inbounds %struct.foo, %struct.foo* %b, i32 0, i32 0
+ %arraydecay2 = getelementptr inbounds [16 x i8], [16 x i8]* %buf1, i32 0, i32 0
+ %call3 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i8* %arraydecay2)
ret void
}
@@ -242,13 +242,13 @@ entry:
%a.addr = alloca i8*, align 8
%b = alloca %struct.foo, align 1
store i8* %a, i8** %a.addr, align 8
- %buf = getelementptr inbounds %struct.foo* %b, i32 0, i32 0
- %arraydecay = getelementptr inbounds [16 x i8]* %buf, i32 0, i32 0
- %0 = load i8** %a.addr, align 8
+ %buf = getelementptr inbounds %struct.foo, %struct.foo* %b, i32 0, i32 0
+ %arraydecay = getelementptr inbounds [16 x i8], [16 x i8]* %buf, i32 0, i32 0
+ %0 = load i8*, i8** %a.addr, align 8
%call = call i8* @strcpy(i8* %arraydecay, i8* %0)
- %buf1 = getelementptr inbounds %struct.foo* %b, i32 0, i32 0
- %arraydecay2 = getelementptr inbounds [16 x i8]* %buf1, i32 0, i32 0
- %call3 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i8* %arraydecay2)
+ %buf1 = getelementptr inbounds %struct.foo, %struct.foo* %b, i32 0, i32 0
+ %arraydecay2 = getelementptr inbounds [16 x i8], [16 x i8]* %buf1, i32 0, i32 0
+ %call3 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i8* %arraydecay2)
ret void
}
@@ -276,13 +276,13 @@ entry:
%a.addr = alloca i8*, align 8
%b = alloca %struct.foo, align 1
store i8* %a, i8** %a.addr, align 8
- %buf = getelementptr inbounds %struct.foo* %b, i32 0, i32 0
- %arraydecay = getelementptr inbounds [16 x i8]* %buf, i32 0, i32 0
- %0 = load i8** %a.addr, align 8
+ %buf = getelementptr inbounds %struct.foo, %struct.foo* %b, i32 0, i32 0
+ %arraydecay = getelementptr inbounds [16 x i8], [16 x i8]* %buf, i32 0, i32 0
+ %0 = load i8*, i8** %a.addr, align 8
%call = call i8* @strcpy(i8* %arraydecay, i8* %0)
- %buf1 = getelementptr inbounds %struct.foo* %b, i32 0, i32 0
- %arraydecay2 = getelementptr inbounds [16 x i8]* %buf1, i32 0, i32 0
- %call3 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i8* %arraydecay2)
+ %buf1 = getelementptr inbounds %struct.foo, %struct.foo* %b, i32 0, i32 0
+ %arraydecay2 = getelementptr inbounds [16 x i8], [16 x i8]* %buf1, i32 0, i32 0
+ %call3 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i8* %arraydecay2)
ret void
}
@@ -309,11 +309,11 @@ entry:
%a.addr = alloca i8*, align 8
%buf = alloca [4 x i8], align 1
store i8* %a, i8** %a.addr, align 8
- %arraydecay = getelementptr inbounds [4 x i8]* %buf, i32 0, i32 0
- %0 = load i8** %a.addr, align 8
+ %arraydecay = getelementptr inbounds [4 x i8], [4 x i8]* %buf, i32 0, i32 0
+ %0 = load i8*, i8** %a.addr, align 8
%call = call i8* @strcpy(i8* %arraydecay, i8* %0)
- %arraydecay1 = getelementptr inbounds [4 x i8]* %buf, i32 0, i32 0
- %call2 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i8* %arraydecay1)
+ %arraydecay1 = getelementptr inbounds [4 x i8], [4 x i8]* %buf, i32 0, i32 0
+ %call2 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i8* %arraydecay1)
ret void
}
@@ -341,11 +341,11 @@ entry:
%a.addr = alloca i8*, align 8
%buf = alloca [4 x i8], align 1
store i8* %a, i8** %a.addr, align 8
- %arraydecay = getelementptr inbounds [4 x i8]* %buf, i32 0, i32 0
- %0 = load i8** %a.addr, align 8
+ %arraydecay = getelementptr inbounds [4 x i8], [4 x i8]* %buf, i32 0, i32 0
+ %0 = load i8*, i8** %a.addr, align 8
%call = call i8* @strcpy(i8* %arraydecay, i8* %0)
- %arraydecay1 = getelementptr inbounds [4 x i8]* %buf, i32 0, i32 0
- %call2 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i8* %arraydecay1)
+ %arraydecay1 = getelementptr inbounds [4 x i8], [4 x i8]* %buf, i32 0, i32 0
+ %call2 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i8* %arraydecay1)
ret void
}
@@ -373,11 +373,11 @@ entry:
%a.addr = alloca i8*, align 8
%buf = alloca [4 x i8], align 1
store i8* %a, i8** %a.addr, align 8
- %arraydecay = getelementptr inbounds [4 x i8]* %buf, i32 0, i32 0
- %0 = load i8** %a.addr, align 8
+ %arraydecay = getelementptr inbounds [4 x i8], [4 x i8]* %buf, i32 0, i32 0
+ %0 = load i8*, i8** %a.addr, align 8
%call = call i8* @strcpy(i8* %arraydecay, i8* %0)
- %arraydecay1 = getelementptr inbounds [4 x i8]* %buf, i32 0, i32 0
- %call2 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i8* %arraydecay1)
+ %arraydecay1 = getelementptr inbounds [4 x i8], [4 x i8]* %buf, i32 0, i32 0
+ %call2 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i8* %arraydecay1)
ret void
}
@@ -405,11 +405,11 @@ entry:
%a.addr = alloca i8*, align 8
%buf = alloca [4 x i8], align 1
store i8* %a, i8** %a.addr, align 8
- %arraydecay = getelementptr inbounds [4 x i8]* %buf, i32 0, i32 0
- %0 = load i8** %a.addr, align 8
+ %arraydecay = getelementptr inbounds [4 x i8], [4 x i8]* %buf, i32 0, i32 0
+ %0 = load i8*, i8** %a.addr, align 8
%call = call i8* @strcpy(i8* %arraydecay, i8* %0)
- %arraydecay1 = getelementptr inbounds [4 x i8]* %buf, i32 0, i32 0
- %call2 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i8* %arraydecay1)
+ %arraydecay1 = getelementptr inbounds [4 x i8], [4 x i8]* %buf, i32 0, i32 0
+ %call2 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i8* %arraydecay1)
ret void
}
@@ -436,13 +436,13 @@ entry:
%a.addr = alloca i8*, align 8
%b = alloca %struct.foo.0, align 1
store i8* %a, i8** %a.addr, align 8
- %buf = getelementptr inbounds %struct.foo.0* %b, i32 0, i32 0
- %arraydecay = getelementptr inbounds [4 x i8]* %buf, i32 0, i32 0
- %0 = load i8** %a.addr, align 8
+ %buf = getelementptr inbounds %struct.foo.0, %struct.foo.0* %b, i32 0, i32 0
+ %arraydecay = getelementptr inbounds [4 x i8], [4 x i8]* %buf, i32 0, i32 0
+ %0 = load i8*, i8** %a.addr, align 8
%call = call i8* @strcpy(i8* %arraydecay, i8* %0)
- %buf1 = getelementptr inbounds %struct.foo.0* %b, i32 0, i32 0
- %arraydecay2 = getelementptr inbounds [4 x i8]* %buf1, i32 0, i32 0
- %call3 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i8* %arraydecay2)
+ %buf1 = getelementptr inbounds %struct.foo.0, %struct.foo.0* %b, i32 0, i32 0
+ %arraydecay2 = getelementptr inbounds [4 x i8], [4 x i8]* %buf1, i32 0, i32 0
+ %call3 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i8* %arraydecay2)
ret void
}
@@ -470,13 +470,13 @@ entry:
%a.addr = alloca i8*, align 8
%b = alloca %struct.foo.0, align 1
store i8* %a, i8** %a.addr, align 8
- %buf = getelementptr inbounds %struct.foo.0* %b, i32 0, i32 0
- %arraydecay = getelementptr inbounds [4 x i8]* %buf, i32 0, i32 0
- %0 = load i8** %a.addr, align 8
+ %buf = getelementptr inbounds %struct.foo.0, %struct.foo.0* %b, i32 0, i32 0
+ %arraydecay = getelementptr inbounds [4 x i8], [4 x i8]* %buf, i32 0, i32 0
+ %0 = load i8*, i8** %a.addr, align 8
%call = call i8* @strcpy(i8* %arraydecay, i8* %0)
- %buf1 = getelementptr inbounds %struct.foo.0* %b, i32 0, i32 0
- %arraydecay2 = getelementptr inbounds [4 x i8]* %buf1, i32 0, i32 0
- %call3 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i8* %arraydecay2)
+ %buf1 = getelementptr inbounds %struct.foo.0, %struct.foo.0* %b, i32 0, i32 0
+ %arraydecay2 = getelementptr inbounds [4 x i8], [4 x i8]* %buf1, i32 0, i32 0
+ %call3 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i8* %arraydecay2)
ret void
}
@@ -504,13 +504,13 @@ entry:
%a.addr = alloca i8*, align 8
%b = alloca %struct.foo.0, align 1
store i8* %a, i8** %a.addr, align 8
- %buf = getelementptr inbounds %struct.foo.0* %b, i32 0, i32 0
- %arraydecay = getelementptr inbounds [4 x i8]* %buf, i32 0, i32 0
- %0 = load i8** %a.addr, align 8
+ %buf = getelementptr inbounds %struct.foo.0, %struct.foo.0* %b, i32 0, i32 0
+ %arraydecay = getelementptr inbounds [4 x i8], [4 x i8]* %buf, i32 0, i32 0
+ %0 = load i8*, i8** %a.addr, align 8
%call = call i8* @strcpy(i8* %arraydecay, i8* %0)
- %buf1 = getelementptr inbounds %struct.foo.0* %b, i32 0, i32 0
- %arraydecay2 = getelementptr inbounds [4 x i8]* %buf1, i32 0, i32 0
- %call3 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i8* %arraydecay2)
+ %buf1 = getelementptr inbounds %struct.foo.0, %struct.foo.0* %b, i32 0, i32 0
+ %arraydecay2 = getelementptr inbounds [4 x i8], [4 x i8]* %buf1, i32 0, i32 0
+ %call3 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i8* %arraydecay2)
ret void
}
@@ -538,13 +538,13 @@ entry:
%a.addr = alloca i8*, align 8
%b = alloca %struct.foo.0, align 1
store i8* %a, i8** %a.addr, align 8
- %buf = getelementptr inbounds %struct.foo.0* %b, i32 0, i32 0
- %arraydecay = getelementptr inbounds [4 x i8]* %buf, i32 0, i32 0
- %0 = load i8** %a.addr, align 8
+ %buf = getelementptr inbounds %struct.foo.0, %struct.foo.0* %b, i32 0, i32 0
+ %arraydecay = getelementptr inbounds [4 x i8], [4 x i8]* %buf, i32 0, i32 0
+ %0 = load i8*, i8** %a.addr, align 8
%call = call i8* @strcpy(i8* %arraydecay, i8* %0)
- %buf1 = getelementptr inbounds %struct.foo.0* %b, i32 0, i32 0
- %arraydecay2 = getelementptr inbounds [4 x i8]* %buf1, i32 0, i32 0
- %call3 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i8* %arraydecay2)
+ %buf1 = getelementptr inbounds %struct.foo.0, %struct.foo.0* %b, i32 0, i32 0
+ %arraydecay2 = getelementptr inbounds [4 x i8], [4 x i8]* %buf1, i32 0, i32 0
+ %call3 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i8* %arraydecay2)
ret void
}
@@ -570,8 +570,8 @@ entry:
; DARWIN-X64: .cfi_endproc
%a.addr = alloca i8*, align 8
store i8* %a, i8** %a.addr, align 8
- %0 = load i8** %a.addr, align 8
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i8* %0)
+ %0 = load i8*, i8** %a.addr, align 8
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i8* %0)
ret void
}
@@ -598,8 +598,8 @@ entry:
; DARWIN-X64: .cfi_endproc
%a.addr = alloca i8*, align 8
store i8* %a, i8** %a.addr, align 8
- %0 = load i8** %a.addr, align 8
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i8* %0)
+ %0 = load i8*, i8** %a.addr, align 8
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i8* %0)
ret void
}
@@ -626,8 +626,8 @@ entry:
; DARWIN-X64: .cfi_endproc
%a.addr = alloca i8*, align 8
store i8* %a, i8** %a.addr, align 8
- %0 = load i8** %a.addr, align 8
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i8* %0)
+ %0 = load i8*, i8** %a.addr, align 8
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i8* %0)
ret void
}
@@ -654,8 +654,8 @@ entry:
; DARWIN-X64: callq ___stack_chk_fail
%a.addr = alloca i8*, align 8
store i8* %a, i8** %a.addr, align 8
- %0 = load i8** %a.addr, align 8
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i8* %0)
+ %0 = load i8*, i8** %a.addr, align 8
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i8* %0)
ret void
}
@@ -683,7 +683,7 @@ entry:
%a = alloca i32, align 4
%j = alloca i32*, align 8
store i32 0, i32* %retval
- %0 = load i32* %a, align 4
+ %0 = load i32, i32* %a, align 4
%add = add nsw i32 %0, 1
store i32 %add, i32* %a, align 4
store i32* %a, i32** %j, align 8
@@ -715,7 +715,7 @@ entry:
%a = alloca i32, align 4
%j = alloca i32*, align 8
store i32 0, i32* %retval
- %0 = load i32* %a, align 4
+ %0 = load i32, i32* %a, align 4
%add = add nsw i32 %0, 1
store i32 %add, i32* %a, align 4
store i32* %a, i32** %j, align 8
@@ -747,7 +747,7 @@ entry:
%a = alloca i32, align 4
%j = alloca i32*, align 8
store i32 0, i32* %retval
- %0 = load i32* %a, align 4
+ %0 = load i32, i32* %a, align 4
%add = add nsw i32 %0, 1
store i32 %add, i32* %a, align 4
store i32* %a, i32** %j, align 8
@@ -779,7 +779,7 @@ entry:
%a = alloca i32, align 4
%j = alloca i32*, align 8
store i32 0, i32* %retval
- %0 = load i32* %a, align 4
+ %0 = load i32, i32* %a, align 4
%add = add nsw i32 %0, 1
store i32 %add, i32* %a, align 4
store i32* %a, i32** %j, align 8
@@ -808,7 +808,7 @@ entry:
; DARWIN-X64: .cfi_endproc
%a = alloca i32, align 4
%0 = ptrtoint i32* %a to i64
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i64 %0)
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i64 %0)
ret void
}
@@ -835,7 +835,7 @@ entry:
; DARWIN-X64: .cfi_endproc
%a = alloca i32, align 4
%0 = ptrtoint i32* %a to i64
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i64 %0)
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i64 %0)
ret void
}
@@ -862,7 +862,7 @@ entry:
; DARWIN-X64: callq ___stack_chk_fail
%a = alloca i32, align 4
%0 = ptrtoint i32* %a to i64
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i64 %0)
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i64 %0)
ret void
}
@@ -889,7 +889,7 @@ entry:
; DARWIN-X64: callq ___stack_chk_fail
%a = alloca i32, align 4
%0 = ptrtoint i32* %a to i64
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i64 %0)
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i64 %0)
ret void
}
@@ -1021,7 +1021,7 @@ entry:
store double %call, double* %x, align 8
%cmp2 = fcmp ogt double %call, 0.000000e+00
%y.1 = select i1 %cmp2, double* %x, double* null
- %call2 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), double* %y.1)
+ %call2 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), double* %y.1)
ret void
}
@@ -1051,7 +1051,7 @@ entry:
store double %call, double* %x, align 8
%cmp2 = fcmp ogt double %call, 0.000000e+00
%y.1 = select i1 %cmp2, double* %x, double* null
- %call2 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), double* %y.1)
+ %call2 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), double* %y.1)
ret void
}
@@ -1081,7 +1081,7 @@ entry:
store double %call, double* %x, align 8
%cmp2 = fcmp ogt double %call, 0.000000e+00
%y.1 = select i1 %cmp2, double* %x, double* null
- %call2 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), double* %y.1)
+ %call2 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), double* %y.1)
ret void
}
@@ -1111,7 +1111,7 @@ entry:
store double %call, double* %x, align 8
%cmp2 = fcmp ogt double %call, 0.000000e+00
%y.1 = select i1 %cmp2, double* %x, double* null
- %call2 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), double* %y.1)
+ %call2 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), double* %y.1)
ret void
}
@@ -1155,7 +1155,7 @@ if.then3: ; preds = %if.else
if.end4: ; preds = %if.else, %if.then3, %if.then
%y.0 = phi double* [ null, %if.then ], [ %x, %if.then3 ], [ null, %if.else ]
- %call5 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), double* %y.0)
+ %call5 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), double* %y.0)
ret void
}
@@ -1200,7 +1200,7 @@ if.then3: ; preds = %if.else
if.end4: ; preds = %if.else, %if.then3, %if.then
%y.0 = phi double* [ null, %if.then ], [ %x, %if.then3 ], [ null, %if.else ]
- %call5 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), double* %y.0)
+ %call5 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), double* %y.0)
ret void
}
@@ -1245,7 +1245,7 @@ if.then3: ; preds = %if.else
if.end4: ; preds = %if.else, %if.then3, %if.then
%y.0 = phi double* [ null, %if.then ], [ %x, %if.then3 ], [ null, %if.else ]
- %call5 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), double* %y.0)
+ %call5 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), double* %y.0)
ret void
}
@@ -1290,7 +1290,7 @@ if.then3: ; preds = %if.else
if.end4: ; preds = %if.else, %if.then3, %if.then
%y.0 = phi double* [ null, %if.then ], [ %x, %if.then3 ], [ null, %if.else ]
- %call5 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), double* %y.0)
+ %call5 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), double* %y.0)
ret void
}
@@ -1316,10 +1316,10 @@ entry:
; DARWIN-X64: .cfi_endproc
%c = alloca %struct.pair, align 4
%b = alloca i32*, align 8
- %y = getelementptr inbounds %struct.pair* %c, i32 0, i32 1
+ %y = getelementptr inbounds %struct.pair, %struct.pair* %c, i32 0, i32 1
store i32* %y, i32** %b, align 8
- %0 = load i32** %b, align 8
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i32* %0)
+ %0 = load i32*, i32** %b, align 8
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i32* %0)
ret void
}
@@ -1346,10 +1346,10 @@ entry:
; DARWIN-X64: .cfi_endproc
%c = alloca %struct.pair, align 4
%b = alloca i32*, align 8
- %y = getelementptr inbounds %struct.pair* %c, i32 0, i32 1
+ %y = getelementptr inbounds %struct.pair, %struct.pair* %c, i32 0, i32 1
store i32* %y, i32** %b, align 8
- %0 = load i32** %b, align 8
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i32* %0)
+ %0 = load i32*, i32** %b, align 8
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i32* %0)
ret void
}
@@ -1376,10 +1376,10 @@ entry:
; DARWIN-X64: callq ___stack_chk_fail
%c = alloca %struct.pair, align 4
%b = alloca i32*, align 8
- %y = getelementptr inbounds %struct.pair* %c, i32 0, i32 1
+ %y = getelementptr inbounds %struct.pair, %struct.pair* %c, i32 0, i32 1
store i32* %y, i32** %b, align 8
- %0 = load i32** %b, align 8
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i32* %0)
+ %0 = load i32*, i32** %b, align 8
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i32* %0)
ret void
}
@@ -1406,10 +1406,10 @@ entry:
; DARWIN-X64: callq ___stack_chk_fail
%c = alloca %struct.pair, align 4
%b = alloca i32*, align 8
- %y = getelementptr inbounds %struct.pair* %c, i32 0, i32 1
+ %y = getelementptr inbounds %struct.pair, %struct.pair* %c, i32 0, i32 1
store i32* %y, i32** %b, align 8
- %0 = load i32** %b, align 8
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i32* %0)
+ %0 = load i32*, i32** %b, align 8
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i32* %0)
ret void
}
@@ -1435,9 +1435,9 @@ entry:
; DARWIN-X64: .cfi_endproc
%c = alloca %struct.pair, align 4
%b = alloca i32*, align 8
- %y = getelementptr inbounds %struct.pair* %c, i32 0, i32 1
+ %y = getelementptr inbounds %struct.pair, %struct.pair* %c, i32 0, i32 1
%0 = ptrtoint i32* %y to i64
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i64 %0)
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i64 %0)
ret void
}
@@ -1464,9 +1464,9 @@ entry:
; DARWIN-X64: .cfi_endproc
%c = alloca %struct.pair, align 4
%b = alloca i32*, align 8
- %y = getelementptr inbounds %struct.pair* %c, i32 0, i32 1
+ %y = getelementptr inbounds %struct.pair, %struct.pair* %c, i32 0, i32 1
%0 = ptrtoint i32* %y to i64
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i64 %0)
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i64 %0)
ret void
}
@@ -1492,9 +1492,9 @@ entry:
; DARWIN-X64: callq ___stack_chk_fail
%c = alloca %struct.pair, align 4
%b = alloca i32*, align 8
- %y = getelementptr inbounds %struct.pair* %c, i32 0, i32 1
+ %y = getelementptr inbounds %struct.pair, %struct.pair* %c, i32 0, i32 1
%0 = ptrtoint i32* %y to i64
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i64 %0)
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i64 %0)
ret void
}
@@ -1521,9 +1521,9 @@ entry:
; DARWIN-X64: callq ___stack_chk_fail
%c = alloca %struct.pair, align 4
%b = alloca i32*, align 8
- %y = getelementptr inbounds %struct.pair* %c, i32 0, i32 1
+ %y = getelementptr inbounds %struct.pair, %struct.pair* %c, i32 0, i32 1
%0 = ptrtoint i32* %y to i64
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i64 %0)
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i64 %0)
ret void
}
@@ -1548,8 +1548,8 @@ entry:
; DARWIN-X64-NOT: callq ___stack_chk_fail
; DARWIN-X64: .cfi_endproc
%c = alloca %struct.pair, align 4
- %y = getelementptr inbounds %struct.pair* %c, i64 0, i32 1
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32* %y)
+ %y = getelementptr inbounds %struct.pair, %struct.pair* %c, i64 0, i32 1
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), i32* %y)
ret void
}
@@ -1575,8 +1575,8 @@ entry:
; DARWIN-X64-NOT: callq ___stack_chk_fail
; DARWIN-X64: .cfi_endproc
%c = alloca %struct.pair, align 4
- %y = getelementptr inbounds %struct.pair* %c, i64 0, i32 1
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32* %y)
+ %y = getelementptr inbounds %struct.pair, %struct.pair* %c, i64 0, i32 1
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), i32* %y)
ret void
}
@@ -1602,8 +1602,8 @@ entry:
; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
; DARWIN-X64: callq ___stack_chk_fail
%c = alloca %struct.pair, align 4
- %y = getelementptr inbounds %struct.pair* %c, i64 0, i32 1
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32* %y)
+ %y = getelementptr inbounds %struct.pair, %struct.pair* %c, i64 0, i32 1
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), i32* %y)
ret void
}
@@ -1629,8 +1629,8 @@ entry:
; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
; DARWIN-X64: callq ___stack_chk_fail
%c = alloca %struct.pair, align 4
- %y = getelementptr inbounds %struct.pair* %c, i64 0, i32 1
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32* %y)
+ %y = getelementptr inbounds %struct.pair, %struct.pair* %c, i64 0, i32 1
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), i32* %y)
ret void
}
@@ -1655,8 +1655,8 @@ entry:
; DARWIN-X64-NOT: callq ___stack_chk_fail
; DARWIN-X64: .cfi_endproc
%a = alloca i32, align 4
- %add.ptr5 = getelementptr inbounds i32* %a, i64 -12
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32* %add.ptr5)
+ %add.ptr5 = getelementptr inbounds i32, i32* %a, i64 -12
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), i32* %add.ptr5)
ret void
}
@@ -1682,8 +1682,8 @@ entry:
; DARWIN-X64-NOT: callq ___stack_chk_fail
; DARWIN-X64: .cfi_endproc
%a = alloca i32, align 4
- %add.ptr5 = getelementptr inbounds i32* %a, i64 -12
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32* %add.ptr5)
+ %add.ptr5 = getelementptr inbounds i32, i32* %a, i64 -12
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), i32* %add.ptr5)
ret void
}
@@ -1709,8 +1709,8 @@ entry:
; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
; DARWIN-X64: callq ___stack_chk_fail
%a = alloca i32, align 4
- %add.ptr5 = getelementptr inbounds i32* %a, i64 -12
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32* %add.ptr5)
+ %add.ptr5 = getelementptr inbounds i32, i32* %a, i64 -12
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), i32* %add.ptr5)
ret void
}
@@ -1736,8 +1736,8 @@ entry:
; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
; DARWIN-X64: callq ___stack_chk_fail
%a = alloca i32, align 4
- %add.ptr5 = getelementptr inbounds i32* %a, i64 -12
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32* %add.ptr5)
+ %add.ptr5 = getelementptr inbounds i32, i32* %a, i64 -12
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), i32* %add.ptr5)
ret void
}
@@ -1767,8 +1767,8 @@ entry:
store i32 0, i32* %a, align 4
%0 = bitcast i32* %a to float*
store float* %0, float** %b, align 8
- %1 = load float** %b, align 8
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), float* %1)
+ %1 = load float*, float** %b, align 8
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), float* %1)
ret void
}
@@ -1799,8 +1799,8 @@ entry:
store i32 0, i32* %a, align 4
%0 = bitcast i32* %a to float*
store float* %0, float** %b, align 8
- %1 = load float** %b, align 8
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), float* %1)
+ %1 = load float*, float** %b, align 8
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), float* %1)
ret void
}
@@ -1831,8 +1831,8 @@ entry:
store i32 0, i32* %a, align 4
%0 = bitcast i32* %a to float*
store float* %0, float** %b, align 8
- %1 = load float** %b, align 8
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), float* %1)
+ %1 = load float*, float** %b, align 8
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), float* %1)
ret void
}
@@ -1863,8 +1863,8 @@ entry:
store i32 0, i32* %a, align 4
%0 = bitcast i32* %a to float*
store float* %0, float** %b, align 8
- %1 = load float** %b, align 8
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), float* %1)
+ %1 = load float*, float** %b, align 8
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), float* %1)
ret void
}
@@ -2004,9 +2004,9 @@ entry:
; DARWIN-X64-NOT: callq ___stack_chk_fail
; DARWIN-X64: .cfi_endproc
%c = alloca %struct.vec, align 16
- %y = getelementptr inbounds %struct.vec* %c, i64 0, i32 0
- %add.ptr = getelementptr inbounds <4 x i32>* %y, i64 -12
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), <4 x i32>* %add.ptr)
+ %y = getelementptr inbounds %struct.vec, %struct.vec* %c, i64 0, i32 0
+ %add.ptr = getelementptr inbounds <4 x i32>, <4 x i32>* %y, i64 -12
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), <4 x i32>* %add.ptr)
ret void
}
@@ -2032,9 +2032,9 @@ entry:
; DARWIN-X64-NOT: callq ___stack_chk_fail
; DARWIN-X64: .cfi_endproc
%c = alloca %struct.vec, align 16
- %y = getelementptr inbounds %struct.vec* %c, i64 0, i32 0
- %add.ptr = getelementptr inbounds <4 x i32>* %y, i64 -12
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), <4 x i32>* %add.ptr)
+ %y = getelementptr inbounds %struct.vec, %struct.vec* %c, i64 0, i32 0
+ %add.ptr = getelementptr inbounds <4 x i32>, <4 x i32>* %y, i64 -12
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), <4 x i32>* %add.ptr)
ret void
}
@@ -2060,9 +2060,9 @@ entry:
; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
; DARWIN-X64: callq ___stack_chk_fail
%c = alloca %struct.vec, align 16
- %y = getelementptr inbounds %struct.vec* %c, i64 0, i32 0
- %add.ptr = getelementptr inbounds <4 x i32>* %y, i64 -12
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), <4 x i32>* %add.ptr)
+ %y = getelementptr inbounds %struct.vec, %struct.vec* %c, i64 0, i32 0
+ %add.ptr = getelementptr inbounds <4 x i32>, <4 x i32>* %y, i64 -12
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), <4 x i32>* %add.ptr)
ret void
}
@@ -2088,9 +2088,9 @@ entry:
; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
; DARWIN-X64: callq ___stack_chk_fail
%c = alloca %struct.vec, align 16
- %y = getelementptr inbounds %struct.vec* %c, i64 0, i32 0
- %add.ptr = getelementptr inbounds <4 x i32>* %y, i64 -12
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), <4 x i32>* %add.ptr)
+ %y = getelementptr inbounds %struct.vec, %struct.vec* %c, i64 0, i32 0
+ %add.ptr = getelementptr inbounds <4 x i32>, <4 x i32>* %y, i64 -12
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), <4 x i32>* %add.ptr)
ret void
}
@@ -2264,9 +2264,9 @@ entry:
%c = alloca %struct.pair, align 4
%exn.slot = alloca i8*
%ehselector.slot = alloca i32
- %a = getelementptr inbounds %struct.pair* %c, i32 0, i32 0
+ %a = getelementptr inbounds %struct.pair, %struct.pair* %c, i32 0, i32 0
store i32 0, i32* %a, align 4
- %a1 = getelementptr inbounds %struct.pair* %c, i32 0, i32 0
+ %a1 = getelementptr inbounds %struct.pair, %struct.pair* %c, i32 0, i32 0
invoke void @_Z3exceptPi(i32* %a1)
to label %invoke.cont unwind label %lpad
@@ -2304,9 +2304,9 @@ entry:
%c = alloca %struct.pair, align 4
%exn.slot = alloca i8*
%ehselector.slot = alloca i32
- %a = getelementptr inbounds %struct.pair* %c, i32 0, i32 0
+ %a = getelementptr inbounds %struct.pair, %struct.pair* %c, i32 0, i32 0
store i32 0, i32* %a, align 4
- %a1 = getelementptr inbounds %struct.pair* %c, i32 0, i32 0
+ %a1 = getelementptr inbounds %struct.pair, %struct.pair* %c, i32 0, i32 0
invoke void @_Z3exceptPi(i32* %a1)
to label %invoke.cont unwind label %lpad
@@ -2344,9 +2344,9 @@ entry:
%c = alloca %struct.pair, align 4
%exn.slot = alloca i8*
%ehselector.slot = alloca i32
- %a = getelementptr inbounds %struct.pair* %c, i32 0, i32 0
+ %a = getelementptr inbounds %struct.pair, %struct.pair* %c, i32 0, i32 0
store i32 0, i32* %a, align 4
- %a1 = getelementptr inbounds %struct.pair* %c, i32 0, i32 0
+ %a1 = getelementptr inbounds %struct.pair, %struct.pair* %c, i32 0, i32 0
invoke void @_Z3exceptPi(i32* %a1)
to label %invoke.cont unwind label %lpad
@@ -2388,9 +2388,9 @@ entry:
%c = alloca %struct.pair, align 4
%exn.slot = alloca i8*
%ehselector.slot = alloca i32
- %a = getelementptr inbounds %struct.pair* %c, i32 0, i32 0
+ %a = getelementptr inbounds %struct.pair, %struct.pair* %c, i32 0, i32 0
store i32 0, i32* %a, align 4
- %a1 = getelementptr inbounds %struct.pair* %c, i32 0, i32 0
+ %a1 = getelementptr inbounds %struct.pair, %struct.pair* %c, i32 0, i32 0
invoke void @_Z3exceptPi(i32* %a1)
to label %invoke.cont unwind label %lpad
@@ -2428,7 +2428,7 @@ entry:
%call = call i32* @getp()
store i32* %call, i32** %a, align 8
store i32** %a, i32*** %b, align 8
- %0 = load i32*** %b, align 8
+ %0 = load i32**, i32*** %b, align 8
call void @funcall2(i32** %0)
ret void
}
@@ -2459,7 +2459,7 @@ entry:
%call = call i32* @getp()
store i32* %call, i32** %a, align 8
store i32** %a, i32*** %b, align 8
- %0 = load i32*** %b, align 8
+ %0 = load i32**, i32*** %b, align 8
call void @funcall2(i32** %0)
ret void
}
@@ -2490,7 +2490,7 @@ entry:
%call = call i32* @getp()
store i32* %call, i32** %a, align 8
store i32** %a, i32*** %b, align 8
- %0 = load i32*** %b, align 8
+ %0 = load i32**, i32*** %b, align 8
call void @funcall2(i32** %0)
ret void
}
@@ -2521,7 +2521,7 @@ entry:
%call = call i32* @getp()
store i32* %call, i32** %a, align 8
store i32** %a, i32*** %b, align 8
- %0 = load i32*** %b, align 8
+ %0 = load i32**, i32*** %b, align 8
call void @funcall2(i32** %0)
ret void
}
@@ -2552,7 +2552,7 @@ entry:
store i32* %call, i32** %a, align 8
%0 = bitcast i32** %a to float**
store float** %0, float*** %b, align 8
- %1 = load float*** %b, align 8
+ %1 = load float**, float*** %b, align 8
call void @funfloat2(float** %1)
ret void
}
@@ -2584,7 +2584,7 @@ entry:
store i32* %call, i32** %a, align 8
%0 = bitcast i32** %a to float**
store float** %0, float*** %b, align 8
- %1 = load float*** %b, align 8
+ %1 = load float**, float*** %b, align 8
call void @funfloat2(float** %1)
ret void
}
@@ -2616,7 +2616,7 @@ entry:
store i32* %call, i32** %a, align 8
%0 = bitcast i32** %a to float**
store float** %0, float*** %b, align 8
- %1 = load float*** %b, align 8
+ %1 = load float**, float*** %b, align 8
call void @funfloat2(float** %1)
ret void
}
@@ -2648,7 +2648,7 @@ entry:
store i32* %call, i32** %a, align 8
%0 = bitcast i32** %a to float**
store float** %0, float*** %b, align 8
- %1 = load float*** %b, align 8
+ %1 = load float**, float*** %b, align 8
call void @funfloat2(float** %1)
ret void
}
@@ -2674,9 +2674,9 @@ entry:
; DARWIN-X64-NOT: callq ___stack_chk_fail
; DARWIN-X64: .cfi_endproc
%a = alloca %class.A, align 1
- %array = getelementptr inbounds %class.A* %a, i32 0, i32 0
- %arrayidx = getelementptr inbounds [2 x i8]* %array, i32 0, i64 0
- %0 = load i8* %arrayidx, align 1
+ %array = getelementptr inbounds %class.A, %class.A* %a, i32 0, i32 0
+ %arrayidx = getelementptr inbounds [2 x i8], [2 x i8]* %array, i32 0, i64 0
+ %0 = load i8, i8* %arrayidx, align 1
ret i8 %0
}
@@ -2702,9 +2702,9 @@ entry:
; DARWIN-X64-NOT: callq ___stack_chk_fail
; DARWIN-X64: .cfi_endproc
%a = alloca %class.A, align 1
- %array = getelementptr inbounds %class.A* %a, i32 0, i32 0
- %arrayidx = getelementptr inbounds [2 x i8]* %array, i32 0, i64 0
- %0 = load i8* %arrayidx, align 1
+ %array = getelementptr inbounds %class.A, %class.A* %a, i32 0, i32 0
+ %arrayidx = getelementptr inbounds [2 x i8], [2 x i8]* %array, i32 0, i64 0
+ %0 = load i8, i8* %arrayidx, align 1
ret i8 %0
}
@@ -2730,9 +2730,9 @@ entry:
; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
; DARWIN-X64: callq ___stack_chk_fail
%a = alloca %class.A, align 1
- %array = getelementptr inbounds %class.A* %a, i32 0, i32 0
- %arrayidx = getelementptr inbounds [2 x i8]* %array, i32 0, i64 0
- %0 = load i8* %arrayidx, align 1
+ %array = getelementptr inbounds %class.A, %class.A* %a, i32 0, i32 0
+ %arrayidx = getelementptr inbounds [2 x i8], [2 x i8]* %array, i32 0, i64 0
+ %0 = load i8, i8* %arrayidx, align 1
ret i8 %0
}
@@ -2758,9 +2758,9 @@ entry:
; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
; DARWIN-X64: callq ___stack_chk_fail
%a = alloca %class.A, align 1
- %array = getelementptr inbounds %class.A* %a, i32 0, i32 0
- %arrayidx = getelementptr inbounds [2 x i8]* %array, i32 0, i64 0
- %0 = load i8* %arrayidx, align 1
+ %array = getelementptr inbounds %class.A, %class.A* %a, i32 0, i32 0
+ %arrayidx = getelementptr inbounds [2 x i8], [2 x i8]* %array, i32 0, i64 0
+ %0 = load i8, i8* %arrayidx, align 1
ret i8 %0
}
@@ -2785,13 +2785,13 @@ entry:
; DARWIN-X64-NOT: callq ___stack_chk_fail
; DARWIN-X64: .cfi_endproc
%x = alloca %struct.deep, align 1
- %b = getelementptr inbounds %struct.deep* %x, i32 0, i32 0
+ %b = getelementptr inbounds %struct.deep, %struct.deep* %x, i32 0, i32 0
%c = bitcast %union.anon* %b to %struct.anon*
- %d = getelementptr inbounds %struct.anon* %c, i32 0, i32 0
- %e = getelementptr inbounds %struct.anon.0* %d, i32 0, i32 0
+ %d = getelementptr inbounds %struct.anon, %struct.anon* %c, i32 0, i32 0
+ %e = getelementptr inbounds %struct.anon.0, %struct.anon.0* %d, i32 0, i32 0
%array = bitcast %union.anon.1* %e to [2 x i8]*
- %arrayidx = getelementptr inbounds [2 x i8]* %array, i32 0, i64 0
- %0 = load i8* %arrayidx, align 1
+ %arrayidx = getelementptr inbounds [2 x i8], [2 x i8]* %array, i32 0, i64 0
+ %0 = load i8, i8* %arrayidx, align 1
ret i8 %0
}
@@ -2817,13 +2817,13 @@ entry:
; DARWIN-X64-NOT: callq ___stack_chk_fail
; DARWIN-X64: .cfi_endproc
%x = alloca %struct.deep, align 1
- %b = getelementptr inbounds %struct.deep* %x, i32 0, i32 0
+ %b = getelementptr inbounds %struct.deep, %struct.deep* %x, i32 0, i32 0
%c = bitcast %union.anon* %b to %struct.anon*
- %d = getelementptr inbounds %struct.anon* %c, i32 0, i32 0
- %e = getelementptr inbounds %struct.anon.0* %d, i32 0, i32 0
+ %d = getelementptr inbounds %struct.anon, %struct.anon* %c, i32 0, i32 0
+ %e = getelementptr inbounds %struct.anon.0, %struct.anon.0* %d, i32 0, i32 0
%array = bitcast %union.anon.1* %e to [2 x i8]*
- %arrayidx = getelementptr inbounds [2 x i8]* %array, i32 0, i64 0
- %0 = load i8* %arrayidx, align 1
+ %arrayidx = getelementptr inbounds [2 x i8], [2 x i8]* %array, i32 0, i64 0
+ %0 = load i8, i8* %arrayidx, align 1
ret i8 %0
}
@@ -2849,13 +2849,13 @@ entry:
; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
; DARWIN-X64: callq ___stack_chk_fail
%x = alloca %struct.deep, align 1
- %b = getelementptr inbounds %struct.deep* %x, i32 0, i32 0
+ %b = getelementptr inbounds %struct.deep, %struct.deep* %x, i32 0, i32 0
%c = bitcast %union.anon* %b to %struct.anon*
- %d = getelementptr inbounds %struct.anon* %c, i32 0, i32 0
- %e = getelementptr inbounds %struct.anon.0* %d, i32 0, i32 0
+ %d = getelementptr inbounds %struct.anon, %struct.anon* %c, i32 0, i32 0
+ %e = getelementptr inbounds %struct.anon.0, %struct.anon.0* %d, i32 0, i32 0
%array = bitcast %union.anon.1* %e to [2 x i8]*
- %arrayidx = getelementptr inbounds [2 x i8]* %array, i32 0, i64 0
- %0 = load i8* %arrayidx, align 1
+ %arrayidx = getelementptr inbounds [2 x i8], [2 x i8]* %array, i32 0, i64 0
+ %0 = load i8, i8* %arrayidx, align 1
ret i8 %0
}
@@ -2881,13 +2881,13 @@ entry:
; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
; DARWIN-X64: callq ___stack_chk_fail
%x = alloca %struct.deep, align 1
- %b = getelementptr inbounds %struct.deep* %x, i32 0, i32 0
+ %b = getelementptr inbounds %struct.deep, %struct.deep* %x, i32 0, i32 0
%c = bitcast %union.anon* %b to %struct.anon*
- %d = getelementptr inbounds %struct.anon* %c, i32 0, i32 0
- %e = getelementptr inbounds %struct.anon.0* %d, i32 0, i32 0
+ %d = getelementptr inbounds %struct.anon, %struct.anon* %c, i32 0, i32 0
+ %e = getelementptr inbounds %struct.anon.0, %struct.anon.0* %d, i32 0, i32 0
%array = bitcast %union.anon.1* %e to [2 x i8]*
- %arrayidx = getelementptr inbounds [2 x i8]* %array, i32 0, i64 0
- %0 = load i8* %arrayidx, align 1
+ %arrayidx = getelementptr inbounds [2 x i8], [2 x i8]* %array, i32 0, i64 0
+ %0 = load i8, i8* %arrayidx, align 1
ret i8 %0
}
@@ -2914,7 +2914,7 @@ entry:
%n.addr = alloca i32, align 4
%a = alloca i32*, align 8
store i32 %n, i32* %n.addr, align 4
- %0 = load i32* %n.addr, align 4
+ %0 = load i32, i32* %n.addr, align 4
%conv = sext i32 %0 to i64
%1 = alloca i8, i64 %conv
%2 = bitcast i8* %1 to i32*
@@ -2946,7 +2946,7 @@ entry:
%n.addr = alloca i32, align 4
%a = alloca i32*, align 8
store i32 %n, i32* %n.addr, align 4
- %0 = load i32* %n.addr, align 4
+ %0 = load i32, i32* %n.addr, align 4
%conv = sext i32 %0 to i64
%1 = alloca i8, i64 %conv
%2 = bitcast i8* %1 to i32*
@@ -2978,7 +2978,7 @@ entry:
%n.addr = alloca i32, align 4
%a = alloca i32*, align 8
store i32 %n, i32* %n.addr, align 4
- %0 = load i32* %n.addr, align 4
+ %0 = load i32, i32* %n.addr, align 4
%conv = sext i32 %0 to i64
%1 = alloca i8, i64 %conv
%2 = bitcast i8* %1 to i32*
@@ -3010,7 +3010,7 @@ entry:
%n.addr = alloca i32, align 4
%a = alloca i32*, align 8
store i32 %n, i32* %n.addr, align 4
- %0 = load i32* %n.addr, align 4
+ %0 = load i32, i32* %n.addr, align 4
%conv = sext i32 %0 to i64
%1 = alloca i8, i64 %conv
%2 = bitcast i8* %1 to i32*
@@ -3039,8 +3039,8 @@ entry:
; DARWIN-X64-NOT: callq ___stack_chk_fail
; DARWIN-X64: .cfi_endproc
%a = alloca [4 x i32], align 16
- %arrayidx = getelementptr inbounds [4 x i32]* %a, i32 0, i64 0
- %0 = load i32* %arrayidx, align 4
+ %arrayidx = getelementptr inbounds [4 x i32], [4 x i32]* %a, i32 0, i64 0
+ %0 = load i32, i32* %arrayidx, align 4
ret i32 %0
}
@@ -3066,8 +3066,8 @@ entry:
; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
; DARWIN-X64: callq ___stack_chk_fail
%a = alloca [4 x i32], align 16
- %arrayidx = getelementptr inbounds [4 x i32]* %a, i32 0, i64 0
- %0 = load i32* %arrayidx, align 4
+ %arrayidx = getelementptr inbounds [4 x i32], [4 x i32]* %a, i32 0, i64 0
+ %0 = load i32, i32* %arrayidx, align 4
ret i32 %0
}
@@ -3093,8 +3093,8 @@ entry:
; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
; DARWIN-X64: callq ___stack_chk_fail
%a = alloca [4 x i32], align 16
- %arrayidx = getelementptr inbounds [4 x i32]* %a, i32 0, i64 0
- %0 = load i32* %arrayidx, align 4
+ %arrayidx = getelementptr inbounds [4 x i32], [4 x i32]* %a, i32 0, i64 0
+ %0 = load i32, i32* %arrayidx, align 4
ret i32 %0
}
@@ -3120,8 +3120,8 @@ entry:
; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
; DARWIN-X64: callq ___stack_chk_fail
%a = alloca [4 x i32], align 16
- %arrayidx = getelementptr inbounds [4 x i32]* %a, i32 0, i64 0
- %0 = load i32* %arrayidx, align 4
+ %arrayidx = getelementptr inbounds [4 x i32], [4 x i32]* %a, i32 0, i64 0
+ %0 = load i32, i32* %arrayidx, align 4
ret i32 %0
}
@@ -3149,10 +3149,10 @@ entry:
; DARWIN-X64-NOT: callq ___stack_chk_fail
; DARWIN-X64: .cfi_endproc
%c = alloca %struct.nest, align 4
- %b = getelementptr inbounds %struct.nest* %c, i32 0, i32 1
- %_a = getelementptr inbounds %struct.pair* %b, i32 0, i32 0
- %0 = load i32* %_a, align 4
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i32 %0)
+ %b = getelementptr inbounds %struct.nest, %struct.nest* %c, i32 0, i32 1
+ %_a = getelementptr inbounds %struct.pair, %struct.pair* %b, i32 0, i32 0
+ %0 = load i32, i32* %_a, align 4
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i32 %0)
ret void
}
@@ -3181,8 +3181,8 @@ bb:
; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
; DARWIN-X64: callq ___stack_chk_fail
%tmp = alloca %struct.small*, align 8
- %tmp1 = call i32 (...)* @dummy(%struct.small** %tmp)
- %tmp2 = load %struct.small** %tmp, align 8
+ %tmp1 = call i32 (...) @dummy(%struct.small** %tmp)
+ %tmp2 = load %struct.small*, %struct.small** %tmp, align 8
%tmp3 = ptrtoint %struct.small* %tmp2 to i64
%tmp4 = trunc i64 %tmp3 to i32
%tmp5 = icmp sgt i32 %tmp4, 0
@@ -3192,8 +3192,8 @@ bb6: ; preds = %bb17, %bb
%tmp7 = phi %struct.small* [ %tmp19, %bb17 ], [ %tmp2, %bb ]
%tmp8 = phi i64 [ %tmp20, %bb17 ], [ 1, %bb ]
%tmp9 = phi i32 [ %tmp14, %bb17 ], [ %tmp1, %bb ]
- %tmp10 = getelementptr inbounds %struct.small* %tmp7, i64 0, i32 0
- %tmp11 = load i8* %tmp10, align 1
+ %tmp10 = getelementptr inbounds %struct.small, %struct.small* %tmp7, i64 0, i32 0
+ %tmp11 = load i8, i8* %tmp10, align 1
%tmp12 = icmp eq i8 %tmp11, 1
%tmp13 = add nsw i32 %tmp9, 8
%tmp14 = select i1 %tmp12, i32 %tmp13, i32 %tmp9
@@ -3202,14 +3202,14 @@ bb6: ; preds = %bb17, %bb
br i1 %tmp16, label %bb21, label %bb17
bb17: ; preds = %bb6
- %tmp18 = getelementptr inbounds %struct.small** %tmp, i64 %tmp8
- %tmp19 = load %struct.small** %tmp18, align 8
+ %tmp18 = getelementptr inbounds %struct.small*, %struct.small** %tmp, i64 %tmp8
+ %tmp19 = load %struct.small*, %struct.small** %tmp18, align 8
%tmp20 = add i64 %tmp8, 1
br label %bb6
bb21: ; preds = %bb6, %bb
%tmp22 = phi i32 [ %tmp1, %bb ], [ %tmp14, %bb6 ]
- %tmp23 = call i32 (...)* @dummy(i32 %tmp22)
+ %tmp23 = call i32 (...) @dummy(i32 %tmp22)
ret i32 undef
}
@@ -3234,8 +3234,8 @@ entry:
; DARWIN-X64-NOT: callq ___stack_chk_fail
; DARWIN-X64: .cfi_endproc
%test = alloca [32 x i8], align 16
- %arraydecay = getelementptr inbounds [32 x i8]* %test, i32 0, i32 0
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i8* %arraydecay)
+ %arraydecay = getelementptr inbounds [32 x i8], [32 x i8]* %test, i32 0, i32 0
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i8* %arraydecay)
ret i32 %call
}
@@ -3260,8 +3260,8 @@ entry:
; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
; DARWIN-X64: callq ___stack_chk_fail
%test = alloca [33 x i8], align 16
- %arraydecay = getelementptr inbounds [33 x i8]* %test, i32 0, i32 0
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i8* %arraydecay)
+ %arraydecay = getelementptr inbounds [33 x i8], [33 x i8]* %test, i32 0, i32 0
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i8* %arraydecay)
ret i32 %call
}
@@ -3286,8 +3286,8 @@ entry:
; DARWIN-X64-NOT: callq ___stack_chk_fail
; DARWIN-X64: .cfi_endproc
%test = alloca [4 x i8], align 1
- %arraydecay = getelementptr inbounds [4 x i8]* %test, i32 0, i32 0
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i8* %arraydecay)
+ %arraydecay = getelementptr inbounds [4 x i8], [4 x i8]* %test, i32 0, i32 0
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i8* %arraydecay)
ret i32 %call
}
@@ -3312,8 +3312,8 @@ entry:
; DARWIN-X64: mov{{l|q}} ___stack_chk_guard
; DARWIN-X64: callq ___stack_chk_fail
%test = alloca [5 x i8], align 1
- %arraydecay = getelementptr inbounds [5 x i8]* %test, i32 0, i32 0
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i8* %arraydecay)
+ %arraydecay = getelementptr inbounds [5 x i8], [5 x i8]* %test, i32 0, i32 0
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i8* %arraydecay)
ret i32 %call
}
@@ -3343,11 +3343,11 @@ entry:
%0 = bitcast { i64, i8 }* %test.coerce to i8*
%1 = bitcast %struct.small_char* %test to i8*
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 12, i32 0, i1 false)
- %2 = getelementptr { i64, i8 }* %test.coerce, i32 0, i32 0
- %3 = load i64* %2, align 1
- %4 = getelementptr { i64, i8 }* %test.coerce, i32 0, i32 1
- %5 = load i8* %4, align 1
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i64 %3, i8 %5)
+ %2 = getelementptr { i64, i8 }, { i64, i8 }* %test.coerce, i32 0, i32 0
+ %3 = load i64, i64* %2, align 1
+ %4 = getelementptr { i64, i8 }, { i64, i8 }* %test.coerce, i32 0, i32 1
+ %5 = load i8, i8* %4, align 1
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i64 %3, i8 %5)
ret i32 %call
}
@@ -3377,11 +3377,11 @@ entry:
%0 = bitcast { i64, i8 }* %test.coerce to i8*
%1 = bitcast %struct.small_char* %test to i8*
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 12, i32 0, i1 false)
- %2 = getelementptr { i64, i8 }* %test.coerce, i32 0, i32 0
- %3 = load i64* %2, align 1
- %4 = getelementptr { i64, i8 }* %test.coerce, i32 0, i32 1
- %5 = load i8* %4, align 1
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i64 %3, i8 %5)
+ %2 = getelementptr { i64, i8 }, { i64, i8 }* %test.coerce, i32 0, i32 0
+ %3 = load i64, i64* %2, align 1
+ %4 = getelementptr { i64, i8 }, { i64, i8 }* %test.coerce, i32 0, i32 1
+ %5 = load i8, i8* %4, align 1
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i64 %3, i8 %5)
ret i32 %call
}
@@ -3409,8 +3409,8 @@ entry:
%test = alloca i8*, align 8
%0 = alloca i8, i64 4
store i8* %0, i8** %test, align 8
- %1 = load i8** %test, align 8
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i8* %1)
+ %1 = load i8*, i8** %test, align 8
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i8* %1)
ret i32 %call
}
@@ -3437,8 +3437,8 @@ entry:
%test = alloca i8*, align 8
%0 = alloca i8, i64 5
store i8* %0, i8** %test, align 8
- %1 = load i8** %test, align 8
- %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i8* %1)
+ %1 = load i8*, i8** %test, align 8
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i8* %1)
ret i32 %call
}
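
The hunks in this file and in the surrounding files all apply the same mechanical rewrite: the vendor import tracks the LLVM IR syntax change in which load, getelementptr, and call spell out the loaded, indexed, or function type explicitly instead of deriving it from a pointer operand. A minimal sketch of the two spellings, using placeholder operands %p and %s that are not taken from these tests:

  ; old form, as on the "-" lines
  %v = load i32* %p, align 4
  %q = getelementptr inbounds i32* %p, i64 1
  %r = call i32 (i8*, ...)* @printf(i8* %s)

  ; new form, as on the "+" lines
  %v = load i32, i32* %p, align 4
  %q = getelementptr inbounds i32, i32* %p, i64 1
  %r = call i32 (i8*, ...) @printf(i8* %s)

The RUN-line change from -fast-isel-abort to -fast-isel-abort=1 in stackmap-fast-isel.ll below is evidently the same kind of mechanical update, following that flag's switch to an integer argument.
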
diff --git a/test/CodeGen/X86/stack-update-frame-opcode.ll b/test/CodeGen/X86/stack-update-frame-opcode.ll
index 9a5a2421233d..943e86e10d3a 100644
--- a/test/CodeGen/X86/stack-update-frame-opcode.ll
+++ b/test/CodeGen/X86/stack-update-frame-opcode.ll
@@ -16,7 +16,7 @@ entry:
; ATOM_LP64: leaq -1608
; ATOM_ILP32: leal -1608
- %arraydecay = getelementptr inbounds [400 x i32]* %arr, i64 0, i64 0
+ %arraydecay = getelementptr inbounds [400 x i32], [400 x i32]* %arr, i64 0, i64 0
%call = call i32 @foo(i32 %a, i32* %arraydecay) nounwind
ret i32 %call
diff --git a/test/CodeGen/X86/stack_guard_remat.ll b/test/CodeGen/X86/stack_guard_remat.ll
index dd639a7c7b4c..90ac2cc601fa 100644
--- a/test/CodeGen/X86/stack_guard_remat.ll
+++ b/test/CodeGen/X86/stack_guard_remat.ll
@@ -10,7 +10,7 @@ entry:
%a1 = alloca [256 x i32], align 16
%0 = bitcast [256 x i32]* %a1 to i8*
call void @llvm.lifetime.start(i64 1024, i8* %0)
- %arraydecay = getelementptr inbounds [256 x i32]* %a1, i64 0, i64 0
+ %arraydecay = getelementptr inbounds [256 x i32], [256 x i32]* %a1, i64 0, i64 0
call void @foo3(i32* %arraydecay)
call void asm sideeffect "foo2", "~{r12},~{r13},~{r14},~{r15},~{ebx},~{esi},~{edi},~{dirflag},~{fpsr},~{flags}"()
call void @llvm.lifetime.end(i64 1024, i8* %0)
diff --git a/test/CodeGen/X86/stackmap-fast-isel.ll b/test/CodeGen/X86/stackmap-fast-isel.ll
index dfb16adaa339..1392e5bd87c1 100644
--- a/test/CodeGen/X86/stackmap-fast-isel.ll
+++ b/test/CodeGen/X86/stackmap-fast-isel.ll
@@ -1,5 +1,5 @@
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7 | FileCheck %s
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7 -fast-isel -fast-isel-abort | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7 -fast-isel -fast-isel-abort=1 | FileCheck %s
; CHECK-LABEL: .section __LLVM_STACKMAPS,__llvm_stackmaps
; CHECK-NEXT: __LLVM_StackMaps:
@@ -99,7 +99,7 @@
define void @constantargs() {
entry:
- tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 1, i32 15, i16 65535, i16 -1, i32 65536, i32 2000000000, i32 2147483647, i32 -1, i32 4294967295, i32 4294967296, i64 2147483648, i64 4294967295, i64 4294967296, i64 -1)
+ tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 1, i32 15, i16 65535, i16 -1, i32 65536, i32 2000000000, i32 2147483647, i32 -1, i32 4294967295, i32 4294967296, i64 2147483648, i64 4294967295, i64 4294967296, i64 -1)
ret void
}
@@ -116,7 +116,7 @@ entry:
; CHECK-NEXT: .long 33
define void @liveConstant() {
- tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 15, i32 5, i32 33)
+ tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 15, i32 5, i32 33)
ret void
}
@@ -139,7 +139,7 @@ entry:
store i64 11, i64* %metadata1
store i64 12, i64* %metadata1
store i64 13, i64* %metadata1
- call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 16, i32 0, i64* %metadata1)
+ call void (i64, i32, ...) @llvm.experimental.stackmap(i64 16, i32 0, i64* %metadata1)
ret void
}
@@ -155,10 +155,10 @@ entry:
; CHECK-LABEL: .long L{{.*}}-_longid
define void @longid() {
entry:
- tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 4294967295, i32 0)
- tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 4294967296, i32 0)
- tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 9223372036854775807, i32 0)
- tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 -1, i32 0)
+ tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 4294967295, i32 0)
+ tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 4294967296, i32 0)
+ tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 9223372036854775807, i32 0)
+ tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 -1, i32 0)
ret void
}
diff --git a/test/CodeGen/X86/stackmap-large-constants.ll b/test/CodeGen/X86/stackmap-large-constants.ll
index 73ee4f3d1569..a38b9209a1cf 100644
--- a/test/CodeGen/X86/stackmap-large-constants.ll
+++ b/test/CodeGen/X86/stackmap-large-constants.ll
@@ -51,7 +51,7 @@
declare void @llvm.experimental.stackmap(i64, i32, ...)
define void @foo() {
- tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 0, i32 0, i64 9223372036854775807)
+ tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 0, i32 0, i64 9223372036854775807)
ret void
}
@@ -78,6 +78,6 @@ define void @foo() {
define void @bar() {
- tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 0, i32 0, i64 -9223372036854775808)
+ tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 0, i32 0, i64 -9223372036854775808)
ret void
}
diff --git a/test/CodeGen/X86/stackmap-liveness.ll b/test/CodeGen/X86/stackmap-liveness.ll
index 31553c0b6842..599b6265abfa 100644
--- a/test/CodeGen/X86/stackmap-liveness.ll
+++ b/test/CodeGen/X86/stackmap-liveness.ll
@@ -50,7 +50,7 @@ entry:
; PATCH-NEXT: .byte 16
; Align
; PATCH-NEXT: .align 3
- call anyregcc void (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i64 1, i32 12, i8* null, i32 0)
+ call anyregcc void (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.void(i64 1, i32 12, i8* null, i32 0)
%a2 = call i64 asm sideeffect "", "={r8}"() nounwind
%a3 = call i8 asm sideeffect "", "={ah}"() nounwind
%a4 = call <4 x double> asm sideeffect "", "={ymm0}"() nounwind
@@ -97,7 +97,7 @@ entry:
; PATCH-NEXT: .byte 16
; Align
; PATCH-NEXT: .align 3
- call anyregcc void (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i64 2, i32 12, i8* null, i32 0)
+ call anyregcc void (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.void(i64 2, i32 12, i8* null, i32 0)
call void asm sideeffect "", "{r8},{ah},{ymm0},{ymm1}"(i64 %a2, i8 %a3, <4 x double> %a4, <4 x double> %a5) nounwind
; StackMap 3 (no liveness information available)
@@ -129,7 +129,7 @@ entry:
; PATCH-NEXT: .byte 16
; Align
; PATCH-NEXT: .align 3
- call anyregcc void (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i64 3, i32 12, i8* null, i32 0)
+ call anyregcc void (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.void(i64 3, i32 12, i8* null, i32 0)
call void asm sideeffect "", "{xmm2}"(<2 x double> %a1) nounwind
ret void
}
@@ -166,8 +166,8 @@ entry:
; PATCH-NEXT: .byte 16
; Align
; PATCH-NEXT: .align 3
- call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 4, i32 5)
- call anyregcc void (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i64 5, i32 0, i8* null, i32 0)
+ call void (i64, i32, ...) @llvm.experimental.stackmap(i64 4, i32 5)
+ call anyregcc void (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.void(i64 5, i32 0, i8* null, i32 0)
call void asm sideeffect "", "{xmm2}"(<2 x double> %a1) nounwind
ret void
}
diff --git a/test/CodeGen/X86/stackmap-nops.ll b/test/CodeGen/X86/stackmap-nops.ll
index 7932c0dfb99d..08fee2ecd3e0 100644
--- a/test/CodeGen/X86/stackmap-nops.ll
+++ b/test/CodeGen/X86/stackmap-nops.ll
@@ -193,41 +193,41 @@ entry:
; CHECK-NEXT: .byte 102
; CHECK-NEXT: .byte 102
; CHECK-NEXT: nopw %cs:512(%rax,%rax)
- tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 0, i32 0)
- tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 1, i32 1)
- tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 2, i32 2)
- tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 3, i32 3)
- tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 4, i32 4)
- tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 5, i32 5)
- tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 6, i32 6)
- tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 7, i32 7)
- tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 8, i32 8)
- tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 9, i32 9)
- tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 10, i32 10)
- tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 11, i32 11)
- tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 12, i32 12)
- tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 13, i32 13)
- tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 14, i32 14)
- tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 15, i32 15)
- tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 16, i32 16)
- tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 17, i32 17)
- tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 18, i32 18)
- tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 19, i32 19)
- tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 20, i32 20)
- tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 21, i32 21)
- tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 22, i32 22)
- tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 23, i32 23)
- tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 24, i32 24)
- tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 25, i32 25)
- tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 26, i32 26)
- tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 27, i32 27)
- tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 28, i32 28)
- tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 29, i32 29)
- tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 30, i32 30)
+ tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 0, i32 0)
+ tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 1, i32 1)
+ tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 2, i32 2)
+ tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 3, i32 3)
+ tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 4, i32 4)
+ tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 5, i32 5)
+ tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 6, i32 6)
+ tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 7, i32 7)
+ tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 8, i32 8)
+ tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 9, i32 9)
+ tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 10, i32 10)
+ tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 11, i32 11)
+ tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 12, i32 12)
+ tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 13, i32 13)
+ tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 14, i32 14)
+ tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 15, i32 15)
+ tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 16, i32 16)
+ tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 17, i32 17)
+ tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 18, i32 18)
+ tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 19, i32 19)
+ tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 20, i32 20)
+ tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 21, i32 21)
+ tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 22, i32 22)
+ tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 23, i32 23)
+ tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 24, i32 24)
+ tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 25, i32 25)
+ tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 26, i32 26)
+ tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 27, i32 27)
+ tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 28, i32 28)
+ tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 29, i32 29)
+ tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 30, i32 30)
; Add an extra stackmap with a zero-length shadow to thwart the shadow
; optimization. This will force all 15 bytes of the previous shadow to be
; padded with nops.
- tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 31, i32 0)
+ tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 31, i32 0)
ret void
}
diff --git a/test/CodeGen/X86/stackmap-shadow-optimization.ll b/test/CodeGen/X86/stackmap-shadow-optimization.ll
index a3725f2c5b72..001d8d9f5434 100644
--- a/test/CodeGen/X86/stackmap-shadow-optimization.ll
+++ b/test/CodeGen/X86/stackmap-shadow-optimization.ll
@@ -18,7 +18,7 @@ entry:
; CHECK: callq _bar
; CHECK-NOT: nop
call void @bar()
- tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 0, i32 8)
+ tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 0, i32 8)
call void @bar()
call void @bar()
ret void
diff --git a/test/CodeGen/X86/stackmap.ll b/test/CodeGen/X86/stackmap.ll
index 5e356f3e03d1..0805e8147044 100644
--- a/test/CodeGen/X86/stackmap.ll
+++ b/test/CodeGen/X86/stackmap.ll
@@ -125,7 +125,7 @@
define void @constantargs() {
entry:
%0 = inttoptr i64 12345 to i8*
- tail call void (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i64 1, i32 15, i8* %0, i32 0, i16 65535, i16 -1, i32 65536, i32 2000000000, i32 2147483647, i32 -1, i32 4294967295, i32 4294967296, i64 2147483648, i64 4294967295, i64 4294967296, i64 -1)
+ tail call void (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.void(i64 1, i32 15, i8* %0, i32 0, i16 65535, i16 -1, i32 65536, i32 2000000000, i32 2147483647, i32 -1, i32 4294967295, i32 4294967296, i64 2147483648, i64 4294967295, i64 4294967296, i64 -1)
ret void
}
@@ -147,7 +147,7 @@ entry:
; Runtime void->void call.
call void inttoptr (i64 -559038737 to void ()*)()
; Followed by inline OSR patchpoint with 12-byte shadow and 2 live vars.
- call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 3, i32 12, i64 %a, i64 %b)
+ call void (i64, i32, ...) @llvm.experimental.stackmap(i64 3, i32 12, i64 %a, i64 %b)
ret void
}
@@ -173,7 +173,7 @@ entry:
cold:
; OSR patchpoint with 12-byte nop-slide and 2 live vars.
%thunk = inttoptr i64 -559038737 to i8*
- call void (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i64 4, i32 15, i8* %thunk, i32 0, i64 %a, i64 %b)
+ call void (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.void(i64 4, i32 15, i8* %thunk, i32 0, i64 %a, i64 %b)
unreachable
ret:
ret void
@@ -194,7 +194,7 @@ ret:
define i64 @propertyRead(i64* %obj) {
entry:
%resolveRead = inttoptr i64 -559038737 to i8*
- %result = call anyregcc i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 5, i32 15, i8* %resolveRead, i32 1, i64* %obj)
+ %result = call anyregcc i64 (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.i64(i64 5, i32 15, i8* %resolveRead, i32 1, i64* %obj)
%add = add i64 %result, 3
ret i64 %add
}
@@ -214,7 +214,7 @@ entry:
define void @propertyWrite(i64 %dummy1, i64* %obj, i64 %dummy2, i64 %a) {
entry:
%resolveWrite = inttoptr i64 -559038737 to i8*
- call anyregcc void (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i64 6, i32 15, i8* %resolveWrite, i32 2, i64* %obj, i64 %a)
+ call anyregcc void (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.void(i64 6, i32 15, i8* %resolveWrite, i32 2, i64* %obj, i64 %a)
ret void
}
@@ -236,7 +236,7 @@ entry:
define void @jsVoidCall(i64 %dummy1, i64* %obj, i64 %arg, i64 %l1, i64 %l2) {
entry:
%resolveCall = inttoptr i64 -559038737 to i8*
- call void (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i64 7, i32 15, i8* %resolveCall, i32 2, i64* %obj, i64 %arg, i64 %l1, i64 %l2)
+ call void (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.void(i64 7, i32 15, i8* %resolveCall, i32 2, i64* %obj, i64 %arg, i64 %l1, i64 %l2)
ret void
}
@@ -258,7 +258,7 @@ entry:
define i64 @jsIntCall(i64 %dummy1, i64* %obj, i64 %arg, i64 %l1, i64 %l2) {
entry:
%resolveCall = inttoptr i64 -559038737 to i8*
- %result = call i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 8, i32 15, i8* %resolveCall, i32 2, i64* %obj, i64 %arg, i64 %l1, i64 %l2)
+ %result = call i64 (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.i64(i64 8, i32 15, i8* %resolveCall, i32 2, i64* %obj, i64 %arg, i64 %l1, i64 %l2)
%add = add i64 %result, 3
ret i64 %add
}
@@ -278,7 +278,7 @@ entry:
; CHECK-NEXT: .short 6
define void @spilledValue(i64 %arg0, i64 %arg1, i64 %arg2, i64 %arg3, i64 %arg4, i64 %l0, i64 %l1, i64 %l2, i64 %l3, i64 %l4, i64 %l5, i64 %l6, i64 %l7, i64 %l8, i64 %l9, i64 %l10, i64 %l11, i64 %l12, i64 %l13, i64 %l14, i64 %l15, i64 %l16) {
entry:
- call void (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i64 11, i32 15, i8* null, i32 5, i64 %arg0, i64 %arg1, i64 %arg2, i64 %arg3, i64 %arg4, i64 %l0, i64 %l1, i64 %l2, i64 %l3, i64 %l4, i64 %l5, i64 %l6, i64 %l7, i64 %l8, i64 %l9, i64 %l10, i64 %l11, i64 %l12, i64 %l13, i64 %l14, i64 %l15, i64 %l16)
+ call void (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.void(i64 11, i32 15, i8* null, i32 5, i64 %arg0, i64 %arg1, i64 %arg2, i64 %arg3, i64 %arg4, i64 %l0, i64 %l1, i64 %l2, i64 %l3, i64 %l4, i64 %l5, i64 %l6, i64 %l7, i64 %l8, i64 %l9, i64 %l10, i64 %l11, i64 %l12, i64 %l13, i64 %l14, i64 %l15, i64 %l16)
ret void
}
@@ -297,7 +297,7 @@ entry:
; CHECK-NEXT: .short 6
define webkit_jscc void @spilledStackMapValue(i64 %l0, i64 %l1, i64 %l2, i64 %l3, i64 %l4, i64 %l5, i64 %l6, i64 %l7, i64 %l8, i64 %l9, i64 %l10, i64 %l11, i64 %l12, i64 %l13, i64 %l14, i64 %l15, i64 %l16) {
entry:
- call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 12, i32 15, i64 %l0, i64 %l1, i64 %l2, i64 %l3, i64 %l4, i64 %l5, i64 %l6, i64 %l7, i64 %l8, i64 %l9, i64 %l10, i64 %l11, i64 %l12, i64 %l13, i64 %l14, i64 %l15, i64 %l16)
+ call void (i64, i32, ...) @llvm.experimental.stackmap(i64 12, i32 15, i64 %l0, i64 %l1, i64 %l2, i64 %l3, i64 %l4, i64 %l5, i64 %l6, i64 %l7, i64 %l8, i64 %l9, i64 %l10, i64 %l11, i64 %l12, i64 %l13, i64 %l14, i64 %l15, i64 %l16)
ret void
}
@@ -321,7 +321,7 @@ bb1:
unreachable
bb2:
- %tmp = load i64* inttoptr (i64 140685446136880 to i64*)
+ %tmp = load i64, i64* inttoptr (i64 140685446136880 to i64*)
br i1 undef, label %bb16, label %bb17
bb16:
@@ -333,7 +333,7 @@ bb17:
bb60:
tail call void asm sideeffect "nop", "~{ax},~{bx},~{cx},~{dx},~{bp},~{si},~{di},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"() nounwind
- tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 13, i32 5, i32 %tmp32)
+ tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 13, i32 5, i32 %tmp32)
unreachable
bb61:
@@ -367,7 +367,7 @@ define void @subRegOffset(i16 %arg) {
%arghi = lshr i16 %v, 8
%a1 = trunc i16 %arghi to i8
tail call void asm sideeffect "nop", "~{cx},~{dx},~{bp},~{si},~{di},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"() nounwind
- tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 14, i32 5, i8 %a0, i8 %a1)
+ tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 14, i32 5, i8 %a0, i8 %a1)
ret void
}
@@ -384,7 +384,7 @@ define void @subRegOffset(i16 %arg) {
; CHECK-NEXT: .long 33
define void @liveConstant() {
- tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 15, i32 5, i32 33)
+ tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 15, i32 5, i32 33)
ret void
}
@@ -422,10 +422,10 @@ entry:
store i64 11, i64* %metadata1
store i64 12, i64* %metadata1
store i64 13, i64* %metadata1
- call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 16, i32 0, i64* %metadata1)
+ call void (i64, i32, ...) @llvm.experimental.stackmap(i64 16, i32 0, i64* %metadata1)
%metadata2 = alloca i8, i32 4, align 8
%metadata3 = alloca i16, i32 4, align 8
- call void (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i64 17, i32 5, i8* null, i32 0, i8* %metadata2, i16* %metadata3)
+ call void (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.void(i64 17, i32 5, i8* null, i32 0, i8* %metadata2, i16* %metadata3)
ret void
}
@@ -441,10 +441,10 @@ entry:
; CHECK-LABEL: .long L{{.*}}-_longid
define void @longid() {
entry:
- tail call void (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i64 4294967295, i32 0, i8* null, i32 0)
- tail call void (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i64 4294967296, i32 0, i8* null, i32 0)
- tail call void (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i64 9223372036854775807, i32 0, i8* null, i32 0)
- tail call void (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i64 -1, i32 0, i8* null, i32 0)
+ tail call void (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.void(i64 4294967295, i32 0, i8* null, i32 0)
+ tail call void (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.void(i64 4294967296, i32 0, i8* null, i32 0)
+ tail call void (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.void(i64 9223372036854775807, i32 0, i8* null, i32 0)
+ tail call void (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.void(i64 -1, i32 0, i8* null, i32 0)
ret void
}
@@ -462,7 +462,7 @@ entry:
; CHECK-NEXT: .long -{{[0-9]+}}
define void @clobberScratch(i32 %a) {
tail call void asm sideeffect "nop", "~{ax},~{bx},~{cx},~{dx},~{bp},~{si},~{di},~{r8},~{r9},~{r10},~{r12},~{r13},~{r14},~{r15}"() nounwind
- tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 16, i32 8, i32 %a)
+ tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 16, i32 8, i32 %a)
ret void
}
@@ -474,11 +474,11 @@ define void @clobberScratch(i32 %a) {
; CHECK-NEXT: .short 0
define void @needsStackRealignment() {
%val = alloca i64, i32 3, align 128
- tail call void (...)* @escape_values(i64* %val)
+ tail call void (...) @escape_values(i64* %val)
; Note: Adding any non-constant to the stackmap would fail because we
; expect to be able to address off the frame pointer. In a realigned
; frame, we must use the stack pointer instead. This is a separate bug.
- tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 0, i32 0)
+ tail call void (i64, i32, ...) @llvm.experimental.stackmap(i64 0, i32 0)
ret void
}
declare void @escape_values(...)
diff --git a/test/CodeGen/X86/statepoint-allocas.ll b/test/CodeGen/X86/statepoint-allocas.ll
new file mode 100644
index 000000000000..4af33e1f5478
--- /dev/null
+++ b/test/CodeGen/X86/statepoint-allocas.ll
@@ -0,0 +1,130 @@
+; RUN: llc < %s | FileCheck %s
+; Check that we can lower a use of an alloca both as a deopt value (where the
+; exact meaning is up to the consumer of the stackmap) and as an explicit spill
+; slot used for GC.
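+; The statepoint operands used below are, in order: the i64 ID, the i32 number
+; of patch bytes, the callee, the i32 number of call arguments, the i32 flags,
+; the call arguments, the i32 number of transition arguments (followed by those
+; arguments), the i32 number of deopt arguments (followed by those arguments),
+; and finally the gc values (here the alloca).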
+
+target datalayout = "e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-linux-gnu"
+
+declare zeroext i1 @return_i1()
+
+; Can we handle an explicit relocation slot (in the form of an alloca) given
+; to the statepoint?
+define i32 addrspace(1)* @test(i32 addrspace(1)* %ptr) gc "statepoint-example" {
+; CHECK-LABEL: test
+; CHECK: pushq %rax
+; CHECK: movq %rdi, (%rsp)
+; CHECK: callq return_i1
+; CHECK: movq (%rsp), %rax
+; CHECK: popq %rdx
+; CHECK: retq
+entry:
+ %alloca = alloca i32 addrspace(1)*, align 8
+ store i32 addrspace(1)* %ptr, i32 addrspace(1)** %alloca
+ call i32 (i64, i32, i1 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i1f(i64 0, i32 0, i1 ()* @return_i1, i32 0, i32 0, i32 0, i32 0, i32 addrspace(1)** %alloca)
+ %rel = load i32 addrspace(1)*, i32 addrspace(1)** %alloca
+ ret i32 addrspace(1)* %rel
+}
+
+; Can we handle an alloca as a deopt value?
+define i32 addrspace(1)* @test2(i32 addrspace(1)* %ptr) gc "statepoint-example" {
+; CHECK-LABEL: test2
+; CHECK: pushq %rax
+; CHECK: movq %rdi, (%rsp)
+; CHECK: callq return_i1
+; CHECK: xorl %eax, %eax
+; CHECK: popq %rdx
+; CHECK: retq
+entry:
+ %alloca = alloca i32 addrspace(1)*, align 8
+ store i32 addrspace(1)* %ptr, i32 addrspace(1)** %alloca
+ call i32 (i64, i32, i1 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i1f(i64 0, i32 0, i1 ()* @return_i1, i32 0, i32 0, i32 0, i32 1, i32 addrspace(1)** %alloca)
+ ret i32 addrspace(1)* null
+}
+
+declare i32 @llvm.experimental.gc.statepoint.p0f_i1f(i64, i32, i1 ()*, i32, i32, ...)
+
+
+; CHECK-LABEL: .section .llvm_stackmaps
+; CHECK-NEXT: __LLVM_StackMaps:
+; Header
+; CHECK-NEXT: .byte 1
+; CHECK-NEXT: .byte 0
+; CHECK-NEXT: .short 0
+; Num Functions
+; CHECK-NEXT: .long 2
+; Num LargeConstants
+; CHECK-NEXT: .long 0
+; Num Callsites
+; CHECK-NEXT: .long 2
+
+; Functions and stack size
+; CHECK-NEXT: .quad test
+; CHECK-NEXT: .quad 8
+; CHECK-NEXT: .quad test2
+; CHECK-NEXT: .quad 8
+
+; Large Constants
+; Statepoint ID only
+; CHECK: .quad 0
+
+; Callsites
+; The GC one
+; CHECK: .long .Ltmp1-test
+; CHECK: .short 0
+; CHECK: .short 4
+; SmallConstant (0)
+; CHECK: .byte 4
+; CHECK: .byte 8
+; CHECK: .short 0
+; CHECK: .long 0
+; SmallConstant (0)
+; CHECK: .byte 4
+; CHECK: .byte 8
+; CHECK: .short 0
+; CHECK: .long 0
+; SmallConstant (0)
+; CHECK: .byte 4
+; CHECK: .byte 8
+; CHECK: .short 0
+; CHECK: .long 0
+; Direct Spill Slot [RSP+0]
+; CHECK: .byte 2
+; CHECK: .byte 8
+; CHECK: .short 7
+; CHECK: .long 0
+; No Padding or LiveOuts
+; CHECK: .short 0
+; CHECK: .short 0
+; CHECK: .align 8
+
+; The Deopt one
+; CHECK: .long .Ltmp3-test2
+; CHECK: .short 0
+; CHECK: .short 4
+; SmallConstant (0)
+; CHECK: .byte 4
+; CHECK: .byte 8
+; CHECK: .short 0
+; CHECK: .long 0
+; SmallConstant (0)
+; CHECK: .byte 4
+; CHECK: .byte 8
+; CHECK: .short 0
+; CHECK: .long 0
+; SmallConstant (1)
+; CHECK: .byte 4
+; CHECK: .byte 8
+; CHECK: .short 0
+; CHECK: .long 1
+; Direct Spill Slot [RSP+0]
+; CHECK: .byte 2
+; CHECK: .byte 8
+; CHECK: .short 7
+; CHECK: .long 0
+
+; No Padding or LiveOuts
+; CHECK: .short 0
+; CHECK: .short 0
+; CHECK: .align 8
+
diff --git a/test/CodeGen/X86/statepoint-call-lowering.ll b/test/CodeGen/X86/statepoint-call-lowering.ll
index e7a0dcab9ab7..8f352b7728c3 100644
--- a/test/CodeGen/X86/statepoint-call-lowering.ll
+++ b/test/CodeGen/X86/statepoint-call-lowering.ll
@@ -9,8 +9,9 @@ declare zeroext i1 @return_i1()
declare zeroext i32 @return_i32()
declare i32* @return_i32ptr()
declare float @return_float()
+declare void @varargf(i32, ...)
-define i1 @test_i1_return() {
+define i1 @test_i1_return() gc "statepoint-example" {
; CHECK-LABEL: test_i1_return
; This is just checking that an i1 gets lowered normally when there are no extra
; state arguments to the statepoint
@@ -19,72 +20,98 @@ define i1 @test_i1_return() {
; CHECK: popq %rdx
; CHECK: retq
entry:
- %safepoint_token = tail call i32 (i1 ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_i1f(i1 ()* @return_i1, i32 0, i32 0, i32 0)
- %call1 = call zeroext i1 @llvm.experimental.gc.result.int.i1(i32 %safepoint_token)
+ %safepoint_token = tail call i32 (i64, i32, i1 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i1f(i64 0, i32 0, i1 ()* @return_i1, i32 0, i32 0, i32 0, i32 0)
+ %call1 = call zeroext i1 @llvm.experimental.gc.result.i1(i32 %safepoint_token)
ret i1 %call1
}
-define i32 @test_i32_return() {
+define i32 @test_i32_return() gc "statepoint-example" {
; CHECK-LABEL: test_i32_return
; CHECK: pushq %rax
; CHECK: callq return_i32
; CHECK: popq %rdx
; CHECK: retq
entry:
- %safepoint_token = tail call i32 (i32 ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_i32f(i32 ()* @return_i32, i32 0, i32 0, i32 0)
- %call1 = call zeroext i32 @llvm.experimental.gc.result.int.i32(i32 %safepoint_token)
+ %safepoint_token = tail call i32 (i64, i32, i32 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i32f(i64 0, i32 0, i32 ()* @return_i32, i32 0, i32 0, i32 0, i32 0)
+ %call1 = call zeroext i32 @llvm.experimental.gc.result.i32(i32 %safepoint_token)
ret i32 %call1
}
-define i32* @test_i32ptr_return() {
+define i32* @test_i32ptr_return() gc "statepoint-example" {
; CHECK-LABEL: test_i32ptr_return
; CHECK: pushq %rax
; CHECK: callq return_i32ptr
; CHECK: popq %rdx
; CHECK: retq
entry:
- %safepoint_token = tail call i32 (i32* ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_p0i32f(i32* ()* @return_i32ptr, i32 0, i32 0, i32 0)
- %call1 = call i32* @llvm.experimental.gc.result.ptr.p0i32(i32 %safepoint_token)
+ %safepoint_token = tail call i32 (i64, i32, i32* ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_p0i32f(i64 0, i32 0, i32* ()* @return_i32ptr, i32 0, i32 0, i32 0, i32 0)
+ %call1 = call i32* @llvm.experimental.gc.result.p0i32(i32 %safepoint_token)
ret i32* %call1
}
-define float @test_float_return() {
+define float @test_float_return() gc "statepoint-example" {
; CHECK-LABEL: test_float_return
; CHECK: pushq %rax
; CHECK: callq return_float
; CHECK: popq %rax
; CHECK: retq
entry:
- %safepoint_token = tail call i32 (float ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_f32f(float ()* @return_float, i32 0, i32 0, i32 0)
- %call1 = call float @llvm.experimental.gc.result.float.f32(i32 %safepoint_token)
+ %safepoint_token = tail call i32 (i64, i32, float ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_f32f(i64 0, i32 0, float ()* @return_float, i32 0, i32 0, i32 0, i32 0)
+ %call1 = call float @llvm.experimental.gc.result.f32(i32 %safepoint_token)
ret float %call1
}
-define i1 @test_relocate(i32* %a) {
+define i1 @test_relocate(i32 addrspace(1)* %a) gc "statepoint-example" {
; CHECK-LABEL: test_relocate
; Check that an unused relocate has no code-generation impact
; CHECK: pushq %rax
; CHECK: callq return_i1
-; CHECK-NEXT: .Ltmp13:
+; CHECK-NEXT: .Ltmp9:
; CHECK-NEXT: popq %rdx
; CHECK-NEXT: retq
entry:
- %safepoint_token = tail call i32 (i1 ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_i1f(i1 ()* @return_i1, i32 0, i32 0, i32 0, i32* %a)
- %call1 = call i32* @llvm.experimental.gc.relocate.p0i32(i32 %safepoint_token, i32 4, i32 4)
- %call2 = call zeroext i1 @llvm.experimental.gc.result.int.i1(i32 %safepoint_token)
+ %safepoint_token = tail call i32 (i64, i32, i1 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i1f(i64 0, i32 0, i1 ()* @return_i1, i32 0, i32 0, i32 0, i32 0, i32 addrspace(1)* %a)
+ %call1 = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(i32 %safepoint_token, i32 7, i32 7)
+ %call2 = call zeroext i1 @llvm.experimental.gc.result.i1(i32 %safepoint_token)
ret i1 %call2
}
-declare i32 @llvm.experimental.gc.statepoint.p0f_i1f(i1 ()*, i32, i32, ...)
-declare i1 @llvm.experimental.gc.result.int.i1(i32)
+define void @test_void_vararg() gc "statepoint-example" {
+; CHECK-LABEL: test_void_vararg
+; Check that a statepoint wrapping a *void*-returning vararg function works
+; CHECK: callq varargf
+entry:
+ %safepoint_token = tail call i32 (i64, i32, void (i32, ...)*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidi32varargf(i64 0, i32 0, void (i32, ...)* @varargf, i32 2, i32 0, i32 42, i32 43, i32 0, i32 0)
+ ;; if we try to use the result from a statepoint wrapping a
+ ;; non-void-returning varargf, we will experience a crash.
+ ret void
+}
+
+define i1 @test_i1_return_patchable() gc "statepoint-example" {
+; CHECK-LABEL: test_i1_return_patchable
+; A patchable variant of test_i1_return
+; CHECK: pushq %rax
+; CHECK: nopl
+; CHECK: popq %rdx
+; CHECK: retq
+entry:
+ %safepoint_token = tail call i32 (i64, i32, i1 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i1f(i64 0, i32 3, i1 ()* null, i32 0, i32 0, i32 0, i32 0)
+ %call1 = call zeroext i1 @llvm.experimental.gc.result.i1(i32 %safepoint_token)
+ ret i1 %call1
+}
+
+declare i32 @llvm.experimental.gc.statepoint.p0f_i1f(i64, i32, i1 ()*, i32, i32, ...)
+declare i1 @llvm.experimental.gc.result.i1(i32)
+
+declare i32 @llvm.experimental.gc.statepoint.p0f_i32f(i64, i32, i32 ()*, i32, i32, ...)
+declare i32 @llvm.experimental.gc.result.i32(i32)
-declare i32 @llvm.experimental.gc.statepoint.p0f_i32f(i32 ()*, i32, i32, ...)
-declare i32 @llvm.experimental.gc.result.int.i32(i32)
+declare i32 @llvm.experimental.gc.statepoint.p0f_p0i32f(i64, i32, i32* ()*, i32, i32, ...)
+declare i32* @llvm.experimental.gc.result.p0i32(i32)
-declare i32 @llvm.experimental.gc.statepoint.p0f_p0i32f(i32* ()*, i32, i32, ...)
-declare i32* @llvm.experimental.gc.result.ptr.p0i32(i32)
+declare i32 @llvm.experimental.gc.statepoint.p0f_f32f(i64, i32, float ()*, i32, i32, ...)
+declare float @llvm.experimental.gc.result.f32(i32)
-declare i32 @llvm.experimental.gc.statepoint.p0f_f32f(float ()*, i32, i32, ...)
-declare float @llvm.experimental.gc.result.float.f32(i32)
+declare i32 @llvm.experimental.gc.statepoint.p0f_isVoidi32varargf(i64, i32, void (i32, ...)*, i32, i32, ...)
-declare i32* @llvm.experimental.gc.relocate.p0i32(i32, i32, i32)
+declare i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(i32, i32, i32)
diff --git a/test/CodeGen/X86/statepoint-forward.ll b/test/CodeGen/X86/statepoint-forward.ll
index 12a6ac2c72a9..698229e705f4 100644
--- a/test/CodeGen/X86/statepoint-forward.ll
+++ b/test/CodeGen/X86/statepoint-forward.ll
@@ -22,12 +22,12 @@ declare void @func() readonly
;; be valid, but is not currently implemented.
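;; The two trailing i32 operands of gc.relocate are indices into the statepoint
;; call's operand list naming the base and derived pointer; they move from 4 to
;; 7 below because the updated statepoint form carries three extra operands
;; before the gc pointer (the ID, the patch-byte count, and the transition-arg
;; count).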
define i1 @test_load_forward(i32 addrspace(1)* addrspace(1)* %p) gc "statepoint-example" {
entry:
- %before = load i32 addrspace(1)* addrspace(1)* %p
+ %before = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(1)* %p
%cmp1 = call i1 @f(i32 addrspace(1)* %before)
call void @llvm.assume(i1 %cmp1)
- %safepoint_token = tail call i32 (void ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_isVoidf(void ()* @func, i32 0, i32 0, i32 0, i32 addrspace(1)* addrspace(1)* %p)
- %pnew = call i32 addrspace(1)* addrspace(1)* @llvm.experimental.gc.relocate.p1p1i32(i32 %safepoint_token, i32 4, i32 4)
- %after = load i32 addrspace(1)* addrspace(1)* %pnew
+ %safepoint_token = tail call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @func, i32 0, i32 0, i32 0, i32 0, i32 addrspace(1)* addrspace(1)* %p)
+ %pnew = call i32 addrspace(1)* addrspace(1)* @llvm.experimental.gc.relocate.p1p1i32(i32 %safepoint_token, i32 7, i32 7)
+ %after = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(1)* %pnew
%cmp2 = call i1 @f(i32 addrspace(1)* %after)
ret i1 %cmp2
@@ -44,9 +44,9 @@ entry:
%cmp1 = call i1 @f(i32 addrspace(1)* %v)
call void @llvm.assume(i1 %cmp1)
store i32 addrspace(1)* %v, i32 addrspace(1)* addrspace(1)* %p
- %safepoint_token = tail call i32 (void ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_isVoidf(void ()* @func, i32 0, i32 0, i32 0, i32 addrspace(1)* addrspace(1)* %p)
- %pnew = call i32 addrspace(1)* addrspace(1)* @llvm.experimental.gc.relocate.p1p1i32(i32 %safepoint_token, i32 4, i32 4)
- %after = load i32 addrspace(1)* addrspace(1)* %pnew
+ %safepoint_token = tail call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @func, i32 0, i32 0, i32 0, i32 0, i32 addrspace(1)* addrspace(1)* %p)
+ %pnew = call i32 addrspace(1)* addrspace(1)* @llvm.experimental.gc.relocate.p1p1i32(i32 %safepoint_token, i32 7, i32 7)
+ %after = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(1)* %pnew
%cmp2 = call i1 @f(i32 addrspace(1)* %after)
ret i1 %cmp2
@@ -69,11 +69,11 @@ declare i1 @f(i32 addrspace(1)* %v) readnone
; statepoint does not provide the collector with this root.
define i1 @test_load_forward_nongc_heap(i32 addrspace(1)** %p) gc "statepoint-example" {
entry:
- %before = load i32 addrspace(1)** %p
+ %before = load i32 addrspace(1)*, i32 addrspace(1)** %p
%cmp1 = call i1 @f(i32 addrspace(1)* %before)
call void @llvm.assume(i1 %cmp1)
- call i32 (void ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_isVoidf(void ()* @func, i32 0, i32 0, i32 0)
- %after = load i32 addrspace(1)** %p
+ call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @func, i32 0, i32 0, i32 0, i32 0)
+ %after = load i32 addrspace(1)*, i32 addrspace(1)** %p
%cmp2 = call i1 @f(i32 addrspace(1)* %after)
ret i1 %cmp2
@@ -90,8 +90,8 @@ entry:
%cmp1 = call i1 @f(i32 addrspace(1)* %v)
call void @llvm.assume(i1 %cmp1)
store i32 addrspace(1)* %v, i32 addrspace(1)** %p
- call i32 (void ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_isVoidf(void ()* @func, i32 0, i32 0, i32 0)
- %after = load i32 addrspace(1)** %p
+ call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @func, i32 0, i32 0, i32 0, i32 0)
+ %after = load i32 addrspace(1)*, i32 addrspace(1)** %p
%cmp2 = call i1 @f(i32 addrspace(1)* %after)
ret i1 %cmp2
@@ -102,6 +102,5 @@ entry:
}
declare void @llvm.assume(i1)
-declare i32 @llvm.experimental.gc.statepoint.p0f_isVoidf(void ()*, i32, i32, ...)
+declare i32 @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)
declare i32 addrspace(1)* addrspace(1)* @llvm.experimental.gc.relocate.p1p1i32(i32, i32, i32) #3
-
diff --git a/test/CodeGen/X86/statepoint-gctransition-call-lowering.ll b/test/CodeGen/X86/statepoint-gctransition-call-lowering.ll
new file mode 100644
index 000000000000..61b8ded2c472
--- /dev/null
+++ b/test/CodeGen/X86/statepoint-gctransition-call-lowering.ll
@@ -0,0 +1,133 @@
+; RUN: llc < %s | FileCheck %s
+; This file contains a collection of basic tests to ensure we didn't
+; screw up normal call lowering when a statepoint is a GC transition.
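+; In every statepoint call below, the flags operand (the i32 immediately
+; following the call-argument count) is 1, which marks the call site as a GC
+; transition.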
+
+target datalayout = "e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-linux-gnu"
+
+declare zeroext i1 @return_i1()
+declare zeroext i32 @return_i32()
+declare zeroext i32 @return_i32_with_args(i32, i8*)
+declare i32* @return_i32ptr()
+declare float @return_float()
+declare void @varargf(i32, ...)
+
+define i1 @test_i1_return() gc "statepoint-example" {
+; CHECK-LABEL: test_i1_return
+; This is just checking that an i1 gets lowered normally when there are no extra
+; state arguments to the statepoint
+; CHECK: pushq %rax
+; CHECK: callq return_i1
+; CHECK: popq %rdx
+; CHECK: retq
+entry:
+ %safepoint_token = tail call i32 (i64, i32, i1 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i1f(i64 0, i32 0, i1 ()* @return_i1, i32 0, i32 1, i32 0, i32 0)
+ %call1 = call zeroext i1 @llvm.experimental.gc.result.i1(i32 %safepoint_token)
+ ret i1 %call1
+}
+
+define i32 @test_i32_return() gc "statepoint-example" {
+; CHECK-LABEL: test_i32_return
+; CHECK: pushq %rax
+; CHECK: callq return_i32
+; CHECK: popq %rdx
+; CHECK: retq
+entry:
+ %safepoint_token = tail call i32 (i64, i32, i32 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i32f(i64 0, i32 0, i32 ()* @return_i32, i32 0, i32 1, i32 0, i32 0)
+ %call1 = call zeroext i32 @llvm.experimental.gc.result.i32(i32 %safepoint_token)
+ ret i32 %call1
+}
+
+define i32* @test_i32ptr_return() gc "statepoint-example" {
+; CHECK-LABEL: test_i32ptr_return
+; CHECK: pushq %rax
+; CHECK: callq return_i32ptr
+; CHECK: popq %rdx
+; CHECK: retq
+entry:
+ %safepoint_token = tail call i32 (i64, i32, i32* ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_p0i32f(i64 0, i32 0, i32* ()* @return_i32ptr, i32 0, i32 1, i32 0, i32 0)
+ %call1 = call i32* @llvm.experimental.gc.result.p0i32(i32 %safepoint_token)
+ ret i32* %call1
+}
+
+define float @test_float_return() gc "statepoint-example" {
+; CHECK-LABEL: test_float_return
+; CHECK: pushq %rax
+; CHECK: callq return_float
+; CHECK: popq %rax
+; CHECK: retq
+entry:
+ %safepoint_token = tail call i32 (i64, i32, float ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_f32f(i64 0, i32 0, float ()* @return_float, i32 0, i32 1, i32 0, i32 0)
+ %call1 = call float @llvm.experimental.gc.result.f32(i32 %safepoint_token)
+ ret float %call1
+}
+
+define i1 @test_relocate(i32 addrspace(1)* %a) gc "statepoint-example" {
+; CHECK-LABEL: test_relocate
+; Check that an unused relocate has no code-generation impact
+; CHECK: pushq %rax
+; CHECK: callq return_i1
+; CHECK-NEXT: .Ltmp9:
+; CHECK-NEXT: popq %rdx
+; CHECK-NEXT: retq
+entry:
+ %safepoint_token = tail call i32 (i64, i32, i1 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i1f(i64 0, i32 0, i1 ()* @return_i1, i32 0, i32 1, i32 0, i32 0, i32 addrspace(1)* %a)
+ %call1 = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(i32 %safepoint_token, i32 7, i32 7)
+ %call2 = call zeroext i1 @llvm.experimental.gc.result.i1(i32 %safepoint_token)
+ ret i1 %call2
+}
+
+define void @test_void_vararg() gc "statepoint-example" {
+; CHECK-LABEL: test_void_vararg
+; Check that a statepoint wrapping a *void*-returning vararg function works
+; CHECK: callq varargf
+entry:
+ %safepoint_token = tail call i32 (i64, i32, void (i32, ...)*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidi32varargf(i64 0, i32 0, void (i32, ...)* @varargf, i32 2, i32 1, i32 42, i32 43, i32 0, i32 0)
+ ;; if we try to use the result from a statepoint wrapping a
+ ;; non-void-returning varargf, we will experience a crash.
+ ret void
+}
+
+define i32 @test_transition_args() gc "statepoint-example" {
+; CHECK-LABEL: test_transition_args
+; CHECK: pushq %rax
+; CHECK: callq return_i32
+; CHECK: popq %rdx
+; CHECK: retq
+entry:
+ %val = alloca i32
+ %safepoint_token = call i32 (i64, i32, i32 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i32f(i64 0, i32 0, i32 ()* @return_i32, i32 0, i32 1, i32 2, i32* %val, i64 42, i32 0)
+ %call1 = call i32 @llvm.experimental.gc.result.i32(i32 %safepoint_token)
+ ret i32 %call1
+}
+
+define i32 @test_transition_args_2() gc "statepoint-example" {
+; CHECK-LABEL: test_transition_args_2
+; CHECK: pushq %rax
+; CHECK: callq return_i32
+; CHECK: popq %rdx
+; CHECK: retq
+entry:
+ %val = alloca i32
+ %arg = alloca i8
+ %safepoint_token = call i32 (i64, i32, i32 (i32, i8*)*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i32i32p0i8f(i64 0, i32 0, i32 (i32, i8*)* @return_i32_with_args, i32 2, i32 1, i32 0, i8* %arg, i32 2, i32* %val, i64 42, i32 0)
+ %call1 = call i32 @llvm.experimental.gc.result.i32(i32 %safepoint_token)
+ ret i32 %call1
+}
+
+declare i32 @llvm.experimental.gc.statepoint.p0f_i1f(i64, i32, i1 ()*, i32, i32, ...)
+declare i1 @llvm.experimental.gc.result.i1(i32)
+
+declare i32 @llvm.experimental.gc.statepoint.p0f_i32f(i64, i32, i32 ()*, i32, i32, ...)
+declare i32 @llvm.experimental.gc.statepoint.p0f_i32i32p0i8f(i64, i32, i32 (i32, i8*)*, i32, i32, ...)
+declare i32 @llvm.experimental.gc.result.i32(i32)
+
+declare i32 @llvm.experimental.gc.statepoint.p0f_p0i32f(i64, i32, i32* ()*, i32, i32, ...)
+declare i32* @llvm.experimental.gc.result.p0i32(i32)
+
+declare i32 @llvm.experimental.gc.statepoint.p0f_f32f(i64, i32, float ()*, i32, i32, ...)
+declare float @llvm.experimental.gc.result.f32(i32)
+
+declare i32 @llvm.experimental.gc.statepoint.p0f_isVoidi32varargf(i64, i32, void (i32, ...)*, i32, i32, ...)
+
+declare i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(i32, i32, i32)
\ No newline at end of file
diff --git a/test/CodeGen/X86/statepoint-invoke.ll b/test/CodeGen/X86/statepoint-invoke.ll
new file mode 100644
index 000000000000..df78978c117c
--- /dev/null
+++ b/test/CodeGen/X86/statepoint-invoke.ll
@@ -0,0 +1,198 @@
+; RUN: llc < %s 2>&1 | FileCheck %s
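+; Check that gc.statepoint can be lowered when it is reached via invoke, and
+; that relocates can be expressed on both the normal destination and the
+; exceptional (landingpad) path.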
+
+target triple = "x86_64-pc-linux-gnu"
+
+declare void @"some_call"(i64 addrspace(1)*)
+declare i64 addrspace(1)* @"some_other_call"(i64 addrspace(1)*)
+
+declare i32 @"personality_function"()
+
+define i64 addrspace(1)* @test_basic(i64 addrspace(1)* %obj,
+ i64 addrspace(1)* %obj1)
+gc "statepoint-example" {
+entry:
+ ; CHECK: Ltmp{{[0-9]+}}:
+ ; CHECK: callq some_call
+ ; CHECK: Ltmp{{[0-9]+}}:
+ %0 = invoke i32 (i64, i32, void (i64 addrspace(1)*)*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidp1i64f(i64 0, i32 0, void (i64 addrspace(1)*)* @some_call, i32 1, i32 0, i64 addrspace(1)* %obj, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0, i64 addrspace(1)* %obj, i64 addrspace(1)* %obj1)
+ to label %invoke_safepoint_normal_dest unwind label %exceptional_return
+
+invoke_safepoint_normal_dest:
+ ; CHECK: movq
+ %obj.relocated = call coldcc i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(i32 %0, i32 13, i32 13)
+ %obj1.relocated = call coldcc i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(i32 %0, i32 14, i32 14)
+ br label %normal_return
+
+normal_return:
+ ; CHECK: retq
+ ret i64 addrspace(1)* %obj.relocated
+
+exceptional_return:
+ ; CHECK: Ltmp{{[0-9]+}}:
+ ; CHECK: movq
+ ; CHECK: retq
+ %landing_pad = landingpad { i8*, i32 } personality i32 ()* @"personality_function"
+ cleanup
+ %relocate_token = extractvalue { i8*, i32 } %landing_pad, 1
+ %obj.relocated1 = call coldcc i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(i32 %relocate_token, i32 13, i32 13)
+ %obj1.relocated1 = call coldcc i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(i32 %relocate_token, i32 14, i32 14)
+ ret i64 addrspace(1)* %obj1.relocated1
+}
+; CHECK-LABEL: GCC_except_table{{[0-9]+}}:
+; CHECK: .long .Ltmp{{[0-9]+}}-.Ltmp{{[0-9]+}}
+; CHECK: .long .Ltmp{{[0-9]+}}-.Lfunc_begin{{[0-9]+}}
+; CHECK: .byte 0
+; CHECK: .align 4
+
+define i64 addrspace(1)* @test_result(i64 addrspace(1)* %obj,
+ i64 addrspace(1)* %obj1)
+ gc "statepoint-example" {
+entry:
+ ; CHECK: .Ltmp{{[0-9]+}}:
+ ; CHECK: callq some_other_call
+ ; CHECK: .Ltmp{{[0-9]+}}:
+ %0 = invoke i32 (i64, i32, i64 addrspace(1)* (i64 addrspace(1)*)*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_p1i64p1i64f(i64 0, i32 0, i64 addrspace(1)* (i64 addrspace(1)*)* @some_other_call, i32 1, i32 0, i64 addrspace(1)* %obj, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0, i64 addrspace(1)* %obj, i64 addrspace(1)* %obj1)
+ to label %normal_return unwind label %exceptional_return
+
+normal_return:
+ ; CHECK: popq
+ ; CHECK: retq
+ %ret_val = call i64 addrspace(1)* @llvm.experimental.gc.result.p1i64(i32 %0)
+ ret i64 addrspace(1)* %ret_val
+
+exceptional_return:
+ ; CHECK: .Ltmp{{[0-9]+}}:
+ ; CHECK: movq
+ %landing_pad = landingpad { i8*, i32 } personality i32 ()* @personality_function
+ cleanup
+ %relocate_token = extractvalue { i8*, i32 } %landing_pad, 1
+ %obj.relocated = call coldcc i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(i32 %relocate_token, i32 13, i32 13)
+ ret i64 addrspace(1)* %obj.relocated
+}
+; CHECK-LABEL: GCC_except_table{{[0-9]+}}:
+; CHECK: .long .Ltmp{{[0-9]+}}-.Ltmp{{[0-9]+}}
+; CHECK: .long .Ltmp{{[0-9]+}}-.Lfunc_begin{{[0-9]+}}
+; CHECK: .byte 0
+; CHECK: .align 4
+
+define i64 addrspace(1)* @test_same_val(i1 %cond, i64 addrspace(1)* %val1, i64 addrspace(1)* %val2, i64 addrspace(1)* %val3)
+ gc "statepoint-example" {
+entry:
+ br i1 %cond, label %left, label %right
+
+left:
+ ; CHECK-LABEL: %left
+ ; CHECK: movq %rdx, 8(%rsp)
+ ; CHECK: movq
+ ; CHECK: callq some_call
+ %sp1 = invoke i32 (i64, i32, void (i64 addrspace(1)*)*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidp1i64f(i64 0, i32 0, void (i64 addrspace(1)*)* @some_call, i32 1, i32 0, i64 addrspace(1)* %val1, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0, i64 addrspace(1)* %val1, i64 addrspace(1)* %val2)
+ to label %left.relocs unwind label %exceptional_return.left
+
+left.relocs:
+ ; CHECK: movq (%rsp),
+ ; CHECK: movq 8(%rsp), [[REGVAL2:%[a-z]+]]
+ %val1.relocated = call coldcc i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(i32 %sp1, i32 13, i32 13)
+ %val2.relocated_left = call coldcc i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(i32 %sp1, i32 14, i32 14)
+ br label %normal_return
+
+right:
+ ; CHECK-LABEL: %right
+ ; CHECK: movq
+ ; CHECK: movq %rdx, (%rsp)
+ ; CHECK: callq some_call
+ %sp2 = invoke i32 (i64, i32, void (i64 addrspace(1)*)*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidp1i64f(i64 0, i32 0, void (i64 addrspace(1)*)* @some_call, i32 1, i32 0, i64 addrspace(1)* %val1, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0, i64 addrspace(1)* %val2, i64 addrspace(1)* %val3)
+ to label %right.relocs unwind label %exceptional_return.right
+
+right.relocs:
+ ; CHECK: movq (%rsp), [[REGVAL2]]
+ ; CHECK: movq
+ %val2.relocated_right = call coldcc i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(i32 %sp2, i32 13, i32 13)
+ %val3.relocated = call coldcc i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(i32 %sp2, i32 14, i32 14)
+ br label %normal_return
+
+normal_return:
+ ; CHECK-LABEL: %normal_return
+ ; CHECK: cmoveq {{.*}}[[REGVAL2]]{{.*}}
+ ; CHECK: retq
+ %a1 = phi i64 addrspace(1)* [%val1.relocated, %left.relocs], [%val3.relocated, %right.relocs]
+ %a2 = phi i64 addrspace(1)* [%val2.relocated_left, %left.relocs], [%val2.relocated_right, %right.relocs]
+ %ret = select i1 %cond, i64 addrspace(1)* %a1, i64 addrspace(1)* %a2
+ ret i64 addrspace(1)* %ret
+
+exceptional_return.left:
+ %landing_pad = landingpad { i8*, i32 } personality i32 ()* @"personality_function"
+ cleanup
+ %relocate_token = extractvalue { i8*, i32 } %landing_pad, 1
+ %val.relocated2 = call coldcc i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(i32 %relocate_token, i32 13, i32 13)
+ ret i64 addrspace(1)* %val.relocated2
+
+exceptional_return.right:
+ %landing_pad1 = landingpad { i8*, i32 } personality i32 ()* @"personality_function"
+ cleanup
+ %relocate_token1 = extractvalue { i8*, i32 } %landing_pad1, 1
+ %val.relocated3 = call coldcc i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(i32 %relocate_token1, i32 13, i32 13)
+ ret i64 addrspace(1)* %val.relocated3
+}
+
+define i64 addrspace(1)* @test_null_undef(i64 addrspace(1)* %val1)
+ gc "statepoint-example" {
+; CHECK-LABEL: test_null_undef:
+entry:
+ ; CHECK: callq some_call
+ %sp1 = invoke i32 (i64, i32, void (i64 addrspace(1)*)*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidp1i64f(i64 0, i32 0, void (i64 addrspace(1)*)* @some_call, i32 1, i32 0, i64 addrspace(1)* %val1, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0, i64 addrspace(1)* null, i64 addrspace(1)* undef)
+ to label %normal_return unwind label %exceptional_return
+
+normal_return:
+ ; CHECK-LABEL: %normal_return
+ ; CHECK: xorl %eax, %eax
+ ; CHECK-NEXT: popq
+ ; CHECK-NEXT: retq
+ %null.relocated = call coldcc i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(i32 %sp1, i32 13, i32 13)
+ %undef.relocated = call coldcc i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(i32 %sp1, i32 14, i32 14)
+ ret i64 addrspace(1)* %null.relocated
+
+exceptional_return:
+ %landing_pad = landingpad { i8*, i32 } personality i32 ()* @"personality_function"
+ cleanup
+ %relocate_token = extractvalue { i8*, i32 } %landing_pad, 1
+ %null.relocated2 = call coldcc i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(i32 %relocate_token, i32 13, i32 13)
+ %undef.relocated2 = call coldcc i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(i32 %relocate_token, i32 14, i32 14)
+ ret i64 addrspace(1)* %null.relocated2
+}
+
+define i64 addrspace(1)* @test_alloca_and_const(i64 addrspace(1)* %val1)
+ gc "statepoint-example" {
+; CHECK-LABEL: test_alloca_and_const:
+entry:
+ %a = alloca i32
+ %aa = addrspacecast i32* %a to i32 addrspace(1)*
+ %c = inttoptr i64 15 to i64 addrspace(1)*
+ ; CHECK: callq
+ %sp = invoke i32 (i64, i32, void (i64 addrspace(1)*)*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidp1i64f(i64 0, i32 0, void (i64 addrspace(1)*)* @some_call, i32 1, i32 0, i64 addrspace(1)* %val1, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0, i32 addrspace(1)* %aa, i64 addrspace(1)* %c)
+ to label %normal_return unwind label %exceptional_return
+
+normal_return:
+ ; CHECK: leaq
+ ; CHECK-NEXT: popq
+ ; CHECK-NEXT: retq
+ %aa.rel = call coldcc i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(i32 %sp, i32 13, i32 13)
+ %aa.converted = bitcast i32 addrspace(1)* %aa.rel to i64 addrspace(1)*
+ ret i64 addrspace(1)* %aa.converted
+
+exceptional_return:
+ ; CHECK: movl $15
+ ; CHECK-NEXT: popq
+ ; CHECK-NEXT: retq
+ %landing_pad = landingpad { i8*, i32 } personality i32 ()* @"personality_function"
+ cleanup
+ %relocate_token = extractvalue { i8*, i32 } %landing_pad, 1
+ %aa.rel2 = call coldcc i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(i32 %relocate_token, i32 14, i32 14)
+ ret i64 addrspace(1)* %aa.rel2
+}
+
+declare i32 @llvm.experimental.gc.statepoint.p0f_isVoidp1i64f(i64, i32, void (i64 addrspace(1)*)*, i32, i32, ...)
+declare i32 @llvm.experimental.gc.statepoint.p0f_p1i64p1i64f(i64, i32, i64 addrspace(1)* (i64 addrspace(1)*)*, i32, i32, ...)
+
+declare i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(i32, i32, i32)
+declare i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(i32, i32, i32)
+declare i64 addrspace(1)* @llvm.experimental.gc.result.p1i64(i32)
diff --git a/test/CodeGen/X86/statepoint-stack-usage.ll b/test/CodeGen/X86/statepoint-stack-usage.ll
index fd24bf841688..02d20c9fcb96 100644
--- a/test/CodeGen/X86/statepoint-stack-usage.ll
+++ b/test/CodeGen/X86/statepoint-stack-usage.ll
@@ -8,53 +8,53 @@ target triple = "x86_64-pc-linux-gnu"
; of GC arguments differ, naive lowering code would insert loads and
; stores to rearrange items on the stack. We need to make sure (for
; performance) that this doesn't happen.
-define i32 @back_to_back_calls(i32* %a, i32* %b, i32* %c) #1 {
+define i32 @back_to_back_calls(i32 addrspace(1)* %a, i32 addrspace(1)* %b, i32 addrspace(1)* %c) #1 gc "statepoint-example" {
; CHECK-LABEL: back_to_back_calls
; The exact stores don't matter, but there need to be three stack slots created
-; CHECK: movq %rdx, 16(%rsp)
-; CHECK: movq %rdi, 8(%rsp)
+; CHECK: movq %rdi, 16(%rsp)
+; CHECK: movq %rdx, 8(%rsp)
; CHECK: movq %rsi, (%rsp)
- %safepoint_token = tail call i32 (void ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_isVoidf(void ()* undef, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0, i32* %a, i32* %b, i32* %c)
- %a1 = tail call coldcc i32* @llvm.experimental.gc.relocate.p0i32(i32 %safepoint_token, i32 9, i32 9)
- %b1 = tail call coldcc i32* @llvm.experimental.gc.relocate.p0i32(i32 %safepoint_token, i32 9, i32 10)
- %c1 = tail call coldcc i32* @llvm.experimental.gc.relocate.p0i32(i32 %safepoint_token, i32 9, i32 11)
+ %safepoint_token = tail call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* undef, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0, i32 addrspace(1)* %a, i32 addrspace(1)* %b, i32 addrspace(1)* %c)
+ %a1 = tail call coldcc i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(i32 %safepoint_token, i32 12, i32 12)
+ %b1 = tail call coldcc i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(i32 %safepoint_token, i32 12, i32 13)
+ %c1 = tail call coldcc i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(i32 %safepoint_token, i32 12, i32 14)
; CHECK: callq
; This is the key check. There should NOT be any memory moves here
; CHECK-NOT: movq
- %safepoint_token2 = tail call i32 (void ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_isVoidf(void ()* undef, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0, i32* %c1, i32* %b1, i32* %a1)
- %a2 = tail call coldcc i32* @llvm.experimental.gc.relocate.p0i32(i32 %safepoint_token2, i32 9, i32 11)
- %b2 = tail call coldcc i32* @llvm.experimental.gc.relocate.p0i32(i32 %safepoint_token2, i32 9, i32 10)
- %c2 = tail call coldcc i32* @llvm.experimental.gc.relocate.p0i32(i32 %safepoint_token2, i32 9, i32 9)
+ %safepoint_token2 = tail call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* undef, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0, i32 addrspace(1)* %c1, i32 addrspace(1)* %b1, i32 addrspace(1)* %a1)
+ %a2 = tail call coldcc i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(i32 %safepoint_token2, i32 12, i32 14)
+ %b2 = tail call coldcc i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(i32 %safepoint_token2, i32 12, i32 13)
+ %c2 = tail call coldcc i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(i32 %safepoint_token2, i32 12, i32 12)
; CHECK: callq
ret i32 1
}
; This test simply checks that minor changes in vm state don't prevent slots
; being reused for gc values.
-define i32 @reserve_first(i32* %a, i32* %b, i32* %c) #1 {
+define i32 @reserve_first(i32 addrspace(1)* %a, i32 addrspace(1)* %b, i32 addrspace(1)* %c) #1 gc "statepoint-example" {
; CHECK-LABEL: reserve_first
; The exact stores don't matter, but there need to be three stack slots created
-; CHECK: movq %rdx, 16(%rsp)
-; CHECK: movq %rdi, 8(%rsp)
+; CHECK: movq %rdi, 16(%rsp)
+; CHECK: movq %rdx, 8(%rsp)
; CHECK: movq %rsi, (%rsp)
- %safepoint_token = tail call i32 (void ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_isVoidf(void ()* undef, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0, i32* %a, i32* %b, i32* %c)
- %a1 = tail call coldcc i32* @llvm.experimental.gc.relocate.p0i32(i32 %safepoint_token, i32 9, i32 9)
- %b1 = tail call coldcc i32* @llvm.experimental.gc.relocate.p0i32(i32 %safepoint_token, i32 9, i32 10)
- %c1 = tail call coldcc i32* @llvm.experimental.gc.relocate.p0i32(i32 %safepoint_token, i32 9, i32 11)
+ %safepoint_token = tail call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* undef, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0, i32 addrspace(1)* %a, i32 addrspace(1)* %b, i32 addrspace(1)* %c)
+ %a1 = tail call coldcc i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(i32 %safepoint_token, i32 12, i32 12)
+ %b1 = tail call coldcc i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(i32 %safepoint_token, i32 12, i32 13)
+ %c1 = tail call coldcc i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(i32 %safepoint_token, i32 12, i32 14)
; CHECK: callq
; This is the key check. There should NOT be any memory moves here
; CHECK-NOT: movq
- %safepoint_token2 = tail call i32 (void ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_isVoidf(void ()* undef, i32 0, i32 0, i32 5, i32* %a1, i32 0, i32* %c1, i32 0, i32 0, i32* %c1, i32* %b1, i32* %a1)
- %a2 = tail call coldcc i32* @llvm.experimental.gc.relocate.p0i32(i32 %safepoint_token2, i32 9, i32 11)
- %b2 = tail call coldcc i32* @llvm.experimental.gc.relocate.p0i32(i32 %safepoint_token2, i32 9, i32 10)
- %c2 = tail call coldcc i32* @llvm.experimental.gc.relocate.p0i32(i32 %safepoint_token2, i32 9, i32 9)
+ %safepoint_token2 = tail call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* undef, i32 0, i32 0, i32 0, i32 5, i32 addrspace(1)* %a1, i32 0, i32 addrspace(1)* %c1, i32 0, i32 0, i32 addrspace(1)* %c1, i32 addrspace(1)* %b1, i32 addrspace(1)* %a1)
+ %a2 = tail call coldcc i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(i32 %safepoint_token2, i32 12, i32 14)
+ %b2 = tail call coldcc i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(i32 %safepoint_token2, i32 12, i32 13)
+ %c2 = tail call coldcc i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(i32 %safepoint_token2, i32 12, i32 12)
; CHECK: callq
ret i32 1
}
; Function Attrs: nounwind
-declare i32* @llvm.experimental.gc.relocate.p0i32(i32, i32, i32) #3
+declare i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(i32, i32, i32) #3
-declare i32 @llvm.experimental.gc.statepoint.p0f_isVoidf(void ()*, i32, i32, ...)
+declare i32 @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)
-attributes #1 = { uwtable }
+attributes #1 = { uwtable }
\ No newline at end of file
diff --git a/test/CodeGen/X86/statepoint-stackmap-format.ll b/test/CodeGen/X86/statepoint-stackmap-format.ll
index 416674839ea8..6bb0d8980e5b 100644
--- a/test/CodeGen/X86/statepoint-stackmap-format.ll
+++ b/test/CodeGen/X86/statepoint-stackmap-format.ll
@@ -8,32 +8,73 @@ target triple = "x86_64-pc-linux-gnu"
declare zeroext i1 @return_i1()
-define i1 @test(i32 addrspace(1)* %ptr) {
+define i1 @test(i32 addrspace(1)* %ptr_base, i32 %arg)
+ gc "statepoint-example" {
; CHECK-LABEL: test
-; Do we see one spill for the local value and the store to the
+; Do we see two spills for the local values and the store to the
; alloca?
-; CHECK: subq $24, %rsp
-; CHECK: movq $0, 8(%rsp)
-; CHECK: movq %rdi, (%rsp)
+; CHECK: subq $40, %rsp
+; CHECK: movq $0, 24(%rsp)
+; CHECK: movq %rdi, 16(%rsp)
+; CHECK: movq %rax, 8(%rsp)
; CHECK: callq return_i1
-; CHECK: addq $24, %rsp
+; CHECK: addq $40, %rsp
; CHECK: retq
entry:
%metadata1 = alloca i32 addrspace(1)*, i32 2, align 8
store i32 addrspace(1)* null, i32 addrspace(1)** %metadata1
- %safepoint_token = tail call i32 (i1 ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_i1f(i1 ()* @return_i1, i32 0, i32 0, i32 2, i32 addrspace(1)* %ptr, i32 addrspace(1)* null, i32 addrspace(1)* %ptr, i32 addrspace(1)* null)
- %call1 = call zeroext i1 @llvm.experimental.gc.result.int.i1(i32 %safepoint_token)
- %a = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(i32 %safepoint_token, i32 6, i32 6)
- %b = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(i32 %safepoint_token, i32 7, i32 7)
+ %ptr_derived = getelementptr i32, i32 addrspace(1)* %ptr_base, i32 %arg
+ %safepoint_token = tail call i32 (i64, i32, i1 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i1f(i64 0, i32 0, i1 ()* @return_i1, i32 0, i32 0, i32 0, i32 2, i32 addrspace(1)* %ptr_base, i32 addrspace(1)* null, i32 addrspace(1)* %ptr_base, i32 addrspace(1)* %ptr_derived, i32 addrspace(1)* null)
+ %call1 = call zeroext i1 @llvm.experimental.gc.result.i1(i32 %safepoint_token)
+ %a = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(i32 %safepoint_token, i32 9, i32 9)
+ %b = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(i32 %safepoint_token, i32 9, i32 10)
+ %c = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(i32 %safepoint_token, i32 11, i32 11)
;
ret i1 %call1
}
-declare i32 @llvm.experimental.gc.statepoint.p0f_i1f(i1 ()*, i32, i32, ...)
-declare i1 @llvm.experimental.gc.result.int.i1(i32)
-declare i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(i32, i32, i32) #3
+; This is similar to the previous test except that we have a derived pointer as
+; an argument to the function. Even though this cannot happen after the
+; RewriteSafepointForGC pass, lowering should be able to handle it anyway.
+define i1 @test_derived_arg(i32 addrspace(1)* %ptr_base,
+ i32 addrspace(1)* %ptr_derived)
+ gc "statepoint-example" {
+; CHECK-LABEL: test_derived_arg
+; Do we see two spills for the local values and the store to the
+; alloca?
+; CHECK: subq $40, %rsp
+; CHECK: movq $0, 24(%rsp)
+; CHECK: movq %rdi, 16(%rsp)
+; CHECK: movq %rsi, 8(%rsp)
+; CHECK: callq return_i1
+; CHECK: addq $40, %rsp
+; CHECK: retq
+entry:
+ %metadata1 = alloca i32 addrspace(1)*, i32 2, align 8
+ store i32 addrspace(1)* null, i32 addrspace(1)** %metadata1
+ %safepoint_token = tail call i32 (i64, i32, i1 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i1f(i64 0, i32 0, i1 ()* @return_i1, i32 0, i32 0, i32 0, i32 2, i32 addrspace(1)* %ptr_base, i32 addrspace(1)* null, i32 addrspace(1)* %ptr_base, i32 addrspace(1)* %ptr_derived, i32 addrspace(1)* null)
+ %call1 = call zeroext i1 @llvm.experimental.gc.result.i1(i32 %safepoint_token)
+ %a = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(i32 %safepoint_token, i32 9, i32 9)
+ %b = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(i32 %safepoint_token, i32 9, i32 10)
+ %c = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(i32 %safepoint_token, i32 11, i32 11)
+;
+ ret i1 %call1
+}
+
+; Simple test case to check that we emit the ID field correctly
+define i1 @test_id() gc "statepoint-example" {
+; CHECK-LABEL: test_id
+entry:
+ %safepoint_token = tail call i32 (i64, i32, i1 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i1f(i64 237, i32 0, i1 ()* @return_i1, i32 0, i32 0, i32 0, i32 0)
+ %call1 = call zeroext i1 @llvm.experimental.gc.result.i1(i32 %safepoint_token)
+ ret i1 %call1
+}
+declare i32 @llvm.experimental.gc.statepoint.p0f_i1f(i64, i32, i1 ()*, i32, i32, ...)
+declare i1 @llvm.experimental.gc.result.i1(i32)
+declare i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(i32, i32, i32) #3
+
; CHECK-LABEL: .section .llvm_stackmaps
; CHECK-NEXT: __LLVM_StackMaps:
; Header
@@ -41,25 +82,36 @@ declare i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(i32, i32, i32) #3
; CHECK-NEXT: .byte 0
; CHECK-NEXT: .short 0
; Num Functions
-; CHECK-NEXT: .long 1
+; CHECK-NEXT: .long 3
; Num LargeConstants
; CHECK-NEXT: .long 0
; Num Callsites
-; CHECK-NEXT: .long 1
+; CHECK-NEXT: .long 3
; Functions and stack size
; CHECK-NEXT: .quad test
-; CHECK-NEXT: .quad 24
+; CHECK-NEXT: .quad 40
+; CHECK-NEXT: .quad test_derived_arg
+; CHECK-NEXT: .quad 40
+
+;
+; test
+;
; Large Constants
; Statepoint ID only
-; CHECK: .quad 2882400000
+; CHECK: .quad 0
; Callsites
; Constant arguments
; CHECK: .long .Ltmp1-test
; CHECK: .short 0
-; CHECK: .short 8
+; CHECK: .short 11
+; SmallConstant (0)
+; CHECK: .byte 4
+; CHECK: .byte 8
+; CHECK: .short 0
+; CHECK: .long 0
; SmallConstant (0)
; CHECK: .byte 4
; CHECK: .byte 8
@@ -74,7 +126,7 @@ declare i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(i32, i32, i32) #3
; CHECK: .byte 2
; CHECK: .byte 8
; CHECK: .short 7
-; CHECK: .long 0
+; CHECK: .long 16
; SmallConstant (0)
; CHECK: .byte 4
; CHECK: .byte 8
@@ -90,20 +142,139 @@ declare i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(i32, i32, i32) #3
; CHECK: .byte 8
; CHECK: .short 0
; CHECK: .long 0
-; Direct Spill Slot [RSP+0]
+; Direct Spill Slot [RSP+16]
+; CHECK: .byte 2
+; CHECK: .byte 8
+; CHECK: .short 7
+; CHECK: .long 16
+; Direct Spill Slot [RSP+8]
; CHECK: .byte 2
; CHECK: .byte 8
; CHECK: .short 7
+; CHECK: .long 8
+; Direct Spill Slot [RSP+16]
+; CHECK: .byte 2
+; CHECK: .byte 8
+; CHECK: .short 7
+; CHECK: .long 16
+; Direct Spill Slot [RSP+16]
+; CHECK: .byte 2
+; CHECK: .byte 8
+; CHECK: .short 7
+; CHECK: .long 16
+
+; No Padding or LiveOuts
+; CHECK: .short 0
+; CHECK: .short 0
+; CHECK: .align 8
+
+;
+; test_derived_arg
+;
+
+; Large Constants
+; Statepoint ID only
+; CHECK: .quad 0
+
+; Callsites
+; Constant arguments
+; CHECK: .long .Ltmp3-test_derived_arg
+; CHECK: .short 0
+; CHECK: .short 11
+; SmallConstant (0)
+; CHECK: .byte 4
+; CHECK: .byte 8
+; CHECK: .short 0
; CHECK: .long 0
+; SmallConstant (2)
+; CHECK: .byte 4
+; CHECK: .byte 8
+; CHECK: .short 0
+; CHECK: .long 2
; Direct Spill Slot [RSP+0]
; CHECK: .byte 2
; CHECK: .byte 8
; CHECK: .short 7
+; CHECK: .long 16
+; SmallConstant (0)
+; CHECK: .byte 4
+; CHECK: .byte 8
+; CHECK: .short 0
+; CHECK: .long 0
+; SmallConstant (0)
+; CHECK: .byte 4
+; CHECK: .byte 8
+; CHECK: .short 0
+; CHECK: .long 0
+; SmallConstant (0)
+; CHECK: .byte 4
+; CHECK: .byte 8
+; CHECK: .short 0
; CHECK: .long 0
+; Direct Spill Slot [RSP+16]
+; CHECK: .byte 2
+; CHECK: .byte 8
+; CHECK: .short 7
+; CHECK: .long 16
+; Direct Spill Slot [RSP+8]
+; CHECK: .byte 2
+; CHECK: .byte 8
+; CHECK: .short 7
+; CHECK: .long 8
+; Direct Spill Slot [RSP+16]
+; CHECK: .byte 2
+; CHECK: .byte 8
+; CHECK: .short 7
+; CHECK: .long 16
+; Direct Spill Slot [RSP+16]
+; CHECK: .byte 2
+; CHECK: .byte 8
+; CHECK: .short 7
+; CHECK: .long 16
; No Padding or LiveOuts
; CHECK: .short 0
; CHECK: .short 0
; CHECK: .align 8
+; Records for the test_id function:
+; No large constants
+
+; The Statepoint ID:
+; CHECK: .quad 237
+
+; Instruction Offset
+; CHECK: .long .Ltmp5-test_id
+
+; Reserved:
+; CHECK: .short 0
+
+; NumLocations:
+; CHECK: .short 3
+
+; StkMapRecord[0]:
+; SmallConstant(0):
+; CHECK: .byte 4
+; CHECK: .byte 8
+; CHECK: .short 0
+; CHECK: .long 0
+
+; StkMapRecord[1]:
+; SmallConstant(0):
+; CHECK: .byte 4
+; CHECK: .byte 8
+; CHECK: .short 0
+; CHECK: .long 0
+
+; StkMapRecord[2]:
+; SmallConstant(0):
+; CHECK: .byte 4
+; CHECK: .byte 8
+; CHECK: .short 0
+; CHECK: .long 0
+
+; No padding or LiveOuts
+; CHECK: .short 0
+; CHECK: .short 0
+; CHECK: .align 8
diff --git a/test/CodeGen/X86/stdarg.ll b/test/CodeGen/X86/stdarg.ll
index 5728daf1ee1c..18d502ad5834 100644
--- a/test/CodeGen/X86/stdarg.ll
+++ b/test/CodeGen/X86/stdarg.ll
@@ -8,7 +8,7 @@ entry:
%ap = alloca [1 x %struct.__va_list_tag], align 8; <[1 x %struct.__va_list_tag]*> [#uses=2]
%ap12 = bitcast [1 x %struct.__va_list_tag]* %ap to i8*; <i8*> [#uses=2]
call void @llvm.va_start(i8* %ap12)
- %ap3 = getelementptr inbounds [1 x %struct.__va_list_tag]* %ap, i64 0, i64 0; <%struct.__va_list_tag*> [#uses=1]
+ %ap3 = getelementptr inbounds [1 x %struct.__va_list_tag], [1 x %struct.__va_list_tag]* %ap, i64 0, i64 0; <%struct.__va_list_tag*> [#uses=1]
call void @bar(%struct.__va_list_tag* %ap3) nounwind
call void @llvm.va_end(i8* %ap12)
ret void
diff --git a/test/CodeGen/X86/store-narrow.ll b/test/CodeGen/X86/store-narrow.ll
index e3cc2fa668ef..6c1c56e43a4c 100644
--- a/test/CodeGen/X86/store-narrow.ll
+++ b/test/CodeGen/X86/store-narrow.ll
@@ -6,7 +6,7 @@ target triple = "x86_64-apple-darwin10.2"
define void @test1(i32* nocapture %a0, i8 zeroext %a1) nounwind ssp {
entry:
- %A = load i32* %a0, align 4
+ %A = load i32, i32* %a0, align 4
%B = and i32 %A, -256 ; 0xFFFFFF00
%C = zext i8 %a1 to i32
%D = or i32 %C, %B
@@ -23,7 +23,7 @@ entry:
define void @test2(i32* nocapture %a0, i8 zeroext %a1) nounwind ssp {
entry:
- %A = load i32* %a0, align 4
+ %A = load i32, i32* %a0, align 4
%B = and i32 %A, -65281 ; 0xFFFF00FF
%C = zext i8 %a1 to i32
%CS = shl i32 %C, 8
@@ -40,7 +40,7 @@ entry:
define void @test3(i32* nocapture %a0, i16 zeroext %a1) nounwind ssp {
entry:
- %A = load i32* %a0, align 4
+ %A = load i32, i32* %a0, align 4
%B = and i32 %A, -65536 ; 0xFFFF0000
%C = zext i16 %a1 to i32
%D = or i32 %B, %C
@@ -56,7 +56,7 @@ entry:
define void @test4(i32* nocapture %a0, i16 zeroext %a1) nounwind ssp {
entry:
- %A = load i32* %a0, align 4
+ %A = load i32, i32* %a0, align 4
%B = and i32 %A, 65535 ; 0x0000FFFF
%C = zext i16 %a1 to i32
%CS = shl i32 %C, 16
@@ -73,7 +73,7 @@ entry:
define void @test5(i64* nocapture %a0, i16 zeroext %a1) nounwind ssp {
entry:
- %A = load i64* %a0, align 4
+ %A = load i64, i64* %a0, align 4
%B = and i64 %A, -4294901761 ; 0xFFFFFFFF0000FFFF
%C = zext i16 %a1 to i64
%CS = shl i64 %C, 16
@@ -90,7 +90,7 @@ entry:
define void @test6(i64* nocapture %a0, i8 zeroext %a1) nounwind ssp {
entry:
- %A = load i64* %a0, align 4
+ %A = load i64, i64* %a0, align 4
%B = and i64 %A, -280375465082881 ; 0xFFFF00FFFFFFFFFF
%C = zext i8 %a1 to i64
%CS = shl i64 %C, 40
@@ -108,8 +108,8 @@ entry:
define i32 @test7(i64* nocapture %a0, i8 zeroext %a1, i32* %P2) nounwind {
entry:
- %OtherLoad = load i32 *%P2
- %A = load i64* %a0, align 4
+ %OtherLoad = load i32, i32* %P2
+ %A = load i64, i64* %a0, align 4
%B = and i64 %A, -280375465082881 ; 0xFFFF00FFFFFFFFFF
%C = zext i8 %a1 to i64
%CS = shl i64 %C, 40
@@ -136,7 +136,7 @@ entry:
; X64-NEXT: movl %eax, _g_16(%rip)
; X64-NEXT: ret
define void @test8() nounwind {
- %tmp = load i32* @g_16
+ %tmp = load i32, i32* @g_16
store i32 0, i32* @g_16
%or = or i32 %tmp, 1
store i32 %or, i32* @g_16
@@ -147,7 +147,7 @@ define void @test8() nounwind {
; X64-NEXT: orb $1, _g_16(%rip)
; X64-NEXT: ret
define void @test9() nounwind {
- %tmp = load i32* @g_16
+ %tmp = load i32, i32* @g_16
%or = or i32 %tmp, 1
store i32 %or, i32* @g_16
ret void
@@ -160,7 +160,7 @@ define void @test9() nounwind {
; X64-NEXT: ret
define i8 @test10(i8* %P) nounwind ssp {
entry:
- %tmp = load i8* %P, align 1
+ %tmp = load i8, i8* %P, align 1
%conv = sext i8 %tmp to i32
%shr3 = lshr i32 %conv, 8
%conv2 = trunc i32 %shr3 to i8
diff --git a/test/CodeGen/X86/store_op_load_fold.ll b/test/CodeGen/X86/store_op_load_fold.ll
index bbeb7443c07f..c4cdc0e9932a 100644
--- a/test/CodeGen/X86/store_op_load_fold.ll
+++ b/test/CodeGen/X86/store_op_load_fold.ll
@@ -9,7 +9,7 @@ define void @foo() nounwind {
; CHECK-NOT: mov
; CHECK: add
; CHECK-NEXT: ret
- %tmp.0 = load i16* @X ; <i16> [#uses=1]
+ %tmp.0 = load i16, i16* @X ; <i16> [#uses=1]
%tmp.3 = add i16 %tmp.0, 329 ; <i16> [#uses=1]
store i16 %tmp.3, i16* @X
ret void
@@ -23,8 +23,8 @@ define void @test2() nounwind uwtable ssp {
; CHECK: mov
; CHECK-NEXT: and
; CHECK-NEXT: ret
- %bf.load35 = load i56* bitcast ([7 x i8]* getelementptr inbounds (%struct.S2* @s2, i32 0, i32 5) to i56*), align 16
+ %bf.load35 = load i56, i56* bitcast ([7 x i8]* getelementptr inbounds (%struct.S2, %struct.S2* @s2, i32 0, i32 5) to i56*), align 16
%bf.clear36 = and i56 %bf.load35, -1125895611875329
- store i56 %bf.clear36, i56* bitcast ([7 x i8]* getelementptr inbounds (%struct.S2* @s2, i32 0, i32 5) to i56*), align 16
+ store i56 %bf.clear36, i56* bitcast ([7 x i8]* getelementptr inbounds (%struct.S2, %struct.S2* @s2, i32 0, i32 5) to i56*), align 16
ret void
}
diff --git a/test/CodeGen/X86/store_op_load_fold2.ll b/test/CodeGen/X86/store_op_load_fold2.ll
index 705fdcdc13af..f47d87f4bb89 100644
--- a/test/CodeGen/X86/store_op_load_fold2.ll
+++ b/test/CodeGen/X86/store_op_load_fold2.ll
@@ -6,10 +6,10 @@ target datalayout = "e-p:32:32"
define internal fastcc i32 @dct_chroma(i32 %uv, i32 %cr_cbp) nounwind {
cond_true2732.preheader: ; preds = %entry
- %tmp2666 = getelementptr %struct.Macroblock* null, i32 0, i32 13 ; <i64*> [#uses=2]
+ %tmp2666 = getelementptr %struct.Macroblock, %struct.Macroblock* null, i32 0, i32 13 ; <i64*> [#uses=2]
%tmp2674 = trunc i32 0 to i8 ; <i8> [#uses=1]
- %tmp2667.us.us = load i64* %tmp2666 ; <i64> [#uses=1]
- %tmp2670.us.us = load i64* null ; <i64> [#uses=1]
+ %tmp2667.us.us = load i64, i64* %tmp2666 ; <i64> [#uses=1]
+ %tmp2670.us.us = load i64, i64* null ; <i64> [#uses=1]
%shift.upgrd.1 = zext i8 %tmp2674 to i64 ; <i64> [#uses=1]
%tmp2675.us.us = shl i64 %tmp2670.us.us, %shift.upgrd.1 ; <i64> [#uses=1]
%tmp2675not.us.us = xor i64 %tmp2675.us.us, -1 ; <i64> [#uses=1]
diff --git a/test/CodeGen/X86/stores-merging.ll b/test/CodeGen/X86/stores-merging.ll
index 61dea088995b..d6daa573b4ae 100644
--- a/test/CodeGen/X86/stores-merging.ll
+++ b/test/CodeGen/X86/stores-merging.ll
@@ -15,9 +15,9 @@ entry:
; CHECK: movq %rax, e+4(%rip)
; CHECK: movl $456, e+8(%rip)
- store i32 1, i32* getelementptr inbounds (%structTy* @e, i64 0, i32 1), align 4
- store i32 123, i32* getelementptr inbounds (%structTy* @e, i64 0, i32 2), align 4
- store i32 456, i32* getelementptr inbounds (%structTy* @e, i64 0, i32 2), align 4
+ store i32 1, i32* getelementptr inbounds (%structTy, %structTy* @e, i64 0, i32 1), align 4
+ store i32 123, i32* getelementptr inbounds (%structTy, %structTy* @e, i64 0, i32 2), align 4
+ store i32 456, i32* getelementptr inbounds (%structTy, %structTy* @e, i64 0, i32 2), align 4
ret void
}
diff --git a/test/CodeGen/X86/stride-nine-with-base-reg.ll b/test/CodeGen/X86/stride-nine-with-base-reg.ll
index ddf059c675df..551bd7c2541e 100644
--- a/test/CodeGen/X86/stride-nine-with-base-reg.ll
+++ b/test/CodeGen/X86/stride-nine-with-base-reg.ll
@@ -17,16 +17,16 @@ entry:
bb:
%i.019.0 = phi i32 [ %indvar.next, %bb ], [ 0, %entry ]
- %tmp2 = getelementptr [1000 x i8]* @B, i32 0, i32 %i.019.0
- %tmp3 = load i8* %tmp2, align 4
+ %tmp2 = getelementptr [1000 x i8], [1000 x i8]* @B, i32 0, i32 %i.019.0
+ %tmp3 = load i8, i8* %tmp2, align 4
%tmp4 = mul i8 %tmp3, 2
- %tmp5 = getelementptr [1000 x i8]* @A, i32 0, i32 %i.019.0
+ %tmp5 = getelementptr [1000 x i8], [1000 x i8]* @A, i32 0, i32 %i.019.0
store i8 %tmp4, i8* %tmp5, align 4
%tmp8 = mul i32 %i.019.0, 9
%tmp0 = add i32 %tmp8, %p
- %tmp10 = getelementptr [1000 x i8]* @P, i32 0, i32 %tmp0
+ %tmp10 = getelementptr [1000 x i8], [1000 x i8]* @P, i32 0, i32 %tmp0
store i8 17, i8* %tmp10, align 4
- %tmp11 = getelementptr [1000 x i8]* @Q, i32 0, i32 %tmp0
+ %tmp11 = getelementptr [1000 x i8], [1000 x i8]* @Q, i32 0, i32 %tmp0
store i8 19, i8* %tmp11, align 4
%indvar.next = add i32 %i.019.0, 1
%exitcond = icmp eq i32 %indvar.next, %m
diff --git a/test/CodeGen/X86/stride-reuse.ll b/test/CodeGen/X86/stride-reuse.ll
index 81de22ca4e35..af036f3a8f14 100644
--- a/test/CodeGen/X86/stride-reuse.ll
+++ b/test/CodeGen/X86/stride-reuse.ll
@@ -13,14 +13,14 @@ entry:
bb:
%i.019.0 = phi i32 [ %indvar.next, %bb ], [ 0, %entry ]
- %tmp2 = getelementptr [1000 x float]* @B, i32 0, i32 %i.019.0
- %tmp3 = load float* %tmp2, align 4
+ %tmp2 = getelementptr [1000 x float], [1000 x float]* @B, i32 0, i32 %i.019.0
+ %tmp3 = load float, float* %tmp2, align 4
%tmp4 = fmul float %tmp3, 2.000000e+00
- %tmp5 = getelementptr [1000 x float]* @A, i32 0, i32 %i.019.0
+ %tmp5 = getelementptr [1000 x float], [1000 x float]* @A, i32 0, i32 %i.019.0
store float %tmp4, float* %tmp5, align 4
%tmp8 = shl i32 %i.019.0, 1
%tmp9 = add i32 %tmp8, 64
- %tmp10 = getelementptr [1000 x i32]* @P, i32 0, i32 %i.019.0
+ %tmp10 = getelementptr [1000 x i32], [1000 x i32]* @P, i32 0, i32 %i.019.0
store i32 %tmp9, i32* %tmp10, align 4
%indvar.next = add i32 %i.019.0, 1
%exitcond = icmp eq i32 %indvar.next, %m
diff --git a/test/CodeGen/X86/sub-with-overflow.ll b/test/CodeGen/X86/sub-with-overflow.ll
index baaee3541108..fa00d6f61c70 100644
--- a/test/CodeGen/X86/sub-with-overflow.ll
+++ b/test/CodeGen/X86/sub-with-overflow.ll
@@ -11,11 +11,11 @@ entry:
br i1 %obit, label %overflow, label %normal
normal:
- %t1 = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([4 x i8]* @ok, i32 0, i32 0), i32 %sum ) nounwind
+ %t1 = tail call i32 (i8*, ...) @printf( i8* getelementptr ([4 x i8], [4 x i8]* @ok, i32 0, i32 0), i32 %sum ) nounwind
ret i1 true
overflow:
- %t2 = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([4 x i8]* @no, i32 0, i32 0) ) nounwind
+ %t2 = tail call i32 (i8*, ...) @printf( i8* getelementptr ([4 x i8], [4 x i8]* @no, i32 0, i32 0) ) nounwind
ret i1 false
; CHECK-LABEL: func1:
@@ -31,11 +31,11 @@ entry:
br i1 %obit, label %carry, label %normal
normal:
- %t1 = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([4 x i8]* @ok, i32 0, i32 0), i32 %sum ) nounwind
+ %t1 = tail call i32 (i8*, ...) @printf( i8* getelementptr ([4 x i8], [4 x i8]* @ok, i32 0, i32 0), i32 %sum ) nounwind
ret i1 true
carry:
- %t2 = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([4 x i8]* @no, i32 0, i32 0) ) nounwind
+ %t2 = tail call i32 (i8*, ...) @printf( i8* getelementptr ([4 x i8], [4 x i8]* @no, i32 0, i32 0) ) nounwind
ret i1 false
; CHECK-LABEL: func2:
diff --git a/test/CodeGen/X86/subreg-to-reg-0.ll b/test/CodeGen/X86/subreg-to-reg-0.ll
index d718c85a1d1f..251a754f4383 100644
--- a/test/CodeGen/X86/subreg-to-reg-0.ll
+++ b/test/CodeGen/X86/subreg-to-reg-0.ll
@@ -4,7 +4,7 @@
; x86-64's implicit zero-extension!
define i64 @foo(i32* %p) nounwind {
- %t = load i32* %p
+ %t = load i32, i32* %p
%n = add i32 %t, 1
%z = zext i32 %n to i64
ret i64 %z
diff --git a/test/CodeGen/X86/subreg-to-reg-2.ll b/test/CodeGen/X86/subreg-to-reg-2.ll
index 49d2e88d2c8d..6766b013a36e 100644
--- a/test/CodeGen/X86/subreg-to-reg-2.ll
+++ b/test/CodeGen/X86/subreg-to-reg-2.ll
@@ -10,8 +10,8 @@
define internal fastcc %XXValue* @t(i64* %out, %"struct.XXC::ArrayStorage"* %tmp9) nounwind {
prologue:
- %array = load %XXValue** inttoptr (i64 11111111 to %XXValue**) ; <%XXValue*> [#uses=0]
- %index = load %XXValue** inttoptr (i64 22222222 to %XXValue**) ; <%XXValue*> [#uses=1]
+ %array = load %XXValue*, %XXValue** inttoptr (i64 11111111 to %XXValue**) ; <%XXValue*> [#uses=0]
+ %index = load %XXValue*, %XXValue** inttoptr (i64 22222222 to %XXValue**) ; <%XXValue*> [#uses=1]
%tmp = ptrtoint %XXValue* %index to i64 ; <i64> [#uses=2]
store i64 %tmp, i64* %out
%tmp6 = trunc i64 %tmp to i32 ; <i32> [#uses=1]
@@ -19,7 +19,7 @@ prologue:
bb5: ; preds = %prologue
%tmp10 = zext i32 %tmp6 to i64 ; <i64> [#uses=1]
- %tmp11 = getelementptr %"struct.XXC::ArrayStorage"* %tmp9, i64 0, i32 5, i64 %tmp10 ; <%XXValue**> [#uses=1]
- %tmp12 = load %XXValue** %tmp11, align 8 ; <%XXValue*> [#uses=1]
+ %tmp11 = getelementptr %"struct.XXC::ArrayStorage", %"struct.XXC::ArrayStorage"* %tmp9, i64 0, i32 5, i64 %tmp10 ; <%XXValue**> [#uses=1]
+ %tmp12 = load %XXValue*, %XXValue** %tmp11, align 8 ; <%XXValue*> [#uses=1]
ret %XXValue* %tmp12
}
diff --git a/test/CodeGen/X86/subreg-to-reg-4.ll b/test/CodeGen/X86/subreg-to-reg-4.ll
index 0693789fe5d5..8340fc536140 100644
--- a/test/CodeGen/X86/subreg-to-reg-4.ll
+++ b/test/CodeGen/X86/subreg-to-reg-4.ll
@@ -28,7 +28,7 @@ entry:
}
define void @cola(i64 *%x, i64 %y, i64* %z, i64 %u) nounwind readnone {
entry:
- %p = load i64* %x
+ %p = load i64, i64* %x
%t0 = add i64 %p, %y
%t1 = and i64 %t0, 4294967295
%t2 = xor i64 %t1, %u
@@ -37,7 +37,7 @@ entry:
}
define void @yaks(i64 *%x, i64 %y, i64* %z, i64 %u) nounwind readnone {
entry:
- %p = load i64* %x
+ %p = load i64, i64* %x
%t0 = add i64 %p, %y
%t1 = xor i64 %t0, %u
%t2 = and i64 %t1, 4294967295
@@ -46,8 +46,8 @@ entry:
}
define void @foo(i64 *%x, i64 *%y, i64* %z) nounwind readnone {
entry:
- %a = load i64* %x
- %b = load i64* %y
+ %a = load i64, i64* %x
+ %b = load i64, i64* %y
%t0 = add i64 %a, %b
%t1 = and i64 %t0, 4294967295
store i64 %t1, i64* %z
@@ -94,7 +94,7 @@ entry:
}
define void @scola(i64 *%x, i64 %y, i64* %z, i64 %u) nounwind readnone {
entry:
- %p = load i64* %x
+ %p = load i64, i64* %x
%t0 = sub i64 %p, %y
%t1 = and i64 %t0, 4294967295
%t2 = xor i64 %t1, %u
@@ -103,7 +103,7 @@ entry:
}
define void @syaks(i64 *%x, i64 %y, i64* %z, i64 %u) nounwind readnone {
entry:
- %p = load i64* %x
+ %p = load i64, i64* %x
%t0 = sub i64 %p, %y
%t1 = xor i64 %t0, %u
%t2 = and i64 %t1, 4294967295
@@ -112,8 +112,8 @@ entry:
}
define void @sfoo(i64 *%x, i64 *%y, i64* %z) nounwind readnone {
entry:
- %a = load i64* %x
- %b = load i64* %y
+ %a = load i64, i64* %x
+ %b = load i64, i64* %y
%t0 = sub i64 %a, %b
%t1 = and i64 %t0, 4294967295
store i64 %t1, i64* %z
diff --git a/test/CodeGen/X86/subreg-to-reg-6.ll b/test/CodeGen/X86/subreg-to-reg-6.ll
index 76430cd783e3..bef09fa944ef 100644
--- a/test/CodeGen/X86/subreg-to-reg-6.ll
+++ b/test/CodeGen/X86/subreg-to-reg-6.ll
@@ -2,7 +2,7 @@
define i64 @foo() nounwind {
entry:
- %t0 = load i32* null, align 8
+ %t0 = load i32, i32* null, align 8
switch i32 %t0, label %bb65 [
i32 16, label %bb
i32 12, label %bb56
diff --git a/test/CodeGen/X86/sunkaddr-ext.ll b/test/CodeGen/X86/sunkaddr-ext.ll
index 6d238678ce30..b57981151bdd 100644
--- a/test/CodeGen/X86/sunkaddr-ext.ll
+++ b/test/CodeGen/X86/sunkaddr-ext.ll
@@ -11,11 +11,11 @@ target triple = "x86_64-apple-macosx10.9.0"
define void @test_sink(i8* %arg1, i32 %arg2, i8 %arg3) #0 {
%tmp1 = add i32 -2147483648, %arg2
%tmp2 = add i32 -2147483648, %tmp1
- %tmp3 = getelementptr i8* %arg1, i32 %arg2
+ %tmp3 = getelementptr i8, i8* %arg1, i32 %arg2
br label %bb1
bb1:
- %tmp4 = getelementptr i8* %arg1, i32 %tmp2
+ %tmp4 = getelementptr i8, i8* %arg1, i32 %tmp2
store i8 %arg3, i8* %tmp4
ret void;
}
diff --git a/test/CodeGen/X86/switch-bt.ll b/test/CodeGen/X86/switch-bt.ll
index a80002bc97cc..2cf3aafe5471 100644
--- a/test/CodeGen/X86/switch-bt.ll
+++ b/test/CodeGen/X86/switch-bt.ll
@@ -16,8 +16,8 @@ define void @test(i8* %l) nounwind {
entry:
%l.addr = alloca i8*, align 8 ; <i8**> [#uses=2]
store i8* %l, i8** %l.addr
- %tmp = load i8** %l.addr ; <i8*> [#uses=1]
- %tmp1 = load i8* %tmp ; <i8> [#uses=1]
+ %tmp = load i8*, i8** %l.addr ; <i8*> [#uses=1]
+ %tmp1 = load i8, i8* %tmp ; <i8> [#uses=1]
%conv = sext i8 %tmp1 to i32 ; <i32> [#uses=1]
switch i32 %conv, label %sw.default [
i32 62, label %sw.bb
@@ -99,3 +99,59 @@ if.then:
if.end:
ret void
}
+
+; Ensure that optimizing for jump tables doesn't needlessly deteriorate the
+; created binary tree search. See PR22262.
+define void @test4(i32 %x, i32* %y) {
+; CHECK-LABEL: test4:
+
+entry:
+ switch i32 %x, label %sw.default [
+ i32 10, label %sw.bb
+ i32 20, label %sw.bb1
+ i32 30, label %sw.bb2
+ i32 40, label %sw.bb3
+ i32 50, label %sw.bb4
+ i32 60, label %sw.bb5
+ ]
+sw.bb:
+ store i32 1, i32* %y
+ br label %sw.epilog
+sw.bb1:
+ store i32 2, i32* %y
+ br label %sw.epilog
+sw.bb2:
+ store i32 3, i32* %y
+ br label %sw.epilog
+sw.bb3:
+ store i32 4, i32* %y
+ br label %sw.epilog
+sw.bb4:
+ store i32 5, i32* %y
+ br label %sw.epilog
+sw.bb5:
+ store i32 6, i32* %y
+ br label %sw.epilog
+sw.default:
+ store i32 7, i32* %y
+ br label %sw.epilog
+sw.epilog:
+ ret void
+
+; The balanced binary switch here would start with a comparison against 39, but
+; it is currently starting with 29 because of the density-sum heuristic.
+; CHECK: cmpl $39
+; CHECK: jg
+; CHECK: cmpl $10
+; CHECK: je
+; CHECK: cmpl $20
+; CHECK: jne
+; CHECK: cmpl $40
+; CHECK: je
+; CHECK: cmpl $50
+; CHECK: jne
+; CHECK: cmpl $30
+; CHECK: jne
+; CHECK: cmpl $60
+; CHECK: jne
+}
diff --git a/test/CodeGen/X86/switch-crit-edge-constant.ll b/test/CodeGen/X86/switch-crit-edge-constant.ll
index 18f987e72213..e9a208d709ef 100644
--- a/test/CodeGen/X86/switch-crit-edge-constant.ll
+++ b/test/CodeGen/X86/switch-crit-edge-constant.ll
@@ -34,20 +34,20 @@ cond_true: ; preds = %bb2
br label %blahaha
blahaha: ; preds = %cond_true, %bb2, %entry, %entry, %entry, %entry, %entry, %entry, %entry, %entry, %entry, %entry
- %s.0 = phi i8* [ getelementptr ([8 x i8]* @str, i32 0, i64 0), %cond_true ], [ getelementptr ([5 x i8]* @str1, i32 0, i64 0), %entry ], [ getelementptr ([5 x i8]* @str1, i32 0, i64 0), %entry ], [ getelementptr ([5 x i8]* @str1, i32 0, i64 0), %entry ], [ getelementptr ([5 x i8]* @str1, i32 0, i64 0), %entry ], [ getelementptr ([5 x i8]* @str1, i32 0, i64 0), %entry ], [ getelementptr ([5 x i8]* @str1, i32 0, i64 0), %entry ], [ getelementptr ([5 x i8]* @str1, i32 0, i64 0), %entry ], [ getelementptr ([5 x i8]* @str1, i32 0, i64 0), %entry ], [ getelementptr ([5 x i8]* @str1, i32 0, i64 0), %entry ], [ getelementptr ([5 x i8]* @str1, i32 0, i64 0), %entry ], [ getelementptr ([5 x i8]* @str2, i32 0, i64 0), %bb2 ] ; <i8*> [#uses=13]
- %tmp8 = tail call i32 (i8*, ...)* @printf( i8* %s.0 ) ; <i32> [#uses=0]
- %tmp10 = tail call i32 (i8*, ...)* @printf( i8* %s.0 ) ; <i32> [#uses=0]
- %tmp12 = tail call i32 (i8*, ...)* @printf( i8* %s.0 ) ; <i32> [#uses=0]
- %tmp14 = tail call i32 (i8*, ...)* @printf( i8* %s.0 ) ; <i32> [#uses=0]
- %tmp16 = tail call i32 (i8*, ...)* @printf( i8* %s.0 ) ; <i32> [#uses=0]
- %tmp18 = tail call i32 (i8*, ...)* @printf( i8* %s.0 ) ; <i32> [#uses=0]
- %tmp20 = tail call i32 (i8*, ...)* @printf( i8* %s.0 ) ; <i32> [#uses=0]
- %tmp22 = tail call i32 (i8*, ...)* @printf( i8* %s.0 ) ; <i32> [#uses=0]
- %tmp24 = tail call i32 (i8*, ...)* @printf( i8* %s.0 ) ; <i32> [#uses=0]
- %tmp26 = tail call i32 (i8*, ...)* @printf( i8* %s.0 ) ; <i32> [#uses=0]
- %tmp28 = tail call i32 (i8*, ...)* @printf( i8* %s.0 ) ; <i32> [#uses=0]
- %tmp30 = tail call i32 (i8*, ...)* @printf( i8* %s.0 ) ; <i32> [#uses=0]
- %tmp32 = tail call i32 (i8*, ...)* @printf( i8* %s.0 ) ; <i32> [#uses=0]
+ %s.0 = phi i8* [ getelementptr ([8 x i8], [8 x i8]* @str, i32 0, i64 0), %cond_true ], [ getelementptr ([5 x i8], [5 x i8]* @str1, i32 0, i64 0), %entry ], [ getelementptr ([5 x i8], [5 x i8]* @str1, i32 0, i64 0), %entry ], [ getelementptr ([5 x i8], [5 x i8]* @str1, i32 0, i64 0), %entry ], [ getelementptr ([5 x i8], [5 x i8]* @str1, i32 0, i64 0), %entry ], [ getelementptr ([5 x i8], [5 x i8]* @str1, i32 0, i64 0), %entry ], [ getelementptr ([5 x i8], [5 x i8]* @str1, i32 0, i64 0), %entry ], [ getelementptr ([5 x i8], [5 x i8]* @str1, i32 0, i64 0), %entry ], [ getelementptr ([5 x i8], [5 x i8]* @str1, i32 0, i64 0), %entry ], [ getelementptr ([5 x i8], [5 x i8]* @str1, i32 0, i64 0), %entry ], [ getelementptr ([5 x i8], [5 x i8]* @str1, i32 0, i64 0), %entry ], [ getelementptr ([5 x i8], [5 x i8]* @str2, i32 0, i64 0), %bb2 ] ; <i8*> [#uses=13]
+ %tmp8 = tail call i32 (i8*, ...) @printf( i8* %s.0 ) ; <i32> [#uses=0]
+ %tmp10 = tail call i32 (i8*, ...) @printf( i8* %s.0 ) ; <i32> [#uses=0]
+ %tmp12 = tail call i32 (i8*, ...) @printf( i8* %s.0 ) ; <i32> [#uses=0]
+ %tmp14 = tail call i32 (i8*, ...) @printf( i8* %s.0 ) ; <i32> [#uses=0]
+ %tmp16 = tail call i32 (i8*, ...) @printf( i8* %s.0 ) ; <i32> [#uses=0]
+ %tmp18 = tail call i32 (i8*, ...) @printf( i8* %s.0 ) ; <i32> [#uses=0]
+ %tmp20 = tail call i32 (i8*, ...) @printf( i8* %s.0 ) ; <i32> [#uses=0]
+ %tmp22 = tail call i32 (i8*, ...) @printf( i8* %s.0 ) ; <i32> [#uses=0]
+ %tmp24 = tail call i32 (i8*, ...) @printf( i8* %s.0 ) ; <i32> [#uses=0]
+ %tmp26 = tail call i32 (i8*, ...) @printf( i8* %s.0 ) ; <i32> [#uses=0]
+ %tmp28 = tail call i32 (i8*, ...) @printf( i8* %s.0 ) ; <i32> [#uses=0]
+ %tmp30 = tail call i32 (i8*, ...) @printf( i8* %s.0 ) ; <i32> [#uses=0]
+ %tmp32 = tail call i32 (i8*, ...) @printf( i8* %s.0 ) ; <i32> [#uses=0]
ret void
}
diff --git a/test/CodeGen/X86/switch-or.ll b/test/CodeGen/X86/switch-or.ll
index 75832c7d304c..6e6b013d9fa8 100644
--- a/test/CodeGen/X86/switch-or.ll
+++ b/test/CodeGen/X86/switch-or.ll
@@ -12,7 +12,7 @@ entry:
]
if.then:
- %call = tail call i32 (...)* @bar() nounwind
+ %call = tail call i32 (...) @bar() nounwind
ret void
if.end:
diff --git a/test/CodeGen/X86/switch-zextload.ll b/test/CodeGen/X86/switch-zextload.ll
index 55425bc7da5c..2dd3f0e3ae72 100644
--- a/test/CodeGen/X86/switch-zextload.ll
+++ b/test/CodeGen/X86/switch-zextload.ll
@@ -9,7 +9,7 @@ target triple = "i386-apple-darwin9.6"
define fastcc void @set_proof_and_disproof_numbers(%struct.node_t* nocapture %node) nounwind {
entry:
- %0 = load i8* null, align 1 ; <i8> [#uses=1]
+ %0 = load i8, i8* null, align 1 ; <i8> [#uses=1]
switch i8 %0, label %return [
i8 2, label %bb31
i8 0, label %bb80
diff --git a/test/CodeGen/X86/switch.ll b/test/CodeGen/X86/switch.ll
new file mode 100644
index 000000000000..66a739c8470c
--- /dev/null
+++ b/test/CodeGen/X86/switch.ll
@@ -0,0 +1,536 @@
+; RUN: llc -mtriple=x86_64-linux-gnu %s -o - | FileCheck %s
+; RUN: llc -mtriple=x86_64-linux-gnu %s -o - -O0 | FileCheck --check-prefix=NOOPT %s
+
+declare void @g(i32)
+
+define void @basic(i32 %x) {
+entry:
+ switch i32 %x, label %return [
+ i32 3, label %bb0
+ i32 1, label %bb1
+ i32 4, label %bb1
+ i32 5, label %bb2
+ ]
+bb0: tail call void @g(i32 0) br label %return
+bb1: tail call void @g(i32 1) br label %return
+bb2: tail call void @g(i32 1) br label %return
+return: ret void
+
+; Should be lowered as straight compares in -O0 mode.
+; NOOPT-LABEL: basic
+; NOOPT: subl $1, %eax
+; NOOPT: je
+; NOOPT: subl $3, %eax
+; NOOPT: je
+; NOOPT: subl $4, %eax
+; NOOPT: je
+; NOOPT: subl $5, %eax
+; NOOPT: je
+
+; Jump table otherwise.
+; CHECK-LABEL: basic
+; CHECK: decl
+; CHECK: cmpl $4
+; CHECK: ja
+; CHECK: jmpq *.LJTI
+}
+
+
+define void @simple_ranges(i32 %x) {
+entry:
+ switch i32 %x, label %return [
+ i32 0, label %bb0
+ i32 1, label %bb0
+ i32 2, label %bb0
+ i32 3, label %bb0
+ i32 100, label %bb1
+ i32 101, label %bb1
+ i32 102, label %bb1
+ i32 103, label %bb1
+ ]
+bb0: tail call void @g(i32 0) br label %return
+bb1: tail call void @g(i32 1) br label %return
+return: ret void
+
+; Should be lowered to two range checks.
+; CHECK-LABEL: simple_ranges
+; CHECK: leal -100
+; CHECK: cmpl $4
+; CHECK: jae
+; CHECK: cmpl $3
+; CHECK: ja
+
+; We do this even at -O0, because it's cheap and makes codegen faster.
+; NOOPT-LABEL: simple_ranges
+; NOOPT: subl $4
+; NOOPT: jb
+; NOOPT: addl $-100
+; NOOPT: subl $4
+; NOOPT: jb
+}
+
+
+define void @jt_is_better(i32 %x) {
+entry:
+ switch i32 %x, label %return [
+ i32 0, label %bb0
+ i32 2, label %bb0
+ i32 4, label %bb0
+ i32 1, label %bb1
+ i32 3, label %bb1
+ i32 5, label %bb1
+
+ i32 6, label %bb2
+ i32 7, label %bb3
+ i32 8, label %bb4
+ ]
+bb0: tail call void @g(i32 0) br label %return
+bb1: tail call void @g(i32 1) br label %return
+bb2: tail call void @g(i32 2) br label %return
+bb3: tail call void @g(i32 3) br label %return
+bb4: tail call void @g(i32 4) br label %return
+return: ret void
+
+; Cases 0-5 could be lowered with two bit tests,
+; but with 6-8, the whole switch is suitable for a jump table.
+; CHECK-LABEL: jt_is_better
+; CHECK: cmpl $8
+; CHECK: jbe
+; CHECK: jmpq *.LJTI
+}
+
+
+define void @bt_is_better(i32 %x) {
+entry:
+ switch i32 %x, label %return [
+ i32 0, label %bb0
+ i32 3, label %bb0
+ i32 6, label %bb0
+ i32 1, label %bb1
+ i32 4, label %bb1
+ i32 7, label %bb1
+ i32 2, label %bb2
+ i32 5, label %bb2
+ i32 8, label %bb2
+
+ ]
+bb0: tail call void @g(i32 0) br label %return
+bb1: tail call void @g(i32 1) br label %return
+bb2: tail call void @g(i32 2) br label %return
+return: ret void
+
+; This could be lowered as a jump table, but bit tests are more efficient.
+; CHECK-LABEL: bt_is_better
+; 73 = 2^0 + 2^3 + 2^6
+; CHECK: movl $73
+; CHECK: btl
+; 146 = 2^1 + 2^4 + 2^7
+; CHECK: movl $146
+; CHECK: btl
+; 292 = 2^2 + 2^5 + 2^8
+; CHECK: movl $292
+; CHECK: btl
+}
+
+
+define void @optimal_pivot1(i32 %x) {
+entry:
+ switch i32 %x, label %return [
+ i32 100, label %bb0
+ i32 200, label %bb1
+ i32 300, label %bb0
+ i32 400, label %bb1
+ i32 500, label %bb0
+ i32 600, label %bb1
+
+ ]
+bb0: tail call void @g(i32 0) br label %return
+bb1: tail call void @g(i32 1) br label %return
+return: ret void
+
+; Should pivot around 400 for two subtrees of equal size.
+; CHECK-LABEL: optimal_pivot1
+; CHECK-NOT: cmpl
+; CHECK: cmpl $399
+}
+
+
+define void @optimal_pivot2(i32 %x) {
+entry:
+ switch i32 %x, label %return [
+ i32 100, label %bb0 i32 101, label %bb1 i32 102, label %bb2 i32 103, label %bb3
+ i32 200, label %bb0 i32 201, label %bb1 i32 202, label %bb2 i32 203, label %bb3
+ i32 300, label %bb0 i32 301, label %bb1 i32 302, label %bb2 i32 303, label %bb3
+ i32 400, label %bb0 i32 401, label %bb1 i32 402, label %bb2 i32 403, label %bb3
+
+ ]
+bb0: tail call void @g(i32 0) br label %return
+bb1: tail call void @g(i32 1) br label %return
+bb2: tail call void @g(i32 2) br label %return
+bb3: tail call void @g(i32 3) br label %return
+return: ret void
+
+; Should pivot around 300 for two subtrees with two jump tables each.
+; CHECK-LABEL: optimal_pivot2
+; CHECK-NOT: cmpl
+; CHECK: cmpl $299
+; CHECK: jmpq *.LJTI
+; CHECK: jmpq *.LJTI
+; CHECK: jmpq *.LJTI
+; CHECK: jmpq *.LJTI
+}
+
+
+define void @optimal_jump_table1(i32 %x) {
+entry:
+ switch i32 %x, label %return [
+ i32 0, label %bb0
+ i32 5, label %bb1
+ i32 6, label %bb2
+ i32 12, label %bb3
+ i32 13, label %bb4
+ i32 15, label %bb5
+ ]
+bb0: tail call void @g(i32 0) br label %return
+bb1: tail call void @g(i32 1) br label %return
+bb2: tail call void @g(i32 2) br label %return
+bb3: tail call void @g(i32 3) br label %return
+bb4: tail call void @g(i32 4) br label %return
+bb5: tail call void @g(i32 5) br label %return
+return: ret void
+
+; Splitting in the largest gap (between 6 and 12) would yield a suboptimal result.
+; Expecting a jump table from 5 to 15.
+; CHECK-LABEL: optimal_jump_table1
+; CHECK: leal -5
+; CHECK: cmpl $10
+; CHECK: jmpq *.LJTI
+}
+
+
+define void @optimal_jump_table2(i32 %x) {
+entry:
+ switch i32 %x, label %return [
+ i32 0, label %bb0
+ i32 1, label %bb1
+ i32 2, label %bb2
+ i32 9, label %bb3
+ i32 14, label %bb4
+ i32 15, label %bb5
+ ]
+bb0: tail call void @g(i32 0) br label %return
+bb1: tail call void @g(i32 1) br label %return
+bb2: tail call void @g(i32 2) br label %return
+bb3: tail call void @g(i32 3) br label %return
+bb4: tail call void @g(i32 4) br label %return
+bb5: tail call void @g(i32 5) br label %return
+return: ret void
+
+; Partitioning the cases to the minimum number of dense sets is not good enough.
+; This can be partitioned as {0,1,2,9},{14,15} or {0,1,2},{9,14,15}. The former
+; should be preferred. Expecting a table from 0-9.
+; CHECK-LABEL: optimal_jump_table2
+; CHECK: cmpl $9
+; CHECK: jmpq *.LJTI
+}
+
+
+define void @optimal_jump_table3(i32 %x) {
+entry:
+ switch i32 %x, label %return [
+ i32 1, label %bb0
+ i32 2, label %bb1
+ i32 3, label %bb2
+ i32 10, label %bb3
+ i32 13, label %bb0
+ i32 14, label %bb1
+ i32 15, label %bb2
+ i32 20, label %bb3
+ i32 25, label %bb4
+ ]
+bb0: tail call void @g(i32 0) br label %return
+bb1: tail call void @g(i32 1) br label %return
+bb2: tail call void @g(i32 2) br label %return
+bb3: tail call void @g(i32 3) br label %return
+bb4: tail call void @g(i32 4) br label %return
+return: ret void
+
+; Splitting to maximize left-right density sum and gap size would split this
+; between 3 and 10, and then between 20 and 25. It's better to build a table
+; from 1-20.
+; CHECK-LABEL: optimal_jump_table3
+; CHECK: leal -1
+; CHECK: cmpl $19
+; CHECK: jmpq *.LJTI
+}
+
+%struct.S = type { %struct.S*, i32 }
+define void @phi_node_trouble(%struct.S* %s) {
+entry:
+ br label %header
+header:
+ %ptr = phi %struct.S* [ %s, %entry ], [ %next, %loop ]
+ %bool = icmp eq %struct.S* %ptr, null
+ br i1 %bool, label %exit, label %loop
+loop:
+ %nextptr = getelementptr inbounds %struct.S, %struct.S* %ptr, i64 0, i32 0
+ %next = load %struct.S*, %struct.S** %nextptr
+ %xptr = getelementptr inbounds %struct.S, %struct.S* %next, i64 0, i32 1
+ %x = load i32, i32* %xptr
+ switch i32 %x, label %exit [
+ i32 4, label %header
+ i32 36, label %exit2
+ i32 69, label %exit2
+ i32 25, label %exit2
+ ]
+exit:
+ ret void
+exit2:
+ ret void
+
+; This will be lowered to a comparison with 4 and then bit tests. Make sure
+; that the phi node in %header gets a value from the comparison block.
+; CHECK-LABEL: phi_node_trouble
+; CHECK: movq (%[[REG1:[a-z]+]]), %[[REG1]]
+; CHECK: movl 8(%[[REG1]]), %[[REG2:[a-z]+]]
+; CHECK: cmpl $4, %[[REG2]]
+}
+
+
+define void @default_only(i32 %x) {
+entry:
+ br label %sw
+return:
+ ret void
+sw:
+ switch i32 %x, label %return [
+ ]
+
+; Branch directly to the default.
+; (In optimized builds the switch is removed earlier.)
+; NOOPT-LABEL: default_only
+; NOOPT: .[[L:[A-Z0-9_]+]]:
+; NOOPT-NEXT: retq
+; NOOPT: jmp .[[L]]
+}
+
+
+define void @int_max_table_cluster(i8 %x) {
+entry:
+ switch i8 %x, label %return [
+ i8 0, label %bb0 i8 1, label %bb0 i8 2, label %bb0 i8 3, label %bb0
+ i8 4, label %bb0 i8 5, label %bb0 i8 6, label %bb0 i8 7, label %bb0
+ i8 8, label %bb0 i8 9, label %bb0 i8 10, label %bb0 i8 11, label %bb0
+ i8 12, label %bb0 i8 13, label %bb0 i8 14, label %bb0 i8 15, label %bb0
+ i8 16, label %bb0 i8 17, label %bb0 i8 18, label %bb0 i8 19, label %bb0
+ i8 20, label %bb0 i8 21, label %bb0 i8 22, label %bb0 i8 23, label %bb0
+ i8 24, label %bb0 i8 25, label %bb0 i8 26, label %bb0 i8 27, label %bb0
+ i8 28, label %bb0 i8 29, label %bb0 i8 30, label %bb0 i8 31, label %bb0
+ i8 32, label %bb0 i8 33, label %bb0 i8 34, label %bb0 i8 35, label %bb0
+ i8 36, label %bb0 i8 37, label %bb0 i8 38, label %bb0 i8 39, label %bb0
+ i8 40, label %bb0 i8 41, label %bb0 i8 42, label %bb0 i8 43, label %bb0
+ i8 44, label %bb0 i8 45, label %bb0 i8 46, label %bb0 i8 47, label %bb0
+ i8 48, label %bb0 i8 49, label %bb0 i8 50, label %bb0 i8 51, label %bb0
+ i8 52, label %bb0 i8 53, label %bb0 i8 54, label %bb0 i8 55, label %bb0
+ i8 56, label %bb0 i8 57, label %bb0 i8 58, label %bb0 i8 59, label %bb0
+ i8 60, label %bb0 i8 61, label %bb0 i8 62, label %bb0 i8 63, label %bb0
+ i8 64, label %bb0 i8 65, label %bb0 i8 66, label %bb0 i8 67, label %bb0
+ i8 68, label %bb0 i8 69, label %bb0 i8 70, label %bb0 i8 71, label %bb0
+ i8 72, label %bb0 i8 73, label %bb0 i8 74, label %bb0 i8 75, label %bb0
+ i8 76, label %bb0 i8 77, label %bb0 i8 78, label %bb0 i8 79, label %bb0
+ i8 80, label %bb0 i8 81, label %bb0 i8 82, label %bb0 i8 83, label %bb0
+ i8 84, label %bb0 i8 85, label %bb0 i8 86, label %bb0 i8 87, label %bb0
+ i8 88, label %bb0 i8 89, label %bb0 i8 90, label %bb0 i8 91, label %bb0
+ i8 92, label %bb0 i8 93, label %bb0 i8 94, label %bb0 i8 95, label %bb0
+ i8 96, label %bb0 i8 97, label %bb0 i8 98, label %bb0 i8 99, label %bb0
+ i8 100, label %bb0 i8 101, label %bb0 i8 102, label %bb0 i8 103, label %bb0
+ i8 104, label %bb0 i8 105, label %bb0 i8 106, label %bb0 i8 107, label %bb0
+ i8 108, label %bb0 i8 109, label %bb0 i8 110, label %bb0 i8 111, label %bb0
+ i8 112, label %bb0 i8 113, label %bb0 i8 114, label %bb0 i8 115, label %bb0
+ i8 116, label %bb0 i8 117, label %bb0 i8 118, label %bb0 i8 119, label %bb0
+ i8 120, label %bb0 i8 121, label %bb0 i8 122, label %bb0 i8 123, label %bb0
+ i8 124, label %bb0 i8 125, label %bb0 i8 126, label %bb0 i8 127, label %bb0
+ i8 -64, label %bb1 i8 -63, label %bb1 i8 -62, label %bb1 i8 -61, label %bb1
+ i8 -60, label %bb1 i8 -59, label %bb1 i8 -58, label %bb1 i8 -57, label %bb1
+ i8 -56, label %bb1 i8 -55, label %bb1 i8 -54, label %bb1 i8 -53, label %bb1
+ i8 -52, label %bb1 i8 -51, label %bb1 i8 -50, label %bb1 i8 -49, label %bb1
+ i8 -48, label %bb1 i8 -47, label %bb1 i8 -46, label %bb1 i8 -45, label %bb1
+ i8 -44, label %bb1 i8 -43, label %bb1 i8 -42, label %bb1 i8 -41, label %bb1
+ i8 -40, label %bb1 i8 -39, label %bb1 i8 -38, label %bb1 i8 -37, label %bb1
+ i8 -36, label %bb1 i8 -35, label %bb1 i8 -34, label %bb1 i8 -33, label %bb1
+ i8 -32, label %bb2 i8 -31, label %bb2 i8 -30, label %bb2 i8 -29, label %bb2
+ i8 -28, label %bb2 i8 -27, label %bb2 i8 -26, label %bb2 i8 -25, label %bb2
+ i8 -24, label %bb2 i8 -23, label %bb2 i8 -22, label %bb2 i8 -21, label %bb2
+ i8 -20, label %bb2 i8 -19, label %bb2 i8 -18, label %bb2 i8 -17, label %bb2
+ i8 -16, label %bb3 i8 -15, label %bb3 i8 -14, label %bb3 i8 -13, label %bb3
+ i8 -12, label %bb3 i8 -11, label %bb3 i8 -10, label %bb3 i8 -9, label %bb3
+ ]
+bb0: tail call void @g(i32 0) br label %return
+bb1: tail call void @g(i32 1) br label %return
+bb2: tail call void @g(i32 1) br label %return
+bb3: tail call void @g(i32 1) br label %return
+return: ret void
+
+; Don't infloop on jump tables where the upper bound is the max value of the
+; input type (in this case 127).
+; CHECK-LABEL: int_max_table_cluster
+; CHECK: jmpq *.LJTI
+}
+
+
+define void @bt_order_by_weight(i32 %x) {
+entry:
+ switch i32 %x, label %return [
+ i32 0, label %bb0
+ i32 3, label %bb0
+ i32 6, label %bb0
+ i32 1, label %bb1
+ i32 4, label %bb1
+ i32 7, label %bb1
+ i32 2, label %bb2
+ i32 5, label %bb2
+ i32 8, label %bb2
+ i32 9, label %bb2
+ ], !prof !1
+bb0: tail call void @g(i32 0) br label %return
+bb1: tail call void @g(i32 1) br label %return
+bb2: tail call void @g(i32 2) br label %return
+return: ret void
+
+; Cases 1,4,7 have a very large branch weight (which shouldn't overflow), so
+; their bit test should come first. 0,3,6 and 2,5,8,9 both have a weight of 12,
+; but the latter set has more cases, so should be tested for earlier.
+
+; CHECK-LABEL: bt_order_by_weight
+; 146 = 2^1 + 2^4 + 2^7
+; CHECK: movl $146
+; CHECK: btl
+; 804 = 2^2 + 2^5 + 2^8 + 2^9
+; CHECK: movl $804
+; CHECK: btl
+; 73 = 2^0 + 2^3 + 2^6
+; CHECK: movl $73
+; CHECK: btl
+}
+
+!1 = !{!"branch_weights",
+ ; Default:
+ i32 1,
+ ; Cases 0,3,6:
+ i32 4, i32 4, i32 4,
+ ; Cases 1,4,7:
+ i32 4294967295, i32 2, i32 4294967295,
+ ; Cases 2,5,8,9:
+ i32 3, i32 3, i32 3, i32 3}
+
+define void @order_by_weight_and_fallthrough(i32 %x) {
+entry:
+ switch i32 %x, label %return [
+ i32 100, label %bb1
+ i32 200, label %bb0
+ i32 300, label %bb0
+ ], !prof !2
+bb0: tail call void @g(i32 0) br label %return
+bb1: tail call void @g(i32 1) br label %return
+return: ret void
+
+; Case 200 has the highest weight and should come first. 100 and 300 have the
+; same weight, but 300 goes to the 'next' block, so should be last.
+; CHECK-LABEL: order_by_weight_and_fallthrough
+; CHECK: cmpl $200
+; CHECK: cmpl $100
+; CHECK: cmpl $300
+}
+
+!2 = !{!"branch_weights",
+ ; Default:
+ i32 1,
+ ; Case 100:
+ i32 10,
+ ; Case 200:
+ i32 1000,
+ ; Case 300:
+ i32 10}
+
+
+define void @zero_weight_tree(i32 %x) {
+entry:
+ switch i32 %x, label %return [
+ i32 0, label %bb0
+ i32 10, label %bb1
+ i32 20, label %bb2
+ i32 30, label %bb3
+ i32 40, label %bb4
+ i32 50, label %bb5
+ ], !prof !3
+bb0: tail call void @g(i32 0) br label %return
+bb1: tail call void @g(i32 1) br label %return
+bb2: tail call void @g(i32 2) br label %return
+bb3: tail call void @g(i32 3) br label %return
+bb4: tail call void @g(i32 4) br label %return
+bb5: tail call void @g(i32 5) br label %return
+return: ret void
+
+; Make sure to pick a pivot in the middle also with zero-weight cases.
+; CHECK-LABEL: zero_weight_tree
+; CHECK-NOT: cmpl
+; CHECK: cmpl $29
+}
+
+!3 = !{!"branch_weights", i32 1, i32 10, i32 0, i32 0, i32 0, i32 0, i32 10}
+
+
+define void @left_leaning_weight_balanced_tree(i32 %x) {
+entry:
+ switch i32 %x, label %return [
+ i32 0, label %bb0
+ i32 10, label %bb1
+ i32 20, label %bb2
+ i32 30, label %bb3
+ i32 40, label %bb4
+ i32 50, label %bb5
+ ], !prof !4
+bb0: tail call void @g(i32 0) br label %return
+bb1: tail call void @g(i32 1) br label %return
+bb2: tail call void @g(i32 2) br label %return
+bb3: tail call void @g(i32 3) br label %return
+bb4: tail call void @g(i32 4) br label %return
+bb5: tail call void @g(i32 5) br label %return
+return: ret void
+
+; To balance the tree by weight, the pivot is shifted to the right, moving hot
+; cases closer to the root.
+; CHECK-LABEL: left_leaning_weight_balanced_tree
+; CHECK-NOT: cmpl
+; CHECK: cmpl $39
+}
+
+!4 = !{!"branch_weights", i32 1, i32 10, i32 1, i32 1, i32 1, i32 10, i32 10}
+
+
+define void @jump_table_affects_balance(i32 %x) {
+entry:
+ switch i32 %x, label %return [
+ ; Jump table:
+ i32 0, label %bb0
+ i32 1, label %bb1
+ i32 2, label %bb2
+ i32 3, label %bb3
+
+ i32 100, label %bb0
+ i32 200, label %bb1
+ i32 300, label %bb2
+ ]
+bb0: tail call void @g(i32 0) br label %return
+bb1: tail call void @g(i32 1) br label %return
+bb2: tail call void @g(i32 2) br label %return
+bb3: tail call void @g(i32 3) br label %return
+return: ret void
+
+; CHECK-LABEL: jump_table_affects_balance
+; If the tree were balanced based on number of clusters, {0-3,100} would go on
+; the left and {200,300} on the right. However, the jump table weighs as much
+; as its components, so 100 is selected as the pivot.
+; CHECK-NOT: cmpl
+; CHECK: cmpl $99
+}
diff --git a/test/CodeGen/X86/tail-call-win64.ll b/test/CodeGen/X86/tail-call-win64.ll
new file mode 100644
index 000000000000..fb10d5d2a24a
--- /dev/null
+++ b/test/CodeGen/X86/tail-call-win64.ll
@@ -0,0 +1,36 @@
+; RUN: llc -mtriple=x86_64-windows -show-mc-encoding < %s | FileCheck %s
+
+; The Win64 ABI wants tail jmps to use a REX_W prefix so it can distinguish
+; in-function jumps from function exiting jumps.
+
+define void @tail_jmp_reg(i32, i32, void ()* %fptr) {
+ tail call void () %fptr()
+ ret void
+}
+
+; Check that we merge the REX prefixes into 0x49 instead of 0x48, 0x41.
+
+; CHECK-LABEL: tail_jmp_reg:
+; CHECK: rex64 jmpq *%r8
+; CHECK: encoding: [0x49,0xff,0xe0]
+
+declare void @tail_tgt()
+
+define void @tail_jmp_imm() {
+ tail call void @tail_tgt()
+ ret void
+}
+
+; CHECK-LABEL: tail_jmp_imm:
+; CHECK: rex64 jmp tail_tgt
+
+@g_fptr = global void ()* @tail_tgt
+
+define void @tail_jmp_mem() {
+ %fptr = load void ()*, void ()** @g_fptr
+ tail call void () %fptr()
+ ret void
+}
+
+; CHECK-LABEL: tail_jmp_mem:
+; CHECK: rex64 jmpq *g_fptr(%rip)
diff --git a/test/CodeGen/X86/tail-dup-addr.ll b/test/CodeGen/X86/tail-dup-addr.ll
index c68a8c6bf845..3e5c8c8dbc3f 100644
--- a/test/CodeGen/X86/tail-dup-addr.ll
+++ b/test/CodeGen/X86/tail-dup-addr.ll
@@ -10,7 +10,7 @@
define void @foo() noreturn nounwind uwtable ssp {
entry:
- %tmp = load i32* @a, align 4
+ %tmp = load i32, i32* @a, align 4
%foo = icmp eq i32 0, %tmp
br i1 %foo, label %sw.bb, label %sw.default
diff --git a/test/CodeGen/X86/tail-opts.ll b/test/CodeGen/X86/tail-opts.ll
index 73d93ff993c6..f590176d9815 100644
--- a/test/CodeGen/X86/tail-opts.ll
+++ b/test/CodeGen/X86/tail-opts.ll
@@ -126,12 +126,12 @@ altret:
define i1 @dont_merge_oddly(float* %result) nounwind {
entry:
- %tmp4 = getelementptr float* %result, i32 2
- %tmp5 = load float* %tmp4, align 4
- %tmp7 = getelementptr float* %result, i32 4
- %tmp8 = load float* %tmp7, align 4
- %tmp10 = getelementptr float* %result, i32 6
- %tmp11 = load float* %tmp10, align 4
+ %tmp4 = getelementptr float, float* %result, i32 2
+ %tmp5 = load float, float* %tmp4, align 4
+ %tmp7 = getelementptr float, float* %result, i32 4
+ %tmp8 = load float, float* %tmp7, align 4
+ %tmp10 = getelementptr float, float* %result, i32 6
+ %tmp11 = load float, float* %tmp10, align 4
%tmp12 = fcmp olt float %tmp8, %tmp11
br i1 %tmp12, label %bb, label %bb21
@@ -179,7 +179,7 @@ bb30:
define fastcc void @c_expand_expr_stmt(%union.tree_node* %expr) nounwind {
entry:
- %tmp4 = load i8* null, align 8 ; <i8> [#uses=3]
+ %tmp4 = load i8, i8* null, align 8 ; <i8> [#uses=3]
switch i8 %tmp4, label %bb3 [
i8 18, label %bb
]
@@ -199,9 +199,9 @@ bb2.i: ; preds = %bb
br label %bb3
lvalue_p.exit: ; preds = %bb.i
- %tmp21 = load %union.tree_node** null, align 8 ; <%union.tree_node*> [#uses=3]
- %tmp22 = getelementptr inbounds %union.tree_node* %tmp21, i64 0, i32 0, i32 0, i64 0 ; <i8*> [#uses=1]
- %tmp23 = load i8* %tmp22, align 8 ; <i8> [#uses=1]
+ %tmp21 = load %union.tree_node*, %union.tree_node** null, align 8 ; <%union.tree_node*> [#uses=3]
+ %tmp22 = getelementptr inbounds %union.tree_node, %union.tree_node* %tmp21, i64 0, i32 0, i32 0, i64 0 ; <i8*> [#uses=1]
+ %tmp23 = load i8, i8* %tmp22, align 8 ; <i8> [#uses=1]
%tmp24 = zext i8 %tmp23 to i32 ; <i32> [#uses=1]
switch i32 %tmp24, label %lvalue_p.exit4 [
i32 0, label %bb2.i3
@@ -209,11 +209,11 @@ lvalue_p.exit: ; preds = %bb.i
]
bb.i1: ; preds = %lvalue_p.exit
- %tmp25 = getelementptr inbounds %union.tree_node* %tmp21, i64 0, i32 0, i32 2 ; <i32*> [#uses=1]
+ %tmp25 = getelementptr inbounds %union.tree_node, %union.tree_node* %tmp21, i64 0, i32 0, i32 2 ; <i32*> [#uses=1]
%tmp26 = bitcast i32* %tmp25 to %union.tree_node** ; <%union.tree_node**> [#uses=1]
- %tmp27 = load %union.tree_node** %tmp26, align 8 ; <%union.tree_node*> [#uses=2]
- %tmp28 = getelementptr inbounds %union.tree_node* %tmp27, i64 0, i32 0, i32 0, i64 16 ; <i8*> [#uses=1]
- %tmp29 = load i8* %tmp28, align 8 ; <i8> [#uses=1]
+ %tmp27 = load %union.tree_node*, %union.tree_node** %tmp26, align 8 ; <%union.tree_node*> [#uses=2]
+ %tmp28 = getelementptr inbounds %union.tree_node, %union.tree_node* %tmp27, i64 0, i32 0, i32 0, i64 16 ; <i8*> [#uses=1]
+ %tmp29 = load i8, i8* %tmp28, align 8 ; <i8> [#uses=1]
%tmp30 = zext i8 %tmp29 to i32 ; <i32> [#uses=1]
switch i32 %tmp30, label %lvalue_p.exit4 [
i32 0, label %bb2.i.i2
@@ -226,22 +226,22 @@ bb.i.i: ; preds = %bb.i1
br label %lvalue_p.exit4
bb2.i.i2: ; preds = %bb.i1
- %tmp35 = getelementptr inbounds %union.tree_node* %tmp27, i64 0, i32 0, i32 0, i64 8 ; <i8*> [#uses=1]
+ %tmp35 = getelementptr inbounds %union.tree_node, %union.tree_node* %tmp27, i64 0, i32 0, i32 0, i64 8 ; <i8*> [#uses=1]
%tmp36 = bitcast i8* %tmp35 to %union.tree_node** ; <%union.tree_node**> [#uses=1]
- %tmp37 = load %union.tree_node** %tmp36, align 8 ; <%union.tree_node*> [#uses=1]
- %tmp38 = getelementptr inbounds %union.tree_node* %tmp37, i64 0, i32 0, i32 0, i64 16 ; <i8*> [#uses=1]
- %tmp39 = load i8* %tmp38, align 8 ; <i8> [#uses=1]
+ %tmp37 = load %union.tree_node*, %union.tree_node** %tmp36, align 8 ; <%union.tree_node*> [#uses=1]
+ %tmp38 = getelementptr inbounds %union.tree_node, %union.tree_node* %tmp37, i64 0, i32 0, i32 0, i64 16 ; <i8*> [#uses=1]
+ %tmp39 = load i8, i8* %tmp38, align 8 ; <i8> [#uses=1]
switch i8 %tmp39, label %bb2 [
i8 16, label %lvalue_p.exit4
i8 23, label %lvalue_p.exit4
]
bb2.i3: ; preds = %lvalue_p.exit
- %tmp40 = getelementptr inbounds %union.tree_node* %tmp21, i64 0, i32 0, i32 0, i64 8 ; <i8*> [#uses=1]
+ %tmp40 = getelementptr inbounds %union.tree_node, %union.tree_node* %tmp21, i64 0, i32 0, i32 0, i64 8 ; <i8*> [#uses=1]
%tmp41 = bitcast i8* %tmp40 to %union.tree_node** ; <%union.tree_node**> [#uses=1]
- %tmp42 = load %union.tree_node** %tmp41, align 8 ; <%union.tree_node*> [#uses=1]
- %tmp43 = getelementptr inbounds %union.tree_node* %tmp42, i64 0, i32 0, i32 0, i64 16 ; <i8*> [#uses=1]
- %tmp44 = load i8* %tmp43, align 8 ; <i8> [#uses=1]
+ %tmp42 = load %union.tree_node*, %union.tree_node** %tmp41, align 8 ; <%union.tree_node*> [#uses=1]
+ %tmp43 = getelementptr inbounds %union.tree_node, %union.tree_node* %tmp42, i64 0, i32 0, i32 0, i64 16 ; <i8*> [#uses=1]
+ %tmp44 = load i8, i8* %tmp43, align 8 ; <i8> [#uses=1]
switch i8 %tmp44, label %bb2 [
i8 16, label %lvalue_p.exit4
i8 23, label %lvalue_p.exit4
diff --git a/test/CodeGen/X86/tailcall-64.ll b/test/CodeGen/X86/tailcall-64.ll
index deab1dcc7eb9..9e054fea5b35 100644
--- a/test/CodeGen/X86/tailcall-64.ll
+++ b/test/CodeGen/X86/tailcall-64.ll
@@ -165,7 +165,7 @@ declare %struct.B* @testu()
define %struct.A* @test_upcast() {
entry:
%A = tail call %struct.B* @testu()
- %x = getelementptr inbounds %struct.B* %A, i32 0, i32 0
+ %x = getelementptr inbounds %struct.B, %struct.B* %A, i32 0, i32 0
ret %struct.A* %x
}
@@ -182,13 +182,13 @@ define { i64, i64 } @crash(i8* %this) {
; Check that we can fold an indexed load into a tail call instruction.
; CHECK: fold_indexed_load
; CHECK: leaq (%rsi,%rsi,4), %[[RAX:r..]]
-; CHECK: jmpq *16(%{{r..}},%[[RAX]],8) # TAILCALL
+; CHECK: jmpq *16(%{{r..}},%[[RAX]],8) ## TAILCALL
%struct.funcs = type { i32 (i8*, i32*, i32)*, i32 (i8*)*, i32 (i8*)*, i32 (i8*, i32)*, i32 }
@func_table = external global [0 x %struct.funcs]
define void @fold_indexed_load(i8* %mbstr, i64 %idxprom) nounwind uwtable ssp {
entry:
- %dsplen = getelementptr inbounds [0 x %struct.funcs]* @func_table, i64 0, i64 %idxprom, i32 2
- %x1 = load i32 (i8*)** %dsplen, align 8
+ %dsplen = getelementptr inbounds [0 x %struct.funcs], [0 x %struct.funcs]* @func_table, i64 0, i64 %idxprom, i32 2
+ %x1 = load i32 (i8*)*, i32 (i8*)** %dsplen, align 8
%call = tail call i32 %x1(i8* %mbstr) nounwind
ret void
}
@@ -207,15 +207,15 @@ entry:
; }
;
; CHECK-LABEL: rdar12282281
-; CHECK: jmpq *%r11 # TAILCALL
+; CHECK: jmpq *%r11 ## TAILCALL
@funcs = external constant [0 x i32 (i8*, ...)*]
define i32 @rdar12282281(i32 %n) nounwind uwtable ssp {
entry:
%idxprom = sext i32 %n to i64
- %arrayidx = getelementptr inbounds [0 x i32 (i8*, ...)*]* @funcs, i64 0, i64 %idxprom
- %0 = load i32 (i8*, ...)** %arrayidx, align 8
- %call = tail call i32 (i8*, ...)* %0(i8* null, i32 0, i32 0, i32 0, i32 0, i32 0) nounwind
+ %arrayidx = getelementptr inbounds [0 x i32 (i8*, ...)*], [0 x i32 (i8*, ...)*]* @funcs, i64 0, i64 %idxprom
+ %0 = load i32 (i8*, ...)*, i32 (i8*, ...)** %arrayidx, align 8
+ %call = tail call i32 (i8*, ...) %0(i8* null, i32 0, i32 0, i32 0, i32 0, i32 0) nounwind
ret i32 %call
}
diff --git a/test/CodeGen/X86/tailcall-fastisel.ll b/test/CodeGen/X86/tailcall-fastisel.ll
index 842ed25439f8..f69e75ca6c65 100644
--- a/test/CodeGen/X86/tailcall-fastisel.ll
+++ b/test/CodeGen/X86/tailcall-fastisel.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -tailcallopt -fast-isel -fast-isel-abort | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -tailcallopt -fast-isel -fast-isel-abort=1 | FileCheck %s
%0 = type { i64, i32, i8* }
@@ -11,7 +11,7 @@ fail: ; preds = %entry
define i32 @foo() nounwind {
entry:
- %0 = tail call i32 (...)* @bar() nounwind ; <i32> [#uses=1]
+ %0 = tail call i32 (...) @bar() nounwind ; <i32> [#uses=1]
ret i32 %0
}
diff --git a/test/CodeGen/X86/tailcall-mem-intrinsics.ll b/test/CodeGen/X86/tailcall-mem-intrinsics.ll
new file mode 100644
index 000000000000..0e0ab5c478fc
--- /dev/null
+++ b/test/CodeGen/X86/tailcall-mem-intrinsics.ll
@@ -0,0 +1,31 @@
+; RUN: llc -mtriple=x86_64-unknown-unknown < %s | FileCheck %s
+
+; CHECK-LABEL: tail_memcpy
+; CHECK: jmp memcpy
+define void @tail_memcpy(i8* nocapture %p, i8* nocapture readonly %q, i32 %n) #0 {
+entry:
+ tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %p, i8* %q, i32 %n, i32 1, i1 false)
+ ret void
+}
+
+; CHECK-LABEL: tail_memmove
+; CHECK: jmp memmove
+define void @tail_memmove(i8* nocapture %p, i8* nocapture readonly %q, i32 %n) #0 {
+entry:
+ tail call void @llvm.memmove.p0i8.p0i8.i32(i8* %p, i8* %q, i32 %n, i32 1, i1 false)
+ ret void
+}
+
+; CHECK-LABEL: tail_memset
+; CHECK: jmp memset
+define void @tail_memset(i8* nocapture %p, i8 %c, i32 %n) #0 {
+entry:
+ tail call void @llvm.memset.p0i8.i32(i8* %p, i8 %c, i32 %n, i32 1, i1 false)
+ ret void
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i32, i1) #0
+declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i32, i1) #0
+declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) #0
+
+attributes #0 = { nounwind }
diff --git a/test/CodeGen/X86/tailcall-returndup-void.ll b/test/CodeGen/X86/tailcall-returndup-void.ll
index 2c39cb4468df..62c40164d798 100644
--- a/test/CodeGen/X86/tailcall-returndup-void.ll
+++ b/test/CodeGen/X86/tailcall-returndup-void.ll
@@ -7,7 +7,7 @@ declare ghccc void @sEH_info(i64* noalias nocapture, i64* noalias nocapture, i64
define ghccc void @rBM_info(i64* noalias nocapture %Base_Arg, i64* noalias nocapture %Sp_Arg, i64* noalias nocapture %Hp_Arg, i64 %R1_Arg, i64 %R2_Arg, i64 %R3_Arg) nounwind align 8 {
c263:
- %ln265 = getelementptr inbounds i64* %Sp_Arg, i64 -2
+ %ln265 = getelementptr inbounds i64, i64* %Sp_Arg, i64 -2
%ln266 = ptrtoint i64* %ln265 to i64
%ln268 = icmp ult i64 %ln266, %R3_Arg
br i1 %ln268, label %c26a, label %n26p
@@ -16,7 +16,7 @@ n26p: ; preds = %c263
br i1 icmp ne (i64 and (i64 ptrtoint ([0 x i64]* @sES_closure to i64), i64 7), i64 0), label %c1ZP.i, label %n1ZQ.i
n1ZQ.i: ; preds = %n26p
- %ln1ZT.i = load i64* getelementptr inbounds ([0 x i64]* @sES_closure, i64 0, i64 0), align 8
+ %ln1ZT.i = load i64, i64* getelementptr inbounds ([0 x i64], [0 x i64]* @sES_closure, i64 0, i64 0), align 8
%ln1ZU.i = inttoptr i64 %ln1ZT.i to void (i64*, i64*, i64*, i64, i64, i64)*
tail call ghccc void %ln1ZU.i(i64* %Base_Arg, i64* %Sp_Arg, i64* %Hp_Arg, i64 ptrtoint ([0 x i64]* @sES_closure to i64), i64 ptrtoint ([0 x i64]* @sES_closure to i64), i64 %R3_Arg) nounwind
br label %rBL_info.exit
@@ -29,8 +29,8 @@ rBL_info.exit: ; preds = %c1ZP.i, %n1ZQ.i
ret void
c26a: ; preds = %c263
- %ln27h = getelementptr inbounds i64* %Base_Arg, i64 -2
- %ln27j = load i64* %ln27h, align 8
+ %ln27h = getelementptr inbounds i64, i64* %Base_Arg, i64 -2
+ %ln27j = load i64, i64* %ln27h, align 8
%ln27k = inttoptr i64 %ln27j to void (i64*, i64*, i64*, i64, i64, i64)*
tail call ghccc void %ln27k(i64* %Base_Arg, i64* %Sp_Arg, i64* %Hp_Arg, i64 %R1_Arg, i64 %R2_Arg, i64 %R3_Arg) nounwind
ret void
diff --git a/test/CodeGen/X86/tailcall-ri64.ll b/test/CodeGen/X86/tailcall-ri64.ll
index 914d8f7b8bc7..443d48868e54 100644
--- a/test/CodeGen/X86/tailcall-ri64.ll
+++ b/test/CodeGen/X86/tailcall-ri64.ll
@@ -16,9 +16,9 @@ define %vt* @_ZN4llvm9UnsetInit20convertInitializerToEPNS_5RecTyE(%class*
%this, %vt* %Ty) align 2 {
entry:
%0 = bitcast %vt* %Ty to %vt* (%vt*, %class*)***
- %vtable = load %vt* (%vt*, %class*)*** %0, align 8
- %vfn = getelementptr inbounds %vt* (%vt*, %class*)** %vtable, i64 4
- %1 = load %vt* (%vt*, %class*)** %vfn, align 8
+ %vtable = load %vt* (%vt*, %class*)**, %vt* (%vt*, %class*)*** %0, align 8
+ %vfn = getelementptr inbounds %vt* (%vt*, %class*)*, %vt* (%vt*, %class*)** %vtable, i64 4
+ %1 = load %vt* (%vt*, %class*)*, %vt* (%vt*, %class*)** %vfn, align 8
%call = tail call %vt* %1(%vt* %Ty, %class* %this)
ret %vt* %call
}
diff --git a/test/CodeGen/X86/tailcallbyval.ll b/test/CodeGen/X86/tailcallbyval.ll
index 9a0b57c138c2..8a0113a645a3 100644
--- a/test/CodeGen/X86/tailcallbyval.ll
+++ b/test/CodeGen/X86/tailcallbyval.ll
@@ -5,8 +5,8 @@
define fastcc i32 @tailcallee(%struct.s* byval %a) nounwind {
entry:
- %tmp2 = getelementptr %struct.s* %a, i32 0, i32 0
- %tmp3 = load i32* %tmp2
+ %tmp2 = getelementptr %struct.s, %struct.s* %a, i32 0, i32 0
+ %tmp3 = load i32, i32* %tmp2
ret i32 %tmp3
; CHECK: tailcallee
; CHECK: movl 4(%esp), %eax
diff --git a/test/CodeGen/X86/tailcallbyval64.ll b/test/CodeGen/X86/tailcallbyval64.ll
index 75a6d874da37..9df1470c67f1 100644
--- a/test/CodeGen/X86/tailcallbyval64.ll
+++ b/test/CodeGen/X86/tailcallbyval64.ll
@@ -35,8 +35,8 @@ declare fastcc i64 @tailcallee(%struct.s* byval %a, i64 %val, i64 %val2, i64 %v
define fastcc i64 @tailcaller(i64 %b, %struct.s* byval %a) {
entry:
- %tmp2 = getelementptr %struct.s* %a, i32 0, i32 1
- %tmp3 = load i64* %tmp2, align 8
+ %tmp2 = getelementptr %struct.s, %struct.s* %a, i32 0, i32 1
+ %tmp3 = load i64, i64* %tmp2, align 8
%tmp4 = tail call fastcc i64 @tailcallee(%struct.s* byval %a , i64 %tmp3, i64 %b, i64 7, i64 13, i64 17)
ret i64 %tmp4
}
diff --git a/test/CodeGen/X86/tailcallstack64.ll b/test/CodeGen/X86/tailcallstack64.ll
index bff5f9924f66..158b777fe1fb 100644
--- a/test/CodeGen/X86/tailcallstack64.ll
+++ b/test/CodeGen/X86/tailcallstack64.ll
@@ -12,9 +12,9 @@
; Add %in1 %p1 to a different temporary register (%eax).
; CHECK: addl {{%edi|%ecx}}, [[R1]]
; Move param %in2 to stack.
-; CHECK: movl [[R2]], [[A1]](%rsp)
+; CHECK-DAG: movl [[R2]], [[A1]](%rsp)
; Move result of addition to stack.
-; CHECK: movl [[R1]], [[A2]](%rsp)
+; CHECK-DAG: movl [[R1]], [[A2]](%rsp)
; Eventually, do a TAILCALL
; CHECK: TAILCALL
diff --git a/test/CodeGen/X86/tbm-intrinsics-x86_64.ll b/test/CodeGen/X86/tbm-intrinsics-x86_64.ll
index 1beee72dfd0a..12218cc8ec4a 100644
--- a/test/CodeGen/X86/tbm-intrinsics-x86_64.ll
+++ b/test/CodeGen/X86/tbm-intrinsics-x86_64.ll
@@ -16,7 +16,7 @@ entry:
; CHECK-LABEL: test_x86_tbm_bextri_u32_m:
; CHECK-NOT: mov
; CHECK: bextr $
- %tmp1 = load i32* %a, align 4
+ %tmp1 = load i32, i32* %a, align 4
%0 = tail call i32 @llvm.x86.tbm.bextri.u32(i32 %tmp1, i32 2814)
ret i32 %0
}
@@ -37,7 +37,7 @@ entry:
; CHECK-LABEL: test_x86_tbm_bextri_u64_m:
; CHECK-NOT: mov
; CHECK: bextr $
- %tmp1 = load i64* %a, align 8
+ %tmp1 = load i64, i64* %a, align 8
%0 = tail call i64 @llvm.x86.tbm.bextri.u64(i64 %tmp1, i64 2814)
ret i64 %0
}
diff --git a/test/CodeGen/X86/tbm_patterns.ll b/test/CodeGen/X86/tbm_patterns.ll
index 79eea10af3ae..80d36d5af4d2 100644
--- a/test/CodeGen/X86/tbm_patterns.ll
+++ b/test/CodeGen/X86/tbm_patterns.ll
@@ -15,7 +15,7 @@ entry:
; CHECK-LABEL: test_x86_tbm_bextri_u32_m:
; CHECK-NOT: mov
; CHECK: bextr $
- %0 = load i32* %a
+ %0 = load i32, i32* %a
%1 = lshr i32 %0, 4
%2 = and i32 %1, 4095
ret i32 %2
@@ -36,7 +36,7 @@ entry:
; CHECK-LABEL: test_x86_tbm_bextri_u64_m:
; CHECK-NOT: mov
; CHECK: bextr $
- %0 = load i64* %a
+ %0 = load i64, i64* %a
%1 = lshr i64 %0, 4
%2 = and i64 %1, 4095
ret i64 %2
diff --git a/test/CodeGen/X86/test-shrink-bug.ll b/test/CodeGen/X86/test-shrink-bug.ll
index 64631ea5fc9b..1bb1e6384832 100644
--- a/test/CodeGen/X86/test-shrink-bug.ll
+++ b/test/CodeGen/X86/test-shrink-bug.ll
@@ -14,7 +14,7 @@ declare i32 @func_16(i8 signext %p_19, i32 %p_20) nounwind
define i32 @func_35(i64 %p_38) nounwind ssp {
entry:
- %tmp = load i8* @g_14 ; <i8> [#uses=2]
+ %tmp = load i8, i8* @g_14 ; <i8> [#uses=2]
%conv = zext i8 %tmp to i32 ; <i32> [#uses=1]
%cmp = icmp sle i32 1, %conv ; <i1> [#uses=1]
%conv2 = zext i1 %cmp to i32 ; <i32> [#uses=1]
diff --git a/test/CodeGen/X86/testl-commute.ll b/test/CodeGen/X86/testl-commute.ll
index bf6debf17542..a9a9e581d995 100644
--- a/test/CodeGen/X86/testl-commute.ll
+++ b/test/CodeGen/X86/testl-commute.ll
@@ -13,8 +13,8 @@ define i32 @test(i32* %P, i32* %G) nounwind {
; CHECK: ret
entry:
- %0 = load i32* %P, align 4 ; <i32> [#uses=3]
- %1 = load i32* %G, align 4 ; <i32> [#uses=1]
+ %0 = load i32, i32* %P, align 4 ; <i32> [#uses=3]
+ %1 = load i32, i32* %G, align 4 ; <i32> [#uses=1]
%2 = and i32 %1, %0 ; <i32> [#uses=1]
%3 = icmp eq i32 %2, 0 ; <i1> [#uses=1]
br i1 %3, label %bb1, label %bb
@@ -34,8 +34,8 @@ define i32 @test2(i32* %P, i32* %G) nounwind {
; CHECK: ret
entry:
- %0 = load i32* %P, align 4 ; <i32> [#uses=3]
- %1 = load i32* %G, align 4 ; <i32> [#uses=1]
+ %0 = load i32, i32* %P, align 4 ; <i32> [#uses=3]
+ %1 = load i32, i32* %G, align 4 ; <i32> [#uses=1]
%2 = and i32 %0, %1 ; <i32> [#uses=1]
%3 = icmp eq i32 %2, 0 ; <i1> [#uses=1]
br i1 %3, label %bb1, label %bb
@@ -55,8 +55,8 @@ define i32 @test3(i32* %P, i32* %G) nounwind {
; CHECK: ret
entry:
- %0 = load i32* %P, align 4 ; <i32> [#uses=3]
- %1 = load i32* %G, align 4 ; <i32> [#uses=1]
+ %0 = load i32, i32* %P, align 4 ; <i32> [#uses=3]
+ %1 = load i32, i32* %G, align 4 ; <i32> [#uses=1]
%2 = and i32 %0, %1 ; <i32> [#uses=1]
%3 = icmp eq i32 %2, 0 ; <i1> [#uses=1]
br i1 %3, label %bb1, label %bb
diff --git a/test/CodeGen/X86/this-return-64.ll b/test/CodeGen/X86/this-return-64.ll
index 4e6be71238ab..df8f416c4075 100644
--- a/test/CodeGen/X86/this-return-64.ll
+++ b/test/CodeGen/X86/this-return-64.ll
@@ -16,7 +16,7 @@ define %struct.C* @C_ctor(%struct.C* %this, i32 %y) {
entry:
; CHECK-LABEL: C_ctor:
; CHECK: jmp B_ctor # TAILCALL
- %0 = getelementptr inbounds %struct.C* %this, i64 0, i32 0
+ %0 = getelementptr inbounds %struct.C, %struct.C* %this, i64 0, i32 0
%call = tail call %struct.B* @B_ctor(%struct.B* %0, i32 %y)
ret %struct.C* %this
}
@@ -25,7 +25,7 @@ define %struct.C* @C_ctor_nothisret(%struct.C* %this, i32 %y) {
entry:
; CHECK-LABEL: C_ctor_nothisret:
; CHECK-NOT: jmp B_ctor_nothisret
- %0 = getelementptr inbounds %struct.C* %this, i64 0, i32 0
+ %0 = getelementptr inbounds %struct.C, %struct.C* %this, i64 0, i32 0
%call = tail call %struct.B* @B_ctor_nothisret(%struct.B* %0, i32 %y)
ret %struct.C* %this
}
@@ -39,7 +39,7 @@ entry:
; CHECK: jmp B_ctor # TAILCALL
%0 = bitcast %struct.D* %this to %struct.A*
%call = tail call %struct.A* @A_ctor(%struct.A* %0)
- %1 = getelementptr inbounds %struct.D* %this, i64 0, i32 0
+ %1 = getelementptr inbounds %struct.D, %struct.D* %this, i64 0, i32 0
%call2 = tail call %struct.B* @B_ctor(%struct.B* %1, i32 %y)
; (this next line would never be generated by Clang, actually)
%2 = bitcast %struct.A* %call to %struct.D*
@@ -55,7 +55,7 @@ entry:
; CHECK-NOT: jmp B_ctor_nothisret
%0 = bitcast %struct.D* %this to %struct.A*
%call = tail call %struct.A* @A_ctor_nothisret(%struct.A* %0)
- %1 = getelementptr inbounds %struct.D* %this, i64 0, i32 0
+ %1 = getelementptr inbounds %struct.D, %struct.D* %this, i64 0, i32 0
%call2 = tail call %struct.B* @B_ctor_nothisret(%struct.B* %1, i32 %y)
; (this next line would never be generated by Clang, actually)
%2 = bitcast %struct.A* %call to %struct.D*
@@ -69,7 +69,7 @@ entry:
; CHECK: callq B_ctor
; CHECK: movq [[SAVETHIS]], %rcx
; CHECK: jmp B_ctor # TAILCALL
- %b = getelementptr inbounds %struct.E* %this, i64 0, i32 0
+ %b = getelementptr inbounds %struct.E, %struct.E* %this, i64 0, i32 0
%call = tail call %struct.B* @B_ctor(%struct.B* %b, i32 %x)
%call4 = tail call %struct.B* @B_ctor(%struct.B* %b, i32 %x)
ret %struct.E* %this
@@ -82,7 +82,7 @@ entry:
; CHECK: callq B_ctor_nothisret
; CHECK: movq [[SAVETHIS]], %rcx
; CHECK-NOT: jmp B_ctor_nothisret
- %b = getelementptr inbounds %struct.E* %this, i64 0, i32 0
+ %b = getelementptr inbounds %struct.E, %struct.E* %this, i64 0, i32 0
%call = tail call %struct.B* @B_ctor_nothisret(%struct.B* %b, i32 %x)
%call4 = tail call %struct.B* @B_ctor_nothisret(%struct.B* %b, i32 %x)
ret %struct.E* %this
diff --git a/test/CodeGen/X86/tls-addr-non-leaf-function.ll b/test/CodeGen/X86/tls-addr-non-leaf-function.ll
index ec47232059f8..b9cab65465b8 100644
--- a/test/CodeGen/X86/tls-addr-non-leaf-function.ll
+++ b/test/CodeGen/X86/tls-addr-non-leaf-function.ll
@@ -32,6 +32,6 @@ target triple = "x86_64-unknown-linux-gnu"
@x = thread_local global i32 0
define i32 @foo() "no-frame-pointer-elim-non-leaf" {
- %a = load i32* @x, align 4
+ %a = load i32, i32* @x, align 4
ret i32 %a
}
diff --git a/test/CodeGen/X86/tls-local-dynamic.ll b/test/CodeGen/X86/tls-local-dynamic.ll
index 4841e52c5b0e..1f1b41a8a6d4 100644
--- a/test/CodeGen/X86/tls-local-dynamic.ll
+++ b/test/CodeGen/X86/tls-local-dynamic.ll
@@ -32,7 +32,7 @@ entry:
if.else:
- %0 = load i32* @x, align 4
+ %0 = load i32, i32* @x, align 4
%cmp1 = icmp eq i32 %i, 2
br i1 %cmp1, label %if.then2, label %return
; Now we call __tls_get_addr.
@@ -43,7 +43,7 @@ if.else:
if.then2:
- %1 = load i32* @y, align 4
+ %1 = load i32, i32* @y, align 4
%add = add nsw i32 %1, %0
br label %return
; This accesses TLS, but is dominated by the previous block,
diff --git a/test/CodeGen/X86/tls-pic.ll b/test/CodeGen/X86/tls-pic.ll
index 0c79da6667a1..805bc25c17b6 100644
--- a/test/CodeGen/X86/tls-pic.ll
+++ b/test/CodeGen/X86/tls-pic.ll
@@ -7,7 +7,7 @@
define i32 @f1() {
entry:
- %tmp1 = load i32* @i
+ %tmp1 = load i32, i32* @i
ret i32 %tmp1
}
@@ -39,7 +39,7 @@ entry:
define i32 @f3() {
entry:
- %tmp1 = load i32* @i ; <i32> [#uses=1]
+ %tmp1 = load i32, i32* @i ; <i32> [#uses=1]
ret i32 %tmp1
}
@@ -68,8 +68,8 @@ entry:
define i32 @f5() nounwind {
entry:
- %0 = load i32* @j, align 4
- %1 = load i32* @k, align 4
+ %0 = load i32, i32* @j, align 4
+ %1 = load i32, i32* @k, align 4
%add = add nsw i32 %0, %1
ret i32 %add
}
diff --git a/test/CodeGen/X86/tls-pie.ll b/test/CodeGen/X86/tls-pie.ll
index d1e09c2442f7..10fe1e94bbdc 100644
--- a/test/CodeGen/X86/tls-pie.ll
+++ b/test/CodeGen/X86/tls-pie.ll
@@ -15,7 +15,7 @@ define i32 @f1() {
; X64-NEXT: ret
entry:
- %tmp1 = load i32* @i
+ %tmp1 = load i32, i32* @i
ret i32 %tmp1
}
@@ -49,7 +49,7 @@ define i32 @f3() {
; X64-NEXT: ret
entry:
- %tmp1 = load i32* @i2
+ %tmp1 = load i32, i32* @i2
ret i32 %tmp1
}
diff --git a/test/CodeGen/X86/tls.ll b/test/CodeGen/X86/tls.ll
index 75e7fc4f6bb3..0f3d3adec4c3 100644
--- a/test/CodeGen/X86/tls.ll
+++ b/test/CodeGen/X86/tls.ll
@@ -12,6 +12,7 @@
@i5 = external hidden thread_local global i32
@s1 = thread_local global i16 15
@b1 = thread_local global i8 0
+@b2 = thread_local(localexec) global i8 0
define i32 @f1() {
; X32_LINUX-LABEL: f1:
@@ -40,7 +41,7 @@ define i32 @f1() {
; MINGW32-NEXT: retl
entry:
- %tmp1 = load i32* @i1
+ %tmp1 = load i32, i32* @i1
ret i32 %tmp1
}
@@ -105,7 +106,7 @@ define i32 @f3() nounwind {
; MINGW32-NEXT: retl
entry:
- %tmp1 = load i32* @i2
+ %tmp1 = load i32, i32* @i2
ret i32 %tmp1
}
@@ -168,7 +169,7 @@ define i32 @f5() nounwind {
; MINGW32-NEXT: retl
entry:
- %tmp1 = load i32* @i3
+ %tmp1 = load i32, i32* @i3
ret i32 %tmp1
}
@@ -219,7 +220,7 @@ define i32 @f7() {
; MINGW32-NEXT: retl
entry:
- %tmp1 = load i32* @i4
+ %tmp1 = load i32, i32* @i4
ret i32 %tmp1
}
@@ -258,7 +259,7 @@ define i32 @f9() {
; MINGW32-NEXT: retl
entry:
- %tmp1 = load i32* @i5
+ %tmp1 = load i32, i32* @i5
ret i32 %tmp1
}
@@ -309,7 +310,7 @@ define i16 @f11() {
; MINGW32: retl
entry:
- %tmp1 = load i16* @s1
+ %tmp1 = load i16, i16* @s1
ret i16 %tmp1
}
@@ -341,7 +342,7 @@ define i32 @f12() {
entry:
- %tmp1 = load i16* @s1
+ %tmp1 = load i16, i16* @s1
%tmp2 = sext i16 %tmp1 to i32
ret i32 %tmp2
}
@@ -373,7 +374,7 @@ define i8 @f13() {
; MINGW32-NEXT: retl
entry:
- %tmp1 = load i8* @b1
+ %tmp1 = load i8, i8* @b1
ret i8 %tmp1
}
@@ -404,8 +405,36 @@ define i32 @f14() {
; MINGW32-NEXT: retl
entry:
- %tmp1 = load i8* @b1
+ %tmp1 = load i8, i8* @b1
%tmp2 = sext i8 %tmp1 to i32
ret i32 %tmp2
}
+define i8* @f15() {
+; X32_LINUX-LABEL: f15:
+; X32_LINUX: movl %gs:0, %eax
+; X32_LINUX-NEXT: leal b2@NTPOFF(%eax), %eax
+; X32_LINUX-NEXT: ret
+; X64_LINUX-LABEL: f15:
+; X64_LINUX: movq %fs:0, %rax
+; X64_LINUX-NEXT: leaq b2@TPOFF(%rax), %rax
+; X64_LINUX-NEXT: ret
+; X32_WIN-LABEL: f15:
+; X32_WIN: movl %fs:__tls_array, %eax
+; X32_WIN-NEXT: movl (%eax), %eax
+; X32_WIN-NEXT: leal _b2@SECREL32(%eax), %eax
+; X32_WIN-NEXT: ret
+; X64_WIN-LABEL: f15:
+; X64_WIN: movq %gs:88, %rax
+; X64_WIN-NEXT: movq (%rax), %rax
+; X64_WIN-NEXT: leaq b2@SECREL32(%rax), %rax
+; X64_WIN-NEXT: ret
+; MINGW32-LABEL: f15:
+; MINGW32: movl %fs:44, %eax
+; MINGW32-NEXT: movl (%eax), %eax
+; MINGW32-NEXT: leal _b2@SECREL32(%eax), %eax
+; MINGW32-NEXT: ret
+entry:
+ ret i8* @b2
+}
+
diff --git a/test/CodeGen/X86/tlv-1.ll b/test/CodeGen/X86/tlv-1.ll
index 66e2f819ee24..5f017d31dbbf 100644
--- a/test/CodeGen/X86/tlv-1.ll
+++ b/test/CodeGen/X86/tlv-1.ll
@@ -7,7 +7,7 @@
define void @main() nounwind ssp {
; CHECK-LABEL: main:
entry:
- call void @llvm.memset.p0i8.i64(i8* getelementptr inbounds (%struct.A* @c, i32 0, i32 0, i32 0), i8 0, i64 60, i32 1, i1 false)
+ call void @llvm.memset.p0i8.i64(i8* getelementptr inbounds (%struct.A, %struct.A* @c, i32 0, i32 0, i32 0), i8 0, i64 60, i32 1, i1 false)
unreachable
; CHECK: movq _c@TLVP(%rip), %rdi
; CHECK-NEXT: callq *(%rdi)
@@ -25,8 +25,8 @@ entry:
; CHECK: movq _b@TLVP(%rip),
; CHECK: callq *
; CHECK: subl (%rax), [[REGISTER]]
- %0 = load i32* @a, align 4
- %1 = load i32* @b, align 4
+ %0 = load i32, i32* @a, align 4
+ %1 = load i32, i32* @b, align 4
%sub = sub nsw i32 %0, %1
ret i32 %sub
}
diff --git a/test/CodeGen/X86/trap.ll b/test/CodeGen/X86/trap.ll
index 149c667c8cb7..ca33f9e6b4e1 100644
--- a/test/CodeGen/X86/trap.ll
+++ b/test/CodeGen/X86/trap.ll
@@ -1,15 +1,25 @@
-; RUN: llc < %s -march=x86 -mcpu=yonah | FileCheck %s
+; RUN: llc < %s -mtriple=i686-apple-darwin8 -mcpu=yonah | FileCheck %s -check-prefix=DARWIN
+; RUN: llc < %s -mtriple=i686-unknown-linux -mcpu=yonah | FileCheck %s -check-prefix=LINUX
+; RUN: llc < %s -mtriple=x86_64-scei-ps4 | FileCheck %s -check-prefix=PS4
-; CHECK-LABEL: test0:
-; CHECK: ud2
+; DARWIN-LABEL: test0:
+; DARWIN: ud2
+; LINUX-LABEL: test0:
+; LINUX: ud2
+; PS4-LABEL: test0:
+; PS4: ud2
define i32 @test0() noreturn nounwind {
entry:
tail call void @llvm.trap( )
unreachable
}
-; CHECK-LABEL: test1:
-; CHECK: int3
+; DARWIN-LABEL: test1:
+; DARWIN: int3
+; LINUX-LABEL: test1:
+; LINUX: int3
+; PS4-LABEL: test1:
+; PS4: int $65
define i32 @test1() noreturn nounwind {
entry:
tail call void @llvm.debugtrap( )
diff --git a/test/CodeGen/X86/trunc-ext-ld-st.ll b/test/CodeGen/X86/trunc-ext-ld-st.ll
index 8de6297906c7..8c60697d8032 100644
--- a/test/CodeGen/X86/trunc-ext-ld-st.ll
+++ b/test/CodeGen/X86/trunc-ext-ld-st.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86-64 -mcpu=corei7 -mattr=+sse4.1 | FileCheck %s
+; RUN: llc < %s -march=x86-64 -mattr=+sse4.1 | FileCheck %s
;CHECK-LABEL: load_2_i8:
; A single 16-bit load
@@ -10,7 +10,7 @@
;CHECK: ret
define void @load_2_i8(<2 x i8>* %A) {
- %T = load <2 x i8>* %A
+ %T = load <2 x i8>, <2 x i8>* %A
%G = add <2 x i8> %T, <i8 9, i8 7>
store <2 x i8> %G, <2 x i8>* %A
ret void
@@ -24,7 +24,7 @@ define void @load_2_i8(<2 x i8>* %A) {
;CHECK: movd
;CHECK: ret
define void @load_2_i16(<2 x i16>* %A) {
- %T = load <2 x i16>* %A
+ %T = load <2 x i16>, <2 x i16>* %A
%G = add <2 x i16> %T, <i16 9, i16 7>
store <2 x i16> %G, <2 x i16>* %A
ret void
@@ -36,7 +36,7 @@ define void @load_2_i16(<2 x i16>* %A) {
;CHECK: pshufd
;CHECK: ret
define void @load_2_i32(<2 x i32>* %A) {
- %T = load <2 x i32>* %A
+ %T = load <2 x i32>, <2 x i32>* %A
%G = add <2 x i32> %T, <i32 9, i32 7>
store <2 x i32> %G, <2 x i32>* %A
ret void
@@ -48,7 +48,7 @@ define void @load_2_i32(<2 x i32>* %A) {
;CHECK: pshufb
;CHECK: ret
define void @load_4_i8(<4 x i8>* %A) {
- %T = load <4 x i8>* %A
+ %T = load <4 x i8>, <4 x i8>* %A
%G = add <4 x i8> %T, <i8 1, i8 4, i8 9, i8 7>
store <4 x i8> %G, <4 x i8>* %A
ret void
@@ -60,7 +60,7 @@ define void @load_4_i8(<4 x i8>* %A) {
;CHECK: pshufb
;CHECK: ret
define void @load_4_i16(<4 x i16>* %A) {
- %T = load <4 x i16>* %A
+ %T = load <4 x i16>, <4 x i16>* %A
%G = add <4 x i16> %T, <i16 1, i16 4, i16 9, i16 7>
store <4 x i16> %G, <4 x i16>* %A
ret void
@@ -72,7 +72,7 @@ define void @load_4_i16(<4 x i16>* %A) {
;CHECK: pshufb
;CHECK: ret
define void @load_8_i8(<8 x i8>* %A) {
- %T = load <8 x i8>* %A
+ %T = load <8 x i8>, <8 x i8>* %A
%G = add <8 x i8> %T, %T
store <8 x i8> %G, <8 x i8>* %A
ret void
diff --git a/test/CodeGen/X86/trunc-to-bool.ll b/test/CodeGen/X86/trunc-to-bool.ll
index 0ed634774ab3..3dd98eea7fa9 100644
--- a/test/CodeGen/X86/trunc-to-bool.ll
+++ b/test/CodeGen/X86/trunc-to-bool.ll
@@ -25,7 +25,7 @@ ret_false:
; CHECK: btl
define i32 @test3(i8* %ptr) nounwind {
- %val = load i8* %ptr
+ %val = load i8, i8* %ptr
%tmp = trunc i8 %val to i1
br i1 %tmp, label %cond_true, label %cond_false
cond_true:
diff --git a/test/CodeGen/X86/twoaddr-coalesce-3.ll b/test/CodeGen/X86/twoaddr-coalesce-3.ll
new file mode 100644
index 000000000000..33c9d46f13c3
--- /dev/null
+++ b/test/CodeGen/X86/twoaddr-coalesce-3.ll
@@ -0,0 +1,84 @@
+; RUN: llc < %s -march=x86-64 -relocation-model=pic | FileCheck %s
+; This test is to ensure the TwoAddrInstruction pass chooses the proper operands to
+; merge and generates fewer mov insns.
+
+@M = common global i32 0, align 4
+@total = common global i32 0, align 4
+@g = common global i32 0, align 4
+
+; Function Attrs: nounwind uwtable
+define void @foo() {
+entry:
+ %0 = load i32, i32* @M, align 4
+ %cmp3 = icmp sgt i32 %0, 0
+ br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph: ; preds = %entry
+ %total.promoted = load i32, i32* @total, align 4
+ br label %for.body
+
+; Check that only one mov will be generated in the kernel loop.
+; CHECK-LABEL: foo:
+; CHECK: [[LOOP1:^[a-zA-Z0-9_.]+]]: {{#.*}} %for.body
+; CHECK-NOT: mov
+; CHECK: movl {{.*}}, [[REG1:%[a-z0-9]+]]
+; CHECK-NOT: mov
+; CHECK: shrl $31, [[REG1]]
+; CHECK-NOT: mov
+; CHECK: jl [[LOOP1]]
+for.body: ; preds = %for.body.lr.ph, %for.body
+ %add5 = phi i32 [ %total.promoted, %for.body.lr.ph ], [ %add, %for.body ]
+ %i.04 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.body ]
+ %div = sdiv i32 %i.04, 2
+ %add = add nsw i32 %div, %add5
+ %inc = add nuw nsw i32 %i.04, 1
+ %cmp = icmp slt i32 %inc, %0
+ br i1 %cmp, label %for.body, label %for.cond.for.end_crit_edge
+
+for.cond.for.end_crit_edge: ; preds = %for.body
+ store i32 %add, i32* @total, align 4
+ br label %for.end
+
+for.end: ; preds = %for.cond.for.end_crit_edge, %entry
+ ret void
+}
+
+; Function Attrs: nounwind uwtable
+define void @goo() {
+entry:
+ %0 = load i32, i32* @M, align 4
+ %cmp3 = icmp sgt i32 %0, 0
+ br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph: ; preds = %entry
+ %total.promoted = load i32, i32* @total, align 4
+ br label %for.body
+
+; Check that only two mov will be generated in the kernel loop.
+; CHECK-LABEL: goo:
+; CHECK: [[LOOP2:^[a-zA-Z0-9_.]+]]: {{#.*}} %for.body
+; CHECK-NOT: mov
+; CHECK: movl {{.*}}, [[REG2:%[a-z0-9]+]]
+; CHECK-NOT: mov
+; CHECK: shrl $31, [[REG2]]
+; CHECK-NOT: mov
+; CHECK: movl {{.*}}
+; CHECK: jl [[LOOP2]]
+for.body: ; preds = %for.body.lr.ph, %for.body
+ %add5 = phi i32 [ %total.promoted, %for.body.lr.ph ], [ %add, %for.body ]
+ %i.04 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.body ]
+ %div = sdiv i32 %i.04, 2
+ %add = add nsw i32 %div, %add5
+ store volatile i32 %add, i32* @g, align 4
+ %inc = add nuw nsw i32 %i.04, 1
+ %cmp = icmp slt i32 %inc, %0
+ br i1 %cmp, label %for.body, label %for.cond.for.end_crit_edge
+
+for.cond.for.end_crit_edge: ; preds = %for.body
+ store i32 %add, i32* @total, align 4
+ br label %for.end
+
+for.end: ; preds = %for.cond.for.end_crit_edge, %entry
+ ret void
+}
+
diff --git a/test/CodeGen/X86/twoaddr-coalesce.ll b/test/CodeGen/X86/twoaddr-coalesce.ll
index 6f6d6f2cd967..38685ec27c02 100644
--- a/test/CodeGen/X86/twoaddr-coalesce.ll
+++ b/test/CodeGen/X86/twoaddr-coalesce.ll
@@ -12,7 +12,7 @@ bb1: ; preds = %bb1, %bb1.thread
%0 = trunc i32 %i.0.reg2mem.0 to i8 ; <i8> [#uses=1]
%1 = sdiv i8 %0, 2 ; <i8> [#uses=1]
%2 = sext i8 %1 to i32 ; <i32> [#uses=1]
- %3 = tail call i32 (i8*, ...)* @printf(i8* getelementptr ([4 x i8]* @"\01LC", i32 0, i32 0), i32 %2) nounwind ; <i32> [#uses=0]
+ %3 = tail call i32 (i8*, ...) @printf(i8* getelementptr ([4 x i8], [4 x i8]* @"\01LC", i32 0, i32 0), i32 %2) nounwind ; <i32> [#uses=0]
%indvar.next = add i32 %i.0.reg2mem.0, 1 ; <i32> [#uses=2]
%exitcond = icmp eq i32 %indvar.next, 258 ; <i1> [#uses=1]
br i1 %exitcond, label %bb2, label %bb1
diff --git a/test/CodeGen/X86/twoaddr-pass-sink.ll b/test/CodeGen/X86/twoaddr-pass-sink.ll
index 9ca280627afe..9a98e4794f9e 100644
--- a/test/CodeGen/X86/twoaddr-pass-sink.ll
+++ b/test/CodeGen/X86/twoaddr-pass-sink.ll
@@ -8,20 +8,20 @@ entry:
bb: ; preds = %bb, %entry
%skiplist_addr.0.rec = phi i32 [ 0, %entry ], [ %indvar.next, %bb ] ; <i32> [#uses=3]
%vYp_addr.0.rec = shl i32 %skiplist_addr.0.rec, 3 ; <i32> [#uses=3]
- %vDct_addr.0 = getelementptr <2 x i64>* %vDct, i32 %vYp_addr.0.rec ; <<2 x i64>*> [#uses=1]
- %vYp_addr.0 = getelementptr <2 x i64>* %vYp, i32 %vYp_addr.0.rec ; <<2 x i64>*> [#uses=1]
- %skiplist_addr.0 = getelementptr i8* %skiplist, i32 %skiplist_addr.0.rec ; <i8*> [#uses=1]
+ %vDct_addr.0 = getelementptr <2 x i64>, <2 x i64>* %vDct, i32 %vYp_addr.0.rec ; <<2 x i64>*> [#uses=1]
+ %vYp_addr.0 = getelementptr <2 x i64>, <2 x i64>* %vYp, i32 %vYp_addr.0.rec ; <<2 x i64>*> [#uses=1]
+ %skiplist_addr.0 = getelementptr i8, i8* %skiplist, i32 %skiplist_addr.0.rec ; <i8*> [#uses=1]
%vDct_addr.0.sum43 = or i32 %vYp_addr.0.rec, 1 ; <i32> [#uses=1]
- %tmp7 = getelementptr <2 x i64>* %vDct, i32 %vDct_addr.0.sum43 ; <<2 x i64>*> [#uses=1]
- %tmp8 = load <2 x i64>* %tmp7, align 16 ; <<2 x i64>> [#uses=1]
- %tmp11 = load <2 x i64>* %vDct_addr.0, align 16 ; <<2 x i64>> [#uses=1]
+ %tmp7 = getelementptr <2 x i64>, <2 x i64>* %vDct, i32 %vDct_addr.0.sum43 ; <<2 x i64>*> [#uses=1]
+ %tmp8 = load <2 x i64>, <2 x i64>* %tmp7, align 16 ; <<2 x i64>> [#uses=1]
+ %tmp11 = load <2 x i64>, <2 x i64>* %vDct_addr.0, align 16 ; <<2 x i64>> [#uses=1]
%tmp13 = bitcast <2 x i64> %tmp8 to <8 x i16> ; <<8 x i16>> [#uses=1]
%tmp15 = bitcast <2 x i64> %tmp11 to <8 x i16> ; <<8 x i16>> [#uses=1]
%tmp16 = shufflevector <8 x i16> %tmp15, <8 x i16> %tmp13, <8 x i32> < i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11 > ; <<8 x i16>> [#uses=1]
%tmp26 = mul <8 x i16> %tmp25, %tmp16 ; <<8 x i16>> [#uses=1]
%tmp27 = bitcast <8 x i16> %tmp26 to <2 x i64> ; <<2 x i64>> [#uses=1]
store <2 x i64> %tmp27, <2 x i64>* %vYp_addr.0, align 16
- %tmp37 = load i8* %skiplist_addr.0, align 1 ; <i8> [#uses=1]
+ %tmp37 = load i8, i8* %skiplist_addr.0, align 1 ; <i8> [#uses=1]
%tmp38 = icmp eq i8 %tmp37, 0 ; <i1> [#uses=1]
%indvar.next = add i32 %skiplist_addr.0.rec, 1 ; <i32> [#uses=1]
br i1 %tmp38, label %return, label %bb
diff --git a/test/CodeGen/X86/uint64-to-float.ll b/test/CodeGen/X86/uint64-to-float.ll
index ca764e7568f3..a1074a6d6989 100644
--- a/test/CodeGen/X86/uint64-to-float.ll
+++ b/test/CodeGen/X86/uint64-to-float.ll
@@ -6,13 +6,13 @@
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-apple-darwin10.0.0"
-; CHECK: testq %rdi, %rdi
+; CHECK: andl
+; CHECK-NEXT: testq %rdi, %rdi
; CHECK-NEXT: js LBB0_1
; CHECK: cvtsi2ss
; CHECK-NEXT: ret
; CHECK: LBB0_1
; CHECK: shrq
-; CHECK-NEXT: andq
; CHECK-NEXT: orq
; CHECK-NEXT: cvtsi2ss
define float @test(i64 %a) {
diff --git a/test/CodeGen/X86/uint_to_fp-2.ll b/test/CodeGen/X86/uint_to_fp-2.ll
index e47f15453ed4..4b594f7c62ab 100644
--- a/test/CodeGen/X86/uint_to_fp-2.ll
+++ b/test/CodeGen/X86/uint_to_fp-2.ll
@@ -7,7 +7,7 @@ define float @test1(i32 %x) nounwind readnone {
; CHECK-NEXT: pushl %eax
; CHECK-NEXT: movsd .LCPI0_0, %xmm0
; CHECK-NEXT: movd {{[0-9]+}}(%esp), %xmm1
-; CHECK-NEXT: orps %xmm0, %xmm1
+; CHECK-NEXT: orpd %xmm0, %xmm1
; CHECK-NEXT: subsd %xmm0, %xmm1
; CHECK-NEXT: xorps %xmm0, %xmm0
; CHECK-NEXT: cvtsd2ss %xmm1, %xmm0
diff --git a/test/CodeGen/X86/umul-with-carry.ll b/test/CodeGen/X86/umul-with-carry.ll
index 56fdadbf937b..6435760e88a4 100644
--- a/test/CodeGen/X86/umul-with-carry.ll
+++ b/test/CodeGen/X86/umul-with-carry.ll
@@ -14,11 +14,11 @@ entry:
br i1 %obit, label %carry, label %normal
normal:
- %t1 = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([4 x i8]* @ok, i32 0, i32 0), i32 %sum ) nounwind
+ %t1 = tail call i32 (i8*, ...) @printf( i8* getelementptr ([4 x i8], [4 x i8]* @ok, i32 0, i32 0), i32 %sum ) nounwind
ret i1 true
carry:
- %t2 = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([4 x i8]* @no, i32 0, i32 0) ) nounwind
+ %t2 = tail call i32 (i8*, ...) @printf( i8* getelementptr ([4 x i8], [4 x i8]* @no, i32 0, i32 0) ) nounwind
ret i1 false
}
diff --git a/test/CodeGen/X86/unaligned-32-byte-memops.ll b/test/CodeGen/X86/unaligned-32-byte-memops.ll
index 347f330d67ae..b337a80b84b3 100644
--- a/test/CodeGen/X86/unaligned-32-byte-memops.ll
+++ b/test/CodeGen/X86/unaligned-32-byte-memops.ll
@@ -20,7 +20,7 @@ define <8 x float> @load32bytes(<8 x float>* %Ap) {
; HASWELL: vmovups
; HASWELL: retq
- %A = load <8 x float>* %Ap, align 16
+ %A = load <8 x float>, <8 x float>* %Ap, align 16
ret <8 x float> %A
}
@@ -48,56 +48,6 @@ define void @store32bytes(<8 x float> %A, <8 x float>* %P) {
; Merge two consecutive 16-byte subvector loads into a single 32-byte load
; if it's faster.
-declare <8 x float> @llvm.x86.avx.vinsertf128.ps.256(<8 x float>, <4 x float>, i8)
-
-; Use the vinsertf128 intrinsic to model source code
-; that explicitly uses AVX intrinsics.
-define <8 x float> @combine_16_byte_loads(<4 x float>* %ptr) {
- ; CHECK-LABEL: combine_16_byte_loads
-
- ; SANDYB: vmovups
- ; SANDYB-NEXT: vinsertf128
- ; SANDYB-NEXT: retq
-
- ; BTVER2: vmovups
- ; BTVER2-NEXT: retq
-
- ; HASWELL: vmovups
- ; HASWELL-NEXT: retq
-
- %ptr2 = getelementptr inbounds <4 x float>* %ptr, i64 1
- %v1 = load <4 x float>* %ptr, align 1
- %v2 = load <4 x float>* %ptr2, align 1
- %shuffle = shufflevector <4 x float> %v1, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
- %v3 = tail call <8 x float> @llvm.x86.avx.vinsertf128.ps.256(<8 x float> %shuffle, <4 x float> %v2, i8 1)
- ret <8 x float> %v3
-}
-
-; Swap the operands of the shufflevector and vinsertf128 to ensure that the
-; pattern still matches.
-define <8 x float> @combine_16_byte_loads_swap(<4 x float>* %ptr) {
- ; CHECK-LABEL: combine_16_byte_loads_swap
-
- ; SANDYB: vmovups
- ; SANDYB-NEXT: vinsertf128
- ; SANDYB-NEXT: retq
-
- ; BTVER2: vmovups
- ; BTVER2-NEXT: retq
-
- ; HASWELL: vmovups
- ; HASWELL-NEXT: retq
-
- %ptr2 = getelementptr inbounds <4 x float>* %ptr, i64 1
- %v1 = load <4 x float>* %ptr, align 1
- %v2 = load <4 x float>* %ptr2, align 1
- %shuffle = shufflevector <4 x float> %v2, <4 x float> undef, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 0, i32 1, i32 2, i32 3>
- %v3 = tail call <8 x float> @llvm.x86.avx.vinsertf128.ps.256(<8 x float> %shuffle, <4 x float> %v1, i8 0)
- ret <8 x float> %v3
-}
-
-; Replace the vinsertf128 intrinsic with a shufflevector as might be
-; expected from auto-vectorized code.
define <8 x float> @combine_16_byte_loads_no_intrinsic(<4 x float>* %ptr) {
; CHECK-LABEL: combine_16_byte_loads_no_intrinsic
@@ -111,9 +61,10 @@ define <8 x float> @combine_16_byte_loads_no_intrinsic(<4 x float>* %ptr) {
; HASWELL: vmovups
; HASWELL-NEXT: retq
- %ptr2 = getelementptr inbounds <4 x float>* %ptr, i64 1
- %v1 = load <4 x float>* %ptr, align 1
- %v2 = load <4 x float>* %ptr2, align 1
+ %ptr1 = getelementptr inbounds <4 x float>, <4 x float>* %ptr, i64 3
+ %ptr2 = getelementptr inbounds <4 x float>, <4 x float>* %ptr, i64 4
+ %v1 = load <4 x float>, <4 x float>* %ptr1, align 1
+ %v2 = load <4 x float>, <4 x float>* %ptr2, align 1
%v3 = shufflevector <4 x float> %v1, <4 x float> %v2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
ret <8 x float> %v3
}
@@ -133,9 +84,10 @@ define <8 x float> @combine_16_byte_loads_no_intrinsic_swap(<4 x float>* %ptr) {
; HASWELL: vmovups
; HASWELL-NEXT: retq
- %ptr2 = getelementptr inbounds <4 x float>* %ptr, i64 1
- %v1 = load <4 x float>* %ptr, align 1
- %v2 = load <4 x float>* %ptr2, align 1
+ %ptr1 = getelementptr inbounds <4 x float>, <4 x float>* %ptr, i64 4
+ %ptr2 = getelementptr inbounds <4 x float>, <4 x float>* %ptr, i64 5
+ %v1 = load <4 x float>, <4 x float>* %ptr1, align 1
+ %v2 = load <4 x float>, <4 x float>* %ptr2, align 1
%v3 = shufflevector <4 x float> %v2, <4 x float> %v1, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
ret <8 x float> %v3
}
@@ -160,13 +112,14 @@ define <4 x i64> @combine_16_byte_loads_i64(<2 x i64>* %ptr, <4 x i64> %x) {
; BTVER2-NEXT: vinsertf128
; BTVER2-NEXT: retq
- ; HASWELL: vmovdqu
- ; HASWELL-NEXT: vpaddq
+ ; HASWELL-NOT: vextract
+ ; HASWELL: vpaddq
; HASWELL-NEXT: retq
- %ptr2 = getelementptr inbounds <2 x i64>* %ptr, i64 1
- %v1 = load <2 x i64>* %ptr, align 1
- %v2 = load <2 x i64>* %ptr2, align 1
+ %ptr1 = getelementptr inbounds <2 x i64>, <2 x i64>* %ptr, i64 5
+ %ptr2 = getelementptr inbounds <2 x i64>, <2 x i64>* %ptr, i64 6
+ %v1 = load <2 x i64>, <2 x i64>* %ptr1, align 1
+ %v2 = load <2 x i64>, <2 x i64>* %ptr2, align 1
%v3 = shufflevector <2 x i64> %v1, <2 x i64> %v2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%v4 = add <4 x i64> %v3, %x
ret <4 x i64> %v4
@@ -187,13 +140,14 @@ define <8 x i32> @combine_16_byte_loads_i32(<4 x i32>* %ptr, <8 x i32> %x) {
; BTVER2-NEXT: vinsertf128
; BTVER2-NEXT: retq
- ; HASWELL: vmovdqu
- ; HASWELL-NEXT: vpaddd
+ ; HASWELL-NOT: vextract
+ ; HASWELL: vpaddd
; HASWELL-NEXT: retq
- %ptr2 = getelementptr inbounds <4 x i32>* %ptr, i64 1
- %v1 = load <4 x i32>* %ptr, align 1
- %v2 = load <4 x i32>* %ptr2, align 1
+ %ptr1 = getelementptr inbounds <4 x i32>, <4 x i32>* %ptr, i64 6
+ %ptr2 = getelementptr inbounds <4 x i32>, <4 x i32>* %ptr, i64 7
+ %v1 = load <4 x i32>, <4 x i32>* %ptr1, align 1
+ %v2 = load <4 x i32>, <4 x i32>* %ptr2, align 1
%v3 = shufflevector <4 x i32> %v1, <4 x i32> %v2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%v4 = add <8 x i32> %v3, %x
ret <8 x i32> %v4
@@ -214,13 +168,14 @@ define <16 x i16> @combine_16_byte_loads_i16(<8 x i16>* %ptr, <16 x i16> %x) {
; BTVER2-NEXT: vinsertf128
; BTVER2-NEXT: retq
- ; HASWELL: vmovdqu
- ; HASWELL-NEXT: vpaddw
+ ; HASWELL-NOT: vextract
+ ; HASWELL: vpaddw
; HASWELL-NEXT: retq
- %ptr2 = getelementptr inbounds <8 x i16>* %ptr, i64 1
- %v1 = load <8 x i16>* %ptr, align 1
- %v2 = load <8 x i16>* %ptr2, align 1
+ %ptr1 = getelementptr inbounds <8 x i16>, <8 x i16>* %ptr, i64 7
+ %ptr2 = getelementptr inbounds <8 x i16>, <8 x i16>* %ptr, i64 8
+ %v1 = load <8 x i16>, <8 x i16>* %ptr1, align 1
+ %v2 = load <8 x i16>, <8 x i16>* %ptr2, align 1
%v3 = shufflevector <8 x i16> %v1, <8 x i16> %v2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%v4 = add <16 x i16> %v3, %x
ret <16 x i16> %v4
@@ -241,13 +196,14 @@ define <32 x i8> @combine_16_byte_loads_i8(<16 x i8>* %ptr, <32 x i8> %x) {
; BTVER2-NEXT: vinsertf128
; BTVER2-NEXT: retq
- ; HASWELL: vmovdqu
- ; HASWELL-NEXT: vpaddb
+ ; HASWELL-NOT: vextract
+ ; HASWELL: vpaddb
; HASWELL-NEXT: retq
- %ptr2 = getelementptr inbounds <16 x i8>* %ptr, i64 1
- %v1 = load <16 x i8>* %ptr, align 1
- %v2 = load <16 x i8>* %ptr2, align 1
+ %ptr1 = getelementptr inbounds <16 x i8>, <16 x i8>* %ptr, i64 8
+ %ptr2 = getelementptr inbounds <16 x i8>, <16 x i8>* %ptr, i64 9
+ %v1 = load <16 x i8>, <16 x i8>* %ptr1, align 1
+ %v2 = load <16 x i8>, <16 x i8>* %ptr2, align 1
%v3 = shufflevector <16 x i8> %v1, <16 x i8> %v2, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%v4 = add <32 x i8> %v3, %x
ret <32 x i8> %v4
@@ -261,17 +217,18 @@ define <4 x double> @combine_16_byte_loads_double(<2 x double>* %ptr, <4 x doubl
; SANDYB-NEXT: vaddpd
; SANDYB-NEXT: retq
- ; BTVER2: vmovupd
- ; BTVER2-NEXT: vaddpd
+ ; BTVER2-NOT: vinsertf128
+ ; BTVER2: vaddpd
; BTVER2-NEXT: retq
- ; HASWELL: vmovupd
+ ; HASWELL-NOT: vinsertf128
; HASWELL: vaddpd
; HASWELL-NEXT: retq
- %ptr2 = getelementptr inbounds <2 x double>* %ptr, i64 1
- %v1 = load <2 x double>* %ptr, align 1
- %v2 = load <2 x double>* %ptr2, align 1
+ %ptr1 = getelementptr inbounds <2 x double>, <2 x double>* %ptr, i64 9
+ %ptr2 = getelementptr inbounds <2 x double>, <2 x double>* %ptr, i64 10
+ %v1 = load <2 x double>, <2 x double>* %ptr1, align 1
+ %v2 = load <2 x double>, <2 x double>* %ptr2, align 1
%v3 = shufflevector <2 x double> %v1, <2 x double> %v2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%v4 = fadd <4 x double> %v3, %x
ret <4 x double> %v4
diff --git a/test/CodeGen/X86/unaligned-load.ll b/test/CodeGen/X86/unaligned-load.ll
index d8fffbec4f0f..ffbbcff2e5d6 100644
--- a/test/CodeGen/X86/unaligned-load.ll
+++ b/test/CodeGen/X86/unaligned-load.ll
@@ -11,8 +11,8 @@ entry:
br label %bb
bb: ; preds = %bb, %entry
- %String2Loc9 = getelementptr inbounds [31 x i8]* %String2Loc, i64 0, i64 0
- call void @llvm.memcpy.p0i8.p0i8.i64(i8* %String2Loc9, i8* getelementptr inbounds ([31 x i8]* @.str3, i64 0, i64 0), i64 31, i32 1, i1 false)
+ %String2Loc9 = getelementptr inbounds [31 x i8], [31 x i8]* %String2Loc, i64 0, i64 0
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* %String2Loc9, i8* getelementptr inbounds ([31 x i8], [31 x i8]* @.str3, i64 0, i64 0), i64 31, i32 1, i1 false)
br label %bb
return: ; No predecessors!
diff --git a/test/CodeGen/X86/unaligned-spill-folding.ll b/test/CodeGen/X86/unaligned-spill-folding.ll
index 154ce9e324d6..33e2daf9dc1b 100644
--- a/test/CodeGen/X86/unaligned-spill-folding.ll
+++ b/test/CodeGen/X86/unaligned-spill-folding.ll
@@ -11,9 +11,9 @@ vector.ph:
vector.body:
%index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %0 = getelementptr inbounds [32 x i32]* @arr, i32 0, i32 %index
+ %0 = getelementptr inbounds [32 x i32], [32 x i32]* @arr, i32 0, i32 %index
%1 = bitcast i32* %0 to <4 x i32>*
- %wide.load = load <4 x i32>* %1, align 16
+ %wide.load = load <4 x i32>, <4 x i32>* %1, align 16
%2 = add nsw <4 x i32> %wide.load, <i32 10, i32 10, i32 10, i32 10>
%3 = xor <4 x i32> %2, <i32 123345, i32 123345, i32 123345, i32 123345>
%4 = add nsw <4 x i32> %3, <i32 112, i32 112, i32 112, i32 112>
diff --git a/test/CodeGen/X86/unknown-location.ll b/test/CodeGen/X86/unknown-location.ll
index 140121ba3035..c018a49d135e 100644
--- a/test/CodeGen/X86/unknown-location.ll
+++ b/test/CodeGen/X86/unknown-location.ll
@@ -8,7 +8,7 @@
; CHECK-NEXT: .loc 1 0 0
; CHECK: cltd
; CHECK-NEXT: idivl
-; CHECK-NEXT: .loc 2 4 3
+; CHECK-NEXT: .loc 1 4 3
define i32 @foo(i32 %w, i32 %x, i32 %y, i32 %z) nounwind {
entry:
@@ -21,16 +21,16 @@ entry:
!llvm.dbg.cu = !{!3}
!llvm.module.flags = !{!12}
-!0 = !{!"0x101\00x\001\000", !1, !2, !6} ; [ DW_TAG_arg_variable ]
-!1 = !{!"0x2e\00foo\00foo\00foo\001\000\001\000\006\000\000\001", !10, !2, !4, null, i32 (i32, i32, i32, i32)* @foo, null, null, null} ; [ DW_TAG_subprogram ]
-!2 = !{!"0x29", !10} ; [ DW_TAG_file_type ]
-!3 = !{!"0x11\0012\00producer\000\00\000\00\000", !10, !11, !11, !9, null, null} ; [ DW_TAG_compile_unit ]
-!4 = !{!"0x15\00\000\000\000\000\000\000", !10, !2, null, !5, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!0 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "x", line: 1, arg: 0, scope: !1, file: !2, type: !6)
+!1 = !DISubprogram(name: "foo", linkageName: "foo", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, scopeLine: 1, file: !10, scope: !2, type: !4, function: i32 (i32, i32, i32, i32)* @foo)
+!2 = !DIFile(filename: "test.c", directory: "/dir")
+!3 = !DICompileUnit(language: DW_LANG_C99, producer: "producer", isOptimized: false, emissionKind: 0, file: !10, enums: !11, retainedTypes: !11, subprograms: !9)
+!4 = !DISubroutineType(types: !5)
!5 = !{!6}
-!6 = !{!"0x24\00int\000\0032\0032\000\000\005", !10, !2} ; [ DW_TAG_base_type ]
-!7 = !{!"0xb\001\0030\000", !2, !1} ; [ DW_TAG_lexical_block ]
-!8 = !MDLocation(line: 4, column: 3, scope: !7)
+!6 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!7 = distinct !DILexicalBlock(line: 1, column: 30, file: !10, scope: !1)
+!8 = !DILocation(line: 4, column: 3, scope: !7)
!9 = !{!1}
-!10 = !{!"test.c", !"/dir"}
-!11 = !{i32 0}
-!12 = !{i32 1, !"Debug Info Version", i32 2}
+!10 = !DIFile(filename: "test.c", directory: "/dir")
+!11 = !{}
+!12 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/CodeGen/X86/unwindraise.ll b/test/CodeGen/X86/unwindraise.ll
index 9bbe98043fd8..fb8319b63c2c 100644
--- a/test/CodeGen/X86/unwindraise.ll
+++ b/test/CodeGen/X86/unwindraise.ll
@@ -35,11 +35,11 @@ entry:
%2 = bitcast %struct._Unwind_Context* %cur_context to i8*
%3 = bitcast %struct._Unwind_Context* %this_context to i8*
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %2, i8* %3, i64 240, i32 8, i1 false)
- %personality = getelementptr inbounds %struct._Unwind_FrameState* %fs, i64 0, i32 6
- %retaddr_column.i = getelementptr inbounds %struct._Unwind_FrameState* %fs, i64 0, i32 9
- %flags.i.i.i.i = getelementptr inbounds %struct._Unwind_Context* %cur_context, i64 0, i32 5
- %ra.i = getelementptr inbounds %struct._Unwind_Context* %cur_context, i64 0, i32 2
- %exception_class = getelementptr inbounds %struct._Unwind_Exception* %exc, i64 0, i32 0
+ %personality = getelementptr inbounds %struct._Unwind_FrameState, %struct._Unwind_FrameState* %fs, i64 0, i32 6
+ %retaddr_column.i = getelementptr inbounds %struct._Unwind_FrameState, %struct._Unwind_FrameState* %fs, i64 0, i32 9
+ %flags.i.i.i.i = getelementptr inbounds %struct._Unwind_Context, %struct._Unwind_Context* %cur_context, i64 0, i32 5
+ %ra.i = getelementptr inbounds %struct._Unwind_Context, %struct._Unwind_Context* %cur_context, i64 0, i32 2
+ %exception_class = getelementptr inbounds %struct._Unwind_Exception, %struct._Unwind_Exception* %exc, i64 0, i32 0
br label %while.body
while.body: ; preds = %uw_update_context.exit, %entry
@@ -50,12 +50,12 @@ while.body: ; preds = %uw_update_context.e
]
if.end3: ; preds = %while.body
- %4 = load i32 (i32, i32, i64, %struct._Unwind_Exception*, %struct._Unwind_Context*)** %personality, align 8
+ %4 = load i32 (i32, i32, i64, %struct._Unwind_Exception*, %struct._Unwind_Context*)*, i32 (i32, i32, i64, %struct._Unwind_Exception*, %struct._Unwind_Context*)** %personality, align 8
%tobool = icmp eq i32 (i32, i32, i64, %struct._Unwind_Exception*, %struct._Unwind_Context*)* %4, null
br i1 %tobool, label %if.end13, label %if.then4
if.then4: ; preds = %if.end3
- %5 = load i64* %exception_class, align 8
+ %5 = load i64, i64* %exception_class, align 8
%call6 = call i32 %4(i32 1, i32 1, i64 %5, %struct._Unwind_Exception* %exc, %struct._Unwind_Context* %cur_context)
switch i32 %call6, label %do.end21.loopexit46 [
i32 6, label %while.end
@@ -64,7 +64,7 @@ if.then4: ; preds = %if.end3
if.end13: ; preds = %if.then4, %if.end3
call fastcc void @uw_update_context_1(%struct._Unwind_Context* %cur_context, %struct._Unwind_FrameState* %fs)
- %6 = load i64* %retaddr_column.i, align 8
+ %6 = load i64, i64* %retaddr_column.i, align 8
%conv.i = trunc i64 %6 to i32
%cmp.i.i.i = icmp slt i32 %conv.i, 18
br i1 %cmp.i.i.i, label %cond.end.i.i.i, label %cond.true.i.i.i
@@ -76,18 +76,18 @@ cond.true.i.i.i: ; preds = %if.end13
cond.end.i.i.i: ; preds = %if.end13
%sext.i = shl i64 %6, 32
%idxprom.i.i.i = ashr exact i64 %sext.i, 32
- %arrayidx.i.i.i = getelementptr inbounds [18 x i8]* @dwarf_reg_size_table, i64 0, i64 %idxprom.i.i.i
- %7 = load i8* %arrayidx.i.i.i, align 1
- %arrayidx2.i.i.i = getelementptr inbounds %struct._Unwind_Context* %cur_context, i64 0, i32 0, i64 %idxprom.i.i.i
- %8 = load i8** %arrayidx2.i.i.i, align 8
- %9 = load i64* %flags.i.i.i.i, align 8
+ %arrayidx.i.i.i = getelementptr inbounds [18 x i8], [18 x i8]* @dwarf_reg_size_table, i64 0, i64 %idxprom.i.i.i
+ %7 = load i8, i8* %arrayidx.i.i.i, align 1
+ %arrayidx2.i.i.i = getelementptr inbounds %struct._Unwind_Context, %struct._Unwind_Context* %cur_context, i64 0, i32 0, i64 %idxprom.i.i.i
+ %8 = load i8*, i8** %arrayidx2.i.i.i, align 8
+ %9 = load i64, i64* %flags.i.i.i.i, align 8
%and.i.i.i.i = and i64 %9, 4611686018427387904
%tobool.i.i.i = icmp eq i64 %and.i.i.i.i, 0
br i1 %tobool.i.i.i, label %if.end.i.i.i, label %land.lhs.true.i.i.i
land.lhs.true.i.i.i: ; preds = %cond.end.i.i.i
- %arrayidx4.i.i.i = getelementptr inbounds %struct._Unwind_Context* %cur_context, i64 0, i32 8, i64 %idxprom.i.i.i
- %10 = load i8* %arrayidx4.i.i.i, align 1
+ %arrayidx4.i.i.i = getelementptr inbounds %struct._Unwind_Context, %struct._Unwind_Context* %cur_context, i64 0, i32 8, i64 %idxprom.i.i.i
+ %10 = load i8, i8* %arrayidx4.i.i.i, align 1
%tobool6.i.i.i = icmp eq i8 %10, 0
br i1 %tobool6.i.i.i, label %if.end.i.i.i, label %if.then.i.i.i
@@ -101,7 +101,7 @@ if.end.i.i.i: ; preds = %land.lhs.true.i.i.i
if.then10.i.i.i: ; preds = %if.end.i.i.i
%12 = bitcast i8* %8 to i64*
- %13 = load i64* %12, align 8
+ %13 = load i64, i64* %12, align 8
br label %uw_update_context.exit
cond.true14.i.i.i: ; preds = %if.end.i.i.i
@@ -115,36 +115,36 @@ uw_update_context.exit: ; preds = %if.then10.i.i.i, %i
br label %while.body
while.end: ; preds = %if.then4
- %private_1 = getelementptr inbounds %struct._Unwind_Exception* %exc, i64 0, i32 2
+ %private_1 = getelementptr inbounds %struct._Unwind_Exception, %struct._Unwind_Exception* %exc, i64 0, i32 2
store i64 0, i64* %private_1, align 8
- %15 = load i8** %ra.i, align 8
+ %15 = load i8*, i8** %ra.i, align 8
%16 = ptrtoint i8* %15 to i64
- %private_2 = getelementptr inbounds %struct._Unwind_Exception* %exc, i64 0, i32 3
+ %private_2 = getelementptr inbounds %struct._Unwind_Exception, %struct._Unwind_Exception* %exc, i64 0, i32 3
store i64 %16, i64* %private_2, align 8
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %2, i8* %3, i64 240, i32 8, i1 false)
%17 = bitcast %struct._Unwind_FrameState* %fs.i to i8*
call void @llvm.lifetime.start(i64 -1, i8* %17)
- %personality.i = getelementptr inbounds %struct._Unwind_FrameState* %fs.i, i64 0, i32 6
- %retaddr_column.i22 = getelementptr inbounds %struct._Unwind_FrameState* %fs.i, i64 0, i32 9
+ %personality.i = getelementptr inbounds %struct._Unwind_FrameState, %struct._Unwind_FrameState* %fs.i, i64 0, i32 6
+ %retaddr_column.i22 = getelementptr inbounds %struct._Unwind_FrameState, %struct._Unwind_FrameState* %fs.i, i64 0, i32 9
br label %while.body.i
while.body.i: ; preds = %uw_update_context.exit44, %while.end
%call.i = call fastcc i32 @uw_frame_state_for(%struct._Unwind_Context* %cur_context, %struct._Unwind_FrameState* %fs.i)
- %18 = load i8** %ra.i, align 8
+ %18 = load i8*, i8** %ra.i, align 8
%19 = ptrtoint i8* %18 to i64
- %20 = load i64* %private_2, align 8
+ %20 = load i64, i64* %private_2, align 8
%cmp.i = icmp eq i64 %19, %20
%cmp2.i = icmp eq i32 %call.i, 0
br i1 %cmp2.i, label %if.end.i, label %do.end21
if.end.i: ; preds = %while.body.i
- %21 = load i32 (i32, i32, i64, %struct._Unwind_Exception*, %struct._Unwind_Context*)** %personality.i, align 8
+ %21 = load i32 (i32, i32, i64, %struct._Unwind_Exception*, %struct._Unwind_Context*)*, i32 (i32, i32, i64, %struct._Unwind_Exception*, %struct._Unwind_Context*)** %personality.i, align 8
%tobool.i = icmp eq i32 (i32, i32, i64, %struct._Unwind_Exception*, %struct._Unwind_Context*)* %21, null
br i1 %tobool.i, label %if.end12.i, label %if.then3.i
if.then3.i: ; preds = %if.end.i
%or.i = select i1 %cmp.i, i32 6, i32 2
- %22 = load i64* %exception_class, align 8
+ %22 = load i64, i64* %exception_class, align 8
%call5.i = call i32 %21(i32 1, i32 %or.i, i64 %22, %struct._Unwind_Exception* %exc, %struct._Unwind_Context* %cur_context)
switch i32 %call5.i, label %do.end21 [
i32 7, label %do.body19
@@ -160,7 +160,7 @@ cond.true.i: ; preds = %if.end12.i
cond.end.i: ; preds = %if.end12.i
call fastcc void @uw_update_context_1(%struct._Unwind_Context* %cur_context, %struct._Unwind_FrameState* %fs.i)
- %23 = load i64* %retaddr_column.i22, align 8
+ %23 = load i64, i64* %retaddr_column.i22, align 8
%conv.i23 = trunc i64 %23 to i32
%cmp.i.i.i24 = icmp slt i32 %conv.i23, 18
br i1 %cmp.i.i.i24, label %cond.end.i.i.i33, label %cond.true.i.i.i25
@@ -172,18 +172,18 @@ cond.true.i.i.i25: ; preds = %cond.end.i
cond.end.i.i.i33: ; preds = %cond.end.i
%sext.i26 = shl i64 %23, 32
%idxprom.i.i.i27 = ashr exact i64 %sext.i26, 32
- %arrayidx.i.i.i28 = getelementptr inbounds [18 x i8]* @dwarf_reg_size_table, i64 0, i64 %idxprom.i.i.i27
- %24 = load i8* %arrayidx.i.i.i28, align 1
- %arrayidx2.i.i.i29 = getelementptr inbounds %struct._Unwind_Context* %cur_context, i64 0, i32 0, i64 %idxprom.i.i.i27
- %25 = load i8** %arrayidx2.i.i.i29, align 8
- %26 = load i64* %flags.i.i.i.i, align 8
+ %arrayidx.i.i.i28 = getelementptr inbounds [18 x i8], [18 x i8]* @dwarf_reg_size_table, i64 0, i64 %idxprom.i.i.i27
+ %24 = load i8, i8* %arrayidx.i.i.i28, align 1
+ %arrayidx2.i.i.i29 = getelementptr inbounds %struct._Unwind_Context, %struct._Unwind_Context* %cur_context, i64 0, i32 0, i64 %idxprom.i.i.i27
+ %25 = load i8*, i8** %arrayidx2.i.i.i29, align 8
+ %26 = load i64, i64* %flags.i.i.i.i, align 8
%and.i.i.i.i31 = and i64 %26, 4611686018427387904
%tobool.i.i.i32 = icmp eq i64 %and.i.i.i.i31, 0
br i1 %tobool.i.i.i32, label %if.end.i.i.i39, label %land.lhs.true.i.i.i36
land.lhs.true.i.i.i36: ; preds = %cond.end.i.i.i33
- %arrayidx4.i.i.i34 = getelementptr inbounds %struct._Unwind_Context* %cur_context, i64 0, i32 8, i64 %idxprom.i.i.i27
- %27 = load i8* %arrayidx4.i.i.i34, align 1
+ %arrayidx4.i.i.i34 = getelementptr inbounds %struct._Unwind_Context, %struct._Unwind_Context* %cur_context, i64 0, i32 8, i64 %idxprom.i.i.i27
+ %27 = load i8, i8* %arrayidx4.i.i.i34, align 1
%tobool6.i.i.i35 = icmp eq i8 %27, 0
br i1 %tobool6.i.i.i35, label %if.end.i.i.i39, label %if.then.i.i.i37
@@ -197,7 +197,7 @@ if.end.i.i.i39: ; preds = %land.lhs.true.i.i.i
if.then10.i.i.i40: ; preds = %if.end.i.i.i39
%29 = bitcast i8* %25 to i64*
- %30 = load i64* %29, align 8
+ %30 = load i64, i64* %29, align 8
br label %uw_update_context.exit44
cond.true14.i.i.i41: ; preds = %if.end.i.i.i39
@@ -213,7 +213,7 @@ uw_update_context.exit44: ; preds = %if.then10.i.i.i40,
do.body19: ; preds = %if.then3.i
call void @llvm.lifetime.end(i64 -1, i8* %17)
%call20 = call fastcc i64 @uw_install_context_1(%struct._Unwind_Context* %this_context, %struct._Unwind_Context* %cur_context)
- %32 = load i8** %ra.i, align 8
+ %32 = load i8*, i8** %ra.i, align 8
call void @llvm.eh.return.i64(i64 %call20, i8* %32)
unreachable
diff --git a/test/CodeGen/X86/use-add-flags.ll b/test/CodeGen/X86/use-add-flags.ll
index fd57f5ca8d2d..da0002cc2520 100644
--- a/test/CodeGen/X86/use-add-flags.ll
+++ b/test/CodeGen/X86/use-add-flags.ll
@@ -13,7 +13,7 @@
; CHECK: ret
define i32 @test1(i32* %x, i32 %y, i32 %a, i32 %b) nounwind {
- %tmp2 = load i32* %x, align 4 ; <i32> [#uses=1]
+ %tmp2 = load i32, i32* %x, align 4 ; <i32> [#uses=1]
%tmp4 = add i32 %tmp2, %y ; <i32> [#uses=1]
%tmp5 = icmp slt i32 %tmp4, 0 ; <i1> [#uses=1]
%tmp.0 = select i1 %tmp5, i32 %a, i32 %b ; <i32> [#uses=1]
diff --git a/test/CodeGen/X86/utf16-cfstrings.ll b/test/CodeGen/X86/utf16-cfstrings.ll
index c7ec3eb7abce..5f0e78fccc65 100644
--- a/test/CodeGen/X86/utf16-cfstrings.ll
+++ b/test/CodeGen/X86/utf16-cfstrings.ll
@@ -6,7 +6,7 @@
@__CFConstantStringClassReference = external global [0 x i32]
@.str = internal unnamed_addr constant [5 x i16] [i16 252, i16 98, i16 101, i16 114, i16 0], align 2
-@_unnamed_cfstring_ = private constant %struct.NSConstantString { i32* getelementptr inbounds ([0 x i32]* @__CFConstantStringClassReference, i32 0, i32 0), i32 2000, i8* bitcast ([5 x i16]* @.str to i8*), i64 4 }, section "__DATA,__cfstring"
+@_unnamed_cfstring_ = private constant %struct.NSConstantString { i32* getelementptr inbounds ([0 x i32], [0 x i32]* @__CFConstantStringClassReference, i32 0, i32 0), i32 2000, i8* bitcast ([5 x i16]* @.str to i8*), i64 4 }, section "__DATA,__cfstring"
; CHECK: .section __TEXT,__ustring
; CHECK-NEXT: .align 1
@@ -21,7 +21,7 @@ define i32 @main() uwtable ssp {
entry:
%retval = alloca i32, align 4
store i32 0, i32* %retval
- call void (%0*, ...)* @NSLog(%0* bitcast (%struct.NSConstantString* @_unnamed_cfstring_ to %0*))
+ call void (%0*, ...) @NSLog(%0* bitcast (%struct.NSConstantString* @_unnamed_cfstring_ to %0*))
ret i32 0
}
diff --git a/test/CodeGen/X86/v2f32.ll b/test/CodeGen/X86/v2f32.ll
index b9bd80f949ec..7beed52295ee 100644
--- a/test/CodeGen/X86/v2f32.ll
+++ b/test/CodeGen/X86/v2f32.ll
@@ -5,8 +5,7 @@
define void @test1(<2 x float> %Q, float *%P2) nounwind {
; X64-LABEL: test1:
; X64: # BB#0:
-; X64-NEXT: movaps %xmm0, %xmm1
-; X64-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,2,3]
+; X64-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; X64-NEXT: addss %xmm0, %xmm1
; X64-NEXT: movss %xmm1, (%rdi)
; X64-NEXT: retq
@@ -14,8 +13,7 @@ define void @test1(<2 x float> %Q, float *%P2) nounwind {
; X32-LABEL: test1:
; X32: # BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: movaps %xmm0, %xmm1
-; X32-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,2,3]
+; X32-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; X32-NEXT: addss %xmm0, %xmm1
; X32-NEXT: movss %xmm1, (%eax)
; X32-NEXT: retl
diff --git a/test/CodeGen/X86/v4i32load-crash.ll b/test/CodeGen/X86/v4i32load-crash.ll
index 3e7f9e63c9a8..8d019bc43d6b 100644
--- a/test/CodeGen/X86/v4i32load-crash.ll
+++ b/test/CodeGen/X86/v4i32load-crash.ll
@@ -13,10 +13,10 @@
; Function Attrs: nounwind
define void @fn3(i32 %el) {
entry:
- %0 = load i32* getelementptr inbounds ([4 x i32]* @e, i32 0, i32 0)
- %1 = load i32* getelementptr inbounds ([4 x i32]* @e, i32 0, i32 1)
- %2 = load i32* getelementptr inbounds ([4 x i32]* @e, i32 0, i32 2)
- %3 = load i32* getelementptr inbounds ([4 x i32]* @e, i32 0, i32 3)
+ %0 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @e, i32 0, i32 0)
+ %1 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @e, i32 0, i32 1)
+ %2 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @e, i32 0, i32 2)
+ %3 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @e, i32 0, i32 3)
%4 = insertelement <4 x i32> undef, i32 %0, i32 0
%5 = insertelement <4 x i32> %4, i32 %1, i32 1
%6 = insertelement <4 x i32> %5, i32 %2, i32 2
diff --git a/test/CodeGen/X86/v8i1-masks.ll b/test/CodeGen/X86/v8i1-masks.ll
index 5da6e9636ebe..21fe96321987 100644
--- a/test/CodeGen/X86/v8i1-masks.ll
+++ b/test/CodeGen/X86/v8i1-masks.ll
@@ -10,10 +10,10 @@
;CHECK: ret
define void @and_masks(<8 x float>* %a, <8 x float>* %b, <8 x float>* %c) nounwind uwtable noinline ssp {
- %v0 = load <8 x float>* %a, align 16
- %v1 = load <8 x float>* %b, align 16
+ %v0 = load <8 x float>, <8 x float>* %a, align 16
+ %v1 = load <8 x float>, <8 x float>* %b, align 16
%m0 = fcmp olt <8 x float> %v1, %v0
- %v2 = load <8 x float>* %c, align 16
+ %v2 = load <8 x float>, <8 x float>* %c, align 16
%m1 = fcmp olt <8 x float> %v2, %v0
%mand = and <8 x i1> %m1, %m0
%r = zext <8 x i1> %mand to <8 x i32>
@@ -28,8 +28,8 @@ define void @and_masks(<8 x float>* %a, <8 x float>* %b, <8 x float>* %c) nounwi
;CHECK: vmovaps
;CHECK: ret
define void @neg_masks(<8 x float>* %a, <8 x float>* %b, <8 x float>* %c) nounwind uwtable noinline ssp {
- %v0 = load <8 x float>* %a, align 16
- %v1 = load <8 x float>* %b, align 16
+ %v0 = load <8 x float>, <8 x float>* %a, align 16
+ %v1 = load <8 x float>, <8 x float>* %b, align 16
%m0 = fcmp olt <8 x float> %v1, %v0
%mand = xor <8 x i1> %m0, <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>
%r = zext <8 x i1> %mand to <8 x i32>
diff --git a/test/CodeGen/X86/vaargs.ll b/test/CodeGen/X86/vaargs.ll
index 43c895eb39ef..3767f41c2aae 100644
--- a/test/CodeGen/X86/vaargs.ll
+++ b/test/CodeGen/X86/vaargs.ll
@@ -28,9 +28,9 @@ define i32 @sum(i32 %count, ...) nounwind optsize ssp uwtable {
br i1 %2, label %.lr.ph, label %._crit_edge
.lr.ph: ; preds = %0
- %3 = getelementptr inbounds [1 x %struct.__va_list_tag]* %ap, i64 0, i64 0, i32 0
- %4 = getelementptr inbounds [1 x %struct.__va_list_tag]* %ap, i64 0, i64 0, i32 2
- %.pre = load i32* %3, align 16
+ %3 = getelementptr inbounds [1 x %struct.__va_list_tag], [1 x %struct.__va_list_tag]* %ap, i64 0, i64 0, i32 0
+ %4 = getelementptr inbounds [1 x %struct.__va_list_tag], [1 x %struct.__va_list_tag]* %ap, i64 0, i64 0, i32 2
+ %.pre = load i32, i32* %3, align 16
br label %5
; <label>:5 ; preds = %.lr.ph, %13
@@ -45,8 +45,8 @@ define i32 @sum(i32 %count, ...) nounwind optsize ssp uwtable {
br label %13
; <label>:10 ; preds = %5
- %11 = load i8** %4, align 8
- %12 = getelementptr i8* %11, i64 8
+ %11 = load i8*, i8** %4, align 8
+ %12 = getelementptr i8, i8* %11, i64 8
store i8* %12, i8** %4, align 8
br label %13
diff --git a/test/CodeGen/X86/vararg-callee-cleanup.ll b/test/CodeGen/X86/vararg-callee-cleanup.ll
index 2dcf319a2080..bb1104d85d87 100644
--- a/test/CodeGen/X86/vararg-callee-cleanup.ll
+++ b/test/CodeGen/X86/vararg-callee-cleanup.ll
@@ -4,8 +4,8 @@ target datalayout = "e-m:w-p:32:32-i64:64-f80:32-n8:16:32-S32"
declare x86_thiscallcc void @thiscall_thunk(i8* %this, ...)
define i32 @call_varargs_thiscall_thunk(i8* %a, i32 %b, i32 %c, i32 %d) {
- call x86_thiscallcc void (i8*, ...)* @thiscall_thunk(i8* %a, i32 1, i32 2)
- call x86_thiscallcc void (i8*, ...)* @thiscall_thunk(i8* %a, i32 1, i32 2)
+ call x86_thiscallcc void (i8*, ...) @thiscall_thunk(i8* %a, i32 1, i32 2)
+ call x86_thiscallcc void (i8*, ...) @thiscall_thunk(i8* %a, i32 1, i32 2)
%t1 = add i32 %b, %c
%r = add i32 %t1, %d
ret i32 %r
@@ -19,8 +19,8 @@ define i32 @call_varargs_thiscall_thunk(i8* %a, i32 %b, i32 %c, i32 %d) {
declare x86_stdcallcc void @stdcall_thunk(i8* %this, ...)
define i32 @call_varargs_stdcall_thunk(i8* %a, i32 %b, i32 %c, i32 %d) {
- call x86_stdcallcc void (i8*, ...)* @stdcall_thunk(i8* %a, i32 1, i32 2)
- call x86_stdcallcc void (i8*, ...)* @stdcall_thunk(i8* %a, i32 1, i32 2)
+ call x86_stdcallcc void (i8*, ...) @stdcall_thunk(i8* %a, i32 1, i32 2)
+ call x86_stdcallcc void (i8*, ...) @stdcall_thunk(i8* %a, i32 1, i32 2)
%t1 = add i32 %b, %c
%r = add i32 %t1, %d
ret i32 %r
@@ -32,8 +32,8 @@ define i32 @call_varargs_stdcall_thunk(i8* %a, i32 %b, i32 %c, i32 %d) {
declare x86_fastcallcc void @fastcall_thunk(i8* %this, ...)
define i32 @call_varargs_fastcall_thunk(i8* %a, i32 %b, i32 %c, i32 %d) {
- call x86_fastcallcc void (i8*, ...)* @fastcall_thunk(i8* inreg %a, i32 inreg 1, i32 2)
- call x86_fastcallcc void (i8*, ...)* @fastcall_thunk(i8* inreg %a, i32 inreg 1, i32 2)
+ call x86_fastcallcc void (i8*, ...) @fastcall_thunk(i8* inreg %a, i32 inreg 1, i32 2)
+ call x86_fastcallcc void (i8*, ...) @fastcall_thunk(i8* inreg %a, i32 inreg 1, i32 2)
%t1 = add i32 %b, %c
%r = add i32 %t1, %d
ret i32 %r
diff --git a/test/CodeGen/X86/vararg_tailcall.ll b/test/CodeGen/X86/vararg_tailcall.ll
index eeda5e15a168..98aa4a89afc1 100644
--- a/test/CodeGen/X86/vararg_tailcall.ll
+++ b/test/CodeGen/X86/vararg_tailcall.ll
@@ -15,7 +15,7 @@
; WIN64: callq
define void @foo(i64 %arg) nounwind optsize ssp noredzone {
entry:
- %call = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([5 x i8]* @.str, i64 0, i64 0), i64 %arg) nounwind optsize noredzone
+ %call = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i64 0, i64 0), i64 %arg) nounwind optsize noredzone
ret void
}
@@ -27,7 +27,7 @@ declare i32 @printf(i8*, ...) optsize noredzone
; WIN64: jmp
define void @bar(i64 %arg) nounwind optsize ssp noredzone {
entry:
- tail call void @bar2(i8* getelementptr inbounds ([5 x i8]* @.str, i64 0, i64 0), i64 %arg) nounwind optsize noredzone
+ tail call void @bar2(i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i64 0, i64 0), i64 %arg) nounwind optsize noredzone
ret void
}
@@ -39,8 +39,8 @@ declare void @bar2(i8*, i64) optsize noredzone
; WIN64: callq
define i8* @foo2(i8* %arg) nounwind optsize ssp noredzone {
entry:
- %tmp1 = load i8** @sel, align 8
- %call = tail call i8* (i8*, i8*, ...)* @x2(i8* %arg, i8* %tmp1) nounwind optsize noredzone
+ %tmp1 = load i8*, i8** @sel, align 8
+ %call = tail call i8* (i8*, i8*, ...) @x2(i8* %arg, i8* %tmp1) nounwind optsize noredzone
ret i8* %call
}
@@ -52,11 +52,11 @@ declare i8* @x2(i8*, i8*, ...) optsize noredzone
; WIN64: callq
define i8* @foo6(i8* %arg1, i8* %arg2) nounwind optsize ssp noredzone {
entry:
- %tmp2 = load i8** @sel3, align 8
- %tmp3 = load i8** @sel4, align 8
- %tmp4 = load i8** @sel5, align 8
- %tmp5 = load i8** @sel6, align 8
- %call = tail call i8* (i8*, i8*, i8*, ...)* @x3(i8* %arg1, i8* %arg2, i8* %tmp2, i8* %tmp3, i8* %tmp4, i8* %tmp5) nounwind optsize noredzone
+ %tmp2 = load i8*, i8** @sel3, align 8
+ %tmp3 = load i8*, i8** @sel4, align 8
+ %tmp4 = load i8*, i8** @sel5, align 8
+ %tmp5 = load i8*, i8** @sel6, align 8
+ %call = tail call i8* (i8*, i8*, i8*, ...) @x3(i8* %arg1, i8* %arg2, i8* %tmp2, i8* %tmp3, i8* %tmp4, i8* %tmp5) nounwind optsize noredzone
ret i8* %call
}
@@ -68,12 +68,12 @@ declare i8* @x3(i8*, i8*, i8*, ...) optsize noredzone
; WIN64: callq
define i8* @foo7(i8* %arg1, i8* %arg2) nounwind optsize ssp noredzone {
entry:
- %tmp2 = load i8** @sel3, align 8
- %tmp3 = load i8** @sel4, align 8
- %tmp4 = load i8** @sel5, align 8
- %tmp5 = load i8** @sel6, align 8
- %tmp6 = load i8** @sel7, align 8
- %call = tail call i8* (i8*, i8*, i8*, i8*, i8*, i8*, i8*, ...)* @x7(i8* %arg1, i8* %arg2, i8* %tmp2, i8* %tmp3, i8* %tmp4, i8* %tmp5, i8* %tmp6) nounwind optsize noredzone
+ %tmp2 = load i8*, i8** @sel3, align 8
+ %tmp3 = load i8*, i8** @sel4, align 8
+ %tmp4 = load i8*, i8** @sel5, align 8
+ %tmp5 = load i8*, i8** @sel6, align 8
+ %tmp6 = load i8*, i8** @sel7, align 8
+ %call = tail call i8* (i8*, i8*, i8*, i8*, i8*, i8*, i8*, ...) @x7(i8* %arg1, i8* %arg2, i8* %tmp2, i8* %tmp3, i8* %tmp4, i8* %tmp5, i8* %tmp6) nounwind optsize noredzone
ret i8* %call
}
@@ -85,10 +85,10 @@ declare i8* @x7(i8*, i8*, i8*, i8*, i8*, i8*, i8*, ...) optsize noredzone
; WIN64: callq
define i8* @foo8(i8* %arg1, i8* %arg2) nounwind optsize ssp noredzone {
entry:
- %tmp2 = load i8** @sel3, align 8
- %tmp3 = load i8** @sel4, align 8
- %tmp4 = load i8** @sel5, align 8
- %tmp5 = load i8** @sel6, align 8
- %call = tail call i8* (i8*, i8*, i8*, ...)* @x3(i8* %arg1, i8* %arg2, i8* %tmp2, i8* %tmp3, i8* %tmp4, i8* %tmp5, i32 48879, i32 48879) nounwind optsize noredzone
+ %tmp2 = load i8*, i8** @sel3, align 8
+ %tmp3 = load i8*, i8** @sel4, align 8
+ %tmp4 = load i8*, i8** @sel5, align 8
+ %tmp5 = load i8*, i8** @sel6, align 8
+ %call = tail call i8* (i8*, i8*, i8*, ...) @x3(i8* %arg1, i8* %arg2, i8* %tmp2, i8* %tmp3, i8* %tmp4, i8* %tmp5, i32 48879, i32 48879) nounwind optsize noredzone
ret i8* %call
}
diff --git a/test/CodeGen/X86/variadic-node-pic.ll b/test/CodeGen/X86/variadic-node-pic.ll
index 1182a306abd0..704459e67a6d 100644
--- a/test/CodeGen/X86/variadic-node-pic.ll
+++ b/test/CodeGen/X86/variadic-node-pic.ll
@@ -6,6 +6,6 @@ target triple = "x86_64-apple-darwin8"
declare void @xscanf(i64) nounwind
define void @foo() nounwind {
- call void (i64)* @xscanf( i64 0 ) nounwind
+ call void (i64) @xscanf( i64 0 ) nounwind
unreachable
}
diff --git a/test/CodeGen/X86/vec-loadsingles-alignment.ll b/test/CodeGen/X86/vec-loadsingles-alignment.ll
index 6aa2adb228e1..ee3bfb311adf 100644
--- a/test/CodeGen/X86/vec-loadsingles-alignment.ll
+++ b/test/CodeGen/X86/vec-loadsingles-alignment.ll
@@ -10,14 +10,14 @@ define i32 @subb() nounwind ssp {
; CHECK-LABEL: subb:
; CHECK: vmovups e(%rip), %ymm
entry:
- %0 = load i32* getelementptr inbounds ([8 x i32]* @e, i64 0, i64 7), align 4
- %1 = load i32* getelementptr inbounds ([8 x i32]* @e, i64 0, i64 6), align 8
- %2 = load i32* getelementptr inbounds ([8 x i32]* @e, i64 0, i64 5), align 4
- %3 = load i32* getelementptr inbounds ([8 x i32]* @e, i64 0, i64 4), align 16
- %4 = load i32* getelementptr inbounds ([8 x i32]* @e, i64 0, i64 3), align 4
- %5 = load i32* getelementptr inbounds ([8 x i32]* @e, i64 0, i64 2), align 8
- %6 = load i32* getelementptr inbounds ([8 x i32]* @e, i64 0, i64 1), align 4
- %7 = load i32* getelementptr inbounds ([8 x i32]* @e, i64 0, i64 0), align 16
+ %0 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @e, i64 0, i64 7), align 4
+ %1 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @e, i64 0, i64 6), align 8
+ %2 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @e, i64 0, i64 5), align 4
+ %3 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @e, i64 0, i64 4), align 16
+ %4 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @e, i64 0, i64 3), align 4
+ %5 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @e, i64 0, i64 2), align 8
+ %6 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @e, i64 0, i64 1), align 4
+ %7 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @e, i64 0, i64 0), align 16
%vecinit.i = insertelement <8 x i32> undef, i32 %7, i32 0
%vecinit1.i = insertelement <8 x i32> %vecinit.i, i32 %6, i32 1
%vecinit2.i = insertelement <8 x i32> %vecinit1.i, i32 %5, i32 2
diff --git a/test/CodeGen/X86/vec-trunc-store.ll b/test/CodeGen/X86/vec-trunc-store.ll
index 4d665f1843ef..d7897f8f3fa9 100644
--- a/test/CodeGen/X86/vec-trunc-store.ll
+++ b/test/CodeGen/X86/vec-trunc-store.ll
@@ -1,14 +1,14 @@
; RUN: llc < %s -march=x86-64
define void @foo(<8 x i32>* %p) nounwind {
- %t = load <8 x i32>* %p
+ %t = load <8 x i32>, <8 x i32>* %p
%cti69 = trunc <8 x i32> %t to <8 x i16> ; <<8 x i16>> [#uses=1]
store <8 x i16> %cti69, <8 x i16>* undef
ret void
}
define void @bar(<4 x i32>* %p) nounwind {
- %t = load <4 x i32>* %p
+ %t = load <4 x i32>, <4 x i32>* %p
%cti44 = trunc <4 x i32> %t to <4 x i16> ; <<4 x i16>> [#uses=1]
store <4 x i16> %cti44, <4 x i16>* undef
ret void
diff --git a/test/CodeGen/X86/vec_align.ll b/test/CodeGen/X86/vec_align.ll
index e27311561b2c..558d768aced4 100644
--- a/test/CodeGen/X86/vec_align.ll
+++ b/test/CodeGen/X86/vec_align.ll
@@ -16,15 +16,15 @@ define %f4 @test1(float %W, float %X, float %Y, float %Z) nounwind {
}
define %f4 @test2() nounwind {
- %Wp = getelementptr { float,float,float,float}* @G, i32 0, i32 0
- %Xp = getelementptr { float,float,float,float}* @G, i32 0, i32 1
- %Yp = getelementptr { float,float,float,float}* @G, i32 0, i32 2
- %Zp = getelementptr { float,float,float,float}* @G, i32 0, i32 3
+ %Wp = getelementptr { float,float,float,float}, { float,float,float,float}* @G, i32 0, i32 0
+ %Xp = getelementptr { float,float,float,float}, { float,float,float,float}* @G, i32 0, i32 1
+ %Yp = getelementptr { float,float,float,float}, { float,float,float,float}* @G, i32 0, i32 2
+ %Zp = getelementptr { float,float,float,float}, { float,float,float,float}* @G, i32 0, i32 3
- %W = load float* %Wp
- %X = load float* %Xp
- %Y = load float* %Yp
- %Z = load float* %Zp
+ %W = load float, float* %Wp
+ %X = load float, float* %Xp
+ %Y = load float, float* %Yp
+ %Z = load float, float* %Zp
%tmp = insertelement %f4 undef, float %W, i32 0
%tmp2 = insertelement %f4 %tmp, float %X, i32 1
diff --git a/test/CodeGen/X86/vec_anyext.ll b/test/CodeGen/X86/vec_anyext.ll
index d2a4c7f60dd7..c088d7f57b1a 100644
--- a/test/CodeGen/X86/vec_anyext.ll
+++ b/test/CodeGen/X86/vec_anyext.ll
@@ -2,9 +2,9 @@
; PR 9267
define<4 x i16> @func_16_32() {
- %F = load <4 x i32>* undef
+ %F = load <4 x i32>, <4 x i32>* undef
%G = trunc <4 x i32> %F to <4 x i16>
- %H = load <4 x i32>* undef
+ %H = load <4 x i32>, <4 x i32>* undef
%Y = trunc <4 x i32> %H to <4 x i16>
%T = add <4 x i16> %Y, %G
store <4 x i16>%T , <4 x i16>* undef
@@ -12,9 +12,9 @@ define<4 x i16> @func_16_32() {
}
define<4 x i16> @func_16_64() {
- %F = load <4 x i64>* undef
+ %F = load <4 x i64>, <4 x i64>* undef
%G = trunc <4 x i64> %F to <4 x i16>
- %H = load <4 x i64>* undef
+ %H = load <4 x i64>, <4 x i64>* undef
%Y = trunc <4 x i64> %H to <4 x i16>
%T = xor <4 x i16> %Y, %G
store <4 x i16>%T , <4 x i16>* undef
@@ -22,36 +22,36 @@ define<4 x i16> @func_16_64() {
}
define<4 x i32> @func_32_64() {
- %F = load <4 x i64>* undef
+ %F = load <4 x i64>, <4 x i64>* undef
%G = trunc <4 x i64> %F to <4 x i32>
- %H = load <4 x i64>* undef
+ %H = load <4 x i64>, <4 x i64>* undef
%Y = trunc <4 x i64> %H to <4 x i32>
%T = or <4 x i32> %Y, %G
ret <4 x i32> %T
}
define<4 x i8> @func_8_16() {
- %F = load <4 x i16>* undef
+ %F = load <4 x i16>, <4 x i16>* undef
%G = trunc <4 x i16> %F to <4 x i8>
- %H = load <4 x i16>* undef
+ %H = load <4 x i16>, <4 x i16>* undef
%Y = trunc <4 x i16> %H to <4 x i8>
%T = add <4 x i8> %Y, %G
ret <4 x i8> %T
}
define<4 x i8> @func_8_32() {
- %F = load <4 x i32>* undef
+ %F = load <4 x i32>, <4 x i32>* undef
%G = trunc <4 x i32> %F to <4 x i8>
- %H = load <4 x i32>* undef
+ %H = load <4 x i32>, <4 x i32>* undef
%Y = trunc <4 x i32> %H to <4 x i8>
%T = sub <4 x i8> %Y, %G
ret <4 x i8> %T
}
define<4 x i8> @func_8_64() {
- %F = load <4 x i64>* undef
+ %F = load <4 x i64>, <4 x i64>* undef
%G = trunc <4 x i64> %F to <4 x i8>
- %H = load <4 x i64>* undef
+ %H = load <4 x i64>, <4 x i64>* undef
%Y = trunc <4 x i64> %H to <4 x i8>
%T = add <4 x i8> %Y, %G
ret <4 x i8> %T
diff --git a/test/CodeGen/X86/vec_cast2.ll b/test/CodeGen/X86/vec_cast2.ll
index 8600c48aaac1..1ba11f51baa2 100644
--- a/test/CodeGen/X86/vec_cast2.ll
+++ b/test/CodeGen/X86/vec_cast2.ll
@@ -5,7 +5,7 @@ define <8 x float> @foo1_8(<8 x i8> %src) {
; CHECK-LABEL: foo1_8:
; CHECK: ## BB#0:
; CHECK-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm0[4,4,5,5,6,6,7,7]
-; CHECK-NEXT: vpmovzxwd %xmm0, %xmm0
+; CHECK-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; CHECK-NEXT: vpslld $24, %xmm0, %xmm0
; CHECK-NEXT: vpsrad $24, %xmm0, %xmm0
; CHECK-NEXT: vpslld $24, %xmm1, %xmm1
@@ -16,13 +16,9 @@ define <8 x float> @foo1_8(<8 x i8> %src) {
;
; CHECK-WIDE-LABEL: foo1_8:
; CHECK-WIDE: ## BB#0:
-; CHECK-WIDE-NEXT: vpmovzxbd %xmm0, %xmm1
-; CHECK-WIDE-NEXT: vpslld $24, %xmm1, %xmm1
-; CHECK-WIDE-NEXT: vpsrad $24, %xmm1, %xmm1
-; CHECK-WIDE-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; CHECK-WIDE-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
-; CHECK-WIDE-NEXT: vpslld $24, %xmm0, %xmm0
-; CHECK-WIDE-NEXT: vpsrad $24, %xmm0, %xmm0
+; CHECK-WIDE-NEXT: vpmovsxbd %xmm0, %xmm1
+; CHECK-WIDE-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
+; CHECK-WIDE-NEXT: vpmovsxbd %xmm0, %xmm0
; CHECK-WIDE-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; CHECK-WIDE-NEXT: vcvtdq2ps %ymm0, %ymm0
; CHECK-WIDE-NEXT: retl
@@ -40,9 +36,7 @@ define <4 x float> @foo1_4(<4 x i8> %src) {
;
; CHECK-WIDE-LABEL: foo1_4:
; CHECK-WIDE: ## BB#0:
-; CHECK-WIDE-NEXT: vpmovzxbd %xmm0, %xmm0
-; CHECK-WIDE-NEXT: vpslld $24, %xmm0, %xmm0
-; CHECK-WIDE-NEXT: vpsrad $24, %xmm0, %xmm0
+; CHECK-WIDE-NEXT: vpmovsxbd %xmm0, %xmm0
; CHECK-WIDE-NEXT: vcvtdq2ps %xmm0, %xmm0
; CHECK-WIDE-NEXT: retl
%res = sitofp <4 x i8> %src to <4 x float>
@@ -52,7 +46,7 @@ define <4 x float> @foo1_4(<4 x i8> %src) {
define <8 x float> @foo2_8(<8 x i8> %src) {
; CHECK-LABEL: foo2_8:
; CHECK: ## BB#0:
-; CHECK-NEXT: vpmovzxwd %xmm0, %xmm1
+; CHECK-NEXT: vpmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; CHECK-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; CHECK-NEXT: vandps LCPI2_0, %ymm0, %ymm0
@@ -61,20 +55,9 @@ define <8 x float> @foo2_8(<8 x i8> %src) {
;
; CHECK-WIDE-LABEL: foo2_8:
; CHECK-WIDE: ## BB#0:
-; CHECK-WIDE-NEXT: vxorps %xmm1, %xmm1, %xmm1
-; CHECK-WIDE-NEXT: vextractf128 $1, %ymm1, %xmm2
-; CHECK-WIDE-NEXT: vmovdqa {{.*#+}} xmm3 = <1,3,5,7,9,11,13,15,u,u,u,u,u,u,u,u>
-; CHECK-WIDE-NEXT: vpshufb %xmm3, %xmm2, %xmm4
-; CHECK-WIDE-NEXT: vmovdqa {{.*#+}} xmm5 = <2,6,10,14,u,u,u,u,u,u,u,u,u,u,u,u>
-; CHECK-WIDE-NEXT: vpshufb %xmm5, %xmm2, %xmm2
-; CHECK-WIDE-NEXT: vpshufd {{.*#+}} xmm6 = xmm0[1,1,2,3]
-; CHECK-WIDE-NEXT: vpunpcklbw {{.*#+}} xmm2 = xmm6[0],xmm2[0],xmm6[1],xmm2[1],xmm6[2],xmm2[2],xmm6[3],xmm2[3],xmm6[4],xmm2[4],xmm6[5],xmm2[5],xmm6[6],xmm2[6],xmm6[7],xmm2[7]
-; CHECK-WIDE-NEXT: vpunpcklbw {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3],xmm2[4],xmm4[4],xmm2[5],xmm4[5],xmm2[6],xmm4[6],xmm2[7],xmm4[7]
-; CHECK-WIDE-NEXT: vpshufb %xmm3, %xmm1, %xmm3
-; CHECK-WIDE-NEXT: vpshufb %xmm5, %xmm1, %xmm1
-; CHECK-WIDE-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
-; CHECK-WIDE-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7]
-; CHECK-WIDE-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; CHECK-WIDE-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
+; CHECK-WIDE-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
+; CHECK-WIDE-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; CHECK-WIDE-NEXT: vcvtdq2ps %ymm0, %ymm0
; CHECK-WIDE-NEXT: retl
%res = uitofp <8 x i8> %src to <8 x float>
@@ -90,7 +73,7 @@ define <4 x float> @foo2_4(<4 x i8> %src) {
;
; CHECK-WIDE-LABEL: foo2_4:
; CHECK-WIDE: ## BB#0:
-; CHECK-WIDE-NEXT: vpmovzxbd %xmm0, %xmm0
+; CHECK-WIDE-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; CHECK-WIDE-NEXT: vcvtdq2ps %xmm0, %xmm0
; CHECK-WIDE-NEXT: retl
%res = uitofp <4 x i8> %src to <4 x float>
@@ -111,37 +94,29 @@ define <8 x i8> @foo3_8(<8 x float> %src) {
;
; CHECK-WIDE-LABEL: foo3_8:
; CHECK-WIDE: ## BB#0:
-; CHECK-WIDE-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[3,1,2,3]
-; CHECK-WIDE-NEXT: vcvttss2si %xmm1, %eax
-; CHECK-WIDE-NEXT: shll $8, %eax
-; CHECK-WIDE-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
-; CHECK-WIDE-NEXT: vcvttss2si %xmm1, %ecx
-; CHECK-WIDE-NEXT: movzbl %cl, %ecx
-; CHECK-WIDE-NEXT: orl %eax, %ecx
-; CHECK-WIDE-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; CHECK-WIDE-NEXT: vcvttss2si %xmm1, %eax
-; CHECK-WIDE-NEXT: shll $8, %eax
-; CHECK-WIDE-NEXT: vcvttss2si %xmm0, %edx
-; CHECK-WIDE-NEXT: movzbl %dl, %edx
-; CHECK-WIDE-NEXT: orl %eax, %edx
-; CHECK-WIDE-NEXT: vpinsrw $0, %edx, %xmm0, %xmm1
-; CHECK-WIDE-NEXT: vpinsrw $1, %ecx, %xmm1, %xmm1
+; CHECK-WIDE-NEXT: vcvttss2si %xmm0, %eax
+; CHECK-WIDE-NEXT: vpinsrb $0, %eax, %xmm0, %xmm1
+; CHECK-WIDE-NEXT: vmovshdup %xmm0, %xmm2 ## xmm2 = xmm0[1,1,3,3]
+; CHECK-WIDE-NEXT: vcvttss2si %xmm2, %eax
+; CHECK-WIDE-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
+; CHECK-WIDE-NEXT: vpermilpd $1, %xmm0, %xmm2 ## xmm2 = xmm0[1,0]
+; CHECK-WIDE-NEXT: vcvttss2si %xmm2, %eax
+; CHECK-WIDE-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1
+; CHECK-WIDE-NEXT: vpermilps $231, %xmm0, %xmm2 ## xmm2 = xmm0[3,1,2,3]
+; CHECK-WIDE-NEXT: vcvttss2si %xmm2, %eax
+; CHECK-WIDE-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1
; CHECK-WIDE-NEXT: vextractf128 $1, %ymm0, %xmm0
-; CHECK-WIDE-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[1,1,2,3]
+; CHECK-WIDE-NEXT: vcvttss2si %xmm0, %eax
+; CHECK-WIDE-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
+; CHECK-WIDE-NEXT: vmovshdup %xmm0, %xmm2 ## xmm2 = xmm0[1,1,3,3]
; CHECK-WIDE-NEXT: vcvttss2si %xmm2, %eax
-; CHECK-WIDE-NEXT: shll $8, %eax
-; CHECK-WIDE-NEXT: vcvttss2si %xmm0, %ecx
-; CHECK-WIDE-NEXT: movzbl %cl, %ecx
-; CHECK-WIDE-NEXT: orl %eax, %ecx
-; CHECK-WIDE-NEXT: vpinsrw $2, %ecx, %xmm1, %xmm1
-; CHECK-WIDE-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[3,1,2,3]
+; CHECK-WIDE-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1
+; CHECK-WIDE-NEXT: vpermilpd $1, %xmm0, %xmm2 ## xmm2 = xmm0[1,0]
; CHECK-WIDE-NEXT: vcvttss2si %xmm2, %eax
-; CHECK-WIDE-NEXT: shll $8, %eax
-; CHECK-WIDE-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
-; CHECK-WIDE-NEXT: vcvttss2si %xmm0, %ecx
-; CHECK-WIDE-NEXT: movzbl %cl, %ecx
-; CHECK-WIDE-NEXT: orl %eax, %ecx
-; CHECK-WIDE-NEXT: vpinsrw $3, %ecx, %xmm1, %xmm0
+; CHECK-WIDE-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1
+; CHECK-WIDE-NEXT: vpermilps $231, %xmm0, %xmm0 ## xmm0 = xmm0[3,1,2,3]
+; CHECK-WIDE-NEXT: vcvttss2si %xmm0, %eax
+; CHECK-WIDE-NEXT: vpinsrb $7, %eax, %xmm1, %xmm0
; CHECK-WIDE-NEXT: vzeroupper
; CHECK-WIDE-NEXT: retl
%res = fptosi <8 x float> %src to <8 x i8>
@@ -156,21 +131,17 @@ define <4 x i8> @foo3_4(<4 x float> %src) {
;
; CHECK-WIDE-LABEL: foo3_4:
; CHECK-WIDE: ## BB#0:
-; CHECK-WIDE-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[3,1,2,3]
-; CHECK-WIDE-NEXT: vcvttss2si %xmm1, %eax
-; CHECK-WIDE-NEXT: shll $8, %eax
-; CHECK-WIDE-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
-; CHECK-WIDE-NEXT: vcvttss2si %xmm1, %ecx
-; CHECK-WIDE-NEXT: movzbl %cl, %ecx
-; CHECK-WIDE-NEXT: orl %eax, %ecx
-; CHECK-WIDE-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; CHECK-WIDE-NEXT: vcvttss2si %xmm1, %eax
-; CHECK-WIDE-NEXT: shll $8, %eax
-; CHECK-WIDE-NEXT: vcvttss2si %xmm0, %edx
-; CHECK-WIDE-NEXT: movzbl %dl, %edx
-; CHECK-WIDE-NEXT: orl %eax, %edx
-; CHECK-WIDE-NEXT: vpinsrw $0, %edx, %xmm0, %xmm0
-; CHECK-WIDE-NEXT: vpinsrw $1, %ecx, %xmm0, %xmm0
+; CHECK-WIDE-NEXT: vcvttss2si %xmm0, %eax
+; CHECK-WIDE-NEXT: vpinsrb $0, %eax, %xmm0, %xmm1
+; CHECK-WIDE-NEXT: vmovshdup %xmm0, %xmm2 ## xmm2 = xmm0[1,1,3,3]
+; CHECK-WIDE-NEXT: vcvttss2si %xmm2, %eax
+; CHECK-WIDE-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
+; CHECK-WIDE-NEXT: vpermilpd $1, %xmm0, %xmm2 ## xmm2 = xmm0[1,0]
+; CHECK-WIDE-NEXT: vcvttss2si %xmm2, %eax
+; CHECK-WIDE-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1
+; CHECK-WIDE-NEXT: vpermilps $231, %xmm0, %xmm0 ## xmm0 = xmm0[3,1,2,3]
+; CHECK-WIDE-NEXT: vcvttss2si %xmm0, %eax
+; CHECK-WIDE-NEXT: vpinsrb $3, %eax, %xmm1, %xmm0
; CHECK-WIDE-NEXT: retl
%res = fptosi <4 x float> %src to <4 x i8>
ret <4 x i8> %res
diff --git a/test/CodeGen/X86/vec_clear.ll b/test/CodeGen/X86/vec_clear.ll
deleted file mode 100644
index 166d4363ec8d..000000000000
--- a/test/CodeGen/X86/vec_clear.ll
+++ /dev/null
@@ -1,13 +0,0 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 -mtriple=i386-apple-darwin -o %t
-; RUN: not grep and %t
-; RUN: not grep psrldq %t
-; RUN: grep xorps %t
-
-define <4 x float> @test(<4 x float>* %v1) nounwind {
- %tmp = load <4 x float>* %v1 ; <<4 x float>> [#uses=1]
- %tmp15 = bitcast <4 x float> %tmp to <2 x i64> ; <<2 x i64>> [#uses=1]
- %tmp24 = and <2 x i64> %tmp15, bitcast (<4 x i32> < i32 0, i32 0, i32 -1, i32 -1 > to <2 x i64>) ; <<2 x i64>> [#uses=1]
- %tmp31 = bitcast <2 x i64> %tmp24 to <4 x float> ; <<4 x float>> [#uses=1]
- ret <4 x float> %tmp31
-}
-
diff --git a/test/CodeGen/X86/vec_compare.ll b/test/CodeGen/X86/vec_compare.ll
index 365fe92220b5..df3eae3399f3 100644
--- a/test/CodeGen/X86/vec_compare.ll
+++ b/test/CodeGen/X86/vec_compare.ll
@@ -45,7 +45,7 @@ define <4 x i32> @test4(<4 x i32> %A, <4 x i32> %B) nounwind {
define <2 x i64> @test5(<2 x i64> %A, <2 x i64> %B) nounwind {
; CHECK-LABEL: test5:
; CHECK: pcmpeqd
-; CHECK: pshufd $-79
+; CHECK: pshufd $177
; CHECK: pand
; CHECK: ret
%C = icmp eq <2 x i64> %A, %B
@@ -56,7 +56,7 @@ define <2 x i64> @test5(<2 x i64> %A, <2 x i64> %B) nounwind {
define <2 x i64> @test6(<2 x i64> %A, <2 x i64> %B) nounwind {
; CHECK-LABEL: test6:
; CHECK: pcmpeqd
-; CHECK: pshufd $-79
+; CHECK: pshufd $177
; CHECK: pand
; CHECK: pcmpeqd
; CHECK: pxor
@@ -77,11 +77,11 @@ define <2 x i64> @test7(<2 x i64> %A, <2 x i64> %B) nounwind {
; CHECK: pxor [[CONSTREG]]
; CHECK: pxor [[CONSTREG]]
; CHECK: pcmpgtd %xmm1
-; CHECK: pshufd $-96
+; CHECK: pshufd $160
; CHECK: pcmpeqd
-; CHECK: pshufd $-11
+; CHECK: pshufd $245
; CHECK: pand
-; CHECK: pshufd $-11
+; CHECK: pshufd $245
; CHECK: por
; CHECK: ret
%C = icmp sgt <2 x i64> %A, %B
@@ -94,11 +94,11 @@ define <2 x i64> @test8(<2 x i64> %A, <2 x i64> %B) nounwind {
; CHECK: pxor
; CHECK: pxor
; CHECK: pcmpgtd %xmm0
-; CHECK: pshufd $-96
+; CHECK: pshufd $160
; CHECK: pcmpeqd
-; CHECK: pshufd $-11
+; CHECK: pshufd $245
; CHECK: pand
-; CHECK: pshufd $-11
+; CHECK: pshufd $245
; CHECK: por
; CHECK: ret
%C = icmp slt <2 x i64> %A, %B
@@ -111,11 +111,11 @@ define <2 x i64> @test9(<2 x i64> %A, <2 x i64> %B) nounwind {
; CHECK: pxor
; CHECK: pxor
; CHECK: pcmpgtd %xmm0
-; CHECK: pshufd $-96
+; CHECK: pshufd $160
; CHECK: pcmpeqd
-; CHECK: pshufd $-11
+; CHECK: pshufd $245
; CHECK: pand
-; CHECK: pshufd $-11
+; CHECK: pshufd $245
; CHECK: por
; CHECK: pcmpeqd
; CHECK: pxor
@@ -130,11 +130,11 @@ define <2 x i64> @test10(<2 x i64> %A, <2 x i64> %B) nounwind {
; CHECK: pxor
; CHECK: pxor
; CHECK: pcmpgtd %xmm1
-; CHECK: pshufd $-96
+; CHECK: pshufd $160
; CHECK: pcmpeqd
-; CHECK: pshufd $-11
+; CHECK: pshufd $245
; CHECK: pand
-; CHECK: pshufd $-11
+; CHECK: pshufd $245
; CHECK: por
; CHECK: pcmpeqd
; CHECK: pxor
@@ -155,11 +155,11 @@ define <2 x i64> @test11(<2 x i64> %A, <2 x i64> %B) nounwind {
; CHECK: pxor [[CONSTREG]]
; CHECK: pxor [[CONSTREG]]
; CHECK: pcmpgtd %xmm1
-; CHECK: pshufd $-96
+; CHECK: pshufd $160
; CHECK: pcmpeqd
-; CHECK: pshufd $-11
+; CHECK: pshufd $245
; CHECK: pand
-; CHECK: pshufd $-11
+; CHECK: pshufd $245
; CHECK: por
; CHECK: ret
%C = icmp ugt <2 x i64> %A, %B
@@ -172,11 +172,11 @@ define <2 x i64> @test12(<2 x i64> %A, <2 x i64> %B) nounwind {
; CHECK: pxor
; CHECK: pxor
; CHECK: pcmpgtd %xmm0
-; CHECK: pshufd $-96
+; CHECK: pshufd $160
; CHECK: pcmpeqd
-; CHECK: pshufd $-11
+; CHECK: pshufd $245
; CHECK: pand
-; CHECK: pshufd $-11
+; CHECK: pshufd $245
; CHECK: por
; CHECK: ret
%C = icmp ult <2 x i64> %A, %B
@@ -189,11 +189,11 @@ define <2 x i64> @test13(<2 x i64> %A, <2 x i64> %B) nounwind {
; CHECK: pxor
; CHECK: pxor
; CHECK: pcmpgtd %xmm0
-; CHECK: pshufd $-96
+; CHECK: pshufd $160
; CHECK: pcmpeqd
-; CHECK: pshufd $-11
+; CHECK: pshufd $245
; CHECK: pand
-; CHECK: pshufd $-11
+; CHECK: pshufd $245
; CHECK: por
; CHECK: pcmpeqd
; CHECK: pxor
@@ -208,11 +208,11 @@ define <2 x i64> @test14(<2 x i64> %A, <2 x i64> %B) nounwind {
; CHECK: pxor
; CHECK: pxor
; CHECK: pcmpgtd %xmm1
-; CHECK: pshufd $-96
+; CHECK: pshufd $160
; CHECK: pcmpeqd
-; CHECK: pshufd $-11
+; CHECK: pshufd $245
; CHECK: pand
-; CHECK: pshufd $-11
+; CHECK: pshufd $245
; CHECK: por
; CHECK: pcmpeqd
; CHECK: pxor
diff --git a/test/CodeGen/X86/vec_extract-mmx.ll b/test/CodeGen/X86/vec_extract-mmx.ll
new file mode 100644
index 000000000000..780066d2da15
--- /dev/null
+++ b/test/CodeGen/X86/vec_extract-mmx.ll
@@ -0,0 +1,71 @@
+; RUN: llc < %s -march=x86-64 -mattr=+mmx,+sse2 | FileCheck %s
+
+define i32 @test0(<1 x i64>* %v4) {
+; CHECK-LABEL: test0:
+; CHECK: # BB#0:{{.*}} %entry
+; CHECK: pshufw $238, (%[[REG:[a-z]+]]), %mm0
+; CHECK-NEXT: movd %mm0, %eax
+; CHECK-NEXT: addl $32, %eax
+; CHECK-NEXT: retq
+entry:
+ %v5 = load <1 x i64>, <1 x i64>* %v4, align 8
+ %v12 = bitcast <1 x i64> %v5 to <4 x i16>
+ %v13 = bitcast <4 x i16> %v12 to x86_mmx
+ %v14 = tail call x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx %v13, i8 -18)
+ %v15 = bitcast x86_mmx %v14 to <4 x i16>
+ %v16 = bitcast <4 x i16> %v15 to <1 x i64>
+ %v17 = extractelement <1 x i64> %v16, i32 0
+ %v18 = bitcast i64 %v17 to <2 x i32>
+ %v19 = extractelement <2 x i32> %v18, i32 0
+ %v20 = add i32 %v19, 32
+ ret i32 %v20
+}
+
+define i32 @test1(i32* nocapture readonly %ptr) {
+; CHECK-LABEL: test1:
+; CHECK: # BB#0:{{.*}} %entry
+; CHECK: movd (%[[REG]]), %mm0
+; CHECK-NEXT: pshufw $232, %mm0, %mm0
+; CHECK-NEXT: movd %mm0, %eax
+; CHECK-NEXT: emms
+; CHECK-NEXT: retq
+entry:
+ %0 = load i32, i32* %ptr, align 4
+ %1 = insertelement <2 x i32> undef, i32 %0, i32 0
+ %2 = insertelement <2 x i32> %1, i32 0, i32 1
+ %3 = bitcast <2 x i32> %2 to x86_mmx
+ %4 = bitcast x86_mmx %3 to i64
+ %5 = bitcast i64 %4 to <4 x i16>
+ %6 = bitcast <4 x i16> %5 to x86_mmx
+ %7 = tail call x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx %6, i8 -24)
+ %8 = bitcast x86_mmx %7 to <4 x i16>
+ %9 = bitcast <4 x i16> %8 to <1 x i64>
+ %10 = extractelement <1 x i64> %9, i32 0
+ %11 = bitcast i64 %10 to <2 x i32>
+ %12 = extractelement <2 x i32> %11, i32 0
+ tail call void @llvm.x86.mmx.emms()
+ ret i32 %12
+}
+
+define i32 @test2(i32* nocapture readonly %ptr) {
+; CHECK-LABEL: test2:
+; CHECK: # BB#0:{{.*}} %entry
+; CHECK: pshufw $232, (%[[REG]]), %mm0
+; CHECK-NEXT: movd %mm0, %eax
+; CHECK-NEXT: emms
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast i32* %ptr to x86_mmx*
+ %1 = load x86_mmx, x86_mmx* %0, align 8
+ %2 = tail call x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx %1, i8 -24)
+ %3 = bitcast x86_mmx %2 to <4 x i16>
+ %4 = bitcast <4 x i16> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ %6 = bitcast i64 %5 to <2 x i32>
+ %7 = extractelement <2 x i32> %6, i32 0
+ tail call void @llvm.x86.mmx.emms()
+ ret i32 %7
+}
+
+declare x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx, i8)
+declare void @llvm.x86.mmx.emms()
diff --git a/test/CodeGen/X86/vec_extract-sse4.ll b/test/CodeGen/X86/vec_extract-sse4.ll
index 530911add121..9f4210f7847e 100644
--- a/test/CodeGen/X86/vec_extract-sse4.ll
+++ b/test/CodeGen/X86/vec_extract-sse4.ll
@@ -9,7 +9,7 @@ define void @t1(float* %R, <4 x float>* %P1) nounwind {
; CHECK-NEXT: movss %xmm0, (%eax)
; CHECK-NEXT: retl
- %X = load <4 x float>* %P1
+ %X = load <4 x float>, <4 x float>* %P1
%tmp = extractelement <4 x float> %X, i32 3
store float %tmp, float* %R
ret void
@@ -27,7 +27,7 @@ define float @t2(<4 x float>* %P1) nounwind {
; CHECK-NEXT: popl %eax
; CHECK-NEXT: retl
- %X = load <4 x float>* %P1
+ %X = load <4 x float>, <4 x float>* %P1
%tmp = extractelement <4 x float> %X, i32 2
ret float %tmp
}
@@ -41,7 +41,7 @@ define void @t3(i32* %R, <4 x i32>* %P1) nounwind {
; CHECK-NEXT: movl %ecx, (%eax)
; CHECK-NEXT: retl
- %X = load <4 x i32>* %P1
+ %X = load <4 x i32>, <4 x i32>* %P1
%tmp = extractelement <4 x i32> %X, i32 3
store i32 %tmp, i32* %R
ret void
@@ -54,7 +54,7 @@ define i32 @t4(<4 x i32>* %P1) nounwind {
; CHECK-NEXT: movl 12(%eax), %eax
; CHECK-NEXT: retl
- %X = load <4 x i32>* %P1
+ %X = load <4 x i32>, <4 x i32>* %P1
%tmp = extractelement <4 x i32> %X, i32 3
ret i32 %tmp
}
diff --git a/test/CodeGen/X86/vec_extract.ll b/test/CodeGen/X86/vec_extract.ll
index 6df7be7a087b..3b478880590d 100644
--- a/test/CodeGen/X86/vec_extract.ll
+++ b/test/CodeGen/X86/vec_extract.ll
@@ -12,7 +12,7 @@ define void @test1(<4 x float>* %F, float* %f) nounwind {
; CHECK-NEXT: movss %xmm0, (%eax)
; CHECK-NEXT: retl
entry:
- %tmp = load <4 x float>* %F ; <<4 x float>> [#uses=2]
+ %tmp = load <4 x float>, <4 x float>* %F ; <<4 x float>> [#uses=2]
%tmp7 = fadd <4 x float> %tmp, %tmp ; <<4 x float>> [#uses=1]
%tmp2 = extractelement <4 x float> %tmp7, i32 0 ; <float> [#uses=1]
store float %tmp2, float* %f
@@ -32,7 +32,7 @@ define float @test2(<4 x float>* %F, float* %f) nounwind {
; CHECK-NEXT: popl %eax
; CHECK-NEXT: retl
entry:
- %tmp = load <4 x float>* %F ; <<4 x float>> [#uses=2]
+ %tmp = load <4 x float>, <4 x float>* %F ; <<4 x float>> [#uses=2]
%tmp7 = fadd <4 x float> %tmp, %tmp ; <<4 x float>> [#uses=1]
%tmp2 = extractelement <4 x float> %tmp7, i32 2 ; <float> [#uses=1]
ret float %tmp2
@@ -47,7 +47,7 @@ define void @test3(float* %R, <4 x float>* %P1) nounwind {
; CHECK-NEXT: movss %xmm0, (%eax)
; CHECK-NEXT: retl
entry:
- %X = load <4 x float>* %P1 ; <<4 x float>> [#uses=1]
+ %X = load <4 x float>, <4 x float>* %P1 ; <<4 x float>> [#uses=1]
%tmp = extractelement <4 x float> %X, i32 3 ; <float> [#uses=1]
store float %tmp, float* %R
ret void
diff --git a/test/CodeGen/X86/vec_fabs.ll b/test/CodeGen/X86/vec_fabs.ll
index ac02acfed342..bfefbcf5ebd3 100644
--- a/test/CodeGen/X86/vec_fabs.ll
+++ b/test/CodeGen/X86/vec_fabs.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=corei7-avx | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx | FileCheck %s
define <2 x double> @fabs_v2f64(<2 x double> %p)
diff --git a/test/CodeGen/X86/vec_floor.ll b/test/CodeGen/X86/vec_floor.ll
index 4db68bd18223..f35c4ab4a76e 100644
--- a/test/CodeGen/X86/vec_floor.ll
+++ b/test/CodeGen/X86/vec_floor.ll
@@ -180,3 +180,49 @@ define <8 x float> @nearbyint_v8f32(<8 x float> %p)
ret <8 x float> %t
}
declare <8 x float> @llvm.nearbyint.v8f32(<8 x float> %p)
+
+;
+; Constant Folding
+;
+
+define <2 x double> @const_floor_v2f64() {
+ ; CHECK: const_floor_v2f64
+ ; CHECK: movaps {{.*#+}} xmm0 = [-2.000000e+00,2.000000e+00]
+ %t = call <2 x double> @llvm.floor.v2f64(<2 x double> <double -1.5, double 2.5>)
+ ret <2 x double> %t
+}
+
+define <4 x float> @const_floor_v4f32() {
+ ; CHECK: const_floor_v4f32
+ ; CHECK: movaps {{.*#+}} xmm0 = [-4.000000e+00,6.000000e+00,-9.000000e+00,2.000000e+00]
+ %t = call <4 x float> @llvm.floor.v4f32(<4 x float> <float -3.5, float 6.0, float -9.0, float 2.5>)
+ ret <4 x float> %t
+}
+
+define <2 x double> @const_ceil_v2f64() {
+ ; CHECK: const_ceil_v2f64
+ ; CHECK: movaps {{.*#+}} xmm0 = [-1.000000e+00,3.000000e+00]
+ %t = call <2 x double> @llvm.ceil.v2f64(<2 x double> <double -1.5, double 2.5>)
+ ret <2 x double> %t
+}
+
+define <4 x float> @const_ceil_v4f32() {
+ ; CHECK: const_ceil_v4f32
+ ; CHECK: movaps {{.*#+}} xmm0 = [-3.000000e+00,6.000000e+00,-9.000000e+00,3.000000e+00]
+ %t = call <4 x float> @llvm.ceil.v4f32(<4 x float> <float -3.5, float 6.0, float -9.0, float 2.5>)
+ ret <4 x float> %t
+}
+
+define <2 x double> @const_trunc_v2f64() {
+ ; CHECK: const_trunc_v2f64
+ ; CHECK: movaps {{.*#+}} xmm0 = [-1.000000e+00,2.000000e+00]
+ %t = call <2 x double> @llvm.trunc.v2f64(<2 x double> <double -1.5, double 2.5>)
+ ret <2 x double> %t
+}
+
+define <4 x float> @const_trunc_v4f32() {
+ ; CHECK: const_trunc_v4f32
+ ; CHECK: movaps {{.*#+}} xmm0 = [-3.000000e+00,6.000000e+00,-9.000000e+00,2.000000e+00]
+ %t = call <4 x float> @llvm.trunc.v4f32(<4 x float> <float -3.5, float 6.0, float -9.0, float 2.5>)
+ ret <4 x float> %t
+}
diff --git a/test/CodeGen/X86/vec_fneg.ll b/test/CodeGen/X86/vec_fneg.ll
index 9743f7148c69..a85ae984d8e6 100644
--- a/test/CodeGen/X86/vec_fneg.ll
+++ b/test/CodeGen/X86/vec_fneg.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=corei7 | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=sse | FileCheck %s
; FNEG is defined as subtraction from -0.0.
diff --git a/test/CodeGen/X86/vec_fp_to_int.ll b/test/CodeGen/X86/vec_fp_to_int.ll
new file mode 100644
index 000000000000..9f1c7afa295b
--- /dev/null
+++ b/test/CodeGen/X86/vec_fp_to_int.ll
@@ -0,0 +1,955 @@
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=ALL --check-prefix=SSE2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX
+
+;
+; Double to Signed Integer
+;
+
+define <2 x i64> @fptosi_2vf64(<2 x double> %a) {
+; SSE2-LABEL: fptosi_2vf64:
+; SSE2: # BB#0:
+; SSE2-NEXT: cvttsd2si %xmm0, %rax
+; SSE2-NEXT: movd %rax, %xmm1
+; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
+; SSE2-NEXT: cvttsd2si %xmm0, %rax
+; SSE2-NEXT: movd %rax, %xmm0
+; SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; SSE2-NEXT: movdqa %xmm1, %xmm0
+; SSE2-NEXT: retq
+;
+; AVX-LABEL: fptosi_2vf64:
+; AVX: # BB#0:
+; AVX-NEXT: vcvttsd2si %xmm0, %rax
+; AVX-NEXT: vmovq %rax, %xmm1
+; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
+; AVX-NEXT: vcvttsd2si %xmm0, %rax
+; AVX-NEXT: vmovq %rax, %xmm0
+; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX-NEXT: retq
+ %cvt = fptosi <2 x double> %a to <2 x i64>
+ ret <2 x i64> %cvt
+}
+
+define <4 x i32> @fptosi_2vf64_i32(<2 x double> %a) {
+; SSE2-LABEL: fptosi_2vf64_i32:
+; SSE2: # BB#0:
+; SSE2-NEXT: cvttsd2si %xmm0, %rax
+; SSE2-NEXT: movd %rax, %xmm1
+; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
+; SSE2-NEXT: cvttsd2si %xmm0, %rax
+; SSE2-NEXT: movd %rax, %xmm0
+; SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3]
+; SSE2-NEXT: retq
+;
+; AVX-LABEL: fptosi_2vf64_i32:
+; AVX: # BB#0:
+; AVX-NEXT: vcvttsd2si %xmm0, %rax
+; AVX-NEXT: vmovq %rax, %xmm1
+; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
+; AVX-NEXT: vcvttsd2si %xmm0, %rax
+; AVX-NEXT: vmovq %rax, %xmm0
+; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; AVX-NEXT: retq
+ %cvt = fptosi <2 x double> %a to <2 x i32>
+ %ext = shufflevector <2 x i32> %cvt, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+ ret <4 x i32> %ext
+}
+
+define <4 x i64> @fptosi_4vf64(<4 x double> %a) {
+; SSE2-LABEL: fptosi_4vf64:
+; SSE2: # BB#0:
+; SSE2-NEXT: cvttsd2si %xmm0, %rax
+; SSE2-NEXT: movd %rax, %xmm2
+; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
+; SSE2-NEXT: cvttsd2si %xmm0, %rax
+; SSE2-NEXT: movd %rax, %xmm0
+; SSE2-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm0[0]
+; SSE2-NEXT: cvttsd2si %xmm1, %rax
+; SSE2-NEXT: movd %rax, %xmm3
+; SSE2-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1,0]
+; SSE2-NEXT: cvttsd2si %xmm1, %rax
+; SSE2-NEXT: movd %rax, %xmm0
+; SSE2-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm0[0]
+; SSE2-NEXT: movdqa %xmm2, %xmm0
+; SSE2-NEXT: movdqa %xmm3, %xmm1
+; SSE2-NEXT: retq
+;
+; AVX-LABEL: fptosi_4vf64:
+; AVX: # BB#0:
+; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX-NEXT: vcvttsd2si %xmm1, %rax
+; AVX-NEXT: vmovq %rax, %xmm2
+; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0]
+; AVX-NEXT: vcvttsd2si %xmm1, %rax
+; AVX-NEXT: vmovq %rax, %xmm1
+; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
+; AVX-NEXT: vcvttsd2si %xmm0, %rax
+; AVX-NEXT: vmovq %rax, %xmm2
+; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
+; AVX-NEXT: vcvttsd2si %xmm0, %rax
+; AVX-NEXT: vmovq %rax, %xmm0
+; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
+; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX-NEXT: retq
+ %cvt = fptosi <4 x double> %a to <4 x i64>
+ ret <4 x i64> %cvt
+}
+
+define <4 x i32> @fptosi_4vf64_i32(<4 x double> %a) {
+; SSE2-LABEL: fptosi_4vf64_i32:
+; SSE2: # BB#0:
+; SSE2-NEXT: cvttsd2si %xmm1, %rax
+; SSE2-NEXT: movd %rax, %xmm2
+; SSE2-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1,0]
+; SSE2-NEXT: cvttsd2si %xmm1, %rax
+; SSE2-NEXT: movd %rax, %xmm1
+; SSE2-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0]
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
+; SSE2-NEXT: cvttsd2si %xmm0, %rax
+; SSE2-NEXT: movd %rax, %xmm2
+; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
+; SSE2-NEXT: cvttsd2si %xmm0, %rax
+; SSE2-NEXT: movd %rax, %xmm0
+; SSE2-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm0[0]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3]
+; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE2-NEXT: retq
+;
+; AVX-LABEL: fptosi_4vf64_i32:
+; AVX: # BB#0:
+; AVX-NEXT: vcvttpd2dqy %ymm0, %xmm0
+; AVX-NEXT: vzeroupper
+; AVX-NEXT: retq
+ %cvt = fptosi <4 x double> %a to <4 x i32>
+ ret <4 x i32> %cvt
+}
+
+;
+; Double to Unsigned Integer
+;
+
+define <2 x i64> @fptoui_2vf64(<2 x double> %a) {
+; SSE2-LABEL: fptoui_2vf64:
+; SSE2: # BB#0:
+; SSE2-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero
+; SSE2-NEXT: movapd %xmm0, %xmm1
+; SSE2-NEXT: subsd %xmm2, %xmm1
+; SSE2-NEXT: cvttsd2si %xmm1, %rax
+; SSE2-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
+; SSE2-NEXT: xorq %rcx, %rax
+; SSE2-NEXT: cvttsd2si %xmm0, %rdx
+; SSE2-NEXT: ucomisd %xmm2, %xmm0
+; SSE2-NEXT: cmovaeq %rax, %rdx
+; SSE2-NEXT: movd %rdx, %xmm1
+; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
+; SSE2-NEXT: movapd %xmm0, %xmm3
+; SSE2-NEXT: subsd %xmm2, %xmm3
+; SSE2-NEXT: cvttsd2si %xmm3, %rax
+; SSE2-NEXT: xorq %rcx, %rax
+; SSE2-NEXT: cvttsd2si %xmm0, %rcx
+; SSE2-NEXT: ucomisd %xmm2, %xmm0
+; SSE2-NEXT: cmovaeq %rax, %rcx
+; SSE2-NEXT: movd %rcx, %xmm0
+; SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; SSE2-NEXT: movdqa %xmm1, %xmm0
+; SSE2-NEXT: retq
+;
+; AVX-LABEL: fptoui_2vf64:
+; AVX: # BB#0:
+; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
+; AVX-NEXT: vsubsd %xmm1, %xmm0, %xmm2
+; AVX-NEXT: vcvttsd2si %xmm2, %rax
+; AVX-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
+; AVX-NEXT: xorq %rcx, %rax
+; AVX-NEXT: vcvttsd2si %xmm0, %rdx
+; AVX-NEXT: vucomisd %xmm1, %xmm0
+; AVX-NEXT: cmovaeq %rax, %rdx
+; AVX-NEXT: vmovq %rdx, %xmm2
+; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
+; AVX-NEXT: vsubsd %xmm1, %xmm0, %xmm3
+; AVX-NEXT: vcvttsd2si %xmm3, %rax
+; AVX-NEXT: xorq %rcx, %rax
+; AVX-NEXT: vcvttsd2si %xmm0, %rcx
+; AVX-NEXT: vucomisd %xmm1, %xmm0
+; AVX-NEXT: cmovaeq %rax, %rcx
+; AVX-NEXT: vmovq %rcx, %xmm0
+; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
+; AVX-NEXT: retq
+ %cvt = fptoui <2 x double> %a to <2 x i64>
+ ret <2 x i64> %cvt
+}
+
+define <4 x i32> @fptoui_2vf64_i32(<2 x double> %a) {
+; SSE2-LABEL: fptoui_2vf64_i32:
+; SSE2: # BB#0:
+; SSE2-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
+; SSE2-NEXT: movapd %xmm0, %xmm2
+; SSE2-NEXT: subsd %xmm1, %xmm2
+; SSE2-NEXT: cvttsd2si %xmm2, %rax
+; SSE2-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
+; SSE2-NEXT: xorq %rcx, %rax
+; SSE2-NEXT: cvttsd2si %xmm0, %rdx
+; SSE2-NEXT: ucomisd %xmm1, %xmm0
+; SSE2-NEXT: cmovaeq %rax, %rdx
+; SSE2-NEXT: movd %rdx, %xmm2
+; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
+; SSE2-NEXT: movapd %xmm0, %xmm3
+; SSE2-NEXT: subsd %xmm1, %xmm3
+; SSE2-NEXT: cvttsd2si %xmm3, %rax
+; SSE2-NEXT: xorq %rcx, %rax
+; SSE2-NEXT: cvttsd2si %xmm0, %rcx
+; SSE2-NEXT: ucomisd %xmm1, %xmm0
+; SSE2-NEXT: cmovaeq %rax, %rcx
+; SSE2-NEXT: movd %rcx, %xmm0
+; SSE2-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm0[0]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3]
+; SSE2-NEXT: retq
+;
+; AVX-LABEL: fptoui_2vf64_i32:
+; AVX: # BB#0:
+; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
+; AVX-NEXT: vsubsd %xmm1, %xmm0, %xmm2
+; AVX-NEXT: vcvttsd2si %xmm2, %rax
+; AVX-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
+; AVX-NEXT: xorq %rcx, %rax
+; AVX-NEXT: vcvttsd2si %xmm0, %rdx
+; AVX-NEXT: vucomisd %xmm1, %xmm0
+; AVX-NEXT: cmovaeq %rax, %rdx
+; AVX-NEXT: vmovq %rdx, %xmm2
+; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
+; AVX-NEXT: vsubsd %xmm1, %xmm0, %xmm3
+; AVX-NEXT: vcvttsd2si %xmm3, %rax
+; AVX-NEXT: xorq %rcx, %rax
+; AVX-NEXT: vcvttsd2si %xmm0, %rcx
+; AVX-NEXT: vucomisd %xmm1, %xmm0
+; AVX-NEXT: cmovaeq %rax, %rcx
+; AVX-NEXT: vmovq %rcx, %xmm0
+; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
+; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; AVX-NEXT: retq
+ %cvt = fptoui <2 x double> %a to <2 x i32>
+ %ext = shufflevector <2 x i32> %cvt, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+ ret <4 x i32> %ext
+}
+
+define <4 x i64> @fptoui_4vf64(<4 x double> %a) {
+; SSE2-LABEL: fptoui_4vf64:
+; SSE2: # BB#0:
+; SSE2-NEXT: movapd %xmm0, %xmm2
+; SSE2-NEXT: movsd {{.*#+}} xmm3 = mem[0],zero
+; SSE2-NEXT: {{.*#+}} kill: XMM0<def> XMM2<kill>
+; SSE2-NEXT: subsd %xmm3, %xmm0
+; SSE2-NEXT: cvttsd2si %xmm0, %rcx
+; SSE2-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
+; SSE2-NEXT: xorq %rax, %rcx
+; SSE2-NEXT: cvttsd2si %xmm2, %rdx
+; SSE2-NEXT: ucomisd %xmm3, %xmm2
+; SSE2-NEXT: cmovaeq %rcx, %rdx
+; SSE2-NEXT: movd %rdx, %xmm0
+; SSE2-NEXT: shufpd {{.*#+}} xmm2 = xmm2[1,0]
+; SSE2-NEXT: movapd %xmm2, %xmm4
+; SSE2-NEXT: subsd %xmm3, %xmm4
+; SSE2-NEXT: cvttsd2si %xmm4, %rcx
+; SSE2-NEXT: xorq %rax, %rcx
+; SSE2-NEXT: cvttsd2si %xmm2, %rdx
+; SSE2-NEXT: ucomisd %xmm3, %xmm2
+; SSE2-NEXT: cmovaeq %rcx, %rdx
+; SSE2-NEXT: movd %rdx, %xmm2
+; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; SSE2-NEXT: movapd %xmm1, %xmm2
+; SSE2-NEXT: subsd %xmm3, %xmm2
+; SSE2-NEXT: cvttsd2si %xmm2, %rcx
+; SSE2-NEXT: xorq %rax, %rcx
+; SSE2-NEXT: cvttsd2si %xmm1, %rdx
+; SSE2-NEXT: ucomisd %xmm3, %xmm1
+; SSE2-NEXT: cmovaeq %rcx, %rdx
+; SSE2-NEXT: movd %rdx, %xmm2
+; SSE2-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1,0]
+; SSE2-NEXT: movapd %xmm1, %xmm4
+; SSE2-NEXT: subsd %xmm3, %xmm4
+; SSE2-NEXT: cvttsd2si %xmm4, %rcx
+; SSE2-NEXT: xorq %rax, %rcx
+; SSE2-NEXT: cvttsd2si %xmm1, %rax
+; SSE2-NEXT: ucomisd %xmm3, %xmm1
+; SSE2-NEXT: cmovaeq %rcx, %rax
+; SSE2-NEXT: movd %rax, %xmm1
+; SSE2-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0]
+; SSE2-NEXT: movdqa %xmm2, %xmm1
+; SSE2-NEXT: retq
+;
+; AVX-LABEL: fptoui_4vf64:
+; AVX: # BB#0:
+; AVX-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
+; AVX-NEXT: vsubsd %xmm1, %xmm2, %xmm3
+; AVX-NEXT: vcvttsd2si %xmm3, %rax
+; AVX-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
+; AVX-NEXT: xorq %rcx, %rax
+; AVX-NEXT: vcvttsd2si %xmm2, %rdx
+; AVX-NEXT: vucomisd %xmm1, %xmm2
+; AVX-NEXT: cmovaeq %rax, %rdx
+; AVX-NEXT: vmovq %rdx, %xmm3
+; AVX-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0]
+; AVX-NEXT: vsubsd %xmm1, %xmm2, %xmm4
+; AVX-NEXT: vcvttsd2si %xmm4, %rax
+; AVX-NEXT: xorq %rcx, %rax
+; AVX-NEXT: vcvttsd2si %xmm2, %rdx
+; AVX-NEXT: vucomisd %xmm1, %xmm2
+; AVX-NEXT: cmovaeq %rax, %rdx
+; AVX-NEXT: vmovq %rdx, %xmm2
+; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
+; AVX-NEXT: vsubsd %xmm1, %xmm0, %xmm3
+; AVX-NEXT: vcvttsd2si %xmm3, %rax
+; AVX-NEXT: xorq %rcx, %rax
+; AVX-NEXT: vcvttsd2si %xmm0, %rdx
+; AVX-NEXT: vucomisd %xmm1, %xmm0
+; AVX-NEXT: cmovaeq %rax, %rdx
+; AVX-NEXT: vmovq %rdx, %xmm3
+; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
+; AVX-NEXT: vsubsd %xmm1, %xmm0, %xmm4
+; AVX-NEXT: vcvttsd2si %xmm4, %rax
+; AVX-NEXT: xorq %rcx, %rax
+; AVX-NEXT: vcvttsd2si %xmm0, %rcx
+; AVX-NEXT: vucomisd %xmm1, %xmm0
+; AVX-NEXT: cmovaeq %rax, %rcx
+; AVX-NEXT: vmovq %rcx, %xmm0
+; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm0[0]
+; AVX-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX-NEXT: retq
+ %cvt = fptoui <4 x double> %a to <4 x i64>
+ ret <4 x i64> %cvt
+}
+
+define <4 x i32> @fptoui_4vf64_i32(<4 x double> %a) {
+; SSE2-LABEL: fptoui_4vf64_i32:
+; SSE2: # BB#0:
+; SSE2-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero
+; SSE2-NEXT: movapd %xmm1, %xmm3
+; SSE2-NEXT: subsd %xmm2, %xmm3
+; SSE2-NEXT: cvttsd2si %xmm3, %rcx
+; SSE2-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
+; SSE2-NEXT: xorq %rax, %rcx
+; SSE2-NEXT: cvttsd2si %xmm1, %rdx
+; SSE2-NEXT: ucomisd %xmm2, %xmm1
+; SSE2-NEXT: cmovaeq %rcx, %rdx
+; SSE2-NEXT: movd %rdx, %xmm3
+; SSE2-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1,0]
+; SSE2-NEXT: movapd %xmm1, %xmm4
+; SSE2-NEXT: subsd %xmm2, %xmm4
+; SSE2-NEXT: cvttsd2si %xmm4, %rcx
+; SSE2-NEXT: xorq %rax, %rcx
+; SSE2-NEXT: cvttsd2si %xmm1, %rdx
+; SSE2-NEXT: ucomisd %xmm2, %xmm1
+; SSE2-NEXT: cmovaeq %rcx, %rdx
+; SSE2-NEXT: movd %rdx, %xmm1
+; SSE2-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm1[0]
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm3[0,2,2,3]
+; SSE2-NEXT: movapd %xmm0, %xmm3
+; SSE2-NEXT: subsd %xmm2, %xmm3
+; SSE2-NEXT: cvttsd2si %xmm3, %rcx
+; SSE2-NEXT: xorq %rax, %rcx
+; SSE2-NEXT: cvttsd2si %xmm0, %rdx
+; SSE2-NEXT: ucomisd %xmm2, %xmm0
+; SSE2-NEXT: cmovaeq %rcx, %rdx
+; SSE2-NEXT: movd %rdx, %xmm3
+; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
+; SSE2-NEXT: movapd %xmm0, %xmm4
+; SSE2-NEXT: subsd %xmm2, %xmm4
+; SSE2-NEXT: cvttsd2si %xmm4, %rcx
+; SSE2-NEXT: xorq %rax, %rcx
+; SSE2-NEXT: cvttsd2si %xmm0, %rax
+; SSE2-NEXT: ucomisd %xmm2, %xmm0
+; SSE2-NEXT: cmovaeq %rcx, %rax
+; SSE2-NEXT: movd %rax, %xmm0
+; SSE2-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm0[0]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm3[0,2,2,3]
+; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE2-NEXT: retq
+;
+; AVX-LABEL: fptoui_4vf64_i32:
+; AVX: # BB#0:
+; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
+; AVX-NEXT: vcvttsd2si %xmm1, %rax
+; AVX-NEXT: vcvttsd2si %xmm0, %rcx
+; AVX-NEXT: vmovd %ecx, %xmm1
+; AVX-NEXT: vpinsrd $1, %eax, %xmm1, %xmm1
+; AVX-NEXT: vextractf128 $1, %ymm0, %xmm0
+; AVX-NEXT: vcvttsd2si %xmm0, %rax
+; AVX-NEXT: vpinsrd $2, %eax, %xmm1, %xmm1
+; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
+; AVX-NEXT: vcvttsd2si %xmm0, %rax
+; AVX-NEXT: vpinsrd $3, %eax, %xmm1, %xmm0
+; AVX-NEXT: vzeroupper
+; AVX-NEXT: retq
+ %cvt = fptoui <4 x double> %a to <4 x i32>
+ ret <4 x i32> %cvt
+}
+
+;
+; Float to Signed Integer
+;
+
+define <4 x i32> @fptosi_4vf32(<4 x float> %a) {
+; SSE2-LABEL: fptosi_4vf32:
+; SSE2: # BB#0:
+; SSE2-NEXT: cvttps2dq %xmm0, %xmm0
+; SSE2-NEXT: retq
+;
+; AVX-LABEL: fptosi_4vf32:
+; AVX: # BB#0:
+; AVX-NEXT: vcvttps2dq %xmm0, %xmm0
+; AVX-NEXT: retq
+ %cvt = fptosi <4 x float> %a to <4 x i32>
+ ret <4 x i32> %cvt
+}
+
+define <2 x i64> @fptosi_4vf32_i64(<4 x float> %a) {
+; SSE2-LABEL: fptosi_4vf32_i64:
+; SSE2: # BB#0:
+; SSE2-NEXT: cvttss2si %xmm0, %rax
+; SSE2-NEXT: movd %rax, %xmm1
+; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
+; SSE2-NEXT: cvttss2si %xmm0, %rax
+; SSE2-NEXT: movd %rax, %xmm0
+; SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; SSE2-NEXT: movdqa %xmm1, %xmm0
+; SSE2-NEXT: retq
+;
+; AVX-LABEL: fptosi_4vf32_i64:
+; AVX: # BB#0:
+; AVX-NEXT: vcvttss2si %xmm0, %rax
+; AVX-NEXT: vmovq %rax, %xmm1
+; AVX-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; AVX-NEXT: vcvttss2si %xmm0, %rax
+; AVX-NEXT: vmovq %rax, %xmm0
+; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX-NEXT: retq
+ %shuf = shufflevector <4 x float> %a, <4 x float> %a, <2 x i32> <i32 0, i32 1>
+ %cvt = fptosi <2 x float> %shuf to <2 x i64>
+ ret <2 x i64> %cvt
+}
+
+define <8 x i32> @fptosi_8vf32(<8 x float> %a) {
+; SSE2-LABEL: fptosi_8vf32:
+; SSE2: # BB#0:
+; SSE2-NEXT: cvttps2dq %xmm0, %xmm0
+; SSE2-NEXT: cvttps2dq %xmm1, %xmm1
+; SSE2-NEXT: retq
+;
+; AVX-LABEL: fptosi_8vf32:
+; AVX: # BB#0:
+; AVX-NEXT: vcvttps2dq %ymm0, %ymm0
+; AVX-NEXT: retq
+ %cvt = fptosi <8 x float> %a to <8 x i32>
+ ret <8 x i32> %cvt
+}
+
+define <4 x i64> @fptosi_8vf32_i64(<8 x float> %a) {
+; SSE2-LABEL: fptosi_8vf32_i64:
+; SSE2: # BB#0:
+; SSE2-NEXT: cvttss2si %xmm0, %rax
+; SSE2-NEXT: movd %rax, %xmm2
+; SSE2-NEXT: movaps %xmm0, %xmm1
+; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,2,3]
+; SSE2-NEXT: cvttss2si %xmm1, %rax
+; SSE2-NEXT: movd %rax, %xmm1
+; SSE2-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0]
+; SSE2-NEXT: movaps %xmm0, %xmm1
+; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3]
+; SSE2-NEXT: cvttss2si %xmm1, %rax
+; SSE2-NEXT: movd %rax, %xmm3
+; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
+; SSE2-NEXT: cvttss2si %xmm0, %rax
+; SSE2-NEXT: movd %rax, %xmm1
+; SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
+; SSE2-NEXT: movdqa %xmm2, %xmm0
+; SSE2-NEXT: retq
+;
+; AVX-LABEL: fptosi_8vf32_i64:
+; AVX: # BB#0:
+; AVX-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[3,1,2,3]
+; AVX-NEXT: vcvttss2si %xmm1, %rax
+; AVX-NEXT: vmovq %rax, %xmm1
+; AVX-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
+; AVX-NEXT: vcvttss2si %xmm2, %rax
+; AVX-NEXT: vmovq %rax, %xmm2
+; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
+; AVX-NEXT: vcvttss2si %xmm0, %rax
+; AVX-NEXT: vmovq %rax, %xmm2
+; AVX-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; AVX-NEXT: vcvttss2si %xmm0, %rax
+; AVX-NEXT: vmovq %rax, %xmm0
+; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
+; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX-NEXT: retq
+ %shuf = shufflevector <8 x float> %a, <8 x float> %a, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %cvt = fptosi <4 x float> %shuf to <4 x i64>
+ ret <4 x i64> %cvt
+}
+
+;
+; Float to Unsigned Integer
+;
+
+define <4 x i32> @fptoui_4vf32(<4 x float> %a) {
+; SSE2-LABEL: fptoui_4vf32:
+; SSE2: # BB#0:
+; SSE2-NEXT: movaps %xmm0, %xmm1
+; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3]
+; SSE2-NEXT: cvttss2si %xmm1, %rax
+; SSE2-NEXT: movd %eax, %xmm1
+; SSE2-NEXT: movaps %xmm0, %xmm2
+; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,1,2,3]
+; SSE2-NEXT: cvttss2si %xmm2, %rax
+; SSE2-NEXT: movd %eax, %xmm2
+; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; SSE2-NEXT: cvttss2si %xmm0, %rax
+; SSE2-NEXT: movd %eax, %xmm1
+; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
+; SSE2-NEXT: cvttss2si %xmm0, %rax
+; SSE2-NEXT: movd %eax, %xmm0
+; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
+; SSE2-NEXT: movdqa %xmm1, %xmm0
+; SSE2-NEXT: retq
+;
+; AVX-LABEL: fptoui_4vf32:
+; AVX: # BB#0:
+; AVX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
+; AVX-NEXT: vcvttss2si %xmm1, %rax
+; AVX-NEXT: vcvttss2si %xmm0, %rcx
+; AVX-NEXT: vmovd %ecx, %xmm1
+; AVX-NEXT: vpinsrd $1, %eax, %xmm1, %xmm1
+; AVX-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
+; AVX-NEXT: vcvttss2si %xmm2, %rax
+; AVX-NEXT: vpinsrd $2, %eax, %xmm1, %xmm1
+; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
+; AVX-NEXT: vcvttss2si %xmm0, %rax
+; AVX-NEXT: vpinsrd $3, %eax, %xmm1, %xmm0
+; AVX-NEXT: retq
+ %cvt = fptoui <4 x float> %a to <4 x i32>
+ ret <4 x i32> %cvt
+}
+
+define <2 x i64> @fptoui_4vf32_i64(<4 x float> %a) {
+; SSE2-LABEL: fptoui_4vf32_i64:
+; SSE2: # BB#0:
+; SSE2-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; SSE2-NEXT: movaps %xmm0, %xmm1
+; SSE2-NEXT: subss %xmm2, %xmm1
+; SSE2-NEXT: cvttss2si %xmm1, %rax
+; SSE2-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
+; SSE2-NEXT: xorq %rcx, %rax
+; SSE2-NEXT: cvttss2si %xmm0, %rdx
+; SSE2-NEXT: ucomiss %xmm2, %xmm0
+; SSE2-NEXT: cmovaeq %rax, %rdx
+; SSE2-NEXT: movd %rdx, %xmm1
+; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
+; SSE2-NEXT: movaps %xmm0, %xmm3
+; SSE2-NEXT: subss %xmm2, %xmm3
+; SSE2-NEXT: cvttss2si %xmm3, %rax
+; SSE2-NEXT: xorq %rcx, %rax
+; SSE2-NEXT: cvttss2si %xmm0, %rcx
+; SSE2-NEXT: ucomiss %xmm2, %xmm0
+; SSE2-NEXT: cmovaeq %rax, %rcx
+; SSE2-NEXT: movd %rcx, %xmm0
+; SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; SSE2-NEXT: movdqa %xmm1, %xmm0
+; SSE2-NEXT: retq
+;
+; AVX-LABEL: fptoui_4vf32_i64:
+; AVX: # BB#0:
+; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; AVX-NEXT: vsubss %xmm1, %xmm0, %xmm2
+; AVX-NEXT: vcvttss2si %xmm2, %rax
+; AVX-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
+; AVX-NEXT: xorq %rcx, %rax
+; AVX-NEXT: vcvttss2si %xmm0, %rdx
+; AVX-NEXT: vucomiss %xmm1, %xmm0
+; AVX-NEXT: cmovaeq %rax, %rdx
+; AVX-NEXT: vmovq %rdx, %xmm2
+; AVX-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; AVX-NEXT: vsubss %xmm1, %xmm0, %xmm3
+; AVX-NEXT: vcvttss2si %xmm3, %rax
+; AVX-NEXT: xorq %rcx, %rax
+; AVX-NEXT: vcvttss2si %xmm0, %rcx
+; AVX-NEXT: vucomiss %xmm1, %xmm0
+; AVX-NEXT: cmovaeq %rax, %rcx
+; AVX-NEXT: vmovq %rcx, %xmm0
+; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
+; AVX-NEXT: retq
+ %shuf = shufflevector <4 x float> %a, <4 x float> %a, <2 x i32> <i32 0, i32 1>
+ %cvt = fptoui <2 x float> %shuf to <2 x i64>
+ ret <2 x i64> %cvt
+}
+
+define <8 x i32> @fptoui_8vf32(<8 x float> %a) {
+; SSE2-LABEL: fptoui_8vf32:
+; SSE2: # BB#0:
+; SSE2-NEXT: movaps %xmm0, %xmm2
+; SSE2-NEXT: {{.*#+}} kill: XMM0<def> XMM2<kill>
+; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
+; SSE2-NEXT: cvttss2si %xmm0, %rax
+; SSE2-NEXT: movd %eax, %xmm0
+; SSE2-NEXT: movaps %xmm2, %xmm3
+; SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,1,2,3]
+; SSE2-NEXT: cvttss2si %xmm3, %rax
+; SSE2-NEXT: movd %eax, %xmm3
+; SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1]
+; SSE2-NEXT: cvttss2si %xmm2, %rax
+; SSE2-NEXT: movd %eax, %xmm0
+; SSE2-NEXT: shufpd {{.*#+}} xmm2 = xmm2[1,0]
+; SSE2-NEXT: cvttss2si %xmm2, %rax
+; SSE2-NEXT: movd %eax, %xmm2
+; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
+; SSE2-NEXT: movaps %xmm1, %xmm2
+; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[3,1,2,3]
+; SSE2-NEXT: cvttss2si %xmm2, %rax
+; SSE2-NEXT: movd %eax, %xmm2
+; SSE2-NEXT: movaps %xmm1, %xmm3
+; SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,1,2,3]
+; SSE2-NEXT: cvttss2si %xmm3, %rax
+; SSE2-NEXT: movd %eax, %xmm3
+; SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
+; SSE2-NEXT: cvttss2si %xmm1, %rax
+; SSE2-NEXT: movd %eax, %xmm2
+; SSE2-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1,0]
+; SSE2-NEXT: cvttss2si %xmm1, %rax
+; SSE2-NEXT: movd %eax, %xmm1
+; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
+; SSE2-NEXT: movdqa %xmm2, %xmm1
+; SSE2-NEXT: retq
+;
+; AVX-LABEL: fptoui_8vf32:
+; AVX: # BB#0:
+; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX-NEXT: vmovshdup {{.*#+}} xmm2 = xmm1[1,1,3,3]
+; AVX-NEXT: vcvttss2si %xmm2, %rax
+; AVX-NEXT: vcvttss2si %xmm1, %rcx
+; AVX-NEXT: vmovd %ecx, %xmm2
+; AVX-NEXT: vpinsrd $1, %eax, %xmm2, %xmm2
+; AVX-NEXT: vpermilpd {{.*#+}} xmm3 = xmm1[1,0]
+; AVX-NEXT: vcvttss2si %xmm3, %rax
+; AVX-NEXT: vpinsrd $2, %eax, %xmm2, %xmm2
+; AVX-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[3,1,2,3]
+; AVX-NEXT: vcvttss2si %xmm1, %rax
+; AVX-NEXT: vpinsrd $3, %eax, %xmm2, %xmm1
+; AVX-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
+; AVX-NEXT: vcvttss2si %xmm2, %rax
+; AVX-NEXT: vcvttss2si %xmm0, %rcx
+; AVX-NEXT: vmovd %ecx, %xmm2
+; AVX-NEXT: vpinsrd $1, %eax, %xmm2, %xmm2
+; AVX-NEXT: vpermilpd {{.*#+}} xmm3 = xmm0[1,0]
+; AVX-NEXT: vcvttss2si %xmm3, %rax
+; AVX-NEXT: vpinsrd $2, %eax, %xmm2, %xmm2
+; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
+; AVX-NEXT: vcvttss2si %xmm0, %rax
+; AVX-NEXT: vpinsrd $3, %eax, %xmm2, %xmm0
+; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX-NEXT: retq
+ %cvt = fptoui <8 x float> %a to <8 x i32>
+ ret <8 x i32> %cvt
+}
+
+define <4 x i64> @fptoui_8vf32_i64(<8 x float> %a) {
+; SSE2-LABEL: fptoui_8vf32_i64:
+; SSE2: # BB#0:
+; SSE2-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; SSE2-NEXT: movaps %xmm0, %xmm2
+; SSE2-NEXT: subss %xmm1, %xmm2
+; SSE2-NEXT: cvttss2si %xmm2, %rcx
+; SSE2-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
+; SSE2-NEXT: xorq %rax, %rcx
+; SSE2-NEXT: cvttss2si %xmm0, %rdx
+; SSE2-NEXT: ucomiss %xmm1, %xmm0
+; SSE2-NEXT: cmovaeq %rcx, %rdx
+; SSE2-NEXT: movd %rdx, %xmm2
+; SSE2-NEXT: movaps %xmm0, %xmm3
+; SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,1,2,3]
+; SSE2-NEXT: movaps %xmm3, %xmm4
+; SSE2-NEXT: subss %xmm1, %xmm4
+; SSE2-NEXT: cvttss2si %xmm4, %rcx
+; SSE2-NEXT: xorq %rax, %rcx
+; SSE2-NEXT: cvttss2si %xmm3, %rdx
+; SSE2-NEXT: ucomiss %xmm1, %xmm3
+; SSE2-NEXT: cmovaeq %rcx, %rdx
+; SSE2-NEXT: movd %rdx, %xmm3
+; SSE2-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; SSE2-NEXT: movaps %xmm0, %xmm3
+; SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,1,2,3]
+; SSE2-NEXT: movaps %xmm3, %xmm4
+; SSE2-NEXT: subss %xmm1, %xmm4
+; SSE2-NEXT: cvttss2si %xmm4, %rcx
+; SSE2-NEXT: xorq %rax, %rcx
+; SSE2-NEXT: cvttss2si %xmm3, %rdx
+; SSE2-NEXT: ucomiss %xmm1, %xmm3
+; SSE2-NEXT: cmovaeq %rcx, %rdx
+; SSE2-NEXT: movd %rdx, %xmm3
+; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
+; SSE2-NEXT: movapd %xmm0, %xmm4
+; SSE2-NEXT: subss %xmm1, %xmm4
+; SSE2-NEXT: cvttss2si %xmm4, %rcx
+; SSE2-NEXT: xorq %rax, %rcx
+; SSE2-NEXT: cvttss2si %xmm0, %rax
+; SSE2-NEXT: ucomiss %xmm1, %xmm0
+; SSE2-NEXT: cmovaeq %rcx, %rax
+; SSE2-NEXT: movd %rax, %xmm1
+; SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
+; SSE2-NEXT: movdqa %xmm2, %xmm0
+; SSE2-NEXT: retq
+;
+; AVX-LABEL: fptoui_8vf32_i64:
+; AVX: # BB#0:
+; AVX-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[3,1,2,3]
+; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; AVX-NEXT: vsubss %xmm1, %xmm2, %xmm3
+; AVX-NEXT: vcvttss2si %xmm3, %rax
+; AVX-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
+; AVX-NEXT: xorq %rcx, %rax
+; AVX-NEXT: vcvttss2si %xmm2, %rdx
+; AVX-NEXT: vucomiss %xmm1, %xmm2
+; AVX-NEXT: cmovaeq %rax, %rdx
+; AVX-NEXT: vmovq %rdx, %xmm2
+; AVX-NEXT: vpermilpd {{.*#+}} xmm3 = xmm0[1,0]
+; AVX-NEXT: vsubss %xmm1, %xmm3, %xmm4
+; AVX-NEXT: vcvttss2si %xmm4, %rax
+; AVX-NEXT: xorq %rcx, %rax
+; AVX-NEXT: vcvttss2si %xmm3, %rdx
+; AVX-NEXT: vucomiss %xmm1, %xmm3
+; AVX-NEXT: cmovaeq %rax, %rdx
+; AVX-NEXT: vmovq %rdx, %xmm3
+; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
+; AVX-NEXT: vsubss %xmm1, %xmm0, %xmm3
+; AVX-NEXT: vcvttss2si %xmm3, %rax
+; AVX-NEXT: xorq %rcx, %rax
+; AVX-NEXT: vcvttss2si %xmm0, %rdx
+; AVX-NEXT: vucomiss %xmm1, %xmm0
+; AVX-NEXT: cmovaeq %rax, %rdx
+; AVX-NEXT: vmovq %rdx, %xmm3
+; AVX-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; AVX-NEXT: vsubss %xmm1, %xmm0, %xmm4
+; AVX-NEXT: vcvttss2si %xmm4, %rax
+; AVX-NEXT: xorq %rcx, %rax
+; AVX-NEXT: vcvttss2si %xmm0, %rcx
+; AVX-NEXT: vucomiss %xmm1, %xmm0
+; AVX-NEXT: cmovaeq %rax, %rcx
+; AVX-NEXT: vmovq %rcx, %xmm0
+; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm0[0]
+; AVX-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX-NEXT: retq
+ %shuf = shufflevector <8 x float> %a, <8 x float> %a, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %cvt = fptoui <4 x float> %shuf to <4 x i64>
+ ret <4 x i64> %cvt
+}
+
+;
+; Constant Folding
+;
+
+define <2 x i64> @fptosi_2vf64c() {
+; SSE2-LABEL: fptosi_2vf64c:
+; SSE2: # BB#0:
+; SSE2-NEXT: movaps {{.*#+}} xmm0 = [1,18446744073709551615]
+; SSE2-NEXT: retq
+;
+; AVX-LABEL: fptosi_2vf64c:
+; AVX: # BB#0:
+; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [1,18446744073709551615]
+; AVX-NEXT: retq
+ %cvt = fptosi <2 x double> <double 1.0, double -1.0> to <2 x i64>
+ ret <2 x i64> %cvt
+}
+
+define <4 x i32> @fptosi_2vf64c_i32() {
+; SSE2-LABEL: fptosi_2vf64c_i32:
+; SSE2: # BB#0:
+; SSE2-NEXT: movaps {{.*#+}} xmm0 = <4294967295,1,u,u>
+; SSE2-NEXT: retq
+;
+; AVX-LABEL: fptosi_2vf64c_i32:
+; AVX: # BB#0:
+; AVX-NEXT: vmovaps {{.*#+}} xmm0 = <4294967295,1,u,u>
+; AVX-NEXT: retq
+ %cvt = fptosi <2 x double> <double -1.0, double 1.0> to <2 x i32>
+ %ext = shufflevector <2 x i32> %cvt, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+ ret <4 x i32> %ext
+}
+
+define <4 x i64> @fptosi_4vf64c() {
+; SSE2-LABEL: fptosi_4vf64c:
+; SSE2: # BB#0:
+; SSE2-NEXT: movaps {{.*#+}} xmm0 = [1,18446744073709551615]
+; SSE2-NEXT: movaps {{.*#+}} xmm1 = [2,18446744073709551613]
+; SSE2-NEXT: retq
+;
+; AVX-LABEL: fptosi_4vf64c:
+; AVX: # BB#0:
+; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [1,18446744073709551615,2,18446744073709551613]
+; AVX-NEXT: retq
+ %cvt = fptosi <4 x double> <double 1.0, double -1.0, double 2.0, double -3.0> to <4 x i64>
+ ret <4 x i64> %cvt
+}
+
+define <4 x i32> @fptosi_4vf64c_i32() {
+; SSE2-LABEL: fptosi_4vf64c_i32:
+; SSE2: # BB#0:
+; SSE2-NEXT: movaps {{.*#+}} xmm0 = [4294967295,1,4294967294,3]
+; SSE2-NEXT: retq
+;
+; AVX-LABEL: fptosi_4vf64c_i32:
+; AVX: # BB#0:
+; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [4294967295,1,4294967294,3]
+; AVX-NEXT: retq
+ %cvt = fptosi <4 x double> <double -1.0, double 1.0, double -2.0, double 3.0> to <4 x i32>
+ ret <4 x i32> %cvt
+}
+
+define <2 x i64> @fptoui_2vf64c() {
+; SSE2-LABEL: fptoui_2vf64c:
+; SSE2: # BB#0:
+; SSE2-NEXT: movaps {{.*#+}} xmm0 = [2,4]
+; SSE2-NEXT: retq
+;
+; AVX-LABEL: fptoui_2vf64c:
+; AVX: # BB#0:
+; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [2,4]
+; AVX-NEXT: retq
+ %cvt = fptoui <2 x double> <double 2.0, double 4.0> to <2 x i64>
+ ret <2 x i64> %cvt
+}
+
+define <4 x i32> @fptoui_2vf64c_i32(<2 x double> %a) {
+; SSE2-LABEL: fptoui_2vf64c_i32:
+; SSE2: # BB#0:
+; SSE2-NEXT: movaps {{.*#+}} xmm0 = <2,4,u,u>
+; SSE2-NEXT: retq
+;
+; AVX-LABEL: fptoui_2vf64c_i32:
+; AVX: # BB#0:
+; AVX-NEXT: vmovaps {{.*#+}} xmm0 = <2,4,u,u>
+; AVX-NEXT: retq
+ %cvt = fptoui <2 x double> <double 2.0, double 4.0> to <2 x i32>
+ %ext = shufflevector <2 x i32> %cvt, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+ ret <4 x i32> %ext
+}
+
+define <4 x i64> @fptoui_4vf64c(<4 x double> %a) {
+; SSE2-LABEL: fptoui_4vf64c:
+; SSE2: # BB#0:
+; SSE2-NEXT: movaps {{.*#+}} xmm0 = [2,4]
+; SSE2-NEXT: movaps {{.*#+}} xmm1 = [6,8]
+; SSE2-NEXT: retq
+;
+; AVX-LABEL: fptoui_4vf64c:
+; AVX: # BB#0:
+; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [2,4,6,8]
+; AVX-NEXT: retq
+ %cvt = fptoui <4 x double> <double 2.0, double 4.0, double 6.0, double 8.0> to <4 x i64>
+ ret <4 x i64> %cvt
+}
+
+define <4 x i32> @fptoui_4vf64c_i32(<4 x double> %a) {
+; SSE2-LABEL: fptoui_4vf64c_i32:
+; SSE2: # BB#0:
+; SSE2-NEXT: movaps {{.*#+}} xmm0 = [2,4,6,8]
+; SSE2-NEXT: retq
+;
+; AVX-LABEL: fptoui_4vf64c_i32:
+; AVX: # BB#0:
+; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [2,4,6,8]
+; AVX-NEXT: retq
+ %cvt = fptoui <4 x double> <double 2.0, double 4.0, double 6.0, double 8.0> to <4 x i32>
+ ret <4 x i32> %cvt
+}
+
+define <4 x i32> @fptosi_4vf32c() {
+; SSE2-LABEL: fptosi_4vf32c:
+; SSE2: # BB#0:
+; SSE2-NEXT: movaps {{.*#+}} xmm0 = [1,4294967295,2,3]
+; SSE2-NEXT: retq
+;
+; AVX-LABEL: fptosi_4vf32c:
+; AVX: # BB#0:
+; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [1,4294967295,2,3]
+; AVX-NEXT: retq
+ %cvt = fptosi <4 x float> <float 1.0, float -1.0, float 2.0, float 3.0> to <4 x i32>
+ ret <4 x i32> %cvt
+}
+
+define <4 x i64> @fptosi_4vf32c_i64() {
+; SSE2-LABEL: fptosi_4vf32c_i64:
+; SSE2: # BB#0:
+; SSE2-NEXT: movaps {{.*#+}} xmm0 = [1,18446744073709551615]
+; SSE2-NEXT: movaps {{.*#+}} xmm1 = [2,3]
+; SSE2-NEXT: retq
+;
+; AVX-LABEL: fptosi_4vf32c_i64:
+; AVX: # BB#0:
+; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [1,18446744073709551615,2,3]
+; AVX-NEXT: retq
+ %cvt = fptosi <4 x float> <float 1.0, float -1.0, float 2.0, float 3.0> to <4 x i64>
+ ret <4 x i64> %cvt
+}
+
+define <8 x i32> @fptosi_8vf32c(<8 x float> %a) {
+; SSE2-LABEL: fptosi_8vf32c:
+; SSE2: # BB#0:
+; SSE2-NEXT: movaps {{.*#+}} xmm0 = [1,4294967295,2,3]
+; SSE2-NEXT: movaps {{.*#+}} xmm1 = [6,4294967288,2,4294967295]
+; SSE2-NEXT: retq
+;
+; AVX-LABEL: fptosi_8vf32c:
+; AVX: # BB#0:
+; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [1,4294967295,2,3,6,4294967288,2,4294967295]
+; AVX-NEXT: retq
+ %cvt = fptosi <8 x float> <float 1.0, float -1.0, float 2.0, float 3.0, float 6.0, float -8.0, float 2.0, float -1.0> to <8 x i32>
+ ret <8 x i32> %cvt
+}
+
+define <4 x i32> @fptoui_4vf32c(<4 x float> %a) {
+; SSE2-LABEL: fptoui_4vf32c:
+; SSE2: # BB#0:
+; SSE2-NEXT: movaps {{.*#+}} xmm0 = [1,2,4,6]
+; SSE2-NEXT: retq
+;
+; AVX-LABEL: fptoui_4vf32c:
+; AVX: # BB#0:
+; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [1,2,4,6]
+; AVX-NEXT: retq
+ %cvt = fptoui <4 x float> <float 1.0, float 2.0, float 4.0, float 6.0> to <4 x i32>
+ ret <4 x i32> %cvt
+}
+
+define <4 x i64> @fptoui_4vf32c_i64() {
+; SSE2-LABEL: fptoui_4vf32c_i64:
+; SSE2: # BB#0:
+; SSE2-NEXT: movaps {{.*#+}} xmm0 = [1,2]
+; SSE2-NEXT: movaps {{.*#+}} xmm1 = [4,8]
+; SSE2-NEXT: retq
+;
+; AVX-LABEL: fptoui_4vf32c_i64:
+; AVX: # BB#0:
+; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [1,2,4,8]
+; AVX-NEXT: retq
+ %cvt = fptoui <4 x float> <float 1.0, float 2.0, float 4.0, float 8.0> to <4 x i64>
+ ret <4 x i64> %cvt
+}
+
+define <8 x i32> @fptoui_8vf32c(<8 x float> %a) {
+; SSE2-LABEL: fptoui_8vf32c:
+; SSE2: # BB#0:
+; SSE2-NEXT: movaps {{.*#+}} xmm0 = [1,2,4,6]
+; SSE2-NEXT: movaps {{.*#+}} xmm1 = [8,6,4,1]
+; SSE2-NEXT: retq
+;
+; AVX-LABEL: fptoui_8vf32c:
+; AVX: # BB#0:
+; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [1,2,4,6,8,6,4,1]
+; AVX-NEXT: retq
+ %cvt = fptoui <8 x float> <float 1.0, float 2.0, float 4.0, float 6.0, float 8.0, float 6.0, float 4.0, float 1.0> to <8 x i32>
+ ret <8 x i32> %cvt
+}
diff --git a/test/CodeGen/X86/vec_fpext.ll b/test/CodeGen/X86/vec_fpext.ll
index b882a5e272b5..bb5409b91ee4 100644
--- a/test/CodeGen/X86/vec_fpext.ll
+++ b/test/CodeGen/X86/vec_fpext.ll
@@ -8,7 +8,7 @@ define void @fpext_frommem(<2 x float>* %in, <2 x double>* %out) {
entry:
; CHECK: cvtps2pd (%{{.+}}), %xmm{{[0-9]+}}
; AVX: vcvtps2pd (%{{.+}}), %xmm{{[0-9]+}}
- %0 = load <2 x float>* %in, align 8
+ %0 = load <2 x float>, <2 x float>* %in, align 8
%1 = fpext <2 x float> %0 to <2 x double>
store <2 x double> %1, <2 x double>* %out, align 1
ret void
@@ -21,7 +21,7 @@ entry:
; CHECK: cvtps2pd (%{{.+}}), %xmm{{[0-9]+}}
; CHECK: cvtps2pd 8(%{{.+}}), %xmm{{[0-9]+}}
; AVX: vcvtps2pd (%{{.+}}), %ymm{{[0-9]+}}
- %0 = load <4 x float>* %in
+ %0 = load <4 x float>, <4 x float>* %in
%1 = fpext <4 x float> %0 to <4 x double>
store <4 x double> %1, <4 x double>* %out, align 1
ret void
@@ -37,8 +37,20 @@ entry:
; CHECK: cvtps2pd 24(%{{.+}}), %xmm{{[0-9]+}}
; AVX: vcvtps2pd 16(%{{.+}}), %ymm{{[0-9]+}}
; AVX: vcvtps2pd (%{{.+}}), %ymm{{[0-9]+}}
- %0 = load <8 x float>* %in
+ %0 = load <8 x float>, <8 x float>* %in
%1 = fpext <8 x float> %0 to <8 x double>
store <8 x double> %1, <8 x double>* %out, align 1
ret void
}
+
+define <2 x double> @fpext_fromconst() {
+; CHECK-LABEL: fpext_fromconst:
+; AVX-LABEL: fpext_fromconst:
+entry:
+; CHECK: movaps {{.*#+}} xmm0 = [1.000000e+00,-2.000000e+00]
+; AVX: vmovaps {{.*#+}} xmm0 = [1.000000e+00,-2.000000e+00]
+ %0 = insertelement <2 x float> undef, float 1.0, i32 0
+ %1 = insertelement <2 x float> %0, float -2.0, i32 1
+ %2 = fpext <2 x float> %1 to <2 x double>
+ ret <2 x double> %2
+}
diff --git a/test/CodeGen/X86/vec_i64.ll b/test/CodeGen/X86/vec_i64.ll
index 462e16e13023..48ca1ff021d9 100644
--- a/test/CodeGen/X86/vec_i64.ll
+++ b/test/CodeGen/X86/vec_i64.ll
@@ -5,7 +5,7 @@
define <2 x i64> @foo1(i64* %y) nounwind {
entry:
- %tmp1 = load i64* %y, align 8 ; <i64> [#uses=1]
+ %tmp1 = load i64, i64* %y, align 8 ; <i64> [#uses=1]
%s2v = insertelement <2 x i64> undef, i64 %tmp1, i32 0
%loadl = shufflevector <2 x i64> zeroinitializer, <2 x i64> %s2v, <2 x i32> <i32 2, i32 1>
ret <2 x i64> %loadl
@@ -14,7 +14,7 @@ entry:
define <4 x float> @foo2(i64* %p) nounwind {
entry:
- %load = load i64* %p
+ %load = load i64, i64* %p
%s2v = insertelement <2 x i64> undef, i64 %load, i32 0
%loadl = shufflevector <2 x i64> zeroinitializer, <2 x i64> %s2v, <2 x i32> <i32 2, i32 1>
%0 = bitcast <2 x i64> %loadl to <4 x float>
diff --git a/test/CodeGen/X86/vec_ins_extract.ll b/test/CodeGen/X86/vec_ins_extract.ll
index daf222e395bf..e92f46dbabb5 100644
--- a/test/CodeGen/X86/vec_ins_extract.ll
+++ b/test/CodeGen/X86/vec_ins_extract.ll
@@ -7,7 +7,7 @@ target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f3
define void @test(<4 x float>* %F, float %f) {
entry:
- %tmp = load <4 x float>* %F ; <<4 x float>> [#uses=2]
+ %tmp = load <4 x float>, <4 x float>* %F ; <<4 x float>> [#uses=2]
%tmp3 = fadd <4 x float> %tmp, %tmp ; <<4 x float>> [#uses=1]
%tmp10 = insertelement <4 x float> %tmp3, float %f, i32 0 ; <<4 x float>> [#uses=2]
%tmp6 = fadd <4 x float> %tmp10, %tmp10 ; <<4 x float>> [#uses=1]
@@ -18,12 +18,12 @@ entry:
define void @test2(<4 x float>* %F, float %f) {
entry:
%G = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=3]
- %tmp = load <4 x float>* %F ; <<4 x float>> [#uses=2]
+ %tmp = load <4 x float>, <4 x float>* %F ; <<4 x float>> [#uses=2]
%tmp3 = fadd <4 x float> %tmp, %tmp ; <<4 x float>> [#uses=1]
store <4 x float> %tmp3, <4 x float>* %G
- %tmp.upgrd.1 = getelementptr <4 x float>* %G, i32 0, i32 2 ; <float*> [#uses=1]
+ %tmp.upgrd.1 = getelementptr <4 x float>, <4 x float>* %G, i32 0, i32 2 ; <float*> [#uses=1]
store float %f, float* %tmp.upgrd.1
- %tmp4 = load <4 x float>* %G ; <<4 x float>> [#uses=2]
+ %tmp4 = load <4 x float>, <4 x float>* %G ; <<4 x float>> [#uses=2]
%tmp6 = fadd <4 x float> %tmp4, %tmp4 ; <<4 x float>> [#uses=1]
store <4 x float> %tmp6, <4 x float>* %F
ret void
@@ -32,18 +32,18 @@ entry:
define void @test3(<4 x float>* %F, float* %f) {
entry:
%G = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=2]
- %tmp = load <4 x float>* %F ; <<4 x float>> [#uses=2]
+ %tmp = load <4 x float>, <4 x float>* %F ; <<4 x float>> [#uses=2]
%tmp3 = fadd <4 x float> %tmp, %tmp ; <<4 x float>> [#uses=1]
store <4 x float> %tmp3, <4 x float>* %G
- %tmp.upgrd.2 = getelementptr <4 x float>* %G, i32 0, i32 2 ; <float*> [#uses=1]
- %tmp.upgrd.3 = load float* %tmp.upgrd.2 ; <float> [#uses=1]
+ %tmp.upgrd.2 = getelementptr <4 x float>, <4 x float>* %G, i32 0, i32 2 ; <float*> [#uses=1]
+ %tmp.upgrd.3 = load float, float* %tmp.upgrd.2 ; <float> [#uses=1]
store float %tmp.upgrd.3, float* %f
ret void
}
define void @test4(<4 x float>* %F, float* %f) {
entry:
- %tmp = load <4 x float>* %F ; <<4 x float>> [#uses=2]
+ %tmp = load <4 x float>, <4 x float>* %F ; <<4 x float>> [#uses=2]
%tmp5.lhs = extractelement <4 x float> %tmp, i32 0 ; <float> [#uses=1]
%tmp5.rhs = extractelement <4 x float> %tmp, i32 0 ; <float> [#uses=1]
%tmp5 = fadd float %tmp5.lhs, %tmp5.rhs ; <float> [#uses=1]
diff --git a/test/CodeGen/X86/vec_insert-3.ll b/test/CodeGen/X86/vec_insert-3.ll
index a8713398e955..75244ae0b71a 100644
--- a/test/CodeGen/X86/vec_insert-3.ll
+++ b/test/CodeGen/X86/vec_insert-3.ll
@@ -1,6 +1,10 @@
-; RUN: llc < %s -march=x86-64 -mattr=+sse2,-sse4.1 | grep punpcklqdq | count 1
+; RUN: llc < %s -march=x86-64 -mattr=+sse2,-sse4.1 | FileCheck %s
define <2 x i64> @t1(i64 %s, <2 x i64> %tmp) nounwind {
- %tmp1 = insertelement <2 x i64> %tmp, i64 %s, i32 1
- ret <2 x i64> %tmp1
+; CHECK-LABEL: t1:
+; CHECK: punpcklqdq
+; CHECK-NEXT: retq
+
+ %tmp1 = insertelement <2 x i64> %tmp, i64 %s, i32 1
+ ret <2 x i64> %tmp1
}
diff --git a/test/CodeGen/X86/vec_insert-5.ll b/test/CodeGen/X86/vec_insert-5.ll
index b72044aee30b..4018a21090e7 100644
--- a/test/CodeGen/X86/vec_insert-5.ll
+++ b/test/CodeGen/X86/vec_insert-5.ll
@@ -8,8 +8,8 @@ define void @t1(i32 %a, x86_mmx* %P) nounwind {
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT: shll $12, %ecx
; CHECK-NEXT: movd %ecx, %xmm0
-; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,0,0,1]
-; CHECK-NEXT: movlpd %xmm0, (%eax)
+; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,0,1,1]
+; CHECK-NEXT: movq %xmm0, (%eax)
; CHECK-NEXT: retl
%tmp12 = shl i32 %a, 12
%tmp21 = insertelement <2 x i32> undef, i32 %tmp12, i32 1
@@ -25,10 +25,10 @@ define <4 x float> @t2(<4 x float>* %P) nounwind {
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movaps (%eax), %xmm1
; CHECK-NEXT: xorps %xmm0, %xmm0
-; CHECK-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[0,0]
-; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[2,0]
+; CHECK-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[2,0]
+; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,0]
; CHECK-NEXT: retl
- %tmp1 = load <4 x float>* %P
+ %tmp1 = load <4 x float>, <4 x float>* %P
%tmp2 = shufflevector <4 x float> %tmp1, <4 x float> zeroinitializer, <4 x i32> < i32 4, i32 4, i32 4, i32 0 >
ret <4 x float> %tmp2
}
@@ -37,11 +37,11 @@ define <4 x float> @t3(<4 x float>* %P) nounwind {
; CHECK-LABEL: t3:
; CHECK: # BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
-; CHECK-NEXT: movaps (%eax), %xmm0
-; CHECK-NEXT: xorps %xmm1, %xmm1
-; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,3],xmm1[0,0]
+; CHECK-NEXT: movapd (%eax), %xmm0
+; CHECK-NEXT: xorpd %xmm1, %xmm1
+; CHECK-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; CHECK-NEXT: retl
- %tmp1 = load <4 x float>* %P
+ %tmp1 = load <4 x float>, <4 x float>* %P
%tmp2 = shufflevector <4 x float> %tmp1, <4 x float> zeroinitializer, <4 x i32> < i32 2, i32 3, i32 4, i32 4 >
ret <4 x float> %tmp2
}
@@ -52,10 +52,10 @@ define <4 x float> @t4(<4 x float>* %P) nounwind {
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movaps (%eax), %xmm0
; CHECK-NEXT: xorps %xmm1, %xmm1
-; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[0,0]
-; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,0]
+; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[1,0]
+; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[2,3]
; CHECK-NEXT: retl
- %tmp1 = load <4 x float>* %P
+ %tmp1 = load <4 x float>, <4 x float>* %P
%tmp2 = shufflevector <4 x float> zeroinitializer, <4 x float> %tmp1, <4 x i32> < i32 7, i32 0, i32 0, i32 0 >
ret <4 x float> %tmp2
}
@@ -63,7 +63,7 @@ define <4 x float> @t4(<4 x float>* %P) nounwind {
define <16 x i8> @t5(<16 x i8> %x) nounwind {
; CHECK-LABEL: t5:
; CHECK: # BB#0:
-; CHECK-NEXT: psrldq {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero
+; CHECK-NEXT: psrlw $8, %xmm0
; CHECK-NEXT: retl
%s = shufflevector <16 x i8> %x, <16 x i8> zeroinitializer, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 17>
ret <16 x i8> %s
@@ -72,7 +72,7 @@ define <16 x i8> @t5(<16 x i8> %x) nounwind {
define <16 x i8> @t6(<16 x i8> %x) nounwind {
; CHECK-LABEL: t6:
; CHECK: # BB#0:
-; CHECK-NEXT: psrldq {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero
+; CHECK-NEXT: psrlw $8, %xmm0
; CHECK-NEXT: retl
%s = shufflevector <16 x i8> %x, <16 x i8> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
ret <16 x i8> %s
@@ -86,3 +86,21 @@ define <16 x i8> @t7(<16 x i8> %x) nounwind {
%s = shufflevector <16 x i8> %x, <16 x i8> undef, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 1, i32 2>
ret <16 x i8> %s
}
+
+define <16 x i8> @t8(<16 x i8> %x) nounwind {
+; CHECK-LABEL: t8:
+; CHECK: # BB#0:
+; CHECK-NEXT: psrldq {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero
+; CHECK-NEXT: retl
+ %s = shufflevector <16 x i8> %x, <16 x i8> zeroinitializer, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 8, i32 9, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 17>
+ ret <16 x i8> %s
+}
+
+define <16 x i8> @t9(<16 x i8> %x) nounwind {
+; CHECK-LABEL: t9:
+; CHECK: # BB#0:
+; CHECK-NEXT: psrldq {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero
+; CHECK-NEXT: retl
+ %s = shufflevector <16 x i8> %x, <16 x i8> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 7, i32 8, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 14, i32 undef, i32 undef>
+ ret <16 x i8> %s
+}
diff --git a/test/CodeGen/X86/vec_insert-mmx.ll b/test/CodeGen/X86/vec_insert-mmx.ll
new file mode 100644
index 000000000000..cbd420885ac1
--- /dev/null
+++ b/test/CodeGen/X86/vec_insert-mmx.ll
@@ -0,0 +1,58 @@
+; RUN: llc < %s -mtriple=i686-darwin -mattr=+mmx,+sse2 | FileCheck %s -check-prefix=X86-32
+; RUN: llc < %s -mtriple=x86_64-darwin -mattr=+mmx,+sse4.1 | FileCheck %s -check-prefix=X86-64
+
+; This is not an MMX operation; promoted to XMM.
+define x86_mmx @t0(i32 %A) nounwind {
+; X86-32-LABEL: t0:
+; X86-32: ## BB#0:
+; X86-32: movd {{[0-9]+}}(%esp), %xmm0
+; X86-32-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,0,1,1]
+; X86-32-NEXT: movq %xmm0, (%esp)
+; X86-32-NEXT: movq (%esp), %mm0
+; X86-32-NEXT: addl $12, %esp
+; X86-32-NEXT: retl
+ %tmp3 = insertelement <2 x i32> < i32 0, i32 undef >, i32 %A, i32 1
+ %tmp4 = bitcast <2 x i32> %tmp3 to x86_mmx
+ ret x86_mmx %tmp4
+}
+
+define <8 x i8> @t1(i8 zeroext %x) nounwind {
+; X86-32-LABEL: t1:
+; X86-32: ## BB#0:
+; X86-32-NOT: movl
+; X86-32-NEXT: movd {{[0-9]+}}(%esp), %xmm0
+; X86-32-NEXT: retl
+ %r = insertelement <8 x i8> undef, i8 %x, i32 0
+ ret <8 x i8> %r
+}
+
+; PR2574
+define <2 x float> @t2(<2 x float> %a0) {
+; X86-32-LABEL: t2:
+; X86-32: ## BB#0:
+; X86-32-NEXT: xorps %xmm0, %xmm0
+; X86-32-NEXT: retl
+ %v1 = insertelement <2 x float> %a0, float 0.000000e+00, i32 0
+ %v2 = insertelement <2 x float> %v1, float 0.000000e+00, i32 1
+ ret <2 x float> %v2
+}
+
+@g0 = external global i16
+@g1 = external global <4 x i16>
+
+; PR2562
+define void @t3() {
+; X86-64-LABEL: t3:
+; X86-64: ## BB#0:
+; X86-64: pmovzxwd (%rcx)
+; X86-64-NEXT: movzwl
+; X86-64-NEXT: pinsrd $0
+; X86-64-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
+; X86-64-NEXT: movq %xmm0
+; X86-64-NEXT: retq
+ load i16, i16* @g0
+ load <4 x i16>, <4 x i16>* @g1
+ insertelement <4 x i16> %2, i16 %1, i32 0
+ store <4 x i16> %3, <4 x i16>* @g1
+ ret void
+}
diff --git a/test/CodeGen/X86/vec_int_to_fp.ll b/test/CodeGen/X86/vec_int_to_fp.ll
new file mode 100644
index 000000000000..5052ff51092e
--- /dev/null
+++ b/test/CodeGen/X86/vec_int_to_fp.ll
@@ -0,0 +1,714 @@
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=ALL --check-prefix=SSE2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX
+
+;
+; Signed Integer to Double
+;
+
+define <2 x double> @sitofp_2vf64(<2 x i64> %a) {
+; SSE2-LABEL: sitofp_2vf64:
+; SSE2: # BB#0:
+; SSE2-NEXT: movd %xmm0, %rax
+; SSE2-NEXT: cvtsi2sdq %rax, %xmm1
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; SSE2-NEXT: movd %xmm0, %rax
+; SSE2-NEXT: xorps %xmm0, %xmm0
+; SSE2-NEXT: cvtsi2sdq %rax, %xmm0
+; SSE2-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; SSE2-NEXT: movapd %xmm1, %xmm0
+; SSE2-NEXT: retq
+;
+; AVX-LABEL: sitofp_2vf64:
+; AVX: # BB#0:
+; AVX-NEXT: vpextrq $1, %xmm0, %rax
+; AVX-NEXT: vcvtsi2sdq %rax, %xmm0, %xmm1
+; AVX-NEXT: vmovq %xmm0, %rax
+; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
+; AVX-NEXT: vcvtsi2sdq %rax, %xmm0, %xmm0
+; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX-NEXT: retq
+ %cvt = sitofp <2 x i64> %a to <2 x double>
+ ret <2 x double> %cvt
+}
+
+define <2 x double> @sitofp_2vf64_i32(<4 x i32> %a) {
+; SSE2-LABEL: sitofp_2vf64_i32:
+; SSE2: # BB#0:
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; SSE2-NEXT: movd %xmm1, %rax
+; SSE2-NEXT: cltq
+; SSE2-NEXT: movd %xmm0, %rcx
+; SSE2-NEXT: movslq %ecx, %rcx
+; SSE2-NEXT: xorps %xmm0, %xmm0
+; SSE2-NEXT: cvtsi2sdq %rcx, %xmm0
+; SSE2-NEXT: xorps %xmm1, %xmm1
+; SSE2-NEXT: cvtsi2sdq %rax, %xmm1
+; SSE2-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE2-NEXT: retq
+;
+; AVX-LABEL: sitofp_2vf64_i32:
+; AVX: # BB#0:
+; AVX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
+; AVX-NEXT: vmovq %xmm0, %rax
+; AVX-NEXT: cltq
+; AVX-NEXT: vpextrq $1, %xmm0, %rcx
+; AVX-NEXT: movslq %ecx, %rcx
+; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
+; AVX-NEXT: vcvtsi2sdq %rcx, %xmm0, %xmm0
+; AVX-NEXT: vcvtsi2sdq %rax, %xmm0, %xmm1
+; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX-NEXT: retq
+ %shuf = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
+ %cvt = sitofp <2 x i32> %shuf to <2 x double>
+ ret <2 x double> %cvt
+}
+
+define <4 x double> @sitofp_4vf64(<4 x i64> %a) {
+; SSE2-LABEL: sitofp_4vf64:
+; SSE2: # BB#0:
+; SSE2-NEXT: movd %xmm0, %rax
+; SSE2-NEXT: cvtsi2sdq %rax, %xmm2
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; SSE2-NEXT: movd %xmm0, %rax
+; SSE2-NEXT: xorps %xmm0, %xmm0
+; SSE2-NEXT: cvtsi2sdq %rax, %xmm0
+; SSE2-NEXT: unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm0[0]
+; SSE2-NEXT: movd %xmm1, %rax
+; SSE2-NEXT: cvtsi2sdq %rax, %xmm3
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
+; SSE2-NEXT: movd %xmm0, %rax
+; SSE2-NEXT: xorps %xmm0, %xmm0
+; SSE2-NEXT: cvtsi2sdq %rax, %xmm0
+; SSE2-NEXT: unpcklpd {{.*#+}} xmm3 = xmm3[0],xmm0[0]
+; SSE2-NEXT: movapd %xmm2, %xmm0
+; SSE2-NEXT: movapd %xmm3, %xmm1
+; SSE2-NEXT: retq
+;
+; AVX-LABEL: sitofp_4vf64:
+; AVX: # BB#0:
+; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX-NEXT: vpextrq $1, %xmm1, %rax
+; AVX-NEXT: vcvtsi2sdq %rax, %xmm0, %xmm2
+; AVX-NEXT: vmovq %xmm1, %rax
+; AVX-NEXT: vcvtsi2sdq %rax, %xmm0, %xmm1
+; AVX-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; AVX-NEXT: vpextrq $1, %xmm0, %rax
+; AVX-NEXT: vcvtsi2sdq %rax, %xmm0, %xmm2
+; AVX-NEXT: vmovq %xmm0, %rax
+; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
+; AVX-NEXT: vcvtsi2sdq %rax, %xmm0, %xmm0
+; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX-NEXT: retq
+ %cvt = sitofp <4 x i64> %a to <4 x double>
+ ret <4 x double> %cvt
+}
+
+define <4 x double> @sitofp_4vf64_i32(<4 x i32> %a) {
+; SSE2-LABEL: sitofp_4vf64_i32:
+; SSE2: # BB#0:
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,1,1,3]
+; SSE2-NEXT: movd %xmm1, %rax
+; SSE2-NEXT: cltq
+; SSE2-NEXT: cvtsi2sdq %rax, %xmm2
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
+; SSE2-NEXT: movd %xmm1, %rax
+; SSE2-NEXT: cltq
+; SSE2-NEXT: xorps %xmm1, %xmm1
+; SSE2-NEXT: cvtsi2sdq %rax, %xmm1
+; SSE2-NEXT: unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm1[0]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
+; SSE2-NEXT: movd %xmm0, %rax
+; SSE2-NEXT: cltq
+; SSE2-NEXT: xorps %xmm1, %xmm1
+; SSE2-NEXT: cvtsi2sdq %rax, %xmm1
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; SSE2-NEXT: movd %xmm0, %rax
+; SSE2-NEXT: cltq
+; SSE2-NEXT: xorps %xmm0, %xmm0
+; SSE2-NEXT: cvtsi2sdq %rax, %xmm0
+; SSE2-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; SSE2-NEXT: movapd %xmm2, %xmm0
+; SSE2-NEXT: retq
+;
+; AVX-LABEL: sitofp_4vf64_i32:
+; AVX: # BB#0:
+; AVX-NEXT: vcvtdq2pd %xmm0, %ymm0
+; AVX-NEXT: retq
+ %cvt = sitofp <4 x i32> %a to <4 x double>
+ ret <4 x double> %cvt
+}
+
+;
+; Unsigned Integer to Double
+;
+
+define <2 x double> @uitofp_2vf64(<2 x i64> %a) {
+; SSE2-LABEL: uitofp_2vf64:
+; SSE2: # BB#0:
+; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [1127219200,1160773632,0,0]
+; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
+; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE2-NEXT: movapd {{.*#+}} xmm3 = [4.503600e+15,1.934281e+25]
+; SSE2-NEXT: subpd %xmm3, %xmm0
+; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm0[2,3,0,1]
+; SSE2-NEXT: addpd %xmm4, %xmm0
+; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; SSE2-NEXT: subpd %xmm3, %xmm2
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[2,3,0,1]
+; SSE2-NEXT: addpd %xmm2, %xmm1
+; SSE2-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE2-NEXT: retq
+;
+; AVX-LABEL: uitofp_2vf64:
+; AVX: # BB#0:
+; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [1127219200,1160773632,0,0]
+; AVX-NEXT: vpunpckldq {{.*#+}} xmm2 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; AVX-NEXT: vmovapd {{.*#+}} xmm3 = [4.503600e+15,1.934281e+25]
+; AVX-NEXT: vsubpd %xmm3, %xmm2, %xmm2
+; AVX-NEXT: vhaddpd %xmm2, %xmm2, %xmm2
+; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; AVX-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; AVX-NEXT: vsubpd %xmm3, %xmm0, %xmm0
+; AVX-NEXT: vhaddpd %xmm0, %xmm0, %xmm0
+; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm2[0],xmm0[0]
+; AVX-NEXT: retq
+ %cvt = uitofp <2 x i64> %a to <2 x double>
+ ret <2 x double> %cvt
+}
+
+define <2 x double> @uitofp_2vf64_i32(<4 x i32> %a) {
+; SSE2-LABEL: uitofp_2vf64_i32:
+; SSE2: # BB#0:
+; SSE2-NEXT: pxor %xmm1, %xmm1
+; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [1127219200,1160773632,0,0]
+; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
+; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE2-NEXT: movapd {{.*#+}} xmm3 = [4.503600e+15,1.934281e+25]
+; SSE2-NEXT: subpd %xmm3, %xmm0
+; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm0[2,3,0,1]
+; SSE2-NEXT: addpd %xmm4, %xmm0
+; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; SSE2-NEXT: subpd %xmm3, %xmm2
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[2,3,0,1]
+; SSE2-NEXT: addpd %xmm2, %xmm1
+; SSE2-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE2-NEXT: retq
+;
+; AVX-LABEL: uitofp_2vf64_i32:
+; AVX: # BB#0:
+; AVX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
+; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [1127219200,1160773632,0,0]
+; AVX-NEXT: vpunpckldq {{.*#+}} xmm2 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; AVX-NEXT: vmovapd {{.*#+}} xmm3 = [4.503600e+15,1.934281e+25]
+; AVX-NEXT: vsubpd %xmm3, %xmm2, %xmm2
+; AVX-NEXT: vhaddpd %xmm2, %xmm2, %xmm2
+; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; AVX-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; AVX-NEXT: vsubpd %xmm3, %xmm0, %xmm0
+; AVX-NEXT: vhaddpd %xmm0, %xmm0, %xmm0
+; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm2[0],xmm0[0]
+; AVX-NEXT: retq
+ %shuf = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
+ %cvt = uitofp <2 x i32> %shuf to <2 x double>
+ ret <2 x double> %cvt
+}
+
+define <4 x double> @uitofp_4vf64(<4 x i64> %a) {
+; SSE2-LABEL: uitofp_4vf64:
+; SSE2: # BB#0:
+; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [1127219200,1160773632,0,0]
+; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,3,0,1]
+; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; SSE2-NEXT: movapd {{.*#+}} xmm4 = [4.503600e+15,1.934281e+25]
+; SSE2-NEXT: subpd %xmm4, %xmm0
+; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm0[2,3,0,1]
+; SSE2-NEXT: addpd %xmm5, %xmm0
+; SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
+; SSE2-NEXT: subpd %xmm4, %xmm3
+; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm3[2,3,0,1]
+; SSE2-NEXT: addpd %xmm3, %xmm5
+; SSE2-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm5[0]
+; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[2,3,0,1]
+; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
+; SSE2-NEXT: subpd %xmm4, %xmm1
+; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm1[2,3,0,1]
+; SSE2-NEXT: addpd %xmm5, %xmm1
+; SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
+; SSE2-NEXT: subpd %xmm4, %xmm3
+; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[2,3,0,1]
+; SSE2-NEXT: addpd %xmm3, %xmm2
+; SSE2-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; SSE2-NEXT: retq
+;
+; AVX-LABEL: uitofp_4vf64:
+; AVX: # BB#0:
+; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [1127219200,1160773632,0,0]
+; AVX-NEXT: vpunpckldq {{.*#+}} xmm3 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
+; AVX-NEXT: vmovapd {{.*#+}} xmm4 = [4.503600e+15,1.934281e+25]
+; AVX-NEXT: vsubpd %xmm4, %xmm3, %xmm3
+; AVX-NEXT: vhaddpd %xmm3, %xmm3, %xmm3
+; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
+; AVX-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
+; AVX-NEXT: vsubpd %xmm4, %xmm1, %xmm1
+; AVX-NEXT: vhaddpd %xmm1, %xmm1, %xmm1
+; AVX-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm3[0],xmm1[0]
+; AVX-NEXT: vpunpckldq {{.*#+}} xmm3 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; AVX-NEXT: vsubpd %xmm4, %xmm3, %xmm3
+; AVX-NEXT: vhaddpd %xmm3, %xmm3, %xmm3
+; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; AVX-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; AVX-NEXT: vsubpd %xmm4, %xmm0, %xmm0
+; AVX-NEXT: vhaddpd %xmm0, %xmm0, %xmm0
+; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm3[0],xmm0[0]
+; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX-NEXT: retq
+ %cvt = uitofp <4 x i64> %a to <4 x double>
+ ret <4 x double> %cvt
+}
+
+define <4 x double> @uitofp_4vf64_i32(<4 x i32> %a) {
+; SSE2-LABEL: uitofp_4vf64_i32:
+; SSE2: # BB#0:
+; SSE2-NEXT: pxor %xmm1, %xmm1
+; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,2,3,3]
+; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [1127219200,1160773632,0,0]
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
+; SSE2-NEXT: movapd {{.*#+}} xmm4 = [4.503600e+15,1.934281e+25]
+; SSE2-NEXT: subpd %xmm4, %xmm0
+; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm0[2,3,0,1]
+; SSE2-NEXT: addpd %xmm5, %xmm0
+; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
+; SSE2-NEXT: subpd %xmm4, %xmm1
+; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm1[2,3,0,1]
+; SSE2-NEXT: addpd %xmm1, %xmm5
+; SSE2-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm5[0]
+; SSE2-NEXT: pand .LCPI7_2(%rip), %xmm2
+; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm2[2,3,0,1]
+; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
+; SSE2-NEXT: subpd %xmm4, %xmm2
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[2,3,0,1]
+; SSE2-NEXT: addpd %xmm2, %xmm1
+; SSE2-NEXT: punpckldq {{.*#+}} xmm5 = xmm5[0],xmm3[0],xmm5[1],xmm3[1]
+; SSE2-NEXT: subpd %xmm4, %xmm5
+; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm5[2,3,0,1]
+; SSE2-NEXT: addpd %xmm5, %xmm2
+; SSE2-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; SSE2-NEXT: retq
+;
+; AVX-LABEL: uitofp_4vf64_i32:
+; AVX: # BB#0:
+; AVX-NEXT: vpand .LCPI7_0(%rip), %xmm0, %xmm1
+; AVX-NEXT: vcvtdq2pd %xmm1, %ymm1
+; AVX-NEXT: vpsrld $16, %xmm0, %xmm0
+; AVX-NEXT: vcvtdq2pd %xmm0, %ymm0
+; AVX-NEXT: vmulpd .LCPI7_1(%rip), %ymm0, %ymm0
+; AVX-NEXT: vaddpd %ymm1, %ymm0, %ymm0
+; AVX-NEXT: retq
+ %cvt = uitofp <4 x i32> %a to <4 x double>
+ ret <4 x double> %cvt
+}
+
+;
+; Signed Integer to Float
+;
+
+define <4 x float> @sitofp_4vf32(<4 x i32> %a) {
+; SSE2-LABEL: sitofp_4vf32:
+; SSE2: # BB#0:
+; SSE2-NEXT: cvtdq2ps %xmm0, %xmm0
+; SSE2-NEXT: retq
+;
+; AVX-LABEL: sitofp_4vf32:
+; AVX: # BB#0:
+; AVX-NEXT: vcvtdq2ps %xmm0, %xmm0
+; AVX-NEXT: retq
+ %cvt = sitofp <4 x i32> %a to <4 x float>
+ ret <4 x float> %cvt
+}
+
+define <4 x float> @sitofp_4vf32_i64(<2 x i64> %a) {
+; SSE2-LABEL: sitofp_4vf32_i64:
+; SSE2: # BB#0:
+; SSE2-NEXT: movd %xmm0, %rax
+; SSE2-NEXT: cvtsi2ssq %rax, %xmm1
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; SSE2-NEXT: movd %xmm0, %rax
+; SSE2-NEXT: xorps %xmm0, %xmm0
+; SSE2-NEXT: cvtsi2ssq %rax, %xmm0
+; SSE2-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; SSE2-NEXT: movaps %xmm1, %xmm0
+; SSE2-NEXT: retq
+;
+; AVX-LABEL: sitofp_4vf32_i64:
+; AVX: # BB#0:
+; AVX-NEXT: vpextrq $1, %xmm0, %rax
+; AVX-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm1
+; AVX-NEXT: vmovq %xmm0, %rax
+; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
+; AVX-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm0
+; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
+; AVX-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm1
+; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3]
+; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0]
+; AVX-NEXT: retq
+ %cvt = sitofp <2 x i64> %a to <2 x float>
+ %ext = shufflevector <2 x float> %cvt, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+ ret <4 x float> %ext
+}
+
+define <8 x float> @sitofp_8vf32(<8 x i32> %a) {
+; SSE2-LABEL: sitofp_8vf32:
+; SSE2: # BB#0:
+; SSE2-NEXT: cvtdq2ps %xmm0, %xmm0
+; SSE2-NEXT: cvtdq2ps %xmm1, %xmm1
+; SSE2-NEXT: retq
+;
+; AVX-LABEL: sitofp_8vf32:
+; AVX: # BB#0:
+; AVX-NEXT: vcvtdq2ps %ymm0, %ymm0
+; AVX-NEXT: retq
+ %cvt = sitofp <8 x i32> %a to <8 x float>
+ ret <8 x float> %cvt
+}
+
+define <4 x float> @sitofp_4vf32_4i64(<4 x i64> %a) {
+; SSE2-LABEL: sitofp_4vf32_4i64:
+; SSE2: # BB#0:
+; SSE2-NEXT: movd %xmm1, %rax
+; SSE2-NEXT: cvtsi2ssq %rax, %xmm3
+; SSE2-NEXT: movd %xmm0, %rax
+; SSE2-NEXT: cvtsi2ssq %rax, %xmm2
+; SSE2-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
+; SSE2-NEXT: movd %xmm1, %rax
+; SSE2-NEXT: xorps %xmm1, %xmm1
+; SSE2-NEXT: cvtsi2ssq %rax, %xmm1
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; SSE2-NEXT: movd %xmm0, %rax
+; SSE2-NEXT: xorps %xmm0, %xmm0
+; SSE2-NEXT: cvtsi2ssq %rax, %xmm0
+; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE2-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
+; SSE2-NEXT: movaps %xmm2, %xmm0
+; SSE2-NEXT: retq
+;
+; AVX-LABEL: sitofp_4vf32_4i64:
+; AVX: # BB#0:
+; AVX-NEXT: vpextrq $1, %xmm0, %rax
+; AVX-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm1
+; AVX-NEXT: vmovq %xmm0, %rax
+; AVX-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm2
+; AVX-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
+; AVX-NEXT: vextractf128 $1, %ymm0, %xmm0
+; AVX-NEXT: vmovq %xmm0, %rax
+; AVX-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm2
+; AVX-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
+; AVX-NEXT: vpextrq $1, %xmm0, %rax
+; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
+; AVX-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm0
+; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
+; AVX-NEXT: vzeroupper
+; AVX-NEXT: retq
+ %cvt = sitofp <4 x i64> %a to <4 x float>
+ ret <4 x float> %cvt
+}
+
+;
+; Unsigned Integer to Float
+;
+
+define <4 x float> @uitofp_4vf32(<4 x i32> %a) {
+; SSE2-LABEL: uitofp_4vf32:
+; SSE2: # BB#0:
+; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [65535,65535,65535,65535]
+; SSE2-NEXT: pand %xmm0, %xmm1
+; SSE2-NEXT: por .LCPI12_1(%rip), %xmm1
+; SSE2-NEXT: psrld $16, %xmm0
+; SSE2-NEXT: por .LCPI12_2(%rip), %xmm0
+; SSE2-NEXT: addps .LCPI12_3(%rip), %xmm0
+; SSE2-NEXT: addps %xmm1, %xmm0
+; SSE2-NEXT: retq
+;
+; AVX-LABEL: uitofp_4vf32:
+; AVX: # BB#0:
+; AVX-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
+; AVX-NEXT: vpsrld $16, %xmm0, %xmm0
+; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
+; AVX-NEXT: vaddps .LCPI12_2(%rip), %xmm0, %xmm0
+; AVX-NEXT: vaddps %xmm0, %xmm1, %xmm0
+; AVX-NEXT: retq
+ %cvt = uitofp <4 x i32> %a to <4 x float>
+ ret <4 x float> %cvt
+}
+
+define <4 x float> @uitofp_4vf32_i64(<2 x i64> %a) {
+; SSE2-LABEL: uitofp_4vf32_i64:
+; SSE2: # BB#0:
+; SSE2-NEXT: movdqa %xmm0, %xmm1
+; SSE2-NEXT: movd %xmm1, %rax
+; SSE2-NEXT: movl %eax, %ecx
+; SSE2-NEXT: andl $1, %ecx
+; SSE2-NEXT: testq %rax, %rax
+; SSE2-NEXT: js .LBB13_1
+; SSE2-NEXT: # BB#2:
+; SSE2-NEXT: xorps %xmm0, %xmm0
+; SSE2-NEXT: cvtsi2ssq %rax, %xmm0
+; SSE2-NEXT: jmp .LBB13_3
+; SSE2-NEXT: .LBB13_1:
+; SSE2-NEXT: shrq %rax
+; SSE2-NEXT: orq %rax, %rcx
+; SSE2-NEXT: xorps %xmm0, %xmm0
+; SSE2-NEXT: cvtsi2ssq %rcx, %xmm0
+; SSE2-NEXT: addss %xmm0, %xmm0
+; SSE2-NEXT: .LBB13_3:
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
+; SSE2-NEXT: movd %xmm1, %rax
+; SSE2-NEXT: movl %eax, %ecx
+; SSE2-NEXT: andl $1, %ecx
+; SSE2-NEXT: testq %rax, %rax
+; SSE2-NEXT: js .LBB13_4
+; SSE2-NEXT: # BB#5:
+; SSE2-NEXT: xorps %xmm1, %xmm1
+; SSE2-NEXT: cvtsi2ssq %rax, %xmm1
+; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE2-NEXT: retq
+; SSE2-NEXT: .LBB13_4:
+; SSE2-NEXT: shrq %rax
+; SSE2-NEXT: orq %rax, %rcx
+; SSE2-NEXT: xorps %xmm1, %xmm1
+; SSE2-NEXT: cvtsi2ssq %rcx, %xmm1
+; SSE2-NEXT: addss %xmm1, %xmm1
+; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE2-NEXT: retq
+;
+; AVX-LABEL: uitofp_4vf32_i64:
+; AVX: # BB#0:
+; AVX-NEXT: vpextrq $1, %xmm0, %rax
+; AVX-NEXT: movl %eax, %ecx
+; AVX-NEXT: andl $1, %ecx
+; AVX-NEXT: testq %rax, %rax
+; AVX-NEXT: js .LBB13_1
+; AVX-NEXT: # BB#2:
+; AVX-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm1
+; AVX-NEXT: jmp .LBB13_3
+; AVX-NEXT: .LBB13_1:
+; AVX-NEXT: shrq %rax
+; AVX-NEXT: orq %rax, %rcx
+; AVX-NEXT: vcvtsi2ssq %rcx, %xmm0, %xmm1
+; AVX-NEXT: vaddss %xmm1, %xmm1, %xmm1
+; AVX-NEXT: .LBB13_3:
+; AVX-NEXT: vmovq %xmm0, %rax
+; AVX-NEXT: movl %eax, %ecx
+; AVX-NEXT: andl $1, %ecx
+; AVX-NEXT: testq %rax, %rax
+; AVX-NEXT: js .LBB13_4
+; AVX-NEXT: # BB#5:
+; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
+; AVX-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm0
+; AVX-NEXT: jmp .LBB13_6
+; AVX-NEXT: .LBB13_4:
+; AVX-NEXT: shrq %rax
+; AVX-NEXT: orq %rax, %rcx
+; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
+; AVX-NEXT: vcvtsi2ssq %rcx, %xmm0, %xmm0
+; AVX-NEXT: vaddss %xmm0, %xmm0, %xmm0
+; AVX-NEXT: .LBB13_6:
+; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
+; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; AVX-NEXT: testq %rax, %rax
+; AVX-NEXT: js .LBB13_8
+; AVX-NEXT: # BB#7:
+; AVX-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm1
+; AVX-NEXT: .LBB13_8:
+; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3]
+; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0]
+; AVX-NEXT: retq
+ %cvt = uitofp <2 x i64> %a to <2 x float>
+ %ext = shufflevector <2 x float> %cvt, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+ ret <4 x float> %ext
+}
+
+define <8 x float> @uitofp_8vf32(<8 x i32> %a) {
+; SSE2-LABEL: uitofp_8vf32:
+; SSE2: # BB#0:
+; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [65535,65535,65535,65535]
+; SSE2-NEXT: movdqa %xmm0, %xmm3
+; SSE2-NEXT: pand %xmm2, %xmm3
+; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [1258291200,1258291200,1258291200,1258291200]
+; SSE2-NEXT: por %xmm4, %xmm3
+; SSE2-NEXT: psrld $16, %xmm0
+; SSE2-NEXT: movdqa {{.*#+}} xmm5 = [1392508928,1392508928,1392508928,1392508928]
+; SSE2-NEXT: por %xmm5, %xmm0
+; SSE2-NEXT: movaps {{.*#+}} xmm6 = [-5.497642e+11,-5.497642e+11,-5.497642e+11,-5.497642e+11]
+; SSE2-NEXT: addps %xmm6, %xmm0
+; SSE2-NEXT: addps %xmm3, %xmm0
+; SSE2-NEXT: pand %xmm1, %xmm2
+; SSE2-NEXT: por %xmm4, %xmm2
+; SSE2-NEXT: psrld $16, %xmm1
+; SSE2-NEXT: por %xmm5, %xmm1
+; SSE2-NEXT: addps %xmm6, %xmm1
+; SSE2-NEXT: addps %xmm2, %xmm1
+; SSE2-NEXT: retq
+;
+; AVX-LABEL: uitofp_8vf32:
+; AVX: # BB#0:
+; AVX-NEXT: vandps .LCPI14_0(%rip), %ymm0, %ymm1
+; AVX-NEXT: vcvtdq2ps %ymm1, %ymm1
+; AVX-NEXT: vpsrld $16, %xmm0, %xmm2
+; AVX-NEXT: vextractf128 $1, %ymm0, %xmm0
+; AVX-NEXT: vpsrld $16, %xmm0, %xmm0
+; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
+; AVX-NEXT: vcvtdq2ps %ymm0, %ymm0
+; AVX-NEXT: vmulps .LCPI14_1(%rip), %ymm0, %ymm0
+; AVX-NEXT: vaddps %ymm1, %ymm0, %ymm0
+; AVX-NEXT: retq
+ %cvt = uitofp <8 x i32> %a to <8 x float>
+ ret <8 x float> %cvt
+}
+
+define <4 x float> @uitofp_4vf32_4i64(<4 x i64> %a) {
+; SSE2-LABEL: uitofp_4vf32_4i64:
+; SSE2: # BB#0:
+; SSE2-NEXT: movd %xmm1, %rax
+; SSE2-NEXT: movl %eax, %ecx
+; SSE2-NEXT: andl $1, %ecx
+; SSE2-NEXT: testq %rax, %rax
+; SSE2-NEXT: js .LBB15_1
+; SSE2-NEXT: # BB#2:
+; SSE2-NEXT: cvtsi2ssq %rax, %xmm3
+; SSE2-NEXT: jmp .LBB15_3
+; SSE2-NEXT: .LBB15_1:
+; SSE2-NEXT: shrq %rax
+; SSE2-NEXT: orq %rax, %rcx
+; SSE2-NEXT: cvtsi2ssq %rcx, %xmm3
+; SSE2-NEXT: addss %xmm3, %xmm3
+; SSE2-NEXT: .LBB15_3:
+; SSE2-NEXT: movd %xmm0, %rax
+; SSE2-NEXT: movl %eax, %ecx
+; SSE2-NEXT: andl $1, %ecx
+; SSE2-NEXT: testq %rax, %rax
+; SSE2-NEXT: js .LBB15_4
+; SSE2-NEXT: # BB#5:
+; SSE2-NEXT: cvtsi2ssq %rax, %xmm2
+; SSE2-NEXT: jmp .LBB15_6
+; SSE2-NEXT: .LBB15_4:
+; SSE2-NEXT: shrq %rax
+; SSE2-NEXT: orq %rax, %rcx
+; SSE2-NEXT: cvtsi2ssq %rcx, %xmm2
+; SSE2-NEXT: addss %xmm2, %xmm2
+; SSE2-NEXT: .LBB15_6:
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
+; SSE2-NEXT: movd %xmm1, %rax
+; SSE2-NEXT: movl %eax, %ecx
+; SSE2-NEXT: andl $1, %ecx
+; SSE2-NEXT: testq %rax, %rax
+; SSE2-NEXT: js .LBB15_7
+; SSE2-NEXT: # BB#8:
+; SSE2-NEXT: xorps %xmm1, %xmm1
+; SSE2-NEXT: cvtsi2ssq %rax, %xmm1
+; SSE2-NEXT: jmp .LBB15_9
+; SSE2-NEXT: .LBB15_7:
+; SSE2-NEXT: shrq %rax
+; SSE2-NEXT: orq %rax, %rcx
+; SSE2-NEXT: xorps %xmm1, %xmm1
+; SSE2-NEXT: cvtsi2ssq %rcx, %xmm1
+; SSE2-NEXT: addss %xmm1, %xmm1
+; SSE2-NEXT: .LBB15_9:
+; SSE2-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; SSE2-NEXT: movd %xmm0, %rax
+; SSE2-NEXT: movl %eax, %ecx
+; SSE2-NEXT: andl $1, %ecx
+; SSE2-NEXT: testq %rax, %rax
+; SSE2-NEXT: js .LBB15_10
+; SSE2-NEXT: # BB#11:
+; SSE2-NEXT: xorps %xmm0, %xmm0
+; SSE2-NEXT: cvtsi2ssq %rax, %xmm0
+; SSE2-NEXT: jmp .LBB15_12
+; SSE2-NEXT: .LBB15_10:
+; SSE2-NEXT: shrq %rax
+; SSE2-NEXT: orq %rax, %rcx
+; SSE2-NEXT: xorps %xmm0, %xmm0
+; SSE2-NEXT: cvtsi2ssq %rcx, %xmm0
+; SSE2-NEXT: addss %xmm0, %xmm0
+; SSE2-NEXT: .LBB15_12:
+; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE2-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
+; SSE2-NEXT: movaps %xmm2, %xmm0
+; SSE2-NEXT: retq
+;
+; AVX-LABEL: uitofp_4vf32_4i64:
+; AVX: # BB#0:
+; AVX-NEXT: vpextrq $1, %xmm0, %rax
+; AVX-NEXT: movl %eax, %ecx
+; AVX-NEXT: andl $1, %ecx
+; AVX-NEXT: testq %rax, %rax
+; AVX-NEXT: js .LBB15_1
+; AVX-NEXT: # BB#2:
+; AVX-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm1
+; AVX-NEXT: jmp .LBB15_3
+; AVX-NEXT: .LBB15_1:
+; AVX-NEXT: shrq %rax
+; AVX-NEXT: orq %rax, %rcx
+; AVX-NEXT: vcvtsi2ssq %rcx, %xmm0, %xmm1
+; AVX-NEXT: vaddss %xmm1, %xmm1, %xmm1
+; AVX-NEXT: .LBB15_3:
+; AVX-NEXT: vmovq %xmm0, %rax
+; AVX-NEXT: movl %eax, %ecx
+; AVX-NEXT: andl $1, %ecx
+; AVX-NEXT: testq %rax, %rax
+; AVX-NEXT: js .LBB15_4
+; AVX-NEXT: # BB#5:
+; AVX-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm2
+; AVX-NEXT: jmp .LBB15_6
+; AVX-NEXT: .LBB15_4:
+; AVX-NEXT: shrq %rax
+; AVX-NEXT: orq %rax, %rcx
+; AVX-NEXT: vcvtsi2ssq %rcx, %xmm0, %xmm2
+; AVX-NEXT: vaddss %xmm2, %xmm2, %xmm2
+; AVX-NEXT: .LBB15_6:
+; AVX-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
+; AVX-NEXT: vextractf128 $1, %ymm0, %xmm0
+; AVX-NEXT: vmovq %xmm0, %rax
+; AVX-NEXT: movl %eax, %ecx
+; AVX-NEXT: andl $1, %ecx
+; AVX-NEXT: testq %rax, %rax
+; AVX-NEXT: js .LBB15_7
+; AVX-NEXT: # BB#8:
+; AVX-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm2
+; AVX-NEXT: jmp .LBB15_9
+; AVX-NEXT: .LBB15_7:
+; AVX-NEXT: shrq %rax
+; AVX-NEXT: orq %rax, %rcx
+; AVX-NEXT: vcvtsi2ssq %rcx, %xmm0, %xmm2
+; AVX-NEXT: vaddss %xmm2, %xmm2, %xmm2
+; AVX-NEXT: .LBB15_9:
+; AVX-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
+; AVX-NEXT: vpextrq $1, %xmm0, %rax
+; AVX-NEXT: movl %eax, %ecx
+; AVX-NEXT: andl $1, %ecx
+; AVX-NEXT: testq %rax, %rax
+; AVX-NEXT: js .LBB15_10
+; AVX-NEXT: # BB#11:
+; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
+; AVX-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm0
+; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
+; AVX-NEXT: vzeroupper
+; AVX-NEXT: retq
+; AVX-NEXT: .LBB15_10:
+; AVX-NEXT: shrq %rax
+; AVX-NEXT: orq %rax, %rcx
+; AVX-NEXT: vcvtsi2ssq %rcx, %xmm0, %xmm0
+; AVX-NEXT: vaddss %xmm0, %xmm0, %xmm0
+; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
+; AVX-NEXT: vzeroupper
+; AVX-NEXT: retq
+ %cvt = uitofp <4 x i64> %a to <4 x float>
+ ret <4 x float> %cvt
+}
diff --git a/test/CodeGen/X86/vec_loadsingles.ll b/test/CodeGen/X86/vec_loadsingles.ll
index fd132a52b8f1..ecae5d962826 100644
--- a/test/CodeGen/X86/vec_loadsingles.ll
+++ b/test/CodeGen/X86/vec_loadsingles.ll
@@ -2,10 +2,10 @@
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+slow-unaligned-mem-32 | FileCheck %s --check-prefix=ALL --check-prefix=SLOW32
define <4 x float> @merge_2_floats(float* nocapture %p) nounwind readonly {
- %tmp1 = load float* %p
+ %tmp1 = load float, float* %p
%vecins = insertelement <4 x float> undef, float %tmp1, i32 0
- %add.ptr = getelementptr float* %p, i32 1
- %tmp5 = load float* %add.ptr
+ %add.ptr = getelementptr float, float* %p, i32 1
+ %tmp5 = load float, float* %add.ptr
%vecins7 = insertelement <4 x float> %vecins, float %tmp5, i32 1
ret <4 x float> %vecins7
@@ -17,13 +17,13 @@ define <4 x float> @merge_2_floats(float* nocapture %p) nounwind readonly {
; Test-case generated due to a crash when trying to treat loading the first
; two i64s of a <4 x i64> as a load of two i32s.
define <4 x i64> @merge_2_floats_into_4() {
- %1 = load i64** undef, align 8
- %2 = getelementptr inbounds i64* %1, i64 0
- %3 = load i64* %2
+ %1 = load i64*, i64** undef, align 8
+ %2 = getelementptr inbounds i64, i64* %1, i64 0
+ %3 = load i64, i64* %2
%4 = insertelement <4 x i64> undef, i64 %3, i32 0
- %5 = load i64** undef, align 8
- %6 = getelementptr inbounds i64* %5, i64 1
- %7 = load i64* %6
+ %5 = load i64*, i64** undef, align 8
+ %6 = getelementptr inbounds i64, i64* %5, i64 1
+ %7 = load i64, i64* %6
%8 = insertelement <4 x i64> %4, i64 %7, i32 1
%9 = shufflevector <4 x i64> %8, <4 x i64> undef, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
ret <4 x i64> %9
@@ -34,16 +34,16 @@ define <4 x i64> @merge_2_floats_into_4() {
}
define <4 x float> @merge_4_floats(float* %ptr) {
- %a = load float* %ptr, align 8
+ %a = load float, float* %ptr, align 8
%vec = insertelement <4 x float> undef, float %a, i32 0
- %idx1 = getelementptr inbounds float* %ptr, i64 1
- %b = load float* %idx1, align 8
+ %idx1 = getelementptr inbounds float, float* %ptr, i64 1
+ %b = load float, float* %idx1, align 8
%vec2 = insertelement <4 x float> %vec, float %b, i32 1
- %idx3 = getelementptr inbounds float* %ptr, i64 2
- %c = load float* %idx3, align 8
+ %idx3 = getelementptr inbounds float, float* %ptr, i64 2
+ %c = load float, float* %idx3, align 8
%vec4 = insertelement <4 x float> %vec2, float %c, i32 2
- %idx5 = getelementptr inbounds float* %ptr, i64 3
- %d = load float* %idx5, align 8
+ %idx5 = getelementptr inbounds float, float* %ptr, i64 3
+ %d = load float, float* %idx5, align 8
%vec6 = insertelement <4 x float> %vec4, float %d, i32 3
ret <4 x float> %vec6
@@ -58,28 +58,28 @@ define <4 x float> @merge_4_floats(float* %ptr) {
; 16-byte loads.
define <8 x float> @merge_8_floats(float* %ptr) {
- %a = load float* %ptr, align 4
+ %a = load float, float* %ptr, align 4
%vec = insertelement <8 x float> undef, float %a, i32 0
- %idx1 = getelementptr inbounds float* %ptr, i64 1
- %b = load float* %idx1, align 4
+ %idx1 = getelementptr inbounds float, float* %ptr, i64 1
+ %b = load float, float* %idx1, align 4
%vec2 = insertelement <8 x float> %vec, float %b, i32 1
- %idx3 = getelementptr inbounds float* %ptr, i64 2
- %c = load float* %idx3, align 4
+ %idx3 = getelementptr inbounds float, float* %ptr, i64 2
+ %c = load float, float* %idx3, align 4
%vec4 = insertelement <8 x float> %vec2, float %c, i32 2
- %idx5 = getelementptr inbounds float* %ptr, i64 3
- %d = load float* %idx5, align 4
+ %idx5 = getelementptr inbounds float, float* %ptr, i64 3
+ %d = load float, float* %idx5, align 4
%vec6 = insertelement <8 x float> %vec4, float %d, i32 3
- %idx7 = getelementptr inbounds float* %ptr, i64 4
- %e = load float* %idx7, align 4
+ %idx7 = getelementptr inbounds float, float* %ptr, i64 4
+ %e = load float, float* %idx7, align 4
%vec8 = insertelement <8 x float> %vec6, float %e, i32 4
- %idx9 = getelementptr inbounds float* %ptr, i64 5
- %f = load float* %idx9, align 4
+ %idx9 = getelementptr inbounds float, float* %ptr, i64 5
+ %f = load float, float* %idx9, align 4
%vec10 = insertelement <8 x float> %vec8, float %f, i32 5
- %idx11 = getelementptr inbounds float* %ptr, i64 6
- %g = load float* %idx11, align 4
+ %idx11 = getelementptr inbounds float, float* %ptr, i64 6
+ %g = load float, float* %idx11, align 4
%vec12 = insertelement <8 x float> %vec10, float %g, i32 6
- %idx13 = getelementptr inbounds float* %ptr, i64 7
- %h = load float* %idx13, align 4
+ %idx13 = getelementptr inbounds float, float* %ptr, i64 7
+ %h = load float, float* %idx13, align 4
%vec14 = insertelement <8 x float> %vec12, float %h, i32 7
ret <8 x float> %vec14
@@ -94,16 +94,16 @@ define <8 x float> @merge_8_floats(float* %ptr) {
}
define <4 x double> @merge_4_doubles(double* %ptr) {
- %a = load double* %ptr, align 8
+ %a = load double, double* %ptr, align 8
%vec = insertelement <4 x double> undef, double %a, i32 0
- %idx1 = getelementptr inbounds double* %ptr, i64 1
- %b = load double* %idx1, align 8
+ %idx1 = getelementptr inbounds double, double* %ptr, i64 1
+ %b = load double, double* %idx1, align 8
%vec2 = insertelement <4 x double> %vec, double %b, i32 1
- %idx3 = getelementptr inbounds double* %ptr, i64 2
- %c = load double* %idx3, align 8
+ %idx3 = getelementptr inbounds double, double* %ptr, i64 2
+ %c = load double, double* %idx3, align 8
%vec4 = insertelement <4 x double> %vec2, double %c, i32 2
- %idx5 = getelementptr inbounds double* %ptr, i64 3
- %d = load double* %idx5, align 8
+ %idx5 = getelementptr inbounds double, double* %ptr, i64 3
+ %d = load double, double* %idx5, align 8
%vec6 = insertelement <4 x double> %vec4, double %d, i32 3
ret <4 x double> %vec6
@@ -120,14 +120,14 @@ define <4 x double> @merge_4_doubles(double* %ptr) {
; Recognize and combine consecutive loads even when the
; first of the combined loads is offset from the base address.
define <4 x double> @merge_4_doubles_offset(double* %ptr) {
- %arrayidx4 = getelementptr inbounds double* %ptr, i64 4
- %arrayidx5 = getelementptr inbounds double* %ptr, i64 5
- %arrayidx6 = getelementptr inbounds double* %ptr, i64 6
- %arrayidx7 = getelementptr inbounds double* %ptr, i64 7
- %e = load double* %arrayidx4, align 8
- %f = load double* %arrayidx5, align 8
- %g = load double* %arrayidx6, align 8
- %h = load double* %arrayidx7, align 8
+ %arrayidx4 = getelementptr inbounds double, double* %ptr, i64 4
+ %arrayidx5 = getelementptr inbounds double, double* %ptr, i64 5
+ %arrayidx6 = getelementptr inbounds double, double* %ptr, i64 6
+ %arrayidx7 = getelementptr inbounds double, double* %ptr, i64 7
+ %e = load double, double* %arrayidx4, align 8
+ %f = load double, double* %arrayidx5, align 8
+ %g = load double, double* %arrayidx6, align 8
+ %h = load double, double* %arrayidx7, align 8
%vecinit4 = insertelement <4 x double> undef, double %e, i32 0
%vecinit5 = insertelement <4 x double> %vecinit4, double %f, i32 1
%vecinit6 = insertelement <4 x double> %vecinit5, double %g, i32 2
diff --git a/test/CodeGen/X86/vec_logical.ll b/test/CodeGen/X86/vec_logical.ll
index 1dc0b163aeb3..6ab2d8963abd 100644
--- a/test/CodeGen/X86/vec_logical.ll
+++ b/test/CodeGen/X86/vec_logical.ll
@@ -29,7 +29,7 @@ entry:
define void @t3(<4 x float> %a, <4 x float> %b, <4 x float>* %c, <4 x float>* %d) {
entry:
- %tmp3 = load <4 x float>* %c ; <<4 x float>> [#uses=1]
+ %tmp3 = load <4 x float>, <4 x float>* %c ; <<4 x float>> [#uses=1]
%tmp11 = bitcast <4 x float> %a to <4 x i32> ; <<4 x i32>> [#uses=1]
%tmp12 = bitcast <4 x float> %b to <4 x i32> ; <<4 x i32>> [#uses=1]
%tmp13 = xor <4 x i32> %tmp11, < i32 -1, i32 -1, i32 -1, i32 -1 > ; <<4 x i32>> [#uses=1]
diff --git a/test/CodeGen/X86/vec_partial.ll b/test/CodeGen/X86/vec_partial.ll
new file mode 100644
index 000000000000..709f326e5027
--- /dev/null
+++ b/test/CodeGen/X86/vec_partial.ll
@@ -0,0 +1,32 @@
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s
+
+; PR11580
+define <3 x float> @addf3(<3 x float> %x) {
+; CHECK-LABEL: addf3
+; CHECK: # BB#0:
+; CHECK-NEXT: addps .LCPI0_0(%rip), %xmm0
+; CHECK-NEXT: retq
+entry:
+ %add = fadd <3 x float> %x, <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
+ ret <3 x float> %add
+}
+
+; PR11580
+define <4 x float> @cvtf3_f4(<3 x float> %x) {
+; CHECK-LABEL: cvtf3_f4
+; CHECK: # BB#0:
+; CHECK-NEXT: retq
+entry:
+ %extractVec = shufflevector <3 x float> %x, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
+ ret <4 x float> %extractVec
+}
+
+; PR11580
+define <3 x float> @cvtf4_f3(<4 x float> %x) {
+; CHECK-LABEL: cvtf4_f3
+; CHECK: # BB#0:
+; CHECK-NEXT: retq
+entry:
+ %extractVec = shufflevector <4 x float> %x, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
+ ret <3 x float> %extractVec
+}
diff --git a/test/CodeGen/X86/vec_reassociate.ll b/test/CodeGen/X86/vec_reassociate.ll
new file mode 100644
index 000000000000..bf2053f78424
--- /dev/null
+++ b/test/CodeGen/X86/vec_reassociate.ll
@@ -0,0 +1,119 @@
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse4.1 | FileCheck %s
+
+define <4 x i32> @add_4i32(<4 x i32> %a0, <4 x i32> %a1) {
+ ;CHECK-LABEL: @add_4i32
+ ;CHECK: # BB#0:
+ ;CHECK-NEXT: paddd %xmm1, %xmm0
+ ;CHECK-NEXT: retq
+ %1 = add <4 x i32> %a0, <i32 1, i32 -2, i32 3, i32 -4>
+ %2 = add <4 x i32> %a1, <i32 -1, i32 2, i32 -3, i32 4>
+ %3 = add <4 x i32> %1, %2
+ ret <4 x i32> %3
+}
+
+define <4 x i32> @add_4i32_commute(<4 x i32> %a0, <4 x i32> %a1) {
+ ;CHECK-LABEL: @add_4i32_commute
+ ;CHECK: # BB#0:
+ ;CHECK-NEXT: paddd %xmm1, %xmm0
+ ;CHECK-NEXT: retq
+ %1 = add <4 x i32> <i32 1, i32 -2, i32 3, i32 -4>, %a0
+ %2 = add <4 x i32> <i32 -1, i32 2, i32 -3, i32 4>, %a1
+ %3 = add <4 x i32> %1, %2
+ ret <4 x i32> %3
+}
+
+define <4 x i32> @mul_4i32(<4 x i32> %a0, <4 x i32> %a1) {
+ ;CHECK-LABEL: @mul_4i32
+ ;CHECK: # BB#0:
+ ;CHECK-NEXT: pmulld %xmm1, %xmm0
+ ;CHECK-NEXT: pmulld .LCPI2_0(%rip), %xmm0
+ ;CHECK-NEXT: retq
+ %1 = mul <4 x i32> %a0, <i32 1, i32 2, i32 3, i32 4>
+ %2 = mul <4 x i32> %a1, <i32 4, i32 3, i32 2, i32 1>
+ %3 = mul <4 x i32> %1, %2
+ ret <4 x i32> %3
+}
+
+define <4 x i32> @mul_4i32_commute(<4 x i32> %a0, <4 x i32> %a1) {
+ ;CHECK-LABEL: @mul_4i32_commute
+ ;CHECK: # BB#0:
+ ;CHECK-NEXT: pmulld %xmm1, %xmm0
+ ;CHECK-NEXT: pmulld .LCPI3_0(%rip), %xmm0
+ ;CHECK-NEXT: retq
+ %1 = mul <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %a0
+ %2 = mul <4 x i32> <i32 4, i32 3, i32 2, i32 1>, %a1
+ %3 = mul <4 x i32> %1, %2
+ ret <4 x i32> %3
+}
+
+define <4 x i32> @and_4i32(<4 x i32> %a0, <4 x i32> %a1) {
+ ;CHECK-LABEL: @and_4i32
+ ;CHECK: # BB#0:
+ ;CHECK-NEXT: andps %xmm1, %xmm0
+ ;CHECK-NEXT: andps .LCPI4_0(%rip), %xmm0
+ ;CHECK-NEXT: retq
+ %1 = and <4 x i32> %a0, <i32 -2, i32 -2, i32 3, i32 3>
+ %2 = and <4 x i32> %a1, <i32 -1, i32 -1, i32 1, i32 1>
+ %3 = and <4 x i32> %1, %2
+ ret <4 x i32> %3
+}
+
+define <4 x i32> @and_4i32_commute(<4 x i32> %a0, <4 x i32> %a1) {
+ ;CHECK-LABEL: @and_4i32_commute
+ ;CHECK: # BB#0:
+ ;CHECK-NEXT: andps %xmm1, %xmm0
+ ;CHECK-NEXT: andps .LCPI5_0(%rip), %xmm0
+ ;CHECK-NEXT: retq
+ %1 = and <4 x i32> <i32 -2, i32 -2, i32 3, i32 3>, %a0
+ %2 = and <4 x i32> <i32 -1, i32 -1, i32 1, i32 1>, %a1
+ %3 = and <4 x i32> %1, %2
+ ret <4 x i32> %3
+}
+
+define <4 x i32> @or_4i32(<4 x i32> %a0, <4 x i32> %a1) {
+ ;CHECK-LABEL: @or_4i32
+ ;CHECK: # BB#0:
+ ;CHECK-NEXT: orps %xmm1, %xmm0
+ ;CHECK-NEXT: orps .LCPI6_0(%rip), %xmm0
+ ;CHECK-NEXT: retq
+ %1 = or <4 x i32> %a0, <i32 -2, i32 -2, i32 3, i32 3>
+ %2 = or <4 x i32> %a1, <i32 -1, i32 -1, i32 1, i32 1>
+ %3 = or <4 x i32> %1, %2
+ ret <4 x i32> %3
+}
+
+define <4 x i32> @or_4i32_commute(<4 x i32> %a0, <4 x i32> %a1) {
+ ;CHECK-LABEL: @or_4i32_commute
+ ;CHECK: # BB#0:
+ ;CHECK-NEXT: orps %xmm1, %xmm0
+ ;CHECK-NEXT: orps .LCPI7_0(%rip), %xmm0
+ ;CHECK-NEXT: retq
+ %1 = or <4 x i32> <i32 -2, i32 -2, i32 3, i32 3>, %a0
+ %2 = or <4 x i32> <i32 -1, i32 -1, i32 1, i32 1>, %a1
+ %3 = or <4 x i32> %1, %2
+ ret <4 x i32> %3
+}
+
+define <4 x i32> @xor_4i32(<4 x i32> %a0, <4 x i32> %a1) {
+ ;CHECK-LABEL: @xor_4i32
+ ;CHECK: # BB#0:
+ ;CHECK-NEXT: xorps %xmm1, %xmm0
+ ;CHECK-NEXT: xorps .LCPI8_0(%rip), %xmm0
+ ;CHECK-NEXT: retq
+ %1 = xor <4 x i32> %a0, <i32 -2, i32 -2, i32 3, i32 3>
+ %2 = xor <4 x i32> %a1, <i32 -1, i32 -1, i32 1, i32 1>
+ %3 = xor <4 x i32> %1, %2
+ ret <4 x i32> %3
+}
+
+define <4 x i32> @xor_4i32_commute(<4 x i32> %a0, <4 x i32> %a1) {
+ ;CHECK-LABEL: @xor_4i32_commute
+ ;CHECK: # BB#0:
+ ;CHECK-NEXT: xorps %xmm1, %xmm0
+ ;CHECK-NEXT: xorps .LCPI9_0(%rip), %xmm0
+ ;CHECK-NEXT: retq
+ %1 = xor <4 x i32> <i32 -2, i32 -2, i32 3, i32 3>, %a0
+ %2 = xor <4 x i32> <i32 -1, i32 -1, i32 1, i32 1>, %a1
+ %3 = xor <4 x i32> %1, %2
+ ret <4 x i32> %3
+}
diff --git a/test/CodeGen/X86/vec_set-7.ll b/test/CodeGen/X86/vec_set-7.ll
index d993178a9892..1701e491da66 100644
--- a/test/CodeGen/X86/vec_set-7.ll
+++ b/test/CodeGen/X86/vec_set-7.ll
@@ -2,7 +2,7 @@
define <2 x i64> @test(<2 x i64>* %p) nounwind {
%tmp = bitcast <2 x i64>* %p to double*
- %tmp.upgrd.1 = load double* %tmp
+ %tmp.upgrd.1 = load double, double* %tmp
%tmp.upgrd.2 = insertelement <2 x double> undef, double %tmp.upgrd.1, i32 0
%tmp5 = insertelement <2 x double> %tmp.upgrd.2, double 0.0, i32 1
%tmp.upgrd.3 = bitcast <2 x double> %tmp5 to <2 x i64>
diff --git a/test/CodeGen/X86/vec_set-F.ll b/test/CodeGen/X86/vec_set-F.ll
index 6dd3cb0abeb9..aa17f9bfbf5c 100644
--- a/test/CodeGen/X86/vec_set-F.ll
+++ b/test/CodeGen/X86/vec_set-F.ll
@@ -4,7 +4,7 @@
define <2 x i64> @t1(<2 x i64>* %ptr) nounwind {
%tmp45 = bitcast <2 x i64>* %ptr to <2 x i32>*
- %tmp615 = load <2 x i32>* %tmp45
+ %tmp615 = load <2 x i32>, <2 x i32>* %tmp45
%tmp7 = bitcast <2 x i32> %tmp615 to i64
%tmp8 = insertelement <2 x i64> zeroinitializer, i64 %tmp7, i32 0
ret <2 x i64> %tmp8
diff --git a/test/CodeGen/X86/vec_setcc-2.ll b/test/CodeGen/X86/vec_setcc-2.ll
index ef916dcd709e..e150882642a1 100644
--- a/test/CodeGen/X86/vec_setcc-2.ll
+++ b/test/CodeGen/X86/vec_setcc-2.ll
@@ -25,13 +25,13 @@ entry:
for.body: ; preds = %for.body, %entry
%indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
- %arrayidx1 = getelementptr inbounds <2 x i64>* %in, i64 %indvars.iv
- %arrayidx1.val = load <2 x i64>* %arrayidx1, align 16
+ %arrayidx1 = getelementptr inbounds <2 x i64>, <2 x i64>* %in, i64 %indvars.iv
+ %arrayidx1.val = load <2 x i64>, <2 x i64>* %arrayidx1, align 16
%0 = bitcast <2 x i64> %arrayidx1.val to <8 x i16>
%cmp.i.i = icmp ult <8 x i16> %0, <i16 26, i16 26, i16 26, i16 26, i16 26, i16 26, i16 26, i16 26>
%sext.i.i = sext <8 x i1> %cmp.i.i to <8 x i16>
%1 = bitcast <8 x i16> %sext.i.i to <2 x i64>
- %arrayidx5 = getelementptr inbounds <2 x i64>* %out, i64 %indvars.iv
+ %arrayidx5 = getelementptr inbounds <2 x i64>, <2 x i64>* %out, i64 %indvars.iv
store <2 x i64> %1, <2 x i64>* %arrayidx5, align 16
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
@@ -54,13 +54,13 @@ entry:
for.body: ; preds = %for.body, %entry
%indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
- %arrayidx1 = getelementptr inbounds <2 x i64>* %in, i64 %indvars.iv
- %arrayidx1.val = load <2 x i64>* %arrayidx1, align 16
+ %arrayidx1 = getelementptr inbounds <2 x i64>, <2 x i64>* %in, i64 %indvars.iv
+ %arrayidx1.val = load <2 x i64>, <2 x i64>* %arrayidx1, align 16
%0 = bitcast <2 x i64> %arrayidx1.val to <8 x i16>
%cmp.i.i = icmp ult <8 x i16> %0, <i16 0, i16 26, i16 26, i16 26, i16 26, i16 26, i16 26, i16 26>
%sext.i.i = sext <8 x i1> %cmp.i.i to <8 x i16>
%1 = bitcast <8 x i16> %sext.i.i to <2 x i64>
- %arrayidx5 = getelementptr inbounds <2 x i64>* %out, i64 %indvars.iv
+ %arrayidx5 = getelementptr inbounds <2 x i64>, <2 x i64>* %out, i64 %indvars.iv
store <2 x i64> %1, <2 x i64>* %arrayidx5, align 16
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
diff --git a/test/CodeGen/X86/vec_shift5.ll b/test/CodeGen/X86/vec_shift5.ll
index 2e98003ae1cd..499aa22de52d 100644
--- a/test/CodeGen/X86/vec_shift5.ll
+++ b/test/CodeGen/X86/vec_shift5.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=x86-64 -mcpu=corei7 -mattr=-sse4.1 < %s | FileCheck %s
+; RUN: llc -march=x86-64 -mattr=+sse2 < %s | FileCheck %s
; Verify that we correctly fold target specific packed vector shifts by
; immediate count into a simple build_vector when the elements of the vector
diff --git a/test/CodeGen/X86/vec_shift6.ll b/test/CodeGen/X86/vec_shift6.ll
index df2d9cb04687..b71f9893a9db 100644
--- a/test/CodeGen/X86/vec_shift6.ll
+++ b/test/CodeGen/X86/vec_shift6.ll
@@ -1,6 +1,6 @@
-; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 | FileCheck %s -check-prefix=CHECK -check-prefix=SSE
-; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=core-avx2 | FileCheck %s -check-prefix=CHECK -check-prefix=AVX2 -check-prefix=AVX2ONLY
-; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=knl | FileCheck %s -check-prefix=CHECK -check-prefix=AVX2 -check-prefix=AVX512
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=sse4.1 | FileCheck %s -check-prefix=CHECK -check-prefix=SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=avx2 | FileCheck %s -check-prefix=CHECK -check-prefix=AVX2 -check-prefix=AVX2ONLY
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=avx512f | FileCheck %s -check-prefix=CHECK -check-prefix=AVX2 -check-prefix=AVX512
; Verify that we don't scalarize a packed vector shift left of 16-bit
diff --git a/test/CodeGen/X86/vec_shift7.ll b/test/CodeGen/X86/vec_shift7.ll
new file mode 100644
index 000000000000..cdf828976be4
--- /dev/null
+++ b/test/CodeGen/X86/vec_shift7.ll
@@ -0,0 +1,12 @@
+; RUN: llc < %s -march=x86 -mcpu=yonah | FileCheck %s
+
+
+; Verify that we don't fail when shift by zero is encountered.
+
+define i64 @test1(<2 x i64> %a) {
+entry:
+ %c = shl <2 x i64> %a, <i64 0, i64 2>
+ %d = extractelement <2 x i64> %c, i32 0
+ ret i64 %d
+}
+; CHECK-LABEL: test1
diff --git a/test/CodeGen/X86/vec_split.ll b/test/CodeGen/X86/vec_split.ll
index bc2c6633f20d..1df4cf2b2325 100644
--- a/test/CodeGen/X86/vec_split.ll
+++ b/test/CodeGen/X86/vec_split.ll
@@ -1,6 +1,6 @@
-; RUN: llc -march=x86-64 -mcpu=corei7 < %s | FileCheck %s -check-prefix=SSE4
-; RUN: llc -march=x86-64 -mcpu=corei7-avx < %s | FileCheck %s -check-prefix=AVX1
-; RUN: llc -march=x86-64 -mcpu=core-avx2 < %s | FileCheck %s -check-prefix=AVX2
+; RUN: llc -march=x86-64 -mattr=sse4.1 < %s | FileCheck %s -check-prefix=SSE4
+; RUN: llc -march=x86-64 -mattr=avx < %s | FileCheck %s -check-prefix=AVX1
+; RUN: llc -march=x86-64 -mattr=avx2 < %s | FileCheck %s -check-prefix=AVX2
define <16 x i16> @split16(<16 x i16> %a, <16 x i16> %b, <16 x i8> %__mask) {
; SSE4-LABEL: split16:
diff --git a/test/CodeGen/X86/vec_ss_load_fold.ll b/test/CodeGen/X86/vec_ss_load_fold.ll
index 80f12a2dec2c..ab5031e267dc 100644
--- a/test/CodeGen/X86/vec_ss_load_fold.ll
+++ b/test/CodeGen/X86/vec_ss_load_fold.ll
@@ -51,7 +51,7 @@ declare <4 x float> @llvm.x86.sse41.round.ss(<4 x float>, <4 x float>, i32)
declare <4 x float> @f()
define <4 x float> @test3(<4 x float> %A, float *%b, i32 %C) nounwind {
- %a = load float *%b
+ %a = load float , float *%b
%B = insertelement <4 x float> undef, float %a, i32 0
%X = call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %A, <4 x float> %B, i32 4)
ret <4 x float> %X
@@ -60,7 +60,7 @@ define <4 x float> @test3(<4 x float> %A, float *%b, i32 %C) nounwind {
}
define <4 x float> @test4(<4 x float> %A, float *%b, i32 %C) nounwind {
- %a = load float *%b
+ %a = load float , float *%b
%B = insertelement <4 x float> undef, float %a, i32 0
%q = call <4 x float> @f()
%X = call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %q, <4 x float> %B, i32 4)
diff --git a/test/CodeGen/X86/vec_trunc_sext.ll b/test/CodeGen/X86/vec_trunc_sext.ll
index 3c446bba4ea8..dcfe423eb748 100644
--- a/test/CodeGen/X86/vec_trunc_sext.ll
+++ b/test/CodeGen/X86/vec_trunc_sext.ll
@@ -9,7 +9,7 @@
; but that is beyond our current codegen capabilities.
define <4 x i32> @trunc_sext(<4 x i16>* %in) {
- %load = load <4 x i16>* %in
+ %load = load <4 x i16>, <4 x i16>* %in
%trunc = trunc <4 x i16> %load to <4 x i8>
%sext = sext <4 x i8> %trunc to <4 x i32>
ret <4 x i32> %sext
diff --git a/test/CodeGen/X86/vec_zero.ll b/test/CodeGen/X86/vec_zero.ll
index c3ea0ad2023f..1d900a0919f2 100644
--- a/test/CodeGen/X86/vec_zero.ll
+++ b/test/CodeGen/X86/vec_zero.ll
@@ -3,7 +3,7 @@
; CHECK: foo
; CHECK: xorps
define void @foo(<4 x float>* %P) {
- %T = load <4 x float>* %P ; <<4 x float>> [#uses=1]
+ %T = load <4 x float>, <4 x float>* %P ; <<4 x float>> [#uses=1]
%S = fadd <4 x float> zeroinitializer, %T ; <<4 x float>> [#uses=1]
store <4 x float> %S, <4 x float>* %P
ret void
@@ -12,7 +12,7 @@ define void @foo(<4 x float>* %P) {
; CHECK: bar
; CHECK: pxor
define void @bar(<4 x i32>* %P) {
- %T = load <4 x i32>* %P ; <<4 x i32>> [#uses=1]
+ %T = load <4 x i32>, <4 x i32>* %P ; <<4 x i32>> [#uses=1]
%S = sub <4 x i32> zeroinitializer, %T ; <<4 x i32>> [#uses=1]
store <4 x i32> %S, <4 x i32>* %P
ret void
diff --git a/test/CodeGen/X86/vec_zero_cse.ll b/test/CodeGen/X86/vec_zero_cse.ll
index bda3feff2b00..8ed8083a284f 100644
--- a/test/CodeGen/X86/vec_zero_cse.ll
+++ b/test/CodeGen/X86/vec_zero_cse.ll
@@ -1,38 +1,39 @@
-; RUN: llc < %s -relocation-model=static -march=x86 -mcpu=yonah | FileCheck %s
-; RUN: llc < %s -relocation-model=static -march=x86 -mcpu=yonah | FileCheck -check-prefix CHECK2 %s
+; RUN: llc < %s -relocation-model=static -mtriple=i686-unknown -mattr=+mmx,+sse3 | FileCheck %s
; 64-bit stores here do not use MMX.
-; CHECK: xorps
-; CHECK-NOT: xorps
-
-; CHECK2: pcmpeqd
-; CHECK2-NOT: pcmpeqd
-
@M1 = external global <1 x i64>
@M2 = external global <2 x i32>
@S1 = external global <2 x i64>
@S2 = external global <4 x i32>
-define void @test() {
+define void @test1() {
+;CHECK-LABEL: @test1
+;CHECK: xorps
store <1 x i64> zeroinitializer, <1 x i64>* @M1
store <2 x i32> zeroinitializer, <2 x i32>* @M2
ret void
}
define void @test2() {
+;CHECK-LABEL: @test2
+;CHECK: pcmpeqd
store <1 x i64> < i64 -1 >, <1 x i64>* @M1
store <2 x i32> < i32 -1, i32 -1 >, <2 x i32>* @M2
ret void
}
define void @test3() {
+;CHECK-LABEL: @test3
+;CHECK: xorps
store <2 x i64> zeroinitializer, <2 x i64>* @S1
store <4 x i32> zeroinitializer, <4 x i32>* @S2
ret void
}
define void @test4() {
+;CHECK-LABEL: @test4
+;CHECK: pcmpeqd
store <2 x i64> < i64 -1, i64 -1>, <2 x i64>* @S1
store <4 x i32> < i32 -1, i32 -1, i32 -1, i32 -1 >, <4 x i32>* @S2
ret void
diff --git a/test/CodeGen/X86/vector-blend.ll b/test/CodeGen/X86/vector-blend.ll
index f23b82883858..e15daaa54a33 100644
--- a/test/CodeGen/X86/vector-blend.ll
+++ b/test/CodeGen/X86/vector-blend.ll
@@ -9,16 +9,14 @@
define <4 x float> @vsel_float(<4 x float> %v1, <4 x float> %v2) {
; SSE2-LABEL: vsel_float:
; SSE2: # BB#0: # %entry
-; SSE2-NEXT: andps {{.*}}(%rip), %xmm1
-; SSE2-NEXT: andps {{.*}}(%rip), %xmm0
-; SSE2-NEXT: orps %xmm1, %xmm0
+; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[1,3]
+; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: vsel_float:
; SSSE3: # BB#0: # %entry
-; SSSE3-NEXT: andps {{.*}}(%rip), %xmm1
-; SSSE3-NEXT: andps {{.*}}(%rip), %xmm0
-; SSSE3-NEXT: orps %xmm1, %xmm0
+; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[1,3]
+; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: vsel_float:
@@ -38,13 +36,13 @@ entry:
define <4 x float> @vsel_float2(<4 x float> %v1, <4 x float> %v2) {
; SSE2-LABEL: vsel_float2:
; SSE2: # BB#0: # %entry
-; SSE2-NEXT: movss %xmm0, %xmm1
+; SSE2-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
; SSE2-NEXT: movaps %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: vsel_float2:
; SSSE3: # BB#0: # %entry
-; SSSE3-NEXT: movss %xmm0, %xmm1
+; SSSE3-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
; SSSE3-NEXT: movaps %xmm1, %xmm0
; SSSE3-NEXT: retq
;
@@ -55,7 +53,7 @@ define <4 x float> @vsel_float2(<4 x float> %v1, <4 x float> %v2) {
;
; AVX-LABEL: vsel_float2:
; AVX: # BB#0: # %entry
-; AVX-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
+; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; AVX-NEXT: retq
entry:
%vsel = select <4 x i1> <i1 true, i1 false, i1 false, i1 false>, <4 x float> %v1, <4 x float> %v2
@@ -65,16 +63,14 @@ entry:
define <4 x i8> @vsel_4xi8(<4 x i8> %v1, <4 x i8> %v2) {
; SSE2-LABEL: vsel_4xi8:
; SSE2: # BB#0: # %entry
-; SSE2-NEXT: andps {{.*}}(%rip), %xmm1
-; SSE2-NEXT: andps {{.*}}(%rip), %xmm0
-; SSE2-NEXT: orps %xmm1, %xmm0
+; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[3,0]
+; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: vsel_4xi8:
; SSSE3: # BB#0: # %entry
-; SSSE3-NEXT: andps {{.*}}(%rip), %xmm1
-; SSSE3-NEXT: andps {{.*}}(%rip), %xmm0
-; SSSE3-NEXT: orps %xmm1, %xmm0
+; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[3,0]
+; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: vsel_4xi8:
@@ -99,16 +95,16 @@ entry:
define <4 x i16> @vsel_4xi16(<4 x i16> %v1, <4 x i16> %v2) {
; SSE2-LABEL: vsel_4xi16:
; SSE2: # BB#0: # %entry
-; SSE2-NEXT: andps {{.*}}(%rip), %xmm1
-; SSE2-NEXT: andps {{.*}}(%rip), %xmm0
-; SSE2-NEXT: orps %xmm1, %xmm0
+; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,0],xmm0[0,0]
+; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[2,3]
+; SSE2-NEXT: movaps %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: vsel_4xi16:
; SSSE3: # BB#0: # %entry
-; SSSE3-NEXT: andps {{.*}}(%rip), %xmm1
-; SSSE3-NEXT: andps {{.*}}(%rip), %xmm0
-; SSSE3-NEXT: orps %xmm1, %xmm0
+; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,0],xmm0[0,0]
+; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[2,3]
+; SSSE3-NEXT: movaps %xmm1, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: vsel_4xi16:
@@ -133,16 +129,16 @@ entry:
define <4 x i32> @vsel_i32(<4 x i32> %v1, <4 x i32> %v2) {
; SSE2-LABEL: vsel_i32:
; SSE2: # BB#0: # %entry
-; SSE2-NEXT: andps {{.*}}(%rip), %xmm1
-; SSE2-NEXT: andps {{.*}}(%rip), %xmm0
-; SSE2-NEXT: orps %xmm1, %xmm0
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: vsel_i32:
; SSSE3: # BB#0: # %entry
-; SSSE3-NEXT: andps {{.*}}(%rip), %xmm1
-; SSSE3-NEXT: andps {{.*}}(%rip), %xmm0
-; SSSE3-NEXT: orps %xmm1, %xmm0
+; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
+; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: vsel_i32:
@@ -167,14 +163,14 @@ entry:
define <2 x double> @vsel_double(<2 x double> %v1, <2 x double> %v2) {
; SSE2-LABEL: vsel_double:
; SSE2: # BB#0: # %entry
-; SSE2-NEXT: movsd %xmm0, %xmm1
-; SSE2-NEXT: movaps %xmm1, %xmm0
+; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
+; SSE2-NEXT: movapd %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: vsel_double:
; SSSE3: # BB#0: # %entry
-; SSSE3-NEXT: movsd %xmm0, %xmm1
-; SSSE3-NEXT: movaps %xmm1, %xmm0
+; SSSE3-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
+; SSSE3-NEXT: movapd %xmm1, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: vsel_double:
@@ -194,14 +190,14 @@ entry:
define <2 x i64> @vsel_i64(<2 x i64> %v1, <2 x i64> %v2) {
; SSE2-LABEL: vsel_i64:
; SSE2: # BB#0: # %entry
-; SSE2-NEXT: movsd %xmm0, %xmm1
-; SSE2-NEXT: movaps %xmm1, %xmm0
+; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
+; SSE2-NEXT: movapd %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: vsel_i64:
; SSSE3: # BB#0: # %entry
-; SSSE3-NEXT: movsd %xmm0, %xmm1
-; SSSE3-NEXT: movaps %xmm1, %xmm0
+; SSSE3-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
+; SSSE3-NEXT: movapd %xmm1, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: vsel_i64:
@@ -226,16 +222,20 @@ entry:
define <8 x i16> @vsel_8xi16(<8 x i16> %v1, <8 x i16> %v2) {
; SSE2-LABEL: vsel_8xi16:
; SSE2: # BB#0: # %entry
-; SSE2-NEXT: andps {{.*}}(%rip), %xmm1
-; SSE2-NEXT: andps {{.*}}(%rip), %xmm0
-; SSE2-NEXT: orps %xmm1, %xmm0
+; SSE2-NEXT: movaps {{.*#+}} xmm2 = [0,65535,65535,65535,0,65535,65535,65535]
+; SSE2-NEXT: andps %xmm2, %xmm1
+; SSE2-NEXT: andnps %xmm0, %xmm2
+; SSE2-NEXT: orps %xmm1, %xmm2
+; SSE2-NEXT: movaps %xmm2, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: vsel_8xi16:
; SSSE3: # BB#0: # %entry
-; SSSE3-NEXT: andps {{.*}}(%rip), %xmm1
-; SSSE3-NEXT: andps {{.*}}(%rip), %xmm0
-; SSSE3-NEXT: orps %xmm1, %xmm0
+; SSSE3-NEXT: movaps {{.*#+}} xmm2 = [0,65535,65535,65535,0,65535,65535,65535]
+; SSSE3-NEXT: andps %xmm2, %xmm1
+; SSSE3-NEXT: andnps %xmm0, %xmm2
+; SSSE3-NEXT: orps %xmm1, %xmm2
+; SSSE3-NEXT: movaps %xmm2, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: vsel_8xi16:
@@ -255,29 +255,30 @@ entry:
define <16 x i8> @vsel_i8(<16 x i8> %v1, <16 x i8> %v2) {
; SSE2-LABEL: vsel_i8:
; SSE2: # BB#0: # %entry
-; SSE2-NEXT: andps {{.*}}(%rip), %xmm1
-; SSE2-NEXT: andps {{.*}}(%rip), %xmm0
-; SSE2-NEXT: orps %xmm1, %xmm0
+; SSE2-NEXT: movaps {{.*#+}} xmm2 = [255,0,0,0,255,0,0,0,255,255,255,255,255,255,255,255]
+; SSE2-NEXT: andps %xmm2, %xmm0
+; SSE2-NEXT: andnps %xmm1, %xmm2
+; SSE2-NEXT: orps %xmm2, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: vsel_i8:
; SSSE3: # BB#0: # %entry
-; SSSE3-NEXT: andps {{.*}}(%rip), %xmm1
-; SSSE3-NEXT: andps {{.*}}(%rip), %xmm0
-; SSSE3-NEXT: orps %xmm1, %xmm0
+; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = zero,xmm1[1,2,3],zero,xmm1[5,6,7],zero,zero,zero,zero,zero,zero,zero,zero
+; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[8,9,10,11,12,13,14,15]
+; SSSE3-NEXT: por %xmm1, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: vsel_i8:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: movdqa %xmm0, %xmm2
-; SSE41-NEXT: movaps {{.*#+}} xmm0 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
+; SSE41-NEXT: movaps {{.*#+}} xmm0 = [255,0,0,0,255,0,0,0,255,255,255,255,255,255,255,255]
; SSE41-NEXT: pblendvb %xmm2, %xmm1
; SSE41-NEXT: movdqa %xmm1, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: vsel_i8:
; AVX: # BB#0: # %entry
-; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
+; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [255,0,0,0,255,0,0,0,255,255,255,255,255,255,255,255]
; AVX-NEXT: vpblendvb %xmm2, %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
entry:
@@ -291,16 +292,16 @@ entry:
define <8 x float> @vsel_float8(<8 x float> %v1, <8 x float> %v2) {
; SSE2-LABEL: vsel_float8:
; SSE2: # BB#0: # %entry
-; SSE2-NEXT: movss %xmm0, %xmm2
-; SSE2-NEXT: movss %xmm1, %xmm3
+; SSE2-NEXT: movss {{.*#+}} xmm2 = xmm0[0],xmm2[1,2,3]
+; SSE2-NEXT: movss {{.*#+}} xmm3 = xmm1[0],xmm3[1,2,3]
; SSE2-NEXT: movaps %xmm2, %xmm0
; SSE2-NEXT: movaps %xmm3, %xmm1
; SSE2-NEXT: retq
;
; SSSE3-LABEL: vsel_float8:
; SSSE3: # BB#0: # %entry
-; SSSE3-NEXT: movss %xmm0, %xmm2
-; SSSE3-NEXT: movss %xmm1, %xmm3
+; SSSE3-NEXT: movss {{.*#+}} xmm2 = xmm0[0],xmm2[1,2,3]
+; SSSE3-NEXT: movss {{.*#+}} xmm3 = xmm1[0],xmm3[1,2,3]
; SSSE3-NEXT: movaps %xmm2, %xmm0
; SSSE3-NEXT: movaps %xmm3, %xmm1
; SSSE3-NEXT: retq
@@ -323,16 +324,16 @@ entry:
define <8 x i32> @vsel_i328(<8 x i32> %v1, <8 x i32> %v2) {
; SSE2-LABEL: vsel_i328:
; SSE2: # BB#0: # %entry
-; SSE2-NEXT: movss %xmm0, %xmm2
-; SSE2-NEXT: movss %xmm1, %xmm3
+; SSE2-NEXT: movss {{.*#+}} xmm2 = xmm0[0],xmm2[1,2,3]
+; SSE2-NEXT: movss {{.*#+}} xmm3 = xmm1[0],xmm3[1,2,3]
; SSE2-NEXT: movaps %xmm2, %xmm0
; SSE2-NEXT: movaps %xmm3, %xmm1
; SSE2-NEXT: retq
;
; SSSE3-LABEL: vsel_i328:
; SSSE3: # BB#0: # %entry
-; SSSE3-NEXT: movss %xmm0, %xmm2
-; SSSE3-NEXT: movss %xmm1, %xmm3
+; SSSE3-NEXT: movss {{.*#+}} xmm2 = xmm0[0],xmm2[1,2,3]
+; SSSE3-NEXT: movss {{.*#+}} xmm3 = xmm1[0],xmm3[1,2,3]
; SSSE3-NEXT: movaps %xmm2, %xmm0
; SSSE3-NEXT: movaps %xmm3, %xmm1
; SSSE3-NEXT: retq
@@ -360,21 +361,21 @@ entry:
define <8 x double> @vsel_double8(<8 x double> %v1, <8 x double> %v2) {
; SSE2-LABEL: vsel_double8:
; SSE2: # BB#0: # %entry
-; SSE2-NEXT: movsd %xmm0, %xmm4
-; SSE2-NEXT: movsd %xmm2, %xmm6
-; SSE2-NEXT: movaps %xmm4, %xmm0
+; SSE2-NEXT: movsd {{.*#+}} xmm4 = xmm0[0],xmm4[1]
+; SSE2-NEXT: movsd {{.*#+}} xmm6 = xmm2[0],xmm6[1]
+; SSE2-NEXT: movapd %xmm4, %xmm0
; SSE2-NEXT: movaps %xmm5, %xmm1
-; SSE2-NEXT: movaps %xmm6, %xmm2
+; SSE2-NEXT: movapd %xmm6, %xmm2
; SSE2-NEXT: movaps %xmm7, %xmm3
; SSE2-NEXT: retq
;
; SSSE3-LABEL: vsel_double8:
; SSSE3: # BB#0: # %entry
-; SSSE3-NEXT: movsd %xmm0, %xmm4
-; SSSE3-NEXT: movsd %xmm2, %xmm6
-; SSSE3-NEXT: movaps %xmm4, %xmm0
+; SSSE3-NEXT: movsd {{.*#+}} xmm4 = xmm0[0],xmm4[1]
+; SSSE3-NEXT: movsd {{.*#+}} xmm6 = xmm2[0],xmm6[1]
+; SSSE3-NEXT: movapd %xmm4, %xmm0
; SSSE3-NEXT: movaps %xmm5, %xmm1
-; SSSE3-NEXT: movaps %xmm6, %xmm2
+; SSSE3-NEXT: movapd %xmm6, %xmm2
; SSSE3-NEXT: movaps %xmm7, %xmm3
; SSSE3-NEXT: retq
;
@@ -399,21 +400,21 @@ entry:
define <8 x i64> @vsel_i648(<8 x i64> %v1, <8 x i64> %v2) {
; SSE2-LABEL: vsel_i648:
; SSE2: # BB#0: # %entry
-; SSE2-NEXT: movsd %xmm0, %xmm4
-; SSE2-NEXT: movsd %xmm2, %xmm6
-; SSE2-NEXT: movaps %xmm4, %xmm0
+; SSE2-NEXT: movsd {{.*#+}} xmm4 = xmm0[0],xmm4[1]
+; SSE2-NEXT: movsd {{.*#+}} xmm6 = xmm2[0],xmm6[1]
+; SSE2-NEXT: movapd %xmm4, %xmm0
; SSE2-NEXT: movaps %xmm5, %xmm1
-; SSE2-NEXT: movaps %xmm6, %xmm2
+; SSE2-NEXT: movapd %xmm6, %xmm2
; SSE2-NEXT: movaps %xmm7, %xmm3
; SSE2-NEXT: retq
;
; SSSE3-LABEL: vsel_i648:
; SSSE3: # BB#0: # %entry
-; SSSE3-NEXT: movsd %xmm0, %xmm4
-; SSSE3-NEXT: movsd %xmm2, %xmm6
-; SSSE3-NEXT: movaps %xmm4, %xmm0
+; SSSE3-NEXT: movsd {{.*#+}} xmm4 = xmm0[0],xmm4[1]
+; SSSE3-NEXT: movsd {{.*#+}} xmm6 = xmm2[0],xmm6[1]
+; SSSE3-NEXT: movapd %xmm4, %xmm0
; SSSE3-NEXT: movaps %xmm5, %xmm1
-; SSSE3-NEXT: movaps %xmm6, %xmm2
+; SSSE3-NEXT: movapd %xmm6, %xmm2
; SSSE3-NEXT: movaps %xmm7, %xmm3
; SSSE3-NEXT: retq
;
@@ -444,18 +445,18 @@ entry:
define <4 x double> @vsel_double4(<4 x double> %v1, <4 x double> %v2) {
; SSE2-LABEL: vsel_double4:
; SSE2: # BB#0: # %entry
-; SSE2-NEXT: movsd %xmm0, %xmm2
-; SSE2-NEXT: movsd %xmm1, %xmm3
-; SSE2-NEXT: movaps %xmm2, %xmm0
-; SSE2-NEXT: movaps %xmm3, %xmm1
+; SSE2-NEXT: movsd {{.*#+}} xmm2 = xmm0[0],xmm2[1]
+; SSE2-NEXT: movsd {{.*#+}} xmm3 = xmm1[0],xmm3[1]
+; SSE2-NEXT: movapd %xmm2, %xmm0
+; SSE2-NEXT: movapd %xmm3, %xmm1
; SSE2-NEXT: retq
;
; SSSE3-LABEL: vsel_double4:
; SSSE3: # BB#0: # %entry
-; SSSE3-NEXT: movsd %xmm0, %xmm2
-; SSSE3-NEXT: movsd %xmm1, %xmm3
-; SSSE3-NEXT: movaps %xmm2, %xmm0
-; SSSE3-NEXT: movaps %xmm3, %xmm1
+; SSSE3-NEXT: movsd {{.*#+}} xmm2 = xmm0[0],xmm2[1]
+; SSSE3-NEXT: movsd {{.*#+}} xmm3 = xmm1[0],xmm3[1]
+; SSSE3-NEXT: movapd %xmm2, %xmm0
+; SSSE3-NEXT: movapd %xmm3, %xmm1
; SSSE3-NEXT: retq
;
; SSE41-LABEL: vsel_double4:
@@ -556,16 +557,16 @@ entry:
define <4 x double> @constant_blendvpd_avx(<4 x double> %xy, <4 x double> %ab) {
; SSE2-LABEL: constant_blendvpd_avx:
; SSE2: # BB#0: # %entry
-; SSE2-NEXT: movsd %xmm1, %xmm3
+; SSE2-NEXT: movsd {{.*#+}} xmm3 = xmm1[0],xmm3[1]
; SSE2-NEXT: movaps %xmm2, %xmm0
-; SSE2-NEXT: movaps %xmm3, %xmm1
+; SSE2-NEXT: movapd %xmm3, %xmm1
; SSE2-NEXT: retq
;
; SSSE3-LABEL: constant_blendvpd_avx:
; SSSE3: # BB#0: # %entry
-; SSSE3-NEXT: movsd %xmm1, %xmm3
+; SSSE3-NEXT: movsd {{.*#+}} xmm3 = xmm1[0],xmm3[1]
; SSSE3-NEXT: movaps %xmm2, %xmm0
-; SSSE3-NEXT: movaps %xmm3, %xmm1
+; SSSE3-NEXT: movapd %xmm3, %xmm1
; SSSE3-NEXT: retq
;
; SSE41-LABEL: constant_blendvpd_avx:
@@ -586,26 +587,22 @@ entry:
define <8 x float> @constant_blendvps_avx(<8 x float> %xyzw, <8 x float> %abcd) {
; SSE2-LABEL: constant_blendvps_avx:
; SSE2: # BB#0: # %entry
-; SSE2-NEXT: movaps {{.*#+}} xmm4 = [4294967295,4294967295,4294967295,0]
-; SSE2-NEXT: andps %xmm4, %xmm2
-; SSE2-NEXT: movaps {{.*#+}} xmm5 = [0,0,0,4294967295]
-; SSE2-NEXT: andps %xmm5, %xmm0
-; SSE2-NEXT: orps %xmm2, %xmm0
-; SSE2-NEXT: andps %xmm4, %xmm3
-; SSE2-NEXT: andps %xmm5, %xmm1
-; SSE2-NEXT: orps %xmm3, %xmm1
+; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,0],xmm2[2,0]
+; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,1],xmm0[2,0]
+; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,0],xmm3[2,0]
+; SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[0,1],xmm1[2,0]
+; SSE2-NEXT: movaps %xmm2, %xmm0
+; SSE2-NEXT: movaps %xmm3, %xmm1
; SSE2-NEXT: retq
;
; SSSE3-LABEL: constant_blendvps_avx:
; SSSE3: # BB#0: # %entry
-; SSSE3-NEXT: movaps {{.*#+}} xmm4 = [4294967295,4294967295,4294967295,0]
-; SSSE3-NEXT: andps %xmm4, %xmm2
-; SSSE3-NEXT: movaps {{.*#+}} xmm5 = [0,0,0,4294967295]
-; SSSE3-NEXT: andps %xmm5, %xmm0
-; SSSE3-NEXT: orps %xmm2, %xmm0
-; SSSE3-NEXT: andps %xmm4, %xmm3
-; SSSE3-NEXT: andps %xmm5, %xmm1
-; SSSE3-NEXT: orps %xmm3, %xmm1
+; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,0],xmm2[2,0]
+; SSSE3-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,1],xmm0[2,0]
+; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,0],xmm3[2,0]
+; SSSE3-NEXT: shufps {{.*#+}} xmm3 = xmm3[0,1],xmm1[2,0]
+; SSSE3-NEXT: movaps %xmm2, %xmm0
+; SSSE3-NEXT: movaps %xmm3, %xmm1
; SSSE3-NEXT: retq
;
; SSE41-LABEL: constant_blendvps_avx:
@@ -626,32 +623,32 @@ entry:
define <32 x i8> @constant_pblendvb_avx2(<32 x i8> %xyzw, <32 x i8> %abcd) {
; SSE2-LABEL: constant_pblendvb_avx2:
; SSE2: # BB#0: # %entry
-; SSE2-NEXT: movaps {{.*#+}} xmm4 = [255,255,0,255,0,0,0,255,255,255,0,255,0,0,0,255]
-; SSE2-NEXT: andps %xmm4, %xmm2
-; SSE2-NEXT: movaps {{.*#+}} xmm5 = [0,0,255,0,255,255,255,0,0,0,255,0,255,255,255,0]
-; SSE2-NEXT: andps %xmm5, %xmm0
-; SSE2-NEXT: orps %xmm2, %xmm0
-; SSE2-NEXT: andps %xmm4, %xmm3
-; SSE2-NEXT: andps %xmm5, %xmm1
-; SSE2-NEXT: orps %xmm3, %xmm1
+; SSE2-NEXT: movaps {{.*#+}} xmm4 = [0,0,255,0,255,255,255,0,255,255,255,255,255,255,255,255]
+; SSE2-NEXT: movaps %xmm4, %xmm5
+; SSE2-NEXT: andnps %xmm2, %xmm5
+; SSE2-NEXT: andps %xmm4, %xmm0
+; SSE2-NEXT: orps %xmm5, %xmm0
+; SSE2-NEXT: andps %xmm4, %xmm1
+; SSE2-NEXT: andnps %xmm3, %xmm4
+; SSE2-NEXT: orps %xmm4, %xmm1
; SSE2-NEXT: retq
;
; SSSE3-LABEL: constant_pblendvb_avx2:
; SSSE3: # BB#0: # %entry
-; SSSE3-NEXT: movaps {{.*#+}} xmm4 = [255,255,0,255,0,0,0,255,255,255,0,255,0,0,0,255]
-; SSSE3-NEXT: andps %xmm4, %xmm2
-; SSSE3-NEXT: movaps {{.*#+}} xmm5 = [0,0,255,0,255,255,255,0,0,0,255,0,255,255,255,0]
-; SSSE3-NEXT: andps %xmm5, %xmm0
-; SSSE3-NEXT: orps %xmm2, %xmm0
-; SSSE3-NEXT: andps %xmm4, %xmm3
-; SSSE3-NEXT: andps %xmm5, %xmm1
-; SSSE3-NEXT: orps %xmm3, %xmm1
+; SSSE3-NEXT: movdqa {{.*#+}} xmm4 = [0,1,128,3,128,128,128,7,128,128,128,128,128,128,128,128]
+; SSSE3-NEXT: pshufb %xmm4, %xmm2
+; SSSE3-NEXT: movdqa {{.*#+}} xmm5 = [128,128,2,128,4,5,6,128,8,9,10,11,12,13,14,15]
+; SSSE3-NEXT: pshufb %xmm5, %xmm0
+; SSSE3-NEXT: por %xmm2, %xmm0
+; SSSE3-NEXT: pshufb %xmm4, %xmm3
+; SSSE3-NEXT: pshufb %xmm5, %xmm1
+; SSSE3-NEXT: por %xmm3, %xmm1
; SSSE3-NEXT: retq
;
; SSE41-LABEL: constant_pblendvb_avx2:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: movdqa %xmm0, %xmm4
-; SSE41-NEXT: movaps {{.*#+}} xmm0 = [0,0,255,0,255,255,255,0,0,0,255,0,255,255,255,0]
+; SSE41-NEXT: movaps {{.*#+}} xmm0 = [0,0,255,0,255,255,255,0,255,255,255,255,255,255,255,255]
; SSE41-NEXT: pblendvb %xmm4, %xmm2
; SSE41-NEXT: pblendvb %xmm1, %xmm3
; SSE41-NEXT: movdqa %xmm2, %xmm0
@@ -660,14 +657,15 @@ define <32 x i8> @constant_pblendvb_avx2(<32 x i8> %xyzw, <32 x i8> %abcd) {
;
; AVX1-LABEL: constant_pblendvb_avx2:
; AVX1: # BB#0: # %entry
-; AVX1-NEXT: vandps {{.*}}(%rip), %ymm1, %ymm1
-; AVX1-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
-; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,0,255,0,255,255,255,0,255,255,255,255,255,255,255,255]
+; AVX1-NEXT: vpblendvb %xmm2, %xmm0, %xmm1, %xmm1
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: constant_pblendvb_avx2:
; AVX2: # BB#0: # %entry
-; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,0,255,0,255,255,255,0,0,0,255,0,255,255,255,0,0,0,255,0,255,255,255,0,0,0,255,0,255,255,255,0]
+; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,0,255,0,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
; AVX2-NEXT: retq
entry:
@@ -709,7 +707,7 @@ entry:
define <8 x float> @blend_shufflevector_8xfloat(<8 x float> %a, <8 x float> %b) {
; SSE2-LABEL: blend_shufflevector_8xfloat:
; SSE2: # BB#0: # %entry
-; SSE2-NEXT: movss %xmm0, %xmm2
+; SSE2-NEXT: movss {{.*#+}} xmm2 = xmm0[0],xmm2[1,2,3]
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm3[3,0]
; SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[0,1],xmm1[0,2]
; SSE2-NEXT: movaps %xmm2, %xmm0
@@ -718,7 +716,7 @@ define <8 x float> @blend_shufflevector_8xfloat(<8 x float> %a, <8 x float> %b)
;
; SSSE3-LABEL: blend_shufflevector_8xfloat:
; SSSE3: # BB#0: # %entry
-; SSSE3-NEXT: movss %xmm0, %xmm2
+; SSSE3-NEXT: movss {{.*#+}} xmm2 = xmm0[0],xmm2[1,2,3]
; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm3[3,0]
; SSSE3-NEXT: shufps {{.*#+}} xmm3 = xmm3[0,1],xmm1[0,2]
; SSSE3-NEXT: movaps %xmm2, %xmm0
@@ -743,14 +741,14 @@ entry:
define <4 x double> @blend_shufflevector_4xdouble(<4 x double> %a, <4 x double> %b) {
; SSE2-LABEL: blend_shufflevector_4xdouble:
; SSE2: # BB#0: # %entry
-; SSE2-NEXT: movsd %xmm0, %xmm2
-; SSE2-NEXT: movaps %xmm2, %xmm0
+; SSE2-NEXT: movsd {{.*#+}} xmm2 = xmm0[0],xmm2[1]
+; SSE2-NEXT: movapd %xmm2, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: blend_shufflevector_4xdouble:
; SSSE3: # BB#0: # %entry
-; SSSE3-NEXT: movsd %xmm0, %xmm2
-; SSSE3-NEXT: movaps %xmm2, %xmm0
+; SSSE3-NEXT: movsd {{.*#+}} xmm2 = xmm0[0],xmm2[1]
+; SSSE3-NEXT: movapd %xmm2, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: blend_shufflevector_4xdouble:
@@ -770,13 +768,13 @@ entry:
define <4 x i64> @blend_shufflevector_4xi64(<4 x i64> %a, <4 x i64> %b) {
; SSE2-LABEL: blend_shufflevector_4xi64:
; SSE2: # BB#0: # %entry
-; SSE2-NEXT: movsd %xmm2, %xmm0
+; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1]
; SSE2-NEXT: movaps %xmm3, %xmm1
; SSE2-NEXT: retq
;
; SSSE3-LABEL: blend_shufflevector_4xi64:
; SSSE3: # BB#0: # %entry
-; SSSE3-NEXT: movsd %xmm2, %xmm0
+; SSSE3-NEXT: movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1]
; SSSE3-NEXT: movaps %xmm3, %xmm1
; SSSE3-NEXT: retq
;
diff --git a/test/CodeGen/X86/vector-ctpop.ll b/test/CodeGen/X86/vector-ctpop.ll
index 7091927a9006..59d67928c6fa 100644
--- a/test/CodeGen/X86/vector-ctpop.ll
+++ b/test/CodeGen/X86/vector-ctpop.ll
@@ -1,6 +1,6 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 | FileCheck -check-prefix=AVX2 %s
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=-popcnt | FileCheck -check-prefix=AVX1-NOPOPCNT %s
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 -mattr=-popcnt | FileCheck -check-prefix=AVX2-NOPOPCNT %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=avx2 | FileCheck -check-prefix=AVX2 %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=avx -mattr=-popcnt | FileCheck -check-prefix=AVX1-NOPOPCNT %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=avx2 -mattr=-popcnt | FileCheck -check-prefix=AVX2-NOPOPCNT %s
; Vector version of:
; v = v - ((v >> 1) & 0x55555555)
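(For reference: the comment above is the first step of the standard SWAR population-count recurrence; the rest of vector-ctpop.ll exercises its vector lowering when popcnt is disabled. Below is a minimal scalar C sketch of that recurrence — the masks after the first line and the helper name popcount32 are assumed from the usual formulation, not taken from the test itself.)

#include <stdint.h>
#include <stdio.h>

/* Scalar SWAR popcount; only the first step appears in the test comment,
 * the remaining masks are the conventional ones and are assumed here. */
static uint32_t popcount32(uint32_t v) {
    v = v - ((v >> 1) & 0x55555555u);                 /* per-2-bit counts   */
    v = (v & 0x33333333u) + ((v >> 2) & 0x33333333u); /* per-4-bit counts   */
    v = (v + (v >> 4)) & 0x0F0F0F0Fu;                 /* per-byte counts    */
    return (v * 0x01010101u) >> 24;                   /* sum the four bytes */
}

int main(void) {
    printf("%u\n", popcount32(0xF0F01234u)); /* prints 13 */
    return 0;
}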
diff --git a/test/CodeGen/X86/vector-gep.ll b/test/CodeGen/X86/vector-gep.ll
index 3f7ee3aa3e42..ce98e6759b65 100644
--- a/test/CodeGen/X86/vector-gep.ll
+++ b/test/CodeGen/X86/vector-gep.ll
@@ -12,8 +12,8 @@ entry:
%vecinit2.i = insertelement <4 x i32*> %vecinit.i, i32* %ptr, i32 1
%vecinit4.i = insertelement <4 x i32*> %vecinit2.i, i32* %ptr, i32 2
%vecinit6.i = insertelement <4 x i32*> %vecinit4.i, i32* %ptr, i32 3
- %A2 = getelementptr <4 x i32*> %vecinit6.i, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
- %A3 = getelementptr <4 x i32*> %A2, <4 x i32> <i32 10, i32 14, i32 19, i32 233>
+ %A2 = getelementptr i32, <4 x i32*> %vecinit6.i, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
+ %A3 = getelementptr i32, <4 x i32*> %A2, <4 x i32> <i32 10, i32 14, i32 19, i32 233>
ret <4 x i32*> %A3
}
@@ -24,9 +24,9 @@ entry:
;CHECK: vpaddd
;CHECK-NEXT: vpextrd
;CHECK-NEXT: movl
- %A2 = getelementptr <4 x i32*> %param, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
+ %A2 = getelementptr i32, <4 x i32*> %param, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
%k = extractelement <4 x i32*> %A2, i32 3
- %v = load i32* %k
+ %v = load i32, i32* %k
ret i32 %v
;CHECK: ret
}
@@ -37,9 +37,9 @@ entry:
;CHECK-LABEL: AGEP2
;CHECK: vpslld $2
;CHECK-NEXT: vpadd
- %A2 = getelementptr <4 x i32*> %param, <4 x i32> %off
+ %A2 = getelementptr i32, <4 x i32*> %param, <4 x i32> %off
%k = extractelement <4 x i32*> %A2, i32 3
- %v = load i32* %k
+ %v = load i32, i32* %k
ret i32 %v
;CHECK: ret
}
@@ -50,7 +50,7 @@ entry:
;CHECK-LABEL: AGEP3
;CHECK: vpslld $2
;CHECK-NEXT: vpadd
- %A2 = getelementptr <4 x i32*> %param, <4 x i32> %off
+ %A2 = getelementptr i32, <4 x i32*> %param, <4 x i32> %off
%v = alloca i32
%k = insertelement <4 x i32*> %A2, i32* %v, i32 3
ret <4 x i32*> %k
@@ -65,7 +65,7 @@ entry:
;CHECK: vpadd
; add the base to the offset
;CHECK-NEXT: vpadd
- %A = getelementptr <4 x i16*> %param, <4 x i32> %off
+ %A = getelementptr i16, <4 x i16*> %param, <4 x i32> %off
ret <4 x i16*> %A
;CHECK: ret
}
@@ -75,7 +75,7 @@ define <4 x i8*> @AGEP5(<4 x i8*> %param, <4 x i8> %off) nounwind {
entry:
;CHECK-LABEL: AGEP5
;CHECK: vpaddd
- %A = getelementptr <4 x i8*> %param, <4 x i8> %off
+ %A = getelementptr i8, <4 x i8*> %param, <4 x i8> %off
ret <4 x i8*> %A
;CHECK: ret
}
@@ -87,7 +87,7 @@ define <4 x i8*> @AGEP6(<4 x i8*> %param, <4 x i32> %off) nounwind {
entry:
;CHECK-LABEL: AGEP6
;CHECK-NOT: pslld
- %A = getelementptr <4 x i8*> %param, <4 x i32> %off
+ %A = getelementptr i8, <4 x i8*> %param, <4 x i32> %off
ret <4 x i8*> %A
;CHECK: ret
}
diff --git a/test/CodeGen/X86/vector-idiv.ll b/test/CodeGen/X86/vector-idiv.ll
index 4b269dc923c4..2e482a0f1430 100644
--- a/test/CodeGen/X86/vector-idiv.ll
+++ b/test/CodeGen/X86/vector-idiv.ll
@@ -4,20 +4,19 @@
target triple = "x86_64-unknown-unknown"
-define <4 x i32> @test1(<4 x i32> %a) {
+define <4 x i32> @test1(<4 x i32> %a) #0 {
; SSE41-LABEL: test1:
; SSE41: # BB#0:
; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [613566757,613566757,613566757,613566757]
-; SSE41-NEXT: movdqa %xmm0, %xmm2
-; SSE41-NEXT: pmuludq %xmm1, %xmm2
-; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
+; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
-; SSE41-NEXT: pmuludq %xmm1, %xmm3
-; SSE41-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,3],xmm3[1,3]
-; SSE41-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2,1,3]
-; SSE41-NEXT: psubd %xmm2, %xmm0
+; SSE41-NEXT: pmuludq %xmm2, %xmm3
+; SSE41-NEXT: pmuludq %xmm0, %xmm1
+; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
+; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7]
+; SSE41-NEXT: psubd %xmm1, %xmm0
; SSE41-NEXT: psrld $1, %xmm0
-; SSE41-NEXT: paddd %xmm2, %xmm0
+; SSE41-NEXT: paddd %xmm1, %xmm0
; SSE41-NEXT: psrld $2, %xmm0
; SSE41-NEXT: retq
;
@@ -26,11 +25,12 @@ define <4 x i32> @test1(<4 x i32> %a) {
; SSE-NEXT: movdqa {{.*#+}} xmm1 = [613566757,613566757,613566757,613566757]
; SSE-NEXT: movdqa %xmm0, %xmm2
; SSE-NEXT: pmuludq %xmm1, %xmm2
+; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
; SSE-NEXT: pmuludq %xmm1, %xmm3
-; SSE-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,3],xmm3[1,3]
-; SSE-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2,1,3]
+; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,3,2,3]
+; SSE-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; SSE-NEXT: psubd %xmm2, %xmm0
; SSE-NEXT: psrld $1, %xmm0
; SSE-NEXT: paddd %xmm2, %xmm0
@@ -40,12 +40,12 @@ define <4 x i32> @test1(<4 x i32> %a) {
; AVX-LABEL: test1:
; AVX: # BB#0:
; AVX-NEXT: vpbroadcastd {{.*}}(%rip), %xmm1
-; AVX-NEXT: vpmuludq %xmm1, %xmm0, %xmm2
-; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
+; AVX-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
; AVX-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
-; AVX-NEXT: vpmuludq %xmm1, %xmm3, %xmm1
-; AVX-NEXT: vshufps {{.*#+}} xmm1 = xmm2[1,3],xmm1[1,3]
-; AVX-NEXT: vshufps {{.*#+}} xmm1 = xmm1[0,2,1,3]
+; AVX-NEXT: vpmuludq %xmm2, %xmm3, %xmm2
+; AVX-NEXT: vpmuludq %xmm1, %xmm0, %xmm1
+; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
+; AVX-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
; AVX-NEXT: vpsubd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpsrld $1, %xmm0, %xmm0
; AVX-NEXT: vpaddd %xmm1, %xmm0, %xmm0
@@ -55,26 +55,26 @@ define <4 x i32> @test1(<4 x i32> %a) {
ret <4 x i32> %div
}
-define <8 x i32> @test2(<8 x i32> %a) {
+define <8 x i32> @test2(<8 x i32> %a) #0 {
; SSE41-LABEL: test2:
; SSE41: # BB#0:
; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [613566757,613566757,613566757,613566757]
-; SSE41-NEXT: movdqa %xmm0, %xmm3
-; SSE41-NEXT: pmuludq %xmm2, %xmm3
-; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm2[1,1,3,3]
-; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm0[1,1,3,3]
-; SSE41-NEXT: pmuludq %xmm4, %xmm5
-; SSE41-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,3],xmm5[1,3]
-; SSE41-NEXT: shufps {{.*#+}} xmm3 = xmm3[0,2,1,3]
-; SSE41-NEXT: psubd %xmm3, %xmm0
+; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[1,1,3,3]
+; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm0[1,1,3,3]
+; SSE41-NEXT: pmuludq %xmm3, %xmm4
+; SSE41-NEXT: movdqa %xmm0, %xmm5
+; SSE41-NEXT: pmuludq %xmm2, %xmm5
+; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm5[1,1,3,3]
+; SSE41-NEXT: pblendw {{.*#+}} xmm5 = xmm5[0,1],xmm4[2,3],xmm5[4,5],xmm4[6,7]
+; SSE41-NEXT: psubd %xmm5, %xmm0
; SSE41-NEXT: psrld $1, %xmm0
-; SSE41-NEXT: paddd %xmm3, %xmm0
+; SSE41-NEXT: paddd %xmm5, %xmm0
; SSE41-NEXT: psrld $2, %xmm0
+; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm1[1,1,3,3]
+; SSE41-NEXT: pmuludq %xmm3, %xmm4
; SSE41-NEXT: pmuludq %xmm1, %xmm2
-; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,1,3,3]
-; SSE41-NEXT: pmuludq %xmm4, %xmm3
-; SSE41-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,3],xmm3[1,3]
-; SSE41-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2,1,3]
+; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm4[2,3],xmm2[4,5],xmm4[6,7]
; SSE41-NEXT: psubd %xmm2, %xmm1
; SSE41-NEXT: psrld $1, %xmm1
; SSE41-NEXT: paddd %xmm2, %xmm1
@@ -86,20 +86,22 @@ define <8 x i32> @test2(<8 x i32> %a) {
; SSE-NEXT: movdqa {{.*#+}} xmm2 = [613566757,613566757,613566757,613566757]
; SSE-NEXT: movdqa %xmm0, %xmm3
; SSE-NEXT: pmuludq %xmm2, %xmm3
+; SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,3,2,3]
; SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm2[1,1,3,3]
; SSE-NEXT: pshufd {{.*#+}} xmm5 = xmm0[1,1,3,3]
; SSE-NEXT: pmuludq %xmm4, %xmm5
-; SSE-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,3],xmm5[1,3]
-; SSE-NEXT: shufps {{.*#+}} xmm3 = xmm3[0,2,1,3]
+; SSE-NEXT: pshufd {{.*#+}} xmm5 = xmm5[1,3,2,3]
+; SSE-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm5[0],xmm3[1],xmm5[1]
; SSE-NEXT: psubd %xmm3, %xmm0
; SSE-NEXT: psrld $1, %xmm0
; SSE-NEXT: paddd %xmm3, %xmm0
; SSE-NEXT: psrld $2, %xmm0
; SSE-NEXT: pmuludq %xmm1, %xmm2
+; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
; SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,1,3,3]
; SSE-NEXT: pmuludq %xmm4, %xmm3
-; SSE-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,3],xmm3[1,3]
-; SSE-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2,1,3]
+; SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,3,2,3]
+; SSE-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
; SSE-NEXT: psubd %xmm2, %xmm1
; SSE-NEXT: psrld $1, %xmm1
; SSE-NEXT: paddd %xmm2, %xmm1
@@ -124,7 +126,7 @@ define <8 x i32> @test2(<8 x i32> %a) {
ret <8 x i32> %div
}
-define <8 x i16> @test3(<8 x i16> %a) {
+define <8 x i16> @test3(<8 x i16> %a) #0 {
; SSE41-LABEL: test3:
; SSE41: # BB#0:
; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [9363,9363,9363,9363,9363,9363,9363,9363]
@@ -157,7 +159,7 @@ define <8 x i16> @test3(<8 x i16> %a) {
ret <8 x i16> %div
}
-define <16 x i16> @test4(<16 x i16> %a) {
+define <16 x i16> @test4(<16 x i16> %a) #0 {
; SSE41-LABEL: test4:
; SSE41: # BB#0:
; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [9363,9363,9363,9363,9363,9363,9363,9363]
@@ -202,7 +204,7 @@ define <16 x i16> @test4(<16 x i16> %a) {
ret <16 x i16> %div
}
-define <8 x i16> @test5(<8 x i16> %a) {
+define <8 x i16> @test5(<8 x i16> %a) #0 {
; SSE41-LABEL: test5:
; SSE41: # BB#0:
; SSE41-NEXT: pmulhw {{.*}}(%rip), %xmm0
@@ -232,7 +234,7 @@ define <8 x i16> @test5(<8 x i16> %a) {
ret <8 x i16> %div
}
-define <16 x i16> @test6(<16 x i16> %a) {
+define <16 x i16> @test6(<16 x i16> %a) #0 {
; SSE41-LABEL: test6:
; SSE41: # BB#0:
; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [18725,18725,18725,18725,18725,18725,18725,18725]
@@ -274,7 +276,7 @@ define <16 x i16> @test6(<16 x i16> %a) {
ret <16 x i16> %div
}
-define <16 x i8> @test7(<16 x i8> %a) {
+define <16 x i8> @test7(<16 x i8> %a) #0 {
; SSE41-LABEL: test7:
; SSE41: # BB#0:
; SSE41-NEXT: pextrb $1, %xmm0, %eax
@@ -458,6 +460,9 @@ define <16 x i8> @test7(<16 x i8> %a) {
;
; SSE-LABEL: test7:
; SSE: # BB#0:
+; SSE-NEXT: pushq %rbp
+; SSE-NEXT: pushq %r14
+; SSE-NEXT: pushq %rbx
; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
; SSE-NEXT: movsbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT: imull $-109, %eax, %ecx
@@ -469,156 +474,156 @@ define <16 x i8> @test7(<16 x i8> %a) {
; SSE-NEXT: addb %al, %cl
; SSE-NEXT: movzbl %cl, %eax
; SSE-NEXT: movd %eax, %xmm0
+; SSE-NEXT: movsbl -{{[0-9]+}}(%rsp), %r14d
+; SSE-NEXT: movsbl -{{[0-9]+}}(%rsp), %edx
+; SSE-NEXT: movsbl -{{[0-9]+}}(%rsp), %r9d
; SSE-NEXT: movsbl -{{[0-9]+}}(%rsp), %eax
-; SSE-NEXT: imull $-109, %eax, %ecx
-; SSE-NEXT: shrl $8, %ecx
-; SSE-NEXT: addb %al, %cl
-; SSE-NEXT: movb %cl, %al
-; SSE-NEXT: shrb $7, %al
-; SSE-NEXT: sarb $2, %cl
-; SSE-NEXT: addb %al, %cl
-; SSE-NEXT: movzbl %cl, %eax
-; SSE-NEXT: movd %eax, %xmm1
+; SSE-NEXT: movsbl -{{[0-9]+}}(%rsp), %r11d
+; SSE-NEXT: movsbl -{{[0-9]+}}(%rsp), %ecx
+; SSE-NEXT: movsbl -{{[0-9]+}}(%rsp), %r8d
+; SSE-NEXT: movsbl -{{[0-9]+}}(%rsp), %esi
+; SSE-NEXT: imull $-109, %esi, %edi
+; SSE-NEXT: shrl $8, %edi
+; SSE-NEXT: addb %sil, %dil
+; SSE-NEXT: movb %dil, %bl
+; SSE-NEXT: shrb $7, %bl
+; SSE-NEXT: sarb $2, %dil
+; SSE-NEXT: addb %bl, %dil
+; SSE-NEXT: movzbl %dil, %esi
+; SSE-NEXT: movd %esi, %xmm1
; SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
-; SSE-NEXT: movsbl -{{[0-9]+}}(%rsp), %eax
-; SSE-NEXT: imull $-109, %eax, %ecx
-; SSE-NEXT: shrl $8, %ecx
-; SSE-NEXT: addb %al, %cl
-; SSE-NEXT: movb %cl, %al
+; SSE-NEXT: imull $-109, %eax, %esi
+; SSE-NEXT: shrl $8, %esi
+; SSE-NEXT: addb %al, %sil
+; SSE-NEXT: movb %sil, %al
; SSE-NEXT: shrb $7, %al
-; SSE-NEXT: sarb $2, %cl
-; SSE-NEXT: addb %al, %cl
-; SSE-NEXT: movzbl %cl, %eax
+; SSE-NEXT: sarb $2, %sil
+; SSE-NEXT: addb %al, %sil
+; SSE-NEXT: movzbl %sil, %eax
; SSE-NEXT: movd %eax, %xmm2
-; SSE-NEXT: movsbl -{{[0-9]+}}(%rsp), %eax
-; SSE-NEXT: imull $-109, %eax, %ecx
-; SSE-NEXT: shrl $8, %ecx
-; SSE-NEXT: addb %al, %cl
-; SSE-NEXT: movb %cl, %al
+; SSE-NEXT: movsbl -{{[0-9]+}}(%rsp), %ebp
+; SSE-NEXT: movsbl -{{[0-9]+}}(%rsp), %esi
+; SSE-NEXT: movsbl -{{[0-9]+}}(%rsp), %r10d
+; SSE-NEXT: movsbl -{{[0-9]+}}(%rsp), %edi
+; SSE-NEXT: imull $-109, %edi, %ebx
+; SSE-NEXT: shrl $8, %ebx
+; SSE-NEXT: addb %dil, %bl
+; SSE-NEXT: movb %bl, %al
; SSE-NEXT: shrb $7, %al
-; SSE-NEXT: sarb $2, %cl
-; SSE-NEXT: addb %al, %cl
-; SSE-NEXT: movzbl %cl, %eax
+; SSE-NEXT: sarb $2, %bl
+; SSE-NEXT: addb %al, %bl
+; SSE-NEXT: movzbl %bl, %eax
; SSE-NEXT: movd %eax, %xmm0
; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
-; SSE-NEXT: movsbl -{{[0-9]+}}(%rsp), %eax
-; SSE-NEXT: imull $-109, %eax, %ecx
-; SSE-NEXT: shrl $8, %ecx
-; SSE-NEXT: addb %al, %cl
-; SSE-NEXT: movb %cl, %al
-; SSE-NEXT: shrb $7, %al
-; SSE-NEXT: sarb $2, %cl
-; SSE-NEXT: addb %al, %cl
-; SSE-NEXT: movzbl %cl, %eax
+; SSE-NEXT: imull $-109, %edx, %eax
+; SSE-NEXT: shrl $8, %eax
+; SSE-NEXT: addb %dl, %al
+; SSE-NEXT: movb %al, %dl
+; SSE-NEXT: shrb $7, %dl
+; SSE-NEXT: sarb $2, %al
+; SSE-NEXT: addb %dl, %al
+; SSE-NEXT: movzbl %al, %eax
; SSE-NEXT: movd %eax, %xmm1
-; SSE-NEXT: movsbl -{{[0-9]+}}(%rsp), %eax
-; SSE-NEXT: imull $-109, %eax, %ecx
-; SSE-NEXT: shrl $8, %ecx
-; SSE-NEXT: addb %al, %cl
-; SSE-NEXT: movb %cl, %al
-; SSE-NEXT: shrb $7, %al
-; SSE-NEXT: sarb $2, %cl
-; SSE-NEXT: addb %al, %cl
-; SSE-NEXT: movzbl %cl, %eax
+; SSE-NEXT: imull $-109, %esi, %eax
+; SSE-NEXT: shrl $8, %eax
+; SSE-NEXT: addb %sil, %al
+; SSE-NEXT: movb %al, %dl
+; SSE-NEXT: shrb $7, %dl
+; SSE-NEXT: sarb $2, %al
+; SSE-NEXT: addb %dl, %al
+; SSE-NEXT: movzbl %al, %eax
; SSE-NEXT: movd %eax, %xmm2
; SSE-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
-; SSE-NEXT: movsbl -{{[0-9]+}}(%rsp), %eax
-; SSE-NEXT: imull $-109, %eax, %ecx
-; SSE-NEXT: shrl $8, %ecx
-; SSE-NEXT: addb %al, %cl
-; SSE-NEXT: movb %cl, %al
-; SSE-NEXT: shrb $7, %al
-; SSE-NEXT: sarb $2, %cl
-; SSE-NEXT: addb %al, %cl
-; SSE-NEXT: movzbl %cl, %eax
+; SSE-NEXT: imull $-109, %ecx, %eax
+; SSE-NEXT: shrl $8, %eax
+; SSE-NEXT: addb %cl, %al
+; SSE-NEXT: movb %al, %cl
+; SSE-NEXT: shrb $7, %cl
+; SSE-NEXT: sarb $2, %al
+; SSE-NEXT: addb %cl, %al
+; SSE-NEXT: movzbl %al, %eax
; SSE-NEXT: movd %eax, %xmm3
+; SSE-NEXT: movsbl -{{[0-9]+}}(%rsp), %ecx
; SSE-NEXT: movsbl -{{[0-9]+}}(%rsp), %eax
-; SSE-NEXT: imull $-109, %eax, %ecx
-; SSE-NEXT: shrl $8, %ecx
-; SSE-NEXT: addb %al, %cl
-; SSE-NEXT: movb %cl, %al
+; SSE-NEXT: imull $-109, %eax, %edx
+; SSE-NEXT: shrl $8, %edx
+; SSE-NEXT: addb %al, %dl
+; SSE-NEXT: movb %dl, %al
; SSE-NEXT: shrb $7, %al
-; SSE-NEXT: sarb $2, %cl
-; SSE-NEXT: addb %al, %cl
-; SSE-NEXT: movzbl %cl, %eax
+; SSE-NEXT: sarb $2, %dl
+; SSE-NEXT: addb %al, %dl
+; SSE-NEXT: movzbl %dl, %eax
; SSE-NEXT: movd %eax, %xmm1
; SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3],xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7]
; SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
-; SSE-NEXT: movsbl -{{[0-9]+}}(%rsp), %eax
-; SSE-NEXT: imull $-109, %eax, %ecx
-; SSE-NEXT: shrl $8, %ecx
-; SSE-NEXT: addb %al, %cl
-; SSE-NEXT: movb %cl, %al
-; SSE-NEXT: shrb $7, %al
-; SSE-NEXT: sarb $2, %cl
-; SSE-NEXT: addb %al, %cl
-; SSE-NEXT: movzbl %cl, %eax
+; SSE-NEXT: imull $-109, %r14d, %eax
+; SSE-NEXT: shrl $8, %eax
+; SSE-NEXT: addb %r14b, %al
+; SSE-NEXT: movb %al, %dl
+; SSE-NEXT: shrb $7, %dl
+; SSE-NEXT: sarb $2, %al
+; SSE-NEXT: addb %dl, %al
+; SSE-NEXT: movzbl %al, %eax
; SSE-NEXT: movd %eax, %xmm2
-; SSE-NEXT: movsbl -{{[0-9]+}}(%rsp), %eax
-; SSE-NEXT: imull $-109, %eax, %ecx
-; SSE-NEXT: shrl $8, %ecx
-; SSE-NEXT: addb %al, %cl
-; SSE-NEXT: movb %cl, %al
-; SSE-NEXT: shrb $7, %al
-; SSE-NEXT: sarb $2, %cl
-; SSE-NEXT: addb %al, %cl
-; SSE-NEXT: movzbl %cl, %eax
+; SSE-NEXT: imull $-109, %ebp, %eax
+; SSE-NEXT: shrl $8, %eax
+; SSE-NEXT: addb %bpl, %al
+; SSE-NEXT: movb %al, %dl
+; SSE-NEXT: shrb $7, %dl
+; SSE-NEXT: sarb $2, %al
+; SSE-NEXT: addb %dl, %al
+; SSE-NEXT: movzbl %al, %eax
; SSE-NEXT: movd %eax, %xmm0
; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
-; SSE-NEXT: movsbl -{{[0-9]+}}(%rsp), %eax
-; SSE-NEXT: imull $-109, %eax, %ecx
-; SSE-NEXT: shrl $8, %ecx
-; SSE-NEXT: addb %al, %cl
-; SSE-NEXT: movb %cl, %al
-; SSE-NEXT: shrb $7, %al
-; SSE-NEXT: sarb $2, %cl
-; SSE-NEXT: addb %al, %cl
-; SSE-NEXT: movzbl %cl, %eax
+; SSE-NEXT: imull $-109, %r11d, %eax
+; SSE-NEXT: shrl $8, %eax
+; SSE-NEXT: addb %r11b, %al
+; SSE-NEXT: movb %al, %dl
+; SSE-NEXT: shrb $7, %dl
+; SSE-NEXT: sarb $2, %al
+; SSE-NEXT: addb %dl, %al
+; SSE-NEXT: movzbl %al, %eax
; SSE-NEXT: movd %eax, %xmm3
-; SSE-NEXT: movsbl -{{[0-9]+}}(%rsp), %eax
-; SSE-NEXT: imull $-109, %eax, %ecx
-; SSE-NEXT: shrl $8, %ecx
-; SSE-NEXT: addb %al, %cl
-; SSE-NEXT: movb %cl, %al
-; SSE-NEXT: shrb $7, %al
-; SSE-NEXT: sarb $2, %cl
-; SSE-NEXT: addb %al, %cl
-; SSE-NEXT: movzbl %cl, %eax
+; SSE-NEXT: imull $-109, %ecx, %eax
+; SSE-NEXT: shrl $8, %eax
+; SSE-NEXT: addb %cl, %al
+; SSE-NEXT: movb %al, %cl
+; SSE-NEXT: shrb $7, %cl
+; SSE-NEXT: sarb $2, %al
+; SSE-NEXT: addb %cl, %al
+; SSE-NEXT: movzbl %al, %eax
; SSE-NEXT: movd %eax, %xmm2
; SSE-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3],xmm2[4],xmm3[4],xmm2[5],xmm3[5],xmm2[6],xmm3[6],xmm2[7],xmm3[7]
; SSE-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
-; SSE-NEXT: movsbl -{{[0-9]+}}(%rsp), %eax
-; SSE-NEXT: imull $-109, %eax, %ecx
-; SSE-NEXT: shrl $8, %ecx
-; SSE-NEXT: addb %al, %cl
-; SSE-NEXT: movb %cl, %al
-; SSE-NEXT: shrb $7, %al
-; SSE-NEXT: sarb $2, %cl
-; SSE-NEXT: addb %al, %cl
-; SSE-NEXT: movzbl %cl, %eax
+; SSE-NEXT: imull $-109, %r9d, %eax
+; SSE-NEXT: shrl $8, %eax
+; SSE-NEXT: addb %r9b, %al
+; SSE-NEXT: movb %al, %cl
+; SSE-NEXT: shrb $7, %cl
+; SSE-NEXT: sarb $2, %al
+; SSE-NEXT: addb %cl, %al
+; SSE-NEXT: movzbl %al, %eax
; SSE-NEXT: movd %eax, %xmm0
-; SSE-NEXT: movsbl -{{[0-9]+}}(%rsp), %eax
-; SSE-NEXT: imull $-109, %eax, %ecx
-; SSE-NEXT: shrl $8, %ecx
-; SSE-NEXT: addb %al, %cl
-; SSE-NEXT: movb %cl, %al
-; SSE-NEXT: shrb $7, %al
-; SSE-NEXT: sarb $2, %cl
-; SSE-NEXT: addb %al, %cl
-; SSE-NEXT: movzbl %cl, %eax
+; SSE-NEXT: imull $-109, %r10d, %eax
+; SSE-NEXT: shrl $8, %eax
+; SSE-NEXT: addb %r10b, %al
+; SSE-NEXT: movb %al, %cl
+; SSE-NEXT: shrb $7, %cl
+; SSE-NEXT: sarb $2, %al
+; SSE-NEXT: addb %cl, %al
+; SSE-NEXT: movzbl %al, %eax
; SSE-NEXT: movd %eax, %xmm3
; SSE-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
-; SSE-NEXT: movsbl -{{[0-9]+}}(%rsp), %eax
-; SSE-NEXT: imull $-109, %eax, %ecx
-; SSE-NEXT: shrl $8, %ecx
-; SSE-NEXT: addb %al, %cl
-; SSE-NEXT: movb %cl, %al
-; SSE-NEXT: shrb $7, %al
-; SSE-NEXT: sarb $2, %cl
-; SSE-NEXT: addb %al, %cl
-; SSE-NEXT: movzbl %cl, %eax
+; SSE-NEXT: imull $-109, %r8d, %eax
+; SSE-NEXT: shrl $8, %eax
+; SSE-NEXT: addb %r8b, %al
+; SSE-NEXT: movb %al, %cl
+; SSE-NEXT: shrb $7, %cl
+; SSE-NEXT: sarb $2, %al
+; SSE-NEXT: addb %cl, %al
+; SSE-NEXT: movzbl %al, %eax
; SSE-NEXT: movd %eax, %xmm4
; SSE-NEXT: movsbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT: imull $-109, %eax, %ecx
@@ -634,6 +639,9 @@ define <16 x i8> @test7(<16 x i8> %a) {
; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7]
; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; SSE-NEXT: popq %rbx
+; SSE-NEXT: popq %r14
+; SSE-NEXT: popq %rbp
; SSE-NEXT: retq
;
; AVX-LABEL: test7:
@@ -819,17 +827,16 @@ define <16 x i8> @test7(<16 x i8> %a) {
ret <16 x i8> %div
}
-define <4 x i32> @test8(<4 x i32> %a) {
+define <4 x i32> @test8(<4 x i32> %a) #0 {
; SSE41-LABEL: test8:
; SSE41: # BB#0:
-; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [2454267027,2454267027,2454267027,2454267027]
-; SSE41-NEXT: movdqa %xmm0, %xmm1
-; SSE41-NEXT: pmuldq %xmm2, %xmm1
-; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2454267027,2454267027,2454267027,2454267027]
+; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
; SSE41-NEXT: pmuldq %xmm2, %xmm3
-; SSE41-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,3],xmm3[1,3]
-; SSE41-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2,1,3]
+; SSE41-NEXT: pmuldq %xmm0, %xmm1
+; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
+; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7]
; SSE41-NEXT: paddd %xmm0, %xmm1
; SSE41-NEXT: movdqa %xmm1, %xmm0
; SSE41-NEXT: psrld $31, %xmm0
@@ -840,22 +847,22 @@ define <4 x i32> @test8(<4 x i32> %a) {
;
; SSE-LABEL: test8:
; SSE: # BB#0:
-; SSE-NEXT: movdqa {{.*#+}} xmm2 = [2454267027,2454267027,2454267027,2454267027]
-; SSE-NEXT: movdqa %xmm2, %xmm1
+; SSE-NEXT: movdqa {{.*#+}} xmm1 = [2454267027,2454267027,2454267027,2454267027]
+; SSE-NEXT: movdqa %xmm0, %xmm2
+; SSE-NEXT: psrad $31, %xmm2
+; SSE-NEXT: pand %xmm1, %xmm2
+; SSE-NEXT: movdqa %xmm0, %xmm3
+; SSE-NEXT: pmuludq %xmm1, %xmm3
+; SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm1[1,1,3,3]
; SSE-NEXT: psrad $31, %xmm1
; SSE-NEXT: pand %xmm0, %xmm1
-; SSE-NEXT: movdqa %xmm0, %xmm3
-; SSE-NEXT: psrad $31, %xmm3
-; SSE-NEXT: pand %xmm2, %xmm3
-; SSE-NEXT: paddd %xmm1, %xmm3
-; SSE-NEXT: movdqa %xmm0, %xmm1
-; SSE-NEXT: pmuludq %xmm2, %xmm1
-; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
-; SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm0[1,1,3,3]
-; SSE-NEXT: pmuludq %xmm2, %xmm4
-; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,3],xmm4[1,3]
-; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2,1,3]
-; SSE-NEXT: psubd %xmm3, %xmm1
+; SSE-NEXT: paddd %xmm1, %xmm2
+; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,3,2,3]
+; SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
+; SSE-NEXT: pmuludq %xmm4, %xmm3
+; SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,3,2,3]
+; SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
+; SSE-NEXT: psubd %xmm2, %xmm1
; SSE-NEXT: paddd %xmm0, %xmm1
; SSE-NEXT: movdqa %xmm1, %xmm0
; SSE-NEXT: psrld $31, %xmm0
@@ -867,12 +874,12 @@ define <4 x i32> @test8(<4 x i32> %a) {
; AVX-LABEL: test8:
; AVX: # BB#0:
; AVX-NEXT: vpbroadcastd {{.*}}(%rip), %xmm1
-; AVX-NEXT: vpmuldq %xmm1, %xmm0, %xmm2
-; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
+; AVX-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
; AVX-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
-; AVX-NEXT: vpmuldq %xmm1, %xmm3, %xmm1
-; AVX-NEXT: vshufps {{.*#+}} xmm1 = xmm2[1,3],xmm1[1,3]
-; AVX-NEXT: vshufps {{.*#+}} xmm1 = xmm1[0,2,1,3]
+; AVX-NEXT: vpmuldq %xmm2, %xmm3, %xmm2
+; AVX-NEXT: vpmuldq %xmm1, %xmm0, %xmm1
+; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
+; AVX-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
; AVX-NEXT: vpaddd %xmm0, %xmm1, %xmm0
; AVX-NEXT: vpsrld $31, %xmm0, %xmm1
; AVX-NEXT: vpsrad $2, %xmm0, %xmm0
@@ -882,78 +889,80 @@ define <4 x i32> @test8(<4 x i32> %a) {
ret <4 x i32> %div
}
-define <8 x i32> @test9(<8 x i32> %a) {
+define <8 x i32> @test9(<8 x i32> %a) #0 {
; SSE41-LABEL: test9:
; SSE41: # BB#0:
-; SSE41-NEXT: movdqa %xmm1, %xmm2
-; SSE41-NEXT: movdqa %xmm0, %xmm3
-; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2454267027,2454267027,2454267027,2454267027]
-; SSE41-NEXT: # kill: XMM0<def> XMM3<kill>
-; SSE41-NEXT: pmuldq %xmm1, %xmm0
-; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm1[1,1,3,3]
-; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm3[1,1,3,3]
+; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [2454267027,2454267027,2454267027,2454267027]
+; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm3[1,1,3,3]
+; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm0[1,1,3,3]
; SSE41-NEXT: pmuldq %xmm4, %xmm5
-; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,3],xmm5[1,3]
-; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
-; SSE41-NEXT: paddd %xmm3, %xmm0
-; SSE41-NEXT: movdqa %xmm0, %xmm3
-; SSE41-NEXT: psrld $31, %xmm3
-; SSE41-NEXT: psrad $2, %xmm0
-; SSE41-NEXT: paddd %xmm3, %xmm0
-; SSE41-NEXT: pmuldq %xmm2, %xmm1
-; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[1,1,3,3]
-; SSE41-NEXT: pmuldq %xmm4, %xmm3
-; SSE41-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,3],xmm3[1,3]
-; SSE41-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2,1,3]
-; SSE41-NEXT: paddd %xmm2, %xmm1
-; SSE41-NEXT: movdqa %xmm1, %xmm2
-; SSE41-NEXT: psrld $31, %xmm2
-; SSE41-NEXT: psrad $2, %xmm1
-; SSE41-NEXT: paddd %xmm2, %xmm1
+; SSE41-NEXT: movdqa %xmm0, %xmm2
+; SSE41-NEXT: pmuldq %xmm3, %xmm2
+; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm5[2,3],xmm2[4,5],xmm5[6,7]
+; SSE41-NEXT: paddd %xmm0, %xmm2
+; SSE41-NEXT: movdqa %xmm2, %xmm0
+; SSE41-NEXT: psrld $31, %xmm0
+; SSE41-NEXT: psrad $2, %xmm2
+; SSE41-NEXT: paddd %xmm0, %xmm2
+; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
+; SSE41-NEXT: pmuldq %xmm4, %xmm0
+; SSE41-NEXT: pmuldq %xmm1, %xmm3
+; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
+; SSE41-NEXT: pblendw {{.*#+}} xmm3 = xmm3[0,1],xmm0[2,3],xmm3[4,5],xmm0[6,7]
+; SSE41-NEXT: paddd %xmm1, %xmm3
+; SSE41-NEXT: movdqa %xmm3, %xmm0
+; SSE41-NEXT: psrld $31, %xmm0
+; SSE41-NEXT: psrad $2, %xmm3
+; SSE41-NEXT: paddd %xmm0, %xmm3
+; SSE41-NEXT: movdqa %xmm2, %xmm0
+; SSE41-NEXT: movdqa %xmm3, %xmm1
; SSE41-NEXT: retq
;
; SSE-LABEL: test9:
; SSE: # BB#0:
-; SSE-NEXT: movdqa %xmm1, %xmm2
-; SSE-NEXT: movdqa %xmm0, %xmm3
-; SSE-NEXT: movdqa {{.*#+}} xmm1 = [2454267027,2454267027,2454267027,2454267027]
-; SSE-NEXT: movdqa %xmm1, %xmm4
+; SSE-NEXT: movdqa %xmm0, %xmm2
+; SSE-NEXT: movdqa {{.*#+}} xmm3 = [2454267027,2454267027,2454267027,2454267027]
+; SSE-NEXT: movdqa %xmm3, %xmm4
; SSE-NEXT: psrad $31, %xmm4
; SSE-NEXT: movdqa %xmm4, %xmm0
-; SSE-NEXT: pand %xmm3, %xmm0
-; SSE-NEXT: movdqa %xmm3, %xmm5
+; SSE-NEXT: pand %xmm2, %xmm0
+; SSE-NEXT: movdqa %xmm2, %xmm5
; SSE-NEXT: psrad $31, %xmm5
-; SSE-NEXT: pand %xmm1, %xmm5
+; SSE-NEXT: pand %xmm3, %xmm5
; SSE-NEXT: paddd %xmm0, %xmm5
-; SSE-NEXT: movdqa %xmm3, %xmm0
-; SSE-NEXT: pmuludq %xmm1, %xmm0
-; SSE-NEXT: pshufd {{.*#+}} xmm6 = xmm1[1,1,3,3]
-; SSE-NEXT: pshufd {{.*#+}} xmm7 = xmm3[1,1,3,3]
+; SSE-NEXT: movdqa %xmm2, %xmm0
+; SSE-NEXT: pmuludq %xmm3, %xmm0
+; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,3,2,3]
+; SSE-NEXT: pshufd {{.*#+}} xmm6 = xmm3[1,1,3,3]
+; SSE-NEXT: pshufd {{.*#+}} xmm7 = xmm2[1,1,3,3]
; SSE-NEXT: pmuludq %xmm6, %xmm7
-; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,3],xmm7[1,3]
-; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
+; SSE-NEXT: pshufd {{.*#+}} xmm7 = xmm7[1,3,2,3]
+; SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm7[0],xmm0[1],xmm7[1]
; SSE-NEXT: psubd %xmm5, %xmm0
-; SSE-NEXT: paddd %xmm3, %xmm0
-; SSE-NEXT: movdqa %xmm0, %xmm3
-; SSE-NEXT: psrld $31, %xmm3
-; SSE-NEXT: psrad $2, %xmm0
-; SSE-NEXT: paddd %xmm3, %xmm0
-; SSE-NEXT: pand %xmm2, %xmm4
-; SSE-NEXT: movdqa %xmm2, %xmm3
-; SSE-NEXT: psrad $31, %xmm3
-; SSE-NEXT: pand %xmm1, %xmm3
-; SSE-NEXT: paddd %xmm4, %xmm3
-; SSE-NEXT: pmuludq %xmm2, %xmm1
-; SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm2[1,1,3,3]
-; SSE-NEXT: pmuludq %xmm6, %xmm4
-; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,3],xmm4[1,3]
-; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2,1,3]
-; SSE-NEXT: psubd %xmm3, %xmm1
-; SSE-NEXT: paddd %xmm2, %xmm1
-; SSE-NEXT: movdqa %xmm1, %xmm2
+; SSE-NEXT: paddd %xmm2, %xmm0
+; SSE-NEXT: movdqa %xmm0, %xmm2
; SSE-NEXT: psrld $31, %xmm2
-; SSE-NEXT: psrad $2, %xmm1
-; SSE-NEXT: paddd %xmm2, %xmm1
+; SSE-NEXT: psrad $2, %xmm0
+; SSE-NEXT: paddd %xmm2, %xmm0
+; SSE-NEXT: pand %xmm1, %xmm4
+; SSE-NEXT: movdqa %xmm1, %xmm5
+; SSE-NEXT: psrad $31, %xmm5
+; SSE-NEXT: pand %xmm3, %xmm5
+; SSE-NEXT: paddd %xmm4, %xmm5
+; SSE-NEXT: pmuludq %xmm1, %xmm3
+; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm3[1,3,2,3]
+; SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,1,3,3]
+; SSE-NEXT: pmuludq %xmm6, %xmm3
+; SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,3,2,3]
+; SSE-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
+; SSE-NEXT: psubd %xmm5, %xmm2
+; SSE-NEXT: paddd %xmm1, %xmm2
+; SSE-NEXT: movdqa %xmm2, %xmm1
+; SSE-NEXT: psrld $31, %xmm1
+; SSE-NEXT: psrad $2, %xmm2
+; SSE-NEXT: paddd %xmm1, %xmm2
+; SSE-NEXT: movdqa %xmm2, %xmm1
; SSE-NEXT: retq
;
; AVX-LABEL: test9:
@@ -974,76 +983,80 @@ define <8 x i32> @test9(<8 x i32> %a) {
ret <8 x i32> %div
}
-define <8 x i32> @test10(<8 x i32> %a) {
+define <8 x i32> @test10(<8 x i32> %a) #0 {
; SSE41-LABEL: test10:
; SSE41: # BB#0:
; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [613566757,613566757,613566757,613566757]
-; SSE41-NEXT: movdqa %xmm0, %xmm3
-; SSE41-NEXT: pmuludq %xmm2, %xmm3
-; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm2[1,1,3,3]
-; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm0[1,1,3,3]
-; SSE41-NEXT: pmuludq %xmm4, %xmm5
-; SSE41-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,3],xmm5[1,3]
-; SSE41-NEXT: shufps {{.*#+}} xmm3 = xmm3[0,2,1,3]
+; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[1,1,3,3]
+; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm0[1,1,3,3]
+; SSE41-NEXT: pmuludq %xmm3, %xmm4
; SSE41-NEXT: movdqa %xmm0, %xmm5
-; SSE41-NEXT: psubd %xmm3, %xmm5
-; SSE41-NEXT: psrld $1, %xmm5
-; SSE41-NEXT: paddd %xmm3, %xmm5
-; SSE41-NEXT: psrld $2, %xmm5
-; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [7,7,7,7]
-; SSE41-NEXT: pmulld %xmm3, %xmm5
-; SSE41-NEXT: psubd %xmm5, %xmm0
-; SSE41-NEXT: pmuludq %xmm1, %xmm2
-; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm1[1,1,3,3]
-; SSE41-NEXT: pmuludq %xmm4, %xmm5
-; SSE41-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,3],xmm5[1,3]
-; SSE41-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2,1,3]
-; SSE41-NEXT: movdqa %xmm1, %xmm4
-; SSE41-NEXT: psubd %xmm2, %xmm4
+; SSE41-NEXT: pmuludq %xmm2, %xmm5
+; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm5[1,1,3,3]
+; SSE41-NEXT: pblendw {{.*#+}} xmm5 = xmm5[0,1],xmm4[2,3],xmm5[4,5],xmm4[6,7]
+; SSE41-NEXT: movdqa %xmm0, %xmm4
+; SSE41-NEXT: psubd %xmm5, %xmm4
; SSE41-NEXT: psrld $1, %xmm4
-; SSE41-NEXT: paddd %xmm2, %xmm4
+; SSE41-NEXT: paddd %xmm5, %xmm4
; SSE41-NEXT: psrld $2, %xmm4
-; SSE41-NEXT: pmulld %xmm3, %xmm4
-; SSE41-NEXT: psubd %xmm4, %xmm1
+; SSE41-NEXT: movdqa {{.*#+}} xmm5 = [7,7,7,7]
+; SSE41-NEXT: pmulld %xmm5, %xmm4
+; SSE41-NEXT: psubd %xmm4, %xmm0
+; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm1[1,1,3,3]
+; SSE41-NEXT: pmuludq %xmm3, %xmm4
+; SSE41-NEXT: pmuludq %xmm1, %xmm2
+; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm4[2,3],xmm2[4,5],xmm4[6,7]
+; SSE41-NEXT: movdqa %xmm1, %xmm3
+; SSE41-NEXT: psubd %xmm2, %xmm3
+; SSE41-NEXT: psrld $1, %xmm3
+; SSE41-NEXT: paddd %xmm2, %xmm3
+; SSE41-NEXT: psrld $2, %xmm3
+; SSE41-NEXT: pmulld %xmm5, %xmm3
+; SSE41-NEXT: psubd %xmm3, %xmm1
; SSE41-NEXT: retq
;
; SSE-LABEL: test10:
; SSE: # BB#0:
-; SSE-NEXT: movdqa {{.*#+}} xmm2 = [613566757,613566757,613566757,613566757]
-; SSE-NEXT: movdqa %xmm0, %xmm3
-; SSE-NEXT: pmuludq %xmm2, %xmm3
-; SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm2[1,1,3,3]
+; SSE-NEXT: movdqa {{.*#+}} xmm3 = [613566757,613566757,613566757,613566757]
+; SSE-NEXT: movdqa %xmm0, %xmm2
+; SSE-NEXT: pmuludq %xmm3, %xmm2
+; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
+; SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm3[1,1,3,3]
; SSE-NEXT: pshufd {{.*#+}} xmm5 = xmm0[1,1,3,3]
; SSE-NEXT: pmuludq %xmm4, %xmm5
-; SSE-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,3],xmm5[1,3]
-; SSE-NEXT: shufps {{.*#+}} xmm3 = xmm3[0,2,1,3]
+; SSE-NEXT: pshufd {{.*#+}} xmm5 = xmm5[1,3,2,3]
+; SSE-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm5[0],xmm2[1],xmm5[1]
; SSE-NEXT: movdqa %xmm0, %xmm5
-; SSE-NEXT: psubd %xmm3, %xmm5
+; SSE-NEXT: psubd %xmm2, %xmm5
; SSE-NEXT: psrld $1, %xmm5
-; SSE-NEXT: paddd %xmm3, %xmm5
+; SSE-NEXT: paddd %xmm2, %xmm5
; SSE-NEXT: psrld $2, %xmm5
-; SSE-NEXT: movdqa {{.*#+}} xmm3 = [7,7,7,7]
+; SSE-NEXT: movdqa {{.*#+}} xmm2 = [7,7,7,7]
; SSE-NEXT: pshufd {{.*#+}} xmm6 = xmm5[1,1,3,3]
-; SSE-NEXT: pmuludq %xmm3, %xmm5
-; SSE-NEXT: pmuludq %xmm3, %xmm6
-; SSE-NEXT: shufps {{.*#+}} xmm5 = xmm5[0,2],xmm6[0,2]
-; SSE-NEXT: shufps {{.*#+}} xmm5 = xmm5[0,2,1,3]
+; SSE-NEXT: pmuludq %xmm2, %xmm5
+; SSE-NEXT: pshufd {{.*#+}} xmm5 = xmm5[0,2,2,3]
+; SSE-NEXT: pmuludq %xmm2, %xmm6
+; SSE-NEXT: pshufd {{.*#+}} xmm6 = xmm6[0,2,2,3]
+; SSE-NEXT: punpckldq {{.*#+}} xmm5 = xmm5[0],xmm6[0],xmm5[1],xmm6[1]
; SSE-NEXT: psubd %xmm5, %xmm0
-; SSE-NEXT: pmuludq %xmm1, %xmm2
+; SSE-NEXT: pmuludq %xmm1, %xmm3
+; SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,3,2,3]
; SSE-NEXT: pshufd {{.*#+}} xmm5 = xmm1[1,1,3,3]
; SSE-NEXT: pmuludq %xmm4, %xmm5
-; SSE-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,3],xmm5[1,3]
-; SSE-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2,1,3]
+; SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm5[1,3,2,3]
+; SSE-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1]
; SSE-NEXT: movdqa %xmm1, %xmm4
-; SSE-NEXT: psubd %xmm2, %xmm4
+; SSE-NEXT: psubd %xmm3, %xmm4
; SSE-NEXT: psrld $1, %xmm4
-; SSE-NEXT: paddd %xmm2, %xmm4
+; SSE-NEXT: paddd %xmm3, %xmm4
; SSE-NEXT: psrld $2, %xmm4
-; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm4[1,1,3,3]
-; SSE-NEXT: pmuludq %xmm3, %xmm4
-; SSE-NEXT: pmuludq %xmm3, %xmm2
-; SSE-NEXT: shufps {{.*#+}} xmm4 = xmm4[0,2],xmm2[0,2]
-; SSE-NEXT: shufps {{.*#+}} xmm4 = xmm4[0,2,1,3]
+; SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
+; SSE-NEXT: pmuludq %xmm2, %xmm4
+; SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm4[0,2,2,3]
+; SSE-NEXT: pmuludq %xmm2, %xmm3
+; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,2,2,3]
+; SSE-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1]
; SSE-NEXT: psubd %xmm4, %xmm1
; SSE-NEXT: retq
;
@@ -1068,36 +1081,36 @@ define <8 x i32> @test10(<8 x i32> %a) {
ret <8 x i32> %rem
}
-define <8 x i32> @test11(<8 x i32> %a) {
+define <8 x i32> @test11(<8 x i32> %a) #0 {
; SSE41-LABEL: test11:
; SSE41: # BB#0:
; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [2454267027,2454267027,2454267027,2454267027]
-; SSE41-NEXT: movdqa %xmm0, %xmm3
-; SSE41-NEXT: pmuldq %xmm2, %xmm3
-; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm2[1,1,3,3]
-; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm0[1,1,3,3]
-; SSE41-NEXT: pmuldq %xmm4, %xmm5
-; SSE41-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,3],xmm5[1,3]
-; SSE41-NEXT: shufps {{.*#+}} xmm3 = xmm3[0,2,1,3]
-; SSE41-NEXT: paddd %xmm0, %xmm3
-; SSE41-NEXT: movdqa %xmm3, %xmm5
-; SSE41-NEXT: psrld $31, %xmm5
-; SSE41-NEXT: psrad $2, %xmm3
-; SSE41-NEXT: paddd %xmm5, %xmm3
-; SSE41-NEXT: movdqa {{.*#+}} xmm5 = [7,7,7,7]
-; SSE41-NEXT: pmulld %xmm5, %xmm3
-; SSE41-NEXT: psubd %xmm3, %xmm0
+; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[1,1,3,3]
+; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm0[1,1,3,3]
+; SSE41-NEXT: pmuldq %xmm3, %xmm4
+; SSE41-NEXT: movdqa %xmm0, %xmm5
+; SSE41-NEXT: pmuldq %xmm2, %xmm5
+; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm5[1,1,3,3]
+; SSE41-NEXT: pblendw {{.*#+}} xmm5 = xmm5[0,1],xmm4[2,3],xmm5[4,5],xmm4[6,7]
+; SSE41-NEXT: paddd %xmm0, %xmm5
+; SSE41-NEXT: movdqa %xmm5, %xmm4
+; SSE41-NEXT: psrld $31, %xmm4
+; SSE41-NEXT: psrad $2, %xmm5
+; SSE41-NEXT: paddd %xmm4, %xmm5
+; SSE41-NEXT: movdqa {{.*#+}} xmm4 = [7,7,7,7]
+; SSE41-NEXT: pmulld %xmm4, %xmm5
+; SSE41-NEXT: psubd %xmm5, %xmm0
+; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm1[1,1,3,3]
+; SSE41-NEXT: pmuldq %xmm3, %xmm5
; SSE41-NEXT: pmuldq %xmm1, %xmm2
-; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,1,3,3]
-; SSE41-NEXT: pmuldq %xmm4, %xmm3
-; SSE41-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,3],xmm3[1,3]
-; SSE41-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2,1,3]
+; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm5[2,3],xmm2[4,5],xmm5[6,7]
; SSE41-NEXT: paddd %xmm1, %xmm2
; SSE41-NEXT: movdqa %xmm2, %xmm3
; SSE41-NEXT: psrld $31, %xmm3
; SSE41-NEXT: psrad $2, %xmm2
; SSE41-NEXT: paddd %xmm3, %xmm2
-; SSE41-NEXT: pmulld %xmm5, %xmm2
+; SSE41-NEXT: pmulld %xmm4, %xmm2
; SSE41-NEXT: psubd %xmm2, %xmm1
; SSE41-NEXT: retq
;
@@ -1112,13 +1125,14 @@ define <8 x i32> @test11(<8 x i32> %a) {
; SSE-NEXT: psrad $31, %xmm6
; SSE-NEXT: pand %xmm2, %xmm6
; SSE-NEXT: paddd %xmm4, %xmm6
-; SSE-NEXT: movdqa %xmm0, %xmm7
-; SSE-NEXT: pmuludq %xmm2, %xmm7
+; SSE-NEXT: movdqa %xmm0, %xmm4
+; SSE-NEXT: pmuludq %xmm2, %xmm4
+; SSE-NEXT: pshufd {{.*#+}} xmm7 = xmm4[1,3,2,3]
; SSE-NEXT: pshufd {{.*#+}} xmm5 = xmm2[1,1,3,3]
; SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm0[1,1,3,3]
; SSE-NEXT: pmuludq %xmm5, %xmm4
-; SSE-NEXT: shufps {{.*#+}} xmm7 = xmm7[1,3],xmm4[1,3]
-; SSE-NEXT: shufps {{.*#+}} xmm7 = xmm7[0,2,1,3]
+; SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,3,2,3]
+; SSE-NEXT: punpckldq {{.*#+}} xmm7 = xmm7[0],xmm4[0],xmm7[1],xmm4[1]
; SSE-NEXT: psubd %xmm6, %xmm7
; SSE-NEXT: paddd %xmm0, %xmm7
; SSE-NEXT: movdqa %xmm7, %xmm4
@@ -1128,9 +1142,10 @@ define <8 x i32> @test11(<8 x i32> %a) {
; SSE-NEXT: movdqa {{.*#+}} xmm4 = [7,7,7,7]
; SSE-NEXT: pshufd {{.*#+}} xmm6 = xmm7[1,1,3,3]
; SSE-NEXT: pmuludq %xmm4, %xmm7
+; SSE-NEXT: pshufd {{.*#+}} xmm7 = xmm7[0,2,2,3]
; SSE-NEXT: pmuludq %xmm4, %xmm6
-; SSE-NEXT: shufps {{.*#+}} xmm7 = xmm7[0,2],xmm6[0,2]
-; SSE-NEXT: shufps {{.*#+}} xmm7 = xmm7[0,2,1,3]
+; SSE-NEXT: pshufd {{.*#+}} xmm6 = xmm6[0,2,2,3]
+; SSE-NEXT: punpckldq {{.*#+}} xmm7 = xmm7[0],xmm6[0],xmm7[1],xmm6[1]
; SSE-NEXT: psubd %xmm7, %xmm0
; SSE-NEXT: pand %xmm1, %xmm3
; SSE-NEXT: movdqa %xmm1, %xmm6
@@ -1138,10 +1153,11 @@ define <8 x i32> @test11(<8 x i32> %a) {
; SSE-NEXT: pand %xmm2, %xmm6
; SSE-NEXT: paddd %xmm3, %xmm6
; SSE-NEXT: pmuludq %xmm1, %xmm2
+; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
; SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,1,3,3]
; SSE-NEXT: pmuludq %xmm5, %xmm3
-; SSE-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,3],xmm3[1,3]
-; SSE-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2,1,3]
+; SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,3,2,3]
+; SSE-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
; SSE-NEXT: psubd %xmm6, %xmm2
; SSE-NEXT: paddd %xmm1, %xmm2
; SSE-NEXT: movdqa %xmm2, %xmm3
@@ -1150,9 +1166,10 @@ define <8 x i32> @test11(<8 x i32> %a) {
; SSE-NEXT: paddd %xmm3, %xmm2
; SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm2[1,1,3,3]
; SSE-NEXT: pmuludq %xmm4, %xmm2
+; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
; SSE-NEXT: pmuludq %xmm4, %xmm3
-; SSE-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm3[0,2]
-; SSE-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2,1,3]
+; SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
+; SSE-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
; SSE-NEXT: psubd %xmm2, %xmm1
; SSE-NEXT: retq
;
@@ -1177,7 +1194,7 @@ define <8 x i32> @test11(<8 x i32> %a) {
ret <8 x i32> %rem
}
-define <2 x i16> @test12() {
+define <2 x i16> @test12() #0 {
; SSE41-LABEL: test12:
; SSE41: # BB#0:
; SSE41-NEXT: xorps %xmm0, %xmm0
@@ -1198,20 +1215,19 @@ define <2 x i16> @test12() {
ret <2 x i16> %B9
}
-define <4 x i32> @PR20355(<4 x i32> %a) {
+define <4 x i32> @PR20355(<4 x i32> %a) #0 {
; SSE41-LABEL: PR20355:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [1431655766,1431655766,1431655766,1431655766]
-; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
+; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
+; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
+; SSE41-NEXT: pmuldq %xmm2, %xmm3
; SSE41-NEXT: pmuldq %xmm1, %xmm0
-; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
-; SSE41-NEXT: pmuldq %xmm2, %xmm1
-; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,3],xmm1[1,3]
-; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
-; SSE41-NEXT: movaps %xmm0, %xmm1
-; SSE41-NEXT: psrld $31, %xmm1
-; SSE41-NEXT: paddd %xmm0, %xmm1
+; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
+; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7]
; SSE41-NEXT: movdqa %xmm1, %xmm0
+; SSE41-NEXT: psrld $31, %xmm0
+; SSE41-NEXT: paddd %xmm1, %xmm0
; SSE41-NEXT: retq
;
; SSE-LABEL: PR20355:
@@ -1226,26 +1242,26 @@ define <4 x i32> @PR20355(<4 x i32> %a) {
; SSE-NEXT: paddd %xmm2, %xmm3
; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; SSE-NEXT: pmuludq %xmm1, %xmm0
-; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
-; SSE-NEXT: pmuludq %xmm2, %xmm1
-; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,3],xmm1[1,3]
-; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
-; SSE-NEXT: psubd %xmm3, %xmm0
-; SSE-NEXT: movdqa %xmm0, %xmm1
-; SSE-NEXT: psrld $31, %xmm1
-; SSE-NEXT: paddd %xmm0, %xmm1
-; SSE-NEXT: movdqa %xmm1, %xmm0
+; SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm0[1,3,2,3]
+; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
+; SSE-NEXT: pmuludq %xmm2, %xmm0
+; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,3,2,3]
+; SSE-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm0[0],xmm4[1],xmm0[1]
+; SSE-NEXT: psubd %xmm3, %xmm4
+; SSE-NEXT: movdqa %xmm4, %xmm0
+; SSE-NEXT: psrld $31, %xmm0
+; SSE-NEXT: paddd %xmm4, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: PR20355:
; AVX: # BB#0: # %entry
; AVX-NEXT: vpbroadcastd {{.*}}(%rip), %xmm1
-; AVX-NEXT: vpmuldq %xmm1, %xmm0, %xmm2
-; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
-; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; AVX-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
+; AVX-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
+; AVX-NEXT: vpmuldq %xmm2, %xmm3, %xmm2
; AVX-NEXT: vpmuldq %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm2[1,3],xmm0[1,3]
-; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
+; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; AVX-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
; AVX-NEXT: vpsrld $31, %xmm0, %xmm1
; AVX-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
@@ -1253,3 +1269,5 @@ entry:
%sdiv = sdiv <4 x i32> %a, <i32 3, i32 3, i32 3, i32 3>
ret <4 x i32> %sdiv
}
+
+attributes #0 = { nounwind }
diff --git a/test/CodeGen/X86/vector-intrinsics.ll b/test/CodeGen/X86/vector-intrinsics.ll
index cabacb572cea..c140468d3006 100644
--- a/test/CodeGen/X86/vector-intrinsics.ll
+++ b/test/CodeGen/X86/vector-intrinsics.ll
@@ -32,20 +32,20 @@ declare <9 x double> @llvm.pow.v9f64(<9 x double> %a, <9 x double> %b)
declare <9 x double> @llvm.powi.v9f64(<9 x double> %a, i32)
define void @a(<9 x double>* %p) nounwind {
- %a = load <9 x double>* %p
+ %a = load <9 x double>, <9 x double>* %p
%r = call <9 x double> @llvm.exp.v9f64(<9 x double> %a)
store <9 x double> %r, <9 x double>* %p
ret void
}
define void @b(<9 x double>* %p, <9 x double>* %q) nounwind {
- %a = load <9 x double>* %p
- %b = load <9 x double>* %q
+ %a = load <9 x double>, <9 x double>* %p
+ %b = load <9 x double>, <9 x double>* %q
%r = call <9 x double> @llvm.pow.v9f64(<9 x double> %a, <9 x double> %b)
store <9 x double> %r, <9 x double>* %p
ret void
}
define void @c(<9 x double>* %p, i32 %n) nounwind {
- %a = load <9 x double>* %p
+ %a = load <9 x double>, <9 x double>* %p
%r = call <9 x double> @llvm.powi.v9f64(<9 x double> %a, i32 %n)
store <9 x double> %r, <9 x double>* %p
ret void
diff --git a/test/CodeGen/X86/vector-sext.ll b/test/CodeGen/X86/vector-sext.ll
index 758833155a96..e6acc7efaf39 100644
--- a/test/CodeGen/X86/vector-sext.ll
+++ b/test/CodeGen/X86/vector-sext.ll
@@ -10,37 +10,30 @@
define <8 x i32> @sext_8i16_to_8i32(<8 x i16> %A) nounwind uwtable readnone ssp {
; SSE2-LABEL: sext_8i16_to_8i32:
; SSE2: # BB#0: # %entry
-; SSE2-NEXT: movdqa %xmm0, %xmm1
-; SSE2-NEXT: # kill: XMM0<def> XMM1<kill>
-; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
-; SSE2-NEXT: pslld $16, %xmm0
-; SSE2-NEXT: psrad $16, %xmm0
-; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
-; SSE2-NEXT: pslld $16, %xmm1
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
+; SSE2-NEXT: psrad $16, %xmm2
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; SSE2-NEXT: psrad $16, %xmm1
+; SSE2-NEXT: movdqa %xmm2, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: sext_8i16_to_8i32:
; SSSE3: # BB#0: # %entry
-; SSSE3-NEXT: movdqa %xmm0, %xmm1
-; SSSE3-NEXT: # kill: XMM0<def> XMM1<kill>
-; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
-; SSSE3-NEXT: pslld $16, %xmm0
-; SSSE3-NEXT: psrad $16, %xmm0
-; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
-; SSSE3-NEXT: pslld $16, %xmm1
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
+; SSSE3-NEXT: psrad $16, %xmm2
+; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; SSSE3-NEXT: psrad $16, %xmm1
+; SSSE3-NEXT: movdqa %xmm2, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: sext_8i16_to_8i32:
; SSE41: # BB#0: # %entry
-; SSE41-NEXT: movdqa %xmm0, %xmm1
-; SSE41-NEXT: pmovzxwd %xmm1, %xmm0
-; SSE41-NEXT: pslld $16, %xmm0
-; SSE41-NEXT: psrad $16, %xmm0
-; SSE41-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
-; SSE41-NEXT: pslld $16, %xmm1
-; SSE41-NEXT: psrad $16, %xmm1
+; SSE41-NEXT: pmovsxwd %xmm0, %xmm2
+; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; SSE41-NEXT: pmovsxwd %xmm0, %xmm1
+; SSE41-NEXT: movdqa %xmm2, %xmm0
; SSE41-NEXT: retq
;
; AVX1-LABEL: sext_8i16_to_8i32:
@@ -58,13 +51,10 @@ define <8 x i32> @sext_8i16_to_8i32(<8 x i16> %A) nounwind uwtable readnone ssp
;
; X32-SSE41-LABEL: sext_8i16_to_8i32:
; X32-SSE41: # BB#0: # %entry
-; X32-SSE41-NEXT: movdqa %xmm0, %xmm1
-; X32-SSE41-NEXT: pmovzxwd %xmm1, %xmm0
-; X32-SSE41-NEXT: pslld $16, %xmm0
-; X32-SSE41-NEXT: psrad $16, %xmm0
-; X32-SSE41-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
-; X32-SSE41-NEXT: pslld $16, %xmm1
-; X32-SSE41-NEXT: psrad $16, %xmm1
+; X32-SSE41-NEXT: pmovsxwd %xmm0, %xmm2
+; X32-SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; X32-SSE41-NEXT: pmovsxwd %xmm0, %xmm1
+; X32-SSE41-NEXT: movdqa %xmm2, %xmm0
; X32-SSE41-NEXT: retl
entry:
%B = sext <8 x i16> %A to <8 x i32>
@@ -74,68 +64,31 @@ entry:
define <4 x i64> @sext_4i32_to_4i64(<4 x i32> %A) nounwind uwtable readnone ssp {
; SSE2-LABEL: sext_4i32_to_4i64:
; SSE2: # BB#0: # %entry
-; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,1,1,3]
-; SSE2-NEXT: movd %xmm1, %rax
-; SSE2-NEXT: cltq
-; SSE2-NEXT: movd %rax, %xmm2
-; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
-; SSE2-NEXT: movd %xmm1, %rax
-; SSE2-NEXT: cltq
-; SSE2-NEXT: movd %rax, %xmm1
-; SSE2-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0]
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
-; SSE2-NEXT: movd %xmm0, %rax
-; SSE2-NEXT: cltq
-; SSE2-NEXT: movd %rax, %xmm1
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
-; SSE2-NEXT: movd %xmm0, %rax
-; SSE2-NEXT: cltq
-; SSE2-NEXT: movd %rax, %xmm0
-; SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
-; SSE2-NEXT: movdqa %xmm2, %xmm0
+; SSE2-NEXT: movdqa %xmm0, %xmm2
+; SSE2-NEXT: psrad $31, %xmm2
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; SSE2-NEXT: movdqa %xmm1, %xmm2
+; SSE2-NEXT: psrad $31, %xmm2
+; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: sext_4i32_to_4i64:
; SSSE3: # BB#0: # %entry
-; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,1,1,3]
-; SSSE3-NEXT: movd %xmm1, %rax
-; SSSE3-NEXT: cltq
-; SSSE3-NEXT: movd %rax, %xmm2
-; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
-; SSSE3-NEXT: movd %xmm1, %rax
-; SSSE3-NEXT: cltq
-; SSSE3-NEXT: movd %rax, %xmm1
-; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0]
-; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
-; SSSE3-NEXT: movd %xmm0, %rax
-; SSSE3-NEXT: cltq
-; SSSE3-NEXT: movd %rax, %xmm1
-; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
-; SSSE3-NEXT: movd %xmm0, %rax
-; SSSE3-NEXT: cltq
-; SSSE3-NEXT: movd %rax, %xmm0
-; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
-; SSSE3-NEXT: movdqa %xmm2, %xmm0
+; SSSE3-NEXT: movdqa %xmm0, %xmm2
+; SSSE3-NEXT: psrad $31, %xmm2
+; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; SSSE3-NEXT: movdqa %xmm1, %xmm2
+; SSSE3-NEXT: psrad $31, %xmm2
+; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: sext_4i32_to_4i64:
; SSE41: # BB#0: # %entry
-; SSE41-NEXT: pmovzxdq %xmm0, %xmm1
-; SSE41-NEXT: pextrq $1, %xmm1, %rax
-; SSE41-NEXT: cltq
-; SSE41-NEXT: movd %rax, %xmm3
-; SSE41-NEXT: movd %xmm1, %rax
-; SSE41-NEXT: cltq
-; SSE41-NEXT: movd %rax, %xmm2
-; SSE41-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
-; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
-; SSE41-NEXT: pextrq $1, %xmm0, %rax
-; SSE41-NEXT: cltq
-; SSE41-NEXT: movd %rax, %xmm3
-; SSE41-NEXT: movd %xmm0, %rax
-; SSE41-NEXT: cltq
-; SSE41-NEXT: movd %rax, %xmm1
-; SSE41-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
+; SSE41-NEXT: pmovsxdq %xmm0, %xmm2
+; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; SSE41-NEXT: pmovsxdq %xmm0, %xmm1
; SSE41-NEXT: movdqa %xmm2, %xmm0
; SSE41-NEXT: retq
;
@@ -154,20 +107,9 @@ define <4 x i64> @sext_4i32_to_4i64(<4 x i32> %A) nounwind uwtable readnone ssp
;
; X32-SSE41-LABEL: sext_4i32_to_4i64:
; X32-SSE41: # BB#0: # %entry
-; X32-SSE41-NEXT: pmovzxdq %xmm0, %xmm2
-; X32-SSE41-NEXT: movd %xmm2, %eax
-; X32-SSE41-NEXT: sarl $31, %eax
-; X32-SSE41-NEXT: pextrd $2, %xmm2, %ecx
-; X32-SSE41-NEXT: pinsrd $1, %eax, %xmm2
-; X32-SSE41-NEXT: sarl $31, %ecx
-; X32-SSE41-NEXT: pinsrd $3, %ecx, %xmm2
-; X32-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,2,3,3]
-; X32-SSE41-NEXT: movd %xmm1, %eax
-; X32-SSE41-NEXT: sarl $31, %eax
-; X32-SSE41-NEXT: pextrd $2, %xmm1, %ecx
-; X32-SSE41-NEXT: pinsrd $1, %eax, %xmm1
-; X32-SSE41-NEXT: sarl $31, %ecx
-; X32-SSE41-NEXT: pinsrd $3, %ecx, %xmm1
+; X32-SSE41-NEXT: pmovsxdq %xmm0, %xmm2
+; X32-SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; X32-SSE41-NEXT: pmovsxdq %xmm0, %xmm1
; X32-SSE41-NEXT: movdqa %xmm2, %xmm0
; X32-SSE41-NEXT: retl
entry:
@@ -206,7 +148,7 @@ define <4 x i32> @load_sext_test1(<4 x i16> *%ptr) {
; X32-SSE41-NEXT: pmovsxwd (%eax), %xmm0
; X32-SSE41-NEXT: retl
entry:
- %X = load <4 x i16>* %ptr
+ %X = load <4 x i16>, <4 x i16>* %ptr
%Y = sext <4 x i16> %X to <4 x i32>
ret <4 x i32>%Y
}
@@ -244,7 +186,7 @@ define <4 x i32> @load_sext_test2(<4 x i8> *%ptr) {
; X32-SSE41-NEXT: pmovsxbd (%eax), %xmm0
; X32-SSE41-NEXT: retl
entry:
- %X = load <4 x i8>* %ptr
+ %X = load <4 x i8>, <4 x i8>* %ptr
%Y = sext <4 x i8> %X to <4 x i32>
ret <4 x i32>%Y
}
@@ -252,20 +194,26 @@ entry:
define <2 x i64> @load_sext_test3(<2 x i8> *%ptr) {
; SSE2-LABEL: load_sext_test3:
; SSE2: # BB#0: # %entry
-; SSE2-NEXT: movsbq 1(%rdi), %rax
-; SSE2-NEXT: movd %rax, %xmm1
-; SSE2-NEXT: movsbq (%rdi), %rax
-; SSE2-NEXT: movd %rax, %xmm0
-; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE2-NEXT: movzwl (%rdi), %eax
+; SSE2-NEXT: movd %eax, %xmm0
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
+; SSE2-NEXT: movdqa %xmm0, %xmm1
+; SSE2-NEXT: psrad $31, %xmm1
+; SSE2-NEXT: psrad $24, %xmm0
+; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: load_sext_test3:
; SSSE3: # BB#0: # %entry
-; SSSE3-NEXT: movsbq 1(%rdi), %rax
-; SSSE3-NEXT: movd %rax, %xmm1
-; SSSE3-NEXT: movsbq (%rdi), %rax
-; SSSE3-NEXT: movd %rax, %xmm0
-; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSSE3-NEXT: movzwl (%rdi), %eax
+; SSSE3-NEXT: movd %eax, %xmm0
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
+; SSSE3-NEXT: movdqa %xmm0, %xmm1
+; SSSE3-NEXT: psrad $31, %xmm1
+; SSSE3-NEXT: psrad $24, %xmm0
+; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: load_sext_test3:
@@ -284,7 +232,7 @@ define <2 x i64> @load_sext_test3(<2 x i8> *%ptr) {
; X32-SSE41-NEXT: pmovsxbq (%eax), %xmm0
; X32-SSE41-NEXT: retl
entry:
- %X = load <2 x i8>* %ptr
+ %X = load <2 x i8>, <2 x i8>* %ptr
%Y = sext <2 x i8> %X to <2 x i64>
ret <2 x i64>%Y
}
@@ -292,20 +240,22 @@ entry:
define <2 x i64> @load_sext_test4(<2 x i16> *%ptr) {
; SSE2-LABEL: load_sext_test4:
; SSE2: # BB#0: # %entry
-; SSE2-NEXT: movswq 2(%rdi), %rax
-; SSE2-NEXT: movd %rax, %xmm1
-; SSE2-NEXT: movswq (%rdi), %rax
-; SSE2-NEXT: movd %rax, %xmm0
-; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE2-NEXT: movd (%rdi), %xmm0
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
+; SSE2-NEXT: movdqa %xmm0, %xmm1
+; SSE2-NEXT: psrad $31, %xmm1
+; SSE2-NEXT: psrad $16, %xmm0
+; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: load_sext_test4:
; SSSE3: # BB#0: # %entry
-; SSSE3-NEXT: movswq 2(%rdi), %rax
-; SSSE3-NEXT: movd %rax, %xmm1
-; SSSE3-NEXT: movswq (%rdi), %rax
-; SSSE3-NEXT: movd %rax, %xmm0
-; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSSE3-NEXT: movd (%rdi), %xmm0
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
+; SSSE3-NEXT: movdqa %xmm0, %xmm1
+; SSSE3-NEXT: psrad $31, %xmm1
+; SSSE3-NEXT: psrad $16, %xmm0
+; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: load_sext_test4:
@@ -324,7 +274,7 @@ define <2 x i64> @load_sext_test4(<2 x i16> *%ptr) {
; X32-SSE41-NEXT: pmovsxwq (%eax), %xmm0
; X32-SSE41-NEXT: retl
entry:
- %X = load <2 x i16>* %ptr
+ %X = load <2 x i16>, <2 x i16>* %ptr
%Y = sext <2 x i16> %X to <2 x i64>
ret <2 x i64>%Y
}
@@ -332,20 +282,18 @@ entry:
define <2 x i64> @load_sext_test5(<2 x i32> *%ptr) {
; SSE2-LABEL: load_sext_test5:
; SSE2: # BB#0: # %entry
-; SSE2-NEXT: movslq 4(%rdi), %rax
-; SSE2-NEXT: movd %rax, %xmm1
-; SSE2-NEXT: movslq (%rdi), %rax
-; SSE2-NEXT: movd %rax, %xmm0
-; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE2-NEXT: movq (%rdi), %xmm0
+; SSE2-NEXT: movdqa %xmm0, %xmm1
+; SSE2-NEXT: psrad $31, %xmm1
+; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: load_sext_test5:
; SSSE3: # BB#0: # %entry
-; SSSE3-NEXT: movslq 4(%rdi), %rax
-; SSSE3-NEXT: movd %rax, %xmm1
-; SSSE3-NEXT: movslq (%rdi), %rax
-; SSSE3-NEXT: movd %rax, %xmm0
-; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSSE3-NEXT: movq (%rdi), %xmm0
+; SSSE3-NEXT: movdqa %xmm0, %xmm1
+; SSSE3-NEXT: psrad $31, %xmm1
+; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: load_sext_test5:
@@ -364,7 +312,7 @@ define <2 x i64> @load_sext_test5(<2 x i32> *%ptr) {
; X32-SSE41-NEXT: pmovsxdq (%eax), %xmm0
; X32-SSE41-NEXT: retl
entry:
- %X = load <2 x i32>* %ptr
+ %X = load <2 x i32>, <2 x i32>* %ptr
%Y = sext <2 x i32> %X to <2 x i64>
ret <2 x i64>%Y
}
@@ -400,7 +348,7 @@ define <8 x i16> @load_sext_test6(<8 x i8> *%ptr) {
; X32-SSE41-NEXT: pmovsxbw (%eax), %xmm0
; X32-SSE41-NEXT: retl
entry:
- %X = load <8 x i8>* %ptr
+ %X = load <8 x i8>, <8 x i8>* %ptr
%Y = sext <8 x i8> %X to <8 x i16>
ret <8 x i16>%Y
}
@@ -410,72 +358,35 @@ define <4 x i64> @sext_4i1_to_4i64(<4 x i1> %mask) {
; SSE2: # BB#0:
; SSE2-NEXT: pslld $31, %xmm0
; SSE2-NEXT: psrad $31, %xmm0
-; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,1,1,3]
-; SSE2-NEXT: movd %xmm1, %rax
-; SSE2-NEXT: cltq
-; SSE2-NEXT: movd %rax, %xmm2
-; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
-; SSE2-NEXT: movd %xmm1, %rax
-; SSE2-NEXT: cltq
-; SSE2-NEXT: movd %rax, %xmm1
-; SSE2-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0]
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
-; SSE2-NEXT: movd %xmm0, %rax
-; SSE2-NEXT: cltq
-; SSE2-NEXT: movd %rax, %xmm1
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
-; SSE2-NEXT: movd %xmm0, %rax
-; SSE2-NEXT: cltq
-; SSE2-NEXT: movd %rax, %xmm0
-; SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
-; SSE2-NEXT: movdqa %xmm2, %xmm0
+; SSE2-NEXT: movdqa %xmm0, %xmm2
+; SSE2-NEXT: psrad $31, %xmm2
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; SSE2-NEXT: movdqa %xmm1, %xmm2
+; SSE2-NEXT: psrad $31, %xmm2
+; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: sext_4i1_to_4i64:
; SSSE3: # BB#0:
; SSSE3-NEXT: pslld $31, %xmm0
; SSSE3-NEXT: psrad $31, %xmm0
-; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,1,1,3]
-; SSSE3-NEXT: movd %xmm1, %rax
-; SSSE3-NEXT: cltq
-; SSSE3-NEXT: movd %rax, %xmm2
-; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
-; SSSE3-NEXT: movd %xmm1, %rax
-; SSSE3-NEXT: cltq
-; SSSE3-NEXT: movd %rax, %xmm1
-; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0]
-; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
-; SSSE3-NEXT: movd %xmm0, %rax
-; SSSE3-NEXT: cltq
-; SSSE3-NEXT: movd %rax, %xmm1
-; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
-; SSSE3-NEXT: movd %xmm0, %rax
-; SSSE3-NEXT: cltq
-; SSSE3-NEXT: movd %rax, %xmm0
-; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
-; SSSE3-NEXT: movdqa %xmm2, %xmm0
+; SSSE3-NEXT: movdqa %xmm0, %xmm2
+; SSSE3-NEXT: psrad $31, %xmm2
+; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; SSSE3-NEXT: movdqa %xmm1, %xmm2
+; SSSE3-NEXT: psrad $31, %xmm2
+; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: sext_4i1_to_4i64:
; SSE41: # BB#0:
; SSE41-NEXT: pslld $31, %xmm0
; SSE41-NEXT: psrad $31, %xmm0
-; SSE41-NEXT: pmovzxdq %xmm0, %xmm1
-; SSE41-NEXT: pextrq $1, %xmm1, %rax
-; SSE41-NEXT: cltq
-; SSE41-NEXT: movd %rax, %xmm3
-; SSE41-NEXT: movd %xmm1, %rax
-; SSE41-NEXT: cltq
-; SSE41-NEXT: movd %rax, %xmm2
-; SSE41-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
-; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
-; SSE41-NEXT: pextrq $1, %xmm0, %rax
-; SSE41-NEXT: cltq
-; SSE41-NEXT: movd %rax, %xmm3
-; SSE41-NEXT: movd %xmm0, %rax
-; SSE41-NEXT: cltq
-; SSE41-NEXT: movd %rax, %xmm1
-; SSE41-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
+; SSE41-NEXT: pmovsxdq %xmm0, %xmm2
+; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; SSE41-NEXT: pmovsxdq %xmm0, %xmm1
; SSE41-NEXT: movdqa %xmm2, %xmm0
; SSE41-NEXT: retq
;
@@ -500,20 +411,9 @@ define <4 x i64> @sext_4i1_to_4i64(<4 x i1> %mask) {
; X32-SSE41: # BB#0:
; X32-SSE41-NEXT: pslld $31, %xmm0
; X32-SSE41-NEXT: psrad $31, %xmm0
-; X32-SSE41-NEXT: pmovzxdq %xmm0, %xmm2
-; X32-SSE41-NEXT: movd %xmm2, %eax
-; X32-SSE41-NEXT: sarl $31, %eax
-; X32-SSE41-NEXT: pextrd $2, %xmm2, %ecx
-; X32-SSE41-NEXT: pinsrd $1, %eax, %xmm2
-; X32-SSE41-NEXT: sarl $31, %ecx
-; X32-SSE41-NEXT: pinsrd $3, %ecx, %xmm2
-; X32-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,2,3,3]
-; X32-SSE41-NEXT: movd %xmm1, %eax
-; X32-SSE41-NEXT: sarl $31, %eax
-; X32-SSE41-NEXT: pextrd $2, %xmm1, %ecx
-; X32-SSE41-NEXT: pinsrd $1, %eax, %xmm1
-; X32-SSE41-NEXT: sarl $31, %ecx
-; X32-SSE41-NEXT: pinsrd $3, %ecx, %xmm1
+; X32-SSE41-NEXT: pmovsxdq %xmm0, %xmm2
+; X32-SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; X32-SSE41-NEXT: pmovsxdq %xmm0, %xmm1
; X32-SSE41-NEXT: movdqa %xmm2, %xmm0
; X32-SSE41-NEXT: retl
%extmask = sext <4 x i1> %mask to <4 x i64>
@@ -523,46 +423,35 @@ define <4 x i64> @sext_4i1_to_4i64(<4 x i1> %mask) {
define <16 x i16> @sext_16i8_to_16i16(<16 x i8> *%ptr) {
; SSE2-LABEL: sext_16i8_to_16i16:
; SSE2: # BB#0: # %entry
-; SSE2-NEXT: movdqa (%rdi), %xmm1
-; SSE2-NEXT: movdqa %xmm1, %xmm0
+; SSE2-NEXT: movq (%rdi), %xmm0
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; SSE2-NEXT: psllw $8, %xmm0
; SSE2-NEXT: psraw $8, %xmm0
-; SSE2-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
-; SSE2-NEXT: psllw $8, %xmm1
+; SSE2-NEXT: movq 8(%rdi), %xmm1
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT: psraw $8, %xmm1
; SSE2-NEXT: retq
;
; SSSE3-LABEL: sext_16i8_to_16i16:
; SSSE3: # BB#0: # %entry
-; SSSE3-NEXT: movdqa (%rdi), %xmm1
-; SSSE3-NEXT: movdqa %xmm1, %xmm0
+; SSSE3-NEXT: movq (%rdi), %xmm0
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; SSSE3-NEXT: psllw $8, %xmm0
; SSSE3-NEXT: psraw $8, %xmm0
-; SSSE3-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
-; SSSE3-NEXT: psllw $8, %xmm1
+; SSSE3-NEXT: movq 8(%rdi), %xmm1
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSSE3-NEXT: psraw $8, %xmm1
; SSSE3-NEXT: retq
;
; SSE41-LABEL: sext_16i8_to_16i16:
; SSE41: # BB#0: # %entry
-; SSE41-NEXT: movdqa (%rdi), %xmm1
-; SSE41-NEXT: pmovzxbw %xmm1, %xmm0
-; SSE41-NEXT: psllw $8, %xmm0
-; SSE41-NEXT: psraw $8, %xmm0
-; SSE41-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
-; SSE41-NEXT: psllw $8, %xmm1
-; SSE41-NEXT: psraw $8, %xmm1
+; SSE41-NEXT: pmovsxbw (%rdi), %xmm0
+; SSE41-NEXT: pmovsxbw 8(%rdi), %xmm1
; SSE41-NEXT: retq
;
; AVX1-LABEL: sext_16i8_to_16i16:
; AVX1: # BB#0: # %entry
-; AVX1-NEXT: vmovdqa (%rdi), %xmm0
-; AVX1-NEXT: vpmovsxbw %xmm0, %xmm1
-; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
-; AVX1-NEXT: vpmovsxbw %xmm0, %xmm0
-; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: vpmovsxbw (%rdi), %xmm0
+; AVX1-NEXT: vpmovsxbw 8(%rdi), %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: sext_16i8_to_16i16:
@@ -573,16 +462,11 @@ define <16 x i16> @sext_16i8_to_16i16(<16 x i8> *%ptr) {
; X32-SSE41-LABEL: sext_16i8_to_16i16:
; X32-SSE41: # BB#0: # %entry
; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-SSE41-NEXT: movdqa (%eax), %xmm1
-; X32-SSE41-NEXT: pmovzxbw %xmm1, %xmm0
-; X32-SSE41-NEXT: psllw $8, %xmm0
-; X32-SSE41-NEXT: psraw $8, %xmm0
-; X32-SSE41-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
-; X32-SSE41-NEXT: psllw $8, %xmm1
-; X32-SSE41-NEXT: psraw $8, %xmm1
+; X32-SSE41-NEXT: pmovsxbw (%eax), %xmm0
+; X32-SSE41-NEXT: pmovsxbw 8(%eax), %xmm1
; X32-SSE41-NEXT: retl
entry:
- %X = load <16 x i8>* %ptr
+ %X = load <16 x i8>, <16 x i8>* %ptr
%Y = sext <16 x i8> %X to <16 x i16>
ret <16 x i16> %Y
}
@@ -592,72 +476,35 @@ define <4 x i64> @sext_4i8_to_4i64(<4 x i8> %mask) {
; SSE2: # BB#0:
; SSE2-NEXT: pslld $24, %xmm0
; SSE2-NEXT: psrad $24, %xmm0
-; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,1,1,3]
-; SSE2-NEXT: movd %xmm1, %rax
-; SSE2-NEXT: cltq
-; SSE2-NEXT: movd %rax, %xmm2
-; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
-; SSE2-NEXT: movd %xmm1, %rax
-; SSE2-NEXT: cltq
-; SSE2-NEXT: movd %rax, %xmm1
-; SSE2-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0]
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
-; SSE2-NEXT: movd %xmm0, %rax
-; SSE2-NEXT: cltq
-; SSE2-NEXT: movd %rax, %xmm1
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
-; SSE2-NEXT: movd %xmm0, %rax
-; SSE2-NEXT: cltq
-; SSE2-NEXT: movd %rax, %xmm0
-; SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
-; SSE2-NEXT: movdqa %xmm2, %xmm0
+; SSE2-NEXT: movdqa %xmm0, %xmm2
+; SSE2-NEXT: psrad $31, %xmm2
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; SSE2-NEXT: movdqa %xmm1, %xmm2
+; SSE2-NEXT: psrad $31, %xmm2
+; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: sext_4i8_to_4i64:
; SSSE3: # BB#0:
; SSSE3-NEXT: pslld $24, %xmm0
; SSSE3-NEXT: psrad $24, %xmm0
-; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,1,1,3]
-; SSSE3-NEXT: movd %xmm1, %rax
-; SSSE3-NEXT: cltq
-; SSSE3-NEXT: movd %rax, %xmm2
-; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
-; SSSE3-NEXT: movd %xmm1, %rax
-; SSSE3-NEXT: cltq
-; SSSE3-NEXT: movd %rax, %xmm1
-; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0]
-; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
-; SSSE3-NEXT: movd %xmm0, %rax
-; SSSE3-NEXT: cltq
-; SSSE3-NEXT: movd %rax, %xmm1
-; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
-; SSSE3-NEXT: movd %xmm0, %rax
-; SSSE3-NEXT: cltq
-; SSSE3-NEXT: movd %rax, %xmm0
-; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
-; SSSE3-NEXT: movdqa %xmm2, %xmm0
+; SSSE3-NEXT: movdqa %xmm0, %xmm2
+; SSSE3-NEXT: psrad $31, %xmm2
+; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; SSSE3-NEXT: movdqa %xmm1, %xmm2
+; SSSE3-NEXT: psrad $31, %xmm2
+; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: sext_4i8_to_4i64:
; SSE41: # BB#0:
; SSE41-NEXT: pslld $24, %xmm0
; SSE41-NEXT: psrad $24, %xmm0
-; SSE41-NEXT: pmovzxdq %xmm0, %xmm1
-; SSE41-NEXT: pextrq $1, %xmm1, %rax
-; SSE41-NEXT: cltq
-; SSE41-NEXT: movd %rax, %xmm3
-; SSE41-NEXT: movd %xmm1, %rax
-; SSE41-NEXT: cltq
-; SSE41-NEXT: movd %rax, %xmm2
-; SSE41-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
-; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
-; SSE41-NEXT: pextrq $1, %xmm0, %rax
-; SSE41-NEXT: cltq
-; SSE41-NEXT: movd %rax, %xmm3
-; SSE41-NEXT: movd %xmm0, %rax
-; SSE41-NEXT: cltq
-; SSE41-NEXT: movd %rax, %xmm1
-; SSE41-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
+; SSE41-NEXT: pmovsxdq %xmm0, %xmm2
+; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; SSE41-NEXT: pmovsxdq %xmm0, %xmm1
; SSE41-NEXT: movdqa %xmm2, %xmm0
; SSE41-NEXT: retq
;
@@ -682,20 +529,9 @@ define <4 x i64> @sext_4i8_to_4i64(<4 x i8> %mask) {
; X32-SSE41: # BB#0:
; X32-SSE41-NEXT: pslld $24, %xmm0
; X32-SSE41-NEXT: psrad $24, %xmm0
-; X32-SSE41-NEXT: pmovzxdq %xmm0, %xmm2
-; X32-SSE41-NEXT: movd %xmm2, %eax
-; X32-SSE41-NEXT: sarl $31, %eax
-; X32-SSE41-NEXT: pextrd $2, %xmm2, %ecx
-; X32-SSE41-NEXT: pinsrd $1, %eax, %xmm2
-; X32-SSE41-NEXT: sarl $31, %ecx
-; X32-SSE41-NEXT: pinsrd $3, %ecx, %xmm2
-; X32-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,2,3,3]
-; X32-SSE41-NEXT: movd %xmm1, %eax
-; X32-SSE41-NEXT: sarl $31, %eax
-; X32-SSE41-NEXT: pextrd $2, %xmm1, %ecx
-; X32-SSE41-NEXT: pinsrd $1, %eax, %xmm1
-; X32-SSE41-NEXT: sarl $31, %ecx
-; X32-SSE41-NEXT: pinsrd $3, %ecx, %xmm1
+; X32-SSE41-NEXT: pmovsxdq %xmm0, %xmm2
+; X32-SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; X32-SSE41-NEXT: pmovsxdq %xmm0, %xmm1
; X32-SSE41-NEXT: movdqa %xmm2, %xmm0
; X32-SSE41-NEXT: retl
%extmask = sext <4 x i8> %mask to <4 x i64>
@@ -705,73 +541,36 @@ define <4 x i64> @sext_4i8_to_4i64(<4 x i8> %mask) {
define <4 x i64> @load_sext_4i8_to_4i64(<4 x i8> *%ptr) {
; SSE2-LABEL: load_sext_4i8_to_4i64:
; SSE2: # BB#0: # %entry
-; SSE2-NEXT: movd (%rdi), %xmm1
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
-; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
-; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[0,1,1,3]
-; SSE2-NEXT: movd %xmm2, %rax
-; SSE2-NEXT: movsbq %al, %rax
+; SSE2-NEXT: movsbq 1(%rdi), %rax
+; SSE2-NEXT: movd %rax, %xmm1
+; SSE2-NEXT: movsbq (%rdi), %rax
; SSE2-NEXT: movd %rax, %xmm0
-; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,3,0,1]
-; SSE2-NEXT: movd %xmm2, %rax
-; SSE2-NEXT: movsbq %al, %rax
+; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE2-NEXT: movsbq 3(%rdi), %rax
; SSE2-NEXT: movd %rax, %xmm2
-; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
-; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,2,3,3]
-; SSE2-NEXT: movd %xmm2, %rax
-; SSE2-NEXT: movsbq %al, %rax
+; SSE2-NEXT: movsbq 2(%rdi), %rax
; SSE2-NEXT: movd %rax, %xmm1
-; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,3,0,1]
-; SSE2-NEXT: movd %xmm2, %rax
-; SSE2-NEXT: movsbq %al, %rax
-; SSE2-NEXT: movd %rax, %xmm2
; SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: load_sext_4i8_to_4i64:
; SSSE3: # BB#0: # %entry
-; SSSE3-NEXT: movd (%rdi), %xmm1
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
-; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
-; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[0,1,1,3]
-; SSSE3-NEXT: movd %xmm2, %rax
-; SSSE3-NEXT: movsbq %al, %rax
+; SSSE3-NEXT: movsbq 1(%rdi), %rax
+; SSSE3-NEXT: movd %rax, %xmm1
+; SSSE3-NEXT: movsbq (%rdi), %rax
; SSSE3-NEXT: movd %rax, %xmm0
-; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,3,0,1]
-; SSSE3-NEXT: movd %xmm2, %rax
-; SSSE3-NEXT: movsbq %al, %rax
+; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSSE3-NEXT: movsbq 3(%rdi), %rax
; SSSE3-NEXT: movd %rax, %xmm2
-; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
-; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,2,3,3]
-; SSSE3-NEXT: movd %xmm2, %rax
-; SSSE3-NEXT: movsbq %al, %rax
+; SSSE3-NEXT: movsbq 2(%rdi), %rax
; SSSE3-NEXT: movd %rax, %xmm1
-; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,3,0,1]
-; SSSE3-NEXT: movd %xmm2, %rax
-; SSSE3-NEXT: movsbq %al, %rax
-; SSSE3-NEXT: movd %rax, %xmm2
; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: load_sext_4i8_to_4i64:
; SSE41: # BB#0: # %entry
-; SSE41-NEXT: pmovzxbd (%rdi), %xmm1
-; SSE41-NEXT: pmovzxdq %xmm1, %xmm0
-; SSE41-NEXT: pextrq $1, %xmm0, %rax
-; SSE41-NEXT: movsbq %al, %rax
-; SSE41-NEXT: movd %rax, %xmm2
-; SSE41-NEXT: movd %xmm0, %rax
-; SSE41-NEXT: movsbq %al, %rax
-; SSE41-NEXT: movd %rax, %xmm0
-; SSE41-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
-; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
-; SSE41-NEXT: pextrq $1, %xmm1, %rax
-; SSE41-NEXT: movsbq %al, %rax
-; SSE41-NEXT: movd %rax, %xmm2
-; SSE41-NEXT: movd %xmm1, %rax
-; SSE41-NEXT: movsbq %al, %rax
-; SSE41-NEXT: movd %rax, %xmm1
-; SSE41-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; SSE41-NEXT: pmovsxbq (%rdi), %xmm0
+; SSE41-NEXT: pmovsxbq 2(%rdi), %xmm1
; SSE41-NEXT: retq
;
; AVX1-LABEL: load_sext_4i8_to_4i64:
@@ -791,33 +590,11 @@ define <4 x i64> @load_sext_4i8_to_4i64(<4 x i8> *%ptr) {
; X32-SSE41-LABEL: load_sext_4i8_to_4i64:
; X32-SSE41: # BB#0: # %entry
; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-SSE41-NEXT: movd (%eax), %xmm0
-; X32-SSE41-NEXT: pmovzxbd %xmm0, %xmm1
-; X32-SSE41-NEXT: pmovzxbq %xmm0, %xmm2
-; X32-SSE41-NEXT: movd %xmm2, %eax
-; X32-SSE41-NEXT: movsbl %al, %eax
-; X32-SSE41-NEXT: movd %eax, %xmm0
-; X32-SSE41-NEXT: sarl $31, %eax
-; X32-SSE41-NEXT: pinsrd $1, %eax, %xmm0
-; X32-SSE41-NEXT: pextrd $2, %xmm2, %eax
-; X32-SSE41-NEXT: movsbl %al, %eax
-; X32-SSE41-NEXT: pinsrd $2, %eax, %xmm0
-; X32-SSE41-NEXT: sarl $31, %eax
-; X32-SSE41-NEXT: pinsrd $3, %eax, %xmm0
-; X32-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,2,3,3]
-; X32-SSE41-NEXT: movd %xmm2, %eax
-; X32-SSE41-NEXT: movsbl %al, %eax
-; X32-SSE41-NEXT: movd %eax, %xmm1
-; X32-SSE41-NEXT: sarl $31, %eax
-; X32-SSE41-NEXT: pinsrd $1, %eax, %xmm1
-; X32-SSE41-NEXT: pextrd $2, %xmm2, %eax
-; X32-SSE41-NEXT: movsbl %al, %eax
-; X32-SSE41-NEXT: pinsrd $2, %eax, %xmm1
-; X32-SSE41-NEXT: sarl $31, %eax
-; X32-SSE41-NEXT: pinsrd $3, %eax, %xmm1
+; X32-SSE41-NEXT: pmovsxbq (%eax), %xmm0
+; X32-SSE41-NEXT: pmovsxbq 2(%eax), %xmm1
; X32-SSE41-NEXT: retl
entry:
- %X = load <4 x i8>* %ptr
+ %X = load <4 x i8>, <4 x i8>* %ptr
%Y = sext <4 x i8> %X to <4 x i64>
ret <4 x i64>%Y
}
@@ -825,72 +602,36 @@ entry:
define <4 x i64> @load_sext_4i16_to_4i64(<4 x i16> *%ptr) {
; SSE2-LABEL: load_sext_4i16_to_4i64:
; SSE2: # BB#0: # %entry
-; SSE2-NEXT: movq (%rdi), %xmm1
-; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
-; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[0,1,1,3]
-; SSE2-NEXT: movd %xmm2, %rax
-; SSE2-NEXT: movswq %ax, %rax
+; SSE2-NEXT: movswq 2(%rdi), %rax
+; SSE2-NEXT: movd %rax, %xmm1
+; SSE2-NEXT: movswq (%rdi), %rax
; SSE2-NEXT: movd %rax, %xmm0
-; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,3,0,1]
-; SSE2-NEXT: movd %xmm2, %rax
-; SSE2-NEXT: movswq %ax, %rax
+; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE2-NEXT: movswq 6(%rdi), %rax
; SSE2-NEXT: movd %rax, %xmm2
-; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
-; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,2,3,3]
-; SSE2-NEXT: movd %xmm2, %rax
-; SSE2-NEXT: movswq %ax, %rax
+; SSE2-NEXT: movswq 4(%rdi), %rax
; SSE2-NEXT: movd %rax, %xmm1
-; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,3,0,1]
-; SSE2-NEXT: movd %xmm2, %rax
-; SSE2-NEXT: movswq %ax, %rax
-; SSE2-NEXT: movd %rax, %xmm2
; SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: load_sext_4i16_to_4i64:
; SSSE3: # BB#0: # %entry
-; SSSE3-NEXT: movq (%rdi), %xmm1
-; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
-; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[0,1,1,3]
-; SSSE3-NEXT: movd %xmm2, %rax
-; SSSE3-NEXT: movswq %ax, %rax
+; SSSE3-NEXT: movswq 2(%rdi), %rax
+; SSSE3-NEXT: movd %rax, %xmm1
+; SSSE3-NEXT: movswq (%rdi), %rax
; SSSE3-NEXT: movd %rax, %xmm0
-; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,3,0,1]
-; SSSE3-NEXT: movd %xmm2, %rax
-; SSSE3-NEXT: movswq %ax, %rax
+; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSSE3-NEXT: movswq 6(%rdi), %rax
; SSSE3-NEXT: movd %rax, %xmm2
-; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
-; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,2,3,3]
-; SSSE3-NEXT: movd %xmm2, %rax
-; SSSE3-NEXT: movswq %ax, %rax
+; SSSE3-NEXT: movswq 4(%rdi), %rax
; SSSE3-NEXT: movd %rax, %xmm1
-; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,3,0,1]
-; SSSE3-NEXT: movd %xmm2, %rax
-; SSSE3-NEXT: movswq %ax, %rax
-; SSSE3-NEXT: movd %rax, %xmm2
; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: load_sext_4i16_to_4i64:
; SSE41: # BB#0: # %entry
-; SSE41-NEXT: movq (%rdi), %xmm0
-; SSE41-NEXT: pmovzxwd %xmm0, %xmm1
-; SSE41-NEXT: pmovzxwq %xmm0, %xmm0
-; SSE41-NEXT: pextrq $1, %xmm0, %rax
-; SSE41-NEXT: movswq %ax, %rax
-; SSE41-NEXT: movd %rax, %xmm2
-; SSE41-NEXT: movd %xmm0, %rax
-; SSE41-NEXT: movswq %ax, %rax
-; SSE41-NEXT: movd %rax, %xmm0
-; SSE41-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
-; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
-; SSE41-NEXT: pextrq $1, %xmm1, %rax
-; SSE41-NEXT: movswq %ax, %rax
-; SSE41-NEXT: movd %rax, %xmm2
-; SSE41-NEXT: movd %xmm1, %rax
-; SSE41-NEXT: movswq %ax, %rax
-; SSE41-NEXT: movd %rax, %xmm1
-; SSE41-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; SSE41-NEXT: pmovsxwq (%rdi), %xmm0
+; SSE41-NEXT: pmovsxwq 4(%rdi), %xmm1
; SSE41-NEXT: retq
;
; AVX1-LABEL: load_sext_4i16_to_4i64:
@@ -910,33 +651,11 @@ define <4 x i64> @load_sext_4i16_to_4i64(<4 x i16> *%ptr) {
; X32-SSE41-LABEL: load_sext_4i16_to_4i64:
; X32-SSE41: # BB#0: # %entry
; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-SSE41-NEXT: movsd (%eax), %xmm0
-; X32-SSE41-NEXT: pmovzxwd %xmm0, %xmm1
-; X32-SSE41-NEXT: pmovzxwq %xmm0, %xmm2
-; X32-SSE41-NEXT: movd %xmm2, %eax
-; X32-SSE41-NEXT: cwtl
-; X32-SSE41-NEXT: movd %eax, %xmm0
-; X32-SSE41-NEXT: sarl $31, %eax
-; X32-SSE41-NEXT: pinsrd $1, %eax, %xmm0
-; X32-SSE41-NEXT: pextrd $2, %xmm2, %eax
-; X32-SSE41-NEXT: cwtl
-; X32-SSE41-NEXT: pinsrd $2, %eax, %xmm0
-; X32-SSE41-NEXT: sarl $31, %eax
-; X32-SSE41-NEXT: pinsrd $3, %eax, %xmm0
-; X32-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,2,3,3]
-; X32-SSE41-NEXT: movd %xmm2, %eax
-; X32-SSE41-NEXT: cwtl
-; X32-SSE41-NEXT: movd %eax, %xmm1
-; X32-SSE41-NEXT: sarl $31, %eax
-; X32-SSE41-NEXT: pinsrd $1, %eax, %xmm1
-; X32-SSE41-NEXT: pextrd $2, %xmm2, %eax
-; X32-SSE41-NEXT: cwtl
-; X32-SSE41-NEXT: pinsrd $2, %eax, %xmm1
-; X32-SSE41-NEXT: sarl $31, %eax
-; X32-SSE41-NEXT: pinsrd $3, %eax, %xmm1
+; X32-SSE41-NEXT: pmovsxwq (%eax), %xmm0
+; X32-SSE41-NEXT: pmovsxwq 4(%eax), %xmm1
; X32-SSE41-NEXT: retl
entry:
- %X = load <4 x i16>* %ptr
+ %X = load <4 x i16>, <4 x i16>* %ptr
%Y = sext <4 x i16> %X to <4 x i64>
ret <4 x i64>%Y
}
diff --git a/test/CodeGen/X86/vector-shuffle-128-v16.ll b/test/CodeGen/X86/vector-shuffle-128-v16.ll
index 14058c912861..53d13c86657b 100644
--- a/test/CodeGen/X86/vector-shuffle-128-v16.ll
+++ b/test/CodeGen/X86/vector-shuffle-128-v16.ll
@@ -1,8 +1,8 @@
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+ssse3 -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSSE3
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse4.1 -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx2 -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+ssse3 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSSE3
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-unknown"
@@ -247,13 +247,34 @@ define <16 x i8> @shuffle_v16i8_08_24_09_25_10_26_11_27_12_28_13_29_14_30_15_31(
}
define <16 x i8> @shuffle_v16i8_16_00_16_01_16_02_16_03_16_04_16_05_16_06_16_07(<16 x i8> %a, <16 x i8> %b) {
-; SSE-LABEL: shuffle_v16i8_16_00_16_01_16_02_16_03_16_04_16_05_16_06_16_07:
-; SSE: # BB#0:
-; SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
-; SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
-; SSE-NEXT: movdqa %xmm1, %xmm0
-; SSE-NEXT: retq
+; SSE2-LABEL: shuffle_v16i8_16_00_16_01_16_02_16_03_16_04_16_05_16_06_16_07:
+; SSE2: # BB#0:
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,0,3]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
+; SSE2-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,4,4]
+; SSE2-NEXT: pand %xmm2, %xmm1
+; SSE2-NEXT: pandn %xmm0, %xmm2
+; SSE2-NEXT: por %xmm1, %xmm2
+; SSE2-NEXT: movdqa %xmm2, %xmm0
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: shuffle_v16i8_16_00_16_01_16_02_16_03_16_04_16_05_16_06_16_07:
+; SSSE3: # BB#0:
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSSE3-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; SSSE3-NEXT: movdqa %xmm1, %xmm0
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: shuffle_v16i8_16_00_16_01_16_02_16_03_16_04_16_05_16_06_16_07:
+; SSE41: # BB#0:
+; SSE41-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSE41-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
+; SSE41-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; SSE41-NEXT: movdqa %xmm1, %xmm0
+; SSE41-NEXT: retq
;
; AVX1-LABEL: shuffle_v16i8_16_00_16_01_16_02_16_03_16_04_16_05_16_06_16_07:
; AVX1: # BB#0:
@@ -318,23 +339,20 @@ define <16 x i8> @shuffle_v16i8_03_02_01_00_07_06_05_04_19_18_17_16_23_22_21_20(
;
; SSSE3-LABEL: shuffle_v16i8_03_02_01_00_07_06_05_04_19_18_17_16_23_22_21_20:
; SSSE3: # BB#0:
-; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,xmm1[3,2,1,0,7,6,5,4]
-; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[3,2,1,0,7,6,5,4],zero,zero,zero,zero,zero,zero,zero,zero
-; SSSE3-NEXT: por %xmm1, %xmm0
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[6,4,2,0,14,12,10,8,7,5,3,1,15,13,11,9]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v16i8_03_02_01_00_07_06_05_04_19_18_17_16_23_22_21_20:
; SSE41: # BB#0:
-; SSE41-NEXT: pshufb {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,xmm1[3,2,1,0,7,6,5,4]
-; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[3,2,1,0,7,6,5,4],zero,zero,zero,zero,zero,zero,zero,zero
-; SSE41-NEXT: por %xmm1, %xmm0
+; SSE41-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[6,4,2,0,14,12,10,8,7,5,3,1,15,13,11,9]
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v16i8_03_02_01_00_07_06_05_04_19_18_17_16_23_22_21_20:
; AVX: # BB#0:
-; AVX-NEXT: vpshufb {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,xmm1[3,2,1,0,7,6,5,4]
-; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[3,2,1,0,7,6,5,4],zero,zero,zero,zero,zero,zero,zero,zero
-; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[6,4,2,0,14,12,10,8,7,5,3,1,15,13,11,9]
; AVX-NEXT: retq
%shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 19, i32 18, i32 17, i32 16, i32 23, i32 22, i32 21, i32 20>
ret <16 x i8> %shuffle
@@ -343,47 +361,181 @@ define <16 x i8> @shuffle_v16i8_03_02_01_00_07_06_05_04_19_18_17_16_23_22_21_20(
define <16 x i8> @shuffle_v16i8_03_02_01_00_31_30_29_28_11_10_09_08_23_22_21_20(<16 x i8> %a, <16 x i8> %b) {
; SSE2-LABEL: shuffle_v16i8_03_02_01_00_31_30_29_28_11_10_09_08_23_22_21_20:
; SSE2: # BB#0:
-; SSE2-NEXT: pxor %xmm2, %xmm2
-; SSE2-NEXT: movdqa %xmm1, %xmm3
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
-; SSE2-NEXT: pshufhw {{.*#+}} xmm3 = xmm3[0,1,2,3,7,6,5,4]
-; SSE2-NEXT: movdqa %xmm0, %xmm4
-; SSE2-NEXT: punpckhbw {{.*#+}} xmm4 = xmm4[8],xmm2[8],xmm4[9],xmm2[9],xmm4[10],xmm2[10],xmm4[11],xmm2[11],xmm4[12],xmm2[12],xmm4[13],xmm2[13],xmm4[14],xmm2[14],xmm4[15],xmm2[15]
-; SSE2-NEXT: pshuflw {{.*#+}} xmm4 = xmm4[3,2,1,0,4,5,6,7]
-; SSE2-NEXT: movsd %xmm4, %xmm3
-; SSE2-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15]
-; SSE2-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,7,6,5,4]
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE2-NEXT: pxor %xmm1, %xmm1
+; SSE2-NEXT: movdqa %xmm0, %xmm2
+; SSE2-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm1[8],xmm2[9],xmm1[9],xmm2[10],xmm1[10],xmm2[11],xmm1[11],xmm2[12],xmm1[12],xmm2[13],xmm1[13],xmm2[14],xmm1[14],xmm2[15],xmm1[15]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm3 = xmm2[3,2,1,0,4,5,6,7]
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[3,2,1,0,4,5,6,7]
+; SSE2-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm1[0]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
-; SSE2-NEXT: movsd %xmm0, %xmm1
-; SSE2-NEXT: packuswb %xmm3, %xmm1
-; SSE2-NEXT: movdqa %xmm1, %xmm0
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[2,3,2,3]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[3,2,1,0,4,5,6,7]
+; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE2-NEXT: packuswb %xmm3, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: shuffle_v16i8_03_02_01_00_31_30_29_28_11_10_09_08_23_22_21_20:
; SSSE3: # BB#0:
-; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = zero,zero,zero,zero,xmm1[15,14,13,12],zero,zero,zero,zero,xmm1[7,6,5,4]
-; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[3,2,1,0],zero,zero,zero,zero,xmm0[11,10,9,8],zero,zero,zero,zero
-; SSSE3-NEXT: por %xmm1, %xmm0
+; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[15,14,13,12,7,6,5,4,u,u,u,u,u,u,u,u]
+; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[3,2,1,0,11,10,9,8,u,u,u,u,u,u,u,u]
+; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v16i8_03_02_01_00_31_30_29_28_11_10_09_08_23_22_21_20:
; SSE41: # BB#0:
-; SSE41-NEXT: pshufb {{.*#+}} xmm1 = zero,zero,zero,zero,xmm1[15,14,13,12],zero,zero,zero,zero,xmm1[7,6,5,4]
-; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[3,2,1,0],zero,zero,zero,zero,xmm0[11,10,9,8],zero,zero,zero,zero
-; SSE41-NEXT: por %xmm1, %xmm0
+; SSE41-NEXT: pshufb {{.*#+}} xmm1 = xmm1[15,14,13,12,7,6,5,4,u,u,u,u,u,u,u,u]
+; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[3,2,1,0,11,10,9,8,u,u,u,u,u,u,u,u]
+; SSE41-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v16i8_03_02_01_00_31_30_29_28_11_10_09_08_23_22_21_20:
; AVX: # BB#0:
-; AVX-NEXT: vpshufb {{.*#+}} xmm1 = zero,zero,zero,zero,xmm1[15,14,13,12],zero,zero,zero,zero,xmm1[7,6,5,4]
-; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[3,2,1,0],zero,zero,zero,zero,xmm0[11,10,9,8],zero,zero,zero,zero
-; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[15,14,13,12,7,6,5,4,u,u,u,u,u,u,u,u]
+; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[3,2,1,0,11,10,9,8,u,u,u,u,u,u,u,u]
+; AVX-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; AVX-NEXT: retq
%shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 31, i32 30, i32 29, i32 28, i32 11, i32 10, i32 9, i32 8, i32 23, i32 22, i32 21, i32 20>
ret <16 x i8> %shuffle
}
+define <16 x i8> @shuffle_v16i8_00_17_02_19_04_21_06_23_08_25_10_27_12_29_14_31(<16 x i8> %a, <16 x i8> %b) {
+; SSE2-LABEL: shuffle_v16i8_00_17_02_19_04_21_06_23_08_25_10_27_12_29_14_31:
+; SSE2: # BB#0:
+; SSE2-NEXT: movaps {{.*#+}} xmm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
+; SSE2-NEXT: andps %xmm2, %xmm0
+; SSE2-NEXT: andnps %xmm1, %xmm2
+; SSE2-NEXT: orps %xmm2, %xmm0
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: shuffle_v16i8_00_17_02_19_04_21_06_23_08_25_10_27_12_29_14_31:
+; SSSE3: # BB#0:
+; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[1,3,5,7,9,11,13,15,u,u,u,u,u,u,u,u]
+; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: shuffle_v16i8_00_17_02_19_04_21_06_23_08_25_10_27_12_29_14_31:
+; SSE41: # BB#0:
+; SSE41-NEXT: movdqa %xmm0, %xmm2
+; SSE41-NEXT: movaps {{.*#+}} xmm0 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
+; SSE41-NEXT: pblendvb %xmm2, %xmm1
+; SSE41-NEXT: movdqa %xmm1, %xmm0
+; SSE41-NEXT: retq
+;
+; AVX-LABEL: shuffle_v16i8_00_17_02_19_04_21_06_23_08_25_10_27_12_29_14_31:
+; AVX: # BB#0:
+; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
+; AVX-NEXT: vpblendvb %xmm2, %xmm0, %xmm1, %xmm0
+; AVX-NEXT: retq
+ %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31>
+ ret <16 x i8> %shuffle
+}
+
+define <16 x i8> @shuffle_v16i8_00_01_02_19_04_05_06_23_08_09_10_27_12_13_14_31(<16 x i8> %a, <16 x i8> %b) {
+; SSE2-LABEL: shuffle_v16i8_00_01_02_19_04_05_06_23_08_09_10_27_12_13_14_31:
+; SSE2: # BB#0:
+; SSE2-NEXT: movaps {{.*#+}} xmm2 = [255,255,255,0,255,255,255,0,255,255,255,0,255,255,255,0]
+; SSE2-NEXT: andps %xmm2, %xmm0
+; SSE2-NEXT: andnps %xmm1, %xmm2
+; SSE2-NEXT: orps %xmm2, %xmm0
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: shuffle_v16i8_00_01_02_19_04_05_06_23_08_09_10_27_12_13_14_31:
+; SSSE3: # BB#0:
+; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[15]
+; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2],zero,xmm0[4,5,6],zero,xmm0[8,9,10],zero,xmm0[12,13,14],zero
+; SSSE3-NEXT: por %xmm1, %xmm0
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: shuffle_v16i8_00_01_02_19_04_05_06_23_08_09_10_27_12_13_14_31:
+; SSE41: # BB#0:
+; SSE41-NEXT: movdqa %xmm0, %xmm2
+; SSE41-NEXT: movaps {{.*#+}} xmm0 = [255,255,255,0,255,255,255,0,255,255,255,0,255,255,255,0]
+; SSE41-NEXT: pblendvb %xmm2, %xmm1
+; SSE41-NEXT: movdqa %xmm1, %xmm0
+; SSE41-NEXT: retq
+;
+; AVX-LABEL: shuffle_v16i8_00_01_02_19_04_05_06_23_08_09_10_27_12_13_14_31:
+; AVX: # BB#0:
+; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [255,255,255,0,255,255,255,0,255,255,255,0,255,255,255,0]
+; AVX-NEXT: vpblendvb %xmm2, %xmm0, %xmm1, %xmm0
+; AVX-NEXT: retq
+ %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 19, i32 4, i32 5, i32 6, i32 23, i32 8, i32 9, i32 10, i32 27, i32 12, i32 13, i32 14, i32 31>
+ ret <16 x i8> %shuffle
+}
+
+define <16 x i8> @shuffle_v16i8_00_01_02_03_20_05_06_23_08_09_10_11_28_13_14_31(<16 x i8> %a, <16 x i8> %b) {
+; SSE2-LABEL: shuffle_v16i8_00_01_02_03_20_05_06_23_08_09_10_11_28_13_14_31:
+; SSE2: # BB#0:
+; SSE2-NEXT: movaps {{.*#+}} xmm2 = [255,255,255,255,0,255,255,0,255,255,255,255,0,255,255,0]
+; SSE2-NEXT: andps %xmm2, %xmm0
+; SSE2-NEXT: andnps %xmm1, %xmm2
+; SSE2-NEXT: orps %xmm2, %xmm0
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: shuffle_v16i8_00_01_02_03_20_05_06_23_08_09_10_11_28_13_14_31:
+; SSSE3: # BB#0:
+; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = zero,zero,zero,zero,xmm1[4],zero,zero,xmm1[7],zero,zero,zero,zero,xmm1[12],zero,zero,xmm1[15]
+; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3],zero,xmm0[5,6],zero,xmm0[8,9,10,11],zero,xmm0[13,14],zero
+; SSSE3-NEXT: por %xmm1, %xmm0
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: shuffle_v16i8_00_01_02_03_20_05_06_23_08_09_10_11_28_13_14_31:
+; SSE41: # BB#0:
+; SSE41-NEXT: movdqa %xmm0, %xmm2
+; SSE41-NEXT: movaps {{.*#+}} xmm0 = [255,255,255,255,0,255,255,0,255,255,255,255,0,255,255,0]
+; SSE41-NEXT: pblendvb %xmm2, %xmm1
+; SSE41-NEXT: movdqa %xmm1, %xmm0
+; SSE41-NEXT: retq
+;
+; AVX-LABEL: shuffle_v16i8_00_01_02_03_20_05_06_23_08_09_10_11_28_13_14_31:
+; AVX: # BB#0:
+; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [255,255,255,255,0,255,255,0,255,255,255,255,0,255,255,0]
+; AVX-NEXT: vpblendvb %xmm2, %xmm0, %xmm1, %xmm0
+; AVX-NEXT: retq
+ %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 20, i32 5, i32 6, i32 23, i32 8, i32 9, i32 10, i32 11, i32 28, i32 13, i32 14, i32 31>
+ ret <16 x i8> %shuffle
+}
+
+define <16 x i8> @shuffle_v16i8_16_17_18_19_04_05_06_07_24_25_10_11_28_13_30_15(<16 x i8> %a, <16 x i8> %b) {
+; SSE2-LABEL: shuffle_v16i8_16_17_18_19_04_05_06_07_24_25_10_11_28_13_30_15:
+; SSE2: # BB#0:
+; SSE2-NEXT: movaps {{.*#+}} xmm2 = [255,255,255,255,0,0,0,0,255,255,0,0,255,0,255,0]
+; SSE2-NEXT: andps %xmm2, %xmm1
+; SSE2-NEXT: andnps %xmm0, %xmm2
+; SSE2-NEXT: orps %xmm1, %xmm2
+; SSE2-NEXT: movaps %xmm2, %xmm0
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: shuffle_v16i8_16_17_18_19_04_05_06_07_24_25_10_11_28_13_30_15:
+; SSSE3: # BB#0:
+; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[4,5,6,7],zero,zero,xmm0[10,11],zero,xmm0[13],zero,xmm0[15]
+; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,1,2,3],zero,zero,zero,zero,xmm1[8,9],zero,zero,xmm1[12],zero,xmm1[14],zero
+; SSSE3-NEXT: por %xmm1, %xmm0
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: shuffle_v16i8_16_17_18_19_04_05_06_07_24_25_10_11_28_13_30_15:
+; SSE41: # BB#0:
+; SSE41-NEXT: movdqa %xmm0, %xmm2
+; SSE41-NEXT: movaps {{.*#+}} xmm0 = [255,255,255,255,0,0,0,0,255,255,0,0,255,0,255,0]
+; SSE41-NEXT: pblendvb %xmm1, %xmm2
+; SSE41-NEXT: movdqa %xmm2, %xmm0
+; SSE41-NEXT: retq
+;
+; AVX-LABEL: shuffle_v16i8_16_17_18_19_04_05_06_07_24_25_10_11_28_13_30_15:
+; AVX: # BB#0:
+; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [255,255,255,255,0,0,0,0,255,255,0,0,255,0,255,0]
+; AVX-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+ %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 4, i32 5, i32 6, i32 7, i32 24, i32 25, i32 10, i32 11, i32 28, i32 13, i32 30, i32 15>
+ ret <16 x i8> %shuffle
+}
+
define <16 x i8> @trunc_v4i32_shuffle(<16 x i8> %a) {
; SSE2-LABEL: trunc_v4i32_shuffle:
; SSE2: # BB#0:
@@ -429,12 +581,12 @@ entry:
ret <16 x i8> %s.16.0
}
-define <16 x i8> @stress_test1(<16 x i8> %s.0.5, <16 x i8> %s.0.8, <16 x i8> %s.0.9) noinline nounwind {
+define <16 x i8> @undef_test1(<16 x i8> %s.0.5, <16 x i8> %s.0.8, <16 x i8> %s.0.9) noinline nounwind {
; There is nothing interesting to check about these instructions other than
; that they survive codegen. However, we actually do better and delete all of
; them because the result is 'undef'.
;
-; ALL-LABEL: stress_test1:
+; ALL-LABEL: undef_test1:
; ALL: # BB#0: # %entry
; ALL-NEXT: retq
entry:
@@ -460,12 +612,7 @@ define <16 x i8> @PR20540(<8 x i8> %a) {
; SSE2: # BB#0:
; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
; SSE2-NEXT: packuswb %xmm0, %xmm0
-; SSE2-NEXT: pxor %xmm1, %xmm1
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
-; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,0,3]
-; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
-; SSE2-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,4,4]
-; SSE2-NEXT: packuswb %xmm1, %xmm0
+; SSE2-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
; SSE2-NEXT: retq
;
; SSSE3-LABEL: PR20540:
@@ -487,28 +634,16 @@ define <16 x i8> @PR20540(<8 x i8> %a) {
}
define <16 x i8> @shuffle_v16i8_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz(i8 %i) {
-; SSE2-LABEL: shuffle_v16i8_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
-; SSE2: # BB#0:
-; SSE2-NEXT: movzbl %dil, %eax
-; SSE2-NEXT: movd %eax, %xmm0
-; SSE2-NEXT: retq
-;
-; SSSE3-LABEL: shuffle_v16i8_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
-; SSSE3: # BB#0:
-; SSSE3-NEXT: movd %edi, %xmm0
-; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
-; SSSE3-NEXT: retq
-;
-; SSE41-LABEL: shuffle_v16i8_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
-; SSE41: # BB#0:
-; SSE41-NEXT: movd %edi, %xmm0
-; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
-; SSE41-NEXT: retq
+; SSE-LABEL: shuffle_v16i8_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
+; SSE: # BB#0:
+; SSE-NEXT: movzbl %dil, %eax
+; SSE-NEXT: movd %eax, %xmm0
+; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v16i8_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
; AVX: # BB#0:
-; AVX-NEXT: vmovd %edi, %xmm0
-; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; AVX-NEXT: movzbl %dil, %eax
+; AVX-NEXT: vmovd %eax, %xmm0
; AVX-NEXT: retq
%a = insertelement <16 x i8> undef, i8 %i, i32 0
%shuffle = shufflevector <16 x i8> zeroinitializer, <16 x i8> %a, <16 x i32> <i32 16, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
@@ -518,27 +653,28 @@ define <16 x i8> @shuffle_v16i8_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz(
define <16 x i8> @shuffle_v16i8_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz(i8 %i) {
; SSE2-LABEL: shuffle_v16i8_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
; SSE2: # BB#0:
-; SSE2-NEXT: movzbl %dil, %eax
-; SSE2-NEXT: movd %eax, %xmm0
-; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10]
+; SSE2-NEXT: shll $8, %edi
+; SSE2-NEXT: pxor %xmm0, %xmm0
+; SSE2-NEXT: pinsrw $2, %edi, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: shuffle_v16i8_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
; SSSE3: # BB#0:
-; SSSE3-NEXT: movd %edi, %xmm0
-; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = zero,zero,zero,zero,zero,xmm0[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; SSSE3-NEXT: shll $8, %edi
+; SSSE3-NEXT: pxor %xmm0, %xmm0
+; SSSE3-NEXT: pinsrw $2, %edi, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v16i8_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
; SSE41: # BB#0:
-; SSE41-NEXT: movd %edi, %xmm0
-; SSE41-NEXT: pshufb {{.*#+}} xmm0 = zero,zero,zero,zero,zero,xmm0[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; SSE41-NEXT: pxor %xmm0, %xmm0
+; SSE41-NEXT: pinsrb $5, %edi, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v16i8_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
; AVX: # BB#0:
-; AVX-NEXT: vmovd %edi, %xmm0
-; AVX-NEXT: vpshufb {{.*#+}} xmm0 = zero,zero,zero,zero,zero,xmm0[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; AVX-NEXT: vpxor %xmm0, %xmm0
+; AVX-NEXT: vpinsrb $5, %edi, %xmm0
; AVX-NEXT: retq
%a = insertelement <16 x i8> undef, i8 %i, i32 0
%shuffle = shufflevector <16 x i8> zeroinitializer, <16 x i8> %a, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 16, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
@@ -546,16 +682,30 @@ define <16 x i8> @shuffle_v16i8_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz(
}
define <16 x i8> @shuffle_v16i8_zz_uu_uu_zz_uu_uu_zz_zz_zz_zz_zz_zz_zz_zz_zz_16(i8 %i) {
-; SSE-LABEL: shuffle_v16i8_zz_uu_uu_zz_uu_uu_zz_zz_zz_zz_zz_zz_zz_zz_zz_16:
-; SSE: # BB#0:
-; SSE-NEXT: movd %edi, %xmm0
-; SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0]
-; SSE-NEXT: retq
+; SSE2-LABEL: shuffle_v16i8_zz_uu_uu_zz_uu_uu_zz_zz_zz_zz_zz_zz_zz_zz_zz_16:
+; SSE2: # BB#0:
+; SSE2-NEXT: shll $8, %edi
+; SSE2-NEXT: pxor %xmm0, %xmm0
+; SSE2-NEXT: pinsrw $7, %edi, %xmm0
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: shuffle_v16i8_zz_uu_uu_zz_uu_uu_zz_zz_zz_zz_zz_zz_zz_zz_zz_16:
+; SSSE3: # BB#0:
+; SSSE3-NEXT: shll $8, %edi
+; SSSE3-NEXT: pxor %xmm0, %xmm0
+; SSSE3-NEXT: pinsrw $7, %edi, %xmm0
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: shuffle_v16i8_zz_uu_uu_zz_uu_uu_zz_zz_zz_zz_zz_zz_zz_zz_zz_16:
+; SSE41: # BB#0:
+; SSE41-NEXT: pxor %xmm0, %xmm0
+; SSE41-NEXT: pinsrb $15, %edi, %xmm0
+; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v16i8_zz_uu_uu_zz_uu_uu_zz_zz_zz_zz_zz_zz_zz_zz_zz_16:
; AVX: # BB#0:
-; AVX-NEXT: vmovd %edi, %xmm0
-; AVX-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0]
+; AVX-NEXT: vpxor %xmm0, %xmm0
+; AVX-NEXT: vpinsrb $15, %edi, %xmm0
; AVX-NEXT: retq
%a = insertelement <16 x i8> undef, i8 %i, i32 0
%shuffle = shufflevector <16 x i8> zeroinitializer, <16 x i8> %a, <16 x i32> <i32 0, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 16>
@@ -566,29 +716,27 @@ define <16 x i8> @shuffle_v16i8_zz_zz_19_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz(
; SSE2-LABEL: shuffle_v16i8_zz_zz_19_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
; SSE2: # BB#0:
; SSE2-NEXT: movzbl %dil, %eax
-; SSE2-NEXT: movd %eax, %xmm0
-; SSE2-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
+; SSE2-NEXT: pxor %xmm0, %xmm0
+; SSE2-NEXT: pinsrw $1, %eax, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: shuffle_v16i8_zz_zz_19_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
; SSSE3: # BB#0:
-; SSSE3-NEXT: movd %edi, %xmm0
-; SSSE3-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12]
-; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; SSSE3-NEXT: movzbl %dil, %eax
+; SSSE3-NEXT: pxor %xmm0, %xmm0
+; SSSE3-NEXT: pinsrw $1, %eax, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v16i8_zz_zz_19_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
; SSE41: # BB#0:
-; SSE41-NEXT: movd %edi, %xmm0
-; SSE41-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12]
-; SSE41-NEXT: pshufb {{.*#+}} xmm0 = zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; SSE41-NEXT: pxor %xmm0, %xmm0
+; SSE41-NEXT: pinsrb $2, %edi, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v16i8_zz_zz_19_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
; AVX: # BB#0:
-; AVX-NEXT: vmovd %edi, %xmm0
-; AVX-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12]
-; AVX-NEXT: vpshufb {{.*#+}} xmm0 = zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; AVX-NEXT: vpxor %xmm0, %xmm0
+; AVX-NEXT: vpinsrb $2, %edi, %xmm0
; AVX-NEXT: retq
%a = insertelement <16 x i8> undef, i8 %i, i32 3
%shuffle = shufflevector <16 x i8> zeroinitializer, <16 x i8> %a, <16 x i32> <i32 0, i32 1, i32 19, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
@@ -605,7 +753,7 @@ define <16 x i8> @shuffle_v16i8_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_16_uu_18_uu(
; AVX: # BB#0:
; AVX-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3]
; AVX-NEXT: retq
- %shuffle = shufflevector <16 x i8> zeroinitializer, <16 x i8> %a, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 09, i32 0, i32 0, i32 0, i32 0, i32 0, i32 16, i32 undef, i32 18, i32 undef>
+ %shuffle = shufflevector <16 x i8> zeroinitializer, <16 x i8> %a, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 16, i32 undef, i32 18, i32 undef>
ret <16 x i8> %shuffle
}
@@ -800,12 +948,12 @@ define <16 x i8> @shuffle_v16i8_00_uu_uu_uu_uu_uu_uu_uu_01_uu_uu_uu_uu_uu_uu_uu(
;
; SSE41-LABEL: shuffle_v16i8_00_uu_uu_uu_uu_uu_uu_uu_01_uu_uu_uu_uu_uu_uu_uu:
; SSE41: # BB#0:
-; SSE41-NEXT: pmovzxbq %xmm0, %xmm0
+; SSE41-NEXT: pmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v16i8_00_uu_uu_uu_uu_uu_uu_uu_01_uu_uu_uu_uu_uu_uu_uu:
; AVX: # BB#0:
-; AVX-NEXT: vpmovzxbq %xmm0, %xmm0
+; AVX-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; AVX-NEXT: retq
%shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
ret <16 x i8> %shuffle
@@ -827,12 +975,12 @@ define <16 x i8> @shuffle_v16i8_00_zz_zz_zz_zz_zz_zz_zz_01_zz_zz_zz_zz_zz_zz_zz(
;
; SSE41-LABEL: shuffle_v16i8_00_zz_zz_zz_zz_zz_zz_zz_01_zz_zz_zz_zz_zz_zz_zz:
; SSE41: # BB#0:
-; SSE41-NEXT: pmovzxbq %xmm0, %xmm0
+; SSE41-NEXT: pmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v16i8_00_zz_zz_zz_zz_zz_zz_zz_01_zz_zz_zz_zz_zz_zz_zz:
; AVX: # BB#0:
-; AVX-NEXT: vpmovzxbq %xmm0, %xmm0
+; AVX-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; AVX-NEXT: retq
%shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 0, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 1, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
ret <16 x i8> %shuffle
@@ -853,12 +1001,12 @@ define <16 x i8> @shuffle_v16i8_00_uu_uu_uu_01_uu_uu_uu_02_uu_uu_uu_03_uu_uu_uu(
;
; SSE41-LABEL: shuffle_v16i8_00_uu_uu_uu_01_uu_uu_uu_02_uu_uu_uu_03_uu_uu_uu:
; SSE41: # BB#0:
-; SSE41-NEXT: pmovzxbd %xmm0, %xmm0
+; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v16i8_00_uu_uu_uu_01_uu_uu_uu_02_uu_uu_uu_03_uu_uu_uu:
; AVX: # BB#0:
-; AVX-NEXT: vpmovzxbd %xmm0, %xmm0
+; AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX-NEXT: retq
%shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 1, i32 undef, i32 undef, i32 undef, i32 2, i32 undef, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 undef>
ret <16 x i8> %shuffle
@@ -881,12 +1029,12 @@ define <16 x i8> @shuffle_v16i8_00_zz_zz_zz_01_zz_zz_zz_02_zz_zz_zz_03_zz_zz_zz(
;
; SSE41-LABEL: shuffle_v16i8_00_zz_zz_zz_01_zz_zz_zz_02_zz_zz_zz_03_zz_zz_zz:
; SSE41: # BB#0:
-; SSE41-NEXT: pmovzxbd %xmm0, %xmm0
+; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v16i8_00_zz_zz_zz_01_zz_zz_zz_02_zz_zz_zz_03_zz_zz_zz:
; AVX: # BB#0:
-; AVX-NEXT: vpmovzxbd %xmm0, %xmm0
+; AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX-NEXT: retq
%shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 0, i32 17, i32 18, i32 19, i32 1, i32 21, i32 22, i32 23, i32 2, i32 25, i32 26, i32 27, i32 3, i32 29, i32 30, i32 31>
ret <16 x i8> %shuffle
@@ -905,12 +1053,12 @@ define <16 x i8> @shuffle_v16i8_00_uu_01_uu_02_uu_03_uu_04_uu_05_uu_06_uu_07_uu(
;
; SSE41-LABEL: shuffle_v16i8_00_uu_01_uu_02_uu_03_uu_04_uu_05_uu_06_uu_07_uu:
; SSE41: # BB#0:
-; SSE41-NEXT: pmovzxbw %xmm0, %xmm0
+; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v16i8_00_uu_01_uu_02_uu_03_uu_04_uu_05_uu_06_uu_07_uu:
; AVX: # BB#0:
-; AVX-NEXT: vpmovzxbw %xmm0, %xmm0
+; AVX-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX-NEXT: retq
%shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 0, i32 undef, i32 1, i32 undef, i32 2, i32 undef, i32 3, i32 undef, i32 4, i32 undef, i32 5, i32 undef, i32 6, i32 undef, i32 7, i32 undef>
ret <16 x i8> %shuffle
@@ -931,12 +1079,12 @@ define <16 x i8> @shuffle_v16i8_00_zz_01_zz_02_zz_03_zz_04_zz_05_zz_06_zz_07_zz(
;
; SSE41-LABEL: shuffle_v16i8_00_zz_01_zz_02_zz_03_zz_04_zz_05_zz_06_zz_07_zz:
; SSE41: # BB#0:
-; SSE41-NEXT: pmovzxbw %xmm0, %xmm0
+; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v16i8_00_zz_01_zz_02_zz_03_zz_04_zz_05_zz_06_zz_07_zz:
; AVX: # BB#0:
-; AVX-NEXT: vpmovzxbw %xmm0, %xmm0
+; AVX-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX-NEXT: retq
%shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 0, i32 17, i32 1, i32 19, i32 2, i32 21, i32 3, i32 23, i32 4, i32 25, i32 5, i32 27, i32 6, i32 29, i32 7, i32 31>
ret <16 x i8> %shuffle
@@ -948,69 +1096,53 @@ define <16 x i8> @shuffle_v16i8_uu_10_02_07_22_14_07_02_18_03_01_14_18_09_11_00(
; SSE2-NEXT: pxor %xmm2, %xmm2
; SSE2-NEXT: movdqa %xmm0, %xmm3
; SSE2-NEXT: punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm2[8],xmm3[9],xmm2[9],xmm3[10],xmm2[10],xmm3[11],xmm2[11],xmm3[12],xmm2[12],xmm3[13],xmm2[13],xmm3[14],xmm2[14],xmm3[15],xmm2[15]
-; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm3[2,3,0,1]
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
-; SSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm0[0,2,2,3,4,5,6,7]
-; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,3,2,3]
-; SSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[1,0,3,3,4,5,6,7]
-; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3]
-; SSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[0,2,2,3,4,5,6,7]
-; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
-; SSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[2,0,3,1,4,5,6,7]
-; SSE2-NEXT: pshuflw {{.*#+}} xmm4 = xmm3[2,1,2,3,4,5,6,7]
-; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[0,3,2,3]
-; SSE2-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm0[0],xmm4[1],xmm0[1],xmm4[2],xmm0[2],xmm4[3],xmm0[3]
-; SSE2-NEXT: pshuflw {{.*#+}} xmm4 = xmm4[0,2,2,3,4,5,6,7]
-; SSE2-NEXT: pshufhw {{.*#+}} xmm4 = xmm4[0,1,2,3,4,7,6,7]
-; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[0,2,2,3]
-; SSE2-NEXT: pshuflw {{.*#+}} xmm4 = xmm4[0,2,3,1,4,5,6,7]
-; SSE2-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3]
-; SSE2-NEXT: packuswb %xmm0, %xmm4
-; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[2,1,2,3,4,5,6,7]
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,3,2,3]
-; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,1,3,3,4,5,6,7]
-; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
-; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
-; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,7,6,7]
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; SSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm0[0,2,1,3,4,5,6,7]
-; SSE2-NEXT: packuswb %xmm0, %xmm2
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[3,1,2,3]
-; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
+; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,3,0,1]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm4 = xmm4[0,1,2,2,4,5,6,7]
+; SSE2-NEXT: pshufhw {{.*#+}} xmm4 = xmm4[0,1,2,3,4,5,7,7]
+; SSE2-NEXT: movdqa {{.*#+}} xmm5 = [65535,65535,65535,0,65535,0,0,65535]
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
+; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,1,0,3]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[0,3,1,3,4,5,6,7]
+; SSE2-NEXT: pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,4,5,6,4]
+; SSE2-NEXT: pand %xmm5, %xmm2
+; SSE2-NEXT: pandn %xmm4, %xmm5
+; SSE2-NEXT: por %xmm2, %xmm5
+; SSE2-NEXT: psrlq $16, %xmm3
+; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,2,2,3]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,1,3]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,1,2,1,4,5,6,7]
+; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,7,4]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,3,2,3]
+; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
+; SSE2-NEXT: packuswb %xmm5, %xmm2
+; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [255,255,255,255,0,255,255,255,0,255,255,255,0,255,255,255]
+; SSE2-NEXT: pand %xmm0, %xmm2
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,0,3]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,1,3,3,4,5,6,7]
+; SSE2-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,5,5,5,7]
+; SSE2-NEXT: pandn %xmm1, %xmm0
+; SSE2-NEXT: por %xmm2, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: shuffle_v16i8_uu_10_02_07_22_14_07_02_18_03_01_14_18_09_11_00:
; SSSE3: # BB#0: # %entry
-; SSSE3-NEXT: movdqa %xmm0, %xmm2
-; SSSE3-NEXT: pshufb {{.*#+}} xmm2 = xmm2[2,7,1,11,u,u,u,u,u,u,u,u,u,u,u,u]
-; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[6,6,2,2,2,2,3,3,4,4,5,5,6,6,7,7]
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
-; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[10,7,14,2,3,14,9,0,u,u,u,u,u,u,u,u]
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
-; SSSE3-NEXT: movdqa %xmm1, %xmm0
+; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[u],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[2],zero,zero,zero
+; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[u,10,2,7],zero,xmm0[14,7,2],zero,xmm0[3,1,14],zero,xmm0[9,11,0]
+; SSSE3-NEXT: por %xmm1, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v16i8_uu_10_02_07_22_14_07_02_18_03_01_14_18_09_11_00:
; SSE41: # BB#0: # %entry
-; SSE41-NEXT: movdqa %xmm0, %xmm2
-; SSE41-NEXT: pshufb {{.*#+}} xmm2 = xmm2[2,7,1,11,u,u,u,u,u,u,u,u,u,u,u,u]
-; SSE41-NEXT: pshufb {{.*#+}} xmm1 = xmm1[6,6,2,2,2,2,3,3,4,4,5,5,6,6,7,7]
-; SSE41-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
-; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[10,7,14,2,3,14,9,0,u,u,u,u,u,u,u,u]
-; SSE41-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
-; SSE41-NEXT: movdqa %xmm1, %xmm0
+; SSE41-NEXT: pshufb {{.*#+}} xmm1 = xmm1[u],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[2],zero,zero,zero
+; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[u,10,2,7],zero,xmm0[14,7,2],zero,xmm0[3,1,14],zero,xmm0[9,11,0]
+; SSE41-NEXT: por %xmm1, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v16i8_uu_10_02_07_22_14_07_02_18_03_01_14_18_09_11_00:
; AVX: # BB#0: # %entry
-; AVX-NEXT: vpshufb {{.*#+}} xmm2 = xmm0[2,7,1,11,u,u,u,u,u,u,u,u,u,u,u,u]
-; AVX-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[6,6,2,2,2,2,3,3,4,4,5,5,6,6,7,7]
-; AVX-NEXT: vpunpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
-; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[10,7,14,2,3,14,9,0,u,u,u,u,u,u,u,u]
-; AVX-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; AVX-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[u],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[2],zero,zero,zero
+; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[u,10,2,7],zero,xmm0[14,7,2],zero,xmm0[3,1,14],zero,xmm0[9,11,0]
+; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
entry:
%shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 undef, i32 10, i32 2, i32 7, i32 22, i32 14, i32 7, i32 2, i32 18, i32 3, i32 1, i32 14, i32 18, i32 9, i32 11, i32 0>
@@ -1053,6 +1185,108 @@ entry:
ret void
}
+;
+; Shuffle to logical bit shifts
+;
+
+define <16 x i8> @shuffle_v16i8_zz_00_zz_02_zz_04_zz_06_zz_08_zz_10_zz_12_zz_14(<16 x i8> %a, <16 x i8> %b) {
+; SSE-LABEL: shuffle_v16i8_zz_00_zz_02_zz_04_zz_06_zz_08_zz_10_zz_12_zz_14:
+; SSE: # BB#0:
+; SSE-NEXT: psllw $8, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: shuffle_v16i8_zz_00_zz_02_zz_04_zz_06_zz_08_zz_10_zz_12_zz_14:
+; AVX: # BB#0:
+; AVX-NEXT: vpsllw $8, %xmm0, %xmm0
+; AVX-NEXT: retq
+ %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32><i32 16, i32 0, i32 16, i32 2, i32 16, i32 4, i32 16, i32 6, i32 16, i32 8, i32 16, i32 10, i32 16, i32 12, i32 16, i32 14>
+ ret <16 x i8> %shuffle
+}
+
+define <16 x i8> @shuffle_v16i8_zz_zz_zz_00_zz_zz_zz_04_zz_zz_zz_08_zz_zz_zz_12(<16 x i8> %a, <16 x i8> %b) {
+; SSE-LABEL: shuffle_v16i8_zz_zz_zz_00_zz_zz_zz_04_zz_zz_zz_08_zz_zz_zz_12:
+; SSE: # BB#0:
+; SSE-NEXT: pslld $24, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: shuffle_v16i8_zz_zz_zz_00_zz_zz_zz_04_zz_zz_zz_08_zz_zz_zz_12:
+; AVX: # BB#0:
+; AVX-NEXT: vpslld $24, %xmm0, %xmm0
+; AVX-NEXT: retq
+ %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32><i32 16, i32 16, i32 16, i32 0, i32 16, i32 16, i32 16, i32 4, i32 16, i32 16, i32 16, i32 8, i32 16, i32 16, i32 16, i32 12>
+ ret <16 x i8> %shuffle
+}
+
+define <16 x i8> @shuffle_v16i8_zz_zz_zz_zz_zz_zz_zz_00_zz_zz_zz_zz_zz_zz_zz_08(<16 x i8> %a, <16 x i8> %b) {
+; SSE-LABEL: shuffle_v16i8_zz_zz_zz_zz_zz_zz_zz_00_zz_zz_zz_zz_zz_zz_zz_08:
+; SSE: # BB#0:
+; SSE-NEXT: psllq $56, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: shuffle_v16i8_zz_zz_zz_zz_zz_zz_zz_00_zz_zz_zz_zz_zz_zz_zz_08:
+; AVX: # BB#0:
+; AVX-NEXT: vpsllq $56, %xmm0, %xmm0
+; AVX-NEXT: retq
+ %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32><i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 0, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 8>
+ ret <16 x i8> %shuffle
+}
+
+define <16 x i8> @shuffle_v16i8_zz_00_uu_02_03_uu_05_06_zz_08_09_uu_11_12_13_14(<16 x i8> %a, <16 x i8> %b) {
+; SSE-LABEL: shuffle_v16i8_zz_00_uu_02_03_uu_05_06_zz_08_09_uu_11_12_13_14:
+; SSE: # BB#0:
+; SSE-NEXT: psllq $8, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: shuffle_v16i8_zz_00_uu_02_03_uu_05_06_zz_08_09_uu_11_12_13_14:
+; AVX: # BB#0:
+; AVX-NEXT: vpsllq $8, %xmm0, %xmm0
+; AVX-NEXT: retq
+ %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32><i32 16, i32 0, i32 undef, i32 2, i32 3, i32 undef, i32 5, i32 6, i32 16, i32 8, i32 9, i32 undef, i32 11, i32 12, i32 13, i32 14>
+ ret <16 x i8> %shuffle
+}
+
+define <16 x i8> @shuffle_v16i8_01_uu_uu_uu_uu_zz_uu_zz_uu_zz_11_zz_13_zz_15_zz(<16 x i8> %a, <16 x i8> %b) {
+; SSE-LABEL: shuffle_v16i8_01_uu_uu_uu_uu_zz_uu_zz_uu_zz_11_zz_13_zz_15_zz:
+; SSE: # BB#0:
+; SSE-NEXT: psrlw $8, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: shuffle_v16i8_01_uu_uu_uu_uu_zz_uu_zz_uu_zz_11_zz_13_zz_15_zz:
+; AVX: # BB#0:
+; AVX-NEXT: vpsrlw $8, %xmm0, %xmm0
+; AVX-NEXT: retq
+ %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32><i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 16, i32 undef, i32 16, i32 undef, i32 16, i32 11, i32 16, i32 13, i32 16, i32 15, i32 16>
+ ret <16 x i8> %shuffle
+}
+
+define <16 x i8> @shuffle_v16i8_02_03_zz_zz_06_07_uu_uu_uu_uu_uu_uu_14_15_zz_zz(<16 x i8> %a, <16 x i8> %b) {
+; SSE-LABEL: shuffle_v16i8_02_03_zz_zz_06_07_uu_uu_uu_uu_uu_uu_14_15_zz_zz:
+; SSE: # BB#0:
+; SSE-NEXT: psrld $16, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: shuffle_v16i8_02_03_zz_zz_06_07_uu_uu_uu_uu_uu_uu_14_15_zz_zz:
+; AVX: # BB#0:
+; AVX-NEXT: vpsrld $16, %xmm0, %xmm0
+; AVX-NEXT: retq
+ %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32><i32 2, i32 3, i32 16, i32 16, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 14, i32 15, i32 16, i32 16>
+ ret <16 x i8> %shuffle
+}
+
+define <16 x i8> @shuffle_v16i8_07_zz_zz_zz_zz_zz_uu_uu_15_uu_uu_uu_uu_uu_zz_zz(<16 x i8> %a, <16 x i8> %b) {
+; SSE-LABEL: shuffle_v16i8_07_zz_zz_zz_zz_zz_uu_uu_15_uu_uu_uu_uu_uu_zz_zz:
+; SSE: # BB#0:
+; SSE-NEXT: psrlq $56, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: shuffle_v16i8_07_zz_zz_zz_zz_zz_uu_uu_15_uu_uu_uu_uu_uu_zz_zz:
+; AVX: # BB#0:
+; AVX-NEXT: vpsrlq $56, %xmm0, %xmm0
+; AVX-NEXT: retq
+ %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32><i32 7, i32 16, i32 16, i32 16, i32 16, i32 16, i32 undef, i32 undef, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 16, i32 16>
+ ret <16 x i8> %shuffle
+}
+
define <16 x i8> @PR12412(<16 x i8> %inval1, <16 x i8> %inval2) {
; SSE2-LABEL: PR12412:
; SSE2: # BB#0: # %entry
@@ -1063,26 +1297,62 @@ define <16 x i8> @PR12412(<16 x i8> %inval1, <16 x i8> %inval2) {
; SSE2-NEXT: retq
;
; SSSE3-LABEL: PR12412:
-; SSSE3: # BB#0: # %entry
-; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,2,4,6,8,10,12,14]
-; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14],zero,zero,zero,zero,zero,zero,zero,zero
-; SSSE3-NEXT: por %xmm1, %xmm0
-; SSSE3-NEXT: retq
+; SSSE3: # BB#0: # %entry
+; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
+; SSSE3-NEXT: pshufb %xmm2, %xmm1
+; SSSE3-NEXT: pshufb %xmm2, %xmm0
+; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSSE3-NEXT: retq
;
; SSE41-LABEL: PR12412:
-; SSE41: # BB#0: # %entry
-; SSE41-NEXT: pshufb {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,2,4,6,8,10,12,14]
-; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14],zero,zero,zero,zero,zero,zero,zero,zero
-; SSE41-NEXT: por %xmm1, %xmm0
-; SSE41-NEXT: retq
+; SSE41: # BB#0: # %entry
+; SSE41-NEXT: movdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
+; SSE41-NEXT: pshufb %xmm2, %xmm1
+; SSE41-NEXT: pshufb %xmm2, %xmm0
+; SSE41-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE41-NEXT: retq
;
; AVX-LABEL: PR12412:
-; AVX: # BB#0: # %entry
-; AVX-NEXT: vpshufb {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,2,4,6,8,10,12,14]
-; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14],zero,zero,zero,zero,zero,zero,zero,zero
-; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX-NEXT: retq
+; AVX: # BB#0: # %entry
+; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
+; AVX-NEXT: vpshufb %xmm2, %xmm1, %xmm1
+; AVX-NEXT: vpshufb %xmm2, %xmm0, %xmm0
+; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX-NEXT: retq
entry:
%0 = shufflevector <16 x i8> %inval1, <16 x i8> %inval2, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
ret <16 x i8> %0
}
+
+define <16 x i8> @shuffle_v16i8_uu_02_03_zz_uu_06_07_zz_uu_10_11_zz_uu_14_15_zz(<16 x i8> %a) {
+; SSE-LABEL: shuffle_v16i8_uu_02_03_zz_uu_06_07_zz_uu_10_11_zz_uu_14_15_zz:
+; SSE: # BB#0:
+; SSE-NEXT: psrld $8, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: shuffle_v16i8_uu_02_03_zz_uu_06_07_zz_uu_10_11_zz_uu_14_15_zz:
+; AVX: # BB#0:
+; AVX-NEXT: vpsrld $8, %xmm0, %xmm0
+; AVX-NEXT: retq
+ %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 undef, i32 2, i32 3, i32 16, i32 undef, i32 6, i32 7, i32 16, i32 undef, i32 10, i32 11, i32 16, i32 undef, i32 14, i32 15, i32 16>
+ ret <16 x i8> %shuffle
+}
+
+define <16 x i8> @shuffle_v16i8_bitcast_unpack(<16 x i8> %a, <16 x i8> %b) {
+; SSE-LABEL: shuffle_v16i8_bitcast_unpack:
+; SSE: # BB#0:
+; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE-NEXT: retq
+;
+; AVX-LABEL: shuffle_v16i8_bitcast_unpack:
+; AVX: # BB#0:
+; AVX-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; AVX-NEXT: retq
+ %shuffle8 = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 7, i32 23, i32 6, i32 22, i32 5, i32 21, i32 4, i32 20, i32 3, i32 19, i32 2, i32 18, i32 1, i32 17, i32 0, i32 16>
+ %bitcast32 = bitcast <16 x i8> %shuffle8 to <4 x float>
+ %shuffle32 = shufflevector <4 x float> %bitcast32, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+ %bitcast16 = bitcast <4 x float> %shuffle32 to <8 x i16>
+ %shuffle16 = shufflevector <8 x i16> %bitcast16, <8 x i16> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
+ %bitcast8 = bitcast <8 x i16> %shuffle16 to <16 x i8>
+ ret <16 x i8> %bitcast8
+}
diff --git a/test/CodeGen/X86/vector-shuffle-128-v2.ll b/test/CodeGen/X86/vector-shuffle-128-v2.ll
index 57fa0e859813..ee68df581bfd 100644
--- a/test/CodeGen/X86/vector-shuffle-128-v2.ll
+++ b/test/CodeGen/X86/vector-shuffle-128-v2.ll
@@ -1,9 +1,9 @@
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse3 -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE3
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+ssse3 -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSSE3
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse4.1 -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx2 -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse3 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE3
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+ssse3 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSSE3
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-unknown"
@@ -105,22 +105,22 @@ define <2 x double> @shuffle_v2f64_00(<2 x double> %a, <2 x double> %b) {
;
; SSE3-LABEL: shuffle_v2f64_00:
; SSE3: # BB#0:
-; SSE3-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0,0]
+; SSE3-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0]
; SSE3-NEXT: retq
;
; SSSE3-LABEL: shuffle_v2f64_00:
; SSSE3: # BB#0:
-; SSSE3-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0,0]
+; SSSE3-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v2f64_00:
; SSE41: # BB#0:
-; SSE41-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0,0]
+; SSE41-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0]
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v2f64_00:
; AVX: # BB#0:
-; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0,0]
+; AVX-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; AVX-NEXT: retq
%shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 0>
ret <2 x double> %shuffle
@@ -160,25 +160,22 @@ define <2 x double> @shuffle_v2f64_22(<2 x double> %a, <2 x double> %b) {
;
; SSE3-LABEL: shuffle_v2f64_22:
; SSE3: # BB#0:
-; SSE3-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0,0]
-; SSE3-NEXT: movapd %xmm1, %xmm0
+; SSE3-NEXT: movddup {{.*#+}} xmm0 = xmm1[0,0]
; SSE3-NEXT: retq
;
; SSSE3-LABEL: shuffle_v2f64_22:
; SSSE3: # BB#0:
-; SSSE3-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0,0]
-; SSSE3-NEXT: movapd %xmm1, %xmm0
+; SSSE3-NEXT: movddup {{.*#+}} xmm0 = xmm1[0,0]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v2f64_22:
; SSE41: # BB#0:
-; SSE41-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0,0]
-; SSE41-NEXT: movapd %xmm1, %xmm0
+; SSE41-NEXT: movddup {{.*#+}} xmm0 = xmm1[0,0]
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v2f64_22:
; AVX: # BB#0:
-; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0,0]
+; AVX-NEXT: vmovddup {{.*#+}} xmm0 = xmm1[0,0]
; AVX-NEXT: retq
%shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 2, i32 2>
ret <2 x double> %shuffle
@@ -214,20 +211,20 @@ define <2 x double> @shuffle_v2f64_33(<2 x double> %a, <2 x double> %b) {
define <2 x double> @shuffle_v2f64_03(<2 x double> %a, <2 x double> %b) {
; SSE2-LABEL: shuffle_v2f64_03:
; SSE2: # BB#0:
-; SSE2-NEXT: movsd %xmm0, %xmm1
-; SSE2-NEXT: movaps %xmm1, %xmm0
+; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
+; SSE2-NEXT: movapd %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE3-LABEL: shuffle_v2f64_03:
; SSE3: # BB#0:
-; SSE3-NEXT: movsd %xmm0, %xmm1
-; SSE3-NEXT: movaps %xmm1, %xmm0
+; SSE3-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
+; SSE3-NEXT: movapd %xmm1, %xmm0
; SSE3-NEXT: retq
;
; SSSE3-LABEL: shuffle_v2f64_03:
; SSSE3: # BB#0:
-; SSSE3-NEXT: movsd %xmm0, %xmm1
-; SSSE3-NEXT: movaps %xmm1, %xmm0
+; SSSE3-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
+; SSSE3-NEXT: movapd %xmm1, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v2f64_03:
@@ -245,17 +242,17 @@ define <2 x double> @shuffle_v2f64_03(<2 x double> %a, <2 x double> %b) {
define <2 x double> @shuffle_v2f64_21(<2 x double> %a, <2 x double> %b) {
; SSE2-LABEL: shuffle_v2f64_21:
; SSE2: # BB#0:
-; SSE2-NEXT: movsd %xmm1, %xmm0
+; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE2-NEXT: retq
;
; SSE3-LABEL: shuffle_v2f64_21:
; SSE3: # BB#0:
-; SSE3-NEXT: movsd %xmm1, %xmm0
+; SSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE3-NEXT: retq
;
; SSSE3-LABEL: shuffle_v2f64_21:
; SSSE3: # BB#0:
-; SSSE3-NEXT: movsd %xmm1, %xmm0
+; SSSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v2f64_21:
@@ -302,20 +299,20 @@ define <2 x i64> @shuffle_v2i64_02_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64
define <2 x i64> @shuffle_v2i64_03(<2 x i64> %a, <2 x i64> %b) {
; SSE2-LABEL: shuffle_v2i64_03:
; SSE2: # BB#0:
-; SSE2-NEXT: movsd %xmm0, %xmm1
-; SSE2-NEXT: movaps %xmm1, %xmm0
+; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
+; SSE2-NEXT: movapd %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE3-LABEL: shuffle_v2i64_03:
; SSE3: # BB#0:
-; SSE3-NEXT: movsd %xmm0, %xmm1
-; SSE3-NEXT: movaps %xmm1, %xmm0
+; SSE3-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
+; SSE3-NEXT: movapd %xmm1, %xmm0
; SSE3-NEXT: retq
;
; SSSE3-LABEL: shuffle_v2i64_03:
; SSSE3: # BB#0:
-; SSSE3-NEXT: movsd %xmm0, %xmm1
-; SSSE3-NEXT: movaps %xmm1, %xmm0
+; SSSE3-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
+; SSSE3-NEXT: movapd %xmm1, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v2i64_03:
@@ -338,20 +335,20 @@ define <2 x i64> @shuffle_v2i64_03(<2 x i64> %a, <2 x i64> %b) {
define <2 x i64> @shuffle_v2i64_03_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
; SSE2-LABEL: shuffle_v2i64_03_copy:
; SSE2: # BB#0:
-; SSE2-NEXT: movsd %xmm1, %xmm2
-; SSE2-NEXT: movaps %xmm2, %xmm0
+; SSE2-NEXT: movsd {{.*#+}} xmm2 = xmm1[0],xmm2[1]
+; SSE2-NEXT: movapd %xmm2, %xmm0
; SSE2-NEXT: retq
;
; SSE3-LABEL: shuffle_v2i64_03_copy:
; SSE3: # BB#0:
-; SSE3-NEXT: movsd %xmm1, %xmm2
-; SSE3-NEXT: movaps %xmm2, %xmm0
+; SSE3-NEXT: movsd {{.*#+}} xmm2 = xmm1[0],xmm2[1]
+; SSE3-NEXT: movapd %xmm2, %xmm0
; SSE3-NEXT: retq
;
; SSSE3-LABEL: shuffle_v2i64_03_copy:
; SSSE3: # BB#0:
-; SSSE3-NEXT: movsd %xmm1, %xmm2
-; SSSE3-NEXT: movaps %xmm2, %xmm0
+; SSSE3-NEXT: movsd {{.*#+}} xmm2 = xmm1[0],xmm2[1]
+; SSSE3-NEXT: movapd %xmm2, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v2i64_03_copy:
@@ -492,17 +489,17 @@ define <2 x i64> @shuffle_v2i64_20_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64
define <2 x i64> @shuffle_v2i64_21(<2 x i64> %a, <2 x i64> %b) {
; SSE2-LABEL: shuffle_v2i64_21:
; SSE2: # BB#0:
-; SSE2-NEXT: movsd %xmm1, %xmm0
+; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE2-NEXT: retq
;
; SSE3-LABEL: shuffle_v2i64_21:
; SSE3: # BB#0:
-; SSE3-NEXT: movsd %xmm1, %xmm0
+; SSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE3-NEXT: retq
;
; SSSE3-LABEL: shuffle_v2i64_21:
; SSSE3: # BB#0:
-; SSSE3-NEXT: movsd %xmm1, %xmm0
+; SSSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v2i64_21:
@@ -525,20 +522,20 @@ define <2 x i64> @shuffle_v2i64_21(<2 x i64> %a, <2 x i64> %b) {
define <2 x i64> @shuffle_v2i64_21_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
; SSE2-LABEL: shuffle_v2i64_21_copy:
; SSE2: # BB#0:
-; SSE2-NEXT: movsd %xmm2, %xmm1
-; SSE2-NEXT: movaps %xmm1, %xmm0
+; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
+; SSE2-NEXT: movapd %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE3-LABEL: shuffle_v2i64_21_copy:
; SSE3: # BB#0:
-; SSE3-NEXT: movsd %xmm2, %xmm1
-; SSE3-NEXT: movaps %xmm1, %xmm0
+; SSE3-NEXT: movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
+; SSE3-NEXT: movapd %xmm1, %xmm0
; SSE3-NEXT: retq
;
; SSSE3-LABEL: shuffle_v2i64_21_copy:
; SSSE3: # BB#0:
-; SSSE3-NEXT: movsd %xmm2, %xmm1
-; SSSE3-NEXT: movaps %xmm1, %xmm0
+; SSSE3-NEXT: movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
+; SSSE3-NEXT: movapd %xmm1, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v2i64_21_copy:
@@ -653,12 +650,12 @@ define <2 x i64> @shuffle_v2i64_31_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64
define <2 x i64> @shuffle_v2i64_0z(<2 x i64> %a) {
; SSE-LABEL: shuffle_v2i64_0z:
; SSE: # BB#0:
-; SSE-NEXT: movq %xmm0, %xmm0
+; SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v2i64_0z:
; AVX: # BB#0:
-; AVX-NEXT: vmovq %xmm0, %xmm0
+; AVX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX-NEXT: retq
%shuffle = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> <i32 0, i32 3>
ret <2 x i64> %shuffle
@@ -695,20 +692,20 @@ define <2 x i64> @shuffle_v2i64_z0(<2 x i64> %a) {
define <2 x i64> @shuffle_v2i64_z1(<2 x i64> %a) {
; SSE2-LABEL: shuffle_v2i64_z1:
; SSE2: # BB#0:
-; SSE2-NEXT: xorps %xmm1, %xmm1
-; SSE2-NEXT: movsd %xmm1, %xmm0
+; SSE2-NEXT: xorpd %xmm1, %xmm1
+; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE2-NEXT: retq
;
; SSE3-LABEL: shuffle_v2i64_z1:
; SSE3: # BB#0:
-; SSE3-NEXT: xorps %xmm1, %xmm1
-; SSE3-NEXT: movsd %xmm1, %xmm0
+; SSE3-NEXT: xorpd %xmm1, %xmm1
+; SSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE3-NEXT: retq
;
; SSSE3-LABEL: shuffle_v2i64_z1:
; SSSE3: # BB#0:
-; SSSE3-NEXT: xorps %xmm1, %xmm1
-; SSSE3-NEXT: movsd %xmm1, %xmm0
+; SSSE3-NEXT: xorpd %xmm1, %xmm1
+; SSSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v2i64_z1:
@@ -735,12 +732,12 @@ define <2 x i64> @shuffle_v2i64_z1(<2 x i64> %a) {
define <2 x double> @shuffle_v2f64_0z(<2 x double> %a) {
; SSE-LABEL: shuffle_v2f64_0z:
; SSE: # BB#0:
-; SSE-NEXT: movq %xmm0, %xmm0
+; SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v2f64_0z:
; AVX: # BB#0:
-; AVX-NEXT: vmovq %xmm0, %xmm0
+; AVX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX-NEXT: retq
%shuffle = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 0, i32 3>
ret <2 x double> %shuffle
@@ -782,20 +779,20 @@ define <2 x double> @shuffle_v2f64_z0(<2 x double> %a) {
define <2 x double> @shuffle_v2f64_z1(<2 x double> %a) {
; SSE2-LABEL: shuffle_v2f64_z1:
; SSE2: # BB#0:
-; SSE2-NEXT: xorps %xmm1, %xmm1
-; SSE2-NEXT: movsd %xmm1, %xmm0
+; SSE2-NEXT: xorpd %xmm1, %xmm1
+; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE2-NEXT: retq
;
; SSE3-LABEL: shuffle_v2f64_z1:
; SSE3: # BB#0:
-; SSE3-NEXT: xorps %xmm1, %xmm1
-; SSE3-NEXT: movsd %xmm1, %xmm0
+; SSE3-NEXT: xorpd %xmm1, %xmm1
+; SSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE3-NEXT: retq
;
; SSSE3-LABEL: shuffle_v2f64_z1:
; SSSE3: # BB#0:
-; SSSE3-NEXT: xorps %xmm1, %xmm1
-; SSSE3-NEXT: movsd %xmm1, %xmm0
+; SSSE3-NEXT: xorpd %xmm1, %xmm1
+; SSSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v2f64_z1:
@@ -813,6 +810,25 @@ define <2 x double> @shuffle_v2f64_z1(<2 x double> %a) {
ret <2 x double> %shuffle
}
+define <2 x double> @shuffle_v2f64_bitcast_1z(<2 x double> %a) {
+; SSE-LABEL: shuffle_v2f64_bitcast_1z:
+; SSE: # BB#0:
+; SSE-NEXT: xorpd %xmm1, %xmm1
+; SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
+; SSE-NEXT: retq
+;
+; AVX-LABEL: shuffle_v2f64_bitcast_1z:
+; AVX: # BB#0:
+; AVX-NEXT: vxorpd %xmm1, %xmm1, %xmm1
+; AVX-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
+; AVX-NEXT: retq
+ %shuffle64 = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 2, i32 1>
+ %bitcast32 = bitcast <2 x double> %shuffle64 to <4 x float>
+ %shuffle32 = shufflevector <4 x float> %bitcast32, <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
+ %bitcast64 = bitcast <4 x float> %shuffle32 to <2 x double>
+ ret <2 x double> %bitcast64
+}
+
define <2 x i64> @insert_reg_and_zero_v2i64(i64 %a) {
; SSE-LABEL: insert_reg_and_zero_v2i64:
; SSE: # BB#0:
@@ -831,14 +847,14 @@ define <2 x i64> @insert_reg_and_zero_v2i64(i64 %a) {
define <2 x i64> @insert_mem_and_zero_v2i64(i64* %ptr) {
; SSE-LABEL: insert_mem_and_zero_v2i64:
; SSE: # BB#0:
-; SSE-NEXT: movq (%rdi), %xmm0
+; SSE-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
; SSE-NEXT: retq
;
; AVX-LABEL: insert_mem_and_zero_v2i64:
; AVX: # BB#0:
-; AVX-NEXT: vmovq (%rdi), %xmm0
+; AVX-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: retq
- %a = load i64* %ptr
+ %a = load i64, i64* %ptr
%v = insertelement <2 x i64> undef, i64 %a, i32 0
%shuffle = shufflevector <2 x i64> %v, <2 x i64> zeroinitializer, <2 x i32> <i32 0, i32 3>
ret <2 x i64> %shuffle
@@ -847,12 +863,12 @@ define <2 x i64> @insert_mem_and_zero_v2i64(i64* %ptr) {
define <2 x double> @insert_reg_and_zero_v2f64(double %a) {
; SSE-LABEL: insert_reg_and_zero_v2f64:
; SSE: # BB#0:
-; SSE-NEXT: movq %xmm0, %xmm0
+; SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
; SSE-NEXT: retq
;
; AVX-LABEL: insert_reg_and_zero_v2f64:
; AVX: # BB#0:
-; AVX-NEXT: vmovq %xmm0, %xmm0
+; AVX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX-NEXT: retq
%v = insertelement <2 x double> undef, double %a, i32 0
%shuffle = shufflevector <2 x double> %v, <2 x double> zeroinitializer, <2 x i32> <i32 0, i32 3>
@@ -862,14 +878,14 @@ define <2 x double> @insert_reg_and_zero_v2f64(double %a) {
define <2 x double> @insert_mem_and_zero_v2f64(double* %ptr) {
; SSE-LABEL: insert_mem_and_zero_v2f64:
; SSE: # BB#0:
-; SSE-NEXT: movsd (%rdi), %xmm0
+; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE-NEXT: retq
;
; AVX-LABEL: insert_mem_and_zero_v2f64:
; AVX: # BB#0:
-; AVX-NEXT: vmovsd (%rdi), %xmm0
+; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: retq
- %a = load double* %ptr
+ %a = load double, double* %ptr
%v = insertelement <2 x double> undef, double %a, i32 0
%shuffle = shufflevector <2 x double> %v, <2 x double> zeroinitializer, <2 x i32> <i32 0, i32 3>
ret <2 x double> %shuffle
@@ -879,19 +895,19 @@ define <2 x i64> @insert_reg_lo_v2i64(i64 %a, <2 x i64> %b) {
; SSE2-LABEL: insert_reg_lo_v2i64:
; SSE2: # BB#0:
; SSE2-NEXT: movd %rdi, %xmm1
-; SSE2-NEXT: movsd %xmm1, %xmm0
+; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE2-NEXT: retq
;
; SSE3-LABEL: insert_reg_lo_v2i64:
; SSE3: # BB#0:
; SSE3-NEXT: movd %rdi, %xmm1
-; SSE3-NEXT: movsd %xmm1, %xmm0
+; SSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE3-NEXT: retq
;
; SSSE3-LABEL: insert_reg_lo_v2i64:
; SSSE3: # BB#0:
; SSSE3-NEXT: movd %rdi, %xmm1
-; SSSE3-NEXT: movsd %xmm1, %xmm0
+; SSSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: insert_reg_lo_v2i64:
@@ -934,22 +950,22 @@ define <2 x i64> @insert_mem_lo_v2i64(i64* %ptr, <2 x i64> %b) {
;
; SSE41-LABEL: insert_mem_lo_v2i64:
; SSE41: # BB#0:
-; SSE41-NEXT: movq (%rdi), %xmm1
+; SSE41-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
; SSE41-NEXT: retq
;
; AVX1-LABEL: insert_mem_lo_v2i64:
; AVX1: # BB#0:
-; AVX1-NEXT: vmovq (%rdi), %xmm1
+; AVX1-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
; AVX1-NEXT: retq
;
; AVX2-LABEL: insert_mem_lo_v2i64:
; AVX2: # BB#0:
-; AVX2-NEXT: vmovq (%rdi), %xmm1
+; AVX2-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; AVX2-NEXT: retq
- %a = load i64* %ptr
+ %a = load i64, i64* %ptr
%v = insertelement <2 x i64> undef, i64 %a, i32 0
%shuffle = shufflevector <2 x i64> %v, <2 x i64> %b, <2 x i32> <i32 0, i32 3>
ret <2 x i64> %shuffle
@@ -975,16 +991,16 @@ define <2 x i64> @insert_reg_hi_v2i64(i64 %a, <2 x i64> %b) {
define <2 x i64> @insert_mem_hi_v2i64(i64* %ptr, <2 x i64> %b) {
; SSE-LABEL: insert_mem_hi_v2i64:
; SSE: # BB#0:
-; SSE-NEXT: movq (%rdi), %xmm1
+; SSE-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT: retq
;
; AVX-LABEL: insert_mem_hi_v2i64:
; AVX: # BB#0:
-; AVX-NEXT: vmovq (%rdi), %xmm1
+; AVX-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX-NEXT: retq
- %a = load i64* %ptr
+ %a = load i64, i64* %ptr
%v = insertelement <2 x i64> undef, i64 %a, i32 0
%shuffle = shufflevector <2 x i64> %v, <2 x i64> %b, <2 x i32> <i32 2, i32 0>
ret <2 x i64> %shuffle
@@ -993,13 +1009,13 @@ define <2 x i64> @insert_mem_hi_v2i64(i64* %ptr, <2 x i64> %b) {
define <2 x double> @insert_reg_lo_v2f64(double %a, <2 x double> %b) {
; SSE-LABEL: insert_reg_lo_v2f64:
; SSE: # BB#0:
-; SSE-NEXT: movsd %xmm0, %xmm1
-; SSE-NEXT: movaps %xmm1, %xmm0
+; SSE-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
+; SSE-NEXT: movapd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: insert_reg_lo_v2f64:
; AVX: # BB#0:
-; AVX-NEXT: vmovsd %xmm0, %xmm1, %xmm0
+; AVX-NEXT: vmovsd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
; AVX-NEXT: retq
%v = insertelement <2 x double> undef, double %a, i32 0
%shuffle = shufflevector <2 x double> %v, <2 x double> %b, <2 x i32> <i32 0, i32 3>
@@ -1016,7 +1032,7 @@ define <2 x double> @insert_mem_lo_v2f64(double* %ptr, <2 x double> %b) {
; AVX: # BB#0:
; AVX-NEXT: vmovlpd (%rdi), %xmm0, %xmm0
; AVX-NEXT: retq
- %a = load double* %ptr
+ %a = load double, double* %ptr
%v = insertelement <2 x double> undef, double %a, i32 0
%shuffle = shufflevector <2 x double> %v, <2 x double> %b, <2 x i32> <i32 0, i32 3>
ret <2 x double> %shuffle
@@ -1048,7 +1064,7 @@ define <2 x double> @insert_mem_hi_v2f64(double* %ptr, <2 x double> %b) {
; AVX: # BB#0:
; AVX-NEXT: vmovhpd (%rdi), %xmm0, %xmm0
; AVX-NEXT: retq
- %a = load double* %ptr
+ %a = load double, double* %ptr
%v = insertelement <2 x double> undef, double %a, i32 0
%shuffle = shufflevector <2 x double> %v, <2 x double> %b, <2 x i32> <i32 2, i32 0>
ret <2 x double> %shuffle
@@ -1064,22 +1080,22 @@ define <2 x double> @insert_dup_reg_v2f64(double %a) {
;
; SSE3-LABEL: insert_dup_reg_v2f64:
; SSE3: # BB#0:
-; SSE3-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0,0]
+; SSE3-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0]
; SSE3-NEXT: retq
;
; SSSE3-LABEL: insert_dup_reg_v2f64:
; SSSE3: # BB#0:
-; SSSE3-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0,0]
+; SSSE3-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: insert_dup_reg_v2f64:
; SSE41: # BB#0:
-; SSE41-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0,0]
+; SSE41-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0]
; SSE41-NEXT: retq
;
; AVX-LABEL: insert_dup_reg_v2f64:
; AVX: # BB#0:
-; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0,0]
+; AVX-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; AVX-NEXT: retq
%v = insertelement <2 x double> undef, double %a, i32 0
%shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 0, i32 0>
@@ -1088,30 +1104,30 @@ define <2 x double> @insert_dup_reg_v2f64(double %a) {
define <2 x double> @insert_dup_mem_v2f64(double* %ptr) {
; SSE2-LABEL: insert_dup_mem_v2f64:
; SSE2: # BB#0:
-; SSE2-NEXT: movsd (%rdi), %xmm0
+; SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
; SSE2-NEXT: retq
;
; SSE3-LABEL: insert_dup_mem_v2f64:
; SSE3: # BB#0:
-; SSE3-NEXT: movddup (%rdi), %xmm0
+; SSE3-NEXT: movddup {{.*#+}} xmm0 = mem[0,0]
; SSE3-NEXT: retq
;
; SSSE3-LABEL: insert_dup_mem_v2f64:
; SSSE3: # BB#0:
-; SSSE3-NEXT: movddup (%rdi), %xmm0
+; SSSE3-NEXT: movddup {{.*#+}} xmm0 = mem[0,0]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: insert_dup_mem_v2f64:
; SSE41: # BB#0:
-; SSE41-NEXT: movddup (%rdi), %xmm0
+; SSE41-NEXT: movddup {{.*#+}} xmm0 = mem[0,0]
; SSE41-NEXT: retq
;
; AVX-LABEL: insert_dup_mem_v2f64:
; AVX: # BB#0:
-; AVX-NEXT: vmovddup (%rdi), %xmm0
+; AVX-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
; AVX-NEXT: retq
- %a = load double* %ptr
+ %a = load double, double* %ptr
%v = insertelement <2 x double> undef, double %a, i32 0
%shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 0, i32 0>
ret <2 x double> %shuffle
@@ -1128,7 +1144,7 @@ define <2 x double> @shuffle_mem_v2f64_10(<2 x double>* %ptr) {
; AVX: # BB#0:
; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = mem[1,0]
; AVX-NEXT: retq
- %a = load <2 x double>* %ptr
+ %a = load <2 x double>, <2 x double>* %ptr
%shuffle = shufflevector <2 x double> %a, <2 x double> undef, <2 x i32> <i32 1, i32 0>
ret <2 x double> %shuffle
}
diff --git a/test/CodeGen/X86/vector-shuffle-128-v4.ll b/test/CodeGen/X86/vector-shuffle-128-v4.ll
index 53fb09e32428..8612a5afa3d2 100644
--- a/test/CodeGen/X86/vector-shuffle-128-v4.ll
+++ b/test/CodeGen/X86/vector-shuffle-128-v4.ll
@@ -1,9 +1,9 @@
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse3 -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE3
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+ssse3 -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSSE3
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse4.1 -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx2 -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse3 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE3
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+ssse3 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSSE3
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-unknown"
@@ -322,60 +322,150 @@ define <4 x i32> @shuffle_v4i32_0124(<4 x i32> %a, <4 x i32> %b) {
;
; SSE41-LABEL: shuffle_v4i32_0124:
; SSE41: # BB#0:
-; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0]
+; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,2,0]
+; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5],xmm1[6,7]
; SSE41-NEXT: retq
;
-; AVX-LABEL: shuffle_v4i32_0124:
-; AVX: # BB#0:
-; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0]
-; AVX-NEXT: retq
+; AVX1-LABEL: shuffle_v4i32_0124:
+; AVX1: # BB#0:
+; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,1,2,0]
+; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5],xmm1[6,7]
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v4i32_0124:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpbroadcastd %xmm1, %xmm1
+; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3]
+; AVX2-NEXT: retq
%shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
ret <4 x i32> %shuffle
}
define <4 x i32> @shuffle_v4i32_0142(<4 x i32> %a, <4 x i32> %b) {
-; SSE-LABEL: shuffle_v4i32_0142:
-; SSE: # BB#0:
-; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[2,0]
-; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
-; SSE-NEXT: retq
+; SSE2-LABEL: shuffle_v4i32_0142:
+; SSE2: # BB#0:
+; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[2,0]
+; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
+; SSE2-NEXT: retq
;
-; AVX-LABEL: shuffle_v4i32_0142:
-; AVX: # BB#0:
-; AVX-NEXT: vshufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[2,0]
-; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
-; AVX-NEXT: retq
+; SSE3-LABEL: shuffle_v4i32_0142:
+; SSE3: # BB#0:
+; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[2,0]
+; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
+; SSE3-NEXT: retq
+;
+; SSSE3-LABEL: shuffle_v4i32_0142:
+; SSSE3: # BB#0:
+; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[2,0]
+; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: shuffle_v4i32_0142:
+; SSE41: # BB#0:
+; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,0,1]
+; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,2,2]
+; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5],xmm0[6,7]
+; SSE41-NEXT: retq
+;
+; AVX1-LABEL: shuffle_v4i32_0142:
+; AVX1: # BB#0:
+; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,1,0,1]
+; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,2,2]
+; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5],xmm0[6,7]
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v4i32_0142:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpbroadcastq %xmm1, %xmm1
+; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,2,2]
+; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3]
+; AVX2-NEXT: retq
%shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 4, i32 2>
ret <4 x i32> %shuffle
}
define <4 x i32> @shuffle_v4i32_0412(<4 x i32> %a, <4 x i32> %b) {
-; SSE-LABEL: shuffle_v4i32_0412:
-; SSE: # BB#0:
-; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[0,0]
-; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[1,2]
-; SSE-NEXT: movaps %xmm1, %xmm0
-; SSE-NEXT: retq
+; SSE2-LABEL: shuffle_v4i32_0412:
+; SSE2: # BB#0:
+; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[0,0]
+; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[1,2]
+; SSE2-NEXT: movaps %xmm1, %xmm0
+; SSE2-NEXT: retq
;
-; AVX-LABEL: shuffle_v4i32_0412:
-; AVX: # BB#0:
-; AVX-NEXT: vshufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[0,0]
-; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm1[2,0],xmm0[1,2]
-; AVX-NEXT: retq
+; SSE3-LABEL: shuffle_v4i32_0412:
+; SSE3: # BB#0:
+; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[0,0]
+; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[1,2]
+; SSE3-NEXT: movaps %xmm1, %xmm0
+; SSE3-NEXT: retq
+;
+; SSSE3-LABEL: shuffle_v4i32_0412:
+; SSSE3: # BB#0:
+; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[0,0]
+; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[1,2]
+; SSSE3-NEXT: movaps %xmm1, %xmm0
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: shuffle_v4i32_0412:
+; SSE41: # BB#0:
+; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
+; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,1,2]
+; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7]
+; SSE41-NEXT: retq
+;
+; AVX1-LABEL: shuffle_v4i32_0412:
+; AVX1: # BB#0:
+; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
+; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,1,2]
+; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7]
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v4i32_0412:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpbroadcastd %xmm1, %xmm1
+; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,1,2]
+; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3]
+; AVX2-NEXT: retq
%shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 2>
ret <4 x i32> %shuffle
}
define <4 x i32> @shuffle_v4i32_4012(<4 x i32> %a, <4 x i32> %b) {
-; SSE-LABEL: shuffle_v4i32_4012:
-; SSE: # BB#0:
-; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[0,0]
-; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2],xmm0[1,2]
-; SSE-NEXT: movaps %xmm1, %xmm0
-; SSE-NEXT: retq
+; SSE2-LABEL: shuffle_v4i32_4012:
+; SSE2: # BB#0:
+; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[0,0]
+; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2],xmm0[1,2]
+; SSE2-NEXT: movaps %xmm1, %xmm0
+; SSE2-NEXT: retq
;
-; AVX-LABEL: shuffle_v4i32_4012:
-; AVX: # BB#0:
-; AVX-NEXT: vshufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[0,0]
-; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm1[0,2],xmm0[1,2]
-; AVX-NEXT: retq
+; SSE3-LABEL: shuffle_v4i32_4012:
+; SSE3: # BB#0:
+; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[0,0]
+; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2],xmm0[1,2]
+; SSE3-NEXT: movaps %xmm1, %xmm0
+; SSE3-NEXT: retq
+;
+; SSSE3-LABEL: shuffle_v4i32_4012:
+; SSSE3: # BB#0:
+; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[0,0]
+; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2],xmm0[1,2]
+; SSSE3-NEXT: movaps %xmm1, %xmm0
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: shuffle_v4i32_4012:
+; SSE41: # BB#0:
+; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,2]
+; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3,4,5,6,7]
+; SSE41-NEXT: retq
+;
+; AVX1-LABEL: shuffle_v4i32_4012:
+; AVX1: # BB#0:
+; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,2]
+; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3,4,5,6,7]
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v4i32_4012:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,2]
+; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
+; AVX2-NEXT: retq
%shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 4, i32 0, i32 1, i32 2>
ret <4 x i32> %shuffle
}
@@ -393,17 +483,44 @@ define <4 x i32> @shuffle_v4i32_0145(<4 x i32> %a, <4 x i32> %b) {
ret <4 x i32> %shuffle
}
define <4 x i32> @shuffle_v4i32_0451(<4 x i32> %a, <4 x i32> %b) {
-; SSE-LABEL: shuffle_v4i32_0451:
-; SSE: # BB#0:
-; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,1]
-; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2,3,1]
-; SSE-NEXT: retq
+; SSE2-LABEL: shuffle_v4i32_0451:
+; SSE2: # BB#0:
+; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
+; SSE2-NEXT: retq
;
-; AVX-LABEL: shuffle_v4i32_0451:
-; AVX: # BB#0:
-; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,1]
-; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2,3,1]
-; AVX-NEXT: retq
+; SSE3-LABEL: shuffle_v4i32_0451:
+; SSE3: # BB#0:
+; SSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
+; SSE3-NEXT: retq
+;
+; SSSE3-LABEL: shuffle_v4i32_0451:
+; SSSE3: # BB#0:
+; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: shuffle_v4i32_0451:
+; SSE41: # BB#0:
+; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
+; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
+; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5],xmm0[6,7]
+; SSE41-NEXT: retq
+;
+; AVX1-LABEL: shuffle_v4i32_0451:
+; AVX1: # BB#0:
+; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
+; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
+; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5],xmm0[6,7]
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v4i32_0451:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
+; AVX2-NEXT: vpbroadcastq %xmm0, %xmm0
+; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3]
+; AVX2-NEXT: retq
%shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 5, i32 1>
ret <4 x i32> %shuffle
}
@@ -422,17 +539,44 @@ define <4 x i32> @shuffle_v4i32_4501(<4 x i32> %a, <4 x i32> %b) {
ret <4 x i32> %shuffle
}
define <4 x i32> @shuffle_v4i32_4015(<4 x i32> %a, <4 x i32> %b) {
-; SSE-LABEL: shuffle_v4i32_4015:
-; SSE: # BB#0:
-; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,1]
-; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0,1,3]
-; SSE-NEXT: retq
+; SSE2-LABEL: shuffle_v4i32_4015:
+; SSE2: # BB#0:
+; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,0,2,3]
+; SSE2-NEXT: retq
;
-; AVX-LABEL: shuffle_v4i32_4015:
-; AVX: # BB#0:
-; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,1]
-; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[2,0,1,3]
-; AVX-NEXT: retq
+; SSE3-LABEL: shuffle_v4i32_4015:
+; SSE3: # BB#0:
+; SSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,0,2,3]
+; SSE3-NEXT: retq
+;
+; SSSE3-LABEL: shuffle_v4i32_4015:
+; SSSE3: # BB#0:
+; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,0,2,3]
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: shuffle_v4i32_4015:
+; SSE41: # BB#0:
+; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,0,1]
+; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
+; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3,4,5],xmm1[6,7]
+; SSE41-NEXT: retq
+;
+; AVX1-LABEL: shuffle_v4i32_4015:
+; AVX1: # BB#0:
+; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,1,0,1]
+; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
+; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3,4,5],xmm1[6,7]
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v4i32_4015:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpbroadcastq %xmm1, %xmm1
+; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
+; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0],xmm0[1,2],xmm1[3]
+; AVX2-NEXT: retq
%shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 4, i32 0, i32 1, i32 5>
ret <4 x i32> %shuffle
}
@@ -441,21 +585,21 @@ define <4 x float> @shuffle_v4f32_4zzz(<4 x float> %a) {
; SSE2-LABEL: shuffle_v4f32_4zzz:
; SSE2: # BB#0:
; SSE2-NEXT: xorps %xmm1, %xmm1
-; SSE2-NEXT: movss %xmm0, %xmm1
+; SSE2-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
; SSE2-NEXT: movaps %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE3-LABEL: shuffle_v4f32_4zzz:
; SSE3: # BB#0:
; SSE3-NEXT: xorps %xmm1, %xmm1
-; SSE3-NEXT: movss %xmm0, %xmm1
+; SSE3-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
; SSE3-NEXT: movaps %xmm1, %xmm0
; SSE3-NEXT: retq
;
; SSSE3-LABEL: shuffle_v4f32_4zzz:
; SSSE3: # BB#0:
; SSSE3-NEXT: xorps %xmm1, %xmm1
-; SSSE3-NEXT: movss %xmm0, %xmm1
+; SSSE3-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
; SSSE3-NEXT: movaps %xmm1, %xmm0
; SSSE3-NEXT: retq
;
@@ -478,22 +622,22 @@ define <4 x float> @shuffle_v4f32_z4zz(<4 x float> %a) {
; SSE2-LABEL: shuffle_v4f32_z4zz:
; SSE2: # BB#0:
; SSE2-NEXT: xorps %xmm1, %xmm1
-; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[2,0]
-; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[3,0]
+; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0]
+; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3]
; SSE2-NEXT: retq
;
; SSE3-LABEL: shuffle_v4f32_z4zz:
; SSE3: # BB#0:
; SSE3-NEXT: xorps %xmm1, %xmm1
-; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[2,0]
-; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[3,0]
+; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0]
+; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3]
; SSE3-NEXT: retq
;
; SSSE3-LABEL: shuffle_v4f32_z4zz:
; SSSE3: # BB#0:
; SSSE3-NEXT: xorps %xmm1, %xmm1
-; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[2,0]
-; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[3,0]
+; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0]
+; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v4f32_z4zz:
@@ -513,24 +657,24 @@ define <4 x float> @shuffle_v4f32_zz4z(<4 x float> %a) {
; SSE2-LABEL: shuffle_v4f32_zz4z:
; SSE2: # BB#0:
; SSE2-NEXT: xorps %xmm1, %xmm1
-; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0]
-; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[0,2]
+; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[3,0]
+; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[0,2]
; SSE2-NEXT: movaps %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE3-LABEL: shuffle_v4f32_zz4z:
; SSE3: # BB#0:
; SSE3-NEXT: xorps %xmm1, %xmm1
-; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0]
-; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[0,2]
+; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[3,0]
+; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[0,2]
; SSE3-NEXT: movaps %xmm1, %xmm0
; SSE3-NEXT: retq
;
; SSSE3-LABEL: shuffle_v4f32_zz4z:
; SSSE3: # BB#0:
; SSSE3-NEXT: xorps %xmm1, %xmm1
-; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0]
-; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[0,2]
+; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[3,0]
+; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[0,2]
; SSSE3-NEXT: movaps %xmm1, %xmm0
; SSSE3-NEXT: retq
;
@@ -657,25 +801,191 @@ define <4 x float> @shuffle_v4f32_z6zz(<4 x float> %a) {
ret <4 x float> %shuffle
}
+define <4 x float> @shuffle_v4f32_0z23(<4 x float> %a) {
+; SSE2-LABEL: shuffle_v4f32_0z23:
+; SSE2: # BB#0:
+; SSE2-NEXT: xorps %xmm1, %xmm1
+; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,0],xmm0[0,0]
+; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[2,3]
+; SSE2-NEXT: movaps %xmm1, %xmm0
+; SSE2-NEXT: retq
+;
+; SSE3-LABEL: shuffle_v4f32_0z23:
+; SSE3: # BB#0:
+; SSE3-NEXT: xorps %xmm1, %xmm1
+; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,0],xmm0[0,0]
+; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[2,3]
+; SSE3-NEXT: movaps %xmm1, %xmm0
+; SSE3-NEXT: retq
+;
+; SSSE3-LABEL: shuffle_v4f32_0z23:
+; SSSE3: # BB#0:
+; SSSE3-NEXT: xorps %xmm1, %xmm1
+; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,0],xmm0[0,0]
+; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[2,3]
+; SSSE3-NEXT: movaps %xmm1, %xmm0
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: shuffle_v4f32_0z23:
+; SSE41: # BB#0:
+; SSE41-NEXT: xorps %xmm1, %xmm1
+; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3]
+; SSE41-NEXT: retq
+;
+; AVX-LABEL: shuffle_v4f32_0z23:
+; AVX: # BB#0:
+; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3]
+; AVX-NEXT: retq
+ %shuffle = shufflevector <4 x float> %a, <4 x float> zeroinitializer, <4 x i32> <i32 0, i32 4, i32 2, i32 3>
+ ret <4 x float> %shuffle
+}
+
+define <4 x float> @shuffle_v4f32_01z3(<4 x float> %a) {
+; SSE2-LABEL: shuffle_v4f32_01z3:
+; SSE2: # BB#0:
+; SSE2-NEXT: xorps %xmm1, %xmm1
+; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[3,0]
+; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
+; SSE2-NEXT: retq
+;
+; SSE3-LABEL: shuffle_v4f32_01z3:
+; SSE3: # BB#0:
+; SSE3-NEXT: xorps %xmm1, %xmm1
+; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[3,0]
+; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
+; SSE3-NEXT: retq
+;
+; SSSE3-LABEL: shuffle_v4f32_01z3:
+; SSSE3: # BB#0:
+; SSSE3-NEXT: xorps %xmm1, %xmm1
+; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[3,0]
+; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: shuffle_v4f32_01z3:
+; SSE41: # BB#0:
+; SSE41-NEXT: xorps %xmm1, %xmm1
+; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3]
+; SSE41-NEXT: retq
+;
+; AVX-LABEL: shuffle_v4f32_01z3:
+; AVX: # BB#0:
+; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3]
+; AVX-NEXT: retq
+ %shuffle = shufflevector <4 x float> %a, <4 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 3>
+ ret <4 x float> %shuffle
+}
+
+define <4 x float> @shuffle_v4f32_012z(<4 x float> %a) {
+; SSE2-LABEL: shuffle_v4f32_012z:
+; SSE2: # BB#0:
+; SSE2-NEXT: xorps %xmm1, %xmm1
+; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,0],xmm0[2,0]
+; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,0]
+; SSE2-NEXT: retq
+;
+; SSE3-LABEL: shuffle_v4f32_012z:
+; SSE3: # BB#0:
+; SSE3-NEXT: xorps %xmm1, %xmm1
+; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,0],xmm0[2,0]
+; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,0]
+; SSE3-NEXT: retq
+;
+; SSSE3-LABEL: shuffle_v4f32_012z:
+; SSSE3: # BB#0:
+; SSSE3-NEXT: xorps %xmm1, %xmm1
+; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,0],xmm0[2,0]
+; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,0]
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: shuffle_v4f32_012z:
+; SSE41: # BB#0:
+; SSE41-NEXT: xorps %xmm1, %xmm1
+; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3]
+; SSE41-NEXT: retq
+;
+; AVX-LABEL: shuffle_v4f32_012z:
+; AVX: # BB#0:
+; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3]
+; AVX-NEXT: retq
+ %shuffle = shufflevector <4 x float> %a, <4 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+ ret <4 x float> %shuffle
+}
+
+define <4 x float> @shuffle_v4f32_0zz3(<4 x float> %a) {
+; SSE2-LABEL: shuffle_v4f32_0zz3:
+; SSE2: # BB#0:
+; SSE2-NEXT: xorps %xmm1, %xmm1
+; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,3],xmm1[1,2]
+; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2,3,1]
+; SSE2-NEXT: retq
+;
+; SSE3-LABEL: shuffle_v4f32_0zz3:
+; SSE3: # BB#0:
+; SSE3-NEXT: xorps %xmm1, %xmm1
+; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,3],xmm1[1,2]
+; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2,3,1]
+; SSE3-NEXT: retq
+;
+; SSSE3-LABEL: shuffle_v4f32_0zz3:
+; SSSE3: # BB#0:
+; SSSE3-NEXT: xorps %xmm1, %xmm1
+; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,3],xmm1[1,2]
+; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2,3,1]
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: shuffle_v4f32_0zz3:
+; SSE41: # BB#0:
+; SSE41-NEXT: xorps %xmm1, %xmm1
+; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3]
+; SSE41-NEXT: retq
+;
+; AVX-LABEL: shuffle_v4f32_0zz3:
+; AVX: # BB#0:
+; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3]
+; AVX-NEXT: retq
+ %shuffle = shufflevector <4 x float> %a, <4 x float> zeroinitializer, <4 x i32> <i32 0, i32 4, i32 4, i32 3>
+ ret <4 x float> %shuffle
+}
+
+define <4 x float> @shuffle_v4f32_u051(<4 x float> %a, <4 x float> %b) {
+; SSE-LABEL: shuffle_v4f32_u051:
+; SSE: # BB#0:
+; SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; SSE-NEXT: movaps %xmm1, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: shuffle_v4f32_u051:
+; AVX: # BB#0:
+; AVX-NEXT: vunpcklps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; AVX-NEXT: retq
+ %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 undef, i32 0, i32 5, i32 1>
+ ret <4 x float> %shuffle
+}
+
define <4 x i32> @shuffle_v4i32_4zzz(<4 x i32> %a) {
; SSE2-LABEL: shuffle_v4i32_4zzz:
; SSE2: # BB#0:
; SSE2-NEXT: xorps %xmm1, %xmm1
-; SSE2-NEXT: movss %xmm0, %xmm1
+; SSE2-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
; SSE2-NEXT: movaps %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE3-LABEL: shuffle_v4i32_4zzz:
; SSE3: # BB#0:
; SSE3-NEXT: xorps %xmm1, %xmm1
-; SSE3-NEXT: movss %xmm0, %xmm1
+; SSE3-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
; SSE3-NEXT: movaps %xmm1, %xmm0
; SSE3-NEXT: retq
;
; SSSE3-LABEL: shuffle_v4i32_4zzz:
; SSSE3: # BB#0:
; SSSE3-NEXT: xorps %xmm1, %xmm1
-; SSSE3-NEXT: movss %xmm0, %xmm1
+; SSSE3-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
; SSSE3-NEXT: movaps %xmm1, %xmm0
; SSSE3-NEXT: retq
;
@@ -698,21 +1008,21 @@ define <4 x i32> @shuffle_v4i32_z4zz(<4 x i32> %a) {
; SSE2-LABEL: shuffle_v4i32_z4zz:
; SSE2: # BB#0:
; SSE2-NEXT: xorps %xmm1, %xmm1
-; SSE2-NEXT: movss %xmm0, %xmm1
+; SSE2-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,0,1,1]
; SSE2-NEXT: retq
;
; SSE3-LABEL: shuffle_v4i32_z4zz:
; SSE3: # BB#0:
; SSE3-NEXT: xorps %xmm1, %xmm1
-; SSE3-NEXT: movss %xmm0, %xmm1
+; SSE3-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,0,1,1]
; SSE3-NEXT: retq
;
; SSSE3-LABEL: shuffle_v4i32_z4zz:
; SSSE3: # BB#0:
; SSSE3-NEXT: xorps %xmm1, %xmm1
-; SSSE3-NEXT: movss %xmm0, %xmm1
+; SSSE3-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,0,1,1]
; SSSE3-NEXT: retq
;
@@ -737,21 +1047,21 @@ define <4 x i32> @shuffle_v4i32_zz4z(<4 x i32> %a) {
; SSE2-LABEL: shuffle_v4i32_zz4z:
; SSE2: # BB#0:
; SSE2-NEXT: xorps %xmm1, %xmm1
-; SSE2-NEXT: movss %xmm0, %xmm1
+; SSE2-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,0,1]
; SSE2-NEXT: retq
;
; SSE3-LABEL: shuffle_v4i32_zz4z:
; SSE3: # BB#0:
; SSE3-NEXT: xorps %xmm1, %xmm1
-; SSE3-NEXT: movss %xmm0, %xmm1
+; SSE3-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,0,1]
; SSE3-NEXT: retq
;
; SSSE3-LABEL: shuffle_v4i32_zz4z:
; SSSE3: # BB#0:
; SSSE3-NEXT: xorps %xmm1, %xmm1
-; SSSE3-NEXT: movss %xmm0, %xmm1
+; SSSE3-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,0,1]
; SSSE3-NEXT: retq
;
@@ -810,13 +1120,24 @@ define <4 x i32> @shuffle_v4i32_z6zz(<4 x i32> %a) {
;
; SSE41-LABEL: shuffle_v4i32_z6zz:
; SSE41: # BB#0:
-; SSE41-NEXT: insertps {{.*#+}} xmm0 = zero,xmm0[2],zero,zero
+; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,2,3,3]
+; SSE41-NEXT: pxor %xmm0, %xmm0
+; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7]
; SSE41-NEXT: retq
;
-; AVX-LABEL: shuffle_v4i32_z6zz:
-; AVX: # BB#0:
-; AVX-NEXT: vinsertps {{.*#+}} xmm0 = zero,xmm0[2],zero,zero
-; AVX-NEXT: retq
+; AVX1-LABEL: shuffle_v4i32_z6zz:
+; AVX1: # BB#0:
+; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
+; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5,6,7]
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v4i32_z6zz:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
+; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2,3]
+; AVX2-NEXT: retq
%shuffle = shufflevector <4 x i32> zeroinitializer, <4 x i32> %a, <4 x i32> <i32 0, i32 6, i32 2, i32 3>
ret <4 x i32> %shuffle
}
@@ -982,6 +1303,21 @@ define <4 x i32> @shuffle_v4i32_2345(<4 x i32> %a, <4 x i32> %b) {
ret <4 x i32> %shuffle
}
+define <4 x i32> @shuffle_v4i32_40u1(<4 x i32> %a, <4 x i32> %b) {
+; SSE-LABEL: shuffle_v4i32_40u1:
+; SSE: # BB#0:
+; SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; SSE-NEXT: movdqa %xmm1, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: shuffle_v4i32_40u1:
+; AVX: # BB#0:
+; AVX-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; AVX-NEXT: retq
+ %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 4, i32 0, i32 undef, i32 1>
+ ret <4 x i32> %shuffle
+}
+
define <4 x i32> @shuffle_v4i32_3456(<4 x i32> %a, <4 x i32> %b) {
; SSE2-LABEL: shuffle_v4i32_3456:
; SSE2: # BB#0:
@@ -1033,12 +1369,12 @@ define <4 x i32> @shuffle_v4i32_0u1u(<4 x i32> %a, <4 x i32> %b) {
;
; SSE41-LABEL: shuffle_v4i32_0u1u:
; SSE41: # BB#0:
-; SSE41-NEXT: pmovzxdq %xmm0, %xmm0
+; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v4i32_0u1u:
; AVX: # BB#0:
-; AVX-NEXT: vpmovzxdq %xmm0, %xmm0
+; AVX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; AVX-NEXT: retq
%shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 undef, i32 1, i32 undef>
ret <4 x i32> %shuffle
@@ -1065,17 +1401,196 @@ define <4 x i32> @shuffle_v4i32_0z1z(<4 x i32> %a) {
;
; SSE41-LABEL: shuffle_v4i32_0z1z:
; SSE41: # BB#0:
-; SSE41-NEXT: pmovzxdq %xmm0, %xmm0
+; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v4i32_0z1z:
; AVX: # BB#0:
-; AVX-NEXT: vpmovzxdq %xmm0, %xmm0
+; AVX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; AVX-NEXT: retq
%shuffle = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 1, i32 7>
ret <4 x i32> %shuffle
}
+define <4 x i32> @shuffle_v4i32_01zu(<4 x i32> %a) {
+; SSE-LABEL: shuffle_v4i32_01zu:
+; SSE: # BB#0:
+; SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
+; SSE-NEXT: retq
+;
+; AVX-LABEL: shuffle_v4i32_01zu:
+; AVX: # BB#0:
+; AVX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
+; AVX-NEXT: retq
+ %shuffle = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 7, i32 undef>
+ ret <4 x i32> %shuffle
+}
+
+define <4 x i32> @shuffle_v4i32_0z23(<4 x i32> %a) {
+; SSE2-LABEL: shuffle_v4i32_0z23:
+; SSE2: # BB#0:
+; SSE2-NEXT: andps {{.*}}(%rip), %xmm0
+; SSE2-NEXT: retq
+;
+; SSE3-LABEL: shuffle_v4i32_0z23:
+; SSE3: # BB#0:
+; SSE3-NEXT: andps {{.*}}(%rip), %xmm0
+; SSE3-NEXT: retq
+;
+; SSSE3-LABEL: shuffle_v4i32_0z23:
+; SSSE3: # BB#0:
+; SSSE3-NEXT: andps {{.*}}(%rip), %xmm0
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: shuffle_v4i32_0z23:
+; SSE41: # BB#0:
+; SSE41-NEXT: pxor %xmm1, %xmm1
+; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7]
+; SSE41-NEXT: retq
+;
+; AVX1-LABEL: shuffle_v4i32_0z23:
+; AVX1: # BB#0:
+; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7]
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v4i32_0z23:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3]
+; AVX2-NEXT: retq
+ %shuffle = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 4, i32 2, i32 3>
+ ret <4 x i32> %shuffle
+}
+
+define <4 x i32> @shuffle_v4i32_01z3(<4 x i32> %a) {
+; SSE2-LABEL: shuffle_v4i32_01z3:
+; SSE2: # BB#0:
+; SSE2-NEXT: andps {{.*}}(%rip), %xmm0
+; SSE2-NEXT: retq
+;
+; SSE3-LABEL: shuffle_v4i32_01z3:
+; SSE3: # BB#0:
+; SSE3-NEXT: andps {{.*}}(%rip), %xmm0
+; SSE3-NEXT: retq
+;
+; SSSE3-LABEL: shuffle_v4i32_01z3:
+; SSSE3: # BB#0:
+; SSSE3-NEXT: andps {{.*}}(%rip), %xmm0
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: shuffle_v4i32_01z3:
+; SSE41: # BB#0:
+; SSE41-NEXT: pxor %xmm1, %xmm1
+; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5],xmm0[6,7]
+; SSE41-NEXT: retq
+;
+; AVX1-LABEL: shuffle_v4i32_01z3:
+; AVX1: # BB#0:
+; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5],xmm0[6,7]
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v4i32_01z3:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3]
+; AVX2-NEXT: retq
+ %shuffle = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 3>
+ ret <4 x i32> %shuffle
+}
+
+define <4 x i32> @shuffle_v4i32_012z(<4 x i32> %a) {
+; SSE2-LABEL: shuffle_v4i32_012z:
+; SSE2: # BB#0:
+; SSE2-NEXT: andps {{.*}}(%rip), %xmm0
+; SSE2-NEXT: retq
+;
+; SSE3-LABEL: shuffle_v4i32_012z:
+; SSE3: # BB#0:
+; SSE3-NEXT: andps {{.*}}(%rip), %xmm0
+; SSE3-NEXT: retq
+;
+; SSSE3-LABEL: shuffle_v4i32_012z:
+; SSSE3: # BB#0:
+; SSSE3-NEXT: andps {{.*}}(%rip), %xmm0
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: shuffle_v4i32_012z:
+; SSE41: # BB#0:
+; SSE41-NEXT: pxor %xmm1, %xmm1
+; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5],xmm1[6,7]
+; SSE41-NEXT: retq
+;
+; AVX1-LABEL: shuffle_v4i32_012z:
+; AVX1: # BB#0:
+; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5],xmm1[6,7]
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v4i32_012z:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3]
+; AVX2-NEXT: retq
+ %shuffle = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+ ret <4 x i32> %shuffle
+}
+
+define <4 x i32> @shuffle_v4i32_0zz3(<4 x i32> %a) {
+; SSE2-LABEL: shuffle_v4i32_0zz3:
+; SSE2: # BB#0:
+; SSE2-NEXT: andps {{.*}}(%rip), %xmm0
+; SSE2-NEXT: retq
+;
+; SSE3-LABEL: shuffle_v4i32_0zz3:
+; SSE3: # BB#0:
+; SSE3-NEXT: andps {{.*}}(%rip), %xmm0
+; SSE3-NEXT: retq
+;
+; SSSE3-LABEL: shuffle_v4i32_0zz3:
+; SSSE3: # BB#0:
+; SSSE3-NEXT: andps {{.*}}(%rip), %xmm0
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: shuffle_v4i32_0zz3:
+; SSE41: # BB#0:
+; SSE41-NEXT: pxor %xmm1, %xmm1
+; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5],xmm0[6,7]
+; SSE41-NEXT: retq
+;
+; AVX1-LABEL: shuffle_v4i32_0zz3:
+; AVX1: # BB#0:
+; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5],xmm0[6,7]
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v4i32_0zz3:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3]
+; AVX2-NEXT: retq
+ %shuffle = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 4, i32 4, i32 3>
+ ret <4 x i32> %shuffle
+}
+
+define <4 x i32> @shuffle_v4i32_bitcast_0415(<4 x i32> %a, <4 x i32> %b) {
+; SSE-LABEL: shuffle_v4i32_bitcast_0415:
+; SSE: # BB#0:
+; SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE-NEXT: retq
+;
+; AVX-LABEL: shuffle_v4i32_bitcast_0415:
+; AVX: # BB#0:
+; AVX-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; AVX-NEXT: retq
+ %shuffle32 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 5, i32 0, i32 4>
+ %bitcast64 = bitcast <4 x i32> %shuffle32 to <2 x double>
+ %shuffle64 = shufflevector <2 x double> %bitcast64, <2 x double> undef, <2 x i32> <i32 1, i32 0>
+ %bitcast32 = bitcast <2 x double> %shuffle64 to <4 x i32>
+ ret <4 x i32> %bitcast32
+}
+
define <4 x i32> @insert_reg_and_zero_v4i32(i32 %a) {
; SSE-LABEL: insert_reg_and_zero_v4i32:
; SSE: # BB#0:
@@ -1094,14 +1609,14 @@ define <4 x i32> @insert_reg_and_zero_v4i32(i32 %a) {
define <4 x i32> @insert_mem_and_zero_v4i32(i32* %ptr) {
; SSE-LABEL: insert_mem_and_zero_v4i32:
; SSE: # BB#0:
-; SSE-NEXT: movd (%rdi), %xmm0
+; SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE-NEXT: retq
;
; AVX-LABEL: insert_mem_and_zero_v4i32:
; AVX: # BB#0:
-; AVX-NEXT: vmovd (%rdi), %xmm0
+; AVX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT: retq
- %a = load i32* %ptr
+ %a = load i32, i32* %ptr
%v = insertelement <4 x i32> undef, i32 %a, i32 0
%shuffle = shufflevector <4 x i32> %v, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
ret <4 x i32> %shuffle
@@ -1111,21 +1626,21 @@ define <4 x float> @insert_reg_and_zero_v4f32(float %a) {
; SSE2-LABEL: insert_reg_and_zero_v4f32:
; SSE2: # BB#0:
; SSE2-NEXT: xorps %xmm1, %xmm1
-; SSE2-NEXT: movss %xmm0, %xmm1
+; SSE2-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
; SSE2-NEXT: movaps %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE3-LABEL: insert_reg_and_zero_v4f32:
; SSE3: # BB#0:
; SSE3-NEXT: xorps %xmm1, %xmm1
-; SSE3-NEXT: movss %xmm0, %xmm1
+; SSE3-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
; SSE3-NEXT: movaps %xmm1, %xmm0
; SSE3-NEXT: retq
;
; SSSE3-LABEL: insert_reg_and_zero_v4f32:
; SSSE3: # BB#0:
; SSSE3-NEXT: xorps %xmm1, %xmm1
-; SSSE3-NEXT: movss %xmm0, %xmm1
+; SSSE3-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
; SSSE3-NEXT: movaps %xmm1, %xmm0
; SSSE3-NEXT: retq
;
@@ -1138,7 +1653,7 @@ define <4 x float> @insert_reg_and_zero_v4f32(float %a) {
; AVX-LABEL: insert_reg_and_zero_v4f32:
; AVX: # BB#0:
; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
-; AVX-NEXT: vmovss %xmm0, %xmm1, %xmm0
+; AVX-NEXT: vmovss {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; AVX-NEXT: retq
%v = insertelement <4 x float> undef, float %a, i32 0
%shuffle = shufflevector <4 x float> %v, <4 x float> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
@@ -1148,14 +1663,14 @@ define <4 x float> @insert_reg_and_zero_v4f32(float %a) {
define <4 x float> @insert_mem_and_zero_v4f32(float* %ptr) {
; SSE-LABEL: insert_mem_and_zero_v4f32:
; SSE: # BB#0:
-; SSE-NEXT: movss (%rdi), %xmm0
+; SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE-NEXT: retq
;
; AVX-LABEL: insert_mem_and_zero_v4f32:
; AVX: # BB#0:
-; AVX-NEXT: vmovss (%rdi), %xmm0
+; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT: retq
- %a = load float* %ptr
+ %a = load float, float* %ptr
%v = insertelement <4 x float> undef, float %a, i32 0
%shuffle = shufflevector <4 x float> %v, <4 x float> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
ret <4 x float> %shuffle
@@ -1165,19 +1680,19 @@ define <4 x i32> @insert_reg_lo_v4i32(i64 %a, <4 x i32> %b) {
; SSE2-LABEL: insert_reg_lo_v4i32:
; SSE2: # BB#0:
; SSE2-NEXT: movd %rdi, %xmm1
-; SSE2-NEXT: movsd %xmm1, %xmm0
+; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE2-NEXT: retq
;
; SSE3-LABEL: insert_reg_lo_v4i32:
; SSE3: # BB#0:
; SSE3-NEXT: movd %rdi, %xmm1
-; SSE3-NEXT: movsd %xmm1, %xmm0
+; SSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE3-NEXT: retq
;
; SSSE3-LABEL: insert_reg_lo_v4i32:
; SSSE3: # BB#0:
; SSSE3-NEXT: movd %rdi, %xmm1
-; SSSE3-NEXT: movsd %xmm1, %xmm0
+; SSSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: insert_reg_lo_v4i32:
@@ -1221,22 +1736,22 @@ define <4 x i32> @insert_mem_lo_v4i32(<2 x i32>* %ptr, <4 x i32> %b) {
;
; SSE41-LABEL: insert_mem_lo_v4i32:
; SSE41: # BB#0:
-; SSE41-NEXT: movq (%rdi), %xmm1
+; SSE41-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
; SSE41-NEXT: retq
;
; AVX1-LABEL: insert_mem_lo_v4i32:
; AVX1: # BB#0:
-; AVX1-NEXT: vmovq (%rdi), %xmm1
+; AVX1-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
; AVX1-NEXT: retq
;
; AVX2-LABEL: insert_mem_lo_v4i32:
; AVX2: # BB#0:
-; AVX2-NEXT: vmovq (%rdi), %xmm1
+; AVX2-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; AVX2-NEXT: retq
- %a = load <2 x i32>* %ptr
+ %a = load <2 x i32>, <2 x i32>* %ptr
%v = shufflevector <2 x i32> %a, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
%shuffle = shufflevector <4 x i32> %v, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
ret <4 x i32> %shuffle
@@ -1263,16 +1778,16 @@ define <4 x i32> @insert_reg_hi_v4i32(i64 %a, <4 x i32> %b) {
define <4 x i32> @insert_mem_hi_v4i32(<2 x i32>* %ptr, <4 x i32> %b) {
; SSE-LABEL: insert_mem_hi_v4i32:
; SSE: # BB#0:
-; SSE-NEXT: movq (%rdi), %xmm1
+; SSE-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT: retq
;
; AVX-LABEL: insert_mem_hi_v4i32:
; AVX: # BB#0:
-; AVX-NEXT: vmovq (%rdi), %xmm1
+; AVX-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX-NEXT: retq
- %a = load <2 x i32>* %ptr
+ %a = load <2 x i32>, <2 x i32>* %ptr
%v = shufflevector <2 x i32> %a, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
%shuffle = shufflevector <4 x i32> %v, <4 x i32> %b, <4 x i32> <i32 4, i32 5, i32 0, i32 1>
ret <4 x i32> %shuffle
@@ -1281,13 +1796,13 @@ define <4 x i32> @insert_mem_hi_v4i32(<2 x i32>* %ptr, <4 x i32> %b) {
define <4 x float> @insert_reg_lo_v4f32(double %a, <4 x float> %b) {
; SSE-LABEL: insert_reg_lo_v4f32:
; SSE: # BB#0:
-; SSE-NEXT: movsd %xmm0, %xmm1
-; SSE-NEXT: movaps %xmm1, %xmm0
+; SSE-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
+; SSE-NEXT: movapd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: insert_reg_lo_v4f32:
; AVX: # BB#0:
-; AVX-NEXT: vmovsd %xmm0, %xmm1, %xmm0
+; AVX-NEXT: vmovsd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
; AVX-NEXT: retq
%a.cast = bitcast double %a to <2 x float>
%v = shufflevector <2 x float> %a.cast, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
@@ -1305,7 +1820,7 @@ define <4 x float> @insert_mem_lo_v4f32(<2 x float>* %ptr, <4 x float> %b) {
; AVX: # BB#0:
; AVX-NEXT: vmovlpd (%rdi), %xmm0, %xmm0
; AVX-NEXT: retq
- %a = load <2 x float>* %ptr
+ %a = load <2 x float>, <2 x float>* %ptr
%v = shufflevector <2 x float> %a, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
%shuffle = shufflevector <4 x float> %v, <4 x float> %b, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
ret <4 x float> %shuffle
@@ -1338,7 +1853,7 @@ define <4 x float> @insert_mem_hi_v4f32(<2 x float>* %ptr, <4 x float> %b) {
; AVX: # BB#0:
; AVX-NEXT: vmovhpd (%rdi), %xmm0, %xmm0
; AVX-NEXT: retq
- %a = load <2 x float>* %ptr
+ %a = load <2 x float>, <2 x float>* %ptr
%v = shufflevector <2 x float> %a, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
%shuffle = shufflevector <4 x float> %v, <4 x float> %b, <4 x i32> <i32 4, i32 5, i32 0, i32 1>
ret <4 x float> %shuffle
@@ -1355,7 +1870,39 @@ define <4 x float> @shuffle_mem_v4f32_3210(<4 x float>* %ptr) {
; AVX: # BB#0:
; AVX-NEXT: vpermilps {{.*#+}} xmm0 = mem[3,2,1,0]
; AVX-NEXT: retq
- %a = load <4 x float>* %ptr
+ %a = load <4 x float>, <4 x float>* %ptr
%shuffle = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
ret <4 x float> %shuffle
}
+
+;
+; Shuffle to logical bit shifts
+;
+
+define <4 x i32> @shuffle_v4i32_z0zX(<4 x i32> %a) {
+; SSE-LABEL: shuffle_v4i32_z0zX:
+; SSE: # BB#0:
+; SSE-NEXT: psllq $32, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: shuffle_v4i32_z0zX:
+; AVX: # BB#0:
+; AVX-NEXT: vpsllq $32, %xmm0, %xmm0
+; AVX-NEXT: retq
+ %shuffle = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32> <i32 4, i32 0, i32 4, i32 undef>
+ ret <4 x i32> %shuffle
+}
+
+define <4 x i32> @shuffle_v4i32_1z3z(<4 x i32> %a) {
+; SSE-LABEL: shuffle_v4i32_1z3z:
+; SSE: # BB#0:
+; SSE-NEXT: psrlq $32, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: shuffle_v4i32_1z3z:
+; AVX: # BB#0:
+; AVX-NEXT: vpsrlq $32, %xmm0, %xmm0
+; AVX-NEXT: retq
+ %shuffle = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32> <i32 1, i32 4, i32 3, i32 4>
+ ret <4 x i32> %shuffle
+}
diff --git a/test/CodeGen/X86/vector-shuffle-128-v8.ll b/test/CodeGen/X86/vector-shuffle-128-v8.ll
index de25a16a2e00..4007f0b2b13b 100644
--- a/test/CodeGen/X86/vector-shuffle-128-v8.ll
+++ b/test/CodeGen/X86/vector-shuffle-128-v8.ll
@@ -1,8 +1,8 @@
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+ssse3 -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSSE3
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse4.1 -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx2 -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+ssse3 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSSE3
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-unknown"
@@ -952,20 +952,15 @@ define <8 x i16> @shuffle_v8i16_109832ba(<8 x i16> %a, <8 x i16> %b) {
; SSE-LABEL: shuffle_v8i16_109832ba:
; SSE: # BB#0:
; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm0[2,0,3,1,4,5,6,7]
-; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[2,0,3,1,4,5,6,7]
-; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
-; SSE-NEXT: movdqa %xmm1, %xmm0
+; SSE-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,4,7,5]
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v8i16_109832ba:
; AVX: # BB#0:
; AVX-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; AVX-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[2,0,3,1,4,5,6,7]
-; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[2,0,3,1,4,5,6,7]
-; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,4,7,5]
; AVX-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 0, i32 9, i32 8, i32 3, i32 2, i32 11, i32 10>
ret <8 x i16> %shuffle
@@ -1023,36 +1018,33 @@ define <8 x i16> @shuffle_v8i16_0213cedf(<8 x i16> %a, <8 x i16> %b) {
define <8 x i16> @shuffle_v8i16_443aXXXX(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_443aXXXX:
; SSE2: # BB#0:
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,1,2,3]
-; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
-; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
-; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,5,6,7]
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [65535,65535,0,65535,65535,65535,65535,65535]
+; SSE2-NEXT: pand %xmm2, %xmm0
+; SSE2-NEXT: pandn %xmm1, %xmm2
+; SSE2-NEXT: por %xmm0, %xmm2
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,1,2,3]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,3,2,4,5,6,7]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: shuffle_v8i16_443aXXXX:
; SSSE3: # BB#0:
-; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,1,2,3]
-; SSSE3-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
-; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,12,13,10,11,12,13,10,11,12,13,14,15]
+; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,xmm1[4,5,u,u,u,u,u,u,u,u]
+; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,6,7],zero,zero,xmm0[u,u,u,u,u,u,u,u]
+; SSSE3-NEXT: por %xmm1, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v8i16_443aXXXX:
; SSE41: # BB#0:
+; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3,4,5,6,7]
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,1,2,3]
-; SSE41-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
-; SSE41-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,12,13,10,11,12,13,10,11,12,13,14,15]
+; SSE41-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,3,2,4,5,6,7]
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v8i16_443aXXXX:
; AVX: # BB#0:
+; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3,4,5,6,7]
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,1,2,3]
-; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
-; AVX-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,12,13,10,11,12,13,10,11,12,13,14,15]
+; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,3,2,4,5,6,7]
; AVX-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 4, i32 3, i32 10, i32 undef, i32 undef, i32 undef, i32 undef>
ret <8 x i16> %shuffle
@@ -1061,34 +1053,37 @@ define <8 x i16> @shuffle_v8i16_443aXXXX(<8 x i16> %a, <8 x i16> %b) {
define <8 x i16> @shuffle_v8i16_032dXXXX(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_032dXXXX:
; SSE2: # BB#0:
-; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
-; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,3,2,3,4,5,6,7]
-; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[3,1,2,0]
+; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,5,6,7]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,1,2,3]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,3,2,1,4,5,6,7]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: shuffle_v8i16_032dXXXX:
; SSSE3: # BB#0:
-; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
-; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,12,13,8,9,6,7,8,9,12,13,12,13,14,15]
+; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,xmm1[10,11,u,u,u,u,u,u,u,u]
+; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,6,7,4,5],zero,zero,xmm0[u,u,u,u,u,u,u,u]
+; SSSE3-NEXT: por %xmm1, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v8i16_032dXXXX:
; SSE41: # BB#0:
-; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
-; SSE41-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,12,13,8,9,6,7,8,9,12,13,12,13,14,15]
+; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
+; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,6,7,4,5,10,11,0,1,10,11,0,1,2,3]
; SSE41-NEXT: retq
;
-; AVX-LABEL: shuffle_v8i16_032dXXXX:
-; AVX: # BB#0:
-; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
-; AVX-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,12,13,8,9,6,7,8,9,12,13,12,13,14,15]
-; AVX-NEXT: retq
+; AVX1-LABEL: shuffle_v8i16_032dXXXX:
+; AVX1: # BB#0:
+; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
+; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,6,7,4,5,10,11,0,1,10,11,0,1,2,3]
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v8i16_032dXXXX:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
+; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,6,7,4,5,10,11,0,1,10,11,0,1,2,3]
+; AVX2-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 3, i32 2, i32 13, i32 undef, i32 undef, i32 undef, i32 undef>
ret <8 x i16> %shuffle
}
@@ -1109,33 +1104,30 @@ define <8 x i16> @shuffle_v8i16_XXXdXXXX(<8 x i16> %a, <8 x i16> %b) {
define <8 x i16> @shuffle_v8i16_012dXXXX(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_012dXXXX:
; SSE2: # BB#0:
-; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
-; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,0]
-; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,1,2,3]
-; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,2,0,3,4,5,6,7]
+; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [65535,65535,65535,0,65535,65535,65535,65535]
+; SSE2-NEXT: pand %xmm2, %xmm0
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
+; SSE2-NEXT: pandn %xmm1, %xmm2
+; SSE2-NEXT: por %xmm2, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: shuffle_v8i16_012dXXXX:
; SSSE3: # BB#0:
-; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
-; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,6,7,8,9,0,1,0,1,2,3]
+; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,xmm1[10,11,u,u,u,u,u,u,u,u]
+; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5],zero,zero,xmm0[u,u,u,u,u,u,u,u]
+; SSSE3-NEXT: por %xmm1, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v8i16_012dXXXX:
; SSE41: # BB#0:
-; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
-; SSE41-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,6,7,8,9,0,1,0,1,2,3]
+; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
+; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7]
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v8i16_012dXXXX:
; AVX: # BB#0:
-; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
-; AVX-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,6,7,8,9,0,1,0,1,2,3]
+; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
+; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7]
; AVX-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 13, i32 undef, i32 undef, i32 undef, i32 undef>
ret <8 x i16> %shuffle
@@ -1144,41 +1136,37 @@ define <8 x i16> @shuffle_v8i16_012dXXXX(<8 x i16> %a, <8 x i16> %b) {
define <8 x i16> @shuffle_v8i16_XXXXcde3(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_XXXXcde3:
; SSE2: # BB#0:
+; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [65535,65535,65535,65535,65535,65535,65535,0]
+; SSE2-NEXT: pand %xmm2, %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
-; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
-; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm1[0,2,2,3,4,5,6,7]
-; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,7,6,7]
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,2]
+; SSE2-NEXT: pandn %xmm0, %xmm2
+; SSE2-NEXT: por %xmm1, %xmm2
+; SSE2-NEXT: movdqa %xmm2, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: shuffle_v8i16_XXXXcde3:
; SSSE3: # BB#0:
-; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
-; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
-; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,1,4,5,4,5,6,7,0,1,4,5,8,9,14,15]
-; SSSE3-NEXT: movdqa %xmm1, %xmm0
+; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[u,u,u,u,u,u,u,u],zero,zero,zero,zero,zero,zero,xmm0[6,7]
+; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[u,u,u,u,u,u,u,u,8,9,10,11,12,13],zero,zero
+; SSSE3-NEXT: por %xmm1, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v8i16_XXXXcde3:
; SSE41: # BB#0:
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
-; SSE41-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
-; SSE41-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,1,4,5,4,5,6,7,0,1,4,5,8,9,14,15]
-; SSE41-NEXT: movdqa %xmm1, %xmm0
+; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5,6],xmm0[7]
; SSE41-NEXT: retq
;
; AVX1-LABEL: shuffle_v8i16_XXXXcde3:
; AVX1: # BB#0:
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
-; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
-; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,4,5,6,7,0,1,4,5,8,9,14,15]
+; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5,6],xmm0[7]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v8i16_XXXXcde3:
; AVX2: # BB#0:
; AVX2-NEXT: vpbroadcastq %xmm0, %xmm0
-; AVX2-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
-; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,4,5,6,7,0,1,4,5,8,9,14,15]
+; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5,6],xmm0[7]
; AVX2-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 12, i32 13, i32 14, i32 3>
ret <8 x i16> %shuffle
@@ -1187,42 +1175,32 @@ define <8 x i16> @shuffle_v8i16_XXXXcde3(<8 x i16> %a, <8 x i16> %b) {
define <8 x i16> @shuffle_v8i16_cde3XXXX(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_cde3XXXX:
; SSE2: # BB#0:
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
-; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
-; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm1[0,2,2,3,4,5,6,7]
-; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,7,6,7]
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [65535,65535,65535,0,65535,65535,65535,65535]
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
+; SSE2-NEXT: pand %xmm2, %xmm1
+; SSE2-NEXT: pandn %xmm0, %xmm2
+; SSE2-NEXT: por %xmm1, %xmm2
+; SSE2-NEXT: movdqa %xmm2, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: shuffle_v8i16_cde3XXXX:
; SSSE3: # BB#0:
-; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
-; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
-; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,1,4,5,8,9,14,15,8,9,14,15,12,13,14,15]
-; SSSE3-NEXT: movdqa %xmm1, %xmm0
+; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[6,7,u,u,u,u,u,u,u,u]
+; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[8,9,10,11,12,13],zero,zero,xmm1[u,u,u,u,u,u,u,u]
+; SSSE3-NEXT: por %xmm1, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v8i16_cde3XXXX:
; SSE41: # BB#0:
-; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
-; SSE41-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
-; SSE41-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,1,4,5,8,9,14,15,8,9,14,15,12,13,14,15]
-; SSE41-NEXT: movdqa %xmm1, %xmm0
+; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
+; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3],xmm1[4,5,6,7]
; SSE41-NEXT: retq
;
-; AVX1-LABEL: shuffle_v8i16_cde3XXXX:
-; AVX1: # BB#0:
-; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
-; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
-; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,14,15,8,9,14,15,12,13,14,15]
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: shuffle_v8i16_cde3XXXX:
-; AVX2: # BB#0:
-; AVX2-NEXT: vpbroadcastq %xmm0, %xmm0
-; AVX2-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
-; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,14,15,8,9,14,15,12,13,14,15]
-; AVX2-NEXT: retq
+; AVX-LABEL: shuffle_v8i16_cde3XXXX:
+; AVX: # BB#0:
+; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
+; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3],xmm1[4,5,6,7]
+; AVX-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 12, i32 13, i32 14, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
ret <8 x i16> %shuffle
}
@@ -1230,100 +1208,117 @@ define <8 x i16> @shuffle_v8i16_cde3XXXX(<8 x i16> %a, <8 x i16> %b) {
define <8 x i16> @shuffle_v8i16_012dcde3(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_012dcde3:
; SSE2: # BB#0:
-; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,1,0,1]
-; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[2,3,0,1]
-; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
-; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7]
-; SSE2-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,7,6,7]
-; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
-; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
+; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,3,2,1]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,1,2,0,4,5,6,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,0]
-; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,1,2,3]
-; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,2,0,3,4,5,6,7]
-; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,3,2,1,4,5,6,7]
+; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,5,7]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,3,2,1]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,3,0,2,4,5,6,7]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: shuffle_v8i16_012dcde3:
; SSSE3: # BB#0:
-; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,1,0,1]
-; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm1[2,3,0,1]
-; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
-; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,1,4,5,8,9,14,15,8,9,14,15,12,13,14,15]
-; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
-; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,6,7,8,9,0,1,0,1,2,3]
-; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,xmm1[10,11,8,9,10,11,12,13],zero,zero
+; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5],zero,zero,zero,zero,zero,zero,zero,zero,xmm0[6,7]
+; SSSE3-NEXT: por %xmm1, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v8i16_012dcde3:
; SSE41: # BB#0:
-; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,1,0,1]
-; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm1[2,3,0,1]
-; SSE41-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
-; SSE41-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,1,4,5,8,9,14,15,8,9,14,15,12,13,14,15]
-; SSE41-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
-; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,6,7,8,9,0,1,0,1,2,3]
-; SSE41-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
+; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,10,11,8,9,10,11,12,13,6,7]
; SSE41-NEXT: retq
;
; AVX1-LABEL: shuffle_v8i16_012dcde3:
; AVX1: # BB#0:
-; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[0,1,0,1]
-; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm2 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
-; AVX1-NEXT: vpshufb {{.*#+}} xmm2 = xmm2[0,1,4,5,8,9,14,15,8,9,14,15,12,13,14,15]
-; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
-; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,6,7,8,9,0,1,0,1,2,3]
-; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
+; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,10,11,8,9,10,11,12,13,6,7]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v8i16_012dcde3:
; AVX2: # BB#0:
-; AVX2-NEXT: vpbroadcastq %xmm0, %xmm2
-; AVX2-NEXT: vpunpckhwd {{.*#+}} xmm2 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
-; AVX2-NEXT: vpshufb {{.*#+}} xmm2 = xmm2[0,1,4,5,8,9,14,15,8,9,14,15,12,13,14,15]
-; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
-; AVX2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,6,7,8,9,0,1,0,1,2,3]
-; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
+; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,10,11,8,9,10,11,12,13,6,7]
; AVX2-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 13, i32 12, i32 13, i32 14, i32 3>
ret <8 x i16> %shuffle
}
+define <8 x i16> @shuffle_v8i16_0923cde7(<8 x i16> %a, <8 x i16> %b) {
+; SSE2-LABEL: shuffle_v8i16_0923cde7:
+; SSE2: # BB#0:
+; SSE2-NEXT: movaps {{.*#+}} xmm2 = [65535,0,65535,65535,0,0,0,65535]
+; SSE2-NEXT: andps %xmm2, %xmm0
+; SSE2-NEXT: andnps %xmm1, %xmm2
+; SSE2-NEXT: orps %xmm2, %xmm0
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: shuffle_v8i16_0923cde7:
+; SSSE3: # BB#0:
+; SSSE3-NEXT: movaps {{.*#+}} xmm2 = [65535,0,65535,65535,0,0,0,65535]
+; SSSE3-NEXT: andps %xmm2, %xmm0
+; SSSE3-NEXT: andnps %xmm1, %xmm2
+; SSSE3-NEXT: orps %xmm2, %xmm0
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: shuffle_v8i16_0923cde7:
+; SSE41: # BB#0:
+; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3],xmm1[4,5,6],xmm0[7]
+; SSE41-NEXT: retq
+;
+; AVX-LABEL: shuffle_v8i16_0923cde7:
+; AVX: # BB#0:
+; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3],xmm1[4,5,6],xmm0[7]
+; AVX-NEXT: retq
+ %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 9, i32 2, i32 3, i32 12, i32 13, i32 14, i32 7>
+ ret <8 x i16> %shuffle
+}
+
define <8 x i16> @shuffle_v8i16_XXX1X579(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_XXX1X579:
; SSE2: # BB#0:
-; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,5,6,7]
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,1,3,2,4,5,6,7]
-; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,2,1]
-; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,1,2,2,4,5,6,7]
-; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,5,7]
+; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[0,1,2,0]
+; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [65535,65535,65535,65535,65535,65535,65535,0]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,1,2,1,4,5,6,7]
+; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,7,7]
+; SSE2-NEXT: pand %xmm1, %xmm0
+; SSE2-NEXT: pandn %xmm2, %xmm1
+; SSE2-NEXT: por %xmm0, %xmm1
+; SSE2-NEXT: movdqa %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: shuffle_v8i16_XXX1X579:
; SSSE3: # BB#0:
-; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,10,11,14,15,14,15,10,11,12,13,14,15]
-; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,4,5,8,9,8,9,12,13,6,7]
+; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[u,u,u,u,u,u],zero,zero,xmm1[u,u],zero,zero,zero,zero,xmm1[2,3]
+; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[u,u,u,u,u,u,2,3,u,u,10,11,14,15],zero,zero
+; SSSE3-NEXT: por %xmm1, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v8i16_XXX1X579:
; SSE41: # BB#0:
-; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,10,11,14,15,14,15,10,11,12,13,14,15]
-; SSE41-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,4,5,8,9,8,9,12,13,6,7]
+; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,2,0]
+; SSE41-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,1,2,1,4,5,6,7]
+; SSE41-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,7,7]
+; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
; SSE41-NEXT: retq
;
-; AVX-LABEL: shuffle_v8i16_XXX1X579:
-; AVX: # BB#0:
-; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,10,11,14,15,14,15,10,11,12,13,14,15]
-; AVX-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,4,5,8,9,8,9,12,13,6,7]
-; AVX-NEXT: retq
+; AVX1-LABEL: shuffle_v8i16_XXX1X579:
+; AVX1: # BB#0:
+; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,1,2,0]
+; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,1,2,1,4,5,6,7]
+; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,7,7]
+; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v8i16_XXX1X579:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpbroadcastd %xmm1, %xmm1
+; AVX2-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,1,2,1,4,5,6,7]
+; AVX2-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,7,7]
+; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
+; AVX2-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 1, i32 undef, i32 5, i32 7, i32 9>
ret <8 x i16> %shuffle
}
@@ -1331,42 +1326,40 @@ define <8 x i16> @shuffle_v8i16_XXX1X579(<8 x i16> %a, <8 x i16> %b) {
define <8 x i16> @shuffle_v8i16_XX4X8acX(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: shuffle_v8i16_XX4X8acX:
; SSE2: # BB#0:
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
-; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7]
-; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
-; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
-; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm1[0,1,2,0,4,5,6,7]
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,2,1]
-; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,1,1,3,4,5,6,7]
-; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,4,7]
+; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,2,3,3]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm1[0,2,2,3,4,5,6,7]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,2,0]
+; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,7,4,7]
+; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: shuffle_v8i16_XX4X8acX:
; SSSE3: # BB#0:
-; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
-; SSSE3-NEXT: pshuflw {{.*#+}} xmm0 = xmm1[0,2,2,3,4,5,6,7]
-; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
-; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,2,3,0,1,0,1,4,5,8,9,0,1]
+; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[u,u,u,u,8,9,u,u],zero,zero,zero,zero,zero,zero,xmm0[u,u]
+; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[u,u,u,u],zero,zero,xmm1[u,u,0,1,4,5,8,9,u,u]
+; SSSE3-NEXT: por %xmm1, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v8i16_XX4X8acX:
; SSE41: # BB#0:
-; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
-; SSE41-NEXT: pshuflw {{.*#+}} xmm0 = xmm1[0,2,2,3,4,5,6,7]
-; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; SSE41-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
-; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,2,3,0,1,0,1,4,5,8,9,0,1]
+; SSE41-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,1,4,5,4,5,6,7,0,1,4,5,8,9,4,5]
+; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
+; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; SSE41-NEXT: retq
;
-; AVX-LABEL: shuffle_v8i16_XX4X8acX:
-; AVX: # BB#0:
-; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
-; AVX-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7]
-; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
-; AVX-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
-; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,2,3,0,1,0,1,4,5,8,9,0,1]
-; AVX-NEXT: retq
+; AVX1-LABEL: shuffle_v8i16_XX4X8acX:
+; AVX1: # BB#0:
+; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,4,5,4,5,6,7,0,1,4,5,8,9,4,5]
+; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
+; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v8i16_XX4X8acX:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,4,5,4,5,6,7,0,1,4,5,8,9,4,5]
+; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
+; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
+; AVX2-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 undef, i32 4, i32 undef, i32 8, i32 10, i32 12, i32 undef>
ret <8 x i16> %shuffle
}
@@ -1391,16 +1384,14 @@ define <8 x i16> @shuffle_v8i16_8zzzzzzz(i16 %i) {
define <8 x i16> @shuffle_v8i16_z8zzzzzz(i16 %i) {
; SSE-LABEL: shuffle_v8i16_z8zzzzzz:
; SSE: # BB#0:
-; SSE-NEXT: movzwl %di, %eax
-; SSE-NEXT: movd %eax, %xmm0
-; SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
+; SSE-NEXT: pxor %xmm0, %xmm0
+; SSE-NEXT: pinsrw $1, %edi, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v8i16_z8zzzzzz:
; AVX: # BB#0:
-; AVX-NEXT: movzwl %di, %eax
-; AVX-NEXT: vmovd %eax, %xmm0
-; AVX-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
+; AVX-NEXT: vpxor %xmm0, %xmm0
+; AVX-NEXT: vpinsrw $1, %edi, %xmm0
; AVX-NEXT: retq
%a = insertelement <8 x i16> undef, i16 %i, i32 0
%shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32> <i32 2, i32 8, i32 3, i32 7, i32 6, i32 5, i32 4, i32 3>
@@ -1410,16 +1401,14 @@ define <8 x i16> @shuffle_v8i16_z8zzzzzz(i16 %i) {
define <8 x i16> @shuffle_v8i16_zzzzz8zz(i16 %i) {
; SSE-LABEL: shuffle_v8i16_zzzzz8zz:
; SSE: # BB#0:
-; SSE-NEXT: movzwl %di, %eax
-; SSE-NEXT: movd %eax, %xmm0
-; SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5]
+; SSE-NEXT: pxor %xmm0, %xmm0
+; SSE-NEXT: pinsrw $5, %edi, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v8i16_zzzzz8zz:
; AVX: # BB#0:
-; AVX-NEXT: movzwl %di, %eax
-; AVX-NEXT: vmovd %eax, %xmm0
-; AVX-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5]
+; AVX-NEXT: vpxor %xmm0, %xmm0
+; AVX-NEXT: vpinsrw $5, %edi, %xmm0
; AVX-NEXT: retq
%a = insertelement <8 x i16> undef, i16 %i, i32 0
%shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 0, i32 0>
@@ -1429,14 +1418,14 @@ define <8 x i16> @shuffle_v8i16_zzzzz8zz(i16 %i) {
define <8 x i16> @shuffle_v8i16_zuuzuuz8(i16 %i) {
; SSE-LABEL: shuffle_v8i16_zuuzuuz8:
; SSE: # BB#0:
-; SSE-NEXT: movd %edi, %xmm0
-; SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1]
+; SSE-NEXT: pxor %xmm0, %xmm0
+; SSE-NEXT: pinsrw $7, %edi, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v8i16_zuuzuuz8:
; AVX: # BB#0:
-; AVX-NEXT: vmovd %edi, %xmm0
-; AVX-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1]
+; AVX-NEXT: vpxor %xmm0, %xmm0
+; AVX-NEXT: vpinsrw $7, %edi, %xmm0
; AVX-NEXT: retq
%a = insertelement <8 x i16> undef, i16 %i, i32 0
%shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32> <i32 0, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 6, i32 8>
@@ -1446,16 +1435,14 @@ define <8 x i16> @shuffle_v8i16_zuuzuuz8(i16 %i) {
define <8 x i16> @shuffle_v8i16_zzBzzzzz(i16 %i) {
; SSE-LABEL: shuffle_v8i16_zzBzzzzz:
; SSE: # BB#0:
-; SSE-NEXT: movzwl %di, %eax
-; SSE-NEXT: movd %eax, %xmm0
-; SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11]
+; SSE-NEXT: pxor %xmm0, %xmm0
+; SSE-NEXT: pinsrw $2, %edi, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: shuffle_v8i16_zzBzzzzz:
; AVX: # BB#0:
-; AVX-NEXT: movzwl %di, %eax
-; AVX-NEXT: vmovd %eax, %xmm0
-; AVX-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11]
+; AVX-NEXT: vpxor %xmm0, %xmm0
+; AVX-NEXT: vpinsrw $2, %edi, %xmm0
; AVX-NEXT: retq
%a = insertelement <8 x i16> undef, i16 %i, i32 3
%shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32> <i32 0, i32 1, i32 11, i32 3, i32 4, i32 5, i32 6, i32 7>
@@ -1829,12 +1816,12 @@ define <8 x i16> @shuffle_v8i16_0uuu1uuu(<8 x i16> %a) {
;
; SSE41-LABEL: shuffle_v8i16_0uuu1uuu:
; SSE41: # BB#0:
-; SSE41-NEXT: pmovzxwq %xmm0, %xmm0
+; SSE41-NEXT: pmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v8i16_0uuu1uuu:
; AVX: # BB#0:
-; AVX-NEXT: vpmovzxwq %xmm0, %xmm0
+; AVX-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; AVX-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 1, i32 undef, i32 undef, i32 undef>
ret <8 x i16> %shuffle
@@ -1857,12 +1844,12 @@ define <8 x i16> @shuffle_v8i16_0zzz1zzz(<8 x i16> %a) {
;
; SSE41-LABEL: shuffle_v8i16_0zzz1zzz:
; SSE41: # BB#0:
-; SSE41-NEXT: pmovzxwq %xmm0, %xmm0
+; SSE41-NEXT: pmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v8i16_0zzz1zzz:
; AVX: # BB#0:
-; AVX-NEXT: vpmovzxwq %xmm0, %xmm0
+; AVX-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; AVX-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 1, i32 13, i32 14, i32 15>
ret <8 x i16> %shuffle
@@ -1881,12 +1868,12 @@ define <8 x i16> @shuffle_v8i16_0u1u2u3u(<8 x i16> %a) {
;
; SSE41-LABEL: shuffle_v8i16_0u1u2u3u:
; SSE41: # BB#0:
-; SSE41-NEXT: pmovzxwd %xmm0, %xmm0
+; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v8i16_0u1u2u3u:
; AVX: # BB#0:
-; AVX-NEXT: vpmovzxwd %xmm0, %xmm0
+; AVX-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 undef, i32 1, i32 undef, i32 2, i32 undef, i32 3, i32 undef>
ret <8 x i16> %shuffle
@@ -1907,13 +1894,254 @@ define <8 x i16> @shuffle_v8i16_0z1z2z3z(<8 x i16> %a) {
;
; SSE41-LABEL: shuffle_v8i16_0z1z2z3z:
; SSE41: # BB#0:
-; SSE41-NEXT: pmovzxwd %xmm0, %xmm0
+; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v8i16_0z1z2z3z:
; AVX: # BB#0:
-; AVX-NEXT: vpmovzxwd %xmm0, %xmm0
+; AVX-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 9, i32 1, i32 11, i32 2, i32 13, i32 3, i32 15>
ret <8 x i16> %shuffle
}
+
+;
+; Shuffle to logical bit shifts
+;
+define <8 x i16> @shuffle_v8i16_z0z2z4z6(<8 x i16> %a) {
+; SSE-LABEL: shuffle_v8i16_z0z2z4z6:
+; SSE: # BB#0:
+; SSE-NEXT: pslld $16, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: shuffle_v8i16_z0z2z4z6:
+; AVX: # BB#0:
+; AVX-NEXT: vpslld $16, %xmm0, %xmm0
+; AVX-NEXT: retq
+ %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32><i32 8, i32 0, i32 8, i32 2, i32 8, i32 4, i32 8, i32 6>
+ ret <8 x i16> %shuffle
+}
+
+define <8 x i16> @shuffle_v8i16_zzz0zzz4(<8 x i16> %a) {
+; SSE-LABEL: shuffle_v8i16_zzz0zzz4:
+; SSE: # BB#0:
+; SSE-NEXT: psllq $48, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: shuffle_v8i16_zzz0zzz4:
+; AVX: # BB#0:
+; AVX-NEXT: vpsllq $48, %xmm0, %xmm0
+; AVX-NEXT: retq
+ %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32><i32 8, i32 8, i32 8, i32 0, i32 8, i32 8, i32 8, i32 4>
+ ret <8 x i16> %shuffle
+}
+
+define <8 x i16> @shuffle_v8i16_zz01zX4X(<8 x i16> %a) {
+; SSE-LABEL: shuffle_v8i16_zz01zX4X:
+; SSE: # BB#0:
+; SSE-NEXT: psllq $32, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: shuffle_v8i16_zz01zX4X:
+; AVX: # BB#0:
+; AVX-NEXT: vpsllq $32, %xmm0, %xmm0
+; AVX-NEXT: retq
+ %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32><i32 8, i32 8, i32 0, i32 1, i32 8, i32 undef, i32 4, i32 undef>
+ ret <8 x i16> %shuffle
+}
+
+define <8 x i16> @shuffle_v8i16_z0X2z456(<8 x i16> %a) {
+; SSE-LABEL: shuffle_v8i16_z0X2z456:
+; SSE: # BB#0:
+; SSE-NEXT: psllq $16, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: shuffle_v8i16_z0X2z456:
+; AVX: # BB#0:
+; AVX-NEXT: vpsllq $16, %xmm0, %xmm0
+; AVX-NEXT: retq
+ %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32><i32 8, i32 0, i32 undef, i32 2, i32 8, i32 4, i32 5, i32 6>
+ ret <8 x i16> %shuffle
+}
+
+define <8 x i16> @shuffle_v8i16_1z3zXz7z(<8 x i16> %a) {
+; SSE-LABEL: shuffle_v8i16_1z3zXz7z:
+; SSE: # BB#0:
+; SSE-NEXT: psrld $16, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: shuffle_v8i16_1z3zXz7z:
+; AVX: # BB#0:
+; AVX-NEXT: vpsrld $16, %xmm0, %xmm0
+; AVX-NEXT: retq
+ %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32><i32 1, i32 8, i32 3, i32 8, i32 undef, i32 8, i32 7, i32 8>
+ ret <8 x i16> %shuffle
+}
+
+define <8 x i16> @shuffle_v8i16_1X3z567z(<8 x i16> %a) {
+; SSE-LABEL: shuffle_v8i16_1X3z567z:
+; SSE: # BB#0:
+; SSE-NEXT: psrlq $16, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: shuffle_v8i16_1X3z567z:
+; AVX: # BB#0:
+; AVX-NEXT: vpsrlq $16, %xmm0, %xmm0
+; AVX-NEXT: retq
+ %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32><i32 1, i32 undef, i32 3, i32 8, i32 5, i32 6, i32 7, i32 8>
+ ret <8 x i16> %shuffle
+}
+
+define <8 x i16> @shuffle_v8i16_23zz67zz(<8 x i16> %a) {
+; SSE-LABEL: shuffle_v8i16_23zz67zz:
+; SSE: # BB#0:
+; SSE-NEXT: psrlq $32, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: shuffle_v8i16_23zz67zz:
+; AVX: # BB#0:
+; AVX-NEXT: vpsrlq $32, %xmm0, %xmm0
+; AVX-NEXT: retq
+ %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32><i32 2, i32 3, i32 8, i32 8, i32 6, i32 7, i32 8, i32 8>
+ ret <8 x i16> %shuffle
+}
+
+define <8 x i16> @shuffle_v8i16_3zXXXzzz(<8 x i16> %a) {
+; SSE-LABEL: shuffle_v8i16_3zXXXzzz:
+; SSE: # BB#0:
+; SSE-NEXT: psrlq $48, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: shuffle_v8i16_3zXXXzzz:
+; AVX: # BB#0:
+; AVX-NEXT: vpsrlq $48, %xmm0, %xmm0
+; AVX-NEXT: retq
+ %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32><i32 3, i32 8, i32 undef, i32 undef, i32 undef, i32 8, i32 8, i32 8>
+ ret <8 x i16> %shuffle
+}
+
+define <8 x i16> @shuffle_v8i16_01u3zzuz(<8 x i16> %a) {
+; SSE-LABEL: shuffle_v8i16_01u3zzuz:
+; SSE: # BB#0:
+; SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
+; SSE-NEXT: retq
+;
+; AVX-LABEL: shuffle_v8i16_01u3zzuz:
+; AVX: # BB#0:
+; AVX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
+; AVX-NEXT: retq
+ %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 undef, i32 3, i32 8, i32 8, i32 undef, i32 8>
+ ret <8 x i16> %shuffle
+}
+
+define <8 x i16> @shuffle_v8i16_0z234567(<8 x i16> %a) {
+; SSE2-LABEL: shuffle_v8i16_0z234567:
+; SSE2: # BB#0:
+; SSE2-NEXT: andps {{.*}}(%rip), %xmm0
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: shuffle_v8i16_0z234567:
+; SSSE3: # BB#0:
+; SSSE3-NEXT: andps {{.*}}(%rip), %xmm0
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: shuffle_v8i16_0z234567:
+; SSE41: # BB#0:
+; SSE41-NEXT: pxor %xmm1, %xmm1
+; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3,4,5,6,7]
+; SSE41-NEXT: retq
+;
+; AVX-LABEL: shuffle_v8i16_0z234567:
+; AVX: # BB#0:
+; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3,4,5,6,7]
+; AVX-NEXT: retq
+ %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 9, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ ret <8 x i16> %shuffle
+}
+
+define <8 x i16> @shuffle_v8i16_0zzzz5z7(<8 x i16> %a) {
+; SSE2-LABEL: shuffle_v8i16_0zzzz5z7:
+; SSE2: # BB#0:
+; SSE2-NEXT: andps {{.*}}(%rip), %xmm0
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: shuffle_v8i16_0zzzz5z7:
+; SSSE3: # BB#0:
+; SSSE3-NEXT: andps {{.*}}(%rip), %xmm0
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: shuffle_v8i16_0zzzz5z7:
+; SSE41: # BB#0:
+; SSE41-NEXT: pxor %xmm1, %xmm1
+; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4],xmm0[5],xmm1[6],xmm0[7]
+; SSE41-NEXT: retq
+;
+; AVX-LABEL: shuffle_v8i16_0zzzz5z7:
+; AVX: # BB#0:
+; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4],xmm0[5],xmm1[6],xmm0[7]
+; AVX-NEXT: retq
+ %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 8, i32 8, i32 8, i32 8, i32 5, i32 8, i32 7>
+ ret <8 x i16> %shuffle
+}
+
+define <8 x i16> @shuffle_v8i16_0123456z(<8 x i16> %a) {
+; SSE2-LABEL: shuffle_v8i16_0123456z:
+; SSE2: # BB#0:
+; SSE2-NEXT: andps {{.*}}(%rip), %xmm0
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: shuffle_v8i16_0123456z:
+; SSSE3: # BB#0:
+; SSSE3-NEXT: andps {{.*}}(%rip), %xmm0
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: shuffle_v8i16_0123456z:
+; SSE41: # BB#0:
+; SSE41-NEXT: pxor %xmm1, %xmm1
+; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3,4,5,6],xmm1[7]
+; SSE41-NEXT: retq
+;
+; AVX-LABEL: shuffle_v8i16_0123456z:
+; AVX: # BB#0:
+; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3,4,5,6],xmm1[7]
+; AVX-NEXT: retq
+ %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 9, i32 2, i32 3, i32 4, i32 5, i32 6, i32 15>
+ ret <8 x i16> %shuffle
+}
+
+define <8 x i16> @shuffle_v8i16_fu3ucc5u(<8 x i16> %a, <8 x i16> %b) {
+; SSE-LABEL: shuffle_v8i16_fu3ucc5u:
+; SSE: # BB#0:
+; SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
+; SSE-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,7,5,4,4]
+; SSE-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; SSE-NEXT: movdqa %xmm1, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: shuffle_v8i16_fu3ucc5u:
+; AVX: # BB#0:
+; AVX-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
+; AVX-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,7,5,4,4]
+; AVX-NEXT: vpunpckhdq {{.*#+}} xmm0 = xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; AVX-NEXT: retq
+ %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 15, i32 undef, i32 3, i32 undef, i32 12, i32 12, i32 5, i32 undef>
+ ret <8 x i16> %shuffle
+}
+
+define <8 x i16> @shuffle_v8i16_8012345u(<8 x i16> %a) {
+; SSE-LABEL: shuffle_v8i16_8012345u:
+; SSE: # BB#0:
+; SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
+; SSE-NEXT: retq
+;
+; AVX-LABEL: shuffle_v8i16_8012345u:
+; AVX: # BB#0:
+; AVX-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
+; AVX-NEXT: retq
+ %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 8, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 undef>
+
+ ret <8 x i16> %shuffle
+}
diff --git a/test/CodeGen/X86/vector-shuffle-256-v16.ll b/test/CodeGen/X86/vector-shuffle-256-v16.ll
index 7c38149a700c..df4994da6932 100644
--- a/test/CodeGen/X86/vector-shuffle-256-v16.ll
+++ b/test/CodeGen/X86/vector-shuffle-256-v16.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -mcpu=x86-64 -mattr=+avx -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=AVX1
-; RUN: llc < %s -mcpu=x86-64 -mattr=+avx2 -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=AVX2
+; RUN: llc < %s -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX1
+; RUN: llc < %s -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX2
target triple = "x86_64-unknown-unknown"
@@ -151,9 +151,7 @@ define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_0
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,1,4,5,6,7]
-; AVX1-NEXT: vpshuflw {{.*#+}} xmm2 = xmm0[0,0,0,0,4,5,6,7]
-; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
+; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,0,1,0,1,0,1,0,1,0,1,0,1,2,3]
; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
@@ -161,7 +159,7 @@ define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_0
; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00:
; AVX2: # BB#0:
; AVX2-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
-; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[u,u,u,u,u,u,u,u,u,u,u,u,u,u,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
+; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
; AVX2-NEXT: vpbroadcastw %xmm0, %ymm0
; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; AVX2-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
@@ -175,9 +173,7 @@ define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_09_00_00_00_00_00_00_00_00_0
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,3,0,4,5,6,7]
-; AVX1-NEXT: vpshuflw {{.*#+}} xmm2 = xmm0[0,0,0,0,4,5,6,7]
-; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
+; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,0,1,0,1,0,1,0,1,0,1,6,7,0,1]
; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
@@ -185,10 +181,9 @@ define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_09_00_00_00_00_00_00_00_00_0
; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_09_00_00_00_00_00_00_00_00_00:
; AVX2: # BB#0:
; AVX2-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
-; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[u,u,u,u,u,u,u,u,u,u,u,u,2,3,u,u,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
-; AVX2-NEXT: vpbroadcastw %xmm0, %ymm0
-; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,0,0,0,0,0,0,0,0,0,0,0,255,255,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
+; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = <0,0,255,255,u,u,u,u,u,u,u,u,u,u,u,u,255,255,u,u,u,u,u,u,u,u,u,u,u,u,u,u>
; AVX2-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,0,1,2,3,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
; AVX2-NEXT: retq
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 9, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
ret <16 x i16> %shuffle
@@ -199,10 +194,7 @@ define <16 x i16> @shuffle_v16i16_00_00_00_00_00_10_00_00_00_00_00_00_00_00_00_0
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
-; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,3,0,0,4,5,6,7]
-; AVX1-NEXT: vpshuflw {{.*#+}} xmm2 = xmm0[0,0,0,0,4,5,6,7]
-; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
+; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,0,1,0,1,0,1,0,1,10,11,0,1,0,1]
; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
@@ -210,10 +202,8 @@ define <16 x i16> @shuffle_v16i16_00_00_00_00_00_10_00_00_00_00_00_00_00_00_00_0
; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_10_00_00_00_00_00_00_00_00_00_00:
; AVX2: # BB#0:
; AVX2-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
-; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[u,u,u,u,u,u,u,u,u,u,4,5,u,u,u,u,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
-; AVX2-NEXT: vpbroadcastw %xmm0, %ymm0
-; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,0,0,0,0,0,0,0,0,0,255,255,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; AVX2-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7]
+; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,4,5,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
; AVX2-NEXT: retq
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 10, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
ret <16 x i16> %shuffle
@@ -224,10 +214,7 @@ define <16 x i16> @shuffle_v16i16_00_00_00_00_11_00_00_00_00_00_00_00_00_00_00_0
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,3,2,3]
-; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[3,0,0,0,4,5,6,7]
-; AVX1-NEXT: vpshuflw {{.*#+}} xmm2 = xmm0[0,0,0,0,4,5,6,7]
-; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
+; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,0,1,0,1,0,1,14,15,0,1,0,1,0,1]
; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
@@ -235,10 +222,8 @@ define <16 x i16> @shuffle_v16i16_00_00_00_00_11_00_00_00_00_00_00_00_00_00_00_0
; AVX2-LABEL: shuffle_v16i16_00_00_00_00_11_00_00_00_00_00_00_00_00_00_00_00:
; AVX2: # BB#0:
; AVX2-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
-; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[u,u,u,u,u,u,u,u,6,7,u,u,u,u,u,u,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
-; AVX2-NEXT: vpbroadcastw %xmm0, %ymm0
-; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,0,0,0,0,0,0,0,255,255,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; AVX2-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7]
+; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,6,7,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
; AVX2-NEXT: retq
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 11, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
ret <16 x i16> %shuffle
@@ -248,11 +233,8 @@ define <16 x i16> @shuffle_v16i16_00_00_00_12_00_00_00_00_00_00_00_00_00_00_00_0
; AVX1-LABEL: shuffle_v16i16_00_00_00_12_00_00_00_00_00_00_00_00_00_00_00_00:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
-; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,1,4,5,6,7]
-; AVX1-NEXT: vpshuflw {{.*#+}} xmm2 = xmm0[0,0,0,0,4,5,6,7]
-; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0,1,2,3],xmm1[4,5,6,7]
+; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1]
; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
@@ -260,10 +242,8 @@ define <16 x i16> @shuffle_v16i16_00_00_00_12_00_00_00_00_00_00_00_00_00_00_00_0
; AVX2-LABEL: shuffle_v16i16_00_00_00_12_00_00_00_00_00_00_00_00_00_00_00_00:
; AVX2: # BB#0:
; AVX2-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
-; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[u,u,u,u,u,u,8,9,u,u,u,u,u,u,u,u,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
-; AVX2-NEXT: vpbroadcastw %xmm0, %ymm0
-; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,0,0,0,0,0,255,255,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; AVX2-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5,6,7]
+; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
; AVX2-NEXT: retq
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 12, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
ret <16 x i16> %shuffle
@@ -273,11 +253,8 @@ define <16 x i16> @shuffle_v16i16_00_00_13_00_00_00_00_00_00_00_00_00_00_00_00_0
; AVX1-LABEL: shuffle_v16i16_00_00_13_00_00_00_00_00_00_00_00_00_00_00_00_00:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
-; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,3,0,4,5,6,7]
-; AVX1-NEXT: vpshuflw {{.*#+}} xmm2 = xmm0[0,0,0,0,4,5,6,7]
-; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0,1,2,3],xmm1[4,5,6,7]
+; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1]
; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
@@ -285,10 +262,8 @@ define <16 x i16> @shuffle_v16i16_00_00_13_00_00_00_00_00_00_00_00_00_00_00_00_0
; AVX2-LABEL: shuffle_v16i16_00_00_13_00_00_00_00_00_00_00_00_00_00_00_00_00:
; AVX2: # BB#0:
; AVX2-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
-; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[u,u,u,u,10,11,u,u,u,u,u,u,u,u,u,u,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
-; AVX2-NEXT: vpbroadcastw %xmm0, %ymm0
-; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,0,0,0,255,255,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; AVX2-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5,6,7]
+; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
; AVX2-NEXT: retq
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 13, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
ret <16 x i16> %shuffle
@@ -298,12 +273,8 @@ define <16 x i16> @shuffle_v16i16_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_0
; AVX1-LABEL: shuffle_v16i16_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
-; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
-; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,3,0,0,4,5,6,7]
-; AVX1-NEXT: vpshuflw {{.*#+}} xmm2 = xmm0[0,0,0,0,4,5,6,7]
-; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0,1,2,3],xmm1[4,5,6,7]
+; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1]
; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
@@ -311,10 +282,8 @@ define <16 x i16> @shuffle_v16i16_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_0
; AVX2-LABEL: shuffle_v16i16_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
; AVX2: # BB#0:
; AVX2-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
-; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[u,u,12,13,u,u,u,u,u,u,u,u,u,u,u,u,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
-; AVX2-NEXT: vpbroadcastw %xmm0, %ymm0
-; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,0,255,255,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; AVX2-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5,6,7]
+; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
; AVX2-NEXT: retq
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 14, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
ret <16 x i16> %shuffle
@@ -324,12 +293,8 @@ define <16 x i16> @shuffle_v16i16_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_0
; AVX1-LABEL: shuffle_v16i16_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
-; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,3,2,3]
-; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[3,0,0,0,4,5,6,7]
-; AVX1-NEXT: vpshuflw {{.*#+}} xmm2 = xmm0[0,0,0,0,4,5,6,7]
-; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0,1,2,3],xmm1[4,5,6,7]
+; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
@@ -337,10 +302,8 @@ define <16 x i16> @shuffle_v16i16_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_0
; AVX2-LABEL: shuffle_v16i16_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
; AVX2: # BB#0:
; AVX2-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
-; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[14,15,u,u,u,u,u,u,u,u,u,u,u,u,u,u,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
-; AVX2-NEXT: vpbroadcastw %xmm0, %ymm0
-; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; AVX2-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5,6,7]
+; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
; AVX2-NEXT: retq
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
ret <16 x i16> %shuffle
@@ -395,7 +358,8 @@ define <16 x i16> @shuffle_v16i16_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_1
;
; AVX2-LABEL: shuffle_v16i16_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12:
; AVX2: # BB#0:
-; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,8,9,8,9,8,9,8,9,16,17,16,17,16,17,16,17,24,25,24,25,24,25,24,25]
+; AVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,0,0,0,4,5,6,7,8,8,8,8,12,13,14,15]
+; AVX2-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,4,4,4,8,9,10,11,12,12,12,12]
; AVX2-NEXT: retq
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4, i32 8, i32 8, i32 8, i32 8, i32 12, i32 12, i32 12, i32 12>
ret <16 x i16> %shuffle
@@ -414,7 +378,8 @@ define <16 x i16> @shuffle_v16i16_03_03_03_03_07_07_07_07_11_11_11_11_15_15_15_1
;
; AVX2-LABEL: shuffle_v16i16_03_03_03_03_07_07_07_07_11_11_11_11_15_15_15_15:
; AVX2: # BB#0:
-; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[6,7,6,7,6,7,6,7,14,15,14,15,14,15,14,15,22,23,22,23,22,23,22,23,30,31,30,31,30,31,30,31]
+; AVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[3,3,3,3,4,5,6,7,11,11,11,11,12,13,14,15]
+; AVX2-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,7,7,7,8,9,10,11,15,15,15,15]
; AVX2-NEXT: retq
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 7, i32 7, i32 7, i32 7, i32 11, i32 11, i32 11, i32 11, i32 15, i32 15, i32 15, i32 15>
ret <16 x i16> %shuffle
@@ -433,7 +398,8 @@ define <16 x i16> @shuffle_v16i16_00_00_02_02_04_04_06_06_08_08_10_10_12_12_14_1
;
; AVX2-LABEL: shuffle_v16i16_00_00_02_02_04_04_06_06_08_08_10_10_12_12_14_14:
; AVX2: # BB#0:
-; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,4,5,4,5,8,9,8,9,12,13,12,13,16,17,16,17,20,21,20,21,24,25,24,25,28,29,28,29]
+; AVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,0,2,2,4,5,6,7,8,8,10,10,12,13,14,15]
+; AVX2-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,4,6,6,8,9,10,11,12,12,14,14]
; AVX2-NEXT: retq
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6, i32 8, i32 8, i32 10, i32 10, i32 12, i32 12, i32 14, i32 14>
ret <16 x i16> %shuffle
@@ -452,7 +418,8 @@ define <16 x i16> @shuffle_v16i16_01_01_03_03_05_05_07_07_09_09_11_11_13_13_15_1
;
; AVX2-LABEL: shuffle_v16i16_01_01_03_03_05_05_07_07_09_09_11_11_13_13_15_15:
; AVX2: # BB#0:
-; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[2,3,2,3,6,7,6,7,10,11,10,11,14,15,14,15,18,19,18,19,22,23,22,23,26,27,26,27,30,31,30,31]
+; AVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[1,1,3,3,4,5,6,7,9,9,11,11,12,13,14,15]
+; AVX2-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,5,5,7,7,8,9,10,11,13,13,15,15]
; AVX2-NEXT: retq
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7, i32 9, i32 9, i32 11, i32 11, i32 13, i32 13, i32 15, i32 15>
ret <16 x i16> %shuffle
@@ -724,9 +691,8 @@ define <16 x i16> @shuffle_v16i16_00_01_18_19_20_21_06_07_08_09_26_27_12_13_30_3
define <16 x i16> @shuffle_v16i16_00_16_00_16_00_16_00_16_00_16_00_16_00_16_00_16(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_16_00_16_00_16_00_16_00_16_00_16_00_16_00_16:
; AVX1: # BB#0:
-; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
-; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: retq
;
@@ -743,20 +709,18 @@ define <16 x i16> @shuffle_v16i16_00_16_00_16_00_16_00_16_00_16_00_16_00_16_00_1
define <16 x i16> @shuffle_v16i16_00_16_00_16_00_16_00_16_08_24_08_24_08_24_08_24(<16 x i16> %a, <16 x i16> %b) {
; AVX1-LABEL: shuffle_v16i16_00_16_00_16_00_16_00_16_08_24_08_24_08_24_08_24:
; AVX1: # BB#0:
-; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
-; AVX1-NEXT: vpshuflw {{.*#+}} xmm2 = xmm2[0,0,0,0,4,5,6,7]
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
-; AVX1-NEXT: vpshuflw {{.*#+}} xmm3 = xmm3[0,0,0,0,4,5,6,7]
-; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
-; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
-; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
+; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,0,0,0]
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v16i16_00_16_00_16_00_16_00_16_08_24_08_24_08_24_08_24:
; AVX2: # BB#0:
-; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[u,u,0,1,u,u,0,1,u,u,0,1,u,u,0,1,u,u,16,17,u,u,16,17,u,u,16,17,u,u,16,17]
+; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[0,1,0,1,4,5,0,1,0,1,0,1,12,13,0,1,16,17,16,17,20,21,16,17,16,17,16,17,28,29,16,17]
; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4]
; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15]
; AVX2-NEXT: retq
@@ -780,7 +744,7 @@ define <16 x i16> @shuffle_v16i16_16_16_16_16_04_05_06_07_24_24_24_24_12_13_14_1
;
; AVX2-LABEL: shuffle_v16i16_16_16_16_16_04_05_06_07_24_24_24_24_12_13_14_15:
; AVX2: # BB#0:
-; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[0,1,0,1,0,1,0,1,u,u,u,u,u,u,u,u,16,17,16,17,16,17,16,17,u,u,u,u,u,u,u,u]
+; AVX2-NEXT: vpshuflw {{.*#+}} ymm1 = ymm1[0,0,0,0,4,5,6,7,8,8,8,8,12,13,14,15]
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
; AVX2-NEXT: retq
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 16, i32 16, i32 16, i32 16, i32 4, i32 5, i32 6, i32 7, i32 24, i32 24, i32 24, i32 24, i32 12, i32 13, i32 14, i32 15>
@@ -805,9 +769,9 @@ define <16 x i16> @shuffle_v16i16_19_18_17_16_07_06_05_04_27_26_25_24_15_14_13_1
;
; AVX2-LABEL: shuffle_v16i16_19_18_17_16_07_06_05_04_27_26_25_24_15_14_13_12:
; AVX2: # BB#0:
-; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,u,u,u,u,u,u,u,14,15,12,13,10,11,8,9,u,u,u,u,u,u,u,u,30,31,28,29,26,27,24,25]
-; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[6,7,4,5,2,3,0,1,u,u,u,u,u,u,u,u,22,23,20,21,18,19,16,17,u,u,u,u,u,u,u,u]
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
+; AVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[3,2,1,0,4,5,6,7,11,10,9,8,12,13,14,15]
+; AVX2-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,6,5,4,8,9,10,11,15,14,13,12]
; AVX2-NEXT: retq
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 19, i32 18, i32 17, i32 16, i32 7, i32 6, i32 5, i32 4, i32 27, i32 26, i32 25, i32 24, i32 15, i32 14, i32 13, i32 12>
ret <16 x i16> %shuffle
@@ -817,20 +781,20 @@ define <16 x i16> @shuffle_v16i16_19_18_17_16_03_02_01_00_27_26_25_24_11_10_09_0
; AVX1-LABEL: shuffle_v16i16_19_18_17_16_03_02_01_00_27_26_25_24_11_10_09_08:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
-; AVX1-NEXT: vpshuflw {{.*#+}} xmm2 = xmm2[3,2,1,0,4,5,6,7]
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
-; AVX1-NEXT: vpshuflw {{.*#+}} xmm3 = xmm3[3,2,1,0,4,5,6,7]
-; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
-; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
-; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[3,2,1,0,4,5,6,7]
-; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [12,13,8,9,4,5,0,1,14,15,10,11,6,7,2,3]
+; AVX1-NEXT: vpshufb %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; AVX1-NEXT: vpshufb %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v16i16_19_18_17_16_03_02_01_00_27_26_25_24_11_10_09_08:
; AVX2: # BB#0:
-; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,u,u,u,u,u,u,u,6,7,4,5,2,3,0,1,u,u,u,u,u,u,u,u,22,23,20,21,18,19,16,17]
-; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[6,7,4,5,2,3,0,1,u,u,u,u,u,u,u,u,22,23,20,21,18,19,16,17,u,u,u,u,u,u,u,u]
+; AVX2-NEXT: vpshuflw {{.*#+}} ymm1 = ymm1[3,2,1,0,4,5,6,7,11,10,9,8,12,13,14,15]
+; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,1,0,1,4,5,4,5]
+; AVX2-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,6,5,4,8,9,10,11,15,14,13,12]
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
; AVX2-NEXT: retq
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 19, i32 18, i32 17, i32 16, i32 3, i32 2, i32 1, i32 0, i32 27, i32 26, i32 25, i32 24, i32 11, i32 10, i32 9, i32 8>
@@ -1278,7 +1242,8 @@ define <16 x i16> @shuffle_v16i16_00_00_00_00_04_04_04_04_16_16_16_16_20_20_20_2
; AVX2-LABEL: shuffle_v16i16_00_00_00_00_04_04_04_04_16_16_16_16_20_20_20_20:
; AVX2: # BB#0:
; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
-; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,8,9,8,9,8,9,8,9,16,17,16,17,16,17,16,17,24,25,24,25,24,25,24,25]
+; AVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,0,0,0,4,5,6,7,8,8,8,8,12,13,14,15]
+; AVX2-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,4,4,4,8,9,10,11,12,12,12,12]
; AVX2-NEXT: retq
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4, i32 16, i32 16, i32 16, i32 16, i32 20, i32 20, i32 20, i32 20>
ret <16 x i16> %shuffle
@@ -1298,7 +1263,8 @@ define <16 x i16> @shuffle_v16i16_08_08_08_08_12_12_12_12_16_16_16_16_20_20_20_2
; AVX2-LABEL: shuffle_v16i16_08_08_08_08_12_12_12_12_16_16_16_16_20_20_20_20:
; AVX2: # BB#0:
; AVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
-; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,8,9,8,9,8,9,8,9,16,17,16,17,16,17,16,17,24,25,24,25,24,25,24,25]
+; AVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,0,0,0,4,5,6,7,8,8,8,8,12,13,14,15]
+; AVX2-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,4,4,4,8,9,10,11,12,12,12,12]
; AVX2-NEXT: retq
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 8, i32 8, i32 8, i32 8, i32 12, i32 12, i32 12, i32 12, i32 16, i32 16, i32 16, i32 16, i32 20, i32 20, i32 20, i32 20>
ret <16 x i16> %shuffle
@@ -1319,7 +1285,8 @@ define <16 x i16> @shuffle_v16i16_08_08_08_08_12_12_12_12_24_24_24_24_28_28_28_2
; AVX2-LABEL: shuffle_v16i16_08_08_08_08_12_12_12_12_24_24_24_24_28_28_28_28:
; AVX2: # BB#0:
; AVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
-; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,8,9,8,9,8,9,8,9,16,17,16,17,16,17,16,17,24,25,24,25,24,25,24,25]
+; AVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,0,0,0,4,5,6,7,8,8,8,8,12,13,14,15]
+; AVX2-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,4,4,4,8,9,10,11,12,12,12,12]
; AVX2-NEXT: retq
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 8, i32 8, i32 8, i32 8, i32 12, i32 12, i32 12, i32 12, i32 24, i32 24, i32 24, i32 24, i32 28, i32 28, i32 28, i32 28>
ret <16 x i16> %shuffle
@@ -1339,7 +1306,8 @@ define <16 x i16> @shuffle_v16i16_00_00_00_00_04_04_04_04_24_24_24_24_28_28_28_2
; AVX2-LABEL: shuffle_v16i16_00_00_00_00_04_04_04_04_24_24_24_24_28_28_28_28:
; AVX2: # BB#0:
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
-; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,8,9,8,9,8,9,8,9,16,17,16,17,16,17,16,17,24,25,24,25,24,25,24,25]
+; AVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,0,0,0,4,5,6,7,8,8,8,8,12,13,14,15]
+; AVX2-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,4,4,4,8,9,10,11,12,12,12,12]
; AVX2-NEXT: retq
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4, i32 24, i32 24, i32 24, i32 24, i32 28, i32 28, i32 28, i32 28>
ret <16 x i16> %shuffle
@@ -1362,3 +1330,1934 @@ define <16 x i16> @shuffle_v16i16_00_16_01_17_02_18_03_19_04_20_05_21_06_22_07_2
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
ret <16 x i16> %shuffle
}
+
+define <16 x i16> @shuffle_v16i16_zz_zz_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_24(<16 x i16> %a) {
+; AVX1-LABEL: shuffle_v16i16_zz_zz_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_24:
+; AVX1: # BB#0:
+; AVX1-NEXT: vpslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1]
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1]
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v16i16_zz_zz_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_24:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0,1],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17]
+; AVX2-NEXT: retq
+ %shuffle = shufflevector <16 x i16> zeroinitializer, <16 x i16> %a, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 16, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 24>
+ ret <16 x i16> %shuffle
+}
+
+define <16 x i16> @shuffle_v16i16_17_18_19_20_21_22_23_zz_25_26_27_28_29_30_31_zz(<16 x i16> %a) {
+; AVX1-LABEL: shuffle_v16i16_17_18_19_20_21_22_23_zz_25_26_27_28_29_30_31_zz:
+; AVX1: # BB#0:
+; AVX1-NEXT: vpsrldq {{.*#+}} xmm1 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v16i16_17_18_19_20_21_22_23_zz_25_26_27_28_29_30_31_zz:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpsrldq {{.*#+}} ymm0 = ymm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,ymm0[18,19,20,21,22,23,24,25,26,27,28,29,30,31],zero,zero
+; AVX2-NEXT: retq
+ %shuffle = shufflevector <16 x i16> zeroinitializer, <16 x i16> %a, <16 x i32> <i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 0, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 0>
+ ret <16 x i16> %shuffle
+}
+
+;
+; Shuffle to logical bit shifts
+;
+
+define <16 x i16> @shuffle_v16i16_zz_00_zz_02_zz_04_zz_06_zz_08_zz_10_zz_12_zz_14(<16 x i16> %a) {
+; AVX1-LABEL: shuffle_v16i16_zz_00_zz_02_zz_04_zz_06_zz_08_zz_10_zz_12_zz_14:
+; AVX1: # BB#0:
+; AVX1-NEXT: vpslld $16, %xmm0, %xmm1
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT: vpslld $16, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v16i16_zz_00_zz_02_zz_04_zz_06_zz_08_zz_10_zz_12_zz_14:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpslld $16, %ymm0, %ymm0
+; AVX2-NEXT: retq
+ %shuffle = shufflevector <16 x i16> %a, <16 x i16> zeroinitializer, <16 x i32> <i32 16, i32 0, i32 16, i32 2, i32 16, i32 4, i32 16, i32 6, i32 16, i32 8, i32 16, i32 10, i32 16, i32 12, i32 16, i32 14>
+ ret <16 x i16> %shuffle
+}
+
+define <16 x i16> @shuffle_v16i16_zz_zz_zz_00_zz_zz_zz_04_zz_zz_zz_08_zz_zz_zz_12(<16 x i16> %a) {
+; AVX1-LABEL: shuffle_v16i16_zz_zz_zz_00_zz_zz_zz_04_zz_zz_zz_08_zz_zz_zz_12:
+; AVX1: # BB#0:
+; AVX1-NEXT: vpsllq $48, %xmm0, %xmm1
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT: vpsllq $48, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v16i16_zz_zz_zz_00_zz_zz_zz_04_zz_zz_zz_08_zz_zz_zz_12:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpsllq $48, %ymm0, %ymm0
+; AVX2-NEXT: retq
+ %shuffle = shufflevector <16 x i16> %a, <16 x i16> zeroinitializer, <16 x i32> <i32 16, i32 16, i32 16, i32 0, i32 16, i32 16, i32 16, i32 4, i32 16, i32 16, i32 16, i32 8, i32 16, i32 16, i32 16, i32 12>
+ ret <16 x i16> %shuffle
+}
+
+define <16 x i16> @shuffle_v16i16_01_zz_03_zz_05_zz_07_zz_09_zz_11_zz_13_zz_15_zz(<16 x i16> %a) {
+; AVX1-LABEL: shuffle_v16i16_01_zz_03_zz_05_zz_07_zz_09_zz_11_zz_13_zz_15_zz:
+; AVX1: # BB#0:
+; AVX1-NEXT: vpsrld $16, %xmm0, %xmm1
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT: vpsrld $16, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v16i16_01_zz_03_zz_05_zz_07_zz_09_zz_11_zz_13_zz_15_zz:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpsrld $16, %ymm0, %ymm0
+; AVX2-NEXT: retq
+ %shuffle = shufflevector <16 x i16> %a, <16 x i16> zeroinitializer, <16 x i32> <i32 1, i32 16, i32 3, i32 16, i32 5, i32 16, i32 7, i32 16, i32 9, i32 16, i32 11, i32 16, i32 13, i32 16, i32 15, i32 16>
+ ret <16 x i16> %shuffle
+}
+
+define <16 x i16> @shuffle_v16i16_02_03_zz_zz_06_07_zz_zz_10_11_zz_zz_14_15_zz_zz(<16 x i16> %a) {
+; AVX1-LABEL: shuffle_v16i16_02_03_zz_zz_06_07_zz_zz_10_11_zz_zz_14_15_zz_zz:
+; AVX1: # BB#0:
+; AVX1-NEXT: vxorps %ymm1, %ymm1, %ymm1
+; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[1,3],ymm1[1,3],ymm0[5,7],ymm1[5,7]
+; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7]
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v16i16_02_03_zz_zz_06_07_zz_zz_10_11_zz_zz_14_15_zz_zz:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpsrlq $32, %ymm0, %ymm0
+; AVX2-NEXT: retq
+ %shuffle = shufflevector <16 x i16> %a, <16 x i16> zeroinitializer, <16 x i32> <i32 2, i32 3, i32 16, i32 16, i32 6, i32 7, i32 16, i32 16, i32 10, i32 11, i32 16, i32 16, i32 14, i32 15, i32 16, i32 16>
+ ret <16 x i16> %shuffle
+}
+
+define <16 x i16> @shuffle_v16i16_16_zz_zz_zz_17_zz_zz_zz_18_zz_zz_zz_19_zz_zz_zz(<16 x i16> %a) {
+; AVX1-LABEL: shuffle_v16i16_16_zz_zz_zz_17_zz_zz_zz_18_zz_zz_zz_19_zz_zz_zz:
+; AVX1: # BB#0:
+; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[4,5,2,3,4,5,6,7,6,7,10,11,4,5,6,7]
+; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3],xmm1[4],xmm2[5,6,7]
+; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v16i16_16_zz_zz_zz_17_zz_zz_zz_18_zz_zz_zz_19_zz_zz_zz:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
+; AVX2-NEXT: retq
+ %shuffle = shufflevector <16 x i16> zeroinitializer, <16 x i16> %a, <16 x i32> <i32 16, i32 0, i32 0, i32 0, i32 17, i32 0, i32 0, i32 0, i32 18, i32 0, i32 0, i32 0, i32 19, i32 0, i32 0, i32 0>
+ ret <16 x i16> %shuffle
+}
+
+define <16 x i16> @shuffle_v16i16_16_zz_17_zz_18_zz_19_zz_20_zz_21_zz_22_zz_22_zz(<16 x i16> %a) {
+; AVX1-LABEL: shuffle_v16i16_16_zz_17_zz_18_zz_19_zz_20_zz_21_zz_22_zz_22_zz:
+; AVX1: # BB#0:
+; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v16i16_16_zz_17_zz_18_zz_19_zz_20_zz_21_zz_22_zz_22_zz:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
+; AVX2-NEXT: retq
+ %shuffle = shufflevector <16 x i16> zeroinitializer, <16 x i16> %a, <16 x i32> <i32 16, i32 0, i32 17, i32 0, i32 18, i32 0, i32 19, i32 0, i32 20, i32 0, i32 21, i32 0, i32 22, i32 0, i32 23, i32 0>
+ ret <16 x i16> %shuffle
+}
+
+define <16 x i16> @shuffle_v16i16_23_00_01_02_03_04_05_06_31_08_09_10_11_12_13_14(<16 x i16> %a, <16 x i16> %b) {
+; AVX1-LABEL: shuffle_v16i16_23_00_01_02_03_04_05_06_31_08_09_10_11_12_13_14:
+; AVX1: # BB#0:
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[14,15],xmm3[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
+; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v16i16_23_00_01_02_03_04_05_06_31_08_09_10_11_12_13_14:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm1[14,15],ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13],ymm1[30,31],ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28,29]
+; AVX2-NEXT: retq
+ %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 23, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 31, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14>
+ ret <16 x i16> %shuffle
+}
+
+define <16 x i16> @shuffle_v16i16_01_02_03_04_05_06_07_16_09_10_11_12_13_14_15_24(<16 x i16> %a, <16 x i16> %b) {
+; AVX1-LABEL: shuffle_v16i16_01_02_03_04_05_06_07_16_09_10_11_12_13_14_15_24:
+; AVX1: # BB#0:
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
+; AVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm3[0,1]
+; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1]
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v16i16_01_02_03_04_05_06_07_16_09_10_11_12_13_14_15_24:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm1[0,1],ymm0[18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm1[16,17]
+; AVX2-NEXT: retq
+ %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 24>
+ ret <16 x i16> %shuffle
+}
+
+define <16 x i16> @shuffle_v16i16_17_18_19_20_21_22_23_00_25_26_27_28_29_30_31_8(<16 x i16> %a, <16 x i16> %b) {
+; AVX1-LABEL: shuffle_v16i16_17_18_19_20_21_22_23_00_25_26_27_28_29_30_31_8:
+; AVX1: # BB#0:
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm3[0,1]
+; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0,1]
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v16i16_17_18_19_20_21_22_23_00_25_26_27_28_29_30_31_8:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0,1],ymm1[18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16,17]
+; AVX2-NEXT: retq
+ %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 00, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 8>
+ ret <16 x i16> %shuffle
+}
+
+define <16 x i16> @shuffle_v16i16_07_16_17_18_19_20_21_22_15_24_25_26_27_28_29_30(<16 x i16> %a, <16 x i16> %b) {
+; AVX1-LABEL: shuffle_v16i16_07_16_17_18_19_20_21_22_15_24_25_26_27_28_29_30:
+; AVX1: # BB#0:
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
+; AVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[14,15],xmm3[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
+; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[14,15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v16i16_07_16_17_18_19_20_21_22_15_24_25_26_27_28_29_30:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[14,15],ymm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13],ymm0[30,31],ymm1[16,17,18,19,20,21,22,23,24,25,26,27,28,29]
+; AVX2-NEXT: retq
+ %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 15, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30>
+ ret <16 x i16> %shuffle
+}
+
+define <16 x i16> @shuffle_v16i16_01_02_03_04_05_06_07_00_17_18_19_20_21_22_23_16(<16 x i16> %a, <16 x i16> %b) {
+; AVX1-LABEL: shuffle_v16i16_01_02_03_04_05_06_07_00_17_18_19_20_21_22_23_16:
+; AVX1: # BB#0:
+; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15,0,1]
+; AVX1-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15,0,1]
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v16i16_01_02_03_04_05_06_07_00_17_18_19_20_21_22_23_16:
+; AVX2: # BB#0:
+; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15,0,1,18,19,20,21,22,23,24,25,26,27,28,29,30,31,16,17]
+; AVX2-NEXT: retq
+ %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 16>
+ ret <16 x i16> %shuffle
+}
+
+define <16 x i16> @shuffle_v16i16_07_00_01_02_03_04_05_06_23_16_17_18_19_20_21_22(<16 x i16> %a, <16 x i16> %b) {
+; AVX1-LABEL: shuffle_v16i16_07_00_01_02_03_04_05_06_23_16_17_18_19_20_21_22:
+; AVX1: # BB#0:
+; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[14,15,0,1,2,3,4,5,6,7,8,9,10,11,12,13]
+; AVX1-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[14,15,0,1,2,3,4,5,6,7,8,9,10,11,12,13]
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v16i16_07_00_01_02_03_04_05_06_23_16_17_18_19_20_21_22:
+; AVX2: # BB#0:
+; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[14,15,0,1,2,3,4,5,6,7,8,9,10,11,12,13,30,31,16,17,18,19,20,21,22,23,24,25,26,27,28,29]
+; AVX2-NEXT: retq
+ %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 23, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22>
+ ret <16 x i16> %shuffle
+}
+
+define <16 x i16> @shuffle_v16i16_00_01_00_01_02_03_02_11_08_09_08_09_10_11_10_11(<16 x i16> %a, <16 x i16> %b) {
+; AVX1-LABEL: shuffle_v16i16_00_01_00_01_02_03_02_11_08_09_08_09_10_11_10_11:
+; AVX1: # BB#0:
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,0,2,4,5,6,7]
+; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,4,7]
+; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v16i16_00_01_00_01_02_03_02_11_08_09_08_09_10_11_10_11:
+; AVX2: # BB#0:
+; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; AVX2-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,0,2,4,5,6,7]
+; AVX2-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,4,7]
+; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
+; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+ %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 2, i32 3, i32 2, i32 11, i32 8, i32 9, i32 8, i32 9, i32 10, i32 11, i32 10, i32 11>
+ ret <16 x i16> %shuffle
+}
+
+define <16 x i16> @shuffle_v16i16_06_07_04_05_02_03_00_09_14_15_12_13_10_11_08_09(<16 x i16> %a, <16 x i16> %b) {
+; AVX1-LABEL: shuffle_v16i16_06_07_04_05_02_03_00_09_14_15_12_13_10_11_08_09:
+; AVX1: # BB#0:
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3,4,5,6,7]
+; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,2,1,0]
+; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[3,2,1,0]
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v16i16_06_07_04_05_02_03_00_09_14_15_12_13_10_11_08_09:
+; AVX2: # BB#0:
+; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3,4,5,6,7]
+; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,2,1,0]
+; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[3,2,1,0]
+; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+ %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 6, i32 7, i32 4, i32 5, i32 2, i32 3, i32 0, i32 9, i32 14, i32 15, i32 12, i32 13, i32 10, i32 11, i32 8, i32 9>
+ ret <16 x i16> %shuffle
+}
+
+define <16 x i16> @shuffle_v16i16_04_05_06_07_16_17_18_27_12_13_14_15_24_25_26_27(<16 x i16> %a, <16 x i16> %b) {
+; AVX1-LABEL: shuffle_v16i16_04_05_06_07_16_17_18_27_12_13_14_15_24_25_26_27:
+; AVX1: # BB#0:
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
+; AVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[8,9,10,11,12,13,14,15],xmm3[0,1,2,3,4,5,6,7]
+; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
+; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,4,5,4,5,6,7,0,1,4,5,8,9,14,15]
+; AVX1-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v16i16_04_05_06_07_16_17_18_27_12_13_14_15_24_25_26_27:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
+; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7]
+; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
+; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+ %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 27, i32 12, i32 13, i32 14, i32 15, i32 24, i32 25, i32 26, i32 27>
+ ret <16 x i16> %shuffle
+}
+
+define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08_08(<16 x i16> %a, <16 x i16> %b) {
+; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08_08:
+; AVX1: # BB#0:
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,2,3]
+; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08_08:
+; AVX2: # BB#0:
+; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,2,3]
+; AVX2-NEXT: vpbroadcastw %xmm1, %xmm1
+; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+ %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
+ ret <16 x i16> %shuffle
+}
+
+define <16 x i16> @shuffle_v16i16_00_00_00_00_04_04_04_12_08_08_08_08_12_12_12_12(<16 x i16> %a, <16 x i16> %b) {
+; AVX1-LABEL: shuffle_v16i16_00_00_00_00_04_04_04_12_08_08_08_08_12_12_12_12:
+; AVX1: # BB#0:
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vpsllq $48, %xmm1, %xmm2
+; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
+; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,7]
+; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
+; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
+; AVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,4,4]
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v16i16_00_00_00_00_04_04_04_12_08_08_08_08_12_12_12_12:
+; AVX2: # BB#0:
+; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX2-NEXT: vpsllq $48, %xmm1, %xmm2
+; AVX2-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
+; AVX2-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,7]
+; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
+; AVX2-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
+; AVX2-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,4,4]
+; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+ %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 12, i32 8, i32 8, i32 8, i32 8, i32 12, i32 12, i32 12, i32 12>
+ ret <16 x i16> %shuffle
+}
+
+define <16 x i16> @shuffle_v16i16_uu_00_uu_01_uu_02_uu_11_uu_08_uu_09_uu_10_uu_11(<16 x i16> %a, <16 x i16> %b) {
+; AVX1-LABEL: shuffle_v16i16_uu_00_uu_01_uu_02_uu_11_uu_08_uu_09_uu_10_uu_11:
+; AVX1: # BB#0:
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,2,4,5,6,7]
+; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,6,7]
+; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3]
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v16i16_uu_00_uu_01_uu_02_uu_11_uu_08_uu_09_uu_10_uu_11:
+; AVX2: # BB#0:
+; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; AVX2-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,2,4,5,6,7]
+; AVX2-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,6,7]
+; AVX2-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3]
+; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+ %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 0, i32 undef, i32 1, i32 undef, i32 2, i32 undef, i32 11, i32 undef, i32 8, i32 undef, i32 9, i32 undef, i32 10, i32 undef, i32 11>
+ ret <16 x i16> %shuffle
+}
+
+define <16 x i16> @shuffle_v16i16_uu_04_uu_05_uu_06_uu_15_uu_12_uu_13_uu_14_uu_15(<16 x i16> %a, <16 x i16> %b) {
+; AVX1-LABEL: shuffle_v16i16_uu_04_uu_05_uu_06_uu_15_uu_12_uu_13_uu_14_uu_15:
+; AVX1: # BB#0:
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,2,4,5,6,7]
+; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,6,7]
+; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v16i16_uu_04_uu_05_uu_06_uu_15_uu_12_uu_13_uu_14_uu_15:
+; AVX2: # BB#0:
+; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX2-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; AVX2-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,2,4,5,6,7]
+; AVX2-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,6,7]
+; AVX2-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
+; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+ %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 4, i32 undef, i32 5, i32 undef, i32 6, i32 undef, i32 15, i32 undef, i32 12, i32 undef, i32 13, i32 undef, i32 14, i32 undef, i32 15>
+ ret <16 x i16> %shuffle
+}
+
+define <16 x i16> @shuffle_v16i16_03_01_02_00_06_07_04_13_11_09_10_08_14_15_12_13(<16 x i16> %a, <16 x i16> %b) {
+; AVX1-LABEL: shuffle_v16i16_03_01_02_00_06_07_04_13_11_09_10_08_14_15_12_13:
+; AVX1: # BB#0:
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4],xmm1[5],xmm0[6,7]
+; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,1,2,0,4,5,6,7]
+; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
+; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[3,1,2,0,4,5,6,7]
+; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,1,3,2]
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v16i16_03_01_02_00_06_07_04_13_11_09_10_08_14_15_12_13:
+; AVX2: # BB#0:
+; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4],xmm1[5],xmm0[6,7]
+; AVX2-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,1,2,0,4,5,6,7]
+; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
+; AVX2-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[3,1,2,0,4,5,6,7]
+; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,1,3,2]
+; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+ %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 3, i32 1, i32 2, i32 0, i32 6, i32 7, i32 4, i32 13, i32 11, i32 9, i32 10, i32 8, i32 14, i32 15, i32 12, i32 13>
+ ret <16 x i16> %shuffle
+}
+
+define <16 x i16> @shuffle_v16i16_04_04_04_04_00_00_00_08_12_12_12_12_08_08_08_08(<16 x i16> %a, <16 x i16> %b) {
+; AVX1-LABEL: shuffle_v16i16_04_04_04_04_00_00_00_08_12_12_12_12_08_08_08_08:
+; AVX1: # BB#0:
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1]
+; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,8,9,0,1,0,1,0,1,14,15]
+; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
+; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[8,9,8,9,8,9,8,9,0,1,0,1,0,1,0,1]
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v16i16_04_04_04_04_00_00_00_08_12_12_12_12_08_08_08_08:
+; AVX2: # BB#0:
+; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX2-NEXT: vpbroadcastw %xmm1, %xmm2
+; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,8,9,0,1,0,1,0,1,14,15]
+; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
+; AVX2-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[8,9,8,9,8,9,8,9,0,1,0,1,0,1,0,1]
+; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+ %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 4, i32 4, i32 4, i32 0, i32 0, i32 0, i32 8, i32 12, i32 12, i32 12, i32 12, i32 8, i32 8, i32 8, i32 8>
+ ret <16 x i16> %shuffle
+}
+
+define <16 x i16> @shuffle_v16i16_02_03_00_01_06_07_04_13_10_11_08_09_14_15_12_13(<16 x i16> %a, <16 x i16> %b) {
+; AVX1-LABEL: shuffle_v16i16_02_03_00_01_06_07_04_13_10_11_08_09_14_15_12_13:
+; AVX1: # BB#0:
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4],xmm1[5],xmm0[6,7]
+; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2]
+; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,0,3,2]
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v16i16_02_03_00_01_06_07_04_13_10_11_08_09_14_15_12_13:
+; AVX2: # BB#0:
+; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4],xmm1[5],xmm0[6,7]
+; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2]
+; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,0,3,2]
+; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+ %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 2, i32 3, i32 0, i32 1, i32 6, i32 7, i32 4, i32 13, i32 10, i32 11, i32 8, i32 9, i32 14, i32 15, i32 12, i32 13>
+ ret <16 x i16> %shuffle
+}
+
+define <16 x i16> @shuffle_v16i16_02_03_00_02_06_07_04_13_10_11_08_10_14_15_12_13(<16 x i16> %a, <16 x i16> %b) {
+; AVX1-LABEL: shuffle_v16i16_02_03_00_02_06_07_04_13_10_11_08_10_14_15_12_13:
+; AVX1: # BB#0:
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4],xmm1[5],xmm0[6,7]
+; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[2,3,0,2,4,5,6,7]
+; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
+; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[2,3,0,2,4,5,6,7]
+; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,1,3,2]
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v16i16_02_03_00_02_06_07_04_13_10_11_08_10_14_15_12_13:
+; AVX2: # BB#0:
+; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4],xmm1[5],xmm0[6,7]
+; AVX2-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[2,3,0,2,4,5,6,7]
+; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
+; AVX2-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[2,3,0,2,4,5,6,7]
+; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,1,3,2]
+; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+ %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 2, i32 3, i32 0, i32 2, i32 6, i32 7, i32 4, i32 13, i32 10, i32 11, i32 8, i32 10, i32 14, i32 15, i32 12, i32 13>
+ ret <16 x i16> %shuffle
+}
+
+define <16 x i16> @shuffle_v16i16_02_03_00_01_06_07_04_15_10_11_08_09_14_15_12_15(<16 x i16> %a, <16 x i16> %b) {
+; AVX1-LABEL: shuffle_v16i16_02_03_00_01_06_07_04_15_10_11_08_09_14_15_12_15:
+; AVX1: # BB#0:
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2]
+; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
+; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,0,2,3]
+; AVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,6,7,4,7]
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v16i16_02_03_00_01_06_07_04_15_10_11_08_09_14_15_12_15:
+; AVX2: # BB#0:
+; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2]
+; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
+; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,0,2,3]
+; AVX2-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,6,7,4,7]
+; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+ %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 2, i32 3, i32 0, i32 1, i32 6, i32 7, i32 4, i32 15, i32 10, i32 11, i32 8, i32 9, i32 14, i32 15, i32 12, i32 15>
+ ret <16 x i16> %shuffle
+}
+
+define <16 x i16> @shuffle_v16i16_07_05_06_04_03_01_02_08_15_13_14_12_11_09_10_08(<16 x i16> %a, <16 x i16> %b) {
+; AVX1-LABEL: shuffle_v16i16_07_05_06_04_03_01_02_08_15_13_14_12_11_09_10_08:
+; AVX1: # BB#0:
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [14,15,10,11,12,13,8,9,6,7,2,3,4,5,0,1]
+; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm3
+; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3,4,5,6,7]
+; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v16i16_07_05_06_04_03_01_02_08_15_13_14_12_11_09_10_08:
+; AVX2: # BB#0:
+; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [14,15,10,11,12,13,8,9,6,7,2,3,4,5,0,1]
+; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm3
+; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3,4,5,6,7]
+; AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0
+; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0
+; AVX2-NEXT: retq
+ %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 7, i32 5, i32 6, i32 4, i32 3, i32 1, i32 2, i32 8, i32 15, i32 13, i32 14, i32 12, i32 11, i32 9, i32 10, i32 8>
+ ret <16 x i16> %shuffle
+}
+
+define <16 x i16> @shuffle_v16i16_01_00_05_04_05_04_01_08_09_08_13_12_13_12_09_08(<16 x i16> %a, <16 x i16> %b) {
+; AVX1-LABEL: shuffle_v16i16_01_00_05_04_05_04_01_08_09_08_13_12_13_12_09_08:
+; AVX1: # BB#0:
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1]
+; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,3,0,1,10,11,8,9,10,11,8,9,2,3,2,3]
+; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
+; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[2,3,0,1,10,11,8,9,10,11,8,9,2,3,0,1]
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v16i16_01_00_05_04_05_04_01_08_09_08_13_12_13_12_09_08:
+; AVX2: # BB#0:
+; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX2-NEXT: vpbroadcastw %xmm1, %xmm2
+; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,3,0,1,10,11,8,9,10,11,8,9,2,3,2,3]
+; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
+; AVX2-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[2,3,0,1,10,11,8,9,10,11,8,9,2,3,0,1]
+; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+ %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 1, i32 0, i32 5, i32 4, i32 5, i32 4, i32 1, i32 8, i32 9, i32 8, i32 13, i32 12, i32 13, i32 12, i32 9, i32 8>
+ ret <16 x i16> %shuffle
+}
+
+define <16 x i16> @shuffle_v16i16_05_04_01_00_05_04_01_08_13_12_09_08_13_12_09_08(<16 x i16> %a, <16 x i16> %b) {
+; AVX1-LABEL: shuffle_v16i16_05_04_01_00_05_04_01_08_13_12_09_08_13_12_09_08:
+; AVX1: # BB#0:
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1]
+; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,10,11,8,9,2,3,2,3]
+; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
+; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[10,11,8,9,2,3,0,1,10,11,8,9,2,3,0,1]
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v16i16_05_04_01_00_05_04_01_08_13_12_09_08_13_12_09_08:
+; AVX2: # BB#0:
+; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX2-NEXT: vpbroadcastw %xmm1, %xmm2
+; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,10,11,8,9,2,3,2,3]
+; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
+; AVX2-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[10,11,8,9,2,3,0,1,10,11,8,9,2,3,0,1]
+; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+ %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 5, i32 4, i32 1, i32 0, i32 5, i32 4, i32 1, i32 8, i32 13, i32 12, i32 9, i32 8, i32 13, i32 12, i32 9, i32 8>
+ ret <16 x i16> %shuffle
+}
+
+define <16 x i16> @shuffle_v16i16_05_04_01_00_01_00_05_12_13_12_09_08_09_08_13_12(<16 x i16> %a, <16 x i16> %b) {
+; AVX1-LABEL: shuffle_v16i16_05_04_01_00_01_00_05_12_13_12_09_08_09_08_13_12:
+; AVX1: # BB#0:
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vpsllq $48, %xmm1, %xmm2
+; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,2,3,0,1,10,11,2,3]
+; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
+; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[10,11,8,9,2,3,0,1,2,3,0,1,10,11,8,9]
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v16i16_05_04_01_00_01_00_05_12_13_12_09_08_09_08_13_12:
+; AVX2: # BB#0:
+; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX2-NEXT: vpsllq $48, %xmm1, %xmm2
+; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,2,3,0,1,10,11,2,3]
+; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
+; AVX2-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[10,11,8,9,2,3,0,1,2,3,0,1,10,11,8,9]
+; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+ %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 5, i32 4, i32 1, i32 0, i32 1, i32 0, i32 5, i32 12, i32 13, i32 12, i32 9, i32 8, i32 9, i32 8, i32 13, i32 12>
+ ret <16 x i16> %shuffle
+}
+
+define <16 x i16> @shuffle_v16i16_00_04_04_00_00_04_04_08_08_12_12_08_08_12_12_08(<16 x i16> %a, <16 x i16> %b) {
+; AVX1-LABEL: shuffle_v16i16_00_04_04_00_00_04_04_08_08_12_12_08_08_12_12_08:
+; AVX1: # BB#0:
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1]
+; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,8,9,8,9,2,3]
+; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
+; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,8,9,8,9,0,1,0,1,8,9,8,9,0,1]
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v16i16_00_04_04_00_00_04_04_08_08_12_12_08_08_12_12_08:
+; AVX2: # BB#0:
+; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX2-NEXT: vpbroadcastw %xmm1, %xmm2
+; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,8,9,8,9,2,3]
+; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
+; AVX2-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,8,9,8,9,0,1,0,1,8,9,8,9,0,1]
+; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+ %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 4, i32 4, i32 0, i32 0, i32 4, i32 4, i32 8, i32 8, i32 12, i32 12, i32 8, i32 8, i32 12, i32 12, i32 8>
+ ret <16 x i16> %shuffle
+}
+
+define <16 x i16> @shuffle_v16i16_04_00_00_04_04_00_00_12_12_08_08_12_12_08_08_12(<16 x i16> %a, <16 x i16> %b) {
+; AVX1-LABEL: shuffle_v16i16_04_00_00_04_04_00_00_12_12_08_08_12_12_08_08_12:
+; AVX1: # BB#0:
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vpsllq $48, %xmm1, %xmm2
+; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,0,1,0,1,8,9,8,9,0,1,0,1,2,3]
+; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
+; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[8,9,0,1,0,1,8,9,8,9,0,1,0,1,8,9]
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v16i16_04_00_00_04_04_00_00_12_12_08_08_12_12_08_08_12:
+; AVX2: # BB#0:
+; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX2-NEXT: vpsllq $48, %xmm1, %xmm2
+; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,0,1,0,1,8,9,8,9,0,1,0,1,2,3]
+; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
+; AVX2-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[8,9,0,1,0,1,8,9,8,9,0,1,0,1,8,9]
+; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+ %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 0, i32 0, i32 4, i32 4, i32 0, i32 0, i32 12, i32 12, i32 8, i32 8, i32 12, i32 12, i32 8, i32 8, i32 12>
+ ret <16 x i16> %shuffle
+}
+
+define <16 x i16> @shuffle_v16i16_02_06_04_00_05_01_07_11_10_14_12_08_13_09_15_11(<16 x i16> %a, <16 x i16> %b) {
+; AVX1-LABEL: shuffle_v16i16_02_06_04_00_05_01_07_11_10_14_12_08_13_09_15_11:
+; AVX1: # BB#0:
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [4,5,12,13,8,9,0,1,10,11,2,3,14,15,6,7]
+; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm3
+; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7]
+; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v16i16_02_06_04_00_05_01_07_11_10_14_12_08_13_09_15_11:
+; AVX2: # BB#0:
+; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [4,5,12,13,8,9,0,1,10,11,2,3,14,15,6,7]
+; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm3
+; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7]
+; AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0
+; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0
+; AVX2-NEXT: retq
+ %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 2, i32 6, i32 4, i32 0, i32 5, i32 1, i32 7, i32 11, i32 10, i32 14, i32 12, i32 8, i32 13, i32 9, i32 15, i32 11>
+ ret <16 x i16> %shuffle
+}
+
+define <16 x i16> @shuffle_v16i16_02_00_06_04_05_01_07_11_10_08_14_12_13_09_15_11(<16 x i16> %a, <16 x i16> %b) {
+; AVX1-LABEL: shuffle_v16i16_02_00_06_04_05_01_07_11_10_08_14_12_13_09_15_11:
+; AVX1: # BB#0:
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [4,5,0,1,12,13,8,9,10,11,2,3,14,15,6,7]
+; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm3
+; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7]
+; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v16i16_02_00_06_04_05_01_07_11_10_08_14_12_13_09_15_11:
+; AVX2: # BB#0:
+; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [4,5,0,1,12,13,8,9,10,11,2,3,14,15,6,7]
+; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm3
+; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7]
+; AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0
+; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0
+; AVX2-NEXT: retq
+ %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 2, i32 0, i32 6, i32 4, i32 5, i32 1, i32 7, i32 11, i32 10, i32 8, i32 14, i32 12, i32 13, i32 9, i32 15, i32 11>
+ ret <16 x i16> %shuffle
+}
+
+define <16 x i16> @shuffle_v16i16_02_06_04_00_01_03_07_13_10_14_12_08_09_11_15_13(<16 x i16> %a, <16 x i16> %b) {
+; AVX1-LABEL: shuffle_v16i16_02_06_04_00_01_03_07_13_10_14_12_08_09_11_15_13:
+; AVX1: # BB#0:
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [4,5,12,13,8,9,0,1,2,3,6,7,14,15,10,11]
+; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm3
+; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4],xmm1[5],xmm0[6,7]
+; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v16i16_02_06_04_00_01_03_07_13_10_14_12_08_09_11_15_13:
+; AVX2: # BB#0:
+; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [4,5,12,13,8,9,0,1,2,3,6,7,14,15,10,11]
+; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm3
+; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4],xmm1[5],xmm0[6,7]
+; AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0
+; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0
+; AVX2-NEXT: retq
+ %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 2, i32 6, i32 4, i32 0, i32 1, i32 3, i32 7, i32 13, i32 10, i32 14, i32 12, i32 8, i32 9, i32 11, i32 15, i32 13>
+ ret <16 x i16> %shuffle
+}
+
+define <16 x i16> @shuffle_v16i16_06_06_07_05_01_06_04_11_14_14_15_13_09_14_12_11(<16 x i16> %a, <16 x i16> %b) {
+; AVX1-LABEL: shuffle_v16i16_06_06_07_05_01_06_04_11_14_14_15_13_09_14_12_11:
+; AVX1: # BB#0:
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [12,13,12,13,14,15,10,11,2,3,12,13,8,9,6,7]
+; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm3
+; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7]
+; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v16i16_06_06_07_05_01_06_04_11_14_14_15_13_09_14_12_11:
+; AVX2: # BB#0:
+; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [12,13,12,13,14,15,10,11,2,3,12,13,8,9,6,7]
+; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm3
+; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3]
+; AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0
+; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0
+; AVX2-NEXT: retq
+ %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 6, i32 6, i32 7, i32 5, i32 1, i32 6, i32 4, i32 11, i32 14, i32 14, i32 15, i32 13, i32 9, i32 14, i32 12, i32 11>
+ ret <16 x i16> %shuffle
+}
+
+define <16 x i16> @shuffle_v16i16_00_00_04_04_04_04_04_12_08_08_12_12_12_12_12_12(<16 x i16> %a, <16 x i16> %b) {
+; AVX1-LABEL: shuffle_v16i16_00_00_04_04_04_04_04_12_08_08_12_12_12_12_12_12:
+; AVX1: # BB#0:
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vpsllq $48, %xmm1, %xmm2
+; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,8,9,8,9,8,9,8,9,8,9,14,15]
+; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
+; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,0,1,8,9,8,9,8,9,8,9,8,9,8,9]
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v16i16_00_00_04_04_04_04_04_12_08_08_12_12_12_12_12_12:
+; AVX2: # BB#0:
+; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX2-NEXT: vpsllq $48, %xmm1, %xmm2
+; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,8,9,8,9,8,9,8,9,8,9,14,15]
+; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
+; AVX2-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,0,1,8,9,8,9,8,9,8,9,8,9,8,9]
+; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+ %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 4, i32 4, i32 4, i32 4, i32 4, i32 12, i32 8, i32 8, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12>
+ ret <16 x i16> %shuffle
+}
+
+define <16 x i16> @shuffle_v16i16_04_04_00_00_04_04_04_12_12_12_08_08_12_12_12_12(<16 x i16> %a, <16 x i16> %b) {
+; AVX1-LABEL: shuffle_v16i16_04_04_00_00_04_04_04_12_12_12_08_08_12_12_12_12:
+; AVX1: # BB#0:
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vpsllq $48, %xmm1, %xmm2
+; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,0,1,0,1,8,9,8,9,8,9,14,15]
+; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
+; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[8,9,8,9,0,1,0,1,8,9,8,9,8,9,8,9]
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v16i16_04_04_00_00_04_04_04_12_12_12_08_08_12_12_12_12:
+; AVX2: # BB#0:
+; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX2-NEXT: vpsllq $48, %xmm1, %xmm2
+; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,0,1,0,1,8,9,8,9,8,9,14,15]
+; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
+; AVX2-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[8,9,8,9,0,1,0,1,8,9,8,9,8,9,8,9]
+; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+ %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 4, i32 0, i32 0, i32 4, i32 4, i32 4, i32 12, i32 12, i32 12, i32 8, i32 8, i32 12, i32 12, i32 12, i32 12>
+ ret <16 x i16> %shuffle
+}
+
+define <16 x i16> @shuffle_v16i16_00_04_04_00_04_04_04_12_08_12_12_08_12_12_12_12(<16 x i16> %a, <16 x i16> %b) {
+; AVX1-LABEL: shuffle_v16i16_00_04_04_00_04_04_04_12_08_12_12_08_12_12_12_12:
+; AVX1: # BB#0:
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vpsllq $48, %xmm1, %xmm2
+; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,14,15]
+; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
+; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v16i16_00_04_04_00_04_04_04_12_08_12_12_08_12_12_12_12:
+; AVX2: # BB#0:
+; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX2-NEXT: vpsllq $48, %xmm1, %xmm2
+; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,14,15]
+; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
+; AVX2-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
+; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+ %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 4, i32 4, i32 0, i32 4, i32 4, i32 4, i32 12, i32 8, i32 12, i32 12, i32 8, i32 12, i32 12, i32 12, i32 12>
+ ret <16 x i16> %shuffle
+}
+
+define <16 x i16> @shuffle_v16i16_00_04_04_00_00_00_00_08_08_12_12_08_08_08_08_08(<16 x i16> %a, <16 x i16> %b) {
+; AVX1-LABEL: shuffle_v16i16_00_04_04_00_00_00_00_08_08_12_12_08_08_08_08_08:
+; AVX1: # BB#0:
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1]
+; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,0,1,0,1,14,15]
+; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
+; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,8,9,8,9,0,1,0,1,0,1,0,1,0,1]
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v16i16_00_04_04_00_00_00_00_08_08_12_12_08_08_08_08_08:
+; AVX2: # BB#0:
+; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX2-NEXT: vpbroadcastw %xmm1, %xmm2
+; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,0,1,0,1,14,15]
+; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
+; AVX2-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,8,9,8,9,0,1,0,1,0,1,0,1,0,1]
+; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+ %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 4, i32 4, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 12, i32 12, i32 8, i32 8, i32 8, i32 8, i32 8>
+ ret <16 x i16> %shuffle
+}
+
+define <16 x i16> @shuffle_v16i16_00_04_04_00_04_05_06_15_08_12_12_08_12_13_14_15(<16 x i16> %a, <16 x i16> %b) {
+; AVX1-LABEL: shuffle_v16i16_00_04_04_00_04_05_06_15_08_12_12_08_12_13_14_15:
+; AVX1: # BB#0:
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7]
+; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
+; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
+; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,2,2,0,4,5,6,7]
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v16i16_00_04_04_00_04_05_06_15_08_12_12_08_12_13_14_15:
+; AVX2: # BB#0:
+; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; AVX2-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7]
+; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
+; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
+; AVX2-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,2,2,0,4,5,6,7]
+; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+ %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 4, i32 4, i32 0, i32 4, i32 5, i32 6, i32 15, i32 8, i32 12, i32 12, i32 8, i32 12, i32 13, i32 14, i32 15>
+ ret <16 x i16> %shuffle
+}
+
+define <16 x i16> @shuffle_v16i16_00_uu_04_04_04_04_04_12_08_uu_12_12_12_12_12_12(<16 x i16> %a, <16 x i16> %b) {
+; AVX1-LABEL: shuffle_v16i16_00_uu_04_04_04_04_04_12_08_uu_12_12_12_12_12_12:
+; AVX1: # BB#0:
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vpsllq $48, %xmm1, %xmm2
+; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,8,9,8,9,8,9,8,9,8,9,14,15]
+; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
+; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,2,3,8,9,8,9,8,9,8,9,8,9,8,9]
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v16i16_00_uu_04_04_04_04_04_12_08_uu_12_12_12_12_12_12:
+; AVX2: # BB#0:
+; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX2-NEXT: vpsllq $48, %xmm1, %xmm2
+; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,8,9,8,9,8,9,8,9,8,9,14,15]
+; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
+; AVX2-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,2,3,8,9,8,9,8,9,8,9,8,9,8,9]
+; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+ %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 undef, i32 4, i32 4, i32 4, i32 4, i32 4, i32 12, i32 8, i32 undef, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12>
+ ret <16 x i16> %shuffle
+}
+
+define <16 x i16> @shuffle_v16i16_04_04_uu_00_04_04_04_12_12_12_uu_08_12_12_12_12(<16 x i16> %a, <16 x i16> %b) {
+; AVX1-LABEL: shuffle_v16i16_04_04_uu_00_04_04_04_12_12_12_uu_08_12_12_12_12:
+; AVX1: # BB#0:
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vpsllq $48, %xmm1, %xmm2
+; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,0,1,8,9,8,9,8,9,14,15]
+; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
+; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[8,9,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v16i16_04_04_uu_00_04_04_04_12_12_12_uu_08_12_12_12_12:
+; AVX2: # BB#0:
+; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX2-NEXT: vpsllq $48, %xmm1, %xmm2
+; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,0,1,8,9,8,9,8,9,14,15]
+; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
+; AVX2-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[8,9,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
+; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+ %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 4, i32 undef, i32 0, i32 4, i32 4, i32 4, i32 12, i32 12, i32 12, i32 undef, i32 8, i32 12, i32 12, i32 12, i32 12>
+ ret <16 x i16> %shuffle
+}
+
+define <16 x i16> @shuffle_v16i16_uu_04_04_00_04_04_04_12_uu_12_12_08_12_12_12_12(<16 x i16> %a, <16 x i16> %b) {
+; AVX1-LABEL: shuffle_v16i16_uu_04_04_00_04_04_04_12_uu_12_12_08_12_12_12_12:
+; AVX1: # BB#0:
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vpsllq $48, %xmm1, %xmm2
+; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,14,15]
+; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
+; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v16i16_uu_04_04_00_04_04_04_12_uu_12_12_08_12_12_12_12:
+; AVX2: # BB#0:
+; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX2-NEXT: vpsllq $48, %xmm1, %xmm2
+; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,14,15]
+; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
+; AVX2-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
+; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+ %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 4, i32 4, i32 0, i32 4, i32 4, i32 4, i32 12, i32 undef, i32 12, i32 12, i32 8, i32 12, i32 12, i32 12, i32 12>
+ ret <16 x i16> %shuffle
+}
+
+define <16 x i16> @shuffle_v16i16_00_01_02_07_uu_uu_uu_uu_08_09_10_15_uu_uu_uu_uu(<16 x i16> %a, <16 x i16> %b) {
+; AVX1-LABEL: shuffle_v16i16_00_01_02_07_uu_uu_uu_uu_08_09_10_15_uu_uu_uu_uu:
+; AVX1: # BB#0:
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,2,3,4,5,14,15,4,5,14,15,12,13,14,15]
+; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v16i16_00_01_02_07_uu_uu_uu_uu_08_09_10_15_uu_uu_uu_uu:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,14,15,4,5,14,15,12,13,14,15,16,17,18,19,20,21,30,31,20,21,30,31,28,29,30,31]
+; AVX2-NEXT: retq
+ %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 8, i32 9, i32 10, i32 15, i32 undef, i32 undef, i32 undef, i32 undef>
+ ret <16 x i16> %shuffle
+}
+
+define <16 x i16> @shuffle_v16i16_uu_uu_uu_uu_04_05_06_11_uu_uu_uu_uu_12_13_14_11(<16 x i16> %a, <16 x i16> %b) {
+; AVX1-LABEL: shuffle_v16i16_uu_uu_uu_uu_04_05_06_11_uu_uu_uu_uu_12_13_14_11:
+; AVX1: # BB#0:
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[0,1,0,1]
+; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
+; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[12,13,6,7,4,5,6,7,8,9,10,11,12,13,6,7]
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v16i16_uu_uu_uu_uu_04_05_06_11_uu_uu_uu_uu_12_13_14_11:
+; AVX2: # BB#0:
+; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX2-NEXT: vpbroadcastq %xmm1, %xmm2
+; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm2[7]
+; AVX2-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[12,13,6,7,4,5,6,7,8,9,10,11,12,13,6,7]
+; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+ %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 4, i32 5, i32 6, i32 11, i32 undef, i32 undef, i32 undef, i32 undef, i32 12, i32 13, i32 14, i32 11>
+ ret <16 x i16> %shuffle
+}
+
+define <16 x i16> @shuffle_v16i16_04_05_06_03_uu_uu_uu_uu_12_13_14_11_uu_uu_uu_uu(<16 x i16> %a, <16 x i16> %b) {
+; AVX1-LABEL: shuffle_v16i16_04_05_06_03_uu_uu_uu_uu_12_13_14_11_uu_uu_uu_uu:
+; AVX1: # BB#0:
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [8,9,10,11,12,13,6,7,8,9,10,11,0,1,2,3]
+; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v16i16_04_05_06_03_uu_uu_uu_uu_12_13_14_11_uu_uu_uu_uu:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[8,9,10,11,12,13,6,7,8,9,10,11,0,1,2,3,24,25,26,27,28,29,22,23,24,25,26,27,16,17,18,19]
+; AVX2-NEXT: retq
+ %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 5, i32 6, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 12, i32 13, i32 14, i32 11, i32 undef, i32 undef, i32 undef, i32 undef>
+ ret <16 x i16> %shuffle
+}
+
+define <16 x i16> @shuffle_v16i16_00_01_02_07_04_05_06_11_08_09_10_15_12_13_14_11(<16 x i16> %a, <16 x i16> %b) {
+; AVX1-LABEL: shuffle_v16i16_00_01_02_07_04_05_06_11_08_09_10_15_12_13_14_11:
+; AVX1: # BB#0:
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,2,3,4,5,14,15,8,9,10,11,12,13,6,7]
+; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm3
+; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7]
+; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v16i16_00_01_02_07_04_05_06_11_08_09_10_15_12_13_14_11:
+; AVX2: # BB#0:
+; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,2,3,4,5,14,15,8,9,10,11,12,13,6,7]
+; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm3
+; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7]
+; AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0
+; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0
+; AVX2-NEXT: retq
+ %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 7, i32 4, i32 5, i32 6, i32 11, i32 8, i32 9, i32 10, i32 15, i32 12, i32 13, i32 14, i32 11>
+ ret <16 x i16> %shuffle
+}
+
+define <16 x i16> @shuffle_v16i16_04_05_06_03_00_01_02_15_12_13_14_11_08_09_10_15(<16 x i16> %a, <16 x i16> %b) {
+; AVX1-LABEL: shuffle_v16i16_04_05_06_03_00_01_02_15_12_13_14_11_08_09_10_15:
+; AVX1: # BB#0:
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,6,7,0,1,2,3,4,5,2,3]
+; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
+; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[8,9,10,11,12,13,6,7,0,1,2,3,4,5,14,15]
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v16i16_04_05_06_03_00_01_02_15_12_13_14_11_08_09_10_15:
+; AVX2: # BB#0:
+; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,6,7,0,1,2,3,4,5,2,3]
+; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
+; AVX2-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[8,9,10,11,12,13,6,7,0,1,2,3,4,5,14,15]
+; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+ %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 5, i32 6, i32 3, i32 0, i32 1, i32 2, i32 15, i32 12, i32 13, i32 14, i32 11, i32 8, i32 9, i32 10, i32 15>
+ ret <16 x i16> %shuffle
+}
+
+define <16 x i16> @shuffle_v16i16_03_07_01_00_02_07_03_13_11_15_09_08_10_15_11_13(<16 x i16> %a, <16 x i16> %b) {
+; AVX1-LABEL: shuffle_v16i16_03_07_01_00_02_07_03_13_11_15_09_08_10_15_11_13:
+; AVX1: # BB#0:
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [6,7,14,15,2,3,0,1,4,5,14,15,6,7,10,11]
+; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm3
+; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5],xmm0[6,7]
+; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v16i16_03_07_01_00_02_07_03_13_11_15_09_08_10_15_11_13:
+; AVX2: # BB#0:
+; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [6,7,14,15,2,3,0,1,4,5,14,15,6,7,10,11]
+; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm3
+; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3]
+; AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0
+; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0
+; AVX2-NEXT: retq
+ %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 3, i32 7, i32 1, i32 0, i32 2, i32 7, i32 3, i32 13, i32 11, i32 15, i32 9, i32 8, i32 10, i32 15, i32 11, i32 13>
+ ret <16 x i16> %shuffle
+}
+
+define <16 x i16> @shuffle_v16i16_00_16_01_17_02_18_03_27_08_24_09_25_10_26_11_27(<16 x i16> %a, <16 x i16> %b) {
+; AVX1-LABEL: shuffle_v16i16_00_16_01_17_02_18_03_27_08_24_09_25_10_26_11_27:
+; AVX1: # BB#0:
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
+; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
+; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,4,5,8,9,14,15,14,15,8,9,12,13,14,15]
+; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v16i16_00_16_01_17_02_18_03_27_08_24_09_25_10_26_11_27:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,8,8,9,9,10,10,11,11]
+; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
+; AVX2-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
+; AVX2-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,2,2,4,5,6,7]
+; AVX2-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,6,7]
+; AVX2-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3]
+; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
+; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15]
+; AVX2-NEXT: retq
+ %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 27, i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27>
+ ret <16 x i16> %shuffle
+}
+
+define <16 x i16> @shuffle_v16i16_00_20_01_21_02_22_03_31_08_28_09_29_10_30_11_31(<16 x i16> %a, <16 x i16> %b) {
+; AVX1-LABEL: shuffle_v16i16_00_20_01_21_02_22_03_31_08_28_09_29_10_30_11_31:
+; AVX1: # BB#0:
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
+; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm3[2,3,0,1]
+; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3]
+; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7]
+; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,4,5,8,9,14,15,14,15,8,9,12,13,14,15]
+; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v16i16_00_20_01_21_02_22_03_31_08_28_09_29_10_30_11_31:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
+; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [0,1,8,9,2,3,10,11,4,5,12,13,6,7,14,15]
+; AVX2-NEXT: vpshufb %xmm1, %xmm0, %xmm2
+; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
+; AVX2-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1,2,3,4,5,6],xmm0[7]
+; AVX2-NEXT: vpshufb %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm2, %ymm0
+; AVX2-NEXT: retq
+ %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 20, i32 1, i32 21, i32 2, i32 22, i32 3, i32 31, i32 8, i32 28, i32 9, i32 29, i32 10, i32 30, i32 11, i32 31>
+ ret <16 x i16> %shuffle
+}
+
+define <16 x i16> @shuffle_v16i16_04_20_05_21_06_22_07_31_12_28_13_29_14_30_15_31(<16 x i16> %a, <16 x i16> %b) {
+; AVX1-LABEL: shuffle_v16i16_04_20_05_21_06_22_07_31_12_28_13_29_14_30_15_31:
+; AVX1: # BB#0:
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm3 = xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
+; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
+; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[4,5,0,1,4,5,4,5,0,1,4,5,8,9,14,15]
+; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v16i16_04_20_05_21_06_22_07_31_12_28_13_29_14_30_15_31:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4,4,5,5,6,6,7,7,12,12,13,13,14,14,15,15]
+; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
+; AVX2-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
+; AVX2-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,2,2,4,5,6,7]
+; AVX2-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,6,7]
+; AVX2-NEXT: vpunpckhwd {{.*#+}} xmm2 = xmm2[4,4,5,5,6,6,7,7]
+; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
+; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15]
+; AVX2-NEXT: retq
+ %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 31, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
+ ret <16 x i16> %shuffle
+}
+
+define <16 x i16> @shuffle_v16i16_04_16_05_17_06_18_07_27_12_24_13_25_14_26_15_27(<16 x i16> %a, <16 x i16> %b) {
+; AVX1-LABEL: shuffle_v16i16_04_16_05_17_06_18_07_27_12_24_13_25_14_26_15_27:
+; AVX1: # BB#0:
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[2,3,0,1]
+; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
+; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
+; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[4,5,0,1,4,5,4,5,0,1,4,5,8,9,14,15]
+; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v16i16_04_16_05_17_06_18_07_27_12_24_13_25_14_26_15_27:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
+; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7]
+; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [8,9,0,1,10,11,2,3,12,13,4,5,14,15,6,7]
+; AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0
+; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm1
+; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+ %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 16, i32 5, i32 17, i32 6, i32 18, i32 7, i32 27, i32 12, i32 24, i32 13, i32 25, i32 14, i32 26, i32 15, i32 27>
+ ret <16 x i16> %shuffle
+}
+
+define <16 x i16> @shuffle_v16i16_00_16_01_17_06_22_07_31_08_24_09_25_14_30_15_31(<16 x i16> %a, <16 x i16> %b) {
+; AVX1-LABEL: shuffle_v16i16_00_16_01_17_06_22_07_31_08_24_09_25_14_30_15_31:
+; AVX1: # BB#0:
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm2[0,3,2,3]
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
+; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm4[0,3,2,3]
+; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm3 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3]
+; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,2,1,4,5,6,7]
+; AVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,6,6,7]
+; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,5],xmm2[6,7]
+; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[2,3,6,7,10,11,14,15,14,15,10,11,12,13,14,15]
+; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,3,2,3]
+; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v16i16_00_16_01_17_06_22_07_31_08_24_09_25_14_30_15_31:
+; AVX2: # BB#0:
+; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
+; AVX2-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,2,1,4,5,6,7]
+; AVX2-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,6,6,7]
+; AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0,1,2],xmm2[3]
+; AVX2-NEXT: vpshuflw {{.*#+}} xmm2 = xmm2[0,0,2,1,4,5,6,7]
+; AVX2-NEXT: vpshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,4,6,6,7]
+; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
+; AVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,1,1,3,4,5,6,7,8,9,9,11,12,13,14,15]
+; AVX2-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,6,5,7,7,8,9,10,11,14,13,15,15]
+; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15]
+; AVX2-NEXT: retq
+ %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 6, i32 22, i32 7, i32 31, i32 8, i32 24, i32 9, i32 25, i32 14, i32 30, i32 15, i32 31>
+ ret <16 x i16> %shuffle
+}
+
+define <16 x i16> @shuffle_v16i16_00_20_01_21_06_16_07_25_08_28_09_29_14_24_15_25(<16 x i16> %a, <16 x i16> %b) {
+; AVX1-LABEL: shuffle_v16i16_00_20_01_21_06_16_07_25_08_28_09_29_14_24_15_25:
+; AVX1: # BB#0:
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm2[2,0,2,3]
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
+; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm4[0,3,2,3]
+; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm3 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3]
+; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2,3,4,5,6,7]
+; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[8,9,10,11,0,1,2,3,2,3,0,1,12,13,2,3]
+; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,3,2,3]
+; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v16i16_00_20_01_21_06_16_07_25_08_28_09_29_14_24_15_25:
+; AVX2: # BB#0:
+; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
+; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [8,9,8,9,4,5,10,11,0,1,0,1,12,13,2,3]
+; AVX2-NEXT: vpshufb %xmm3, %xmm2, %xmm4
+; AVX2-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2,3,4,5,6,7]
+; AVX2-NEXT: vpshufb %xmm3, %xmm1, %xmm1
+; AVX2-NEXT: vinserti128 $1, %xmm4, %ymm1, %ymm1
+; AVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,1,1,3,4,5,6,7,8,9,9,11,12,13,14,15]
+; AVX2-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,6,5,7,7,8,9,10,11,14,13,15,15]
+; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15]
+; AVX2-NEXT: retq
+ %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 20, i32 1, i32 21, i32 6, i32 16, i32 7, i32 25, i32 8, i32 28, i32 9, i32 29, i32 14, i32 24, i32 15, i32 25>
+ ret <16 x i16> %shuffle
+}
+
+define <16 x i16> @shuffle_v16i16_01_00_17_16_03_02_19_26_09_08_25_24_11_10_27_26(<16 x i16> %a, <16 x i16> %b) {
+; AVX1-LABEL: shuffle_v16i16_01_00_17_16_03_02_19_26_09_08_25_24_11_10_27_26:
+; AVX1: # BB#0:
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
+; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[4,5,0,1,12,13,10,11,8,9,10,11,12,13,10,11]
+; AVX1-NEXT: vpshuflw {{.*#+}} xmm3 = xmm0[1,0,3,2,4,5,6,7]
+; AVX1-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm3[0],xmm1[0],xmm3[1],xmm1[1]
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
+; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[2,0,3,1,4,5,6,7]
+; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,4,7,5]
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v16i16_01_00_17_16_03_02_19_26_09_08_25_24_11_10_27_26:
+; AVX2: # BB#0:
+; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
+; AVX2-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
+; AVX2-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,1,2,0,4,5,6,7]
+; AVX2-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,5,6,5]
+; AVX2-NEXT: vpshufb {{.*#+}} xmm2 = xmm2[0,1,2,3,2,3,0,1,8,9,10,11,6,7,4,5]
+; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
+; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[2,3,0,1,4,5,6,7,6,7,4,5,4,5,6,7,18,19,16,17,20,21,22,23,22,23,20,21,20,21,22,23]
+; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
+; AVX2-NEXT: retq
+ %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 1, i32 0, i32 17, i32 16, i32 3, i32 2, i32 19, i32 26, i32 9, i32 8, i32 25, i32 24, i32 11, i32 10, i32 27, i32 26>
+ ret <16 x i16> %shuffle
+}
+
+define <16 x i16> @shuffle_v16i16_16_00_17_01_18_02_19_11_24_08_25_09_26_10_27_11(<16 x i16> %a, <16 x i16> %b) {
+; AVX1-LABEL: shuffle_v16i16_16_00_17_01_18_02_19_11_24_08_25_09_26_10_27_11:
+; AVX1: # BB#0:
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
+; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
+; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
+; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,14,15,14,15,8,9,12,13,14,15]
+; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v16i16_16_00_17_01_18_02_19_11_24_08_25_09_26_10_27_11:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpunpcklwd {{.*#+}} ymm1 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11]
+; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
+; AVX2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
+; AVX2-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,2,4,5,6,7]
+; AVX2-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,6,7]
+; AVX2-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3]
+; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
+; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7],ymm1[8],ymm0[9],ymm1[10],ymm0[11],ymm1[12],ymm0[13],ymm1[14],ymm0[15]
+; AVX2-NEXT: retq
+ %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 16, i32 0, i32 17, i32 1, i32 18, i32 2, i32 19, i32 11, i32 24, i32 8, i32 25, i32 9, i32 26, i32 10, i32 27, i32 11>
+ ret <16 x i16> %shuffle
+}
+
+define <16 x i16> @shuffle_v16i16_20_04_21_05_22_06_23_15_28_12_29_13_30_14_31_15(<16 x i16> %a, <16 x i16> %b) {
+; AVX1-LABEL: shuffle_v16i16_20_04_21_05_22_06_23_15_28_12_29_13_30_14_31_15:
+; AVX1: # BB#0:
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
+; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm3 = xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
+; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
+; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[4,5,0,1,4,5,4,5,0,1,4,5,8,9,14,15]
+; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v16i16_20_04_21_05_22_06_23_15_28_12_29_13_30_14_31_15:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpunpckhwd {{.*#+}} ymm1 = ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15]
+; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
+; AVX2-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
+; AVX2-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,2,4,5,6,7]
+; AVX2-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,6,7]
+; AVX2-NEXT: vpunpckhwd {{.*#+}} xmm2 = xmm2[4,4,5,5,6,6,7,7]
+; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
+; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7],ymm1[8],ymm0[9],ymm1[10],ymm0[11],ymm1[12],ymm0[13],ymm1[14],ymm0[15]
+; AVX2-NEXT: retq
+ %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 20, i32 4, i32 21, i32 5, i32 22, i32 6, i32 23, i32 15, i32 28, i32 12, i32 29, i32 13, i32 30, i32 14, i32 31, i32 15>
+ ret <16 x i16> %shuffle
+}
+
+define <16 x i16> @shuffle_v16i16_00_02_01_03_20_22_21_31_08_10_09_11_28_30_29_31(<16 x i16> %a, <16 x i16> %b) {
+; AVX1-LABEL: shuffle_v16i16_00_02_01_03_20_22_21_31_08_10_09_11_28_30_29_31:
+; AVX1: # BB#0:
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT: vpshuflw {{.*#+}} xmm2 = xmm2[0,2,1,3,4,5,6,7]
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
+; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm3[2,3,2,3]
+; AVX1-NEXT: vpshuflw {{.*#+}} xmm4 = xmm4[0,2,1,3,4,5,6,7]
+; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm4[0]
+; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7]
+; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,8,9,4,5,14,15,0,1,4,5,4,5,6,7]
+; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7]
+; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v16i16_00_02_01_03_20_22_21_31_08_10_09_11_28_30_29_31:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
+; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX2-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7]
+; AVX2-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,5,7]
+; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
+; AVX2-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,2,1,3,4,5,6,7]
+; AVX2-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,6,5,7]
+; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+ %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 2, i32 1, i32 3, i32 20, i32 22, i32 21, i32 31, i32 8, i32 10, i32 9, i32 11, i32 28, i32 30, i32 29, i32 31>
+ ret <16 x i16> %shuffle
+}
+
+define <16 x i16> @shuffle_v16i16_04_04_03_18_uu_uu_uu_uu_12_12_11_26_uu_uu_uu_uu(<16 x i16> %a, <16 x i16> %b) {
+; AVX1-LABEL: shuffle_v16i16_04_04_03_18_uu_uu_uu_uu_12_12_11_26_uu_uu_uu_uu:
+; AVX1: # BB#0:
+; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm0[0,1],xmm1[2],xmm0[3,4,5,6,7]
+; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[2,1,2,3]
+; AVX1-NEXT: vpshuflw {{.*#+}} xmm2 = xmm2[0,0,3,2,4,5,6,7]
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3,4,5,6,7]
+; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,1,2,3]
+; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,3,2,4,5,6,7]
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v16i16_04_04_03_18_uu_uu_uu_uu_12_12_11_26_uu_uu_uu_uu:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1],ymm1[2],ymm0[3,4,5,6,7,8,9],ymm1[10],ymm0[11,12,13,14,15]
+; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,1,2,3,6,5,6,7]
+; AVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,0,3,2,4,5,6,7,8,8,11,10,12,13,14,15]
+; AVX2-NEXT: retq
+ %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 4, i32 3, i32 18, i32 undef, i32 undef, i32 undef, i32 undef, i32 12, i32 12, i32 11, i32 26, i32 undef, i32 undef, i32 undef, i32 undef>
+ ret <16 x i16> %shuffle
+}
+
+define <16 x i16> @shuffle_v16i16_00_03_02_21_uu_uu_uu_uu_08_11_10_29_uu_uu_uu_uu(<16 x i16> %a, <16 x i16> %b) {
+; AVX1-LABEL: shuffle_v16i16_00_03_02_21_uu_uu_uu_uu_08_11_10_29_uu_uu_uu_uu:
+; AVX1: # BB#0:
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm3[0,1,2,3],xmm2[4,5,6,7]
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,6,7,4,5,10,11,0,1,10,11,0,1,2,3]
+; AVX1-NEXT: vpshufb %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
+; AVX1-NEXT: vpshufb %xmm3, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v16i16_00_03_02_21_uu_uu_uu_uu_08_11_10_29_uu_uu_uu_uu:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
+; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,6,7,4,5,10,11,0,1,10,11,0,1,2,3,16,17,22,23,20,21,26,27,16,17,26,27,16,17,18,19]
+; AVX2-NEXT: retq
+ %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 3, i32 2, i32 21, i32 undef, i32 undef, i32 undef, i32 undef, i32 8, i32 11, i32 10, i32 29, i32 undef, i32 undef, i32 undef, i32 undef>
+ ret <16 x i16> %shuffle
+}
+
+define <16 x i16> @shuffle_v16i16_uu_uu_uu_21_uu_uu_uu_uu_uu_uu_uu_29_uu_uu_uu_uu(<16 x i16> %a, <16 x i16> %b) {
+; AVX1-LABEL: shuffle_v16i16_uu_uu_uu_21_uu_uu_uu_uu_uu_uu_uu_29_uu_uu_uu_uu:
+; AVX1: # BB#0:
+; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm1[0,2,2,3,4,6,6,7]
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v16i16_uu_uu_uu_21_uu_uu_uu_uu_uu_uu_uu_29_uu_uu_uu_uu:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm1[0,2,2,3,4,6,6,7]
+; AVX2-NEXT: retq
+ %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 21, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 29, i32 undef, i32 undef, i32 undef, i32 undef>
+ ret <16 x i16> %shuffle
+}
+
+define <16 x i16> @shuffle_v16i16_00_01_02_21_uu_uu_uu_uu_08_09_10_29_uu_uu_uu_uu(<16 x i16> %a, <16 x i16> %b) {
+; AVX1-LABEL: shuffle_v16i16_00_01_02_21_uu_uu_uu_uu_08_09_10_29_uu_uu_uu_uu:
+; AVX1: # BB#0:
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
+; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[2,2,3,3]
+; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1,2],xmm3[3],xmm2[4,5,6,7]
+; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
+; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7]
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v16i16_00_01_02_21_uu_uu_uu_uu_08_09_10_29_uu_uu_uu_uu:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[0,2,2,3,4,6,6,7]
+; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2],ymm1[3],ymm0[4,5,6,7,8,9,10],ymm1[11],ymm0[12,13,14,15]
+; AVX2-NEXT: retq
+ %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 21, i32 undef, i32 undef, i32 undef, i32 undef, i32 8, i32 9, i32 10, i32 29, i32 undef, i32 undef, i32 undef, i32 undef>
+ ret <16 x i16> %shuffle
+}
+
+define <16 x i16> @shuffle_v16i16_uu_uu_uu_uu_20_21_22_11_uu_uu_uu_uu_28_29_30_11(<16 x i16> %a, <16 x i16> %b) {
+; AVX1-LABEL: shuffle_v16i16_uu_uu_uu_uu_20_21_22_11_uu_uu_uu_uu_28_29_30_11:
+; AVX1: # BB#0:
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
+; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1,2,3,4,5,6],xmm0[7]
+; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5,6],xmm0[7]
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v16i16_uu_uu_uu_uu_20_21_22_11_uu_uu_uu_uu_28_29_30_11:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,2]
+; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15]
+; AVX2-NEXT: retq
+ %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 20, i32 21, i32 22, i32 11, i32 undef, i32 undef, i32 undef, i32 undef, i32 28, i32 29, i32 30, i32 11>
+ ret <16 x i16> %shuffle
+}
+
+define <16 x i16> @shuffle_v16i16_20_21_22_03_uu_uu_uu_uu_28_29_30_11_uu_uu_uu_uu(<16 x i16> %a, <16 x i16> %b) {
+; AVX1-LABEL: shuffle_v16i16_20_21_22_03_uu_uu_uu_uu_28_29_30_11_uu_uu_uu_uu:
+; AVX1: # BB#0:
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
+; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[2,3,0,1]
+; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm3[0,1,2],xmm2[3],xmm3[4,5,6,7]
+; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
+; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3],xmm1[4,5,6,7]
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v16i16_20_21_22_03_uu_uu_uu_uu_28_29_30_11_uu_uu_uu_uu:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[2,3,2,3,6,7,6,7]
+; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3],ymm1[4,5,6,7,8,9,10],ymm0[11],ymm1[12,13,14,15]
+; AVX2-NEXT: retq
+ %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 20, i32 21, i32 22, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 28, i32 29, i32 30, i32 11, i32 undef, i32 undef, i32 undef, i32 undef>
+ ret <16 x i16> %shuffle
+}
+
+define <16 x i16> @shuffle_v16i16_00_01_02_21_20_21_22_11_08_09_10_29_28_29_30_11(<16 x i16> %a, <16 x i16> %b) {
+; AVX1-LABEL: shuffle_v16i16_00_01_02_21_20_21_22_11_08_09_10_29_28_29_30_11:
+; AVX1: # BB#0:
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm1[0,2,2,3]
+; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm3[3,4,5,6],xmm0[7]
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
+; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm2[0,1,2,3],xmm1[4,5,6,7]
+; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,2,3,4,5,10,11,8,9,10,11,12,13,6,7]
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v16i16_00_01_02_21_20_21_22_11_08_09_10_29_28_29_30_11:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
+; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7]
+; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,2,3,4,5,10,11,8,9,10,11,12,13,6,7]
+; AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0
+; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm1
+; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+ %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 21, i32 20, i32 21, i32 22, i32 11, i32 8, i32 9, i32 10, i32 29, i32 28, i32 29, i32 30, i32 11>
+ ret <16 x i16> %shuffle
+}
+
+define <16 x i16> @shuffle_v16i16_00_17_02_03_20_21_22_15_08_25_10_11_28_29_30_15(<16 x i16> %a, <16 x i16> %b) {
+; AVX1-LABEL: shuffle_v16i16_00_17_02_03_20_21_22_15_08_25_10_11_28_29_30_15:
+; AVX1: # BB#0:
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm3[0],xmm2[1],xmm3[2,3],xmm2[4,5,6],xmm3[7]
+; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm3[4,5,6,7]
+; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3],xmm1[4,5,6],xmm0[7]
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v16i16_00_17_02_03_20_21_22_15_08_25_10_11_28_29_30_15:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,2,3]
+; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2,3],ymm1[4,5,6],ymm0[7,8],ymm1[9],ymm0[10,11],ymm1[12,13,14],ymm0[15]
+; AVX2-NEXT: retq
+ %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 17, i32 2, i32 3, i32 20, i32 21, i32 22, i32 15, i32 8, i32 25, i32 10, i32 11, i32 28, i32 29, i32 30, i32 15>
+ ret <16 x i16> %shuffle
+}
+
+define <16 x i16> @shuffle_v16i16_uu_uu_uu_01_uu_05_07_25_uu_uu_uu_09_uu_13_15_25(<16 x i16> %a, <16 x i16> %b) {
+; AVX1-LABEL: shuffle_v16i16_uu_uu_uu_01_uu_05_07_25_uu_uu_uu_09_uu_13_15_25:
+; AVX1: # BB#0:
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
+; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,1,2,0]
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT: vpshuflw {{.*#+}} xmm2 = xmm2[0,1,2,1,4,5,6,7]
+; AVX1-NEXT: vpshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,4,5,7,7]
+; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1,2,3,4,5,6],xmm1[7]
+; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,1,2,1,4,5,6,7]
+; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,7,7]
+; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v16i16_uu_uu_uu_01_uu_05_07_25_uu_uu_uu_09_uu_13_15_25:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpbroadcastd {{.*}}(%rip), %ymm2
+; AVX2-NEXT: vpermd %ymm1, %ymm2, %ymm1
+; AVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,1,2,1,4,5,6,7,8,9,10,9,12,13,14,15]
+; AVX2-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,7,7,8,9,10,11,12,13,15,15]
+; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6],ymm1[7],ymm0[8,9,10,11,12,13,14],ymm1[15]
+; AVX2-NEXT: retq
+ %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 1, i32 undef, i32 5, i32 7, i32 25, i32 undef, i32 undef, i32 undef, i32 9, i32 undef, i32 13, i32 15, i32 25>
+ ret <16 x i16> %shuffle
+}
+
+define <16 x i16> @shuffle_v16i16_uu_uu_04_uu_16_18_20_uu_uu_uu_12_uu_24_26_28_uu(<16 x i16> %a, <16 x i16> %b) {
+; AVX1-LABEL: shuffle_v16i16_uu_uu_04_uu_16_18_20_uu_uu_uu_12_uu_24_26_28_uu:
+; AVX1: # BB#0:
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,4,5,4,5,6,7,0,1,4,5,8,9,4,5]
+; AVX1-NEXT: vpshufb %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
+; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm4[2,2,3,3]
+; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm4[0,1,2,3],xmm2[4,5,6,7]
+; AVX1-NEXT: vpshufb %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
+; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v16i16_uu_uu_04_uu_16_18_20_uu_uu_uu_12_uu_24_26_28_uu:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[0,1,4,5,4,5,6,7,0,1,4,5,8,9,4,5,16,17,20,21,20,21,22,23,16,17,20,21,24,25,20,21]
+; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,2,2,3,4,6,6,7]
+; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
+; AVX2-NEXT: retq
+ %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 undef, i32 4, i32 undef, i32 16, i32 18, i32 20, i32 undef, i32 undef, i32 undef, i32 12, i32 undef, i32 24, i32 26, i32 28, i32 undef>
+ ret <16 x i16> %shuffle
+}
+
+define <16 x i16> @shuffle_v16i16_21_22_23_00_01_02_03_12_29_30_31_08_09_10_11_12(<16 x i16> %a, <16 x i16> %b) {
+; AVX1-LABEL: shuffle_v16i16_21_22_23_00_01_02_03_12_29_30_31_08_09_10_11_12:
+; AVX1: # BB#0:
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[10,11,12,13,14,15],xmm3[0,1,2,3,4,5,6,7,8,9]
+; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm3[4,5,6,7]
+; AVX1-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9]
+; AVX1-NEXT: vpsrldq {{.*#+}} xmm1 = xmm1[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3,4,5,6,7]
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v16i16_21_22_23_00_01_02_03_12_29_30_31_08_09_10_11_12:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3,4],ymm1[5,6,7],ymm0[8,9,10,11,12],ymm1[13,14,15]
+; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4],xmm0[5,6,7]
+; AVX2-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
+; AVX2-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
+; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+ %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 21, i32 22, i32 23, i32 0, i32 1, i32 2, i32 3, i32 12, i32 29, i32 30, i32 31, i32 8, i32 9, i32 10, i32 11, i32 12>
+ ret <16 x i16> %shuffle
+}
+
+define <16 x i16> @shuffle_v16i16_uu_22_uu_uu_01_02_03_uu_uu_30_uu_uu_09_10_11_uu(<16 x i16> %a, <16 x i16> %b) {
+; AVX1-LABEL: shuffle_v16i16_uu_22_uu_uu_01_02_03_uu_uu_30_uu_uu_09_10_11_uu:
+; AVX1: # BB#0:
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[10,11,12,13,14,15],xmm3[0,1,2,3,4,5,6,7,8,9]
+; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9]
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v16i16_uu_22_uu_uu_01_02_03_uu_uu_30_uu_uu_09_10_11_uu:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm1[10,11,12,13,14,15],ymm0[0,1,2,3,4,5,6,7,8,9],ymm1[26,27,28,29,30,31],ymm0[16,17,18,19,20,21,22,23,24,25]
+; AVX2-NEXT: retq
+ %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 22, i32 undef, i32 undef, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 30, i32 undef, i32 undef, i32 9, i32 10, i32 11, i32 undef>
+ ret <16 x i16> %shuffle
+}
+
+define <16 x i16> @shuffle_v16i16_05_06_07_00_01_02_03_12_13_14_15_08_09_10_11_12(<16 x i16> %a, <16 x i16> %b) {
+; AVX1-LABEL: shuffle_v16i16_05_06_07_00_01_02_03_12_13_14_15_08_09_10_11_12:
+; AVX1: # BB#0:
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4],xmm0[5,6,7]
+; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
+; AVX1-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v16i16_05_06_07_00_01_02_03_12_13_14_15_08_09_10_11_12:
+; AVX2: # BB#0:
+; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4],xmm0[5,6,7]
+; AVX2-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
+; AVX2-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
+; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+ %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12>
+ ret <16 x i16> %shuffle
+}
+
+define <16 x i16> @shuffle_v16i16_uu_06_uu_uu_01_02_03_uu_uu_14_uu_uu_09_10_11_uu(<16 x i16> %a, <16 x i16> %b) {
+; AVX1-LABEL: shuffle_v16i16_uu_06_uu_uu_01_02_03_uu_uu_14_uu_uu_09_10_11_uu:
+; AVX1: # BB#0:
+; AVX1-NEXT: vpalignr {{.*#+}} xmm1 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v16i16_uu_06_uu_uu_01_02_03_uu_uu_14_uu_uu_09_10_11_uu:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9,26,27,28,29,30,31,16,17,18,19,20,21,22,23,24,25]
+; AVX2-NEXT: retq
+ %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 6, i32 undef, i32 undef, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 14, i32 undef, i32 undef, i32 9, i32 10, i32 11, i32 undef>
+ ret <16 x i16> %shuffle
+}
+
+define <16 x i16> @shuffle_v16i16_uu_uu_uu_uu_01_02_03_uu_uu_uu_uu_uu_09_10_11_uu(<16 x i16> %a, <16 x i16> %b) {
+; AVX1-LABEL: shuffle_v16i16_uu_uu_uu_uu_01_02_03_uu_uu_uu_uu_uu_09_10_11_uu:
+; AVX1: # BB#0:
+; AVX1-NEXT: vpslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9]
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9]
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v16i16_uu_uu_uu_uu_01_02_03_uu_uu_uu_uu_uu_09_10_11_uu:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,ymm0[0,1,2,3,4,5,6,7,8,9],zero,zero,zero,zero,zero,zero,ymm0[16,17,18,19,20,21,22,23,24,25]
+; AVX2-NEXT: retq
+ %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 9, i32 10, i32 11, i32 undef>
+ ret <16 x i16> %shuffle
+}
+
+define <16 x i16> @shuffle_v16i16_19_20_21_22_23_00_01_10_27_28_29_30_31_08_09_10(<16 x i16> %a, <16 x i16> %b) {
+; AVX1-LABEL: shuffle_v16i16_19_20_21_22_23_00_01_10_27_28_29_30_31_08_09_10:
+; AVX1: # BB#0:
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[6,7,8,9,10,11,12,13,14,15],xmm3[0,1,2,3,4,5]
+; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
+; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,4,5,6,7,8,9,0,1,4,5,10,11]
+; AVX1-NEXT: vpsrldq {{.*#+}} xmm1 = xmm1[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
+; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4],xmm0[5,6,7]
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v16i16_19_20_21_22_23_00_01_10_27_28_29_30_31_08_09_10:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2],ymm1[3,4,5,6,7],ymm0[8,9,10],ymm1[11,12,13,14,15]
+; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3,4,5,6,7]
+; AVX2-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
+; AVX2-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
+; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+ %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 19, i32 20, i32 21, i32 22, i32 23, i32 0, i32 1, i32 10, i32 27, i32 28, i32 29, i32 30, i32 31, i32 8, i32 9, i32 10>
+ ret <16 x i16> %shuffle
+}
+
+define <16 x i16> @shuffle_v16i16_uu_20_21_22_uu_uu_01_uu_uu_28_29_30_uu_uu_09_uu(<16 x i16> %a, <16 x i16> %b) {
+; AVX1-LABEL: shuffle_v16i16_uu_20_21_22_uu_uu_01_uu_uu_28_29_30_uu_uu_09_uu:
+; AVX1: # BB#0:
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[6,7,8,9,10,11,12,13,14,15],xmm3[0,1,2,3,4,5]
+; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5]
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v16i16_uu_20_21_22_uu_uu_01_uu_uu_28_29_30_uu_uu_09_uu:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm1[6,7,8,9,10,11,12,13,14,15],ymm0[0,1,2,3,4,5],ymm1[22,23,24,25,26,27,28,29,30,31],ymm0[16,17,18,19,20,21]
+; AVX2-NEXT: retq
+ %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 20, i32 21, i32 22, i32 undef, i32 undef, i32 1, i32 undef, i32 undef, i32 28, i32 29, i32 30, i32 undef, i32 undef, i32 9, i32 undef>
+ ret <16 x i16> %shuffle
+}
+
+define <16 x i16> @shuffle_v16i16_03_04_05_06_07_00_01_10_11_12_13_14_15_08_09_10(<16 x i16> %a, <16 x i16> %b) {
+; AVX1-LABEL: shuffle_v16i16_03_04_05_06_07_00_01_10_11_12_13_14_15_08_09_10:
+; AVX1: # BB#0:
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3,4,5,6,7]
+; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
+; AVX1-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v16i16_03_04_05_06_07_00_01_10_11_12_13_14_15_08_09_10:
+; AVX2: # BB#0:
+; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3,4,5,6,7]
+; AVX2-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
+; AVX2-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
+; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+ %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10>
+ ret <16 x i16> %shuffle
+}
+
+define <16 x i16> @shuffle_v16i16_uu_04_05_06_uu_uu_01_uu_uu_12_13_14_uu_uu_09_uu(<16 x i16> %a, <16 x i16> %b) {
+; AVX1-LABEL: shuffle_v16i16_uu_04_05_06_uu_uu_01_uu_uu_12_13_14_uu_uu_09_uu:
+; AVX1: # BB#0:
+; AVX1-NEXT: vpalignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v16i16_uu_04_05_06_uu_uu_01_uu_uu_12_13_14_uu_uu_09_uu:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5,22,23,24,25,26,27,28,29,30,31,16,17,18,19,20,21]
+; AVX2-NEXT: retq
+ %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 4, i32 5, i32 6, i32 undef, i32 undef, i32 1, i32 undef, i32 undef, i32 12, i32 13, i32 14, i32 undef, i32 undef, i32 9, i32 undef>
+ ret <16 x i16> %shuffle
+}
+
+define <16 x i16> @shuffle_v16i16_uu_04_05_06_uu_uu_uu_uu_uu_12_13_14_uu_uu_uu_uu(<16 x i16> %a, <16 x i16> %b) {
+; AVX1-LABEL: shuffle_v16i16_uu_04_05_06_uu_uu_uu_uu_uu_12_13_14_uu_uu_uu_uu:
+; AVX1: # BB#0:
+; AVX1-NEXT: vpsrldq {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v16i16_uu_04_05_06_uu_uu_uu_uu_uu_12_13_14_uu_uu_uu_uu:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpsrldq {{.*#+}} ymm0 = ymm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,ymm0[22,23,24,25,26,27,28,29,30,31],zero,zero,zero,zero,zero,zero
+; AVX2-NEXT: retq
+ %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 4, i32 5, i32 6, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 12, i32 13, i32 14, i32 undef, i32 undef, i32 undef, i32 undef>
+ ret <16 x i16> %shuffle
+}
+
+define <16 x i16> @shuffle_v16i16_03_04_05_06_07_16_17_26_11_12_13_14_15_24_25_26(<16 x i16> %a, <16 x i16> %b) {
+; AVX1-LABEL: shuffle_v16i16_03_04_05_06_07_16_17_26_11_12_13_14_15_24_25_26:
+; AVX1: # BB#0:
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
+; AVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[6,7,8,9,10,11,12,13,14,15],xmm3[0,1,2,3,4,5]
+; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
+; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1,4,5,4,5,6,7,8,9,0,1,4,5,10,11]
+; AVX1-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
+; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4],xmm1[5,6,7]
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v16i16_03_04_05_06_07_16_17_26_11_12_13_14_15_24_25_26:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3,4,5,6,7],ymm1[8,9,10],ymm0[11,12,13,14,15]
+; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3,4,5,6,7]
+; AVX2-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
+; AVX2-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
+; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+ %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 26, i32 11, i32 12, i32 13, i32 14, i32 15, i32 24, i32 25, i32 26>
+ ret <16 x i16> %shuffle
+}
+
+define <16 x i16> @shuffle_v16i16_uu_04_05_06_uu_uu_17_uu_uu_12_13_14_uu_uu_25_uu(<16 x i16> %a, <16 x i16> %b) {
+; AVX1-LABEL: shuffle_v16i16_uu_04_05_06_uu_uu_17_uu_uu_12_13_14_uu_uu_25_uu:
+; AVX1: # BB#0:
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
+; AVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[6,7,8,9,10,11,12,13,14,15],xmm3[0,1,2,3,4,5]
+; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5]
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v16i16_uu_04_05_06_uu_uu_17_uu_uu_12_13_14_uu_uu_25_uu:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[6,7,8,9,10,11,12,13,14,15],ymm1[0,1,2,3,4,5],ymm0[22,23,24,25,26,27,28,29,30,31],ymm1[16,17,18,19,20,21]
+; AVX2-NEXT: retq
+ %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 4, i32 5, i32 6, i32 undef, i32 undef, i32 17, i32 undef, i32 undef, i32 12, i32 13, i32 14, i32 undef, i32 undef, i32 25, i32 undef>
+ ret <16 x i16> %shuffle
+}
+
+define <16 x i16> @shuffle_v16i16_05_06_07_16_17_18_19_28_13_14_15_24_25_26_27_28(<16 x i16> %a, <16 x i16> %b) {
+; AVX1-LABEL: shuffle_v16i16_05_06_07_16_17_18_19_28_13_14_15_24_25_26_27_28:
+; AVX1: # BB#0:
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
+; AVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[10,11,12,13,14,15],xmm3[0,1,2,3,4,5,6,7,8,9]
+; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm3[4,5,6,7]
+; AVX1-NEXT: vpslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5,6,7,8,9]
+; AVX1-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3,4,5,6,7]
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v16i16_05_06_07_16_17_18_19_28_13_14_15_24_25_26_27_28:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4],ymm0[5,6,7],ymm1[8,9,10,11,12],ymm0[13,14,15]
+; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4],xmm0[5,6,7]
+; AVX2-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
+; AVX2-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
+; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+ %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 28, i32 13, i32 14, i32 15, i32 24, i32 25, i32 26, i32 27, i32 28>
+ ret <16 x i16> %shuffle
+}
+
+define <16 x i16> @shuffle_v16i16_uu_06_uu_uu_17_18_19_uu_uu_14_uu_uu_25_26_27_uu(<16 x i16> %a, <16 x i16> %b) {
+; AVX1-LABEL: shuffle_v16i16_uu_06_uu_uu_17_18_19_uu_uu_14_uu_uu_25_26_27_uu:
+; AVX1: # BB#0:
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
+; AVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[10,11,12,13,14,15],xmm3[0,1,2,3,4,5,6,7,8,9]
+; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9]
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v16i16_uu_06_uu_uu_17_18_19_uu_uu_14_uu_uu_25_26_27_uu:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[10,11,12,13,14,15],ymm1[0,1,2,3,4,5,6,7,8,9],ymm0[26,27,28,29,30,31],ymm1[16,17,18,19,20,21,22,23,24,25]
+; AVX2-NEXT: retq
+ %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 6, i32 undef, i32 undef, i32 17, i32 18, i32 19, i32 undef, i32 undef, i32 14, i32 undef, i32 undef, i32 25, i32 26, i32 27, i32 undef>
+ ret <16 x i16> %shuffle
+}
+
+define <16 x i16> @shuffle_v16i16_23_uu_03_uu_20_20_05_uu_31_uu_11_uu_28_28_13_uu(<16 x i16> %a, <16 x i16> %b) {
+; AVX1-LABEL: shuffle_v16i16_23_uu_03_uu_20_20_05_uu_31_uu_11_uu_28_28_13_uu:
+; AVX1: # BB#0:
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,xmm2[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
+; AVX1-NEXT: vpshufhw {{.*#+}} xmm3 = xmm3[0,1,2,3,7,5,4,4]
+; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm3[2],xmm2[2],xmm3[3],xmm2[3]
+; AVX1-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
+; AVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,7,5,4,4]
+; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm0 = xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v16i16_23_uu_03_uu_20_20_05_uu_31_uu_11_uu_28_28_13_uu:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4],ymm0[5,6],ymm1[7],ymm0[8,9,10,11],ymm1[12],ymm0[13,14],ymm1[15]
+; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[14,15,14,15,6,7,6,7,8,9,8,9,10,11,14,15,30,31,30,31,22,23,22,23,24,25,24,25,26,27,30,31]
+; AVX2-NEXT: retq
+ %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 23, i32 undef, i32 3, i32 undef, i32 20, i32 20, i32 5, i32 undef, i32 31, i32 undef, i32 11, i32 undef, i32 28, i32 28, i32 13, i32 undef>
+ ret <16 x i16> %shuffle
+}
+
+define <16 x i16> @insert_v16i16_0elt_into_zero_vector(i16* %ptr) {
+; ALL-LABEL: insert_v16i16_0elt_into_zero_vector:
+; ALL: # BB#0:
+; ALL-NEXT: movzwl (%rdi), %eax
+; ALL-NEXT: vmovd %eax, %xmm0
+; ALL-NEXT: retq
+ %val = load i16, i16* %ptr
+ %i0 = insertelement <16 x i16> zeroinitializer, i16 %val, i32 0
+ ret <16 x i16> %i0
+}
+
diff --git a/test/CodeGen/X86/vector-shuffle-256-v32.ll b/test/CodeGen/X86/vector-shuffle-256-v32.ll
index c7f4c3512fba..a0f43de75630 100644
--- a/test/CodeGen/X86/vector-shuffle-256-v32.ll
+++ b/test/CodeGen/X86/vector-shuffle-256-v32.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -mcpu=x86-64 -mattr=+avx -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
-; RUN: llc < %s -mcpu=x86-64 -mattr=+avx2 -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
+; RUN: llc < %s -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
+; RUN: llc < %s -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
target triple = "x86_64-unknown-unknown"
@@ -314,9 +314,8 @@ define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_16_
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
-; AVX1-NEXT: vpshufb {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0]
-; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],zero
-; AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
+; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1]
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
@@ -339,19 +338,17 @@ define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_17_00_
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
-; AVX1-NEXT: vpshufb {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero
-; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0],zero,xmm0[0]
-; AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
+; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0]
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_17_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
; AVX2: # BB#0:
; AVX2-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
-; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[u,u,u,u,u,u,u,u,u,u,u,u,u,u,1,u,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
-; AVX2-NEXT: vpbroadcastb %xmm0, %ymm0
-; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
+; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = <0,255,u,u,u,u,u,u,u,u,u,u,u,u,u,u,255,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u>
; AVX2-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
; AVX2-NEXT: retq
%shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 17, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
ret <32 x i8> %shuffle
@@ -363,19 +360,17 @@ define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_18_00_00_
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
-; AVX1-NEXT: vpshufb {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[2],zero,zero
-; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0],zero,xmm0[0,0]
-; AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
+; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,5,0,0]
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_18_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
; AVX2: # BB#0:
; AVX2-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
-; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[u,u,u,u,u,u,u,u,u,u,u,u,u,2,u,u,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
-; AVX2-NEXT: vpbroadcastb %xmm0, %ymm0
-; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,0,0,0,0,0,0,0,0,0,0,0,0,255,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
+; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = <0,0,255,255,u,u,u,u,u,u,u,u,u,u,u,u,255,255,u,u,u,u,u,u,u,u,u,u,u,u,u,u>
; AVX2-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
; AVX2-NEXT: retq
%shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 18, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
ret <32 x i8> %shuffle
@@ -387,19 +382,17 @@ define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_19_00_00_00_
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
-; AVX1-NEXT: vpshufb {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[3],zero,zero,zero
-; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0],zero,xmm0[0,0,0]
-; AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
+; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,7,0,0,0]
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_19_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
; AVX2: # BB#0:
; AVX2-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
-; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[u,u,u,u,u,u,u,u,u,u,u,u,3,u,u,u,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
-; AVX2-NEXT: vpbroadcastb %xmm0, %ymm0
-; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,0,0,0,0,0,0,0,0,0,0,0,255,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
+; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = <0,0,255,255,u,u,u,u,u,u,u,u,u,u,u,u,255,255,u,u,u,u,u,u,u,u,u,u,u,u,u,u>
; AVX2-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
; AVX2-NEXT: retq
%shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 19, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
ret <32 x i8> %shuffle
@@ -411,19 +404,16 @@ define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_20_00_00_00_00_
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
-; AVX1-NEXT: vpshufb {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[4],zero,zero,zero,zero
-; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0],zero,xmm0[0,0,0,0]
-; AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
+; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,9,0,0,0,0]
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_20_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
; AVX2: # BB#0:
; AVX2-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
-; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[u,u,u,u,u,u,u,u,u,u,u,4,u,u,u,u,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
-; AVX2-NEXT: vpbroadcastb %xmm0, %ymm0
-; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,0,0,0,0,0,0,0,0,0,0,255,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; AVX2-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7]
+; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
; AVX2-NEXT: retq
%shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 20, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
ret <32 x i8> %shuffle
@@ -435,19 +425,16 @@ define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_21_00_00_00_00_00_
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
-; AVX1-NEXT: vpshufb {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[5],zero,zero,zero,zero,zero
-; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0],zero,xmm0[0,0,0,0,0]
-; AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
+; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,11,0,0,0,0,0]
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_21_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
; AVX2: # BB#0:
; AVX2-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
-; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[u,u,u,u,u,u,u,u,u,u,5,u,u,u,u,u,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
-; AVX2-NEXT: vpbroadcastb %xmm0, %ymm0
-; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,0,0,0,0,0,0,0,0,0,255,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; AVX2-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7]
+; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,5,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
; AVX2-NEXT: retq
%shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 21, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
ret <32 x i8> %shuffle
@@ -459,19 +446,16 @@ define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_22_00_00_00_00_00_00_
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
-; AVX1-NEXT: vpshufb {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[6],zero,zero,zero,zero,zero,zero
-; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0],zero,xmm0[0,0,0,0,0,0]
-; AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
+; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,13,0,0,0,0,0,0]
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_22_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
; AVX2: # BB#0:
; AVX2-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
-; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[u,u,u,u,u,u,u,u,u,6,u,u,u,u,u,u,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
-; AVX2-NEXT: vpbroadcastb %xmm0, %ymm0
-; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,0,0,0,0,0,0,0,0,255,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; AVX2-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7]
+; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
; AVX2-NEXT: retq
%shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 22, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
ret <32 x i8> %shuffle
@@ -483,19 +467,16 @@ define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_23_00_00_00_00_00_00_00_
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
-; AVX1-NEXT: vpshufb {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,xmm2[7],zero,zero,zero,zero,zero,zero,zero
-; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0],zero,xmm0[0,0,0,0,0,0,0]
-; AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
+; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,15,0,0,0,0,0,0,0]
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_23_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
; AVX2: # BB#0:
; AVX2-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
-; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[u,u,u,u,u,u,u,u,7,u,u,u,u,u,u,u,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
-; AVX2-NEXT: vpbroadcastb %xmm0, %ymm0
-; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; AVX2-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7]
+; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
; AVX2-NEXT: retq
%shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 23, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
ret <32 x i8> %shuffle
@@ -516,10 +497,8 @@ define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_24_00_00_00_00_00_00_00_00_
; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_24_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
; AVX2: # BB#0:
; AVX2-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
-; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[u,u,u,u,u,u,u,8,u,u,u,u,u,u,u,u,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
-; AVX2-NEXT: vpbroadcastb %xmm0, %ymm0
-; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,0,0,0,0,0,0,255,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; AVX2-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5,6,7]
+; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
; AVX2-NEXT: retq
%shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 24, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
ret <32 x i8> %shuffle
@@ -540,10 +519,8 @@ define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_25_00_00_00_00_00_00_00_00_00_
; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_25_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
; AVX2: # BB#0:
; AVX2-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
-; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[u,u,u,u,u,u,9,u,u,u,u,u,u,u,u,u,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
-; AVX2-NEXT: vpbroadcastb %xmm0, %ymm0
-; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,0,0,0,0,0,255,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; AVX2-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5,6,7]
+; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,9,0,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
; AVX2-NEXT: retq
%shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 25, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
ret <32 x i8> %shuffle
@@ -564,10 +541,8 @@ define <32 x i8> @shuffle_v32i8_00_00_00_00_00_26_00_00_00_00_00_00_00_00_00_00_
; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_26_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
; AVX2: # BB#0:
; AVX2-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
-; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[u,u,u,u,u,10,u,u,u,u,u,u,u,u,u,u,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
-; AVX2-NEXT: vpbroadcastb %xmm0, %ymm0
-; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,0,0,0,0,255,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; AVX2-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5,6,7]
+; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,10,0,0,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
; AVX2-NEXT: retq
%shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 26, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
ret <32 x i8> %shuffle
@@ -588,10 +563,8 @@ define <32 x i8> @shuffle_v32i8_00_00_00_00_27_00_00_00_00_00_00_00_00_00_00_00_
; AVX2-LABEL: shuffle_v32i8_00_00_00_00_27_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
; AVX2: # BB#0:
; AVX2-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
-; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[u,u,u,u,11,u,u,u,u,u,u,u,u,u,u,u,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
-; AVX2-NEXT: vpbroadcastb %xmm0, %ymm0
-; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,0,0,0,255,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; AVX2-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5,6,7]
+; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,11,0,0,0,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
; AVX2-NEXT: retq
%shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 27, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
ret <32 x i8> %shuffle
@@ -612,10 +585,8 @@ define <32 x i8> @shuffle_v32i8_00_00_00_28_00_00_00_00_00_00_00_00_00_00_00_00_
; AVX2-LABEL: shuffle_v32i8_00_00_00_28_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
; AVX2: # BB#0:
; AVX2-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
-; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[u,u,u,12,u,u,u,u,u,u,u,u,u,u,u,u,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
-; AVX2-NEXT: vpbroadcastb %xmm0, %ymm0
-; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,0,0,255,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; AVX2-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5,6,7]
+; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,12,0,0,0,0,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
; AVX2-NEXT: retq
%shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 28, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
ret <32 x i8> %shuffle
@@ -636,10 +607,8 @@ define <32 x i8> @shuffle_v32i8_00_00_29_00_00_00_00_00_00_00_00_00_00_00_00_00_
; AVX2-LABEL: shuffle_v32i8_00_00_29_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
; AVX2: # BB#0:
; AVX2-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
-; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[u,u,13,u,u,u,u,u,u,u,u,u,u,u,u,u,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
-; AVX2-NEXT: vpbroadcastb %xmm0, %ymm0
-; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,0,255,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; AVX2-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5,6,7]
+; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,13,0,0,0,0,0,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
; AVX2-NEXT: retq
%shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 29, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
ret <32 x i8> %shuffle
@@ -660,10 +629,8 @@ define <32 x i8> @shuffle_v32i8_00_30_00_00_00_00_00_00_00_00_00_00_00_00_00_00_
; AVX2-LABEL: shuffle_v32i8_00_30_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
; AVX2: # BB#0:
; AVX2-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
-; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[u,14,u,u,u,u,u,u,u,u,u,u,u,u,u,u,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
-; AVX2-NEXT: vpbroadcastb %xmm0, %ymm0
-; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; AVX2-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5,6,7]
+; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
; AVX2-NEXT: retq
%shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 30, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
ret <32 x i8> %shuffle
@@ -686,14 +653,10 @@ define <32 x i8> @shuffle_v32i8_31_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_
; AVX2-LABEL: shuffle_v32i8_31_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
; AVX2: # BB#0:
; AVX2-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
+; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5,6,7]
; AVX2-NEXT: movl $15, %eax
-; AVX2-NEXT: vmovd %eax, %xmm2
-; AVX2-NEXT: vpxor %ymm3, %ymm3, %ymm3
-; AVX2-NEXT: vinserti128 $0, %xmm2, %ymm3, %ymm2
-; AVX2-NEXT: vpshufb %ymm2, %ymm1, %ymm1
-; AVX2-NEXT: vpbroadcastb %xmm0, %ymm0
-; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; AVX2-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vmovd %eax, %xmm1
+; AVX2-NEXT: vpshufb %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
%shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 31, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
ret <32 x i8> %shuffle
@@ -947,16 +910,11 @@ define <32 x i8> @shuffle_v32i8_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_
define <32 x i8> @shuffle_v32i8_00_33_02_35_04_37_06_39_08_41_10_43_12_45_14_47_16_49_18_51_20_53_22_55_24_57_26_59_28_61_30_63(<32 x i8> %a, <32 x i8> %b) {
; AVX1-LABEL: shuffle_v32i8_00_33_02_35_04_37_06_39_08_41_10_43_12_45_14_47_16_49_18_51_20_53_22_55_24_57_26_59_28_61_30_63:
; AVX1: # BB#0:
-; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = <1,3,5,7,9,11,13,15,u,u,u,u,u,u,u,u>
-; AVX1-NEXT: vpshufb %xmm3, %xmm2, %xmm2
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm5 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
-; AVX1-NEXT: vpshufb %xmm5, %xmm4, %xmm4
-; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm2 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3],xmm4[4],xmm2[4],xmm4[5],xmm2[5],xmm4[6],xmm2[6],xmm4[7],xmm2[7]
-; AVX1-NEXT: vpshufb %xmm3, %xmm1, %xmm1
-; AVX1-NEXT: vpshufb %xmm5, %xmm0, %xmm0
-; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
+; AVX1-NEXT: vpblendvb %xmm4, %xmm2, %xmm3, %xmm2
+; AVX1-NEXT: vpblendvb %xmm4, %xmm0, %xmm1, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
@@ -972,16 +930,11 @@ define <32 x i8> @shuffle_v32i8_00_33_02_35_04_37_06_39_08_41_10_43_12_45_14_47_
define <32 x i8> @shuffle_v32i8_32_01_34_03_36_05_38_07_40_09_42_11_44_13_46_15_48_17_50_19_52_21_54_23_56_25_58_27_60_29_62_31(<32 x i8> %a, <32 x i8> %b) {
; AVX1-LABEL: shuffle_v32i8_32_01_34_03_36_05_38_07_40_09_42_11_44_13_46_15_48_17_50_19_52_21_54_23_56_25_58_27_60_29_62_31:
; AVX1: # BB#0:
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = <1,3,5,7,9,11,13,15,u,u,u,u,u,u,u,u>
-; AVX1-NEXT: vpshufb %xmm3, %xmm2, %xmm2
-; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm5 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
-; AVX1-NEXT: vpshufb %xmm5, %xmm4, %xmm4
-; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm2 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3],xmm4[4],xmm2[4],xmm4[5],xmm2[5],xmm4[6],xmm2[6],xmm4[7],xmm2[7]
-; AVX1-NEXT: vpshufb %xmm3, %xmm0, %xmm0
-; AVX1-NEXT: vpshufb %xmm5, %xmm1, %xmm1
-; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
+; AVX1-NEXT: vpblendvb %xmm4, %xmm2, %xmm3, %xmm2
+; AVX1-NEXT: vpblendvb %xmm4, %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
@@ -997,11 +950,8 @@ define <32 x i8> @shuffle_v32i8_32_01_34_03_36_05_38_07_40_09_42_11_44_13_46_15_
define <32 x i8> @shuffle_v32i8_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32(<32 x i8> %a, <32 x i8> %b) {
; AVX1-LABEL: shuffle_v32i8_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32:
; AVX1: # BB#0:
-; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
-; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: retq
;
@@ -1020,17 +970,12 @@ define <32 x i8> @shuffle_v32i8_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_
; AVX1-LABEL: shuffle_v32i8_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_16_48_16_48_16_48_16_48_16_48_16_48_16_48_16_48:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
-; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; AVX1-NEXT: vpshuflw {{.*#+}} xmm2 = xmm2[0,0,0,0,4,5,6,7]
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
-; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm3 = xmm3[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; AVX1-NEXT: vpshuflw {{.*#+}} xmm3 = xmm3[0,0,0,0,4,5,6,7]
; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
-; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
-; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
+; AVX1-NEXT: vpshufb %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; AVX1-NEXT: vpshufb %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
@@ -1050,15 +995,15 @@ define <32 x i8> @shuffle_v32i8_32_32_32_32_32_32_32_32_08_09_10_11_12_13_14_15_
; AVX1-LABEL: shuffle_v32i8_32_32_32_32_32_32_32_32_08_09_10_11_12_13_14_15_48_48_48_48_48_48_48_48_24_25_26_27_28_29_30_31:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [128,128,128,128,128,128,128,128,8,9,10,11,12,13,14,15]
-; AVX1-NEXT: vpshufb %xmm3, %xmm2, %xmm2
-; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm5 = [0,0,0,0,0,0,0,0,128,128,128,128,128,128,128,128]
-; AVX1-NEXT: vpshufb %xmm5, %xmm4, %xmm4
-; AVX1-NEXT: vpor %xmm2, %xmm4, %xmm2
-; AVX1-NEXT: vpshufb %xmm3, %xmm0, %xmm0
-; AVX1-NEXT: vpshufb %xmm5, %xmm1, %xmm1
-; AVX1-NEXT: vpor %xmm0, %xmm1, %xmm0
+; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[2,3,0,1]
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
+; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm3 = xmm3[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; AVX1-NEXT: vpshuflw {{.*#+}} xmm3 = xmm3[0,0,0,0,4,5,6,7]
+; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
+; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
+; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
@@ -1076,23 +1021,22 @@ define <32 x i8> @shuffle_v32i8_39_38_37_36_35_34_33_32_15_14_13_12_11_10_09_08_
; AVX1-LABEL: shuffle_v32i8_39_38_37_36_35_34_33_32_15_14_13_12_11_10_09_08_55_54_53_52_51_50_49_48_31_30_29_28_27_26_25_24:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [128,128,128,128,128,128,128,128,15,14,13,12,11,10,9,8]
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = <15,14,13,12,11,10,9,8,u,u,u,u,u,u,u,u>
; AVX1-NEXT: vpshufb %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm5 = [7,6,5,4,3,2,1,0,128,128,128,128,128,128,128,128]
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm5 = <7,6,5,4,3,2,1,0,u,u,u,u,u,u,u,u>
; AVX1-NEXT: vpshufb %xmm5, %xmm4, %xmm4
-; AVX1-NEXT: vpor %xmm2, %xmm4, %xmm2
+; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm4[0],xmm2[0]
; AVX1-NEXT: vpshufb %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vpshufb %xmm5, %xmm1, %xmm1
-; AVX1-NEXT: vpor %xmm0, %xmm1, %xmm0
+; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v32i8_39_38_37_36_35_34_33_32_15_14_13_12_11_10_09_08_55_54_53_52_51_50_49_48_31_30_29_28_27_26_25_24:
; AVX2: # BB#0:
-; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,u,u,u,u,u,u,u,15,14,13,12,11,10,9,8,u,u,u,u,u,u,u,u,31,30,29,28,27,26,25,24]
-; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[7,6,5,4,3,2,1,0,u,u,u,u,u,u,u,u,23,22,21,20,19,18,17,16,u,u,u,u,u,u,u,u]
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
+; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8,23,22,21,20,19,18,17,16,31,30,29,28,27,26,25,24]
; AVX2-NEXT: retq
%shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24>
ret <32 x i8> %shuffle
@@ -1102,15 +1046,12 @@ define <32 x i8> @shuffle_v32i8_39_38_37_36_35_34_33_32_07_06_05_04_03_02_01_00_
; AVX1-LABEL: shuffle_v32i8_39_38_37_36_35_34_33_32_07_06_05_04_03_02_01_00_55_54_53_52_51_50_49_48_23_22_21_20_19_18_17_16:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [128,128,128,128,128,128,128,128,7,6,5,4,3,2,1,0]
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
+; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [14,12,10,8,6,4,2,0,15,13,11,9,7,5,3,1]
; AVX1-NEXT: vpshufb %xmm3, %xmm2, %xmm2
-; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm5 = [7,6,5,4,3,2,1,0,128,128,128,128,128,128,128,128]
-; AVX1-NEXT: vpshufb %xmm5, %xmm4, %xmm4
-; AVX1-NEXT: vpor %xmm2, %xmm4, %xmm2
+; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; AVX1-NEXT: vpshufb %xmm3, %xmm0, %xmm0
-; AVX1-NEXT: vpshufb %xmm5, %xmm1, %xmm1
-; AVX1-NEXT: vpor %xmm0, %xmm1, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
@@ -1520,27 +1461,24 @@ define <32 x i8> @shuffle_v32i8_08_08_08_08_08_08_08_08_uu_uu_uu_uu_uu_uu_uu_uu_
define <32 x i8> @shuffle_v32i8_42_45_12_13_35_35_60_40_17_22_29_44_33_12_48_51_20_19_52_19_49_54_37_32_48_42_59_07_36_34_36_39(<32 x i8> %a, <32 x i8> %b) {
; AVX1-LABEL: shuffle_v32i8_42_45_12_13_35_35_60_40_17_22_29_44_33_12_48_51_20_19_52_19_49_54_37_32_48_42_59_07_36_34_36_39:
; AVX1: # BB#0:
-; AVX1-NEXT: vpshufb {{.*#+}} xmm2 = zero,zero,xmm0[u],zero,xmm0[u,u,u,u,u,u,u,7,u,u,u,u]
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
-; AVX1-NEXT: vpshufb {{.*#+}} xmm4 = xmm3[4,3,u,3,u,u,u,u,u,u,u],zero,xmm3[u,u,u,u]
-; AVX1-NEXT: vpor %xmm2, %xmm4, %xmm2
-; AVX1-NEXT: vpshufb {{.*#+}} xmm2 = xmm2[0,1],zero,xmm2[3],zero,zero,zero,zero,zero,zero,zero,xmm2[11],zero,zero,zero,zero
-; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
-; AVX1-NEXT: vpshufb {{.*#+}} xmm5 = xmm4[u,u,4,u,1,6],zero,zero,xmm4[0],zero,xmm4[11,u],zero,zero,zero,zero
-; AVX1-NEXT: vpshufb {{.*#+}} xmm6 = xmm1[u,u],zero,xmm1[u],zero,zero,xmm1[5,0],zero,xmm1[10],zero,xmm1[u,4,2,4,7]
-; AVX1-NEXT: vpor %xmm5, %xmm6, %xmm5
-; AVX1-NEXT: vpshufb {{.*#+}} xmm5 = zero,zero,xmm5[2],zero,xmm5[4,5,6,7,8,9,10],zero,xmm5[12,13,14,15]
-; AVX1-NEXT: vpor %xmm2, %xmm5, %xmm2
-; AVX1-NEXT: vpshufb {{.*#+}} xmm3 = xmm3[u,u],zero,zero,xmm3[u,u,u,u,1,6,13,u,u],zero,xmm3[u,u]
-; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[u,u,12,13,u,u,u,u],zero,zero,zero,xmm0[u,u,12,u,u]
-; AVX1-NEXT: vpor %xmm3, %xmm0, %xmm0
-; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = zero,zero,xmm0[2,3],zero,zero,zero,zero,xmm0[8,9,10],zero,zero,xmm0[13],zero,zero
-; AVX1-NEXT: vpshufb {{.*#+}} xmm3 = zero,zero,xmm4[u,u],zero,zero,xmm4[12],zero,xmm4[u,u,u],zero,zero,xmm4[u,0,3]
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vpshufb {{.*#+}} xmm3 = xmm2[u,u,4,u,1,6],zero,zero,xmm2[0],zero,xmm2[11,u],zero,zero,zero,zero
+; AVX1-NEXT: vpshufb {{.*#+}} xmm4 = xmm1[u,u],zero,xmm1[u],zero,zero,xmm1[5,0],zero,xmm1[10],zero,xmm1[u,4,2,4,7]
+; AVX1-NEXT: vpor %xmm3, %xmm4, %xmm3
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
+; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm5 = xmm4[0],xmm0[0],xmm4[1],xmm0[1],xmm4[2],xmm0[2],xmm4[3],xmm0[3],xmm4[4],xmm0[4],xmm4[5],xmm0[5],xmm4[6],xmm0[6],xmm4[7],xmm0[7]
+; AVX1-NEXT: vpshufb {{.*#+}} xmm5 = xmm5[8,6,u,6,u,u,u,u,u,u,u,15,u,u,u,u]
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm6 = [0,0,255,0,255,255,255,255,255,255,255,0,255,255,255,255]
+; AVX1-NEXT: vpblendvb %xmm6, %xmm3, %xmm5, %xmm3
+; AVX1-NEXT: vpshufb {{.*#+}} xmm2 = zero,zero,xmm2[u,u],zero,zero,xmm2[12],zero,xmm2[u,u,u],zero,zero,xmm2[u,0,3]
; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[10,13,u,u,3,3],zero,xmm1[8,u,u,u,12,1,u],zero,zero
-; AVX1-NEXT: vpor %xmm3, %xmm1, %xmm1
-; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,1],zero,zero,xmm1[4,5,6,7],zero,zero,zero,xmm1[11,12],zero,xmm1[14,15]
-; AVX1-NEXT: vpor %xmm0, %xmm1, %xmm0
-; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: vpor %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vpshufb {{.*#+}} xmm2 = xmm4[u,u],zero,zero,xmm4[u,u,u,u,1,6,13,u,u],zero,xmm4[u,u]
+; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[u,u,12,13,u,u,u,u],zero,zero,zero,xmm0[u,u,12,u,u]
+; AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [255,255,0,0,255,255,255,255,0,0,0,255,255,0,255,255]
+; AVX1-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v32i8_42_45_12_13_35_35_60_40_17_22_29_44_33_12_48_51_20_19_52_19_49_54_37_32_48_42_59_07_36_34_36_39:
@@ -1654,3 +1592,367 @@ define <32 x i8> @shuffle_v32i8_00_32_01_33_02_34_03_35_04_36_05_37_06_38_07_39_
%shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 32, i32 1, i32 33, i32 2, i32 34, i32 3, i32 35, i32 4, i32 36, i32 5, i32 37, i32 6, i32 38, i32 7, i32 39, i32 8, i32 40, i32 9, i32 41, i32 10, i32 42, i32 11, i32 43, i32 12, i32 44, i32 13, i32 45, i32 14, i32 46, i32 15, i32 47>
ret <32 x i8> %shuffle
}
+
+define <32 x i8> @shuffle_v32i8_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_32_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_48(<32 x i8> %a) {
+; AVX1-LABEL: shuffle_v32i8_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_32_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_48:
+; AVX1: # BB#0:
+; AVX1-NEXT: vpslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0]
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0]
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v32i8_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_32_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_48:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16]
+; AVX2-NEXT: retq
+ %shuffle = shufflevector <32 x i8> zeroinitializer, <32 x i8> %a, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 32, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 48>
+ ret <32 x i8> %shuffle
+}
+
+define <32 x i8> @shuffle_v32i8_47_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_63_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz(<32 x i8> %a) {
+; AVX1-LABEL: shuffle_v32i8_47_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_63_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
+; AVX1: # BB#0:
+; AVX1-NEXT: vpsrldq {{.*#+}} xmm1 = xmm0[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v32i8_47_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_63_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpsrldq {{.*#+}} ymm0 = ymm0[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[31],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; AVX2-NEXT: retq
+ %shuffle = shufflevector <32 x i8> zeroinitializer, <32 x i8> %a, <32 x i32> <i32 47, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 63, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ ret <32 x i8> %shuffle
+}
+
+;
+; Shuffle to logical bit shifts
+;
+
+define <32 x i8> @shuffle_v32i8_zz_00_zz_02_zz_04_zz_06_zz_08_zz_10_zz_12_zz_14_zz_16_zz_18_zz_20_zz_22_zz_24_zz_26_zz_28_zz_30(<32 x i8> %a) {
+; AVX1-LABEL: shuffle_v32i8_zz_00_zz_02_zz_04_zz_06_zz_08_zz_10_zz_12_zz_14_zz_16_zz_18_zz_20_zz_22_zz_24_zz_26_zz_28_zz_30:
+; AVX1: # BB#0:
+; AVX1-NEXT: vpsllw $8, %xmm0, %xmm1
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT: vpsllw $8, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v32i8_zz_00_zz_02_zz_04_zz_06_zz_08_zz_10_zz_12_zz_14_zz_16_zz_18_zz_20_zz_22_zz_24_zz_26_zz_28_zz_30:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpsllw $8, %ymm0, %ymm0
+; AVX2-NEXT: retq
+ %shuffle = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> <i32 32, i32 0, i32 32, i32 2, i32 32, i32 4, i32 32, i32 6, i32 32, i32 8, i32 32, i32 10, i32 32, i32 12, i32 32, i32 14, i32 32, i32 16, i32 32, i32 18, i32 32, i32 20, i32 32, i32 22, i32 32, i32 24, i32 32, i32 26, i32 32, i32 28, i32 32, i32 30>
+ ret <32 x i8> %shuffle
+}
+
+define <32 x i8> @shuffle_v32i8_zz_zz_00_01_zz_zz_04_05_zz_zz_08_09_zz_zz_12_13_zz_zz_16_17_zz_zz_20_21_zz_zz_24_25_zz_zz_28_29(<32 x i8> %a) {
+; AVX1-LABEL: shuffle_v32i8_zz_zz_00_01_zz_zz_04_05_zz_zz_08_09_zz_zz_12_13_zz_zz_16_17_zz_zz_20_21_zz_zz_24_25_zz_zz_28_29:
+; AVX1: # BB#0:
+; AVX1-NEXT: vpslld $16, %xmm0, %xmm1
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT: vpslld $16, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v32i8_zz_zz_00_01_zz_zz_04_05_zz_zz_08_09_zz_zz_12_13_zz_zz_16_17_zz_zz_20_21_zz_zz_24_25_zz_zz_28_29:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpslld $16, %ymm0, %ymm0
+; AVX2-NEXT: retq
+ %shuffle = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> <i32 32, i32 32, i32 0, i32 1, i32 32, i32 32, i32 4, i32 5, i32 32, i32 32, i32 8, i32 9, i32 32, i32 32, i32 12, i32 13, i32 32, i32 32, i32 16, i32 17, i32 32, i32 32, i32 20, i32 21, i32 32, i32 32, i32 24, i32 25, i32 32, i32 32, i32 28, i32 29>
+ ret <32 x i8> %shuffle
+}
+
+define <32 x i8> @shuffle_v32i8_zz_zz_zz_zz_zz_zz_00_01_zz_zz_zz_zz_zz_zz_08_09_zz_zz_zz_zz_zz_zz_16_17_zz_zz_zz_zz_zz_zz_24_25(<32 x i8> %a) {
+; AVX1-LABEL: shuffle_v32i8_zz_zz_zz_zz_zz_zz_00_01_zz_zz_zz_zz_zz_zz_08_09_zz_zz_zz_zz_zz_zz_16_17_zz_zz_zz_zz_zz_zz_24_25:
+; AVX1: # BB#0:
+; AVX1-NEXT: vpsllq $48, %xmm0, %xmm1
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT: vpsllq $48, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v32i8_zz_zz_zz_zz_zz_zz_00_01_zz_zz_zz_zz_zz_zz_08_09_zz_zz_zz_zz_zz_zz_16_17_zz_zz_zz_zz_zz_zz_24_25:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpsllq $48, %ymm0, %ymm0
+; AVX2-NEXT: retq
+ %shuffle = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> <i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 0, i32 1, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 8, i32 9, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 16, i32 17, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 24, i32 25>
+ ret <32 x i8> %shuffle
+}
+
+define <32 x i8> @shuffle_v32i8_01_zz_03_zz_05_zz_07_zz_09_zz_11_zz_13_zz_15_zz_17_zz_19_zz_21_zz_23_zz_25_zz_27_zz_29_zz_31_zz(<32 x i8> %a) {
+; AVX1-LABEL: shuffle_v32i8_01_zz_03_zz_05_zz_07_zz_09_zz_11_zz_13_zz_15_zz_17_zz_19_zz_21_zz_23_zz_25_zz_27_zz_29_zz_31_zz:
+; AVX1: # BB#0:
+; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v32i8_01_zz_03_zz_05_zz_07_zz_09_zz_11_zz_13_zz_15_zz_17_zz_19_zz_21_zz_23_zz_25_zz_27_zz_29_zz_31_zz:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpsrlw $8, %ymm0, %ymm0
+; AVX2-NEXT: retq
+ %shuffle = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> <i32 1, i32 32, i32 3, i32 32, i32 5, i32 32, i32 7, i32 32, i32 9, i32 32, i32 11, i32 32, i32 13, i32 32, i32 15, i32 32, i32 17, i32 32, i32 19, i32 32, i32 21, i32 32, i32 23, i32 32, i32 25, i32 32, i32 27, i32 32, i32 29, i32 32, i32 31, i32 32>
+ ret <32 x i8> %shuffle
+}
+
+define <32 x i8> @shuffle_v32i8_02_03_zz_zz_06_07_zz_zz_10_11_zz_zz_14_15_zz_zz_18_19_zz_zz_22_23_zz_zz_26_27_zz_zz_30_31_zz_zz(<32 x i8> %a) {
+; AVX1-LABEL: shuffle_v32i8_02_03_zz_zz_06_07_zz_zz_10_11_zz_zz_14_15_zz_zz_18_19_zz_zz_22_23_zz_zz_26_27_zz_zz_30_31_zz_zz:
+; AVX1: # BB#0:
+; AVX1-NEXT: vpsrld $16, %xmm0, %xmm1
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT: vpsrld $16, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v32i8_02_03_zz_zz_06_07_zz_zz_10_11_zz_zz_14_15_zz_zz_18_19_zz_zz_22_23_zz_zz_26_27_zz_zz_30_31_zz_zz:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpsrld $16, %ymm0, %ymm0
+; AVX2-NEXT: retq
+ %shuffle = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> <i32 2, i32 3, i32 32, i32 32, i32 6, i32 7, i32 32, i32 32, i32 10, i32 11, i32 32, i32 32, i32 14, i32 15, i32 32, i32 32, i32 18, i32 19, i32 32, i32 32, i32 22, i32 23, i32 32, i32 32, i32 26, i32 27, i32 32, i32 32, i32 30, i32 31, i32 32, i32 32>
+ ret <32 x i8> %shuffle
+}
+
+define <32 x i8> @shuffle_v32i8_07_zz_zz_zz_zz_zz_zz_zz_15_zz_zz_zz_zz_zz_zz_zz_23_zz_zz_zz_zz_zz_zz_zz_31_zz_zz_zz_zz_zz_zz_zz(<32 x i8> %a) {
+; AVX1-LABEL: shuffle_v32i8_07_zz_zz_zz_zz_zz_zz_zz_15_zz_zz_zz_zz_zz_zz_zz_23_zz_zz_zz_zz_zz_zz_zz_31_zz_zz_zz_zz_zz_zz_zz:
+; AVX1: # BB#0:
+; AVX1-NEXT: vpsrlq $56, %xmm0, %xmm1
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT: vpsrlq $56, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v32i8_07_zz_zz_zz_zz_zz_zz_zz_15_zz_zz_zz_zz_zz_zz_zz_23_zz_zz_zz_zz_zz_zz_zz_31_zz_zz_zz_zz_zz_zz_zz:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpsrlq $56, %ymm0, %ymm0
+; AVX2-NEXT: retq
+ %shuffle = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> <i32 7, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 15, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 23, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32>
+ ret <32 x i8> %shuffle
+}
+
+define <32 x i8> @shuffle_v32i8_32_zz_zz_zz_zz_zz_zz_zz_33_zz_zz_zz_zz_zz_zz_zz_34_zz_zz_zz_zz_zz_zz_zz_35_zz_zz_zz_zz_zz_zz_zz(<32 x i8> %a) {
+; AVX1-LABEL: shuffle_v32i8_32_zz_zz_zz_zz_zz_zz_zz_33_zz_zz_zz_zz_zz_zz_zz_34_zz_zz_zz_zz_zz_zz_zz_35_zz_zz_zz_zz_zz_zz_zz:
+; AVX1: # BB#0:
+; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
+; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v32i8_32_zz_zz_zz_zz_zz_zz_zz_33_zz_zz_zz_zz_zz_zz_zz_34_zz_zz_zz_zz_zz_zz_zz_35_zz_zz_zz_zz_zz_zz_zz:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
+; AVX2-NEXT: retq
+
+ %shuffle = shufflevector <32 x i8> zeroinitializer, <32 x i8> %a, <32 x i32> <i32 32, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 33, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 34, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 35, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ ret <32 x i8> %shuffle
+}
+
+define <32 x i8> @shuffle_v32i8_32_zz_zz_zz_33_zz_zz_zz_34_zz_zz_zz_35_zz_zz_zz_36_zz_zz_zz_37_zz_zz_zz_38_zz_zz_zz_39_zz_zz_zz(<32 x i8> %a) {
+; AVX1-LABEL: shuffle_v32i8_32_zz_zz_zz_33_zz_zz_zz_34_zz_zz_zz_35_zz_zz_zz_36_zz_zz_zz_37_zz_zz_zz_38_zz_zz_zz_39_zz_zz_zz:
+; AVX1: # BB#0:
+; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
+; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v32i8_32_zz_zz_zz_33_zz_zz_zz_34_zz_zz_zz_35_zz_zz_zz_36_zz_zz_zz_37_zz_zz_zz_38_zz_zz_zz_39_zz_zz_zz:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
+; AVX2-NEXT: retq
+ %shuffle = shufflevector <32 x i8> zeroinitializer, <32 x i8> %a, <32 x i32> <i32 32, i32 0, i32 0, i32 0, i32 33, i32 0, i32 0, i32 0, i32 34, i32 0, i32 0, i32 0, i32 35, i32 0, i32 0, i32 0, i32 36, i32 0, i32 0, i32 0, i32 37, i32 0, i32 0, i32 0, i32 38, i32 0, i32 0, i32 0, i32 39, i32 0, i32 0, i32 0>
+ ret <32 x i8> %shuffle
+}
+
+define <32 x i8> @shuffle_v32i8_32_zz_33_zz_34_zz_35_zz_36_zz_37_zz_38_zz_39_zz_40_zz_41_zz_42_zz_43_zz_44_zz_45_zz_46_zz_47_zz(<32 x i8> %a) {
+; AVX1-LABEL: shuffle_v32i8_32_zz_33_zz_34_zz_35_zz_36_zz_37_zz_38_zz_39_zz_40_zz_41_zz_42_zz_43_zz_44_zz_45_zz_46_zz_47_zz:
+; AVX1: # BB#0:
+; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
+; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v32i8_32_zz_33_zz_34_zz_35_zz_36_zz_37_zz_38_zz_39_zz_40_zz_41_zz_42_zz_43_zz_44_zz_45_zz_46_zz_47_zz:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
+; AVX2-NEXT: retq
+ %shuffle = shufflevector <32 x i8> zeroinitializer, <32 x i8> %a, <32 x i32> <i32 32, i32 0, i32 33, i32 0, i32 34, i32 0, i32 35, i32 0, i32 36, i32 0, i32 37, i32 0, i32 38, i32 0, i32 39, i32 0, i32 40, i32 0, i32 41, i32 0, i32 42, i32 0, i32 43, i32 0, i32 44, i32 0, i32 45, i32 0, i32 46, i32 0, i32 47, i32 0>
+ ret <32 x i8> %shuffle
+}
+
+define <32 x i8> @shuffle_v32i8_47_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_63_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30(<32 x i8> %a, <32 x i8> %b) {
+; AVX1-LABEL: shuffle_v32i8_47_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_63_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30:
+; AVX1: # BB#0:
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[15],xmm3[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
+; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v32i8_47_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_63_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm1[15],ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14],ymm1[31],ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28,29,30]
+; AVX2-NEXT: retq
+ %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 47, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 63, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30>
+ ret <32 x i8> %shuffle
+}
+
+define <32 x i8> @shuffle_v32i8_uu_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_63_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30(<32 x i8> %a, <32 x i8> %b) {
+; AVX1-LABEL: shuffle_v32i8_uu_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_63_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30:
+; AVX1: # BB#0:
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[15],xmm2[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
+; AVX1-NEXT: vpslldq {{.*#+}} xmm0 = zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v32i8_uu_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_63_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm1[15],ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14],ymm1[31],ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28,29,30]
+; AVX2-NEXT: retq
+ %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 undef, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 63, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30>
+ ret <32 x i8> %shuffle
+}
+
+define <32 x i8> @shuffle_v32i8_47_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_uu_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30(<32 x i8> %a, <32 x i8> %b) {
+; AVX1-LABEL: shuffle_v32i8_47_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_uu_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30:
+; AVX1: # BB#0:
+; AVX1-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT: vpslldq {{.*#+}} xmm0 = zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v32i8_47_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_uu_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm1[15],ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14],ymm1[31],ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28,29,30]
+; AVX2-NEXT: retq
+ %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 47, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 undef, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30>
+ ret <32 x i8> %shuffle
+}
+
+define <32 x i8> @shuffle_v32i8_uu_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_63_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu(<32 x i8> %a, <32 x i8> %b) {
+; AVX1-LABEL: shuffle_v32i8_uu_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_63_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu:
+; AVX1: # BB#0:
+; AVX1-NEXT: vpslldq {{.*#+}} xmm0 = zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
+; AVX1-NEXT: vpsrldq {{.*#+}} xmm1 = xmm1[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v32i8_uu_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_63_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm1[15],ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14],ymm1[31],ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28,29,30]
+; AVX2-NEXT: retq
+ %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 undef, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 63, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+ ret <32 x i8> %shuffle
+}
+
+define <32 x i8> @shuffle_v32i8_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_63_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30(<32 x i8> %a, <32 x i8> %b) {
+; AVX1-LABEL: shuffle_v32i8_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_63_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30:
+; AVX1: # BB#0:
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v32i8_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_63_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm1[15],ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14],ymm1[31],ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28,29,30]
+; AVX2-NEXT: retq
+ %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 63, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30>
+ ret <32 x i8> %shuffle
+}
+
+define <32 x i8> @shuffle_v32i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_32_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31_48(<32 x i8> %a, <32 x i8> %b) {
+; AVX1-LABEL: shuffle_v32i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_32_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31_48:
+; AVX1: # BB#0:
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
+; AVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm3[0]
+; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0]
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v32i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_32_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31_48:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm1[0],ymm0[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm1[16]
+; AVX2-NEXT: retq
+ %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 32, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 48>
+ ret <32 x i8> %shuffle
+}
+
+define <32 x i8> @shuffle_v32i8_33_34_35_36_37_38_39_40_41_42_43_44_45_46_47_00_49_50_51_52_53_54_55_56_57_58_59_60_61_62_63_16(<32 x i8> %a, <32 x i8> %b) {
+; AVX1-LABEL: shuffle_v32i8_33_34_35_36_37_38_39_40_41_42_43_44_45_46_47_00_49_50_51_52_53_54_55_56_57_58_59_60_61_62_63_16:
+; AVX1: # BB#0:
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm3[0]
+; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0]
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v32i8_33_34_35_36_37_38_39_40_41_42_43_44_45_46_47_00_49_50_51_52_53_54_55_56_57_58_59_60_61_62_63_16:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0],ymm1[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16]
+; AVX2-NEXT: retq
+ %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 00, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63, i32 16>
+ ret <32 x i8> %shuffle
+}
+
+define <32 x i8> @shuffle_v32i8_15_32_33_34_35_36_37_38_39_40_41_42_43_44_45_46_31_48_49_50_51_52_53_54_55_56_57_58_59_60_61_62(<32 x i8> %a, <32 x i8> %b) {
+; AVX1-LABEL: shuffle_v32i8_15_32_33_34_35_36_37_38_39_40_41_42_43_44_45_46_31_48_49_50_51_52_53_54_55_56_57_58_59_60_61_62:
+; AVX1: # BB#0:
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
+; AVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[15],xmm3[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
+; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v32i8_15_32_33_34_35_36_37_38_39_40_41_42_43_44_45_46_31_48_49_50_51_52_53_54_55_56_57_58_59_60_61_62:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[15],ymm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14],ymm0[31],ymm1[16,17,18,19,20,21,22,23,24,25,26,27,28,29,30]
+; AVX2-NEXT: retq
+ %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 15, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 31, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62>
+ ret <32 x i8> %shuffle
+}
+
+define <32 x i8> @shuffle_v32i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_00_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31_16(<32 x i8> %a, <32 x i8> %b) {
+; AVX1-LABEL: shuffle_v32i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_00_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31_16:
+; AVX1: # BB#0:
+; AVX1-NEXT: vpalignr {{.*#+}} xmm1 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,0]
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,0]
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v32i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_00_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31_16:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,0,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,16]
+; AVX2-NEXT: retq
+ %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 0, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16>
+ ret <32 x i8> %shuffle
+}
+
+define <32 x i8> @shuffle_v32i8_15_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_31_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30(<32 x i8> %a, <32 x i8> %b) {
+; AVX1-LABEL: shuffle_v32i8_15_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_31_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30:
+; AVX1: # BB#0:
+; AVX1-NEXT: vpalignr {{.*#+}} xmm1 = xmm0[15,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[15,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v32i8_15_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_31_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[15,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,31,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30]
+; AVX2-NEXT: retq
+ %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 15, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30>
+ ret <32 x i8> %shuffle
+}
diff --git a/test/CodeGen/X86/vector-shuffle-256-v4.ll b/test/CodeGen/X86/vector-shuffle-256-v4.ll
index bca7fb7a276d..1b42a637907c 100644
--- a/test/CodeGen/X86/vector-shuffle-256-v4.ll
+++ b/test/CodeGen/X86/vector-shuffle-256-v4.ll
@@ -1,12 +1,12 @@
-; RUN: llc < %s -mcpu=x86-64 -mattr=+avx -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
-; RUN: llc < %s -mcpu=x86-64 -mattr=+avx2 -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
+; RUN: llc < %s -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
+; RUN: llc < %s -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
target triple = "x86_64-unknown-unknown"
define <4 x double> @shuffle_v4f64_0000(<4 x double> %a, <4 x double> %b) {
; AVX1-LABEL: shuffle_v4f64_0000:
; AVX1: # BB#0:
-; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0,0]
+; AVX1-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: retq
;
@@ -21,7 +21,7 @@ define <4 x double> @shuffle_v4f64_0000(<4 x double> %a, <4 x double> %b) {
define <4 x double> @shuffle_v4f64_0001(<4 x double> %a, <4 x double> %b) {
; AVX1-LABEL: shuffle_v4f64_0001:
; AVX1: # BB#0:
-; AVX1-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm0[0,0]
+; AVX1-NEXT: vmovddup {{.*#+}} xmm1 = xmm0[0,0]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
@@ -38,7 +38,7 @@ define <4 x double> @shuffle_v4f64_0020(<4 x double> %a, <4 x double> %b) {
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
-; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0,0]
+; AVX1-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
@@ -70,7 +70,7 @@ define <4 x double> @shuffle_v4f64_1000(<4 x double> %a, <4 x double> %b) {
; AVX1-LABEL: shuffle_v4f64_1000:
; AVX1: # BB#0:
; AVX1-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
-; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0,0]
+; AVX1-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
@@ -86,7 +86,7 @@ define <4 x double> @shuffle_v4f64_2200(<4 x double> %a, <4 x double> %b) {
; AVX1-LABEL: shuffle_v4f64_2200:
; AVX1: # BB#0:
; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
-; AVX1-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[0,0,2,2]
+; AVX1-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4f64_2200:
@@ -101,9 +101,8 @@ define <4 x double> @shuffle_v4f64_3330(<4 x double> %a, <4 x double> %b) {
; AVX1-LABEL: shuffle_v4f64_3330:
; AVX1: # BB#0:
; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
-; AVX1-NEXT: vpermilpd {{.*#+}} ymm1 = ymm1[1,1,2,2]
-; AVX1-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[0,0,3,2]
-; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2],ymm1[3]
+; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3]
+; AVX1-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,1,3,2]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4f64_3330:
@@ -141,7 +140,7 @@ define <4 x double> @shuffle_v4f64_0023(<4 x double> %a, <4 x double> %b) {
define <4 x double> @shuffle_v4f64_0022(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_0022:
; ALL: # BB#0:
-; ALL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[0,0,2,2]
+; ALL-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
; ALL-NEXT: retq
%shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
ret <4 x double> %shuffle
@@ -186,7 +185,7 @@ define <4 x double> @shuffle_v4f64_1022(<4 x double> %a, <4 x double> %b) {
define <4 x double> @shuffle_v4f64_0423(<4 x double> %a, <4 x double> %b) {
; AVX1-LABEL: shuffle_v4f64_0423:
; AVX1: # BB#0:
-; AVX1-NEXT: vpermilpd {{.*#+}} ymm1 = ymm1[0,0,2,2]
+; AVX1-NEXT: vmovddup {{.*#+}} ymm1 = ymm1[0,0,2,2]
; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2,3]
; AVX1-NEXT: retq
;
@@ -202,8 +201,8 @@ define <4 x double> @shuffle_v4f64_0423(<4 x double> %a, <4 x double> %b) {
define <4 x double> @shuffle_v4f64_0462(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_0462:
; ALL: # BB#0:
-; ALL-NEXT: vpermilpd {{.*#+}} ymm1 = ymm1[0,0,2,2]
-; ALL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[0,0,2,2]
+; ALL-NEXT: vmovddup {{.*#+}} ymm1 = ymm1[0,0,2,2]
+; ALL-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3]
; ALL-NEXT: retq
%shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 4, i32 6, i32 2>
@@ -358,10 +357,19 @@ define <4 x double> @shuffle_v4f64_0415(<4 x double> %a, <4 x double> %b) {
ret <4 x double> %shuffle
}
+define <4 x double> @shuffle_v4f64_u062(<4 x double> %a, <4 x double> %b) {
+; ALL-LABEL: shuffle_v4f64_u062:
+; ALL: # BB#0:
+; ALL-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[2],ymm0[2]
+; ALL-NEXT: retq
+ %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 undef, i32 0, i32 6, i32 2>
+ ret <4 x double> %shuffle
+}
+
define <4 x i64> @shuffle_v4i64_0000(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_0000:
; AVX1: # BB#0:
-; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0,0]
+; AVX1-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: retq
;
@@ -376,7 +384,7 @@ define <4 x i64> @shuffle_v4i64_0000(<4 x i64> %a, <4 x i64> %b) {
define <4 x i64> @shuffle_v4i64_0001(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_0001:
; AVX1: # BB#0:
-; AVX1-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm0[0,0]
+; AVX1-NEXT: vmovddup {{.*#+}} xmm1 = xmm0[0,0]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
@@ -393,7 +401,7 @@ define <4 x i64> @shuffle_v4i64_0020(<4 x i64> %a, <4 x i64> %b) {
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
-; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0,0]
+; AVX1-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
@@ -441,7 +449,7 @@ define <4 x i64> @shuffle_v4i64_1000(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_1000:
; AVX1: # BB#0:
; AVX1-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
-; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0,0]
+; AVX1-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
@@ -457,7 +465,7 @@ define <4 x i64> @shuffle_v4i64_2200(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_2200:
; AVX1: # BB#0:
; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
-; AVX1-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[0,0,2,2]
+; AVX1-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4i64_2200:
@@ -472,9 +480,8 @@ define <4 x i64> @shuffle_v4i64_3330(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_3330:
; AVX1: # BB#0:
; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
-; AVX1-NEXT: vpermilpd {{.*#+}} ymm1 = ymm1[1,1,2,2]
-; AVX1-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[0,0,3,2]
-; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2],ymm1[3]
+; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3]
+; AVX1-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,1,3,2]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4i64_3330:
@@ -503,7 +510,7 @@ define <4 x i64> @shuffle_v4i64_3210(<4 x i64> %a, <4 x i64> %b) {
define <4 x i64> @shuffle_v4i64_0124(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_0124:
; AVX1: # BB#0:
-; AVX1-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0,0]
+; AVX1-NEXT: vmovddup {{.*#+}} xmm1 = xmm1[0,0]
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1
; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1,2],ymm1[3]
; AVX1-NEXT: retq
@@ -541,7 +548,7 @@ define <4 x i64> @shuffle_v4i64_0412(<4 x i64> %a, <4 x i64> %b) {
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vshufpd {{.*#+}} xmm2 = xmm0[1],xmm2[0]
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
-; AVX1-NEXT: vpermilpd {{.*#+}} ymm1 = ymm1[0,0,2,2]
+; AVX1-NEXT: vmovddup {{.*#+}} ymm1 = ymm1[0,0,2,2]
; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2,3]
; AVX1-NEXT: retq
;
@@ -560,7 +567,7 @@ define <4 x i64> @shuffle_v4i64_4012(<4 x i64> %a, <4 x i64> %b) {
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vshufpd {{.*#+}} xmm2 = xmm0[1],xmm2[0]
-; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0,0]
+; AVX1-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3]
; AVX1-NEXT: retq
@@ -638,9 +645,8 @@ define <4 x i64> @shuffle_v4i64_2u35(<4 x i64> %a, <4 x i64> %b) {
;
; AVX2-LABEL: shuffle_v4i64_2u35:
; AVX2: # BB#0:
-; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm1, %ymm1
-; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,1,3,3]
-; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5],ymm1[6,7]
+; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5,6,7]
+; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,1,3,1]
; AVX2-NEXT: retq
%shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 2, i32 undef, i32 3, i32 5>
ret <4 x i64> %shuffle
@@ -748,22 +754,53 @@ define <4 x i64> @shuffle_v4i64_0415(<4 x i64> %a, <4 x i64> %b) {
ret <4 x i64> %shuffle
}
-define <4 x i64> @stress_test1(<4 x i64> %a, <4 x i64> %b) {
-; AVX1-LABEL: stress_test1:
+define <4 x i64> @shuffle_v4i64_z4z6(<4 x i64> %a) {
+; AVX1-LABEL: shuffle_v4i64_z4z6:
; AVX1: # BB#0:
-; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm1[2,3,0,1]
-; AVX1-NEXT: vpermilpd {{.*#+}} ymm2 = ymm0[1,0,3,2]
-; AVX1-NEXT: vblendpd {{.*#+}} ymm1 = ymm2[0],ymm1[1],ymm2[2,3]
-; AVX1-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,1,3,2]
+; AVX1-NEXT: vxorpd %ymm1, %ymm1, %ymm1
+; AVX1-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[2],ymm0[2]
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v4i64_z4z6:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0,1,2,3,4,5,6,7],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,18,19,20,21,22,23]
+; AVX2-NEXT: retq
+ %shuffle = shufflevector <4 x i64> zeroinitializer, <4 x i64> %a, <4 x i32> <i32 0, i32 4, i32 0, i32 6>
+ ret <4 x i64> %shuffle
+}
+
+define <4 x i64> @shuffle_v4i64_5zuz(<4 x i64> %a) {
+; AVX1-LABEL: shuffle_v4i64_5zuz:
+; AVX1: # BB#0:
+; AVX1-NEXT: vxorpd %ymm1, %ymm1, %ymm1
; AVX1-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
; AVX1-NEXT: retq
;
-; AVX2-LABEL: stress_test1:
+; AVX2-LABEL: shuffle_v4i64_5zuz:
; AVX2: # BB#0:
-; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm1[3,1,1,0]
-; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[3,3,1,3]
-; AVX2-NEXT: vpunpckhqdq {{.*#+}} ymm0 = ymm1[1],ymm0[1],ymm1[3],ymm0[3]
+; AVX2-NEXT: vpsrldq {{.*#+}} ymm0 = ymm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[24,25,26,27,28,29,30,31],zero,zero,zero,zero,zero,zero,zero,zero
; AVX2-NEXT: retq
+ %shuffle = shufflevector <4 x i64> zeroinitializer, <4 x i64> %a, <4 x i32> <i32 5, i32 0, i32 undef, i32 0>
+ ret <4 x i64> %shuffle
+}
+
+define <4 x i64> @shuffle_v4i64_40u2(<4 x i64> %a, <4 x i64> %b) {
+; AVX1-LABEL: shuffle_v4i64_40u2:
+; AVX1: # BB#0:
+; AVX1-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[2],ymm0[2]
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v4i64_40u2:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpunpcklqdq {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[2],ymm0[2]
+; AVX2-NEXT: retq
+ %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 4, i32 0, i32 undef, i32 2>
+ ret <4 x i64> %shuffle
+}
+
+define <4 x i64> @stress_test1(<4 x i64> %a, <4 x i64> %b) {
+; ALL-LABEL: stress_test1:
+; ALL: retq
%c = shufflevector <4 x i64> %b, <4 x i64> undef, <4 x i32> <i32 3, i32 1, i32 1, i32 0>
%d = shufflevector <4 x i64> %c, <4 x i64> undef, <4 x i32> <i32 3, i32 undef, i32 2, i32 undef>
%e = shufflevector <4 x i64> %b, <4 x i64> undef, <4 x i32> <i32 3, i32 3, i32 1, i32 undef>
@@ -776,15 +813,11 @@ define <4 x i64> @insert_reg_and_zero_v4i64(i64 %a) {
; AVX1-LABEL: insert_reg_and_zero_v4i64:
; AVX1: # BB#0:
; AVX1-NEXT: vmovq %rdi, %xmm0
-; AVX1-NEXT: vxorpd %ymm1, %ymm1, %ymm1
-; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
; AVX1-NEXT: retq
;
; AVX2-LABEL: insert_reg_and_zero_v4i64:
; AVX2: # BB#0:
; AVX2-NEXT: vmovq %rdi, %xmm0
-; AVX2-NEXT: vpxor %ymm1, %ymm1, %ymm1
-; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5,6,7]
; AVX2-NEXT: retq
%v = insertelement <4 x i64> undef, i64 %a, i64 0
%shuffle = shufflevector <4 x i64> %v, <4 x i64> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
@@ -794,18 +827,14 @@ define <4 x i64> @insert_reg_and_zero_v4i64(i64 %a) {
define <4 x i64> @insert_mem_and_zero_v4i64(i64* %ptr) {
; AVX1-LABEL: insert_mem_and_zero_v4i64:
; AVX1: # BB#0:
-; AVX1-NEXT: vmovq (%rdi), %xmm0
-; AVX1-NEXT: vxorpd %ymm1, %ymm1, %ymm1
-; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
+; AVX1-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX1-NEXT: retq
;
; AVX2-LABEL: insert_mem_and_zero_v4i64:
; AVX2: # BB#0:
-; AVX2-NEXT: vmovq (%rdi), %xmm0
-; AVX2-NEXT: vpxor %ymm1, %ymm1, %ymm1
-; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5,6,7]
+; AVX2-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX2-NEXT: retq
- %a = load i64* %ptr
+ %a = load i64, i64* %ptr
%v = insertelement <4 x i64> undef, i64 %a, i64 0
%shuffle = shufflevector <4 x i64> %v, <4 x i64> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
ret <4 x i64> %shuffle
@@ -814,8 +843,9 @@ define <4 x i64> @insert_mem_and_zero_v4i64(i64* %ptr) {
define <4 x double> @insert_reg_and_zero_v4f64(double %a) {
; ALL-LABEL: insert_reg_and_zero_v4f64:
; ALL: # BB#0:
-; ALL-NEXT: vxorps %xmm1, %xmm1, %xmm1
-; ALL-NEXT: vmovsd %xmm0, %xmm1, %xmm0
+; ALL-NEXT: # kill: XMM0<def> XMM0<kill> YMM0<def>
+; ALL-NEXT: vxorpd %ymm1, %ymm1, %ymm1
+; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
; ALL-NEXT: retq
%v = insertelement <4 x double> undef, double %a, i32 0
%shuffle = shufflevector <4 x double> %v, <4 x double> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
@@ -825,9 +855,9 @@ define <4 x double> @insert_reg_and_zero_v4f64(double %a) {
define <4 x double> @insert_mem_and_zero_v4f64(double* %ptr) {
; ALL-LABEL: insert_mem_and_zero_v4f64:
; ALL: # BB#0:
-; ALL-NEXT: vmovsd (%rdi), %xmm0
+; ALL-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; ALL-NEXT: retq
- %a = load double* %ptr
+ %a = load double, double* %ptr
%v = insertelement <4 x double> undef, double %a, i32 0
%shuffle = shufflevector <4 x double> %v, <4 x double> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
ret <4 x double> %shuffle
@@ -838,7 +868,7 @@ define <4 x double> @splat_mem_v4f64(double* %ptr) {
; ALL: # BB#0:
; ALL-NEXT: vbroadcastsd (%rdi), %ymm0
; ALL-NEXT: retq
- %a = load double* %ptr
+ %a = load double, double* %ptr
%v = insertelement <4 x double> undef, double %a, i32 0
%shuffle = shufflevector <4 x double> %v, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
ret <4 x double> %shuffle
@@ -847,15 +877,14 @@ define <4 x double> @splat_mem_v4f64(double* %ptr) {
define <4 x i64> @splat_mem_v4i64(i64* %ptr) {
; AVX1-LABEL: splat_mem_v4i64:
; AVX1: # BB#0:
-; AVX1-NEXT: vmovddup (%rdi), %xmm0
-; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; AVX1-NEXT: vbroadcastsd (%rdi), %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: splat_mem_v4i64:
; AVX2: # BB#0:
; AVX2-NEXT: vbroadcastsd (%rdi), %ymm0
; AVX2-NEXT: retq
- %a = load i64* %ptr
+ %a = load i64, i64* %ptr
%v = insertelement <4 x i64> undef, i64 %a, i64 0
%shuffle = shufflevector <4 x i64> %v, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
ret <4 x i64> %shuffle
@@ -866,7 +895,7 @@ define <4 x double> @splat_mem_v4f64_2(double* %p) {
; ALL: # BB#0:
; ALL-NEXT: vbroadcastsd (%rdi), %ymm0
; ALL-NEXT: retq
- %1 = load double* %p
+ %1 = load double, double* %p
%2 = insertelement <2 x double> undef, double %1, i32 0
%3 = shufflevector <2 x double> %2, <2 x double> undef, <4 x i32> zeroinitializer
ret <4 x double> %3
@@ -875,7 +904,7 @@ define <4 x double> @splat_mem_v4f64_2(double* %p) {
define <4 x double> @splat_v4f64(<2 x double> %r) {
; AVX1-LABEL: splat_v4f64:
; AVX1: # BB#0:
-; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0,0]
+; AVX1-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: retq
;
@@ -886,3 +915,22 @@ define <4 x double> @splat_v4f64(<2 x double> %r) {
%1 = shufflevector <2 x double> %r, <2 x double> undef, <4 x i32> zeroinitializer
ret <4 x double> %1
}
+
+define <4 x double> @bitcast_v4f64_0426(<4 x double> %a, <4 x double> %b) {
+; AVX1-LABEL: bitcast_v4f64_0426:
+; AVX1: # BB#0:
+; AVX1-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: bitcast_v4f64_0426:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpunpcklqdq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
+; AVX2-NEXT: retq
+ %shuffle64 = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 4, i32 0, i32 6, i32 2>
+ %bitcast32 = bitcast <4 x double> %shuffle64 to <8 x float>
+ %shuffle32 = shufflevector <8 x float> %bitcast32, <8 x float> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
+ %bitcast16 = bitcast <8 x float> %shuffle32 to <16 x i16>
+ %shuffle16 = shufflevector <16 x i16> %bitcast16, <16 x i16> undef, <16 x i32> <i32 2, i32 3, i32 0, i32 1, i32 6, i32 7, i32 4, i32 5, i32 10, i32 11, i32 8, i32 9, i32 14, i32 15, i32 12, i32 13>
+ %bitcast64 = bitcast <16 x i16> %shuffle16 to <4 x double>
+ ret <4 x double> %bitcast64
+}
diff --git a/test/CodeGen/X86/vector-shuffle-256-v8.ll b/test/CodeGen/X86/vector-shuffle-256-v8.ll
index 77903da35583..bb07077b5559 100644
--- a/test/CodeGen/X86/vector-shuffle-256-v8.ll
+++ b/test/CodeGen/X86/vector-shuffle-256-v8.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -mcpu=x86-64 -mattr=+avx -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=AVX1
-; RUN: llc < %s -mcpu=x86-64 -mattr=+avx2 -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=AVX2
+; RUN: llc < %s -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX1
+; RUN: llc < %s -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX2
target triple = "x86_64-unknown-unknown"
@@ -91,9 +91,8 @@ define <8 x float> @shuffle_v8f32_00500000(<8 x float> %a, <8 x float> %b) {
; AVX1-LABEL: shuffle_v8f32_00500000:
; AVX1: # BB#0:
; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
-; AVX1-NEXT: vpermilps {{.*#+}} ymm1 = ymm1[u,u,1,u,4,4,4,4]
-; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,0,4,4,6,4]
-; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2],ymm0[3],ymm1[4,5,6,7]
+; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7]
+; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,1,0,4,4,4,4]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v8f32_00500000:
@@ -109,9 +108,8 @@ define <8 x float> @shuffle_v8f32_06000000(<8 x float> %a, <8 x float> %b) {
; AVX1-LABEL: shuffle_v8f32_06000000:
; AVX1: # BB#0:
; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
-; AVX1-NEXT: vpermilps {{.*#+}} ymm1 = ymm1[u,2,u,u,4,4,4,4]
-; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,1,0,0,4,5,4,4]
-; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2,3],ymm1[4,5,6,7]
+; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
+; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,2,0,0,4,4,4,4]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v8f32_06000000:
@@ -127,18 +125,14 @@ define <8 x float> @shuffle_v8f32_70000000(<8 x float> %a, <8 x float> %b) {
; AVX1-LABEL: shuffle_v8f32_70000000:
; AVX1: # BB#0:
; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
-; AVX1-NEXT: vpermilps {{.*#+}} ymm1 = ymm1[3,u,u,u,4,4,4,4]
-; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4]
-; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3],ymm1[4,5,6,7]
+; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
+; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,0,0,0,4,4,4,4]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v8f32_70000000:
; AVX2: # BB#0:
-; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT: movl $7, %eax
-; AVX2-NEXT: vpinsrd $0, %eax, %xmm1, %xmm1
-; AVX2-NEXT: vpxor %ymm2, %ymm2, %ymm2
-; AVX2-NEXT: vinserti128 $0, %xmm1, %ymm2, %ymm1
+; AVX2-NEXT: vmovd %eax, %xmm1
; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0
; AVX2-NEXT: retq
%shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
@@ -148,7 +142,7 @@ define <8 x float> @shuffle_v8f32_70000000(<8 x float> %a, <8 x float> %b) {
define <8 x float> @shuffle_v8f32_01014545(<8 x float> %a, <8 x float> %b) {
; ALL-LABEL: shuffle_v8f32_01014545:
; ALL: # BB#0:
-; ALL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[0,0,2,2]
+; ALL-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
; ALL-NEXT: retq
%shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 4, i32 5, i32 4, i32 5>
ret <8 x float> %shuffle
@@ -202,7 +196,7 @@ define <8 x float> @shuffle_v8f32_08080808(<8 x float> %a, <8 x float> %b) {
; AVX1: # BB#0:
; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[0,0,2,0]
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm1, %ymm1
-; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0,0]
+; AVX1-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
; AVX1-NEXT: retq
@@ -295,11 +289,11 @@ define <8 x float> @shuffle_v8f32_08192a3b(<8 x float> %a, <8 x float> %b) {
define <8 x float> @shuffle_v8f32_08991abb(<8 x float> %a, <8 x float> %b) {
; AVX1-LABEL: shuffle_v8f32_08991abb:
; AVX1: # BB#0:
-; AVX1-NEXT: vshufps {{.*#+}} xmm2 = xmm0[1,0],xmm1[2,0]
-; AVX1-NEXT: vshufps {{.*#+}} xmm2 = xmm2[0,2],xmm1[3,3]
-; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0]
-; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[1,1]
-; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: vshufps {{.*#+}} xmm2 = xmm0[0,0],xmm1[0,0]
+; AVX1-NEXT: vshufps {{.*#+}} xmm2 = xmm2[0,2],xmm1[1,1]
+; AVX1-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,2,3,3]
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v8f32_08991abb:
@@ -336,7 +330,7 @@ define <8 x float> @shuffle_v8f32_091b2d3f(<8 x float> %a, <8 x float> %b) {
define <8 x float> @shuffle_v8f32_09ab1def(<8 x float> %a, <8 x float> %b) {
; AVX1-LABEL: shuffle_v8f32_09ab1def:
; AVX1: # BB#0:
-; AVX1-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[1,1,2,3]
+; AVX1-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
; AVX1-NEXT: retq
@@ -426,7 +420,7 @@ define <8 x float> @shuffle_v8f32_00234467(<8 x float> %a, <8 x float> %b) {
define <8 x float> @shuffle_v8f32_00224466(<8 x float> %a, <8 x float> %b) {
; ALL-LABEL: shuffle_v8f32_00224466:
; ALL: # BB#0:
-; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6]
+; ALL-NEXT: vmovsldup {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6]
; ALL-NEXT: retq
%shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
ret <8 x float> %shuffle
@@ -444,7 +438,7 @@ define <8 x float> @shuffle_v8f32_10325476(<8 x float> %a, <8 x float> %b) {
define <8 x float> @shuffle_v8f32_11335577(<8 x float> %a, <8 x float> %b) {
; ALL-LABEL: shuffle_v8f32_11335577:
; ALL: # BB#0:
-; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7]
+; ALL-NEXT: vmovshdup {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7]
; ALL-NEXT: retq
%shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7>
ret <8 x float> %shuffle
@@ -775,6 +769,29 @@ define <8 x float> @shuffle_v8f32_fedc7654(<8 x float> %a, <8 x float> %b) {
ret <8 x float> %shuffle
}
+define <8 x float> @PR21138(<8 x float> %truc, <8 x float> %tchose) {
+; AVX1-LABEL: PR21138:
+; AVX1: # BB#0:
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm1[1,3],xmm2[1,3]
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[1,3],xmm2[1,3]
+; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: PR21138:
+; AVX2: # BB#0:
+; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = <u,u,u,u,1,3,5,7>
+; AVX2-NEXT: vpermps %ymm1, %ymm2, %ymm1
+; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = <1,3,5,7,u,u,u,u>
+; AVX2-NEXT: vpermps %ymm0, %ymm2, %ymm0
+; AVX2-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
+; AVX2-NEXT: retq
+ %shuffle = shufflevector <8 x float> %truc, <8 x float> %tchose, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+ ret <8 x float> %shuffle
+}
+
define <8 x float> @shuffle_v8f32_ba987654(<8 x float> %a, <8 x float> %b) {
; ALL-LABEL: shuffle_v8f32_ba987654:
; ALL: # BB#0:
@@ -795,6 +812,24 @@ define <8 x float> @shuffle_v8f32_ba983210(<8 x float> %a, <8 x float> %b) {
ret <8 x float> %shuffle
}
+define <8 x float> @shuffle_v8f32_80u1c4u5(<8 x float> %a, <8 x float> %b) {
+; ALL-LABEL: shuffle_v8f32_80u1c4u5:
+; ALL: # BB#0:
+; ALL-NEXT: vunpcklps {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[4],ymm0[4],ymm1[5],ymm0[5]
+; ALL-NEXT: retq
+ %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 8, i32 0, i32 undef, i32 1, i32 12, i32 4, i32 undef, i32 5>
+ ret <8 x float> %shuffle
+}
+
+define <8 x float> @shuffle_v8f32_a2u3e6f7(<8 x float> %a, <8 x float> %b) {
+; ALL-LABEL: shuffle_v8f32_a2u3e6f7:
+; ALL: # BB#0:
+; ALL-NEXT: vunpckhps {{.*#+}} ymm0 = ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[6],ymm0[6],ymm1[7],ymm0[7]
+; ALL-NEXT: retq
+ %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 10, i32 2, i32 undef, i32 3, i32 14, i32 6, i32 15, i32 7>
+ ret <8 x float> %shuffle
+}
+
define <8 x i32> @shuffle_v8i32_00000000(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_00000000:
; AVX1: # BB#0:
@@ -883,9 +918,8 @@ define <8 x i32> @shuffle_v8i32_00500000(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_00500000:
; AVX1: # BB#0:
; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
-; AVX1-NEXT: vpermilps {{.*#+}} ymm1 = ymm1[u,u,1,u,4,4,4,4]
-; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,0,4,4,6,4]
-; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2],ymm0[3],ymm1[4,5,6,7]
+; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7]
+; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,1,0,4,4,4,4]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v8i32_00500000:
@@ -901,9 +935,8 @@ define <8 x i32> @shuffle_v8i32_06000000(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_06000000:
; AVX1: # BB#0:
; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
-; AVX1-NEXT: vpermilps {{.*#+}} ymm1 = ymm1[u,2,u,u,4,4,4,4]
-; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,1,0,0,4,5,4,4]
-; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2,3],ymm1[4,5,6,7]
+; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
+; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,2,0,0,4,4,4,4]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v8i32_06000000:
@@ -919,18 +952,14 @@ define <8 x i32> @shuffle_v8i32_70000000(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_70000000:
; AVX1: # BB#0:
; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
-; AVX1-NEXT: vpermilps {{.*#+}} ymm1 = ymm1[3,u,u,u,4,4,4,4]
-; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4]
-; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3],ymm1[4,5,6,7]
+; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
+; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,0,0,0,4,4,4,4]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v8i32_70000000:
; AVX2: # BB#0:
-; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT: movl $7, %eax
-; AVX2-NEXT: vpinsrd $0, %eax, %xmm1, %xmm1
-; AVX2-NEXT: vpxor %ymm2, %ymm2, %ymm2
-; AVX2-NEXT: vinserti128 $0, %xmm1, %ymm2, %ymm1
+; AVX2-NEXT: vmovd %eax, %xmm1
; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
; AVX2-NEXT: retq
%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
@@ -940,7 +969,7 @@ define <8 x i32> @shuffle_v8i32_70000000(<8 x i32> %a, <8 x i32> %b) {
define <8 x i32> @shuffle_v8i32_01014545(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_01014545:
; AVX1: # BB#0:
-; AVX1-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[0,0,2,2]
+; AVX1-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v8i32_01014545:
@@ -954,8 +983,8 @@ define <8 x i32> @shuffle_v8i32_01014545(<8 x i32> %a, <8 x i32> %b) {
define <8 x i32> @shuffle_v8i32_00112233(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_00112233:
; AVX1: # BB#0:
-; AVX1-NEXT: vunpcklps {{.*#+}} xmm1 = xmm0[0,0,1,1]
-; AVX1-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2,2,3,3]
+; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,0,1,1]
+; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,2,3,3]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
@@ -1004,7 +1033,7 @@ define <8 x i32> @shuffle_v8i32_08080808(<8 x i32> %a, <8 x i32> %b) {
; AVX1: # BB#0:
; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[0,0,2,0]
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm1, %ymm1
-; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0,0]
+; AVX1-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
; AVX1-NEXT: retq
@@ -1059,9 +1088,8 @@ define <8 x i32> @shuffle_v8i32_9832dc76(<8 x i32> %a, <8 x i32> %b) {
;
; AVX2-LABEL: shuffle_v8i32_9832dc76:
; AVX2: # BB#0:
-; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,1,3,2,4,5,7,6]
-; AVX2-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[1,0,2,3,5,4,6,7]
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
+; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[1,0,3,2,5,4,7,6]
; AVX2-NEXT: retq
%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 9, i32 8, i32 3, i32 2, i32 13, i32 12, i32 7, i32 6>
ret <8 x i32> %shuffle
@@ -1123,8 +1151,7 @@ define <8 x i32> @shuffle_v8i32_08192a3b(<8 x i32> %a, <8 x i32> %b) {
; AVX2: # BB#0:
; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = <u,0,u,1,u,2,u,3>
; AVX2-NEXT: vpermd %ymm1, %ymm2, %ymm1
-; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = <0,u,1,u,2,u,3,u>
-; AVX2-NEXT: vpermd %ymm0, %ymm2, %ymm0
+; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
; AVX2-NEXT: retq
%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
@@ -1134,11 +1161,11 @@ define <8 x i32> @shuffle_v8i32_08192a3b(<8 x i32> %a, <8 x i32> %b) {
define <8 x i32> @shuffle_v8i32_08991abb(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_08991abb:
; AVX1: # BB#0:
-; AVX1-NEXT: vshufps {{.*#+}} xmm2 = xmm0[1,0],xmm1[2,0]
-; AVX1-NEXT: vshufps {{.*#+}} xmm2 = xmm2[0,2],xmm1[3,3]
-; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0]
-; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[1,1]
-; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: vshufps {{.*#+}} xmm2 = xmm0[0,0],xmm1[0,0]
+; AVX1-NEXT: vshufps {{.*#+}} xmm2 = xmm2[0,2],xmm1[1,1]
+; AVX1-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,2,3,3]
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v8i32_08991abb:
@@ -1164,8 +1191,7 @@ define <8 x i32> @shuffle_v8i32_091b2d3f(<8 x i32> %a, <8 x i32> %b) {
;
; AVX2-LABEL: shuffle_v8i32_091b2d3f:
; AVX2: # BB#0:
-; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = <0,u,1,u,2,u,3,u>
-; AVX2-NEXT: vpermd %ymm0, %ymm2, %ymm0
+; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
; AVX2-NEXT: retq
%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 9, i32 1, i32 11, i32 2, i32 13, i32 3, i32 15>
@@ -1175,7 +1201,7 @@ define <8 x i32> @shuffle_v8i32_091b2d3f(<8 x i32> %a, <8 x i32> %b) {
define <8 x i32> @shuffle_v8i32_09ab1def(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_09ab1def:
; AVX1: # BB#0:
-; AVX1-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[1,1,2,3]
+; AVX1-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
; AVX1-NEXT: retq
@@ -1305,7 +1331,7 @@ define <8 x i32> @shuffle_v8i32_00234467(<8 x i32> %a, <8 x i32> %b) {
define <8 x i32> @shuffle_v8i32_00224466(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_00224466:
; AVX1: # BB#0:
-; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6]
+; AVX1-NEXT: vmovsldup {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v8i32_00224466:
@@ -1333,7 +1359,7 @@ define <8 x i32> @shuffle_v8i32_10325476(<8 x i32> %a, <8 x i32> %b) {
define <8 x i32> @shuffle_v8i32_11335577(<8 x i32> %a, <8 x i32> %b) {
; AVX1-LABEL: shuffle_v8i32_11335577:
; AVX1: # BB#0:
-; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7]
+; AVX1-NEXT: vmovshdup {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v8i32_11335577:
@@ -1824,12 +1850,57 @@ define <8 x i32> @shuffle_v8i32_ba983210(<8 x i32> %a, <8 x i32> %b) {
ret <8 x i32> %shuffle
}
+define <8 x i32> @shuffle_v8i32_zuu8zuuc(<8 x i32> %a) {
+; AVX1-LABEL: shuffle_v8i32_zuu8zuuc:
+; AVX1: # BB#0:
+; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,0],ymm1[4,5],ymm0[6,4]
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v8i32_zuu8zuuc:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0,1,2,3],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,18,19]
+; AVX2-NEXT: retq
+ %shuffle = shufflevector <8 x i32> zeroinitializer, <8 x i32> %a, <8 x i32> <i32 0, i32 undef, i32 undef, i32 8, i32 0, i32 undef, i32 undef, i32 12>
+ ret <8 x i32> %shuffle
+}
+
+define <8 x i32> @shuffle_v8i32_9ubzdefz(<8 x i32> %a) {
+; AVX1-LABEL: shuffle_v8i32_9ubzdefz:
+; AVX1: # BB#0:
+; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; AVX1-NEXT: vshufps {{.*#+}} ymm1 = ymm1[3,0],ymm0[3,0],ymm1[7,4],ymm0[7,4]
+; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[1,2],ymm1[2,0],ymm0[5,6],ymm1[6,4]
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v8i32_9ubzdefz:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpsrldq {{.*#+}} ymm0 = ymm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,ymm0[20,21,22,23,24,25,26,27,28,29,30,31],zero,zero,zero,zero
+; AVX2-NEXT: retq
+ %shuffle = shufflevector <8 x i32> zeroinitializer, <8 x i32> %a, <8 x i32> <i32 9, i32 undef, i32 11, i32 0, i32 13, i32 14, i32 15, i32 0>
+ ret <8 x i32> %shuffle
+}
+
+define <8 x i32> @shuffle_v8i32_80u1b4uu(<8 x i32> %a, <8 x i32> %b) {
+; AVX1-LABEL: shuffle_v8i32_80u1b4uu:
+; AVX1: # BB#0:
+; AVX1-NEXT: vunpcklps {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[4],ymm0[4],ymm1[5],ymm0[5]
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v8i32_80u1b4uu:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[4],ymm0[4],ymm1[5],ymm0[5]
+; AVX2-NEXT: retq
+ %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 8, i32 0, i32 undef, i32 1, i32 12, i32 4, i32 undef, i32 undef>
+ ret <8 x i32> %shuffle
+}
+
define <8 x float> @splat_mem_v8f32_2(float* %p) {
; ALL-LABEL: splat_mem_v8f32_2:
; ALL: # BB#0:
; ALL-NEXT: vbroadcastss (%rdi), %ymm0
; ALL-NEXT: retq
- %1 = load float* %p
+ %1 = load float, float* %p
%2 = insertelement <4 x float> undef, float %1, i32 0
%3 = shufflevector <4 x float> %2, <4 x float> undef, <8 x i32> zeroinitializer
ret <8 x float> %3
@@ -1849,3 +1920,186 @@ define <8 x float> @splat_v8f32(<4 x float> %r) {
%1 = shufflevector <4 x float> %r, <4 x float> undef, <8 x i32> zeroinitializer
ret <8 x float> %1
}
+
+;
+; Shuffle to logical bit shifts
+;
+
+define <8 x i32> @shuffle_v8i32_z0U2zUz6(<8 x i32> %a) {
+; AVX1-LABEL: shuffle_v8i32_z0U2zUz6:
+; AVX1: # BB#0:
+; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm1[0,2],ymm0[4,6],ymm1[4,6]
+; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[2,0,3,1,6,4,7,5]
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v8i32_z0U2zUz6:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpsllq $32, %ymm0, %ymm0
+; AVX2-NEXT: retq
+ %shuffle = shufflevector <8 x i32> %a, <8 x i32> zeroinitializer, <8 x i32> <i32 8, i32 0, i32 undef, i32 2, i32 8, i32 undef, i32 8, i32 6>
+ ret <8 x i32> %shuffle
+}
+
+define <8 x i32> @shuffle_v8i32_1U3z5zUU(<8 x i32> %a) {
+; AVX1-LABEL: shuffle_v8i32_1U3z5zUU:
+; AVX1: # BB#0:
+; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[1,3],ymm1[1,3],ymm0[5,7],ymm1[5,7]
+; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7]
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v8i32_1U3z5zUU:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpsrlq $32, %ymm0, %ymm0
+; AVX2-NEXT: retq
+ %shuffle = shufflevector <8 x i32> %a, <8 x i32> zeroinitializer, <8 x i32> <i32 1, i32 undef, i32 3, i32 8, i32 5, i32 8, i32 undef, i32 undef>
+ ret <8 x i32> %shuffle
+}
+
+define <8 x i32> @shuffle_v8i32_B012F456(<8 x i32> %a, <8 x i32> %b) {
+; AVX1-LABEL: shuffle_v8i32_B012F456:
+; AVX1: # BB#0:
+; AVX1-NEXT: vshufps {{.*#+}} ymm1 = ymm1[3,0],ymm0[0,0],ymm1[7,4],ymm0[4,4]
+; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm1[0,2],ymm0[1,2],ymm1[4,6],ymm0[5,6]
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v8i32_B012F456:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm1[12,13,14,15],ymm0[0,1,2,3,4,5,6,7,8,9,10,11],ymm1[28,29,30,31],ymm0[16,17,18,19,20,21,22,23,24,25,26,27]
+; AVX2-NEXT: retq
+ %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 11, i32 0, i32 1, i32 2, i32 15, i32 4, i32 5, i32 6>
+ ret <8 x i32> %shuffle
+}
+
+define <8 x i32> @shuffle_v8i32_1238567C(<8 x i32> %a, <8 x i32> %b) {
+; AVX1-LABEL: shuffle_v8i32_1238567C:
+; AVX1: # BB#0:
+; AVX1-NEXT: vshufps {{.*#+}} ymm1 = ymm1[0,0],ymm0[3,0],ymm1[4,4],ymm0[7,4]
+; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[1,2],ymm1[2,0],ymm0[5,6],ymm1[6,4]
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v8i32_1238567C:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[4,5,6,7,8,9,10,11,12,13,14,15],ymm1[0,1,2,3],ymm0[20,21,22,23,24,25,26,27,28,29,30,31],ymm1[16,17,18,19]
+; AVX2-NEXT: retq
+ %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 2, i32 3, i32 8, i32 5, i32 6, i32 7, i32 12>
+ ret <8 x i32> %shuffle
+}
+
+define <8 x i32> @shuffle_v8i32_9AB0DEF4(<8 x i32> %a, <8 x i32> %b) {
+; AVX1-LABEL: shuffle_v8i32_9AB0DEF4:
+; AVX1: # BB#0:
+; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0],ymm1[3,0],ymm0[4,4],ymm1[7,4]
+; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm1[1,2],ymm0[2,0],ymm1[5,6],ymm0[6,4]
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v8i32_9AB0DEF4:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm1[4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0,1,2,3],ymm1[20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16,17,18,19]
+; AVX2-NEXT: retq
+ %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 9, i32 10, i32 11, i32 0, i32 13, i32 14, i32 15, i32 4>
+ ret <8 x i32> %shuffle
+}
+
+define <8 x i32> @shuffle_v8i32_389A7CDE(<8 x i32> %a, <8 x i32> %b) {
+; AVX1-LABEL: shuffle_v8i32_389A7CDE:
+; AVX1: # BB#0:
+; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[3,0],ymm1[0,0],ymm0[7,4],ymm1[4,4]
+; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm1[1,2],ymm0[4,6],ymm1[5,6]
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v8i32_389A7CDE:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[12,13,14,15],ymm1[0,1,2,3,4,5,6,7,8,9,10,11],ymm0[28,29,30,31],ymm1[16,17,18,19,20,21,22,23,24,25,26,27]
+; AVX2-NEXT: retq
+ %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 8, i32 9, i32 10, i32 7, i32 12, i32 13, i32 14>
+ ret <8 x i32> %shuffle
+}
+
+define <8 x i32> @shuffle_v8i32_30127456(<8 x i32> %a, <8 x i32> %b) {
+; AVX1-LABEL: shuffle_v8i32_30127456:
+; AVX1: # BB#0:
+; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,0,1,2,7,4,5,6]
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v8i32_30127456:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,0,1,2,7,4,5,6]
+; AVX2-NEXT: retq
+ %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 0, i32 1, i32 2, i32 7, i32 4, i32 5, i32 6>
+ ret <8 x i32> %shuffle
+}
+
+define <8 x i32> @shuffle_v8i32_12305674(<8 x i32> %a, <8 x i32> %b) {
+; AVX1-LABEL: shuffle_v8i32_12305674:
+; AVX1: # BB#0:
+; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,2,3,0,5,6,7,4]
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v8i32_12305674:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[1,2,3,0,5,6,7,4]
+; AVX2-NEXT: retq
+ %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 2, i32 3, i32 0, i32 5, i32 6, i32 7, i32 4>
+ ret <8 x i32> %shuffle
+}
+
+define <8x float> @concat_v2f32_1(<2 x float>* %tmp64, <2 x float>* %tmp65) {
+; ALL-LABEL: concat_v2f32_1:
+; ALL: # BB#0: # %entry
+; ALL-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
+; ALL-NEXT: vmovhpd (%rsi), %xmm0, %xmm0
+; ALL-NEXT: retq
+entry:
+ %tmp74 = load <2 x float>, <2 x float>* %tmp65, align 8
+ %tmp72 = load <2 x float>, <2 x float>* %tmp64, align 8
+ %tmp73 = shufflevector <2 x float> %tmp72, <2 x float> undef, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+ %tmp75 = shufflevector <2 x float> %tmp74, <2 x float> undef, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+ %tmp76 = shufflevector <8 x float> %tmp73, <8 x float> %tmp75, <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 undef, i32 undef, i32 undef, i32 undef>
+ ret <8 x float> %tmp76
+}
+
+define <8x float> @concat_v2f32_2(<2 x float>* %tmp64, <2 x float>* %tmp65) {
+; ALL-LABEL: concat_v2f32_2:
+; ALL: # BB#0: # %entry
+; ALL-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
+; ALL-NEXT: vmovhpd (%rsi), %xmm0, %xmm0
+; ALL-NEXT: retq
+entry:
+ %tmp74 = load <2 x float>, <2 x float>* %tmp65, align 8
+ %tmp72 = load <2 x float>, <2 x float>* %tmp64, align 8
+ %tmp76 = shufflevector <2 x float> %tmp72, <2 x float> %tmp74, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
+ ret <8 x float> %tmp76
+}
+
+define <8x float> @concat_v2f32_3(<2 x float>* %tmp64, <2 x float>* %tmp65) {
+; ALL-LABEL: concat_v2f32_3:
+; ALL: # BB#0: # %entry
+; ALL-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
+; ALL-NEXT: vmovhpd (%rsi), %xmm0, %xmm0
+; ALL-NEXT: retq
+entry:
+ %tmp74 = load <2 x float>, <2 x float>* %tmp65, align 8
+ %tmp72 = load <2 x float>, <2 x float>* %tmp64, align 8
+ %tmp76 = shufflevector <2 x float> %tmp72, <2 x float> %tmp74, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %res = shufflevector <4 x float> %tmp76, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
+ ret <8 x float> %res
+}
+
+define <8 x i32> @insert_mem_and_zero_v8i32(i32* %ptr) {
+; AVX1-LABEL: insert_mem_and_zero_v8i32:
+; AVX1: # BB#0:
+; AVX1-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: insert_mem_and_zero_v8i32:
+; AVX2: # BB#0:
+; AVX2-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; AVX2-NEXT: retq
+ %a = load i32, i32* %ptr
+ %v = insertelement <8 x i32> undef, i32 %a, i32 0
+ %shuffle = shufflevector <8 x i32> %v, <8 x i32> zeroinitializer, <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ ret <8 x i32> %shuffle
+}
+
diff --git a/test/CodeGen/X86/vector-shuffle-512-v16.ll b/test/CodeGen/X86/vector-shuffle-512-v16.ll
index 713ef7835cde..406d52406d95 100644
--- a/test/CodeGen/X86/vector-shuffle-512-v16.ll
+++ b/test/CodeGen/X86/vector-shuffle-512-v16.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -mcpu=x86-64 -mattr=+avx512f -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512F
-; RUN: llc < %s -mcpu=x86-64 -mattr=+avx512bw -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512BW
+; RUN: llc < %s -mcpu=x86-64 -mattr=+avx512f | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512F
+; RUN: llc < %s -mcpu=x86-64 -mattr=+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512BW
target triple = "x86_64-unknown-unknown"
diff --git a/test/CodeGen/X86/vector-shuffle-512-v8.ll b/test/CodeGen/X86/vector-shuffle-512-v8.ll
index 9edd62d7d92a..62d4af7809b6 100644
--- a/test/CodeGen/X86/vector-shuffle-512-v8.ll
+++ b/test/CodeGen/X86/vector-shuffle-512-v8.ll
@@ -1,12 +1,12 @@
-; RUN: llc < %s -mcpu=x86-64 -mattr=+avx512f -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512F
-; RUN: llc < %s -mcpu=x86-64 -mattr=+avx512bw -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512BW
+; RUN: llc < %s -mcpu=x86-64 -mattr=+avx512f | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512F
+; RUN: llc < %s -mcpu=x86-64 -mattr=+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512BW
target triple = "x86_64-unknown-unknown"
define <8 x double> @shuffle_v8f64_00000000(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_00000000:
; ALL: # BB#0:
-; ALL-NEXT: vbroadcastsd %xmm0, %zmm0
+; ALL-NEXT: vbroadcastsd %xmm0, %zmm0
; ALL-NEXT: retq
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
ret <8 x double> %shuffle
@@ -48,7 +48,7 @@ define <8 x double> @shuffle_v8f64_00003000(<8 x double> %a, <8 x double> %b) {
define <8 x double> @shuffle_v8f64_00040000(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_00040000:
; ALL: # BB#0:
-; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm1
+; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm1
; ALL-NEXT: vbroadcastsd %xmm1, %ymm1
; ALL-NEXT: vbroadcastsd %xmm0, %ymm0
; ALL-NEXT: vblendpd {{.*#+}} ymm1 = ymm0[0,1,2],ymm1[3]
@@ -61,10 +61,10 @@ define <8 x double> @shuffle_v8f64_00040000(<8 x double> %a, <8 x double> %b) {
define <8 x double> @shuffle_v8f64_00500000(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_00500000:
; ALL: # BB#0:
-; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm1
-; ALL-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[0,1,1,3]
+; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm1
+; ALL-NEXT: vblendpd {{.*#+}} ymm1 = ymm0[0],ymm1[1],ymm0[2,3]
+; ALL-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[0,0,1,0]
; ALL-NEXT: vbroadcastsd %xmm0, %ymm0
-; ALL-NEXT: vblendpd {{.*#+}} ymm1 = ymm0[0,1],ymm1[2],ymm0[3]
; ALL-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
; ALL-NEXT: retq
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0>
@@ -74,10 +74,10 @@ define <8 x double> @shuffle_v8f64_00500000(<8 x double> %a, <8 x double> %b) {
define <8 x double> @shuffle_v8f64_06000000(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_06000000:
; ALL: # BB#0:
-; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm1
-; ALL-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[0,2,2,3]
+; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm1
+; ALL-NEXT: vblendpd {{.*#+}} ymm1 = ymm0[0,1],ymm1[2],ymm0[3]
+; ALL-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[0,2,0,0]
; ALL-NEXT: vbroadcastsd %xmm0, %ymm0
-; ALL-NEXT: vblendpd {{.*#+}} ymm1 = ymm0[0],ymm1[1],ymm0[2,3]
; ALL-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
; ALL-NEXT: retq
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 6, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
@@ -87,10 +87,10 @@ define <8 x double> @shuffle_v8f64_06000000(<8 x double> %a, <8 x double> %b) {
define <8 x double> @shuffle_v8f64_70000000(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_70000000:
; ALL: # BB#0:
-; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm1
-; ALL-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[3,1,2,3]
+; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm1
+; ALL-NEXT: vblendpd {{.*#+}} ymm1 = ymm0[0,1,2],ymm1[3]
+; ALL-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[3,0,0,0]
; ALL-NEXT: vbroadcastsd %xmm0, %ymm0
-; ALL-NEXT: vblendpd {{.*#+}} ymm1 = ymm1[0],ymm0[1,2,3]
; ALL-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
; ALL-NEXT: retq
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
@@ -100,7 +100,7 @@ define <8 x double> @shuffle_v8f64_70000000(<8 x double> %a, <8 x double> %b) {
define <8 x double> @shuffle_v8f64_01014545(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_01014545:
; ALL: # BB#0:
-; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm1
+; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm1
; ALL-NEXT: vinsertf128 $1, %xmm1, %ymm1, %ymm1
; ALL-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; ALL-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
@@ -134,8 +134,8 @@ define <8 x double> @shuffle_v8f64_00001111(<8 x double> %a, <8 x double> %b) {
define <8 x double> @shuffle_v8f64_81a3c5e7(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_81a3c5e7:
; ALL: # BB#0:
-; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm2
-; ALL-NEXT: vextractf64x4 $1, %zmm1, %ymm3
+; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm2
+; ALL-NEXT: vextractf64x4 $1, %zmm1, %ymm3
; ALL-NEXT: vblendpd {{.*#+}} ymm2 = ymm3[0],ymm2[1],ymm3[2],ymm2[3]
; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3]
; ALL-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
@@ -159,9 +159,9 @@ define <8 x double> @shuffle_v8f64_08080808(<8 x double> %a, <8 x double> %b) {
define <8 x double> @shuffle_v8f64_08084c4c(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_08084c4c:
; ALL: # BB#0:
-; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm2
+; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm2
; ALL-NEXT: vinsertf128 $1, %xmm2, %ymm2, %ymm2
-; ALL-NEXT: vextractf64x4 $1, %zmm1, %ymm3
+; ALL-NEXT: vextractf64x4 $1, %zmm1, %ymm3
; ALL-NEXT: vbroadcastsd %xmm3, %ymm3
; ALL-NEXT: vblendpd {{.*#+}} ymm2 = ymm2[0],ymm3[1],ymm2[2],ymm3[3]
; ALL-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
@@ -176,8 +176,8 @@ define <8 x double> @shuffle_v8f64_08084c4c(<8 x double> %a, <8 x double> %b) {
define <8 x double> @shuffle_v8f64_8823cc67(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_8823cc67:
; ALL: # BB#0:
-; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm2
-; ALL-NEXT: vextractf64x4 $1, %zmm1, %ymm3
+; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm2
+; ALL-NEXT: vextractf64x4 $1, %zmm1, %ymm3
; ALL-NEXT: vbroadcastsd %xmm3, %ymm3
; ALL-NEXT: vblendpd {{.*#+}} ymm2 = ymm3[0,1],ymm2[2,3]
; ALL-NEXT: vbroadcastsd %xmm1, %ymm1
@@ -221,9 +221,9 @@ define <8 x double> @shuffle_v8f64_9810dc54(<8 x double> %a, <8 x double> %b) {
define <8 x double> @shuffle_v8f64_08194c5d(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_08194c5d:
; ALL: # BB#0:
-; ALL-NEXT: vextractf64x4 $1, %zmm1, %ymm2
+; ALL-NEXT: vextractf64x4 $1, %zmm1, %ymm2
; ALL-NEXT: vpermpd {{.*#+}} ymm2 = ymm2[0,0,2,1]
-; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm3
+; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm3
; ALL-NEXT: vpermpd {{.*#+}} ymm3 = ymm3[0,1,1,3]
; ALL-NEXT: vblendpd {{.*#+}} ymm2 = ymm3[0],ymm2[1],ymm3[2],ymm2[3]
; ALL-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[0,0,2,1]
@@ -238,9 +238,9 @@ define <8 x double> @shuffle_v8f64_08194c5d(<8 x double> %a, <8 x double> %b) {
define <8 x double> @shuffle_v8f64_2a3b6e7f(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_2a3b6e7f:
; ALL: # BB#0:
-; ALL-NEXT: vextractf64x4 $1, %zmm1, %ymm2
+; ALL-NEXT: vextractf64x4 $1, %zmm1, %ymm2
; ALL-NEXT: vpermpd {{.*#+}} ymm2 = ymm2[0,2,2,3]
-; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm3
+; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm3
; ALL-NEXT: vpermpd {{.*#+}} ymm3 = ymm3[2,1,3,3]
; ALL-NEXT: vblendpd {{.*#+}} ymm2 = ymm3[0],ymm2[1],ymm3[2],ymm2[3]
; ALL-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[0,2,2,3]
@@ -270,12 +270,11 @@ define <8 x double> @shuffle_v8f64_08192a3b(<8 x double> %a, <8 x double> %b) {
define <8 x double> @shuffle_v8f64_08991abb(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_08991abb:
; ALL: # BB#0:
-; ALL-NEXT: vpermilpd {{.*#+}} ymm2 = ymm0[1,0,2,2]
-; ALL-NEXT: vpermpd {{.*#+}} ymm3 = ymm1[0,2,3,3]
-; ALL-NEXT: vblendpd {{.*#+}} ymm2 = ymm2[0],ymm3[1,2,3]
-; ALL-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[0,0,1,1]
-; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
-; ALL-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
+; ALL-NEXT: vpermpd {{.*#+}} ymm2 = ymm1[0,0,1,1]
+; ALL-NEXT: vblendpd {{.*#+}} ymm2 = ymm0[0],ymm2[1,2,3]
+; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2,3]
+; ALL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[1,2,3,3]
+; ALL-NEXT: vinsertf64x4 $1, %ymm0, %zmm2, %zmm0
; ALL-NEXT: retq
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 8, i32 9, i32 9, i32 1, i32 10, i32 11, i32 11>
ret <8 x double> %shuffle
@@ -284,7 +283,7 @@ define <8 x double> @shuffle_v8f64_08991abb(<8 x double> %a, <8 x double> %b) {
define <8 x double> @shuffle_v8f64_091b2d3f(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_091b2d3f:
; ALL: # BB#0:
-; ALL-NEXT: vextractf64x4 $1, %zmm1, %ymm2
+; ALL-NEXT: vextractf64x4 $1, %zmm1, %ymm2
; ALL-NEXT: vpermpd {{.*#+}} ymm3 = ymm0[2,1,3,3]
; ALL-NEXT: vblendpd {{.*#+}} ymm2 = ymm3[0],ymm2[1],ymm3[2],ymm2[3]
; ALL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,1,1,3]
@@ -298,7 +297,7 @@ define <8 x double> @shuffle_v8f64_091b2d3f(<8 x double> %a, <8 x double> %b) {
define <8 x double> @shuffle_v8f64_09ab1def(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_09ab1def:
; ALL: # BB#0:
-; ALL-NEXT: vextractf64x4 $1, %zmm1, %ymm2
+; ALL-NEXT: vextractf64x4 $1, %zmm1, %ymm2
; ALL-NEXT: vpermilpd {{.*#+}} ymm3 = ymm0[1,0,2,2]
; ALL-NEXT: vblendpd {{.*#+}} ymm2 = ymm3[0],ymm2[1,2,3]
; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
@@ -312,7 +311,7 @@ define <8 x double> @shuffle_v8f64_00014445(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_00014445:
; ALL: # BB#0:
; ALL-NEXT: vpermpd {{.*#+}} ymm1 = ymm0[0,0,0,1]
-; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm0
+; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm0
; ALL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,0,1]
; ALL-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
; ALL-NEXT: retq
@@ -324,7 +323,7 @@ define <8 x double> @shuffle_v8f64_00204464(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_00204464:
; ALL: # BB#0:
; ALL-NEXT: vpermpd {{.*#+}} ymm1 = ymm0[0,0,2,0]
-; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm0
+; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm0
; ALL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,2,0]
; ALL-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
; ALL-NEXT: retq
@@ -336,7 +335,7 @@ define <8 x double> @shuffle_v8f64_03004744(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_03004744:
; ALL: # BB#0:
; ALL-NEXT: vpermpd {{.*#+}} ymm1 = ymm0[0,3,0,0]
-; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm0
+; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm0
; ALL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,3,0,0]
; ALL-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
; ALL-NEXT: retq
@@ -348,7 +347,7 @@ define <8 x double> @shuffle_v8f64_10005444(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_10005444:
; ALL: # BB#0:
; ALL-NEXT: vpermpd {{.*#+}} ymm1 = ymm0[1,0,0,0]
-; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm0
+; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm0
; ALL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[1,0,0,0]
; ALL-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
; ALL-NEXT: retq
@@ -360,7 +359,7 @@ define <8 x double> @shuffle_v8f64_22006644(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_22006644:
; ALL: # BB#0:
; ALL-NEXT: vpermpd {{.*#+}} ymm1 = ymm0[2,2,0,0]
-; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm0
+; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm0
; ALL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,2,0,0]
; ALL-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
; ALL-NEXT: retq
@@ -372,7 +371,7 @@ define <8 x double> @shuffle_v8f64_33307774(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_33307774:
; ALL: # BB#0:
; ALL-NEXT: vpermpd {{.*#+}} ymm1 = ymm0[3,3,3,0]
-; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm0
+; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm0
; ALL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,3,3,0]
; ALL-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
; ALL-NEXT: retq
@@ -384,7 +383,7 @@ define <8 x double> @shuffle_v8f64_32107654(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_32107654:
; ALL: # BB#0:
; ALL-NEXT: vpermpd {{.*#+}} ymm1 = ymm0[3,2,1,0]
-; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm0
+; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm0
; ALL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,2,1,0]
; ALL-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
; ALL-NEXT: retq
@@ -396,7 +395,7 @@ define <8 x double> @shuffle_v8f64_00234467(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_00234467:
; ALL: # BB#0:
; ALL-NEXT: vpermilpd {{.*#+}} ymm1 = ymm0[0,0,2,3]
-; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm0
+; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm0
; ALL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[0,0,2,3]
; ALL-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
; ALL-NEXT: retq
@@ -407,9 +406,9 @@ define <8 x double> @shuffle_v8f64_00234467(<8 x double> %a, <8 x double> %b) {
define <8 x double> @shuffle_v8f64_00224466(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_00224466:
; ALL: # BB#0:
-; ALL-NEXT: vpermilpd {{.*#+}} ymm1 = ymm0[0,0,2,2]
-; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm0
-; ALL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[0,0,2,2]
+; ALL-NEXT: vmovddup {{.*#+}} ymm1 = ymm0[0,0,2,2]
+; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm0
+; ALL-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
; ALL-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
; ALL-NEXT: retq
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
@@ -420,7 +419,7 @@ define <8 x double> @shuffle_v8f64_10325476(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_10325476:
; ALL: # BB#0:
; ALL-NEXT: vpermilpd {{.*#+}} ymm1 = ymm0[1,0,3,2]
-; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm0
+; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm0
; ALL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
; ALL-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
; ALL-NEXT: retq
@@ -432,7 +431,7 @@ define <8 x double> @shuffle_v8f64_11335577(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_11335577:
; ALL: # BB#0:
; ALL-NEXT: vpermilpd {{.*#+}} ymm1 = ymm0[1,1,3,3]
-; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm0
+; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm0
; ALL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,1,3,3]
; ALL-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
; ALL-NEXT: retq
@@ -444,7 +443,7 @@ define <8 x double> @shuffle_v8f64_10235467(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_10235467:
; ALL: # BB#0:
; ALL-NEXT: vpermilpd {{.*#+}} ymm1 = ymm0[1,0,2,3]
-; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm0
+; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm0
; ALL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,2,3]
; ALL-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
; ALL-NEXT: retq
@@ -456,7 +455,7 @@ define <8 x double> @shuffle_v8f64_10225466(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_10225466:
; ALL: # BB#0:
; ALL-NEXT: vpermilpd {{.*#+}} ymm1 = ymm0[1,0,2,2]
-; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm0
+; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm0
; ALL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,2,2]
; ALL-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
; ALL-NEXT: retq
@@ -468,7 +467,7 @@ define <8 x double> @shuffle_v8f64_00015444(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_00015444:
; ALL: # BB#0:
; ALL-NEXT: vpermpd {{.*#+}} ymm1 = ymm0[0,0,0,1]
-; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm0
+; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm0
; ALL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[1,0,0,0]
; ALL-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
; ALL-NEXT: retq
@@ -480,7 +479,7 @@ define <8 x double> @shuffle_v8f64_00204644(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_00204644:
; ALL: # BB#0:
; ALL-NEXT: vpermpd {{.*#+}} ymm1 = ymm0[0,0,2,0]
-; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm0
+; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm0
; ALL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,2,0,0]
; ALL-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
; ALL-NEXT: retq
@@ -492,7 +491,7 @@ define <8 x double> @shuffle_v8f64_03004474(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_03004474:
; ALL: # BB#0:
; ALL-NEXT: vpermpd {{.*#+}} ymm1 = ymm0[0,3,0,0]
-; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm0
+; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm0
; ALL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,3,0]
; ALL-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
; ALL-NEXT: retq
@@ -504,7 +503,7 @@ define <8 x double> @shuffle_v8f64_10004444(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_10004444:
; ALL: # BB#0:
; ALL-NEXT: vpermpd {{.*#+}} ymm1 = ymm0[1,0,0,0]
-; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm0
+; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm0
; ALL-NEXT: vbroadcastsd %xmm0, %ymm0
; ALL-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
; ALL-NEXT: retq
@@ -516,7 +515,7 @@ define <8 x double> @shuffle_v8f64_22006446(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_22006446:
; ALL: # BB#0:
; ALL-NEXT: vpermpd {{.*#+}} ymm1 = ymm0[2,2,0,0]
-; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm0
+; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm0
; ALL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,0,0,2]
; ALL-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
; ALL-NEXT: retq
@@ -528,7 +527,7 @@ define <8 x double> @shuffle_v8f64_33307474(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_33307474:
; ALL: # BB#0:
; ALL-NEXT: vpermpd {{.*#+}} ymm1 = ymm0[3,3,3,0]
-; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm0
+; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm0
; ALL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,0,3,0]
; ALL-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
; ALL-NEXT: retq
@@ -540,7 +539,7 @@ define <8 x double> @shuffle_v8f64_32104567(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_32104567:
; ALL: # BB#0:
; ALL-NEXT: vpermpd {{.*#+}} ymm1 = ymm0[3,2,1,0]
-; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm0
+; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm0
; ALL-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
; ALL-NEXT: retq
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 4, i32 5, i32 6, i32 7>
@@ -551,7 +550,7 @@ define <8 x double> @shuffle_v8f64_00236744(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_00236744:
; ALL: # BB#0:
; ALL-NEXT: vpermilpd {{.*#+}} ymm1 = ymm0[0,0,2,3]
-; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm0
+; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm0
; ALL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,3,0,0]
; ALL-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
; ALL-NEXT: retq
@@ -562,8 +561,8 @@ define <8 x double> @shuffle_v8f64_00236744(<8 x double> %a, <8 x double> %b) {
define <8 x double> @shuffle_v8f64_00226644(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_00226644:
; ALL: # BB#0:
-; ALL-NEXT: vpermilpd {{.*#+}} ymm1 = ymm0[0,0,2,2]
-; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm0
+; ALL-NEXT: vmovddup {{.*#+}} ymm1 = ymm0[0,0,2,2]
+; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm0
; ALL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,2,0,0]
; ALL-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
; ALL-NEXT: retq
@@ -575,7 +574,7 @@ define <8 x double> @shuffle_v8f64_10324567(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_10324567:
; ALL: # BB#0:
; ALL-NEXT: vpermilpd {{.*#+}} ymm1 = ymm0[1,0,3,2]
-; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm0
+; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm0
; ALL-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
; ALL-NEXT: retq
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7>
@@ -586,7 +585,7 @@ define <8 x double> @shuffle_v8f64_11334567(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_11334567:
; ALL: # BB#0:
; ALL-NEXT: vpermilpd {{.*#+}} ymm1 = ymm0[1,1,3,3]
-; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm0
+; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm0
; ALL-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
; ALL-NEXT: retq
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 4, i32 5, i32 6, i32 7>
@@ -596,7 +595,7 @@ define <8 x double> @shuffle_v8f64_11334567(<8 x double> %a, <8 x double> %b) {
define <8 x double> @shuffle_v8f64_01235467(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_01235467:
; ALL: # BB#0:
-; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm1
+; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm1
; ALL-NEXT: vpermilpd {{.*#+}} ymm1 = ymm1[1,0,2,3]
; ALL-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
; ALL-NEXT: retq
@@ -607,7 +606,7 @@ define <8 x double> @shuffle_v8f64_01235467(<8 x double> %a, <8 x double> %b) {
define <8 x double> @shuffle_v8f64_01235466(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_01235466:
; ALL: # BB#0:
-; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm1
+; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm1
; ALL-NEXT: vpermilpd {{.*#+}} ymm1 = ymm1[1,0,2,2]
; ALL-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
; ALL-NEXT: retq
@@ -618,8 +617,8 @@ define <8 x double> @shuffle_v8f64_01235466(<8 x double> %a, <8 x double> %b) {
define <8 x double> @shuffle_v8f64_002u6u44(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_002u6u44:
; ALL: # BB#0:
-; ALL-NEXT: vpermilpd {{.*#+}} ymm1 = ymm0[0,0,2,2]
-; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm0
+; ALL-NEXT: vmovddup {{.*#+}} ymm1 = ymm0[0,0,2,2]
+; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm0
; ALL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,1,0,0]
; ALL-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
; ALL-NEXT: retq
@@ -631,7 +630,7 @@ define <8 x double> @shuffle_v8f64_00uu66uu(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_00uu66uu:
; ALL: # BB#0:
; ALL-NEXT: vbroadcastsd %xmm0, %ymm1
-; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm0
+; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm0
; ALL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,2,2,3]
; ALL-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
; ALL-NEXT: retq
@@ -643,7 +642,7 @@ define <8 x double> @shuffle_v8f64_103245uu(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_103245uu:
; ALL: # BB#0:
; ALL-NEXT: vpermilpd {{.*#+}} ymm1 = ymm0[1,0,3,2]
-; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm0
+; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm0
; ALL-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
; ALL-NEXT: retq
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 4, i32 5, i32 undef, i32 undef>
@@ -654,7 +653,7 @@ define <8 x double> @shuffle_v8f64_1133uu67(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_1133uu67:
; ALL: # BB#0:
; ALL-NEXT: vpermilpd {{.*#+}} ymm1 = ymm0[1,1,3,3]
-; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm0
+; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm0
; ALL-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
; ALL-NEXT: retq
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 undef, i32 undef, i32 6, i32 7>
@@ -664,7 +663,7 @@ define <8 x double> @shuffle_v8f64_1133uu67(<8 x double> %a, <8 x double> %b) {
define <8 x double> @shuffle_v8f64_0uu354uu(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_0uu354uu:
; ALL: # BB#0:
-; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm1
+; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm1
; ALL-NEXT: vpermilpd {{.*#+}} ymm1 = ymm1[1,0,2,2]
; ALL-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
; ALL-NEXT: retq
@@ -675,8 +674,8 @@ define <8 x double> @shuffle_v8f64_0uu354uu(<8 x double> %a, <8 x double> %b) {
define <8 x double> @shuffle_v8f64_uuu3uu66(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_uuu3uu66:
; ALL: # BB#0:
-; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm1
-; ALL-NEXT: vpermilpd {{.*#+}} ymm1 = ymm1[0,0,2,2]
+; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm1
+; ALL-NEXT: vmovddup {{.*#+}} ymm1 = ymm1[0,0,2,2]
; ALL-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
; ALL-NEXT: retq
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 6, i32 6>
@@ -686,9 +685,9 @@ define <8 x double> @shuffle_v8f64_uuu3uu66(<8 x double> %a, <8 x double> %b) {
define <8 x double> @shuffle_v8f64_c348cda0(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_c348cda0:
; ALL: # BB#0:
-; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm2
+; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm2
; ALL-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm0[2,3],ymm2[0,1]
-; ALL-NEXT: vextractf64x4 $1, %zmm1, %ymm3
+; ALL-NEXT: vextractf64x4 $1, %zmm1, %ymm3
; ALL-NEXT: vbroadcastsd %xmm1, %ymm4
; ALL-NEXT: vblendpd {{.*#+}} ymm4 = ymm3[0,1,2],ymm4[3]
; ALL-NEXT: vblendpd {{.*#+}} ymm2 = ymm4[0],ymm2[1,2],ymm4[3]
@@ -704,18 +703,17 @@ define <8 x double> @shuffle_v8f64_c348cda0(<8 x double> %a, <8 x double> %b) {
define <8 x double> @shuffle_v8f64_f511235a(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_f511235a:
; ALL: # BB#0:
-; ALL-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm0[2,3,0,1]
-; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm3
-; ALL-NEXT: vpermpd {{.*#+}} ymm4 = ymm3[0,1,1,3]
-; ALL-NEXT: vblendpd {{.*#+}} ymm2 = ymm2[0,1],ymm4[2],ymm2[3]
-; ALL-NEXT: vpermilpd {{.*#+}} ymm4 = ymm1[0,0,2,2]
-; ALL-NEXT: vblendpd {{.*#+}} ymm2 = ymm2[0,1,2],ymm4[3]
+; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm2
+; ALL-NEXT: vblendpd {{.*#+}} ymm3 = ymm0[0],ymm2[1],ymm0[2,3]
+; ALL-NEXT: vpermpd {{.*#+}} ymm3 = ymm3[2,3,1,3]
+; ALL-NEXT: vmovddup {{.*#+}} ymm4 = ymm1[0,0,2,2]
+; ALL-NEXT: vblendpd {{.*#+}} ymm3 = ymm3[0,1,2],ymm4[3]
; ALL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,1,1,1]
-; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm3[1],ymm0[2,3]
-; ALL-NEXT: vextractf64x4 $1, %zmm1, %ymm1
+; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm2[1],ymm0[2,3]
+; ALL-NEXT: vextractf64x4 $1, %zmm1, %ymm1
; ALL-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[3,1,2,3]
; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3]
-; ALL-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
+; ALL-NEXT: vinsertf64x4 $1, %ymm3, %zmm0, %zmm0
; ALL-NEXT: retq
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 15, i32 5, i32 1, i32 1, i32 2, i32 3, i32 5, i32 10>
ret <8 x double> %shuffle
@@ -766,7 +764,7 @@ define <8 x i64> @shuffle_v8i64_00003000(<8 x i64> %a, <8 x i64> %b) {
define <8 x i64> @shuffle_v8i64_00040000(<8 x i64> %a, <8 x i64> %b) {
; ALL-LABEL: shuffle_v8i64_00040000:
; ALL: # BB#0:
-; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
+; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; ALL-NEXT: vpbroadcastq %xmm1, %ymm1
; ALL-NEXT: vpbroadcastq %xmm0, %ymm0
; ALL-NEXT: vpblendd {{.*#+}} ymm1 = ymm0[0,1,2,3,4,5],ymm1[6,7]
@@ -779,10 +777,10 @@ define <8 x i64> @shuffle_v8i64_00040000(<8 x i64> %a, <8 x i64> %b) {
define <8 x i64> @shuffle_v8i64_00500000(<8 x i64> %a, <8 x i64> %b) {
; ALL-LABEL: shuffle_v8i64_00500000:
; ALL: # BB#0:
-; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
-; ALL-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,1,1,3]
+; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
+; ALL-NEXT: vpblendd {{.*#+}} ymm1 = ymm0[0,1],ymm1[2,3],ymm0[4,5,6,7]
+; ALL-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,0,1,0]
; ALL-NEXT: vpbroadcastq %xmm0, %ymm0
-; ALL-NEXT: vpblendd {{.*#+}} ymm1 = ymm0[0,1,2,3],ymm1[4,5],ymm0[6,7]
; ALL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; ALL-NEXT: retq
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0>
@@ -792,10 +790,10 @@ define <8 x i64> @shuffle_v8i64_00500000(<8 x i64> %a, <8 x i64> %b) {
define <8 x i64> @shuffle_v8i64_06000000(<8 x i64> %a, <8 x i64> %b) {
; ALL-LABEL: shuffle_v8i64_06000000:
; ALL: # BB#0:
-; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
-; ALL-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
+; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
+; ALL-NEXT: vpblendd {{.*#+}} ymm1 = ymm0[0,1,2,3],ymm1[4,5],ymm0[6,7]
+; ALL-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,0,0]
; ALL-NEXT: vpbroadcastq %xmm0, %ymm0
-; ALL-NEXT: vpblendd {{.*#+}} ymm1 = ymm0[0,1],ymm1[2,3],ymm0[4,5,6,7]
; ALL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; ALL-NEXT: retq
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 6, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
@@ -805,10 +803,10 @@ define <8 x i64> @shuffle_v8i64_06000000(<8 x i64> %a, <8 x i64> %b) {
define <8 x i64> @shuffle_v8i64_70000000(<8 x i64> %a, <8 x i64> %b) {
; ALL-LABEL: shuffle_v8i64_70000000:
; ALL: # BB#0:
-; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
-; ALL-NEXT: vpermq {{.*#+}} ymm1 = ymm1[3,1,2,3]
+; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
+; ALL-NEXT: vpblendd {{.*#+}} ymm1 = ymm0[0,1,2,3,4,5],ymm1[6,7]
+; ALL-NEXT: vpermq {{.*#+}} ymm1 = ymm1[3,0,0,0]
; ALL-NEXT: vpbroadcastq %xmm0, %ymm0
-; ALL-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1],ymm0[2,3,4,5,6,7]
; ALL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; ALL-NEXT: retq
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
@@ -818,7 +816,7 @@ define <8 x i64> @shuffle_v8i64_70000000(<8 x i64> %a, <8 x i64> %b) {
define <8 x i64> @shuffle_v8i64_01014545(<8 x i64> %a, <8 x i64> %b) {
; ALL-LABEL: shuffle_v8i64_01014545:
; ALL: # BB#0:
-; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
+; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; ALL-NEXT: vinserti128 $1, %xmm1, %ymm1, %ymm1
; ALL-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
; ALL-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
@@ -852,8 +850,8 @@ define <8 x i64> @shuffle_v8i64_00001111(<8 x i64> %a, <8 x i64> %b) {
define <8 x i64> @shuffle_v8i64_81a3c5e7(<8 x i64> %a, <8 x i64> %b) {
; ALL-LABEL: shuffle_v8i64_81a3c5e7:
; ALL: # BB#0:
-; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm2
-; ALL-NEXT: vextracti64x4 $1, %zmm1, %ymm3
+; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm2
+; ALL-NEXT: vextracti64x4 $1, %zmm1, %ymm3
; ALL-NEXT: vpblendd {{.*#+}} ymm2 = ymm3[0,1],ymm2[2,3],ymm3[4,5],ymm2[6,7]
; ALL-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
; ALL-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
@@ -877,9 +875,9 @@ define <8 x i64> @shuffle_v8i64_08080808(<8 x i64> %a, <8 x i64> %b) {
define <8 x i64> @shuffle_v8i64_08084c4c(<8 x i64> %a, <8 x i64> %b) {
; ALL-LABEL: shuffle_v8i64_08084c4c:
; ALL: # BB#0:
-; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm2
+; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm2
; ALL-NEXT: vinserti128 $1, %xmm2, %ymm2, %ymm2
-; ALL-NEXT: vextracti64x4 $1, %zmm1, %ymm3
+; ALL-NEXT: vextracti64x4 $1, %zmm1, %ymm3
; ALL-NEXT: vpbroadcastq %xmm3, %ymm3
; ALL-NEXT: vpblendd {{.*#+}} ymm2 = ymm2[0,1],ymm3[2,3],ymm2[4,5],ymm3[6,7]
; ALL-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
@@ -894,8 +892,8 @@ define <8 x i64> @shuffle_v8i64_08084c4c(<8 x i64> %a, <8 x i64> %b) {
define <8 x i64> @shuffle_v8i64_8823cc67(<8 x i64> %a, <8 x i64> %b) {
; ALL-LABEL: shuffle_v8i64_8823cc67:
; ALL: # BB#0:
-; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm2
-; ALL-NEXT: vextracti64x4 $1, %zmm1, %ymm3
+; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm2
+; ALL-NEXT: vextracti64x4 $1, %zmm1, %ymm3
; ALL-NEXT: vpbroadcastq %xmm3, %ymm3
; ALL-NEXT: vpblendd {{.*#+}} ymm2 = ymm3[0,1,2,3],ymm2[4,5,6,7]
; ALL-NEXT: vpbroadcastq %xmm1, %ymm1
@@ -939,9 +937,9 @@ define <8 x i64> @shuffle_v8i64_9810dc54(<8 x i64> %a, <8 x i64> %b) {
define <8 x i64> @shuffle_v8i64_08194c5d(<8 x i64> %a, <8 x i64> %b) {
; ALL-LABEL: shuffle_v8i64_08194c5d:
; ALL: # BB#0:
-; ALL-NEXT: vextracti64x4 $1, %zmm1, %ymm2
+; ALL-NEXT: vextracti64x4 $1, %zmm1, %ymm2
; ALL-NEXT: vpermq {{.*#+}} ymm2 = ymm2[0,0,2,1]
-; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm3
+; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm3
; ALL-NEXT: vpermq {{.*#+}} ymm3 = ymm3[0,1,1,3]
; ALL-NEXT: vpblendd {{.*#+}} ymm2 = ymm3[0,1],ymm2[2,3],ymm3[4,5],ymm2[6,7]
; ALL-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,0,2,1]
@@ -956,9 +954,9 @@ define <8 x i64> @shuffle_v8i64_08194c5d(<8 x i64> %a, <8 x i64> %b) {
define <8 x i64> @shuffle_v8i64_2a3b6e7f(<8 x i64> %a, <8 x i64> %b) {
; ALL-LABEL: shuffle_v8i64_2a3b6e7f:
; ALL: # BB#0:
-; ALL-NEXT: vextracti64x4 $1, %zmm1, %ymm2
+; ALL-NEXT: vextracti64x4 $1, %zmm1, %ymm2
; ALL-NEXT: vpermq {{.*#+}} ymm2 = ymm2[0,2,2,3]
-; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm3
+; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm3
; ALL-NEXT: vpermq {{.*#+}} ymm3 = ymm3[2,1,3,3]
; ALL-NEXT: vpblendd {{.*#+}} ymm2 = ymm3[0,1],ymm2[2,3],ymm3[4,5],ymm2[6,7]
; ALL-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
@@ -988,12 +986,11 @@ define <8 x i64> @shuffle_v8i64_08192a3b(<8 x i64> %a, <8 x i64> %b) {
define <8 x i64> @shuffle_v8i64_08991abb(<8 x i64> %a, <8 x i64> %b) {
; ALL-LABEL: shuffle_v8i64_08991abb:
; ALL: # BB#0:
-; ALL-NEXT: vpshufd {{.*#+}} ymm2 = ymm0[2,3,2,3,6,7,6,7]
-; ALL-NEXT: vpermq {{.*#+}} ymm3 = ymm1[0,2,3,3]
-; ALL-NEXT: vpblendd {{.*#+}} ymm2 = ymm2[0,1],ymm3[2,3,4,5,6,7]
-; ALL-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,0,1,1]
-; ALL-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5,6,7]
-; ALL-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
+; ALL-NEXT: vpermq {{.*#+}} ymm2 = ymm1[0,0,1,1]
+; ALL-NEXT: vpblendd {{.*#+}} ymm2 = ymm0[0,1],ymm2[2,3,4,5,6,7]
+; ALL-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5,6,7]
+; ALL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[1,2,3,3]
+; ALL-NEXT: vinserti64x4 $1, %ymm0, %zmm2, %zmm0
; ALL-NEXT: retq
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 8, i32 9, i32 9, i32 1, i32 10, i32 11, i32 11>
ret <8 x i64> %shuffle
@@ -1002,7 +999,7 @@ define <8 x i64> @shuffle_v8i64_08991abb(<8 x i64> %a, <8 x i64> %b) {
define <8 x i64> @shuffle_v8i64_091b2d3f(<8 x i64> %a, <8 x i64> %b) {
; ALL-LABEL: shuffle_v8i64_091b2d3f:
; ALL: # BB#0:
-; ALL-NEXT: vextracti64x4 $1, %zmm1, %ymm2
+; ALL-NEXT: vextracti64x4 $1, %zmm1, %ymm2
; ALL-NEXT: vpermq {{.*#+}} ymm3 = ymm0[2,1,3,3]
; ALL-NEXT: vpblendd {{.*#+}} ymm2 = ymm3[0,1],ymm2[2,3],ymm3[4,5],ymm2[6,7]
; ALL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,1,3]
@@ -1016,7 +1013,7 @@ define <8 x i64> @shuffle_v8i64_091b2d3f(<8 x i64> %a, <8 x i64> %b) {
define <8 x i64> @shuffle_v8i64_09ab1def(<8 x i64> %a, <8 x i64> %b) {
; ALL-LABEL: shuffle_v8i64_09ab1def:
; ALL: # BB#0:
-; ALL-NEXT: vextracti64x4 $1, %zmm1, %ymm2
+; ALL-NEXT: vextracti64x4 $1, %zmm1, %ymm2
; ALL-NEXT: vpshufd {{.*#+}} ymm3 = ymm0[2,3,2,3,6,7,6,7]
; ALL-NEXT: vpblendd {{.*#+}} ymm2 = ymm3[0,1],ymm2[2,3,4,5,6,7]
; ALL-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5,6,7]
@@ -1030,7 +1027,7 @@ define <8 x i64> @shuffle_v8i64_00014445(<8 x i64> %a, <8 x i64> %b) {
; ALL-LABEL: shuffle_v8i64_00014445:
; ALL: # BB#0:
; ALL-NEXT: vpermq {{.*#+}} ymm1 = ymm0[0,0,0,1]
-; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
+; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; ALL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
; ALL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; ALL-NEXT: retq
@@ -1042,7 +1039,7 @@ define <8 x i64> @shuffle_v8i64_00204464(<8 x i64> %a, <8 x i64> %b) {
; ALL-LABEL: shuffle_v8i64_00204464:
; ALL: # BB#0:
; ALL-NEXT: vpermq {{.*#+}} ymm1 = ymm0[0,0,2,0]
-; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
+; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; ALL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,2,0]
; ALL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; ALL-NEXT: retq
@@ -1054,7 +1051,7 @@ define <8 x i64> @shuffle_v8i64_03004744(<8 x i64> %a, <8 x i64> %b) {
; ALL-LABEL: shuffle_v8i64_03004744:
; ALL: # BB#0:
; ALL-NEXT: vpermq {{.*#+}} ymm1 = ymm0[0,3,0,0]
-; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
+; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; ALL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,0]
; ALL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; ALL-NEXT: retq
@@ -1066,7 +1063,7 @@ define <8 x i64> @shuffle_v8i64_10005444(<8 x i64> %a, <8 x i64> %b) {
; ALL-LABEL: shuffle_v8i64_10005444:
; ALL: # BB#0:
; ALL-NEXT: vpermq {{.*#+}} ymm1 = ymm0[1,0,0,0]
-; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
+; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; ALL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[1,0,0,0]
; ALL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; ALL-NEXT: retq
@@ -1078,7 +1075,7 @@ define <8 x i64> @shuffle_v8i64_22006644(<8 x i64> %a, <8 x i64> %b) {
; ALL-LABEL: shuffle_v8i64_22006644:
; ALL: # BB#0:
; ALL-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,2,0,0]
-; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
+; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; ALL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,2,0,0]
; ALL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; ALL-NEXT: retq
@@ -1090,7 +1087,7 @@ define <8 x i64> @shuffle_v8i64_33307774(<8 x i64> %a, <8 x i64> %b) {
; ALL-LABEL: shuffle_v8i64_33307774:
; ALL: # BB#0:
; ALL-NEXT: vpermq {{.*#+}} ymm1 = ymm0[3,3,3,0]
-; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
+; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; ALL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,3,3,0]
; ALL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; ALL-NEXT: retq
@@ -1102,7 +1099,7 @@ define <8 x i64> @shuffle_v8i64_32107654(<8 x i64> %a, <8 x i64> %b) {
; ALL-LABEL: shuffle_v8i64_32107654:
; ALL: # BB#0:
; ALL-NEXT: vpermq {{.*#+}} ymm1 = ymm0[3,2,1,0]
-; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
+; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; ALL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,2,1,0]
; ALL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; ALL-NEXT: retq
@@ -1114,7 +1111,7 @@ define <8 x i64> @shuffle_v8i64_00234467(<8 x i64> %a, <8 x i64> %b) {
; ALL-LABEL: shuffle_v8i64_00234467:
; ALL: # BB#0:
; ALL-NEXT: vpermq {{.*#+}} ymm1 = ymm0[0,0,2,3]
-; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
+; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; ALL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,2,3]
; ALL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; ALL-NEXT: retq
@@ -1126,7 +1123,7 @@ define <8 x i64> @shuffle_v8i64_00224466(<8 x i64> %a, <8 x i64> %b) {
; ALL-LABEL: shuffle_v8i64_00224466:
; ALL: # BB#0:
; ALL-NEXT: vpshufd {{.*#+}} ymm1 = ymm0[0,1,0,1,4,5,4,5]
-; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
+; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; ALL-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,1,0,1,4,5,4,5]
; ALL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; ALL-NEXT: retq
@@ -1138,7 +1135,7 @@ define <8 x i64> @shuffle_v8i64_10325476(<8 x i64> %a, <8 x i64> %b) {
; ALL-LABEL: shuffle_v8i64_10325476:
; ALL: # BB#0:
; ALL-NEXT: vpshufd {{.*#+}} ymm1 = ymm0[2,3,0,1,6,7,4,5]
-; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
+; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; ALL-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
; ALL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; ALL-NEXT: retq
@@ -1150,7 +1147,7 @@ define <8 x i64> @shuffle_v8i64_11335577(<8 x i64> %a, <8 x i64> %b) {
; ALL-LABEL: shuffle_v8i64_11335577:
; ALL: # BB#0:
; ALL-NEXT: vpshufd {{.*#+}} ymm1 = ymm0[2,3,2,3,6,7,6,7]
-; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
+; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; ALL-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,3,2,3,6,7,6,7]
; ALL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; ALL-NEXT: retq
@@ -1162,7 +1159,7 @@ define <8 x i64> @shuffle_v8i64_10235467(<8 x i64> %a, <8 x i64> %b) {
; ALL-LABEL: shuffle_v8i64_10235467:
; ALL: # BB#0:
; ALL-NEXT: vpermq {{.*#+}} ymm1 = ymm0[1,0,2,3]
-; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
+; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; ALL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[1,0,2,3]
; ALL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; ALL-NEXT: retq
@@ -1174,7 +1171,7 @@ define <8 x i64> @shuffle_v8i64_10225466(<8 x i64> %a, <8 x i64> %b) {
; ALL-LABEL: shuffle_v8i64_10225466:
; ALL: # BB#0:
; ALL-NEXT: vpermq {{.*#+}} ymm1 = ymm0[1,0,2,2]
-; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
+; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; ALL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[1,0,2,2]
; ALL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; ALL-NEXT: retq
@@ -1186,7 +1183,7 @@ define <8 x i64> @shuffle_v8i64_00015444(<8 x i64> %a, <8 x i64> %b) {
; ALL-LABEL: shuffle_v8i64_00015444:
; ALL: # BB#0:
; ALL-NEXT: vpermq {{.*#+}} ymm1 = ymm0[0,0,0,1]
-; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
+; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; ALL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[1,0,0,0]
; ALL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; ALL-NEXT: retq
@@ -1198,7 +1195,7 @@ define <8 x i64> @shuffle_v8i64_00204644(<8 x i64> %a, <8 x i64> %b) {
; ALL-LABEL: shuffle_v8i64_00204644:
; ALL: # BB#0:
; ALL-NEXT: vpermq {{.*#+}} ymm1 = ymm0[0,0,2,0]
-; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
+; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; ALL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,0,0]
; ALL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; ALL-NEXT: retq
@@ -1210,7 +1207,7 @@ define <8 x i64> @shuffle_v8i64_03004474(<8 x i64> %a, <8 x i64> %b) {
; ALL-LABEL: shuffle_v8i64_03004474:
; ALL: # BB#0:
; ALL-NEXT: vpermq {{.*#+}} ymm1 = ymm0[0,3,0,0]
-; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
+; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; ALL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,3,0]
; ALL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; ALL-NEXT: retq
@@ -1222,7 +1219,7 @@ define <8 x i64> @shuffle_v8i64_10004444(<8 x i64> %a, <8 x i64> %b) {
; ALL-LABEL: shuffle_v8i64_10004444:
; ALL: # BB#0:
; ALL-NEXT: vpermq {{.*#+}} ymm1 = ymm0[1,0,0,0]
-; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
+; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; ALL-NEXT: vpbroadcastq %xmm0, %ymm0
; ALL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; ALL-NEXT: retq
@@ -1234,7 +1231,7 @@ define <8 x i64> @shuffle_v8i64_22006446(<8 x i64> %a, <8 x i64> %b) {
; ALL-LABEL: shuffle_v8i64_22006446:
; ALL: # BB#0:
; ALL-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,2,0,0]
-; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
+; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; ALL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,0,0,2]
; ALL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; ALL-NEXT: retq
@@ -1246,7 +1243,7 @@ define <8 x i64> @shuffle_v8i64_33307474(<8 x i64> %a, <8 x i64> %b) {
; ALL-LABEL: shuffle_v8i64_33307474:
; ALL: # BB#0:
; ALL-NEXT: vpermq {{.*#+}} ymm1 = ymm0[3,3,3,0]
-; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
+; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; ALL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,0,3,0]
; ALL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; ALL-NEXT: retq
@@ -1258,7 +1255,7 @@ define <8 x i64> @shuffle_v8i64_32104567(<8 x i64> %a, <8 x i64> %b) {
; ALL-LABEL: shuffle_v8i64_32104567:
; ALL: # BB#0:
; ALL-NEXT: vpermq {{.*#+}} ymm1 = ymm0[3,2,1,0]
-; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
+; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; ALL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; ALL-NEXT: retq
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 4, i32 5, i32 6, i32 7>
@@ -1269,7 +1266,7 @@ define <8 x i64> @shuffle_v8i64_00236744(<8 x i64> %a, <8 x i64> %b) {
; ALL-LABEL: shuffle_v8i64_00236744:
; ALL: # BB#0:
; ALL-NEXT: vpermq {{.*#+}} ymm1 = ymm0[0,0,2,3]
-; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
+; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; ALL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,0,0]
; ALL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; ALL-NEXT: retq
@@ -1281,7 +1278,7 @@ define <8 x i64> @shuffle_v8i64_00226644(<8 x i64> %a, <8 x i64> %b) {
; ALL-LABEL: shuffle_v8i64_00226644:
; ALL: # BB#0:
; ALL-NEXT: vpshufd {{.*#+}} ymm1 = ymm0[0,1,0,1,4,5,4,5]
-; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
+; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; ALL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,2,0,0]
; ALL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; ALL-NEXT: retq
@@ -1293,7 +1290,7 @@ define <8 x i64> @shuffle_v8i64_10324567(<8 x i64> %a, <8 x i64> %b) {
; ALL-LABEL: shuffle_v8i64_10324567:
; ALL: # BB#0:
; ALL-NEXT: vpshufd {{.*#+}} ymm1 = ymm0[2,3,0,1,6,7,4,5]
-; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
+; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; ALL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; ALL-NEXT: retq
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7>
@@ -1304,7 +1301,7 @@ define <8 x i64> @shuffle_v8i64_11334567(<8 x i64> %a, <8 x i64> %b) {
; ALL-LABEL: shuffle_v8i64_11334567:
; ALL: # BB#0:
; ALL-NEXT: vpshufd {{.*#+}} ymm1 = ymm0[2,3,2,3,6,7,6,7]
-; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
+; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; ALL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; ALL-NEXT: retq
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 4, i32 5, i32 6, i32 7>
@@ -1314,7 +1311,7 @@ define <8 x i64> @shuffle_v8i64_11334567(<8 x i64> %a, <8 x i64> %b) {
define <8 x i64> @shuffle_v8i64_01235467(<8 x i64> %a, <8 x i64> %b) {
; ALL-LABEL: shuffle_v8i64_01235467:
; ALL: # BB#0:
-; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
+; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; ALL-NEXT: vpermq {{.*#+}} ymm1 = ymm1[1,0,2,3]
; ALL-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; ALL-NEXT: retq
@@ -1325,7 +1322,7 @@ define <8 x i64> @shuffle_v8i64_01235467(<8 x i64> %a, <8 x i64> %b) {
define <8 x i64> @shuffle_v8i64_01235466(<8 x i64> %a, <8 x i64> %b) {
; ALL-LABEL: shuffle_v8i64_01235466:
; ALL: # BB#0:
-; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
+; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; ALL-NEXT: vpermq {{.*#+}} ymm1 = ymm1[1,0,2,2]
; ALL-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; ALL-NEXT: retq
@@ -1337,7 +1334,7 @@ define <8 x i64> @shuffle_v8i64_002u6u44(<8 x i64> %a, <8 x i64> %b) {
; ALL-LABEL: shuffle_v8i64_002u6u44:
; ALL: # BB#0:
; ALL-NEXT: vpshufd {{.*#+}} ymm1 = ymm0[0,1,0,1,4,5,4,5]
-; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
+; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; ALL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,1,0,0]
; ALL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; ALL-NEXT: retq
@@ -1349,7 +1346,7 @@ define <8 x i64> @shuffle_v8i64_00uu66uu(<8 x i64> %a, <8 x i64> %b) {
; ALL-LABEL: shuffle_v8i64_00uu66uu:
; ALL: # BB#0:
; ALL-NEXT: vpbroadcastq %xmm0, %ymm1
-; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
+; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; ALL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,2,2,3]
; ALL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; ALL-NEXT: retq
@@ -1361,7 +1358,7 @@ define <8 x i64> @shuffle_v8i64_103245uu(<8 x i64> %a, <8 x i64> %b) {
; ALL-LABEL: shuffle_v8i64_103245uu:
; ALL: # BB#0:
; ALL-NEXT: vpshufd {{.*#+}} ymm1 = ymm0[2,3,0,1,6,7,4,5]
-; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
+; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; ALL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; ALL-NEXT: retq
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 4, i32 5, i32 undef, i32 undef>
@@ -1372,7 +1369,7 @@ define <8 x i64> @shuffle_v8i64_1133uu67(<8 x i64> %a, <8 x i64> %b) {
; ALL-LABEL: shuffle_v8i64_1133uu67:
; ALL: # BB#0:
; ALL-NEXT: vpshufd {{.*#+}} ymm1 = ymm0[2,3,2,3,6,7,6,7]
-; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
+; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; ALL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; ALL-NEXT: retq
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 undef, i32 undef, i32 6, i32 7>
@@ -1382,7 +1379,7 @@ define <8 x i64> @shuffle_v8i64_1133uu67(<8 x i64> %a, <8 x i64> %b) {
define <8 x i64> @shuffle_v8i64_0uu354uu(<8 x i64> %a, <8 x i64> %b) {
; ALL-LABEL: shuffle_v8i64_0uu354uu:
; ALL: # BB#0:
-; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
+; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; ALL-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[2,3,0,1,6,7,4,5]
; ALL-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; ALL-NEXT: retq
@@ -1393,7 +1390,7 @@ define <8 x i64> @shuffle_v8i64_0uu354uu(<8 x i64> %a, <8 x i64> %b) {
define <8 x i64> @shuffle_v8i64_uuu3uu66(<8 x i64> %a, <8 x i64> %b) {
; ALL-LABEL: shuffle_v8i64_uuu3uu66:
; ALL: # BB#0:
-; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
+; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; ALL-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[0,1,0,1,4,5,4,5]
; ALL-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; ALL-NEXT: retq
@@ -1404,7 +1401,7 @@ define <8 x i64> @shuffle_v8i64_uuu3uu66(<8 x i64> %a, <8 x i64> %b) {
define <8 x i64> @shuffle_v8i64_6caa87e5(<8 x i64> %a, <8 x i64> %b) {
; ALL-LABEL: shuffle_v8i64_6caa87e5:
; ALL: # BB#0:
-; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
+; ALL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; ALL-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
; ALL-NEXT: vextracti64x4 $1, %zmm1, %ymm2
; ALL-NEXT: vpblendd {{.*#+}} ymm3 = ymm1[0,1,2,3],ymm2[4,5],ymm1[6,7]
diff --git a/test/CodeGen/X86/vector-shuffle-combining.ll b/test/CodeGen/X86/vector-shuffle-combining.ll
index 4e2bf87fdf64..92c59e2fca08 100644
--- a/test/CodeGen/X86/vector-shuffle-combining.ll
+++ b/test/CodeGen/X86/vector-shuffle-combining.ll
@@ -275,16 +275,18 @@ define <4 x i32> @combine_bitwise_ops_test6(<4 x i32> %a, <4 x i32> %b, <4 x i32
define <4 x i32> @combine_bitwise_ops_test1b(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; SSE2-LABEL: combine_bitwise_ops_test1b:
; SSE2: # BB#0:
-; SSE2-NEXT: andps %xmm1, %xmm0
-; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm2[1,3]
-; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
+; SSE2-NEXT: pand %xmm1, %xmm0
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,3,2,3]
+; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: combine_bitwise_ops_test1b:
; SSSE3: # BB#0:
-; SSSE3-NEXT: andps %xmm1, %xmm0
-; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm2[1,3]
-; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
+; SSSE3-NEXT: pand %xmm1, %xmm0
+; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,3,2,3]
+; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: combine_bitwise_ops_test1b:
@@ -313,16 +315,18 @@ define <4 x i32> @combine_bitwise_ops_test1b(<4 x i32> %a, <4 x i32> %b, <4 x i3
define <4 x i32> @combine_bitwise_ops_test2b(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; SSE2-LABEL: combine_bitwise_ops_test2b:
; SSE2: # BB#0:
-; SSE2-NEXT: orps %xmm1, %xmm0
-; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm2[1,3]
-; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
+; SSE2-NEXT: por %xmm1, %xmm0
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,3,2,3]
+; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: combine_bitwise_ops_test2b:
; SSSE3: # BB#0:
-; SSSE3-NEXT: orps %xmm1, %xmm0
-; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm2[1,3]
-; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
+; SSSE3-NEXT: por %xmm1, %xmm0
+; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,3,2,3]
+; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: combine_bitwise_ops_test2b:
@@ -352,17 +356,13 @@ define <4 x i32> @combine_bitwise_ops_test3b(<4 x i32> %a, <4 x i32> %b, <4 x i3
; SSE2-LABEL: combine_bitwise_ops_test3b:
; SSE2: # BB#0:
; SSE2-NEXT: xorps %xmm1, %xmm0
-; SSE2-NEXT: xorps %xmm1, %xmm1
-; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[1,3]
-; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
+; SSE2-NEXT: andps {{.*}}(%rip), %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: combine_bitwise_ops_test3b:
; SSSE3: # BB#0:
; SSSE3-NEXT: xorps %xmm1, %xmm0
-; SSSE3-NEXT: xorps %xmm1, %xmm1
-; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[1,3]
-; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
+; SSSE3-NEXT: andps {{.*}}(%rip), %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: combine_bitwise_ops_test3b:
@@ -394,18 +394,18 @@ define <4 x i32> @combine_bitwise_ops_test3b(<4 x i32> %a, <4 x i32> %b, <4 x i3
define <4 x i32> @combine_bitwise_ops_test4b(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; SSE2-LABEL: combine_bitwise_ops_test4b:
; SSE2: # BB#0:
-; SSE2-NEXT: andps %xmm1, %xmm0
-; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm0[1,3]
-; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2,1,3]
-; SSE2-NEXT: movaps %xmm2, %xmm0
+; SSE2-NEXT: pand %xmm1, %xmm0
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,3,2,3]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3]
+; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: combine_bitwise_ops_test4b:
; SSSE3: # BB#0:
-; SSSE3-NEXT: andps %xmm1, %xmm0
-; SSSE3-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm0[1,3]
-; SSSE3-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2,1,3]
-; SSSE3-NEXT: movaps %xmm2, %xmm0
+; SSSE3-NEXT: pand %xmm1, %xmm0
+; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,3,2,3]
+; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3]
+; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: combine_bitwise_ops_test4b:
@@ -434,18 +434,18 @@ define <4 x i32> @combine_bitwise_ops_test4b(<4 x i32> %a, <4 x i32> %b, <4 x i3
define <4 x i32> @combine_bitwise_ops_test5b(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; SSE2-LABEL: combine_bitwise_ops_test5b:
; SSE2: # BB#0:
-; SSE2-NEXT: orps %xmm1, %xmm0
-; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm0[1,3]
-; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2,1,3]
-; SSE2-NEXT: movaps %xmm2, %xmm0
+; SSE2-NEXT: por %xmm1, %xmm0
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,3,2,3]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3]
+; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: combine_bitwise_ops_test5b:
; SSSE3: # BB#0:
-; SSSE3-NEXT: orps %xmm1, %xmm0
-; SSSE3-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm0[1,3]
-; SSSE3-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2,1,3]
-; SSSE3-NEXT: movaps %xmm2, %xmm0
+; SSSE3-NEXT: por %xmm1, %xmm0
+; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,3,2,3]
+; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3]
+; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: combine_bitwise_ops_test5b:
@@ -475,19 +475,13 @@ define <4 x i32> @combine_bitwise_ops_test6b(<4 x i32> %a, <4 x i32> %b, <4 x i3
; SSE2-LABEL: combine_bitwise_ops_test6b:
; SSE2: # BB#0:
; SSE2-NEXT: xorps %xmm1, %xmm0
-; SSE2-NEXT: xorps %xmm1, %xmm1
-; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2],xmm0[1,3]
-; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2,1,3]
-; SSE2-NEXT: movaps %xmm1, %xmm0
+; SSE2-NEXT: andps {{.*}}(%rip), %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: combine_bitwise_ops_test6b:
; SSSE3: # BB#0:
; SSSE3-NEXT: xorps %xmm1, %xmm0
-; SSSE3-NEXT: xorps %xmm1, %xmm1
-; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2],xmm0[1,3]
-; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2,1,3]
-; SSSE3-NEXT: movaps %xmm1, %xmm0
+; SSSE3-NEXT: andps {{.*}}(%rip), %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: combine_bitwise_ops_test6b:
@@ -517,17 +511,42 @@ define <4 x i32> @combine_bitwise_ops_test6b(<4 x i32> %a, <4 x i32> %b, <4 x i3
}
define <4 x i32> @combine_bitwise_ops_test1c(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
-; SSE-LABEL: combine_bitwise_ops_test1c:
-; SSE: # BB#0:
-; SSE-NEXT: andps %xmm1, %xmm0
-; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm2[1,3]
-; SSE-NEXT: retq
+; SSE2-LABEL: combine_bitwise_ops_test1c:
+; SSE2: # BB#0:
+; SSE2-NEXT: pand %xmm1, %xmm0
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,3,2,3]
+; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE2-NEXT: retq
;
-; AVX-LABEL: combine_bitwise_ops_test1c:
-; AVX: # BB#0:
-; AVX-NEXT: vandps %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm2[1,3]
-; AVX-NEXT: retq
+; SSSE3-LABEL: combine_bitwise_ops_test1c:
+; SSSE3: # BB#0:
+; SSSE3-NEXT: pand %xmm1, %xmm0
+; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,3,2,3]
+; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: combine_bitwise_ops_test1c:
+; SSE41: # BB#0:
+; SSE41-NEXT: pand %xmm1, %xmm0
+; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
+; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
+; SSE41-NEXT: retq
+;
+; AVX1-LABEL: combine_bitwise_ops_test1c:
+; AVX1: # BB#0:
+; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
+; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: combine_bitwise_ops_test1c:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
+; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
+; AVX2-NEXT: retq
%shuf1 = shufflevector <4 x i32> %a, <4 x i32> %c, <4 x i32><i32 0, i32 2, i32 5, i32 7>
%shuf2 = shufflevector <4 x i32> %b, <4 x i32> %c, <4 x i32><i32 0, i32 2, i32 5, i32 7>
%and = and <4 x i32> %shuf1, %shuf2
@@ -535,17 +554,42 @@ define <4 x i32> @combine_bitwise_ops_test1c(<4 x i32> %a, <4 x i32> %b, <4 x i3
}
define <4 x i32> @combine_bitwise_ops_test2c(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
-; SSE-LABEL: combine_bitwise_ops_test2c:
-; SSE: # BB#0:
-; SSE-NEXT: orps %xmm1, %xmm0
-; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm2[1,3]
-; SSE-NEXT: retq
+; SSE2-LABEL: combine_bitwise_ops_test2c:
+; SSE2: # BB#0:
+; SSE2-NEXT: por %xmm1, %xmm0
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,3,2,3]
+; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE2-NEXT: retq
;
-; AVX-LABEL: combine_bitwise_ops_test2c:
-; AVX: # BB#0:
-; AVX-NEXT: vorps %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm2[1,3]
-; AVX-NEXT: retq
+; SSSE3-LABEL: combine_bitwise_ops_test2c:
+; SSSE3: # BB#0:
+; SSSE3-NEXT: por %xmm1, %xmm0
+; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,3,2,3]
+; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: combine_bitwise_ops_test2c:
+; SSE41: # BB#0:
+; SSE41-NEXT: por %xmm1, %xmm0
+; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
+; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
+; SSE41-NEXT: retq
+;
+; AVX1-LABEL: combine_bitwise_ops_test2c:
+; AVX1: # BB#0:
+; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
+; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: combine_bitwise_ops_test2c:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
+; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
+; AVX2-NEXT: retq
%shuf1 = shufflevector <4 x i32> %a, <4 x i32> %c, <4 x i32><i32 0, i32 2, i32 5, i32 7>
%shuf2 = shufflevector <4 x i32> %b, <4 x i32> %c, <4 x i32><i32 0, i32 2, i32 5, i32 7>
%or = or <4 x i32> %shuf1, %shuf2
@@ -554,29 +598,33 @@ define <4 x i32> @combine_bitwise_ops_test2c(<4 x i32> %a, <4 x i32> %b, <4 x i3
define <4 x i32> @combine_bitwise_ops_test3c(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; SSE2-LABEL: combine_bitwise_ops_test3c:
-; SSE2: # BB#0:
-; SSE2-NEXT: xorps %xmm1, %xmm0
-; SSE2-NEXT: xorps %xmm1, %xmm1
-; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[1,3]
-; SSE2-NEXT: retq
+; SSE2: # BB#0:
+; SSE2-NEXT: pxor %xmm1, %xmm0
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,2]
+; SSE2-NEXT: pxor %xmm1, %xmm1
+; SSE2-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
+; SSE2-NEXT: retq
;
; SSSE3-LABEL: combine_bitwise_ops_test3c:
-; SSSE3: # BB#0:
-; SSSE3-NEXT: xorps %xmm1, %xmm0
-; SSSE3-NEXT: xorps %xmm1, %xmm1
-; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[1,3]
-; SSSE3-NEXT: retq
+; SSSE3: # BB#0:
+; SSSE3-NEXT: pxor %xmm1, %xmm0
+; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,2]
+; SSSE3-NEXT: pxor %xmm1, %xmm1
+; SSSE3-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
+; SSSE3-NEXT: retq
;
; SSE41-LABEL: combine_bitwise_ops_test3c:
-; SSE41: # BB#0:
-; SSE41-NEXT: xorps %xmm1, %xmm0
-; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
-; SSE41-NEXT: retq
+; SSE41: # BB#0:
+; SSE41-NEXT: pxor %xmm1, %xmm0
+; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; SSE41-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
+; SSE41-NEXT: retq
;
; AVX-LABEL: combine_bitwise_ops_test3c:
; AVX: # BB#0:
-; AVX-NEXT: vxorps %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
+; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; AVX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX-NEXT: retq
%shuf1 = shufflevector <4 x i32> %a, <4 x i32> %c, <4 x i32><i32 0, i32 2, i32 5, i32 7>
%shuf2 = shufflevector <4 x i32> %b, <4 x i32> %c, <4 x i32><i32 0, i32 2, i32 5, i32 7>
@@ -585,18 +633,42 @@ define <4 x i32> @combine_bitwise_ops_test3c(<4 x i32> %a, <4 x i32> %b, <4 x i3
}
define <4 x i32> @combine_bitwise_ops_test4c(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
-; SSE-LABEL: combine_bitwise_ops_test4c:
-; SSE: # BB#0:
-; SSE-NEXT: andps %xmm1, %xmm0
-; SSE-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm0[1,3]
-; SSE-NEXT: movaps %xmm2, %xmm0
-; SSE-NEXT: retq
+; SSE2-LABEL: combine_bitwise_ops_test4c:
+; SSE2: # BB#0:
+; SSE2-NEXT: pand %xmm1, %xmm0
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,3,2,3]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3]
+; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE2-NEXT: retq
;
-; AVX-LABEL: combine_bitwise_ops_test4c:
-; AVX: # BB#0:
-; AVX-NEXT: vandps %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm2[0,2],xmm0[1,3]
-; AVX-NEXT: retq
+; SSSE3-LABEL: combine_bitwise_ops_test4c:
+; SSSE3: # BB#0:
+; SSSE3-NEXT: pand %xmm1, %xmm0
+; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,3,2,3]
+; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3]
+; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: combine_bitwise_ops_test4c:
+; SSE41: # BB#0:
+; SSE41-NEXT: pand %xmm1, %xmm0
+; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm2[0,1],xmm0[2,3],xmm2[4,5],xmm0[6,7]
+; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
+; SSE41-NEXT: retq
+;
+; AVX1-LABEL: combine_bitwise_ops_test4c:
+; AVX1: # BB#0:
+; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm2[0,1],xmm0[2,3],xmm2[4,5],xmm0[6,7]
+; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: combine_bitwise_ops_test4c:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm2[0],xmm0[1],xmm2[2],xmm0[3]
+; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
+; AVX2-NEXT: retq
%shuf1 = shufflevector <4 x i32> %c, <4 x i32> %a, <4 x i32><i32 0, i32 2, i32 5, i32 7>
%shuf2 = shufflevector <4 x i32> %c, <4 x i32> %b, <4 x i32><i32 0, i32 2, i32 5, i32 7>
%and = and <4 x i32> %shuf1, %shuf2
@@ -604,18 +676,42 @@ define <4 x i32> @combine_bitwise_ops_test4c(<4 x i32> %a, <4 x i32> %b, <4 x i3
}
define <4 x i32> @combine_bitwise_ops_test5c(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
-; SSE-LABEL: combine_bitwise_ops_test5c:
-; SSE: # BB#0:
-; SSE-NEXT: orps %xmm1, %xmm0
-; SSE-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm0[1,3]
-; SSE-NEXT: movaps %xmm2, %xmm0
-; SSE-NEXT: retq
+; SSE2-LABEL: combine_bitwise_ops_test5c:
+; SSE2: # BB#0:
+; SSE2-NEXT: por %xmm1, %xmm0
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,3,2,3]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3]
+; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE2-NEXT: retq
;
-; AVX-LABEL: combine_bitwise_ops_test5c:
-; AVX: # BB#0:
-; AVX-NEXT: vorps %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm2[0,2],xmm0[1,3]
-; AVX-NEXT: retq
+; SSSE3-LABEL: combine_bitwise_ops_test5c:
+; SSSE3: # BB#0:
+; SSSE3-NEXT: por %xmm1, %xmm0
+; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,3,2,3]
+; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3]
+; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: combine_bitwise_ops_test5c:
+; SSE41: # BB#0:
+; SSE41-NEXT: por %xmm1, %xmm0
+; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm2[0,1],xmm0[2,3],xmm2[4,5],xmm0[6,7]
+; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
+; SSE41-NEXT: retq
+;
+; AVX1-LABEL: combine_bitwise_ops_test5c:
+; AVX1: # BB#0:
+; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm2[0,1],xmm0[2,3],xmm2[4,5],xmm0[6,7]
+; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: combine_bitwise_ops_test5c:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm2[0],xmm0[1],xmm2[2],xmm0[3]
+; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
+; AVX2-NEXT: retq
%shuf1 = shufflevector <4 x i32> %c, <4 x i32> %a, <4 x i32><i32 0, i32 2, i32 5, i32 7>
%shuf2 = shufflevector <4 x i32> %c, <4 x i32> %b, <4 x i32><i32 0, i32 2, i32 5, i32 7>
%or = or <4 x i32> %shuf1, %shuf2
@@ -623,20 +719,45 @@ define <4 x i32> @combine_bitwise_ops_test5c(<4 x i32> %a, <4 x i32> %b, <4 x i3
}
define <4 x i32> @combine_bitwise_ops_test6c(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
-; SSE-LABEL: combine_bitwise_ops_test6c:
-; SSE: # BB#0:
-; SSE-NEXT: xorps %xmm1, %xmm0
-; SSE-NEXT: xorps %xmm1, %xmm1
-; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2],xmm0[1,3]
-; SSE-NEXT: movaps %xmm1, %xmm0
-; SSE-NEXT: retq
+; SSE2-LABEL: combine_bitwise_ops_test6c:
+; SSE2: # BB#0:
+; SSE2-NEXT: pxor %xmm1, %xmm0
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,3,2,3]
+; SSE2-NEXT: pxor %xmm0, %xmm0
+; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE2-NEXT: retq
;
-; AVX-LABEL: combine_bitwise_ops_test6c:
-; AVX: # BB#0:
-; AVX-NEXT: vxorps %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
-; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm1[0,2],xmm0[1,3]
-; AVX-NEXT: retq
+; SSSE3-LABEL: combine_bitwise_ops_test6c:
+; SSSE3: # BB#0:
+; SSSE3-NEXT: pxor %xmm1, %xmm0
+; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,3,2,3]
+; SSSE3-NEXT: pxor %xmm0, %xmm0
+; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: combine_bitwise_ops_test6c:
+; SSE41: # BB#0:
+; SSE41-NEXT: pxor %xmm1, %xmm0
+; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,1,1,3]
+; SSE41-NEXT: pxor %xmm0, %xmm0
+; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
+; SSE41-NEXT: retq
+;
+; AVX1-LABEL: combine_bitwise_ops_test6c:
+; AVX1: # BB#0:
+; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
+; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: combine_bitwise_ops_test6c:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
+; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
+; AVX2-NEXT: retq
%shuf1 = shufflevector <4 x i32> %c, <4 x i32> %a, <4 x i32><i32 0, i32 2, i32 5, i32 7>
%shuf2 = shufflevector <4 x i32> %c, <4 x i32> %b, <4 x i32><i32 0, i32 2, i32 5, i32 7>
%xor = xor <4 x i32> %shuf1, %shuf2
@@ -867,19 +988,40 @@ define <4 x i32> @combine_nested_undef_test14(<4 x i32> %A, <4 x i32> %B) {
; it.
define <4 x i32> @combine_nested_undef_test15(<4 x i32> %A, <4 x i32> %B) {
-; SSE-LABEL: combine_nested_undef_test15:
-; SSE: # BB#0:
-; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[0,0]
-; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[3,1]
-; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,1,0,3]
-; SSE-NEXT: retq
+; SSE2-LABEL: combine_nested_undef_test15:
+; SSE2: # BB#0:
+; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[3,0]
+; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[0,1]
+; SSE2-NEXT: movaps %xmm1, %xmm0
+; SSE2-NEXT: retq
;
-; AVX-LABEL: combine_nested_undef_test15:
-; AVX: # BB#0:
-; AVX-NEXT: vshufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[0,0]
-; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm1[2,0],xmm0[3,1]
-; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,1,0,3]
-; AVX-NEXT: retq
+; SSSE3-LABEL: combine_nested_undef_test15:
+; SSSE3: # BB#0:
+; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[3,0]
+; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[0,1]
+; SSSE3-NEXT: movaps %xmm1, %xmm0
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: combine_nested_undef_test15:
+; SSE41: # BB#0:
+; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
+; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,0,1]
+; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7]
+; SSE41-NEXT: retq
+;
+; AVX1-LABEL: combine_nested_undef_test15:
+; AVX1: # BB#0:
+; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
+; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,1,0,1]
+; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7]
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: combine_nested_undef_test15:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpbroadcastd %xmm1, %xmm1
+; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,1,0,1]
+; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3]
+; AVX2-NEXT: retq
%1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 0, i32 4, i32 3, i32 1>
%2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 2, i32 1, i32 0, i32 3>
ret <4 x i32> %2
@@ -888,34 +1030,34 @@ define <4 x i32> @combine_nested_undef_test15(<4 x i32> %A, <4 x i32> %B) {
define <4 x i32> @combine_nested_undef_test16(<4 x i32> %A, <4 x i32> %B) {
; SSE2-LABEL: combine_nested_undef_test16:
; SSE2: # BB#0:
-; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[1,3]
-; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,1,0,3]
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,0,2,3]
+; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: combine_nested_undef_test16:
; SSSE3: # BB#0:
-; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[1,3]
-; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
-; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,1,0,3]
+; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
+; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,0,2,3]
+; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: combine_nested_undef_test16:
; SSE41: # BB#0:
+; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
-; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,1,0,3]
; SSE41-NEXT: retq
;
; AVX1-LABEL: combine_nested_undef_test16:
; AVX1: # BB#0:
+; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
-; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,1,0,3]
; AVX1-NEXT: retq
;
; AVX2-LABEL: combine_nested_undef_test16:
; AVX2: # BB#0:
+; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
-; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,1,0,3]
; AVX2-NEXT: retq
%1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
%2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 2, i32 1, i32 0, i32 3>
@@ -923,19 +1065,35 @@ define <4 x i32> @combine_nested_undef_test16(<4 x i32> %A, <4 x i32> %B) {
}
define <4 x i32> @combine_nested_undef_test17(<4 x i32> %A, <4 x i32> %B) {
-; SSE-LABEL: combine_nested_undef_test17:
-; SSE: # BB#0:
-; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[1,0]
-; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2],xmm0[3,1]
-; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,1,0,3]
-; SSE-NEXT: retq
+; SSE2-LABEL: combine_nested_undef_test17:
+; SSE2: # BB#0:
+; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[1,0]
+; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1],xmm1[0,2]
+; SSE2-NEXT: retq
;
-; AVX-LABEL: combine_nested_undef_test17:
-; AVX: # BB#0:
-; AVX-NEXT: vshufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[1,0]
-; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm1[0,2],xmm0[3,1]
-; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,1,0,3]
-; AVX-NEXT: retq
+; SSSE3-LABEL: combine_nested_undef_test17:
+; SSSE3: # BB#0:
+; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[1,0]
+; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1],xmm1[0,2]
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: combine_nested_undef_test17:
+; SSE41: # BB#0:
+; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3,4,5,6,7]
+; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,0,1]
+; SSE41-NEXT: retq
+;
+; AVX1-LABEL: combine_nested_undef_test17:
+; AVX1: # BB#0:
+; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3,4,5,6,7]
+; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,1,0,1]
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: combine_nested_undef_test17:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
+; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,1,0,1]
+; AVX2-NEXT: retq
%1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 4, i32 1, i32 3, i32 1>
%2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 2, i32 1, i32 0, i32 3>
ret <4 x i32> %2
@@ -957,55 +1115,107 @@ define <4 x i32> @combine_nested_undef_test18(<4 x i32> %A, <4 x i32> %B) {
}
define <4 x i32> @combine_nested_undef_test19(<4 x i32> %A, <4 x i32> %B) {
-; SSE-LABEL: combine_nested_undef_test19:
-; SSE: # BB#0:
-; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0]
-; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[1,2]
-; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,0,0,0]
-; SSE-NEXT: retq
+; SSE2-LABEL: combine_nested_undef_test19:
+; SSE2: # BB#0:
+; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,0,0,0]
+; SSE2-NEXT: retq
;
-; AVX-LABEL: combine_nested_undef_test19:
-; AVX: # BB#0:
-; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0]
-; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[1,2]
-; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,0,0,0]
-; AVX-NEXT: retq
+; SSSE3-LABEL: combine_nested_undef_test19:
+; SSSE3: # BB#0:
+; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,0,0,0]
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: combine_nested_undef_test19:
+; SSE41: # BB#0:
+; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7]
+; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,0,0,0]
+; SSE41-NEXT: retq
+;
+; AVX1-LABEL: combine_nested_undef_test19:
+; AVX1: # BB#0:
+; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7]
+; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,0,0]
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: combine_nested_undef_test19:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3]
+; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,0,0]
+; AVX2-NEXT: retq
%1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 0, i32 4, i32 5, i32 6>
%2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 2, i32 0, i32 0, i32 0>
ret <4 x i32> %2
}
define <4 x i32> @combine_nested_undef_test20(<4 x i32> %A, <4 x i32> %B) {
-; SSE-LABEL: combine_nested_undef_test20:
-; SSE: # BB#0:
-; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,2],xmm1[0,0]
-; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,1,0,3]
-; SSE-NEXT: retq
+; SSE2-LABEL: combine_nested_undef_test20:
+; SSE2: # BB#0:
+; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[2,3]
+; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2,3,1]
+; SSE2-NEXT: movaps %xmm1, %xmm0
+; SSE2-NEXT: retq
;
-; AVX-LABEL: combine_nested_undef_test20:
-; AVX: # BB#0:
-; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[3,2],xmm1[0,0]
-; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,1,0,3]
-; AVX-NEXT: retq
+; SSSE3-LABEL: combine_nested_undef_test20:
+; SSSE3: # BB#0:
+; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[2,3]
+; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2,3,1]
+; SSSE3-NEXT: movaps %xmm1, %xmm0
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: combine_nested_undef_test20:
+; SSE41: # BB#0:
+; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
+; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,3,0]
+; SSE41-NEXT: retq
+;
+; AVX1-LABEL: combine_nested_undef_test20:
+; AVX1: # BB#0:
+; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
+; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,3,0]
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: combine_nested_undef_test20:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
+; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,3,0]
+; AVX2-NEXT: retq
%1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 3, i32 2, i32 4, i32 4>
%2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 2, i32 1, i32 0, i32 3>
ret <4 x i32> %2
}
define <4 x i32> @combine_nested_undef_test21(<4 x i32> %A, <4 x i32> %B) {
-; SSE-LABEL: combine_nested_undef_test21:
-; SSE: # BB#0:
-; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[1,0]
-; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2],xmm0[3,1]
-; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,1,0,3]
-; SSE-NEXT: retq
+; SSE2-LABEL: combine_nested_undef_test21:
+; SSE2: # BB#0:
+; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,3,0,3]
+; SSE2-NEXT: retq
;
-; AVX-LABEL: combine_nested_undef_test21:
-; AVX: # BB#0:
-; AVX-NEXT: vshufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[1,0]
-; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm1[0,2],xmm0[3,1]
-; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,3]
-; AVX-NEXT: retq
+; SSSE3-LABEL: combine_nested_undef_test21:
+; SSSE3: # BB#0:
+; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,3,0,3]
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: combine_nested_undef_test21:
+; SSE41: # BB#0:
+; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5,6,7]
+; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
+; SSE41-NEXT: retq
+;
+; AVX1-LABEL: combine_nested_undef_test21:
+; AVX1: # BB#0:
+; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5,6,7]
+; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: combine_nested_undef_test21:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2,3]
+; AVX2-NEXT: vpbroadcastq %xmm0, %xmm0
+; AVX2-NEXT: retq
%1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 4, i32 1, i32 3, i32 1>
%2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 3>
ret <4 x i32> %2
@@ -1148,13 +1358,13 @@ define <4 x float> @combine_test1(<4 x float> %a, <4 x float> %b) {
define <4 x float> @combine_test2(<4 x float> %a, <4 x float> %b) {
; SSE2-LABEL: combine_test2:
; SSE2: # BB#0:
-; SSE2-NEXT: movss %xmm0, %xmm1
+; SSE2-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
; SSE2-NEXT: movaps %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: combine_test2:
; SSSE3: # BB#0:
-; SSSE3-NEXT: movss %xmm0, %xmm1
+; SSSE3-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
; SSSE3-NEXT: movaps %xmm1, %xmm0
; SSSE3-NEXT: retq
;
@@ -1206,22 +1416,14 @@ define <4 x float> @combine_test4(<4 x float> %a, <4 x float> %b) {
define <4 x float> @combine_test5(<4 x float> %a, <4 x float> %b) {
; SSE2-LABEL: combine_test5:
; SSE2: # BB#0:
-; SSE2-NEXT: movaps %xmm1, %xmm2
-; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm0[1,3]
-; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2,1,3]
-; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,0],xmm2[2,0]
-; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,0]
-; SSE2-NEXT: movaps %xmm2, %xmm0
+; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,0],xmm1[0,0]
+; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: combine_test5:
; SSSE3: # BB#0:
-; SSSE3-NEXT: movaps %xmm1, %xmm2
-; SSSE3-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm0[1,3]
-; SSSE3-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2,1,3]
-; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,0],xmm2[2,0]
-; SSSE3-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,0]
-; SSSE3-NEXT: movaps %xmm2, %xmm0
+; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,0],xmm1[0,0]
+; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: combine_test5:
@@ -1256,13 +1458,13 @@ define <4 x i32> @combine_test6(<4 x i32> %a, <4 x i32> %b) {
define <4 x i32> @combine_test7(<4 x i32> %a, <4 x i32> %b) {
; SSE2-LABEL: combine_test7:
; SSE2: # BB#0:
-; SSE2-NEXT: movss %xmm0, %xmm1
+; SSE2-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
; SSE2-NEXT: movaps %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: combine_test7:
; SSSE3: # BB#0:
-; SSSE3-NEXT: movss %xmm0, %xmm1
+; SSSE3-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
; SSSE3-NEXT: movaps %xmm1, %xmm0
; SSSE3-NEXT: retq
;
@@ -1319,22 +1521,14 @@ define <4 x i32> @combine_test9(<4 x i32> %a, <4 x i32> %b) {
define <4 x i32> @combine_test10(<4 x i32> %a, <4 x i32> %b) {
; SSE2-LABEL: combine_test10:
; SSE2: # BB#0:
-; SSE2-NEXT: movaps %xmm1, %xmm2
-; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm0[1,3]
-; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2,1,3]
-; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,0],xmm2[2,0]
-; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,0]
-; SSE2-NEXT: movaps %xmm2, %xmm0
+; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,0],xmm1[0,0]
+; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: combine_test10:
; SSSE3: # BB#0:
-; SSSE3-NEXT: movaps %xmm1, %xmm2
-; SSSE3-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm0[1,3]
-; SSSE3-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2,1,3]
-; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,0],xmm2[2,0]
-; SSSE3-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,0]
-; SSSE3-NEXT: movaps %xmm2, %xmm0
+; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,0],xmm1[0,0]
+; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: combine_test10:
@@ -1368,13 +1562,13 @@ define <4 x float> @combine_test11(<4 x float> %a, <4 x float> %b) {
define <4 x float> @combine_test12(<4 x float> %a, <4 x float> %b) {
; SSE2-LABEL: combine_test12:
; SSE2: # BB#0:
-; SSE2-NEXT: movss %xmm0, %xmm1
+; SSE2-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
; SSE2-NEXT: movaps %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: combine_test12:
; SSSE3: # BB#0:
-; SSSE3-NEXT: movss %xmm0, %xmm1
+; SSSE3-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
; SSSE3-NEXT: movaps %xmm1, %xmm0
; SSSE3-NEXT: retq
;
@@ -1425,20 +1619,14 @@ define <4 x float> @combine_test14(<4 x float> %a, <4 x float> %b) {
define <4 x float> @combine_test15(<4 x float> %a, <4 x float> %b) {
; SSE2-LABEL: combine_test15:
; SSE2: # BB#0:
-; SSE2-NEXT: movaps %xmm0, %xmm2
-; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,0],xmm1[0,0]
-; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[2,0],xmm1[2,3]
-; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,0],xmm2[0,0]
-; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm2[2,3]
+; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,0],xmm1[0,0]
+; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: combine_test15:
; SSSE3: # BB#0:
-; SSSE3-NEXT: movaps %xmm0, %xmm2
-; SSSE3-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,0],xmm1[0,0]
-; SSSE3-NEXT: shufps {{.*#+}} xmm2 = xmm2[2,0],xmm1[2,3]
-; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,0],xmm2[0,0]
-; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm2[2,3]
+; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,0],xmm1[0,0]
+; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: combine_test15:
@@ -1467,13 +1655,13 @@ define <4 x i32> @combine_test16(<4 x i32> %a, <4 x i32> %b) {
define <4 x i32> @combine_test17(<4 x i32> %a, <4 x i32> %b) {
; SSE2-LABEL: combine_test17:
; SSE2: # BB#0:
-; SSE2-NEXT: movss %xmm0, %xmm1
+; SSE2-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
; SSE2-NEXT: movaps %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: combine_test17:
; SSSE3: # BB#0:
-; SSSE3-NEXT: movss %xmm0, %xmm1
+; SSSE3-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
; SSSE3-NEXT: movaps %xmm1, %xmm0
; SSSE3-NEXT: retq
;
@@ -1529,20 +1717,14 @@ define <4 x i32> @combine_test19(<4 x i32> %a, <4 x i32> %b) {
define <4 x i32> @combine_test20(<4 x i32> %a, <4 x i32> %b) {
; SSE2-LABEL: combine_test20:
; SSE2: # BB#0:
-; SSE2-NEXT: movaps %xmm0, %xmm2
-; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,0],xmm1[0,0]
-; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[2,0],xmm1[2,3]
-; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,0],xmm2[0,0]
-; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm2[2,3]
+; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,0],xmm1[0,0]
+; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: combine_test20:
; SSSE3: # BB#0:
-; SSSE3-NEXT: movaps %xmm0, %xmm2
-; SSSE3-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,0],xmm1[0,0]
-; SSSE3-NEXT: shufps {{.*#+}} xmm2 = xmm2[2,0],xmm1[2,3]
-; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,0],xmm2[0,0]
-; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm2[2,3]
+; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,0],xmm1[0,0]
+; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: combine_test20:
@@ -1568,26 +1750,26 @@ define <4 x i32> @combine_test21(<8 x i32> %a, <4 x i32>* %ptr) {
; SSE-LABEL: combine_test21:
; SSE: # BB#0:
; SSE-NEXT: movdqa %xmm0, %xmm2
-; SSE-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0]
-; SSE-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
-; SSE-NEXT: movdqa %xmm2,
+; SSE-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0]
+; SSE-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
+; SSE-NEXT: movdqa %xmm2, (%rdi)
; SSE-NEXT: retq
;
; AVX1-LABEL: combine_test21:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm0[0],xmm1[0]
-; AVX1-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
-; AVX1-NEXT: movdqa %xmm2,
+; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm0[0],xmm1[0]
+; AVX1-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
+; AVX1-NEXT: vmovdqa %xmm2, (%rdi)
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: combine_test21:
; AVX2: # BB#0:
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm0[0],xmm1[0]
-; AVX2-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
-; AVX2-NEXT: movdqa %xmm2,
+; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm0[0],xmm1[0]
+; AVX2-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
+; AVX2-NEXT: vmovdqa %xmm2, (%rdi)
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
%1 = shufflevector <8 x i32> %a, <8 x i32> %a, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
@@ -1599,19 +1781,18 @@ define <4 x i32> @combine_test21(<8 x i32> %a, <4 x i32>* %ptr) {
define <8 x float> @combine_test22(<2 x float>* %a, <2 x float>* %b) {
; SSE-LABEL: combine_test22:
; SSE: # BB#0:
-; SSE-NEXT: movq (%rdi), %xmm0
-; SSE-NEXT: movhpd (%rsi), %xmm0
+; SSE-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
+; SSE-NEXT: movhpd (%rsi), %xmm0
; SSE-NEXT: retq
;
-; AVX1-LABEL: combine_test22:
-; AVX1: # BB#0:
-; AVX1-NEXT: vmovq (%rdi), %xmm0
-; AVX1-NEXT: vmovhpd (%rsi), %xmm0, %xmm0
-; AVX1-NEXT: retq
-;
+; AVX-LABEL: combine_test22:
+; AVX: # BB#0:
+; AVX-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
+; AVX-NEXT: vmovhpd (%rsi), %xmm0, %xmm0
+; AVX-NEXT: retq
; Current AVX2 lowering of this is still awful, not adding a test case.
- %1 = load <2 x float>* %a, align 8
- %2 = load <2 x float>* %b, align 8
+ %1 = load <2 x float>, <2 x float>* %a, align 8
+ %2 = load <2 x float>, <2 x float>* %b, align 8
%3 = shufflevector <2 x float> %1, <2 x float> %2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
ret <8 x float> %3
}
@@ -1644,19 +1825,17 @@ define <4 x float> @combine_test2b(<4 x float> %a, <4 x float> %b) {
;
; SSSE3-LABEL: combine_test2b:
; SSSE3: # BB#0:
-; SSSE3-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0,0]
-; SSSE3-NEXT: movapd %xmm1, %xmm0
+; SSSE3-NEXT: movddup {{.*#+}} xmm0 = xmm1[0,0]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: combine_test2b:
; SSE41: # BB#0:
-; SSE41-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0,0]
-; SSE41-NEXT: movapd %xmm1, %xmm0
+; SSE41-NEXT: movddup {{.*#+}} xmm0 = xmm1[0,0]
; SSE41-NEXT: retq
;
; AVX-LABEL: combine_test2b:
; AVX: # BB#0:
-; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0,0]
+; AVX-NEXT: vmovddup {{.*#+}} xmm0 = xmm1[0,0]
; AVX-NEXT: retq
%1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
%2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 0, i32 5>
@@ -1664,21 +1843,28 @@ define <4 x float> @combine_test2b(<4 x float> %a, <4 x float> %b) {
}
define <4 x float> @combine_test3b(<4 x float> %a, <4 x float> %b) {
-; SSE-LABEL: combine_test3b:
-; SSE: # BB#0:
-; SSE-NEXT: movaps %xmm1, %xmm2
-; SSE-NEXT: shufps {{.*#+}} xmm2 = xmm2[2,0],xmm0[3,0]
-; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm2[0,2]
-; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[3,3]
-; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
-; SSE-NEXT: retq
+; SSE2-LABEL: combine_test3b:
+; SSE2: # BB#0:
+; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[3,0]
+; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[2,3]
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: combine_test3b:
+; SSSE3: # BB#0:
+; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[3,0]
+; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[2,3]
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: combine_test3b:
+; SSE41: # BB#0:
+; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,3,2,3]
+; SSE41-NEXT: retq
;
; AVX-LABEL: combine_test3b:
; AVX: # BB#0:
-; AVX-NEXT: vshufps {{.*#+}} xmm2 = xmm1[2,0],xmm0[3,0]
-; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm2[0,2]
-; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[3,3]
-; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
+; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,3,2,3]
; AVX-NEXT: retq
%1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 0, i32 6, i32 3>
%2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 7, i32 2, i32 7>
@@ -1707,48 +1893,48 @@ define <4 x float> @combine_test4b(<4 x float> %a, <4 x float> %b) {
define <4 x i8> @combine_test1c(<4 x i8>* %a, <4 x i8>* %b) {
; SSE2-LABEL: combine_test1c:
; SSE2: # BB#0:
-; SSE2-NEXT: movd (%rdi), %xmm1
+; SSE2-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
-; SSE2-NEXT: movd (%rsi), %xmm0
+; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
-; SSE2-NEXT: movss %xmm1, %xmm0
+; SSE2-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: combine_test1c:
; SSSE3: # BB#0:
-; SSSE3-NEXT: movd (%rdi), %xmm1
+; SSSE3-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
-; SSSE3-NEXT: movd (%rsi), %xmm0
+; SSSE3-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
-; SSSE3-NEXT: movss %xmm1, %xmm0
+; SSSE3-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: combine_test1c:
; SSE41: # BB#0:
-; SSE41-NEXT: pmovzxbd (%rdi), %xmm1
-; SSE41-NEXT: pmovzxbd (%rsi), %xmm0
+; SSE41-NEXT: pmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
+; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3,4,5,6,7]
; SSE41-NEXT: retq
;
; AVX1-LABEL: combine_test1c:
; AVX1: # BB#0:
-; AVX1-NEXT: vpmovzxbd (%rdi), %xmm0
-; AVX1-NEXT: vpmovzxbd (%rsi), %xmm1
+; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
+; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7]
; AVX1-NEXT: retq
;
; AVX2-LABEL: combine_test1c:
; AVX2: # BB#0:
-; AVX2-NEXT: vpmovzxbd (%rdi), %xmm0
-; AVX2-NEXT: vpmovzxbd (%rsi), %xmm1
+; AVX2-NEXT: vpmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
+; AVX2-NEXT: vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; AVX2-NEXT: retq
- %A = load <4 x i8>* %a
- %B = load <4 x i8>* %b
+ %A = load <4 x i8>, <4 x i8>* %a
+ %B = load <4 x i8>, <4 x i8>* %b
%1 = shufflevector <4 x i8> %A, <4 x i8> %B, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
%2 = shufflevector <4 x i8> %1, <4 x i8> %B, <4 x i32> <i32 0, i32 1, i32 6, i32 3>
ret <4 x i8> %2
@@ -1757,10 +1943,10 @@ define <4 x i8> @combine_test1c(<4 x i8>* %a, <4 x i8>* %b) {
define <4 x i8> @combine_test2c(<4 x i8>* %a, <4 x i8>* %b) {
; SSE2-LABEL: combine_test2c:
; SSE2: # BB#0:
-; SSE2-NEXT: movd (%rdi), %xmm0
+; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
-; SSE2-NEXT: movd (%rsi), %xmm1
+; SSE2-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
@@ -1768,10 +1954,10 @@ define <4 x i8> @combine_test2c(<4 x i8>* %a, <4 x i8>* %b) {
;
; SSSE3-LABEL: combine_test2c:
; SSSE3: # BB#0:
-; SSSE3-NEXT: movd (%rdi), %xmm0
+; SSSE3-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
-; SSSE3-NEXT: movd (%rsi), %xmm1
+; SSSE3-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
@@ -1779,19 +1965,19 @@ define <4 x i8> @combine_test2c(<4 x i8>* %a, <4 x i8>* %b) {
;
; SSE41-LABEL: combine_test2c:
; SSE41: # BB#0:
-; SSE41-NEXT: pmovzxbd (%rdi), %xmm0
-; SSE41-NEXT: pmovzxbd (%rsi), %xmm1
+; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
+; SSE41-NEXT: pmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
; SSE41-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE41-NEXT: retq
;
; AVX-LABEL: combine_test2c:
; AVX: # BB#0:
-; AVX-NEXT: vpmovzxbd (%rdi), %xmm0
-; AVX-NEXT: vpmovzxbd (%rsi), %xmm1
+; AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
+; AVX-NEXT: vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX-NEXT: retq
- %A = load <4 x i8>* %a
- %B = load <4 x i8>* %b
+ %A = load <4 x i8>, <4 x i8>* %a
+ %B = load <4 x i8>, <4 x i8>* %b
%1 = shufflevector <4 x i8> %A, <4 x i8> %B, <4 x i32> <i32 0, i32 5, i32 1, i32 5>
%2 = shufflevector <4 x i8> %1, <4 x i8> %B, <4 x i32> <i32 0, i32 2, i32 4, i32 1>
ret <4 x i8> %2
@@ -1800,10 +1986,10 @@ define <4 x i8> @combine_test2c(<4 x i8>* %a, <4 x i8>* %b) {
define <4 x i8> @combine_test3c(<4 x i8>* %a, <4 x i8>* %b) {
; SSE2-LABEL: combine_test3c:
; SSE2: # BB#0:
-; SSE2-NEXT: movd (%rdi), %xmm1
+; SSE2-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
-; SSE2-NEXT: movd (%rsi), %xmm0
+; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; SSE2-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
@@ -1811,10 +1997,10 @@ define <4 x i8> @combine_test3c(<4 x i8>* %a, <4 x i8>* %b) {
;
; SSSE3-LABEL: combine_test3c:
; SSSE3: # BB#0:
-; SSSE3-NEXT: movd (%rdi), %xmm1
+; SSSE3-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
-; SSSE3-NEXT: movd (%rsi), %xmm0
+; SSSE3-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; SSSE3-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
@@ -1822,19 +2008,19 @@ define <4 x i8> @combine_test3c(<4 x i8>* %a, <4 x i8>* %b) {
;
; SSE41-LABEL: combine_test3c:
; SSE41: # BB#0:
-; SSE41-NEXT: pmovzxbd (%rdi), %xmm1
-; SSE41-NEXT: pmovzxbd (%rsi), %xmm0
+; SSE41-NEXT: pmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
+; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
; SSE41-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; SSE41-NEXT: retq
;
; AVX-LABEL: combine_test3c:
; AVX: # BB#0:
-; AVX-NEXT: vpmovzxbd (%rdi), %xmm0
-; AVX-NEXT: vpmovzxbd (%rsi), %xmm1
+; AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
+; AVX-NEXT: vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
; AVX-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm1[1],xmm0[1]
; AVX-NEXT: retq
- %A = load <4 x i8>* %a
- %B = load <4 x i8>* %b
+ %A = load <4 x i8>, <4 x i8>* %a
+ %B = load <4 x i8>, <4 x i8>* %b
%1 = shufflevector <4 x i8> %A, <4 x i8> %B, <4 x i32> <i32 2, i32 3, i32 5, i32 5>
%2 = shufflevector <4 x i8> %1, <4 x i8> %B, <4 x i32> <i32 6, i32 7, i32 0, i32 1>
ret <4 x i8> %2
@@ -1843,56 +2029,50 @@ define <4 x i8> @combine_test3c(<4 x i8>* %a, <4 x i8>* %b) {
define <4 x i8> @combine_test4c(<4 x i8>* %a, <4 x i8>* %b) {
; SSE2-LABEL: combine_test4c:
; SSE2: # BB#0:
-; SSE2-NEXT: movd (%rdi), %xmm1
+; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
+; SSE2-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
-; SSE2-NEXT: movd (%rsi), %xmm2
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
-; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
-; SSE2-NEXT: movdqa %xmm2, %xmm0
-; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[1,3]
-; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
-; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[3,0],xmm0[2,0]
-; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,0]
+; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,0],xmm1[0,0]
+; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: combine_test4c:
; SSSE3: # BB#0:
-; SSSE3-NEXT: movd (%rdi), %xmm1
+; SSSE3-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
+; SSSE3-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
-; SSSE3-NEXT: movd (%rsi), %xmm2
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
-; SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
-; SSSE3-NEXT: movdqa %xmm2, %xmm0
-; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[1,3]
-; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
-; SSSE3-NEXT: shufps {{.*#+}} xmm2 = xmm2[3,0],xmm0[2,0]
-; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,0]
+; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,0],xmm1[0,0]
+; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: combine_test4c:
; SSE41: # BB#0:
-; SSE41-NEXT: pmovzxbd (%rdi), %xmm1
-; SSE41-NEXT: pmovzxbd (%rsi), %xmm0
+; SSE41-NEXT: pmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
+; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7]
; SSE41-NEXT: retq
;
; AVX1-LABEL: combine_test4c:
; AVX1: # BB#0:
-; AVX1-NEXT: vpmovzxbd (%rdi), %xmm0
-; AVX1-NEXT: vpmovzxbd (%rsi), %xmm1
+; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
+; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5,6,7]
; AVX1-NEXT: retq
;
; AVX2-LABEL: combine_test4c:
; AVX2: # BB#0:
-; AVX2-NEXT: vpmovzxbd (%rdi), %xmm0
-; AVX2-NEXT: vpmovzxbd (%rsi), %xmm1
+; AVX2-NEXT: vpmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
+; AVX2-NEXT: vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2,3]
; AVX2-NEXT: retq
- %A = load <4 x i8>* %a
- %B = load <4 x i8>* %b
+ %A = load <4 x i8>, <4 x i8>* %a
+ %B = load <4 x i8>, <4 x i8>* %b
%1 = shufflevector <4 x i8> %A, <4 x i8> %B, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
%2 = shufflevector <4 x i8> %1, <4 x i8> %B, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
ret <4 x i8> %2
@@ -1931,12 +2111,12 @@ define <4 x i8> @combine_test4c(<4 x i8>* %a, <4 x i8>* %b) {
define <4 x float> @combine_blend_01(<4 x float> %a, <4 x float> %b) {
; SSE2-LABEL: combine_blend_01:
; SSE2: # BB#0:
-; SSE2-NEXT: movsd %xmm1, %xmm0
+; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: combine_blend_01:
; SSSE3: # BB#0:
-; SSSE3-NEXT: movsd %xmm1, %xmm0
+; SSSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: combine_blend_01:
@@ -1956,16 +2136,16 @@ define <4 x float> @combine_blend_01(<4 x float> %a, <4 x float> %b) {
define <4 x float> @combine_blend_02(<4 x float> %a, <4 x float> %b) {
; SSE2-LABEL: combine_blend_02:
; SSE2: # BB#0:
-; SSE2-NEXT: movss %xmm1, %xmm0
-; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[3,0]
-; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
+; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2],xmm0[1,3]
+; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2,1,3]
+; SSE2-NEXT: movaps %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: combine_blend_02:
; SSSE3: # BB#0:
-; SSSE3-NEXT: movss %xmm1, %xmm0
-; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[3,0]
-; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
+; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2],xmm0[1,3]
+; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2,1,3]
+; SSSE3-NEXT: movaps %xmm1, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: combine_blend_02:
@@ -1985,13 +2165,13 @@ define <4 x float> @combine_blend_02(<4 x float> %a, <4 x float> %b) {
define <4 x float> @combine_blend_123(<4 x float> %a, <4 x float> %b) {
; SSE2-LABEL: combine_blend_123:
; SSE2: # BB#0:
-; SSE2-NEXT: movss %xmm0, %xmm1
+; SSE2-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
; SSE2-NEXT: movaps %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: combine_blend_123:
; SSSE3: # BB#0:
-; SSSE3-NEXT: movss %xmm0, %xmm1
+; SSSE3-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
; SSSE3-NEXT: movaps %xmm1, %xmm0
; SSSE3-NEXT: retq
;
@@ -2065,12 +2245,12 @@ define <4 x i32> @combine_test_movhl_3(<4 x i32> %a, <4 x i32> %b) {
define <4 x float> @combine_undef_input_test1(<4 x float> %a, <4 x float> %b) {
; SSE2-LABEL: combine_undef_input_test1:
; SSE2: # BB#0:
-; SSE2-NEXT: movsd %xmm1, %xmm0
+; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: combine_undef_input_test1:
; SSSE3: # BB#0:
-; SSSE3-NEXT: movsd %xmm1, %xmm0
+; SSSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: combine_undef_input_test1:
@@ -2136,14 +2316,14 @@ define <4 x float> @combine_undef_input_test4(<4 x float> %a, <4 x float> %b) {
define <4 x float> @combine_undef_input_test5(<4 x float> %a, <4 x float> %b) {
; SSE2-LABEL: combine_undef_input_test5:
; SSE2: # BB#0:
-; SSE2-NEXT: movsd %xmm0, %xmm1
-; SSE2-NEXT: movaps %xmm1, %xmm0
+; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
+; SSE2-NEXT: movapd %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: combine_undef_input_test5:
; SSSE3: # BB#0:
-; SSSE3-NEXT: movsd %xmm0, %xmm1
-; SSSE3-NEXT: movaps %xmm1, %xmm0
+; SSSE3-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
+; SSSE3-NEXT: movapd %xmm1, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: combine_undef_input_test5:
@@ -2181,17 +2361,17 @@ define <4 x float> @combine_undef_input_test7(<4 x float> %a) {
;
; SSSE3-LABEL: combine_undef_input_test7:
; SSSE3: # BB#0:
-; SSSE3-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0,0]
+; SSSE3-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: combine_undef_input_test7:
; SSE41: # BB#0:
-; SSE41-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0,0]
+; SSE41-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0]
; SSE41-NEXT: retq
;
; AVX-LABEL: combine_undef_input_test7:
; AVX: # BB#0:
-; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0,0]
+; AVX-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; AVX-NEXT: retq
%1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 6, i32 0, i32 1, i32 7>
%2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 1, i32 2, i32 4, i32 5>
@@ -2206,17 +2386,17 @@ define <4 x float> @combine_undef_input_test8(<4 x float> %a) {
;
; SSSE3-LABEL: combine_undef_input_test8:
; SSSE3: # BB#0:
-; SSSE3-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0,0]
+; SSSE3-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: combine_undef_input_test8:
; SSE41: # BB#0:
-; SSE41-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0,0]
+; SSE41-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0]
; SSE41-NEXT: retq
;
; AVX-LABEL: combine_undef_input_test8:
; AVX: # BB#0:
-; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0,0]
+; AVX-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; AVX-NEXT: retq
%1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 0, i32 5, i32 1, i32 7>
%2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 0, i32 2, i32 4, i32 1>
@@ -2250,12 +2430,12 @@ define <4 x float> @combine_undef_input_test10(<4 x float> %a) {
define <4 x float> @combine_undef_input_test11(<4 x float> %a, <4 x float> %b) {
; SSE2-LABEL: combine_undef_input_test11:
; SSE2: # BB#0:
-; SSE2-NEXT: movsd %xmm1, %xmm0
+; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: combine_undef_input_test11:
; SSSE3: # BB#0:
-; SSSE3-NEXT: movsd %xmm1, %xmm0
+; SSSE3-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: combine_undef_input_test11:
@@ -2321,14 +2501,14 @@ define <4 x float> @combine_undef_input_test14(<4 x float> %a, <4 x float> %b) {
define <4 x float> @combine_undef_input_test15(<4 x float> %a, <4 x float> %b) {
; SSE2-LABEL: combine_undef_input_test15:
; SSE2: # BB#0:
-; SSE2-NEXT: movsd %xmm0, %xmm1
-; SSE2-NEXT: movaps %xmm1, %xmm0
+; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
+; SSE2-NEXT: movapd %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: combine_undef_input_test15:
; SSSE3: # BB#0:
-; SSSE3-NEXT: movsd %xmm0, %xmm1
-; SSSE3-NEXT: movaps %xmm1, %xmm0
+; SSSE3-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
+; SSSE3-NEXT: movapd %xmm1, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: combine_undef_input_test15:
@@ -2372,17 +2552,17 @@ define <4 x float> @combine_undef_input_test17(<4 x float> %a) {
;
; SSSE3-LABEL: combine_undef_input_test17:
; SSSE3: # BB#0:
-; SSSE3-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0,0]
+; SSSE3-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: combine_undef_input_test17:
; SSE41: # BB#0:
-; SSE41-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0,0]
+; SSE41-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0]
; SSE41-NEXT: retq
;
; AVX-LABEL: combine_undef_input_test17:
; AVX: # BB#0:
-; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0,0]
+; AVX-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; AVX-NEXT: retq
%1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 6, i32 0, i32 1, i32 7>
%2 = shufflevector <4 x float> %a, <4 x float> %1, <4 x i32> <i32 5, i32 6, i32 0, i32 1>
@@ -2397,17 +2577,17 @@ define <4 x float> @combine_undef_input_test18(<4 x float> %a) {
;
; SSSE3-LABEL: combine_undef_input_test18:
; SSSE3: # BB#0:
-; SSSE3-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0,0]
+; SSSE3-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: combine_undef_input_test18:
; SSE41: # BB#0:
-; SSE41-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0,0]
+; SSE41-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0]
; SSE41-NEXT: retq
;
; AVX-LABEL: combine_undef_input_test18:
; AVX: # BB#0:
-; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0,0]
+; AVX-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; AVX-NEXT: retq
%1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 0, i32 5, i32 1, i32 7>
%2 = shufflevector <4 x float> %a, <4 x float> %1, <4 x i32> <i32 4, i32 6, i32 0, i32 5>
@@ -2499,6 +2679,20 @@ define <8 x i32> @combine_unneeded_subvector2(<8 x i32> %a, <8 x i32> %b) {
}
define <4 x float> @combine_insertps1(<4 x float> %a, <4 x float> %b) {
+; SSE2-LABEL: combine_insertps1:
+; SSE2: # BB#0:
+; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[1,0]
+; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2],xmm0[2,3]
+; SSE2-NEXT: movaps %xmm1, %xmm0
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: combine_insertps1:
+; SSSE3: # BB#0:
+; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[1,0]
+; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2],xmm0[2,3]
+; SSSE3-NEXT: movaps %xmm1, %xmm0
+; SSSE3-NEXT: retq
+;
; SSE41-LABEL: combine_insertps1:
; SSE41: # BB#0:
; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm1[2],xmm0[1,2,3]
@@ -2515,6 +2709,20 @@ define <4 x float> @combine_insertps1(<4 x float> %a, <4 x float> %b) {
}
define <4 x float> @combine_insertps2(<4 x float> %a, <4 x float> %b) {
+; SSE2-LABEL: combine_insertps2:
+; SSE2: # BB#0:
+; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[0,0]
+; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[2,3]
+; SSE2-NEXT: movaps %xmm1, %xmm0
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: combine_insertps2:
+; SSSE3: # BB#0:
+; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[0,0]
+; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[2,3]
+; SSSE3-NEXT: movaps %xmm1, %xmm0
+; SSSE3-NEXT: retq
+;
; SSE41-LABEL: combine_insertps2:
; SSE41: # BB#0:
; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],xmm1[2],xmm0[2,3]
@@ -2531,6 +2739,18 @@ define <4 x float> @combine_insertps2(<4 x float> %a, <4 x float> %b) {
}
define <4 x float> @combine_insertps3(<4 x float> %a, <4 x float> %b) {
+; SSE2-LABEL: combine_insertps3:
+; SSE2: # BB#0:
+; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[3,0]
+; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: combine_insertps3:
+; SSSE3: # BB#0:
+; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[3,0]
+; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
+; SSSE3-NEXT: retq
+;
; SSE41-LABEL: combine_insertps3:
; SSE41: # BB#0:
; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3]
@@ -2547,6 +2767,18 @@ define <4 x float> @combine_insertps3(<4 x float> %a, <4 x float> %b) {
}
define <4 x float> @combine_insertps4(<4 x float> %a, <4 x float> %b) {
+; SSE2-LABEL: combine_insertps4:
+; SSE2: # BB#0:
+; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[2,0]
+; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,0]
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: combine_insertps4:
+; SSSE3: # BB#0:
+; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[2,0]
+; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,0]
+; SSSE3-NEXT: retq
+;
; SSE41-LABEL: combine_insertps4:
; SSE41: # BB#0:
; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0]
@@ -2561,3 +2793,115 @@ define <4 x float> @combine_insertps4(<4 x float> %a, <4 x float> %b) {
%d = shufflevector <4 x float> %a, <4 x float> %c, <4 x i32><i32 4, i32 1, i32 6, i32 5>
ret <4 x float> %d
}
+
+define <4 x float> @PR22377(<4 x float> %a, <4 x float> %b) {
+; SSE-LABEL: PR22377:
+; SSE: # BB#0: # %entry
+; SSE-NEXT: movaps %xmm0, %xmm1
+; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,3,1,3]
+; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2,0,2]
+; SSE-NEXT: addps %xmm0, %xmm1
+; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE-NEXT: retq
+;
+; AVX-LABEL: PR22377:
+; AVX: # BB#0: # %entry
+; AVX-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[1,3,1,3]
+; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,0,2]
+; AVX-NEXT: vaddps %xmm0, %xmm1, %xmm1
+; AVX-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; AVX-NEXT: retq
+entry:
+ %s1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 1, i32 3, i32 1, i32 3>
+ %s2 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 0, i32 2, i32 0, i32 2>
+ %r2 = fadd <4 x float> %s1, %s2
+ %s3 = shufflevector <4 x float> %s2, <4 x float> %r2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
+ ret <4 x float> %s3
+}
+
+define <4 x float> @PR22390(<4 x float> %a, <4 x float> %b) {
+; SSE2-LABEL: PR22390:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,0,1,2]
+; SSE2-NEXT: movaps %xmm0, %xmm2
+; SSE2-NEXT: movss {{.*#+}} xmm2 = xmm1[0],xmm2[1,2,3]
+; SSE2-NEXT: addps %xmm0, %xmm2
+; SSE2-NEXT: movaps %xmm2, %xmm0
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: PR22390:
+; SSSE3: # BB#0: # %entry
+; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,0,1,2]
+; SSSE3-NEXT: movaps %xmm0, %xmm2
+; SSSE3-NEXT: movss {{.*#+}} xmm2 = xmm1[0],xmm2[1,2,3]
+; SSSE3-NEXT: addps %xmm0, %xmm2
+; SSSE3-NEXT: movaps %xmm2, %xmm0
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: PR22390:
+; SSE41: # BB#0: # %entry
+; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,0,1,2]
+; SSE41-NEXT: blendps {{.*#+}} xmm1 = xmm1[0],xmm0[1,2,3]
+; SSE41-NEXT: addps %xmm1, %xmm0
+; SSE41-NEXT: retq
+;
+; AVX-LABEL: PR22390:
+; AVX: # BB#0: # %entry
+; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,0,1,2]
+; AVX-NEXT: vblendps {{.*#+}} xmm1 = xmm1[0],xmm0[1,2,3]
+; AVX-NEXT: vaddps %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+entry:
+ %s1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 3, i32 0, i32 1, i32 2>
+ %s2 = shufflevector <4 x float> %s1, <4 x float> %b, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
+ %r2 = fadd <4 x float> %s1, %s2
+ ret <4 x float> %r2
+}
+
+define <8 x float> @PR22412(<8 x float> %a, <8 x float> %b) {
+; SSE2-LABEL: PR22412:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movsd {{.*#+}} xmm2 = xmm0[0],xmm2[1]
+; SSE2-NEXT: movapd %xmm2, %xmm0
+; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,0],xmm3[3,2]
+; SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,0],xmm2[3,2]
+; SSE2-NEXT: movaps %xmm3, %xmm1
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: PR22412:
+; SSSE3: # BB#0: # %entry
+; SSSE3-NEXT: movsd {{.*#+}} xmm2 = xmm0[0],xmm2[1]
+; SSSE3-NEXT: movapd %xmm2, %xmm0
+; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,0],xmm3[3,2]
+; SSSE3-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,0],xmm2[3,2]
+; SSSE3-NEXT: movaps %xmm3, %xmm1
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: PR22412:
+; SSE41: # BB#0: # %entry
+; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm2[1]
+; SSE41-NEXT: movapd %xmm0, %xmm1
+; SSE41-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,0],xmm3[3,2]
+; SSE41-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,0],xmm0[3,2]
+; SSE41-NEXT: movaps %xmm1, %xmm0
+; SSE41-NEXT: movaps %xmm3, %xmm1
+; SSE41-NEXT: retq
+;
+; AVX1-LABEL: PR22412:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
+; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
+; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[1,0],ymm1[3,2],ymm0[5,4],ymm1[7,6]
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: PR22412:
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
+; AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [1,0,7,6,5,4,3,2]
+; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0
+; AVX2-NEXT: retq
+entry:
+ %s1 = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 1, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %s2 = shufflevector <8 x float> %s1, <8 x float> undef, <8 x i32> <i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2>
+ ret <8 x float> %s2
+}
diff --git a/test/CodeGen/X86/vector-shuffle-mmx.ll b/test/CodeGen/X86/vector-shuffle-mmx.ll
new file mode 100644
index 000000000000..dbccd2694b07
--- /dev/null
+++ b/test/CodeGen/X86/vector-shuffle-mmx.ll
@@ -0,0 +1,105 @@
+; RUN: llc < %s -mtriple=i686-darwin -mattr=+mmx,+sse2 | FileCheck --check-prefix=X32 %s
+; RUN: llc < %s -mtriple=x86_64-darwin -mattr=+mmx,+sse2 | FileCheck --check-prefix=X64 %s
+
+; If there is no explicit MMX type usage, always promote to XMM.
+
+define void @test0(<1 x i64>* %x) {
+; X32-LABEL: test0:
+; X32: ## BB#0: ## %entry
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; X32-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
+; X32-NEXT: movq %xmm0, (%eax)
+; X32-NEXT: retl
+;
+; X64-LABEL: test0:
+; X64: ## BB#0: ## %entry
+; X64-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
+; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
+; X64-NEXT: movq %xmm0, (%rdi)
+; X64-NEXT: retq
+entry:
+ %tmp2 = load <1 x i64>, <1 x i64>* %x
+ %tmp6 = bitcast <1 x i64> %tmp2 to <2 x i32>
+ %tmp9 = shufflevector <2 x i32> %tmp6, <2 x i32> undef, <2 x i32> < i32 1, i32 1 >
+ %tmp10 = bitcast <2 x i32> %tmp9 to <1 x i64>
+ store <1 x i64> %tmp10, <1 x i64>* %x
+ ret void
+}
+
+define void @test1() {
+; X32-LABEL: test1:
+; X32: ## BB#0: ## %entry
+; X32-NEXT: pushl %edi
+; X32-NEXT: Ltmp0:
+; X32-NEXT: .cfi_def_cfa_offset 8
+; X32-NEXT: subl $16, %esp
+; X32-NEXT: Ltmp1:
+; X32-NEXT: .cfi_def_cfa_offset 24
+; X32-NEXT: Ltmp2:
+; X32-NEXT: .cfi_offset %edi, -8
+; X32-NEXT: xorps %xmm0, %xmm0
+; X32-NEXT: movlps %xmm0, (%esp)
+; X32-NEXT: movq (%esp), %mm0
+; X32-NEXT: pshuflw {{.*#+}} xmm0 = mem[0,2,2,3,4,5,6,7]
+; X32-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
+; X32-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; X32-NEXT: movq %xmm0, {{[0-9]+}}(%esp)
+; X32-NEXT: movq {{[0-9]+}}(%esp), %mm1
+; X32-NEXT: xorl %edi, %edi
+; X32-NEXT: maskmovq %mm1, %mm0
+; X32-NEXT: addl $16, %esp
+; X32-NEXT: popl %edi
+; X32-NEXT: retl
+;
+; X64-LABEL: test1:
+; X64: ## BB#0: ## %entry
+; X64-NEXT: xorps %xmm0, %xmm0
+; X64-NEXT: movlps %xmm0, -{{[0-9]+}}(%rsp)
+; X64-NEXT: movq -{{[0-9]+}}(%rsp), %mm0
+; X64-NEXT: pshuflw {{.*#+}} xmm0 = mem[0,2,2,3,4,5,6,7]
+; X64-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
+; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; X64-NEXT: movq %xmm0, -{{[0-9]+}}(%rsp)
+; X64-NEXT: movq -{{[0-9]+}}(%rsp), %mm1
+; X64-NEXT: xorl %edi, %edi
+; X64-NEXT: maskmovq %mm1, %mm0
+; X64-NEXT: retq
+entry:
+ %tmp528 = bitcast <8 x i8> zeroinitializer to <2 x i32>
+ %tmp529 = and <2 x i32> %tmp528, bitcast (<4 x i16> < i16 -32640, i16 16448, i16 8224, i16 4112 > to <2 x i32>)
+ %tmp542 = bitcast <2 x i32> %tmp529 to <4 x i16>
+ %tmp543 = add <4 x i16> %tmp542, < i16 0, i16 16448, i16 24672, i16 28784 >
+ %tmp555 = bitcast <4 x i16> %tmp543 to <8 x i8>
+ %tmp556 = bitcast <8 x i8> %tmp555 to x86_mmx
+ %tmp557 = bitcast <8 x i8> zeroinitializer to x86_mmx
+ tail call void @llvm.x86.mmx.maskmovq( x86_mmx %tmp557, x86_mmx %tmp556, i8* null)
+ ret void
+}
+
+@tmp_V2i = common global <2 x i32> zeroinitializer
+
+define void @test2() nounwind {
+; X32-LABEL: test2:
+; X32: ## BB#0: ## %entry
+; X32-NEXT: movl L_tmp_V2i$non_lazy_ptr, %eax
+; X32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; X32-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0,0,1,1]
+; X32-NEXT: movlps %xmm0, (%eax)
+; X32-NEXT: retl
+;
+; X64-LABEL: test2:
+; X64: ## BB#0: ## %entry
+; X64-NEXT: movq _tmp_V2i@{{.*}}(%rip), %rax
+; X64-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
+; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
+; X64-NEXT: movq %xmm0, (%rax)
+; X64-NEXT: retq
+entry:
+ %0 = load <2 x i32>, <2 x i32>* @tmp_V2i, align 8
+ %1 = shufflevector <2 x i32> %0, <2 x i32> undef, <2 x i32> zeroinitializer
+ store <2 x i32> %1, <2 x i32>* @tmp_V2i, align 8
+ ret void
+}
+
+declare void @llvm.x86.mmx.maskmovq(x86_mmx, x86_mmx, i8*)
diff --git a/test/CodeGen/X86/vector-shuffle-sse1.ll b/test/CodeGen/X86/vector-shuffle-sse1.ll
index ef60272b6c35..66e53bbb7502 100644
--- a/test/CodeGen/X86/vector-shuffle-sse1.ll
+++ b/test/CodeGen/X86/vector-shuffle-sse1.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mcpu=x86-64 -mattr=-sse2 -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=SSE1
+; RUN: llc < %s -mcpu=x86-64 -mattr=-sse2 | FileCheck %s --check-prefix=SSE1
target triple = "x86_64-unknown-unknown"
@@ -95,7 +95,7 @@ define <4 x float> @shuffle_v4f32_4zzz(<4 x float> %a) {
; SSE1-LABEL: shuffle_v4f32_4zzz:
; SSE1: # BB#0:
; SSE1-NEXT: xorps %xmm1, %xmm1
-; SSE1-NEXT: movss %xmm0, %xmm1
+; SSE1-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
; SSE1-NEXT: movaps %xmm1, %xmm0
; SSE1-NEXT: retq
%shuffle = shufflevector <4 x float> zeroinitializer, <4 x float> %a, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
@@ -106,8 +106,8 @@ define <4 x float> @shuffle_v4f32_z4zz(<4 x float> %a) {
; SSE1-LABEL: shuffle_v4f32_z4zz:
; SSE1: # BB#0:
; SSE1-NEXT: xorps %xmm1, %xmm1
-; SSE1-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[2,0]
-; SSE1-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[3,0]
+; SSE1-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0]
+; SSE1-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3]
; SSE1-NEXT: retq
%shuffle = shufflevector <4 x float> zeroinitializer, <4 x float> %a, <4 x i32> <i32 2, i32 4, i32 3, i32 0>
ret <4 x float> %shuffle
@@ -117,8 +117,8 @@ define <4 x float> @shuffle_v4f32_zz4z(<4 x float> %a) {
; SSE1-LABEL: shuffle_v4f32_zz4z:
; SSE1: # BB#0:
; SSE1-NEXT: xorps %xmm1, %xmm1
-; SSE1-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0]
-; SSE1-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[0,2]
+; SSE1-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[3,0]
+; SSE1-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[0,2]
; SSE1-NEXT: movaps %xmm1, %xmm0
; SSE1-NEXT: retq
%shuffle = shufflevector <4 x float> zeroinitializer, <4 x float> %a, <4 x i32> <i32 0, i32 0, i32 4, i32 0>
@@ -163,7 +163,7 @@ define <4 x float> @insert_reg_and_zero_v4f32(float %a) {
; SSE1-LABEL: insert_reg_and_zero_v4f32:
; SSE1: # BB#0:
; SSE1-NEXT: xorps %xmm1, %xmm1
-; SSE1-NEXT: movss %xmm0, %xmm1
+; SSE1-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
; SSE1-NEXT: movaps %xmm1, %xmm0
; SSE1-NEXT: retq
%v = insertelement <4 x float> undef, float %a, i32 0
@@ -174,9 +174,9 @@ define <4 x float> @insert_reg_and_zero_v4f32(float %a) {
define <4 x float> @insert_mem_and_zero_v4f32(float* %ptr) {
; SSE1-LABEL: insert_mem_and_zero_v4f32:
; SSE1: # BB#0:
-; SSE1-NEXT: movss (%rdi), %xmm0
+; SSE1-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE1-NEXT: retq
- %a = load float* %ptr
+ %a = load float, float* %ptr
%v = insertelement <4 x float> undef, float %a, i32 0
%shuffle = shufflevector <4 x float> %v, <4 x float> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
ret <4 x float> %shuffle
@@ -189,15 +189,15 @@ define <4 x float> @insert_mem_lo_v4f32(<2 x float>* %ptr, <4 x float> %b) {
; SSE1-NEXT: movl %eax, -{{[0-9]+}}(%rsp)
; SSE1-NEXT: shrq $32, %rax
; SSE1-NEXT: movl %eax, -{{[0-9]+}}(%rsp)
-; SSE1-NEXT: movss -{{[0-9]+}}(%rsp), %xmm1
-; SSE1-NEXT: movss -{{[0-9]+}}(%rsp), %xmm2
+; SSE1-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; SSE1-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; SSE1-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; SSE1-NEXT: xorps %xmm2, %xmm2
-; SSE1-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0,1]
+; SSE1-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3]
; SSE1-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,3]
; SSE1-NEXT: movaps %xmm1, %xmm0
; SSE1-NEXT: retq
- %a = load <2 x float>* %ptr
+ %a = load <2 x float>, <2 x float>* %ptr
%v = shufflevector <2 x float> %a, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
%shuffle = shufflevector <4 x float> %v, <4 x float> %b, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
ret <4 x float> %shuffle
@@ -210,14 +210,14 @@ define <4 x float> @insert_mem_hi_v4f32(<2 x float>* %ptr, <4 x float> %b) {
; SSE1-NEXT: movl %eax, -{{[0-9]+}}(%rsp)
; SSE1-NEXT: shrq $32, %rax
; SSE1-NEXT: movl %eax, -{{[0-9]+}}(%rsp)
-; SSE1-NEXT: movss -{{[0-9]+}}(%rsp), %xmm1
-; SSE1-NEXT: movss -{{[0-9]+}}(%rsp), %xmm2
+; SSE1-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; SSE1-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; SSE1-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; SSE1-NEXT: xorps %xmm2, %xmm2
-; SSE1-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0,1]
+; SSE1-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3]
; SSE1-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,1]
; SSE1-NEXT: retq
- %a = load <2 x float>* %ptr
+ %a = load <2 x float>, <2 x float>* %ptr
%v = shufflevector <2 x float> %a, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
%shuffle = shufflevector <4 x float> %v, <4 x float> %b, <4 x i32> <i32 4, i32 5, i32 0, i32 1>
ret <4 x float> %shuffle
@@ -229,7 +229,7 @@ define <4 x float> @shuffle_mem_v4f32_3210(<4 x float>* %ptr) {
; SSE1-NEXT: movaps (%rdi), %xmm0
; SSE1-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,2,1,0]
; SSE1-NEXT: retq
- %a = load <4 x float>* %ptr
+ %a = load <4 x float>, <4 x float>* %ptr
%shuffle = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
ret <4 x float> %shuffle
}
diff --git a/test/CodeGen/X86/vector-trunc.ll b/test/CodeGen/X86/vector-trunc.ll
index 8a5b7488f664..d2eef9af2a25 100644
--- a/test/CodeGen/X86/vector-trunc.ll
+++ b/test/CodeGen/X86/vector-trunc.ll
@@ -3,18 +3,56 @@
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE --check-prefix=SSE41
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
+define <4 x i32> @trunc2x2i64(<2 x i64> %a, <2 x i64> %b) {
+; SSE2-LABEL: trunc2x2i64:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: trunc2x2i64:
+; SSSE3: # BB#0: # %entry
+; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
+; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: trunc2x2i64:
+; SSE41: # BB#0: # %entry
+; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,0,2]
+; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
+; SSE41-NEXT: retq
+;
+; AVX-LABEL: trunc2x2i64:
+; AVX: # BB#0: # %entry
+; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,1,0,2]
+; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
+; AVX-NEXT: retq
+
+
+entry:
+ %0 = trunc <2 x i64> %a to <2 x i32>
+ %1 = trunc <2 x i64> %b to <2 x i32>
+ %2 = shufflevector <2 x i32> %0, <2 x i32> %1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ ret <4 x i32> %2
+}
+
define i64 @trunc2i64(<2 x i64> %inval) {
-; SSE-LABEL: trunc2i64:
-; SSE: # BB#0: # %entry
-; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; SSE-NEXT: movd %xmm0, %rax
-; SSE-NEXT: retq
+; SSE-LABEL: trunc2i64:
+; SSE: # BB#0: # %entry
+; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; SSE-NEXT: movd %xmm0, %rax
+; SSE-NEXT: retq
+;
+; AVX-LABEL: trunc2i64:
+; AVX: # BB#0: # %entry
+; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; AVX-NEXT: vmovq %xmm0, %rax
+; AVX-NEXT: retq
-; AVX-LABEL: trunc2i64:
-; AVX: # BB#0: # %entry
-; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; AVX-NEXT: vmovq %xmm0, %rax
-; AVX-NEXT: retq
entry:
%0 = trunc <2 x i64> %inval to <2 x i32>
@@ -22,33 +60,82 @@ entry:
ret i64 %1
}
+define <8 x i16> @trunc2x4i32(<4 x i32> %a, <4 x i32> %b) {
+; SSE2-LABEL: trunc2x4i32:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7]
+; SSE2-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,6,6,7]
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
+; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: trunc2x4i32:
+; SSSE3: # BB#0: # %entry
+; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
+; SSSE3-NEXT: pshufb %xmm2, %xmm1
+; SSSE3-NEXT: pshufb %xmm2, %xmm0
+; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: trunc2x4i32:
+; SSE41: # BB#0: # %entry
+; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
+; SSE41-NEXT: pshufb %xmm2, %xmm1
+; SSE41-NEXT: pshufb %xmm2, %xmm0
+; SSE41-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE41-NEXT: retq
+;
+; AVX-LABEL: trunc2x4i32:
+; AVX: # BB#0: # %entry
+; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
+; AVX-NEXT: vpshufb %xmm2, %xmm1, %xmm1
+; AVX-NEXT: vpshufb %xmm2, %xmm0, %xmm0
+; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX-NEXT: retq
+
+
+
+
+entry:
+ %0 = trunc <4 x i32> %a to <4 x i16>
+ %1 = trunc <4 x i32> %b to <4 x i16>
+ %2 = shufflevector <4 x i16> %0, <4 x i16> %1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ ret <8 x i16> %2
+}
+
; PR15524 http://llvm.org/bugs/show_bug.cgi?id=15524
define i64 @trunc4i32(<4 x i32> %inval) {
-; SSE2-LABEL: trunc4i32:
-; SSE2: # BB#0: # %entry
-; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
-; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; SSE2-NEXT: movd %xmm0, %rax
-; SSE2-NEXT: retq
-
+; SSE2-LABEL: trunc4i32:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
+; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; SSE2-NEXT: movd %xmm0, %rax
+; SSE2-NEXT: retq
+;
; SSSE3-LABEL: trunc4i32:
; SSSE3: # BB#0: # %entry
-; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
-; SSSE3-NEXT: movd %xmm0, %rax
-; SSSE3-NEXT: retq
-
+; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
+; SSSE3-NEXT: movd %xmm0, %rax
+; SSSE3-NEXT: retq
+;
; SSE41-LABEL: trunc4i32:
; SSE41: # BB#0: # %entry
-; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
-; SSE41-NEXT: movd %xmm0, %rax
-; SSE41-NEXT: retq
+; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
+; SSE41-NEXT: movd %xmm0, %rax
+; SSE41-NEXT: retq
+;
+; AVX-LABEL: trunc4i32:
+; AVX: # BB#0: # %entry
+; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
+; AVX-NEXT: vmovq %xmm0, %rax
+; AVX-NEXT: retq
+
+
-; AVX-LABEL: trunc4i32:
-; AVX: # BB#0: # %entry
-; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
-; AVX-NEXT: vmovq %xmm0, %rax
-; AVX-NEXT: retq
entry:
%0 = trunc <4 x i32> %inval to <4 x i16>
@@ -56,35 +143,98 @@ entry:
ret i64 %1
}
+define <16 x i8> @trunc2x8i16(<8 x i16> %a, <8 x i16> %b) {
+; SSE2-LABEL: trunc2x8i16:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
+; SSE2-NEXT: pand %xmm2, %xmm1
+; SSE2-NEXT: pand %xmm2, %xmm0
+; SSE2-NEXT: packuswb %xmm1, %xmm0
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: trunc2x8i16:
+; SSSE3: # BB#0: # %entry
+; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
+; SSSE3-NEXT: pshufb %xmm2, %xmm1
+; SSSE3-NEXT: pshufb %xmm2, %xmm0
+; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: trunc2x8i16:
+; SSE41: # BB#0: # %entry
+; SSE41-NEXT: movdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
+; SSE41-NEXT: pshufb %xmm2, %xmm1
+; SSE41-NEXT: pshufb %xmm2, %xmm0
+; SSE41-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE41-NEXT: retq
+;
+; AVX-LABEL: trunc2x8i16:
+; AVX: # BB#0: # %entry
+; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
+; AVX-NEXT: vpshufb %xmm2, %xmm1, %xmm1
+; AVX-NEXT: vpshufb %xmm2, %xmm0, %xmm0
+; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX-NEXT: retq
+
+
+
+
+entry:
+ %0 = trunc <8 x i16> %a to <8 x i8>
+ %1 = trunc <8 x i16> %b to <8 x i8>
+ %2 = shufflevector <8 x i8> %0, <8 x i8> %1, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ ret <16 x i8> %2
+}
+
; PR15524 http://llvm.org/bugs/show_bug.cgi?id=15524
define i64 @trunc8i16(<8 x i16> %inval) {
-; SSE2-LABEL: trunc8i16:
-; SSE2: # BB#0: # %entry
-; SSE2-NEXT: pand .LCP{{.*}}(%rip), %xmm0
-; SSE2-NEXT: packuswb %xmm0, %xmm0
-; SSE2-NEXT: movd %xmm0, %rax
-; SSE2-NEXT: retq
-
+; SSE2-LABEL: trunc8i16:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
+; SSE2-NEXT: packuswb %xmm0, %xmm0
+; SSE2-NEXT: movd %xmm0, %rax
+; SSE2-NEXT: retq
+;
; SSSE3-LABEL: trunc8i16:
; SSSE3: # BB#0: # %entry
-; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
-; SSSE3-NEXT: movd %xmm0, %rax
-; SSSE3-NEXT: retq
-
+; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
+; SSSE3-NEXT: movd %xmm0, %rax
+; SSSE3-NEXT: retq
+;
; SSE41-LABEL: trunc8i16:
; SSE41: # BB#0: # %entry
-; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
-; SSE41-NEXT: movd %xmm0, %rax
-; SSE41-NEXT: retq
+; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
+; SSE41-NEXT: movd %xmm0, %rax
+; SSE41-NEXT: retq
+;
+; AVX-LABEL: trunc8i16:
+; AVX: # BB#0: # %entry
+; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
+; AVX-NEXT: vmovq %xmm0, %rax
+; AVX-NEXT: retq
+
+
-; AVX-LABEL: trunc8i16:
-; AVX: # BB#0: # %entry
-; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
-; AVX-NEXT: vmovq %xmm0, %rax
-; AVX-NEXT: retq
entry:
%0 = trunc <8 x i16> %inval to <8 x i8>
%1 = bitcast <8 x i8> %0 to i64
ret i64 %1
}
+
+define <16 x i8> @trunc16i64_const() {
+; SSE-LABEL: trunc16i64_const
+; SSE: # BB#0: # %entry
+; SSE-NEXT: xorps %xmm0, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: trunc16i64_const
+; AVX: # BB#0: # %entry
+; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
+; AVX-NEXT: retq
+
+entry:
+ %0 = trunc <16 x i64> zeroinitializer to <16 x i8>
+ %1 = shufflevector <16 x i8> %0, <16 x i8> %0, <16 x i32> <i32 28, i32 30, i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 undef, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26>
+ ret <16 x i8> %1
+}
diff --git a/test/CodeGen/X86/vector-variable-idx2.ll b/test/CodeGen/X86/vector-variable-idx2.ll
index 6e8ae2e42c94..df65257bac7e 100644
--- a/test/CodeGen/X86/vector-variable-idx2.ll
+++ b/test/CodeGen/X86/vector-variable-idx2.ll
@@ -8,8 +8,8 @@ define i64 @__builtin_ia32_vec_ext_v2di(<2 x i64> %a, i32 %i) nounwind {
%2 = alloca i32, align 4
store <2 x i64> %a, <2 x i64>* %1, align 16
store i32 %i, i32* %2, align 4
- %3 = load <2 x i64>* %1, align 16
- %4 = load i32* %2, align 4
+ %3 = load <2 x i64>, <2 x i64>* %1, align 16
+ %4 = load i32, i32* %2, align 4
%5 = extractelement <2 x i64> %3, i32 %4
ret i64 %5
}
@@ -19,8 +19,8 @@ define <2 x i64> @__builtin_ia32_vec_int_v2di(<2 x i64> %a, i32 %i) nounwind {
%2 = alloca i32, align 4
store <2 x i64> %a, <2 x i64>* %1, align 16
store i32 %i, i32* %2, align 4
- %3 = load <2 x i64>* %1, align 16
- %4 = load i32* %2, align 4
+ %3 = load <2 x i64>, <2 x i64>* %1, align 16
+ %4 = load i32, i32* %2, align 4
%5 = insertelement <2 x i64> %3, i64 1, i32 %4
ret <2 x i64> %5
}
diff --git a/test/CodeGen/X86/vector-zext.ll b/test/CodeGen/X86/vector-zext.ll
index cd09deee4550..42781830ff2f 100644
--- a/test/CodeGen/X86/vector-zext.ll
+++ b/test/CodeGen/X86/vector-zext.ll
@@ -7,47 +7,43 @@
define <8 x i32> @zext_8i16_to_8i32(<8 x i16> %A) nounwind uwtable readnone ssp {
; SSE2-LABEL: zext_8i16_to_8i32:
; SSE2: # BB#0: # %entry
-; SSE2-NEXT: movdqa %xmm0, %xmm2
-; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
-; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [65535,65535,65535,65535]
-; SSE2-NEXT: pand %xmm1, %xmm2
-; SSE2-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
-; SSE2-NEXT: pand %xmm0, %xmm1
-; SSE2-NEXT: movdqa %xmm2, %xmm0
+; SSE2-NEXT: movdqa %xmm0, %xmm1
+; SSE2-NEXT: pxor %xmm2, %xmm2
+; SSE2-NEXT: # kill
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
+; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
+; SSE2-NEXT: pand .LCPI0_0(%rip), %xmm1
; SSE2-NEXT: retq
;
; SSSE3-LABEL: zext_8i16_to_8i32:
; SSSE3: # BB#0: # %entry
-; SSSE3-NEXT: movdqa %xmm0, %xmm2
-; SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
-; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [65535,65535,65535,65535]
-; SSSE3-NEXT: pand %xmm1, %xmm2
-; SSSE3-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
-; SSSE3-NEXT: pand %xmm0, %xmm1
-; SSSE3-NEXT: movdqa %xmm2, %xmm0
+; SSSE3-NEXT: movdqa %xmm0, %xmm1
+; SSSE3-NEXT: pxor %xmm2, %xmm2
+; SSSE3-NEXT: # kill
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
+; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
+; SSSE3-NEXT: pand .LCPI0_0(%rip), %xmm1
; SSSE3-NEXT: retq
;
; SSE41-LABEL: zext_8i16_to_8i32:
; SSE41: # BB#0: # %entry
-; SSE41-NEXT: pmovzxwd %xmm0, %xmm2
-; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [65535,65535,65535,65535]
-; SSE41-NEXT: pand %xmm1, %xmm2
-; SSE41-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
-; SSE41-NEXT: pand %xmm0, %xmm1
-; SSE41-NEXT: movdqa %xmm2, %xmm0
+; SSE41-NEXT: movdqa %xmm0, %xmm1
+; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
+; SSE41-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
+; SSE41-NEXT: pand .LCPI0_0(%rip), %xmm1
; SSE41-NEXT: retq
;
; AVX1-LABEL: zext_8i16_to_8i32:
; AVX1: # BB#0: # %entry
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
-; AVX1-NEXT: vpmovzxwd %xmm0, %xmm0
+; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: zext_8i16_to_8i32:
; AVX2: # BB#0: # %entry
-; AVX2-NEXT: vpmovzxwd %xmm0, %ymm0
+; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT: retq
entry:
%B = zext <8 x i16> %A to <8 x i32>
@@ -77,7 +73,7 @@ define <4 x i64> @zext_4i32_to_4i64(<4 x i32> %A) nounwind uwtable readnone ssp
;
; SSE41-LABEL: zext_4i32_to_4i64:
; SSE41: # BB#0: # %entry
-; SSE41-NEXT: pmovzxdq %xmm0, %xmm2
+; SSE41-NEXT: pmovzxdq {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero
; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [4294967295,4294967295]
; SSE41-NEXT: pand %xmm3, %xmm2
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,2,3,3]
@@ -89,13 +85,13 @@ define <4 x i64> @zext_4i32_to_4i64(<4 x i32> %A) nounwind uwtable readnone ssp
; AVX1: # BB#0: # %entry
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; AVX1-NEXT: vpmovzxdq %xmm0, %xmm0
+; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: zext_4i32_to_4i64:
; AVX2: # BB#0: # %entry
-; AVX2-NEXT: vpmovzxdq %xmm0, %ymm0
+; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX2-NEXT: retq
entry:
%B = zext <4 x i32> %A to <4 x i64>
@@ -127,7 +123,7 @@ define <8 x i32> @zext_8i8_to_8i32(<8 x i8> %z) {
;
; SSE41-LABEL: zext_8i8_to_8i32:
; SSE41: # BB#0: # %entry
-; SSE41-NEXT: pmovzxwd %xmm0, %xmm2
+; SSE41-NEXT: pmovzxwd {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [255,255,255,255]
; SSE41-NEXT: pand %xmm1, %xmm2
; SSE41-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
@@ -137,7 +133,7 @@ define <8 x i32> @zext_8i8_to_8i32(<8 x i8> %z) {
;
; AVX1-LABEL: zext_8i8_to_8i32:
; AVX1: # BB#0: # %entry
-; AVX1-NEXT: vpmovzxwd %xmm0, %xmm1
+; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
@@ -145,7 +141,7 @@ define <8 x i32> @zext_8i8_to_8i32(<8 x i8> %z) {
;
; AVX2-LABEL: zext_8i8_to_8i32:
; AVX2: # BB#0: # %entry
-; AVX2-NEXT: vpmovzxwd %xmm0, %ymm0
+; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT: vpbroadcastd {{.*}}(%rip), %ymm1
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
@@ -158,47 +154,43 @@ entry:
define <16 x i16> @zext_16i8_to_16i16(<16 x i8> %z) {
; SSE2-LABEL: zext_16i8_to_16i16:
; SSE2: # BB#0: # %entry
-; SSE2-NEXT: movdqa %xmm0, %xmm2
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
-; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [255,255,255,255,255,255,255,255]
-; SSE2-NEXT: pand %xmm1, %xmm2
-; SSE2-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
-; SSE2-NEXT: pand %xmm0, %xmm1
-; SSE2-NEXT: movdqa %xmm2, %xmm0
+; SSE2-NEXT: movdqa %xmm0, %xmm1
+; SSE2-NEXT: pxor %xmm2, %xmm2
+; SSE2-NEXT: # kill
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
+; SSE2-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
+; SSE2-NEXT: pand .LCPI3_0(%rip), %xmm1
; SSE2-NEXT: retq
;
; SSSE3-LABEL: zext_16i8_to_16i16:
; SSSE3: # BB#0: # %entry
-; SSSE3-NEXT: movdqa %xmm0, %xmm2
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
-; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [255,255,255,255,255,255,255,255]
-; SSSE3-NEXT: pand %xmm1, %xmm2
-; SSSE3-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
-; SSSE3-NEXT: pand %xmm0, %xmm1
-; SSSE3-NEXT: movdqa %xmm2, %xmm0
+; SSSE3-NEXT: movdqa %xmm0, %xmm1
+; SSSE3-NEXT: pxor %xmm2, %xmm2
+; SSSE3-NEXT: # kill
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
+; SSSE3-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
+; SSSE3-NEXT: pand .LCPI3_0(%rip), %xmm1
; SSSE3-NEXT: retq
;
; SSE41-LABEL: zext_16i8_to_16i16:
; SSE41: # BB#0: # %entry
-; SSE41-NEXT: pmovzxbw %xmm0, %xmm2
-; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [255,255,255,255,255,255,255,255]
-; SSE41-NEXT: pand %xmm1, %xmm2
-; SSE41-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
-; SSE41-NEXT: pand %xmm0, %xmm1
-; SSE41-NEXT: movdqa %xmm2, %xmm0
+; SSE41-NEXT: movdqa %xmm0, %xmm1
+; SSE41-NEXT: pmovzxbw %xmm1, %xmm0 {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
+; SSE41-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
+; SSE41-NEXT: pand .LCPI3_0(%rip), %xmm1
; SSE41-NEXT: retq
;
; AVX1-LABEL: zext_16i8_to_16i16:
; AVX1: # BB#0: # %entry
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm1 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
-; AVX1-NEXT: vpmovzxbw %xmm0, %xmm0
+; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: zext_16i8_to_16i16:
; AVX2: # BB#0: # %entry
-; AVX2-NEXT: vpmovzxbw %xmm0, %ymm0
+; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; AVX2-NEXT: retq
entry:
%t = zext <16 x i8> %z to <16 x i16>
@@ -208,51 +200,43 @@ entry:
define <16 x i16> @load_zext_16i8_to_16i16(<16 x i8> *%ptr) {
; SSE2-LABEL: load_zext_16i8_to_16i16:
; SSE2: # BB#0: # %entry
-; SSE2-NEXT: movdqa (%rdi), %xmm1
-; SSE2-NEXT: movdqa %xmm1, %xmm0
-; SSE2-NEXT: punpcklbw %xmm0, %xmm0 # xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
-; SSE2-NEXT: pand %xmm2, %xmm0
-; SSE2-NEXT: punpckhbw %xmm1, %xmm1 # xmm1 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
-; SSE2-NEXT: pand %xmm2, %xmm1
+; SSE2-NEXT: movdqa (%rdi), %xmm1
+; SSE2-NEXT: pxor %xmm2, %xmm2
+; SSE2-NEXT: movdqa %xmm1, %xmm0
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
+; SSE2-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
+; SSE2-NEXT: pand .LCPI4_0(%rip), %xmm1
; SSE2-NEXT: retq
-
+;
; SSSE3-LABEL: load_zext_16i8_to_16i16:
; SSSE3: # BB#0: # %entry
-; SSSE3-NEXT: movdqa (%rdi), %xmm1
-; SSSE3-NEXT: movdqa %xmm1, %xmm0
-; SSSE3-NEXT: punpcklbw %xmm0, %xmm0 # xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
-; SSSE3-NEXT: pand %xmm2, %xmm0
-; SSSE3-NEXT: punpckhbw %xmm1, %xmm1 # xmm1 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
-; SSSE3-NEXT: pand %xmm2, %xmm1
+; SSSE3-NEXT: movdqa (%rdi), %xmm1
+; SSSE3-NEXT: pxor %xmm2, %xmm2
+; SSSE3-NEXT: movdqa %xmm1, %xmm0
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
+; SSSE3-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
+; SSSE3-NEXT: pand .LCPI4_0(%rip), %xmm1
; SSSE3-NEXT: retq
-
+;
; SSE41-LABEL: load_zext_16i8_to_16i16:
-; SSE41: # BB#0: # %entry
-; SSE41-NEXT: movdqa (%rdi), %xmm1
-; SSE41-NEXT: pmovzxbw %xmm1, %xmm0
-; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
-; SSE41-NEXT: pand %xmm2, %xmm0
-; SSE41-NEXT: punpckhbw %xmm1, %xmm1 # xmm1 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
-; SSE41-NEXT: pand %xmm2, %xmm1
-; SSE41-NEXT: retq
-
+; SSE41: # BB#0: # %entry
+; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
+; SSE41-NEXT: pmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
+; SSE41-NEXT: retq
+;
; AVX1-LABEL: load_zext_16i8_to_16i16:
-; AVX1: # BB#0: # %entry
-; AVX1-NEXT: vmovdqa (%rdi), %xmm0
-; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX1-NEXT: vpunpckhbw %xmm1, %xmm0, %xmm1 # xmm1 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
-; AVX1-NEXT: vpmovzxbw %xmm0, %xmm0
-; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX1-NEXT: retq
-
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
+; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
; AVX2-LABEL: load_zext_16i8_to_16i16:
-; AVX2: # BB#0: # %entry
-; AVX2-NEXT: vpmovzxbw (%rdi), %ymm0
-; AVX2-NEXT: retq
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
+; AVX2-NEXT: retq
entry:
- %X = load <16 x i8>* %ptr
+ %X = load <16 x i8>, <16 x i8>* %ptr
%Y = zext <16 x i8> %X to <16 x i16>
ret <16 x i16> %Y
}
@@ -260,101 +244,234 @@ entry:
define <8 x i32> @load_zext_8i16_to_8i32(<8 x i16> *%ptr) {
; SSE2-LABEL: load_zext_8i16_to_8i32:
; SSE2: # BB#0: # %entry
-; SSE2-NEXT: movdqa (%rdi), %xmm1
-; SSE2-NEXT: movdqa %xmm1, %xmm0
-; SSE2-NEXT: punpcklwd %xmm0, %xmm0 # xmm0 = xmm0[0,0,1,1,2,2,3,3]
-; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [65535,65535,65535,65535]
-; SSE2-NEXT: pand %xmm2, %xmm0
-; SSE2-NEXT: punpckhwd %xmm1, %xmm1 # xmm1 = xmm1[4,4,5,5,6,6,7,7]
-; SSE2-NEXT: pand %xmm2, %xmm1
+; SSE2-NEXT: movdqa (%rdi), %xmm1
+; SSE2-NEXT: pxor %xmm2, %xmm2
+; SSE2-NEXT: movdqa %xmm1, %xmm0
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
+; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
+; SSE2-NEXT: pand .LCPI5_0(%rip), %xmm1
; SSE2-NEXT: retq
-
+;
; SSSE3-LABEL: load_zext_8i16_to_8i32:
; SSSE3: # BB#0: # %entry
-; SSSE3-NEXT: movdqa (%rdi), %xmm1
-; SSSE3-NEXT: movdqa %xmm1, %xmm0
-; SSSE3-NEXT: punpcklwd %xmm0, %xmm0 # xmm0 = xmm0[0,0,1,1,2,2,3,3]
-; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [65535,65535,65535,65535]
-; SSSE3-NEXT: pand %xmm2, %xmm0
-; SSSE3-NEXT: punpckhwd %xmm1, %xmm1 # xmm1 = xmm1[4,4,5,5,6,6,7,7]
-; SSSE3-NEXT: pand %xmm2, %xmm1
+; SSSE3-NEXT: movdqa (%rdi), %xmm1
+; SSSE3-NEXT: pxor %xmm2, %xmm2
+; SSSE3-NEXT: movdqa %xmm1, %xmm0
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
+; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
+; SSSE3-NEXT: pand .LCPI5_0(%rip), %xmm1
; SSSE3-NEXT: retq
-
+;
; SSE41-LABEL: load_zext_8i16_to_8i32:
-; SSE41: # BB#0: # %entry
-; SSE41-NEXT: movdqa (%rdi), %xmm1
-; SSE41-NEXT: pmovzxwd %xmm1, %xmm0
-; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [65535,65535,65535,65535]
-; SSE41-NEXT: pand %xmm2, %xmm0
-; SSE41-NEXT: punpckhwd %xmm1, %xmm1 # xmm1 = xmm1[4,4,5,5,6,6,7,7]
-; SSE41-NEXT: pand %xmm2, %xmm1
-; SSE41-NEXT: retq
-
+; SSE41: # BB#0: # %entry
+; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
+; SSE41-NEXT: pmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
+; SSE41-NEXT: retq
+;
; AVX1-LABEL: load_zext_8i16_to_8i32:
-; AVX1: # BB#0: # %entry
-; AVX1-NEXT: vmovdqa (%rdi), %xmm0
-; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX1-NEXT: vpunpckhwd %xmm1, %xmm0, %xmm1 # xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
-; AVX1-NEXT: vpmovzxwd %xmm0, %xmm0
-; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
+; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
-
+;
; AVX2-LABEL: load_zext_8i16_to_8i32:
-; AVX2: # BB#0: # %entry
-; AVX2-NEXT: vpmovzxwd (%rdi), %ymm0
-; AVX2-NEXT: retq
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
+; AVX2-NEXT: retq
entry:
- %X = load <8 x i16>* %ptr
+ %X = load <8 x i16>, <8 x i16>* %ptr
%Y = zext <8 x i16> %X to <8 x i32>
ret <8 x i32>%Y
}
define <4 x i64> @load_zext_4i32_to_4i64(<4 x i32> *%ptr) {
; SSE2-LABEL: load_zext_4i32_to_4i64:
-; SSE2: # BB#0: # %entry
-; SSE2-NEXT: movdqa (%rdi), %xmm1
-; SSE2-NEXT: pshufd $-44, %xmm1, %xmm0 # xmm0 = xmm1[0,1,1,3]
-; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [4294967295,4294967295]
-; SSE2-NEXT: pand %xmm2, %xmm0
-; SSE2-NEXT: pshufd $-6, %xmm1, %xmm1 # xmm1 = xmm1[2,2,3,3]
-; SSE2-NEXT: pand %xmm2, %xmm1
-; SSE2-NEXT: retq
-
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movdqa (%rdi), %xmm1
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,1,1,3]
+; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [4294967295,4294967295]
+; SSE2-NEXT: pand %xmm2, %xmm0
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
+; SSE2-NEXT: pand %xmm2, %xmm1
+; SSE2-NEXT: retq
+;
; SSSE3-LABEL: load_zext_4i32_to_4i64:
-; SSSE3: # BB#0: # %entry
-; SSSE3-NEXT: movdqa (%rdi), %xmm1
-; SSSE3-NEXT: pshufd $-44, %xmm1, %xmm0 # xmm0 = xmm1[0,1,1,3]
-; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [4294967295,4294967295]
-; SSSE3-NEXT: pand %xmm2, %xmm0
-; SSSE3-NEXT: pshufd $-6, %xmm1, %xmm1 # xmm1 = xmm1[2,2,3,3]
-; SSSE3-NEXT: pand %xmm2, %xmm1
-; SSSE3-NEXT: retq
-
+; SSSE3: # BB#0: # %entry
+; SSSE3-NEXT: movdqa (%rdi), %xmm1
+; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,1,1,3]
+; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [4294967295,4294967295]
+; SSSE3-NEXT: pand %xmm2, %xmm0
+; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
+; SSSE3-NEXT: pand %xmm2, %xmm1
+; SSSE3-NEXT: retq
+;
; SSE41-LABEL: load_zext_4i32_to_4i64:
-; SSE41: # BB#0: # %entry
-; SSE41-NEXT: movdqa (%rdi), %xmm1
-; SSE41-NEXT: pmovzxdq %xmm1, %xmm0
-; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [4294967295,4294967295]
-; SSE41-NEXT: pand %xmm2, %xmm0
-; SSE41-NEXT: pshufd $-6, %xmm1, %xmm1 # xmm1 = xmm1[2,2,3,3]
-; SSE41-NEXT: pand %xmm2, %xmm1
-; SSE41-NEXT: retq
-
+; SSE41: # BB#0: # %entry
+; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
+; SSE41-NEXT: pmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero
+; SSE41-NEXT: retq
+;
; AVX1-LABEL: load_zext_4i32_to_4i64:
-; AVX1: # BB#0: # %entry
-; AVX1-NEXT: vmovdqa (%rdi), %xmm0
-; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX1-NEXT: vpunpckhdq %xmm1, %xmm0, %xmm1 # xmm1 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; AVX1-NEXT: vpmovzxdq %xmm0, %xmm0
-; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX1-NEXT: retq
-
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
+; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
; AVX2-LABEL: load_zext_4i32_to_4i64:
-; AVX2: # BB#0: # %entry
-; AVX2-NEXT: vpmovzxdq (%rdi), %ymm0
-; AVX2-NEXT: retq
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
+; AVX2-NEXT: retq
entry:
- %X = load <4 x i32>* %ptr
+ %X = load <4 x i32>, <4 x i32>* %ptr
%Y = zext <4 x i32> %X to <4 x i64>
ret <4 x i64>%Y
}
+
+define <8 x i32> @shuf_zext_8i16_to_8i32(<8 x i16> %A) nounwind uwtable readnone ssp {
+; SSE2-LABEL: shuf_zext_8i16_to_8i32:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movdqa %xmm0, %xmm1
+; SSE2-NEXT: pxor %xmm2, %xmm2
+; SSE2-NEXT: # kill
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
+; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: shuf_zext_8i16_to_8i32:
+; SSSE3: # BB#0: # %entry
+; SSSE3-NEXT: movdqa %xmm0, %xmm1
+; SSSE3-NEXT: pxor %xmm2, %xmm2
+; SSSE3-NEXT: # kill
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
+; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: shuf_zext_8i16_to_8i32:
+; SSE41: # BB#0: # %entry
+; SSE41-NEXT: movdqa %xmm0, %xmm1
+; SSE41-NEXT: pxor %xmm2, %xmm2
+; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
+; SSE41-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
+; SSE41-NEXT: retq
+;
+; AVX1-LABEL: shuf_zext_8i16_to_8i32:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuf_zext_8i16_to_8i32:
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: # kill
+; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
+; AVX2-NEXT: retq
+entry:
+ %B = shufflevector <8 x i16> %A, <8 x i16> zeroinitializer, <16 x i32> <i32 0, i32 8, i32 1, i32 8, i32 2, i32 8, i32 3, i32 8, i32 4, i32 8, i32 5, i32 8, i32 6, i32 8, i32 7, i32 8>
+ %Z = bitcast <16 x i16> %B to <8 x i32>
+ ret <8 x i32> %Z
+}
+
+define <4 x i64> @shuf_zext_4i32_to_4i64(<4 x i32> %A) nounwind uwtable readnone ssp {
+; SSE2-LABEL: shuf_zext_4i32_to_4i64:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movdqa %xmm0, %xmm1
+; SSE2-NEXT: pxor %xmm2, %xmm2
+; SSE2-NEXT: # kill
+; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: shuf_zext_4i32_to_4i64:
+; SSSE3: # BB#0: # %entry
+; SSSE3-NEXT: movdqa %xmm0, %xmm1
+; SSSE3-NEXT: pxor %xmm2, %xmm2
+; SSSE3-NEXT: # kill
+; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: shuf_zext_4i32_to_4i64:
+; SSE41: # BB#0: # %entry
+; SSE41-NEXT: movdqa %xmm0, %xmm1
+; SSE41-NEXT: pxor %xmm2, %xmm2
+; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero
+; SSE41-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
+; SSE41-NEXT: retq
+;
+; AVX1-LABEL: shuf_zext_4i32_to_4i64:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero
+; AVX1-NEXT: vxorpd %xmm2, %xmm2, %xmm2
+; AVX1-NEXT: vblendpd {{.*#+}} xmm0 = xmm2[0],xmm0[1]
+; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,0,3,0]
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuf_zext_4i32_to_4i64:
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: # kill
+; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
+; AVX2-NEXT: retq
+entry:
+ %B = shufflevector <4 x i32> %A, <4 x i32> zeroinitializer, <8 x i32> <i32 0, i32 4, i32 1, i32 4, i32 2, i32 4, i32 3, i32 4>
+ %Z = bitcast <8 x i32> %B to <4 x i64>
+ ret <4 x i64> %Z
+}
+
+define <8 x i32> @shuf_zext_8i8_to_8i32(<8 x i8> %A) {
+; SSE2-LABEL: shuf_zext_8i8_to_8i32:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: pand .LCPI9_0(%rip), %xmm0
+; SSE2-NEXT: packuswb %xmm0, %xmm0
+; SSE2-NEXT: pxor %xmm1, %xmm1
+; SSE2-NEXT: movdqa %xmm0, %xmm2
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSE2-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
+; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [0,255,255,255,0,255,255,255,0,255,255,255,0,255,255,255]
+; SSE2-NEXT: pandn %xmm0, %xmm1
+; SSE2-NEXT: movdqa %xmm2, %xmm0
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: shuf_zext_8i8_to_8i32:
+; SSSE3: # BB#0: # %entry
+; SSSE3-NEXT: movdqa %xmm0, %xmm1
+; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
+; SSSE3-NEXT: pxor %xmm2, %xmm2
+; SSSE3-NEXT: movdqa %xmm1, %xmm0
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
+; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: shuf_zext_8i8_to_8i32:
+; SSE41: # BB#0: # %entry
+; SSE41-NEXT: movdqa %xmm0, %xmm1
+; SSE41-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
+; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
+; SSE41-NEXT: pshufb {{.*#+}} xmm1 = xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero
+; SSE41-NEXT: retq
+;
+; AVX1-LABEL: shuf_zext_8i8_to_8i32:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
+; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
+; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuf_zext_8i8_to_8i32:
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
+; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
+; AVX2-NEXT: retq
+entry:
+ %B = shufflevector <8 x i8> %A, <8 x i8> zeroinitializer, <32 x i32> <i32 0, i32 8, i32 8, i32 8, i32 1, i32 8, i32 8, i32 8, i32 2, i32 8, i32 8, i32 8, i32 3, i32 8, i32 8, i32 8, i32 4, i32 8, i32 8, i32 8, i32 5, i32 8, i32 8, i32 8, i32 6, i32 8, i32 8, i32 8, i32 7, i32 8, i32 8, i32 8>
+ %Z = bitcast <32 x i8> %B to <8 x i32>
+ ret <8 x i32> %Z
+}
diff --git a/test/CodeGen/X86/vector-zmov.ll b/test/CodeGen/X86/vector-zmov.ll
index 4de2543a1d6b..cf592b1e9f42 100644
--- a/test/CodeGen/X86/vector-zmov.ll
+++ b/test/CodeGen/X86/vector-zmov.ll
@@ -15,7 +15,7 @@ define <4 x i32> @load_zmov_4i32_to_0zzz(<4 x i32> *%ptr) {
; AVX-NEXT: vmovd (%rdi), %xmm0
; AVX-NEXT: retq
entry:
- %X = load <4 x i32>* %ptr
+ %X = load <4 x i32>, <4 x i32>* %ptr
%Y = shufflevector <4 x i32> %X, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 4, i32 4, i32 4>
ret <4 x i32>%Y
}
@@ -31,7 +31,7 @@ define <2 x i64> @load_zmov_2i64_to_0z(<2 x i64> *%ptr) {
; AVX-NEXT: vmovq (%rdi), %xmm0
; AVX-NEXT: retq
entry:
- %X = load <2 x i64>* %ptr
+ %X = load <2 x i64>, <2 x i64>* %ptr
%Y = shufflevector <2 x i64> %X, <2 x i64> zeroinitializer, <2 x i32> <i32 0, i32 2>
ret <2 x i64>%Y
}
diff --git a/test/CodeGen/X86/vector.ll b/test/CodeGen/X86/vector.ll
index 82d20a23f357..39e7f0e0b066 100644
--- a/test/CodeGen/X86/vector.ll
+++ b/test/CodeGen/X86/vector.ll
@@ -13,48 +13,48 @@
;;; TEST HANDLING OF VARIOUS VECTOR SIZES
define void @test_f1(%f1* %P, %f1* %Q, %f1* %S) {
- %p = load %f1* %P ; <%f1> [#uses=1]
- %q = load %f1* %Q ; <%f1> [#uses=1]
+ %p = load %f1, %f1* %P ; <%f1> [#uses=1]
+ %q = load %f1, %f1* %Q ; <%f1> [#uses=1]
%R = fadd %f1 %p, %q ; <%f1> [#uses=1]
store %f1 %R, %f1* %S
ret void
}
define void @test_f2(%f2* %P, %f2* %Q, %f2* %S) {
- %p = load %f2* %P ; <%f2> [#uses=1]
- %q = load %f2* %Q ; <%f2> [#uses=1]
+ %p = load %f2, %f2* %P ; <%f2> [#uses=1]
+ %q = load %f2, %f2* %Q ; <%f2> [#uses=1]
%R = fadd %f2 %p, %q ; <%f2> [#uses=1]
store %f2 %R, %f2* %S
ret void
}
define void @test_f4(%f4* %P, %f4* %Q, %f4* %S) {
- %p = load %f4* %P ; <%f4> [#uses=1]
- %q = load %f4* %Q ; <%f4> [#uses=1]
+ %p = load %f4, %f4* %P ; <%f4> [#uses=1]
+ %q = load %f4, %f4* %Q ; <%f4> [#uses=1]
%R = fadd %f4 %p, %q ; <%f4> [#uses=1]
store %f4 %R, %f4* %S
ret void
}
define void @test_f8(%f8* %P, %f8* %Q, %f8* %S) {
- %p = load %f8* %P ; <%f8> [#uses=1]
- %q = load %f8* %Q ; <%f8> [#uses=1]
+ %p = load %f8, %f8* %P ; <%f8> [#uses=1]
+ %q = load %f8, %f8* %Q ; <%f8> [#uses=1]
%R = fadd %f8 %p, %q ; <%f8> [#uses=1]
store %f8 %R, %f8* %S
ret void
}
define void @test_fmul(%f8* %P, %f8* %Q, %f8* %S) {
- %p = load %f8* %P ; <%f8> [#uses=1]
- %q = load %f8* %Q ; <%f8> [#uses=1]
+ %p = load %f8, %f8* %P ; <%f8> [#uses=1]
+ %q = load %f8, %f8* %Q ; <%f8> [#uses=1]
%R = fmul %f8 %p, %q ; <%f8> [#uses=1]
store %f8 %R, %f8* %S
ret void
}
define void @test_div(%f8* %P, %f8* %Q, %f8* %S) {
- %p = load %f8* %P ; <%f8> [#uses=1]
- %q = load %f8* %Q ; <%f8> [#uses=1]
+ %p = load %f8, %f8* %P ; <%f8> [#uses=1]
+ %q = load %f8, %f8* %Q ; <%f8> [#uses=1]
%R = fdiv %f8 %p, %q ; <%f8> [#uses=1]
store %f8 %R, %f8* %S
ret void
@@ -63,21 +63,21 @@ define void @test_div(%f8* %P, %f8* %Q, %f8* %S) {
;;; TEST VECTOR CONSTRUCTS
define void @test_cst(%f4* %P, %f4* %S) {
- %p = load %f4* %P ; <%f4> [#uses=1]
+ %p = load %f4, %f4* %P ; <%f4> [#uses=1]
%R = fadd %f4 %p, < float 0x3FB99999A0000000, float 1.000000e+00, float 2.000000e+00, float 4.500000e+00 > ; <%f4> [#uses=1]
store %f4 %R, %f4* %S
ret void
}
define void @test_zero(%f4* %P, %f4* %S) {
- %p = load %f4* %P ; <%f4> [#uses=1]
+ %p = load %f4, %f4* %P ; <%f4> [#uses=1]
%R = fadd %f4 %p, zeroinitializer ; <%f4> [#uses=1]
store %f4 %R, %f4* %S
ret void
}
define void @test_undef(%f4* %P, %f4* %S) {
- %p = load %f4* %P ; <%f4> [#uses=1]
+ %p = load %f4, %f4* %P ; <%f4> [#uses=1]
%R = fadd %f4 %p, undef ; <%f4> [#uses=1]
store %f4 %R, %f4* %S
ret void
@@ -102,19 +102,19 @@ define void @test_scalar_to_vector(float %F, %f4* %S) {
}
define float @test_extract_elt(%f8* %P) {
- %p = load %f8* %P ; <%f8> [#uses=1]
+ %p = load %f8, %f8* %P ; <%f8> [#uses=1]
%R = extractelement %f8 %p, i32 3 ; <float> [#uses=1]
ret float %R
}
define double @test_extract_elt2(%d8* %P) {
- %p = load %d8* %P ; <%d8> [#uses=1]
+ %p = load %d8, %d8* %P ; <%d8> [#uses=1]
%R = extractelement %d8 %p, i32 3 ; <double> [#uses=1]
ret double %R
}
define void @test_cast_1(%f4* %b, %i4* %a) {
- %tmp = load %f4* %b ; <%f4> [#uses=1]
+ %tmp = load %f4, %f4* %b ; <%f4> [#uses=1]
%tmp2 = fadd %f4 %tmp, < float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00 > ; <%f4> [#uses=1]
%tmp3 = bitcast %f4 %tmp2 to %i4 ; <%i4> [#uses=1]
%tmp4 = add %i4 %tmp3, < i32 1, i32 2, i32 3, i32 4 > ; <%i4> [#uses=1]
@@ -123,7 +123,7 @@ define void @test_cast_1(%f4* %b, %i4* %a) {
}
define void @test_cast_2(%f8* %a, <8 x i32>* %b) {
- %T = load %f8* %a ; <%f8> [#uses=1]
+ %T = load %f8, %f8* %a ; <%f8> [#uses=1]
%T2 = bitcast %f8 %T to <8 x i32> ; <<8 x i32>> [#uses=1]
store <8 x i32> %T2, <8 x i32>* %b
ret void
@@ -137,7 +137,7 @@ define void @splat(%f4* %P, %f4* %Q, float %X) {
%tmp2 = insertelement %f4 %tmp, float %X, i32 1 ; <%f4> [#uses=1]
%tmp4 = insertelement %f4 %tmp2, float %X, i32 2 ; <%f4> [#uses=1]
%tmp6 = insertelement %f4 %tmp4, float %X, i32 3 ; <%f4> [#uses=1]
- %q = load %f4* %Q ; <%f4> [#uses=1]
+ %q = load %f4, %f4* %Q ; <%f4> [#uses=1]
%R = fadd %f4 %q, %tmp6 ; <%f4> [#uses=1]
store %f4 %R, %f4* %P
ret void
@@ -148,7 +148,7 @@ define void @splat_i4(%i4* %P, %i4* %Q, i32 %X) {
%tmp2 = insertelement %i4 %tmp, i32 %X, i32 1 ; <%i4> [#uses=1]
%tmp4 = insertelement %i4 %tmp2, i32 %X, i32 2 ; <%i4> [#uses=1]
%tmp6 = insertelement %i4 %tmp4, i32 %X, i32 3 ; <%i4> [#uses=1]
- %q = load %i4* %Q ; <%i4> [#uses=1]
+ %q = load %i4, %i4* %Q ; <%i4> [#uses=1]
%R = add %i4 %q, %tmp6 ; <%i4> [#uses=1]
store %i4 %R, %i4* %P
ret void
diff --git a/test/CodeGen/X86/viabs.ll b/test/CodeGen/X86/viabs.ll
index d9f2cb074759..fe528fd4ea24 100644
--- a/test/CodeGen/X86/viabs.ll
+++ b/test/CodeGen/X86/viabs.ll
@@ -1,7 +1,7 @@
-; RUN: llc < %s -march=x86-64 -mcpu=x86-64 | FileCheck %s -check-prefix=SSE2
-; RUN: llc < %s -march=x86-64 -mcpu=corei7 | FileCheck %s -check-prefix=SSSE3
-; RUN: llc < %s -march=x86-64 -mcpu=core-avx2 | FileCheck %s -check-prefix=AVX2
-; RUN: llc < %s -march=x86-64 -mcpu=knl | FileCheck %s -check-prefix=AVX512
+; RUN: llc < %s -march=x86-64 -mattr=sse2 | FileCheck %s -check-prefix=SSE2
+; RUN: llc < %s -march=x86-64 -mattr=ssse3 | FileCheck %s -check-prefix=SSSE3
+; RUN: llc < %s -march=x86-64 -mattr=avx2 | FileCheck %s -check-prefix=AVX2
+; RUN: llc < %s -march=x86-64 -mattr=avx512f | FileCheck %s -check-prefix=AVX512
define <4 x i32> @test1(<4 x i32> %a) nounwind {
; SSE2-LABEL: test1:
@@ -262,7 +262,7 @@ define <8 x i64> @test13(<8 x i64>* %a.ptr) nounwind {
; AVX512-LABEL: test13:
; AVX512: vpabsq (%
; AVX512-NEXT: ret
- %a = load <8 x i64>* %a.ptr, align 8
+ %a = load <8 x i64>, <8 x i64>* %a.ptr, align 8
%tmp1neg = sub <8 x i64> zeroinitializer, %a
%b = icmp sle <8 x i64> %a, zeroinitializer
%abs = select <8 x i1> %b, <8 x i64> %tmp1neg, <8 x i64> %a
diff --git a/test/CodeGen/X86/visibility2.ll b/test/CodeGen/X86/visibility2.ll
index 72ea7338de61..48a0ac692198 100644
--- a/test/CodeGen/X86/visibility2.ll
+++ b/test/CodeGen/X86/visibility2.ll
@@ -8,7 +8,7 @@
define void @foo1() nounwind ssp {
entry:
- %tmp = load i8** @foo_private_extern_str, align 8
+ %tmp = load i8*, i8** @foo_private_extern_str, align 8
call void @foo3(i8* %tmp)
ret void
}
diff --git a/test/CodeGen/X86/volatile.ll b/test/CodeGen/X86/volatile.ll
index 1a82014536e7..8d521b46f7c9 100644
--- a/test/CodeGen/X86/volatile.ll
+++ b/test/CodeGen/X86/volatile.ll
@@ -4,14 +4,14 @@
@x = external global double
define void @foo() nounwind {
- %a = load volatile double* @x
+ %a = load volatile double, double* @x
store volatile double 0.0, double* @x
store volatile double 0.0, double* @x
- %b = load volatile double* @x
+ %b = load volatile double, double* @x
ret void
}
define void @bar() nounwind {
- %c = load volatile double* @x
+ %c = load volatile double, double* @x
ret void
}
diff --git a/test/CodeGen/X86/vortex-bug.ll b/test/CodeGen/X86/vortex-bug.ll
index 40f11175b20a..b1f42681a508 100644
--- a/test/CodeGen/X86/vortex-bug.ll
+++ b/test/CodeGen/X86/vortex-bug.ll
@@ -16,6 +16,6 @@ bb137.i: ; preds = %bb137.i, %entry
br i1 false, label %bb137.i, label %bb149.i.loopexit
bb149.i.loopexit: ; preds = %bb137.i
- %tmp139.i = getelementptr i8* %FieldName, i64 %tmp139.rec.i ; <i8*> [#uses=0]
+ %tmp139.i = getelementptr i8, i8* %FieldName, i64 %tmp139.rec.i ; <i8*> [#uses=0]
unreachable
}
diff --git a/test/CodeGen/X86/vselect-2.ll b/test/CodeGen/X86/vselect-2.ll
index 0991bdacd9c5..fe4cfba08b8a 100644
--- a/test/CodeGen/X86/vselect-2.ll
+++ b/test/CodeGen/X86/vselect-2.ll
@@ -2,13 +2,13 @@
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE41
define <4 x i32> @test1(<4 x i32> %A, <4 x i32> %B) {
-; SSE2-LABEL: test1
-; SSE2: # BB#0:
-; SSE2-NEXT: movsd %xmm0, %xmm1
-; SSE2-NEXT: movaps %xmm1, %xmm0
-; SSE2-NEXT: retq
+; SSE2-LABEL: test1:
+; SSE2: # BB#0:
+; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
+; SSE2-NEXT: movapd %xmm1, %xmm0
+; SSE2-NEXT: retq
;
-; SSE41-LABEL: test1
+; SSE41-LABEL: test1:
; SSE41: # BB#0:
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; SSE41-NEXT: retq
@@ -17,12 +17,12 @@ define <4 x i32> @test1(<4 x i32> %A, <4 x i32> %B) {
}
define <4 x i32> @test2(<4 x i32> %A, <4 x i32> %B) {
-; SSE2-LABEL: test2
-; SSE2: # BB#0:
-; SSE2-NEXT: movsd %xmm1, %xmm0
-; SSE2-NEXT: retq
+; SSE2-LABEL: test2:
+; SSE2: # BB#0:
+; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
+; SSE2-NEXT: retq
;
-; SSE41-LABEL: test2
+; SSE41-LABEL: test2:
; SSE41: # BB#0:
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
; SSE41-NEXT: retq
@@ -31,13 +31,13 @@ define <4 x i32> @test2(<4 x i32> %A, <4 x i32> %B) {
}
define <4 x float> @test3(<4 x float> %A, <4 x float> %B) {
-; SSE2-LABEL: test3
-; SSE2: # BB#0:
-; SSE2-NEXT: movsd %xmm0, %xmm1
-; SSE2-NEXT: movaps %xmm1, %xmm0
-; SSE2-NEXT: retq
+; SSE2-LABEL: test3:
+; SSE2: # BB#0:
+; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
+; SSE2-NEXT: movapd %xmm1, %xmm0
+; SSE2-NEXT: retq
;
-; SSE41-LABEL: test3
+; SSE41-LABEL: test3:
; SSE41: # BB#0:
; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
; SSE41-NEXT: retq
@@ -46,12 +46,12 @@ define <4 x float> @test3(<4 x float> %A, <4 x float> %B) {
}
define <4 x float> @test4(<4 x float> %A, <4 x float> %B) {
-; SSE2-LABEL: test4
-; SSE2: # BB#0:
-; SSE2-NEXT: movsd %xmm1, %xmm0
-; SSE2-NEXT: retq
+; SSE2-LABEL: test4:
+; SSE2: # BB#0:
+; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
+; SSE2-NEXT: retq
;
-; SSE41-LABEL: test4
+; SSE41-LABEL: test4:
; SSE41: # BB#0:
; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE41-NEXT: retq
diff --git a/test/CodeGen/X86/vselect-avx.ll b/test/CodeGen/X86/vselect-avx.ll
index 0c0f4bbf992a..de04a097de02 100644
--- a/test/CodeGen/X86/vselect-avx.ll
+++ b/test/CodeGen/X86/vselect-avx.ll
@@ -14,8 +14,8 @@ target triple = "x86_64-apple-macosx"
; <rdar://problem/18675020>
; CHECK-LABEL: test:
-; CHECK: vmovdqa {{.*#+}} xmm0 = [65535,0,0,65535]
-; CHECK: vmovdqa {{.*#+}} xmm2 = [65533,124,125,14807]
+; CHECK: vmovdqa {{.*#+}} xmm1 = [65533,124,125,14807]
+; CHECK: vmovdqa {{.*#+}} xmm1 = [65535,0,0,65535]
; CHECK: ret
define void @test(<4 x i16>* %a, <4 x i16>* %b) {
body:
@@ -33,17 +33,18 @@ body:
; of the condition.
;
; CHECK-LABEL: test2:
-; CHECK: vpslld $31, %xmm0, %xmm0
-; CHECK-NEXT: vpmovsxdq %xmm0, %xmm1
-; CHECK-NEXT: vpshufd $78, %xmm0, %xmm0 ## xmm0 = xmm0[2,3,0,1]
-; CHECK-NEXT: vpmovsxdq %xmm0, %xmm0
-; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm1, [[MASK:%ymm[0-9]+]]
-; CHECK: vblendvpd [[MASK]]
-; CHECK: retq
+; CHECK: vpslld $31, %xmm0, %xmm0
+; CHECK-NEXT: vpsrad $31, %xmm0, %xmm0
+; CHECK-NEXT: vpmovsxdq %xmm0, %xmm1
+; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; CHECK-NEXT: vpmovsxdq %xmm0, %xmm0
+; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm1, [[MASK:%ymm[0-9]+]]
+; CHECK: vblendvpd [[MASK]]
+; CHECK: retq
define void @test2(double** %call1559, i64 %indvars.iv4198, <4 x i1> %tmp1895) {
bb:
- %arrayidx1928 = getelementptr inbounds double** %call1559, i64 %indvars.iv4198
- %tmp1888 = load double** %arrayidx1928, align 8
+ %arrayidx1928 = getelementptr inbounds double*, double** %call1559, i64 %indvars.iv4198
+ %tmp1888 = load double*, double** %arrayidx1928, align 8
%predphi.v.v = select <4 x i1> %tmp1895, <4 x double> <double -5.000000e-01, double -5.000000e-01, double -5.000000e-01, double -5.000000e-01>, <4 x double> <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>
%tmp1900 = bitcast double* %tmp1888 to <4 x double>*
store <4 x double> %predphi.v.v, <4 x double>* %tmp1900, align 8
@@ -59,19 +60,15 @@ bb:
;
; <rdar://problem/18819506>
-; Note: For now, hard code ORIG_MASK and SHRUNK_MASK registers, because we
-; cannot express that ORIG_MASK must not be equal to ORIG_MASK. Otherwise,
-; even a faulty pattern would pass!
-;
; CHECK-LABEL: test3:
-; Compute the original mask.
-; CHECK: vpcmpeqd {{%xmm[0-9]+}}, {{%xmm[0-9]+}}, [[ORIG_MASK:%xmm0]]
-; Shrink the bit of the mask.
-; CHECK-NEXT: vpslld $31, [[ORIG_MASK]], [[SHRUNK_MASK:%xmm3]]
-; Use the shrunk mask in the blend.
-; CHECK-NEXT: vblendvps [[SHRUNK_MASK]], %xmm{{[0-9]+}}, %xmm{{[0-9]+}}, %xmm{{[0-9]+}}
-; Use the original mask in the and.
-; CHECK-NEXT: vpand LCPI2_2(%rip), [[ORIG_MASK]], {{%xmm[0-9]+}}
+; Compute the mask.
+; CHECK: vpcmpeqd {{%xmm[0-9]+}}, {{%xmm[0-9]+}}, [[MASK:%xmm[0-9]+]]
+; Do not shrink the bit of the mask.
+; CHECK-NOT: vpslld $31, [[MASK]], {{%xmm[0-9]+}}
+; Use the mask in the blend.
+; CHECK-NEXT: vblendvps [[MASK]], %xmm{{[0-9]+}}, %xmm{{[0-9]+}}, %xmm{{[0-9]+}}
+; Use the mask in the and.
+; CHECK-NEXT: vpand LCPI2_2(%rip), [[MASK]], {{%xmm[0-9]+}}
; CHECK: retq
define void @test3(<4 x i32> %induction30, <4 x i16>* %tmp16, <4 x i16>* %tmp17, <4 x i16> %tmp3, <4 x i16> %tmp12) {
%tmp6 = srem <4 x i32> %induction30, <i32 3, i32 3, i32 3, i32 3>
@@ -83,3 +80,14 @@ define void @test3(<4 x i32> %induction30, <4 x i16>* %tmp16, <4 x i16>* %tmp17,
store <4 x i16> %predphi, <4 x i16>* %tmp17, align 8
ret void
}
+
+; We shouldn't try to lower this directly using VSELECT because we don't have
+; vpblendvb in AVX1, only in AVX2. Instead, it should be expanded.
+;
+; CHECK-LABEL: PR22706:
+; CHECK: vpcmpgtb
+; CHECK: vpcmpgtb
+define <32 x i8> @PR22706(<32 x i1> %x) {
+ %tmp = select <32 x i1> %x, <32 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>, <32 x i8> <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
+ ret <32 x i8> %tmp
+}
diff --git a/test/CodeGen/X86/vselect-minmax.ll b/test/CodeGen/X86/vselect-minmax.ll
index 3efe5684c15b..5ed687f50576 100644
--- a/test/CodeGen/X86/vselect-minmax.ll
+++ b/test/CodeGen/X86/vselect-minmax.ll
@@ -11,12 +11,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i8* %a, i64 %index
- %gep.b = getelementptr inbounds i8* %b, i64 %index
+ %gep.a = getelementptr inbounds i8, i8* %a, i64 %index
+ %gep.b = getelementptr inbounds i8, i8* %b, i64 %index
%ptr.a = bitcast i8* %gep.a to <16 x i8>*
%ptr.b = bitcast i8* %gep.b to <16 x i8>*
- %load.a = load <16 x i8>* %ptr.a, align 2
- %load.b = load <16 x i8>* %ptr.b, align 2
+ %load.a = load <16 x i8>, <16 x i8>* %ptr.a, align 2
+ %load.b = load <16 x i8>, <16 x i8>* %ptr.b, align 2
%cmp = icmp slt <16 x i8> %load.a, %load.b
%sel = select <16 x i1> %cmp, <16 x i8> %load.a, <16 x i8> %load.b
store <16 x i8> %sel, <16 x i8>* %ptr.a, align 2
@@ -46,12 +46,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i8* %a, i64 %index
- %gep.b = getelementptr inbounds i8* %b, i64 %index
+ %gep.a = getelementptr inbounds i8, i8* %a, i64 %index
+ %gep.b = getelementptr inbounds i8, i8* %b, i64 %index
%ptr.a = bitcast i8* %gep.a to <16 x i8>*
%ptr.b = bitcast i8* %gep.b to <16 x i8>*
- %load.a = load <16 x i8>* %ptr.a, align 2
- %load.b = load <16 x i8>* %ptr.b, align 2
+ %load.a = load <16 x i8>, <16 x i8>* %ptr.a, align 2
+ %load.b = load <16 x i8>, <16 x i8>* %ptr.b, align 2
%cmp = icmp sle <16 x i8> %load.a, %load.b
%sel = select <16 x i1> %cmp, <16 x i8> %load.a, <16 x i8> %load.b
store <16 x i8> %sel, <16 x i8>* %ptr.a, align 2
@@ -81,12 +81,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i8* %a, i64 %index
- %gep.b = getelementptr inbounds i8* %b, i64 %index
+ %gep.a = getelementptr inbounds i8, i8* %a, i64 %index
+ %gep.b = getelementptr inbounds i8, i8* %b, i64 %index
%ptr.a = bitcast i8* %gep.a to <16 x i8>*
%ptr.b = bitcast i8* %gep.b to <16 x i8>*
- %load.a = load <16 x i8>* %ptr.a, align 2
- %load.b = load <16 x i8>* %ptr.b, align 2
+ %load.a = load <16 x i8>, <16 x i8>* %ptr.a, align 2
+ %load.b = load <16 x i8>, <16 x i8>* %ptr.b, align 2
%cmp = icmp sgt <16 x i8> %load.a, %load.b
%sel = select <16 x i1> %cmp, <16 x i8> %load.a, <16 x i8> %load.b
store <16 x i8> %sel, <16 x i8>* %ptr.a, align 2
@@ -116,12 +116,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i8* %a, i64 %index
- %gep.b = getelementptr inbounds i8* %b, i64 %index
+ %gep.a = getelementptr inbounds i8, i8* %a, i64 %index
+ %gep.b = getelementptr inbounds i8, i8* %b, i64 %index
%ptr.a = bitcast i8* %gep.a to <16 x i8>*
%ptr.b = bitcast i8* %gep.b to <16 x i8>*
- %load.a = load <16 x i8>* %ptr.a, align 2
- %load.b = load <16 x i8>* %ptr.b, align 2
+ %load.a = load <16 x i8>, <16 x i8>* %ptr.a, align 2
+ %load.b = load <16 x i8>, <16 x i8>* %ptr.b, align 2
%cmp = icmp sge <16 x i8> %load.a, %load.b
%sel = select <16 x i1> %cmp, <16 x i8> %load.a, <16 x i8> %load.b
store <16 x i8> %sel, <16 x i8>* %ptr.a, align 2
@@ -151,12 +151,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i8* %a, i64 %index
- %gep.b = getelementptr inbounds i8* %b, i64 %index
+ %gep.a = getelementptr inbounds i8, i8* %a, i64 %index
+ %gep.b = getelementptr inbounds i8, i8* %b, i64 %index
%ptr.a = bitcast i8* %gep.a to <16 x i8>*
%ptr.b = bitcast i8* %gep.b to <16 x i8>*
- %load.a = load <16 x i8>* %ptr.a, align 2
- %load.b = load <16 x i8>* %ptr.b, align 2
+ %load.a = load <16 x i8>, <16 x i8>* %ptr.a, align 2
+ %load.b = load <16 x i8>, <16 x i8>* %ptr.b, align 2
%cmp = icmp ult <16 x i8> %load.a, %load.b
%sel = select <16 x i1> %cmp, <16 x i8> %load.a, <16 x i8> %load.b
store <16 x i8> %sel, <16 x i8>* %ptr.a, align 2
@@ -186,12 +186,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i8* %a, i64 %index
- %gep.b = getelementptr inbounds i8* %b, i64 %index
+ %gep.a = getelementptr inbounds i8, i8* %a, i64 %index
+ %gep.b = getelementptr inbounds i8, i8* %b, i64 %index
%ptr.a = bitcast i8* %gep.a to <16 x i8>*
%ptr.b = bitcast i8* %gep.b to <16 x i8>*
- %load.a = load <16 x i8>* %ptr.a, align 2
- %load.b = load <16 x i8>* %ptr.b, align 2
+ %load.a = load <16 x i8>, <16 x i8>* %ptr.a, align 2
+ %load.b = load <16 x i8>, <16 x i8>* %ptr.b, align 2
%cmp = icmp ule <16 x i8> %load.a, %load.b
%sel = select <16 x i1> %cmp, <16 x i8> %load.a, <16 x i8> %load.b
store <16 x i8> %sel, <16 x i8>* %ptr.a, align 2
@@ -221,12 +221,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i8* %a, i64 %index
- %gep.b = getelementptr inbounds i8* %b, i64 %index
+ %gep.a = getelementptr inbounds i8, i8* %a, i64 %index
+ %gep.b = getelementptr inbounds i8, i8* %b, i64 %index
%ptr.a = bitcast i8* %gep.a to <16 x i8>*
%ptr.b = bitcast i8* %gep.b to <16 x i8>*
- %load.a = load <16 x i8>* %ptr.a, align 2
- %load.b = load <16 x i8>* %ptr.b, align 2
+ %load.a = load <16 x i8>, <16 x i8>* %ptr.a, align 2
+ %load.b = load <16 x i8>, <16 x i8>* %ptr.b, align 2
%cmp = icmp ugt <16 x i8> %load.a, %load.b
%sel = select <16 x i1> %cmp, <16 x i8> %load.a, <16 x i8> %load.b
store <16 x i8> %sel, <16 x i8>* %ptr.a, align 2
@@ -256,12 +256,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i8* %a, i64 %index
- %gep.b = getelementptr inbounds i8* %b, i64 %index
+ %gep.a = getelementptr inbounds i8, i8* %a, i64 %index
+ %gep.b = getelementptr inbounds i8, i8* %b, i64 %index
%ptr.a = bitcast i8* %gep.a to <16 x i8>*
%ptr.b = bitcast i8* %gep.b to <16 x i8>*
- %load.a = load <16 x i8>* %ptr.a, align 2
- %load.b = load <16 x i8>* %ptr.b, align 2
+ %load.a = load <16 x i8>, <16 x i8>* %ptr.a, align 2
+ %load.b = load <16 x i8>, <16 x i8>* %ptr.b, align 2
%cmp = icmp uge <16 x i8> %load.a, %load.b
%sel = select <16 x i1> %cmp, <16 x i8> %load.a, <16 x i8> %load.b
store <16 x i8> %sel, <16 x i8>* %ptr.a, align 2
@@ -291,12 +291,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i16* %a, i64 %index
- %gep.b = getelementptr inbounds i16* %b, i64 %index
+ %gep.a = getelementptr inbounds i16, i16* %a, i64 %index
+ %gep.b = getelementptr inbounds i16, i16* %b, i64 %index
%ptr.a = bitcast i16* %gep.a to <8 x i16>*
%ptr.b = bitcast i16* %gep.b to <8 x i16>*
- %load.a = load <8 x i16>* %ptr.a, align 2
- %load.b = load <8 x i16>* %ptr.b, align 2
+ %load.a = load <8 x i16>, <8 x i16>* %ptr.a, align 2
+ %load.b = load <8 x i16>, <8 x i16>* %ptr.b, align 2
%cmp = icmp slt <8 x i16> %load.a, %load.b
%sel = select <8 x i1> %cmp, <8 x i16> %load.a, <8 x i16> %load.b
store <8 x i16> %sel, <8 x i16>* %ptr.a, align 2
@@ -326,12 +326,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i16* %a, i64 %index
- %gep.b = getelementptr inbounds i16* %b, i64 %index
+ %gep.a = getelementptr inbounds i16, i16* %a, i64 %index
+ %gep.b = getelementptr inbounds i16, i16* %b, i64 %index
%ptr.a = bitcast i16* %gep.a to <8 x i16>*
%ptr.b = bitcast i16* %gep.b to <8 x i16>*
- %load.a = load <8 x i16>* %ptr.a, align 2
- %load.b = load <8 x i16>* %ptr.b, align 2
+ %load.a = load <8 x i16>, <8 x i16>* %ptr.a, align 2
+ %load.b = load <8 x i16>, <8 x i16>* %ptr.b, align 2
%cmp = icmp sle <8 x i16> %load.a, %load.b
%sel = select <8 x i1> %cmp, <8 x i16> %load.a, <8 x i16> %load.b
store <8 x i16> %sel, <8 x i16>* %ptr.a, align 2
@@ -361,12 +361,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i16* %a, i64 %index
- %gep.b = getelementptr inbounds i16* %b, i64 %index
+ %gep.a = getelementptr inbounds i16, i16* %a, i64 %index
+ %gep.b = getelementptr inbounds i16, i16* %b, i64 %index
%ptr.a = bitcast i16* %gep.a to <8 x i16>*
%ptr.b = bitcast i16* %gep.b to <8 x i16>*
- %load.a = load <8 x i16>* %ptr.a, align 2
- %load.b = load <8 x i16>* %ptr.b, align 2
+ %load.a = load <8 x i16>, <8 x i16>* %ptr.a, align 2
+ %load.b = load <8 x i16>, <8 x i16>* %ptr.b, align 2
%cmp = icmp sgt <8 x i16> %load.a, %load.b
%sel = select <8 x i1> %cmp, <8 x i16> %load.a, <8 x i16> %load.b
store <8 x i16> %sel, <8 x i16>* %ptr.a, align 2
@@ -396,12 +396,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i16* %a, i64 %index
- %gep.b = getelementptr inbounds i16* %b, i64 %index
+ %gep.a = getelementptr inbounds i16, i16* %a, i64 %index
+ %gep.b = getelementptr inbounds i16, i16* %b, i64 %index
%ptr.a = bitcast i16* %gep.a to <8 x i16>*
%ptr.b = bitcast i16* %gep.b to <8 x i16>*
- %load.a = load <8 x i16>* %ptr.a, align 2
- %load.b = load <8 x i16>* %ptr.b, align 2
+ %load.a = load <8 x i16>, <8 x i16>* %ptr.a, align 2
+ %load.b = load <8 x i16>, <8 x i16>* %ptr.b, align 2
%cmp = icmp sge <8 x i16> %load.a, %load.b
%sel = select <8 x i1> %cmp, <8 x i16> %load.a, <8 x i16> %load.b
store <8 x i16> %sel, <8 x i16>* %ptr.a, align 2
@@ -431,12 +431,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i16* %a, i64 %index
- %gep.b = getelementptr inbounds i16* %b, i64 %index
+ %gep.a = getelementptr inbounds i16, i16* %a, i64 %index
+ %gep.b = getelementptr inbounds i16, i16* %b, i64 %index
%ptr.a = bitcast i16* %gep.a to <8 x i16>*
%ptr.b = bitcast i16* %gep.b to <8 x i16>*
- %load.a = load <8 x i16>* %ptr.a, align 2
- %load.b = load <8 x i16>* %ptr.b, align 2
+ %load.a = load <8 x i16>, <8 x i16>* %ptr.a, align 2
+ %load.b = load <8 x i16>, <8 x i16>* %ptr.b, align 2
%cmp = icmp ult <8 x i16> %load.a, %load.b
%sel = select <8 x i1> %cmp, <8 x i16> %load.a, <8 x i16> %load.b
store <8 x i16> %sel, <8 x i16>* %ptr.a, align 2
@@ -466,12 +466,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i16* %a, i64 %index
- %gep.b = getelementptr inbounds i16* %b, i64 %index
+ %gep.a = getelementptr inbounds i16, i16* %a, i64 %index
+ %gep.b = getelementptr inbounds i16, i16* %b, i64 %index
%ptr.a = bitcast i16* %gep.a to <8 x i16>*
%ptr.b = bitcast i16* %gep.b to <8 x i16>*
- %load.a = load <8 x i16>* %ptr.a, align 2
- %load.b = load <8 x i16>* %ptr.b, align 2
+ %load.a = load <8 x i16>, <8 x i16>* %ptr.a, align 2
+ %load.b = load <8 x i16>, <8 x i16>* %ptr.b, align 2
%cmp = icmp ule <8 x i16> %load.a, %load.b
%sel = select <8 x i1> %cmp, <8 x i16> %load.a, <8 x i16> %load.b
store <8 x i16> %sel, <8 x i16>* %ptr.a, align 2
@@ -501,12 +501,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i16* %a, i64 %index
- %gep.b = getelementptr inbounds i16* %b, i64 %index
+ %gep.a = getelementptr inbounds i16, i16* %a, i64 %index
+ %gep.b = getelementptr inbounds i16, i16* %b, i64 %index
%ptr.a = bitcast i16* %gep.a to <8 x i16>*
%ptr.b = bitcast i16* %gep.b to <8 x i16>*
- %load.a = load <8 x i16>* %ptr.a, align 2
- %load.b = load <8 x i16>* %ptr.b, align 2
+ %load.a = load <8 x i16>, <8 x i16>* %ptr.a, align 2
+ %load.b = load <8 x i16>, <8 x i16>* %ptr.b, align 2
%cmp = icmp ugt <8 x i16> %load.a, %load.b
%sel = select <8 x i1> %cmp, <8 x i16> %load.a, <8 x i16> %load.b
store <8 x i16> %sel, <8 x i16>* %ptr.a, align 2
@@ -536,12 +536,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i16* %a, i64 %index
- %gep.b = getelementptr inbounds i16* %b, i64 %index
+ %gep.a = getelementptr inbounds i16, i16* %a, i64 %index
+ %gep.b = getelementptr inbounds i16, i16* %b, i64 %index
%ptr.a = bitcast i16* %gep.a to <8 x i16>*
%ptr.b = bitcast i16* %gep.b to <8 x i16>*
- %load.a = load <8 x i16>* %ptr.a, align 2
- %load.b = load <8 x i16>* %ptr.b, align 2
+ %load.a = load <8 x i16>, <8 x i16>* %ptr.a, align 2
+ %load.b = load <8 x i16>, <8 x i16>* %ptr.b, align 2
%cmp = icmp uge <8 x i16> %load.a, %load.b
%sel = select <8 x i1> %cmp, <8 x i16> %load.a, <8 x i16> %load.b
store <8 x i16> %sel, <8 x i16>* %ptr.a, align 2
@@ -571,12 +571,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32* %b, i64 %index
+ %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
+ %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
%ptr.a = bitcast i32* %gep.a to <4 x i32>*
%ptr.b = bitcast i32* %gep.b to <4 x i32>*
- %load.a = load <4 x i32>* %ptr.a, align 2
- %load.b = load <4 x i32>* %ptr.b, align 2
+ %load.a = load <4 x i32>, <4 x i32>* %ptr.a, align 2
+ %load.b = load <4 x i32>, <4 x i32>* %ptr.b, align 2
%cmp = icmp slt <4 x i32> %load.a, %load.b
%sel = select <4 x i1> %cmp, <4 x i32> %load.a, <4 x i32> %load.b
store <4 x i32> %sel, <4 x i32>* %ptr.a, align 2
@@ -606,12 +606,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32* %b, i64 %index
+ %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
+ %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
%ptr.a = bitcast i32* %gep.a to <4 x i32>*
%ptr.b = bitcast i32* %gep.b to <4 x i32>*
- %load.a = load <4 x i32>* %ptr.a, align 2
- %load.b = load <4 x i32>* %ptr.b, align 2
+ %load.a = load <4 x i32>, <4 x i32>* %ptr.a, align 2
+ %load.b = load <4 x i32>, <4 x i32>* %ptr.b, align 2
%cmp = icmp sle <4 x i32> %load.a, %load.b
%sel = select <4 x i1> %cmp, <4 x i32> %load.a, <4 x i32> %load.b
store <4 x i32> %sel, <4 x i32>* %ptr.a, align 2
@@ -641,12 +641,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32* %b, i64 %index
+ %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
+ %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
%ptr.a = bitcast i32* %gep.a to <4 x i32>*
%ptr.b = bitcast i32* %gep.b to <4 x i32>*
- %load.a = load <4 x i32>* %ptr.a, align 2
- %load.b = load <4 x i32>* %ptr.b, align 2
+ %load.a = load <4 x i32>, <4 x i32>* %ptr.a, align 2
+ %load.b = load <4 x i32>, <4 x i32>* %ptr.b, align 2
%cmp = icmp sgt <4 x i32> %load.a, %load.b
%sel = select <4 x i1> %cmp, <4 x i32> %load.a, <4 x i32> %load.b
store <4 x i32> %sel, <4 x i32>* %ptr.a, align 2
@@ -676,12 +676,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32* %b, i64 %index
+ %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
+ %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
%ptr.a = bitcast i32* %gep.a to <4 x i32>*
%ptr.b = bitcast i32* %gep.b to <4 x i32>*
- %load.a = load <4 x i32>* %ptr.a, align 2
- %load.b = load <4 x i32>* %ptr.b, align 2
+ %load.a = load <4 x i32>, <4 x i32>* %ptr.a, align 2
+ %load.b = load <4 x i32>, <4 x i32>* %ptr.b, align 2
%cmp = icmp sge <4 x i32> %load.a, %load.b
%sel = select <4 x i1> %cmp, <4 x i32> %load.a, <4 x i32> %load.b
store <4 x i32> %sel, <4 x i32>* %ptr.a, align 2
@@ -711,12 +711,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32* %b, i64 %index
+ %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
+ %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
%ptr.a = bitcast i32* %gep.a to <4 x i32>*
%ptr.b = bitcast i32* %gep.b to <4 x i32>*
- %load.a = load <4 x i32>* %ptr.a, align 2
- %load.b = load <4 x i32>* %ptr.b, align 2
+ %load.a = load <4 x i32>, <4 x i32>* %ptr.a, align 2
+ %load.b = load <4 x i32>, <4 x i32>* %ptr.b, align 2
%cmp = icmp ult <4 x i32> %load.a, %load.b
%sel = select <4 x i1> %cmp, <4 x i32> %load.a, <4 x i32> %load.b
store <4 x i32> %sel, <4 x i32>* %ptr.a, align 2
@@ -746,12 +746,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32* %b, i64 %index
+ %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
+ %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
%ptr.a = bitcast i32* %gep.a to <4 x i32>*
%ptr.b = bitcast i32* %gep.b to <4 x i32>*
- %load.a = load <4 x i32>* %ptr.a, align 2
- %load.b = load <4 x i32>* %ptr.b, align 2
+ %load.a = load <4 x i32>, <4 x i32>* %ptr.a, align 2
+ %load.b = load <4 x i32>, <4 x i32>* %ptr.b, align 2
%cmp = icmp ule <4 x i32> %load.a, %load.b
%sel = select <4 x i1> %cmp, <4 x i32> %load.a, <4 x i32> %load.b
store <4 x i32> %sel, <4 x i32>* %ptr.a, align 2
@@ -781,12 +781,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32* %b, i64 %index
+ %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
+ %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
%ptr.a = bitcast i32* %gep.a to <4 x i32>*
%ptr.b = bitcast i32* %gep.b to <4 x i32>*
- %load.a = load <4 x i32>* %ptr.a, align 2
- %load.b = load <4 x i32>* %ptr.b, align 2
+ %load.a = load <4 x i32>, <4 x i32>* %ptr.a, align 2
+ %load.b = load <4 x i32>, <4 x i32>* %ptr.b, align 2
%cmp = icmp ugt <4 x i32> %load.a, %load.b
%sel = select <4 x i1> %cmp, <4 x i32> %load.a, <4 x i32> %load.b
store <4 x i32> %sel, <4 x i32>* %ptr.a, align 2
@@ -816,12 +816,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32* %b, i64 %index
+ %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
+ %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
%ptr.a = bitcast i32* %gep.a to <4 x i32>*
%ptr.b = bitcast i32* %gep.b to <4 x i32>*
- %load.a = load <4 x i32>* %ptr.a, align 2
- %load.b = load <4 x i32>* %ptr.b, align 2
+ %load.a = load <4 x i32>, <4 x i32>* %ptr.a, align 2
+ %load.b = load <4 x i32>, <4 x i32>* %ptr.b, align 2
%cmp = icmp uge <4 x i32> %load.a, %load.b
%sel = select <4 x i1> %cmp, <4 x i32> %load.a, <4 x i32> %load.b
store <4 x i32> %sel, <4 x i32>* %ptr.a, align 2
@@ -851,12 +851,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i8* %a, i64 %index
- %gep.b = getelementptr inbounds i8* %b, i64 %index
+ %gep.a = getelementptr inbounds i8, i8* %a, i64 %index
+ %gep.b = getelementptr inbounds i8, i8* %b, i64 %index
%ptr.a = bitcast i8* %gep.a to <32 x i8>*
%ptr.b = bitcast i8* %gep.b to <32 x i8>*
- %load.a = load <32 x i8>* %ptr.a, align 2
- %load.b = load <32 x i8>* %ptr.b, align 2
+ %load.a = load <32 x i8>, <32 x i8>* %ptr.a, align 2
+ %load.b = load <32 x i8>, <32 x i8>* %ptr.b, align 2
%cmp = icmp slt <32 x i8> %load.a, %load.b
%sel = select <32 x i1> %cmp, <32 x i8> %load.a, <32 x i8> %load.b
store <32 x i8> %sel, <32 x i8>* %ptr.a, align 2
@@ -880,12 +880,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i8* %a, i64 %index
- %gep.b = getelementptr inbounds i8* %b, i64 %index
+ %gep.a = getelementptr inbounds i8, i8* %a, i64 %index
+ %gep.b = getelementptr inbounds i8, i8* %b, i64 %index
%ptr.a = bitcast i8* %gep.a to <32 x i8>*
%ptr.b = bitcast i8* %gep.b to <32 x i8>*
- %load.a = load <32 x i8>* %ptr.a, align 2
- %load.b = load <32 x i8>* %ptr.b, align 2
+ %load.a = load <32 x i8>, <32 x i8>* %ptr.a, align 2
+ %load.b = load <32 x i8>, <32 x i8>* %ptr.b, align 2
%cmp = icmp sle <32 x i8> %load.a, %load.b
%sel = select <32 x i1> %cmp, <32 x i8> %load.a, <32 x i8> %load.b
store <32 x i8> %sel, <32 x i8>* %ptr.a, align 2
@@ -909,12 +909,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i8* %a, i64 %index
- %gep.b = getelementptr inbounds i8* %b, i64 %index
+ %gep.a = getelementptr inbounds i8, i8* %a, i64 %index
+ %gep.b = getelementptr inbounds i8, i8* %b, i64 %index
%ptr.a = bitcast i8* %gep.a to <32 x i8>*
%ptr.b = bitcast i8* %gep.b to <32 x i8>*
- %load.a = load <32 x i8>* %ptr.a, align 2
- %load.b = load <32 x i8>* %ptr.b, align 2
+ %load.a = load <32 x i8>, <32 x i8>* %ptr.a, align 2
+ %load.b = load <32 x i8>, <32 x i8>* %ptr.b, align 2
%cmp = icmp sgt <32 x i8> %load.a, %load.b
%sel = select <32 x i1> %cmp, <32 x i8> %load.a, <32 x i8> %load.b
store <32 x i8> %sel, <32 x i8>* %ptr.a, align 2
@@ -938,12 +938,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i8* %a, i64 %index
- %gep.b = getelementptr inbounds i8* %b, i64 %index
+ %gep.a = getelementptr inbounds i8, i8* %a, i64 %index
+ %gep.b = getelementptr inbounds i8, i8* %b, i64 %index
%ptr.a = bitcast i8* %gep.a to <32 x i8>*
%ptr.b = bitcast i8* %gep.b to <32 x i8>*
- %load.a = load <32 x i8>* %ptr.a, align 2
- %load.b = load <32 x i8>* %ptr.b, align 2
+ %load.a = load <32 x i8>, <32 x i8>* %ptr.a, align 2
+ %load.b = load <32 x i8>, <32 x i8>* %ptr.b, align 2
%cmp = icmp sge <32 x i8> %load.a, %load.b
%sel = select <32 x i1> %cmp, <32 x i8> %load.a, <32 x i8> %load.b
store <32 x i8> %sel, <32 x i8>* %ptr.a, align 2
@@ -967,12 +967,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i8* %a, i64 %index
- %gep.b = getelementptr inbounds i8* %b, i64 %index
+ %gep.a = getelementptr inbounds i8, i8* %a, i64 %index
+ %gep.b = getelementptr inbounds i8, i8* %b, i64 %index
%ptr.a = bitcast i8* %gep.a to <32 x i8>*
%ptr.b = bitcast i8* %gep.b to <32 x i8>*
- %load.a = load <32 x i8>* %ptr.a, align 2
- %load.b = load <32 x i8>* %ptr.b, align 2
+ %load.a = load <32 x i8>, <32 x i8>* %ptr.a, align 2
+ %load.b = load <32 x i8>, <32 x i8>* %ptr.b, align 2
%cmp = icmp ult <32 x i8> %load.a, %load.b
%sel = select <32 x i1> %cmp, <32 x i8> %load.a, <32 x i8> %load.b
store <32 x i8> %sel, <32 x i8>* %ptr.a, align 2
@@ -996,12 +996,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i8* %a, i64 %index
- %gep.b = getelementptr inbounds i8* %b, i64 %index
+ %gep.a = getelementptr inbounds i8, i8* %a, i64 %index
+ %gep.b = getelementptr inbounds i8, i8* %b, i64 %index
%ptr.a = bitcast i8* %gep.a to <32 x i8>*
%ptr.b = bitcast i8* %gep.b to <32 x i8>*
- %load.a = load <32 x i8>* %ptr.a, align 2
- %load.b = load <32 x i8>* %ptr.b, align 2
+ %load.a = load <32 x i8>, <32 x i8>* %ptr.a, align 2
+ %load.b = load <32 x i8>, <32 x i8>* %ptr.b, align 2
%cmp = icmp ule <32 x i8> %load.a, %load.b
%sel = select <32 x i1> %cmp, <32 x i8> %load.a, <32 x i8> %load.b
store <32 x i8> %sel, <32 x i8>* %ptr.a, align 2
@@ -1025,12 +1025,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i8* %a, i64 %index
- %gep.b = getelementptr inbounds i8* %b, i64 %index
+ %gep.a = getelementptr inbounds i8, i8* %a, i64 %index
+ %gep.b = getelementptr inbounds i8, i8* %b, i64 %index
%ptr.a = bitcast i8* %gep.a to <32 x i8>*
%ptr.b = bitcast i8* %gep.b to <32 x i8>*
- %load.a = load <32 x i8>* %ptr.a, align 2
- %load.b = load <32 x i8>* %ptr.b, align 2
+ %load.a = load <32 x i8>, <32 x i8>* %ptr.a, align 2
+ %load.b = load <32 x i8>, <32 x i8>* %ptr.b, align 2
%cmp = icmp ugt <32 x i8> %load.a, %load.b
%sel = select <32 x i1> %cmp, <32 x i8> %load.a, <32 x i8> %load.b
store <32 x i8> %sel, <32 x i8>* %ptr.a, align 2
@@ -1054,12 +1054,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i8* %a, i64 %index
- %gep.b = getelementptr inbounds i8* %b, i64 %index
+ %gep.a = getelementptr inbounds i8, i8* %a, i64 %index
+ %gep.b = getelementptr inbounds i8, i8* %b, i64 %index
%ptr.a = bitcast i8* %gep.a to <32 x i8>*
%ptr.b = bitcast i8* %gep.b to <32 x i8>*
- %load.a = load <32 x i8>* %ptr.a, align 2
- %load.b = load <32 x i8>* %ptr.b, align 2
+ %load.a = load <32 x i8>, <32 x i8>* %ptr.a, align 2
+ %load.b = load <32 x i8>, <32 x i8>* %ptr.b, align 2
%cmp = icmp uge <32 x i8> %load.a, %load.b
%sel = select <32 x i1> %cmp, <32 x i8> %load.a, <32 x i8> %load.b
store <32 x i8> %sel, <32 x i8>* %ptr.a, align 2
@@ -1083,12 +1083,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i16* %a, i64 %index
- %gep.b = getelementptr inbounds i16* %b, i64 %index
+ %gep.a = getelementptr inbounds i16, i16* %a, i64 %index
+ %gep.b = getelementptr inbounds i16, i16* %b, i64 %index
%ptr.a = bitcast i16* %gep.a to <16 x i16>*
%ptr.b = bitcast i16* %gep.b to <16 x i16>*
- %load.a = load <16 x i16>* %ptr.a, align 2
- %load.b = load <16 x i16>* %ptr.b, align 2
+ %load.a = load <16 x i16>, <16 x i16>* %ptr.a, align 2
+ %load.b = load <16 x i16>, <16 x i16>* %ptr.b, align 2
%cmp = icmp slt <16 x i16> %load.a, %load.b
%sel = select <16 x i1> %cmp, <16 x i16> %load.a, <16 x i16> %load.b
store <16 x i16> %sel, <16 x i16>* %ptr.a, align 2
@@ -1112,12 +1112,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i16* %a, i64 %index
- %gep.b = getelementptr inbounds i16* %b, i64 %index
+ %gep.a = getelementptr inbounds i16, i16* %a, i64 %index
+ %gep.b = getelementptr inbounds i16, i16* %b, i64 %index
%ptr.a = bitcast i16* %gep.a to <16 x i16>*
%ptr.b = bitcast i16* %gep.b to <16 x i16>*
- %load.a = load <16 x i16>* %ptr.a, align 2
- %load.b = load <16 x i16>* %ptr.b, align 2
+ %load.a = load <16 x i16>, <16 x i16>* %ptr.a, align 2
+ %load.b = load <16 x i16>, <16 x i16>* %ptr.b, align 2
%cmp = icmp sle <16 x i16> %load.a, %load.b
%sel = select <16 x i1> %cmp, <16 x i16> %load.a, <16 x i16> %load.b
store <16 x i16> %sel, <16 x i16>* %ptr.a, align 2
@@ -1141,12 +1141,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i16* %a, i64 %index
- %gep.b = getelementptr inbounds i16* %b, i64 %index
+ %gep.a = getelementptr inbounds i16, i16* %a, i64 %index
+ %gep.b = getelementptr inbounds i16, i16* %b, i64 %index
%ptr.a = bitcast i16* %gep.a to <16 x i16>*
%ptr.b = bitcast i16* %gep.b to <16 x i16>*
- %load.a = load <16 x i16>* %ptr.a, align 2
- %load.b = load <16 x i16>* %ptr.b, align 2
+ %load.a = load <16 x i16>, <16 x i16>* %ptr.a, align 2
+ %load.b = load <16 x i16>, <16 x i16>* %ptr.b, align 2
%cmp = icmp sgt <16 x i16> %load.a, %load.b
%sel = select <16 x i1> %cmp, <16 x i16> %load.a, <16 x i16> %load.b
store <16 x i16> %sel, <16 x i16>* %ptr.a, align 2
@@ -1170,12 +1170,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i16* %a, i64 %index
- %gep.b = getelementptr inbounds i16* %b, i64 %index
+ %gep.a = getelementptr inbounds i16, i16* %a, i64 %index
+ %gep.b = getelementptr inbounds i16, i16* %b, i64 %index
%ptr.a = bitcast i16* %gep.a to <16 x i16>*
%ptr.b = bitcast i16* %gep.b to <16 x i16>*
- %load.a = load <16 x i16>* %ptr.a, align 2
- %load.b = load <16 x i16>* %ptr.b, align 2
+ %load.a = load <16 x i16>, <16 x i16>* %ptr.a, align 2
+ %load.b = load <16 x i16>, <16 x i16>* %ptr.b, align 2
%cmp = icmp sge <16 x i16> %load.a, %load.b
%sel = select <16 x i1> %cmp, <16 x i16> %load.a, <16 x i16> %load.b
store <16 x i16> %sel, <16 x i16>* %ptr.a, align 2
@@ -1199,12 +1199,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i16* %a, i64 %index
- %gep.b = getelementptr inbounds i16* %b, i64 %index
+ %gep.a = getelementptr inbounds i16, i16* %a, i64 %index
+ %gep.b = getelementptr inbounds i16, i16* %b, i64 %index
%ptr.a = bitcast i16* %gep.a to <16 x i16>*
%ptr.b = bitcast i16* %gep.b to <16 x i16>*
- %load.a = load <16 x i16>* %ptr.a, align 2
- %load.b = load <16 x i16>* %ptr.b, align 2
+ %load.a = load <16 x i16>, <16 x i16>* %ptr.a, align 2
+ %load.b = load <16 x i16>, <16 x i16>* %ptr.b, align 2
%cmp = icmp ult <16 x i16> %load.a, %load.b
%sel = select <16 x i1> %cmp, <16 x i16> %load.a, <16 x i16> %load.b
store <16 x i16> %sel, <16 x i16>* %ptr.a, align 2
@@ -1228,12 +1228,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i16* %a, i64 %index
- %gep.b = getelementptr inbounds i16* %b, i64 %index
+ %gep.a = getelementptr inbounds i16, i16* %a, i64 %index
+ %gep.b = getelementptr inbounds i16, i16* %b, i64 %index
%ptr.a = bitcast i16* %gep.a to <16 x i16>*
%ptr.b = bitcast i16* %gep.b to <16 x i16>*
- %load.a = load <16 x i16>* %ptr.a, align 2
- %load.b = load <16 x i16>* %ptr.b, align 2
+ %load.a = load <16 x i16>, <16 x i16>* %ptr.a, align 2
+ %load.b = load <16 x i16>, <16 x i16>* %ptr.b, align 2
%cmp = icmp ule <16 x i16> %load.a, %load.b
%sel = select <16 x i1> %cmp, <16 x i16> %load.a, <16 x i16> %load.b
store <16 x i16> %sel, <16 x i16>* %ptr.a, align 2
@@ -1257,12 +1257,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i16* %a, i64 %index
- %gep.b = getelementptr inbounds i16* %b, i64 %index
+ %gep.a = getelementptr inbounds i16, i16* %a, i64 %index
+ %gep.b = getelementptr inbounds i16, i16* %b, i64 %index
%ptr.a = bitcast i16* %gep.a to <16 x i16>*
%ptr.b = bitcast i16* %gep.b to <16 x i16>*
- %load.a = load <16 x i16>* %ptr.a, align 2
- %load.b = load <16 x i16>* %ptr.b, align 2
+ %load.a = load <16 x i16>, <16 x i16>* %ptr.a, align 2
+ %load.b = load <16 x i16>, <16 x i16>* %ptr.b, align 2
%cmp = icmp ugt <16 x i16> %load.a, %load.b
%sel = select <16 x i1> %cmp, <16 x i16> %load.a, <16 x i16> %load.b
store <16 x i16> %sel, <16 x i16>* %ptr.a, align 2
@@ -1286,12 +1286,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i16* %a, i64 %index
- %gep.b = getelementptr inbounds i16* %b, i64 %index
+ %gep.a = getelementptr inbounds i16, i16* %a, i64 %index
+ %gep.b = getelementptr inbounds i16, i16* %b, i64 %index
%ptr.a = bitcast i16* %gep.a to <16 x i16>*
%ptr.b = bitcast i16* %gep.b to <16 x i16>*
- %load.a = load <16 x i16>* %ptr.a, align 2
- %load.b = load <16 x i16>* %ptr.b, align 2
+ %load.a = load <16 x i16>, <16 x i16>* %ptr.a, align 2
+ %load.b = load <16 x i16>, <16 x i16>* %ptr.b, align 2
%cmp = icmp uge <16 x i16> %load.a, %load.b
%sel = select <16 x i1> %cmp, <16 x i16> %load.a, <16 x i16> %load.b
store <16 x i16> %sel, <16 x i16>* %ptr.a, align 2
@@ -1315,12 +1315,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32* %b, i64 %index
+ %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
+ %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
%ptr.a = bitcast i32* %gep.a to <8 x i32>*
%ptr.b = bitcast i32* %gep.b to <8 x i32>*
- %load.a = load <8 x i32>* %ptr.a, align 2
- %load.b = load <8 x i32>* %ptr.b, align 2
+ %load.a = load <8 x i32>, <8 x i32>* %ptr.a, align 2
+ %load.b = load <8 x i32>, <8 x i32>* %ptr.b, align 2
%cmp = icmp slt <8 x i32> %load.a, %load.b
%sel = select <8 x i1> %cmp, <8 x i32> %load.a, <8 x i32> %load.b
store <8 x i32> %sel, <8 x i32>* %ptr.a, align 2
@@ -1344,12 +1344,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32* %b, i64 %index
+ %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
+ %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
%ptr.a = bitcast i32* %gep.a to <8 x i32>*
%ptr.b = bitcast i32* %gep.b to <8 x i32>*
- %load.a = load <8 x i32>* %ptr.a, align 2
- %load.b = load <8 x i32>* %ptr.b, align 2
+ %load.a = load <8 x i32>, <8 x i32>* %ptr.a, align 2
+ %load.b = load <8 x i32>, <8 x i32>* %ptr.b, align 2
%cmp = icmp sle <8 x i32> %load.a, %load.b
%sel = select <8 x i1> %cmp, <8 x i32> %load.a, <8 x i32> %load.b
store <8 x i32> %sel, <8 x i32>* %ptr.a, align 2
@@ -1373,12 +1373,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32* %b, i64 %index
+ %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
+ %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
%ptr.a = bitcast i32* %gep.a to <8 x i32>*
%ptr.b = bitcast i32* %gep.b to <8 x i32>*
- %load.a = load <8 x i32>* %ptr.a, align 2
- %load.b = load <8 x i32>* %ptr.b, align 2
+ %load.a = load <8 x i32>, <8 x i32>* %ptr.a, align 2
+ %load.b = load <8 x i32>, <8 x i32>* %ptr.b, align 2
%cmp = icmp sgt <8 x i32> %load.a, %load.b
%sel = select <8 x i1> %cmp, <8 x i32> %load.a, <8 x i32> %load.b
store <8 x i32> %sel, <8 x i32>* %ptr.a, align 2
@@ -1402,12 +1402,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32* %b, i64 %index
+ %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
+ %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
%ptr.a = bitcast i32* %gep.a to <8 x i32>*
%ptr.b = bitcast i32* %gep.b to <8 x i32>*
- %load.a = load <8 x i32>* %ptr.a, align 2
- %load.b = load <8 x i32>* %ptr.b, align 2
+ %load.a = load <8 x i32>, <8 x i32>* %ptr.a, align 2
+ %load.b = load <8 x i32>, <8 x i32>* %ptr.b, align 2
%cmp = icmp sge <8 x i32> %load.a, %load.b
%sel = select <8 x i1> %cmp, <8 x i32> %load.a, <8 x i32> %load.b
store <8 x i32> %sel, <8 x i32>* %ptr.a, align 2
@@ -1431,12 +1431,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32* %b, i64 %index
+ %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
+ %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
%ptr.a = bitcast i32* %gep.a to <8 x i32>*
%ptr.b = bitcast i32* %gep.b to <8 x i32>*
- %load.a = load <8 x i32>* %ptr.a, align 2
- %load.b = load <8 x i32>* %ptr.b, align 2
+ %load.a = load <8 x i32>, <8 x i32>* %ptr.a, align 2
+ %load.b = load <8 x i32>, <8 x i32>* %ptr.b, align 2
%cmp = icmp ult <8 x i32> %load.a, %load.b
%sel = select <8 x i1> %cmp, <8 x i32> %load.a, <8 x i32> %load.b
store <8 x i32> %sel, <8 x i32>* %ptr.a, align 2
@@ -1460,12 +1460,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32* %b, i64 %index
+ %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
+ %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
%ptr.a = bitcast i32* %gep.a to <8 x i32>*
%ptr.b = bitcast i32* %gep.b to <8 x i32>*
- %load.a = load <8 x i32>* %ptr.a, align 2
- %load.b = load <8 x i32>* %ptr.b, align 2
+ %load.a = load <8 x i32>, <8 x i32>* %ptr.a, align 2
+ %load.b = load <8 x i32>, <8 x i32>* %ptr.b, align 2
%cmp = icmp ule <8 x i32> %load.a, %load.b
%sel = select <8 x i1> %cmp, <8 x i32> %load.a, <8 x i32> %load.b
store <8 x i32> %sel, <8 x i32>* %ptr.a, align 2
@@ -1489,12 +1489,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32* %b, i64 %index
+ %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
+ %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
%ptr.a = bitcast i32* %gep.a to <8 x i32>*
%ptr.b = bitcast i32* %gep.b to <8 x i32>*
- %load.a = load <8 x i32>* %ptr.a, align 2
- %load.b = load <8 x i32>* %ptr.b, align 2
+ %load.a = load <8 x i32>, <8 x i32>* %ptr.a, align 2
+ %load.b = load <8 x i32>, <8 x i32>* %ptr.b, align 2
%cmp = icmp ugt <8 x i32> %load.a, %load.b
%sel = select <8 x i1> %cmp, <8 x i32> %load.a, <8 x i32> %load.b
store <8 x i32> %sel, <8 x i32>* %ptr.a, align 2
@@ -1518,12 +1518,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32* %b, i64 %index
+ %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
+ %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
%ptr.a = bitcast i32* %gep.a to <8 x i32>*
%ptr.b = bitcast i32* %gep.b to <8 x i32>*
- %load.a = load <8 x i32>* %ptr.a, align 2
- %load.b = load <8 x i32>* %ptr.b, align 2
+ %load.a = load <8 x i32>, <8 x i32>* %ptr.a, align 2
+ %load.b = load <8 x i32>, <8 x i32>* %ptr.b, align 2
%cmp = icmp uge <8 x i32> %load.a, %load.b
%sel = select <8 x i1> %cmp, <8 x i32> %load.a, <8 x i32> %load.b
store <8 x i32> %sel, <8 x i32>* %ptr.a, align 2
@@ -1547,12 +1547,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i8* %a, i64 %index
- %gep.b = getelementptr inbounds i8* %b, i64 %index
+ %gep.a = getelementptr inbounds i8, i8* %a, i64 %index
+ %gep.b = getelementptr inbounds i8, i8* %b, i64 %index
%ptr.a = bitcast i8* %gep.a to <16 x i8>*
%ptr.b = bitcast i8* %gep.b to <16 x i8>*
- %load.a = load <16 x i8>* %ptr.a, align 2
- %load.b = load <16 x i8>* %ptr.b, align 2
+ %load.a = load <16 x i8>, <16 x i8>* %ptr.a, align 2
+ %load.b = load <16 x i8>, <16 x i8>* %ptr.b, align 2
%cmp = icmp slt <16 x i8> %load.a, %load.b
%sel = select <16 x i1> %cmp, <16 x i8> %load.b, <16 x i8> %load.a
store <16 x i8> %sel, <16 x i8>* %ptr.a, align 2
@@ -1582,12 +1582,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i8* %a, i64 %index
- %gep.b = getelementptr inbounds i8* %b, i64 %index
+ %gep.a = getelementptr inbounds i8, i8* %a, i64 %index
+ %gep.b = getelementptr inbounds i8, i8* %b, i64 %index
%ptr.a = bitcast i8* %gep.a to <16 x i8>*
%ptr.b = bitcast i8* %gep.b to <16 x i8>*
- %load.a = load <16 x i8>* %ptr.a, align 2
- %load.b = load <16 x i8>* %ptr.b, align 2
+ %load.a = load <16 x i8>, <16 x i8>* %ptr.a, align 2
+ %load.b = load <16 x i8>, <16 x i8>* %ptr.b, align 2
%cmp = icmp sle <16 x i8> %load.a, %load.b
%sel = select <16 x i1> %cmp, <16 x i8> %load.b, <16 x i8> %load.a
store <16 x i8> %sel, <16 x i8>* %ptr.a, align 2
@@ -1617,12 +1617,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i8* %a, i64 %index
- %gep.b = getelementptr inbounds i8* %b, i64 %index
+ %gep.a = getelementptr inbounds i8, i8* %a, i64 %index
+ %gep.b = getelementptr inbounds i8, i8* %b, i64 %index
%ptr.a = bitcast i8* %gep.a to <16 x i8>*
%ptr.b = bitcast i8* %gep.b to <16 x i8>*
- %load.a = load <16 x i8>* %ptr.a, align 2
- %load.b = load <16 x i8>* %ptr.b, align 2
+ %load.a = load <16 x i8>, <16 x i8>* %ptr.a, align 2
+ %load.b = load <16 x i8>, <16 x i8>* %ptr.b, align 2
%cmp = icmp sgt <16 x i8> %load.a, %load.b
%sel = select <16 x i1> %cmp, <16 x i8> %load.b, <16 x i8> %load.a
store <16 x i8> %sel, <16 x i8>* %ptr.a, align 2
@@ -1652,12 +1652,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i8* %a, i64 %index
- %gep.b = getelementptr inbounds i8* %b, i64 %index
+ %gep.a = getelementptr inbounds i8, i8* %a, i64 %index
+ %gep.b = getelementptr inbounds i8, i8* %b, i64 %index
%ptr.a = bitcast i8* %gep.a to <16 x i8>*
%ptr.b = bitcast i8* %gep.b to <16 x i8>*
- %load.a = load <16 x i8>* %ptr.a, align 2
- %load.b = load <16 x i8>* %ptr.b, align 2
+ %load.a = load <16 x i8>, <16 x i8>* %ptr.a, align 2
+ %load.b = load <16 x i8>, <16 x i8>* %ptr.b, align 2
%cmp = icmp sge <16 x i8> %load.a, %load.b
%sel = select <16 x i1> %cmp, <16 x i8> %load.b, <16 x i8> %load.a
store <16 x i8> %sel, <16 x i8>* %ptr.a, align 2
@@ -1687,12 +1687,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i8* %a, i64 %index
- %gep.b = getelementptr inbounds i8* %b, i64 %index
+ %gep.a = getelementptr inbounds i8, i8* %a, i64 %index
+ %gep.b = getelementptr inbounds i8, i8* %b, i64 %index
%ptr.a = bitcast i8* %gep.a to <16 x i8>*
%ptr.b = bitcast i8* %gep.b to <16 x i8>*
- %load.a = load <16 x i8>* %ptr.a, align 2
- %load.b = load <16 x i8>* %ptr.b, align 2
+ %load.a = load <16 x i8>, <16 x i8>* %ptr.a, align 2
+ %load.b = load <16 x i8>, <16 x i8>* %ptr.b, align 2
%cmp = icmp ult <16 x i8> %load.a, %load.b
%sel = select <16 x i1> %cmp, <16 x i8> %load.b, <16 x i8> %load.a
store <16 x i8> %sel, <16 x i8>* %ptr.a, align 2
@@ -1722,12 +1722,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i8* %a, i64 %index
- %gep.b = getelementptr inbounds i8* %b, i64 %index
+ %gep.a = getelementptr inbounds i8, i8* %a, i64 %index
+ %gep.b = getelementptr inbounds i8, i8* %b, i64 %index
%ptr.a = bitcast i8* %gep.a to <16 x i8>*
%ptr.b = bitcast i8* %gep.b to <16 x i8>*
- %load.a = load <16 x i8>* %ptr.a, align 2
- %load.b = load <16 x i8>* %ptr.b, align 2
+ %load.a = load <16 x i8>, <16 x i8>* %ptr.a, align 2
+ %load.b = load <16 x i8>, <16 x i8>* %ptr.b, align 2
%cmp = icmp ule <16 x i8> %load.a, %load.b
%sel = select <16 x i1> %cmp, <16 x i8> %load.b, <16 x i8> %load.a
store <16 x i8> %sel, <16 x i8>* %ptr.a, align 2
@@ -1757,12 +1757,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i8* %a, i64 %index
- %gep.b = getelementptr inbounds i8* %b, i64 %index
+ %gep.a = getelementptr inbounds i8, i8* %a, i64 %index
+ %gep.b = getelementptr inbounds i8, i8* %b, i64 %index
%ptr.a = bitcast i8* %gep.a to <16 x i8>*
%ptr.b = bitcast i8* %gep.b to <16 x i8>*
- %load.a = load <16 x i8>* %ptr.a, align 2
- %load.b = load <16 x i8>* %ptr.b, align 2
+ %load.a = load <16 x i8>, <16 x i8>* %ptr.a, align 2
+ %load.b = load <16 x i8>, <16 x i8>* %ptr.b, align 2
%cmp = icmp ugt <16 x i8> %load.a, %load.b
%sel = select <16 x i1> %cmp, <16 x i8> %load.b, <16 x i8> %load.a
store <16 x i8> %sel, <16 x i8>* %ptr.a, align 2
@@ -1792,12 +1792,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i8* %a, i64 %index
- %gep.b = getelementptr inbounds i8* %b, i64 %index
+ %gep.a = getelementptr inbounds i8, i8* %a, i64 %index
+ %gep.b = getelementptr inbounds i8, i8* %b, i64 %index
%ptr.a = bitcast i8* %gep.a to <16 x i8>*
%ptr.b = bitcast i8* %gep.b to <16 x i8>*
- %load.a = load <16 x i8>* %ptr.a, align 2
- %load.b = load <16 x i8>* %ptr.b, align 2
+ %load.a = load <16 x i8>, <16 x i8>* %ptr.a, align 2
+ %load.b = load <16 x i8>, <16 x i8>* %ptr.b, align 2
%cmp = icmp uge <16 x i8> %load.a, %load.b
%sel = select <16 x i1> %cmp, <16 x i8> %load.b, <16 x i8> %load.a
store <16 x i8> %sel, <16 x i8>* %ptr.a, align 2
@@ -1827,12 +1827,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i16* %a, i64 %index
- %gep.b = getelementptr inbounds i16* %b, i64 %index
+ %gep.a = getelementptr inbounds i16, i16* %a, i64 %index
+ %gep.b = getelementptr inbounds i16, i16* %b, i64 %index
%ptr.a = bitcast i16* %gep.a to <8 x i16>*
%ptr.b = bitcast i16* %gep.b to <8 x i16>*
- %load.a = load <8 x i16>* %ptr.a, align 2
- %load.b = load <8 x i16>* %ptr.b, align 2
+ %load.a = load <8 x i16>, <8 x i16>* %ptr.a, align 2
+ %load.b = load <8 x i16>, <8 x i16>* %ptr.b, align 2
%cmp = icmp slt <8 x i16> %load.a, %load.b
%sel = select <8 x i1> %cmp, <8 x i16> %load.b, <8 x i16> %load.a
store <8 x i16> %sel, <8 x i16>* %ptr.a, align 2
@@ -1862,12 +1862,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i16* %a, i64 %index
- %gep.b = getelementptr inbounds i16* %b, i64 %index
+ %gep.a = getelementptr inbounds i16, i16* %a, i64 %index
+ %gep.b = getelementptr inbounds i16, i16* %b, i64 %index
%ptr.a = bitcast i16* %gep.a to <8 x i16>*
%ptr.b = bitcast i16* %gep.b to <8 x i16>*
- %load.a = load <8 x i16>* %ptr.a, align 2
- %load.b = load <8 x i16>* %ptr.b, align 2
+ %load.a = load <8 x i16>, <8 x i16>* %ptr.a, align 2
+ %load.b = load <8 x i16>, <8 x i16>* %ptr.b, align 2
%cmp = icmp sle <8 x i16> %load.a, %load.b
%sel = select <8 x i1> %cmp, <8 x i16> %load.b, <8 x i16> %load.a
store <8 x i16> %sel, <8 x i16>* %ptr.a, align 2
@@ -1897,12 +1897,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i16* %a, i64 %index
- %gep.b = getelementptr inbounds i16* %b, i64 %index
+ %gep.a = getelementptr inbounds i16, i16* %a, i64 %index
+ %gep.b = getelementptr inbounds i16, i16* %b, i64 %index
%ptr.a = bitcast i16* %gep.a to <8 x i16>*
%ptr.b = bitcast i16* %gep.b to <8 x i16>*
- %load.a = load <8 x i16>* %ptr.a, align 2
- %load.b = load <8 x i16>* %ptr.b, align 2
+ %load.a = load <8 x i16>, <8 x i16>* %ptr.a, align 2
+ %load.b = load <8 x i16>, <8 x i16>* %ptr.b, align 2
%cmp = icmp sgt <8 x i16> %load.a, %load.b
%sel = select <8 x i1> %cmp, <8 x i16> %load.b, <8 x i16> %load.a
store <8 x i16> %sel, <8 x i16>* %ptr.a, align 2
@@ -1932,12 +1932,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i16* %a, i64 %index
- %gep.b = getelementptr inbounds i16* %b, i64 %index
+ %gep.a = getelementptr inbounds i16, i16* %a, i64 %index
+ %gep.b = getelementptr inbounds i16, i16* %b, i64 %index
%ptr.a = bitcast i16* %gep.a to <8 x i16>*
%ptr.b = bitcast i16* %gep.b to <8 x i16>*
- %load.a = load <8 x i16>* %ptr.a, align 2
- %load.b = load <8 x i16>* %ptr.b, align 2
+ %load.a = load <8 x i16>, <8 x i16>* %ptr.a, align 2
+ %load.b = load <8 x i16>, <8 x i16>* %ptr.b, align 2
%cmp = icmp sge <8 x i16> %load.a, %load.b
%sel = select <8 x i1> %cmp, <8 x i16> %load.b, <8 x i16> %load.a
store <8 x i16> %sel, <8 x i16>* %ptr.a, align 2
@@ -1967,12 +1967,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i16* %a, i64 %index
- %gep.b = getelementptr inbounds i16* %b, i64 %index
+ %gep.a = getelementptr inbounds i16, i16* %a, i64 %index
+ %gep.b = getelementptr inbounds i16, i16* %b, i64 %index
%ptr.a = bitcast i16* %gep.a to <8 x i16>*
%ptr.b = bitcast i16* %gep.b to <8 x i16>*
- %load.a = load <8 x i16>* %ptr.a, align 2
- %load.b = load <8 x i16>* %ptr.b, align 2
+ %load.a = load <8 x i16>, <8 x i16>* %ptr.a, align 2
+ %load.b = load <8 x i16>, <8 x i16>* %ptr.b, align 2
%cmp = icmp ult <8 x i16> %load.a, %load.b
%sel = select <8 x i1> %cmp, <8 x i16> %load.b, <8 x i16> %load.a
store <8 x i16> %sel, <8 x i16>* %ptr.a, align 2
@@ -2002,12 +2002,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i16* %a, i64 %index
- %gep.b = getelementptr inbounds i16* %b, i64 %index
+ %gep.a = getelementptr inbounds i16, i16* %a, i64 %index
+ %gep.b = getelementptr inbounds i16, i16* %b, i64 %index
%ptr.a = bitcast i16* %gep.a to <8 x i16>*
%ptr.b = bitcast i16* %gep.b to <8 x i16>*
- %load.a = load <8 x i16>* %ptr.a, align 2
- %load.b = load <8 x i16>* %ptr.b, align 2
+ %load.a = load <8 x i16>, <8 x i16>* %ptr.a, align 2
+ %load.b = load <8 x i16>, <8 x i16>* %ptr.b, align 2
%cmp = icmp ule <8 x i16> %load.a, %load.b
%sel = select <8 x i1> %cmp, <8 x i16> %load.b, <8 x i16> %load.a
store <8 x i16> %sel, <8 x i16>* %ptr.a, align 2
@@ -2037,12 +2037,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i16* %a, i64 %index
- %gep.b = getelementptr inbounds i16* %b, i64 %index
+ %gep.a = getelementptr inbounds i16, i16* %a, i64 %index
+ %gep.b = getelementptr inbounds i16, i16* %b, i64 %index
%ptr.a = bitcast i16* %gep.a to <8 x i16>*
%ptr.b = bitcast i16* %gep.b to <8 x i16>*
- %load.a = load <8 x i16>* %ptr.a, align 2
- %load.b = load <8 x i16>* %ptr.b, align 2
+ %load.a = load <8 x i16>, <8 x i16>* %ptr.a, align 2
+ %load.b = load <8 x i16>, <8 x i16>* %ptr.b, align 2
%cmp = icmp ugt <8 x i16> %load.a, %load.b
%sel = select <8 x i1> %cmp, <8 x i16> %load.b, <8 x i16> %load.a
store <8 x i16> %sel, <8 x i16>* %ptr.a, align 2
@@ -2072,12 +2072,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i16* %a, i64 %index
- %gep.b = getelementptr inbounds i16* %b, i64 %index
+ %gep.a = getelementptr inbounds i16, i16* %a, i64 %index
+ %gep.b = getelementptr inbounds i16, i16* %b, i64 %index
%ptr.a = bitcast i16* %gep.a to <8 x i16>*
%ptr.b = bitcast i16* %gep.b to <8 x i16>*
- %load.a = load <8 x i16>* %ptr.a, align 2
- %load.b = load <8 x i16>* %ptr.b, align 2
+ %load.a = load <8 x i16>, <8 x i16>* %ptr.a, align 2
+ %load.b = load <8 x i16>, <8 x i16>* %ptr.b, align 2
%cmp = icmp uge <8 x i16> %load.a, %load.b
%sel = select <8 x i1> %cmp, <8 x i16> %load.b, <8 x i16> %load.a
store <8 x i16> %sel, <8 x i16>* %ptr.a, align 2
@@ -2107,12 +2107,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32* %b, i64 %index
+ %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
+ %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
%ptr.a = bitcast i32* %gep.a to <4 x i32>*
%ptr.b = bitcast i32* %gep.b to <4 x i32>*
- %load.a = load <4 x i32>* %ptr.a, align 2
- %load.b = load <4 x i32>* %ptr.b, align 2
+ %load.a = load <4 x i32>, <4 x i32>* %ptr.a, align 2
+ %load.b = load <4 x i32>, <4 x i32>* %ptr.b, align 2
%cmp = icmp slt <4 x i32> %load.a, %load.b
%sel = select <4 x i1> %cmp, <4 x i32> %load.b, <4 x i32> %load.a
store <4 x i32> %sel, <4 x i32>* %ptr.a, align 2
@@ -2142,12 +2142,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32* %b, i64 %index
+ %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
+ %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
%ptr.a = bitcast i32* %gep.a to <4 x i32>*
%ptr.b = bitcast i32* %gep.b to <4 x i32>*
- %load.a = load <4 x i32>* %ptr.a, align 2
- %load.b = load <4 x i32>* %ptr.b, align 2
+ %load.a = load <4 x i32>, <4 x i32>* %ptr.a, align 2
+ %load.b = load <4 x i32>, <4 x i32>* %ptr.b, align 2
%cmp = icmp sle <4 x i32> %load.a, %load.b
%sel = select <4 x i1> %cmp, <4 x i32> %load.b, <4 x i32> %load.a
store <4 x i32> %sel, <4 x i32>* %ptr.a, align 2
@@ -2177,12 +2177,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32* %b, i64 %index
+ %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
+ %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
%ptr.a = bitcast i32* %gep.a to <4 x i32>*
%ptr.b = bitcast i32* %gep.b to <4 x i32>*
- %load.a = load <4 x i32>* %ptr.a, align 2
- %load.b = load <4 x i32>* %ptr.b, align 2
+ %load.a = load <4 x i32>, <4 x i32>* %ptr.a, align 2
+ %load.b = load <4 x i32>, <4 x i32>* %ptr.b, align 2
%cmp = icmp sgt <4 x i32> %load.a, %load.b
%sel = select <4 x i1> %cmp, <4 x i32> %load.b, <4 x i32> %load.a
store <4 x i32> %sel, <4 x i32>* %ptr.a, align 2
@@ -2212,12 +2212,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32* %b, i64 %index
+ %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
+ %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
%ptr.a = bitcast i32* %gep.a to <4 x i32>*
%ptr.b = bitcast i32* %gep.b to <4 x i32>*
- %load.a = load <4 x i32>* %ptr.a, align 2
- %load.b = load <4 x i32>* %ptr.b, align 2
+ %load.a = load <4 x i32>, <4 x i32>* %ptr.a, align 2
+ %load.b = load <4 x i32>, <4 x i32>* %ptr.b, align 2
%cmp = icmp sge <4 x i32> %load.a, %load.b
%sel = select <4 x i1> %cmp, <4 x i32> %load.b, <4 x i32> %load.a
store <4 x i32> %sel, <4 x i32>* %ptr.a, align 2
@@ -2247,12 +2247,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32* %b, i64 %index
+ %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
+ %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
%ptr.a = bitcast i32* %gep.a to <4 x i32>*
%ptr.b = bitcast i32* %gep.b to <4 x i32>*
- %load.a = load <4 x i32>* %ptr.a, align 2
- %load.b = load <4 x i32>* %ptr.b, align 2
+ %load.a = load <4 x i32>, <4 x i32>* %ptr.a, align 2
+ %load.b = load <4 x i32>, <4 x i32>* %ptr.b, align 2
%cmp = icmp ult <4 x i32> %load.a, %load.b
%sel = select <4 x i1> %cmp, <4 x i32> %load.b, <4 x i32> %load.a
store <4 x i32> %sel, <4 x i32>* %ptr.a, align 2
@@ -2282,12 +2282,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32* %b, i64 %index
+ %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
+ %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
%ptr.a = bitcast i32* %gep.a to <4 x i32>*
%ptr.b = bitcast i32* %gep.b to <4 x i32>*
- %load.a = load <4 x i32>* %ptr.a, align 2
- %load.b = load <4 x i32>* %ptr.b, align 2
+ %load.a = load <4 x i32>, <4 x i32>* %ptr.a, align 2
+ %load.b = load <4 x i32>, <4 x i32>* %ptr.b, align 2
%cmp = icmp ule <4 x i32> %load.a, %load.b
%sel = select <4 x i1> %cmp, <4 x i32> %load.b, <4 x i32> %load.a
store <4 x i32> %sel, <4 x i32>* %ptr.a, align 2
@@ -2317,12 +2317,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32* %b, i64 %index
+ %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
+ %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
%ptr.a = bitcast i32* %gep.a to <4 x i32>*
%ptr.b = bitcast i32* %gep.b to <4 x i32>*
- %load.a = load <4 x i32>* %ptr.a, align 2
- %load.b = load <4 x i32>* %ptr.b, align 2
+ %load.a = load <4 x i32>, <4 x i32>* %ptr.a, align 2
+ %load.b = load <4 x i32>, <4 x i32>* %ptr.b, align 2
%cmp = icmp ugt <4 x i32> %load.a, %load.b
%sel = select <4 x i1> %cmp, <4 x i32> %load.b, <4 x i32> %load.a
store <4 x i32> %sel, <4 x i32>* %ptr.a, align 2
@@ -2352,12 +2352,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32* %b, i64 %index
+ %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
+ %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
%ptr.a = bitcast i32* %gep.a to <4 x i32>*
%ptr.b = bitcast i32* %gep.b to <4 x i32>*
- %load.a = load <4 x i32>* %ptr.a, align 2
- %load.b = load <4 x i32>* %ptr.b, align 2
+ %load.a = load <4 x i32>, <4 x i32>* %ptr.a, align 2
+ %load.b = load <4 x i32>, <4 x i32>* %ptr.b, align 2
%cmp = icmp uge <4 x i32> %load.a, %load.b
%sel = select <4 x i1> %cmp, <4 x i32> %load.b, <4 x i32> %load.a
store <4 x i32> %sel, <4 x i32>* %ptr.a, align 2
@@ -2387,12 +2387,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i8* %a, i64 %index
- %gep.b = getelementptr inbounds i8* %b, i64 %index
+ %gep.a = getelementptr inbounds i8, i8* %a, i64 %index
+ %gep.b = getelementptr inbounds i8, i8* %b, i64 %index
%ptr.a = bitcast i8* %gep.a to <32 x i8>*
%ptr.b = bitcast i8* %gep.b to <32 x i8>*
- %load.a = load <32 x i8>* %ptr.a, align 2
- %load.b = load <32 x i8>* %ptr.b, align 2
+ %load.a = load <32 x i8>, <32 x i8>* %ptr.a, align 2
+ %load.b = load <32 x i8>, <32 x i8>* %ptr.b, align 2
%cmp = icmp slt <32 x i8> %load.a, %load.b
%sel = select <32 x i1> %cmp, <32 x i8> %load.b, <32 x i8> %load.a
store <32 x i8> %sel, <32 x i8>* %ptr.a, align 2
@@ -2416,12 +2416,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i8* %a, i64 %index
- %gep.b = getelementptr inbounds i8* %b, i64 %index
+ %gep.a = getelementptr inbounds i8, i8* %a, i64 %index
+ %gep.b = getelementptr inbounds i8, i8* %b, i64 %index
%ptr.a = bitcast i8* %gep.a to <32 x i8>*
%ptr.b = bitcast i8* %gep.b to <32 x i8>*
- %load.a = load <32 x i8>* %ptr.a, align 2
- %load.b = load <32 x i8>* %ptr.b, align 2
+ %load.a = load <32 x i8>, <32 x i8>* %ptr.a, align 2
+ %load.b = load <32 x i8>, <32 x i8>* %ptr.b, align 2
%cmp = icmp sle <32 x i8> %load.a, %load.b
%sel = select <32 x i1> %cmp, <32 x i8> %load.b, <32 x i8> %load.a
store <32 x i8> %sel, <32 x i8>* %ptr.a, align 2
@@ -2445,12 +2445,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i8* %a, i64 %index
- %gep.b = getelementptr inbounds i8* %b, i64 %index
+ %gep.a = getelementptr inbounds i8, i8* %a, i64 %index
+ %gep.b = getelementptr inbounds i8, i8* %b, i64 %index
%ptr.a = bitcast i8* %gep.a to <32 x i8>*
%ptr.b = bitcast i8* %gep.b to <32 x i8>*
- %load.a = load <32 x i8>* %ptr.a, align 2
- %load.b = load <32 x i8>* %ptr.b, align 2
+ %load.a = load <32 x i8>, <32 x i8>* %ptr.a, align 2
+ %load.b = load <32 x i8>, <32 x i8>* %ptr.b, align 2
%cmp = icmp sgt <32 x i8> %load.a, %load.b
%sel = select <32 x i1> %cmp, <32 x i8> %load.b, <32 x i8> %load.a
store <32 x i8> %sel, <32 x i8>* %ptr.a, align 2
@@ -2474,12 +2474,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i8* %a, i64 %index
- %gep.b = getelementptr inbounds i8* %b, i64 %index
+ %gep.a = getelementptr inbounds i8, i8* %a, i64 %index
+ %gep.b = getelementptr inbounds i8, i8* %b, i64 %index
%ptr.a = bitcast i8* %gep.a to <32 x i8>*
%ptr.b = bitcast i8* %gep.b to <32 x i8>*
- %load.a = load <32 x i8>* %ptr.a, align 2
- %load.b = load <32 x i8>* %ptr.b, align 2
+ %load.a = load <32 x i8>, <32 x i8>* %ptr.a, align 2
+ %load.b = load <32 x i8>, <32 x i8>* %ptr.b, align 2
%cmp = icmp sge <32 x i8> %load.a, %load.b
%sel = select <32 x i1> %cmp, <32 x i8> %load.b, <32 x i8> %load.a
store <32 x i8> %sel, <32 x i8>* %ptr.a, align 2
@@ -2503,12 +2503,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i8* %a, i64 %index
- %gep.b = getelementptr inbounds i8* %b, i64 %index
+ %gep.a = getelementptr inbounds i8, i8* %a, i64 %index
+ %gep.b = getelementptr inbounds i8, i8* %b, i64 %index
%ptr.a = bitcast i8* %gep.a to <32 x i8>*
%ptr.b = bitcast i8* %gep.b to <32 x i8>*
- %load.a = load <32 x i8>* %ptr.a, align 2
- %load.b = load <32 x i8>* %ptr.b, align 2
+ %load.a = load <32 x i8>, <32 x i8>* %ptr.a, align 2
+ %load.b = load <32 x i8>, <32 x i8>* %ptr.b, align 2
%cmp = icmp ult <32 x i8> %load.a, %load.b
%sel = select <32 x i1> %cmp, <32 x i8> %load.b, <32 x i8> %load.a
store <32 x i8> %sel, <32 x i8>* %ptr.a, align 2
@@ -2532,12 +2532,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i8* %a, i64 %index
- %gep.b = getelementptr inbounds i8* %b, i64 %index
+ %gep.a = getelementptr inbounds i8, i8* %a, i64 %index
+ %gep.b = getelementptr inbounds i8, i8* %b, i64 %index
%ptr.a = bitcast i8* %gep.a to <32 x i8>*
%ptr.b = bitcast i8* %gep.b to <32 x i8>*
- %load.a = load <32 x i8>* %ptr.a, align 2
- %load.b = load <32 x i8>* %ptr.b, align 2
+ %load.a = load <32 x i8>, <32 x i8>* %ptr.a, align 2
+ %load.b = load <32 x i8>, <32 x i8>* %ptr.b, align 2
%cmp = icmp ule <32 x i8> %load.a, %load.b
%sel = select <32 x i1> %cmp, <32 x i8> %load.b, <32 x i8> %load.a
store <32 x i8> %sel, <32 x i8>* %ptr.a, align 2
@@ -2561,12 +2561,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i8* %a, i64 %index
- %gep.b = getelementptr inbounds i8* %b, i64 %index
+ %gep.a = getelementptr inbounds i8, i8* %a, i64 %index
+ %gep.b = getelementptr inbounds i8, i8* %b, i64 %index
%ptr.a = bitcast i8* %gep.a to <32 x i8>*
%ptr.b = bitcast i8* %gep.b to <32 x i8>*
- %load.a = load <32 x i8>* %ptr.a, align 2
- %load.b = load <32 x i8>* %ptr.b, align 2
+ %load.a = load <32 x i8>, <32 x i8>* %ptr.a, align 2
+ %load.b = load <32 x i8>, <32 x i8>* %ptr.b, align 2
%cmp = icmp ugt <32 x i8> %load.a, %load.b
%sel = select <32 x i1> %cmp, <32 x i8> %load.b, <32 x i8> %load.a
store <32 x i8> %sel, <32 x i8>* %ptr.a, align 2
@@ -2590,12 +2590,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i8* %a, i64 %index
- %gep.b = getelementptr inbounds i8* %b, i64 %index
+ %gep.a = getelementptr inbounds i8, i8* %a, i64 %index
+ %gep.b = getelementptr inbounds i8, i8* %b, i64 %index
%ptr.a = bitcast i8* %gep.a to <32 x i8>*
%ptr.b = bitcast i8* %gep.b to <32 x i8>*
- %load.a = load <32 x i8>* %ptr.a, align 2
- %load.b = load <32 x i8>* %ptr.b, align 2
+ %load.a = load <32 x i8>, <32 x i8>* %ptr.a, align 2
+ %load.b = load <32 x i8>, <32 x i8>* %ptr.b, align 2
%cmp = icmp uge <32 x i8> %load.a, %load.b
%sel = select <32 x i1> %cmp, <32 x i8> %load.b, <32 x i8> %load.a
store <32 x i8> %sel, <32 x i8>* %ptr.a, align 2
@@ -2619,12 +2619,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i16* %a, i64 %index
- %gep.b = getelementptr inbounds i16* %b, i64 %index
+ %gep.a = getelementptr inbounds i16, i16* %a, i64 %index
+ %gep.b = getelementptr inbounds i16, i16* %b, i64 %index
%ptr.a = bitcast i16* %gep.a to <16 x i16>*
%ptr.b = bitcast i16* %gep.b to <16 x i16>*
- %load.a = load <16 x i16>* %ptr.a, align 2
- %load.b = load <16 x i16>* %ptr.b, align 2
+ %load.a = load <16 x i16>, <16 x i16>* %ptr.a, align 2
+ %load.b = load <16 x i16>, <16 x i16>* %ptr.b, align 2
%cmp = icmp slt <16 x i16> %load.a, %load.b
%sel = select <16 x i1> %cmp, <16 x i16> %load.b, <16 x i16> %load.a
store <16 x i16> %sel, <16 x i16>* %ptr.a, align 2
@@ -2648,12 +2648,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i16* %a, i64 %index
- %gep.b = getelementptr inbounds i16* %b, i64 %index
+ %gep.a = getelementptr inbounds i16, i16* %a, i64 %index
+ %gep.b = getelementptr inbounds i16, i16* %b, i64 %index
%ptr.a = bitcast i16* %gep.a to <16 x i16>*
%ptr.b = bitcast i16* %gep.b to <16 x i16>*
- %load.a = load <16 x i16>* %ptr.a, align 2
- %load.b = load <16 x i16>* %ptr.b, align 2
+ %load.a = load <16 x i16>, <16 x i16>* %ptr.a, align 2
+ %load.b = load <16 x i16>, <16 x i16>* %ptr.b, align 2
%cmp = icmp sle <16 x i16> %load.a, %load.b
%sel = select <16 x i1> %cmp, <16 x i16> %load.b, <16 x i16> %load.a
store <16 x i16> %sel, <16 x i16>* %ptr.a, align 2
@@ -2677,12 +2677,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i16* %a, i64 %index
- %gep.b = getelementptr inbounds i16* %b, i64 %index
+ %gep.a = getelementptr inbounds i16, i16* %a, i64 %index
+ %gep.b = getelementptr inbounds i16, i16* %b, i64 %index
%ptr.a = bitcast i16* %gep.a to <16 x i16>*
%ptr.b = bitcast i16* %gep.b to <16 x i16>*
- %load.a = load <16 x i16>* %ptr.a, align 2
- %load.b = load <16 x i16>* %ptr.b, align 2
+ %load.a = load <16 x i16>, <16 x i16>* %ptr.a, align 2
+ %load.b = load <16 x i16>, <16 x i16>* %ptr.b, align 2
%cmp = icmp sgt <16 x i16> %load.a, %load.b
%sel = select <16 x i1> %cmp, <16 x i16> %load.b, <16 x i16> %load.a
store <16 x i16> %sel, <16 x i16>* %ptr.a, align 2
@@ -2706,12 +2706,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i16* %a, i64 %index
- %gep.b = getelementptr inbounds i16* %b, i64 %index
+ %gep.a = getelementptr inbounds i16, i16* %a, i64 %index
+ %gep.b = getelementptr inbounds i16, i16* %b, i64 %index
%ptr.a = bitcast i16* %gep.a to <16 x i16>*
%ptr.b = bitcast i16* %gep.b to <16 x i16>*
- %load.a = load <16 x i16>* %ptr.a, align 2
- %load.b = load <16 x i16>* %ptr.b, align 2
+ %load.a = load <16 x i16>, <16 x i16>* %ptr.a, align 2
+ %load.b = load <16 x i16>, <16 x i16>* %ptr.b, align 2
%cmp = icmp sge <16 x i16> %load.a, %load.b
%sel = select <16 x i1> %cmp, <16 x i16> %load.b, <16 x i16> %load.a
store <16 x i16> %sel, <16 x i16>* %ptr.a, align 2
@@ -2735,12 +2735,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i16* %a, i64 %index
- %gep.b = getelementptr inbounds i16* %b, i64 %index
+ %gep.a = getelementptr inbounds i16, i16* %a, i64 %index
+ %gep.b = getelementptr inbounds i16, i16* %b, i64 %index
%ptr.a = bitcast i16* %gep.a to <16 x i16>*
%ptr.b = bitcast i16* %gep.b to <16 x i16>*
- %load.a = load <16 x i16>* %ptr.a, align 2
- %load.b = load <16 x i16>* %ptr.b, align 2
+ %load.a = load <16 x i16>, <16 x i16>* %ptr.a, align 2
+ %load.b = load <16 x i16>, <16 x i16>* %ptr.b, align 2
%cmp = icmp ult <16 x i16> %load.a, %load.b
%sel = select <16 x i1> %cmp, <16 x i16> %load.b, <16 x i16> %load.a
store <16 x i16> %sel, <16 x i16>* %ptr.a, align 2
@@ -2764,12 +2764,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i16* %a, i64 %index
- %gep.b = getelementptr inbounds i16* %b, i64 %index
+ %gep.a = getelementptr inbounds i16, i16* %a, i64 %index
+ %gep.b = getelementptr inbounds i16, i16* %b, i64 %index
%ptr.a = bitcast i16* %gep.a to <16 x i16>*
%ptr.b = bitcast i16* %gep.b to <16 x i16>*
- %load.a = load <16 x i16>* %ptr.a, align 2
- %load.b = load <16 x i16>* %ptr.b, align 2
+ %load.a = load <16 x i16>, <16 x i16>* %ptr.a, align 2
+ %load.b = load <16 x i16>, <16 x i16>* %ptr.b, align 2
%cmp = icmp ule <16 x i16> %load.a, %load.b
%sel = select <16 x i1> %cmp, <16 x i16> %load.b, <16 x i16> %load.a
store <16 x i16> %sel, <16 x i16>* %ptr.a, align 2
@@ -2793,12 +2793,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i16* %a, i64 %index
- %gep.b = getelementptr inbounds i16* %b, i64 %index
+ %gep.a = getelementptr inbounds i16, i16* %a, i64 %index
+ %gep.b = getelementptr inbounds i16, i16* %b, i64 %index
%ptr.a = bitcast i16* %gep.a to <16 x i16>*
%ptr.b = bitcast i16* %gep.b to <16 x i16>*
- %load.a = load <16 x i16>* %ptr.a, align 2
- %load.b = load <16 x i16>* %ptr.b, align 2
+ %load.a = load <16 x i16>, <16 x i16>* %ptr.a, align 2
+ %load.b = load <16 x i16>, <16 x i16>* %ptr.b, align 2
%cmp = icmp ugt <16 x i16> %load.a, %load.b
%sel = select <16 x i1> %cmp, <16 x i16> %load.b, <16 x i16> %load.a
store <16 x i16> %sel, <16 x i16>* %ptr.a, align 2
@@ -2822,12 +2822,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i16* %a, i64 %index
- %gep.b = getelementptr inbounds i16* %b, i64 %index
+ %gep.a = getelementptr inbounds i16, i16* %a, i64 %index
+ %gep.b = getelementptr inbounds i16, i16* %b, i64 %index
%ptr.a = bitcast i16* %gep.a to <16 x i16>*
%ptr.b = bitcast i16* %gep.b to <16 x i16>*
- %load.a = load <16 x i16>* %ptr.a, align 2
- %load.b = load <16 x i16>* %ptr.b, align 2
+ %load.a = load <16 x i16>, <16 x i16>* %ptr.a, align 2
+ %load.b = load <16 x i16>, <16 x i16>* %ptr.b, align 2
%cmp = icmp uge <16 x i16> %load.a, %load.b
%sel = select <16 x i1> %cmp, <16 x i16> %load.b, <16 x i16> %load.a
store <16 x i16> %sel, <16 x i16>* %ptr.a, align 2
@@ -2851,12 +2851,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32* %b, i64 %index
+ %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
+ %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
%ptr.a = bitcast i32* %gep.a to <8 x i32>*
%ptr.b = bitcast i32* %gep.b to <8 x i32>*
- %load.a = load <8 x i32>* %ptr.a, align 2
- %load.b = load <8 x i32>* %ptr.b, align 2
+ %load.a = load <8 x i32>, <8 x i32>* %ptr.a, align 2
+ %load.b = load <8 x i32>, <8 x i32>* %ptr.b, align 2
%cmp = icmp slt <8 x i32> %load.a, %load.b
%sel = select <8 x i1> %cmp, <8 x i32> %load.b, <8 x i32> %load.a
store <8 x i32> %sel, <8 x i32>* %ptr.a, align 2
@@ -2880,12 +2880,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32* %b, i64 %index
+ %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
+ %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
%ptr.a = bitcast i32* %gep.a to <8 x i32>*
%ptr.b = bitcast i32* %gep.b to <8 x i32>*
- %load.a = load <8 x i32>* %ptr.a, align 2
- %load.b = load <8 x i32>* %ptr.b, align 2
+ %load.a = load <8 x i32>, <8 x i32>* %ptr.a, align 2
+ %load.b = load <8 x i32>, <8 x i32>* %ptr.b, align 2
%cmp = icmp sle <8 x i32> %load.a, %load.b
%sel = select <8 x i1> %cmp, <8 x i32> %load.b, <8 x i32> %load.a
store <8 x i32> %sel, <8 x i32>* %ptr.a, align 2
@@ -2909,12 +2909,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32* %b, i64 %index
+ %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
+ %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
%ptr.a = bitcast i32* %gep.a to <8 x i32>*
%ptr.b = bitcast i32* %gep.b to <8 x i32>*
- %load.a = load <8 x i32>* %ptr.a, align 2
- %load.b = load <8 x i32>* %ptr.b, align 2
+ %load.a = load <8 x i32>, <8 x i32>* %ptr.a, align 2
+ %load.b = load <8 x i32>, <8 x i32>* %ptr.b, align 2
%cmp = icmp sgt <8 x i32> %load.a, %load.b
%sel = select <8 x i1> %cmp, <8 x i32> %load.b, <8 x i32> %load.a
store <8 x i32> %sel, <8 x i32>* %ptr.a, align 2
@@ -2938,12 +2938,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32* %b, i64 %index
+ %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
+ %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
%ptr.a = bitcast i32* %gep.a to <8 x i32>*
%ptr.b = bitcast i32* %gep.b to <8 x i32>*
- %load.a = load <8 x i32>* %ptr.a, align 2
- %load.b = load <8 x i32>* %ptr.b, align 2
+ %load.a = load <8 x i32>, <8 x i32>* %ptr.a, align 2
+ %load.b = load <8 x i32>, <8 x i32>* %ptr.b, align 2
%cmp = icmp sge <8 x i32> %load.a, %load.b
%sel = select <8 x i1> %cmp, <8 x i32> %load.b, <8 x i32> %load.a
store <8 x i32> %sel, <8 x i32>* %ptr.a, align 2
@@ -2967,12 +2967,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32* %b, i64 %index
+ %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
+ %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
%ptr.a = bitcast i32* %gep.a to <8 x i32>*
%ptr.b = bitcast i32* %gep.b to <8 x i32>*
- %load.a = load <8 x i32>* %ptr.a, align 2
- %load.b = load <8 x i32>* %ptr.b, align 2
+ %load.a = load <8 x i32>, <8 x i32>* %ptr.a, align 2
+ %load.b = load <8 x i32>, <8 x i32>* %ptr.b, align 2
%cmp = icmp ult <8 x i32> %load.a, %load.b
%sel = select <8 x i1> %cmp, <8 x i32> %load.b, <8 x i32> %load.a
store <8 x i32> %sel, <8 x i32>* %ptr.a, align 2
@@ -2996,12 +2996,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32* %b, i64 %index
+ %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
+ %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
%ptr.a = bitcast i32* %gep.a to <8 x i32>*
%ptr.b = bitcast i32* %gep.b to <8 x i32>*
- %load.a = load <8 x i32>* %ptr.a, align 2
- %load.b = load <8 x i32>* %ptr.b, align 2
+ %load.a = load <8 x i32>, <8 x i32>* %ptr.a, align 2
+ %load.b = load <8 x i32>, <8 x i32>* %ptr.b, align 2
%cmp = icmp ule <8 x i32> %load.a, %load.b
%sel = select <8 x i1> %cmp, <8 x i32> %load.b, <8 x i32> %load.a
store <8 x i32> %sel, <8 x i32>* %ptr.a, align 2
@@ -3025,12 +3025,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32* %b, i64 %index
+ %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
+ %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
%ptr.a = bitcast i32* %gep.a to <8 x i32>*
%ptr.b = bitcast i32* %gep.b to <8 x i32>*
- %load.a = load <8 x i32>* %ptr.a, align 2
- %load.b = load <8 x i32>* %ptr.b, align 2
+ %load.a = load <8 x i32>, <8 x i32>* %ptr.a, align 2
+ %load.b = load <8 x i32>, <8 x i32>* %ptr.b, align 2
%cmp = icmp ugt <8 x i32> %load.a, %load.b
%sel = select <8 x i1> %cmp, <8 x i32> %load.b, <8 x i32> %load.a
store <8 x i32> %sel, <8 x i32>* %ptr.a, align 2
@@ -3054,12 +3054,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32* %b, i64 %index
+ %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
+ %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
%ptr.a = bitcast i32* %gep.a to <8 x i32>*
%ptr.b = bitcast i32* %gep.b to <8 x i32>*
- %load.a = load <8 x i32>* %ptr.a, align 2
- %load.b = load <8 x i32>* %ptr.b, align 2
+ %load.a = load <8 x i32>, <8 x i32>* %ptr.a, align 2
+ %load.b = load <8 x i32>, <8 x i32>* %ptr.b, align 2
%cmp = icmp uge <8 x i32> %load.a, %load.b
%sel = select <8 x i1> %cmp, <8 x i32> %load.b, <8 x i32> %load.a
store <8 x i32> %sel, <8 x i32>* %ptr.a, align 2
@@ -3085,12 +3085,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i8* %a, i64 %index
- %gep.b = getelementptr inbounds i8* %b, i64 %index
+ %gep.a = getelementptr inbounds i8, i8* %a, i64 %index
+ %gep.b = getelementptr inbounds i8, i8* %b, i64 %index
%ptr.a = bitcast i8* %gep.a to <64 x i8>*
%ptr.b = bitcast i8* %gep.b to <64 x i8>*
- %load.a = load <64 x i8>* %ptr.a, align 2
- %load.b = load <64 x i8>* %ptr.b, align 2
+ %load.a = load <64 x i8>, <64 x i8>* %ptr.a, align 2
+ %load.b = load <64 x i8>, <64 x i8>* %ptr.b, align 2
%cmp = icmp slt <64 x i8> %load.a, %load.b
%sel = select <64 x i1> %cmp, <64 x i8> %load.a, <64 x i8> %load.b
store <64 x i8> %sel, <64 x i8>* %ptr.a, align 2
@@ -3111,12 +3111,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i8* %a, i64 %index
- %gep.b = getelementptr inbounds i8* %b, i64 %index
+ %gep.a = getelementptr inbounds i8, i8* %a, i64 %index
+ %gep.b = getelementptr inbounds i8, i8* %b, i64 %index
%ptr.a = bitcast i8* %gep.a to <64 x i8>*
%ptr.b = bitcast i8* %gep.b to <64 x i8>*
- %load.a = load <64 x i8>* %ptr.a, align 2
- %load.b = load <64 x i8>* %ptr.b, align 2
+ %load.a = load <64 x i8>, <64 x i8>* %ptr.a, align 2
+ %load.b = load <64 x i8>, <64 x i8>* %ptr.b, align 2
%cmp = icmp sle <64 x i8> %load.a, %load.b
%sel = select <64 x i1> %cmp, <64 x i8> %load.a, <64 x i8> %load.b
store <64 x i8> %sel, <64 x i8>* %ptr.a, align 2
@@ -3137,12 +3137,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i8* %a, i64 %index
- %gep.b = getelementptr inbounds i8* %b, i64 %index
+ %gep.a = getelementptr inbounds i8, i8* %a, i64 %index
+ %gep.b = getelementptr inbounds i8, i8* %b, i64 %index
%ptr.a = bitcast i8* %gep.a to <64 x i8>*
%ptr.b = bitcast i8* %gep.b to <64 x i8>*
- %load.a = load <64 x i8>* %ptr.a, align 2
- %load.b = load <64 x i8>* %ptr.b, align 2
+ %load.a = load <64 x i8>, <64 x i8>* %ptr.a, align 2
+ %load.b = load <64 x i8>, <64 x i8>* %ptr.b, align 2
%cmp = icmp sgt <64 x i8> %load.a, %load.b
%sel = select <64 x i1> %cmp, <64 x i8> %load.a, <64 x i8> %load.b
store <64 x i8> %sel, <64 x i8>* %ptr.a, align 2
@@ -3163,12 +3163,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i8* %a, i64 %index
- %gep.b = getelementptr inbounds i8* %b, i64 %index
+ %gep.a = getelementptr inbounds i8, i8* %a, i64 %index
+ %gep.b = getelementptr inbounds i8, i8* %b, i64 %index
%ptr.a = bitcast i8* %gep.a to <64 x i8>*
%ptr.b = bitcast i8* %gep.b to <64 x i8>*
- %load.a = load <64 x i8>* %ptr.a, align 2
- %load.b = load <64 x i8>* %ptr.b, align 2
+ %load.a = load <64 x i8>, <64 x i8>* %ptr.a, align 2
+ %load.b = load <64 x i8>, <64 x i8>* %ptr.b, align 2
%cmp = icmp sge <64 x i8> %load.a, %load.b
%sel = select <64 x i1> %cmp, <64 x i8> %load.a, <64 x i8> %load.b
store <64 x i8> %sel, <64 x i8>* %ptr.a, align 2
@@ -3189,12 +3189,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i8* %a, i64 %index
- %gep.b = getelementptr inbounds i8* %b, i64 %index
+ %gep.a = getelementptr inbounds i8, i8* %a, i64 %index
+ %gep.b = getelementptr inbounds i8, i8* %b, i64 %index
%ptr.a = bitcast i8* %gep.a to <64 x i8>*
%ptr.b = bitcast i8* %gep.b to <64 x i8>*
- %load.a = load <64 x i8>* %ptr.a, align 2
- %load.b = load <64 x i8>* %ptr.b, align 2
+ %load.a = load <64 x i8>, <64 x i8>* %ptr.a, align 2
+ %load.b = load <64 x i8>, <64 x i8>* %ptr.b, align 2
%cmp = icmp ult <64 x i8> %load.a, %load.b
%sel = select <64 x i1> %cmp, <64 x i8> %load.a, <64 x i8> %load.b
store <64 x i8> %sel, <64 x i8>* %ptr.a, align 2
@@ -3215,12 +3215,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i8* %a, i64 %index
- %gep.b = getelementptr inbounds i8* %b, i64 %index
+ %gep.a = getelementptr inbounds i8, i8* %a, i64 %index
+ %gep.b = getelementptr inbounds i8, i8* %b, i64 %index
%ptr.a = bitcast i8* %gep.a to <64 x i8>*
%ptr.b = bitcast i8* %gep.b to <64 x i8>*
- %load.a = load <64 x i8>* %ptr.a, align 2
- %load.b = load <64 x i8>* %ptr.b, align 2
+ %load.a = load <64 x i8>, <64 x i8>* %ptr.a, align 2
+ %load.b = load <64 x i8>, <64 x i8>* %ptr.b, align 2
%cmp = icmp ule <64 x i8> %load.a, %load.b
%sel = select <64 x i1> %cmp, <64 x i8> %load.a, <64 x i8> %load.b
store <64 x i8> %sel, <64 x i8>* %ptr.a, align 2
@@ -3241,12 +3241,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i8* %a, i64 %index
- %gep.b = getelementptr inbounds i8* %b, i64 %index
+ %gep.a = getelementptr inbounds i8, i8* %a, i64 %index
+ %gep.b = getelementptr inbounds i8, i8* %b, i64 %index
%ptr.a = bitcast i8* %gep.a to <64 x i8>*
%ptr.b = bitcast i8* %gep.b to <64 x i8>*
- %load.a = load <64 x i8>* %ptr.a, align 2
- %load.b = load <64 x i8>* %ptr.b, align 2
+ %load.a = load <64 x i8>, <64 x i8>* %ptr.a, align 2
+ %load.b = load <64 x i8>, <64 x i8>* %ptr.b, align 2
%cmp = icmp ugt <64 x i8> %load.a, %load.b
%sel = select <64 x i1> %cmp, <64 x i8> %load.a, <64 x i8> %load.b
store <64 x i8> %sel, <64 x i8>* %ptr.a, align 2
@@ -3267,12 +3267,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i8* %a, i64 %index
- %gep.b = getelementptr inbounds i8* %b, i64 %index
+ %gep.a = getelementptr inbounds i8, i8* %a, i64 %index
+ %gep.b = getelementptr inbounds i8, i8* %b, i64 %index
%ptr.a = bitcast i8* %gep.a to <64 x i8>*
%ptr.b = bitcast i8* %gep.b to <64 x i8>*
- %load.a = load <64 x i8>* %ptr.a, align 2
- %load.b = load <64 x i8>* %ptr.b, align 2
+ %load.a = load <64 x i8>, <64 x i8>* %ptr.a, align 2
+ %load.b = load <64 x i8>, <64 x i8>* %ptr.b, align 2
%cmp = icmp uge <64 x i8> %load.a, %load.b
%sel = select <64 x i1> %cmp, <64 x i8> %load.a, <64 x i8> %load.b
store <64 x i8> %sel, <64 x i8>* %ptr.a, align 2
@@ -3293,12 +3293,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i16* %a, i64 %index
- %gep.b = getelementptr inbounds i16* %b, i64 %index
+ %gep.a = getelementptr inbounds i16, i16* %a, i64 %index
+ %gep.b = getelementptr inbounds i16, i16* %b, i64 %index
%ptr.a = bitcast i16* %gep.a to <32 x i16>*
%ptr.b = bitcast i16* %gep.b to <32 x i16>*
- %load.a = load <32 x i16>* %ptr.a, align 2
- %load.b = load <32 x i16>* %ptr.b, align 2
+ %load.a = load <32 x i16>, <32 x i16>* %ptr.a, align 2
+ %load.b = load <32 x i16>, <32 x i16>* %ptr.b, align 2
%cmp = icmp slt <32 x i16> %load.a, %load.b
%sel = select <32 x i1> %cmp, <32 x i16> %load.a, <32 x i16> %load.b
store <32 x i16> %sel, <32 x i16>* %ptr.a, align 2
@@ -3319,12 +3319,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i16* %a, i64 %index
- %gep.b = getelementptr inbounds i16* %b, i64 %index
+ %gep.a = getelementptr inbounds i16, i16* %a, i64 %index
+ %gep.b = getelementptr inbounds i16, i16* %b, i64 %index
%ptr.a = bitcast i16* %gep.a to <32 x i16>*
%ptr.b = bitcast i16* %gep.b to <32 x i16>*
- %load.a = load <32 x i16>* %ptr.a, align 2
- %load.b = load <32 x i16>* %ptr.b, align 2
+ %load.a = load <32 x i16>, <32 x i16>* %ptr.a, align 2
+ %load.b = load <32 x i16>, <32 x i16>* %ptr.b, align 2
%cmp = icmp sle <32 x i16> %load.a, %load.b
%sel = select <32 x i1> %cmp, <32 x i16> %load.a, <32 x i16> %load.b
store <32 x i16> %sel, <32 x i16>* %ptr.a, align 2
@@ -3345,12 +3345,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i16* %a, i64 %index
- %gep.b = getelementptr inbounds i16* %b, i64 %index
+ %gep.a = getelementptr inbounds i16, i16* %a, i64 %index
+ %gep.b = getelementptr inbounds i16, i16* %b, i64 %index
%ptr.a = bitcast i16* %gep.a to <32 x i16>*
%ptr.b = bitcast i16* %gep.b to <32 x i16>*
- %load.a = load <32 x i16>* %ptr.a, align 2
- %load.b = load <32 x i16>* %ptr.b, align 2
+ %load.a = load <32 x i16>, <32 x i16>* %ptr.a, align 2
+ %load.b = load <32 x i16>, <32 x i16>* %ptr.b, align 2
%cmp = icmp sgt <32 x i16> %load.a, %load.b
%sel = select <32 x i1> %cmp, <32 x i16> %load.a, <32 x i16> %load.b
store <32 x i16> %sel, <32 x i16>* %ptr.a, align 2
@@ -3371,12 +3371,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i16* %a, i64 %index
- %gep.b = getelementptr inbounds i16* %b, i64 %index
+ %gep.a = getelementptr inbounds i16, i16* %a, i64 %index
+ %gep.b = getelementptr inbounds i16, i16* %b, i64 %index
%ptr.a = bitcast i16* %gep.a to <32 x i16>*
%ptr.b = bitcast i16* %gep.b to <32 x i16>*
- %load.a = load <32 x i16>* %ptr.a, align 2
- %load.b = load <32 x i16>* %ptr.b, align 2
+ %load.a = load <32 x i16>, <32 x i16>* %ptr.a, align 2
+ %load.b = load <32 x i16>, <32 x i16>* %ptr.b, align 2
%cmp = icmp sge <32 x i16> %load.a, %load.b
%sel = select <32 x i1> %cmp, <32 x i16> %load.a, <32 x i16> %load.b
store <32 x i16> %sel, <32 x i16>* %ptr.a, align 2
@@ -3397,12 +3397,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i16* %a, i64 %index
- %gep.b = getelementptr inbounds i16* %b, i64 %index
+ %gep.a = getelementptr inbounds i16, i16* %a, i64 %index
+ %gep.b = getelementptr inbounds i16, i16* %b, i64 %index
%ptr.a = bitcast i16* %gep.a to <32 x i16>*
%ptr.b = bitcast i16* %gep.b to <32 x i16>*
- %load.a = load <32 x i16>* %ptr.a, align 2
- %load.b = load <32 x i16>* %ptr.b, align 2
+ %load.a = load <32 x i16>, <32 x i16>* %ptr.a, align 2
+ %load.b = load <32 x i16>, <32 x i16>* %ptr.b, align 2
%cmp = icmp ult <32 x i16> %load.a, %load.b
%sel = select <32 x i1> %cmp, <32 x i16> %load.a, <32 x i16> %load.b
store <32 x i16> %sel, <32 x i16>* %ptr.a, align 2
@@ -3423,12 +3423,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i16* %a, i64 %index
- %gep.b = getelementptr inbounds i16* %b, i64 %index
+ %gep.a = getelementptr inbounds i16, i16* %a, i64 %index
+ %gep.b = getelementptr inbounds i16, i16* %b, i64 %index
%ptr.a = bitcast i16* %gep.a to <32 x i16>*
%ptr.b = bitcast i16* %gep.b to <32 x i16>*
- %load.a = load <32 x i16>* %ptr.a, align 2
- %load.b = load <32 x i16>* %ptr.b, align 2
+ %load.a = load <32 x i16>, <32 x i16>* %ptr.a, align 2
+ %load.b = load <32 x i16>, <32 x i16>* %ptr.b, align 2
%cmp = icmp ule <32 x i16> %load.a, %load.b
%sel = select <32 x i1> %cmp, <32 x i16> %load.a, <32 x i16> %load.b
store <32 x i16> %sel, <32 x i16>* %ptr.a, align 2
@@ -3449,12 +3449,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i16* %a, i64 %index
- %gep.b = getelementptr inbounds i16* %b, i64 %index
+ %gep.a = getelementptr inbounds i16, i16* %a, i64 %index
+ %gep.b = getelementptr inbounds i16, i16* %b, i64 %index
%ptr.a = bitcast i16* %gep.a to <32 x i16>*
%ptr.b = bitcast i16* %gep.b to <32 x i16>*
- %load.a = load <32 x i16>* %ptr.a, align 2
- %load.b = load <32 x i16>* %ptr.b, align 2
+ %load.a = load <32 x i16>, <32 x i16>* %ptr.a, align 2
+ %load.b = load <32 x i16>, <32 x i16>* %ptr.b, align 2
%cmp = icmp ugt <32 x i16> %load.a, %load.b
%sel = select <32 x i1> %cmp, <32 x i16> %load.a, <32 x i16> %load.b
store <32 x i16> %sel, <32 x i16>* %ptr.a, align 2
@@ -3475,12 +3475,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i16* %a, i64 %index
- %gep.b = getelementptr inbounds i16* %b, i64 %index
+ %gep.a = getelementptr inbounds i16, i16* %a, i64 %index
+ %gep.b = getelementptr inbounds i16, i16* %b, i64 %index
%ptr.a = bitcast i16* %gep.a to <32 x i16>*
%ptr.b = bitcast i16* %gep.b to <32 x i16>*
- %load.a = load <32 x i16>* %ptr.a, align 2
- %load.b = load <32 x i16>* %ptr.b, align 2
+ %load.a = load <32 x i16>, <32 x i16>* %ptr.a, align 2
+ %load.b = load <32 x i16>, <32 x i16>* %ptr.b, align 2
%cmp = icmp uge <32 x i16> %load.a, %load.b
%sel = select <32 x i1> %cmp, <32 x i16> %load.a, <32 x i16> %load.b
store <32 x i16> %sel, <32 x i16>* %ptr.a, align 2
@@ -3501,12 +3501,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32* %b, i64 %index
+ %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
+ %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
%ptr.a = bitcast i32* %gep.a to <16 x i32>*
%ptr.b = bitcast i32* %gep.b to <16 x i32>*
- %load.a = load <16 x i32>* %ptr.a, align 2
- %load.b = load <16 x i32>* %ptr.b, align 2
+ %load.a = load <16 x i32>, <16 x i32>* %ptr.a, align 2
+ %load.b = load <16 x i32>, <16 x i32>* %ptr.b, align 2
%cmp = icmp slt <16 x i32> %load.a, %load.b
%sel = select <16 x i1> %cmp, <16 x i32> %load.a, <16 x i32> %load.b
store <16 x i32> %sel, <16 x i32>* %ptr.a, align 2
@@ -3527,12 +3527,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32* %b, i64 %index
+ %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
+ %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
%ptr.a = bitcast i32* %gep.a to <16 x i32>*
%ptr.b = bitcast i32* %gep.b to <16 x i32>*
- %load.a = load <16 x i32>* %ptr.a, align 2
- %load.b = load <16 x i32>* %ptr.b, align 2
+ %load.a = load <16 x i32>, <16 x i32>* %ptr.a, align 2
+ %load.b = load <16 x i32>, <16 x i32>* %ptr.b, align 2
%cmp = icmp sle <16 x i32> %load.a, %load.b
%sel = select <16 x i1> %cmp, <16 x i32> %load.a, <16 x i32> %load.b
store <16 x i32> %sel, <16 x i32>* %ptr.a, align 2
@@ -3553,12 +3553,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32* %b, i64 %index
+ %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
+ %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
%ptr.a = bitcast i32* %gep.a to <16 x i32>*
%ptr.b = bitcast i32* %gep.b to <16 x i32>*
- %load.a = load <16 x i32>* %ptr.a, align 2
- %load.b = load <16 x i32>* %ptr.b, align 2
+ %load.a = load <16 x i32>, <16 x i32>* %ptr.a, align 2
+ %load.b = load <16 x i32>, <16 x i32>* %ptr.b, align 2
%cmp = icmp sgt <16 x i32> %load.a, %load.b
%sel = select <16 x i1> %cmp, <16 x i32> %load.a, <16 x i32> %load.b
store <16 x i32> %sel, <16 x i32>* %ptr.a, align 2
@@ -3579,12 +3579,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32* %b, i64 %index
+ %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
+ %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
%ptr.a = bitcast i32* %gep.a to <16 x i32>*
%ptr.b = bitcast i32* %gep.b to <16 x i32>*
- %load.a = load <16 x i32>* %ptr.a, align 2
- %load.b = load <16 x i32>* %ptr.b, align 2
+ %load.a = load <16 x i32>, <16 x i32>* %ptr.a, align 2
+ %load.b = load <16 x i32>, <16 x i32>* %ptr.b, align 2
%cmp = icmp sge <16 x i32> %load.a, %load.b
%sel = select <16 x i1> %cmp, <16 x i32> %load.a, <16 x i32> %load.b
store <16 x i32> %sel, <16 x i32>* %ptr.a, align 2
@@ -3605,12 +3605,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32* %b, i64 %index
+ %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
+ %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
%ptr.a = bitcast i32* %gep.a to <16 x i32>*
%ptr.b = bitcast i32* %gep.b to <16 x i32>*
- %load.a = load <16 x i32>* %ptr.a, align 2
- %load.b = load <16 x i32>* %ptr.b, align 2
+ %load.a = load <16 x i32>, <16 x i32>* %ptr.a, align 2
+ %load.b = load <16 x i32>, <16 x i32>* %ptr.b, align 2
%cmp = icmp ult <16 x i32> %load.a, %load.b
%sel = select <16 x i1> %cmp, <16 x i32> %load.a, <16 x i32> %load.b
store <16 x i32> %sel, <16 x i32>* %ptr.a, align 2
@@ -3631,12 +3631,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32* %b, i64 %index
+ %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
+ %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
%ptr.a = bitcast i32* %gep.a to <16 x i32>*
%ptr.b = bitcast i32* %gep.b to <16 x i32>*
- %load.a = load <16 x i32>* %ptr.a, align 2
- %load.b = load <16 x i32>* %ptr.b, align 2
+ %load.a = load <16 x i32>, <16 x i32>* %ptr.a, align 2
+ %load.b = load <16 x i32>, <16 x i32>* %ptr.b, align 2
%cmp = icmp ule <16 x i32> %load.a, %load.b
%sel = select <16 x i1> %cmp, <16 x i32> %load.a, <16 x i32> %load.b
store <16 x i32> %sel, <16 x i32>* %ptr.a, align 2
@@ -3657,12 +3657,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32* %b, i64 %index
+ %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
+ %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
%ptr.a = bitcast i32* %gep.a to <16 x i32>*
%ptr.b = bitcast i32* %gep.b to <16 x i32>*
- %load.a = load <16 x i32>* %ptr.a, align 2
- %load.b = load <16 x i32>* %ptr.b, align 2
+ %load.a = load <16 x i32>, <16 x i32>* %ptr.a, align 2
+ %load.b = load <16 x i32>, <16 x i32>* %ptr.b, align 2
%cmp = icmp ugt <16 x i32> %load.a, %load.b
%sel = select <16 x i1> %cmp, <16 x i32> %load.a, <16 x i32> %load.b
store <16 x i32> %sel, <16 x i32>* %ptr.a, align 2
@@ -3683,12 +3683,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32* %b, i64 %index
+ %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
+ %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
%ptr.a = bitcast i32* %gep.a to <16 x i32>*
%ptr.b = bitcast i32* %gep.b to <16 x i32>*
- %load.a = load <16 x i32>* %ptr.a, align 2
- %load.b = load <16 x i32>* %ptr.b, align 2
+ %load.a = load <16 x i32>, <16 x i32>* %ptr.a, align 2
+ %load.b = load <16 x i32>, <16 x i32>* %ptr.b, align 2
%cmp = icmp uge <16 x i32> %load.a, %load.b
%sel = select <16 x i1> %cmp, <16 x i32> %load.a, <16 x i32> %load.b
store <16 x i32> %sel, <16 x i32>* %ptr.a, align 2
@@ -3709,12 +3709,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32* %b, i64 %index
+ %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
+ %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
%ptr.a = bitcast i32* %gep.a to <8 x i64>*
%ptr.b = bitcast i32* %gep.b to <8 x i64>*
- %load.a = load <8 x i64>* %ptr.a, align 2
- %load.b = load <8 x i64>* %ptr.b, align 2
+ %load.a = load <8 x i64>, <8 x i64>* %ptr.a, align 2
+ %load.b = load <8 x i64>, <8 x i64>* %ptr.b, align 2
%cmp = icmp slt <8 x i64> %load.a, %load.b
%sel = select <8 x i1> %cmp, <8 x i64> %load.a, <8 x i64> %load.b
store <8 x i64> %sel, <8 x i64>* %ptr.a, align 2
@@ -3735,12 +3735,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32* %b, i64 %index
+ %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
+ %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
%ptr.a = bitcast i32* %gep.a to <8 x i64>*
%ptr.b = bitcast i32* %gep.b to <8 x i64>*
- %load.a = load <8 x i64>* %ptr.a, align 2
- %load.b = load <8 x i64>* %ptr.b, align 2
+ %load.a = load <8 x i64>, <8 x i64>* %ptr.a, align 2
+ %load.b = load <8 x i64>, <8 x i64>* %ptr.b, align 2
%cmp = icmp sle <8 x i64> %load.a, %load.b
%sel = select <8 x i1> %cmp, <8 x i64> %load.a, <8 x i64> %load.b
store <8 x i64> %sel, <8 x i64>* %ptr.a, align 2
@@ -3761,12 +3761,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32* %b, i64 %index
+ %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
+ %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
%ptr.a = bitcast i32* %gep.a to <8 x i64>*
%ptr.b = bitcast i32* %gep.b to <8 x i64>*
- %load.a = load <8 x i64>* %ptr.a, align 2
- %load.b = load <8 x i64>* %ptr.b, align 2
+ %load.a = load <8 x i64>, <8 x i64>* %ptr.a, align 2
+ %load.b = load <8 x i64>, <8 x i64>* %ptr.b, align 2
%cmp = icmp sgt <8 x i64> %load.a, %load.b
%sel = select <8 x i1> %cmp, <8 x i64> %load.a, <8 x i64> %load.b
store <8 x i64> %sel, <8 x i64>* %ptr.a, align 2
@@ -3787,12 +3787,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32* %b, i64 %index
+ %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
+ %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
%ptr.a = bitcast i32* %gep.a to <8 x i64>*
%ptr.b = bitcast i32* %gep.b to <8 x i64>*
- %load.a = load <8 x i64>* %ptr.a, align 2
- %load.b = load <8 x i64>* %ptr.b, align 2
+ %load.a = load <8 x i64>, <8 x i64>* %ptr.a, align 2
+ %load.b = load <8 x i64>, <8 x i64>* %ptr.b, align 2
%cmp = icmp sge <8 x i64> %load.a, %load.b
%sel = select <8 x i1> %cmp, <8 x i64> %load.a, <8 x i64> %load.b
store <8 x i64> %sel, <8 x i64>* %ptr.a, align 2
@@ -3813,12 +3813,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32* %b, i64 %index
+ %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
+ %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
%ptr.a = bitcast i32* %gep.a to <8 x i64>*
%ptr.b = bitcast i32* %gep.b to <8 x i64>*
- %load.a = load <8 x i64>* %ptr.a, align 2
- %load.b = load <8 x i64>* %ptr.b, align 2
+ %load.a = load <8 x i64>, <8 x i64>* %ptr.a, align 2
+ %load.b = load <8 x i64>, <8 x i64>* %ptr.b, align 2
%cmp = icmp ult <8 x i64> %load.a, %load.b
%sel = select <8 x i1> %cmp, <8 x i64> %load.a, <8 x i64> %load.b
store <8 x i64> %sel, <8 x i64>* %ptr.a, align 2
@@ -3839,12 +3839,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32* %b, i64 %index
+ %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
+ %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
%ptr.a = bitcast i32* %gep.a to <8 x i64>*
%ptr.b = bitcast i32* %gep.b to <8 x i64>*
- %load.a = load <8 x i64>* %ptr.a, align 2
- %load.b = load <8 x i64>* %ptr.b, align 2
+ %load.a = load <8 x i64>, <8 x i64>* %ptr.a, align 2
+ %load.b = load <8 x i64>, <8 x i64>* %ptr.b, align 2
%cmp = icmp ule <8 x i64> %load.a, %load.b
%sel = select <8 x i1> %cmp, <8 x i64> %load.a, <8 x i64> %load.b
store <8 x i64> %sel, <8 x i64>* %ptr.a, align 2
@@ -3865,12 +3865,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32* %b, i64 %index
+ %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
+ %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
%ptr.a = bitcast i32* %gep.a to <8 x i64>*
%ptr.b = bitcast i32* %gep.b to <8 x i64>*
- %load.a = load <8 x i64>* %ptr.a, align 2
- %load.b = load <8 x i64>* %ptr.b, align 2
+ %load.a = load <8 x i64>, <8 x i64>* %ptr.a, align 2
+ %load.b = load <8 x i64>, <8 x i64>* %ptr.b, align 2
%cmp = icmp ugt <8 x i64> %load.a, %load.b
%sel = select <8 x i1> %cmp, <8 x i64> %load.a, <8 x i64> %load.b
store <8 x i64> %sel, <8 x i64>* %ptr.a, align 2
@@ -3891,12 +3891,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32* %b, i64 %index
+ %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
+ %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
%ptr.a = bitcast i32* %gep.a to <8 x i64>*
%ptr.b = bitcast i32* %gep.b to <8 x i64>*
- %load.a = load <8 x i64>* %ptr.a, align 2
- %load.b = load <8 x i64>* %ptr.b, align 2
+ %load.a = load <8 x i64>, <8 x i64>* %ptr.a, align 2
+ %load.b = load <8 x i64>, <8 x i64>* %ptr.b, align 2
%cmp = icmp uge <8 x i64> %load.a, %load.b
%sel = select <8 x i1> %cmp, <8 x i64> %load.a, <8 x i64> %load.b
store <8 x i64> %sel, <8 x i64>* %ptr.a, align 2
@@ -3917,12 +3917,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i8* %a, i64 %index
- %gep.b = getelementptr inbounds i8* %b, i64 %index
+ %gep.a = getelementptr inbounds i8, i8* %a, i64 %index
+ %gep.b = getelementptr inbounds i8, i8* %b, i64 %index
%ptr.a = bitcast i8* %gep.a to <64 x i8>*
%ptr.b = bitcast i8* %gep.b to <64 x i8>*
- %load.a = load <64 x i8>* %ptr.a, align 2
- %load.b = load <64 x i8>* %ptr.b, align 2
+ %load.a = load <64 x i8>, <64 x i8>* %ptr.a, align 2
+ %load.b = load <64 x i8>, <64 x i8>* %ptr.b, align 2
%cmp = icmp slt <64 x i8> %load.a, %load.b
%sel = select <64 x i1> %cmp, <64 x i8> %load.b, <64 x i8> %load.a
store <64 x i8> %sel, <64 x i8>* %ptr.a, align 2
@@ -3943,12 +3943,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i8* %a, i64 %index
- %gep.b = getelementptr inbounds i8* %b, i64 %index
+ %gep.a = getelementptr inbounds i8, i8* %a, i64 %index
+ %gep.b = getelementptr inbounds i8, i8* %b, i64 %index
%ptr.a = bitcast i8* %gep.a to <64 x i8>*
%ptr.b = bitcast i8* %gep.b to <64 x i8>*
- %load.a = load <64 x i8>* %ptr.a, align 2
- %load.b = load <64 x i8>* %ptr.b, align 2
+ %load.a = load <64 x i8>, <64 x i8>* %ptr.a, align 2
+ %load.b = load <64 x i8>, <64 x i8>* %ptr.b, align 2
%cmp = icmp sle <64 x i8> %load.a, %load.b
%sel = select <64 x i1> %cmp, <64 x i8> %load.b, <64 x i8> %load.a
store <64 x i8> %sel, <64 x i8>* %ptr.a, align 2
@@ -3969,12 +3969,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i8* %a, i64 %index
- %gep.b = getelementptr inbounds i8* %b, i64 %index
+ %gep.a = getelementptr inbounds i8, i8* %a, i64 %index
+ %gep.b = getelementptr inbounds i8, i8* %b, i64 %index
%ptr.a = bitcast i8* %gep.a to <64 x i8>*
%ptr.b = bitcast i8* %gep.b to <64 x i8>*
- %load.a = load <64 x i8>* %ptr.a, align 2
- %load.b = load <64 x i8>* %ptr.b, align 2
+ %load.a = load <64 x i8>, <64 x i8>* %ptr.a, align 2
+ %load.b = load <64 x i8>, <64 x i8>* %ptr.b, align 2
%cmp = icmp sgt <64 x i8> %load.a, %load.b
%sel = select <64 x i1> %cmp, <64 x i8> %load.b, <64 x i8> %load.a
store <64 x i8> %sel, <64 x i8>* %ptr.a, align 2
@@ -3995,12 +3995,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i8* %a, i64 %index
- %gep.b = getelementptr inbounds i8* %b, i64 %index
+ %gep.a = getelementptr inbounds i8, i8* %a, i64 %index
+ %gep.b = getelementptr inbounds i8, i8* %b, i64 %index
%ptr.a = bitcast i8* %gep.a to <64 x i8>*
%ptr.b = bitcast i8* %gep.b to <64 x i8>*
- %load.a = load <64 x i8>* %ptr.a, align 2
- %load.b = load <64 x i8>* %ptr.b, align 2
+ %load.a = load <64 x i8>, <64 x i8>* %ptr.a, align 2
+ %load.b = load <64 x i8>, <64 x i8>* %ptr.b, align 2
%cmp = icmp sge <64 x i8> %load.a, %load.b
%sel = select <64 x i1> %cmp, <64 x i8> %load.b, <64 x i8> %load.a
store <64 x i8> %sel, <64 x i8>* %ptr.a, align 2
@@ -4021,12 +4021,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i8* %a, i64 %index
- %gep.b = getelementptr inbounds i8* %b, i64 %index
+ %gep.a = getelementptr inbounds i8, i8* %a, i64 %index
+ %gep.b = getelementptr inbounds i8, i8* %b, i64 %index
%ptr.a = bitcast i8* %gep.a to <64 x i8>*
%ptr.b = bitcast i8* %gep.b to <64 x i8>*
- %load.a = load <64 x i8>* %ptr.a, align 2
- %load.b = load <64 x i8>* %ptr.b, align 2
+ %load.a = load <64 x i8>, <64 x i8>* %ptr.a, align 2
+ %load.b = load <64 x i8>, <64 x i8>* %ptr.b, align 2
%cmp = icmp ult <64 x i8> %load.a, %load.b
%sel = select <64 x i1> %cmp, <64 x i8> %load.b, <64 x i8> %load.a
store <64 x i8> %sel, <64 x i8>* %ptr.a, align 2
@@ -4047,12 +4047,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i8* %a, i64 %index
- %gep.b = getelementptr inbounds i8* %b, i64 %index
+ %gep.a = getelementptr inbounds i8, i8* %a, i64 %index
+ %gep.b = getelementptr inbounds i8, i8* %b, i64 %index
%ptr.a = bitcast i8* %gep.a to <64 x i8>*
%ptr.b = bitcast i8* %gep.b to <64 x i8>*
- %load.a = load <64 x i8>* %ptr.a, align 2
- %load.b = load <64 x i8>* %ptr.b, align 2
+ %load.a = load <64 x i8>, <64 x i8>* %ptr.a, align 2
+ %load.b = load <64 x i8>, <64 x i8>* %ptr.b, align 2
%cmp = icmp ule <64 x i8> %load.a, %load.b
%sel = select <64 x i1> %cmp, <64 x i8> %load.b, <64 x i8> %load.a
store <64 x i8> %sel, <64 x i8>* %ptr.a, align 2
@@ -4073,12 +4073,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i8* %a, i64 %index
- %gep.b = getelementptr inbounds i8* %b, i64 %index
+ %gep.a = getelementptr inbounds i8, i8* %a, i64 %index
+ %gep.b = getelementptr inbounds i8, i8* %b, i64 %index
%ptr.a = bitcast i8* %gep.a to <64 x i8>*
%ptr.b = bitcast i8* %gep.b to <64 x i8>*
- %load.a = load <64 x i8>* %ptr.a, align 2
- %load.b = load <64 x i8>* %ptr.b, align 2
+ %load.a = load <64 x i8>, <64 x i8>* %ptr.a, align 2
+ %load.b = load <64 x i8>, <64 x i8>* %ptr.b, align 2
%cmp = icmp ugt <64 x i8> %load.a, %load.b
%sel = select <64 x i1> %cmp, <64 x i8> %load.b, <64 x i8> %load.a
store <64 x i8> %sel, <64 x i8>* %ptr.a, align 2
@@ -4099,12 +4099,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i8* %a, i64 %index
- %gep.b = getelementptr inbounds i8* %b, i64 %index
+ %gep.a = getelementptr inbounds i8, i8* %a, i64 %index
+ %gep.b = getelementptr inbounds i8, i8* %b, i64 %index
%ptr.a = bitcast i8* %gep.a to <64 x i8>*
%ptr.b = bitcast i8* %gep.b to <64 x i8>*
- %load.a = load <64 x i8>* %ptr.a, align 2
- %load.b = load <64 x i8>* %ptr.b, align 2
+ %load.a = load <64 x i8>, <64 x i8>* %ptr.a, align 2
+ %load.b = load <64 x i8>, <64 x i8>* %ptr.b, align 2
%cmp = icmp uge <64 x i8> %load.a, %load.b
%sel = select <64 x i1> %cmp, <64 x i8> %load.b, <64 x i8> %load.a
store <64 x i8> %sel, <64 x i8>* %ptr.a, align 2
@@ -4125,12 +4125,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i16* %a, i64 %index
- %gep.b = getelementptr inbounds i16* %b, i64 %index
+ %gep.a = getelementptr inbounds i16, i16* %a, i64 %index
+ %gep.b = getelementptr inbounds i16, i16* %b, i64 %index
%ptr.a = bitcast i16* %gep.a to <32 x i16>*
%ptr.b = bitcast i16* %gep.b to <32 x i16>*
- %load.a = load <32 x i16>* %ptr.a, align 2
- %load.b = load <32 x i16>* %ptr.b, align 2
+ %load.a = load <32 x i16>, <32 x i16>* %ptr.a, align 2
+ %load.b = load <32 x i16>, <32 x i16>* %ptr.b, align 2
%cmp = icmp slt <32 x i16> %load.a, %load.b
%sel = select <32 x i1> %cmp, <32 x i16> %load.b, <32 x i16> %load.a
store <32 x i16> %sel, <32 x i16>* %ptr.a, align 2
@@ -4151,12 +4151,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i16* %a, i64 %index
- %gep.b = getelementptr inbounds i16* %b, i64 %index
+ %gep.a = getelementptr inbounds i16, i16* %a, i64 %index
+ %gep.b = getelementptr inbounds i16, i16* %b, i64 %index
%ptr.a = bitcast i16* %gep.a to <32 x i16>*
%ptr.b = bitcast i16* %gep.b to <32 x i16>*
- %load.a = load <32 x i16>* %ptr.a, align 2
- %load.b = load <32 x i16>* %ptr.b, align 2
+ %load.a = load <32 x i16>, <32 x i16>* %ptr.a, align 2
+ %load.b = load <32 x i16>, <32 x i16>* %ptr.b, align 2
%cmp = icmp sle <32 x i16> %load.a, %load.b
%sel = select <32 x i1> %cmp, <32 x i16> %load.b, <32 x i16> %load.a
store <32 x i16> %sel, <32 x i16>* %ptr.a, align 2
@@ -4177,12 +4177,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i16* %a, i64 %index
- %gep.b = getelementptr inbounds i16* %b, i64 %index
+ %gep.a = getelementptr inbounds i16, i16* %a, i64 %index
+ %gep.b = getelementptr inbounds i16, i16* %b, i64 %index
%ptr.a = bitcast i16* %gep.a to <32 x i16>*
%ptr.b = bitcast i16* %gep.b to <32 x i16>*
- %load.a = load <32 x i16>* %ptr.a, align 2
- %load.b = load <32 x i16>* %ptr.b, align 2
+ %load.a = load <32 x i16>, <32 x i16>* %ptr.a, align 2
+ %load.b = load <32 x i16>, <32 x i16>* %ptr.b, align 2
%cmp = icmp sgt <32 x i16> %load.a, %load.b
%sel = select <32 x i1> %cmp, <32 x i16> %load.b, <32 x i16> %load.a
store <32 x i16> %sel, <32 x i16>* %ptr.a, align 2
@@ -4203,12 +4203,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i16* %a, i64 %index
- %gep.b = getelementptr inbounds i16* %b, i64 %index
+ %gep.a = getelementptr inbounds i16, i16* %a, i64 %index
+ %gep.b = getelementptr inbounds i16, i16* %b, i64 %index
%ptr.a = bitcast i16* %gep.a to <32 x i16>*
%ptr.b = bitcast i16* %gep.b to <32 x i16>*
- %load.a = load <32 x i16>* %ptr.a, align 2
- %load.b = load <32 x i16>* %ptr.b, align 2
+ %load.a = load <32 x i16>, <32 x i16>* %ptr.a, align 2
+ %load.b = load <32 x i16>, <32 x i16>* %ptr.b, align 2
%cmp = icmp sge <32 x i16> %load.a, %load.b
%sel = select <32 x i1> %cmp, <32 x i16> %load.b, <32 x i16> %load.a
store <32 x i16> %sel, <32 x i16>* %ptr.a, align 2
@@ -4229,12 +4229,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i16* %a, i64 %index
- %gep.b = getelementptr inbounds i16* %b, i64 %index
+ %gep.a = getelementptr inbounds i16, i16* %a, i64 %index
+ %gep.b = getelementptr inbounds i16, i16* %b, i64 %index
%ptr.a = bitcast i16* %gep.a to <32 x i16>*
%ptr.b = bitcast i16* %gep.b to <32 x i16>*
- %load.a = load <32 x i16>* %ptr.a, align 2
- %load.b = load <32 x i16>* %ptr.b, align 2
+ %load.a = load <32 x i16>, <32 x i16>* %ptr.a, align 2
+ %load.b = load <32 x i16>, <32 x i16>* %ptr.b, align 2
%cmp = icmp ult <32 x i16> %load.a, %load.b
%sel = select <32 x i1> %cmp, <32 x i16> %load.b, <32 x i16> %load.a
store <32 x i16> %sel, <32 x i16>* %ptr.a, align 2
@@ -4255,12 +4255,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i16* %a, i64 %index
- %gep.b = getelementptr inbounds i16* %b, i64 %index
+ %gep.a = getelementptr inbounds i16, i16* %a, i64 %index
+ %gep.b = getelementptr inbounds i16, i16* %b, i64 %index
%ptr.a = bitcast i16* %gep.a to <32 x i16>*
%ptr.b = bitcast i16* %gep.b to <32 x i16>*
- %load.a = load <32 x i16>* %ptr.a, align 2
- %load.b = load <32 x i16>* %ptr.b, align 2
+ %load.a = load <32 x i16>, <32 x i16>* %ptr.a, align 2
+ %load.b = load <32 x i16>, <32 x i16>* %ptr.b, align 2
%cmp = icmp ule <32 x i16> %load.a, %load.b
%sel = select <32 x i1> %cmp, <32 x i16> %load.b, <32 x i16> %load.a
store <32 x i16> %sel, <32 x i16>* %ptr.a, align 2
@@ -4281,12 +4281,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i16* %a, i64 %index
- %gep.b = getelementptr inbounds i16* %b, i64 %index
+ %gep.a = getelementptr inbounds i16, i16* %a, i64 %index
+ %gep.b = getelementptr inbounds i16, i16* %b, i64 %index
%ptr.a = bitcast i16* %gep.a to <32 x i16>*
%ptr.b = bitcast i16* %gep.b to <32 x i16>*
- %load.a = load <32 x i16>* %ptr.a, align 2
- %load.b = load <32 x i16>* %ptr.b, align 2
+ %load.a = load <32 x i16>, <32 x i16>* %ptr.a, align 2
+ %load.b = load <32 x i16>, <32 x i16>* %ptr.b, align 2
%cmp = icmp ugt <32 x i16> %load.a, %load.b
%sel = select <32 x i1> %cmp, <32 x i16> %load.b, <32 x i16> %load.a
store <32 x i16> %sel, <32 x i16>* %ptr.a, align 2
@@ -4307,12 +4307,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i16* %a, i64 %index
- %gep.b = getelementptr inbounds i16* %b, i64 %index
+ %gep.a = getelementptr inbounds i16, i16* %a, i64 %index
+ %gep.b = getelementptr inbounds i16, i16* %b, i64 %index
%ptr.a = bitcast i16* %gep.a to <32 x i16>*
%ptr.b = bitcast i16* %gep.b to <32 x i16>*
- %load.a = load <32 x i16>* %ptr.a, align 2
- %load.b = load <32 x i16>* %ptr.b, align 2
+ %load.a = load <32 x i16>, <32 x i16>* %ptr.a, align 2
+ %load.b = load <32 x i16>, <32 x i16>* %ptr.b, align 2
%cmp = icmp uge <32 x i16> %load.a, %load.b
%sel = select <32 x i1> %cmp, <32 x i16> %load.b, <32 x i16> %load.a
store <32 x i16> %sel, <32 x i16>* %ptr.a, align 2
@@ -4333,12 +4333,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32* %b, i64 %index
+ %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
+ %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
%ptr.a = bitcast i32* %gep.a to <16 x i32>*
%ptr.b = bitcast i32* %gep.b to <16 x i32>*
- %load.a = load <16 x i32>* %ptr.a, align 2
- %load.b = load <16 x i32>* %ptr.b, align 2
+ %load.a = load <16 x i32>, <16 x i32>* %ptr.a, align 2
+ %load.b = load <16 x i32>, <16 x i32>* %ptr.b, align 2
%cmp = icmp slt <16 x i32> %load.a, %load.b
%sel = select <16 x i1> %cmp, <16 x i32> %load.b, <16 x i32> %load.a
store <16 x i32> %sel, <16 x i32>* %ptr.a, align 2
@@ -4359,12 +4359,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32* %b, i64 %index
+ %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
+ %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
%ptr.a = bitcast i32* %gep.a to <16 x i32>*
%ptr.b = bitcast i32* %gep.b to <16 x i32>*
- %load.a = load <16 x i32>* %ptr.a, align 2
- %load.b = load <16 x i32>* %ptr.b, align 2
+ %load.a = load <16 x i32>, <16 x i32>* %ptr.a, align 2
+ %load.b = load <16 x i32>, <16 x i32>* %ptr.b, align 2
%cmp = icmp sle <16 x i32> %load.a, %load.b
%sel = select <16 x i1> %cmp, <16 x i32> %load.b, <16 x i32> %load.a
store <16 x i32> %sel, <16 x i32>* %ptr.a, align 2
@@ -4385,12 +4385,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32* %b, i64 %index
+ %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
+ %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
%ptr.a = bitcast i32* %gep.a to <16 x i32>*
%ptr.b = bitcast i32* %gep.b to <16 x i32>*
- %load.a = load <16 x i32>* %ptr.a, align 2
- %load.b = load <16 x i32>* %ptr.b, align 2
+ %load.a = load <16 x i32>, <16 x i32>* %ptr.a, align 2
+ %load.b = load <16 x i32>, <16 x i32>* %ptr.b, align 2
%cmp = icmp sgt <16 x i32> %load.a, %load.b
%sel = select <16 x i1> %cmp, <16 x i32> %load.b, <16 x i32> %load.a
store <16 x i32> %sel, <16 x i32>* %ptr.a, align 2
@@ -4411,12 +4411,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32* %b, i64 %index
+ %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
+ %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
%ptr.a = bitcast i32* %gep.a to <16 x i32>*
%ptr.b = bitcast i32* %gep.b to <16 x i32>*
- %load.a = load <16 x i32>* %ptr.a, align 2
- %load.b = load <16 x i32>* %ptr.b, align 2
+ %load.a = load <16 x i32>, <16 x i32>* %ptr.a, align 2
+ %load.b = load <16 x i32>, <16 x i32>* %ptr.b, align 2
%cmp = icmp sge <16 x i32> %load.a, %load.b
%sel = select <16 x i1> %cmp, <16 x i32> %load.b, <16 x i32> %load.a
store <16 x i32> %sel, <16 x i32>* %ptr.a, align 2
@@ -4437,12 +4437,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32* %b, i64 %index
+ %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
+ %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
%ptr.a = bitcast i32* %gep.a to <16 x i32>*
%ptr.b = bitcast i32* %gep.b to <16 x i32>*
- %load.a = load <16 x i32>* %ptr.a, align 2
- %load.b = load <16 x i32>* %ptr.b, align 2
+ %load.a = load <16 x i32>, <16 x i32>* %ptr.a, align 2
+ %load.b = load <16 x i32>, <16 x i32>* %ptr.b, align 2
%cmp = icmp ult <16 x i32> %load.a, %load.b
%sel = select <16 x i1> %cmp, <16 x i32> %load.b, <16 x i32> %load.a
store <16 x i32> %sel, <16 x i32>* %ptr.a, align 2
@@ -4463,12 +4463,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32* %b, i64 %index
+ %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
+ %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
%ptr.a = bitcast i32* %gep.a to <16 x i32>*
%ptr.b = bitcast i32* %gep.b to <16 x i32>*
- %load.a = load <16 x i32>* %ptr.a, align 2
- %load.b = load <16 x i32>* %ptr.b, align 2
+ %load.a = load <16 x i32>, <16 x i32>* %ptr.a, align 2
+ %load.b = load <16 x i32>, <16 x i32>* %ptr.b, align 2
%cmp = icmp ule <16 x i32> %load.a, %load.b
%sel = select <16 x i1> %cmp, <16 x i32> %load.b, <16 x i32> %load.a
store <16 x i32> %sel, <16 x i32>* %ptr.a, align 2
@@ -4489,12 +4489,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32* %b, i64 %index
+ %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
+ %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
%ptr.a = bitcast i32* %gep.a to <16 x i32>*
%ptr.b = bitcast i32* %gep.b to <16 x i32>*
- %load.a = load <16 x i32>* %ptr.a, align 2
- %load.b = load <16 x i32>* %ptr.b, align 2
+ %load.a = load <16 x i32>, <16 x i32>* %ptr.a, align 2
+ %load.b = load <16 x i32>, <16 x i32>* %ptr.b, align 2
%cmp = icmp ugt <16 x i32> %load.a, %load.b
%sel = select <16 x i1> %cmp, <16 x i32> %load.b, <16 x i32> %load.a
store <16 x i32> %sel, <16 x i32>* %ptr.a, align 2
@@ -4515,12 +4515,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32* %b, i64 %index
+ %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
+ %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
%ptr.a = bitcast i32* %gep.a to <16 x i32>*
%ptr.b = bitcast i32* %gep.b to <16 x i32>*
- %load.a = load <16 x i32>* %ptr.a, align 2
- %load.b = load <16 x i32>* %ptr.b, align 2
+ %load.a = load <16 x i32>, <16 x i32>* %ptr.a, align 2
+ %load.b = load <16 x i32>, <16 x i32>* %ptr.b, align 2
%cmp = icmp uge <16 x i32> %load.a, %load.b
%sel = select <16 x i1> %cmp, <16 x i32> %load.b, <16 x i32> %load.a
store <16 x i32> %sel, <16 x i32>* %ptr.a, align 2
@@ -4543,12 +4543,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32* %b, i64 %index
+ %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
+ %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
%ptr.a = bitcast i32* %gep.a to <8 x i64>*
%ptr.b = bitcast i32* %gep.b to <8 x i64>*
- %load.a = load <8 x i64>* %ptr.a, align 2
- %load.b = load <8 x i64>* %ptr.b, align 2
+ %load.a = load <8 x i64>, <8 x i64>* %ptr.a, align 2
+ %load.b = load <8 x i64>, <8 x i64>* %ptr.b, align 2
%cmp = icmp slt <8 x i64> %load.a, %load.b
%sel = select <8 x i1> %cmp, <8 x i64> %load.b, <8 x i64> %load.a
store <8 x i64> %sel, <8 x i64>* %ptr.a, align 2
@@ -4569,12 +4569,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32* %b, i64 %index
+ %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
+ %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
%ptr.a = bitcast i32* %gep.a to <8 x i64>*
%ptr.b = bitcast i32* %gep.b to <8 x i64>*
- %load.a = load <8 x i64>* %ptr.a, align 2
- %load.b = load <8 x i64>* %ptr.b, align 2
+ %load.a = load <8 x i64>, <8 x i64>* %ptr.a, align 2
+ %load.b = load <8 x i64>, <8 x i64>* %ptr.b, align 2
%cmp = icmp sle <8 x i64> %load.a, %load.b
%sel = select <8 x i1> %cmp, <8 x i64> %load.b, <8 x i64> %load.a
store <8 x i64> %sel, <8 x i64>* %ptr.a, align 2
@@ -4595,12 +4595,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32* %b, i64 %index
+ %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
+ %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
%ptr.a = bitcast i32* %gep.a to <8 x i64>*
%ptr.b = bitcast i32* %gep.b to <8 x i64>*
- %load.a = load <8 x i64>* %ptr.a, align 2
- %load.b = load <8 x i64>* %ptr.b, align 2
+ %load.a = load <8 x i64>, <8 x i64>* %ptr.a, align 2
+ %load.b = load <8 x i64>, <8 x i64>* %ptr.b, align 2
%cmp = icmp sgt <8 x i64> %load.a, %load.b
%sel = select <8 x i1> %cmp, <8 x i64> %load.b, <8 x i64> %load.a
store <8 x i64> %sel, <8 x i64>* %ptr.a, align 2
@@ -4621,12 +4621,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32* %b, i64 %index
+ %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
+ %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
%ptr.a = bitcast i32* %gep.a to <8 x i64>*
%ptr.b = bitcast i32* %gep.b to <8 x i64>*
- %load.a = load <8 x i64>* %ptr.a, align 2
- %load.b = load <8 x i64>* %ptr.b, align 2
+ %load.a = load <8 x i64>, <8 x i64>* %ptr.a, align 2
+ %load.b = load <8 x i64>, <8 x i64>* %ptr.b, align 2
%cmp = icmp sge <8 x i64> %load.a, %load.b
%sel = select <8 x i1> %cmp, <8 x i64> %load.b, <8 x i64> %load.a
store <8 x i64> %sel, <8 x i64>* %ptr.a, align 2
@@ -4647,12 +4647,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32* %b, i64 %index
+ %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
+ %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
%ptr.a = bitcast i32* %gep.a to <8 x i64>*
%ptr.b = bitcast i32* %gep.b to <8 x i64>*
- %load.a = load <8 x i64>* %ptr.a, align 2
- %load.b = load <8 x i64>* %ptr.b, align 2
+ %load.a = load <8 x i64>, <8 x i64>* %ptr.a, align 2
+ %load.b = load <8 x i64>, <8 x i64>* %ptr.b, align 2
%cmp = icmp ult <8 x i64> %load.a, %load.b
%sel = select <8 x i1> %cmp, <8 x i64> %load.b, <8 x i64> %load.a
store <8 x i64> %sel, <8 x i64>* %ptr.a, align 2
@@ -4673,12 +4673,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32* %b, i64 %index
+ %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
+ %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
%ptr.a = bitcast i32* %gep.a to <8 x i64>*
%ptr.b = bitcast i32* %gep.b to <8 x i64>*
- %load.a = load <8 x i64>* %ptr.a, align 2
- %load.b = load <8 x i64>* %ptr.b, align 2
+ %load.a = load <8 x i64>, <8 x i64>* %ptr.a, align 2
+ %load.b = load <8 x i64>, <8 x i64>* %ptr.b, align 2
%cmp = icmp ule <8 x i64> %load.a, %load.b
%sel = select <8 x i1> %cmp, <8 x i64> %load.b, <8 x i64> %load.a
store <8 x i64> %sel, <8 x i64>* %ptr.a, align 2
@@ -4699,12 +4699,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32* %b, i64 %index
+ %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
+ %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
%ptr.a = bitcast i32* %gep.a to <8 x i64>*
%ptr.b = bitcast i32* %gep.b to <8 x i64>*
- %load.a = load <8 x i64>* %ptr.a, align 2
- %load.b = load <8 x i64>* %ptr.b, align 2
+ %load.a = load <8 x i64>, <8 x i64>* %ptr.a, align 2
+ %load.b = load <8 x i64>, <8 x i64>* %ptr.b, align 2
%cmp = icmp ugt <8 x i64> %load.a, %load.b
%sel = select <8 x i1> %cmp, <8 x i64> %load.b, <8 x i64> %load.a
store <8 x i64> %sel, <8 x i64>* %ptr.a, align 2
@@ -4725,12 +4725,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32* %b, i64 %index
+ %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
+ %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
%ptr.a = bitcast i32* %gep.a to <8 x i64>*
%ptr.b = bitcast i32* %gep.b to <8 x i64>*
- %load.a = load <8 x i64>* %ptr.a, align 2
- %load.b = load <8 x i64>* %ptr.b, align 2
+ %load.a = load <8 x i64>, <8 x i64>* %ptr.a, align 2
+ %load.b = load <8 x i64>, <8 x i64>* %ptr.b, align 2
%cmp = icmp uge <8 x i64> %load.a, %load.b
%sel = select <8 x i1> %cmp, <8 x i64> %load.b, <8 x i64> %load.a
store <8 x i64> %sel, <8 x i64>* %ptr.a, align 2
@@ -4751,12 +4751,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32* %b, i64 %index
+ %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
+ %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
%ptr.a = bitcast i32* %gep.a to <4 x i64>*
%ptr.b = bitcast i32* %gep.b to <4 x i64>*
- %load.a = load <4 x i64>* %ptr.a, align 2
- %load.b = load <4 x i64>* %ptr.b, align 2
+ %load.a = load <4 x i64>, <4 x i64>* %ptr.a, align 2
+ %load.b = load <4 x i64>, <4 x i64>* %ptr.b, align 2
%cmp = icmp slt <4 x i64> %load.a, %load.b
%sel = select <4 x i1> %cmp, <4 x i64> %load.a, <4 x i64> %load.b
store <4 x i64> %sel, <4 x i64>* %ptr.a, align 2
@@ -4777,12 +4777,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32* %b, i64 %index
+ %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
+ %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
%ptr.a = bitcast i32* %gep.a to <4 x i64>*
%ptr.b = bitcast i32* %gep.b to <4 x i64>*
- %load.a = load <4 x i64>* %ptr.a, align 2
- %load.b = load <4 x i64>* %ptr.b, align 2
+ %load.a = load <4 x i64>, <4 x i64>* %ptr.a, align 2
+ %load.b = load <4 x i64>, <4 x i64>* %ptr.b, align 2
%cmp = icmp sle <4 x i64> %load.a, %load.b
%sel = select <4 x i1> %cmp, <4 x i64> %load.a, <4 x i64> %load.b
store <4 x i64> %sel, <4 x i64>* %ptr.a, align 2
@@ -4803,12 +4803,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32* %b, i64 %index
+ %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
+ %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
%ptr.a = bitcast i32* %gep.a to <4 x i64>*
%ptr.b = bitcast i32* %gep.b to <4 x i64>*
- %load.a = load <4 x i64>* %ptr.a, align 2
- %load.b = load <4 x i64>* %ptr.b, align 2
+ %load.a = load <4 x i64>, <4 x i64>* %ptr.a, align 2
+ %load.b = load <4 x i64>, <4 x i64>* %ptr.b, align 2
%cmp = icmp sgt <4 x i64> %load.a, %load.b
%sel = select <4 x i1> %cmp, <4 x i64> %load.a, <4 x i64> %load.b
store <4 x i64> %sel, <4 x i64>* %ptr.a, align 2
@@ -4829,12 +4829,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32* %b, i64 %index
+ %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
+ %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
%ptr.a = bitcast i32* %gep.a to <4 x i64>*
%ptr.b = bitcast i32* %gep.b to <4 x i64>*
- %load.a = load <4 x i64>* %ptr.a, align 2
- %load.b = load <4 x i64>* %ptr.b, align 2
+ %load.a = load <4 x i64>, <4 x i64>* %ptr.a, align 2
+ %load.b = load <4 x i64>, <4 x i64>* %ptr.b, align 2
%cmp = icmp sge <4 x i64> %load.a, %load.b
%sel = select <4 x i1> %cmp, <4 x i64> %load.a, <4 x i64> %load.b
store <4 x i64> %sel, <4 x i64>* %ptr.a, align 2
@@ -4855,12 +4855,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32* %b, i64 %index
+ %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
+ %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
%ptr.a = bitcast i32* %gep.a to <4 x i64>*
%ptr.b = bitcast i32* %gep.b to <4 x i64>*
- %load.a = load <4 x i64>* %ptr.a, align 2
- %load.b = load <4 x i64>* %ptr.b, align 2
+ %load.a = load <4 x i64>, <4 x i64>* %ptr.a, align 2
+ %load.b = load <4 x i64>, <4 x i64>* %ptr.b, align 2
%cmp = icmp ult <4 x i64> %load.a, %load.b
%sel = select <4 x i1> %cmp, <4 x i64> %load.a, <4 x i64> %load.b
store <4 x i64> %sel, <4 x i64>* %ptr.a, align 2
@@ -4881,12 +4881,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32* %b, i64 %index
+ %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
+ %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
%ptr.a = bitcast i32* %gep.a to <4 x i64>*
%ptr.b = bitcast i32* %gep.b to <4 x i64>*
- %load.a = load <4 x i64>* %ptr.a, align 2
- %load.b = load <4 x i64>* %ptr.b, align 2
+ %load.a = load <4 x i64>, <4 x i64>* %ptr.a, align 2
+ %load.b = load <4 x i64>, <4 x i64>* %ptr.b, align 2
%cmp = icmp ule <4 x i64> %load.a, %load.b
%sel = select <4 x i1> %cmp, <4 x i64> %load.a, <4 x i64> %load.b
store <4 x i64> %sel, <4 x i64>* %ptr.a, align 2
@@ -4907,12 +4907,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32* %b, i64 %index
+ %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
+ %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
%ptr.a = bitcast i32* %gep.a to <4 x i64>*
%ptr.b = bitcast i32* %gep.b to <4 x i64>*
- %load.a = load <4 x i64>* %ptr.a, align 2
- %load.b = load <4 x i64>* %ptr.b, align 2
+ %load.a = load <4 x i64>, <4 x i64>* %ptr.a, align 2
+ %load.b = load <4 x i64>, <4 x i64>* %ptr.b, align 2
%cmp = icmp ugt <4 x i64> %load.a, %load.b
%sel = select <4 x i1> %cmp, <4 x i64> %load.a, <4 x i64> %load.b
store <4 x i64> %sel, <4 x i64>* %ptr.a, align 2
@@ -4933,12 +4933,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32* %b, i64 %index
+ %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
+ %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
%ptr.a = bitcast i32* %gep.a to <4 x i64>*
%ptr.b = bitcast i32* %gep.b to <4 x i64>*
- %load.a = load <4 x i64>* %ptr.a, align 2
- %load.b = load <4 x i64>* %ptr.b, align 2
+ %load.a = load <4 x i64>, <4 x i64>* %ptr.a, align 2
+ %load.b = load <4 x i64>, <4 x i64>* %ptr.b, align 2
%cmp = icmp uge <4 x i64> %load.a, %load.b
%sel = select <4 x i1> %cmp, <4 x i64> %load.a, <4 x i64> %load.b
store <4 x i64> %sel, <4 x i64>* %ptr.a, align 2
@@ -4959,12 +4959,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32* %b, i64 %index
+ %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
+ %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
%ptr.a = bitcast i32* %gep.a to <4 x i64>*
%ptr.b = bitcast i32* %gep.b to <4 x i64>*
- %load.a = load <4 x i64>* %ptr.a, align 2
- %load.b = load <4 x i64>* %ptr.b, align 2
+ %load.a = load <4 x i64>, <4 x i64>* %ptr.a, align 2
+ %load.b = load <4 x i64>, <4 x i64>* %ptr.b, align 2
%cmp = icmp slt <4 x i64> %load.a, %load.b
%sel = select <4 x i1> %cmp, <4 x i64> %load.b, <4 x i64> %load.a
store <4 x i64> %sel, <4 x i64>* %ptr.a, align 2
@@ -4985,12 +4985,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32* %b, i64 %index
+ %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
+ %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
%ptr.a = bitcast i32* %gep.a to <4 x i64>*
%ptr.b = bitcast i32* %gep.b to <4 x i64>*
- %load.a = load <4 x i64>* %ptr.a, align 2
- %load.b = load <4 x i64>* %ptr.b, align 2
+ %load.a = load <4 x i64>, <4 x i64>* %ptr.a, align 2
+ %load.b = load <4 x i64>, <4 x i64>* %ptr.b, align 2
%cmp = icmp sle <4 x i64> %load.a, %load.b
%sel = select <4 x i1> %cmp, <4 x i64> %load.b, <4 x i64> %load.a
store <4 x i64> %sel, <4 x i64>* %ptr.a, align 2
@@ -5011,12 +5011,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32* %b, i64 %index
+ %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
+ %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
%ptr.a = bitcast i32* %gep.a to <4 x i64>*
%ptr.b = bitcast i32* %gep.b to <4 x i64>*
- %load.a = load <4 x i64>* %ptr.a, align 2
- %load.b = load <4 x i64>* %ptr.b, align 2
+ %load.a = load <4 x i64>, <4 x i64>* %ptr.a, align 2
+ %load.b = load <4 x i64>, <4 x i64>* %ptr.b, align 2
%cmp = icmp sgt <4 x i64> %load.a, %load.b
%sel = select <4 x i1> %cmp, <4 x i64> %load.b, <4 x i64> %load.a
store <4 x i64> %sel, <4 x i64>* %ptr.a, align 2
@@ -5037,12 +5037,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32* %b, i64 %index
+ %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
+ %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
%ptr.a = bitcast i32* %gep.a to <4 x i64>*
%ptr.b = bitcast i32* %gep.b to <4 x i64>*
- %load.a = load <4 x i64>* %ptr.a, align 2
- %load.b = load <4 x i64>* %ptr.b, align 2
+ %load.a = load <4 x i64>, <4 x i64>* %ptr.a, align 2
+ %load.b = load <4 x i64>, <4 x i64>* %ptr.b, align 2
%cmp = icmp sge <4 x i64> %load.a, %load.b
%sel = select <4 x i1> %cmp, <4 x i64> %load.b, <4 x i64> %load.a
store <4 x i64> %sel, <4 x i64>* %ptr.a, align 2
@@ -5063,12 +5063,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32* %b, i64 %index
+ %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
+ %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
%ptr.a = bitcast i32* %gep.a to <4 x i64>*
%ptr.b = bitcast i32* %gep.b to <4 x i64>*
- %load.a = load <4 x i64>* %ptr.a, align 2
- %load.b = load <4 x i64>* %ptr.b, align 2
+ %load.a = load <4 x i64>, <4 x i64>* %ptr.a, align 2
+ %load.b = load <4 x i64>, <4 x i64>* %ptr.b, align 2
%cmp = icmp ult <4 x i64> %load.a, %load.b
%sel = select <4 x i1> %cmp, <4 x i64> %load.b, <4 x i64> %load.a
store <4 x i64> %sel, <4 x i64>* %ptr.a, align 2
@@ -5089,12 +5089,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32* %b, i64 %index
+ %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
+ %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
%ptr.a = bitcast i32* %gep.a to <4 x i64>*
%ptr.b = bitcast i32* %gep.b to <4 x i64>*
- %load.a = load <4 x i64>* %ptr.a, align 2
- %load.b = load <4 x i64>* %ptr.b, align 2
+ %load.a = load <4 x i64>, <4 x i64>* %ptr.a, align 2
+ %load.b = load <4 x i64>, <4 x i64>* %ptr.b, align 2
%cmp = icmp ule <4 x i64> %load.a, %load.b
%sel = select <4 x i1> %cmp, <4 x i64> %load.b, <4 x i64> %load.a
store <4 x i64> %sel, <4 x i64>* %ptr.a, align 2
@@ -5115,12 +5115,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32* %b, i64 %index
+ %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
+ %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
%ptr.a = bitcast i32* %gep.a to <4 x i64>*
%ptr.b = bitcast i32* %gep.b to <4 x i64>*
- %load.a = load <4 x i64>* %ptr.a, align 2
- %load.b = load <4 x i64>* %ptr.b, align 2
+ %load.a = load <4 x i64>, <4 x i64>* %ptr.a, align 2
+ %load.b = load <4 x i64>, <4 x i64>* %ptr.b, align 2
%cmp = icmp ugt <4 x i64> %load.a, %load.b
%sel = select <4 x i1> %cmp, <4 x i64> %load.b, <4 x i64> %load.a
store <4 x i64> %sel, <4 x i64>* %ptr.a, align 2
@@ -5141,12 +5141,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32* %b, i64 %index
+ %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
+ %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
%ptr.a = bitcast i32* %gep.a to <4 x i64>*
%ptr.b = bitcast i32* %gep.b to <4 x i64>*
- %load.a = load <4 x i64>* %ptr.a, align 2
- %load.b = load <4 x i64>* %ptr.b, align 2
+ %load.a = load <4 x i64>, <4 x i64>* %ptr.a, align 2
+ %load.b = load <4 x i64>, <4 x i64>* %ptr.b, align 2
%cmp = icmp uge <4 x i64> %load.a, %load.b
%sel = select <4 x i1> %cmp, <4 x i64> %load.b, <4 x i64> %load.a
store <4 x i64> %sel, <4 x i64>* %ptr.a, align 2
@@ -5167,12 +5167,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32* %b, i64 %index
+ %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
+ %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
%ptr.a = bitcast i32* %gep.a to <2 x i64>*
%ptr.b = bitcast i32* %gep.b to <2 x i64>*
- %load.a = load <2 x i64>* %ptr.a, align 2
- %load.b = load <2 x i64>* %ptr.b, align 2
+ %load.a = load <2 x i64>, <2 x i64>* %ptr.a, align 2
+ %load.b = load <2 x i64>, <2 x i64>* %ptr.b, align 2
%cmp = icmp slt <2 x i64> %load.a, %load.b
%sel = select <2 x i1> %cmp, <2 x i64> %load.a, <2 x i64> %load.b
store <2 x i64> %sel, <2 x i64>* %ptr.a, align 2
@@ -5193,12 +5193,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32* %b, i64 %index
+ %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
+ %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
%ptr.a = bitcast i32* %gep.a to <2 x i64>*
%ptr.b = bitcast i32* %gep.b to <2 x i64>*
- %load.a = load <2 x i64>* %ptr.a, align 2
- %load.b = load <2 x i64>* %ptr.b, align 2
+ %load.a = load <2 x i64>, <2 x i64>* %ptr.a, align 2
+ %load.b = load <2 x i64>, <2 x i64>* %ptr.b, align 2
%cmp = icmp sle <2 x i64> %load.a, %load.b
%sel = select <2 x i1> %cmp, <2 x i64> %load.a, <2 x i64> %load.b
store <2 x i64> %sel, <2 x i64>* %ptr.a, align 2
@@ -5219,12 +5219,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32* %b, i64 %index
+ %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
+ %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
%ptr.a = bitcast i32* %gep.a to <2 x i64>*
%ptr.b = bitcast i32* %gep.b to <2 x i64>*
- %load.a = load <2 x i64>* %ptr.a, align 2
- %load.b = load <2 x i64>* %ptr.b, align 2
+ %load.a = load <2 x i64>, <2 x i64>* %ptr.a, align 2
+ %load.b = load <2 x i64>, <2 x i64>* %ptr.b, align 2
%cmp = icmp sgt <2 x i64> %load.a, %load.b
%sel = select <2 x i1> %cmp, <2 x i64> %load.a, <2 x i64> %load.b
store <2 x i64> %sel, <2 x i64>* %ptr.a, align 2
@@ -5245,12 +5245,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32* %b, i64 %index
+ %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
+ %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
%ptr.a = bitcast i32* %gep.a to <2 x i64>*
%ptr.b = bitcast i32* %gep.b to <2 x i64>*
- %load.a = load <2 x i64>* %ptr.a, align 2
- %load.b = load <2 x i64>* %ptr.b, align 2
+ %load.a = load <2 x i64>, <2 x i64>* %ptr.a, align 2
+ %load.b = load <2 x i64>, <2 x i64>* %ptr.b, align 2
%cmp = icmp sge <2 x i64> %load.a, %load.b
%sel = select <2 x i1> %cmp, <2 x i64> %load.a, <2 x i64> %load.b
store <2 x i64> %sel, <2 x i64>* %ptr.a, align 2
@@ -5271,12 +5271,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32* %b, i64 %index
+ %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
+ %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
%ptr.a = bitcast i32* %gep.a to <2 x i64>*
%ptr.b = bitcast i32* %gep.b to <2 x i64>*
- %load.a = load <2 x i64>* %ptr.a, align 2
- %load.b = load <2 x i64>* %ptr.b, align 2
+ %load.a = load <2 x i64>, <2 x i64>* %ptr.a, align 2
+ %load.b = load <2 x i64>, <2 x i64>* %ptr.b, align 2
%cmp = icmp ult <2 x i64> %load.a, %load.b
%sel = select <2 x i1> %cmp, <2 x i64> %load.a, <2 x i64> %load.b
store <2 x i64> %sel, <2 x i64>* %ptr.a, align 2
@@ -5297,12 +5297,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32* %b, i64 %index
+ %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
+ %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
%ptr.a = bitcast i32* %gep.a to <2 x i64>*
%ptr.b = bitcast i32* %gep.b to <2 x i64>*
- %load.a = load <2 x i64>* %ptr.a, align 2
- %load.b = load <2 x i64>* %ptr.b, align 2
+ %load.a = load <2 x i64>, <2 x i64>* %ptr.a, align 2
+ %load.b = load <2 x i64>, <2 x i64>* %ptr.b, align 2
%cmp = icmp ule <2 x i64> %load.a, %load.b
%sel = select <2 x i1> %cmp, <2 x i64> %load.a, <2 x i64> %load.b
store <2 x i64> %sel, <2 x i64>* %ptr.a, align 2
@@ -5323,12 +5323,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32* %b, i64 %index
+ %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
+ %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
%ptr.a = bitcast i32* %gep.a to <2 x i64>*
%ptr.b = bitcast i32* %gep.b to <2 x i64>*
- %load.a = load <2 x i64>* %ptr.a, align 2
- %load.b = load <2 x i64>* %ptr.b, align 2
+ %load.a = load <2 x i64>, <2 x i64>* %ptr.a, align 2
+ %load.b = load <2 x i64>, <2 x i64>* %ptr.b, align 2
%cmp = icmp ugt <2 x i64> %load.a, %load.b
%sel = select <2 x i1> %cmp, <2 x i64> %load.a, <2 x i64> %load.b
store <2 x i64> %sel, <2 x i64>* %ptr.a, align 2
@@ -5349,12 +5349,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32* %b, i64 %index
+ %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
+ %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
%ptr.a = bitcast i32* %gep.a to <2 x i64>*
%ptr.b = bitcast i32* %gep.b to <2 x i64>*
- %load.a = load <2 x i64>* %ptr.a, align 2
- %load.b = load <2 x i64>* %ptr.b, align 2
+ %load.a = load <2 x i64>, <2 x i64>* %ptr.a, align 2
+ %load.b = load <2 x i64>, <2 x i64>* %ptr.b, align 2
%cmp = icmp uge <2 x i64> %load.a, %load.b
%sel = select <2 x i1> %cmp, <2 x i64> %load.a, <2 x i64> %load.b
store <2 x i64> %sel, <2 x i64>* %ptr.a, align 2
@@ -5375,12 +5375,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32* %b, i64 %index
+ %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
+ %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
%ptr.a = bitcast i32* %gep.a to <2 x i64>*
%ptr.b = bitcast i32* %gep.b to <2 x i64>*
- %load.a = load <2 x i64>* %ptr.a, align 2
- %load.b = load <2 x i64>* %ptr.b, align 2
+ %load.a = load <2 x i64>, <2 x i64>* %ptr.a, align 2
+ %load.b = load <2 x i64>, <2 x i64>* %ptr.b, align 2
%cmp = icmp slt <2 x i64> %load.a, %load.b
%sel = select <2 x i1> %cmp, <2 x i64> %load.b, <2 x i64> %load.a
store <2 x i64> %sel, <2 x i64>* %ptr.a, align 2
@@ -5401,12 +5401,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32* %b, i64 %index
+ %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
+ %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
%ptr.a = bitcast i32* %gep.a to <2 x i64>*
%ptr.b = bitcast i32* %gep.b to <2 x i64>*
- %load.a = load <2 x i64>* %ptr.a, align 2
- %load.b = load <2 x i64>* %ptr.b, align 2
+ %load.a = load <2 x i64>, <2 x i64>* %ptr.a, align 2
+ %load.b = load <2 x i64>, <2 x i64>* %ptr.b, align 2
%cmp = icmp sle <2 x i64> %load.a, %load.b
%sel = select <2 x i1> %cmp, <2 x i64> %load.b, <2 x i64> %load.a
store <2 x i64> %sel, <2 x i64>* %ptr.a, align 2
@@ -5427,12 +5427,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32* %b, i64 %index
+ %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
+ %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
%ptr.a = bitcast i32* %gep.a to <2 x i64>*
%ptr.b = bitcast i32* %gep.b to <2 x i64>*
- %load.a = load <2 x i64>* %ptr.a, align 2
- %load.b = load <2 x i64>* %ptr.b, align 2
+ %load.a = load <2 x i64>, <2 x i64>* %ptr.a, align 2
+ %load.b = load <2 x i64>, <2 x i64>* %ptr.b, align 2
%cmp = icmp sgt <2 x i64> %load.a, %load.b
%sel = select <2 x i1> %cmp, <2 x i64> %load.b, <2 x i64> %load.a
store <2 x i64> %sel, <2 x i64>* %ptr.a, align 2
@@ -5453,12 +5453,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32* %b, i64 %index
+ %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
+ %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
%ptr.a = bitcast i32* %gep.a to <2 x i64>*
%ptr.b = bitcast i32* %gep.b to <2 x i64>*
- %load.a = load <2 x i64>* %ptr.a, align 2
- %load.b = load <2 x i64>* %ptr.b, align 2
+ %load.a = load <2 x i64>, <2 x i64>* %ptr.a, align 2
+ %load.b = load <2 x i64>, <2 x i64>* %ptr.b, align 2
%cmp = icmp sge <2 x i64> %load.a, %load.b
%sel = select <2 x i1> %cmp, <2 x i64> %load.b, <2 x i64> %load.a
store <2 x i64> %sel, <2 x i64>* %ptr.a, align 2
@@ -5479,12 +5479,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32* %b, i64 %index
+ %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
+ %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
%ptr.a = bitcast i32* %gep.a to <2 x i64>*
%ptr.b = bitcast i32* %gep.b to <2 x i64>*
- %load.a = load <2 x i64>* %ptr.a, align 2
- %load.b = load <2 x i64>* %ptr.b, align 2
+ %load.a = load <2 x i64>, <2 x i64>* %ptr.a, align 2
+ %load.b = load <2 x i64>, <2 x i64>* %ptr.b, align 2
%cmp = icmp ult <2 x i64> %load.a, %load.b
%sel = select <2 x i1> %cmp, <2 x i64> %load.b, <2 x i64> %load.a
store <2 x i64> %sel, <2 x i64>* %ptr.a, align 2
@@ -5505,12 +5505,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32* %b, i64 %index
+ %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
+ %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
%ptr.a = bitcast i32* %gep.a to <2 x i64>*
%ptr.b = bitcast i32* %gep.b to <2 x i64>*
- %load.a = load <2 x i64>* %ptr.a, align 2
- %load.b = load <2 x i64>* %ptr.b, align 2
+ %load.a = load <2 x i64>, <2 x i64>* %ptr.a, align 2
+ %load.b = load <2 x i64>, <2 x i64>* %ptr.b, align 2
%cmp = icmp ule <2 x i64> %load.a, %load.b
%sel = select <2 x i1> %cmp, <2 x i64> %load.b, <2 x i64> %load.a
store <2 x i64> %sel, <2 x i64>* %ptr.a, align 2
@@ -5531,12 +5531,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32* %b, i64 %index
+ %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
+ %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
%ptr.a = bitcast i32* %gep.a to <2 x i64>*
%ptr.b = bitcast i32* %gep.b to <2 x i64>*
- %load.a = load <2 x i64>* %ptr.a, align 2
- %load.b = load <2 x i64>* %ptr.b, align 2
+ %load.a = load <2 x i64>, <2 x i64>* %ptr.a, align 2
+ %load.b = load <2 x i64>, <2 x i64>* %ptr.b, align 2
%cmp = icmp ugt <2 x i64> %load.a, %load.b
%sel = select <2 x i1> %cmp, <2 x i64> %load.b, <2 x i64> %load.a
store <2 x i64> %sel, <2 x i64>* %ptr.a, align 2
@@ -5557,12 +5557,12 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %gep.a = getelementptr inbounds i32* %a, i64 %index
- %gep.b = getelementptr inbounds i32* %b, i64 %index
+ %gep.a = getelementptr inbounds i32, i32* %a, i64 %index
+ %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
%ptr.a = bitcast i32* %gep.a to <2 x i64>*
%ptr.b = bitcast i32* %gep.b to <2 x i64>*
- %load.a = load <2 x i64>* %ptr.a, align 2
- %load.b = load <2 x i64>* %ptr.b, align 2
+ %load.a = load <2 x i64>, <2 x i64>* %ptr.a, align 2
+ %load.b = load <2 x i64>, <2 x i64>* %ptr.b, align 2
%cmp = icmp uge <2 x i64> %load.a, %load.b
%sel = select <2 x i1> %cmp, <2 x i64> %load.b, <2 x i64> %load.a
store <2 x i64> %sel, <2 x i64>* %ptr.a, align 2
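All of the hunks above apply the same mechanical rewrite: load and getelementptr now spell out the pointee type explicitly instead of deriving it from the pointer operand's type. A minimal standalone sketch of the old and new spellings (hypothetical function and value names, not taken from any of these tests):

; sketch only -- not part of the imported test suite
; old (typeless-pointer) spelling:
;   %gep = getelementptr inbounds i32* %p, i64 %idx
;   %val = load i32* %gep
; new spelling used throughout the updated tests:
define i32 @sketch(i32* %p, i64 %idx) {
  %gep = getelementptr inbounds i32, i32* %p, i64 %idx
  %val = load i32, i32* %gep, align 4
  ret i32 %val
}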
diff --git a/test/CodeGen/X86/vselect.ll b/test/CodeGen/X86/vselect.ll
index 3bd1dc4cb972..8c8092888834 100644
--- a/test/CodeGen/X86/vselect.ll
+++ b/test/CodeGen/X86/vselect.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=corei7 -mattr=-sse4.1 < %s | FileCheck %s
+; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+sse2,-sse4.1 < %s | FileCheck %s
; Verify that we don't emit packed vector shift instructions if the
; condition used by the vector select is a vector of constants.
@@ -6,9 +6,8 @@
define <4 x float> @test1(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: test1:
; CHECK: # BB#0:
-; CHECK-NEXT: andps {{.*}}(%rip), %xmm1
-; CHECK-NEXT: andps {{.*}}(%rip), %xmm0
-; CHECK-NEXT: orps %xmm1, %xmm0
+; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[1,3]
+; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
; CHECK-NEXT: retq
%1 = select <4 x i1> <i1 true, i1 false, i1 true, i1 false>, <4 x float> %a, <4 x float> %b
ret <4 x float> %1
@@ -17,8 +16,8 @@ define <4 x float> @test1(<4 x float> %a, <4 x float> %b) {
define <4 x float> @test2(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: test2:
; CHECK: # BB#0:
-; CHECK-NEXT: movsd %xmm0, %xmm1
-; CHECK-NEXT: movaps %xmm1, %xmm0
+; CHECK-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
+; CHECK-NEXT: movapd %xmm1, %xmm0
; CHECK-NEXT: retq
%1 = select <4 x i1> <i1 true, i1 true, i1 false, i1 false>, <4 x float> %a, <4 x float> %b
ret <4 x float> %1
@@ -27,7 +26,7 @@ define <4 x float> @test2(<4 x float> %a, <4 x float> %b) {
define <4 x float> @test3(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: test3:
; CHECK: # BB#0:
-; CHECK-NEXT: movsd %xmm1, %xmm0
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; CHECK-NEXT: retq
%1 = select <4 x i1> <i1 false, i1 false, i1 true, i1 true>, <4 x float> %a, <4 x float> %b
ret <4 x float> %1
@@ -53,10 +52,6 @@ define <4 x float> @test5(<4 x float> %a, <4 x float> %b) {
define <8 x i16> @test6(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test6:
; CHECK: # BB#0:
-; CHECK-NEXT: movaps {{.*#+}} xmm1 = [0,65535,0,65535,0,65535,0,65535]
-; CHECK-NEXT: andps %xmm0, %xmm1
-; CHECK-NEXT: andps {{.*}}(%rip), %xmm0
-; CHECK-NEXT: orps %xmm1, %xmm0
; CHECK-NEXT: retq
%1 = select <8 x i1> <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false>, <8 x i16> %a, <8 x i16> %a
ret <8 x i16> %1
@@ -65,9 +60,8 @@ define <8 x i16> @test6(<8 x i16> %a, <8 x i16> %b) {
define <8 x i16> @test7(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test7:
; CHECK: # BB#0:
-; CHECK-NEXT: andps {{.*}}(%rip), %xmm1
-; CHECK-NEXT: andps {{.*}}(%rip), %xmm0
-; CHECK-NEXT: orps %xmm1, %xmm0
+; CHECK-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
+; CHECK-NEXT: movapd %xmm1, %xmm0
; CHECK-NEXT: retq
%1 = select <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false>, <8 x i16> %a, <8 x i16> %b
ret <8 x i16> %1
@@ -76,9 +70,7 @@ define <8 x i16> @test7(<8 x i16> %a, <8 x i16> %b) {
define <8 x i16> @test8(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test8:
; CHECK: # BB#0:
-; CHECK-NEXT: andps {{.*}}(%rip), %xmm1
-; CHECK-NEXT: andps {{.*}}(%rip), %xmm0
-; CHECK-NEXT: orps %xmm1, %xmm0
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; CHECK-NEXT: retq
%1 = select <8 x i1> <i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true>, <8 x i16> %a, <8 x i16> %b
ret <8 x i16> %1
@@ -104,7 +96,7 @@ define <8 x i16> @test10(<8 x i16> %a, <8 x i16> %b) {
define <8 x i16> @test11(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test11:
; CHECK: # BB#0:
-; CHECK-NEXT: movaps {{.*#+}} xmm2 = <0,65535,65535,0,u,65535,65535,u>
+; CHECK-NEXT: movaps {{.*#+}} xmm2 = [0,65535,65535,0,65535,65535,65535,65535]
; CHECK-NEXT: andps %xmm2, %xmm0
; CHECK-NEXT: andnps %xmm1, %xmm2
; CHECK-NEXT: orps %xmm2, %xmm0
@@ -170,7 +162,7 @@ define <8 x i16> @test17(<8 x i16> %a, <8 x i16> %b) {
define <4 x float> @test18(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: test18:
; CHECK: # BB#0:
-; CHECK-NEXT: movss %xmm1, %xmm0
+; CHECK-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; CHECK-NEXT: retq
%1 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %a, <4 x float> %b
ret <4 x float> %1
@@ -179,7 +171,7 @@ define <4 x float> @test18(<4 x float> %a, <4 x float> %b) {
define <4 x i32> @test19(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test19:
; CHECK: # BB#0:
-; CHECK-NEXT: movss %xmm1, %xmm0
+; CHECK-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; CHECK-NEXT: retq
%1 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x i32> %a, <4 x i32> %b
ret <4 x i32> %1
@@ -188,7 +180,7 @@ define <4 x i32> @test19(<4 x i32> %a, <4 x i32> %b) {
define <2 x double> @test20(<2 x double> %a, <2 x double> %b) {
; CHECK-LABEL: test20:
; CHECK: # BB#0:
-; CHECK-NEXT: movsd %xmm1, %xmm0
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; CHECK-NEXT: retq
%1 = select <2 x i1> <i1 false, i1 true>, <2 x double> %a, <2 x double> %b
ret <2 x double> %1
@@ -197,7 +189,7 @@ define <2 x double> @test20(<2 x double> %a, <2 x double> %b) {
define <2 x i64> @test21(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test21:
; CHECK: # BB#0:
-; CHECK-NEXT: movsd %xmm1, %xmm0
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; CHECK-NEXT: retq
%1 = select <2 x i1> <i1 false, i1 true>, <2 x i64> %a, <2 x i64> %b
ret <2 x i64> %1
@@ -206,7 +198,7 @@ define <2 x i64> @test21(<2 x i64> %a, <2 x i64> %b) {
define <4 x float> @test22(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: test22:
; CHECK: # BB#0:
-; CHECK-NEXT: movss %xmm0, %xmm1
+; CHECK-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
; CHECK-NEXT: movaps %xmm1, %xmm0
; CHECK-NEXT: retq
%1 = select <4 x i1> <i1 true, i1 false, i1 false, i1 false>, <4 x float> %a, <4 x float> %b
@@ -216,7 +208,7 @@ define <4 x float> @test22(<4 x float> %a, <4 x float> %b) {
define <4 x i32> @test23(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test23:
; CHECK: # BB#0:
-; CHECK-NEXT: movss %xmm0, %xmm1
+; CHECK-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
; CHECK-NEXT: movaps %xmm1, %xmm0
; CHECK-NEXT: retq
%1 = select <4 x i1> <i1 true, i1 false, i1 false, i1 false>, <4 x i32> %a, <4 x i32> %b
@@ -226,8 +218,8 @@ define <4 x i32> @test23(<4 x i32> %a, <4 x i32> %b) {
define <2 x double> @test24(<2 x double> %a, <2 x double> %b) {
; CHECK-LABEL: test24:
; CHECK: # BB#0:
-; CHECK-NEXT: movsd %xmm0, %xmm1
-; CHECK-NEXT: movaps %xmm1, %xmm0
+; CHECK-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
+; CHECK-NEXT: movapd %xmm1, %xmm0
; CHECK-NEXT: retq
%1 = select <2 x i1> <i1 true, i1 false>, <2 x double> %a, <2 x double> %b
ret <2 x double> %1
@@ -236,8 +228,8 @@ define <2 x double> @test24(<2 x double> %a, <2 x double> %b) {
define <2 x i64> @test25(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test25:
; CHECK: # BB#0:
-; CHECK-NEXT: movsd %xmm0, %xmm1
-; CHECK-NEXT: movaps %xmm1, %xmm0
+; CHECK-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
+; CHECK-NEXT: movapd %xmm1, %xmm0
; CHECK-NEXT: retq
%1 = select <2 x i1> <i1 true, i1 false>, <2 x i64> %a, <2 x i64> %b
ret <2 x i64> %1
@@ -276,6 +268,7 @@ define <16 x double> @select_illegal(<16 x double> %a, <16 x double> %b) {
; CHECK-NEXT: movaps %xmm2, 32(%rdi)
; CHECK-NEXT: movaps %xmm1, 16(%rdi)
; CHECK-NEXT: movaps %xmm0, (%rdi)
+; CHECK-NEXT: movq %rdi, %rax
; CHECK-NEXT: retq
%sel = select <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false>, <16 x double> %a, <16 x double> %b
ret <16 x double> %sel
diff --git a/test/CodeGen/X86/vshift-4.ll b/test/CodeGen/X86/vshift-4.ll
index cda9bc893a2d..4ab5db889820 100644
--- a/test/CodeGen/X86/vshift-4.ll
+++ b/test/CodeGen/X86/vshift-4.ll
@@ -13,11 +13,16 @@ entry:
ret void
}
-; shift1b can't use a packed shift
+; shift1b can't use a packed shift, but it can shift the lanes separately and shuffle them back together
define void @shift1b(<2 x i64> %val, <2 x i64>* %dst, <2 x i64> %sh) nounwind {
entry:
; CHECK-LABEL: shift1b:
-; CHECK: shll
+; CHECK: pshufd {{.*#+}} xmm2 = xmm1[2,3,0,1]
+; CHECK-NEXT: movdqa %xmm0, %xmm3
+; CHECK-NEXT: psllq %xmm2, %xmm3
+; CHECK-NEXT: movq {{.*#+}} xmm1 = xmm1[0],zero
+; CHECK-NEXT: psllq %xmm1, %xmm0
+; CHECK-NEXT: movsd {{.*#+}} xmm3 = xmm0[0],xmm3[1]
%shamt = shufflevector <2 x i64> %sh, <2 x i64> undef, <2 x i32> <i32 0, i32 1>
%shl = shl <2 x i64> %val, %shamt
store <2 x i64> %shl, <2 x i64>* %dst
diff --git a/test/CodeGen/X86/vshift-5.ll b/test/CodeGen/X86/vshift-5.ll
index 562e520c5528..a6ae8d54bef1 100644
--- a/test/CodeGen/X86/vshift-5.ll
+++ b/test/CodeGen/X86/vshift-5.ll
@@ -7,7 +7,7 @@ entry:
; CHECK-LABEL: shift5a:
; CHECK: movd
; CHECK: pslld
- %amt = load i32* %pamt
+ %amt = load i32, i32* %pamt
%tmp0 = insertelement <4 x i32> undef, i32 %amt, i32 0
%shamt = shufflevector <4 x i32> %tmp0, <4 x i32> undef, <4 x i32> zeroinitializer
%shl = shl <4 x i32> %val, %shamt
@@ -21,7 +21,7 @@ entry:
; CHECK-LABEL: shift5b:
; CHECK: movd
; CHECK: psrad
- %amt = load i32* %pamt
+ %amt = load i32, i32* %pamt
%tmp0 = insertelement <4 x i32> undef, i32 %amt, i32 0
%shamt = shufflevector <4 x i32> %tmp0, <4 x i32> undef, <4 x i32> zeroinitializer
%shr = ashr <4 x i32> %val, %shamt
diff --git a/test/CodeGen/X86/vshift-6.ll b/test/CodeGen/X86/vshift-6.ll
index f50d9a6bb124..551a13850039 100644
--- a/test/CodeGen/X86/vshift-6.ll
+++ b/test/CodeGen/X86/vshift-6.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mcpu=corei7 -march=x86-64 -mattr=+sse2 | FileCheck %s
+; RUN: llc < %s -march=x86-64 -mattr=+sse2 | FileCheck %s
; This test makes sure that the compiler does not crash with an
; assertion failure when trying to fold a vector shift left
@@ -25,7 +25,7 @@
define <16 x i8> @do_not_crash(i8*, i32*, i64*, i32, i64, i8) {
entry:
store i8 %5, i8* %0
- %L5 = load i8* %0
+ %L5 = load i8, i8* %0
%I8 = insertelement <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, i8 %L5, i32 7
%B51 = shl <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, %I8
ret <16 x i8> %B51
diff --git a/test/CodeGen/X86/warn-stack.ll b/test/CodeGen/X86/warn-stack.ll
index a76fd2801a1f..aa09ad8066fe 100644
--- a/test/CodeGen/X86/warn-stack.ll
+++ b/test/CodeGen/X86/warn-stack.ll
@@ -7,7 +7,7 @@
define void @nowarn() nounwind ssp {
entry:
%buffer = alloca [12 x i8], align 1
- %arraydecay = getelementptr inbounds [12 x i8]* %buffer, i64 0, i64 0
+ %arraydecay = getelementptr inbounds [12 x i8], [12 x i8]* %buffer, i64 0, i64 0
call void @doit(i8* %arraydecay) nounwind
ret void
}
@@ -16,7 +16,7 @@ entry:
define void @warn() nounwind ssp {
entry:
%buffer = alloca [80 x i8], align 1
- %arraydecay = getelementptr inbounds [80 x i8]* %buffer, i64 0, i64 0
+ %arraydecay = getelementptr inbounds [80 x i8], [80 x i8]* %buffer, i64 0, i64 0
call void @doit(i8* %arraydecay) nounwind
ret void
}
diff --git a/test/CodeGen/X86/weak_def_can_be_hidden.ll b/test/CodeGen/X86/weak_def_can_be_hidden.ll
index b17f372afed8..8e6d34c89d88 100644
--- a/test/CodeGen/X86/weak_def_can_be_hidden.ll
+++ b/test/CodeGen/X86/weak_def_can_be_hidden.ll
@@ -12,7 +12,7 @@
; CHECK-D89: .weak_definition _v1
define i32 @f1() {
- %x = load i32 * @v1
+ %x = load i32 , i32 * @v1
ret i32 %x
}
@@ -46,6 +46,6 @@ define i32* @f3() {
; CHECK-D89: .weak_definition _v4
define i32 @f4() {
- %x = load i32 * @v4
+ %x = load i32 , i32 * @v4
ret i32 %x
}
diff --git a/test/CodeGen/X86/widen_arith-1.ll b/test/CodeGen/X86/widen_arith-1.ll
index 6041356e6ac7..5663b8b40946 100644
--- a/test/CodeGen/X86/widen_arith-1.ll
+++ b/test/CodeGen/X86/widen_arith-1.ll
@@ -16,25 +16,25 @@ entry:
br label %forcond
forcond: ; preds = %forinc, %entry
- %tmp = load i32* %i ; <i32> [#uses=1]
- %tmp1 = load i32* %n.addr ; <i32> [#uses=1]
+ %tmp = load i32, i32* %i ; <i32> [#uses=1]
+ %tmp1 = load i32, i32* %n.addr ; <i32> [#uses=1]
%cmp = icmp slt i32 %tmp, %tmp1 ; <i1> [#uses=1]
br i1 %cmp, label %forbody, label %afterfor
forbody: ; preds = %forcond
- %tmp2 = load i32* %i ; <i32> [#uses=1]
- %tmp3 = load <3 x i8>** %dst.addr ; <<3 x i8>*> [#uses=1]
- %arrayidx = getelementptr <3 x i8>* %tmp3, i32 %tmp2 ; <<3 x i8>*> [#uses=1]
- %tmp4 = load i32* %i ; <i32> [#uses=1]
- %tmp5 = load <3 x i8>** %src.addr ; <<3 x i8>*> [#uses=1]
- %arrayidx6 = getelementptr <3 x i8>* %tmp5, i32 %tmp4 ; <<3 x i8>*> [#uses=1]
- %tmp7 = load <3 x i8>* %arrayidx6 ; <<3 x i8>> [#uses=1]
+ %tmp2 = load i32, i32* %i ; <i32> [#uses=1]
+ %tmp3 = load <3 x i8>*, <3 x i8>** %dst.addr ; <<3 x i8>*> [#uses=1]
+ %arrayidx = getelementptr <3 x i8>, <3 x i8>* %tmp3, i32 %tmp2 ; <<3 x i8>*> [#uses=1]
+ %tmp4 = load i32, i32* %i ; <i32> [#uses=1]
+ %tmp5 = load <3 x i8>*, <3 x i8>** %src.addr ; <<3 x i8>*> [#uses=1]
+ %arrayidx6 = getelementptr <3 x i8>, <3 x i8>* %tmp5, i32 %tmp4 ; <<3 x i8>*> [#uses=1]
+ %tmp7 = load <3 x i8>, <3 x i8>* %arrayidx6 ; <<3 x i8>> [#uses=1]
%add = add <3 x i8> %tmp7, < i8 1, i8 1, i8 1 > ; <<3 x i8>> [#uses=1]
store <3 x i8> %add, <3 x i8>* %arrayidx
br label %forinc
forinc: ; preds = %forbody
- %tmp8 = load i32* %i ; <i32> [#uses=1]
+ %tmp8 = load i32, i32* %i ; <i32> [#uses=1]
%inc = add i32 %tmp8, 1 ; <i32> [#uses=1]
store i32 %inc, i32* %i
br label %forcond
diff --git a/test/CodeGen/X86/widen_arith-2.ll b/test/CodeGen/X86/widen_arith-2.ll
index 1b81e9f889aa..6c219c1720ef 100644
--- a/test/CodeGen/X86/widen_arith-2.ll
+++ b/test/CodeGen/X86/widen_arith-2.ll
@@ -19,36 +19,36 @@ entry:
br label %forcond
forcond: ; preds = %forinc, %entry
- %tmp = load i32* %i ; <i32> [#uses=1]
- %tmp1 = load i32* %n.addr ; <i32> [#uses=1]
+ %tmp = load i32, i32* %i ; <i32> [#uses=1]
+ %tmp1 = load i32, i32* %n.addr ; <i32> [#uses=1]
%cmp = icmp slt i32 %tmp, %tmp1 ; <i1> [#uses=1]
br i1 %cmp, label %forbody, label %afterfor
forbody: ; preds = %forcond
- %tmp2 = load i32* %i ; <i32> [#uses=1]
- %tmp3 = load i64** %dst_i.addr ; <i64*> [#uses=1]
- %arrayidx = getelementptr i64* %tmp3, i32 %tmp2 ; <i64*> [#uses=1]
+ %tmp2 = load i32, i32* %i ; <i32> [#uses=1]
+ %tmp3 = load i64*, i64** %dst_i.addr ; <i64*> [#uses=1]
+ %arrayidx = getelementptr i64, i64* %tmp3, i32 %tmp2 ; <i64*> [#uses=1]
%conv = bitcast i64* %arrayidx to <8 x i8>* ; <<8 x i8>*> [#uses=1]
store <8 x i8>* %conv, <8 x i8>** %dst
- %tmp4 = load i32* %i ; <i32> [#uses=1]
- %tmp5 = load i64** %src_i.addr ; <i64*> [#uses=1]
- %arrayidx6 = getelementptr i64* %tmp5, i32 %tmp4 ; <i64*> [#uses=1]
+ %tmp4 = load i32, i32* %i ; <i32> [#uses=1]
+ %tmp5 = load i64*, i64** %src_i.addr ; <i64*> [#uses=1]
+ %arrayidx6 = getelementptr i64, i64* %tmp5, i32 %tmp4 ; <i64*> [#uses=1]
%conv7 = bitcast i64* %arrayidx6 to <8 x i8>* ; <<8 x i8>*> [#uses=1]
store <8 x i8>* %conv7, <8 x i8>** %src
- %tmp8 = load i32* %i ; <i32> [#uses=1]
- %tmp9 = load <8 x i8>** %dst ; <<8 x i8>*> [#uses=1]
- %arrayidx10 = getelementptr <8 x i8>* %tmp9, i32 %tmp8 ; <<8 x i8>*> [#uses=1]
- %tmp11 = load i32* %i ; <i32> [#uses=1]
- %tmp12 = load <8 x i8>** %src ; <<8 x i8>*> [#uses=1]
- %arrayidx13 = getelementptr <8 x i8>* %tmp12, i32 %tmp11 ; <<8 x i8>*> [#uses=1]
- %tmp14 = load <8 x i8>* %arrayidx13 ; <<8 x i8>> [#uses=1]
+ %tmp8 = load i32, i32* %i ; <i32> [#uses=1]
+ %tmp9 = load <8 x i8>*, <8 x i8>** %dst ; <<8 x i8>*> [#uses=1]
+ %arrayidx10 = getelementptr <8 x i8>, <8 x i8>* %tmp9, i32 %tmp8 ; <<8 x i8>*> [#uses=1]
+ %tmp11 = load i32, i32* %i ; <i32> [#uses=1]
+ %tmp12 = load <8 x i8>*, <8 x i8>** %src ; <<8 x i8>*> [#uses=1]
+ %arrayidx13 = getelementptr <8 x i8>, <8 x i8>* %tmp12, i32 %tmp11 ; <<8 x i8>*> [#uses=1]
+ %tmp14 = load <8 x i8>, <8 x i8>* %arrayidx13 ; <<8 x i8>> [#uses=1]
%add = add <8 x i8> %tmp14, < i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1 > ; <<8 x i8>> [#uses=1]
%and = and <8 x i8> %add, < i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4 > ; <<8 x i8>> [#uses=1]
store <8 x i8> %and, <8 x i8>* %arrayidx10
br label %forinc
forinc: ; preds = %forbody
- %tmp15 = load i32* %i ; <i32> [#uses=1]
+ %tmp15 = load i32, i32* %i ; <i32> [#uses=1]
%inc = add i32 %tmp15, 1 ; <i32> [#uses=1]
store i32 %inc, i32* %i
br label %forcond
diff --git a/test/CodeGen/X86/widen_arith-3.ll b/test/CodeGen/X86/widen_arith-3.ll
index d2b8e6ee9a75..aea7975a045c 100644
--- a/test/CodeGen/X86/widen_arith-3.ll
+++ b/test/CodeGen/X86/widen_arith-3.ll
@@ -21,25 +21,25 @@ entry:
br label %forcond
forcond: ; preds = %forinc, %entry
- %tmp = load i32* %i ; <i32> [#uses=1]
- %tmp1 = load i32* %n.addr ; <i32> [#uses=1]
+ %tmp = load i32, i32* %i ; <i32> [#uses=1]
+ %tmp1 = load i32, i32* %n.addr ; <i32> [#uses=1]
%cmp = icmp slt i32 %tmp, %tmp1 ; <i1> [#uses=1]
br i1 %cmp, label %forbody, label %afterfor
forbody: ; preds = %forcond
- %tmp2 = load i32* %i ; <i32> [#uses=1]
- %tmp3 = load <3 x i16>** %dst.addr ; <<3 x i16>*> [#uses=1]
- %arrayidx = getelementptr <3 x i16>* %tmp3, i32 %tmp2 ; <<3 x i16>*> [#uses=1]
- %tmp4 = load i32* %i ; <i32> [#uses=1]
- %tmp5 = load <3 x i16>** %src.addr ; <<3 x i16>*> [#uses=1]
- %arrayidx6 = getelementptr <3 x i16>* %tmp5, i32 %tmp4 ; <<3 x i16>*> [#uses=1]
- %tmp7 = load <3 x i16>* %arrayidx6 ; <<3 x i16>> [#uses=1]
+ %tmp2 = load i32, i32* %i ; <i32> [#uses=1]
+ %tmp3 = load <3 x i16>*, <3 x i16>** %dst.addr ; <<3 x i16>*> [#uses=1]
+ %arrayidx = getelementptr <3 x i16>, <3 x i16>* %tmp3, i32 %tmp2 ; <<3 x i16>*> [#uses=1]
+ %tmp4 = load i32, i32* %i ; <i32> [#uses=1]
+ %tmp5 = load <3 x i16>*, <3 x i16>** %src.addr ; <<3 x i16>*> [#uses=1]
+ %arrayidx6 = getelementptr <3 x i16>, <3 x i16>* %tmp5, i32 %tmp4 ; <<3 x i16>*> [#uses=1]
+ %tmp7 = load <3 x i16>, <3 x i16>* %arrayidx6 ; <<3 x i16>> [#uses=1]
%add = add <3 x i16> %tmp7, < i16 1, i16 1, i16 1 > ; <<3 x i16>> [#uses=1]
store <3 x i16> %add, <3 x i16>* %arrayidx
br label %forinc
forinc: ; preds = %forbody
- %tmp8 = load i32* %i ; <i32> [#uses=1]
+ %tmp8 = load i32, i32* %i ; <i32> [#uses=1]
%inc = add i32 %tmp8, 1 ; <i32> [#uses=1]
store i32 %inc, i32* %i
br label %forcond
diff --git a/test/CodeGen/X86/widen_arith-4.ll b/test/CodeGen/X86/widen_arith-4.ll
index 5207e1fa9d7b..5dba063d8af4 100644
--- a/test/CodeGen/X86/widen_arith-4.ll
+++ b/test/CodeGen/X86/widen_arith-4.ll
@@ -19,26 +19,26 @@ entry:
br label %forcond
forcond: ; preds = %forinc, %entry
- %tmp = load i32* %i ; <i32> [#uses=1]
- %tmp1 = load i32* %n.addr ; <i32> [#uses=1]
+ %tmp = load i32, i32* %i ; <i32> [#uses=1]
+ %tmp1 = load i32, i32* %n.addr ; <i32> [#uses=1]
%cmp = icmp slt i32 %tmp, %tmp1 ; <i1> [#uses=1]
br i1 %cmp, label %forbody, label %afterfor
forbody: ; preds = %forcond
- %tmp2 = load i32* %i ; <i32> [#uses=1]
- %tmp3 = load <5 x i16>** %dst.addr ; <<5 x i16>*> [#uses=1]
- %arrayidx = getelementptr <5 x i16>* %tmp3, i32 %tmp2 ; <<5 x i16>*> [#uses=1]
- %tmp4 = load i32* %i ; <i32> [#uses=1]
- %tmp5 = load <5 x i16>** %src.addr ; <<5 x i16>*> [#uses=1]
- %arrayidx6 = getelementptr <5 x i16>* %tmp5, i32 %tmp4 ; <<5 x i16>*> [#uses=1]
- %tmp7 = load <5 x i16>* %arrayidx6 ; <<5 x i16>> [#uses=1]
+ %tmp2 = load i32, i32* %i ; <i32> [#uses=1]
+ %tmp3 = load <5 x i16>*, <5 x i16>** %dst.addr ; <<5 x i16>*> [#uses=1]
+ %arrayidx = getelementptr <5 x i16>, <5 x i16>* %tmp3, i32 %tmp2 ; <<5 x i16>*> [#uses=1]
+ %tmp4 = load i32, i32* %i ; <i32> [#uses=1]
+ %tmp5 = load <5 x i16>*, <5 x i16>** %src.addr ; <<5 x i16>*> [#uses=1]
+ %arrayidx6 = getelementptr <5 x i16>, <5 x i16>* %tmp5, i32 %tmp4 ; <<5 x i16>*> [#uses=1]
+ %tmp7 = load <5 x i16>, <5 x i16>* %arrayidx6 ; <<5 x i16>> [#uses=1]
%sub = sub <5 x i16> %tmp7, < i16 271, i16 271, i16 271, i16 271, i16 271 > ; <<5 x i16>> [#uses=1]
%mul = mul <5 x i16> %sub, < i16 2, i16 4, i16 2, i16 2, i16 2 > ; <<5 x i16>> [#uses=1]
store <5 x i16> %mul, <5 x i16>* %arrayidx
br label %forinc
forinc: ; preds = %forbody
- %tmp8 = load i32* %i ; <i32> [#uses=1]
+ %tmp8 = load i32, i32* %i ; <i32> [#uses=1]
%inc = add i32 %tmp8, 1 ; <i32> [#uses=1]
store i32 %inc, i32* %i
br label %forcond
diff --git a/test/CodeGen/X86/widen_arith-5.ll b/test/CodeGen/X86/widen_arith-5.ll
index 70b6a8a239ab..04c9ec2def2a 100644
--- a/test/CodeGen/X86/widen_arith-5.ll
+++ b/test/CodeGen/X86/widen_arith-5.ll
@@ -20,26 +20,26 @@ entry:
br label %forcond
forcond: ; preds = %forinc, %entry
- %tmp = load i32* %i ; <i32> [#uses=1]
- %tmp1 = load i32* %n.addr ; <i32> [#uses=1]
+ %tmp = load i32, i32* %i ; <i32> [#uses=1]
+ %tmp1 = load i32, i32* %n.addr ; <i32> [#uses=1]
%cmp = icmp slt i32 %tmp, %tmp1 ; <i1> [#uses=1]
br i1 %cmp, label %forbody, label %afterfor
forbody: ; preds = %forcond
- %tmp2 = load i32* %i ; <i32> [#uses=1]
- %tmp3 = load <3 x i32>** %dst.addr ; <<3 x i32>*> [#uses=1]
- %arrayidx = getelementptr <3 x i32>* %tmp3, i32 %tmp2 ; <<3 x i32>*> [#uses=1]
- %tmp4 = load i32* %i ; <i32> [#uses=1]
- %tmp5 = load <3 x i32>** %src.addr ; <<3 x i32>*> [#uses=1]
- %arrayidx6 = getelementptr <3 x i32>* %tmp5, i32 %tmp4 ; <<3 x i32>*> [#uses=1]
- %tmp7 = load <3 x i32>* %arrayidx6 ; <<3 x i32>> [#uses=1]
+ %tmp2 = load i32, i32* %i ; <i32> [#uses=1]
+ %tmp3 = load <3 x i32>*, <3 x i32>** %dst.addr ; <<3 x i32>*> [#uses=1]
+ %arrayidx = getelementptr <3 x i32>, <3 x i32>* %tmp3, i32 %tmp2 ; <<3 x i32>*> [#uses=1]
+ %tmp4 = load i32, i32* %i ; <i32> [#uses=1]
+ %tmp5 = load <3 x i32>*, <3 x i32>** %src.addr ; <<3 x i32>*> [#uses=1]
+ %arrayidx6 = getelementptr <3 x i32>, <3 x i32>* %tmp5, i32 %tmp4 ; <<3 x i32>*> [#uses=1]
+ %tmp7 = load <3 x i32>, <3 x i32>* %arrayidx6 ; <<3 x i32>> [#uses=1]
%mul = mul <3 x i32> %tmp7, < i32 4, i32 4, i32 4 > ; <<3 x i32>> [#uses=1]
%sub = sub <3 x i32> %mul, < i32 3, i32 3, i32 3 > ; <<3 x i32>> [#uses=1]
store <3 x i32> %sub, <3 x i32>* %arrayidx
br label %forinc
forinc: ; preds = %forbody
- %tmp8 = load i32* %i ; <i32> [#uses=1]
+ %tmp8 = load i32, i32* %i ; <i32> [#uses=1]
%inc = add i32 %tmp8, 1 ; <i32> [#uses=1]
store i32 %inc, i32* %i
br label %forcond
diff --git a/test/CodeGen/X86/widen_arith-6.ll b/test/CodeGen/X86/widen_arith-6.ll
index 329048ad77b7..09998a027d25 100644
--- a/test/CodeGen/X86/widen_arith-6.ll
+++ b/test/CodeGen/X86/widen_arith-6.ll
@@ -19,27 +19,27 @@ entry:
br label %forcond
forcond: ; preds = %forinc, %entry
- %tmp = load i32* %i ; <i32> [#uses=1]
- %tmp1 = load i32* %n.addr ; <i32> [#uses=1]
+ %tmp = load i32, i32* %i ; <i32> [#uses=1]
+ %tmp1 = load i32, i32* %n.addr ; <i32> [#uses=1]
%cmp = icmp slt i32 %tmp, %tmp1 ; <i1> [#uses=1]
br i1 %cmp, label %forbody, label %afterfor
forbody: ; preds = %forcond
- %tmp2 = load i32* %i ; <i32> [#uses=1]
- %tmp3 = load <3 x float>** %dst.addr ; <<3 x float>*> [#uses=1]
- %arrayidx = getelementptr <3 x float>* %tmp3, i32 %tmp2 ; <<3 x float>*> [#uses=1]
- %tmp4 = load i32* %i ; <i32> [#uses=1]
- %tmp5 = load <3 x float>** %src.addr ; <<3 x float>*> [#uses=1]
- %arrayidx6 = getelementptr <3 x float>* %tmp5, i32 %tmp4 ; <<3 x float>*> [#uses=1]
- %tmp7 = load <3 x float>* %arrayidx6 ; <<3 x float>> [#uses=1]
- %tmp8 = load <3 x float>* %v ; <<3 x float>> [#uses=1]
+ %tmp2 = load i32, i32* %i ; <i32> [#uses=1]
+ %tmp3 = load <3 x float>*, <3 x float>** %dst.addr ; <<3 x float>*> [#uses=1]
+ %arrayidx = getelementptr <3 x float>, <3 x float>* %tmp3, i32 %tmp2 ; <<3 x float>*> [#uses=1]
+ %tmp4 = load i32, i32* %i ; <i32> [#uses=1]
+ %tmp5 = load <3 x float>*, <3 x float>** %src.addr ; <<3 x float>*> [#uses=1]
+ %arrayidx6 = getelementptr <3 x float>, <3 x float>* %tmp5, i32 %tmp4 ; <<3 x float>*> [#uses=1]
+ %tmp7 = load <3 x float>, <3 x float>* %arrayidx6 ; <<3 x float>> [#uses=1]
+ %tmp8 = load <3 x float>, <3 x float>* %v ; <<3 x float>> [#uses=1]
%mul = fmul <3 x float> %tmp7, %tmp8 ; <<3 x float>> [#uses=1]
%add = fadd <3 x float> %mul, < float 0x409EE02900000000, float 0x409EE02900000000, float 0x409EE02900000000 > ; <<3 x float>> [#uses=1]
store <3 x float> %add, <3 x float>* %arrayidx
br label %forinc
forinc: ; preds = %forbody
- %tmp9 = load i32* %i ; <i32> [#uses=1]
+ %tmp9 = load i32, i32* %i ; <i32> [#uses=1]
%inc = add i32 %tmp9, 1 ; <i32> [#uses=1]
store i32 %inc, i32* %i
br label %forcond
diff --git a/test/CodeGen/X86/widen_cast-1.ll b/test/CodeGen/X86/widen_cast-1.ll
index e0b861f29de8..b0240ddb0433 100644
--- a/test/CodeGen/X86/widen_cast-1.ll
+++ b/test/CodeGen/X86/widen_cast-1.ll
@@ -3,12 +3,14 @@
; CHECK: movl
; CHECK: paddw
-; CHECK: movlpd
+; CHECK: movq
+
+; FIXME - if this test cares about scheduling, why isn't it being checked?
; The scheduler causes a different instruction order to be produced
; ATOM: movl
; ATOM: paddw
-; ATOM: movlpd
+; ATOM: movq
; bitcast a v4i16 to v2i32
@@ -23,25 +25,25 @@ entry:
br label %forcond
forcond: ; preds = %forinc, %entry
- %tmp = load i32* %i ; <i32> [#uses=1]
+ %tmp = load i32, i32* %i ; <i32> [#uses=1]
%cmp = icmp slt i32 %tmp, 4 ; <i1> [#uses=1]
br i1 %cmp, label %forbody, label %afterfor
forbody: ; preds = %forcond
- %tmp1 = load i32* %i ; <i32> [#uses=1]
- %tmp2 = load <2 x i32>** %dst.addr ; <<2 x i32>*> [#uses=1]
- %arrayidx = getelementptr <2 x i32>* %tmp2, i32 %tmp1 ; <<2 x i32>*> [#uses=1]
- %tmp3 = load i32* %i ; <i32> [#uses=1]
- %tmp4 = load <4 x i16>** %src.addr ; <<4 x i16>*> [#uses=1]
- %arrayidx5 = getelementptr <4 x i16>* %tmp4, i32 %tmp3 ; <<4 x i16>*> [#uses=1]
- %tmp6 = load <4 x i16>* %arrayidx5 ; <<4 x i16>> [#uses=1]
+ %tmp1 = load i32, i32* %i ; <i32> [#uses=1]
+ %tmp2 = load <2 x i32>*, <2 x i32>** %dst.addr ; <<2 x i32>*> [#uses=1]
+ %arrayidx = getelementptr <2 x i32>, <2 x i32>* %tmp2, i32 %tmp1 ; <<2 x i32>*> [#uses=1]
+ %tmp3 = load i32, i32* %i ; <i32> [#uses=1]
+ %tmp4 = load <4 x i16>*, <4 x i16>** %src.addr ; <<4 x i16>*> [#uses=1]
+ %arrayidx5 = getelementptr <4 x i16>, <4 x i16>* %tmp4, i32 %tmp3 ; <<4 x i16>*> [#uses=1]
+ %tmp6 = load <4 x i16>, <4 x i16>* %arrayidx5 ; <<4 x i16>> [#uses=1]
%add = add <4 x i16> %tmp6, < i16 1, i16 1, i16 1, i16 1 > ; <<4 x i16>> [#uses=1]
%conv = bitcast <4 x i16> %add to <2 x i32> ; <<2 x i32>> [#uses=1]
store <2 x i32> %conv, <2 x i32>* %arrayidx
br label %forinc
forinc: ; preds = %forbody
- %tmp7 = load i32* %i ; <i32> [#uses=1]
+ %tmp7 = load i32, i32* %i ; <i32> [#uses=1]
%inc = add i32 %tmp7, 1 ; <i32> [#uses=1]
store i32 %inc, i32* %i
br label %forcond
diff --git a/test/CodeGen/X86/widen_cast-2.ll b/test/CodeGen/X86/widen_cast-2.ll
index 40b42fbf1460..5a9acbd52f20 100644
--- a/test/CodeGen/X86/widen_cast-2.ll
+++ b/test/CodeGen/X86/widen_cast-2.ll
@@ -18,25 +18,25 @@ entry:
br label %forcond
forcond: ; preds = %forinc, %entry
- %tmp = load i32* %i ; <i32> [#uses=1]
+ %tmp = load i32, i32* %i ; <i32> [#uses=1]
%cmp = icmp slt i32 %tmp, 4 ; <i1> [#uses=1]
br i1 %cmp, label %forbody, label %afterfor
forbody: ; preds = %forcond
- %tmp1 = load i32* %i ; <i32> [#uses=1]
- %tmp2 = load <7 x i32>** %dst.addr ; <<2 x i32>*> [#uses=1]
- %arrayidx = getelementptr <7 x i32>* %tmp2, i32 %tmp1 ; <<7 x i32>*> [#uses=1]
- %tmp3 = load i32* %i ; <i32> [#uses=1]
- %tmp4 = load <14 x i16>** %src.addr ; <<4 x i16>*> [#uses=1]
- %arrayidx5 = getelementptr <14 x i16>* %tmp4, i32 %tmp3 ; <<4 x i16>*> [#uses=1]
- %tmp6 = load <14 x i16>* %arrayidx5 ; <<4 x i16>> [#uses=1]
+ %tmp1 = load i32, i32* %i ; <i32> [#uses=1]
+ %tmp2 = load <7 x i32>*, <7 x i32>** %dst.addr ; <<2 x i32>*> [#uses=1]
+ %arrayidx = getelementptr <7 x i32>, <7 x i32>* %tmp2, i32 %tmp1 ; <<7 x i32>*> [#uses=1]
+ %tmp3 = load i32, i32* %i ; <i32> [#uses=1]
+ %tmp4 = load <14 x i16>*, <14 x i16>** %src.addr ; <<4 x i16>*> [#uses=1]
+ %arrayidx5 = getelementptr <14 x i16>, <14 x i16>* %tmp4, i32 %tmp3 ; <<4 x i16>*> [#uses=1]
+ %tmp6 = load <14 x i16>, <14 x i16>* %arrayidx5 ; <<4 x i16>> [#uses=1]
%add = add <14 x i16> %tmp6, < i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1 > ; <<4 x i16>> [#uses=1]
%conv = bitcast <14 x i16> %add to <7 x i32> ; <<7 x i32>> [#uses=1]
store <7 x i32> %conv, <7 x i32>* %arrayidx
br label %forinc
forinc: ; preds = %forbody
- %tmp7 = load i32* %i ; <i32> [#uses=1]
+ %tmp7 = load i32, i32* %i ; <i32> [#uses=1]
%inc = add i32 %tmp7, 1 ; <i32> [#uses=1]
store i32 %inc, i32* %i
br label %forcond
diff --git a/test/CodeGen/X86/widen_cast-4.ll b/test/CodeGen/X86/widen_cast-4.ll
index 19b84f19a4ff..8ed2785ae73a 100644
--- a/test/CodeGen/X86/widen_cast-4.ll
+++ b/test/CodeGen/X86/widen_cast-4.ll
@@ -18,29 +18,29 @@ entry:
br label %forcond
forcond: ; preds = %forinc, %entry
- %tmp = load i32* %i ; <i32> [#uses=1]
- %tmp1 = load i32* %n.addr ; <i32> [#uses=1]
+ %tmp = load i32, i32* %i ; <i32> [#uses=1]
+ %tmp1 = load i32, i32* %n.addr ; <i32> [#uses=1]
%cmp = icmp slt i32 %tmp, %tmp1 ; <i1> [#uses=1]
br i1 %cmp, label %forbody, label %afterfor
forbody: ; preds = %forcond
- %tmp2 = load i32* %i ; <i32> [#uses=1]
- %tmp3 = load i64** %dst_i.addr ; <i64*> [#uses=1]
- %arrayidx = getelementptr i64* %tmp3, i32 %tmp2 ; <i64*> [#uses=1]
+ %tmp2 = load i32, i32* %i ; <i32> [#uses=1]
+ %tmp3 = load i64*, i64** %dst_i.addr ; <i64*> [#uses=1]
+ %arrayidx = getelementptr i64, i64* %tmp3, i32 %tmp2 ; <i64*> [#uses=1]
%conv = bitcast i64* %arrayidx to <8 x i8>* ; <<8 x i8>*> [#uses=1]
store <8 x i8>* %conv, <8 x i8>** %dst
- %tmp4 = load i32* %i ; <i32> [#uses=1]
- %tmp5 = load i64** %src_i.addr ; <i64*> [#uses=1]
- %arrayidx6 = getelementptr i64* %tmp5, i32 %tmp4 ; <i64*> [#uses=1]
+ %tmp4 = load i32, i32* %i ; <i32> [#uses=1]
+ %tmp5 = load i64*, i64** %src_i.addr ; <i64*> [#uses=1]
+ %arrayidx6 = getelementptr i64, i64* %tmp5, i32 %tmp4 ; <i64*> [#uses=1]
%conv7 = bitcast i64* %arrayidx6 to <8 x i8>* ; <<8 x i8>*> [#uses=1]
store <8 x i8>* %conv7, <8 x i8>** %src
- %tmp8 = load i32* %i ; <i32> [#uses=1]
- %tmp9 = load <8 x i8>** %dst ; <<8 x i8>*> [#uses=1]
- %arrayidx10 = getelementptr <8 x i8>* %tmp9, i32 %tmp8 ; <<8 x i8>*> [#uses=1]
- %tmp11 = load i32* %i ; <i32> [#uses=1]
- %tmp12 = load <8 x i8>** %src ; <<8 x i8>*> [#uses=1]
- %arrayidx13 = getelementptr <8 x i8>* %tmp12, i32 %tmp11 ; <<8 x i8>*> [#uses=1]
- %tmp14 = load <8 x i8>* %arrayidx13 ; <<8 x i8>> [#uses=1]
+ %tmp8 = load i32, i32* %i ; <i32> [#uses=1]
+ %tmp9 = load <8 x i8>*, <8 x i8>** %dst ; <<8 x i8>*> [#uses=1]
+ %arrayidx10 = getelementptr <8 x i8>, <8 x i8>* %tmp9, i32 %tmp8 ; <<8 x i8>*> [#uses=1]
+ %tmp11 = load i32, i32* %i ; <i32> [#uses=1]
+ %tmp12 = load <8 x i8>*, <8 x i8>** %src ; <<8 x i8>*> [#uses=1]
+ %arrayidx13 = getelementptr <8 x i8>, <8 x i8>* %tmp12, i32 %tmp11 ; <<8 x i8>*> [#uses=1]
+ %tmp14 = load <8 x i8>, <8 x i8>* %arrayidx13 ; <<8 x i8>> [#uses=1]
%add = add <8 x i8> %tmp14, < i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1 > ; <<8 x i8>> [#uses=1]
%shr = ashr <8 x i8> %add, < i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2 > ; <<8 x i8>> [#uses=1]
store <8 x i8> %shr, <8 x i8>* %arrayidx10
@@ -52,7 +52,7 @@ forbody: ; preds = %forcond
; CHECK-NEXT: psraw $8
; CHECK-NEXT: psraw $2
; CHECK-NEXT: pshufb
-; CHECK-NEXT: movlpd
+; CHECK-NEXT: movq
;
; FIXME: We shouldn't require both a movd and an insert.
; CHECK-WIDE: %forbody
@@ -67,7 +67,7 @@ forbody: ; preds = %forcond
; CHECK-WIDE-NEXT: movd
forinc: ; preds = %forbody
- %tmp15 = load i32* %i ; <i32> [#uses=1]
+ %tmp15 = load i32, i32* %i ; <i32> [#uses=1]
%inc = add i32 %tmp15, 1 ; <i32> [#uses=1]
store i32 %inc, i32* %i
br label %forcond
diff --git a/test/CodeGen/X86/widen_cast-5.ll b/test/CodeGen/X86/widen_cast-5.ll
index ccf0bd1d0b62..4e9d2dfdb5de 100644
--- a/test/CodeGen/X86/widen_cast-5.ll
+++ b/test/CodeGen/X86/widen_cast-5.ll
@@ -1,6 +1,6 @@
; RUN: llc < %s -march=x86 -mattr=+sse4.2 | FileCheck %s
; CHECK: movl
-; CHECK: movlpd
+; CHECK: movq
; bitcast a i64 to v2i32
define void @convert(<2 x i32>* %dst.addr, i64 %src) nounwind {
diff --git a/test/CodeGen/X86/widen_conversions.ll b/test/CodeGen/X86/widen_conversions.ll
index 8e5174fbe76e..dd75097a2515 100644
--- a/test/CodeGen/X86/widen_conversions.ll
+++ b/test/CodeGen/X86/widen_conversions.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mcpu=x86-64 -x86-experimental-vector-widening-legalization -x86-experimental-vector-shuffle-lowering | FileCheck %s
+; RUN: llc < %s -mcpu=x86-64 -x86-experimental-vector-widening-legalization | FileCheck %s
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-unknown"
@@ -12,7 +12,7 @@ define <4 x i32> @zext_v4i8_to_v4i32(<4 x i8>* %ptr) {
; CHECK-NEXT: punpcklwd %[[Z]], %[[X]]
; CHECK-NEXT: ret
- %val = load <4 x i8>* %ptr
+ %val = load <4 x i8>, <4 x i8>* %ptr
%ext = zext <4 x i8> %val to <4 x i32>
ret <4 x i32> %ext
}
diff --git a/test/CodeGen/X86/widen_load-0.ll b/test/CodeGen/X86/widen_load-0.ll
index d5437281b274..edaaa77c1456 100644
--- a/test/CodeGen/X86/widen_load-0.ll
+++ b/test/CodeGen/X86/widen_load-0.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -o - -mtriple=x86_64-linux -mcpu=corei7 | FileCheck %s
+; RUN: llc < %s -o - -mtriple=x86_64-linux | FileCheck %s
; PR4891
; Both loads should happen before either store.
@@ -10,8 +10,8 @@
define void @short2_int_swap(<2 x i16>* nocapture %b, i32* nocapture %c) nounwind {
entry:
- %0 = load <2 x i16>* %b, align 2 ; <<2 x i16>> [#uses=1]
- %1 = load i32* %c, align 4 ; <i32> [#uses=1]
+ %0 = load <2 x i16>, <2 x i16>* %b, align 2 ; <<2 x i16>> [#uses=1]
+ %1 = load i32, i32* %c, align 4 ; <i32> [#uses=1]
%tmp1 = bitcast i32 %1 to <2 x i16> ; <<2 x i16>> [#uses=1]
store <2 x i16> %tmp1, <2 x i16>* %b, align 2
%tmp5 = bitcast <2 x i16> %0 to <1 x i32> ; <<1 x i32>> [#uses=1]
diff --git a/test/CodeGen/X86/widen_load-1.ll b/test/CodeGen/X86/widen_load-1.ll
index c59cc58f40fd..c670b45df747 100644
--- a/test/CodeGen/X86/widen_load-1.ll
+++ b/test/CodeGen/X86/widen_load-1.ll
@@ -24,14 +24,14 @@ define void @reset(<2 x float>* noalias %garbage1) {
"file complex.c, line 27, bb13": ; preds = %"file complex.c, line 27, bb1"
store i32 0, i32* %changed, align 4
- %r2 = getelementptr float* bitcast ([20 x i64]* @compl to float*), i64 32 ; <float*> [#uses=1]
+ %r2 = getelementptr float, float* bitcast ([20 x i64]* @compl to float*), i64 32 ; <float*> [#uses=1]
%r3 = bitcast float* %r2 to <2 x float>* ; <<2 x float>*> [#uses=1]
- %r4 = load <2 x float>* %r3, align 4 ; <<2 x float>> [#uses=1]
+ %r4 = load <2 x float>, <2 x float>* %r3, align 4 ; <<2 x float>> [#uses=1]
call void @killcommon(i32* %changed)
br label %"file complex.c, line 34, bb4"
"file complex.c, line 34, bb4": ; preds = %"file complex.c, line 27, bb13"
- %r5 = load i32* %changed, align 4 ; <i32> [#uses=1]
+ %r5 = load i32, i32* %changed, align 4 ; <i32> [#uses=1]
%r6 = icmp eq i32 %r5, 0 ; <i1> [#uses=1]
%r7 = zext i1 %r6 to i32 ; <i32> [#uses=1]
%r8 = icmp ne i32 %r7, 0 ; <i1> [#uses=1]
diff --git a/test/CodeGen/X86/widen_load-2.ll b/test/CodeGen/X86/widen_load-2.ll
index 0ec3574d69eb..f5ddc0eacc61 100644
--- a/test/CodeGen/X86/widen_load-2.ll
+++ b/test/CodeGen/X86/widen_load-2.ll
@@ -10,8 +10,8 @@ define void @add3i32(%i32vec3* sret %ret, %i32vec3* %ap, %i32vec3* %bp) {
; CHECK-NEXT: paddd (%{{.*}}), %[[R0]]
; CHECK-NEXT: pextrd $2, %[[R0]], 8(%{{.*}})
; CHECK-NEXT: movq %[[R0]], (%{{.*}})
- %a = load %i32vec3* %ap, align 16
- %b = load %i32vec3* %bp, align 16
+ %a = load %i32vec3, %i32vec3* %ap, align 16
+ %b = load %i32vec3, %i32vec3* %bp, align 16
%x = add %i32vec3 %a, %b
store %i32vec3 %x, %i32vec3* %ret, align 16
ret void
@@ -26,8 +26,8 @@ define void @add3i32_2(%i32vec3* sret %ret, %i32vec3* %ap, %i32vec3* %bp) {
; CHECK-NEXT: paddd %[[R0]], %[[R1]]
; CHECK-NEXT: pextrd $2, %[[R1]], 8(%{{.*}})
; CHECK-NEXT: movq %[[R1]], (%{{.*}})
- %a = load %i32vec3* %ap, align 8
- %b = load %i32vec3* %bp, align 8
+ %a = load %i32vec3, %i32vec3* %ap, align 8
+ %b = load %i32vec3, %i32vec3* %bp, align 8
%x = add %i32vec3 %a, %b
store %i32vec3 %x, %i32vec3* %ret, align 8
ret void
@@ -43,8 +43,8 @@ define void @add7i32(%i32vec7* sret %ret, %i32vec7* %ap, %i32vec7* %bp) {
; CHECK-NEXT: pextrd $2, %[[R1]], 24(%{{.*}})
; CHECK-NEXT: movq %[[R1]], 16(%{{.*}})
; CHECK-NEXT: movdqa %[[R0]], (%{{.*}})
- %a = load %i32vec7* %ap, align 16
- %b = load %i32vec7* %bp, align 16
+ %a = load %i32vec7, %i32vec7* %ap, align 16
+ %b = load %i32vec7, %i32vec7* %bp, align 16
%x = add %i32vec7 %a, %b
store %i32vec7 %x, %i32vec7* %ret, align 16
ret void
@@ -62,8 +62,8 @@ define void @add12i32(%i32vec12* sret %ret, %i32vec12* %ap, %i32vec12* %bp) {
; CHECK-NEXT: movdqa %[[R2]], 32(%{{.*}})
; CHECK-NEXT: movdqa %[[R1]], 16(%{{.*}})
; CHECK-NEXT: movdqa %[[R0]], (%{{.*}})
- %a = load %i32vec12* %ap, align 16
- %b = load %i32vec12* %bp, align 16
+ %a = load %i32vec12, %i32vec12* %ap, align 16
+ %b = load %i32vec12, %i32vec12* %bp, align 16
%x = add %i32vec12 %a, %b
store %i32vec12 %x, %i32vec12* %ret, align 16
ret void
@@ -76,13 +76,12 @@ define void @add3i16(%i16vec3* nocapture sret %ret, %i16vec3* %ap, %i16vec3* %bp
; CHECK: pmovzxwd (%{{.*}}), %[[R0:xmm[0-9]+]]
; CHECK-NEXT: pmovzxwd (%{{.*}}), %[[R1:xmm[0-9]+]]
; CHECK-NEXT: paddd %[[R0]], %[[R1]]
-; CHECK-NEXT: movdqa %[[R1]], %[[R0]]
-; CHECK-NEXT: pshufb {{.*}}, %[[R0]]
-; CHECK-NEXT: pmovzxdq %[[R0]], %[[R0]]
; CHECK-NEXT: pextrw $4, %[[R1]], 4(%{{.*}})
+; CHECK-NEXT: pshufb {{.*}}, %[[R1]]
+; CHECK-NEXT: pmovzxdq %[[R1]], %[[R0]]
; CHECK-NEXT: movd %[[R0]], (%{{.*}})
- %a = load %i16vec3* %ap, align 16
- %b = load %i16vec3* %bp, align 16
+ %a = load %i16vec3, %i16vec3* %ap, align 16
+ %b = load %i16vec3, %i16vec3* %bp, align 16
%x = add %i16vec3 %a, %b
store %i16vec3 %x, %i16vec3* %ret, align 16
ret void
@@ -95,8 +94,8 @@ define void @add4i16(%i16vec4* nocapture sret %ret, %i16vec4* %ap, %i16vec4* %bp
; CHECK-NEXT: movq (%{{.*}}), %[[R1:xmm[0-9]+]]
; CHECK-NEXT: paddw %[[R0]], %[[R1]]
; CHECK-NEXT: movq %[[R1]], (%{{.*}})
- %a = load %i16vec4* %ap, align 16
- %b = load %i16vec4* %bp, align 16
+ %a = load %i16vec4, %i16vec4* %ap, align 16
+ %b = load %i16vec4, %i16vec4* %bp, align 16
%x = add %i16vec4 %a, %b
store %i16vec4 %x, %i16vec4* %ret, align 16
ret void
@@ -111,8 +110,8 @@ define void @add12i16(%i16vec12* nocapture sret %ret, %i16vec12* %ap, %i16vec12*
; CHECK-NEXT: paddw 16(%{{.*}}), %[[R1]]
; CHECK-NEXT: movq %[[R1]], 16(%{{.*}})
; CHECK-NEXT: movdqa %[[R0]], (%{{.*}})
- %a = load %i16vec12* %ap, align 16
- %b = load %i16vec12* %bp, align 16
+ %a = load %i16vec12, %i16vec12* %ap, align 16
+ %b = load %i16vec12, %i16vec12* %bp, align 16
%x = add %i16vec12 %a, %b
store %i16vec12 %x, %i16vec12* %ret, align 16
ret void
@@ -130,8 +129,8 @@ define void @add18i16(%i16vec18* nocapture sret %ret, %i16vec18* %ap, %i16vec18*
; CHECK-NEXT: movd %[[R2]], 32(%{{.*}})
; CHECK-NEXT: movdqa %[[R1]], 16(%{{.*}})
; CHECK-NEXT: movdqa %[[R0]], (%{{.*}})
- %a = load %i16vec18* %ap, align 16
- %b = load %i16vec18* %bp, align 16
+ %a = load %i16vec18, %i16vec18* %ap, align 16
+ %b = load %i16vec18, %i16vec18* %bp, align 16
%x = add %i16vec18 %a, %b
store %i16vec18 %x, %i16vec18* %ret, align 16
ret void
@@ -144,14 +143,13 @@ define void @add3i8(%i8vec3* nocapture sret %ret, %i8vec3* %ap, %i8vec3* %bp) no
; CHECK: pmovzxbd (%{{.*}}), %[[R0:xmm[0-9]+]]
; CHECK-NEXT: pmovzxbd (%{{.*}}), %[[R1:xmm[0-9]+]]
; CHECK-NEXT: paddd %[[R0]], %[[R1]]
-; CHECK-NEXT: movdqa %[[R1]], %[[R0]]
-; CHECK-NEXT: pshufb {{.*}}, %[[R0]]
-; CHECK-NEXT: pmovzxwq %[[R0]], %[[R0]]
; CHECK-NEXT: pextrb $8, %[[R1]], 2(%{{.*}})
+; CHECK-NEXT: pshufb {{.*}}, %[[R1]]
+; CHECK-NEXT: pmovzxwq %[[R1]], %[[R0]]
; CHECK-NEXT: movd %[[R0]], %e[[R2:[abcd]]]x
; CHECK-NEXT: movw %[[R2]]x, (%{{.*}})
- %a = load %i8vec3* %ap, align 16
- %b = load %i8vec3* %bp, align 16
+ %a = load %i8vec3, %i8vec3* %ap, align 16
+ %b = load %i8vec3, %i8vec3* %bp, align 16
%x = add %i8vec3 %a, %b
store %i8vec3 %x, %i8vec3* %ret, align 16
ret void
@@ -169,8 +167,8 @@ define void @add31i8(%i8vec31* nocapture sret %ret, %i8vec31* %ap, %i8vec31* %bp
; CHECK-NEXT: pextrd $2, %[[R1]], 24(%{{.*}})
; CHECK-NEXT: movq %[[R1]], 16(%{{.*}})
; CHECK-NEXT: movdqa %[[R0]], (%{{.*}})
- %a = load %i8vec31* %ap, align 16
- %b = load %i8vec31* %bp, align 16
+ %a = load %i8vec31, %i8vec31* %ap, align 16
+ %b = load %i8vec31, %i8vec31* %bp, align 16
%x = add %i8vec31 %a, %b
store %i8vec31 %x, %i8vec31* %ret, align 16
ret void
@@ -193,8 +191,9 @@ define void @rot(%i8vec3pack* nocapture sret %result, %i8vec3pack* %X, %i8vec3pa
; CHECK-NEXT: movd %[[CONSTANT1]], %e[[R1:[abcd]]]x
; CHECK-NEXT: movw %[[R1]]x, (%[[PTR1:.*]])
; CHECK-NEXT: movb $1, 2(%[[PTR1]])
-; CHECK-NEXT: pmovzxbd (%[[PTR0]]), %[[X0:xmm[0-9]+]]
-; CHECK-NEXT: pand {{.*}}, %[[X0]]
+; CHECK-NEXT: movl (%[[PTR0]]), [[TMP1:%e[abcd]+x]]
+; CHECK-NEXT: movl [[TMP1]], [[TMP2:.*]]
+; CHECK-NEXT: pmovzxbd [[TMP2]], %[[X0:xmm[0-9]+]]
; CHECK-NEXT: pextrd $1, %[[X0]], %e[[R0:[abcd]]]x
; CHECK-NEXT: shrl %e[[R0]]x
; CHECK-NEXT: movd %[[X0]], %e[[R1:[abcd]]]x
@@ -206,10 +205,9 @@ define void @rot(%i8vec3pack* nocapture sret %result, %i8vec3pack* %X, %i8vec3pa
; CHECK-NEXT: pinsrd $2, %e[[R0]]x, %[[X1]]
; CHECK-NEXT: pextrd $3, %[[X0]], %e[[R0:[abcd]]]x
; CHECK-NEXT: pinsrd $3, %e[[R0]]x, %[[X1]]
-; CHECK-NEXT: movdqa %[[X1]], %[[X2:xmm[0-9]+]]
-; CHECK-NEXT: pshufb %[[SHUFFLE_MASK]], %[[X2]]
-; CHECK-NEXT: pmovzxwq %[[X2]], %[[X3:xmm[0-9]+]]
; CHECK-NEXT: pextrb $8, %[[X1]], 2(%{{.*}})
+; CHECK-NEXT: pshufb %[[SHUFFLE_MASK]], %[[X1]]
+; CHECK-NEXT: pmovzxwq %[[X1]], %[[X3:xmm[0-9]+]]
; CHECK-NEXT: movd %[[X3]], %e[[R0:[abcd]]]x
; CHECK-NEXT: movw %[[R0]]x, (%{{.*}})
@@ -218,9 +216,9 @@ entry:
store <3 x i8> <i8 -98, i8 -98, i8 -98>, <3 x i8>* %storetmp
%storetmp1 = bitcast %i8vec3pack* %rot to <3 x i8>*
store <3 x i8> <i8 1, i8 1, i8 1>, <3 x i8>* %storetmp1
- %tmp = load %i8vec3pack* %X
+ %tmp = load %i8vec3pack, %i8vec3pack* %X
%extractVec = extractvalue %i8vec3pack %tmp, 0
- %tmp2 = load %i8vec3pack* %rot
+ %tmp2 = load %i8vec3pack, %i8vec3pack* %rot
%extractVec3 = extractvalue %i8vec3pack %tmp2, 0
%shr = lshr <3 x i8> %extractVec, %extractVec3
%storetmp4 = bitcast %i8vec3pack* %result to <3 x i8>*
diff --git a/test/CodeGen/X86/widen_shuffle-1.ll b/test/CodeGen/X86/widen_shuffle-1.ll
index 70fdbb7c9c82..302805213d06 100644
--- a/test/CodeGen/X86/widen_shuffle-1.ll
+++ b/test/CodeGen/X86/widen_shuffle-1.ll
@@ -82,9 +82,9 @@ define void @shuf5(<8 x i8>* %p) nounwind {
; CHECK-LABEL: shuf5:
; CHECK: # BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
-; CHECK-NEXT: movdqa {{.*#+}} xmm0 = <4,33,u,u,u,u,u,u>
-; CHECK-NEXT: pshufb {{.*#+}} xmm0 = xmm0[2,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
-; CHECK-NEXT: movlpd %xmm0, (%eax)
+; CHECK-NEXT: movdqa {{.*#+}} xmm0 = [33,33,33,33,33,33,33,33]
+; CHECK-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
+; CHECK-NEXT: movq %xmm0, (%eax)
; CHECK-NEXT: retl
%v = shufflevector <2 x i8> <i8 4, i8 33>, <2 x i8> undef, <8 x i32> <i32 1, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
store <8 x i8> %v, <8 x i8>* %p, align 8
diff --git a/test/CodeGen/X86/win32-eh.ll b/test/CodeGen/X86/win32-eh.ll
new file mode 100644
index 000000000000..4d3c34ed6c17
--- /dev/null
+++ b/test/CodeGen/X86/win32-eh.ll
@@ -0,0 +1,91 @@
+; RUN: llc -mtriple=i686-pc-windows-msvc < %s | FileCheck %s
+
+declare void @may_throw_or_crash()
+declare i32 @_except_handler3(...)
+declare i32 @_except_handler4(...)
+declare i32 @__CxxFrameHandler3(...)
+declare void @llvm.eh.begincatch(i8*, i8*)
+declare void @llvm.eh.endcatch()
+
+define void @use_except_handler3() {
+ invoke void @may_throw_or_crash()
+ to label %cont unwind label %catchall
+cont:
+ ret void
+catchall:
+ landingpad { i8*, i32 } personality i32 (...)* @_except_handler3
+ catch i8* null
+ br label %cont
+}
+
+; CHECK-LABEL: _use_except_handler3:
+; CHECK: subl ${{[0-9]+}}, %esp
+; CHECK: movl $-1, 12(%esp)
+; CHECK: movl $L__ehtable$use_except_handler3, 8(%esp)
+; CHECK: movl $__except_handler3, 4(%esp)
+; CHECK: movl %fs:0, %[[next:[^ ,]*]]
+; CHECK: movl %[[next]], (%esp)
+; CHECK: leal (%esp), %[[node:[^ ,]*]]
+; CHECK: movl %[[node]], %fs:0
+; CHECK: calll _may_throw_or_crash
+; CHECK: movl (%esp), %[[next:[^ ,]*]]
+; CHECK: movl %[[next]], %fs:0
+; CHECK: retl
+
+define void @use_except_handler4() {
+ invoke void @may_throw_or_crash()
+ to label %cont unwind label %catchall
+cont:
+ ret void
+catchall:
+ landingpad { i8*, i32 } personality i32 (...)* @_except_handler4
+ catch i8* null
+ br label %cont
+}
+
+; CHECK-LABEL: _use_except_handler4:
+; CHECK: subl ${{[0-9]+}}, %esp
+; CHECK: movl %esp, (%esp)
+; CHECK: movl $-2, 20(%esp)
+; CHECK: movl $L__ehtable$use_except_handler4, 4(%esp)
+; CHECK: leal 8(%esp), %[[node:[^ ,]*]]
+; CHECK: movl $__except_handler4, 12(%esp)
+; CHECK: movl %fs:0, %[[next:[^ ,]*]]
+; CHECK: movl %[[next]], 8(%esp)
+; CHECK: movl %[[node]], %fs:0
+; CHECK: calll _may_throw_or_crash
+; CHECK: movl 8(%esp), %[[next:[^ ,]*]]
+; CHECK: movl %[[next]], %fs:0
+; CHECK: retl
+
+define void @use_CxxFrameHandler3() {
+ invoke void @may_throw_or_crash()
+ to label %cont unwind label %catchall
+cont:
+ ret void
+catchall:
+ %ehvals = landingpad { i8*, i32 } personality i32 (...)* @__CxxFrameHandler3
+ catch i8* null
+ %ehptr = extractvalue { i8*, i32 } %ehvals, 0
+ call void @llvm.eh.begincatch(i8* %ehptr, i8* null)
+ call void @llvm.eh.endcatch()
+ br label %cont
+}
+
+; CHECK-LABEL: _use_CxxFrameHandler3:
+; CHECK: subl ${{[0-9]+}}, %esp
+; CHECK: movl %esp, (%esp)
+; CHECK: movl $-1, 12(%esp)
+; CHECK: leal 4(%esp), %[[node:[^ ,]*]]
+; CHECK: movl $___ehhandler$use_CxxFrameHandler3, 8(%esp)
+; CHECK: movl %fs:0, %[[next:[^ ,]*]]
+; CHECK: movl %[[next]], 4(%esp)
+; CHECK: movl %[[node]], %fs:0
+; CHECK: calll _may_throw_or_crash
+; CHECK: movl 4(%esp), %[[next:[^ ,]*]]
+; CHECK: movl %[[next]], %fs:0
+; CHECK: retl
+
+; CHECK-LABEL: ___ehhandler$use_CxxFrameHandler3:
+; CHECK: movl $L__ehtable$use_CxxFrameHandler3, %eax
+; CHECK: jmp ___CxxFrameHandler3 # TAILCALL
diff --git a/test/CodeGen/X86/win32_sret.ll b/test/CodeGen/X86/win32_sret.ll
index 8728712cece4..b38273ad9594 100644
--- a/test/CodeGen/X86/win32_sret.ll
+++ b/test/CodeGen/X86/win32_sret.ll
@@ -94,7 +94,7 @@ entry:
; LINUX-LABEL: sret4:
; LINUX: retl $4
- %x = getelementptr inbounds %struct.S4* %agg.result, i32 0, i32 0
+ %x = getelementptr inbounds %struct.S4, %struct.S4* %agg.result, i32 0, i32 0
store i32 42, i32* %x, align 4
ret void
}
@@ -106,8 +106,8 @@ define x86_thiscallcc void @"\01?foo@C5@@QAE?AUS5@@XZ"(%struct.S5* noalias sret
entry:
%this.addr = alloca %class.C5*, align 4
store %class.C5* %this, %class.C5** %this.addr, align 4
- %this1 = load %class.C5** %this.addr
- %x = getelementptr inbounds %struct.S5* %agg.result, i32 0, i32 0
+ %this1 = load %class.C5*, %class.C5** %this.addr
+ %x = getelementptr inbounds %struct.S5, %struct.S5* %agg.result, i32 0, i32 0
store i32 42, i32* %x, align 4
ret void
; WIN32-LABEL: {{^}}"?foo@C5@@QAE?AUS5@@XZ":
@@ -155,25 +155,25 @@ define void @test6_f(%struct.test6* %x) nounwind {
; LINUX-LABEL: test6_f:
; The %x argument is moved to %ecx. It will be the this pointer.
-; WIN32: movl 8(%ebp), %ecx
+; WIN32: movl 20(%esp), %ecx
; The %x argument is moved to (%esp). It will be the this pointer. With -O0
; we copy esp to ecx and use (ecx) instead of (esp).
-; MINGW_X86: movl 8(%ebp), %eax
+; MINGW_X86: movl 20(%esp), %eax
; MINGW_X86: movl %eax, (%e{{([a-d]x)|(sp)}})
-; CYGWIN: movl 8(%ebp), %eax
+; CYGWIN: movl 20(%esp), %eax
; CYGWIN: movl %eax, (%e{{([a-d]x)|(sp)}})
; The sret pointer is (%esp)
-; WIN32: leal 8(%esp), %[[REG:e[a-d]x]]
+; WIN32: leal 4(%esp), %[[REG:e[a-d]x]]
; WIN32-NEXT: movl %[[REG]], (%e{{([a-d]x)|(sp)}})
; The sret pointer is %ecx
-; MINGW_X86-NEXT: leal 8(%esp), %ecx
+; MINGW_X86-NEXT: leal 4(%esp), %ecx
; MINGW_X86-NEXT: calll _test6_g
-; CYGWIN-NEXT: leal 8(%esp), %ecx
+; CYGWIN-NEXT: leal 4(%esp), %ecx
; CYGWIN-NEXT: calll _test6_g
%tmp = alloca %struct.test6, align 4
@@ -191,16 +191,16 @@ define void @test7_f(%struct.test7* %x) nounwind {
; LINUX-LABEL: test7_f:
; The %x argument is moved to %ecx on all OSs. It will be the this pointer.
-; WIN32: movl 8(%ebp), %ecx
-; MINGW_X86: movl 8(%ebp), %ecx
-; CYGWIN: movl 8(%ebp), %ecx
+; WIN32: movl 20(%esp), %ecx
+; MINGW_X86: movl 20(%esp), %ecx
+; CYGWIN: movl 20(%esp), %ecx
; The sret pointer is (%esp)
-; WIN32: leal 8(%esp), %[[REG:e[a-d]x]]
+; WIN32: leal 4(%esp), %[[REG:e[a-d]x]]
; WIN32-NEXT: movl %[[REG]], (%e{{([a-d]x)|(sp)}})
-; MINGW_X86: leal 8(%esp), %[[REG:e[a-d]x]]
+; MINGW_X86: leal 4(%esp), %[[REG:e[a-d]x]]
; MINGW_X86-NEXT: movl %[[REG]], (%e{{([a-d]x)|(sp)}})
-; CYGWIN: leal 8(%esp), %[[REG:e[a-d]x]]
+; CYGWIN: leal 4(%esp), %[[REG:e[a-d]x]]
; CYGWIN-NEXT: movl %[[REG]], (%e{{([a-d]x)|(sp)}})
%tmp = alloca %struct.test7, align 4
@@ -209,9 +209,9 @@ define void @test7_f(%struct.test7* %x) nounwind {
}
define x86_thiscallcc void @test7_g(%struct.test7* %in, %struct.test7* sret %out) {
- %s = getelementptr %struct.test7* %in, i32 0, i32 0
- %d = getelementptr %struct.test7* %out, i32 0, i32 0
- %v = load i32* %s
+ %s = getelementptr %struct.test7, %struct.test7* %in, i32 0, i32 0
+ %d = getelementptr %struct.test7, %struct.test7* %out, i32 0, i32 0
+ %v = load i32, i32* %s
store i32 %v, i32* %d
call void @clobber_eax()
ret void
diff --git a/test/CodeGen/X86/win64_alloca_dynalloca.ll b/test/CodeGen/X86/win64_alloca_dynalloca.ll
index aab2eea7ce44..abda2272c662 100644
--- a/test/CodeGen/X86/win64_alloca_dynalloca.ll
+++ b/test/CodeGen/X86/win64_alloca_dynalloca.ll
@@ -14,26 +14,24 @@ entry:
%buf0 = alloca i8, i64 4096, align 1
; ___chkstk_ms does not adjust %rsp.
-; M64: movq %rsp, %rbp
-; M64: $4096, %rax
+; M64: $4096, %eax
; M64: callq ___chkstk_ms
; M64: subq %rax, %rsp
+; M64: leaq 128(%rsp), %rbp
; __chkstk does not adjust %rsp.
-; W64: movq %rsp, %rbp
-; W64: $4096, %rax
+; W64: $4096, %eax
; W64: callq __chkstk
; W64: subq %rax, %rsp
+; W64: leaq 128(%rsp), %rbp
; Use %r11 for the large model.
-; L64: movq %rsp, %rbp
-; L64: $4096, %rax
+; L64: $4096, %eax
; L64: movabsq $__chkstk, %r11
; L64: callq *%r11
; L64: subq %rax, %rsp
; Freestanding
-; EFI: movq %rsp, %rbp
; EFI: $[[B0OFS:4096|4104]], %rsp
; EFI-NOT: call
@@ -68,12 +66,12 @@ entry:
; M64: subq $48, %rsp
; M64: movq %rax, 32(%rsp)
-; M64: leaq -4096(%rbp), %r9
+; M64: leaq -128(%rbp), %r9
; M64: callq bar
; W64: subq $48, %rsp
; W64: movq %rax, 32(%rsp)
-; W64: leaq -4096(%rbp), %r9
+; W64: leaq -128(%rbp), %r9
; W64: callq bar
; EFI: subq $48, %rsp
@@ -83,9 +81,9 @@ entry:
ret i64 %r
-; M64: movq %rbp, %rsp
+; M64: leaq 3968(%rbp), %rsp
-; W64: movq %rbp, %rsp
+; W64: leaq 3968(%rbp), %rsp
}
diff --git a/test/CodeGen/X86/win64_eh.ll b/test/CodeGen/X86/win64_eh.ll
index 4670087b9b4d..d668f43c895e 100644
--- a/test/CodeGen/X86/win64_eh.ll
+++ b/test/CodeGen/X86/win64_eh.ll
@@ -1,5 +1,6 @@
-; RUN: llc < %s -O0 -mcpu=corei7 -mtriple=x86_64-pc-windows-itanium | FileCheck %s -check-prefix=WIN64
-; RUN: llc < %s -O0 -mcpu=corei7 -mtriple=x86_64-pc-mingw32 | FileCheck %s -check-prefix=WIN64
+; RUN: llc < %s -O0 -mattr=sse2 -mtriple=x86_64-pc-windows-itanium | FileCheck %s -check-prefix=WIN64 -check-prefix=NORM
+; RUN: llc < %s -O0 -mattr=sse2 -mtriple=x86_64-pc-mingw32 | FileCheck %s -check-prefix=WIN64 -check-prefix=NORM
+; RUN: llc < %s -O0 -mattr=sse2 -mtriple=x86_64-pc-mingw32 -mcpu=atom | FileCheck %s -check-prefix=WIN64 -check-prefix=ATOM
; Check function without prolog
define void @foo0() uwtable {
@@ -20,7 +21,8 @@ entry:
}
; WIN64-LABEL: foo1:
; WIN64: .seh_proc foo1
-; WIN64: subq $4000, %rsp
+; NORM: subq $4000, %rsp
+; ATOM: leaq -4000(%rsp), %rsp
; WIN64: .seh_stackalloc 4000
; WIN64: .seh_endprologue
; WIN64: addq $4000, %rsp
@@ -35,7 +37,7 @@ entry:
}
; WIN64-LABEL: foo2:
; WIN64: .seh_proc foo2
-; WIN64: movabsq $8000, %rax
+; WIN64: movl $8000, %eax
; WIN64: callq {{__chkstk|___chkstk_ms}}
; WIN64: subq %rax, %rsp
; WIN64: .seh_stackalloc 8000
@@ -60,21 +62,21 @@ entry:
store i32 %d_arg, i32* %d
store i32 %e_arg, i32* %e
store i32 %f_arg, i32* %f
- %tmp = load i32* %a
+ %tmp = load i32, i32* %a
%tmp1 = mul i32 %tmp, 2
- %tmp2 = load i32* %b
+ %tmp2 = load i32, i32* %b
%tmp3 = mul i32 %tmp2, 3
%tmp4 = add i32 %tmp1, %tmp3
- %tmp5 = load i32* %c
+ %tmp5 = load i32, i32* %c
%tmp6 = mul i32 %tmp5, 5
%tmp7 = add i32 %tmp4, %tmp6
- %tmp8 = load i32* %d
+ %tmp8 = load i32, i32* %d
%tmp9 = mul i32 %tmp8, 7
%tmp10 = add i32 %tmp7, %tmp9
- %tmp11 = load i32* %e
+ %tmp11 = load i32, i32* %e
%tmp12 = mul i32 %tmp11, 11
%tmp13 = add i32 %tmp10, %tmp12
- %tmp14 = load i32* %f
+ %tmp14 = load i32, i32* %f
%tmp15 = mul i32 %tmp14, 13
%tmp16 = add i32 %tmp13, %tmp15
ret i32 %tmp16
@@ -83,7 +85,8 @@ entry:
; WIN64: .seh_proc foo3
; WIN64: pushq %rsi
; WIN64: .seh_pushreg 6
-; WIN64: subq $24, %rsp
+; NORM: subq $24, %rsp
+; ATOM: leaq -24(%rsp), %rsp
; WIN64: .seh_stackalloc 24
; WIN64: .seh_endprologue
; WIN64: addq $24, %rsp
@@ -102,7 +105,7 @@ define i32 @foo4() #0 {
entry:
%step = alloca i32, align 4
store i32 0, i32* %step
- %tmp = load i32* %step
+ %tmp = load i32, i32* %step
%tmp1 = invoke i32 @bar()
to label %finally unwind label %landingpad
@@ -120,13 +123,14 @@ landingpad:
unreachable
endtryfinally:
- %tmp10 = load i32* %step
+ %tmp10 = load i32, i32* %step
ret i32 %tmp10
}
; WIN64-LABEL: foo4:
; WIN64: .seh_proc foo4
; WIN64: .seh_handler _d_eh_personality, @unwind, @except
-; WIN64: subq $56, %rsp
+; NORM: subq $56, %rsp
+; ATOM: leaq -56(%rsp), %rsp
; WIN64: .seh_stackalloc 56
; WIN64: .seh_endprologue
; WIN64: addq $56, %rsp
@@ -146,23 +150,24 @@ entry:
; WIN64: .seh_proc foo5
; WIN64: pushq %rbp
; WIN64: .seh_pushreg 5
-; WIN64: movq %rsp, %rbp
; WIN64: pushq %rdi
; WIN64: .seh_pushreg 7
; WIN64: pushq %rbx
; WIN64: .seh_pushreg 3
-; WIN64: andq $-64, %rsp
-; WIN64: subq $128, %rsp
-; WIN64: .seh_stackalloc 48
-; WIN64: .seh_setframe 5, 64
-; WIN64: movaps %xmm7, -32(%rbp) # 16-byte Spill
-; WIN64: movaps %xmm6, -48(%rbp) # 16-byte Spill
-; WIN64: .seh_savexmm 6, 16
-; WIN64: .seh_savexmm 7, 32
+; NORM: subq $96, %rsp
+; ATOM: leaq -96(%rsp), %rsp
+; WIN64: .seh_stackalloc 96
+; WIN64: leaq 96(%rsp), %rbp
+; WIN64: .seh_setframe 5, 96
+; WIN64: movaps %xmm7, -16(%rbp) # 16-byte Spill
+; WIN64: .seh_savexmm 7, 80
+; WIN64: movaps %xmm6, -32(%rbp) # 16-byte Spill
+; WIN64: .seh_savexmm 6, 64
; WIN64: .seh_endprologue
-; WIN64: movaps -48(%rbp), %xmm6 # 16-byte Reload
-; WIN64: movaps -32(%rbp), %xmm7 # 16-byte Reload
-; WIN64: leaq -16(%rbp), %rsp
+; WIN64: andq $-64, %rsp
+; WIN64: movaps -32(%rbp), %xmm6 # 16-byte Reload
+; WIN64: movaps -16(%rbp), %xmm7 # 16-byte Reload
+; WIN64: movq %rbp, %rsp
; WIN64: popq %rbx
; WIN64: popq %rdi
; WIN64: popq %rbp
diff --git a/test/CodeGen/X86/win64_frame.ll b/test/CodeGen/X86/win64_frame.ll
new file mode 100644
index 000000000000..2c62f4918a7f
--- /dev/null
+++ b/test/CodeGen/X86/win64_frame.ll
@@ -0,0 +1,122 @@
+; RUN: llc < %s -mtriple=x86_64-pc-win32 | FileCheck %s
+
+define i32 @f1(i32 %p1, i32 %p2, i32 %p3, i32 %p4, i32 %p5) "no-frame-pointer-elim"="true" {
+ ; CHECK-LABEL: f1:
+ ; CHECK: movl 48(%rbp), %eax
+ ret i32 %p5
+}
+
+define void @f2(i32 %p, ...) "no-frame-pointer-elim"="true" {
+ ; CHECK-LABEL: f2:
+ ; CHECK: .seh_stackalloc 8
+ ; CHECK: movq %rsp, %rbp
+ ; CHECK: .seh_setframe 5, 0
+ ; CHECK: movq %rdx, 32(%rbp)
+ ; CHECK: leaq 32(%rbp), %rax
+ %ap = alloca i8, align 8
+ call void @llvm.va_start(i8* %ap)
+ ret void
+}
+
+define i8* @f3() "no-frame-pointer-elim"="true" {
+ ; CHECK-LABEL: f3:
+ ; CHECK: movq %rsp, %rbp
+ ; CHECK: .seh_setframe 5, 0
+ ; CHECK: movq 8(%rbp), %rax
+ %ra = call i8* @llvm.returnaddress(i32 0)
+ ret i8* %ra
+}
+
+define i8* @f4() "no-frame-pointer-elim"="true" {
+ ; CHECK-LABEL: f4:
+ ; CHECK: pushq %rbp
+ ; CHECK: .seh_pushreg 5
+ ; CHECK: subq $304, %rsp
+ ; CHECK: .seh_stackalloc 304
+ ; CHECK: leaq 128(%rsp), %rbp
+ ; CHECK: .seh_setframe 5, 128
+ ; CHECK: .seh_endprologue
+ ; CHECK: movq 184(%rbp), %rax
+ alloca [300 x i8]
+ %ra = call i8* @llvm.returnaddress(i32 0)
+ ret i8* %ra
+}
+
+declare void @external(i8*)
+
+define void @f5() "no-frame-pointer-elim"="true" {
+ ; CHECK-LABEL: f5:
+ ; CHECK: subq $336, %rsp
+ ; CHECK: .seh_stackalloc 336
+ ; CHECK: leaq 128(%rsp), %rbp
+ ; CHECK: .seh_setframe 5, 128
+ ; CHECK: leaq -92(%rbp), %rcx
+ ; CHECK: callq external
+ %a = alloca [300 x i8]
+ %gep = getelementptr [300 x i8], [300 x i8]* %a, i32 0, i32 0
+ call void @external(i8* %gep)
+ ret void
+}
+
+define void @f6(i32 %p, ...) "no-frame-pointer-elim"="true" {
+ ; CHECK-LABEL: f6:
+ ; CHECK: subq $336, %rsp
+ ; CHECK: .seh_stackalloc 336
+ ; CHECK: leaq 128(%rsp), %rbp
+ ; CHECK: .seh_setframe 5, 128
+ ; CHECK: leaq -92(%rbp), %rcx
+ ; CHECK: callq external
+ %a = alloca [300 x i8]
+ %gep = getelementptr [300 x i8], [300 x i8]* %a, i32 0, i32 0
+ call void @external(i8* %gep)
+ ret void
+}
+
+define i32 @f7(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e) "no-frame-pointer-elim"="true" {
+ ; CHECK-LABEL: f7:
+ ; CHECK: pushq %rbp
+ ; CHECK: .seh_pushreg 5
+ ; CHECK: subq $304, %rsp
+ ; CHECK: .seh_stackalloc 304
+ ; CHECK: leaq 128(%rsp), %rbp
+ ; CHECK: .seh_setframe 5, 128
+ ; CHECK: andq $-64, %rsp
+ ; CHECK: movl 224(%rbp), %eax
+ ; CHECK: leaq 176(%rbp), %rsp
+ alloca [300 x i8], align 64
+ ret i32 %e
+}
+
+define i32 @f8(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e) "no-frame-pointer-elim"="true" {
+ ; CHECK-LABEL: f8:
+ ; CHECK: subq $352, %rsp
+ ; CHECK: .seh_stackalloc 352
+ ; CHECK: leaq 128(%rsp), %rbp
+ ; CHECK: .seh_setframe 5, 128
+
+ %alloca = alloca [300 x i8], align 64
+ ; CHECK: andq $-64, %rsp
+ ; CHECK: movq %rsp, %rbx
+
+ alloca i32, i32 %a
+ ; CHECK: movl %ecx, %eax
+ ; CHECK: leaq 15(,%rax,4), %rax
+ ; CHECK: andq $-16, %rax
+ ; CHECK: callq __chkstk
+ ; CHECK: subq %rax, %rsp
+
+ %gep = getelementptr [300 x i8], [300 x i8]* %alloca, i32 0, i32 0
+ call void @external(i8* %gep)
+ ; CHECK: subq $32, %rsp
+ ; CHECK: leaq (%rbx), %rcx
+ ; CHECK: callq external
+ ; CHECK: addq $32, %rsp
+
+ ret i32 %e
+ ; CHECK: movl %esi, %eax
+ ; CHECK: leaq 224(%rbp), %rsp
+}
+
+declare i8* @llvm.returnaddress(i32) nounwind readnone
+
+declare void @llvm.va_start(i8*) nounwind
diff --git a/test/CodeGen/X86/win64_nonvol.ll b/test/CodeGen/X86/win64_nonvol.ll
new file mode 100644
index 000000000000..8e5f6cec1ab7
--- /dev/null
+++ b/test/CodeGen/X86/win64_nonvol.ll
@@ -0,0 +1,28 @@
+; RUN: llc < %s -mtriple=x86_64-pc-win32 | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-pc-linux | FileCheck %s
+
+; Check that, if a Win64 ABI function calls a SysV ABI function, all the
+; Win64 nonvolatile registers get saved.
+
+; CHECK-LABEL: bar:
+define x86_64_win64cc void @bar(i32 %a, i32 %b) {
+; CHECK-DAG: pushq %rdi
+; CHECK-DAG: pushq %rsi
+; CHECK-DAG: movaps %xmm6,
+; CHECK-DAG: movaps %xmm7,
+; CHECK-DAG: movaps %xmm8,
+; CHECK-DAG: movaps %xmm9,
+; CHECK-DAG: movaps %xmm10,
+; CHECK-DAG: movaps %xmm11,
+; CHECK-DAG: movaps %xmm12,
+; CHECK-DAG: movaps %xmm13,
+; CHECK-DAG: movaps %xmm14,
+; CHECK-DAG: movaps %xmm15,
+; CHECK: callq foo
+; CHECK: ret
+ call x86_64_sysvcc void @foo(i32 %a, i32 %b)
+ ret void
+}
+
+declare x86_64_sysvcc void @foo(i32 %a, i32 %b)
+
diff --git a/test/CodeGen/X86/win_cst_pool.ll b/test/CodeGen/X86/win_cst_pool.ll
index d534b126b192..199557dac206 100644
--- a/test/CodeGen/X86/win_cst_pool.ll
+++ b/test/CodeGen/X86/win_cst_pool.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=x86_64-win32 -mcpu=corei7 | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-win32 -mattr=sse2 | FileCheck %s
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-pc-windows-msvc"
diff --git a/test/CodeGen/X86/win_eh_prepare.ll b/test/CodeGen/X86/win_eh_prepare.ll
new file mode 100644
index 000000000000..a33dd92ad72a
--- /dev/null
+++ b/test/CodeGen/X86/win_eh_prepare.ll
@@ -0,0 +1,82 @@
+; RUN: opt -S -winehprepare -dwarfehprepare -mtriple x86_64-pc-windows-msvc < %s | FileCheck %s
+
+; FIXME: Add and test outlining here.
+
+declare void @maybe_throw()
+
+@_ZTIi = external constant i8*
+@g = external global i32
+
+declare i32 @__C_specific_handler(...)
+declare i32 @__gxx_personality_seh0(...)
+declare i32 @llvm.eh.typeid.for(i8*) readnone nounwind
+
+define i32 @use_seh() {
+entry:
+ invoke void @maybe_throw()
+ to label %cont unwind label %lpad
+
+cont:
+ ret i32 0
+
+lpad:
+ %ehvals = landingpad { i8*, i32 } personality i32 (...)* @__C_specific_handler
+ cleanup
+ catch i8* bitcast (i32 (i8*, i8*)* @filt_g to i8*)
+ %ehsel = extractvalue { i8*, i32 } %ehvals, 1
+ %filt_g_sel = call i32 @llvm.eh.typeid.for(i8* bitcast (i32 (i8*, i8*)* @filt_g to i8*))
+ %matches = icmp eq i32 %ehsel, %filt_g_sel
+ br i1 %matches, label %ret1, label %eh.resume
+
+ret1:
+ ret i32 1
+
+eh.resume:
+ resume { i8*, i32 } %ehvals
+}
+
+define internal i32 @filt_g(i8*, i8*) {
+ %g = load i32, i32* @g
+ ret i32 %g
+}
+
+; CHECK-LABEL: define i32 @use_seh()
+; CHECK: invoke void @maybe_throw()
+; CHECK-NEXT: to label %cont unwind label %lpad
+; CHECK: landingpad
+; CHECK-NEXT: cleanup
+; CHECK-NEXT: catch
+; CHECK-NEXT: call i8* (...) @llvm.eh.actions({{.*}})
+
+
+; A MinGW64-ish EH style. It could happen if a binary uses both MSVC CRT and
+; mingw CRT and is linked with LTO.
+define i32 @use_gcc() {
+entry:
+ invoke void @maybe_throw()
+ to label %cont unwind label %lpad
+
+cont:
+ ret i32 0
+
+lpad:
+ %ehvals = landingpad { i8*, i32 } personality i32 (...)* @__gxx_personality_seh0
+ cleanup
+ catch i8* bitcast (i8** @_ZTIi to i8*)
+ %ehsel = extractvalue { i8*, i32 } %ehvals, 1
+ %filt_g_sel = call i32 @llvm.eh.typeid.for(i8* bitcast (i32 (i8*, i8*)* @filt_g to i8*))
+ %matches = icmp eq i32 %ehsel, %filt_g_sel
+ br i1 %matches, label %ret1, label %eh.resume
+
+ret1:
+ ret i32 1
+
+eh.resume:
+ resume { i8*, i32 } %ehvals
+}
+
+; CHECK-LABEL: define i32 @use_gcc()
+; CHECK: invoke void @maybe_throw()
+; CHECK-NEXT: to label %cont unwind label %lpad
+; CHECK: eh.resume:
+; CHECK: call void @_Unwind_Resume(i8* %exn.obj)
diff --git a/test/CodeGen/X86/x32-function_pointer-1.ll b/test/CodeGen/X86/x32-function_pointer-1.ll
index 2baf92a99790..952add91d52b 100644
--- a/test/CodeGen/X86/x32-function_pointer-1.ll
+++ b/test/CodeGen/X86/x32-function_pointer-1.ll
@@ -8,11 +8,11 @@
define void @bar(i8* %h) nounwind uwtable {
entry:
- %0 = load void (i8*)** @foo1, align 4
+ %0 = load void (i8*)*, void (i8*)** @foo1, align 4
; CHECK: movl foo1(%rip), %e{{[^,]*}}
tail call void %0(i8* %h) nounwind
; CHECK: callq *%r{{[^,]*}}
- %1 = load void (i8*)** @foo2, align 4
+ %1 = load void (i8*)*, void (i8*)** @foo2, align 4
; CHECK: movl foo2(%rip), %e{{[^,]*}}
tail call void %1(i8* %h) nounwind
; CHECK: jmpq *%r{{[^,]*}}
diff --git a/test/CodeGen/X86/x32-lea-1.ll b/test/CodeGen/X86/x32-lea-1.ll
new file mode 100644
index 000000000000..2f7d71e2baf1
--- /dev/null
+++ b/test/CodeGen/X86/x32-lea-1.ll
@@ -0,0 +1,10 @@
+; RUN: llc < %s -mtriple=x86_64-linux-gnux32 -O0 | FileCheck %s
+; CHECK: leal {{[-0-9]*}}(%r{{s|b}}p),
+; CHECK-NOT: leal {{[-0-9]*}}(%e{{s|b}}p),
+
+define void @foo(i32** %p) {
+ %a = alloca i32, i32 10
+ %addr = getelementptr i32, i32* %a, i32 4
+ store i32* %addr, i32** %p
+ ret void
+}
diff --git a/test/CodeGen/X86/x86-32-vector-calling-conv.ll b/test/CodeGen/X86/x86-32-vector-calling-conv.ll
new file mode 100644
index 000000000000..b2bda7ab8d01
--- /dev/null
+++ b/test/CodeGen/X86/x86-32-vector-calling-conv.ll
@@ -0,0 +1,44 @@
+; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+avx512f | FileCheck %s --check-prefix=DARWIN
+; RUN: llc < %s -mtriple=i686-pc-linux -mattr=+avx512f | FileCheck %s --check-prefix=LINUX
+
+; CHECK-LABEL: test_sse:
+; DARWIN-DAG: vpaddd %xmm1, %xmm0, %xmm0
+; DARWIN-DAG: vpaddd %xmm3, %xmm2, %xmm1
+; DARWIN: vpaddd %xmm1, %xmm0, %xmm0
+; LINUX-DAG: vpaddd %xmm1, %xmm0, %xmm0
+; LINUX-DAG: vpaddd {{[0-9]+}}(%e{{s|b}}p), %xmm2, %xmm1
+; LINUX: vpaddd %xmm1, %xmm0, %xmm0
+define <4 x i32> @test_sse(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x i32> %d) nounwind {
+ %r0 = add <4 x i32> %a, %b
+ %r1 = add <4 x i32> %c, %d
+ %ret = add <4 x i32> %r0, %r1
+ ret <4 x i32> %ret
+}
+
+; CHECK-LABEL: test_avx:
+; DARWIN-DAG: vpaddd %ymm1, %ymm0, %ymm0
+; DARWIN-DAG: vpaddd %ymm3, %ymm2, %ymm1
+; DARWIN: vpaddd %ymm1, %ymm0, %ymm0
+; LINUX-DAG: vpaddd %ymm1, %ymm0, %ymm0
+; LINUX-DAG: vpaddd {{[0-9]+}}(%e{{s|b}}p), %ymm2, %ymm1
+; LINUX: vpaddd %ymm1, %ymm0, %ymm0
+define <8 x i32> @test_avx(<8 x i32> %a, <8 x i32> %b, <8 x i32> %c, <8 x i32> %d) nounwind {
+ %r0 = add <8 x i32> %a, %b
+ %r1 = add <8 x i32> %c, %d
+ %ret = add <8 x i32> %r0, %r1
+ ret <8 x i32> %ret
+}
+
+; CHECK-LABEL: test_avx512:
+; DARWIN-DAG: vpaddd %zmm1, %zmm0, %zmm0
+; DARWIN-DAG: vpaddd %zmm3, %zmm2, %zmm1
+; DARWIN: vpaddd %zmm1, %zmm0, %zmm0
+; LINUX-DAG: vpaddd %zmm1, %zmm0, %zmm0
+; LINUX-DAG: vpaddd {{[0-9]+}}(%e{{s|b}}p), %zmm2, %zmm1
+; LINUX: vpaddd %zmm1, %zmm0, %zmm0
+define <16 x i32> @test_avx512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %c, <16 x i32> %d) nounwind {
+ %r0 = add <16 x i32> %a, %b
+ %r1 = add <16 x i32> %c, %d
+ %ret = add <16 x i32> %r0, %r1
+ ret <16 x i32> %ret
+}
diff --git a/test/CodeGen/X86/x86-64-and-mask.ll b/test/CodeGen/X86/x86-64-and-mask.ll
index bc6c612482b0..c8a832a6d3a2 100644
--- a/test/CodeGen/X86/x86-64-and-mask.ll
+++ b/test/CodeGen/X86/x86-64-and-mask.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mcpu=corei7 < %s | FileCheck %s
+; RUN: llc < %s | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
target triple = "x86_64-apple-darwin8"
diff --git a/test/CodeGen/X86/x86-64-asm.ll b/test/CodeGen/X86/x86-64-asm.ll
index 2640e593ec18..f103ab75af99 100644
--- a/test/CodeGen/X86/x86-64-asm.ll
+++ b/test/CodeGen/X86/x86-64-asm.ll
@@ -6,7 +6,7 @@ target triple = "x86_64-unknown-linux-gnu"
define void @frame_dummy() {
entry:
- %tmp1 = tail call void (i8*)* (void (i8*)*)* asm "", "=r,0,~{dirflag},~{fpsr},~{flags}"( void (i8*)* null ) ; <void (i8*)*> [#uses=0]
+ %tmp1 = tail call void (i8*)* (void (i8*)*) asm "", "=r,0,~{dirflag},~{fpsr},~{flags}"( void (i8*)* null ) ; <void (i8*)*> [#uses=0]
ret void
}
diff --git a/test/CodeGen/X86/x86-64-baseptr.ll b/test/CodeGen/X86/x86-64-baseptr.ll
new file mode 100644
index 000000000000..7fd94fa10f6c
--- /dev/null
+++ b/test/CodeGen/X86/x86-64-baseptr.ll
@@ -0,0 +1,26 @@
+; RUN: llc -mtriple=x86_64-pc-linux -force-align-stack -stack-alignment=32 < %s | FileCheck %s
+; RUN: llc -mtriple=x86_64-pc-linux-gnux32 -force-align-stack -stack-alignment=32 < %s | FileCheck -check-prefix=X32ABI %s
+; This should run with NaCl as well ( -mtriple=x86_64-pc-nacl ) but currently doesn't due to PR22655
+
+; Make sure the correct register gets set up as the base pointer
+; This should be rbx for x64 and 64-bit NaCl and ebx for x32
+; CHECK-LABEL: base
+; CHECK: subq $32, %rsp
+; CHECK: movq %rsp, %rbx
+; X32ABI-LABEL: base
+; X32ABI: subl $32, %esp
+; X32ABI: movl %esp, %ebx
+; NACL-LABEL: base
+; NACL: subq $32, %rsp
+; NACL: movq %rsp, %rbx
+
+declare i32 @helper() nounwind
+define void @base() #0 {
+entry:
+ %k = call i32 @helper()
+ %a = alloca i32, i32 %k, align 4
+ store i32 0, i32* %a, align 4
+ ret void
+}
+
+attributes #0 = { nounwind uwtable "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"}
diff --git a/test/CodeGen/X86/x86-64-disp.ll b/test/CodeGen/X86/x86-64-disp.ll
index d8059ebb1c19..790634c1e4a5 100644
--- a/test/CodeGen/X86/x86-64-disp.ll
+++ b/test/CodeGen/X86/x86-64-disp.ll
@@ -8,7 +8,7 @@ target triple = "x86_64-unknown-linux-gnu"
@call_used_regs = external global [53 x i8], align 32
define fastcc void @foo() nounwind {
- %t = getelementptr [53 x i8]* @call_used_regs, i64 0, i64 4294967295
+ %t = getelementptr [53 x i8], [53 x i8]* @call_used_regs, i64 0, i64 4294967295
store i8 1, i8* %t, align 1
ret void
}
diff --git a/test/CodeGen/X86/x86-64-gv-offset.ll b/test/CodeGen/X86/x86-64-gv-offset.ll
index 365e4af63fc1..f60d2cff3951 100644
--- a/test/CodeGen/X86/x86-64-gv-offset.ll
+++ b/test/CodeGen/X86/x86-64-gv-offset.ll
@@ -5,8 +5,8 @@
define i32 @main() nounwind {
entry:
- %tmp2 = load float* getelementptr (%struct.x* @X, i32 0, i32 0), align 16 ; <float> [#uses=1]
- %tmp4 = load double* getelementptr (%struct.x* @X, i32 0, i32 1), align 8 ; <double> [#uses=1]
+ %tmp2 = load float, float* getelementptr (%struct.x, %struct.x* @X, i32 0, i32 0), align 16 ; <float> [#uses=1]
+ %tmp4 = load double, double* getelementptr (%struct.x, %struct.x* @X, i32 0, i32 1), align 8 ; <double> [#uses=1]
tail call void @t( float %tmp2, double %tmp4 ) nounwind
ret i32 0
}
diff --git a/test/CodeGen/X86/x86-64-jumps.ll b/test/CodeGen/X86/x86-64-jumps.ll
index 11b40c897618..fcb6d9ed592a 100644
--- a/test/CodeGen/X86/x86-64-jumps.ll
+++ b/test/CodeGen/X86/x86-64-jumps.ll
@@ -22,12 +22,12 @@ define void @test2(i32 %i) nounwind ssp {
entry:
%i.addr = alloca i32 ; <i32*> [#uses=2]
store i32 %i, i32* %i.addr
- %tmp = load i32* %i.addr ; <i32> [#uses=1]
+ %tmp = load i32, i32* %i.addr ; <i32> [#uses=1]
%idxprom = sext i32 %tmp to i64 ; <i64> [#uses=1]
- %arrayidx = getelementptr inbounds i32* getelementptr inbounds ([3 x i32]* @test.array, i32 0, i32 0), i64 %idxprom ; <i32*> [#uses=1]
- %tmp1 = load i32* %arrayidx ; <i32> [#uses=1]
+ %arrayidx = getelementptr inbounds i32, i32* getelementptr inbounds ([3 x i32], [3 x i32]* @test.array, i32 0, i32 0), i64 %idxprom ; <i32*> [#uses=1]
+ %tmp1 = load i32, i32* %arrayidx ; <i32> [#uses=1]
%idx.ext = sext i32 %tmp1 to i64 ; <i64> [#uses=1]
- %add.ptr = getelementptr i8* blockaddress(@test2, %foo), i64 %idx.ext ; <i8*> [#uses=1]
+ %add.ptr = getelementptr i8, i8* blockaddress(@test2, %foo), i64 %idx.ext ; <i8*> [#uses=1]
br label %indirectgoto
foo: ; preds = %indirectgoto, %indirectgoto, %indirectgoto, %indirectgoto, %indirectgoto
diff --git a/test/CodeGen/X86/x86-64-mem.ll b/test/CodeGen/X86/x86-64-mem.ll
index d15f516cddee..557f9486ad6a 100644
--- a/test/CodeGen/X86/x86-64-mem.ll
+++ b/test/CodeGen/X86/x86-64-mem.ll
@@ -17,18 +17,18 @@
@bdst = internal global [500000 x i32] zeroinitializer, align 32 ; <[500000 x i32]*> [#uses=0]
define void @test1() nounwind {
- %tmp = load i32* getelementptr ([0 x i32]* @src, i32 0, i32 0) ; <i32> [#uses=1]
- store i32 %tmp, i32* getelementptr ([0 x i32]* @dst, i32 0, i32 0)
+ %tmp = load i32, i32* getelementptr ([0 x i32], [0 x i32]* @src, i32 0, i32 0) ; <i32> [#uses=1]
+ store i32 %tmp, i32* getelementptr ([0 x i32], [0 x i32]* @dst, i32 0, i32 0)
ret void
}
define void @test2() nounwind {
- store i32* getelementptr ([0 x i32]* @dst, i32 0, i32 0), i32** @ptr
+ store i32* getelementptr ([0 x i32], [0 x i32]* @dst, i32 0, i32 0), i32** @ptr
ret void
}
define void @test3() nounwind {
- store i32* getelementptr ([500 x i32]* @ldst, i32 0, i32 0), i32** @lptr
+ store i32* getelementptr ([500 x i32], [500 x i32]* @ldst, i32 0, i32 0), i32** @lptr
br label %return
return: ; preds = %0
diff --git a/test/CodeGen/X86/x86-64-pic-4.ll b/test/CodeGen/X86/x86-64-pic-4.ll
index 33b08c4b4b04..42d08cc2057d 100644
--- a/test/CodeGen/X86/x86-64-pic-4.ll
+++ b/test/CodeGen/X86/x86-64-pic-4.ll
@@ -5,6 +5,6 @@
define i32 @get_a() {
entry:
- %tmp1 = load i32* @a, align 4
+ %tmp1 = load i32, i32* @a, align 4
ret i32 %tmp1
}
diff --git a/test/CodeGen/X86/x86-64-pic-5.ll b/test/CodeGen/X86/x86-64-pic-5.ll
index 234bc0d2f4f1..d217a5c47dfb 100644
--- a/test/CodeGen/X86/x86-64-pic-5.ll
+++ b/test/CodeGen/X86/x86-64-pic-5.ll
@@ -6,6 +6,6 @@
define i32 @get_a() {
entry:
- %tmp1 = load i32* @a, align 4
+ %tmp1 = load i32, i32* @a, align 4
ret i32 %tmp1
}
diff --git a/test/CodeGen/X86/x86-64-pic-6.ll b/test/CodeGen/X86/x86-64-pic-6.ll
index ae5b5835928d..8671023daa8a 100644
--- a/test/CodeGen/X86/x86-64-pic-6.ll
+++ b/test/CodeGen/X86/x86-64-pic-6.ll
@@ -6,6 +6,6 @@
define i32 @get_a() nounwind {
entry:
- %tmp1 = load i32* @a, align 4
+ %tmp1 = load i32, i32* @a, align 4
ret i32 %tmp1
}
diff --git a/test/CodeGen/X86/x86-64-psub.ll b/test/CodeGen/X86/x86-64-psub.ll
index 183ddf446f3d..2e39c145919f 100644
--- a/test/CodeGen/X86/x86-64-psub.ll
+++ b/test/CodeGen/X86/x86-64-psub.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=x86_64-pc-linux -mcpu=corei7 < %s | FileCheck %s
+; RUN: llc -mtriple=x86_64-pc-linux -mattr=mmx < %s | FileCheck %s
; MMX packed sub opcodes were wrongly marked as commutative.
; This test checks that the operands of packed sub instructions are
diff --git a/test/CodeGen/X86/x86-64-ptr-arg-simple.ll b/test/CodeGen/X86/x86-64-ptr-arg-simple.ll
index 6d466639890b..11dfc802d513 100644
--- a/test/CodeGen/X86/x86-64-ptr-arg-simple.ll
+++ b/test/CodeGen/X86/x86-64-ptr-arg-simple.ll
@@ -22,7 +22,7 @@ entry:
define void @bar(i32* nocapture %pOut, i32* nocapture %pIn) nounwind {
entry:
- %0 = load i32* %pIn, align 4
+ %0 = load i32, i32* %pIn, align 4
store i32 %0, i32* %pOut, align 4
ret void
}
diff --git a/test/CodeGen/X86/x86-64-sret-return.ll b/test/CodeGen/X86/x86-64-sret-return.ll
index 2d001142d7a1..a0c43488db1e 100644
--- a/test/CodeGen/X86/x86-64-sret-return.ll
+++ b/test/CodeGen/X86/x86-64-sret-return.ll
@@ -17,42 +17,42 @@ entry:
%memtmp = alloca %struct.foo, align 8 ; <%struct.foo*> [#uses=1]
%"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
store %struct.foo* %d, %struct.foo** %d_addr
- %tmp = load %struct.foo** %d_addr, align 8 ; <%struct.foo*> [#uses=1]
- %tmp1 = getelementptr %struct.foo* %agg.result, i32 0, i32 0 ; <[4 x i64]*> [#uses=4]
- %tmp2 = getelementptr %struct.foo* %tmp, i32 0, i32 0 ; <[4 x i64]*> [#uses=4]
- %tmp3 = getelementptr [4 x i64]* %tmp1, i32 0, i32 0 ; <i64*> [#uses=1]
- %tmp4 = getelementptr [4 x i64]* %tmp2, i32 0, i32 0 ; <i64*> [#uses=1]
- %tmp5 = load i64* %tmp4, align 8 ; <i64> [#uses=1]
+ %tmp = load %struct.foo*, %struct.foo** %d_addr, align 8 ; <%struct.foo*> [#uses=1]
+ %tmp1 = getelementptr %struct.foo, %struct.foo* %agg.result, i32 0, i32 0 ; <[4 x i64]*> [#uses=4]
+ %tmp2 = getelementptr %struct.foo, %struct.foo* %tmp, i32 0, i32 0 ; <[4 x i64]*> [#uses=4]
+ %tmp3 = getelementptr [4 x i64], [4 x i64]* %tmp1, i32 0, i32 0 ; <i64*> [#uses=1]
+ %tmp4 = getelementptr [4 x i64], [4 x i64]* %tmp2, i32 0, i32 0 ; <i64*> [#uses=1]
+ %tmp5 = load i64, i64* %tmp4, align 8 ; <i64> [#uses=1]
store i64 %tmp5, i64* %tmp3, align 8
- %tmp6 = getelementptr [4 x i64]* %tmp1, i32 0, i32 1 ; <i64*> [#uses=1]
- %tmp7 = getelementptr [4 x i64]* %tmp2, i32 0, i32 1 ; <i64*> [#uses=1]
- %tmp8 = load i64* %tmp7, align 8 ; <i64> [#uses=1]
+ %tmp6 = getelementptr [4 x i64], [4 x i64]* %tmp1, i32 0, i32 1 ; <i64*> [#uses=1]
+ %tmp7 = getelementptr [4 x i64], [4 x i64]* %tmp2, i32 0, i32 1 ; <i64*> [#uses=1]
+ %tmp8 = load i64, i64* %tmp7, align 8 ; <i64> [#uses=1]
store i64 %tmp8, i64* %tmp6, align 8
- %tmp9 = getelementptr [4 x i64]* %tmp1, i32 0, i32 2 ; <i64*> [#uses=1]
- %tmp10 = getelementptr [4 x i64]* %tmp2, i32 0, i32 2 ; <i64*> [#uses=1]
- %tmp11 = load i64* %tmp10, align 8 ; <i64> [#uses=1]
+ %tmp9 = getelementptr [4 x i64], [4 x i64]* %tmp1, i32 0, i32 2 ; <i64*> [#uses=1]
+ %tmp10 = getelementptr [4 x i64], [4 x i64]* %tmp2, i32 0, i32 2 ; <i64*> [#uses=1]
+ %tmp11 = load i64, i64* %tmp10, align 8 ; <i64> [#uses=1]
store i64 %tmp11, i64* %tmp9, align 8
- %tmp12 = getelementptr [4 x i64]* %tmp1, i32 0, i32 3 ; <i64*> [#uses=1]
- %tmp13 = getelementptr [4 x i64]* %tmp2, i32 0, i32 3 ; <i64*> [#uses=1]
- %tmp14 = load i64* %tmp13, align 8 ; <i64> [#uses=1]
+ %tmp12 = getelementptr [4 x i64], [4 x i64]* %tmp1, i32 0, i32 3 ; <i64*> [#uses=1]
+ %tmp13 = getelementptr [4 x i64], [4 x i64]* %tmp2, i32 0, i32 3 ; <i64*> [#uses=1]
+ %tmp14 = load i64, i64* %tmp13, align 8 ; <i64> [#uses=1]
store i64 %tmp14, i64* %tmp12, align 8
- %tmp15 = getelementptr %struct.foo* %memtmp, i32 0, i32 0 ; <[4 x i64]*> [#uses=4]
- %tmp16 = getelementptr %struct.foo* %agg.result, i32 0, i32 0 ; <[4 x i64]*> [#uses=4]
- %tmp17 = getelementptr [4 x i64]* %tmp15, i32 0, i32 0 ; <i64*> [#uses=1]
- %tmp18 = getelementptr [4 x i64]* %tmp16, i32 0, i32 0 ; <i64*> [#uses=1]
- %tmp19 = load i64* %tmp18, align 8 ; <i64> [#uses=1]
+ %tmp15 = getelementptr %struct.foo, %struct.foo* %memtmp, i32 0, i32 0 ; <[4 x i64]*> [#uses=4]
+ %tmp16 = getelementptr %struct.foo, %struct.foo* %agg.result, i32 0, i32 0 ; <[4 x i64]*> [#uses=4]
+ %tmp17 = getelementptr [4 x i64], [4 x i64]* %tmp15, i32 0, i32 0 ; <i64*> [#uses=1]
+ %tmp18 = getelementptr [4 x i64], [4 x i64]* %tmp16, i32 0, i32 0 ; <i64*> [#uses=1]
+ %tmp19 = load i64, i64* %tmp18, align 8 ; <i64> [#uses=1]
store i64 %tmp19, i64* %tmp17, align 8
- %tmp20 = getelementptr [4 x i64]* %tmp15, i32 0, i32 1 ; <i64*> [#uses=1]
- %tmp21 = getelementptr [4 x i64]* %tmp16, i32 0, i32 1 ; <i64*> [#uses=1]
- %tmp22 = load i64* %tmp21, align 8 ; <i64> [#uses=1]
+ %tmp20 = getelementptr [4 x i64], [4 x i64]* %tmp15, i32 0, i32 1 ; <i64*> [#uses=1]
+ %tmp21 = getelementptr [4 x i64], [4 x i64]* %tmp16, i32 0, i32 1 ; <i64*> [#uses=1]
+ %tmp22 = load i64, i64* %tmp21, align 8 ; <i64> [#uses=1]
store i64 %tmp22, i64* %tmp20, align 8
- %tmp23 = getelementptr [4 x i64]* %tmp15, i32 0, i32 2 ; <i64*> [#uses=1]
- %tmp24 = getelementptr [4 x i64]* %tmp16, i32 0, i32 2 ; <i64*> [#uses=1]
- %tmp25 = load i64* %tmp24, align 8 ; <i64> [#uses=1]
+ %tmp23 = getelementptr [4 x i64], [4 x i64]* %tmp15, i32 0, i32 2 ; <i64*> [#uses=1]
+ %tmp24 = getelementptr [4 x i64], [4 x i64]* %tmp16, i32 0, i32 2 ; <i64*> [#uses=1]
+ %tmp25 = load i64, i64* %tmp24, align 8 ; <i64> [#uses=1]
store i64 %tmp25, i64* %tmp23, align 8
- %tmp26 = getelementptr [4 x i64]* %tmp15, i32 0, i32 3 ; <i64*> [#uses=1]
- %tmp27 = getelementptr [4 x i64]* %tmp16, i32 0, i32 3 ; <i64*> [#uses=1]
- %tmp28 = load i64* %tmp27, align 8 ; <i64> [#uses=1]
+ %tmp26 = getelementptr [4 x i64], [4 x i64]* %tmp15, i32 0, i32 3 ; <i64*> [#uses=1]
+ %tmp27 = getelementptr [4 x i64], [4 x i64]* %tmp16, i32 0, i32 3 ; <i64*> [#uses=1]
+ %tmp28 = load i64, i64* %tmp27, align 8 ; <i64> [#uses=1]
store i64 %tmp28, i64* %tmp26, align 8
br label %return
diff --git a/test/CodeGen/X86/x86-64-static-relo-movl.ll b/test/CodeGen/X86/x86-64-static-relo-movl.ll
index 71e52bb99191..5da3a4705039 100644
--- a/test/CodeGen/X86/x86-64-static-relo-movl.ll
+++ b/test/CodeGen/X86/x86-64-static-relo-movl.ll
@@ -11,8 +11,8 @@ define void @setup() {
%pending = alloca %struct.MatchInfo, align 8
%t = bitcast %struct.MatchInfo* %pending to i8*
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %t, i8* bitcast (%struct.MatchInfo* @NO_MATCH to i8*), i64 512, i32 8, i1 false)
- %u = getelementptr inbounds %struct.MatchInfo* %pending, i32 0, i32 2
- %v = load i64* %u, align 8
+ %u = getelementptr inbounds %struct.MatchInfo, %struct.MatchInfo* %pending, i32 0, i32 2
+ %v = load i64, i64* %u, align 8
br label %done
done:
ret void
diff --git a/test/CodeGen/X86/x86-64-tls-1.ll b/test/CodeGen/X86/x86-64-tls-1.ll
index 2879fb4e1e74..2c954dbc9c97 100644
--- a/test/CodeGen/X86/x86-64-tls-1.ll
+++ b/test/CodeGen/X86/x86-64-tls-1.ll
@@ -3,7 +3,7 @@
define i64 @z() nounwind {
; CHECK: movq $tm_nest_level@TPOFF, %r[[R0:[abcd]]]x
; CHECK-NEXT: addl %fs:0, %e[[R0]]x
-; CHECK-NEXT: andq $100, %r[[R0]]x
+; CHECK-NEXT: andl $100, %e[[R0]]x
ret i64 and (i64 ptrtoint (i32* @tm_nest_level to i64), i64 100)
}
diff --git a/test/CodeGen/X86/x86-64-varargs.ll b/test/CodeGen/X86/x86-64-varargs.ll
index 428f4493b069..ed07bde631dc 100644
--- a/test/CodeGen/X86/x86-64-varargs.ll
+++ b/test/CodeGen/X86/x86-64-varargs.ll
@@ -6,6 +6,6 @@ declare i32 @printf(i8*, ...) nounwind
define i32 @main() nounwind {
entry:
- %tmp10.i = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([26 x i8]* @.str, i32 0, i64 0), i32 12, double 0x3FF3EB8520000000, i32 120, i64 123456677890, i32 -10, double 4.500000e+15 ) nounwind ; <i32> [#uses=0]
+ %tmp10.i = tail call i32 (i8*, ...) @printf( i8* getelementptr ([26 x i8], [26 x i8]* @.str, i32 0, i64 0), i32 12, double 0x3FF3EB8520000000, i32 120, i64 123456677890, i32 -10, double 4.500000e+15 ) nounwind ; <i32> [#uses=0]
ret i32 0
}
diff --git a/test/CodeGen/X86/x86-fold-pshufb.ll b/test/CodeGen/X86/x86-fold-pshufb.ll
new file mode 100644
index 000000000000..c29e592bfe83
--- /dev/null
+++ b/test/CodeGen/X86/x86-fold-pshufb.ll
@@ -0,0 +1,17 @@
+; RUN: llc -relocation-model=pic -march=x86-64 -mtriple=x86_64-unknown-unknown -mattr=+ssse3 < %s | FileCheck %s
+
+; Verify that the backend correctly folds the shuffle in function 'fold_pshufb'
+; into a simple load from constant pool.
+
+define <2 x i64> @fold_pshufb() {
+; CHECK-LABEL: fold_pshufb:
+; CHECK: # BB#0:
+; CHECK-NEXT: movaps {{.*#+}} xmm0 = [0,0,0,0,1,0,0,0,2,0,0,0,3,0,0,0]
+; CHECK-NEXT: retq
+entry:
+ %0 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 1, i8 0, i8 0, i8 0, i8 2, i8 0, i8 0, i8 0, i8 3, i8 0, i8 0, i8 0>, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>)
+ %1 = bitcast <16 x i8> %0 to <2 x i64>
+ ret <2 x i64> %1
+}
+
+declare <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8>, <16 x i8>)
diff --git a/test/CodeGen/X86/x86-framelowering-trap.ll b/test/CodeGen/X86/x86-framelowering-trap.ll
new file mode 100644
index 000000000000..58a1da23a294
--- /dev/null
+++ b/test/CodeGen/X86/x86-framelowering-trap.ll
@@ -0,0 +1,15 @@
+; RUN: llc %s -o - | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; CHECK-LABEL: bar:
+; CHECK: ud2
+; CHECK-NEXT: retq
+define void @bar() {
+entry:
+ call void @llvm.trap()
+ ret void
+}
+
+; Function Attrs: noreturn nounwind
+declare void @llvm.trap()
diff --git a/test/CodeGen/X86/x86-mixed-alignment-dagcombine.ll b/test/CodeGen/X86/x86-mixed-alignment-dagcombine.ll
index fcf7eaec0544..8892a69abf08 100644
--- a/test/CodeGen/X86/x86-mixed-alignment-dagcombine.ll
+++ b/test/CodeGen/X86/x86-mixed-alignment-dagcombine.ll
@@ -10,8 +10,8 @@ define void @test1(i1 %cmp) align 2 {
%1 = alloca <2 x double>, align 16
%2 = alloca <2 x double>, align 8
- %val = load <2 x double>* %1, align 16
- %val2 = load <2 x double>* %2, align 8
+ %val = load <2 x double>, <2 x double>* %1, align 16
+ %val2 = load <2 x double>, <2 x double>* %2, align 8
%val3 = select i1 %cmp, <2 x double> %val, <2 x double> %val2
call void @sink(<2 x double> %val3)
ret void
@@ -24,8 +24,8 @@ define void @test2(i1 %cmp) align 2 {
%1 = alloca <2 x double>, align 16
%2 = alloca <2 x double>, align 8
- %val = load <2 x double>* %1, align 16
- %val2 = load <2 x double>* %2, align 16
+ %val = load <2 x double>, <2 x double>* %1, align 16
+ %val2 = load <2 x double>, <2 x double>* %2, align 16
%val3 = select i1 %cmp, <2 x double> %val, <2 x double> %val2
call void @sink(<2 x double> %val3)
ret void
diff --git a/test/CodeGen/X86/x86-setcc-int-to-fp-combine.ll b/test/CodeGen/X86/x86-setcc-int-to-fp-combine.ll
index 4317d8ab6a26..a2c5b3a6eedf 100644
--- a/test/CodeGen/X86/x86-setcc-int-to-fp-combine.ll
+++ b/test/CodeGen/X86/x86-setcc-int-to-fp-combine.ll
@@ -72,3 +72,18 @@ define <4 x float> @foo3(<4 x float> %val, <4 x float> %test) nounwind {
%result = sitofp <4 x i32> %and to <4 x float>
ret <4 x float> %result
}
+
+; Test the general purpose constant folding of uint->fp.
+define void @foo4(<4 x float>* noalias %result) nounwind {
+; CHECK-LABEL: LCPI4_0:
+; CHECK-NEXT: .long 1065353216 ## float 1.000000e+00
+; CHECK-NEXT: .long 1123942400 ## float 1.270000e+02
+; CHECK-NEXT: .long 1124073472 ## float 1.280000e+02
+; CHECK-NEXT: .long 1132396544 ## float 2.550000e+02
+; CHECK-LABEL: foo4:
+; CHECK: movaps LCPI4_0(%rip), %xmm0
+
+ %val = uitofp <4 x i8> <i8 1, i8 127, i8 -128, i8 -1> to <4 x float>
+ store <4 x float> %val, <4 x float>* %result
+ ret void
+}
diff --git a/test/CodeGen/X86/x86-shifts.ll b/test/CodeGen/X86/x86-shifts.ll
index ec479330ed6b..0cbfab148bb5 100644
--- a/test/CodeGen/X86/x86-shifts.ll
+++ b/test/CodeGen/X86/x86-shifts.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86-64 -mcpu=corei7 | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse2 | FileCheck %s
; Splat patterns below
@@ -118,10 +118,16 @@ entry:
define <2 x i64> @shr2_nosplat(<2 x i64> %A) nounwind {
entry:
-; CHECK: shr2_nosplat
-; CHECK-NOT: psrlq
-; CHECK-NOT: psrlq
-; CHECK: ret
+; CHECK-LABEL: shr2_nosplat
+; CHECK: movdqa %xmm0, %xmm1
+; CHECK-NEXT: psrlq $1, %xmm1
+; CHECK-NEXT: movdqa %xmm0, %xmm2
+; CHECK-NEXT: psrlq $8, %xmm2
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
+; CHECK-NEXT: movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
+; CHECK-NEXT: xorpd %xmm0, %xmm1
+; CHECK-NEXT: movapd %xmm1, %xmm0
+; CHECK-NEXT: ret
%B = lshr <2 x i64> %A, < i64 8, i64 1>
%C = lshr <2 x i64> %A, < i64 1, i64 0>
%K = xor <2 x i64> %B, %C
diff --git a/test/CodeGen/X86/x86-shrink-wrapping.ll b/test/CodeGen/X86/x86-shrink-wrapping.ll
new file mode 100644
index 000000000000..5848eddf4375
--- /dev/null
+++ b/test/CodeGen/X86/x86-shrink-wrapping.ll
@@ -0,0 +1,600 @@
+; RUN: llc %s -o - -enable-shrink-wrap=true | FileCheck %s --check-prefix=CHECK --check-prefix=ENABLE
+; RUN: llc %s -o - -enable-shrink-wrap=false | FileCheck %s --check-prefix=CHECK --check-prefix=DISABLE
+;
+; Note: Lots of tests use inline asm instead of regular calls.
+; This allows to have a better control on what the allocation will do.
+; Otherwise, we may have spill right in the entry block, defeating
+; shrink-wrapping. Moreover, some of the inline asm statement (nop)
+; are here to ensure that the related paths do not end up as critical
+; edges.
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+target triple = "x86_64-apple-macosx"
+
+
+; Initial motivating example: Simple diamond with a call just on one side.
+; CHECK-LABEL: foo:
+;
+; Compare the arguments and jump to exit.
+; No prologue needed.
+; ENABLE: movl %edi, [[ARG0CPY:%e[a-z]+]]
+; ENABLE-NEXT: cmpl %esi, [[ARG0CPY]]
+; ENABLE-NEXT: jge [[EXIT_LABEL:LBB[0-9_]+]]
+;
+; Prologue code.
+; (What we push does not matter. It should be some random scratch register.)
+; CHECK: pushq
+;
+; Compare the arguments and jump to exit.
+; After the prologue is set.
+; DISABLE: movl %edi, [[ARG0CPY:%e[a-z]+]]
+; DISABLE-NEXT: cmpl %esi, [[ARG0CPY]]
+; DISABLE-NEXT: jge [[EXIT_LABEL:LBB[0-9_]+]]
+;
+; Store %a in the alloca.
+; CHECK: movl [[ARG0CPY]], 4(%rsp)
+; Set the alloca address in the second argument.
+; CHECK-NEXT: leaq 4(%rsp), %rsi
+; Set the first argument to zero.
+; CHECK-NEXT: xorl %edi, %edi
+; CHECK-NEXT: callq _doSomething
+;
+; With shrink-wrapping, epilogue is just after the call.
+; ENABLE-NEXT: addq $8, %rsp
+;
+; CHECK: [[EXIT_LABEL]]:
+;
+; Without shrink-wrapping, epilogue is in the exit block.
+; Epilogue code. (What we pop does not matter.)
+; DISABLE-NEXT: popq
+;
+; CHECK-NEXT: retq
+define i32 @foo(i32 %a, i32 %b) {
+ %tmp = alloca i32, align 4
+ %tmp2 = icmp slt i32 %a, %b
+ br i1 %tmp2, label %true, label %false
+
+true:
+ store i32 %a, i32* %tmp, align 4
+ %tmp4 = call i32 @doSomething(i32 0, i32* %tmp)
+ br label %false
+
+false:
+ %tmp.0 = phi i32 [ %tmp4, %true ], [ %a, %0 ]
+ ret i32 %tmp.0
+}
+
+; Function Attrs: optsize
+declare i32 @doSomething(i32, i32*)
+
+
+; Check that we do not perform the restore inside the loop while the save
+; is outside.
+; CHECK-LABEL: freqSaveAndRestoreOutsideLoop:
+;
+; Shrink-wrapping allows us to skip the prologue in the else case.
+; ENABLE: testl %edi, %edi
+; ENABLE: je [[ELSE_LABEL:LBB[0-9_]+]]
+;
+; Prologue code.
+; Make sure we save the CSR used in the inline asm: rbx.
+; CHECK: pushq %rbx
+;
+; DISABLE: testl %edi, %edi
+; DISABLE: je [[ELSE_LABEL:LBB[0-9_]+]]
+;
+; SUM is in %esi because it is coalesced with the second
+; argument on the else path.
+; CHECK: xorl [[SUM:%esi]], [[SUM]]
+; CHECK-NEXT: movl $10, [[IV:%e[a-z]+]]
+;
+; Next BB.
+; CHECK: [[LOOP:LBB[0-9_]+]]: ## %for.body
+; CHECK: movl $1, [[TMP:%e[a-z]+]]
+; CHECK: addl [[TMP]], [[SUM]]
+; CHECK-NEXT: decl [[IV]]
+; CHECK-NEXT: jne [[LOOP]]
+;
+; Next BB.
+; SUM << 3.
+; CHECK: shll $3, [[SUM]]
+;
+; Jump to epilogue.
+; DISABLE: jmp [[EPILOG_BB:LBB[0-9_]+]]
+;
+; DISABLE: [[ELSE_LABEL]]: ## %if.else
+; Shift second argument by one and store into returned register.
+; DISABLE: addl %esi, %esi
+; DISABLE: [[EPILOG_BB]]: ## %if.end
+;
+; Epilogue code.
+; CHECK-DAG: popq %rbx
+; CHECK-DAG: movl %esi, %eax
+; CHECK: retq
+;
+; ENABLE: [[ELSE_LABEL]]: ## %if.else
+; Shift second argument by one and store into returned register.
+; ENABLE: addl %esi, %esi
+; ENABLE-NEXT: movl %esi, %eax
+; ENABLE-NEXT: retq
+define i32 @freqSaveAndRestoreOutsideLoop(i32 %cond, i32 %N) {
+entry:
+ %tobool = icmp eq i32 %cond, 0
+ br i1 %tobool, label %if.else, label %for.preheader
+
+for.preheader:
+ tail call void asm "nop", ""()
+ br label %for.body
+
+for.body: ; preds = %entry, %for.body
+ %i.05 = phi i32 [ %inc, %for.body ], [ 0, %for.preheader ]
+ %sum.04 = phi i32 [ %add, %for.body ], [ 0, %for.preheader ]
+ %call = tail call i32 asm "movl $$1, $0", "=r,~{ebx}"()
+ %add = add nsw i32 %call, %sum.04
+ %inc = add nuw nsw i32 %i.05, 1
+ %exitcond = icmp eq i32 %inc, 10
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body
+ %shl = shl i32 %add, 3
+ br label %if.end
+
+if.else: ; preds = %entry
+ %mul = shl nsw i32 %N, 1
+ br label %if.end
+
+if.end: ; preds = %if.else, %for.end
+ %sum.1 = phi i32 [ %shl, %for.end ], [ %mul, %if.else ]
+ ret i32 %sum.1
+}
+
+declare i32 @something(...)
+
+; Check that we do not perform the shrink-wrapping inside the loop even
+; though that would be legal. The cost model must prevent that.
+; CHECK-LABEL: freqSaveAndRestoreOutsideLoop2:
+; Prologue code.
+; Make sure we save the CSR used in the inline asm: rbx.
+; CHECK: pushq %rbx
+; CHECK: nop
+; CHECK: xorl [[SUM:%e[a-z]+]], [[SUM]]
+; CHECK-NEXT: movl $10, [[IV:%e[a-z]+]]
+; Next BB.
+; CHECK: [[LOOP_LABEL:LBB[0-9_]+]]: ## %for.body
+; CHECK: movl $1, [[TMP:%e[a-z]+]]
+; CHECK: addl [[TMP]], [[SUM]]
+; CHECK-NEXT: decl [[IV]]
+; CHECK-NEXT: jne [[LOOP_LABEL]]
+; Next BB.
+; CHECK: ## %for.exit
+; CHECK: nop
+; CHECK: popq %rbx
+; CHECK-NEXT: retq
+define i32 @freqSaveAndRestoreOutsideLoop2(i32 %cond) {
+entry:
+ br label %for.preheader
+
+for.preheader:
+ tail call void asm "nop", ""()
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %i.04 = phi i32 [ 0, %for.preheader ], [ %inc, %for.body ]
+ %sum.03 = phi i32 [ 0, %for.preheader ], [ %add, %for.body ]
+ %call = tail call i32 asm "movl $$1, $0", "=r,~{ebx}"()
+ %add = add nsw i32 %call, %sum.03
+ %inc = add nuw nsw i32 %i.04, 1
+ %exitcond = icmp eq i32 %inc, 10
+ br i1 %exitcond, label %for.exit, label %for.body
+
+for.exit:
+ tail call void asm "nop", ""()
+ br label %for.end
+
+for.end: ; preds = %for.body
+ ret i32 %add
+}
+
+; Check with a more complex case that we do not have a save within the loop
+; and a restore outside.
+; CHECK-LABEL: loopInfoSaveOutsideLoop:
+;
+; ENABLE: testl %edi, %edi
+; ENABLE-NEXT: je [[ELSE_LABEL:LBB[0-9_]+]]
+;
+; Prologue code.
+; Make sure we save the CSR used in the inline asm: rbx.
+; CHECK: pushq %rbx
+;
+; DISABLE: testl %edi, %edi
+; DISABLE-NEXT: je [[ELSE_LABEL:LBB[0-9_]+]]
+;
+; CHECK: nop
+; CHECK: xorl [[SUM:%esi]], [[SUM]]
+; CHECK-NEXT: movl $10, [[IV:%e[a-z]+]]
+;
+; CHECK: [[LOOP_LABEL:LBB[0-9_]+]]: ## %for.body
+; CHECK: movl $1, [[TMP:%e[a-z]+]]
+; CHECK: addl [[TMP]], [[SUM]]
+; CHECK-NEXT: decl [[IV]]
+; CHECK-NEXT: jne [[LOOP_LABEL]]
+; Next BB.
+; CHECK: nop
+; CHECK: shll $3, [[SUM]]
+;
+; DISABLE: jmp [[EPILOG_BB:LBB[0-9_]+]]
+;
+; DISABLE: [[ELSE_LABEL]]: ## %if.else
+; Shift second argument by one and store into returned register.
+; DISABLE: addl %esi, %esi
+; DISABLE: [[EPILOG_BB]]: ## %if.end
+;
+; Epilogue code.
+; CHECK-DAG: popq %rbx
+; CHECK-DAG: movl %esi, %eax
+; CHECK: retq
+;
+; ENABLE: [[ELSE_LABEL]]: ## %if.else
+; Shift second argument by one and store into returned register.
+; ENABLE: addl %esi, %esi
+; ENABLE-NEXT: movl %esi, %eax
+; ENABLE-NEXT: retq
+define i32 @loopInfoSaveOutsideLoop(i32 %cond, i32 %N) {
+entry:
+ %tobool = icmp eq i32 %cond, 0
+ br i1 %tobool, label %if.else, label %for.preheader
+
+for.preheader:
+ tail call void asm "nop", ""()
+ br label %for.body
+
+for.body: ; preds = %entry, %for.body
+ %i.05 = phi i32 [ %inc, %for.body ], [ 0, %for.preheader ]
+ %sum.04 = phi i32 [ %add, %for.body ], [ 0, %for.preheader ]
+ %call = tail call i32 asm "movl $$1, $0", "=r,~{ebx}"()
+ %add = add nsw i32 %call, %sum.04
+ %inc = add nuw nsw i32 %i.05, 1
+ %exitcond = icmp eq i32 %inc, 10
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body
+ tail call void asm "nop", "~{ebx}"()
+ %shl = shl i32 %add, 3
+ br label %if.end
+
+if.else: ; preds = %entry
+ %mul = shl nsw i32 %N, 1
+ br label %if.end
+
+if.end: ; preds = %if.else, %for.end
+ %sum.1 = phi i32 [ %shl, %for.end ], [ %mul, %if.else ]
+ ret i32 %sum.1
+}
+
+declare void @somethingElse(...)
+
+; Check with a more complex case that we do not have a restore within the loop
+; and a save outside.
+; CHECK-LABEL: loopInfoRestoreOutsideLoop:
+;
+; ENABLE: testl %edi, %edi
+; ENABLE-NEXT: je [[ELSE_LABEL:LBB[0-9_]+]]
+;
+; Prologue code.
+; Make sure we save the CSR used in the inline asm: rbx.
+; CHECK: pushq %rbx
+;
+; DISABLE: testl %edi, %edi
+; DISABLE-NEXT: je [[ELSE_LABEL:LBB[0-9_]+]]
+;
+; CHECK: nop
+; CHECK: xorl [[SUM:%esi]], [[SUM]]
+; CHECK-NEXT: movl $10, [[IV:%e[a-z]+]]
+;
+; CHECK: [[LOOP_LABEL:LBB[0-9_]+]]: ## %for.body
+; CHECK: movl $1, [[TMP:%e[a-z]+]]
+; CHECK: addl [[TMP]], [[SUM]]
+; CHECK-NEXT: decl [[IV]]
+; CHECK-NEXT: jne [[LOOP_LABEL]]
+; Next BB.
+; CHECK: shll $3, [[SUM]]
+;
+; DISABLE: jmp [[EPILOG_BB:LBB[0-9_]+]]
+;
+; DISABLE: [[ELSE_LABEL]]: ## %if.else
+
+; Shift second argument by one and store into returned register.
+; DISABLE: addl %esi, %esi
+; DISABLE: [[EPILOG_BB]]: ## %if.end
+;
+; Epilogue code.
+; CHECK-DAG: popq %rbx
+; CHECK-DAG: movl %esi, %eax
+; CHECK: retq
+;
+; ENABLE: [[ELSE_LABEL]]: ## %if.else
+; Shift second argument by one and store into returned register.
+; ENABLE: addl %esi, %esi
+; ENABLE-NEXT: movl %esi, %eax
+; ENABLE-NEXT: retq
+define i32 @loopInfoRestoreOutsideLoop(i32 %cond, i32 %N) #0 {
+entry:
+ %tobool = icmp eq i32 %cond, 0
+ br i1 %tobool, label %if.else, label %if.then
+
+if.then: ; preds = %entry
+ tail call void asm "nop", "~{ebx}"()
+ br label %for.body
+
+for.body: ; preds = %for.body, %if.then
+ %i.05 = phi i32 [ 0, %if.then ], [ %inc, %for.body ]
+ %sum.04 = phi i32 [ 0, %if.then ], [ %add, %for.body ]
+ %call = tail call i32 asm "movl $$1, $0", "=r,~{ebx}"()
+ %add = add nsw i32 %call, %sum.04
+ %inc = add nuw nsw i32 %i.05, 1
+ %exitcond = icmp eq i32 %inc, 10
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body
+ %shl = shl i32 %add, 3
+ br label %if.end
+
+if.else: ; preds = %entry
+ %mul = shl nsw i32 %N, 1
+ br label %if.end
+
+if.end: ; preds = %if.else, %for.end
+ %sum.1 = phi i32 [ %shl, %for.end ], [ %mul, %if.else ]
+ ret i32 %sum.1
+}
+
+; Check that we handle a function with no frame information correctly.
+; CHECK-LABEL: emptyFrame:
+; CHECK: ## %entry
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: retq
+define i32 @emptyFrame() {
+entry:
+ ret i32 0
+}
+
+; Check that we handle inline asm correctly.
+; CHECK-LABEL: inlineAsm:
+;
+; ENABLE: testl %edi, %edi
+; ENABLE-NEXT: je [[ELSE_LABEL:LBB[0-9_]+]]
+;
+; Prologue code.
+; Make sure we save the CSR used in the inline asm: rbx.
+; CHECK: pushq %rbx
+;
+; DISABLE: testl %edi, %edi
+; DISABLE-NEXT: je [[ELSE_LABEL:LBB[0-9_]+]]
+;
+; CHECK: nop
+; CHECK: movl $10, [[IV:%e[a-z]+]]
+;
+; CHECK: [[LOOP_LABEL:LBB[0-9_]+]]: ## %for.body
+; Inline asm statement.
+; CHECK: addl $1, %ebx
+; CHECK: decl [[IV]]
+; CHECK-NEXT: jne [[LOOP_LABEL]]
+; Next BB.
+; CHECK: nop
+; CHECK: xorl %esi, %esi
+;
+; DISABLE: jmp [[EPILOG_BB:LBB[0-9_]+]]
+;
+; DISABLE: [[ELSE_LABEL]]: ## %if.else
+; Shift second argument by one and store into returned register.
+; DISABLE: addl %esi, %esi
+; DISABLE: [[EPILOG_BB]]: ## %if.end
+;
+; Epilogue code.
+; CHECK-DAG: popq %rbx
+; CHECK-DAG: movl %esi, %eax
+; CHECK: retq
+;
+; ENABLE: [[ELSE_LABEL]]: ## %if.else
+; Shift second argument by one and store into returned register.
+; ENABLE: addl %esi, %esi
+; ENABLE-NEXT: movl %esi, %eax
+; ENABLE-NEXT: retq
+define i32 @inlineAsm(i32 %cond, i32 %N) {
+entry:
+ %tobool = icmp eq i32 %cond, 0
+ br i1 %tobool, label %if.else, label %for.preheader
+
+for.preheader:
+ tail call void asm "nop", ""()
+ br label %for.body
+
+for.body: ; preds = %entry, %for.body
+ %i.03 = phi i32 [ %inc, %for.body ], [ 0, %for.preheader ]
+ tail call void asm "addl $$1, %ebx", "~{ebx}"()
+ %inc = add nuw nsw i32 %i.03, 1
+ %exitcond = icmp eq i32 %inc, 10
+ br i1 %exitcond, label %for.exit, label %for.body
+
+for.exit:
+ tail call void asm "nop", ""()
+ br label %if.end
+
+if.else: ; preds = %entry
+ %mul = shl nsw i32 %N, 1
+ br label %if.end
+
+if.end: ; preds = %for.body, %if.else
+ %sum.0 = phi i32 [ %mul, %if.else ], [ 0, %for.exit ]
+ ret i32 %sum.0
+}
+
+; Check that we handle calls to variadic functions correctly.
+; CHECK-LABEL: callVariadicFunc:
+;
+; ENABLE: testl %edi, %edi
+; ENABLE-NEXT: je [[ELSE_LABEL:LBB[0-9_]+]]
+;
+; Prologue code.
+; CHECK: pushq
+;
+; DISABLE: testl %edi, %edi
+; DISABLE-NEXT: je [[ELSE_LABEL:LBB[0-9_]+]]
+;
+; Setup of the varargs.
+; CHECK: movl %esi, (%rsp)
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: %esi, %edi
+; CHECK-NEXT: %esi, %edx
+; CHECK-NEXT: %esi, %r8d
+; CHECK-NEXT: %esi, %r9d
+; CHECK-NEXT: %esi, %ecx
+; CHECK-NEXT: callq _someVariadicFunc
+; CHECK-NEXT: movl %eax, %esi
+; CHECK-NEXT: shll $3, %esi
+;
+; ENABLE-NEXT: addq $8, %rsp
+; ENABLE-NEXT: movl %esi, %eax
+; ENABLE-NEXT: retq
+;
+; DISABLE: jmp [[IFEND_LABEL:LBB[0-9_]+]]
+;
+; CHECK: [[ELSE_LABEL]]: ## %if.else
+; Shift second argument by one and store into returned register.
+; CHECK: addl %esi, %esi
+;
+; DISABLE: [[IFEND_LABEL]]: ## %if.end
+;
+; Epilogue code.
+; CHECK-NEXT: movl %esi, %eax
+; DISABLE-NEXT: popq
+; CHECK-NEXT: retq
+define i32 @callVariadicFunc(i32 %cond, i32 %N) {
+entry:
+ %tobool = icmp eq i32 %cond, 0
+ br i1 %tobool, label %if.else, label %if.then
+
+if.then: ; preds = %entry
+ %call = tail call i32 (i32, ...) @someVariadicFunc(i32 %N, i32 %N, i32 %N, i32 %N, i32 %N, i32 %N, i32 %N)
+ %shl = shl i32 %call, 3
+ br label %if.end
+
+if.else: ; preds = %entry
+ %mul = shl nsw i32 %N, 1
+ br label %if.end
+
+if.end: ; preds = %if.else, %if.then
+ %sum.0 = phi i32 [ %shl, %if.then ], [ %mul, %if.else ]
+ ret i32 %sum.0
+}
+
+declare i32 @someVariadicFunc(i32, ...)
+
+; Check that we use LEA so as not to clobber EFLAGS.
+%struct.temp_slot = type { %struct.temp_slot*, %struct.rtx_def*, %struct.rtx_def*, i32, i64, %union.tree_node*, %union.tree_node*, i8, i8, i32, i32, i64, i64 }
+%union.tree_node = type { %struct.tree_decl }
+%struct.tree_decl = type { %struct.tree_common, i8*, i32, i32, %union.tree_node*, i48, %union.anon, %union.tree_node*, %union.tree_node*, %union.tree_node*, %union.tree_node*, %union.tree_node*, %union.tree_node*, %union.tree_node*, %union.tree_node*, %union.tree_node*, %union.tree_node*, %struct.rtx_def*, %struct.rtx_def*, %union.anon.1, %union.tree_node*, %union.tree_node*, %union.tree_node*, i64, %struct.lang_decl* }
+%struct.tree_common = type { %union.tree_node*, %union.tree_node*, i32 }
+%union.anon = type { i64 }
+%union.anon.1 = type { %struct.function* }
+%struct.function = type { %struct.eh_status*, %struct.stmt_status*, %struct.expr_status*, %struct.emit_status*, %struct.varasm_status*, i8*, %union.tree_node*, %struct.function*, i32, i32, i32, i32, %struct.rtx_def*, %struct.ix86_args, %struct.rtx_def*, %struct.rtx_def*, i8*, %struct.initial_value_struct*, i32, %union.tree_node*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %union.tree_node*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, i64, %union.tree_node*, %union.tree_node*, %struct.rtx_def*, %struct.rtx_def*, i32, %struct.rtx_def**, %struct.temp_slot*, i32, i32, i32, %struct.var_refs_queue*, i32, i32, i8*, %union.tree_node*, %struct.rtx_def*, i32, i32, %struct.machine_function*, i32, i32, %struct.language_function*, %struct.rtx_def*, i24 }
+%struct.eh_status = type opaque
+%struct.stmt_status = type opaque
+%struct.expr_status = type { i32, i32, i32, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def* }
+%struct.emit_status = type { i32, i32, %struct.rtx_def*, %struct.rtx_def*, %union.tree_node*, %struct.sequence_stack*, i32, i32, i8*, i32, i8*, %union.tree_node**, %struct.rtx_def** }
+%struct.sequence_stack = type { %struct.rtx_def*, %struct.rtx_def*, %union.tree_node*, %struct.sequence_stack* }
+%struct.varasm_status = type opaque
+%struct.ix86_args = type { i32, i32, i32, i32, i32, i32, i32 }
+%struct.initial_value_struct = type opaque
+%struct.var_refs_queue = type { %struct.rtx_def*, i32, i32, %struct.var_refs_queue* }
+%struct.machine_function = type opaque
+%struct.language_function = type opaque
+%struct.lang_decl = type opaque
+%struct.rtx_def = type { i32, [1 x %union.rtunion_def] }
+%union.rtunion_def = type { i64 }
+
+declare hidden fastcc %struct.temp_slot* @find_temp_slot_from_address(%struct.rtx_def* readonly)
+
+; CHECK-LABEL: useLEA:
+; DISABLE: pushq
+;
+; CHECK: testq %rdi, %rdi
+; CHECK-NEXT: je [[CLEANUP:LBB[0-9_]+]]
+;
+; CHECK: movzwl (%rdi), [[BF_LOAD:%e[a-z]+]]
+; CHECK-NEXT: cmpl $66, [[BF_LOAD]]
+; CHECK-NEXT: jne [[CLEANUP]]
+;
+; CHECK: movq 8(%rdi), %rdi
+; CHECK-NEXT: movzwl (%rdi), %e[[BF_LOAD2:[a-z]+]]
+; CHECK-NEXT: leal -54(%r[[BF_LOAD2]]), [[TMP:%e[a-z]+]]
+; CHECK-NEXT: cmpl $14, [[TMP]]
+; CHECK-NEXT: ja [[LOR_LHS_FALSE:LBB[0-9_]+]]
+;
+; CHECK: movl $24599, [[TMP2:%e[a-z]+]]
+; CHECK-NEXT: btl [[TMP]], [[TMP2]]
+; CHECK-NEXT: jb [[CLEANUP]]
+;
+; CHECK: [[LOR_LHS_FALSE]]: ## %lor.lhs.false
+; CHECK: cmpl $134, %e[[BF_LOAD2]]
+; CHECK-NEXT: je [[CLEANUP]]
+;
+; CHECK: cmpl $140, %e[[BF_LOAD2]]
+; CHECK-NEXT: je [[CLEANUP]]
+;
+; ENABLE: pushq
+; CHECK: callq _find_temp_slot_from_address
+; CHECK-NEXT: testq %rax, %rax
+;
+; The adjustment must use LEA here (or be moved above the test).
+; ENABLE-NEXT: leaq 8(%rsp), %rsp
+;
+; CHECK-NEXT: je [[CLEANUP]]
+;
+; CHECK: movb $1, 57(%rax)
+;
+; CHECK: [[CLEANUP]]: ## %cleanup
+; DISABLE: popq
+; CHECK-NEXT: retq
+define void @useLEA(%struct.rtx_def* readonly %x) {
+entry:
+ %cmp = icmp eq %struct.rtx_def* %x, null
+ br i1 %cmp, label %cleanup, label %if.end
+
+if.end: ; preds = %entry
+ %tmp = getelementptr inbounds %struct.rtx_def, %struct.rtx_def* %x, i64 0, i32 0
+ %bf.load = load i32, i32* %tmp, align 8
+ %bf.clear = and i32 %bf.load, 65535
+ %cmp1 = icmp eq i32 %bf.clear, 66
+ br i1 %cmp1, label %lor.lhs.false, label %cleanup
+
+lor.lhs.false: ; preds = %if.end
+ %arrayidx = getelementptr inbounds %struct.rtx_def, %struct.rtx_def* %x, i64 0, i32 1, i64 0
+ %rtx = bitcast %union.rtunion_def* %arrayidx to %struct.rtx_def**
+ %tmp1 = load %struct.rtx_def*, %struct.rtx_def** %rtx, align 8
+ %tmp2 = getelementptr inbounds %struct.rtx_def, %struct.rtx_def* %tmp1, i64 0, i32 0
+ %bf.load2 = load i32, i32* %tmp2, align 8
+ %bf.clear3 = and i32 %bf.load2, 65535
+ switch i32 %bf.clear3, label %if.end.55 [
+ i32 67, label %cleanup
+ i32 68, label %cleanup
+ i32 54, label %cleanup
+ i32 55, label %cleanup
+ i32 58, label %cleanup
+ i32 134, label %cleanup
+ i32 56, label %cleanup
+ i32 140, label %cleanup
+ ]
+
+if.end.55: ; preds = %lor.lhs.false
+ %call = tail call fastcc %struct.temp_slot* @find_temp_slot_from_address(%struct.rtx_def* %tmp1) #2
+ %cmp59 = icmp eq %struct.temp_slot* %call, null
+ br i1 %cmp59, label %cleanup, label %if.then.60
+
+if.then.60: ; preds = %if.end.55
+ %addr_taken = getelementptr inbounds %struct.temp_slot, %struct.temp_slot* %call, i64 0, i32 8
+ store i8 1, i8* %addr_taken, align 1
+ br label %cleanup
+
+cleanup: ; preds = %if.then.60, %if.end.55, %lor.lhs.false, %lor.lhs.false, %lor.lhs.false, %lor.lhs.false, %lor.lhs.false, %lor.lhs.false, %lor.lhs.false, %lor.lhs.false, %if.end, %entry
+ ret void
+}
diff --git a/test/CodeGen/X86/x86-upgrade-avx2-vbroadcast.ll b/test/CodeGen/X86/x86-upgrade-avx2-vbroadcast.ll
new file mode 100644
index 000000000000..d4813ea47a3d
--- /dev/null
+++ b/test/CodeGen/X86/x86-upgrade-avx2-vbroadcast.ll
@@ -0,0 +1,18 @@
+; RUN: llc -mattr=+avx2 < %s | FileCheck %s
+
+; Check that we properly upgrade the AVX2 vbroadcast intrinsic to IR.
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.10.0"
+
+define <4 x i64> @broadcast128(<2 x i64> %src) {
+ ; CHECK-LABEL: broadcast128
+ ; CHECK: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+ %1 = alloca <2 x i64>, align 16
+ %2 = bitcast <2 x i64>* %1 to i8*
+ store <2 x i64> %src, <2 x i64>* %1, align 16
+ %3 = call <4 x i64> @llvm.x86.avx2.vbroadcasti128(i8* %2)
+ ret <4 x i64> %3
+}
+
+declare <4 x i64> @llvm.x86.avx2.vbroadcasti128(i8*) #1
diff --git a/test/CodeGen/X86/xaluo.ll b/test/CodeGen/X86/xaluo.ll
index 668628c69ede..7c4b60d264c9 100644
--- a/test/CodeGen/X86/xaluo.ll
+++ b/test/CodeGen/X86/xaluo.ll
@@ -1,5 +1,5 @@
; RUN: llc -mtriple=x86_64-darwin-unknown < %s | FileCheck %s --check-prefix=CHECK --check-prefix=SDAG
-; RUN: llc -mtriple=x86_64-darwin-unknown -fast-isel -fast-isel-abort < %s | FileCheck %s --check-prefix=CHECK --check-prefix=FAST
+; RUN: llc -mtriple=x86_64-darwin-unknown -fast-isel -fast-isel-abort=1 < %s | FileCheck %s --check-prefix=CHECK --check-prefix=FAST
;
; Get the actual value of the overflow bit.
diff --git a/test/CodeGen/X86/xmulo.ll b/test/CodeGen/X86/xmulo.ll
index 71efac4e99a1..825efa6361b5 100644
--- a/test/CodeGen/X86/xmulo.ll
+++ b/test/CodeGen/X86/xmulo.ll
@@ -17,7 +17,7 @@ define i32 @t1() nounwind {
%2 = extractvalue {i64, i1} %1, 0
%3 = extractvalue {i64, i1} %1, 1
%4 = zext i1 %3 to i32
- %5 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([10 x i8]* @.str, i32 0, i32 0), i64 %2, i32 %4)
+ %5 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([10 x i8], [10 x i8]* @.str, i32 0, i32 0), i64 %2, i32 %4)
ret i32 0
}
@@ -31,7 +31,7 @@ define i32 @t2() nounwind {
%2 = extractvalue {i64, i1} %1, 0
%3 = extractvalue {i64, i1} %1, 1
%4 = zext i1 %3 to i32
- %5 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([10 x i8]* @.str, i32 0, i32 0), i64 %2, i32 %4)
+ %5 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([10 x i8], [10 x i8]* @.str, i32 0, i32 0), i64 %2, i32 %4)
ret i32 0
}
@@ -45,6 +45,6 @@ define i32 @t3() nounwind {
%2 = extractvalue {i64, i1} %1, 0
%3 = extractvalue {i64, i1} %1, 1
%4 = zext i1 %3 to i32
- %5 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([10 x i8]* @.str, i32 0, i32 0), i64 %2, i32 %4)
+ %5 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([10 x i8], [10 x i8]* @.str, i32 0, i32 0), i64 %2, i32 %4)
ret i32 0
}
diff --git a/test/CodeGen/X86/xop-intrinsics-x86_64.ll b/test/CodeGen/X86/xop-intrinsics-x86_64.ll
index 8af782cd2f19..2516116f7697 100644
--- a/test/CodeGen/X86/xop-intrinsics-x86_64.ll
+++ b/test/CodeGen/X86/xop-intrinsics-x86_64.ll
@@ -8,14 +8,14 @@ define <2 x double> @test_int_x86_xop_vpermil2pd(<2 x double> %a0, <2 x double>
define <2 x double> @test_int_x86_xop_vpermil2pd_mr(<2 x double> %a0, <2 x double>* %a1, <2 x double> %a2) {
; CHECK-NOT: vmovaps
; CHECK: vpermil2pd
- %vec = load <2 x double>* %a1
+ %vec = load <2 x double>, <2 x double>* %a1
%res = call <2 x double> @llvm.x86.xop.vpermil2pd(<2 x double> %a0, <2 x double> %vec, <2 x double> %a2, i8 1) ; [#uses=1]
ret <2 x double> %res
}
define <2 x double> @test_int_x86_xop_vpermil2pd_rm(<2 x double> %a0, <2 x double> %a1, <2 x double>* %a2) {
; CHECK-NOT: vmovaps
; CHECK: vpermil2pd
- %vec = load <2 x double>* %a2
+ %vec = load <2 x double>, <2 x double>* %a2
%res = call <2 x double> @llvm.x86.xop.vpermil2pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %vec, i8 1) ; [#uses=1]
ret <2 x double> %res
}
@@ -31,7 +31,7 @@ define <4 x double> @test_int_x86_xop_vpermil2pd_256_mr(<4 x double> %a0, <4 x d
; CHECK-NOT: vmovaps
; CHECK: vpermil2pd
; CHECK: ymm
- %vec = load <4 x double>* %a1
+ %vec = load <4 x double>, <4 x double>* %a1
%res = call <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double> %a0, <4 x double> %vec, <4 x double> %a2, i8 2) ;
ret <4 x double> %res
}
@@ -39,7 +39,7 @@ define <4 x double> @test_int_x86_xop_vpermil2pd_256_rm(<4 x double> %a0, <4 x d
; CHECK-NOT: vmovaps
; CHECK: vpermil2pd
; CHECK: ymm
- %vec = load <4 x double>* %a2
+ %vec = load <4 x double>, <4 x double>* %a2
%res = call <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %vec, i8 2) ;
ret <4 x double> %res
}
@@ -77,7 +77,7 @@ define <4 x i64> @test_int_x86_xop_vpcmov_256_mr(<4 x i64> %a0, <4 x i64>* %a1,
; CHECK-NOT: vmovaps
; CHECK: vpcmov
; CHECK: ymm
- %vec = load <4 x i64>* %a1
+ %vec = load <4 x i64>, <4 x i64>* %a1
%res = call <4 x i64> @llvm.x86.xop.vpcmov.256(<4 x i64> %a0, <4 x i64> %vec, <4 x i64> %a2) ;
ret <4 x i64> %res
}
@@ -85,462 +85,462 @@ define <4 x i64> @test_int_x86_xop_vpcmov_256_rm(<4 x i64> %a0, <4 x i64> %a1, <
; CHECK-NOT: vmovaps
; CHECK: vpcmov
; CHECK: ymm
- %vec = load <4 x i64>* %a2
+ %vec = load <4 x i64>, <4 x i64>* %a2
%res = call <4 x i64> @llvm.x86.xop.vpcmov.256(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> %vec) ;
ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.xop.vpcmov.256(<4 x i64>, <4 x i64>, <4 x i64>) nounwind readnone
define <16 x i8> @test_int_x86_xop_vpcomeqb(<16 x i8> %a0, <16 x i8> %a1) {
- ; CHECK:vpcomb
+ ; CHECK:vpcomeqb
%res = call <16 x i8> @llvm.x86.xop.vpcomeqb(<16 x i8> %a0, <16 x i8> %a1) ;
ret <16 x i8> %res
}
define <16 x i8> @test_int_x86_xop_vpcomeqb_mem(<16 x i8> %a0, <16 x i8>* %a1) {
; CHECK-NOT: vmovaps
- ; CHECK:vpcomb
- %vec = load <16 x i8>* %a1
+ ; CHECK:vpcomeqb
+ %vec = load <16 x i8>, <16 x i8>* %a1
%res = call <16 x i8> @llvm.x86.xop.vpcomeqb(<16 x i8> %a0, <16 x i8> %vec) ;
ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.xop.vpcomeqb(<16 x i8>, <16 x i8>) nounwind readnone
define <8 x i16> @test_int_x86_xop_vpcomeqw(<8 x i16> %a0, <8 x i16> %a1) {
- ; CHECK: vpcomw
+ ; CHECK: vpcomeqw
%res = call <8 x i16> @llvm.x86.xop.vpcomeqw(<8 x i16> %a0, <8 x i16> %a1) ;
ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.xop.vpcomeqw(<8 x i16>, <8 x i16>) nounwind readnone
define <4 x i32> @test_int_x86_xop_vpcomeqd(<4 x i32> %a0, <4 x i32> %a1) {
- ; CHECK: vpcomd
+ ; CHECK: vpcomeqd
%res = call <4 x i32> @llvm.x86.xop.vpcomeqd(<4 x i32> %a0, <4 x i32> %a1) ;
ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.xop.vpcomeqd(<4 x i32>, <4 x i32>) nounwind readnone
define <2 x i64> @test_int_x86_xop_vpcomeqq(<2 x i64> %a0, <2 x i64> %a1) {
- ; CHECK: vpcomq
+ ; CHECK: vpcomeqq
%res = call <2 x i64> @llvm.x86.xop.vpcomeqq(<2 x i64> %a0, <2 x i64> %a1) ;
ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.xop.vpcomeqq(<2 x i64>, <2 x i64>) nounwind readnone
define <16 x i8> @test_int_x86_xop_vpcomequb(<16 x i8> %a0, <16 x i8> %a1) {
- ; CHECK: vpcomub
+ ; CHECK: vpcomequb
%res = call <16 x i8> @llvm.x86.xop.vpcomequb(<16 x i8> %a0, <16 x i8> %a1) ;
ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.xop.vpcomequb(<16 x i8>, <16 x i8>) nounwind readnone
define <4 x i32> @test_int_x86_xop_vpcomequd(<4 x i32> %a0, <4 x i32> %a1) {
- ; CHECK: vpcomud
+ ; CHECK: vpcomequd
%res = call <4 x i32> @llvm.x86.xop.vpcomequd(<4 x i32> %a0, <4 x i32> %a1) ;
ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.xop.vpcomequd(<4 x i32>, <4 x i32>) nounwind readnone
define <2 x i64> @test_int_x86_xop_vpcomequq(<2 x i64> %a0, <2 x i64> %a1) {
- ; CHECK: vpcomuq
+ ; CHECK: vpcomequq
%res = call <2 x i64> @llvm.x86.xop.vpcomequq(<2 x i64> %a0, <2 x i64> %a1) ;
ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.xop.vpcomequq(<2 x i64>, <2 x i64>) nounwind readnone
define <8 x i16> @test_int_x86_xop_vpcomequw(<8 x i16> %a0, <8 x i16> %a1) {
- ; CHECK: vpcomuw
+ ; CHECK: vpcomequw
%res = call <8 x i16> @llvm.x86.xop.vpcomequw(<8 x i16> %a0, <8 x i16> %a1) ;
ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.xop.vpcomequw(<8 x i16>, <8 x i16>) nounwind readnone
define <16 x i8> @test_int_x86_xop_vpcomfalseb(<16 x i8> %a0, <16 x i8> %a1) {
- ; CHECK: vpcomb
+ ; CHECK: vpcomfalseb
%res = call <16 x i8> @llvm.x86.xop.vpcomfalseb(<16 x i8> %a0, <16 x i8> %a1) ;
ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.xop.vpcomfalseb(<16 x i8>, <16 x i8>) nounwind readnone
define <4 x i32> @test_int_x86_xop_vpcomfalsed(<4 x i32> %a0, <4 x i32> %a1) {
- ; CHECK: vpcomd
+ ; CHECK: vpcomfalsed
%res = call <4 x i32> @llvm.x86.xop.vpcomfalsed(<4 x i32> %a0, <4 x i32> %a1) ;
ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.xop.vpcomfalsed(<4 x i32>, <4 x i32>) nounwind readnone
define <2 x i64> @test_int_x86_xop_vpcomfalseq(<2 x i64> %a0, <2 x i64> %a1) {
- ; CHECK: vpcomq
+ ; CHECK: vpcomfalseq
%res = call <2 x i64> @llvm.x86.xop.vpcomfalseq(<2 x i64> %a0, <2 x i64> %a1) ;
ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.xop.vpcomfalseq(<2 x i64>, <2 x i64>) nounwind readnone
define <16 x i8> @test_int_x86_xop_vpcomfalseub(<16 x i8> %a0, <16 x i8> %a1) {
- ; CHECK: vpcomub
+ ; CHECK: vpcomfalseub
%res = call <16 x i8> @llvm.x86.xop.vpcomfalseub(<16 x i8> %a0, <16 x i8> %a1) ;
ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.xop.vpcomfalseub(<16 x i8>, <16 x i8>) nounwind readnone
define <4 x i32> @test_int_x86_xop_vpcomfalseud(<4 x i32> %a0, <4 x i32> %a1) {
- ; CHECK: vpcomud
+ ; CHECK: vpcomfalseud
%res = call <4 x i32> @llvm.x86.xop.vpcomfalseud(<4 x i32> %a0, <4 x i32> %a1) ;
ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.xop.vpcomfalseud(<4 x i32>, <4 x i32>) nounwind readnone
define <2 x i64> @test_int_x86_xop_vpcomfalseuq(<2 x i64> %a0, <2 x i64> %a1) {
- ; CHECK: vpcomuq
+ ; CHECK: vpcomfalseuq
%res = call <2 x i64> @llvm.x86.xop.vpcomfalseuq(<2 x i64> %a0, <2 x i64> %a1) ;
ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.xop.vpcomfalseuq(<2 x i64>, <2 x i64>) nounwind readnone
define <8 x i16> @test_int_x86_xop_vpcomfalseuw(<8 x i16> %a0, <8 x i16> %a1) {
- ; CHECK: vpcomuw
+ ; CHECK: vpcomfalseuw
%res = call <8 x i16> @llvm.x86.xop.vpcomfalseuw(<8 x i16> %a0, <8 x i16> %a1) ;
ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.xop.vpcomfalseuw(<8 x i16>, <8 x i16>) nounwind readnone
define <8 x i16> @test_int_x86_xop_vpcomfalsew(<8 x i16> %a0, <8 x i16> %a1) {
- ; CHECK: vpcomw
+ ; CHECK: vpcomfalsew
%res = call <8 x i16> @llvm.x86.xop.vpcomfalsew(<8 x i16> %a0, <8 x i16> %a1) ;
ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.xop.vpcomfalsew(<8 x i16>, <8 x i16>) nounwind readnone
define <16 x i8> @test_int_x86_xop_vpcomgeb(<16 x i8> %a0, <16 x i8> %a1) {
- ; CHECK: vpcomb
+ ; CHECK: vpcomgeb
%res = call <16 x i8> @llvm.x86.xop.vpcomgeb(<16 x i8> %a0, <16 x i8> %a1) ;
ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.xop.vpcomgeb(<16 x i8>, <16 x i8>) nounwind readnone
define <4 x i32> @test_int_x86_xop_vpcomged(<4 x i32> %a0, <4 x i32> %a1) {
- ; CHECK: vpcomd
+ ; CHECK: vpcomged
%res = call <4 x i32> @llvm.x86.xop.vpcomged(<4 x i32> %a0, <4 x i32> %a1) ;
ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.xop.vpcomged(<4 x i32>, <4 x i32>) nounwind readnone
define <2 x i64> @test_int_x86_xop_vpcomgeq(<2 x i64> %a0, <2 x i64> %a1) {
- ; CHECK: vpcomq
+ ; CHECK: vpcomgeq
%res = call <2 x i64> @llvm.x86.xop.vpcomgeq(<2 x i64> %a0, <2 x i64> %a1) ;
ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.xop.vpcomgeq(<2 x i64>, <2 x i64>) nounwind readnone
define <16 x i8> @test_int_x86_xop_vpcomgeub(<16 x i8> %a0, <16 x i8> %a1) {
- ; CHECK: vpcomub
+ ; CHECK: vpcomgeub
%res = call <16 x i8> @llvm.x86.xop.vpcomgeub(<16 x i8> %a0, <16 x i8> %a1) ;
ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.xop.vpcomgeub(<16 x i8>, <16 x i8>) nounwind readnone
define <4 x i32> @test_int_x86_xop_vpcomgeud(<4 x i32> %a0, <4 x i32> %a1) {
- ; CHECK: vpcomud
+ ; CHECK: vpcomgeud
%res = call <4 x i32> @llvm.x86.xop.vpcomgeud(<4 x i32> %a0, <4 x i32> %a1) ;
ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.xop.vpcomgeud(<4 x i32>, <4 x i32>) nounwind readnone
define <2 x i64> @test_int_x86_xop_vpcomgeuq(<2 x i64> %a0, <2 x i64> %a1) {
- ; CHECK: vpcomuq
+ ; CHECK: vpcomgeuq
%res = call <2 x i64> @llvm.x86.xop.vpcomgeuq(<2 x i64> %a0, <2 x i64> %a1) ;
ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.xop.vpcomgeuq(<2 x i64>, <2 x i64>) nounwind readnone
define <8 x i16> @test_int_x86_xop_vpcomgeuw(<8 x i16> %a0, <8 x i16> %a1) {
- ; CHECK: vpcomuw
+ ; CHECK: vpcomgeuw
%res = call <8 x i16> @llvm.x86.xop.vpcomgeuw(<8 x i16> %a0, <8 x i16> %a1) ;
ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.xop.vpcomgeuw(<8 x i16>, <8 x i16>) nounwind readnone
define <8 x i16> @test_int_x86_xop_vpcomgew(<8 x i16> %a0, <8 x i16> %a1) {
- ; CHECK: vpcomw
+ ; CHECK: vpcomgew
%res = call <8 x i16> @llvm.x86.xop.vpcomgew(<8 x i16> %a0, <8 x i16> %a1) ;
ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.xop.vpcomgew(<8 x i16>, <8 x i16>) nounwind readnone
define <16 x i8> @test_int_x86_xop_vpcomgtb(<16 x i8> %a0, <16 x i8> %a1) {
- ; CHECK: vpcomb
+ ; CHECK: vpcomgtb
%res = call <16 x i8> @llvm.x86.xop.vpcomgtb(<16 x i8> %a0, <16 x i8> %a1) ;
ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.xop.vpcomgtb(<16 x i8>, <16 x i8>) nounwind readnone
define <4 x i32> @test_int_x86_xop_vpcomgtd(<4 x i32> %a0, <4 x i32> %a1) {
- ; CHECK: vpcomd
+ ; CHECK: vpcomgtd
%res = call <4 x i32> @llvm.x86.xop.vpcomgtd(<4 x i32> %a0, <4 x i32> %a1) ;
ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.xop.vpcomgtd(<4 x i32>, <4 x i32>) nounwind readnone
define <2 x i64> @test_int_x86_xop_vpcomgtq(<2 x i64> %a0, <2 x i64> %a1) {
- ; CHECK: vpcomq
+ ; CHECK: vpcomgtq
%res = call <2 x i64> @llvm.x86.xop.vpcomgtq(<2 x i64> %a0, <2 x i64> %a1) ;
ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.xop.vpcomgtq(<2 x i64>, <2 x i64>) nounwind readnone
define <16 x i8> @test_int_x86_xop_vpcomgtub(<16 x i8> %a0, <16 x i8> %a1) {
- ; CHECK: vpcomub
+ ; CHECK: vpcomgtub
%res = call <16 x i8> @llvm.x86.xop.vpcomgtub(<16 x i8> %a0, <16 x i8> %a1) ;
ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.xop.vpcomgtub(<16 x i8>, <16 x i8>) nounwind readnone
define <4 x i32> @test_int_x86_xop_vpcomgtud(<4 x i32> %a0, <4 x i32> %a1) {
- ; CHECK: vpcomud
+ ; CHECK: vpcomgtud
%res = call <4 x i32> @llvm.x86.xop.vpcomgtud(<4 x i32> %a0, <4 x i32> %a1) ;
ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.xop.vpcomgtud(<4 x i32>, <4 x i32>) nounwind readnone
define <2 x i64> @test_int_x86_xop_vpcomgtuq(<2 x i64> %a0, <2 x i64> %a1) {
- ; CHECK: vpcomuq
+ ; CHECK: vpcomgtuq
%res = call <2 x i64> @llvm.x86.xop.vpcomgtuq(<2 x i64> %a0, <2 x i64> %a1) ;
ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.xop.vpcomgtuq(<2 x i64>, <2 x i64>) nounwind readnone
define <8 x i16> @test_int_x86_xop_vpcomgtuw(<8 x i16> %a0, <8 x i16> %a1) {
- ; CHECK: vpcomuw
+ ; CHECK: vpcomgtuw
%res = call <8 x i16> @llvm.x86.xop.vpcomgtuw(<8 x i16> %a0, <8 x i16> %a1) ;
ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.xop.vpcomgtuw(<8 x i16>, <8 x i16>) nounwind readnone
define <8 x i16> @test_int_x86_xop_vpcomgtw(<8 x i16> %a0, <8 x i16> %a1) {
- ; CHECK: vpcomw
+ ; CHECK: vpcomgtw
%res = call <8 x i16> @llvm.x86.xop.vpcomgtw(<8 x i16> %a0, <8 x i16> %a1) ;
ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.xop.vpcomgtw(<8 x i16>, <8 x i16>) nounwind readnone
define <16 x i8> @test_int_x86_xop_vpcomleb(<16 x i8> %a0, <16 x i8> %a1) {
- ; CHECK: vpcomb
+ ; CHECK: vpcomleb
%res = call <16 x i8> @llvm.x86.xop.vpcomleb(<16 x i8> %a0, <16 x i8> %a1) ;
ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.xop.vpcomleb(<16 x i8>, <16 x i8>) nounwind readnone
define <4 x i32> @test_int_x86_xop_vpcomled(<4 x i32> %a0, <4 x i32> %a1) {
- ; CHECK: vpcomd
+ ; CHECK: vpcomled
%res = call <4 x i32> @llvm.x86.xop.vpcomled(<4 x i32> %a0, <4 x i32> %a1) ;
ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.xop.vpcomled(<4 x i32>, <4 x i32>) nounwind readnone
define <2 x i64> @test_int_x86_xop_vpcomleq(<2 x i64> %a0, <2 x i64> %a1) {
- ; CHECK: vpcomq
+ ; CHECK: vpcomleq
%res = call <2 x i64> @llvm.x86.xop.vpcomleq(<2 x i64> %a0, <2 x i64> %a1) ;
ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.xop.vpcomleq(<2 x i64>, <2 x i64>) nounwind readnone
define <16 x i8> @test_int_x86_xop_vpcomleub(<16 x i8> %a0, <16 x i8> %a1) {
- ; CHECK: vpcomub
+ ; CHECK: vpcomleub
%res = call <16 x i8> @llvm.x86.xop.vpcomleub(<16 x i8> %a0, <16 x i8> %a1) ;
ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.xop.vpcomleub(<16 x i8>, <16 x i8>) nounwind readnone
define <4 x i32> @test_int_x86_xop_vpcomleud(<4 x i32> %a0, <4 x i32> %a1) {
- ; CHECK: vpcomud
+ ; CHECK: vpcomleud
%res = call <4 x i32> @llvm.x86.xop.vpcomleud(<4 x i32> %a0, <4 x i32> %a1) ;
ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.xop.vpcomleud(<4 x i32>, <4 x i32>) nounwind readnone
define <2 x i64> @test_int_x86_xop_vpcomleuq(<2 x i64> %a0, <2 x i64> %a1) {
- ; CHECK: vpcomuq
+ ; CHECK: vpcomleuq
%res = call <2 x i64> @llvm.x86.xop.vpcomleuq(<2 x i64> %a0, <2 x i64> %a1) ;
ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.xop.vpcomleuq(<2 x i64>, <2 x i64>) nounwind readnone
define <8 x i16> @test_int_x86_xop_vpcomleuw(<8 x i16> %a0, <8 x i16> %a1) {
- ; CHECK: vpcomuw
+ ; CHECK: vpcomleuw
%res = call <8 x i16> @llvm.x86.xop.vpcomleuw(<8 x i16> %a0, <8 x i16> %a1) ;
ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.xop.vpcomleuw(<8 x i16>, <8 x i16>) nounwind readnone
define <8 x i16> @test_int_x86_xop_vpcomlew(<8 x i16> %a0, <8 x i16> %a1) {
- ; CHECK: vpcomw
+ ; CHECK: vpcomlew
%res = call <8 x i16> @llvm.x86.xop.vpcomlew(<8 x i16> %a0, <8 x i16> %a1) ;
ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.xop.vpcomlew(<8 x i16>, <8 x i16>) nounwind readnone
define <16 x i8> @test_int_x86_xop_vpcomltb(<16 x i8> %a0, <16 x i8> %a1) {
- ; CHECK: vpcomb
+ ; CHECK: vpcomltb
%res = call <16 x i8> @llvm.x86.xop.vpcomltb(<16 x i8> %a0, <16 x i8> %a1) ;
ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.xop.vpcomltb(<16 x i8>, <16 x i8>) nounwind readnone
define <4 x i32> @test_int_x86_xop_vpcomltd(<4 x i32> %a0, <4 x i32> %a1) {
- ; CHECK: vpcomd
+ ; CHECK: vpcomltd
%res = call <4 x i32> @llvm.x86.xop.vpcomltd(<4 x i32> %a0, <4 x i32> %a1) ;
ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.xop.vpcomltd(<4 x i32>, <4 x i32>) nounwind readnone
define <2 x i64> @test_int_x86_xop_vpcomltq(<2 x i64> %a0, <2 x i64> %a1) {
- ; CHECK: vpcomq
+ ; CHECK: vpcomltq
%res = call <2 x i64> @llvm.x86.xop.vpcomltq(<2 x i64> %a0, <2 x i64> %a1) ;
ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.xop.vpcomltq(<2 x i64>, <2 x i64>) nounwind readnone
define <16 x i8> @test_int_x86_xop_vpcomltub(<16 x i8> %a0, <16 x i8> %a1) {
- ; CHECK: vpcomub
+ ; CHECK: vpcomltub
%res = call <16 x i8> @llvm.x86.xop.vpcomltub(<16 x i8> %a0, <16 x i8> %a1) ;
ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.xop.vpcomltub(<16 x i8>, <16 x i8>) nounwind readnone
define <4 x i32> @test_int_x86_xop_vpcomltud(<4 x i32> %a0, <4 x i32> %a1) {
- ; CHECK: vpcomud
+ ; CHECK: vpcomltud
%res = call <4 x i32> @llvm.x86.xop.vpcomltud(<4 x i32> %a0, <4 x i32> %a1) ;
ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.xop.vpcomltud(<4 x i32>, <4 x i32>) nounwind readnone
define <2 x i64> @test_int_x86_xop_vpcomltuq(<2 x i64> %a0, <2 x i64> %a1) {
- ; CHECK: vpcomuq
+ ; CHECK: vpcomltuq
%res = call <2 x i64> @llvm.x86.xop.vpcomltuq(<2 x i64> %a0, <2 x i64> %a1) ;
ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.xop.vpcomltuq(<2 x i64>, <2 x i64>) nounwind readnone
define <8 x i16> @test_int_x86_xop_vpcomltuw(<8 x i16> %a0, <8 x i16> %a1) {
- ; CHECK: vpcomuw
+ ; CHECK: vpcomltuw
%res = call <8 x i16> @llvm.x86.xop.vpcomltuw(<8 x i16> %a0, <8 x i16> %a1) ;
ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.xop.vpcomltuw(<8 x i16>, <8 x i16>) nounwind readnone
define <8 x i16> @test_int_x86_xop_vpcomltw(<8 x i16> %a0, <8 x i16> %a1) {
- ; CHECK: vpcomw
+ ; CHECK: vpcomltw
%res = call <8 x i16> @llvm.x86.xop.vpcomltw(<8 x i16> %a0, <8 x i16> %a1) ;
ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.xop.vpcomltw(<8 x i16>, <8 x i16>) nounwind readnone
define <16 x i8> @test_int_x86_xop_vpcomneb(<16 x i8> %a0, <16 x i8> %a1) {
- ; CHECK: vpcomb
+ ; CHECK: vpcomneqb
%res = call <16 x i8> @llvm.x86.xop.vpcomneb(<16 x i8> %a0, <16 x i8> %a1) ;
ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.xop.vpcomneb(<16 x i8>, <16 x i8>) nounwind readnone
define <4 x i32> @test_int_x86_xop_vpcomned(<4 x i32> %a0, <4 x i32> %a1) {
- ; CHECK: vpcomd
+ ; CHECK: vpcomneqd
%res = call <4 x i32> @llvm.x86.xop.vpcomned(<4 x i32> %a0, <4 x i32> %a1) ;
ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.xop.vpcomned(<4 x i32>, <4 x i32>) nounwind readnone
define <2 x i64> @test_int_x86_xop_vpcomneq(<2 x i64> %a0, <2 x i64> %a1) {
- ; CHECK: vpcomq
+ ; CHECK: vpcomneqq
%res = call <2 x i64> @llvm.x86.xop.vpcomneq(<2 x i64> %a0, <2 x i64> %a1) ;
ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.xop.vpcomneq(<2 x i64>, <2 x i64>) nounwind readnone
define <16 x i8> @test_int_x86_xop_vpcomneub(<16 x i8> %a0, <16 x i8> %a1) {
- ; CHECK: vpcomub
+ ; CHECK: vpcomnequb
%res = call <16 x i8> @llvm.x86.xop.vpcomneub(<16 x i8> %a0, <16 x i8> %a1) ;
ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.xop.vpcomneub(<16 x i8>, <16 x i8>) nounwind readnone
define <4 x i32> @test_int_x86_xop_vpcomneud(<4 x i32> %a0, <4 x i32> %a1) {
- ; CHECK: vpcomud
+ ; CHECK: vpcomnequd
%res = call <4 x i32> @llvm.x86.xop.vpcomneud(<4 x i32> %a0, <4 x i32> %a1) ;
ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.xop.vpcomneud(<4 x i32>, <4 x i32>) nounwind readnone
define <2 x i64> @test_int_x86_xop_vpcomneuq(<2 x i64> %a0, <2 x i64> %a1) {
- ; CHECK: vpcomuq
+ ; CHECK: vpcomnequq
%res = call <2 x i64> @llvm.x86.xop.vpcomneuq(<2 x i64> %a0, <2 x i64> %a1) ;
ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.xop.vpcomneuq(<2 x i64>, <2 x i64>) nounwind readnone
define <8 x i16> @test_int_x86_xop_vpcomneuw(<8 x i16> %a0, <8 x i16> %a1) {
- ; CHECK: vpcomuw
+ ; CHECK: vpcomnequw
%res = call <8 x i16> @llvm.x86.xop.vpcomneuw(<8 x i16> %a0, <8 x i16> %a1) ;
ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.xop.vpcomneuw(<8 x i16>, <8 x i16>) nounwind readnone
define <8 x i16> @test_int_x86_xop_vpcomnew(<8 x i16> %a0, <8 x i16> %a1) {
- ; CHECK: vpcomw
+ ; CHECK: vpcomneqw
%res = call <8 x i16> @llvm.x86.xop.vpcomnew(<8 x i16> %a0, <8 x i16> %a1) ;
ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.xop.vpcomnew(<8 x i16>, <8 x i16>) nounwind readnone
define <16 x i8> @test_int_x86_xop_vpcomtrueb(<16 x i8> %a0, <16 x i8> %a1) {
- ; CHECK: vpcomb
+ ; CHECK: vpcomtrueb
%res = call <16 x i8> @llvm.x86.xop.vpcomtrueb(<16 x i8> %a0, <16 x i8> %a1) ;
ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.xop.vpcomtrueb(<16 x i8>, <16 x i8>) nounwind readnone
define <4 x i32> @test_int_x86_xop_vpcomtrued(<4 x i32> %a0, <4 x i32> %a1) {
- ; CHECK: vpcomd
+ ; CHECK: vpcomtrued
%res = call <4 x i32> @llvm.x86.xop.vpcomtrued(<4 x i32> %a0, <4 x i32> %a1) ;
ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.xop.vpcomtrued(<4 x i32>, <4 x i32>) nounwind readnone
define <2 x i64> @test_int_x86_xop_vpcomtrueq(<2 x i64> %a0, <2 x i64> %a1) {
- ; CHECK: vpcomq
+ ; CHECK: vpcomtrueq
%res = call <2 x i64> @llvm.x86.xop.vpcomtrueq(<2 x i64> %a0, <2 x i64> %a1) ;
ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.xop.vpcomtrueq(<2 x i64>, <2 x i64>) nounwind readnone
define <16 x i8> @test_int_x86_xop_vpcomtrueub(<16 x i8> %a0, <16 x i8> %a1) {
- ; CHECK: vpcomub
+ ; CHECK: vpcomtrueub
%res = call <16 x i8> @llvm.x86.xop.vpcomtrueub(<16 x i8> %a0, <16 x i8> %a1) ;
ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.xop.vpcomtrueub(<16 x i8>, <16 x i8>) nounwind readnone
define <4 x i32> @test_int_x86_xop_vpcomtrueud(<4 x i32> %a0, <4 x i32> %a1) {
- ; CHECK: vpcomud
+ ; CHECK: vpcomtrueud
%res = call <4 x i32> @llvm.x86.xop.vpcomtrueud(<4 x i32> %a0, <4 x i32> %a1) ;
ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.xop.vpcomtrueud(<4 x i32>, <4 x i32>) nounwind readnone
define <2 x i64> @test_int_x86_xop_vpcomtrueuq(<2 x i64> %a0, <2 x i64> %a1) {
- ; CHECK: vpcomuq
+ ; CHECK: vpcomtrueuq
%res = call <2 x i64> @llvm.x86.xop.vpcomtrueuq(<2 x i64> %a0, <2 x i64> %a1) ;
ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.xop.vpcomtrueuq(<2 x i64>, <2 x i64>) nounwind readnone
define <8 x i16> @test_int_x86_xop_vpcomtrueuw(<8 x i16> %a0, <8 x i16> %a1) {
- ; CHECK: vpcomuw
+ ; CHECK: vpcomtrueuw
%res = call <8 x i16> @llvm.x86.xop.vpcomtrueuw(<8 x i16> %a0, <8 x i16> %a1) ;
ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.xop.vpcomtrueuw(<8 x i16>, <8 x i16>) nounwind readnone
define <8 x i16> @test_int_x86_xop_vpcomtruew(<8 x i16> %a0, <8 x i16> %a1) {
- ; CHECK: vpcomw
+ ; CHECK: vpcomtruew
%res = call <8 x i16> @llvm.x86.xop.vpcomtruew(<8 x i16> %a0, <8 x i16> %a1) ;
ret <8 x i16> %res
}
@@ -645,7 +645,7 @@ define <2 x i64> @test_int_x86_xop_vphsubdq(<4 x i32> %a0) {
define <2 x i64> @test_int_x86_xop_vphsubdq_mem(<4 x i32>* %a0) {
; CHECK-NOT: vmovaps
; CHECK: vphsubdq
- %vec = load <4 x i32>* %a0
+ %vec = load <4 x i32>, <4 x i32>* %a0
%res = call <2 x i64> @llvm.x86.xop.vphsubdq(<4 x i32> %vec) ;
ret <2 x i64> %res
}
@@ -659,7 +659,7 @@ define <4 x i32> @test_int_x86_xop_vphsubwd(<8 x i16> %a0) {
define <4 x i32> @test_int_x86_xop_vphsubwd_mem(<8 x i16>* %a0) {
; CHECK-NOT: vmovaps
; CHECK: vphsubwd
- %vec = load <8 x i16>* %a0
+ %vec = load <8 x i16>, <8 x i16>* %a0
%res = call <4 x i32> @llvm.x86.xop.vphsubwd(<8 x i16> %vec) ;
ret <4 x i32> %res
}
@@ -750,7 +750,7 @@ define <4 x i32> @test_int_x86_xop_vpmadcswd(<8 x i16> %a0, <8 x i16> %a1, <4 x
define <4 x i32> @test_int_x86_xop_vpmadcswd_mem(<8 x i16> %a0, <8 x i16>* %a1, <4 x i32> %a2) {
; CHECK-NOT: vmovaps
; CHECK: vpmadcswd
- %vec = load <8 x i16>* %a1
+ %vec = load <8 x i16>, <8 x i16>* %a1
%res = call <4 x i32> @llvm.x86.xop.vpmadcswd(<8 x i16> %a0, <8 x i16> %vec, <4 x i32> %a2) ;
ret <4 x i32> %res
}
@@ -764,14 +764,14 @@ define <16 x i8> @test_int_x86_xop_vpperm(<16 x i8> %a0, <16 x i8> %a1, <16 x i8
define <16 x i8> @test_int_x86_xop_vpperm_rm(<16 x i8> %a0, <16 x i8> %a1, <16 x i8>* %a2) {
; CHECK-NOT: vmovaps
; CHECK: vpperm
- %vec = load <16 x i8>* %a2
+ %vec = load <16 x i8>, <16 x i8>* %a2
%res = call <16 x i8> @llvm.x86.xop.vpperm(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %vec) ;
ret <16 x i8> %res
}
define <16 x i8> @test_int_x86_xop_vpperm_mr(<16 x i8> %a0, <16 x i8>* %a1, <16 x i8> %a2) {
; CHECK-NOT: vmovaps
; CHECK: vpperm
- %vec = load <16 x i8>* %a1
+ %vec = load <16 x i8>, <16 x i8>* %a1
%res = call <16 x i8> @llvm.x86.xop.vpperm(<16 x i8> %a0, <16 x i8> %vec, <16 x i8> %a2) ;
ret <16 x i8> %res
}
@@ -862,14 +862,14 @@ define <8 x i16> @test_int_x86_xop_vpshlw(<8 x i16> %a0, <8 x i16> %a1) {
define <8 x i16> @test_int_x86_xop_vpshlw_rm(<8 x i16> %a0, <8 x i16>* %a1) {
; CHECK-NOT: vmovaps
; CHECK: vpshlw
- %vec = load <8 x i16>* %a1
+ %vec = load <8 x i16>, <8 x i16>* %a1
%res = call <8 x i16> @llvm.x86.xop.vpshlw(<8 x i16> %a0, <8 x i16> %vec) ;
ret <8 x i16> %res
}
define <8 x i16> @test_int_x86_xop_vpshlw_mr(<8 x i16>* %a0, <8 x i16> %a1) {
; CHECK-NOT: vmovaps
; CHECK: vpshlw
- %vec = load <8 x i16>* %a0
+ %vec = load <8 x i16>, <8 x i16>* %a0
%res = call <8 x i16> @llvm.x86.xop.vpshlw(<8 x i16> %vec, <8 x i16> %a1) ;
ret <8 x i16> %res
}
@@ -884,7 +884,7 @@ define <4 x float> @test_int_x86_xop_vfrcz_ss(<4 x float> %a0) {
define <4 x float> @test_int_x86_xop_vfrcz_ss_mem(float* %a0) {
; CHECK-NOT: mov
; CHECK: vfrczss
- %elem = load float* %a0
+ %elem = load float, float* %a0
%vec = insertelement <4 x float> undef, float %elem, i32 0
%res = call <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float> %vec) ;
ret <4 x float> %res
@@ -900,7 +900,7 @@ define <2 x double> @test_int_x86_xop_vfrcz_sd(<2 x double> %a0) {
define <2 x double> @test_int_x86_xop_vfrcz_sd_mem(double* %a0) {
; CHECK-NOT: mov
; CHECK: vfrczsd
- %elem = load double* %a0
+ %elem = load double, double* %a0
%vec = insertelement <2 x double> undef, double %elem, i32 0
%res = call <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double> %vec) ;
ret <2 x double> %res
@@ -915,7 +915,7 @@ define <2 x double> @test_int_x86_xop_vfrcz_pd(<2 x double> %a0) {
define <2 x double> @test_int_x86_xop_vfrcz_pd_mem(<2 x double>* %a0) {
; CHECK-NOT: vmovaps
; CHECK: vfrczpd
- %vec = load <2 x double>* %a0
+ %vec = load <2 x double>, <2 x double>* %a0
%res = call <2 x double> @llvm.x86.xop.vfrcz.pd(<2 x double> %vec) ;
ret <2 x double> %res
}
@@ -931,7 +931,7 @@ define <4 x double> @test_int_x86_xop_vfrcz_pd_256_mem(<4 x double>* %a0) {
; CHECK-NOT: vmovaps
; CHECK: vfrczpd
; CHECK: ymm
- %vec = load <4 x double>* %a0
+ %vec = load <4 x double>, <4 x double>* %a0
%res = call <4 x double> @llvm.x86.xop.vfrcz.pd.256(<4 x double> %vec) ;
ret <4 x double> %res
}
@@ -945,7 +945,7 @@ define <4 x float> @test_int_x86_xop_vfrcz_ps(<4 x float> %a0) {
define <4 x float> @test_int_x86_xop_vfrcz_ps_mem(<4 x float>* %a0) {
; CHECK-NOT: vmovaps
; CHECK: vfrczps
- %vec = load <4 x float>* %a0
+ %vec = load <4 x float>, <4 x float>* %a0
%res = call <4 x float> @llvm.x86.xop.vfrcz.ps(<4 x float> %vec) ;
ret <4 x float> %res
}
@@ -961,7 +961,7 @@ define <8 x float> @test_int_x86_xop_vfrcz_ps_256_mem(<8 x float>* %a0) {
; CHECK-NOT: vmovaps
; CHECK: vfrczps
; CHECK: ymm
- %vec = load <8 x float>* %a0
+ %vec = load <8 x float>, <8 x float>* %a0
%res = call <8 x float> @llvm.x86.xop.vfrcz.ps.256(<8 x float> %vec) ;
ret <8 x float> %res
}
diff --git a/test/CodeGen/X86/xor-icmp.ll b/test/CodeGen/X86/xor-icmp.ll
index dd1fcca48f61..397e5bc10f5b 100644
--- a/test/CodeGen/X86/xor-icmp.ll
+++ b/test/CodeGen/X86/xor-icmp.ll
@@ -24,11 +24,11 @@ entry:
br i1 %4, label %bb1, label %bb
bb: ; preds = %entry
- %5 = tail call i32 (...)* @foo() nounwind ; <i32> [#uses=1]
+ %5 = tail call i32 (...) @foo() nounwind ; <i32> [#uses=1]
ret i32 %5
bb1: ; preds = %entry
- %6 = tail call i32 (...)* @bar() nounwind ; <i32> [#uses=1]
+ %6 = tail call i32 (...) @bar() nounwind ; <i32> [#uses=1]
ret i32 %6
}
@@ -59,7 +59,7 @@ entry:
br i1 %2, label %bb, label %return
bb: ; preds = %entry
- %3 = tail call i32 (...)* @foo() nounwind ; <i32> [#uses=0]
+ %3 = tail call i32 (...) @foo() nounwind ; <i32> [#uses=0]
ret i32 undef
return: ; preds = %entry
diff --git a/test/CodeGen/X86/xor.ll b/test/CodeGen/X86/xor.ll
index fd8e1b4cebaa..829be41e5127 100644
--- a/test/CodeGen/X86/xor.ll
+++ b/test/CodeGen/X86/xor.ll
@@ -1,6 +1,6 @@
-; RUN: llc < %s -mcpu=corei7 -march=x86 -mattr=+sse2 | FileCheck %s -check-prefix=X32
-; RUN: llc < %s -mcpu=corei7 -mtriple=x86_64-linux | FileCheck %s -check-prefix=X64
-; RUN: llc < %s -mcpu=corei7 -mtriple=x86_64-win32 | FileCheck %s -check-prefix=X64
+; RUN: llc < %s -march=x86 -mattr=+sse2 | FileCheck %s -check-prefix=X32
+; RUN: llc < %s -mtriple=x86_64-linux -mattr=+sse2 | FileCheck %s -check-prefix=X64
+; RUN: llc < %s -mtriple=x86_64-win32 -mattr=+sse2 | FileCheck %s -check-prefix=X64
; Though it is undefined, we want xor undef,undef to produce zero.
define <4 x i32> @test1() nounwind {
@@ -181,3 +181,15 @@ define i32 @PR17487(i1 %tobool) {
; X32-LABEL: PR17487:
; X32: andn
}
+
+define i32 @test11(i32 %b) {
+ %shl = shl i32 1, %b
+ %neg = xor i32 %shl, -1
+ ret i32 %neg
+; X64-LABEL: test11:
+; X64: movl $-2, %[[REG:.*]]
+; X64: roll %{{.*}}, %[[REG]]
+; X32-LABEL: test11:
+; X32: movl $-2, %[[REG:.*]]
+; X32: roll %{{.*}}, %[[REG]]
+}
diff --git a/test/CodeGen/X86/zext-extract_subreg.ll b/test/CodeGen/X86/zext-extract_subreg.ll
index 43e79c77acc2..9e34abb69b39 100644
--- a/test/CodeGen/X86/zext-extract_subreg.ll
+++ b/test/CodeGen/X86/zext-extract_subreg.ll
@@ -6,7 +6,7 @@ entry:
br i1 undef, label %return, label %if.end.i
if.end.i: ; preds = %entry
- %tmp7.i = load i32* undef, align 4
+ %tmp7.i = load i32, i32* undef, align 4
br i1 undef, label %return, label %if.end
if.end: ; preds = %if.end.i
diff --git a/test/CodeGen/X86/zext-sext.ll b/test/CodeGen/X86/zext-sext.ll
index 5b2713dc6fc1..01f871159d3b 100644
--- a/test/CodeGen/X86/zext-sext.ll
+++ b/test/CodeGen/X86/zext-sext.ll
@@ -8,15 +8,15 @@
define void @func([40 x i16]* %a, i32* %b, i16** %c, i64* %d) nounwind {
entry:
- %tmp103 = getelementptr inbounds [40 x i16]* %a, i64 0, i64 4
- %tmp104 = load i16* %tmp103, align 2
+ %tmp103 = getelementptr inbounds [40 x i16], [40 x i16]* %a, i64 0, i64 4
+ %tmp104 = load i16, i16* %tmp103, align 2
%tmp105 = sext i16 %tmp104 to i32
- %tmp106 = load i32* %b, align 4
+ %tmp106 = load i32, i32* %b, align 4
%tmp107 = sub nsw i32 4, %tmp106
- %tmp108 = load i16** %c, align 8
+ %tmp108 = load i16*, i16** %c, align 8
%tmp109 = sext i32 %tmp107 to i64
- %tmp110 = getelementptr inbounds i16* %tmp108, i64 %tmp109
- %tmp111 = load i16* %tmp110, align 1
+ %tmp110 = getelementptr inbounds i16, i16* %tmp108, i64 %tmp109
+ %tmp111 = load i16, i16* %tmp110, align 1
%tmp112 = sext i16 %tmp111 to i32
%tmp = mul i32 355244649, %tmp112
%tmp1 = mul i32 %tmp, %tmp105
@@ -34,11 +34,12 @@ entry:
%tmp12 = add i64 %tmp11, 5089792279245435153
; CHECK: addl $2138875574, %e[[REGISTER_zext:[a-z0-9]+]]
-; CHECK: movslq %e[[REGISTER_zext]], [[REGISTER_sext:%r[a-z0-9]+]]
; CHECK: cmpl $-8608074, %e[[REGISTER_zext]]
+; CHECK: movslq %e[[REGISTER_zext]], [[REGISTER_sext:%r[a-z0-9]+]]
; CHECK-NOT: [[REGISTER_zext]]
-; CHECK-DAG: testl %e[[REGISTER_zext]]
-; CHECK: subq %r[[REGISTER_zext]], [[REGISTER_sext]]
+; CHECK-DAG: cmpl $2138875573, %e[[REGISTER_zext]]
+; CHECK: movq [[REGISTER_sext]], [[REGISTER_sext2:%[a-z0-9]+]]
+; CHECK: subq %r[[REGISTER_zext]], [[REGISTER_sext2]]
%tmp13 = sub i64 %tmp12, 2138875574
%tmp14 = zext i32 %tmp4 to i64
@@ -49,7 +50,7 @@ entry:
%tmp19 = sub i64 %tmp18, 5386586244038704851
%tmp20 = add i64 %tmp19, -1368057358110947217
%tmp21 = mul i64 %tmp20, -422037402840850817
- %tmp115 = load i64* %d, align 8
+ %tmp115 = load i64, i64* %d, align 8
%alphaX = mul i64 468858157810230901, %tmp21
%alphaXbetaY = add i64 %alphaX, %tmp115
%transformed = add i64 %alphaXbetaY, 9040145182981852475
diff --git a/test/CodeGen/X86/zlib-longest-match.ll b/test/CodeGen/X86/zlib-longest-match.ll
index d1598dce02d7..7be03f662dae 100644
--- a/test/CodeGen/X86/zlib-longest-match.ll
+++ b/test/CodeGen/X86/zlib-longest-match.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=x86-64 < %s -block-placement-exit-block-bias=20 | FileCheck %s
+; RUN: llc -march=x86-64 < %s -block-placement-exit-block-bias=20 -no-phi-elim-live-out-early-exit | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.9.0"
@@ -25,56 +25,55 @@ target triple = "x86_64-apple-macosx10.9.0"
; the two hot blocks are laid out close to each other.
; CHECK-NEXT: %land.rhs131
; CHECK: jne
-; CHECK: jmp
define i32 @longest_match(%struct.internal_state* nocapture %s, i32 %cur_match) nounwind {
entry:
- %max_chain_length = getelementptr inbounds %struct.internal_state* %s, i64 0, i32 31
- %0 = load i32* %max_chain_length, align 4
- %window = getelementptr inbounds %struct.internal_state* %s, i64 0, i32 14
- %1 = load i8** %window, align 8
- %strstart = getelementptr inbounds %struct.internal_state* %s, i64 0, i32 27
- %2 = load i32* %strstart, align 4
+ %max_chain_length = getelementptr inbounds %struct.internal_state, %struct.internal_state* %s, i64 0, i32 31
+ %0 = load i32, i32* %max_chain_length, align 4
+ %window = getelementptr inbounds %struct.internal_state, %struct.internal_state* %s, i64 0, i32 14
+ %1 = load i8*, i8** %window, align 8
+ %strstart = getelementptr inbounds %struct.internal_state, %struct.internal_state* %s, i64 0, i32 27
+ %2 = load i32, i32* %strstart, align 4
%idx.ext = zext i32 %2 to i64
- %add.ptr = getelementptr inbounds i8* %1, i64 %idx.ext
- %prev_length = getelementptr inbounds %struct.internal_state* %s, i64 0, i32 30
- %3 = load i32* %prev_length, align 4
- %nice_match1 = getelementptr inbounds %struct.internal_state* %s, i64 0, i32 36
- %4 = load i32* %nice_match1, align 4
- %w_size = getelementptr inbounds %struct.internal_state* %s, i64 0, i32 11
- %5 = load i32* %w_size, align 4
+ %add.ptr = getelementptr inbounds i8, i8* %1, i64 %idx.ext
+ %prev_length = getelementptr inbounds %struct.internal_state, %struct.internal_state* %s, i64 0, i32 30
+ %3 = load i32, i32* %prev_length, align 4
+ %nice_match1 = getelementptr inbounds %struct.internal_state, %struct.internal_state* %s, i64 0, i32 36
+ %4 = load i32, i32* %nice_match1, align 4
+ %w_size = getelementptr inbounds %struct.internal_state, %struct.internal_state* %s, i64 0, i32 11
+ %5 = load i32, i32* %w_size, align 4
%sub = add i32 %5, -262
%cmp = icmp ugt i32 %2, %sub
%sub6 = sub i32 %2, %sub
%sub6. = select i1 %cmp, i32 %sub6, i32 0
- %prev7 = getelementptr inbounds %struct.internal_state* %s, i64 0, i32 16
- %6 = load i16** %prev7, align 8
- %w_mask = getelementptr inbounds %struct.internal_state* %s, i64 0, i32 13
- %7 = load i32* %w_mask, align 4
+ %prev7 = getelementptr inbounds %struct.internal_state, %struct.internal_state* %s, i64 0, i32 16
+ %6 = load i16*, i16** %prev7, align 8
+ %w_mask = getelementptr inbounds %struct.internal_state, %struct.internal_state* %s, i64 0, i32 13
+ %7 = load i32, i32* %w_mask, align 4
%add.ptr11.sum = add i64 %idx.ext, 258
- %add.ptr12 = getelementptr inbounds i8* %1, i64 %add.ptr11.sum
+ %add.ptr12 = getelementptr inbounds i8, i8* %1, i64 %add.ptr11.sum
%sub13 = add nsw i32 %3, -1
%idxprom = sext i32 %sub13 to i64
%add.ptr.sum = add i64 %idxprom, %idx.ext
- %arrayidx = getelementptr inbounds i8* %1, i64 %add.ptr.sum
- %8 = load i8* %arrayidx, align 1
+ %arrayidx = getelementptr inbounds i8, i8* %1, i64 %add.ptr.sum
+ %8 = load i8, i8* %arrayidx, align 1
%idxprom14 = sext i32 %3 to i64
%add.ptr.sum213 = add i64 %idxprom14, %idx.ext
- %arrayidx15 = getelementptr inbounds i8* %1, i64 %add.ptr.sum213
- %9 = load i8* %arrayidx15, align 1
- %good_match = getelementptr inbounds %struct.internal_state* %s, i64 0, i32 35
- %10 = load i32* %good_match, align 4
+ %arrayidx15 = getelementptr inbounds i8, i8* %1, i64 %add.ptr.sum213
+ %9 = load i8, i8* %arrayidx15, align 1
+ %good_match = getelementptr inbounds %struct.internal_state, %struct.internal_state* %s, i64 0, i32 35
+ %10 = load i32, i32* %good_match, align 4
%cmp17 = icmp ult i32 %3, %10
%shr = lshr i32 %0, 2
%chain_length.0 = select i1 %cmp17, i32 %0, i32 %shr
- %lookahead = getelementptr inbounds %struct.internal_state* %s, i64 0, i32 29
- %11 = load i32* %lookahead, align 4
+ %lookahead = getelementptr inbounds %struct.internal_state, %struct.internal_state* %s, i64 0, i32 29
+ %11 = load i32, i32* %lookahead, align 4
%cmp18 = icmp ugt i32 %4, %11
%. = select i1 %cmp18, i32 %11, i32 %4
- %match_start = getelementptr inbounds %struct.internal_state* %s, i64 0, i32 28
+ %match_start = getelementptr inbounds %struct.internal_state, %struct.internal_state* %s, i64 0, i32 28
%add.ptr.sum217 = add i64 %idx.ext, 1
- %arrayidx44 = getelementptr inbounds i8* %1, i64 %add.ptr.sum217
+ %arrayidx44 = getelementptr inbounds i8, i8* %1, i64 %add.ptr.sum217
%add.ptr.sum218 = add i64 %idx.ext, 2
- %add.ptr50 = getelementptr inbounds i8* %1, i64 %add.ptr.sum218
+ %add.ptr50 = getelementptr inbounds i8, i8* %1, i64 %add.ptr.sum218
%sub.ptr.lhs.cast = ptrtoint i8* %add.ptr12 to i64
br label %do.body
@@ -85,11 +84,11 @@ do.body: ; preds = %land.rhs131, %entry
%scan_end1.0 = phi i8 [ %scan_end1.1, %land.rhs131 ], [ %8, %entry ]
%scan_end.0 = phi i8 [ %scan_end.1, %land.rhs131 ], [ %9, %entry ]
%idx.ext23 = zext i32 %cur_match.addr.0 to i64
- %add.ptr24 = getelementptr inbounds i8* %1, i64 %idx.ext23
+ %add.ptr24 = getelementptr inbounds i8, i8* %1, i64 %idx.ext23
%idxprom25 = sext i32 %best_len.0 to i64
%add.ptr24.sum = add i64 %idx.ext23, %idxprom25
- %arrayidx26 = getelementptr inbounds i8* %1, i64 %add.ptr24.sum
- %12 = load i8* %arrayidx26, align 1
+ %arrayidx26 = getelementptr inbounds i8, i8* %1, i64 %add.ptr24.sum
+ %12 = load i8, i8* %arrayidx26, align 1
%cmp28 = icmp eq i8 %12, %scan_end.0
br i1 %cmp28, label %lor.lhs.false, label %do.cond125
@@ -97,93 +96,93 @@ lor.lhs.false: ; preds = %do.body
%sub30 = add nsw i32 %best_len.0, -1
%idxprom31 = sext i32 %sub30 to i64
%add.ptr24.sum214 = add i64 %idx.ext23, %idxprom31
- %arrayidx32 = getelementptr inbounds i8* %1, i64 %add.ptr24.sum214
- %13 = load i8* %arrayidx32, align 1
+ %arrayidx32 = getelementptr inbounds i8, i8* %1, i64 %add.ptr24.sum214
+ %13 = load i8, i8* %arrayidx32, align 1
%cmp35 = icmp eq i8 %13, %scan_end1.0
br i1 %cmp35, label %lor.lhs.false37, label %do.cond125
lor.lhs.false37: ; preds = %lor.lhs.false
- %14 = load i8* %add.ptr24, align 1
- %15 = load i8* %add.ptr, align 1
+ %14 = load i8, i8* %add.ptr24, align 1
+ %15 = load i8, i8* %add.ptr, align 1
%cmp40 = icmp eq i8 %14, %15
br i1 %cmp40, label %lor.lhs.false42, label %do.cond125
lor.lhs.false42: ; preds = %lor.lhs.false37
%add.ptr24.sum215 = add i64 %idx.ext23, 1
- %incdec.ptr = getelementptr inbounds i8* %1, i64 %add.ptr24.sum215
- %16 = load i8* %incdec.ptr, align 1
- %17 = load i8* %arrayidx44, align 1
+ %incdec.ptr = getelementptr inbounds i8, i8* %1, i64 %add.ptr24.sum215
+ %16 = load i8, i8* %incdec.ptr, align 1
+ %17 = load i8, i8* %arrayidx44, align 1
%cmp46 = icmp eq i8 %16, %17
br i1 %cmp46, label %if.end49, label %do.cond125
if.end49: ; preds = %lor.lhs.false42
%incdec.ptr.sum = add i64 %idx.ext23, 2
- %incdec.ptr51 = getelementptr inbounds i8* %1, i64 %incdec.ptr.sum
+ %incdec.ptr51 = getelementptr inbounds i8, i8* %1, i64 %incdec.ptr.sum
br label %do.cond
do.cond: ; preds = %land.lhs.true100, %if.end49
%match.0 = phi i8* [ %incdec.ptr51, %if.end49 ], [ %incdec.ptr103, %land.lhs.true100 ]
%scan.1 = phi i8* [ %add.ptr50, %if.end49 ], [ %incdec.ptr101, %land.lhs.true100 ]
- %incdec.ptr53 = getelementptr inbounds i8* %scan.1, i64 1
- %18 = load i8* %incdec.ptr53, align 1
- %incdec.ptr55 = getelementptr inbounds i8* %match.0, i64 1
- %19 = load i8* %incdec.ptr55, align 1
+ %incdec.ptr53 = getelementptr inbounds i8, i8* %scan.1, i64 1
+ %18 = load i8, i8* %incdec.ptr53, align 1
+ %incdec.ptr55 = getelementptr inbounds i8, i8* %match.0, i64 1
+ %19 = load i8, i8* %incdec.ptr55, align 1
%cmp57 = icmp eq i8 %18, %19
br i1 %cmp57, label %land.lhs.true, label %do.end
land.lhs.true: ; preds = %do.cond
- %incdec.ptr59 = getelementptr inbounds i8* %scan.1, i64 2
- %20 = load i8* %incdec.ptr59, align 1
- %incdec.ptr61 = getelementptr inbounds i8* %match.0, i64 2
- %21 = load i8* %incdec.ptr61, align 1
+ %incdec.ptr59 = getelementptr inbounds i8, i8* %scan.1, i64 2
+ %20 = load i8, i8* %incdec.ptr59, align 1
+ %incdec.ptr61 = getelementptr inbounds i8, i8* %match.0, i64 2
+ %21 = load i8, i8* %incdec.ptr61, align 1
%cmp63 = icmp eq i8 %20, %21
br i1 %cmp63, label %land.lhs.true65, label %do.end
land.lhs.true65: ; preds = %land.lhs.true
- %incdec.ptr66 = getelementptr inbounds i8* %scan.1, i64 3
- %22 = load i8* %incdec.ptr66, align 1
- %incdec.ptr68 = getelementptr inbounds i8* %match.0, i64 3
- %23 = load i8* %incdec.ptr68, align 1
+ %incdec.ptr66 = getelementptr inbounds i8, i8* %scan.1, i64 3
+ %22 = load i8, i8* %incdec.ptr66, align 1
+ %incdec.ptr68 = getelementptr inbounds i8, i8* %match.0, i64 3
+ %23 = load i8, i8* %incdec.ptr68, align 1
%cmp70 = icmp eq i8 %22, %23
br i1 %cmp70, label %land.lhs.true72, label %do.end
land.lhs.true72: ; preds = %land.lhs.true65
- %incdec.ptr73 = getelementptr inbounds i8* %scan.1, i64 4
- %24 = load i8* %incdec.ptr73, align 1
- %incdec.ptr75 = getelementptr inbounds i8* %match.0, i64 4
- %25 = load i8* %incdec.ptr75, align 1
+ %incdec.ptr73 = getelementptr inbounds i8, i8* %scan.1, i64 4
+ %24 = load i8, i8* %incdec.ptr73, align 1
+ %incdec.ptr75 = getelementptr inbounds i8, i8* %match.0, i64 4
+ %25 = load i8, i8* %incdec.ptr75, align 1
%cmp77 = icmp eq i8 %24, %25
br i1 %cmp77, label %land.lhs.true79, label %do.end
land.lhs.true79: ; preds = %land.lhs.true72
- %incdec.ptr80 = getelementptr inbounds i8* %scan.1, i64 5
- %26 = load i8* %incdec.ptr80, align 1
- %incdec.ptr82 = getelementptr inbounds i8* %match.0, i64 5
- %27 = load i8* %incdec.ptr82, align 1
+ %incdec.ptr80 = getelementptr inbounds i8, i8* %scan.1, i64 5
+ %26 = load i8, i8* %incdec.ptr80, align 1
+ %incdec.ptr82 = getelementptr inbounds i8, i8* %match.0, i64 5
+ %27 = load i8, i8* %incdec.ptr82, align 1
%cmp84 = icmp eq i8 %26, %27
br i1 %cmp84, label %land.lhs.true86, label %do.end
land.lhs.true86: ; preds = %land.lhs.true79
- %incdec.ptr87 = getelementptr inbounds i8* %scan.1, i64 6
- %28 = load i8* %incdec.ptr87, align 1
- %incdec.ptr89 = getelementptr inbounds i8* %match.0, i64 6
- %29 = load i8* %incdec.ptr89, align 1
+ %incdec.ptr87 = getelementptr inbounds i8, i8* %scan.1, i64 6
+ %28 = load i8, i8* %incdec.ptr87, align 1
+ %incdec.ptr89 = getelementptr inbounds i8, i8* %match.0, i64 6
+ %29 = load i8, i8* %incdec.ptr89, align 1
%cmp91 = icmp eq i8 %28, %29
br i1 %cmp91, label %land.lhs.true93, label %do.end
land.lhs.true93: ; preds = %land.lhs.true86
- %incdec.ptr94 = getelementptr inbounds i8* %scan.1, i64 7
- %30 = load i8* %incdec.ptr94, align 1
- %incdec.ptr96 = getelementptr inbounds i8* %match.0, i64 7
- %31 = load i8* %incdec.ptr96, align 1
+ %incdec.ptr94 = getelementptr inbounds i8, i8* %scan.1, i64 7
+ %30 = load i8, i8* %incdec.ptr94, align 1
+ %incdec.ptr96 = getelementptr inbounds i8, i8* %match.0, i64 7
+ %31 = load i8, i8* %incdec.ptr96, align 1
%cmp98 = icmp eq i8 %30, %31
br i1 %cmp98, label %land.lhs.true100, label %do.end
land.lhs.true100: ; preds = %land.lhs.true93
- %incdec.ptr101 = getelementptr inbounds i8* %scan.1, i64 8
- %32 = load i8* %incdec.ptr101, align 1
- %incdec.ptr103 = getelementptr inbounds i8* %match.0, i64 8
- %33 = load i8* %incdec.ptr103, align 1
+ %incdec.ptr101 = getelementptr inbounds i8, i8* %scan.1, i64 8
+ %32 = load i8, i8* %incdec.ptr101, align 1
+ %incdec.ptr103 = getelementptr inbounds i8, i8* %match.0, i64 8
+ %33 = load i8, i8* %incdec.ptr103, align 1
%cmp105 = icmp eq i8 %32, %33
%cmp107 = icmp ult i8* %incdec.ptr101, %add.ptr12
%or.cond = and i1 %cmp105, %cmp107
@@ -207,12 +206,12 @@ if.end118: ; preds = %if.then114
%sub119 = add nsw i32 %sub110, -1
%idxprom120 = sext i32 %sub119 to i64
%add.ptr111.sum = add i64 %idxprom120, %idx.ext
- %arrayidx121 = getelementptr inbounds i8* %1, i64 %add.ptr111.sum
- %34 = load i8* %arrayidx121, align 1
+ %arrayidx121 = getelementptr inbounds i8, i8* %1, i64 %add.ptr111.sum
+ %34 = load i8, i8* %arrayidx121, align 1
%idxprom122 = sext i32 %sub110 to i64
%add.ptr111.sum216 = add i64 %idxprom122, %idx.ext
- %arrayidx123 = getelementptr inbounds i8* %1, i64 %add.ptr111.sum216
- %35 = load i8* %arrayidx123, align 1
+ %arrayidx123 = getelementptr inbounds i8, i8* %1, i64 %add.ptr111.sum216
+ %35 = load i8, i8* %arrayidx123, align 1
br label %do.cond125
do.cond125: ; preds = %if.end118, %do.end, %lor.lhs.false42, %lor.lhs.false37, %lor.lhs.false, %do.body
@@ -221,8 +220,8 @@ do.cond125: ; preds = %if.end118, %do.end,
%scan_end.1 = phi i8 [ %scan_end.0, %do.body ], [ %scan_end.0, %lor.lhs.false ], [ %scan_end.0, %lor.lhs.false37 ], [ %scan_end.0, %lor.lhs.false42 ], [ %35, %if.end118 ], [ %scan_end.0, %do.end ]
%and = and i32 %cur_match.addr.0, %7
%idxprom126 = zext i32 %and to i64
- %arrayidx127 = getelementptr inbounds i16* %6, i64 %idxprom126
- %36 = load i16* %arrayidx127, align 2
+ %arrayidx127 = getelementptr inbounds i16, i16* %6, i64 %idxprom126
+ %36 = load i16, i16* %arrayidx127, align 2
%conv128 = zext i16 %36 to i32
%cmp129 = icmp ugt i32 %conv128, %sub6.
br i1 %cmp129, label %land.rhs131, label %do.end135
diff --git a/test/CodeGen/XCore/2009-01-08-Crash.ll b/test/CodeGen/XCore/2009-01-08-Crash.ll
index a31ea1e2e9be..5eddbc33761c 100644
--- a/test/CodeGen/XCore/2009-01-08-Crash.ll
+++ b/test/CodeGen/XCore/2009-01-08-Crash.ll
@@ -6,7 +6,7 @@
define i32 @test(i32 %bar) nounwind readnone {
entry:
%bar_addr = alloca i32
- %0 = getelementptr i32* %bar_addr, i32 -1
- %1 = load i32* %0, align 4
+ %0 = getelementptr i32, i32* %bar_addr, i32 -1
+ %1 = load i32, i32* %0, align 4
ret i32 %1
}
diff --git a/test/CodeGen/XCore/2010-02-25-LSR-Crash.ll b/test/CodeGen/XCore/2010-02-25-LSR-Crash.ll
index 6ad9a73899d1..693e6f0f136b 100644
--- a/test/CodeGen/XCore/2010-02-25-LSR-Crash.ll
+++ b/test/CodeGen/XCore/2010-02-25-LSR-Crash.ll
@@ -14,9 +14,9 @@ entry:
bb3.i15.i.i: ; preds = %bb3.i15.i.i, %entry
%indvar.i.i.i = phi i32 [ %indvar.next.i.i.i, %bb3.i15.i.i ], [ 0, %entry ] ; <i32> [#uses=2]
%tmp137 = sub i32 0, %indvar.i.i.i ; <i32> [#uses=1]
- %scevgep13.i.i.i = getelementptr i32* undef, i32 %tmp137 ; <i32*> [#uses=2]
+ %scevgep13.i.i.i = getelementptr i32, i32* undef, i32 %tmp137 ; <i32*> [#uses=2]
%scevgep1314.i.i.i = bitcast i32* %scevgep13.i.i.i to %struct.dwarf_fde** ; <%struct.dwarf_fde**> [#uses=1]
- %0 = load %struct.dwarf_fde** %scevgep1314.i.i.i, align 4 ; <%struct.dwarf_fde*> [#uses=0]
+ %0 = load %struct.dwarf_fde*, %struct.dwarf_fde** %scevgep1314.i.i.i, align 4 ; <%struct.dwarf_fde*> [#uses=0]
store i32 undef, i32* %scevgep13.i.i.i
%indvar.next.i.i.i = add i32 %indvar.i.i.i, 1 ; <i32> [#uses=1]
br label %bb3.i15.i.i
diff --git a/test/CodeGen/XCore/2011-01-31-DAGCombineBug.ll b/test/CodeGen/XCore/2011-01-31-DAGCombineBug.ll
index f8fe0d2136ff..40c9654371fd 100644
--- a/test/CodeGen/XCore/2011-01-31-DAGCombineBug.ll
+++ b/test/CodeGen/XCore/2011-01-31-DAGCombineBug.ll
@@ -5,6 +5,6 @@
define i32 @test_entry() nounwind {
entry:
- %0 = load i32* getelementptr inbounds (%struct.st* @x, i32 0, i32 3), align 2
+ %0 = load i32, i32* getelementptr inbounds (%struct.st, %struct.st* @x, i32 0, i32 3), align 2
ret i32 %0
}
diff --git a/test/CodeGen/XCore/atomic.ll b/test/CodeGen/XCore/atomic.ll
index 6ca80cf5d9e7..13579dbd8197 100644
--- a/test/CodeGen/XCore/atomic.ll
+++ b/test/CodeGen/XCore/atomic.ll
@@ -25,27 +25,27 @@ entry:
; CHECK-NEXT: ldaw r[[R1:[0-9]+]], dp[pool]
; CHECK-NEXT: #MEMBARRIER
; CHECK-NEXT: ldc r[[R2:[0-9]+]], 0
- %0 = load atomic i32* bitcast (i64* @pool to i32*) acquire, align 4
+ %0 = load atomic i32, i32* bitcast (i64* @pool to i32*) acquire, align 4
; CHECK-NEXT: ld16s r3, r[[R1]][r[[R2]]]
; CHECK-NEXT: #MEMBARRIER
- %1 = load atomic i16* bitcast (i64* @pool to i16*) acquire, align 2
+ %1 = load atomic i16, i16* bitcast (i64* @pool to i16*) acquire, align 2
; CHECK-NEXT: ld8u r11, r[[R1]][r[[R2]]]
; CHECK-NEXT: #MEMBARRIER
- %2 = load atomic i8* bitcast (i64* @pool to i8*) acquire, align 1
+ %2 = load atomic i8, i8* bitcast (i64* @pool to i8*) acquire, align 1
; CHECK-NEXT: ldw r4, dp[pool]
; CHECK-NEXT: #MEMBARRIER
- %3 = load atomic i32* bitcast (i64* @pool to i32*) seq_cst, align 4
+ %3 = load atomic i32, i32* bitcast (i64* @pool to i32*) seq_cst, align 4
; CHECK-NEXT: ld16s r5, r[[R1]][r[[R2]]]
; CHECK-NEXT: #MEMBARRIER
- %4 = load atomic i16* bitcast (i64* @pool to i16*) seq_cst, align 2
+ %4 = load atomic i16, i16* bitcast (i64* @pool to i16*) seq_cst, align 2
; CHECK-NEXT: ld8u r6, r[[R1]][r[[R2]]]
; CHECK-NEXT: #MEMBARRIER
- %5 = load atomic i8* bitcast (i64* @pool to i8*) seq_cst, align 1
+ %5 = load atomic i8, i8* bitcast (i64* @pool to i8*) seq_cst, align 1
; CHECK-NEXT: #MEMBARRIER
; CHECK-NEXT: stw r[[R0]], dp[pool]
@@ -80,11 +80,11 @@ entry:
; CHECK-NEXT: st16 r[[R0]], r[[R1]][r[[R2]]]
; CHECK-NEXT: ld8u r[[R0]], r[[R1]][r[[R2]]]
; CHECK-NEXT: st8 r[[R0]], r[[R1]][r[[R2]]]
- %6 = load atomic i32* bitcast (i64* @pool to i32*) monotonic, align 4
+ %6 = load atomic i32, i32* bitcast (i64* @pool to i32*) monotonic, align 4
store atomic i32 %6, i32* bitcast (i64* @pool to i32*) monotonic, align 4
- %7 = load atomic i16* bitcast (i64* @pool to i16*) monotonic, align 2
+ %7 = load atomic i16, i16* bitcast (i64* @pool to i16*) monotonic, align 2
store atomic i16 %7, i16* bitcast (i64* @pool to i16*) monotonic, align 2
- %8 = load atomic i8* bitcast (i64* @pool to i8*) monotonic, align 1
+ %8 = load atomic i8, i8* bitcast (i64* @pool to i8*) monotonic, align 1
store atomic i8 %8, i8* bitcast (i64* @pool to i8*) monotonic, align 1
ret void
diff --git a/test/CodeGen/XCore/codemodel.ll b/test/CodeGen/XCore/codemodel.ll
index 0245893c478d..4e637995c5a8 100644
--- a/test/CodeGen/XCore/codemodel.ll
+++ b/test/CodeGen/XCore/codemodel.ll
@@ -96,22 +96,22 @@ entry:
; LARGE: retsp 0
define i32 @f(i32* %i) {
entry:
- %0 = getelementptr inbounds i32* %i, i32 16383
- %1 = load i32* %0
- %2 = getelementptr inbounds i32* %i, i32 16384
- %3 = load i32* %2
+ %0 = getelementptr inbounds i32, i32* %i, i32 16383
+ %1 = load i32, i32* %0
+ %2 = getelementptr inbounds i32, i32* %i, i32 16384
+ %3 = load i32, i32* %2
%4 = add nsw i32 %1, %3
- %5 = load i32* getelementptr inbounds ([100 x i32]* @l, i32 0, i32 0)
+ %5 = load i32, i32* getelementptr inbounds ([100 x i32], [100 x i32]* @l, i32 0, i32 0)
%6 = add nsw i32 %4, %5
- %7 = load i32* getelementptr inbounds ([100 x i32]* @l, i32 0, i32 1)
+ %7 = load i32, i32* getelementptr inbounds ([100 x i32], [100 x i32]* @l, i32 0, i32 1)
%8 = add nsw i32 %6, %7
- %9 = load i32* getelementptr inbounds ([100 x i32]* @l, i32 0, i32 98)
+ %9 = load i32, i32* getelementptr inbounds ([100 x i32], [100 x i32]* @l, i32 0, i32 98)
%10 = add nsw i32 %8, %9
- %11 = load i32* getelementptr inbounds ([100 x i32]* @l, i32 0, i32 99)
+ %11 = load i32, i32* getelementptr inbounds ([100 x i32], [100 x i32]* @l, i32 0, i32 99)
%12 = add nsw i32 %10, %11
- %13 = load i32* getelementptr inbounds ([10 x i32]* @s, i32 0, i32 0)
+ %13 = load i32, i32* getelementptr inbounds ([10 x i32], [10 x i32]* @s, i32 0, i32 0)
%14 = add nsw i32 %12, %13
- %15 = load i32* getelementptr inbounds ([10 x i32]* @s, i32 0, i32 9)
+ %15 = load i32, i32* getelementptr inbounds ([10 x i32], [10 x i32]* @s, i32 0, i32 9)
%16 = add nsw i32 %14, %15
ret i32 %16
}
@@ -132,7 +132,7 @@ entry:
@NoSize = external global [0 x i32]
define i32 @UnknownSize() nounwind {
entry:
- %0 = load i32* getelementptr inbounds ([0 x i32]* @NoSize, i32 0, i32 10)
+ %0 = load i32, i32* getelementptr inbounds ([0 x i32], [0 x i32]* @NoSize, i32 0, i32 10)
ret i32 %0
}
diff --git a/test/CodeGen/XCore/dwarf_debug.ll b/test/CodeGen/XCore/dwarf_debug.ll
index 8c9c47de6497..ba71dc798a04 100644
--- a/test/CodeGen/XCore/dwarf_debug.ll
+++ b/test/CodeGen/XCore/dwarf_debug.ll
@@ -6,15 +6,15 @@
; CHECK-LABEL: f
; CHECK: entsp 2
; ...the prologue...
-; CHECK: .loc 1 2 0 prologue_end # :2:0
+; CHECK: .loc 1 2 0 prologue_end # test.c:2:0
; CHECK: add r0, r0, 1
; CHECK: retsp 2
define i32 @f(i32 %a) {
entry:
%a.addr = alloca i32, align 4
store i32 %a, i32* %a.addr, align 4
- call void @llvm.dbg.declare(metadata i32* %a.addr, metadata !11, metadata !{!"0x102"}), !dbg !12
- %0 = load i32* %a.addr, align 4, !dbg !12
+ call void @llvm.dbg.declare(metadata i32* %a.addr, metadata !11, metadata !DIExpression()), !dbg !12
+ %0 = load i32, i32* %a.addr, align 4, !dbg !12
%add = add nsw i32 %0, 1, !dbg !12
ret i32 %add, !dbg !12
}
@@ -23,17 +23,16 @@ declare void @llvm.dbg.declare(metadata, metadata, metadata)
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!9, !10}
-!0 = !{!"0x11\0012\00\000\00\000\00\001", !1, !2, !2, !3, !2, !2} ; [ DW_TAG_compile_unit ]
-!1 = !{!"", !""}
+!0 = !DICompileUnit(language: DW_LANG_C99, isOptimized: false, emissionKind: 1, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!1 = !DIFile(filename: "test.c", directory: "")
!2 = !{}
!3 = !{!4}
-!4 = !{!"0x2e\00f\00f\00\002\000\001\000\006\00256\000\002", !1, !5, !6, null, i32 (i32)* @f, null, null, !2} ; [ DW_TAG_subprogram ]
-!5 = !{!"0x29", !1} ; [ DW_TAG_file_type ]
-!6 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !7, null, null, null} ; [ DW_TAG_subroutine_type ]
+!4 = !DISubprogram(name: "f", line: 2, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 2, file: !1, scope: !1, type: !6, function: i32 (i32)* @f, variables: !2)
+!6 = !DISubroutineType(types: !7)
!7 = !{!8, !8}
-!8 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ]
+!8 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
!9 = !{i32 2, !"Dwarf Version", i32 4}
-!10 = !{i32 2, !"Debug Info Version", i32 2}
-!11 = !{!"0x101\00a\0016777218\000", !4, !5, !8} ; [ DW_TAG_arg_variable ]
-!12 = !MDLocation(line: 2, scope: !4)
+!10 = !{i32 2, !"Debug Info Version", i32 3}
+!11 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "a", line: 2, arg: 1, scope: !4, file: !1, type: !8)
+!12 = !DILocation(line: 2, scope: !4)
diff --git a/test/CodeGen/XCore/epilogue_prologue.ll b/test/CodeGen/XCore/epilogue_prologue.ll
index 99978145ed36..923cc4a09e05 100644
--- a/test/CodeGen/XCore/epilogue_prologue.ll
+++ b/test/CodeGen/XCore/epilogue_prologue.ll
@@ -199,9 +199,9 @@ declare void @f5(i32*)
define i32 @f6(i32 %i) {
entry:
%0 = alloca [200000 x i32]
- %1 = getelementptr inbounds [200000 x i32]* %0, i32 0, i32 0
+ %1 = getelementptr inbounds [200000 x i32], [200000 x i32]* %0, i32 0, i32 0
call void @f5(i32* %1)
- %2 = getelementptr inbounds [200000 x i32]* %0, i32 0, i32 199999
+ %2 = getelementptr inbounds [200000 x i32], [200000 x i32]* %0, i32 0, i32 199999
call void @f5(i32* %2)
ret i32 %i
}
@@ -229,7 +229,7 @@ entry:
define void @f8() nounwind {
entry:
%0 = alloca [256 x i32]
- %1 = getelementptr inbounds [256 x i32]* %0, i32 0, i32 253
+ %1 = getelementptr inbounds [256 x i32], [256 x i32]* %0, i32 0, i32 253
call void @f5(i32* %1)
ret void
}
@@ -257,7 +257,7 @@ entry:
define void @f9() nounwind {
entry:
%0 = alloca [32768 x i32]
- %1 = getelementptr inbounds [32768 x i32]* %0, i32 0, i32 32765
+ %1 = getelementptr inbounds [32768 x i32], [32768 x i32]* %0, i32 0, i32 32765
call void @f5(i32* %1)
ret void
}
diff --git a/test/CodeGen/XCore/exception.ll b/test/CodeGen/XCore/exception.ll
index fec83eb15ea5..705c6b42ade7 100644
--- a/test/CodeGen/XCore/exception.ll
+++ b/test/CodeGen/XCore/exception.ll
@@ -39,10 +39,10 @@ entry:
unreachable
}
-; CHECK-LABEL: fn_catch
+; CHECK-LABEL: fn_catch:
+; CHECK-NEXT: [[START:.L[a-zA-Z0-9_]+]]
; CHECK: .cfi_startproc
; CHECK: .cfi_personality 0, __gxx_personality_v0
-; CHECK: [[START:.L[a-zA-Z0-9_]+]]
; CHECK: .cfi_lsda 0, [[LSDA:.L[a-zA-Z0-9_]+]]
; CHECK: entsp 4
; CHECK: .cfi_def_cfa_offset 16
@@ -78,21 +78,22 @@ cont:
; CHECK: bl __cxa_end_catch
lpad:
%0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+ cleanup
catch i8* bitcast (i8** @_ZTIi to i8*)
catch i8* bitcast (i8** @_ZTId to i8*)
%1 = extractvalue { i8*, i32 } %0, 0
%2 = extractvalue { i8*, i32 } %0, 1
%3 = call i8* @__cxa_begin_catch(i8* %1) nounwind
%4 = bitcast i8* %3 to i32*
- %5 = load i32* %4
+ %5 = load i32, i32* %4
call void @__cxa_end_catch() nounwind
; CHECK: eq r0, r6, r5
; CHECK: bf r0, [[RETURN]]
; CHECK: mov r0, r4
; CHECK: bl _Unwind_Resume
-; CHECK: .cfi_endproc
; CHECK: [[END:.L[a-zA-Z0-9_]+]]
+; CHECK: .cfi_endproc
%6 = icmp eq i32 %5, %2
br i1 %6, label %Resume, label %Exit
Resume:
@@ -110,13 +111,14 @@ Exit:
; CHECK: .long [[PRE_G]]-[[START]]
; CHECK: .long [[POST_G]]-[[PRE_G]]
; CHECK: .long [[LANDING]]-[[START]]
-; CHECK: .byte 3
+; CHECK: .byte 5
; CHECK: .long [[POST_G]]-[[START]]
; CHECK: .long [[END]]-[[POST_G]]
; CHECK: .long 0
; CHECK: .byte 0
-; CHECK: .byte 1
; CHECK: .byte 0
+; CHECK: .byte 1
+; CHECK: .byte 125
; CHECK: .byte 2
; CHECK: .byte 125
; CHECK: .long _ZTIi
diff --git a/test/CodeGen/XCore/indirectbr.ll b/test/CodeGen/XCore/indirectbr.ll
index d7758ea1d57c..9723cdcbdf14 100644
--- a/test/CodeGen/XCore/indirectbr.ll
+++ b/test/CodeGen/XCore/indirectbr.ll
@@ -6,7 +6,7 @@
define internal i32 @foo(i32 %i) nounwind {
; CHECK-LABEL: foo:
entry:
- %0 = load i8** @nextaddr, align 4 ; <i8*> [#uses=2]
+ %0 = load i8*, i8** @nextaddr, align 4 ; <i8*> [#uses=2]
%1 = icmp eq i8* %0, null ; <i1> [#uses=1]
br i1 %1, label %bb3, label %bb2
@@ -16,8 +16,8 @@ bb2: ; preds = %entry, %bb3
indirectbr i8* %gotovar.4.0, [label %L5, label %L4, label %L3, label %L2, label %L1]
bb3: ; preds = %entry
- %2 = getelementptr inbounds [5 x i8*]* @C.0.2070, i32 0, i32 %i ; <i8**> [#uses=1]
- %gotovar.4.0.pre = load i8** %2, align 4 ; <i8*> [#uses=1]
+ %2 = getelementptr inbounds [5 x i8*], [5 x i8*]* @C.0.2070, i32 0, i32 %i ; <i8**> [#uses=1]
+ %gotovar.4.0.pre = load i8*, i8** %2, align 4 ; <i8*> [#uses=1]
br label %bb2
L5: ; preds = %bb2
diff --git a/test/CodeGen/XCore/llvm-intrinsics.ll b/test/CodeGen/XCore/llvm-intrinsics.ll
index b436282615c2..b7868d350b4a 100644
--- a/test/CodeGen/XCore/llvm-intrinsics.ll
+++ b/test/CodeGen/XCore/llvm-intrinsics.ll
@@ -122,7 +122,7 @@ entry:
; CHECK-NEXT: ldw r0, sp[2]
; CHECK-NEXT: set sp, r2
; CHECK-NEXT: bau r3
- call void (...)* @foo()
+ call void (...) @foo()
call void @llvm.eh.return.i32(i32 %offset, i8* %handler)
unreachable
}
@@ -144,8 +144,8 @@ entry:
; CHECK-NEXT: ldw r0, sp[2]
; CHECK-NEXT: set sp, r2
; CHECK-NEXT: bau r3
- call void (...)* @foo()
- %0 = load i32* @offset
+ call void (...) @foo()
+ %0 = load i32, i32* @offset
call void @llvm.eh.return.i32(i32 %0, i8* @handler)
unreachable
}
@@ -244,7 +244,7 @@ define void @Unwind0() {
; CHECK: ldw r4, sp[9]
; CHECK: retsp 10
define void @Unwind1() {
- call void (...)* @foo()
+ call void (...) @foo()
call void @llvm.eh.unwind.init()
ret void
}
diff --git a/test/CodeGen/XCore/load.ll b/test/CodeGen/XCore/load.ll
index c7fc2a33db1a..bba7f72301f9 100644
--- a/test/CodeGen/XCore/load.ll
+++ b/test/CodeGen/XCore/load.ll
@@ -4,8 +4,8 @@ define i32 @load32(i32* %p, i32 %offset) nounwind {
entry:
; CHECK-LABEL: load32:
; CHECK: ldw r0, r0[r1]
- %0 = getelementptr i32* %p, i32 %offset
- %1 = load i32* %0, align 4
+ %0 = getelementptr i32, i32* %p, i32 %offset
+ %1 = load i32, i32* %0, align 4
ret i32 %1
}
@@ -13,8 +13,8 @@ define i32 @load32_imm(i32* %p) nounwind {
entry:
; CHECK-LABEL: load32_imm:
; CHECK: ldw r0, r0[11]
- %0 = getelementptr i32* %p, i32 11
- %1 = load i32* %0, align 4
+ %0 = getelementptr i32, i32* %p, i32 11
+ %1 = load i32, i32* %0, align 4
ret i32 %1
}
@@ -23,8 +23,8 @@ entry:
; CHECK-LABEL: load16:
; CHECK: ld16s r0, r0[r1]
; CHECK-NOT: sext
- %0 = getelementptr i16* %p, i32 %offset
- %1 = load i16* %0, align 2
+ %0 = getelementptr i16, i16* %p, i32 %offset
+ %1 = load i16, i16* %0, align 2
%2 = sext i16 %1 to i32
ret i32 %2
}
@@ -34,8 +34,8 @@ entry:
; CHECK-LABEL: load8:
; CHECK: ld8u r0, r0[r1]
; CHECK-NOT: zext
- %0 = getelementptr i8* %p, i32 %offset
- %1 = load i8* %0, align 1
+ %0 = getelementptr i8, i8* %p, i32 %offset
+ %1 = load i8, i8* %0, align 1
%2 = zext i8 %1 to i32
ret i32 %2
}
@@ -45,6 +45,6 @@ define i32 @load_cp() nounwind {
entry:
; CHECK-LABEL: load_cp:
; CHECK: ldw r0, cp[GConst]
- %0 = load i32* @GConst
+ %0 = load i32, i32* @GConst
ret i32 %0
}
diff --git a/test/CodeGen/XCore/offset_folding.ll b/test/CodeGen/XCore/offset_folding.ll
index 8085a0fd28a7..ab29ad587a50 100644
--- a/test/CodeGen/XCore/offset_folding.ll
+++ b/test/CodeGen/XCore/offset_folding.ll
@@ -8,7 +8,7 @@ entry:
; CHECK-LABEL: f1:
; CHECK: ldaw r11, cp[a+4]
; CHECK: mov r0, r11
- %0 = getelementptr [0 x i32]* @a, i32 0, i32 1
+ %0 = getelementptr [0 x i32], [0 x i32]* @a, i32 0, i32 1
ret i32* %0
}
@@ -16,7 +16,7 @@ define i32 *@f2() nounwind {
entry:
; CHECK-LABEL: f2:
; CHECK: ldaw r0, dp[b+4]
- %0 = getelementptr [0 x i32]* @b, i32 0, i32 1
+ %0 = getelementptr [0 x i32], [0 x i32]* @b, i32 0, i32 1
ret i32* %0
}
@@ -28,7 +28,7 @@ entry:
; CHECK-LABEL: f3:
; CHECK: ldaw r11, cp[a]
; CHECK: sub r0, r11, 4
- %0 = getelementptr [0 x i32]* @a, i32 0, i32 -1
+ %0 = getelementptr [0 x i32], [0 x i32]* @a, i32 0, i32 -1
ret i32* %0
}
@@ -37,6 +37,6 @@ entry:
; CHECK-LABEL: f4:
; CHECK: ldaw [[REG:r[0-9]+]], dp[b]
; CHECK: sub r0, [[REG]], 4
- %0 = getelementptr [0 x i32]* @b, i32 0, i32 -1
+ %0 = getelementptr [0 x i32], [0 x i32]* @b, i32 0, i32 -1
ret i32* %0
}
diff --git a/test/CodeGen/XCore/private.ll b/test/CodeGen/XCore/private.ll
index 474448a50888..a188864a866c 100644
--- a/test/CodeGen/XCore/private.ll
+++ b/test/CodeGen/XCore/private.ll
@@ -14,7 +14,7 @@ define i32 @bar() {
; CHECK: bl .Lfoo
; CHECK: ldw r0, dp[.Lbaz]
call void @foo()
- %1 = load i32* @baz, align 4
+ %1 = load i32, i32* @baz, align 4
ret i32 %1
}
diff --git a/test/CodeGen/XCore/scavenging.ll b/test/CodeGen/XCore/scavenging.ll
index a0c8a2e09379..7b6f54ebec24 100644
--- a/test/CodeGen/XCore/scavenging.ll
+++ b/test/CodeGen/XCore/scavenging.ll
@@ -17,21 +17,21 @@
define void @f() nounwind {
entry:
%x = alloca [100 x i32], align 4 ; <[100 x i32]*> [#uses=2]
- %0 = load i32* @size, align 4 ; <i32> [#uses=1]
+ %0 = load i32, i32* @size, align 4 ; <i32> [#uses=1]
%1 = alloca i32, i32 %0, align 4 ; <i32*> [#uses=1]
- %2 = load volatile i32* @g0, align 4 ; <i32> [#uses=1]
- %3 = load volatile i32* @g1, align 4 ; <i32> [#uses=1]
- %4 = load volatile i32* @g2, align 4 ; <i32> [#uses=1]
- %5 = load volatile i32* @g3, align 4 ; <i32> [#uses=1]
- %6 = load volatile i32* @g4, align 4 ; <i32> [#uses=1]
- %7 = load volatile i32* @g5, align 4 ; <i32> [#uses=1]
- %8 = load volatile i32* @g6, align 4 ; <i32> [#uses=1]
- %9 = load volatile i32* @g7, align 4 ; <i32> [#uses=1]
- %10 = load volatile i32* @g8, align 4 ; <i32> [#uses=1]
- %11 = load volatile i32* @g9, align 4 ; <i32> [#uses=1]
- %12 = load volatile i32* @g10, align 4 ; <i32> [#uses=1]
- %13 = load volatile i32* @g11, align 4 ; <i32> [#uses=2]
- %14 = getelementptr [100 x i32]* %x, i32 0, i32 50 ; <i32*> [#uses=1]
+ %2 = load volatile i32, i32* @g0, align 4 ; <i32> [#uses=1]
+ %3 = load volatile i32, i32* @g1, align 4 ; <i32> [#uses=1]
+ %4 = load volatile i32, i32* @g2, align 4 ; <i32> [#uses=1]
+ %5 = load volatile i32, i32* @g3, align 4 ; <i32> [#uses=1]
+ %6 = load volatile i32, i32* @g4, align 4 ; <i32> [#uses=1]
+ %7 = load volatile i32, i32* @g5, align 4 ; <i32> [#uses=1]
+ %8 = load volatile i32, i32* @g6, align 4 ; <i32> [#uses=1]
+ %9 = load volatile i32, i32* @g7, align 4 ; <i32> [#uses=1]
+ %10 = load volatile i32, i32* @g8, align 4 ; <i32> [#uses=1]
+ %11 = load volatile i32, i32* @g9, align 4 ; <i32> [#uses=1]
+ %12 = load volatile i32, i32* @g10, align 4 ; <i32> [#uses=1]
+ %13 = load volatile i32, i32* @g11, align 4 ; <i32> [#uses=2]
+ %14 = getelementptr [100 x i32], [100 x i32]* %x, i32 0, i32 50 ; <i32*> [#uses=1]
store i32 %13, i32* %14, align 4
store volatile i32 %13, i32* @g11, align 4
store volatile i32 %12, i32* @g10, align 4
@@ -45,7 +45,7 @@ entry:
store volatile i32 %4, i32* @g2, align 4
store volatile i32 %3, i32* @g1, align 4
store volatile i32 %2, i32* @g0, align 4
- %x1 = getelementptr [100 x i32]* %x, i32 0, i32 0 ; <i32*> [#uses=1]
+ %x1 = getelementptr [100 x i32], [100 x i32]* %x, i32 0, i32 0 ; <i32*> [#uses=1]
call void @g(i32* %x1, i32* %1) nounwind
ret void
}
@@ -103,15 +103,15 @@ declare void @g(i32*, i32*)
define void @ScavengeSlots(i32 %r0, i32 %r1, i32 %r2, i32 %r3, i32 %r4) nounwind {
entry:
%Data = alloca [100000 x i32]
- %i0 = getelementptr inbounds [100000 x i32]* %Data, i32 0, i32 80000
+ %i0 = getelementptr inbounds [100000 x i32], [100000 x i32]* %Data, i32 0, i32 80000
store volatile i32 %r0, i32* %i0
- %i1 = getelementptr inbounds [100000 x i32]* %Data, i32 0, i32 81000
+ %i1 = getelementptr inbounds [100000 x i32], [100000 x i32]* %Data, i32 0, i32 81000
store volatile i32 %r1, i32* %i1
- %i2 = getelementptr inbounds [100000 x i32]* %Data, i32 0, i32 82000
+ %i2 = getelementptr inbounds [100000 x i32], [100000 x i32]* %Data, i32 0, i32 82000
store volatile i32 %r2, i32* %i2
- %i3 = getelementptr inbounds [100000 x i32]* %Data, i32 0, i32 83000
+ %i3 = getelementptr inbounds [100000 x i32], [100000 x i32]* %Data, i32 0, i32 83000
store volatile i32 %r3, i32* %i3
- %i4 = getelementptr inbounds [100000 x i32]* %Data, i32 0, i32 84000
+ %i4 = getelementptr inbounds [100000 x i32], [100000 x i32]* %Data, i32 0, i32 84000
store volatile i32 %r4, i32* %i4
ret void
}
diff --git a/test/CodeGen/XCore/store.ll b/test/CodeGen/XCore/store.ll
index 87553d8da18a..a42b444bdff4 100644
--- a/test/CodeGen/XCore/store.ll
+++ b/test/CodeGen/XCore/store.ll
@@ -4,7 +4,7 @@ define void @store32(i32* %p, i32 %offset, i32 %val) nounwind {
entry:
; CHECK-LABEL: store32:
; CHECK: stw r2, r0[r1]
- %0 = getelementptr i32* %p, i32 %offset
+ %0 = getelementptr i32, i32* %p, i32 %offset
store i32 %val, i32* %0, align 4
ret void
}
@@ -13,7 +13,7 @@ define void @store32_imm(i32* %p, i32 %val) nounwind {
entry:
; CHECK-LABEL: store32_imm:
; CHECK: stw r1, r0[11]
- %0 = getelementptr i32* %p, i32 11
+ %0 = getelementptr i32, i32* %p, i32 11
store i32 %val, i32* %0, align 4
ret void
}
@@ -22,7 +22,7 @@ define void @store16(i16* %p, i32 %offset, i16 %val) nounwind {
entry:
; CHECK-LABEL: store16:
; CHECK: st16 r2, r0[r1]
- %0 = getelementptr i16* %p, i32 %offset
+ %0 = getelementptr i16, i16* %p, i32 %offset
store i16 %val, i16* %0, align 2
ret void
}
@@ -31,7 +31,7 @@ define void @store8(i8* %p, i32 %offset, i8 %val) nounwind {
entry:
; CHECK-LABEL: store8:
; CHECK: st8 r2, r0[r1]
- %0 = getelementptr i8* %p, i32 %offset
+ %0 = getelementptr i8, i8* %p, i32 %offset
store i8 %val, i8* %0, align 1
ret void
}
diff --git a/test/CodeGen/XCore/threads.ll b/test/CodeGen/XCore/threads.ll
index c50da1d5934e..0c25314295d7 100644
--- a/test/CodeGen/XCore/threads.ll
+++ b/test/CodeGen/XCore/threads.ll
@@ -78,7 +78,7 @@ define i32* @f_tl() {
; CHECK: ldc [[R2:r[0-9]]], 12
; r0 = id*12 + 8 + &tl
; CHECK: lmul {{r[0-9]}}, r0, r11, [[R2]], [[R0]], [[R1]]
- ret i32* getelementptr inbounds ([3 x i32]* @tl, i32 0, i32 2)
+ ret i32* getelementptr inbounds ([3 x i32], [3 x i32]* @tl, i32 0, i32 2)
}
define i32* @f_tle() {
@@ -88,7 +88,7 @@ define i32* @f_tle() {
; CHECK: ldaw [[R1:r[0-9]]], dp[tle]
; r0 = &tl + id*8
; CHECK: add r0, [[R1]], [[R0]]
- ret i32* getelementptr inbounds ([2 x i32]* @tle, i32 0, i32 0)
+ ret i32* getelementptr inbounds ([2 x i32], [2 x i32]* @tle, i32 0, i32 0)
}
define i32 @f_tlExpr () {
@@ -99,8 +99,8 @@ define i32 @f_tlExpr () {
; CHECK: add [[R2:r[0-9]]], [[R1]], [[R0]]
; CHECK: add r0, [[R2]], [[R2]]
ret i32 add(
- i32 ptrtoint( i32* getelementptr inbounds ([2 x i32]* @tle, i32 0, i32 0) to i32),
- i32 ptrtoint( i32* getelementptr inbounds ([2 x i32]* @tle, i32 0, i32 0) to i32))
+ i32 ptrtoint( i32* getelementptr inbounds ([2 x i32], [2 x i32]* @tle, i32 0, i32 0) to i32),
+ i32 ptrtoint( i32* getelementptr inbounds ([2 x i32], [2 x i32]* @tle, i32 0, i32 0) to i32))
}
define void @phiNode1() {
@@ -113,8 +113,8 @@ define void @phiNode1() {
entry:
br label %ConstantExpPhiNode
ConstantExpPhiNode:
- %ptr = phi i32* [ getelementptr inbounds ([3 x i32]* @tl, i32 0, i32 0), %entry ],
- [ getelementptr inbounds ([3 x i32]* @tl, i32 0, i32 0), %ConstantExpPhiNode ]
+ %ptr = phi i32* [ getelementptr inbounds ([3 x i32], [3 x i32]* @tl, i32 0, i32 0), %entry ],
+ [ getelementptr inbounds ([3 x i32], [3 x i32]* @tl, i32 0, i32 0), %ConstantExpPhiNode ]
br label %ConstantExpPhiNode
exit:
ret void
@@ -134,8 +134,8 @@ define void @phiNode2( i1 %bool) {
entry:
br i1 %bool, label %ConstantExpPhiNode, label %exit
ConstantExpPhiNode:
- %ptr = phi i32* [ getelementptr inbounds ([3 x i32]* @tl, i32 0, i32 0), %entry ],
- [ getelementptr inbounds ([3 x i32]* @tl, i32 0, i32 0), %ConstantExpPhiNode ]
+ %ptr = phi i32* [ getelementptr inbounds ([3 x i32], [3 x i32]* @tl, i32 0, i32 0), %entry ],
+ [ getelementptr inbounds ([3 x i32], [3 x i32]* @tl, i32 0, i32 0), %ConstantExpPhiNode ]
br label %ConstantExpPhiNode
exit:
ret void
diff --git a/test/CodeGen/XCore/trampoline.ll b/test/CodeGen/XCore/trampoline.ll
index 7ca331a60673..a7280000500f 100644
--- a/test/CodeGen/XCore/trampoline.ll
+++ b/test/CodeGen/XCore/trampoline.ll
@@ -9,14 +9,14 @@ entry:
; CHECK: stw r11, sp[7]
%TRAMP.23 = alloca [20 x i8], align 2
%FRAME.0 = alloca %struct.FRAME.f, align 4
- %TRAMP.23.sub = getelementptr inbounds [20 x i8]* %TRAMP.23, i32 0, i32 0
+ %TRAMP.23.sub = getelementptr inbounds [20 x i8], [20 x i8]* %TRAMP.23, i32 0, i32 0
%FRAME.02 = bitcast %struct.FRAME.f* %FRAME.0 to i8*
call void @llvm.init.trampoline(i8* %TRAMP.23.sub, i8* bitcast (i32 (%struct.FRAME.f*)* @g.1101 to i8*), i8* %FRAME.02)
%tramp = call i8* @llvm.adjust.trampoline(i8* %TRAMP.23.sub)
- %0 = getelementptr inbounds %struct.FRAME.f* %FRAME.0, i32 0, i32 1
+ %0 = getelementptr inbounds %struct.FRAME.f, %struct.FRAME.f* %FRAME.0, i32 0, i32 1
%1 = bitcast i8* %tramp to i32 ()*
store i32 ()* %1, i32 ()** %0, align 4
- %2 = getelementptr inbounds %struct.FRAME.f* %FRAME.0, i32 0, i32 0
+ %2 = getelementptr inbounds %struct.FRAME.f, %struct.FRAME.f* %FRAME.0, i32 0, i32 0
store i32 1, i32* %2, align 4
call void @h(i32 ()* %1) nounwind
ret void
@@ -28,8 +28,8 @@ entry:
; CHECK: ldw r11, sp[0]
; CHECK-NEXT: ldw r0, r11[0]
; CHECK-NEXT: retsp 0
- %0 = getelementptr inbounds %struct.FRAME.f* %CHAIN.1, i32 0, i32 0
- %1 = load i32* %0, align 4
+ %0 = getelementptr inbounds %struct.FRAME.f, %struct.FRAME.f* %CHAIN.1, i32 0, i32 0
+ %1 = load i32, i32* %0, align 4
ret i32 %1
}
diff --git a/test/CodeGen/XCore/unaligned_load.ll b/test/CodeGen/XCore/unaligned_load.ll
index b8b88275538c..325da8f4432d 100644
--- a/test/CodeGen/XCore/unaligned_load.ll
+++ b/test/CodeGen/XCore/unaligned_load.ll
@@ -5,7 +5,7 @@
; CHECK: bl __misaligned_load
define i32 @align1(i32* %p) nounwind {
entry:
- %0 = load i32* %p, align 1 ; <i32> [#uses=1]
+ %0 = load i32, i32* %p, align 1 ; <i32> [#uses=1]
ret i32 %0
}
@@ -16,7 +16,7 @@ entry:
; CHECK: or
define i32 @align2(i32* %p) nounwind {
entry:
- %0 = load i32* %p, align 2 ; <i32> [#uses=1]
+ %0 = load i32, i32* %p, align 2 ; <i32> [#uses=1]
ret i32 %0
}
@@ -29,6 +29,6 @@ entry:
; CHECK: or
define i32 @align3() nounwind {
entry:
- %0 = load i32* bitcast (i8* getelementptr ([5 x i8]* @a, i32 0, i32 1) to i32*), align 1
+ %0 = load i32, i32* bitcast (i8* getelementptr ([5 x i8], [5 x i8]* @a, i32 0, i32 1) to i32*), align 1
ret i32 %0
}
diff --git a/test/CodeGen/XCore/unaligned_store_combine.ll b/test/CodeGen/XCore/unaligned_store_combine.ll
index d1f4e6c15cd5..4b29a05984a1 100644
--- a/test/CodeGen/XCore/unaligned_store_combine.ll
+++ b/test/CodeGen/XCore/unaligned_store_combine.ll
@@ -7,7 +7,7 @@ entry:
; CHECK-LABEL: f:
; CHECK: ldc r2, 8
; CHECK: bl memmove
- %0 = load i64* %src, align 1
+ %0 = load i64, i64* %src, align 1
store i64 %0, i64* %dst, align 1
ret void
}
diff --git a/test/CodeGen/XCore/zextfree.ll b/test/CodeGen/XCore/zextfree.ll
index 48dce8865328..d1e2b7f85784 100644
--- a/test/CodeGen/XCore/zextfree.ll
+++ b/test/CodeGen/XCore/zextfree.ll
@@ -4,7 +4,7 @@
; CHECK-NOT: zext
define void @test(i8* %s1) {
entry:
- %u8 = load i8* %s1, align 1
+ %u8 = load i8, i8* %s1, align 1
%bool = icmp eq i8 %u8, 0
br label %BB1
BB1: